Add annotations.
This commit is contained in:
parent
3bbba35145
commit
1cc6c4e8e9
|
@ -1,5 +1,22 @@
|
||||||
#!/usr/bin/python3
|
#!/usr/bin/python3
|
||||||
|
|
||||||
|
'''
|
||||||
|
Monitor for Docklet
|
||||||
|
Description: The monitor system for Docklet collects data on the resource usage and status of vnodes
|
||||||
|
and physical machines. Master can then fetch this data and show it on the web page.
|
||||||
|
Besides, Monitor will also bill the vnodes according to the amount of resources they use.
|
||||||
|
|
||||||
|
Design:Monitor mainly consists of three parts: Collectors, Master_Collector and Fetchers.
|
||||||
|
1.Collectors will collect data every two seconds on each worker. And 'Container_Collector' will
|
||||||
|
collect data of containers(vnodes), while 'Collector' will collect data of physical machines.
|
||||||
|
2.'Master_Collector' only runs on Master. It fetches the data on workers every two seconds by rpc
|
||||||
|
and stores them in the memory of Master.
|
||||||
|
3.Fetchers are classes that Master uses to fetch specific data from memory and then show
|
||||||
|
them on the web. 'Container_Fetcher' is the class to fetch the containers data in 'monitor_vnodes',
|
||||||
|
while 'Fetcher' is the class to fetch the data of physical machines in 'monitor_hosts'.
|
||||||
|
'''
|
||||||
|
|
||||||
|
|
||||||
import subprocess,re,os,etcdlib,psutil,math,sys
|
import subprocess,re,os,etcdlib,psutil,math,sys
|
||||||
import time,threading,json,traceback,platform
|
import time,threading,json,traceback,platform
|
||||||
import env
|
import env
|
||||||
|
@ -10,30 +27,69 @@ from log import logger
|
||||||
from httplib2 import Http
|
from httplib2 import Http
|
||||||
from urllib.parse import urlencode
|
from urllib.parse import urlencode
|
||||||
|
|
||||||
a_cpu = 500
|
# billing parameters
|
||||||
b_mem = 1000000
|
a_cpu = 500 # seconds
|
||||||
c_disk = 4000
|
b_mem = 1000000 # MB
|
||||||
|
c_disk = 4000 # MB
|
||||||
|
|
||||||
monitor_hosts = {}
|
# major dict to store the monitoring data
|
||||||
|
# only use on Master
|
||||||
|
# monitor_hosts: use workers' ip addresses as first key.
|
||||||
|
# second key: cpuinfo,diskinfo,meminfo,osinfo,cpuconfig,running,containers,containerslist
|
||||||
|
# 1.cpuinfo stores the cpu usages data, and it has keys: user,system,idle,iowait
|
||||||
|
# 2.diskinfo stores the disks usages data, and it has keys: device,mountpoint,total,used,free,percent
|
||||||
|
# 3.meminfo stores the memory usages data, and it has keys: total,used,free,buffers,cached,percent
|
||||||
|
# 4.osinfo stores the information of operating system,
|
||||||
|
# and it has keys: platform,system,node,release,version,machine,processor
|
||||||
|
# 5.cpuconfig stores the information of processors, and it is a list, each element of list is a dict
|
||||||
|
# which stores the information of a processor, each element has key: processor,model name,
|
||||||
|
# core id, cpu MHz, cache size, physical id.
|
||||||
|
# 6.running indicates the status of worker,and it has two values: True, False.
|
||||||
|
# 7.containers stores the number of containers on the worker.
|
||||||
|
# 8.containerslist stores a list which consists of the names of containers on the worker.
|
||||||
|
moitor_hosts = {}
|
||||||
|
|
||||||
|
# monitor_vnodes: use the names of vnodes(containers) as first key.
|
||||||
|
# second key: cpu_use,mem_use,disk_use,basic_info,quota
|
||||||
|
# 1.cpu_use has keys: val,unit,hostpercent
|
||||||
|
# 2.mem_use has keys: val,unit,usedp
|
||||||
|
# 3.disk_use has keys: device,mountpoint,total,used,free,percent
|
||||||
|
# 4.basic_info has keys: Name,State,PID,IP,RunningTime,billing,billing_this_hour
|
||||||
|
# 5.quota has keys: cpu,memory
|
||||||
monitor_vnodes = {}
|
monitor_vnodes = {}
|
||||||
|
|
||||||
|
# major dict to store the monitoring data on Worker
|
||||||
|
# only use on Worker
|
||||||
|
# workerinfo: only store the data collected on current Worker,
|
||||||
|
# has the first keys same as the second keys in monitor_hosts.
|
||||||
workerinfo = {}
|
workerinfo = {}
|
||||||
|
|
||||||
|
# workercinfo: only store the data collected on current Worker,
|
||||||
|
# has the first keys same as the second keys in monitor_vnodes.
|
||||||
workercinfo = {}
|
workercinfo = {}
|
||||||
|
|
||||||
|
# only use on worker
|
||||||
containerpids = []
|
containerpids = []
|
||||||
pid2name = {}
|
pid2name = {}
|
||||||
G_masterip = ""
|
G_masterip = ""
|
||||||
|
|
||||||
|
# only use on worker
|
||||||
laststopcpuval = {}
|
laststopcpuval = {}
|
||||||
laststopruntime = {}
|
laststopruntime = {}
|
||||||
lastbillingtime = {}
|
lastbillingtime = {}
|
||||||
increment = {}
|
# increment has keys: lastcputime,memincrement.
|
||||||
|
# records the cpu val at the last billing time and accumulates the memory usage during this billing hour.
|
||||||
|
increment = {}
|
||||||
|
|
||||||
|
# send http request to master
|
||||||
def request_master(url,data):
|
def request_master(url,data):
|
||||||
|
global G_masterip
|
||||||
header = {'Content-Type':'application/x-www-form-urlencoded'}
|
header = {'Content-Type':'application/x-www-form-urlencoded'}
|
||||||
http = Http()
|
http = Http()
|
||||||
[resp,content] = http.request("http://"+G_masterip+url,"POST",urlencode(data),headers = header)
|
[resp,content] = http.request("http://"+G_masterip+url,"POST",urlencode(data),headers = header)
|
||||||
logger.info("response from master:"+content.decode('utf-8'))
|
logger.info("response from master:"+content.decode('utf-8'))
|
||||||
|
|
||||||
|
# The class is to collect data of containers on each worker
|
||||||
class Container_Collector(threading.Thread):
|
class Container_Collector(threading.Thread):
|
||||||
|
|
||||||
def __init__(self,test=False):
|
def __init__(self,test=False):
|
||||||
|
@ -41,15 +97,15 @@ class Container_Collector(threading.Thread):
|
||||||
global workercinfo
|
global workercinfo
|
||||||
threading.Thread.__init__(self)
|
threading.Thread.__init__(self)
|
||||||
self.thread_stop = False
|
self.thread_stop = False
|
||||||
self.interval = 2
|
self.interval = 2
|
||||||
self.billingtime = 3600
|
self.billingtime = 3600 # billing interval
|
||||||
self.test = test
|
self.test = test
|
||||||
self.cpu_last = {}
|
self.cpu_last = {}
|
||||||
self.cpu_quota = {}
|
self.cpu_quota = {}
|
||||||
self.mem_quota = {}
|
self.mem_quota = {}
|
||||||
self.cores_num = int(subprocess.getoutput("grep processor /proc/cpuinfo | wc -l"))
|
self.cores_num = int(subprocess.getoutput("grep processor /proc/cpuinfo | wc -l"))
|
||||||
containers = self.list_container()
|
containers = self.list_container()
|
||||||
for container in containers:
|
for container in containers: # recovery
|
||||||
if not container == '':
|
if not container == '':
|
||||||
try:
|
try:
|
||||||
vnode = VNode.query.get(container)
|
vnode = VNode.query.get(container)
|
||||||
|
@ -63,13 +119,15 @@ class Container_Collector(threading.Thread):
|
||||||
laststopcpuval[container] = 0
|
laststopcpuval[container] = 0
|
||||||
laststopruntime[container] = 0
|
laststopruntime[container] = 0
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# list containers on this worker
|
||||||
def list_container(self):
|
def list_container(self):
|
||||||
output = subprocess.check_output(["sudo lxc-ls"],shell=True)
|
output = subprocess.check_output(["sudo lxc-ls"],shell=True)
|
||||||
output = output.decode('utf-8')
|
output = output.decode('utf-8')
|
||||||
containers = re.split('\s+',output)
|
containers = re.split('\s+',output)
|
||||||
return containers
|
return containers
|
||||||
|
|
||||||
|
# get running time of a process, return seconds
|
||||||
def get_proc_etime(self,pid):
|
def get_proc_etime(self,pid):
|
||||||
fmt = subprocess.getoutput("ps -A -opid,etime | grep '^ *%d ' | awk '{print $NF}'" % pid).strip()
|
fmt = subprocess.getoutput("ps -A -opid,etime | grep '^ *%d ' | awk '{print $NF}'" % pid).strip()
|
||||||
if fmt == '':
|
if fmt == '':
|
||||||
|
@ -84,6 +142,8 @@ class Container_Collector(threading.Thread):
|
||||||
seconds = int(parts[1])
|
seconds = int(parts[1])
|
||||||
return ((days * 24 + hours) * 60 + minutes) * 60 + seconds
|
return ((days * 24 + hours) * 60 + minutes) * 60 + seconds
|
||||||
|
|
||||||
|
# compute the billing val this running hour
|
||||||
|
# if isreal is True, it will also deduct beans from the user to pay the bill.
|
||||||
@classmethod
|
@classmethod
|
||||||
def billing_increment(cls,vnode_name,isreal=True):
|
def billing_increment(cls,vnode_name,isreal=True):
|
||||||
global increment
|
global increment
|
||||||
|
|
Loading…
Reference in New Issue