Add annotations.

This commit is contained in:
zhuyj17 2016-11-12 17:52:09 +08:00
parent 3bbba35145
commit 1cc6c4e8e9
1 changed files with 71 additions and 11 deletions

View File

@ -1,5 +1,22 @@
#!/usr/bin/python3 #!/usr/bin/python3
'''
Monitor for Docklet
Description:Monitor system for docklet will collect data on resources usages and status of vnode
and phyiscal machines. And master can fetch these data and then show them on the web page.
Besides, Monitor will also bill the vnodes according to their resources usage amount.
Design:Monitor mainly consists of three parts: Collectors, Master_Collector and Fetchers.
1.Collectors will collect data every two seconds on each worker. And 'Container_Collector' will
collect data of containers(vnodes), while 'Collector' will collect data of physical machines.
2.'Master_Collector' only runs on Master. It fetches the data on workers every two seconds by rpc
and stores them in the memory of Master.
3.Fetchers are classes that Master will use them to fetch specific data in the memory and then show
them on the web. 'Container_Fetcher' is the class to fetch the containers data in 'monitor_vnodes',
while 'Fetcher' is the class to fetch the data of physical machines in 'monitor_hosts'.
'''
import subprocess,re,os,etcdlib,psutil,math,sys import subprocess,re,os,etcdlib,psutil,math,sys
import time,threading,json,traceback,platform import time,threading,json,traceback,platform
import env import env
@ -10,30 +27,69 @@ from log import logger
from httplib2 import Http from httplib2 import Http
from urllib.parse import urlencode from urllib.parse import urlencode
a_cpu = 500 # billing parameters
b_mem = 1000000 a_cpu = 500 # seconds
c_disk = 4000 b_mem = 1000000 # MB
c_disk = 4000 # MB
monitor_hosts = {} # major dict to store the monitoring data
# only use on Master
# monitor_hosts: use workers' ip addresses as first key.
# second key: cpuinfo,diskinfo,meminfo,osinfo,cpuconfig,running,containers,containerslist
# 1.cpuinfo stores the cpu usages data, and it has keys: user,system,idle,iowait
# 2.diskinfo stores the disks usages data, and it has keys: device,mountpoint,total,used,free,percent
# 3.meminfo stores the memory usages data, and it has keys: total,used,free,buffers,cached,percent
# 4.osinfo stores the information of operating system,
# and it has keys: platform,system,node,release,version,machine,processor
# 5.cpuconfig stores the information of processors, and it is a list, each element of list is a dict
# which stores the information of a processor, each element has key: processor,model name,
# core id, cpu MHz, cache size, physical id.
# 6.running indicates the status of worker,and it has two values: True, False.
# 7.containers store the amount of containers on the worker.
# 8.containers store a list which consists of the names of containers on the worker.
moitor_hosts = {}
# monitor_vnodes: use the names of vnodes(containers) as first key.
# second key: cpu_use,mem_use,disk_use,basic_info,quota
# 1.cpu_use has keys: val,unit,hostpercent
# 2.mem_use has keys: val,unit,usedp
# 3.disk_use has keys: device,mountpoint,total,used,free,percent
# 4.basic_info has keys: Name,State,PID,IP,RunningTime,billing,billing_this_hour
# 5.quota has keys: cpu,memeory
monitor_vnodes = {} monitor_vnodes = {}
# major dict to store the monitoring data on Worker
# only use on Worker
# workerinfo: only store the data collected on current Worker,
# has the first keys same as the second keys in monitor_hosts.
workerinfo = {} workerinfo = {}
# workercinfo: only store the data collected on current Worker,
# has the first keys same as the second keys in monitor_vnodes.
workercinfo = {} workercinfo = {}
# only use on worker
containerpids = [] containerpids = []
pid2name = {} pid2name = {}
G_masterip = "" G_masterip = ""
# only use on worker
laststopcpuval = {} laststopcpuval = {}
laststopruntime = {} laststopruntime = {}
lastbillingtime = {} lastbillingtime = {}
increment = {} # increment has keys: lastcputime,memincrement.
# record the cpu val at last billing time and accumulate the memory usages during this billing hour.
increment = {}
# send http request to master
def request_master(url,data): def request_master(url,data):
global G_masterip
header = {'Content-Type':'application/x-www-form-urlencoded'} header = {'Content-Type':'application/x-www-form-urlencoded'}
http = Http() http = Http()
[resp,content] = http.request("http://"+G_masterip+url,"POST",urlencode(data),headers = header) [resp,content] = http.request("http://"+G_masterip+url,"POST",urlencode(data),headers = header)
logger.info("response from master:"+content.decode('utf-8')) logger.info("response from master:"+content.decode('utf-8'))
# The class is to collect data of containers on each worker
class Container_Collector(threading.Thread): class Container_Collector(threading.Thread):
def __init__(self,test=False): def __init__(self,test=False):
@ -41,15 +97,15 @@ class Container_Collector(threading.Thread):
global workercinfo global workercinfo
threading.Thread.__init__(self) threading.Thread.__init__(self)
self.thread_stop = False self.thread_stop = False
self.interval = 2 self.interval = 2
self.billingtime = 3600 self.billingtime = 3600 # billing interval
self.test = test self.test = test
self.cpu_last = {} self.cpu_last = {}
self.cpu_quota = {} self.cpu_quota = {}
self.mem_quota = {} self.mem_quota = {}
self.cores_num = int(subprocess.getoutput("grep processor /proc/cpuinfo | wc -l")) self.cores_num = int(subprocess.getoutput("grep processor /proc/cpuinfo | wc -l"))
containers = self.list_container() containers = self.list_container()
for container in containers: for container in containers: # recovery
if not container == '': if not container == '':
try: try:
vnode = VNode.query.get(container) vnode = VNode.query.get(container)
@ -63,13 +119,15 @@ class Container_Collector(threading.Thread):
laststopcpuval[container] = 0 laststopcpuval[container] = 0
laststopruntime[container] = 0 laststopruntime[container] = 0
return return
# list containers on this worker
def list_container(self): def list_container(self):
output = subprocess.check_output(["sudo lxc-ls"],shell=True) output = subprocess.check_output(["sudo lxc-ls"],shell=True)
output = output.decode('utf-8') output = output.decode('utf-8')
containers = re.split('\s+',output) containers = re.split('\s+',output)
return containers return containers
# get running time of a process, return seconds
def get_proc_etime(self,pid): def get_proc_etime(self,pid):
fmt = subprocess.getoutput("ps -A -opid,etime | grep '^ *%d ' | awk '{print $NF}'" % pid).strip() fmt = subprocess.getoutput("ps -A -opid,etime | grep '^ *%d ' | awk '{print $NF}'" % pid).strip()
if fmt == '': if fmt == '':
@ -84,6 +142,8 @@ class Container_Collector(threading.Thread):
seconds = int(parts[1]) seconds = int(parts[1])
return ((days * 24 + hours) * 60 + minutes) * 60 + seconds return ((days * 24 + hours) * 60 + minutes) * 60 + seconds
# compute the billing val this running hour
# if isreal is True, it will also make users' beans decrease to pay for the bill.
@classmethod @classmethod
def billing_increment(cls,vnode_name,isreal=True): def billing_increment(cls,vnode_name,isreal=True):
global increment global increment