Add annotations.

zhuyj17 2016-11-12 17:52:09 +08:00
parent 3bbba35145
commit 1cc6c4e8e9
1 changed file with 71 additions and 11 deletions


@@ -1,5 +1,22 @@
#!/usr/bin/python3
'''
Monitor for Docklet
Description: The monitor system for docklet collects data on resource usage and the status of vnodes
and physical machines. The master can then fetch these data and show them on the web pages.
Besides, the Monitor also bills the vnodes according to the amount of resources they use.
Design: The Monitor mainly consists of three parts: Collectors, Master_Collector and Fetchers.
1.Collectors will collect data every two seconds on each worker. And 'Container_Collector' will
collect data of containers(vnodes), while 'Collector' will collect data of physical machines.
2.'Master_Collector' only runs on Master. It fetches the data on workers every two seconds by rpc
and stores them in the memory of Master.
3.Fetchers are classes that the Master uses to fetch specific data from its memory and then show
them on the web. 'Container_Fetcher' is the class that fetches container data from 'monitor_vnodes',
while 'Fetcher' is the class that fetches physical machine data from 'monitor_hosts'.
'''
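A minimal sketch of the collect/fetch loop the docstring describes, with hypothetical names (fetch_from_worker stands in for the rpc call the Master_Collector makes; it is not part of monitor.py):

import time

monitor_hosts, monitor_vnodes = {}, {}      # local stand-ins for the master-side caches defined later in this file

def fetch_from_worker(ip):
    # stand-in for the real rpc call that asks a worker's collectors for their data
    return {'hostinfo': {'running': True}, 'vnodeinfo': {}}

def master_collector_round(worker_ips):
    # one two-second round of the Master_Collector loop
    for ip in worker_ips:
        data = fetch_from_worker(ip)
        monitor_hosts[ip] = data['hostinfo']        # per-host data, later read by Fetcher
        monitor_vnodes.update(data['vnodeinfo'])    # per-container data, later read by Container_Fetcher
    time.sleep(2)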
import subprocess,re,os,etcdlib,psutil,math,sys
import time,threading,json,traceback,platform
import env
@@ -10,30 +27,69 @@ from log import logger
from httplib2 import Http
from urllib.parse import urlencode
# billing parameters
a_cpu = 500     # seconds
b_mem = 1000000 # MB
c_disk = 4000   # MB
# major dict to store the monitoring data
# only used on the Master
# monitor_hosts: uses workers' ip addresses as the first key.
# second key: cpuinfo,diskinfo,meminfo,osinfo,cpuconfig,running,containers,containerslist
# 1.cpuinfo stores the cpu usages data, and it has keys: user,system,idle,iowait
# 2.diskinfo stores the disks usages data, and it has keys: device,mountpoint,total,used,free,percent
# 3.meminfo stores the memory usages data, and it has keys: total,used,free,buffers,cached,percent
# 4.osinfo stores the information of operating system,
# and it has keys: platform,system,node,release,version,machine,processor
# 5.cpuconfig stores the information of processors, and it is a list, each element of list is a dict
# which stores the information of a processor, each element has key: processor,model name,
# core id, cpu MHz, cache size, physical id.
# 6.running indicates the status of the worker, and it has two values: True, False.
# 7.containers stores the number of containers on the worker.
# 8.containerslist stores a list of the names of the containers on the worker.
monitor_hosts = {}
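To make the nesting described above concrete, a hedged example of one monitor_hosts entry; every value is a placeholder, and cpuconfig is omitted for brevity:

monitor_hosts_example = {
    '192.168.0.2': {                                   # worker ip as the first key
        'cpuinfo': {'user': 1.2, 'system': 0.8, 'idle': 97.5, 'iowait': 0.5},
        'meminfo': {'total': 16384, 'used': 4096, 'free': 12288,
                    'buffers': 512, 'cached': 1024, 'percent': 25.0},
        'diskinfo': [{'device': '/dev/sda1', 'mountpoint': '/',
                      'total': 100, 'used': 40, 'free': 60, 'percent': 40.0}],
        'osinfo': {'platform': 'Linux', 'system': 'Linux', 'node': 'worker-1',
                   'release': '4.4.0', 'version': '#1 SMP', 'machine': 'x86_64',
                   'processor': 'x86_64'},
        'running': True,
        'containers': 2,
        'containerslist': ['user-1-0', 'user-2-0'],
    },
}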
# monitor_vnodes: uses the names of vnodes(containers) as the first key.
# second key: cpu_use,mem_use,disk_use,basic_info,quota
# 1.cpu_use has keys: val,unit,hostpercent
# 2.mem_use has keys: val,unit,usedp
# 3.disk_use has keys: device,mountpoint,total,used,free,percent
# 4.basic_info has keys: Name,State,PID,IP,RunningTime,billing,billing_this_hour
# 5.quota has keys: cpu,memory
monitor_vnodes = {}
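Similarly, a hedged example of one monitor_vnodes entry; the values and units are placeholders chosen only to illustrate the keys listed above:

monitor_vnodes_example = {
    'user-1-0': {                                      # vnode (container) name as the first key
        'cpu_use':  {'val': 12.5, 'unit': '%', 'hostpercent': 1.5},
        'mem_use':  {'val': 256, 'unit': 'MiB', 'usedp': 0.25},
        'disk_use': {'device': '/dev/sda1', 'mountpoint': '/',
                     'total': 1000, 'used': 200, 'free': 800, 'percent': 20.0},
        'basic_info': {'Name': 'user-1-0', 'State': 'RUNNING', 'PID': 1234,
                       'IP': '10.0.3.2', 'RunningTime': 3600,
                       'billing': 5, 'billing_this_hour': 1},
        'quota': {'cpu': 1, 'memory': 1024},
    },
}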
# major dict to store the monitoring data on Worker
# only used on the Worker
# workerinfo: only stores the data collected on the current Worker;
# its first keys are the same as the second keys of monitor_hosts.
workerinfo = {}
# workercinfo: only stores the data collected on the current Worker;
# its first keys are the same as the second keys of monitor_vnodes.
workercinfo = {}
# only used on the worker
containerpids = []
pid2name = {}
G_masterip = ""
# only used on the worker
laststopcpuval = {}
laststopruntime = {}
lastbillingtime = {}
# increment has keys: lastcputime,memincrement.
# records the cpu time at the last billing point and accumulates the memory usage during the current billing hour.
increment = {}
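Following the comment above, one increment entry per vnode might look like this (placeholder values only):

increment_example = {
    'user-1-0': {
        'lastcputime': 120.0,    # cpu time recorded at the last billing point
        'memincrement': 2048,    # memory usage accumulated within this billing hour
    },
}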
# send http request to master
def request_master(url,data):
global G_masterip
header = {'Content-Type':'application/x-www-form-urlencoded'}
http = Http()
[resp,content] = http.request("http://"+G_masterip+url,"POST",urlencode(data),headers = header)
logger.info("response from master:"+content.decode('utf-8'))
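A hypothetical call, just to show how request_master is meant to be used; the URL path and payload fields below are made up for illustration and are not necessarily routes the master actually exposes:

G_masterip = "192.168.0.1:9000"                                          # assumed ip:port of the master
request_master("/billing/", {"vnode_name": "user-1-0", "billing": 2})    # hypothetical route and fields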
# this class collects the data of containers on each worker
class Container_Collector(threading.Thread):
def __init__(self,test=False):
@@ -42,14 +98,14 @@ class Container_Collector(threading.Thread):
threading.Thread.__init__(self)
self.thread_stop = False
self.interval = 2
self.billingtime = 3600 # billing interval
self.test = test
self.cpu_last = {}
self.cpu_quota = {}
self.mem_quota = {}
self.cores_num = int(subprocess.getoutput("grep processor /proc/cpuinfo | wc -l"))
containers = self.list_container()
for container in containers: # recovery
if not container == '':
try:
vnode = VNode.query.get(container)
@@ -64,12 +120,14 @@ class Container_Collector(threading.Thread):
laststopruntime[container] = 0
return
# list containers on this worker
def list_container(self):
output = subprocess.check_output(["sudo lxc-ls"],shell=True)
output = output.decode('utf-8')
containers = re.split('\s+',output)
return containers
# get running time of a process, return seconds
def get_proc_etime(self,pid):
fmt = subprocess.getoutput("ps -A -opid,etime | grep '^ *%d ' | awk '{print $NF}'" % pid).strip()
if fmt == '':
@@ -84,6 +142,8 @@ class Container_Collector(threading.Thread):
seconds = int(parts[1])
return ((days * 24 + hours) * 60 + minutes) * 60 + seconds
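The hunk above turns 'ps -o etime' output into seconds. A self-contained re-implementation of the same '[[dd-]hh:]mm:ss' arithmetic, for illustration only (not the module's own helper):

def etime_to_seconds(fmt):
    # 'ps -o etime' prints '[[dd-]hh:]mm:ss', e.g. '1-02:03:04' or '05:42'
    days = 0
    if '-' in fmt:
        dstr, fmt = fmt.split('-')
        days = int(dstr)
    parts = [int(p) for p in fmt.split(':')]
    while len(parts) < 3:        # pad missing hours (and minutes) with zeros
        parts.insert(0, 0)
    hours, minutes, seconds = parts
    return ((days * 24 + hours) * 60 + minutes) * 60 + seconds

assert etime_to_seconds('05:42') == 342
assert etime_to_seconds('1-02:03:04') == 93784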
# compute the billing value for this running hour
# if isreal is True, it will also deduct the user's beans to pay the bill.
@classmethod
def billing_increment(cls,vnode_name,isreal=True):
global increment global increment
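The body of billing_increment is cut off by this hunk. Purely as a guess at how the a_cpu/b_mem/c_disk parameters defined above could combine into an hourly bill (not necessarily the formula the commit actually uses):

import math

def estimate_billing(cpu_seconds, avg_mem_mb, disk_quota_mb,
                     a_cpu=500, b_mem=1000000, c_disk=4000):
    # hypothetical: one 'bean' per a_cpu seconds of cpu, b_mem MB of average
    # memory and c_disk MB of disk quota, rounded up to an integer
    return math.ceil(cpu_seconds / a_cpu + avg_mem_mb / b_mem + disk_quota_mb / c_disk)

print(estimate_billing(cpu_seconds=1000, avg_mem_mb=512, disk_quota_mb=2000))  # -> 3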