diff --git a/src/httprest.py b/src/httprest.py index ae18254..66a5eb8 100755 --- a/src/httprest.py +++ b/src/httprest.py @@ -23,7 +23,7 @@ import http.server, cgi, json, sys, shutil from socketserver import ThreadingMixIn import nodemgr, vclustermgr, etcdlib, network, imagemgr import userManager -import monitor +import monitor,traceback import threading import sysmgr @@ -405,15 +405,15 @@ def vnodes_monitor(cur_user, user, form, con_id, issue): global G_clustername logger.info("handle request: monitor/vnodes") res = {} - fetcher = monitor.Container_Fetcher() + fetcher = monitor.Container_Fetcher(con_id) if issue == 'cpu_use': - res['cpu_use'] = fetcher.get_cpu_use(con_id) + res['cpu_use'] = fetcher.get_cpu_use() elif issue == 'mem_use': - res['mem_use'] = fetcher.get_mem_use(con_id) + res['mem_use'] = fetcher.get_mem_use() elif issue == 'disk_use': - res['disk_use'] = fetcher.get_disk_use(con_id) + res['disk_use'] = fetcher.get_disk_use() elif issue == 'basic_info': - res['basic_info'] = fetcher.get_basic_info(con_id) + res['basic_info'] = fetcher.get_basic_info() elif issue == 'owner': names = con_id.split('-') result = G_usermgr.query(username = names[0], cur_user = cur_user) @@ -657,6 +657,7 @@ def resetall_system(cur_user, user, form): @app.errorhandler(500) def internal_server_error(error): logger.debug("An internel server error occured") + logger.error(traceback.format_exc()) return json.dumps({'success':'false', 'message':'500 Internal Server Error', 'Unauthorized': 'True'}) diff --git a/src/monitor.py b/src/monitor.py index b22fcab..b2e1c7d 100755 --- a/src/monitor.py +++ b/src/monitor.py @@ -30,6 +30,20 @@ class Container_Collector(threading.Thread): containers = re.split('\s+',output) return containers + def get_proc_etime(self,pid): + fmt = subprocess.getoutput("ps -A -opid,etime | grep '^ *%d' | awk '{print $NF}'" % pid).strip() + if fmt == '': + return -1 + parts = fmt.split('-') + days = int(parts[0]) if len(parts) == 2 else 0 + fmt = parts[-1] + parts = fmt.split(':') + hours = int(parts[0]) if len(parts) == 3 else 0 + parts = parts[len(parts)-2:] + minutes = int(parts[0]) + seconds = int(parts[1]) + return ((days * 24 + hours) * 60 + minutes) * 60 + seconds + def collect_containerinfo(self,container_name): global workercinfo output = subprocess.check_output("sudo lxc-info -n %s" % (container_name),shell=True) @@ -37,6 +51,9 @@ class Container_Collector(threading.Thread): parts = re.split('\n',output) info = {} basic_info = {} + basic_exist = 'basic_info' in workercinfo[container_name].keys() + if basic_exist: + basic_info = workercinfo[container_name]['basic_info'] for part in parts: if not part == '': key_val = re.split(':',part) @@ -45,11 +62,24 @@ class Container_Collector(threading.Thread): info[key] = val.lstrip() basic_info['Name'] = info['Name'] basic_info['State'] = info['State'] + #if basic_exist: + # logger.info(workercinfo[container_name]['basic_info']) if(info['State'] == 'STOPPED'): workercinfo[container_name]['basic_info'] = basic_info + logger.info(basic_info) return False + running_time = self.get_proc_etime(int(info['PID'])) + if basic_exist and 'PID' in workercinfo[container_name]['basic_info'].keys(): + last_time = workercinfo[container_name]['basic_info']['LastTime'] + if not info['PID'] == workercinfo[container_name]['basic_info']['PID']: + last_time = workercinfo[container_name]['basic_info']['RunningTime'] + else: + last_time = 0 + basic_info['LastTime'] = last_time + running_time += last_time basic_info['PID'] = info['PID'] basic_info['IP'] = info['IP'] + basic_info['RunningTime'] = running_time workercinfo[container_name]['basic_info'] = basic_info cpu_parts = re.split(' +',info['CPU use']) @@ -256,6 +286,10 @@ def workerFetchInfo(): global workercinfo return str([workerinfo, workercinfo]) +def get_owner(container_name): + names = container_name.split('-') + return names[0] + class Master_Collector(threading.Thread): def __init__(self,nodemgr): @@ -274,12 +308,14 @@ class Master_Collector(threading.Thread): for worker in workers: try: ip = self.nodemgr.rpc_to_ip(worker) - #[info,cinfo] = worker.workerFetchInfo() info = list(eval(worker.workerFetchInfo())) - logger.info(info[1]) + #logger.info(info[1]) monitor_hosts[ip] = info[0] for container in info[1].keys(): - monitor_vnodes[container] = info[1][container] + owner = get_owner(container) + if not owner in monitor_vnodes.keys(): + monitor_vnodes[owner] = {} + monitor_vnodes[owner][container] = info[1][container] except Exception as err: logger.warning(traceback.format_exc()) logger.warning(err) @@ -291,45 +327,47 @@ class Master_Collector(threading.Thread): return class Container_Fetcher: - def __init__(self): + def __init__(self,container_name): + self.owner = get_owner(container_name) + self.con_id = container_name return - def get_cpu_use(self,container_name): + def get_cpu_use(self): global monitor_vnodes try: - res = monitor_vnodes[container_name]['cpu_use'] - res['quota'] = monitor_vnodes[container_name]['quota'] + res = monitor_vnodes[self.owner][self.con_id]['cpu_use'] + res['quota'] = monitor_vnodes[self.owner][self.con_id]['quota'] except Exception as err: logger.warning(traceback.format_exc()) logger.warning(err) res = {} return res - def get_mem_use(self,container_name): + def get_mem_use(self): global monitor_vnodes try: - res = monitor_vnodes[container_name]['mem_use'] - res['quota'] = monitor_vnodes[container_name]['quota'] + res = monitor_vnodes[self.owner][self.con_id]['mem_use'] + res['quota'] = monitor_vnodes[self.owner][self.con_id]['quota'] except Exception as err: logger.warning(traceback.format_exc()) logger.warning(err) res = {} return res - def get_disk_use(self,container_name): + def get_disk_use(self): global monitor_vnodes try: - res = monitor_vnodes[container_name]['disk_use'] + res = monitor_vnodes[self.owner][self.con_id]['disk_use'] except Exception as err: logger.warning(traceback.format_exc()) logger.warning(err) res = {} return res - def get_basic_info(self,container_name): + def get_basic_info(self): global monitor_vnodes try: - res = monitor_vnodes[container_name]['basic_info'] + res = monitor_vnodes[self.owner][self.con_id]['basic_info'] except Exception as err: logger.warning(traceback.format_exc()) logger.warning(err) diff --git a/web/static/js/plot_monitor.js b/web/static/js/plot_monitor.js index 53fcc3b..d72f10a 100755 --- a/web/static/js/plot_monitor.js +++ b/web/static/js/plot_monitor.js @@ -1,15 +1,24 @@ var mem_usedp = 0; var cpu_usedp = 0; +var is_running = true; function processMemData(data) { - mem_usedp = data.monitor.mem_use.usedp; - var usedp = data.monitor.mem_use.usedp; - var unit = data.monitor.mem_use.unit; - var quota = data.monitor.mem_use.quota.memory/1024.0; - var val = data.monitor.mem_use.val; - var out = "("+val+unit+"/"+quota.toFixed(2)+"MiB)"; - $("#con_mem").html((usedp/0.01).toFixed(2)+"%
"+out); + if(is_running) + { + mem_usedp = data.monitor.mem_use.usedp; + var usedp = data.monitor.mem_use.usedp; + var unit = data.monitor.mem_use.unit; + var quota = data.monitor.mem_use.quota.memory/1024.0; + var val = data.monitor.mem_use.val; + var out = "("+val+unit+"/"+quota.toFixed(2)+"MiB)"; + $("#con_mem").html((usedp/0.01).toFixed(2)+"%
"+out); + } + else + { + mem_usedp = 0; + $("#con_mem").html("--"); + } } function getMemY() { @@ -17,16 +26,24 @@ function getMemY() } function processCpuData(data) { - cpu_usedp = data.monitor.cpu_use.usedp; - var val = data.monitor.cpu_use.val; - var unit = data.monitor.cpu_use.unit; - var quota = data.monitor.cpu_use.quota.cpu; - var quotaout = "("+quota; - if(quota == 1) - quotaout += " Core)"; + if(is_running) + { + cpu_usedp = data.monitor.cpu_use.usedp; + var val = data.monitor.cpu_use.val; + var unit = data.monitor.cpu_use.unit; + var quota = data.monitor.cpu_use.quota.cpu; + var quotaout = "("+quota; + if(quota == 1) + quotaout += " Core)"; + else + quotaout += " Cores)"; + $("#con_cpu").html(val +" "+ unit+"
"+quotaout); + } else - quotaout += " Cores)"; - $("#con_cpu").html(val +" "+ unit+"
"+quotaout); + { + cpu_usedp = 0; + $("#con_cpu").html("--"); + } } function getCpuY() { @@ -173,3 +190,25 @@ function processDiskData() },"json"); } setInterval(processDiskData,1000); + +function processBasicInfo() +{ + $.post(url+"/basic_info/",{},function(data){ + basic_info = data.monitor.basic_info; + state = basic_info.State; + if(state == 'STOPPED') + { + is_running = false; + $("#con_state").html("
Stopped
"); + $("#con_ip").html("--"); + } + else + { + is_running = true; + $("#con_state").html("
Running
"); + $("#con_ip").html(basic_info.IP); + } + $("#con_time").html(basic_info.RunningTime+"s"); + },"json"); +} +setInterval(processBasicInfo,1000); diff --git a/web/templates/monitor/status.html b/web/templates/monitor/status.html index e8c1dac..f674ce6 100644 --- a/web/templates/monitor/status.html +++ b/web/templates/monitor/status.html @@ -82,6 +82,7 @@ Node Name IP Address Status + Running Time Cpu Usage Mem Usage Disk Usage @@ -100,6 +101,7 @@ {% else %}
Running
{% endif %} + -- -- -- -- @@ -124,7 +126,7 @@ $.post(url+"/basic_info/",{},function(data){ var state = data.monitor.basic_info.State; - if(state == 'RUNNING') + if(state == 'RUNNING') { var tmp = $("#"+index+"_state"); tmp.removeClass(); @@ -145,6 +147,7 @@ $("#"+index+"_mem").html('--'); return; } + $("#"+index+"_time").html(data.monitor.basic_info.RunningTime+"s") $.post(url+"/cpu_use/",{},function(data){ var usedp = data.monitor.cpu_use.usedp; diff --git a/web/templates/monitor/statusRealtime.html b/web/templates/monitor/statusRealtime.html index 290c93a..a847753 100644 --- a/web/templates/monitor/statusRealtime.html +++ b/web/templates/monitor/statusRealtime.html @@ -41,6 +41,7 @@ State IP Address + Running Time CPU Usage Mem Usage Disk Usage @@ -49,12 +50,13 @@ {% if container['State'] == 'STOPPED' %} -
Stopped
+
Stopped
-- {% else %} -
Running
+
Running
{{ container['IP'] }} {% endif %} + {{ container['RunningTime'] }}s -- -- --