From 1a866b9c8bd9d565fda8ecde39e0d0fe553e228c Mon Sep 17 00:00:00 2001 From: zhuyj17 Date: Sun, 14 Oct 2018 20:38:50 +0800 Subject: [PATCH 1/3] Update container collector to monitor container from batch job. --- src/master/testTaskCtrler.py | 6 +++--- src/worker/monitor.py | 11 +++++++---- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/master/testTaskCtrler.py b/src/master/testTaskCtrler.py index 32f425c..7574c6f 100644 --- a/src/master/testTaskCtrler.py +++ b/src/master/testTaskCtrler.py @@ -10,13 +10,13 @@ def run(): channel = grpc.insecure_channel('localhost:50051') stub = rpc_pb2_grpc.WorkerStub(channel) - comm = rpc_pb2.Command(commandLine="echo \"stestsfdsf\\ntewtgsdgfdsgret\newarsafsda\" > /root/oss/test-for-docklet/test.txt;ls /root/oss/test-for-docklet", packagePath="/root", envVars={'test1':'10','test2':'20'}) # | awk '{print \"test\\\"\\n\"}' - paras = rpc_pb2.Parameters(command=comm, stderrRedirectPath="/root/nfs/", stdoutRedirectPath="/root/oss/test-for-docklet") + comm = rpc_pb2.Command(commandLine="echo \"stestsfdsf\\ntewtgsdgfdsgret\newarsafsda\" > /root/test.txt;ls /root", packagePath="/root", envVars={'test1':'10','test2':'20'}) # | awk '{print \"test\\\"\\n\"}' + paras = rpc_pb2.Parameters(command=comm, stderrRedirectPath="/root/nfs/", stdoutRedirectPath="") img = rpc_pb2.Image(name="base", type=rpc_pb2.Image.BASE, owner="docklet") inst = rpc_pb2.Instance(cpu=2, memory=2000, disk=500, gpu=0) mnt = rpc_pb2.Mount(localPath="",provider='aliyun',remotePath="test-for-docklet",other="oss-cn-beijing.aliyuncs.com",accessKey="LTAIdl7gmmIhfqA9",secretKey="") - clu = rpc_pb2.Cluster(image=img, instance=inst, mount=[mnt]) + clu = rpc_pb2.Cluster(image=img, instance=inst, mount=[]) task = rpc_pb2.TaskInfo(id="test",username="root",instanceid=1,instanceCount=1,maxRetryCount=1,parameters=paras,cluster=clu,timeout=5,token="test") diff --git a/src/worker/monitor.py b/src/worker/monitor.py index 23f2892..a69c905 100755 --- a/src/worker/monitor.py +++ b/src/worker/monitor.py @@ -262,6 +262,7 @@ class Container_Collector(threading.Thread): global pid2name global laststopcpuval global laststopruntime + is_batch = container_name.split('-')[1] == 'batch' # collect basic information, such as running time,state,pid,ip,name container = lxc.Container(container_name) basic_info = {} @@ -286,7 +287,8 @@ class Container_Collector(threading.Thread): containerpids.append(container_pid_str) pid2name[container_pid_str] = container_name running_time = self.get_proc_etime(container.init_pid) - running_time += laststopruntime[container_name] + if not is_batch: + running_time += laststopruntime[container_name] basic_info['PID'] = container_pid_str basic_info['IP'] = container.get_ips()[0] basic_info['RunningTime'] = running_time @@ -326,7 +328,8 @@ class Container_Collector(threading.Thread): cpu_use = {} lastval = 0 try: - lastval = laststopcpuval[container_name] + if not is_batch: + lastval = laststopcpuval[container_name] except: logger.warning(traceback.format_exc()) cpu_val += lastval @@ -369,7 +372,7 @@ class Container_Collector(threading.Thread): # deal with network used data containerids = re.split("-",container_name) - if len(containerids) >= 3: + if not is_batch and len(containerids) >= 3: workercinfo[container_name]['net_stats'] = self.net_stats[containerids[1] + '-' + containerids[2]] #logger.info(workercinfo[container_name]['net_stats']) @@ -378,7 +381,7 @@ class Container_Collector(threading.Thread): lasttime = lastbillingtime[container_name] #logger.info(lasttime) # process real billing if running time reach an hour - if not int(running_time/self.billingtime) == lasttime: + if not is_batch and not int(running_time/self.billingtime) == lasttime: #logger.info("billing:"+str(float(cpu_val))) lastbillingtime[container_name] = int(running_time/self.billingtime) self.billing_increment(container_name) From c1ba31a4b6bc4d7047d4c4aeed35d2650be8350c Mon Sep 17 00:00:00 2001 From: zhuyj17 Date: Sun, 14 Oct 2018 21:34:46 +0800 Subject: [PATCH 2/3] Aggregate api of monitor at the backend --- src/master/httprest.py | 4 +++- src/master/monitor.py | 9 +++++++++ web/static/js/plot_monitor.js | 31 +++++++++++++------------------ web/templates/monitor/status.html | 19 +++++++------------ 4 files changed, 32 insertions(+), 31 deletions(-) diff --git a/src/master/httprest.py b/src/master/httprest.py index db31931..dd1991a 100755 --- a/src/master/httprest.py +++ b/src/master/httprest.py @@ -579,7 +579,9 @@ def vnodes_monitor(user, beans, form, con_id, issue): logger.info("handle request: monitor/vnodes") res = {} fetcher = monitor.Container_Fetcher(con_id) - if issue == 'cpu_use': + if issue == 'info': + res = fetcher.get_info() + elif issue == 'cpu_use': res['cpu_use'] = fetcher.get_cpu_use() elif issue == 'mem_use': res['mem_use'] = fetcher.get_mem_use() diff --git a/src/master/monitor.py b/src/master/monitor.py index 219d2d4..34097e4 100644 --- a/src/master/monitor.py +++ b/src/master/monitor.py @@ -111,6 +111,15 @@ class Container_Fetcher: self.con_id = container_name return + def get_info(self): + res = {} + res['cpu_use'] = self.get_cpu_use() + res['mem_use'] = self.get_mem_use() + res['disk_use'] = self.get_disk_use() + res['net_stats'] = self.get_net_stats() + res['basic_info'] = self.get_basic_info() + return res + def get_cpu_use(self): global monitor_vnodes try: diff --git a/web/static/js/plot_monitor.js b/web/static/js/plot_monitor.js index 6c87b63..d178006 100755 --- a/web/static/js/plot_monitor.js +++ b/web/static/js/plot_monitor.js @@ -194,19 +194,6 @@ var node_name = $("#node_name").html(); var masterip = $("#masterip").html(); var url = "http://" + host + "/monitor/" + masterip + "/vnodes/" + node_name; -function processDiskData() -{ - $.post(url+"/disk_use/",{},function(data){ - var diskuse = data.monitor.disk_use; - var usedp = diskuse.percent; - var total = diskuse.total/1024.0/1024.0; - var used = diskuse.used/1024.0/1024.0; - var detail = "("+used.toFixed(2)+"MiB/"+total.toFixed(2)+"MiB)"; - $("#con_disk").html(usedp+"%
"+detail); - },"json"); -} -setInterval(processDiskData,1000); - function num2human(data) { units=['','K','M','G','T']; @@ -222,9 +209,9 @@ function num2human(data) return tempdata.toFixed(2) + units[4]; } -function processBasicInfo() +function processInfo() { - $.post(url+"/basic_info/",{},function(data){ + $.post(url+"/info/",{},function(data){ basic_info = data.monitor.basic_info; state = basic_info.State; if(state == 'STOPPED') @@ -246,8 +233,16 @@ function processBasicInfo() $("#con_time").html(hour+"h "+min+"m "+secs+"s"); $("#con_billing").html(""+basic_info.billing+" "); $("#con_billingthishour").html(""+basic_info.billing_this_hour.total+" "); - },"json"); - $.post(url+"/net_stats/",{},function(data){ + + //processDiskData + var diskuse = data.monitor.disk_use; + var usedp = diskuse.percent; + var total = diskuse.total/1024.0/1024.0; + var used = diskuse.used/1024.0/1024.0; + var detail = "("+used.toFixed(2)+"MiB/"+total.toFixed(2)+"MiB)"; + $("#con_disk").html(usedp+"%
"+detail); + + //processNetStats var net_stats = data.monitor.net_stats; var in_rate = parseInt(net_stats.bytes_recv_per_sec); var out_rate = parseInt(net_stats.bytes_sent_per_sec); @@ -280,7 +275,7 @@ function plot_net(host,monitorurl) },"json"); } -setInterval(processBasicInfo,1000); +setInterval(processInfo,1000); plot_graph($("#mem-chart"),url + "/mem_use/",processMemData,getMemY); plot_graph($("#cpu-chart"),url + "/cpu_use/",processCpuData,getCpuY); plot_net(host, url + "/net_stats/"); diff --git a/web/templates/monitor/status.html b/web/templates/monitor/status.html index 41b72f4..a3391ed 100644 --- a/web/templates/monitor/status.html +++ b/web/templates/monitor/status.html @@ -314,16 +314,15 @@ function num2human(data) function update(url,index) { - $.post(url+"/basic_info/",{},function(data){ + $.post(url+"/info/",{},function(data){ - $.post(url+"/disk_use/",{},function(data){ - var diskuse = data.monitor.disk_use; + var diskuse = data.monitor.disk_use; var usedp = diskuse.percent; - var total = diskuse.total/1024.0/1024.0; - var used = diskuse.used/1024.0/1024.0; - var detail = "("+used.toFixed(2)+"MiB/"+total.toFixed(2)+"MiB)"; + var total = diskuse.total/1024.0/1024.0; + var used = diskuse.used/1024.0/1024.0; + var detail = "("+used.toFixed(2)+"MiB/"+total.toFixed(2)+"MiB)"; $("#"+index+"_disk").html(usedp+"%
"+detail); - },"json"); + var total = parseInt(data.monitor.basic_info.RunningTime); var hour = Math.floor(total / 3600); var min = Math.floor(total % 3600 / 60); @@ -364,7 +363,7 @@ function num2human(data) $("#"+index+"_billing_disk_c").html(data.monitor.basic_info.c_disk) $("#"+index+"_billing_port_d").html(data.monitor.basic_info.d_port) - var state = data.monitor.basic_info.State; + var state = data.monitor.basic_info.State; if(state == 'RUNNING') { var tmp = $("#"+index+"_state"); @@ -387,7 +386,6 @@ function num2human(data) return; } - $.post(url+"/cpu_use/",{},function(data){ var usedp = data.monitor.cpu_use.usedp; var quota = data.monitor.cpu_use.quota.cpu; var quotaout = "("+quota; @@ -396,16 +394,13 @@ function num2human(data) else quotaout += " Cores)"; $("#"+index+"_cpu").html((usedp/0.01).toFixed(2)+"%
"+quotaout); - },"json"); - $.post(url+"/mem_use/",{},function(data){ var usedp = data.monitor.mem_use.usedp; var unit = data.monitor.mem_use.unit; var quota = data.monitor.mem_use.quota.memory/1024.0; var val = data.monitor.mem_use.val; var out = "("+val+unit+"/"+quota.toFixed(2)+"MiB)"; $("#"+index+"_mem").html((usedp/0.01).toFixed(2)+"%
"+out); - },"json"); },"json"); } From 6a3e1eace792cd17e07f38e46de1ad05810d75d9 Mon Sep 17 00:00:00 2001 From: zhuyj17 Date: Sun, 14 Oct 2018 22:45:24 +0800 Subject: [PATCH 3/3] Aggregate http request on status realtime pages for monitor information --- web/static/js/plot_monitor.js | 67 +++++++++++++++++------------------ 1 file changed, 32 insertions(+), 35 deletions(-) diff --git a/web/static/js/plot_monitor.js b/web/static/js/plot_monitor.js index d178006..fd15e0e 100755 --- a/web/static/js/plot_monitor.js +++ b/web/static/js/plot_monitor.js @@ -8,21 +8,6 @@ var egress_rate_limit = 0; function processMemData(data) { - if(is_running) - { - mem_usedp = data.monitor.mem_use.usedp; - var usedp = data.monitor.mem_use.usedp; - var unit = data.monitor.mem_use.unit; - var quota = data.monitor.mem_use.quota.memory/1024.0; - var val = data.monitor.mem_use.val; - var out = "("+val+unit+"/"+quota.toFixed(2)+"MiB)"; - $("#con_mem").html((usedp/0.01).toFixed(2)+"%
"+out); - } - else - { - mem_usedp = 0; - $("#con_mem").html("--"); - } } function getMemY() { @@ -30,24 +15,6 @@ function getMemY() } function processCpuData(data) { - if(is_running) - { - cpu_usedp = data.monitor.cpu_use.usedp; - var val = (data.monitor.cpu_use.val).toFixed(2); - var unit = data.monitor.cpu_use.unit; - var quota = data.monitor.cpu_use.quota.cpu; - var quotaout = "("+quota; - if(quota == 1) - quotaout += " Core)"; - else - quotaout += " Cores)"; - $("#con_cpu").html(val +" "+ unit+"
"+quotaout); - } - else - { - cpu_usedp = 0; - $("#con_cpu").html("--"); - } } function getCpuY() { @@ -234,6 +201,36 @@ function processInfo() $("#con_billing").html(""+basic_info.billing+" "); $("#con_billingthishour").html(""+basic_info.billing_this_hour.total+" "); + if(is_running) + { + cpu_usedp = data.monitor.cpu_use.usedp; + var val = (data.monitor.cpu_use.val).toFixed(2); + var unit = data.monitor.cpu_use.unit; + var quota = data.monitor.cpu_use.quota.cpu; + var quotaout = "("+quota; + if(quota == 1) + quotaout += " Core)"; + else + quotaout += " Cores)"; + $("#con_cpu").html(val +" "+ unit+"
"+quotaout); + + mem_usedp = data.monitor.mem_use.usedp; + var usedp = data.monitor.mem_use.usedp; + unit = data.monitor.mem_use.unit; + var quota = data.monitor.mem_use.quota.memory/1024.0; + val = data.monitor.mem_use.val; + var out = "("+val+unit+"/"+quota.toFixed(2)+"MiB)"; + $("#con_mem").html((usedp/0.01).toFixed(2)+"%
"+out); + } + else + { + cpu_usedp = 0; + $("#con_cpu").html("--"); + + mem_usedp = 0; + $("#con_mem").html("--"); + } + //processDiskData var diskuse = data.monitor.disk_use; var usedp = diskuse.percent; @@ -276,6 +273,6 @@ function plot_net(host,monitorurl) } setInterval(processInfo,1000); -plot_graph($("#mem-chart"),url + "/mem_use/",processMemData,getMemY); -plot_graph($("#cpu-chart"),url + "/cpu_use/",processCpuData,getCpuY); +plot_graph($("#mem-chart"),url + "/mem_use/",processMemData,getMemY,false); +plot_graph($("#cpu-chart"),url + "/cpu_use/",processCpuData,getCpuY,false); plot_net(host, url + "/net_stats/");