Merge pull request #342 from FirmlyReality/batch

Update the container collector to monitor containers created by batch jobs.
Yujian Zhu 2018-10-14 22:49:28 +08:00 committed by GitHub
commit 941c70d4b3
6 changed files with 74 additions and 73 deletions

File 1 of 6:

@@ -579,7 +579,9 @@ def vnodes_monitor(user, beans, form, con_id, issue):
     logger.info("handle request: monitor/vnodes")
     res = {}
     fetcher = monitor.Container_Fetcher(con_id)
-    if issue == 'cpu_use':
+    if issue == 'info':
+        res = fetcher.get_info()
+    elif issue == 'cpu_use':
         res['cpu_use'] = fetcher.get_cpu_use()
     elif issue == 'mem_use':
         res['mem_use'] = fetcher.get_mem_use()
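Together with the frontend changes further down, the new 'info' issue lets a client fetch all vnode metrics in one request instead of five. A minimal polling sketch, assuming the URL layout used by the page scripts below (http://<web_host>/monitor/<masterip>/vnodes/<node_name>/<issue>/) and ignoring authentication/session handling; host and node values are hypothetical:

```python
# Minimal sketch of polling the combined endpoint (hypothetical host/node values;
# the URL shape and the 'monitor' wrapper key are taken from the page scripts below).
import requests

web_host = "localhost:8888"     # assumption: wherever the Docklet web frontend listens
masterip = "192.168.0.1"        # assumption: master IP as embedded in the page
node_name = "docklet-user1-0"   # assumption: vnode/container name

url = "http://%s/monitor/%s/vnodes/%s/info/" % (web_host, masterip, node_name)
data = requests.post(url).json()

mon = data['monitor']           # the view's result dict, i.e. what fetcher.get_info() returns
print(mon['basic_info']['State'], mon['cpu_use'], mon['mem_use'])
```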

File 2 of 6:

@@ -111,6 +111,15 @@ class Container_Fetcher:
         self.con_id = container_name
         return
 
+    def get_info(self):
+        res = {}
+        res['cpu_use'] = self.get_cpu_use()
+        res['mem_use'] = self.get_mem_use()
+        res['disk_use'] = self.get_disk_use()
+        res['net_stats'] = self.get_net_stats()
+        res['basic_info'] = self.get_basic_info()
+        return res
+
     def get_cpu_use(self):
         global monitor_vnodes
         try:
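The new aggregator simply bundles the five per-metric getters, so callers such as vnodes_monitor above need only one fetch. A usage sketch (method names exactly as in the hunk; the surrounding view code and the `monitor` module import are assumed from context):

```python
# One call now returns everything the vnode detail page needs.
fetcher = monitor.Container_Fetcher(con_id)
info = fetcher.get_info()
# info == {'cpu_use': ..., 'mem_use': ..., 'disk_use': ..., 'net_stats': ..., 'basic_info': ...}
cpu_use = info['cpu_use']
state = info['basic_info']['State']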

File 3 of 6:

@@ -10,13 +10,13 @@ def run():
     channel = grpc.insecure_channel('localhost:50051')
     stub = rpc_pb2_grpc.WorkerStub(channel)
-    comm = rpc_pb2.Command(commandLine="echo \"stestsfdsf\\ntewtgsdgfdsgret\newarsafsda\" > /root/oss/test-for-docklet/test.txt;ls /root/oss/test-for-docklet", packagePath="/root", envVars={'test1':'10','test2':'20'}) # | awk '{print \"test\\\"\\n\"}'
-    paras = rpc_pb2.Parameters(command=comm, stderrRedirectPath="/root/nfs/", stdoutRedirectPath="/root/oss/test-for-docklet")
+    comm = rpc_pb2.Command(commandLine="echo \"stestsfdsf\\ntewtgsdgfdsgret\newarsafsda\" > /root/test.txt;ls /root", packagePath="/root", envVars={'test1':'10','test2':'20'}) # | awk '{print \"test\\\"\\n\"}'
+    paras = rpc_pb2.Parameters(command=comm, stderrRedirectPath="/root/nfs/", stdoutRedirectPath="")
     img = rpc_pb2.Image(name="base", type=rpc_pb2.Image.BASE, owner="docklet")
     inst = rpc_pb2.Instance(cpu=2, memory=2000, disk=500, gpu=0)
     mnt = rpc_pb2.Mount(localPath="",provider='aliyun',remotePath="test-for-docklet",other="oss-cn-beijing.aliyuncs.com",accessKey="LTAIdl7gmmIhfqA9",secretKey="")
-    clu = rpc_pb2.Cluster(image=img, instance=inst, mount=[mnt])
+    clu = rpc_pb2.Cluster(image=img, instance=inst, mount=[])
     task = rpc_pb2.TaskInfo(id="test",username="root",instanceid=1,instanceCount=1,maxRetryCount=1,parameters=paras,cluster=clu,timeout=5,token="test")

File 4 of 6:

@@ -262,6 +262,7 @@ class Container_Collector(threading.Thread):
         global pid2name
         global laststopcpuval
         global laststopruntime
+        is_batch = container_name.split('-')[1] == 'batch'
         # collect basic information, such as running time,state,pid,ip,name
         container = lxc.Container(container_name)
         basic_info = {}
@@ -286,7 +287,8 @@ class Container_Collector(threading.Thread):
             containerpids.append(container_pid_str)
             pid2name[container_pid_str] = container_name
         running_time = self.get_proc_etime(container.init_pid)
-        running_time += laststopruntime[container_name]
+        if not is_batch:
+            running_time += laststopruntime[container_name]
         basic_info['PID'] = container_pid_str
         basic_info['IP'] = container.get_ips()[0]
         basic_info['RunningTime'] = running_time
@@ -326,7 +328,8 @@ class Container_Collector(threading.Thread):
         cpu_use = {}
         lastval = 0
         try:
-            lastval = laststopcpuval[container_name]
+            if not is_batch:
+                lastval = laststopcpuval[container_name]
         except:
             logger.warning(traceback.format_exc())
         cpu_val += lastval
@@ -369,7 +372,7 @@ class Container_Collector(threading.Thread):
         # deal with network used data
         containerids = re.split("-",container_name)
-        if len(containerids) >= 3:
+        if not is_batch and len(containerids) >= 3:
             workercinfo[container_name]['net_stats'] = self.net_stats[containerids[1] + '-' + containerids[2]]
             #logger.info(workercinfo[container_name]['net_stats'])
@@ -378,7 +381,7 @@ class Container_Collector(threading.Thread):
             lasttime = lastbillingtime[container_name]
             #logger.info(lasttime)
         # process real billing if running time reach an hour
-        if not int(running_time/self.billingtime) == lasttime:
+        if not is_batch and not int(running_time/self.billingtime) == lasttime:
             #logger.info("billing:"+str(float(cpu_val)))
             lastbillingtime[container_name] = int(running_time/self.billingtime)
             self.billing_increment(container_name)
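The collector now keys its special-casing off the container name: batch-job containers carry 'batch' as the second dash-separated field, and for them it skips restoring stopped-runtime/CPU baselines, per-user network stats, and hourly billing. A standalone sketch of that check (the exact batch-container name format is inferred from this diff, and the sample names are hypothetical):

```python
# Sketch of the naming-convention check used above (inferred; not the full collector).
def is_batch_container(container_name):
    # Batch workers are assumed to carry the literal segment 'batch' in the
    # second dash-separated position; regular vnodes carry the owner name there.
    parts = container_name.split('-')
    return len(parts) > 1 and parts[1] == 'batch'

# Regular vnodes keep billing and accumulated runtime; batch containers do not.
print(is_batch_container("docklet-batch-job1-0"))   # True  (hypothetical name)
print(is_batch_container("docklet-user1-0"))        # False (hypothetical name)
```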

File 5 of 6:

@@ -8,21 +8,6 @@ var egress_rate_limit = 0;
 function processMemData(data)
 {
-    if(is_running)
-    {
-        mem_usedp = data.monitor.mem_use.usedp;
-        var usedp = data.monitor.mem_use.usedp;
-        var unit = data.monitor.mem_use.unit;
-        var quota = data.monitor.mem_use.quota.memory/1024.0;
-        var val = data.monitor.mem_use.val;
-        var out = "("+val+unit+"/"+quota.toFixed(2)+"MiB)";
-        $("#con_mem").html((usedp/0.01).toFixed(2)+"%<br/>"+out);
-    }
-    else
-    {
-        mem_usedp = 0;
-        $("#con_mem").html("--");
-    }
 }
 function getMemY()
 {
@@ -30,24 +15,6 @@ function getMemY()
 }
 function processCpuData(data)
 {
-    if(is_running)
-    {
-        cpu_usedp = data.monitor.cpu_use.usedp;
-        var val = (data.monitor.cpu_use.val).toFixed(2);
-        var unit = data.monitor.cpu_use.unit;
-        var quota = data.monitor.cpu_use.quota.cpu;
-        var quotaout = "("+quota;
-        if(quota == 1)
-            quotaout += " Core)";
-        else
-            quotaout += " Cores)";
-        $("#con_cpu").html(val +" "+ unit+"<br/>"+quotaout);
-    }
-    else
-    {
-        cpu_usedp = 0;
-        $("#con_cpu").html("--");
-    }
 }
 function getCpuY()
 {
@@ -194,19 +161,6 @@ var node_name = $("#node_name").html();
 var masterip = $("#masterip").html();
 var url = "http://" + host + "/monitor/" + masterip + "/vnodes/" + node_name;
-function processDiskData()
-{
-    $.post(url+"/disk_use/",{},function(data){
-        var diskuse = data.monitor.disk_use;
-        var usedp = diskuse.percent;
-        var total = diskuse.total/1024.0/1024.0;
-        var used = diskuse.used/1024.0/1024.0;
-        var detail = "("+used.toFixed(2)+"MiB/"+total.toFixed(2)+"MiB)";
-        $("#con_disk").html(usedp+"%<br/>"+detail);
-    },"json");
-}
-setInterval(processDiskData,1000);
 function num2human(data)
 {
     units=['','K','M','G','T'];
@@ -222,9 +176,9 @@ function num2human(data)
         return tempdata.toFixed(2) + units[4];
 }
-function processBasicInfo()
+function processInfo()
 {
-    $.post(url+"/basic_info/",{},function(data){
+    $.post(url+"/info/",{},function(data){
         basic_info = data.monitor.basic_info;
         state = basic_info.State;
         if(state == 'STOPPED')
@@ -246,8 +200,46 @@ function processBasicInfo()
         $("#con_time").html(hour+"h "+min+"m "+secs+"s");
         $("#con_billing").html("<a target='_blank' title='How to figure out it?' href='https://unias.github.io/docklet/book/en/billing/billing.html'>"+basic_info.billing+" <img src='/static/img/bean.png' /></a>");
         $("#con_billingthishour").html("<a target='_blank' title='How to figure out it?' href='https://unias.github.io/docklet/book/en/billing/billing.html'>"+basic_info.billing_this_hour.total+" <img src='/static/img/bean.png' /></a>");
-    },"json");
-    $.post(url+"/net_stats/",{},function(data){
+        if(is_running)
+        {
+            cpu_usedp = data.monitor.cpu_use.usedp;
+            var val = (data.monitor.cpu_use.val).toFixed(2);
+            var unit = data.monitor.cpu_use.unit;
+            var quota = data.monitor.cpu_use.quota.cpu;
+            var quotaout = "("+quota;
+            if(quota == 1)
+                quotaout += " Core)";
+            else
+                quotaout += " Cores)";
+            $("#con_cpu").html(val +" "+ unit+"<br/>"+quotaout);
+            mem_usedp = data.monitor.mem_use.usedp;
+            var usedp = data.monitor.mem_use.usedp;
+            unit = data.monitor.mem_use.unit;
+            var quota = data.monitor.mem_use.quota.memory/1024.0;
+            val = data.monitor.mem_use.val;
+            var out = "("+val+unit+"/"+quota.toFixed(2)+"MiB)";
+            $("#con_mem").html((usedp/0.01).toFixed(2)+"%<br/>"+out);
+        }
+        else
+        {
+            cpu_usedp = 0;
+            $("#con_cpu").html("--");
+            mem_usedp = 0;
+            $("#con_mem").html("--");
+        }
+        //processDiskData
+        var diskuse = data.monitor.disk_use;
+        var usedp = diskuse.percent;
+        var total = diskuse.total/1024.0/1024.0;
+        var used = diskuse.used/1024.0/1024.0;
+        var detail = "("+used.toFixed(2)+"MiB/"+total.toFixed(2)+"MiB)";
+        $("#con_disk").html(usedp+"%<br/>"+detail);
+        //processNetStats
         var net_stats = data.monitor.net_stats;
         var in_rate = parseInt(net_stats.bytes_recv_per_sec);
         var out_rate = parseInt(net_stats.bytes_sent_per_sec);
@@ -280,7 +272,7 @@ function plot_net(host,monitorurl)
     },"json");
 }
-setInterval(processBasicInfo,1000);
-plot_graph($("#mem-chart"),url + "/mem_use/",processMemData,getMemY);
-plot_graph($("#cpu-chart"),url + "/cpu_use/",processCpuData,getCpuY);
+setInterval(processInfo,1000);
+plot_graph($("#mem-chart"),url + "/mem_use/",processMemData,getMemY,false);
+plot_graph($("#cpu-chart"),url + "/cpu_use/",processCpuData,getCpuY,false);
 plot_net(host, url + "/net_stats/");
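With processBasicInfo, processDiskData, and the separate cpu/mem/net POSTs folded into a single processInfo handler, the page now depends on one response that carries every section. A sketch of the fields that handler reads, written as a Python dict (key names come from the script above; all values are placeholders, not real units or magnitudes):

```python
# Fields read by processInfo() from data.monitor (names from the script above;
# values below are placeholders for illustration only).
monitor = {
    'basic_info': {'State': 'RUNNING', 'RunningTime': 3725,
                   'billing': 12, 'billing_this_hour': {'total': 1}},
    'cpu_use':  {'usedp': 0.42, 'val': 0.84, 'unit': '%', 'quota': {'cpu': 2}},
    'mem_use':  {'usedp': 0.30, 'val': 600, 'unit': 'MiB', 'quota': {'memory': 2097152}},
    'disk_use': {'percent': 55, 'total': 1073741824, 'used': 590558003},
    'net_stats': {'bytes_recv_per_sec': 1024, 'bytes_sent_per_sec': 2048},
}
```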

File 6 of 6:

@@ -314,16 +314,15 @@ function num2human(data)
 function update(url,index)
 {
-    $.post(url+"/basic_info/",{},function(data){
-        $.post(url+"/disk_use/",{},function(data){
-            var diskuse = data.monitor.disk_use;
+    $.post(url+"/info/",{},function(data){
+        var diskuse = data.monitor.disk_use;
         var usedp = diskuse.percent;
         var total = diskuse.total/1024.0/1024.0;
         var used = diskuse.used/1024.0/1024.0;
         var detail = "("+used.toFixed(2)+"MiB/"+total.toFixed(2)+"MiB)";
         $("#"+index+"_disk").html(usedp+"%<br/>"+detail);
-        },"json");
         var total = parseInt(data.monitor.basic_info.RunningTime);
         var hour = Math.floor(total / 3600);
         var min = Math.floor(total % 3600 / 60);
@@ -364,7 +363,7 @@ function num2human(data)
         $("#"+index+"_billing_disk_c").html(data.monitor.basic_info.c_disk)
         $("#"+index+"_billing_port_d").html(data.monitor.basic_info.d_port)
         var state = data.monitor.basic_info.State;
         if(state == 'RUNNING')
         {
             var tmp = $("#"+index+"_state");
@@ -387,7 +386,6 @@ function num2human(data)
             return;
         }
-        $.post(url+"/cpu_use/",{},function(data){
         var usedp = data.monitor.cpu_use.usedp;
         var quota = data.monitor.cpu_use.quota.cpu;
         var quotaout = "("+quota;
@@ -396,16 +394,13 @@ function num2human(data)
         else
             quotaout += " Cores)";
         $("#"+index+"_cpu").html((usedp/0.01).toFixed(2)+"%<br/>"+quotaout);
-        },"json");
-        $.post(url+"/mem_use/",{},function(data){
         var usedp = data.monitor.mem_use.usedp;
         var unit = data.monitor.mem_use.unit;
         var quota = data.monitor.mem_use.quota.memory/1024.0;
         var val = data.monitor.mem_use.val;
         var out = "("+val+unit+"/"+quota.toFixed(2)+"MiB)";
         $("#"+index+"_mem").html((usedp/0.01).toFixed(2)+"%<br/>"+out);
-        },"json");
     },"json");
 }