Merge pull request #110 from FirmlyReality/realtime

Compute running time of containers.
This commit is contained in:
leebaok 2016-05-17 04:13:27 -05:00
commit 755f135f60
5 changed files with 122 additions and 39 deletions

View File

@ -23,7 +23,7 @@ import http.server, cgi, json, sys, shutil
from socketserver import ThreadingMixIn
import nodemgr, vclustermgr, etcdlib, network, imagemgr
import userManager
import monitor
import monitor,traceback
import threading
import sysmgr
@ -405,15 +405,15 @@ def vnodes_monitor(cur_user, user, form, con_id, issue):
global G_clustername
logger.info("handle request: monitor/vnodes")
res = {}
fetcher = monitor.Container_Fetcher()
fetcher = monitor.Container_Fetcher(con_id)
if issue == 'cpu_use':
res['cpu_use'] = fetcher.get_cpu_use(con_id)
res['cpu_use'] = fetcher.get_cpu_use()
elif issue == 'mem_use':
res['mem_use'] = fetcher.get_mem_use(con_id)
res['mem_use'] = fetcher.get_mem_use()
elif issue == 'disk_use':
res['disk_use'] = fetcher.get_disk_use(con_id)
res['disk_use'] = fetcher.get_disk_use()
elif issue == 'basic_info':
res['basic_info'] = fetcher.get_basic_info(con_id)
res['basic_info'] = fetcher.get_basic_info()
elif issue == 'owner':
names = con_id.split('-')
result = G_usermgr.query(username = names[0], cur_user = cur_user)
@ -657,6 +657,7 @@ def resetall_system(cur_user, user, form):
@app.errorhandler(500)
def internal_server_error(error):
    """Handle an HTTP 500 raised inside the application.

    Logs a short debug note plus the traceback of the exception currently
    being handled, then returns a JSON-encoded failure description.
    """
    # Body sent back to the client, serialised to JSON below.
    payload = {'success':'false', 'message':'500 Internal Server Error', 'Unauthorized': 'True'}
    logger.debug("An internel server error occured")
    logger.error(traceback.format_exc())
    return json.dumps(payload)

View File

@ -30,6 +30,20 @@ class Container_Collector(threading.Thread):
containers = re.split('\s+',output)
return containers
def get_proc_etime(self,pid):
    """Return the elapsed running time of process *pid* in whole seconds.

    The value is taken from the ``etime`` column of ``ps``, whose format is
    ``[[dd-]hh:]mm:ss``.

    Returns -1 when the process cannot be found, when ``ps`` cannot be run,
    or when its output cannot be parsed.
    """
    # Query exactly this pid.  The previous "ps -A | grep '^ *<pid>'" pipeline
    # was a prefix match, so pid 12 also matched 123, 1200, ... and produced
    # several lines that broke the parsing below.
    try:
        raw = subprocess.check_output(
            ["ps", "-o", "etime=", "-p", str(int(pid))],
            stderr=subprocess.DEVNULL)
    except (subprocess.CalledProcessError, OSError):
        # ps exits non-zero when no such process exists; OSError covers a
        # missing/unrunnable ps binary.
        return -1
    fmt = raw.decode().strip()
    if fmt == '':
        return -1
    # Optional "dd-" prefix, then the clock part "[hh:]mm:ss".
    day_part, _, clock = fmt.rpartition('-')
    fields = clock.split(':')
    if len(fields) not in (2, 3):
        return -1
    try:
        days = int(day_part) if day_part else 0
        numbers = [int(field) for field in fields]
    except ValueError:
        return -1
    hours = numbers[0] if len(numbers) == 3 else 0
    minutes, seconds = numbers[-2], numbers[-1]
    return ((days * 24 + hours) * 60 + minutes) * 60 + seconds
def collect_containerinfo(self,container_name):
global workercinfo
output = subprocess.check_output("sudo lxc-info -n %s" % (container_name),shell=True)
@ -37,6 +51,9 @@ class Container_Collector(threading.Thread):
parts = re.split('\n',output)
info = {}
basic_info = {}
basic_exist = 'basic_info' in workercinfo[container_name].keys()
if basic_exist:
basic_info = workercinfo[container_name]['basic_info']
for part in parts:
if not part == '':
key_val = re.split(':',part)
@ -45,11 +62,24 @@ class Container_Collector(threading.Thread):
info[key] = val.lstrip()
basic_info['Name'] = info['Name']
basic_info['State'] = info['State']
#if basic_exist:
# logger.info(workercinfo[container_name]['basic_info'])
if(info['State'] == 'STOPPED'):
workercinfo[container_name]['basic_info'] = basic_info
logger.info(basic_info)
return False
running_time = self.get_proc_etime(int(info['PID']))
if basic_exist and 'PID' in workercinfo[container_name]['basic_info'].keys():
last_time = workercinfo[container_name]['basic_info']['LastTime']
if not info['PID'] == workercinfo[container_name]['basic_info']['PID']:
last_time = workercinfo[container_name]['basic_info']['RunningTime']
else:
last_time = 0
basic_info['LastTime'] = last_time
running_time += last_time
basic_info['PID'] = info['PID']
basic_info['IP'] = info['IP']
basic_info['RunningTime'] = running_time
workercinfo[container_name]['basic_info'] = basic_info
cpu_parts = re.split(' +',info['CPU use'])
@ -256,6 +286,10 @@ def workerFetchInfo():
global workercinfo
return str([workerinfo, workercinfo])
def get_owner(container_name):
    """Return the part of *container_name* that precedes the first '-'.

    Callers in this module use that leading component as the owner key of
    the container.  A name without any '-' is returned unchanged.
    """
    owner, _, _ = container_name.partition('-')
    return owner
class Master_Collector(threading.Thread):
def __init__(self,nodemgr):
@ -274,12 +308,14 @@ class Master_Collector(threading.Thread):
for worker in workers:
try:
ip = self.nodemgr.rpc_to_ip(worker)
#[info,cinfo] = worker.workerFetchInfo()
info = list(eval(worker.workerFetchInfo()))
logger.info(info[1])
#logger.info(info[1])
monitor_hosts[ip] = info[0]
for container in info[1].keys():
monitor_vnodes[container] = info[1][container]
owner = get_owner(container)
if not owner in monitor_vnodes.keys():
monitor_vnodes[owner] = {}
monitor_vnodes[owner][container] = info[1][container]
except Exception as err:
logger.warning(traceback.format_exc())
logger.warning(err)
@ -291,45 +327,47 @@ class Master_Collector(threading.Thread):
return
class Container_Fetcher:
def __init__(self):
def __init__(self,container_name):
self.owner = get_owner(container_name)
self.con_id = container_name
return
def get_cpu_use(self,container_name):
def get_cpu_use(self):
global monitor_vnodes
try:
res = monitor_vnodes[container_name]['cpu_use']
res['quota'] = monitor_vnodes[container_name]['quota']
res = monitor_vnodes[self.owner][self.con_id]['cpu_use']
res['quota'] = monitor_vnodes[self.owner][self.con_id]['quota']
except Exception as err:
logger.warning(traceback.format_exc())
logger.warning(err)
res = {}
return res
def get_mem_use(self,container_name):
def get_mem_use(self):
global monitor_vnodes
try:
res = monitor_vnodes[container_name]['mem_use']
res['quota'] = monitor_vnodes[container_name]['quota']
res = monitor_vnodes[self.owner][self.con_id]['mem_use']
res['quota'] = monitor_vnodes[self.owner][self.con_id]['quota']
except Exception as err:
logger.warning(traceback.format_exc())
logger.warning(err)
res = {}
return res
def get_disk_use(self,container_name):
def get_disk_use(self):
global monitor_vnodes
try:
res = monitor_vnodes[container_name]['disk_use']
res = monitor_vnodes[self.owner][self.con_id]['disk_use']
except Exception as err:
logger.warning(traceback.format_exc())
logger.warning(err)
res = {}
return res
def get_basic_info(self,container_name):
def get_basic_info(self):
global monitor_vnodes
try:
res = monitor_vnodes[container_name]['basic_info']
res = monitor_vnodes[self.owner][self.con_id]['basic_info']
except Exception as err:
logger.warning(traceback.format_exc())
logger.warning(err)

View File

@ -1,15 +1,24 @@
var mem_usedp = 0;
var cpu_usedp = 0;
var is_running = true;
function processMemData(data)
{
mem_usedp = data.monitor.mem_use.usedp;
var usedp = data.monitor.mem_use.usedp;
var unit = data.monitor.mem_use.unit;
var quota = data.monitor.mem_use.quota.memory/1024.0;
var val = data.monitor.mem_use.val;
var out = "("+val+unit+"/"+quota.toFixed(2)+"MiB)";
$("#con_mem").html((usedp/0.01).toFixed(2)+"%<br/>"+out);
if(is_running)
{
mem_usedp = data.monitor.mem_use.usedp;
var usedp = data.monitor.mem_use.usedp;
var unit = data.monitor.mem_use.unit;
var quota = data.monitor.mem_use.quota.memory/1024.0;
var val = data.monitor.mem_use.val;
var out = "("+val+unit+"/"+quota.toFixed(2)+"MiB)";
$("#con_mem").html((usedp/0.01).toFixed(2)+"%<br/>"+out);
}
else
{
mem_usedp = 0;
$("#con_mem").html("--");
}
}
function getMemY()
{
@ -17,16 +26,24 @@ function getMemY()
}
function processCpuData(data)
{
cpu_usedp = data.monitor.cpu_use.usedp;
var val = data.monitor.cpu_use.val;
var unit = data.monitor.cpu_use.unit;
var quota = data.monitor.cpu_use.quota.cpu;
var quotaout = "("+quota;
if(quota == 1)
quotaout += " Core)";
if(is_running)
{
cpu_usedp = data.monitor.cpu_use.usedp;
var val = data.monitor.cpu_use.val;
var unit = data.monitor.cpu_use.unit;
var quota = data.monitor.cpu_use.quota.cpu;
var quotaout = "("+quota;
if(quota == 1)
quotaout += " Core)";
else
quotaout += " Cores)";
$("#con_cpu").html(val +" "+ unit+"<br/>"+quotaout);
}
else
quotaout += " Cores)";
$("#con_cpu").html(val +" "+ unit+"<br/>"+quotaout);
{
cpu_usedp = 0;
$("#con_cpu").html("--");
}
}
function getCpuY()
{
@ -173,3 +190,25 @@ function processDiskData()
},"json");
}
setInterval(processDiskData,1000);
/**
 * Fetch the container's basic info from the server and refresh the state,
 * IP address and running-time cells.  Also updates the shared `is_running`
 * flag that the CPU and memory handlers consult.
 */
function processBasicInfo()
{
    $.post(url+"/basic_info/",{},function(data){
        // Declared with `var` so they stay local to this callback instead of
        // leaking into the global scope.
        var basic_info = data.monitor.basic_info;
        var state = basic_info.State;
        if(state == 'STOPPED')
        {
            is_running = false;
            $("#con_state").html("<div class='label label-danger'>Stopped</div>");
            $("#con_ip").html("--");
        }
        else
        {
            is_running = true;
            $("#con_state").html("<div class='label label-primary'>Running</div>");
            $("#con_ip").html(basic_info.IP);
        }
        // RunningTime can be missing (e.g. the container has been stopped
        // since monitoring began); show a placeholder, not "undefineds".
        var running_time = basic_info.RunningTime;
        if(running_time === undefined || running_time === null)
            $("#con_time").html("--");
        else
            $("#con_time").html(running_time+"s");
    },"json");
}
setInterval(processBasicInfo,1000);

View File

@ -82,6 +82,7 @@
<th>Node Name</th>
<th>IP Address</th>
<th>Status</th>
<th>Running Time</th>
<th>Cpu Usage</th>
<th>Mem Usage</th>
<th>Disk Usage</th>
@ -100,6 +101,7 @@
{% else %}
<td><div id='{{cluster}}_{{ loop.index }}_state' class="label label-primary">Running</div></td>
{% endif %}
<td id='{{cluster}}_{{ loop.index }}_time'>--</td>
<td id='{{cluster}}_{{ loop.index }}_cpu'>--</td>
<td id='{{cluster}}_{{ loop.index }}_mem'>--</td>
<td id='{{cluster}}_{{ loop.index }}_disk'>--</td>
@ -124,7 +126,7 @@
$.post(url+"/basic_info/",{},function(data){
var state = data.monitor.basic_info.State;
if(state == 'RUNNING')
if(state == 'RUNNING')
{
var tmp = $("#"+index+"_state");
tmp.removeClass();
@ -145,6 +147,7 @@
$("#"+index+"_mem").html('--');
return;
}
$("#"+index+"_time").html(data.monitor.basic_info.RunningTime+"s")
$.post(url+"/cpu_use/",{},function(data){
var usedp = data.monitor.cpu_use.usedp;

View File

@ -41,6 +41,7 @@
<tr>
<th>State</th>
<th>IP Address</th>
<th>Running Time</th>
<th>CPU Usage</th>
<th>Mem Usage</th>
<th>Disk Usage</th>
@ -49,12 +50,13 @@
<tbody>
<tr>
{% if container['State'] == 'STOPPED' %}
<td><div id='con_state' class="label label-danger">Stopped</div></td>
<td id='con_state'><div class="label label-danger">Stopped</div></td>
<td id='con_ip'>--</td>
{% else %}
<td><div id='con_state' class="label label-primary">Running</div></td>
<td id='con_state'><div class="label label-primary">Running</div></td>
<td id='con_ip'>{{ container['IP'] }}</td>
{% endif %}
<td id='con_time'>{{ container['RunningTime'] }}s</td>
<td id='con_cpu'>--</td>
<td id='con_mem'>--</td>
<td id='con_disk'>--</td>