commit
0c14f3684d
|
@ -792,6 +792,7 @@ def resetall_system(user, beans, form):
|
||||||
|
|
||||||
@app.route("/batch/job/add/", methods=['POST'])
|
@app.route("/batch/job/add/", methods=['POST'])
|
||||||
@login_required
|
@login_required
|
||||||
|
@beans_check
|
||||||
def add_job(user,beans,form):
|
def add_job(user,beans,form):
|
||||||
global G_jobmgr
|
global G_jobmgr
|
||||||
job_data = form.to_dict()
|
job_data = form.to_dict()
|
||||||
|
@ -879,6 +880,17 @@ def list_job(user,beans,form):
|
||||||
}
|
}
|
||||||
return json.dumps(result)
|
return json.dumps(result)
|
||||||
|
|
||||||
|
@app.route("/batch/job/info/", methods=['POST'])
|
||||||
|
@login_required
|
||||||
|
def info_job(user,beans,form):
|
||||||
|
global G_jobmgr
|
||||||
|
jobid = form.get("jobid","")
|
||||||
|
[success, data] = G_jobmgr.get_job(user, jobid)
|
||||||
|
if success:
|
||||||
|
return json.dumps({'success':'true', 'data':data})
|
||||||
|
else:
|
||||||
|
return json.dumps({'success':'false', 'message': data})
|
||||||
|
|
||||||
@app.route("/batch/job/stop/", methods=['POST'])
|
@app.route("/batch/job/stop/", methods=['POST'])
|
||||||
@login_required
|
@login_required
|
||||||
def stop_job(user,beans,form):
|
def stop_job(user,beans,form):
|
||||||
|
@ -904,12 +916,6 @@ def get_output(user,beans,form):
|
||||||
}
|
}
|
||||||
return json.dumps(result)
|
return json.dumps(result)
|
||||||
|
|
||||||
|
|
||||||
@app.route("/batch/job/info/", methods=['POST'])
|
|
||||||
@login_required
|
|
||||||
def info_job(user,beans,form):
|
|
||||||
pass
|
|
||||||
|
|
||||||
@app.route("/batch/task/info/", methods=['POST'])
|
@app.route("/batch/task/info/", methods=['POST'])
|
||||||
@login_required
|
@login_required
|
||||||
def info_task(user,beans,form):
|
def info_task(user,beans,form):
|
||||||
|
|
|
@ -1,33 +1,46 @@
|
||||||
import time, threading, random, string, os, traceback
|
import time, threading, random, string, os, traceback, requests
|
||||||
import master.monitor
|
import master.monitor
|
||||||
import subprocess,json
|
import subprocess,json
|
||||||
from functools import wraps
|
from functools import wraps
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
from utils.log import initlogging, logger
|
from utils.log import initlogging, logger
|
||||||
|
from utils.model import db, Batchjob, Batchtask
|
||||||
from utils import env
|
from utils import env
|
||||||
|
|
||||||
|
def db_commit():
|
||||||
|
try:
|
||||||
|
db.session.commit()
|
||||||
|
except Exception as err:
|
||||||
|
db.session.rollback()
|
||||||
|
logger.error(traceback.format_exc())
|
||||||
|
raise
|
||||||
|
|
||||||
class BatchJob(object):
|
class BatchJob(object):
|
||||||
def __init__(self, user, job_info):
|
def __init__(self, jobid, user, job_info):
|
||||||
|
self.job_db = Batchjob(jobid,user,job_info['jobName'],int(job_info['jobPriority']))
|
||||||
self.user = user
|
self.user = user
|
||||||
self.raw_job_info = job_info
|
#self.raw_job_info = job_info
|
||||||
self.job_id = None
|
self.job_id = jobid
|
||||||
self.job_name = job_info['jobName']
|
self.job_name = job_info['jobName']
|
||||||
self.job_priority = int(job_info['jobPriority'])
|
self.job_priority = int(job_info['jobPriority'])
|
||||||
self.status = 'pending'
|
|
||||||
self.create_time = time.strftime('%Y-%m-%d %H:%M:%S',time.localtime())
|
|
||||||
self.lock = threading.Lock()
|
self.lock = threading.Lock()
|
||||||
self.tasks = {}
|
self.tasks = {}
|
||||||
self.dependency_out = {}
|
self.dependency_out = {}
|
||||||
self.tasks_cnt = {'pending':0, 'scheduling':0, 'running':0, 'retrying':0, 'failed':0, 'finished':0}
|
self.tasks_cnt = {'pending':0, 'scheduling':0, 'running':0, 'retrying':0, 'failed':0, 'finished':0, 'stopped':0}
|
||||||
|
|
||||||
#init self.tasks & self.dependency_out & self.tasks_cnt
|
#init self.tasks & self.dependency_out & self.tasks_cnt
|
||||||
logger.debug("Init BatchJob user:%s job_name:%s create_time:%s" % (self.user, self.job_name, self.create_time))
|
logger.debug("Init BatchJob user:%s job_name:%s create_time:%s" % (self.job_db.username, self.job_db.name, str(self.job_db.create_time)))
|
||||||
raw_tasks = self.raw_job_info["tasks"]
|
raw_tasks = job_info["tasks"]
|
||||||
self.tasks_cnt['pending'] = len(raw_tasks.keys())
|
self.tasks_cnt['pending'] = len(raw_tasks.keys())
|
||||||
for task_idx in raw_tasks.keys():
|
for task_idx in raw_tasks.keys():
|
||||||
task_info = raw_tasks[task_idx]
|
task_info = raw_tasks[task_idx]
|
||||||
|
task_db = Batchtask(jobid+"_"+task_idx, task_idx, task_info)
|
||||||
|
self.job_db.tasks.append(task_db)
|
||||||
self.tasks[task_idx] = {}
|
self.tasks[task_idx] = {}
|
||||||
|
self.tasks[task_idx]['id'] = jobid+"_"+task_idx
|
||||||
self.tasks[task_idx]['config'] = task_info
|
self.tasks[task_idx]['config'] = task_info
|
||||||
|
self.tasks[task_idx]['db'] = task_db
|
||||||
self.tasks[task_idx]['status'] = 'pending'
|
self.tasks[task_idx]['status'] = 'pending'
|
||||||
self.tasks[task_idx]['dependency'] = []
|
self.tasks[task_idx]['dependency'] = []
|
||||||
dependency = task_info['dependency'].strip().replace(' ', '').split(',')
|
dependency = task_info['dependency'].strip().replace(' ', '').split(',')
|
||||||
|
@ -41,8 +54,11 @@ class BatchJob(object):
|
||||||
self.dependency_out[d] = []
|
self.dependency_out[d] = []
|
||||||
self.dependency_out[d].append(task_idx)
|
self.dependency_out[d].append(task_idx)
|
||||||
|
|
||||||
|
db.session.add(self.job_db)
|
||||||
|
db_commit()
|
||||||
|
|
||||||
self.log_status()
|
self.log_status()
|
||||||
logger.debug("BatchJob(id:%s) dependency_out: %s" % (self.job_id, json.dumps(self.dependency_out, indent=3)))
|
logger.debug("BatchJob(id:%s) dependency_out: %s" % (self.job_db.id, json.dumps(self.dependency_out, indent=3)))
|
||||||
|
|
||||||
def data_lock(f):
|
def data_lock(f):
|
||||||
@wraps(f)
|
@wraps(f)
|
||||||
|
@ -60,7 +76,7 @@ class BatchJob(object):
|
||||||
# return the tasks without dependencies
|
# return the tasks without dependencies
|
||||||
@data_lock
|
@data_lock
|
||||||
def get_tasks_no_dependency(self,update_status=False):
|
def get_tasks_no_dependency(self,update_status=False):
|
||||||
logger.debug("Get tasks without dependencies of BatchJob(id:%s)" % self.job_id)
|
logger.debug("Get tasks without dependencies of BatchJob(id:%s)" % self.job_db.id)
|
||||||
ret_tasks = []
|
ret_tasks = []
|
||||||
for task_idx in self.tasks.keys():
|
for task_idx in self.tasks.keys():
|
||||||
if (self.tasks[task_idx]['status'] == 'pending' and
|
if (self.tasks[task_idx]['status'] == 'pending' and
|
||||||
|
@ -68,53 +84,83 @@ class BatchJob(object):
|
||||||
if update_status:
|
if update_status:
|
||||||
self.tasks_cnt['pending'] -= 1
|
self.tasks_cnt['pending'] -= 1
|
||||||
self.tasks_cnt['scheduling'] += 1
|
self.tasks_cnt['scheduling'] += 1
|
||||||
|
self.tasks[task_idx]['db'] = Batchtask.query.get(self.tasks[task_idx]['id'])
|
||||||
|
self.tasks[task_idx]['db'].status = 'scheduling'
|
||||||
self.tasks[task_idx]['status'] = 'scheduling'
|
self.tasks[task_idx]['status'] = 'scheduling'
|
||||||
task_name = self.job_id + '_' + task_idx
|
task_name = self.tasks[task_idx]['db'].id
|
||||||
ret_tasks.append([task_name, self.tasks[task_idx]['config'], self.job_priority])
|
ret_tasks.append([task_name, self.tasks[task_idx]['config'], self.job_priority])
|
||||||
self.log_status()
|
self.log_status()
|
||||||
|
db_commit()
|
||||||
return ret_tasks
|
return ret_tasks
|
||||||
|
|
||||||
@data_lock
|
@data_lock
|
||||||
def stop_tasks(self):
|
def stop_job(self):
|
||||||
for task_idx in self.tasks.keys():
|
self.job_db = Batchjob.query.get(self.job_id)
|
||||||
self.tasks[task_idx]['status'] = 'stopped'
|
self.job_db.status = 'stopping'
|
||||||
|
db_commit()
|
||||||
|
|
||||||
# update the status of this job based on the status counts of its tasks
|
# update the status of this job based on the status counts of its tasks
|
||||||
def _update_job_status(self):
|
def _update_job_status(self):
|
||||||
allcnt = len(self.tasks.keys())
|
allcnt = len(self.tasks.keys())
|
||||||
if self.tasks_cnt['failed'] != 0:
|
if self.tasks_cnt['failed'] != 0:
|
||||||
self.status = 'failed'
|
self.job_db.status = 'failed'
|
||||||
elif self.tasks_cnt['running'] != 0:
|
self.job_db.end_time = datetime.now()
|
||||||
self.status = 'running'
|
|
||||||
elif self.tasks_cnt['finished'] == allcnt:
|
elif self.tasks_cnt['finished'] == allcnt:
|
||||||
self.status = 'done'
|
self.job_db.status = 'done'
|
||||||
|
self.job_db.end_time = datetime.now()
|
||||||
|
elif self.job_db.status == 'stopping':
|
||||||
|
if self.tasks_cnt['running'] == 0 and self.tasks_cnt['scheduling'] == 0 and self.tasks_cnt['retrying'] == 0:
|
||||||
|
self.job_db.status = 'stopped'
|
||||||
|
self.job_db.end_time = datetime.now()
|
||||||
|
elif self.tasks_cnt['running'] != 0 or self.tasks_cnt['retrying'] != 0:
|
||||||
|
self.job_db.status = 'running'
|
||||||
else:
|
else:
|
||||||
self.status = 'pending'
|
self.job_db.status = 'pending'
|
||||||
|
db_commit()
|
||||||
|
|
||||||
# a task has started running; update its status
|
# a task has started running; update its status
|
||||||
@data_lock
|
@data_lock
|
||||||
def update_task_running(self, task_idx):
|
def update_task_running(self, task_idx):
|
||||||
logger.debug("Update status of task(idx:%s) of BatchJob(id:%s) running." % (task_idx, self.job_id))
|
logger.debug("Update status of task(idx:%s) of BatchJob(id:%s) running." % (task_idx, self.job_id))
|
||||||
old_status = self.tasks[task_idx]['status'].split('(')[0]
|
old_status = self.tasks[task_idx]['status']
|
||||||
|
if old_status == 'stopping':
|
||||||
|
logger.info("Task(idx:%s) of BatchJob(id:%s) has been stopped."% (task_idx, self.job_id))
|
||||||
|
return
|
||||||
self.tasks_cnt[old_status] -= 1
|
self.tasks_cnt[old_status] -= 1
|
||||||
self.tasks[task_idx]['status'] = 'running'
|
self.tasks[task_idx]['status'] = 'running'
|
||||||
|
self.tasks[task_idx]['db'] = Batchtask.query.get(self.tasks[task_idx]['id'])
|
||||||
|
self.tasks[task_idx]['db'].status = 'running'
|
||||||
|
self.tasks[task_idx]['db'].start_time = datetime.now()
|
||||||
self.tasks_cnt['running'] += 1
|
self.tasks_cnt['running'] += 1
|
||||||
|
self.job_db = Batchjob.query.get(self.job_id)
|
||||||
self._update_job_status()
|
self._update_job_status()
|
||||||
self.log_status()
|
self.log_status()
|
||||||
|
|
||||||
# a task has finished, update dependency and return tasks without dependencies
|
# a task has finished, update dependency and return tasks without dependencies
|
||||||
@data_lock
|
@data_lock
|
||||||
def finish_task(self, task_idx):
|
def finish_task(self, task_idx, running_time, billing):
|
||||||
if task_idx not in self.tasks.keys():
|
if task_idx not in self.tasks.keys():
|
||||||
logger.error('Task_idx %s not in job. user:%s job_name:%s job_id:%s'%(task_idx, self.user, self.job_name, self.job_id))
|
logger.error('Task_idx %s not in job. user:%s job_name:%s job_id:%s'%(task_idx, self.user, self.job_name, self.job_id))
|
||||||
return []
|
return []
|
||||||
logger.debug("Task(idx:%s) of BatchJob(id:%s) has finished. Update dependency..." % (task_idx, self.job_id))
|
logger.debug("Task(idx:%s) of BatchJob(id:%s) has finished(running_time=%d,billing=%d). Update dependency..." % (task_idx, self.job_id, running_time, billing))
|
||||||
old_status = self.tasks[task_idx]['status'].split('(')[0]
|
old_status = self.tasks[task_idx]['status']
|
||||||
|
if old_status == 'stopping':
|
||||||
|
logger.info("Task(idx:%s) of BatchJob(id:%s) has been stopped."% (task_idx, self.job_id))
|
||||||
|
return
|
||||||
self.tasks_cnt[old_status] -= 1
|
self.tasks_cnt[old_status] -= 1
|
||||||
self.tasks[task_idx]['status'] = 'finished'
|
self.tasks[task_idx]['status'] = 'finished'
|
||||||
|
self.tasks[task_idx]['db'] = Batchtask.query.get(self.tasks[task_idx]['id'])
|
||||||
|
self.tasks[task_idx]['db'].status = 'finished'
|
||||||
|
self.tasks[task_idx]['db'].tried_times += 1
|
||||||
|
self.tasks[task_idx]['db'].running_time = running_time
|
||||||
|
self.tasks[task_idx]['db'].end_time = datetime.now()
|
||||||
|
self.tasks[task_idx]['db'].billing = billing
|
||||||
|
self.job_db = Batchjob.query.get(self.job_id)
|
||||||
|
self.job_db.billing += billing
|
||||||
self.tasks_cnt['finished'] += 1
|
self.tasks_cnt['finished'] += 1
|
||||||
self._update_job_status()
|
|
||||||
if task_idx not in self.dependency_out.keys():
|
if task_idx not in self.dependency_out.keys():
|
||||||
|
self._update_job_status()
|
||||||
self.log_status()
|
self.log_status()
|
||||||
return []
|
return []
|
||||||
ret_tasks = []
|
ret_tasks = []
|
||||||
|
@ -129,8 +175,11 @@ class BatchJob(object):
|
||||||
self.tasks_cnt['pending'] -= 1
|
self.tasks_cnt['pending'] -= 1
|
||||||
self.tasks_cnt['scheduling'] += 1
|
self.tasks_cnt['scheduling'] += 1
|
||||||
self.tasks[out_idx]['status'] = 'scheduling'
|
self.tasks[out_idx]['status'] = 'scheduling'
|
||||||
|
self.tasks[out_idx]['db'] = Batchtask.query.get(self.tasks[out_idx]['id'])
|
||||||
|
self.tasks[out_idx]['db'].status = 'scheduling'
|
||||||
task_name = self.job_id + '_' + out_idx
|
task_name = self.job_id + '_' + out_idx
|
||||||
ret_tasks.append([task_name, self.tasks[out_idx]['config'], self.job_priority])
|
ret_tasks.append([task_name, self.tasks[out_idx]['config'], self.job_priority])
|
||||||
|
self._update_job_status()
|
||||||
self.log_status()
|
self.log_status()
|
||||||
return ret_tasks
|
return ret_tasks
|
||||||
|
|
||||||
|
@ -138,27 +187,62 @@ class BatchJob(object):
|
||||||
@data_lock
|
@data_lock
|
||||||
def update_task_retrying(self, task_idx, reason, tried_times):
|
def update_task_retrying(self, task_idx, reason, tried_times):
|
||||||
logger.debug("Update status of task(idx:%s) of BatchJob(id:%s) retrying. reason:%s tried_times:%d" % (task_idx, self.job_id, reason, int(tried_times)))
|
logger.debug("Update status of task(idx:%s) of BatchJob(id:%s) retrying. reason:%s tried_times:%d" % (task_idx, self.job_id, reason, int(tried_times)))
|
||||||
old_status = self.tasks[task_idx]['status'].split('(')[0]
|
old_status = self.tasks[task_idx]['status']
|
||||||
|
if old_status == 'stopping':
|
||||||
|
logger.info("Task(idx:%s) of BatchJob(id:%s) has been stopped."% (task_idx, self.job_id))
|
||||||
|
return
|
||||||
self.tasks_cnt[old_status] -= 1
|
self.tasks_cnt[old_status] -= 1
|
||||||
self.tasks_cnt['retrying'] += 1
|
self.tasks_cnt['retrying'] += 1
|
||||||
self.tasks[task_idx]['status'] = 'retrying(%s)(%d times)' % (reason, int(tried_times))
|
self.tasks[task_idx]['db'] = Batchtask.query.get(self.tasks[task_idx]['id'])
|
||||||
|
self.tasks[task_idx]['db'].status = 'retrying'
|
||||||
|
self.tasks[task_idx]['db'].failed_reason = reason
|
||||||
|
self.tasks[task_idx]['db'].tried_times += 1
|
||||||
|
self.tasks[task_idx]['status'] = 'retrying'
|
||||||
|
self.job_db = Batchjob.query.get(self.job_id)
|
||||||
self._update_job_status()
|
self._update_job_status()
|
||||||
self.log_status()
|
self.log_status()
|
||||||
|
|
||||||
# update failed status of task
|
# update failed status of task
|
||||||
@data_lock
|
@data_lock
|
||||||
def update_task_failed(self, task_idx, reason, tried_times):
|
def update_task_failed(self, task_idx, reason, tried_times, running_time, billing):
|
||||||
logger.debug("Update status of task(idx:%s) of BatchJob(id:%s) failed. reason:%s tried_times:%d" % (task_idx, self.job_id, reason, int(tried_times)))
|
logger.debug("Update status of task(idx:%s) of BatchJob(id:%s) failed. reason:%s tried_times:%d" % (task_idx, self.job_id, reason, int(tried_times)))
|
||||||
old_status = self.tasks[task_idx]['status'].split('(')[0]
|
old_status = self.tasks[task_idx]['status']
|
||||||
self.tasks_cnt[old_status] -= 1
|
self.tasks_cnt[old_status] -= 1
|
||||||
self.tasks_cnt['failed'] += 1
|
self.tasks_cnt['failed'] += 1
|
||||||
if reason == "OUTPUTERROR":
|
self.tasks[task_idx]['status'] = 'failed'
|
||||||
self.tasks[task_idx]['status'] = 'failed(OUTPUTERROR)'
|
self.tasks[task_idx]['db'] = Batchtask.query.get(self.tasks[task_idx]['id'])
|
||||||
else:
|
self.tasks[task_idx]['db'].status = 'failed'
|
||||||
self.tasks[task_idx]['status'] = 'failed(%s)(%d times)' % (reason, int(tried_times))
|
self.tasks[task_idx]['db'].failed_reason = reason
|
||||||
|
self.tasks[task_idx]['db'].tried_times += 1
|
||||||
|
self.tasks[task_idx]['db'].end_time = datetime.now()
|
||||||
|
self.tasks[task_idx]['db'].running_time = running_time
|
||||||
|
self.tasks[task_idx]['db'].billing = billing
|
||||||
|
self.job_db = Batchjob.query.get(self.job_id)
|
||||||
|
self.job_db.billing += billing
|
||||||
self._update_job_status()
|
self._update_job_status()
|
||||||
self.log_status()
|
self.log_status()
|
||||||
|
|
||||||
|
@data_lock
|
||||||
|
def update_task_stopped(self, task_idx, running_time, billing):
|
||||||
|
logger.debug("Update status of task(idx:%s) of BatchJob(id:%s) stopped.running_time:%d billing:%d" % (task_idx, self.job_id, int(running_time), billing))
|
||||||
|
old_status = self.tasks[task_idx]['status']
|
||||||
|
if old_status == 'failed' or old_status == 'finished' or old_status == 'stopped':
|
||||||
|
logger.info("task(idx:%s) of BatchJob(id:%s) has been done."%(task_idx, self.job_id))
|
||||||
|
return False
|
||||||
|
self.tasks_cnt[old_status] -= 1
|
||||||
|
self.tasks_cnt['stopped'] += 1
|
||||||
|
self.tasks[task_idx]['status'] = 'stopped'
|
||||||
|
self.tasks[task_idx]['db'] = Batchtask.query.get(self.tasks[task_idx]['id'])
|
||||||
|
self.tasks[task_idx]['db'].status = 'stopped'
|
||||||
|
self.tasks[task_idx]['db'].end_time = datetime.now()
|
||||||
|
self.tasks[task_idx]['db'].running_time = running_time
|
||||||
|
self.tasks[task_idx]['db'].billing = billing
|
||||||
|
self.job_db = Batchjob.query.get(self.job_id)
|
||||||
|
self.job_db.billing += billing
|
||||||
|
self._update_job_status()
|
||||||
|
self.log_status()
|
||||||
|
return True
|
||||||
|
|
||||||
# print status for debugging
|
# print status for debugging
|
||||||
def log_status(self):
|
def log_status(self):
|
||||||
task_copy = {}
|
task_copy = {}
|
||||||
|
@ -168,24 +252,55 @@ class BatchJob(object):
|
||||||
task_copy[task_idx]['dependency'] = self.tasks[task_idx]['dependency']
|
task_copy[task_idx]['dependency'] = self.tasks[task_idx]['dependency']
|
||||||
logger.debug("BatchJob(id:%s) tasks status: %s" % (self.job_id, json.dumps(task_copy, indent=3)))
|
logger.debug("BatchJob(id:%s) tasks status: %s" % (self.job_id, json.dumps(task_copy, indent=3)))
|
||||||
logger.debug("BatchJob(id:%s) tasks_cnt: %s" % (self.job_id, self.tasks_cnt))
|
logger.debug("BatchJob(id:%s) tasks_cnt: %s" % (self.job_id, self.tasks_cnt))
|
||||||
logger.debug("BatchJob(id:%s) job_status: %s" %(self.job_id, self.status))
|
logger.debug("BatchJob(id:%s) job_status: %s" %(self.job_id, self.job_db.status))
|
||||||
|
|
||||||
|
|
||||||
class JobMgr():
|
class JobMgr():
|
||||||
# load job information from etcd
|
# load job information from etcd
|
||||||
# initialize a job queue and job scheduler
|
# initialize a job queue and job scheduler
|
||||||
def __init__(self, taskmgr):
|
def __init__(self, taskmgr):
|
||||||
|
try:
|
||||||
|
Batchjob.query.all()
|
||||||
|
except:
|
||||||
|
db.create_all(bind='__all__')
|
||||||
self.job_map = {}
|
self.job_map = {}
|
||||||
self.taskmgr = taskmgr
|
self.taskmgr = taskmgr
|
||||||
self.fspath = env.getenv('FS_PREFIX')
|
self.fspath = env.getenv('FS_PREFIX')
|
||||||
|
self.lock = threading.Lock()
|
||||||
|
self.userpoint = "http://" + env.getenv('USER_IP') + ":" + str(env.getenv('USER_PORT'))
|
||||||
|
self.auth_key = env.getenv('AUTH_KEY')
|
||||||
|
|
||||||
|
def charge_beans(self,username,billing):
|
||||||
|
logger.debug("Charge user(%s) for %d beans"%(username, billing))
|
||||||
|
data = {"owner_name":username,"billing":billing, "auth_key":self.auth_key}
|
||||||
|
url = "/billing/beans/"
|
||||||
|
return requests.post(self.userpoint+url,data=data).json()
|
||||||
|
|
||||||
|
def add_lock(f):
|
||||||
|
@wraps(f)
|
||||||
|
def new_f(self, *args, **kwargs):
|
||||||
|
self.lock.acquire()
|
||||||
|
try:
|
||||||
|
result = f(self, *args, **kwargs)
|
||||||
|
except Exception as err:
|
||||||
|
self.lock.release()
|
||||||
|
raise err
|
||||||
|
self.lock.release()
|
||||||
|
return result
|
||||||
|
return new_f
|
||||||
|
|
||||||
|
@add_lock
|
||||||
|
def create_job(self, user, job_info):
|
||||||
|
jobid = self.gen_jobid()
|
||||||
|
job = BatchJob(jobid, user, job_info)
|
||||||
|
return job
|
||||||
|
|
||||||
# user: username
|
# user: username
|
||||||
# job_info: a json string
|
# job_info: a json string
|
||||||
# the user submits a new job; add this job to the queue and the database
|
# the user submits a new job; add this job to the queue and the database
|
||||||
def add_job(self, user, job_info):
|
def add_job(self, user, job_info):
|
||||||
try:
|
try:
|
||||||
job = BatchJob(user, job_info)
|
job = self.create_job(user, job_info)
|
||||||
job.job_id = self.gen_jobid()
|
|
||||||
self.job_map[job.job_id] = job
|
self.job_map[job.job_id] = job
|
||||||
self.process_job(job)
|
self.process_job(job)
|
||||||
except ValueError as err:
|
except ValueError as err:
|
||||||
|
@ -201,17 +316,18 @@ class JobMgr():
|
||||||
# jobid: the id of job
|
# jobid: the id of job
|
||||||
def stop_job(self, user, job_id):
|
def stop_job(self, user, job_id):
|
||||||
logger.info("[jobmgr] stop job(id:%s) user(%s)"%(job_id, user))
|
logger.info("[jobmgr] stop job(id:%s) user(%s)"%(job_id, user))
|
||||||
|
if job_id not in self.job_map.keys():
|
||||||
|
return [False,"Job id %s does not exists! Maybe it has been finished."%job_id]
|
||||||
try:
|
try:
|
||||||
job = self.job_map[job_id]
|
job = self.job_map[job_id]
|
||||||
if job.status == 'done' or job.status == 'failed':
|
if job.job_db.status == 'done' or job.job_db.status == 'failed':
|
||||||
return [True,""]
|
return [True,""]
|
||||||
if job.user != user:
|
if job.user != user:
|
||||||
raise Exception("Wrong User.")
|
raise Exception("Wrong User.")
|
||||||
for task_idx in job.tasks.keys():
|
for task_idx in job.tasks.keys():
|
||||||
taskid = job_id + '_' + task_idx
|
taskid = job_id + '_' + task_idx
|
||||||
task = self.taskmgr.get_task(taskid)
|
self.taskmgr.lazy_stop_task(taskid)
|
||||||
self.taskmgr.stop_remove_task(task)
|
job.stop_job()
|
||||||
job.status = 'stopped'
|
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
logger.error(traceback.format_exc())
|
logger.error(traceback.format_exc())
|
||||||
#logger.error(err)
|
#logger.error(err)
|
||||||
|
@ -221,42 +337,44 @@ class JobMgr():
|
||||||
# user: username
|
# user: username
|
||||||
# list all of a user's jobs
|
# list all of a user's jobs
|
||||||
def list_jobs(self,user):
|
def list_jobs(self,user):
|
||||||
|
alljobs = Batchjob.query.filter_by(username=user).all()
|
||||||
res = []
|
res = []
|
||||||
for job_id in self.job_map.keys():
|
for job in alljobs:
|
||||||
job = self.job_map[job_id]
|
jobdata = json.loads(str(job))
|
||||||
logger.debug('job_id: %s, user: %s' % (job_id, job.user))
|
tasks = job.tasks.all()
|
||||||
if job.user == user:
|
jobdata['tasks'] = [t.idx for t in tasks]
|
||||||
all_tasks = job.raw_job_info['tasks']
|
tasks_vnodeCount = {}
|
||||||
tasks_vnodeCount = {}
|
for t in tasks:
|
||||||
for task in all_tasks.keys():
|
tasks_vnodeCount[t.idx] = int(json.loads(t.config)['vnodeCount'])
|
||||||
tasks_vnodeCount[task] = int(all_tasks[task]['vnodeCount'])
|
jobdata['tasks_vnodeCount'] = tasks_vnodeCount
|
||||||
res.append({
|
res.append(jobdata)
|
||||||
'job_name': job.job_name,
|
|
||||||
'job_id': job.job_id,
|
|
||||||
'status': job.status,
|
|
||||||
'create_time': job.create_time,
|
|
||||||
'tasks': list(all_tasks.keys()),
|
|
||||||
'tasks_vnodeCount': tasks_vnodeCount
|
|
||||||
})
|
|
||||||
res.sort(key=lambda x:x['create_time'],reverse=True)
|
|
||||||
return res
|
return res
|
||||||
|
|
||||||
# user: username
|
# user: username
|
||||||
# jobid: the id of job
|
# jobid: the id of job
|
||||||
# get the information of a job, including the status, json description and other information
|
# get the information of a job, including the status, json description and other information
|
||||||
# call get_task to get the task information
|
|
||||||
def get_job(self, user, job_id):
|
def get_job(self, user, job_id):
|
||||||
pass
|
job = Batchjob.query.get(job_id)
|
||||||
|
if job is None:
|
||||||
|
return [False, "Jobid(%s) does not exist."%job_id]
|
||||||
|
if job.username != user:
|
||||||
|
return [False, "Wrong User!"]
|
||||||
|
jobdata = json.loads(str(job))
|
||||||
|
tasks = job.tasks.order_by(Batchtask.idx).all()
|
||||||
|
tasksdata = [json.loads(str(t)) for t in tasks]
|
||||||
|
jobdata['tasks'] = tasksdata
|
||||||
|
return [True, jobdata]
|
||||||
|
|
||||||
# check if a job exists
|
# check if a job exists
|
||||||
def is_job_exist(self, job_id):
|
def is_job_exist(self, job_id):
|
||||||
return job_id in self.job_map.keys()
|
return Batchjob.query.get(job_id) != None
|
||||||
|
|
||||||
# generate a random job id
|
# generate a random job id
|
||||||
def gen_jobid(self):
|
def gen_jobid(self):
|
||||||
job_id = ''.join(random.sample(string.ascii_letters + string.digits, 8))
|
datestr = datetime.now().strftime("%y%m%d")
|
||||||
|
job_id = datestr+''.join(random.sample(string.ascii_letters + string.digits, 3))
|
||||||
while self.is_job_exist(job_id):
|
while self.is_job_exist(job_id):
|
||||||
job_id = ''.join(random.sample(string.ascii_letters + string.digits, 8))
|
job_id = datestr+''.join(random.sample(string.ascii_letters + string.digits, 3))
|
||||||
return job_id
|
return job_id
|
||||||
|
|
||||||
# add tasks into taskmgr's queue
|
# add tasks into taskmgr's queue
|
||||||
|
@ -277,27 +395,53 @@ class JobMgr():
|
||||||
|
|
||||||
# report task status from taskmgr when running, failed and finished
|
# report task status from taskmgr when running, failed and finished
|
||||||
# task_name: job_id + '_' + task_idx
|
# task_name: job_id + '_' + task_idx
|
||||||
# status: 'running', 'finished', 'retrying', 'failed'
|
# status: 'running', 'finished', 'retrying', 'failed', 'stopped'
|
||||||
# reason: reason for failure or retrying, such as "FAILED", "TIMEOUT", "OUTPUTERROR"
|
# reason: reason for failure or retrying, such as "FAILED", "TIMEOUT", "OUTPUTERROR"
|
||||||
# tried_times: how many times the task has been tried.
|
# tried_times: how many times the task has been tried.
|
||||||
def report(self, user, task_name, status, reason="", tried_times=1):
|
def report(self, user, task_name, status, reason="", tried_times=1, running_time=0, billing=0):
|
||||||
split_task_name = task_name.split('_')
|
split_task_name = task_name.split('_')
|
||||||
if len(split_task_name) != 2:
|
if len(split_task_name) != 2:
|
||||||
logger.error("Illegal task_name(%s) report from taskmgr" % task_name)
|
logger.error("[jobmgr report]Illegal task_name(%s) report from taskmgr" % task_name)
|
||||||
return
|
return
|
||||||
|
if billing > 0 and (status == 'failed' or status == 'finished'):
|
||||||
|
self.charge_beans(user, billing)
|
||||||
job_id, task_idx = split_task_name
|
job_id, task_idx = split_task_name
|
||||||
|
if job_id not in self.job_map.keys():
|
||||||
|
logger.error("[jobmgr report]jobid(%s) does not exist. task_name(%s)" % (job_id,task_name))
|
||||||
|
#update data in db
|
||||||
|
taskdb = Batchtask.query.get(task_name)
|
||||||
|
if (taskdb is None or taskdb.status == 'finished' or
|
||||||
|
taskdb.status == 'failed' or taskdb.status == 'stopped'):
|
||||||
|
return
|
||||||
|
taskdb.status = status
|
||||||
|
if status == 'failed':
|
||||||
|
taskdb.failed_reason = reason
|
||||||
|
if status == 'failed' or status == 'stopped' or status == 'finished':
|
||||||
|
taskdb.end_time = datetime.now()
|
||||||
|
if billing > 0:
|
||||||
|
taskdb.running_time = running_time
|
||||||
|
taskdb.billing = billing
|
||||||
|
db_commit()
|
||||||
|
return
|
||||||
job = self.job_map[job_id]
|
job = self.job_map[job_id]
|
||||||
if status == "running":
|
if status == "running":
|
||||||
|
#logger.debug(str(job.job_db))
|
||||||
job.update_task_running(task_idx)
|
job.update_task_running(task_idx)
|
||||||
|
#logger.debug(str(job.job_db))
|
||||||
elif status == "finished":
|
elif status == "finished":
|
||||||
next_tasks = job.finish_task(task_idx)
|
#logger.debug(str(job.job_db))
|
||||||
if len(next_tasks) == 0:
|
next_tasks = job.finish_task(task_idx, running_time, billing)
|
||||||
return
|
|
||||||
ret = self.add_task_taskmgr(user, next_tasks)
|
ret = self.add_task_taskmgr(user, next_tasks)
|
||||||
|
#logger.debug(str(job.job_db))
|
||||||
elif status == "retrying":
|
elif status == "retrying":
|
||||||
job.update_task_retrying(task_idx, reason, tried_times)
|
job.update_task_retrying(task_idx, reason, tried_times)
|
||||||
elif status == "failed":
|
elif status == "failed":
|
||||||
job.update_task_failed(task_idx, reason, tried_times)
|
job.update_task_failed(task_idx, reason, tried_times, running_time, billing)
|
||||||
|
elif status == "stopped":
|
||||||
|
if job.update_task_stopped(task_idx, running_time, billing) and billing > 0:
|
||||||
|
self.charge_beans(user, billing)
|
||||||
|
if job.job_db.status == 'done' or job.job_db.status == 'failed' or job.job_db.status == 'stopped':
|
||||||
|
del self.job_map[job_id]
|
||||||
|
|
||||||
# Get Batch job stdout or stderr from its file
|
# Get Batch job stdout or stderr from its file
|
||||||
def get_output(self, username, jobid, taskid, vnodeid, issue):
|
def get_output(self, username, jobid, taskid, vnodeid, issue):
|
||||||
|
|
|
@ -3,7 +3,7 @@ import time
|
||||||
import string
|
import string
|
||||||
import os
|
import os
|
||||||
import random, copy, subprocess
|
import random, copy, subprocess
|
||||||
import json
|
import json, math
|
||||||
from functools import wraps
|
from functools import wraps
|
||||||
|
|
||||||
# must import logger after initlogging, ugly
|
# must import logger after initlogging, ugly
|
||||||
|
@ -29,6 +29,7 @@ class Task():
|
||||||
self.id = task_id
|
self.id = task_id
|
||||||
self.username = username
|
self.username = username
|
||||||
self.status = WAITING
|
self.status = WAITING
|
||||||
|
self.failed_reason = ""
|
||||||
# if all the vnodes must be started at the same time
|
# if all the vnodes must be started at the same time
|
||||||
self.at_same_time = at_same_time
|
self.at_same_time = at_same_time
|
||||||
# priority the bigger the better
|
# priority the bigger the better
|
||||||
|
@ -46,6 +47,26 @@ class Task():
|
||||||
max_retry_count = task_info['max_retry_count']
|
max_retry_count = task_info['max_retry_count']
|
||||||
) for (index, task_info) in enumerate(task_infos)]
|
) for (index, task_info) in enumerate(task_infos)]
|
||||||
|
|
||||||
|
def get_billing(self):
|
||||||
|
billing_beans = 0
|
||||||
|
running_time = 0
|
||||||
|
cpu_price = 1 / 3600.0 # /core*s
|
||||||
|
mem_price = 1 / 3600.0 # /GB*s
|
||||||
|
disk_price = 1 / 3600.0 # /GB*s
|
||||||
|
gpu_price = 100 / 3600.0 # /core*s
|
||||||
|
for subtask in self.subtask_list:
|
||||||
|
tmp_time = subtask.running_time
|
||||||
|
cpu_beans = subtask.vnode_info.vnode.instance.cpu * tmp_time * cpu_price
|
||||||
|
mem_beans = subtask.vnode_info.vnode.instance.memory / 1024.0 * tmp_time * mem_price
|
||||||
|
disk_beans = subtask.vnode_info.vnode.instance.disk / 1024.0 * tmp_time * disk_price
|
||||||
|
gpu_beans = subtask.vnode_info.vnode.instance.gpu * tmp_time * gpu_price
|
||||||
|
logger.info("subtask:%s running_time=%f beans for: cpu=%f mem_beans=%f disk_beans=%f gpu_beans=%f"
|
||||||
|
%(self.id, tmp_time, cpu_beans, mem_beans, disk_beans, gpu_beans ))
|
||||||
|
beans = math.ceil(cpu_beans + mem_beans + disk_beans + gpu_beans)
|
||||||
|
running_time += tmp_time
|
||||||
|
billing_beans += beans
|
||||||
|
return running_time, billing_beans
|
||||||
|
|
||||||
def __lt__(self, other):
|
def __lt__(self, other):
|
||||||
return self.priority < other.priority
|
return self.priority < other.priority
|
||||||
|
|
||||||
|
@ -87,16 +108,18 @@ class SubTask():
|
||||||
self.task_started = False
|
self.task_started = False
|
||||||
self.start_at = 0
|
self.start_at = 0
|
||||||
self.end_at = 0
|
self.end_at = 0
|
||||||
|
self.running_time = 0
|
||||||
self.status = WAITING
|
self.status = WAITING
|
||||||
self.status_reason = ''
|
self.status_reason = ''
|
||||||
self.try_count = 0
|
self.try_count = 0
|
||||||
self.worker = None
|
self.worker = None
|
||||||
|
|
||||||
def waiting_for_retry(self):
|
def waiting_for_retry(self,reason=""):
|
||||||
self.try_count += 1
|
self.try_count += 1
|
||||||
self.status = WAITING if self.try_count <= self.max_retry_count else FAILED
|
self.status = WAITING if self.try_count <= self.max_retry_count else FAILED
|
||||||
if self.status == FAILED and self.root_task.at_same_time:
|
if self.status == FAILED:
|
||||||
self.root_task.status = FAILED
|
self.root_task.status = FAILED
|
||||||
|
self.failed_reason = reason
|
||||||
|
|
||||||
|
|
||||||
class TaskReporter(MasterServicer):
|
class TaskReporter(MasterServicer):
|
||||||
|
@ -123,7 +146,10 @@ class TaskMgr(threading.Thread):
|
||||||
self.task_queue = []
|
self.task_queue = []
|
||||||
self.lazy_append_list = []
|
self.lazy_append_list = []
|
||||||
self.lazy_delete_list = []
|
self.lazy_delete_list = []
|
||||||
|
self.lazy_stop_list = []
|
||||||
self.task_queue_lock = threading.Lock()
|
self.task_queue_lock = threading.Lock()
|
||||||
|
self.stop_lock = threading.Lock()
|
||||||
|
self.add_lock = threading.Lock()
|
||||||
#self.user_containers = {}
|
#self.user_containers = {}
|
||||||
|
|
||||||
self.scheduler_interval = scheduler_interval
|
self.scheduler_interval = scheduler_interval
|
||||||
|
@ -155,23 +181,21 @@ class TaskMgr(threading.Thread):
|
||||||
self.logger.info("Free nets addresses pool %s" % str(self.free_nets))
|
self.logger.info("Free nets addresses pool %s" % str(self.free_nets))
|
||||||
self.logger.info("Each Batch Net CIDR:%s"%(str(self.task_cidr)))
|
self.logger.info("Each Batch Net CIDR:%s"%(str(self.task_cidr)))
|
||||||
|
|
||||||
def queue_lock(f):
|
def data_lock(lockname):
|
||||||
@wraps(f)
|
def lock(f):
|
||||||
def new_f(self, *args, **kwargs):
|
@wraps(f)
|
||||||
self.task_queue_lock.acquire()
|
def new_f(self, *args, **kwargs):
|
||||||
result = f(self, *args, **kwargs)
|
lockobj = getattr(self,lockname)
|
||||||
self.task_queue_lock.release()
|
lockobj.acquire()
|
||||||
return result
|
try:
|
||||||
return new_f
|
result = f(self, *args, **kwargs)
|
||||||
|
except Exception as err:
|
||||||
def net_lock(f):
|
lockobj.release()
|
||||||
@wraps(f)
|
raise err
|
||||||
def new_f(self, *args, **kwargs):
|
lockobj.release()
|
||||||
self.network_lock.acquire()
|
return result
|
||||||
result = f(self, *args, **kwargs)
|
return new_f
|
||||||
self.network_lock.release()
|
return lock
|
||||||
return result
|
|
||||||
return new_f
|
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
self.serve()
|
self.serve()
|
||||||
|
@ -195,14 +219,36 @@ class TaskMgr(threading.Thread):
|
||||||
self.server.stop(0)
|
self.server.stop(0)
|
||||||
self.logger.info('[taskmgr_rpc] stop rpc server')
|
self.logger.info('[taskmgr_rpc] stop rpc server')
|
||||||
|
|
||||||
@queue_lock
|
@data_lock('task_queue_lock')
|
||||||
|
@data_lock('add_lock')
|
||||||
|
@data_lock('stop_lock')
|
||||||
def sort_out_task_queue(self):
|
def sort_out_task_queue(self):
|
||||||
|
|
||||||
|
for task in self.task_queue:
|
||||||
|
if task.id in self.lazy_stop_list:
|
||||||
|
self.stop_remove_task(task)
|
||||||
|
self.lazy_delete_list.append(task)
|
||||||
|
running_time, billing = task.get_billing()
|
||||||
|
self.logger.info('task %s stopped, running_time:%s billing:%d'%(task.id, str(running_time), billing))
|
||||||
|
running_time = math.ceil(running_time)
|
||||||
|
self.jobmgr.report(task.username, task.id,'stopped',running_time=running_time,billing=billing)
|
||||||
|
|
||||||
while self.lazy_delete_list:
|
while self.lazy_delete_list:
|
||||||
task = self.lazy_delete_list.pop(0)
|
task = self.lazy_delete_list.pop(0)
|
||||||
try:
|
try:
|
||||||
self.task_queue.remove(task)
|
self.task_queue.remove(task)
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
self.logger.warning(str(err))
|
self.logger.warning(str(err))
|
||||||
|
|
||||||
|
new_append_list = []
|
||||||
|
for task in self.lazy_append_list:
|
||||||
|
if task.id in self.lazy_stop_list:
|
||||||
|
self.jobmgr.report(task.username, task.id, 'stopped')
|
||||||
|
else:
|
||||||
|
new_append_list.append(task)
|
||||||
|
|
||||||
|
self.lazy_append_list = new_append_list
|
||||||
|
self.lazy_stop_list.clear()
|
||||||
if self.lazy_append_list:
|
if self.lazy_append_list:
|
||||||
self.task_queue.extend(self.lazy_append_list)
|
self.task_queue.extend(self.lazy_append_list)
|
||||||
self.lazy_append_list.clear()
|
self.lazy_append_list.clear()
|
||||||
|
@ -240,6 +286,7 @@ class TaskMgr(threading.Thread):
|
||||||
return [False, e]
|
return [False, e]
|
||||||
subtask.vnode_started = False
|
subtask.vnode_started = False
|
||||||
subtask.end_at = time.time()
|
subtask.end_at = time.time()
|
||||||
|
subtask.running_time += subtask.end_at - subtask.start_at
|
||||||
self.cpu_usage[subtask.worker] -= subtask.vnode_info.vnode.instance.cpu
|
self.cpu_usage[subtask.worker] -= subtask.vnode_info.vnode.instance.cpu
|
||||||
self.gpu_usage[subtask.worker] -= subtask.vnode_info.vnode.instance.gpu
|
self.gpu_usage[subtask.worker] -= subtask.vnode_info.vnode.instance.gpu
|
||||||
return [True, '']
|
return [True, '']
|
||||||
|
@ -261,7 +308,7 @@ class TaskMgr(threading.Thread):
|
||||||
|
|
||||||
def stop_subtask(self, subtask):
|
def stop_subtask(self, subtask):
|
||||||
try:
|
try:
|
||||||
self.logger.info('[task_processor] Stoping task [%s] vnode [%d]' % (subtask.vnode_info.taskid, subtask.vnode_info.vnodeid))
|
self.logger.info('[task_processor] Stopping task [%s] vnode [%d]' % (subtask.vnode_info.taskid, subtask.vnode_info.vnodeid))
|
||||||
channel = grpc.insecure_channel('%s:%s' % (subtask.worker, self.worker_port))
|
channel = grpc.insecure_channel('%s:%s' % (subtask.worker, self.worker_port))
|
||||||
stub = WorkerStub(channel)
|
stub = WorkerStub(channel)
|
||||||
response = stub.stop_task(subtask.command_info)
|
response = stub.stop_task(subtask.command_info)
|
||||||
|
@ -275,14 +322,14 @@ class TaskMgr(threading.Thread):
|
||||||
subtask.task_started = False
|
subtask.task_started = False
|
||||||
return [True, '']
|
return [True, '']
|
||||||
|
|
||||||
@net_lock
|
@data_lock('network_lock')
|
||||||
def acquire_task_ips(self, task):
|
def acquire_task_ips(self, task):
|
||||||
self.logger.info("[acquire_task_ips] user(%s) task(%s) net(%s)" % (task.username, task.id, str(task.task_base_ip)))
|
self.logger.info("[acquire_task_ips] user(%s) task(%s) net(%s)" % (task.username, task.id, str(task.task_base_ip)))
|
||||||
if task.task_base_ip == None:
|
if task.task_base_ip == None:
|
||||||
task.task_base_ip = self.free_nets.pop(0)
|
task.task_base_ip = self.free_nets.pop(0)
|
||||||
return task.task_base_ip
|
return task.task_base_ip
|
||||||
|
|
||||||
@net_lock
|
@data_lock('network_lock')
|
||||||
def release_task_ips(self, task):
|
def release_task_ips(self, task):
|
||||||
self.logger.info("[release_task_ips] user(%s) task(%s) net(%s)" % (task.username, task.id, str(task.task_base_ip)))
|
self.logger.info("[release_task_ips] user(%s) task(%s) net(%s)" % (task.username, task.id, str(task.task_base_ip)))
|
||||||
if task.task_base_ip == None:
|
if task.task_base_ip == None:
|
||||||
|
@ -352,7 +399,8 @@ class TaskMgr(threading.Thread):
|
||||||
placed_workers.append(sub_task.worker)
|
placed_workers.append(sub_task.worker)
|
||||||
[success, msg] = self.start_vnode(sub_task)
|
[success, msg] = self.start_vnode(sub_task)
|
||||||
if not success:
|
if not success:
|
||||||
sub_task.waiting_for_retry()
|
sub_task.waiting_for_retry("Fail to start vnode.")
|
||||||
|
self.jobmgr.report(task.username, task.id, 'retrying', "Fail to start vnode.")
|
||||||
sub_task.worker = None
|
sub_task.worker = None
|
||||||
start_all_vnode_success = False
|
start_all_vnode_success = False
|
||||||
|
|
||||||
|
@ -371,7 +419,8 @@ class TaskMgr(threading.Thread):
|
||||||
if success:
|
if success:
|
||||||
sub_task.status = RUNNING
|
sub_task.status = RUNNING
|
||||||
else:
|
else:
|
||||||
sub_task.waiting_for_retry()
|
sub_task.waiting_for_retry("Failt to start task.")
|
||||||
|
self.jobmgr.report(task.username, task.id, 'retrying', "Fail to start task.")
|
||||||
|
|
||||||
def clear_sub_tasks(self, sub_task_list):
|
def clear_sub_tasks(self, sub_task_list):
|
||||||
for sub_task in sub_task_list:
|
for sub_task in sub_task_list:
|
||||||
|
@ -385,6 +434,10 @@ class TaskMgr(threading.Thread):
|
||||||
self.stop_vnode(sub_task)
|
self.stop_vnode(sub_task)
|
||||||
#pass
|
#pass
|
||||||
|
|
||||||
|
@data_lock('stop_lock')
|
||||||
|
def lazy_stop_task(self, taskid):
|
||||||
|
self.lazy_stop_list.append(taskid)
|
||||||
|
|
||||||
def stop_remove_task(self, task):
|
def stop_remove_task(self, task):
|
||||||
if task is None:
|
if task is None:
|
||||||
return
|
return
|
||||||
|
@ -392,7 +445,6 @@ class TaskMgr(threading.Thread):
|
||||||
self.clear_sub_tasks(task.subtask_list)
|
self.clear_sub_tasks(task.subtask_list)
|
||||||
self.release_task_ips(task)
|
self.release_task_ips(task)
|
||||||
self.remove_tasknet(task)
|
self.remove_tasknet(task)
|
||||||
self.lazy_delete_list.append(task)
|
|
||||||
|
|
||||||
def check_task_completed(self, task):
|
def check_task_completed(self, task):
|
||||||
if task.status == RUNNING or task.status == WAITING:
|
if task.status == RUNNING or task.status == WAITING:
|
||||||
|
@ -400,11 +452,15 @@ class TaskMgr(threading.Thread):
|
||||||
if sub_task.command_info != None and (sub_task.status == RUNNING or sub_task.status == WAITING):
|
if sub_task.command_info != None and (sub_task.status == RUNNING or sub_task.status == WAITING):
|
||||||
return False
|
return False
|
||||||
self.logger.info('task %s finished, status %d, subtasks: %s' % (task.id, task.status, str([sub_task.status for sub_task in task.subtask_list])))
|
self.logger.info('task %s finished, status %d, subtasks: %s' % (task.id, task.status, str([sub_task.status for sub_task in task.subtask_list])))
|
||||||
if task.at_same_time and task.status == FAILED:
|
|
||||||
self.jobmgr.report(task.username,task.id,"failed","",task.subtask_list[0].max_retry_count+1)
|
|
||||||
else:
|
|
||||||
self.jobmgr.report(task.username,task.id,'finished')
|
|
||||||
self.stop_remove_task(task)
|
self.stop_remove_task(task)
|
||||||
|
self.lazy_delete_list.append(task)
|
||||||
|
running_time, billing = task.get_billing()
|
||||||
|
self.logger.info('task %s running_time:%s billing:%d'%(task.id, str(running_time), billing))
|
||||||
|
running_time = math.ceil(running_time)
|
||||||
|
if task.status == FAILED:
|
||||||
|
self.jobmgr.report(task.username,task.id,"failed",task.failed_reason,task.subtask_list[0].max_retry_count+1, running_time, billing)
|
||||||
|
else:
|
||||||
|
self.jobmgr.report(task.username,task.id,'finished',running_time=running_time,billing=billing)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
# this method is called when worker send heart-beat rpc request
|
# this method is called when worker send heart-beat rpc request
|
||||||
|
@ -430,12 +486,12 @@ class TaskMgr(threading.Thread):
|
||||||
sub_task.status_reason = report.errmsg
|
sub_task.status_reason = report.errmsg
|
||||||
|
|
||||||
if report.subTaskStatus == FAILED or report.subTaskStatus == TIMEOUT:
|
if report.subTaskStatus == FAILED or report.subTaskStatus == TIMEOUT:
|
||||||
sub_task.waiting_for_retry()
|
sub_task.waiting_for_retry(report.errmsg)
|
||||||
|
self.jobmgr.report(task.username, task.id, 'retrying', report.errmsg)
|
||||||
elif report.subTaskStatus == OUTPUTERROR:
|
elif report.subTaskStatus == OUTPUTERROR:
|
||||||
sub_task.status = FAILED
|
sub_task.status = FAILED
|
||||||
if task.at_same_time:
|
task.status = FAILED
|
||||||
task.status = FAILED
|
task.failed_reason = report.errmsg
|
||||||
self.clear_sub_task(sub_task)
|
|
||||||
elif report.subTaskStatus == COMPLETED:
|
elif report.subTaskStatus == COMPLETED:
|
||||||
self.clear_sub_task(sub_task)
|
self.clear_sub_task(sub_task)
|
||||||
|
|
||||||
|
@ -445,7 +501,7 @@ class TaskMgr(threading.Thread):
|
||||||
self.logger.info('[task_scheduler] scheduling... (%d tasks remains)' % len(self.task_queue))
|
self.logger.info('[task_scheduler] scheduling... (%d tasks remains)' % len(self.task_queue))
|
||||||
|
|
||||||
for task in self.task_queue:
|
for task in self.task_queue:
|
||||||
if task in self.lazy_delete_list:
|
if task in self.lazy_delete_list or task.id in self.lazy_stop_list:
|
||||||
continue
|
continue
|
||||||
self.logger.info('task %s sub_tasks %s' % (task.id, str([sub_task.status for sub_task in task.subtask_list])))
|
self.logger.info('task %s sub_tasks %s' % (task.id, str([sub_task.status for sub_task in task.subtask_list])))
|
||||||
if self.check_task_completed(task):
|
if self.check_task_completed(task):
|
||||||
|
@ -577,6 +633,7 @@ class TaskMgr(threading.Thread):
|
||||||
|
|
||||||
# save the task information into database
|
# save the task information into database
|
||||||
# called when jobmgr assign task to taskmgr
|
# called when jobmgr assign task to taskmgr
|
||||||
|
@data_lock('add_lock')
|
||||||
def add_task(self, username, taskid, json_task, task_priority=1):
|
def add_task(self, username, taskid, json_task, task_priority=1):
|
||||||
# decode json string to object defined in grpc
|
# decode json string to object defined in grpc
|
||||||
self.logger.info('[taskmgr add_task] receive task %s' % taskid)
|
self.logger.info('[taskmgr add_task] receive task %s' % taskid)
|
||||||
|
@ -654,12 +711,12 @@ class TaskMgr(threading.Thread):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
@queue_lock
|
@data_lock('task_queue_lock')
|
||||||
def get_task_list(self):
|
def get_task_list(self):
|
||||||
return self.task_queue.copy()
|
return self.task_queue.copy()
|
||||||
|
|
||||||
|
|
||||||
@queue_lock
|
@data_lock('task_queue_lock')
|
||||||
def get_task(self, taskid):
|
def get_task(self, taskid):
|
||||||
for task in self.task_queue:
|
for task in self.task_queue:
|
||||||
if task.id == taskid:
|
if task.id == taskid:
|
||||||
|
|
|
@ -44,6 +44,7 @@ app.config['SQLALCHEMY_BINDS'] = {
|
||||||
'history': 'sqlite:///'+fsdir+'/global/sys/HistoryTable.db',
|
'history': 'sqlite:///'+fsdir+'/global/sys/HistoryTable.db',
|
||||||
'beansapplication': 'sqlite:///'+fsdir+'/global/sys/BeansApplication.db',
|
'beansapplication': 'sqlite:///'+fsdir+'/global/sys/BeansApplication.db',
|
||||||
'system': 'sqlite:///'+fsdir+'/global/sys/System.db',
|
'system': 'sqlite:///'+fsdir+'/global/sys/System.db',
|
||||||
|
'batch':'sqlite:///'+fsdir+'/global/sys/Batch.db?check_same_thread=False',
|
||||||
'login': 'sqlite:///'+fsdir+'/global/sys/Login.db'
|
'login': 'sqlite:///'+fsdir+'/global/sys/Login.db'
|
||||||
}
|
}
|
||||||
app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = True
|
app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = True
|
||||||
|
@ -435,3 +436,90 @@ class Image(db.Model):
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return "{\"id\":\"%d\",\"imagename\":\"%s\",\"hasPrivate\":\"%s\",\"hasPublic\":\"%s\",\"ownername\":\"%s\",\"updatetime\":\"%s\",\"description\":\"%s\"}" % (self.id,self.imagename,str(self.hasPrivate),str(self.hasPublic),self.create_time.strftime("%Y-%m-%d %H:%M:%S"),self.ownername,self.description)
|
return "{\"id\":\"%d\",\"imagename\":\"%s\",\"hasPrivate\":\"%s\",\"hasPublic\":\"%s\",\"ownername\":\"%s\",\"updatetime\":\"%s\",\"description\":\"%s\"}" % (self.id,self.imagename,str(self.hasPrivate),str(self.hasPublic),self.create_time.strftime("%Y-%m-%d %H:%M:%S"),self.ownername,self.description)
|
||||||
|
|
||||||
|
class Batchjob(db.Model):
|
||||||
|
__bind_key__ = 'batch'
|
||||||
|
id = db.Column(db.String(9), primary_key=True)
|
||||||
|
username = db.Column(db.String(10))
|
||||||
|
name = db.Column(db.String(30))
|
||||||
|
priority = db.Column(db.Integer)
|
||||||
|
status = db.Column(db.String(10))
|
||||||
|
failed_reason = db.Column(db.Text)
|
||||||
|
create_time = db.Column(db.DateTime)
|
||||||
|
end_time = db.Column(db.DateTime)
|
||||||
|
billing = db.Column(db.Integer)
|
||||||
|
tasks = db.relationship('Batchtask', backref='batchjob', lazy='dynamic')
|
||||||
|
|
||||||
|
def __init__(self,id,username,name,priority):
|
||||||
|
self.id = id
|
||||||
|
self.username = username
|
||||||
|
self.name = name
|
||||||
|
self.priority = priority
|
||||||
|
self.status = "pending"
|
||||||
|
self.failed_reason = ""
|
||||||
|
self.create_time = datetime.now()
|
||||||
|
self.end_time = None
|
||||||
|
self.billing = 0
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
info = {}
|
||||||
|
info['job_id'] = self.id
|
||||||
|
info['username'] = self.username
|
||||||
|
info['job_name'] = self.name
|
||||||
|
info['priority'] = self.priority
|
||||||
|
info['status'] = self.status
|
||||||
|
info['failed_reason'] = self.failed_reason
|
||||||
|
info['create_time'] = self.create_time.strftime("%Y-%m-%d %H:%M:%S")
|
||||||
|
if self.end_time is None:
|
||||||
|
info['end_time'] = "------"
|
||||||
|
else:
|
||||||
|
info['end_time'] = self.end_time.strftime("%Y-%m-%d %H:%M:%S")
|
||||||
|
info['billing'] = self.billing
|
||||||
|
return json.dumps(info)
|
||||||
|
|
||||||
|
class Batchtask(db.Model):
|
||||||
|
__bind_key__ = 'batch'
|
||||||
|
id = db.Column(db.String(15), primary_key=True)
|
||||||
|
idx = db.Column(db.String(10))
|
||||||
|
jobid = db.Column(db.String(9), db.ForeignKey('batchjob.id'))
|
||||||
|
status = db.Column(db.String(15))
|
||||||
|
failed_reason = db.Column(db.Text)
|
||||||
|
start_time = db.Column(db.DateTime)
|
||||||
|
end_time = db.Column(db.DateTime)
|
||||||
|
running_time = db.Column(db.Integer)
|
||||||
|
billing = db.Column(db.Integer)
|
||||||
|
config = db.Column(db.Text)
|
||||||
|
tried_times = db.Column(db.Integer)
|
||||||
|
|
||||||
|
def __init__(self, id, idx, config):
|
||||||
|
self.id = id
|
||||||
|
self.idx = idx
|
||||||
|
self.status = "pending"
|
||||||
|
self.failed_reason = ""
|
||||||
|
self.start_time = None
|
||||||
|
self.end_time = None
|
||||||
|
self.running_time = 0
|
||||||
|
self.billing = 0
|
||||||
|
self.config = json.dumps(config)
|
||||||
|
self.tried_times = 0
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
info = {}
|
||||||
|
info['id'] = self.id
|
||||||
|
info['idx'] = self.idx
|
||||||
|
info['jobid'] = self.jobid
|
||||||
|
info['status'] = self.status
|
||||||
|
info['failed_reason'] = self.failed_reason
|
||||||
|
if self.start_time is None:
|
||||||
|
info['start_time'] = "------"
|
||||||
|
else:
|
||||||
|
info['start_time'] = self.start_time.strftime("%Y-%m-%d %H:%M:%S")
|
||||||
|
if self.end_time is None:
|
||||||
|
info['end_time'] = "------"
|
||||||
|
else:
|
||||||
|
info['end_time'] = self.end_time.strftime("%Y-%m-%d %H:%M:%S")
|
||||||
|
info['running_time'] = self.running_time
|
||||||
|
info['billing'] = self.billing
|
||||||
|
info['config'] = json.loads(self.config)
|
||||||
|
info['tried_times'] = self.tried_times
|
||||||
|
return json.dumps(info)
|
||||||
|
|
|
@ -430,6 +430,10 @@ class portcontrol(object):
|
||||||
ports_lock.release()
|
ports_lock.release()
|
||||||
try:
|
try:
|
||||||
subprocess.run(['iptables','-t','nat','-A','PREROUTING','-p','tcp','--dport',str(free_port),"-j","DNAT",'--to-destination','%s:%s'%(container_ip,container_port)], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=False, check=True)
|
subprocess.run(['iptables','-t','nat','-A','PREROUTING','-p','tcp','--dport',str(free_port),"-j","DNAT",'--to-destination','%s:%s'%(container_ip,container_port)], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=False, check=True)
|
||||||
|
except subprocess.CalledProcessError as suberror:
|
||||||
|
return [False, "set port mapping failed : %s" % suberror.stdout.decode('utf-8')]
|
||||||
|
try:
|
||||||
|
subprocess.run(['iptables','-t','nat','-A','PREROUTING','-p','udp','--dport',str(free_port),"-j","DNAT",'--to-destination','%s:%s'%(container_ip,container_port)], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=False, check=True)
|
||||||
return [True, str(free_port)]
|
return [True, str(free_port)]
|
||||||
except subprocess.CalledProcessError as suberror:
|
except subprocess.CalledProcessError as suberror:
|
||||||
return [False, "set port mapping failed : %s" % suberror.stdout.decode('utf-8')]
|
return [False, "set port mapping failed : %s" % suberror.stdout.decode('utf-8')]
|
||||||
|
@ -447,6 +451,10 @@ class portcontrol(object):
|
||||||
subprocess.run(['iptables','-t','nat','-D','PREROUTING','-p','tcp','--dport',str(free_port),"-j","DNAT",'--to-destination','%s:%s'%(container_ip,container_port)], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=False, check=True)
|
subprocess.run(['iptables','-t','nat','-D','PREROUTING','-p','tcp','--dport',str(free_port),"-j","DNAT",'--to-destination','%s:%s'%(container_ip,container_port)], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=False, check=True)
|
||||||
except subprocess.CalledProcessError as suberror:
|
except subprocess.CalledProcessError as suberror:
|
||||||
return [False, "release port mapping failed : %s" % suberror.stdout.decode('utf-8')]
|
return [False, "release port mapping failed : %s" % suberror.stdout.decode('utf-8')]
|
||||||
|
try:
|
||||||
|
subprocess.run(['iptables','-t','nat','-D','PREROUTING','-p','udp','--dport',str(free_port),"-j","DNAT",'--to-destination','%s:%s'%(container_ip,container_port)], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=False, check=True)
|
||||||
|
except subprocess.CalledProcessError as suberror:
|
||||||
|
return [False, "release port mapping failed : %s" % suberror.stdout.decode('utf-8')]
|
||||||
ports_lock.acquire()
|
ports_lock.acquire()
|
||||||
free_ports[free_port] = True
|
free_ports[free_port] = True
|
||||||
allocated_ports[container_name].pop(container_port)
|
allocated_ports[container_name].pop(container_port)
|
||||||
|
|
|
@ -432,7 +432,7 @@ class TaskWorker(rpc_pb2_grpc.WorkerServicer):
|
||||||
self.add_msg(taskid,username,vnodeid,rpc_pb2.COMPLETED,token,"")
|
self.add_msg(taskid,username,vnodeid,rpc_pb2.COMPLETED,token,"")
|
||||||
else:
|
else:
|
||||||
logger.info("Task(%s-%s-%s) failed." % (str(taskid),str(vnodeid),token))
|
logger.info("Task(%s-%s-%s) failed." % (str(taskid),str(vnodeid),token))
|
||||||
self.add_msg(taskid,username,vnodeid,rpc_pb2.FAILED,token,"")
|
self.add_msg(taskid,username,vnodeid,rpc_pb2.FAILED,token,"Runtime Error. More information in stderr log.")
|
||||||
|
|
||||||
def add_msg(self,taskid,username,vnodeid,status,token,errmsg):
|
def add_msg(self,taskid,username,vnodeid,status,token,errmsg):
|
||||||
self.msgslock.acquire()
|
self.msgslock.acquire()
|
||||||
|
|
|
@ -43,6 +43,14 @@
|
||||||
<div class="form-group"><label class="col-sm-2 control-label">Job Name</label>
|
<div class="form-group"><label class="col-sm-2 control-label">Job Name</label>
|
||||||
<div class="col-sm-10"><input type="text" class="form-control" name="jobName" id="job_name" required></div>
|
<div class="col-sm-10"><input type="text" class="form-control" name="jobName" id="job_name" required></div>
|
||||||
</div>
|
</div>
|
||||||
|
<br/>
|
||||||
|
<div class="form-group"><label class="col-sm-2 control-label">Location</label>
|
||||||
|
<div class="col-sm-10"><select id="masterselector" class="form-control">
|
||||||
|
{% for master in masterips %}
|
||||||
|
<option value="{{master.split("@")[0]}}">{{master.split("@")[1]}}</option>
|
||||||
|
{% endfor %}
|
||||||
|
</select></div>
|
||||||
|
</div>
|
||||||
<div class="hr-line-dashed"></div>
|
<div class="hr-line-dashed"></div>
|
||||||
<br/>
|
<br/>
|
||||||
<div class="form-group"><label class="col-sm-2 control-label">Priority</label>
|
<div class="form-group"><label class="col-sm-2 control-label">Priority</label>
|
||||||
|
@ -64,7 +72,7 @@
|
||||||
<div class="form-group">
|
<div class="form-group">
|
||||||
<div class="col-sm-4 col-sm-offset-2">
|
<div class="col-sm-4 col-sm-offset-2">
|
||||||
<button class="btn btn-primary" type="button" id="add_task" class="btn btn-box-tool" title="add a task">Add Task <i class="fa fa-plus"></i></button>
|
<button class="btn btn-primary" type="button" id="add_task" class="btn btn-box-tool" title="add a task">Add Task <i class="fa fa-plus"></i></button>
|
||||||
<button class="btn btn-primary" type="submit">Create</button>
|
<button class="btn btn-primary" type="submit">Create Job</button>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
@ -96,6 +104,11 @@
|
||||||
<script type="text/javascript">
|
<script type="text/javascript">
|
||||||
var task_number = 0;
|
var task_number = 0;
|
||||||
var mapping_number = 0;
|
var mapping_number = 0;
|
||||||
|
var images_text = "{{ images }}";
|
||||||
|
images_text = images_text.replace(/'/g,"\"");
|
||||||
|
console.log(images_text);
|
||||||
|
var images_info = JSON.parse(images_text);
|
||||||
|
console.log(images_info);
|
||||||
$().ready(function() {
|
$().ready(function() {
|
||||||
$("#form").validate();
|
$("#form").validate();
|
||||||
});
|
});
|
||||||
|
@ -132,8 +145,69 @@
|
||||||
+'Remove</button></div>';
|
+'Remove</button></div>';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
$("select#masterselector").change(function() {
|
||||||
|
var masterip=$(this).children('option:selected').val();
|
||||||
|
$("#form").attr("action","/batch_job/"+ masterip +"/add/");
|
||||||
|
var mastername=$(this).children('option:selected').html();
|
||||||
|
console.log(masterip);
|
||||||
|
var host = window.location.host;
|
||||||
|
var images = images_info;
|
||||||
|
for(var tnum = 1; tnum<=task_number; ++tnum)
|
||||||
|
{
|
||||||
|
var imagehtml =
|
||||||
|
"<thead>"
|
||||||
|
+"<tr>"
|
||||||
|
+"<th>ImageName</th>"
|
||||||
|
+"<th>Type</th>"
|
||||||
|
+"<th>Owner</th>"
|
||||||
|
+"<th>Size</th>"
|
||||||
|
+"<th>Description</th>"
|
||||||
|
+"<th>Choose</th>"
|
||||||
|
+"</tr>"
|
||||||
|
+"</thead>"
|
||||||
|
+"<tbody>"
|
||||||
|
+"<tr>"
|
||||||
|
+"<td>base</td>"
|
||||||
|
+"<td>public</td>"
|
||||||
|
+"<td>docklet</td>"
|
||||||
|
+"<td>--</td>"
|
||||||
|
+"<td>A base image for you</td>"
|
||||||
|
+'<td><div class="i-checks"><label><input type="radio" name="image_' + tnum + '" value="base_base_base" checked="checked"></label></div></td>'
|
||||||
|
+"</tr>";
|
||||||
|
for(var index in images[masterip].private) {
|
||||||
|
var image = images[masterip].private[index];
|
||||||
|
imagehtml +=
|
||||||
|
"<tr>"
|
||||||
|
+"<td>"+image.name+"</td>"
|
||||||
|
+"<td>private</td>"
|
||||||
|
+"<td>{{user}}</td>"
|
||||||
|
+"<td>"+image.size_format+"</td>"
|
||||||
|
+'<td><a href="/image/' + masterip + '/description/' + image.name + '_' + '{{user}}' + '_private/" target="_blank">' + image.description + '</a></td>'
|
||||||
|
+'<td><div class="i-checks"><label><input type="radio" name="image_' + tnum + '" value="'+image.name+'_{{user}}_private"><label></div></td>'
|
||||||
|
+"</tr>";
|
||||||
|
}
|
||||||
|
for(var p_user in images[masterip].public) {
|
||||||
|
for(var index in images[masterip].public[p_user]) {
|
||||||
|
image=images[masterip].public[p_user][index];
|
||||||
|
imagehtml +=
|
||||||
|
"<tr>"
|
||||||
|
+"<td>"+image.name+"</td>"
|
||||||
|
+"<td>public</td>"
|
||||||
|
+"<td>" + p_user + "</td>"
|
||||||
|
+"<td>"+image.size_format+"</td>"
|
||||||
|
+'<td><a href="/image/' + masterip + '/description/' + image.name + "_" + p_user + '_public/" target="_blank">' + image.description + '</a></td>'
|
||||||
|
+'<td><div class="i-checks"><label><input type="radio" name="image_' + tnum + '" value="'+image.name+'_{{p_user}}_public"><label></div></td>'
|
||||||
|
+"</tr>";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
imagehtml += "</tbody>";
|
||||||
|
$("#imagetable"+tnum).html(imagehtml);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
function addTask() {
|
function addTask() {
|
||||||
task_number += 1;
|
task_number += 1;
|
||||||
|
var masterip=$("select#masterselector").children('option:selected').val();
|
||||||
mapping_number = 0;
|
mapping_number = 0;
|
||||||
var task_html = '';
|
var task_html = '';
|
||||||
task_html +=
|
task_html +=
|
||||||
|
@ -161,14 +235,14 @@
|
||||||
+'<div class="col-sm-3"><input type="number" class="form-control" name="diskSetting_' + task_number + '" id="diskSetting_' + task_number + '" value= 1024 min="128" max="10000" required/>'
|
+'<div class="col-sm-3"><input type="number" class="form-control" name="diskSetting_' + task_number + '" id="diskSetting_' + task_number + '" value= 1024 min="128" max="10000" required/>'
|
||||||
+'</div>MB</div>'
|
+'</div>MB</div>'
|
||||||
+'<div class="form-group">'
|
+'<div class="form-group">'
|
||||||
+'<label class="col-sm-2 control-label">VNode Count</label>'
|
+'<label class="col-sm-2 control-label">VNode Number</label>'
|
||||||
+'<div class="col-sm-3"><input type="number" class="form-control" name="vnodeCount_' + task_number + '" id="vnodeCount_' + task_number + '" value= 1 min="1" max="14" required/>'
|
+'<div class="col-sm-3"><input type="number" class="form-control" name="vnodeCount_' + task_number + '" id="vnodeCount_' + task_number + '" value= 1 min="1" max="14" required/>'
|
||||||
+'</div>'
|
+'</div>'
|
||||||
+'<label class="col-sm-2 control-label">Max Retry Count</label>'
|
+'<label class="col-sm-2 control-label">Max Retry Times</label>'
|
||||||
+'<div class="col-sm-3"><input type="number" class="form-control" name="retryCount_' + task_number + '" id="retryCount_' + task_number + '" value= 1 min="0" max="5" required/>'
|
+'<div class="col-sm-3"><input type="number" class="form-control" name="retryCount_' + task_number + '" id="retryCount_' + task_number + '" value= 1 min="0" max="5" required/>'
|
||||||
+'</div></div>'
|
+'</div></div>'
|
||||||
+'<div class="form-group">'
|
+'<div class="form-group">'
|
||||||
+'<label class="col-sm-2 control-label">Source Code Address</label>'
|
+'<label class="col-sm-2 control-label">Running Path</label>'
|
||||||
+'<div class="col-sm-3"><input type="text" class="form-control" name="srcAddr_' + task_number + '" id="srcAddr_' + task_number + '" value="/root" required/>'
|
+'<div class="col-sm-3"><input type="text" class="form-control" name="srcAddr_' + task_number + '" id="srcAddr_' + task_number + '" value="/root" required/>'
|
||||||
+'</div>'
|
+'</div>'
|
||||||
+'<label class="col-sm-2 control-label">Expire Time</label>'
|
+'<label class="col-sm-2 control-label">Expire Time</label>'
|
||||||
|
@ -195,50 +269,60 @@
|
||||||
+'<label class="col-sm-2 control-label">Start at the Same Time</label>'
|
+'<label class="col-sm-2 control-label">Start at the Same Time</label>'
|
||||||
+'<div class="col-sm-3"><input type="checkbox" name="atSameTime_' + task_number + '" checked="checked"/>'
|
+'<div class="col-sm-3"><input type="checkbox" name="atSameTime_' + task_number + '" checked="checked"/>'
|
||||||
+'</div></div>'
|
+'</div></div>'
|
||||||
+'<div class="form-group"><label class="col-sm-2 control-label">Image Choose</label>'
|
var images = images_info
|
||||||
+'<div class="col-sm-10">'
|
task_html +=
|
||||||
+'<table id="imagetable" class="table table-striped table-bordered table-hover table-image" >'
|
'<div class="form-group"><label class="col-sm-2 control-label">Image Choose</label>'
|
||||||
+'<thead>'
|
+'<div class="col-sm-10">'
|
||||||
+'<tr>'
|
+'<table id="imagetable' + task_number +'" class="table table-striped table-bordered table-hover table-image" >'
|
||||||
+'<th>ImageName</th>'
|
+"<thead>"
|
||||||
+'<th>Type</th>'
|
+"<tr>"
|
||||||
+'<th>Owner</th>'
|
+"<th>ImageName</th>"
|
||||||
+'<th>Description</th>'
|
+"<th>Type</th>"
|
||||||
+'<th>Choose</th>'
|
+"<th>Owner</th>"
|
||||||
+'</tr>'
|
+"<th>Size</th>"
|
||||||
+'</thead>'
|
+"<th>Description</th>"
|
||||||
+'<tbody>'
|
+"<th>Choose</th>"
|
||||||
+'<tr>'
|
+"</tr>"
|
||||||
+'<td>base</td>'
|
+"</thead>"
|
||||||
+'<td>public</td>'
|
+"<tbody>"
|
||||||
+'<td>docklet</td>'
|
+"<tr>"
|
||||||
+'<td>A base image for you</td>'
|
+"<td>base</td>"
|
||||||
+'<td><div class="i-checks"><label><input type="radio" name="image_' + task_number + '" value="base_base_base" checked="checked"></label></div></td>'
|
+"<td>public</td>"
|
||||||
+'</tr>'
|
+"<td>docklet</td>"
|
||||||
+'{% for image in images['private'] %}'
|
+"<td>--</td>"
|
||||||
+'<tr>'
|
+"<td>A base image for you</td>"
|
||||||
+'<td>{{image['name']}}</td>'
|
+'<td><div class="i-checks"><label><input type="radio" name="image_' + task_number + '" value="base_base_base" checked="checked"></label></div></td>'
|
||||||
+'<td>private</td>'
|
+"</tr>";
|
||||||
+'<td>{{user}}</td>'
|
for(var index in images[masterip].private) {
|
||||||
+'<td><a href="/image/{{masterips[0].split("@")[1]}}/description/{{image['name']}}_{{user}}_private/" target="_blank">{{image['description']}}</a></td>'
|
var image = images[masterip].private[index];
|
||||||
+'<td><div class="i-checks"><label><input type="radio" name="image_' + task_number + '" value="{{image['name']}}_{{user}}_private"></label></div></td>'
|
task_html +=
|
||||||
+'</tr>'
|
"<tr>"
|
||||||
+'{% endfor %}'
|
+"<td>"+image.name+"</td>"
|
||||||
+'{% for p_user,p_images in images['public'].items() %}'
|
+"<td>private</td>"
|
||||||
+'{% for image in p_images %}'
|
+"<td>{{user}}</td>"
|
||||||
+'<tr>'
|
+"<td>"+image.size_format+"</td>"
|
||||||
+'<td>{{image['name']}}</td>'
|
+'<td><a href="/image/' + masterip + '/description/' + image.name + '_' + '{{user}}' + '_private/" target="_blank">' + image.description + '</a></td>'
|
||||||
+'<td>public</td>'
|
+'<td><div class="i-checks"><label><input type="radio" name="image_' + task_number + '" value="'+image.name+'_{{user}}_private"></label></div></td>'
|
||||||
+'<td>{{p_user}}</td>'
|
+"</tr>";
|
||||||
+'<td><a href="/image/{{masterips[0].split("@")[1]}}/description/{{image['name']}}_{{p_user}}_public/" target="_blank">{{image['description']}}</a></td>'
|
}
|
||||||
+'<td><div class="i-checks"><label><input type="radio" name="image_' + task_number + '" value="{{image['name']}}_{{p_user}}_public"></label></div></td>'
|
for(var p_user in images[masterip].public) {
|
||||||
+'</tr>'
|
for(var index in images[masterip].public[p_user]) {
|
||||||
+'{% endfor %}'
|
image=images[masterip].public[p_user][index];
|
||||||
+'{% endfor %}'
|
task_html +=
|
||||||
+'</tbody>'
|
"<tr>"
|
||||||
+'</table>'
|
+"<td>"+image.name+"</td>"
|
||||||
+'</div>'
|
+"<td>public</td>"
|
||||||
+'</div>'
|
+"<td>" + p_user + "</td>"
|
||||||
|
+"<td>"+image.size_format+"</td>"
|
||||||
|
+'<td><a href="/image/' + masterip + '/description/' + image.name + "_" + p_user + '_public/" target="_blank">' + image.description + '</a></td>'
|
||||||
|
+'<td><div class="i-checks"><label><input type="radio" name="image_' + task_number + '" value="' + image.name + '_' + p_user + '_public"></label></div></td>'
|
||||||
|
+"</tr>";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
task_html +=
|
||||||
|
'</tbody></table>'
|
||||||
|
+'</div>'
|
||||||
|
+'</div>'
|
||||||
+'<div class="form-group">'
|
+'<div class="form-group">'
|
||||||
+'<span>'
|
+'<span>'
|
||||||
+'<label class="col-sm-2 control-label">External Storage Mapping</label>'
|
+'<label class="col-sm-2 control-label">External Storage Mapping</label>'
|
||||||
|
|
|
@ -0,0 +1,238 @@
|
||||||
|
{% extends 'base_AdminLTE.html' %}
|
||||||
|
|
||||||
|
{% block title %}Docklet | Batch Job Info{% endblock %}
|
||||||
|
|
||||||
|
{% block panel_title %}Info for {{ jobinfo['job_id'] }}{% endblock %}
|
||||||
|
|
||||||
|
{% block css_src %}
|
||||||
|
<link href="//cdn.bootcss.com/datatables/1.10.11/css/dataTables.bootstrap.min.css" rel="stylesheet">
|
||||||
|
<link href="//cdn.bootcss.com/datatables/1.10.11/css/jquery.dataTables_themeroller.css" rel="stylesheet">
|
||||||
|
<link href="/static/dist/css/modalconfig.css" rel="stylesheet">
|
||||||
|
|
||||||
|
{% endblock %}
|
||||||
|
|
||||||
|
{% block panel_list %}
|
||||||
|
<ol class="breadcrumb">
|
||||||
|
<li>
|
||||||
|
<a href="/dashboard/"><i class="fa fa-dashboard"></i>Home</a>
|
||||||
|
</li>
|
||||||
|
<li>
|
||||||
|
<a href='/batch_jobs/'>Batch Job</a>
|
||||||
|
</li>
|
||||||
|
<li class='active'>
|
||||||
|
<strong>Info</strong>
|
||||||
|
</li>
|
||||||
|
</ol>
|
||||||
|
{% endblock %}
|
||||||
|
|
||||||
|
{% block content %}
|
||||||
|
<div class="row">
|
||||||
|
<div class="col-md-12">
|
||||||
|
<div class="box box-info">
|
||||||
|
<div class="box-header with-border">
|
||||||
|
<h3 class="box-title">Overview</h3>
|
||||||
|
|
||||||
|
<div class="box-tools pull-right">
|
||||||
|
<button type="button" class="btn btn-box-tool" data-widget="collapse"><i class="fa fa-minus"></i>
|
||||||
|
</button>
|
||||||
|
<button type="button" class="btn btn-box-tool" data-widget="remove"><i class="fa fa-times"></i></button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="box-body table-responsive">
|
||||||
|
<table class="table table-bordered">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>Job ID</th>
|
||||||
|
<th>Name</th>
|
||||||
|
<th>Priority</th>
|
||||||
|
<th>Status</th>
|
||||||
|
<th>Create Time</th>
|
||||||
|
<th>End Time</th>
|
||||||
|
<th>Billing</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<tr>
|
||||||
|
<td>{{ jobinfo['job_id'] }}</td>
|
||||||
|
<td>{{ jobinfo['job_name'] }}</td>
|
||||||
|
<td>{{ jobinfo['priority'] }}</td>
|
||||||
|
<td>{{ jobinfo['status'] }}</td>
|
||||||
|
<td>{{ jobinfo['create_time'] }}</td>
|
||||||
|
<td>{{ jobinfo['end_time'] }}</td>
|
||||||
|
<td>{{ jobinfo['billing'] }} <img src='/static/img/bean.png' /></td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="row">
|
||||||
|
<div class="col-md-12">
|
||||||
|
<div class="box box-info">
|
||||||
|
<div class="box-header with-border">
|
||||||
|
<h3 class="box-title">Tasks Overview</h3>
|
||||||
|
|
||||||
|
<div class="box-tools pull-right">
|
||||||
|
<button type="button" class="btn btn-box-tool" data-widget="collapse"><i class="fa fa-minus"></i>
|
||||||
|
</button>
|
||||||
|
<button type="button" class="btn btn-box-tool" data-widget="remove"><i class="fa fa-times"></i></button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="box-body table-responsive">
|
||||||
|
<table width="100%" cellspacing="0" style="margin: 0 auto;" id="table-tasks" class="table table-striped table-bordered table-hover">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>Task Index</th>
|
||||||
|
<th>Status</th>
|
||||||
|
<th>Failed Reason(if fails)</th>
|
||||||
|
<th>Tried Times</th>
|
||||||
|
<th>Start Time</th>
|
||||||
|
<th>End Time</th>
|
||||||
|
<th>Total Running Time</th>
|
||||||
|
<th>Billing</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{% for task in jobinfo['tasks'] %}
|
||||||
|
<tr>
|
||||||
|
<td>{{ task['idx'] }}</td>
|
||||||
|
<td>{{ task['status'] }}</td>
|
||||||
|
<td>{{ task['failed_reason'] }}</td>
|
||||||
|
<td>{{ task['tried_times'] }}</td>
|
||||||
|
<td>{{ task['start_time'] }}</td>
|
||||||
|
<td>{{ task['end_time'] }}</td>
|
||||||
|
<td>{{ task['running_time'] }} s</td>
|
||||||
|
<td>{{ task['billing'] }} <img src='/static/img/bean.png' /></td>
|
||||||
|
</tr>
|
||||||
|
{% endfor %}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="row">
|
||||||
|
<div class="col-md-12">
|
||||||
|
<div class="box box-info">
|
||||||
|
<div class="box-header with-border">
|
||||||
|
<h3 class="box-title">Tasks Configs</h3>
|
||||||
|
|
||||||
|
<div class="box-tools pull-right">
|
||||||
|
<button type="button" class="btn btn-box-tool" data-widget="collapse"><i class="fa fa-minus"></i>
|
||||||
|
</button>
|
||||||
|
<button type="button" class="btn btn-box-tool" data-widget="remove"><i class="fa fa-times"></i></button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="box-body">
|
||||||
|
{% for task in jobinfo['tasks'] %}
|
||||||
|
<div class="panel panel-default" id="task_pannel_{{ task['idx'] }}">
|
||||||
|
<div class="panel-heading">
|
||||||
|
<h4 class="panel-title">
|
||||||
|
<a data-toggle="collapse" data-panel="#accordion" href="#collapse{{ task['idx'] }}">
|
||||||
|
Task #{{ task['idx'] }}
|
||||||
|
</a>
|
||||||
|
</h4>
|
||||||
|
</div>
|
||||||
|
<div id="collapse{{ task['idx'] }}" class="panel-collapse collapse in">
|
||||||
|
<div class="panel-body">
|
||||||
|
<div class="table-responsive">
|
||||||
|
<table class="table table-bordered table-hover">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>CPU Cores</th>
|
||||||
|
<th>Memory</th>
|
||||||
|
<th>GPU</th>
|
||||||
|
<th>Disk</th>
|
||||||
|
<th>VNode Number</th>
|
||||||
|
<th>Max Retry Times</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<tr>
|
||||||
|
<td>{{ task['config']['cpuSetting'] }}</td>
|
||||||
|
<td>{{ task['config']['memorySetting'] }} MB</td>
|
||||||
|
<td>{{ task['config']['gpuSetting'] }}</td>
|
||||||
|
<td>{{ task['config']['diskSetting'] }} MB</td>
|
||||||
|
<td>{{ task['config']['vnodeCount'] }}</td>
|
||||||
|
<td>{{ task['config']['retryCount'] }}</td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>Running Path</th>
|
||||||
|
<th>Expire Time</th>
|
||||||
|
<th>Stdout Redirect Path</th>
|
||||||
|
<th>Stderr Redirect Path</th>
|
||||||
|
<th>Dependency</th>
|
||||||
|
<th>Command</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<tr>
|
||||||
|
<td>{{ task['config']['srcAddr'] }}</td>
|
||||||
|
<td>{{ task['config']['expTime'] }} seconds</td>
|
||||||
|
<td>{{ task['config']['stdOutRedPth'] }}</td>
|
||||||
|
<td>{{ task['config']['stdErrRedPth'] }}</td>
|
||||||
|
<td>{{ task['config']['dependency'] }}</td>
|
||||||
|
<td>{{ task['config']['command'] }}</td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>Run on</th>
|
||||||
|
<th>Start at the Same Time</th>
|
||||||
|
<th>Image Name</th>
|
||||||
|
<th>Image Owner</th>
|
||||||
|
<th>Image Type</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<tr>
|
||||||
|
{% if task['config']['runon'] == 'all' %}
|
||||||
|
<td>all vnodes</td>
|
||||||
|
{% else %}
|
||||||
|
<td>master vnode</td>
|
||||||
|
{% endif %}
|
||||||
|
{% if 'atSameTime' in task['config'].keys() %}
|
||||||
|
<td>True</td>
|
||||||
|
{% else %}
|
||||||
|
<td>False</td>
|
||||||
|
{% endif %}
|
||||||
|
{% if task['config']['image'] == 'base_base_base' %}
|
||||||
|
<td>base</td>
|
||||||
|
<td>docklet</td>
|
||||||
|
<td>public</td>
|
||||||
|
{% else %}
|
||||||
|
<td>{{ task['config']['image'].split('_')[0] }}</td>
|
||||||
|
<td>{{ task['config']['image'].split('_')[1] }}</td>
|
||||||
|
<td>{{ task['config']['image'].split('_')[2] }}</td>
|
||||||
|
{% endif %}
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{% endfor %}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{% endblock %}
|
||||||
|
|
||||||
|
{% block script_src %}
|
||||||
|
<script src="//cdn.bootcss.com/datatables/1.10.11/js/jquery.dataTables.min.js"></script>
|
||||||
|
<script src="//cdn.bootcss.com/datatables/1.10.11/js/dataTables.bootstrap.min.js"></script>
|
||||||
|
|
||||||
|
<script type="text/javascript">
|
||||||
|
$(document).ready(function() {
|
||||||
|
$("#table-tasks").DataTable({"scrollX":true,"order":[[ 0, "asc" ]]});
|
||||||
|
});
|
||||||
|
</script>
|
||||||
|
{% endblock %}
|
|
@ -3,6 +3,13 @@
|
||||||
|
|
||||||
{% block panel_title %}Batch Job{% endblock %}
|
{% block panel_title %}Batch Job{% endblock %}
|
||||||
|
|
||||||
|
{% block css_src %}
|
||||||
|
<link href="//cdn.bootcss.com/datatables/1.10.11/css/dataTables.bootstrap.min.css" rel="stylesheet">
|
||||||
|
<link href="//cdn.bootcss.com/datatables/1.10.11/css/jquery.dataTables_themeroller.css" rel="stylesheet">
|
||||||
|
<link href="/static/dist/css/modalconfig.css" rel="stylesheet">
|
||||||
|
|
||||||
|
{% endblock %}
|
||||||
|
|
||||||
{% block panel_list %}
|
{% block panel_list %}
|
||||||
<ol class="breadcrumb">
|
<ol class="breadcrumb">
|
||||||
<li>
|
<li>
|
||||||
|
@ -31,8 +38,9 @@
|
||||||
<p>
|
<p>
|
||||||
<a href="/batch_job/create/"><button type="button" class="btn btn-primary btn-sm"><i class="fa fa-plus"></i> Create Batch Job</button></a>
|
<a href="/batch_job/create/"><button type="button" class="btn btn-primary btn-sm"><i class="fa fa-plus"></i> Create Batch Job</button></a>
|
||||||
</p>
|
</p>
|
||||||
{% for job_info in job_list %}
|
{% for master in masterips %}
|
||||||
<div class="modal inmodal" id='OutputModal_{{ job_info['job_id'] }}' tabindex="-1" role="dialog" aria-hidden="true">
|
{% for job_info in job_list[master.split('@')[0]] %}
|
||||||
|
<div class="modal inmodal" id='OutputModal_{{ master.split('@')[1] }}_{{ job_info['job_id'] }}' tabindex="-1" role="dialog" aria-hidden="true">
|
||||||
<div class="modal-dialog">
|
<div class="modal-dialog">
|
||||||
<div class="modal-content animated fadeIn">
|
<div class="modal-content animated fadeIn">
|
||||||
<div class="modal-header">
|
<div class="modal-header">
|
||||||
|
@ -55,8 +63,8 @@
|
||||||
<tr>
|
<tr>
|
||||||
<td>{{ taskid }}</td>
|
<td>{{ taskid }}</td>
|
||||||
<td>{{ vnodeid }}</td>
|
<td>{{ vnodeid }}</td>
|
||||||
<td><a class="btn btn-info btn-xs" href='/batch_job/output/{{ job_info["job_id"] }}/{{ taskid }}/{{ vnodeid }}/stdout/' target="_blank">Stdout</a></td>
|
<td><a class="btn btn-info btn-xs" href='/batch_job/output/{{ master.split('@')[0] }}/{{ job_info["job_id"] }}/{{ taskid }}/{{ vnodeid }}/stdout/' target="_blank">Stdout</a></td>
|
||||||
<td><a class="btn btn-info btn-xs" href='/batch_job/output/{{ job_info["job_id"] }}/{{ taskid }}/{{ vnodeid }}/stderr/' target="_blank">Stderr</a></td>
|
<td><a class="btn btn-info btn-xs" href='/batch_job/output/{{ master.split('@')[0] }}/{{ job_info["job_id"] }}/{{ taskid }}/{{ vnodeid }}/stderr/' target="_blank">Stderr</a></td>
|
||||||
</tr>
|
</tr>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
|
@ -70,33 +78,46 @@
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
|
{% endfor %}
|
||||||
<div class="table">
|
<div class="table">
|
||||||
<table width="100%" cellspacing="0" style="margin: 0 auto;" class="table table-striped table-bordered table-hover table-batch">
|
<table width="100%" cellspacing="0" style="margin: 0 auto;" class="table table-striped table-bordered table-hover table-batch">
|
||||||
<thead>
|
<thead>
|
||||||
<tr>
|
<tr>
|
||||||
|
<th>Location</th>
|
||||||
<th>ID</th>
|
<th>ID</th>
|
||||||
<th>Name</th>
|
<th>Name</th>
|
||||||
<th>Status</th>
|
<th>Status</th>
|
||||||
<th>Tasks</th>
|
|
||||||
<th>Operations</th>
|
<th>Operations</th>
|
||||||
<th>Create Time</th>
|
<th>Create Time</th>
|
||||||
|
<th>End Time</th>
|
||||||
|
<th>Billing</th>
|
||||||
<th>Stdout and Stderr</th>
|
<th>Stdout and Stderr</th>
|
||||||
|
<th>Detailed Info</th>
|
||||||
</tr>
|
</tr>
|
||||||
</thead>
|
</thead>
|
||||||
<tbody>
|
<tbody>
|
||||||
{% for job_info in job_list %}
|
{% for master in masterips %}
|
||||||
|
{% for job_info in job_list[master.split('@')[0]] %}
|
||||||
<tr>
|
<tr>
|
||||||
|
<td>{{ master.split('@')[1] }}</td>
|
||||||
<td>{{ job_info['job_id'] }}</td>
|
<td>{{ job_info['job_id'] }}</td>
|
||||||
<td>{{ job_info['job_name'] }}</td>
|
<td>{{ job_info['job_name'] }}</td>
|
||||||
<td>
|
<td>
|
||||||
{{ job_info['status'] }}
|
{{ job_info['status'] }}
|
||||||
</td>
|
</td>
|
||||||
<td>Tasks</td>
|
{% if job_info['status'] == 'done' or job_info['status'] == 'failed' or job_info['status'] == 'stopping' or job_info['status'] == 'stopped'%}
|
||||||
<td><a href="/batch_job/{{masterips[0].split("@")[0]}}/stop/{{ job_info['job_id'] }}/"><button type="button" class="btn btn-xs btn-warning"> Stop </button></a></td>
|
<td><button type="button" class="btn btn-xs btn-default"> Stop </button></td>
|
||||||
|
{% else %}
|
||||||
|
<td><a href="/batch_job/{{master.split("@")[0]}}/stop/{{ job_info['job_id'] }}/"><button type="button" class="btn btn-xs btn-danger"> Stop </button></a></td>
|
||||||
|
{% endif %}
|
||||||
<td>{{ job_info['create_time'] }}</td>
|
<td>{{ job_info['create_time'] }}</td>
|
||||||
<td><a role="button" class="btn btn-info btn-xs" id='{{ job_info['job_id'] }}_output' data-toggle="modal" data-target='#OutputModal_{{ job_info['job_id'] }}'>Get Output</a></td>
|
<td>{{ job_info['end_time'] }}</td>
|
||||||
|
<td>{{ job_info['billing'] }} <img src='/static/img/bean.png' /></td>
|
||||||
|
<td><a role="button" class="btn btn-info btn-xs" id='{{ master }}_{{ job_info['job_id'] }}_output' data-toggle="modal" data-target='#OutputModal_{{ master.split('@')[1] }}_{{ job_info['job_id'] }}'>Get Output</a></td>
|
||||||
|
<td><a href="/batch_job/{{master.split("@")[0]}}/info/{{ job_info['job_id'] }}/"><button type="button" class="btn btn-xs btn-info"> Info </button></a></td>
|
||||||
</tr>
|
</tr>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
|
{% endfor %}
|
||||||
</tbody>
|
</tbody>
|
||||||
</table>
|
</table>
|
||||||
</div>
|
</div>
|
||||||
|
|
13
web/web.py
13
web/web.py
|
@ -152,14 +152,17 @@ def stop_batch_job(masterip,jobid):
|
||||||
stopBatchJobView.jobid = jobid
|
stopBatchJobView.jobid = jobid
|
||||||
return stopBatchJobView().as_view()
|
return stopBatchJobView().as_view()
|
||||||
|
|
||||||
@app.route("/batch_job/state/", methods=['GET'])
|
@app.route("/batch_job/<masterip>/info/<jobid>/", methods=['GET'])
|
||||||
@login_required
|
@login_required
|
||||||
def state_batch_job():
|
def info_batch_job(masterip,jobid):
|
||||||
return stateBatchJobView().as_view()
|
infoBatchJobView.masterip = masterip
|
||||||
|
infoBatchJobView.jobid = jobid
|
||||||
|
return infoBatchJobView().as_view()
|
||||||
|
|
||||||
@app.route("/batch_job/output/<jobid>/<taskid>/<vnodeid>/<issue>/", methods=['GET'])
|
@app.route("/batch_job/output/<masterip>/<jobid>/<taskid>/<vnodeid>/<issue>/", methods=['GET'])
|
||||||
@login_required
|
@login_required
|
||||||
def output_batch_job(jobid, taskid, vnodeid, issue):
|
def output_batch_job(masterip, jobid, taskid, vnodeid, issue):
|
||||||
|
outputBatchJobView.masterip = masterip
|
||||||
outputBatchJobView.jobid = jobid
|
outputBatchJobView.jobid = jobid
|
||||||
outputBatchJobView.taskid = taskid
|
outputBatchJobView.taskid = taskid
|
||||||
outputBatchJobView.vnodeid = vnodeid
|
outputBatchJobView.vnodeid = vnodeid
|
||||||
|
|
|
@ -3,6 +3,7 @@ from webViews.view import normalView
|
||||||
from webViews.log import logger
|
from webViews.log import logger
|
||||||
from webViews.checkname import checkname
|
from webViews.checkname import checkname
|
||||||
from webViews.dockletrequest import dockletRequest
|
from webViews.dockletrequest import dockletRequest
|
||||||
|
import json
|
||||||
|
|
||||||
class batchJobListView(normalView):
|
class batchJobListView(normalView):
|
||||||
template_path = "batch/batch_list.html"
|
template_path = "batch/batch_list.html"
|
||||||
|
@ -10,9 +11,12 @@ class batchJobListView(normalView):
|
||||||
@classmethod
|
@classmethod
|
||||||
def get(self):
|
def get(self):
|
||||||
masterips = dockletRequest.post_to_all()
|
masterips = dockletRequest.post_to_all()
|
||||||
result = dockletRequest.post("/batch/job/list/",{},masterips[0].split("@")[0])
|
job_list = {}
|
||||||
job_list = result.get("data")
|
for ipname in masterips:
|
||||||
logger.debug("job_list: %s" % job_list)
|
ip = ipname.split("@")[0]
|
||||||
|
result = dockletRequest.post("/batch/job/list/",{},ip)
|
||||||
|
job_list[ip] = result.get("data")
|
||||||
|
logger.debug("job_list[%s]: %s" % (ip,job_list[ip]))
|
||||||
if True:
|
if True:
|
||||||
return self.render(self.template_path, masterips=masterips, job_list=job_list)
|
return self.render(self.template_path, masterips=masterips, job_list=job_list)
|
||||||
else:
|
else:
|
||||||
|
@ -24,49 +28,63 @@ class createBatchJobView(normalView):
|
||||||
@classmethod
|
@classmethod
|
||||||
def get(self):
|
def get(self):
|
||||||
masterips = dockletRequest.post_to_all()
|
masterips = dockletRequest.post_to_all()
|
||||||
images = dockletRequest.post("/image/list/",{},masterips[0].split("@")[0]).get("images")
|
images = {}
|
||||||
if True:
|
for master in masterips:
|
||||||
return self.render(self.template_path, masterips=masterips, images=images)
|
images[master.split("@")[0]] = dockletRequest.post("/image/list/",{},master.split("@")[0]).get("images")
|
||||||
else:
|
logger.info(images)
|
||||||
return self.error()
|
return self.render(self.template_path, masterips=masterips, images=images)
|
||||||
|
|
||||||
class stateBatchJobView(normalView):
|
|
||||||
template_path = "batch/batch_state.html"
|
class infoBatchJobView(normalView):
|
||||||
|
template_path = "batch/batch_info.html"
|
||||||
|
error_path = "error.html"
|
||||||
|
masterip = ""
|
||||||
|
jobid = ""
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get(self):
|
def get(self):
|
||||||
if True:
|
data = {
|
||||||
return self.render(self.template_path)
|
'jobid':self.jobid
|
||||||
|
}
|
||||||
|
result = dockletRequest.post("/batch/job/info/",data,self.masterip)
|
||||||
|
data = result.get("data")
|
||||||
|
logger.info(str(data))
|
||||||
|
#logger.debug("job_list: %s" % job_list)
|
||||||
|
if result.get('success',"") == "true":
|
||||||
|
return self.render(self.template_path, masterip=self.masterip, jobinfo=data)
|
||||||
else:
|
else:
|
||||||
return self.error()
|
return self.render(self.error_path, message = result.get('message'))
|
||||||
|
|
||||||
class addBatchJobView(normalView):
|
class addBatchJobView(normalView):
|
||||||
template_path = "batch/batch_list.html"
|
template_path = "batch/batch_list.html"
|
||||||
|
error_path = "error.html"
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def post(self):
|
def post(self):
|
||||||
masterip = self.masterip
|
masterip = self.masterip
|
||||||
result = dockletRequest.post("/batch/job/add/", self.job_data, masterip)
|
result = dockletRequest.post("/batch/job/add/", self.job_data, masterip)
|
||||||
#if result.get('success', None) == "true":
|
if result.get('success', None) == "true":
|
||||||
return redirect('/batch_jobs/')
|
return redirect('/batch_jobs/')
|
||||||
#else:
|
else:
|
||||||
#return self.error()
|
return self.render(self.error_path, message = result.get('message'))
|
||||||
|
|
||||||
class stopBatchJobView(normalView):
|
class stopBatchJobView(normalView):
|
||||||
template_path = "batch/batch_list.html"
|
template_path = "batch/batch_list.html"
|
||||||
|
error_path = "error.html"
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get(self):
|
def get(self):
|
||||||
masterip = self.masterip
|
masterip = self.masterip
|
||||||
data = {'jobid':self.jobid}
|
data = {'jobid':self.jobid}
|
||||||
result = dockletRequest.post("/batch/job/stop/", data, masterip)
|
result = dockletRequest.post("/batch/job/stop/", data, masterip)
|
||||||
#if result.get('success', None) == "true":
|
if result.get('success', None) == "true":
|
||||||
return redirect('/batch_jobs/')
|
return redirect('/batch_jobs/')
|
||||||
#else:
|
else:
|
||||||
#return self.error()
|
return self.render(self.error_path, message = result.get('message'))
|
||||||
|
|
||||||
class outputBatchJobView(normalView):
|
class outputBatchJobView(normalView):
|
||||||
template_path = "batch/batch_output.html"
|
template_path = "batch/batch_output.html"
|
||||||
|
masterip = ""
|
||||||
jobid = ""
|
jobid = ""
|
||||||
taskid = ""
|
taskid = ""
|
||||||
vnodeid = ""
|
vnodeid = ""
|
||||||
|
@ -74,18 +92,17 @@ class outputBatchJobView(normalView):
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get(self):
|
def get(self):
|
||||||
masterips = dockletRequest.post_to_all()
|
|
||||||
data = {
|
data = {
|
||||||
'jobid':self.jobid,
|
'jobid':self.jobid,
|
||||||
'taskid':self.taskid,
|
'taskid':self.taskid,
|
||||||
'vnodeid':self.vnodeid,
|
'vnodeid':self.vnodeid,
|
||||||
'issue':self.issue
|
'issue':self.issue
|
||||||
}
|
}
|
||||||
result = dockletRequest.post("/batch/job/output/",data,masterips[0].split("@")[0])
|
result = dockletRequest.post("/batch/job/output/",data,self.masterip)
|
||||||
output = result.get("data")
|
output = result.get("data")
|
||||||
#logger.debug("job_list: %s" % job_list)
|
#logger.debug("job_list: %s" % job_list)
|
||||||
if result.get('success',"") == "true":
|
if result.get('success',"") == "true":
|
||||||
return self.render(self.template_path, masterip=masterips[0].split("@")[0], jobid=self.jobid,
|
return self.render(self.template_path, masterip=self.masterip, jobid=self.jobid,
|
||||||
taskid=self.taskid, vnodeid=self.vnodeid, issue=self.issue, output=output)
|
taskid=self.taskid, vnodeid=self.vnodeid, issue=self.issue, output=output)
|
||||||
else:
|
else:
|
||||||
return self.error()
|
return self.error()
|
||||||
|
|
Loading…
Reference in New Issue