refactor taskmgr

This commit is contained in:
Gallen 2019-03-05 17:46:12 +08:00
parent 8f14b4e9c8
commit c85342bec8
2 changed files with 255 additions and 165 deletions

View File

@ -24,16 +24,12 @@ def int_to_ip(num):
return str((num>>24)&255)+"."+str((num>>16)&255)+"."+str((num>>8)&255)+"."+str(num&255) return str((num>>24)&255)+"."+str((num>>16)&255)+"."+str((num>>8)&255)+"."+str(num&255)
class Task(): class Task():
def __init__(self, configinfo, vnodeinfo, taskinfo, priority, max_size): def __init__(self, task_id, username, at_same_time, priority, max_size, task_infos):
self.vnodeinfo = vnodeinfo self.id = task_id
self.taskinfo = taskinfo self.username = username
self.status = WAITING self.status = WAITING
self.subtask_list = [] # if all the vnodes must be started at the same time
self.token = '' self.at_same_time = at_same_time
self.maxRetryCount = self.configinfo['maxRetryCount']
self.atSameTime = self.configinfo['atSameTime']
self.multicommand = self.configinfo['multicommand']
self.vnode_nums = self.configinfo['vnode_nums']
# priority the bigger the better # priority the bigger the better
# self.priority the smaller the better # self.priority the smaller the better
self.priority = int(time.time()) / 60 / 60 - priority self.priority = int(time.time()) / 60 / 60 - priority
@ -41,8 +37,13 @@ class Task():
self.ips = None self.ips = None
self.max_size = max_size self.max_size = max_size
for i in range(self.vnode_nums): self.subtask_list = [SubTask(
self.subtask_list.append({'status':'WAITING','try_count':0}) idx = index,
root_task = self,
vnode_info = task_info['vnode_info'],
command_info = task_info['command_info'],
max_retry_count = task_info['max_retry_count']
) for (index, task_info) in enumerate(task_infos)]
def __lt__(self, other): def __lt__(self, other):
return self.priority < other.priority return self.priority < other.priority
@ -55,8 +56,8 @@ class Task():
self.ips.append(int_to_ip(base_ip + self.task_base_ip + i + 2)) self.ips.append(int_to_ip(base_ip + self.task_base_ip + i + 2))
def gen_hosts(self): def gen_hosts(self):
username = self.taskinfo.username username = self.username
taskid = self.taskinfo.taskid taskid = self.id
logger.info("Generate hosts for user(%s) task(%s) base_ip(%s)"%(username,taskid,str(self.task_base_ip))) logger.info("Generate hosts for user(%s) task(%s) base_ip(%s)"%(username,taskid,str(self.task_base_ip)))
fspath = env.getenv('FS_PREFIX') fspath = env.getenv('FS_PREFIX')
if not os.path.isdir("%s/global/users/%s" % (fspath,username)): if not os.path.isdir("%s/global/users/%s" % (fspath,username)):
@ -72,11 +73,26 @@ class Task():
i += 1 i += 1
hosts_file.close() hosts_file.close()
def get_one_resources_need(self): class SubTask():
return self.vnodeinfo.vnode.instance def __init__(self, idx, root_task, vnode_info, command_info, max_retry_count):
self.root_task = root_task
self.vnode_info = vnode_info
self.vnode_info.vnodeid = idx
self.command_info = command_info
self.command_info.vnodeid = idx
self.max_retry_count = max_retry_count
self.vnode_started = False
self.task_started = False
self.status = WAITING
self.status_reason = ''
self.try_count = 0
self.worker = None
def get_all_resources_need(self): def waiting_for_retry(self):
return [self.vnodeinfo.vnode.instance for i in range(self.vnode_nums)] self.try_count += 1
self.status = WAITING if self.try_count <= self.max_retry_count else FAILED
if self.status == FAILED and self.root_task.at_same_time:
self.root_task.status = FAILED
class TaskReporter(MasterServicer): class TaskReporter(MasterServicer):
@ -89,6 +105,7 @@ class TaskReporter(MasterServicer):
self.taskmgr.on_task_report(task_report) self.taskmgr.on_task_report(task_report)
return Reply(status=Reply.ACCEPTED, message='') return Reply(status=Reply.ACCEPTED, message='')
class TaskMgr(threading.Thread): class TaskMgr(threading.Thread):
# load task information from etcd # load task information from etcd
@ -156,9 +173,9 @@ class TaskMgr(threading.Thread):
self.serve() self.serve()
while not self.thread_stop: while not self.thread_stop:
self.sort_out_task_queue() self.sort_out_task_queue()
task, vnodes_workers = self.task_scheduler() task, sub_task_list = self.task_scheduler()
if task is not None and workers is not None: if task is not None and sub_task_list is not None:
self.task_processor(task, vnodes_workers) self.task_processor(task, sub_task_list)
else: else:
time.sleep(self.scheduler_interval) time.sleep(self.scheduler_interval)
@ -185,39 +202,86 @@ class TaskMgr(threading.Thread):
self.task_queue.append(task) self.task_queue.append(task)
self.task_queue = sorted(self.task_queue, key=lambda x: x.priority) self.task_queue = sorted(self.task_queue, key=lambda x: x.priority)
def stop_vnode(self, worker, task, vnodeid): def start_vnode(self, subtask):
vnodeinfo = copy.copy(task.vnodeinfo)
vnodeinfo.vnodeid = vnodeid
try: try:
self.logger.info('[task_processor] Stopping vnode for task [%s] vnode [%d]' % (task.vnodeinfo.id, vnodeid)) self.logger.info('[task_processor] Starting vnode for task [%s] vnode [%d]' % (subtask.vnode_info.taskid, subtask.vnode_info.vnodeid))
channel = grpc.insecure_channel('%s:%s' % (worker, self.worker_port)) channel = grpc.insecure_channel('%s:%s' % (subtask.worker, self.worker_port))
stub = WorkerStub(channel) stub = WorkerStub(channel)
response = stub.stop_vnode(vnodeinfo) response = stub.start_vnode(subtask.vnode_info)
if response.status != Reply.ACCEPTED: if response.status != Reply.ACCEPTED:
raise Exception(response.message) raise Exception(response.message)
except Exception as e: except Exception as e:
self.logger.error('[task_processor] rpc error message: %s' % e) self.logger.error('[task_processor] rpc error message: %s' % e)
subtask.status_reason = str(e)
return [False, e] return [False, e]
return [True, ""] subtask.vnode_started = True
self.cpu_usage[subtask.worker] += subtask.vnode_info.vnode.instance.cpu
self.gpu_usage[subtask.worker] += subtask.vnode_info.vnode.instance.gpu
return [True, '']
def stop_vnode(self, subtask):
try:
self.logger.info('[task_processor] Stopping vnode for task [%s] vnode [%d]' % (subtask.vnode_info.taskid, subtask.vnode_info.vnodeid))
channel = grpc.insecure_channel('%s:%s' % (subtask.worker, self.worker_port))
stub = WorkerStub(channel)
response = stub.stop_vnode(subtask.vnode_info)
if response.status != Reply.ACCEPTED:
raise Exception(response.message)
except Exception as e:
self.logger.error('[task_processor] rpc error message: %s' % e)
subtask.status_reason = str(e)
return [False, e]
subtask.vnode_started = False
self.cpu_usage[subtask.worker] -= subtask.vnode_info.vnode.instance.cpu
self.gpu_usage[subtask.worker] -= subtask.vnode_info.vnode.instance.gpu
return [True, '']
def start_task(self, subtask):
try:
self.logger.info('[task_processor] Starting task [%s] vnode [%d]' % (subtask.vnode_info.taskid, subtask.vnode_info.vnodeid))
channel = grpc.insecure_channel('%s:%s' % (subtask.worker, self.worker_port))
stub = WorkerStub(channel)
response = stub.start_task(subtask.command_info)
if response.status != Reply.ACCEPTED:
raise Exception(response.message)
except Exception as e:
self.logger.error('[task_processor] rpc error message: %s' % e)
subtask.status_reason = str(e)
subtask.task_started = True
def stop_task(self, subtask):
try:
self.logger.info('[task_processor] Stoping task [%s] vnode [%d]' % (subtask.vnode_info.taskid, subtask.vnode_info.vnodeid))
channel = grpc.insecure_channel('%s:%s' % (subtask.worker, self.worker_port))
stub = WorkerStub(channel)
response = stub.stop_stask(subtask.command_info)
if response.status != Reply.ACCEPTED:
raise Exception(response.message)
except Exception as e:
self.logger.error('[task_processor] rpc error message: %s' % e)
subtask.status = FAILED
subtask.status_reason = str(e)
subtask.task_started = False
@net_lock @net_lock
def acquire_task_ips(self, task): def acquire_task_ips(self, task):
self.logger.info("[acquire_task_ips] user(%s) task(%s) net(%s)"%(task.taskinfo.username, task.taskinfo.taskid, str(task.task_base_ip))) self.logger.info("[acquire_task_ips] user(%s) task(%s) net(%s)" % (task.username, task.id, str(task.task_base_ip)))
if task.task_base_ip == None: if task.task_base_ip == None:
task.task_base_ip = self.free_nets.pop(0) task.task_base_ip = self.free_nets.pop(0)
return task.task_base_ip return task.task_base_ip
@net_lock @net_lock
def release_task_ips(self, task): def release_task_ips(self, task):
self.logger.info("[release_task_ips] user(%s) task(%s) net(%s)"%(task.taskinfo.username, task.taskinfo.taskid, str(task.task_base_ip))) self.logger.info("[release_task_ips] user(%s) task(%s) net(%s)" % (task.username, task.id, str(task.task_base_ip)))
if task.task_base_ip == None: if task.task_base_ip == None:
return return
self.free_nets.append(task.task_base_ip) self.free_nets.append(task.task_base_ip)
task.task_base_ip = None
self.logger.error('[release task_net] %s' % str(e)) self.logger.error('[release task_net] %s' % str(e))
def setup_tasknet(self, task, workers=None): def setup_tasknet(self, task, workers=None):
taskid = task.taskinfo.taskid taskid = task.id
username = task.taskinfo.username username = task.username
brname = "docklet-batch-%s-%s"%(username, taskid) brname = "docklet-batch-%s-%s"%(username, taskid)
gwname = "Batch-%s-%s"%(username, taskid) gwname = "Batch-%s-%s"%(username, taskid)
if task.task_base_ip == None: if task.task_base_ip == None:
@ -232,42 +296,37 @@ class TaskMgr(threading.Thread):
return [True, gatewayip] return [True, gatewayip]
def remove_tasknet(self, task): def remove_tasknet(self, task):
taskid = task.taskinfo.taskid taskid = task.id
username = task.taskinfo.username username = task.username
brname = "docklet-batch-%s-%s"%(username, taskid) brname = "docklet-batch-%s-%s"%(username, taskid)
netcontrol.del_bridge(brname) netcontrol.del_bridge(brname)
def task_processor(self, task, vnodes_workers): def task_processor(self, task, sub_task_list):
task.status = RUNNING task.status = RUNNING
self.jobmgr.report(task.taskinfo.taskid,'running') # self.jobmgr.report(task.id,'running')
# properties for transactio # properties for transactio
self.acquire_task_net(task) self.acquire_task_ips(task)
task.gen_ips_from_base(self.base_ip) task.gen_ips_from_base(self.base_ip)
task.gen_hosts() task.gen_hosts()
#need to create hosts #need to create hosts
[success, gwip] = self.setup_tasknet(task,[w[1] for w in vnodes_workers]) [success, gwip] = self.setup_tasknet(task, [sub_task.worker for sub_task in sub_task_list])
if not success: if not success:
self.release_task_ips(task) self.release_task_ips(task)
return [False, gwip] return [False, gwip]
token = ''.join(random.sample(string.ascii_letters + string.digits, 8))
placed_workers = [] placed_workers = []
start_all_vnode_success = True
# start vc # start vc
for vid, worker in vnodes_workers: for sub_task in sub_task_list:
vnodeinfo = copy.copy(task.vnodeinfo) vnode_info = sub_task.vnode_info
vnodeinfo.vnodeid = vid vnode_info.vnode.hostname = "batch-"+str(vid%task.max_size)
vnodeinfo.vnode.hostname = "batch-"+str(vid%task.max_size) if sub_task.vnode_started:
vnode = task.subtask_list[vid] continue
vnode['status'] = RUNNING
vnode['try_count'] += 1
vnode['token'] = token
vnode['worker'] = worker
self.cpu_usage[worker] += task.vnodeinfo.vnode.instance.cpu username = sub_task.username
self.gpu_usage[worker] += task.vnodeinfo.vnode.instance.gpu
username = task.vnodeinfo.username
#container_name = task.info.username + '-batch-' + task.info.id + '-' + str(instance_id) + '-' + task.info.token #container_name = task.info.username + '-batch-' + task.info.id + '-' + str(instance_id) + '-' + task.info.token
#if not username in self.user_containers.keys(): #if not username in self.user_containers.keys():
#self.user_containers[username] = [] #self.user_containers[username] = []
@ -275,42 +334,75 @@ class TaskMgr(threading.Thread):
ipaddr = task.ips[vid % task.max_size] ipaddr = task.ips[vid % task.max_size]
brname = "docklet-batch-%s-%s" % (username, taskid) brname = "docklet-batch-%s-%s" % (username, taskid)
networkinfo = Network(ipaddr=ipaddr, gateway=gwip, masterip=self.masterip, brname=brname) networkinfo = Network(ipaddr=ipaddr, gateway=gwip, masterip=self.masterip, brname=brname)
vnodeinfo.vnode.network = networkinfo vnode_info.vnode.network = networkinfo
try:
self.logger.info('[task_processor] starting vnode for task [%s] instance [%d]' % (task.vnodeinfo.id, vid))
channel = grpc.insecure_channel('%s:%s' % (worker, self.worker_port))
stub = WorkerStub(channel)
response = stub.start_vnode(vnodeinfo)
placed_workers.append(worker) placed_workers.append(worker)
if response.status != Reply.ACCEPTED: if not self.start_vnode(sub_task):
raise Exception(response.message) sub_task.waiting_for_retry()
except Exception as e: sub_task.worker = None
self.logger.error('[task_processor] rpc error message: %s' % e) start_all_vnode_success = False
task.status = FAILED
vnode['status'] = FAILED if not start_all_vnode_success:
vnode['try_count'] -= 1
for pl_worker in placed_workers:
pass
return return
#self.user_containers[username].remove(container_name)
# start tasks # start tasks
for vid, worker in vnodes_workers: for sub_task in sub_task_list:
taskinfo = copy.copy(task.taskinfo) task_info = sub_task.command_info
taskinfo.vnodeid = vid task_info.token = ''.join(random.sample(string.ascii_letters + string.digits, 8))
taskinfo.token = token
vnode = task.subtask_list[vid] if self.start_task(sub_task):
try: sub_task.status = RUNNING
self.logger.info('[task_processor] starting task [%s] instance [%d]' % (task.vnodeinfo.id, vid)) else:
channel = grpc.insecure_channel('%s:%s' % (worker, self.worker_port)) sub_task.waiting_for_retry()
stub = WorkerStub(channel)
response = stub.start_task(taskinfo) def clear_sub_tasks(self, sub_task_list):
if response.status != Reply.ACCEPTED: for sub_task in sub_task_list:
raise Exception(response.message) self.clear_sub_task(sub_task)
except Exception as e:
self.logger.error('[task_processor] rpc error message: %s' % e) def clear_sub_task(self, sub_task):
task.status = FAILED if sub_task.task_started:
self.stop_task(sub_task)
if sub_task.vnode_started:
self.stop_vnode(sub_task)
def check_task_completed(self, task):
if task.status == RUNNING or task.status == WAITING:
for sub_task in task.subtask_list:
if sub_task.status == RUNNING or sub_task.status == WAITING:
return False
self.logger.info('task %s completed' % task.id)
if task.at_same_time and task.status == FAILED:
self.clear_sub_tasks(task.subtask_list)
# TODO report to jobmgr
self.lazy_delete_list.append(task)
return True
# this method is called when worker send heart-beat rpc request
def on_task_report(self, report):
self.logger.info('[on_task_report] receive task report: id %s-%d, status %d' % (report.taskid, report.vnodeid, report.subTaskStatus))
task = self.get_task(report.taskid)
if task == None:
self.logger.error('[on_task_report] task not found')
return
sub_task = task.subtask_list[report.vnodeid]
if sub_task.token != report.token:
self.logger.warning('[on_task_report] wrong token')
return
username = task.username
# container_name = username + '-batch-' + task.info.id + '-' + str(report.instanceid) + '-' + report.token
# self.user_containers[username].remove(container_name)
if sub_task.status != RUNNING:
self.logger.error('[on_task_report] receive task report when instance is not running')
sub_task.status = report.subTaskStatus
sub_task.status_reason = report.errmsg
self.clear_sub_task(sub_task)
if report.subTaskStatus == FAILED or report.subTaskStatus == TIMEOUT:
sub_task.waiting_for_retry()
# return task, workers # return task, workers
def task_scheduler(self): def task_scheduler(self):
@ -320,60 +412,45 @@ class TaskMgr(threading.Thread):
for task in self.task_queue: for task in self.task_queue:
if task in self.lazy_delete_list: if task in self.lazy_delete_list:
continue continue
if self.check_task_completed(task):
if task.atSameTime:
# parallel tasks
if task.status == RUNNING:
continue continue
workers = self.find_proper_workers(task.get_all_resources_need())
if len(workers) < task.vnode_nums: if task.at_same_time:
# parallel tasks
workers = self.find_proper_workers(task.subtask_list)
if len(workers) == 0:
return None, None return None, None
else: else:
idxs = [i for i in range(task.vnode_nums)] for i in range(len(workers)):
return task, zip(idxs,workers) task.subtask_list[i].worker = workers[i]
return task, task.subtask_list
else: else:
# traditional tasks # traditional tasks
workers = self.find_proper_workers([task.get_one_resources_need()]) for sub_task in task.subtask_list:
if len(workers) < task.vnode_nums: if sub_task.status == WAITING:
return None, None, None workers = self.find_proper_workers([sub_task])
'''for index, instance in enumerate(task.instance_list): if len(workers) > 0:
# find instance to retry sub_task.worker = workers[0]
if (instance['status'] == FAILED or instance['status'] == TIMEOUT) and instance['try_count'] <= task.info.maxRetryCount: return task, [sub_task]
if worker is not None:
self.logger.info('[task_scheduler] retry')
return task, index, worker
# find timeout instance
elif instance['status'] == RUNNING:
if not self.is_alive(instance['worker']):
instance['status'] = FAILED
instance['token'] = ''
self.cpu_usage[instance['worker']] -= task.info.cluster.instance.cpu
self.gpu_usage[instance['worker']] -= task.info.cluster.instance.gpu
self.logger.warning('[task_scheduler] worker dead, retry task [%s] instance [%d]' % (task.info.id, index)) return None, None
if worker is not None:
return task, index, worker
if worker is not None: def find_proper_workers(self, sub_task_list):
# start new instance
if len(task.instance_list) < task.info.instanceCount:
instance = {}
instance['try_count'] = 0
task.instance_list.append(instance)
return task, len(task.instance_list) - 1, worker'''
self.check_task_completed(task)
return None, None, None
def find_proper_workers(self, vnodes_configs):
nodes = self.get_all_nodes() nodes = self.get_all_nodes()
if nodes is None or len(nodes) == 0: if nodes is None or len(nodes) == 0:
self.logger.warning('[task_scheduler] running nodes not found') self.logger.warning('[task_scheduler] running nodes not found')
return None return None
proper_workers = [] proper_workers = []
for needs in vnodes_configs: has_waiting = False
for sub_task in sub_task_list:
if sub_task.status == WAITING:
has_waiting = True
if sub_task.worker is not None and sub_task.vnode_started:
proper_workers.append(sub_task.worker)
continue
needs = sub_task.vnode_info.vnode.instance
proper_worker = None
for worker_ip, worker_info in nodes: for worker_ip, worker_info in nodes:
if needs.cpu + self.get_cpu_usage(worker_ip) > worker_info['cpu']: if needs.cpu + self.get_cpu_usage(worker_ip) > worker_info['cpu']:
continue continue
@ -391,11 +468,16 @@ class TaskMgr(threading.Thread):
worker_info['memory'] -= needs.memory worker_info['memory'] -= needs.memory
worker_info['gpu'] -= needs.gpu worker_info['gpu'] -= needs.gpu
worker_info['disk'] -= needs.disk worker_info['disk'] -= needs.disk
proper_workers.append(worker_ip) proper_worker = worker_ip
break break
if proper_worker is not None:
proper_workers.append(proper_worker)
else: else:
return [] return []
if has_waiting:
return proper_workers return proper_workers
else:
return []
def get_all_nodes(self): def get_all_nodes(self):
# cache running nodes # cache running nodes
@ -445,11 +527,16 @@ class TaskMgr(threading.Thread):
"base": Image.BASE, "base": Image.BASE,
"public": Image.PUBLIC "public": Image.PUBLIC
} }
configinfo = {'vnode_nums':7,'atSameTime':True,'MultiStart':True, task = Task(
'maxRetryCount':int(json_task['retryCount'])} task_id = taskid,
# json_task = json.loads(json_task) username = username,
task = Task(configinfo, # all vnode must be started at the same time
VNodeInfo( at_same_time = json_task['at_same_time'],
priority = task_priority,
max_size = (1 << self.task_cidr) - 2,
task_infos = [{
'max_retry_count': int(json_task['retryCount']),
'vnode_info': VNodeInfo(
taskid = taskid, taskid = taskid,
username = username, username = username,
vnode = Vnode( vnode = Vnode(
@ -461,9 +548,14 @@ class TaskMgr(threading.Thread):
cpu = int(json_task['cpuSetting']), cpu = int(json_task['cpuSetting']),
memory = int(json_task['memorySetting']), memory = int(json_task['memorySetting']),
disk = int(json_task['diskSetting']), disk = int(json_task['diskSetting']),
gpu = int(json_task['gpuSetting']))) gpu = int(json_task['gpuSetting'])),
mount = [Mount(
localPath = json_task['mapping'][mapping_key]['mappingLocalDir'],
remotePath=json_task['mapping'][mapping_key]['mappingRemoteDir'])
for mapping_key in json_task['mapping']] if 'mapping' in json_task else []
), ),
TaskInfo( ),
'command_info': TaskInfo(
taskid = taskid, taskid = taskid,
username = username, username = username,
parameters = Parameters( parameters = Parameters(
@ -473,13 +565,11 @@ class TaskMgr(threading.Thread):
envVars = {}), envVars = {}),
stderrRedirectPath = json_task.get('stdErrRedPth',""), stderrRedirectPath = json_task.get('stdErrRedPth',""),
stdoutRedirectPath = json_task.get('stdOutRedPth',"")), stdoutRedirectPath = json_task.get('stdOutRedPth',"")),
timeout = int(json_task['expTime']), timeout = int(json_task['expTime'])
), # commands are executed in all vnodes / only excuted in the first vnode
priority=task_priority,max_size=(1<<self.task_cidr)-2) # if in traditional mode, commands will be executed in all vnodes
if 'mapping' in json_task: ) if (not json_task['at_same_time'] or json_task['multicommand'] or instance_index == 0) else None
task.vnodeinfo.vnode.mount.extend([Mount(localPath=json_task['mapping'][mapping_key]['mappingLocalDir'], } for instance_index in range(json_task['instCount'])])
remotePath=json_task['mapping'][mapping_key]['mappingRemoteDir'])
for mapping_key in json_task['mapping']])
self.lazy_append_list.append(task) self.lazy_append_list.append(task)
@ -488,7 +578,7 @@ class TaskMgr(threading.Thread):
@queue_lock @queue_lock
def get_task(self, taskid): def get_task(self, taskid):
for task in self.task_queue: for task in self.task_queue:
if task.info.id == taskid: if task.id == taskid:
return task return task
return None return None