refactor taskmgr

2019-03-05 17:46:12 +08:00 · 2019-03-05 17:46:12 +08:00 · c85342bec8
parent 8f14b4e9c8
commit c85342bec8
2 changed files with 255 additions and 165 deletions
--- a/src/master/taskmgr.py
+++ b/src/master/taskmgr.py
@ -24,16 +24,12 @@ def int_to_ip(num):
    return str((num>>24)&255)+"."+str((num>>16)&255)+"."+str((num>>8)&255)+"."+str(num&255)
 class Task():
-    def __init__(self, configinfo, vnodeinfo, taskinfo, priority, max_size):
+    def __init__(self, task_id, username, at_same_time, priority, max_size, task_infos):
-        self.vnodeinfo = vnodeinfo
+        self.id = task_id
-        self.taskinfo = taskinfo
+        self.username = username
        self.status = WAITING
-        self.subtask_list = []
+        # if all the vnodes must be started at the same time
-        self.token = ''
+        self.at_same_time = at_same_time
        self.maxRetryCount = self.configinfo['maxRetryCount']
        self.atSameTime = self.configinfo['atSameTime']
        self.multicommand = self.configinfo['multicommand']
        self.vnode_nums = self.configinfo['vnode_nums']
        # priority the bigger the better
        # self.priority the smaller the better
        self.priority = int(time.time()) / 60 / 60 - priority
@ -41,8 +37,13 @@ class Task():
        self.ips = None
        self.max_size = max_size
-        for i in range(self.vnode_nums):
+        self.subtask_list = [SubTask(
-            self.subtask_list.append({'status':'WAITING','try_count':0})
+                idx = index,
                root_task = self, 
                vnode_info = task_info['vnode_info'], 
                command_info = task_info['command_info'], 
                max_retry_count = task_info['max_retry_count']
            ) for (index, task_info) in enumerate(task_infos)]
    def __lt__(self, other):
        """Order tasks by their ``priority`` value; the smaller value sorts first."""
        mine = self.priority
        theirs = other.priority
        return mine < theirs
@ -55,8 +56,8 @@ class Task():
            self.ips.append(int_to_ip(base_ip + self.task_base_ip + i + 2))
    def gen_hosts(self):
-        username = self.taskinfo.username
+        username = self.username
-        taskid = self.taskinfo.taskid
+        taskid = self.id
        logger.info("Generate hosts for user(%s) task(%s) base_ip(%s)"%(username,taskid,str(self.task_base_ip)))
        fspath = env.getenv('FS_PREFIX')
        if not os.path.isdir("%s/global/users/%s" % (fspath,username)):
@ -72,11 +73,26 @@ class Task():
            i += 1
        hosts_file.close()
-    def get_one_resources_need(self):
+class SubTask():
-        return self.vnodeinfo.vnode.instance
+    def __init__(self, idx, root_task, vnode_info, command_info, max_retry_count):
        self.root_task = root_task
        self.vnode_info = vnode_info
        self.vnode_info.vnodeid = idx
        self.command_info = command_info
        self.command_info.vnodeid = idx
        self.max_retry_count = max_retry_count
        self.vnode_started = False
        self.task_started = False
        self.status = WAITING
        self.status_reason = ''
        self.try_count = 0
        self.worker = None
-    def get_all_resources_need(self):
+    def waiting_for_retry(self):
-        return [self.vnodeinfo.vnode.instance for i in range(self.vnode_nums)]
+        self.try_count += 1
        self.status = WAITING if self.try_count <= self.max_retry_count else FAILED
        if self.status == FAILED and self.root_task.at_same_time:
            self.root_task.status = FAILED
 class TaskReporter(MasterServicer):
@ -89,6 +105,7 @@ class TaskReporter(MasterServicer):
            self.taskmgr.on_task_report(task_report)
        return Reply(status=Reply.ACCEPTED, message='')
 class TaskMgr(threading.Thread):
    # load task information from etcd
@ -156,9 +173,9 @@ class TaskMgr(threading.Thread):
        self.serve()
        while not self.thread_stop:
            self.sort_out_task_queue()
-            task, vnodes_workers = self.task_scheduler()
+            task, sub_task_list = self.task_scheduler()
-            if task is not None and workers is not None:
+            if task is not None and sub_task_list is not None:
-                self.task_processor(task, vnodes_workers)
+                self.task_processor(task, sub_task_list)
            else:
                time.sleep(self.scheduler_interval)
@ -185,39 +202,86 @@ class TaskMgr(threading.Thread):
                self.task_queue.append(task)
            self.task_queue = sorted(self.task_queue, key=lambda x: x.priority)
-    def stop_vnode(self, worker, task, vnodeid):
+    def start_vnode(self, subtask):
        vnodeinfo = copy.copy(task.vnodeinfo)
        vnodeinfo.vnodeid = vnodeid
        try:
-            self.logger.info('[task_processor] Stopping vnode for task [%s] vnode [%d]' % (task.vnodeinfo.id, vnodeid))
+            self.logger.info('[task_processor] Starting vnode for task [%s] vnode [%d]' % (subtask.vnode_info.taskid, subtask.vnode_info.vnodeid))
-            channel = grpc.insecure_channel('%s:%s' % (worker, self.worker_port))
+            channel = grpc.insecure_channel('%s:%s' % (subtask.worker, self.worker_port))
            stub = WorkerStub(channel)
-            response = stub.stop_vnode(vnodeinfo)
+            response = stub.start_vnode(subtask.vnode_info)
            if response.status != Reply.ACCEPTED:
                raise Exception(response.message)
        except Exception as e:
            self.logger.error('[task_processor] rpc error message: %s' % e)
            subtask.status_reason = str(e)
            return [False, e]
-        return [True, ""]
+        subtask.vnode_started = True
        self.cpu_usage[subtask.worker] += subtask.vnode_info.vnode.instance.cpu
        self.gpu_usage[subtask.worker] += subtask.vnode_info.vnode.instance.gpu
        return [True, '']
    def stop_vnode(self, subtask):
        """Ask the worker hosting ``subtask`` to stop its vnode.

        Sends a ``stop_vnode`` RPC carrying ``subtask.vnode_info`` to
        ``subtask.worker`` on ``self.worker_port``.

        Returns:
            ``[True, '']`` when the worker accepts the request; the subtask
            is then flagged as having no started vnode and the cpu/gpu usage
            recorded for that worker is reduced by the vnode's instance size.
            ``[False, exc]`` when the RPC raises or is not accepted; the
            error text is stored in ``subtask.status_reason`` and the usage
            counters are left untouched.
        """
        vnode_info = subtask.vnode_info
        try:
            self.logger.info('[task_processor] Stopping vnode for task [%s] vnode [%d]' % (vnode_info.taskid, vnode_info.vnodeid))
            # Close the channel once the call has finished instead of
            # leaving one open channel behind for every request.
            with grpc.insecure_channel('%s:%s' % (subtask.worker, self.worker_port)) as channel:
                response = WorkerStub(channel).stop_vnode(vnode_info)
            if response.status != Reply.ACCEPTED:
                raise Exception(response.message)
        except Exception as e:
            self.logger.error('[task_processor] rpc error message: %s' % e)
            subtask.status_reason = str(e)
            return [False, e]
        subtask.vnode_started = False
        # Give the resources back only after the worker confirmed the stop.
        instance = vnode_info.vnode.instance
        self.cpu_usage[subtask.worker] -= instance.cpu
        self.gpu_usage[subtask.worker] -= instance.gpu
        return [True, '']
    def start_task(self, subtask):
        """Ask the worker hosting ``subtask`` to start running its command.

        Sends a ``start_task`` RPC carrying ``subtask.command_info`` to
        ``subtask.worker`` on ``self.worker_port``.

        Returns:
            True when the worker accepts the request; ``subtask.task_started``
            is then set.  False when the RPC raises or is not accepted; the
            error text is stored in ``subtask.status_reason`` and
            ``task_started`` is left unchanged, so a command that never
            started is not recorded as started.
        """
        try:
            self.logger.info('[task_processor] Starting task [%s] vnode [%d]' % (subtask.vnode_info.taskid, subtask.vnode_info.vnodeid))
            # Close the channel once the call has finished instead of
            # leaving one open channel behind for every request.
            with grpc.insecure_channel('%s:%s' % (subtask.worker, self.worker_port)) as channel:
                response = WorkerStub(channel).start_task(subtask.command_info)
            if response.status != Reply.ACCEPTED:
                raise Exception(response.message)
        except Exception as e:
            self.logger.error('[task_processor] rpc error message: %s' % e)
            subtask.status_reason = str(e)
            # The caller branches on the result, so failure must be falsy.
            return False
        subtask.task_started = True
        return True
    def stop_task(self, subtask):
        """Ask the worker hosting ``subtask`` to stop its running command.

        Sends a ``stop_task`` RPC carrying ``subtask.command_info`` to
        ``subtask.worker`` on ``self.worker_port``.  When the RPC raises or
        is not accepted, the subtask is marked FAILED and the error text is
        stored in ``subtask.status_reason``.  ``subtask.task_started`` is
        cleared whether or not the RPC succeeded.
        """
        try:
            self.logger.info('[task_processor] Stoping task [%s] vnode [%d]' % (subtask.vnode_info.taskid, subtask.vnode_info.vnodeid))
            # Close the channel once the call has finished instead of
            # leaving one open channel behind for every request.
            with grpc.insecure_channel('%s:%s' % (subtask.worker, self.worker_port)) as channel:
                # Previously spelt ``stop_stask``, which is not one of the
                # RPC names used anywhere else here, so every call ended in
                # the except branch and marked the subtask FAILED.
                # NOTE(review): ``stop_task`` is assumed by analogy with
                # start_vnode/stop_vnode/start_task -- confirm against the
                # Worker service definition.
                response = WorkerStub(channel).stop_task(subtask.command_info)
            if response.status != Reply.ACCEPTED:
                raise Exception(response.message)
        except Exception as e:
            self.logger.error('[task_processor] rpc error message: %s' % e)
            subtask.status = FAILED
            subtask.status_reason = str(e)
        subtask.task_started = False
    @net_lock
    def acquire_task_ips(self, task):
-        self.logger.info("[acquire_task_ips] user(%s) task(%s) net(%s)"%(task.taskinfo.username, task.taskinfo.taskid, str(task.task_base_ip)))
+        self.logger.info("[acquire_task_ips] user(%s) task(%s) net(%s)" % (task.username, task.id, str(task.task_base_ip)))
        if task.task_base_ip == None:
            task.task_base_ip = self.free_nets.pop(0)
        return task.task_base_ip
    @net_lock
    def release_task_ips(self, task):
-        self.logger.info("[release_task_ips] user(%s) task(%s) net(%s)"%(task.taskinfo.username, task.taskinfo.taskid, str(task.task_base_ip)))
+        self.logger.info("[release_task_ips] user(%s) task(%s) net(%s)" % (task.username, task.id, str(task.task_base_ip)))
        if task.task_base_ip == None:
            return
        self.free_nets.append(task.task_base_ip)
        task.task_base_ip = None
        self.logger.error('[release task_net] %s' % str(e))
    def setup_tasknet(self, task, workers=None):
-        taskid = task.taskinfo.taskid
+        taskid = task.id
-        username = task.taskinfo.username
+        username = task.username
        brname = "docklet-batch-%s-%s"%(username, taskid)
        gwname = "Batch-%s-%s"%(username, taskid)
        if task.task_base_ip == None:
@ -232,42 +296,37 @@ class TaskMgr(threading.Thread):
        return [True, gatewayip]
    def remove_tasknet(self, task):
-        taskid = task.taskinfo.taskid
+        taskid = task.id
-        username = task.taskinfo.username
+        username = task.username
        brname = "docklet-batch-%s-%s"%(username, taskid)
        netcontrol.del_bridge(brname)
-    def task_processor(self, task, vnodes_workers):
+    def task_processor(self, task, sub_task_list):
        task.status = RUNNING
-        self.jobmgr.report(task.taskinfo.taskid,'running')
+        # self.jobmgr.report(task.id,'running')
        # properties for transactio
-        self.acquire_task_net(task)
+        self.acquire_task_ips(task)
        task.gen_ips_from_base(self.base_ip)
        task.gen_hosts()
        #need to create hosts
-        [success, gwip] = self.setup_tasknet(task,[w[1] for w in vnodes_workers])
+        [success, gwip] = self.setup_tasknet(task, [sub_task.worker for sub_task in sub_task_list])
        if not success:
            self.release_task_ips(task)
            return [False, gwip]
-        token = ''.join(random.sample(string.ascii_letters + string.digits, 8))
+
        placed_workers = []
        start_all_vnode_success = True
        # start vc
-        for vid, worker in vnodes_workers:
+        for sub_task in sub_task_list:
-            vnodeinfo = copy.copy(task.vnodeinfo)
+            vnode_info = sub_task.vnode_info
-            vnodeinfo.vnodeid = vid
+            vnode_info.vnode.hostname = "batch-"+str(vid%task.max_size)
-            vnodeinfo.vnode.hostname = "batch-"+str(vid%task.max_size)
+            if sub_task.vnode_started:
-            vnode = task.subtask_list[vid]
+                continue
            vnode['status'] = RUNNING
            vnode['try_count'] += 1
            vnode['token'] = token
            vnode['worker'] = worker
-            self.cpu_usage[worker] += task.vnodeinfo.vnode.instance.cpu
+            username = sub_task.username
            self.gpu_usage[worker] += task.vnodeinfo.vnode.instance.gpu
            username = task.vnodeinfo.username
            #container_name = task.info.username + '-batch-' + task.info.id + '-' + str(instance_id) + '-' + task.info.token
            #if not username in self.user_containers.keys():
                #self.user_containers[username] = []
@ -275,42 +334,75 @@ class TaskMgr(threading.Thread):
            ipaddr = task.ips[vid % task.max_size]
            brname = "docklet-batch-%s-%s" % (username, taskid)
            networkinfo = Network(ipaddr=ipaddr, gateway=gwip, masterip=self.masterip, brname=brname)
-            vnodeinfo.vnode.network = networkinfo
+            vnode_info.vnode.network = networkinfo
            try:
                self.logger.info('[task_processor] starting vnode for task [%s] instance [%d]' % (task.vnodeinfo.id, vid))
                channel = grpc.insecure_channel('%s:%s' % (worker, self.worker_port))
                stub = WorkerStub(channel)
                response = stub.start_vnode(vnodeinfo)
            placed_workers.append(worker)
-                if response.status != Reply.ACCEPTED:
+            if not self.start_vnode(sub_task):
-                    raise Exception(response.message)
+                sub_task.waiting_for_retry()
-            except Exception as e:
+                sub_task.worker = None
-                self.logger.error('[task_processor] rpc error message: %s' % e)
+                start_all_vnode_success = False
-                task.status = FAILED
+
-                vnode['status'] = FAILED
+        if not start_all_vnode_success:
                vnode['try_count'] -= 1
                for pl_worker in placed_workers:
                    pass
            return
                #self.user_containers[username].remove(container_name)
        # start tasks
-        for vid, worker in vnodes_workers:
+        for sub_task in sub_task_list:
-            taskinfo = copy.copy(task.taskinfo)
+            task_info = sub_task.command_info
-            taskinfo.vnodeid = vid
+            task_info.token = ''.join(random.sample(string.ascii_letters + string.digits, 8))
-            taskinfo.token = token
+
-            vnode = task.subtask_list[vid]
+            if self.start_task(sub_task):
-            try:
+                sub_task.status = RUNNING
-                self.logger.info('[task_processor] starting task [%s] instance [%d]' % (task.vnodeinfo.id, vid))
+            else:
-                channel = grpc.insecure_channel('%s:%s' % (worker, self.worker_port))
+                sub_task.waiting_for_retry()
-                stub = WorkerStub(channel)
+
-                response = stub.start_task(taskinfo)
+    def clear_sub_tasks(self, sub_task_list):
-                if response.status != Reply.ACCEPTED:
+        for sub_task in sub_task_list:
-                    raise Exception(response.message)
+            self.clear_sub_task(sub_task)
-            except Exception as e:
+
-                self.logger.error('[task_processor] rpc error message: %s' % e)
+    def clear_sub_task(self, sub_task):
-                task.status = FAILED
+        if sub_task.task_started:
            self.stop_task(sub_task)
        if sub_task.vnode_started:
            self.stop_vnode(sub_task)
    def check_task_completed(self, task):
        """Report whether ``task`` is finished, retiring it if so.

        A task whose status is RUNNING or WAITING counts as unfinished while
        any of its subtasks is still RUNNING or WAITING; nothing is changed
        and False is returned.  Otherwise the completion is logged, the
        subtasks of a FAILED ``at_same_time`` task are cleared, the task is
        appended to ``lazy_delete_list`` and True is returned.
        """
        active_states = (RUNNING, WAITING)
        if task.status in active_states and any(
                entry.status in active_states for entry in task.subtask_list):
            return False

        self.logger.info('task %s completed' % task.id)
        must_clear = task.at_same_time and task.status == FAILED
        if must_clear:
            self.clear_sub_tasks(task.subtask_list)
        # TODO report to jobmgr
        self.lazy_delete_list.append(task)
        return True
    # this method is called when worker send heart-beat rpc request
    def on_task_report(self, report):
        """Apply a status report sent by a worker for one subtask.

        The report is dropped, with a log entry, when its task cannot be
        found or when its token differs from the token stored for the
        subtask.  Otherwise the subtask takes over the reported status and
        error message, is cleaned up through ``clear_sub_task``, and a FAILED
        or TIMEOUT report additionally sends it through
        ``waiting_for_retry``.
        """
        self.logger.info('[on_task_report] receive task report: id %s-%d, status %d' % (report.taskid, report.vnodeid, report.subTaskStatus))
        task = self.get_task(report.taskid)
        if task is None:
            self.logger.error('[on_task_report] task not found')
            return

        sub_task = task.subtask_list[report.vnodeid]
        # The token is written onto the subtask's command_info when the task
        # is started; SubTask itself defines no ``token`` attribute, so
        # reading ``sub_task.token`` raised AttributeError.
        if sub_task.command_info.token != report.token:
            self.logger.warning('[on_task_report] wrong token')
            return

        if sub_task.status != RUNNING:
            # Logged only; the report is still applied below.
            self.logger.error('[on_task_report] receive task report when instance is not running')

        sub_task.status = report.subTaskStatus
        sub_task.status_reason = report.errmsg

        self.clear_sub_task(sub_task)

        if report.subTaskStatus == FAILED or report.subTaskStatus == TIMEOUT:
            sub_task.waiting_for_retry()
    # return task, workers
    def task_scheduler(self):
@ -320,60 +412,45 @@ class TaskMgr(threading.Thread):
        for task in self.task_queue:
            if task in self.lazy_delete_list:
                continue
-
+            if self.check_task_completed(task):
            if task.atSameTime:
                # parallel tasks
                if task.status == RUNNING:
                continue
-                workers = self.find_proper_workers(task.get_all_resources_need())
+
-                if len(workers) < task.vnode_nums:
+            if task.at_same_time:
                # parallel tasks
                workers = self.find_proper_workers(task.subtask_list)
                if len(workers) == 0:
                    return None, None
                else:
-                    idxs = [i for i in range(task.vnode_nums)]
+                    for i in range(len(workers)):
-                    return task, zip(idxs,workers)
+                        task.subtask_list[i].worker = workers[i]
                    return task, task.subtask_list
            else:
                # traditional tasks
-                workers = self.find_proper_workers([task.get_one_resources_need()])
+                for sub_task in task.subtask_list:
-                if len(workers) < task.vnode_nums:
+                    if sub_task.status == WAITING:
-                    return None, None, None
+                        workers = self.find_proper_workers([sub_task])
-                '''for index, instance in enumerate(task.instance_list):
+                        if len(workers) > 0:
-                    # find instance to retry
+                            sub_task.worker = workers[0]
-                    if (instance['status'] == FAILED or instance['status'] == TIMEOUT) and instance['try_count'] <= task.info.maxRetryCount:
+                            return task, [sub_task]
                        if worker is not None:
                            self.logger.info('[task_scheduler] retry')
                            return task, index, worker
                    # find timeout instance
                    elif instance['status'] == RUNNING:
                        if not self.is_alive(instance['worker']):
                            instance['status'] = FAILED
                            instance['token'] = ''
                            self.cpu_usage[instance['worker']] -= task.info.cluster.instance.cpu
                            self.gpu_usage[instance['worker']] -= task.info.cluster.instance.gpu
-                            self.logger.warning('[task_scheduler] worker dead, retry task [%s] instance [%d]' % (task.info.id, index))
+        return None, None
                            if worker is not None:
                                return task, index, worker
-                if worker is not None:
+    def find_proper_workers(self, sub_task_list):
                    # start new instance
                    if len(task.instance_list) < task.info.instanceCount:
                        instance = {}
                        instance['try_count'] = 0
                        task.instance_list.append(instance)
                        return task, len(task.instance_list) - 1, worker'''
            self.check_task_completed(task)
        return None, None, None
    def find_proper_workers(self, vnodes_configs):
        nodes = self.get_all_nodes()
        if nodes is None or len(nodes) == 0:
            self.logger.warning('[task_scheduler] running nodes not found')
            return None
        proper_workers = []
-        for needs in vnodes_configs:
+        has_waiting = False
        for sub_task in sub_task_list:
            if sub_task.status == WAITING:
                has_waiting = True
            if sub_task.worker is not None and sub_task.vnode_started:
                proper_workers.append(sub_task.worker)
                continue
            needs = sub_task.vnode_info.vnode.instance
            proper_worker = None
            for worker_ip, worker_info in nodes:
                if needs.cpu + self.get_cpu_usage(worker_ip) > worker_info['cpu']:
                    continue
@ -391,11 +468,16 @@ class TaskMgr(threading.Thread):
                    worker_info['memory'] -= needs.memory
                    worker_info['gpu'] -= needs.gpu
                    worker_info['disk'] -= needs.disk
-                    proper_workers.append(worker_ip)
+                    proper_worker = worker_ip
                    break
            if proper_worker is not None:
                proper_workers.append(proper_worker)
            else:
                return []
        if has_waiting:
            return proper_workers
        else:
            return []
    def get_all_nodes(self):
        # cache running nodes
@ -445,11 +527,16 @@ class TaskMgr(threading.Thread):
            "base": Image.BASE,
            "public": Image.PUBLIC
        }
-        configinfo  = {'vnode_nums':7,'atSameTime':True,'MultiStart':True,
+        task = Task(
-                        'maxRetryCount':int(json_task['retryCount'])}
+            task_id = taskid,
-        # json_task = json.loads(json_task)
+            username = username,
-        task = Task(configinfo,
+            # all vnode must be started at the same time
-            VNodeInfo(
+            at_same_time = json_task['at_same_time'],
            priority = task_priority,
            max_size = (1 << self.task_cidr) - 2,
            task_infos = [{
                'max_retry_count': int(json_task['retryCount']),
                'vnode_info': VNodeInfo(
                    taskid = taskid,
                    username = username,
                    vnode = Vnode(
@ -461,9 +548,14 @@ class TaskMgr(threading.Thread):
                            cpu = int(json_task['cpuSetting']),
                            memory = int(json_task['memorySetting']),
                            disk = int(json_task['diskSetting']),
-                    gpu = int(json_task['gpuSetting'])))
+                            gpu = int(json_task['gpuSetting'])),
                        mount = [Mount(
                                    localPath = json_task['mapping'][mapping_key]['mappingLocalDir'],
                                    remotePath=json_task['mapping'][mapping_key]['mappingRemoteDir'])
                                for mapping_key in json_task['mapping']] if 'mapping' in json_task else []
                        ),
-            TaskInfo(
+                ),
                'command_info': TaskInfo(
                    taskid = taskid,
                    username = username,
                    parameters = Parameters(
@ -473,13 +565,11 @@ class TaskMgr(threading.Thread):
                            envVars = {}),
                        stderrRedirectPath = json_task.get('stdErrRedPth',""),
                        stdoutRedirectPath = json_task.get('stdOutRedPth',"")),
-            timeout = int(json_task['expTime']),
+                    timeout = int(json_task['expTime'])
-            ),
+                # commands are executed in all vnodes / only excuted in the first vnode
-            priority=task_priority,max_size=(1<<self.task_cidr)-2)
+                # if in traditional mode, commands will be executed in all vnodes
-        if 'mapping' in json_task:
+                ) if (not json_task['at_same_time'] or json_task['multicommand'] or instance_index == 0) else None
-            task.vnodeinfo.vnode.mount.extend([Mount(localPath=json_task['mapping'][mapping_key]['mappingLocalDir'],
+            } for instance_index in range(json_task['instCount'])])
                                                  remotePath=json_task['mapping'][mapping_key]['mappingRemoteDir'])
                                            for mapping_key in json_task['mapping']])
        self.lazy_append_list.append(task)
@ -488,7 +578,7 @@ class TaskMgr(threading.Thread):
    @queue_lock
    def get_task(self, taskid):
        for task in self.task_queue:
-            if task.info.id == taskid:
+            if task.id == taskid:
                return task
        return None