From a670020e8db0158128d49f983c507d9dff3c7c88 Mon Sep 17 00:00:00 2001 From: Gallen Date: Tue, 17 Jul 2018 14:32:49 +0800 Subject: [PATCH] modify rpc proto --- src/master/taskmgr.py | 25 ++++++++++++++++--- src/protos/rpc.proto | 9 ++++--- src/protos/rpc_pb2.py | 58 +++++++++++++++++++++++-------------------- 3 files changed, 57 insertions(+), 35 deletions(-) diff --git a/src/master/taskmgr.py b/src/master/taskmgr.py index 1bcee81..6e23f3b 100644 --- a/src/master/taskmgr.py +++ b/src/master/taskmgr.py @@ -54,8 +54,14 @@ class TaskMgr(threading.Thread): # this method is called when worker send heart-beat rpc request - def on_task_report(self, task): - logger.info('receive task report: id %d, status %d' % (task.id, task.status)) + def on_task_report(self, report): + logger.info('[on_task_report] receive task report: id %d, status %d' % (report.id, report.status)) + task = get_task(report.id) + if task == None: + logger.error('[on_task_report] task not found') + return + + task.status = report.status if task.status == Task.RUNNING: pass elif task.status == Task.COMPLETED: @@ -63,7 +69,15 @@ class TaskMgr(threading.Thread): pass elif task.status == Task.FAILED || task.status == Task.TIMEOUT: # retry - pass + if task.maxRetryCount <= 0: + # tell jobmgr + pass + else: + # decrease max retry count & waiting for retry + task.maxRetryCount -= 1 + task.status = Task.WAITING + else: + logger.error('[on_task_report] receive report from waiting task') # this is a thread to process task(or a instance) @@ -95,4 +109,7 @@ class TaskMgr(threading.Thread): # user: username # get the information of a task, including the status, task description and other information def get_task(self, taskid): - pass + for task in self.taskQueue: + if task.id == taskid: + return task + return None diff --git a/src/protos/rpc.proto b/src/protos/rpc.proto index 4bcf62d..d1adc74 100644 --- a/src/protos/rpc.proto +++ b/src/protos/rpc.proto @@ -27,10 +27,11 @@ message Task { int32 Timeout = 7; // 超时阈值 enum TaskStatus { - RUNNING = 0; - COMPLETED = 1; - FAILED = 2; - TIMEOUT = 3; + WAITING = 0; + RUNNING = 1; + COMPLETED = 2; + FAILED = 3; + TIMEOUT = 4; } } diff --git a/src/protos/rpc_pb2.py b/src/protos/rpc_pb2.py index aa8a69c..4cc4ceb 100644 --- a/src/protos/rpc_pb2.py +++ b/src/protos/rpc_pb2.py @@ -19,7 +19,7 @@ DESCRIPTOR = _descriptor.FileDescriptor( name='protos/rpc.proto', package='', syntax='proto3', - serialized_pb=_b('\n\x10protos/rpc.proto\"V\n\x05Reply\x12#\n\x07message\x18\x01 \x01(\x0e\x32\x12.Reply.ReplyStatus\"(\n\x0bReplyStatus\x12\x0c\n\x08\x41\x43\x43\x45PTED\x10\x00\x12\x0b\n\x07REFUSED\x10\x01\"\xf2\x01\n\x04Task\x12\n\n\x02id\x18\x01 \x01(\t\x12 \n\x06status\x18\x02 \x01(\x0e\x32\x10.Task.TaskStatus\x12\x15\n\rinstanceCount\x18\x03 \x01(\x05\x12\x15\n\rmaxRetryCount\x18\x04 \x01(\x05\x12\x1f\n\nparameters\x18\x05 \x01(\x0b\x32\x0b.Parameters\x12\x19\n\x07\x63luster\x18\x06 \x01(\x0b\x32\x08.Cluster\x12\x0f\n\x07Timeout\x18\x07 \x01(\x05\"A\n\nTaskStatus\x12\x0b\n\x07RUNNING\x10\x00\x12\r\n\tCOMPLETED\x10\x01\x12\n\n\x06\x46\x41ILED\x10\x02\x12\x0b\n\x07TIMEOUT\x10\x03\"_\n\nParameters\x12\x19\n\x07\x63ommand\x18\x01 \x01(\x0b\x32\x08.Command\x12\x1a\n\x12stderrRedirectPath\x18\x02 \x01(\t\x12\x1a\n\x12stdoutRedirectPath\x18\x03 \x01(\t\"\x8b\x01\n\x07\x43ommand\x12\x13\n\x0b\x63ommandLine\x18\x01 \x01(\t\x12\x13\n\x0bpackagePath\x18\x02 \x01(\t\x12&\n\x07\x65nvVars\x18\x03 \x03(\x0b\x32\x15.Command.EnvVarsEntry\x1a.\n\x0c\x45nvVarsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"T\n\x07\x43luster\x12\x15\n\x05image\x18\x01 \x01(\x0b\x32\x06.Image\x12\x1b\n\x08instance\x18\x02 \x01(\x0b\x32\t.Instance\x12\x15\n\x05mount\x18\x03 \x03(\x0b\x32\x06.Mount\"j\n\x05Image\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x1e\n\x04type\x18\x02 \x01(\x0e\x32\x10.Image.ImageType\x12\r\n\x05owner\x18\x03 \x01(\t\"$\n\tImageType\x12\n\n\x06PUBLIC\x10\x00\x12\x0b\n\x07PRIVATE\x10\x01\".\n\x05Mount\x12\x11\n\tlocalPath\x18\x01 \x01(\t\x12\x12\n\nremotePath\x18\x02 \x01(\t\"B\n\x08Instance\x12\x0b\n\x03\x63pu\x18\x01 \x01(\x05\x12\x0e\n\x06memory\x18\x02 \x01(\x05\x12\x0c\n\x04\x64isk\x18\x03 \x01(\x05\x12\x0b\n\x03gpu\x18\x04 \x01(\x05\x32#\n\x06Master\x12\x19\n\x06report\x12\x05.Task\x1a\x06.Reply\"\x00\x32%\n\x06Worker\x12\x1b\n\x08\x61\x64\x64_task\x12\x05.Task\x1a\x06.Reply\"\x00\x62\x06proto3') + serialized_pb=_b('\n\x10protos/rpc.proto\"V\n\x05Reply\x12#\n\x07message\x18\x01 \x01(\x0e\x32\x12.Reply.ReplyStatus\"(\n\x0bReplyStatus\x12\x0c\n\x08\x41\x43\x43\x45PTED\x10\x00\x12\x0b\n\x07REFUSED\x10\x01\"\xff\x01\n\x04Task\x12\n\n\x02id\x18\x01 \x01(\t\x12 \n\x06status\x18\x02 \x01(\x0e\x32\x10.Task.TaskStatus\x12\x15\n\rinstanceCount\x18\x03 \x01(\x05\x12\x15\n\rmaxRetryCount\x18\x04 \x01(\x05\x12\x1f\n\nparameters\x18\x05 \x01(\x0b\x32\x0b.Parameters\x12\x19\n\x07\x63luster\x18\x06 \x01(\x0b\x32\x08.Cluster\x12\x0f\n\x07Timeout\x18\x07 \x01(\x05\"N\n\nTaskStatus\x12\x0b\n\x07WAITING\x10\x00\x12\x0b\n\x07RUNNING\x10\x01\x12\r\n\tCOMPLETED\x10\x02\x12\n\n\x06\x46\x41ILED\x10\x03\x12\x0b\n\x07TIMEOUT\x10\x04\"_\n\nParameters\x12\x19\n\x07\x63ommand\x18\x01 \x01(\x0b\x32\x08.Command\x12\x1a\n\x12stderrRedirectPath\x18\x02 \x01(\t\x12\x1a\n\x12stdoutRedirectPath\x18\x03 \x01(\t\"\x8b\x01\n\x07\x43ommand\x12\x13\n\x0b\x63ommandLine\x18\x01 \x01(\t\x12\x13\n\x0bpackagePath\x18\x02 \x01(\t\x12&\n\x07\x65nvVars\x18\x03 \x03(\x0b\x32\x15.Command.EnvVarsEntry\x1a.\n\x0c\x45nvVarsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"T\n\x07\x43luster\x12\x15\n\x05image\x18\x01 \x01(\x0b\x32\x06.Image\x12\x1b\n\x08instance\x18\x02 \x01(\x0b\x32\t.Instance\x12\x15\n\x05mount\x18\x03 \x03(\x0b\x32\x06.Mount\"j\n\x05Image\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x1e\n\x04type\x18\x02 \x01(\x0e\x32\x10.Image.ImageType\x12\r\n\x05owner\x18\x03 \x01(\t\"$\n\tImageType\x12\n\n\x06PUBLIC\x10\x00\x12\x0b\n\x07PRIVATE\x10\x01\".\n\x05Mount\x12\x11\n\tlocalPath\x18\x01 \x01(\t\x12\x12\n\nremotePath\x18\x02 \x01(\t\"B\n\x08Instance\x12\x0b\n\x03\x63pu\x18\x01 \x01(\x05\x12\x0e\n\x06memory\x18\x02 \x01(\x05\x12\x0c\n\x04\x64isk\x18\x03 \x01(\x05\x12\x0b\n\x03gpu\x18\x04 \x01(\x05\x32#\n\x06Master\x12\x19\n\x06report\x12\x05.Task\x1a\x06.Reply\"\x00\x32%\n\x06Worker\x12\x1b\n\x08\x61\x64\x64_task\x12\x05.Task\x1a\x06.Reply\"\x00\x62\x06proto3') ) @@ -53,26 +53,30 @@ _TASK_TASKSTATUS = _descriptor.EnumDescriptor( file=DESCRIPTOR, values=[ _descriptor.EnumValueDescriptor( - name='RUNNING', index=0, number=0, + name='WAITING', index=0, number=0, options=None, type=None), _descriptor.EnumValueDescriptor( - name='COMPLETED', index=1, number=1, + name='RUNNING', index=1, number=1, options=None, type=None), _descriptor.EnumValueDescriptor( - name='FAILED', index=2, number=2, + name='COMPLETED', index=2, number=2, options=None, type=None), _descriptor.EnumValueDescriptor( - name='TIMEOUT', index=3, number=3, + name='FAILED', index=3, number=3, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='TIMEOUT', index=4, number=4, options=None, type=None), ], containing_type=None, options=None, serialized_start=286, - serialized_end=351, + serialized_end=364, ) _sym_db.RegisterEnumDescriptor(_TASK_TASKSTATUS) @@ -93,8 +97,8 @@ _IMAGE_IMAGETYPE = _descriptor.EnumDescriptor( ], containing_type=None, options=None, - serialized_start=748, - serialized_end=784, + serialized_start=761, + serialized_end=797, ) _sym_db.RegisterEnumDescriptor(_IMAGE_IMAGETYPE) @@ -201,7 +205,7 @@ _TASK = _descriptor.Descriptor( oneofs=[ ], serialized_start=109, - serialized_end=351, + serialized_end=364, ) @@ -245,8 +249,8 @@ _PARAMETERS = _descriptor.Descriptor( extension_ranges=[], oneofs=[ ], - serialized_start=353, - serialized_end=448, + serialized_start=366, + serialized_end=461, ) @@ -283,8 +287,8 @@ _COMMAND_ENVVARSENTRY = _descriptor.Descriptor( extension_ranges=[], oneofs=[ ], - serialized_start=544, - serialized_end=590, + serialized_start=557, + serialized_end=603, ) _COMMAND = _descriptor.Descriptor( @@ -327,8 +331,8 @@ _COMMAND = _descriptor.Descriptor( extension_ranges=[], oneofs=[ ], - serialized_start=451, - serialized_end=590, + serialized_start=464, + serialized_end=603, ) @@ -372,8 +376,8 @@ _CLUSTER = _descriptor.Descriptor( extension_ranges=[], oneofs=[ ], - serialized_start=592, - serialized_end=676, + serialized_start=605, + serialized_end=689, ) @@ -418,8 +422,8 @@ _IMAGE = _descriptor.Descriptor( extension_ranges=[], oneofs=[ ], - serialized_start=678, - serialized_end=784, + serialized_start=691, + serialized_end=797, ) @@ -456,8 +460,8 @@ _MOUNT = _descriptor.Descriptor( extension_ranges=[], oneofs=[ ], - serialized_start=786, - serialized_end=832, + serialized_start=799, + serialized_end=845, ) @@ -508,8 +512,8 @@ _INSTANCE = _descriptor.Descriptor( extension_ranges=[], oneofs=[ ], - serialized_start=834, - serialized_end=900, + serialized_start=847, + serialized_end=913, ) _REPLY.fields_by_name['message'].enum_type = _REPLY_REPLYSTATUS @@ -610,8 +614,8 @@ _MASTER = _descriptor.ServiceDescriptor( file=DESCRIPTOR, index=0, options=None, - serialized_start=902, - serialized_end=937, + serialized_start=915, + serialized_end=950, methods=[ _descriptor.MethodDescriptor( name='report', @@ -634,8 +638,8 @@ _WORKER = _descriptor.ServiceDescriptor( file=DESCRIPTOR, index=1, options=None, - serialized_start=939, - serialized_end=976, + serialized_start=952, + serialized_end=989, methods=[ _descriptor.MethodDescriptor( name='add_task',