add network into taskmgr
This commit is contained in:
parent
9129a75537
commit
782baee617
|
@ -201,13 +201,12 @@
|
||||||
# default: 50051
|
# default: 50051
|
||||||
# BATCH_WORKER_PORT=50051
|
# BATCH_WORKER_PORT=50051
|
||||||
|
|
||||||
# BATCH_GATEWAY: the ip address of gateway for the containers processing
|
# BATCH_NET: ip addresses range of containers for batch job, default is 10.16.0.0/16
|
||||||
# batch jobs. default: 10.0.3.1
|
# BATCH_NET=10.16.0.0/16
|
||||||
# BATCH_GATEWAY=10.0.3.1
|
|
||||||
|
|
||||||
# BATCH_NET: ip addresses range of containers for batch job, default is 10.0.3.0/24
|
# BATCH_TASK_CIDR: 2^(BATCH_TASK_CIDR) is the number of ip addresses for a task, default is 4
|
||||||
# BATCH_NET=10.0.3.0/24
|
# BATCH_TASK_CIDR=4
|
||||||
|
|
||||||
# BATCH_MAX_THREAD_WORKER: the maximun number of threads of the rpc server on
|
# BATCH_MAX_THREAD_WORKER: the maximun number of threads of the rpc server on
|
||||||
# the batch job worker. default:5
|
# the batch job worker. default:5
|
||||||
# BATCH_MAX_THREAD_WORKER=5
|
# BATCH_MAX_THREAD_WORKER=5
|
||||||
|
|
|
@ -13,11 +13,18 @@ from concurrent import futures
|
||||||
import grpc
|
import grpc
|
||||||
from protos.rpc_pb2 import *
|
from protos.rpc_pb2 import *
|
||||||
from protos.rpc_pb2_grpc import MasterServicer, add_MasterServicer_to_server, WorkerStub
|
from protos.rpc_pb2_grpc import MasterServicer, add_MasterServicer_to_server, WorkerStub
|
||||||
|
from utils.nettools import netcontrol
|
||||||
from utils import env
|
from utils import env
|
||||||
|
|
||||||
|
def ip_to_int(addr):
|
||||||
|
[a, b, c, d] = addr.split('.')
|
||||||
|
return (int(a)<<24) + (int(b)<<16) + (int(c)<<8) + int(d)
|
||||||
|
|
||||||
|
def int_to_ip(num):
|
||||||
|
return str((num>>24)&255)+"."+str((num>>16)&255)+"."+str((num>>8)&255)+"."+str(num&255)
|
||||||
|
|
||||||
class Task():
|
class Task():
|
||||||
def __init__(self, configinfo, vnodeinfo, taskinfo, priority):
|
def __init__(self, configinfo, vnodeinfo, taskinfo, priority, max_size):
|
||||||
self.vnodeinfo = vnodeinfo
|
self.vnodeinfo = vnodeinfo
|
||||||
self.taskinfo = taskinfo
|
self.taskinfo = taskinfo
|
||||||
self.status = WAITING
|
self.status = WAITING
|
||||||
|
@ -30,6 +37,8 @@ class Task():
|
||||||
# priority the bigger the better
|
# priority the bigger the better
|
||||||
# self.priority the smaller the better
|
# self.priority the smaller the better
|
||||||
self.priority = int(time.time()) / 60 / 60 - priority
|
self.priority = int(time.time()) / 60 / 60 - priority
|
||||||
|
self.network = None
|
||||||
|
self.max_size = max_size
|
||||||
|
|
||||||
for i in range(self.vnode_nums):
|
for i in range(self.vnode_nums):
|
||||||
self.subtask_list.append({'status':'WAITING','try_count':0})
|
self.subtask_list.append({'status':'WAITING','try_count':0})
|
||||||
|
@ -59,10 +68,11 @@ class TaskMgr(threading.Thread):
|
||||||
# load task information from etcd
|
# load task information from etcd
|
||||||
# initial a task queue and task schedueler
|
# initial a task queue and task schedueler
|
||||||
# taskmgr: a taskmgr instance
|
# taskmgr: a taskmgr instance
|
||||||
def __init__(self, nodemgr, monitor_fetcher, scheduler_interval=2, external_logger=None):
|
def __init__(self, nodemgr, monitor_fetcher, master_ip, scheduler_interval=2, external_logger=None):
|
||||||
threading.Thread.__init__(self)
|
threading.Thread.__init__(self)
|
||||||
self.thread_stop = False
|
self.thread_stop = False
|
||||||
self.jobmgr = None
|
self.jobmgr = None
|
||||||
|
self.master_ip = self.master_ip
|
||||||
self.task_queue = []
|
self.task_queue = []
|
||||||
self.lazy_append_list = []
|
self.lazy_append_list = []
|
||||||
self.lazy_delete_list = []
|
self.lazy_delete_list = []
|
||||||
|
@ -84,6 +94,19 @@ class TaskMgr(threading.Thread):
|
||||||
# self.last_nodes_info_update_time = 0
|
# self.last_nodes_info_update_time = 0
|
||||||
# self.nodes_info_update_interval = 30 # (s)
|
# self.nodes_info_update_interval = 30 # (s)
|
||||||
|
|
||||||
|
self.network_lock = threading.Lock()
|
||||||
|
batch_net = env.getenv('BATCH_NET')
|
||||||
|
self.batch_cidr = int(batch_net.split('/')[1])
|
||||||
|
batch_net = batch_net.split('/')[0]
|
||||||
|
task_cidr = int(env.getenv('BATCH_TASK_CIDR'))
|
||||||
|
task_cidr = min(task_cidr,31-self.batch_cidr)
|
||||||
|
self.task_cidr = max(task_cidr,2)
|
||||||
|
self.base_ip = ip_to_int(batch_net)
|
||||||
|
self.free_nets = []
|
||||||
|
for i in range((1 << self.task_cidr), (1 << (32-self.batch_cidr)) - 1):
|
||||||
|
self.free_nets.append(i)
|
||||||
|
logger.info("Free nets addresses pool %s" % str(self.free_nets))
|
||||||
|
logger.info("Each Batch Net CIDR:%s"%(str(self.task_cidr)))
|
||||||
|
|
||||||
def queue_lock(f):
|
def queue_lock(f):
|
||||||
@wraps(f)
|
@wraps(f)
|
||||||
|
@ -94,6 +117,14 @@ class TaskMgr(threading.Thread):
|
||||||
return result
|
return result
|
||||||
return new_f
|
return new_f
|
||||||
|
|
||||||
|
def net_lock(f):
|
||||||
|
@wraps(f)
|
||||||
|
def new_f(self, *args, **kwargs):
|
||||||
|
self.network_lock.acquire()
|
||||||
|
result = f(self, *args, **kwargs)
|
||||||
|
self.network_lock.release()
|
||||||
|
return result
|
||||||
|
return new_f
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
self.serve()
|
self.serve()
|
||||||
|
@ -128,14 +159,73 @@ class TaskMgr(threading.Thread):
|
||||||
self.task_queue.append(task)
|
self.task_queue.append(task)
|
||||||
self.task_queue = sorted(self.task_queue, key=lambda x: x.priority)
|
self.task_queue = sorted(self.task_queue, key=lambda x: x.priority)
|
||||||
|
|
||||||
|
def stop_vnode(self, worker, task, vnodeid):
|
||||||
|
vnodeinfo = copy.copy(task.vnodeinfo)
|
||||||
|
vnodeinfo.vnodeid = vnodeid
|
||||||
|
try:
|
||||||
|
self.logger.info('[task_processor] Stopping vnode for task [%s] vnode [%d]' % (task.vnodeinfo.id, vnodeid))
|
||||||
|
channel = grpc.insecure_channel('%s:%s' % (worker, self.worker_port))
|
||||||
|
stub = WorkerStub(channel)
|
||||||
|
response = stub.stop_vnode(vnodeinfo)
|
||||||
|
if response.status != Reply.ACCEPTED:
|
||||||
|
raise Exception(response.message)
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.error('[task_processor] rpc error message: %s' % e)
|
||||||
|
return [False, e]
|
||||||
|
return [True, ""]
|
||||||
|
|
||||||
|
@net_lock
|
||||||
|
def acquire_task_net(self, task):
|
||||||
|
self.logger.info("[acquire_task_net] user(%s) task(%s) net(%s)"%(task.taskinfo.username, task.taskinfo.taskid, str(task.network)))
|
||||||
|
if task.network == None:
|
||||||
|
task.network = self.free_nets.pop(0)
|
||||||
|
return task.network
|
||||||
|
|
||||||
|
@net_lock
|
||||||
|
def release_task_net(self,task):
|
||||||
|
self.logger.info("[release_task_net] user(%s) task(%s) net(%s)"%(task.taskinfo.username, task.taskinfo.taskid, str(task.network)))
|
||||||
|
if task.network == None:
|
||||||
|
return
|
||||||
|
self.free_nets.append(task.network)
|
||||||
|
self.logger.error('[release task_net] %s'%str(e))
|
||||||
|
|
||||||
|
def setup_tasknet(self, task, workers=None):
|
||||||
|
taskid = task.taskinfo.taskid
|
||||||
|
username = task.taskinfo.username
|
||||||
|
brname = "docklet-batch-%s-%s"%(username, taskid)
|
||||||
|
gwname = "Batch-%s-%s"%(username, taskid)
|
||||||
|
if task.network == None:
|
||||||
|
return [False, "task.network is None!"]
|
||||||
|
gatewayip = int_to_ip(self.base_ip + task.network + 1)
|
||||||
|
gatewayipcidr += "/" + str(32-self.task_cidr)
|
||||||
|
netcontrol.new_bridge(brname)
|
||||||
|
netcontrol.setup_gw(brname,gwname,gatewayipcidr,0,0)
|
||||||
|
|
||||||
|
for wip in workers:
|
||||||
|
netcontrol.setup_gre(brname,wip)
|
||||||
|
return [True, gatewayip]
|
||||||
|
|
||||||
|
def remove_tasknet(self, task):
|
||||||
|
taskid = task.taskinfo.taskid
|
||||||
|
username = task.taskinfo.username
|
||||||
|
brname = "docklet-batch-%s-%s"%(username, taskid)
|
||||||
|
netcontrol.del_bridge(brname)
|
||||||
|
|
||||||
def task_processor(self, task, vnodes_workers):
|
def task_processor(self, task, vnodes_workers):
|
||||||
task.status = RUNNING
|
task.status = RUNNING
|
||||||
self.jobmgr.report(task.taskinfo.taskid,'running')
|
self.jobmgr.report(task.taskinfo.taskid,'running')
|
||||||
|
|
||||||
# properties for transaction
|
# properties for transactio
|
||||||
|
|
||||||
|
self.acquire_task_net(task)
|
||||||
|
[success, gwip] = self.setup_tasknet(task,[w[1] for w in vnodes_workers])
|
||||||
|
if not success:
|
||||||
|
return [False, gwip]
|
||||||
|
|
||||||
token = ''.join(random.sample(string.ascii_letters + string.digits, 8))
|
token = ''.join(random.sample(string.ascii_letters + string.digits, 8))
|
||||||
|
placed_workers = []
|
||||||
|
|
||||||
|
# start vc
|
||||||
for vid, worker in vnodes_workers:
|
for vid, worker in vnodes_workers:
|
||||||
vnodeinfo = copy.copy(task.vnodeinfo)
|
vnodeinfo = copy.copy(task.vnodeinfo)
|
||||||
vnodeinfo.vnodeid = vid
|
vnodeinfo.vnodeid = vid
|
||||||
|
@ -152,21 +242,30 @@ class TaskMgr(threading.Thread):
|
||||||
#if not username in self.user_containers.keys():
|
#if not username in self.user_containers.keys():
|
||||||
#self.user_containers[username] = []
|
#self.user_containers[username] = []
|
||||||
#self.user_containers[username].append(container_name)
|
#self.user_containers[username].append(container_name)
|
||||||
|
ipaddr = int_to_ip(self.base_ip + task.network + vid%task.max_size + 2)
|
||||||
|
brname = "docklet-batch-%s-%s"%(username, taskid)
|
||||||
|
networkinfo = Network(ipaddr=ipaddr, gateway=gwip, masterip=self.masterip, brname=brname)
|
||||||
|
vnode.network = networkinfo
|
||||||
|
|
||||||
try:
|
try:
|
||||||
self.logger.info('[task_processor] starting vnode for task [%s] instance [%d]' % (task.vnodeinfo.id, vid))
|
self.logger.info('[task_processor] starting vnode for task [%s] instance [%d]' % (task.vnodeinfo.id, vid))
|
||||||
channel = grpc.insecure_channel('%s:%s' % (worker, self.worker_port))
|
channel = grpc.insecure_channel('%s:%s' % (worker, self.worker_port))
|
||||||
stub = WorkerStub(channel)
|
stub = WorkerStub(channel)
|
||||||
response = stub.start_vnode(vnodeinfo)
|
response = stub.start_vnode(vnodeinfo)
|
||||||
|
placed_workers.append(worker)
|
||||||
if response.status != Reply.ACCEPTED:
|
if response.status != Reply.ACCEPTED:
|
||||||
raise Exception(response.message)
|
raise Exception(response.message)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.error('[task_processor] rpc error message: %s' % e)
|
self.logger.error('[task_processor] rpc error message: %s' % e)
|
||||||
|
task.status = FAILED
|
||||||
vnode['status'] = FAILED
|
vnode['status'] = FAILED
|
||||||
vnode['try_count'] -= 1
|
vnode['try_count'] -= 1
|
||||||
|
for pl_worker in placed_workers:
|
||||||
|
pass
|
||||||
return
|
return
|
||||||
#self.user_containers[username].remove(container_name)
|
#self.user_containers[username].remove(container_name)
|
||||||
|
|
||||||
|
# start tasks
|
||||||
for vid, worker in vnodes_workers:
|
for vid, worker in vnodes_workers:
|
||||||
taskinfo = copy.copy(task.taskinfo)
|
taskinfo = copy.copy(task.taskinfo)
|
||||||
taskinfo.vnodeid = vid
|
taskinfo.vnodeid = vid
|
||||||
|
@ -181,8 +280,7 @@ class TaskMgr(threading.Thread):
|
||||||
raise Exception(response.message)
|
raise Exception(response.message)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.error('[task_processor] rpc error message: %s' % e)
|
self.logger.error('[task_processor] rpc error message: %s' % e)
|
||||||
vnode['status'] = FAILED
|
task.status = FAILED
|
||||||
vnode['try_count'] -= 1
|
|
||||||
|
|
||||||
# return task, workers
|
# return task, workers
|
||||||
def task_scheduler(self):
|
def task_scheduler(self):
|
||||||
|
@ -347,7 +445,7 @@ class TaskMgr(threading.Thread):
|
||||||
stdoutRedirectPath = json_task.get('stdOutRedPth',"")),
|
stdoutRedirectPath = json_task.get('stdOutRedPth',"")),
|
||||||
timeout = int(json_task['expTime']),
|
timeout = int(json_task['expTime']),
|
||||||
),
|
),
|
||||||
priority=task_priority)
|
priority=task_priority,max_size=(1<<self.task_cidr)-2)
|
||||||
if 'mapping' in json_task:
|
if 'mapping' in json_task:
|
||||||
task.vnodeinfo.vnode.mount.extend([Mount(localPath=json_task['mapping'][mapping_key]['mappingLocalDir'],
|
task.vnodeinfo.vnode.mount.extend([Mount(localPath=json_task['mapping'][mapping_key]['mappingLocalDir'],
|
||||||
remotePath=json_task['mapping'][mapping_key]['mappingRemoteDir'])
|
remotePath=json_task['mapping'][mapping_key]['mappingRemoteDir'])
|
||||||
|
|
|
@ -59,7 +59,7 @@ def start_task():
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
#for i in range(10):
|
#for i in range(10):
|
||||||
#run()
|
#run()
|
||||||
start_task()
|
#start_task()
|
||||||
#stop_vnode()
|
stop_vnode()
|
||||||
#time.sleep(4)
|
#time.sleep(4)
|
||||||
#stop_task()
|
#stop_task()
|
||||||
|
|
|
@ -7,7 +7,7 @@ service Master {
|
||||||
service Worker {
|
service Worker {
|
||||||
rpc start_vnode (VNodeInfo) returns (Reply) {}
|
rpc start_vnode (VNodeInfo) returns (Reply) {}
|
||||||
rpc start_task (TaskInfo) returns (Reply) {}
|
rpc start_task (TaskInfo) returns (Reply) {}
|
||||||
rpc stop_task (ReportMsg) returns (Reply) {}
|
rpc stop_task (TaskInfo) returns (Reply) {}
|
||||||
rpc stop_vnode (VNodeInfo) returns (Reply) {}
|
rpc stop_vnode (VNodeInfo) returns (Reply) {}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -20,7 +20,7 @@ DESCRIPTOR = _descriptor.FileDescriptor(
|
||||||
name='rpc.proto',
|
name='rpc.proto',
|
||||||
package='',
|
package='',
|
||||||
syntax='proto3',
|
syntax='proto3',
|
||||||
serialized_pb=_b('\n\trpc.proto\"U\n\tVNodeInfo\x12\x0e\n\x06taskid\x18\x01 \x01(\t\x12\x10\n\x08username\x18\x02 \x01(\t\x12\x0f\n\x07vnodeid\x18\x03 \x01(\x05\x12\x15\n\x05vnode\x18\x04 \x01(\x0b\x32\x06.VNode\"f\n\x05Reply\x12\"\n\x06status\x18\x01 \x01(\x0e\x32\x12.Reply.ReplyStatus\x12\x0f\n\x07message\x18\x02 \x01(\t\"(\n\x0bReplyStatus\x12\x0c\n\x08\x41\x43\x43\x45PTED\x10\x00\x12\x0b\n\x07REFUSED\x10\x01\"\'\n\tReportMsg\x12\x1a\n\x08taskmsgs\x18\x01 \x03(\x0b\x32\x08.TaskMsg\"{\n\x07TaskMsg\x12\x0e\n\x06taskid\x18\x01 \x01(\t\x12\x10\n\x08username\x18\x02 \x01(\t\x12\x0f\n\x07vnodeid\x18\x03 \x01(\x05\x12\x1e\n\rsubTaskStatus\x18\x04 \x01(\x0e\x32\x07.Status\x12\r\n\x05token\x18\x05 \x01(\t\x12\x0e\n\x06\x65rrmsg\x18\x06 \x01(\t\"~\n\x08TaskInfo\x12\x0e\n\x06taskid\x18\x01 \x01(\t\x12\x10\n\x08username\x18\x02 \x01(\t\x12\x0f\n\x07vnodeid\x18\x03 \x01(\x05\x12\x1f\n\nparameters\x18\x04 \x01(\x0b\x32\x0b.Parameters\x12\x0f\n\x07timeout\x18\x05 \x01(\x05\x12\r\n\x05token\x18\x06 \x01(\t\"_\n\nParameters\x12\x19\n\x07\x63ommand\x18\x01 \x01(\x0b\x32\x08.Command\x12\x1a\n\x12stderrRedirectPath\x18\x02 \x01(\t\x12\x1a\n\x12stdoutRedirectPath\x18\x03 \x01(\t\"\x8b\x01\n\x07\x43ommand\x12\x13\n\x0b\x63ommandLine\x18\x01 \x01(\t\x12\x13\n\x0bpackagePath\x18\x02 \x01(\t\x12&\n\x07\x65nvVars\x18\x03 \x03(\x0b\x32\x15.Command.EnvVarsEntry\x1a.\n\x0c\x45nvVarsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"m\n\x05VNode\x12\x15\n\x05image\x18\x01 \x01(\x0b\x32\x06.Image\x12\x1b\n\x08instance\x18\x02 \x01(\x0b\x32\t.Instance\x12\x15\n\x05mount\x18\x03 \x03(\x0b\x32\x06.Mount\x12\x19\n\x07network\x18\x04 \x01(\x0b\x32\x08.Network\"L\n\x07Network\x12\x0e\n\x06ipaddr\x18\x01 \x01(\t\x12\x0f\n\x07gateway\x18\x02 \x01(\t\x12\x10\n\x08masterip\x18\x03 \x01(\t\x12\x0e\n\x06\x62rname\x18\x04 \x01(\t\"t\n\x05Image\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x1e\n\x04type\x18\x02 \x01(\x0e\x32\x10.Image.ImageType\x12\r\n\x05owner\x18\x03 \x01(\t\".\n\tImageType\x12\x08\n\x04\x42\x41SE\x10\x00\x12\n\n\x06PUBLIC\x10\x01\x12\x0b\n\x07PRIVATE\x10\x02\"u\n\x05Mount\x12\x10\n\x08provider\x18\x01 \x01(\t\x12\x11\n\tlocalPath\x18\x02 \x01(\t\x12\x12\n\nremotePath\x18\x03 \x01(\t\x12\x11\n\taccessKey\x18\x04 \x01(\t\x12\x11\n\tsecretKey\x18\x05 \x01(\t\x12\r\n\x05other\x18\x06 \x01(\t\"B\n\x08Instance\x12\x0b\n\x03\x63pu\x18\x01 \x01(\x05\x12\x0e\n\x06memory\x18\x02 \x01(\x05\x12\x0c\n\x04\x64isk\x18\x03 \x01(\x05\x12\x0b\n\x03gpu\x18\x04 \x01(\x05*[\n\x06Status\x12\x0b\n\x07WAITING\x10\x00\x12\x0b\n\x07RUNNING\x10\x01\x12\r\n\tCOMPLETED\x10\x02\x12\n\n\x06\x46\x41ILED\x10\x03\x12\x0b\n\x07TIMEOUT\x10\x04\x12\x0f\n\x0bOUTPUTERROR\x10\x05\x32(\n\x06Master\x12\x1e\n\x06report\x12\n.ReportMsg\x1a\x06.Reply\"\x00\x32\x97\x01\n\x06Worker\x12#\n\x0bstart_vnode\x12\n.VNodeInfo\x1a\x06.Reply\"\x00\x12!\n\nstart_task\x12\t.TaskInfo\x1a\x06.Reply\"\x00\x12!\n\tstop_task\x12\n.ReportMsg\x1a\x06.Reply\"\x00\x12\"\n\nstop_vnode\x12\n.VNodeInfo\x1a\x06.Reply\"\x00\x62\x06proto3')
|
serialized_pb=_b('\n\trpc.proto\"U\n\tVNodeInfo\x12\x0e\n\x06taskid\x18\x01 \x01(\t\x12\x10\n\x08username\x18\x02 \x01(\t\x12\x0f\n\x07vnodeid\x18\x03 \x01(\x05\x12\x15\n\x05vnode\x18\x04 \x01(\x0b\x32\x06.VNode\"f\n\x05Reply\x12\"\n\x06status\x18\x01 \x01(\x0e\x32\x12.Reply.ReplyStatus\x12\x0f\n\x07message\x18\x02 \x01(\t\"(\n\x0bReplyStatus\x12\x0c\n\x08\x41\x43\x43\x45PTED\x10\x00\x12\x0b\n\x07REFUSED\x10\x01\"\'\n\tReportMsg\x12\x1a\n\x08taskmsgs\x18\x01 \x03(\x0b\x32\x08.TaskMsg\"{\n\x07TaskMsg\x12\x0e\n\x06taskid\x18\x01 \x01(\t\x12\x10\n\x08username\x18\x02 \x01(\t\x12\x0f\n\x07vnodeid\x18\x03 \x01(\x05\x12\x1e\n\rsubTaskStatus\x18\x04 \x01(\x0e\x32\x07.Status\x12\r\n\x05token\x18\x05 \x01(\t\x12\x0e\n\x06\x65rrmsg\x18\x06 \x01(\t\"~\n\x08TaskInfo\x12\x0e\n\x06taskid\x18\x01 \x01(\t\x12\x10\n\x08username\x18\x02 \x01(\t\x12\x0f\n\x07vnodeid\x18\x03 \x01(\x05\x12\x1f\n\nparameters\x18\x04 \x01(\x0b\x32\x0b.Parameters\x12\x0f\n\x07timeout\x18\x05 \x01(\x05\x12\r\n\x05token\x18\x06 \x01(\t\"_\n\nParameters\x12\x19\n\x07\x63ommand\x18\x01 \x01(\x0b\x32\x08.Command\x12\x1a\n\x12stderrRedirectPath\x18\x02 \x01(\t\x12\x1a\n\x12stdoutRedirectPath\x18\x03 \x01(\t\"\x8b\x01\n\x07\x43ommand\x12\x13\n\x0b\x63ommandLine\x18\x01 \x01(\t\x12\x13\n\x0bpackagePath\x18\x02 \x01(\t\x12&\n\x07\x65nvVars\x18\x03 \x03(\x0b\x32\x15.Command.EnvVarsEntry\x1a.\n\x0c\x45nvVarsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"m\n\x05VNode\x12\x15\n\x05image\x18\x01 \x01(\x0b\x32\x06.Image\x12\x1b\n\x08instance\x18\x02 \x01(\x0b\x32\t.Instance\x12\x15\n\x05mount\x18\x03 \x03(\x0b\x32\x06.Mount\x12\x19\n\x07network\x18\x04 \x01(\x0b\x32\x08.Network\"L\n\x07Network\x12\x0e\n\x06ipaddr\x18\x01 \x01(\t\x12\x0f\n\x07gateway\x18\x02 \x01(\t\x12\x10\n\x08masterip\x18\x03 \x01(\t\x12\x0e\n\x06\x62rname\x18\x04 \x01(\t\"t\n\x05Image\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x1e\n\x04type\x18\x02 \x01(\x0e\x32\x10.Image.ImageType\x12\r\n\x05owner\x18\x03 \x01(\t\".\n\tImageType\x12\x08\n\x04\x42\x41SE\x10\x00\x12\n\n\x06PUBLIC\x10\x01\x12\x0b\n\x07PRIVATE\x10\x02\"u\n\x05Mount\x12\x10\n\x08provider\x18\x01 \x01(\t\x12\x11\n\tlocalPath\x18\x02 \x01(\t\x12\x12\n\nremotePath\x18\x03 \x01(\t\x12\x11\n\taccessKey\x18\x04 \x01(\t\x12\x11\n\tsecretKey\x18\x05 \x01(\t\x12\r\n\x05other\x18\x06 \x01(\t\"B\n\x08Instance\x12\x0b\n\x03\x63pu\x18\x01 \x01(\x05\x12\x0e\n\x06memory\x18\x02 \x01(\x05\x12\x0c\n\x04\x64isk\x18\x03 \x01(\x05\x12\x0b\n\x03gpu\x18\x04 \x01(\x05*[\n\x06Status\x12\x0b\n\x07WAITING\x10\x00\x12\x0b\n\x07RUNNING\x10\x01\x12\r\n\tCOMPLETED\x10\x02\x12\n\n\x06\x46\x41ILED\x10\x03\x12\x0b\n\x07TIMEOUT\x10\x04\x12\x0f\n\x0bOUTPUTERROR\x10\x05\x32(\n\x06Master\x12\x1e\n\x06report\x12\n.ReportMsg\x1a\x06.Reply\"\x00\x32\x96\x01\n\x06Worker\x12#\n\x0bstart_vnode\x12\n.VNodeInfo\x1a\x06.Reply\"\x00\x12!\n\nstart_task\x12\t.TaskInfo\x1a\x06.Reply\"\x00\x12 \n\tstop_task\x12\t.TaskInfo\x1a\x06.Reply\"\x00\x12\"\n\nstop_vnode\x12\n.VNodeInfo\x1a\x06.Reply\"\x00\x62\x06proto3')
|
||||||
)
|
)
|
||||||
|
|
||||||
_STATUS = _descriptor.EnumDescriptor(
|
_STATUS = _descriptor.EnumDescriptor(
|
||||||
|
@ -924,7 +924,7 @@ _WORKER = _descriptor.ServiceDescriptor(
|
||||||
index=1,
|
index=1,
|
||||||
options=None,
|
options=None,
|
||||||
serialized_start=1367,
|
serialized_start=1367,
|
||||||
serialized_end=1518,
|
serialized_end=1517,
|
||||||
methods=[
|
methods=[
|
||||||
_descriptor.MethodDescriptor(
|
_descriptor.MethodDescriptor(
|
||||||
name='start_vnode',
|
name='start_vnode',
|
||||||
|
@ -949,7 +949,7 @@ _WORKER = _descriptor.ServiceDescriptor(
|
||||||
full_name='Worker.stop_task',
|
full_name='Worker.stop_task',
|
||||||
index=2,
|
index=2,
|
||||||
containing_service=None,
|
containing_service=None,
|
||||||
input_type=_REPORTMSG,
|
input_type=_TASKINFO,
|
||||||
output_type=_REPLY,
|
output_type=_REPLY,
|
||||||
options=None,
|
options=None,
|
||||||
),
|
),
|
||||||
|
|
|
@ -68,7 +68,7 @@ class WorkerStub(object):
|
||||||
)
|
)
|
||||||
self.stop_task = channel.unary_unary(
|
self.stop_task = channel.unary_unary(
|
||||||
'/Worker/stop_task',
|
'/Worker/stop_task',
|
||||||
request_serializer=rpc__pb2.ReportMsg.SerializeToString,
|
request_serializer=rpc__pb2.TaskInfo.SerializeToString,
|
||||||
response_deserializer=rpc__pb2.Reply.FromString,
|
response_deserializer=rpc__pb2.Reply.FromString,
|
||||||
)
|
)
|
||||||
self.stop_vnode = channel.unary_unary(
|
self.stop_vnode = channel.unary_unary(
|
||||||
|
@ -125,7 +125,7 @@ def add_WorkerServicer_to_server(servicer, server):
|
||||||
),
|
),
|
||||||
'stop_task': grpc.unary_unary_rpc_method_handler(
|
'stop_task': grpc.unary_unary_rpc_method_handler(
|
||||||
servicer.stop_task,
|
servicer.stop_task,
|
||||||
request_deserializer=rpc__pb2.ReportMsg.FromString,
|
request_deserializer=rpc__pb2.TaskInfo.FromString,
|
||||||
response_serializer=rpc__pb2.Reply.SerializeToString,
|
response_serializer=rpc__pb2.Reply.SerializeToString,
|
||||||
),
|
),
|
||||||
'stop_vnode': grpc.unary_unary_rpc_method_handler(
|
'stop_vnode': grpc.unary_unary_rpc_method_handler(
|
||||||
|
|
|
@ -85,10 +85,10 @@ def getenv(key):
|
||||||
return os.environ.get("BATCH_MASTER_PORT","50050")
|
return os.environ.get("BATCH_MASTER_PORT","50050")
|
||||||
elif key == "BATCH_WORKER_PORT":
|
elif key == "BATCH_WORKER_PORT":
|
||||||
return os.environ.get("BATCH_WORKER_PORT","50051")
|
return os.environ.get("BATCH_WORKER_PORT","50051")
|
||||||
elif key == "BATCH_GATEWAY":
|
elif key == "BATCH_TASK_CIDR":
|
||||||
return os.environ.get("BATCH_GATEWAY","10.0.3.1")
|
return os.environ.get("BATCH_TASK_CIDR","4")
|
||||||
elif key == "BATCH_NET":
|
elif key == "BATCH_NET":
|
||||||
return os.environ.get("BATCH_NET","10.0.3.0/24")
|
return os.environ.get("BATCH_NET","10.16.0.0/16")
|
||||||
elif key == "BATCH_MAX_THREAD_WORKER":
|
elif key == "BATCH_MAX_THREAD_WORKER":
|
||||||
return os.environ.get("BATCH_MAX_THREAD_WORKER","5")
|
return os.environ.get("BATCH_MAX_THREAD_WORKER","5")
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -211,7 +211,7 @@ class ovscontrol(object):
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def add_port_gre(bridge, port, remote):
|
def add_port_gre(bridge, port, remote):
|
||||||
try:
|
try:
|
||||||
subprocess.run(['ovs-vsctl', 'add-port', str(bridge), str(port), '--', 'set', 'interface', str(port), 'type=gre', 'options:remote_ip='+str(remote)], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=False, check=True)
|
subprocess.run(['ovs-vsctl', '--may-exist', 'add-port', str(bridge), str(port), '--', 'set', 'interface', str(port), 'type=gre', 'options:remote_ip='+str(remote)], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=False, check=True)
|
||||||
return [True, str(port)]
|
return [True, str(port)]
|
||||||
except subprocess.CalledProcessError as suberror:
|
except subprocess.CalledProcessError as suberror:
|
||||||
return [False, "add port failed : %s" % suberror.stdout.decode('utf-8')]
|
return [False, "add port failed : %s" % suberror.stdout.decode('utf-8')]
|
||||||
|
|
|
@ -228,10 +228,12 @@ class TaskWorker(rpc_pb2_grpc.WorkerServicer):
|
||||||
return rpc_pb2.Reply(status=rpc_pb2.Reply.ACCEPTED,message="")
|
return rpc_pb2.Reply(status=rpc_pb2.Reply.ACCEPTED,message="")
|
||||||
|
|
||||||
def stop_task(self, request, context):
|
def stop_task(self, request, context):
|
||||||
for msg in request.taskmsgs:
|
taskid = request.taskid
|
||||||
lxcname = '%s-batch-%s-%s-%s' % (msg.username,msg.taskid,str(msg.vnodeid),msg.token)
|
username = request.username
|
||||||
logger.info("Stop the task with lxc:"+lxcname)
|
vnodeid = request.vnodeid
|
||||||
subprocess.run("lxc-stop -k -n %s" % lxcname, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True)
|
lxcname = '%s-batch-%s-%s' % (username,taskid,str(vnodeid))
|
||||||
|
logger.info("Stop the task with lxc:"+lxcname)
|
||||||
|
subprocess.run("lxc-stop -k -n %s" % lxcname, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True)
|
||||||
return rpc_pb2.Reply(status=rpc_pb2.Reply.ACCEPTED,message="")
|
return rpc_pb2.Reply(status=rpc_pb2.Reply.ACCEPTED,message="")
|
||||||
|
|
||||||
# stop and remove container
|
# stop and remove container
|
||||||
|
|
Loading…
Reference in New Issue