From 7736f219db142bfd4b52026d53ada27c675996ef Mon Sep 17 00:00:00 2001 From: Firmlyzhu Date: Sat, 20 Apr 2019 18:18:00 +0800 Subject: [PATCH 1/3] add migrate container in vclustermgr --- src/master/vclustermgr.py | 53 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/src/master/vclustermgr.py b/src/master/vclustermgr.py index caef4ad..63fed07 100755 --- a/src/master/vclustermgr.py +++ b/src/master/vclustermgr.py @@ -728,6 +728,59 @@ class VclusterMgr(object): full_clusters.append(single_cluster)''' return [True, clusters] + def migrate_container(self, clustername, username, containername, new_host, proxy_public_ip, user_info): + [status, info] = self.get_clusterinfo(clustername, username) + if not status: + return [False, "cluster not found"] + if info['status'] != 'stopped': + return [False, 'cluster is not stopped'] + + con_db = Container.query.get(containername) + if con_db is None: + return [False, 'Container not found'] + if host == new_host: + return [False, 'Container has been on the new host'] + + oldworker = self.nodemgr.ip_to_rpc(con_db.host) + if oldworker is None: + return [False, "Old host worker can't be found or has been stopped."] + oldworker.stop_container(containername) + imagename = "migrate-" + containername + "-" + datetime.datetime.now().strftime("%Y-%m-%d") + status,msg = oldworker.create_image(username,imagename,containername,"",10000) + if not status: + return [False, msg] + #con_db.lastsave = datetime.datetime.now() + #con_db.image = imagename + + '''self.networkmgr.load_usrgw(username) + proxy_server_ip = self.networkmgr.usrgws[username] + [status, proxy_public_ip] = self.etcd.getkey("machines/publicIP/"+proxy_server_ip) + if not status: + logger.error("Fail to get proxy_public_ip %s."%(proxy_server_ip)) + return [False, "Fail to get proxy server public IP."]''' + uid = json.loads(user_info)["data"]["id"] + setting = { + 'cpu': con_db.setting_cpu, + 'memory': con_db.setting_mem, + 'disk': con_db.setting_disk + } + _, clusterid, cid = containername.split('-') + hostname = "host-"+str(cid) + gateway = self.networkmgr.get_usergw(username) + image = {'name':imagename,'type':'private','owner':username } + + worker = self.nodemgr.ip_to_rpc(new_host) + if worker is None: + self.imagemgr.removeImage(username,imagename) + return [False, "New host worker can't be found or has been stopped."] + status,msg = worker.create_container(containername, proxy_public_ip, username, uid, json.dumps(setting), + clustername, str(clusterid), str(cid), hostname, con_db.ip, gateway, json.dumps(image)) + if not status: + return [False, msg] + oldworker.delete_container(containername) + self.imagemgr.removeImage(username,imagename) + return [True,""] + def is_cluster(self, clustername, username): [status, clusters] = self.list_clusters(username) if clustername in clusters: From e9dc2b0fdce5933ddc19983fceec19e5bd29fb01 Mon Sep 17 00:00:00 2001 From: Firmlyzhu Date: Sun, 21 Apr 2019 19:53:41 +0800 Subject: [PATCH 2/3] Add migrate cluster --- src/master/httprest.py | 38 +++++++++++++++++++++++++++++++++++ src/master/userManager.py | 2 ++ src/master/vclustermgr.py | 42 +++++++++++++++++++++++++++++++++++---- 3 files changed, 78 insertions(+), 4 deletions(-) diff --git a/src/master/httprest.py b/src/master/httprest.py index fb3b059..0aa18f7 100755 --- a/src/master/httprest.py +++ b/src/master/httprest.py @@ -389,6 +389,44 @@ def save_cluster(user, beans, form): finally: G_ulockmgr.release(user) +@app.route("/admin/migrate_cluster/", methods=['POST']) 
+@auth_key_required +def migrate_cluster(): + global G_vclustermgr + global G_ulockmgr + user = request.form.get('username',None) + if user is None: + return json.dumps({'success':'false', 'message':'User is required!'}) + clustername = form.get('clustername', None) + if (clustername == None): + return json.dumps({'success':'false', 'message':'clustername is null'}) + containername = form.get('containername', None) + new_hosts = form.get('new_hosts', None) + if (new_hosts == None): + return json.dumps({'success':'false', 'message':'new_hosts is null'}) + new_host_list = new_hosts.split(',') + G_ulockmgr.acquire(user) + auth_key = env.getenv('AUTH_KEY') + try: + logger.info ("handle request : migrate cluster to %s. user:%s clustername:%s" % (str(new_hosts), user, clustername)) + res = post_to_user("/master/user/groupinfo/", {'auth_key':auth_key}) + groups = json.loads(res['groups']) + quotas = {} + rc_info = post_to_user("/master/user/recoverinfo/", {'username':user,'auth_key':auth_key}) + groupname = re_info['groupname'] + user_info = {"data":{"id":rc_info['uid'],"groupinfo":quotas[groupname]}} + + [status,msg] = G_vclustermgr.migrate_cluster(clustername, username, new_host_list, user_info) + if not status: + logger.error(msg) + return json.dumps({'success':'false', 'message': msg}) + return json.dumps({'success':'true', 'action':'migrate_container'}) + except Exception as ex: + logger.error(str(ex)) + return json.dumps({'success':'false', 'message': str(ex)}) + finally: + G_ulockmgr.release(user) + @app.route("/image/list/", methods=['POST']) @login_required diff --git a/src/master/userManager.py b/src/master/userManager.py index 4f7d5f1..0f5d5b7 100755 --- a/src/master/userManager.py +++ b/src/master/userManager.py @@ -411,6 +411,7 @@ class userManager: "success":'true', "data":{ "username" : user.username, + "id": user.id, "password" : user.password, "avatar" : user.avatar, "nickname" : user.nickname, @@ -440,6 +441,7 @@ class userManager: "success": 'true', "data":{ "username" : user.username, + "id": user.id, "password" : user.password, "avatar" : user.avatar, "nickname" : user.nickname, diff --git a/src/master/vclustermgr.py b/src/master/vclustermgr.py index 63fed07..0b8b69f 100755 --- a/src/master/vclustermgr.py +++ b/src/master/vclustermgr.py @@ -728,7 +728,7 @@ class VclusterMgr(object): full_clusters.append(single_cluster)''' return [True, clusters] - def migrate_container(self, clustername, username, containername, new_host, proxy_public_ip, user_info): + def migrate_container(self, clustername, username, containername, new_host, user_info): [status, info] = self.get_clusterinfo(clustername, username) if not status: return [False, "cluster not found"] @@ -746,19 +746,21 @@ class VclusterMgr(object): return [False, "Old host worker can't be found or has been stopped."] oldworker.stop_container(containername) imagename = "migrate-" + containername + "-" + datetime.datetime.now().strftime("%Y-%m-%d") + logger.info("Save Image for container:%s imagename:%s host:%s"%(containername, imagename, con_db.host)) status,msg = oldworker.create_image(username,imagename,containername,"",10000) if not status: return [False, msg] #con_db.lastsave = datetime.datetime.now() #con_db.image = imagename - '''self.networkmgr.load_usrgw(username) + self.networkmgr.load_usrgw(username) proxy_server_ip = self.networkmgr.usrgws[username] [status, proxy_public_ip] = self.etcd.getkey("machines/publicIP/"+proxy_server_ip) if not status: + self.imagemgr.removeImage(username,imagename) logger.error("Fail to get 
proxy_public_ip %s."%(proxy_server_ip)) - return [False, "Fail to get proxy server public IP."]''' - uid = json.loads(user_info)["data"]["id"] + return [False, "Fail to get proxy server public IP."] + uid = user_info['data']['id'] setting = { 'cpu': con_db.setting_cpu, 'memory': con_db.setting_mem, @@ -768,6 +770,9 @@ class VclusterMgr(object): hostname = "host-"+str(cid) gateway = self.networkmgr.get_usergw(username) image = {'name':imagename,'type':'private','owner':username } + logger.info("Migrate: proxy_ip:%s uid:%s setting:%s clusterid:%s cid:%s hostname:%s gateway:%s image:%s" + %(proxy_public_ip, str(uid), str(setting), clusterid, cid, hostname, gateway, str(image)) + logger.info("Migrate: create container(%s) on new host %s"%(containername, new_host)) worker = self.nodemgr.ip_to_rpc(new_host) if worker is None: @@ -776,11 +781,40 @@ class VclusterMgr(object): status,msg = worker.create_container(containername, proxy_public_ip, username, uid, json.dumps(setting), clustername, str(clusterid), str(cid), hostname, con_db.ip, gateway, json.dumps(image)) if not status: + self.imagemgr.removeImage(username,imagename) return [False, msg] + con_db.host = new_host + db.session.commit() oldworker.delete_container(containername) self.imagemgr.removeImage(username,imagename) return [True,""] + def migrate_cluster(self, clustername, username, new_host_list, user_info): + [status, info] = self.get_clusterinfo(clustername, username) + if not status: + return [False, "cluster not found"] + prestatus = info['status'] + self.stop_cluster(clustername, username) + for container in info['containers']: + if container['host'] in new_host_list: + continue + random.shuffle(new_host_list) + for new_host in new_host_list: + status,msg = self.migrate_container(clustername,username,container['containername'],new_host,user_info) + if status: + break + else: + logger.error(msg) + else: + if prestatus == 'running': + self.start_cluster(clustername, username, user_info) + return [False, msg] + if prestatus == 'running': + status, msg = self.start_cluster(clustername, username, user_info) + if not status: + return [False, msg] + return [True, ""] + def is_cluster(self, clustername, username): [status, clusters] = self.list_clusters(username) if clustername in clusters: From aedee43caf90e5d02c1b8bb202a2ce11d1b718db Mon Sep 17 00:00:00 2001 From: zhuyj17 Date: Mon, 22 Apr 2019 00:48:08 +0800 Subject: [PATCH 3/3] debug migration --- src/master/httprest.py | 17 +++++++++++------ src/master/vclustermgr.py | 23 ++++++++++++----------- 2 files changed, 23 insertions(+), 17 deletions(-) diff --git a/src/master/httprest.py b/src/master/httprest.py index 0aa18f7..620652d 100755 --- a/src/master/httprest.py +++ b/src/master/httprest.py @@ -397,11 +397,10 @@ def migrate_cluster(): user = request.form.get('username',None) if user is None: return json.dumps({'success':'false', 'message':'User is required!'}) - clustername = form.get('clustername', None) + clustername = request.form.get('clustername', None) if (clustername == None): return json.dumps({'success':'false', 'message':'clustername is null'}) - containername = form.get('containername', None) - new_hosts = form.get('new_hosts', None) + new_hosts = request.form.get('new_hosts', None) if (new_hosts == None): return json.dumps({'success':'false', 'message':'new_hosts is null'}) new_host_list = new_hosts.split(',') @@ -412,17 +411,23 @@ def migrate_cluster(): res = post_to_user("/master/user/groupinfo/", {'auth_key':auth_key}) groups = json.loads(res['groups']) quotas = {} 
+ for group in groups: + #logger.info(group) + quotas[group['name']] = group['quotas'] rc_info = post_to_user("/master/user/recoverinfo/", {'username':user,'auth_key':auth_key}) - groupname = re_info['groupname'] + groupname = rc_info['groupname'] user_info = {"data":{"id":rc_info['uid'],"groupinfo":quotas[groupname]}} - [status,msg] = G_vclustermgr.migrate_cluster(clustername, username, new_host_list, user_info) + logger.info("Migrate cluster for user(%s) cluster(%s) to new_hosts(%s). user_info(%s)" + %(clustername, user, str(new_host_list), user_info)) + + [status,msg] = G_vclustermgr.migrate_cluster(clustername, user, new_host_list, user_info) if not status: logger.error(msg) return json.dumps({'success':'false', 'message': msg}) return json.dumps({'success':'true', 'action':'migrate_container'}) except Exception as ex: - logger.error(str(ex)) + logger.error(traceback.format_exc()) return json.dumps({'success':'false', 'message': str(ex)}) finally: G_ulockmgr.release(user) diff --git a/src/master/vclustermgr.py b/src/master/vclustermgr.py index 0b8b69f..fb3cff8 100755 --- a/src/master/vclustermgr.py +++ b/src/master/vclustermgr.py @@ -738,7 +738,7 @@ class VclusterMgr(object): con_db = Container.query.get(containername) if con_db is None: return [False, 'Container not found'] - if host == new_host: + if con_db.host == new_host: return [False, 'Container has been on the new host'] oldworker = self.nodemgr.ip_to_rpc(con_db.host) @@ -757,7 +757,7 @@ class VclusterMgr(object): proxy_server_ip = self.networkmgr.usrgws[username] [status, proxy_public_ip] = self.etcd.getkey("machines/publicIP/"+proxy_server_ip) if not status: - self.imagemgr.removeImage(username,imagename) + self.imgmgr.removeImage(username,imagename) logger.error("Fail to get proxy_public_ip %s."%(proxy_server_ip)) return [False, "Fail to get proxy server public IP."] uid = user_info['data']['id'] @@ -771,22 +771,22 @@ class VclusterMgr(object): gateway = self.networkmgr.get_usergw(username) image = {'name':imagename,'type':'private','owner':username } logger.info("Migrate: proxy_ip:%s uid:%s setting:%s clusterid:%s cid:%s hostname:%s gateway:%s image:%s" - %(proxy_public_ip, str(uid), str(setting), clusterid, cid, hostname, gateway, str(image)) + %(proxy_public_ip, str(uid), str(setting), clusterid, cid, hostname, gateway, str(image))) logger.info("Migrate: create container(%s) on new host %s"%(containername, new_host)) worker = self.nodemgr.ip_to_rpc(new_host) if worker is None: - self.imagemgr.removeImage(username,imagename) + self.imgmgr.removeImage(username,imagename) return [False, "New host worker can't be found or has been stopped."] status,msg = worker.create_container(containername, proxy_public_ip, username, uid, json.dumps(setting), clustername, str(clusterid), str(cid), hostname, con_db.ip, gateway, json.dumps(image)) if not status: - self.imagemgr.removeImage(username,imagename) + self.imgmgr.removeImage(username,imagename) return [False, msg] con_db.host = new_host db.session.commit() oldworker.delete_container(containername) - self.imagemgr.removeImage(username,imagename) + self.imgmgr.removeImage(username,imagename) return [True,""] def migrate_cluster(self, clustername, username, new_host_list, user_info): @@ -809,11 +809,12 @@ class VclusterMgr(object): if prestatus == 'running': self.start_cluster(clustername, username, user_info) return [False, msg] - if prestatus == 'running': - status, msg = self.start_cluster(clustername, username, user_info) - if not status: - return [False, msg] - return [True, ""] + 
logger.info("[Migrate] prestatus:%s for cluster(%s) user(%s)"%(prestatus, clustername, username)) + if prestatus == 'running': + status, msg = self.start_cluster(clustername, username, user_info) + if not status: + return [False, msg] + return [True, ""] def is_cluster(self, clustername, username): [status, clusters] = self.list_clusters(username)