From 1fdc53c385762e5c8188ef7562fe51af0a83cecf Mon Sep 17 00:00:00 2001 From: Jiri Denemark Date: Mon, 19 Mar 2012 16:48:43 +0100 Subject: [PATCH] qemu: Avoid dangling migration-out job when client dies When a client which started non-p2p migration dies in a bad time, the source libvirtd never clears the migration job and almost nothing can be done with the domain without restarting the daemon. This patch makes use of connection close callbacks and ensures that migration job is properly discarded when the client disconnects. --- src/qemu/qemu_driver.c | 4 +++ src/qemu/qemu_migration.c | 66 +++++++++++++++++++++++++++++++++++++++ src/qemu/qemu_migration.h | 4 +++ 3 files changed, 74 insertions(+) diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 4c272251cf..b00257ba8a 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -8838,6 +8838,9 @@ qemuDomainMigrateBegin3(virDomainPtr domain, * This prevents any other APIs being invoked while migration is taking * place. */ + if (qemuDriverCloseCallbackSet(driver, vm, domain->conn, + qemuMigrationCleanup) < 0) + goto endjob; if (qemuMigrationJobContinue(vm) == 0) { vm = NULL; qemuReportError(VIR_ERR_OPERATION_FAILED, @@ -9069,6 +9072,7 @@ qemuDomainMigrateConfirm3(virDomainPtr domain, phase = QEMU_MIGRATION_PHASE_CONFIRM3; qemuMigrationJobStartPhase(driver, vm, phase); + qemuDriverCloseCallbackUnset(driver, vm, qemuMigrationCleanup); ret = qemuMigrationConfirm(driver, domain->conn, vm, cookiein, cookieinlen, diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index 69d4f0c520..12ce10c86f 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -1037,6 +1037,67 @@ qemuDomainMigrateGraphicsRelocate(struct qemud_driver *driver, } +/* This is called for outgoing non-p2p migrations when a connection to the + * client which initiated the migration was closed but we were waiting for it + * to follow up with the next phase, that is, in between + * qemuDomainMigrateBegin3 and qemuDomainMigratePerform3 or + * qemuDomainMigratePerform3 and qemuDomainMigrateConfirm3. + */ +virDomainObjPtr +qemuMigrationCleanup(struct qemud_driver *driver, + virDomainObjPtr vm, + virConnectPtr conn) +{ + qemuDomainObjPrivatePtr priv = vm->privateData; + + VIR_DEBUG("vm=%s, conn=%p, asyncJob=%s, phase=%s", + vm->def->name, conn, + qemuDomainAsyncJobTypeToString(priv->job.asyncJob), + qemuDomainAsyncJobPhaseToString(priv->job.asyncJob, + priv->job.phase)); + + if (!qemuMigrationJobIsActive(vm, QEMU_ASYNC_JOB_MIGRATION_OUT)) + goto cleanup; + + VIR_DEBUG("The connection which started outgoing migration of domain %s" + " was closed; canceling the migration", + vm->def->name); + + switch ((enum qemuMigrationJobPhase) priv->job.phase) { + case QEMU_MIGRATION_PHASE_BEGIN3: + /* just forget we were about to migrate */ + qemuDomainObjDiscardAsyncJob(driver, vm); + break; + + case QEMU_MIGRATION_PHASE_PERFORM3_DONE: + VIR_WARN("Migration of domain %s finished but we don't know if the" + " domain was successfully started on destination or not", + vm->def->name); + /* clear the job and let higher levels decide what to do */ + qemuDomainObjDiscardAsyncJob(driver, vm); + break; + + case QEMU_MIGRATION_PHASE_PERFORM3: + /* cannot be seen without an active migration API; unreachable */ + case QEMU_MIGRATION_PHASE_CONFIRM3: + case QEMU_MIGRATION_PHASE_CONFIRM3_CANCELLED: + /* all done; unreachable */ + case QEMU_MIGRATION_PHASE_PREPARE: + case QEMU_MIGRATION_PHASE_FINISH2: + case QEMU_MIGRATION_PHASE_FINISH3: + /* incoming migration; unreachable */ + case QEMU_MIGRATION_PHASE_PERFORM2: + /* single phase outgoing migration; unreachable */ + case QEMU_MIGRATION_PHASE_NONE: + case QEMU_MIGRATION_PHASE_LAST: + /* unreachable */ + ; + } + +cleanup: + return vm; +} + /* The caller is supposed to lock the vm and start a migration job. */ char *qemuMigrationBegin(struct qemud_driver *driver, virDomainObjPtr vm, @@ -2547,6 +2608,7 @@ qemuMigrationPerformPhase(struct qemud_driver *driver, } qemuMigrationJobStartPhase(driver, vm, QEMU_MIGRATION_PHASE_PERFORM3); + qemuDriverCloseCallbackUnset(driver, vm, qemuMigrationCleanup); resume = virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING; ret = doNativeMigrate(driver, vm, uri, cookiein, cookieinlen, @@ -2577,6 +2639,10 @@ qemuMigrationPerformPhase(struct qemud_driver *driver, qemuMigrationJobSetPhase(driver, vm, QEMU_MIGRATION_PHASE_PERFORM3_DONE); + if (qemuDriverCloseCallbackSet(driver, vm, conn, + qemuMigrationCleanup) < 0) + goto endjob; + endjob: if (ret < 0) refs = qemuMigrationJobFinish(driver, vm); diff --git a/src/qemu/qemu_migration.h b/src/qemu/qemu_migration.h index 41e4eac9dd..5fab0ca88f 100644 --- a/src/qemu/qemu_migration.h +++ b/src/qemu/qemu_migration.h @@ -77,6 +77,10 @@ int qemuMigrationJobFinish(struct qemud_driver *driver, virDomainObjPtr obj) int qemuMigrationSetOffline(struct qemud_driver *driver, virDomainObjPtr vm); +virDomainObjPtr qemuMigrationCleanup(struct qemud_driver *driver, + virDomainObjPtr vm, + virConnectPtr conn); + char *qemuMigrationBegin(struct qemud_driver *driver, virDomainObjPtr vm, const char *xmlin,