mirror of https://gitee.com/openkylin/libvirt.git
qemu: Recover from interrupted jobs
Detect and react to situations where libvirtd was restarted or killed while a job was active.
This commit is contained in:
parent
ff340a84b8
commit
e6704af1fc
|
@ -142,6 +142,20 @@ qemuDomainObjResetAsyncJob(qemuDomainObjPrivatePtr priv)
|
|||
memset(&job->signalsData, 0, sizeof(job->signalsData));
|
||||
}
|
||||
|
||||
void
|
||||
qemuDomainObjRestoreJob(virDomainObjPtr obj,
|
||||
struct qemuDomainJobObj *job)
|
||||
{
|
||||
qemuDomainObjPrivatePtr priv = obj->privateData;
|
||||
|
||||
memset(job, 0, sizeof(*job));
|
||||
job->active = priv->job.active;
|
||||
job->asyncJob = priv->job.asyncJob;
|
||||
|
||||
qemuDomainObjResetJob(priv);
|
||||
qemuDomainObjResetAsyncJob(priv);
|
||||
}
|
||||
|
||||
static void
|
||||
qemuDomainObjFreeJob(qemuDomainObjPrivatePtr priv)
|
||||
{
|
||||
|
|
|
@ -184,6 +184,8 @@ void qemuDomainObjEndNestedJob(struct qemud_driver *driver,
|
|||
void qemuDomainObjSaveJob(struct qemud_driver *driver, virDomainObjPtr obj);
|
||||
void qemuDomainObjSetAsyncJobMask(virDomainObjPtr obj,
|
||||
unsigned long long allowedJobs);
|
||||
void qemuDomainObjRestoreJob(virDomainObjPtr obj,
|
||||
struct qemuDomainJobObj *job);
|
||||
void qemuDomainObjDiscardAsyncJob(struct qemud_driver *driver,
|
||||
virDomainObjPtr obj);
|
||||
|
||||
|
|
|
@ -2231,6 +2231,80 @@ qemuProcessUpdateState(struct qemud_driver *driver, virDomainObjPtr vm)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
qemuProcessRecoverJob(struct qemud_driver *driver,
|
||||
virDomainObjPtr vm,
|
||||
virConnectPtr conn,
|
||||
const struct qemuDomainJobObj *job)
|
||||
{
|
||||
virDomainState state;
|
||||
int reason;
|
||||
|
||||
state = virDomainObjGetState(vm, &reason);
|
||||
|
||||
switch (job->asyncJob) {
|
||||
case QEMU_ASYNC_JOB_MIGRATION_OUT:
|
||||
case QEMU_ASYNC_JOB_MIGRATION_IN:
|
||||
/* we don't know what to do yet */
|
||||
break;
|
||||
|
||||
case QEMU_ASYNC_JOB_SAVE:
|
||||
case QEMU_ASYNC_JOB_DUMP:
|
||||
/* TODO cancel possibly running migrate operation */
|
||||
/* resume the domain but only if it was paused as a result of
|
||||
* running save/dump operation */
|
||||
if (state == VIR_DOMAIN_PAUSED &&
|
||||
((job->asyncJob == QEMU_ASYNC_JOB_DUMP &&
|
||||
reason == VIR_DOMAIN_PAUSED_DUMP) ||
|
||||
(job->asyncJob == QEMU_ASYNC_JOB_SAVE &&
|
||||
reason == VIR_DOMAIN_PAUSED_SAVE) ||
|
||||
reason == VIR_DOMAIN_PAUSED_UNKNOWN)) {
|
||||
if (qemuProcessStartCPUs(driver, vm, conn,
|
||||
VIR_DOMAIN_RUNNING_UNPAUSED) < 0) {
|
||||
VIR_WARN("Could not resume domain %s after", vm->def->name);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case QEMU_ASYNC_JOB_NONE:
|
||||
case QEMU_ASYNC_JOB_LAST:
|
||||
break;
|
||||
}
|
||||
|
||||
if (!virDomainObjIsActive(vm))
|
||||
return -1;
|
||||
|
||||
switch (job->active) {
|
||||
case QEMU_JOB_QUERY:
|
||||
/* harmless */
|
||||
break;
|
||||
|
||||
case QEMU_JOB_DESTROY:
|
||||
VIR_DEBUG("Domain %s should have already been destroyed",
|
||||
vm->def->name);
|
||||
return -1;
|
||||
|
||||
case QEMU_JOB_SUSPEND:
|
||||
/* mostly harmless */
|
||||
break;
|
||||
|
||||
case QEMU_JOB_MODIFY:
|
||||
/* XXX depending on the command we may be in an inconsistent state and
|
||||
* we should probably fall back to "monitor error" state and refuse to
|
||||
*/
|
||||
break;
|
||||
|
||||
case QEMU_JOB_ASYNC:
|
||||
case QEMU_JOB_ASYNC_NESTED:
|
||||
/* async job was already handled above */
|
||||
case QEMU_JOB_NONE:
|
||||
case QEMU_JOB_LAST:
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct qemuProcessReconnectData {
|
||||
virConnectPtr conn;
|
||||
struct qemud_driver *driver;
|
||||
|
@ -2247,9 +2321,12 @@ qemuProcessReconnect(void *payload, const void *name ATTRIBUTE_UNUSED, void *opa
|
|||
struct qemud_driver *driver = data->driver;
|
||||
qemuDomainObjPrivatePtr priv;
|
||||
virConnectPtr conn = data->conn;
|
||||
struct qemuDomainJobObj oldjob;
|
||||
|
||||
virDomainObjLock(obj);
|
||||
|
||||
qemuDomainObjRestoreJob(obj, &oldjob);
|
||||
|
||||
VIR_DEBUG("Reconnect monitor to %p '%s'", obj, obj->def->name);
|
||||
|
||||
priv = obj->privateData;
|
||||
|
@ -2295,6 +2372,9 @@ qemuProcessReconnect(void *payload, const void *name ATTRIBUTE_UNUSED, void *opa
|
|||
if (qemuProcessFiltersInstantiate(conn, obj->def))
|
||||
goto error;
|
||||
|
||||
if (qemuProcessRecoverJob(driver, obj, conn, &oldjob) < 0)
|
||||
goto error;
|
||||
|
||||
priv->job.active = QEMU_JOB_NONE;
|
||||
|
||||
/* update domain state XML with possibly updated state in virDomainObj */
|
||||
|
|
Loading…
Reference in New Issue