From f25b13b6e5927da3b7e0546986a8d3f9a9fefe1b Mon Sep 17 00:00:00 2001 From: Jonathon Jongsma Date: Tue, 2 Feb 2021 16:21:21 -0600 Subject: [PATCH] nodedev: fix hang when destroying an mdev in use Calling `mdevctl stop` for a mediated device that is in use by an active domain will block until that vm exits (or the vm closes the device). Since the nodedev driver cannot query the hypervisor driver to see whether any active domains are using the device, we resort to a workaround that relies on the fact that a vfio group can only be opened by one user at a time. If we get an EBUSY error when attempting to open the group file, we assume the device is in use and refuse to try to destroy that device. Signed-off-by: Jonathon Jongsma Reviewed-by: Erik Skultety --- src/node_device/node_device_driver.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/node_device/node_device_driver.c b/src/node_device/node_device_driver.c index 3c79c3aa42..01e189b73a 100644 --- a/src/node_device/node_device_driver.c +++ b/src/node_device/node_device_driver.c @@ -1181,8 +1181,27 @@ nodeDeviceDestroy(virNodeDevicePtr device) ret = 0; } else if (nodeDeviceHasCapability(def, VIR_NODE_DEV_CAP_MDEV)) { + /* If this mediated device is in use by a vm, attempting to stop it + * will block until the vm closes the device. The nodedev driver + * cannot query the hypervisor driver to determine whether the device + * is in use by any active domains, since that would introduce circular + * dependencies between daemons and add a risk of deadlocks. So we need + * to resort to a workaround. vfio only allows the group for a device + * to be opened by one user at a time. So if we get EBUSY when opening + * the group, we infer that the device is in use and therefore we + * shouldn't try to remove the device. */ + g_autofree char *vfiogroup = + virMediatedDeviceGetIOMMUGroupDev(def->caps->data.mdev.uuid); + VIR_AUTOCLOSE fd = open(vfiogroup, O_RDONLY); g_autofree char *errmsg = NULL; + if (fd < 0 && errno == EBUSY) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Unable to destroy '%s': device in use"), + def->name); + goto cleanup; + } + if (virMdevctlStop(def, &errmsg) < 0) { if (errmsg) virReportError(VIR_ERR_INTERNAL_ERROR,