From 6e13860cb46e74c94ee67477942693d8757cf5de Mon Sep 17 00:00:00 2001
From: Laine Stump
Date: Mon, 29 Apr 2013 13:15:26 -0400
Subject: [PATCH] qemu: add vfio devices to cgroup ACL when appropriate

PCI device assignment using VFIO requires read/write access by the
qemu process to /dev/vfio/vfio, and /dev/vfio/nn, where "nn" is the
VFIO group number that the assigned device belongs to (and can be
found with the function virPCIDeviceGetVFIOGroupDev).

/dev/vfio/vfio can be accessible to any guest without danger
(according to vfio developers), so it is added to the static ACL.

The group device must be dynamically added to the cgroup ACL for each
vfio hostdev in two places:

1) for any devices in the persistent config when the domain is started
   (done during qemuSetupCgroup())

2) at device attach time for any hotplug devices (done in
   qemuDomainAttachHostDevice)

The group device must be removed from the ACL when a device is
"hot-unplugged" (in qemuDomainDetachThisHostDevice()).

Note that USB devices are already doing their own cgroup setup and
teardown in the hostdev-usb specific function. I chose to make the new
functions generic and call them in a common location though. We can
then move the USB-specific code (which is duplicated in two locations)
to this single location. I'll be posting a followup patch to do that.
---
 src/qemu/qemu.conf                 |   2 +-
 src/qemu/qemu_cgroup.c             | 133 ++++++++++++++++++++++++++++-
 src/qemu/qemu_cgroup.h             |   8 +-
 src/qemu/qemu_hotplug.c            |  12 ++-
 src/qemu/test_libvirtd_qemu.aug.in |   1 +
 5 files changed, 152 insertions(+), 4 deletions(-)

diff --git a/src/qemu/qemu.conf b/src/qemu/qemu.conf
index 87bdf70d8d..0f0a24c20e 100644
--- a/src/qemu/qemu.conf
+++ b/src/qemu/qemu.conf
@@ -241,7 +241,7 @@
 #    "/dev/null", "/dev/full", "/dev/zero",
 #    "/dev/random", "/dev/urandom",
 #    "/dev/ptmx", "/dev/kvm", "/dev/kqemu",
-#    "/dev/rtc","/dev/hpet"
+#    "/dev/rtc","/dev/hpet", "/dev/vfio/vfio"
 #]
 
 
diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
index 891984a5b1..92c53d9e1c 100644
--- a/src/qemu/qemu_cgroup.c
+++ b/src/qemu/qemu_cgroup.c
@@ -39,7 +39,7 @@ static const char *const defaultDeviceACL[] = {
     "/dev/null", "/dev/full", "/dev/zero",
     "/dev/random", "/dev/urandom",
     "/dev/ptmx", "/dev/kvm", "/dev/kqemu",
-    "/dev/rtc", "/dev/hpet",
+    "/dev/rtc", "/dev/hpet", "/dev/vfio/vfio",
     NULL,
 };
 #define DEVICE_PTY_MAJOR 136
@@ -214,6 +214,131 @@ int qemuSetupHostUsbDeviceCgroup(virUSBDevicePtr dev ATTRIBUTE_UNUSED,
 }
 
 
+int
+qemuSetupHostdevCGroup(virDomainObjPtr vm,
+                       virDomainHostdevDefPtr dev)
+{
+    int ret = -1;
+    qemuDomainObjPrivatePtr priv = vm->privateData;
+    virPCIDevicePtr pci = NULL;
+    char *path = NULL;
+
+    /* Only PCI hostdevs assigned through the vfio backend need a
+     * cgroup ACL entry (for their VFIO group device); this is called
+     * for *all* hostdevs, and the others are a successful no-op.
+     */
+
+    if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES))
+        return 0;
+
+    if (dev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS) {
+
+        switch (dev->source.subsys.type) {
+        case VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI:
+            if (dev->source.subsys.u.pci.backend
+                == VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO) {
+                int rc;
+
+                pci = virPCIDeviceNew(dev->source.subsys.u.pci.addr.domain,
+                                      dev->source.subsys.u.pci.addr.bus,
+                                      dev->source.subsys.u.pci.addr.slot,
+                                      dev->source.subsys.u.pci.addr.function);
+                if (!pci)
+                    goto cleanup;
+
+                if (!(path = virPCIDeviceGetVFIOGroupDev(pci)))
+                    goto cleanup;
+
+                VIR_DEBUG("Cgroup allow %s for PCI device assignment", path);
+                rc = virCgroupAllowDevicePath(priv->cgroup, path,
+                                              VIR_CGROUP_DEVICE_RW);
+                virDomainAuditCgroupPath(vm, priv->cgroup,
+                                         "allow", path, "rw", rc);
+                if (rc < 0) {
+                    virReportSystemError(-rc,
+                                         _("Unable to allow access "
+                                           "for device path %s"),
+                                         path);
+                    goto cleanup;
+                }
+            }
+            break;
+        default:
+            break;
+        }
+    }
+
+    ret = 0;
+cleanup:
+    virPCIDeviceFree(pci);
+    VIR_FREE(path);
+    return ret;
+}
+
+
+
+int
+qemuTeardownHostdevCgroup(virDomainObjPtr vm,
+                          virDomainHostdevDefPtr dev)
+{
+    int ret = -1;
+    qemuDomainObjPrivatePtr priv = vm->privateData;
+    virPCIDevicePtr pci = NULL;
+    char *path = NULL;
+
+    /* Only PCI hostdevs assigned through the vfio backend have a
+     * cgroup ACL entry to revoke (their VFIO group device); this is
+     * called for *all* hostdevs, and the others are a successful no-op.
+     */
+
+    if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES))
+        return 0;
+
+    if (dev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS) {
+
+        switch (dev->source.subsys.type) {
+        case VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI:
+            if (dev->source.subsys.u.pci.backend
+                == VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO) {
+                int rc;
+
+                pci = virPCIDeviceNew(dev->source.subsys.u.pci.addr.domain,
+                                      dev->source.subsys.u.pci.addr.bus,
+                                      dev->source.subsys.u.pci.addr.slot,
+                                      dev->source.subsys.u.pci.addr.function);
+                if (!pci)
+                    goto cleanup;
+
+                if (!(path = virPCIDeviceGetVFIOGroupDev(pci)))
+                    goto cleanup;
+
+                VIR_DEBUG("Cgroup deny %s for PCI device assignment", path);
+                rc = virCgroupDenyDevicePath(priv->cgroup, path,
+                                             VIR_CGROUP_DEVICE_RWM);
+                virDomainAuditCgroupPath(vm, priv->cgroup,
+                                         "deny", path, "rwm", rc);
+                if (rc < 0) {
+                    virReportSystemError(-rc,
+                                         _("Unable to deny access "
+                                           "for device path %s"),
+                                         path);
+                    goto cleanup;
+                }
+            }
+            break;
+        default:
+            break;
+        }
+    }
+
+    ret = 0;
+cleanup:
+    virPCIDeviceFree(pci);
+    VIR_FREE(path);
+    return ret;
+}
+
+
 int qemuInitCgroup(virQEMUDriverPtr driver,
                    virDomainObjPtr vm,
                    bool startup)
@@ -423,6 +548,12 @@ int qemuSetupCgroup(virQEMUDriverPtr driver,
             virDomainHostdevDefPtr hostdev = vm->def->hostdevs[i];
             virUSBDevicePtr usb;
 
+            if (qemuSetupHostdevCGroup(vm, hostdev) < 0)
+                goto cleanup;
+
+            /* NB: the code below here should be moved into
+             * qemuSetupHostdevCGroup()
+             */
             if (hostdev->mode != VIR_DOMAIN_HOSTDEV_MODE_SUBSYS)
                 continue;
             if (hostdev->source.subsys.type != VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_USB)
diff --git a/src/qemu/qemu_cgroup.h b/src/qemu/qemu_cgroup.h
index e63f443ed9..f499cb6873 100644
--- a/src/qemu/qemu_cgroup.h
+++ b/src/qemu/qemu_cgroup.h
@@ -1,7 +1,7 @@
 /*
  * qemu_cgroup.h: QEMU cgroup management
  *
- * Copyright (C) 2006-2007, 2009-2012 Red Hat, Inc.
+ * Copyright (C) 2006-2007, 2009-2013 Red Hat, Inc.
  * Copyright (C) 2006 Daniel P. Berrange
  *
  * This library is free software; you can redistribute it and/or
@@ -36,6 +36,12 @@ int qemuTeardownDiskCgroup(virDomainObjPtr vm,
 int qemuSetupHostUsbDeviceCgroup(virUSBDevicePtr dev,
                                  const char *path,
                                  void *opaque);
+int qemuSetupHostdevCGroup(virDomainObjPtr vm,
+                           virDomainHostdevDefPtr dev)
+   ATTRIBUTE_RETURN_CHECK;
+int qemuTeardownHostdevCgroup(virDomainObjPtr vm,
+                              virDomainHostdevDefPtr dev)
+   ATTRIBUTE_RETURN_CHECK;
 int qemuInitCgroup(virQEMUDriverPtr driver,
                    virDomainObjPtr vm,
                    bool startup);
diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c
index f5fa1c4cf0..6beb2d6ee1 100644
--- a/src/qemu/qemu_hotplug.c
+++ b/src/qemu/qemu_hotplug.c
@@ -1225,9 +1225,12 @@ int qemuDomainAttachHostDevice(virQEMUDriverPtr driver,
         virUSBDeviceListSteal(list, usb);
     }
 
+    if (qemuSetupHostdevCGroup(vm, hostdev) < 0)
+        goto cleanup;
+
     if (virSecurityManagerSetHostdevLabel(driver->securityManager,
                                           vm->def, hostdev, NULL) < 0)
-        goto cleanup;
+        goto teardown_cgroup;
 
     switch (hostdev->source.subsys.type) {
     case VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI:
@@ -1257,6 +1260,10 @@ error:
                                               vm->def, hostdev, NULL) < 0)
         VIR_WARN("Unable to restore host device labelling on hotplug fail");
 
+teardown_cgroup:
+    if (qemuTeardownHostdevCgroup(vm, hostdev) < 0)
+        VIR_WARN("Unable to remove host device cgroup ACL on hotplug fail");
+
 cleanup:
     virObjectUnref(list);
     if (usb)
@@ -2499,6 +2506,9 @@ int qemuDomainDetachThisHostDevice(virQEMUDriverPtr driver,
     }
 
     if (!ret) {
+        if (qemuTeardownHostdevCgroup(vm, detach) < 0)
+            VIR_WARN("Failed to remove host device cgroup ACL");
+
         if (virSecurityManagerRestoreHostdevLabel(driver->securityManager,
                                                   vm->def, detach, NULL) < 0) {
             VIR_WARN("Failed to restore host device labelling");
diff --git a/src/qemu/test_libvirtd_qemu.aug.in b/src/qemu/test_libvirtd_qemu.aug.in
index 0aec9977de..26ca0688d8 100644
--- a/src/qemu/test_libvirtd_qemu.aug.in
+++ b/src/qemu/test_libvirtd_qemu.aug.in
@@ -42,6 +42,7 @@ module Test_libvirtd_qemu =
     { "8" = "/dev/kqemu" }
     { "9" = "/dev/rtc" }
     { "10" = "/dev/hpet" }
+    { "11" = "/dev/vfio/vfio" }
 }
 { "save_image_format" = "raw" }
 { "dump_image_format" = "raw" }