KVM: PPC: Book3S HV: XICS: Replace the 'destroy' method by a 'release' method

Similarly to what was done with XICS-on-XIVE and XIVE native KVM devices
with commit 5422e95103 ("KVM: PPC: Book3S HV: XIVE: Replace the 'destroy'
method by a 'release' method"), convert the historical XICS KVM device to
implement the 'release' method. This is needed to run nested guests with
an in-kernel IRQ chip. A typical POWER9 guest can select XICS or XIVE
during boot, which requires to be able to destroy and to re-create the
KVM device. Only the historical XICS KVM device is available under pseries
at the current time and it still uses the legacy 'destroy' method.

Switching to 'release' means that vCPUs might still be running when the
device is destroyed. In order to avoid potential use-after-free, the
kvmppc_xics structure is allocated on first usage and kept around until
the VM exits. The same pointer is used each time a KVM XICS device is
being created, but this is okay since we only have one per VM.

Clear the ICP of each vCPU with vcpu->mutex held. This ensures that the
next time the vCPU resumes execution, it won't be going into the XICS
code anymore.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
Tested-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
This commit is contained in:
Greg Kurz 2020-08-10 12:08:05 +02:00 committed by Paul Mackerras
parent d012a7190f
commit 5706d14d2a
3 changed files with 72 additions and 19 deletions

View File

@ -326,6 +326,7 @@ struct kvm_arch {
#endif #endif
#ifdef CONFIG_KVM_XICS #ifdef CONFIG_KVM_XICS
struct kvmppc_xics *xics; struct kvmppc_xics *xics;
struct kvmppc_xics *xics_device;
struct kvmppc_xive *xive; /* Current XIVE device in use */ struct kvmppc_xive *xive; /* Current XIVE device in use */
struct { struct {
struct kvmppc_xive *native; struct kvmppc_xive *native;

View File

@ -879,13 +879,15 @@ void kvmppc_core_destroy_vm(struct kvm *kvm)
#ifdef CONFIG_KVM_XICS #ifdef CONFIG_KVM_XICS
/* /*
* Free the XIVE devices which are not directly freed by the * Free the XIVE and XICS devices which are not directly freed by the
* device 'release' method * device 'release' method
*/ */
kfree(kvm->arch.xive_devices.native); kfree(kvm->arch.xive_devices.native);
kvm->arch.xive_devices.native = NULL; kvm->arch.xive_devices.native = NULL;
kfree(kvm->arch.xive_devices.xics_on_xive); kfree(kvm->arch.xive_devices.xics_on_xive);
kvm->arch.xive_devices.xics_on_xive = NULL; kvm->arch.xive_devices.xics_on_xive = NULL;
kfree(kvm->arch.xics_device);
kvm->arch.xics_device = NULL;
#endif /* CONFIG_KVM_XICS */ #endif /* CONFIG_KVM_XICS */
} }

View File

@ -1334,47 +1334,97 @@ static int xics_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
return -ENXIO; return -ENXIO;
} }
static void kvmppc_xics_free(struct kvm_device *dev) /*
* Called when device fd is closed. kvm->lock is held.
*/
static void kvmppc_xics_release(struct kvm_device *dev)
{ {
struct kvmppc_xics *xics = dev->private; struct kvmppc_xics *xics = dev->private;
int i; int i;
struct kvm *kvm = xics->kvm; struct kvm *kvm = xics->kvm;
struct kvm_vcpu *vcpu;
pr_devel("Releasing xics device\n");
/*
* Since this is the device release function, we know that
* userspace does not have any open fd referring to the
* device. Therefore there can not be any of the device
* attribute set/get functions being executed concurrently,
* and similarly, the connect_vcpu and set/clr_mapped
* functions also cannot be being executed.
*/
debugfs_remove(xics->dentry); debugfs_remove(xics->dentry);
/*
* We should clean up the vCPU interrupt presenters first.
*/
kvm_for_each_vcpu(i, vcpu, kvm) {
/*
* Take vcpu->mutex to ensure that no one_reg get/set ioctl
* (i.e. kvmppc_xics_[gs]et_icp) can be done concurrently.
* Holding the vcpu->mutex also means that execution is
* excluded for the vcpu until the ICP was freed. When the vcpu
* can execute again, vcpu->arch.icp and vcpu->arch.irq_type
* have been cleared and the vcpu will not be going into the
* XICS code anymore.
*/
mutex_lock(&vcpu->mutex);
kvmppc_xics_free_icp(vcpu);
mutex_unlock(&vcpu->mutex);
}
if (kvm) if (kvm)
kvm->arch.xics = NULL; kvm->arch.xics = NULL;
for (i = 0; i <= xics->max_icsid; i++) for (i = 0; i <= xics->max_icsid; i++) {
kfree(xics->ics[i]); kfree(xics->ics[i]);
kfree(xics); xics->ics[i] = NULL;
}
/*
* A reference of the kvmppc_xics pointer is now kept under
* the xics_device pointer of the machine for reuse. It is
* freed when the VM is destroyed for now until we fix all the
* execution paths.
*/
kfree(dev); kfree(dev);
} }
static struct kvmppc_xics *kvmppc_xics_get_device(struct kvm *kvm)
{
struct kvmppc_xics **kvm_xics_device = &kvm->arch.xics_device;
struct kvmppc_xics *xics = *kvm_xics_device;
if (!xics) {
xics = kzalloc(sizeof(*xics), GFP_KERNEL);
*kvm_xics_device = xics;
} else {
memset(xics, 0, sizeof(*xics));
}
return xics;
}
static int kvmppc_xics_create(struct kvm_device *dev, u32 type) static int kvmppc_xics_create(struct kvm_device *dev, u32 type)
{ {
struct kvmppc_xics *xics; struct kvmppc_xics *xics;
struct kvm *kvm = dev->kvm; struct kvm *kvm = dev->kvm;
int ret = 0;
xics = kzalloc(sizeof(*xics), GFP_KERNEL); pr_devel("Creating xics for partition\n");
/* Already there ? */
if (kvm->arch.xics)
return -EEXIST;
xics = kvmppc_xics_get_device(kvm);
if (!xics) if (!xics)
return -ENOMEM; return -ENOMEM;
dev->private = xics; dev->private = xics;
xics->dev = dev; xics->dev = dev;
xics->kvm = kvm; xics->kvm = kvm;
kvm->arch.xics = xics;
/* Already there ? */
if (kvm->arch.xics)
ret = -EEXIST;
else
kvm->arch.xics = xics;
if (ret) {
kfree(xics);
return ret;
}
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
if (cpu_has_feature(CPU_FTR_ARCH_206) && if (cpu_has_feature(CPU_FTR_ARCH_206) &&
@ -1399,7 +1449,7 @@ struct kvm_device_ops kvm_xics_ops = {
.name = "kvm-xics", .name = "kvm-xics",
.create = kvmppc_xics_create, .create = kvmppc_xics_create,
.init = kvmppc_xics_init, .init = kvmppc_xics_init,
.destroy = kvmppc_xics_free, .release = kvmppc_xics_release,
.set_attr = xics_set_attr, .set_attr = xics_set_attr,
.get_attr = xics_get_attr, .get_attr = xics_get_attr,
.has_attr = xics_has_attr, .has_attr = xics_has_attr,
@ -1415,7 +1465,7 @@ int kvmppc_xics_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu,
return -EPERM; return -EPERM;
if (xics->kvm != vcpu->kvm) if (xics->kvm != vcpu->kvm)
return -EPERM; return -EPERM;
if (vcpu->arch.irq_type) if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
return -EBUSY; return -EBUSY;
r = kvmppc_xics_create_icp(vcpu, xcpu); r = kvmppc_xics_create_icp(vcpu, xcpu);