mirror of https://gitee.com/openkylin/libvirt.git
qemu: adjust memlock for multiple vfio/vdpa devices
When multiple VFIO or VDPA devices are assigned to a guest, the guest can fail to start because the guest fails to map enough memory. For example, the case mentioned in https://bugzilla.redhat.com/show_bug.cgi?id=2111317 results in this failure: 2021-08-05T09:51:47.692578Z qemu-kvm: failed to write, fd=31, errno=14 (Bad address) 2021-08-05T09:51:47.692590Z qemu-kvm: vhost vdpa map fail! 2021-08-05T09:51:47.692594Z qemu-kvm: vhost-vdpa: DMA mapping failed, unable to continue The current memlock limit calculation does not work for scenarios where there are multiple such devices assigned to a guest. The root causes are a little bit different between VFIO and VDPA devices. For VFIO devices, the issue only occurs when a vIOMMU is present. In this scenario, each vfio device is assigned a separate AddressSpace fully mapping guest RAM. When there is no vIOMMU, the devices are all within the same AddressSpace so no additional memory limit is needed. For VDPA devices, each device requires the full memory to be mapped regardless of whether there is a vIOMMU or not. In order to enable these scenarios, we need to multiply memlock limit by the number of VDPA devices plus the number of VFIO devices for guests with a vIOMMU. This has the potential for pushing the memlock limit above the host physical memory and negating any protection that these locked memory limits are providing, but there is no other short-term solution. In the future, there should be have a revised userspace iommu interface (iommufd) that the VFIO and VDPA backends can make use of. This will be able to share locked memory limits between both vfio and vdpa use cases and address spaces and then we can disable these short term hacks. But this is still in development upstream. Resolves: https://bugzilla.redhat.com/2111317 Signed-off-by: Jonathon Jongsma <jjongsma@redhat.com> Reviewed-by: Laine Stump <laine@redhat.com>
This commit is contained in:
parent
ce2358d07a
commit
8d5704e2c4
|
@ -9233,6 +9233,40 @@ getPPC64MemLockLimitBytes(virDomainDef *def,
|
|||
}
|
||||
|
||||
|
||||
static int
|
||||
qemuDomainGetNumVFIODevices(const virDomainDef *def)
|
||||
{
|
||||
size_t i;
|
||||
int n = 0;
|
||||
|
||||
for (i = 0; i < def->nhostdevs; i++) {
|
||||
if (virHostdevIsVFIODevice(def->hostdevs[i]) ||
|
||||
virHostdevIsMdevDevice(def->hostdevs[i]))
|
||||
n++;
|
||||
}
|
||||
for (i = 0; i < def->ndisks; i++) {
|
||||
if (virStorageSourceChainHasNVMe(def->disks[i]->src))
|
||||
n++;
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
qemuDomainGetNumVDPANetDevices(const virDomainDef *def)
|
||||
{
|
||||
size_t i;
|
||||
int n = 0;
|
||||
|
||||
for (i = 0; i < def->nnets; i++) {
|
||||
if (virDomainNetGetActualType(def->nets[i]) == VIR_DOMAIN_NET_TYPE_VDPA)
|
||||
n++;
|
||||
}
|
||||
|
||||
return n;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* qemuDomainGetMemLockLimitBytes:
|
||||
* @def: domain definition
|
||||
|
@ -9252,6 +9286,8 @@ qemuDomainGetMemLockLimitBytes(virDomainDef *def,
|
|||
bool forceVFIO)
|
||||
{
|
||||
unsigned long long memKB = 0;
|
||||
int nvfio;
|
||||
int nvdpa;
|
||||
|
||||
/* prefer the hard limit */
|
||||
if (virMemoryLimitIsSet(def->mem.hard_limit)) {
|
||||
|
@ -9270,6 +9306,8 @@ qemuDomainGetMemLockLimitBytes(virDomainDef *def,
|
|||
if (ARCH_IS_PPC64(def->os.arch) && def->virtType == VIR_DOMAIN_VIRT_KVM)
|
||||
return getPPC64MemLockLimitBytes(def, forceVFIO);
|
||||
|
||||
nvfio = qemuDomainGetNumVFIODevices(def);
|
||||
nvdpa = qemuDomainGetNumVDPANetDevices(def);
|
||||
/* For device passthrough using VFIO the guest memory and MMIO memory
|
||||
* regions need to be locked persistent in order to allow DMA.
|
||||
*
|
||||
|
@ -9288,8 +9326,22 @@ qemuDomainGetMemLockLimitBytes(virDomainDef *def,
|
|||
*
|
||||
* Note that this may not be valid for all platforms.
|
||||
*/
|
||||
if (forceVFIO || qemuDomainNeedsVFIO(def) || virDomainDefHasVDPANet(def))
|
||||
memKB = virDomainDefGetMemoryTotal(def) + 1024 * 1024;
|
||||
if (forceVFIO || nvfio || nvdpa) {
|
||||
/* At present, the full memory needs to be locked for each VFIO / VDPA
|
||||
* device. For VFIO devices, this only applies when there is a vIOMMU
|
||||
* present. Yes, this may result in a memory limit that is greater than
|
||||
* the host physical memory, which is not ideal. The long-term solution
|
||||
* is a new userspace iommu interface (iommufd) which should eliminate
|
||||
* this duplicate memory accounting. But for now this is the only way
|
||||
* to enable configurations with e.g. multiple vdpa devices.
|
||||
*/
|
||||
int factor = nvdpa;
|
||||
|
||||
if (def->iommu)
|
||||
factor += nvfio;
|
||||
|
||||
memKB = MAX(factor, 1) * virDomainDefGetMemoryTotal(def) + 1024 * 1024;
|
||||
}
|
||||
|
||||
return memKB << 10;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue