From 24b74d187cab48a9dc9f409ea78900154c709579 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Thu, 15 Nov 2018 15:55:53 +0400 Subject: [PATCH] qemu: add memfd source type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new memoryBacking source type "memfd", supported by QEMU (when the capability is available). A memfd is a specialized anonymous memory kind. As such, an anonymous source type could be automatically using a memfd. However, there are some complications when migrating from different memory backends in qemu (mainly due to the internal object naming at this point, but there could be more). For now, it is simpler and safer to simply introduce a new source type "memfd". Eventually, the "anonymous" type could learn to use memfd transparently in a separate change. The main benefits are that it doesn't need to create filesystem files, and it also enforces sealing, providing a bit more safety. Signed-off-by: Marc-André Lureau Signed-off-by: Michal Privoznik --- docs/formatdomain.html.in | 9 +-- docs/schemas/domaincommon.rng | 1 + src/conf/domain_conf.c | 3 +- src/conf/domain_conf.h | 1 + src/qemu/qemu_command.c | 69 +++++++++++++------ src/qemu/qemu_domain.c | 12 +++- .../memfd-memory-numa.x86_64-latest.args | 34 +++++++++ tests/qemuxml2argvdata/memfd-memory-numa.xml | 36 ++++++++++ tests/qemuxml2argvtest.c | 2 + 9 files changed, 140 insertions(+), 27 deletions(-) create mode 100644 tests/qemuxml2argvdata/memfd-memory-numa.x86_64-latest.args create mode 100644 tests/qemuxml2argvdata/memfd-memory-numa.xml diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index 8a23b785dd..8d5edbcca5 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -1126,7 +1126,7 @@ </hugepages> <nosharepages/> <locked/> - <source type="file|anonymous"/> + <source type="file|anonymous|memfd"/> <access mode="shared|private"/> <allocation mode="immediate|ondemand"/> <discard/> @@ -1177,9 +1177,10 @@ suitable for the specific environment at the same time to mitigate the risks described above. Since 1.0.6
source
-
Using the type attribute, it's possible to provide - "file" to utilize file memorybacking or keep the default - "anonymous".
+
Using the type attribute, it's possible to + provide "file" to utilize file memorybacking or keep the + default "anonymous". Since 4.10.0, + you may choose "memfd" backing. (QEMU/KVM only)
access
Using the mode attribute, specify if the memory is to be "shared" or "private". This can be overridden per numa node by diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index fa934dfba0..5ee727eefa 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -655,6 +655,7 @@ file anonymous + memfd diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 793bbe1fbd..c3dbba6919 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -898,7 +898,8 @@ VIR_ENUM_IMPL(virDomainDiskMirrorState, VIR_DOMAIN_DISK_MIRROR_STATE_LAST, VIR_ENUM_IMPL(virDomainMemorySource, VIR_DOMAIN_MEMORY_SOURCE_LAST, "none", "file", - "anonymous") + "anonymous", + "memfd") VIR_ENUM_IMPL(virDomainMemoryAllocation, VIR_DOMAIN_MEMORY_ALLOCATION_LAST, "none", diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index c167f8c43c..467785cd83 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -607,6 +607,7 @@ typedef enum { VIR_DOMAIN_MEMORY_SOURCE_NONE = 0, /* No memory source defined */ VIR_DOMAIN_MEMORY_SOURCE_FILE, /* Memory source is set as file */ VIR_DOMAIN_MEMORY_SOURCE_ANONYMOUS, /* Memory source is set as anonymous */ + VIR_DOMAIN_MEMORY_SOURCE_MEMFD, /* Memory source is set as memfd */ VIR_DOMAIN_MEMORY_SOURCE_LAST, } virDomainMemorySource; diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 9aea5af26f..23a6661c10 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -3176,6 +3176,26 @@ qemuBuildControllerDevCommandLine(virCommandPtr cmd, } +static int +qemuBuildMemoryBackendPropsShare(virJSONValuePtr props, + virDomainMemoryAccess memAccess) +{ + switch (memAccess) { + case VIR_DOMAIN_MEMORY_ACCESS_SHARED: + return virJSONValueObjectAdd(props, "b:share", true, NULL); + + case VIR_DOMAIN_MEMORY_ACCESS_PRIVATE: + return virJSONValueObjectAdd(props, "b:share", false, NULL); + + case VIR_DOMAIN_MEMORY_ACCESS_DEFAULT: + case VIR_DOMAIN_MEMORY_ACCESS_LAST: + break; + } + + return 0; +} + + /** * qemuBuildMemoryBackendProps: * @backendProps: [out] constructed object @@ -3195,7 +3215,7 @@ qemuBuildControllerDevCommandLine(virCommandPtr cmd, * configuration value of 1 is returned. This behaviour can be suppressed by * setting @force to true in which case 0 would be returned. * - * Then, if one of the two memory-backend-* should be used, the @qemuCaps is + * Then, if one of the three memory-backend-* should be used, the @qemuCaps is * consulted to check if qemu does support it. * * Returns: 0 on success, @@ -3321,7 +3341,19 @@ qemuBuildMemoryBackendProps(virJSONValuePtr *backendProps, if (!(props = virJSONValueNewObject())) return -1; - if (useHugepage || mem->nvdimmPath || memAccess || + if (def->mem.source == VIR_DOMAIN_MEMORY_SOURCE_MEMFD) { + backendType = "memory-backend-memfd"; + + if (useHugepage && + (virJSONValueObjectAdd(props, "b:hugetlb", useHugepage, NULL) < 0 || + virJSONValueObjectAdd(props, "U:hugetlbsize", pagesize << 10, NULL) < 0)) { + goto cleanup; + } + + if (qemuBuildMemoryBackendPropsShare(props, memAccess) < 0) + goto cleanup; + + } else if (useHugepage || mem->nvdimmPath || memAccess || def->mem.source == VIR_DOMAIN_MEMORY_SOURCE_FILE) { if (mem->nvdimmPath) { @@ -3359,21 +3391,8 @@ qemuBuildMemoryBackendProps(virJSONValuePtr *backendProps, goto cleanup; } - switch (memAccess) { - case VIR_DOMAIN_MEMORY_ACCESS_SHARED: - if (virJSONValueObjectAdd(props, "b:share", true, NULL) < 0) - goto cleanup; - break; - - case VIR_DOMAIN_MEMORY_ACCESS_PRIVATE: - if (virJSONValueObjectAdd(props, "b:share", false, NULL) < 0) - goto cleanup; - break; - - case VIR_DOMAIN_MEMORY_ACCESS_DEFAULT: - case VIR_DOMAIN_MEMORY_ACCESS_LAST: - break; - } + if (qemuBuildMemoryBackendPropsShare(props, memAccess) < 0) + goto cleanup; } else { backendType = "memory-backend-ram"; } @@ -3403,7 +3422,9 @@ qemuBuildMemoryBackendProps(virJSONValuePtr *backendProps, if (!needHugepage && !mem->sourceNodes && !nodeSpecified && !mem->nvdimmPath && memAccess == VIR_DOMAIN_MEMORY_ACCESS_DEFAULT && - def->mem.source != VIR_DOMAIN_MEMORY_SOURCE_FILE && !force) { + def->mem.source != VIR_DOMAIN_MEMORY_SOURCE_FILE && + def->mem.source != VIR_DOMAIN_MEMORY_SOURCE_MEMFD && + !force) { /* report back that using the new backend is not necessary * to achieve the desired configuration */ ret = 1; @@ -3421,6 +3442,12 @@ qemuBuildMemoryBackendProps(virJSONValuePtr *backendProps, _("this qemu doesn't support the " "memory-backend-ram object")); goto cleanup; + } else if (STREQ(backendType, "memory-backend-memory") && + !virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_MEMFD)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("this qemu doesn't support the " + "memory-backend-memfd object")); + goto cleanup; } ret = 0; @@ -7654,7 +7681,8 @@ qemuBuildNumaArgStr(virQEMUDriverConfigPtr cfg, if (virDomainNumatuneHasPerNodeBinding(def->numa) && !(virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_RAM) || - virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_FILE))) { + virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_FILE) || + virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_MEMFD))) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", _("Per-node memory binding is not supported " "with this QEMU")); @@ -7680,7 +7708,8 @@ qemuBuildNumaArgStr(virQEMUDriverConfigPtr cfg, * need to check which approach to use */ for (i = 0; i < ncells; i++) { if (virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_RAM) || - virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_FILE)) { + virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_FILE) || + virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_MEMFD)) { if ((rc = qemuBuildMemoryCellBackendStr(def, cfg, i, priv, &nodeBackends[i])) < 0) diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index fbe63e2e1d..1beb5969c1 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -3952,7 +3952,8 @@ qemuDomainDefValidateFeatures(const virDomainDef *def, static int -qemuDomainDefValidateMemory(const virDomainDef *def) +qemuDomainDefValidateMemory(const virDomainDef *def, + virQEMUCapsPtr qemuCaps) { const long system_page_size = virGetSystemPageSizeKB(); const virDomainMemtune *mem = &def->mem; @@ -3974,6 +3975,13 @@ qemuDomainDefValidateMemory(const virDomainDef *def) return -1; } + if (mem->source == VIR_DOMAIN_MEMORY_SOURCE_MEMFD && + !virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_MEMFD_HUGETLB)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("hugepages is not support with memfd memory source")); + return -1; + } + /* We can't guarantee any other mem.access * if no guest NUMA nodes are defined. */ if (mem->hugepages[0].size != system_page_size && @@ -4139,7 +4147,7 @@ qemuDomainDefValidate(const virDomainDef *def, if (qemuDomainDefValidateFeatures(def, qemuCaps) < 0) goto cleanup; - if (qemuDomainDefValidateMemory(def) < 0) + if (qemuDomainDefValidateMemory(def, qemuCaps) < 0) goto cleanup; ret = 0; diff --git a/tests/qemuxml2argvdata/memfd-memory-numa.x86_64-latest.args b/tests/qemuxml2argvdata/memfd-memory-numa.x86_64-latest.args new file mode 100644 index 0000000000..d0f4057e01 --- /dev/null +++ b/tests/qemuxml2argvdata/memfd-memory-numa.x86_64-latest.args @@ -0,0 +1,34 @@ +LC_ALL=C \ +PATH=/bin \ +HOME=/home/test \ +USER=test \ +LOGNAME=test \ +QEMU_AUDIO_DRV=none \ +/usr/bin/qemu-system-x86_64 \ +-name guest=instance-00000092,debug-threads=on \ +-S \ +-object secret,id=masterKey0,format=raw,\ +file=/tmp/lib/domain--1-instance-00000092/master-key.aes \ +-machine pc-i440fx-wily,accel=kvm,usb=off,dump-guest-core=off \ +-m 14336 \ +-mem-prealloc \ +-realtime mlock=off \ +-smp 8,sockets=1,cores=8,threads=1 \ +-object memory-backend-memfd,id=ram-node0,hugetlb=yes,hugetlbsize=2097152,\ +share=yes,size=15032385536,host-nodes=3,policy=preferred \ +-numa node,nodeid=0,cpus=0-7,memdev=ram-node0 \ +-uuid 126f2720-6f8e-45ab-a886-ec9277079a67 \ +-display none \ +-no-user-config \ +-nodefaults \ +-chardev socket,id=charmonitor,fd=1729,server,nowait \ +-mon chardev=charmonitor,id=monitor,mode=control \ +-rtc base=utc \ +-no-shutdown \ +-no-acpi \ +-boot strict=on \ +-device piix3-usb-uhci,id=usb,bus=pci.0,addr=0x1.0x2 \ +-device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x2 \ +-sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,\ +resourcecontrol=deny \ +-msg timestamp=on diff --git a/tests/qemuxml2argvdata/memfd-memory-numa.xml b/tests/qemuxml2argvdata/memfd-memory-numa.xml new file mode 100644 index 0000000000..8416a990fa --- /dev/null +++ b/tests/qemuxml2argvdata/memfd-memory-numa.xml @@ -0,0 +1,36 @@ + + instance-00000092 + 126f2720-6f8e-45ab-a886-ec9277079a67 + 14680064 + 14680064 + + + + + + + + + + + + 8 + + hvm + + + + + + + + + + destroy + restart + destroy + + /usr/bin/qemu-system-x86_64 + + + diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c index bf164f5f8d..95429b3ae7 100644 --- a/tests/qemuxml2argvtest.c +++ b/tests/qemuxml2argvtest.c @@ -2974,6 +2974,8 @@ mymain(void) DO_TEST("fd-memory-no-numa-topology", QEMU_CAPS_OBJECT_MEMORY_FILE, QEMU_CAPS_KVM); + DO_TEST_CAPS_LATEST("memfd-memory-numa"); + DO_TEST("cpu-check-none", QEMU_CAPS_KVM); DO_TEST("cpu-check-partial", QEMU_CAPS_KVM); DO_TEST("cpu-check-full", QEMU_CAPS_KVM);