From 0ab8c021c6c594846915cbeb501fa87ab8780949 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Sun, 14 Mar 2021 21:03:00 +0100 Subject: [PATCH 01/19] virtio: Fix virtio_mmio_read()/virtio_mmio_write() Both functions don't check the personality of the interface (legacy or modern) before accessing the configuration memory and always use virtio_config_readX()/virtio_config_writeX(). With this patch, they now check the personality and in legacy mode call virtio_config_readX()/virtio_config_writeX(), otherwise call virtio_config_modern_readX()/virtio_config_modern_writeX(). This change has been tested with virtio-mmio guests (virt stretch/armhf and virt sid/m68k) and virtio-pci guests (pseries RHEL-7.3/ppc64 and /ppc64le). Signed-off-by: Laurent Vivier Message-Id: <20210314200300.3259170-1-laurent@vivier.eu> Reviewed-by: Stefano Garzarella Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio-mmio.c | 74 +++++++++++++++++++++++++++++------------ 1 file changed, 52 insertions(+), 22 deletions(-) diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c index 6990b9879c..342c918ea7 100644 --- a/hw/virtio/virtio-mmio.c +++ b/hw/virtio/virtio-mmio.c @@ -112,15 +112,28 @@ static uint64_t virtio_mmio_read(void *opaque, hwaddr offset, unsigned size) if (offset >= VIRTIO_MMIO_CONFIG) { offset -= VIRTIO_MMIO_CONFIG; - switch (size) { - case 1: - return virtio_config_readb(vdev, offset); - case 2: - return virtio_config_readw(vdev, offset); - case 4: - return virtio_config_readl(vdev, offset); - default: - abort(); + if (proxy->legacy) { + switch (size) { + case 1: + return virtio_config_readb(vdev, offset); + case 2: + return virtio_config_readw(vdev, offset); + case 4: + return virtio_config_readl(vdev, offset); + default: + abort(); + } + } else { + switch (size) { + case 1: + return virtio_config_modern_readb(vdev, offset); + case 2: + return virtio_config_modern_readw(vdev, offset); + case 4: + return virtio_config_modern_readl(vdev, offset); + default: + abort(); + } } } if (size != 4) { @@ -245,20 +258,37 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value, if (offset >= VIRTIO_MMIO_CONFIG) { offset -= VIRTIO_MMIO_CONFIG; - switch (size) { - case 1: - virtio_config_writeb(vdev, offset, value); - break; - case 2: - virtio_config_writew(vdev, offset, value); - break; - case 4: - virtio_config_writel(vdev, offset, value); - break; - default: - abort(); + if (proxy->legacy) { + switch (size) { + case 1: + virtio_config_writeb(vdev, offset, value); + break; + case 2: + virtio_config_writew(vdev, offset, value); + break; + case 4: + virtio_config_writel(vdev, offset, value); + break; + default: + abort(); + } + return; + } else { + switch (size) { + case 1: + virtio_config_modern_writeb(vdev, offset, value); + break; + case 2: + virtio_config_modern_writew(vdev, offset, value); + break; + case 4: + virtio_config_modern_writel(vdev, offset, value); + break; + default: + abort(); + } + return; } - return; } if (size != 4) { qemu_log_mask(LOG_GUEST_ERROR, From a890557d5a90b2c99988bc478bfd7f77392cfd8d Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Fri, 12 Mar 2021 10:22:06 +0100 Subject: [PATCH 02/19] vhost-user: Drop misleading EAGAIN checks in slave_read() slave_read() checks EAGAIN when reading or writing to the socket fails. This gives the impression that the slave channel is in non-blocking mode, which is certainly not the case with the current code base. And the rest of the code isn't actually ready to cope with non-blocking I/O. Just drop the checks everywhere in this function for the sake of clarity. Signed-off-by: Greg Kurz Message-Id: <20210312092212.782255-2-groug@kaod.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefan Hajnoczi --- hw/virtio/vhost-user.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index 2fdd5daf74..6af9b43a72 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -1420,7 +1420,7 @@ static void slave_read(void *opaque) do { size = recvmsg(u->slave_fd, &msgh, 0); - } while (size < 0 && (errno == EINTR || errno == EAGAIN)); + } while (size < 0 && errno == EINTR); if (size != VHOST_USER_HDR_SIZE) { error_report("Failed to read from slave."); @@ -1452,7 +1452,7 @@ static void slave_read(void *opaque) /* Read payload */ do { size = read(u->slave_fd, &payload, hdr.size); - } while (size < 0 && (errno == EINTR || errno == EAGAIN)); + } while (size < 0 && errno == EINTR); if (size != hdr.size) { error_report("Failed to read payload from slave."); @@ -1503,7 +1503,7 @@ static void slave_read(void *opaque) do { size = writev(u->slave_fd, iovec, ARRAY_SIZE(iovec)); - } while (size < 0 && (errno == EINTR || errno == EAGAIN)); + } while (size < 0 && errno == EINTR); if (size != VHOST_USER_HDR_SIZE + hdr.size) { error_report("Failed to send msg reply to slave."); From 9e06080bed293d94ec1f874e62f25f147b20bc6c Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Fri, 12 Mar 2021 10:22:07 +0100 Subject: [PATCH 03/19] vhost-user: Fix double-close on slave_read() error path Some message types, e.g. VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG, can convey file descriptors. These must be closed before returning from slave_read() to avoid being leaked. This can currently be done in two different places: [1] just after the request has been processed [2] on the error path, under the goto label err: These path are supposed to be mutually exclusive but they are not actually. If the VHOST_USER_NEED_REPLY_MASK flag was passed and the sending of the reply fails, both [1] and [2] are performed with the same descriptor values. This can potentially cause subtle bugs if one of the descriptor was recycled by some other thread in the meantime. This code duplication complicates rollback for no real good benefit. Do the closing in a unique place, under a new fdcleanup: goto label at the end of the function. Signed-off-by: Greg Kurz Message-Id: <20210312092212.782255-3-groug@kaod.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefan Hajnoczi --- hw/virtio/vhost-user.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index 6af9b43a72..acde1d2936 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -1475,13 +1475,6 @@ static void slave_read(void *opaque) ret = -EINVAL; } - /* Close the remaining file descriptors. */ - for (i = 0; i < fdsize; i++) { - if (fd[i] != -1) { - close(fd[i]); - } - } - /* * REPLY_ACK feature handling. Other reply types has to be managed * directly in their request handlers. @@ -1511,12 +1504,14 @@ static void slave_read(void *opaque) } } - return; + goto fdcleanup; err: qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL); close(u->slave_fd); u->slave_fd = -1; + +fdcleanup: for (i = 0; i < fdsize; i++) { if (fd[i] != -1) { close(fd[i]); From de62e4946052076186428900a85d6547627e84c6 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Fri, 12 Mar 2021 10:22:08 +0100 Subject: [PATCH 04/19] vhost-user: Factor out duplicated slave_fd teardown code Signed-off-by: Greg Kurz Message-Id: <20210312092212.782255-4-groug@kaod.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefan Hajnoczi --- hw/virtio/vhost-user.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index acde1d2936..cb0c98f30a 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -1392,6 +1392,13 @@ static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev, return 0; } +static void close_slave_channel(struct vhost_user *u) +{ + qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL); + close(u->slave_fd); + u->slave_fd = -1; +} + static void slave_read(void *opaque) { struct vhost_dev *dev = opaque; @@ -1507,9 +1514,7 @@ static void slave_read(void *opaque) goto fdcleanup; err: - qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL); - close(u->slave_fd); - u->slave_fd = -1; + close_slave_channel(u); fdcleanup: for (i = 0; i < fdsize; i++) { @@ -1560,9 +1565,7 @@ static int vhost_setup_slave_channel(struct vhost_dev *dev) out: close(sv[1]); if (ret) { - qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL); - close(u->slave_fd); - u->slave_fd = -1; + close_slave_channel(u); } return ret; @@ -1915,9 +1918,7 @@ static int vhost_user_backend_cleanup(struct vhost_dev *dev) u->postcopy_fd.handler = NULL; } if (u->slave_fd >= 0) { - qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL); - close(u->slave_fd); - u->slave_fd = -1; + close_slave_channel(u); } g_free(u->region_rb); u->region_rb = NULL; From 57dc02173cb089c11d3c84a0570cb60fe7d7f0d5 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Fri, 12 Mar 2021 10:22:09 +0100 Subject: [PATCH 05/19] vhost-user: Convert slave channel to QIOChannelSocket The slave channel is implemented with socketpair() : QEMU creates the pair, passes one of the socket to virtiofsd and monitors the other one with the main event loop using qemu_set_fd_handler(). In order to fix a potential deadlock between QEMU and a vhost-user external process (e.g. virtiofsd with DAX), we want to be able to monitor and service the slave channel while handling vhost-user requests. Prepare ground for this by converting the slave channel to be a QIOChannelSocket. This will make monitoring of the slave channel as simple as calling qio_channel_add_watch_source(). Since the connection is already established between the two sockets, only incoming I/O (G_IO_IN) and disconnect (G_IO_HUP) need to be serviced. This also allows to get rid of the ancillary data parsing since QIOChannelSocket can do this for us. Note that the MSG_CTRUNC check is dropped on the way because QIOChannelSocket ignores this case. This isn't a problem since slave_read() provisions space for 8 file descriptors, but affected vhost-user slave protocol messages generally only convey one. If for some reason a buggy implementation passes more file descriptors, no need to break the connection, just like we don't break it if some other type of ancillary data is received : this isn't explicitely violating the protocol per-se so it seems better to ignore it. The current code errors out on short reads and writes. Use the qio_channel_*_all() variants to address this on the way. Signed-off-by: Greg Kurz Message-Id: <20210312092212.782255-5-groug@kaod.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefan Hajnoczi --- hw/virtio/vhost-user.c | 99 +++++++++++++++++------------------------- 1 file changed, 39 insertions(+), 60 deletions(-) diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index cb0c98f30a..3c1e1611b0 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -16,6 +16,7 @@ #include "hw/virtio/virtio.h" #include "hw/virtio/virtio-net.h" #include "chardev/char-fe.h" +#include "io/channel-socket.h" #include "sysemu/kvm.h" #include "qemu/error-report.h" #include "qemu/main-loop.h" @@ -237,7 +238,8 @@ struct vhost_user { struct vhost_dev *dev; /* Shared between vhost devs of the same virtio device */ VhostUserState *user; - int slave_fd; + QIOChannel *slave_ioc; + GSource *slave_src; NotifierWithReturn postcopy_notifier; struct PostCopyFD postcopy_fd; uint64_t postcopy_client_bases[VHOST_USER_MAX_RAM_SLOTS]; @@ -1394,61 +1396,37 @@ static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev, static void close_slave_channel(struct vhost_user *u) { - qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL); - close(u->slave_fd); - u->slave_fd = -1; + g_source_destroy(u->slave_src); + g_source_unref(u->slave_src); + u->slave_src = NULL; + object_unref(OBJECT(u->slave_ioc)); + u->slave_ioc = NULL; } -static void slave_read(void *opaque) +static gboolean slave_read(QIOChannel *ioc, GIOCondition condition, + gpointer opaque) { struct vhost_dev *dev = opaque; struct vhost_user *u = dev->opaque; VhostUserHeader hdr = { 0, }; VhostUserPayload payload = { 0, }; - int size, ret = 0; + Error *local_err = NULL; + gboolean rc = G_SOURCE_CONTINUE; + int ret = 0; struct iovec iov; - struct msghdr msgh; - int fd[VHOST_USER_SLAVE_MAX_FDS]; - char control[CMSG_SPACE(sizeof(fd))]; - struct cmsghdr *cmsg; - int i, fdsize = 0; - - memset(&msgh, 0, sizeof(msgh)); - msgh.msg_iov = &iov; - msgh.msg_iovlen = 1; - msgh.msg_control = control; - msgh.msg_controllen = sizeof(control); - - memset(fd, -1, sizeof(fd)); + g_autofree int *fd = NULL; + size_t fdsize = 0; + int i; /* Read header */ iov.iov_base = &hdr; iov.iov_len = VHOST_USER_HDR_SIZE; - do { - size = recvmsg(u->slave_fd, &msgh, 0); - } while (size < 0 && errno == EINTR); - - if (size != VHOST_USER_HDR_SIZE) { - error_report("Failed to read from slave."); + if (qio_channel_readv_full_all(ioc, &iov, 1, &fd, &fdsize, &local_err)) { + error_report_err(local_err); goto err; } - if (msgh.msg_flags & MSG_CTRUNC) { - error_report("Truncated message."); - goto err; - } - - for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL; - cmsg = CMSG_NXTHDR(&msgh, cmsg)) { - if (cmsg->cmsg_level == SOL_SOCKET && - cmsg->cmsg_type == SCM_RIGHTS) { - fdsize = cmsg->cmsg_len - CMSG_LEN(0); - memcpy(fd, CMSG_DATA(cmsg), fdsize); - break; - } - } - if (hdr.size > VHOST_USER_PAYLOAD_SIZE) { error_report("Failed to read msg header." " Size %d exceeds the maximum %zu.", hdr.size, @@ -1457,12 +1435,8 @@ static void slave_read(void *opaque) } /* Read payload */ - do { - size = read(u->slave_fd, &payload, hdr.size); - } while (size < 0 && errno == EINTR); - - if (size != hdr.size) { - error_report("Failed to read payload from slave."); + if (qio_channel_read_all(ioc, (char *) &payload, hdr.size, &local_err)) { + error_report_err(local_err); goto err; } @@ -1475,7 +1449,7 @@ static void slave_read(void *opaque) break; case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG: ret = vhost_user_slave_handle_vring_host_notifier(dev, &payload.area, - fd[0]); + fd ? fd[0] : -1); break; default: error_report("Received unexpected msg type: %d.", hdr.request); @@ -1501,12 +1475,8 @@ static void slave_read(void *opaque) iovec[1].iov_base = &payload; iovec[1].iov_len = hdr.size; - do { - size = writev(u->slave_fd, iovec, ARRAY_SIZE(iovec)); - } while (size < 0 && errno == EINTR); - - if (size != VHOST_USER_HDR_SIZE + hdr.size) { - error_report("Failed to send msg reply to slave."); + if (qio_channel_writev_all(ioc, iovec, ARRAY_SIZE(iovec), &local_err)) { + error_report_err(local_err); goto err; } } @@ -1515,14 +1485,15 @@ static void slave_read(void *opaque) err: close_slave_channel(u); + rc = G_SOURCE_REMOVE; fdcleanup: - for (i = 0; i < fdsize; i++) { - if (fd[i] != -1) { + if (fd) { + for (i = 0; i < fdsize; i++) { close(fd[i]); } } - return; + return rc; } static int vhost_setup_slave_channel(struct vhost_dev *dev) @@ -1535,6 +1506,8 @@ static int vhost_setup_slave_channel(struct vhost_dev *dev) int sv[2], ret = 0; bool reply_supported = virtio_has_feature(dev->protocol_features, VHOST_USER_PROTOCOL_F_REPLY_ACK); + Error *local_err = NULL; + QIOChannel *ioc; if (!virtio_has_feature(dev->protocol_features, VHOST_USER_PROTOCOL_F_SLAVE_REQ)) { @@ -1546,8 +1519,15 @@ static int vhost_setup_slave_channel(struct vhost_dev *dev) return -1; } - u->slave_fd = sv[0]; - qemu_set_fd_handler(u->slave_fd, slave_read, NULL, dev); + ioc = QIO_CHANNEL(qio_channel_socket_new_fd(sv[0], &local_err)); + if (!ioc) { + error_report_err(local_err); + return -1; + } + u->slave_ioc = ioc; + u->slave_src = qio_channel_add_watch_source(u->slave_ioc, + G_IO_IN | G_IO_HUP, + slave_read, dev, NULL, NULL); if (reply_supported) { msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; @@ -1802,7 +1782,6 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque) u = g_new0(struct vhost_user, 1); u->user = opaque; - u->slave_fd = -1; u->dev = dev; dev->opaque = u; @@ -1917,7 +1896,7 @@ static int vhost_user_backend_cleanup(struct vhost_dev *dev) close(u->postcopy_fd.fd); u->postcopy_fd.handler = NULL; } - if (u->slave_fd >= 0) { + if (u->slave_ioc) { close_slave_channel(u); } g_free(u->region_rb); From a7f523c7d114d445c5d83aecdba3efc038e5a692 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Fri, 12 Mar 2021 10:22:10 +0100 Subject: [PATCH 06/19] vhost-user: Introduce nested event loop in vhost_user_read() A deadlock condition potentially exists if a vhost-user process needs to request something to QEMU on the slave channel while processing a vhost-user message. This doesn't seem to affect any vhost-user implementation so far, but this is currently biting the upcoming enablement of DAX with virtio-fs. The issue is being observed when the guest does an emergency reboot while a mapping still exits in the DAX window, which is very easy to get with a busy enough workload (e.g. as simulated by blogbench [1]) : - QEMU sends VHOST_USER_GET_VRING_BASE to virtiofsd. - In order to complete the request, virtiofsd then asks QEMU to remove the mapping on the slave channel. All these dialogs are synchronous, hence the deadlock. As pointed out by Stefan Hajnoczi: When QEMU's vhost-user master implementation sends a vhost-user protocol message, vhost_user_read() does a "blocking" read during which slave_fd is not monitored by QEMU. The natural solution for this issue is an event loop. The main event loop cannot be nested though since we have no guarantees that its fd handlers are prepared for re-entrancy. Introduce a new event loop that only monitors the chardev I/O for now in vhost_user_read() and push the actual reading to a one-shot handler. A subsequent patch will teach the loop to monitor and process messages from the slave channel as well. [1] https://github.com/jedisct1/Blogbench Suggested-by: Stefan Hajnoczi Signed-off-by: Greg Kurz Message-Id: <20210312092212.782255-6-groug@kaod.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefan Hajnoczi --- hw/virtio/vhost-user.c | 65 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 60 insertions(+), 5 deletions(-) diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index 3c1e1611b0..00256fa318 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -296,15 +296,27 @@ static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg) return 0; } -static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) +struct vhost_user_read_cb_data { + struct vhost_dev *dev; + VhostUserMsg *msg; + GMainLoop *loop; + int ret; +}; + +static gboolean vhost_user_read_cb(GIOChannel *source, GIOCondition condition, + gpointer opaque) { + struct vhost_user_read_cb_data *data = opaque; + struct vhost_dev *dev = data->dev; + VhostUserMsg *msg = data->msg; struct vhost_user *u = dev->opaque; CharBackend *chr = u->user->chr; uint8_t *p = (uint8_t *) msg; int r, size; if (vhost_user_read_header(dev, msg) < 0) { - return -1; + data->ret = -1; + goto end; } /* validate message size is sane */ @@ -312,7 +324,8 @@ static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) error_report("Failed to read msg header." " Size %d exceeds the maximum %zu.", msg->hdr.size, VHOST_USER_PAYLOAD_SIZE); - return -1; + data->ret = -1; + goto end; } if (msg->hdr.size) { @@ -322,11 +335,53 @@ static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) if (r != size) { error_report("Failed to read msg payload." " Read %d instead of %d.", r, msg->hdr.size); - return -1; + data->ret = -1; + goto end; } } - return 0; +end: + g_main_loop_quit(data->loop); + return G_SOURCE_REMOVE; +} + +static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) +{ + struct vhost_user *u = dev->opaque; + CharBackend *chr = u->user->chr; + GMainContext *prev_ctxt = chr->chr->gcontext; + GMainContext *ctxt = g_main_context_new(); + GMainLoop *loop = g_main_loop_new(ctxt, FALSE); + struct vhost_user_read_cb_data data = { + .dev = dev, + .loop = loop, + .msg = msg, + .ret = 0 + }; + + /* + * We want to be able to monitor the slave channel fd while waiting + * for chr I/O. This requires an event loop, but we can't nest the + * one to which chr is currently attached : its fd handlers might not + * be prepared for re-entrancy. So we create a new one and switch chr + * to use it. + */ + qemu_chr_be_update_read_handlers(chr->chr, ctxt); + qemu_chr_fe_add_watch(chr, G_IO_IN | G_IO_HUP, vhost_user_read_cb, &data); + + g_main_loop_run(loop); + + /* + * Restore the previous event loop context. This also destroys/recreates + * event sources : this guarantees that all pending events in the original + * context that have been processed by the nested loop are purged. + */ + qemu_chr_be_update_read_handlers(chr->chr, prev_ctxt); + + g_main_loop_unref(loop); + g_main_context_unref(ctxt); + + return data.ret; } static int process_message_reply(struct vhost_dev *dev, From db8a3772e300c1a656331a92da0785d81667dc81 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Fri, 12 Mar 2021 10:22:11 +0100 Subject: [PATCH 07/19] vhost-user: Monitor slave channel in vhost_user_read() Now that everything is in place, have the nested event loop to monitor the slave channel. The source in the main event loop is destroyed and recreated to ensure any pending even for the slave channel that was previously detected is purged. This guarantees that the main loop wont invoke slave_read() based on an event that was already handled by the nested loop. Signed-off-by: Greg Kurz Message-Id: <20210312092212.782255-7-groug@kaod.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefan Hajnoczi --- hw/virtio/vhost-user.c | 35 ++++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index 00256fa318..ded0c10453 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -345,6 +345,35 @@ end: return G_SOURCE_REMOVE; } +static gboolean slave_read(QIOChannel *ioc, GIOCondition condition, + gpointer opaque); + +/* + * This updates the read handler to use a new event loop context. + * Event sources are removed from the previous context : this ensures + * that events detected in the previous context are purged. They will + * be re-detected and processed in the new context. + */ +static void slave_update_read_handler(struct vhost_dev *dev, + GMainContext *ctxt) +{ + struct vhost_user *u = dev->opaque; + + if (!u->slave_ioc) { + return; + } + + if (u->slave_src) { + g_source_destroy(u->slave_src); + g_source_unref(u->slave_src); + } + + u->slave_src = qio_channel_add_watch_source(u->slave_ioc, + G_IO_IN | G_IO_HUP, + slave_read, dev, NULL, + ctxt); +} + static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) { struct vhost_user *u = dev->opaque; @@ -366,6 +395,7 @@ static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) * be prepared for re-entrancy. So we create a new one and switch chr * to use it. */ + slave_update_read_handler(dev, ctxt); qemu_chr_be_update_read_handlers(chr->chr, ctxt); qemu_chr_fe_add_watch(chr, G_IO_IN | G_IO_HUP, vhost_user_read_cb, &data); @@ -377,6 +407,7 @@ static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) * context that have been processed by the nested loop are purged. */ qemu_chr_be_update_read_handlers(chr->chr, prev_ctxt); + slave_update_read_handler(dev, NULL); g_main_loop_unref(loop); g_main_context_unref(ctxt); @@ -1580,9 +1611,7 @@ static int vhost_setup_slave_channel(struct vhost_dev *dev) return -1; } u->slave_ioc = ioc; - u->slave_src = qio_channel_add_watch_source(u->slave_ioc, - G_IO_IN | G_IO_HUP, - slave_read, dev, NULL, NULL); + slave_update_read_handler(dev, NULL); if (reply_supported) { msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; From d2adda34a9989404a4fc86cb4127a3ea103a7938 Mon Sep 17 00:00:00 2001 From: Wang Liang Date: Tue, 16 Mar 2021 22:41:45 -0400 Subject: [PATCH 08/19] virtio-pmem: fix virtio_pmem_resp assign problem ret in virtio_pmem_resp is a uint32_t variable, which should be assigned using virtio_stl_p. The kernel side driver does not guarantee virtio_pmem_resp to be initialized to zero in advance, So sometimes the flush operation will fail. Signed-off-by: Wang Liang Message-Id: <20210317024145.271212-1-wangliangzz@126.com> Reviewed-by: Stefano Garzarella Reviewed-by: David Hildenbrand Reviewed-by: Pankaj Gupta Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio-pmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/virtio/virtio-pmem.c b/hw/virtio/virtio-pmem.c index a3e0688a89..d1aeb90a31 100644 --- a/hw/virtio/virtio-pmem.c +++ b/hw/virtio/virtio-pmem.c @@ -47,7 +47,7 @@ static int worker_cb(void *opaque) err = 1; } - virtio_stw_p(req_data->vdev, &req_data->resp.ret, err); + virtio_stl_p(req_data->vdev, &req_data->resp.ret, err); return 0; } From 79a2aca20cb4601e6a30bbe8620ee1ef9b960ae1 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Mon, 15 Mar 2021 14:00:57 -0400 Subject: [PATCH 09/19] tests: acpi: temporary whitelist DSDT changes Signed-off-by: Igor Mammedov Message-Id: <20210315180102.3008391-2-imammedo@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- tests/qtest/bios-tables-test-allowed-diff.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h index dfb8523c8b..fddcfc061f 100644 --- a/tests/qtest/bios-tables-test-allowed-diff.h +++ b/tests/qtest/bios-tables-test-allowed-diff.h @@ -1 +1,12 @@ /* List of comma-separated changed AML files to ignore */ +"tests/data/acpi/pc/DSDT", +"tests/data/acpi/pc/DSDT.acpihmat", +"tests/data/acpi/pc/DSDT.bridge", +"tests/data/acpi/pc/DSDT.cphp", +"tests/data/acpi/pc/DSDT.dimmpxm", +"tests/data/acpi/pc/DSDT.hpbridge", +"tests/data/acpi/pc/DSDT.hpbrroot", +"tests/data/acpi/pc/DSDT.ipmikcs", +"tests/data/acpi/pc/DSDT.memhp", +"tests/data/acpi/pc/DSDT.numamem", +"tests/data/acpi/pc/DSDT.roothp", From b32bd763a1ca929677e22ae1c51cb3920921bdce Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Mon, 15 Mar 2021 14:00:58 -0400 Subject: [PATCH 10/19] pci: introduce acpi-index property for PCI device In x86/ACPI world, linux distros are using predictable network interface naming since systemd v197. Which on QEMU based VMs results into path based naming scheme, that names network interfaces based on PCI topology. With itm on has to plug NIC in exactly the same bus/slot, which was used when disk image was first provisioned/configured or one risks to loose network configuration due to NIC being renamed to actually used topology. That also restricts freedom to reshape PCI configuration of VM without need to reconfigure used guest image. systemd also offers "onboard" naming scheme which is preferred over PCI slot/topology one, provided that firmware implements: " PCI Firmware Specification 3.1 4.6.7. DSM for Naming a PCI or PCI Express Device Under Operating Systems " that allows to assign user defined index to PCI device, which systemd will use to name NIC. For example, using -device e1000,acpi-index=100 guest will rename NIC to 'eno100', where 'eno' is default prefix for "onboard" naming scheme. This doesn't require any advance configuration on guest side to com in effect at 'onboard' scheme takes priority over path based naming. Hope is that 'acpi-index' it will be easier to consume by management layer, compared to forcing specific PCI topology and/or having several disk image templates for different topologies and will help to simplify process of spawning VM from the same template without need to reconfigure guest NIC. This patch adds, 'acpi-index'* property and wires up a 32bit register on top of pci hotplug register block to pass index value to AML code at runtime. Following patch will add corresponding _DSM code and wire it up to PCI devices described in ACPI. *) name comes from linux kernel terminology Signed-off-by: Igor Mammedov Message-Id: <20210315180102.3008391-3-imammedo@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/pci.c | 1 - hw/acpi/pcihp.c | 58 +++++++++++++++++++++++++++++++++++++++-- hw/acpi/piix4.c | 3 ++- hw/acpi/trace-events | 2 ++ hw/i386/acpi-build.c | 13 ++++++++- hw/pci/pci.c | 1 + include/hw/acpi/pcihp.h | 9 +++++-- include/hw/pci/pci.h | 1 + 8 files changed, 81 insertions(+), 7 deletions(-) diff --git a/hw/acpi/pci.c b/hw/acpi/pci.c index ec455c3b25..75b1103ec4 100644 --- a/hw/acpi/pci.c +++ b/hw/acpi/pci.c @@ -59,4 +59,3 @@ void build_mcfg(GArray *table_data, BIOSLinker *linker, AcpiMcfgInfo *info, build_header(linker, table_data, (void *)(table_data->data + mcfg_start), "MCFG", table_data->len - mcfg_start, 1, oem_id, oem_table_id); } - diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c index 9dc4d3e2db..ceab287bd3 100644 --- a/hw/acpi/pcihp.c +++ b/hw/acpi/pcihp.c @@ -39,12 +39,13 @@ #include "trace.h" #define ACPI_PCIHP_ADDR 0xae00 -#define ACPI_PCIHP_SIZE 0x0014 +#define ACPI_PCIHP_SIZE 0x0018 #define PCI_UP_BASE 0x0000 #define PCI_DOWN_BASE 0x0004 #define PCI_EJ_BASE 0x0008 #define PCI_RMV_BASE 0x000c #define PCI_SEL_BASE 0x0010 +#define PCI_AIDX_BASE 0x0014 typedef struct AcpiPciHpFind { int bsel; @@ -251,9 +252,13 @@ void acpi_pcihp_reset(AcpiPciHpState *s, bool acpihp_root_off) acpi_pcihp_update(s); } +#define ONBOARD_INDEX_MAX (16 * 1024 - 1) + void acpi_pcihp_device_pre_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { + PCIDevice *pdev = PCI_DEVICE(dev); + /* Only hotplugged devices need the hotplug capability. */ if (dev->hotplugged && acpi_pcihp_get_bsel(pci_get_bus(PCI_DEVICE(dev))) < 0) { @@ -261,6 +266,17 @@ void acpi_pcihp_device_pre_plug_cb(HotplugHandler *hotplug_dev, ACPI_PCIHP_PROP_BSEL "' set"); return; } + + /* + * capped by systemd (see: udev-builtin-net_id.c) + * as it's the only known user honor it to avoid users + * misconfigure QEMU and then wonder why acpi-index doesn't work + */ + if (pdev->acpi_index > ONBOARD_INDEX_MAX) { + error_setg(errp, "acpi-index should be less or equal to %u", + ONBOARD_INDEX_MAX); + return; + } } void acpi_pcihp_device_plug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s, @@ -347,7 +363,6 @@ static uint64_t pci_read(void *opaque, hwaddr addr, unsigned int size) trace_acpi_pci_down_read(val); break; case PCI_EJ_BASE: - /* No feature defined yet */ trace_acpi_pci_features_read(val); break; case PCI_RMV_BASE: @@ -357,6 +372,12 @@ static uint64_t pci_read(void *opaque, hwaddr addr, unsigned int size) case PCI_SEL_BASE: val = s->hotplug_select; trace_acpi_pci_sel_read(val); + break; + case PCI_AIDX_BASE: + val = s->acpi_index; + s->acpi_index = 0; + trace_acpi_pci_acpi_index_read(val); + break; default: break; } @@ -367,8 +388,35 @@ static uint64_t pci_read(void *opaque, hwaddr addr, unsigned int size) static void pci_write(void *opaque, hwaddr addr, uint64_t data, unsigned int size) { + int slot; + PCIBus *bus; + BusChild *kid, *next; AcpiPciHpState *s = opaque; + + s->acpi_index = 0; switch (addr) { + case PCI_AIDX_BASE: + /* + * fetch acpi-index for specified slot so that follow up read from + * PCI_AIDX_BASE can return it to guest + */ + slot = ctz32(data); + + if (s->hotplug_select >= ACPI_PCIHP_MAX_HOTPLUG_BUS) { + break; + } + + bus = acpi_pcihp_find_hotplug_bus(s, s->hotplug_select); + QTAILQ_FOREACH_SAFE(kid, &bus->qbus.children, sibling, next) { + Object *o = OBJECT(kid->child); + PCIDevice *dev = PCI_DEVICE(o); + if (PCI_SLOT(dev->devfn) == slot) { + s->acpi_index = object_property_get_uint(o, "acpi-index", NULL); + break; + } + } + trace_acpi_pci_acpi_index_write(s->hotplug_select, slot, s->acpi_index); + break; case PCI_EJ_BASE: if (s->hotplug_select >= ACPI_PCIHP_MAX_HOTPLUG_BUS) { break; @@ -413,6 +461,12 @@ void acpi_pcihp_init(Object *owner, AcpiPciHpState *s, PCIBus *root_bus, OBJ_PROP_FLAG_READ); } +bool vmstate_acpi_pcihp_use_acpi_index(void *opaque, int version_id) +{ + AcpiPciHpState *s = opaque; + return s->acpi_index; +} + const VMStateDescription vmstate_acpi_pcihp_pci_status = { .name = "acpi_pcihp_pci_status", .version_id = 1, diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c index 1efc0ded9f..6056d51667 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c @@ -297,7 +297,8 @@ static const VMStateDescription vmstate_acpi = { 2, vmstate_pci_status, struct AcpiPciHpPciStatus), VMSTATE_PCI_HOTPLUG(acpi_pci_hotplug, PIIX4PMState, - vmstate_test_use_acpi_hotplug_bridge), + vmstate_test_use_acpi_hotplug_bridge, + vmstate_acpi_pcihp_use_acpi_index), VMSTATE_END_OF_LIST() }, .subsections = (const VMStateDescription*[]) { diff --git a/hw/acpi/trace-events b/hw/acpi/trace-events index f91ced477d..dcc1438f3a 100644 --- a/hw/acpi/trace-events +++ b/hw/acpi/trace-events @@ -41,6 +41,8 @@ acpi_pci_unplug_request(int bsel, int slot) "bsel: %d slot: %d" acpi_pci_up_read(uint32_t val) "%" PRIu32 acpi_pci_down_read(uint32_t val) "%" PRIu32 acpi_pci_features_read(uint32_t val) "%" PRIu32 +acpi_pci_acpi_index_read(uint32_t val) "%" PRIu32 +acpi_pci_acpi_index_write(unsigned bsel, unsigned slot, uint32_t aidx) "bsel: %u slot: %u aidx: %" PRIu32 acpi_pci_rmv_read(uint32_t val) "%" PRIu32 acpi_pci_sel_read(uint32_t val) "%" PRIu32 acpi_pci_ej_write(uint64_t addr, uint64_t data) "0x%" PRIx64 " <== %" PRIu64 diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 442b4629a9..e49fae2bfd 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -1168,9 +1168,10 @@ static void build_piix4_pci_hotplug(Aml *table) aml_append(scope, field); aml_append(scope, - aml_operation_region("BNMR", AML_SYSTEM_IO, aml_int(0xae10), 0x04)); + aml_operation_region("BNMR", AML_SYSTEM_IO, aml_int(0xae10), 0x08)); field = aml_field("BNMR", AML_DWORD_ACC, AML_NOLOCK, AML_WRITE_AS_ZEROS); aml_append(field, aml_named_field("BNUM", 32)); + aml_append(field, aml_named_field("PIDX", 32)); aml_append(scope, field); aml_append(scope, aml_mutex("BLCK", 0)); @@ -1184,6 +1185,16 @@ static void build_piix4_pci_hotplug(Aml *table) aml_append(method, aml_return(aml_int(0))); aml_append(scope, method); + method = aml_method("AIDX", 2, AML_NOTSERIALIZED); + aml_append(method, aml_acquire(aml_name("BLCK"), 0xFFFF)); + aml_append(method, aml_store(aml_arg(0), aml_name("BNUM"))); + aml_append(method, + aml_store(aml_shiftleft(aml_int(1), aml_arg(1)), aml_name("PIDX"))); + aml_append(method, aml_store(aml_name("PIDX"), aml_local(0))); + aml_append(method, aml_release(aml_name("BLCK"))); + aml_append(method, aml_return(aml_local(0))); + aml_append(scope, method); + aml_append(table, scope); } diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 0eadcdbc9e..ac9a24889c 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -79,6 +79,7 @@ static Property pci_props[] = { QEMU_PCIE_EXTCAP_INIT_BITNR, true), DEFINE_PROP_STRING("failover_pair_id", PCIDevice, failover_pair_id), + DEFINE_PROP_UINT32("acpi-index", PCIDevice, acpi_index, 0), DEFINE_PROP_END_OF_LIST() }; diff --git a/include/hw/acpi/pcihp.h b/include/hw/acpi/pcihp.h index dfd375820f..2dd90aea30 100644 --- a/include/hw/acpi/pcihp.h +++ b/include/hw/acpi/pcihp.h @@ -46,6 +46,7 @@ typedef struct AcpiPciHpPciStatus { typedef struct AcpiPciHpState { AcpiPciHpPciStatus acpi_pcihp_pci_status[ACPI_PCIHP_MAX_HOTPLUG_BUS]; uint32_t hotplug_select; + uint32_t acpi_index; PCIBus *root; MemoryRegion io; bool legacy_piix; @@ -71,13 +72,17 @@ void acpi_pcihp_reset(AcpiPciHpState *s, bool acpihp_root_off); extern const VMStateDescription vmstate_acpi_pcihp_pci_status; -#define VMSTATE_PCI_HOTPLUG(pcihp, state, test_pcihp) \ +bool vmstate_acpi_pcihp_use_acpi_index(void *opaque, int version_id); + +#define VMSTATE_PCI_HOTPLUG(pcihp, state, test_pcihp, test_acpi_index) \ VMSTATE_UINT32_TEST(pcihp.hotplug_select, state, \ test_pcihp), \ VMSTATE_STRUCT_ARRAY_TEST(pcihp.acpi_pcihp_pci_status, state, \ ACPI_PCIHP_MAX_HOTPLUG_BUS, \ test_pcihp, 1, \ vmstate_acpi_pcihp_pci_status, \ - AcpiPciHpPciStatus) + AcpiPciHpPciStatus), \ + VMSTATE_UINT32_TEST(pcihp.acpi_index, state, \ + test_acpi_index) #endif diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index 1bc231480f..6be4e0c460 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -359,6 +359,7 @@ struct PCIDevice { /* ID of standby device in net_failover pair */ char *failover_pair_id; + uint32_t acpi_index; }; void pci_register_bar(PCIDevice *pci_dev, int region_num, From 4fd7da4c0336c8fd822cd808d62f7ff8c9936aef Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Mon, 15 Mar 2021 14:00:59 -0400 Subject: [PATCH 11/19] pci: acpi: ensure that acpi-index is unique it helps to avoid device naming conflicts when guest OS is configured to use acpi-index for naming. Spec ialso says so: PCI Firmware Specification Revision 3.2 4.6.7. _DSM for Naming a PCI or PCI Express Device Under Operating Systems " Instance number must be unique under \_SB scope. This instance number does not have to be sequential in a given system configuration. " Signed-off-by: Igor Mammedov Message-Id: <20210315180102.3008391-4-imammedo@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/pcihp.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c index ceab287bd3..f4cb3c979d 100644 --- a/hw/acpi/pcihp.c +++ b/hw/acpi/pcihp.c @@ -52,6 +52,21 @@ typedef struct AcpiPciHpFind { PCIBus *bus; } AcpiPciHpFind; +static gint g_cmp_uint32(gconstpointer a, gconstpointer b, gpointer user_data) +{ + return a - b; +} + +static GSequence *pci_acpi_index_list(void) +{ + static GSequence *used_acpi_index_list; + + if (!used_acpi_index_list) { + used_acpi_index_list = g_sequence_new(NULL); + } + return used_acpi_index_list; +} + static int acpi_pcihp_get_bsel(PCIBus *bus) { Error *local_err = NULL; @@ -277,6 +292,23 @@ void acpi_pcihp_device_pre_plug_cb(HotplugHandler *hotplug_dev, ONBOARD_INDEX_MAX); return; } + + /* + * make sure that acpi-index is unique across all present PCI devices + */ + if (pdev->acpi_index) { + GSequence *used_indexes = pci_acpi_index_list(); + + if (g_sequence_lookup(used_indexes, GINT_TO_POINTER(pdev->acpi_index), + g_cmp_uint32, NULL)) { + error_setg(errp, "a PCI device with acpi-index = %" PRIu32 + " already exist", pdev->acpi_index); + return; + } + g_sequence_insert_sorted(used_indexes, + GINT_TO_POINTER(pdev->acpi_index), + g_cmp_uint32, NULL); + } } void acpi_pcihp_device_plug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s, @@ -315,8 +347,22 @@ void acpi_pcihp_device_plug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s, void acpi_pcihp_device_unplug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s, DeviceState *dev, Error **errp) { + PCIDevice *pdev = PCI_DEVICE(dev); + trace_acpi_pci_unplug(PCI_SLOT(PCI_DEVICE(dev)->devfn), acpi_pcihp_get_bsel(pci_get_bus(PCI_DEVICE(dev)))); + + /* + * clean up acpi-index so it could reused by another device + */ + if (pdev->acpi_index) { + GSequence *used_indexes = pci_acpi_index_list(); + + g_sequence_remove(g_sequence_lookup(used_indexes, + GINT_TO_POINTER(pdev->acpi_index), + g_cmp_uint32, NULL)); + } + qdev_unrealize(dev); } From 910e4069710d854757c8fe8921dcff5b62dcd960 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Mon, 15 Mar 2021 14:01:00 -0400 Subject: [PATCH 12/19] acpi: add aml_to_decimalstring() and aml_call6() helpers it will be used by follow up patches Signed-off-by: Igor Mammedov Message-Id: <20210315180102.3008391-5-imammedo@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/aml-build.c | 28 ++++++++++++++++++++++++++++ include/hw/acpi/aml-build.h | 3 +++ 2 files changed, 31 insertions(+) diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c index a2cd7a5830..d33ce8954a 100644 --- a/hw/acpi/aml-build.c +++ b/hw/acpi/aml-build.c @@ -634,6 +634,19 @@ Aml *aml_to_buffer(Aml *src, Aml *dst) return var; } +/* ACPI 2.0a: 17.2.4.4 Type 2 Opcodes Encoding: DefToDecimalString */ +Aml *aml_to_decimalstring(Aml *src, Aml *dst) +{ + Aml *var = aml_opcode(0x97 /* ToDecimalStringOp */); + aml_append(var, src); + if (dst) { + aml_append(var, dst); + } else { + build_append_byte(var->buf, 0x00 /* NullNameOp */); + } + return var; +} + /* ACPI 1.0b: 16.2.5.4 Type 2 Opcodes Encoding: DefStore */ Aml *aml_store(Aml *val, Aml *target) { @@ -835,6 +848,21 @@ Aml *aml_call5(const char *method, Aml *arg1, Aml *arg2, Aml *arg3, Aml *arg4, return var; } +/* helper to call method with 5 arguments */ +Aml *aml_call6(const char *method, Aml *arg1, Aml *arg2, Aml *arg3, Aml *arg4, + Aml *arg5, Aml *arg6) +{ + Aml *var = aml_alloc(); + build_append_namestring(var->buf, "%s", method); + aml_append(var, arg1); + aml_append(var, arg2); + aml_append(var, arg3); + aml_append(var, arg4); + aml_append(var, arg5); + aml_append(var, arg6); + return var; +} + /* * ACPI 5.0: 6.4.3.8.1 GPIO Connection Descriptor * Type 1, Large Item Name 0xC diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h index 380d3e3924..e652106e26 100644 --- a/include/hw/acpi/aml-build.h +++ b/include/hw/acpi/aml-build.h @@ -301,6 +301,7 @@ Aml *aml_arg(int pos); Aml *aml_to_integer(Aml *arg); Aml *aml_to_hexstring(Aml *src, Aml *dst); Aml *aml_to_buffer(Aml *src, Aml *dst); +Aml *aml_to_decimalstring(Aml *src, Aml *dst); Aml *aml_store(Aml *val, Aml *target); Aml *aml_and(Aml *arg1, Aml *arg2, Aml *dst); Aml *aml_or(Aml *arg1, Aml *arg2, Aml *dst); @@ -323,6 +324,8 @@ Aml *aml_call3(const char *method, Aml *arg1, Aml *arg2, Aml *arg3); Aml *aml_call4(const char *method, Aml *arg1, Aml *arg2, Aml *arg3, Aml *arg4); Aml *aml_call5(const char *method, Aml *arg1, Aml *arg2, Aml *arg3, Aml *arg4, Aml *arg5); +Aml *aml_call6(const char *method, Aml *arg1, Aml *arg2, Aml *arg3, Aml *arg4, + Aml *arg5, Aml *arg6); Aml *aml_gpio_int(AmlConsumerAndProducer con_and_pro, AmlLevelAndEdge edge_level, AmlActiveHighAndLow active_level, AmlShared shared, From b7f23f62e40bb7bc87fe170471a31ab1fb8a0784 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Mon, 15 Mar 2021 14:01:01 -0400 Subject: [PATCH 13/19] pci: acpi: add _DSM method to PCI devices Implement _DSM according to: PCI Firmware Specification 3.1 4.6.7. DSM for Naming a PCI or PCI Express Device Under Operating Systems and wire it up to cold and hot-plugged PCI devices. Feature depends on ACPI hotplug being enabled (as that provides PCI devices descriptions in ACPI and MMIO registers that are reused to fetch acpi-index). acpi-index should work for - cold plugged NICs: $QEMU -device e1000,acpi-index=100 => 'eno100' - hot-plugged (monitor) device_add e1000,acpi-index=200,id=remove_me => 'eno200' - re-plugged (monitor) device_del remove_me (monitor) device_add e1000,acpi-index=1 => 'eno1' Windows also sees index under "PCI Label Id" field in properties dialog but otherwise it doesn't seem to have any effect. Signed-off-by: Igor Mammedov Message-Id: <20210315180102.3008391-6-imammedo@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/acpi-build.c | 105 ++++++++++++++++++++++++++++++++++++++++-- include/hw/acpi/pci.h | 1 + 2 files changed, 103 insertions(+), 3 deletions(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index e49fae2bfd..a95b42c8b3 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -397,6 +397,13 @@ static void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus, aml_call2("PCEJ", aml_name("BSEL"), aml_name("_SUN")) ); aml_append(dev, method); + method = aml_method("_DSM", 4, AML_SERIALIZED); + aml_append(method, + aml_return(aml_call6("PDSM", aml_arg(0), aml_arg(1), + aml_arg(2), aml_arg(3), + aml_name("BSEL"), aml_name("_SUN"))) + ); + aml_append(dev, method); aml_append(parent_scope, dev); build_append_pcihp_notify_entry(notify_method, slot); @@ -424,6 +431,16 @@ static void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus, dev = aml_device("S%.02X", PCI_DEVFN(slot, 0)); aml_append(dev, aml_name_decl("_ADR", aml_int(slot << 16))); + if (bsel) { + aml_append(dev, aml_name_decl("_SUN", aml_int(slot))); + method = aml_method("_DSM", 4, AML_SERIALIZED); + aml_append(method, aml_return( + aml_call6("PDSM", aml_arg(0), aml_arg(1), aml_arg(2), + aml_arg(3), aml_name("BSEL"), aml_name("_SUN")) + )); + aml_append(dev, method); + } + if (pc->class_id == PCI_CLASS_DISPLAY_VGA) { /* add VGA specific AML methods */ int s3d; @@ -446,9 +463,7 @@ static void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus, aml_append(method, aml_return(aml_int(s3d))); aml_append(dev, method); } else if (hotplug_enabled_dev) { - /* add _SUN/_EJ0 to make slot hotpluggable */ - aml_append(dev, aml_name_decl("_SUN", aml_int(slot))); - + /* add _EJ0 to make slot hotpluggable */ method = aml_method("_EJ0", 1, AML_NOTSERIALIZED); aml_append(method, aml_call2("PCEJ", aml_name("BSEL"), aml_name("_SUN")) @@ -511,6 +526,88 @@ static void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus, qobject_unref(bsel); } +Aml *aml_pci_device_dsm(void) +{ + Aml *method, *UUID, *ifctx, *ifctx1, *ifctx2, *ifctx3, *elsectx; + Aml *acpi_index = aml_local(0); + Aml *zero = aml_int(0); + Aml *bnum = aml_arg(4); + Aml *func = aml_arg(2); + Aml *rev = aml_arg(1); + Aml *sun = aml_arg(5); + + method = aml_method("PDSM", 6, AML_SERIALIZED); + + /* + * PCI Firmware Specification 3.1 + * 4.6. _DSM Definitions for PCI + */ + UUID = aml_touuid("E5C937D0-3553-4D7A-9117-EA4D19C3434D"); + ifctx = aml_if(aml_equal(aml_arg(0), UUID)); + { + aml_append(ifctx, aml_store(aml_call2("AIDX", bnum, sun), acpi_index)); + ifctx1 = aml_if(aml_equal(func, zero)); + { + uint8_t byte_list[1]; + + ifctx2 = aml_if(aml_equal(rev, aml_int(2))); + { + /* + * advertise function 7 if device has acpi-index + * acpi_index values: + * 0: not present (default value) + * FFFFFFFF: not supported (old QEMU without PIDX reg) + * other: device's acpi-index + */ + ifctx3 = aml_if(aml_lnot( + aml_or(aml_equal(acpi_index, zero), + aml_equal(acpi_index, aml_int(0xFFFFFFFF)), NULL) + )); + { + byte_list[0] = + 1 /* have supported functions */ | + 1 << 7 /* support for function 7 */ + ; + aml_append(ifctx3, aml_return(aml_buffer(1, byte_list))); + } + aml_append(ifctx2, ifctx3); + } + aml_append(ifctx1, ifctx2); + + byte_list[0] = 0; /* nothing supported */ + aml_append(ifctx1, aml_return(aml_buffer(1, byte_list))); + } + aml_append(ifctx, ifctx1); + elsectx = aml_else(); + /* + * PCI Firmware Specification 3.1 + * 4.6.7. _DSM for Naming a PCI or PCI Express Device Under + * Operating Systems + */ + ifctx1 = aml_if(aml_equal(func, aml_int(7))); + { + Aml *pkg = aml_package(2); + Aml *ret = aml_local(1); + + aml_append(pkg, zero); + /* + * optional, if not impl. should return null string + */ + aml_append(pkg, aml_string("%s", "")); + aml_append(ifctx1, aml_store(pkg, ret)); + /* + * update acpi-index to actual value + */ + aml_append(ifctx1, aml_store(acpi_index, aml_index(ret, zero))); + aml_append(ifctx1, aml_return(ret)); + } + aml_append(elsectx, ifctx1); + aml_append(ifctx, elsectx); + } + aml_append(method, ifctx); + return method; +} + /** * build_prt_entry: * @link_name: link name for PCI route entry @@ -1195,6 +1292,8 @@ static void build_piix4_pci_hotplug(Aml *table) aml_append(method, aml_return(aml_local(0))); aml_append(scope, method); + aml_append(scope, aml_pci_device_dsm()); + aml_append(table, scope); } diff --git a/include/hw/acpi/pci.h b/include/hw/acpi/pci.h index e514f179d8..b5deee0a9d 100644 --- a/include/hw/acpi/pci.h +++ b/include/hw/acpi/pci.h @@ -35,4 +35,5 @@ typedef struct AcpiMcfgInfo { void build_mcfg(GArray *table_data, BIOSLinker *linker, AcpiMcfgInfo *info, const char *oem_id, const char *oem_table_id); +Aml *aml_pci_device_dsm(void); #endif From 835fde4a781f8abf230d567f759647c403944b57 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Mon, 15 Mar 2021 14:01:02 -0400 Subject: [PATCH 14/19] tests: acpi: update expected blobs expected changes are: * larger BNMR operation region * new PIDX field and method to fetch acpi-index * PDSM method that implements PCI device _DSM + per device _DSM that calls PDSM @@ -221,10 +221,11 @@ DefinitionBlock ("", "DSDT", 1, "BOCHS ", "BXPC ", 0x00000001) B0EJ, 32 } - OperationRegion (BNMR, SystemIO, 0xAE10, 0x04) + OperationRegion (BNMR, SystemIO, 0xAE10, 0x08) Field (BNMR, DWordAcc, NoLock, WriteAsZeros) { - BNUM, 32 + BNUM, 32, + PIDX, 32 } Mutex (BLCK, 0x00) @@ -236,6 +237,52 @@ DefinitionBlock ("", "DSDT", 1, "BOCHS ", "BXPC ", 0x00000001) Release (BLCK) Return (Zero) } + + Method (AIDX, 2, NotSerialized) + { + Acquire (BLCK, 0xFFFF) + BNUM = Arg0 + PIDX = (One << Arg1) + Local0 = PIDX /* \_SB_.PCI0.PIDX */ + Release (BLCK) + Return (Local0) + } + + Method (PDSM, 6, Serialized) + { + If ((Arg0 == ToUUID ("e5c937d0-3553-4d7a-9117-ea4d19c3434d") /* Device Labeling Interface */)) + { + Local0 = AIDX (Arg4, Arg5) + If ((Arg2 == Zero)) + { + If ((Arg1 == 0x02)) + { + If (!((Local0 == Zero) | (Local0 == 0xFFFFFFFF))) + { + Return (Buffer (One) + { + 0x81 // . + }) + } + } + + Return (Buffer (One) + { + 0x00 // . + }) + } + ElseIf ((Arg2 == 0x07)) + { + Local1 = Package (0x02) + { + Zero, + "" + } + Local1 [Zero] = Local0 + Return (Local1) + } + } + } } Scope (_SB) @@ -785,7 +832,7 @@ DefinitionBlock ("", "DSDT", 1, "BOCHS ", "BXPC ", 0x00000001) 0xAE00, // Range Minimum 0xAE00, // Range Maximum 0x01, // Alignment - 0x14, // Length + 0x18, // Length ) }) } @@ -842,11 +889,22 @@ DefinitionBlock ("", "DSDT", 1, "BOCHS ", "BXPC ", 0x00000001) Device (S00) { Name (_ADR, Zero) // _ADR: Address + Name (_SUN, Zero) // _SUN: Slot User Number + Method (_DSM, 4, Serialized) // _DSM: Device-Specific Method + { + Return (PDSM (Arg0, Arg1, Arg2, Arg3, BSEL, _SUN)) + } } Device (S10) { Name (_ADR, 0x00020000) // _ADR: Address + Name (_SUN, 0x02) // _SUN: Slot User Number + Method (_DSM, 4, Serialized) // _DSM: Device-Specific Method + { + Return (PDSM (Arg0, Arg1, Arg2, Arg3, BSEL, _SUN)) + } + Method (_S1D, 0, NotSerialized) // _S1D: S1 Device State { Return (Zero) [...] Signed-off-by: Igor Mammedov Message-Id: <20210315180102.3008391-7-imammedo@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- tests/data/acpi/pc/DSDT | Bin 5065 -> 6002 bytes tests/data/acpi/pc/DSDT.acpihmat | Bin 6390 -> 7327 bytes tests/data/acpi/pc/DSDT.bridge | Bin 6924 -> 8668 bytes tests/data/acpi/pc/DSDT.cphp | Bin 5529 -> 6466 bytes tests/data/acpi/pc/DSDT.dimmpxm | Bin 6719 -> 7656 bytes tests/data/acpi/pc/DSDT.hpbridge | Bin 5026 -> 5969 bytes tests/data/acpi/pc/DSDT.ipmikcs | Bin 5137 -> 6074 bytes tests/data/acpi/pc/DSDT.memhp | Bin 6424 -> 7361 bytes tests/data/acpi/pc/DSDT.nohpet | Bin 4923 -> 5860 bytes tests/data/acpi/pc/DSDT.numamem | Bin 5071 -> 6008 bytes tests/data/acpi/pc/DSDT.roothp | Bin 5261 -> 6210 bytes tests/qtest/bios-tables-test-allowed-diff.h | 11 ----------- 12 files changed, 11 deletions(-) diff --git a/tests/data/acpi/pc/DSDT b/tests/data/acpi/pc/DSDT index 11ef89bd322271ee30f3971b880dadfda565d413..b9dd9b38e4ef720636ba19ccbdf262de8a6439d5 100644 GIT binary patch delta 1301 zcmajfKTE?<5CHH?>Yo~-P1puW8^!=sfM~y4g)ZgSKKwr>LO{d zYeBzxD9F0DuE=^;8_%TFO)~C_Rix=;D`m|IIjyWUn@*mVojs-ilsGilW`q_!TT**6 zs-X?I>28u2L!9t5|M+6e?Q6&3H*Mrj(Hz>Vv}3yyqzLW^DR8VCIyoRV5Swqd4tS!E zIivw}H`Ka*lnJ1MqTn-N*Ma!^GqZm6h6-WyqGWSj#A>xL-Dw(qLAj zMm%o$t)#jRe#^+}Acd{gT)ao8%NL5<)X{=jUF|XwZOa3&f1YDIJ&;=c5-NB=lNnWL zXS4Lmtjb+P>Yp0x^!OJnV4#SBQw-c-;2i@)1IQLh43sc%hJjlQd|+TWfNY^)U>5`D v7`Vs4Ck93ZWQ!#XI2gFVzyk)pFfb+}TP$OsjDbrGG{k7&-$zh?6`z?OB0Yu? delta 345 zcmeyQcT%0pCDmqyI*(Jxoj<9FtEldGoSFH*!1q`35;V`GxvUR%O=P{Dhf@ zlTl>yN1jY0*MgY%V5fKi-~0e)PXi98U{@c8=qB!91A};uct@8YAV(|M5X#|UU|?dH ze2Q0=QFHQ3UJd5xCe6vZd=R=2N^gbIKcTcOKSaI}N*{&NoB|MWUno5hN?(Q2l7bNV zSSY;^Niz1tEIsTJI1M z;R^c&qHCv$uugr2h%P}yL|x5So&uS77=|~$H_Tk#ccQVtNvgEghyXCp7u*%gDn8O; zGk%v=^9qW*Dhf(o9?zwvbu#XluJTjKPReRgAuDFuZL>(r<{nc5N=zCIZGskUgO~bq zk}7+wtdkUd0nYn4fB&M+?aN1@S9#<{lug>JvSnCNQU+^;lsMK393Kz^h~aK`4%j{i zQ=`2#b!=S$6bP&%7Rvp zsqwh((v#9ExO6qI03XsjbGdCgU9nW&q>dJ}`bwXg=vp@b`{y~f*8{%OFCl>qG?|gS z-E5X#X?3BGB>j_-UXOo41p_A-xW>R62EqYkND%|u7&yhiEe75(Fv=raq%dG&;0yzI w82G?IB#3OWgn=CloMWJcflmyK4I^8mF|dn)1_mB5@HHG8`u7oB218HG4_Q%&Gynhq delta 345 zcmbPl`OT2aCD5fk8Y+yrWAHkfRlB2<7lFFfcJp z{w^fTs5x0oSc5sbNpo@(l%5BrA3uWnZzLCZcw@l zN}q$$LgEnlFep6>O5cOhN)izHG$_3aO232BMv@TuGAO+ZO8PErv0HYv@`$E9vC F0szp9V*vmF diff --git a/tests/data/acpi/pc/DSDT.bridge b/tests/data/acpi/pc/DSDT.bridge index a234075518fa8e187349d64c313779cc25db8299..a9b4d5659457f6de30b993962bce673c9413d81d 100644 GIT binary patch delta 2375 zcmeA%yW`B|66_LkN0EVn@y153Jxok~f|E}$c?)qwHwrlU`35;V`Gxu_1bDhcOjcml zs~6FB1TvYTm7RQ?z5o9&$N)(fR5E5VfRq$~h=gc$kmQmC5f`=qmtbGM1wO2kGX#aX z1TL7Ld>U+8Mk$u^4C=Gt@91WedKNWpDF3fM#mSYb#= zK@1@U?SvHk=Of^TyM#>O5F=nh13v);afD3hB&2{*fPfYE37NnpPQV0HLJATHDd;Ao zU>hL?4+$yYks#m;b3zK%3lcD)myiiN2q`cSB47odq$WRF3AMReh?9v4n6XE*C9sqR XhQnyKgoMLrwuFSk0A))($$N|dt?#vOym?uo8@ZkQe1n{w{6c*vt1|0te!|Sd z$tW`UBTuH0$CH@&V5fKihbIBfo(3FF!LB|G(M{aJ1_tpQ@s2J*K#o?hA(X?zz`(>X z`3$eDq<>kkp#?+*P>`7ctdTLgNoBGkpE7fFlg8u(K6N0iIe9Uk2AF;hrM3AX{Cp_A z8A^YL($)eH`FbdQ7)rAXLd3nHbU&2745h_|Ao9^rdOnnX45ihDA@bQ!dOei>45iIQ uAoA5vdOwtA7KMnrL+NfPeI7~+i$UbW#WWYs5MyH0oIH$uxS3tyDkA`KW3MOx diff --git a/tests/data/acpi/pc/DSDT.cphp b/tests/data/acpi/pc/DSDT.cphp index 6ac47a7d1001c711b957f8e28cab1a143d2cf65f..8d86155e275aa688f8767dd92c4b9df08b4a18ad 100644 GIT binary patch delta 1301 zcmajfPfG$(5CHJmRZAArT~{%oL(ri_2sI*z2wOMkkGqJQ5NxpPA#$x?uLU7`icZyw zsM`wr1;R^E5!Iz`eSj`OMM&SleB~*Kd52+m^LxY0<(((*Gn}l+D^eVQIk@1iSymOu zBfA{->vdjIm10g(OUiIzQC=m(p7B~FIoC~DeMHLVmOCB0Ld*6JV*w>L4W>Ryi>?`w z`wFtA1f9H(R0NUZpQoPQwaH!ODArcu4>`rAy|%VZXM|M2i4%)s9r1XNm_W=xr+dH^ zJWQPiXvUlICz##^%Fz|r^fk(EO!N3Lvjxlh9flmyG0U5&H8Hv@XS0<{JoRhA995 delta 345 zcmX?PG*g?)CDrP7Q%eJ6|N=F2wBTK$O1Uo3)NE$a8dda&hJf?77a}@g5#xhaysFaw8<3c z#kiJ87+zyS&Vbj@=2Z|QhC7$fVbkUEg*E0_LNCtLXgz<{2S9#3PpTD&ZoeKG9AKAT zneGUFBGu4Cp|iDGiC%QUB?ewG&>$jD3}7IOfg=oDVc;DDjZMfFLl{`bz%d4{G4O$b yKr^z%Fa}mIaDssw418fA6hXEa#lR{C&Mc({V$fHZ`=5fk8Y+yrWAHkfRlB2<7lFFfcJp zb`X(e)SMhIqQM;9q&ayplztASwM8M~`A~W@l>QE-t;Hbn^-%gSlx7!)h4H0*T(%n${ytL+K0ht?& E09A8hhX4Qo diff --git a/tests/data/acpi/pc/DSDT.hpbridge b/tests/data/acpi/pc/DSDT.hpbridge index 9dfac45eab12b680bc963d0528553a7149a378cc..5d8ba195055f2eda74223323baeb88390ea36739 100644 GIT binary patch delta 1315 zcmajf&o9GZ7zgmDZ~d62SZ6X42Z_imi)ACik**?GYxW|NO4{AFA?X#7Wpi=my;IVN zh)BfM&4r5)#M!~c-yo)M>?Gmo;mwmipMKxxx%4>tn2hl^w+LO#l#Au5!xf9FR&eeho1RVvR63afvl^qx zq!q9xv>XI1x@bT+V)=8)9-ppIEUj|K8)kK;fu`zK1|UCPB7c1l_VZF2c)*JxZMCX`>)@^)xGj8 Fd;_9>h+F^w delta 354 zcmcbpw@97KCDyN1jY0zucJkV5fKix7+|{PXi98U{@c8=qB!91A};uct@8YAV(|M5X#|UU|?dH ze2Q0Ah(FlS0-^vY#tc-XIa!iVgBe7}Lg|H2`YDvwk2(eNzQg0e`@P3|hp!duE()5VZFeI8O!r<0aBMzIBrYrS zWp3u+b4EOF z@m5OP0^Tx;c~ByjznAFI?JCvU0d=%sHa7#t(w`Lp*k3QOgBlcn-jW6$&}3O7dN9PQ z0a6OarUtc`O&^?L-~j{g7~n(54l5Yg#lQsy9x?EN0bvZeB7p%L1D6!Vd-n=Z)joeOtzCq4TexbgTbD8xv>#*=} zGKx&LQ5)9r+>htx)IBqI0z0Zg0vMWiijo={L_SD6od##5wU3%vMNZilar`V z#4&|TZqi9qq`2tjR}fT0{0_BGI|*`cIF5I}cib)SBGZ`QRYP5A^Z_u5EB=;cb%8vx zOHs}|h{(FOtjPMR)}LQc*GPY0v?7kowo=v{kaNmXv*{LT+1+Ivpv0xYHV0|Zw?(xx zuNqp9m+K%!A;w3Zvrliv_?~tmeQL=EMRRGd?H$`2ASLjU#Nk;lcDhe&AarB3a))v0PqWSJz(}Q334eomHFDbWyn@!rj}P0Qk-ne<-n{C z8>y7VS+nXgaF(&EgD6?8xk5gjuIQA?)X{=jT?`uOwhaNWzn*9Rdmy&@rB(2OCK)x- z&c^7KS(SriBRbUg-{T)}gn?@eyka06LuQ!7z$ONcF>r%{cMJ$3vc(hzwlHvtfjbO* vU?3JpwwT6%gMl*)++*Mq1MyyDix~`*F>sE71_r(`AW6gFKOaFoE4?t^B&CLx delta 344 zcmX?TIm3v{CD#vOym?uo8@ZkQe1n{w{6c*vt1|0te!?8h z$0#z{K`7J6wIC)w*ePDXH$TAH(}2S%*wu$2x`{j3z#yI@-q9rp$k7TmgmQQo7?>C) zOA5;}YEHHg)?kiq(wtlar4K-977>WJ2bAuC(ifn#h$uup0!q(;(hs1tiWo#b14^%f z(jTC-i8w^Q0!r_J(32z}!Y)v{14^HP(gKnY`4A{Q14`e4(h5=#`4lLU+8Ff0^bAUZjV zi@iXIm5G6uA+aE#Bawk&Nuq$CS$wclyujuYOe~y?5|htzXV!Zxi~)-|EDUh=G~jRw zcJ*P1Zqf`kFo@@fcXSD2;D`?n^uW8JStxAax*#=q4|gU_+={9tH*` zhQDP&P9ut!X$7CxeZ(f$@Ms6oR-ymlvzfj-F)0y-(hcWYT zGKx%2=E*d2Er^K^c8VA9%@1()G~jRwcJ*P1ZsHC$Fo@@fcXSB?aHJGEDG$+r8(vP9EIv+$l8%nQ-(x0KUIX^_c8cOen(#!%7ad#-)4W-XR zX<X=f3Ld^?mrEuy)ZNAw0G E0LKMiz5oCK diff --git a/tests/data/acpi/pc/DSDT.numamem b/tests/data/acpi/pc/DSDT.numamem index 195f8da900c5fc56c504adfef756af8f74f5823d..8632dfe8a8bdd991871a1e633162eb9a2e1497ea 100644 GIT binary patch delta 1291 zcmajePfG$(5CHJmXO?aLTQjVXkUE$IqCs@6b)!so5jP>&VAq>k!4eS`gygAfy<0J!!Ae?^EQ&|8uV zd9;I|sK~{PsFdVbAuFxYm@C-`Pt7|i(I&)vCfDhh6;?KPxhgPXvS4U&RC0Dv?2`h!S?Lki#oF>A4Oi}k?V|XvRNCOhBZNJV2#i!PprY?eQE$TydCF(Ex5P? z7N8ZehG;Fs_n9F#0Uwl2yi^2_u9g%CM|5W|ug$irRBP+Zv4YlI?orc!)(b#>Jx{tdgq?Y532b1=v_uDd zej@hJQQu^=TZ`6ozybzL44h!#1_SRHi29K$7BR4efm00JV&DSw1Ct_3mi!A|i4zWD*po(3FF!LB|G(M{aJ1_tpQ@s2J*K#o?hA(X?zz`(>X z`2w#jqvquIyc*2WO`4O9`5<&Tl->=c|3hhKeu#WKls*lm`2`^2!BBcSl)eq6QB+?S&!o%~1L{l;#$Ji2Fn7$x!+_l$I8S$j6InZeAdI GgAo8NGGr|P diff --git a/tests/data/acpi/pc/DSDT.roothp b/tests/data/acpi/pc/DSDT.roothp index 1d0a2c2f3cc4bfac75948d2ed6a69403cd18379b..cee3b4d80b51ad30153953ace46127923ce8b271 100644 GIT binary patch delta 1279 zcmeCxJY>M-66_MP562flQ`oWhWnJ@BjY`GCR!(lh0_DkB3=1SDXL2zu5S~;!Ie}qv0?&USU|1r^ z%-9Gb7#4~z5S^SQ#Km49#LC3L%aB-*(2>Z%uq1J_A2Sapqr~KBo=h%}busb5PVoW` z>n5M$v1jsIF`1uNjL~MY8n3d9hUJd*T77|i$hLD2qgcK<85paVqAq5qL6f7pB;2a?ZKM5&N;V0k=e?khX2q{=f zNWld{3Vst(pe8`T7lDKn)DTjzoRES`gcSTGq(DQEfG>gxDX1f)U?m|1R|qNiPe_55 T5CLC=3Tg7A<^Ij3LJ~{>th0qv delta 353 zcmX?P(5uPi66_MvE5g9QSh|sG4-=CI$K(@C-n=Z)joeOtzCq4TexbgTRhjiRKVjzK zWE7eFktdVOvnD1!*ePDXv1W1#uf3#yS+Jo+JV(5vOArqO12Y4MQ?RQKLv)kM|#jWZ(b* diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h index fddcfc061f..dfb8523c8b 100644 --- a/tests/qtest/bios-tables-test-allowed-diff.h +++ b/tests/qtest/bios-tables-test-allowed-diff.h @@ -1,12 +1 @@ /* List of comma-separated changed AML files to ignore */ -"tests/data/acpi/pc/DSDT", -"tests/data/acpi/pc/DSDT.acpihmat", -"tests/data/acpi/pc/DSDT.bridge", -"tests/data/acpi/pc/DSDT.cphp", -"tests/data/acpi/pc/DSDT.dimmpxm", -"tests/data/acpi/pc/DSDT.hpbridge", -"tests/data/acpi/pc/DSDT.hpbrroot", -"tests/data/acpi/pc/DSDT.ipmikcs", -"tests/data/acpi/pc/DSDT.memhp", -"tests/data/acpi/pc/DSDT.numamem", -"tests/data/acpi/pc/DSDT.roothp", From 6c2b24d1d2b19cd330d971cdbc8e6b115dc97ca4 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 4 Mar 2021 11:55:51 +0100 Subject: [PATCH 15/19] acpi: Set proper maximum size for "etc/table-loader" blob The resizeable memory region / RAMBlock that is created for the cmd blob has a maximum size of whole host pages (e.g., 4k), because RAMBlocks work on full host pages. In addition, in i386 ACPI code: acpi_align_size(tables->linker->cmd_blob, ACPI_BUILD_ALIGN_SIZE); makes sure to align to multiples of 4k, padding with 0. For example, if our cmd_blob is created with a size of 2k, the maximum size is 4k - we cannot grow beyond that. Growing might be required due to guest action when rebuilding the tables, but also on incoming migration. This automatic generation of the maximum size used to be sufficient, however, there are cases where we cross host pages now when growing at runtime: we exceed the maximum size of the RAMBlock and can crash QEMU when trying to resize the resizeable memory region / RAMBlock: $ build/qemu-system-x86_64 --enable-kvm \ -machine q35,nvdimm=on \ -smp 1 \ -cpu host \ -m size=2G,slots=8,maxmem=4G \ -object memory-backend-file,id=mem0,mem-path=/tmp/nvdimm,size=256M \ -device nvdimm,label-size=131072,memdev=mem0,id=nvdimm0,slot=1 \ -nodefaults \ -device vmgenid \ -device intel-iommu Results in: Unexpected error in qemu_ram_resize() at ../softmmu/physmem.c:1850: qemu-system-x86_64: Size too large: /rom@etc/table-loader: 0x2000 > 0x1000: Invalid argument In this configuration, we consume exactly 4k (32 entries, 128 bytes each) when creating the VM. However, once the guest boots up and maps the MCFG, we also create the MCFG table and end up consuming 2 additional entries (pointer + checksum) -- which is where we try resizing the memory region / RAMBlock, however, the maximum size does not allow for it. Currently, we get the following maximum sizes for our different mutable tables based on behavior of resizeable RAMBlock: hw table max_size ------- --------------------------------------------------------- virt "etc/acpi/tables" ACPI_BUILD_TABLE_MAX_SIZE (0x200000) virt "etc/table-loader" HOST_PAGE_ALIGN(initial_size) virt "etc/acpi/rsdp" HOST_PAGE_ALIGN(initial_size) i386 "etc/acpi/tables" ACPI_BUILD_TABLE_MAX_SIZE (0x200000) i386 "etc/table-loader" HOST_PAGE_ALIGN(initial_size) i386 "etc/acpi/rsdp" HOST_PAGE_ALIGN(initial_size) microvm "etc/acpi/tables" ACPI_BUILD_TABLE_MAX_SIZE (0x200000) microvm "etc/table-loader" HOST_PAGE_ALIGN(initial_size) microvm "etc/acpi/rsdp" HOST_PAGE_ALIGN(initial_size) Let's set the maximum table size for "etc/table-loader" to 64k, so we can properly grow at runtime, which should be good enough for the future. Migration is not concerned with the maximum size of a RAMBlock, only with the used size - so existing setups are not affected. Of course, we cannot migrate a VM that would have crash when started on older QEMU from new QEMU to older QEMU without failing early on the destination when synchronizing the RAM state: qemu-system-x86_64: Size too large: /rom@etc/table-loader: 0x2000 > 0x1000: Invalid argument qemu-system-x86_64: error while loading state for instance 0x0 of device 'ram' qemu-system-x86_64: load of migration failed: Invalid argument We'll refactor the code next, to make sure we get rid of this implicit behavior for "etc/acpi/rsdp" as well and to make the code easier to grasp. Reviewed-by: Igor Mammedov Cc: Alistair Francis Cc: Paolo Bonzini Cc: "Michael S. Tsirkin" Cc: Igor Mammedov Cc: Peter Maydell Cc: Shannon Zhao Cc: Marcel Apfelbaum Cc: Paolo Bonzini Cc: Richard Henderson Cc: Laszlo Ersek Signed-off-by: David Hildenbrand Message-Id: <20210304105554.121674-2-david@redhat.com> Reviewed-by: Laszlo Ersek Reviewed-by: Igor Mammedov Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/arm/virt-acpi-build.c | 3 ++- hw/i386/acpi-build.c | 3 ++- hw/i386/acpi-microvm.c | 2 +- include/hw/acpi/aml-build.h | 1 + 4 files changed, 6 insertions(+), 3 deletions(-) diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c index f9c9df916c..a91550de6f 100644 --- a/hw/arm/virt-acpi-build.c +++ b/hw/arm/virt-acpi-build.c @@ -865,7 +865,8 @@ void virt_acpi_setup(VirtMachineState *vms) build_state->linker_mr = acpi_add_rom_blob(virt_acpi_build_update, build_state, - tables.linker->cmd_blob, ACPI_BUILD_LOADER_FILE, 0); + tables.linker->cmd_blob, ACPI_BUILD_LOADER_FILE, + ACPI_BUILD_LOADER_MAX_SIZE); fw_cfg_add_file(vms->fw_cfg, ACPI_BUILD_TPMLOG_FILE, tables.tcpalog->data, acpi_data_len(tables.tcpalog)); diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index a95b42c8b3..dc56006353 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -2634,7 +2634,8 @@ void acpi_setup(void) build_state->linker_mr = acpi_add_rom_blob(acpi_build_update, build_state, - tables.linker->cmd_blob, ACPI_BUILD_LOADER_FILE, 0); + tables.linker->cmd_blob, ACPI_BUILD_LOADER_FILE, + ACPI_BUILD_LOADER_MAX_SIZE); fw_cfg_add_file(x86ms->fw_cfg, ACPI_BUILD_TPMLOG_FILE, tables.tcpalog->data, acpi_data_len(tables.tcpalog)); diff --git a/hw/i386/acpi-microvm.c b/hw/i386/acpi-microvm.c index 54b3af478a..01f1945ac1 100644 --- a/hw/i386/acpi-microvm.c +++ b/hw/i386/acpi-microvm.c @@ -255,7 +255,7 @@ void acpi_setup_microvm(MicrovmMachineState *mms) ACPI_BUILD_TABLE_MAX_SIZE); acpi_add_rom_blob(acpi_build_no_update, NULL, tables.linker->cmd_blob, - "etc/table-loader", 0); + "etc/table-loader", ACPI_BUILD_LOADER_MAX_SIZE); acpi_add_rom_blob(acpi_build_no_update, NULL, tables.rsdp, ACPI_BUILD_RSDP_FILE, 0); diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h index e652106e26..ca781f3531 100644 --- a/include/hw/acpi/aml-build.h +++ b/include/hw/acpi/aml-build.h @@ -6,6 +6,7 @@ /* Reserve RAM space for tables: add another order of magnitude. */ #define ACPI_BUILD_TABLE_MAX_SIZE 0x200000 +#define ACPI_BUILD_LOADER_MAX_SIZE 0x10000 #define ACPI_BUILD_APPNAME6 "BOCHS " #define ACPI_BUILD_APPNAME8 "BXPC " From 2a3bdc5cec1a16fb731661d2eac896284f691e1f Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 4 Mar 2021 11:55:52 +0100 Subject: [PATCH 16/19] microvm: Don't open-code "etc/table-loader" Let's just reuse ACPI_BUILD_LOADER_FILE. Cc: Alistair Francis Cc: Paolo Bonzini Cc: "Michael S. Tsirkin" Cc: Igor Mammedov Cc: Peter Maydell Cc: Shannon Zhao Cc: Marcel Apfelbaum Cc: Paolo Bonzini Cc: Richard Henderson Cc: Laszlo Ersek Signed-off-by: David Hildenbrand Message-Id: <20210304105554.121674-3-david@redhat.com> Reviewed-by: Laszlo Ersek Reviewed-by: Igor Mammedov Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/acpi-microvm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/i386/acpi-microvm.c b/hw/i386/acpi-microvm.c index 01f1945ac1..502aac0ba2 100644 --- a/hw/i386/acpi-microvm.c +++ b/hw/i386/acpi-microvm.c @@ -255,7 +255,7 @@ void acpi_setup_microvm(MicrovmMachineState *mms) ACPI_BUILD_TABLE_MAX_SIZE); acpi_add_rom_blob(acpi_build_no_update, NULL, tables.linker->cmd_blob, - "etc/table-loader", ACPI_BUILD_LOADER_MAX_SIZE); + ACPI_BUILD_LOADER_FILE, ACPI_BUILD_LOADER_MAX_SIZE); acpi_add_rom_blob(acpi_build_no_update, NULL, tables.rsdp, ACPI_BUILD_RSDP_FILE, 0); From 6930ba0d44b2f4420948aec5209f665385411f7f Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 4 Mar 2021 11:55:53 +0100 Subject: [PATCH 17/19] acpi: Move maximum size logic into acpi_add_rom_blob() We want to have safety margins for all tables based on the table type. Let's move the maximum size logic into acpi_add_rom_blob() and make it dependent on the table name, so we don't have to replicate for each and every instance that creates such tables. Suggested-by: Laszlo Ersek Cc: Alistair Francis Cc: Paolo Bonzini Cc: "Michael S. Tsirkin" Cc: Igor Mammedov Cc: Peter Maydell Cc: Shannon Zhao Cc: Marcel Apfelbaum Cc: Paolo Bonzini Cc: Richard Henderson Cc: Laszlo Ersek Signed-off-by: David Hildenbrand Message-Id: <20210304105554.121674-4-david@redhat.com> Reviewed-by: Laszlo Ersek Reviewed-by: Igor Mammedov Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/utils.c | 12 ++++++++++-- hw/arm/virt-acpi-build.c | 13 ++++++------- hw/i386/acpi-build.c | 8 +++----- hw/i386/acpi-microvm.c | 16 ++++++---------- include/hw/acpi/aml-build.h | 4 ---- include/hw/acpi/utils.h | 3 +-- 6 files changed, 26 insertions(+), 30 deletions(-) diff --git a/hw/acpi/utils.c b/hw/acpi/utils.c index a134a4d554..f2d69a6d92 100644 --- a/hw/acpi/utils.c +++ b/hw/acpi/utils.c @@ -27,9 +27,17 @@ #include "hw/loader.h" MemoryRegion *acpi_add_rom_blob(FWCfgCallback update, void *opaque, - GArray *blob, const char *name, - uint64_t max_size) + GArray *blob, const char *name) { + uint64_t max_size = 0; + + /* Reserve RAM space for tables: add another order of magnitude. */ + if (!strcmp(name, ACPI_BUILD_TABLE_FILE)) { + max_size = 0x200000; + } else if (!strcmp(name, ACPI_BUILD_LOADER_FILE)) { + max_size = 0x10000; + } + return rom_add_blob(name, blob->data, acpi_data_len(blob), max_size, -1, name, update, opaque, NULL, true); } diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c index a91550de6f..f5a2b2d4cb 100644 --- a/hw/arm/virt-acpi-build.c +++ b/hw/arm/virt-acpi-build.c @@ -859,14 +859,13 @@ void virt_acpi_setup(VirtMachineState *vms) /* Now expose it all to Guest */ build_state->table_mr = acpi_add_rom_blob(virt_acpi_build_update, build_state, tables.table_data, - ACPI_BUILD_TABLE_FILE, - ACPI_BUILD_TABLE_MAX_SIZE); + ACPI_BUILD_TABLE_FILE); assert(build_state->table_mr != NULL); - build_state->linker_mr = - acpi_add_rom_blob(virt_acpi_build_update, build_state, - tables.linker->cmd_blob, ACPI_BUILD_LOADER_FILE, - ACPI_BUILD_LOADER_MAX_SIZE); + build_state->linker_mr = acpi_add_rom_blob(virt_acpi_build_update, + build_state, + tables.linker->cmd_blob, + ACPI_BUILD_LOADER_FILE); fw_cfg_add_file(vms->fw_cfg, ACPI_BUILD_TPMLOG_FILE, tables.tcpalog->data, acpi_data_len(tables.tcpalog)); @@ -880,7 +879,7 @@ void virt_acpi_setup(VirtMachineState *vms) build_state->rsdp_mr = acpi_add_rom_blob(virt_acpi_build_update, build_state, tables.rsdp, - ACPI_BUILD_RSDP_FILE, 0); + ACPI_BUILD_RSDP_FILE); qemu_register_reset(virt_acpi_build_reset, build_state); virt_acpi_build_reset(build_state); diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index dc56006353..3aeae15e57 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -2628,14 +2628,12 @@ void acpi_setup(void) /* Now expose it all to Guest */ build_state->table_mr = acpi_add_rom_blob(acpi_build_update, build_state, tables.table_data, - ACPI_BUILD_TABLE_FILE, - ACPI_BUILD_TABLE_MAX_SIZE); + ACPI_BUILD_TABLE_FILE); assert(build_state->table_mr != NULL); build_state->linker_mr = acpi_add_rom_blob(acpi_build_update, build_state, - tables.linker->cmd_blob, ACPI_BUILD_LOADER_FILE, - ACPI_BUILD_LOADER_MAX_SIZE); + tables.linker->cmd_blob, ACPI_BUILD_LOADER_FILE); fw_cfg_add_file(x86ms->fw_cfg, ACPI_BUILD_TPMLOG_FILE, tables.tcpalog->data, acpi_data_len(tables.tcpalog)); @@ -2674,7 +2672,7 @@ void acpi_setup(void) build_state->rsdp = NULL; build_state->rsdp_mr = acpi_add_rom_blob(acpi_build_update, build_state, tables.rsdp, - ACPI_BUILD_RSDP_FILE, 0); + ACPI_BUILD_RSDP_FILE); } qemu_register_reset(acpi_build_reset, build_state); diff --git a/hw/i386/acpi-microvm.c b/hw/i386/acpi-microvm.c index 502aac0ba2..271710eb92 100644 --- a/hw/i386/acpi-microvm.c +++ b/hw/i386/acpi-microvm.c @@ -249,16 +249,12 @@ void acpi_setup_microvm(MicrovmMachineState *mms) acpi_build_microvm(&tables, mms); /* Now expose it all to Guest */ - acpi_add_rom_blob(acpi_build_no_update, NULL, - tables.table_data, - ACPI_BUILD_TABLE_FILE, - ACPI_BUILD_TABLE_MAX_SIZE); - acpi_add_rom_blob(acpi_build_no_update, NULL, - tables.linker->cmd_blob, - ACPI_BUILD_LOADER_FILE, ACPI_BUILD_LOADER_MAX_SIZE); - acpi_add_rom_blob(acpi_build_no_update, NULL, - tables.rsdp, - ACPI_BUILD_RSDP_FILE, 0); + acpi_add_rom_blob(acpi_build_no_update, NULL, tables.table_data, + ACPI_BUILD_TABLE_FILE); + acpi_add_rom_blob(acpi_build_no_update, NULL, tables.linker->cmd_blob, + ACPI_BUILD_LOADER_FILE); + acpi_add_rom_blob(acpi_build_no_update, NULL, tables.rsdp, + ACPI_BUILD_RSDP_FILE); acpi_build_tables_cleanup(&tables, false); } diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h index ca781f3531..471266d739 100644 --- a/include/hw/acpi/aml-build.h +++ b/include/hw/acpi/aml-build.h @@ -4,10 +4,6 @@ #include "hw/acpi/acpi-defs.h" #include "hw/acpi/bios-linker-loader.h" -/* Reserve RAM space for tables: add another order of magnitude. */ -#define ACPI_BUILD_TABLE_MAX_SIZE 0x200000 -#define ACPI_BUILD_LOADER_MAX_SIZE 0x10000 - #define ACPI_BUILD_APPNAME6 "BOCHS " #define ACPI_BUILD_APPNAME8 "BXPC " diff --git a/include/hw/acpi/utils.h b/include/hw/acpi/utils.h index 140b4de603..0022df027d 100644 --- a/include/hw/acpi/utils.h +++ b/include/hw/acpi/utils.h @@ -4,6 +4,5 @@ #include "hw/nvram/fw_cfg.h" MemoryRegion *acpi_add_rom_blob(FWCfgCallback update, void *opaque, - GArray *blob, const char *name, - uint64_t max_size); + GArray *blob, const char *name); #endif From 50337286b796f3866d1880f6e8a895fa5853b543 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 4 Mar 2021 11:55:54 +0100 Subject: [PATCH 18/19] acpi: Set proper maximum size for "etc/acpi/rsdp" blob Let's also set a maximum size for "etc/acpi/rsdp", so the maximum size doesn't get implicitly set based on the initial table size. In my experiments, the table size was in the range of 22 bytes, so a single page (== what we used until now) seems to be good enough. Now that we have defined maximum sizes for all currently used table types, let's assert that we catch usage with new tables that need a proper maximum size definition. Also assert that our initial size does not exceed the maximum size; while qemu_ram_alloc_internal() properly asserts that the initial RAMBlock size is <= its maximum size, the result might differ when the host page size is bigger than 4k. Suggested-by: Laszlo Ersek Cc: Alistair Francis Cc: Paolo Bonzini Cc: "Michael S. Tsirkin" Cc: Igor Mammedov Cc: Peter Maydell Cc: Shannon Zhao Cc: Marcel Apfelbaum Cc: Paolo Bonzini Cc: Richard Henderson Cc: Laszlo Ersek Signed-off-by: David Hildenbrand Message-Id: <20210304105554.121674-5-david@redhat.com> Reviewed-by: Laszlo Ersek Reviewed-by: Igor Mammedov Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/utils.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/hw/acpi/utils.c b/hw/acpi/utils.c index f2d69a6d92..0c486ea29f 100644 --- a/hw/acpi/utils.c +++ b/hw/acpi/utils.c @@ -29,14 +29,19 @@ MemoryRegion *acpi_add_rom_blob(FWCfgCallback update, void *opaque, GArray *blob, const char *name) { - uint64_t max_size = 0; + uint64_t max_size; /* Reserve RAM space for tables: add another order of magnitude. */ if (!strcmp(name, ACPI_BUILD_TABLE_FILE)) { max_size = 0x200000; } else if (!strcmp(name, ACPI_BUILD_LOADER_FILE)) { max_size = 0x10000; + } else if (!strcmp(name, ACPI_BUILD_RSDP_FILE)) { + max_size = 0x1000; + } else { + g_assert_not_reached(); } + g_assert(acpi_data_len(blob) <= max_size); return rom_add_blob(name, blob->data, acpi_data_len(blob), max_size, -1, name, update, opaque, NULL, true); From d07b22863b8e0981bdc9384a787a703f1fd4ba42 Mon Sep 17 00:00:00 2001 From: Marian Postevca Date: Sun, 21 Feb 2021 02:17:36 +0200 Subject: [PATCH 19/19] acpi: Move setters/getters of oem fields to X86MachineState The code that sets/gets oem fields is duplicated in both PC and MICROVM variants. This commit moves it to X86MachineState so that all x86 variants can use it and duplication is removed. Signed-off-by: Marian Postevca Message-Id: <20210221001737.24499-2-posteuca@mutex.one> Reviewed-by: Igor Mammedov Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/acpi-build.c | 48 ++++++++++++++-------------- hw/i386/acpi-microvm.c | 16 +++++----- hw/i386/microvm.c | 66 --------------------------------------- hw/i386/pc.c | 63 ------------------------------------- hw/i386/x86.c | 64 +++++++++++++++++++++++++++++++++++++ include/hw/i386/microvm.h | 4 --- include/hw/i386/pc.h | 4 --- include/hw/i386/x86.h | 4 +++ 8 files changed, 100 insertions(+), 169 deletions(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 3aeae15e57..de98750aef 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -1807,7 +1807,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, g_array_append_vals(table_data, dsdt->buf->data, dsdt->buf->len); build_header(linker, table_data, (void *)(table_data->data + table_data->len - dsdt->buf->len), - "DSDT", dsdt->buf->len, 1, pcms->oem_id, pcms->oem_table_id); + "DSDT", dsdt->buf->len, 1, x86ms->oem_id, x86ms->oem_table_id); free_aml_allocator(); } @@ -1984,8 +1984,8 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) build_header(linker, table_data, (void *)(table_data->data + srat_start), "SRAT", - table_data->len - srat_start, 1, pcms->oem_id, - pcms->oem_table_id); + table_data->len - srat_start, 1, x86ms->oem_id, + x86ms->oem_table_id); } /* @@ -2338,13 +2338,13 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) if (slic_oem.id) { oem_id = slic_oem.id; } else { - oem_id = pcms->oem_id; + oem_id = x86ms->oem_id; } if (slic_oem.table_id) { oem_table_id = slic_oem.table_id; } else { - oem_table_id = pcms->oem_table_id; + oem_table_id = x86ms->oem_table_id; } table_offsets = g_array_new(false, true /* clear */, @@ -2385,30 +2385,30 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) acpi_add_table(table_offsets, tables_blob); acpi_build_madt(tables_blob, tables->linker, x86ms, - ACPI_DEVICE_IF(x86ms->acpi_dev), pcms->oem_id, - pcms->oem_table_id); + ACPI_DEVICE_IF(x86ms->acpi_dev), x86ms->oem_id, + x86ms->oem_table_id); vmgenid_dev = find_vmgenid_dev(); if (vmgenid_dev) { acpi_add_table(table_offsets, tables_blob); vmgenid_build_acpi(VMGENID(vmgenid_dev), tables_blob, - tables->vmgenid, tables->linker, pcms->oem_id); + tables->vmgenid, tables->linker, x86ms->oem_id); } if (misc.has_hpet) { acpi_add_table(table_offsets, tables_blob); - build_hpet(tables_blob, tables->linker, pcms->oem_id, - pcms->oem_table_id); + build_hpet(tables_blob, tables->linker, x86ms->oem_id, + x86ms->oem_table_id); } if (misc.tpm_version != TPM_VERSION_UNSPEC) { if (misc.tpm_version == TPM_VERSION_1_2) { acpi_add_table(table_offsets, tables_blob); build_tpm_tcpa(tables_blob, tables->linker, tables->tcpalog, - pcms->oem_id, pcms->oem_table_id); + x86ms->oem_id, x86ms->oem_table_id); } else { /* TPM_VERSION_2_0 */ acpi_add_table(table_offsets, tables_blob); build_tpm2(tables_blob, tables->linker, tables->tcpalog, - pcms->oem_id, pcms->oem_table_id); + x86ms->oem_id, x86ms->oem_table_id); } } if (pcms->numa_nodes) { @@ -2416,40 +2416,40 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) build_srat(tables_blob, tables->linker, machine); if (machine->numa_state->have_numa_distance) { acpi_add_table(table_offsets, tables_blob); - build_slit(tables_blob, tables->linker, machine, pcms->oem_id, - pcms->oem_table_id); + build_slit(tables_blob, tables->linker, machine, x86ms->oem_id, + x86ms->oem_table_id); } if (machine->numa_state->hmat_enabled) { acpi_add_table(table_offsets, tables_blob); build_hmat(tables_blob, tables->linker, machine->numa_state, - pcms->oem_id, pcms->oem_table_id); + x86ms->oem_id, x86ms->oem_table_id); } } if (acpi_get_mcfg(&mcfg)) { acpi_add_table(table_offsets, tables_blob); - build_mcfg(tables_blob, tables->linker, &mcfg, pcms->oem_id, - pcms->oem_table_id); + build_mcfg(tables_blob, tables->linker, &mcfg, x86ms->oem_id, + x86ms->oem_table_id); } if (x86_iommu_get_default()) { IommuType IOMMUType = x86_iommu_get_type(); if (IOMMUType == TYPE_AMD) { acpi_add_table(table_offsets, tables_blob); - build_amd_iommu(tables_blob, tables->linker, pcms->oem_id, - pcms->oem_table_id); + build_amd_iommu(tables_blob, tables->linker, x86ms->oem_id, + x86ms->oem_table_id); } else if (IOMMUType == TYPE_INTEL) { acpi_add_table(table_offsets, tables_blob); - build_dmar_q35(tables_blob, tables->linker, pcms->oem_id, - pcms->oem_table_id); + build_dmar_q35(tables_blob, tables->linker, x86ms->oem_id, + x86ms->oem_table_id); } } if (machine->nvdimms_state->is_enabled) { nvdimm_build_acpi(table_offsets, tables_blob, tables->linker, machine->nvdimms_state, machine->ram_slots, - pcms->oem_id, pcms->oem_table_id); + x86ms->oem_id, x86ms->oem_table_id); } acpi_add_table(table_offsets, tables_blob); - build_waet(tables_blob, tables->linker, pcms->oem_id, pcms->oem_table_id); + build_waet(tables_blob, tables->linker, x86ms->oem_id, x86ms->oem_table_id); /* Add tables supplied by user (if any) */ for (u = acpi_table_first(); u; u = acpi_table_next(u)) { @@ -2468,7 +2468,7 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) { AcpiRsdpData rsdp_data = { .revision = 0, - .oem_id = pcms->oem_id, + .oem_id = x86ms->oem_id, .xsdt_tbl_offset = NULL, .rsdt_tbl_offset = &rsdt, }; diff --git a/hw/i386/acpi-microvm.c b/hw/i386/acpi-microvm.c index 271710eb92..ccd3303aac 100644 --- a/hw/i386/acpi-microvm.c +++ b/hw/i386/acpi-microvm.c @@ -149,7 +149,7 @@ build_dsdt_microvm(GArray *table_data, BIOSLinker *linker, g_array_append_vals(table_data, dsdt->buf->data, dsdt->buf->len); build_header(linker, table_data, (void *)(table_data->data + table_data->len - dsdt->buf->len), - "DSDT", dsdt->buf->len, 2, mms->oem_id, mms->oem_table_id); + "DSDT", dsdt->buf->len, 2, x86ms->oem_id, x86ms->oem_table_id); free_aml_allocator(); } @@ -201,24 +201,24 @@ static void acpi_build_microvm(AcpiBuildTables *tables, pmfadt.dsdt_tbl_offset = &dsdt; pmfadt.xdsdt_tbl_offset = &dsdt; acpi_add_table(table_offsets, tables_blob); - build_fadt(tables_blob, tables->linker, &pmfadt, mms->oem_id, - mms->oem_table_id); + build_fadt(tables_blob, tables->linker, &pmfadt, x86ms->oem_id, + x86ms->oem_table_id); acpi_add_table(table_offsets, tables_blob); acpi_build_madt(tables_blob, tables->linker, X86_MACHINE(machine), - ACPI_DEVICE_IF(x86ms->acpi_dev), mms->oem_id, - mms->oem_table_id); + ACPI_DEVICE_IF(x86ms->acpi_dev), x86ms->oem_id, + x86ms->oem_table_id); xsdt = tables_blob->len; - build_xsdt(tables_blob, tables->linker, table_offsets, mms->oem_id, - mms->oem_table_id); + build_xsdt(tables_blob, tables->linker, table_offsets, x86ms->oem_id, + x86ms->oem_table_id); /* RSDP is in FSEG memory, so allocate it separately */ { AcpiRsdpData rsdp_data = { /* ACPI 2.0: 5.2.4.3 RSDP Structure */ .revision = 2, /* xsdt needs v2 */ - .oem_id = mms->oem_id, + .oem_id = x86ms->oem_id, .xsdt_tbl_offset = &xsdt, .rsdt_tbl_offset = NULL, }; diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c index 4e0cf4c522..edf2b0f061 100644 --- a/hw/i386/microvm.c +++ b/hw/i386/microvm.c @@ -648,51 +648,6 @@ static void microvm_powerdown_req(Notifier *notifier, void *data) } } -static char *microvm_machine_get_oem_id(Object *obj, Error **errp) -{ - MicrovmMachineState *mms = MICROVM_MACHINE(obj); - - return g_strdup(mms->oem_id); -} - -static void microvm_machine_set_oem_id(Object *obj, const char *value, - Error **errp) -{ - MicrovmMachineState *mms = MICROVM_MACHINE(obj); - size_t len = strlen(value); - - if (len > 6) { - error_setg(errp, - "User specified "MICROVM_MACHINE_OEM_ID" value is bigger than " - "6 bytes in size"); - return; - } - - strncpy(mms->oem_id, value, 6); -} - -static char *microvm_machine_get_oem_table_id(Object *obj, Error **errp) -{ - MicrovmMachineState *mms = MICROVM_MACHINE(obj); - - return g_strdup(mms->oem_table_id); -} - -static void microvm_machine_set_oem_table_id(Object *obj, const char *value, - Error **errp) -{ - MicrovmMachineState *mms = MICROVM_MACHINE(obj); - size_t len = strlen(value); - - if (len > 8) { - error_setg(errp, - "User specified "MICROVM_MACHINE_OEM_TABLE_ID" value is bigger than " - "8 bytes in size"); - return; - } - strncpy(mms->oem_table_id, value, 8); -} - static void microvm_machine_initfn(Object *obj) { MicrovmMachineState *mms = MICROVM_MACHINE(obj); @@ -714,9 +669,6 @@ static void microvm_machine_initfn(Object *obj) qemu_add_machine_init_done_notifier(&mms->machine_done); mms->powerdown_req.notify = microvm_powerdown_req; qemu_register_powerdown_notifier(&mms->powerdown_req); - - mms->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6); - mms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8); } static void microvm_class_init(ObjectClass *oc, void *data) @@ -805,24 +757,6 @@ static void microvm_class_init(ObjectClass *oc, void *data) MICROVM_MACHINE_AUTO_KERNEL_CMDLINE, "Set off to disable adding virtio-mmio devices to the kernel cmdline"); - object_class_property_add_str(oc, MICROVM_MACHINE_OEM_ID, - microvm_machine_get_oem_id, - microvm_machine_set_oem_id); - object_class_property_set_description(oc, MICROVM_MACHINE_OEM_ID, - "Override the default value of field OEMID " - "in ACPI table header." - "The string may be up to 6 bytes in size"); - - - object_class_property_add_str(oc, MICROVM_MACHINE_OEM_TABLE_ID, - microvm_machine_get_oem_table_id, - microvm_machine_set_oem_table_id); - object_class_property_set_description(oc, MICROVM_MACHINE_OEM_TABLE_ID, - "Override the default value of field OEM Table ID " - "in ACPI table header." - "The string may be up to 8 bytes in size"); - - machine_class_allow_dynamic_sysbus_dev(mc, TYPE_RAMFB_DEVICE); } diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 35e1770950..8a84b25a03 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1608,49 +1608,6 @@ static void pc_machine_set_max_fw_size(Object *obj, Visitor *v, pcms->max_fw_size = value; } -static char *pc_machine_get_oem_id(Object *obj, Error **errp) -{ - PCMachineState *pcms = PC_MACHINE(obj); - - return g_strdup(pcms->oem_id); -} - -static void pc_machine_set_oem_id(Object *obj, const char *value, Error **errp) -{ - PCMachineState *pcms = PC_MACHINE(obj); - size_t len = strlen(value); - - if (len > 6) { - error_setg(errp, - "User specified "PC_MACHINE_OEM_ID" value is bigger than " - "6 bytes in size"); - return; - } - - strncpy(pcms->oem_id, value, 6); -} - -static char *pc_machine_get_oem_table_id(Object *obj, Error **errp) -{ - PCMachineState *pcms = PC_MACHINE(obj); - - return g_strdup(pcms->oem_table_id); -} - -static void pc_machine_set_oem_table_id(Object *obj, const char *value, - Error **errp) -{ - PCMachineState *pcms = PC_MACHINE(obj); - size_t len = strlen(value); - - if (len > 8) { - error_setg(errp, - "User specified "PC_MACHINE_OEM_TABLE_ID" value is bigger than " - "8 bytes in size"); - return; - } - strncpy(pcms->oem_table_id, value, 8); -} static void pc_machine_initfn(Object *obj) { @@ -1664,8 +1621,6 @@ static void pc_machine_initfn(Object *obj) pcms->max_ram_below_4g = 0; /* use default */ /* acpi build is enabled by default if machine supports it */ pcms->acpi_build_enabled = PC_MACHINE_GET_CLASS(pcms)->has_acpi_build; - pcms->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6); - pcms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8); pcms->smbus_enabled = true; pcms->sata_enabled = true; pcms->pit_enabled = true; @@ -1802,24 +1757,6 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) NULL, NULL); object_class_property_set_description(oc, PC_MACHINE_MAX_FW_SIZE, "Maximum combined firmware size"); - - object_class_property_add_str(oc, PC_MACHINE_OEM_ID, - pc_machine_get_oem_id, - pc_machine_set_oem_id); - object_class_property_set_description(oc, PC_MACHINE_OEM_ID, - "Override the default value of field OEMID " - "in ACPI table header." - "The string may be up to 6 bytes in size"); - - - object_class_property_add_str(oc, PC_MACHINE_OEM_TABLE_ID, - pc_machine_get_oem_table_id, - pc_machine_set_oem_table_id); - object_class_property_set_description(oc, PC_MACHINE_OEM_TABLE_ID, - "Override the default value of field OEM Table ID " - "in ACPI table header." - "The string may be up to 8 bytes in size"); - } static const TypeInfo pc_machine_info = { diff --git a/hw/i386/x86.c b/hw/i386/x86.c index 7865660e2c..ed796fe6ba 100644 --- a/hw/i386/x86.c +++ b/hw/i386/x86.c @@ -1201,6 +1201,51 @@ static void x86_machine_set_acpi(Object *obj, Visitor *v, const char *name, visit_type_OnOffAuto(v, name, &x86ms->acpi, errp); } +static char *x86_machine_get_oem_id(Object *obj, Error **errp) +{ + X86MachineState *x86ms = X86_MACHINE(obj); + + return g_strdup(x86ms->oem_id); +} + +static void x86_machine_set_oem_id(Object *obj, const char *value, Error **errp) +{ + X86MachineState *x86ms = X86_MACHINE(obj); + size_t len = strlen(value); + + if (len > 6) { + error_setg(errp, + "User specified "X86_MACHINE_OEM_ID" value is bigger than " + "6 bytes in size"); + return; + } + + strncpy(x86ms->oem_id, value, 6); +} + +static char *x86_machine_get_oem_table_id(Object *obj, Error **errp) +{ + X86MachineState *x86ms = X86_MACHINE(obj); + + return g_strdup(x86ms->oem_table_id); +} + +static void x86_machine_set_oem_table_id(Object *obj, const char *value, + Error **errp) +{ + X86MachineState *x86ms = X86_MACHINE(obj); + size_t len = strlen(value); + + if (len > 8) { + error_setg(errp, + "User specified "X86_MACHINE_OEM_TABLE_ID + " value is bigger than " + "8 bytes in size"); + return; + } + strncpy(x86ms->oem_table_id, value, 8); +} + static void x86_machine_initfn(Object *obj) { X86MachineState *x86ms = X86_MACHINE(obj); @@ -1209,6 +1254,8 @@ static void x86_machine_initfn(Object *obj) x86ms->acpi = ON_OFF_AUTO_AUTO; x86ms->smp_dies = 1; x86ms->pci_irq_mask = ACPI_BUILD_PCI_IRQS; + x86ms->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6); + x86ms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8); } static void x86_machine_class_init(ObjectClass *oc, void *data) @@ -1235,6 +1282,23 @@ static void x86_machine_class_init(ObjectClass *oc, void *data) NULL, NULL); object_class_property_set_description(oc, X86_MACHINE_ACPI, "Enable ACPI"); + + object_class_property_add_str(oc, X86_MACHINE_OEM_ID, + x86_machine_get_oem_id, + x86_machine_set_oem_id); + object_class_property_set_description(oc, X86_MACHINE_OEM_ID, + "Override the default value of field OEMID " + "in ACPI table header." + "The string may be up to 6 bytes in size"); + + + object_class_property_add_str(oc, X86_MACHINE_OEM_TABLE_ID, + x86_machine_get_oem_table_id, + x86_machine_set_oem_table_id); + object_class_property_set_description(oc, X86_MACHINE_OEM_TABLE_ID, + "Override the default value of field OEM Table ID " + "in ACPI table header." + "The string may be up to 8 bytes in size"); } static const TypeInfo x86_machine_info = { diff --git a/include/hw/i386/microvm.h b/include/hw/i386/microvm.h index 372b05774e..f25f837441 100644 --- a/include/hw/i386/microvm.h +++ b/include/hw/i386/microvm.h @@ -76,8 +76,6 @@ #define MICROVM_MACHINE_ISA_SERIAL "isa-serial" #define MICROVM_MACHINE_OPTION_ROMS "x-option-roms" #define MICROVM_MACHINE_AUTO_KERNEL_CMDLINE "auto-kernel-cmdline" -#define MICROVM_MACHINE_OEM_ID "oem-id" -#define MICROVM_MACHINE_OEM_TABLE_ID "oem-table-id" struct MicrovmMachineClass { X86MachineClass parent; @@ -106,8 +104,6 @@ struct MicrovmMachineState { Notifier machine_done; Notifier powerdown_req; struct GPEXConfig gpex; - char *oem_id; - char *oem_table_id; }; #define TYPE_MICROVM_MACHINE MACHINE_TYPE_NAME("microvm") diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index d4c3d73c11..dcf060b791 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -46,8 +46,6 @@ typedef struct PCMachineState { bool pit_enabled; bool hpet_enabled; uint64_t max_fw_size; - char *oem_id; - char *oem_table_id; /* NUMA information: */ uint64_t numa_nodes; @@ -65,8 +63,6 @@ typedef struct PCMachineState { #define PC_MACHINE_SATA "sata" #define PC_MACHINE_PIT "pit" #define PC_MACHINE_MAX_FW_SIZE "max-fw-size" -#define PC_MACHINE_OEM_ID "oem-id" -#define PC_MACHINE_OEM_TABLE_ID "oem-table-id" /** * PCMachineClass: * diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h index 56080bd1fb..26c9cc45a4 100644 --- a/include/hw/i386/x86.h +++ b/include/hw/i386/x86.h @@ -67,6 +67,8 @@ struct X86MachineState { OnOffAuto smm; OnOffAuto acpi; + char *oem_id; + char *oem_table_id; /* * Address space used by IOAPIC device. All IOAPIC interrupts * will be translated to MSI messages in the address space. @@ -76,6 +78,8 @@ struct X86MachineState { #define X86_MACHINE_SMM "smm" #define X86_MACHINE_ACPI "acpi" +#define X86_MACHINE_OEM_ID "oem-id" +#define X86_MACHINE_OEM_TABLE_ID "oem-table-id" #define TYPE_X86_MACHINE MACHINE_TYPE_NAME("x86") OBJECT_DECLARE_TYPE(X86MachineState, X86MachineClass, X86_MACHINE)