From 9aa47edd4ee69fa0628c5f9adb52d5050a5bce6a Mon Sep 17 00:00:00 2001 From: Si-Wei Liu Date: Thu, 1 Oct 2020 16:09:45 -0400 Subject: [PATCH 01/15] vhost-vdpa: negotiate VIRTIO_NET_F_STATUS with driver Vendor driver may not support or implement config interrupt delivery for link status notifications. In this event, vendor driver is expected to NACK the feature, but guest will keep link always up. Signed-off-by: Si-Wei Liu Message-Id: <1601582985-14944-1-git-send-email-si-wei.liu@oracle.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- net/vhost-vdpa.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index e2b3ba85bf..99c476db8c 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -55,6 +55,7 @@ const int vdpa_feature_bits[] = { VIRTIO_F_IOMMU_PLATFORM, VIRTIO_F_RING_PACKED, VIRTIO_NET_F_GUEST_ANNOUNCE, + VIRTIO_NET_F_STATUS, VHOST_INVALID_FEATURE_BIT }; From 384c2561bddfa00cd3eaf9edbc1af6c7c120511f Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Thu, 29 Oct 2020 15:48:49 +0100 Subject: [PATCH 02/15] vhost-vsock: set vhostfd to non-blocking mode vhost IOTLB API uses read()/write() to exchange iotlb messages with the kernel module. The QEMU implementation expects a non-blocking fd, indeed commit c471ad0e9b ("vhost_net: device IOTLB support") set it for vhost-net. Without this patch, if we enable iommu for the vhost-vsock device, QEMU can hang when exchanging IOTLB messages. As commit 894022e616 ("net: check if the file descriptor is valid before using it") did for tap, let's use qemu_try_set_nonblock() when fd is provided by the user. Signed-off-by: Stefano Garzarella Message-Id: <20201029144849.70958-1-sgarzare@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost-vsock.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/hw/virtio/vhost-vsock.c b/hw/virtio/vhost-vsock.c index f9db4beb47..8ddfb9abfe 100644 --- a/hw/virtio/vhost-vsock.c +++ b/hw/virtio/vhost-vsock.c @@ -16,6 +16,7 @@ #include "qapi/error.h" #include "hw/virtio/virtio-access.h" #include "qemu/error-report.h" +#include "qemu/sockets.h" #include "hw/qdev-properties.h" #include "hw/virtio/vhost-vsock.h" #include "monitor/monitor.h" @@ -148,6 +149,13 @@ static void vhost_vsock_device_realize(DeviceState *dev, Error **errp) error_prepend(errp, "vhost-vsock: unable to parse vhostfd: "); return; } + + ret = qemu_try_set_nonblock(vhostfd); + if (ret < 0) { + error_setg_errno(errp, -ret, + "vhost-vsock: unable to set non-blocking mode"); + return; + } } else { vhostfd = open("/dev/vhost-vsock", O_RDWR); if (vhostfd < 0) { @@ -155,6 +163,8 @@ static void vhost_vsock_device_realize(DeviceState *dev, Error **errp) "vhost-vsock: failed to open vhost device"); return; } + + qemu_set_nonblock(vhostfd); } vhost_vsock_common_realize(vdev, "vhost-vsock"); From acab9d8a9e31cc85ec95e5432500575680e7f07b Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Mon, 26 Oct 2020 12:39:23 -0700 Subject: [PATCH 03/15] acpi/crs: Prevent bad ranges for host bridges Prevent _CRS resources being quietly chopped off and instead throw an assertion. _CRS is used by host bridges to declare regions of io and/or memory that they consume. On some (all?) platforms the host bridge doesn't have PCI header space and so they need some way to convey the information. Signed-off-by: Ben Widawsky Message-Id: <20201026193924.985014-1-ben.widawsky@intel.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Acked-by: Igor Mammedov --- hw/i386/acpi-build.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index e3a4bc206c..98ff9f5cef 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -866,6 +866,8 @@ static Aml *build_crs(PCIHostState *host, CrsRangeSet *range_set) crs_range_merge(temp_range_set.mem_ranges); for (i = 0; i < temp_range_set.mem_ranges->len; i++) { entry = g_ptr_array_index(temp_range_set.mem_ranges, i); + assert(entry->limit <= UINT32_MAX && + (entry->limit - entry->base + 1) <= UINT32_MAX); aml_append(crs, aml_dword_memory(AML_POS_DECODE, AML_MIN_FIXED, AML_MAX_FIXED, AML_NON_CACHEABLE, From 9390255468e33811e6791d5afef3113a40770aba Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Mon, 26 Oct 2020 12:39:24 -0700 Subject: [PATCH 04/15] acpi/crs: Support ranges > 32b for hosts According to PCIe spec 5.0 Type 1 header space Base Address Registers are defined by 7.5.1.2.1 Base Address Registers (same as Type 0). The _CRS region should allow for the same range (up to 64b). Prior to this change, any host bridge utilizing more than 32b for the BAR would have the address truncated and likely lead to conflicts when the operating systems reads the _CRS object. Signed-off-by: Ben Widawsky Message-Id: <20201026193924.985014-2-ben.widawsky@intel.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Igor Mammedov --- hw/i386/acpi-build.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 98ff9f5cef..4f66642d88 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -786,8 +786,14 @@ static Aml *build_crs(PCIHostState *host, CrsRangeSet *range_set) crs_range_insert(temp_range_set.io_ranges, range_base, range_limit); } else { /* "memory" */ - crs_range_insert(temp_range_set.mem_ranges, - range_base, range_limit); + uint64_t length = range_limit - range_base + 1; + if (range_limit <= UINT32_MAX && length <= UINT32_MAX) { + crs_range_insert(temp_range_set.mem_ranges, range_base, + range_limit); + } else { + crs_range_insert(temp_range_set.mem_64bit_ranges, + range_base, range_limit); + } } } From 8acb3218b98c5f1bc02597ce5985fd02da7af0b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Wed, 28 Oct 2020 16:40:04 +0100 Subject: [PATCH 05/15] hw/virtio/vhost-vdpa: Fix Coverity CID 1432864 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix uninitialized value issues reported by Coverity: Field 'msg.reserved' is uninitialized when calling write(). Fixes: a5bd05800f8 ("vhost-vdpa: batch updating IOTLB mappings") Reported-by: Coverity (CID 1432864: UNINIT) Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20201028154004.776760-1-philmd@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost-vdpa.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index 4f1039910a..01d2101d09 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -90,7 +90,7 @@ static void vhost_vdpa_listener_begin(MemoryListener *listener) { struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener); struct vhost_dev *dev = v->dev; - struct vhost_msg_v2 msg; + struct vhost_msg_v2 msg = {}; int fd = v->device_fd; if (!(dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH))) { @@ -110,7 +110,7 @@ static void vhost_vdpa_listener_commit(MemoryListener *listener) { struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener); struct vhost_dev *dev = v->dev; - struct vhost_msg_v2 msg; + struct vhost_msg_v2 msg = {}; int fd = v->device_fd; if (!(dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH))) { From b06fe3e703f833866914c03c3fb0acc02385c824 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Sat, 24 Oct 2020 22:38:59 +0200 Subject: [PATCH 06/15] hw/pci: Extract pci_bus_change_irq_level() from pci_change_irq_level() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract pci_bus_change_irq_level() from pci_change_irq_level() to make it clearer it operates on the bus. Reported-by: Mark Cave-Ayland Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20201024203900.3619498-2-f4bug@amsat.org> Reviewed-by: Mark Cave-Ayland Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci/pci.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 100c9381c2..081ddcadd1 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -248,6 +248,12 @@ static inline void pci_set_irq_state(PCIDevice *d, int irq_num, int level) d->irq_state |= level << irq_num; } +static void pci_bus_change_irq_level(PCIBus *bus, int irq_num, int change) +{ + bus->irq_count[irq_num] += change; + bus->set_irq(bus->irq_opaque, irq_num, bus->irq_count[irq_num] != 0); +} + static void pci_change_irq_level(PCIDevice *pci_dev, int irq_num, int change) { PCIBus *bus; @@ -258,8 +264,7 @@ static void pci_change_irq_level(PCIDevice *pci_dev, int irq_num, int change) break; pci_dev = bus->parent_dev; } - bus->irq_count[irq_num] += change; - bus->set_irq(bus->irq_opaque, irq_num, bus->irq_count[irq_num] != 0); + pci_bus_change_irq_level(bus, irq_num, change); } int pci_bus_get_irq_level(PCIBus *bus, int irq_num) From 459ca8bfa41b42b9d80739929f09f792207f15f3 Mon Sep 17 00:00:00 2001 From: Mark Cave-Ayland Date: Sat, 24 Oct 2020 22:39:00 +0200 Subject: [PATCH 07/15] pci: Assert irqnum is between 0 and bus->nirqs in pci_bus_change_irq_level MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These assertions similar to those in the adjacent pci_bus_get_irq_level() function ensure that irqnum lies within the valid PCI bus IRQ range. Signed-off-by: Mark Cave-Ayland Message-Id: <20201011082022.3016-1-mark.cave-ayland@ilande.co.uk> Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20201024203900.3619498-3-f4bug@amsat.org> Reviewed-by: Mark Cave-Ayland Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci/pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 081ddcadd1..dc4019865b 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -250,6 +250,8 @@ static inline void pci_set_irq_state(PCIDevice *d, int irq_num, int level) static void pci_bus_change_irq_level(PCIBus *bus, int irq_num, int change) { + assert(irq_num >= 0); + assert(irq_num < bus->nirq); bus->irq_count[irq_num] += change; bus->set_irq(bus->irq_opaque, irq_num, bus->irq_count[irq_num] != 0); } From 170a6794efde98fb1ad70f59d4cd9af7decf279d Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Wed, 7 Oct 2020 18:30:34 +0200 Subject: [PATCH 08/15] vhost: Don't special case vq->used_phys in vhost_get_log_size() The first loop in vhost_get_log_size() computes the size of the dirty log bitmap so that it allows to track changes in the entire guest memory, in terms of GPA. When not using a vIOMMU, the address of the vring's used structure, vq->used_phys, is a GPA. It is thus already covered by the first loop. When using a vIOMMU, vq->used_phys is a GIOVA that will be translated to an HVA when the vhost backend needs to update the used structure. It will log the corresponding GPAs into the bitmap but it certainly won't log the GIOVA. So in any case, vq->used_phys shouldn't be explicitly used to size the bitmap. Drop the second loop. This fixes a crash of the source when migrating a guest using in-kernel vhost-net and iommu_platform=on on POWER, because DMA regions are put over 0x800000000000000ULL. The resulting insanely huge log size causes g_malloc0() to abort. BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1879349 Signed-off-by: Greg Kurz Message-Id: <160208823418.29027.15172801181796272300.stgit@bahia.lan> Acked-by: Jason Wang Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 3077fa6ef5..79b2be20df 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -172,16 +172,6 @@ static uint64_t vhost_get_log_size(struct vhost_dev *dev) reg->memory_size); log_size = MAX(log_size, last / VHOST_LOG_CHUNK + 1); } - for (i = 0; i < dev->nvqs; ++i) { - struct vhost_virtqueue *vq = dev->vqs + i; - - if (!vq->used_phys && !vq->used_size) { - continue; - } - - uint64_t last = vq->used_phys + vq->used_size - 1; - log_size = MAX(log_size, last / VHOST_LOG_CHUNK + 1); - } return log_size; } From 0259c78ca79190df6e307a6ae43886dcb69eb92a Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Wed, 21 Oct 2020 10:47:16 -0400 Subject: [PATCH 09/15] pc: Implement -no-hpet as sugar for -machine hpet=on Get rid of yet another global variable. The default will be hpet=on only if CONFIG_HPET=y. Signed-off-by: Eduardo Habkost Message-Id: <20201021144716.1536388-1-ehabkost@redhat.com> Acked-by: Paolo Bonzini Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/pc.c | 63 +++++++++++++++++++++++++++++-------------- hw/i386/pc_piix.c | 2 +- include/hw/i386/pc.h | 1 + include/hw/i386/x86.h | 3 --- softmmu/vl.c | 4 +-- 5 files changed, 47 insertions(+), 26 deletions(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 4e323755d0..416fb0e0f6 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1142,28 +1142,31 @@ void pc_basic_device_init(struct PCMachineState *pcms, * Without KVM_CAP_PIT_STATE2, we cannot switch off the in-kernel PIT * when the HPET wants to take over. Thus we have to disable the latter. */ - if (!no_hpet && (!kvm_irqchip_in_kernel() || kvm_has_pit_state2())) { + if (pcms->hpet_enabled && (!kvm_irqchip_in_kernel() || + kvm_has_pit_state2())) { hpet = qdev_try_new(TYPE_HPET); - if (hpet) { - /* For pc-piix-*, hpet's intcap is always IRQ2. For pc-q35-1.7 - * and earlier, use IRQ2 for compat. Otherwise, use IRQ16~23, - * IRQ8 and IRQ2. - */ - uint8_t compat = object_property_get_uint(OBJECT(hpet), - HPET_INTCAP, NULL); - if (!compat) { - qdev_prop_set_uint32(hpet, HPET_INTCAP, hpet_irqs); - } - sysbus_realize_and_unref(SYS_BUS_DEVICE(hpet), &error_fatal); - sysbus_mmio_map(SYS_BUS_DEVICE(hpet), 0, HPET_BASE); - - for (i = 0; i < GSI_NUM_PINS; i++) { - sysbus_connect_irq(SYS_BUS_DEVICE(hpet), i, gsi[i]); - } - pit_isa_irq = -1; - pit_alt_irq = qdev_get_gpio_in(hpet, HPET_LEGACY_PIT_INT); - rtc_irq = qdev_get_gpio_in(hpet, HPET_LEGACY_RTC_INT); + if (!hpet) { + error_report("couldn't create HPET device"); + exit(1); } + /* For pc-piix-*, hpet's intcap is always IRQ2. For pc-q35-1.7 + * and earlier, use IRQ2 for compat. Otherwise, use IRQ16~23, + * IRQ8 and IRQ2. + */ + uint8_t compat = object_property_get_uint(OBJECT(hpet), + HPET_INTCAP, NULL); + if (!compat) { + qdev_prop_set_uint32(hpet, HPET_INTCAP, hpet_irqs); + } + sysbus_realize_and_unref(SYS_BUS_DEVICE(hpet), &error_fatal); + sysbus_mmio_map(SYS_BUS_DEVICE(hpet), 0, HPET_BASE); + + for (i = 0; i < GSI_NUM_PINS; i++) { + sysbus_connect_irq(SYS_BUS_DEVICE(hpet), i, gsi[i]); + } + pit_isa_irq = -1; + pit_alt_irq = qdev_get_gpio_in(hpet, HPET_LEGACY_PIT_INT); + rtc_irq = qdev_get_gpio_in(hpet, HPET_LEGACY_RTC_INT); } *rtc_state = mc146818_rtc_init(isa_bus, 2000, rtc_irq); @@ -1535,6 +1538,20 @@ static void pc_machine_set_pit(Object *obj, bool value, Error **errp) pcms->pit_enabled = value; } +static bool pc_machine_get_hpet(Object *obj, Error **errp) +{ + PCMachineState *pcms = PC_MACHINE(obj); + + return pcms->hpet_enabled; +} + +static void pc_machine_set_hpet(Object *obj, bool value, Error **errp) +{ + PCMachineState *pcms = PC_MACHINE(obj); + + pcms->hpet_enabled = value; +} + static void pc_machine_get_max_ram_below_4g(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) @@ -1585,6 +1602,9 @@ static void pc_machine_initfn(Object *obj) pcms->smbus_enabled = true; pcms->sata_enabled = true; pcms->pit_enabled = true; +#ifdef CONFIG_HPET + pcms->hpet_enabled = true; +#endif pc_system_flash_create(pcms); pcms->pcspk = isa_new(TYPE_PC_SPEAKER); @@ -1705,6 +1725,9 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) object_class_property_add_bool(oc, PC_MACHINE_PIT, pc_machine_get_pit, pc_machine_set_pit); + + object_class_property_add_bool(oc, "hpet", + pc_machine_get_hpet, pc_machine_set_hpet); } static const TypeInfo pc_machine_info = { diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 0cf22a57ad..13d1628f13 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -216,7 +216,7 @@ static void pc_init1(MachineState *machine, i440fx_state = NULL; isa_bus = isa_bus_new(NULL, get_system_memory(), system_io, &error_abort); - no_hpet = 1; + pcms->hpet_enabled = false; } isa_bus_irqs(isa_bus, x86ms->gsi); diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 84639d0ebc..911e460097 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -43,6 +43,7 @@ typedef struct PCMachineState { bool smbus_enabled; bool sata_enabled; bool pit_enabled; + bool hpet_enabled; /* NUMA information: */ uint64_t numa_nodes; diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h index bfa9cb2a25..739fac5087 100644 --- a/include/hw/i386/x86.h +++ b/include/hw/i386/x86.h @@ -126,7 +126,4 @@ qemu_irq x86_allocate_cpu_irq(void); void gsi_handler(void *opaque, int n, int level); void ioapic_init_gsi(GSIState *gsi_state, const char *parent_name); -/* hpet.c */ -extern int no_hpet; - #endif diff --git a/softmmu/vl.c b/softmmu/vl.c index 7c1c6d37ef..a537a0377f 100644 --- a/softmmu/vl.c +++ b/softmmu/vl.c @@ -146,7 +146,6 @@ static Chardev **serial_hds; Chardev *parallel_hds[MAX_PARALLEL_PORTS]; int win2k_install_hack = 0; int singlestep = 0; -int no_hpet = 0; int fd_bootchk = 1; static int no_reboot; int no_shutdown = 0; @@ -3562,7 +3561,8 @@ void qemu_init(int argc, char **argv, char **envp) qemu_opts_parse_noisily(olist, "acpi=off", false); break; case QEMU_OPTION_no_hpet: - no_hpet = 1; + olist = qemu_find_opts("machine"); + qemu_opts_parse_noisily(olist, "hpet=off", false); break; case QEMU_OPTION_no_reboot: no_reboot = 1; From 4c70875372b821b045e84f462466a5c04b091ef5 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 9 Sep 2020 16:17:31 +0800 Subject: [PATCH 10/15] pci: advertise a page aligned ATS After Linux kernel commit 61363c1474b1 ("iommu/vt-d: Enable ATS only if the device uses page aligned address."), ATS will be only enabled if device advertises a page aligned request. Unfortunately, vhost-net is the only user and we don't advertise the aligned request capability in the past since both vhost IOTLB and address_space_get_iotlb_entry() can support non page aligned request. Though it's not clear that if the above kernel commit makes sense. Let's advertise a page aligned ATS here to make vhost device IOTLB work with Intel IOMMU again. Note that in the future we may extend pcie_ats_init() to accept parameters like queue depth and page alignment. Cc: qemu-stable@nongnu.org Signed-off-by: Jason Wang Message-Id: <20200909081731.24688-1-jasowang@redhat.com> Reviewed-by: Peter Xu Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci/pcie.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index 5b48bae0f6..d4010cf8f3 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -971,8 +971,9 @@ void pcie_ats_init(PCIDevice *dev, uint16_t offset) dev->exp.ats_cap = offset; - /* Invalidate Queue Depth 0, Page Aligned Request 0 */ - pci_set_word(dev->config + offset + PCI_ATS_CAP, 0); + /* Invalidate Queue Depth 0, Page Aligned Request 1 */ + pci_set_word(dev->config + offset + PCI_ATS_CAP, + PCI_ATS_CAP_PAGE_ALIGNED); /* STU 0, Disabled by default */ pci_set_word(dev->config + offset + PCI_ATS_CTRL, 0); From 2c729dc8ceaab88f213c7724de0fa181ffc7f078 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Thu, 15 Oct 2020 11:14:10 -0700 Subject: [PATCH 11/15] pci: Change error_report to assert(3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Asserts are used for developer bugs. As registering a bar of the wrong size is not something that should be possible for a user to achieve, this is a developer bug. While here, use the more obvious helper function. Signed-off-by: Ben Widawsky Message-Id: <20201015181411.89104-1-ben.widawsky@intel.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Philippe Mathieu-Daudé --- hw/pci/pci.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index dc4019865b..e5b7c9a42b 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -1151,11 +1151,7 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num, assert(region_num >= 0); assert(region_num < PCI_NUM_REGIONS); - if (size & (size-1)) { - error_report("ERROR: PCI region size must be pow2 " - "type=0x%x, size=0x%"FMT_PCIBUS"", type, size); - exit(1); - } + assert(is_power_of_2(size)); r = &pci_dev->io_regions[region_num]; r->addr = PCI_BAR_UNMAPPED; From 6a5b19ca63b1795011f53244f2fd9a2cf8189b72 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Thu, 15 Oct 2020 11:14:11 -0700 Subject: [PATCH 12/15] pci: Disallow improper BAR registration for type 1 Prevent future developers working on root complexes, root ports, or bridges that also wish to implement a BAR for those, from shooting themselves in the foot. PCI type 1 headers only support 2 base address registers. It is incorrect and difficult to figure out what is wrong with the device when this mistake is made. With this, it is immediate and obvious what has gone wrong. Signed-off-by: Ben Widawsky Message-Id: <20201015181411.89104-2-ben.widawsky@intel.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci/pci.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index e5b7c9a42b..0131d9d02c 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -1148,11 +1148,17 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num, uint32_t addr; /* offset in pci config space */ uint64_t wmask; pcibus_t size = memory_region_size(memory); + uint8_t hdr_type; assert(region_num >= 0); assert(region_num < PCI_NUM_REGIONS); assert(is_power_of_2(size)); + /* A PCI bridge device (with Type 1 header) may only have at most 2 BARs */ + hdr_type = + pci_dev->config[PCI_HEADER_TYPE] & ~PCI_HEADER_TYPE_MULTI_FUNCTION; + assert(hdr_type != PCI_HEADER_TYPE_BRIDGE || region_num < 2); + r = &pci_dev->io_regions[region_num]; r->addr = PCI_BAR_UNMAPPED; r->size = size; From adb29c027341ba095a3ef4beef6aaef86d3a520e Mon Sep 17 00:00:00 2001 From: Jin Yu Date: Thu, 10 Sep 2020 21:48:51 +0800 Subject: [PATCH 13/15] vhost-blk: set features before setting inflight feature Virtqueue has split and packed, so before setting inflight, you need to inform the back-end virtqueue format. Signed-off-by: Jin Yu Message-Id: <20200910134851.7817-1-jin.yu@intel.com> Acked-by: Raphael Norwitz Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/block/vhost-user-blk.c | 6 ++++++ hw/virtio/vhost.c | 18 ++++++++++++++++++ include/hw/virtio/vhost.h | 1 + 3 files changed, 25 insertions(+) diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c index a076b1e54d..f67b29bbf3 100644 --- a/hw/block/vhost-user-blk.c +++ b/hw/block/vhost-user-blk.c @@ -131,6 +131,12 @@ static int vhost_user_blk_start(VirtIODevice *vdev) s->dev.acked_features = vdev->guest_features; + ret = vhost_dev_prepare_inflight(&s->dev); + if (ret < 0) { + error_report("Error set inflight format: %d", -ret); + goto err_guest_notifiers; + } + if (!s->inflight->addr) { ret = vhost_dev_get_inflight(&s->dev, s->queue_size, s->inflight); if (ret < 0) { diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 79b2be20df..f2482378c6 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -1645,6 +1645,24 @@ int vhost_dev_load_inflight(struct vhost_inflight *inflight, QEMUFile *f) return 0; } +int vhost_dev_prepare_inflight(struct vhost_dev *hdev) +{ + int r; + + if (hdev->vhost_ops->vhost_get_inflight_fd == NULL || + hdev->vhost_ops->vhost_set_inflight_fd == NULL) { + return 0; + } + + r = vhost_dev_set_features(hdev, hdev->log_enabled); + if (r < 0) { + VHOST_OPS_DEBUG("vhost_dev_prepare_inflight failed"); + return r; + } + + return 0; +} + int vhost_dev_set_inflight(struct vhost_dev *dev, struct vhost_inflight *inflight) { diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h index 94585067f7..839bfb153c 100644 --- a/include/hw/virtio/vhost.h +++ b/include/hw/virtio/vhost.h @@ -141,6 +141,7 @@ void vhost_dev_reset_inflight(struct vhost_inflight *inflight); void vhost_dev_free_inflight(struct vhost_inflight *inflight); void vhost_dev_save_inflight(struct vhost_inflight *inflight, QEMUFile *f); int vhost_dev_load_inflight(struct vhost_inflight *inflight, QEMUFile *f); +int vhost_dev_prepare_inflight(struct vhost_dev *hdev); int vhost_dev_set_inflight(struct vhost_dev *dev, struct vhost_inflight *inflight); int vhost_dev_get_inflight(struct vhost_dev *dev, uint16_t queue_size, From d68cdae30eef62dde61c8b8467a96c01c8f80270 Mon Sep 17 00:00:00 2001 From: Felipe Franciosi Date: Wed, 28 Oct 2020 13:47:03 +0000 Subject: [PATCH 14/15] virtio: skip guest index check on device load QEMU must be careful when loading device state off migration streams to prevent a malicious source from exploiting the emulator. Overdoing these checks has the side effect of allowing a guest to "pin itself" in cloud environments by messing with state which is entirely in its control. Similarly to what f3081539 achieved in usb_device_post_load(), this commit removes such a check from virtio_load(). Worth noting, the result of a load without this check is the same as if a guest enables a VQ with invalid indexes to begin with. That is, the virtual device is set in a broken state (by the datapath handler) and must be reset. Signed-off-by: Felipe Franciosi Message-Id: <20201028134643.110698-1-felipe@nutanix.com> Acked-by: Jason Wang Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 6f8f865aff..ceb58fda6c 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -17,6 +17,7 @@ #include "trace.h" #include "exec/address-spaces.h" #include "qemu/error-report.h" +#include "qemu/log.h" #include "qemu/main-loop.h" #include "qemu/module.h" #include "hw/virtio/virtio.h" @@ -3160,12 +3161,12 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx; /* Check it isn't doing strange things with descriptor numbers. */ if (nheads > vdev->vq[i].vring.num) { - error_report("VQ %d size 0x%x Guest index 0x%x " - "inconsistent with Host index 0x%x: delta 0x%x", - i, vdev->vq[i].vring.num, - vring_avail_idx(&vdev->vq[i]), - vdev->vq[i].last_avail_idx, nheads); - return -1; + qemu_log_mask(LOG_GUEST_ERROR, + "VQ %d size 0x%x Guest index 0x%x " + "inconsistent with Host index 0x%x: delta 0x%x", + i, vdev->vq[i].vring.num, + vring_avail_idx(&vdev->vq[i]), + vdev->vq[i].last_avail_idx, nheads); } vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]); vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]); From 73beb01ec54969f76ab32d1e0605a759b6c95ab0 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 19 Oct 2020 13:39:22 -0400 Subject: [PATCH 15/15] intel_iommu: Fix two misuse of "0x%u" prints MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Dave magically found this. Fix them with "0x%x". Reported-by: Dr. David Alan Gilbert Signed-off-by: Peter Xu Message-Id: <20201019173922.100270-1-peterx@redhat.com> Reviewed-by: Dr. David Alan Gilbert Reviewed-by: Philippe Mathieu-Daudé Acked-by: Jason Wang Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/intel_iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 749eb6ad63..70ac837733 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -2665,7 +2665,7 @@ static uint64_t vtd_mem_read(void *opaque, hwaddr addr, unsigned size) if (addr + size > DMAR_REG_SIZE) { error_report_once("%s: MMIO over range: addr=0x%" PRIx64 - " size=0x%u", __func__, addr, size); + " size=0x%x", __func__, addr, size); return (uint64_t)-1; } @@ -2716,7 +2716,7 @@ static void vtd_mem_write(void *opaque, hwaddr addr, if (addr + size > DMAR_REG_SIZE) { error_report_once("%s: MMIO over range: addr=0x%" PRIx64 - " size=0x%u", __func__, addr, size); + " size=0x%x", __func__, addr, size); return; }