From 3a75e74c76cef8daa7944159b016c4ee7c56e568 Mon Sep 17 00:00:00 2001 From: Mike Ryan Date: Wed, 1 Dec 2010 11:16:47 -0800 Subject: [PATCH 01/12] net/sock: option to specify local address Add an option to specify the host IP to send multicast packets from, when using a multicast socket for networking. The option takes an IP address and sets the IP_MULTICAST_IF socket option, which causes the packets to use that IP's interface as an egress. This is useful if the host machine has several interfaces with several virtual networks across disparate interfaces. Signed-off-by: Mike Ryan Signed-off-by: Michael S. Tsirkin --- net.c | 4 ++++ net/socket.c | 52 ++++++++++++++++++++++++++++++++++++------------- qemu-options.hx | 11 +++++++++-- 3 files changed, 52 insertions(+), 15 deletions(-) diff --git a/net.c b/net.c index c5e6063fcf..9ba5be22dd 100644 --- a/net.c +++ b/net.c @@ -1050,6 +1050,10 @@ static const struct { .name = "mcast", .type = QEMU_OPT_STRING, .help = "UDP multicast address and port number", + }, { + .name = "localaddr", + .type = QEMU_OPT_STRING, + .help = "source address for multicast packets", }, { /* end of list */ } }, diff --git a/net/socket.c b/net/socket.c index 1c4e153e3f..916c2a8e10 100644 --- a/net/socket.c +++ b/net/socket.c @@ -149,7 +149,7 @@ static void net_socket_send_dgram(void *opaque) qemu_send_packet(&s->nc, s->buf, size); } -static int net_socket_mcast_create(struct sockaddr_in *mcastaddr) +static int net_socket_mcast_create(struct sockaddr_in *mcastaddr, struct in_addr *localaddr) { struct ip_mreq imr; int fd; @@ -183,7 +183,11 @@ static int net_socket_mcast_create(struct sockaddr_in *mcastaddr) /* Add host to multicast group */ imr.imr_multiaddr = mcastaddr->sin_addr; - imr.imr_interface.s_addr = htonl(INADDR_ANY); + if (localaddr) { + imr.imr_interface = *localaddr; + } else { + imr.imr_interface.s_addr = htonl(INADDR_ANY); + } ret = setsockopt(fd, IPPROTO_IP, IP_ADD_MEMBERSHIP, (const char *)&imr, sizeof(struct ip_mreq)); @@ -201,6 +205,15 @@ static int net_socket_mcast_create(struct sockaddr_in *mcastaddr) goto fail; } + /* If a bind address is given, only send packets from that address */ + if (localaddr != NULL) { + ret = setsockopt(fd, IPPROTO_IP, IP_MULTICAST_IF, localaddr, sizeof(*localaddr)); + if (ret < 0) { + perror("setsockopt(IP_MULTICAST_IF)"); + goto fail; + } + } + socket_set_nonblock(fd); return fd; fail: @@ -248,7 +261,7 @@ static NetSocketState *net_socket_fd_init_dgram(VLANState *vlan, return NULL; } /* clone dgram socket */ - newfd = net_socket_mcast_create(&saddr); + newfd = net_socket_mcast_create(&saddr, NULL); if (newfd < 0) { /* error already reported by net_socket_mcast_create() */ close(fd); @@ -468,17 +481,26 @@ static int net_socket_connect_init(VLANState *vlan, static int net_socket_mcast_init(VLANState *vlan, const char *model, const char *name, - const char *host_str) + const char *host_str, + const char *localaddr_str) { NetSocketState *s; int fd; struct sockaddr_in saddr; + struct in_addr localaddr, *param_localaddr; if (parse_host_port(&saddr, host_str) < 0) return -1; + if (localaddr_str != NULL) { + if (inet_aton(localaddr_str, &localaddr) == 0) + return -1; + param_localaddr = &localaddr; + } else { + param_localaddr = NULL; + } - fd = net_socket_mcast_create(&saddr); + fd = net_socket_mcast_create(&saddr, param_localaddr); if (fd < 0) return -1; @@ -505,8 +527,9 @@ int net_init_socket(QemuOpts *opts, if (qemu_opt_get(opts, "listen") || qemu_opt_get(opts, "connect") || - qemu_opt_get(opts, "mcast")) { - error_report("listen=, connect= and mcast= is invalid with fd="); + qemu_opt_get(opts, "mcast") || + qemu_opt_get(opts, "localaddr")) { + error_report("listen=, connect=, mcast= and localaddr= is invalid with fd=\n"); return -1; } @@ -524,8 +547,9 @@ int net_init_socket(QemuOpts *opts, if (qemu_opt_get(opts, "fd") || qemu_opt_get(opts, "connect") || - qemu_opt_get(opts, "mcast")) { - error_report("fd=, connect= and mcast= is invalid with listen="); + qemu_opt_get(opts, "mcast") || + qemu_opt_get(opts, "localaddr")) { + error_report("fd=, connect=, mcast= and localaddr= is invalid with listen=\n"); return -1; } @@ -539,8 +563,9 @@ int net_init_socket(QemuOpts *opts, if (qemu_opt_get(opts, "fd") || qemu_opt_get(opts, "listen") || - qemu_opt_get(opts, "mcast")) { - error_report("fd=, listen= and mcast= is invalid with connect="); + qemu_opt_get(opts, "mcast") || + qemu_opt_get(opts, "localaddr")) { + error_report("fd=, listen=, mcast= and localaddr= is invalid with connect=\n"); return -1; } @@ -550,7 +575,7 @@ int net_init_socket(QemuOpts *opts, return -1; } } else if (qemu_opt_get(opts, "mcast")) { - const char *mcast; + const char *mcast, *localaddr; if (qemu_opt_get(opts, "fd") || qemu_opt_get(opts, "connect") || @@ -560,8 +585,9 @@ int net_init_socket(QemuOpts *opts, } mcast = qemu_opt_get(opts, "mcast"); + localaddr = qemu_opt_get(opts, "localaddr"); - if (net_socket_mcast_init(vlan, "socket", name, mcast) == -1) { + if (net_socket_mcast_init(vlan, "socket", name, mcast, localaddr) == -1) { return -1; } } else { diff --git a/qemu-options.hx b/qemu-options.hx index 4d99a58fc5..accd16a55c 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -1061,8 +1061,9 @@ DEF("net", HAS_ARG, QEMU_OPTION_net, #endif "-net socket[,vlan=n][,name=str][,fd=h][,listen=[host]:port][,connect=host:port]\n" " connect the vlan 'n' to another VLAN using a socket connection\n" - "-net socket[,vlan=n][,name=str][,fd=h][,mcast=maddr:port]\n" + "-net socket[,vlan=n][,name=str][,fd=h][,mcast=maddr:port[,localaddr=addr]]\n" " connect the vlan 'n' to multicast maddr and port\n" + " use 'localaddr=addr' to specify the host address to send packets from\n" #ifdef CONFIG_VDE "-net vde[,vlan=n][,name=str][,sock=socketpath][,port=n][,group=groupname][,mode=octalmode]\n" " connect the vlan 'n' to port 'n' of a vde switch running\n" @@ -1256,7 +1257,7 @@ qemu linux.img -net nic,macaddr=52:54:00:12:34:57 \ -net socket,connect=127.0.0.1:1234 @end example -@item -net socket[,vlan=@var{n}][,name=@var{name}][,fd=@var{h}] [,mcast=@var{maddr}:@var{port}] +@item -net socket[,vlan=@var{n}][,name=@var{name}][,fd=@var{h}][,mcast=@var{maddr}:@var{port}[,localaddr=@var{addr}]] Create a VLAN @var{n} shared with another QEMU virtual machines using a UDP multicast socket, effectively making a bus for @@ -1296,6 +1297,12 @@ qemu linux.img -net nic,macaddr=52:54:00:12:34:56 \ /path/to/linux ubd0=/path/to/root_fs eth0=mcast @end example +Example (send packets from host's 1.2.3.4): +@example +qemu linux.img -net nic,macaddr=52:54:00:12:34:56 \ + -net socket,mcast=239.192.168.1:1102,localaddr=1.2.3.4 +@end example + @item -net vde[,vlan=@var{n}][,name=@var{name}][,sock=@var{socketpath}] [,port=@var{n}][,group=@var{groupname}][,mode=@var{octalmode}] Connect VLAN @var{n} to PORT @var{n} of a vde switch running on host and listening for incoming connections on @var{socketpath}. Use GROUP @var{groupname} From 55df6f33655ed4139c824aa3ab9a4ab0c19dca83 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 22 Nov 2010 19:52:22 +0200 Subject: [PATCH 02/12] cpus: flush all requests on each vm stop Flush all requests once we have stopped all cpus and devices. Make sure disk is in consistent state. Signed-off-by: Michael S. Tsirkin Tested-by: Jason Wang Acked-by: Marcelo Tosatti --- cpus.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpus.c b/cpus.c index 91a0fb1146..0309189bfd 100644 --- a/cpus.c +++ b/cpus.c @@ -111,6 +111,8 @@ static void do_vm_stop(int reason) vm_running = 0; pause_all_vcpus(); vm_state_notify(0, reason); + qemu_aio_flush(); + bdrv_flush_all(); monitor_protocol_event(QEVENT_STOP, NULL); } } From eff06c40d330cdbea414331337ca6f5032dbf6d7 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 22 Nov 2010 19:52:26 +0200 Subject: [PATCH 03/12] migration/savevm: no need to flush requests There's no need to flush requests after vmstop as vmstop does it for us automatically now. Signed-off-by: Michael S. Tsirkin Tested-by: Jason Wang --- migration.c | 2 -- savevm.c | 4 ---- 2 files changed, 6 deletions(-) diff --git a/migration.c b/migration.c index 622a9d2d95..e5ba51c314 100644 --- a/migration.c +++ b/migration.c @@ -370,8 +370,6 @@ void migrate_fd_put_ready(void *opaque) DPRINTF("done iterating\n"); vm_stop(0); - qemu_aio_flush(); - bdrv_flush_all(); if ((qemu_savevm_state_complete(s->mon, s->file)) < 0) { if (old_vm_running) { vm_start(); diff --git a/savevm.c b/savevm.c index d38f79e6bd..90aa237c9c 100644 --- a/savevm.c +++ b/savevm.c @@ -1575,8 +1575,6 @@ static int qemu_savevm_state(Monitor *mon, QEMUFile *f) saved_vm_running = vm_running; vm_stop(0); - bdrv_flush_all(); - ret = qemu_savevm_state_begin(mon, f, 0, 0); if (ret < 0) goto out; @@ -1885,8 +1883,6 @@ void do_savevm(Monitor *mon, const QDict *qdict) monitor_printf(mon, "No block device can accept snapshots\n"); return; } - /* ??? Should this occur after vm_stop? */ - qemu_aio_flush(); saved_vm_running = vm_running; vm_stop(0); From 954773230484f5afeb675e9ff814c97e54e69e17 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 22 Nov 2010 19:52:19 +0200 Subject: [PATCH 04/12] virtio-net: don't dma while vm is stopped DMA into memory while VM is stopped makes it hard to debug migration (consequitive saves result in different files). Fixing this completely is a large effort, this patch does this for virtio-net. Signed-off-by: Michael S. Tsirkin Tested-by: Jason Wang --- hw/virtio-net.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hw/virtio-net.c b/hw/virtio-net.c index 1d61f191b6..43a2b3daf0 100644 --- a/hw/virtio-net.c +++ b/hw/virtio-net.c @@ -424,6 +424,9 @@ static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq) static int virtio_net_can_receive(VLANClientState *nc) { VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque; + if (!n->vm_running) { + return 0; + } if (!virtio_queue_ready(n->rx_vq) || !(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) From 783e7706937fe15523b609b545587a028a2bdd03 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 22 Nov 2010 19:52:30 +0200 Subject: [PATCH 05/12] virtio-net: stop/start bh when appropriate Avoid sending out packets, and modifying memory, when VM is stopped. Add assert statements to verify this does not happen. Avoid scheduling bh when vhost-net is started. Stop bh when driver disabled bus mastering (we must not access memory after this). Signed-off-by: Michael S. Tsirkin Tested-by: Jason Wang --- hw/virtio-net.c | 66 +++++++++++++++++++++++++++++++++++++------------ 1 file changed, 50 insertions(+), 16 deletions(-) diff --git a/hw/virtio-net.c b/hw/virtio-net.c index 43a2b3daf0..58819612be 100644 --- a/hw/virtio-net.c +++ b/hw/virtio-net.c @@ -99,9 +99,14 @@ static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config) } } -static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status) +static bool virtio_net_started(VirtIONet *n, uint8_t status) +{ + return (status & VIRTIO_CONFIG_S_DRIVER_OK) && + (n->status & VIRTIO_NET_S_LINK_UP) && n->vm_running; +} + +static void virtio_net_vhost_status(VirtIONet *n, uint8_t status) { - VirtIONet *n = to_virtio_net(vdev); if (!n->nic->nc.peer) { return; } @@ -112,9 +117,7 @@ static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status) if (!tap_get_vhost_net(n->nic->nc.peer)) { return; } - if (!!n->vhost_started == ((status & VIRTIO_CONFIG_S_DRIVER_OK) && - (n->status & VIRTIO_NET_S_LINK_UP) && - n->vm_running)) { + if (!!n->vhost_started == virtio_net_started(n, status)) { return; } if (!n->vhost_started) { @@ -131,6 +134,32 @@ static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status) } } +static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status) +{ + VirtIONet *n = to_virtio_net(vdev); + + virtio_net_vhost_status(n, status); + + if (!n->tx_waiting) { + return; + } + + if (virtio_net_started(n, status) && !n->vhost_started) { + if (n->tx_timer) { + qemu_mod_timer(n->tx_timer, + qemu_get_clock(vm_clock) + n->tx_timeout); + } else { + qemu_bh_schedule(n->tx_bh); + } + } else { + if (n->tx_timer) { + qemu_del_timer(n->tx_timer); + } else { + qemu_bh_cancel(n->tx_bh); + } + } +} + static void virtio_net_set_link_status(VLANClientState *nc) { VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque; @@ -675,11 +704,12 @@ static int32_t virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq) { VirtQueueElement elem; int32_t num_packets = 0; - if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) { return num_packets; } + assert(n->vm_running); + if (n->async_tx.elem.out_num) { virtio_queue_set_notification(n->tx_vq, 0); return num_packets; @@ -738,6 +768,12 @@ static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq) { VirtIONet *n = to_virtio_net(vdev); + /* This happens when device was stopped but VCPU wasn't. */ + if (!n->vm_running) { + n->tx_waiting = 1; + return; + } + if (n->tx_waiting) { virtio_queue_set_notification(vq, 1); qemu_del_timer(n->tx_timer); @@ -758,14 +794,19 @@ static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq) if (unlikely(n->tx_waiting)) { return; } + n->tx_waiting = 1; + /* This happens when device was stopped but VCPU wasn't. */ + if (!n->vm_running) { + return; + } virtio_queue_set_notification(vq, 0); qemu_bh_schedule(n->tx_bh); - n->tx_waiting = 1; } static void virtio_net_tx_timer(void *opaque) { VirtIONet *n = opaque; + assert(n->vm_running); n->tx_waiting = 0; @@ -782,6 +823,8 @@ static void virtio_net_tx_bh(void *opaque) VirtIONet *n = opaque; int32_t ret; + assert(n->vm_running); + n->tx_waiting = 0; /* Just in case the driver is not ready on more */ @@ -926,15 +969,6 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int version_id) } } n->mac_table.first_multi = i; - - if (n->tx_waiting) { - if (n->tx_timer) { - qemu_mod_timer(n->tx_timer, - qemu_get_clock(vm_clock) + n->tx_timeout); - } else { - qemu_bh_schedule(n->tx_bh); - } - } return 0; } From b1aeb92666d2fde413c34578b3b42bbfe5f2a506 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Fri, 26 Nov 2010 21:01:41 +0900 Subject: [PATCH 06/12] pci: make command SERR bit writable pcie aer needs SERR bit to be writable, and the PCI spec requires this as well. For compatibility, introduce compat global property command_serr_enable and make this bit readonly for a pre 0.14 pc machine. Signed-off-by: Isaku Yamahata Signed-off-by: Michael S. Tsirkin --- hw/pc_piix.c | 20 ++++++++++++++++++++ hw/pci.c | 5 +++++ hw/pci.h | 4 ++++ 3 files changed, 29 insertions(+) diff --git a/hw/pc_piix.c b/hw/pc_piix.c index 7d29d43190..a2fb554aa2 100644 --- a/hw/pc_piix.c +++ b/hw/pc_piix.c @@ -217,6 +217,14 @@ static QEMUMachine pc_machine = { .desc = "Standard PC", .init = pc_init_pci, .max_cpus = 255, + .compat_props = (GlobalProperty[]) { + { + .driver = "PCI", + .property = "command_serr_enable", + .value = "off", + }, + { /* end of list */ } + }, .is_default = 1, }; @@ -265,6 +273,10 @@ static QEMUMachine pc_machine_v0_12 = { .driver = "vmware-svga", .property = "rombar", .value = stringify(0), + },{ + .driver = "PCI", + .property = "command_serr_enable", + .value = "off", }, { /* end of list */ } } @@ -300,6 +312,10 @@ static QEMUMachine pc_machine_v0_11 = { .driver = "PCI", .property = "rombar", .value = stringify(0), + },{ + .driver = "PCI", + .property = "command_serr_enable", + .value = "off", }, { /* end of list */ } } @@ -347,6 +363,10 @@ static QEMUMachine pc_machine_v0_10 = { .driver = "PCI", .property = "rombar", .value = stringify(0), + },{ + .driver = "PCI", + .property = "command_serr_enable", + .value = "off", }, { /* end of list */ } }, diff --git a/hw/pci.c b/hw/pci.c index 0c15b13037..ca878e88cb 100644 --- a/hw/pci.c +++ b/hw/pci.c @@ -57,6 +57,8 @@ struct BusInfo pci_bus_info = { DEFINE_PROP_UINT32("rombar", PCIDevice, rom_bar, 1), DEFINE_PROP_BIT("multifunction", PCIDevice, cap_present, QEMU_PCI_CAP_MULTIFUNCTION_BITNR, false), + DEFINE_PROP_BIT("command_serr_enable", PCIDevice, cap_present, + QEMU_PCI_CAP_SERR_BITNR, true), DEFINE_PROP_END_OF_LIST() } }; @@ -568,6 +570,9 @@ static void pci_init_wmask(PCIDevice *dev) pci_set_word(dev->wmask + PCI_COMMAND, PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER | PCI_COMMAND_INTX_DISABLE); + if (dev->cap_present & QEMU_PCI_CAP_SERR) { + pci_word_test_and_set_mask(dev->wmask + PCI_COMMAND, PCI_COMMAND_SERR); + } memset(dev->wmask + PCI_CONFIG_HEADER_SIZE, 0xff, config_size - PCI_CONFIG_HEADER_SIZE); diff --git a/hw/pci.h b/hw/pci.h index 89f7b761e7..099c2517ae 100644 --- a/hw/pci.h +++ b/hw/pci.h @@ -118,6 +118,10 @@ enum { /* multifunction capable device */ #define QEMU_PCI_CAP_MULTIFUNCTION_BITNR 3 QEMU_PCI_CAP_MULTIFUNCTION = (1 << QEMU_PCI_CAP_MULTIFUNCTION_BITNR), + + /* command register SERR bit enabled */ +#define QEMU_PCI_CAP_SERR_BITNR 4 + QEMU_PCI_CAP_SERR = (1 << QEMU_PCI_CAP_SERR_BITNR), }; struct PCIDevice { From 4a9dd6658268a942a8ea230f950a951229355cbb Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 8 Dec 2010 17:46:23 +0900 Subject: [PATCH 07/12] pci: untangle pci/msi dependency msi depends on pci but pci should not depend on msi. The only dependency we have is a recent addition of pci_msi_ functions, IMO they add little enough to open-code in the small number of users. Follow-up patches add more cleanups. Signed-off-by: Michael S. Tsirkin Signed-off-by: Isaku Yamahata --- hw/pci.c | 19 ------------------- hw/pci.h | 3 --- hw/pcie.c | 8 +++++--- hw/pcie_aer.c | 24 ++++++++++++++++-------- 4 files changed, 21 insertions(+), 33 deletions(-) diff --git a/hw/pci.c b/hw/pci.c index ca878e88cb..254647bc34 100644 --- a/hw/pci.c +++ b/hw/pci.c @@ -25,8 +25,6 @@ #include "pci.h" #include "pci_bridge.h" #include "pci_internals.h" -#include "msix.h" -#include "msi.h" #include "monitor.h" #include "net.h" #include "sysemu.h" @@ -1099,23 +1097,6 @@ static void pci_set_irq(void *opaque, int irq_num, int level) pci_change_irq_level(pci_dev, irq_num, change); } -bool pci_msi_enabled(PCIDevice *dev) -{ - return msix_enabled(dev) || msi_enabled(dev); -} - -void pci_msi_notify(PCIDevice *dev, unsigned int vector) -{ - if (msix_enabled(dev)) { - msix_notify(dev, vector); - } else if (msi_enabled(dev)) { - msi_notify(dev, vector); - } else { - /* MSI/MSI-X must be enabled */ - abort(); - } -} - /***********************************************************/ /* monitor info on PCI */ diff --git a/hw/pci.h b/hw/pci.h index 099c2517ae..aa3afe9684 100644 --- a/hw/pci.h +++ b/hw/pci.h @@ -261,9 +261,6 @@ void do_pci_info_print(Monitor *mon, const QObject *data); void do_pci_info(Monitor *mon, QObject **ret_data); void pci_bridge_update_mappings(PCIBus *b); -bool pci_msi_enabled(PCIDevice *dev); -void pci_msi_notify(PCIDevice *dev, unsigned int vector); - static inline void pci_set_byte(uint8_t *config, uint8_t val) { diff --git a/hw/pcie.c b/hw/pcie.c index f461c1cfbe..d1f0086559 100644 --- a/hw/pcie.c +++ b/hw/pcie.c @@ -167,10 +167,12 @@ static void hotplug_event_notify(PCIDevice *dev) * The Port may optionally send an MSI when there are hot-plug events that * occur while interrupt generation is disabled, and interrupt generation is * subsequently enabled. */ - if (!pci_msi_enabled(dev)) { + if (msix_enabled(dev)) { + msix_notify(dev, pcie_cap_flags_get_vector(dev)); + } else if (msi_enabled(dev)) { + msi_notify(dev, pcie_cap_flags_get_vector(dev)); + } else { qemu_set_irq(dev->irq[dev->exp.hpev_intx], dev->exp.hpev_notified); - } else if (dev->exp.hpev_notified) { - pci_msi_notify(dev, pcie_cap_flags_get_vector(dev)); } } diff --git a/hw/pcie_aer.c b/hw/pcie_aer.c index 47d64003fc..dac27c39fe 100644 --- a/hw/pcie_aer.c +++ b/hw/pcie_aer.c @@ -339,9 +339,13 @@ static bool pcie_aer_msg_root_port(PCIDevice *dev, const PCIEAERMsg *msg) if (root_cmd & msg->severity) { /* 6.2.4.1.2 Interrupt Generation */ - if (pci_msi_enabled(dev)) { + if (msix_enabled(dev)) { if (msi_trigger) { - pci_msi_notify(dev, pcie_aer_root_get_vector(dev)); + msix_notify(dev, pcie_aer_root_get_vector(dev)); + } + } else if (msi_enabled(dev)) { + if (msi_trigger) { + msi_notify(dev, pcie_aer_root_get_vector(dev)); } } else { qemu_set_irq(dev->irq[dev->exp.aer_intx], 1); @@ -761,16 +765,20 @@ void pcie_aer_root_write_config(PCIDevice *dev, /* 6.2.4.1.2 Interrupt Generation */ /* 0 -> 1 */ - uint32_t root_cmd_set = (root_cmd_prev ^ root_cmd) & root_cmd; + uint32_t root_cmd_set = ~root_cmd_prev & root_cmd; uint32_t root_status = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS); + bool trigger = pcie_aer_root_does_trigger(root_cmd_set, root_status); - if (pci_msi_enabled(dev)) { - if (pcie_aer_root_does_trigger(root_cmd_set, root_status)) { - pci_msi_notify(dev, pcie_aer_root_get_vector(dev)); + if (msix_enabled(dev)) { + if (trigger) { + msix_notify(dev, pcie_aer_root_get_vector(dev)); + } + } else if (msi_enabled(dev)) { + if (trigger) { + msi_notify(dev, pcie_aer_root_get_vector(dev)); } } else { - int int_level = pcie_aer_root_does_trigger(root_cmd, root_status); - qemu_set_irq(dev->irq[dev->exp.aer_intx], int_level); + qemu_set_irq(dev->irq[dev->exp.aer_intx], trigger); } } } From 624c716cc576ec5e6bed50da3deb45aa9537cacd Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 8 Dec 2010 17:46:24 +0900 Subject: [PATCH 08/12] Makefile: make msix/msi depend on CONFIG_PCI Possible now that pci is not depending on these. Signed-off-by: Michael S. Tsirkin --- Makefile.objs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Makefile.objs b/Makefile.objs index 04625eb10f..d1e63ce8e3 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -168,7 +168,8 @@ hw-obj-$(CONFIG_VIRTIO) += virtio.o virtio-console.o hw-obj-y += fw_cfg.o # FIXME: Core PCI code and its direct dependencies are required by the # QMP query-pci command. -hw-obj-y += pci.o pci_bridge.o msix.o msi.o +hw-obj-y += pci.o pci_bridge.o +hw-obj-$(CONFIG_PCI) += msix.o msi.o hw-obj-$(CONFIG_PCI) += pci_host.o pcie_host.o hw-obj-$(CONFIG_PCI) += ioh3420.o xio3130_upstream.o xio3130_downstream.o hw-obj-y += watchdog.o From c3f33667a64a6de0b92106c862247d97d81490ef Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 8 Dec 2010 17:46:25 +0900 Subject: [PATCH 09/12] pci/aer: fix error injection Fix the injection logic upon aer message to follow 6.2.4.1.2 more closely: specifically only send an msi interrupt when the logical or of the enabled bits changed, not when a bit which was previously clear becomes set. Signed-off-by: Michael S. Tsirkin Signed-off-by: Isaku Yamahata --- hw/pcie_aer.c | 51 +++++++++++++++++++++++++++++++++++---------------- 1 file changed, 35 insertions(+), 16 deletions(-) diff --git a/hw/pcie_aer.c b/hw/pcie_aer.c index dac27c39fe..9505c93343 100644 --- a/hw/pcie_aer.c +++ b/hw/pcie_aer.c @@ -257,6 +257,22 @@ static unsigned int pcie_aer_root_get_vector(PCIDevice *dev) return (root_status & PCI_ERR_ROOT_IRQ) >> PCI_ERR_ROOT_IRQ_SHIFT; } +/* Given a status register, get corresponding bits in the command register */ +static uint32_t pcie_aer_status_to_cmd(uint32_t status) +{ + uint32_t cmd = 0; + if (status & PCI_ERR_ROOT_COR_RCV) { + cmd |= PCI_ERR_ROOT_CMD_COR_EN; + } + if (status & PCI_ERR_ROOT_NONFATAL_RCV) { + cmd |= PCI_ERR_ROOT_CMD_NONFATAL_EN; + } + if (status & PCI_ERR_ROOT_FATAL_RCV) { + cmd |= PCI_ERR_ROOT_CMD_FATAL_EN; + } + return cmd; +} + /* * return value: * true: error message is sent up @@ -272,14 +288,14 @@ static bool pcie_aer_msg_root_port(PCIDevice *dev, const PCIEAERMsg *msg) uint16_t cmd; uint8_t *aer_cap; uint32_t root_cmd; - uint32_t root_status; + uint32_t root_status, prev_status; bool msi_trigger; msg_sent = false; cmd = pci_get_word(dev->config + PCI_COMMAND); aer_cap = dev->config + dev->exp.aer_cap; root_cmd = pci_get_long(aer_cap + PCI_ERR_ROOT_COMMAND); - root_status = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS); + prev_status = root_status = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS); msi_trigger = false; if (cmd & PCI_COMMAND_SERR) { @@ -337,20 +353,23 @@ static bool pcie_aer_msg_root_port(PCIDevice *dev, const PCIEAERMsg *msg) } pci_set_long(aer_cap + PCI_ERR_ROOT_STATUS, root_status); - if (root_cmd & msg->severity) { - /* 6.2.4.1.2 Interrupt Generation */ - if (msix_enabled(dev)) { - if (msi_trigger) { - msix_notify(dev, pcie_aer_root_get_vector(dev)); - } - } else if (msi_enabled(dev)) { - if (msi_trigger) { - msi_notify(dev, pcie_aer_root_get_vector(dev)); - } - } else { - qemu_set_irq(dev->irq[dev->exp.aer_intx], 1); - } - msg_sent = true; + /* 6.2.4.1.2 Interrupt Generation */ + /* All the above did was set some bits in the status register. + * Specifically these that match message severity. + * The below code relies on this fact. */ + if (!(root_cmd & msg->severity) || + (pcie_aer_status_to_cmd(prev_status) & root_cmd)) { + /* Condition is not being set or was already true so nothing to do. */ + return msg_sent; + } + + msg_sent = true; + if (msix_enabled(dev)) { + msix_notify(dev, pcie_aer_root_get_vector(dev)); + } else if (msi_enabled(dev)) { + msi_notify(dev, pcie_aer_root_get_vector(dev)); + } else { + qemu_set_irq(dev->irq[dev->exp.aer_intx], 1); } return msg_sent; } From 2b3cb353e7af7a90eec22ba9720dcef2a80c7f6f Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 8 Dec 2010 17:46:26 +0900 Subject: [PATCH 10/12] pci/aer: fix interrupt on config write config write handling for aer seems broken: For example, it won't clear a level interrupt when command register is set to 0. Make it match the spec: level should equal the logical or of enabled bits, msi only be sent when the logical or changes. Signed-off-by: Michael S. Tsirkin Signed-off-by: Isaku Yamahata --- hw/pcie_aer.c | 46 +++++++++++++++++----------------------------- 1 file changed, 17 insertions(+), 29 deletions(-) diff --git a/hw/pcie_aer.c b/hw/pcie_aer.c index 9505c93343..04b0f45047 100644 --- a/hw/pcie_aer.c +++ b/hw/pcie_aer.c @@ -762,43 +762,31 @@ void pcie_aer_root_reset(PCIDevice *dev) */ } -static bool pcie_aer_root_does_trigger(uint32_t cmd, uint32_t status) -{ - return - ((cmd & PCI_ERR_ROOT_CMD_COR_EN) && (status & PCI_ERR_ROOT_COR_RCV)) || - ((cmd & PCI_ERR_ROOT_CMD_NONFATAL_EN) && - (status & PCI_ERR_ROOT_NONFATAL_RCV)) || - ((cmd & PCI_ERR_ROOT_CMD_FATAL_EN) && - (status & PCI_ERR_ROOT_FATAL_RCV)); -} - void pcie_aer_root_write_config(PCIDevice *dev, uint32_t addr, uint32_t val, int len, uint32_t root_cmd_prev) { uint8_t *aer_cap = dev->config + dev->exp.aer_cap; - - /* root command register */ + uint32_t root_status = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS); + uint32_t enabled_cmd = pcie_aer_status_to_cmd(root_status); uint32_t root_cmd = pci_get_long(aer_cap + PCI_ERR_ROOT_COMMAND); - if (root_cmd & PCI_ERR_ROOT_CMD_EN_MASK) { - /* 6.2.4.1.2 Interrupt Generation */ + /* 6.2.4.1.2 Interrupt Generation */ + if (!msix_enabled(dev) && !msi_enabled(dev)) { + qemu_set_irq(dev->irq[dev->exp.aer_intx], !!(root_cmd & enabled_cmd)); + return; + } - /* 0 -> 1 */ - uint32_t root_cmd_set = ~root_cmd_prev & root_cmd; - uint32_t root_status = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS); - bool trigger = pcie_aer_root_does_trigger(root_cmd_set, root_status); + if ((root_cmd_prev & enabled_cmd) || !(root_cmd & enabled_cmd)) { + /* Send MSI on transition from false to true. */ + return; + } - if (msix_enabled(dev)) { - if (trigger) { - msix_notify(dev, pcie_aer_root_get_vector(dev)); - } - } else if (msi_enabled(dev)) { - if (trigger) { - msi_notify(dev, pcie_aer_root_get_vector(dev)); - } - } else { - qemu_set_irq(dev->irq[dev->exp.aer_intx], trigger); - } + if (msix_enabled(dev)) { + msix_notify(dev, pcie_aer_root_get_vector(dev)); + } else if (msi_enabled(dev)) { + msi_notify(dev, pcie_aer_root_get_vector(dev)); + } else { + abort(); } } From 5f47c187d992d5147397ddd5323c732d3d39cb8f Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 8 Dec 2010 17:46:27 +0900 Subject: [PATCH 11/12] pci/aer: remove dead code Remove some unused variables and return values. Signed-off-by: Michael S. Tsirkin Signed-off-by: Isaku Yamahata --- hw/pcie_aer.c | 25 ++----------------------- 1 file changed, 2 insertions(+), 23 deletions(-) diff --git a/hw/pcie_aer.c b/hw/pcie_aer.c index 04b0f45047..f24784ac98 100644 --- a/hw/pcie_aer.c +++ b/hw/pcie_aer.c @@ -274,29 +274,21 @@ static uint32_t pcie_aer_status_to_cmd(uint32_t status) } /* - * return value: - * true: error message is sent up - * false: error message is masked - * * 6.2.6 Error Message Control * Figure 6-3 * root port part */ -static bool pcie_aer_msg_root_port(PCIDevice *dev, const PCIEAERMsg *msg) +static void pcie_aer_msg_root_port(PCIDevice *dev, const PCIEAERMsg *msg) { - bool msg_sent; uint16_t cmd; uint8_t *aer_cap; uint32_t root_cmd; uint32_t root_status, prev_status; - bool msi_trigger; - msg_sent = false; cmd = pci_get_word(dev->config + PCI_COMMAND); aer_cap = dev->config + dev->exp.aer_cap; root_cmd = pci_get_long(aer_cap + PCI_ERR_ROOT_COMMAND); prev_status = root_status = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS); - msi_trigger = false; if (cmd & PCI_COMMAND_SERR) { /* System Error. @@ -315,25 +307,14 @@ static bool pcie_aer_msg_root_port(PCIDevice *dev, const PCIEAERMsg *msg) if (root_status & PCI_ERR_ROOT_COR_RCV) { root_status |= PCI_ERR_ROOT_MULTI_COR_RCV; } else { - if (root_cmd & PCI_ERR_ROOT_CMD_COR_EN) { - msi_trigger = true; - } pci_set_word(aer_cap + PCI_ERR_ROOT_COR_SRC, msg->source_id); } root_status |= PCI_ERR_ROOT_COR_RCV; break; case PCI_ERR_ROOT_CMD_NONFATAL_EN: - if (!(root_status & PCI_ERR_ROOT_NONFATAL_RCV) && - root_cmd & PCI_ERR_ROOT_CMD_NONFATAL_EN) { - msi_trigger = true; - } root_status |= PCI_ERR_ROOT_NONFATAL_RCV; break; case PCI_ERR_ROOT_CMD_FATAL_EN: - if (!(root_status & PCI_ERR_ROOT_FATAL_RCV) && - root_cmd & PCI_ERR_ROOT_CMD_FATAL_EN) { - msi_trigger = true; - } if (!(root_status & PCI_ERR_ROOT_UNCOR_RCV)) { root_status |= PCI_ERR_ROOT_FIRST_FATAL; } @@ -360,10 +341,9 @@ static bool pcie_aer_msg_root_port(PCIDevice *dev, const PCIEAERMsg *msg) if (!(root_cmd & msg->severity) || (pcie_aer_status_to_cmd(prev_status) & root_cmd)) { /* Condition is not being set or was already true so nothing to do. */ - return msg_sent; + return; } - msg_sent = true; if (msix_enabled(dev)) { msix_notify(dev, pcie_aer_root_get_vector(dev)); } else if (msi_enabled(dev)) { @@ -371,7 +351,6 @@ static bool pcie_aer_msg_root_port(PCIDevice *dev, const PCIEAERMsg *msg) } else { qemu_set_irq(dev->irq[dev->exp.aer_intx], 1); } - return msg_sent; } /* From 513691b7ff20262efe9aafb85c8dd4615588ad48 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 8 Dec 2010 17:46:28 +0900 Subject: [PATCH 12/12] pci/aer: factor out common code Same logic is used to assert interrupts and send msix messages, so add a static functin for this. Signed-off-by: Michael S. Tsirkin --- hw/pcie_aer.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/hw/pcie_aer.c b/hw/pcie_aer.c index f24784ac98..cb97a95d61 100644 --- a/hw/pcie_aer.c +++ b/hw/pcie_aer.c @@ -273,6 +273,17 @@ static uint32_t pcie_aer_status_to_cmd(uint32_t status) return cmd; } +static void pcie_aer_root_notify(PCIDevice *dev) +{ + if (msix_enabled(dev)) { + msix_notify(dev, pcie_aer_root_get_vector(dev)); + } else if (msi_enabled(dev)) { + msi_notify(dev, pcie_aer_root_get_vector(dev)); + } else { + qemu_set_irq(dev->irq[dev->exp.aer_intx], 1); + } +} + /* * 6.2.6 Error Message Control * Figure 6-3 @@ -344,13 +355,7 @@ static void pcie_aer_msg_root_port(PCIDevice *dev, const PCIEAERMsg *msg) return; } - if (msix_enabled(dev)) { - msix_notify(dev, pcie_aer_root_get_vector(dev)); - } else if (msi_enabled(dev)) { - msi_notify(dev, pcie_aer_root_get_vector(dev)); - } else { - qemu_set_irq(dev->irq[dev->exp.aer_intx], 1); - } + pcie_aer_root_notify(dev); } /* @@ -760,13 +765,7 @@ void pcie_aer_root_write_config(PCIDevice *dev, return; } - if (msix_enabled(dev)) { - msix_notify(dev, pcie_aer_root_get_vector(dev)); - } else if (msi_enabled(dev)) { - msi_notify(dev, pcie_aer_root_get_vector(dev)); - } else { - abort(); - } + pcie_aer_root_notify(dev); } static const VMStateDescription vmstate_pcie_aer_err = {