From 41e7313f878813efeac4a65680018efcaff322a9 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Sat, 26 Mar 2011 22:53:09 +0100 Subject: [PATCH 01/34] cirrus_vga: remove unneeded reset cirrus_reset is already called by the reset framework, so there is no need to call it in cirrus_init_common. Cc: Michael S. Tsirkin Signed-off-by: Stefan Weil Signed-off-by: Michael S. Tsirkin --- hw/cirrus_vga.c | 1 - 1 file changed, 1 deletion(-) diff --git a/hw/cirrus_vga.c b/hw/cirrus_vga.c index 2724f7b480..bdf4c8b32d 100644 --- a/hw/cirrus_vga.c +++ b/hw/cirrus_vga.c @@ -3024,7 +3024,6 @@ static void cirrus_init_common(CirrusVGAState * s, int device_id, int is_pci) s->vga.cursor_draw_line = cirrus_cursor_draw_line; qemu_register_reset(cirrus_reset, s); - cirrus_reset(s); } /*************************************** From 9ddf8437856539c352070dee0e9fb6a33ab6ff5c Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Fri, 1 Apr 2011 20:43:21 +0900 Subject: [PATCH 02/34] pci: add accessor function to get irq levels Introduce accessor function to know INTx levels. It will be used later by q35. Although piix_pci tracks the intx line levels, it can be eliminated by this helper function. Cc: Michael S. Tsirkin Signed-off-by: Isaku Yamahata Signed-off-by: Michael S. Tsirkin --- hw/pci.c | 7 +++++++ hw/pci.h | 1 + 2 files changed, 8 insertions(+) diff --git a/hw/pci.c b/hw/pci.c index 6b577e1e3e..3ee48715fc 100644 --- a/hw/pci.c +++ b/hw/pci.c @@ -126,6 +126,13 @@ static void pci_change_irq_level(PCIDevice *pci_dev, int irq_num, int change) bus->set_irq(bus->irq_opaque, irq_num, bus->irq_count[irq_num] != 0); } +int pci_bus_get_irq_level(PCIBus *bus, int irq_num) +{ + assert(irq_num >= 0); + assert(irq_num < bus->nirq); + return !!bus->irq_count[irq_num]; +} + /* Update interrupt status bit in config space on interrupt * state change. 
*/ static void pci_update_irq_status(PCIDevice *dev) diff --git a/hw/pci.h b/hw/pci.h index 52ee8c9c5b..a5f875d06b 100644 --- a/hw/pci.h +++ b/hw/pci.h @@ -234,6 +234,7 @@ void pci_bus_new_inplace(PCIBus *bus, DeviceState *parent, PCIBus *pci_bus_new(DeviceState *parent, const char *name, uint8_t devfn_min); void pci_bus_irqs(PCIBus *bus, pci_set_irq_fn set_irq, pci_map_irq_fn map_irq, void *irq_opaque, int nirq); +int pci_bus_get_irq_level(PCIBus *bus, int irq_num); void pci_bus_hotplug(PCIBus *bus, pci_hotplug_fn hotplug, DeviceState *dev); PCIBus *pci_register_bus(DeviceState *parent, const char *name, pci_set_irq_fn set_irq, pci_map_irq_fn map_irq, From e735b55a8c11dd455e31ccd4420e6c9485191d0c Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Fri, 1 Apr 2011 20:43:22 +0900 Subject: [PATCH 03/34] piix_pci: eliminate PIIX3State::pci_irq_levels PIIX3State::pci_irq_levels are redundant which is already tracked by PCIBus layer. So eliminate them. Cc: Juan Quintela Cc: Michael S. Tsirkin Signed-off-by: Isaku Yamahata Signed-off-by: Michael S. Tsirkin --- hw/piix_pci.c | 38 +++++++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/hw/piix_pci.c b/hw/piix_pci.c index 358da58a80..35e420c31d 100644 --- a/hw/piix_pci.c +++ b/hw/piix_pci.c @@ -37,10 +37,14 @@ typedef PCIHostState I440FXState; +#define PIIX_NUM_PIRQS 4ULL /* PIRQ[A-D] */ + typedef struct PIIX3State { PCIDevice dev; - int pci_irq_levels[4]; qemu_irq *pic; + + /* This member isn't used. 
Just for save/load compatibility */ + int32_t pci_irq_levels_vmstate[PIIX_NUM_PIRQS]; } PIIX3State; struct PCII440FXState { @@ -162,9 +166,11 @@ static int i440fx_load_old(QEMUFile* f, void *opaque, int version_id) i440fx_update_memory_mappings(d); qemu_get_8s(f, &d->smm_enabled); - if (version_id == 2) - for (i = 0; i < 4; i++) - d->piix3->pci_irq_levels[i] = qemu_get_be32(f); + if (version_id == 2) { + for (i = 0; i < PIIX_NUM_PIRQS; i++) { + qemu_get_be32(f); /* dummy load for compatibility */ + } + } return 0; } @@ -236,7 +242,7 @@ PCIBus *i440fx_init(PCII440FXState **pi440fx_state, int *piix3_devfn, qemu_irq * piix3 = DO_UPCAST(PIIX3State, dev, pci_create_simple_multifunction(b, -1, true, "PIIX3")); piix3->pic = pic; - pci_bus_irqs(b, piix3_set_irq, pci_slot_get_pirq, piix3, 4); + pci_bus_irqs(b, piix3_set_irq, pci_slot_get_pirq, piix3, PIIX_NUM_PIRQS); (*pi440fx_state)->piix3 = piix3; *piix3_devfn = piix3->dev.devfn; @@ -256,8 +262,6 @@ static void piix3_set_irq(void *opaque, int irq_num, int level) int i, pic_irq, pic_level; PIIX3State *piix3 = opaque; - piix3->pci_irq_levels[irq_num] = level; - /* now we change the pic irq level according to the piix irq mappings */ /* XXX: optimize */ pic_irq = piix3->dev.config[0x60 + irq_num]; @@ -266,8 +270,9 @@ static void piix3_set_irq(void *opaque, int irq_num, int level) to it */ pic_level = 0; for (i = 0; i < 4; i++) { - if (pic_irq == piix3->dev.config[0x60 + i]) - pic_level |= piix3->pci_irq_levels[i]; + if (pic_irq == piix3->dev.config[0x60 + i]) { + pic_level |= pci_bus_get_irq_level(piix3->dev.bus, i); + } } qemu_set_irq(piix3->pic[pic_irq], pic_level); } @@ -309,8 +314,17 @@ static void piix3_reset(void *opaque) pci_conf[0xab] = 0x00; pci_conf[0xac] = 0x00; pci_conf[0xae] = 0x00; +} - memset(d->pci_irq_levels, 0, sizeof(d->pci_irq_levels)); +static void piix3_pre_save(void *opaque) +{ + int i; + PIIX3State *piix3 = opaque; + + for (i = 0; i < ARRAY_SIZE(piix3->pci_irq_levels_vmstate); i++) { + 
piix3->pci_irq_levels_vmstate[i] = + pci_bus_get_irq_level(piix3->dev.bus, i); + } } static const VMStateDescription vmstate_piix3 = { @@ -318,9 +332,11 @@ static const VMStateDescription vmstate_piix3 = { .version_id = 3, .minimum_version_id = 2, .minimum_version_id_old = 2, + .pre_save = piix3_pre_save, .fields = (VMStateField []) { VMSTATE_PCI_DEVICE(dev, PIIX3State), - VMSTATE_INT32_ARRAY_V(pci_irq_levels, PIIX3State, 4, 3), + VMSTATE_INT32_ARRAY_V(pci_irq_levels_vmstate, PIIX3State, + PIIX_NUM_PIRQS, 3), VMSTATE_END_OF_LIST() } }; From ab431c283e7055bcd6fb622f212bb29e84a6a134 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Fri, 1 Apr 2011 20:43:23 +0900 Subject: [PATCH 04/34] piix_pci: optimize set irq path optimize irq routing in piix_pci.c which has been a TODO. So far piix3 tracks each pirq level and checks whether a given pic pin is asserted by seeing if each pirq is mapped into the pic pin. This is independent of irq routing, but the data path is on the slow path. Given that irq routing is rarely changed and asserting pic pins is on the data path, the path that asserts pic pins should be optimized and changing irq routing should be on the slow path. The new behavior with this patch series is to use a bitmap which is addressed by pirq and pic pins with a given irq routing. When a pirq is asserted, the bitmap is set and we see if the pic pin is asserted by checking the bitmap. When irq routing is changed, rebuild the bitmap and re-assert pic pins. test: - create VM with 4 e1000 nics in different pci slots (i.e. fn=0 for each e1000) Thus those e1000's INTA are connected to each PIRQ[A-D]. - run linux as guest and saw each device trigger interrupts by seeing /proc/interrupts. And then confirmed that each PIRQ[A-D] surely asserted interrupts. Because irq 10 and 11 are shared by 4 e1000's, only one NIC is activated with ifconfig ethN up/down when counting interrupts. Cc: Michael S. Tsirkin Signed-off-by: Isaku Yamahata Signed-off-by: Michael S. 
Tsirkin --- hw/piix_pci.c | 105 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 86 insertions(+), 19 deletions(-) diff --git a/hw/piix_pci.c b/hw/piix_pci.c index 35e420c31d..7ffb8218ce 100644 --- a/hw/piix_pci.c +++ b/hw/piix_pci.c @@ -37,10 +37,27 @@ typedef PCIHostState I440FXState; +#define PIIX_NUM_PIC_IRQS 16 /* i8259 * 2 */ #define PIIX_NUM_PIRQS 4ULL /* PIRQ[A-D] */ +#define PIIX_PIRQC 0x60 typedef struct PIIX3State { PCIDevice dev; + + /* + * bitmap to track pic levels. + * The pic level is the logical OR of all the PCI irqs mapped to it + * So one PIC level is tracked by PIIX_NUM_PIRQS bits. + * + * PIRQ is mapped to PIC pins, we track it by + * PIIX_NUM_PIRQS * PIIX_NUM_PIC_IRQS = 64 bits with + * pic_irq * PIIX_NUM_PIRQS + pirq + */ +#if PIIX_NUM_PIC_IRQS * PIIX_NUM_PIRQS > 64 +#error "unable to encode pic state in 64bit in pic_levels." +#endif + uint64_t pic_levels; + qemu_irq *pic; /* This member isn't used. Just for save/load compatibility */ @@ -59,16 +76,16 @@ struct PCII440FXState { #define I440FX_PAM_SIZE 7 #define I440FX_SMRAM 0x72 -static void piix3_set_irq(void *opaque, int irq_num, int level); +static void piix3_set_irq(void *opaque, int pirq, int level); /* return the global irq number corresponding to a given device irq pin. We could also use the bus number to have a more precise mapping. 
*/ -static int pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num) +static int pci_slot_get_pirq(PCIDevice *pci_dev, int pci_intx) { int slot_addend; slot_addend = (pci_dev->devfn >> 3) - 1; - return (irq_num + slot_addend) & 3; + return (pci_intx + slot_addend) & 3; } static void update_pam(PCII440FXState *d, uint32_t start, uint32_t end, int r) @@ -256,25 +273,64 @@ PCIBus *i440fx_init(PCII440FXState **pi440fx_state, int *piix3_devfn, qemu_irq * } /* PIIX3 PCI to ISA bridge */ - -static void piix3_set_irq(void *opaque, int irq_num, int level) +static void piix3_set_irq_pic(PIIX3State *piix3, int pic_irq) { - int i, pic_irq, pic_level; - PIIX3State *piix3 = opaque; + qemu_set_irq(piix3->pic[pic_irq], + !!(piix3->pic_levels & + (((1UL << PIIX_NUM_PIRQS) - 1) << + (pic_irq * PIIX_NUM_PIRQS)))); +} - /* now we change the pic irq level according to the piix irq mappings */ - /* XXX: optimize */ - pic_irq = piix3->dev.config[0x60 + irq_num]; - if (pic_irq < 16) { - /* The pic level is the logical OR of all the PCI irqs mapped - to it */ - pic_level = 0; - for (i = 0; i < 4; i++) { - if (pic_irq == piix3->dev.config[0x60 + i]) { - pic_level |= pci_bus_get_irq_level(piix3->dev.bus, i); - } +static void piix3_set_irq_level(PIIX3State *piix3, int pirq, int level, + bool propagate) +{ + int pic_irq; + uint64_t mask; + + pic_irq = piix3->dev.config[PIIX_PIRQC + pirq]; + if (pic_irq >= PIIX_NUM_PIC_IRQS) { + return; + } + + mask = 1ULL << ((pic_irq * PIIX_NUM_PIRQS) + pirq); + piix3->pic_levels &= ~mask; + piix3->pic_levels |= mask * !!level; + + if (propagate) { + piix3_set_irq_pic(piix3, pic_irq); + } +} + +static void piix3_set_irq(void *opaque, int pirq, int level) +{ + PIIX3State *piix3 = opaque; + piix3_set_irq_level(piix3, pirq, level, true); +} + +/* irq routing is changed. 
so rebuild bitmap */ +static void piix3_update_irq_levels(PIIX3State *piix3) +{ + int pirq; + + piix3->pic_levels = 0; + for (pirq = 0; pirq < PIIX_NUM_PIRQS; pirq++) { + piix3_set_irq_level(piix3, pirq, + pci_bus_get_irq_level(piix3->dev.bus, pirq), + false); + } +} + +static void piix3_write_config(PCIDevice *dev, + uint32_t address, uint32_t val, int len) +{ + pci_default_write_config(dev, address, val, len); + if (ranges_overlap(address, len, PIIX_PIRQC, 4)) { + PIIX3State *piix3 = DO_UPCAST(PIIX3State, dev, dev); + int pic_irq; + piix3_update_irq_levels(piix3); + for (pic_irq = 0; pic_irq < PIIX_NUM_PIC_IRQS; pic_irq++) { + piix3_set_irq_pic(piix3, pic_irq); } - qemu_set_irq(piix3->pic[pic_irq], pic_level); } } @@ -314,6 +370,15 @@ static void piix3_reset(void *opaque) pci_conf[0xab] = 0x00; pci_conf[0xac] = 0x00; pci_conf[0xae] = 0x00; + + d->pic_levels = 0; +} + +static int piix3_post_load(void *opaque, int version_id) +{ + PIIX3State *piix3 = opaque; + piix3_update_irq_levels(piix3); + return 0; } static void piix3_pre_save(void *opaque) @@ -332,6 +397,7 @@ static const VMStateDescription vmstate_piix3 = { .version_id = 3, .minimum_version_id = 2, .minimum_version_id_old = 2, + .post_load = piix3_post_load, .pre_save = piix3_pre_save, .fields = (VMStateField []) { VMSTATE_PCI_DEVICE(dev, PIIX3State), @@ -375,6 +441,7 @@ static PCIDeviceInfo i440fx_info[] = { .qdev.no_user = 1, .no_hotplug = 1, .init = piix3_initfn, + .config_write = piix3_write_config, },{ /* end of list */ } From afe3ef1d01dd767b6824c0580ecb92f6a27b75cc Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Fri, 1 Apr 2011 20:43:24 +0900 Subject: [PATCH 05/34] piix_pci: load path clean up The previous patch didn't change the behavior when load, it resulted in ugly code. This patch cleans it up. With this patch, pic irq lines are manipulated when loaded. It is expected that it won't change the behaviour because the interrupts are level: at the moment e.g. 
pci devices already reassert interrupts on load. Test: - run linux as guest and use flooding ping (ping -f) to host in order to trigger interrupts for e1000 emulated. - savevm/loadvm and see guest kept running after loadvm. To be honest, I'm not sure that ping -f caused enough interrupts because Linux e1000 driver supports NAPI. TODO: test more OSes, stress test with save/load, live-migration Signed-off-by: Isaku Yamahata Signed-off-by: Michael S. Tsirkin --- hw/piix_pci.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/hw/piix_pci.c b/hw/piix_pci.c index 7ffb8218ce..5f0d92f10d 100644 --- a/hw/piix_pci.c +++ b/hw/piix_pci.c @@ -281,8 +281,7 @@ static void piix3_set_irq_pic(PIIX3State *piix3, int pic_irq) (pic_irq * PIIX_NUM_PIRQS)))); } -static void piix3_set_irq_level(PIIX3State *piix3, int pirq, int level, - bool propagate) +static void piix3_set_irq_level(PIIX3State *piix3, int pirq, int level) { int pic_irq; uint64_t mask; @@ -296,15 +295,13 @@ static void piix3_set_irq_level(PIIX3State *piix3, int pirq, int level, piix3->pic_levels &= ~mask; piix3->pic_levels |= mask * !!level; - if (propagate) { - piix3_set_irq_pic(piix3, pic_irq); - } + piix3_set_irq_pic(piix3, pic_irq); } static void piix3_set_irq(void *opaque, int pirq, int level) { PIIX3State *piix3 = opaque; - piix3_set_irq_level(piix3, pirq, level, true); + piix3_set_irq_level(piix3, pirq, level); } /* irq routing is changed. so rebuild bitmap */ @@ -315,8 +312,7 @@ static void piix3_update_irq_levels(PIIX3State *piix3) piix3->pic_levels = 0; for (pirq = 0; pirq < PIIX_NUM_PIRQS; pirq++) { piix3_set_irq_level(piix3, pirq, - pci_bus_get_irq_level(piix3->dev.bus, pirq), - false); + pci_bus_get_irq_level(piix3->dev.bus, pirq)); } } From 0fd542fb7d13ddf12f897bb27c5950f31638b1df Mon Sep 17 00:00:00 2001 From: "Michael S. 
Tsirkin" Date: Wed, 6 Apr 2011 22:25:38 +0300 Subject: [PATCH 06/34] cpu: add set_memory flag to request dirty logging Pass the flag to all cpu notifiers, doing nothing at this point. Will be used by follow-up patches. Signed-off-by: Michael S. Tsirkin --- cpu-common.h | 22 +++++++++++++++++----- exec.c | 14 ++++++++------ hw/vhost.c | 3 ++- kvm-all.c | 3 ++- 4 files changed, 29 insertions(+), 13 deletions(-) diff --git a/cpu-common.h b/cpu-common.h index ef4e8dab7a..c239cc0def 100644 --- a/cpu-common.h +++ b/cpu-common.h @@ -34,10 +34,21 @@ typedef unsigned long ram_addr_t; typedef void CPUWriteMemoryFunc(void *opaque, target_phys_addr_t addr, uint32_t value); typedef uint32_t CPUReadMemoryFunc(void *opaque, target_phys_addr_t addr); -void cpu_register_physical_memory_offset(target_phys_addr_t start_addr, - ram_addr_t size, - ram_addr_t phys_offset, - ram_addr_t region_offset); +void cpu_register_physical_memory_log(target_phys_addr_t start_addr, + ram_addr_t size, + ram_addr_t phys_offset, + ram_addr_t region_offset, + bool log_dirty); + +static inline void cpu_register_physical_memory_offset(target_phys_addr_t start_addr, + ram_addr_t size, + ram_addr_t phys_offset, + ram_addr_t region_offset) +{ + cpu_register_physical_memory_log(start_addr, size, phys_offset, + region_offset, false); +} + static inline void cpu_register_physical_memory(target_phys_addr_t start_addr, ram_addr_t size, ram_addr_t phys_offset) @@ -91,7 +102,8 @@ struct CPUPhysMemoryClient { void (*set_memory)(struct CPUPhysMemoryClient *client, target_phys_addr_t start_addr, ram_addr_t size, - ram_addr_t phys_offset); + ram_addr_t phys_offset, + bool log_dirty); int (*sync_dirty_bitmap)(struct CPUPhysMemoryClient *client, target_phys_addr_t start_addr, target_phys_addr_t end_addr); diff --git a/exec.c b/exec.c index 964ce318fb..d1a066c5ac 100644 --- a/exec.c +++ b/exec.c @@ -1711,11 +1711,12 @@ static QLIST_HEAD(memory_client_list, CPUPhysMemoryClient) memory_client_list static void 
cpu_notify_set_memory(target_phys_addr_t start_addr, ram_addr_t size, - ram_addr_t phys_offset) + ram_addr_t phys_offset, + bool log_dirty) { CPUPhysMemoryClient *client; QLIST_FOREACH(client, &memory_client_list, list) { - client->set_memory(client, start_addr, size, phys_offset); + client->set_memory(client, start_addr, size, phys_offset, log_dirty); } } @@ -1755,7 +1756,7 @@ static void phys_page_for_each_1(CPUPhysMemoryClient *client, for (i = 0; i < L2_SIZE; ++i) { if (pd[i].phys_offset != IO_MEM_UNASSIGNED) { client->set_memory(client, pd[i].region_offset, - TARGET_PAGE_SIZE, pd[i].phys_offset); + TARGET_PAGE_SIZE, pd[i].phys_offset, false); } } } else { @@ -2600,10 +2601,11 @@ static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys, start_addr and region_offset are rounded down to a page boundary before calculating this offset. This should not be a problem unless the low bits of start_addr and region_offset differ. */ -void cpu_register_physical_memory_offset(target_phys_addr_t start_addr, +void cpu_register_physical_memory_log(target_phys_addr_t start_addr, ram_addr_t size, ram_addr_t phys_offset, - ram_addr_t region_offset) + ram_addr_t region_offset, + bool log_dirty) { target_phys_addr_t addr, end_addr; PhysPageDesc *p; @@ -2611,7 +2613,7 @@ void cpu_register_physical_memory_offset(target_phys_addr_t start_addr, ram_addr_t orig_size = size; subpage_t *subpage; - cpu_notify_set_memory(start_addr, size, phys_offset); + cpu_notify_set_memory(start_addr, size, phys_offset, log_dirty); if (phys_offset == IO_MEM_UNASSIGNED) { region_offset = start_addr; diff --git a/hw/vhost.c b/hw/vhost.c index 14b571d07c..dc3d0e2834 100644 --- a/hw/vhost.c +++ b/hw/vhost.c @@ -300,7 +300,8 @@ static int vhost_verify_ring_mappings(struct vhost_dev *dev, static void vhost_client_set_memory(CPUPhysMemoryClient *client, target_phys_addr_t start_addr, ram_addr_t size, - ram_addr_t phys_offset) + ram_addr_t phys_offset, + bool log_dirty) { struct vhost_dev *dev = 
container_of(client, struct vhost_dev, client); ram_addr_t flags = phys_offset & ~TARGET_PAGE_MASK; diff --git a/kvm-all.c b/kvm-all.c index 1d7e8eabf4..1647e1a273 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -625,7 +625,8 @@ static void kvm_set_phys_mem(target_phys_addr_t start_addr, ram_addr_t size, static void kvm_client_set_memory(struct CPUPhysMemoryClient *client, target_phys_addr_t start_addr, - ram_addr_t size, ram_addr_t phys_offset) + ram_addr_t size, ram_addr_t phys_offset, + bool log_dirty) { kvm_set_phys_mem(start_addr, size, phys_offset); } From 25254bbc4d91408b6be706d095e3fbc60a972db4 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 6 Apr 2011 22:09:54 +0300 Subject: [PATCH 07/34] kvm: halve number of set memory calls for vga use the new api to reduce the number of these (expensive) system calls. Note: using this API, we should be able to get rid of vga_dirty_log_xxx APIs. Using them doesn't affect the performance though because we detect that the log_dirty flag is set and ignore the call. Signed-off-by: Michael S. Tsirkin --- kvm-all.c | 59 +++++++++++++++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 23 deletions(-) diff --git a/kvm-all.c b/kvm-all.c index 1647e1a273..7ace9a2d7c 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -245,48 +245,60 @@ err: /* * dirty pages logging control */ -static int kvm_dirty_pages_log_change(target_phys_addr_t phys_addr, - ram_addr_t size, int flags, int mask) + +static int kvm_mem_flags(KVMState *s, bool log_dirty) +{ + return log_dirty ? 
KVM_MEM_LOG_DIRTY_PAGES : 0; +} + +static int kvm_slot_dirty_pages_log_change(KVMSlot *mem, bool log_dirty) { KVMState *s = kvm_state; - KVMSlot *mem = kvm_lookup_matching_slot(s, phys_addr, phys_addr + size); + int flags, mask = KVM_MEM_LOG_DIRTY_PAGES; int old_flags; - if (mem == NULL) { - fprintf(stderr, "BUG: %s: invalid parameters " TARGET_FMT_plx "-" - TARGET_FMT_plx "\n", __func__, phys_addr, - (target_phys_addr_t)(phys_addr + size - 1)); - return -EINVAL; - } - old_flags = mem->flags; - flags = (mem->flags & ~mask) | flags; + flags = (mem->flags & ~mask) | kvm_mem_flags(s, log_dirty); mem->flags = flags; /* If nothing changed effectively, no need to issue ioctl */ if (s->migration_log) { flags |= KVM_MEM_LOG_DIRTY_PAGES; } + if (flags == old_flags) { - return 0; + return 0; } return kvm_set_user_memory_region(s, mem); } +static int kvm_dirty_pages_log_change(target_phys_addr_t phys_addr, + ram_addr_t size, bool log_dirty) +{ + KVMState *s = kvm_state; + KVMSlot *mem = kvm_lookup_matching_slot(s, phys_addr, phys_addr + size); + + if (mem == NULL) { + fprintf(stderr, "BUG: %s: invalid parameters " TARGET_FMT_plx "-" + TARGET_FMT_plx "\n", __func__, phys_addr, + (target_phys_addr_t)(phys_addr + size - 1)); + return -EINVAL; + } + return kvm_slot_dirty_pages_log_change(mem, log_dirty); +} + static int kvm_log_start(CPUPhysMemoryClient *client, target_phys_addr_t phys_addr, ram_addr_t size) { - return kvm_dirty_pages_log_change(phys_addr, size, KVM_MEM_LOG_DIRTY_PAGES, - KVM_MEM_LOG_DIRTY_PAGES); + return kvm_dirty_pages_log_change(phys_addr, size, true); } static int kvm_log_stop(CPUPhysMemoryClient *client, target_phys_addr_t phys_addr, ram_addr_t size) { - return kvm_dirty_pages_log_change(phys_addr, size, 0, - KVM_MEM_LOG_DIRTY_PAGES); + return kvm_dirty_pages_log_change(phys_addr, size, false); } static int kvm_set_migration_log(int enable) @@ -495,7 +507,7 @@ kvm_check_extension_list(KVMState *s, const KVMCapabilityInfo *list) } static void 
kvm_set_phys_mem(target_phys_addr_t start_addr, ram_addr_t size, - ram_addr_t phys_offset) + ram_addr_t phys_offset, bool log_dirty) { KVMState *s = kvm_state; ram_addr_t flags = phys_offset & ~TARGET_PAGE_MASK; @@ -520,7 +532,8 @@ static void kvm_set_phys_mem(target_phys_addr_t start_addr, ram_addr_t size, (start_addr + size <= mem->start_addr + mem->memory_size) && (phys_offset - start_addr == mem->phys_offset - mem->start_addr)) { /* The new slot fits into the existing one and comes with - * identical parameters - nothing to be done. */ + * identical parameters - update flags and done. */ + kvm_slot_dirty_pages_log_change(mem, log_dirty); return; } @@ -550,7 +563,7 @@ static void kvm_set_phys_mem(target_phys_addr_t start_addr, ram_addr_t size, mem->memory_size = old.memory_size; mem->start_addr = old.start_addr; mem->phys_offset = old.phys_offset; - mem->flags = 0; + mem->flags = kvm_mem_flags(s, log_dirty); err = kvm_set_user_memory_region(s, mem); if (err) { @@ -571,7 +584,7 @@ static void kvm_set_phys_mem(target_phys_addr_t start_addr, ram_addr_t size, mem->memory_size = start_addr - old.start_addr; mem->start_addr = old.start_addr; mem->phys_offset = old.phys_offset; - mem->flags = 0; + mem->flags = kvm_mem_flags(s, log_dirty); err = kvm_set_user_memory_region(s, mem); if (err) { @@ -590,7 +603,7 @@ static void kvm_set_phys_mem(target_phys_addr_t start_addr, ram_addr_t size, size_delta = mem->start_addr - old.start_addr; mem->memory_size = old.memory_size - size_delta; mem->phys_offset = old.phys_offset + size_delta; - mem->flags = 0; + mem->flags = kvm_mem_flags(s, log_dirty); err = kvm_set_user_memory_region(s, mem); if (err) { @@ -613,7 +626,7 @@ static void kvm_set_phys_mem(target_phys_addr_t start_addr, ram_addr_t size, mem->memory_size = size; mem->start_addr = start_addr; mem->phys_offset = phys_offset; - mem->flags = 0; + mem->flags = kvm_mem_flags(s, log_dirty); err = kvm_set_user_memory_region(s, mem); if (err) { @@ -628,7 +641,7 @@ static void 
kvm_client_set_memory(struct CPUPhysMemoryClient *client, ram_addr_t size, ram_addr_t phys_offset, bool log_dirty) { - kvm_set_phys_mem(start_addr, size, phys_offset); + kvm_set_phys_mem(start_addr, size, phys_offset, log_dirty); } static int kvm_client_sync_dirty_bitmap(struct CPUPhysMemoryClient *client, From f5a4e64f8e12d088a11b9a2743875cc88671e13a Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 6 Apr 2011 22:30:24 +0300 Subject: [PATCH 08/34] vhost: skip memory which needs dirty logging vhost doesn't support write logging (except for migration), anyway. Signed-off-by: Michael S. Tsirkin --- hw/vhost.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hw/vhost.c b/hw/vhost.c index dc3d0e2834..257e3dd685 100644 --- a/hw/vhost.c +++ b/hw/vhost.c @@ -311,6 +311,10 @@ static void vhost_client_set_memory(CPUPhysMemoryClient *client, int r; dev->mem = qemu_realloc(dev->mem, s); + if (log_dirty) { + flags = IO_MEM_UNASSIGNED; + } + assert(size); vhost_dev_unassign_memory(dev, start_addr, size); From 4e789564d30a9c5f9408657857560a88386b0ac4 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 31 Mar 2011 15:45:51 +0200 Subject: [PATCH 09/34] vhost: optimize out no-change assignment Cirrus VGA (at least) calls register memory region with the same values again and again. The registration in vhost-net slows this a lot, optimize by checking that the same data is already registered. Signed-off-by: Michael S. 
Tsirkin --- hw/vhost.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/hw/vhost.c b/hw/vhost.c index 257e3dd685..80f771e448 100644 --- a/hw/vhost.c +++ b/hw/vhost.c @@ -297,6 +297,45 @@ static int vhost_verify_ring_mappings(struct vhost_dev *dev, return 0; } +static struct vhost_memory_region *vhost_dev_find_reg(struct vhost_dev *dev, + uint64_t start_addr, + uint64_t size) +{ + int i, n = dev->mem->nregions; + for (i = 0; i < n; ++i) { + struct vhost_memory_region *reg = dev->mem->regions + i; + if (ranges_overlap(reg->guest_phys_addr, reg->memory_size, + start_addr, size)) { + return reg; + } + } + return NULL; +} + +static bool vhost_dev_cmp_memory(struct vhost_dev *dev, + uint64_t start_addr, + uint64_t size, + uint64_t uaddr) +{ + struct vhost_memory_region *reg = vhost_dev_find_reg(dev, start_addr, size); + uint64_t reglast; + uint64_t memlast; + + if (!reg) { + return true; + } + + reglast = range_get_last(reg->guest_phys_addr, reg->memory_size); + memlast = range_get_last(start_addr, size); + + /* Need to extend region? */ + if (start_addr < reg->guest_phys_addr || memlast > reglast) { + return true; + } + /* userspace_addr changed? */ + return uaddr != reg->userspace_addr + start_addr - reg->guest_phys_addr; +} + static void vhost_client_set_memory(CPUPhysMemoryClient *client, target_phys_addr_t start_addr, ram_addr_t size, @@ -309,6 +348,7 @@ static void vhost_client_set_memory(CPUPhysMemoryClient *client, (dev->mem->nregions + 1) * sizeof dev->mem->regions[0]; uint64_t log_size; int r; + dev->mem = qemu_realloc(dev->mem, s); if (log_dirty) { @@ -317,6 +357,20 @@ static void vhost_client_set_memory(CPUPhysMemoryClient *client, assert(size); + /* Optimize no-change case. At least cirrus_vga does this a lot at this time. */ + if (flags == IO_MEM_RAM) { + if (!vhost_dev_cmp_memory(dev, start_addr, size, + (uintptr_t)qemu_get_ram_ptr(phys_offset))) { + /* Region exists with same address. Nothing to do. 
*/ + return; + } + } else { + if (!vhost_dev_find_reg(dev, start_addr, size)) { + /* Removing region that we don't access. Nothing to do. */ + return; + } + } + vhost_dev_unassign_memory(dev, start_addr, size); if (flags == IO_MEM_RAM) { /* Add given mapping, merging adjacent regions if any */ From af94482bcee9640d9f8a6aa06104d8456bbe0d7f Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 6 Apr 2011 22:54:18 +0300 Subject: [PATCH 10/34] cirrus_vga: flag on-device ram for dirty logging Signed-off-by: Michael S. Tsirkin --- hw/cirrus_vga.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/hw/cirrus_vga.c b/hw/cirrus_vga.c index bdf4c8b32d..7212849567 100644 --- a/hw/cirrus_vga.c +++ b/hw/cirrus_vga.c @@ -2489,7 +2489,9 @@ static void map_linear_vram(CirrusVGAState *s) if (!s->vga.map_addr && s->vga.lfb_addr && s->vga.lfb_end) { s->vga.map_addr = s->vga.lfb_addr; s->vga.map_end = s->vga.lfb_end; - cpu_register_physical_memory(s->vga.map_addr, s->vga.map_end - s->vga.map_addr, s->vga.vram_offset); + cpu_register_physical_memory_log(s->vga.map_addr, + s->vga.map_end - s->vga.map_addr, + s->vga.vram_offset, 0, true); } if (!s->vga.map_addr) @@ -2502,10 +2504,14 @@ static void map_linear_vram(CirrusVGAState *s) && !((s->vga.gr[0x0B] & 0x14) == 0x14) && !(s->vga.gr[0x0B] & 0x02)) { - cpu_register_physical_memory(isa_mem_base + 0xa0000, 0x8000, - (s->vga.vram_offset + s->cirrus_bank_base[0]) | IO_MEM_RAM); - cpu_register_physical_memory(isa_mem_base + 0xa8000, 0x8000, - (s->vga.vram_offset + s->cirrus_bank_base[1]) | IO_MEM_RAM); + cpu_register_physical_memory_log(isa_mem_base + 0xa0000, 0x8000, + (s->vga.vram_offset + + s->cirrus_bank_base[0]) | + IO_MEM_RAM, 0, true); + cpu_register_physical_memory_log(isa_mem_base + 0xa8000, 0x8000, + (s->vga.vram_offset + + s->cirrus_bank_base[1]) | + IO_MEM_RAM, 0, true); s->vga.lfb_vram_mapped = 1; } From 17cbcb0bf79c605aecaab3661dc8bad627e4cb3b Mon Sep 17 00:00:00 2001 From: Avi Kivity 
Date: Mon, 4 Apr 2011 18:27:58 +0300 Subject: [PATCH 11/34] pci: add pci_register_bar_simple() API This is similar to pci_register_bar(), but automatically registers a single memory region spanning the entire BAR. Signed-off-by: Avi Kivity Signed-off-by: Michael S. Tsirkin --- hw/pci.c | 17 +++++++++++++++++ hw/pci.h | 3 +++ 2 files changed, 20 insertions(+) diff --git a/hw/pci.c b/hw/pci.c index 3ee48715fc..410b67bace 100644 --- a/hw/pci.c +++ b/hw/pci.c @@ -866,6 +866,7 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num, r->filtered_size = size; r->type = type; r->map_func = map_func; + r->ram_addr = IO_MEM_UNASSIGNED; wmask = ~(size - 1); addr = pci_bar(pci_dev, region_num); @@ -884,6 +885,22 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num, } } +static void pci_simple_bar_mapfunc(PCIDevice *pci_dev, int region_num, + pcibus_t addr, pcibus_t size, int type) +{ + cpu_register_physical_memory(addr, size, + pci_dev->io_regions[region_num].ram_addr); +} + +void pci_register_bar_simple(PCIDevice *pci_dev, int region_num, + pcibus_t size, uint8_t attr, ram_addr_t ram_addr) +{ + pci_register_bar(pci_dev, region_num, size, + PCI_BASE_ADDRESS_SPACE_MEMORY | attr, + pci_simple_bar_mapfunc); + pci_dev->io_regions[region_num].ram_addr = ram_addr; +} + static void pci_bridge_filter(PCIDevice *d, pcibus_t *addr, pcibus_t *size, uint8_t type) { diff --git a/hw/pci.h b/hw/pci.h index a5f875d06b..c6a6eb67b6 100644 --- a/hw/pci.h +++ b/hw/pci.h @@ -92,6 +92,7 @@ typedef struct PCIIORegion { pcibus_t filtered_size; uint8_t type; PCIMapIORegionFunc *map_func; + ram_addr_t ram_addr; } PCIIORegion; #define PCI_ROM_SLOT 6 @@ -200,6 +201,8 @@ PCIDevice *pci_register_device(PCIBus *bus, const char *name, void pci_register_bar(PCIDevice *pci_dev, int region_num, pcibus_t size, uint8_t type, PCIMapIORegionFunc *map_func); +void pci_register_bar_simple(PCIDevice *pci_dev, int region_num, + pcibus_t size, uint8_t attr, ram_addr_t ram_addr); int 
pci_add_capability(PCIDevice *pdev, uint8_t cap_id, uint8_t offset, uint8_t size); From f5de212c4c022dd5eb49e9223201bb702d9f33ab Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 4 Apr 2011 18:27:59 +0300 Subject: [PATCH 12/34] rtl8139: convert to pci_register_bar_simple() Signed-off-by: Avi Kivity Signed-off-by: Michael S. Tsirkin --- hw/rtl8139.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/hw/rtl8139.c b/hw/rtl8139.c index d5459336e5..822038daae 100644 --- a/hw/rtl8139.c +++ b/hw/rtl8139.c @@ -3341,14 +3341,6 @@ static const VMStateDescription vmstate_rtl8139 = { /***********************************************************/ /* PCI RTL8139 definitions */ -static void rtl8139_mmio_map(PCIDevice *pci_dev, int region_num, - pcibus_t addr, pcibus_t size, int type) -{ - RTL8139State *s = DO_UPCAST(RTL8139State, dev, pci_dev); - - cpu_register_physical_memory(addr + 0, 0x100, s->rtl8139_mmio_io_addr); -} - static void rtl8139_ioport_map(PCIDevice *pci_dev, int region_num, pcibus_t addr, pcibus_t size, int type) { @@ -3444,8 +3436,7 @@ static int pci_rtl8139_init(PCIDevice *dev) pci_register_bar(&s->dev, 0, 0x100, PCI_BASE_ADDRESS_SPACE_IO, rtl8139_ioport_map); - pci_register_bar(&s->dev, 1, 0x100, - PCI_BASE_ADDRESS_SPACE_MEMORY, rtl8139_mmio_map); + pci_register_bar_simple(&s->dev, 1, 0x100, 0, s->rtl8139_mmio_io_addr); qemu_macaddr_default_if_unset(&s->conf.macaddr); From e30376da4b7ba5c5d0ea7ce49d775ce38e5194c8 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 4 Apr 2011 18:28:00 +0300 Subject: [PATCH 13/34] cirrus-vga: convert to pci_register_bar_simple() Signed-off-by: Avi Kivity Signed-off-by: Michael S. 
Tsirkin --- hw/cirrus_vga.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/hw/cirrus_vga.c b/hw/cirrus_vga.c index 7212849567..722cac7544 100644 --- a/hw/cirrus_vga.c +++ b/hw/cirrus_vga.c @@ -3081,15 +3081,6 @@ static void cirrus_pci_lfb_map(PCIDevice *d, int region_num, vga_dirty_log_start(&s->vga); } -static void cirrus_pci_mmio_map(PCIDevice *d, int region_num, - pcibus_t addr, pcibus_t size, int type) -{ - CirrusVGAState *s = &DO_UPCAST(PCICirrusVGAState, dev, d)->cirrus_vga; - - cpu_register_physical_memory(addr, CIRRUS_PNPMMIO_SIZE, - s->cirrus_mmio_io_addr); -} - static void pci_cirrus_write_config(PCIDevice *d, uint32_t address, uint32_t val, int len) { @@ -3128,8 +3119,8 @@ static int pci_cirrus_vga_initfn(PCIDevice *dev) pci_register_bar(&d->dev, 0, 0x2000000, PCI_BASE_ADDRESS_MEM_PREFETCH, cirrus_pci_lfb_map); if (device_id == CIRRUS_ID_CLGD5446) { - pci_register_bar(&d->dev, 1, CIRRUS_PNPMMIO_SIZE, - PCI_BASE_ADDRESS_SPACE_MEMORY, cirrus_pci_mmio_map); + pci_register_bar_simple(&d->dev, 1, CIRRUS_PNPMMIO_SIZE, 0, + s->cirrus_mmio_io_addr); } return 0; } From 22ec60937a8004fc10597b78fdfe5014bb0c37bc Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 4 Apr 2011 18:28:01 +0300 Subject: [PATCH 14/34] eepro100: convert to pci_register_bar_simple() Signed-off-by: Avi Kivity Signed-off-by: Michael S. Tsirkin --- hw/eepro100.c | 43 +++++++++++++------------------------------ 1 file changed, 13 insertions(+), 30 deletions(-) diff --git a/hw/eepro100.c b/hw/eepro100.c index edf48f61d1..f2505e4e9b 100644 --- a/hw/eepro100.c +++ b/hw/eepro100.c @@ -228,7 +228,7 @@ typedef struct { uint8_t scb_stat; /* SCB stat/ack byte */ uint8_t int_stat; /* PCI interrupt status */ /* region must not be saved by nic_save. 
*/ - uint32_t region[3]; /* PCI region addresses */ + uint32_t region1; /* PCI region 1 address */ uint16_t mdimem[32]; eeprom_t *eeprom; uint32_t device; /* device variant */ @@ -1488,19 +1488,19 @@ static uint32_t ioport_read1(void *opaque, uint32_t addr) #if 0 logout("addr=%s\n", regname(addr)); #endif - return eepro100_read1(s, addr - s->region[1]); + return eepro100_read1(s, addr - s->region1); } static uint32_t ioport_read2(void *opaque, uint32_t addr) { EEPRO100State *s = opaque; - return eepro100_read2(s, addr - s->region[1]); + return eepro100_read2(s, addr - s->region1); } static uint32_t ioport_read4(void *opaque, uint32_t addr) { EEPRO100State *s = opaque; - return eepro100_read4(s, addr - s->region[1]); + return eepro100_read4(s, addr - s->region1); } static void ioport_write1(void *opaque, uint32_t addr, uint32_t val) @@ -1509,19 +1509,19 @@ static void ioport_write1(void *opaque, uint32_t addr, uint32_t val) #if 0 logout("addr=%s val=0x%02x\n", regname(addr), val); #endif - eepro100_write1(s, addr - s->region[1], val); + eepro100_write1(s, addr - s->region1, val); } static void ioport_write2(void *opaque, uint32_t addr, uint32_t val) { EEPRO100State *s = opaque; - eepro100_write2(s, addr - s->region[1], val); + eepro100_write2(s, addr - s->region1, val); } static void ioport_write4(void *opaque, uint32_t addr, uint32_t val) { EEPRO100State *s = opaque; - eepro100_write4(s, addr - s->region[1], val); + eepro100_write4(s, addr - s->region1, val); } /***********************************************************/ @@ -1544,7 +1544,7 @@ static void pci_map(PCIDevice * pci_dev, int region_num, register_ioport_write(addr, size, 4, ioport_write4, s); register_ioport_read(addr, size, 4, ioport_read4, s); - s->region[region_num] = addr; + s->region1 = addr; } /***************************************************************************** @@ -1619,22 +1619,6 @@ static CPUReadMemoryFunc * const pci_mmio_read[] = { pci_mmio_readl }; -static void 
pci_mmio_map(PCIDevice * pci_dev, int region_num, - pcibus_t addr, pcibus_t size, int type) -{ - EEPRO100State *s = DO_UPCAST(EEPRO100State, dev, pci_dev); - - TRACE(OTHER, logout("region %d, addr=0x%08"FMT_PCIBUS", " - "size=0x%08"FMT_PCIBUS", type=%d\n", - region_num, addr, size, type)); - - assert(region_num == 0 || region_num == 2); - - /* Map control / status registers and flash. */ - cpu_register_physical_memory(addr, size, s->mmio_index); - s->region[region_num] = addr; -} - static int nic_can_receive(VLANClientState *nc) { EEPRO100State *s = DO_UPCAST(NICState, nc, nc)->opaque; @@ -1882,17 +1866,16 @@ static int e100_nic_init(PCIDevice *pci_dev) cpu_register_io_memory(pci_mmio_read, pci_mmio_write, s, DEVICE_NATIVE_ENDIAN); - pci_register_bar(&s->dev, 0, PCI_MEM_SIZE, - PCI_BASE_ADDRESS_SPACE_MEMORY | - PCI_BASE_ADDRESS_MEM_PREFETCH, pci_mmio_map); + pci_register_bar_simple(&s->dev, 0, PCI_MEM_SIZE, + PCI_BASE_ADDRESS_MEM_PREFETCH, s->mmio_index); + pci_register_bar(&s->dev, 1, PCI_IO_SIZE, PCI_BASE_ADDRESS_SPACE_IO, pci_map); - pci_register_bar(&s->dev, 2, PCI_FLASH_SIZE, PCI_BASE_ADDRESS_SPACE_MEMORY, - pci_mmio_map); + pci_register_bar_simple(&s->dev, 2, PCI_FLASH_SIZE, 0, s->mmio_index); qemu_macaddr_default_if_unset(&s->conf.macaddr); logout("macaddr: %s\n", nic_dump(&s->conf.macaddr.a[0], 6)); - assert(s->region[1] == 0); + assert(s->region1 == 0); nic_reset(s); From d28ca60a47f59bc6e36d9c3167ed59b866e63630 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 4 Apr 2011 18:28:03 +0300 Subject: [PATCH 15/34] hda-intel: convert to pci_register_bar_simple() Signed-off-by: Avi Kivity Signed-off-by: Michael S. 
Tsirkin --- hw/intel-hda.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/hw/intel-hda.c b/hw/intel-hda.c index b0b1d1292a..7f83745d1a 100644 --- a/hw/intel-hda.c +++ b/hw/intel-hda.c @@ -1109,14 +1109,6 @@ static CPUWriteMemoryFunc * const intel_hda_mmio_write[3] = { intel_hda_mmio_writel, }; -static void intel_hda_map(PCIDevice *pci, int region_num, - pcibus_t addr, pcibus_t size, int type) -{ - IntelHDAState *d = DO_UPCAST(IntelHDAState, pci, pci); - - cpu_register_physical_memory(addr, 0x4000, d->mmio_addr); -} - /* --------------------------------------------------------------------- */ static void intel_hda_reset(DeviceState *dev) @@ -1158,8 +1150,7 @@ static int intel_hda_init(PCIDevice *pci) d->mmio_addr = cpu_register_io_memory(intel_hda_mmio_read, intel_hda_mmio_write, d, DEVICE_NATIVE_ENDIAN); - pci_register_bar(&d->pci, 0, 0x4000, PCI_BASE_ADDRESS_SPACE_MEMORY, - intel_hda_map); + pci_register_bar_simple(&d->pci, 0, 0x4000, 0, d->mmio_addr); if (d->msi) { msi_init(&d->pci, 0x50, 1, true, false); } From f32dd06ba6299e6c9174e94bd0ac9e4f7828bd78 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 4 Apr 2011 18:28:04 +0300 Subject: [PATCH 16/34] lsi53c895a: convert to pci_register_bar_simple() (partial) Signed-off-by: Avi Kivity Signed-off-by: Michael S.
Tsirkin --- hw/lsi53c895a.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/hw/lsi53c895a.c b/hw/lsi53c895a.c index 84a49928dc..6c811cf590 100644 --- a/hw/lsi53c895a.c +++ b/hw/lsi53c895a.c @@ -2037,15 +2037,6 @@ static void lsi_ram_mapfunc(PCIDevice *pci_dev, int region_num, cpu_register_physical_memory(addr + 0, 0x2000, s->ram_io_addr); } -static void lsi_mmio_mapfunc(PCIDevice *pci_dev, int region_num, - pcibus_t addr, pcibus_t size, int type) -{ - LSIState *s = DO_UPCAST(LSIState, dev, pci_dev); - - DPRINTF("Mapping registers at %08"FMT_PCIBUS"\n", addr); - cpu_register_physical_memory(addr + 0, 0x400, s->mmio_io_addr); -} - static void lsi_scsi_reset(DeviceState *dev) { LSIState *s = DO_UPCAST(LSIState, dev.qdev, dev); @@ -2188,8 +2179,7 @@ static int lsi_scsi_init(PCIDevice *dev) pci_register_bar(&s->dev, 0, 256, PCI_BASE_ADDRESS_SPACE_IO, lsi_io_mapfunc); - pci_register_bar(&s->dev, 1, 0x400, - PCI_BASE_ADDRESS_SPACE_MEMORY, lsi_mmio_mapfunc); + pci_register_bar_simple(&s->dev, 1, 0x400, 0, s->mmio_io_addr); pci_register_bar(&s->dev, 2, 0x2000, PCI_BASE_ADDRESS_SPACE_MEMORY, lsi_ram_mapfunc); QTAILQ_INIT(&s->queue); From 667bb59d2358daeef179583c944becba3f1f9680 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 4 Apr 2011 18:28:02 +0300 Subject: [PATCH 17/34] ich/ahci: convert to pci_register_bar_simple() Signed-off-by: Avi Kivity Signed-off-by: Michael S. 
Tsirkin --- hw/ide/ahci.c | 9 --------- hw/ide/ahci.h | 3 --- hw/ide/ich.c | 3 +-- 3 files changed, 1 insertion(+), 14 deletions(-) diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c index 98bdf7059a..c6e0c7767e 100644 --- a/hw/ide/ahci.c +++ b/hw/ide/ahci.c @@ -1129,15 +1129,6 @@ void ahci_uninit(AHCIState *s) qemu_free(s->dev); } -void ahci_pci_map(PCIDevice *pci_dev, int region_num, - pcibus_t addr, pcibus_t size, int type) -{ - struct AHCIPCIState *d = (struct AHCIPCIState *)pci_dev; - AHCIState *s = &d->ahci; - - cpu_register_physical_memory(addr, size, s->mem); -} - void ahci_reset(void *opaque) { struct AHCIPCIState *d = opaque; diff --git a/hw/ide/ahci.h b/hw/ide/ahci.h index a4560c41b6..dc86951ebf 100644 --- a/hw/ide/ahci.h +++ b/hw/ide/ahci.h @@ -325,9 +325,6 @@ typedef struct NCQFrame { void ahci_init(AHCIState *s, DeviceState *qdev, int ports); void ahci_uninit(AHCIState *s); -void ahci_pci_map(PCIDevice *pci_dev, int region_num, - pcibus_t addr, pcibus_t size, int type); - void ahci_reset(void *opaque); #endif /* HW_IDE_AHCI_H */ diff --git a/hw/ide/ich.c b/hw/ide/ich.c index f242d7a81f..eb00f03b33 100644 --- a/hw/ide/ich.c +++ b/hw/ide/ich.c @@ -95,8 +95,7 @@ static int pci_ich9_ahci_init(PCIDevice *dev) qemu_register_reset(ahci_reset, d); /* XXX BAR size should be 1k, but that breaks, so bump it to 4k for now */ - pci_register_bar(&d->card, 5, 0x1000, PCI_BASE_ADDRESS_SPACE_MEMORY, - ahci_pci_map); + pci_register_bar_simple(&d->card, 5, 0x1000, 0, d->ahci.mem); msi_init(dev, 0x50, 1, true, false); From 27a4154324b9de74b8621c83980fd82ac80f3b8f Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 4 Apr 2011 18:28:05 +0300 Subject: [PATCH 18/34] pcnet-pci: convert to pci_register_bar_simple() Signed-off-by: Avi Kivity Signed-off-by: Michael S. 
Tsirkin --- hw/pcnet-pci.c | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/hw/pcnet-pci.c b/hw/pcnet-pci.c index 339a401967..4ac3e3297c 100644 --- a/hw/pcnet-pci.c +++ b/hw/pcnet-pci.c @@ -214,19 +214,6 @@ static CPUReadMemoryFunc * const pcnet_mmio_read[] = { &pcnet_mmio_readl }; -static void pcnet_mmio_map(PCIDevice *pci_dev, int region_num, - pcibus_t addr, pcibus_t size, int type) -{ - PCIPCNetState *d = DO_UPCAST(PCIPCNetState, pci_dev, pci_dev); - -#ifdef PCNET_DEBUG_IO - printf("pcnet_mmio_map addr=0x%08"FMT_PCIBUS" 0x%08"FMT_PCIBUS"\n", - addr, size); -#endif - - cpu_register_physical_memory(addr, PCNET_PNPMMIO_SIZE, d->state.mmio_index); -} - static void pci_physical_memory_write(void *dma_opaque, target_phys_addr_t addr, uint8_t *buf, int len, int do_bswap) { @@ -300,8 +287,7 @@ static int pci_pcnet_init(PCIDevice *pci_dev) pci_register_bar(pci_dev, 0, PCNET_IOPORT_SIZE, PCI_BASE_ADDRESS_SPACE_IO, pcnet_ioport_map); - pci_register_bar(pci_dev, 1, PCNET_PNPMMIO_SIZE, - PCI_BASE_ADDRESS_SPACE_MEMORY, pcnet_mmio_map); + pci_register_bar_simple(pci_dev, 1, PCNET_PNPMMIO_SIZE, 0, s->mmio_index); s->irq = pci_dev->irq[0]; s->phys_mem_read = pci_physical_memory_read; From 6e964ded1e791aaaab270b5f04af5cd23c60c514 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 4 Apr 2011 18:28:06 +0300 Subject: [PATCH 19/34] usb-ohci: convert to pci_register_bar_simple() Signed-off-by: Avi Kivity Signed-off-by: Michael S. 
Tsirkin --- hw/usb-ohci.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/hw/usb-ohci.c b/hw/usb-ohci.c index d2b14f7b4f..73d47b845b 100644 --- a/hw/usb-ohci.c +++ b/hw/usb-ohci.c @@ -1713,13 +1713,6 @@ typedef struct { OHCIState state; } OHCIPCIState; -static void ohci_mapfunc(PCIDevice *pci_dev, int i, - pcibus_t addr, pcibus_t size, int type) -{ - OHCIPCIState *ohci = DO_UPCAST(OHCIPCIState, pci_dev, pci_dev); - cpu_register_physical_memory(addr, size, ohci->state.mem); -} - static int usb_ohci_initfn_pci(struct PCIDevice *dev) { OHCIPCIState *ohci = DO_UPCAST(OHCIPCIState, pci_dev, dev); @@ -1737,8 +1730,7 @@ static int usb_ohci_initfn_pci(struct PCIDevice *dev) ohci->state.irq = ohci->pci_dev.irq[0]; /* TODO: avoid cast below by using dev */ - pci_register_bar(&ohci->pci_dev, 0, 256, - PCI_BASE_ADDRESS_SPACE_MEMORY, ohci_mapfunc); + pci_register_bar_simple(&ohci->pci_dev, 0, 256, 0, ohci->state.mem); return 0; } From 22f3647b781ab36f654de4ec29997f549f979c97 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 4 Apr 2011 18:28:07 +0300 Subject: [PATCH 20/34] wdt_i6300esb: convert to pci_register_bar_simple() Signed-off-by: Avi Kivity Signed-off-by: Michael S. 
Tsirkin --- hw/wdt_i6300esb.c | 42 +++++++++++++++--------------------------- 1 file changed, 15 insertions(+), 27 deletions(-) diff --git a/hw/wdt_i6300esb.c b/hw/wdt_i6300esb.c index 4a7fba7f2a..07917212c3 100644 --- a/hw/wdt_i6300esb.c +++ b/hw/wdt_i6300esb.c @@ -355,31 +355,6 @@ static void i6300esb_mem_writel(void *vp, target_phys_addr_t addr, uint32_t val) } } -static void i6300esb_map(PCIDevice *dev, int region_num, - pcibus_t addr, pcibus_t size, int type) -{ - static CPUReadMemoryFunc * const mem_read[3] = { - i6300esb_mem_readb, - i6300esb_mem_readw, - i6300esb_mem_readl, - }; - static CPUWriteMemoryFunc * const mem_write[3] = { - i6300esb_mem_writeb, - i6300esb_mem_writew, - i6300esb_mem_writel, - }; - I6300State *d = DO_UPCAST(I6300State, dev, dev); - int io_mem; - - i6300esb_debug("addr = %"FMT_PCIBUS", size = %"FMT_PCIBUS", type = %d\n", - addr, size, type); - - io_mem = cpu_register_io_memory(mem_read, mem_write, d, - DEVICE_NATIVE_ENDIAN); - cpu_register_physical_memory (addr, 0x10, io_mem); - /* qemu_register_coalesced_mmio (addr, 0x10); ? 
*/ -} - static const VMStateDescription vmstate_i6300esb = { .name = "i6300esb_wdt", .version_id = sizeof(I6300State), @@ -407,6 +382,17 @@ static int i6300esb_init(PCIDevice *dev) { I6300State *d = DO_UPCAST(I6300State, dev, dev); uint8_t *pci_conf; + int io_mem; + static CPUReadMemoryFunc * const mem_read[3] = { + i6300esb_mem_readb, + i6300esb_mem_readw, + i6300esb_mem_readl, + }; + static CPUWriteMemoryFunc * const mem_write[3] = { + i6300esb_mem_writeb, + i6300esb_mem_writew, + i6300esb_mem_writel, + }; i6300esb_debug("I6300State = %p\n", d); @@ -418,8 +404,10 @@ static int i6300esb_init(PCIDevice *dev) pci_config_set_device_id(pci_conf, PCI_DEVICE_ID_INTEL_ESB_9); pci_config_set_class(pci_conf, PCI_CLASS_SYSTEM_OTHER); - pci_register_bar(&d->dev, 0, 0x10, - PCI_BASE_ADDRESS_SPACE_MEMORY, i6300esb_map); + io_mem = cpu_register_io_memory(mem_read, mem_write, d, + DEVICE_NATIVE_ENDIAN); + pci_register_bar_simple(&d->dev, 0, 0x10, 0, io_mem); + /* qemu_register_coalesced_mmio (addr, 0x10); ? */ return 0; } From 1b4f97d62e3b3d220130f1b0f59d43c042fddb89 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Sat, 30 Apr 2011 22:40:04 +0200 Subject: [PATCH 21/34] eepro100: Avoid duplicate debug messages When DEBUG_EEPRO100 was enabled, unsupported writes were logged twice. Now logging in eepro100_write1 and eepro100_write2 is similar to the logging in eepro100_write4 (which already was correct). Signed-off-by: Stefan Weil Signed-off-by: Michael S. Tsirkin --- hw/eepro100.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/hw/eepro100.c b/hw/eepro100.c index f2505e4e9b..7c24e1a8ab 100644 --- a/hw/eepro100.c +++ b/hw/eepro100.c @@ -1,7 +1,7 @@ /* * QEMU i8255x (PRO100) emulation * - * Copyright (C) 2006-2010 Stefan Weil + * Copyright (C) 2006-2011 Stefan Weil * * Portions of the code are copies from grub / etherboot eepro100.c * and linux e100.c. 
@@ -1393,18 +1393,20 @@ static void eepro100_write1(EEPRO100State * s, uint32_t addr, uint8_t val) memcpy(&s->mem[addr], &val, sizeof(val)); } - TRACE(OTHER, logout("addr=%s val=0x%02x\n", regname(addr), val)); - switch (addr) { case SCBStatus: + TRACE(OTHER, logout("addr=%s val=0x%02x\n", regname(addr), val)); break; case SCBAck: + TRACE(OTHER, logout("addr=%s val=0x%02x\n", regname(addr), val)); eepro100_acknowledge(s); break; case SCBCmd: + TRACE(OTHER, logout("addr=%s val=0x%02x\n", regname(addr), val)); eepro100_write_command(s, val); break; case SCBIntmask: + TRACE(OTHER, logout("addr=%s val=0x%02x\n", regname(addr), val)); if (val & BIT(1)) { eepro100_swi_interrupt(s); } @@ -1418,6 +1420,7 @@ static void eepro100_write1(EEPRO100State * s, uint32_t addr, uint8_t val) TRACE(OTHER, logout("addr=%s val=0x%02x\n", regname(addr), val)); break; case SCBeeprom: + TRACE(OTHER, logout("addr=%s val=0x%02x\n", regname(addr), val)); eepro100_write_eeprom(s->eeprom, val); break; default: @@ -1433,18 +1436,19 @@ static void eepro100_write2(EEPRO100State * s, uint32_t addr, uint16_t val) memcpy(&s->mem[addr], &val, sizeof(val)); } - TRACE(OTHER, logout("addr=%s val=0x%04x\n", regname(addr), val)); - switch (addr) { case SCBStatus: + TRACE(OTHER, logout("addr=%s val=0x%04x\n", regname(addr), val)); s->mem[SCBAck] = (val >> 8); eepro100_acknowledge(s); break; case SCBCmd: + TRACE(OTHER, logout("addr=%s val=0x%04x\n", regname(addr), val)); eepro100_write_command(s, val); eepro100_write1(s, SCBIntmask, val >> 8); break; case SCBeeprom: + TRACE(OTHER, logout("addr=%s val=0x%04x\n", regname(addr), val)); eepro100_write_eeprom(s->eeprom, val); break; default: From 77bee84e6a05f086ce40088e4dbadf28e14e4eed Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Sat, 30 Apr 2011 22:40:05 +0200 Subject: [PATCH 22/34] eepro100: Remove type casts which are no longer needed Signed-off-by: Stefan Weil Signed-off-by: Michael S. 
Tsirkin --- hw/eepro100.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/hw/eepro100.c b/hw/eepro100.c index 7c24e1a8ab..9e1883e12f 100644 --- a/hw/eepro100.c +++ b/hw/eepro100.c @@ -311,7 +311,7 @@ static const uint16_t eepro100_mdi_mask[] = { static void stl_le_phys(target_phys_addr_t addr, uint32_t val) { val = cpu_to_le32(val); - cpu_physical_memory_write(addr, (const uint8_t *)&val, sizeof(val)); + cpu_physical_memory_write(addr, &val, sizeof(val)); } #define POLYNOMIAL 0x04c11db6 @@ -694,8 +694,7 @@ static void dump_statistics(EEPRO100State * s) * values which really matter. * Number of data should check configuration!!! */ - cpu_physical_memory_write(s->statsaddr, - (uint8_t *) & s->statistics, s->stats_size); + cpu_physical_memory_write(s->statsaddr, &s->statistics, s->stats_size); stl_le_phys(s->statsaddr + 0, s->statistics.tx_good_frames); stl_le_phys(s->statsaddr + 36, s->statistics.rx_good_frames); stl_le_phys(s->statsaddr + 48, s->statistics.rx_resource_errors); @@ -709,7 +708,7 @@ static void dump_statistics(EEPRO100State * s) static void read_cb(EEPRO100State *s) { - cpu_physical_memory_read(s->cb_address, (uint8_t *) &s->tx, sizeof(s->tx)); + cpu_physical_memory_read(s->cb_address, &s->tx, sizeof(s->tx)); s->tx.status = le16_to_cpu(s->tx.status); s->tx.command = le16_to_cpu(s->tx.command); s->tx.link = le32_to_cpu(s->tx.link); @@ -1268,10 +1267,10 @@ static void eepro100_write_port(EEPRO100State * s, uint32_t val) case PORT_SELFTEST: TRACE(OTHER, logout("selftest address=0x%08x\n", address)); eepro100_selftest_t data; - cpu_physical_memory_read(address, (uint8_t *) & data, sizeof(data)); + cpu_physical_memory_read(address, &data, sizeof(data)); data.st_sign = 0xffffffff; data.st_result = 0; - cpu_physical_memory_write(address, (uint8_t *) & data, sizeof(data)); + cpu_physical_memory_write(address, &data, sizeof(data)); break; case PORT_SELECTIVE_RESET: TRACE(OTHER, logout("selective reset, selftest address=0x%08x\n", 
address)); @@ -1722,7 +1721,7 @@ static ssize_t nic_receive(VLANClientState *nc, const uint8_t * buf, size_t size } /* !!! */ eepro100_rx_t rx; - cpu_physical_memory_read(s->ru_base + s->ru_offset, (uint8_t *) & rx, + cpu_physical_memory_read(s->ru_base + s->ru_offset, &rx, offsetof(eepro100_rx_t, packet)); uint16_t rfd_command = le16_to_cpu(rx.command); uint16_t rfd_size = le16_to_cpu(rx.size); From 27112f18f9025d537f3e6f6df3e574e7f0902cda Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Sat, 30 Apr 2011 22:40:06 +0200 Subject: [PATCH 23/34] eepro100: Remove unused structure element cppcheck reports that 'packet' is unused. It was only used to calculate the size of the preceding data. Removing it saves a lot of stack space (local variable rx). Signed-off-by: Stefan Weil Signed-off-by: Michael S. Tsirkin --- hw/eepro100.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hw/eepro100.c b/hw/eepro100.c index 9e1883e12f..848bf79461 100644 --- a/hw/eepro100.c +++ b/hw/eepro100.c @@ -173,7 +173,7 @@ typedef struct { uint32_t rx_buf_addr; /* void * */ uint16_t count; uint16_t size; - char packet[MAX_ETH_FRAME_SIZE + 4]; + /* Ethernet frame data follows. */ } eepro100_rx_t; typedef enum { @@ -1722,7 +1722,7 @@ static ssize_t nic_receive(VLANClientState *nc, const uint8_t * buf, size_t size /* !!! 
*/ eepro100_rx_t rx; cpu_physical_memory_read(s->ru_base + s->ru_offset, &rx, - offsetof(eepro100_rx_t, packet)); + sizeof(eepro100_rx_t)); uint16_t rfd_command = le16_to_cpu(rx.command); uint16_t rfd_size = le16_to_cpu(rx.size); @@ -1753,7 +1753,7 @@ static ssize_t nic_receive(VLANClientState *nc, const uint8_t * buf, size_t size assert(!(s->configuration[17] & BIT(0))); #endif cpu_physical_memory_write(s->ru_base + s->ru_offset + - offsetof(eepro100_rx_t, packet), buf, size); + sizeof(eepro100_rx_t), buf, size); s->statistics.rx_good_frames++; eepro100_fr_interrupt(s); s->ru_offset = le32_to_cpu(rx.link); From 792f1d639443c3895df82306e13bb144627ad6bc Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Sat, 30 Apr 2011 22:40:07 +0200 Subject: [PATCH 24/34] eepro100: Pad received short frames QEMU sends frames smaller than 60 bytes to ethernet nics. Such frames are rejected by real NICs and their emulations. To avoid this behaviour, other NIC emulations pad received frames. This patch enables this workaround for eepro100, too. All related code is marked with CONFIG_PAD_RECEIVED_FRAMES, so we can drop this in case QEMU's networking code is ever changed. Signed-off-by: Stefan Weil Signed-off-by: Michael S. Tsirkin --- hw/eepro100.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/hw/eepro100.c b/hw/eepro100.c index 848bf79461..ab5c699c9d 100644 --- a/hw/eepro100.c +++ b/hw/eepro100.c @@ -48,6 +48,15 @@ #include "eeprom93xx.h" #include "sysemu.h" +/* QEMU sends frames smaller than 60 bytes to ethernet nics. + * Such frames are rejected by real nics and their emulations. + * To avoid this behaviour, other nic emulations pad received + * frames. The following definition enables this padding for + * eepro100, too. We keep the define around in case it might + * become useful in the future if the core networking is ever + * changed to pad short packets itself.
*/ +#define CONFIG_PAD_RECEIVED_FRAMES + #define KiB 1024 /* Debug EEPRO100 card. */ @@ -1640,19 +1649,32 @@ static ssize_t nic_receive(VLANClientState *nc, const uint8_t * buf, size_t size */ EEPRO100State *s = DO_UPCAST(NICState, nc, nc)->opaque; uint16_t rfd_status = 0xa000; +#if defined(CONFIG_PAD_RECEIVED_FRAMES) + uint8_t min_buf[60]; +#endif static const uint8_t broadcast_macaddr[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +#if defined(CONFIG_PAD_RECEIVED_FRAMES) + /* Pad to minimum Ethernet frame length */ + if (size < sizeof(min_buf)) { + memcpy(min_buf, buf, size); + memset(&min_buf[size], 0, sizeof(min_buf) - size); + buf = min_buf; + size = sizeof(min_buf); + } +#endif + if (s->configuration[8] & 0x80) { /* CSMA is disabled. */ logout("%p received while CSMA is disabled\n", s); return -1; +#if !defined(CONFIG_PAD_RECEIVED_FRAMES) } else if (size < 64 && (s->configuration[7] & BIT(0))) { /* Short frame and configuration byte 7/0 (discard short receive) set: * Short frame is discarded */ logout("%p received short frame (%zu byte)\n", s, size); s->statistics.rx_short_frame_errors++; -#if 0 return -1; #endif } else if ((size > MAX_ETH_FRAME_SIZE + 4) && !(s->configuration[18] & BIT(3))) { @@ -1731,9 +1753,11 @@ static ssize_t nic_receive(VLANClientState *nc, const uint8_t * buf, size_t size "(%zu bytes); data truncated\n", rfd_size, size); size = rfd_size; } +#if !defined(CONFIG_PAD_RECEIVED_FRAMES) if (size < 64) { rfd_status |= 0x0080; } +#endif TRACE(OTHER, logout("command 0x%04x, link 0x%08x, addr 0x%08x, size %u\n", rfd_command, rx.link, rx.rx_buf_addr, rfd_size)); stw_phys(s->ru_base + s->ru_offset + offsetof(eepro100_rx_t, status), From e5e23ab83bfaf07c1a5bf685b1adf311e2326b74 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Sat, 30 Apr 2011 22:40:08 +0200 Subject: [PATCH 25/34] eepro100: Fix endianness issues Like other Intel devices, e100 (eepro100) uses little endian byte order. 
This patch was tested with these combinations: i386 host, i386 + mipsel guests (le-le) mipsel host, i386 guest (le-le) i386 host, mips + ppc guests (le-be) mips host, i386 guest (be-le) mips and mipsel hosts were emulated machines. v2: Use prefix for new functions. Add the same prefix to stl_le_phys. Fix alignment of mem (needed for word/dword reads/writes). Signed-off-by: Stefan Weil Signed-off-by: Michael S. Tsirkin --- hw/eepro100.c | 141 ++++++++++++++++++++++++++++++++++---------------- 1 file changed, 97 insertions(+), 44 deletions(-) diff --git a/hw/eepro100.c b/hw/eepro100.c index ab5c699c9d..6315fe8e7d 100644 --- a/hw/eepro100.c +++ b/hw/eepro100.c @@ -20,11 +20,10 @@ * along with this program. If not, see . * * Tested features (i82559): - * PXE boot (i386) ok + * PXE boot (i386 guest, i386 / mips / mipsel / ppc host) ok * Linux networking (i386) ok * * Untested: - * non-i386 platforms * Windows networking * * References: @@ -139,7 +138,7 @@ typedef struct { /* Offsets to the various registers. All accesses need not be longword aligned. */ -enum speedo_offsets { +typedef enum { SCBStatus = 0, /* Status Word. */ SCBAck = 1, SCBCmd = 2, /* Rx/Command Unit command and status. */ @@ -154,7 +153,7 @@ enum speedo_offsets { SCBpmdr = 27, /* Power Management Driver. */ SCBgctrl = 28, /* General Control. */ SCBgstat = 29, /* General Status. */ -}; +} E100RegisterOffset; /* A speedo3 transmit buffer descriptor with two buffers... */ typedef struct { @@ -258,11 +257,13 @@ typedef struct { /* Statistical counters. Also used for wake-up packet (i82559). */ eepro100_stats_t statistics; + /* Data in mem is always in the byte order of the controller (le). + * It must be dword aligned to allow direct access to 32 bit values. */ + uint8_t mem[PCI_MEM_SIZE] __attribute__((aligned(8)));; + /* Configuration bytes. */ uint8_t configuration[22]; - /* Data in mem is always in the byte order of the controller (le). 
*/ - uint8_t mem[PCI_MEM_SIZE]; /* vmstate for each particular nic */ VMStateDescription *vmstate; @@ -316,8 +317,33 @@ static const uint16_t eepro100_mdi_mask[] = { 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, }; -/* XXX: optimize */ -static void stl_le_phys(target_phys_addr_t addr, uint32_t val) +/* Read a 16 bit little endian value from physical memory. */ +static uint16_t e100_ldw_le_phys(target_phys_addr_t addr) +{ + /* Load 16 bit (little endian) word from emulated hardware. */ + uint16_t val; + cpu_physical_memory_read(addr, &val, sizeof(val)); + return le16_to_cpu(val); +} + +/* Read a 32 bit little endian value from physical memory. */ +static uint32_t e100_ldl_le_phys(target_phys_addr_t addr) +{ + /* Load 32 bit (little endian) word from emulated hardware. */ + uint32_t val; + cpu_physical_memory_read(addr, &val, sizeof(val)); + return le32_to_cpu(val); +} + +/* Write a 16 bit little endian value to physical memory. */ +static void e100_stw_le_phys(target_phys_addr_t addr, uint16_t val) +{ + val = cpu_to_le16(val); + cpu_physical_memory_write(addr, &val, sizeof(val)); +} + +/* Write a 32 bit little endian value to physical memory. */ +static void e100_stl_le_phys(target_phys_addr_t addr, uint32_t val) { val = cpu_to_le32(val); cpu_physical_memory_write(addr, &val, sizeof(val)); @@ -348,6 +374,36 @@ static unsigned compute_mcast_idx(const uint8_t * ep) return (crc & BITS(7, 2)) >> 2; } +/* Read a 16 bit control/status (CSR) register. */ +static uint16_t e100_read_reg2(EEPRO100State *s, E100RegisterOffset addr) +{ + assert(!((uintptr_t)&s->mem[addr] & 1)); + return le16_to_cpup((uint16_t *)&s->mem[addr]); +} + +/* Read a 32 bit control/status (CSR) register. */ +static uint32_t e100_read_reg4(EEPRO100State *s, E100RegisterOffset addr) +{ + assert(!((uintptr_t)&s->mem[addr] & 3)); + return le32_to_cpup((uint32_t *)&s->mem[addr]); +} + +/* Write a 16 bit control/status (CSR) register. 
*/ +static void e100_write_reg2(EEPRO100State *s, E100RegisterOffset addr, + uint16_t val) +{ + assert(!((uintptr_t)&s->mem[addr] & 1)); + cpu_to_le16w((uint16_t *)&s->mem[addr], val); +} + +/* Write a 32 bit control/status (CSR) register. */ +static void e100_write_reg4(EEPRO100State *s, E100RegisterOffset addr, + uint32_t val) +{ + assert(!((uintptr_t)&s->mem[addr] & 3)); + cpu_to_le32w((uint32_t *)&s->mem[addr], val); +} + #if defined(DEBUG_EEPRO100) static const char *nic_dump(const uint8_t * buf, unsigned size) { @@ -599,8 +655,7 @@ static void nic_selective_reset(EEPRO100State * s) TRACE(EEPROM, logout("checksum=0x%04x\n", eeprom_contents[EEPROM_SIZE - 1])); memset(s->mem, 0, sizeof(s->mem)); - uint32_t val = BIT(21); - memcpy(&s->mem[SCBCtrlMDI], &val, sizeof(val)); + e100_write_reg4(s, SCBCtrlMDI, BIT(21)); assert(sizeof(s->mdimem) == sizeof(eepro100_mdi_default)); memcpy(&s->mdimem[0], &eepro100_mdi_default[0], sizeof(s->mdimem)); @@ -704,13 +759,13 @@ static void dump_statistics(EEPRO100State * s) * Number of data should check configuration!!!
*/ cpu_physical_memory_write(s->statsaddr, &s->statistics, s->stats_size); - stl_le_phys(s->statsaddr + 0, s->statistics.tx_good_frames); - stl_le_phys(s->statsaddr + 36, s->statistics.rx_good_frames); - stl_le_phys(s->statsaddr + 48, s->statistics.rx_resource_errors); - stl_le_phys(s->statsaddr + 60, s->statistics.rx_short_frame_errors); + e100_stl_le_phys(s->statsaddr + 0, s->statistics.tx_good_frames); + e100_stl_le_phys(s->statsaddr + 36, s->statistics.rx_good_frames); + e100_stl_le_phys(s->statsaddr + 48, s->statistics.rx_resource_errors); + e100_stl_le_phys(s->statsaddr + 60, s->statistics.rx_short_frame_errors); #if 0 - stw_le_phys(s->statsaddr + 76, s->statistics.xmt_tco_frames); - stw_le_phys(s->statsaddr + 78, s->statistics.rcv_tco_frames); + e100_stw_le_phys(s->statsaddr + 76, s->statistics.xmt_tco_frames); + e100_stw_le_phys(s->statsaddr + 78, s->statistics.rcv_tco_frames); missing("CU dump statistical counters"); #endif } @@ -747,10 +802,10 @@ static void tx_command(EEPRO100State *s) } assert(tcb_bytes <= sizeof(buf)); while (size < tcb_bytes) { - uint32_t tx_buffer_address = ldl_phys(tbd_address); - uint16_t tx_buffer_size = lduw_phys(tbd_address + 4); + uint32_t tx_buffer_address = e100_ldl_le_phys(tbd_address); + uint16_t tx_buffer_size = e100_ldw_le_phys(tbd_address + 4); #if 0 - uint16_t tx_buffer_el = lduw_phys(tbd_address + 6); + uint16_t tx_buffer_el = e100_ldw_le_phys(tbd_address + 6); #endif tbd_address += 8; TRACE(RXTX, logout @@ -769,9 +824,9 @@ static void tx_command(EEPRO100State *s) if (s->has_extended_tcb_support && !(s->configuration[6] & BIT(4))) { /* Extended Flexible TCB. 
*/ for (; tbd_count < 2; tbd_count++) { - uint32_t tx_buffer_address = ldl_phys(tbd_address); - uint16_t tx_buffer_size = lduw_phys(tbd_address + 4); - uint16_t tx_buffer_el = lduw_phys(tbd_address + 6); + uint32_t tx_buffer_address = e100_ldl_le_phys(tbd_address); + uint16_t tx_buffer_size = e100_ldw_le_phys(tbd_address + 4); + uint16_t tx_buffer_el = e100_ldw_le_phys(tbd_address + 6); tbd_address += 8; TRACE(RXTX, logout ("TBD (extended flexible mode): buffer address 0x%08x, size 0x%04x\n", @@ -787,9 +842,9 @@ static void tx_command(EEPRO100State *s) } tbd_address = tbd_array; for (; tbd_count < s->tx.tbd_count; tbd_count++) { - uint32_t tx_buffer_address = ldl_phys(tbd_address); - uint16_t tx_buffer_size = lduw_phys(tbd_address + 4); - uint16_t tx_buffer_el = lduw_phys(tbd_address + 6); + uint32_t tx_buffer_address = e100_ldl_le_phys(tbd_address); + uint16_t tx_buffer_size = e100_ldw_le_phys(tbd_address + 4); + uint16_t tx_buffer_el = e100_ldw_le_phys(tbd_address + 6); tbd_address += 8; TRACE(RXTX, logout ("TBD (flexible mode): buffer address 0x%08x, size 0x%04x\n", @@ -897,7 +952,7 @@ static void action_command(EEPRO100State *s) break; } /* Write new status. */ - stw_phys(s->cb_address, s->tx.status | ok_status | STATUS_C); + e100_stw_le_phys(s->cb_address, s->tx.status | ok_status | STATUS_C); if (bit_i) { /* CU completed action. */ eepro100_cx_interrupt(s); @@ -964,7 +1019,7 @@ static void eepro100_cu_command(EEPRO100State * s, uint8_t val) /* Dump statistical counters. */ TRACE(OTHER, logout("val=0x%02x (dump stats)\n", val)); dump_statistics(s); - stl_le_phys(s->statsaddr + s->stats_size, 0xa005); + e100_stl_le_phys(s->statsaddr + s->stats_size, 0xa005); break; case CU_CMD_BASE: /* Load CU base. */ @@ -975,7 +1030,7 @@ static void eepro100_cu_command(EEPRO100State * s, uint8_t val) /* Dump and reset statistical counters. 
*/ TRACE(OTHER, logout("val=0x%02x (dump stats and reset)\n", val)); dump_statistics(s); - stl_le_phys(s->statsaddr + s->stats_size, 0xa007); + e100_stl_le_phys(s->statsaddr + s->stats_size, 0xa007); memset(&s->statistics, 0, sizeof(s->statistics)); break; case CU_SRESUME: @@ -1058,8 +1113,7 @@ static void eepro100_write_command(EEPRO100State * s, uint8_t val) static uint16_t eepro100_read_eeprom(EEPRO100State * s) { - uint16_t val; - memcpy(&val, &s->mem[SCBeeprom], sizeof(val)); + uint16_t val = e100_read_reg2(s, SCBeeprom); if (eeprom93xx_read(s->eeprom)) { val |= EEPROM_DO; } else { @@ -1129,8 +1183,7 @@ static const char *reg2name(uint8_t reg) static uint32_t eepro100_read_mdi(EEPRO100State * s) { - uint32_t val; - memcpy(&val, &s->mem[0x10], sizeof(val)); + uint32_t val = e100_read_reg4(s, SCBCtrlMDI); #ifdef DEBUG_EEPRO100 uint8_t raiseint = (val & BIT(29)) >> 29; @@ -1239,7 +1292,7 @@ static void eepro100_write_mdi(EEPRO100State * s, uint32_t val) } } val = (val & 0xffff0000) + data; - memcpy(&s->mem[0x10], &val, sizeof(val)); + e100_write_reg4(s, SCBCtrlMDI, val); } /***************************************************************************** @@ -1266,7 +1319,6 @@ static uint32_t eepro100_read_port(EEPRO100State * s) static void eepro100_write_port(EEPRO100State * s, uint32_t val) { - val = le32_to_cpu(val); uint32_t address = (val & ~PORT_SELECTION_MASK); uint8_t selection = (val & PORT_SELECTION_MASK); switch (selection) { @@ -1301,7 +1353,7 @@ static uint8_t eepro100_read1(EEPRO100State * s, uint32_t addr) { uint8_t val = 0; if (addr <= sizeof(s->mem) - sizeof(val)) { - memcpy(&val, &s->mem[addr], sizeof(val)); + val = s->mem[addr]; } switch (addr) { @@ -1344,7 +1396,7 @@ static uint16_t eepro100_read2(EEPRO100State * s, uint32_t addr) { uint16_t val = 0; if (addr <= sizeof(s->mem) - sizeof(val)) { - memcpy(&val, &s->mem[addr], sizeof(val)); + val = e100_read_reg2(s, addr); } switch (addr) { @@ -1367,7 +1419,7 @@ static uint32_t 
eepro100_read4(EEPRO100State * s, uint32_t addr) { uint32_t val = 0; if (addr <= sizeof(s->mem) - sizeof(val)) { - memcpy(&val, &s->mem[addr], sizeof(val)); + val = e100_read_reg4(s, addr); } switch (addr) { @@ -1398,7 +1450,7 @@ static void eepro100_write1(EEPRO100State * s, uint32_t addr, uint8_t val) { /* SCBStatus is readonly. */ if (addr > SCBStatus && addr <= sizeof(s->mem) - sizeof(val)) { - memcpy(&s->mem[addr], &val, sizeof(val)); + s->mem[addr] = val; } switch (addr) { @@ -1441,7 +1493,7 @@ static void eepro100_write2(EEPRO100State * s, uint32_t addr, uint16_t val) { /* SCBStatus is readonly. */ if (addr > SCBStatus && addr <= sizeof(s->mem) - sizeof(val)) { - memcpy(&s->mem[addr], &val, sizeof(val)); + e100_write_reg2(s, addr, val); } switch (addr) { @@ -1468,7 +1520,7 @@ static void eepro100_write2(EEPRO100State * s, uint32_t addr, uint16_t val) static void eepro100_write4(EEPRO100State * s, uint32_t addr, uint32_t val) { if (addr <= sizeof(s->mem) - sizeof(val)) { - memcpy(&s->mem[addr], &val, sizeof(val)); + e100_write_reg4(s, addr, val); } switch (addr) { @@ -1760,9 +1812,10 @@ static ssize_t nic_receive(VLANClientState *nc, const uint8_t * buf, size_t size #endif TRACE(OTHER, logout("command 0x%04x, link 0x%08x, addr 0x%08x, size %u\n", rfd_command, rx.link, rx.rx_buf_addr, rfd_size)); - stw_phys(s->ru_base + s->ru_offset + offsetof(eepro100_rx_t, status), - rfd_status); - stw_phys(s->ru_base + s->ru_offset + offsetof(eepro100_rx_t, count), size); + e100_stw_le_phys(s->ru_base + s->ru_offset + + offsetof(eepro100_rx_t, status), rfd_status); + e100_stw_le_phys(s->ru_base + s->ru_offset + + offsetof(eepro100_rx_t, count), size); /* Early receive interrupt not supported. 
*/ #if 0 eepro100_er_interrupt(s); @@ -1891,7 +1944,7 @@ static int e100_nic_init(PCIDevice *pci_dev) /* Handler for memory-mapped I/O */ s->mmio_index = cpu_register_io_memory(pci_mmio_read, pci_mmio_write, s, - DEVICE_NATIVE_ENDIAN); + DEVICE_LITTLE_ENDIAN); pci_register_bar_simple(&s->dev, 0, PCI_MEM_SIZE, PCI_BASE_ADDRESS_MEM_PREFETCH, s->mmio_index); From 3fd3d0b463d5c959c3a08a665eed1a2cd4e1d3da Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Sat, 30 Apr 2011 22:40:09 +0200 Subject: [PATCH 26/34] eepro100: Support byte/word writes to port address port is a 32 bit register, but may be written using 8 or 16 bit writes. Add support for byte/word writes. Signed-off-by: Stefan Weil Signed-off-by: Michael S. Tsirkin --- hw/eepro100.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/hw/eepro100.c b/hw/eepro100.c index 6315fe8e7d..37d6fb72db 100644 --- a/hw/eepro100.c +++ b/hw/eepro100.c @@ -1317,8 +1317,9 @@ static uint32_t eepro100_read_port(EEPRO100State * s) return 0; } -static void eepro100_write_port(EEPRO100State * s, uint32_t val) +static void eepro100_write_port(EEPRO100State *s) { + uint32_t val = e100_read_reg4(s, SCBPort); uint32_t address = (val & ~PORT_SELECTION_MASK); uint8_t selection = (val & PORT_SELECTION_MASK); switch (selection) { @@ -1472,7 +1473,15 @@ static void eepro100_write1(EEPRO100State * s, uint32_t addr, uint8_t val) } eepro100_interrupt(s, 0); break; + case SCBPort: + case SCBPort + 1: + case SCBPort + 2: + TRACE(OTHER, logout("addr=%s val=0x%02x\n", regname(addr), val)); + break; case SCBPort + 3: + TRACE(OTHER, logout("addr=%s val=0x%02x\n", regname(addr), val)); + eepro100_write_port(s); + break; case SCBFlow: /* does not exist on 82557 */ case SCBFlow + 1: case SCBFlow + 2: @@ -1507,6 +1516,13 @@ static void eepro100_write2(EEPRO100State * s, uint32_t addr, uint16_t val) eepro100_write_command(s, val); eepro100_write1(s, SCBIntmask, val >> 8); break; + case SCBPort: + TRACE(OTHER, 
logout("addr=%s val=0x%04x\n", regname(addr), val)); + break; + case SCBPort + 2: + TRACE(OTHER, logout("addr=%s val=0x%04x\n", regname(addr), val)); + eepro100_write_port(s); + break; case SCBeeprom: TRACE(OTHER, logout("addr=%s val=0x%04x\n", regname(addr), val)); eepro100_write_eeprom(s->eeprom, val); @@ -1529,7 +1545,7 @@ static void eepro100_write4(EEPRO100State * s, uint32_t addr, uint32_t val) break; case SCBPort: TRACE(OTHER, logout("addr=%s val=0x%08x\n", regname(addr), val)); - eepro100_write_port(s, val); + eepro100_write_port(s); break; case SCBCtrlMDI: eepro100_write_mdi(s, val); From 27a05006e03eed00d72d943c06224fd8bd349e54 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Sat, 30 Apr 2011 22:40:10 +0200 Subject: [PATCH 27/34] eepro100: Support byte/word writes to pointer register pointer is a 32 bit register, but may be written using 8 or 16 bit writes. Add support for byte/word writes. Signed-off-by: Stefan Weil Signed-off-by: Michael S. Tsirkin --- hw/eepro100.c | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/hw/eepro100.c b/hw/eepro100.c index 37d6fb72db..634be594b8 100644 --- a/hw/eepro100.c +++ b/hw/eepro100.c @@ -240,7 +240,6 @@ typedef struct { uint16_t mdimem[32]; eeprom_t *eeprom; uint32_t device; /* device variant */ - uint32_t pointer; /* (cu_base + cu_offset) address the next command block in the command block list. */ uint32_t cu_base; /* CU base address */ uint32_t cu_offset; /* CU address offset */ @@ -991,7 +990,7 @@ static void eepro100_cu_command(EEPRO100State * s, uint8_t val) logout("unexpected CU state is %u\n", cu_state); } set_cu_state(s, cu_active); - s->cu_offset = s->pointer; + s->cu_offset = e100_read_reg4(s, SCBPointer); action_command(s); break; case CU_RESUME: @@ -1012,7 +1011,7 @@ static void eepro100_cu_command(EEPRO100State * s, uint8_t val) break; case CU_STATSADDR: /* Load dump counters address. 
*/ - s->statsaddr = s->pointer; + s->statsaddr = e100_read_reg4(s, SCBPointer); TRACE(OTHER, logout("val=0x%02x (status address)\n", val)); break; case CU_SHOWSTATS: @@ -1024,7 +1023,7 @@ static void eepro100_cu_command(EEPRO100State * s, uint8_t val) case CU_CMD_BASE: /* Load CU base. */ TRACE(OTHER, logout("val=0x%02x (CU base address)\n", val)); - s->cu_base = s->pointer; + s->cu_base = e100_read_reg4(s, SCBPointer); break; case CU_DUMPSTATS: /* Dump and reset statistical counters. */ @@ -1057,7 +1056,7 @@ static void eepro100_ru_command(EEPRO100State * s, uint8_t val) #endif } set_ru_state(s, ru_ready); - s->ru_offset = s->pointer; + s->ru_offset = e100_read_reg4(s, SCBPointer); TRACE(OTHER, logout("val=0x%02x (rx start)\n", val)); break; case RX_RESUME: @@ -1081,7 +1080,7 @@ static void eepro100_ru_command(EEPRO100State * s, uint8_t val) case RX_ADDR_LOAD: /* Load RU base. */ TRACE(OTHER, logout("val=0x%02x (RU base address)\n", val)); - s->ru_base = s->pointer; + s->ru_base = e100_read_reg4(s, SCBPointer); break; default: logout("val=0x%02x (undefined RU command)\n", val); @@ -1138,12 +1137,6 @@ static void eepro100_write_eeprom(eeprom_t * eeprom, uint8_t val) eeprom93xx_write(eeprom, eecs, eesk, eedi); } -static void eepro100_write_pointer(EEPRO100State * s, uint32_t val) -{ - s->pointer = le32_to_cpu(val); - TRACE(OTHER, logout("val=0x%08x\n", val)); -} - /***************************************************************************** * * MDI emulation. 
@@ -1428,9 +1421,6 @@ static uint32_t eepro100_read4(EEPRO100State * s, uint32_t addr) TRACE(OTHER, logout("addr=%s val=0x%08x\n", regname(addr), val)); break; case SCBPointer: -#if 0 - val = eepro100_read_pointer(s); -#endif TRACE(OTHER, logout("addr=%s val=0x%08x\n", regname(addr), val)); break; case SCBPort: @@ -1473,6 +1463,12 @@ static void eepro100_write1(EEPRO100State * s, uint32_t addr, uint8_t val) } eepro100_interrupt(s, 0); break; + case SCBPointer: + case SCBPointer + 1: + case SCBPointer + 2: + case SCBPointer + 3: + TRACE(OTHER, logout("addr=%s val=0x%02x\n", regname(addr), val)); + break; case SCBPort: case SCBPort + 1: case SCBPort + 2: @@ -1516,6 +1512,10 @@ static void eepro100_write2(EEPRO100State * s, uint32_t addr, uint16_t val) eepro100_write_command(s, val); eepro100_write1(s, SCBIntmask, val >> 8); break; + case SCBPointer: + case SCBPointer + 2: + TRACE(OTHER, logout("addr=%s val=0x%04x\n", regname(addr), val)); + break; case SCBPort: TRACE(OTHER, logout("addr=%s val=0x%04x\n", regname(addr), val)); break; @@ -1541,7 +1541,7 @@ static void eepro100_write4(EEPRO100State * s, uint32_t addr, uint32_t val) switch (addr) { case SCBPointer: - eepro100_write_pointer(s, val); + TRACE(OTHER, logout("addr=%s val=0x%08x\n", regname(addr), val)); break; case SCBPort: TRACE(OTHER, logout("addr=%s val=0x%08x\n", regname(addr), val)); @@ -1881,7 +1881,6 @@ static const VMStateDescription vmstate_eepro100 = { /* The eeprom should be saved and restored by its own routines. */ VMSTATE_UINT32(device, EEPRO100State), /* TODO check device. 
*/ - VMSTATE_UINT32(pointer, EEPRO100State), VMSTATE_UINT32(cu_base, EEPRO100State), VMSTATE_UINT32(cu_offset, EEPRO100State), VMSTATE_UINT32(ru_base, EEPRO100State), From 0113f48df6cb8209df9d85e4f92c79aa3be1b5c5 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Sat, 30 Apr 2011 22:40:11 +0200 Subject: [PATCH 28/34] eepro100: Support byte/word read/write access to MDI control register MDI control is a 32 bit register, but may be read or written using 8 or 16 bit access. Data is latched when the MSB is written. Add support for byte/word read/write access. Signed-off-by: Stefan Weil Signed-off-by: Michael S. Tsirkin --- hw/eepro100.c | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/hw/eepro100.c b/hw/eepro100.c index 634be594b8..c612fe1390 100644 --- a/hw/eepro100.c +++ b/hw/eepro100.c @@ -1193,8 +1193,9 @@ static uint32_t eepro100_read_mdi(EEPRO100State * s) return val; } -static void eepro100_write_mdi(EEPRO100State * s, uint32_t val) +static void eepro100_write_mdi(EEPRO100State *s) { + uint32_t val = e100_read_reg4(s, SCBCtrlMDI); uint8_t raiseint = (val & BIT(29)) >> 29; uint8_t opcode = (val & BITS(27, 26)) >> 26; uint8_t phy = (val & BITS(25, 21)) >> 21; @@ -1370,6 +1371,13 @@ static uint8_t eepro100_read1(EEPRO100State * s, uint32_t addr) case SCBeeprom: val = eepro100_read_eeprom(s); break; + case SCBCtrlMDI: + case SCBCtrlMDI + 1: + case SCBCtrlMDI + 2: + case SCBCtrlMDI + 3: + val = (uint8_t)(eepro100_read_mdi(s) >> (8 * (addr & 3))); + TRACE(OTHER, logout("addr=%s val=0x%02x\n", regname(addr), val)); + break; case SCBpmdr: /* Power Management Driver Register */ val = 0; TRACE(OTHER, logout("addr=%s val=0x%02x\n", regname(addr), val)); @@ -1402,6 +1410,11 @@ static uint16_t eepro100_read2(EEPRO100State * s, uint32_t addr) val = eepro100_read_eeprom(s); TRACE(OTHER, logout("addr=%s val=0x%04x\n", regname(addr), val)); break; + case SCBCtrlMDI: + case SCBCtrlMDI + 2: + val = (uint16_t)(eepro100_read_mdi(s) 
>> (8 * (addr & 3))); + TRACE(OTHER, logout("addr=%s val=0x%04x\n", regname(addr), val)); + break; default: logout("addr=%s val=0x%04x\n", regname(addr), val); missing("unknown word read"); @@ -1488,6 +1501,15 @@ static void eepro100_write1(EEPRO100State * s, uint32_t addr, uint8_t val) TRACE(OTHER, logout("addr=%s val=0x%02x\n", regname(addr), val)); eepro100_write_eeprom(s->eeprom, val); break; + case SCBCtrlMDI: + case SCBCtrlMDI + 1: + case SCBCtrlMDI + 2: + TRACE(OTHER, logout("addr=%s val=0x%02x\n", regname(addr), val)); + break; + case SCBCtrlMDI + 3: + TRACE(OTHER, logout("addr=%s val=0x%02x\n", regname(addr), val)); + eepro100_write_mdi(s); + break; default: logout("addr=%s val=0x%02x\n", regname(addr), val); missing("unknown byte write"); @@ -1527,6 +1549,13 @@ static void eepro100_write2(EEPRO100State * s, uint32_t addr, uint16_t val) TRACE(OTHER, logout("addr=%s val=0x%04x\n", regname(addr), val)); eepro100_write_eeprom(s->eeprom, val); break; + case SCBCtrlMDI: + TRACE(OTHER, logout("addr=%s val=0x%04x\n", regname(addr), val)); + break; + case SCBCtrlMDI + 2: + TRACE(OTHER, logout("addr=%s val=0x%04x\n", regname(addr), val)); + eepro100_write_mdi(s); + break; default: logout("addr=%s val=0x%04x\n", regname(addr), val); missing("unknown word write"); @@ -1548,7 +1577,8 @@ static void eepro100_write4(EEPRO100State * s, uint32_t addr, uint32_t val) eepro100_write_port(s); break; case SCBCtrlMDI: - eepro100_write_mdi(s, val); + TRACE(OTHER, logout("addr=%s val=0x%08x\n", regname(addr), val)); + eepro100_write_mdi(s); break; default: logout("addr=%s val=0x%08x\n", regname(addr), val); From a39bd01713b9e4e12bffe835b95edbc0ea5f0b16 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Sat, 30 Apr 2011 22:40:12 +0200 Subject: [PATCH 29/34] eepro100: Support byte read access to general control register The general control register is a byte register. Add support for byte reads. Signed-off-by: Stefan Weil Signed-off-by: Michael S. 
Tsirkin --- hw/eepro100.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hw/eepro100.c b/hw/eepro100.c index c612fe1390..726d402396 100644 --- a/hw/eepro100.c +++ b/hw/eepro100.c @@ -1382,6 +1382,9 @@ static uint8_t eepro100_read1(EEPRO100State * s, uint32_t addr) val = 0; TRACE(OTHER, logout("addr=%s val=0x%02x\n", regname(addr), val)); break; + case SCBgctrl: /* General Control Register */ + TRACE(OTHER, logout("addr=%s val=0x%02x\n", regname(addr), val)); + break; case SCBgstat: /* General Status Register */ /* 100 Mbps full duplex, valid link */ val = 0x07; From 072476ea08dcffe89b0bd6e2053f01dd89c54861 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Sat, 30 Apr 2011 22:40:13 +0200 Subject: [PATCH 30/34] eepro100: Support 32 bit read/write access to flash register Signed-off-by: Stefan Weil Signed-off-by: Michael S. Tsirkin --- hw/eepro100.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/hw/eepro100.c b/hw/eepro100.c index 726d402396..1781c8ec7c 100644 --- a/hw/eepro100.c +++ b/hw/eepro100.c @@ -1443,6 +1443,10 @@ static uint32_t eepro100_read4(EEPRO100State * s, uint32_t addr) val = eepro100_read_port(s); TRACE(OTHER, logout("addr=%s val=0x%08x\n", regname(addr), val)); break; + case SCBflash: + val = eepro100_read_eeprom(s); + TRACE(OTHER, logout("addr=%s val=0x%08x\n", regname(addr), val)); + break; case SCBCtrlMDI: val = eepro100_read_mdi(s); break; @@ -1579,6 +1583,11 @@ static void eepro100_write4(EEPRO100State * s, uint32_t addr, uint32_t val) TRACE(OTHER, logout("addr=%s val=0x%08x\n", regname(addr), val)); eepro100_write_port(s); break; + case SCBflash: + TRACE(OTHER, logout("addr=%s val=0x%08x\n", regname(addr), val)); + val = val >> 16; + eepro100_write_eeprom(s->eeprom, val); + break; case SCBCtrlMDI: TRACE(OTHER, logout("addr=%s val=0x%08x\n", regname(addr), val)); eepro100_write_mdi(s); From 45fe15c25a5c9feea6e0f78434f5e9f632de9d94 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 2 May 2011 20:00:47 +0200 Subject: [PATCH 
31/34] MSI: Robust resource release msi_init may fail, so we need to check on uninit if the cap was actually installed. This also avoids that the users need to check. Signed-off-by: Jan Kiszka Signed-off-by: Michael S. Tsirkin --- hw/ide/ich.c | 5 +---- hw/intel-hda.c | 4 +--- hw/msi.c | 12 ++++++++++-- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/hw/ide/ich.c b/hw/ide/ich.c index eb00f03b33..fd3537e281 100644 --- a/hw/ide/ich.c +++ b/hw/ide/ich.c @@ -110,10 +110,7 @@ static int pci_ich9_uninit(PCIDevice *dev) struct AHCIPCIState *d; d = DO_UPCAST(struct AHCIPCIState, card, dev); - if (msi_enabled(dev)) { - msi_uninit(dev); - } - + msi_uninit(dev); qemu_unregister_reset(ahci_reset, d); ahci_uninit(&d->ahci); diff --git a/hw/intel-hda.c b/hw/intel-hda.c index 7f83745d1a..5485745e85 100644 --- a/hw/intel-hda.c +++ b/hw/intel-hda.c @@ -1165,9 +1165,7 @@ static int intel_hda_exit(PCIDevice *pci) { IntelHDAState *d = DO_UPCAST(IntelHDAState, pci, pci); - if (d->msi) { - msi_uninit(&d->pci); - } + msi_uninit(&d->pci); cpu_unregister_io_memory(d->mmio_addr); return 0; } diff --git a/hw/msi.c b/hw/msi.c index 3dc3a24b77..b0795bd708 100644 --- a/hw/msi.c +++ b/hw/msi.c @@ -164,9 +164,17 @@ int msi_init(struct PCIDevice *dev, uint8_t offset, void msi_uninit(struct PCIDevice *dev) { - uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev)); - uint8_t cap_size = msi_cap_sizeof(flags); + uint16_t flags; + uint8_t cap_size; + + if (!(dev->cap_present & QEMU_PCI_CAP_MSI)) { + return; + } + flags = pci_get_word(dev->config + msi_flags_off(dev)); + cap_size = msi_cap_sizeof(flags); pci_del_capability(dev, PCI_CAP_ID_MSIX, cap_size); + dev->cap_present &= ~QEMU_PCI_CAP_MSI; + MSI_DEV_PRINTF(dev, "uninit\n"); } From 602ef4d917f78cee8e1057ca85bdc8888a1f7087 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 2 May 2011 20:01:37 +0200 Subject: [PATCH 32/34] pci: Add class 0x403 as 'audio controller' Used by HD audio controllers like our intel-hda. 
Signed-off-by: Jan Kiszka Signed-off-by: Michael S. Tsirkin --- hw/pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/pci.c b/hw/pci.c index 410b67bace..0875654253 100644 --- a/hw/pci.c +++ b/hw/pci.c @@ -1169,6 +1169,7 @@ static const pci_class_desc pci_class_descriptions[] = { 0x0400, "Video controller", "video"}, { 0x0401, "Audio controller", "sound"}, { 0x0402, "Phone"}, + { 0x0403, "Audio controller", "sound"}, { 0x0480, "Multimedia controller"}, { 0x0500, "RAM controller", "memory"}, { 0x0501, "Flash controller", "flash"}, From c2f42bf003eac96ee4093faaf44cbf784ac64398 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 3 May 2011 12:36:32 -0600 Subject: [PATCH 33/34] CPUPhysMemoryClient: Fix typo in phys memory client registration When we register a physical memory client, we try to walk the page tables, calling the set_memory hook for every entry, effectively playing catch-up for the client for everything already registered. With this typo, we only walk the 2nd entry of the l1 table, typically missing all of the registered memory. Signed-off-by: Alex Williamson Signed-off-by: Michael S. Tsirkin --- exec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exec.c b/exec.c index d1a066c5ac..9823e355f7 100644 --- a/exec.c +++ b/exec.c @@ -1772,7 +1772,7 @@ static void phys_page_for_each(CPUPhysMemoryClient *client) int i; for (i = 0; i < P_L1_SIZE; ++i) { phys_page_for_each_1(client, P_L1_SHIFT / L2_BITS - 1, - l1_phys_map + 1); + l1_phys_map + i); } } From 8d4c78e7c8adf0a4440a8de92738b3820fc8215a Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 3 May 2011 12:36:46 -0600 Subject: [PATCH 34/34] CPUPhysMemoryClient: Pass guest physical address not region offset When we're trying to get a newly registered phys memory client updated with the current page mappings, we end up passing the region offset (a ram_addr_t) as the start address rather than the actual guest physical memory address (target_phys_addr_t).
If your guest has less than 3.5G of memory, these are coincidentally the same thing. If there's more, the region offset for the memory above 4G starts over at 0, so the set_memory client will overwrite its lower memory entries. Instead, keep track of the guest physical address as we're walking the tables and pass that to the set_memory client. Signed-off-by: Alex Williamson Acked-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- exec.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/exec.c b/exec.c index 9823e355f7..0c773a8ad9 100644 --- a/exec.c +++ b/exec.c @@ -1743,8 +1743,14 @@ static int cpu_notify_migration_log(int enable) return 0; } +/* The l1_phys_map provides the upper P_L1_BITs of the guest physical + * address. Each intermediate table provides the next L2_BITs of guest + * physical address space. The number of levels vary based on host and + * guest configuration, making it efficient to build the final guest + * physical address by seeding the L1 offset and shifting and adding in + * each L2 offset as we recurse through them.
*/ static void phys_page_for_each_1(CPUPhysMemoryClient *client, - int level, void **lp) + int level, void **lp, target_phys_addr_t addr) { int i; @@ -1753,16 +1759,18 @@ static void phys_page_for_each_1(CPUPhysMemoryClient *client, } if (level == 0) { PhysPageDesc *pd = *lp; + addr <<= L2_BITS + TARGET_PAGE_BITS; for (i = 0; i < L2_SIZE; ++i) { if (pd[i].phys_offset != IO_MEM_UNASSIGNED) { - client->set_memory(client, pd[i].region_offset, + client->set_memory(client, addr | i << TARGET_PAGE_BITS, TARGET_PAGE_SIZE, pd[i].phys_offset, false); } } } else { void **pp = *lp; for (i = 0; i < L2_SIZE; ++i) { - phys_page_for_each_1(client, level - 1, pp + i); + phys_page_for_each_1(client, level - 1, pp + i, + (addr << L2_BITS) | i); } } } @@ -1772,7 +1780,7 @@ static void phys_page_for_each(CPUPhysMemoryClient *client) int i; for (i = 0; i < P_L1_SIZE; ++i) { phys_page_for_each_1(client, P_L1_SHIFT / L2_BITS - 1, - l1_phys_map + i); + l1_phys_map + i, i); } }