qemu/hw/pci-host/piix.c

940 lines
29 KiB
C
Raw Normal View History

/*
* QEMU i440FX/PIIX3 PCI Bridge Emulation
*
* Copyright (c) 2006 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
#include "hw/hw.h"
#include "hw/i386/pc.h"
#include "hw/pci/pci.h"
#include "hw/pci/pci_host.h"
#include "hw/isa/isa.h"
#include "hw/sysbus.h"
2016-03-14 16:01:28 +08:00
#include "qapi/error.h"
#include "qemu/range.h"
#include "hw/xen/xen.h"
#include "hw/pci-host/pam.h"
PIIX3: reset the VM when the Reset Control Register's RCPU bit gets set Traditional PCI config space access is achieved by writing a 32 bit value to io port 0xcf8 to identify the bus, device, function and config register. Port 0xcfc then contains the register in question. But if you write the appropriate pair of magic values to 0xcf9, the machine will reboot. Spectacular! And not standardised in any way (certainly not part of the PCI spec), so different chipsets may have different requirements. Booo. In the PIIX3 spec, IO port 0xcf9 is specified as the Reset Control Register. Bit 1 (System Reset, SRST) would normally differentiate between soft reset and hard reset, but we ignore the difference beyond allowing the guest to read it back. RHBZ reference: 890459 This patch introduces the following overlap between the preexistent "pci-conf-idx" region and the "piix3-reset-control" region just being added. Partial output from "info mtree": I/O 0000000000000000-000000000000ffff (prio 0, RW): io 0000000000000cf8-0000000000000cfb (prio 0, RW): pci-conf-idx 0000000000000cf9-0000000000000cf9 (prio 1, RW): piix3-reset-control I sanity-checked the patch by booting a RHEL-6.3 guest and found no problems. I summoned gdb and set a breakpoint on rcr_write() in order to gather a bit more confidence. Relevant frames of the stack: kvm_handle_io (port=3321, data=0x7f3f5f3de000, direction=1, size=1, count=1) [kvm-all.c:1422] cpu_outb (addr=3321, val=6 '\006') [ioport.c:289] ioport_write (index=0, address=3321, data=6) [ioport.c:83] ioport_writeb_thunk (opaque=0x7f3f622c4680, addr=3321, data=6) [ioport.c:212] memory_region_iorange_write (iorange=0x7f3f622c4680, offset=0, width=1, data=6) [memory.c:439] access_with_adjusted_size (addr=0, value=0x7f3f531fbac0, size=1, access_size_min=1, access_size_max=4, access=0x7f3f5f6e0f90 <memory_region_write_accessor>, opaque=0x7f3f6227b668) [memory.c:364] memory_region_write_accessor (opaque=0x7f3f6227b668, addr=0, value=0x7f3f531fbac0, size=1, shift=0, mask=255) [memory.c:334] rcr_write (opaque=0x7f3f6227afb0, addr=0, val=6, len=1) [hw/piix_pci.c:498] The dispatch happens in ioport_write(); "index=0" means byte-wide access: static void ioport_write(int index, uint32_t address, uint32_t data) { static IOPortWriteFunc * const default_func[3] = { default_ioport_writeb, default_ioport_writew, default_ioport_writel }; IOPortWriteFunc *func = ioport_write_table[index][address]; if (!func) func = default_func[index]; func(ioport_opaque[address], address, data); } The "ioport_write_table" and "ioport_opaque" arrays describe the flattened IO port space. The first array is less interesting (it selects a thunk function). The "ioport_opaque" array is interesting because it decides how writing to the port is implemented ultimately. 4-byte wide access to 0xcf8 (pci-conf-idx): (gdb) print ioport_write_table[2][0xcf8] $1 = (IOPortWriteFunc *) 0x7f3f5f6d99ba <ioport_writel_thunk> (gdb) print \ ((struct MemoryRegionIORange*)ioport_opaque[0xcf8])->mr->ops.write $2 = (void (*)(void *, hwaddr, uint64_t, unsigned int)) 0x7f3f5f5575cb <pci_host_config_write> 1-byte wide access to 0xcf9 (piix3-reset-control): (gdb) print ioport_write_table[0][0xcf9] $3 = (IOPortWriteFunc *) 0x7f3f5f6d98d0 <ioport_writeb_thunk> (gdb) print \ ((struct MemoryRegionIORange*)ioport_opaque[0xcf9])->mr->ops.write $4 = (void (*)(void *, hwaddr, uint64_t, unsigned int)) 0x7f3f5f6b42f1 <rcr_write> The higher priority of "piix3-reset-control" ensures that the 0xcf9 entries in ioport_write_table / ioport_opaque will always belong to it, independently of its relative registration order versus "pci-conf-idx". Signed-off-by: Laszlo Ersek <lersek@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2013-01-24 17:31:20 +08:00
#include "sysemu/sysemu.h"
#include "hw/i386/ioapic.h"
#include "qapi/visitor.h"
#include "qemu/error-report.h"
/*
* I440FX chipset data sheet.
* https://wiki.qemu.org/File:29054901.pdf
*/
#define I440FX_PCI_HOST_BRIDGE(obj) \
OBJECT_CHECK(I440FXState, (obj), TYPE_I440FX_PCI_HOST_BRIDGE)
typedef struct I440FXState {
PCIHostState parent_obj;
Range pci_hole;
uint64_t pci_hole64_size;
bool pci_hole64_fix;
uint32_t short_root_bus;
} I440FXState;
#define PIIX_NUM_PIC_IRQS 16 /* i8259 * 2 */
#define PIIX_NUM_PIRQS 4ULL /* PIRQ[A-D] */
#define XEN_PIIX_NUM_PIRQS 128ULL
#define PIIX_PIRQC 0x60
typedef struct PIIX3State {
PCIDevice dev;
/*
* bitmap to track pic levels.
* The pic level is the logical OR of all the PCI irqs mapped to it
* So one PIC level is tracked by PIIX_NUM_PIRQS bits.
*
* PIRQ is mapped to PIC pins, we track it by
* PIIX_NUM_PIRQS * PIIX_NUM_PIC_IRQS = 64 bits with
* pic_irq * PIIX_NUM_PIRQS + pirq
*/
#if PIIX_NUM_PIC_IRQS * PIIX_NUM_PIRQS > 64
#error "unable to encode pic state in 64bit in pic_levels."
#endif
uint64_t pic_levels;
qemu_irq *pic;
/* This member isn't used. Just for save/load compatibility */
int32_t pci_irq_levels_vmstate[PIIX_NUM_PIRQS];
PIIX3: reset the VM when the Reset Control Register's RCPU bit gets set Traditional PCI config space access is achieved by writing a 32 bit value to io port 0xcf8 to identify the bus, device, function and config register. Port 0xcfc then contains the register in question. But if you write the appropriate pair of magic values to 0xcf9, the machine will reboot. Spectacular! And not standardised in any way (certainly not part of the PCI spec), so different chipsets may have different requirements. Booo. In the PIIX3 spec, IO port 0xcf9 is specified as the Reset Control Register. Bit 1 (System Reset, SRST) would normally differentiate between soft reset and hard reset, but we ignore the difference beyond allowing the guest to read it back. RHBZ reference: 890459 This patch introduces the following overlap between the preexistent "pci-conf-idx" region and the "piix3-reset-control" region just being added. Partial output from "info mtree": I/O 0000000000000000-000000000000ffff (prio 0, RW): io 0000000000000cf8-0000000000000cfb (prio 0, RW): pci-conf-idx 0000000000000cf9-0000000000000cf9 (prio 1, RW): piix3-reset-control I sanity-checked the patch by booting a RHEL-6.3 guest and found no problems. I summoned gdb and set a breakpoint on rcr_write() in order to gather a bit more confidence. Relevant frames of the stack: kvm_handle_io (port=3321, data=0x7f3f5f3de000, direction=1, size=1, count=1) [kvm-all.c:1422] cpu_outb (addr=3321, val=6 '\006') [ioport.c:289] ioport_write (index=0, address=3321, data=6) [ioport.c:83] ioport_writeb_thunk (opaque=0x7f3f622c4680, addr=3321, data=6) [ioport.c:212] memory_region_iorange_write (iorange=0x7f3f622c4680, offset=0, width=1, data=6) [memory.c:439] access_with_adjusted_size (addr=0, value=0x7f3f531fbac0, size=1, access_size_min=1, access_size_max=4, access=0x7f3f5f6e0f90 <memory_region_write_accessor>, opaque=0x7f3f6227b668) [memory.c:364] memory_region_write_accessor (opaque=0x7f3f6227b668, addr=0, value=0x7f3f531fbac0, size=1, shift=0, mask=255) [memory.c:334] rcr_write (opaque=0x7f3f6227afb0, addr=0, val=6, len=1) [hw/piix_pci.c:498] The dispatch happens in ioport_write(); "index=0" means byte-wide access: static void ioport_write(int index, uint32_t address, uint32_t data) { static IOPortWriteFunc * const default_func[3] = { default_ioport_writeb, default_ioport_writew, default_ioport_writel }; IOPortWriteFunc *func = ioport_write_table[index][address]; if (!func) func = default_func[index]; func(ioport_opaque[address], address, data); } The "ioport_write_table" and "ioport_opaque" arrays describe the flattened IO port space. The first array is less interesting (it selects a thunk function). The "ioport_opaque" array is interesting because it decides how writing to the port is implemented ultimately. 4-byte wide access to 0xcf8 (pci-conf-idx): (gdb) print ioport_write_table[2][0xcf8] $1 = (IOPortWriteFunc *) 0x7f3f5f6d99ba <ioport_writel_thunk> (gdb) print \ ((struct MemoryRegionIORange*)ioport_opaque[0xcf8])->mr->ops.write $2 = (void (*)(void *, hwaddr, uint64_t, unsigned int)) 0x7f3f5f5575cb <pci_host_config_write> 1-byte wide access to 0xcf9 (piix3-reset-control): (gdb) print ioport_write_table[0][0xcf9] $3 = (IOPortWriteFunc *) 0x7f3f5f6d98d0 <ioport_writeb_thunk> (gdb) print \ ((struct MemoryRegionIORange*)ioport_opaque[0xcf9])->mr->ops.write $4 = (void (*)(void *, hwaddr, uint64_t, unsigned int)) 0x7f3f5f6b42f1 <rcr_write> The higher priority of "piix3-reset-control" ensures that the 0xcf9 entries in ioport_write_table / ioport_opaque will always belong to it, independently of its relative registration order versus "pci-conf-idx". Signed-off-by: Laszlo Ersek <lersek@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2013-01-24 17:31:20 +08:00
/* Reset Control Register contents */
uint8_t rcr;
/* IO memory region for Reset Control Register (RCR_IOPORT) */
MemoryRegion rcr_mem;
} PIIX3State;
#define TYPE_PIIX3_PCI_DEVICE "pci-piix3"
#define PIIX3_PCI_DEVICE(obj) \
OBJECT_CHECK(PIIX3State, (obj), TYPE_PIIX3_PCI_DEVICE)
#define I440FX_PCI_DEVICE(obj) \
OBJECT_CHECK(PCII440FXState, (obj), TYPE_I440FX_PCI_DEVICE)
#define TYPE_PIIX3_DEVICE "PIIX3"
#define TYPE_PIIX3_XEN_DEVICE "PIIX3-xen"
struct PCII440FXState {
/*< private >*/
PCIDevice parent_obj;
/*< public >*/
MemoryRegion *system_memory;
MemoryRegion *pci_address_space;
MemoryRegion *ram_memory;
PAMMemoryRegion pam_regions[13];
MemoryRegion smram_region;
MemoryRegion smram, low_smram;
};
#define I440FX_PAM 0x59
#define I440FX_PAM_SIZE 7
#define I440FX_SMRAM 0x72
/* Keep it 2G to comply with older win32 guests */
#define I440FX_PCI_HOST_HOLE64_SIZE_DEFAULT (1ULL << 31)
/* Older coreboot versions (4.0 and older) read a config register that doesn't
* exist in real hardware, to get the RAM size from QEMU.
*/
#define I440FX_COREBOOT_RAM_SIZE 0x57
static void piix3_set_irq(void *opaque, int pirq, int level);
static PCIINTxRoute piix3_route_intx_pin_to_irq(void *opaque, int pci_intx);
static void piix3_write_config_xen(PCIDevice *dev,
uint32_t address, uint32_t val, int len);
/* return the global irq number corresponding to a given device irq
pin. We could also use the bus number to have a more precise
mapping. */
static int pci_slot_get_pirq(PCIDevice *pci_dev, int pci_intx)
{
int slot_addend;
slot_addend = (pci_dev->devfn >> 3) - 1;
return (pci_intx + slot_addend) & 3;
}
static void i440fx_update_memory_mappings(PCII440FXState *d)
{
int i;
PCIDevice *pd = PCI_DEVICE(d);
memory_region_transaction_begin();
for (i = 0; i < ARRAY_SIZE(d->pam_regions); i++) {
pam_update(&d->pam_regions[i], i,
pd->config[I440FX_PAM + DIV_ROUND_UP(i, 2)]);
}
memory_region_set_enabled(&d->smram_region,
!(pd->config[I440FX_SMRAM] & SMRAM_D_OPEN));
memory_region_set_enabled(&d->smram,
pd->config[I440FX_SMRAM] & SMRAM_G_SMRAME);
memory_region_transaction_commit();
}
static void i440fx_write_config(PCIDevice *dev,
uint32_t address, uint32_t val, int len)
{
PCII440FXState *d = I440FX_PCI_DEVICE(dev);
/* XXX: implement SMRAM.D_LOCK */
pci_default_write_config(dev, address, val, len);
if (ranges_overlap(address, len, I440FX_PAM, I440FX_PAM_SIZE) ||
range_covers_byte(address, len, I440FX_SMRAM)) {
i440fx_update_memory_mappings(d);
}
}
static int i440fx_load_old(QEMUFile* f, void *opaque, int version_id)
{
PCII440FXState *d = opaque;
PCIDevice *pd = PCI_DEVICE(d);
int ret, i;
uint8_t smm_enabled;
ret = pci_device_load(pd, f);
if (ret < 0)
return ret;
i440fx_update_memory_mappings(d);
qemu_get_8s(f, &smm_enabled);
if (version_id == 2) {
for (i = 0; i < PIIX_NUM_PIRQS; i++) {
qemu_get_be32(f); /* dummy load for compatibility */
}
}
return 0;
}
static int i440fx_post_load(void *opaque, int version_id)
{
PCII440FXState *d = opaque;
i440fx_update_memory_mappings(d);
return 0;
}
static const VMStateDescription vmstate_i440fx = {
.name = "I440FX",
.version_id = 3,
.minimum_version_id = 3,
.minimum_version_id_old = 1,
.load_state_old = i440fx_load_old,
.post_load = i440fx_post_load,
.fields = (VMStateField[]) {
VMSTATE_PCI_DEVICE(parent_obj, PCII440FXState),
/* Used to be smm_enabled, which was basically always zero because
* SeaBIOS hardly uses SMM. SMRAM is now handled by CPU code.
*/
VMSTATE_UNUSED(1),
VMSTATE_END_OF_LIST()
}
};
static void i440fx_pcihost_get_pci_hole_start(Object *obj, Visitor *v,
const char *name, void *opaque,
Error **errp)
{
I440FXState *s = I440FX_PCI_HOST_BRIDGE(obj);
uint64_t val64;
uint32_t value;
val64 = range_is_empty(&s->pci_hole) ? 0 : range_lob(&s->pci_hole);
value = val64;
assert(value == val64);
qapi: Swap visit_* arguments for consistent 'name' placement JSON uses "name":value, but many of our visitor interfaces were called with visit_type_FOO(v, &value, name, errp). This can be a bit confusing to have to mentally swap the parameter order to match JSON order. It's particularly bad for visit_start_struct(), where the 'name' parameter is smack in the middle of the otherwise-related group of 'obj, kind, size' parameters! It's time to do a global swap of the parameter ordering, so that the 'name' parameter is always immediately after the Visitor argument. Additional reason in favor of the swap: the existing include/qjson.h prefers listing 'name' first in json_prop_*(), and I have plans to unify that file with the qapi visitors; listing 'name' first in qapi will minimize churn to the (admittedly few) qjson.h clients. Later patches will then fix docs, object.h, visitor-impl.h, and those clients to match. Done by first patching scripts/qapi*.py by hand to make generated files do what I want, then by running the following Coccinelle script to affect the rest of the code base: $ spatch --sp-file script `git grep -l '\bvisit_' -- '**/*.[ch]'` I then had to apply some touchups (Coccinelle insisted on TAB indentation in visitor.h, and botched the signature of visit_type_enum() by rewriting 'const char *const strings[]' to the syntactically invalid 'const char*const[] strings'). The movement of parameters is sufficient to provoke compiler errors if any callers were missed. // Part 1: Swap declaration order @@ type TV, TErr, TObj, T1, T2; identifier OBJ, ARG1, ARG2; @@ void visit_start_struct -(TV v, TObj OBJ, T1 ARG1, const char *name, T2 ARG2, TErr errp) +(TV v, const char *name, TObj OBJ, T1 ARG1, T2 ARG2, TErr errp) { ... } @@ type bool, TV, T1; identifier ARG1; @@ bool visit_optional -(TV v, T1 ARG1, const char *name) +(TV v, const char *name, T1 ARG1) { ... } @@ type TV, TErr, TObj, T1; identifier OBJ, ARG1; @@ void visit_get_next_type -(TV v, TObj OBJ, T1 ARG1, const char *name, TErr errp) +(TV v, const char *name, TObj OBJ, T1 ARG1, TErr errp) { ... } @@ type TV, TErr, TObj, T1, T2; identifier OBJ, ARG1, ARG2; @@ void visit_type_enum -(TV v, TObj OBJ, T1 ARG1, T2 ARG2, const char *name, TErr errp) +(TV v, const char *name, TObj OBJ, T1 ARG1, T2 ARG2, TErr errp) { ... } @@ type TV, TErr, TObj; identifier OBJ; identifier VISIT_TYPE =~ "^visit_type_"; @@ void VISIT_TYPE -(TV v, TObj OBJ, const char *name, TErr errp) +(TV v, const char *name, TObj OBJ, TErr errp) { ... } // Part 2: swap caller order @@ expression V, NAME, OBJ, ARG1, ARG2, ERR; identifier VISIT_TYPE =~ "^visit_type_"; @@ ( -visit_start_struct(V, OBJ, ARG1, NAME, ARG2, ERR) +visit_start_struct(V, NAME, OBJ, ARG1, ARG2, ERR) | -visit_optional(V, ARG1, NAME) +visit_optional(V, NAME, ARG1) | -visit_get_next_type(V, OBJ, ARG1, NAME, ERR) +visit_get_next_type(V, NAME, OBJ, ARG1, ERR) | -visit_type_enum(V, OBJ, ARG1, ARG2, NAME, ERR) +visit_type_enum(V, NAME, OBJ, ARG1, ARG2, ERR) | -VISIT_TYPE(V, OBJ, NAME, ERR) +VISIT_TYPE(V, NAME, OBJ, ERR) ) Signed-off-by: Eric Blake <eblake@redhat.com> Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com> Message-Id: <1454075341-13658-19-git-send-email-eblake@redhat.com> Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-01-29 21:48:54 +08:00
visit_type_uint32(v, name, &value, errp);
}
static void i440fx_pcihost_get_pci_hole_end(Object *obj, Visitor *v,
const char *name, void *opaque,
Error **errp)
{
I440FXState *s = I440FX_PCI_HOST_BRIDGE(obj);
uint64_t val64;
uint32_t value;
val64 = range_is_empty(&s->pci_hole) ? 0 : range_upb(&s->pci_hole) + 1;
value = val64;
assert(value == val64);
qapi: Swap visit_* arguments for consistent 'name' placement JSON uses "name":value, but many of our visitor interfaces were called with visit_type_FOO(v, &value, name, errp). This can be a bit confusing to have to mentally swap the parameter order to match JSON order. It's particularly bad for visit_start_struct(), where the 'name' parameter is smack in the middle of the otherwise-related group of 'obj, kind, size' parameters! It's time to do a global swap of the parameter ordering, so that the 'name' parameter is always immediately after the Visitor argument. Additional reason in favor of the swap: the existing include/qjson.h prefers listing 'name' first in json_prop_*(), and I have plans to unify that file with the qapi visitors; listing 'name' first in qapi will minimize churn to the (admittedly few) qjson.h clients. Later patches will then fix docs, object.h, visitor-impl.h, and those clients to match. Done by first patching scripts/qapi*.py by hand to make generated files do what I want, then by running the following Coccinelle script to affect the rest of the code base: $ spatch --sp-file script `git grep -l '\bvisit_' -- '**/*.[ch]'` I then had to apply some touchups (Coccinelle insisted on TAB indentation in visitor.h, and botched the signature of visit_type_enum() by rewriting 'const char *const strings[]' to the syntactically invalid 'const char*const[] strings'). The movement of parameters is sufficient to provoke compiler errors if any callers were missed. // Part 1: Swap declaration order @@ type TV, TErr, TObj, T1, T2; identifier OBJ, ARG1, ARG2; @@ void visit_start_struct -(TV v, TObj OBJ, T1 ARG1, const char *name, T2 ARG2, TErr errp) +(TV v, const char *name, TObj OBJ, T1 ARG1, T2 ARG2, TErr errp) { ... } @@ type bool, TV, T1; identifier ARG1; @@ bool visit_optional -(TV v, T1 ARG1, const char *name) +(TV v, const char *name, T1 ARG1) { ... } @@ type TV, TErr, TObj, T1; identifier OBJ, ARG1; @@ void visit_get_next_type -(TV v, TObj OBJ, T1 ARG1, const char *name, TErr errp) +(TV v, const char *name, TObj OBJ, T1 ARG1, TErr errp) { ... } @@ type TV, TErr, TObj, T1, T2; identifier OBJ, ARG1, ARG2; @@ void visit_type_enum -(TV v, TObj OBJ, T1 ARG1, T2 ARG2, const char *name, TErr errp) +(TV v, const char *name, TObj OBJ, T1 ARG1, T2 ARG2, TErr errp) { ... } @@ type TV, TErr, TObj; identifier OBJ; identifier VISIT_TYPE =~ "^visit_type_"; @@ void VISIT_TYPE -(TV v, TObj OBJ, const char *name, TErr errp) +(TV v, const char *name, TObj OBJ, TErr errp) { ... } // Part 2: swap caller order @@ expression V, NAME, OBJ, ARG1, ARG2, ERR; identifier VISIT_TYPE =~ "^visit_type_"; @@ ( -visit_start_struct(V, OBJ, ARG1, NAME, ARG2, ERR) +visit_start_struct(V, NAME, OBJ, ARG1, ARG2, ERR) | -visit_optional(V, ARG1, NAME) +visit_optional(V, NAME, ARG1) | -visit_get_next_type(V, OBJ, ARG1, NAME, ERR) +visit_get_next_type(V, NAME, OBJ, ARG1, ERR) | -visit_type_enum(V, OBJ, ARG1, ARG2, NAME, ERR) +visit_type_enum(V, NAME, OBJ, ARG1, ARG2, ERR) | -VISIT_TYPE(V, OBJ, NAME, ERR) +VISIT_TYPE(V, NAME, OBJ, ERR) ) Signed-off-by: Eric Blake <eblake@redhat.com> Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com> Message-Id: <1454075341-13658-19-git-send-email-eblake@redhat.com> Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-01-29 21:48:54 +08:00
visit_type_uint32(v, name, &value, errp);
}
/*
* The 64bit PCI hole start is set by the Guest firmware
* as the address of the first 64bit PCI MEM resource.
* If no PCI device has resources on the 64bit area,
* the 64bit PCI hole will start after "over 4G RAM" and the
* reserved space for memory hotplug if any.
*/
static uint64_t i440fx_pcihost_get_pci_hole64_start_value(Object *obj)
{
PCIHostState *h = PCI_HOST_BRIDGE(obj);
I440FXState *s = I440FX_PCI_HOST_BRIDGE(obj);
Range w64;
uint64_t value;
pci_bus_get_w64_range(h->bus, &w64);
value = range_is_empty(&w64) ? 0 : range_lob(&w64);
if (!value && s->pci_hole64_fix) {
value = pc_pci_hole64_start();
}
return value;
}
static void i440fx_pcihost_get_pci_hole64_start(Object *obj, Visitor *v,
const char *name,
void *opaque, Error **errp)
{
uint64_t hole64_start = i440fx_pcihost_get_pci_hole64_start_value(obj);
visit_type_uint64(v, name, &hole64_start, errp);
}
/*
* The 64bit PCI hole end is set by the Guest firmware
* as the address of the last 64bit PCI MEM resource.
* Then it is expanded to the PCI_HOST_PROP_PCI_HOLE64_SIZE
* that can be configured by the user.
*/
static void i440fx_pcihost_get_pci_hole64_end(Object *obj, Visitor *v,
const char *name, void *opaque,
Error **errp)
{
PCIHostState *h = PCI_HOST_BRIDGE(obj);
I440FXState *s = I440FX_PCI_HOST_BRIDGE(obj);
hw/pci-host/x86: extend the 64-bit PCI hole relative to the fw-assigned base In commit 9fa99d2519cb ("hw/pci-host: Fix x86 Host Bridges 64bit PCI hole", 2017-11-16), we meant to expose such a 64-bit PCI MMIO aperture in the ACPI DSDT that would be at least as large as the new "pci-hole64-size" property (2GB on i440fx, 32GB on q35). The goal was to offer "enough" 64-bit MMIO aperture to the guest OS for hotplug purposes. In that commit, we added or modified five functions: - pc_pci_hole64_start(): shared between i440fx and q35. Provides a default 64-bit base, which starts beyond the cold-plugged 64-bit RAM, and skips the DIMM hotplug area too (if any). - i440fx_pcihost_get_pci_hole64_start(), q35_host_get_pci_hole64_start(): board-specific 64-bit base property getters called abstractly by the ACPI generator. Both of these fall back to pc_pci_hole64_start() if the firmware didn't program any 64-bit hole (i.e. if the firmware didn't assign a 64-bit GPA to any MMIO BAR on any device). Otherwise, they honor the firmware's BAR assignments (i.e., they treat the lowest 64-bit GPA programmed by the firmware as the base address for the aperture). - i440fx_pcihost_get_pci_hole64_end(), q35_host_get_pci_hole64_end(): these intended to extend the aperture to our size recommendation, calculated relative to the base of the aperture. Despite the original intent, i440fx_pcihost_get_pci_hole64_end() and q35_host_get_pci_hole64_end() currently only extend the aperture relative to the default base (pc_pci_hole64_start()), ignoring any programming done by the firmware. This means that our size recommendation may not be met. Fix it by honoring the firmware's address assignments. The strange extension sizes were spotted by Alex, in the log of a guest kernel running on top of OVMF (which prefers to assign 64-bit GPAs to 64-bit BARs). This change only affects DSDT generation, therefore no new compat property is being introduced. Using an i440fx OVMF guest with 5GB RAM, an example _CRS change is: > @@ -881,9 +881,9 @@ > QWordMemory (ResourceProducer, PosDecode, MinFixed, MaxFixed, Cacheable, ReadWrite, > 0x0000000000000000, // Granularity > 0x0000000800000000, // Range Minimum > - 0x000000080001C0FF, // Range Maximum > + 0x000000087FFFFFFF, // Range Maximum > 0x0000000000000000, // Translation Offset > - 0x000000000001C100, // Length > + 0x0000000080000000, // Length > ,, , AddressRangeMemory, TypeStatic) > }) > Device (GPE0) (On i440fx, the low RAM split is at 3GB, in this case. Therefore, with 5GB guest RAM and no DIMM hotplug range, pc_pci_hole64_start() returns 4 + (5-3) = 6 GB. Adding the 2GB extension to that yields 8GB, which is below the firmware-programmed base of 32GB, before the patch. Therefore, before the patch, the extension is ineffective. After the patch, we add the 2GB extension to the firmware-programmed base, namely 32GB.) Using a q35 OVMF guest with 5GB RAM, an example _CRS change is: > @@ -3162,9 +3162,9 @@ > QWordMemory (ResourceProducer, PosDecode, MinFixed, MaxFixed, Cacheable, ReadWrite, > 0x0000000000000000, // Granularity > 0x0000000800000000, // Range Minimum > - 0x00000009BFFFFFFF, // Range Maximum > + 0x0000000FFFFFFFFF, // Range Maximum > 0x0000000000000000, // Translation Offset > - 0x00000001C0000000, // Length > + 0x0000000800000000, // Length > ,, , AddressRangeMemory, TypeStatic) > }) > Device (GPE0) (On Q35, the low RAM split is at 2GB. Therefore, with 5GB guest RAM and no DIMM hotplug range, pc_pci_hole64_start() returns 4 + (5-2) = 7 GB. Adding the 32GB extension to that yields 39GB (0x0000_0009_BFFF_FFFF + 1), before the patch. After the patch, we add the 32GB extension to the firmware-programmed base, namely 32GB.) The ACPI test data for the bios-tables-test case that we added earlier in this series are corrected too, as follows: > @@ -3339,9 +3339,9 @@ > QWordMemory (ResourceProducer, PosDecode, MinFixed, MaxFixed, Cacheable, ReadWrite, > 0x0000000000000000, // Granularity > 0x0000000200000000, // Range Minimum > - 0x00000009BFFFFFFF, // Range Maximum > + 0x00000009FFFFFFFF, // Range Maximum > 0x0000000000000000, // Translation Offset > - 0x00000007C0000000, // Length > + 0x0000000800000000, // Length > ,, , AddressRangeMemory, TypeStatic) > }) > Device (GPE0) Cc: "Michael S. Tsirkin" <mst@redhat.com> Cc: Alex Williamson <alex.williamson@redhat.com> Cc: Gerd Hoffmann <kraxel@redhat.com> Cc: Igor Mammedov <imammedo@redhat.com> Cc: Marcel Apfelbaum <marcel.apfelbaum@gmail.com> Fixes: 9fa99d2519cbf71f871e46871df12cb446dc1c3e Signed-off-by: Laszlo Ersek <lersek@redhat.com> Reviewed-by: Marcel Apfelbaum <marcel.apfelbaum@gmail.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2018-09-28 05:24:38 +08:00
uint64_t hole64_start = i440fx_pcihost_get_pci_hole64_start_value(obj);
Range w64;
uint64_t value, hole64_end;
pci_bus_get_w64_range(h->bus, &w64);
value = range_is_empty(&w64) ? 0 : range_upb(&w64) + 1;
hole64_end = ROUND_UP(hole64_start + s->pci_hole64_size, 1ULL << 30);
if (s->pci_hole64_fix && value < hole64_end) {
value = hole64_end;
}
visit_type_uint64(v, name, &value, errp);
}
static void i440fx_pcihost_initfn(Object *obj)
{
PCIHostState *s = PCI_HOST_BRIDGE(obj);
memory_region_init_io(&s->conf_mem, obj, &pci_host_conf_le_ops, s,
"pci-conf-idx", 4);
memory_region_init_io(&s->data_mem, obj, &pci_host_data_le_ops, s,
"pci-conf-data", 4);
object_property_add(obj, PCI_HOST_PROP_PCI_HOLE_START, "uint32",
i440fx_pcihost_get_pci_hole_start,
NULL, NULL, NULL, NULL);
object_property_add(obj, PCI_HOST_PROP_PCI_HOLE_END, "uint32",
i440fx_pcihost_get_pci_hole_end,
NULL, NULL, NULL, NULL);
object_property_add(obj, PCI_HOST_PROP_PCI_HOLE64_START, "uint64",
i440fx_pcihost_get_pci_hole64_start,
NULL, NULL, NULL, NULL);
object_property_add(obj, PCI_HOST_PROP_PCI_HOLE64_END, "uint64",
i440fx_pcihost_get_pci_hole64_end,
NULL, NULL, NULL, NULL);
}
static void i440fx_pcihost_realize(DeviceState *dev, Error **errp)
{
PCIHostState *s = PCI_HOST_BRIDGE(dev);
SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
sysbus_add_io(sbd, 0xcf8, &s->conf_mem);
sysbus_init_ioports(sbd, 0xcf8, 4);
sysbus_add_io(sbd, 0xcfc, &s->data_mem);
sysbus_init_ioports(sbd, 0xcfc, 4);
/* register i440fx 0xcf8 port as coalesced pio */
memory_region_set_flush_coalesced(&s->data_mem);
memory_region_add_coalescing(&s->conf_mem, 0, 4);
}
static void i440fx_realize(PCIDevice *dev, Error **errp)
{
dev->config[I440FX_SMRAM] = 0x02;
if (object_property_get_bool(qdev_get_machine(), "iommu", NULL)) {
Convert error_report() to warn_report() Convert all uses of error_report("warning:"... to use warn_report() instead. This helps standardise on a single method of printing warnings to the user. All of the warnings were changed using these two commands: find ./* -type f -exec sed -i \ 's|error_report(".*warning[,:] |warn_report("|Ig' {} + Indentation fixed up manually afterwards. The test-qdev-global-props test case was manually updated to ensure that this patch passes make check (as the test cases are case sensitive). Signed-off-by: Alistair Francis <alistair.francis@xilinx.com> Suggested-by: Thomas Huth <thuth@redhat.com> Cc: Jeff Cody <jcody@redhat.com> Cc: Kevin Wolf <kwolf@redhat.com> Cc: Max Reitz <mreitz@redhat.com> Cc: Ronnie Sahlberg <ronniesahlberg@gmail.com> Cc: Paolo Bonzini <pbonzini@redhat.com> Cc: Peter Lieven <pl@kamp.de> Cc: Josh Durgin <jdurgin@redhat.com> Cc: "Richard W.M. Jones" <rjones@redhat.com> Cc: Markus Armbruster <armbru@redhat.com> Cc: Peter Crosthwaite <crosthwaite.peter@gmail.com> Cc: Richard Henderson <rth@twiddle.net> Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com> Cc: Greg Kurz <groug@kaod.org> Cc: Rob Herring <robh@kernel.org> Cc: Peter Maydell <peter.maydell@linaro.org> Cc: Peter Chubb <peter.chubb@nicta.com.au> Cc: Eduardo Habkost <ehabkost@redhat.com> Cc: Marcel Apfelbaum <marcel@redhat.com> Cc: "Michael S. Tsirkin" <mst@redhat.com> Cc: Igor Mammedov <imammedo@redhat.com> Cc: David Gibson <david@gibson.dropbear.id.au> Cc: Alexander Graf <agraf@suse.de> Cc: Gerd Hoffmann <kraxel@redhat.com> Cc: Jason Wang <jasowang@redhat.com> Cc: Marcelo Tosatti <mtosatti@redhat.com> Cc: Christian Borntraeger <borntraeger@de.ibm.com> Cc: Cornelia Huck <cohuck@redhat.com> Cc: Stefan Hajnoczi <stefanha@redhat.com> Acked-by: David Gibson <david@gibson.dropbear.id.au> Acked-by: Greg Kurz <groug@kaod.org> Acked-by: Cornelia Huck <cohuck@redhat.com> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> Reviewed by: Peter Chubb <peter.chubb@data61.csiro.au> Acked-by: Max Reitz <mreitz@redhat.com> Acked-by: Marcel Apfelbaum <marcel@redhat.com> Message-Id: <e1cfa2cd47087c248dd24caca9c33d9af0c499b0.1499866456.git.alistair.francis@xilinx.com> Reviewed-by: Markus Armbruster <armbru@redhat.com> Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-07-12 21:57:41 +08:00
warn_report("i440fx doesn't support emulated iommu");
}
}
PCIBus *i440fx_init(const char *host_type, const char *pci_type,
PCII440FXState **pi440fx_state,
int *piix3_devfn,
ISABus **isa_bus, qemu_irq *pic,
MemoryRegion *address_space_mem,
MemoryRegion *address_space_io,
ram_addr_t ram_size,
ram_addr_t below_4g_mem_size,
ram_addr_t above_4g_mem_size,
MemoryRegion *pci_address_space,
MemoryRegion *ram_memory)
{
DeviceState *dev;
PCIBus *b;
PCIDevice *d;
PCIHostState *s;
PIIX3State *piix3;
PCII440FXState *f;
unsigned i;
I440FXState *i440fx;
dev = qdev_create(NULL, host_type);
s = PCI_HOST_BRIDGE(dev);
b = pci_root_bus_new(dev, NULL, pci_address_space,
address_space_io, 0, TYPE_PCI_BUS);
s->bus = b;
object_property_add_child(qdev_get_machine(), "i440fx", OBJECT(dev), NULL);
qdev_init_nofail(dev);
d = pci_create_simple(b, 0, pci_type);
*pi440fx_state = I440FX_PCI_DEVICE(d);
f = *pi440fx_state;
f->system_memory = address_space_mem;
f->pci_address_space = pci_address_space;
f->ram_memory = ram_memory;
i440fx = I440FX_PCI_HOST_BRIDGE(dev);
range_set_bounds(&i440fx->pci_hole, below_4g_mem_size,
IO_APIC_DEFAULT_ADDRESS - 1);
/* setup pci memory mapping */
pc_pci_as_mapping_init(OBJECT(f), f->system_memory,
f->pci_address_space);
/* if *disabled* show SMRAM to all CPUs */
memory_region_init_alias(&f->smram_region, OBJECT(d), "smram-region",
f->pci_address_space, 0xa0000, 0x20000);
memory_region_add_subregion_overlap(f->system_memory, 0xa0000,
&f->smram_region, 1);
memory_region_set_enabled(&f->smram_region, true);
/* smram, as seen by SMM CPUs */
memory_region_init(&f->smram, OBJECT(d), "smram", 1ull << 32);
memory_region_set_enabled(&f->smram, true);
memory_region_init_alias(&f->low_smram, OBJECT(d), "smram-low",
f->ram_memory, 0xa0000, 0x20000);
memory_region_set_enabled(&f->low_smram, true);
memory_region_add_subregion(&f->smram, 0xa0000, &f->low_smram);
object_property_add_const_link(qdev_get_machine(), "smram",
OBJECT(&f->smram), &error_abort);
init_pam(dev, f->ram_memory, f->system_memory, f->pci_address_space,
&f->pam_regions[0], PAM_BIOS_BASE, PAM_BIOS_SIZE);
for (i = 0; i < ARRAY_SIZE(f->pam_regions) - 1; ++i) {
init_pam(dev, f->ram_memory, f->system_memory, f->pci_address_space,
&f->pam_regions[i+1], PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE,
PAM_EXPAN_SIZE);
}
/* Xen supports additional interrupt routes from the PCI devices to
* the IOAPIC: the four pins of each PCI device on the bus are also
* connected to the IOAPIC directly.
* These additional routes can be discovered through ACPI. */
if (xen_enabled()) {
PCIDevice *pci_dev = pci_create_simple_multifunction(b,
-1, true, TYPE_PIIX3_XEN_DEVICE);
piix3 = PIIX3_PCI_DEVICE(pci_dev);
pci_bus_irqs(b, xen_piix3_set_irq, xen_pci_slot_get_pirq,
piix3, XEN_PIIX_NUM_PIRQS);
} else {
PCIDevice *pci_dev = pci_create_simple_multifunction(b,
-1, true, TYPE_PIIX3_DEVICE);
piix3 = PIIX3_PCI_DEVICE(pci_dev);
pci_bus_irqs(b, piix3_set_irq, pci_slot_get_pirq, piix3,
PIIX_NUM_PIRQS);
pci_bus_set_route_irq_fn(b, piix3_route_intx_pin_to_irq);
}
piix3->pic = pic;
*isa_bus = ISA_BUS(qdev_get_child_bus(DEVICE(piix3), "isa.0"));
*piix3_devfn = piix3->dev.devfn;
ram_size = ram_size / 8 / 1024 / 1024;
if (ram_size > 255) {
ram_size = 255;
}
d->config[I440FX_COREBOOT_RAM_SIZE] = ram_size;
i440fx_update_memory_mappings(f);
return b;
}
PCIBus *find_i440fx(void)
{
PCIHostState *s = OBJECT_CHECK(PCIHostState,
object_resolve_path("/machine/i440fx", NULL),
TYPE_PCI_HOST_BRIDGE);
return s ? s->bus : NULL;
}
/* PIIX3 PCI to ISA bridge */
static void piix3_set_irq_pic(PIIX3State *piix3, int pic_irq)
{
qemu_set_irq(piix3->pic[pic_irq],
!!(piix3->pic_levels &
(((1ULL << PIIX_NUM_PIRQS) - 1) <<
(pic_irq * PIIX_NUM_PIRQS))));
}
static void piix3_set_irq_level_internal(PIIX3State *piix3, int pirq, int level)
{
int pic_irq;
uint64_t mask;
pic_irq = piix3->dev.config[PIIX_PIRQC + pirq];
if (pic_irq >= PIIX_NUM_PIC_IRQS) {
return;
}
mask = 1ULL << ((pic_irq * PIIX_NUM_PIRQS) + pirq);
piix3->pic_levels &= ~mask;
piix3->pic_levels |= mask * !!level;
}
static void piix3_set_irq_level(PIIX3State *piix3, int pirq, int level)
{
int pic_irq;
pic_irq = piix3->dev.config[PIIX_PIRQC + pirq];
if (pic_irq >= PIIX_NUM_PIC_IRQS) {
return;
}
piix3_set_irq_level_internal(piix3, pirq, level);
piix3_set_irq_pic(piix3, pic_irq);
}
static void piix3_set_irq(void *opaque, int pirq, int level)
{
PIIX3State *piix3 = opaque;
piix3_set_irq_level(piix3, pirq, level);
}
static PCIINTxRoute piix3_route_intx_pin_to_irq(void *opaque, int pin)
{
PIIX3State *piix3 = opaque;
int irq = piix3->dev.config[PIIX_PIRQC + pin];
PCIINTxRoute route;
if (irq < PIIX_NUM_PIC_IRQS) {
route.mode = PCI_INTX_ENABLED;
route.irq = irq;
} else {
route.mode = PCI_INTX_DISABLED;
route.irq = -1;
}
return route;
}
/* irq routing is changed. so rebuild bitmap */
static void piix3_update_irq_levels(PIIX3State *piix3)
{
PCIBus *bus = pci_get_bus(&piix3->dev);
int pirq;
piix3->pic_levels = 0;
for (pirq = 0; pirq < PIIX_NUM_PIRQS; pirq++) {
piix3_set_irq_level(piix3, pirq, pci_bus_get_irq_level(bus, pirq));
}
}
static void piix3_write_config(PCIDevice *dev,
uint32_t address, uint32_t val, int len)
{
pci_default_write_config(dev, address, val, len);
if (ranges_overlap(address, len, PIIX_PIRQC, 4)) {
PIIX3State *piix3 = PIIX3_PCI_DEVICE(dev);
int pic_irq;
pci_bus_fire_intx_routing_notifier(pci_get_bus(&piix3->dev));
piix3_update_irq_levels(piix3);
for (pic_irq = 0; pic_irq < PIIX_NUM_PIC_IRQS; pic_irq++) {
piix3_set_irq_pic(piix3, pic_irq);
}
}
}
static void piix3_write_config_xen(PCIDevice *dev,
uint32_t address, uint32_t val, int len)
{
xen_piix_pci_write_config_client(address, val, len);
piix3_write_config(dev, address, val, len);
}
static void piix3_reset(void *opaque)
{
PIIX3State *d = opaque;
uint8_t *pci_conf = d->dev.config;
pci_conf[0x04] = 0x07; /* master, memory and I/O */
pci_conf[0x05] = 0x00;
pci_conf[0x06] = 0x00;
pci_conf[0x07] = 0x02; /* PCI_status_devsel_medium */
pci_conf[0x4c] = 0x4d;
pci_conf[0x4e] = 0x03;
pci_conf[0x4f] = 0x00;
pci_conf[0x60] = 0x80;
pci_conf[0x61] = 0x80;
pci_conf[0x62] = 0x80;
pci_conf[0x63] = 0x80;
pci_conf[0x69] = 0x02;
pci_conf[0x70] = 0x80;
pci_conf[0x76] = 0x0c;
pci_conf[0x77] = 0x0c;
pci_conf[0x78] = 0x02;
pci_conf[0x79] = 0x00;
pci_conf[0x80] = 0x00;
pci_conf[0x82] = 0x00;
pci_conf[0xa0] = 0x08;
pci_conf[0xa2] = 0x00;
pci_conf[0xa3] = 0x00;
pci_conf[0xa4] = 0x00;
pci_conf[0xa5] = 0x00;
pci_conf[0xa6] = 0x00;
pci_conf[0xa7] = 0x00;
pci_conf[0xa8] = 0x0f;
pci_conf[0xaa] = 0x00;
pci_conf[0xab] = 0x00;
pci_conf[0xac] = 0x00;
pci_conf[0xae] = 0x00;
d->pic_levels = 0;
PIIX3: reset the VM when the Reset Control Register's RCPU bit gets set Traditional PCI config space access is achieved by writing a 32 bit value to io port 0xcf8 to identify the bus, device, function and config register. Port 0xcfc then contains the register in question. But if you write the appropriate pair of magic values to 0xcf9, the machine will reboot. Spectacular! And not standardised in any way (certainly not part of the PCI spec), so different chipsets may have different requirements. Booo. In the PIIX3 spec, IO port 0xcf9 is specified as the Reset Control Register. Bit 1 (System Reset, SRST) would normally differentiate between soft reset and hard reset, but we ignore the difference beyond allowing the guest to read it back. RHBZ reference: 890459 This patch introduces the following overlap between the preexistent "pci-conf-idx" region and the "piix3-reset-control" region just being added. Partial output from "info mtree": I/O 0000000000000000-000000000000ffff (prio 0, RW): io 0000000000000cf8-0000000000000cfb (prio 0, RW): pci-conf-idx 0000000000000cf9-0000000000000cf9 (prio 1, RW): piix3-reset-control I sanity-checked the patch by booting a RHEL-6.3 guest and found no problems. I summoned gdb and set a breakpoint on rcr_write() in order to gather a bit more confidence. Relevant frames of the stack: kvm_handle_io (port=3321, data=0x7f3f5f3de000, direction=1, size=1, count=1) [kvm-all.c:1422] cpu_outb (addr=3321, val=6 '\006') [ioport.c:289] ioport_write (index=0, address=3321, data=6) [ioport.c:83] ioport_writeb_thunk (opaque=0x7f3f622c4680, addr=3321, data=6) [ioport.c:212] memory_region_iorange_write (iorange=0x7f3f622c4680, offset=0, width=1, data=6) [memory.c:439] access_with_adjusted_size (addr=0, value=0x7f3f531fbac0, size=1, access_size_min=1, access_size_max=4, access=0x7f3f5f6e0f90 <memory_region_write_accessor>, opaque=0x7f3f6227b668) [memory.c:364] memory_region_write_accessor (opaque=0x7f3f6227b668, addr=0, value=0x7f3f531fbac0, size=1, shift=0, mask=255) [memory.c:334] rcr_write (opaque=0x7f3f6227afb0, addr=0, val=6, len=1) [hw/piix_pci.c:498] The dispatch happens in ioport_write(); "index=0" means byte-wide access: static void ioport_write(int index, uint32_t address, uint32_t data) { static IOPortWriteFunc * const default_func[3] = { default_ioport_writeb, default_ioport_writew, default_ioport_writel }; IOPortWriteFunc *func = ioport_write_table[index][address]; if (!func) func = default_func[index]; func(ioport_opaque[address], address, data); } The "ioport_write_table" and "ioport_opaque" arrays describe the flattened IO port space. The first array is less interesting (it selects a thunk function). The "ioport_opaque" array is interesting because it decides how writing to the port is implemented ultimately. 4-byte wide access to 0xcf8 (pci-conf-idx): (gdb) print ioport_write_table[2][0xcf8] $1 = (IOPortWriteFunc *) 0x7f3f5f6d99ba <ioport_writel_thunk> (gdb) print \ ((struct MemoryRegionIORange*)ioport_opaque[0xcf8])->mr->ops.write $2 = (void (*)(void *, hwaddr, uint64_t, unsigned int)) 0x7f3f5f5575cb <pci_host_config_write> 1-byte wide access to 0xcf9 (piix3-reset-control): (gdb) print ioport_write_table[0][0xcf9] $3 = (IOPortWriteFunc *) 0x7f3f5f6d98d0 <ioport_writeb_thunk> (gdb) print \ ((struct MemoryRegionIORange*)ioport_opaque[0xcf9])->mr->ops.write $4 = (void (*)(void *, hwaddr, uint64_t, unsigned int)) 0x7f3f5f6b42f1 <rcr_write> The higher priority of "piix3-reset-control" ensures that the 0xcf9 entries in ioport_write_table / ioport_opaque will always belong to it, independently of its relative registration order versus "pci-conf-idx". Signed-off-by: Laszlo Ersek <lersek@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2013-01-24 17:31:20 +08:00
d->rcr = 0;
}
static int piix3_post_load(void *opaque, int version_id)
{
PIIX3State *piix3 = opaque;
int pirq;
/* Because the i8259 has not been deserialized yet, qemu_irq_raise
* might bring the system to a different state than the saved one;
* for example, the interrupt could be masked but the i8259 would
* not know that yet and would trigger an interrupt in the CPU.
*
* Here, we update irq levels without raising the interrupt.
* Interrupt state will be deserialized separately through the i8259.
*/
piix3->pic_levels = 0;
for (pirq = 0; pirq < PIIX_NUM_PIRQS; pirq++) {
piix3_set_irq_level_internal(piix3, pirq,
pci_bus_get_irq_level(pci_get_bus(&piix3->dev), pirq));
}
return 0;
}
static int piix3_pre_save(void *opaque)
{
int i;
PIIX3State *piix3 = opaque;
for (i = 0; i < ARRAY_SIZE(piix3->pci_irq_levels_vmstate); i++) {
piix3->pci_irq_levels_vmstate[i] =
pci_bus_get_irq_level(pci_get_bus(&piix3->dev), i);
}
return 0;
}
PIIX3: reset the VM when the Reset Control Register's RCPU bit gets set Traditional PCI config space access is achieved by writing a 32 bit value to io port 0xcf8 to identify the bus, device, function and config register. Port 0xcfc then contains the register in question. But if you write the appropriate pair of magic values to 0xcf9, the machine will reboot. Spectacular! And not standardised in any way (certainly not part of the PCI spec), so different chipsets may have different requirements. Booo. In the PIIX3 spec, IO port 0xcf9 is specified as the Reset Control Register. Bit 1 (System Reset, SRST) would normally differentiate between soft reset and hard reset, but we ignore the difference beyond allowing the guest to read it back. RHBZ reference: 890459 This patch introduces the following overlap between the preexistent "pci-conf-idx" region and the "piix3-reset-control" region just being added. Partial output from "info mtree": I/O 0000000000000000-000000000000ffff (prio 0, RW): io 0000000000000cf8-0000000000000cfb (prio 0, RW): pci-conf-idx 0000000000000cf9-0000000000000cf9 (prio 1, RW): piix3-reset-control I sanity-checked the patch by booting a RHEL-6.3 guest and found no problems. I summoned gdb and set a breakpoint on rcr_write() in order to gather a bit more confidence. Relevant frames of the stack: kvm_handle_io (port=3321, data=0x7f3f5f3de000, direction=1, size=1, count=1) [kvm-all.c:1422] cpu_outb (addr=3321, val=6 '\006') [ioport.c:289] ioport_write (index=0, address=3321, data=6) [ioport.c:83] ioport_writeb_thunk (opaque=0x7f3f622c4680, addr=3321, data=6) [ioport.c:212] memory_region_iorange_write (iorange=0x7f3f622c4680, offset=0, width=1, data=6) [memory.c:439] access_with_adjusted_size (addr=0, value=0x7f3f531fbac0, size=1, access_size_min=1, access_size_max=4, access=0x7f3f5f6e0f90 <memory_region_write_accessor>, opaque=0x7f3f6227b668) [memory.c:364] memory_region_write_accessor (opaque=0x7f3f6227b668, addr=0, value=0x7f3f531fbac0, size=1, shift=0, mask=255) [memory.c:334] rcr_write (opaque=0x7f3f6227afb0, addr=0, val=6, len=1) [hw/piix_pci.c:498] The dispatch happens in ioport_write(); "index=0" means byte-wide access: static void ioport_write(int index, uint32_t address, uint32_t data) { static IOPortWriteFunc * const default_func[3] = { default_ioport_writeb, default_ioport_writew, default_ioport_writel }; IOPortWriteFunc *func = ioport_write_table[index][address]; if (!func) func = default_func[index]; func(ioport_opaque[address], address, data); } The "ioport_write_table" and "ioport_opaque" arrays describe the flattened IO port space. The first array is less interesting (it selects a thunk function). The "ioport_opaque" array is interesting because it decides how writing to the port is implemented ultimately. 4-byte wide access to 0xcf8 (pci-conf-idx): (gdb) print ioport_write_table[2][0xcf8] $1 = (IOPortWriteFunc *) 0x7f3f5f6d99ba <ioport_writel_thunk> (gdb) print \ ((struct MemoryRegionIORange*)ioport_opaque[0xcf8])->mr->ops.write $2 = (void (*)(void *, hwaddr, uint64_t, unsigned int)) 0x7f3f5f5575cb <pci_host_config_write> 1-byte wide access to 0xcf9 (piix3-reset-control): (gdb) print ioport_write_table[0][0xcf9] $3 = (IOPortWriteFunc *) 0x7f3f5f6d98d0 <ioport_writeb_thunk> (gdb) print \ ((struct MemoryRegionIORange*)ioport_opaque[0xcf9])->mr->ops.write $4 = (void (*)(void *, hwaddr, uint64_t, unsigned int)) 0x7f3f5f6b42f1 <rcr_write> The higher priority of "piix3-reset-control" ensures that the 0xcf9 entries in ioport_write_table / ioport_opaque will always belong to it, independently of its relative registration order versus "pci-conf-idx". Signed-off-by: Laszlo Ersek <lersek@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2013-01-24 17:31:20 +08:00
static bool piix3_rcr_needed(void *opaque)
{
PIIX3State *piix3 = opaque;
return (piix3->rcr != 0);
}
static const VMStateDescription vmstate_piix3_rcr = {
.name = "PIIX3/rcr",
.version_id = 1,
.minimum_version_id = 1,
.needed = piix3_rcr_needed,
.fields = (VMStateField[]) {
PIIX3: reset the VM when the Reset Control Register's RCPU bit gets set Traditional PCI config space access is achieved by writing a 32 bit value to io port 0xcf8 to identify the bus, device, function and config register. Port 0xcfc then contains the register in question. But if you write the appropriate pair of magic values to 0xcf9, the machine will reboot. Spectacular! And not standardised in any way (certainly not part of the PCI spec), so different chipsets may have different requirements. Booo. In the PIIX3 spec, IO port 0xcf9 is specified as the Reset Control Register. Bit 1 (System Reset, SRST) would normally differentiate between soft reset and hard reset, but we ignore the difference beyond allowing the guest to read it back. RHBZ reference: 890459 This patch introduces the following overlap between the preexistent "pci-conf-idx" region and the "piix3-reset-control" region just being added. Partial output from "info mtree": I/O 0000000000000000-000000000000ffff (prio 0, RW): io 0000000000000cf8-0000000000000cfb (prio 0, RW): pci-conf-idx 0000000000000cf9-0000000000000cf9 (prio 1, RW): piix3-reset-control I sanity-checked the patch by booting a RHEL-6.3 guest and found no problems. I summoned gdb and set a breakpoint on rcr_write() in order to gather a bit more confidence. Relevant frames of the stack: kvm_handle_io (port=3321, data=0x7f3f5f3de000, direction=1, size=1, count=1) [kvm-all.c:1422] cpu_outb (addr=3321, val=6 '\006') [ioport.c:289] ioport_write (index=0, address=3321, data=6) [ioport.c:83] ioport_writeb_thunk (opaque=0x7f3f622c4680, addr=3321, data=6) [ioport.c:212] memory_region_iorange_write (iorange=0x7f3f622c4680, offset=0, width=1, data=6) [memory.c:439] access_with_adjusted_size (addr=0, value=0x7f3f531fbac0, size=1, access_size_min=1, access_size_max=4, access=0x7f3f5f6e0f90 <memory_region_write_accessor>, opaque=0x7f3f6227b668) [memory.c:364] memory_region_write_accessor (opaque=0x7f3f6227b668, addr=0, value=0x7f3f531fbac0, size=1, shift=0, mask=255) [memory.c:334] rcr_write (opaque=0x7f3f6227afb0, addr=0, val=6, len=1) [hw/piix_pci.c:498] The dispatch happens in ioport_write(); "index=0" means byte-wide access: static void ioport_write(int index, uint32_t address, uint32_t data) { static IOPortWriteFunc * const default_func[3] = { default_ioport_writeb, default_ioport_writew, default_ioport_writel }; IOPortWriteFunc *func = ioport_write_table[index][address]; if (!func) func = default_func[index]; func(ioport_opaque[address], address, data); } The "ioport_write_table" and "ioport_opaque" arrays describe the flattened IO port space. The first array is less interesting (it selects a thunk function). The "ioport_opaque" array is interesting because it decides how writing to the port is implemented ultimately. 4-byte wide access to 0xcf8 (pci-conf-idx): (gdb) print ioport_write_table[2][0xcf8] $1 = (IOPortWriteFunc *) 0x7f3f5f6d99ba <ioport_writel_thunk> (gdb) print \ ((struct MemoryRegionIORange*)ioport_opaque[0xcf8])->mr->ops.write $2 = (void (*)(void *, hwaddr, uint64_t, unsigned int)) 0x7f3f5f5575cb <pci_host_config_write> 1-byte wide access to 0xcf9 (piix3-reset-control): (gdb) print ioport_write_table[0][0xcf9] $3 = (IOPortWriteFunc *) 0x7f3f5f6d98d0 <ioport_writeb_thunk> (gdb) print \ ((struct MemoryRegionIORange*)ioport_opaque[0xcf9])->mr->ops.write $4 = (void (*)(void *, hwaddr, uint64_t, unsigned int)) 0x7f3f5f6b42f1 <rcr_write> The higher priority of "piix3-reset-control" ensures that the 0xcf9 entries in ioport_write_table / ioport_opaque will always belong to it, independently of its relative registration order versus "pci-conf-idx". Signed-off-by: Laszlo Ersek <lersek@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2013-01-24 17:31:20 +08:00
VMSTATE_UINT8(rcr, PIIX3State),
VMSTATE_END_OF_LIST()
}
};
static const VMStateDescription vmstate_piix3 = {
.name = "PIIX3",
.version_id = 3,
.minimum_version_id = 2,
.post_load = piix3_post_load,
.pre_save = piix3_pre_save,
.fields = (VMStateField[]) {
VMSTATE_PCI_DEVICE(dev, PIIX3State),
VMSTATE_INT32_ARRAY_V(pci_irq_levels_vmstate, PIIX3State,
PIIX_NUM_PIRQS, 3),
VMSTATE_END_OF_LIST()
PIIX3: reset the VM when the Reset Control Register's RCPU bit gets set Traditional PCI config space access is achieved by writing a 32 bit value to io port 0xcf8 to identify the bus, device, function and config register. Port 0xcfc then contains the register in question. But if you write the appropriate pair of magic values to 0xcf9, the machine will reboot. Spectacular! And not standardised in any way (certainly not part of the PCI spec), so different chipsets may have different requirements. Booo. In the PIIX3 spec, IO port 0xcf9 is specified as the Reset Control Register. Bit 1 (System Reset, SRST) would normally differentiate between soft reset and hard reset, but we ignore the difference beyond allowing the guest to read it back. RHBZ reference: 890459 This patch introduces the following overlap between the preexistent "pci-conf-idx" region and the "piix3-reset-control" region just being added. Partial output from "info mtree": I/O 0000000000000000-000000000000ffff (prio 0, RW): io 0000000000000cf8-0000000000000cfb (prio 0, RW): pci-conf-idx 0000000000000cf9-0000000000000cf9 (prio 1, RW): piix3-reset-control I sanity-checked the patch by booting a RHEL-6.3 guest and found no problems. I summoned gdb and set a breakpoint on rcr_write() in order to gather a bit more confidence. Relevant frames of the stack: kvm_handle_io (port=3321, data=0x7f3f5f3de000, direction=1, size=1, count=1) [kvm-all.c:1422] cpu_outb (addr=3321, val=6 '\006') [ioport.c:289] ioport_write (index=0, address=3321, data=6) [ioport.c:83] ioport_writeb_thunk (opaque=0x7f3f622c4680, addr=3321, data=6) [ioport.c:212] memory_region_iorange_write (iorange=0x7f3f622c4680, offset=0, width=1, data=6) [memory.c:439] access_with_adjusted_size (addr=0, value=0x7f3f531fbac0, size=1, access_size_min=1, access_size_max=4, access=0x7f3f5f6e0f90 <memory_region_write_accessor>, opaque=0x7f3f6227b668) [memory.c:364] memory_region_write_accessor (opaque=0x7f3f6227b668, addr=0, value=0x7f3f531fbac0, size=1, shift=0, mask=255) [memory.c:334] rcr_write (opaque=0x7f3f6227afb0, addr=0, val=6, len=1) [hw/piix_pci.c:498] The dispatch happens in ioport_write(); "index=0" means byte-wide access: static void ioport_write(int index, uint32_t address, uint32_t data) { static IOPortWriteFunc * const default_func[3] = { default_ioport_writeb, default_ioport_writew, default_ioport_writel }; IOPortWriteFunc *func = ioport_write_table[index][address]; if (!func) func = default_func[index]; func(ioport_opaque[address], address, data); } The "ioport_write_table" and "ioport_opaque" arrays describe the flattened IO port space. The first array is less interesting (it selects a thunk function). The "ioport_opaque" array is interesting because it decides how writing to the port is implemented ultimately. 4-byte wide access to 0xcf8 (pci-conf-idx): (gdb) print ioport_write_table[2][0xcf8] $1 = (IOPortWriteFunc *) 0x7f3f5f6d99ba <ioport_writel_thunk> (gdb) print \ ((struct MemoryRegionIORange*)ioport_opaque[0xcf8])->mr->ops.write $2 = (void (*)(void *, hwaddr, uint64_t, unsigned int)) 0x7f3f5f5575cb <pci_host_config_write> 1-byte wide access to 0xcf9 (piix3-reset-control): (gdb) print ioport_write_table[0][0xcf9] $3 = (IOPortWriteFunc *) 0x7f3f5f6d98d0 <ioport_writeb_thunk> (gdb) print \ ((struct MemoryRegionIORange*)ioport_opaque[0xcf9])->mr->ops.write $4 = (void (*)(void *, hwaddr, uint64_t, unsigned int)) 0x7f3f5f6b42f1 <rcr_write> The higher priority of "piix3-reset-control" ensures that the 0xcf9 entries in ioport_write_table / ioport_opaque will always belong to it, independently of its relative registration order versus "pci-conf-idx". Signed-off-by: Laszlo Ersek <lersek@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2013-01-24 17:31:20 +08:00
},
.subsections = (const VMStateDescription*[]) {
&vmstate_piix3_rcr,
NULL
PIIX3: reset the VM when the Reset Control Register's RCPU bit gets set Traditional PCI config space access is achieved by writing a 32 bit value to io port 0xcf8 to identify the bus, device, function and config register. Port 0xcfc then contains the register in question. But if you write the appropriate pair of magic values to 0xcf9, the machine will reboot. Spectacular! And not standardised in any way (certainly not part of the PCI spec), so different chipsets may have different requirements. Booo. In the PIIX3 spec, IO port 0xcf9 is specified as the Reset Control Register. Bit 1 (System Reset, SRST) would normally differentiate between soft reset and hard reset, but we ignore the difference beyond allowing the guest to read it back. RHBZ reference: 890459 This patch introduces the following overlap between the preexistent "pci-conf-idx" region and the "piix3-reset-control" region just being added. Partial output from "info mtree": I/O 0000000000000000-000000000000ffff (prio 0, RW): io 0000000000000cf8-0000000000000cfb (prio 0, RW): pci-conf-idx 0000000000000cf9-0000000000000cf9 (prio 1, RW): piix3-reset-control I sanity-checked the patch by booting a RHEL-6.3 guest and found no problems. I summoned gdb and set a breakpoint on rcr_write() in order to gather a bit more confidence. Relevant frames of the stack: kvm_handle_io (port=3321, data=0x7f3f5f3de000, direction=1, size=1, count=1) [kvm-all.c:1422] cpu_outb (addr=3321, val=6 '\006') [ioport.c:289] ioport_write (index=0, address=3321, data=6) [ioport.c:83] ioport_writeb_thunk (opaque=0x7f3f622c4680, addr=3321, data=6) [ioport.c:212] memory_region_iorange_write (iorange=0x7f3f622c4680, offset=0, width=1, data=6) [memory.c:439] access_with_adjusted_size (addr=0, value=0x7f3f531fbac0, size=1, access_size_min=1, access_size_max=4, access=0x7f3f5f6e0f90 <memory_region_write_accessor>, opaque=0x7f3f6227b668) [memory.c:364] memory_region_write_accessor (opaque=0x7f3f6227b668, addr=0, value=0x7f3f531fbac0, size=1, shift=0, mask=255) [memory.c:334] rcr_write (opaque=0x7f3f6227afb0, addr=0, val=6, len=1) [hw/piix_pci.c:498] The dispatch happens in ioport_write(); "index=0" means byte-wide access: static void ioport_write(int index, uint32_t address, uint32_t data) { static IOPortWriteFunc * const default_func[3] = { default_ioport_writeb, default_ioport_writew, default_ioport_writel }; IOPortWriteFunc *func = ioport_write_table[index][address]; if (!func) func = default_func[index]; func(ioport_opaque[address], address, data); } The "ioport_write_table" and "ioport_opaque" arrays describe the flattened IO port space. The first array is less interesting (it selects a thunk function). The "ioport_opaque" array is interesting because it decides how writing to the port is implemented ultimately. 4-byte wide access to 0xcf8 (pci-conf-idx): (gdb) print ioport_write_table[2][0xcf8] $1 = (IOPortWriteFunc *) 0x7f3f5f6d99ba <ioport_writel_thunk> (gdb) print \ ((struct MemoryRegionIORange*)ioport_opaque[0xcf8])->mr->ops.write $2 = (void (*)(void *, hwaddr, uint64_t, unsigned int)) 0x7f3f5f5575cb <pci_host_config_write> 1-byte wide access to 0xcf9 (piix3-reset-control): (gdb) print ioport_write_table[0][0xcf9] $3 = (IOPortWriteFunc *) 0x7f3f5f6d98d0 <ioport_writeb_thunk> (gdb) print \ ((struct MemoryRegionIORange*)ioport_opaque[0xcf9])->mr->ops.write $4 = (void (*)(void *, hwaddr, uint64_t, unsigned int)) 0x7f3f5f6b42f1 <rcr_write> The higher priority of "piix3-reset-control" ensures that the 0xcf9 entries in ioport_write_table / ioport_opaque will always belong to it, independently of its relative registration order versus "pci-conf-idx". Signed-off-by: Laszlo Ersek <lersek@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2013-01-24 17:31:20 +08:00
}
};
static void rcr_write(void *opaque, hwaddr addr, uint64_t val, unsigned len)
{
PIIX3State *d = opaque;
if (val & 4) {
qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
PIIX3: reset the VM when the Reset Control Register's RCPU bit gets set Traditional PCI config space access is achieved by writing a 32 bit value to io port 0xcf8 to identify the bus, device, function and config register. Port 0xcfc then contains the register in question. But if you write the appropriate pair of magic values to 0xcf9, the machine will reboot. Spectacular! And not standardised in any way (certainly not part of the PCI spec), so different chipsets may have different requirements. Booo. In the PIIX3 spec, IO port 0xcf9 is specified as the Reset Control Register. Bit 1 (System Reset, SRST) would normally differentiate between soft reset and hard reset, but we ignore the difference beyond allowing the guest to read it back. RHBZ reference: 890459 This patch introduces the following overlap between the preexistent "pci-conf-idx" region and the "piix3-reset-control" region just being added. Partial output from "info mtree": I/O 0000000000000000-000000000000ffff (prio 0, RW): io 0000000000000cf8-0000000000000cfb (prio 0, RW): pci-conf-idx 0000000000000cf9-0000000000000cf9 (prio 1, RW): piix3-reset-control I sanity-checked the patch by booting a RHEL-6.3 guest and found no problems. I summoned gdb and set a breakpoint on rcr_write() in order to gather a bit more confidence. Relevant frames of the stack: kvm_handle_io (port=3321, data=0x7f3f5f3de000, direction=1, size=1, count=1) [kvm-all.c:1422] cpu_outb (addr=3321, val=6 '\006') [ioport.c:289] ioport_write (index=0, address=3321, data=6) [ioport.c:83] ioport_writeb_thunk (opaque=0x7f3f622c4680, addr=3321, data=6) [ioport.c:212] memory_region_iorange_write (iorange=0x7f3f622c4680, offset=0, width=1, data=6) [memory.c:439] access_with_adjusted_size (addr=0, value=0x7f3f531fbac0, size=1, access_size_min=1, access_size_max=4, access=0x7f3f5f6e0f90 <memory_region_write_accessor>, opaque=0x7f3f6227b668) [memory.c:364] memory_region_write_accessor (opaque=0x7f3f6227b668, addr=0, value=0x7f3f531fbac0, size=1, shift=0, mask=255) [memory.c:334] rcr_write (opaque=0x7f3f6227afb0, addr=0, val=6, len=1) [hw/piix_pci.c:498] The dispatch happens in ioport_write(); "index=0" means byte-wide access: static void ioport_write(int index, uint32_t address, uint32_t data) { static IOPortWriteFunc * const default_func[3] = { default_ioport_writeb, default_ioport_writew, default_ioport_writel }; IOPortWriteFunc *func = ioport_write_table[index][address]; if (!func) func = default_func[index]; func(ioport_opaque[address], address, data); } The "ioport_write_table" and "ioport_opaque" arrays describe the flattened IO port space. The first array is less interesting (it selects a thunk function). The "ioport_opaque" array is interesting because it decides how writing to the port is implemented ultimately. 4-byte wide access to 0xcf8 (pci-conf-idx): (gdb) print ioport_write_table[2][0xcf8] $1 = (IOPortWriteFunc *) 0x7f3f5f6d99ba <ioport_writel_thunk> (gdb) print \ ((struct MemoryRegionIORange*)ioport_opaque[0xcf8])->mr->ops.write $2 = (void (*)(void *, hwaddr, uint64_t, unsigned int)) 0x7f3f5f5575cb <pci_host_config_write> 1-byte wide access to 0xcf9 (piix3-reset-control): (gdb) print ioport_write_table[0][0xcf9] $3 = (IOPortWriteFunc *) 0x7f3f5f6d98d0 <ioport_writeb_thunk> (gdb) print \ ((struct MemoryRegionIORange*)ioport_opaque[0xcf9])->mr->ops.write $4 = (void (*)(void *, hwaddr, uint64_t, unsigned int)) 0x7f3f5f6b42f1 <rcr_write> The higher priority of "piix3-reset-control" ensures that the 0xcf9 entries in ioport_write_table / ioport_opaque will always belong to it, independently of its relative registration order versus "pci-conf-idx". Signed-off-by: Laszlo Ersek <lersek@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2013-01-24 17:31:20 +08:00
return;
}
PIIX3: reset the VM when the Reset Control Register's RCPU bit gets set Traditional PCI config space access is achieved by writing a 32 bit value to io port 0xcf8 to identify the bus, device, function and config register. Port 0xcfc then contains the register in question. But if you write the appropriate pair of magic values to 0xcf9, the machine will reboot. Spectacular! And not standardised in any way (certainly not part of the PCI spec), so different chipsets may have different requirements. Booo. In the PIIX3 spec, IO port 0xcf9 is specified as the Reset Control Register. Bit 1 (System Reset, SRST) would normally differentiate between soft reset and hard reset, but we ignore the difference beyond allowing the guest to read it back. RHBZ reference: 890459 This patch introduces the following overlap between the preexistent "pci-conf-idx" region and the "piix3-reset-control" region just being added. Partial output from "info mtree": I/O 0000000000000000-000000000000ffff (prio 0, RW): io 0000000000000cf8-0000000000000cfb (prio 0, RW): pci-conf-idx 0000000000000cf9-0000000000000cf9 (prio 1, RW): piix3-reset-control I sanity-checked the patch by booting a RHEL-6.3 guest and found no problems. I summoned gdb and set a breakpoint on rcr_write() in order to gather a bit more confidence. Relevant frames of the stack: kvm_handle_io (port=3321, data=0x7f3f5f3de000, direction=1, size=1, count=1) [kvm-all.c:1422] cpu_outb (addr=3321, val=6 '\006') [ioport.c:289] ioport_write (index=0, address=3321, data=6) [ioport.c:83] ioport_writeb_thunk (opaque=0x7f3f622c4680, addr=3321, data=6) [ioport.c:212] memory_region_iorange_write (iorange=0x7f3f622c4680, offset=0, width=1, data=6) [memory.c:439] access_with_adjusted_size (addr=0, value=0x7f3f531fbac0, size=1, access_size_min=1, access_size_max=4, access=0x7f3f5f6e0f90 <memory_region_write_accessor>, opaque=0x7f3f6227b668) [memory.c:364] memory_region_write_accessor (opaque=0x7f3f6227b668, addr=0, value=0x7f3f531fbac0, size=1, shift=0, mask=255) [memory.c:334] rcr_write (opaque=0x7f3f6227afb0, addr=0, val=6, len=1) [hw/piix_pci.c:498] The dispatch happens in ioport_write(); "index=0" means byte-wide access: static void ioport_write(int index, uint32_t address, uint32_t data) { static IOPortWriteFunc * const default_func[3] = { default_ioport_writeb, default_ioport_writew, default_ioport_writel }; IOPortWriteFunc *func = ioport_write_table[index][address]; if (!func) func = default_func[index]; func(ioport_opaque[address], address, data); } The "ioport_write_table" and "ioport_opaque" arrays describe the flattened IO port space. The first array is less interesting (it selects a thunk function). The "ioport_opaque" array is interesting because it decides how writing to the port is implemented ultimately. 4-byte wide access to 0xcf8 (pci-conf-idx): (gdb) print ioport_write_table[2][0xcf8] $1 = (IOPortWriteFunc *) 0x7f3f5f6d99ba <ioport_writel_thunk> (gdb) print \ ((struct MemoryRegionIORange*)ioport_opaque[0xcf8])->mr->ops.write $2 = (void (*)(void *, hwaddr, uint64_t, unsigned int)) 0x7f3f5f5575cb <pci_host_config_write> 1-byte wide access to 0xcf9 (piix3-reset-control): (gdb) print ioport_write_table[0][0xcf9] $3 = (IOPortWriteFunc *) 0x7f3f5f6d98d0 <ioport_writeb_thunk> (gdb) print \ ((struct MemoryRegionIORange*)ioport_opaque[0xcf9])->mr->ops.write $4 = (void (*)(void *, hwaddr, uint64_t, unsigned int)) 0x7f3f5f6b42f1 <rcr_write> The higher priority of "piix3-reset-control" ensures that the 0xcf9 entries in ioport_write_table / ioport_opaque will always belong to it, independently of its relative registration order versus "pci-conf-idx". Signed-off-by: Laszlo Ersek <lersek@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2013-01-24 17:31:20 +08:00
d->rcr = val & 2; /* keep System Reset type only */
}
static uint64_t rcr_read(void *opaque, hwaddr addr, unsigned len)
{
PIIX3State *d = opaque;
return d->rcr;
}
static const MemoryRegionOps rcr_ops = {
.read = rcr_read,
.write = rcr_write,
.endianness = DEVICE_LITTLE_ENDIAN
};
static void piix3_realize(PCIDevice *dev, Error **errp)
{
PIIX3State *d = PIIX3_PCI_DEVICE(dev);
isa: Clean up error handling around isa_bus_new() We can have at most one ISA bus. If you try to create another one, isa_bus_new() complains to stderr and returns null. isa_bus_new() is called in two contexts, machine's init() and device's realize() methods. Since complaining to stderr is not proper in the latter context, convert isa_bus_new() to Error. Machine's init(): * mips_jazz_init(), called from the init() methods of machines "magnum" and "pica" * mips_r4k_init(), the init() method of machine "mips" * pc_init1() called from the init() methods of non-q35 PC machines * typhoon_init(), called from clipper_init(), the init() method of machine "clipper" These callers always create the first ISA bus, hence isa_bus_new() can't fail. Simply pass &error_abort. Device's realize(): * i82378_realize(), of PCI device "i82378" * ich9_lpc_realize(), of PCI device "ICH9-LPC" * pci_ebus_realize(), of PCI device "ebus" * piix3_realize(), of PCI device "pci-piix3", abstract parent of "PIIX3" and "PIIX3-xen" * piix4_realize(), of PCI device "PIIX4" * vt82c686b_realize(), of PCI device "VT82C686B" Propagate the error. Note that these devices are typically created only by machine init() methods with qdev_init_nofail() or similar. If we screwed up and created an ISA bus before that call, we now give up right away. Before, we'd hobble on, and typically die in isa_bus_irqs(). Similar if someone finds a way to hot-plug one of these critters. Cc: Richard Henderson <rth@twiddle.net> Cc: "Michael S. Tsirkin" <mst@redhat.com> Cc: "Hervé Poussineau" <hpoussin@reactos.org> Cc: Aurelien Jarno <aurelien@aurel32.net> Cc: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk> Signed-off-by: Markus Armbruster <armbru@pond.sub.org> Reviewed-by: Marcel Apfelbaum <marcel@redhat.com> Reviewed-by: Hervé Poussineau <hpoussin@reactos.org> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Message-Id: <1450370121-5768-11-git-send-email-armbru@redhat.com>
2015-12-18 00:35:18 +08:00
if (!isa_bus_new(DEVICE(d), get_system_memory(),
pci_address_space_io(dev), errp)) {
return;
}
PIIX3: reset the VM when the Reset Control Register's RCPU bit gets set Traditional PCI config space access is achieved by writing a 32 bit value to io port 0xcf8 to identify the bus, device, function and config register. Port 0xcfc then contains the register in question. But if you write the appropriate pair of magic values to 0xcf9, the machine will reboot. Spectacular! And not standardised in any way (certainly not part of the PCI spec), so different chipsets may have different requirements. Booo. In the PIIX3 spec, IO port 0xcf9 is specified as the Reset Control Register. Bit 1 (System Reset, SRST) would normally differentiate between soft reset and hard reset, but we ignore the difference beyond allowing the guest to read it back. RHBZ reference: 890459 This patch introduces the following overlap between the preexistent "pci-conf-idx" region and the "piix3-reset-control" region just being added. Partial output from "info mtree": I/O 0000000000000000-000000000000ffff (prio 0, RW): io 0000000000000cf8-0000000000000cfb (prio 0, RW): pci-conf-idx 0000000000000cf9-0000000000000cf9 (prio 1, RW): piix3-reset-control I sanity-checked the patch by booting a RHEL-6.3 guest and found no problems. I summoned gdb and set a breakpoint on rcr_write() in order to gather a bit more confidence. Relevant frames of the stack: kvm_handle_io (port=3321, data=0x7f3f5f3de000, direction=1, size=1, count=1) [kvm-all.c:1422] cpu_outb (addr=3321, val=6 '\006') [ioport.c:289] ioport_write (index=0, address=3321, data=6) [ioport.c:83] ioport_writeb_thunk (opaque=0x7f3f622c4680, addr=3321, data=6) [ioport.c:212] memory_region_iorange_write (iorange=0x7f3f622c4680, offset=0, width=1, data=6) [memory.c:439] access_with_adjusted_size (addr=0, value=0x7f3f531fbac0, size=1, access_size_min=1, access_size_max=4, access=0x7f3f5f6e0f90 <memory_region_write_accessor>, opaque=0x7f3f6227b668) [memory.c:364] memory_region_write_accessor (opaque=0x7f3f6227b668, addr=0, value=0x7f3f531fbac0, size=1, shift=0, mask=255) [memory.c:334] rcr_write (opaque=0x7f3f6227afb0, addr=0, val=6, len=1) [hw/piix_pci.c:498] The dispatch happens in ioport_write(); "index=0" means byte-wide access: static void ioport_write(int index, uint32_t address, uint32_t data) { static IOPortWriteFunc * const default_func[3] = { default_ioport_writeb, default_ioport_writew, default_ioport_writel }; IOPortWriteFunc *func = ioport_write_table[index][address]; if (!func) func = default_func[index]; func(ioport_opaque[address], address, data); } The "ioport_write_table" and "ioport_opaque" arrays describe the flattened IO port space. The first array is less interesting (it selects a thunk function). The "ioport_opaque" array is interesting because it decides how writing to the port is implemented ultimately. 4-byte wide access to 0xcf8 (pci-conf-idx): (gdb) print ioport_write_table[2][0xcf8] $1 = (IOPortWriteFunc *) 0x7f3f5f6d99ba <ioport_writel_thunk> (gdb) print \ ((struct MemoryRegionIORange*)ioport_opaque[0xcf8])->mr->ops.write $2 = (void (*)(void *, hwaddr, uint64_t, unsigned int)) 0x7f3f5f5575cb <pci_host_config_write> 1-byte wide access to 0xcf9 (piix3-reset-control): (gdb) print ioport_write_table[0][0xcf9] $3 = (IOPortWriteFunc *) 0x7f3f5f6d98d0 <ioport_writeb_thunk> (gdb) print \ ((struct MemoryRegionIORange*)ioport_opaque[0xcf9])->mr->ops.write $4 = (void (*)(void *, hwaddr, uint64_t, unsigned int)) 0x7f3f5f6b42f1 <rcr_write> The higher priority of "piix3-reset-control" ensures that the 0xcf9 entries in ioport_write_table / ioport_opaque will always belong to it, independently of its relative registration order versus "pci-conf-idx". Signed-off-by: Laszlo Ersek <lersek@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2013-01-24 17:31:20 +08:00
memory_region_init_io(&d->rcr_mem, OBJECT(dev), &rcr_ops, d,
"piix3-reset-control", 1);
PIIX3: reset the VM when the Reset Control Register's RCPU bit gets set Traditional PCI config space access is achieved by writing a 32 bit value to io port 0xcf8 to identify the bus, device, function and config register. Port 0xcfc then contains the register in question. But if you write the appropriate pair of magic values to 0xcf9, the machine will reboot. Spectacular! And not standardised in any way (certainly not part of the PCI spec), so different chipsets may have different requirements. Booo. In the PIIX3 spec, IO port 0xcf9 is specified as the Reset Control Register. Bit 1 (System Reset, SRST) would normally differentiate between soft reset and hard reset, but we ignore the difference beyond allowing the guest to read it back. RHBZ reference: 890459 This patch introduces the following overlap between the preexistent "pci-conf-idx" region and the "piix3-reset-control" region just being added. Partial output from "info mtree": I/O 0000000000000000-000000000000ffff (prio 0, RW): io 0000000000000cf8-0000000000000cfb (prio 0, RW): pci-conf-idx 0000000000000cf9-0000000000000cf9 (prio 1, RW): piix3-reset-control I sanity-checked the patch by booting a RHEL-6.3 guest and found no problems. I summoned gdb and set a breakpoint on rcr_write() in order to gather a bit more confidence. Relevant frames of the stack: kvm_handle_io (port=3321, data=0x7f3f5f3de000, direction=1, size=1, count=1) [kvm-all.c:1422] cpu_outb (addr=3321, val=6 '\006') [ioport.c:289] ioport_write (index=0, address=3321, data=6) [ioport.c:83] ioport_writeb_thunk (opaque=0x7f3f622c4680, addr=3321, data=6) [ioport.c:212] memory_region_iorange_write (iorange=0x7f3f622c4680, offset=0, width=1, data=6) [memory.c:439] access_with_adjusted_size (addr=0, value=0x7f3f531fbac0, size=1, access_size_min=1, access_size_max=4, access=0x7f3f5f6e0f90 <memory_region_write_accessor>, opaque=0x7f3f6227b668) [memory.c:364] memory_region_write_accessor (opaque=0x7f3f6227b668, addr=0, value=0x7f3f531fbac0, size=1, shift=0, mask=255) [memory.c:334] rcr_write (opaque=0x7f3f6227afb0, addr=0, val=6, len=1) [hw/piix_pci.c:498] The dispatch happens in ioport_write(); "index=0" means byte-wide access: static void ioport_write(int index, uint32_t address, uint32_t data) { static IOPortWriteFunc * const default_func[3] = { default_ioport_writeb, default_ioport_writew, default_ioport_writel }; IOPortWriteFunc *func = ioport_write_table[index][address]; if (!func) func = default_func[index]; func(ioport_opaque[address], address, data); } The "ioport_write_table" and "ioport_opaque" arrays describe the flattened IO port space. The first array is less interesting (it selects a thunk function). The "ioport_opaque" array is interesting because it decides how writing to the port is implemented ultimately. 4-byte wide access to 0xcf8 (pci-conf-idx): (gdb) print ioport_write_table[2][0xcf8] $1 = (IOPortWriteFunc *) 0x7f3f5f6d99ba <ioport_writel_thunk> (gdb) print \ ((struct MemoryRegionIORange*)ioport_opaque[0xcf8])->mr->ops.write $2 = (void (*)(void *, hwaddr, uint64_t, unsigned int)) 0x7f3f5f5575cb <pci_host_config_write> 1-byte wide access to 0xcf9 (piix3-reset-control): (gdb) print ioport_write_table[0][0xcf9] $3 = (IOPortWriteFunc *) 0x7f3f5f6d98d0 <ioport_writeb_thunk> (gdb) print \ ((struct MemoryRegionIORange*)ioport_opaque[0xcf9])->mr->ops.write $4 = (void (*)(void *, hwaddr, uint64_t, unsigned int)) 0x7f3f5f6b42f1 <rcr_write> The higher priority of "piix3-reset-control" ensures that the 0xcf9 entries in ioport_write_table / ioport_opaque will always belong to it, independently of its relative registration order versus "pci-conf-idx". Signed-off-by: Laszlo Ersek <lersek@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2013-01-24 17:31:20 +08:00
memory_region_add_subregion_overlap(pci_address_space_io(dev), RCR_IOPORT,
&d->rcr_mem, 1);
qemu_register_reset(piix3_reset, d);
}
static void pci_piix3_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
dc->desc = "ISA bridge";
dc->vmsd = &vmstate_piix3;
dc->hotpluggable = false;
k->realize = piix3_realize;
k->vendor_id = PCI_VENDOR_ID_INTEL;
/* 82371SB PIIX3 PCI-to-ISA bridge (Step A1) */
k->device_id = PCI_DEVICE_ID_INTEL_82371SB_0;
k->class_id = PCI_CLASS_BRIDGE_ISA;
/*
* Reason: part of PIIX3 southbridge, needs to be wired up by
* pc_piix.c's pc_init1()
*/
qdev: Replace cannot_instantiate_with_device_add_yet with !user_creatable cannot_instantiate_with_device_add_yet was introduced by commit efec3dd631d94160288392721a5f9c39e50fb2bc to replace no_user. It was supposed to be a temporary measure. When it was introduced, we had 54 cannot_instantiate_with_device_add_yet=true lines in the code. Today (3 years later) this number has not shrunk: we now have 57 cannot_instantiate_with_device_add_yet=true lines. I think it is safe to say it is not a temporary measure, and we won't see the flag go away soon. Instead of a long field name that misleads people to believe it is temporary, replace it a shorter and less misleading field: user_creatable. Except for code comments, changes were generated using the following Coccinelle patch: @@ expression DC; @@ ( -DC->cannot_instantiate_with_device_add_yet = false; +DC->user_creatable = true; | -DC->cannot_instantiate_with_device_add_yet = true; +DC->user_creatable = false; ) @@ typedef ObjectClass; expression dc; identifier class, data; @@ static void device_class_init(ObjectClass *class, void *data) { ... dc->hotpluggable = true; +dc->user_creatable = true; ... } @@ @@ struct DeviceClass { ... -bool cannot_instantiate_with_device_add_yet; +bool user_creatable; ... } @@ expression DC; @@ ( -!DC->cannot_instantiate_with_device_add_yet +DC->user_creatable | -DC->cannot_instantiate_with_device_add_yet +!DC->user_creatable ) Cc: Alistair Francis <alistair.francis@xilinx.com> Cc: Laszlo Ersek <lersek@redhat.com> Cc: Marcel Apfelbaum <marcel@redhat.com> Cc: Markus Armbruster <armbru@redhat.com> Cc: Peter Maydell <peter.maydell@linaro.org> Cc: Thomas Huth <thuth@redhat.com> Acked-by: Alistair Francis <alistair.francis@xilinx.com> Reviewed-by: Thomas Huth <thuth@redhat.com> Reviewed-by: Marcel Apfelbaum <marcel@redhat.com> Acked-by: Marcel Apfelbaum <marcel@redhat.com> Signed-off-by: Eduardo Habkost <ehabkost@redhat.com> Message-Id: <20170503203604.31462-2-ehabkost@redhat.com> [ehabkost: kept "TODO remove once we're there" comment] Reviewed-by: Markus Armbruster <armbru@redhat.com> Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-05-04 04:35:44 +08:00
dc->user_creatable = false;
}
static const TypeInfo piix3_pci_type_info = {
.name = TYPE_PIIX3_PCI_DEVICE,
.parent = TYPE_PCI_DEVICE,
.instance_size = sizeof(PIIX3State),
.abstract = true,
.class_init = pci_piix3_class_init,
pci: Add INTERFACE_CONVENTIONAL_PCI_DEVICE to Conventional PCI devices Add INTERFACE_CONVENTIONAL_PCI_DEVICE to all direct subtypes of TYPE_PCI_DEVICE, except: 1) The ones that already have INTERFACE_PCIE_DEVICE set: * base-xhci * e1000e * nvme * pvscsi * vfio-pci * virtio-pci * vmxnet3 2) base-pci-bridge Not all PCI bridges are Conventional PCI devices, so INTERFACE_CONVENTIONAL_PCI_DEVICE is added only to the subtypes that are actually Conventional PCI: * dec-21154-p2p-bridge * i82801b11-bridge * pbm-bridge * pci-bridge The direct subtypes of base-pci-bridge not touched by this patch are: * xilinx-pcie-root: Already marked as PCIe-only. * pcie-pci-bridge: Already marked as PCIe-only. * pcie-port: all non-abstract subtypes of pcie-port are already marked as PCIe-only devices. 3) megasas-base Not all megasas devices are Conventional PCI devices, so the interface names are added to the subclasses registered by megasas_register_types(), according to information in the megasas_devices[] array. "megasas-gen2" already implements INTERFACE_PCIE_DEVICE, so add INTERFACE_CONVENTIONAL_PCI_DEVICE only to "megasas". Acked-by: Alberto Garcia <berto@igalia.com> Acked-by: John Snow <jsnow@redhat.com> Acked-by: Anthony PERARD <anthony.perard@citrix.com> Signed-off-by: Eduardo Habkost <ehabkost@redhat.com> Reviewed-by: David Gibson <david@gibson.dropbear.id.au> Acked-by: David Gibson <david@gibson.dropbear.id.au> Reviewed-by: Marcel Apfelbaum <marcel@redhat.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-09-28 03:56:34 +08:00
.interfaces = (InterfaceInfo[]) {
{ INTERFACE_CONVENTIONAL_PCI_DEVICE },
{ },
},
};
static void piix3_class_init(ObjectClass *klass, void *data)
{
PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
k->config_write = piix3_write_config;
}
static const TypeInfo piix3_info = {
.name = TYPE_PIIX3_DEVICE,
.parent = TYPE_PIIX3_PCI_DEVICE,
.class_init = piix3_class_init,
};
static void piix3_xen_class_init(ObjectClass *klass, void *data)
{
PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
k->config_write = piix3_write_config_xen;
};
static const TypeInfo piix3_xen_info = {
.name = TYPE_PIIX3_XEN_DEVICE,
.parent = TYPE_PIIX3_PCI_DEVICE,
.class_init = piix3_xen_class_init,
};
static void i440fx_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
k->realize = i440fx_realize;
k->config_write = i440fx_write_config;
k->vendor_id = PCI_VENDOR_ID_INTEL;
k->device_id = PCI_DEVICE_ID_INTEL_82441;
k->revision = 0x02;
k->class_id = PCI_CLASS_BRIDGE_HOST;
dc->desc = "Host bridge";
dc->vmsd = &vmstate_i440fx;
/*
* PCI-facing part of the host bridge, not usable without the
* host-facing part, which can't be device_add'ed, yet.
*/
qdev: Replace cannot_instantiate_with_device_add_yet with !user_creatable cannot_instantiate_with_device_add_yet was introduced by commit efec3dd631d94160288392721a5f9c39e50fb2bc to replace no_user. It was supposed to be a temporary measure. When it was introduced, we had 54 cannot_instantiate_with_device_add_yet=true lines in the code. Today (3 years later) this number has not shrunk: we now have 57 cannot_instantiate_with_device_add_yet=true lines. I think it is safe to say it is not a temporary measure, and we won't see the flag go away soon. Instead of a long field name that misleads people to believe it is temporary, replace it a shorter and less misleading field: user_creatable. Except for code comments, changes were generated using the following Coccinelle patch: @@ expression DC; @@ ( -DC->cannot_instantiate_with_device_add_yet = false; +DC->user_creatable = true; | -DC->cannot_instantiate_with_device_add_yet = true; +DC->user_creatable = false; ) @@ typedef ObjectClass; expression dc; identifier class, data; @@ static void device_class_init(ObjectClass *class, void *data) { ... dc->hotpluggable = true; +dc->user_creatable = true; ... } @@ @@ struct DeviceClass { ... -bool cannot_instantiate_with_device_add_yet; +bool user_creatable; ... } @@ expression DC; @@ ( -!DC->cannot_instantiate_with_device_add_yet +DC->user_creatable | -DC->cannot_instantiate_with_device_add_yet +!DC->user_creatable ) Cc: Alistair Francis <alistair.francis@xilinx.com> Cc: Laszlo Ersek <lersek@redhat.com> Cc: Marcel Apfelbaum <marcel@redhat.com> Cc: Markus Armbruster <armbru@redhat.com> Cc: Peter Maydell <peter.maydell@linaro.org> Cc: Thomas Huth <thuth@redhat.com> Acked-by: Alistair Francis <alistair.francis@xilinx.com> Reviewed-by: Thomas Huth <thuth@redhat.com> Reviewed-by: Marcel Apfelbaum <marcel@redhat.com> Acked-by: Marcel Apfelbaum <marcel@redhat.com> Signed-off-by: Eduardo Habkost <ehabkost@redhat.com> Message-Id: <20170503203604.31462-2-ehabkost@redhat.com> [ehabkost: kept "TODO remove once we're there" comment] Reviewed-by: Markus Armbruster <armbru@redhat.com> Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-05-04 04:35:44 +08:00
dc->user_creatable = false;
dc->hotpluggable = false;
}
static const TypeInfo i440fx_info = {
.name = TYPE_I440FX_PCI_DEVICE,
.parent = TYPE_PCI_DEVICE,
.instance_size = sizeof(PCII440FXState),
.class_init = i440fx_class_init,
pci: Add INTERFACE_CONVENTIONAL_PCI_DEVICE to Conventional PCI devices Add INTERFACE_CONVENTIONAL_PCI_DEVICE to all direct subtypes of TYPE_PCI_DEVICE, except: 1) The ones that already have INTERFACE_PCIE_DEVICE set: * base-xhci * e1000e * nvme * pvscsi * vfio-pci * virtio-pci * vmxnet3 2) base-pci-bridge Not all PCI bridges are Conventional PCI devices, so INTERFACE_CONVENTIONAL_PCI_DEVICE is added only to the subtypes that are actually Conventional PCI: * dec-21154-p2p-bridge * i82801b11-bridge * pbm-bridge * pci-bridge The direct subtypes of base-pci-bridge not touched by this patch are: * xilinx-pcie-root: Already marked as PCIe-only. * pcie-pci-bridge: Already marked as PCIe-only. * pcie-port: all non-abstract subtypes of pcie-port are already marked as PCIe-only devices. 3) megasas-base Not all megasas devices are Conventional PCI devices, so the interface names are added to the subclasses registered by megasas_register_types(), according to information in the megasas_devices[] array. "megasas-gen2" already implements INTERFACE_PCIE_DEVICE, so add INTERFACE_CONVENTIONAL_PCI_DEVICE only to "megasas". Acked-by: Alberto Garcia <berto@igalia.com> Acked-by: John Snow <jsnow@redhat.com> Acked-by: Anthony PERARD <anthony.perard@citrix.com> Signed-off-by: Eduardo Habkost <ehabkost@redhat.com> Reviewed-by: David Gibson <david@gibson.dropbear.id.au> Acked-by: David Gibson <david@gibson.dropbear.id.au> Reviewed-by: Marcel Apfelbaum <marcel@redhat.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-09-28 03:56:34 +08:00
.interfaces = (InterfaceInfo[]) {
{ INTERFACE_CONVENTIONAL_PCI_DEVICE },
{ },
},
};
/* IGD Passthrough Host Bridge. */
typedef struct {
uint8_t offset;
uint8_t len;
} IGDHostInfo;
/* Here we just expose minimal host bridge offset subset. */
static const IGDHostInfo igd_host_bridge_infos[] = {
{0x08, 2}, /* revision id */
{0x2c, 2}, /* sybsystem vendor id */
{0x2e, 2}, /* sybsystem id */
{0x50, 2}, /* SNB: processor graphics control register */
{0x52, 2}, /* processor graphics control register */
{0xa4, 4}, /* SNB: graphics base of stolen memory */
{0xa8, 4}, /* SNB: base of GTT stolen memory */
};
static void host_pci_config_read(int pos, int len, uint32_t *val, Error **errp)
{
int rc, config_fd;
/* Access real host bridge. */
char *path = g_strdup_printf("/sys/bus/pci/devices/%04x:%02x:%02x.%d/%s",
0, 0, 0, 0, "config");
config_fd = open(path, O_RDWR);
if (config_fd < 0) {
error_setg_errno(errp, errno, "Failed to open: %s", path);
goto out;
}
if (lseek(config_fd, pos, SEEK_SET) != pos) {
error_setg_errno(errp, errno, "Failed to seek: %s", path);
goto out_close_fd;
}
do {
rc = read(config_fd, (uint8_t *)val, len);
} while (rc < 0 && (errno == EINTR || errno == EAGAIN));
if (rc != len) {
error_setg_errno(errp, errno, "Failed to read: %s", path);
}
out_close_fd:
close(config_fd);
out:
g_free(path);
}
static void igd_pt_i440fx_realize(PCIDevice *pci_dev, Error **errp)
{
uint32_t val = 0;
int i, num;
int pos, len;
Error *local_err = NULL;
num = ARRAY_SIZE(igd_host_bridge_infos);
for (i = 0; i < num; i++) {
pos = igd_host_bridge_infos[i].offset;
len = igd_host_bridge_infos[i].len;
host_pci_config_read(pos, len, &val, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
}
pci_default_write_config(pci_dev, pos, val, len);
}
}
static void igd_passthrough_i440fx_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
k->realize = igd_pt_i440fx_realize;
dc->desc = "IGD Passthrough Host bridge";
}
static const TypeInfo igd_passthrough_i440fx_info = {
.name = TYPE_IGD_PASSTHROUGH_I440FX_PCI_DEVICE,
.parent = TYPE_I440FX_PCI_DEVICE,
.instance_size = sizeof(PCII440FXState),
.class_init = igd_passthrough_i440fx_class_init,
};
pci: Replace pci_find_domain() with more general pci_root_bus_path() pci_find_domain() is used in a number of places where we want an id for a whole PCI domain (i.e. the subtree under a PCI root bus). The trouble is that many platforms may support multiple independent host bridges with no hardware supplied notion of domain number. This patch, therefore, replaces calls to pci_find_domain() with calls to a new pci_root_bus_path() returning a string. The new call is implemented in terms of a new callback in the host bridge class, so it can be defined in some way that's well defined for the platform. When no callback is available we fall back on the qbus name. Most current uses of pci_find_domain() are for error or informational messages, so the change in identifiers should be harmless. The exception is pci_get_dev_path(), whose results form part of migration streams. To maintain compatibility with old migration streams, the PIIX PCI host is altered to always supply "0000" for this path, which matches the old domain number (since the code didn't actually support domains other than 0). For the pseries (spapr) PCI bridge we use a different platform-unique identifier (pseries machines can routinely have dozens of PCI host bridges). Theoretically that breaks migration streams, but given that we don't yet have migration support for pseries, it doesn't matter. Any other machines that have working migration support including PCI devices will need to be updated to maintain migration stream compatibility. Signed-off-by: David Gibson <david@gibson.dropbear.id.au> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2013-06-06 16:48:49 +08:00
static const char *i440fx_pcihost_root_bus_path(PCIHostState *host_bridge,
PCIBus *rootbus)
{
I440FXState *s = I440FX_PCI_HOST_BRIDGE(host_bridge);
pci: Replace pci_find_domain() with more general pci_root_bus_path() pci_find_domain() is used in a number of places where we want an id for a whole PCI domain (i.e. the subtree under a PCI root bus). The trouble is that many platforms may support multiple independent host bridges with no hardware supplied notion of domain number. This patch, therefore, replaces calls to pci_find_domain() with calls to a new pci_root_bus_path() returning a string. The new call is implemented in terms of a new callback in the host bridge class, so it can be defined in some way that's well defined for the platform. When no callback is available we fall back on the qbus name. Most current uses of pci_find_domain() are for error or informational messages, so the change in identifiers should be harmless. The exception is pci_get_dev_path(), whose results form part of migration streams. To maintain compatibility with old migration streams, the PIIX PCI host is altered to always supply "0000" for this path, which matches the old domain number (since the code didn't actually support domains other than 0). For the pseries (spapr) PCI bridge we use a different platform-unique identifier (pseries machines can routinely have dozens of PCI host bridges). Theoretically that breaks migration streams, but given that we don't yet have migration support for pseries, it doesn't matter. Any other machines that have working migration support including PCI devices will need to be updated to maintain migration stream compatibility. Signed-off-by: David Gibson <david@gibson.dropbear.id.au> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2013-06-06 16:48:49 +08:00
/* For backwards compat with old device paths */
if (s->short_root_bus) {
return "0000";
}
return "0000:00";
pci: Replace pci_find_domain() with more general pci_root_bus_path() pci_find_domain() is used in a number of places where we want an id for a whole PCI domain (i.e. the subtree under a PCI root bus). The trouble is that many platforms may support multiple independent host bridges with no hardware supplied notion of domain number. This patch, therefore, replaces calls to pci_find_domain() with calls to a new pci_root_bus_path() returning a string. The new call is implemented in terms of a new callback in the host bridge class, so it can be defined in some way that's well defined for the platform. When no callback is available we fall back on the qbus name. Most current uses of pci_find_domain() are for error or informational messages, so the change in identifiers should be harmless. The exception is pci_get_dev_path(), whose results form part of migration streams. To maintain compatibility with old migration streams, the PIIX PCI host is altered to always supply "0000" for this path, which matches the old domain number (since the code didn't actually support domains other than 0). For the pseries (spapr) PCI bridge we use a different platform-unique identifier (pseries machines can routinely have dozens of PCI host bridges). Theoretically that breaks migration streams, but given that we don't yet have migration support for pseries, it doesn't matter. Any other machines that have working migration support including PCI devices will need to be updated to maintain migration stream compatibility. Signed-off-by: David Gibson <david@gibson.dropbear.id.au> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2013-06-06 16:48:49 +08:00
}
static Property i440fx_props[] = {
DEFINE_PROP_SIZE(PCI_HOST_PROP_PCI_HOLE64_SIZE, I440FXState,
pci_hole64_size, I440FX_PCI_HOST_HOLE64_SIZE_DEFAULT),
DEFINE_PROP_UINT32("short_root_bus", I440FXState, short_root_bus, 0),
DEFINE_PROP_BOOL("x-pci-hole64-fix", I440FXState, pci_hole64_fix, true),
DEFINE_PROP_END_OF_LIST(),
};
static void i440fx_pcihost_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
pci: Replace pci_find_domain() with more general pci_root_bus_path() pci_find_domain() is used in a number of places where we want an id for a whole PCI domain (i.e. the subtree under a PCI root bus). The trouble is that many platforms may support multiple independent host bridges with no hardware supplied notion of domain number. This patch, therefore, replaces calls to pci_find_domain() with calls to a new pci_root_bus_path() returning a string. The new call is implemented in terms of a new callback in the host bridge class, so it can be defined in some way that's well defined for the platform. When no callback is available we fall back on the qbus name. Most current uses of pci_find_domain() are for error or informational messages, so the change in identifiers should be harmless. The exception is pci_get_dev_path(), whose results form part of migration streams. To maintain compatibility with old migration streams, the PIIX PCI host is altered to always supply "0000" for this path, which matches the old domain number (since the code didn't actually support domains other than 0). For the pseries (spapr) PCI bridge we use a different platform-unique identifier (pseries machines can routinely have dozens of PCI host bridges). Theoretically that breaks migration streams, but given that we don't yet have migration support for pseries, it doesn't matter. Any other machines that have working migration support including PCI devices will need to be updated to maintain migration stream compatibility. Signed-off-by: David Gibson <david@gibson.dropbear.id.au> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2013-06-06 16:48:49 +08:00
PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(klass);
pci: Replace pci_find_domain() with more general pci_root_bus_path() pci_find_domain() is used in a number of places where we want an id for a whole PCI domain (i.e. the subtree under a PCI root bus). The trouble is that many platforms may support multiple independent host bridges with no hardware supplied notion of domain number. This patch, therefore, replaces calls to pci_find_domain() with calls to a new pci_root_bus_path() returning a string. The new call is implemented in terms of a new callback in the host bridge class, so it can be defined in some way that's well defined for the platform. When no callback is available we fall back on the qbus name. Most current uses of pci_find_domain() are for error or informational messages, so the change in identifiers should be harmless. The exception is pci_get_dev_path(), whose results form part of migration streams. To maintain compatibility with old migration streams, the PIIX PCI host is altered to always supply "0000" for this path, which matches the old domain number (since the code didn't actually support domains other than 0). For the pseries (spapr) PCI bridge we use a different platform-unique identifier (pseries machines can routinely have dozens of PCI host bridges). Theoretically that breaks migration streams, but given that we don't yet have migration support for pseries, it doesn't matter. Any other machines that have working migration support including PCI devices will need to be updated to maintain migration stream compatibility. Signed-off-by: David Gibson <david@gibson.dropbear.id.au> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2013-06-06 16:48:49 +08:00
hc->root_bus_path = i440fx_pcihost_root_bus_path;
dc->realize = i440fx_pcihost_realize;
dc->fw_name = "pci";
dc->props = i440fx_props;
/* Reason: needs to be wired up by pc_init1 */
qdev: Replace cannot_instantiate_with_device_add_yet with !user_creatable cannot_instantiate_with_device_add_yet was introduced by commit efec3dd631d94160288392721a5f9c39e50fb2bc to replace no_user. It was supposed to be a temporary measure. When it was introduced, we had 54 cannot_instantiate_with_device_add_yet=true lines in the code. Today (3 years later) this number has not shrunk: we now have 57 cannot_instantiate_with_device_add_yet=true lines. I think it is safe to say it is not a temporary measure, and we won't see the flag go away soon. Instead of a long field name that misleads people to believe it is temporary, replace it a shorter and less misleading field: user_creatable. Except for code comments, changes were generated using the following Coccinelle patch: @@ expression DC; @@ ( -DC->cannot_instantiate_with_device_add_yet = false; +DC->user_creatable = true; | -DC->cannot_instantiate_with_device_add_yet = true; +DC->user_creatable = false; ) @@ typedef ObjectClass; expression dc; identifier class, data; @@ static void device_class_init(ObjectClass *class, void *data) { ... dc->hotpluggable = true; +dc->user_creatable = true; ... } @@ @@ struct DeviceClass { ... -bool cannot_instantiate_with_device_add_yet; +bool user_creatable; ... } @@ expression DC; @@ ( -!DC->cannot_instantiate_with_device_add_yet +DC->user_creatable | -DC->cannot_instantiate_with_device_add_yet +!DC->user_creatable ) Cc: Alistair Francis <alistair.francis@xilinx.com> Cc: Laszlo Ersek <lersek@redhat.com> Cc: Marcel Apfelbaum <marcel@redhat.com> Cc: Markus Armbruster <armbru@redhat.com> Cc: Peter Maydell <peter.maydell@linaro.org> Cc: Thomas Huth <thuth@redhat.com> Acked-by: Alistair Francis <alistair.francis@xilinx.com> Reviewed-by: Thomas Huth <thuth@redhat.com> Reviewed-by: Marcel Apfelbaum <marcel@redhat.com> Acked-by: Marcel Apfelbaum <marcel@redhat.com> Signed-off-by: Eduardo Habkost <ehabkost@redhat.com> Message-Id: <20170503203604.31462-2-ehabkost@redhat.com> [ehabkost: kept "TODO remove once we're there" comment] Reviewed-by: Markus Armbruster <armbru@redhat.com> Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-05-04 04:35:44 +08:00
dc->user_creatable = false;
}
static const TypeInfo i440fx_pcihost_info = {
.name = TYPE_I440FX_PCI_HOST_BRIDGE,
.parent = TYPE_PCI_HOST_BRIDGE,
.instance_size = sizeof(I440FXState),
.instance_init = i440fx_pcihost_initfn,
.class_init = i440fx_pcihost_class_init,
};
static void i440fx_register_types(void)
{
type_register_static(&i440fx_info);
type_register_static(&igd_passthrough_i440fx_info);
type_register_static(&piix3_pci_type_info);
type_register_static(&piix3_info);
type_register_static(&piix3_xen_info);
type_register_static(&i440fx_pcihost_info);
}
type_init(i440fx_register_types)