qemu/hw/pci-host/q35.c

668 lines
24 KiB
C
Raw Normal View History

/*
* QEMU MCH/ICH9 PCI Bridge Emulation
*
* Copyright (c) 2006 Fabrice Bellard
* Copyright (c) 2009, 2010, 2011
* Isaku Yamahata <yamahata at valinux co jp>
* VA Linux Systems Japan K.K.
* Copyright (C) 2012 Jason Baron <jbaron@redhat.com>
*
* This is based on piix.c, but heavily modified.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
#include "hw/hw.h"
#include "hw/pci-host/q35.h"
2016-03-14 16:01:28 +08:00
#include "qapi/error.h"
#include "qapi/visitor.h"
/****************************************************************************
* Q35 host
*/
#define Q35_PCI_HOST_HOLE64_SIZE_DEFAULT (1ULL << 35)
static void q35_host_realize(DeviceState *dev, Error **errp)
{
PCIHostState *pci = PCI_HOST_BRIDGE(dev);
Q35PCIHost *s = Q35_HOST_DEVICE(dev);
SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
sysbus_add_io(sbd, MCH_HOST_BRIDGE_CONFIG_ADDR, &pci->conf_mem);
sysbus_init_ioports(sbd, MCH_HOST_BRIDGE_CONFIG_ADDR, 4);
sysbus_add_io(sbd, MCH_HOST_BRIDGE_CONFIG_DATA, &pci->data_mem);
sysbus_init_ioports(sbd, MCH_HOST_BRIDGE_CONFIG_DATA, 4);
/* register q35 0xcf8 port as coalesced pio */
memory_region_set_flush_coalesced(&pci->data_mem);
memory_region_add_coalescing(&pci->conf_mem, 0, 4);
pci->bus = pci_root_bus_new(DEVICE(s), "pcie.0",
s->mch.pci_address_space,
s->mch.address_space_io,
0, TYPE_PCIE_BUS);
PC_MACHINE(qdev_get_machine())->bus = pci->bus;
qdev_set_parent_bus(DEVICE(&s->mch), BUS(pci->bus));
qdev_init_nofail(DEVICE(&s->mch));
}
pci: Replace pci_find_domain() with more general pci_root_bus_path() pci_find_domain() is used in a number of places where we want an id for a whole PCI domain (i.e. the subtree under a PCI root bus). The trouble is that many platforms may support multiple independent host bridges with no hardware supplied notion of domain number. This patch, therefore, replaces calls to pci_find_domain() with calls to a new pci_root_bus_path() returning a string. The new call is implemented in terms of a new callback in the host bridge class, so it can be defined in some way that's well defined for the platform. When no callback is available we fall back on the qbus name. Most current uses of pci_find_domain() are for error or informational messages, so the change in identifiers should be harmless. The exception is pci_get_dev_path(), whose results form part of migration streams. To maintain compatibility with old migration streams, the PIIX PCI host is altered to always supply "0000" for this path, which matches the old domain number (since the code didn't actually support domains other than 0). For the pseries (spapr) PCI bridge we use a different platform-unique identifier (pseries machines can routinely have dozens of PCI host bridges). Theoretically that breaks migration streams, but given that we don't yet have migration support for pseries, it doesn't matter. Any other machines that have working migration support including PCI devices will need to be updated to maintain migration stream compatibility. Signed-off-by: David Gibson <david@gibson.dropbear.id.au> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2013-06-06 16:48:49 +08:00
static const char *q35_host_root_bus_path(PCIHostState *host_bridge,
PCIBus *rootbus)
{
Q35PCIHost *s = Q35_HOST_DEVICE(host_bridge);
/* For backwards compat with old device paths */
if (s->mch.short_root_bus) {
return "0000";
}
return "0000:00";
pci: Replace pci_find_domain() with more general pci_root_bus_path() pci_find_domain() is used in a number of places where we want an id for a whole PCI domain (i.e. the subtree under a PCI root bus). The trouble is that many platforms may support multiple independent host bridges with no hardware supplied notion of domain number. This patch, therefore, replaces calls to pci_find_domain() with calls to a new pci_root_bus_path() returning a string. The new call is implemented in terms of a new callback in the host bridge class, so it can be defined in some way that's well defined for the platform. When no callback is available we fall back on the qbus name. Most current uses of pci_find_domain() are for error or informational messages, so the change in identifiers should be harmless. The exception is pci_get_dev_path(), whose results form part of migration streams. To maintain compatibility with old migration streams, the PIIX PCI host is altered to always supply "0000" for this path, which matches the old domain number (since the code didn't actually support domains other than 0). For the pseries (spapr) PCI bridge we use a different platform-unique identifier (pseries machines can routinely have dozens of PCI host bridges). Theoretically that breaks migration streams, but given that we don't yet have migration support for pseries, it doesn't matter. Any other machines that have working migration support including PCI devices will need to be updated to maintain migration stream compatibility. Signed-off-by: David Gibson <david@gibson.dropbear.id.au> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2013-06-06 16:48:49 +08:00
}
static void q35_host_get_pci_hole_start(Object *obj, Visitor *v,
const char *name, void *opaque,
Error **errp)
{
Q35PCIHost *s = Q35_HOST_DEVICE(obj);
uint64_t val64;
uint32_t value;
val64 = range_is_empty(&s->mch.pci_hole)
? 0 : range_lob(&s->mch.pci_hole);
value = val64;
assert(value == val64);
qapi: Swap visit_* arguments for consistent 'name' placement JSON uses "name":value, but many of our visitor interfaces were called with visit_type_FOO(v, &value, name, errp). This can be a bit confusing to have to mentally swap the parameter order to match JSON order. It's particularly bad for visit_start_struct(), where the 'name' parameter is smack in the middle of the otherwise-related group of 'obj, kind, size' parameters! It's time to do a global swap of the parameter ordering, so that the 'name' parameter is always immediately after the Visitor argument. Additional reason in favor of the swap: the existing include/qjson.h prefers listing 'name' first in json_prop_*(), and I have plans to unify that file with the qapi visitors; listing 'name' first in qapi will minimize churn to the (admittedly few) qjson.h clients. Later patches will then fix docs, object.h, visitor-impl.h, and those clients to match. Done by first patching scripts/qapi*.py by hand to make generated files do what I want, then by running the following Coccinelle script to affect the rest of the code base: $ spatch --sp-file script `git grep -l '\bvisit_' -- '**/*.[ch]'` I then had to apply some touchups (Coccinelle insisted on TAB indentation in visitor.h, and botched the signature of visit_type_enum() by rewriting 'const char *const strings[]' to the syntactically invalid 'const char*const[] strings'). The movement of parameters is sufficient to provoke compiler errors if any callers were missed. // Part 1: Swap declaration order @@ type TV, TErr, TObj, T1, T2; identifier OBJ, ARG1, ARG2; @@ void visit_start_struct -(TV v, TObj OBJ, T1 ARG1, const char *name, T2 ARG2, TErr errp) +(TV v, const char *name, TObj OBJ, T1 ARG1, T2 ARG2, TErr errp) { ... } @@ type bool, TV, T1; identifier ARG1; @@ bool visit_optional -(TV v, T1 ARG1, const char *name) +(TV v, const char *name, T1 ARG1) { ... } @@ type TV, TErr, TObj, T1; identifier OBJ, ARG1; @@ void visit_get_next_type -(TV v, TObj OBJ, T1 ARG1, const char *name, TErr errp) +(TV v, const char *name, TObj OBJ, T1 ARG1, TErr errp) { ... } @@ type TV, TErr, TObj, T1, T2; identifier OBJ, ARG1, ARG2; @@ void visit_type_enum -(TV v, TObj OBJ, T1 ARG1, T2 ARG2, const char *name, TErr errp) +(TV v, const char *name, TObj OBJ, T1 ARG1, T2 ARG2, TErr errp) { ... } @@ type TV, TErr, TObj; identifier OBJ; identifier VISIT_TYPE =~ "^visit_type_"; @@ void VISIT_TYPE -(TV v, TObj OBJ, const char *name, TErr errp) +(TV v, const char *name, TObj OBJ, TErr errp) { ... } // Part 2: swap caller order @@ expression V, NAME, OBJ, ARG1, ARG2, ERR; identifier VISIT_TYPE =~ "^visit_type_"; @@ ( -visit_start_struct(V, OBJ, ARG1, NAME, ARG2, ERR) +visit_start_struct(V, NAME, OBJ, ARG1, ARG2, ERR) | -visit_optional(V, ARG1, NAME) +visit_optional(V, NAME, ARG1) | -visit_get_next_type(V, OBJ, ARG1, NAME, ERR) +visit_get_next_type(V, NAME, OBJ, ARG1, ERR) | -visit_type_enum(V, OBJ, ARG1, ARG2, NAME, ERR) +visit_type_enum(V, NAME, OBJ, ARG1, ARG2, ERR) | -VISIT_TYPE(V, OBJ, NAME, ERR) +VISIT_TYPE(V, NAME, OBJ, ERR) ) Signed-off-by: Eric Blake <eblake@redhat.com> Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com> Message-Id: <1454075341-13658-19-git-send-email-eblake@redhat.com> Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-01-29 21:48:54 +08:00
visit_type_uint32(v, name, &value, errp);
}
static void q35_host_get_pci_hole_end(Object *obj, Visitor *v,
const char *name, void *opaque,
Error **errp)
{
Q35PCIHost *s = Q35_HOST_DEVICE(obj);
uint64_t val64;
uint32_t value;
val64 = range_is_empty(&s->mch.pci_hole)
? 0 : range_upb(&s->mch.pci_hole) + 1;
value = val64;
assert(value == val64);
qapi: Swap visit_* arguments for consistent 'name' placement JSON uses "name":value, but many of our visitor interfaces were called with visit_type_FOO(v, &value, name, errp). This can be a bit confusing to have to mentally swap the parameter order to match JSON order. It's particularly bad for visit_start_struct(), where the 'name' parameter is smack in the middle of the otherwise-related group of 'obj, kind, size' parameters! It's time to do a global swap of the parameter ordering, so that the 'name' parameter is always immediately after the Visitor argument. Additional reason in favor of the swap: the existing include/qjson.h prefers listing 'name' first in json_prop_*(), and I have plans to unify that file with the qapi visitors; listing 'name' first in qapi will minimize churn to the (admittedly few) qjson.h clients. Later patches will then fix docs, object.h, visitor-impl.h, and those clients to match. Done by first patching scripts/qapi*.py by hand to make generated files do what I want, then by running the following Coccinelle script to affect the rest of the code base: $ spatch --sp-file script `git grep -l '\bvisit_' -- '**/*.[ch]'` I then had to apply some touchups (Coccinelle insisted on TAB indentation in visitor.h, and botched the signature of visit_type_enum() by rewriting 'const char *const strings[]' to the syntactically invalid 'const char*const[] strings'). The movement of parameters is sufficient to provoke compiler errors if any callers were missed. // Part 1: Swap declaration order @@ type TV, TErr, TObj, T1, T2; identifier OBJ, ARG1, ARG2; @@ void visit_start_struct -(TV v, TObj OBJ, T1 ARG1, const char *name, T2 ARG2, TErr errp) +(TV v, const char *name, TObj OBJ, T1 ARG1, T2 ARG2, TErr errp) { ... } @@ type bool, TV, T1; identifier ARG1; @@ bool visit_optional -(TV v, T1 ARG1, const char *name) +(TV v, const char *name, T1 ARG1) { ... } @@ type TV, TErr, TObj, T1; identifier OBJ, ARG1; @@ void visit_get_next_type -(TV v, TObj OBJ, T1 ARG1, const char *name, TErr errp) +(TV v, const char *name, TObj OBJ, T1 ARG1, TErr errp) { ... } @@ type TV, TErr, TObj, T1, T2; identifier OBJ, ARG1, ARG2; @@ void visit_type_enum -(TV v, TObj OBJ, T1 ARG1, T2 ARG2, const char *name, TErr errp) +(TV v, const char *name, TObj OBJ, T1 ARG1, T2 ARG2, TErr errp) { ... } @@ type TV, TErr, TObj; identifier OBJ; identifier VISIT_TYPE =~ "^visit_type_"; @@ void VISIT_TYPE -(TV v, TObj OBJ, const char *name, TErr errp) +(TV v, const char *name, TObj OBJ, TErr errp) { ... } // Part 2: swap caller order @@ expression V, NAME, OBJ, ARG1, ARG2, ERR; identifier VISIT_TYPE =~ "^visit_type_"; @@ ( -visit_start_struct(V, OBJ, ARG1, NAME, ARG2, ERR) +visit_start_struct(V, NAME, OBJ, ARG1, ARG2, ERR) | -visit_optional(V, ARG1, NAME) +visit_optional(V, NAME, ARG1) | -visit_get_next_type(V, OBJ, ARG1, NAME, ERR) +visit_get_next_type(V, NAME, OBJ, ARG1, ERR) | -visit_type_enum(V, OBJ, ARG1, ARG2, NAME, ERR) +visit_type_enum(V, NAME, OBJ, ARG1, ARG2, ERR) | -VISIT_TYPE(V, OBJ, NAME, ERR) +VISIT_TYPE(V, NAME, OBJ, ERR) ) Signed-off-by: Eric Blake <eblake@redhat.com> Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com> Message-Id: <1454075341-13658-19-git-send-email-eblake@redhat.com> Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-01-29 21:48:54 +08:00
visit_type_uint32(v, name, &value, errp);
}
/*
* The 64bit PCI hole start is set by the Guest firmware
* as the address of the first 64bit PCI MEM resource.
* If no PCI device has resources on the 64bit area,
* the 64bit PCI hole will start after "over 4G RAM" and the
* reserved space for memory hotplug if any.
*/
static uint64_t q35_host_get_pci_hole64_start_value(Object *obj)
{
PCIHostState *h = PCI_HOST_BRIDGE(obj);
Q35PCIHost *s = Q35_HOST_DEVICE(obj);
Range w64;
uint64_t value;
pci_bus_get_w64_range(h->bus, &w64);
value = range_is_empty(&w64) ? 0 : range_lob(&w64);
if (!value && s->pci_hole64_fix) {
value = pc_pci_hole64_start();
}
return value;
}
static void q35_host_get_pci_hole64_start(Object *obj, Visitor *v,
const char *name, void *opaque,
Error **errp)
{
uint64_t hole64_start = q35_host_get_pci_hole64_start_value(obj);
visit_type_uint64(v, name, &hole64_start, errp);
}
/*
* The 64bit PCI hole end is set by the Guest firmware
* as the address of the last 64bit PCI MEM resource.
* Then it is expanded to the PCI_HOST_PROP_PCI_HOLE64_SIZE
* that can be configured by the user.
*/
static void q35_host_get_pci_hole64_end(Object *obj, Visitor *v,
const char *name, void *opaque,
Error **errp)
{
PCIHostState *h = PCI_HOST_BRIDGE(obj);
Q35PCIHost *s = Q35_HOST_DEVICE(obj);
hw/pci-host/x86: extend the 64-bit PCI hole relative to the fw-assigned base In commit 9fa99d2519cb ("hw/pci-host: Fix x86 Host Bridges 64bit PCI hole", 2017-11-16), we meant to expose such a 64-bit PCI MMIO aperture in the ACPI DSDT that would be at least as large as the new "pci-hole64-size" property (2GB on i440fx, 32GB on q35). The goal was to offer "enough" 64-bit MMIO aperture to the guest OS for hotplug purposes. In that commit, we added or modified five functions: - pc_pci_hole64_start(): shared between i440fx and q35. Provides a default 64-bit base, which starts beyond the cold-plugged 64-bit RAM, and skips the DIMM hotplug area too (if any). - i440fx_pcihost_get_pci_hole64_start(), q35_host_get_pci_hole64_start(): board-specific 64-bit base property getters called abstractly by the ACPI generator. Both of these fall back to pc_pci_hole64_start() if the firmware didn't program any 64-bit hole (i.e. if the firmware didn't assign a 64-bit GPA to any MMIO BAR on any device). Otherwise, they honor the firmware's BAR assignments (i.e., they treat the lowest 64-bit GPA programmed by the firmware as the base address for the aperture). - i440fx_pcihost_get_pci_hole64_end(), q35_host_get_pci_hole64_end(): these intended to extend the aperture to our size recommendation, calculated relative to the base of the aperture. Despite the original intent, i440fx_pcihost_get_pci_hole64_end() and q35_host_get_pci_hole64_end() currently only extend the aperture relative to the default base (pc_pci_hole64_start()), ignoring any programming done by the firmware. This means that our size recommendation may not be met. Fix it by honoring the firmware's address assignments. The strange extension sizes were spotted by Alex, in the log of a guest kernel running on top of OVMF (which prefers to assign 64-bit GPAs to 64-bit BARs). This change only affects DSDT generation, therefore no new compat property is being introduced. Using an i440fx OVMF guest with 5GB RAM, an example _CRS change is: > @@ -881,9 +881,9 @@ > QWordMemory (ResourceProducer, PosDecode, MinFixed, MaxFixed, Cacheable, ReadWrite, > 0x0000000000000000, // Granularity > 0x0000000800000000, // Range Minimum > - 0x000000080001C0FF, // Range Maximum > + 0x000000087FFFFFFF, // Range Maximum > 0x0000000000000000, // Translation Offset > - 0x000000000001C100, // Length > + 0x0000000080000000, // Length > ,, , AddressRangeMemory, TypeStatic) > }) > Device (GPE0) (On i440fx, the low RAM split is at 3GB, in this case. Therefore, with 5GB guest RAM and no DIMM hotplug range, pc_pci_hole64_start() returns 4 + (5-3) = 6 GB. Adding the 2GB extension to that yields 8GB, which is below the firmware-programmed base of 32GB, before the patch. Therefore, before the patch, the extension is ineffective. After the patch, we add the 2GB extension to the firmware-programmed base, namely 32GB.) Using a q35 OVMF guest with 5GB RAM, an example _CRS change is: > @@ -3162,9 +3162,9 @@ > QWordMemory (ResourceProducer, PosDecode, MinFixed, MaxFixed, Cacheable, ReadWrite, > 0x0000000000000000, // Granularity > 0x0000000800000000, // Range Minimum > - 0x00000009BFFFFFFF, // Range Maximum > + 0x0000000FFFFFFFFF, // Range Maximum > 0x0000000000000000, // Translation Offset > - 0x00000001C0000000, // Length > + 0x0000000800000000, // Length > ,, , AddressRangeMemory, TypeStatic) > }) > Device (GPE0) (On Q35, the low RAM split is at 2GB. Therefore, with 5GB guest RAM and no DIMM hotplug range, pc_pci_hole64_start() returns 4 + (5-2) = 7 GB. Adding the 32GB extension to that yields 39GB (0x0000_0009_BFFF_FFFF + 1), before the patch. After the patch, we add the 32GB extension to the firmware-programmed base, namely 32GB.) The ACPI test data for the bios-tables-test case that we added earlier in this series are corrected too, as follows: > @@ -3339,9 +3339,9 @@ > QWordMemory (ResourceProducer, PosDecode, MinFixed, MaxFixed, Cacheable, ReadWrite, > 0x0000000000000000, // Granularity > 0x0000000200000000, // Range Minimum > - 0x00000009BFFFFFFF, // Range Maximum > + 0x00000009FFFFFFFF, // Range Maximum > 0x0000000000000000, // Translation Offset > - 0x00000007C0000000, // Length > + 0x0000000800000000, // Length > ,, , AddressRangeMemory, TypeStatic) > }) > Device (GPE0) Cc: "Michael S. Tsirkin" <mst@redhat.com> Cc: Alex Williamson <alex.williamson@redhat.com> Cc: Gerd Hoffmann <kraxel@redhat.com> Cc: Igor Mammedov <imammedo@redhat.com> Cc: Marcel Apfelbaum <marcel.apfelbaum@gmail.com> Fixes: 9fa99d2519cbf71f871e46871df12cb446dc1c3e Signed-off-by: Laszlo Ersek <lersek@redhat.com> Reviewed-by: Marcel Apfelbaum <marcel.apfelbaum@gmail.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2018-09-28 05:24:38 +08:00
uint64_t hole64_start = q35_host_get_pci_hole64_start_value(obj);
Range w64;
uint64_t value, hole64_end;
pci_bus_get_w64_range(h->bus, &w64);
value = range_is_empty(&w64) ? 0 : range_upb(&w64) + 1;
hole64_end = ROUND_UP(hole64_start + s->mch.pci_hole64_size, 1ULL << 30);
if (s->pci_hole64_fix && value < hole64_end) {
value = hole64_end;
}
visit_type_uint64(v, name, &value, errp);
}
static void q35_host_get_mmcfg_size(Object *obj, Visitor *v, const char *name,
void *opaque, Error **errp)
{
PCIExpressHost *e = PCIE_HOST_BRIDGE(obj);
visit_type_uint64(v, name, &e->size, errp);
}
/*
* NOTE: setting defaults for the mch.* fields in this table
* doesn't work, because mch is a separate QOM object that is
* zeroed by the object_initialize(&s->mch, ...) call inside
* q35_host_initfn(). The default values for those
* properties need to be initialized manually by
* q35_host_initfn() after the object_initialize() call.
*/
q35/mch: implement extended TSEG sizes The q35 machine type currently lets the guest firmware select a 1MB, 2MB or 8MB TSEG (basically, SMRAM) size. In edk2/OVMF, we use 8MB, but even that is not enough when a lot of VCPUs (more than approx. 224) are configured -- SMRAM footprint scales largely proportionally with VCPU count. Introduce a new property for "mch" called "extended-tseg-mbytes", which expresses (in megabytes) the user's choice of TSEG (SMRAM) size. Invent a new, QEMU-specific register in the config space of the DRAM Controller, at offset 0x50, in order to allow guest firmware to query the TSEG (SMRAM) size. According to Intel Document Number 316966-002, Table 5-1 "DRAM Controller Register Address Map (D0:F0)": Warning: Address locations that are not listed are considered Intel Reserved registers locations. Reads to Reserved registers may return non-zero values. Writes to reserved locations may cause system failures. All registers that are defined in the PCI 2.3 specification, but are not necessary or implemented in this component are simply not included in this document. The reserved/unimplemented space in the PCI configuration header space is not documented as such in this summary. Offsets 0x50 and 0x51 are not listed in Table 5-1. They are also not part of the standard PCI config space header. And they precede the capability list as well, which starts at 0xe0 for this device. When the guest writes value 0xffff to this register, the value that can be read back is that of "mch.extended-tseg-mbytes" -- unless it remains 0xffff. The guest is required to write 0xffff first (as opposed to a read-only register) because PCI config space is generally not cleared on QEMU reset, and after S3 resume or reboot, new guest firmware running on old QEMU could read a guest OS-injected value from this register. After reading the available "extended" TSEG size, the guest firmware may actually request that TSEG size by writing pattern 11b to the ESMRAMC register's TSEG_SZ bit-field. (The Intel spec referenced above defines only patterns 00b (1MB), 01b (2MB) and 10b (8MB); 11b is reserved.) On the QEMU command line, the value can be set with -global mch.extended-tseg-mbytes=N The default value for 2.10+ q35 machine types is 16. The value is limited to 0xfff (4095) at the moment, purely so that the product (4095 MB) can be stored to the uint32_t variable "tseg_size" in mch_update_smram(). Users are responsible for choosing sensible TSEG sizes. On 2.9 and earlier q35 machine types, the default value is 0. This lets the 11b bit pattern in ESMRAMC.TSEG_SZ, and the register at offset 0x50, keep their original behavior. When "extended-tseg-mbytes" is nonzero, the new register at offset 0x50 is set to that value on reset, for completeness. PCI config space is migrated automatically, so no VMSD changes are necessary. Cc: "Michael S. Tsirkin" <mst@redhat.com> Cc: Gerd Hoffmann <kraxel@redhat.com> Cc: Paolo Bonzini <pbonzini@redhat.com> Ref: https://bugzilla.redhat.com/show_bug.cgi?id=1447027 Ref: https://lists.01.org/pipermail/edk2-devel/2017-May/010456.html Signed-off-by: Laszlo Ersek <lersek@redhat.com> Reviewed-by: Gerd Hoffmann <kraxel@redhat.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-06-09 00:10:13 +08:00
static Property q35_host_props[] = {
DEFINE_PROP_UINT64(PCIE_HOST_MCFG_BASE, Q35PCIHost, parent_obj.base_addr,
MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT),
DEFINE_PROP_SIZE(PCI_HOST_PROP_PCI_HOLE64_SIZE, Q35PCIHost,
mch.pci_hole64_size, Q35_PCI_HOST_HOLE64_SIZE_DEFAULT),
DEFINE_PROP_UINT32("short_root_bus", Q35PCIHost, mch.short_root_bus, 0),
DEFINE_PROP_SIZE(PCI_HOST_BELOW_4G_MEM_SIZE, Q35PCIHost,
mch.below_4g_mem_size, 0),
DEFINE_PROP_SIZE(PCI_HOST_ABOVE_4G_MEM_SIZE, Q35PCIHost,
mch.above_4g_mem_size, 0),
DEFINE_PROP_BOOL("x-pci-hole64-fix", Q35PCIHost, pci_hole64_fix, true),
DEFINE_PROP_END_OF_LIST(),
};
static void q35_host_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
pci: Replace pci_find_domain() with more general pci_root_bus_path() pci_find_domain() is used in a number of places where we want an id for a whole PCI domain (i.e. the subtree under a PCI root bus). The trouble is that many platforms may support multiple independent host bridges with no hardware supplied notion of domain number. This patch, therefore, replaces calls to pci_find_domain() with calls to a new pci_root_bus_path() returning a string. The new call is implemented in terms of a new callback in the host bridge class, so it can be defined in some way that's well defined for the platform. When no callback is available we fall back on the qbus name. Most current uses of pci_find_domain() are for error or informational messages, so the change in identifiers should be harmless. The exception is pci_get_dev_path(), whose results form part of migration streams. To maintain compatibility with old migration streams, the PIIX PCI host is altered to always supply "0000" for this path, which matches the old domain number (since the code didn't actually support domains other than 0). For the pseries (spapr) PCI bridge we use a different platform-unique identifier (pseries machines can routinely have dozens of PCI host bridges). Theoretically that breaks migration streams, but given that we don't yet have migration support for pseries, it doesn't matter. Any other machines that have working migration support including PCI devices will need to be updated to maintain migration stream compatibility. Signed-off-by: David Gibson <david@gibson.dropbear.id.au> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2013-06-06 16:48:49 +08:00
PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(klass);
pci: Replace pci_find_domain() with more general pci_root_bus_path() pci_find_domain() is used in a number of places where we want an id for a whole PCI domain (i.e. the subtree under a PCI root bus). The trouble is that many platforms may support multiple independent host bridges with no hardware supplied notion of domain number. This patch, therefore, replaces calls to pci_find_domain() with calls to a new pci_root_bus_path() returning a string. The new call is implemented in terms of a new callback in the host bridge class, so it can be defined in some way that's well defined for the platform. When no callback is available we fall back on the qbus name. Most current uses of pci_find_domain() are for error or informational messages, so the change in identifiers should be harmless. The exception is pci_get_dev_path(), whose results form part of migration streams. To maintain compatibility with old migration streams, the PIIX PCI host is altered to always supply "0000" for this path, which matches the old domain number (since the code didn't actually support domains other than 0). For the pseries (spapr) PCI bridge we use a different platform-unique identifier (pseries machines can routinely have dozens of PCI host bridges). Theoretically that breaks migration streams, but given that we don't yet have migration support for pseries, it doesn't matter. Any other machines that have working migration support including PCI devices will need to be updated to maintain migration stream compatibility. Signed-off-by: David Gibson <david@gibson.dropbear.id.au> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2013-06-06 16:48:49 +08:00
hc->root_bus_path = q35_host_root_bus_path;
dc->realize = q35_host_realize;
q35/mch: implement extended TSEG sizes The q35 machine type currently lets the guest firmware select a 1MB, 2MB or 8MB TSEG (basically, SMRAM) size. In edk2/OVMF, we use 8MB, but even that is not enough when a lot of VCPUs (more than approx. 224) are configured -- SMRAM footprint scales largely proportionally with VCPU count. Introduce a new property for "mch" called "extended-tseg-mbytes", which expresses (in megabytes) the user's choice of TSEG (SMRAM) size. Invent a new, QEMU-specific register in the config space of the DRAM Controller, at offset 0x50, in order to allow guest firmware to query the TSEG (SMRAM) size. According to Intel Document Number 316966-002, Table 5-1 "DRAM Controller Register Address Map (D0:F0)": Warning: Address locations that are not listed are considered Intel Reserved registers locations. Reads to Reserved registers may return non-zero values. Writes to reserved locations may cause system failures. All registers that are defined in the PCI 2.3 specification, but are not necessary or implemented in this component are simply not included in this document. The reserved/unimplemented space in the PCI configuration header space is not documented as such in this summary. Offsets 0x50 and 0x51 are not listed in Table 5-1. They are also not part of the standard PCI config space header. And they precede the capability list as well, which starts at 0xe0 for this device. When the guest writes value 0xffff to this register, the value that can be read back is that of "mch.extended-tseg-mbytes" -- unless it remains 0xffff. The guest is required to write 0xffff first (as opposed to a read-only register) because PCI config space is generally not cleared on QEMU reset, and after S3 resume or reboot, new guest firmware running on old QEMU could read a guest OS-injected value from this register. After reading the available "extended" TSEG size, the guest firmware may actually request that TSEG size by writing pattern 11b to the ESMRAMC register's TSEG_SZ bit-field. (The Intel spec referenced above defines only patterns 00b (1MB), 01b (2MB) and 10b (8MB); 11b is reserved.) On the QEMU command line, the value can be set with -global mch.extended-tseg-mbytes=N The default value for 2.10+ q35 machine types is 16. The value is limited to 0xfff (4095) at the moment, purely so that the product (4095 MB) can be stored to the uint32_t variable "tseg_size" in mch_update_smram(). Users are responsible for choosing sensible TSEG sizes. On 2.9 and earlier q35 machine types, the default value is 0. This lets the 11b bit pattern in ESMRAMC.TSEG_SZ, and the register at offset 0x50, keep their original behavior. When "extended-tseg-mbytes" is nonzero, the new register at offset 0x50 is set to that value on reset, for completeness. PCI config space is migrated automatically, so no VMSD changes are necessary. Cc: "Michael S. Tsirkin" <mst@redhat.com> Cc: Gerd Hoffmann <kraxel@redhat.com> Cc: Paolo Bonzini <pbonzini@redhat.com> Ref: https://bugzilla.redhat.com/show_bug.cgi?id=1447027 Ref: https://lists.01.org/pipermail/edk2-devel/2017-May/010456.html Signed-off-by: Laszlo Ersek <lersek@redhat.com> Reviewed-by: Gerd Hoffmann <kraxel@redhat.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-06-09 00:10:13 +08:00
dc->props = q35_host_props;
/* Reason: needs to be wired up by pc_q35_init */
qdev: Replace cannot_instantiate_with_device_add_yet with !user_creatable cannot_instantiate_with_device_add_yet was introduced by commit efec3dd631d94160288392721a5f9c39e50fb2bc to replace no_user. It was supposed to be a temporary measure. When it was introduced, we had 54 cannot_instantiate_with_device_add_yet=true lines in the code. Today (3 years later) this number has not shrunk: we now have 57 cannot_instantiate_with_device_add_yet=true lines. I think it is safe to say it is not a temporary measure, and we won't see the flag go away soon. Instead of a long field name that misleads people to believe it is temporary, replace it a shorter and less misleading field: user_creatable. Except for code comments, changes were generated using the following Coccinelle patch: @@ expression DC; @@ ( -DC->cannot_instantiate_with_device_add_yet = false; +DC->user_creatable = true; | -DC->cannot_instantiate_with_device_add_yet = true; +DC->user_creatable = false; ) @@ typedef ObjectClass; expression dc; identifier class, data; @@ static void device_class_init(ObjectClass *class, void *data) { ... dc->hotpluggable = true; +dc->user_creatable = true; ... } @@ @@ struct DeviceClass { ... -bool cannot_instantiate_with_device_add_yet; +bool user_creatable; ... } @@ expression DC; @@ ( -!DC->cannot_instantiate_with_device_add_yet +DC->user_creatable | -DC->cannot_instantiate_with_device_add_yet +!DC->user_creatable ) Cc: Alistair Francis <alistair.francis@xilinx.com> Cc: Laszlo Ersek <lersek@redhat.com> Cc: Marcel Apfelbaum <marcel@redhat.com> Cc: Markus Armbruster <armbru@redhat.com> Cc: Peter Maydell <peter.maydell@linaro.org> Cc: Thomas Huth <thuth@redhat.com> Acked-by: Alistair Francis <alistair.francis@xilinx.com> Reviewed-by: Thomas Huth <thuth@redhat.com> Reviewed-by: Marcel Apfelbaum <marcel@redhat.com> Acked-by: Marcel Apfelbaum <marcel@redhat.com> Signed-off-by: Eduardo Habkost <ehabkost@redhat.com> Message-Id: <20170503203604.31462-2-ehabkost@redhat.com> [ehabkost: kept "TODO remove once we're there" comment] Reviewed-by: Markus Armbruster <armbru@redhat.com> Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-05-04 04:35:44 +08:00
dc->user_creatable = false;
set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
dc->fw_name = "pci";
}
static void q35_host_initfn(Object *obj)
{
Q35PCIHost *s = Q35_HOST_DEVICE(obj);
PCIHostState *phb = PCI_HOST_BRIDGE(obj);
memory_region_init_io(&phb->conf_mem, obj, &pci_host_conf_le_ops, phb,
"pci-conf-idx", 4);
memory_region_init_io(&phb->data_mem, obj, &pci_host_data_le_ops, phb,
"pci-conf-data", 4);
object_initialize(&s->mch, sizeof(s->mch), TYPE_MCH_PCI_DEVICE);
object_property_add_child(OBJECT(s), "mch", OBJECT(&s->mch), NULL);
qdev_prop_set_int32(DEVICE(&s->mch), "addr", PCI_DEVFN(0, 0));
qdev_prop_set_bit(DEVICE(&s->mch), "multifunction", false);
/* mch's object_initialize resets the default value, set it again */
qdev_prop_set_uint64(DEVICE(s), PCI_HOST_PROP_PCI_HOLE64_SIZE,
Q35_PCI_HOST_HOLE64_SIZE_DEFAULT);
object_property_add(obj, PCI_HOST_PROP_PCI_HOLE_START, "uint32",
q35_host_get_pci_hole_start,
NULL, NULL, NULL, NULL);
object_property_add(obj, PCI_HOST_PROP_PCI_HOLE_END, "uint32",
q35_host_get_pci_hole_end,
NULL, NULL, NULL, NULL);
object_property_add(obj, PCI_HOST_PROP_PCI_HOLE64_START, "uint64",
q35_host_get_pci_hole64_start,
NULL, NULL, NULL, NULL);
object_property_add(obj, PCI_HOST_PROP_PCI_HOLE64_END, "uint64",
q35_host_get_pci_hole64_end,
NULL, NULL, NULL, NULL);
object_property_add(obj, PCIE_HOST_MCFG_SIZE, "uint64",
q35_host_get_mmcfg_size,
NULL, NULL, NULL, NULL);
object_property_add_link(obj, MCH_HOST_PROP_RAM_MEM, TYPE_MEMORY_REGION,
(Object **) &s->mch.ram_memory,
qdev_prop_allow_set_link_before_realize, 0, NULL);
object_property_add_link(obj, MCH_HOST_PROP_PCI_MEM, TYPE_MEMORY_REGION,
(Object **) &s->mch.pci_address_space,
qdev_prop_allow_set_link_before_realize, 0, NULL);
object_property_add_link(obj, MCH_HOST_PROP_SYSTEM_MEM, TYPE_MEMORY_REGION,
(Object **) &s->mch.system_memory,
qdev_prop_allow_set_link_before_realize, 0, NULL);
object_property_add_link(obj, MCH_HOST_PROP_IO_MEM, TYPE_MEMORY_REGION,
(Object **) &s->mch.address_space_io,
qdev_prop_allow_set_link_before_realize, 0, NULL);
/* Leave enough space for the biggest MCFG BAR */
/* TODO: this matches current bios behaviour, but
* it's not a power of two, which means an MTRR
* can't cover it exactly.
*/
range_set_bounds(&s->mch.pci_hole,
MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT + MCH_HOST_BRIDGE_PCIEXBAR_MAX,
IO_APIC_DEFAULT_ADDRESS - 1);
}
static const TypeInfo q35_host_info = {
.name = TYPE_Q35_HOST_DEVICE,
.parent = TYPE_PCIE_HOST_BRIDGE,
.instance_size = sizeof(Q35PCIHost),
.instance_init = q35_host_initfn,
.class_init = q35_host_class_init,
};
/****************************************************************************
* MCH D0:F0
*/
static uint64_t tseg_blackhole_read(void *ptr, hwaddr reg, unsigned size)
{
return 0xffffffff;
}
static void tseg_blackhole_write(void *opaque, hwaddr addr, uint64_t val,
unsigned width)
{
/* nothing */
}
static const MemoryRegionOps tseg_blackhole_ops = {
.read = tseg_blackhole_read,
.write = tseg_blackhole_write,
.endianness = DEVICE_NATIVE_ENDIAN,
.valid.min_access_size = 1,
.valid.max_access_size = 4,
.impl.min_access_size = 4,
.impl.max_access_size = 4,
.endianness = DEVICE_LITTLE_ENDIAN,
};
/* PCIe MMCFG */
static void mch_update_pciexbar(MCHPCIState *mch)
{
PCIDevice *pci_dev = PCI_DEVICE(mch);
BusState *bus = qdev_get_parent_bus(DEVICE(mch));
PCIExpressHost *pehb = PCIE_HOST_BRIDGE(bus->parent);
uint64_t pciexbar;
int enable;
uint64_t addr;
uint64_t addr_mask;
uint32_t length;
pciexbar = pci_get_quad(pci_dev->config + MCH_HOST_BRIDGE_PCIEXBAR);
enable = pciexbar & MCH_HOST_BRIDGE_PCIEXBAREN;
addr_mask = MCH_HOST_BRIDGE_PCIEXBAR_ADMSK;
switch (pciexbar & MCH_HOST_BRIDGE_PCIEXBAR_LENGTH_MASK) {
case MCH_HOST_BRIDGE_PCIEXBAR_LENGTH_256M:
length = 256 * 1024 * 1024;
break;
case MCH_HOST_BRIDGE_PCIEXBAR_LENGTH_128M:
length = 128 * 1024 * 1024;
addr_mask |= MCH_HOST_BRIDGE_PCIEXBAR_128ADMSK |
MCH_HOST_BRIDGE_PCIEXBAR_64ADMSK;
break;
case MCH_HOST_BRIDGE_PCIEXBAR_LENGTH_64M:
length = 64 * 1024 * 1024;
addr_mask |= MCH_HOST_BRIDGE_PCIEXBAR_64ADMSK;
break;
case MCH_HOST_BRIDGE_PCIEXBAR_LENGTH_RVD:
default:
abort();
}
addr = pciexbar & addr_mask;
pcie_host_mmcfg_update(pehb, enable, addr, length);
/* Leave enough space for the MCFG BAR */
/*
* TODO: this matches current bios behaviour, but it's not a power of two,
* which means an MTRR can't cover it exactly.
*/
if (enable) {
range_set_bounds(&mch->pci_hole,
addr + length,
IO_APIC_DEFAULT_ADDRESS - 1);
} else {
range_set_bounds(&mch->pci_hole,
MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT,
IO_APIC_DEFAULT_ADDRESS - 1);
}
}
/* PAM */
static void mch_update_pam(MCHPCIState *mch)
{
PCIDevice *pd = PCI_DEVICE(mch);
int i;
memory_region_transaction_begin();
for (i = 0; i < 13; i++) {
pam_update(&mch->pam_regions[i], i,
pd->config[MCH_HOST_BRIDGE_PAM0 + DIV_ROUND_UP(i, 2)]);
}
memory_region_transaction_commit();
}
/* SMRAM */
static void mch_update_smram(MCHPCIState *mch)
{
PCIDevice *pd = PCI_DEVICE(mch);
bool h_smrame = (pd->config[MCH_HOST_BRIDGE_ESMRAMC] & MCH_HOST_BRIDGE_ESMRAMC_H_SMRAME);
uint32_t tseg_size;
/* implement SMRAM.D_LCK */
if (pd->config[MCH_HOST_BRIDGE_SMRAM] & MCH_HOST_BRIDGE_SMRAM_D_LCK) {
pd->config[MCH_HOST_BRIDGE_SMRAM] &= ~MCH_HOST_BRIDGE_SMRAM_D_OPEN;
pd->wmask[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_WMASK_LCK;
pd->wmask[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_WMASK_LCK;
}
memory_region_transaction_begin();
if (pd->config[MCH_HOST_BRIDGE_SMRAM] & SMRAM_D_OPEN) {
/* Hide (!) low SMRAM if H_SMRAME = 1 */
memory_region_set_enabled(&mch->smram_region, h_smrame);
/* Show high SMRAM if H_SMRAME = 1 */
memory_region_set_enabled(&mch->open_high_smram, h_smrame);
} else {
/* Hide high SMRAM and low SMRAM */
memory_region_set_enabled(&mch->smram_region, true);
memory_region_set_enabled(&mch->open_high_smram, false);
}
if (pd->config[MCH_HOST_BRIDGE_SMRAM] & SMRAM_G_SMRAME) {
memory_region_set_enabled(&mch->low_smram, !h_smrame);
memory_region_set_enabled(&mch->high_smram, h_smrame);
} else {
memory_region_set_enabled(&mch->low_smram, false);
memory_region_set_enabled(&mch->high_smram, false);
}
if (pd->config[MCH_HOST_BRIDGE_ESMRAMC] & MCH_HOST_BRIDGE_ESMRAMC_T_EN) {
switch (pd->config[MCH_HOST_BRIDGE_ESMRAMC] &
MCH_HOST_BRIDGE_ESMRAMC_TSEG_SZ_MASK) {
case MCH_HOST_BRIDGE_ESMRAMC_TSEG_SZ_1MB:
tseg_size = 1024 * 1024;
break;
case MCH_HOST_BRIDGE_ESMRAMC_TSEG_SZ_2MB:
tseg_size = 1024 * 1024 * 2;
break;
case MCH_HOST_BRIDGE_ESMRAMC_TSEG_SZ_8MB:
tseg_size = 1024 * 1024 * 8;
break;
default:
q35/mch: implement extended TSEG sizes The q35 machine type currently lets the guest firmware select a 1MB, 2MB or 8MB TSEG (basically, SMRAM) size. In edk2/OVMF, we use 8MB, but even that is not enough when a lot of VCPUs (more than approx. 224) are configured -- SMRAM footprint scales largely proportionally with VCPU count. Introduce a new property for "mch" called "extended-tseg-mbytes", which expresses (in megabytes) the user's choice of TSEG (SMRAM) size. Invent a new, QEMU-specific register in the config space of the DRAM Controller, at offset 0x50, in order to allow guest firmware to query the TSEG (SMRAM) size. According to Intel Document Number 316966-002, Table 5-1 "DRAM Controller Register Address Map (D0:F0)": Warning: Address locations that are not listed are considered Intel Reserved registers locations. Reads to Reserved registers may return non-zero values. Writes to reserved locations may cause system failures. All registers that are defined in the PCI 2.3 specification, but are not necessary or implemented in this component are simply not included in this document. The reserved/unimplemented space in the PCI configuration header space is not documented as such in this summary. Offsets 0x50 and 0x51 are not listed in Table 5-1. They are also not part of the standard PCI config space header. And they precede the capability list as well, which starts at 0xe0 for this device. When the guest writes value 0xffff to this register, the value that can be read back is that of "mch.extended-tseg-mbytes" -- unless it remains 0xffff. The guest is required to write 0xffff first (as opposed to a read-only register) because PCI config space is generally not cleared on QEMU reset, and after S3 resume or reboot, new guest firmware running on old QEMU could read a guest OS-injected value from this register. After reading the available "extended" TSEG size, the guest firmware may actually request that TSEG size by writing pattern 11b to the ESMRAMC register's TSEG_SZ bit-field. (The Intel spec referenced above defines only patterns 00b (1MB), 01b (2MB) and 10b (8MB); 11b is reserved.) On the QEMU command line, the value can be set with -global mch.extended-tseg-mbytes=N The default value for 2.10+ q35 machine types is 16. The value is limited to 0xfff (4095) at the moment, purely so that the product (4095 MB) can be stored to the uint32_t variable "tseg_size" in mch_update_smram(). Users are responsible for choosing sensible TSEG sizes. On 2.9 and earlier q35 machine types, the default value is 0. This lets the 11b bit pattern in ESMRAMC.TSEG_SZ, and the register at offset 0x50, keep their original behavior. When "extended-tseg-mbytes" is nonzero, the new register at offset 0x50 is set to that value on reset, for completeness. PCI config space is migrated automatically, so no VMSD changes are necessary. Cc: "Michael S. Tsirkin" <mst@redhat.com> Cc: Gerd Hoffmann <kraxel@redhat.com> Cc: Paolo Bonzini <pbonzini@redhat.com> Ref: https://bugzilla.redhat.com/show_bug.cgi?id=1447027 Ref: https://lists.01.org/pipermail/edk2-devel/2017-May/010456.html Signed-off-by: Laszlo Ersek <lersek@redhat.com> Reviewed-by: Gerd Hoffmann <kraxel@redhat.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-06-09 00:10:13 +08:00
tseg_size = 1024 * 1024 * (uint32_t)mch->ext_tseg_mbytes;
break;
}
} else {
tseg_size = 0;
}
memory_region_del_subregion(mch->system_memory, &mch->tseg_blackhole);
memory_region_set_enabled(&mch->tseg_blackhole, tseg_size);
memory_region_set_size(&mch->tseg_blackhole, tseg_size);
memory_region_add_subregion_overlap(mch->system_memory,
mch->below_4g_mem_size - tseg_size,
&mch->tseg_blackhole, 1);
memory_region_set_enabled(&mch->tseg_window, tseg_size);
memory_region_set_size(&mch->tseg_window, tseg_size);
memory_region_set_address(&mch->tseg_window,
mch->below_4g_mem_size - tseg_size);
memory_region_set_alias_offset(&mch->tseg_window,
mch->below_4g_mem_size - tseg_size);
memory_region_transaction_commit();
}
q35/mch: implement extended TSEG sizes The q35 machine type currently lets the guest firmware select a 1MB, 2MB or 8MB TSEG (basically, SMRAM) size. In edk2/OVMF, we use 8MB, but even that is not enough when a lot of VCPUs (more than approx. 224) are configured -- SMRAM footprint scales largely proportionally with VCPU count. Introduce a new property for "mch" called "extended-tseg-mbytes", which expresses (in megabytes) the user's choice of TSEG (SMRAM) size. Invent a new, QEMU-specific register in the config space of the DRAM Controller, at offset 0x50, in order to allow guest firmware to query the TSEG (SMRAM) size. According to Intel Document Number 316966-002, Table 5-1 "DRAM Controller Register Address Map (D0:F0)": Warning: Address locations that are not listed are considered Intel Reserved registers locations. Reads to Reserved registers may return non-zero values. Writes to reserved locations may cause system failures. All registers that are defined in the PCI 2.3 specification, but are not necessary or implemented in this component are simply not included in this document. The reserved/unimplemented space in the PCI configuration header space is not documented as such in this summary. Offsets 0x50 and 0x51 are not listed in Table 5-1. They are also not part of the standard PCI config space header. And they precede the capability list as well, which starts at 0xe0 for this device. When the guest writes value 0xffff to this register, the value that can be read back is that of "mch.extended-tseg-mbytes" -- unless it remains 0xffff. The guest is required to write 0xffff first (as opposed to a read-only register) because PCI config space is generally not cleared on QEMU reset, and after S3 resume or reboot, new guest firmware running on old QEMU could read a guest OS-injected value from this register. After reading the available "extended" TSEG size, the guest firmware may actually request that TSEG size by writing pattern 11b to the ESMRAMC register's TSEG_SZ bit-field. (The Intel spec referenced above defines only patterns 00b (1MB), 01b (2MB) and 10b (8MB); 11b is reserved.) On the QEMU command line, the value can be set with -global mch.extended-tseg-mbytes=N The default value for 2.10+ q35 machine types is 16. The value is limited to 0xfff (4095) at the moment, purely so that the product (4095 MB) can be stored to the uint32_t variable "tseg_size" in mch_update_smram(). Users are responsible for choosing sensible TSEG sizes. On 2.9 and earlier q35 machine types, the default value is 0. This lets the 11b bit pattern in ESMRAMC.TSEG_SZ, and the register at offset 0x50, keep their original behavior. When "extended-tseg-mbytes" is nonzero, the new register at offset 0x50 is set to that value on reset, for completeness. PCI config space is migrated automatically, so no VMSD changes are necessary. Cc: "Michael S. Tsirkin" <mst@redhat.com> Cc: Gerd Hoffmann <kraxel@redhat.com> Cc: Paolo Bonzini <pbonzini@redhat.com> Ref: https://bugzilla.redhat.com/show_bug.cgi?id=1447027 Ref: https://lists.01.org/pipermail/edk2-devel/2017-May/010456.html Signed-off-by: Laszlo Ersek <lersek@redhat.com> Reviewed-by: Gerd Hoffmann <kraxel@redhat.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-06-09 00:10:13 +08:00
static void mch_update_ext_tseg_mbytes(MCHPCIState *mch)
{
PCIDevice *pd = PCI_DEVICE(mch);
uint8_t *reg = pd->config + MCH_HOST_BRIDGE_EXT_TSEG_MBYTES;
if (mch->ext_tseg_mbytes > 0 &&
pci_get_word(reg) == MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_QUERY) {
pci_set_word(reg, mch->ext_tseg_mbytes);
}
}
static void mch_write_config(PCIDevice *d,
uint32_t address, uint32_t val, int len)
{
MCHPCIState *mch = MCH_PCI_DEVICE(d);
pci_default_write_config(d, address, val, len);
if (ranges_overlap(address, len, MCH_HOST_BRIDGE_PAM0,
MCH_HOST_BRIDGE_PAM_SIZE)) {
mch_update_pam(mch);
}
if (ranges_overlap(address, len, MCH_HOST_BRIDGE_PCIEXBAR,
MCH_HOST_BRIDGE_PCIEXBAR_SIZE)) {
mch_update_pciexbar(mch);
}
if (ranges_overlap(address, len, MCH_HOST_BRIDGE_SMRAM,
MCH_HOST_BRIDGE_SMRAM_SIZE)) {
mch_update_smram(mch);
}
q35/mch: implement extended TSEG sizes The q35 machine type currently lets the guest firmware select a 1MB, 2MB or 8MB TSEG (basically, SMRAM) size. In edk2/OVMF, we use 8MB, but even that is not enough when a lot of VCPUs (more than approx. 224) are configured -- SMRAM footprint scales largely proportionally with VCPU count. Introduce a new property for "mch" called "extended-tseg-mbytes", which expresses (in megabytes) the user's choice of TSEG (SMRAM) size. Invent a new, QEMU-specific register in the config space of the DRAM Controller, at offset 0x50, in order to allow guest firmware to query the TSEG (SMRAM) size. According to Intel Document Number 316966-002, Table 5-1 "DRAM Controller Register Address Map (D0:F0)": Warning: Address locations that are not listed are considered Intel Reserved registers locations. Reads to Reserved registers may return non-zero values. Writes to reserved locations may cause system failures. All registers that are defined in the PCI 2.3 specification, but are not necessary or implemented in this component are simply not included in this document. The reserved/unimplemented space in the PCI configuration header space is not documented as such in this summary. Offsets 0x50 and 0x51 are not listed in Table 5-1. They are also not part of the standard PCI config space header. And they precede the capability list as well, which starts at 0xe0 for this device. When the guest writes value 0xffff to this register, the value that can be read back is that of "mch.extended-tseg-mbytes" -- unless it remains 0xffff. The guest is required to write 0xffff first (as opposed to a read-only register) because PCI config space is generally not cleared on QEMU reset, and after S3 resume or reboot, new guest firmware running on old QEMU could read a guest OS-injected value from this register. After reading the available "extended" TSEG size, the guest firmware may actually request that TSEG size by writing pattern 11b to the ESMRAMC register's TSEG_SZ bit-field. (The Intel spec referenced above defines only patterns 00b (1MB), 01b (2MB) and 10b (8MB); 11b is reserved.) On the QEMU command line, the value can be set with -global mch.extended-tseg-mbytes=N The default value for 2.10+ q35 machine types is 16. The value is limited to 0xfff (4095) at the moment, purely so that the product (4095 MB) can be stored to the uint32_t variable "tseg_size" in mch_update_smram(). Users are responsible for choosing sensible TSEG sizes. On 2.9 and earlier q35 machine types, the default value is 0. This lets the 11b bit pattern in ESMRAMC.TSEG_SZ, and the register at offset 0x50, keep their original behavior. When "extended-tseg-mbytes" is nonzero, the new register at offset 0x50 is set to that value on reset, for completeness. PCI config space is migrated automatically, so no VMSD changes are necessary. Cc: "Michael S. Tsirkin" <mst@redhat.com> Cc: Gerd Hoffmann <kraxel@redhat.com> Cc: Paolo Bonzini <pbonzini@redhat.com> Ref: https://bugzilla.redhat.com/show_bug.cgi?id=1447027 Ref: https://lists.01.org/pipermail/edk2-devel/2017-May/010456.html Signed-off-by: Laszlo Ersek <lersek@redhat.com> Reviewed-by: Gerd Hoffmann <kraxel@redhat.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-06-09 00:10:13 +08:00
if (ranges_overlap(address, len, MCH_HOST_BRIDGE_EXT_TSEG_MBYTES,
MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_SIZE)) {
mch_update_ext_tseg_mbytes(mch);
}
}
static void mch_update(MCHPCIState *mch)
{
mch_update_pciexbar(mch);
mch_update_pam(mch);
mch_update_smram(mch);
q35/mch: implement extended TSEG sizes The q35 machine type currently lets the guest firmware select a 1MB, 2MB or 8MB TSEG (basically, SMRAM) size. In edk2/OVMF, we use 8MB, but even that is not enough when a lot of VCPUs (more than approx. 224) are configured -- SMRAM footprint scales largely proportionally with VCPU count. Introduce a new property for "mch" called "extended-tseg-mbytes", which expresses (in megabytes) the user's choice of TSEG (SMRAM) size. Invent a new, QEMU-specific register in the config space of the DRAM Controller, at offset 0x50, in order to allow guest firmware to query the TSEG (SMRAM) size. According to Intel Document Number 316966-002, Table 5-1 "DRAM Controller Register Address Map (D0:F0)": Warning: Address locations that are not listed are considered Intel Reserved registers locations. Reads to Reserved registers may return non-zero values. Writes to reserved locations may cause system failures. All registers that are defined in the PCI 2.3 specification, but are not necessary or implemented in this component are simply not included in this document. The reserved/unimplemented space in the PCI configuration header space is not documented as such in this summary. Offsets 0x50 and 0x51 are not listed in Table 5-1. They are also not part of the standard PCI config space header. And they precede the capability list as well, which starts at 0xe0 for this device. When the guest writes value 0xffff to this register, the value that can be read back is that of "mch.extended-tseg-mbytes" -- unless it remains 0xffff. The guest is required to write 0xffff first (as opposed to a read-only register) because PCI config space is generally not cleared on QEMU reset, and after S3 resume or reboot, new guest firmware running on old QEMU could read a guest OS-injected value from this register. After reading the available "extended" TSEG size, the guest firmware may actually request that TSEG size by writing pattern 11b to the ESMRAMC register's TSEG_SZ bit-field. (The Intel spec referenced above defines only patterns 00b (1MB), 01b (2MB) and 10b (8MB); 11b is reserved.) On the QEMU command line, the value can be set with -global mch.extended-tseg-mbytes=N The default value for 2.10+ q35 machine types is 16. The value is limited to 0xfff (4095) at the moment, purely so that the product (4095 MB) can be stored to the uint32_t variable "tseg_size" in mch_update_smram(). Users are responsible for choosing sensible TSEG sizes. On 2.9 and earlier q35 machine types, the default value is 0. This lets the 11b bit pattern in ESMRAMC.TSEG_SZ, and the register at offset 0x50, keep their original behavior. When "extended-tseg-mbytes" is nonzero, the new register at offset 0x50 is set to that value on reset, for completeness. PCI config space is migrated automatically, so no VMSD changes are necessary. Cc: "Michael S. Tsirkin" <mst@redhat.com> Cc: Gerd Hoffmann <kraxel@redhat.com> Cc: Paolo Bonzini <pbonzini@redhat.com> Ref: https://bugzilla.redhat.com/show_bug.cgi?id=1447027 Ref: https://lists.01.org/pipermail/edk2-devel/2017-May/010456.html Signed-off-by: Laszlo Ersek <lersek@redhat.com> Reviewed-by: Gerd Hoffmann <kraxel@redhat.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-06-09 00:10:13 +08:00
mch_update_ext_tseg_mbytes(mch);
}
static int mch_post_load(void *opaque, int version_id)
{
MCHPCIState *mch = opaque;
mch_update(mch);
return 0;
}
static const VMStateDescription vmstate_mch = {
.name = "mch",
.version_id = 1,
.minimum_version_id = 1,
.post_load = mch_post_load,
.fields = (VMStateField[]) {
VMSTATE_PCI_DEVICE(parent_obj, MCHPCIState),
/* Used to be smm_enabled, which was basically always zero because
* SeaBIOS hardly uses SMM. SMRAM is now handled by CPU code.
*/
VMSTATE_UNUSED(1),
VMSTATE_END_OF_LIST()
}
};
static void mch_reset(DeviceState *qdev)
{
PCIDevice *d = PCI_DEVICE(qdev);
MCHPCIState *mch = MCH_PCI_DEVICE(d);
pci_set_quad(d->config + MCH_HOST_BRIDGE_PCIEXBAR,
MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT);
d->config[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_DEFAULT;
d->config[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_DEFAULT;
d->wmask[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_WMASK;
d->wmask[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_WMASK;
q35/mch: implement extended TSEG sizes The q35 machine type currently lets the guest firmware select a 1MB, 2MB or 8MB TSEG (basically, SMRAM) size. In edk2/OVMF, we use 8MB, but even that is not enough when a lot of VCPUs (more than approx. 224) are configured -- SMRAM footprint scales largely proportionally with VCPU count. Introduce a new property for "mch" called "extended-tseg-mbytes", which expresses (in megabytes) the user's choice of TSEG (SMRAM) size. Invent a new, QEMU-specific register in the config space of the DRAM Controller, at offset 0x50, in order to allow guest firmware to query the TSEG (SMRAM) size. According to Intel Document Number 316966-002, Table 5-1 "DRAM Controller Register Address Map (D0:F0)": Warning: Address locations that are not listed are considered Intel Reserved registers locations. Reads to Reserved registers may return non-zero values. Writes to reserved locations may cause system failures. All registers that are defined in the PCI 2.3 specification, but are not necessary or implemented in this component are simply not included in this document. The reserved/unimplemented space in the PCI configuration header space is not documented as such in this summary. Offsets 0x50 and 0x51 are not listed in Table 5-1. They are also not part of the standard PCI config space header. And they precede the capability list as well, which starts at 0xe0 for this device. When the guest writes value 0xffff to this register, the value that can be read back is that of "mch.extended-tseg-mbytes" -- unless it remains 0xffff. The guest is required to write 0xffff first (as opposed to a read-only register) because PCI config space is generally not cleared on QEMU reset, and after S3 resume or reboot, new guest firmware running on old QEMU could read a guest OS-injected value from this register. After reading the available "extended" TSEG size, the guest firmware may actually request that TSEG size by writing pattern 11b to the ESMRAMC register's TSEG_SZ bit-field. (The Intel spec referenced above defines only patterns 00b (1MB), 01b (2MB) and 10b (8MB); 11b is reserved.) On the QEMU command line, the value can be set with -global mch.extended-tseg-mbytes=N The default value for 2.10+ q35 machine types is 16. The value is limited to 0xfff (4095) at the moment, purely so that the product (4095 MB) can be stored to the uint32_t variable "tseg_size" in mch_update_smram(). Users are responsible for choosing sensible TSEG sizes. On 2.9 and earlier q35 machine types, the default value is 0. This lets the 11b bit pattern in ESMRAMC.TSEG_SZ, and the register at offset 0x50, keep their original behavior. When "extended-tseg-mbytes" is nonzero, the new register at offset 0x50 is set to that value on reset, for completeness. PCI config space is migrated automatically, so no VMSD changes are necessary. Cc: "Michael S. Tsirkin" <mst@redhat.com> Cc: Gerd Hoffmann <kraxel@redhat.com> Cc: Paolo Bonzini <pbonzini@redhat.com> Ref: https://bugzilla.redhat.com/show_bug.cgi?id=1447027 Ref: https://lists.01.org/pipermail/edk2-devel/2017-May/010456.html Signed-off-by: Laszlo Ersek <lersek@redhat.com> Reviewed-by: Gerd Hoffmann <kraxel@redhat.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-06-09 00:10:13 +08:00
if (mch->ext_tseg_mbytes > 0) {
pci_set_word(d->config + MCH_HOST_BRIDGE_EXT_TSEG_MBYTES,
MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_QUERY);
}
mch_update(mch);
}
static void mch_realize(PCIDevice *d, Error **errp)
{
int i;
MCHPCIState *mch = MCH_PCI_DEVICE(d);
q35/mch: implement extended TSEG sizes The q35 machine type currently lets the guest firmware select a 1MB, 2MB or 8MB TSEG (basically, SMRAM) size. In edk2/OVMF, we use 8MB, but even that is not enough when a lot of VCPUs (more than approx. 224) are configured -- SMRAM footprint scales largely proportionally with VCPU count. Introduce a new property for "mch" called "extended-tseg-mbytes", which expresses (in megabytes) the user's choice of TSEG (SMRAM) size. Invent a new, QEMU-specific register in the config space of the DRAM Controller, at offset 0x50, in order to allow guest firmware to query the TSEG (SMRAM) size. According to Intel Document Number 316966-002, Table 5-1 "DRAM Controller Register Address Map (D0:F0)": Warning: Address locations that are not listed are considered Intel Reserved registers locations. Reads to Reserved registers may return non-zero values. Writes to reserved locations may cause system failures. All registers that are defined in the PCI 2.3 specification, but are not necessary or implemented in this component are simply not included in this document. The reserved/unimplemented space in the PCI configuration header space is not documented as such in this summary. Offsets 0x50 and 0x51 are not listed in Table 5-1. They are also not part of the standard PCI config space header. And they precede the capability list as well, which starts at 0xe0 for this device. When the guest writes value 0xffff to this register, the value that can be read back is that of "mch.extended-tseg-mbytes" -- unless it remains 0xffff. The guest is required to write 0xffff first (as opposed to a read-only register) because PCI config space is generally not cleared on QEMU reset, and after S3 resume or reboot, new guest firmware running on old QEMU could read a guest OS-injected value from this register. After reading the available "extended" TSEG size, the guest firmware may actually request that TSEG size by writing pattern 11b to the ESMRAMC register's TSEG_SZ bit-field. (The Intel spec referenced above defines only patterns 00b (1MB), 01b (2MB) and 10b (8MB); 11b is reserved.) On the QEMU command line, the value can be set with -global mch.extended-tseg-mbytes=N The default value for 2.10+ q35 machine types is 16. The value is limited to 0xfff (4095) at the moment, purely so that the product (4095 MB) can be stored to the uint32_t variable "tseg_size" in mch_update_smram(). Users are responsible for choosing sensible TSEG sizes. On 2.9 and earlier q35 machine types, the default value is 0. This lets the 11b bit pattern in ESMRAMC.TSEG_SZ, and the register at offset 0x50, keep their original behavior. When "extended-tseg-mbytes" is nonzero, the new register at offset 0x50 is set to that value on reset, for completeness. PCI config space is migrated automatically, so no VMSD changes are necessary. Cc: "Michael S. Tsirkin" <mst@redhat.com> Cc: Gerd Hoffmann <kraxel@redhat.com> Cc: Paolo Bonzini <pbonzini@redhat.com> Ref: https://bugzilla.redhat.com/show_bug.cgi?id=1447027 Ref: https://lists.01.org/pipermail/edk2-devel/2017-May/010456.html Signed-off-by: Laszlo Ersek <lersek@redhat.com> Reviewed-by: Gerd Hoffmann <kraxel@redhat.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-06-09 00:10:13 +08:00
if (mch->ext_tseg_mbytes > MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_MAX) {
error_setg(errp, "invalid extended-tseg-mbytes value: %" PRIu16,
mch->ext_tseg_mbytes);
return;
}
/* setup pci memory mapping */
pc_pci_as_mapping_init(OBJECT(mch), mch->system_memory,
mch->pci_address_space);
/* if *disabled* show SMRAM to all CPUs */
memory_region_init_alias(&mch->smram_region, OBJECT(mch), "smram-region",
mch->pci_address_space, MCH_HOST_BRIDGE_SMRAM_C_BASE,
MCH_HOST_BRIDGE_SMRAM_C_SIZE);
memory_region_add_subregion_overlap(mch->system_memory, MCH_HOST_BRIDGE_SMRAM_C_BASE,
&mch->smram_region, 1);
memory_region_set_enabled(&mch->smram_region, true);
memory_region_init_alias(&mch->open_high_smram, OBJECT(mch), "smram-open-high",
mch->ram_memory, MCH_HOST_BRIDGE_SMRAM_C_BASE,
MCH_HOST_BRIDGE_SMRAM_C_SIZE);
memory_region_add_subregion_overlap(mch->system_memory, 0xfeda0000,
&mch->open_high_smram, 1);
memory_region_set_enabled(&mch->open_high_smram, false);
/* smram, as seen by SMM CPUs */
memory_region_init(&mch->smram, OBJECT(mch), "smram", 1ull << 32);
memory_region_set_enabled(&mch->smram, true);
memory_region_init_alias(&mch->low_smram, OBJECT(mch), "smram-low",
mch->ram_memory, MCH_HOST_BRIDGE_SMRAM_C_BASE,
MCH_HOST_BRIDGE_SMRAM_C_SIZE);
memory_region_set_enabled(&mch->low_smram, true);
memory_region_add_subregion(&mch->smram, MCH_HOST_BRIDGE_SMRAM_C_BASE,
&mch->low_smram);
memory_region_init_alias(&mch->high_smram, OBJECT(mch), "smram-high",
mch->ram_memory, MCH_HOST_BRIDGE_SMRAM_C_BASE,
MCH_HOST_BRIDGE_SMRAM_C_SIZE);
memory_region_set_enabled(&mch->high_smram, true);
memory_region_add_subregion(&mch->smram, 0xfeda0000, &mch->high_smram);
memory_region_init_io(&mch->tseg_blackhole, OBJECT(mch),
&tseg_blackhole_ops, NULL,
"tseg-blackhole", 0);
memory_region_set_enabled(&mch->tseg_blackhole, false);
memory_region_add_subregion_overlap(mch->system_memory,
mch->below_4g_mem_size,
&mch->tseg_blackhole, 1);
memory_region_init_alias(&mch->tseg_window, OBJECT(mch), "tseg-window",
mch->ram_memory, mch->below_4g_mem_size, 0);
memory_region_set_enabled(&mch->tseg_window, false);
memory_region_add_subregion(&mch->smram, mch->below_4g_mem_size,
&mch->tseg_window);
object_property_add_const_link(qdev_get_machine(), "smram",
OBJECT(&mch->smram), &error_abort);
init_pam(DEVICE(mch), mch->ram_memory, mch->system_memory,
mch->pci_address_space, &mch->pam_regions[0],
PAM_BIOS_BASE, PAM_BIOS_SIZE);
for (i = 0; i < 12; ++i) {
init_pam(DEVICE(mch), mch->ram_memory, mch->system_memory,
mch->pci_address_space, &mch->pam_regions[i+1],
PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE, PAM_EXPAN_SIZE);
}
}
uint64_t mch_mcfg_base(void)
{
bool ambiguous;
Object *o = object_resolve_path_type("", TYPE_MCH_PCI_DEVICE, &ambiguous);
if (!o) {
return 0;
}
return MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT;
}
q35/mch: implement extended TSEG sizes The q35 machine type currently lets the guest firmware select a 1MB, 2MB or 8MB TSEG (basically, SMRAM) size. In edk2/OVMF, we use 8MB, but even that is not enough when a lot of VCPUs (more than approx. 224) are configured -- SMRAM footprint scales largely proportionally with VCPU count. Introduce a new property for "mch" called "extended-tseg-mbytes", which expresses (in megabytes) the user's choice of TSEG (SMRAM) size. Invent a new, QEMU-specific register in the config space of the DRAM Controller, at offset 0x50, in order to allow guest firmware to query the TSEG (SMRAM) size. According to Intel Document Number 316966-002, Table 5-1 "DRAM Controller Register Address Map (D0:F0)": Warning: Address locations that are not listed are considered Intel Reserved registers locations. Reads to Reserved registers may return non-zero values. Writes to reserved locations may cause system failures. All registers that are defined in the PCI 2.3 specification, but are not necessary or implemented in this component are simply not included in this document. The reserved/unimplemented space in the PCI configuration header space is not documented as such in this summary. Offsets 0x50 and 0x51 are not listed in Table 5-1. They are also not part of the standard PCI config space header. And they precede the capability list as well, which starts at 0xe0 for this device. When the guest writes value 0xffff to this register, the value that can be read back is that of "mch.extended-tseg-mbytes" -- unless it remains 0xffff. The guest is required to write 0xffff first (as opposed to a read-only register) because PCI config space is generally not cleared on QEMU reset, and after S3 resume or reboot, new guest firmware running on old QEMU could read a guest OS-injected value from this register. After reading the available "extended" TSEG size, the guest firmware may actually request that TSEG size by writing pattern 11b to the ESMRAMC register's TSEG_SZ bit-field. (The Intel spec referenced above defines only patterns 00b (1MB), 01b (2MB) and 10b (8MB); 11b is reserved.) On the QEMU command line, the value can be set with -global mch.extended-tseg-mbytes=N The default value for 2.10+ q35 machine types is 16. The value is limited to 0xfff (4095) at the moment, purely so that the product (4095 MB) can be stored to the uint32_t variable "tseg_size" in mch_update_smram(). Users are responsible for choosing sensible TSEG sizes. On 2.9 and earlier q35 machine types, the default value is 0. This lets the 11b bit pattern in ESMRAMC.TSEG_SZ, and the register at offset 0x50, keep their original behavior. When "extended-tseg-mbytes" is nonzero, the new register at offset 0x50 is set to that value on reset, for completeness. PCI config space is migrated automatically, so no VMSD changes are necessary. Cc: "Michael S. Tsirkin" <mst@redhat.com> Cc: Gerd Hoffmann <kraxel@redhat.com> Cc: Paolo Bonzini <pbonzini@redhat.com> Ref: https://bugzilla.redhat.com/show_bug.cgi?id=1447027 Ref: https://lists.01.org/pipermail/edk2-devel/2017-May/010456.html Signed-off-by: Laszlo Ersek <lersek@redhat.com> Reviewed-by: Gerd Hoffmann <kraxel@redhat.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-06-09 00:10:13 +08:00
static Property mch_props[] = {
DEFINE_PROP_UINT16("extended-tseg-mbytes", MCHPCIState, ext_tseg_mbytes,
16),
DEFINE_PROP_END_OF_LIST(),
};
static void mch_class_init(ObjectClass *klass, void *data)
{
PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
DeviceClass *dc = DEVICE_CLASS(klass);
k->realize = mch_realize;
k->config_write = mch_write_config;
dc->reset = mch_reset;
q35/mch: implement extended TSEG sizes The q35 machine type currently lets the guest firmware select a 1MB, 2MB or 8MB TSEG (basically, SMRAM) size. In edk2/OVMF, we use 8MB, but even that is not enough when a lot of VCPUs (more than approx. 224) are configured -- SMRAM footprint scales largely proportionally with VCPU count. Introduce a new property for "mch" called "extended-tseg-mbytes", which expresses (in megabytes) the user's choice of TSEG (SMRAM) size. Invent a new, QEMU-specific register in the config space of the DRAM Controller, at offset 0x50, in order to allow guest firmware to query the TSEG (SMRAM) size. According to Intel Document Number 316966-002, Table 5-1 "DRAM Controller Register Address Map (D0:F0)": Warning: Address locations that are not listed are considered Intel Reserved registers locations. Reads to Reserved registers may return non-zero values. Writes to reserved locations may cause system failures. All registers that are defined in the PCI 2.3 specification, but are not necessary or implemented in this component are simply not included in this document. The reserved/unimplemented space in the PCI configuration header space is not documented as such in this summary. Offsets 0x50 and 0x51 are not listed in Table 5-1. They are also not part of the standard PCI config space header. And they precede the capability list as well, which starts at 0xe0 for this device. When the guest writes value 0xffff to this register, the value that can be read back is that of "mch.extended-tseg-mbytes" -- unless it remains 0xffff. The guest is required to write 0xffff first (as opposed to a read-only register) because PCI config space is generally not cleared on QEMU reset, and after S3 resume or reboot, new guest firmware running on old QEMU could read a guest OS-injected value from this register. After reading the available "extended" TSEG size, the guest firmware may actually request that TSEG size by writing pattern 11b to the ESMRAMC register's TSEG_SZ bit-field. (The Intel spec referenced above defines only patterns 00b (1MB), 01b (2MB) and 10b (8MB); 11b is reserved.) On the QEMU command line, the value can be set with -global mch.extended-tseg-mbytes=N The default value for 2.10+ q35 machine types is 16. The value is limited to 0xfff (4095) at the moment, purely so that the product (4095 MB) can be stored to the uint32_t variable "tseg_size" in mch_update_smram(). Users are responsible for choosing sensible TSEG sizes. On 2.9 and earlier q35 machine types, the default value is 0. This lets the 11b bit pattern in ESMRAMC.TSEG_SZ, and the register at offset 0x50, keep their original behavior. When "extended-tseg-mbytes" is nonzero, the new register at offset 0x50 is set to that value on reset, for completeness. PCI config space is migrated automatically, so no VMSD changes are necessary. Cc: "Michael S. Tsirkin" <mst@redhat.com> Cc: Gerd Hoffmann <kraxel@redhat.com> Cc: Paolo Bonzini <pbonzini@redhat.com> Ref: https://bugzilla.redhat.com/show_bug.cgi?id=1447027 Ref: https://lists.01.org/pipermail/edk2-devel/2017-May/010456.html Signed-off-by: Laszlo Ersek <lersek@redhat.com> Reviewed-by: Gerd Hoffmann <kraxel@redhat.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-06-09 00:10:13 +08:00
dc->props = mch_props;
set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
dc->desc = "Host bridge";
dc->vmsd = &vmstate_mch;
k->vendor_id = PCI_VENDOR_ID_INTEL;
i386: clarify that the Q35 machine type implements a P35 chipset The 'q35' machine type implements an Intel Series 3 chipset, of which there are several variants: https://www.intel.com/Assets/PDF/datasheet/316966.pdf The key difference between the 82P35 MCH ('p35', PCI device ID 0x29c0) and 82Q35 GMCH ('q35', PCI device ID 0x29b0) variants is that the latter has an integrated graphics adapter. QEMU does not implement integrated graphics, so uses the PCI ID for the 82P35 chipset, despite calling the machine type 'q35'. Thus we rename the PCI device ID constant to reflect reality, to avoid confusing future developers. The new name more closely matches what pci.ids reports it to be: $ grep P35 /usr/share/hwdata/pci.ids | grep 29 29c0 82G33/G31/P35/P31 Express DRAM Controller 29c1 82G33/G31/P35/P31 Express PCI Express Root Port 29c4 82G33/G31/P35/P31 Express MEI Controller 29c5 82G33/G31/P35/P31 Express MEI Controller 29c6 82G33/G31/P35/P31 Express PT IDER Controller 29c7 82G33/G31/P35/P31 Express Serial KT Controller $ grep Q35 /usr/share/hwdata/pci.ids | grep 29 29b0 82Q35 Express DRAM Controller 29b1 82Q35 Express PCI Express Root Port 29b2 82Q35 Express Integrated Graphics Controller 29b3 82Q35 Express Integrated Graphics Controller 29b4 82Q35 Express MEI Controller 29b5 82Q35 Express MEI Controller 29b6 82Q35 Express PT IDER Controller 29b7 82Q35 Express Serial KT Controller Arguably the QEMU machine type should be named 'p35'. At this point in time, however, it is not worth the churn for management applications & documentation to worry about renaming it. Signed-off-by: Daniel P. Berrangé <berrange@redhat.com> Message-Id: <20180830105757.10577-1-berrange@redhat.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2018-08-30 18:57:57 +08:00
/*
* The 'q35' machine type implements an Intel Series 3 chipset,
* of which there are several variants. The key difference between
* the 82P35 MCH ('p35') and 82Q35 GMCH ('q35') variants is that
* the latter has an integrated graphics adapter. QEMU does not
* implement integrated graphics, so uses the PCI ID for the 82P35
* chipset.
*/
k->device_id = PCI_DEVICE_ID_INTEL_P35_MCH;
k->revision = MCH_HOST_BRIDGE_REVISION_DEFAULT;
k->class_id = PCI_CLASS_BRIDGE_HOST;
/*
* PCI-facing part of the host bridge, not usable without the
* host-facing part, which can't be device_add'ed, yet.
*/
qdev: Replace cannot_instantiate_with_device_add_yet with !user_creatable cannot_instantiate_with_device_add_yet was introduced by commit efec3dd631d94160288392721a5f9c39e50fb2bc to replace no_user. It was supposed to be a temporary measure. When it was introduced, we had 54 cannot_instantiate_with_device_add_yet=true lines in the code. Today (3 years later) this number has not shrunk: we now have 57 cannot_instantiate_with_device_add_yet=true lines. I think it is safe to say it is not a temporary measure, and we won't see the flag go away soon. Instead of a long field name that misleads people to believe it is temporary, replace it a shorter and less misleading field: user_creatable. Except for code comments, changes were generated using the following Coccinelle patch: @@ expression DC; @@ ( -DC->cannot_instantiate_with_device_add_yet = false; +DC->user_creatable = true; | -DC->cannot_instantiate_with_device_add_yet = true; +DC->user_creatable = false; ) @@ typedef ObjectClass; expression dc; identifier class, data; @@ static void device_class_init(ObjectClass *class, void *data) { ... dc->hotpluggable = true; +dc->user_creatable = true; ... } @@ @@ struct DeviceClass { ... -bool cannot_instantiate_with_device_add_yet; +bool user_creatable; ... } @@ expression DC; @@ ( -!DC->cannot_instantiate_with_device_add_yet +DC->user_creatable | -DC->cannot_instantiate_with_device_add_yet +!DC->user_creatable ) Cc: Alistair Francis <alistair.francis@xilinx.com> Cc: Laszlo Ersek <lersek@redhat.com> Cc: Marcel Apfelbaum <marcel@redhat.com> Cc: Markus Armbruster <armbru@redhat.com> Cc: Peter Maydell <peter.maydell@linaro.org> Cc: Thomas Huth <thuth@redhat.com> Acked-by: Alistair Francis <alistair.francis@xilinx.com> Reviewed-by: Thomas Huth <thuth@redhat.com> Reviewed-by: Marcel Apfelbaum <marcel@redhat.com> Acked-by: Marcel Apfelbaum <marcel@redhat.com> Signed-off-by: Eduardo Habkost <ehabkost@redhat.com> Message-Id: <20170503203604.31462-2-ehabkost@redhat.com> [ehabkost: kept "TODO remove once we're there" comment] Reviewed-by: Markus Armbruster <armbru@redhat.com> Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-05-04 04:35:44 +08:00
dc->user_creatable = false;
}
static const TypeInfo mch_info = {
.name = TYPE_MCH_PCI_DEVICE,
.parent = TYPE_PCI_DEVICE,
.instance_size = sizeof(MCHPCIState),
.class_init = mch_class_init,
pci: Add INTERFACE_CONVENTIONAL_PCI_DEVICE to Conventional PCI devices Add INTERFACE_CONVENTIONAL_PCI_DEVICE to all direct subtypes of TYPE_PCI_DEVICE, except: 1) The ones that already have INTERFACE_PCIE_DEVICE set: * base-xhci * e1000e * nvme * pvscsi * vfio-pci * virtio-pci * vmxnet3 2) base-pci-bridge Not all PCI bridges are Conventional PCI devices, so INTERFACE_CONVENTIONAL_PCI_DEVICE is added only to the subtypes that are actually Conventional PCI: * dec-21154-p2p-bridge * i82801b11-bridge * pbm-bridge * pci-bridge The direct subtypes of base-pci-bridge not touched by this patch are: * xilinx-pcie-root: Already marked as PCIe-only. * pcie-pci-bridge: Already marked as PCIe-only. * pcie-port: all non-abstract subtypes of pcie-port are already marked as PCIe-only devices. 3) megasas-base Not all megasas devices are Conventional PCI devices, so the interface names are added to the subclasses registered by megasas_register_types(), according to information in the megasas_devices[] array. "megasas-gen2" already implements INTERFACE_PCIE_DEVICE, so add INTERFACE_CONVENTIONAL_PCI_DEVICE only to "megasas". Acked-by: Alberto Garcia <berto@igalia.com> Acked-by: John Snow <jsnow@redhat.com> Acked-by: Anthony PERARD <anthony.perard@citrix.com> Signed-off-by: Eduardo Habkost <ehabkost@redhat.com> Reviewed-by: David Gibson <david@gibson.dropbear.id.au> Acked-by: David Gibson <david@gibson.dropbear.id.au> Reviewed-by: Marcel Apfelbaum <marcel@redhat.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-09-28 03:56:34 +08:00
.interfaces = (InterfaceInfo[]) {
{ INTERFACE_CONVENTIONAL_PCI_DEVICE },
{ },
},
};
static void q35_register(void)
{
type_register_static(&mch_info);
type_register_static(&q35_host_info);
}
type_init(q35_register);