From 1f43571604da85c62f25f3ba6d275b1b5ea76ca2 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Mon, 5 Jun 2017 12:19:27 -0300 Subject: [PATCH 01/10] pc: Use "min-[x]level" on compat_props Since the automatic cpuid-level code was introduced in commit c39c0edf9bb3b968ba95484465a50c7b19f4aa3a ("target-i386: Automatically set level/xlevel/xlevel2 when needed"), the CPU model tables just define the default CPUID level code (set using "min-level"). Setting "[x]level" forces CPUID level to a specific value and disable the automatic-level logic. But the PC compat code was not updated and the existing "[x]level" compat properties broke compatibility for people using features that triggered the auto-level code. To keep previous behavior, we should set "min-[x]level" instead of "[x]level" on compat_props. This was not a problem for most cases, because old machine-types don't have full-cpuid-auto-level enabled. The only common use case it broke was the CPUID[7] auto-level code, that was already enabled since the first CPUID[7] feature was introduced (in QEMU 1.4.0). This causes the regression reported at: https://bugzilla.redhat.com/show_bug.cgi?id=1454641 Change the PC compat code to use "min-[x]level" instead of "[x]level" on compat_props, and add new test cases to ensure we don't break this again. Reported-by: "Guo, Zhiyi" Fixes: c39c0edf9bb ("target-i386: Automatically set level/xlevel/xlevel2 when needed") Cc: qemu-stable@nongnu.org Acked-by: Michael S. Tsirkin Signed-off-by: Eduardo Habkost --- include/hw/i386/pc.h | 42 +++++++++++++++++------------------ tests/test-x86-cpuid-compat.c | 38 +++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 21 deletions(-) diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index e447f5d8f4..d071c9c0e9 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -566,75 +566,75 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); .value = "off",\ },{\ .driver = "qemu64" "-" TYPE_X86_CPU,\ - .property = "level",\ + .property = "min-level",\ .value = stringify(4),\ },{\ .driver = "kvm64" "-" TYPE_X86_CPU,\ - .property = "level",\ + .property = "min-level",\ .value = stringify(5),\ },{\ .driver = "pentium3" "-" TYPE_X86_CPU,\ - .property = "level",\ + .property = "min-level",\ .value = stringify(2),\ },{\ .driver = "n270" "-" TYPE_X86_CPU,\ - .property = "level",\ + .property = "min-level",\ .value = stringify(5),\ },{\ .driver = "Conroe" "-" TYPE_X86_CPU,\ - .property = "level",\ + .property = "min-level",\ .value = stringify(4),\ },{\ .driver = "Penryn" "-" TYPE_X86_CPU,\ - .property = "level",\ + .property = "min-level",\ .value = stringify(4),\ },{\ .driver = "Nehalem" "-" TYPE_X86_CPU,\ - .property = "level",\ + .property = "min-level",\ .value = stringify(4),\ },{\ .driver = "n270" "-" TYPE_X86_CPU,\ - .property = "xlevel",\ + .property = "min-xlevel",\ .value = stringify(0x8000000a),\ },{\ .driver = "Penryn" "-" TYPE_X86_CPU,\ - .property = "xlevel",\ + .property = "min-xlevel",\ .value = stringify(0x8000000a),\ },{\ .driver = "Conroe" "-" TYPE_X86_CPU,\ - .property = "xlevel",\ + .property = "min-xlevel",\ .value = stringify(0x8000000a),\ },{\ .driver = "Nehalem" "-" TYPE_X86_CPU,\ - .property = "xlevel",\ + .property = "min-xlevel",\ .value = stringify(0x8000000a),\ },{\ .driver = "Westmere" "-" TYPE_X86_CPU,\ - .property = "xlevel",\ + .property = "min-xlevel",\ .value = stringify(0x8000000a),\ },{\ .driver = "SandyBridge" "-" TYPE_X86_CPU,\ - .property = "xlevel",\ + .property = "min-xlevel",\ .value = stringify(0x8000000a),\ },{\ .driver = "IvyBridge" "-" TYPE_X86_CPU,\ - .property = "xlevel",\ + .property = "min-xlevel",\ .value = stringify(0x8000000a),\ },{\ .driver = "Haswell" "-" TYPE_X86_CPU,\ - .property = "xlevel",\ + .property = "min-xlevel",\ .value = stringify(0x8000000a),\ },{\ .driver = "Haswell-noTSX" "-" TYPE_X86_CPU,\ - .property = "xlevel",\ + .property = "min-xlevel",\ .value = stringify(0x8000000a),\ },{\ .driver = "Broadwell" "-" TYPE_X86_CPU,\ - .property = "xlevel",\ + .property = "min-xlevel",\ .value = stringify(0x8000000a),\ },{\ .driver = "Broadwell-noTSX" "-" TYPE_X86_CPU,\ - .property = "xlevel",\ + .property = "min-xlevel",\ .value = stringify(0x8000000a),\ },{\ .driver = TYPE_X86_CPU,\ @@ -860,7 +860,7 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); .value = stringify(2),\ },{\ .driver = "Conroe-" TYPE_X86_CPU,\ - .property = "level",\ + .property = "min-level",\ .value = stringify(2),\ },{\ .driver = "Penryn-" TYPE_X86_CPU,\ @@ -868,7 +868,7 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); .value = stringify(2),\ },{\ .driver = "Penryn-" TYPE_X86_CPU,\ - .property = "level",\ + .property = "min-level",\ .value = stringify(2),\ },{\ .driver = "Nehalem-" TYPE_X86_CPU,\ @@ -876,7 +876,7 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); .value = stringify(2),\ },{\ .driver = "Nehalem-" TYPE_X86_CPU,\ - .property = "level",\ + .property = "min-level",\ .value = stringify(2),\ },{\ .driver = "virtio-net-pci",\ diff --git a/tests/test-x86-cpuid-compat.c b/tests/test-x86-cpuid-compat.c index 6c71e46391..4166ce54b7 100644 --- a/tests/test-x86-cpuid-compat.c +++ b/tests/test-x86-cpuid-compat.c @@ -313,6 +313,44 @@ int main(int argc, char **argv) add_cpuid_test("x86/cpuid/auto-xlevel2/pc-2.7", "-machine pc-i440fx-2.7 -cpu 486,+xstore", "xlevel2", 0); + /* + * QEMU 1.4.0 had auto-level enabled for CPUID[7], already, + * and the compat code that sets default level shouldn't + * disable the auto-level=7 code: + */ + add_cpuid_test("x86/cpuid/auto-level7/pc-i440fx-1.4/off", + "-machine pc-i440fx-1.4 -cpu Nehalem", + "level", 2); + add_cpuid_test("x86/cpuid/auto-level7/pc-i440fx-1.5/on", + "-machine pc-i440fx-1.4 -cpu Nehalem,+smap", + "level", 7); + add_cpuid_test("x86/cpuid/auto-level7/pc-i440fx-2.3/off", + "-machine pc-i440fx-2.3 -cpu Penryn", + "level", 4); + add_cpuid_test("x86/cpuid/auto-level7/pc-i440fx-2.3/on", + "-machine pc-i440fx-2.3 -cpu Penryn,+erms", + "level", 7); + add_cpuid_test("x86/cpuid/auto-level7/pc-i440fx-2.9/off", + "-machine pc-i440fx-2.9 -cpu Conroe", + "level", 10); + add_cpuid_test("x86/cpuid/auto-level7/pc-i440fx-2.9/on", + "-machine pc-i440fx-2.9 -cpu Conroe,+erms", + "level", 10); + + /* + * xlevel doesn't have any feature that triggers auto-level + * code on old machine-types. Just check that the compat code + * is working correctly: + */ + add_cpuid_test("x86/cpuid/xlevel-compat/pc-i440fx-2.3", + "-machine pc-i440fx-2.3 -cpu SandyBridge", + "xlevel", 0x8000000a); + add_cpuid_test("x86/cpuid/xlevel-compat/pc-i440fx-2.4/npt-off", + "-machine pc-i440fx-2.4 -cpu SandyBridge,", + "xlevel", 0x80000008); + add_cpuid_test("x86/cpuid/xlevel-compat/pc-i440fx-2.4/npt-on", + "-machine pc-i440fx-2.4 -cpu SandyBridge,+npt", + "xlevel", 0x80000008); /* Test feature parsing */ add_feature_test("x86/cpuid/features/plus", From a0ceb640d083ab583d115fbd2ded14c089044ae8 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Tue, 30 May 2017 18:23:56 +0200 Subject: [PATCH 02/10] numa: consolidate cpu_preplug fixups/checks for pc/arm/spapr Signed-off-by: Igor Mammedov Reviewed-by: David Gibson Message-Id: <1496161442-96665-2-git-send-email-imammedo@redhat.com> [ehabkost: Fix indentation] Signed-off-by: Eduardo Habkost --- hw/arm/virt.c | 16 ++-------------- hw/i386/pc.c | 17 +---------------- hw/ppc/spapr.c | 17 +---------------- include/sysemu/numa.h | 1 + numa.c | 23 +++++++++++++++++++++++ 5 files changed, 28 insertions(+), 46 deletions(-) diff --git a/hw/arm/virt.c b/hw/arm/virt.c index 4db2d4207c..010f7244bf 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -1372,7 +1372,6 @@ static void machvirt_init(MachineState *machine) for (n = 0; n < possible_cpus->len; n++) { Object *cpuobj; CPUState *cs; - int node_id; if (n >= smp_cpus) { break; @@ -1385,19 +1384,8 @@ static void machvirt_init(MachineState *machine) cs = CPU(cpuobj); cs->cpu_index = n; - node_id = possible_cpus->cpus[cs->cpu_index].props.node_id; - if (!possible_cpus->cpus[cs->cpu_index].props.has_node_id) { - /* by default CPUState::numa_node was 0 if it's not set via CLI - * keep it this way for now but in future we probably should - * refuse to start up with incomplete numa mapping */ - node_id = 0; - } - if (cs->numa_node == CPU_UNSET_NUMA_NODE_ID) { - cs->numa_node = node_id; - } else { - /* CPU isn't device_add compatible yet, this shouldn't happen */ - error_setg(&error_abort, "user set node-id not implemented"); - } + numa_cpu_pre_plug(&possible_cpus->cpus[cs->cpu_index], DEVICE(cpuobj), + &error_fatal); if (!vms->secure) { object_property_set_bool(cpuobj, false, "has_el3", NULL); diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 107a34125b..5b07be61cf 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1893,7 +1893,6 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { int idx; - int node_id; CPUState *cs; CPUArchId *cpu_slot; X86CPUTopoInfo topo; @@ -1984,21 +1983,7 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, cs = CPU(cpu); cs->cpu_index = idx; - node_id = cpu_slot->props.node_id; - if (!cpu_slot->props.has_node_id) { - /* by default CPUState::numa_node was 0 if it's not set via CLI - * keep it this way for now but in future we probably should - * refuse to start up with incomplete numa mapping */ - node_id = 0; - } - if (cs->numa_node == CPU_UNSET_NUMA_NODE_ID) { - cs->numa_node = node_id; - } else if (cs->numa_node != node_id) { - error_setg(errp, "node-id %d must match numa node specified" - "with -numa option for cpu-index %d", - cs->numa_node, cs->cpu_index); - return; - } + numa_cpu_pre_plug(cpu_slot, dev, errp); } static void pc_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev, diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index ab3aab1279..94563cd8f5 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -2922,11 +2922,9 @@ static void spapr_core_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, MachineClass *mc = MACHINE_GET_CLASS(hotplug_dev); Error *local_err = NULL; CPUCore *cc = CPU_CORE(dev); - sPAPRCPUCore *sc = SPAPR_CPU_CORE(dev); char *base_core_type = spapr_get_cpu_core_type(machine->cpu_model); const char *type = object_get_typename(OBJECT(dev)); CPUArchId *core_slot; - int node_id; int index; if (dev->hotplugged && !mc->has_hotpluggable_cpus) { @@ -2967,20 +2965,7 @@ static void spapr_core_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, goto out; } - node_id = core_slot->props.node_id; - if (!core_slot->props.has_node_id) { - /* by default CPUState::numa_node was 0 if it's not set via CLI - * keep it this way for now but in future we probably should - * refuse to start up with incomplete numa mapping */ - node_id = 0; - } - if (sc->node_id == CPU_UNSET_NUMA_NODE_ID) { - sc->node_id = node_id; - } else if (sc->node_id != node_id) { - error_setg(&local_err, "node-id %d must match numa node specified" - "with -numa option for cpu-index %d", sc->node_id, cc->core_id); - goto out; - } + numa_cpu_pre_plug(core_slot, dev, &local_err); out: g_free(base_core_type); diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h index 7ffde5b119..610eece211 100644 --- a/include/sysemu/numa.h +++ b/include/sysemu/numa.h @@ -35,4 +35,5 @@ void numa_legacy_auto_assign_ram(MachineClass *mc, NodeInfo *nodes, int nb_nodes, ram_addr_t size); void numa_default_auto_assign_ram(MachineClass *mc, NodeInfo *nodes, int nb_nodes, ram_addr_t size); +void numa_cpu_pre_plug(const CPUArchId *slot, DeviceState *dev, Error **errp); #endif diff --git a/numa.c b/numa.c index be50c62aa9..1c35a79b57 100644 --- a/numa.c +++ b/numa.c @@ -533,6 +533,29 @@ void parse_numa_opts(MachineState *ms) } } +void numa_cpu_pre_plug(const CPUArchId *slot, DeviceState *dev, Error **errp) +{ + int mapped_node_id; /* set by -numa option */ + int node_id = object_property_get_int(OBJECT(dev), "node-id", &error_abort); + + /* by default CPUState::numa_node was 0 if it wasn't set explicitly + * TODO: make it error when incomplete numa mapping support is removed + */ + mapped_node_id = slot->props.node_id; + if (!slot->props.has_node_id) { + mapped_node_id = 0; + } + + if (node_id == CPU_UNSET_NUMA_NODE_ID) { + /* due to bug in libvirt, it doesn't pass node-id from props on + * device_add as expected, so we have to fix it up here */ + object_property_set_int(OBJECT(dev), mapped_node_id, "node-id", errp); + } else if (node_id != mapped_node_id) { + error_setg(errp, "node-id=%d must match numa node specified " + "with -numa option", node_id); + } +} + static void allocate_system_memory_nonnuma(MemoryRegion *mr, Object *owner, const char *name, uint64_t ram_size) From 60bed6a30aa8341cbc8fae4bdc53b9eb99d80586 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Tue, 30 May 2017 18:23:57 +0200 Subject: [PATCH 03/10] numa: move default mapping init to machine there is no need use cpu_index_to_instance_props() for setting default cpu -> node mapping. Generic machine code can do it without cpu_index by just enabling already preset defaults in possible_cpus. PS: as bonus it makes one less user of cpu_index_to_instance_props() Signed-off-by: Igor Mammedov Message-Id: <1496161442-96665-3-git-send-email-imammedo@redhat.com> Signed-off-by: Eduardo Habkost --- hw/core/machine.c | 33 +++++++++++++++++++++++---------- numa.c | 26 -------------------------- 2 files changed, 23 insertions(+), 36 deletions(-) diff --git a/hw/core/machine.c b/hw/core/machine.c index 3adebf14c4..c1434e6a69 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -701,26 +701,39 @@ static char *cpu_slot_to_string(const CPUArchId *cpu) return g_string_free(s, false); } -static void machine_numa_validate(MachineState *machine) +static void machine_numa_finish_init(MachineState *machine) { int i; + bool default_mapping; GString *s = g_string_new(NULL); MachineClass *mc = MACHINE_GET_CLASS(machine); const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(machine); assert(nb_numa_nodes); + for (i = 0; i < possible_cpus->len; i++) { + if (possible_cpus->cpus[i].props.has_node_id) { + break; + } + } + default_mapping = (i == possible_cpus->len); + for (i = 0; i < possible_cpus->len; i++) { const CPUArchId *cpu_slot = &possible_cpus->cpus[i]; - /* at this point numa mappings are initilized by CLI options - * or with default mappings so it's sufficient to list - * all not yet mapped CPUs here */ - /* TODO: make it hard error in future */ if (!cpu_slot->props.has_node_id) { - char *cpu_str = cpu_slot_to_string(cpu_slot); - g_string_append_printf(s, "%sCPU %d [%s]", s->len ? ", " : "", i, - cpu_str); - g_free(cpu_str); + if (default_mapping) { + /* fetch default mapping from board and enable it */ + CpuInstanceProperties props = cpu_slot->props; + props.has_node_id = true; + machine_set_cpu_numa_node(machine, &props, &error_fatal); + } else { + /* record slots with not set mapping, + * TODO: make it hard error in future */ + char *cpu_str = cpu_slot_to_string(cpu_slot); + g_string_append_printf(s, "%sCPU %d [%s]", + s->len ? ", " : "", i, cpu_str); + g_free(cpu_str); + } } } if (s->len && !qtest_enabled()) { @@ -738,7 +751,7 @@ void machine_run_board_init(MachineState *machine) MachineClass *machine_class = MACHINE_GET_CLASS(machine); if (nb_numa_nodes) { - machine_numa_validate(machine); + machine_numa_finish_init(machine); } machine_class->init(machine); } diff --git a/numa.c b/numa.c index 1c35a79b57..4ec3eaf687 100644 --- a/numa.c +++ b/numa.c @@ -426,7 +426,6 @@ void numa_default_auto_assign_ram(MachineClass *mc, NodeInfo *nodes, void parse_numa_opts(MachineState *ms) { int i; - const CPUArchIdList *possible_cpus; MachineClass *mc = MACHINE_GET_CLASS(ms); if (qemu_opts_foreach(qemu_find_opts("numa"), parse_numa, ms, NULL)) { @@ -484,31 +483,6 @@ void parse_numa_opts(MachineState *ms) numa_set_mem_ranges(); - /* assign CPUs to nodes using board provided default mapping */ - if (!mc->cpu_index_to_instance_props || !mc->possible_cpu_arch_ids) { - error_report("default CPUs to NUMA node mapping isn't supported"); - exit(1); - } - - possible_cpus = mc->possible_cpu_arch_ids(ms); - for (i = 0; i < possible_cpus->len; i++) { - if (possible_cpus->cpus[i].props.has_node_id) { - break; - } - } - - /* no CPUs are assigned to NUMA nodes */ - if (i == possible_cpus->len) { - for (i = 0; i < max_cpus; i++) { - CpuInstanceProperties props; - /* fetch default mapping from board and enable it */ - props = mc->cpu_index_to_instance_props(ms, i); - props.has_node_id = true; - - machine_set_cpu_numa_node(ms, &props, &error_fatal); - } - } - /* QEMU needs at least all unique node pair distances to build * the whole NUMA distance table. QEMU treats the distance table * as symmetric by default, i.e. distance A->B == distance B->A. From d41f3e750d2c06c613cb1b8db7724f0fbc0a2b14 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Tue, 30 May 2017 18:23:58 +0200 Subject: [PATCH 04/10] numa: make sure that all cpus have has_node_id set if numa is enabled It fixes/add missing _PXM object for non mapped CPU (x86) and missing fdt node (virt-arm). It ensures that possible_cpus contains complete mapping if numa is enabled by the time machine_init() is executed. As result non completely mapped CPUs: 1) appear in ACPI/fdt blobs 2) QMP query-hotpluggable-cpus command shows bound nodes for such CPUs 3) allows to drop checks for has_node_id in numa only code, reducing number of invariants incomplete mapping could produce 4) moves fixup/implicit node init from runtime numa_cpu_pre_plug() (when CPU object is created) to machine_numa_finish_init() which helps to fix [1, 2] and make possible_cpus complete source of numa mapping available even before CPUs are created. Signed-off-by: Igor Mammedov Message-Id: <1496161442-96665-4-git-send-email-imammedo@redhat.com> Signed-off-by: Eduardo Habkost --- hw/arm/virt-acpi-build.c | 4 +--- hw/core/machine.c | 16 ++++++++++------ hw/i386/acpi-build.c | 3 +-- hw/i386/pc.c | 4 +--- numa.c | 16 +++++----------- 5 files changed, 18 insertions(+), 25 deletions(-) diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c index 2079828c22..3d78ff68e6 100644 --- a/hw/arm/virt-acpi-build.c +++ b/hw/arm/virt-acpi-build.c @@ -496,12 +496,10 @@ build_srat(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) srat->reserved1 = cpu_to_le32(1); for (i = 0; i < cpu_list->len; ++i) { - int node_id = cpu_list->cpus[i].props.has_node_id ? - cpu_list->cpus[i].props.node_id : 0; core = acpi_data_push(table_data, sizeof(*core)); core->type = ACPI_SRAT_PROCESSOR_GICC; core->length = sizeof(*core); - core->proximity = cpu_to_le32(node_id); + core->proximity = cpu_to_le32(cpu_list->cpus[i].props.node_id); core->acpi_processor_uid = cpu_to_le32(i); core->flags = cpu_to_le32(1); } diff --git a/hw/core/machine.c b/hw/core/machine.c index c1434e6a69..2e7e9778cd 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -721,19 +721,23 @@ static void machine_numa_finish_init(MachineState *machine) const CPUArchId *cpu_slot = &possible_cpus->cpus[i]; if (!cpu_slot->props.has_node_id) { - if (default_mapping) { - /* fetch default mapping from board and enable it */ - CpuInstanceProperties props = cpu_slot->props; - props.has_node_id = true; - machine_set_cpu_numa_node(machine, &props, &error_fatal); - } else { + /* fetch default mapping from board and enable it */ + CpuInstanceProperties props = cpu_slot->props; + + if (!default_mapping) { /* record slots with not set mapping, * TODO: make it hard error in future */ char *cpu_str = cpu_slot_to_string(cpu_slot); g_string_append_printf(s, "%sCPU %d [%s]", s->len ? ", " : "", i, cpu_str); g_free(cpu_str); + + /* non mapped cpus used to fallback to node 0 */ + props.node_id = 0; } + + props.has_node_id = true; + machine_set_cpu_numa_node(machine, &props, &error_fatal); } } if (s->len && !qtest_enabled()) { diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 82bd44f38e..ce74c84460 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -2335,8 +2335,7 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) srat->reserved1 = cpu_to_le32(1); for (i = 0; i < apic_ids->len; i++) { - int node_id = apic_ids->cpus[i].props.has_node_id ? - apic_ids->cpus[i].props.node_id : 0; + int node_id = apic_ids->cpus[i].props.node_id; uint32_t apic_id = apic_ids->cpus[i].arch_id; if (apic_id < 255) { diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 5b07be61cf..5b8c6fbbea 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -788,9 +788,7 @@ static FWCfgState *bochs_bios_init(AddressSpace *as, PCMachineState *pcms) for (i = 0; i < cpus->len; i++) { unsigned int apic_id = cpus->cpus[i].arch_id; assert(apic_id < pcms->apic_id_limit); - if (cpus->cpus[i].props.has_node_id) { - numa_fw_cfg[apic_id + 1] = cpu_to_le64(cpus->cpus[i].props.node_id); - } + numa_fw_cfg[apic_id + 1] = cpu_to_le64(cpus->cpus[i].props.node_id); } for (i = 0; i < nb_numa_nodes; i++) { numa_fw_cfg[pcms->apic_id_limit + 1 + i] = diff --git a/numa.c b/numa.c index 4ec3eaf687..65701cb6c8 100644 --- a/numa.c +++ b/numa.c @@ -509,22 +509,16 @@ void parse_numa_opts(MachineState *ms) void numa_cpu_pre_plug(const CPUArchId *slot, DeviceState *dev, Error **errp) { - int mapped_node_id; /* set by -numa option */ int node_id = object_property_get_int(OBJECT(dev), "node-id", &error_abort); - /* by default CPUState::numa_node was 0 if it wasn't set explicitly - * TODO: make it error when incomplete numa mapping support is removed - */ - mapped_node_id = slot->props.node_id; - if (!slot->props.has_node_id) { - mapped_node_id = 0; - } - if (node_id == CPU_UNSET_NUMA_NODE_ID) { /* due to bug in libvirt, it doesn't pass node-id from props on * device_add as expected, so we have to fix it up here */ - object_property_set_int(OBJECT(dev), mapped_node_id, "node-id", errp); - } else if (node_id != mapped_node_id) { + if (slot->props.has_node_id) { + object_property_set_int(OBJECT(dev), slot->props.node_id, + "node-id", errp); + } + } else if (node_id != slot->props.node_id) { error_setg(errp, "node-id=%d must match numa node specified " "with -numa option", node_id); } From f75cd44de0ec20d72dc9eef5e6e48a8874c40727 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Tue, 30 May 2017 18:23:59 +0200 Subject: [PATCH 05/10] numa: make hmp 'info numa' fetch numa nodes from qmp_query_cpus() result HMP command 'info numa' is the last external user that access CPUState::numa_node field directly. In order to move it to CPU classes that actually use it, eliminate direct access and use an alternative approach by using result of qmp_query_cpus(), which provides topology properties CPU threads are associated with (including node-id). Signed-off-by: Igor Mammedov Message-Id: <1496161442-96665-5-git-send-email-imammedo@redhat.com> Signed-off-by: Eduardo Habkost --- monitor.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/monitor.c b/monitor.c index 75e7cd26d0..1e63ace2d4 100644 --- a/monitor.c +++ b/monitor.c @@ -1696,23 +1696,26 @@ static void hmp_info_mtree(Monitor *mon, const QDict *qdict) static void hmp_info_numa(Monitor *mon, const QDict *qdict) { int i; - CPUState *cpu; uint64_t *node_mem; + CpuInfoList *cpu_list, *cpu; + cpu_list = qmp_query_cpus(&error_abort); node_mem = g_new0(uint64_t, nb_numa_nodes); query_numa_node_mem(node_mem); monitor_printf(mon, "%d nodes\n", nb_numa_nodes); for (i = 0; i < nb_numa_nodes; i++) { monitor_printf(mon, "node %d cpus:", i); - CPU_FOREACH(cpu) { - if (cpu->numa_node == i) { - monitor_printf(mon, " %d", cpu->cpu_index); + for (cpu = cpu_list; cpu; cpu = cpu->next) { + if (cpu->value->has_props && cpu->value->props->has_node_id && + cpu->value->props->node_id == i) { + monitor_printf(mon, " %" PRIi64, cpu->value->CPU); } } monitor_printf(mon, "\n"); monitor_printf(mon, "node %d size: %" PRId64 " MB\n", i, node_mem[i] >> 20); } + qapi_free_CpuInfoList(cpu_list); g_free(node_mem); } From 15f8b14228b856850df3fa5ba999ad96521f2208 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Tue, 30 May 2017 18:24:00 +0200 Subject: [PATCH 06/10] numa: move numa_node from CPUState into target specific classes Move vcpu's associated numa_node field out of generic CPUState into inherited classes that actually care about cpu<->numa mapping, i.e: ARMCPU, PowerPCCPU, X86CPU. Signed-off-by: Igor Mammedov Message-Id: <1496161442-96665-6-git-send-email-imammedo@redhat.com> [ehabkost: s/CPU is belonging to/CPU belongs to/ on comments] Signed-off-by: Eduardo Habkost --- hw/ppc/spapr.c | 2 +- hw/ppc/spapr_cpu_core.c | 4 +++- include/qom/cpu.h | 2 -- target/arm/cpu.c | 2 +- target/arm/cpu.h | 2 ++ target/i386/cpu.c | 2 +- target/i386/cpu.h | 1 + target/ppc/cpu.h | 1 + 8 files changed, 10 insertions(+), 6 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 94563cd8f5..96471a5d89 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -191,7 +191,7 @@ static int spapr_fixup_cpu_numa_dt(void *fdt, int offset, CPUState *cs) cpu_to_be32(0x0), cpu_to_be32(0x0), cpu_to_be32(0x0), - cpu_to_be32(cs->numa_node), + cpu_to_be32(cpu->node_id), cpu_to_be32(index)}; /* Advertise NUMA via ibm,associativity */ diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c index ff7058ecc0..029a14120e 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -184,15 +184,17 @@ static void spapr_cpu_core_realize(DeviceState *dev, Error **errp) for (i = 0; i < cc->nr_threads; i++) { char id[32]; CPUState *cs; + PowerPCCPU *cpu; obj = sc->threads + i * size; object_initialize(obj, size, typename); cs = CPU(obj); + cpu = POWERPC_CPU(cs); cs->cpu_index = cc->core_id + i; /* Set NUMA node for the threads belonged to core */ - cs->numa_node = sc->node_id; + cpu->node_id = sc->node_id; snprintf(id, sizeof(id), "thread[%d]", i); object_property_add_child(OBJECT(sc), id, obj, &local_err); diff --git a/include/qom/cpu.h b/include/qom/cpu.h index 55214ce131..89ddb686fb 100644 --- a/include/qom/cpu.h +++ b/include/qom/cpu.h @@ -265,7 +265,6 @@ struct qemu_work_item; * @cpu_index: CPU index (informative). * @nr_cores: Number of cores within this CPU package. * @nr_threads: Number of threads within this CPU. - * @numa_node: NUMA node this CPU is belonging to. * @host_tid: Host thread ID. * @running: #true if CPU is currently running (lockless). * @has_waiter: #true if a CPU is currently waiting for the cpu_exec_end; @@ -314,7 +313,6 @@ struct CPUState { int nr_cores; int nr_threads; - int numa_node; struct QemuThread *thread; #ifdef _WIN32 diff --git a/target/arm/cpu.c b/target/arm/cpu.c index e748097860..82d739f832 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -1587,7 +1587,7 @@ static Property arm_cpu_properties[] = { DEFINE_PROP_UINT32("midr", ARMCPU, midr, 0), DEFINE_PROP_UINT64("mp-affinity", ARMCPU, mp_affinity, ARM64_AFFINITY_INVALID), - DEFINE_PROP_INT32("node-id", CPUState, numa_node, CPU_UNSET_NUMA_NODE_ID), + DEFINE_PROP_INT32("node-id", ARMCPU, node_id, CPU_UNSET_NUMA_NODE_ID), DEFINE_PROP_END_OF_LIST() }; diff --git a/target/arm/cpu.h b/target/arm/cpu.h index 13da5036bc..16a1e59615 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -704,6 +704,8 @@ struct ARMCPU { ARMELChangeHook *el_change_hook; void *el_change_hook_opaque; + + int32_t node_id; /* NUMA node this CPU belongs to */ }; static inline ARMCPU *arm_env_get_cpu(CPUARMState *env) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index a41d595c23..ffb5267162 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -3986,7 +3986,7 @@ static Property x86_cpu_properties[] = { DEFINE_PROP_INT32("core-id", X86CPU, core_id, -1), DEFINE_PROP_INT32("socket-id", X86CPU, socket_id, -1), #endif - DEFINE_PROP_INT32("node-id", CPUState, numa_node, CPU_UNSET_NUMA_NODE_ID), + DEFINE_PROP_INT32("node-id", X86CPU, node_id, CPU_UNSET_NUMA_NODE_ID), DEFINE_PROP_BOOL("pmu", X86CPU, enable_pmu, false), { .name = "hv-spinlocks", .info = &qdev_prop_spinlocks }, DEFINE_PROP_BOOL("hv-relaxed", X86CPU, hyperv_relaxed_timing, false), diff --git a/target/i386/cpu.h b/target/i386/cpu.h index c4602ca80d..cfe825f0a4 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1275,6 +1275,7 @@ struct X86CPU { struct kvm_msrs *kvm_msr_buf; + int32_t node_id; /* NUMA node this CPU belongs to */ int32_t socket_id; int32_t core_id; int32_t thread_id; diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index 401e10e7da..d10808d9f4 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -1205,6 +1205,7 @@ struct PowerPCCPU { uint32_t compat_pvr; PPCVirtualHypervisor *vhyp; Object *intc; + int32_t node_id; /* NUMA node this CPU belongs to */ /* Fields related to migration compatibility hacks */ bool pre_2_8_migration; From 99861ecbc53c705fff5e4ea774f0da1193e81da8 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Tue, 30 May 2017 18:24:01 +0200 Subject: [PATCH 07/10] spapr: cleanup spapr_fixup_cpu_numa_dt() usage even though spapr_fixup_cpu_numa_dt() has no effect on FDT if numa is disabled, don't call it uselessly. It makes it obvious at call sites that function is needed only when numa is enabled. Signed-off-by: Igor Mammedov Message-Id: <1496161442-96665-7-git-send-email-imammedo@redhat.com> Reviewed-by: Greg Kurz Signed-off-by: Eduardo Habkost --- hw/ppc/spapr.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 96471a5d89..70eb60efed 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -182,10 +182,8 @@ static int spapr_fixup_cpu_smt_dt(void *fdt, int offset, PowerPCCPU *cpu, return ret; } -static int spapr_fixup_cpu_numa_dt(void *fdt, int offset, CPUState *cs) +static int spapr_fixup_cpu_numa_dt(void *fdt, int offset, PowerPCCPU *cpu) { - int ret = 0; - PowerPCCPU *cpu = POWERPC_CPU(cs); int index = ppc_get_vcpu_dt_id(cpu); uint32_t associativity[] = {cpu_to_be32(0x5), cpu_to_be32(0x0), @@ -195,12 +193,8 @@ static int spapr_fixup_cpu_numa_dt(void *fdt, int offset, CPUState *cs) cpu_to_be32(index)}; /* Advertise NUMA via ibm,associativity */ - if (nb_numa_nodes > 1) { - ret = fdt_setprop(fdt, offset, "ibm,associativity", associativity, + return fdt_setprop(fdt, offset, "ibm,associativity", associativity, sizeof(associativity)); - } - - return ret; } /* Populate the "ibm,pa-features" property */ @@ -325,9 +319,11 @@ static int spapr_fixup_cpu_dt(void *fdt, sPAPRMachineState *spapr) return ret; } - ret = spapr_fixup_cpu_numa_dt(fdt, offset, cs); - if (ret < 0) { - return ret; + if (nb_numa_nodes > 1) { + ret = spapr_fixup_cpu_numa_dt(fdt, offset, cpu); + if (ret < 0) { + return ret; + } } ret = spapr_fixup_cpu_smt_dt(fdt, offset, cpu, compat_smt); @@ -542,7 +538,9 @@ static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset, _FDT((fdt_setprop(fdt, offset, "ibm,pft-size", pft_size_prop, sizeof(pft_size_prop)))); - _FDT(spapr_fixup_cpu_numa_dt(fdt, offset, cs)); + if (nb_numa_nodes > 1) { + _FDT(spapr_fixup_cpu_numa_dt(fdt, offset, cpu)); + } _FDT(spapr_fixup_cpu_smt_dt(fdt, offset, cpu, compat_smt)); From 37bbcd575745d9bea4f840164e75dd2f40e5ee91 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Fri, 26 May 2017 15:11:58 -0300 Subject: [PATCH 08/10] qemu.py: Don't set _popen=None on error/shutdown Keep the Popen object around to we can query its exit code later. To keep the existing 'self._popen is None' checks working, add a is_running() method, that will check if the process is still running. Signed-off-by: Eduardo Habkost Message-Id: <20170526181200.17227-2-ehabkost@redhat.com> Reviewed-by: Markus Armbruster Signed-off-by: Eduardo Habkost --- scripts/qemu.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/scripts/qemu.py b/scripts/qemu.py index 6d1b6230b7..16934f1e02 100644 --- a/scripts/qemu.py +++ b/scripts/qemu.py @@ -85,8 +85,11 @@ def _remove_if_exists(path): return raise + def is_running(self): + return self._popen and (self._popen.returncode is None) + def get_pid(self): - if not self._popen: + if not self.is_running(): return None return self._popen.pid @@ -128,16 +131,16 @@ def launch(self): stderr=subprocess.STDOUT, shell=False) self._post_launch() except: - if self._popen: + if self.is_running(): self._popen.kill() + self._popen.wait() self._load_io_log() self._post_shutdown() - self._popen = None raise def shutdown(self): '''Terminate the VM and clean up''' - if not self._popen is None: + if self.is_running(): try: self._qmp.cmd('quit') self._qmp.close() @@ -149,7 +152,6 @@ def shutdown(self): sys.stderr.write('qemu received signal %i: %s\n' % (-exitcode, ' '.join(self._args))) self._load_io_log() self._post_shutdown() - self._popen = None underscore_to_dash = string.maketrans('_', '-') def qmp(self, cmd, conv_keys=True, **args): From b2b8d986753ceb20437457dfe51dd75285328373 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Fri, 26 May 2017 15:11:59 -0300 Subject: [PATCH 09/10] qemu.py: Add QEMUMachine.exitcode() method Allow the exit code of QEMU to be queried by scripts. Signed-off-by: Eduardo Habkost Message-Id: <20170526181200.17227-3-ehabkost@redhat.com> Signed-off-by: Eduardo Habkost --- scripts/qemu.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/scripts/qemu.py b/scripts/qemu.py index 16934f1e02..880e3e8219 100644 --- a/scripts/qemu.py +++ b/scripts/qemu.py @@ -88,6 +88,11 @@ def _remove_if_exists(path): def is_running(self): return self._popen and (self._popen.returncode is None) + def exitcode(self): + if self._popen is None: + return None + return self._popen.returncode + def get_pid(self): if not self.is_running(): return None From 23ea4f30320bbd36a5d202ee469374ec3c747286 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Fri, 26 May 2017 15:12:00 -0300 Subject: [PATCH 10/10] scripts: Test script to look for -device crashes Test code to check if we can crash QEMU using -device. It will test all accel/machine/device combinations by default, which may take a few hours (it's more than 90k test cases). There's a "-r" option that makes it test a random sample of combinations. The scripts contains a whitelist for: 1) known error messages that make QEMU exit cleanly; 2) known QEMU crashes. This is the behavior when the script finds a failure: * Known clean (exitcode=1) errors generate DEBUG messages (hidden by default) * Unknown clean (exitcode=1) errors will generate INFO messages (visible by default) * Known crashes generate error messages, but are not fatal (unless --strict mode is used) * Unknown crashes generate fatal error messages Having an updated whitelist of known clean errors is useful to make the script less verbose and run faster when in --quick mode, but the whitelist doesn't need to be always up to date. Signed-off-by: Eduardo Habkost Message-Id: <20170526181200.17227-4-ehabkost@redhat.com> Signed-off-by: Eduardo Habkost --- scripts/device-crash-test | 624 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 624 insertions(+) create mode 100755 scripts/device-crash-test diff --git a/scripts/device-crash-test b/scripts/device-crash-test new file mode 100755 index 0000000000..5f90e9bb54 --- /dev/null +++ b/scripts/device-crash-test @@ -0,0 +1,624 @@ +#!/usr/bin/env python2.7 +# +# Copyright (c) 2017 Red Hat Inc +# +# Author: +# Eduardo Habkost +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +""" +Run QEMU with all combinations of -machine and -device types, +check for crashes and unexpected errors. +""" + +import sys +import os +import glob +import logging +import traceback +import re +import random +import argparse +from itertools import chain + +sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'scripts')) +from qemu import QEMUMachine + +logger = logging.getLogger('device-crash-test') +dbg = logger.debug + + +# Purposes of the following whitelist: +# * Avoiding verbose log messages when we find known non-fatal +# (exitcode=1) errors +# * Avoiding fatal errors when we find known crashes +# * Skipping machines/devices that are known not to work out of +# the box, when running in --quick mode +# +# Keeping the whitelist updated is desirable, but not required, +# because unexpected cases where QEMU exits with exitcode=1 will +# just trigger a INFO message. + +# Valid whitelist entry keys: +# * accel: regexp, full match only +# * machine: regexp, full match only +# * device: regexp, full match only +# * log: regexp, partial match allowed +# * exitcode: if not present, defaults to 1. If None, matches any exitcode +# * warn: if True, matching failures will be logged as warnings +# * expected: if True, QEMU is expected to always fail every time +# when testing the corresponding test case +# * loglevel: log level of log output when there's a match. +ERROR_WHITELIST = [ + # Machines that won't work out of the box: + # MACHINE | ERROR MESSAGE + {'machine':'niagara', 'expected':True}, # Unable to load a firmware for -M niagara + {'machine':'boston', 'expected':True}, # Please provide either a -kernel or -bios argument + {'machine':'leon3_generic', 'expected':True}, # Can't read bios image (null) + + # devices that don't work out of the box because they require extra options to "-device DEV": + # DEVICE | ERROR MESSAGE + {'device':'.*-(i386|x86_64)-cpu', 'expected':True}, # CPU socket-id is not set + {'device':'ARM,bitband-memory', 'expected':True}, # source-memory property not set + {'device':'arm.cortex-a9-global-timer', 'expected':True}, # a9_gtimer_realize: num-cpu must be between 1 and 4 + {'device':'arm_mptimer', 'expected':True}, # num-cpu must be between 1 and 4 + {'device':'armv7m', 'expected':True}, # memory property was not set + {'device':'aspeed.scu', 'expected':True}, # Unknown silicon revision: 0x0 + {'device':'aspeed.sdmc', 'expected':True}, # Unknown silicon revision: 0x0 + {'device':'bcm2835-dma', 'expected':True}, # bcm2835_dma_realize: required dma-mr link not found: Property '.dma-mr' not found + {'device':'bcm2835-fb', 'expected':True}, # bcm2835_fb_realize: required vcram-base property not set + {'device':'bcm2835-mbox', 'expected':True}, # bcm2835_mbox_realize: required mbox-mr link not found: Property '.mbox-mr' not found + {'device':'bcm2835-peripherals', 'expected':True}, # bcm2835_peripherals_realize: required ram link not found: Property '.ram' not found + {'device':'bcm2835-property', 'expected':True}, # bcm2835_property_realize: required fb link not found: Property '.fb' not found + {'device':'bcm2835_gpio', 'expected':True}, # bcm2835_gpio_realize: required sdhci link not found: Property '.sdbus-sdhci' not found + {'device':'bcm2836', 'expected':True}, # bcm2836_realize: required ram link not found: Property '.ram' not found + {'device':'cfi.pflash01', 'expected':True}, # attribute "sector-length" not specified or zero. + {'device':'cfi.pflash02', 'expected':True}, # attribute "sector-length" not specified or zero. + {'device':'icp', 'expected':True}, # icp_realize: required link 'xics' not found: Property '.xics' not found + {'device':'ics', 'expected':True}, # ics_base_realize: required link 'xics' not found: Property '.xics' not found + # "-device ide-cd" does work on more recent QEMU versions, so it doesn't have expected=True + {'device':'ide-cd'}, # No drive specified + {'device':'ide-drive', 'expected':True}, # No drive specified + {'device':'ide-hd', 'expected':True}, # No drive specified + {'device':'ipmi-bmc-extern', 'expected':True}, # IPMI external bmc requires chardev attribute + {'device':'isa-debugcon', 'expected':True}, # Can't create serial device, empty char device + {'device':'isa-ipmi-bt', 'expected':True}, # IPMI device requires a bmc attribute to be set + {'device':'isa-ipmi-kcs', 'expected':True}, # IPMI device requires a bmc attribute to be set + {'device':'isa-parallel', 'expected':True}, # Can't create serial device, empty char device + {'device':'isa-serial', 'expected':True}, # Can't create serial device, empty char device + {'device':'ivshmem', 'expected':True}, # You must specify either 'shm' or 'chardev' + {'device':'ivshmem-doorbell', 'expected':True}, # You must specify a 'chardev' + {'device':'ivshmem-plain', 'expected':True}, # You must specify a 'memdev' + {'device':'kvm-pci-assign', 'expected':True}, # no host device specified + {'device':'loader', 'expected':True}, # please include valid arguments + {'device':'nand', 'expected':True}, # Unsupported NAND block size 0x1 + {'device':'nvdimm', 'expected':True}, # 'memdev' property is not set + {'device':'nvme', 'expected':True}, # Device initialization failed + {'device':'pc-dimm', 'expected':True}, # 'memdev' property is not set + {'device':'pci-bridge', 'expected':True}, # Bridge chassis not specified. Each bridge is required to be assigned a unique chassis id > 0. + {'device':'pci-bridge-seat', 'expected':True}, # Bridge chassis not specified. Each bridge is required to be assigned a unique chassis id > 0. + {'device':'pci-serial', 'expected':True}, # Can't create serial device, empty char device + {'device':'pci-serial-2x', 'expected':True}, # Can't create serial device, empty char device + {'device':'pci-serial-4x', 'expected':True}, # Can't create serial device, empty char device + {'device':'pxa2xx-dma', 'expected':True}, # channels value invalid + {'device':'pxb', 'expected':True}, # Bridge chassis not specified. Each bridge is required to be assigned a unique chassis id > 0. + {'device':'scsi-block', 'expected':True}, # drive property not set + {'device':'scsi-disk', 'expected':True}, # drive property not set + {'device':'scsi-generic', 'expected':True}, # drive property not set + {'device':'scsi-hd', 'expected':True}, # drive property not set + {'device':'spapr-pci-host-bridge', 'expected':True}, # BUID not specified for PHB + {'device':'spapr-pci-vfio-host-bridge', 'expected':True}, # BUID not specified for PHB + {'device':'spapr-rng', 'expected':True}, # spapr-rng needs an RNG backend! + {'device':'spapr-vty', 'expected':True}, # chardev property not set + {'device':'tpm-tis', 'expected':True}, # tpm_tis: backend driver with id (null) could not be found + {'device':'unimplemented-device', 'expected':True}, # property 'size' not specified or zero + {'device':'usb-braille', 'expected':True}, # Property chardev is required + {'device':'usb-mtp', 'expected':True}, # x-root property must be configured + {'device':'usb-redir', 'expected':True}, # Parameter 'chardev' is missing + {'device':'usb-serial', 'expected':True}, # Property chardev is required + {'device':'usb-storage', 'expected':True}, # drive property not set + {'device':'vfio-amd-xgbe', 'expected':True}, # -device vfio-amd-xgbe: vfio error: wrong host device name + {'device':'vfio-calxeda-xgmac', 'expected':True}, # -device vfio-calxeda-xgmac: vfio error: wrong host device name + {'device':'vfio-pci', 'expected':True}, # No provided host device + {'device':'vfio-pci-igd-lpc-bridge', 'expected':True}, # VFIO dummy ISA/LPC bridge must have address 1f.0 + {'device':'vhost-scsi.*', 'expected':True}, # vhost-scsi: missing wwpn + {'device':'vhost-vsock-device', 'expected':True}, # guest-cid property must be greater than 2 + {'device':'vhost-vsock-pci', 'expected':True}, # guest-cid property must be greater than 2 + {'device':'virtio-9p-ccw', 'expected':True}, # 9pfs device couldn't find fsdev with the id = NULL + {'device':'virtio-9p-device', 'expected':True}, # 9pfs device couldn't find fsdev with the id = NULL + {'device':'virtio-9p-pci', 'expected':True}, # 9pfs device couldn't find fsdev with the id = NULL + {'device':'virtio-blk-ccw', 'expected':True}, # drive property not set + {'device':'virtio-blk-device', 'expected':True}, # drive property not set + {'device':'virtio-blk-device', 'expected':True}, # drive property not set + {'device':'virtio-blk-pci', 'expected':True}, # drive property not set + {'device':'virtio-crypto-ccw', 'expected':True}, # 'cryptodev' parameter expects a valid object + {'device':'virtio-crypto-device', 'expected':True}, # 'cryptodev' parameter expects a valid object + {'device':'virtio-crypto-pci', 'expected':True}, # 'cryptodev' parameter expects a valid object + {'device':'virtio-input-host-device', 'expected':True}, # evdev property is required + {'device':'virtio-input-host-pci', 'expected':True}, # evdev property is required + {'device':'xen-pvdevice', 'expected':True}, # Device ID invalid, it must always be supplied + {'device':'vhost-vsock-ccw', 'expected':True}, # guest-cid property must be greater than 2 + {'device':'ALTR.timer', 'expected':True}, # "clock-frequency" property must be provided + {'device':'zpci', 'expected':True}, # target must be defined + {'device':'pnv-(occ|icp|lpc)', 'expected':True}, # required link 'xics' not found: Property '.xics' not found + {'device':'powernv-cpu-.*', 'expected':True}, # pnv_core_realize: required link 'xics' not found: Property '.xics' not found + + # ioapic devices are already created by pc and will fail: + {'machine':'q35|pc.*', 'device':'kvm-ioapic', 'expected':True}, # Only 1 ioapics allowed + {'machine':'q35|pc.*', 'device':'ioapic', 'expected':True}, # Only 1 ioapics allowed + + # KVM-specific devices shouldn't be tried without accel=kvm: + {'accel':'(?!kvm).*', 'device':'kvmclock', 'expected':True}, + {'accel':'(?!kvm).*', 'device':'kvm-pci-assign', 'expected':True}, + + # xen-specific machines and devices: + {'accel':'(?!xen).*', 'machine':'xen.*', 'expected':True}, + {'accel':'(?!xen).*', 'device':'xen-.*', 'expected':True}, + + # this fails on some machine-types, but not all, so they don't have expected=True: + {'device':'vmgenid'}, # vmgenid requires DMA write support in fw_cfg, which this machine type does not provide + + # Silence INFO messages for errors that are common on multiple + # devices/machines: + {'log':r"No '[\w-]+' bus found for device '[\w-]+'"}, + {'log':r"images* must be given with the 'pflash' parameter"}, + {'log':r"(Guest|ROM|Flash|Kernel) image must be specified"}, + {'log':r"[cC]ould not load [\w ]+ (BIOS|bios) '[\w-]+\.bin'"}, + {'log':r"Couldn't find rom image '[\w-]+\.bin'"}, + {'log':r"speed mismatch trying to attach usb device"}, + {'log':r"Can't create a second ISA bus"}, + {'log':r"duplicate fw_cfg file name"}, + # sysbus-related error messages: most machines reject most dynamic sysbus devices: + {'log':r"Option '-device [\w.,-]+' cannot be handled by this machine"}, + {'log':r"Device [\w.,-]+ is not supported by this machine yet"}, + {'log':r"Device [\w.,-]+ can not be dynamically instantiated"}, + {'log':r"Platform Bus: Can not fit MMIO region of size "}, + # other more specific errors we will ignore: + {'device':'allwinner-a10', 'log':"Unsupported NIC model:"}, + {'device':'.*-spapr-cpu-core', 'log':r"CPU core type should be"}, + {'log':r"MSI(-X)? is not supported by interrupt controller"}, + {'log':r"pxb-pcie? devices cannot reside on a PCIe? bus"}, + {'log':r"Ignoring smp_cpus value"}, + {'log':r"sd_init failed: Drive 'sd0' is already in use because it has been automatically connected to another device"}, + {'log':r"This CPU requires a smaller page size than the system is using"}, + {'log':r"MSI-X support is mandatory in the S390 architecture"}, + {'log':r"rom check and register reset failed"}, + {'log':r"Unable to initialize GIC, CPUState for CPU#0 not valid"}, + {'log':r"Multiple VT220 operator consoles are not supported"}, + {'log':r"core 0 already populated"}, + {'log':r"could not find stage1 bootloader"}, + + # other exitcode=1 failures not listed above will just generate INFO messages: + {'exitcode':1, 'loglevel':logging.INFO}, + + # KNOWN CRASHES: + # Known crashes will generate error messages, but won't be fatal. + # Those entries must be removed once we fix the crashes. + {'exitcode':-6, 'log':r"Device 'serial0' is in use", 'loglevel':logging.ERROR}, + {'exitcode':-6, 'log':r"spapr_rtas_register: Assertion .*rtas_table\[token\]\.name.* failed", 'loglevel':logging.ERROR}, + {'exitcode':-6, 'log':r"qemu_net_client_setup: Assertion `!peer->peer' failed", 'loglevel':logging.ERROR}, + {'exitcode':-6, 'log':r'RAMBlock "[\w.-]+" already registered', 'loglevel':logging.ERROR}, + {'exitcode':-6, 'log':r"find_ram_offset: Assertion `size != 0' failed.", 'loglevel':logging.ERROR}, + {'exitcode':-6, 'log':r"puv3_load_kernel: Assertion `kernel_filename != NULL' failed", 'loglevel':logging.ERROR}, + {'exitcode':-6, 'log':r"add_cpreg_to_hashtable: code should not be reached", 'loglevel':logging.ERROR}, + {'exitcode':-6, 'log':r"qemu_alloc_display: Assertion `surface->image != NULL' failed", 'loglevel':logging.ERROR}, + {'exitcode':-6, 'log':r"Unexpected error in error_set_from_qdev_prop_error", 'loglevel':logging.ERROR}, + {'exitcode':-6, 'log':r"Object .* is not an instance of type spapr-machine", 'loglevel':logging.ERROR}, + {'exitcode':-6, 'log':r"Object .* is not an instance of type generic-pc-machine", 'loglevel':logging.ERROR}, + {'exitcode':-6, 'log':r"Object .* is not an instance of type e500-ccsr", 'loglevel':logging.ERROR}, + {'exitcode':-6, 'log':r"vmstate_register_with_alias_id: Assertion `!se->compat || se->instance_id == 0' failed", 'loglevel':logging.ERROR}, + {'exitcode':-11, 'device':'stm32f205-soc', 'loglevel':logging.ERROR, 'expected':True}, + {'exitcode':-11, 'device':'xlnx,zynqmp', 'loglevel':logging.ERROR, 'expected':True}, + {'exitcode':-11, 'device':'mips-cps', 'loglevel':logging.ERROR, 'expected':True}, + {'exitcode':-11, 'device':'gus', 'loglevel':logging.ERROR, 'expected':True}, + {'exitcode':-11, 'device':'a9mpcore_priv', 'loglevel':logging.ERROR, 'expected':True}, + {'exitcode':-11, 'device':'a15mpcore_priv', 'loglevel':logging.ERROR, 'expected':True}, + {'exitcode':-11, 'device':'isa-serial', 'loglevel':logging.ERROR, 'expected':True}, + {'exitcode':-11, 'device':'sb16', 'loglevel':logging.ERROR, 'expected':True}, + {'exitcode':-11, 'device':'cs4231a', 'loglevel':logging.ERROR, 'expected':True}, + {'exitcode':-11, 'device':'arm-gicv3', 'loglevel':logging.ERROR, 'expected':True}, + {'exitcode':-11, 'machine':'isapc', 'device':'.*-iommu', 'loglevel':logging.ERROR, 'expected':True}, + + # everything else (including SIGABRT and SIGSEGV) will be a fatal error: + {'exitcode':None, 'fatal':True, 'loglevel':logging.FATAL}, +] + + +def whitelistTestCaseMatch(wl, t): + """Check if a test case specification can match a whitelist entry + + This only checks if a whitelist entry is a candidate match + for a given test case, it won't check if the test case + results/output match the entry. See whitelistResultMatch(). + """ + return (('machine' not in wl or + 'machine' not in t or + re.match(wl['machine'] + '$', t['machine'])) and + ('accel' not in wl or + 'accel' not in t or + re.match(wl['accel'] + '$', t['accel'])) and + ('device' not in wl or + 'device' not in t or + re.match(wl['device'] + '$', t['device']))) + + +def whitelistCandidates(t): + """Generate the list of candidates that can match a test case""" + for i, wl in enumerate(ERROR_WHITELIST): + if whitelistTestCaseMatch(wl, t): + yield (i, wl) + + +def findExpectedResult(t): + """Check if there's an expected=True whitelist entry for a test case + + Returns (i, wl) tuple, where i is the index in + ERROR_WHITELIST and wl is the whitelist entry itself. + """ + for i, wl in whitelistCandidates(t): + if wl.get('expected'): + return (i, wl) + + +def whitelistResultMatch(wl, r): + """Check if test case results/output match a whitelist entry + + It is valid to call this function only if + whitelistTestCaseMatch() is True for the entry (e.g. on + entries returned by whitelistCandidates()) + """ + assert whitelistTestCaseMatch(wl, r['testcase']) + return ((wl.get('exitcode', 1) is None or + r['exitcode'] == wl.get('exitcode', 1)) and + ('log' not in wl or + re.search(wl['log'], r['log'], re.MULTILINE))) + + +def checkResultWhitelist(r): + """Look up whitelist entry for a given test case result + + Returns (i, wl) tuple, where i is the index in + ERROR_WHITELIST and wl is the whitelist entry itself. + """ + for i, wl in whitelistCandidates(r['testcase']): + if whitelistResultMatch(wl, r): + return i, wl + + raise Exception("this should never happen") + + +def qemuOptsEscape(s): + """Escape option value QemuOpts""" + return s.replace(",", ",,") + + +def formatTestCase(t): + """Format test case info as "key=value key=value" for prettier logging output""" + return ' '.join('%s=%s' % (k, v) for k, v in t.items()) + + +def qomListTypeNames(vm, **kwargs): + """Run qom-list-types QMP command, return type names""" + types = vm.command('qom-list-types', **kwargs) + return [t['name'] for t in types] + + +def infoQDM(vm): + """Parse 'info qdm' output""" + args = {'command-line': 'info qdm'} + devhelp = vm.command('human-monitor-command', **args) + for l in devhelp.split('\n'): + l = l.strip() + if l == '' or l.endswith(':'): + continue + d = {'name': re.search(r'name "([^"]+)"', l).group(1), + 'no-user': (re.search(', no-user', l) is not None)} + yield d + + +class QemuBinaryInfo(object): + def __init__(self, binary, devtype): + if devtype is None: + devtype = 'device' + + self.binary = binary + self._machine_info = {} + + dbg("devtype: %r", devtype) + args = ['-S', '-machine', 'none,accel=kvm:tcg'] + dbg("querying info for QEMU binary: %s", binary) + vm = QEMUMachine(binary=binary, args=args) + vm.launch() + try: + self.alldevs = set(qomListTypeNames(vm, implements=devtype, abstract=False)) + # there's no way to query DeviceClass::user_creatable using QMP, + # so use 'info qdm': + self.no_user_devs = set([d['name'] for d in infoQDM(vm, ) if d['no-user']]) + self.machines = list(m['name'] for m in vm.command('query-machines')) + self.user_devs = self.alldevs.difference(self.no_user_devs) + self.kvm_available = vm.command('query-kvm')['enabled'] + finally: + vm.shutdown() + + def machineInfo(self, machine): + """Query for information on a specific machine-type + + Results are cached internally, in case the same machine- + type is queried multiple times. + """ + if machine in self._machine_info: + return self._machine_info[machine] + + mi = {} + args = ['-S', '-machine', '%s' % (machine)] + dbg("querying machine info for binary=%s machine=%s", self.binary, machine) + vm = QEMUMachine(binary=self.binary, args=args) + try: + vm.launch() + mi['runnable'] = True + except KeyboardInterrupt: + raise + except: + dbg("exception trying to run binary=%s machine=%s", self.binary, machine, exc_info=sys.exc_info()) + dbg("log: %r", vm.get_log()) + mi['runnable'] = False + + vm.shutdown() + self._machine_info[machine] = mi + return mi + + +BINARY_INFO = {} + + +def getBinaryInfo(args, binary): + if binary not in BINARY_INFO: + BINARY_INFO[binary] = QemuBinaryInfo(binary, args.devtype) + return BINARY_INFO[binary] + + +def checkOneCase(args, testcase): + """Check one specific case + + Returns a dictionary containing failure information on error, + or None on success + """ + binary = testcase['binary'] + accel = testcase['accel'] + machine = testcase['machine'] + device = testcase['device'] + + dbg("will test: %r", testcase) + + args = ['-S', '-machine', '%s,accel=%s' % (machine, accel), + '-device', qemuOptsEscape(device)] + cmdline = ' '.join([binary] + args) + dbg("will launch QEMU: %s", cmdline) + vm = QEMUMachine(binary=binary, args=args) + + exc_traceback = None + try: + vm.launch() + except KeyboardInterrupt: + raise + except: + exc_traceback = traceback.format_exc() + dbg("Exception while running test case") + finally: + vm.shutdown() + ec = vm.exitcode() + log = vm.get_log() + + if exc_traceback is not None or ec != 0: + return {'exc_traceback':exc_traceback, + 'exitcode':ec, + 'log':log, + 'testcase':testcase, + 'cmdline':cmdline} + + +def binariesToTest(args, testcase): + if args.qemu: + r = args.qemu + else: + r = glob.glob('./*-softmmu/qemu-system-*') + return r + + +def accelsToTest(args, testcase): + if getBinaryInfo(args, testcase['binary']).kvm_available: + yield 'kvm' + yield 'tcg' + + +def machinesToTest(args, testcase): + return getBinaryInfo(args, testcase['binary']).machines + + +def devicesToTest(args, testcase): + return getBinaryInfo(args, testcase['binary']).user_devs + + +TESTCASE_VARIABLES = [ + ('binary', binariesToTest), + ('accel', accelsToTest), + ('machine', machinesToTest), + ('device', devicesToTest), +] + + +def genCases1(args, testcases, var, fn): + """Generate new testcases for one variable + + If an existing item already has a variable set, don't + generate new items and just return it directly. This + allows the "-t" command-line option to be used to choose + a specific test case. + """ + for testcase in testcases: + if var in testcase: + yield testcase.copy() + else: + for i in fn(args, testcase): + t = testcase.copy() + t[var] = i + yield t + + +def genCases(args, testcase): + """Generate test cases for all variables + """ + cases = [testcase.copy()] + for var, fn in TESTCASE_VARIABLES: + dbg("var: %r, fn: %r", var, fn) + cases = genCases1(args, cases, var, fn) + return cases + + +def casesToTest(args, testcase): + cases = genCases(args, testcase) + if args.random: + cases = list(cases) + cases = random.sample(cases, min(args.random, len(cases))) + if args.debug: + cases = list(cases) + dbg("%d test cases to test", len(cases)) + if args.shuffle: + cases = list(cases) + random.shuffle(cases) + return cases + + +def logFailure(f, level): + t = f['testcase'] + logger.log(level, "failed: %s", formatTestCase(t)) + logger.log(level, "cmdline: %s", f['cmdline']) + for l in f['log'].strip().split('\n'): + logger.log(level, "log: %s", l) + logger.log(level, "exit code: %r", f['exitcode']) + if f['exc_traceback']: + logger.log(level, "exception:") + for l in f['exc_traceback'].split('\n'): + logger.log(level, " %s", l.rstrip('\n')) + + +def main(): + parser = argparse.ArgumentParser(description="QEMU -device crash test") + parser.add_argument('-t', metavar='KEY=VALUE', nargs='*', + help="Limit test cases to KEY=VALUE", + action='append', dest='testcases', default=[]) + parser.add_argument('-d', '--debug', action='store_true', + help='debug output') + parser.add_argument('-v', '--verbose', action='store_true', default=True, + help='verbose output') + parser.add_argument('-q', '--quiet', dest='verbose', action='store_false', + help='non-verbose output') + parser.add_argument('-r', '--random', type=int, metavar='COUNT', + help='run a random sample of COUNT test cases', + default=0) + parser.add_argument('--shuffle', action='store_true', + help='Run test cases in random order') + parser.add_argument('--dry-run', action='store_true', + help="Don't run any tests, just generate list") + parser.add_argument('-D', '--devtype', metavar='TYPE', + help="Test only device types that implement TYPE") + parser.add_argument('-Q', '--quick', action='store_true', default=True, + help="Quick mode: skip test cases that are expected to fail") + parser.add_argument('-F', '--full', action='store_false', dest='quick', + help="Full mode: test cases that are expected to fail") + parser.add_argument('--strict', action='store_true', dest='strict', + help="Treat all warnings as fatal") + parser.add_argument('qemu', nargs='*', metavar='QEMU', + help='QEMU binary to run') + args = parser.parse_args() + + if args.debug: + lvl = logging.DEBUG + elif args.verbose: + lvl = logging.INFO + else: + lvl = logging.WARN + logging.basicConfig(stream=sys.stdout, level=lvl, format='%(levelname)s: %(message)s') + + fatal_failures = [] + wl_stats = {} + skipped = 0 + total = 0 + + tc = {} + dbg("testcases: %r", args.testcases) + if args.testcases: + for t in chain(*args.testcases): + for kv in t.split(): + k, v = kv.split('=', 1) + tc[k] = v + + if len(binariesToTest(args, tc)) == 0: + print >>sys.stderr, "No QEMU binary found" + parser.print_usage(sys.stderr) + return 1 + + for t in casesToTest(args, tc): + logger.info("running test case: %s", formatTestCase(t)) + total += 1 + + expected_match = findExpectedResult(t) + if (args.quick and + (expected_match or + not getBinaryInfo(args, t['binary']).machineInfo(t['machine'])['runnable'])): + dbg("skipped: %s", formatTestCase(t)) + skipped += 1 + continue + + if args.dry_run: + continue + + try: + f = checkOneCase(args, t) + except KeyboardInterrupt: + break + + if f: + i, wl = checkResultWhitelist(f) + dbg("testcase: %r, whitelist match: %r", t, wl) + wl_stats.setdefault(i, []).append(f) + level = wl.get('loglevel', logging.DEBUG) + logFailure(f, level) + if wl.get('fatal') or (args.strict and level >= logging.WARN): + fatal_failures.append(f) + else: + dbg("success: %s", formatTestCase(t)) + if expected_match: + logger.warn("Didn't fail as expected: %s", formatTestCase(t)) + + logger.info("Total: %d test cases", total) + if skipped: + logger.info("Skipped %d test cases", skipped) + + if args.debug: + stats = sorted([(len(wl_stats.get(i, [])), wl) for i, wl in enumerate(ERROR_WHITELIST)]) + for count, wl in stats: + dbg("whitelist entry stats: %d: %r", count, wl) + + if fatal_failures: + for f in fatal_failures: + t = f['testcase'] + logger.error("Fatal failure: %s", formatTestCase(t)) + logger.error("Fatal failures on some machine/device combinations") + return 1 + +if __name__ == '__main__': + sys.exit(main())