diff --git a/configure b/configure index e6cfc0e4be..dfb9019b24 100755 --- a/configure +++ b/configure @@ -2768,7 +2768,7 @@ cat > $TMPC << EOF #include int main(void) { unsigned a, b, c, d; - int max = __get_cpuid_max(0, 0); + unsigned max = __get_cpuid_max(0, 0); if (max >= 1) { __cpuid(1, a, b, c, d); diff --git a/hw/acpi/Kconfig b/hw/acpi/Kconfig index 622b0b50b7..19caebde6c 100644 --- a/hw/acpi/Kconfig +++ b/hw/acpi/Kconfig @@ -10,6 +10,7 @@ config ACPI_X86 select ACPI_HMAT select ACPI_PIIX4 select ACPI_PCIHP + select ACPI_ERST config ACPI_X86_ICH bool @@ -60,3 +61,8 @@ config ACPI_HW_REDUCED select ACPI select ACPI_MEMORY_HOTPLUG select ACPI_NVDIMM + +config ACPI_ERST + bool + default y + depends on ACPI && PCI diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c index bb2cad63b5..8966e16320 100644 --- a/hw/acpi/aml-build.c +++ b/hw/acpi/aml-build.c @@ -1724,9 +1724,9 @@ void acpi_table_begin(AcpiTable *desc, GArray *array) build_append_int_noprefix(array, 0, 4); /* Length */ build_append_int_noprefix(array, desc->rev, 1); /* Revision */ build_append_int_noprefix(array, 0, 1); /* Checksum */ - build_append_padded_str(array, desc->oem_id, 6, ' '); /* OEMID */ + build_append_padded_str(array, desc->oem_id, 6, '\0'); /* OEMID */ /* OEM Table ID */ - build_append_padded_str(array, desc->oem_table_id, 8, ' '); + build_append_padded_str(array, desc->oem_table_id, 8, '\0'); build_append_int_noprefix(array, 1, 4); /* OEM Revision */ g_array_append_vals(array, ACPI_BUILD_APPNAME8, 4); /* Creator ID */ build_append_int_noprefix(array, 1, 4); /* Creator Revision */ diff --git a/hw/acpi/erst.c b/hw/acpi/erst.c new file mode 100644 index 0000000000..c0a23cf467 --- /dev/null +++ b/hw/acpi/erst.c @@ -0,0 +1,1051 @@ +/* + * ACPI Error Record Serialization Table, ERST, Implementation + * + * ACPI ERST introduced in ACPI 4.0, June 16, 2009. + * ACPI Platform Error Interfaces : Error Serialization + * + * Copyright (c) 2021 Oracle and/or its affiliates. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "hw/qdev-core.h" +#include "exec/memory.h" +#include "qom/object.h" +#include "hw/pci/pci.h" +#include "qom/object_interfaces.h" +#include "qemu/error-report.h" +#include "migration/vmstate.h" +#include "hw/qdev-properties.h" +#include "hw/acpi/acpi.h" +#include "hw/acpi/acpi-defs.h" +#include "hw/acpi/aml-build.h" +#include "hw/acpi/bios-linker-loader.h" +#include "exec/address-spaces.h" +#include "sysemu/hostmem.h" +#include "hw/acpi/erst.h" +#include "trace.h" + +/* ACPI 4.0: Table 17-16 Serialization Actions */ +#define ACTION_BEGIN_WRITE_OPERATION 0x0 +#define ACTION_BEGIN_READ_OPERATION 0x1 +#define ACTION_BEGIN_CLEAR_OPERATION 0x2 +#define ACTION_END_OPERATION 0x3 +#define ACTION_SET_RECORD_OFFSET 0x4 +#define ACTION_EXECUTE_OPERATION 0x5 +#define ACTION_CHECK_BUSY_STATUS 0x6 +#define ACTION_GET_COMMAND_STATUS 0x7 +#define ACTION_GET_RECORD_IDENTIFIER 0x8 +#define ACTION_SET_RECORD_IDENTIFIER 0x9 +#define ACTION_GET_RECORD_COUNT 0xA +#define ACTION_BEGIN_DUMMY_WRITE_OPERATION 0xB +#define ACTION_RESERVED 0xC +#define ACTION_GET_ERROR_LOG_ADDRESS_RANGE 0xD +#define ACTION_GET_ERROR_LOG_ADDRESS_LENGTH 0xE +#define ACTION_GET_ERROR_LOG_ADDRESS_RANGE_ATTRIBUTES 0xF +#define ACTION_GET_EXECUTE_OPERATION_TIMINGS 0x10 /* ACPI 6.3 */ + +/* ACPI 4.0: Table 17-17 Command Status Definitions */ +#define STATUS_SUCCESS 0x00 +#define STATUS_NOT_ENOUGH_SPACE 0x01 +#define STATUS_HARDWARE_NOT_AVAILABLE 0x02 +#define STATUS_FAILED 0x03 +#define STATUS_RECORD_STORE_EMPTY 0x04 +#define STATUS_RECORD_NOT_FOUND 0x05 + +/* ACPI 4.0: Table 17-19 Serialization Instructions */ +#define INST_READ_REGISTER 0x00 +#define INST_READ_REGISTER_VALUE 0x01 +#define INST_WRITE_REGISTER 0x02 +#define INST_WRITE_REGISTER_VALUE 0x03 +#define INST_NOOP 0x04 +#define INST_LOAD_VAR1 0x05 +#define INST_LOAD_VAR2 0x06 +#define INST_STORE_VAR1 0x07 +#define INST_ADD 0x08 +#define INST_SUBTRACT 0x09 +#define INST_ADD_VALUE 0x0A +#define INST_SUBTRACT_VALUE 0x0B +#define INST_STALL 0x0C +#define INST_STALL_WHILE_TRUE 0x0D +#define INST_SKIP_NEXT_INSTRUCTION_IF_TRUE 0x0E +#define INST_GOTO 0x0F +#define INST_SET_SRC_ADDRESS_BASE 0x10 +#define INST_SET_DST_ADDRESS_BASE 0x11 +#define INST_MOVE_DATA 0x12 + +/* UEFI 2.1: Appendix N Common Platform Error Record */ +#define UEFI_CPER_RECORD_MIN_SIZE 128U +#define UEFI_CPER_RECORD_LENGTH_OFFSET 20U +#define UEFI_CPER_RECORD_ID_OFFSET 96U +#define IS_UEFI_CPER_RECORD(ptr) \ + (((ptr)[0] == 'C') && \ + ((ptr)[1] == 'P') && \ + ((ptr)[2] == 'E') && \ + ((ptr)[3] == 'R')) + +/* + * NOTE that when accessing CPER fields within a record, memcpy() + * is utilized to avoid a possible misaligned access on the host. + */ + +/* + * This implementation is an ACTION (cmd) and VALUE (data) + * interface consisting of just two 64-bit registers. + */ +#define ERST_REG_SIZE (16UL) +#define ERST_ACTION_OFFSET (0UL) /* action (cmd) */ +#define ERST_VALUE_OFFSET (8UL) /* argument/value (data) */ + +/* + * ERST_RECORD_SIZE is the buffer size for exchanging ERST + * record contents. Thus, it defines the maximum record size. + * As this is mapped through a PCI BAR, it must be a power of + * two and larger than UEFI_CPER_RECORD_MIN_SIZE. + * The backing storage is divided into fixed size "slots", + * each ERST_RECORD_SIZE in length, and each "slot" + * storing a single record. No attempt at optimizing storage + * through compression, compaction, etc is attempted. + * NOTE that slot 0 is reserved for the backing storage header. + * Depending upon the size of the backing storage, additional + * slots will be part of the slot 0 header in order to account + * for a record_id for each available remaining slot. + */ +/* 8KiB records, not too small, not too big */ +#define ERST_RECORD_SIZE (8192UL) + +#define ACPI_ERST_MEMDEV_PROP "memdev" +#define ACPI_ERST_RECORD_SIZE_PROP "record_size" + +/* + * From the ACPI ERST spec sections: + * A record id of all 0s is used to indicate 'unspecified' record id. + * A record id of all 1s is used to indicate empty or end. + */ +#define ERST_UNSPECIFIED_RECORD_ID (0UL) +#define ERST_EMPTY_END_RECORD_ID (~0UL) + +#define ERST_IS_VALID_RECORD_ID(rid) \ + ((rid != ERST_UNSPECIFIED_RECORD_ID) && \ + (rid != ERST_EMPTY_END_RECORD_ID)) + +/* + * Implementation-specific definitions and types. + * Values are arbitrary and chosen for this implementation. + * See erst.rst documentation for details. + */ +#define ERST_EXECUTE_OPERATION_MAGIC 0x9CUL +#define ERST_STORE_MAGIC 0x524F545354535245UL /* ERSTSTOR */ +typedef struct { + uint64_t magic; + uint32_t record_size; + uint32_t storage_offset; /* offset to record storage beyond header */ + uint16_t version; + uint16_t reserved; + uint32_t record_count; + uint64_t map[]; /* contains record_ids, and position indicates index */ +} __attribute__((packed)) ERSTStorageHeader; + +/* + * Object cast macro + */ +#define ACPIERST(obj) \ + OBJECT_CHECK(ERSTDeviceState, (obj), TYPE_ACPI_ERST) + +/* + * Main ERST device state structure + */ +typedef struct { + PCIDevice parent_obj; + + /* Backend storage */ + HostMemoryBackend *hostmem; + MemoryRegion *hostmem_mr; + uint32_t storage_size; + uint32_t default_record_size; + + /* Programming registers */ + MemoryRegion iomem_mr; + + /* Exchange buffer */ + MemoryRegion exchange_mr; + + /* Interface state */ + uint8_t operation; + uint8_t busy_status; + uint8_t command_status; + uint32_t record_offset; + uint64_t reg_action; + uint64_t reg_value; + uint64_t record_identifier; + ERSTStorageHeader *header; + unsigned first_record_index; + unsigned last_record_index; + unsigned next_record_index; + +} ERSTDeviceState; + +/*******************************************************************/ +/*******************************************************************/ +typedef struct { + GArray *table_data; + pcibus_t bar; + uint8_t instruction; + uint8_t flags; + uint8_t register_bit_width; + pcibus_t register_offset; +} BuildSerializationInstructionEntry; + +/* ACPI 4.0: 17.4.1.2 Serialization Instruction Entries */ +static void build_serialization_instruction( + BuildSerializationInstructionEntry *e, + uint8_t serialization_action, + uint64_t value) +{ + /* ACPI 4.0: Table 17-18 Serialization Instruction Entry */ + struct AcpiGenericAddress gas; + uint64_t mask; + + /* Serialization Action */ + build_append_int_noprefix(e->table_data, serialization_action, 1); + /* Instruction */ + build_append_int_noprefix(e->table_data, e->instruction, 1); + /* Flags */ + build_append_int_noprefix(e->table_data, e->flags, 1); + /* Reserved */ + build_append_int_noprefix(e->table_data, 0, 1); + /* Register Region */ + gas.space_id = AML_SYSTEM_MEMORY; + gas.bit_width = e->register_bit_width; + gas.bit_offset = 0; + gas.access_width = (uint8_t)ctz32(e->register_bit_width) - 2; + gas.address = (uint64_t)(e->bar + e->register_offset); + build_append_gas_from_struct(e->table_data, &gas); + /* Value */ + build_append_int_noprefix(e->table_data, value, 8); + /* Mask */ + mask = (1ULL << (e->register_bit_width - 1) << 1) - 1; + build_append_int_noprefix(e->table_data, mask, 8); +} + +/* ACPI 4.0: 17.4.1 Serialization Action Table */ +void build_erst(GArray *table_data, BIOSLinker *linker, Object *erst_dev, + const char *oem_id, const char *oem_table_id) +{ + /* + * Serialization Action Table + * The serialization action table must be generated first + * so that its size can be known in order to populate the + * Instruction Entry Count field. + */ + unsigned action; + GArray *table_instruction_data = g_array_new(FALSE, FALSE, sizeof(char)); + pcibus_t bar0 = pci_get_bar_addr(PCI_DEVICE(erst_dev), 0); + AcpiTable table = { .sig = "ERST", .rev = 1, .oem_id = oem_id, + .oem_table_id = oem_table_id }; + /* Contexts for the different ways ACTION and VALUE are accessed */ + BuildSerializationInstructionEntry rd_value_32_val = { + .table_data = table_instruction_data, .bar = bar0, .flags = 0, + .instruction = INST_READ_REGISTER_VALUE, + .register_bit_width = 32, + .register_offset = ERST_VALUE_OFFSET, + }; + BuildSerializationInstructionEntry rd_value_32 = { + .table_data = table_instruction_data, .bar = bar0, .flags = 0, + .instruction = INST_READ_REGISTER, + .register_bit_width = 32, + .register_offset = ERST_VALUE_OFFSET, + }; + BuildSerializationInstructionEntry rd_value_64 = { + .table_data = table_instruction_data, .bar = bar0, .flags = 0, + .instruction = INST_READ_REGISTER, + .register_bit_width = 64, + .register_offset = ERST_VALUE_OFFSET, + }; + BuildSerializationInstructionEntry wr_value_32_val = { + .table_data = table_instruction_data, .bar = bar0, .flags = 0, + .instruction = INST_WRITE_REGISTER_VALUE, + .register_bit_width = 32, + .register_offset = ERST_VALUE_OFFSET, + }; + BuildSerializationInstructionEntry wr_value_32 = { + .table_data = table_instruction_data, .bar = bar0, .flags = 0, + .instruction = INST_WRITE_REGISTER, + .register_bit_width = 32, + .register_offset = ERST_VALUE_OFFSET, + }; + BuildSerializationInstructionEntry wr_value_64 = { + .table_data = table_instruction_data, .bar = bar0, .flags = 0, + .instruction = INST_WRITE_REGISTER, + .register_bit_width = 64, + .register_offset = ERST_VALUE_OFFSET, + }; + BuildSerializationInstructionEntry wr_action = { + .table_data = table_instruction_data, .bar = bar0, .flags = 0, + .instruction = INST_WRITE_REGISTER_VALUE, + .register_bit_width = 32, + .register_offset = ERST_ACTION_OFFSET, + }; + + trace_acpi_erst_pci_bar_0(bar0); + + /* Serialization Instruction Entries */ + action = ACTION_BEGIN_WRITE_OPERATION; + build_serialization_instruction(&wr_action, action, action); + + action = ACTION_BEGIN_READ_OPERATION; + build_serialization_instruction(&wr_action, action, action); + + action = ACTION_BEGIN_CLEAR_OPERATION; + build_serialization_instruction(&wr_action, action, action); + + action = ACTION_END_OPERATION; + build_serialization_instruction(&wr_action, action, action); + + action = ACTION_SET_RECORD_OFFSET; + build_serialization_instruction(&wr_value_32, action, 0); + build_serialization_instruction(&wr_action, action, action); + + action = ACTION_EXECUTE_OPERATION; + build_serialization_instruction(&wr_value_32_val, action, + ERST_EXECUTE_OPERATION_MAGIC); + build_serialization_instruction(&wr_action, action, action); + + action = ACTION_CHECK_BUSY_STATUS; + build_serialization_instruction(&wr_action, action, action); + build_serialization_instruction(&rd_value_32_val, action, 0x01); + + action = ACTION_GET_COMMAND_STATUS; + build_serialization_instruction(&wr_action, action, action); + build_serialization_instruction(&rd_value_32, action, 0); + + action = ACTION_GET_RECORD_IDENTIFIER; + build_serialization_instruction(&wr_action, action, action); + build_serialization_instruction(&rd_value_64, action, 0); + + action = ACTION_SET_RECORD_IDENTIFIER; + build_serialization_instruction(&wr_value_64, action, 0); + build_serialization_instruction(&wr_action, action, action); + + action = ACTION_GET_RECORD_COUNT; + build_serialization_instruction(&wr_action, action, action); + build_serialization_instruction(&rd_value_32, action, 0); + + action = ACTION_BEGIN_DUMMY_WRITE_OPERATION; + build_serialization_instruction(&wr_action, action, action); + + action = ACTION_GET_ERROR_LOG_ADDRESS_RANGE; + build_serialization_instruction(&wr_action, action, action); + build_serialization_instruction(&rd_value_64, action, 0); + + action = ACTION_GET_ERROR_LOG_ADDRESS_LENGTH; + build_serialization_instruction(&wr_action, action, action); + build_serialization_instruction(&rd_value_64, action, 0); + + action = ACTION_GET_ERROR_LOG_ADDRESS_RANGE_ATTRIBUTES; + build_serialization_instruction(&wr_action, action, action); + build_serialization_instruction(&rd_value_32, action, 0); + + action = ACTION_GET_EXECUTE_OPERATION_TIMINGS; + build_serialization_instruction(&wr_action, action, action); + build_serialization_instruction(&rd_value_64, action, 0); + + /* Serialization Header */ + acpi_table_begin(&table, table_data); + + /* Serialization Header Size */ + build_append_int_noprefix(table_data, 48, 4); + + /* Reserved */ + build_append_int_noprefix(table_data, 0, 4); + + /* + * Instruction Entry Count + * Each instruction entry is 32 bytes + */ + g_assert((table_instruction_data->len) % 32 == 0); + build_append_int_noprefix(table_data, + (table_instruction_data->len / 32), 4); + + /* Serialization Instruction Entries */ + g_array_append_vals(table_data, table_instruction_data->data, + table_instruction_data->len); + g_array_free(table_instruction_data, TRUE); + + acpi_table_end(linker, &table); +} + +/*******************************************************************/ +/*******************************************************************/ +static uint8_t *get_nvram_ptr_by_index(ERSTDeviceState *s, unsigned index) +{ + uint8_t *rc = NULL; + off_t offset = (index * le32_to_cpu(s->header->record_size)); + + g_assert(offset < s->storage_size); + + rc = memory_region_get_ram_ptr(s->hostmem_mr); + rc += offset; + + return rc; +} + +static void make_erst_storage_header(ERSTDeviceState *s) +{ + ERSTStorageHeader *header = s->header; + unsigned mapsz, headersz; + + header->magic = cpu_to_le64(ERST_STORE_MAGIC); + header->record_size = cpu_to_le32(s->default_record_size); + header->version = cpu_to_le16(0x0100); + header->reserved = cpu_to_le16(0x0000); + + /* Compute mapsize */ + mapsz = s->storage_size / s->default_record_size; + mapsz *= sizeof(uint64_t); + /* Compute header+map size */ + headersz = sizeof(ERSTStorageHeader) + mapsz; + /* Round up to nearest integer multiple of ERST_RECORD_SIZE */ + headersz = QEMU_ALIGN_UP(headersz, s->default_record_size); + header->storage_offset = cpu_to_le32(headersz); + + /* + * The HostMemoryBackend initializes contents to zero, + * so all record_ids stashed in the map are zero'd. + * As well the record_count is zero. Properly initialized. + */ +} + +static void check_erst_backend_storage(ERSTDeviceState *s, Error **errp) +{ + ERSTStorageHeader *header; + uint32_t record_size; + + header = memory_region_get_ram_ptr(s->hostmem_mr); + s->header = header; + + /* Ensure pointer to header is 64-bit aligned */ + g_assert(QEMU_PTR_IS_ALIGNED(header, sizeof(uint64_t))); + + /* + * Check if header is uninitialized; HostMemoryBackend inits to 0 + */ + if (le64_to_cpu(header->magic) == 0UL) { + make_erst_storage_header(s); + } + + /* Validity check record_size */ + record_size = le32_to_cpu(header->record_size); + if (!( + (record_size) && /* non zero */ + (record_size >= UEFI_CPER_RECORD_MIN_SIZE) && + (((record_size - 1) & record_size) == 0) && /* is power of 2 */ + (record_size >= 4096) /* PAGE_SIZE */ + )) { + error_setg(errp, "ERST record_size %u is invalid", record_size); + } + + /* Validity check header */ + if (!( + (le64_to_cpu(header->magic) == ERST_STORE_MAGIC) && + ((le32_to_cpu(header->storage_offset) % record_size) == 0) && + (le16_to_cpu(header->version) == 0x0100) && + (le16_to_cpu(header->reserved) == 0) + )) { + error_setg(errp, "ERST backend storage header is invalid"); + } + + /* Check storage_size against record_size */ + if (((s->storage_size % record_size) != 0) || + (record_size > s->storage_size)) { + error_setg(errp, "ACPI ERST requires storage size be multiple of " + "record size (%uKiB)", record_size); + } + + /* Compute offset of first and last record storage slot */ + s->first_record_index = le32_to_cpu(header->storage_offset) + / record_size; + s->last_record_index = (s->storage_size / record_size); +} + +static void update_map_entry(ERSTDeviceState *s, unsigned index, + uint64_t record_id) +{ + if (index < s->last_record_index) { + s->header->map[index] = cpu_to_le64(record_id); + } +} + +static unsigned find_next_empty_record_index(ERSTDeviceState *s) +{ + unsigned rc = 0; /* 0 not a valid index */ + unsigned index = s->first_record_index; + + for (; index < s->last_record_index; ++index) { + if (le64_to_cpu(s->header->map[index]) == ERST_UNSPECIFIED_RECORD_ID) { + rc = index; + break; + } + } + + return rc; +} + +static unsigned lookup_erst_record(ERSTDeviceState *s, + uint64_t record_identifier) +{ + unsigned rc = 0; /* 0 not a valid index */ + + /* Find the record_identifier in the map */ + if (record_identifier != ERST_UNSPECIFIED_RECORD_ID) { + /* + * Count number of valid records encountered, and + * short-circuit the loop if identifier not found + */ + uint32_t record_count = le32_to_cpu(s->header->record_count); + unsigned count = 0; + unsigned index; + for (index = s->first_record_index; index < s->last_record_index && + count < record_count; ++index) { + if (le64_to_cpu(s->header->map[index]) == record_identifier) { + rc = index; + break; + } + if (le64_to_cpu(s->header->map[index]) != + ERST_UNSPECIFIED_RECORD_ID) { + ++count; + } + } + } + + return rc; +} + +/* + * ACPI 4.0: 17.4.1.1 Serialization Actions, also see + * ACPI 4.0: 17.4.2.2 Operations - Reading 6.c and 2.c + */ +static unsigned get_next_record_identifier(ERSTDeviceState *s, + uint64_t *record_identifier, bool first) +{ + unsigned found = 0; + unsigned index; + + /* For operations needing to return 'first' record identifier */ + if (first) { + /* Reset initial index to beginning */ + s->next_record_index = s->first_record_index; + } + index = s->next_record_index; + + *record_identifier = ERST_EMPTY_END_RECORD_ID; + + if (le32_to_cpu(s->header->record_count)) { + for (; index < s->last_record_index; ++index) { + if (le64_to_cpu(s->header->map[index]) != + ERST_UNSPECIFIED_RECORD_ID) { + /* where to start next time */ + s->next_record_index = index + 1; + *record_identifier = le64_to_cpu(s->header->map[index]); + found = 1; + break; + } + } + } + if (!found) { + /* at end (ie scan complete), reset */ + s->next_record_index = s->first_record_index; + } + + return STATUS_SUCCESS; +} + +/* ACPI 4.0: 17.4.2.3 Operations - Clearing */ +static unsigned clear_erst_record(ERSTDeviceState *s) +{ + unsigned rc = STATUS_RECORD_NOT_FOUND; + unsigned index; + + /* Check for valid record identifier */ + if (!ERST_IS_VALID_RECORD_ID(s->record_identifier)) { + return STATUS_FAILED; + } + + index = lookup_erst_record(s, s->record_identifier); + if (index) { + /* No need to wipe record, just invalidate its map entry */ + uint32_t record_count; + update_map_entry(s, index, ERST_UNSPECIFIED_RECORD_ID); + record_count = le32_to_cpu(s->header->record_count); + record_count -= 1; + s->header->record_count = cpu_to_le32(record_count); + rc = STATUS_SUCCESS; + } + + return rc; +} + +/* ACPI 4.0: 17.4.2.2 Operations - Reading */ +static unsigned read_erst_record(ERSTDeviceState *s) +{ + unsigned rc = STATUS_RECORD_NOT_FOUND; + unsigned exchange_length; + unsigned index; + + /* Check if backend storage is empty */ + if (le32_to_cpu(s->header->record_count) == 0) { + return STATUS_RECORD_STORE_EMPTY; + } + + exchange_length = memory_region_size(&s->exchange_mr); + + /* Check for record identifier of all 0s */ + if (s->record_identifier == ERST_UNSPECIFIED_RECORD_ID) { + /* Set to 'first' record in storage */ + get_next_record_identifier(s, &s->record_identifier, true); + /* record_identifier is now a valid id, or all 1s */ + } + + /* Check for record identifier of all 1s */ + if (s->record_identifier == ERST_EMPTY_END_RECORD_ID) { + return STATUS_FAILED; + } + + /* Validate record_offset */ + if (s->record_offset > (exchange_length - UEFI_CPER_RECORD_MIN_SIZE)) { + return STATUS_FAILED; + } + + index = lookup_erst_record(s, s->record_identifier); + if (index) { + uint8_t *nvram; + uint8_t *exchange; + uint32_t record_length; + + /* Obtain pointer to the exchange buffer */ + exchange = memory_region_get_ram_ptr(&s->exchange_mr); + exchange += s->record_offset; + /* Obtain pointer to slot in storage */ + nvram = get_nvram_ptr_by_index(s, index); + /* Validate CPER record_length */ + memcpy((uint8_t *)&record_length, + &nvram[UEFI_CPER_RECORD_LENGTH_OFFSET], + sizeof(uint32_t)); + record_length = le32_to_cpu(record_length); + if (record_length < UEFI_CPER_RECORD_MIN_SIZE) { + rc = STATUS_FAILED; + } + if ((s->record_offset + record_length) > exchange_length) { + rc = STATUS_FAILED; + } + /* If all is ok, copy the record to the exchange buffer */ + if (rc != STATUS_FAILED) { + memcpy(exchange, nvram, record_length); + rc = STATUS_SUCCESS; + } + } else { + /* + * See "Reading : 'The steps performed by the platform ...' 2.c" + * Set to 'first' record in storage + */ + get_next_record_identifier(s, &s->record_identifier, true); + } + + return rc; +} + +/* ACPI 4.0: 17.4.2.1 Operations - Writing */ +static unsigned write_erst_record(ERSTDeviceState *s) +{ + unsigned rc = STATUS_FAILED; + unsigned exchange_length; + unsigned index; + uint64_t record_identifier; + uint32_t record_length; + uint8_t *exchange; + uint8_t *nvram = NULL; + bool record_found = false; + + exchange_length = memory_region_size(&s->exchange_mr); + + /* Validate record_offset */ + if (s->record_offset > (exchange_length - UEFI_CPER_RECORD_MIN_SIZE)) { + return STATUS_FAILED; + } + + /* Obtain pointer to record in the exchange buffer */ + exchange = memory_region_get_ram_ptr(&s->exchange_mr); + exchange += s->record_offset; + + /* Validate CPER record_length */ + memcpy((uint8_t *)&record_length, &exchange[UEFI_CPER_RECORD_LENGTH_OFFSET], + sizeof(uint32_t)); + record_length = le32_to_cpu(record_length); + if (record_length < UEFI_CPER_RECORD_MIN_SIZE) { + return STATUS_FAILED; + } + if ((s->record_offset + record_length) > exchange_length) { + return STATUS_FAILED; + } + + /* Extract record identifier */ + memcpy((uint8_t *)&record_identifier, &exchange[UEFI_CPER_RECORD_ID_OFFSET], + sizeof(uint64_t)); + record_identifier = le64_to_cpu(record_identifier); + + /* Check for valid record identifier */ + if (!ERST_IS_VALID_RECORD_ID(record_identifier)) { + return STATUS_FAILED; + } + + index = lookup_erst_record(s, record_identifier); + if (index) { + /* Record found, overwrite existing record */ + nvram = get_nvram_ptr_by_index(s, index); + record_found = true; + } else { + /* Record not found, not an overwrite, allocate for write */ + index = find_next_empty_record_index(s); + if (index) { + nvram = get_nvram_ptr_by_index(s, index); + } else { + /* All slots are occupied */ + rc = STATUS_NOT_ENOUGH_SPACE; + } + } + if (nvram) { + /* Write the record into the slot */ + memcpy(nvram, exchange, record_length); + memset(nvram + record_length, exchange_length - record_length, 0xFF); + /* If a new record, increment the record_count */ + if (!record_found) { + uint32_t record_count; + record_count = le32_to_cpu(s->header->record_count); + record_count += 1; /* writing new record */ + s->header->record_count = cpu_to_le32(record_count); + } + update_map_entry(s, index, record_identifier); + rc = STATUS_SUCCESS; + } + + return rc; +} + +/*******************************************************************/ + +static uint64_t erst_rd_reg64(hwaddr addr, + uint64_t reg, unsigned size) +{ + uint64_t rdval; + uint64_t mask; + unsigned shift; + + if (size == sizeof(uint64_t)) { + /* 64b access */ + mask = 0xFFFFFFFFFFFFFFFFUL; + shift = 0; + } else { + /* 32b access */ + mask = 0x00000000FFFFFFFFUL; + shift = ((addr & 0x4) == 0x4) ? 32 : 0; + } + + rdval = reg; + rdval >>= shift; + rdval &= mask; + + return rdval; +} + +static uint64_t erst_wr_reg64(hwaddr addr, + uint64_t reg, uint64_t val, unsigned size) +{ + uint64_t wrval; + uint64_t mask; + unsigned shift; + + if (size == sizeof(uint64_t)) { + /* 64b access */ + mask = 0xFFFFFFFFFFFFFFFFUL; + shift = 0; + } else { + /* 32b access */ + mask = 0x00000000FFFFFFFFUL; + shift = ((addr & 0x4) == 0x4) ? 32 : 0; + } + + val &= mask; + val <<= shift; + mask <<= shift; + wrval = reg; + wrval &= ~mask; + wrval |= val; + + return wrval; +} + +static void erst_reg_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + ERSTDeviceState *s = (ERSTDeviceState *)opaque; + + /* + * NOTE: All actions/operations/side effects happen on the WRITE, + * by this implementation's design. The READs simply return the + * reg_value contents. + */ + trace_acpi_erst_reg_write(addr, val, size); + + switch (addr) { + case ERST_VALUE_OFFSET + 0: + case ERST_VALUE_OFFSET + 4: + s->reg_value = erst_wr_reg64(addr, s->reg_value, val, size); + break; + case ERST_ACTION_OFFSET + 0: + /* + * NOTE: all valid values written to this register are of the + * ACTION_* variety. Thus there is no need to make this a 64-bit + * register, 32-bits is appropriate. As such ERST_ACTION_OFFSET+4 + * is not needed. + */ + switch (val) { + case ACTION_BEGIN_WRITE_OPERATION: + case ACTION_BEGIN_READ_OPERATION: + case ACTION_BEGIN_CLEAR_OPERATION: + case ACTION_BEGIN_DUMMY_WRITE_OPERATION: + case ACTION_END_OPERATION: + s->operation = val; + break; + case ACTION_SET_RECORD_OFFSET: + s->record_offset = s->reg_value; + break; + case ACTION_EXECUTE_OPERATION: + if ((uint8_t)s->reg_value == ERST_EXECUTE_OPERATION_MAGIC) { + s->busy_status = 1; + switch (s->operation) { + case ACTION_BEGIN_WRITE_OPERATION: + s->command_status = write_erst_record(s); + break; + case ACTION_BEGIN_READ_OPERATION: + s->command_status = read_erst_record(s); + break; + case ACTION_BEGIN_CLEAR_OPERATION: + s->command_status = clear_erst_record(s); + break; + case ACTION_BEGIN_DUMMY_WRITE_OPERATION: + s->command_status = STATUS_SUCCESS; + break; + case ACTION_END_OPERATION: + s->command_status = STATUS_SUCCESS; + break; + default: + s->command_status = STATUS_FAILED; + break; + } + s->busy_status = 0; + } + break; + case ACTION_CHECK_BUSY_STATUS: + s->reg_value = s->busy_status; + break; + case ACTION_GET_COMMAND_STATUS: + s->reg_value = s->command_status; + break; + case ACTION_GET_RECORD_IDENTIFIER: + s->command_status = get_next_record_identifier(s, + &s->reg_value, false); + break; + case ACTION_SET_RECORD_IDENTIFIER: + s->record_identifier = s->reg_value; + break; + case ACTION_GET_RECORD_COUNT: + s->reg_value = le32_to_cpu(s->header->record_count); + break; + case ACTION_GET_ERROR_LOG_ADDRESS_RANGE: + s->reg_value = (hwaddr)pci_get_bar_addr(PCI_DEVICE(s), 1); + break; + case ACTION_GET_ERROR_LOG_ADDRESS_LENGTH: + s->reg_value = le32_to_cpu(s->header->record_size); + break; + case ACTION_GET_ERROR_LOG_ADDRESS_RANGE_ATTRIBUTES: + s->reg_value = 0x0; /* intentional, not NVRAM mode */ + break; + case ACTION_GET_EXECUTE_OPERATION_TIMINGS: + s->reg_value = + (100ULL << 32) | /* 100us max time */ + (10ULL << 0) ; /* 10us min time */ + break; + default: + /* Unknown action/command, NOP */ + break; + } + break; + default: + /* This should not happen, but if it does, NOP */ + break; + } +} + +static uint64_t erst_reg_read(void *opaque, hwaddr addr, + unsigned size) +{ + ERSTDeviceState *s = (ERSTDeviceState *)opaque; + uint64_t val = 0; + + switch (addr) { + case ERST_ACTION_OFFSET + 0: + case ERST_ACTION_OFFSET + 4: + val = erst_rd_reg64(addr, s->reg_action, size); + break; + case ERST_VALUE_OFFSET + 0: + case ERST_VALUE_OFFSET + 4: + val = erst_rd_reg64(addr, s->reg_value, size); + break; + default: + break; + } + trace_acpi_erst_reg_read(addr, val, size); + return val; +} + +static const MemoryRegionOps erst_reg_ops = { + .read = erst_reg_read, + .write = erst_reg_write, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +/*******************************************************************/ +/*******************************************************************/ +static int erst_post_load(void *opaque, int version_id) +{ + ERSTDeviceState *s = opaque; + + /* Recompute pointer to header */ + s->header = (ERSTStorageHeader *)get_nvram_ptr_by_index(s, 0); + trace_acpi_erst_post_load(s->header, le32_to_cpu(s->header->record_size)); + + return 0; +} + +static const VMStateDescription erst_vmstate = { + .name = "acpi-erst", + .version_id = 1, + .minimum_version_id = 1, + .post_load = erst_post_load, + .fields = (VMStateField[]) { + VMSTATE_UINT8(operation, ERSTDeviceState), + VMSTATE_UINT8(busy_status, ERSTDeviceState), + VMSTATE_UINT8(command_status, ERSTDeviceState), + VMSTATE_UINT32(record_offset, ERSTDeviceState), + VMSTATE_UINT64(reg_action, ERSTDeviceState), + VMSTATE_UINT64(reg_value, ERSTDeviceState), + VMSTATE_UINT64(record_identifier, ERSTDeviceState), + VMSTATE_UINT32(next_record_index, ERSTDeviceState), + VMSTATE_END_OF_LIST() + } +}; + +static void erst_realizefn(PCIDevice *pci_dev, Error **errp) +{ + ERSTDeviceState *s = ACPIERST(pci_dev); + + trace_acpi_erst_realizefn_in(); + + if (!s->hostmem) { + error_setg(errp, "'" ACPI_ERST_MEMDEV_PROP "' property is not set"); + return; + } else if (host_memory_backend_is_mapped(s->hostmem)) { + error_setg(errp, "can't use already busy memdev: %s", + object_get_canonical_path_component(OBJECT(s->hostmem))); + return; + } + + s->hostmem_mr = host_memory_backend_get_memory(s->hostmem); + + /* HostMemoryBackend size will be multiple of PAGE_SIZE */ + s->storage_size = object_property_get_int(OBJECT(s->hostmem), "size", errp); + + /* Initialize backend storage and record_count */ + check_erst_backend_storage(s, errp); + + /* BAR 0: Programming registers */ + memory_region_init_io(&s->iomem_mr, OBJECT(pci_dev), &erst_reg_ops, s, + TYPE_ACPI_ERST, ERST_REG_SIZE); + pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &s->iomem_mr); + + /* BAR 1: Exchange buffer memory */ + memory_region_init_ram(&s->exchange_mr, OBJECT(pci_dev), + "erst.exchange", + le32_to_cpu(s->header->record_size), errp); + pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_MEMORY, + &s->exchange_mr); + + /* Include the backend storage in the migration stream */ + vmstate_register_ram_global(s->hostmem_mr); + + trace_acpi_erst_realizefn_out(s->storage_size); +} + +static void erst_reset(DeviceState *dev) +{ + ERSTDeviceState *s = ACPIERST(dev); + + trace_acpi_erst_reset_in(le32_to_cpu(s->header->record_count)); + s->operation = 0; + s->busy_status = 0; + s->command_status = STATUS_SUCCESS; + s->record_identifier = ERST_UNSPECIFIED_RECORD_ID; + s->record_offset = 0; + s->next_record_index = s->first_record_index; + /* NOTE: first/last_record_index are computed only once */ + trace_acpi_erst_reset_out(le32_to_cpu(s->header->record_count)); +} + +static Property erst_properties[] = { + DEFINE_PROP_LINK(ACPI_ERST_MEMDEV_PROP, ERSTDeviceState, hostmem, + TYPE_MEMORY_BACKEND, HostMemoryBackend *), + DEFINE_PROP_UINT32(ACPI_ERST_RECORD_SIZE_PROP, ERSTDeviceState, + default_record_size, ERST_RECORD_SIZE), + DEFINE_PROP_END_OF_LIST(), +}; + +static void erst_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + + trace_acpi_erst_class_init_in(); + k->realize = erst_realizefn; + k->vendor_id = PCI_VENDOR_ID_REDHAT; + k->device_id = PCI_DEVICE_ID_REDHAT_ACPI_ERST; + k->revision = 0x00; + k->class_id = PCI_CLASS_OTHERS; + dc->reset = erst_reset; + dc->vmsd = &erst_vmstate; + dc->user_creatable = true; + dc->hotpluggable = false; + device_class_set_props(dc, erst_properties); + dc->desc = "ACPI Error Record Serialization Table (ERST) device"; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + trace_acpi_erst_class_init_out(); +} + +static const TypeInfo erst_type_info = { + .name = TYPE_ACPI_ERST, + .parent = TYPE_PCI_DEVICE, + .class_init = erst_class_init, + .instance_size = sizeof(ERSTDeviceState), + .interfaces = (InterfaceInfo[]) { + { INTERFACE_CONVENTIONAL_PCI_DEVICE }, + { } + } +}; + +static void erst_register_types(void) +{ + type_register_static(&erst_type_info); +} + +type_init(erst_register_types) diff --git a/hw/acpi/meson.build b/hw/acpi/meson.build index adf6347bc4..f5b22983bb 100644 --- a/hw/acpi/meson.build +++ b/hw/acpi/meson.build @@ -22,6 +22,7 @@ acpi_ss.add(when: 'CONFIG_ACPI_PCIHP', if_true: files('pcihp.c')) acpi_ss.add(when: 'CONFIG_ACPI_PCIHP', if_false: files('acpi-pci-hotplug-stub.c')) acpi_ss.add(when: 'CONFIG_ACPI_VIOT', if_true: files('viot.c')) acpi_ss.add(when: 'CONFIG_ACPI_X86_ICH', if_true: files('ich9.c', 'tco.c')) +acpi_ss.add(when: 'CONFIG_ACPI_ERST', if_true: files('erst.c')) acpi_ss.add(when: 'CONFIG_IPMI', if_true: files('ipmi.c'), if_false: files('ipmi-stub.c')) acpi_ss.add(when: 'CONFIG_PC', if_false: files('acpi-x86-stub.c')) acpi_ss.add(when: 'CONFIG_TPM', if_true: files('tpm.c')) diff --git a/hw/acpi/trace-events b/hw/acpi/trace-events index 974d770e8b..2250126a22 100644 --- a/hw/acpi/trace-events +++ b/hw/acpi/trace-events @@ -55,3 +55,18 @@ piix4_gpe_writeb(uint64_t addr, unsigned width, uint64_t val) "addr: 0x%" PRIx64 # tco.c tco_timer_reload(int ticks, int msec) "ticks=%d (%d ms)" tco_timer_expired(int timeouts_no, bool strap, bool no_reboot) "timeouts_no=%d no_reboot=%d/%d" + +# erst.c +acpi_erst_reg_write(uint64_t addr, uint64_t val, unsigned size) "addr: 0x%04" PRIx64 " <== 0x%016" PRIx64 " (size: %u)" +acpi_erst_reg_read(uint64_t addr, uint64_t val, unsigned size) " addr: 0x%04" PRIx64 " ==> 0x%016" PRIx64 " (size: %u)" +acpi_erst_mem_write(uint64_t addr, uint64_t val, unsigned size) "addr: 0x%06" PRIx64 " <== 0x%016" PRIx64 " (size: %u)" +acpi_erst_mem_read(uint64_t addr, uint64_t val, unsigned size) " addr: 0x%06" PRIx64 " ==> 0x%016" PRIx64 " (size: %u)" +acpi_erst_pci_bar_0(uint64_t addr) "BAR0: 0x%016" PRIx64 +acpi_erst_pci_bar_1(uint64_t addr) "BAR1: 0x%016" PRIx64 +acpi_erst_realizefn_in(void) +acpi_erst_realizefn_out(unsigned size) "total nvram size %u bytes" +acpi_erst_reset_in(unsigned record_count) "record_count %u" +acpi_erst_reset_out(unsigned record_count) "record_count %u" +acpi_erst_post_load(void *header, unsigned slot_size) "header: 0x%p slot_size %u" +acpi_erst_class_init_in(void) +acpi_erst_class_init_out(void) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index ce823e8fcb..ebd47aa26f 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -43,6 +43,7 @@ #include "sysemu/tpm.h" #include "hw/acpi/tpm.h" #include "hw/acpi/vmgenid.h" +#include "hw/acpi/erst.h" #include "sysemu/tpm_backend.h" #include "hw/rtc/mc146818rtc_regs.h" #include "migration/vmstate.h" @@ -74,6 +75,8 @@ #include "hw/acpi/hmat.h" #include "hw/acpi/viot.h" +#include CONFIG_DEVICES + /* These are used to size the ACPI tables for -M pc-i440fx-1.7 and * -M pc-i440fx-2.0. Even if the actual amount of AML generated grows * a little bit, there should be plenty of free space since the DSDT @@ -2575,6 +2578,18 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) ACPI_DEVICE_IF(x86ms->acpi_dev), x86ms->oem_id, x86ms->oem_table_id); +#ifdef CONFIG_ACPI_ERST + { + Object *erst_dev; + erst_dev = find_erst_dev(); + if (erst_dev) { + acpi_add_table(table_offsets, tables_blob); + build_erst(tables_blob, tables->linker, erst_dev, + x86ms->oem_id, x86ms->oem_table_id); + } + } +#endif + vmgenid_dev = find_vmgenid_dev(); if (vmgenid_dev) { acpi_add_table(table_offsets, tables_blob); diff --git a/hw/i386/acpi-microvm.c b/hw/i386/acpi-microvm.c index 196d318499..68ca7e7fc2 100644 --- a/hw/i386/acpi-microvm.c +++ b/hw/i386/acpi-microvm.c @@ -30,6 +30,7 @@ #include "hw/acpi/bios-linker-loader.h" #include "hw/acpi/generic_event_device.h" #include "hw/acpi/utils.h" +#include "hw/acpi/erst.h" #include "hw/i386/fw_cfg.h" #include "hw/i386/microvm.h" #include "hw/pci/pci.h" @@ -40,6 +41,8 @@ #include "acpi-common.h" #include "acpi-microvm.h" +#include CONFIG_DEVICES + static void acpi_dsdt_add_virtio(Aml *scope, MicrovmMachineState *mms) { @@ -207,6 +210,18 @@ static void acpi_build_microvm(AcpiBuildTables *tables, ACPI_DEVICE_IF(x86ms->acpi_dev), x86ms->oem_id, x86ms->oem_table_id); +#ifdef CONFIG_ACPI_ERST + { + Object *erst_dev; + erst_dev = find_erst_dev(); + if (erst_dev) { + acpi_add_table(table_offsets, tables_blob); + build_erst(tables_blob, tables->linker, erst_dev, + x86ms->oem_id, x86ms->oem_table_id); + } + } +#endif + xsdt = tables_blob->len; build_xsdt(tables_blob, tables->linker, table_offsets, x86ms->oem_id, x86ms->oem_table_id); diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 7c7790a5ce..d9b344248d 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -357,10 +357,12 @@ static void pc_compat_1_4_fn(MachineState *machine) pc_compat_1_5_fn(machine); } +#ifdef CONFIG_ISAPC static void pc_init_isa(MachineState *machine) { pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, TYPE_I440FX_PCI_DEVICE); } +#endif #ifdef CONFIG_XEN static void pc_xen_hvm_init_pci(MachineState *machine) @@ -916,6 +918,7 @@ void igd_passthrough_isa_bridge_create(PCIBus *bus, uint16_t gpu_dev_id) pci_config_set_revision(bridge_dev->config, pch_rev_id); } +#ifdef CONFIG_ISAPC static void isapc_machine_options(MachineClass *m) { PCMachineClass *pcmc = PC_MACHINE_CLASS(m); @@ -935,7 +938,7 @@ static void isapc_machine_options(MachineClass *m) DEFINE_PC_MACHINE(isapc, "isapc", pc_init_isa, isapc_machine_options); - +#endif #ifdef CONFIG_XEN static void xenfv_4_2_machine_options(MachineClass *m) diff --git a/include/hw/acpi/erst.h b/include/hw/acpi/erst.h new file mode 100644 index 0000000000..b747fe7739 --- /dev/null +++ b/include/hw/acpi/erst.h @@ -0,0 +1,24 @@ +/* + * ACPI Error Record Serialization Table, ERST, Implementation + * + * ACPI ERST introduced in ACPI 4.0, June 16, 2009. + * ACPI Platform Error Interfaces : Error Serialization + * + * Copyright (c) 2021 Oracle and/or its affiliates. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ +#ifndef HW_ACPI_ERST_H +#define HW_ACPI_ERST_H + +void build_erst(GArray *table_data, BIOSLinker *linker, Object *erst_dev, + const char *oem_id, const char *oem_table_id); + +#define TYPE_ACPI_ERST "acpi-erst" + +/* returns NULL unless there is exactly one device */ +static inline Object *find_erst_dev(void) +{ + return object_resolve_path_type("", TYPE_ACPI_ERST, NULL); +} +#endif diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index 023abc0f79..c3f3c90473 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -108,6 +108,7 @@ extern bool pci_available; #define PCI_DEVICE_ID_REDHAT_MDPY 0x000f #define PCI_DEVICE_ID_REDHAT_NVME 0x0010 #define PCI_DEVICE_ID_REDHAT_PVPANIC 0x0011 +#define PCI_DEVICE_ID_REDHAT_ACPI_ERST 0x0012 #define PCI_DEVICE_ID_REDHAT_QXL 0x0100 #define FMT_PCIBUS PRIx64 diff --git a/subprojects/libvhost-user/libvhost-user.c b/subprojects/libvhost-user/libvhost-user.c index 787f4d2d4f..47d2efc60f 100644 --- a/subprojects/libvhost-user/libvhost-user.c +++ b/subprojects/libvhost-user/libvhost-user.c @@ -690,6 +690,29 @@ vu_add_mem_reg(VuDev *dev, VhostUserMsg *vmsg) { VuDevRegion *dev_region = &dev->regions[dev->nregions]; void *mmap_addr; + if (vmsg->fd_num != 1) { + vmsg_close_fds(vmsg); + vu_panic(dev, "VHOST_USER_ADD_MEM_REG received %d fds - only 1 fd " + "should be sent for this message type", vmsg->fd_num); + return false; + } + + if (vmsg->size < VHOST_USER_MEM_REG_SIZE) { + close(vmsg->fds[0]); + vu_panic(dev, "VHOST_USER_ADD_MEM_REG requires a message size of at " + "least %d bytes and only %d bytes were received", + VHOST_USER_MEM_REG_SIZE, vmsg->size); + return false; + } + + if (dev->nregions == VHOST_USER_MAX_RAM_SLOTS) { + close(vmsg->fds[0]); + vu_panic(dev, "failing attempt to hot add memory via " + "VHOST_USER_ADD_MEM_REG message because the backend has " + "no free ram slots available"); + return false; + } + /* * If we are in postcopy mode and we receive a u64 payload with a 0 value * we know all the postcopy client bases have been received, and we @@ -728,12 +751,12 @@ vu_add_mem_reg(VuDev *dev, VhostUserMsg *vmsg) { * accessing it before we userfault. */ mmap_addr = mmap(0, dev_region->size + dev_region->mmap_offset, - PROT_NONE, MAP_SHARED, + PROT_NONE, MAP_SHARED | MAP_NORESERVE, vmsg->fds[0], 0); } else { mmap_addr = mmap(0, dev_region->size + dev_region->mmap_offset, - PROT_READ | PROT_WRITE, MAP_SHARED, vmsg->fds[0], - 0); + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_NORESERVE, + vmsg->fds[0], 0); } if (mmap_addr == MAP_FAILED) { @@ -796,10 +819,24 @@ static inline bool reg_equal(VuDevRegion *vudev_reg, static bool vu_rem_mem_reg(VuDev *dev, VhostUserMsg *vmsg) { - int i, j; - bool found = false; - VuDevRegion shadow_regions[VHOST_USER_MAX_RAM_SLOTS] = {}; VhostUserMemoryRegion m = vmsg->payload.memreg.region, *msg_region = &m; + int i; + bool found = false; + + if (vmsg->fd_num != 1) { + vmsg_close_fds(vmsg); + vu_panic(dev, "VHOST_USER_REM_MEM_REG received %d fds - only 1 fd " + "should be sent for this message type", vmsg->fd_num); + return false; + } + + if (vmsg->size < VHOST_USER_MEM_REG_SIZE) { + close(vmsg->fds[0]); + vu_panic(dev, "VHOST_USER_REM_MEM_REG requires a message size of at " + "least %d bytes and only %d bytes were received", + VHOST_USER_MEM_REG_SIZE, vmsg->size); + return false; + } DPRINT("Removing region:\n"); DPRINT(" guest_phys_addr: 0x%016"PRIx64"\n", @@ -811,35 +848,40 @@ vu_rem_mem_reg(VuDev *dev, VhostUserMsg *vmsg) { DPRINT(" mmap_offset 0x%016"PRIx64"\n", msg_region->mmap_offset); - for (i = 0, j = 0; i < dev->nregions; i++) { - if (!reg_equal(&dev->regions[i], msg_region)) { - shadow_regions[j].gpa = dev->regions[i].gpa; - shadow_regions[j].size = dev->regions[i].size; - shadow_regions[j].qva = dev->regions[i].qva; - shadow_regions[j].mmap_addr = dev->regions[i].mmap_addr; - shadow_regions[j].mmap_offset = dev->regions[i].mmap_offset; - j++; - } else { - found = true; + for (i = 0; i < dev->nregions; i++) { + if (reg_equal(&dev->regions[i], msg_region)) { VuDevRegion *r = &dev->regions[i]; void *m = (void *) (uintptr_t) r->mmap_addr; if (m) { munmap(m, r->size + r->mmap_offset); } + + /* + * Shift all affected entries by 1 to close the hole at index i and + * zero out the last entry. + */ + memmove(dev->regions + i, dev->regions + i + 1, + sizeof(VuDevRegion) * (dev->nregions - i - 1)); + memset(dev->regions + dev->nregions - 1, 0, sizeof(VuDevRegion)); + DPRINT("Successfully removed a region\n"); + dev->nregions--; + i--; + + found = true; + + /* Continue the search for eventual duplicates. */ } } if (found) { - memcpy(dev->regions, shadow_regions, - sizeof(VuDevRegion) * VHOST_USER_MAX_RAM_SLOTS); - DPRINT("Successfully removed a region\n"); - dev->nregions--; vmsg_set_reply_u64(vmsg, 0); } else { vu_panic(dev, "Specified region not found\n"); } + close(vmsg->fds[0]); + return true; } @@ -878,7 +920,7 @@ vu_set_mem_table_exec_postcopy(VuDev *dev, VhostUserMsg *vmsg) * accessing it before we userfault */ mmap_addr = mmap(0, dev_region->size + dev_region->mmap_offset, - PROT_NONE, MAP_SHARED, + PROT_NONE, MAP_SHARED | MAP_NORESERVE, vmsg->fds[i], 0); if (mmap_addr == MAP_FAILED) { @@ -965,7 +1007,7 @@ vu_set_mem_table_exec(VuDev *dev, VhostUserMsg *vmsg) * mapped address has to be page aligned, and we use huge * pages. */ mmap_addr = mmap(0, dev_region->size + dev_region->mmap_offset, - PROT_READ | PROT_WRITE, MAP_SHARED, + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_NORESERVE, vmsg->fds[i], 0); if (mmap_addr == MAP_FAILED) { diff --git a/subprojects/libvhost-user/libvhost-user.h b/subprojects/libvhost-user/libvhost-user.h index 3d13dfadde..cde9f07bb3 100644 --- a/subprojects/libvhost-user/libvhost-user.h +++ b/subprojects/libvhost-user/libvhost-user.h @@ -129,6 +129,8 @@ typedef struct VhostUserMemoryRegion { uint64_t mmap_offset; } VhostUserMemoryRegion; +#define VHOST_USER_MEM_REG_SIZE (sizeof(VhostUserMemoryRegion)) + typedef struct VhostUserMemory { uint32_t nregions; uint32_t padding; diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 875311f795..4dab09f265 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -3747,7 +3747,7 @@ static void tcg_target_init(TCGContext *s) { #ifdef CONFIG_CPUID_H unsigned a, b, c, d, b7 = 0; - int max = __get_cpuid_max(0, 0); + unsigned max = __get_cpuid_max(0, 0); if (max >= 7) { /* BMI1 is available on AMD Piledriver and Intel Haswell CPUs. */ diff --git a/tests/data/acpi/microvm/ERST.pcie b/tests/data/acpi/microvm/ERST.pcie new file mode 100644 index 0000000000..a6d0cb7838 Binary files /dev/null and b/tests/data/acpi/microvm/ERST.pcie differ diff --git a/tests/data/acpi/pc/DSDT.acpierst b/tests/data/acpi/pc/DSDT.acpierst new file mode 100644 index 0000000000..bb0593eeb8 Binary files /dev/null and b/tests/data/acpi/pc/DSDT.acpierst differ diff --git a/tests/data/acpi/pc/ERST.acpierst b/tests/data/acpi/pc/ERST.acpierst new file mode 100644 index 0000000000..7965ac2562 Binary files /dev/null and b/tests/data/acpi/pc/ERST.acpierst differ diff --git a/tests/data/acpi/pc/SSDT.dimmpxm b/tests/data/acpi/pc/SSDT.dimmpxm index a50a961fa1..ac55387d57 100644 Binary files a/tests/data/acpi/pc/SSDT.dimmpxm and b/tests/data/acpi/pc/SSDT.dimmpxm differ diff --git a/tests/data/acpi/q35/DSDT.acpierst b/tests/data/acpi/q35/DSDT.acpierst new file mode 100644 index 0000000000..cad26e3f0c Binary files /dev/null and b/tests/data/acpi/q35/DSDT.acpierst differ diff --git a/tests/data/acpi/q35/ERST.acpierst b/tests/data/acpi/q35/ERST.acpierst new file mode 100644 index 0000000000..7965ac2562 Binary files /dev/null and b/tests/data/acpi/q35/ERST.acpierst differ diff --git a/tests/data/acpi/q35/FACP.slic b/tests/data/acpi/q35/FACP.slic index 891fd4b784..15986e095c 100644 Binary files a/tests/data/acpi/q35/FACP.slic and b/tests/data/acpi/q35/FACP.slic differ diff --git a/tests/data/acpi/q35/SSDT.dimmpxm b/tests/data/acpi/q35/SSDT.dimmpxm index 617a1c911c..98e6f0e3f3 100644 Binary files a/tests/data/acpi/q35/SSDT.dimmpxm and b/tests/data/acpi/q35/SSDT.dimmpxm differ diff --git a/tests/data/acpi/virt/SSDT.memhp b/tests/data/acpi/virt/SSDT.memhp index e8b850ae22..375d7b6fc8 100644 Binary files a/tests/data/acpi/virt/SSDT.memhp and b/tests/data/acpi/virt/SSDT.memhp differ diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c index e6b72d9026..c4a2d1e166 100644 --- a/tests/qtest/bios-tables-test.c +++ b/tests/qtest/bios-tables-test.c @@ -73,7 +73,8 @@ #define OEM_ID "TEST" #define OEM_TABLE_ID "OEM" -#define OEM_TEST_ARGS "-machine x-oem-id="OEM_ID",x-oem-table-id="OEM_TABLE_ID +#define OEM_TEST_ARGS "-machine x-oem-id=" OEM_ID ",x-oem-table-id=" \ + OEM_TABLE_ID typedef struct { bool tcg_only; @@ -1446,6 +1447,57 @@ static void test_acpi_piix4_tcg_acpi_hmat(void) test_acpi_tcg_acpi_hmat(MACHINE_PC); } +static void test_acpi_erst(const char *machine) +{ + gchar *tmp_path = g_dir_make_tmp("qemu-test-erst.XXXXXX", NULL); + gchar *params; + test_data data; + + memset(&data, 0, sizeof(data)); + data.machine = machine; + data.variant = ".acpierst"; + params = g_strdup_printf( + " -object memory-backend-file,id=erstnvram," + "mem-path=%s,size=0x10000,share=on" + " -device acpi-erst,memdev=erstnvram", tmp_path); + test_acpi_one(params, &data); + free_test_data(&data); + g_free(params); + g_assert(g_rmdir(tmp_path) == 0); + g_free(tmp_path); +} + +static void test_acpi_piix4_acpi_erst(void) +{ + test_acpi_erst(MACHINE_PC); +} + +static void test_acpi_q35_acpi_erst(void) +{ + test_acpi_erst(MACHINE_Q35); +} + +static void test_acpi_microvm_acpi_erst(void) +{ + gchar *tmp_path = g_dir_make_tmp("qemu-test-erst.XXXXXX", NULL); + gchar *params; + test_data data; + + test_acpi_microvm_prepare(&data); + data.variant = ".pcie"; + data.tcg_only = true; /* need constant host-phys-bits */ + params = g_strdup_printf(" -machine microvm," + "acpi=on,ioapic2=off,rtc=off,pcie=on" + " -object memory-backend-file,id=erstnvram," + "mem-path=%s,size=0x10000,share=on" + " -device acpi-erst,memdev=erstnvram", tmp_path); + test_acpi_one(params, &data); + g_free(params); + g_assert(g_rmdir(tmp_path) == 0); + g_free(tmp_path); + free_test_data(&data); +} + static void test_acpi_virt_tcg(void) { test_data data = { @@ -1519,11 +1571,7 @@ static void test_acpi_q35_slic(void) static void test_oem_fields(test_data *data) { int i; - char oem_id[6]; - char oem_table_id[8]; - strpadcpy(oem_id, sizeof oem_id, OEM_ID, ' '); - strpadcpy(oem_table_id, sizeof oem_table_id, OEM_TABLE_ID, ' '); for (i = 0; i < data->tables->len; ++i) { AcpiSdtTable *sdt; @@ -1533,8 +1581,8 @@ static void test_oem_fields(test_data *data) continue; } - g_assert(memcmp(sdt->aml + 10, oem_id, 6) == 0); - g_assert(memcmp(sdt->aml + 16, oem_table_id, 8) == 0); + g_assert(strncmp((char *)sdt->aml + 10, OEM_ID, 6) == 0); + g_assert(strncmp((char *)sdt->aml + 16, OEM_TABLE_ID, 8) == 0); } } @@ -1675,6 +1723,8 @@ int main(int argc, char *argv[]) qtest_add_func("acpi/q35/dimmpxm", test_acpi_q35_tcg_dimm_pxm); qtest_add_func("acpi/piix4/acpihmat", test_acpi_piix4_tcg_acpi_hmat); qtest_add_func("acpi/q35/acpihmat", test_acpi_q35_tcg_acpi_hmat); + qtest_add_func("acpi/piix4/acpierst", test_acpi_piix4_acpi_erst); + qtest_add_func("acpi/q35/acpierst", test_acpi_q35_acpi_erst); qtest_add_func("acpi/microvm", test_acpi_microvm_tcg); qtest_add_func("acpi/microvm/usb", test_acpi_microvm_usb_tcg); qtest_add_func("acpi/microvm/rtc", test_acpi_microvm_rtc_tcg); @@ -1684,6 +1734,7 @@ int main(int argc, char *argv[]) qtest_add_func("acpi/q35/ivrs", test_acpi_q35_tcg_ivrs); if (strcmp(arch, "x86_64") == 0) { qtest_add_func("acpi/microvm/pcie", test_acpi_microvm_pcie_tcg); + qtest_add_func("acpi/microvm/acpierst", test_acpi_microvm_acpi_erst); } } if (has_kvm) { diff --git a/tests/qtest/cdrom-test.c b/tests/qtest/cdrom-test.c index cfca24fa94..fdd889a487 100644 --- a/tests/qtest/cdrom-test.c +++ b/tests/qtest/cdrom-test.c @@ -138,7 +138,7 @@ static void add_x86_tests(void) * Unstable CI test under load * See https://lists.gnu.org/archive/html/qemu-devel/2019-02/msg05509.html */ - if (g_test_slow()) { + if (g_test_slow() && qtest_has_machine("isapc")) { qtest_add_data_func("cdrom/boot/isapc", "-M isapc " "-drive if=ide,media=cdrom,file=", test_cdboot); } diff --git a/tests/qtest/erst-test.c b/tests/qtest/erst-test.c new file mode 100644 index 0000000000..c6a0ae4013 --- /dev/null +++ b/tests/qtest/erst-test.c @@ -0,0 +1,164 @@ +/* + * QTest testcase for acpi-erst + * + * Copyright (c) 2021 Oracle + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include +#include "libqos/libqos-pc.h" +#include "libqos/libqtest.h" +#include "qemu-common.h" + +#include "hw/pci/pci.h" + +static void save_fn(QPCIDevice *dev, int devfn, void *data) +{ + QPCIDevice **pdev = (QPCIDevice **) data; + + *pdev = dev; +} + +static QPCIDevice *get_erst_device(QPCIBus *pcibus) +{ + QPCIDevice *dev; + + dev = NULL; + qpci_device_foreach(pcibus, + PCI_VENDOR_ID_REDHAT, + PCI_DEVICE_ID_REDHAT_ACPI_ERST, + save_fn, &dev); + g_assert(dev != NULL); + + return dev; +} + +typedef struct _ERSTState { + QOSState *qs; + QPCIBar reg_bar, mem_bar; + uint64_t reg_barsize, mem_barsize; + QPCIDevice *dev; +} ERSTState; + +#define ACTION 0 +#define VALUE 8 + +static const char *reg2str(unsigned reg) +{ + switch (reg) { + case 0: + return "ACTION"; + case 8: + return "VALUE"; + default: + return NULL; + } +} + +static inline uint32_t in_reg32(ERSTState *s, unsigned reg) +{ + const char *name = reg2str(reg); + uint32_t res; + + res = qpci_io_readl(s->dev, s->reg_bar, reg); + g_test_message("*%s -> %08x", name, res); + + return res; +} + +static inline uint64_t in_reg64(ERSTState *s, unsigned reg) +{ + const char *name = reg2str(reg); + uint64_t res; + + res = qpci_io_readq(s->dev, s->reg_bar, reg); + g_test_message("*%s -> %016llx", name, (unsigned long long)res); + + return res; +} + +static inline void out_reg32(ERSTState *s, unsigned reg, uint32_t v) +{ + const char *name = reg2str(reg); + + g_test_message("%08x -> *%s", v, name); + qpci_io_writel(s->dev, s->reg_bar, reg, v); +} + +static void cleanup_vm(ERSTState *s) +{ + g_free(s->dev); + qtest_shutdown(s->qs); +} + +static void setup_vm_cmd(ERSTState *s, const char *cmd) +{ + const char *arch = qtest_get_arch(); + + if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) { + s->qs = qtest_pc_boot(cmd); + } else { + g_printerr("erst-test tests are only available on x86\n"); + exit(EXIT_FAILURE); + } + s->dev = get_erst_device(s->qs->pcibus); + + s->reg_bar = qpci_iomap(s->dev, 0, &s->reg_barsize); + g_assert_cmpuint(s->reg_barsize, ==, 16); + + s->mem_bar = qpci_iomap(s->dev, 1, &s->mem_barsize); + g_assert_cmpuint(s->mem_barsize, ==, 0x2000); + + qpci_device_enable(s->dev); +} + +static void test_acpi_erst_basic(void) +{ + ERSTState state; + uint64_t log_address_range; + uint64_t log_address_length; + uint32_t log_address_attr; + + setup_vm_cmd(&state, + "-object memory-backend-file," + "mem-path=acpi-erst.XXXXXX," + "size=64K," + "share=on," + "id=nvram " + "-device acpi-erst," + "memdev=nvram"); + + out_reg32(&state, ACTION, 0xD); + log_address_range = in_reg64(&state, VALUE); + out_reg32(&state, ACTION, 0xE); + log_address_length = in_reg64(&state, VALUE); + out_reg32(&state, ACTION, 0xF); + log_address_attr = in_reg32(&state, VALUE); + + /* Check log_address_range is not 0, ~0 or base */ + g_assert_cmpuint(log_address_range, !=, 0ULL); + g_assert_cmpuint(log_address_range, !=, ~0ULL); + g_assert_cmpuint(log_address_range, !=, state.reg_bar.addr); + g_assert_cmpuint(log_address_range, ==, state.mem_bar.addr); + + /* Check log_address_length is bar1_size */ + g_assert_cmpuint(log_address_length, ==, state.mem_barsize); + + /* Check log_address_attr is 0 */ + g_assert_cmpuint(log_address_attr, ==, 0); + + cleanup_vm(&state); +} + +int main(int argc, char **argv) +{ + int ret; + + g_test_init(&argc, &argv, NULL); + qtest_add_func("/acpi-erst/basic", test_acpi_erst_basic); + ret = g_test_run(); + return ret; +} diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build index 842b1df420..762f6adcd5 100644 --- a/tests/qtest/meson.build +++ b/tests/qtest/meson.build @@ -68,6 +68,7 @@ qtests_i386 = \ (config_all_devices.has_key('CONFIG_RTL8139_PCI') ? ['rtl8139-test'] : []) + \ (config_all_devices.has_key('CONFIG_E1000E_PCI_EXPRESS') ? ['fuzz-e1000e-test'] : []) + \ (config_all_devices.has_key('CONFIG_ESP_PCI') ? ['am53c974-test'] : []) + \ + (config_all_devices.has_key('CONFIG_ACPI_ERST') ? ['erst-test'] : []) + \ (config_all_devices.has_key('CONFIG_VIRTIO_NET') and \ config_all_devices.has_key('CONFIG_Q35') and \ config_all_devices.has_key('CONFIG_VIRTIO_PCI') and \ @@ -278,6 +279,7 @@ qtests = { 'bios-tables-test': [io, 'boot-sector.c', 'acpi-utils.c', 'tpm-emu.c'], 'cdrom-test': files('boot-sector.c'), 'dbus-vmstate-test': files('migration-helpers.c') + dbus_vmstate1, + 'erst-test': files('erst-test.c'), 'ivshmem-test': [rt, '../../contrib/ivshmem-server/ivshmem-server.c'], 'migration-test': files('migration-helpers.c'), 'pxe-test': files('boot-sector.c'), diff --git a/util/bufferiszero.c b/util/bufferiszero.c index 695bb4ce28..ec3cd4ca15 100644 --- a/util/bufferiszero.c +++ b/util/bufferiszero.c @@ -272,7 +272,7 @@ static void init_accel(unsigned cache) static void __attribute__((constructor)) init_cpuid_cache(void) { - int max = __get_cpuid_max(0, NULL); + unsigned max = __get_cpuid_max(0, NULL); int a, b, c, d; unsigned cache = 0; diff --git a/util/oslib-posix.c b/util/oslib-posix.c index 9efdc74bba..ac0dbc2adc 100644 --- a/util/oslib-posix.c +++ b/util/oslib-posix.c @@ -683,6 +683,7 @@ void os_mem_prealloc(int fd, char *area, size_t memory, int smp_cpus, ret = sigaction(SIGBUS, &act, &sigbus_oldact); if (ret) { + qemu_mutex_unlock(&sigbus_mutex); error_setg_errno(errp, errno, "os_mem_prealloc: failed to install signal handler"); return;