From d91525eb8ee6a622ce476955fe1a2530ade87c83 Mon Sep 17 00:00:00 2001
From: "Chen, Gong"
Date: Wed, 10 Dec 2014 13:53:26 -0800
Subject: [PATCH 1/3] ACPI, EINJ: Enhance error injection tolerance level

Some BIOSes utilize PCI MMCFG space read/write operations to trigger
specific errors. EINJ will report errors as below when hitting such
cases:

  APEI: Can not request [mem 0x83f990a0-0x83f990a3] for APEI EINJ Trigger registers

This is because on x86 platforms the ACPI-based PCI MMCFG logic has
already reserved all MMCFG spaces, so EINJ can't reserve them again.

We already trust the ACPI/APEI code when using the EINJ interface, so
it is not a big leap to also trust it to access the right MMCFG
addresses. Skip the address check to allow the access.

Signed-off-by: Chen, Gong
Signed-off-by: Tony Luck
---
 arch/x86/pci/mmconfig-shared.c | 28 ++++++++++++++++++++++++++++
 drivers/acpi/apei/apei-base.c  | 32 ++++++++++++++++++++++++++------
 2 files changed, 54 insertions(+), 6 deletions(-)

diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 326198a4434e..676e5e04e4d4 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -610,6 +610,32 @@ static int __init pci_parse_mcfg(struct acpi_table_header *header)
 	return 0;
 }
 
+#ifdef CONFIG_ACPI_APEI
+extern int (*arch_apei_filter_addr)(int (*func)(__u64 start, __u64 size,
+				     void *data), void *data);
+
+static int pci_mmcfg_for_each_region(int (*func)(__u64 start, __u64 size,
+				     void *data), void *data)
+{
+	struct pci_mmcfg_region *cfg;
+	int rc;
+
+	if (list_empty(&pci_mmcfg_list))
+		return 0;
+
+	list_for_each_entry(cfg, &pci_mmcfg_list, list) {
+		rc = func(cfg->res.start, resource_size(&cfg->res), data);
+		if (rc)
+			return rc;
+	}
+
+	return 0;
+}
+#define set_apei_filter() (arch_apei_filter_addr = pci_mmcfg_for_each_region)
+#else
+#define set_apei_filter()
+#endif
+
 static void __init __pci_mmcfg_init(int early)
 {
 	pci_mmcfg_reject_broken(early);
@@ -644,6 +670,8 @@ void __init pci_mmcfg_early_init(void)
 		else
 			acpi_sfi_table_parse(ACPI_SIG_MCFG, pci_parse_mcfg);
 		__pci_mmcfg_init(1);
+
+		set_apei_filter();
 	}
 }
 
diff --git a/drivers/acpi/apei/apei-base.c b/drivers/acpi/apei/apei-base.c
index 2cd7bdd6c8b3..a85ac07f3da3 100644
--- a/drivers/acpi/apei/apei-base.c
+++ b/drivers/acpi/apei/apei-base.c
@@ -449,7 +449,7 @@ int apei_resources_sub(struct apei_resources *resources1,
 }
 EXPORT_SYMBOL_GPL(apei_resources_sub);
 
-static int apei_get_nvs_callback(__u64 start, __u64 size, void *data)
+static int apei_get_res_callback(__u64 start, __u64 size, void *data)
 {
 	struct apei_resources *resources = data;
 	return apei_res_add(&resources->iomem, start, size);
@@ -457,7 +457,15 @@ static int apei_get_nvs_callback(__u64 start, __u64 size, void *data)
 
 static int apei_get_nvs_resources(struct apei_resources *resources)
 {
-	return acpi_nvs_for_each_region(apei_get_nvs_callback, resources);
+	return acpi_nvs_for_each_region(apei_get_res_callback, resources);
+}
+
+int (*arch_apei_filter_addr)(int (*func)(__u64 start, __u64 size,
+				     void *data), void *data);
+static int apei_get_arch_resources(struct apei_resources *resources)
+
+{
+	return arch_apei_filter_addr(apei_get_res_callback, resources);
 }
 
 /*
@@ -470,7 +478,7 @@ int apei_resources_request(struct apei_resources *resources,
 {
 	struct apei_res *res, *res_bak = NULL;
 	struct resource *r;
-	struct apei_resources nvs_resources;
+	struct apei_resources nvs_resources, arch_res;
 	int rc;
 
 	rc = apei_resources_sub(resources, &apei_resources_all);
@@ -485,10 +493,20 @@ int apei_resources_request(struct apei_resources *resources,
 	apei_resources_init(&nvs_resources);
 	rc = apei_get_nvs_resources(&nvs_resources);
 	if (rc)
-		goto res_fini;
+		goto nvs_res_fini;
 	rc = apei_resources_sub(resources, &nvs_resources);
 	if (rc)
-		goto res_fini;
+		goto nvs_res_fini;
+
+	if (arch_apei_filter_addr) {
+		apei_resources_init(&arch_res);
+		rc = apei_get_arch_resources(&arch_res);
+		if (rc)
+			goto arch_res_fini;
+		rc = apei_resources_sub(resources, &arch_res);
+		if (rc)
+			goto arch_res_fini;
+	}
 
 	rc = -EINVAL;
 	list_for_each_entry(res, &resources->iomem, list) {
@@ -536,7 +554,9 @@ int apei_resources_request(struct apei_resources *resources,
 			break;
 		release_mem_region(res->start, res->end - res->start);
 	}
-res_fini:
+arch_res_fini:
+	apei_resources_fini(&arch_res);
+nvs_res_fini:
 	apei_resources_fini(&nvs_resources);
 	return rc;
 }

From 6c80f87ed4483bff0ecbf4455a1b09e7f950e9ab Mon Sep 17 00:00:00 2001
From: Andy Lutomirski
Date: Sun, 21 Dec 2014 08:18:25 -0800
Subject: [PATCH 2/3] x86, mce: Improve timeout error messages

There are four different possible types of timeouts. Distinguish them
in the logs to help debug them.

Signed-off-by: Andy Lutomirski
Link: http://lkml.kernel.org/r/0fa6d2653a54a01c48b43a3583caf950ea99606e.1419178397.git.luto@amacapital.net
Signed-off-by: Borislav Petkov
---
 arch/x86/kernel/cpu/mcheck/mce.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index d2c611699cd9..323e02c658af 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -311,7 +311,7 @@ static void wait_for_panic(void)
 	panic("Panicing machine check CPU died");
 }
 
-static void mce_panic(char *msg, struct mce *final, char *exp)
+static void mce_panic(const char *msg, struct mce *final, char *exp)
 {
 	int i, apei_err = 0;
 
@@ -735,7 +735,7 @@ static atomic_t mce_callin;
 /*
  * Check if a timeout waiting for other CPUs happened.
  */
-static int mce_timed_out(u64 *t)
+static int mce_timed_out(u64 *t, const char *msg)
 {
 	/*
 	 * The others already did panic for some reason.
@@ -750,8 +750,7 @@ static int mce_timed_out(u64 *t)
 		goto out;
 	if ((s64)*t < SPINUNIT) {
 		if (mca_cfg.tolerant <= 1)
-			mce_panic("Timeout synchronizing machine check over CPUs",
-				  NULL, NULL);
+			mce_panic(msg, NULL, NULL);
 		cpu_missing = 1;
 		return 1;
 	}
@@ -867,7 +866,8 @@ static int mce_start(int *no_way_out)
 	 * Wait for everyone.
 	 */
 	while (atomic_read(&mce_callin) != cpus) {
-		if (mce_timed_out(&timeout)) {
+		if (mce_timed_out(&timeout,
+				  "Timeout: Not all CPUs entered broadcast exception handler")) {
 			atomic_set(&global_nwo, 0);
 			return -1;
 		}
@@ -892,7 +892,8 @@ static int mce_start(int *no_way_out)
 	 * only seen by one CPU before cleared, avoiding duplicates.
 	 */
 	while (atomic_read(&mce_executing) < order) {
-		if (mce_timed_out(&timeout)) {
+		if (mce_timed_out(&timeout,
+				  "Timeout: Subject CPUs unable to finish machine check processing")) {
 			atomic_set(&global_nwo, 0);
 			return -1;
 		}
@@ -936,7 +937,8 @@ static int mce_end(int order)
 	 * loops.
 	 */
 	while (atomic_read(&mce_executing) <= cpus) {
-		if (mce_timed_out(&timeout))
+		if (mce_timed_out(&timeout,
+				  "Timeout: Monarch CPU unable to finish machine check processing"))
 			goto reset;
 		ndelay(SPINUNIT);
 	}
@@ -949,7 +951,8 @@ static int mce_end(int order)
 	 * Subject: Wait for Monarch to finish.
 	 */
 	while (atomic_read(&mce_executing) != 0) {
-		if (mce_timed_out(&timeout))
+		if (mce_timed_out(&timeout,
+				  "Timeout: Monarch CPU did not finish machine check processing"))
 			goto reset;
 		ndelay(SPINUNIT);
 	}

From 83737691e586cd2767fa4fc87cd41251d1a49e9e Mon Sep 17 00:00:00 2001
From: Borislav Petkov
Date: Mon, 22 Dec 2014 21:04:31 +0100
Subject: [PATCH 3/3] x86, mce: Fix sparse errors

Make the symbols that are used only in mce.c static.

Signed-off-by: Borislav Petkov
---
 arch/x86/kernel/cpu/mcheck/mce.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 323e02c658af..4c5cd7575d31 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -115,7 +115,7 @@ static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
  * CPU/chipset specific EDAC code can register a notifier call here to print
  * MCE errors in a human-readable form.
  */
-ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
+static ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
 
 /* Do initial initialization of a struct mce */
 void mce_setup(struct mce *m)
@@ -529,7 +529,7 @@ static void mce_schedule_work(void)
 		schedule_work(this_cpu_ptr(&mce_work));
 }
 
-DEFINE_PER_CPU(struct irq_work, mce_irq_work);
+static DEFINE_PER_CPU(struct irq_work, mce_irq_work);
 
 static void mce_irq_work_cb(struct irq_work *entry)
 {
@@ -1012,7 +1012,7 @@ static void mce_clear_state(unsigned long *toclear)
  */
 #define MCE_INFO_MAX	16
 
-struct mce_info {
+static struct mce_info {
 	atomic_t inuse;
 	struct task_struct *t;
 	__u64 paddr;