From b8fb03785d4de097507d0cf45873525e0ac4d2b2 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Thu, 1 Sep 2016 11:39:33 -0700 Subject: [PATCH 01/21] locking/static_keys: Provide DECLARE as well as DEFINE macros We will need to provide declarations of static keys in header files. Provide DECLARE_STATIC_KEY_{TRUE,FALSE} macros. Signed-off-by: Tony Luck Acked-by: Borislav Petkov Cc: Peter Zijlstra Cc: Dan Williams Cc: Linus Torvalds Link: http://lkml.kernel.org/r/816881cf85bd3cf13385d212882618f38a3b5d33.1472754711.git.tony.luck@intel.com Signed-off-by: Thomas Gleixner --- include/linux/jump_label.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 661af564fae8..595fb46213fc 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -267,9 +267,15 @@ struct static_key_false { #define DEFINE_STATIC_KEY_TRUE(name) \ struct static_key_true name = STATIC_KEY_TRUE_INIT +#define DECLARE_STATIC_KEY_TRUE(name) \ + extern struct static_key_true name + #define DEFINE_STATIC_KEY_FALSE(name) \ struct static_key_false name = STATIC_KEY_FALSE_INIT +#define DECLARE_STATIC_KEY_FALSE(name) \ + extern struct static_key_false name + extern bool ____wrong_branch_error(void); #define static_key_enabled(x) \ From 3637efb00864f465baebd49464e58319fd295b65 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Thu, 1 Sep 2016 11:39:33 -0700 Subject: [PATCH 02/21] x86/mce: Add PCI quirks to identify Xeons with machine check recovery Each Xeon includes a number of capability registers in PCI space that describe some features not enumerated by CPUID. Use these to determine that we are running on a model that can recover from machine checks. Hooks for Ivybridge ... Skylake provided. 
Signed-off-by: Tony Luck Acked-by: Borislav Petkov Cc: Peter Zijlstra Cc: Dan Williams Cc: Boris Petkov Cc: Linus Torvalds Link: http://lkml.kernel.org/r/abf331dc4a3e2a2d17444129bc51127437bcf4ba.1472754711.git.tony.luck@intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/string_64.h | 3 +++ arch/x86/kernel/cpu/mcheck/mce.c | 8 ++++++++ arch/x86/kernel/quirks.c | 31 +++++++++++++++++++++++++++++++ 3 files changed, 42 insertions(+) diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h index 90dbbd9666d4..877a1dfbf770 100644 --- a/arch/x86/include/asm/string_64.h +++ b/arch/x86/include/asm/string_64.h @@ -2,6 +2,7 @@ #define _ASM_X86_STRING_64_H #ifdef __KERNEL__ +#include /* Written 2002 by Andi Kleen */ @@ -78,6 +79,8 @@ int strcmp(const char *cs, const char *ct); #define memset(s, c, n) __memset(s, c, n) #endif +DECLARE_STATIC_KEY_FALSE(mcsafe_key); + /** * memcpy_mcsafe - copy memory with indication if a machine check happened * diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 79d8ec849468..acccebcc836d 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -2080,6 +2081,7 @@ void mce_disable_bank(int bank) * mce=bootlog Log MCEs from before booting. Disabled by default on AMD. * mce=nobootlog Don't log MCEs from before booting. 
* mce=bios_cmci_threshold Don't program the CMCI threshold + * mce=recovery force enable memcpy_mcsafe() */ static int __init mcheck_enable(char *str) { @@ -2676,8 +2678,14 @@ static int __init mcheck_debugfs_init(void) static int __init mcheck_debugfs_init(void) { return -EINVAL; } #endif +DEFINE_STATIC_KEY_FALSE(mcsafe_key); +EXPORT_SYMBOL_GPL(mcsafe_key); + static int __init mcheck_late_init(void) { + if (mca_cfg.recovery) + static_branch_inc(&mcsafe_key); + mcheck_debugfs_init(); /* diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index cc457ff818ad..51402a7e4ca6 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -626,3 +626,34 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3, amd_disable_seq_and_redirect_scrub); #endif + +#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE) +#include +#include + +/* Ivy Bridge, Haswell, Broadwell */ +static void quirk_intel_brickland_xeon_ras_cap(struct pci_dev *pdev) +{ + u32 capid0; + + pci_read_config_dword(pdev, 0x84, &capid0); + + if (capid0 & 0x10) + static_branch_inc(&mcsafe_key); +} + +/* Skylake */ +static void quirk_intel_purley_xeon_ras_cap(struct pci_dev *pdev) +{ + u32 capid0; + + pci_read_config_dword(pdev, 0x84, &capid0); + + if ((capid0 & 0xc0) == 0xc0) + static_branch_inc(&mcsafe_key); +} +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0ec3, quirk_intel_brickland_xeon_ras_cap); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2fc0, quirk_intel_brickland_xeon_ras_cap); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, quirk_intel_brickland_xeon_ras_cap); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2083, quirk_intel_purley_xeon_ras_cap); +#endif From 9a6fb28a355d2609ace4dab4e6425442c647894d Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Thu, 1 Sep 2016 11:39:33 -0700 Subject: [PATCH 03/21] x86/mce: Improve memcpy_mcsafe() Use the mcsafe_key defined in the previous patch to make decisions on which copy function to use. 
We can't use the FEATURE bit any more because PCI quirks run too late to affect the patching of code. So we use a static key. Turn memcpy_mcsafe() into an inline function to make life easier for callers. The assembly code that actually does the copy is now named memcpy_mcsafe_unrolled() Signed-off-by: Tony Luck Acked-by: Borislav Petkov Cc: Peter Zijlstra Cc: Dan Williams Cc: Boris Petkov Cc: Linus Torvalds Link: http://lkml.kernel.org/r/bfde2fc774e94f53d91b70a4321c85a0d33e7118.1472754712.git.tony.luck@intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/pmem.h | 5 +---- arch/x86/include/asm/string_64.h | 16 +++++++++++++++- arch/x86/kernel/x8664_ksyms_64.c | 2 +- arch/x86/lib/memcpy_64.S | 6 +++--- 4 files changed, 20 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h index 643eba42d620..2c1ebeb4d737 100644 --- a/arch/x86/include/asm/pmem.h +++ b/arch/x86/include/asm/pmem.h @@ -46,10 +46,7 @@ static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n) static inline int arch_memcpy_from_pmem(void *dst, const void *src, size_t n) { - if (static_cpu_has(X86_FEATURE_MCE_RECOVERY)) - return memcpy_mcsafe(dst, src, n); - memcpy(dst, src, n); - return 0; + return memcpy_mcsafe(dst, src, n); } /** diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h index 877a1dfbf770..a164862d77e3 100644 --- a/arch/x86/include/asm/string_64.h +++ b/arch/x86/include/asm/string_64.h @@ -79,6 +79,7 @@ int strcmp(const char *cs, const char *ct); #define memset(s, c, n) __memset(s, c, n) #endif +__must_check int memcpy_mcsafe_unrolled(void *dst, const void *src, size_t cnt); DECLARE_STATIC_KEY_FALSE(mcsafe_key); /** @@ -89,10 +90,23 @@ DECLARE_STATIC_KEY_FALSE(mcsafe_key); * @cnt: number of bytes to copy * * Low level memory copy function that catches machine checks + * We only call into the "safe" function on systems that can + * actually do machine check recovery. 
Everyone else can just + * use memcpy(). * * Return 0 for success, -EFAULT for fail */ -int memcpy_mcsafe(void *dst, const void *src, size_t cnt); +static __always_inline __must_check int +memcpy_mcsafe(void *dst, const void *src, size_t cnt) +{ +#ifdef CONFIG_X86_MCE + if (static_branch_unlikely(&mcsafe_key)) + return memcpy_mcsafe_unrolled(dst, src, cnt); + else +#endif + memcpy(dst, src, cnt); + return 0; +} #endif /* __KERNEL__ */ diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index 95e49f6e4fc3..b2cee3d19477 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -38,7 +38,7 @@ EXPORT_SYMBOL(__copy_user_nocache); EXPORT_SYMBOL(_copy_from_user); EXPORT_SYMBOL(_copy_to_user); -EXPORT_SYMBOL_GPL(memcpy_mcsafe); +EXPORT_SYMBOL_GPL(memcpy_mcsafe_unrolled); EXPORT_SYMBOL(copy_page); EXPORT_SYMBOL(clear_page); diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 2ec0b0abbfaa..49e6ebac7e73 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -181,11 +181,11 @@ ENDPROC(memcpy_orig) #ifndef CONFIG_UML /* - * memcpy_mcsafe - memory copy with machine check exception handling + * memcpy_mcsafe_unrolled - memory copy with machine check exception handling * Note that we only catch machine checks when reading the source addresses. * Writes to target are posted and don't generate machine checks. */ -ENTRY(memcpy_mcsafe) +ENTRY(memcpy_mcsafe_unrolled) cmpl $8, %edx /* Less than 8 bytes? 
Go to byte copy loop */ jb .L_no_whole_words @@ -273,7 +273,7 @@ ENTRY(memcpy_mcsafe) .L_done_memcpy_trap: xorq %rax, %rax ret -ENDPROC(memcpy_mcsafe) +ENDPROC(memcpy_mcsafe_unrolled) .section .fixup, "ax" /* Return -EFAULT for any failure */ From ffb173e657fa8123bffa2a169e124b4bca0b5bc4 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Thu, 1 Sep 2016 11:39:33 -0700 Subject: [PATCH 04/21] x86/mce: Drop X86_FEATURE_MCE_RECOVERY and the related model string test We now have a better way to determine if we are running on a cpu that supports machine check recovery. Free up this feature bit. Signed-off-by: Tony Luck Acked-by: Borislav Petkov Cc: Peter Zijlstra Cc: Dan Williams Cc: Boris Petkov Cc: Linus Torvalds Link: http://lkml.kernel.org/r/d5db39e08d46cf1012d94d3902275d08ba931926.1472754712.git.tony.luck@intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/cpufeatures.h | 1 - arch/x86/kernel/cpu/mcheck/mce.c | 11 ----------- 2 files changed, 12 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 92a8308b96f6..1188bc849ee3 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -106,7 +106,6 @@ #define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */ #define X86_FEATURE_EAGER_FPU ( 3*32+29) /* "eagerfpu" Non lazy FPU restore */ #define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ -#define X86_FEATURE_MCE_RECOVERY ( 3*32+31) /* cpu has recoverable machine checks */ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ #define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */ diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index acccebcc836d..7f3f0e147242 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1634,17 +1634,6 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) if (c->x86 == 6 && c->x86_model == 45) quirk_no_way_out = 
quirk_sandybridge_ifu; - /* - * MCG_CAP.MCG_SER_P is necessary but not sufficient to know - * whether this processor will actually generate recoverable - * machine checks. Check to see if this is an E7 model Xeon. - * We can't do a model number check because E5 and E7 use the - * same model number. E5 doesn't support recovery, E7 does. - */ - if (mca_cfg.recovery || (mca_cfg.ser && - !strncmp(c->x86_model_id, - "Intel(R) Xeon(R) CPU E7-", 24))) - set_cpu_cap(c, X86_FEATURE_MCE_RECOVERY); } if (cfg->monarch_timeout < 0) cfg->monarch_timeout = 0; From 74ab0e7a836a7df772af50cac21267eb43688841 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Mon, 12 Sep 2016 09:59:27 +0200 Subject: [PATCH 05/21] x86/mce/AMD: Use msr_ops.misc() in allocate_threshold_blocks() Change MSR_IA32_MCx_MISC() macro to msr_ops.misc() because SMCA machines define a different set of MSRs and msr_ops will give you the correct MISC register. Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1468269447-8808-1-git-send-email-Yazen.Ghannam@amd.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/mcheck/mce_amd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 7b7f3be783d4..78b7681f7f66 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -869,7 +869,7 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank) } } - err = allocate_threshold_blocks(cpu, bank, 0, MSR_IA32_MCx_MISC(bank)); + err = allocate_threshold_blocks(cpu, bank, 0, msr_ops.misc(bank)); if (!err) goto out; From db819d60f6720080150a365080ff656cf239f88f Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Mon, 12 Sep 2016 09:59:28 +0200 Subject: [PATCH 06/21] x86/mce: Add support for new MCA_SYND register Syndrome information is no longer contained in MCA_STATUS for SMCA systems but in a new register - MCA_SYND. 
Add a synd field to struct mce to hold MCA_SYND register value. Add it to the end of struct mce to maintain compatibility with old versions of mcelog. Also, add it to the respective tracepoint. Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1467633035-32080-1-git-send-email-Yazen.Ghannam@amd.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/mce.h | 5 ++++- arch/x86/include/uapi/asm/mce.h | 1 + arch/x86/kernel/cpu/mcheck/mce.c | 4 ++++ arch/x86/kernel/cpu/mcheck/mce_amd.c | 3 +++ include/trace/events/mce.h | 6 ++++-- 5 files changed, 16 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 8bf766ef0e18..21bc5a3a4c89 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -40,9 +40,10 @@ #define MCI_STATUS_AR (1ULL<<55) /* Action required */ /* AMD-specific bits */ +#define MCI_STATUS_TCC (1ULL<<55) /* Task context corrupt */ +#define MCI_STATUS_SYNDV (1ULL<<53) /* synd reg. 
valid */ #define MCI_STATUS_DEFERRED (1ULL<<44) /* uncorrected error, deferred exception */ #define MCI_STATUS_POISON (1ULL<<43) /* access poisonous data */ -#define MCI_STATUS_TCC (1ULL<<55) /* Task context corrupt */ /* * McaX field if set indicates a given bank supports MCA extensions: @@ -110,6 +111,7 @@ #define MSR_AMD64_SMCA_MC0_MISC0 0xc0002003 #define MSR_AMD64_SMCA_MC0_CONFIG 0xc0002004 #define MSR_AMD64_SMCA_MC0_IPID 0xc0002005 +#define MSR_AMD64_SMCA_MC0_SYND 0xc0002006 #define MSR_AMD64_SMCA_MC0_DESTAT 0xc0002008 #define MSR_AMD64_SMCA_MC0_DEADDR 0xc0002009 #define MSR_AMD64_SMCA_MC0_MISC1 0xc000200a @@ -119,6 +121,7 @@ #define MSR_AMD64_SMCA_MCx_MISC(x) (MSR_AMD64_SMCA_MC0_MISC0 + 0x10*(x)) #define MSR_AMD64_SMCA_MCx_CONFIG(x) (MSR_AMD64_SMCA_MC0_CONFIG + 0x10*(x)) #define MSR_AMD64_SMCA_MCx_IPID(x) (MSR_AMD64_SMCA_MC0_IPID + 0x10*(x)) +#define MSR_AMD64_SMCA_MCx_SYND(x) (MSR_AMD64_SMCA_MC0_SYND + 0x10*(x)) #define MSR_AMD64_SMCA_MCx_DESTAT(x) (MSR_AMD64_SMCA_MC0_DESTAT + 0x10*(x)) #define MSR_AMD64_SMCA_MCx_DEADDR(x) (MSR_AMD64_SMCA_MC0_DEADDR + 0x10*(x)) #define MSR_AMD64_SMCA_MCx_MISCy(x, y) ((MSR_AMD64_SMCA_MC0_MISC1 + y) + (0x10*(x))) diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h index 2184943341bf..8c75fbc94c3f 100644 --- a/arch/x86/include/uapi/asm/mce.h +++ b/arch/x86/include/uapi/asm/mce.h @@ -26,6 +26,7 @@ struct mce { __u32 socketid; /* CPU socket ID */ __u32 apicid; /* CPU initial apic ID */ __u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */ + __u64 synd; /* MCA_SYND MSR: only valid on SMCA systems */ }; #define MCE_GET_RECORD_LEN _IOR('M', 1, int) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 7f3f0e147242..91a179b95fd0 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -569,6 +569,7 @@ static void mce_read_aux(struct mce *m, int i) { if (m->status & MCI_STATUS_MISCV) m->misc = mce_rdmsrl(msr_ops.misc(i)); + if 
(m->status & MCI_STATUS_ADDRV) { m->addr = mce_rdmsrl(msr_ops.addr(i)); @@ -581,6 +582,9 @@ static void mce_read_aux(struct mce *m, int i) m->addr <<= shift; } } + + if (mce_flags.smca && (m->status & MCI_STATUS_SYNDV)) + m->synd = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND(i)); } static bool memory_error(struct mce *m) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 78b7681f7f66..419e0ee3b12f 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -479,6 +479,9 @@ __log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc) if (m.status & MCI_STATUS_ADDRV) rdmsrl(msr_addr, m.addr); + if (mce_flags.smca && (m.status & MCI_STATUS_SYNDV)) + rdmsrl(MSR_AMD64_SMCA_MCx_SYND(bank), m.synd); + mce_log(&m); wrmsrl(msr_status, 0); diff --git a/include/trace/events/mce.h b/include/trace/events/mce.h index 4cbbcef6baa8..8be5268caf28 100644 --- a/include/trace/events/mce.h +++ b/include/trace/events/mce.h @@ -20,6 +20,7 @@ TRACE_EVENT(mce_record, __field( u64, status ) __field( u64, addr ) __field( u64, misc ) + __field( u64, synd ) __field( u64, ip ) __field( u64, tsc ) __field( u64, walltime ) @@ -38,6 +39,7 @@ TRACE_EVENT(mce_record, __entry->status = m->status; __entry->addr = m->addr; __entry->misc = m->misc; + __entry->synd = m->synd; __entry->ip = m->ip; __entry->tsc = m->tsc; __entry->walltime = m->time; @@ -50,11 +52,11 @@ TRACE_EVENT(mce_record, __entry->cpuvendor = m->cpuvendor; ), - TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, ADDR/MISC: %016Lx/%016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PROCESSOR: %u:%x, TIME: %llu, SOCKET: %u, APIC: %x", + TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, ADDR/MISC/SYND: %016Lx/%016Lx/%016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PROCESSOR: %u:%x, TIME: %llu, SOCKET: %u, APIC: %x", __entry->cpu, __entry->mcgcap, __entry->mcgstatus, __entry->bank, __entry->status, - __entry->addr, __entry->misc, + __entry->addr, __entry->misc, 
__entry->synd, __entry->cs, __entry->ip, __entry->tsc, __entry->cpuvendor, __entry->cpuid, From b300e87300b68120aa5374341b252875a1cb6ea1 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Mon, 12 Sep 2016 09:59:29 +0200 Subject: [PATCH 07/21] EDAC/mce_amd: Print syndrome register value on SMCA systems Print SyndV bit status and print the raw value of the MCA_SYND register. Further decoding of the syndrome from struct mce.synd can be done in other places where appropriate, e.g. DRAM ECC. Boris: make the error stanza more compact by putting the error address and syndrome on the same line: [Hardware Error]: Corrected error, no action required. [Hardware Error]: CPU:2 (17:0:0) MC4_STATUS[-|CE|-|PCC|AddrV|-|-|SyndV|CECC]: 0x96204100001e0117 [Hardware Error]: Error Addr: 0x000000007f4c52e3, Syndrome: 0x0000000000000000 [Hardware Error]: Invalid IP block specified. [Hardware Error]: cache level: L3/GEN, tx: DATA, mem-tx: RD Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1467633035-32080-2-git-send-email-Yazen.Ghannam@amd.com Signed-off-by: Thomas Gleixner --- drivers/edac/mce_amd.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index 9b6800a79c7f..057ece577800 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -927,7 +927,7 @@ static void decode_smca_errors(struct mce *m) size_t len; if (rdmsr_safe(addr, &low, &high)) { - pr_emerg("Invalid IP block specified, error information is unreliable.\n"); + pr_emerg(HW_ERR "Invalid IP block specified.\n"); return; } @@ -1078,6 +1078,8 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) u32 low, high; u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank); + pr_cont("|%s", ((m->status & MCI_STATUS_SYNDV) ? "SyndV" : "-")); + if (!rdmsr_safe(addr, &low, &high) && (low & MCI_CONFIG_MCAX)) pr_cont("|%s", ((m->status & MCI_STATUS_TCC) ? 
"TCC" : "-")); @@ -1091,12 +1093,18 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) pr_cont("]: 0x%016llx\n", m->status); if (m->status & MCI_STATUS_ADDRV) - pr_emerg(HW_ERR "MC%d Error Address: 0x%016llx\n", m->bank, m->addr); + pr_emerg(HW_ERR "Error Addr: 0x%016llx", m->addr); if (boot_cpu_has(X86_FEATURE_SMCA)) { + if (m->status & MCI_STATUS_SYNDV) + pr_cont(", Syndrome: 0x%016llx", m->synd); + + pr_cont("\n"); + decode_smca_errors(m); goto err_code; - } + } else + pr_cont("\n"); if (!fam_ops) goto err_code; From bad744b7f29d264c2c2ad8fb723dd480e6c9b007 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Mon, 12 Sep 2016 09:59:30 +0200 Subject: [PATCH 08/21] x86/RAS: Add syndrome support to mce_amd_inj Add a debugfs file which holds the error syndrome (written into MCA_SYND) of an injected error. Only write it on SMCA systems. Update README file, while at it. Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1467633035-32080-3-git-send-email-Yazen.Ghannam@amd.com Signed-off-by: Thomas Gleixner --- arch/x86/ras/mce_amd_inj.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/x86/ras/mce_amd_inj.c b/arch/x86/ras/mce_amd_inj.c index 1104515d5ad2..ff8eb1a9ce6d 100644 --- a/arch/x86/ras/mce_amd_inj.c +++ b/arch/x86/ras/mce_amd_inj.c @@ -68,6 +68,7 @@ static int inj_##reg##_set(void *data, u64 val) \ MCE_INJECT_SET(status); MCE_INJECT_SET(misc); MCE_INJECT_SET(addr); +MCE_INJECT_SET(synd); #define MCE_INJECT_GET(reg) \ static int inj_##reg##_get(void *data, u64 *val) \ @@ -81,10 +82,12 @@ static int inj_##reg##_get(void *data, u64 *val) \ MCE_INJECT_GET(status); MCE_INJECT_GET(misc); MCE_INJECT_GET(addr); +MCE_INJECT_GET(synd); DEFINE_SIMPLE_ATTRIBUTE(status_fops, inj_status_get, inj_status_set, "%llx\n"); DEFINE_SIMPLE_ATTRIBUTE(misc_fops, inj_misc_get, inj_misc_set, "%llx\n"); DEFINE_SIMPLE_ATTRIBUTE(addr_fops, inj_addr_get, inj_addr_set, "%llx\n"); 
+DEFINE_SIMPLE_ATTRIBUTE(synd_fops, inj_synd_get, inj_synd_set, "%llx\n"); /* * Caller needs to be make sure this cpu doesn't disappear @@ -258,6 +261,7 @@ static void prepare_msrs(void *info) } wrmsrl(MSR_AMD64_SMCA_MCx_MISC(b), i_mce.misc); + wrmsrl(MSR_AMD64_SMCA_MCx_SYND(b), i_mce.synd); } else { wrmsrl(MSR_IA32_MCx_STATUS(b), i_mce.status); wrmsrl(MSR_IA32_MCx_ADDR(b), i_mce.addr); @@ -275,6 +279,9 @@ static void do_inject(void) if (i_mce.misc) i_mce.status |= MCI_STATUS_MISCV; + if (i_mce.synd) + i_mce.status |= MCI_STATUS_SYNDV; + if (inj_type == SW_INJ) { mce_inject_log(&i_mce); return; @@ -371,6 +378,9 @@ static const char readme_msg[] = "\t used for error thresholding purposes and its validity is indicated by\n" "\t MCi_STATUS[MiscV].\n" "\n" +"synd:\t Set MCi_SYND: provide syndrome info about the error. Only valid on\n" +"\t Scalable MCA systems, and its validity is indicated by MCi_STATUS[SyndV].\n" +"\n" "addr:\t Error address value to be written to MCi_ADDR. Log address information\n" "\t associated with the error.\n" "\n" @@ -420,6 +430,7 @@ static struct dfs_node { { .name = "status", .fops = &status_fops, .perm = S_IRUSR | S_IWUSR }, { .name = "misc", .fops = &misc_fops, .perm = S_IRUSR | S_IWUSR }, { .name = "addr", .fops = &addr_fops, .perm = S_IRUSR | S_IWUSR }, + { .name = "synd", .fops = &synd_fops, .perm = S_IRUSR | S_IWUSR }, { .name = "bank", .fops = &bank_fops, .perm = S_IRUSR | S_IWUSR }, { .name = "flags", .fops = &flags_fops, .perm = S_IRUSR | S_IWUSR }, { .name = "cpu", .fops = &extcpu_fops, .perm = S_IRUSR | S_IWUSR }, From cfee4f6f0b2026380c6bc6913dbd27943df17371 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Mon, 12 Sep 2016 09:59:31 +0200 Subject: [PATCH 09/21] x86/mce/AMD: Read MSRs on the CPU allocating the threshold blocks Scalable MCA systems allow non-core MCA banks to only be accessible by certain CPUs. The MSRs for these banks are Read-as-Zero on other CPUs. 
During allocate_threshold_blocks(), get_block_address() can be scheduled on CPUs other than the one allocating the block. This causes the MSRs to be read on the wrong CPU and results in incorrect behavior. Add a @cpu parameter to get_block_address() and pass this in to ensure that the MSRs are only read on the CPU that is allocating the block. Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1472673994-12235-2-git-send-email-Yazen.Ghannam@amd.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/mcheck/mce_amd.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 419e0ee3b12f..9da92fb2e073 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -293,7 +293,7 @@ static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c) wrmsr(MSR_CU_DEF_ERR, low, high); } -static u32 get_block_address(u32 current_addr, u32 low, u32 high, +static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 high, unsigned int bank, unsigned int block) { u32 addr = 0, offset = 0; @@ -309,13 +309,13 @@ static u32 get_block_address(u32 current_addr, u32 low, u32 high, */ u32 low, high; - if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high)) + if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high)) return addr; if (!(low & MCI_CONFIG_MCAX)) return addr; - if (!rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) && + if (!rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) && (low & MASK_BLKPTR_LO)) addr = MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1); } @@ -421,12 +421,12 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr, void mce_amd_feature_init(struct cpuinfo_x86 *c) { u32 low = 0, high = 0, address = 0; - unsigned int bank, block; + unsigned int bank, block, cpu = smp_processor_id(); int offset = -1; for 
(bank = 0; bank < mca_cfg.banks; ++bank) { for (block = 0; block < NR_BLOCKS; ++block) { - address = get_block_address(address, low, high, bank, block); + address = get_block_address(cpu, address, low, high, bank, block); if (!address) break; @@ -544,15 +544,14 @@ static void amd_deferred_error_interrupt(void) static void amd_threshold_interrupt(void) { u32 low = 0, high = 0, address = 0; - int cpu = smp_processor_id(); - unsigned int bank, block; + unsigned int bank, block, cpu = smp_processor_id(); /* assume first bank caused it */ for (bank = 0; bank < mca_cfg.banks; ++bank) { if (!(per_cpu(bank_map, cpu) & (1 << bank))) continue; for (block = 0; block < NR_BLOCKS; ++block) { - address = get_block_address(address, low, high, bank, block); + address = get_block_address(cpu, address, low, high, bank, block); if (!address) break; @@ -774,7 +773,7 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank, if (err) goto out_free; recurse: - address = get_block_address(address, low, high, bank, ++block); + address = get_block_address(cpu, address, low, high, bank, ++block); if (!address) return 0; From c019b951e1f9f1de0c5b0726032e3adf34c523a7 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Mon, 12 Sep 2016 09:59:32 +0200 Subject: [PATCH 10/21] EDAC/mce_amd: Add missing SMCA error descriptions Add missing SMCA error descriptions to the error descriptions arrays. 
Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1472673994-12235-3-git-send-email-Yazen.Ghannam@amd.com Signed-off-by: Thomas Gleixner --- drivers/edac/mce_amd.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index 057ece577800..455cd49d6253 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -185,6 +185,8 @@ static const char * const f17h_if_mce_desc[] = { "BPQ snoop parity on Thread 1", "L1 BTB multi-match error", "L2 BTB multi-match error", + "L2 Cache Response Poison error", + "System Read Data error", }; static const char * const f17h_l2_mce_desc[] = { @@ -198,6 +200,7 @@ static const char * const f17h_de_mce_desc[] = { "uop cache tag parity error", "uop cache data parity error", "Insn buffer parity error", + "uop queue parity error", "Insn dispatch queue parity error", "Fetch address FIFO parity", "Patch RAM data parity", @@ -214,6 +217,9 @@ static const char * const f17h_ex_mce_desc[] = { "EX payload parity", "Checkpoint queue parity", "Retire dispatch queue parity", + "Retire status queue parity error", + "Scheduling queue parity error", + "Branch buffer queue parity error", }; static const char * const f17h_fp_mce_desc[] = { @@ -223,6 +229,7 @@ static const char * const f17h_fp_mce_desc[] = { "NSQ parity error", "Retire queue parity", "Status register file parity", + "Hardware assertion", }; static const char * const f17h_l3_mce_desc[] = { From 856095b1794be487527771dbd2fe28e34e94b266 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Mon, 12 Sep 2016 09:59:33 +0200 Subject: [PATCH 11/21] EDAC/mce_amd: Use SMCA prefix for error descriptions arrays The error descriptions defined for Fam17h can be reused for other SMCA systems, so their names should reflect this. Change f17h prefix to smca for error descriptions. 
Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1472673994-12235-4-git-send-email-Yazen.Ghannam@amd.com Signed-off-by: Thomas Gleixner --- drivers/edac/mce_amd.c | 80 +++++++++++++++++++++--------------------- 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index 455cd49d6253..ea549a94361b 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -148,12 +148,12 @@ static const char * const mc6_mce_desc[] = { }; /* Scalable MCA error strings */ -static const char * const f17h_ls_mce_desc[] = { +static const char * const smca_ls_mce_desc[] = { "Load queue parity", "Store queue parity", "Miss address buffer payload parity", "L1 TLB parity", - "", /* reserved */ + "Reserved", "DC tag error type 6", "DC tag error type 1", "Internal error type 1", @@ -172,7 +172,7 @@ static const char * const f17h_ls_mce_desc[] = { "L2 fill data error", }; -static const char * const f17h_if_mce_desc[] = { +static const char * const smca_if_mce_desc[] = { "microtag probe port parity error", "IC microtag or full tag multi-hit error", "IC full tag parity", @@ -189,14 +189,14 @@ static const char * const f17h_if_mce_desc[] = { "System Read Data error", }; -static const char * const f17h_l2_mce_desc[] = { +static const char * const smca_l2_mce_desc[] = { "L2M tag multi-way-hit error", "L2M tag ECC error", "L2M data ECC error", "HW assert", }; -static const char * const f17h_de_mce_desc[] = { +static const char * const smca_de_mce_desc[] = { "uop cache tag parity error", "uop cache data parity error", "Insn buffer parity error", @@ -208,7 +208,7 @@ static const char * const f17h_de_mce_desc[] = { "uop buffer parity" }; -static const char * const f17h_ex_mce_desc[] = { +static const char * const smca_ex_mce_desc[] = { "Watchdog timeout error", "Phy register file parity", "Flag register file parity", @@ -222,7 +222,7 @@ static const char * const f17h_ex_mce_desc[] = { "Branch 
buffer queue parity error", }; -static const char * const f17h_fp_mce_desc[] = { +static const char * const smca_fp_mce_desc[] = { "Physical register file parity", "Freelist parity error", "Schedule queue parity", @@ -232,7 +232,7 @@ static const char * const f17h_fp_mce_desc[] = { "Hardware assertion", }; -static const char * const f17h_l3_mce_desc[] = { +static const char * const smca_l3_mce_desc[] = { "Shadow tag macro ECC error", "Shadow tag macro multi-way-hit error", "L3M tag ECC error", @@ -243,7 +243,7 @@ static const char * const f17h_l3_mce_desc[] = { "L3 HW assert", }; -static const char * const f17h_cs_mce_desc[] = { +static const char * const smca_cs_mce_desc[] = { "Illegal request from transport layer", "Address violation", "Security violation", @@ -255,14 +255,14 @@ static const char * const f17h_cs_mce_desc[] = { "ECC error on probe filter access", }; -static const char * const f17h_pie_mce_desc[] = { +static const char * const smca_pie_mce_desc[] = { "HW assert", "Internal PIE register security violation", "Error on GMI link", "Poison data written to internal PIE register", }; -static const char * const f17h_umc_mce_desc[] = { +static const char * const smca_umc_mce_desc[] = { "DRAM ECC error", "Data poison error on DRAM", "SDP parity error", @@ -271,15 +271,15 @@ static const char * const f17h_umc_mce_desc[] = { "Write data CRC error", }; -static const char * const f17h_pb_mce_desc[] = { +static const char * const smca_pb_mce_desc[] = { "Parameter Block RAM ECC error", }; -static const char * const f17h_psp_mce_desc[] = { +static const char * const smca_psp_mce_desc[] = { "PSP RAM ECC or parity error", }; -static const char * const f17h_smu_mce_desc[] = { +static const char * const smca_smu_mce_desc[] = { "SMU RAM ECC or parity error", }; @@ -837,8 +837,8 @@ static void decode_f17h_core_errors(const char *ip_name, u8 xec, switch (mca_type) { case SMCA_LS: - error_desc_array = f17h_ls_mce_desc; - len = ARRAY_SIZE(f17h_ls_mce_desc) - 1; + 
error_desc_array = smca_ls_mce_desc; + len = ARRAY_SIZE(smca_ls_mce_desc) - 1; if (xec == 0x4) { pr_cont("Unrecognized LS MCA error code.\n"); @@ -847,33 +847,33 @@ static void decode_f17h_core_errors(const char *ip_name, u8 xec, break; case SMCA_IF: - error_desc_array = f17h_if_mce_desc; - len = ARRAY_SIZE(f17h_if_mce_desc) - 1; + error_desc_array = smca_if_mce_desc; + len = ARRAY_SIZE(smca_if_mce_desc) - 1; break; case SMCA_L2_CACHE: - error_desc_array = f17h_l2_mce_desc; - len = ARRAY_SIZE(f17h_l2_mce_desc) - 1; + error_desc_array = smca_l2_mce_desc; + len = ARRAY_SIZE(smca_l2_mce_desc) - 1; break; case SMCA_DE: - error_desc_array = f17h_de_mce_desc; - len = ARRAY_SIZE(f17h_de_mce_desc) - 1; + error_desc_array = smca_de_mce_desc; + len = ARRAY_SIZE(smca_de_mce_desc) - 1; break; case SMCA_EX: - error_desc_array = f17h_ex_mce_desc; - len = ARRAY_SIZE(f17h_ex_mce_desc) - 1; + error_desc_array = smca_ex_mce_desc; + len = ARRAY_SIZE(smca_ex_mce_desc) - 1; break; case SMCA_FP: - error_desc_array = f17h_fp_mce_desc; - len = ARRAY_SIZE(f17h_fp_mce_desc) - 1; + error_desc_array = smca_fp_mce_desc; + len = ARRAY_SIZE(smca_fp_mce_desc) - 1; break; case SMCA_L3_CACHE: - error_desc_array = f17h_l3_mce_desc; - len = ARRAY_SIZE(f17h_l3_mce_desc) - 1; + error_desc_array = smca_l3_mce_desc; + len = ARRAY_SIZE(smca_l3_mce_desc) - 1; break; default: @@ -899,13 +899,13 @@ static void decode_df_errors(u8 xec, unsigned int mca_type) switch (mca_type) { case SMCA_CS: - error_desc_array = f17h_cs_mce_desc; - len = ARRAY_SIZE(f17h_cs_mce_desc) - 1; + error_desc_array = smca_cs_mce_desc; + len = ARRAY_SIZE(smca_cs_mce_desc) - 1; break; case SMCA_PIE: - error_desc_array = f17h_pie_mce_desc; - len = ARRAY_SIZE(f17h_pie_mce_desc) - 1; + error_desc_array = smca_pie_mce_desc; + len = ARRAY_SIZE(smca_pie_mce_desc) - 1; break; default: @@ -963,23 +963,23 @@ static void decode_smca_errors(struct mce *m) break; case SMCA_UMC: - error_desc_array = f17h_umc_mce_desc; - len = 
ARRAY_SIZE(f17h_umc_mce_desc) - 1; + error_desc_array = smca_umc_mce_desc; + len = ARRAY_SIZE(smca_umc_mce_desc) - 1; break; case SMCA_PB: - error_desc_array = f17h_pb_mce_desc; - len = ARRAY_SIZE(f17h_pb_mce_desc) - 1; + error_desc_array = smca_pb_mce_desc; + len = ARRAY_SIZE(smca_pb_mce_desc) - 1; break; case SMCA_PSP: - error_desc_array = f17h_psp_mce_desc; - len = ARRAY_SIZE(f17h_psp_mce_desc) - 1; + error_desc_array = smca_psp_mce_desc; + len = ARRAY_SIZE(smca_psp_mce_desc) - 1; break; case SMCA_SMU: - error_desc_array = f17h_smu_mce_desc; - len = ARRAY_SIZE(f17h_smu_mce_desc) - 1; + error_desc_array = smca_smu_mce_desc; + len = ARRAY_SIZE(smca_smu_mce_desc) - 1; break; default: From 5896820e0aa32572ad03b30563c539655b6c6375 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Mon, 12 Sep 2016 09:59:34 +0200 Subject: [PATCH 12/21] x86/mce/AMD, EDAC/mce_amd: Define and use tables for known SMCA IP types Scalable MCA defines a number of IP types. An MCA bank on an SMCA system is defined as one of these IP types. A bank's type is uniquely identified by the combination of the HWID and MCATYPE values read from its MCA_IPID register. Add the required tables in order to be able to lookup error descriptions based on a bank's type and the error's extended error code. [ bp: Align comments, simplify a bit. ] Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1472741832-1690-1-git-send-email-Yazen.Ghannam@amd.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/mce.h | 63 +++++---- arch/x86/kernel/cpu/mcheck/mce_amd.c | 102 ++++++++++---- drivers/edac/mce_amd.c | 198 +++++---------------------- 3 files changed, 149 insertions(+), 214 deletions(-) diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 21bc5a3a4c89..9bd7ff5ffbcc 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -337,44 +337,47 @@ extern void apei_mce_report_mem_error(int corrected, * Scalable MCA. 
*/ #ifdef CONFIG_X86_MCE_AMD -enum amd_ip_types { - SMCA_F17H_CORE = 0, /* Core errors */ - SMCA_DF, /* Data Fabric */ - SMCA_UMC, /* Unified Memory Controller */ - SMCA_PB, /* Parameter Block */ - SMCA_PSP, /* Platform Security Processor */ - SMCA_SMU, /* System Management Unit */ - N_AMD_IP_TYPES -}; -struct amd_hwid { - const char *name; - unsigned int hwid; -}; - -extern struct amd_hwid amd_hwids[N_AMD_IP_TYPES]; - -enum amd_core_mca_blocks { +/* These may be used by multiple smca_hwid_mcatypes */ +enum smca_bank_types { SMCA_LS = 0, /* Load Store */ SMCA_IF, /* Instruction Fetch */ - SMCA_L2_CACHE, /* L2 cache */ - SMCA_DE, /* Decoder unit */ - RES, /* Reserved */ - SMCA_EX, /* Execution unit */ + SMCA_L2_CACHE, /* L2 Cache */ + SMCA_DE, /* Decoder Unit */ + SMCA_EX, /* Execution Unit */ SMCA_FP, /* Floating Point */ - SMCA_L3_CACHE, /* L3 cache */ - N_CORE_MCA_BLOCKS + SMCA_L3_CACHE, /* L3 Cache */ + SMCA_CS, /* Coherent Slave */ + SMCA_PIE, /* Power, Interrupts, etc. */ + SMCA_UMC, /* Unified Memory Controller */ + SMCA_PB, /* Parameter Block */ + SMCA_PSP, /* Platform Security Processor */ + SMCA_SMU, /* System Management Unit */ + N_SMCA_BANK_TYPES }; -extern const char * const amd_core_mcablock_names[N_CORE_MCA_BLOCKS]; - -enum amd_df_mca_blocks { - SMCA_CS = 0, /* Coherent Slave */ - SMCA_PIE, /* Power management, Interrupts, etc */ - N_DF_BLOCKS +struct smca_bank_name { + const char *name; /* Short name for sysfs */ + const char *long_name; /* Long name for pretty-printing */ }; -extern const char * const amd_df_mcablock_names[N_DF_BLOCKS]; +extern struct smca_bank_name smca_bank_names[N_SMCA_BANK_TYPES]; + +#define HWID_MCATYPE(hwid, mcatype) ((hwid << 16) | mcatype) + +struct smca_hwid_mcatype { + unsigned int bank_type; /* Use with smca_bank_types for easy indexing. */ + u32 hwid_mcatype; /* (hwid,mcatype) tuple */ + u32 xec_bitmap; /* Bitmap of valid ExtErrorCodes; current max is 21. 
*/ +}; + +struct smca_bank_info { + struct smca_hwid_mcatype *type; + u32 type_instance; +}; + +extern struct smca_bank_info smca_banks[MAX_NR_BANKS]; + #endif #endif /* _ASM_X86_MCE_H */ diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 9da92fb2e073..3b74b62d0808 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -63,34 +63,55 @@ static const char * const th_names[] = { "execution_unit", }; -/* Define HWID to IP type mappings for Scalable MCA */ -struct amd_hwid amd_hwids[] = { - [SMCA_F17H_CORE] = { "f17h_core", 0xB0 }, - [SMCA_DF] = { "data_fabric", 0x2E }, - [SMCA_UMC] = { "umc", 0x96 }, - [SMCA_PB] = { "param_block", 0x5 }, - [SMCA_PSP] = { "psp", 0xFF }, - [SMCA_SMU] = { "smu", 0x1 }, +struct smca_bank_name smca_bank_names[] = { + [SMCA_LS] = { "load_store", "Load Store Unit" }, + [SMCA_IF] = { "insn_fetch", "Instruction Fetch Unit" }, + [SMCA_L2_CACHE] = { "l2_cache", "L2 Cache" }, + [SMCA_DE] = { "decode_unit", "Decode Unit" }, + [SMCA_EX] = { "execution_unit", "Execution Unit" }, + [SMCA_FP] = { "floating_point", "Floating Point Unit" }, + [SMCA_L3_CACHE] = { "l3_cache", "L3 Cache" }, + [SMCA_CS] = { "coherent_slave", "Coherent Slave" }, + [SMCA_PIE] = { "pie", "Power, Interrupts, etc." 
}, + [SMCA_UMC] = { "umc", "Unified Memory Controller" }, + [SMCA_PB] = { "param_block", "Parameter Block" }, + [SMCA_PSP] = { "psp", "Platform Security Processor" }, + [SMCA_SMU] = { "smu", "System Management Unit" }, }; -EXPORT_SYMBOL_GPL(amd_hwids); +EXPORT_SYMBOL_GPL(smca_bank_names); -const char * const amd_core_mcablock_names[] = { - [SMCA_LS] = "load_store", - [SMCA_IF] = "insn_fetch", - [SMCA_L2_CACHE] = "l2_cache", - [SMCA_DE] = "decode_unit", - [RES] = "", - [SMCA_EX] = "execution_unit", - [SMCA_FP] = "floating_point", - [SMCA_L3_CACHE] = "l3_cache", -}; -EXPORT_SYMBOL_GPL(amd_core_mcablock_names); +static struct smca_hwid_mcatype smca_hwid_mcatypes[] = { + /* { bank_type, hwid_mcatype, xec_bitmap } */ -const char * const amd_df_mcablock_names[] = { - [SMCA_CS] = "coherent_slave", - [SMCA_PIE] = "pie", + /* ZN Core (HWID=0xB0) MCA types */ + { SMCA_LS, HWID_MCATYPE(0xB0, 0x0), 0x1FFFEF }, + { SMCA_IF, HWID_MCATYPE(0xB0, 0x1), 0x3FFF }, + { SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2), 0xF }, + { SMCA_DE, HWID_MCATYPE(0xB0, 0x3), 0x1FF }, + /* HWID 0xB0 MCATYPE 0x4 is Reserved */ + { SMCA_EX, HWID_MCATYPE(0xB0, 0x5), 0x7FF }, + { SMCA_FP, HWID_MCATYPE(0xB0, 0x6), 0x7F }, + { SMCA_L3_CACHE, HWID_MCATYPE(0xB0, 0x7), 0xFF }, + + /* Data Fabric MCA types */ + { SMCA_CS, HWID_MCATYPE(0x2E, 0x0), 0x1FF }, + { SMCA_PIE, HWID_MCATYPE(0x2E, 0x1), 0xF }, + + /* Unified Memory Controller MCA type */ + { SMCA_UMC, HWID_MCATYPE(0x96, 0x0), 0x3F }, + + /* Parameter Block MCA type */ + { SMCA_PB, HWID_MCATYPE(0x05, 0x0), 0x1 }, + + /* Platform Security Processor MCA type */ + { SMCA_PSP, HWID_MCATYPE(0xFF, 0x0), 0x1 }, + + /* System Management Unit MCA type */ + { SMCA_SMU, HWID_MCATYPE(0x01, 0x0), 0x1 }, }; -EXPORT_SYMBOL_GPL(amd_df_mcablock_names); + +struct smca_bank_info smca_banks[MAX_NR_BANKS]; +EXPORT_SYMBOL_GPL(smca_banks); static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks); static DEFINE_PER_CPU(unsigned int, bank_map); /* see which banks are on */ @@ 
-108,6 +129,36 @@ void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt; * CPU Initialization */ +static void get_smca_bank_info(unsigned int bank) +{ + unsigned int i, hwid_mcatype, cpu = smp_processor_id(); + struct smca_hwid_mcatype *type; + u32 high, instanceId; + u16 hwid, mcatype; + + /* Collect bank_info using CPU 0 for now. */ + if (cpu) + return; + + if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_IPID(bank), &instanceId, &high)) { + pr_warn("Failed to read MCA_IPID for bank %d\n", bank); + return; + } + + hwid = high & MCI_IPID_HWID; + mcatype = (high & MCI_IPID_MCATYPE) >> 16; + hwid_mcatype = HWID_MCATYPE(hwid, mcatype); + + for (i = 0; i < ARRAY_SIZE(smca_hwid_mcatypes); i++) { + type = &smca_hwid_mcatypes[i]; + if (hwid_mcatype == type->hwid_mcatype) { + smca_banks[bank].type = type; + smca_banks[bank].type_instance = instanceId; + break; + } + } +} + struct thresh_restart { struct threshold_block *b; int reset; @@ -425,6 +476,9 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) int offset = -1; for (bank = 0; bank < mca_cfg.banks; ++bank) { + if (mce_flags.smca) + get_smca_bank_info(bank); + for (block = 0; block < NR_BLOCKS; ++block) { address = get_block_address(cpu, address, low, high, bank, block); if (!address) diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index ea549a94361b..99b3bf3f4182 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -283,6 +283,27 @@ static const char * const smca_smu_mce_desc[] = { "SMU RAM ECC or parity error", }; +struct smca_mce_desc { + const char * const *descs; + unsigned int num_descs; +}; + +static struct smca_mce_desc smca_mce_descs[] = { + [SMCA_LS] = { smca_ls_mce_desc, ARRAY_SIZE(smca_ls_mce_desc) }, + [SMCA_IF] = { smca_if_mce_desc, ARRAY_SIZE(smca_if_mce_desc) }, + [SMCA_L2_CACHE] = { smca_l2_mce_desc, ARRAY_SIZE(smca_l2_mce_desc) }, + [SMCA_DE] = { smca_de_mce_desc, ARRAY_SIZE(smca_de_mce_desc) }, + [SMCA_EX] = { smca_ex_mce_desc, 
ARRAY_SIZE(smca_ex_mce_desc) }, + [SMCA_FP] = { smca_fp_mce_desc, ARRAY_SIZE(smca_fp_mce_desc) }, + [SMCA_L3_CACHE] = { smca_l3_mce_desc, ARRAY_SIZE(smca_l3_mce_desc) }, + [SMCA_CS] = { smca_cs_mce_desc, ARRAY_SIZE(smca_cs_mce_desc) }, + [SMCA_PIE] = { smca_pie_mce_desc, ARRAY_SIZE(smca_pie_mce_desc) }, + [SMCA_UMC] = { smca_umc_mce_desc, ARRAY_SIZE(smca_umc_mce_desc) }, + [SMCA_PB] = { smca_pb_mce_desc, ARRAY_SIZE(smca_pb_mce_desc) }, + [SMCA_PSP] = { smca_psp_mce_desc, ARRAY_SIZE(smca_psp_mce_desc) }, + [SMCA_SMU] = { smca_smu_mce_desc, ARRAY_SIZE(smca_smu_mce_desc) }, +}; + static bool f12h_mc0_mce(u16 ec, u8 xec) { bool ret = false; @@ -827,175 +848,32 @@ static void decode_mc6_mce(struct mce *m) pr_emerg(HW_ERR "Corrupted MC6 MCE info?\n"); } -static void decode_f17h_core_errors(const char *ip_name, u8 xec, - unsigned int mca_type) -{ - const char * const *error_desc_array; - size_t len; - - pr_emerg(HW_ERR "%s Error: ", ip_name); - - switch (mca_type) { - case SMCA_LS: - error_desc_array = smca_ls_mce_desc; - len = ARRAY_SIZE(smca_ls_mce_desc) - 1; - - if (xec == 0x4) { - pr_cont("Unrecognized LS MCA error code.\n"); - return; - } - break; - - case SMCA_IF: - error_desc_array = smca_if_mce_desc; - len = ARRAY_SIZE(smca_if_mce_desc) - 1; - break; - - case SMCA_L2_CACHE: - error_desc_array = smca_l2_mce_desc; - len = ARRAY_SIZE(smca_l2_mce_desc) - 1; - break; - - case SMCA_DE: - error_desc_array = smca_de_mce_desc; - len = ARRAY_SIZE(smca_de_mce_desc) - 1; - break; - - case SMCA_EX: - error_desc_array = smca_ex_mce_desc; - len = ARRAY_SIZE(smca_ex_mce_desc) - 1; - break; - - case SMCA_FP: - error_desc_array = smca_fp_mce_desc; - len = ARRAY_SIZE(smca_fp_mce_desc) - 1; - break; - - case SMCA_L3_CACHE: - error_desc_array = smca_l3_mce_desc; - len = ARRAY_SIZE(smca_l3_mce_desc) - 1; - break; - - default: - pr_cont("Corrupted MCA core error info.\n"); - return; - } - - if (xec > len) { - pr_cont("Unrecognized %s MCA bank error code.\n", - 
amd_core_mcablock_names[mca_type]); - return; - } - - pr_cont("%s.\n", error_desc_array[xec]); -} - -static void decode_df_errors(u8 xec, unsigned int mca_type) -{ - const char * const *error_desc_array; - size_t len; - - pr_emerg(HW_ERR "Data Fabric Error: "); - - switch (mca_type) { - case SMCA_CS: - error_desc_array = smca_cs_mce_desc; - len = ARRAY_SIZE(smca_cs_mce_desc) - 1; - break; - - case SMCA_PIE: - error_desc_array = smca_pie_mce_desc; - len = ARRAY_SIZE(smca_pie_mce_desc) - 1; - break; - - default: - pr_cont("Corrupted MCA Data Fabric info.\n"); - return; - } - - if (xec > len) { - pr_cont("Unrecognized %s MCA bank error code.\n", - amd_df_mcablock_names[mca_type]); - return; - } - - pr_cont("%s.\n", error_desc_array[xec]); -} - /* Decode errors according to Scalable MCA specification */ static void decode_smca_errors(struct mce *m) { - u32 addr = MSR_AMD64_SMCA_MCx_IPID(m->bank); - unsigned int hwid, mca_type, i; - u8 xec = XEC(m->status, xec_mask); - const char * const *error_desc_array; + struct smca_hwid_mcatype *type; + unsigned int bank_type; const char *ip_name; - u32 low, high; - size_t len; + u8 xec = XEC(m->status, xec_mask); - if (rdmsr_safe(addr, &low, &high)) { - pr_emerg(HW_ERR "Invalid IP block specified.\n"); + if (m->bank >= ARRAY_SIZE(smca_banks)) return; - } - hwid = high & MCI_IPID_HWID; - mca_type = (high & MCI_IPID_MCATYPE) >> 16; - - pr_emerg(HW_ERR "MC%d IPID value: 0x%08x%08x\n", m->bank, high, low); - - /* - * Based on hwid and mca_type values, decode errors from respective IPs. - * Note: mca_type values make sense only in the context of an hwid. - */ - for (i = 0; i < ARRAY_SIZE(amd_hwids); i++) - if (amd_hwids[i].hwid == hwid) - break; - - switch (i) { - case SMCA_F17H_CORE: - ip_name = (mca_type == SMCA_L3_CACHE) ? 
- "L3 Cache" : "F17h Core"; - return decode_f17h_core_errors(ip_name, xec, mca_type); - break; - - case SMCA_DF: - return decode_df_errors(xec, mca_type); - break; - - case SMCA_UMC: - error_desc_array = smca_umc_mce_desc; - len = ARRAY_SIZE(smca_umc_mce_desc) - 1; - break; - - case SMCA_PB: - error_desc_array = smca_pb_mce_desc; - len = ARRAY_SIZE(smca_pb_mce_desc) - 1; - break; - - case SMCA_PSP: - error_desc_array = smca_psp_mce_desc; - len = ARRAY_SIZE(smca_psp_mce_desc) - 1; - break; - - case SMCA_SMU: - error_desc_array = smca_smu_mce_desc; - len = ARRAY_SIZE(smca_smu_mce_desc) - 1; - break; - - default: - pr_emerg(HW_ERR "HWID:%d does not match any existing IPs.\n", hwid); + type = smca_banks[m->bank].type; + if (!type) return; + + bank_type = type->bank_type; + ip_name = smca_bank_names[bank_type].long_name; + + pr_emerg(HW_ERR "%s Extended Error Code: %d\n", ip_name, xec); + + /* Only print the decode of valid error codes */ + if (xec < smca_mce_descs[bank_type].num_descs && + (type->xec_bitmap & BIT_ULL(xec))) { + pr_emerg(HW_ERR "%s Error: ", ip_name); + pr_cont("%s.\n", smca_mce_descs[bank_type].descs[xec]); } - - ip_name = amd_hwids[i].name; - pr_emerg(HW_ERR "%s Error: ", ip_name); - - if (xec > len) { - pr_cont("Unrecognized %s MCA bank error code.\n", ip_name); - return; - } - - pr_cont("%s.\n", error_desc_array[xec]); } static inline void amd_decode_err_code(u16 ec) From 87a6d4091bd795b43d684bfc87253e04a263af1c Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Mon, 12 Sep 2016 09:59:35 +0200 Subject: [PATCH 13/21] x86/mce/AMD: Update sysfs bank names for SMCA systems Define a bank's sysfs filename based on its IP type and InstanceId. Credits go to Aravind for: * The general idea and proto- get_name(). * Defining smca_umc_block_names[] and buf_mcatype[]. 
Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Cc: Aravind Gopalakrishnan Link: http://lkml.kernel.org/r/1473193490-3291-2-git-send-email-Yazen.Ghannam@amd.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/mcheck/mce_amd.c | 49 ++++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 3b74b62d0808..0f9d0786bc97 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -63,6 +64,11 @@ static const char * const th_names[] = { "execution_unit", }; +static const char * const smca_umc_block_names[] = { + "dram_ecc", + "misc_umc" +}; + struct smca_bank_name smca_bank_names[] = { [SMCA_LS] = { "load_store", "Load Store Unit" }, [SMCA_IF] = { "insn_fetch", "Instruction Fetch Unit" }, @@ -113,6 +119,17 @@ static struct smca_hwid_mcatype smca_hwid_mcatypes[] = { struct smca_bank_info smca_banks[MAX_NR_BANKS]; EXPORT_SYMBOL_GPL(smca_banks); +/* + * In SMCA enabled processors, we can have multiple banks for a given IP type. + * So to define a unique name for each bank, we use a temp c-string to append + * the MCA_IPID[InstanceId] to type's name in get_name(). + * + * InstanceId is 32 bits which is 8 characters. Make sure MAX_MCATYPE_NAME_LEN + * is greater than 8 plus 1 (for underscore) plus length of longest type name. 
+ */ +#define MAX_MCATYPE_NAME_LEN 30 +static char buf_mcatype[MAX_MCATYPE_NAME_LEN]; + static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks); static DEFINE_PER_CPU(unsigned int, bank_map); /* see which banks are on */ @@ -769,6 +786,34 @@ static struct kobj_type threshold_ktype = { .default_attrs = default_attrs, }; +static const char *get_name(unsigned int bank, struct threshold_block *b) +{ + unsigned int bank_type; + + if (!mce_flags.smca) { + if (b && bank == 4) + return bank4_names(b); + + return th_names[bank]; + } + + if (!smca_banks[bank].type) + return NULL; + + bank_type = smca_banks[bank].type->bank_type; + + if (b && bank_type == SMCA_UMC) { + if (b->block < ARRAY_SIZE(smca_umc_block_names)) + return smca_umc_block_names[b->block]; + return NULL; + } + + snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN, + "%s_%x", smca_bank_names[bank_type].name, + smca_banks[bank].type_instance); + return buf_mcatype; +} + static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank, unsigned int block, u32 address) { @@ -823,7 +868,7 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank, err = kobject_init_and_add(&b->kobj, &threshold_ktype, per_cpu(threshold_banks, cpu)[bank]->kobj, - (bank == 4 ? bank4_names(b) : th_names[bank])); + get_name(bank, b)); if (err) goto out_free; recurse: @@ -878,7 +923,7 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank) struct device *dev = per_cpu(mce_device, cpu); struct amd_northbridge *nb = NULL; struct threshold_bank *b = NULL; - const char *name = th_names[bank]; + const char *name = get_name(bank, NULL); int err = 0; if (is_shared_bank(bank)) { From 66ef269dbbe45e264ccf7146d5db32b04478d148 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Mon, 12 Sep 2016 09:59:36 +0200 Subject: [PATCH 14/21] x86/mce/AMD: Ensure the deferred error interrupt is of type APIC on SMCA systems The Deferred Error Interrupt Type is set per bank on Scalable MCA systems. 
This is done in a bitfield in the MCA_CONFIG register of each bank. We should set its type to APIC-based interrupt and not assume BIOS has set it for us. Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1472737486-1720-1-git-send-email-Yazen.Ghannam@amd.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/mcheck/mce_amd.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 0f9d0786bc97..16766e09c2b7 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -463,6 +463,20 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr, */ smca_high &= ~BIT(2); + /* + * SMCA sets the Deferred Error Interrupt type per bank. + * + * MCA_CONFIG[DeferredIntTypeSupported] is bit 5, and tells us + * if the DeferredIntType bit field is available. + * + * MCA_CONFIG[DeferredIntType] is bits [38:37] ([6:5] in the + * high portion of the MSR). OS should set this to 0x1 to enable + * APIC based interrupt. First, check that no interrupt has been + * set. + */ + if ((smca_low & BIT(5)) && !((smca_high >> 5) & 0x3)) + smca_high |= BIT(5); + wrmsr(smca_addr, smca_low, smca_high); } From 5828c46f2c07b97d758da6dc6afd5c374768d44d Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Mon, 12 Sep 2016 09:59:37 +0200 Subject: [PATCH 15/21] x86/mce/AMD: Save MCA_IPID in MCE struct on SMCA systems The MCA_IPID register uniquely identifies a bank's type and instance on Scalable MCA systems. We should save the value of this register in struct mce along with the other relevant error information. This ensures that we can decode errors without relying on system software to correlate the bank to the type. 
Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1472680624-34221-1-git-send-email-Yazen.Ghannam@amd.com Signed-off-by: Thomas Gleixner --- arch/x86/include/uapi/asm/mce.h | 1 + arch/x86/kernel/cpu/mcheck/mce.c | 8 ++++++-- arch/x86/kernel/cpu/mcheck/mce_amd.c | 8 ++++++-- include/trace/events/mce.h | 5 ++++- 4 files changed, 17 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h index 8c75fbc94c3f..69a6e07e3149 100644 --- a/arch/x86/include/uapi/asm/mce.h +++ b/arch/x86/include/uapi/asm/mce.h @@ -27,6 +27,7 @@ struct mce { __u32 apicid; /* CPU initial apic ID */ __u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */ __u64 synd; /* MCA_SYND MSR: only valid on SMCA systems */ + __u64 ipid; /* MCA_IPID MSR: only valid on SMCA systems */ }; #define MCE_GET_RECORD_LEN _IOR('M', 1, int) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 91a179b95fd0..17e9ff011c0e 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -583,8 +583,12 @@ static void mce_read_aux(struct mce *m, int i) } } - if (mce_flags.smca && (m->status & MCI_STATUS_SYNDV)) - m->synd = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND(i)); + if (mce_flags.smca) { + m->ipid = mce_rdmsrl(MSR_AMD64_SMCA_MCx_IPID(i)); + + if (m->status & MCI_STATUS_SYNDV) + m->synd = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND(i)); + } } static bool memory_error(struct mce *m) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 16766e09c2b7..d2f92ab5322f 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -564,8 +564,12 @@ __log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc) if (m.status & MCI_STATUS_ADDRV) rdmsrl(msr_addr, m.addr); - if (mce_flags.smca && (m.status & MCI_STATUS_SYNDV)) - rdmsrl(MSR_AMD64_SMCA_MCx_SYND(bank), m.synd); + if (mce_flags.smca) { + 
rdmsrl(MSR_AMD64_SMCA_MCx_IPID(bank), m.ipid); + + if (m.status & MCI_STATUS_SYNDV) + rdmsrl(MSR_AMD64_SMCA_MCx_SYND(bank), m.synd); + } mce_log(&m); diff --git a/include/trace/events/mce.h b/include/trace/events/mce.h index 8be5268caf28..70f02149808c 100644 --- a/include/trace/events/mce.h +++ b/include/trace/events/mce.h @@ -21,6 +21,7 @@ TRACE_EVENT(mce_record, __field( u64, addr ) __field( u64, misc ) __field( u64, synd ) + __field( u64, ipid ) __field( u64, ip ) __field( u64, tsc ) __field( u64, walltime ) @@ -40,6 +41,7 @@ TRACE_EVENT(mce_record, __entry->addr = m->addr; __entry->misc = m->misc; __entry->synd = m->synd; + __entry->ipid = m->ipid; __entry->ip = m->ip; __entry->tsc = m->tsc; __entry->walltime = m->time; @@ -52,10 +54,11 @@ TRACE_EVENT(mce_record, __entry->cpuvendor = m->cpuvendor; ), - TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, ADDR/MISC/SYND: %016Lx/%016Lx/%016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PROCESSOR: %u:%x, TIME: %llu, SOCKET: %u, APIC: %x", + TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, IPID: %016Lx, ADDR/MISC/SYND: %016Lx/%016Lx/%016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PROCESSOR: %u:%x, TIME: %llu, SOCKET: %u, APIC: %x", __entry->cpu, __entry->mcgcap, __entry->mcgstatus, __entry->bank, __entry->status, + __entry->ipid, __entry->addr, __entry->misc, __entry->synd, __entry->cs, __entry->ip, __entry->tsc, From 4b711f92c9b21878794597997ecda1428acc334c Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Mon, 12 Sep 2016 09:59:38 +0200 Subject: [PATCH 16/21] x86/mce, EDAC/mce_amd: Print MCA_SYND and MCA_IPID during MCE on SMCA systems The MCA_SYND and MCA_IPID registers contain valuable information and should be included in MCE output. The MCA_SYND register contains syndrome and other error information, and the MCA_IPID register will uniquely identify the MCA bank's type without having to rely on system software. 
Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1472680624-34221-2-git-send-email-Yazen.Ghannam@amd.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/mcheck/mce.c | 7 +++++++ drivers/edac/mce_amd.c | 2 ++ 2 files changed, 9 insertions(+) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 17e9ff011c0e..7d905e3d58a2 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -293,6 +293,13 @@ static void print_mce(struct mce *m) if (m->misc) pr_cont("MISC %llx ", m->misc); + if (mce_flags.smca) { + if (m->synd) + pr_cont("SYND %llx ", m->synd); + if (m->ipid) + pr_cont("IPID %llx ", m->ipid); + } + pr_cont("\n"); /* * Note this output is parsed by external tools and old fields diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index 99b3bf3f4182..e8855a4f92d9 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -984,6 +984,8 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) if (m->status & MCI_STATUS_SYNDV) pr_cont(", Syndrome: 0x%016llx", m->synd); + pr_cont(", IPID: 0x%016llx", m->ipid); + pr_cont("\n"); decode_smca_errors(m); From 4f29b73bae158e3635b8f289f77376b054904ef5 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Mon, 12 Sep 2016 09:59:39 +0200 Subject: [PATCH 17/21] x86/mce/AMD: Extract the error address on SMCA systems The MCA_ADDR registers on Scalable MCA systems contain the ErrorAddr in bits [55:0] and the least significant bit of the address in bits [61:56]. We should extract the valid ErrorAddr bits from the MCA_ADDR register rather than saving the raw value to struct mce. 
Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1473275643-1721-1-git-send-email-Yazen.Ghannam@amd.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/mcheck/mce.c | 10 ++++++++++ arch/x86/kernel/cpu/mcheck/mce_amd.c | 13 ++++++++++++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 7d905e3d58a2..a7fdf453d895 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -588,6 +588,16 @@ static void mce_read_aux(struct mce *m, int i) m->addr >>= shift; m->addr <<= shift; } + + /* + * Extract [55:<lsb>] where lsb is the least significant + * *valid* bit of the address bits. + */ + if (mce_flags.smca) { + u8 lsb = (m->addr >> 56) & 0x3f; + + m->addr &= GENMASK_ULL(55, lsb); + } } if (mce_flags.smca) { diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index d2f92ab5322f..9b5403462936 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -561,9 +561,20 @@ __log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc) if (threshold_err) m.misc = misc; - if (m.status & MCI_STATUS_ADDRV) + if (m.status & MCI_STATUS_ADDRV) { rdmsrl(msr_addr, m.addr); + /* + * Extract [55:<lsb>] where lsb is the least significant + * *valid* bit of the address bits. + */ + if (mce_flags.smca) { + u8 lsb = (m.addr >> 56) & 0x3f; + + m.addr &= GENMASK_ULL(55, lsb); + } + } + if (mce_flags.smca) { rdmsrl(MSR_AMD64_SMCA_MCx_IPID(bank), m.ipid); From a884675b873a0185d2626d1f304987c94cef6d74 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Mon, 12 Sep 2016 09:59:40 +0200 Subject: [PATCH 18/21] x86/MCE/AMD, EDAC: Handle reserved bank 4 on Fam17h properly Bank 4 is reserved on family 0x17 and shouldn't generate any MCE records. However, broken hardware and software is not something unheard of so warn about bank 4 errors.
They shouldn't be coming from bank 4 naturally but users can still use mce_amd_inj to simulate errors from it for testing purposes. Also, avoid special handling in the injector mce_amd_inj like it is being done on the older families. [ bp: Rewrite commit message and merge into one patch. Use boot_cpu_data. ] Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Reviewed-by: Aravind Gopalakrishnan Link: http://lkml.kernel.org/r/1473384591-5323-1-git-send-email-Yazen.Ghannam@amd.com Link: http://lkml.kernel.org/r/1473384591-5323-2-git-send-email-Yazen.Ghannam@amd.com Signed-off-by: Thomas Gleixner --- arch/x86/ras/mce_amd_inj.c | 4 +++- drivers/edac/mce_amd.c | 3 +++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/x86/ras/mce_amd_inj.c b/arch/x86/ras/mce_amd_inj.c index ff8eb1a9ce6d..f4b442cc8a3e 100644 --- a/arch/x86/ras/mce_amd_inj.c +++ b/arch/x86/ras/mce_amd_inj.c @@ -308,7 +308,9 @@ static void do_inject(void) * only on the node base core. Refer to D18F3x44[NbMcaToMstCpuEn] for * Fam10h and later BKDGs.
*/ - if (static_cpu_has(X86_FEATURE_AMD_DCM) && b == 4) { + if (static_cpu_has(X86_FEATURE_AMD_DCM) && + b == 4 && + boot_cpu_data.x86 < 0x17) { toggle_nb_mca_mst_cpu(amd_get_nb_id(cpu)); cpu = get_nbc_for_node(amd_get_nb_id(cpu)); } diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index e8855a4f92d9..daaac2c79ca7 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -859,6 +859,9 @@ static void decode_smca_errors(struct mce *m) if (m->bank >= ARRAY_SIZE(smca_banks)) return; + if (boot_cpu_data.x86 >= 0x17 && m->bank == 4) + pr_emerg(HW_ERR "Bank 4 is reserved on Fam17h.\n"); + type = smca_banks[m->bank].type; if (!type) return; From 7cc4ef8ed132e72ba44804cae3ddb2587ff757d6 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 12 Sep 2016 09:59:41 +0200 Subject: [PATCH 19/21] x86/RAS/mce_amd_inj: Fix some W= warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In particular: arch/x86/ras/mce_amd_inj.c: In function ‘prepare_msrs’: arch/x86/ras/mce_amd_inj.c:249:13: warning: declaration of ‘i_mce’ shadows a global declaration [-Wshadow] struct mce i_mce = *(struct mce *)info; ^~~~~ arch/x86/ras/mce_amd_inj.c: In function ‘init_mce_inject’: arch/x86/ras/mce_amd_inj.c:453:16: warning: comparison between signed and unsigned integer expressions [-Wsign-compare] for (i = 0; i < ARRAY_SIZE(dfs_fls); i++) { Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/20160912075941.24699-16-bp@alien8.de Signed-off-by: Thomas Gleixner --- arch/x86/ras/mce_amd_inj.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/arch/x86/ras/mce_amd_inj.c b/arch/x86/ras/mce_amd_inj.c index f4b442cc8a3e..cd318d93099e 100644 --- a/arch/x86/ras/mce_amd_inj.c +++ b/arch/x86/ras/mce_amd_inj.c @@ -246,28 +246,27 @@ static void toggle_nb_mca_mst_cpu(u16 nid) static void prepare_msrs(void *info) { - struct mce i_mce = *(struct mce *)info; - u8 b = i_mce.bank; + struct mce m 
= *(struct mce *)info; + u8 b = m.bank; - wrmsrl(MSR_IA32_MCG_STATUS, i_mce.mcgstatus); + wrmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus); if (boot_cpu_has(X86_FEATURE_SMCA)) { - if (i_mce.inject_flags == DFR_INT_INJ) { - wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(b), i_mce.status); - wrmsrl(MSR_AMD64_SMCA_MCx_DEADDR(b), i_mce.addr); + if (m.inject_flags == DFR_INT_INJ) { + wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(b), m.status); + wrmsrl(MSR_AMD64_SMCA_MCx_DEADDR(b), m.addr); } else { - wrmsrl(MSR_AMD64_SMCA_MCx_STATUS(b), i_mce.status); - wrmsrl(MSR_AMD64_SMCA_MCx_ADDR(b), i_mce.addr); + wrmsrl(MSR_AMD64_SMCA_MCx_STATUS(b), m.status); + wrmsrl(MSR_AMD64_SMCA_MCx_ADDR(b), m.addr); } - wrmsrl(MSR_AMD64_SMCA_MCx_MISC(b), i_mce.misc); - wrmsrl(MSR_AMD64_SMCA_MCx_SYND(b), i_mce.synd); + wrmsrl(MSR_AMD64_SMCA_MCx_MISC(b), m.misc); + wrmsrl(MSR_AMD64_SMCA_MCx_SYND(b), m.synd); } else { - wrmsrl(MSR_IA32_MCx_STATUS(b), i_mce.status); - wrmsrl(MSR_IA32_MCx_ADDR(b), i_mce.addr); - wrmsrl(MSR_IA32_MCx_MISC(b), i_mce.misc); + wrmsrl(MSR_IA32_MCx_STATUS(b), m.status); + wrmsrl(MSR_IA32_MCx_ADDR(b), m.addr); + wrmsrl(MSR_IA32_MCx_MISC(b), m.misc); } - } static void do_inject(void) @@ -441,7 +440,7 @@ static struct dfs_node { static int __init init_mce_inject(void) { - int i; + unsigned int i; u64 cap; rdmsrl(MSR_IA32_MCG_CAP, cap); From 8b44f00f8c952ab6eb658090383571b2ec7d253f Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 26 Sep 2016 10:31:51 +0200 Subject: [PATCH 20/21] x86/RAS/mce_amd_inj: Fix signed wrap around when decrementing index 'i' Change predecrement compare to post decrement compare to avoid an unsigned integer wrap-around comparison when decrementing in the while loop. For example, if the debugfs_create_file() fails when 'i' is zero, the current situation will predecrement 'i' in the while loop, wrapping 'i' to the maximum signed integer and cause multiple out of bounds reads on dfs_fls[i].d as the loop iterates to zero.
Also, as Borislav Petkov suggested, return -ENODEV rather than -ENOMEM on the error condition. Signed-off-by: Colin Ian King Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yazen Ghannam Link: http://lkml.kernel.org/r/20160926083152.30848-2-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/ras/mce_amd_inj.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/ras/mce_amd_inj.c b/arch/x86/ras/mce_amd_inj.c index cd318d93099e..20b227f63407 100644 --- a/arch/x86/ras/mce_amd_inj.c +++ b/arch/x86/ras/mce_amd_inj.c @@ -464,13 +464,13 @@ static int __init init_mce_inject(void) return 0; err_dfs_add: - while (--i >= 0) + while (i-- > 0) debugfs_remove(dfs_fls[i].d); debugfs_remove(dfs_inj); dfs_inj = NULL; - return -ENOMEM; + return -ENODEV; } static void __exit exit_mce_inject(void) From b199ac6c4943aa0db246163bf6b483e2bb53431b Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 26 Sep 2016 10:31:52 +0200 Subject: [PATCH 21/21] x86/RAS/mce_amd_inj: Remove debugfs dir recursively on exit Simplify exit_mce_inject() by using debugfs_remove_recursive() and do away with the noodling over the dentry elements. 
Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20160926083152.30848-3-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/ras/mce_amd_inj.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/x86/ras/mce_amd_inj.c b/arch/x86/ras/mce_amd_inj.c index 20b227f63407..1ac76479c266 100644 --- a/arch/x86/ras/mce_amd_inj.c +++ b/arch/x86/ras/mce_amd_inj.c @@ -475,15 +475,11 @@ static int __init init_mce_inject(void) static void __exit exit_mce_inject(void) { - int i; - for (i = 0; i < ARRAY_SIZE(dfs_fls); i++) - debugfs_remove(dfs_fls[i].d); + debugfs_remove_recursive(dfs_inj); + dfs_inj = NULL; memset(&dfs_fls, 0, sizeof(dfs_fls)); - - debugfs_remove(dfs_inj); - dfs_inj = NULL; } module_init(init_mce_inject); module_exit(exit_mce_inject);