mirror of https://gitee.com/openkylin/linux.git
Merge branch 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull RAS updates from Ingo Molnar: "The main changes were: - Lots of enhancements for AMD SMCA (Scalable MCA features/extensions) systems: extract, decode and print more hardware error information and add matching support on the injection/testing side as well. (Yazen Ghannam) - Various MCE handling improvements on modern Intel Xeons. (Tony Luck) - Plus misc fixes and enhancements" * 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (21 commits) x86/RAS/mce_amd_inj: Remove debugfs dir recursively on exit x86/RAS/mce_amd_inj: Fix signed wrap around when decrementing index 'i' x86/RAS/mce_amd_inj: Fix some W= warnings x86/MCE/AMD, EDAC: Handle reserved bank 4 on Fam17h properly x86/mce/AMD: Extract the error address on SMCA systems x86/mce, EDAC/mce_amd: Print MCA_SYND and MCA_IPID during MCE on SMCA systems x86/mce/AMD: Save MCA_IPID in MCE struct on SMCA systems x86/mce/AMD: Ensure the deferred error interrupt is of type APIC on SMCA systems x86/mce/AMD: Update sysfs bank names for SMCA systems x86/mce/AMD, EDAC/mce_amd: Define and use tables for known SMCA IP types EDAC/mce_amd: Use SMCA prefix for error descriptions arrays EDAC/mce_amd: Add missing SMCA error descriptions x86/mce/AMD: Read MSRs on the CPU allocating the threshold blocks x86/RAS: Add syndrome support to mce_amd_inj EDAC/mce_amd: Print syndrome register value on SMCA systems x86/mce: Add support for new MCA_SYND register x86/mce/AMD: Use msr_ops.misc() in allocate_threshold_blocks() x86/mce: Drop X86_FEATURE_MCE_RECOVERY and the related model string test x86/mce: Improve memcpy_mcsafe() x86/mce: Add PCI quirks to identify Xeons with machine check recovery ...
This commit is contained in:
commit
e606d81d2d
|
@ -106,7 +106,6 @@
|
|||
#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */
|
||||
#define X86_FEATURE_EAGER_FPU ( 3*32+29) /* "eagerfpu" Non lazy FPU restore */
|
||||
#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
|
||||
#define X86_FEATURE_MCE_RECOVERY ( 3*32+31) /* cpu has recoverable machine checks */
|
||||
|
||||
/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
|
||||
#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */
|
||||
|
|
|
@ -40,9 +40,10 @@
|
|||
#define MCI_STATUS_AR (1ULL<<55) /* Action required */
|
||||
|
||||
/* AMD-specific bits */
|
||||
#define MCI_STATUS_TCC (1ULL<<55) /* Task context corrupt */
|
||||
#define MCI_STATUS_SYNDV (1ULL<<53) /* synd reg. valid */
|
||||
#define MCI_STATUS_DEFERRED (1ULL<<44) /* uncorrected error, deferred exception */
|
||||
#define MCI_STATUS_POISON (1ULL<<43) /* access poisonous data */
|
||||
#define MCI_STATUS_TCC (1ULL<<55) /* Task context corrupt */
|
||||
|
||||
/*
|
||||
* McaX field if set indicates a given bank supports MCA extensions:
|
||||
|
@ -110,6 +111,7 @@
|
|||
#define MSR_AMD64_SMCA_MC0_MISC0 0xc0002003
|
||||
#define MSR_AMD64_SMCA_MC0_CONFIG 0xc0002004
|
||||
#define MSR_AMD64_SMCA_MC0_IPID 0xc0002005
|
||||
#define MSR_AMD64_SMCA_MC0_SYND 0xc0002006
|
||||
#define MSR_AMD64_SMCA_MC0_DESTAT 0xc0002008
|
||||
#define MSR_AMD64_SMCA_MC0_DEADDR 0xc0002009
|
||||
#define MSR_AMD64_SMCA_MC0_MISC1 0xc000200a
|
||||
|
@ -119,6 +121,7 @@
|
|||
#define MSR_AMD64_SMCA_MCx_MISC(x) (MSR_AMD64_SMCA_MC0_MISC0 + 0x10*(x))
|
||||
#define MSR_AMD64_SMCA_MCx_CONFIG(x) (MSR_AMD64_SMCA_MC0_CONFIG + 0x10*(x))
|
||||
#define MSR_AMD64_SMCA_MCx_IPID(x) (MSR_AMD64_SMCA_MC0_IPID + 0x10*(x))
|
||||
#define MSR_AMD64_SMCA_MCx_SYND(x) (MSR_AMD64_SMCA_MC0_SYND + 0x10*(x))
|
||||
#define MSR_AMD64_SMCA_MCx_DESTAT(x) (MSR_AMD64_SMCA_MC0_DESTAT + 0x10*(x))
|
||||
#define MSR_AMD64_SMCA_MCx_DEADDR(x) (MSR_AMD64_SMCA_MC0_DEADDR + 0x10*(x))
|
||||
/*
 * MSR address of MISC register y (counted from MISC1) of SMCA bank x; each
 * bank occupies a stride of 0x10 MSRs. Both arguments are parenthesized so
 * that compound expressions (e.g. "block - 1" or a ternary) expand with the
 * intended precedence.
 */
#define MSR_AMD64_SMCA_MCx_MISCy(x, y) ((MSR_AMD64_SMCA_MC0_MISC1 + (y)) + (0x10*(x)))
|
||||
|
@ -334,44 +337,47 @@ extern void apei_mce_report_mem_error(int corrected,
|
|||
* Scalable MCA.
|
||||
*/
|
||||
#ifdef CONFIG_X86_MCE_AMD
|
||||
enum amd_ip_types {
|
||||
SMCA_F17H_CORE = 0, /* Core errors */
|
||||
SMCA_DF, /* Data Fabric */
|
||||
SMCA_UMC, /* Unified Memory Controller */
|
||||
SMCA_PB, /* Parameter Block */
|
||||
SMCA_PSP, /* Platform Security Processor */
|
||||
SMCA_SMU, /* System Management Unit */
|
||||
N_AMD_IP_TYPES
|
||||
};
|
||||
|
||||
struct amd_hwid {
|
||||
const char *name;
|
||||
unsigned int hwid;
|
||||
};
|
||||
|
||||
extern struct amd_hwid amd_hwids[N_AMD_IP_TYPES];
|
||||
|
||||
enum amd_core_mca_blocks {
|
||||
/* These may be used by multiple smca_hwid_mcatypes */
|
||||
enum smca_bank_types {
|
||||
SMCA_LS = 0, /* Load Store */
|
||||
SMCA_IF, /* Instruction Fetch */
|
||||
SMCA_L2_CACHE, /* L2 cache */
|
||||
SMCA_DE, /* Decoder unit */
|
||||
RES, /* Reserved */
|
||||
SMCA_EX, /* Execution unit */
|
||||
SMCA_L2_CACHE, /* L2 Cache */
|
||||
SMCA_DE, /* Decoder Unit */
|
||||
SMCA_EX, /* Execution Unit */
|
||||
SMCA_FP, /* Floating Point */
|
||||
SMCA_L3_CACHE, /* L3 cache */
|
||||
N_CORE_MCA_BLOCKS
|
||||
SMCA_L3_CACHE, /* L3 Cache */
|
||||
SMCA_CS, /* Coherent Slave */
|
||||
SMCA_PIE, /* Power, Interrupts, etc. */
|
||||
SMCA_UMC, /* Unified Memory Controller */
|
||||
SMCA_PB, /* Parameter Block */
|
||||
SMCA_PSP, /* Platform Security Processor */
|
||||
SMCA_SMU, /* System Management Unit */
|
||||
N_SMCA_BANK_TYPES
|
||||
};
|
||||
|
||||
extern const char * const amd_core_mcablock_names[N_CORE_MCA_BLOCKS];
|
||||
|
||||
enum amd_df_mca_blocks {
|
||||
SMCA_CS = 0, /* Coherent Slave */
|
||||
SMCA_PIE, /* Power management, Interrupts, etc */
|
||||
N_DF_BLOCKS
|
||||
struct smca_bank_name {
|
||||
const char *name; /* Short name for sysfs */
|
||||
const char *long_name; /* Long name for pretty-printing */
|
||||
};
|
||||
|
||||
extern const char * const amd_df_mcablock_names[N_DF_BLOCKS];
|
||||
extern struct smca_bank_name smca_bank_names[N_SMCA_BANK_TYPES];
|
||||
|
||||
/*
 * Pack an SMCA (hwid, mcatype) pair into a single value: hwid shifted left by
 * 16, OR-ed with mcatype. Both arguments are parenthesized so that compound
 * expressions such as "a | b" are shifted/combined as a whole.
 */
#define HWID_MCATYPE(hwid, mcatype) (((hwid) << 16) | (mcatype))
|
||||
|
||||
struct smca_hwid_mcatype {
|
||||
unsigned int bank_type; /* Use with smca_bank_types for easy indexing. */
|
||||
u32 hwid_mcatype; /* (hwid,mcatype) tuple */
|
||||
u32 xec_bitmap; /* Bitmap of valid ExtErrorCodes; current max is 21. */
|
||||
};
|
||||
|
||||
struct smca_bank_info {
|
||||
struct smca_hwid_mcatype *type;
|
||||
u32 type_instance;
|
||||
};
|
||||
|
||||
extern struct smca_bank_info smca_banks[MAX_NR_BANKS];
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* _ASM_X86_MCE_H */
|
||||
|
|
|
@ -46,10 +46,7 @@ static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n)
|
|||
|
||||
static inline int arch_memcpy_from_pmem(void *dst, const void *src, size_t n)
|
||||
{
|
||||
if (static_cpu_has(X86_FEATURE_MCE_RECOVERY))
|
||||
return memcpy_mcsafe(dst, src, n);
|
||||
memcpy(dst, src, n);
|
||||
return 0;
|
||||
return memcpy_mcsafe(dst, src, n);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
#define _ASM_X86_STRING_64_H
|
||||
|
||||
#ifdef __KERNEL__
|
||||
#include <linux/jump_label.h>
|
||||
|
||||
/* Written 2002 by Andi Kleen */
|
||||
|
||||
|
@ -78,6 +79,9 @@ int strcmp(const char *cs, const char *ct);
|
|||
#define memset(s, c, n) __memset(s, c, n)
|
||||
#endif
|
||||
|
||||
__must_check int memcpy_mcsafe_unrolled(void *dst, const void *src, size_t cnt);
|
||||
DECLARE_STATIC_KEY_FALSE(mcsafe_key);
|
||||
|
||||
/**
|
||||
* memcpy_mcsafe - copy memory with indication if a machine check happened
|
||||
*
|
||||
|
@ -86,10 +90,23 @@ int strcmp(const char *cs, const char *ct);
|
|||
* @cnt: number of bytes to copy
|
||||
*
|
||||
* Low level memory copy function that catches machine checks
|
||||
* We only call into the "safe" function on systems that can
|
||||
* actually do machine check recovery. Everyone else can just
|
||||
* use memcpy().
|
||||
*
|
||||
* Return 0 for success, -EFAULT for fail
|
||||
*/
|
||||
int memcpy_mcsafe(void *dst, const void *src, size_t cnt);
|
||||
static __always_inline __must_check int
|
||||
memcpy_mcsafe(void *dst, const void *src, size_t cnt)
|
||||
{
|
||||
/*
 * With CONFIG_X86_MCE and the mcsafe_key static branch enabled, hand the
 * copy to memcpy_mcsafe_unrolled() and return its result.
 */
#ifdef CONFIG_X86_MCE
|
||||
if (static_branch_unlikely(&mcsafe_key))
|
||||
return memcpy_mcsafe_unrolled(dst, src, cnt);
|
||||
else
|
||||
#endif
|
||||
/*
 * Otherwise (key disabled, or CONFIG_X86_MCE not set so the "if/else" above
 * is compiled out) do a plain memcpy() and report success.
 */
memcpy(dst, src, cnt);
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif /* __KERNEL__ */
|
||||
|
||||
|
|
|
@ -26,6 +26,8 @@ struct mce {
|
|||
__u32 socketid; /* CPU socket ID */
|
||||
__u32 apicid; /* CPU initial apic ID */
|
||||
__u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */
|
||||
__u64 synd; /* MCA_SYND MSR: only valid on SMCA systems */
|
||||
__u64 ipid; /* MCA_IPID MSR: only valid on SMCA systems */
|
||||
};
|
||||
|
||||
#define MCE_GET_RECORD_LEN _IOR('M', 1, int)
|
||||
|
|
|
@ -41,6 +41,7 @@
|
|||
#include <linux/debugfs.h>
|
||||
#include <linux/irq_work.h>
|
||||
#include <linux/export.h>
|
||||
#include <linux/jump_label.h>
|
||||
|
||||
#include <asm/processor.h>
|
||||
#include <asm/traps.h>
|
||||
|
@ -292,6 +293,13 @@ static void print_mce(struct mce *m)
|
|||
if (m->misc)
|
||||
pr_cont("MISC %llx ", m->misc);
|
||||
|
||||
if (mce_flags.smca) {
|
||||
if (m->synd)
|
||||
pr_cont("SYND %llx ", m->synd);
|
||||
if (m->ipid)
|
||||
pr_cont("IPID %llx ", m->ipid);
|
||||
}
|
||||
|
||||
pr_cont("\n");
|
||||
/*
|
||||
* Note this output is parsed by external tools and old fields
|
||||
|
@ -568,6 +576,7 @@ static void mce_read_aux(struct mce *m, int i)
|
|||
{
|
||||
if (m->status & MCI_STATUS_MISCV)
|
||||
m->misc = mce_rdmsrl(msr_ops.misc(i));
|
||||
|
||||
if (m->status & MCI_STATUS_ADDRV) {
|
||||
m->addr = mce_rdmsrl(msr_ops.addr(i));
|
||||
|
||||
|
@ -579,6 +588,23 @@ static void mce_read_aux(struct mce *m, int i)
|
|||
m->addr >>= shift;
|
||||
m->addr <<= shift;
|
||||
}
|
||||
|
||||
/*
|
||||
* Extract [55:<lsb>] where lsb is the least significant
|
||||
* *valid* bit of the address bits.
|
||||
*/
|
||||
if (mce_flags.smca) {
|
||||
u8 lsb = (m->addr >> 56) & 0x3f;
|
||||
|
||||
m->addr &= GENMASK_ULL(55, lsb);
|
||||
}
|
||||
}
|
||||
|
||||
if (mce_flags.smca) {
|
||||
m->ipid = mce_rdmsrl(MSR_AMD64_SMCA_MCx_IPID(i));
|
||||
|
||||
if (m->status & MCI_STATUS_SYNDV)
|
||||
m->synd = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND(i));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1633,17 +1659,6 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
|
|||
|
||||
if (c->x86 == 6 && c->x86_model == 45)
|
||||
quirk_no_way_out = quirk_sandybridge_ifu;
|
||||
/*
|
||||
* MCG_CAP.MCG_SER_P is necessary but not sufficient to know
|
||||
* whether this processor will actually generate recoverable
|
||||
* machine checks. Check to see if this is an E7 model Xeon.
|
||||
* We can't do a model number check because E5 and E7 use the
|
||||
* same model number. E5 doesn't support recovery, E7 does.
|
||||
*/
|
||||
if (mca_cfg.recovery || (mca_cfg.ser &&
|
||||
!strncmp(c->x86_model_id,
|
||||
"Intel(R) Xeon(R) CPU E7-", 24)))
|
||||
set_cpu_cap(c, X86_FEATURE_MCE_RECOVERY);
|
||||
}
|
||||
if (cfg->monarch_timeout < 0)
|
||||
cfg->monarch_timeout = 0;
|
||||
|
@ -2080,6 +2095,7 @@ void mce_disable_bank(int bank)
|
|||
* mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
|
||||
* mce=nobootlog Don't log MCEs from before booting.
|
||||
* mce=bios_cmci_threshold Don't program the CMCI threshold
|
||||
* mce=recovery force enable memcpy_mcsafe()
|
||||
*/
|
||||
static int __init mcheck_enable(char *str)
|
||||
{
|
||||
|
@ -2676,8 +2692,14 @@ static int __init mcheck_debugfs_init(void)
|
|||
static int __init mcheck_debugfs_init(void) { return -EINVAL; }
|
||||
#endif
|
||||
|
||||
DEFINE_STATIC_KEY_FALSE(mcsafe_key);
|
||||
EXPORT_SYMBOL_GPL(mcsafe_key);
|
||||
|
||||
static int __init mcheck_late_init(void)
|
||||
{
|
||||
if (mca_cfg.recovery)
|
||||
static_branch_inc(&mcsafe_key);
|
||||
|
||||
mcheck_debugfs_init();
|
||||
|
||||
/*
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
#include <linux/init.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/string.h>
|
||||
|
||||
#include <asm/amd_nb.h>
|
||||
#include <asm/apic.h>
|
||||
|
@ -63,34 +64,71 @@ static const char * const th_names[] = {
|
|||
"execution_unit",
|
||||
};
|
||||
|
||||
/* Define HWID to IP type mappings for Scalable MCA */
|
||||
struct amd_hwid amd_hwids[] = {
|
||||
[SMCA_F17H_CORE] = { "f17h_core", 0xB0 },
|
||||
[SMCA_DF] = { "data_fabric", 0x2E },
|
||||
[SMCA_UMC] = { "umc", 0x96 },
|
||||
[SMCA_PB] = { "param_block", 0x5 },
|
||||
[SMCA_PSP] = { "psp", 0xFF },
|
||||
[SMCA_SMU] = { "smu", 0x1 },
|
||||
static const char * const smca_umc_block_names[] = {
|
||||
"dram_ecc",
|
||||
"misc_umc"
|
||||
};
|
||||
EXPORT_SYMBOL_GPL(amd_hwids);
|
||||
|
||||
const char * const amd_core_mcablock_names[] = {
|
||||
[SMCA_LS] = "load_store",
|
||||
[SMCA_IF] = "insn_fetch",
|
||||
[SMCA_L2_CACHE] = "l2_cache",
|
||||
[SMCA_DE] = "decode_unit",
|
||||
[RES] = "",
|
||||
[SMCA_EX] = "execution_unit",
|
||||
[SMCA_FP] = "floating_point",
|
||||
[SMCA_L3_CACHE] = "l3_cache",
|
||||
struct smca_bank_name smca_bank_names[] = {
|
||||
[SMCA_LS] = { "load_store", "Load Store Unit" },
|
||||
[SMCA_IF] = { "insn_fetch", "Instruction Fetch Unit" },
|
||||
[SMCA_L2_CACHE] = { "l2_cache", "L2 Cache" },
|
||||
[SMCA_DE] = { "decode_unit", "Decode Unit" },
|
||||
[SMCA_EX] = { "execution_unit", "Execution Unit" },
|
||||
[SMCA_FP] = { "floating_point", "Floating Point Unit" },
|
||||
[SMCA_L3_CACHE] = { "l3_cache", "L3 Cache" },
|
||||
[SMCA_CS] = { "coherent_slave", "Coherent Slave" },
|
||||
[SMCA_PIE] = { "pie", "Power, Interrupts, etc." },
|
||||
[SMCA_UMC] = { "umc", "Unified Memory Controller" },
|
||||
[SMCA_PB] = { "param_block", "Parameter Block" },
|
||||
[SMCA_PSP] = { "psp", "Platform Security Processor" },
|
||||
[SMCA_SMU] = { "smu", "System Management Unit" },
|
||||
};
|
||||
EXPORT_SYMBOL_GPL(amd_core_mcablock_names);
|
||||
EXPORT_SYMBOL_GPL(smca_bank_names);
|
||||
|
||||
const char * const amd_df_mcablock_names[] = {
|
||||
[SMCA_CS] = "coherent_slave",
|
||||
[SMCA_PIE] = "pie",
|
||||
static struct smca_hwid_mcatype smca_hwid_mcatypes[] = {
|
||||
/* { bank_type, hwid_mcatype, xec_bitmap } */
|
||||
|
||||
/* ZN Core (HWID=0xB0) MCA types */
|
||||
{ SMCA_LS, HWID_MCATYPE(0xB0, 0x0), 0x1FFFEF },
|
||||
{ SMCA_IF, HWID_MCATYPE(0xB0, 0x1), 0x3FFF },
|
||||
{ SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2), 0xF },
|
||||
{ SMCA_DE, HWID_MCATYPE(0xB0, 0x3), 0x1FF },
|
||||
/* HWID 0xB0 MCATYPE 0x4 is Reserved */
|
||||
{ SMCA_EX, HWID_MCATYPE(0xB0, 0x5), 0x7FF },
|
||||
{ SMCA_FP, HWID_MCATYPE(0xB0, 0x6), 0x7F },
|
||||
{ SMCA_L3_CACHE, HWID_MCATYPE(0xB0, 0x7), 0xFF },
|
||||
|
||||
/* Data Fabric MCA types */
|
||||
{ SMCA_CS, HWID_MCATYPE(0x2E, 0x0), 0x1FF },
|
||||
{ SMCA_PIE, HWID_MCATYPE(0x2E, 0x1), 0xF },
|
||||
|
||||
/* Unified Memory Controller MCA type */
|
||||
{ SMCA_UMC, HWID_MCATYPE(0x96, 0x0), 0x3F },
|
||||
|
||||
/* Parameter Block MCA type */
|
||||
{ SMCA_PB, HWID_MCATYPE(0x05, 0x0), 0x1 },
|
||||
|
||||
/* Platform Security Processor MCA type */
|
||||
{ SMCA_PSP, HWID_MCATYPE(0xFF, 0x0), 0x1 },
|
||||
|
||||
/* System Management Unit MCA type */
|
||||
{ SMCA_SMU, HWID_MCATYPE(0x01, 0x0), 0x1 },
|
||||
};
|
||||
EXPORT_SYMBOL_GPL(amd_df_mcablock_names);
|
||||
|
||||
struct smca_bank_info smca_banks[MAX_NR_BANKS];
|
||||
EXPORT_SYMBOL_GPL(smca_banks);
|
||||
|
||||
/*
|
||||
* In SMCA enabled processors, we can have multiple banks for a given IP type.
|
||||
* So to define a unique name for each bank, we use a temp c-string to append
|
||||
* the MCA_IPID[InstanceId] to type's name in get_name().
|
||||
*
|
||||
* InstanceId is 32 bits which is 8 characters. Make sure MAX_MCATYPE_NAME_LEN
|
||||
* is greater than 8 plus 1 (for underscore) plus length of longest type name.
|
||||
*/
|
||||
#define MAX_MCATYPE_NAME_LEN 30
|
||||
static char buf_mcatype[MAX_MCATYPE_NAME_LEN];
|
||||
|
||||
static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
|
||||
static DEFINE_PER_CPU(unsigned int, bank_map); /* see which banks are on */
|
||||
|
@ -108,6 +146,36 @@ void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt;
|
|||
* CPU Initialization
|
||||
*/
|
||||
|
||||
/*
 * Record the SMCA type and instance of @bank in smca_banks[].
 *
 * Reads the bank's MCA_IPID MSR, builds the (hwid, mcatype) tuple from the
 * high dword and searches smca_hwid_mcatypes[] for it. On a match, a pointer
 * to the table entry and the low dword of MCA_IPID (the instance id) are
 * stored in smca_banks[bank]; without a match the entry is left untouched.
 *
 * Does nothing when called on a CPU other than 0, or when the MSR read fails
 * (a warning is printed in the latter case).
 */
static void get_smca_bank_info(unsigned int bank)
|
||||
{
|
||||
unsigned int i, hwid_mcatype, cpu = smp_processor_id();
|
||||
struct smca_hwid_mcatype *type;
|
||||
u32 high, instanceId;
|
||||
u16 hwid, mcatype;
|
||||

||||
/* Collect bank_info using CPU 0 for now. */
|
||||
if (cpu)
|
||||
return;
|
||||

||||
/* Low dword of MCA_IPID lands in instanceId, high dword in high. */
if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_IPID(bank), &instanceId, &high)) {
|
||||
pr_warn("Failed to read MCA_IPID for bank %d\n", bank);
|
||||
return;
|
||||
}
|
||||

||||
/* Both fields are extracted from the high dword of MCA_IPID. */
hwid = high & MCI_IPID_HWID;
|
||||
mcatype = (high & MCI_IPID_MCATYPE) >> 16;
|
||||
hwid_mcatype = HWID_MCATYPE(hwid, mcatype);
|
||||

||||
/* Linear search; the first matching table entry wins. */
for (i = 0; i < ARRAY_SIZE(smca_hwid_mcatypes); i++) {
|
||||
type = &smca_hwid_mcatypes[i];
|
||||
if (hwid_mcatype == type->hwid_mcatype) {
|
||||
smca_banks[bank].type = type;
|
||||
smca_banks[bank].type_instance = instanceId;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct thresh_restart {
|
||||
struct threshold_block *b;
|
||||
int reset;
|
||||
|
@ -293,7 +361,7 @@ static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
|
|||
wrmsr(MSR_CU_DEF_ERR, low, high);
|
||||
}
|
||||
|
||||
static u32 get_block_address(u32 current_addr, u32 low, u32 high,
|
||||
static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 high,
|
||||
unsigned int bank, unsigned int block)
|
||||
{
|
||||
u32 addr = 0, offset = 0;
|
||||
|
@ -309,13 +377,13 @@ static u32 get_block_address(u32 current_addr, u32 low, u32 high,
|
|||
*/
|
||||
u32 low, high;
|
||||
|
||||
if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
|
||||
if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
|
||||
return addr;
|
||||
|
||||
if (!(low & MCI_CONFIG_MCAX))
|
||||
return addr;
|
||||
|
||||
if (!rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
|
||||
if (!rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
|
||||
(low & MASK_BLKPTR_LO))
|
||||
addr = MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
|
||||
}
|
||||
|
@ -395,6 +463,20 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
|
|||
*/
|
||||
smca_high &= ~BIT(2);
|
||||
|
||||
/*
|
||||
* SMCA sets the Deferred Error Interrupt type per bank.
|
||||
*
|
||||
* MCA_CONFIG[DeferredIntTypeSupported] is bit 5, and tells us
|
||||
* if the DeferredIntType bit field is available.
|
||||
*
|
||||
* MCA_CONFIG[DeferredIntType] is bits [38:37] ([6:5] in the
|
||||
* high portion of the MSR). OS should set this to 0x1 to enable
|
||||
* APIC based interrupt. First, check that no interrupt has been
|
||||
* set.
|
||||
*/
|
||||
if ((smca_low & BIT(5)) && !((smca_high >> 5) & 0x3))
|
||||
smca_high |= BIT(5);
|
||||
|
||||
wrmsr(smca_addr, smca_low, smca_high);
|
||||
}
|
||||
|
||||
|
@ -421,12 +503,15 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
|
|||
void mce_amd_feature_init(struct cpuinfo_x86 *c)
|
||||
{
|
||||
u32 low = 0, high = 0, address = 0;
|
||||
unsigned int bank, block;
|
||||
unsigned int bank, block, cpu = smp_processor_id();
|
||||
int offset = -1;
|
||||
|
||||
for (bank = 0; bank < mca_cfg.banks; ++bank) {
|
||||
if (mce_flags.smca)
|
||||
get_smca_bank_info(bank);
|
||||
|
||||
for (block = 0; block < NR_BLOCKS; ++block) {
|
||||
address = get_block_address(address, low, high, bank, block);
|
||||
address = get_block_address(cpu, address, low, high, bank, block);
|
||||
if (!address)
|
||||
break;
|
||||
|
||||
|
@ -476,9 +561,27 @@ __log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc)
|
|||
if (threshold_err)
|
||||
m.misc = misc;
|
||||
|
||||
if (m.status & MCI_STATUS_ADDRV)
|
||||
if (m.status & MCI_STATUS_ADDRV) {
|
||||
rdmsrl(msr_addr, m.addr);
|
||||
|
||||
/*
|
||||
* Extract [55:<lsb>] where lsb is the least significant
|
||||
* *valid* bit of the address bits.
|
||||
*/
|
||||
if (mce_flags.smca) {
|
||||
u8 lsb = (m.addr >> 56) & 0x3f;
|
||||
|
||||
m.addr &= GENMASK_ULL(55, lsb);
|
||||
}
|
||||
}
|
||||
|
||||
if (mce_flags.smca) {
|
||||
rdmsrl(MSR_AMD64_SMCA_MCx_IPID(bank), m.ipid);
|
||||
|
||||
if (m.status & MCI_STATUS_SYNDV)
|
||||
rdmsrl(MSR_AMD64_SMCA_MCx_SYND(bank), m.synd);
|
||||
}
|
||||
|
||||
mce_log(&m);
|
||||
|
||||
wrmsrl(msr_status, 0);
|
||||
|
@ -541,15 +644,14 @@ static void amd_deferred_error_interrupt(void)
|
|||
static void amd_threshold_interrupt(void)
|
||||
{
|
||||
u32 low = 0, high = 0, address = 0;
|
||||
int cpu = smp_processor_id();
|
||||
unsigned int bank, block;
|
||||
unsigned int bank, block, cpu = smp_processor_id();
|
||||
|
||||
/* assume first bank caused it */
|
||||
for (bank = 0; bank < mca_cfg.banks; ++bank) {
|
||||
if (!(per_cpu(bank_map, cpu) & (1 << bank)))
|
||||
continue;
|
||||
for (block = 0; block < NR_BLOCKS; ++block) {
|
||||
address = get_block_address(address, low, high, bank, block);
|
||||
address = get_block_address(cpu, address, low, high, bank, block);
|
||||
if (!address)
|
||||
break;
|
||||
|
||||
|
@ -713,6 +815,34 @@ static struct kobj_type threshold_ktype = {
|
|||
.default_attrs = default_attrs,
|
||||
};
|
||||
|
||||
/*
 * Return the name to use for @bank or, when @b is non-NULL, for that
 * threshold block of the bank.
 *
 * Non-SMCA systems: bank4_names(b) for a block of bank 4, th_names[bank]
 * otherwise.
 *
 * SMCA systems: NULL if no type was recorded for the bank in smca_banks[].
 * For a block of a UMC bank, the smca_umc_block_names[] entry selected by
 * b->block, or NULL if b->block is out of range. In all other cases
 * "<short bank name>_<type_instance in hex>" is formatted into the static
 * buffer buf_mcatype and that buffer is returned, so the result is
 * overwritten by the next call that takes this path.
 */
static const char *get_name(unsigned int bank, struct threshold_block *b)
|
||||
{
|
||||
unsigned int bank_type;
|
||||

||||
if (!mce_flags.smca) {
|
||||
if (b && bank == 4)
|
||||
return bank4_names(b);
|
||||

||||
return th_names[bank];
|
||||
}
|
||||

||||
/* No known (hwid, mcatype) was recorded for this bank. */
if (!smca_banks[bank].type)
|
||||
return NULL;
|
||||

||||
bank_type = smca_banks[bank].type->bank_type;
|
||||

||||
if (b && bank_type == SMCA_UMC) {
|
||||
if (b->block < ARRAY_SIZE(smca_umc_block_names))
|
||||
return smca_umc_block_names[b->block];
|
||||
return NULL;
|
||||
}
|
||||

||||
/* snprintf() truncates to MAX_MCATYPE_NAME_LEN if the name is too long. */
snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN,
|
||||
"%s_%x", smca_bank_names[bank_type].name,
|
||||
smca_banks[bank].type_instance);
|
||||
return buf_mcatype;
|
||||
}
|
||||
|
||||
static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
|
||||
unsigned int block, u32 address)
|
||||
{
|
||||
|
@ -767,11 +897,11 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
|
|||
|
||||
err = kobject_init_and_add(&b->kobj, &threshold_ktype,
|
||||
per_cpu(threshold_banks, cpu)[bank]->kobj,
|
||||
(bank == 4 ? bank4_names(b) : th_names[bank]));
|
||||
get_name(bank, b));
|
||||
if (err)
|
||||
goto out_free;
|
||||
recurse:
|
||||
address = get_block_address(address, low, high, bank, ++block);
|
||||
address = get_block_address(cpu, address, low, high, bank, ++block);
|
||||
if (!address)
|
||||
return 0;
|
||||
|
||||
|
@ -822,7 +952,7 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank)
|
|||
struct device *dev = per_cpu(mce_device, cpu);
|
||||
struct amd_northbridge *nb = NULL;
|
||||
struct threshold_bank *b = NULL;
|
||||
const char *name = th_names[bank];
|
||||
const char *name = get_name(bank, NULL);
|
||||
int err = 0;
|
||||
|
||||
if (is_shared_bank(bank)) {
|
||||
|
@ -869,7 +999,7 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank)
|
|||
}
|
||||
}
|
||||
|
||||
err = allocate_threshold_blocks(cpu, bank, 0, MSR_IA32_MCx_MISC(bank));
|
||||
err = allocate_threshold_blocks(cpu, bank, 0, msr_ops.misc(bank));
|
||||
if (!err)
|
||||
goto out;
|
||||
|
||||
|
|
|
@ -626,3 +626,34 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3,
|
|||
amd_disable_seq_and_redirect_scrub);
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE)
|
||||
#include <linux/jump_label.h>
|
||||
#include <asm/string_64.h>
|
||||
|
||||
/* Ivy Bridge, Haswell, Broadwell */
|
||||
/*
 * PCI fixup callback: read the dword at config-space offset 0x84 of @pdev and,
 * if bit 4 (mask 0x10) is set, increment the mcsafe_key static key.
 *
 * NOTE(review): the return value of pci_read_config_dword() is not checked
 * before capid0 is tested.
 */
static void quirk_intel_brickland_xeon_ras_cap(struct pci_dev *pdev)
|
||||
{
|
||||
u32 capid0;
|
||||

||||
pci_read_config_dword(pdev, 0x84, &capid0);
|
||||

||||
if (capid0 & 0x10)
|
||||
static_branch_inc(&mcsafe_key);
|
||||
}
|
||||
|
||||
/* Skylake */
|
||||
/*
 * PCI fixup callback: read the dword at config-space offset 0x84 of @pdev and,
 * only if bits 6 and 7 (mask 0xc0) are both set, increment the mcsafe_key
 * static key.
 *
 * NOTE(review): the return value of pci_read_config_dword() is not checked
 * before capid0 is tested.
 */
static void quirk_intel_purley_xeon_ras_cap(struct pci_dev *pdev)
|
||||
{
|
||||
u32 capid0;
|
||||

||||
pci_read_config_dword(pdev, 0x84, &capid0);
|
||||

||||
if ((capid0 & 0xc0) == 0xc0)
|
||||
static_branch_inc(&mcsafe_key);
|
||||
}
|
||||
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0ec3, quirk_intel_brickland_xeon_ras_cap);
|
||||
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2fc0, quirk_intel_brickland_xeon_ras_cap);
|
||||
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, quirk_intel_brickland_xeon_ras_cap);
|
||||
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2083, quirk_intel_purley_xeon_ras_cap);
|
||||
#endif
|
||||
|
|
|
@ -38,7 +38,7 @@ EXPORT_SYMBOL(__copy_user_nocache);
|
|||
EXPORT_SYMBOL(_copy_from_user);
|
||||
EXPORT_SYMBOL(_copy_to_user);
|
||||
|
||||
EXPORT_SYMBOL_GPL(memcpy_mcsafe);
|
||||
EXPORT_SYMBOL_GPL(memcpy_mcsafe_unrolled);
|
||||
|
||||
EXPORT_SYMBOL(copy_page);
|
||||
EXPORT_SYMBOL(clear_page);
|
||||
|
|
|
@ -181,11 +181,11 @@ ENDPROC(memcpy_orig)
|
|||
|
||||
#ifndef CONFIG_UML
|
||||
/*
|
||||
* memcpy_mcsafe - memory copy with machine check exception handling
|
||||
* memcpy_mcsafe_unrolled - memory copy with machine check exception handling
|
||||
* Note that we only catch machine checks when reading the source addresses.
|
||||
* Writes to target are posted and don't generate machine checks.
|
||||
*/
|
||||
ENTRY(memcpy_mcsafe)
|
||||
ENTRY(memcpy_mcsafe_unrolled)
|
||||
cmpl $8, %edx
|
||||
/* Less than 8 bytes? Go to byte copy loop */
|
||||
jb .L_no_whole_words
|
||||
|
@ -273,7 +273,7 @@ ENTRY(memcpy_mcsafe)
|
|||
.L_done_memcpy_trap:
|
||||
xorq %rax, %rax
|
||||
ret
|
||||
ENDPROC(memcpy_mcsafe)
|
||||
ENDPROC(memcpy_mcsafe_unrolled)
|
||||
|
||||
.section .fixup, "ax"
|
||||
/* Return -EFAULT for any failure */
|
||||
|
|
|
@ -68,6 +68,7 @@ static int inj_##reg##_set(void *data, u64 val) \
|
|||
MCE_INJECT_SET(status);
|
||||
MCE_INJECT_SET(misc);
|
||||
MCE_INJECT_SET(addr);
|
||||
MCE_INJECT_SET(synd);
|
||||
|
||||
#define MCE_INJECT_GET(reg) \
|
||||
static int inj_##reg##_get(void *data, u64 *val) \
|
||||
|
@ -81,10 +82,12 @@ static int inj_##reg##_get(void *data, u64 *val) \
|
|||
MCE_INJECT_GET(status);
|
||||
MCE_INJECT_GET(misc);
|
||||
MCE_INJECT_GET(addr);
|
||||
MCE_INJECT_GET(synd);
|
||||
|
||||
DEFINE_SIMPLE_ATTRIBUTE(status_fops, inj_status_get, inj_status_set, "%llx\n");
|
||||
DEFINE_SIMPLE_ATTRIBUTE(misc_fops, inj_misc_get, inj_misc_set, "%llx\n");
|
||||
DEFINE_SIMPLE_ATTRIBUTE(addr_fops, inj_addr_get, inj_addr_set, "%llx\n");
|
||||
DEFINE_SIMPLE_ATTRIBUTE(synd_fops, inj_synd_get, inj_synd_set, "%llx\n");
|
||||
|
||||
/*
|
||||
* Caller needs to make sure this cpu doesn't disappear
|
||||
|
@ -243,27 +246,27 @@ static void toggle_nb_mca_mst_cpu(u16 nid)
|
|||
|
||||
static void prepare_msrs(void *info)
|
||||
{
|
||||
struct mce i_mce = *(struct mce *)info;
|
||||
u8 b = i_mce.bank;
|
||||
struct mce m = *(struct mce *)info;
|
||||
u8 b = m.bank;
|
||||
|
||||
wrmsrl(MSR_IA32_MCG_STATUS, i_mce.mcgstatus);
|
||||
wrmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
|
||||
|
||||
if (boot_cpu_has(X86_FEATURE_SMCA)) {
|
||||
if (i_mce.inject_flags == DFR_INT_INJ) {
|
||||
wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(b), i_mce.status);
|
||||
wrmsrl(MSR_AMD64_SMCA_MCx_DEADDR(b), i_mce.addr);
|
||||
if (m.inject_flags == DFR_INT_INJ) {
|
||||
wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(b), m.status);
|
||||
wrmsrl(MSR_AMD64_SMCA_MCx_DEADDR(b), m.addr);
|
||||
} else {
|
||||
wrmsrl(MSR_AMD64_SMCA_MCx_STATUS(b), i_mce.status);
|
||||
wrmsrl(MSR_AMD64_SMCA_MCx_ADDR(b), i_mce.addr);
|
||||
wrmsrl(MSR_AMD64_SMCA_MCx_STATUS(b), m.status);
|
||||
wrmsrl(MSR_AMD64_SMCA_MCx_ADDR(b), m.addr);
|
||||
}
|
||||
|
||||
wrmsrl(MSR_AMD64_SMCA_MCx_MISC(b), i_mce.misc);
|
||||
wrmsrl(MSR_AMD64_SMCA_MCx_MISC(b), m.misc);
|
||||
wrmsrl(MSR_AMD64_SMCA_MCx_SYND(b), m.synd);
|
||||
} else {
|
||||
wrmsrl(MSR_IA32_MCx_STATUS(b), i_mce.status);
|
||||
wrmsrl(MSR_IA32_MCx_ADDR(b), i_mce.addr);
|
||||
wrmsrl(MSR_IA32_MCx_MISC(b), i_mce.misc);
|
||||
wrmsrl(MSR_IA32_MCx_STATUS(b), m.status);
|
||||
wrmsrl(MSR_IA32_MCx_ADDR(b), m.addr);
|
||||
wrmsrl(MSR_IA32_MCx_MISC(b), m.misc);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static void do_inject(void)
|
||||
|
@ -275,6 +278,9 @@ static void do_inject(void)
|
|||
if (i_mce.misc)
|
||||
i_mce.status |= MCI_STATUS_MISCV;
|
||||
|
||||
if (i_mce.synd)
|
||||
i_mce.status |= MCI_STATUS_SYNDV;
|
||||
|
||||
if (inj_type == SW_INJ) {
|
||||
mce_inject_log(&i_mce);
|
||||
return;
|
||||
|
@ -301,7 +307,9 @@ static void do_inject(void)
|
|||
* only on the node base core. Refer to D18F3x44[NbMcaToMstCpuEn] for
|
||||
* Fam10h and later BKDGs.
|
||||
*/
|
||||
if (static_cpu_has(X86_FEATURE_AMD_DCM) && b == 4) {
|
||||
if (static_cpu_has(X86_FEATURE_AMD_DCM) &&
|
||||
b == 4 &&
|
||||
boot_cpu_data.x86 < 0x17) {
|
||||
toggle_nb_mca_mst_cpu(amd_get_nb_id(cpu));
|
||||
cpu = get_nbc_for_node(amd_get_nb_id(cpu));
|
||||
}
|
||||
|
@ -371,6 +379,9 @@ static const char readme_msg[] =
|
|||
"\t used for error thresholding purposes and its validity is indicated by\n"
|
||||
"\t MCi_STATUS[MiscV].\n"
|
||||
"\n"
|
||||
"synd:\t Set MCi_SYND: provide syndrome info about the error. Only valid on\n"
|
||||
"\t Scalable MCA systems, and its validity is indicated by MCi_STATUS[SyndV].\n"
|
||||
"\n"
|
||||
"addr:\t Error address value to be written to MCi_ADDR. Log address information\n"
|
||||
"\t associated with the error.\n"
|
||||
"\n"
|
||||
|
@ -420,6 +431,7 @@ static struct dfs_node {
|
|||
{ .name = "status", .fops = &status_fops, .perm = S_IRUSR | S_IWUSR },
|
||||
{ .name = "misc", .fops = &misc_fops, .perm = S_IRUSR | S_IWUSR },
|
||||
{ .name = "addr", .fops = &addr_fops, .perm = S_IRUSR | S_IWUSR },
|
||||
{ .name = "synd", .fops = &synd_fops, .perm = S_IRUSR | S_IWUSR },
|
||||
{ .name = "bank", .fops = &bank_fops, .perm = S_IRUSR | S_IWUSR },
|
||||
{ .name = "flags", .fops = &flags_fops, .perm = S_IRUSR | S_IWUSR },
|
||||
{ .name = "cpu", .fops = &extcpu_fops, .perm = S_IRUSR | S_IWUSR },
|
||||
|
@ -428,7 +440,7 @@ static struct dfs_node {
|
|||
|
||||
static int __init init_mce_inject(void)
|
||||
{
|
||||
int i;
|
||||
unsigned int i;
|
||||
u64 cap;
|
||||
|
||||
rdmsrl(MSR_IA32_MCG_CAP, cap);
|
||||
|
@ -452,26 +464,22 @@ static int __init init_mce_inject(void)
|
|||
return 0;
|
||||
|
||||
err_dfs_add:
|
||||
while (--i >= 0)
|
||||
while (i-- > 0)
|
||||
debugfs_remove(dfs_fls[i].d);
|
||||
|
||||
debugfs_remove(dfs_inj);
|
||||
dfs_inj = NULL;
|
||||
|
||||
return -ENOMEM;
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
static void __exit exit_mce_inject(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(dfs_fls); i++)
|
||||
debugfs_remove(dfs_fls[i].d);
|
||||
debugfs_remove_recursive(dfs_inj);
|
||||
dfs_inj = NULL;
|
||||
|
||||
memset(&dfs_fls, 0, sizeof(dfs_fls));
|
||||
|
||||
debugfs_remove(dfs_inj);
|
||||
dfs_inj = NULL;
|
||||
}
|
||||
module_init(init_mce_inject);
|
||||
module_exit(exit_mce_inject);
|
||||
|
|
|
@ -148,12 +148,12 @@ static const char * const mc6_mce_desc[] = {
|
|||
};
|
||||
|
||||
/* Scalable MCA error strings */
|
||||
static const char * const f17h_ls_mce_desc[] = {
|
||||
static const char * const smca_ls_mce_desc[] = {
|
||||
"Load queue parity",
|
||||
"Store queue parity",
|
||||
"Miss address buffer payload parity",
|
||||
"L1 TLB parity",
|
||||
"", /* reserved */
|
||||
"Reserved",
|
||||
"DC tag error type 6",
|
||||
"DC tag error type 1",
|
||||
"Internal error type 1",
|
||||
|
@ -172,7 +172,7 @@ static const char * const f17h_ls_mce_desc[] = {
|
|||
"L2 fill data error",
|
||||
};
|
||||
|
||||
static const char * const f17h_if_mce_desc[] = {
|
||||
static const char * const smca_if_mce_desc[] = {
|
||||
"microtag probe port parity error",
|
||||
"IC microtag or full tag multi-hit error",
|
||||
"IC full tag parity",
|
||||
|
@ -185,19 +185,22 @@ static const char * const f17h_if_mce_desc[] = {
|
|||
"BPQ snoop parity on Thread 1",
|
||||
"L1 BTB multi-match error",
|
||||
"L2 BTB multi-match error",
|
||||
"L2 Cache Response Poison error",
|
||||
"System Read Data error",
|
||||
};
|
||||
|
||||
static const char * const f17h_l2_mce_desc[] = {
|
||||
static const char * const smca_l2_mce_desc[] = {
|
||||
"L2M tag multi-way-hit error",
|
||||
"L2M tag ECC error",
|
||||
"L2M data ECC error",
|
||||
"HW assert",
|
||||
};
|
||||
|
||||
static const char * const f17h_de_mce_desc[] = {
|
||||
static const char * const smca_de_mce_desc[] = {
|
||||
"uop cache tag parity error",
|
||||
"uop cache data parity error",
|
||||
"Insn buffer parity error",
|
||||
"uop queue parity error",
|
||||
"Insn dispatch queue parity error",
|
||||
"Fetch address FIFO parity",
|
||||
"Patch RAM data parity",
|
||||
|
@ -205,7 +208,7 @@ static const char * const f17h_de_mce_desc[] = {
|
|||
"uop buffer parity"
|
||||
};
|
||||
|
||||
static const char * const f17h_ex_mce_desc[] = {
|
||||
static const char * const smca_ex_mce_desc[] = {
|
||||
"Watchdog timeout error",
|
||||
"Phy register file parity",
|
||||
"Flag register file parity",
|
||||
|
@ -214,18 +217,22 @@ static const char * const f17h_ex_mce_desc[] = {
|
|||
"EX payload parity",
|
||||
"Checkpoint queue parity",
|
||||
"Retire dispatch queue parity",
|
||||
"Retire status queue parity error",
|
||||
"Scheduling queue parity error",
|
||||
"Branch buffer queue parity error",
|
||||
};
|
||||
|
||||
static const char * const f17h_fp_mce_desc[] = {
|
||||
static const char * const smca_fp_mce_desc[] = {
|
||||
"Physical register file parity",
|
||||
"Freelist parity error",
|
||||
"Schedule queue parity",
|
||||
"NSQ parity error",
|
||||
"Retire queue parity",
|
||||
"Status register file parity",
|
||||
"Hardware assertion",
|
||||
};
|
||||
|
||||
static const char * const f17h_l3_mce_desc[] = {
|
||||
static const char * const smca_l3_mce_desc[] = {
|
||||
"Shadow tag macro ECC error",
|
||||
"Shadow tag macro multi-way-hit error",
|
||||
"L3M tag ECC error",
|
||||
|
@ -236,7 +243,7 @@ static const char * const f17h_l3_mce_desc[] = {
|
|||
"L3 HW assert",
|
||||
};
|
||||
|
||||
static const char * const f17h_cs_mce_desc[] = {
|
||||
static const char * const smca_cs_mce_desc[] = {
|
||||
"Illegal request from transport layer",
|
||||
"Address violation",
|
||||
"Security violation",
|
||||
|
@ -248,14 +255,14 @@ static const char * const f17h_cs_mce_desc[] = {
|
|||
"ECC error on probe filter access",
|
||||
};
|
||||
|
||||
static const char * const f17h_pie_mce_desc[] = {
|
||||
static const char * const smca_pie_mce_desc[] = {
|
||||
"HW assert",
|
||||
"Internal PIE register security violation",
|
||||
"Error on GMI link",
|
||||
"Poison data written to internal PIE register",
|
||||
};
|
||||
|
||||
static const char * const f17h_umc_mce_desc[] = {
|
||||
static const char * const smca_umc_mce_desc[] = {
|
||||
"DRAM ECC error",
|
||||
"Data poison error on DRAM",
|
||||
"SDP parity error",
|
||||
|
@ -264,18 +271,39 @@ static const char * const f17h_umc_mce_desc[] = {
|
|||
"Write data CRC error",
|
||||
};
|
||||
|
||||
static const char * const f17h_pb_mce_desc[] = {
|
||||
static const char * const smca_pb_mce_desc[] = {
|
||||
"Parameter Block RAM ECC error",
|
||||
};
|
||||
|
||||
static const char * const f17h_psp_mce_desc[] = {
|
||||
static const char * const smca_psp_mce_desc[] = {
|
||||
"PSP RAM ECC or parity error",
|
||||
};
|
||||
|
||||
static const char * const f17h_smu_mce_desc[] = {
|
||||
static const char * const smca_smu_mce_desc[] = {
|
||||
"SMU RAM ECC or parity error",
|
||||
};
|
||||
|
||||
struct smca_mce_desc {
|
||||
const char * const *descs;
|
||||
unsigned int num_descs;
|
||||
};
|
||||
|
||||
static struct smca_mce_desc smca_mce_descs[] = {
|
||||
[SMCA_LS] = { smca_ls_mce_desc, ARRAY_SIZE(smca_ls_mce_desc) },
|
||||
[SMCA_IF] = { smca_if_mce_desc, ARRAY_SIZE(smca_if_mce_desc) },
|
||||
[SMCA_L2_CACHE] = { smca_l2_mce_desc, ARRAY_SIZE(smca_l2_mce_desc) },
|
||||
[SMCA_DE] = { smca_de_mce_desc, ARRAY_SIZE(smca_de_mce_desc) },
|
||||
[SMCA_EX] = { smca_ex_mce_desc, ARRAY_SIZE(smca_ex_mce_desc) },
|
||||
[SMCA_FP] = { smca_fp_mce_desc, ARRAY_SIZE(smca_fp_mce_desc) },
|
||||
[SMCA_L3_CACHE] = { smca_l3_mce_desc, ARRAY_SIZE(smca_l3_mce_desc) },
|
||||
[SMCA_CS] = { smca_cs_mce_desc, ARRAY_SIZE(smca_cs_mce_desc) },
|
||||
[SMCA_PIE] = { smca_pie_mce_desc, ARRAY_SIZE(smca_pie_mce_desc) },
|
||||
[SMCA_UMC] = { smca_umc_mce_desc, ARRAY_SIZE(smca_umc_mce_desc) },
|
||||
[SMCA_PB] = { smca_pb_mce_desc, ARRAY_SIZE(smca_pb_mce_desc) },
|
||||
[SMCA_PSP] = { smca_psp_mce_desc, ARRAY_SIZE(smca_psp_mce_desc) },
|
||||
[SMCA_SMU] = { smca_smu_mce_desc, ARRAY_SIZE(smca_smu_mce_desc) },
|
||||
};
|
||||
|
||||
static bool f12h_mc0_mce(u16 ec, u8 xec)
|
||||
{
|
||||
bool ret = false;
|
||||
|
@ -820,175 +848,35 @@ static void decode_mc6_mce(struct mce *m)
|
|||
pr_emerg(HW_ERR "Corrupted MC6 MCE info?\n");
|
||||
}
|
||||
|
||||
static void decode_f17h_core_errors(const char *ip_name, u8 xec,
|
||||
unsigned int mca_type)
|
||||
{
|
||||
const char * const *error_desc_array;
|
||||
size_t len;
|
||||
|
||||
pr_emerg(HW_ERR "%s Error: ", ip_name);
|
||||
|
||||
switch (mca_type) {
|
||||
case SMCA_LS:
|
||||
error_desc_array = f17h_ls_mce_desc;
|
||||
len = ARRAY_SIZE(f17h_ls_mce_desc) - 1;
|
||||
|
||||
if (xec == 0x4) {
|
||||
pr_cont("Unrecognized LS MCA error code.\n");
|
||||
return;
|
||||
}
|
||||
break;
|
||||
|
||||
case SMCA_IF:
|
||||
error_desc_array = f17h_if_mce_desc;
|
||||
len = ARRAY_SIZE(f17h_if_mce_desc) - 1;
|
||||
break;
|
||||
|
||||
case SMCA_L2_CACHE:
|
||||
error_desc_array = f17h_l2_mce_desc;
|
||||
len = ARRAY_SIZE(f17h_l2_mce_desc) - 1;
|
||||
break;
|
||||
|
||||
case SMCA_DE:
|
||||
error_desc_array = f17h_de_mce_desc;
|
||||
len = ARRAY_SIZE(f17h_de_mce_desc) - 1;
|
||||
break;
|
||||
|
||||
case SMCA_EX:
|
||||
error_desc_array = f17h_ex_mce_desc;
|
||||
len = ARRAY_SIZE(f17h_ex_mce_desc) - 1;
|
||||
break;
|
||||
|
||||
case SMCA_FP:
|
||||
error_desc_array = f17h_fp_mce_desc;
|
||||
len = ARRAY_SIZE(f17h_fp_mce_desc) - 1;
|
||||
break;
|
||||
|
||||
case SMCA_L3_CACHE:
|
||||
error_desc_array = f17h_l3_mce_desc;
|
||||
len = ARRAY_SIZE(f17h_l3_mce_desc) - 1;
|
||||
break;
|
||||
|
||||
default:
|
||||
pr_cont("Corrupted MCA core error info.\n");
|
||||
return;
|
||||
}
|
||||
|
||||
if (xec > len) {
|
||||
pr_cont("Unrecognized %s MCA bank error code.\n",
|
||||
amd_core_mcablock_names[mca_type]);
|
||||
return;
|
||||
}
|
||||
|
||||
pr_cont("%s.\n", error_desc_array[xec]);
|
||||
}
|
||||
|
||||
static void decode_df_errors(u8 xec, unsigned int mca_type)
|
||||
{
|
||||
const char * const *error_desc_array;
|
||||
size_t len;
|
||||
|
||||
pr_emerg(HW_ERR "Data Fabric Error: ");
|
||||
|
||||
switch (mca_type) {
|
||||
case SMCA_CS:
|
||||
error_desc_array = f17h_cs_mce_desc;
|
||||
len = ARRAY_SIZE(f17h_cs_mce_desc) - 1;
|
||||
break;
|
||||
|
||||
case SMCA_PIE:
|
||||
error_desc_array = f17h_pie_mce_desc;
|
||||
len = ARRAY_SIZE(f17h_pie_mce_desc) - 1;
|
||||
break;
|
||||
|
||||
default:
|
||||
pr_cont("Corrupted MCA Data Fabric info.\n");
|
||||
return;
|
||||
}
|
||||
|
||||
if (xec > len) {
|
||||
pr_cont("Unrecognized %s MCA bank error code.\n",
|
||||
amd_df_mcablock_names[mca_type]);
|
||||
return;
|
||||
}
|
||||
|
||||
pr_cont("%s.\n", error_desc_array[xec]);
|
||||
}
|
||||
|
||||
/* Decode errors according to Scalable MCA specification */
|
||||
static void decode_smca_errors(struct mce *m)
|
||||
{
|
||||
u32 addr = MSR_AMD64_SMCA_MCx_IPID(m->bank);
|
||||
unsigned int hwid, mca_type, i;
|
||||
u8 xec = XEC(m->status, xec_mask);
|
||||
const char * const *error_desc_array;
|
||||
struct smca_hwid_mcatype *type;
|
||||
unsigned int bank_type;
|
||||
const char *ip_name;
|
||||
u32 low, high;
|
||||
size_t len;
|
||||
u8 xec = XEC(m->status, xec_mask);
|
||||
|
||||
if (rdmsr_safe(addr, &low, &high)) {
|
||||
pr_emerg("Invalid IP block specified, error information is unreliable.\n");
|
||||
if (m->bank >= ARRAY_SIZE(smca_banks))
|
||||
return;
|
||||
}
|
||||
|
||||
hwid = high & MCI_IPID_HWID;
|
||||
mca_type = (high & MCI_IPID_MCATYPE) >> 16;
|
||||
if (boot_cpu_data.x86 >= 0x17 && m->bank == 4)
|
||||
pr_emerg(HW_ERR "Bank 4 is reserved on Fam17h.\n");
|
||||
|
||||
pr_emerg(HW_ERR "MC%d IPID value: 0x%08x%08x\n", m->bank, high, low);
|
||||
|
||||
/*
|
||||
* Based on hwid and mca_type values, decode errors from respective IPs.
|
||||
* Note: mca_type values make sense only in the context of an hwid.
|
||||
*/
|
||||
for (i = 0; i < ARRAY_SIZE(amd_hwids); i++)
|
||||
if (amd_hwids[i].hwid == hwid)
|
||||
break;
|
||||
|
||||
switch (i) {
|
||||
case SMCA_F17H_CORE:
|
||||
ip_name = (mca_type == SMCA_L3_CACHE) ?
|
||||
"L3 Cache" : "F17h Core";
|
||||
return decode_f17h_core_errors(ip_name, xec, mca_type);
|
||||
break;
|
||||
|
||||
case SMCA_DF:
|
||||
return decode_df_errors(xec, mca_type);
|
||||
break;
|
||||
|
||||
case SMCA_UMC:
|
||||
error_desc_array = f17h_umc_mce_desc;
|
||||
len = ARRAY_SIZE(f17h_umc_mce_desc) - 1;
|
||||
break;
|
||||
|
||||
case SMCA_PB:
|
||||
error_desc_array = f17h_pb_mce_desc;
|
||||
len = ARRAY_SIZE(f17h_pb_mce_desc) - 1;
|
||||
break;
|
||||
|
||||
case SMCA_PSP:
|
||||
error_desc_array = f17h_psp_mce_desc;
|
||||
len = ARRAY_SIZE(f17h_psp_mce_desc) - 1;
|
||||
break;
|
||||
|
||||
case SMCA_SMU:
|
||||
error_desc_array = f17h_smu_mce_desc;
|
||||
len = ARRAY_SIZE(f17h_smu_mce_desc) - 1;
|
||||
break;
|
||||
|
||||
default:
|
||||
pr_emerg(HW_ERR "HWID:%d does not match any existing IPs.\n", hwid);
|
||||
type = smca_banks[m->bank].type;
|
||||
if (!type)
|
||||
return;
|
||||
|
||||
bank_type = type->bank_type;
|
||||
ip_name = smca_bank_names[bank_type].long_name;
|
||||
|
||||
pr_emerg(HW_ERR "%s Extended Error Code: %d\n", ip_name, xec);
|
||||
|
||||
/* Only print the decode of valid error codes */
|
||||
if (xec < smca_mce_descs[bank_type].num_descs &&
|
||||
(type->xec_bitmap & BIT_ULL(xec))) {
|
||||
pr_emerg(HW_ERR "%s Error: ", ip_name);
|
||||
pr_cont("%s.\n", smca_mce_descs[bank_type].descs[xec]);
|
||||
}
|
||||
|
||||
ip_name = amd_hwids[i].name;
|
||||
pr_emerg(HW_ERR "%s Error: ", ip_name);
|
||||
|
||||
if (xec > len) {
|
||||
pr_cont("Unrecognized %s MCA bank error code.\n", ip_name);
|
||||
return;
|
||||
}
|
||||
|
||||
pr_cont("%s.\n", error_desc_array[xec]);
|
||||
}
|
||||
|
||||
static inline void amd_decode_err_code(u16 ec)
|
||||
|
@ -1078,6 +966,8 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
|
|||
u32 low, high;
|
||||
u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank);
|
||||
|
||||
pr_cont("|%s", ((m->status & MCI_STATUS_SYNDV) ? "SyndV" : "-"));
|
||||
|
||||
if (!rdmsr_safe(addr, &low, &high) &&
|
||||
(low & MCI_CONFIG_MCAX))
|
||||
pr_cont("|%s", ((m->status & MCI_STATUS_TCC) ? "TCC" : "-"));
|
||||
|
@ -1091,12 +981,20 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
|
|||
pr_cont("]: 0x%016llx\n", m->status);
|
||||
|
||||
if (m->status & MCI_STATUS_ADDRV)
|
||||
pr_emerg(HW_ERR "MC%d Error Address: 0x%016llx\n", m->bank, m->addr);
|
||||
pr_emerg(HW_ERR "Error Addr: 0x%016llx", m->addr);
|
||||
|
||||
if (boot_cpu_has(X86_FEATURE_SMCA)) {
|
||||
if (m->status & MCI_STATUS_SYNDV)
|
||||
pr_cont(", Syndrome: 0x%016llx", m->synd);
|
||||
|
||||
pr_cont(", IPID: 0x%016llx", m->ipid);
|
||||
|
||||
pr_cont("\n");
|
||||
|
||||
decode_smca_errors(m);
|
||||
goto err_code;
|
||||
}
|
||||
} else
|
||||
pr_cont("\n");
|
||||
|
||||
if (!fam_ops)
|
||||
goto err_code;
|
||||
|
|
|
@ -269,9 +269,15 @@ struct static_key_false {
|
|||
#define DEFINE_STATIC_KEY_TRUE(name) \
|
||||
struct static_key_true name = STATIC_KEY_TRUE_INIT
|
||||
|
||||
#define DECLARE_STATIC_KEY_TRUE(name) \
|
||||
extern struct static_key_true name
|
||||
|
||||
#define DEFINE_STATIC_KEY_FALSE(name) \
|
||||
struct static_key_false name = STATIC_KEY_FALSE_INIT
|
||||
|
||||
#define DECLARE_STATIC_KEY_FALSE(name) \
|
||||
extern struct static_key_false name
|
||||
|
||||
#define DEFINE_STATIC_KEY_ARRAY_TRUE(name, count) \
|
||||
struct static_key_true name[count] = { \
|
||||
[0 ... (count) - 1] = STATIC_KEY_TRUE_INIT, \
|
||||
|
|
|
@ -20,6 +20,8 @@ TRACE_EVENT(mce_record,
|
|||
__field( u64, status )
|
||||
__field( u64, addr )
|
||||
__field( u64, misc )
|
||||
__field( u64, synd )
|
||||
__field( u64, ipid )
|
||||
__field( u64, ip )
|
||||
__field( u64, tsc )
|
||||
__field( u64, walltime )
|
||||
|
@ -38,6 +40,8 @@ TRACE_EVENT(mce_record,
|
|||
__entry->status = m->status;
|
||||
__entry->addr = m->addr;
|
||||
__entry->misc = m->misc;
|
||||
__entry->synd = m->synd;
|
||||
__entry->ipid = m->ipid;
|
||||
__entry->ip = m->ip;
|
||||
__entry->tsc = m->tsc;
|
||||
__entry->walltime = m->time;
|
||||
|
@ -50,11 +54,12 @@ TRACE_EVENT(mce_record,
|
|||
__entry->cpuvendor = m->cpuvendor;
|
||||
),
|
||||
|
||||
TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, ADDR/MISC: %016Lx/%016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PROCESSOR: %u:%x, TIME: %llu, SOCKET: %u, APIC: %x",
|
||||
TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, IPID: %016Lx, ADDR/MISC/SYND: %016Lx/%016Lx/%016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PROCESSOR: %u:%x, TIME: %llu, SOCKET: %u, APIC: %x",
|
||||
__entry->cpu,
|
||||
__entry->mcgcap, __entry->mcgstatus,
|
||||
__entry->bank, __entry->status,
|
||||
__entry->addr, __entry->misc,
|
||||
__entry->ipid,
|
||||
__entry->addr, __entry->misc, __entry->synd,
|
||||
__entry->cs, __entry->ip,
|
||||
__entry->tsc,
|
||||
__entry->cpuvendor, __entry->cpuid,
|
||||
|
|
Loading…
Reference in New Issue