diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index ee2e2e53935f..d4a10793f807 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -326,6 +326,7 @@ struct sbridge_info {
 	const struct interleave_pkg *interleave_pkg;
 	u8		max_sad;
 	u8		(*get_node_id)(struct sbridge_pvt *pvt);
+	u8		(*get_ha)(u8 bank);
 	enum mem_type	(*get_memory_type)(struct sbridge_pvt *pvt);
 	enum dev_type	(*get_width)(struct sbridge_pvt *pvt, u32 mtr);
 	struct pci_dev	*pci_vtd;
@@ -1002,6 +1003,39 @@ static u8 knl_get_node_id(struct sbridge_pvt *pvt)
 	return GET_BITFIELD(reg, 0, 2);
 }
 
+/*
+ * Use the reporting bank number to determine which memory
+ * controller (also known as "ha" for "home agent"). Sandy
+ * Bridge only has one memory controller per socket, so the
+ * answer is always zero.
+ */
+static u8 sbridge_get_ha(u8 bank)
+{
+	return 0;
+}
+
+/*
+ * On Ivy Bridge, Haswell and Broadwell the error may be in a home agent
+ * bank (7, 8), or one of the per-channel memory controller banks (9 .. 16).
+ * Any other bank yields 0xff, which callers must treat as "no such HA".
+ */
+static u8 ibridge_get_ha(u8 bank)
+{
+	switch (bank) {
+	case 7 ... 8:
+		return bank - 7;
+	case 9 ... 16:
+		return (bank - 9) / 4;
+	default:
+		return 0xff;
+	}
+}
+
+/* Not used, but included for safety/symmetry; always invalid (0xff) */
+static u8 knl_get_ha(u8 bank)
+{
+	return 0xff;
+}
 
 static u64 haswell_get_tolm(struct sbridge_pvt *pvt)
 {
@@ -2207,6 +2241,69 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
 	return 0;
 }
 
+/*
+ * Locate a memory error from the machine check record alone. The caller
+ * uses this when MCi_MISC reports an address LSB of 12 or more (e.g. patrol
+ * scrubber errors). The channel comes from MCi_STATUS bits 3:0, the home
+ * agent from the reporting bank and the socket from m->socketid. Mirror and
+ * lockstep partner channels are also set in *channel_mask.
+ *
+ * Returns 0 on success, or -EINVAL with an explanation written to msg.
+ */
+static int get_memory_error_data_from_mce(struct mem_ctl_info *mci,
+					  const struct mce *m, u8 *socket,
+					  u8 *ha, long *channel_mask,
+					  char *msg)
+{
+	u32 reg, channel = GET_BITFIELD(m->status, 0, 3);
+	struct mem_ctl_info *new_mci;
+	struct sbridge_pvt *pvt;
+	struct pci_dev *pci_ha;
+	bool tad0;
+
+	if (channel >= NUM_CHANNELS) {
+		sprintf(msg, "Invalid channel 0x%x", channel);
+		return -EINVAL;
+	}
+
+	pvt = mci->pvt_info;
+	if (!pvt->info.get_ha) {
+		sprintf(msg, "No get_ha()");
+		return -EINVAL;
+	}
+	*ha = pvt->info.get_ha(m->bank);
+	if (*ha != 0 && *ha != 1) {
+		sprintf(msg, "Impossible bank %d", m->bank);
+		return -EINVAL;
+	}
+
+	*socket = m->socketid;
+	new_mci = get_mci_for_node_id(*socket, *ha);
+	if (!new_mci) {
+		strcpy(msg, "mci socket got corrupted!");
+		return -EINVAL;
+	}
+
+	pvt = new_mci->pvt_info;
+	pci_ha = pvt->pci_ha;
+	pci_read_config_dword(pci_ha, tad_dram_rule[0], &reg);
+	tad0 = m->addr <= TAD_LIMIT(reg);
+
+	*channel_mask = 1 << channel;
+	if (pvt->mirror_mode == FULL_MIRRORING ||
+	    (pvt->mirror_mode == ADDR_RANGE_MIRRORING && tad0)) {
+		*channel_mask |= 1 << ((channel + 2) % 4);
+		pvt->is_cur_addr_mirrored = true;
+	} else {
+		pvt->is_cur_addr_mirrored = false;
+	}
+
+	if (pvt->is_lockstep)
+		*channel_mask |= 1 << ((channel + 1) % 4);
+
+	return 0;
+}
+
 /****************************************************************************
 	Device initialization routines: put/get, init/exit
  ****************************************************************************/
@@ -2877,10 +2974,16 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
 	u32 errcode = GET_BITFIELD(m->status, 0, 15);
 	u32 channel = GET_BITFIELD(m->status, 0, 3);
 	u32 optypenum = GET_BITFIELD(m->status, 4, 6);
+	/*
+	 * Bits 5-0 of MCi_MISC give the least significant bit that is valid.
+	 * A value 6 is for cache line aligned address, a value 12 is for page
+	 * aligned address reported by patrol scrubber.
+	 */
+	u32 lsb = GET_BITFIELD(m->misc, 0, 5);
 	long channel_mask, first_channel;
-	u8 rank, socket, ha;
+	u8 rank = 0xff, socket, ha;
 	int rc, dimm;
-	char *area_type = NULL;
+	char *area_type = "DRAM";
 
 	if (pvt->info.type != SANDY_BRIDGE)
 		recoverable = true;
@@ -2964,9 +3067,13 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
 				optype, msg);
 		}
 		return;
-	} else {
+	} else if (lsb < 12) {
 		rc = get_memory_error_data(mci, m->addr, &socket, &ha,
-				&channel_mask, &rank, &area_type, msg);
+					   &channel_mask, &rank,
+					   &area_type, msg);
+	} else {
+		rc = get_memory_error_data_from_mce(mci, m, &socket, &ha,
+						    &channel_mask, msg);
 	}
 
 	if (rc < 0)
@@ -2981,14 +3088,15 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
 
 	first_channel = find_first_bit(&channel_mask, NUM_CHANNELS);
 
-	if (rank < 4)
+	if (rank == 0xff)
+		dimm = -1;
+	else if (rank < 4)
 		dimm = 0;
 	else if (rank < 8)
 		dimm = 1;
 	else
 		dimm = 2;
 
-
 	/*
 	 * FIXME: On some memory configurations (mirror, lockstep), the
 	 * Memory Controller can't point the error to a single DIMM. The
@@ -3175,6 +3283,7 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
 		pvt->info.dram_rule = ibridge_dram_rule;
 		pvt->info.get_memory_type = get_memory_type;
 		pvt->info.get_node_id = get_node_id;
+		pvt->info.get_ha = ibridge_get_ha;
 		pvt->info.rir_limit = rir_limit;
 		pvt->info.sad_limit = sad_limit;
 		pvt->info.interleave_mode = interleave_mode;
@@ -3199,6 +3308,7 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
 		pvt->info.dram_rule = sbridge_dram_rule;
 		pvt->info.get_memory_type = get_memory_type;
 		pvt->info.get_node_id = get_node_id;
+		pvt->info.get_ha = sbridge_get_ha;
 		pvt->info.rir_limit = rir_limit;
 		pvt->info.sad_limit = sad_limit;
 		pvt->info.interleave_mode = interleave_mode;
@@ -3223,6 +3333,7 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
 		pvt->info.dram_rule = ibridge_dram_rule;
 		pvt->info.get_memory_type = haswell_get_memory_type;
 		pvt->info.get_node_id = haswell_get_node_id;
+		pvt->info.get_ha = ibridge_get_ha;
 		pvt->info.rir_limit = haswell_rir_limit;
 		pvt->info.sad_limit = sad_limit;
 		pvt->info.interleave_mode = interleave_mode;
@@ -3247,6 +3358,7 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
 		pvt->info.dram_rule = ibridge_dram_rule;
 		pvt->info.get_memory_type = haswell_get_memory_type;
 		pvt->info.get_node_id = haswell_get_node_id;
+		pvt->info.get_ha = ibridge_get_ha;
 		pvt->info.rir_limit = haswell_rir_limit;
 		pvt->info.sad_limit = sad_limit;
 		pvt->info.interleave_mode = interleave_mode;
@@ -3271,6 +3383,7 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
 		pvt->info.dram_rule = knl_dram_rule;
 		pvt->info.get_memory_type = knl_get_memory_type;
 		pvt->info.get_node_id = knl_get_node_id;
+		pvt->info.get_ha = knl_get_ha;
 		pvt->info.rir_limit = NULL;
 		pvt->info.sad_limit = knl_sad_limit;
 		pvt->info.interleave_mode = knl_interleave_mode;