Commit ab5a503c authored by Mauro Carvalho Chehab

amd64_edac: convert driver to use the new edac ABI

The legacy edac ABI is going to be removed. Port the driver to use
and benefit from the new API functionality.

Cc: Doug Thompson <norsk5@yahoo.com>
Cc: Borislav Petkov <borislav.petkov@amd.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
parent 4275be63
...@@ -1039,6 +1039,37 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr, ...@@ -1039,6 +1039,37 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
int channel, csrow; int channel, csrow;
u32 page, offset; u32 page, offset;
error_address_to_page_and_offset(sys_addr, &page, &offset);
/*
* Find out which node the error address belongs to. This may be
* different from the node that detected the error.
*/
src_mci = find_mc_by_sys_addr(mci, sys_addr);
if (!src_mci) {
amd64_mc_err(mci, "failed to map error addr 0x%lx to a node\n",
(unsigned long)sys_addr);
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
page, offset, syndrome,
-1, -1, -1,
EDAC_MOD_STR,
"failed to map error addr to a node",
NULL);
return;
}
/* Now map the sys_addr to a CSROW */
csrow = sys_addr_to_csrow(src_mci, sys_addr);
if (csrow < 0) {
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
page, offset, syndrome,
-1, -1, -1,
EDAC_MOD_STR,
"failed to map error addr to a csrow",
NULL);
return;
}
/* CHIPKILL enabled */ /* CHIPKILL enabled */
if (pvt->nbcfg & NBCFG_CHIPKILL) { if (pvt->nbcfg & NBCFG_CHIPKILL) {
channel = get_channel_from_ecc_syndrome(mci, syndrome); channel = get_channel_from_ecc_syndrome(mci, syndrome);
...@@ -1048,9 +1079,15 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr, ...@@ -1048,9 +1079,15 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
* 2 DIMMs is in error. So we need to ID 'both' of them * 2 DIMMs is in error. So we need to ID 'both' of them
* as suspect. * as suspect.
*/ */
amd64_mc_warn(mci, "unknown syndrome 0x%04x - possible " amd64_mc_warn(src_mci, "unknown syndrome 0x%04x - "
"error reporting race\n", syndrome); "possible error reporting race\n",
edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR); syndrome);
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
page, offset, syndrome,
csrow, -1, -1,
EDAC_MOD_STR,
"unknown syndrome - possible error reporting race",
NULL);
return; return;
} }
} else { } else {
...@@ -1065,28 +1102,10 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr, ...@@ -1065,28 +1102,10 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
channel = ((sys_addr & BIT(3)) != 0); channel = ((sys_addr & BIT(3)) != 0);
} }
/* edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, src_mci,
* Find out which node the error address belongs to. This may be page, offset, syndrome,
* different from the node that detected the error. csrow, channel, -1,
*/ EDAC_MOD_STR, "", NULL);
src_mci = find_mc_by_sys_addr(mci, sys_addr);
if (!src_mci) {
amd64_mc_err(mci, "failed to map error addr 0x%lx to a node\n",
(unsigned long)sys_addr);
edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR);
return;
}
/* Now map the sys_addr to a CSROW */
csrow = sys_addr_to_csrow(src_mci, sys_addr);
if (csrow < 0) {
edac_mc_handle_ce_no_info(src_mci, EDAC_MOD_STR);
} else {
error_address_to_page_and_offset(sys_addr, &page, &offset);
edac_mc_handle_ce(src_mci, page, offset, syndrome, csrow,
channel, EDAC_MOD_STR);
}
} }
static int ddr2_cs_size(unsigned i, bool dct_width) static int ddr2_cs_size(unsigned i, bool dct_width)
...@@ -1592,15 +1611,20 @@ static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr, ...@@ -1592,15 +1611,20 @@ static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
u32 page, offset; u32 page, offset;
int nid, csrow, chan = 0; int nid, csrow, chan = 0;
error_address_to_page_and_offset(sys_addr, &page, &offset);
csrow = f1x_translate_sysaddr_to_cs(pvt, sys_addr, &nid, &chan); csrow = f1x_translate_sysaddr_to_cs(pvt, sys_addr, &nid, &chan);
if (csrow < 0) { if (csrow < 0) {
edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR); edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
page, offset, syndrome,
-1, -1, -1,
EDAC_MOD_STR,
"failed to map error addr to a csrow",
NULL);
return; return;
} }
error_address_to_page_and_offset(sys_addr, &page, &offset);
/* /*
* We need the syndromes for channel detection only when we're * We need the syndromes for channel detection only when we're
* ganged. Otherwise @chan should already contain the channel at * ganged. Otherwise @chan should already contain the channel at
...@@ -1609,16 +1633,10 @@ static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr, ...@@ -1609,16 +1633,10 @@ static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
if (dct_ganging_enabled(pvt)) if (dct_ganging_enabled(pvt))
chan = get_channel_from_ecc_syndrome(mci, syndrome); chan = get_channel_from_ecc_syndrome(mci, syndrome);
if (chan >= 0) edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
edac_mc_handle_ce(mci, page, offset, syndrome, csrow, chan, page, offset, syndrome,
EDAC_MOD_STR); csrow, chan, -1,
else EDAC_MOD_STR, "", NULL);
/*
* Channel unknown, report all channels on this CSROW as failed.
*/
for (chan = 0; chan < mci->csrows[csrow].nr_channels; chan++)
edac_mc_handle_ce(mci, page, offset, syndrome,
csrow, chan, EDAC_MOD_STR);
} }
/* /*
...@@ -1899,7 +1917,12 @@ static void amd64_handle_ce(struct mem_ctl_info *mci, struct mce *m) ...@@ -1899,7 +1917,12 @@ static void amd64_handle_ce(struct mem_ctl_info *mci, struct mce *m)
/* Ensure that the Error Address is VALID */ /* Ensure that the Error Address is VALID */
if (!(m->status & MCI_STATUS_ADDRV)) { if (!(m->status & MCI_STATUS_ADDRV)) {
amd64_mc_err(mci, "HW has no ERROR_ADDRESS available\n"); amd64_mc_err(mci, "HW has no ERROR_ADDRESS available\n");
edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR); edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
0, 0, 0,
-1, -1, -1,
EDAC_MOD_STR,
"HW has no ERROR_ADDRESS available",
NULL);
return; return;
} }
...@@ -1923,11 +1946,17 @@ static void amd64_handle_ue(struct mem_ctl_info *mci, struct mce *m) ...@@ -1923,11 +1946,17 @@ static void amd64_handle_ue(struct mem_ctl_info *mci, struct mce *m)
if (!(m->status & MCI_STATUS_ADDRV)) { if (!(m->status & MCI_STATUS_ADDRV)) {
amd64_mc_err(mci, "HW has no ERROR_ADDRESS available\n"); amd64_mc_err(mci, "HW has no ERROR_ADDRESS available\n");
edac_mc_handle_ue_no_info(log_mci, EDAC_MOD_STR); edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
0, 0, 0,
-1, -1, -1,
EDAC_MOD_STR,
"HW has no ERROR_ADDRESS available",
NULL);
return; return;
} }
sys_addr = get_error_address(m); sys_addr = get_error_address(m);
error_address_to_page_and_offset(sys_addr, &page, &offset);
/* /*
* Find out which node the error address belongs to. This may be * Find out which node the error address belongs to. This may be
...@@ -1937,7 +1966,11 @@ static void amd64_handle_ue(struct mem_ctl_info *mci, struct mce *m) ...@@ -1937,7 +1966,11 @@ static void amd64_handle_ue(struct mem_ctl_info *mci, struct mce *m)
if (!src_mci) { if (!src_mci) {
amd64_mc_err(mci, "ERROR ADDRESS (0x%lx) NOT mapped to a MC\n", amd64_mc_err(mci, "ERROR ADDRESS (0x%lx) NOT mapped to a MC\n",
(unsigned long)sys_addr); (unsigned long)sys_addr);
edac_mc_handle_ue_no_info(log_mci, EDAC_MOD_STR); edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
page, offset, 0,
-1, -1, -1,
EDAC_MOD_STR,
"ERROR ADDRESS NOT mapped to a MC", NULL);
return; return;
} }
...@@ -1947,10 +1980,17 @@ static void amd64_handle_ue(struct mem_ctl_info *mci, struct mce *m) ...@@ -1947,10 +1980,17 @@ static void amd64_handle_ue(struct mem_ctl_info *mci, struct mce *m)
if (csrow < 0) { if (csrow < 0) {
amd64_mc_err(mci, "ERROR_ADDRESS (0x%lx) NOT mapped to CS\n", amd64_mc_err(mci, "ERROR_ADDRESS (0x%lx) NOT mapped to CS\n",
(unsigned long)sys_addr); (unsigned long)sys_addr);
edac_mc_handle_ue_no_info(log_mci, EDAC_MOD_STR); edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
page, offset, 0,
-1, -1, -1,
EDAC_MOD_STR,
"ERROR ADDRESS NOT mapped to CS",
NULL);
} else { } else {
error_address_to_page_and_offset(sys_addr, &page, &offset); edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
edac_mc_handle_ue(log_mci, page, offset, csrow, EDAC_MOD_STR); page, offset, 0,
csrow, -1, -1,
EDAC_MOD_STR, "", NULL);
} }
} }
...@@ -2515,6 +2555,7 @@ static int amd64_init_one_instance(struct pci_dev *F2) ...@@ -2515,6 +2555,7 @@ static int amd64_init_one_instance(struct pci_dev *F2)
struct amd64_pvt *pvt = NULL; struct amd64_pvt *pvt = NULL;
struct amd64_family_type *fam_type = NULL; struct amd64_family_type *fam_type = NULL;
struct mem_ctl_info *mci = NULL; struct mem_ctl_info *mci = NULL;
struct edac_mc_layer layers[2];
int err = 0, ret; int err = 0, ret;
u8 nid = get_node_id(F2); u8 nid = get_node_id(F2);
...@@ -2549,7 +2590,13 @@ static int amd64_init_one_instance(struct pci_dev *F2) ...@@ -2549,7 +2590,13 @@ static int amd64_init_one_instance(struct pci_dev *F2)
goto err_siblings; goto err_siblings;
ret = -ENOMEM; ret = -ENOMEM;
mci = edac_mc_alloc(0, pvt->csels[0].b_cnt, pvt->channel_count, nid); layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
layers[0].size = pvt->csels[0].b_cnt;
layers[0].is_virt_csrow = true;
layers[1].type = EDAC_MC_LAYER_CHANNEL;
layers[1].size = pvt->channel_count;
layers[1].is_virt_csrow = false;
mci = new_edac_mc_alloc(nid, ARRAY_SIZE(layers), layers, 0);
if (!mci) if (!mci)
goto err_siblings; goto err_siblings;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment