Commit 4b5e35ce authored by Linus Torvalds

Merge tag 'edac_updates_for_v5.14' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras

Pull EDAC updates from Tony Luck:
 "Various fixes and support for new CPUs:

   - Clean up error messages from thunderx_edac

   - Add MODULE_DEVICE_TABLE to ti_edac so it will autoload

   - Use %pR to print resources in aspeed_edac

   - Add Yazen Ghannam as MAINTAINER for AMD edac drivers

   - Fix Ice Lake and Sapphire Rapids drivers to report correct "near"
     or "far" device for errors in 2LM configurations

   - Add support for on-package high bandwidth memory in Sapphire Rapids

   - New CPU support for three CPUs supporting in-band ECC (IOT SKUs for
     ICL-NNPI, Tiger Lake and Alder Lake)

   - Don't even try to load Intel EDAC drivers when running as a guest

   - Fix Kconfig dependency on X86_MCE_INTEL for EDAC_IGEN6"

* tag 'edac_updates_for_v5.14' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras:
  EDAC/igen6: fix core dependency
  EDAC/Intel: Do not load EDAC driver when running as a guest
  EDAC/igen6: Add Intel Alder Lake SoC support
  EDAC/igen6: Add Intel Tiger Lake SoC support
  EDAC/igen6: Add Intel ICL-NNPI SoC support
  EDAC/i10nm: Add support for high bandwidth memory
  EDAC/i10nm: Add detection of memory levels for ICX/SPR servers
  EDAC/skx_common: Add new ADXL components for 2-level memory
  MAINTAINERS: Make Yazen Ghannam maintainer for EDAC-AMD64
  EDAC/aspeed: Use proper format string for printing resource
  EDAC/ti: Add missing MODULE_DEVICE_TABLE
  EDAC/thunderx: Remove irrelevant variable from error messages
parents e60d726f 0a9ece9b
...@@ -6467,10 +6467,11 @@ F: Documentation/filesystems/ecryptfs.rst ...@@ -6467,10 +6467,11 @@ F: Documentation/filesystems/ecryptfs.rst
F: fs/ecryptfs/ F: fs/ecryptfs/
EDAC-AMD64 EDAC-AMD64
M: Borislav Petkov <bp@alien8.de> M: Yazen Ghannam <yazen.ghannam@amd.com>
L: linux-edac@vger.kernel.org L: linux-edac@vger.kernel.org
S: Maintained S: Supported
F: drivers/edac/amd64_edac* F: drivers/edac/amd64_edac*
F: drivers/edac/mce_amd*
EDAC-ARMADA EDAC-ARMADA
M: Jan Luebbe <jlu@pengutronix.de> M: Jan Luebbe <jlu@pengutronix.de>
......
...@@ -270,7 +270,8 @@ config EDAC_PND2 ...@@ -270,7 +270,8 @@ config EDAC_PND2
config EDAC_IGEN6 config EDAC_IGEN6
tristate "Intel client SoC Integrated MC" tristate "Intel client SoC Integrated MC"
depends on PCI && X86_64 && PCI_MMCONFIG && ARCH_HAVE_NMI_SAFE_CMPXCHG depends on PCI && PCI_MMCONFIG && ARCH_HAVE_NMI_SAFE_CMPXCHG
depends on X86_64 && X86_MCE_INTEL
help help
Support for error detection and correction on the Intel Support for error detection and correction on the Intel
client SoC Integrated Memory Controller using In-Band ECC IP. client SoC Integrated Memory Controller using In-Band ECC IP.
......
...@@ -254,8 +254,8 @@ static int init_csrows(struct mem_ctl_info *mci) ...@@ -254,8 +254,8 @@ static int init_csrows(struct mem_ctl_info *mci)
return rc; return rc;
} }
dev_dbg(mci->pdev, "dt: /memory node resources: first page r.start=0x%x, resource_size=0x%x, PAGE_SHIFT macro=0x%x\n", dev_dbg(mci->pdev, "dt: /memory node resources: first page %pR, PAGE_SHIFT macro=0x%x\n",
r.start, resource_size(&r), PAGE_SHIFT); &r, PAGE_SHIFT);
csrow->first_page = r.start >> PAGE_SHIFT; csrow->first_page = r.start >> PAGE_SHIFT;
nr_pages = resource_size(&r) >> PAGE_SHIFT; nr_pages = resource_size(&r) >> PAGE_SHIFT;
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
#include "edac_module.h" #include "edac_module.h"
#include "skx_common.h" #include "skx_common.h"
#define I10NM_REVISION "v0.0.4" #define I10NM_REVISION "v0.0.5"
#define EDAC_MOD_STR "i10nm_edac" #define EDAC_MOD_STR "i10nm_edac"
/* Debug macros */ /* Debug macros */
...@@ -24,19 +24,39 @@ ...@@ -24,19 +24,39 @@
pci_read_config_dword((d)->uracu, 0xd0, &(reg)) pci_read_config_dword((d)->uracu, 0xd0, &(reg))
#define I10NM_GET_IMC_BAR(d, i, reg) \ #define I10NM_GET_IMC_BAR(d, i, reg) \
pci_read_config_dword((d)->uracu, 0xd8 + (i) * 4, &(reg)) pci_read_config_dword((d)->uracu, 0xd8 + (i) * 4, &(reg))
#define I10NM_GET_SAD(d, offset, i, reg)\
pci_read_config_dword((d)->sad_all, (offset) + (i) * 8, &(reg))
#define I10NM_GET_HBM_IMC_BAR(d, reg) \
pci_read_config_dword((d)->uracu, 0xd4, &(reg))
#define I10NM_GET_CAPID3_CFG(d, reg) \
pci_read_config_dword((d)->pcu_cr3, 0x90, &(reg))
#define I10NM_GET_DIMMMTR(m, i, j) \ #define I10NM_GET_DIMMMTR(m, i, j) \
readl((m)->mbase + 0x2080c + (i) * (m)->chan_mmio_sz + (j) * 4) readl((m)->mbase + ((m)->hbm_mc ? 0x80c : 0x2080c) + \
(i) * (m)->chan_mmio_sz + (j) * 4)
#define I10NM_GET_MCDDRTCFG(m, i, j) \ #define I10NM_GET_MCDDRTCFG(m, i, j) \
readl((m)->mbase + 0x20970 + (i) * (m)->chan_mmio_sz + (j) * 4) readl((m)->mbase + ((m)->hbm_mc ? 0x970 : 0x20970) + \
(i) * (m)->chan_mmio_sz + (j) * 4)
#define I10NM_GET_MCMTR(m, i) \ #define I10NM_GET_MCMTR(m, i) \
readl((m)->mbase + 0x20ef8 + (i) * (m)->chan_mmio_sz) readl((m)->mbase + ((m)->hbm_mc ? 0xef8 : 0x20ef8) + \
(i) * (m)->chan_mmio_sz)
#define I10NM_GET_AMAP(m, i) \ #define I10NM_GET_AMAP(m, i) \
readl((m)->mbase + 0x20814 + (i) * (m)->chan_mmio_sz) readl((m)->mbase + ((m)->hbm_mc ? 0x814 : 0x20814) + \
(i) * (m)->chan_mmio_sz)
#define I10NM_GET_SCK_MMIO_BASE(reg) (GET_BITFIELD(reg, 0, 28) << 23) #define I10NM_GET_SCK_MMIO_BASE(reg) (GET_BITFIELD(reg, 0, 28) << 23)
#define I10NM_GET_IMC_MMIO_OFFSET(reg) (GET_BITFIELD(reg, 0, 10) << 12) #define I10NM_GET_IMC_MMIO_OFFSET(reg) (GET_BITFIELD(reg, 0, 10) << 12)
#define I10NM_GET_IMC_MMIO_SIZE(reg) ((GET_BITFIELD(reg, 13, 23) - \ #define I10NM_GET_IMC_MMIO_SIZE(reg) ((GET_BITFIELD(reg, 13, 23) - \
GET_BITFIELD(reg, 0, 10) + 1) << 12) GET_BITFIELD(reg, 0, 10) + 1) << 12)
#define I10NM_GET_HBM_IMC_MMIO_OFFSET(reg) \
((GET_BITFIELD(reg, 0, 10) << 12) + 0x140000)
#define I10NM_HBM_IMC_MMIO_SIZE 0x9000
#define I10NM_IS_HBM_PRESENT(reg) GET_BITFIELD(reg, 27, 30)
#define I10NM_IS_HBM_IMC(reg) GET_BITFIELD(reg, 29, 29)
#define I10NM_MAX_SAD 16
#define I10NM_SAD_ENABLE(reg) GET_BITFIELD(reg, 0, 0)
#define I10NM_SAD_NM_CACHEABLE(reg) GET_BITFIELD(reg, 5, 5)
static struct list_head *i10nm_edac_list; static struct list_head *i10nm_edac_list;
...@@ -63,7 +83,32 @@ static struct pci_dev *pci_get_dev_wrapper(int dom, unsigned int bus, ...@@ -63,7 +83,32 @@ static struct pci_dev *pci_get_dev_wrapper(int dom, unsigned int bus,
return pdev; return pdev;
} }
static int i10nm_get_all_munits(void) static bool i10nm_check_2lm(struct res_config *cfg)
{
struct skx_dev *d;
u32 reg;
int i;
list_for_each_entry(d, i10nm_edac_list, list) {
d->sad_all = pci_get_dev_wrapper(d->seg, d->bus[1],
PCI_SLOT(cfg->sad_all_devfn),
PCI_FUNC(cfg->sad_all_devfn));
if (!d->sad_all)
continue;
for (i = 0; i < I10NM_MAX_SAD; i++) {
I10NM_GET_SAD(d, cfg->sad_all_offset, i, reg);
if (I10NM_SAD_ENABLE(reg) && I10NM_SAD_NM_CACHEABLE(reg)) {
edac_dbg(2, "2-level memory configuration.\n");
return true;
}
}
}
return false;
}
static int i10nm_get_ddr_munits(void)
{ {
struct pci_dev *mdev; struct pci_dev *mdev;
void __iomem *mbase; void __iomem *mbase;
...@@ -91,7 +136,7 @@ static int i10nm_get_all_munits(void) ...@@ -91,7 +136,7 @@ static int i10nm_get_all_munits(void)
edac_dbg(2, "socket%d mmio base 0x%llx (reg 0x%x)\n", edac_dbg(2, "socket%d mmio base 0x%llx (reg 0x%x)\n",
j++, base, reg); j++, base, reg);
for (i = 0; i < I10NM_NUM_IMC; i++) { for (i = 0; i < I10NM_NUM_DDR_IMC; i++) {
mdev = pci_get_dev_wrapper(d->seg, d->bus[0], mdev = pci_get_dev_wrapper(d->seg, d->bus[0],
12 + i, 0); 12 + i, 0);
if (i == 0 && !mdev) { if (i == 0 && !mdev) {
...@@ -127,11 +172,97 @@ static int i10nm_get_all_munits(void) ...@@ -127,11 +172,97 @@ static int i10nm_get_all_munits(void)
return 0; return 0;
} }
/*
 * Tell whether the socket described by @d reports HBM as present.
 *
 * Reads the CAPID3 config register through d->pcu_cr3 and tests the
 * I10NM_IS_HBM_PRESENT() bit field (bits 27..30). A failed config read is
 * logged and treated as "no HBM".
 */
static bool i10nm_check_hbm_imc(struct skx_dev *d)
{
	u32 capid3;

	/* The config-space accessor returns 0 on success. */
	if (!I10NM_GET_CAPID3_CFG(d, capid3))
		return I10NM_IS_HBM_PRESENT(capid3) != 0;

	i10nm_printk(KERN_ERR, "Failed to get capid3_cfg\n");
	return false;
}
/*
 * Discover and map the HBM memory controllers of every socket on
 * i10nm_edac_list.
 *
 * For each socket the PCU CR3 device is looked up, HBM presence is checked,
 * and the HBM MMIO base is computed from the socket BAR plus the HBM IMC BAR
 * offset. Each HBM controller found is stored in d->imc[] after the DDR
 * slots (starting at index I10NM_NUM_DDR_IMC) with its PCI device, its
 * ioremap()ed register window and hbm_mc set.
 *
 * Return: 0 on success, -ENODEV when a required device or register cannot be
 * read, no HBM is present or a controller does not identify itself as HBM,
 * -ENOMEM when ioremap() fails.
 */
static int i10nm_get_hbm_munits(void)
{
	struct pci_dev *mdev;
	void __iomem *mbase;
	u32 reg, off, mcmtr;
	struct skx_dev *d;
	int i, lmc;
	u64 base;

	list_for_each_entry(d, i10nm_edac_list, list) {
		d->pcu_cr3 = pci_get_dev_wrapper(d->seg, d->bus[1], 30, 3);
		if (!d->pcu_cr3)
			return -ENODEV;

		if (!i10nm_check_hbm_imc(d)) {
			i10nm_printk(KERN_DEBUG, "No hbm memory\n");
			return -ENODEV;
		}

		if (I10NM_GET_SCK_BAR(d, reg)) {
			i10nm_printk(KERN_ERR, "Failed to get socket bar\n");
			return -ENODEV;
		}
		base = I10NM_GET_SCK_MMIO_BASE(reg);

		if (I10NM_GET_HBM_IMC_BAR(d, reg)) {
			i10nm_printk(KERN_ERR, "Failed to get hbm mc bar\n");
			return -ENODEV;
		}
		base += I10NM_GET_HBM_IMC_MMIO_OFFSET(reg);

		/* HBM controllers occupy the imc[] slots after the DDR ones. */
		lmc = I10NM_NUM_DDR_IMC;

		for (i = 0; i < I10NM_NUM_HBM_IMC; i++) {
			/* Four HBM controllers per PCI device, functions 1..4. */
			mdev = pci_get_dev_wrapper(d->seg, d->bus[0],
						   12 + i / 4, 1 + i % 4);
			if (i == 0 && !mdev) {
				i10nm_printk(KERN_ERR, "No hbm mc found\n");
				return -ENODEV;
			}
			if (!mdev)
				continue;

			d->imc[lmc].mdev = mdev;
			/* Register windows are laid out back to back from base. */
			off = i * I10NM_HBM_IMC_MMIO_SIZE;

			edac_dbg(2, "hbm mc%d mmio base 0x%llx size 0x%x\n",
				 lmc, base + off, I10NM_HBM_IMC_MMIO_SIZE);

			mbase = ioremap(base + off, I10NM_HBM_IMC_MMIO_SIZE);
			if (!mbase) {
				/*
				 * The caller may carry on after an HBM failure
				 * (it only aborts if DDR discovery failed too),
				 * so do not leave a slot with a device but no
				 * mapping behind.
				 */
				pci_dev_put(d->imc[lmc].mdev);
				d->imc[lmc].mdev = NULL;

				i10nm_printk(KERN_ERR, "Failed to ioremap for hbm mc 0x%llx\n",
					     base + off);
				return -ENOMEM;
			}

			/* I10NM_GET_MCMTR() needs mbase and hbm_mc set to pick the register. */
			d->imc[lmc].mbase = mbase;
			d->imc[lmc].hbm_mc = true;

			mcmtr = I10NM_GET_MCMTR(&d->imc[lmc], 0);
			if (!I10NM_IS_HBM_IMC(mcmtr)) {
				/*
				 * Not an HBM controller after all: undo the
				 * partial setup of this slot so it is not
				 * mistaken for a usable HBM controller later.
				 */
				iounmap(d->imc[lmc].mbase);
				d->imc[lmc].mbase = NULL;
				d->imc[lmc].hbm_mc = false;
				pci_dev_put(d->imc[lmc].mdev);
				d->imc[lmc].mdev = NULL;

				i10nm_printk(KERN_ERR, "This isn't an hbm mc!\n");
				return -ENODEV;
			}

			lmc++;
		}
	}

	return 0;
}
static struct res_config i10nm_cfg0 = { static struct res_config i10nm_cfg0 = {
.type = I10NM, .type = I10NM,
.decs_did = 0x3452, .decs_did = 0x3452,
.busno_cfg_offset = 0xcc, .busno_cfg_offset = 0xcc,
.ddr_chan_mmio_sz = 0x4000, .ddr_chan_mmio_sz = 0x4000,
.sad_all_devfn = PCI_DEVFN(29, 0),
.sad_all_offset = 0x108,
}; };
static struct res_config i10nm_cfg1 = { static struct res_config i10nm_cfg1 = {
...@@ -139,6 +270,8 @@ static struct res_config i10nm_cfg1 = { ...@@ -139,6 +270,8 @@ static struct res_config i10nm_cfg1 = {
.decs_did = 0x3452, .decs_did = 0x3452,
.busno_cfg_offset = 0xd0, .busno_cfg_offset = 0xd0,
.ddr_chan_mmio_sz = 0x4000, .ddr_chan_mmio_sz = 0x4000,
.sad_all_devfn = PCI_DEVFN(29, 0),
.sad_all_offset = 0x108,
}; };
static struct res_config spr_cfg = { static struct res_config spr_cfg = {
...@@ -146,7 +279,10 @@ static struct res_config spr_cfg = { ...@@ -146,7 +279,10 @@ static struct res_config spr_cfg = {
.decs_did = 0x3252, .decs_did = 0x3252,
.busno_cfg_offset = 0xd0, .busno_cfg_offset = 0xd0,
.ddr_chan_mmio_sz = 0x8000, .ddr_chan_mmio_sz = 0x8000,
.hbm_chan_mmio_sz = 0x4000,
.support_ddr5 = true, .support_ddr5 = true,
.sad_all_devfn = PCI_DEVFN(10, 0),
.sad_all_offset = 0x300,
}; };
static const struct x86_cpu_id i10nm_cpuids[] = { static const struct x86_cpu_id i10nm_cpuids[] = {
...@@ -179,13 +315,13 @@ static int i10nm_get_dimm_config(struct mem_ctl_info *mci, ...@@ -179,13 +315,13 @@ static int i10nm_get_dimm_config(struct mem_ctl_info *mci,
struct dimm_info *dimm; struct dimm_info *dimm;
int i, j, ndimms; int i, j, ndimms;
for (i = 0; i < I10NM_NUM_CHANNELS; i++) { for (i = 0; i < imc->num_channels; i++) {
if (!imc->mbase) if (!imc->mbase)
continue; continue;
ndimms = 0; ndimms = 0;
amap = I10NM_GET_AMAP(imc, i); amap = I10NM_GET_AMAP(imc, i);
for (j = 0; j < I10NM_NUM_DIMMS; j++) { for (j = 0; j < imc->num_dimms; j++) {
dimm = edac_get_dimm(mci, i, j, 0); dimm = edac_get_dimm(mci, i, j, 0);
mtr = I10NM_GET_DIMMMTR(imc, i, j); mtr = I10NM_GET_DIMMMTR(imc, i, j);
mcddrtcfg = I10NM_GET_MCDDRTCFG(imc, i, j); mcddrtcfg = I10NM_GET_MCDDRTCFG(imc, i, j);
...@@ -278,6 +414,9 @@ static int __init i10nm_init(void) ...@@ -278,6 +414,9 @@ static int __init i10nm_init(void)
if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR))) if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR)))
return -EBUSY; return -EBUSY;
if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
return -ENODEV;
id = x86_match_cpu(i10nm_cpuids); id = x86_match_cpu(i10nm_cpuids);
if (!id) if (!id)
return -ENODEV; return -ENODEV;
...@@ -296,8 +435,11 @@ static int __init i10nm_init(void) ...@@ -296,8 +435,11 @@ static int __init i10nm_init(void)
return -ENODEV; return -ENODEV;
} }
rc = i10nm_get_all_munits(); skx_set_mem_cfg(i10nm_check_2lm(cfg));
if (rc < 0)
rc = i10nm_get_ddr_munits();
if (i10nm_get_hbm_munits() && rc)
goto fail; goto fail;
list_for_each_entry(d, i10nm_edac_list, list) { list_for_each_entry(d, i10nm_edac_list, list) {
...@@ -318,7 +460,15 @@ static int __init i10nm_init(void) ...@@ -318,7 +460,15 @@ static int __init i10nm_init(void)
d->imc[i].lmc = i; d->imc[i].lmc = i;
d->imc[i].src_id = src_id; d->imc[i].src_id = src_id;
d->imc[i].node_id = node_id; d->imc[i].node_id = node_id;
d->imc[i].chan_mmio_sz = cfg->ddr_chan_mmio_sz; if (d->imc[i].hbm_mc) {
d->imc[i].chan_mmio_sz = cfg->hbm_chan_mmio_sz;
d->imc[i].num_channels = I10NM_NUM_HBM_CHANNELS;
d->imc[i].num_dimms = I10NM_NUM_HBM_DIMMS;
} else {
d->imc[i].chan_mmio_sz = cfg->ddr_chan_mmio_sz;
d->imc[i].num_channels = I10NM_NUM_DDR_CHANNELS;
d->imc[i].num_dimms = I10NM_NUM_DDR_DIMMS;
}
rc = skx_register_mci(&d->imc[i], d->imc[i].mdev, rc = skx_register_mci(&d->imc[i], d->imc[i].mdev,
"Intel_10nm Socket", EDAC_MOD_STR, "Intel_10nm Socket", EDAC_MOD_STR,
......
This diff is collapsed.
...@@ -1554,6 +1554,9 @@ static int __init pnd2_init(void) ...@@ -1554,6 +1554,9 @@ static int __init pnd2_init(void)
if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR))) if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR)))
return -EBUSY; return -EBUSY;
if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
return -ENODEV;
id = x86_match_cpu(pnd2_cpuids); id = x86_match_cpu(pnd2_cpuids);
if (!id) if (!id)
return -ENODEV; return -ENODEV;
......
...@@ -3510,6 +3510,9 @@ static int __init sbridge_init(void) ...@@ -3510,6 +3510,9 @@ static int __init sbridge_init(void)
if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR))) if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR)))
return -EBUSY; return -EBUSY;
if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
return -ENODEV;
id = x86_match_cpu(sbridge_cpuids); id = x86_match_cpu(sbridge_cpuids);
if (!id) if (!id)
return -ENODEV; return -ENODEV;
......
...@@ -656,6 +656,9 @@ static int __init skx_init(void) ...@@ -656,6 +656,9 @@ static int __init skx_init(void)
if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR))) if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR)))
return -EBUSY; return -EBUSY;
if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
return -ENODEV;
id = x86_match_cpu(skx_cpuids); id = x86_match_cpu(skx_cpuids);
if (!id) if (!id)
return -ENODEV; return -ENODEV;
......
...@@ -23,10 +23,13 @@ ...@@ -23,10 +23,13 @@
#include "skx_common.h" #include "skx_common.h"
static const char * const component_names[] = { static const char * const component_names[] = {
[INDEX_SOCKET] = "ProcessorSocketId", [INDEX_SOCKET] = "ProcessorSocketId",
[INDEX_MEMCTRL] = "MemoryControllerId", [INDEX_MEMCTRL] = "MemoryControllerId",
[INDEX_CHANNEL] = "ChannelId", [INDEX_CHANNEL] = "ChannelId",
[INDEX_DIMM] = "DimmSlotId", [INDEX_DIMM] = "DimmSlotId",
[INDEX_NM_MEMCTRL] = "NmMemoryControllerId",
[INDEX_NM_CHANNEL] = "NmChannelId",
[INDEX_NM_DIMM] = "NmDimmSlotId",
}; };
static int component_indices[ARRAY_SIZE(component_names)]; static int component_indices[ARRAY_SIZE(component_names)];
...@@ -34,12 +37,14 @@ static int adxl_component_count; ...@@ -34,12 +37,14 @@ static int adxl_component_count;
static const char * const *adxl_component_names; static const char * const *adxl_component_names;
static u64 *adxl_values; static u64 *adxl_values;
static char *adxl_msg; static char *adxl_msg;
static unsigned long adxl_nm_bitmap;
static char skx_msg[MSG_SIZE]; static char skx_msg[MSG_SIZE];
static skx_decode_f skx_decode; static skx_decode_f skx_decode;
static skx_show_retry_log_f skx_show_retry_rd_err_log; static skx_show_retry_log_f skx_show_retry_rd_err_log;
static u64 skx_tolm, skx_tohm; static u64 skx_tolm, skx_tohm;
static LIST_HEAD(dev_edac_list); static LIST_HEAD(dev_edac_list);
static bool skx_mem_cfg_2lm;
int __init skx_adxl_get(void) int __init skx_adxl_get(void)
{ {
...@@ -56,14 +61,25 @@ int __init skx_adxl_get(void) ...@@ -56,14 +61,25 @@ int __init skx_adxl_get(void)
for (j = 0; names[j]; j++) { for (j = 0; names[j]; j++) {
if (!strcmp(component_names[i], names[j])) { if (!strcmp(component_names[i], names[j])) {
component_indices[i] = j; component_indices[i] = j;
if (i >= INDEX_NM_FIRST)
adxl_nm_bitmap |= 1 << i;
break; break;
} }
} }
if (!names[j]) if (!names[j] && i < INDEX_NM_FIRST)
goto err; goto err;
} }
if (skx_mem_cfg_2lm) {
if (!adxl_nm_bitmap)
skx_printk(KERN_NOTICE, "Not enough ADXL components for 2-level memory.\n");
else
edac_dbg(2, "adxl_nm_bitmap: 0x%lx\n", adxl_nm_bitmap);
}
adxl_component_names = names; adxl_component_names = names;
while (*names++) while (*names++)
adxl_component_count++; adxl_component_count++;
...@@ -99,7 +115,7 @@ void __exit skx_adxl_put(void) ...@@ -99,7 +115,7 @@ void __exit skx_adxl_put(void)
kfree(adxl_msg); kfree(adxl_msg);
} }
static bool skx_adxl_decode(struct decoded_addr *res) static bool skx_adxl_decode(struct decoded_addr *res, bool error_in_1st_level_mem)
{ {
struct skx_dev *d; struct skx_dev *d;
int i, len = 0; int i, len = 0;
...@@ -116,11 +132,20 @@ static bool skx_adxl_decode(struct decoded_addr *res) ...@@ -116,11 +132,20 @@ static bool skx_adxl_decode(struct decoded_addr *res)
} }
res->socket = (int)adxl_values[component_indices[INDEX_SOCKET]]; res->socket = (int)adxl_values[component_indices[INDEX_SOCKET]];
res->imc = (int)adxl_values[component_indices[INDEX_MEMCTRL]]; if (error_in_1st_level_mem) {
res->channel = (int)adxl_values[component_indices[INDEX_CHANNEL]]; res->imc = (adxl_nm_bitmap & BIT_NM_MEMCTRL) ?
res->dimm = (int)adxl_values[component_indices[INDEX_DIMM]]; (int)adxl_values[component_indices[INDEX_NM_MEMCTRL]] : -1;
res->channel = (adxl_nm_bitmap & BIT_NM_CHANNEL) ?
(int)adxl_values[component_indices[INDEX_NM_CHANNEL]] : -1;
res->dimm = (adxl_nm_bitmap & BIT_NM_DIMM) ?
(int)adxl_values[component_indices[INDEX_NM_DIMM]] : -1;
} else {
res->imc = (int)adxl_values[component_indices[INDEX_MEMCTRL]];
res->channel = (int)adxl_values[component_indices[INDEX_CHANNEL]];
res->dimm = (int)adxl_values[component_indices[INDEX_DIMM]];
}
if (res->imc > NUM_IMC - 1) { if (res->imc > NUM_IMC - 1 || res->imc < 0) {
skx_printk(KERN_ERR, "Bad imc %d\n", res->imc); skx_printk(KERN_ERR, "Bad imc %d\n", res->imc);
return false; return false;
} }
...@@ -151,6 +176,11 @@ static bool skx_adxl_decode(struct decoded_addr *res) ...@@ -151,6 +176,11 @@ static bool skx_adxl_decode(struct decoded_addr *res)
return true; return true;
} }
/*
 * Record whether the platform is in a 2-level memory (2LM) configuration.
 *
 * The value is stored in the file-scope flag skx_mem_cfg_2lm, which
 * skx_adxl_get() and skx_error_in_1st_level_mem() read.
 */
void skx_set_mem_cfg(bool mem_cfg_2lm)
{
skx_mem_cfg_2lm = mem_cfg_2lm;
}
void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log) void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log)
{ {
skx_decode = decode; skx_decode = decode;
...@@ -313,9 +343,9 @@ int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm, ...@@ -313,9 +343,9 @@ int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm,
ranks = numrank(mtr); ranks = numrank(mtr);
rows = numrow(mtr); rows = numrow(mtr);
cols = numcol(mtr); cols = imc->hbm_mc ? 6 : numcol(mtr);
if (cfg->support_ddr5 && (amap & 0x8)) { if (cfg->support_ddr5 && ((amap & 0x8) || imc->hbm_mc)) {
banks = 32; banks = 32;
mtype = MEM_DDR5; mtype = MEM_DDR5;
} else { } else {
...@@ -344,8 +374,13 @@ int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm, ...@@ -344,8 +374,13 @@ int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm,
dimm->dtype = get_width(mtr); dimm->dtype = get_width(mtr);
dimm->mtype = mtype; dimm->mtype = mtype;
dimm->edac_mode = EDAC_SECDED; /* likely better than this */ dimm->edac_mode = EDAC_SECDED; /* likely better than this */
snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u",
imc->src_id, imc->lmc, chan, dimmno); if (imc->hbm_mc)
snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_HBMC#%u_Chan#%u",
imc->src_id, imc->lmc, chan);
else
snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u",
imc->src_id, imc->lmc, chan, dimmno);
return 1; return 1;
} }
...@@ -578,6 +613,21 @@ static void skx_mce_output_error(struct mem_ctl_info *mci, ...@@ -578,6 +613,21 @@ static void skx_mce_output_error(struct mem_ctl_info *mci,
optype, skx_msg); optype, skx_msg);
} }
/*
 * Decide whether machine-check record @m should be decoded against the
 * near-memory (first-level) ADXL components.
 *
 * Always false unless a 2-level memory configuration was recorded via
 * skx_set_mem_cfg(). Otherwise true only when the low 16 bits of m->status,
 * masked with 0xef80, equal 0x280.
 * NOTE(review): the mask/value pair presumably selects a specific MCA error
 * code class - confirm against the processor documentation.
 */
static bool skx_error_in_1st_level_mem(const struct mce *m)
{
	u32 mcacod;

	if (!skx_mem_cfg_2lm)
		return false;

	mcacod = GET_BITFIELD(m->status, 0, 15);

	return (mcacod & 0xef80) == 0x280;
}
int skx_mce_check_error(struct notifier_block *nb, unsigned long val, int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
void *data) void *data)
{ {
...@@ -597,7 +647,7 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val, ...@@ -597,7 +647,7 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
res.addr = mce->addr; res.addr = mce->addr;
if (adxl_component_count) { if (adxl_component_count) {
if (!skx_adxl_decode(&res)) if (!skx_adxl_decode(&res, skx_error_in_1st_level_mem(mce)))
return NOTIFY_DONE; return NOTIFY_DONE;
} else if (!skx_decode || !skx_decode(&res)) { } else if (!skx_decode || !skx_decode(&res)) {
return NOTIFY_DONE; return NOTIFY_DONE;
...@@ -658,6 +708,8 @@ void skx_remove(void) ...@@ -658,6 +708,8 @@ void skx_remove(void)
} }
if (d->util_all) if (d->util_all)
pci_dev_put(d->util_all); pci_dev_put(d->util_all);
if (d->pcu_cr3)
pci_dev_put(d->pcu_cr3);
if (d->sad_all) if (d->sad_all)
pci_dev_put(d->sad_all); pci_dev_put(d->sad_all);
if (d->uracu) if (d->uracu)
......
...@@ -9,6 +9,8 @@ ...@@ -9,6 +9,8 @@
#ifndef _SKX_COMM_EDAC_H #ifndef _SKX_COMM_EDAC_H
#define _SKX_COMM_EDAC_H #define _SKX_COMM_EDAC_H
#include <linux/bits.h>
#define MSG_SIZE 1024 #define MSG_SIZE 1024
/* /*
...@@ -30,9 +32,17 @@ ...@@ -30,9 +32,17 @@
#define SKX_NUM_CHANNELS 3 /* Channels per memory controller */ #define SKX_NUM_CHANNELS 3 /* Channels per memory controller */
#define SKX_NUM_DIMMS 2 /* Max DIMMS per channel */ #define SKX_NUM_DIMMS 2 /* Max DIMMS per channel */
#define I10NM_NUM_IMC 4 #define I10NM_NUM_DDR_IMC 4
#define I10NM_NUM_CHANNELS 2 #define I10NM_NUM_DDR_CHANNELS 2
#define I10NM_NUM_DIMMS 2 #define I10NM_NUM_DDR_DIMMS 2
#define I10NM_NUM_HBM_IMC 16
#define I10NM_NUM_HBM_CHANNELS 2
#define I10NM_NUM_HBM_DIMMS 1
#define I10NM_NUM_IMC (I10NM_NUM_DDR_IMC + I10NM_NUM_HBM_IMC)
#define I10NM_NUM_CHANNELS MAX(I10NM_NUM_DDR_CHANNELS, I10NM_NUM_HBM_CHANNELS)
#define I10NM_NUM_DIMMS MAX(I10NM_NUM_DDR_DIMMS, I10NM_NUM_HBM_DIMMS)
#define MAX(a, b) ((a) > (b) ? (a) : (b)) #define MAX(a, b) ((a) > (b) ? (a) : (b))
#define NUM_IMC MAX(SKX_NUM_IMC, I10NM_NUM_IMC) #define NUM_IMC MAX(SKX_NUM_IMC, I10NM_NUM_IMC)
...@@ -54,12 +64,16 @@ struct skx_dev { ...@@ -54,12 +64,16 @@ struct skx_dev {
struct pci_dev *sad_all; struct pci_dev *sad_all;
struct pci_dev *util_all; struct pci_dev *util_all;
struct pci_dev *uracu; /* for i10nm CPU */ struct pci_dev *uracu; /* for i10nm CPU */
struct pci_dev *pcu_cr3; /* for HBM memory detection */
u32 mcroute; u32 mcroute;
struct skx_imc { struct skx_imc {
struct mem_ctl_info *mci; struct mem_ctl_info *mci;
struct pci_dev *mdev; /* for i10nm CPU */ struct pci_dev *mdev; /* for i10nm CPU */
void __iomem *mbase; /* for i10nm CPU */ void __iomem *mbase; /* for i10nm CPU */
int chan_mmio_sz; /* for i10nm CPU */ int chan_mmio_sz; /* for i10nm CPU */
int num_channels; /* channels per memory controller */
int num_dimms; /* dimms per channel */
bool hbm_mc;
u8 mc; /* system wide mc# */ u8 mc; /* system wide mc# */
u8 lmc; /* socket relative mc# */ u8 lmc; /* socket relative mc# */
u8 src_id, node_id; u8 src_id, node_id;
...@@ -92,9 +106,17 @@ enum { ...@@ -92,9 +106,17 @@ enum {
INDEX_MEMCTRL, INDEX_MEMCTRL,
INDEX_CHANNEL, INDEX_CHANNEL,
INDEX_DIMM, INDEX_DIMM,
INDEX_NM_FIRST,
INDEX_NM_MEMCTRL = INDEX_NM_FIRST,
INDEX_NM_CHANNEL,
INDEX_NM_DIMM,
INDEX_MAX INDEX_MAX
}; };
#define BIT_NM_MEMCTRL BIT_ULL(INDEX_NM_MEMCTRL)
#define BIT_NM_CHANNEL BIT_ULL(INDEX_NM_CHANNEL)
#define BIT_NM_DIMM BIT_ULL(INDEX_NM_DIMM)
struct decoded_addr { struct decoded_addr {
struct skx_dev *dev; struct skx_dev *dev;
u64 addr; u64 addr;
...@@ -122,7 +144,12 @@ struct res_config { ...@@ -122,7 +144,12 @@ struct res_config {
int busno_cfg_offset; int busno_cfg_offset;
/* Per DDR channel memory-mapped I/O size */ /* Per DDR channel memory-mapped I/O size */
int ddr_chan_mmio_sz; int ddr_chan_mmio_sz;
/* Per HBM channel memory-mapped I/O size */
int hbm_chan_mmio_sz;
bool support_ddr5; bool support_ddr5;
/* SAD device number and function number */
unsigned int sad_all_devfn;
int sad_all_offset;
}; };
typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci, typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci,
...@@ -133,6 +160,7 @@ typedef void (*skx_show_retry_log_f)(struct decoded_addr *res, char *msg, int le ...@@ -133,6 +160,7 @@ typedef void (*skx_show_retry_log_f)(struct decoded_addr *res, char *msg, int le
int __init skx_adxl_get(void); int __init skx_adxl_get(void);
void __exit skx_adxl_put(void); void __exit skx_adxl_put(void);
void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log); void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log);
void skx_set_mem_cfg(bool mem_cfg_2lm);
int skx_get_src_id(struct skx_dev *d, int off, u8 *id); int skx_get_src_id(struct skx_dev *d, int off, u8 *id);
int skx_get_node_id(struct skx_dev *d, u8 *id); int skx_get_node_id(struct skx_dev *d, u8 *id);
......
...@@ -1368,7 +1368,7 @@ static int thunderx_ocx_probe(struct pci_dev *pdev, ...@@ -1368,7 +1368,7 @@ static int thunderx_ocx_probe(struct pci_dev *pdev,
name, 1, "CCPI", 1, name, 1, "CCPI", 1,
0, NULL, 0, idx); 0, NULL, 0, idx);
if (!edac_dev) { if (!edac_dev) {
dev_err(&pdev->dev, "Cannot allocate EDAC device: %d\n", ret); dev_err(&pdev->dev, "Cannot allocate EDAC device\n");
return -ENOMEM; return -ENOMEM;
} }
ocx = edac_dev->pvt_info; ocx = edac_dev->pvt_info;
...@@ -1380,7 +1380,7 @@ static int thunderx_ocx_probe(struct pci_dev *pdev, ...@@ -1380,7 +1380,7 @@ static int thunderx_ocx_probe(struct pci_dev *pdev,
ocx->regs = pcim_iomap_table(pdev)[0]; ocx->regs = pcim_iomap_table(pdev)[0];
if (!ocx->regs) { if (!ocx->regs) {
dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret); dev_err(&pdev->dev, "Cannot map PCI resources\n");
ret = -ENODEV; ret = -ENODEV;
goto err_free; goto err_free;
} }
......
...@@ -197,6 +197,7 @@ static const struct of_device_id ti_edac_of_match[] = { ...@@ -197,6 +197,7 @@ static const struct of_device_id ti_edac_of_match[] = {
{ .compatible = "ti,emif-dra7xx", .data = (void *)EMIF_TYPE_DRA7 }, { .compatible = "ti,emif-dra7xx", .data = (void *)EMIF_TYPE_DRA7 },
{}, {},
}; };
MODULE_DEVICE_TABLE(of, ti_edac_of_match);
static int _emif_get_id(struct device_node *node) static int _emif_get_id(struct device_node *node)
{ {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment