Commit 05b5fdb2 authored by Linus Torvalds

Merge tag 'edac_updates_for_v5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras

Pull EDAC updates from Borislav Petkov:
 "The usual EDAC stuff which managed to trickle in for 5.15:

   - Add new HBM2 (High Bandwidth Memory Gen 2) type and add support for
     it to the Intel SKx drivers

   - Print additional useful per-channel error information on i10nm,
     like on SKL

   - Don't load the AMD EDAC decoder in virtual images

   - The usual round of fixes and cleanups"

* tag 'edac_updates_for_v5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras:
  EDAC/i10nm: Retrieve and print retry_rd_err_log registers
  EDAC/i10nm: Fix NVDIMM detection
  EDAC/skx_common: Set the memory type correctly for HBM memory
  EDAC/altera: Skip defining unused structures for specific configs
  EDAC/mce_amd: Do not load edac_mce_amd module on guests
  EDAC/mc: Add new HBM2 memory type
  EDAC/amd64: Use DEVICE_ATTR helper macros
parents c7a5238e cf4e6d52
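
Before the diffs, a brief illustration of the HBM2 plumbing this merge adds. The sketch below is not part of the merge; it only shows how a memory-controller driver would report the new type, mirroring the skx_get_dimm_info() change further down. The function name is made up for illustration; struct dimm_info and enum mem_type come from <linux/edac.h>.

/* Illustrative only -- mirrors the skx_get_dimm_info() branch added below. */
#include <linux/edac.h>

static void example_report_dimm_type(struct dimm_info *dimm, bool hbm_mc,
				     bool ddr5_present)
{
	if (hbm_mc)
		dimm->mtype = MEM_HBM2;		/* new type from this merge */
	else if (ddr5_present)
		dimm->mtype = MEM_DDR5;
	else
		dimm->mtype = MEM_DDR4;

	/*
	 * The EDAC core then reports edac_mem_types[dimm->mtype], i.e.
	 * "High-bandwidth-memory-Gen2" for HBM2, in sysfs and logs.
	 */
}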
@@ -539,10 +539,18 @@ module_platform_driver(altr_edac_driver);
  * trigger testing are different for each memory.
  */
+#ifdef CONFIG_EDAC_ALTERA_OCRAM
 static const struct edac_device_prv_data ocramecc_data;
+#endif
+#ifdef CONFIG_EDAC_ALTERA_L2C
 static const struct edac_device_prv_data l2ecc_data;
+#endif
+#ifdef CONFIG_EDAC_ALTERA_OCRAM
 static const struct edac_device_prv_data a10_ocramecc_data;
+#endif
+#ifdef CONFIG_EDAC_ALTERA_L2C
 static const struct edac_device_prv_data a10_l2ecc_data;
+#endif
 
 static irqreturn_t altr_edac_device_handler(int irq, void *dev_id)
 {
@@ -569,9 +577,9 @@ static irqreturn_t altr_edac_device_handler(int irq, void *dev_id)
 	return ret_value;
 }
 
-static ssize_t altr_edac_device_trig(struct file *file,
-				     const char __user *user_buf,
-				     size_t count, loff_t *ppos)
+static ssize_t __maybe_unused
+altr_edac_device_trig(struct file *file, const char __user *user_buf,
+		      size_t count, loff_t *ppos)
 {
 	u32 *ptemp, i, error_mask;
@@ -640,27 +648,27 @@ static ssize_t altr_edac_device_trig(struct file *file,
 	return count;
 }
 
-static const struct file_operations altr_edac_device_inject_fops = {
+static const struct file_operations altr_edac_device_inject_fops __maybe_unused = {
 	.open = simple_open,
 	.write = altr_edac_device_trig,
 	.llseek = generic_file_llseek,
 };
 
-static ssize_t altr_edac_a10_device_trig(struct file *file,
-					 const char __user *user_buf,
-					 size_t count, loff_t *ppos);
+static ssize_t __maybe_unused
+altr_edac_a10_device_trig(struct file *file, const char __user *user_buf,
+			  size_t count, loff_t *ppos);
 
-static const struct file_operations altr_edac_a10_device_inject_fops = {
+static const struct file_operations altr_edac_a10_device_inject_fops __maybe_unused = {
 	.open = simple_open,
 	.write = altr_edac_a10_device_trig,
 	.llseek = generic_file_llseek,
 };
 
-static ssize_t altr_edac_a10_device_trig2(struct file *file,
-					  const char __user *user_buf,
-					  size_t count, loff_t *ppos);
+static ssize_t __maybe_unused
+altr_edac_a10_device_trig2(struct file *file, const char __user *user_buf,
+			   size_t count, loff_t *ppos);
 
-static const struct file_operations altr_edac_a10_device_inject2_fops = {
+static const struct file_operations altr_edac_a10_device_inject2_fops __maybe_unused = {
 	.open = simple_open,
 	.write = altr_edac_a10_device_trig2,
 	.llseek = generic_file_llseek,
@@ -1697,9 +1705,9 @@ MODULE_DEVICE_TABLE(of, altr_edac_a10_device_of_match);
  * Based on xgene_edac.c peripheral code.
  */
 
-static ssize_t altr_edac_a10_device_trig(struct file *file,
-					 const char __user *user_buf,
-					 size_t count, loff_t *ppos)
+static ssize_t __maybe_unused
+altr_edac_a10_device_trig(struct file *file, const char __user *user_buf,
+			  size_t count, loff_t *ppos)
 {
 	struct edac_device_ctl_info *edac_dci = file->private_data;
 	struct altr_edac_device_dev *drvdata = edac_dci->pvt_info;
@@ -1729,9 +1737,9 @@ static ssize_t altr_edac_a10_device_trig(struct file *file,
  * slightly. A few Arria10 peripherals can use this injection function.
  * Inject the error into the memory and then readback to trigger the IRQ.
  */
-static ssize_t altr_edac_a10_device_trig2(struct file *file,
-					  const char __user *user_buf,
-					  size_t count, loff_t *ppos)
+static ssize_t __maybe_unused
+altr_edac_a10_device_trig2(struct file *file, const char __user *user_buf,
+			   size_t count, loff_t *ppos)
 {
 	struct edac_device_ctl_info *edac_dci = file->private_data;
 	struct altr_edac_device_dev *drvdata = edac_dci->pvt_info;
......
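
The Altera hunk above wraps previously-unconditional edac_device_prv_data declarations in CONFIG_EDAC_ALTERA_* guards and marks the trigger helpers and their file_operations __maybe_unused. A minimal sketch of the warning this avoids; the config symbol and function names here are hypothetical:

#include <linux/compiler.h>	/* provides __maybe_unused */

/* Referenced only when the (hypothetical) feature is configured in ... */
static int __maybe_unused example_trigger(void)
{
	return 0;
}

#ifdef CONFIG_EXAMPLE_FEATURE
/* ... so without the attribute, -Wunused-function fires when it is not. */
static int example_caller(void)
{
	return example_trigger();
}
#endif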
@@ -571,8 +571,8 @@ EDAC_DCT_ATTR_SHOW(dbam0);
 EDAC_DCT_ATTR_SHOW(top_mem);
 EDAC_DCT_ATTR_SHOW(top_mem2);
 
-static ssize_t hole_show(struct device *dev, struct device_attribute *mattr,
-			 char *data)
+static ssize_t dram_hole_show(struct device *dev, struct device_attribute *mattr,
+			      char *data)
 {
 	struct mem_ctl_info *mci = to_mci(dev);
@@ -593,7 +593,7 @@ static DEVICE_ATTR(dhar, S_IRUGO, dhar_show, NULL);
 static DEVICE_ATTR(dbam, S_IRUGO, dbam0_show, NULL);
 static DEVICE_ATTR(topmem, S_IRUGO, top_mem_show, NULL);
 static DEVICE_ATTR(topmem2, S_IRUGO, top_mem2_show, NULL);
-static DEVICE_ATTR(dram_hole, S_IRUGO, hole_show, NULL);
+static DEVICE_ATTR_RO(dram_hole);
 
 static struct attribute *dbg_attrs[] = {
 	&dev_attr_dhar.attr,
@@ -802,16 +802,11 @@ static ssize_t inject_write_store(struct device *dev,
  * update NUM_INJ_ATTRS in case you add new members
  */
 
-static DEVICE_ATTR(inject_section, S_IRUGO | S_IWUSR,
-		   inject_section_show, inject_section_store);
-static DEVICE_ATTR(inject_word, S_IRUGO | S_IWUSR,
-		   inject_word_show, inject_word_store);
-static DEVICE_ATTR(inject_ecc_vector, S_IRUGO | S_IWUSR,
-		   inject_ecc_vector_show, inject_ecc_vector_store);
-static DEVICE_ATTR(inject_write, S_IWUSR,
-		   NULL, inject_write_store);
-static DEVICE_ATTR(inject_read, S_IWUSR,
-		   NULL, inject_read_store);
+static DEVICE_ATTR_RW(inject_section);
+static DEVICE_ATTR_RW(inject_word);
+static DEVICE_ATTR_RW(inject_ecc_vector);
+static DEVICE_ATTR_WO(inject_write);
+static DEVICE_ATTR_WO(inject_read);
 
 static struct attribute *inj_attrs[] = {
 	&dev_attr_inject_section.attr,
......
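
The amd64 hunks above rely on the DEVICE_ATTR helper macros deriving their callbacks from the attribute name, which is also why hole_show() becomes dram_hole_show(). A small sketch of the convention; the attribute name and value here are made up:

#include <linux/device.h>
#include <linux/sysfs.h>

static ssize_t foo_show(struct device *dev, struct device_attribute *attr,
			char *buf)
{
	return sysfs_emit(buf, "%d\n", 42);
}

/*
 * Expands to dev_attr_foo with .show = foo_show and mode 0444;
 * DEVICE_ATTR_RW(foo) would additionally require foo_store(),
 * DEVICE_ATTR_WO(foo) only foo_store().
 */
static DEVICE_ATTR_RO(foo);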
@@ -166,6 +166,7 @@ const char * const edac_mem_types[] = {
 	[MEM_DDR5]	= "Unbuffered-DDR5",
 	[MEM_NVDIMM]	= "Non-volatile-RAM",
 	[MEM_WIO2]	= "Wide-IO-2",
+	[MEM_HBM2]	= "High-bandwidth-memory-Gen2",
 };
 EXPORT_SYMBOL_GPL(edac_mem_types);
......
@@ -33,15 +33,21 @@
 #define I10NM_GET_DIMMMTR(m, i, j)	\
 	readl((m)->mbase + ((m)->hbm_mc ? 0x80c : 0x2080c) + \
 	(i) * (m)->chan_mmio_sz + (j) * 4)
-#define I10NM_GET_MCDDRTCFG(m, i, j)	\
-	readl((m)->mbase + ((m)->hbm_mc ? 0x970 : 0x20970) + \
-	(i) * (m)->chan_mmio_sz + (j) * 4)
+#define I10NM_GET_MCDDRTCFG(m, i)	\
+	readl((m)->mbase + ((m)->hbm_mc ? 0x970 : 0x20970) + \
+	(i) * (m)->chan_mmio_sz)
 #define I10NM_GET_MCMTR(m, i)		\
 	readl((m)->mbase + ((m)->hbm_mc ? 0xef8 : 0x20ef8) + \
 	(i) * (m)->chan_mmio_sz)
 #define I10NM_GET_AMAP(m, i)		\
 	readl((m)->mbase + ((m)->hbm_mc ? 0x814 : 0x20814) + \
 	(i) * (m)->chan_mmio_sz)
+#define I10NM_GET_REG32(m, i, offset)	\
+	readl((m)->mbase + (i) * (m)->chan_mmio_sz + (offset))
+#define I10NM_GET_REG64(m, i, offset)	\
+	readq((m)->mbase + (i) * (m)->chan_mmio_sz + (offset))
+#define I10NM_SET_REG32(m, i, offset, v)	\
+	writel(v, (m)->mbase + (i) * (m)->chan_mmio_sz + (offset))
 
 #define I10NM_GET_SCK_MMIO_BASE(reg)	(GET_BITFIELD(reg, 0, 28) << 23)
 #define I10NM_GET_IMC_MMIO_OFFSET(reg)	(GET_BITFIELD(reg, 0, 10) << 12)
@@ -58,8 +64,125 @@
 #define I10NM_SAD_ENABLE(reg)		GET_BITFIELD(reg, 0, 0)
 #define I10NM_SAD_NM_CACHEABLE(reg)	GET_BITFIELD(reg, 5, 5)
 
+#define RETRY_RD_ERR_LOG_UC		BIT(1)
+#define RETRY_RD_ERR_LOG_NOOVER		BIT(14)
+#define RETRY_RD_ERR_LOG_EN		BIT(15)
+#define RETRY_RD_ERR_LOG_NOOVER_UC	(BIT(14) | BIT(1))
+#define RETRY_RD_ERR_LOG_OVER_UC_V	(BIT(2) | BIT(1) | BIT(0))
+
 static struct list_head *i10nm_edac_list;
+static struct res_config *res_cfg;
+static int retry_rd_err_log;
+
+static u32 offsets_scrub_icx[]  = {0x22c60, 0x22c54, 0x22c5c, 0x22c58, 0x22c28, 0x20ed8};
+static u32 offsets_scrub_spr[]  = {0x22c60, 0x22c54, 0x22f08, 0x22c58, 0x22c28, 0x20ed8};
+static u32 offsets_demand_icx[] = {0x22e54, 0x22e60, 0x22e64, 0x22e58, 0x22e5c, 0x20ee0};
+static u32 offsets_demand_spr[] = {0x22e54, 0x22e60, 0x22f10, 0x22e58, 0x22e5c, 0x20ee0};
+
+static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable)
+{
+	u32 s, d;
+
+	if (!imc->mbase)
+		return;
+
+	s = I10NM_GET_REG32(imc, chan, res_cfg->offsets_scrub[0]);
+	d = I10NM_GET_REG32(imc, chan, res_cfg->offsets_demand[0]);
+
+	if (enable) {
+		/* Save default configurations */
+		imc->chan[chan].retry_rd_err_log_s = s;
+		imc->chan[chan].retry_rd_err_log_d = d;
+
+		s &= ~RETRY_RD_ERR_LOG_NOOVER_UC;
+		s |= RETRY_RD_ERR_LOG_EN;
+		d &= ~RETRY_RD_ERR_LOG_NOOVER_UC;
+		d |= RETRY_RD_ERR_LOG_EN;
+	} else {
+		/* Restore default configurations */
+		if (imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_UC)
+			s |= RETRY_RD_ERR_LOG_UC;
+		if (imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_NOOVER)
+			s |= RETRY_RD_ERR_LOG_NOOVER;
+		if (!(imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_EN))
+			s &= ~RETRY_RD_ERR_LOG_EN;
+		if (imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_UC)
+			d |= RETRY_RD_ERR_LOG_UC;
+		if (imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_NOOVER)
+			d |= RETRY_RD_ERR_LOG_NOOVER;
+		if (!(imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_EN))
+			d &= ~RETRY_RD_ERR_LOG_EN;
+	}
+
+	I10NM_SET_REG32(imc, chan, res_cfg->offsets_scrub[0], s);
+	I10NM_SET_REG32(imc, chan, res_cfg->offsets_demand[0], d);
+}
+
+static void enable_retry_rd_err_log(bool enable)
+{
+	struct skx_dev *d;
+	int i, j;
+
+	edac_dbg(2, "\n");
+
+	list_for_each_entry(d, i10nm_edac_list, list)
+		for (i = 0; i < I10NM_NUM_IMC; i++)
+			for (j = 0; j < I10NM_NUM_CHANNELS; j++)
+				__enable_retry_rd_err_log(&d->imc[i], j, enable);
+}
+
+static void show_retry_rd_err_log(struct decoded_addr *res, char *msg,
+				  int len, bool scrub_err)
+{
+	struct skx_imc *imc = &res->dev->imc[res->imc];
+	u32 log0, log1, log2, log3, log4;
+	u32 corr0, corr1, corr2, corr3;
+	u64 log2a, log5;
+	u32 *offsets;
+	int n;
+
+	if (!imc->mbase)
+		return;
+
+	offsets = scrub_err ? res_cfg->offsets_scrub : res_cfg->offsets_demand;
+
+	log0 = I10NM_GET_REG32(imc, res->channel, offsets[0]);
+	log1 = I10NM_GET_REG32(imc, res->channel, offsets[1]);
+	log3 = I10NM_GET_REG32(imc, res->channel, offsets[3]);
+	log4 = I10NM_GET_REG32(imc, res->channel, offsets[4]);
+	log5 = I10NM_GET_REG64(imc, res->channel, offsets[5]);
+
+	if (res_cfg->type == SPR) {
+		log2a = I10NM_GET_REG64(imc, res->channel, offsets[2]);
+		n = snprintf(msg, len, " retry_rd_err_log[%.8x %.8x %.16llx %.8x %.8x %.16llx]",
+			     log0, log1, log2a, log3, log4, log5);
+	} else {
+		log2 = I10NM_GET_REG32(imc, res->channel, offsets[2]);
+		n = snprintf(msg, len, " retry_rd_err_log[%.8x %.8x %.8x %.8x %.8x %.16llx]",
+			     log0, log1, log2, log3, log4, log5);
+	}
+
+	corr0 = I10NM_GET_REG32(imc, res->channel, 0x22c18);
+	corr1 = I10NM_GET_REG32(imc, res->channel, 0x22c1c);
+	corr2 = I10NM_GET_REG32(imc, res->channel, 0x22c20);
+	corr3 = I10NM_GET_REG32(imc, res->channel, 0x22c24);
+
+	if (len - n > 0)
+		snprintf(msg + n, len - n,
+			 " correrrcnt[%.4x %.4x %.4x %.4x %.4x %.4x %.4x %.4x]",
+			 corr0 & 0xffff, corr0 >> 16,
+			 corr1 & 0xffff, corr1 >> 16,
+			 corr2 & 0xffff, corr2 >> 16,
+			 corr3 & 0xffff, corr3 >> 16);
+
+	/* Clear status bits */
+	if (retry_rd_err_log == 2 && (log0 & RETRY_RD_ERR_LOG_OVER_UC_V)) {
+		log0 &= ~RETRY_RD_ERR_LOG_OVER_UC_V;
+		I10NM_SET_REG32(imc, res->channel, offsets[0], log0);
+	}
+}
 static struct pci_dev *pci_get_dev_wrapper(int dom, unsigned int bus,
 					   unsigned int dev, unsigned int fun)
 {
@@ -263,6 +386,8 @@ static struct res_config i10nm_cfg0 = {
 	.ddr_chan_mmio_sz	= 0x4000,
 	.sad_all_devfn		= PCI_DEVFN(29, 0),
 	.sad_all_offset		= 0x108,
+	.offsets_scrub		= offsets_scrub_icx,
+	.offsets_demand		= offsets_demand_icx,
 };
 
 static struct res_config i10nm_cfg1 = {
@@ -272,6 +397,8 @@ static struct res_config i10nm_cfg1 = {
 	.ddr_chan_mmio_sz	= 0x4000,
 	.sad_all_devfn		= PCI_DEVFN(29, 0),
 	.sad_all_offset		= 0x108,
+	.offsets_scrub		= offsets_scrub_icx,
+	.offsets_demand		= offsets_demand_icx,
 };
 
 static struct res_config spr_cfg = {
@@ -283,6 +410,8 @@ static struct res_config spr_cfg = {
 	.support_ddr5		= true,
 	.sad_all_devfn		= PCI_DEVFN(10, 0),
 	.sad_all_offset		= 0x300,
+	.offsets_scrub		= offsets_scrub_spr,
+	.offsets_demand		= offsets_demand_spr,
 };
 
 static const struct x86_cpu_id i10nm_cpuids[] = {
@@ -321,10 +450,10 @@ static int i10nm_get_dimm_config(struct mem_ctl_info *mci,
 		ndimms = 0;
 		amap = I10NM_GET_AMAP(imc, i);
+		mcddrtcfg = I10NM_GET_MCDDRTCFG(imc, i);
 
 		for (j = 0; j < imc->num_dimms; j++) {
 			dimm = edac_get_dimm(mci, i, j, 0);
 			mtr = I10NM_GET_DIMMMTR(imc, i, j);
-			mcddrtcfg = I10NM_GET_MCDDRTCFG(imc, i, j);
 			edac_dbg(1, "dimmmtr 0x%x mcddrtcfg 0x%x (mc%d ch%d dimm%d)\n",
 				 mtr, mcddrtcfg, imc->mc, i, j);
@@ -422,6 +551,7 @@ static int __init i10nm_init(void)
 		return -ENODEV;
 
 	cfg = (struct res_config *)id->driver_data;
+	res_cfg = cfg;
 
 	rc = skx_get_hi_lo(0x09a2, off, &tolm, &tohm);
 	if (rc)
@@ -486,6 +616,12 @@ static int __init i10nm_init(void)
 	mce_register_decode_chain(&i10nm_mce_dec);
 	setup_i10nm_debug();
+
+	if (retry_rd_err_log && res_cfg->offsets_scrub && res_cfg->offsets_demand) {
+		skx_set_decode(NULL, show_retry_rd_err_log);
+		if (retry_rd_err_log == 2)
+			enable_retry_rd_err_log(true);
+	}
 
 	i10nm_printk(KERN_INFO, "%s\n", I10NM_REVISION);
 
 	return 0;
@@ -497,6 +633,13 @@ static int __init i10nm_init(void)
 static void __exit i10nm_exit(void)
 {
 	edac_dbg(2, "\n");
 
+	if (retry_rd_err_log && res_cfg->offsets_scrub && res_cfg->offsets_demand) {
+		skx_set_decode(NULL, NULL);
+		if (retry_rd_err_log == 2)
+			enable_retry_rd_err_log(false);
+	}
+
 	teardown_i10nm_debug();
 	mce_unregister_decode_chain(&i10nm_mce_dec);
 	skx_adxl_put();
@@ -506,5 +649,8 @@ static void __exit i10nm_exit(void)
 module_init(i10nm_init);
 module_exit(i10nm_exit);
 
+module_param(retry_rd_err_log, int, 0444);
+MODULE_PARM_DESC(retry_rd_err_log, "retry_rd_err_log: 0=off(default), 1=bios(Linux doesn't reset any control bits, but just reports values.), 2=linux(Linux tries to take control and resets mode bits, clear valid/UC bits after reading.)");
+
 MODULE_LICENSE("GPL v2");
 MODULE_DESCRIPTION("MC Driver for Intel 10nm server processors");
@@ -1246,6 +1246,9 @@ static int __init mce_amd_init(void)
 	    c->x86_vendor != X86_VENDOR_HYGON)
 		return -ENODEV;
 
+	if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
+		return -ENODEV;
+
 	if (boot_cpu_has(X86_FEATURE_SMCA)) {
 		xec_mask = 0x3f;
 		goto out;
......
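
The mce_amd hunk above keeps the decoder module from loading in guests. For context, cpu_feature_enabled(X86_FEATURE_HYPERVISOR) tests the hypervisor-present bit (CPUID leaf 1, ECX bit 31), so the pattern for a bare-metal-only module init is simply the following sketch (names hypothetical):

#include <linux/init.h>
#include <asm/cpufeature.h>

static int __init example_baremetal_only_init(void)
{
	/* Decoding hardware errors is the host's job; bail out in a guest. */
	if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
		return -ENODEV;

	return 0;
}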
@@ -230,7 +230,8 @@ static int skx_get_dimm_config(struct mem_ctl_info *mci, struct res_config *cfg)
 #define	SKX_ILV_TARGET(tgt)	((tgt) & 7)
 
 static void skx_show_retry_rd_err_log(struct decoded_addr *res,
-				      char *msg, int len)
+				      char *msg, int len,
+				      bool scrub_err)
 {
 	u32 log0, log1, log2, log3, log4;
 	u32 corr0, corr1, corr2, corr3;
......
@@ -345,7 +345,10 @@ int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm,
 	rows = numrow(mtr);
 	cols = imc->hbm_mc ? 6 : numcol(mtr);
 
-	if (cfg->support_ddr5 && ((amap & 0x8) || imc->hbm_mc)) {
+	if (imc->hbm_mc) {
+		banks = 32;
+		mtype = MEM_HBM2;
+	} else if (cfg->support_ddr5 && (amap & 0x8)) {
 		banks = 32;
 		mtype = MEM_DDR5;
 	} else {
@@ -529,6 +532,7 @@ static void skx_mce_output_error(struct mem_ctl_info *mci,
 	bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0);
 	bool overflow = GET_BITFIELD(m->status, 62, 62);
 	bool uncorrected_error = GET_BITFIELD(m->status, 61, 61);
+	bool scrub_err = false;
 	bool recoverable;
 	int len;
 	u32 core_err_cnt = GET_BITFIELD(m->status, 38, 52);
@@ -580,6 +584,7 @@ static void skx_mce_output_error(struct mem_ctl_info *mci,
 			break;
 		case 4:
 			optype = "memory scrubbing error";
+			scrub_err = true;
 			break;
 		default:
 			optype = "reserved";
@@ -602,7 +607,7 @@ static void skx_mce_output_error(struct mem_ctl_info *mci,
 	}
 
 	if (skx_show_retry_rd_err_log)
-		skx_show_retry_rd_err_log(res, skx_msg + len, MSG_SIZE - len);
+		skx_show_retry_rd_err_log(res, skx_msg + len, MSG_SIZE - len, scrub_err);
 
 	edac_dbg(0, "%s\n", skx_msg);
......
@@ -80,6 +80,8 @@ struct skx_dev {
 	struct skx_channel {
 		struct pci_dev	*cdev;
 		struct pci_dev	*edev;
+		u32 retry_rd_err_log_s;
+		u32 retry_rd_err_log_d;
 		struct skx_dimm {
 			u8 close_pg;
 			u8 bank_xor_enable;
@@ -150,12 +152,15 @@ struct res_config {
 	/* SAD device number and function number */
 	unsigned int sad_all_devfn;
 	int sad_all_offset;
+	/* Offsets of retry_rd_err_log registers */
+	u32 *offsets_scrub;
+	u32 *offsets_demand;
 };
 
 typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci,
 				 struct res_config *cfg);
 typedef bool (*skx_decode_f)(struct decoded_addr *res);
-typedef void (*skx_show_retry_log_f)(struct decoded_addr *res, char *msg, int len);
+typedef void (*skx_show_retry_log_f)(struct decoded_addr *res, char *msg, int len, bool scrub_err);
 
 int __init skx_adxl_get(void);
 void __exit skx_adxl_put(void);
......
@@ -184,6 +184,7 @@ static inline char *mc_event_error_type(const unsigned int err_type)
  * @MEM_DDR5:		Unbuffered DDR5 RAM
  * @MEM_NVDIMM:		Non-volatile RAM
  * @MEM_WIO2:		Wide I/O 2.
+ * @MEM_HBM2:		High bandwidth Memory Gen 2.
  */
 enum mem_type {
 	MEM_EMPTY = 0,
@@ -212,6 +213,7 @@ enum mem_type {
 	MEM_DDR5,
 	MEM_NVDIMM,
 	MEM_WIO2,
+	MEM_HBM2,
 };
 
 #define MEM_FLAG_EMPTY		BIT(MEM_EMPTY)
@@ -239,6 +241,7 @@ enum mem_type {
 #define MEM_FLAG_DDR5           BIT(MEM_DDR5)
 #define MEM_FLAG_NVDIMM         BIT(MEM_NVDIMM)
 #define MEM_FLAG_WIO2		BIT(MEM_WIO2)
+#define MEM_FLAG_HBM2		BIT(MEM_HBM2)
 
 /**
  * enum edac_type - Error Detection and Correction capabilities and mode
......
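
Finally, the edac.h hunk above gives drivers both the enum value and a matching capability flag. A driver advertising HBM2 support would do something like the following sketch (hypothetical controller, illustrative values):

#include <linux/edac.h>

static void example_setup_mci_caps(struct mem_ctl_info *mci)
{
	mci->mtype_cap = MEM_FLAG_HBM2 | MEM_FLAG_DDR4;	/* memory types it can drive */
	mci->edac_ctl_cap = EDAC_FLAG_SECDED;		/* ECC mode it supports */
}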