Commit 66fed2d4 authored by Borislav Petkov, committed by Borislav Petkov

amd64_edac: Improve error injection

When injecting DRAM ECC errors over the F3xB[8,C] interface, the machine
does this by injecting the error in the next non-cached access. This
takes a relatively long time on a normal system, so in order to expedite
it, we disable the caches around the injection.
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
parent 6e71a870
...@@ -60,7 +60,7 @@ struct scrubrate { ...@@ -60,7 +60,7 @@ struct scrubrate {
{ 0x00, 0UL}, /* scrubbing off */ { 0x00, 0UL}, /* scrubbing off */
}; };
static int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset, int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset,
u32 *val, const char *func) u32 *val, const char *func)
{ {
int err = 0; int err = 0;
...@@ -1980,11 +1980,11 @@ static void amd64_handle_ue(struct mem_ctl_info *mci, struct mce *m) ...@@ -1980,11 +1980,11 @@ static void amd64_handle_ue(struct mem_ctl_info *mci, struct mce *m)
static inline void __amd64_decode_bus_error(struct mem_ctl_info *mci, static inline void __amd64_decode_bus_error(struct mem_ctl_info *mci,
struct mce *m) struct mce *m)
{ {
u16 ec = EC(m->status);
u8 xec = XEC(m->status, 0x1f);
u8 ecc_type = (m->status >> 45) & 0x3; u8 ecc_type = (m->status >> 45) & 0x3;
u8 xec = XEC(m->status, 0x1f);
u16 ec = EC(m->status);
/* Bail early out if this was an 'observed' error */ /* Bail out early if this was an 'observed' error */
if (PP(ec) == NBSL_PP_OBS) if (PP(ec) == NBSL_PP_OBS)
return; return;
......
...@@ -273,9 +273,10 @@ ...@@ -273,9 +273,10 @@
#define SET_NB_ARRAY_ADDR(section) (((section) & 0x3) << 1) #define SET_NB_ARRAY_ADDR(section) (((section) & 0x3) << 1)
#define F10_NB_ARRAY_DATA 0xBC #define F10_NB_ARRAY_DATA 0xBC
/*
 * Bit 17 of the F10_NB_ARRAY_DATA register: the ECC write-request flag that
 * SET_NB_DRAM_INJECTION_WRITE() sets. amd64_inject_write_store() re-reads the
 * register until this bit is no longer set, i.e. until the injection happened.
 */
#define F10_NB_ARR_ECC_WR_REQ BIT(17)
#define SET_NB_DRAM_INJECTION_WRITE(inj) \ #define SET_NB_DRAM_INJECTION_WRITE(inj) \
(BIT(((inj.word) & 0xF) + 20) | \ (BIT(((inj.word) & 0xF) + 20) | \
BIT(17) | inj.bit_map) F10_NB_ARR_ECC_WR_REQ | inj.bit_map)
#define SET_NB_DRAM_INJECTION_READ(inj) \ #define SET_NB_DRAM_INJECTION_READ(inj) \
(BIT(((inj.word) & 0xF) + 20) | \ (BIT(((inj.word) & 0xF) + 20) | \
BIT(16) | inj.bit_map) BIT(16) | inj.bit_map)
...@@ -460,6 +461,8 @@ struct amd64_family_type { ...@@ -460,6 +461,8 @@ struct amd64_family_type {
struct low_ops ops; struct low_ops ops;
}; };
/*
 * Declared here because the definition is no longer static.
 *
 * NOTE(review): the definition is not fully visible from this hunk. Presumably
 * reads one dword from @pdev's PCI config space at @offset into *@val, with
 * @func naming the caller for diagnostics -- confirm against the definition.
 */
int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset,
u32 *val, const char *func);
int __amd64_write_pci_cfg_dword(struct pci_dev *pdev, int offset, int __amd64_write_pci_cfg_dword(struct pci_dev *pdev, int offset,
u32 val, const char *func); u32 val, const char *func);
...@@ -476,3 +479,15 @@ int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base, ...@@ -476,3 +479,15 @@ int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base,
u64 *hole_offset, u64 *hole_size); u64 *hole_offset, u64 *hole_size);
#define to_mci(k) container_of(k, struct mem_ctl_info, dev) #define to_mci(k) container_of(k, struct mem_ctl_info, dev)
/* Injection helpers */
/*
 * Turn caching off on the CPU executing this function: set the CD flag in
 * CR0, then issue WBINVD.
 *
 * @dummy is unused; the void * parameter lets this be handed to
 * on_each_cpu() as a per-CPU callback.
 */
static inline void disable_caches(void *dummy)
{
	unsigned long cr0 = read_cr0();

	cr0 |= X86_CR0_CD;
	write_cr0(cr0);

	/* Must come after CD is set, same order as before. */
	wbinvd();
}
/*
 * Turn caching back on for the CPU executing this function by clearing the
 * CD flag in CR0.
 *
 * @dummy is unused; the void * parameter lets this be handed to
 * on_each_cpu() as a per-CPU callback.
 */
static inline void enable_caches(void *dummy)
{
	unsigned long cr0 = read_cr0();

	cr0 &= ~X86_CR0_CD;
	write_cr0(cr0);
}
...@@ -153,8 +153,8 @@ static ssize_t amd64_inject_write_store(struct device *dev, ...@@ -153,8 +153,8 @@ static ssize_t amd64_inject_write_store(struct device *dev,
{ {
struct mem_ctl_info *mci = to_mci(dev); struct mem_ctl_info *mci = to_mci(dev);
struct amd64_pvt *pvt = mci->pvt_info; struct amd64_pvt *pvt = mci->pvt_info;
u32 section, word_bits, tmp;
unsigned long value; unsigned long value;
u32 section, word_bits;
int ret; int ret;
ret = strict_strtoul(data, 10, &value); ret = strict_strtoul(data, 10, &value);
...@@ -168,9 +168,25 @@ static ssize_t amd64_inject_write_store(struct device *dev, ...@@ -168,9 +168,25 @@ static ssize_t amd64_inject_write_store(struct device *dev,
word_bits = SET_NB_DRAM_INJECTION_WRITE(pvt->injection); word_bits = SET_NB_DRAM_INJECTION_WRITE(pvt->injection);
pr_notice_once("Don't forget to decrease MCE polling interval in\n"
"/sys/bus/machinecheck/devices/machinecheck<CPUNUM>/check_interval\n"
"so that you can get the error report faster.\n");
on_each_cpu(disable_caches, NULL, 1);
/* Issue 'word' and 'bit' along with the READ request */ /* Issue 'word' and 'bit' along with the READ request */
amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, word_bits); amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, word_bits);
retry:
/* wait until injection happens */
amd64_read_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, &tmp);
if (tmp & F10_NB_ARR_ECC_WR_REQ) {
cpu_relax();
goto retry;
}
on_each_cpu(enable_caches, NULL, 1);
edac_dbg(0, "section=0x%x word_bits=0x%x\n", section, word_bits); edac_dbg(0, "section=0x%x word_bits=0x%x\n", section, word_bits);
return count; return count;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment