Commit fa45a45c authored by Ingo Molnar

Merge tag 'ras_for_3.21' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras into x86/ras

Pull RAS updates from Borislav Petkov:

 "- Enable AMD thresholding IRQ by default if supported. (Aravind Gopalakrishnan)

  - Unify mce_panic() message pattern. (Derek Che)

  - A bit more involved simplification of the CMCI logic after yet another
    report about a race condition with the adaptive logic. (Borislav Petkov)

  - ACPI APEI EINJ fleshing out of the user documentation. (Borislav Petkov)

  - Minor cleanup. (Jan Beulich)"
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parents e07e0d4c d79f931f
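The centerpiece of the CMCI rework below: machine_check_poll() now reports whether it actually logged an error, and the Intel storm code keeps a per-CPU backoff counter (cmci_backoff_cnt) that holds the timer in fast polling only while errors keep arriving. A condensed sketch of the resulting timer flow, assembled from the hunks below for orientation only (not a literal copy; the mce_available() check and the non-Intel stubs are omitted):

/* Orientation sketch only -- condensed from the diff below, not the exact kernel code. */
static void mce_timer_fn(unsigned long data)
{
	struct timer_list *t = this_cpu_ptr(&mce_timer);
	unsigned long iv = __this_cpu_read(mce_next_interval);

	WARN_ON(smp_processor_id() != data);

	machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_poll_banks));

	/*
	 * mce_intel_cmci_poll() now returns true while a CMCI storm is being
	 * polled; the Intel code then picks the interval itself and keeps the
	 * fast CMCI_STORM_INTERVAL as long as its per-CPU cmci_backoff_cnt > 0.
	 */
	if (mce_intel_cmci_poll()) {
		iv = mce_adjust_timer(iv);	/* cmci_intel_adjust_timer() on Intel */
		goto done;
	}

	/* No storm: the usual adaptive polling, 2x faster after an event, 2x slower otherwise. */
	if (mce_notify_irq())
		iv = max(iv / 2, (unsigned long) HZ/100);
	else
		iv = min(iv * 2, round_jiffies_relative(check_interval * HZ));

done:
	__this_cpu_write(mce_next_interval, iv);
	__restart_timer(t, iv);		/* new helper, also used by mce_timer_kick() */
}

The new __restart_timer() helper (see the mce.c hunks) centralizes the timer rearming logic shared by the timer callback and mce_timer_kick().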
@@ -183,11 +183,11 @@ typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS);
 DECLARE_PER_CPU(mce_banks_t, mce_poll_banks);
 
 enum mcp_flags {
-	MCP_TIMESTAMP = (1 << 0),	/* log time stamp */
-	MCP_UC        = (1 << 1),	/* log uncorrected errors */
-	MCP_DONTLOG   = (1 << 2),	/* only clear, don't log */
+	MCP_TIMESTAMP = BIT(0),		/* log time stamp */
+	MCP_UC        = BIT(1),		/* log uncorrected errors */
+	MCP_DONTLOG   = BIT(2),		/* only clear, don't log */
 };
-void machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
+bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
 
 int mce_notify_irq(void);
...
@@ -14,6 +14,7 @@ enum severity_level {
 };
 
 #define ATTR_LEN		16
+#define INITIAL_CHECK_INTERVAL	5 * 60 /* 5 minutes */
 
 /* One object for each MCE bank, shared by all CPUs */
 struct mce_bank {
@@ -30,13 +31,13 @@ extern struct mce_bank *mce_banks;
 extern mce_banks_t mce_banks_ce_disabled;
 
 #ifdef CONFIG_X86_MCE_INTEL
-unsigned long mce_intel_adjust_timer(unsigned long interval);
-void mce_intel_cmci_poll(void);
+unsigned long cmci_intel_adjust_timer(unsigned long interval);
+bool mce_intel_cmci_poll(void);
 void mce_intel_hcpu_update(unsigned long cpu);
 void cmci_disable_bank(int bank);
 #else
-# define mce_intel_adjust_timer mce_adjust_timer_default
-static inline void mce_intel_cmci_poll(void) { }
+# define cmci_intel_adjust_timer mce_adjust_timer_default
+static inline bool mce_intel_cmci_poll(void) { return false; }
 static inline void mce_intel_hcpu_update(unsigned long cpu) { }
 static inline void cmci_disable_bank(int bank) { }
 #endif
...
@@ -59,7 +59,7 @@ static DEFINE_MUTEX(mce_chrdev_read_mutex);
 #define CREATE_TRACE_POINTS
 #include <trace/events/mce.h>
 
-#define SPINUNIT 100	/* 100ns */
+#define SPINUNIT	100	/* 100ns */
 
 DEFINE_PER_CPU(unsigned, mce_exception_count);
@@ -88,9 +88,6 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait);
 static DEFINE_PER_CPU(struct mce, mces_seen);
 static int cpu_missing;
 
-/* CMCI storm detection filter */
-static DEFINE_PER_CPU(unsigned long, mce_polled_error);
-
 /*
  * MCA banks polled by the period polling timer for corrected events.
  * With Intel CMCI, this only has MCA banks which do not support CMCI (if any).
@@ -624,8 +621,9 @@ DEFINE_PER_CPU(unsigned, mce_poll_count);
  * is already totally * confused. In this case it's likely it will
  * not fully execute the machine check handler either.
  */
-void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
+bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 {
+	bool error_logged = false;
 	struct mce m;
 	int severity;
 	int i;
@@ -648,7 +646,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 		if (!(m.status & MCI_STATUS_VAL))
 			continue;
 
-		this_cpu_write(mce_polled_error, 1);
+
 		/*
 		 * Uncorrected or signalled events are handled by the exception
 		 * handler when it is enabled, so don't process those here.
@@ -681,8 +679,10 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 		 * Don't get the IP here because it's unlikely to
 		 * have anything to do with the actual error location.
 		 */
-		if (!(flags & MCP_DONTLOG) && !mca_cfg.dont_log_ce)
+		if (!(flags & MCP_DONTLOG) && !mca_cfg.dont_log_ce) {
+			error_logged = true;
 			mce_log(&m);
+		}
 
 		/*
 		 * Clear state for this bank.
@@ -696,6 +696,8 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 	 */
 	sync_core();
+
+	return error_logged;
 }
 EXPORT_SYMBOL_GPL(machine_check_poll);
@@ -815,7 +817,7 @@ static void mce_reign(void)
 	 * other CPUs.
 	 */
 	if (m && global_worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3)
-		mce_panic("Fatal Machine check", m, msg);
+		mce_panic("Fatal machine check", m, msg);
 
 	/*
 	 * For UC somewhere we let the CPU who detects it handle it.
@@ -828,7 +830,7 @@ static void mce_reign(void)
 	 * source or one CPU is hung. Panic.
 	 */
 	if (global_worst <= MCE_KEEP_SEVERITY && mca_cfg.tolerant < 3)
-		mce_panic("Machine check from unknown source", NULL, NULL);
+		mce_panic("Fatal machine check from unknown source", NULL, NULL);
 
 	/*
 	 * Now clear all the mces_seen so that they don't reappear on
@@ -1260,7 +1262,7 @@ void mce_log_therm_throt_event(__u64 status)
 * poller finds an MCE, poll 2x faster. When the poller finds no more
 * errors, poll 2x slower (up to check_interval seconds).
 */
-static unsigned long check_interval = 5 * 60; /* 5 minutes */
+static unsigned long check_interval = INITIAL_CHECK_INTERVAL;
 
 static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */
 static DEFINE_PER_CPU(struct timer_list, mce_timer);
@@ -1270,49 +1272,57 @@ static unsigned long mce_adjust_timer_default(unsigned long interval)
 	return interval;
 }
 
-static unsigned long (*mce_adjust_timer)(unsigned long interval) =
-	mce_adjust_timer_default;
+static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default;
 
-static int cmc_error_seen(void)
+static void __restart_timer(struct timer_list *t, unsigned long interval)
 {
-	unsigned long *v = this_cpu_ptr(&mce_polled_error);
+	unsigned long when = jiffies + interval;
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	if (timer_pending(t)) {
+		if (time_before(when, t->expires))
+			mod_timer_pinned(t, when);
+	} else {
+		t->expires = round_jiffies(when);
+		add_timer_on(t, smp_processor_id());
+	}
 
-	return test_and_clear_bit(0, v);
+	local_irq_restore(flags);
 }
 
 static void mce_timer_fn(unsigned long data)
 {
 	struct timer_list *t = this_cpu_ptr(&mce_timer);
+	int cpu = smp_processor_id();
 	unsigned long iv;
-	int notify;
 
-	WARN_ON(smp_processor_id() != data);
+	WARN_ON(cpu != data);
+
+	iv = __this_cpu_read(mce_next_interval);
 
 	if (mce_available(this_cpu_ptr(&cpu_info))) {
-		machine_check_poll(MCP_TIMESTAMP,
-				this_cpu_ptr(&mce_poll_banks));
-		mce_intel_cmci_poll();
+		machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_poll_banks));
+
+		if (mce_intel_cmci_poll()) {
+			iv = mce_adjust_timer(iv);
+			goto done;
+		}
 	}
 
 	/*
-	 * Alert userspace if needed. If we logged an MCE, reduce the
-	 * polling interval, otherwise increase the polling interval.
+	 * Alert userspace if needed. If we logged an MCE, reduce the polling
+	 * interval, otherwise increase the polling interval.
 	 */
-	iv = __this_cpu_read(mce_next_interval);
-	notify = mce_notify_irq();
-	notify |= cmc_error_seen();
-	if (notify) {
+	if (mce_notify_irq())
 		iv = max(iv / 2, (unsigned long) HZ/100);
-	} else {
+	else
 		iv = min(iv * 2, round_jiffies_relative(check_interval * HZ));
-		iv = mce_adjust_timer(iv);
-	}
+
+done:
 	__this_cpu_write(mce_next_interval, iv);
-	/* Might have become 0 after CMCI storm subsided */
-	if (iv) {
-		t->expires = jiffies + iv;
-		add_timer_on(t, smp_processor_id());
-	}
+	__restart_timer(t, iv);
 }
 
 /*
@@ -1321,16 +1331,10 @@ static void mce_timer_fn(unsigned long data)
 void mce_timer_kick(unsigned long interval)
 {
 	struct timer_list *t = this_cpu_ptr(&mce_timer);
-	unsigned long when = jiffies + interval;
 	unsigned long iv = __this_cpu_read(mce_next_interval);
 
-	if (timer_pending(t)) {
-		if (time_before(when, t->expires))
-			mod_timer_pinned(t, when);
-	} else {
-		t->expires = round_jiffies(when);
-		add_timer_on(t, smp_processor_id());
-	}
+	__restart_timer(t, interval);
 
 	if (interval < iv)
 		__this_cpu_write(mce_next_interval, interval);
 }
@@ -1631,7 +1635,7 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
 	switch (c->x86_vendor) {
 	case X86_VENDOR_INTEL:
 		mce_intel_feature_init(c);
-		mce_adjust_timer = mce_intel_adjust_timer;
+		mce_adjust_timer = cmci_intel_adjust_timer;
 		break;
 	case X86_VENDOR_AMD:
 		mce_amd_feature_init(c);
...
@@ -79,7 +79,7 @@ static inline bool is_shared_bank(int bank)
 	return (bank == 4);
 }
 
-static const char * const bank4_names(struct threshold_block *b)
+static const char *bank4_names(const struct threshold_block *b)
 {
 	switch (b->address) {
 	/* MSR4_MISC0 */
@@ -250,6 +250,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
 			if (!b.interrupt_capable)
 				goto init;
 
+			b.interrupt_enable = 1;
 			new = (high & MASK_LVTOFF_HI) >> 20;
 			offset = setup_APIC_mce(offset, new);
@@ -322,6 +323,8 @@ static void amd_threshold_interrupt(void)
 log:
 	mce_setup(&m);
 	rdmsrl(MSR_IA32_MCx_STATUS(bank), m.status);
+	if (!(m.status & MCI_STATUS_VAL))
+		return;
 	m.misc = ((u64)high << 32) | low;
 	m.bank = bank;
 	mce_log(&m);
@@ -497,10 +500,12 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
 	b->interrupt_capable = lvt_interrupt_supported(bank, high);
 	b->threshold_limit = THRESHOLD_MAX;
 
-	if (b->interrupt_capable)
+	if (b->interrupt_capable) {
 		threshold_ktype.default_attrs[2] = &interrupt_enable.attr;
-	else
+		b->interrupt_enable = 1;
+	} else {
 		threshold_ktype.default_attrs[2] = NULL;
+	}
 
 	INIT_LIST_HEAD(&b->miscj);
...
@@ -38,6 +38,15 @@
  */
 static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
 
+/*
+ * CMCI storm detection backoff counter
+ *
+ * During storm, we reset this counter to INITIAL_CHECK_INTERVAL in case we've
+ * encountered an error. If not, we decrement it by one. We signal the end of
+ * the CMCI storm when it reaches 0.
+ */
+static DEFINE_PER_CPU(int, cmci_backoff_cnt);
+
 /*
  * cmci_discover_lock protects against parallel discovery attempts
  * which could race against each other.
@@ -46,7 +55,7 @@ static DEFINE_RAW_SPINLOCK(cmci_discover_lock);
 #define CMCI_THRESHOLD		1
 #define CMCI_POLL_INTERVAL	(30 * HZ)
-#define CMCI_STORM_INTERVAL	(1 * HZ)
+#define CMCI_STORM_INTERVAL	(HZ)
 #define CMCI_STORM_THRESHOLD	15
 
 static DEFINE_PER_CPU(unsigned long, cmci_time_stamp);
@@ -82,11 +91,21 @@ static int cmci_supported(int *banks)
 	return !!(cap & MCG_CMCI_P);
 }
 
-void mce_intel_cmci_poll(void)
+bool mce_intel_cmci_poll(void)
 {
 	if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
-		return;
-	machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
+		return false;
+
+	/*
+	 * Reset the counter if we've logged an error in the last poll
+	 * during the storm.
+	 */
+	if (machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned)))
+		this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL);
+	else
+		this_cpu_dec(cmci_backoff_cnt);
+
+	return true;
 }
 
 void mce_intel_hcpu_update(unsigned long cpu)
@@ -97,31 +116,32 @@ void mce_intel_hcpu_update(unsigned long cpu)
 	per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE;
 }
 
-unsigned long mce_intel_adjust_timer(unsigned long interval)
+unsigned long cmci_intel_adjust_timer(unsigned long interval)
 {
-	int r;
-
-	if (interval < CMCI_POLL_INTERVAL)
-		return interval;
+	if ((this_cpu_read(cmci_backoff_cnt) > 0) &&
+	    (__this_cpu_read(cmci_storm_state) == CMCI_STORM_ACTIVE)) {
+		mce_notify_irq();
+		return CMCI_STORM_INTERVAL;
+	}
 
 	switch (__this_cpu_read(cmci_storm_state)) {
 	case CMCI_STORM_ACTIVE:
 		/*
 		 * We switch back to interrupt mode once the poll timer has
-		 * silenced itself. That means no events recorded and the
-		 * timer interval is back to our poll interval.
+		 * silenced itself. That means no events recorded and the timer
+		 * interval is back to our poll interval.
 		 */
 		__this_cpu_write(cmci_storm_state, CMCI_STORM_SUBSIDED);
-		r = atomic_sub_return(1, &cmci_storm_on_cpus);
-		if (r == 0)
+		if (!atomic_sub_return(1, &cmci_storm_on_cpus))
 			pr_notice("CMCI storm subsided: switching to interrupt mode\n");
+
 		/* FALLTHROUGH */
 	case CMCI_STORM_SUBSIDED:
 		/*
-		 * We wait for all cpus to go back to SUBSIDED
-		 * state. When that happens we switch back to
-		 * interrupt mode.
+		 * We wait for all CPUs to go back to SUBSIDED state. When that
+		 * happens we switch back to interrupt mode.
 		 */
 		if (!atomic_read(&cmci_storm_on_cpus)) {
 			__this_cpu_write(cmci_storm_state, CMCI_STORM_NONE);
@@ -130,10 +150,8 @@ unsigned long mce_intel_adjust_timer(unsigned long interval)
 		}
 
 		return CMCI_POLL_INTERVAL;
 	default:
-		/*
-		 * We have shiny weather. Let the poll do whatever it
-		 * thinks.
-		 */
+		/* We have shiny weather. Let the poll do whatever it thinks. */
 		return interval;
 	}
 }
@@ -178,7 +196,8 @@ static bool cmci_storm_detect(void)
 	cmci_storm_disable_banks();
 	__this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
 	r = atomic_add_return(1, &cmci_storm_on_cpus);
-	mce_timer_kick(CMCI_POLL_INTERVAL);
+	mce_timer_kick(CMCI_STORM_INTERVAL);
+	this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL);
 
 	if (r == 1)
 		pr_notice("CMCI storm detected: switching to poll mode\n");
@@ -195,6 +214,7 @@ static void intel_threshold_interrupt(void)
 {
 	if (cmci_storm_detect())
 		return;
+
 	machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
 	mce_notify_irq();
 }
@@ -286,6 +306,7 @@ void cmci_recheck(void)
 	if (!mce_available(raw_cpu_ptr(&cpu_info)) || !cmci_supported(&banks))
 		return;
+
 	local_irq_save(flags);
 	machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
 	local_irq_restore(flags);
...