Commit fa45a45c authored by Ingo Molnar

Merge tag 'ras_for_3.21' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras into x86/ras

Pull RAS updates from Borislav Petkov:

 "- Enable AMD thresholding IRQ by default if supported. (Aravind Gopalakrishnan)

  - Unify mce_panic() message pattern. (Derek Che)

  - A bit more involved simplification of the CMCI logic after yet another
    report about a race condition with the adaptive logic. (Borislav Petkov)

  - ACPI APEI EINJ fleshing out of the user documentation. (Borislav Petkov)

  - Minor cleanup. (Jan Beulich)"
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parents e07e0d4c d79f931f
@@ -183,11 +183,11 @@ typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS);
 DECLARE_PER_CPU(mce_banks_t, mce_poll_banks);
 
 enum mcp_flags {
-        MCP_TIMESTAMP = (1 << 0),       /* log time stamp */
-        MCP_UC = (1 << 1),              /* log uncorrected errors */
-        MCP_DONTLOG = (1 << 2),         /* only clear, don't log */
+        MCP_TIMESTAMP   = BIT(0),       /* log time stamp */
+        MCP_UC          = BIT(1),       /* log uncorrected errors */
+        MCP_DONTLOG     = BIT(2),       /* only clear, don't log */
 };
 
-void machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
+bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
 
 int mce_notify_irq(void);
...
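(Side note: BIT() is the kernel's standard bit-mask helper, which expands to (1UL << (nr)), so the enum change above is purely cosmetic. A minimal standalone sketch of the equivalence, with the macro inlined since this is not kernel code:)

#include <assert.h>

#define BIT(nr) (1UL << (nr))  /* same expansion as the kernel helper */

enum mcp_flags {
        MCP_TIMESTAMP   = BIT(0),       /* log time stamp */
        MCP_UC          = BIT(1),       /* log uncorrected errors */
        MCP_DONTLOG     = BIT(2),       /* only clear, don't log */
};

int main(void)
{
        /* BIT(n) and (1 << n) produce identical masks for these flags. */
        assert(MCP_TIMESTAMP == (1 << 0));
        assert(MCP_UC == (1 << 1));
        assert(MCP_DONTLOG == (1 << 2));
        return 0;
}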
@@ -14,6 +14,7 @@ enum severity_level {
 };
 
 #define ATTR_LEN                16
+#define INITIAL_CHECK_INTERVAL  5 * 60 /* 5 minutes */
 
 /* One object for each MCE bank, shared by all CPUs */
 struct mce_bank {
@@ -30,13 +31,13 @@ extern struct mce_bank *mce_banks;
 extern mce_banks_t mce_banks_ce_disabled;
 
 #ifdef CONFIG_X86_MCE_INTEL
-unsigned long mce_intel_adjust_timer(unsigned long interval);
-void mce_intel_cmci_poll(void);
+unsigned long cmci_intel_adjust_timer(unsigned long interval);
+bool mce_intel_cmci_poll(void);
 void mce_intel_hcpu_update(unsigned long cpu);
 void cmci_disable_bank(int bank);
 #else
-# define mce_intel_adjust_timer mce_adjust_timer_default
-static inline void mce_intel_cmci_poll(void) { }
+# define cmci_intel_adjust_timer mce_adjust_timer_default
+static inline bool mce_intel_cmci_poll(void) { return false; }
 static inline void mce_intel_hcpu_update(unsigned long cpu) { }
 static inline void cmci_disable_bank(int bank) { }
 #endif
...
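(The #else stubs above keep non-Intel configurations building: with CONFIG_X86_MCE_INTEL off, mce_intel_cmci_poll() collapses to a static inline that now reports "no storm poll happened". The same pattern in miniature, where DEMO_INTEL is a made-up stand-in for the Kconfig symbol:)

#include <stdbool.h>
#include <stdio.h>

/* Define this to mimic CONFIG_X86_MCE_INTEL being enabled. */
/* #define DEMO_INTEL 1 */

#ifdef DEMO_INTEL
bool mce_intel_cmci_poll(void);         /* real implementation lives elsewhere */
#else
/* Stub: compiles away, and callers uniformly see "nothing was polled". */
static inline bool mce_intel_cmci_poll(void) { return false; }
#endif

int main(void)
{
        printf("storm poll ran: %d\n", mce_intel_cmci_poll());
        return 0;
}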
@@ -88,9 +88,6 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait);
 static DEFINE_PER_CPU(struct mce, mces_seen);
 static int cpu_missing;
 
-/* CMCI storm detection filter */
-static DEFINE_PER_CPU(unsigned long, mce_polled_error);
-
 /*
  * MCA banks polled by the period polling timer for corrected events.
  * With Intel CMCI, this only has MCA banks which do not support CMCI (if any).
@@ -624,8 +621,9 @@ DEFINE_PER_CPU(unsigned, mce_poll_count);
  * is already totally confused. In this case it's likely it will
  * not fully execute the machine check handler either.
  */
-void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
+bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 {
+        bool error_logged = false;
         struct mce m;
         int severity;
         int i;
@@ -648,7 +646,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
                 if (!(m.status & MCI_STATUS_VAL))
                         continue;
 
-                this_cpu_write(mce_polled_error, 1);
                 /*
                  * Uncorrected or signalled events are handled by the exception
                  * handler when it is enabled, so don't process those here.
@@ -681,8 +679,10 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
                  * Don't get the IP here because it's unlikely to
                  * have anything to do with the actual error location.
                  */
-                if (!(flags & MCP_DONTLOG) && !mca_cfg.dont_log_ce)
+                if (!(flags & MCP_DONTLOG) && !mca_cfg.dont_log_ce) {
+                        error_logged = true;
                         mce_log(&m);
+                }
 
                 /*
                  * Clear state for this bank.
@@ -696,6 +696,8 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
          */
         sync_core();
+
+        return error_logged;
 }
 EXPORT_SYMBOL_GPL(machine_check_poll);
@@ -815,7 +817,7 @@ static void mce_reign(void)
          * other CPUs.
          */
         if (m && global_worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3)
-                mce_panic("Fatal Machine check", m, msg);
+                mce_panic("Fatal machine check", m, msg);
 
         /*
          * For UC somewhere we let the CPU who detects it handle it.
@@ -828,7 +830,7 @@ static void mce_reign(void)
          * source or one CPU is hung. Panic.
          */
         if (global_worst <= MCE_KEEP_SEVERITY && mca_cfg.tolerant < 3)
-                mce_panic("Machine check from unknown source", NULL, NULL);
+                mce_panic("Fatal machine check from unknown source", NULL, NULL);
 
         /*
          * Now clear all the mces_seen so that they don't reappear on
@@ -1260,7 +1262,7 @@ void mce_log_therm_throt_event(__u64 status)
  * poller finds an MCE, poll 2x faster. When the poller finds no more
  * errors, poll 2x slower (up to check_interval seconds).
  */
-static unsigned long check_interval = 5 * 60; /* 5 minutes */
+static unsigned long check_interval = INITIAL_CHECK_INTERVAL;
 
 static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */
 static DEFINE_PER_CPU(struct timer_list, mce_timer);
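(The comment above is the whole algorithm: halve the interval when an event was logged, double it when things stay quiet, clamped between HZ/100 and check_interval. A userspace sketch of that policy, with an assumed HZ value; the kernel additionally rounds the slow path to whole seconds via round_jiffies_relative():)

#include <stdbool.h>
#include <stdio.h>

#define HZ 1000                         /* assumed tick rate for the sketch */

static const unsigned long check_interval = 5 * 60;    /* seconds */

static unsigned long adapt(unsigned long iv, bool logged_mce)
{
        unsigned long min_iv = HZ / 100;                /* 10ms floor */
        unsigned long max_iv = check_interval * HZ;     /* 5min ceiling */

        if (logged_mce)
                return iv / 2 > min_iv ? iv / 2 : min_iv;   /* poll 2x faster */

        return iv * 2 < max_iv ? iv * 2 : max_iv;           /* poll 2x slower */
}

int main(void)
{
        unsigned long iv = check_interval * HZ;

        iv = adapt(iv, true);   /* an MCE was logged: 150000 jiffies */
        iv = adapt(iv, false);  /* quiet again: back up to 300000 */
        printf("interval = %lu jiffies\n", iv);
        return 0;
}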
@@ -1270,49 +1272,57 @@ static unsigned long mce_adjust_timer_default(unsigned long interval)
         return interval;
 }
 
-static unsigned long (*mce_adjust_timer)(unsigned long interval) =
-        mce_adjust_timer_default;
+static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default;
 
-static int cmc_error_seen(void)
+static void __restart_timer(struct timer_list *t, unsigned long interval)
 {
-        unsigned long *v = this_cpu_ptr(&mce_polled_error);
+        unsigned long when = jiffies + interval;
+        unsigned long flags;
 
-        return test_and_clear_bit(0, v);
+        local_irq_save(flags);
+
+        if (timer_pending(t)) {
+                if (time_before(when, t->expires))
+                        mod_timer_pinned(t, when);
+        } else {
+                t->expires = round_jiffies(when);
+                add_timer_on(t, smp_processor_id());
+        }
+
+        local_irq_restore(flags);
 }
 
 static void mce_timer_fn(unsigned long data)
 {
         struct timer_list *t = this_cpu_ptr(&mce_timer);
+        int cpu = smp_processor_id();
         unsigned long iv;
-        int notify;
 
-        WARN_ON(smp_processor_id() != data);
+        WARN_ON(cpu != data);
+
+        iv = __this_cpu_read(mce_next_interval);
 
         if (mce_available(this_cpu_ptr(&cpu_info))) {
-                machine_check_poll(MCP_TIMESTAMP,
-                                this_cpu_ptr(&mce_poll_banks));
-                mce_intel_cmci_poll();
+                machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_poll_banks));
+
+                if (mce_intel_cmci_poll()) {
+                        iv = mce_adjust_timer(iv);
+                        goto done;
+                }
         }
 
         /*
-         * Alert userspace if needed. If we logged an MCE, reduce the
-         * polling interval, otherwise increase the polling interval.
+         * Alert userspace if needed. If we logged an MCE, reduce the polling
+         * interval, otherwise increase the polling interval.
          */
-        iv = __this_cpu_read(mce_next_interval);
-        notify = mce_notify_irq();
-        notify |= cmc_error_seen();
-        if (notify) {
+        if (mce_notify_irq())
                 iv = max(iv / 2, (unsigned long) HZ/100);
-        } else {
+        else
                 iv = min(iv * 2, round_jiffies_relative(check_interval * HZ));
-                iv = mce_adjust_timer(iv);
-        }
+
+done:
         __this_cpu_write(mce_next_interval, iv);
-        /* Might have become 0 after CMCI storm subsided */
-        if (iv) {
-                t->expires = jiffies + iv;
-                add_timer_on(t, smp_processor_id());
-        }
+        __restart_timer(t, iv);
 }
 
 /*
@@ -1321,16 +1331,10 @@ static void mce_timer_fn(unsigned long data)
 void mce_timer_kick(unsigned long interval)
 {
         struct timer_list *t = this_cpu_ptr(&mce_timer);
-        unsigned long when = jiffies + interval;
         unsigned long iv = __this_cpu_read(mce_next_interval);
 
-        if (timer_pending(t)) {
-                if (time_before(when, t->expires))
-                        mod_timer_pinned(t, when);
-        } else {
-                t->expires = round_jiffies(when);
-                add_timer_on(t, smp_processor_id());
-        }
+        __restart_timer(t, interval);
+
         if (interval < iv)
                 __this_cpu_write(mce_next_interval, interval);
 }
@@ -1631,7 +1635,7 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
         switch (c->x86_vendor) {
         case X86_VENDOR_INTEL:
                 mce_intel_feature_init(c);
-                mce_adjust_timer = mce_intel_adjust_timer;
+                mce_adjust_timer = cmci_intel_adjust_timer;
                 break;
         case X86_VENDOR_AMD:
                 mce_amd_feature_init(c);
...
@@ -79,7 +79,7 @@ static inline bool is_shared_bank(int bank)
         return (bank == 4);
 }
 
-static const char * const bank4_names(struct threshold_block *b)
+static const char *bank4_names(const struct threshold_block *b)
 {
         switch (b->address) {
         /* MSR4_MISC0 */
@@ -250,6 +250,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
                 if (!b.interrupt_capable)
                         goto init;
 
+                b.interrupt_enable = 1;
                 new     = (high & MASK_LVTOFF_HI) >> 20;
                 offset  = setup_APIC_mce(offset, new);
@@ -322,6 +323,8 @@ static void amd_threshold_interrupt(void)
 log:
         mce_setup(&m);
         rdmsrl(MSR_IA32_MCx_STATUS(bank), m.status);
+        if (!(m.status & MCI_STATUS_VAL))
+                return;
         m.misc = ((u64)high << 32) | low;
         m.bank = bank;
         mce_log(&m);
@@ -497,10 +500,12 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
         b->interrupt_capable    = lvt_interrupt_supported(bank, high);
         b->threshold_limit      = THRESHOLD_MAX;
 
-        if (b->interrupt_capable)
+        if (b->interrupt_capable) {
                 threshold_ktype.default_attrs[2] = &interrupt_enable.attr;
-        else
+                b->interrupt_enable = 1;
+        } else {
                 threshold_ktype.default_attrs[2] = NULL;
+        }
 
         INIT_LIST_HEAD(&b->miscj);
...
@@ -38,6 +38,15 @@
  */
 static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
 
+/*
+ * CMCI storm detection backoff counter
+ *
+ * During storm, we reset this counter to INITIAL_CHECK_INTERVAL in case we've
+ * encountered an error. If not, we decrement it by one. We signal the end of
+ * the CMCI storm when it reaches 0.
+ */
+static DEFINE_PER_CPU(int, cmci_backoff_cnt);
+
 /*
  * cmci_discover_lock protects against parallel discovery attempts
  * which could race against each other.
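(The counter's protocol is exactly what the comment above says: reset on a logged error, decrement on a quiet poll. A userspace model of that bookkeeping, with names borrowed from the kernel, single CPU, no percpu machinery:)

#include <stdbool.h>
#include <stdio.h>

#define INITIAL_CHECK_INTERVAL  (5 * 60)

static int cmci_backoff_cnt = INITIAL_CHECK_INTERVAL;

/* One storm-mode poll tick: mirrors mce_intel_cmci_poll()'s bookkeeping. */
static void storm_poll(bool error_logged)
{
        if (error_logged)
                cmci_backoff_cnt = INITIAL_CHECK_INTERVAL;  /* restart the countdown */
        else
                cmci_backoff_cnt--;     /* quiet poll: one step toward storm end */
}

int main(void)
{
        storm_poll(false);
        storm_poll(false);
        storm_poll(true);       /* a logged error resets the counter */
        printf("cmci_backoff_cnt = %d\n", cmci_backoff_cnt);   /* 300 again */
        return 0;
}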
@@ -46,7 +55,7 @@ static DEFINE_RAW_SPINLOCK(cmci_discover_lock);
 
 #define CMCI_THRESHOLD          1
 #define CMCI_POLL_INTERVAL      (30 * HZ)
-#define CMCI_STORM_INTERVAL     (1 * HZ)
+#define CMCI_STORM_INTERVAL     (HZ)
 #define CMCI_STORM_THRESHOLD    15
 
 static DEFINE_PER_CPU(unsigned long, cmci_time_stamp);
@@ -82,11 +91,21 @@ static int cmci_supported(int *banks)
         return !!(cap & MCG_CMCI_P);
 }
 
-void mce_intel_cmci_poll(void)
+bool mce_intel_cmci_poll(void)
 {
         if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
-                return;
-        machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
+                return false;
+
+        /*
+         * Reset the counter if we've logged an error in the last poll
+         * during the storm.
+         */
+        if (machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned)))
+                this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL);
+        else
+                this_cpu_dec(cmci_backoff_cnt);
+
+        return true;
 }
 
 void mce_intel_hcpu_update(unsigned long cpu)
@@ -97,31 +116,32 @@ void mce_intel_hcpu_update(unsigned long cpu)
         per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE;
 }
 
-unsigned long mce_intel_adjust_timer(unsigned long interval)
+unsigned long cmci_intel_adjust_timer(unsigned long interval)
 {
-        int r;
-
         if (interval < CMCI_POLL_INTERVAL)
                 return interval;
 
+        if ((this_cpu_read(cmci_backoff_cnt) > 0) &&
+            (__this_cpu_read(cmci_storm_state) == CMCI_STORM_ACTIVE)) {
+                mce_notify_irq();
+                return CMCI_STORM_INTERVAL;
+        }
+
         switch (__this_cpu_read(cmci_storm_state)) {
         case CMCI_STORM_ACTIVE:
                 /*
                  * We switch back to interrupt mode once the poll timer has
-                 * silenced itself. That means no events recorded and the
-                 * timer interval is back to our poll interval.
+                 * silenced itself. That means no events recorded and the timer
+                 * interval is back to our poll interval.
                  */
                 __this_cpu_write(cmci_storm_state, CMCI_STORM_SUBSIDED);
-                r = atomic_sub_return(1, &cmci_storm_on_cpus);
-                if (r == 0)
+                if (!atomic_sub_return(1, &cmci_storm_on_cpus))
                         pr_notice("CMCI storm subsided: switching to interrupt mode\n");
+
                 /* FALLTHROUGH */
 
         case CMCI_STORM_SUBSIDED:
                 /*
-                 * We wait for all cpus to go back to SUBSIDED
-                 * state. When that happens we switch back to
-                 * interrupt mode.
+                 * We wait for all CPUs to go back to SUBSIDED state. When that
+                 * happens we switch back to interrupt mode.
                  */
                 if (!atomic_read(&cmci_storm_on_cpus)) {
                         __this_cpu_write(cmci_storm_state, CMCI_STORM_NONE);
@@ -130,10 +150,8 @@ unsigned long mce_intel_adjust_timer(unsigned long interval)
                 }
                 return CMCI_POLL_INTERVAL;
+
         default:
-                /*
-                 * We have shiny weather. Let the poll do whatever it
-                 * thinks.
-                 */
+                /* We have shiny weather. Let the poll do whatever it thinks. */
                 return interval;
         }
 }
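(Putting the two hunks above together: the per-CPU storm handling is a three-state machine, NONE (interrupt mode), ACTIVE (polling with the backoff counter armed), and SUBSIDED (this CPU is quiet, waiting for the rest). A compressed sketch of the transitions, purely illustrative, with no atomics or percpu state:)

#include <stdio.h>

enum cmci_storm_state { CMCI_STORM_NONE, CMCI_STORM_ACTIVE, CMCI_STORM_SUBSIDED };

static enum cmci_storm_state storm_state = CMCI_STORM_NONE;
static int storm_cpus;  /* stand-in for the cmci_storm_on_cpus atomic */

/* Timer-driven transition logic, shaped like cmci_intel_adjust_timer(). */
static void storm_tick(void)
{
        switch (storm_state) {
        case CMCI_STORM_ACTIVE:
                /* Poll timer went quiet on this CPU: stop counting ourselves. */
                storm_state = CMCI_STORM_SUBSIDED;
                storm_cpus--;
                /* fall through */
        case CMCI_STORM_SUBSIDED:
                /* Back to interrupt mode only once every CPU has calmed down. */
                if (!storm_cpus)
                        storm_state = CMCI_STORM_NONE;
                break;
        default:
                break;  /* shiny weather: nothing to do */
        }
}

int main(void)
{
        storm_state = CMCI_STORM_ACTIVE;        /* pretend a storm was detected */
        storm_cpus = 1;
        storm_tick();
        printf("state = %d (0 == CMCI_STORM_NONE)\n", storm_state);
        return 0;
}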
@@ -178,7 +196,8 @@ static bool cmci_storm_detect(void)
         cmci_storm_disable_banks();
         __this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
         r = atomic_add_return(1, &cmci_storm_on_cpus);
-        mce_timer_kick(CMCI_POLL_INTERVAL);
+        mce_timer_kick(CMCI_STORM_INTERVAL);
+        this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL);
 
         if (r == 1)
                 pr_notice("CMCI storm detected: switching to poll mode\n");
@@ -195,6 +214,7 @@ static void intel_threshold_interrupt(void)
 {
         if (cmci_storm_detect())
                 return;
+
         machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
         mce_notify_irq();
 }
@@ -286,6 +306,7 @@ void cmci_recheck(void)
         if (!mce_available(raw_cpu_ptr(&cpu_info)) || !cmci_supported(&banks))
                 return;
+
         local_irq_save(flags);
         machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
         local_irq_restore(flags);
...