Commit cd9c57ca, authored by Borislav Petkov, committed by Thomas Gleixner

x86/MCE: Dump MCE to dmesg if no consumers

When there are no error record consumers registered with the kernel, the
only thing that appears in dmesg is something like:

  [  300.000326] mce: [Hardware Error]: Machine check events logged

and the error records are gone. Which is seriously counterproductive.

So let's dump them to dmesg instead, in such a case.

Requested-by: Eric Morton <Eric.Morton@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Tony Luck <tony.luck@intel.com>
Link: http://lkml.kernel.org/r/20161101120911.13163-4-bp@alien8.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
parent 8c203dbb
...@@ -207,8 +207,12 @@ EXPORT_SYMBOL_GPL(mce_inject_log); ...@@ -207,8 +207,12 @@ EXPORT_SYMBOL_GPL(mce_inject_log);
static struct notifier_block mce_srao_nb; static struct notifier_block mce_srao_nb;
static atomic_t num_notifiers;
void mce_register_decode_chain(struct notifier_block *nb) void mce_register_decode_chain(struct notifier_block *nb)
{ {
atomic_inc(&num_notifiers);
/* Ensure SRAO notifier has the highest priority in the decode chain. */ /* Ensure SRAO notifier has the highest priority in the decode chain. */
if (nb != &mce_srao_nb && nb->priority == INT_MAX) if (nb != &mce_srao_nb && nb->priority == INT_MAX)
nb->priority -= 1; nb->priority -= 1;
...@@ -219,6 +223,8 @@ EXPORT_SYMBOL_GPL(mce_register_decode_chain); ...@@ -219,6 +223,8 @@ EXPORT_SYMBOL_GPL(mce_register_decode_chain);
void mce_unregister_decode_chain(struct notifier_block *nb) void mce_unregister_decode_chain(struct notifier_block *nb)
{ {
atomic_dec(&num_notifiers);
atomic_notifier_chain_unregister(&x86_mce_decoder_chain, nb); atomic_notifier_chain_unregister(&x86_mce_decoder_chain, nb);
} }
EXPORT_SYMBOL_GPL(mce_unregister_decode_chain); EXPORT_SYMBOL_GPL(mce_unregister_decode_chain);
...@@ -270,17 +276,17 @@ struct mca_msr_regs msr_ops = { ...@@ -270,17 +276,17 @@ struct mca_msr_regs msr_ops = {
.misc = misc_reg .misc = misc_reg
}; };
static void print_mce(struct mce *m) static void __print_mce(struct mce *m)
{ {
int ret = 0; pr_emerg(HW_ERR "CPU %d: Machine Check%s: %Lx Bank %d: %016Lx\n",
m->extcpu,
pr_emerg(HW_ERR "CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n", (m->mcgstatus & MCG_STATUS_MCIP ? " Exception" : ""),
m->extcpu, m->mcgstatus, m->bank, m->status); m->mcgstatus, m->bank, m->status);
if (m->ip) { if (m->ip) {
pr_emerg(HW_ERR "RIP%s %02x:<%016Lx> ", pr_emerg(HW_ERR "RIP%s %02x:<%016Lx> ",
!(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
m->cs, m->ip); m->cs, m->ip);
if (m->cs == __KERNEL_CS) if (m->cs == __KERNEL_CS)
print_symbol("{%s}", m->ip); print_symbol("{%s}", m->ip);
...@@ -308,6 +314,13 @@ static void print_mce(struct mce *m) ...@@ -308,6 +314,13 @@ static void print_mce(struct mce *m)
pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x microcode %x\n", pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x microcode %x\n",
m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid, m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid,
cpu_data(m->extcpu).microcode); cpu_data(m->extcpu).microcode);
}
static void print_mce(struct mce *m)
{
int ret = 0;
__print_mce(m);
/* /*
* Print out human-readable details about the MCE error, * Print out human-readable details about the MCE error,
...@@ -569,6 +582,32 @@ static struct notifier_block mce_srao_nb = { ...@@ -569,6 +582,32 @@ static struct notifier_block mce_srao_nb = {
.priority = INT_MAX, .priority = INT_MAX,
}; };
/*
 * Fallback callback for the MCE decoder chain: dump the raw error record
 * via __print_mce() when nothing besides the built-in notifiers is
 * registered.
 *
 * @nb:   notifier block this callback was invoked through (unused)
 * @val:  notifier event value (unused)
 * @data: pointer to the struct mce to report; a NULL pointer is ignored
 *
 * Always returns NOTIFY_DONE, whether or not the record was printed.
 */
static int mce_default_notifier(struct notifier_block *nb, unsigned long val,
void *data)
{
struct mce *m = (struct mce *)data;
/* Nothing to print without a record. */
if (!m)
return NOTIFY_DONE;
/*
* Run the default notifier if we have only the SRAO
* notifier and us registered.
*/
/*
 * num_notifiers is bumped by mce_register_decode_chain() and dropped by
 * mce_unregister_decode_chain(); mcheck_init() registers mce_srao_nb and
 * mce_default_nb, so a count above 2 means another consumer is present.
 */
if (atomic_read(&num_notifiers) > 2)
return NOTIFY_DONE;
__print_mce(m);
return NOTIFY_DONE;
}
/*
 * Notifier block that hooks mce_default_notifier() into the decoder chain;
 * registered from mcheck_init() via mce_register_decode_chain().
 */
static struct notifier_block mce_default_nb = {
.notifier_call = mce_default_notifier,
/* lowest prio, we want it to run last. */
.priority = 0,
};
/* /*
* Read ADDR and MISC registers. * Read ADDR and MISC registers.
*/ */
...@@ -2138,6 +2177,7 @@ int __init mcheck_init(void) ...@@ -2138,6 +2177,7 @@ int __init mcheck_init(void)
{ {
mcheck_intel_therm_init(); mcheck_intel_therm_init();
mce_register_decode_chain(&mce_srao_nb); mce_register_decode_chain(&mce_srao_nb);
mce_register_decode_chain(&mce_default_nb);
mcheck_vendor_init_severity(); mcheck_vendor_init_severity();
INIT_WORK(&mce_work, mce_process_work); INIT_WORK(&mce_work, mce_process_work);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment