Commit e8779776 authored by Linus Torvalds

Merge branch 'x86-mce-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'x86-mce-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  x86, mce: Use HW_ERR in MCE handler
  x86, mce: Add HW_ERR printk prefix for hardware error logging
  x86, mce: Fix MSR_IA32_MCI_CTL2 CMCI threshold setup
  x86, mce: Rename MSR_IA32_MCx_CTL2 value
parents 3cf8ad33 a2d7b0d4
...@@ -38,6 +38,10 @@ ...@@ -38,6 +38,10 @@
#define MCM_ADDR_MEM 3 /* memory address */ #define MCM_ADDR_MEM 3 /* memory address */
#define MCM_ADDR_GENERIC 7 /* generic */ #define MCM_ADDR_GENERIC 7 /* generic */
/* CTL2 register defines */
#define MCI_CTL2_CMCI_EN (1ULL << 30)
#define MCI_CTL2_CMCI_THRESHOLD_MASK 0x7fffULL
#define MCJ_CTX_MASK 3 #define MCJ_CTX_MASK 3
#define MCJ_CTX(flags) ((flags) & MCJ_CTX_MASK) #define MCJ_CTX(flags) ((flags) & MCJ_CTX_MASK)
#define MCJ_CTX_RANDOM 0 /* inject context: random */ #define MCJ_CTX_RANDOM 0 /* inject context: random */
......
...@@ -96,9 +96,6 @@ ...@@ -96,9 +96,6 @@
#define MSR_IA32_MC0_CTL2 0x00000280 #define MSR_IA32_MC0_CTL2 0x00000280
#define MSR_IA32_MCx_CTL2(x) (MSR_IA32_MC0_CTL2 + (x)) #define MSR_IA32_MCx_CTL2(x) (MSR_IA32_MC0_CTL2 + (x))
#define CMCI_EN (1ULL << 30)
#define CMCI_THRESHOLD_MASK 0xffffULL
#define MSR_P6_PERFCTR0 0x000000c1 #define MSR_P6_PERFCTR0 0x000000c1
#define MSR_P6_PERFCTR1 0x000000c2 #define MSR_P6_PERFCTR1 0x000000c2
#define MSR_P6_EVNTSEL0 0x00000186 #define MSR_P6_EVNTSEL0 0x00000186
......
...@@ -107,8 +107,8 @@ EXPORT_SYMBOL_GPL(x86_mce_decoder_chain); ...@@ -107,8 +107,8 @@ EXPORT_SYMBOL_GPL(x86_mce_decoder_chain);
static int default_decode_mce(struct notifier_block *nb, unsigned long val, static int default_decode_mce(struct notifier_block *nb, unsigned long val,
void *data) void *data)
{ {
pr_emerg("No human readable MCE decoding support on this CPU type.\n"); pr_emerg(HW_ERR "No human readable MCE decoding support on this CPU type.\n");
pr_emerg("Run the message through 'mcelog --ascii' to decode.\n"); pr_emerg(HW_ERR "Run the message through 'mcelog --ascii' to decode.\n");
return NOTIFY_STOP; return NOTIFY_STOP;
} }
...@@ -211,11 +211,11 @@ void mce_log(struct mce *mce) ...@@ -211,11 +211,11 @@ void mce_log(struct mce *mce)
static void print_mce(struct mce *m) static void print_mce(struct mce *m)
{ {
pr_emerg("CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", pr_emerg(HW_ERR "CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n",
m->extcpu, m->mcgstatus, m->bank, m->status); m->extcpu, m->mcgstatus, m->bank, m->status);
if (m->ip) { if (m->ip) {
pr_emerg("RIP%s %02x:<%016Lx> ", pr_emerg(HW_ERR "RIP%s %02x:<%016Lx> ",
!(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
m->cs, m->ip); m->cs, m->ip);
...@@ -224,14 +224,14 @@ static void print_mce(struct mce *m) ...@@ -224,14 +224,14 @@ static void print_mce(struct mce *m)
pr_cont("\n"); pr_cont("\n");
} }
pr_emerg("TSC %llx ", m->tsc); pr_emerg(HW_ERR "TSC %llx ", m->tsc);
if (m->addr) if (m->addr)
pr_cont("ADDR %llx ", m->addr); pr_cont("ADDR %llx ", m->addr);
if (m->misc) if (m->misc)
pr_cont("MISC %llx ", m->misc); pr_cont("MISC %llx ", m->misc);
pr_cont("\n"); pr_cont("\n");
pr_emerg("PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n",
m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid); m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid);
/* /*
...@@ -241,16 +241,6 @@ static void print_mce(struct mce *m) ...@@ -241,16 +241,6 @@ static void print_mce(struct mce *m)
atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m); atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m);
} }
static void print_mce_head(void)
{
pr_emerg("\nHARDWARE ERROR\n");
}
static void print_mce_tail(void)
{
pr_emerg("This is not a software problem!\n");
}
#define PANIC_TIMEOUT 5 /* 5 seconds */ #define PANIC_TIMEOUT 5 /* 5 seconds */
static atomic_t mce_paniced; static atomic_t mce_paniced;
...@@ -291,7 +281,6 @@ static void mce_panic(char *msg, struct mce *final, char *exp) ...@@ -291,7 +281,6 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
if (atomic_inc_return(&mce_fake_paniced) > 1) if (atomic_inc_return(&mce_fake_paniced) > 1)
return; return;
} }
print_mce_head();
/* First print corrected ones that are still unlogged */ /* First print corrected ones that are still unlogged */
for (i = 0; i < MCE_LOG_LEN; i++) { for (i = 0; i < MCE_LOG_LEN; i++) {
struct mce *m = &mcelog.entry[i]; struct mce *m = &mcelog.entry[i];
...@@ -322,16 +311,15 @@ static void mce_panic(char *msg, struct mce *final, char *exp) ...@@ -322,16 +311,15 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
apei_err = apei_write_mce(final); apei_err = apei_write_mce(final);
} }
if (cpu_missing) if (cpu_missing)
printk(KERN_EMERG "Some CPUs didn't answer in synchronization\n"); pr_emerg(HW_ERR "Some CPUs didn't answer in synchronization\n");
print_mce_tail();
if (exp) if (exp)
printk(KERN_EMERG "Machine check: %s\n", exp); pr_emerg(HW_ERR "Machine check: %s\n", exp);
if (!fake_panic) { if (!fake_panic) {
if (panic_timeout == 0) if (panic_timeout == 0)
panic_timeout = mce_panic_timeout; panic_timeout = mce_panic_timeout;
panic(msg); panic(msg);
} else } else
printk(KERN_EMERG "Fake kernel panic: %s\n", msg); pr_emerg(HW_ERR "Fake kernel panic: %s\n", msg);
} }
/* Support code for software error injection */ /* Support code for software error injection */
...@@ -1221,7 +1209,7 @@ int mce_notify_irq(void) ...@@ -1221,7 +1209,7 @@ int mce_notify_irq(void)
schedule_work(&mce_trigger_work); schedule_work(&mce_trigger_work);
if (__ratelimit(&ratelimit)) if (__ratelimit(&ratelimit))
printk(KERN_INFO "Machine check events logged\n"); pr_info(HW_ERR "Machine check events logged\n");
return 1; return 1;
} }
......
...@@ -95,19 +95,20 @@ static void cmci_discover(int banks, int boot) ...@@ -95,19 +95,20 @@ static void cmci_discover(int banks, int boot)
rdmsrl(MSR_IA32_MCx_CTL2(i), val); rdmsrl(MSR_IA32_MCx_CTL2(i), val);
/* Already owned by someone else? */ /* Already owned by someone else? */
if (val & CMCI_EN) { if (val & MCI_CTL2_CMCI_EN) {
if (test_and_clear_bit(i, owned) && !boot) if (test_and_clear_bit(i, owned) && !boot)
print_update("SHD", &hdr, i); print_update("SHD", &hdr, i);
__clear_bit(i, __get_cpu_var(mce_poll_banks)); __clear_bit(i, __get_cpu_var(mce_poll_banks));
continue; continue;
} }
val |= CMCI_EN | CMCI_THRESHOLD; val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
val |= MCI_CTL2_CMCI_EN | CMCI_THRESHOLD;
wrmsrl(MSR_IA32_MCx_CTL2(i), val); wrmsrl(MSR_IA32_MCx_CTL2(i), val);
rdmsrl(MSR_IA32_MCx_CTL2(i), val); rdmsrl(MSR_IA32_MCx_CTL2(i), val);
/* Did the enable bit stick? -- the bank supports CMCI */ /* Did the enable bit stick? -- the bank supports CMCI */
if (val & CMCI_EN) { if (val & MCI_CTL2_CMCI_EN) {
if (!test_and_set_bit(i, owned) && !boot) if (!test_and_set_bit(i, owned) && !boot)
print_update("CMCI", &hdr, i); print_update("CMCI", &hdr, i);
__clear_bit(i, __get_cpu_var(mce_poll_banks)); __clear_bit(i, __get_cpu_var(mce_poll_banks));
...@@ -155,7 +156,7 @@ void cmci_clear(void) ...@@ -155,7 +156,7 @@ void cmci_clear(void)
continue; continue;
/* Disable CMCI */ /* Disable CMCI */
rdmsrl(MSR_IA32_MCx_CTL2(i), val); rdmsrl(MSR_IA32_MCx_CTL2(i), val);
val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK); val &= ~(MCI_CTL2_CMCI_EN|MCI_CTL2_CMCI_THRESHOLD_MASK);
wrmsrl(MSR_IA32_MCx_CTL2(i), val); wrmsrl(MSR_IA32_MCx_CTL2(i), val);
__clear_bit(i, __get_cpu_var(mce_banks_owned)); __clear_bit(i, __get_cpu_var(mce_banks_owned));
} }
......
...@@ -252,6 +252,13 @@ extern struct pid *session_of_pgrp(struct pid *pgrp); ...@@ -252,6 +252,13 @@ extern struct pid *session_of_pgrp(struct pid *pgrp);
#define FW_WARN "[Firmware Warn]: " #define FW_WARN "[Firmware Warn]: "
#define FW_INFO "[Firmware Info]: " #define FW_INFO "[Firmware Info]: "
/*
* HW_ERR
* Add this to a message for hardware errors, so that user can report
* it to hardware vendor instead of LKML or software vendor.
*/
#define HW_ERR "[Hardware Error]: "
#ifdef CONFIG_PRINTK #ifdef CONFIG_PRINTK
asmlinkage int vprintk(const char *fmt, va_list args) asmlinkage int vprintk(const char *fmt, va_list args)
__attribute__ ((format (printf, 1, 0))); __attribute__ ((format (printf, 1, 0)));
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment