Commit 0f68c088, authored by Tony Luck, committed by Ingo Molnar

x86/cpufeature: Create a new synthetic cpu capability for machine check recovery

The Intel Software Developer Manual describes bit 24 in the MCG_CAP
MSR:

   MCG_SER_P (software error recovery support present) flag,
   bit 24 — Indicates (when set) that the processor supports
   software error recovery

But only some models with this capability bit set will actually
generate recoverable machine checks.

Check the model name and set a synthetic capability bit. Provide
a command line option to set this bit anyway in case the kernel
doesn't recognise the model name.
Signed-off-by: Tony Luck <tony.luck@intel.com>
Reviewed-by: Borislav Petkov <bp@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/2e5bfb23c89800a036fb8a45fa97a74bb16bc362.1455732970.git.tony.luck@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent 3a2f2ac9
...@@ -60,6 +60,8 @@ Machine check ...@@ -60,6 +60,8 @@ Machine check
threshold to 1. Enabling this may make memory predictive failure threshold to 1. Enabling this may make memory predictive failure
analysis less effective if the bios sets thresholds for memory analysis less effective if the bios sets thresholds for memory
errors since we will not see details for all errors. errors since we will not see details for all errors.
mce=recovery
Force-enable recoverable machine check code paths
nomce (for compatibility with i386): same as mce=off nomce (for compatibility with i386): same as mce=off
......
...@@ -106,6 +106,7 @@ ...@@ -106,6 +106,7 @@
#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */ #define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */
#define X86_FEATURE_EAGER_FPU ( 3*32+29) /* "eagerfpu" Non lazy FPU restore */ #define X86_FEATURE_EAGER_FPU ( 3*32+29) /* "eagerfpu" Non lazy FPU restore */
#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ #define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
#define X86_FEATURE_MCE_RECOVERY ( 3*32+31) /* cpu has recoverable machine checks */
/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */ #define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */
......
...@@ -113,6 +113,7 @@ struct mca_config { ...@@ -113,6 +113,7 @@ struct mca_config {
bool ignore_ce; bool ignore_ce;
bool disabled; bool disabled;
bool ser; bool ser;
bool recovery;
bool bios_cmci_threshold; bool bios_cmci_threshold;
u8 banks; u8 banks;
s8 bootlog; s8 bootlog;
......
...@@ -1576,6 +1576,17 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) ...@@ -1576,6 +1576,17 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
if (c->x86 == 6 && c->x86_model == 45) if (c->x86 == 6 && c->x86_model == 45)
quirk_no_way_out = quirk_sandybridge_ifu; quirk_no_way_out = quirk_sandybridge_ifu;
/*
* MCG_CAP.MCG_SER_P is necessary but not sufficient to know
* whether this processor will actually generate recoverable
* machine checks. Check to see if this is an E7 model Xeon.
* We can't do a model number check because E5 and E7 use the
* same model number. E5 doesn't support recovery, E7 does.
*/
if (mca_cfg.recovery || (mca_cfg.ser &&
!strncmp(c->x86_model_id,
"Intel(R) Xeon(R) CPU E7-", 24)))
set_cpu_cap(c, X86_FEATURE_MCE_RECOVERY);
} }
if (cfg->monarch_timeout < 0) if (cfg->monarch_timeout < 0)
cfg->monarch_timeout = 0; cfg->monarch_timeout = 0;
...@@ -2028,6 +2039,8 @@ static int __init mcheck_enable(char *str) ...@@ -2028,6 +2039,8 @@ static int __init mcheck_enable(char *str)
cfg->bootlog = (str[0] == 'b'); cfg->bootlog = (str[0] == 'b');
else if (!strcmp(str, "bios_cmci_threshold")) else if (!strcmp(str, "bios_cmci_threshold"))
cfg->bios_cmci_threshold = true; cfg->bios_cmci_threshold = true;
else if (!strcmp(str, "recovery"))
cfg->recovery = true;
else if (isdigit(str[0])) { else if (isdigit(str[0])) {
if (get_option(&str, &cfg->tolerant) == 2) if (get_option(&str, &cfg->tolerant) == 2)
get_option(&str, &(cfg->monarch_timeout)); get_option(&str, &(cfg->monarch_timeout));
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment