Merge tag 'ras_core_for_v5.18_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull RAS updates from Borislav Petkov: - More noinstr fixes - Add an erratum workaround for Intel CPUs which, in certain circumstances, end up consuming an unrelated uncorrectable memory error when using fast string copy insns - Remove the MCE tolerance level control as it is not really needed or used anymore * tag 'ras_core_for_v5.18_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/mce: Remove the tolerance level control x86/mce: Work around an erratum on fast string copy instructions x86/mce: Use arch atomic and bit helpers

Merge tag 'ras_core_for_v5.18_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull RAS updates from Borislav Petkov: - More noinstr fixes - Add an erratum workaround for Intel CPUs which, in certain circumstances, end up consuming an unrelated uncorrectable memory error when using fast string copy insns - Remove the MCE tolerance level control as it is not really needed or used anymore * tag 'ras_core_for_v5.18_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/mce: Remove the tolerance level control x86/mce: Work around an erratum on fast string copy instructions x86/mce: Use arch atomic and bit helpers
636f64db · Linus Torvalds · ebcb577a · 7f1b8e0d · 636f64db · 636f64db
Commit 636f64db authored Mar 25, 2022 by Linus Torvalds
7 changed files
--- a/Documentation/ABI/removed/sysfs-mce
+++ b/Documentation/ABI/removed/sysfs-mce
+What:		/sys/devices/system/machinecheck/machinecheckX/tolerant
+Contact:	Borislav Petkov <bp@suse.de>
+Date:		Dec, 2021
+Description:
+		Unused and obsolete after the advent of recoverable machine
+		checks (see last sentence below) and those are present since
+		2010 (Nehalem).
+		Original description:
+		The entries appear for each CPU, but they are truly shared
+		between all CPUs.
+		Tolerance level. When a machine check exception occurs for a
+		non corrected machine check the kernel can take different
+		actions.
+		Since machine check exceptions can happen any time it is
+		sometimes risky for the kernel to kill a process because it
+		defies normal kernel locking rules. The tolerance level
+		configures how hard the kernel tries to recover even at some
+		risk of	deadlock. Higher tolerant values trade potentially
+		better uptime with the risk of a crash or even corruption
+		(for tolerant >= 3).
+		==  ===========================================================
+		 0  always panic on uncorrected errors, log corrected errors
+		 1  panic or SIGBUS on uncorrected errors, log corrected errors
+		 2  SIGBUS or log uncorrected errors, log corrected errors
+		 3  never panic or SIGBUS, log all errors (for testing only)
+		==  ===========================================================
+		Default: 1
+		Note this only makes a difference if the CPU allows recovery
+		from a machine check exception. Current x86 CPUs generally
+		do not.
--- a/Documentation/ABI/testing/sysfs-mce
+++ b/Documentation/ABI/testing/sysfs-mce
@@ -53,38 +53,6 @@ Description:
 		(but some corrected errors might be still reported
 		in other ways)
-What:		/sys/devices/system/machinecheck/machinecheckX/tolerant
-Contact:	Andi Kleen <ak@linux.intel.com>
-Date:		Feb, 2007
-Description:
-		The entries appear for each CPU, but they are truly shared
-		between all CPUs.
-		Tolerance level. When a machine check exception occurs for a
-		non corrected machine check the kernel can take different
-		actions.
-		Since machine check exceptions can happen any time it is
-		sometimes risky for the kernel to kill a process because it
-		defies normal kernel locking rules. The tolerance level
-		configures how hard the kernel tries to recover even at some
-		risk of	deadlock. Higher tolerant values trade potentially
-		better uptime with the risk of a crash or even corruption
-		(for tolerant >= 3).
-		==  ===========================================================
-		 0  always panic on uncorrected errors, log corrected errors
-		 1  panic or SIGBUS on uncorrected errors, log corrected errors
-		 2  SIGBUS or log uncorrected errors, log corrected errors
-		 3  never panic or SIGBUS, log all errors (for testing only)
-		==  ===========================================================
-		Default: 1
-		Note this only makes a difference if the CPU allows recovery
-		from a machine check exception. Current x86 CPUs generally
-		do not.
 What:		/sys/devices/system/machinecheck/machinecheckX/trigger
 Contact:	Andi Kleen <ak@linux.intel.com>
 Date:		Feb, 2007

--- a/Documentation/vm/hwpoison.rst
+++ b/Documentation/vm/hwpoison.rst
@@ -60,8 +60,6 @@ There are two (actually three) modes memory failure recovery can be in:
 vm.memory_failure_recovery sysctl set to zero:
 	All memory failures cause a panic. Do not attempt recovery.
-	(on x86 this can be also affected by the tolerant level of the
-	MCE subsystem)
 early kill
 	(can be controlled globally and per process)

--- a/Documentation/x86/x86_64/boot-options.rst
+++ b/Documentation/x86/x86_64/boot-options.rst
@@ -47,14 +47,7 @@ Please see Documentation/x86/x86_64/machinecheck.rst for sysfs runtime tunables.
 		in a reboot. On Intel systems it is enabled by default.
   mce=nobootlog
 		Disable boot machine check logging.
-   mce=tolerancelevel[,monarchtimeout] (number,number)
+   mce=monarchtimeout (number)
-		tolerance levels:
-		0: always panic on uncorrected errors, log corrected errors
-		1: panic or SIGBUS on uncorrected errors, log corrected errors
-		2: SIGBUS or log uncorrected errors, log corrected errors
-		3: never panic or SIGBUS, log all errors (for testing only)
-		Default is 1
-		Can be also set using sysfs which is preferable.
 		monarchtimeout:
 		Sets the time in us to wait for other CPUs on machine checks. 0
 		to disable.

--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -35,7 +35,7 @@ int mce_gen_pool_add(struct mce *mce);
 int mce_gen_pool_init(void);
 struct llist_node *mce_gen_pool_prepare_records(void);
-int mce_severity(struct mce *a, struct pt_regs *regs, int tolerant, char **msg, bool is_excp);
+int mce_severity(struct mce *a, struct pt_regs *regs, char **msg, bool is_excp);
 struct dentry *mce_get_debugfs_dir(void);
 extern mce_banks_t mce_banks_ce_disabled;
@@ -127,7 +127,6 @@ struct mca_config {
 	bool ignore_ce;
 	bool print_all;
-	int tolerant;
 	int monarch_timeout;
 	int panic_timeout;
 	u32 rip_msr;
@@ -170,7 +169,10 @@ struct mce_vendor_flags {
 	/* SandyBridge IFU quirk */
 	snb_ifu_quirk		: 1,
-	__reserved_0		: 57;
+	/* Skylake, Cascade Lake, Cooper Lake REP;MOVS* quirk */
+	skx_repmov_quirk	: 1,
+	__reserved_0		: 56;
 };
 extern struct mce_vendor_flags mce_flags;
@@ -182,8 +184,6 @@ enum mca_msr {
 	MCA_MISC,
 };
-u32 mca_msr_reg(int bank, enum mca_msr reg);
 /* Decide whether to add MCE record to MCE event pool or filter it out. */
 extern bool filter_mce(struct mce *m);
@@ -209,4 +209,25 @@ static inline void winchip_machine_check(struct pt_regs *regs) {}
 noinstr u64 mce_rdmsrl(u32 msr);
+static __always_inline u32 mca_msr_reg(int bank, enum mca_msr reg)
+{
+	if (mce_flags.smca) {
+		switch (reg) {
+		case MCA_CTL:	 return MSR_AMD64_SMCA_MCx_CTL(bank);
+		case MCA_ADDR:	 return MSR_AMD64_SMCA_MCx_ADDR(bank);
+		case MCA_MISC:	 return MSR_AMD64_SMCA_MCx_MISC(bank);
+		case MCA_STATUS: return MSR_AMD64_SMCA_MCx_STATUS(bank);
+		}
+	}
+	switch (reg) {
+	case MCA_CTL:	 return MSR_IA32_MCx_CTL(bank);
+	case MCA_ADDR:	 return MSR_IA32_MCx_ADDR(bank);
+	case MCA_MISC:	 return MSR_IA32_MCx_MISC(bank);
+	case MCA_STATUS: return MSR_IA32_MCx_STATUS(bank);
+	}
+	return 0;
+}
 #endif /* __X86_MCE_INTERNAL_H__ */
--- a/arch/x86/kernel/cpu/mce/severity.c
+++ b/arch/x86/kernel/cpu/mce/severity.c
@@ -301,7 +301,7 @@ static noinstr int error_context(struct mce *m, struct pt_regs *regs)
 	}
 }
-static int mce_severity_amd_smca(struct mce *m, enum context err_ctx)
+static __always_inline int mce_severity_amd_smca(struct mce *m, enum context err_ctx)
 {
 	u64 mcx_cfg;
@@ -330,8 +330,7 @@ static int mce_severity_amd_smca(struct mce *m, enum context err_ctx)
 * See AMD Error Scope Hierarchy table in a newer BKDG. For example
 * 49125_15h_Models_30h-3Fh_BKDG.pdf, section "RAS Features"
 */
-static noinstr int mce_severity_amd(struct mce *m, struct pt_regs *regs, int tolerant,
+static noinstr int mce_severity_amd(struct mce *m, struct pt_regs *regs, char **msg, bool is_excp)
-				    char **msg, bool is_excp)
 {
 	enum context ctx = error_context(m, regs);
@@ -383,8 +382,7 @@ static noinstr int mce_severity_amd(struct mce *m, struct pt_regs *regs, int tol
 	return MCE_KEEP_SEVERITY;
 }
-static noinstr int mce_severity_intel(struct mce *m, struct pt_regs *regs,
+static noinstr int mce_severity_intel(struct mce *m, struct pt_regs *regs, char **msg, bool is_excp)
-				      int tolerant, char **msg, bool is_excp)
 {
 	enum exception excp = (is_excp ? EXCP_CONTEXT : NO_EXCP);
 	enum context ctx = error_context(m, regs);
@@ -412,22 +410,21 @@ static noinstr int mce_severity_intel(struct mce *m, struct pt_regs *regs,
 		if (msg)
 			*msg = s->msg;
 		s->covered = 1;
-		if (s->sev >= MCE_UC_SEVERITY && ctx == IN_KERNEL) {
-			if (tolerant < 1)
+		if (s->sev >= MCE_UC_SEVERITY && ctx == IN_KERNEL)
 			return MCE_PANIC_SEVERITY;
-		}
 		return s->sev;
 	}
 }
-int noinstr mce_severity(struct mce *m, struct pt_regs *regs, int tolerant, char **msg,
+int noinstr mce_severity(struct mce *m, struct pt_regs *regs, char **msg, bool is_excp)
-			 bool is_excp)
 {
 	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
 	    boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
-		return mce_severity_amd(m, regs, tolerant, msg, is_excp);
+		return mce_severity_amd(m, regs, msg, is_excp);
 	else
-		return mce_severity_intel(m, regs, tolerant, msg, is_excp);
+		return mce_severity_intel(m, regs, msg, is_excp);
 }
 #ifdef CONFIG_DEBUG_FS