Commit ca1b6692 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'ras_updates_for_v5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull RAS updates from Borislav Petkov:

 - Extend the recovery from MCE in kernel space also to processes which
   encounter an MCE in kernel space but while copying from user memory
   by sending them a SIGBUS on return to user space and umapping the
   faulty memory, by Tony Luck and Youquan Song.

 - memcpy_mcsafe() rework by splitting the functionality into
   copy_mc_to_user() and copy_mc_to_kernel(). This, as a result, enables
   support for new hardware which can recover from a machine check
   encountered during a fast string copy and makes that the default and
   lets the older hardware which does not support that advance recovery,
   opt in to use the old, fragile, slow variant, by Dan Williams.

 - New AMD hw enablement, by Yazen Ghannam and Akshay Gupta.

 - Do not use MSR-tracing accessors in #MC context and flag any fault
   while accessing MCA architectural MSRs as an architectural violation
   with the hope that such hw/fw misdesigns are caught early during the
   hw eval phase and they don't make it into production.

 - Misc fixes, improvements and cleanups, as always.

* tag 'ras_updates_for_v5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mce: Allow for copy_mc_fragile symbol checksum to be generated
  x86/mce: Decode a kernel instruction to determine if it is copying from user
  x86/mce: Recover from poison found while copying from user space
  x86/mce: Avoid tail copy when machine check terminated a copy from user
  x86/mce: Add _ASM_EXTABLE_CPY for copy user access
  x86/mce: Provide method to find out the type of an exception handler
  x86/mce: Pass pointer to saved pt_regs to severity calculation routines
  x86/copy_mc: Introduce copy_mc_enhanced_fast_string()
  x86, powerpc: Rename memcpy_mcsafe() to copy_mc_to_{user, kernel}()
  x86/mce: Drop AMD-specific "DEFERRED" case from Intel severity rule list
  x86/mce: Add Skylake quirk for patrol scrub reported errors
  RAS/CEC: Convert to DEFINE_SHOW_ATTRIBUTE()
  x86/mce: Annotate mce_rd/wrmsrl() with noinstr
  x86/mce/dev-mcelog: Do not update kflags on AMD systems
  x86/mce: Stop mce_reign() from re-computing severity for every CPU
  x86/mce: Make mce_rdmsrl() panic on an inaccessible MSR
  x86/mce: Increase maximum number of banks to 64
  x86/mce: Delay clearing IA32_MCG_STATUS to the end of do_machine_check()
  x86/MCE/AMD, EDAC/mce_amd: Remove struct smca_hwid.xec_bitmap
  RAS/CEC: Fix cec_init() prototype
parents a9a4b7d9 b3149ffc
......@@ -135,7 +135,7 @@ config PPC
select ARCH_HAS_STRICT_KERNEL_RWX if (PPC32 && !HIBERNATION)
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_HAS_UACCESS_FLUSHCACHE
select ARCH_HAS_UACCESS_MCSAFE if PPC64
select ARCH_HAS_COPY_MC if PPC64
select ARCH_HAS_UBSAN_SANITIZE_ALL
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select ARCH_KEEP_MEMBLOCK
......
......@@ -53,9 +53,7 @@ void *__memmove(void *to, const void *from, __kernel_size_t n);
#ifndef CONFIG_KASAN
#define __HAVE_ARCH_MEMSET32
#define __HAVE_ARCH_MEMSET64
#define __HAVE_ARCH_MEMCPY_MCSAFE
extern int memcpy_mcsafe(void *dst, const void *src, __kernel_size_t sz);
extern void *__memset16(uint16_t *, uint16_t v, __kernel_size_t);
extern void *__memset32(uint32_t *, uint32_t v, __kernel_size_t);
extern void *__memset64(uint64_t *, uint64_t v, __kernel_size_t);
......
......@@ -435,6 +435,32 @@ do { \
extern unsigned long __copy_tofrom_user(void __user *to,
const void __user *from, unsigned long size);
#ifdef CONFIG_ARCH_HAS_COPY_MC
unsigned long __must_check
copy_mc_generic(void *to, const void *from, unsigned long size);
static inline unsigned long __must_check
copy_mc_to_kernel(void *to, const void *from, unsigned long size)
{
return copy_mc_generic(to, from, size);
}
#define copy_mc_to_kernel copy_mc_to_kernel
static inline unsigned long __must_check
copy_mc_to_user(void __user *to, const void *from, unsigned long n)
{
if (likely(check_copy_size(from, n, true))) {
if (access_ok(to, n)) {
allow_write_to_user(to, n);
n = copy_mc_generic((void *)to, from, n);
prevent_write_to_user(to, n);
}
}
return n;
}
#endif
#ifdef __powerpc64__
static inline unsigned long
raw_copy_in_user(void __user *to, const void __user *from, unsigned long n)
......@@ -523,20 +549,6 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n)
return ret;
}
static __always_inline unsigned long __must_check
copy_to_user_mcsafe(void __user *to, const void *from, unsigned long n)
{
if (likely(check_copy_size(from, n, true))) {
if (access_ok(to, n)) {
allow_write_to_user(to, n);
n = memcpy_mcsafe((void *)to, from, n);
prevent_write_to_user(to, n);
}
}
return n;
}
unsigned long __arch_clear_user(void __user *addr, unsigned long size);
static inline unsigned long clear_user(void __user *addr, unsigned long size)
......
......@@ -39,7 +39,7 @@ obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \
memcpy_power7.o
obj64-y += copypage_64.o copyuser_64.o mem_64.o hweight_64.o \
memcpy_64.o memcpy_mcsafe_64.o
memcpy_64.o copy_mc_64.o
ifndef CONFIG_PPC_QUEUED_SPINLOCKS
obj64-$(CONFIG_SMP) += locks.o
......
......@@ -50,7 +50,7 @@ err3; stb r0,0(r3)
blr
_GLOBAL(memcpy_mcsafe)
_GLOBAL(copy_mc_generic)
mr r7,r5
cmpldi r5,16
blt .Lshort_copy
......@@ -239,4 +239,4 @@ err1; stb r0,0(r3)
15: li r3,0
blr
EXPORT_SYMBOL_GPL(memcpy_mcsafe);
EXPORT_SYMBOL_GPL(copy_mc_generic);
......@@ -75,7 +75,7 @@ config X86
select ARCH_HAS_PTE_DEVMAP if X86_64
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64
select ARCH_HAS_UACCESS_MCSAFE if X86_64 && X86_MCE
select ARCH_HAS_COPY_MC if X86_64
select ARCH_HAS_SET_MEMORY
select ARCH_HAS_SET_DIRECT_MAP
select ARCH_HAS_STRICT_KERNEL_RWX
......
......@@ -62,7 +62,7 @@ config EARLY_PRINTK_USB_XDBC
You should normally say N here, unless you want to debug early
crashes or need a very simple printk logging facility.
config MCSAFE_TEST
config COPY_MC_TEST
def_bool n
config EFI_PGT_DUMP
......
......@@ -5,6 +5,7 @@
#include <asm/string.h>
#include <asm/page.h>
#include <asm/checksum.h>
#include <asm/mce.h>
#include <asm-generic/asm-prototypes.h>
......
......@@ -135,6 +135,9 @@
# define _ASM_EXTABLE_UA(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess)
# define _ASM_EXTABLE_CPY(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_copy)
# define _ASM_EXTABLE_FAULT(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
......@@ -160,6 +163,9 @@
# define _ASM_EXTABLE_UA(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess)
# define _ASM_EXTABLE_CPY(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_copy)
# define _ASM_EXTABLE_FAULT(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
......
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _COPY_MC_TEST_H_
#define _COPY_MC_TEST_H_
#ifndef __ASSEMBLY__
#ifdef CONFIG_COPY_MC_TEST
extern unsigned long copy_mc_test_src;
extern unsigned long copy_mc_test_dst;
static inline void copy_mc_inject_src(void *addr)
{
if (addr)
copy_mc_test_src = (unsigned long) addr;
else
copy_mc_test_src = ~0UL;
}
static inline void copy_mc_inject_dst(void *addr)
{
if (addr)
copy_mc_test_dst = (unsigned long) addr;
else
copy_mc_test_dst = ~0UL;
}
#else /* CONFIG_COPY_MC_TEST */
static inline void copy_mc_inject_src(void *addr)
{
}
static inline void copy_mc_inject_dst(void *addr)
{
}
#endif /* CONFIG_COPY_MC_TEST */
#else /* __ASSEMBLY__ */
#include <asm/export.h>
#ifdef CONFIG_COPY_MC_TEST
.macro COPY_MC_TEST_CTL
.pushsection .data
.align 8
.globl copy_mc_test_src
copy_mc_test_src:
.quad 0
EXPORT_SYMBOL_GPL(copy_mc_test_src)
.globl copy_mc_test_dst
copy_mc_test_dst:
.quad 0
EXPORT_SYMBOL_GPL(copy_mc_test_dst)
.popsection
.endm
.macro COPY_MC_TEST_SRC reg count target
leaq \count(\reg), %r9
cmp copy_mc_test_src, %r9
ja \target
.endm
.macro COPY_MC_TEST_DST reg count target
leaq \count(\reg), %r9
cmp copy_mc_test_dst, %r9
ja \target
.endm
#else
.macro COPY_MC_TEST_CTL
.endm
.macro COPY_MC_TEST_SRC reg count target
.endm
.macro COPY_MC_TEST_DST reg count target
.endm
#endif /* CONFIG_COPY_MC_TEST */
#endif /* __ASSEMBLY__ */
#endif /* _COPY_MC_TEST_H_ */
......@@ -29,10 +29,17 @@ struct pt_regs;
(b)->handler = (tmp).handler - (delta); \
} while (0)
enum handler_type {
EX_HANDLER_NONE,
EX_HANDLER_FAULT,
EX_HANDLER_UACCESS,
EX_HANDLER_OTHER
};
extern int fixup_exception(struct pt_regs *regs, int trapnr,
unsigned long error_code, unsigned long fault_addr);
extern int fixup_bug(struct pt_regs *regs, int trapnr);
extern bool ex_has_fault_handler(unsigned long ip);
extern enum handler_type ex_get_fault_handler_type(unsigned long ip);
extern void early_fixup_exception(struct pt_regs *regs, int trapnr);
#endif
......@@ -136,8 +136,23 @@
#define MCE_HANDLED_NFIT BIT_ULL(3)
#define MCE_HANDLED_EDAC BIT_ULL(4)
#define MCE_HANDLED_MCELOG BIT_ULL(5)
/*
* Indicates an MCE which has happened in kernel space but from
* which the kernel can recover simply by executing fixup_exception()
* so that an error is returned to the caller of the function that
* hit the machine check.
*/
#define MCE_IN_KERNEL_RECOV BIT_ULL(6)
/*
* Indicates an MCE that happened in kernel space while copying data
* from user. In this case fixup_exception() gets the kernel to the
* error exit for the copy function. Machine check handler can then
* treat it like a fault taken in user mode.
*/
#define MCE_IN_KERNEL_COPYIN BIT_ULL(7)
/*
* This structure contains all data related to the MCE log. Also
* carries a signature to make it easier to find from external
......@@ -174,6 +189,15 @@ extern void mce_unregister_decode_chain(struct notifier_block *nb);
extern int mce_p5_enabled;
#ifdef CONFIG_ARCH_HAS_COPY_MC
extern void enable_copy_mc_fragile(void);
unsigned long __must_check copy_mc_fragile(void *dst, const void *src, unsigned cnt);
#else
static inline void enable_copy_mc_fragile(void)
{
}
#endif
#ifdef CONFIG_X86_MCE
int mcheck_init(void);
void mcheck_cpu_init(struct cpuinfo_x86 *c);
......@@ -200,12 +224,8 @@ void mce_setup(struct mce *m);
void mce_log(struct mce *m);
DECLARE_PER_CPU(struct device *, mce_device);
/*
* Maximum banks number.
* This is the limit of the current register layout on
* Intel CPUs.
*/
#define MAX_NR_BANKS 32
/* Maximum number of MCA banks per CPU. */
#define MAX_NR_BANKS 64
#ifdef CONFIG_X86_MCE_INTEL
void mce_intel_feature_init(struct cpuinfo_x86 *c);
......@@ -328,7 +348,6 @@ enum smca_bank_types {
struct smca_hwid {
unsigned int bank_type; /* Use with smca_bank_types for easy indexing. */
u32 hwid_mcatype; /* (hwid,mcatype) tuple */
u32 xec_bitmap; /* Bitmap of valid ExtErrorCodes; current max is 21. */
u8 count; /* Number of instances. */
};
......
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _MCSAFE_TEST_H_
#define _MCSAFE_TEST_H_
#ifndef __ASSEMBLY__
#ifdef CONFIG_MCSAFE_TEST
extern unsigned long mcsafe_test_src;
extern unsigned long mcsafe_test_dst;
static inline void mcsafe_inject_src(void *addr)
{
if (addr)
mcsafe_test_src = (unsigned long) addr;
else
mcsafe_test_src = ~0UL;
}
static inline void mcsafe_inject_dst(void *addr)
{
if (addr)
mcsafe_test_dst = (unsigned long) addr;
else
mcsafe_test_dst = ~0UL;
}
#else /* CONFIG_MCSAFE_TEST */
static inline void mcsafe_inject_src(void *addr)
{
}
static inline void mcsafe_inject_dst(void *addr)
{
}
#endif /* CONFIG_MCSAFE_TEST */
#else /* __ASSEMBLY__ */
#include <asm/export.h>
#ifdef CONFIG_MCSAFE_TEST
.macro MCSAFE_TEST_CTL
.pushsection .data
.align 8
.globl mcsafe_test_src
mcsafe_test_src:
.quad 0
EXPORT_SYMBOL_GPL(mcsafe_test_src)
.globl mcsafe_test_dst
mcsafe_test_dst:
.quad 0
EXPORT_SYMBOL_GPL(mcsafe_test_dst)
.popsection
.endm
.macro MCSAFE_TEST_SRC reg count target
leaq \count(\reg), %r9
cmp mcsafe_test_src, %r9
ja \target
.endm
.macro MCSAFE_TEST_DST reg count target
leaq \count(\reg), %r9
cmp mcsafe_test_dst, %r9
ja \target
.endm
#else
.macro MCSAFE_TEST_CTL
.endm
.macro MCSAFE_TEST_SRC reg count target
.endm
.macro MCSAFE_TEST_DST reg count target
.endm
#endif /* CONFIG_MCSAFE_TEST */
#endif /* __ASSEMBLY__ */
#endif /* _MCSAFE_TEST_H_ */
......@@ -82,38 +82,6 @@ int strcmp(const char *cs, const char *ct);
#endif
#define __HAVE_ARCH_MEMCPY_MCSAFE 1
__must_check unsigned long __memcpy_mcsafe(void *dst, const void *src,
size_t cnt);
DECLARE_STATIC_KEY_FALSE(mcsafe_key);
/**
* memcpy_mcsafe - copy memory with indication if a machine check happened
*
* @dst: destination address
* @src: source address
* @cnt: number of bytes to copy
*
* Low level memory copy function that catches machine checks
* We only call into the "safe" function on systems that can
* actually do machine check recovery. Everyone else can just
* use memcpy().
*
* Return 0 for success, or number of bytes not copied if there was an
* exception.
*/
static __always_inline __must_check unsigned long
memcpy_mcsafe(void *dst, const void *src, size_t cnt)
{
#ifdef CONFIG_X86_MCE
if (static_branch_unlikely(&mcsafe_key))
return __memcpy_mcsafe(dst, src, cnt);
else
#endif
memcpy(dst, src, cnt);
return 0;
}
#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
#define __HAVE_ARCH_MEMCPY_FLUSHCACHE 1
void __memcpy_flushcache(void *dst, const void *src, size_t cnt);
......
......@@ -35,6 +35,8 @@ extern int panic_on_unrecovered_nmi;
void math_emulate(struct math_emu_info *);
bool fault_in_kernel_space(unsigned long address);
#ifdef CONFIG_VMAP_STACK
void __noreturn handle_stack_overflow(const char *message,
struct pt_regs *regs,
......
......@@ -455,6 +455,15 @@ extern __must_check long strnlen_user(const char __user *str, long n);
unsigned long __must_check clear_user(void __user *mem, unsigned long len);
unsigned long __must_check __clear_user(void __user *mem, unsigned long len);
#ifdef CONFIG_ARCH_HAS_COPY_MC
unsigned long __must_check
copy_mc_to_kernel(void *to, const void *from, unsigned len);
#define copy_mc_to_kernel copy_mc_to_kernel
unsigned long __must_check
copy_mc_to_user(void *to, const void *from, unsigned len);
#endif
/*
* movsl can be slow when source and dest are not both 8-byte aligned
*/
......
......@@ -46,22 +46,6 @@ copy_user_generic(void *to, const void *from, unsigned len)
return ret;
}
static __always_inline __must_check unsigned long
copy_to_user_mcsafe(void *to, const void *from, unsigned len)
{
unsigned long ret;
__uaccess_begin();
/*
* Note, __memcpy_mcsafe() is explicitly used since it can
* handle exceptions / faults. memcpy_mcsafe() may fall back to
* memcpy() which lacks this handling.
*/
ret = __memcpy_mcsafe(to, from, len);
__uaccess_end();
return ret;
}
static __always_inline __must_check unsigned long
raw_copy_from_user(void *dst, const void __user *src, unsigned long size)
{
......@@ -102,8 +86,4 @@ __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size)
kasan_check_write(dst, size);
return __copy_user_flushcache(dst, src, size);
}
unsigned long
mcsafe_handle_tail(char *to, char *from, unsigned len);
#endif /* _ASM_X86_UACCESS_64_H */
......@@ -132,49 +132,49 @@ static enum smca_bank_types smca_get_bank_type(unsigned int bank)
}
static struct smca_hwid smca_hwid_mcatypes[] = {
/* { bank_type, hwid_mcatype, xec_bitmap } */
/* { bank_type, hwid_mcatype } */
/* Reserved type */
{ SMCA_RESERVED, HWID_MCATYPE(0x00, 0x0), 0x0 },
{ SMCA_RESERVED, HWID_MCATYPE(0x00, 0x0) },
/* ZN Core (HWID=0xB0) MCA types */
{ SMCA_LS, HWID_MCATYPE(0xB0, 0x0), 0x1FFFFF },
{ SMCA_LS_V2, HWID_MCATYPE(0xB0, 0x10), 0xFFFFFF },
{ SMCA_IF, HWID_MCATYPE(0xB0, 0x1), 0x3FFF },
{ SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2), 0xF },
{ SMCA_DE, HWID_MCATYPE(0xB0, 0x3), 0x1FF },
{ SMCA_LS, HWID_MCATYPE(0xB0, 0x0) },
{ SMCA_LS_V2, HWID_MCATYPE(0xB0, 0x10) },
{ SMCA_IF, HWID_MCATYPE(0xB0, 0x1) },
{ SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2) },
{ SMCA_DE, HWID_MCATYPE(0xB0, 0x3) },
/* HWID 0xB0 MCATYPE 0x4 is Reserved */
{ SMCA_EX, HWID_MCATYPE(0xB0, 0x5), 0xFFF },
{ SMCA_FP, HWID_MCATYPE(0xB0, 0x6), 0x7F },
{ SMCA_L3_CACHE, HWID_MCATYPE(0xB0, 0x7), 0xFF },
{ SMCA_EX, HWID_MCATYPE(0xB0, 0x5) },
{ SMCA_FP, HWID_MCATYPE(0xB0, 0x6) },
{ SMCA_L3_CACHE, HWID_MCATYPE(0xB0, 0x7) },
/* Data Fabric MCA types */
{ SMCA_CS, HWID_MCATYPE(0x2E, 0x0), 0x1FF },
{ SMCA_PIE, HWID_MCATYPE(0x2E, 0x1), 0x1F },
{ SMCA_CS_V2, HWID_MCATYPE(0x2E, 0x2), 0x3FFF },
{ SMCA_CS, HWID_MCATYPE(0x2E, 0x0) },
{ SMCA_PIE, HWID_MCATYPE(0x2E, 0x1) },
{ SMCA_CS_V2, HWID_MCATYPE(0x2E, 0x2) },
/* Unified Memory Controller MCA type */
{ SMCA_UMC, HWID_MCATYPE(0x96, 0x0), 0xFF },
{ SMCA_UMC, HWID_MCATYPE(0x96, 0x0) },
/* Parameter Block MCA type */
{ SMCA_PB, HWID_MCATYPE(0x05, 0x0), 0x1 },
{ SMCA_PB, HWID_MCATYPE(0x05, 0x0) },
/* Platform Security Processor MCA type */
{ SMCA_PSP, HWID_MCATYPE(0xFF, 0x0), 0x1 },
{ SMCA_PSP_V2, HWID_MCATYPE(0xFF, 0x1), 0x3FFFF },
{ SMCA_PSP, HWID_MCATYPE(0xFF, 0x0) },
{ SMCA_PSP_V2, HWID_MCATYPE(0xFF, 0x1) },
/* System Management Unit MCA type */
{ SMCA_SMU, HWID_MCATYPE(0x01, 0x0), 0x1 },
{ SMCA_SMU_V2, HWID_MCATYPE(0x01, 0x1), 0x7FF },
{ SMCA_SMU, HWID_MCATYPE(0x01, 0x0) },
{ SMCA_SMU_V2, HWID_MCATYPE(0x01, 0x1) },
/* Microprocessor 5 Unit MCA type */
{ SMCA_MP5, HWID_MCATYPE(0x01, 0x2), 0x3FF },
{ SMCA_MP5, HWID_MCATYPE(0x01, 0x2) },
/* Northbridge IO Unit MCA type */
{ SMCA_NBIO, HWID_MCATYPE(0x18, 0x0), 0x1F },
{ SMCA_NBIO, HWID_MCATYPE(0x18, 0x0) },
/* PCI Express Unit MCA type */
{ SMCA_PCIE, HWID_MCATYPE(0x46, 0x0), 0x1F },
{ SMCA_PCIE, HWID_MCATYPE(0x46, 0x0) },
};
struct smca_bank smca_banks[MAX_NR_BANKS];
......
......@@ -40,7 +40,6 @@
#include <linux/debugfs.h>
#include <linux/irq_work.h>
#include <linux/export.h>
#include <linux/jump_label.h>
#include <linux/set_memory.h>
#include <linux/sync_core.h>
#include <linux/task_work.h>
......@@ -373,42 +372,105 @@ static int msr_to_offset(u32 msr)
return -1;
}
__visible bool ex_handler_rdmsr_fault(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr,
unsigned long error_code,
unsigned long fault_addr)
{
pr_emerg("MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pS)\n",
(unsigned int)regs->cx, regs->ip, (void *)regs->ip);
show_stack_regs(regs);
panic("MCA architectural violation!\n");
while (true)
cpu_relax();
return true;
}
/* MSR access wrappers used for error injection */
static u64 mce_rdmsrl(u32 msr)
static noinstr u64 mce_rdmsrl(u32 msr)
{
u64 v;
DECLARE_ARGS(val, low, high);
if (__this_cpu_read(injectm.finished)) {
int offset = msr_to_offset(msr);
int offset;
u64 ret;
instrumentation_begin();
offset = msr_to_offset(msr);
if (offset < 0)
return 0;
return *(u64 *)((char *)this_cpu_ptr(&injectm) + offset);
}
ret = 0;
else
ret = *(u64 *)((char *)this_cpu_ptr(&injectm) + offset);
if (rdmsrl_safe(msr, &v)) {
WARN_ONCE(1, "mce: Unable to read MSR 0x%x!\n", msr);
/*
* Return zero in case the access faulted. This should
* not happen normally but can happen if the CPU does
* something weird, or if the code is buggy.
*/
v = 0;
instrumentation_end();
return ret;
}
return v;
/*
* RDMSR on MCA MSRs should not fault. If they do, this is very much an
* architectural violation and needs to be reported to hw vendor. Panic
* the box to not allow any further progress.
*/
asm volatile("1: rdmsr\n"
"2:\n"
_ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_rdmsr_fault)
: EAX_EDX_RET(val, low, high) : "c" (msr));
return EAX_EDX_VAL(val, low, high);
}
__visible bool ex_handler_wrmsr_fault(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr,
unsigned long error_code,
unsigned long fault_addr)
{
pr_emerg("MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pS)\n",
(unsigned int)regs->cx, (unsigned int)regs->dx, (unsigned int)regs->ax,
regs->ip, (void *)regs->ip);
show_stack_regs(regs);
panic("MCA architectural violation!\n");
while (true)
cpu_relax();
return true;
}
static void mce_wrmsrl(u32 msr, u64 v)
static noinstr void mce_wrmsrl(u32 msr, u64 v)
{
u32 low, high;
if (__this_cpu_read(injectm.finished)) {
int offset = msr_to_offset(msr);
int offset;
instrumentation_begin();
offset = msr_to_offset(msr);
if (offset >= 0)
*(u64 *)((char *)this_cpu_ptr(&injectm) + offset) = v;
instrumentation_end();
return;
}
wrmsrl(msr, v);
low = (u32)v;
high = (u32)(v >> 32);
/* See comment in mce_rdmsrl() */
asm volatile("1: wrmsr\n"
"2:\n"
_ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_wrmsr_fault)
: : "c" (msr), "a"(low), "d" (high) : "memory");
}
/*
......@@ -745,7 +807,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
goto clear_it;
mce_read_aux(&m, i);
m.severity = mce_severity(&m, mca_cfg.tolerant, NULL, false);
m.severity = mce_severity(&m, NULL, mca_cfg.tolerant, NULL, false);
/*
* Don't get the IP here because it's unlikely to
* have anything to do with the actual error location.
......@@ -794,7 +856,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
quirk_no_way_out(i, m, regs);
m->bank = i;
if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) {
if (mce_severity(m, regs, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) {
mce_read_aux(m, i);
*msg = tmp;
return 1;
......@@ -872,7 +934,6 @@ static void mce_reign(void)
struct mce *m = NULL;
int global_worst = 0;
char *msg = NULL;
char *nmsg = NULL;
/*
* This CPU is the Monarch and the other CPUs have run
......@@ -880,12 +941,10 @@ static void mce_reign(void)
* Grade the severity of the errors of all the CPUs.
*/
for_each_possible_cpu(cpu) {
int severity = mce_severity(&per_cpu(mces_seen, cpu),
mca_cfg.tolerant,
&nmsg, true);
if (severity > global_worst) {
msg = nmsg;
global_worst = severity;
struct mce *mtmp = &per_cpu(mces_seen, cpu);
if (mtmp->severity > global_worst) {
global_worst = mtmp->severity;
m = &per_cpu(mces_seen, cpu);
}
}
......@@ -895,8 +954,11 @@ static void mce_reign(void)
* This dumps all the mces in the log buffer and stops the
* other CPUs.
*/
if (m && global_worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3)
if (m && global_worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3) {
/* call mce_severity() to get "msg" for panic */
mce_severity(m, NULL, mca_cfg.tolerant, &msg, true);
mce_panic("Fatal machine check", m, msg);
}
/*
* For UC somewhere we let the CPU who detects it handle it.
......@@ -1105,7 +1167,7 @@ static noinstr bool mce_check_crashing_cpu(void)
return false;
}
static void __mc_scan_banks(struct mce *m, struct mce *final,
static void __mc_scan_banks(struct mce *m, struct pt_regs *regs, struct mce *final,
unsigned long *toclear, unsigned long *valid_banks,
int no_way_out, int *worst)
{
......@@ -1140,7 +1202,7 @@ static void __mc_scan_banks(struct mce *m, struct mce *final,
/* Set taint even when machine check was not enabled. */
add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
severity = mce_severity(m, cfg->tolerant, NULL, true);
severity = mce_severity(m, regs, cfg->tolerant, NULL, true);
/*
* When machine check was for corrected/deferred handler don't
......@@ -1188,13 +1250,34 @@ static void kill_me_maybe(struct callback_head *cb)
if (!p->mce_ripv)
flags |= MF_MUST_KILL;
if (!memory_failure(p->mce_addr >> PAGE_SHIFT, flags)) {
if (!memory_failure(p->mce_addr >> PAGE_SHIFT, flags) &&
!(p->mce_kflags & MCE_IN_KERNEL_COPYIN)) {
set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page);
sync_core();
return;
}
pr_err("Memory error not recovered");
kill_me_now(cb);
if (p->mce_vaddr != (void __user *)-1l) {
force_sig_mceerr(BUS_MCEERR_AR, p->mce_vaddr, PAGE_SHIFT);
} else {
pr_err("Memory error not recovered");
kill_me_now(cb);
}
}
static void queue_task_work(struct mce *m, int kill_it)
{
current->mce_addr = m->addr;
current->mce_kflags = m->kflags;
current->mce_ripv = !!(m->mcgstatus & MCG_STATUS_RIPV);
current->mce_whole_page = whole_page(m);
if (kill_it)
current->mce_kill_me.func = kill_me_now;
else
current->mce_kill_me.func = kill_me_maybe;
task_work_add(current, &current->mce_kill_me, true);
}
/*
......@@ -1291,7 +1374,7 @@ noinstr void do_machine_check(struct pt_regs *regs)
order = mce_start(&no_way_out);
}
__mc_scan_banks(&m, final, toclear, valid_banks, no_way_out, &worst);
__mc_scan_banks(&m, regs, final, toclear, valid_banks, no_way_out, &worst);
if (!no_way_out)
mce_clear_state(toclear);
......@@ -1313,7 +1396,7 @@ noinstr void do_machine_check(struct pt_regs *regs)
* make sure we have the right "msg".
*/
if (worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3) {
mce_severity(&m, cfg->tolerant, &msg, true);
mce_severity(&m, regs, cfg->tolerant, &msg, true);
mce_panic("Local fatal machine check!", &m, msg);
}
}
......@@ -1330,25 +1413,16 @@ noinstr void do_machine_check(struct pt_regs *regs)
if (worst > 0)
irq_work_queue(&mce_irq_work);
mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
sync_core();
if (worst != MCE_AR_SEVERITY && !kill_it)
return;
goto out;
/* Fault was in user mode and we need to take some action */
if ((m.cs & 3) == 3) {
/* If this triggers there is no way to recover. Die hard. */
BUG_ON(!on_thread_stack() || !user_mode(regs));
current->mce_addr = m.addr;
current->mce_ripv = !!(m.mcgstatus & MCG_STATUS_RIPV);
current->mce_whole_page = whole_page(&m);
current->mce_kill_me.func = kill_me_maybe;
if (kill_it)
current->mce_kill_me.func = kill_me_now;
task_work_add(current, &current->mce_kill_me, true);
queue_task_work(&m, kill_it);
} else {
/*
* Handle an MCE which has happened in kernel space but from
......@@ -1363,7 +1437,12 @@ noinstr void do_machine_check(struct pt_regs *regs)
if (!fixup_exception(regs, X86_TRAP_MC, 0, 0))
mce_panic("Failed kernel mode recovery", &m, msg);
}
if (m.kflags & MCE_IN_KERNEL_COPYIN)
queue_task_work(&m, kill_it);
}
out:
mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
}
EXPORT_SYMBOL_GPL(do_machine_check);
......@@ -2064,7 +2143,7 @@ void mce_disable_bank(int bank)
and older.
* mce=nobootlog Don't log MCEs from before booting.
* mce=bios_cmci_threshold Don't program the CMCI threshold
* mce=recovery force enable memcpy_mcsafe()
* mce=recovery force enable copy_mc_fragile()
*/
static int __init mcheck_enable(char *str)
{
......@@ -2672,13 +2751,10 @@ static void __init mcheck_debugfs_init(void)
static void __init mcheck_debugfs_init(void) { }
#endif
DEFINE_STATIC_KEY_FALSE(mcsafe_key);
EXPORT_SYMBOL_GPL(mcsafe_key);
static int __init mcheck_late_init(void)
{
if (mca_cfg.recovery)
static_branch_inc(&mcsafe_key);
enable_copy_mc_fragile();
mcheck_debugfs_init();
......
......@@ -67,7 +67,9 @@ static int dev_mce_log(struct notifier_block *nb, unsigned long val,
unlock:
mutex_unlock(&mce_chrdev_read_mutex);
mce->kflags |= MCE_HANDLED_MCELOG;
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
mce->kflags |= MCE_HANDLED_MCELOG;
return NOTIFY_OK;
}
......
......@@ -38,7 +38,8 @@ int mce_gen_pool_add(struct mce *mce);
int mce_gen_pool_init(void);
struct llist_node *mce_gen_pool_prepare_records(void);
extern int (*mce_severity)(struct mce *a, int tolerant, char **msg, bool is_excp);
extern int (*mce_severity)(struct mce *a, struct pt_regs *regs,
int tolerant, char **msg, bool is_excp);
struct dentry *mce_get_debugfs_dir(void);
extern mce_banks_t mce_banks_ce_disabled;
......@@ -185,4 +186,14 @@ extern bool amd_filter_mce(struct mce *m);
static inline bool amd_filter_mce(struct mce *m) { return false; };
#endif
__visible bool ex_handler_rdmsr_fault(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr,
unsigned long error_code,
unsigned long fault_addr);
__visible bool ex_handler_wrmsr_fault(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr,
unsigned long error_code,
unsigned long fault_addr);
#endif /* __X86_MCE_INTERNAL_H__ */
......@@ -9,9 +9,14 @@
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/debugfs.h>
#include <asm/mce.h>
#include <linux/uaccess.h>
#include <asm/mce.h>
#include <asm/intel-family.h>
#include <asm/traps.h>
#include <asm/insn.h>
#include <asm/insn-eval.h>
#include "internal.h"
/*
......@@ -40,9 +45,14 @@ static struct severity {
unsigned char context;
unsigned char excp;
unsigned char covered;
unsigned char cpu_model;
unsigned char cpu_minstepping;
unsigned char bank_lo, bank_hi;
char *msg;
} severities[] = {
#define MCESEV(s, m, c...) { .sev = MCE_ ## s ## _SEVERITY, .msg = m, ## c }
#define BANK_RANGE(l, h) .bank_lo = l, .bank_hi = h
#define MODEL_STEPPING(m, s) .cpu_model = m, .cpu_minstepping = s
#define KERNEL .context = IN_KERNEL
#define USER .context = IN_USER
#define KERNEL_RECOV .context = IN_KERNEL_RECOV
......@@ -89,15 +99,10 @@ static struct severity {
PANIC, "In kernel and no restart IP",
EXCP, KERNEL_RECOV, MCGMASK(MCG_STATUS_RIPV, 0)
),
MCESEV(
DEFERRED, "Deferred error",
NOSER, MASK(MCI_STATUS_UC|MCI_STATUS_DEFERRED|MCI_STATUS_POISON, MCI_STATUS_DEFERRED)
),
MCESEV(
KEEP, "Corrected error",
NOSER, BITCLR(MCI_STATUS_UC)
),
/*
* known AO MCACODs reported via MCE or CMC:
*
......@@ -113,6 +118,18 @@ static struct severity {
AO, "Action optional: last level cache writeback error",
SER, MASK(MCI_UC_AR|MCACOD, MCI_STATUS_UC|MCACOD_L3WB)
),
/*
* Quirk for Skylake/Cascade Lake. Patrol scrubber may be configured
* to report uncorrected errors using CMCI with a special signature.
* UC=0, MSCOD=0x0010, MCACOD=binary(000X 0000 1100 XXXX) reported
* in one of the memory controller banks.
* Set severity to "AO" for same action as normal patrol scrub error.
*/
MCESEV(
AO, "Uncorrected Patrol Scrub Error",
SER, MASK(MCI_STATUS_UC|MCI_ADDR|0xffffeff0, MCI_ADDR|0x001000c0),
MODEL_STEPPING(INTEL_FAM6_SKYLAKE_X, 4), BANK_RANGE(13, 18)
),
/* ignore OVER for UCNA */
MCESEV(
......@@ -198,6 +215,47 @@ static struct severity {
#define mc_recoverable(mcg) (((mcg) & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) == \
(MCG_STATUS_RIPV|MCG_STATUS_EIPV))
static bool is_copy_from_user(struct pt_regs *regs)
{
u8 insn_buf[MAX_INSN_SIZE];
struct insn insn;
unsigned long addr;
if (copy_from_kernel_nofault(insn_buf, (void *)regs->ip, MAX_INSN_SIZE))
return false;
kernel_insn_init(&insn, insn_buf, MAX_INSN_SIZE);
insn_get_opcode(&insn);
if (!insn.opcode.got)
return false;
switch (insn.opcode.value) {
/* MOV mem,reg */
case 0x8A: case 0x8B:
/* MOVZ mem,reg */
case 0xB60F: case 0xB70F:
insn_get_modrm(&insn);
insn_get_sib(&insn);
if (!insn.modrm.got || !insn.sib.got)
return false;
addr = (unsigned long)insn_get_addr_ref(&insn, regs);
break;
/* REP MOVS */
case 0xA4: case 0xA5:
addr = regs->si;
break;
default:
return false;
}
if (fault_in_kernel_space(addr))
return false;
current->mce_vaddr = (void __user *)addr;
return true;
}
/*
* If mcgstatus indicated that ip/cs on the stack were
* no good, then "m->cs" will be zero and we will have
......@@ -209,15 +267,25 @@ static struct severity {
* distinguish an exception taken in user from from one
* taken in the kernel.
*/
static int error_context(struct mce *m)
static int error_context(struct mce *m, struct pt_regs *regs)
{
enum handler_type t;
if ((m->cs & 3) == 3)
return IN_USER;
if (!mc_recoverable(m->mcgstatus))
return IN_KERNEL;
if (mc_recoverable(m->mcgstatus) && ex_has_fault_handler(m->ip)) {
t = ex_get_fault_handler_type(m->ip);
if (t == EX_HANDLER_FAULT) {
m->kflags |= MCE_IN_KERNEL_RECOV;
return IN_KERNEL_RECOV;
}
if (t == EX_HANDLER_UACCESS && regs && is_copy_from_user(regs)) {
m->kflags |= MCE_IN_KERNEL_RECOV;
m->kflags |= MCE_IN_KERNEL_COPYIN;
return IN_KERNEL_RECOV;
}
return IN_KERNEL;
}
......@@ -253,9 +321,10 @@ static int mce_severity_amd_smca(struct mce *m, enum context err_ctx)
* See AMD Error Scope Hierarchy table in a newer BKDG. For example
* 49125_15h_Models_30h-3Fh_BKDG.pdf, section "RAS Features"
*/
static int mce_severity_amd(struct mce *m, int tolerant, char **msg, bool is_excp)
static int mce_severity_amd(struct mce *m, struct pt_regs *regs, int tolerant,
char **msg, bool is_excp)
{
enum context ctx = error_context(m);
enum context ctx = error_context(m, regs);
/* Processor Context Corrupt, no need to fumble too much, die! */
if (m->status & MCI_STATUS_PCC)
......@@ -305,10 +374,11 @@ static int mce_severity_amd(struct mce *m, int tolerant, char **msg, bool is_exc
return MCE_KEEP_SEVERITY;
}
static int mce_severity_intel(struct mce *m, int tolerant, char **msg, bool is_excp)
static int mce_severity_intel(struct mce *m, struct pt_regs *regs,
int tolerant, char **msg, bool is_excp)
{
enum exception excp = (is_excp ? EXCP_CONTEXT : NO_EXCP);
enum context ctx = error_context(m);
enum context ctx = error_context(m, regs);
struct severity *s;
for (s = severities;; s++) {
......@@ -324,6 +394,12 @@ static int mce_severity_intel(struct mce *m, int tolerant, char **msg, bool is_e
continue;
if (s->excp && excp != s->excp)
continue;
if (s->cpu_model && boot_cpu_data.x86_model != s->cpu_model)
continue;
if (s->cpu_minstepping && boot_cpu_data.x86_stepping < s->cpu_minstepping)
continue;
if (s->bank_lo && (m->bank < s->bank_lo || m->bank > s->bank_hi))
continue;
if (msg)
*msg = s->msg;
s->covered = 1;
......@@ -336,7 +412,7 @@ static int mce_severity_intel(struct mce *m, int tolerant, char **msg, bool is_e
}
/* Default to mce_severity_intel */
int (*mce_severity)(struct mce *m, int tolerant, char **msg, bool is_excp) =
int (*mce_severity)(struct mce *m, struct pt_regs *regs, int tolerant, char **msg, bool is_excp) =
mce_severity_intel;
void __init mcheck_vendor_init_severity(void)
......
......@@ -8,6 +8,7 @@
#include <asm/hpet.h>
#include <asm/setup.h>
#include <asm/mce.h>
#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) && defined(CONFIG_PCI)
......@@ -624,10 +625,6 @@ static void amd_disable_seq_and_redirect_scrub(struct pci_dev *dev)
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3,
amd_disable_seq_and_redirect_scrub);
#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE)
#include <linux/jump_label.h>
#include <asm/string_64.h>
/* Ivy Bridge, Haswell, Broadwell */
static void quirk_intel_brickland_xeon_ras_cap(struct pci_dev *pdev)
{
......@@ -636,7 +633,7 @@ static void quirk_intel_brickland_xeon_ras_cap(struct pci_dev *pdev)
pci_read_config_dword(pdev, 0x84, &capid0);
if (capid0 & 0x10)
static_branch_inc(&mcsafe_key);
enable_copy_mc_fragile();
}
/* Skylake */
......@@ -653,7 +650,7 @@ static void quirk_intel_purley_xeon_ras_cap(struct pci_dev *pdev)
* enabled, so memory machine check recovery is also enabled.
*/
if ((capid0 & 0xc0) == 0xc0 || (capid5 & 0x1e0))
static_branch_inc(&mcsafe_key);
enable_copy_mc_fragile();
}
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0ec3, quirk_intel_brickland_xeon_ras_cap);
......@@ -661,7 +658,6 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2fc0, quirk_intel_brickland_xeon_
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, quirk_intel_brickland_xeon_ras_cap);
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2083, quirk_intel_purley_xeon_ras_cap);
#endif
#endif
bool x86_apple_machine;
EXPORT_SYMBOL(x86_apple_machine);
......
......@@ -44,6 +44,7 @@ obj-$(CONFIG_SMP) += msr-smp.o cache-smp.o
lib-y := delay.o misc.o cmdline.o cpu.o
lib-y += usercopy_$(BITS).o usercopy.o getuser.o putuser.o
lib-y += memcpy_$(BITS).o
lib-$(CONFIG_ARCH_HAS_COPY_MC) += copy_mc.o copy_mc_64.o
lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o
lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
......
// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2016-2020 Intel Corporation. All rights reserved. */
#include <linux/jump_label.h>
#include <linux/uaccess.h>
#include <linux/export.h>
#include <linux/string.h>
#include <linux/types.h>
#include <asm/mce.h>
#ifdef CONFIG_X86_MCE
/*
* See COPY_MC_TEST for self-test of the copy_mc_fragile()
* implementation.
*/
static DEFINE_STATIC_KEY_FALSE(copy_mc_fragile_key);
void enable_copy_mc_fragile(void)
{
static_branch_inc(&copy_mc_fragile_key);
}
#define copy_mc_fragile_enabled (static_branch_unlikely(&copy_mc_fragile_key))
/*
* Similar to copy_user_handle_tail, probe for the write fault point, or
* source exception point.
*/
__visible notrace unsigned long
copy_mc_fragile_handle_tail(char *to, char *from, unsigned len)
{
for (; len; --len, to++, from++)
if (copy_mc_fragile(to, from, 1))
break;
return len;
}
#else
/*
* No point in doing careful copying, or consulting a static key when
* there is no #MC handler in the CONFIG_X86_MCE=n case.
*/
void enable_copy_mc_fragile(void)
{
}
#define copy_mc_fragile_enabled (0)
#endif
unsigned long copy_mc_enhanced_fast_string(void *dst, const void *src, unsigned len);
/**
* copy_mc_to_kernel - memory copy that handles source exceptions
*
* @dst: destination address
* @src: source address
* @len: number of bytes to copy
*
* Call into the 'fragile' version on systems that benefit from avoiding
* corner case poison consumption scenarios, For example, accessing
* poison across 2 cachelines with a single instruction. Almost all
* other uses case can use copy_mc_enhanced_fast_string() for a fast
* recoverable copy, or fallback to plain memcpy.
*
* Return 0 for success, or number of bytes not copied if there was an
* exception.
*/
unsigned long __must_check copy_mc_to_kernel(void *dst, const void *src, unsigned len)
{
if (copy_mc_fragile_enabled)
return copy_mc_fragile(dst, src, len);
if (static_cpu_has(X86_FEATURE_ERMS))
return copy_mc_enhanced_fast_string(dst, src, len);
memcpy(dst, src, len);
return 0;
}
EXPORT_SYMBOL_GPL(copy_mc_to_kernel);
unsigned long __must_check copy_mc_to_user(void *dst, const void *src, unsigned len)
{
unsigned long ret;
if (copy_mc_fragile_enabled) {
__uaccess_begin();
ret = copy_mc_fragile(dst, src, len);
__uaccess_end();
return ret;
}
if (static_cpu_has(X86_FEATURE_ERMS)) {
__uaccess_begin();
ret = copy_mc_enhanced_fast_string(dst, src, len);
__uaccess_end();
return ret;
}
return copy_user_generic(dst, src, len);
}
/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright(c) 2016-2020 Intel Corporation. All rights reserved. */
#include <linux/linkage.h>
#include <asm/copy_mc_test.h>
#include <asm/export.h>
#include <asm/asm.h>
#ifndef CONFIG_UML
#ifdef CONFIG_X86_MCE
COPY_MC_TEST_CTL
/*
* copy_mc_fragile - copy memory with indication if an exception / fault happened
*
* The 'fragile' version is opted into by platform quirks and takes
* pains to avoid unrecoverable corner cases like 'fast-string'
* instruction sequences, and consuming poison across a cacheline
* boundary. The non-fragile version is equivalent to memcpy()
* regardless of CPU machine-check-recovery capability.
*/
SYM_FUNC_START(copy_mc_fragile)
cmpl $8, %edx
/* Less than 8 bytes? Go to byte copy loop */
jb .L_no_whole_words
/* Check for bad alignment of source */
testl $7, %esi
/* Already aligned */
jz .L_8byte_aligned
/* Copy one byte at a time until source is 8-byte aligned */
movl %esi, %ecx
andl $7, %ecx
subl $8, %ecx
negl %ecx
subl %ecx, %edx
.L_read_leading_bytes:
movb (%rsi), %al
COPY_MC_TEST_SRC %rsi 1 .E_leading_bytes
COPY_MC_TEST_DST %rdi 1 .E_leading_bytes
.L_write_leading_bytes:
movb %al, (%rdi)
incq %rsi
incq %rdi
decl %ecx
jnz .L_read_leading_bytes
.L_8byte_aligned:
movl %edx, %ecx
andl $7, %edx
shrl $3, %ecx
jz .L_no_whole_words
.L_read_words:
movq (%rsi), %r8
COPY_MC_TEST_SRC %rsi 8 .E_read_words
COPY_MC_TEST_DST %rdi 8 .E_write_words
.L_write_words:
movq %r8, (%rdi)
addq $8, %rsi
addq $8, %rdi
decl %ecx
jnz .L_read_words
/* Any trailing bytes? */
.L_no_whole_words:
andl %edx, %edx
jz .L_done_memcpy_trap
/* Copy trailing bytes */
movl %edx, %ecx
.L_read_trailing_bytes:
movb (%rsi), %al
COPY_MC_TEST_SRC %rsi 1 .E_trailing_bytes
COPY_MC_TEST_DST %rdi 1 .E_trailing_bytes
.L_write_trailing_bytes:
movb %al, (%rdi)
incq %rsi
incq %rdi
decl %ecx
jnz .L_read_trailing_bytes
/* Copy successful. Return zero */
.L_done_memcpy_trap:
xorl %eax, %eax
.L_done:
ret
SYM_FUNC_END(copy_mc_fragile)
EXPORT_SYMBOL_GPL(copy_mc_fragile)
.section .fixup, "ax"
/*
* Return number of bytes not copied for any failure. Note that
* there is no "tail" handling since the source buffer is 8-byte
* aligned and poison is cacheline aligned.
*/
.E_read_words:
shll $3, %ecx
.E_leading_bytes:
addl %edx, %ecx
.E_trailing_bytes:
mov %ecx, %eax
jmp .L_done
/*
* For write fault handling, given the destination is unaligned,
* we handle faults on multi-byte writes with a byte-by-byte
* copy up to the write-protected page.
*/
.E_write_words:
shll $3, %ecx
addl %edx, %ecx
movl %ecx, %edx
jmp copy_mc_fragile_handle_tail
.previous
_ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes)
_ASM_EXTABLE_FAULT(.L_read_words, .E_read_words)
_ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes)
_ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes)
_ASM_EXTABLE(.L_write_words, .E_write_words)
_ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
#endif /* CONFIG_X86_MCE */
/*
* copy_mc_enhanced_fast_string - memory copy with exception handling
*
* Fast string copy + fault / exception handling. If the CPU does
* support machine check exception recovery, but does not support
* recovering from fast-string exceptions then this CPU needs to be
* added to the copy_mc_fragile_key set of quirks. Otherwise, absent any
* machine check recovery support this version should be no slower than
* standard memcpy.
*/
SYM_FUNC_START(copy_mc_enhanced_fast_string)
movq %rdi, %rax
movq %rdx, %rcx
.L_copy:
rep movsb
/* Copy successful. Return zero */
xorl %eax, %eax
ret
SYM_FUNC_END(copy_mc_enhanced_fast_string)
.section .fixup, "ax"
.E_copy:
/*
* On fault %rcx is updated such that the copy instruction could
* optionally be restarted at the fault position, i.e. it
* contains 'bytes remaining'. A non-zero return indicates error
* to copy_mc_generic() users, or indicate short transfers to
* user-copy routines.
*/
movq %rcx, %rax
ret
.previous
_ASM_EXTABLE_FAULT(.L_copy, .E_copy)
#endif /* !CONFIG_UML */
......@@ -15,6 +15,7 @@
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/export.h>
#include <asm/trapnr.h>
.macro ALIGN_DESTINATION
/* check for bad alignment of destination */
......@@ -36,8 +37,8 @@
jmp .Lcopy_user_handle_tail
.previous
_ASM_EXTABLE_UA(100b, 103b)
_ASM_EXTABLE_UA(101b, 103b)
_ASM_EXTABLE_CPY(100b, 103b)
_ASM_EXTABLE_CPY(101b, 103b)
.endm
/*
......@@ -116,26 +117,26 @@ SYM_FUNC_START(copy_user_generic_unrolled)
60: jmp .Lcopy_user_handle_tail /* ecx is zerorest also */
.previous
_ASM_EXTABLE_UA(1b, 30b)
_ASM_EXTABLE_UA(2b, 30b)
_ASM_EXTABLE_UA(3b, 30b)
_ASM_EXTABLE_UA(4b, 30b)
_ASM_EXTABLE_UA(5b, 30b)
_ASM_EXTABLE_UA(6b, 30b)
_ASM_EXTABLE_UA(7b, 30b)
_ASM_EXTABLE_UA(8b, 30b)
_ASM_EXTABLE_UA(9b, 30b)
_ASM_EXTABLE_UA(10b, 30b)
_ASM_EXTABLE_UA(11b, 30b)
_ASM_EXTABLE_UA(12b, 30b)
_ASM_EXTABLE_UA(13b, 30b)
_ASM_EXTABLE_UA(14b, 30b)
_ASM_EXTABLE_UA(15b, 30b)
_ASM_EXTABLE_UA(16b, 30b)
_ASM_EXTABLE_UA(18b, 40b)
_ASM_EXTABLE_UA(19b, 40b)
_ASM_EXTABLE_UA(21b, 50b)
_ASM_EXTABLE_UA(22b, 50b)
_ASM_EXTABLE_CPY(1b, 30b)
_ASM_EXTABLE_CPY(2b, 30b)
_ASM_EXTABLE_CPY(3b, 30b)
_ASM_EXTABLE_CPY(4b, 30b)
_ASM_EXTABLE_CPY(5b, 30b)
_ASM_EXTABLE_CPY(6b, 30b)
_ASM_EXTABLE_CPY(7b, 30b)
_ASM_EXTABLE_CPY(8b, 30b)
_ASM_EXTABLE_CPY(9b, 30b)
_ASM_EXTABLE_CPY(10b, 30b)
_ASM_EXTABLE_CPY(11b, 30b)
_ASM_EXTABLE_CPY(12b, 30b)
_ASM_EXTABLE_CPY(13b, 30b)
_ASM_EXTABLE_CPY(14b, 30b)
_ASM_EXTABLE_CPY(15b, 30b)
_ASM_EXTABLE_CPY(16b, 30b)
_ASM_EXTABLE_CPY(18b, 40b)
_ASM_EXTABLE_CPY(19b, 40b)
_ASM_EXTABLE_CPY(21b, 50b)
_ASM_EXTABLE_CPY(22b, 50b)
SYM_FUNC_END(copy_user_generic_unrolled)
EXPORT_SYMBOL(copy_user_generic_unrolled)
......@@ -180,8 +181,8 @@ SYM_FUNC_START(copy_user_generic_string)
jmp .Lcopy_user_handle_tail
.previous
_ASM_EXTABLE_UA(1b, 11b)
_ASM_EXTABLE_UA(3b, 12b)
_ASM_EXTABLE_CPY(1b, 11b)
_ASM_EXTABLE_CPY(3b, 12b)
SYM_FUNC_END(copy_user_generic_string)
EXPORT_SYMBOL(copy_user_generic_string)
......@@ -213,7 +214,7 @@ SYM_FUNC_START(copy_user_enhanced_fast_string)
jmp .Lcopy_user_handle_tail
.previous
_ASM_EXTABLE_UA(1b, 12b)
_ASM_EXTABLE_CPY(1b, 12b)
SYM_FUNC_END(copy_user_enhanced_fast_string)
EXPORT_SYMBOL(copy_user_enhanced_fast_string)
......@@ -221,6 +222,7 @@ EXPORT_SYMBOL(copy_user_enhanced_fast_string)
* Try to copy last bytes and clear the rest if needed.
* Since protection fault in copy_from/to_user is not a normal situation,
* it is not necessary to optimize tail handling.
* Don't try to copy the tail if machine check happened
*
* Input:
* rdi destination
......@@ -232,12 +234,25 @@ EXPORT_SYMBOL(copy_user_enhanced_fast_string)
*/
SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail)
movl %edx,%ecx
cmp $X86_TRAP_MC,%eax /* check if X86_TRAP_MC */
je 3f
1: rep movsb
2: mov %ecx,%eax
ASM_CLAC
ret
_ASM_EXTABLE_UA(1b, 2b)
/*
* Return zero to pretend that this copy succeeded. This
* is counter-intuitive, but needed to prevent the code
* in lib/iov_iter.c from retrying and running back into
* the poison cache line again. The machine check handler
* will ensure that a SIGBUS is sent to the task.
*/
3: xorl %eax,%eax
ASM_CLAC
ret
_ASM_EXTABLE_CPY(1b, 2b)
SYM_CODE_END(.Lcopy_user_handle_tail)
/*
......@@ -366,27 +381,27 @@ SYM_FUNC_START(__copy_user_nocache)
jmp .Lcopy_user_handle_tail
.previous
_ASM_EXTABLE_UA(1b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_UA(2b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_UA(3b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_UA(4b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_UA(5b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_UA(6b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_UA(7b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_UA(8b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_UA(9b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_UA(10b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_UA(11b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_UA(12b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_UA(13b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_UA(14b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_UA(15b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_UA(16b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_UA(20b, .L_fixup_8b_copy)
_ASM_EXTABLE_UA(21b, .L_fixup_8b_copy)
_ASM_EXTABLE_UA(30b, .L_fixup_4b_copy)
_ASM_EXTABLE_UA(31b, .L_fixup_4b_copy)
_ASM_EXTABLE_UA(40b, .L_fixup_1b_copy)
_ASM_EXTABLE_UA(41b, .L_fixup_1b_copy)
_ASM_EXTABLE_CPY(1b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_CPY(2b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_CPY(3b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_CPY(4b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_CPY(5b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_CPY(6b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_CPY(7b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_CPY(8b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_CPY(9b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_CPY(10b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_CPY(11b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_CPY(12b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_CPY(13b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_CPY(14b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_CPY(15b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_CPY(16b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_CPY(20b, .L_fixup_8b_copy)
_ASM_EXTABLE_CPY(21b, .L_fixup_8b_copy)
_ASM_EXTABLE_CPY(30b, .L_fixup_4b_copy)
_ASM_EXTABLE_CPY(31b, .L_fixup_4b_copy)
_ASM_EXTABLE_CPY(40b, .L_fixup_1b_copy)
_ASM_EXTABLE_CPY(41b, .L_fixup_1b_copy)
SYM_FUNC_END(__copy_user_nocache)
EXPORT_SYMBOL(__copy_user_nocache)
......@@ -4,7 +4,6 @@
#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/cpufeatures.h>
#include <asm/mcsafe_test.h>
#include <asm/alternative-asm.h>
#include <asm/export.h>
......@@ -187,117 +186,3 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
SYM_FUNC_END(memcpy_orig)
.popsection
#ifndef CONFIG_UML
MCSAFE_TEST_CTL
/*
* __memcpy_mcsafe - memory copy with machine check exception handling
* Note that we only catch machine checks when reading the source addresses.
* Writes to target are posted and don't generate machine checks.
*/
SYM_FUNC_START(__memcpy_mcsafe)
cmpl $8, %edx
/* Less than 8 bytes? Go to byte copy loop */
jb .L_no_whole_words
/* Check for bad alignment of source */
testl $7, %esi
/* Already aligned */
jz .L_8byte_aligned
/* Copy one byte at a time until source is 8-byte aligned */
movl %esi, %ecx
andl $7, %ecx
subl $8, %ecx
negl %ecx
subl %ecx, %edx
.L_read_leading_bytes:
movb (%rsi), %al
MCSAFE_TEST_SRC %rsi 1 .E_leading_bytes
MCSAFE_TEST_DST %rdi 1 .E_leading_bytes
.L_write_leading_bytes:
movb %al, (%rdi)
incq %rsi
incq %rdi
decl %ecx
jnz .L_read_leading_bytes
.L_8byte_aligned:
movl %edx, %ecx
andl $7, %edx
shrl $3, %ecx
jz .L_no_whole_words
.L_read_words:
movq (%rsi), %r8
MCSAFE_TEST_SRC %rsi 8 .E_read_words
MCSAFE_TEST_DST %rdi 8 .E_write_words
.L_write_words:
movq %r8, (%rdi)
addq $8, %rsi
addq $8, %rdi
decl %ecx
jnz .L_read_words
/* Any trailing bytes? */
.L_no_whole_words:
andl %edx, %edx
jz .L_done_memcpy_trap
/* Copy trailing bytes */
movl %edx, %ecx
.L_read_trailing_bytes:
movb (%rsi), %al
MCSAFE_TEST_SRC %rsi 1 .E_trailing_bytes
MCSAFE_TEST_DST %rdi 1 .E_trailing_bytes
.L_write_trailing_bytes:
movb %al, (%rdi)
incq %rsi
incq %rdi
decl %ecx
jnz .L_read_trailing_bytes
/* Copy successful. Return zero */
.L_done_memcpy_trap:
xorl %eax, %eax
.L_done:
ret
SYM_FUNC_END(__memcpy_mcsafe)
EXPORT_SYMBOL_GPL(__memcpy_mcsafe)
.section .fixup, "ax"
/*
* Return number of bytes not copied for any failure. Note that
* there is no "tail" handling since the source buffer is 8-byte
* aligned and poison is cacheline aligned.
*/
.E_read_words:
shll $3, %ecx
.E_leading_bytes:
addl %edx, %ecx
.E_trailing_bytes:
mov %ecx, %eax
jmp .L_done
/*
* For write fault handling, given the destination is unaligned,
* we handle faults on multi-byte writes with a byte-by-byte
* copy up to the write-protected page.
*/
.E_write_words:
shll $3, %ecx
addl %edx, %ecx
movl %ecx, %edx
jmp mcsafe_handle_tail
.previous
_ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes)
_ASM_EXTABLE_FAULT(.L_read_words, .E_read_words)
_ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes)
_ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes)
_ASM_EXTABLE(.L_write_words, .E_write_words)
_ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
#endif
......@@ -56,27 +56,6 @@ unsigned long clear_user(void __user *to, unsigned long n)
}
EXPORT_SYMBOL(clear_user);
/*
* Similar to copy_user_handle_tail, probe for the write fault point,
* but reuse __memcpy_mcsafe in case a new read error is encountered.
* clac() is handled in _copy_to_iter_mcsafe().
*/
__visible notrace unsigned long
mcsafe_handle_tail(char *to, char *from, unsigned len)
{
for (; len; --len, to++, from++) {
/*
* Call the assembly routine back directly since
* memcpy_mcsafe() may silently fallback to memcpy.
*/
unsigned long rem = __memcpy_mcsafe(to, from, 1);
if (rem)
break;
}
return len;
}
#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
/**
* clean_cache_range - write back a cache range with CLWB
......
......@@ -80,6 +80,18 @@ __visible bool ex_handler_uaccess(const struct exception_table_entry *fixup,
}
EXPORT_SYMBOL(ex_handler_uaccess);
__visible bool ex_handler_copy(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr,
unsigned long error_code,
unsigned long fault_addr)
{
WARN_ONCE(trapnr == X86_TRAP_GP, "General protection fault in user access. Non-canonical address?");
regs->ip = ex_fixup_addr(fixup);
regs->ax = trapnr;
return true;
}
EXPORT_SYMBOL(ex_handler_copy);
__visible bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr,
unsigned long error_code,
......@@ -125,17 +137,21 @@ __visible bool ex_handler_clear_fs(const struct exception_table_entry *fixup,
}
EXPORT_SYMBOL(ex_handler_clear_fs);
__visible bool ex_has_fault_handler(unsigned long ip)
enum handler_type ex_get_fault_handler_type(unsigned long ip)
{
const struct exception_table_entry *e;
ex_handler_t handler;
e = search_exception_tables(ip);
if (!e)
return false;
return EX_HANDLER_NONE;
handler = ex_fixup_handler(e);
return handler == ex_handler_fault;
if (handler == ex_handler_fault)
return EX_HANDLER_FAULT;
else if (handler == ex_handler_uaccess || handler == ex_handler_copy)
return EX_HANDLER_UACCESS;
else
return EX_HANDLER_OTHER;
}
int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code,
......
......@@ -1128,7 +1128,7 @@ access_error(unsigned long error_code, struct vm_area_struct *vma)
return 0;
}
static int fault_in_kernel_space(unsigned long address)
bool fault_in_kernel_space(unsigned long address)
{
/*
* On 64-bit systems, the vsyscall page is at an address above
......
......@@ -999,10 +999,8 @@ static void decode_smca_error(struct mce *m)
pr_emerg(HW_ERR "%s Ext. Error Code: %d", ip_name, xec);
/* Only print the decode of valid error codes */
if (xec < smca_mce_descs[bank_type].num_descs &&
(hwid->xec_bitmap & BIT_ULL(xec))) {
if (xec < smca_mce_descs[bank_type].num_descs)
pr_cont(", %s.\n", smca_mce_descs[bank_type].descs[xec]);
}
if (bank_type == SMCA_UMC && xec == 0 && decode_dram_ecc)
decode_dram_ecc(cpu_to_node(m->extcpu), m);
......
......@@ -49,7 +49,7 @@ do { \
#define pmem_assign(dest, src) ((dest) = (src))
#endif
#if defined(__HAVE_ARCH_MEMCPY_MCSAFE) && defined(DM_WRITECACHE_HAS_PMEM)
#if IS_ENABLED(CONFIG_ARCH_HAS_COPY_MC) && defined(DM_WRITECACHE_HAS_PMEM)
#define DM_WRITECACHE_HANDLE_HARDWARE_ERRORS
#endif
......@@ -992,7 +992,8 @@ static void writecache_resume(struct dm_target *ti)
}
wc->freelist_size = 0;
r = memcpy_mcsafe(&sb_seq_count, &sb(wc)->seq_count, sizeof(uint64_t));
r = copy_mc_to_kernel(&sb_seq_count, &sb(wc)->seq_count,
sizeof(uint64_t));
if (r) {
writecache_error(wc, r, "hardware memory error when reading superblock: %d", r);
sb_seq_count = cpu_to_le64(0);
......@@ -1008,7 +1009,8 @@ static void writecache_resume(struct dm_target *ti)
e->seq_count = -1;
continue;
}
r = memcpy_mcsafe(&wme, memory_entry(wc, e), sizeof(struct wc_memory_entry));
r = copy_mc_to_kernel(&wme, memory_entry(wc, e),
sizeof(struct wc_memory_entry));
if (r) {
writecache_error(wc, r, "hardware memory error when reading metadata entry %lu: %d",
(unsigned long)b, r);
......@@ -1206,7 +1208,7 @@ static void bio_copy_block(struct dm_writecache *wc, struct bio *bio, void *data
if (rw == READ) {
int r;
r = memcpy_mcsafe(buf, data, size);
r = copy_mc_to_kernel(buf, data, size);
flush_dcache_page(bio_page(bio));
if (unlikely(r)) {
writecache_error(wc, r, "hardware memory error when reading data: %d", r);
......@@ -2349,7 +2351,7 @@ static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv)
}
}
r = memcpy_mcsafe(&s, sb(wc), sizeof(struct wc_memory_superblock));
r = copy_mc_to_kernel(&s, sb(wc), sizeof(struct wc_memory_superblock));
if (r) {
ti->error = "Hardware memory error when reading superblock";
goto bad;
......@@ -2360,7 +2362,8 @@ static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv)
ti->error = "Unable to initialize device";
goto bad;
}
r = memcpy_mcsafe(&s, sb(wc), sizeof(struct wc_memory_superblock));
r = copy_mc_to_kernel(&s, sb(wc),
sizeof(struct wc_memory_superblock));
if (r) {
ti->error = "Hardware memory error when reading superblock";
goto bad;
......
......@@ -268,7 +268,7 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
if (rw == READ) {
if (unlikely(is_bad_pmem(&nsio->bb, sector, sz_align)))
return -EIO;
if (memcpy_mcsafe(buf, nsio->addr + offset, size) != 0)
if (copy_mc_to_kernel(buf, nsio->addr + offset, size) != 0)
return -EIO;
return 0;
}
......
......@@ -125,7 +125,7 @@ static blk_status_t read_pmem(struct page *page, unsigned int off,
while (len) {
mem = kmap_atomic(page);
chunk = min_t(unsigned int, len, PAGE_SIZE - off);
rem = memcpy_mcsafe(mem + off, pmem_addr, chunk);
rem = copy_mc_to_kernel(mem + off, pmem_addr, chunk);
kunmap_atomic(mem);
if (rem)
return BLK_STS_IOERR;
......@@ -304,7 +304,7 @@ static long pmem_dax_direct_access(struct dax_device *dax_dev,
/*
* Use the 'no check' versions of copy_from_iter_flushcache() and
* copy_to_iter_mcsafe() to bypass HARDENED_USERCOPY overhead. Bounds
* copy_mc_to_iter() to bypass HARDENED_USERCOPY overhead. Bounds
* checking, both file offset and device offset, is handled by
* dax_iomap_actor()
*/
......@@ -317,7 +317,7 @@ static size_t pmem_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
static size_t pmem_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff,
void *addr, size_t bytes, struct iov_iter *i)
{
return _copy_to_iter_mcsafe(addr, bytes, i);
return _copy_mc_to_iter(addr, bytes, i);
}
static const struct dax_operations pmem_dax_ops = {
......
......@@ -435,7 +435,7 @@ DEFINE_DEBUGFS_ATTRIBUTE(action_threshold_ops, u64_get, action_threshold_set, "%
static const char * const bins[] = { "00", "01", "10", "11" };
static int array_dump(struct seq_file *m, void *v)
static int array_show(struct seq_file *m, void *v)
{
struct ce_array *ca = &ce_arr;
int i;
......@@ -467,18 +467,7 @@ static int array_dump(struct seq_file *m, void *v)
return 0;
}
static int array_open(struct inode *inode, struct file *filp)
{
return single_open(filp, array_dump, NULL);
}
static const struct file_operations array_ops = {
.owner = THIS_MODULE,
.open = array_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
DEFINE_SHOW_ATTRIBUTE(array);
static int __init create_debugfs_nodes(void)
{
......@@ -513,7 +502,7 @@ static int __init create_debugfs_nodes(void)
goto err;
}
array = debugfs_create_file("array", S_IRUSR, d, NULL, &array_ops);
array = debugfs_create_file("array", S_IRUSR, d, NULL, &array_fops);
if (!array) {
pr_warn("Error creating array debugfs node!\n");
goto err;
......@@ -553,20 +542,20 @@ static struct notifier_block cec_nb = {
.priority = MCE_PRIO_CEC,
};
static void __init cec_init(void)
static int __init cec_init(void)
{
if (ce_arr.disabled)
return;
return -ENODEV;
ce_arr.array = (void *)get_zeroed_page(GFP_KERNEL);
if (!ce_arr.array) {
pr_err("Error allocating CE array page!\n");
return;
return -ENOMEM;
}
if (create_debugfs_nodes()) {
free_page((unsigned long)ce_arr.array);
return;
return -ENOMEM;
}
INIT_DELAYED_WORK(&cec_work, cec_work_fn);
......@@ -575,6 +564,7 @@ static void __init cec_init(void)
mce_register_decode_chain(&cec_nb);
pr_info("Correctable Errors collector initialized.\n");
return 0;
}
late_initcall(cec_init);
......
......@@ -1308,6 +1308,8 @@ struct task_struct {
#endif
#ifdef CONFIG_X86_MCE
void __user *mce_vaddr;
__u64 mce_kflags;
u64 mce_addr;
__u64 mce_ripv : 1,
mce_whole_page : 1,
......
......@@ -161,20 +161,13 @@ extern int bcmp(const void *,const void *,__kernel_size_t);
#ifndef __HAVE_ARCH_MEMCHR
extern void * memchr(const void *,int,__kernel_size_t);
#endif
#ifndef __HAVE_ARCH_MEMCPY_MCSAFE
static inline __must_check unsigned long memcpy_mcsafe(void *dst,
const void *src, size_t cnt)
{
memcpy(dst, src, cnt);
return 0;
}
#endif
#ifndef __HAVE_ARCH_MEMCPY_FLUSHCACHE
static inline void memcpy_flushcache(void *dst, const void *src, size_t cnt)
{
memcpy(dst, src, cnt);
}
#endif
void *memchr_inv(const void *s, int c, size_t n);
char *strreplace(char *s, char old, char new);
......
......@@ -179,6 +179,19 @@ copy_in_user(void __user *to, const void __user *from, unsigned long n)
}
#endif
#ifndef copy_mc_to_kernel
/*
* Without arch opt-in this generic copy_mc_to_kernel() will not handle
* #MC (or arch equivalent) during source read.
*/
static inline unsigned long __must_check
copy_mc_to_kernel(void *dst, const void *src, size_t cnt)
{
memcpy(dst, src, cnt);
return 0;
}
#endif
static __always_inline void pagefault_disabled_inc(void)
{
current->pagefault_disabled++;
......
......@@ -185,10 +185,10 @@ size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i);
#define _copy_from_iter_flushcache _copy_from_iter_nocache
#endif
#ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE
size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i);
#ifdef CONFIG_ARCH_HAS_COPY_MC
size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i);
#else
#define _copy_to_iter_mcsafe _copy_to_iter
#define _copy_mc_to_iter _copy_to_iter
#endif
static __always_inline __must_check
......@@ -201,12 +201,12 @@ size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
}
static __always_inline __must_check
size_t copy_to_iter_mcsafe(void *addr, size_t bytes, struct iov_iter *i)
size_t copy_mc_to_iter(void *addr, size_t bytes, struct iov_iter *i)
{
if (unlikely(!check_copy_size(addr, bytes, true)))
return 0;
else
return _copy_to_iter_mcsafe(addr, bytes, i);
return _copy_mc_to_iter(addr, bytes, i);
}
size_t iov_iter_zero(size_t bytes, struct iov_iter *);
......
......@@ -635,7 +635,12 @@ config UACCESS_MEMCPY
config ARCH_HAS_UACCESS_FLUSHCACHE
bool
config ARCH_HAS_UACCESS_MCSAFE
# arch has a concept of a recoverable synchronous exception due to a
# memory-read error like x86 machine-check or ARM data-abort, and
# implements copy_mc_to_{user,kernel} to abort and report
# 'bytes-transferred' if that exception fires when accessing the source
# buffer.
config ARCH_HAS_COPY_MC
bool
# Temporary. Goes away when all archs are cleaned up
......
......@@ -637,30 +637,30 @@ size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
}
EXPORT_SYMBOL(_copy_to_iter);
#ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE
static int copyout_mcsafe(void __user *to, const void *from, size_t n)
#ifdef CONFIG_ARCH_HAS_COPY_MC
static int copyout_mc(void __user *to, const void *from, size_t n)
{
if (access_ok(to, n)) {
instrument_copy_to_user(to, from, n);
n = copy_to_user_mcsafe((__force void *) to, from, n);
n = copy_mc_to_user((__force void *) to, from, n);
}
return n;
}
static unsigned long memcpy_mcsafe_to_page(struct page *page, size_t offset,
static unsigned long copy_mc_to_page(struct page *page, size_t offset,
const char *from, size_t len)
{
unsigned long ret;
char *to;
to = kmap_atomic(page);
ret = memcpy_mcsafe(to + offset, from, len);
ret = copy_mc_to_kernel(to + offset, from, len);
kunmap_atomic(to);
return ret;
}
static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes,
static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes,
struct iov_iter *i)
{
struct pipe_inode_info *pipe = i->pipe;
......@@ -678,7 +678,7 @@ static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes,
size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
unsigned long rem;
rem = memcpy_mcsafe_to_page(pipe->bufs[i_head & p_mask].page,
rem = copy_mc_to_page(pipe->bufs[i_head & p_mask].page,
off, addr, chunk);
i->head = i_head;
i->iov_offset = off + chunk - rem;
......@@ -695,18 +695,17 @@ static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes,
}
/**
* _copy_to_iter_mcsafe - copy to user with source-read error exception handling
* _copy_mc_to_iter - copy to iter with source memory error exception handling
* @addr: source kernel address
* @bytes: total transfer length
* @iter: destination iterator
*
* The pmem driver arranges for filesystem-dax to use this facility via
* dax_copy_to_iter() for protecting read/write to persistent memory.
* Unless / until an architecture can guarantee identical performance
* between _copy_to_iter_mcsafe() and _copy_to_iter() it would be a
* performance regression to switch more users to the mcsafe version.
* The pmem driver deploys this for the dax operation
* (dax_copy_to_iter()) for dax reads (bypass page-cache and the
* block-layer). Upon #MC read(2) aborts and returns EIO or the bytes
* successfully copied.
*
* Otherwise, the main differences between this and typical _copy_to_iter().
* The main differences between this and typical _copy_to_iter().
*
* * Typical tail/residue handling after a fault retries the copy
* byte-by-byte until the fault happens again. Re-triggering machine
......@@ -717,23 +716,22 @@ static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes,
* * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies.
* Compare to copy_to_iter() where only ITER_IOVEC attempts might return
* a short copy.
*
* See MCSAFE_TEST for self-test.
*/
size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i)
size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
{
const char *from = addr;
unsigned long rem, curr_addr, s_addr = (unsigned long) addr;
if (unlikely(iov_iter_is_pipe(i)))
return copy_pipe_to_iter_mcsafe(addr, bytes, i);
return copy_mc_pipe_to_iter(addr, bytes, i);
if (iter_is_iovec(i))
might_fault();
iterate_and_advance(i, bytes, v,
copyout_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
copyout_mc(v.iov_base, (from += v.iov_len) - v.iov_len,
v.iov_len),
({
rem = memcpy_mcsafe_to_page(v.bv_page, v.bv_offset,
(from += v.bv_len) - v.bv_len, v.bv_len);
rem = copy_mc_to_page(v.bv_page, v.bv_offset,
(from += v.bv_len) - v.bv_len, v.bv_len);
if (rem) {
curr_addr = (unsigned long) from;
bytes = curr_addr - s_addr - rem;
......@@ -741,8 +739,8 @@ size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i)
}
}),
({
rem = memcpy_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len,
v.iov_len);
rem = copy_mc_to_kernel(v.iov_base, (from += v.iov_len)
- v.iov_len, v.iov_len);
if (rem) {
curr_addr = (unsigned long) from;
bytes = curr_addr - s_addr - rem;
......@@ -753,8 +751,8 @@ size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i)
return bytes;
}
EXPORT_SYMBOL_GPL(_copy_to_iter_mcsafe);
#endif /* CONFIG_ARCH_HAS_UACCESS_MCSAFE */
EXPORT_SYMBOL_GPL(_copy_mc_to_iter);
#endif /* CONFIG_ARCH_HAS_COPY_MC */
size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
{
......
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _MCSAFE_TEST_H_
#define _MCSAFE_TEST_H_
.macro MCSAFE_TEST_CTL
.endm
.macro MCSAFE_TEST_SRC reg count target
.endm
.macro MCSAFE_TEST_DST reg count target
.endm
#endif /* _MCSAFE_TEST_H_ */
......@@ -4,7 +4,6 @@
#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/cpufeatures.h>
#include <asm/mcsafe_test.h>
#include <asm/alternative-asm.h>
#include <asm/export.h>
......@@ -187,117 +186,3 @@ SYM_FUNC_START(memcpy_orig)
SYM_FUNC_END(memcpy_orig)
.popsection
#ifndef CONFIG_UML
MCSAFE_TEST_CTL
/*
* __memcpy_mcsafe - memory copy with machine check exception handling
* Note that we only catch machine checks when reading the source addresses.
* Writes to target are posted and don't generate machine checks.
*/
SYM_FUNC_START(__memcpy_mcsafe)
cmpl $8, %edx
/* Less than 8 bytes? Go to byte copy loop */
jb .L_no_whole_words
/* Check for bad alignment of source */
testl $7, %esi
/* Already aligned */
jz .L_8byte_aligned
/* Copy one byte at a time until source is 8-byte aligned */
movl %esi, %ecx
andl $7, %ecx
subl $8, %ecx
negl %ecx
subl %ecx, %edx
.L_read_leading_bytes:
movb (%rsi), %al
MCSAFE_TEST_SRC %rsi 1 .E_leading_bytes
MCSAFE_TEST_DST %rdi 1 .E_leading_bytes
.L_write_leading_bytes:
movb %al, (%rdi)
incq %rsi
incq %rdi
decl %ecx
jnz .L_read_leading_bytes
.L_8byte_aligned:
movl %edx, %ecx
andl $7, %edx
shrl $3, %ecx
jz .L_no_whole_words
.L_read_words:
movq (%rsi), %r8
MCSAFE_TEST_SRC %rsi 8 .E_read_words
MCSAFE_TEST_DST %rdi 8 .E_write_words
.L_write_words:
movq %r8, (%rdi)
addq $8, %rsi
addq $8, %rdi
decl %ecx
jnz .L_read_words
/* Any trailing bytes? */
.L_no_whole_words:
andl %edx, %edx
jz .L_done_memcpy_trap
/* Copy trailing bytes */
movl %edx, %ecx
.L_read_trailing_bytes:
movb (%rsi), %al
MCSAFE_TEST_SRC %rsi 1 .E_trailing_bytes
MCSAFE_TEST_DST %rdi 1 .E_trailing_bytes
.L_write_trailing_bytes:
movb %al, (%rdi)
incq %rsi
incq %rdi
decl %ecx
jnz .L_read_trailing_bytes
/* Copy successful. Return zero */
.L_done_memcpy_trap:
xorl %eax, %eax
.L_done:
ret
SYM_FUNC_END(__memcpy_mcsafe)
EXPORT_SYMBOL_GPL(__memcpy_mcsafe)
.section .fixup, "ax"
/*
* Return number of bytes not copied for any failure. Note that
* there is no "tail" handling since the source buffer is 8-byte
* aligned and poison is cacheline aligned.
*/
.E_read_words:
shll $3, %ecx
.E_leading_bytes:
addl %edx, %ecx
.E_trailing_bytes:
mov %ecx, %eax
jmp .L_done
/*
* For write fault handling, given the destination is unaligned,
* we handle faults on multi-byte writes with a byte-by-byte
* copy up to the write-protected page.
*/
.E_write_words:
shll $3, %ecx
addl %edx, %ecx
movl %ecx, %edx
jmp mcsafe_handle_tail
.previous
_ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes)
_ASM_EXTABLE_FAULT(.L_read_words, .E_read_words)
_ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes)
_ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes)
_ASM_EXTABLE(.L_write_words, .E_write_words)
_ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
#endif
......@@ -548,8 +548,9 @@ static const char *uaccess_safe_builtin[] = {
"__ubsan_handle_shift_out_of_bounds",
/* misc */
"csum_partial_copy_generic",
"__memcpy_mcsafe",
"mcsafe_handle_tail",
"copy_mc_fragile",
"copy_mc_fragile_handle_tail",
"copy_mc_enhanced_fast_string",
"ftrace_likely_update", /* CONFIG_TRACE_BRANCH_PROFILING */
NULL
};
......
......@@ -13,7 +13,6 @@ perf-y += synthesize.o
perf-y += kallsyms-parse.o
perf-y += find-bit-bench.o
perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-lib.o
perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o
......
/*
* From code in arch/x86/lib/usercopy_64.c, copied to keep tools/ copy
* of the kernel's arch/x86/lib/memcpy_64.s used in 'perf bench mem memcpy'
* happy.
*/
#include <linux/types.h>
unsigned long __memcpy_mcsafe(void *dst, const void *src, size_t cnt);
unsigned long mcsafe_handle_tail(char *to, char *from, unsigned len);
unsigned long mcsafe_handle_tail(char *to, char *from, unsigned len)
{
for (; len; --len, to++, from++) {
/*
* Call the assembly routine back directly since
* memcpy_mcsafe() may silently fallback to memcpy.
*/
unsigned long rem = __memcpy_mcsafe(to, from, 1);
if (rem)
break;
}
return len;
}
......@@ -23,7 +23,8 @@
#include "nfit_test.h"
#include "../watermark.h"
#include <asm/mcsafe_test.h>
#include <asm/copy_mc_test.h>
#include <asm/mce.h>
/*
* Generate an NFIT table to describe the following topology:
......@@ -3283,7 +3284,7 @@ static struct platform_driver nfit_test_driver = {
.id_table = nfit_test_id,
};
static char mcsafe_buf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE)));
static char copy_mc_buf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE)));
enum INJECT {
INJECT_NONE,
......@@ -3291,7 +3292,7 @@ enum INJECT {
INJECT_DST,
};
static void mcsafe_test_init(char *dst, char *src, size_t size)
static void copy_mc_test_init(char *dst, char *src, size_t size)
{
size_t i;
......@@ -3300,7 +3301,7 @@ static void mcsafe_test_init(char *dst, char *src, size_t size)
src[i] = (char) i;
}
static bool mcsafe_test_validate(unsigned char *dst, unsigned char *src,
static bool copy_mc_test_validate(unsigned char *dst, unsigned char *src,
size_t size, unsigned long rem)
{
size_t i;
......@@ -3321,12 +3322,12 @@ static bool mcsafe_test_validate(unsigned char *dst, unsigned char *src,
return true;
}
void mcsafe_test(void)
void copy_mc_test(void)
{
char *inject_desc[] = { "none", "source", "destination" };
enum INJECT inj;
if (IS_ENABLED(CONFIG_MCSAFE_TEST)) {
if (IS_ENABLED(CONFIG_COPY_MC_TEST)) {
pr_info("%s: run...\n", __func__);
} else {
pr_info("%s: disabled, skip.\n", __func__);
......@@ -3344,31 +3345,31 @@ void mcsafe_test(void)
switch (inj) {
case INJECT_NONE:
mcsafe_inject_src(NULL);
mcsafe_inject_dst(NULL);
dst = &mcsafe_buf[2048];
src = &mcsafe_buf[1024 - i];
copy_mc_inject_src(NULL);
copy_mc_inject_dst(NULL);
dst = &copy_mc_buf[2048];
src = &copy_mc_buf[1024 - i];
expect = 0;
break;
case INJECT_SRC:
mcsafe_inject_src(&mcsafe_buf[1024]);
mcsafe_inject_dst(NULL);
dst = &mcsafe_buf[2048];
src = &mcsafe_buf[1024 - i];
copy_mc_inject_src(&copy_mc_buf[1024]);
copy_mc_inject_dst(NULL);
dst = &copy_mc_buf[2048];
src = &copy_mc_buf[1024 - i];
expect = 512 - i;
break;
case INJECT_DST:
mcsafe_inject_src(NULL);
mcsafe_inject_dst(&mcsafe_buf[2048]);
dst = &mcsafe_buf[2048 - i];
src = &mcsafe_buf[1024];
copy_mc_inject_src(NULL);
copy_mc_inject_dst(&copy_mc_buf[2048]);
dst = &copy_mc_buf[2048 - i];
src = &copy_mc_buf[1024];
expect = 512 - i;
break;
}
mcsafe_test_init(dst, src, 512);
rem = __memcpy_mcsafe(dst, src, 512);
valid = mcsafe_test_validate(dst, src, 512, expect);
copy_mc_test_init(dst, src, 512);
rem = copy_mc_fragile(dst, src, 512);
valid = copy_mc_test_validate(dst, src, 512, expect);
if (rem == expect && valid)
continue;
pr_info("%s: copy(%#lx, %#lx, %d) off: %d rem: %ld %s expect: %ld\n",
......@@ -3380,8 +3381,8 @@ void mcsafe_test(void)
}
}
mcsafe_inject_src(NULL);
mcsafe_inject_dst(NULL);
copy_mc_inject_src(NULL);
copy_mc_inject_dst(NULL);
}
static __init int nfit_test_init(void)
......@@ -3392,7 +3393,7 @@ static __init int nfit_test_init(void)
libnvdimm_test();
acpi_nfit_test();
device_dax_test();
mcsafe_test();
copy_mc_test();
dax_pmem_test();
dax_pmem_core_test();
#ifdef CONFIG_DEV_DAX_PMEM_COMPAT
......
......@@ -12,4 +12,4 @@ memcpy_p7_t1
copyuser_64_exc_t0
copyuser_64_exc_t1
copyuser_64_exc_t2
memcpy_mcsafe_64
copy_mc_64
......@@ -12,7 +12,7 @@ ASFLAGS = $(CFLAGS) -Wa,-mpower4
TEST_GEN_PROGS := copyuser_64_t0 copyuser_64_t1 copyuser_64_t2 \
copyuser_p7_t0 copyuser_p7_t1 \
memcpy_64_t0 memcpy_64_t1 memcpy_64_t2 \
memcpy_p7_t0 memcpy_p7_t1 memcpy_mcsafe_64 \
memcpy_p7_t0 memcpy_p7_t1 copy_mc_64 \
copyuser_64_exc_t0 copyuser_64_exc_t1 copyuser_64_exc_t2
EXTRA_SOURCES := validate.c ../harness.c stubs.S
......@@ -45,9 +45,9 @@ $(OUTPUT)/memcpy_p7_t%: memcpy_power7.S $(EXTRA_SOURCES)
-D SELFTEST_CASE=$(subst memcpy_p7_t,,$(notdir $@)) \
-o $@ $^
$(OUTPUT)/memcpy_mcsafe_64: memcpy_mcsafe_64.S $(EXTRA_SOURCES)
$(OUTPUT)/copy_mc_64: copy_mc_64.S $(EXTRA_SOURCES)
$(CC) $(CPPFLAGS) $(CFLAGS) \
-D COPY_LOOP=test_memcpy_mcsafe \
-D COPY_LOOP=test_copy_mc_generic \
-o $@ $^
$(OUTPUT)/copyuser_64_exc_t%: copyuser_64.S exc_validate.c ../harness.c \
......
../../../../../arch/powerpc/lib/copy_mc_64.S
\ No newline at end of file
../../../../../arch/powerpc/lib/memcpy_mcsafe_64.S
\ No newline at end of file
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment