Commit a1aab6f3 authored by Linus Torvalds

Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 asm updates from Ingo Molnar:
 "Most of the changes relate to Peter Zijlstra's cleanup of ptregs
  handling, in particular the i386 part is now much simplified and
  standardized - no more partial ptregs stack frames via the esp/ss
  oddity. This simplifies ftrace, kprobes, the unwinder, ptrace, kdump
  and kgdb.

  There are also CR4 hardening enhancements by Kees Cook, to make the
  generic platform functions such as native_write_cr4() less useful as
  ROP gadgets that disable SMEP/SMAP. Also protect the WP bit of CR0
  against similar attacks.

  The rest is smaller cleanups/fixes"

* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/alternatives: Add int3_emulate_call() selftest
  x86/stackframe/32: Allow int3_emulate_push()
  x86/stackframe/32: Provide consistent pt_regs
  x86/stackframe, x86/ftrace: Add pt_regs frame annotations
  x86/stackframe, x86/kprobes: Fix frame pointer annotations
  x86/stackframe: Move ENCODE_FRAME_POINTER to asm/frame.h
  x86/entry/32: Clean up return from interrupt preemption path
  x86/asm: Pin sensitive CR0 bits
  x86/asm: Pin sensitive CR4 bits
  Documentation/x86: Fix path to entry_32.S
  x86/asm: Remove unused TASK_TI_flags from asm-offsets.c
parents dad1c12e 7457c0da
...@@ -35,7 +35,7 @@ page fault handler:: ...@@ -35,7 +35,7 @@ page fault handler::
void do_page_fault(struct pt_regs *regs, unsigned long error_code) void do_page_fault(struct pt_regs *regs, unsigned long error_code)
in arch/x86/mm/fault.c. The parameters on the stack are set up by in arch/x86/mm/fault.c. The parameters on the stack are set up by
the low level assembly glue in arch/x86/kernel/entry_32.S. The parameter the low level assembly glue in arch/x86/entry/entry_32.S. The parameter
regs is a pointer to the saved registers on the stack, error_code regs is a pointer to the saved registers on the stack, error_code
contains a reason code for the exception. contains a reason code for the exception.
......
...@@ -172,21 +172,6 @@ For 32-bit we have the following conventions - kernel is built with ...@@ -172,21 +172,6 @@ For 32-bit we have the following conventions - kernel is built with
.endif .endif
.endm .endm
/*
* This is a sneaky trick to help the unwinder find pt_regs on the stack. The
* frame pointer is replaced with an encoded pointer to pt_regs. The encoding
* is just setting the LSB, which makes it an invalid stack address and is also
* a signal to the unwinder that it's a pt_regs pointer in disguise.
*
* NOTE: This macro must be used *after* PUSH_AND_CLEAR_REGS because it corrupts
* the original rbp.
*/
.macro ENCODE_FRAME_POINTER ptregs_offset=0
#ifdef CONFIG_FRAME_POINTER
leaq 1+\ptregs_offset(%rsp), %rbp
#endif
.endm
#ifdef CONFIG_PAGE_TABLE_ISOLATION #ifdef CONFIG_PAGE_TABLE_ISOLATION
/* /*
......
...@@ -67,7 +67,6 @@ ...@@ -67,7 +67,6 @@
# define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF # define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
#else #else
# define preempt_stop(clobbers) # define preempt_stop(clobbers)
# define resume_kernel restore_all_kernel
#endif #endif
.macro TRACE_IRQS_IRET .macro TRACE_IRQS_IRET
...@@ -203,9 +202,102 @@ ...@@ -203,9 +202,102 @@
.Lend_\@: .Lend_\@:
.endm .endm
#define CS_FROM_ENTRY_STACK (1 << 31)
#define CS_FROM_USER_CR3 (1 << 30)
#define CS_FROM_KERNEL (1 << 29)
/*
* FIXUP_FRAME - make the exception frame at %esp a complete pt_regs tail.
*
* Expects 0*4(%esp)..4*4(%esp) to hold gs/function, orig_eax, ip, cs and
* flags. If the frame came from VM86 or user mode (RPL bits set in cs),
* nothing is pushed. Otherwise CS_FROM_KERNEL is set in the high half of
* the cs slot and ss, sp and a copy of the five original entries are
* pushed, so that regs->sp and regs->ss are valid for kernel-mode frames.
* IRET_FRAME tests CS_FROM_KERNEL on the way out.
*/
.macro FIXUP_FRAME
/*
* The high bits of the CS dword (__csh) are used for CS_FROM_*.
* Clear them in case hardware didn't do this for us.
*/
andl $0x0000ffff, 3*4(%esp)
#ifdef CONFIG_VM86
/* EFLAGS.VM set: skip the fixup, same as for user mode. */
testl $X86_EFLAGS_VM, 4*4(%esp)
jnz .Lfrom_usermode_no_fixup_\@
#endif
/* Non-zero RPL in the saved cs means we did not come from kernel mode. */
testl $SEGMENT_RPL_MASK, 3*4(%esp)
jnz .Lfrom_usermode_no_fixup_\@
/* Remember that this frame was extended, so the exit path can undo it. */
orl $CS_FROM_KERNEL, 3*4(%esp)
/*
* When we're here from kernel mode; the (exception) stack looks like:
*
* 5*4(%esp) - <previous context>
* 4*4(%esp) - flags
* 3*4(%esp) - cs
* 2*4(%esp) - ip
* 1*4(%esp) - orig_eax
* 0*4(%esp) - gs / function
*
* Let's build a 5 entry IRET frame after that, such that struct pt_regs
* is complete and in particular regs->sp is correct. This gives us
* the original 5 entries as gap:
*
* 12*4(%esp) - <previous context>
* 11*4(%esp) - gap / flags
* 10*4(%esp) - gap / cs
* 9*4(%esp) - gap / ip
* 8*4(%esp) - gap / orig_eax
* 7*4(%esp) - gap / gs / function
* 6*4(%esp) - ss
* 5*4(%esp) - sp
* 4*4(%esp) - flags
* 3*4(%esp) - cs
* 2*4(%esp) - ip
* 1*4(%esp) - orig_eax
* 0*4(%esp) - gs / function
*/
pushl %ss # ss
pushl %esp # sp (points at ss)
addl $6*4, (%esp) # point sp back at the previous context
/*
* Each push lowers %esp by one slot, so the same 6*4 offset walks down
* the original five entries one at a time.
*/
pushl 6*4(%esp) # flags
pushl 6*4(%esp) # cs
pushl 6*4(%esp) # ip
pushl 6*4(%esp) # orig_eax
pushl 6*4(%esp) # gs / function
.Lfrom_usermode_no_fixup_\@:
.endm
/*
* IRET_FRAME - rebuild a 3 entry IRET frame for frames marked CS_FROM_KERNEL.
*
* Expects 0*4(%esp)..3*4(%esp) to hold ip, cs, flags and the (possibly
* modified) regs->sp. If CS_FROM_KERNEL is clear in the cs slot the macro
* does nothing. Otherwise flags, cs, ip and the current %eax are copied to
* just below regs->sp and %esp is switched there, leaving ip/cs/flags on
* top of the stack with %eax and %ecx restored.
*/
.macro IRET_FRAME
testl $CS_FROM_KERNEL, 1*4(%esp)
jz .Lfinished_frame_\@
/*
* Reconstruct the 3 entry IRET frame right after the (modified)
* regs->sp without lowering %esp in between, such that an NMI in the
* middle doesn't scribble our stack.
*/
/* Two scratch registers; every offset below is shifted by these pushes. */
pushl %eax
pushl %ecx
movl 5*4(%esp), %eax # (modified) regs->sp
movl 4*4(%esp), %ecx # flags
movl %ecx, -4(%eax)
movl 3*4(%esp), %ecx # cs
/* Strip the CS_FROM_* flag bits kept in the high half of the cs dword. */
andl $0x0000ffff, %ecx
movl %ecx, -8(%eax)
movl 2*4(%esp), %ecx # ip
movl %ecx, -12(%eax)
movl 1*4(%esp), %ecx # eax
movl %ecx, -16(%eax)
popl %ecx
/* Switch to the new frame; the saved eax sits right below ip there. */
lea -16(%eax), %esp
popl %eax
.Lfinished_frame_\@:
.endm
.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0 .macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0
cld cld
PUSH_GS PUSH_GS
FIXUP_FRAME
pushl %fs pushl %fs
pushl %es pushl %es
pushl %ds pushl %ds
...@@ -247,22 +339,6 @@ ...@@ -247,22 +339,6 @@
.Lend_\@: .Lend_\@:
.endm .endm
/*
* This is a sneaky trick to help the unwinder find pt_regs on the stack. The
* frame pointer is replaced with an encoded pointer to pt_regs. The encoding
* is just clearing the MSB, which makes it an invalid stack address and is also
* a signal to the unwinder that it's a pt_regs pointer in disguise.
*
* NOTE: This macro must be used *after* SAVE_ALL because it corrupts the
* original rbp.
*/
.macro ENCODE_FRAME_POINTER
#ifdef CONFIG_FRAME_POINTER
mov %esp, %ebp
andl $0x7fffffff, %ebp
#endif
.endm
.macro RESTORE_INT_REGS .macro RESTORE_INT_REGS
popl %ebx popl %ebx
popl %ecx popl %ecx
...@@ -375,9 +451,6 @@ ...@@ -375,9 +451,6 @@
* switch to it before we do any copying. * switch to it before we do any copying.
*/ */
#define CS_FROM_ENTRY_STACK (1 << 31)
#define CS_FROM_USER_CR3 (1 << 30)
.macro SWITCH_TO_KERNEL_STACK .macro SWITCH_TO_KERNEL_STACK
ALTERNATIVE "", "jmp .Lend_\@", X86_FEATURE_XENPV ALTERNATIVE "", "jmp .Lend_\@", X86_FEATURE_XENPV
...@@ -391,13 +464,6 @@ ...@@ -391,13 +464,6 @@
* that register for the time this macro runs * that register for the time this macro runs
*/ */
/*
* The high bits of the CS dword (__csh) are used for
* CS_FROM_ENTRY_STACK and CS_FROM_USER_CR3. Clear them in case
* hardware didn't do this for us.
*/
andl $(0x0000ffff), PT_CS(%esp)
/* Are we on the entry stack? Bail out if not! */ /* Are we on the entry stack? Bail out if not! */
movl PER_CPU_VAR(cpu_entry_area), %ecx movl PER_CPU_VAR(cpu_entry_area), %ecx
addl $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx addl $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
...@@ -755,7 +821,7 @@ ret_from_intr: ...@@ -755,7 +821,7 @@ ret_from_intr:
andl $SEGMENT_RPL_MASK, %eax andl $SEGMENT_RPL_MASK, %eax
#endif #endif
cmpl $USER_RPL, %eax cmpl $USER_RPL, %eax
jb resume_kernel # not returning to v8086 or userspace jb restore_all_kernel # not returning to v8086 or userspace
ENTRY(resume_userspace) ENTRY(resume_userspace)
DISABLE_INTERRUPTS(CLBR_ANY) DISABLE_INTERRUPTS(CLBR_ANY)
...@@ -765,18 +831,6 @@ ENTRY(resume_userspace) ...@@ -765,18 +831,6 @@ ENTRY(resume_userspace)
jmp restore_all jmp restore_all
END(ret_from_exception) END(ret_from_exception)
#ifdef CONFIG_PREEMPT
ENTRY(resume_kernel)
DISABLE_INTERRUPTS(CLBR_ANY)
cmpl $0, PER_CPU_VAR(__preempt_count)
jnz restore_all_kernel
testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off (exception path) ?
jz restore_all_kernel
call preempt_schedule_irq
jmp restore_all_kernel
END(resume_kernel)
#endif
GLOBAL(__begin_SYSENTER_singlestep_region) GLOBAL(__begin_SYSENTER_singlestep_region)
/* /*
* All code from here through __end_SYSENTER_singlestep_region is subject * All code from here through __end_SYSENTER_singlestep_region is subject
...@@ -1019,6 +1073,7 @@ restore_all: ...@@ -1019,6 +1073,7 @@ restore_all:
/* Restore user state */ /* Restore user state */
RESTORE_REGS pop=4 # skip orig_eax/error_code RESTORE_REGS pop=4 # skip orig_eax/error_code
.Lirq_return: .Lirq_return:
IRET_FRAME
/* /*
* ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization
* when returning from IPI handler and when returning from * when returning from IPI handler and when returning from
...@@ -1027,6 +1082,15 @@ restore_all: ...@@ -1027,6 +1082,15 @@ restore_all:
INTERRUPT_RETURN INTERRUPT_RETURN
restore_all_kernel: restore_all_kernel:
#ifdef CONFIG_PREEMPT
DISABLE_INTERRUPTS(CLBR_ANY)
cmpl $0, PER_CPU_VAR(__preempt_count)
jnz .Lno_preempt
testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off (exception path) ?
jz .Lno_preempt
call preempt_schedule_irq
.Lno_preempt:
#endif
TRACE_IRQS_IRET TRACE_IRQS_IRET
PARANOID_EXIT_TO_KERNEL_MODE PARANOID_EXIT_TO_KERNEL_MODE
BUG_IF_WRONG_CR3 BUG_IF_WRONG_CR3
...@@ -1384,6 +1448,7 @@ END(page_fault) ...@@ -1384,6 +1448,7 @@ END(page_fault)
common_exception: common_exception:
/* the function address is in %gs's slot on the stack */ /* the function address is in %gs's slot on the stack */
FIXUP_FRAME
pushl %fs pushl %fs
pushl %es pushl %es
pushl %ds pushl %ds
......
...@@ -22,6 +22,35 @@ ...@@ -22,6 +22,35 @@
pop %_ASM_BP pop %_ASM_BP
.endm .endm
#ifdef CONFIG_X86_64
/*
* This is a sneaky trick to help the unwinder find pt_regs on the stack. The
* frame pointer is replaced with an encoded pointer to pt_regs. The encoding
* is just setting the LSB, which makes it an invalid stack address and is also
* a signal to the unwinder that it's a pt_regs pointer in disguise.
*
* NOTE: This macro must be used *after* PUSH_AND_CLEAR_REGS because it corrupts
* the original rbp.
*/
.macro ENCODE_FRAME_POINTER ptregs_offset=0
leaq 1+\ptregs_offset(%rsp), %rbp
.endm
#else /* !CONFIG_X86_64 */
/*
* This is a sneaky trick to help the unwinder find pt_regs on the stack. The
* frame pointer is replaced with an encoded pointer to pt_regs. The encoding
* is just clearing the MSB, which makes it an invalid stack address and is also
* a signal to the unwinder that it's a pt_regs pointer in disguise.
*
* NOTE: This macro must be used *after* SAVE_ALL because it corrupts the
* original ebp.
*/
.macro ENCODE_FRAME_POINTER
mov %esp, %ebp
andl $0x7fffffff, %ebp
.endm
#endif /* CONFIG_X86_64 */
#else /* !__ASSEMBLY__ */ #else /* !__ASSEMBLY__ */
#define FRAME_BEGIN \ #define FRAME_BEGIN \
...@@ -30,12 +59,32 @@ ...@@ -30,12 +59,32 @@
#define FRAME_END "pop %" _ASM_BP "\n" #define FRAME_END "pop %" _ASM_BP "\n"
/*
* String-literal form of ENCODE_FRAME_POINTER, for C code that builds asm
* out of concatenated strings (e.g. SAVE_REGS_STRING). It emits the same
* instructions as the assembler macro of the same name: on 64-bit the
* stack pointer with its LSB set is loaded into %rbp, on 32-bit %esp with
* its MSB cleared is loaded into %ebp.
*/
#ifdef CONFIG_X86_64
#define ENCODE_FRAME_POINTER \
"lea 1(%rsp), %rbp\n\t"
#else /* !CONFIG_X86_64 */
#define ENCODE_FRAME_POINTER \
"movl %esp, %ebp\n\t" \
"andl $0x7fffffff, %ebp\n\t"
#endif /* CONFIG_X86_64 */
#endif /* __ASSEMBLY__ */ #endif /* __ASSEMBLY__ */
#define FRAME_OFFSET __ASM_SEL(4, 8) #define FRAME_OFFSET __ASM_SEL(4, 8)
#else /* !CONFIG_FRAME_POINTER */ #else /* !CONFIG_FRAME_POINTER */
/*
* Without CONFIG_FRAME_POINTER, ENCODE_FRAME_POINTER expands to nothing in
* both its assembler-macro and C-preprocessor forms.
*/
#ifdef __ASSEMBLY__
.macro ENCODE_FRAME_POINTER ptregs_offset=0
.endm
#else /* !__ASSEMBLY__ */
#define ENCODE_FRAME_POINTER
#endif /* __ASSEMBLY__ */
#define FRAME_BEGIN #define FRAME_BEGIN
#define FRAME_END #define FRAME_END
#define FRAME_OFFSET 0 #define FRAME_OFFSET 0
......
...@@ -70,22 +70,6 @@ struct kimage; ...@@ -70,22 +70,6 @@ struct kimage;
#define KEXEC_BACKUP_SRC_START (0UL) #define KEXEC_BACKUP_SRC_START (0UL)
#define KEXEC_BACKUP_SRC_END (640 * 1024UL - 1) /* 640K */ #define KEXEC_BACKUP_SRC_END (640 * 1024UL - 1) /* 640K */
/*
* CPU does not save ss and sp on stack if execution is already
* running in kernel mode at the time of NMI occurrence. This code
* fixes it.
*/
static inline void crash_fixup_ss_esp(struct pt_regs *newregs,
struct pt_regs *oldregs)
{
#ifdef CONFIG_X86_32
newregs->sp = (unsigned long)&(oldregs->sp);
asm volatile("xorl %%eax, %%eax\n\t"
"movw %%ss, %%ax\n\t"
:"=a"(newregs->ss));
#endif
}
/* /*
* This function is responsible for capturing register states if coming * This function is responsible for capturing register states if coming
* via panic otherwise just fix up the ss and sp if coming via kernel * via panic otherwise just fix up the ss and sp if coming via kernel
...@@ -96,7 +80,6 @@ static inline void crash_setup_regs(struct pt_regs *newregs, ...@@ -96,7 +80,6 @@ static inline void crash_setup_regs(struct pt_regs *newregs,
{ {
if (oldregs) { if (oldregs) {
memcpy(newregs, oldregs, sizeof(*newregs)); memcpy(newregs, oldregs, sizeof(*newregs));
crash_fixup_ss_esp(newregs, oldregs);
} else { } else {
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
asm volatile("movl %%ebx,%0" : "=m"(newregs->bx)); asm volatile("movl %%ebx,%0" : "=m"(newregs->bx));
......
...@@ -166,14 +166,10 @@ static inline bool user_64bit_mode(struct pt_regs *regs) ...@@ -166,14 +166,10 @@ static inline bool user_64bit_mode(struct pt_regs *regs)
#define compat_user_stack_pointer() current_pt_regs()->sp #define compat_user_stack_pointer() current_pt_regs()->sp
#endif #endif
#ifdef CONFIG_X86_32
extern unsigned long kernel_stack_pointer(struct pt_regs *regs);
#else
static inline unsigned long kernel_stack_pointer(struct pt_regs *regs) static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
{ {
return regs->sp; return regs->sp;
} }
#endif
#define GET_IP(regs) ((regs)->ip) #define GET_IP(regs) ((regs)->ip)
#define GET_FP(regs) ((regs)->bp) #define GET_FP(regs) ((regs)->bp)
...@@ -201,14 +197,6 @@ static inline unsigned long regs_get_register(struct pt_regs *regs, ...@@ -201,14 +197,6 @@ static inline unsigned long regs_get_register(struct pt_regs *regs,
if (unlikely(offset > MAX_REG_OFFSET)) if (unlikely(offset > MAX_REG_OFFSET))
return 0; return 0;
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
/*
* Traps from the kernel do not save sp and ss.
* Use the helper function to retrieve sp.
*/
if (offset == offsetof(struct pt_regs, sp) &&
regs->cs == __KERNEL_CS)
return kernel_stack_pointer(regs);
/* The selector fields are 16-bit. */ /* The selector fields are 16-bit. */
if (offset == offsetof(struct pt_regs, cs) || if (offset == offsetof(struct pt_regs, cs) ||
offset == offsetof(struct pt_regs, ss) || offset == offsetof(struct pt_regs, ss) ||
...@@ -234,8 +222,7 @@ static inline unsigned long regs_get_register(struct pt_regs *regs, ...@@ -234,8 +222,7 @@ static inline unsigned long regs_get_register(struct pt_regs *regs,
static inline int regs_within_kernel_stack(struct pt_regs *regs, static inline int regs_within_kernel_stack(struct pt_regs *regs,
unsigned long addr) unsigned long addr)
{ {
return ((addr & ~(THREAD_SIZE - 1)) == return ((addr & ~(THREAD_SIZE - 1)) == (regs->sp & ~(THREAD_SIZE - 1)));
(kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1)));
} }
/** /**
...@@ -249,7 +236,7 @@ static inline int regs_within_kernel_stack(struct pt_regs *regs, ...@@ -249,7 +236,7 @@ static inline int regs_within_kernel_stack(struct pt_regs *regs,
*/ */
static inline unsigned long *regs_get_kernel_stack_nth_addr(struct pt_regs *regs, unsigned int n) static inline unsigned long *regs_get_kernel_stack_nth_addr(struct pt_regs *regs, unsigned int n)
{ {
unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs); unsigned long *addr = (unsigned long *)regs->sp;
addr += n; addr += n;
if (regs_within_kernel_stack(regs, (unsigned long)addr)) if (regs_within_kernel_stack(regs, (unsigned long)addr))
......
...@@ -6,6 +6,8 @@ ...@@ -6,6 +6,8 @@
#ifdef __KERNEL__ #ifdef __KERNEL__
#include <asm/nops.h> #include <asm/nops.h>
#include <asm/processor-flags.h>
#include <linux/jump_label.h>
/* /*
* Volatile isn't enough to prevent the compiler from reordering the * Volatile isn't enough to prevent the compiler from reordering the
...@@ -16,6 +18,10 @@ ...@@ -16,6 +18,10 @@
*/ */
extern unsigned long __force_order; extern unsigned long __force_order;
/* Starts false and gets enabled once CPU feature detection is done. */
DECLARE_STATIC_KEY_FALSE(cr_pinning);
extern unsigned long cr4_pinned_bits;
static inline unsigned long native_read_cr0(void) static inline unsigned long native_read_cr0(void)
{ {
unsigned long val; unsigned long val;
...@@ -25,7 +31,20 @@ static inline unsigned long native_read_cr0(void) ...@@ -25,7 +31,20 @@ static inline unsigned long native_read_cr0(void)
static inline void native_write_cr0(unsigned long val) static inline void native_write_cr0(unsigned long val)
{ {
asm volatile("mov %0,%%cr0": : "r" (val), "m" (__force_order)); unsigned long bits_missing = 0;
set_register:
asm volatile("mov %0,%%cr0": "+r" (val), "+m" (__force_order));
if (static_branch_likely(&cr_pinning)) {
if (unlikely((val & X86_CR0_WP) != X86_CR0_WP)) {
bits_missing = X86_CR0_WP;
val |= bits_missing;
goto set_register;
}
/* Warn after we've set the missing bits. */
WARN_ONCE(bits_missing, "CR0 WP bit went missing!?\n");
}
} }
static inline unsigned long native_read_cr2(void) static inline unsigned long native_read_cr2(void)
...@@ -74,7 +93,21 @@ static inline unsigned long native_read_cr4(void) ...@@ -74,7 +93,21 @@ static inline unsigned long native_read_cr4(void)
static inline void native_write_cr4(unsigned long val) static inline void native_write_cr4(unsigned long val)
{ {
asm volatile("mov %0,%%cr4": : "r" (val), "m" (__force_order)); unsigned long bits_missing = 0;
set_register:
asm volatile("mov %0,%%cr4": "+r" (val), "+m" (cr4_pinned_bits));
if (static_branch_likely(&cr_pinning)) {
if (unlikely((val & cr4_pinned_bits) != cr4_pinned_bits)) {
bits_missing = ~val & cr4_pinned_bits;
val |= bits_missing;
goto set_register;
}
/* Warn after we've set the missing bits. */
WARN_ONCE(bits_missing, "CR4 bits went missing: %lx!?\n",
bits_missing);
}
} }
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
......
...@@ -78,7 +78,7 @@ static inline unsigned long * ...@@ -78,7 +78,7 @@ static inline unsigned long *
get_stack_pointer(struct task_struct *task, struct pt_regs *regs) get_stack_pointer(struct task_struct *task, struct pt_regs *regs)
{ {
if (regs) if (regs)
return (unsigned long *)kernel_stack_pointer(regs); return (unsigned long *)regs->sp;
if (task == current) if (task == current)
return __builtin_frame_address(0); return __builtin_frame_address(0);
......
...@@ -66,7 +66,6 @@ static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip) ...@@ -66,7 +66,6 @@ static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip)
#define INT3_INSN_SIZE 1 #define INT3_INSN_SIZE 1
#define CALL_INSN_SIZE 5 #define CALL_INSN_SIZE 5
#ifdef CONFIG_X86_64
static inline void int3_emulate_push(struct pt_regs *regs, unsigned long val) static inline void int3_emulate_push(struct pt_regs *regs, unsigned long val)
{ {
/* /*
...@@ -84,7 +83,6 @@ static inline void int3_emulate_call(struct pt_regs *regs, unsigned long func) ...@@ -84,7 +83,6 @@ static inline void int3_emulate_call(struct pt_regs *regs, unsigned long func)
int3_emulate_push(regs, regs->ip - INT3_INSN_SIZE + CALL_INSN_SIZE); int3_emulate_push(regs, regs->ip - INT3_INSN_SIZE + CALL_INSN_SIZE);
int3_emulate_jmp(regs, func); int3_emulate_jmp(regs, func);
} }
#endif /* CONFIG_X86_64 */
#endif /* !CONFIG_UML_X86 */ #endif /* !CONFIG_UML_X86 */
#endif /* _ASM_X86_TEXT_PATCHING_H */ #endif /* _ASM_X86_TEXT_PATCHING_H */
...@@ -616,11 +616,83 @@ extern struct paravirt_patch_site __start_parainstructions[], ...@@ -616,11 +616,83 @@ extern struct paravirt_patch_site __start_parainstructions[],
__stop_parainstructions[]; __stop_parainstructions[];
#endif /* CONFIG_PARAVIRT */ #endif /* CONFIG_PARAVIRT */
/*
* Self-test for the INT3 based CALL emulation code.
*
* This exercises int3_emulate_call() to make sure INT3 pt_regs are set up
* properly and that there is a stack gap between the INT3 frame and the
* previous context. Without this gap doing a virtual PUSH on the interrupted
* stack would corrupt the INT3 IRET frame.
*
* See entry_{32,64}.S for more details.
*/
/*
 * Target of the emulated CALL in the INT3 selftest: writes 1 through @ptr so
 * the caller can tell that this function actually ran.
 */
static void __init int3_magic(unsigned int *ptr)
{
	ptr[0] = 1;
}
extern __initdata unsigned long int3_selftest_ip; /* defined in asm below */
/*
 * Die-notifier callback used by the INT3 selftest.
 *
 * Ignores everything except a kernel-mode DIE_INT3 event whose breakpoint
 * address (regs->ip - INT3_INSN_SIZE) equals int3_selftest_ip. For that one
 * event it emulates a CALL to int3_magic() on the interrupted context and
 * returns NOTIFY_STOP; all other events get NOTIFY_DONE.
 */
static int __init
int3_exception_notify(struct notifier_block *self, unsigned long val, void *data)
{
	struct die_args *die = data;
	struct pt_regs *regs = die->regs;

	/* Not ours: no register state, a user-mode trap, or not an INT3. */
	if (!regs || user_mode(regs) || val != DIE_INT3)
		return NOTIFY_DONE;

	if (regs->ip - INT3_INSN_SIZE == int3_selftest_ip) {
		int3_emulate_call(regs, (unsigned long)&int3_magic);
		return NOTIFY_STOP;
	}

	return NOTIFY_DONE;
}
/*
* Run the INT3 CALL-emulation selftest once.
*
* Registers int3_exception_notify() as a die notifier, executes an INT3
* whose address is recorded in int3_selftest_ip, and checks that 'val' was
* set to 1, i.e. that the notifier's emulated call to int3_magic(&val) ran.
* BUG()s if the notifier cannot be registered or if val is not 1 afterwards.
*/
static void __init int3_selftest(void)
{
static __initdata struct notifier_block int3_exception_nb = {
.notifier_call = int3_exception_notify,
.priority = INT_MAX-1, /* last */
};
/* Set to 1 by int3_magic() when the emulated call works. */
unsigned int val = 0;
BUG_ON(register_die_notifier(&int3_exception_nb));
/*
* Basically: int3_magic(&val); but really complicated :-)
*
* Stick the address of the INT3 instruction into int3_selftest_ip,
* then trigger the INT3, padded with NOPs to match a CALL instruction
* length.
*/
/*
* &val is passed in the "a" register on 32-bit and the "D" register on
* 64-bit -- presumably the first-argument register int3_magic() reads
* its parameter from in each ABI; confirm against the build flags.
* The "memory" clobber tells the compiler val may change behind its back.
*/
asm volatile ("1: int3; nop; nop; nop; nop\n\t"
".pushsection .init.data,\"aw\"\n\t"
".align " __ASM_SEL(4, 8) "\n\t"
".type int3_selftest_ip, @object\n\t"
".size int3_selftest_ip, " __ASM_SEL(4, 8) "\n\t"
"int3_selftest_ip:\n\t"
__ASM_SEL(.long, .quad) " 1b\n\t"
".popsection\n\t"
: : __ASM_SEL_RAW(a, D) (&val) : "memory");
BUG_ON(val != 1);
unregister_die_notifier(&int3_exception_nb);
}
void __init alternative_instructions(void) void __init alternative_instructions(void)
{ {
/* The patching is not fully atomic, so try to avoid local interruptions int3_selftest();
that might execute the to be patched code.
Other CPUs are not running. */ /*
* The patching is not fully atomic, so try to avoid local
* interruptions that might execute the to be patched code.
* Other CPUs are not running.
*/
stop_nmi(); stop_nmi();
/* /*
...@@ -645,10 +717,11 @@ void __init alternative_instructions(void) ...@@ -645,10 +717,11 @@ void __init alternative_instructions(void)
_text, _etext); _text, _etext);
} }
if (!uniproc_patched || num_possible_cpus() == 1) if (!uniproc_patched || num_possible_cpus() == 1) {
free_init_pages("SMP alternatives", free_init_pages("SMP alternatives",
(unsigned long)__smp_locks, (unsigned long)__smp_locks,
(unsigned long)__smp_locks_end); (unsigned long)__smp_locks_end);
}
#endif #endif
apply_paravirt(__parainstructions, __parainstructions_end); apply_paravirt(__parainstructions, __parainstructions_end);
......
...@@ -38,7 +38,6 @@ static void __used common(void) ...@@ -38,7 +38,6 @@ static void __used common(void)
#endif #endif
BLANK(); BLANK();
OFFSET(TASK_TI_flags, task_struct, thread_info.flags);
OFFSET(TASK_addr_limit, task_struct, thread.addr_limit); OFFSET(TASK_addr_limit, task_struct, thread.addr_limit);
BLANK(); BLANK();
......
...@@ -366,6 +366,25 @@ static __always_inline void setup_umip(struct cpuinfo_x86 *c) ...@@ -366,6 +366,25 @@ static __always_inline void setup_umip(struct cpuinfo_x86 *c)
cr4_clear_bits(X86_CR4_UMIP); cr4_clear_bits(X86_CR4_UMIP);
} }
DEFINE_STATIC_KEY_FALSE_RO(cr_pinning);
EXPORT_SYMBOL(cr_pinning);
unsigned long cr4_pinned_bits __ro_after_init;
EXPORT_SYMBOL(cr4_pinned_bits);
/*
 * Record which of the sensitive CR4 bits (SMEP, SMAP, UMIP) are currently set
 * in this CPU's cpu_tlbstate.cr4 and store them in cr4_pinned_bits, then turn
 * on the cr_pinning static key. Meant to run once CPU feature detection has
 * finished and boot parameters have been parsed.
 */
static void __init setup_cr_pinning(void)
{
	const unsigned long pinnable = X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP;
	unsigned long cr4 = this_cpu_read(cpu_tlbstate.cr4);

	cr4_pinned_bits = cr4 & pinnable;
	static_key_enable(&cr_pinning.key);
}
/* /*
* Protection Keys are not available in 32-bit mode. * Protection Keys are not available in 32-bit mode.
*/ */
...@@ -1468,6 +1487,7 @@ void __init identify_boot_cpu(void) ...@@ -1468,6 +1487,7 @@ void __init identify_boot_cpu(void)
enable_sep_cpu(); enable_sep_cpu();
#endif #endif
cpu_detect_tlb(&boot_cpu_data); cpu_detect_tlb(&boot_cpu_data);
setup_cr_pinning();
} }
void identify_secondary_cpu(struct cpuinfo_x86 *c) void identify_secondary_cpu(struct cpuinfo_x86 *c)
......
...@@ -73,14 +73,6 @@ static inline void cpu_crash_vmclear_loaded_vmcss(void) ...@@ -73,14 +73,6 @@ static inline void cpu_crash_vmclear_loaded_vmcss(void)
static void kdump_nmi_callback(int cpu, struct pt_regs *regs) static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
{ {
#ifdef CONFIG_X86_32
struct pt_regs fixed_regs;
if (!user_mode(regs)) {
crash_fixup_ss_esp(&fixed_regs, regs);
regs = &fixed_regs;
}
#endif
crash_save_cpu(regs, cpu); crash_save_cpu(regs, cpu);
/* /*
......
...@@ -310,7 +310,6 @@ int ftrace_int3_handler(struct pt_regs *regs) ...@@ -310,7 +310,6 @@ int ftrace_int3_handler(struct pt_regs *regs)
ip = regs->ip - INT3_INSN_SIZE; ip = regs->ip - INT3_INSN_SIZE;
#ifdef CONFIG_X86_64
if (ftrace_location(ip)) { if (ftrace_location(ip)) {
int3_emulate_call(regs, (unsigned long)ftrace_regs_caller); int3_emulate_call(regs, (unsigned long)ftrace_regs_caller);
return 1; return 1;
...@@ -322,12 +321,6 @@ int ftrace_int3_handler(struct pt_regs *regs) ...@@ -322,12 +321,6 @@ int ftrace_int3_handler(struct pt_regs *regs)
int3_emulate_call(regs, ftrace_update_func_call); int3_emulate_call(regs, ftrace_update_func_call);
return 1; return 1;
} }
#else
if (ftrace_location(ip) || is_ftrace_caller(ip)) {
int3_emulate_jmp(regs, ip + CALL_INSN_SIZE);
return 1;
}
#endif
return 0; return 0;
} }
......
...@@ -9,6 +9,8 @@ ...@@ -9,6 +9,8 @@
#include <asm/export.h> #include <asm/export.h>
#include <asm/ftrace.h> #include <asm/ftrace.h>
#include <asm/nospec-branch.h> #include <asm/nospec-branch.h>
#include <asm/frame.h>
#include <asm/asm-offsets.h>
# define function_hook __fentry__ # define function_hook __fentry__
EXPORT_SYMBOL(__fentry__) EXPORT_SYMBOL(__fentry__)
...@@ -89,26 +91,38 @@ END(ftrace_caller) ...@@ -89,26 +91,38 @@ END(ftrace_caller)
ENTRY(ftrace_regs_caller) ENTRY(ftrace_regs_caller)
/* /*
* i386 does not save SS and ESP when coming from kernel. * We're here from an mcount/fentry CALL, and the stack frame looks like:
* Instead, to get sp, &regs->sp is used (see ptrace.h). *
* Unfortunately, that means eflags must be at the same location * <previous context>
* as the current return ip is. We move the return ip into the * RET-IP
* regs->ip location, and move flags into the return ip location. *
* The purpose of this function is to call out in an emulated INT3
* environment with a stack frame like:
*
* <previous context>
* gap / RET-IP
* gap
* gap
* gap
* pt_regs
*
* We do _NOT_ restore: ss, flags, cs, gs, fs, es, ds
*/ */
pushl $__KERNEL_CS subl $3*4, %esp # RET-IP + 3 gaps
pushl 4(%esp) /* Save the return ip */ pushl %ss # ss
pushl $0 /* Load 0 into orig_ax */ pushl %esp # points at ss
addl $5*4, (%esp) # make it point at <previous context>
pushfl # flags
pushl $__KERNEL_CS # cs
pushl 7*4(%esp) # ip <- RET-IP
pushl $0 # orig_eax
pushl %gs pushl %gs
pushl %fs pushl %fs
pushl %es pushl %es
pushl %ds pushl %ds
pushl %eax
/* Get flags and place them into the return ip slot */
pushf
popl %eax
movl %eax, 8*4(%esp)
pushl %eax
pushl %ebp pushl %ebp
pushl %edi pushl %edi
pushl %esi pushl %esi
...@@ -116,24 +130,27 @@ ENTRY(ftrace_regs_caller) ...@@ -116,24 +130,27 @@ ENTRY(ftrace_regs_caller)
pushl %ecx pushl %ecx
pushl %ebx pushl %ebx
movl 12*4(%esp), %eax /* Load ip (1st parameter) */ ENCODE_FRAME_POINTER
subl $MCOUNT_INSN_SIZE, %eax /* Adjust ip */
movl 15*4(%esp), %edx /* Load parent ip (2nd parameter) */ movl PT_EIP(%esp), %eax # 1st argument: IP
movl function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */ subl $MCOUNT_INSN_SIZE, %eax
pushl %esp /* Save pt_regs as 4th parameter */ movl 21*4(%esp), %edx # 2nd argument: parent ip
movl function_trace_op, %ecx # 3rd argument: ftrace_pos
pushl %esp # 4th argument: pt_regs
GLOBAL(ftrace_regs_call) GLOBAL(ftrace_regs_call)
call ftrace_stub call ftrace_stub
addl $4, %esp /* Skip pt_regs */ addl $4, %esp # skip 4th argument
/* restore flags */ /* place IP below the new SP */
push 14*4(%esp) movl PT_OLDESP(%esp), %eax
popf movl PT_EIP(%esp), %ecx
movl %ecx, -4(%eax)
/* Move return ip back to its original location */ /* place EAX below that */
movl 12*4(%esp), %eax movl PT_EAX(%esp), %ecx
movl %eax, 14*4(%esp) movl %ecx, -8(%eax)
popl %ebx popl %ebx
popl %ecx popl %ecx
...@@ -141,14 +158,9 @@ GLOBAL(ftrace_regs_call) ...@@ -141,14 +158,9 @@ GLOBAL(ftrace_regs_call)
popl %esi popl %esi
popl %edi popl %edi
popl %ebp popl %ebp
popl %eax
popl %ds
popl %es
popl %fs
popl %gs
/* use lea to not affect flags */ lea -8(%eax), %esp
lea 3*4(%esp), %esp /* Skip orig_ax, ip and cs */ popl %eax
jmp .Lftrace_ret jmp .Lftrace_ret
......
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
#include <asm/export.h> #include <asm/export.h>
#include <asm/nospec-branch.h> #include <asm/nospec-branch.h>
#include <asm/unwind_hints.h> #include <asm/unwind_hints.h>
#include <asm/frame.h>
.code64 .code64
.section .entry.text, "ax" .section .entry.text, "ax"
...@@ -203,6 +204,8 @@ GLOBAL(ftrace_regs_caller_op_ptr) ...@@ -203,6 +204,8 @@ GLOBAL(ftrace_regs_caller_op_ptr)
leaq MCOUNT_REG_SIZE+8*2(%rsp), %rcx leaq MCOUNT_REG_SIZE+8*2(%rsp), %rcx
movq %rcx, RSP(%rsp) movq %rcx, RSP(%rsp)
ENCODE_FRAME_POINTER
/* regs go into 4th parameter */ /* regs go into 4th parameter */
leaq (%rsp), %rcx leaq (%rsp), %rcx
......
...@@ -118,14 +118,6 @@ char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs) ...@@ -118,14 +118,6 @@ char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
switch (regno) { switch (regno) {
case GDB_SS:
if (!user_mode(regs))
*(unsigned long *)mem = __KERNEL_DS;
break;
case GDB_SP:
if (!user_mode(regs))
*(unsigned long *)mem = kernel_stack_pointer(regs);
break;
case GDB_GS: case GDB_GS:
case GDB_FS: case GDB_FS:
*(unsigned long *)mem = 0xFFFF; *(unsigned long *)mem = 0xFFFF;
......
...@@ -5,15 +5,10 @@ ...@@ -5,15 +5,10 @@
/* Kprobes and Optprobes common header */ /* Kprobes and Optprobes common header */
#include <asm/asm.h> #include <asm/asm.h>
#include <asm/frame.h>
#ifdef CONFIG_FRAME_POINTER
# define SAVE_RBP_STRING " push %" _ASM_BP "\n" \
" mov %" _ASM_SP ", %" _ASM_BP "\n"
#else
# define SAVE_RBP_STRING " push %" _ASM_BP "\n"
#endif
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
#define SAVE_REGS_STRING \ #define SAVE_REGS_STRING \
/* Skip cs, ip, orig_ax. */ \ /* Skip cs, ip, orig_ax. */ \
" subq $24, %rsp\n" \ " subq $24, %rsp\n" \
...@@ -27,11 +22,13 @@ ...@@ -27,11 +22,13 @@
" pushq %r10\n" \ " pushq %r10\n" \
" pushq %r11\n" \ " pushq %r11\n" \
" pushq %rbx\n" \ " pushq %rbx\n" \
SAVE_RBP_STRING \ " pushq %rbp\n" \
" pushq %r12\n" \ " pushq %r12\n" \
" pushq %r13\n" \ " pushq %r13\n" \
" pushq %r14\n" \ " pushq %r14\n" \
" pushq %r15\n" " pushq %r15\n" \
ENCODE_FRAME_POINTER
#define RESTORE_REGS_STRING \ #define RESTORE_REGS_STRING \
" popq %r15\n" \ " popq %r15\n" \
" popq %r14\n" \ " popq %r14\n" \
...@@ -51,19 +48,22 @@ ...@@ -51,19 +48,22 @@
/* Skip orig_ax, ip, cs */ \ /* Skip orig_ax, ip, cs */ \
" addq $24, %rsp\n" " addq $24, %rsp\n"
#else #else
#define SAVE_REGS_STRING \ #define SAVE_REGS_STRING \
/* Skip cs, ip, orig_ax and gs. */ \ /* Skip cs, ip, orig_ax and gs. */ \
" subl $16, %esp\n" \ " subl $4*4, %esp\n" \
" pushl %fs\n" \ " pushl %fs\n" \
" pushl %es\n" \ " pushl %es\n" \
" pushl %ds\n" \ " pushl %ds\n" \
" pushl %eax\n" \ " pushl %eax\n" \
SAVE_RBP_STRING \ " pushl %ebp\n" \
" pushl %edi\n" \ " pushl %edi\n" \
" pushl %esi\n" \ " pushl %esi\n" \
" pushl %edx\n" \ " pushl %edx\n" \
" pushl %ecx\n" \ " pushl %ecx\n" \
" pushl %ebx\n" " pushl %ebx\n" \
ENCODE_FRAME_POINTER
#define RESTORE_REGS_STRING \ #define RESTORE_REGS_STRING \
" popl %ebx\n" \ " popl %ebx\n" \
" popl %ecx\n" \ " popl %ecx\n" \
...@@ -72,8 +72,8 @@ ...@@ -72,8 +72,8 @@
" popl %edi\n" \ " popl %edi\n" \
" popl %ebp\n" \ " popl %ebp\n" \
" popl %eax\n" \ " popl %eax\n" \
/* Skip ds, es, fs, gs, orig_ax, and ip. Note: don't pop cs here*/\ /* Skip ds, es, fs, gs, orig_ax, ip, and cs. */\
" addl $24, %esp\n" " addl $7*4, %esp\n"
#endif #endif
/* Ensure if the instruction can be boostable */ /* Ensure if the instruction can be boostable */
......
...@@ -56,7 +56,7 @@ ...@@ -56,7 +56,7 @@
DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
#define stack_addr(regs) ((unsigned long *)kernel_stack_pointer(regs)) #define stack_addr(regs) ((unsigned long *)regs->sp)
#define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\ #define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\
(((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \ (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \
...@@ -718,29 +718,27 @@ asm( ...@@ -718,29 +718,27 @@ asm(
".global kretprobe_trampoline\n" ".global kretprobe_trampoline\n"
".type kretprobe_trampoline, @function\n" ".type kretprobe_trampoline, @function\n"
"kretprobe_trampoline:\n" "kretprobe_trampoline:\n"
#ifdef CONFIG_X86_64
/* We don't bother saving the ss register */ /* We don't bother saving the ss register */
#ifdef CONFIG_X86_64
" pushq %rsp\n" " pushq %rsp\n"
" pushfq\n" " pushfq\n"
SAVE_REGS_STRING SAVE_REGS_STRING
" movq %rsp, %rdi\n" " movq %rsp, %rdi\n"
" call trampoline_handler\n" " call trampoline_handler\n"
/* Replace saved sp with true return address. */ /* Replace saved sp with true return address. */
" movq %rax, 152(%rsp)\n" " movq %rax, 19*8(%rsp)\n"
RESTORE_REGS_STRING RESTORE_REGS_STRING
" popfq\n" " popfq\n"
#else #else
" pushf\n" " pushl %esp\n"
" pushfl\n"
SAVE_REGS_STRING SAVE_REGS_STRING
" movl %esp, %eax\n" " movl %esp, %eax\n"
" call trampoline_handler\n" " call trampoline_handler\n"
/* Move flags to cs */ /* Replace saved sp with true return address. */
" movl 56(%esp), %edx\n" " movl %eax, 15*4(%esp)\n"
" movl %edx, 52(%esp)\n"
/* Replace saved flags with true return address. */
" movl %eax, 56(%esp)\n"
RESTORE_REGS_STRING RESTORE_REGS_STRING
" popf\n" " popfl\n"
#endif #endif
" ret\n" " ret\n"
".size kretprobe_trampoline, .-kretprobe_trampoline\n" ".size kretprobe_trampoline, .-kretprobe_trampoline\n"
...@@ -781,16 +779,13 @@ __used __visible void *trampoline_handler(struct pt_regs *regs) ...@@ -781,16 +779,13 @@ __used __visible void *trampoline_handler(struct pt_regs *regs)
INIT_HLIST_HEAD(&empty_rp); INIT_HLIST_HEAD(&empty_rp);
kretprobe_hash_lock(current, &head, &flags); kretprobe_hash_lock(current, &head, &flags);
/* fixup registers */ /* fixup registers */
#ifdef CONFIG_X86_64
regs->cs = __KERNEL_CS; regs->cs = __KERNEL_CS;
/* On x86-64, we use pt_regs->sp for return address holder. */ #ifdef CONFIG_X86_32
frame_pointer = &regs->sp; regs->cs |= get_kernel_rpl();
#else
regs->cs = __KERNEL_CS | get_kernel_rpl();
regs->gs = 0; regs->gs = 0;
/* On x86-32, we use pt_regs->flags for return address holder. */
frame_pointer = &regs->flags;
#endif #endif
/* We use pt_regs->sp for return address holder. */
frame_pointer = &regs->sp;
regs->ip = trampoline_address; regs->ip = trampoline_address;
regs->orig_ax = ~0UL; regs->orig_ax = ~0UL;
......
...@@ -102,14 +102,15 @@ asm ( ...@@ -102,14 +102,15 @@ asm (
"optprobe_template_call:\n" "optprobe_template_call:\n"
ASM_NOP5 ASM_NOP5
/* Move flags to rsp */ /* Move flags to rsp */
" movq 144(%rsp), %rdx\n" " movq 18*8(%rsp), %rdx\n"
" movq %rdx, 152(%rsp)\n" " movq %rdx, 19*8(%rsp)\n"
RESTORE_REGS_STRING RESTORE_REGS_STRING
/* Skip flags entry */ /* Skip flags entry */
" addq $8, %rsp\n" " addq $8, %rsp\n"
" popfq\n" " popfq\n"
#else /* CONFIG_X86_32 */ #else /* CONFIG_X86_32 */
" pushf\n" " pushl %esp\n"
" pushfl\n"
SAVE_REGS_STRING SAVE_REGS_STRING
" movl %esp, %edx\n" " movl %esp, %edx\n"
".global optprobe_template_val\n" ".global optprobe_template_val\n"
...@@ -118,9 +119,13 @@ asm ( ...@@ -118,9 +119,13 @@ asm (
".global optprobe_template_call\n" ".global optprobe_template_call\n"
"optprobe_template_call:\n" "optprobe_template_call:\n"
ASM_NOP5 ASM_NOP5
/* Move flags into esp */
" movl 14*4(%esp), %edx\n"
" movl %edx, 15*4(%esp)\n"
RESTORE_REGS_STRING RESTORE_REGS_STRING
" addl $4, %esp\n" /* skip cs */ /* Skip flags entry */
" popf\n" " addl $4, %esp\n"
" popfl\n"
#endif #endif
".global optprobe_template_end\n" ".global optprobe_template_end\n"
"optprobe_template_end:\n" "optprobe_template_end:\n"
...@@ -152,10 +157,9 @@ optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) ...@@ -152,10 +157,9 @@ optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
} else { } else {
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
/* Save skipped registers */ /* Save skipped registers */
#ifdef CONFIG_X86_64
regs->cs = __KERNEL_CS; regs->cs = __KERNEL_CS;
#else #ifdef CONFIG_X86_32
regs->cs = __KERNEL_CS | get_kernel_rpl(); regs->cs |= get_kernel_rpl();
regs->gs = 0; regs->gs = 0;
#endif #endif
regs->ip = (unsigned long)op->kp.addr + INT3_SIZE; regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
......
...@@ -62,27 +62,21 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode) ...@@ -62,27 +62,21 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode)
{ {
unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
unsigned long d0, d1, d2, d3, d6, d7; unsigned long d0, d1, d2, d3, d6, d7;
unsigned long sp; unsigned short gs;
unsigned short ss, gs;
if (user_mode(regs)) { if (user_mode(regs))
sp = regs->sp;
ss = regs->ss;
gs = get_user_gs(regs); gs = get_user_gs(regs);
} else { else
sp = kernel_stack_pointer(regs);
savesegment(ss, ss);
savesegment(gs, gs); savesegment(gs, gs);
}
show_ip(regs, KERN_DEFAULT); show_ip(regs, KERN_DEFAULT);
printk(KERN_DEFAULT "EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", printk(KERN_DEFAULT "EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
regs->ax, regs->bx, regs->cx, regs->dx); regs->ax, regs->bx, regs->cx, regs->dx);
printk(KERN_DEFAULT "ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n", printk(KERN_DEFAULT "ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n",
regs->si, regs->di, regs->bp, sp); regs->si, regs->di, regs->bp, regs->sp);
printk(KERN_DEFAULT "DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x EFLAGS: %08lx\n", printk(KERN_DEFAULT "DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x EFLAGS: %08lx\n",
(u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss, regs->flags); (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, regs->ss, regs->flags);
if (mode != SHOW_REGS_ALL) if (mode != SHOW_REGS_ALL)
return; return;
......
...@@ -155,35 +155,6 @@ static inline bool invalid_selector(u16 value) ...@@ -155,35 +155,6 @@ static inline bool invalid_selector(u16 value)
#define FLAG_MASK FLAG_MASK_32 #define FLAG_MASK FLAG_MASK_32
/*
 * Return the stack pointer that was in use when a kernel-mode trap was taken.
 *
 * X86_32 CPUs do not save ss and esp when the trap happens while the CPU is
 * already in kernel mode: the previous stack lies directly underneath the
 * saved registers, and the 'sp'/'ss' slots were never written. The address
 * of 'regs->sp' is therefore the previous stack pointer itself.
 *
 * If the stack was empty, '&regs->sp' is out of range, i.e. it no longer
 * falls inside the THREAD_SIZE-aligned region that contains 'regs'. In that
 * case the previous stack pointer is taken from the first word of that
 * region. Should that word be zero as well, 'regs' is returned as the stack
 * so that the result is never a null stack pointer.
 *
 * This is valid only for kernel mode traps.
 */
unsigned long kernel_stack_pointer(struct pt_regs *regs)
{
	unsigned long regs_addr = (unsigned long)regs;
	unsigned long sp_slot = (unsigned long)&regs->sp;
	unsigned long stack_base = regs_addr & ~(THREAD_SIZE - 1);
	u32 saved_esp;

	/* Common case: the sp slot sits in the same stack region as regs. */
	if ((sp_slot & ~(THREAD_SIZE - 1)) == stack_base)
		return sp_slot;

	/* Otherwise use the value stored at the base of the region, if any. */
	saved_esp = *(u32 *)stack_base;

	return saved_esp ? (unsigned long)saved_esp : regs_addr;
}
EXPORT_SYMBOL_GPL(kernel_stack_pointer);
static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno) static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
{ {
BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0); BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0);
......
...@@ -205,13 +205,19 @@ static int enable_start_cpu0; ...@@ -205,13 +205,19 @@ static int enable_start_cpu0;
*/ */
static void notrace start_secondary(void *unused) static void notrace start_secondary(void *unused)
{ {
unsigned long cr4 = __read_cr4();
/* /*
* Don't put *anything* except direct CPU state initialization * Don't put *anything* except direct CPU state initialization
* before cpu_init(), SMP booting is too fragile that we want to * before cpu_init(), SMP booting is too fragile that we want to
* limit the things done here to the most necessary things. * limit the things done here to the most necessary things.
*/ */
if (boot_cpu_has(X86_FEATURE_PCID)) if (boot_cpu_has(X86_FEATURE_PCID))
__write_cr4(__read_cr4() | X86_CR4_PCIDE); cr4 |= X86_CR4_PCIDE;
if (static_branch_likely(&cr_pinning))
cr4 |= cr4_pinned_bits;
__write_cr4(cr4);
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
/* switch away from the initial page table */ /* switch away from the initial page table */
......
...@@ -37,8 +37,7 @@ unsigned long profile_pc(struct pt_regs *regs) ...@@ -37,8 +37,7 @@ unsigned long profile_pc(struct pt_regs *regs)
#ifdef CONFIG_FRAME_POINTER #ifdef CONFIG_FRAME_POINTER
return *(unsigned long *)(regs->bp + sizeof(long)); return *(unsigned long *)(regs->bp + sizeof(long));
#else #else
unsigned long *sp = unsigned long *sp = (unsigned long *)regs->sp;
(unsigned long *)kernel_stack_pointer(regs);
/* /*
* Return address is either directly at stack pointer * Return address is either directly at stack pointer
* or above a saved flags. Eflags has bits 22-31 zero, * or above a saved flags. Eflags has bits 22-31 zero,
......
...@@ -70,15 +70,6 @@ static void unwind_dump(struct unwind_state *state) ...@@ -70,15 +70,6 @@ static void unwind_dump(struct unwind_state *state)
} }
} }
/*
 * Number of bytes of *regs that were actually saved.
 *
 * On x86_32, regs captured from kernel mode are two words shorter than the
 * full structure; in every other case the whole structure is present.
 */
static size_t regs_size(struct pt_regs *regs)
{
	size_t size = sizeof(*regs);

	if (IS_ENABLED(CONFIG_X86_32) && !user_mode(regs))
		size -= 2*sizeof(long);

	return size;
}
static bool in_entry_code(unsigned long ip) static bool in_entry_code(unsigned long ip)
{ {
char *addr = (char *)ip; char *addr = (char *)ip;
...@@ -198,12 +189,6 @@ static struct pt_regs *decode_frame_pointer(unsigned long *bp) ...@@ -198,12 +189,6 @@ static struct pt_regs *decode_frame_pointer(unsigned long *bp)
} }
#endif #endif
/*
 * KERNEL_REGS_SIZE: size of struct pt_regs, minus two longs when building
 * for CONFIG_X86_32.
 */
#ifndef CONFIG_X86_32
#define KERNEL_REGS_SIZE (sizeof(struct pt_regs))
#else
#define KERNEL_REGS_SIZE (sizeof(struct pt_regs) - 2*sizeof(long))
#endif
static bool update_stack_state(struct unwind_state *state, static bool update_stack_state(struct unwind_state *state,
unsigned long *next_bp) unsigned long *next_bp)
{ {
...@@ -214,7 +199,7 @@ static bool update_stack_state(struct unwind_state *state, ...@@ -214,7 +199,7 @@ static bool update_stack_state(struct unwind_state *state,
size_t len; size_t len;
if (state->regs) if (state->regs)
prev_frame_end = (void *)state->regs + regs_size(state->regs); prev_frame_end = (void *)state->regs + sizeof(*state->regs);
else else
prev_frame_end = (void *)state->bp + FRAME_HEADER_SIZE; prev_frame_end = (void *)state->bp + FRAME_HEADER_SIZE;
...@@ -222,7 +207,7 @@ static bool update_stack_state(struct unwind_state *state, ...@@ -222,7 +207,7 @@ static bool update_stack_state(struct unwind_state *state,
regs = decode_frame_pointer(next_bp); regs = decode_frame_pointer(next_bp);
if (regs) { if (regs) {
frame = (unsigned long *)regs; frame = (unsigned long *)regs;
len = KERNEL_REGS_SIZE; len = sizeof(*regs);
state->got_irq = true; state->got_irq = true;
} else { } else {
frame = next_bp; frame = next_bp;
...@@ -246,14 +231,6 @@ static bool update_stack_state(struct unwind_state *state, ...@@ -246,14 +231,6 @@ static bool update_stack_state(struct unwind_state *state,
frame < prev_frame_end) frame < prev_frame_end)
return false; return false;
/*
* On 32-bit with user mode regs, make sure the last two regs are safe
* to access:
*/
if (IS_ENABLED(CONFIG_X86_32) && regs && user_mode(regs) &&
!on_stack(info, frame, len + 2*sizeof(long)))
return false;
/* Move state to the next frame: */ /* Move state to the next frame: */
if (regs) { if (regs) {
state->regs = regs; state->regs = regs;
...@@ -412,10 +389,9 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, ...@@ -412,10 +389,9 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
* Pretend that the frame is complete and that BP points to it, but save * Pretend that the frame is complete and that BP points to it, but save
* the real BP so that we can use it when looking for the next frame. * the real BP so that we can use it when looking for the next frame.
*/ */
if (regs && regs->ip == 0 && if (regs && regs->ip == 0 && (unsigned long *)regs->sp >= first_frame) {
(unsigned long *)kernel_stack_pointer(regs) >= first_frame) {
state->next_bp = bp; state->next_bp = bp;
bp = ((unsigned long *)kernel_stack_pointer(regs)) - 1; bp = ((unsigned long *)regs->sp) - 1;
} }
/* Initialize stack info and make sure the frame data is accessible: */ /* Initialize stack info and make sure the frame data is accessible: */
......
...@@ -598,7 +598,7 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, ...@@ -598,7 +598,7 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
goto done; goto done;
state->ip = regs->ip; state->ip = regs->ip;
state->sp = kernel_stack_pointer(regs); state->sp = regs->sp;
state->bp = regs->bp; state->bp = regs->bp;
state->regs = regs; state->regs = regs;
state->full_regs = true; state->full_regs = true;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment