Commit 7fcb3bc3 authored by Ingo Molnar

x86/asm/entry/64: Update comments about stack frames

Tweak a few outdated comments that were obsoleted by recent changes
to syscall entry code:

 - we no longer have a "partial stack frame" on
   entry, ever.

 - explain the syscall entry usage of old_rsp.

Partially based on (split out of) a patch from Denys Vlasenko.

Originally-from: Denys Vlasenko <dvlasenk@redhat.com>
Acked-by: Borislav Petkov <bp@alien8.de>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Will Drewry <wad@chromium.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent ac9af498
@@ -15,10 +15,8 @@
  * after an interrupt and after each system call.
  *
  * A note on terminology:
- * - top of stack: Architecture defined interrupt frame from SS to RIP
+ * - iret frame: Architecture defined interrupt frame from SS to RIP
  * at the top of the kernel process stack.
- * - partial stack frame: partially saved registers up to R11.
- * - full stack frame: Like partial stack frame, but all register saved.
  *
  * Some macro usage:
  * - CFI macros are used to generate dwarf2 unwind information for better
@@ -219,7 +217,7 @@ ENDPROC(native_usergs_sysret64)
  * Interrupts are off on entry.
  * Only called from user space.
  *
- * When user can change the frames always force IRET. That is because
+ * When user can change pt_regs->foo always force IRET. That is because
  * it deals with uncanonical addresses better. SYSRET has trouble
  * with them due to bugs in both AMD and Intel CPUs.
  */
@@ -238,6 +236,11 @@ ENTRY(system_call)
  */
 GLOBAL(system_call_after_swapgs)
 
+	/*
+	 * We use 'old_rsp' as a scratch register, hence this block must execute
+	 * atomically in the face of possible interrupt-driven task preemption,
+	 * so we can enable interrupts only after we're done with using old_rsp:
+	 */
 	movq %rsp,PER_CPU_VAR(old_rsp)
 	/* kernel_stack is set so that 5 slots (iret frame) are preallocated */
 	movq PER_CPU_VAR(kernel_stack),%rsp
@@ -303,7 +306,7 @@ int_ret_from_sys_call_fixup:
 	FIXUP_TOP_OF_STACK %r11
 	jmp int_ret_from_sys_call
 
-	/* Do syscall tracing */
+	/* Do syscall entry tracing */
 tracesys:
 	movq %rsp, %rdi
 	movq $AUDIT_ARCH_X86_64, %rsi
@@ -339,11 +342,11 @@ tracesys_phase2:
 	movq %r10,%rcx	/* fixup for C */
 	call *sys_call_table(,%rax,8)
 	movq %rax,RAX(%rsp)
-	/* Use IRET because user could have changed frame */
+	/* Use IRET because user could have changed pt_regs->foo */
 
 /*
  * Syscall return path ending with IRET.
- * Has correct top of stack, but partial stack frame.
+ * Has correct iret frame.
  */
 GLOBAL(int_ret_from_sys_call)
 	DISABLE_INTERRUPTS(CLBR_NONE)
@@ -374,7 +377,7 @@ int_careful:
 	TRACE_IRQS_OFF
 	jmp int_with_check
 
-	/* handle signals and tracing -- both require a full stack frame */
+	/* handle signals and tracing -- both require a full pt_regs */
 int_very_careful:
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
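To make the "iret frame" terminology above concrete, here is a minimal, hypothetical C sketch of the five hardware-pushed slots, from SS (highest address) down to RIP (lowest address), that the updated comments refer to. The struct and field names are illustrative assumptions for this sketch, not the kernel's own declarations.

#include <stdio.h>

/*
 * Illustrative sketch only (not part of the patch): the "iret frame" named
 * in the new comments is the five-slot frame the CPU pushes on entry from
 * user space, laid out here in increasing address order.
 */
struct iret_frame {
	unsigned long ip;    /* RIP: user return address (lowest address)  */
	unsigned long cs;    /* CS:  user code segment                     */
	unsigned long flags; /* RFLAGS saved at entry                      */
	unsigned long sp;    /* RSP: user stack pointer                    */
	unsigned long ss;    /* SS:  user stack segment (highest address)  */
};

int main(void)
{
	/* Matches the "5 slots (iret frame) are preallocated" comment above. */
	printf("iret frame: %zu slots of %zu bytes each\n",
	       sizeof(struct iret_frame) / sizeof(unsigned long),
	       sizeof(unsigned long));
	return 0;
}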