Commit 4facb95b authored by Thomas Gleixner

x86/entry: Unbreak 32bit fast syscall

Andy reported that syscall tracing for 32bit fast syscalls fails:

# ./tools/testing/selftests/x86/ptrace_syscall_32
...
[RUN] SYSEMU
[FAIL] Initial args are wrong (nr=224, args=10 11 12 13 14 4289172732)
...
[RUN] SYSCALL
[FAIL] Initial args are wrong (nr=29, args=0 0 0 0 0 4289172732)
 
The reason is that the conversion to generic entry code moved the retrieval
of the sixth argument (EBP) after the point where the syscall entry work
runs, i.e. ptrace, seccomp, audit...

Unbreak it by providing a split up version of syscall_enter_from_user_mode().

- syscall_enter_from_user_mode_prepare() establishes state and enables
  interrupts

- syscall_enter_from_user_mode_work() runs the entry work

Replace the call to syscall_enter_from_user_mode() in the 32bit fast
syscall C-entry with the split functions and stick the EBP retrieval
between them.
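
To make the resulting ordering explicit, here is a condensed sketch of the
fixed 32bit fast syscall C-entry. It reuses the helpers visible in the diff
below (syscall_32_enter(), do_syscall_32_irqs_on() and the new prepare/work
pair), but it is not the verbatim kernel function: error handling, the
instrumentation_begin()/end() markers and the faster X86_64 user access of
the real __do_fast_syscall_32() are left out, and a plain get_user() stands
in for the actual accessors:

static bool fast_syscall_32_sketch(struct pt_regs *regs)
{
        unsigned int nr = syscall_32_enter(regs);

        /* 1) Establish lockdep/RCU/tracing state and enable interrupts. */
        syscall_enter_from_user_mode_prepare(regs);

        /* 2) Fetch the sixth argument (EBP) from where the vDSO stashed it,
         *    so regs->bp is populated before any entry work looks at it.
         *    Simplified: the real code reports -EFAULT on a faulting access.
         */
        if (get_user(*(u32 *)&regs->bp,
                     (u32 __user *)(unsigned long)(u32)regs->sp))
                return false;

        /* 3) Only now run the entry work (ptrace, seccomp, audit, ...). */
        nr = (unsigned int)syscall_enter_from_user_mode_work(regs, nr);

        /* 4) Dispatch and return to user mode as for any other syscall. */
        do_syscall_32_irqs_on(regs, nr);
        syscall_exit_to_user_mode(regs);
        return true;
}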

Fixes: 27d6b4d1 ("x86/entry: Use generic syscall entry function")
Reported-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/87k0xdjbtt.fsf@nanos.tec.linutronix.de
parent d5c678ae
arch/x86/entry/common.c
@@ -60,16 +60,10 @@ __visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
 #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
 static __always_inline unsigned int syscall_32_enter(struct pt_regs *regs)
 {
-        unsigned int nr = (unsigned int)regs->orig_ax;
-
         if (IS_ENABLED(CONFIG_IA32_EMULATION))
                 current_thread_info()->status |= TS_COMPAT;
-        /*
-         * Subtlety here: if ptrace pokes something larger than 2^32-1 into
-         * orig_ax, the unsigned int return value truncates it. This may
-         * or may not be necessary, but it matches the old asm behavior.
-         */
-        return (unsigned int)syscall_enter_from_user_mode(regs, nr);
+
+        return (unsigned int)regs->orig_ax;
 }
 
 /*
@@ -91,15 +85,29 @@ __visible noinstr void do_int80_syscall_32(struct pt_regs *regs)
 {
         unsigned int nr = syscall_32_enter(regs);
 
+        /*
+         * Subtlety here: if ptrace pokes something larger than 2^32-1 into
+         * orig_ax, the unsigned int return value truncates it. This may
+         * or may not be necessary, but it matches the old asm behavior.
+         */
+        nr = (unsigned int)syscall_enter_from_user_mode(regs, nr);
+
         do_syscall_32_irqs_on(regs, nr);
+
         syscall_exit_to_user_mode(regs);
 }
 
 static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
 {
         unsigned int nr = syscall_32_enter(regs);
         int res;
 
+        /*
+         * This cannot use syscall_enter_from_user_mode() as it has to
+         * fetch EBP before invoking any of the syscall entry work
+         * functions.
+         */
+        syscall_enter_from_user_mode_prepare(regs);
+
         instrumentation_begin();
         /* Fetch EBP from where the vDSO stashed it. */
         if (IS_ENABLED(CONFIG_X86_64)) {
@@ -122,6 +130,9 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
                 return false;
         }
 
+        /* The case truncates any ptrace induced syscall nr > 2^32 -1 */
+        nr = (unsigned int)syscall_enter_from_user_mode_work(regs, nr);
+
         /* Now this is just like a normal syscall. */
         do_syscall_32_irqs_on(regs, nr);
         syscall_exit_to_user_mode(regs);
include/linux/entry-common.h
@@ -110,15 +110,30 @@ static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs
 #endif
 
 /**
- * syscall_enter_from_user_mode - Check and handle work before invoking
- *                                a syscall
+ * syscall_enter_from_user_mode_prepare - Establish state and enable interrupts
  * @regs:       Pointer to currents pt_regs
- * @syscall:    The syscall number
  *
  * Invoked from architecture specific syscall entry code with interrupts
  * disabled. The calling code has to be non-instrumentable. When the
- * function returns all state is correct and the subsequent functions can be
- * instrumented.
+ * function returns all state is correct, interrupts are enabled and the
+ * subsequent functions can be instrumented.
+ *
+ * This handles lockdep, RCU (context tracking) and tracing state.
+ *
+ * This is invoked when there is extra architecture specific functionality
+ * to be done between establishing state and handling user mode entry work.
+ */
+void syscall_enter_from_user_mode_prepare(struct pt_regs *regs);
+
+/**
+ * syscall_enter_from_user_mode_work - Check and handle work before invoking
+ *                                     a syscall
+ * @regs:       Pointer to currents pt_regs
+ * @syscall:    The syscall number
+ *
+ * Invoked from architecture specific syscall entry code with interrupts
+ * enabled after invoking syscall_enter_from_user_mode_prepare() and extra
+ * architecture specific work.
  *
  * Returns: The original or a modified syscall number
  *
@@ -127,12 +142,30 @@ static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs
  * syscall_set_return_value() first. If neither of those are called and -1
  * is returned, then the syscall will fail with ENOSYS.
  *
- * The following functionality is handled here:
+ * It handles the following work items:
  *
- *  1) Establish state (lockdep, RCU (context tracking), tracing)
- *  2) TIF flag dependent invocations of arch_syscall_enter_tracehook(),
+ *  1) TIF flag dependent invocations of arch_syscall_enter_tracehook(),
  *     __secure_computing(), trace_sys_enter()
- *  3) Invocation of audit_syscall_entry()
+ *  2) Invocation of audit_syscall_entry()
+ */
+long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall);
+
+/**
+ * syscall_enter_from_user_mode - Establish state and check and handle work
+ *                                before invoking a syscall
+ * @regs:       Pointer to currents pt_regs
+ * @syscall:    The syscall number
+ *
+ * Invoked from architecture specific syscall entry code with interrupts
+ * disabled. The calling code has to be non-instrumentable. When the
+ * function returns all state is correct, interrupts are enabled and the
+ * subsequent functions can be instrumented.
+ *
+ * This is combination of syscall_enter_from_user_mode_prepare() and
+ * syscall_enter_from_user_mode_work().
+ *
+ * Returns: The original or a modified syscall number. See
+ * syscall_enter_from_user_mode_work() for further explanation.
  */
 long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall);
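
Taken together, the header now offers both the combined helper and the split
pair. The following is a minimal usage sketch, not code from the patch:
arch_fetch_extra_syscall_arg() is a hypothetical stand-in for whatever has to
happen between establishing state and running the entry work (on x86 it is
the EBP fetch shown above):

/* Hypothetical arch specific step, standing in for e.g. the EBP fetch. */
static void arch_fetch_extra_syscall_arg(struct pt_regs *regs) { }

/* Plain case: no extra work needed, use the combined helper. */
static void arch_do_syscall(struct pt_regs *regs)
{
        long nr = syscall_get_nr(current, regs);

        nr = syscall_enter_from_user_mode(regs, nr);
        /* ... dispatch syscall nr ... */
        syscall_exit_to_user_mode(regs);
}

/* Split case: arch specific work between state setup and entry work. */
static void arch_do_syscall_split(struct pt_regs *regs)
{
        long nr = syscall_get_nr(current, regs);

        syscall_enter_from_user_mode_prepare(regs); /* state, interrupts on */
        arch_fetch_extra_syscall_arg(regs);         /* before ptrace etc. */
        nr = syscall_enter_from_user_mode_work(regs, nr);
        /* ... dispatch syscall nr ... */
        syscall_exit_to_user_mode(regs);
}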
kernel/entry/common.c
@@ -69,22 +69,45 @@ static long syscall_trace_enter(struct pt_regs *regs, long syscall,
         return ret ? : syscall_get_nr(current, regs);
 }
 
-noinstr long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall)
+static __always_inline long
+__syscall_enter_from_user_work(struct pt_regs *regs, long syscall)
 {
         unsigned long ti_work;
 
-        enter_from_user_mode(regs);
-        instrumentation_begin();
-
-        local_irq_enable();
         ti_work = READ_ONCE(current_thread_info()->flags);
         if (ti_work & SYSCALL_ENTER_WORK)
                 syscall = syscall_trace_enter(regs, syscall, ti_work);
 
-        instrumentation_end();
-
         return syscall;
 }
 
+long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall)
+{
+        return __syscall_enter_from_user_work(regs, syscall);
+}
+
+noinstr long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall)
+{
+        long ret;
+
+        enter_from_user_mode(regs);
+
+        instrumentation_begin();
+        local_irq_enable();
+        ret = __syscall_enter_from_user_work(regs, syscall);
+        instrumentation_end();
+
+        return ret;
+}
+
+noinstr void syscall_enter_from_user_mode_prepare(struct pt_regs *regs)
+{
+        enter_from_user_mode(regs);
+
+        instrumentation_begin();
+        local_irq_enable();
+        instrumentation_end();
+}
+
 /**
  * exit_to_user_mode - Fixup state when exiting to user mode
  *