Commit 11894468 authored by Gabriel Krisman Bertazi, committed by Thomas Gleixner

entry: Support Syscall User Dispatch on common syscall entry

Syscall User Dispatch (SUD) must take precedence over seccomp and
ptrace, since the use case is emulation (it can be invoked with a
different ABI) such that seccomp filtering by syscall number doesn't
make sense in the first place.  In addition, either the syscall is
dispatched back to userspace, in which case there is no resource for them to
trace, or the syscall will be executed, and seccomp/ptrace will execute
next.

Since SUD runs before tracepoints, it needs to be a SYSCALL_WORK_EXIT as
well, just to prevent a trace exit event when dispatch was triggered.
For that, the exit path examines the task's syscall_dispatch.on_dispatch
flag to skip the tracepoint, audit and other work.

[ tglx: Add a comment on the exit side ]
Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Andy Lutomirski <luto@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20201127193238.821364-5-krisman@collabora.com
parent 1446e1df
...@@ -44,10 +44,12 @@ ...@@ -44,10 +44,12 @@
SYSCALL_WORK_SYSCALL_TRACE | \ SYSCALL_WORK_SYSCALL_TRACE | \
SYSCALL_WORK_SYSCALL_EMU | \ SYSCALL_WORK_SYSCALL_EMU | \
SYSCALL_WORK_SYSCALL_AUDIT | \ SYSCALL_WORK_SYSCALL_AUDIT | \
SYSCALL_WORK_SYSCALL_USER_DISPATCH | \
ARCH_SYSCALL_WORK_ENTER) ARCH_SYSCALL_WORK_ENTER)
#define SYSCALL_WORK_EXIT (SYSCALL_WORK_SYSCALL_TRACEPOINT | \ #define SYSCALL_WORK_EXIT (SYSCALL_WORK_SYSCALL_TRACEPOINT | \
SYSCALL_WORK_SYSCALL_TRACE | \ SYSCALL_WORK_SYSCALL_TRACE | \
SYSCALL_WORK_SYSCALL_AUDIT | \ SYSCALL_WORK_SYSCALL_AUDIT | \
SYSCALL_WORK_SYSCALL_USER_DISPATCH | \
ARCH_SYSCALL_WORK_EXIT) ARCH_SYSCALL_WORK_EXIT)
/* /*
......
...@@ -5,6 +5,8 @@ ...@@ -5,6 +5,8 @@
#include <linux/livepatch.h> #include <linux/livepatch.h>
#include <linux/audit.h> #include <linux/audit.h>
#include "common.h"
#define CREATE_TRACE_POINTS #define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h> #include <trace/events/syscalls.h>
...@@ -46,6 +48,16 @@ static long syscall_trace_enter(struct pt_regs *regs, long syscall, ...@@ -46,6 +48,16 @@ static long syscall_trace_enter(struct pt_regs *regs, long syscall,
{ {
long ret = 0; long ret = 0;
/*
* Handle Syscall User Dispatch. This must come first, since
* the ABI here can be something that doesn't make sense for
* other syscall_work features.
*/
if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) {
if (syscall_user_dispatch(regs))
return -1L;
}
/* Handle ptrace */ /* Handle ptrace */
if (work & (SYSCALL_WORK_SYSCALL_TRACE | SYSCALL_WORK_SYSCALL_EMU)) { if (work & (SYSCALL_WORK_SYSCALL_TRACE | SYSCALL_WORK_SYSCALL_EMU)) {
ret = arch_syscall_enter_tracehook(regs); ret = arch_syscall_enter_tracehook(regs);
...@@ -230,6 +242,19 @@ static void syscall_exit_work(struct pt_regs *regs, unsigned long work) ...@@ -230,6 +242,19 @@ static void syscall_exit_work(struct pt_regs *regs, unsigned long work)
{ {
bool step; bool step;
/*
* If the syscall was rolled back due to syscall user dispatching,
* then the tracers below are not invoked for the same reason as
* the entry side was not invoked in syscall_trace_enter(): The ABI
* of these syscalls is unknown.
*/
if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) {
if (unlikely(current->syscall_dispatch.on_dispatch)) {
current->syscall_dispatch.on_dispatch = false;
return;
}
}
audit_syscall_exit(regs); audit_syscall_exit(regs);
if (work & SYSCALL_WORK_SYSCALL_TRACEPOINT) if (work & SYSCALL_WORK_SYSCALL_TRACEPOINT)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment