Commit 1dfb0f47 authored by Linus Torvalds

Merge tag 'x86-entry-2021-06-29' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 entry code related updates from Thomas Gleixner:

 - Consolidate the macros for .byte ... opcode sequences

 - Deduplicate register offset defines in include files

 - Simplify the ia32,x32 compat handling of the related syscall tables
   to get rid of #ifdeffery.

 - Clear all EFLAGS which are not required for syscall handling

 - Consolidate the syscall tables and switch the generation over to the
   generic shell script and remove the CFLAGS tweaks which are no
   longer required.

 - Use 'int' type for system call numbers to match the generic code.

 - Add more selftests for syscalls

* tag 'x86-entry-2021-06-29' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/syscalls: Don't adjust CFLAGS for syscall tables
  x86/syscalls: Remove -Wno-override-init for syscall tables
  x86/uml/syscalls: Remove array index from syscall initializers
  x86/syscalls: Clear 'offset' and 'prefix' in case they are set in env
  x86/entry: Use int everywhere for system call numbers
  x86/entry: Treat out of range and gap system calls the same
  x86/entry/64: Sign-extend system calls on entry to int
  selftests/x86/syscall: Add tests under ptrace to syscall_numbering_64
  selftests/x86/syscall: Simplify message reporting in syscall_numbering
  selftests/x86/syscall: Update and extend syscall_numbering_64
  x86/syscalls: Switch to generic syscallhdr.sh
  x86/syscalls: Use __NR_syscalls instead of __NR_syscall_max
  x86/unistd: Define X32_NR_syscalls only for 64-bit kernel
  x86/syscalls: Stop filling syscall arrays with *_sys_ni_syscall
  x86/syscalls: Switch to generic syscalltbl.sh
  x86/entry/x32: Rename __x32_compat_sys_* to __x64_compat_sys_*
parents a22c3f61 48f7eee8
......@@ -41,7 +41,7 @@ void handle_syscall(struct uml_pt_regs *r)
goto out;
syscall = UPT_SYSCALL_NR(r);
if (syscall >= 0 && syscall <= __NR_syscall_max)
if (syscall >= 0 && syscall < __NR_syscalls)
PT_REGS_SET_SYSCALL_RETURN(regs,
EXECUTE_SYSCALL(syscall, regs));
......
......@@ -8,18 +8,8 @@ UBSAN_SANITIZE := n
KCOV_INSTRUMENT := n
CFLAGS_REMOVE_common.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_syscall_64.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_syscall_32.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_syscall_x32.o = $(CC_FLAGS_FTRACE)
CFLAGS_common.o += -fno-stack-protector
CFLAGS_syscall_64.o += -fno-stack-protector
CFLAGS_syscall_32.o += -fno-stack-protector
CFLAGS_syscall_x32.o += -fno-stack-protector
CFLAGS_syscall_64.o += $(call cc-option,-Wno-override-init,)
CFLAGS_syscall_32.o += $(call cc-option,-Wno-override-init,)
CFLAGS_syscall_x32.o += $(call cc-option,-Wno-override-init,)
obj-y := entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o
obj-y += common.o
......
......@@ -36,61 +36,97 @@
#include <asm/irq_stack.h>
#ifdef CONFIG_X86_64
__visible noinstr void do_syscall_64(struct pt_regs *regs, unsigned long nr)
/*
 * Try to dispatch @nr as a native 64-bit system call.
 *
 * Returns true when @nr indexes sys_call_table; the handler's result is
 * then stored in regs->ax. Returns false, without touching @regs, when
 * @nr lies outside the table.
 */
static __always_inline bool do_syscall_x64(struct pt_regs *regs, int nr)
{
	/*
	 * Viewed as unsigned, a negative number becomes a very large value,
	 * so the single upper-bound test below rejects it as well.
	 */
	unsigned int idx = nr;

	if (!likely(idx < NR_syscalls))
		return false;

	/* Sanitize the index with array_index_nospec() before the lookup. */
	idx = array_index_nospec(idx, NR_syscalls);
	regs->ax = sys_call_table[idx](regs);
	return true;
}
/*
 * Try to dispatch @nr as an x32 system call.
 *
 * Returns true when CONFIG_X86_X32_ABI is enabled and @nr, after removing
 * __X32_SYSCALL_BIT, indexes x32_sys_call_table; the handler's result is
 * then stored in regs->ax. Returns false, without touching @regs,
 * otherwise.
 */
static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)
{
	/*
	 * Rebase the number so that the first x32 call maps to slot 0.
	 * Numbers below __X32_SYSCALL_BIT wrap to very large unsigned
	 * values and therefore fail the range test.
	 */
	unsigned int slot = nr - __X32_SYSCALL_BIT;

	if (!IS_ENABLED(CONFIG_X86_X32_ABI) || !likely(slot < X32_NR_syscalls))
		return false;

	/* Sanitize the index with array_index_nospec() before the lookup. */
	slot = array_index_nospec(slot, X32_NR_syscalls);
	regs->ax = x32_sys_call_table[slot](regs);
	return true;
}
__visible noinstr void do_syscall_64(struct pt_regs *regs, int nr)
{
add_random_kstack_offset();
nr = syscall_enter_from_user_mode(regs, nr);
instrumentation_begin();
if (likely(nr < NR_syscalls)) {
nr = array_index_nospec(nr, NR_syscalls);
regs->ax = sys_call_table[nr](regs);
#ifdef CONFIG_X86_X32_ABI
} else if (likely((nr & __X32_SYSCALL_BIT) &&
(nr & ~__X32_SYSCALL_BIT) < X32_NR_syscalls)) {
nr = array_index_nospec(nr & ~__X32_SYSCALL_BIT,
X32_NR_syscalls);
regs->ax = x32_sys_call_table[nr](regs);
#endif
if (!do_syscall_x64(regs, nr) && !do_syscall_x32(regs, nr) && nr != -1) {
/* Invalid system call, but still a system call. */
regs->ax = __x64_sys_ni_syscall(regs);
}
instrumentation_end();
syscall_exit_to_user_mode(regs);
}
#endif
#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
static __always_inline unsigned int syscall_32_enter(struct pt_regs *regs)
static __always_inline int syscall_32_enter(struct pt_regs *regs)
{
if (IS_ENABLED(CONFIG_IA32_EMULATION))
current_thread_info()->status |= TS_COMPAT;
return (unsigned int)regs->orig_ax;
return (int)regs->orig_ax;
}
/*
* Invoke a 32-bit syscall. Called with IRQs on in CONTEXT_KERNEL.
*/
static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs,
unsigned int nr)
static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, int nr)
{
if (likely(nr < IA32_NR_syscalls)) {
nr = array_index_nospec(nr, IA32_NR_syscalls);
regs->ax = ia32_sys_call_table[nr](regs);
/*
* Convert negative numbers to very high and thus out of range
* numbers for comparisons.
*/
unsigned int unr = nr;
if (likely(unr < IA32_NR_syscalls)) {
unr = array_index_nospec(unr, IA32_NR_syscalls);
regs->ax = ia32_sys_call_table[unr](regs);
} else if (nr != -1) {
regs->ax = __ia32_sys_ni_syscall(regs);
}
}
/* Handles int $0x80 */
__visible noinstr void do_int80_syscall_32(struct pt_regs *regs)
{
unsigned int nr = syscall_32_enter(regs);
int nr = syscall_32_enter(regs);
add_random_kstack_offset();
/*
* Subtlety here: if ptrace pokes something larger than 2^32-1 into
* orig_ax, the unsigned int return value truncates it. This may
* or may not be necessary, but it matches the old asm behavior.
* Subtlety here: if ptrace pokes something larger than 2^31-1 into
* orig_ax, the int return value truncates it. This matches
* the semantics of syscall_get_nr().
*/
nr = (unsigned int)syscall_enter_from_user_mode(regs, nr);
nr = syscall_enter_from_user_mode(regs, nr);
instrumentation_begin();
do_syscall_32_irqs_on(regs, nr);
......@@ -101,7 +137,7 @@ __visible noinstr void do_int80_syscall_32(struct pt_regs *regs)
static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
{
unsigned int nr = syscall_32_enter(regs);
int nr = syscall_32_enter(regs);
int res;
add_random_kstack_offset();
......@@ -136,8 +172,7 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
return false;
}
/* The case truncates any ptrace induced syscall nr > 2^32 -1 */
nr = (unsigned int)syscall_enter_from_user_mode_work(regs, nr);
nr = syscall_enter_from_user_mode_work(regs, nr);
/* Now this is just like a normal syscall. */
do_syscall_32_irqs_on(regs, nr);
......
......@@ -108,7 +108,8 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
/* IRQs are off. */
movq %rsp, %rdi
movq %rax, %rsi
/* Sign extend the lower 32bit as syscall numbers are treated as int */
movslq %eax, %rsi
call do_syscall_64 /* returns with IRQs disabled */
/*
......
......@@ -5,21 +5,21 @@
#include <linux/sys.h>
#include <linux/cache.h>
#include <linux/syscalls.h>
#include <asm/unistd.h>
#include <asm/syscall.h>
#define __SYSCALL_I386(nr, sym) extern long __ia32_##sym(const struct pt_regs *);
#ifdef CONFIG_IA32_EMULATION
#define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, compat)
#else
#define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, native)
#endif
#define __SYSCALL(nr, sym) extern long __ia32_##sym(const struct pt_regs *);
#include <asm/syscalls_32.h>
#undef __SYSCALL_I386
#undef __SYSCALL
#define __SYSCALL_I386(nr, sym) [nr] = __ia32_##sym,
#define __SYSCALL(nr, sym) __ia32_##sym,
__visible const sys_call_ptr_t ia32_sys_call_table[__NR_ia32_syscall_max+1] = {
/*
* Smells like a compiler bug -- it doesn't work
* when the & below is removed.
*/
[0 ... __NR_ia32_syscall_max] = &__ia32_sys_ni_syscall,
__visible const sys_call_ptr_t ia32_sys_call_table[] = {
#include <asm/syscalls_32.h>
};
......@@ -5,23 +5,14 @@
#include <linux/sys.h>
#include <linux/cache.h>
#include <linux/syscalls.h>
#include <asm/unistd.h>
#include <asm/syscall.h>
#define __SYSCALL_X32(nr, sym)
#define __SYSCALL_COMMON(nr, sym) __SYSCALL_64(nr, sym)
#define __SYSCALL_64(nr, sym) extern long __x64_##sym(const struct pt_regs *);
#define __SYSCALL(nr, sym) extern long __x64_##sym(const struct pt_regs *);
#include <asm/syscalls_64.h>
#undef __SYSCALL_64
#undef __SYSCALL
#define __SYSCALL_64(nr, sym) [nr] = __x64_##sym,
#define __SYSCALL(nr, sym) __x64_##sym,
asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
/*
* Smells like a compiler bug -- it doesn't work
* when the & below is removed.
*/
[0 ... __NR_syscall_max] = &__x64_sys_ni_syscall,
asmlinkage const sys_call_ptr_t sys_call_table[] = {
#include <asm/syscalls_64.h>
};
......@@ -5,37 +5,14 @@
#include <linux/sys.h>
#include <linux/cache.h>
#include <linux/syscalls.h>
#include <asm/unistd.h>
#include <asm/syscall.h>
/*
* Reuse the 64-bit entry points for the x32 versions that occupy different
* slots in the syscall table.
*/
#define __x32_sys_readv __x64_sys_readv
#define __x32_sys_writev __x64_sys_writev
#define __x32_sys_getsockopt __x64_sys_getsockopt
#define __x32_sys_setsockopt __x64_sys_setsockopt
#define __x32_sys_vmsplice __x64_sys_vmsplice
#define __x32_sys_process_vm_readv __x64_sys_process_vm_readv
#define __x32_sys_process_vm_writev __x64_sys_process_vm_writev
#define __SYSCALL(nr, sym) extern long __x64_##sym(const struct pt_regs *);
#include <asm/syscalls_x32.h>
#undef __SYSCALL
#define __SYSCALL_64(nr, sym)
#define __SYSCALL(nr, sym) __x64_##sym,
#define __SYSCALL_X32(nr, sym) extern long __x32_##sym(const struct pt_regs *);
#define __SYSCALL_COMMON(nr, sym) extern long __x64_##sym(const struct pt_regs *);
#include <asm/syscalls_64.h>
#undef __SYSCALL_X32
#undef __SYSCALL_COMMON
#define __SYSCALL_X32(nr, sym) [nr] = __x32_##sym,
#define __SYSCALL_COMMON(nr, sym) [nr] = __x64_##sym,
asmlinkage const sys_call_ptr_t x32_sys_call_table[__NR_x32_syscall_max+1] = {
/*
* Smells like a compiler bug -- it doesn't work
* when the & below is removed.
*/
[0 ... __NR_x32_syscall_max] = &__x64_sys_ni_syscall,
#include <asm/syscalls_64.h>
asmlinkage const sys_call_ptr_t x32_sys_call_table[] = {
#include <asm/syscalls_x32.h>
};
......@@ -9,47 +9,54 @@ _dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') \
syscall32 := $(src)/syscall_32.tbl
syscall64 := $(src)/syscall_64.tbl
syshdr := $(srctree)/$(src)/syscallhdr.sh
systbl := $(srctree)/$(src)/syscalltbl.sh
syshdr := $(srctree)/scripts/syscallhdr.sh
systbl := $(srctree)/scripts/syscalltbl.sh
offset :=
prefix :=
quiet_cmd_syshdr = SYSHDR $@
cmd_syshdr = $(CONFIG_SHELL) '$(syshdr)' '$<' '$@' \
'$(syshdr_abi_$(basetarget))' \
'$(syshdr_pfx_$(basetarget))' \
'$(syshdr_offset_$(basetarget))'
cmd_syshdr = $(CONFIG_SHELL) $(syshdr) --abis $(abis) --emit-nr \
$(if $(offset),--offset $(offset)) \
$(if $(prefix),--prefix $(prefix)) \
$< $@
quiet_cmd_systbl = SYSTBL $@
cmd_systbl = $(CONFIG_SHELL) '$(systbl)' $< $@
cmd_systbl = $(CONFIG_SHELL) $(systbl) --abis $(abis) $< $@
quiet_cmd_hypercalls = HYPERCALLS $@
cmd_hypercalls = $(CONFIG_SHELL) '$<' $@ $(filter-out $<, $(real-prereqs))
syshdr_abi_unistd_32 := i386
$(uapi)/unistd_32.h: abis := i386
$(uapi)/unistd_32.h: $(syscall32) $(syshdr) FORCE
$(call if_changed,syshdr)
syshdr_abi_unistd_32_ia32 := i386
syshdr_pfx_unistd_32_ia32 := ia32_
$(out)/unistd_32_ia32.h: abis := i386
$(out)/unistd_32_ia32.h: prefix := ia32_
$(out)/unistd_32_ia32.h: $(syscall32) $(syshdr) FORCE
$(call if_changed,syshdr)
syshdr_abi_unistd_x32 := common,x32
syshdr_offset_unistd_x32 := __X32_SYSCALL_BIT
$(uapi)/unistd_x32.h: abis := common,x32
$(uapi)/unistd_x32.h: offset := __X32_SYSCALL_BIT
$(uapi)/unistd_x32.h: $(syscall64) $(syshdr) FORCE
$(call if_changed,syshdr)
syshdr_abi_unistd_64 := common,64
$(uapi)/unistd_64.h: abis := common,64
$(uapi)/unistd_64.h: $(syscall64) $(syshdr) FORCE
$(call if_changed,syshdr)
syshdr_abi_unistd_64_x32 := x32
syshdr_pfx_unistd_64_x32 := x32_
$(out)/unistd_64_x32.h: abis := x32
$(out)/unistd_64_x32.h: prefix := x32_
$(out)/unistd_64_x32.h: $(syscall64) $(syshdr) FORCE
$(call if_changed,syshdr)
$(out)/syscalls_32.h: abis := i386
$(out)/syscalls_32.h: $(syscall32) $(systbl) FORCE
$(call if_changed,systbl)
$(out)/syscalls_64.h: abis := common,64
$(out)/syscalls_64.h: $(syscall64) $(systbl) FORCE
$(call if_changed,systbl)
$(out)/syscalls_x32.h: abis := common,x32
$(out)/syscalls_x32.h: $(syscall64) $(systbl) FORCE
$(call if_changed,systbl)
$(out)/xen-hypercalls.h: $(srctree)/scripts/xen-hypercalls.sh FORCE
$(call if_changed,hypercalls)
......@@ -60,6 +67,7 @@ uapisyshdr-y += unistd_32.h unistd_64.h unistd_x32.h
syshdr-y += syscalls_32.h
syshdr-$(CONFIG_X86_64) += unistd_32_ia32.h unistd_64_x32.h
syshdr-$(CONFIG_X86_64) += syscalls_64.h
syshdr-$(CONFIG_X86_X32) += syscalls_x32.h
syshdr-$(CONFIG_XEN) += xen-hypercalls.h
uapisyshdr-y := $(addprefix $(uapi)/, $(uapisyshdr-y))
......
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
#
# Generate a header of __NR_* system call number defines from a syscall table.
#
# Positional arguments:
#   $1  input syscall table
#   $2  output header file
#   $3  comma-separated list of ABIs to select from the table
#   $4  optional prefix inserted after "__NR_" in every emitted name
#   $5  optional offset; when non-empty each number is emitted as "($5 + nr)"
in="$1"
out="$2"
# Turn "a,b" into the extended-regex alternation "(a|b)" used by grep below.
my_abis=`echo "($3)" | tr ',' '|'`
prefix="$4"
offset="$5"
# Include guard: "_ASM_X86_" plus the output file's basename, upper-cased,
# with every character outside [A-Z0-9_] turned into "_" and "__" reduced
# to "_".
fileguard=_ASM_X86_`basename "$out" | sed \
-e 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/' \
-e 's/[^A-Z0-9_]/_/g' -e 's/__/_/g'`
# Select the rows whose first field looks like a number and whose ABI field
# starts with one of the requested ABIs, sort them numerically, and write
# the header from them.
grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | (
echo "#ifndef ${fileguard}"
echo "#define ${fileguard} 1"
echo ""
# Number of the most recently read row; stays 0 if no row matched.
max=0
while read nr abi name entry ; do
if [ -z "$offset" ]; then
echo "#define __NR_${prefix}${name} $nr"
else
echo "#define __NR_${prefix}${name} ($offset + $nr)"
fi
max=$nr
done
echo ""
# After the numeric sort, the last row read carries the largest number.
echo "#ifdef __KERNEL__"
echo "#define __NR_${prefix}syscall_max $max"
echo "#endif"
echo ""
echo "#endif /* ${fileguard} */"
) > "$out"
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# Generate __SYSCALL_<ABI>(nr, entry) macro invocations from a syscall table.
#
# Positional arguments:
#   $1  input syscall table
#   $2  output file
in="$1"
out="$2"
# Print one "__SYSCALL_<abi>(<nr>, <entry>)" line.
#   $1  upper-cased ABI name   $2  syscall number   $3  entry point symbol
syscall_macro() {
local abi="$1"
local nr="$2"
local entry="$3"
echo "__SYSCALL_${abi}($nr, $entry)"
}
# Emit the output for one table row.
#   $1  upper-cased ABI name   $2  syscall number
#   $3  native entry point (may be empty)   $4  compat entry point (may be empty)
emit() {
local abi="$1"
local nr="$2"
local entry="$3"
local compat="$4"
# A compat entry point is only accepted for the I386 ABI.
if [ "$abi" != "I386" -a -n "$compat" ]; then
echo "a compat entry ($abi: $compat) for a 64-bit syscall makes no sense" >&2
exit 1
fi
if [ -z "$compat" ]; then
# No compat entry: emit the native entry, or nothing if the row has none.
if [ -n "$entry" ]; then
syscall_macro "$abi" "$nr" "$entry"
fi
else
# With a compat entry: the native entry is emitted under CONFIG_X86_32 and
# the compat entry in the #else branch.
echo "#ifdef CONFIG_X86_32"
if [ -n "$entry" ]; then
syscall_macro "$abi" "$nr" "$entry"
fi
echo "#else"
syscall_macro "$abi" "$nr" "$compat"
echo "#endif"
fi
}
# Process every row that starts with a digit, in numeric order. The "name"
# column is read but not used.
grep '^[0-9]' "$in" | sort -n | (
while read nr abi name entry compat; do
abi=`echo "$abi" | tr '[a-z]' '[A-Z]'`
emit "$abi" "$nr" "$entry" "$compat"
done
) > "$out"
......@@ -3,6 +3,7 @@
generated-y += syscalls_32.h
generated-y += syscalls_64.h
generated-y += syscalls_x32.h
generated-y += unistd_32_ia32.h
generated-y += unistd_64_x32.h
generated-y += xen-hypercalls.h
......
......@@ -159,7 +159,7 @@ static inline int syscall_get_arch(struct task_struct *task)
? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
}
void do_syscall_64(struct pt_regs *regs, unsigned long nr);
void do_syscall_64(struct pt_regs *regs, int nr);
void do_int80_syscall_32(struct pt_regs *regs);
long do_fast_syscall_32(struct pt_regs *regs);
......
......@@ -17,7 +17,7 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs);
* __x64_sys_*() - 64-bit native syscall
* __ia32_sys_*() - 32-bit native syscall or common compat syscall
* __ia32_compat_sys_*() - 32-bit compat syscall
* __x32_compat_sys_*() - 64-bit X32 compat syscall
* __x64_compat_sys_*() - 64-bit X32 compat syscall
*
* The registers are decoded according to the ABI:
* 64-bit: RDI, RSI, RDX, R10, R8, R9
......@@ -166,17 +166,17 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs);
* with x86_64 obviously do not need such care.
*/
#define __X32_COMPAT_SYS_STUB0(name) \
__SYS_STUB0(x32, compat_sys_##name)
__SYS_STUB0(x64, compat_sys_##name)
#define __X32_COMPAT_SYS_STUBx(x, name, ...) \
__SYS_STUBx(x32, compat_sys##name, \
__SYS_STUBx(x64, compat_sys##name, \
SC_X86_64_REGS_TO_ARGS(x, __VA_ARGS__))
#define __X32_COMPAT_COND_SYSCALL(name) \
__COND_SYSCALL(x32, compat_sys_##name)
__COND_SYSCALL(x64, compat_sys_##name)
#define __X32_COMPAT_SYS_NI(name) \
__SYS_NI(x32, compat_sys_##name)
__SYS_NI(x64, compat_sys_##name)
#else /* CONFIG_X86_X32 */
#define __X32_COMPAT_SYS_STUB0(name)
#define __X32_COMPAT_SYS_STUBx(x, name, ...)
......
......@@ -13,7 +13,7 @@
# define __ARCH_WANT_SYS_OLD_MMAP
# define __ARCH_WANT_SYS_OLD_SELECT
# define __NR_ia32_syscall_max __NR_syscall_max
# define IA32_NR_syscalls (__NR_syscalls)
# else
......@@ -26,12 +26,12 @@
# define __ARCH_WANT_COMPAT_SYS_PWRITEV64
# define __ARCH_WANT_COMPAT_SYS_PREADV64V2
# define __ARCH_WANT_COMPAT_SYS_PWRITEV64V2
# define X32_NR_syscalls (__NR_x32_syscalls)
# define IA32_NR_syscalls (__NR_ia32_syscalls)
# endif
# define NR_syscalls (__NR_syscall_max + 1)
# define X32_NR_syscalls (__NR_x32_syscall_max + 1)
# define IA32_NR_syscalls (__NR_ia32_syscall_max + 1)
# define NR_syscalls (__NR_syscalls)
# define __ARCH_WANT_NEW_STAT
# define __ARCH_WANT_OLD_READDIR
......
......@@ -7,7 +7,6 @@
#include <linux/linkage.h>
#include <linux/sys.h>
#include <linux/cache.h>
#include <asm/unistd.h>
#include <asm/syscall.h>
#define __NO_STUBS
......@@ -26,20 +25,17 @@
#define old_mmap sys_old_mmap
#define __SYSCALL_I386(nr, sym) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
#define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, native)
#define __SYSCALL(nr, sym) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
#include <asm/syscalls_32.h>
#undef __SYSCALL_I386
#define __SYSCALL_I386(nr, sym) [ nr ] = sym,
#undef __SYSCALL
#define __SYSCALL(nr, sym) sym,
extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
const sys_call_ptr_t sys_call_table[] ____cacheline_aligned = {
/*
* Smells like a compiler bug -- it doesn't work
* when the & below is removed.
*/
[0 ... __NR_syscall_max] = &sys_ni_syscall,
#include <asm/syscalls_32.h>
};
......
......@@ -7,7 +7,6 @@
#include <linux/linkage.h>
#include <linux/sys.h>
#include <linux/cache.h>
#include <asm/unistd.h>
#include <asm/syscall.h>
#define __NO_STUBS
......@@ -36,23 +35,15 @@
#define stub_execveat sys_execveat
#define stub_rt_sigreturn sys_rt_sigreturn
#define __SYSCALL_X32(nr, sym)
#define __SYSCALL_COMMON(nr, sym) __SYSCALL_64(nr, sym)
#define __SYSCALL_64(nr, sym) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
#define __SYSCALL(nr, sym) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
#include <asm/syscalls_64.h>
#undef __SYSCALL_64
#define __SYSCALL_64(nr, sym) [ nr ] = sym,
#undef __SYSCALL
#define __SYSCALL(nr, sym) sym,
extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
const sys_call_ptr_t sys_call_table[] ____cacheline_aligned = {
/*
* Smells like a compiler bug -- it doesn't work
* when the & below is removed.
*/
[0 ... __NR_syscall_max] = &sys_ni_syscall,
#include <asm/syscalls_64.h>
};
......
/* SPDX-License-Identifier: GPL-2.0 */
/*
* syscall_arg_fault.c - tests faults 32-bit fast syscall stack args
* syscall_numbering.c - test calling the x86-64 kernel with various
* valid and invalid system call numbers.
*
* Copyright (c) 2018 Andrew Lutomirski
*/
......@@ -11,79 +13,470 @@
#include <stdbool.h>
#include <errno.h>
#include <unistd.h>
#include <syscall.h>
#include <string.h>
#include <fcntl.h>
#include <limits.h>
#include <signal.h>
#include <sysexits.h>
static int nerrs;
#include <sys/ptrace.h>
#include <sys/user.h>
#include <sys/wait.h>
#include <sys/mman.h>
#define X32_BIT 0x40000000UL
#include <linux/ptrace.h>
static void check_enosys(unsigned long nr, bool *ok)
/* Common system call numbers */
#define SYS_READ 0
#define SYS_WRITE 1
#define SYS_GETPID 39
/* x64-only system call numbers */
#define X64_IOCTL 16
#define X64_READV 19
#define X64_WRITEV 20
/* x32-only system call numbers (without X32_BIT) */
#define X32_IOCTL 514
#define X32_READV 515
#define X32_WRITEV 516
#define X32_BIT 0x40000000
static int nullfd = -1; /* File descriptor for /dev/null */
static bool with_x32; /* x32 supported on this kernel? */
/*
 * What the tracer does at a traced system call stop, one value per test
 * pass. PTP_DONE is not a pass: it terminates the tracee's loop over the
 * passes and the tracer's tracing loop.
 */
enum ptrace_pass {
PTP_NOTHING, /* Stop only; the registers are not read */
PTP_GETREGS, /* Read the registers, write nothing back */
PTP_WRITEBACK, /* Read the registers and write them back unchanged */
PTP_FUZZRET, /* Write back with rax set to MODIFIED_BY_PTRACE */
PTP_FUZZHIGH, /* As PTP_FUZZRET, plus all upper 32 bits of orig_rax set */
PTP_INTNUM, /* As PTP_FUZZRET, plus orig_rax sign-extended from 32 bits */
PTP_DONE
};
static const char * const ptrace_pass_name[] =
{
/* If this fails, a segfault is reasonably likely. */
fflush(stdout);
[PTP_NOTHING] = "just stop, no data read",
[PTP_GETREGS] = "only getregs",
[PTP_WRITEBACK] = "getregs, unmodified setregs",
[PTP_FUZZRET] = "modifying the default return",
[PTP_FUZZHIGH] = "clobbering the top 32 bits",
[PTP_INTNUM] = "sign-extending the syscall number",
};
long ret = syscall(nr, 0, 0, 0, 0, 0, 0);
if (ret == 0) {
printf("[FAIL]\tsyscall %lu succeeded, but it should have failed\n", nr);
*ok = false;
} else if (errno != ENOSYS) {
printf("[FAIL]\tsyscall %lu had error code %d, but it should have reported ENOSYS\n", nr, errno);
*ok = false;
}
/*
* Shared memory block between tracer and test
*/
struct shared {
unsigned int nerr; /* Total error count, incremented by fail() */
unsigned int indent; /* Message indentation level; offset() adds 4 columns per level */
/* Pass currently running; written by the tracee, read by the tracer */
enum ptrace_pass ptrace_pass;
bool probing_syscall; /* True while probe_syscall() issues its system call; reset by mess_with_syscall() */
};
static volatile struct shared *sh;
/*
 * Width of the left-hand tag column used by msg(): 8 columns plus 4 per
 * indentation level. Level 0 is assumed while the shared block pointer is
 * still NULL.
 */
static inline unsigned int offset(void)
{
	if (!sh)
		return 8;

	return 8 + sh->indent * 4;
}
static void test_x32_without_x32_bit(void)
/*
 * Message helpers. msg() prints "[LVL]" left-justified in a column whose
 * width comes from offset(), followed by the formatted text. The width
 * argument of "%-*s" must be an int, hence the cast.
 */
#define msg(lvl, fmt, ...) printf("%-*s" fmt, (int)offset(), "[" #lvl "]", \
				  ## __VA_ARGS__)

#define run(fmt, ...)	msg(RUN,  fmt, ## __VA_ARGS__)
#define info(fmt, ...)	msg(INFO, fmt, ## __VA_ARGS__)
#define ok(fmt, ...)	msg(OK,   fmt, ## __VA_ARGS__)

/* Report a test failure and count it in the shared error counter. */
#define fail(fmt, ...)					\
	do {						\
		msg(FAIL, fmt, ## __VA_ARGS__);		\
		sh->nerr++;				\
	} while (0)

/*
 * Report a fatal setup problem and exit with EX_OSERR.
 *
 * This can run before the shared block has been mapped (sh == NULL) or
 * right after mmap() failed (sh == MAP_FAILED). sh is therefore only
 * dereferenced when it points at a real mapping, and a failed mapping is
 * reset to NULL so that offset() sees "no shared block" as well.
 */
#define crit(fmt, ...)					\
	do {						\
		if (sh == MAP_FAILED)			\
			sh = NULL;			\
		if (sh)					\
			sh->indent = 0;			\
		msg(FAIL, fmt, ## __VA_ARGS__);		\
		msg(SKIP, "Unable to run test\n");	\
		exit(EX_OSERR);				\
	} while (0)
/* Sentinel for ptrace-modified return value */
#define MODIFIED_BY_PTRACE -9999
/*
* Directly invokes the given syscall with nullfd as the first argument
* and the rest zero. Avoids involving glibc wrappers in case they ever
* end up intercepting some system calls for some reason, or modify
* the system call number itself.
*/
static long long probe_syscall(int msb, int lsb)
{
bool ok = true;
register long long arg1 asm("rdi") = nullfd;
register long long arg2 asm("rsi") = 0;
register long long arg3 asm("rdx") = 0;
register long long arg4 asm("r10") = 0;
register long long arg5 asm("r8") = 0;
register long long arg6 asm("r9") = 0;
long long nr = ((long long)msb << 32) | (unsigned int)lsb;
long long ret;
/*
* Syscalls 512-547 are "x32" syscalls. They are intended to be
* called with the x32 (0x40000000) bit set. Calling them without
* the x32 bit set is nonsense and should not work.
* We pass in an extra copy of the extended system call number
* in %rbx, so we can examine it from the ptrace handler without
* worrying about it being possibly modified. This is to test
* the validity of struct user regs.orig_rax a.k.a.
* struct pt_regs.orig_ax.
*/
printf("[RUN]\tChecking syscalls 512-547\n");
for (int i = 512; i <= 547; i++)
check_enosys(i, &ok);
sh->probing_syscall = true;
asm volatile("syscall"
: "=a" (ret)
: "a" (nr), "b" (nr),
"r" (arg1), "r" (arg2), "r" (arg3),
"r" (arg4), "r" (arg5), "r" (arg6)
: "rcx", "r11", "memory", "cc");
sh->probing_syscall = false;
return ret;
}
static const char *syscall_str(int msb, int start, int end)
{
static char buf[64];
const char * const type = (start & X32_BIT) ? "x32" : "x64";
int lsb = start;
/*
* Check that a handful of 64-bit-only syscalls are rejected if the x32
* bit is set.
* Improve readability by stripping the x32 bit, but round
* toward zero so we don't display -1 as -1073741825.
*/
printf("[RUN]\tChecking some 64-bit syscalls in x32 range\n");
check_enosys(16 | X32_BIT, &ok); /* ioctl */
check_enosys(19 | X32_BIT, &ok); /* readv */
check_enosys(20 | X32_BIT, &ok); /* writev */
if (lsb < 0)
lsb |= X32_BIT;
else
lsb &= ~X32_BIT;
if (start == end)
snprintf(buf, sizeof buf, "%s syscall %d:%d",
type, msb, lsb);
else
snprintf(buf, sizeof buf, "%s syscalls %d:%d..%d",
type, msb, lsb, lsb + (end-start));
return buf;
}
/*
 * Probe every system call number in [start, end] (inclusive) with the given
 * upper 32 bits and compare each return value with @expect.
 *
 * @msb:        value placed in the upper 32 bits of the system call number
 * @start:      first low-32-bit system call number to probe
 * @end:        last low-32-bit system call number to probe
 * @expect:     return value every probe must produce
 * @expect_str: text used for @expect in the messages
 *
 * Each mismatch is reported with fail(). For a range with mismatches a
 * summary line is reported with fail() as well; if there was no mismatch a
 * single ok() line is printed.
 *
 * Returns the number of probes whose return value differed from @expect.
 */
static unsigned int _check_for(int msb, int start, int end, long long expect,
			       const char *expect_str)
{
	const bool is_range = start != end;
	unsigned int err = 0;

	sh->indent++;
	if (is_range)
		sh->indent++;	/* per-call messages nest below the summary */

	for (int nr = start; nr <= end; nr++) {
		long long ret = probe_syscall(msb, nr);

		if (ret != expect) {
			fail("%s returned %lld, but it should have returned %s\n",
			     syscall_str(msb, nr, nr),
			     ret, expect_str);
			err++;
		}
	}

	if (is_range)
		sh->indent--;

	if (err) {
		/* Singular for exactly one failure, plural otherwise. */
		if (is_range)
			fail("%s had %u failure%s\n",
			     syscall_str(msb, start, end),
			     err, err == 1 ? "" : "s");
	} else {
		ok("%s returned %s as expected\n",
		   syscall_str(msb, start, end), expect_str);
	}

	sh->indent--;

	return err;
}
/*
 * Convenience wrapper around _check_for() that passes the source text of
 * @expect as the expect_str argument.
 */
#define check_for(msb,start,end,expect) \
_check_for(msb,start,end,expect,#expect)
/*
 * Check that the single system call @msb:@nr returns 0.
 *
 * The result is _check_for()'s failure count converted to bool, so it is
 * true when the check FAILED and false when it passed.
 */
static bool check_zero(int msb, int nr)
{
return check_for(msb, nr, nr, 0);
}
/*
 * Check that the single system call @msb:@nr returns -ENOSYS.
 *
 * As with check_zero(), the result is true when the check FAILED.
 */
static bool check_enosys(int msb, int nr)
{
return check_for(msb, nr, nr, -ENOSYS);
}
/*
 * Find out whether the running kernel accepts x32 system calls by issuing
 * getpid() with the x32 bit set, and say so: whoever diagnoses a failure
 * will want to know. The answer is stored in with_x32 and also returned,
 * so later tests can be made conditional on it.
 */
static bool test_x32(void)
{
	const pid_t self = getpid();
	long long rc;

	run("Checking for x32 by calling x32 getpid()\n");
	rc = probe_syscall(0, SYS_GETPID | X32_BIT);

	sh->indent++;

	/* Only a getpid() that returns our own pid counts as "supported". */
	with_x32 = (rc == self);
	if (with_x32)
		info("x32 is supported\n");
	else if (rc == -ENOSYS)
		info("x32 is not supported\n");
	else
		fail("x32 getpid() returned %lld, but it should have returned either %lld or -ENOSYS\n", rc, (long long)self);

	sh->indent--;

	return with_x32;
}
/*
 * Checks that apply whether or not the kernel supports x32: a few valid
 * 64-bit system calls must return 0 and several out-of-range ranges must
 * return -ENOSYS, all with @msb in the upper 32 bits of the number.
 */
static void test_syscalls_common(int msb)
{
	/*
	 * In the passes from PTP_FUZZRET on, the tracer replaces rax with
	 * MODIFIED_BY_PTRACE at the system call stop; system call -1 is
	 * expected to return that value instead of -ENOSYS.
	 */
	const bool ret_fuzzed = sh->ptrace_pass >= PTP_FUZZRET;

	run("Checking some common syscalls as 64 bit\n");
	check_zero(msb, SYS_READ);
	check_zero(msb, SYS_WRITE);

	run("Checking some 64-bit only syscalls as 64 bit\n");
	check_zero(msb, X64_READV);
	check_zero(msb, X64_WRITEV);

	run("Checking out of range system calls\n");
	check_for(msb, -64, -2, -ENOSYS);

	if (!ret_fuzzed)
		check_for(msb, -1, -1, -ENOSYS);
	else
		check_for(msb, -1, -1, MODIFIED_BY_PTRACE);

	check_for(msb, X32_BIT-64, X32_BIT-1, -ENOSYS);
	check_for(msb, -64-X32_BIT, -1-X32_BIT, -ENOSYS);
	check_for(msb, INT_MAX-64, INT_MAX-1, -ENOSYS);
}
static void test_syscalls_with_x32(int msb)
{
/*
* Check some syscalls with high bits set.
* Syscalls 512-547 are "x32" syscalls. They are
* intended to be called with the x32 (0x40000000) bit
* set. Calling them without the x32 bit set is
* nonsense and should not work.
*/
printf("[RUN]\tChecking numbers above 2^32-1\n");
check_enosys((1UL << 32), &ok);
check_enosys(X32_BIT | (1UL << 32), &ok);
run("Checking x32 syscalls as 64 bit\n");
check_for(msb, 512, 547, -ENOSYS);
if (!ok)
nerrs++;
else
printf("[OK]\tThey all returned -ENOSYS\n");
run("Checking some common syscalls as x32\n");
check_zero(msb, SYS_READ | X32_BIT);
check_zero(msb, SYS_WRITE | X32_BIT);
run("Checking some x32 syscalls as x32\n");
check_zero(msb, X32_READV | X32_BIT);
check_zero(msb, X32_WRITEV | X32_BIT);
run("Checking some 64-bit syscalls as x32\n");
check_enosys(msb, X64_IOCTL | X32_BIT);
check_enosys(msb, X64_READV | X32_BIT);
check_enosys(msb, X64_WRITEV | X32_BIT);
}
int main()
/*
 * On a kernel without x32 support, system call numbers 0..999 with the
 * x32 bit set must all return -ENOSYS.
 */
static void test_syscalls_without_x32(int msb)
{
	const int first = 0 | X32_BIT;
	const int last = 999 | X32_BIT;

	run("Checking for absence of x32 system calls\n");
	check_for(msb, first, last, -ENOSYS);
}
static void test_syscall_numbering(void)
{
static const int msbs[] = {
0, 1, -1, X32_BIT-1, X32_BIT, X32_BIT-1, -X32_BIT, INT_MAX,
INT_MIN, INT_MIN+1
};
sh->indent++;
/*
* Anyone diagnosing a failure will want to know whether the kernel
* supports x32. Tell them.
* The MSB is supposed to be ignored, so we loop over a few
* to test that out.
*/
printf("\tChecking for x32...");
fflush(stdout);
if (syscall(39 | X32_BIT, 0, 0, 0, 0, 0, 0) >= 0) {
printf(" supported\n");
} else if (errno == ENOSYS) {
printf(" not supported\n");
for (size_t i = 0; i < sizeof(msbs)/sizeof(msbs[0]); i++) {
int msb = msbs[i];
run("Checking system calls with msb = %d (0x%x)\n",
msb, msb);
sh->indent++;
test_syscalls_common(msb);
if (with_x32)
test_syscalls_with_x32(msb);
else
test_syscalls_without_x32(msb);
sh->indent--;
}
sh->indent--;
}
static void syscall_numbering_tracee(void)
{
enum ptrace_pass pass;
if (ptrace(PTRACE_TRACEME, 0, 0, 0)) {
crit("Failed to request tracing\n");
return;
}
raise(SIGSTOP);
for (sh->ptrace_pass = pass = PTP_NOTHING; pass < PTP_DONE;
sh->ptrace_pass = ++pass) {
run("Running tests under ptrace: %s\n", ptrace_pass_name[pass]);
test_syscall_numbering();
}
}
/*
 * Called by the tracer when the tracee stops for a system call issued by
 * probe_syscall(). Depending on @pass, the tracee's registers are left
 * alone, only read, written back unchanged, or written back modified.
 *
 * @testpid: pid of the stopped tracee
 * @pass:    the ptrace pass currently running
 *
 * Whenever the registers are read, orig_rax is compared with the copy of
 * the system call number that probe_syscall() placed in rbx; a mismatch is
 * reported with fail(). A failing PTRACE_GETREGS or PTRACE_SETREGS is
 * reported with fail() as well, rather than being ignored.
 */
static void mess_with_syscall(pid_t testpid, enum ptrace_pass pass)
{
	struct user_regs_struct regs;

	sh->probing_syscall = false; /* Do this on entry only */

	/* For these, don't even getregs */
	if (pass == PTP_NOTHING || pass == PTP_DONE)
		return;

	/* Without a successful read, regs holds nothing worth inspecting. */
	if (ptrace(PTRACE_GETREGS, testpid, NULL, &regs)) {
		fail("PTRACE_GETREGS failed: %s\n", strerror(errno));
		return;
	}

	if (regs.orig_rax != regs.rbx) {
		fail("orig_rax %#llx doesn't match syscall number %#llx\n",
		     (unsigned long long)regs.orig_rax,
		     (unsigned long long)regs.rbx);
	}

	switch (pass) {
	case PTP_GETREGS:
		/* Just read, no writeback */
		return;
	case PTP_WRITEBACK:
		/* Write back the same register state verbatim */
		break;
	case PTP_FUZZRET:
		regs.rax = MODIFIED_BY_PTRACE;
		break;
	case PTP_FUZZHIGH:
		/* Set every bit of the upper half of the system call number */
		regs.rax = MODIFIED_BY_PTRACE;
		regs.orig_rax = regs.orig_rax | 0xffffffff00000000ULL;
		break;
	case PTP_INTNUM:
		/* Replace the number by its low 32 bits, sign-extended */
		regs.rax = MODIFIED_BY_PTRACE;
		regs.orig_rax = (int)regs.orig_rax;
		break;
	default:
		crit("invalid ptrace_pass\n");
		break;
	}

	if (ptrace(PTRACE_SETREGS, testpid, NULL, &regs))
		fail("PTRACE_SETREGS failed: %s\n", strerror(errno));
}
/*
 * Tracer side of the ptrace test: follow the tracee from one stop to the
 * next with PTRACE_SYSCALL, let mess_with_syscall() act on the stops that
 * belong to probe_syscall(), and finally wait for the tracee to terminate.
 *
 * @testpid: pid of the traced child
 *
 * The function returns once the child has exited or been killed, or once
 * waitpid() reports that there is nothing left to wait for. It does not
 * spin when the child terminates early (for example by a signal), and a
 * waitpid() interrupted by a signal is simply retried.
 */
static void syscall_numbering_tracer(pid_t testpid)
{
	int wstatus = 0;
	bool reaped = false;	/* child terminated and already collected */

	do {
		pid_t wpid;

		do {
			wpid = waitpid(testpid, &wstatus, 0);
		} while (wpid < 0 && errno == EINTR);

		if (wpid != testpid)
			break;	/* waitpid() failed; nothing left to trace */

		if (!WIFSTOPPED(wstatus)) {
			/* Not a stop: the child exited or was killed. */
			reaped = true;
			break;
		}

		if (sh->probing_syscall && WSTOPSIG(wstatus) == SIGTRAP)
			mess_with_syscall(testpid, sh->ptrace_pass);
	} while (sh->ptrace_pass != PTP_DONE &&
		 !ptrace(PTRACE_SYSCALL, testpid, NULL, NULL));

	if (reaped)
		return;

	ptrace(PTRACE_DETACH, testpid, NULL, NULL);

	/* Wait for the child process to terminate */
	for (;;) {
		pid_t wpid = waitpid(testpid, &wstatus, 0);

		if (wpid == testpid) {
			if (WIFEXITED(wstatus) || WIFSIGNALED(wstatus))
				break;
		} else if (wpid < 0 && errno != EINTR) {
			break;	/* e.g. ECHILD: no child left to wait for */
		}
	}
}
static void test_traced_syscall_numbering(void)
{
pid_t testpid;
/* Launch the test thread; this thread continues as the tracer thread */
testpid = fork();
if (testpid < 0) {
crit("Unable to launch tracer process\n");
} else if (testpid == 0) {
syscall_numbering_tracee();
_exit(0);
} else {
printf(" confused\n");
syscall_numbering_tracer(testpid);
}
}
test_x32_without_x32_bit();
int main(void)
{
unsigned int nerr;
return nerrs ? 1 : 0;
/*
* It is quite likely to get a segfault on a failure, so make
* sure the message gets out by setting stdout to nonbuffered.
*/
setvbuf(stdout, NULL, _IONBF, 0);
/*
* Harmless file descriptor to work on...
*/
nullfd = open("/dev/null", O_RDWR);
if (nullfd < 0) {
crit("Unable to open /dev/null: %s\n", strerror(errno));
}
/*
* Set up a block of shared memory...
*/
sh = mmap(NULL, sysconf(_SC_PAGE_SIZE), PROT_READ|PROT_WRITE,
MAP_ANONYMOUS|MAP_SHARED, 0, 0);
if (sh == MAP_FAILED) {
crit("Unable to allocated shared memory block: %s\n",
strerror(errno));
}
with_x32 = test_x32();
run("Running tests without ptrace...\n");
test_syscall_numbering();
test_traced_syscall_numbering();
nerr = sh->nerr;
if (!nerr) {
ok("All system calls succeeded or failed as expected\n");
return 0;
} else {
fail("A total of %u system call%s had incorrect behavior\n",
nerr, nerr != 1 ? "s" : "");
return 1;
}
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment