Commit 8a6bd2f4 authored by Linus Torvalds

Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fixes from Thomas Gleixner:
 "An unfortunately larger set of fixes, but a large portion is
  selftests:

   - Fix the missing clusterid initialization for x2apic cluster
     management which caused boot failures due to IPIs being sent to the
     wrong cluster

   - Drop TS_COMPAT when a 64bit executable is exec()'ed from a compat
     task

   - Wrap access to __supported_pte_mask in __startup_64(), where the
     clang build fails due to a non-PC-relative access being generated.

   - Two fixes for 5 level paging fallout in the decompressor:

      - Handle GOT correctly for paging_prepare() and
        cleanup_trampoline()

      - Fix the page table handling in cleanup_trampoline() to avoid
        page table corruption.

   - Stop special casing protection key 0 as this is inconsistent with
     the manpage and also inconsistent with the allocation map handling.

   - Override the protection key when moving away from PROT_EXEC to
     prevent inaccessible memory.

   - Fix and update the protection key selftests to address breakage and
     to cover the above issue

   - Add a MOV SS self test"

[ Part of the x86 fixes were in the earlier core pull due to dependencies ]

* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (21 commits)
  x86/mm: Drop TS_COMPAT on 64-bit exec() syscall
  x86/apic/x2apic: Initialize cluster ID properly
  x86/boot/compressed/64: Fix moving page table out of trampoline memory
  x86/boot/compressed/64: Set up GOT for paging_prepare() and cleanup_trampoline()
  x86/pkeys: Do not special case protection key 0
  x86/pkeys/selftests: Add a test for pkey 0
  x86/pkeys/selftests: Save off 'prot' for allocations
  x86/pkeys/selftests: Fix pointer math
  x86/pkeys: Override pkey when moving away from PROT_EXEC
  x86/pkeys/selftests: Fix pkey exhaustion test off-by-one
  x86/pkeys/selftests: Add PROT_EXEC test
  x86/pkeys/selftests: Factor out "instruction page"
  x86/pkeys/selftests: Allow faults on unknown keys
  x86/pkeys/selftests: Avoid printf-in-signal deadlocks
  x86/pkeys/selftests: Remove dead debugging code, fix dprint_in_signal
  x86/pkeys/selftests: Stop using assert()
  x86/pkeys/selftests: Give better unexpected fault error messages
  x86/selftests: Add mov_to_ss test
  x86/mpx/selftests: Adjust the self-test to fresh distros that export the MPX ABI
  x86/pkeys/selftests: Adjust the self-test to fresh distros that export the pkeys ABI
  ...
parents b9aad922 acf46020
@@ -305,6 +305,25 @@ ENTRY(startup_64)
 	/* Set up the stack */
 	leaq	boot_stack_end(%rbx), %rsp

+	/*
+	 * paging_prepare() and cleanup_trampoline() below can have GOT
+	 * references. Adjust the table with address we are running at.
+	 *
+	 * Zero RAX for adjust_got: the GOT was not adjusted before;
+	 * there's no adjustment to undo.
+	 */
+	xorq	%rax, %rax
+
+	/*
+	 * Calculate the address the binary is loaded at and use it as
+	 * a GOT adjustment.
+	 */
+	call	1f
+1:	popq	%rdi
+	subq	$1b, %rdi
+
+	call	adjust_got
+
 	/*
 	 * At this point we are in long mode with 4-level paging enabled,
 	 * but we might want to enable 5-level paging or vice versa.
@@ -370,10 +389,14 @@ trampoline_return:
 	/*
 	 * cleanup_trampoline() would restore trampoline memory.
 	 *
+	 * RDI is address of the page table to use instead of page table
+	 * in trampoline memory (if required).
+	 *
 	 * RSI holds real mode data and needs to be preserved across
 	 * this function call.
 	 */
 	pushq	%rsi
+	leaq	top_pgtable(%rbx), %rdi
 	call	cleanup_trampoline
 	popq	%rsi
@@ -381,6 +404,21 @@ trampoline_return:
 	pushq	$0
 	popfq

+	/*
+	 * Previously we've adjusted the GOT with address the binary was
+	 * loaded at. Now we need to re-adjust for relocation address.
+	 *
+	 * Calculate the address the binary is loaded at, so that we can
+	 * undo the previous GOT adjustment.
+	 */
+	call	1f
+1:	popq	%rax
+	subq	$1b, %rax
+
+	/* The new adjustment is the relocation address */
+	movq	%rbx, %rdi
+	call	adjust_got
+
 	/*
 	 * Copy the compressed kernel to the end of our buffer
 	 * where decompression in place becomes safe.
@@ -481,19 +519,6 @@ relocated:
 	shrq	$3, %rcx
 	rep	stosq

-/*
- * Adjust our own GOT
- */
-	leaq	_got(%rip), %rdx
-	leaq	_egot(%rip), %rcx
-1:
-	cmpq	%rcx, %rdx
-	jae	2f
-	addq	%rbx, (%rdx)
-	addq	$8, %rdx
-	jmp	1b
-2:
-
 /*
  * Do the extraction, and jump to the new kernel..
  */
@@ -512,6 +537,27 @@ relocated:
 	 */
 	jmp	*%rax

+/*
+ * Adjust the global offset table
+ *
+ * RAX is the previous adjustment of the table to undo (use 0 if it's the
+ * first time we touch GOT).
+ * RDI is the new adjustment to apply.
+ */
+adjust_got:
+	/* Walk through the GOT adding the address to the entries */
+	leaq	_got(%rip), %rdx
+	leaq	_egot(%rip), %rcx
+1:
+	cmpq	%rcx, %rdx
+	jae	2f
+	subq	%rax, (%rdx)	/* Undo previous adjustment */
+	addq	%rdi, (%rdx)	/* Apply the new adjustment */
+	addq	$8, %rdx
+	jmp	1b
+2:
+	ret
+
 	.code32
 /*
  * This is the 32-bit trampoline that will be copied over to low memory.
@@ -649,3 +695,10 @@ boot_stack_end:
 	.balign	4096
 pgtable:
 	.fill BOOT_PGT_SIZE, 1, 0
+
+/*
+ * The page table is going to be used instead of page table in the trampoline
+ * memory.
+ */
+top_pgtable:
+	.fill PAGE_SIZE, 1, 0
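
For reference, adjust_got implements a two-phase rebase of every GOT entry.
A minimal C model of the same logic (illustrative only; got_start/got_end
and the uint64_t view of the entries are assumptions of this sketch, not
code from the patch):

	#include <stdint.h>

	/*
	 * Each GOT entry holds an absolute address, so relocating the
	 * binary means rebasing every entry: undo the adjustment made
	 * for the previous load address, then apply the new one.
	 * old_adj == 0 models the first call, where nothing needs to
	 * be undone.
	 */
	static void adjust_got_model(uint64_t *got_start, uint64_t *got_end,
				     uint64_t old_adj, uint64_t new_adj)
	{
		uint64_t *entry;

		for (entry = got_start; entry < got_end; entry++)
			*entry = *entry - old_adj + new_adj;
	}

The assembly above calls the real routine twice: once with RAX=0 and
RDI=<load address> before paging_prepare(), and once with RAX=<load
address> (recomputed) and RDI=<relocation address> (RBX) after
trampoline_return.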
@@ -22,14 +22,6 @@ struct paging_config {
 /* Buffer to preserve trampoline memory */
 static char trampoline_save[TRAMPOLINE_32BIT_SIZE];

-/*
- * The page table is going to be used instead of page table in the trampoline
- * memory.
- *
- * It must not be in BSS as BSS is cleared after cleanup_trampoline().
- */
-static char top_pgtable[PAGE_SIZE] __aligned(PAGE_SIZE) __section(.data);
-
 /*
  * Trampoline address will be printed by extract_kernel() for debugging
  * purposes.
@@ -134,7 +126,7 @@ struct paging_config paging_prepare(void)
 	return paging_config;
 }

-void cleanup_trampoline(void)
+void cleanup_trampoline(void *pgtable)
 {
 	void *trampoline_pgtable;
@@ -145,8 +137,8 @@ void cleanup_trampoline(void)
 	 * if it's there.
 	 */
 	if ((void *)__native_read_cr3() == trampoline_pgtable) {
-		memcpy(top_pgtable, trampoline_pgtable, PAGE_SIZE);
-		native_write_cr3((unsigned long)top_pgtable);
+		memcpy(pgtable, trampoline_pgtable, PAGE_SIZE);
+		native_write_cr3((unsigned long)pgtable);
 	}

 	/* Restore trampoline memory */
......
@@ -193,7 +193,7 @@ static inline int init_new_context(struct task_struct *tsk,
 #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
 	if (cpu_feature_enabled(X86_FEATURE_OSPKE)) {
-		/* pkey 0 is the default and always allocated */
+		/* pkey 0 is the default and allocated implicitly */
 		mm->context.pkey_allocation_map = 0x1;
 		/* -1 means unallocated or invalid */
 		mm->context.execute_only_pkey = -1;
......
@@ -2,6 +2,8 @@
 #ifndef _ASM_X86_PKEYS_H
 #define _ASM_X86_PKEYS_H

+#define ARCH_DEFAULT_PKEY	0
+
 #define arch_max_pkey() (boot_cpu_has(X86_FEATURE_OSPKE) ? 16 : 1)

 extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
@@ -15,7 +17,7 @@ extern int __execute_only_pkey(struct mm_struct *mm);
 static inline int execute_only_pkey(struct mm_struct *mm)
 {
 	if (!boot_cpu_has(X86_FEATURE_OSPKE))
-		return 0;
+		return ARCH_DEFAULT_PKEY;

 	return __execute_only_pkey(mm);
 }
@@ -49,13 +51,21 @@ bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey)
 {
 	/*
 	 * "Allocated" pkeys are those that have been returned
-	 * from pkey_alloc().  pkey 0 is special, and never
-	 * returned from pkey_alloc().
+	 * from pkey_alloc() or pkey 0 which is allocated
+	 * implicitly when the mm is created.
 	 */
-	if (pkey <= 0)
+	if (pkey < 0)
 		return false;
 	if (pkey >= arch_max_pkey())
 		return false;
+	/*
+	 * The exec-only pkey is set in the allocation map, but
+	 * is not available to any of the user interfaces like
+	 * mprotect_pkey().
+	 */
+	if (pkey == mm->context.execute_only_pkey)
+		return false;

 	return mm_pkey_allocation_map(mm) & (1U << pkey);
 }
......
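
The practical effect of treating pkey 0 as implicitly allocated can be seen
from userspace. A minimal sketch (assuming an x86 kernel with pkeys and a
libc that exposes the pkey syscall numbers; error handling omitted):

	#define _GNU_SOURCE
	#include <stdio.h>
	#include <sys/mman.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	int main(void)
	{
		size_t len = 4096;
		/* Anonymous mappings start out under the default pkey 0. */
		void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
			       MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);

		/*
		 * With this change, explicitly passing pkey 0 is accepted:
		 * mm_pkey_is_allocated(mm, 0) is now true, so the kernel no
		 * longer rejects it with EINVAL, matching the manpage.
		 */
		long ret = syscall(SYS_pkey_mprotect, p, len,
				   PROT_READ | PROT_WRITE, 0);
		printf("pkey_mprotect(..., 0) returned %ld\n", ret);
		return 0;
	}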
@@ -116,6 +116,7 @@ static void init_x2apic_ldr(void)
 		goto update;
 	}
 	cmsk = cluster_hotplug_mask;
+	cmsk->clusterid = cluster;
 	cluster_hotplug_mask = NULL;
update:
 	this_cpu_write(cluster_masks, cmsk);
......
@@ -104,6 +104,12 @@ static bool __head check_la57_support(unsigned long physaddr)
 }
 #endif

+/* Code in __startup_64() can be relocated during execution, but the compiler
+ * doesn't have to generate PC-relative relocations when accessing globals from
+ * that function. Clang actually does not generate them, which leads to
+ * boot-time crashes. To work around this problem, every global pointer must
+ * be adjusted using fixup_pointer().
+ */
 unsigned long __head __startup_64(unsigned long physaddr,
 				  struct boot_params *bp)
 {
@@ -113,6 +119,7 @@ unsigned long __head __startup_64(unsigned long physaddr,
 	p4dval_t *p4d;
 	pudval_t *pud;
 	pmdval_t *pmd, pmd_entry;
+	pteval_t *mask_ptr;
 	bool la57;
 	int i;
 	unsigned int *next_pgt_ptr;
@@ -196,7 +203,8 @@ unsigned long __head __startup_64(unsigned long physaddr,
 	pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL;
 	/* Filter out unsupported __PAGE_KERNEL_* bits: */
-	pmd_entry &= __supported_pte_mask;
+	mask_ptr = fixup_pointer(&__supported_pte_mask, physaddr);
+	pmd_entry &= *mask_ptr;
 	pmd_entry += sme_get_me_mask();
 	pmd_entry += physaddr;
......
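
The fixup_pointer() helper referenced in the new comment already exists in
head64.c; its body is essentially the following (shown here for context,
modulo the __head annotation):

	/*
	 * Rebase a pointer to a global variable so it can be dereferenced
	 * while running from the identity mapping: strip the link-time
	 * base (_text) and add the physical address the kernel was
	 * actually loaded at. This sidesteps the missing PC-relative
	 * relocation entirely.
	 */
	static void *fixup_pointer(void *ptr, unsigned long physaddr)
	{
		return ptr - (void *)_text + (void *)physaddr;
	}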
@@ -542,6 +542,7 @@ void set_personality_64bit(void)
 	clear_thread_flag(TIF_X32);
 	/* Pretend that this comes from a 64bit execve */
 	task_pt_regs(current)->orig_ax = __NR_execve;
+	current_thread_info()->status &= ~TS_COMPAT;

 	/* Ensure the corresponding mm is not marked. */
 	if (current->mm)
......
@@ -94,26 +94,27 @@ int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, int pkey
 	 */
 	if (pkey != -1)
 		return pkey;
-	/*
-	 * Look for a protection-key-drive execute-only mapping
-	 * which is now being given permissions that are not
-	 * execute-only.  Move it back to the default pkey.
-	 */
-	if (vma_is_pkey_exec_only(vma) &&
-	    (prot & (PROT_READ|PROT_WRITE))) {
-		return 0;
-	}
+
 	/*
 	 * The mapping is execute-only.  Go try to get the
 	 * execute-only protection key.  If we fail to do that,
 	 * fall through as if we do not have execute-only
-	 * support.
+	 * support in this mm.
 	 */
 	if (prot == PROT_EXEC) {
 		pkey = execute_only_pkey(vma->vm_mm);
 		if (pkey > 0)
 			return pkey;
+	} else if (vma_is_pkey_exec_only(vma)) {
+		/*
+		 * Protections are *not* PROT_EXEC, but the mapping
+		 * is using the exec-only pkey.  This mapping was
+		 * PROT_EXEC and will no longer be.  Move back to
+		 * the default pkey.
+		 */
+		return ARCH_DEFAULT_PKEY;
 	}
+
 	/*
 	 * This is a vanilla, non-pkey mprotect (or we failed to
 	 * setup execute-only), inherit the pkey from the VMA we
......
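
A minimal userspace illustration of the bug this hunk fixes (a sketch only;
assumes pkeys-capable hardware, no error handling):

	#include <sys/mman.h>

	void demo(void *p, size_t len)
	{
		/*
		 * A plain mprotect(PROT_EXEC) makes the kernel back the
		 * VMA with the execute-only pkey.
		 */
		mprotect(p, len, PROT_EXEC);

		/*
		 * Moving away from PROT_EXEC must drop that pkey again.
		 * Before this fix the VMA kept the exec-only pkey, so the
		 * memory stayed unreadable even though PROT_READ was
		 * granted; now __arch_override_mprotect_pkey() returns
		 * ARCH_DEFAULT_PKEY for this case.
		 */
		mprotect(p, len, PROT_READ);
	}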
@@ -11,7 +11,7 @@ CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c)
 TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \
 			check_initial_reg_state sigreturn iopl mpx-mini-test ioperm \
-			protection_keys test_vdso test_vsyscall
+			protection_keys test_vdso test_vsyscall mov_ss_trap
 TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
 			test_FCMOV test_FCOMI test_FISTTP \
 			vdso_restorer
......
/* SPDX-License-Identifier: GPL-2.0 */
/*
* mov_ss_trap.c: Exercise the bizarre side effects of a watchpoint on MOV SS
*
* This does MOV SS from a watchpointed address followed by various
* types of kernel entries. A MOV SS that hits a watchpoint will queue
* up a #DB trap but will not actually deliver that trap. The trap
* will be delivered after the next instruction instead. The CPU's logic
* seems to be:
*
* - Any fault: drop the pending #DB trap.
* - INT $N, INT3, INTO, SYSCALL, SYSENTER: enter the kernel and then
* deliver #DB.
* - ICEBP: enter the kernel but do not deliver the watchpoint trap
* - breakpoint: only one #DB is delivered (phew!)
*
* There are plenty of ways for a kernel to handle this incorrectly. This
* test tries to exercise all the cases.
*
* This should mostly cover CVE-2018-1087 and CVE-2018-8897.
*/
#define _GNU_SOURCE
#include <stdlib.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/user.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <errno.h>
#include <stddef.h>
#include <stdio.h>
#include <err.h>
#include <string.h>
#include <setjmp.h>
#include <sys/prctl.h>
#define X86_EFLAGS_RF (1UL << 16)
#if __x86_64__
# define REG_IP REG_RIP
#else
# define REG_IP REG_EIP
#endif
unsigned short ss;
extern unsigned char breakpoint_insn[];
sigjmp_buf jmpbuf;
static unsigned char altstack_data[SIGSTKSZ];
static void enable_watchpoint(void)
{
pid_t parent = getpid();
int status;
pid_t child = fork();
if (child < 0)
err(1, "fork");
if (child) {
if (waitpid(child, &status, 0) != child)
err(1, "waitpid for child");
} else {
unsigned long dr0, dr1, dr7;
dr0 = (unsigned long)&ss;
dr1 = (unsigned long)breakpoint_insn;
dr7 = ((1UL << 1) | /* G0 */
(3UL << 16) | /* RW0 = read or write */
(1UL << 18) | /* LEN0 = 2 bytes */
(1UL << 3)); /* G1, RW1 = insn */
if (ptrace(PTRACE_ATTACH, parent, NULL, NULL) != 0)
err(1, "PTRACE_ATTACH");
if (waitpid(parent, &status, 0) != parent)
err(1, "waitpid for child");
if (ptrace(PTRACE_POKEUSER, parent, (void *)offsetof(struct user, u_debugreg[0]), dr0) != 0)
err(1, "PTRACE_POKEUSER DR0");
if (ptrace(PTRACE_POKEUSER, parent, (void *)offsetof(struct user, u_debugreg[1]), dr1) != 0)
err(1, "PTRACE_POKEUSER DR1");
if (ptrace(PTRACE_POKEUSER, parent, (void *)offsetof(struct user, u_debugreg[7]), dr7) != 0)
err(1, "PTRACE_POKEUSER DR7");
printf("\tDR0 = %lx, DR1 = %lx, DR7 = %lx\n", dr0, dr1, dr7);
if (ptrace(PTRACE_DETACH, parent, NULL, NULL) != 0)
err(1, "PTRACE_DETACH");
exit(0);
}
}
static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
int flags)
{
struct sigaction sa;
memset(&sa, 0, sizeof(sa));
sa.sa_sigaction = handler;
sa.sa_flags = SA_SIGINFO | flags;
sigemptyset(&sa.sa_mask);
if (sigaction(sig, &sa, 0))
err(1, "sigaction");
}
static char const * const signames[] = {
[SIGSEGV] = "SIGSEGV",
[SIGBUS] = "SIBGUS",
[SIGTRAP] = "SIGTRAP",
[SIGILL] = "SIGILL",
};
static void sigtrap(int sig, siginfo_t *si, void *ctx_void)
{
ucontext_t *ctx = ctx_void;
printf("\tGot SIGTRAP with RIP=%lx, EFLAGS.RF=%d\n",
(unsigned long)ctx->uc_mcontext.gregs[REG_IP],
!!(ctx->uc_mcontext.gregs[REG_EFL] & X86_EFLAGS_RF));
}
static void handle_and_return(int sig, siginfo_t *si, void *ctx_void)
{
ucontext_t *ctx = ctx_void;
printf("\tGot %s with RIP=%lx\n", signames[sig],
(unsigned long)ctx->uc_mcontext.gregs[REG_IP]);
}
static void handle_and_longjmp(int sig, siginfo_t *si, void *ctx_void)
{
ucontext_t *ctx = ctx_void;
printf("\tGot %s with RIP=%lx\n", signames[sig],
(unsigned long)ctx->uc_mcontext.gregs[REG_IP]);
siglongjmp(jmpbuf, 1);
}
int main()
{
unsigned long nr;
asm volatile ("mov %%ss, %[ss]" : [ss] "=m" (ss));
printf("\tSS = 0x%hx, &SS = 0x%p\n", ss, &ss);
if (prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY, 0, 0, 0) == 0)
printf("\tPR_SET_PTRACER_ANY succeeded\n");
printf("\tSet up a watchpoint\n");
sethandler(SIGTRAP, sigtrap, 0);
enable_watchpoint();
printf("[RUN]\tRead from watched memory (should get SIGTRAP)\n");
asm volatile ("mov %[ss], %[tmp]" : [tmp] "=r" (nr) : [ss] "m" (ss));
printf("[RUN]\tMOV SS; INT3\n");
asm volatile ("mov %[ss], %%ss; int3" :: [ss] "m" (ss));
printf("[RUN]\tMOV SS; INT 3\n");
asm volatile ("mov %[ss], %%ss; .byte 0xcd, 0x3" :: [ss] "m" (ss));
printf("[RUN]\tMOV SS; CS CS INT3\n");
asm volatile ("mov %[ss], %%ss; .byte 0x2e, 0x2e; int3" :: [ss] "m" (ss));
printf("[RUN]\tMOV SS; CSx14 INT3\n");
asm volatile ("mov %[ss], %%ss; .fill 14,1,0x2e; int3" :: [ss] "m" (ss));
printf("[RUN]\tMOV SS; INT 4\n");
sethandler(SIGSEGV, handle_and_return, SA_RESETHAND);
asm volatile ("mov %[ss], %%ss; int $4" :: [ss] "m" (ss));
#ifdef __i386__
printf("[RUN]\tMOV SS; INTO\n");
sethandler(SIGSEGV, handle_and_return, SA_RESETHAND);
nr = -1;
asm volatile ("add $1, %[tmp]; mov %[ss], %%ss; into"
: [tmp] "+r" (nr) : [ss] "m" (ss));
#endif
if (sigsetjmp(jmpbuf, 1) == 0) {
printf("[RUN]\tMOV SS; ICEBP\n");
/* Some emulators (e.g. QEMU TCG) don't emulate ICEBP. */
sethandler(SIGILL, handle_and_longjmp, SA_RESETHAND);
asm volatile ("mov %[ss], %%ss; .byte 0xf1" :: [ss] "m" (ss));
}
if (sigsetjmp(jmpbuf, 1) == 0) {
printf("[RUN]\tMOV SS; CLI\n");
sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND);
asm volatile ("mov %[ss], %%ss; cli" :: [ss] "m" (ss));
}
if (sigsetjmp(jmpbuf, 1) == 0) {
printf("[RUN]\tMOV SS; #PF\n");
sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND);
asm volatile ("mov %[ss], %%ss; mov (-1), %[tmp]"
: [tmp] "=r" (nr) : [ss] "m" (ss));
}
/*
* INT $1: if #DB has DPL=3 and there isn't special handling,
* then the kernel will die.
*/
if (sigsetjmp(jmpbuf, 1) == 0) {
printf("[RUN]\tMOV SS; INT 1\n");
sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND);
asm volatile ("mov %[ss], %%ss; int $1" :: [ss] "m" (ss));
}
#ifdef __x86_64__
/*
* In principle, we should test 32-bit SYSCALL as well, but
* the calling convention is so unpredictable that it's
* not obviously worth the effort.
*/
if (sigsetjmp(jmpbuf, 1) == 0) {
printf("[RUN]\tMOV SS; SYSCALL\n");
sethandler(SIGILL, handle_and_longjmp, SA_RESETHAND);
nr = SYS_getpid;
/*
* Toggle the high bit of RSP to make it noncanonical to
* strengthen this test on non-SMAP systems.
*/
asm volatile ("btc $63, %%rsp\n\t"
"mov %[ss], %%ss; syscall\n\t"
"btc $63, %%rsp"
: "+a" (nr) : [ss] "m" (ss)
: "rcx"
#ifdef __x86_64__
, "r11"
#endif
);
}
#endif
printf("[RUN]\tMOV SS; breakpointed NOP\n");
asm volatile ("mov %[ss], %%ss; breakpoint_insn: nop" :: [ss] "m" (ss));
/*
* Invoking SYSENTER directly breaks all the rules. Just handle
* the SIGSEGV.
*/
if (sigsetjmp(jmpbuf, 1) == 0) {
printf("[RUN]\tMOV SS; SYSENTER\n");
stack_t stack = {
.ss_sp = altstack_data,
.ss_size = SIGSTKSZ,
};
if (sigaltstack(&stack, NULL) != 0)
err(1, "sigaltstack");
sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND | SA_ONSTACK);
nr = SYS_getpid;
asm volatile ("mov %[ss], %%ss; SYSENTER" : "+a" (nr)
: [ss] "m" (ss) : "flags", "rcx"
#ifdef __x86_64__
, "r11"
#endif
);
/* We're unreachable here. SYSENTER forgets RIP. */
}
if (sigsetjmp(jmpbuf, 1) == 0) {
printf("[RUN]\tMOV SS; INT $0x80\n");
sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND);
nr = 20; /* compat getpid */
asm volatile ("mov %[ss], %%ss; int $0x80"
: "+a" (nr) : [ss] "m" (ss)
: "flags"
#ifdef __x86_64__
, "r8", "r9", "r10", "r11"
#endif
);
}
printf("[OK]\tI aten't dead\n");
return 0;
}
@@ -368,6 +368,11 @@ static int expected_bnd_index = -1;
 uint64_t shadow_plb[NR_MPX_BOUNDS_REGISTERS][2]; /* shadow MPX bound registers */
 unsigned long shadow_map[NR_MPX_BOUNDS_REGISTERS];

+/* Failed address bound checks: */
+#ifndef SEGV_BNDERR
+# define SEGV_BNDERR	3
+#endif
+
 /*
  * The kernel is supposed to provide some information about the bounds
  * exception in the siginfo.  It should match what we have in the bounds
@@ -419,8 +424,6 @@ void handler(int signum, siginfo_t *si, void *vucontext)
 	br_count++;
 	dprintf1("#BR 0x%jx (total seen: %d)\n", status, br_count);

-#define SEGV_BNDERR	3  /* failed address bound checks */
-
 	dprintf2("Saw a #BR! status 0x%jx at %016lx br_reason: %jx\n",
 		status, ip, br_reason);
 	dprintf2("si_signo: %d\n", si->si_signo);
......
@@ -26,30 +26,26 @@ static inline void sigsafe_printf(const char *format, ...)
 {
 	va_list ap;

-	va_start(ap, format);
 	if (!dprint_in_signal) {
+		va_start(ap, format);
 		vprintf(format, ap);
+		va_end(ap);
 	} else {
 		int ret;
-		int len = vsnprintf(dprint_in_signal_buffer,
-				    DPRINT_IN_SIGNAL_BUF_SIZE,
-				    format, ap);
 		/*
-		 * len is amount that would have been printed,
-		 * but actual write is truncated at BUF_SIZE.
+		 * No printf() functions are signal-safe.
+		 * They deadlock easily. Write the format
+		 * string to get some output, even if
+		 * incomplete.
 		 */
-		if (len > DPRINT_IN_SIGNAL_BUF_SIZE)
-			len = DPRINT_IN_SIGNAL_BUF_SIZE;
-		ret = write(1, dprint_in_signal_buffer, len);
+		ret = write(1, format, strlen(format));
 		if (ret < 0)
-			abort();
+			exit(1);
 	}
-	va_end(ap);
 }
 #define dprintf_level(level, args...) do {	\
 	if (level <= DEBUG_LEVEL)		\
 		sigsafe_printf(args);		\
-	fflush(NULL);				\
 } while (0)
 #define dprintf0(args...) dprintf_level(0, args)
 #define dprintf1(args...) dprintf_level(1, args)
......