Commit f6170f0a authored by Linus Torvalds

Merge branch 'x86-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull misc x86 updates from Ingo Molnar:
 "Misc changes:

   - Enhance #GP fault printouts by distinguishing between canonical and
     non-canonical address faults, and also add KASAN fault decoding.

   - Fix/enhance the x86 NMI handler by putting the duration check into
     a direct function call instead of an irq_work which we know to be
     broken in some cases.

   - Clean up do_general_protection() a bit"

* 'x86-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/nmi: Remove irq_work from the long duration NMI handler
  x86/traps: Cleanup do_general_protection()
  x86/kasan: Print original address on #GP
  x86/dumpstack: Introduce die_addr() for die() with #GP fault address
  x86/traps: Print address on #GP
  x86/insn-eval: Add support for 64-bit kernel mode
parents 6da49d1a 248ed510
...@@ -33,6 +33,7 @@ enum show_regs_mode { ...@@ -33,6 +33,7 @@ enum show_regs_mode {
}; };
extern void die(const char *, struct pt_regs *,long); extern void die(const char *, struct pt_regs *,long);
void die_addr(const char *str, struct pt_regs *regs, long err, long gp_addr);
extern int __must_check __die(const char *, struct pt_regs *, long); extern int __must_check __die(const char *, struct pt_regs *, long);
extern void show_stack_regs(struct pt_regs *regs); extern void show_stack_regs(struct pt_regs *regs);
extern void __show_regs(struct pt_regs *regs, enum show_regs_mode); extern void __show_regs(struct pt_regs *regs, enum show_regs_mode);
......
...@@ -41,7 +41,6 @@ struct nmiaction { ...@@ -41,7 +41,6 @@ struct nmiaction {
struct list_head list; struct list_head list;
nmi_handler_t handler; nmi_handler_t handler;
u64 max_duration; u64 max_duration;
struct irq_work irq_work;
unsigned long flags; unsigned long flags;
const char *name; const char *name;
}; };
......
...@@ -159,6 +159,19 @@ static inline bool user_64bit_mode(struct pt_regs *regs) ...@@ -159,6 +159,19 @@ static inline bool user_64bit_mode(struct pt_regs *regs)
#endif #endif
} }
/*
 * Report whether @regs describes a context executing in 64-bit mode.
 *
 * On CONFIG_X86_64 builds, anything that is not user mode counts as 64-bit;
 * user-mode register sets defer to user_64bit_mode(). Without CONFIG_X86_64
 * the answer is always false.
 */
static inline bool any_64bit_mode(struct pt_regs *regs)
{
#ifdef CONFIG_X86_64
	if (user_mode(regs))
		return user_64bit_mode(regs);

	return true;
#else
	return false;
#endif
}
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
#define current_user_stack_pointer() current_pt_regs()->sp #define current_user_stack_pointer() current_pt_regs()->sp
#define compat_user_stack_pointer() current_pt_regs()->sp #define compat_user_stack_pointer() current_pt_regs()->sp
......
...@@ -365,7 +365,7 @@ void oops_end(unsigned long flags, struct pt_regs *regs, int signr) ...@@ -365,7 +365,7 @@ void oops_end(unsigned long flags, struct pt_regs *regs, int signr)
} }
NOKPROBE_SYMBOL(oops_end); NOKPROBE_SYMBOL(oops_end);
int __die(const char *str, struct pt_regs *regs, long err) static void __die_header(const char *str, struct pt_regs *regs, long err)
{ {
const char *pr = ""; const char *pr = "";
...@@ -384,7 +384,11 @@ int __die(const char *str, struct pt_regs *regs, long err) ...@@ -384,7 +384,11 @@ int __die(const char *str, struct pt_regs *regs, long err)
IS_ENABLED(CONFIG_KASAN) ? " KASAN" : "", IS_ENABLED(CONFIG_KASAN) ? " KASAN" : "",
IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION) ? IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION) ?
(boot_cpu_has(X86_FEATURE_PTI) ? " PTI" : " NOPTI") : ""); (boot_cpu_has(X86_FEATURE_PTI) ? " PTI" : " NOPTI") : "");
}
NOKPROBE_SYMBOL(__die_header);
static int __die_body(const char *str, struct pt_regs *regs, long err)
{
show_regs(regs); show_regs(regs);
print_modules(); print_modules();
...@@ -394,6 +398,13 @@ int __die(const char *str, struct pt_regs *regs, long err) ...@@ -394,6 +398,13 @@ int __die(const char *str, struct pt_regs *regs, long err)
return 0; return 0;
} }
NOKPROBE_SYMBOL(__die_body);
/*
 * Emit the oops header for @str/@regs/@err, then run the body stage and
 * hand its return value back to the caller unchanged.
 */
int __die(const char *str, struct pt_regs *regs, long err)
{
	int ret;

	__die_header(str, regs, err);
	ret = __die_body(str, regs, err);

	return ret;
}
NOKPROBE_SYMBOL(__die); NOKPROBE_SYMBOL(__die);
/* /*
...@@ -410,6 +421,19 @@ void die(const char *str, struct pt_regs *regs, long err) ...@@ -410,6 +421,19 @@ void die(const char *str, struct pt_regs *regs, long err)
oops_end(flags, regs, sig); oops_end(flags, regs, sig);
} }
/*
 * Oops path that additionally carries a faulting address.
 *
 * Runs between oops_begin() and oops_end(): prints the header, passes a
 * non-zero @gp_addr to kasan_non_canonical_hook(), then prints the body.
 * oops_end() is given SIGSEGV unless __die_body() returned non-zero, in
 * which case it is given 0.
 */
void die_addr(const char *str, struct pt_regs *regs, long err, long gp_addr)
{
	unsigned long flags = oops_begin();
	int sig;

	__die_header(str, regs, err);

	/* A zero address means the caller has nothing for KASAN to decode. */
	if (gp_addr != 0)
		kasan_non_canonical_hook(gp_addr);

	sig = __die_body(str, regs, err) ? 0 : SIGSEGV;

	oops_end(flags, regs, sig);
}
void show_regs(struct pt_regs *regs) void show_regs(struct pt_regs *regs)
{ {
show_regs_print_info(KERN_DEFAULT); show_regs_print_info(KERN_DEFAULT);
......
...@@ -104,18 +104,22 @@ static int __init nmi_warning_debugfs(void) ...@@ -104,18 +104,22 @@ static int __init nmi_warning_debugfs(void)
} }
fs_initcall(nmi_warning_debugfs); fs_initcall(nmi_warning_debugfs);
/*
 * Track the longest run time seen for an NMI handler and report new maxima.
 *
 * @action:   the handler whose run was just timed
 * @duration: how long that run took, in nanoseconds
 *
 * Nothing happens unless @duration reaches both the global nmi_longest_ns
 * threshold and the handler's current max_duration. Otherwise the new
 * maximum is stored and a rate-limited message is printed.
 */
static void nmi_check_duration(struct nmiaction *action, u64 duration)
{
	int remainder_ns, decimal_msecs;
	u64 whole_msecs;

	if (duration < nmi_longest_ns || duration < action->max_duration)
		return;

	action->max_duration = duration;

	/*
	 * Report the duration that was just measured. Reading max_duration
	 * before the update above would print the previous maximum instead.
	 */
	whole_msecs = duration;

	/* do_div() leaves the quotient in whole_msecs and returns the remainder. */
	remainder_ns = do_div(whole_msecs, (1000 * 1000));
	decimal_msecs = remainder_ns / 1000;

	printk_ratelimited(KERN_INFO
		"INFO: NMI handler (%ps) took too long to run: %lld.%03d msecs\n",
		action->handler, whole_msecs, decimal_msecs);
}
static int nmi_handle(unsigned int type, struct pt_regs *regs) static int nmi_handle(unsigned int type, struct pt_regs *regs)
...@@ -142,11 +146,7 @@ static int nmi_handle(unsigned int type, struct pt_regs *regs) ...@@ -142,11 +146,7 @@ static int nmi_handle(unsigned int type, struct pt_regs *regs)
delta = sched_clock() - delta; delta = sched_clock() - delta;
trace_nmi_handler(a->handler, (int)delta, thishandled); trace_nmi_handler(a->handler, (int)delta, thishandled);
if (delta < nmi_longest_ns || delta < a->max_duration) nmi_check_duration(a, delta);
continue;
a->max_duration = delta;
irq_work_queue(&a->irq_work);
} }
rcu_read_unlock(); rcu_read_unlock();
...@@ -164,8 +164,6 @@ int __register_nmi_handler(unsigned int type, struct nmiaction *action) ...@@ -164,8 +164,6 @@ int __register_nmi_handler(unsigned int type, struct nmiaction *action)
if (!action->handler) if (!action->handler)
return -EINVAL; return -EINVAL;
init_irq_work(&action->irq_work, nmi_max_handler);
raw_spin_lock_irqsave(&desc->lock, flags); raw_spin_lock_irqsave(&desc->lock, flags);
/* /*
......
...@@ -56,6 +56,8 @@ ...@@ -56,6 +56,8 @@
#include <asm/mpx.h> #include <asm/mpx.h>
#include <asm/vm86.h> #include <asm/vm86.h>
#include <asm/umip.h> #include <asm/umip.h>
#include <asm/insn.h>
#include <asm/insn-eval.h>
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
#include <asm/x86_init.h> #include <asm/x86_init.h>
...@@ -518,11 +520,57 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) ...@@ -518,11 +520,57 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, 0, NULL); do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, 0, NULL);
} }
dotraplinkage void enum kernel_gp_hint {
do_general_protection(struct pt_regs *regs, long error_code) GP_NO_HINT,
GP_NON_CANONICAL,
GP_CANONICAL
};
/*
* When an uncaught #GP occurs, try to determine the memory address accessed by
* the instruction and return that address to the caller. Also, try to figure
* out whether any part of the access to that address was non-canonical.
*/
static enum kernel_gp_hint get_kernel_gp_address(struct pt_regs *regs,
unsigned long *addr)
{ {
const char *desc = "general protection fault"; u8 insn_buf[MAX_INSN_SIZE];
struct insn insn;
if (probe_kernel_read(insn_buf, (void *)regs->ip, MAX_INSN_SIZE))
return GP_NO_HINT;
kernel_insn_init(&insn, insn_buf, MAX_INSN_SIZE);
insn_get_modrm(&insn);
insn_get_sib(&insn);
*addr = (unsigned long)insn_get_addr_ref(&insn, regs);
if (*addr == -1UL)
return GP_NO_HINT;
#ifdef CONFIG_X86_64
/*
* Check that:
* - the operand is not in the kernel half
* - the last byte of the operand is not in the user canonical half
*/
if (*addr < ~__VIRTUAL_MASK &&
*addr + insn.opnd_bytes - 1 > __VIRTUAL_MASK)
return GP_NON_CANONICAL;
#endif
return GP_CANONICAL;
}
#define GPFSTR "general protection fault"
dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code)
{
char desc[sizeof(GPFSTR) + 50 + 2*sizeof(unsigned long) + 1] = GPFSTR;
enum kernel_gp_hint hint = GP_NO_HINT;
struct task_struct *tsk; struct task_struct *tsk;
unsigned long gp_addr;
int ret;
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
cond_local_irq_enable(regs); cond_local_irq_enable(regs);
...@@ -539,34 +587,56 @@ do_general_protection(struct pt_regs *regs, long error_code) ...@@ -539,34 +587,56 @@ do_general_protection(struct pt_regs *regs, long error_code)
} }
tsk = current; tsk = current;
if (!user_mode(regs)) {
if (fixup_exception(regs, X86_TRAP_GP, error_code, 0))
return;
if (user_mode(regs)) {
tsk->thread.error_code = error_code; tsk->thread.error_code = error_code;
tsk->thread.trap_nr = X86_TRAP_GP; tsk->thread.trap_nr = X86_TRAP_GP;
/* show_signal(tsk, SIGSEGV, "", desc, regs, error_code);
* To be potentially processing a kprobe fault and to force_sig(SIGSEGV);
* trust the result from kprobe_running(), we have to
* be non-preemptible.
*/
if (!preemptible() && kprobe_running() &&
kprobe_fault_handler(regs, X86_TRAP_GP))
return;
if (notify_die(DIE_GPF, desc, regs, error_code,
X86_TRAP_GP, SIGSEGV) != NOTIFY_STOP)
die(desc, regs, error_code);
return; return;
} }
if (fixup_exception(regs, X86_TRAP_GP, error_code, 0))
return;
tsk->thread.error_code = error_code; tsk->thread.error_code = error_code;
tsk->thread.trap_nr = X86_TRAP_GP; tsk->thread.trap_nr = X86_TRAP_GP;
show_signal(tsk, SIGSEGV, "", desc, regs, error_code); /*
* To be potentially processing a kprobe fault and to trust the result
* from kprobe_running(), we have to be non-preemptible.
*/
if (!preemptible() &&
kprobe_running() &&
kprobe_fault_handler(regs, X86_TRAP_GP))
return;
ret = notify_die(DIE_GPF, desc, regs, error_code, X86_TRAP_GP, SIGSEGV);
if (ret == NOTIFY_STOP)
return;
if (error_code)
snprintf(desc, sizeof(desc), "segment-related " GPFSTR);
else
hint = get_kernel_gp_address(regs, &gp_addr);
if (hint != GP_NO_HINT)
snprintf(desc, sizeof(desc), GPFSTR ", %s 0x%lx",
(hint == GP_NON_CANONICAL) ? "probably for non-canonical address"
: "maybe for address",
gp_addr);
/*
* KASAN is interested only in the non-canonical case, clear it
* otherwise.
*/
if (hint != GP_NON_CANONICAL)
gp_addr = 0;
die_addr(desc, regs, error_code, gp_addr);
force_sig(SIGSEGV);
} }
NOKPROBE_SYMBOL(do_general_protection); NOKPROBE_SYMBOL(do_general_protection);
......
...@@ -155,7 +155,7 @@ static bool check_seg_overrides(struct insn *insn, int regoff) ...@@ -155,7 +155,7 @@ static bool check_seg_overrides(struct insn *insn, int regoff)
*/ */
static int resolve_default_seg(struct insn *insn, struct pt_regs *regs, int off) static int resolve_default_seg(struct insn *insn, struct pt_regs *regs, int off)
{ {
if (user_64bit_mode(regs)) if (any_64bit_mode(regs))
return INAT_SEG_REG_IGNORE; return INAT_SEG_REG_IGNORE;
/* /*
* Resolve the default segment register as described in Section 3.7.4 * Resolve the default segment register as described in Section 3.7.4
...@@ -266,7 +266,7 @@ static int resolve_seg_reg(struct insn *insn, struct pt_regs *regs, int regoff) ...@@ -266,7 +266,7 @@ static int resolve_seg_reg(struct insn *insn, struct pt_regs *regs, int regoff)
* which may be invalid at this point. * which may be invalid at this point.
*/ */
if (regoff == offsetof(struct pt_regs, ip)) { if (regoff == offsetof(struct pt_regs, ip)) {
if (user_64bit_mode(regs)) if (any_64bit_mode(regs))
return INAT_SEG_REG_IGNORE; return INAT_SEG_REG_IGNORE;
else else
return INAT_SEG_REG_CS; return INAT_SEG_REG_CS;
...@@ -289,7 +289,7 @@ static int resolve_seg_reg(struct insn *insn, struct pt_regs *regs, int regoff) ...@@ -289,7 +289,7 @@ static int resolve_seg_reg(struct insn *insn, struct pt_regs *regs, int regoff)
* In long mode, segment override prefixes are ignored, except for * In long mode, segment override prefixes are ignored, except for
* overrides for FS and GS. * overrides for FS and GS.
*/ */
if (user_64bit_mode(regs)) { if (any_64bit_mode(regs)) {
if (idx != INAT_SEG_REG_FS && if (idx != INAT_SEG_REG_FS &&
idx != INAT_SEG_REG_GS) idx != INAT_SEG_REG_GS)
idx = INAT_SEG_REG_IGNORE; idx = INAT_SEG_REG_IGNORE;
...@@ -646,23 +646,27 @@ unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx) ...@@ -646,23 +646,27 @@ unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx)
*/ */
return (unsigned long)(sel << 4); return (unsigned long)(sel << 4);
if (user_64bit_mode(regs)) { if (any_64bit_mode(regs)) {
/* /*
* Only FS or GS will have a base address, the rest of * Only FS or GS will have a base address, the rest of
* the segments' bases are forced to 0. * the segments' bases are forced to 0.
*/ */
unsigned long base; unsigned long base;
if (seg_reg_idx == INAT_SEG_REG_FS) if (seg_reg_idx == INAT_SEG_REG_FS) {
rdmsrl(MSR_FS_BASE, base); rdmsrl(MSR_FS_BASE, base);
else if (seg_reg_idx == INAT_SEG_REG_GS) } else if (seg_reg_idx == INAT_SEG_REG_GS) {
/* /*
* swapgs was called at the kernel entry point. Thus, * swapgs was called at the kernel entry point. Thus,
* MSR_KERNEL_GS_BASE will have the user-space GS base. * MSR_KERNEL_GS_BASE will have the user-space GS base.
*/ */
rdmsrl(MSR_KERNEL_GS_BASE, base); if (user_mode(regs))
else rdmsrl(MSR_KERNEL_GS_BASE, base);
else
rdmsrl(MSR_GS_BASE, base);
} else {
base = 0; base = 0;
}
return base; return base;
} }
...@@ -703,7 +707,7 @@ static unsigned long get_seg_limit(struct pt_regs *regs, int seg_reg_idx) ...@@ -703,7 +707,7 @@ static unsigned long get_seg_limit(struct pt_regs *regs, int seg_reg_idx)
if (sel < 0) if (sel < 0)
return 0; return 0;
if (user_64bit_mode(regs) || v8086_mode(regs)) if (any_64bit_mode(regs) || v8086_mode(regs))
return -1L; return -1L;
if (!sel) if (!sel)
...@@ -948,7 +952,7 @@ static int get_eff_addr_modrm(struct insn *insn, struct pt_regs *regs, ...@@ -948,7 +952,7 @@ static int get_eff_addr_modrm(struct insn *insn, struct pt_regs *regs,
* following instruction. * following instruction.
*/ */
if (*regoff == -EDOM) { if (*regoff == -EDOM) {
if (user_64bit_mode(regs)) if (any_64bit_mode(regs))
tmp = regs->ip + insn->length; tmp = regs->ip + insn->length;
else else
tmp = 0; tmp = 0;
...@@ -1250,7 +1254,7 @@ static void __user *get_addr_ref_32(struct insn *insn, struct pt_regs *regs) ...@@ -1250,7 +1254,7 @@ static void __user *get_addr_ref_32(struct insn *insn, struct pt_regs *regs)
* After computed, the effective address is treated as an unsigned * After computed, the effective address is treated as an unsigned
* quantity. * quantity.
*/ */
if (!user_64bit_mode(regs) && ((unsigned int)eff_addr > seg_limit)) if (!any_64bit_mode(regs) && ((unsigned int)eff_addr > seg_limit))
goto out; goto out;
/* /*
......
...@@ -288,23 +288,6 @@ static void __init kasan_shallow_populate_pgds(void *start, void *end) ...@@ -288,23 +288,6 @@ static void __init kasan_shallow_populate_pgds(void *start, void *end)
} while (pgd++, addr = next, addr != (unsigned long)end); } while (pgd++, addr = next, addr != (unsigned long)end);
} }
#ifdef CONFIG_KASAN_INLINE
/*
 * Die-notifier callback: on a DIE_GPF event, print two emergency-level
 * hints about CONFIG_KASAN_INLINE. Always returns NOTIFY_OK; @self and
 * @data are unused.
 */
static int kasan_die_handler(struct notifier_block *self,
			     unsigned long val,
			     void *data)
{
	if (val != DIE_GPF)
		return NOTIFY_OK;

	pr_emerg("CONFIG_KASAN_INLINE enabled\n");
	pr_emerg("GPF could be caused by NULL-ptr deref or user memory access\n");

	return NOTIFY_OK;
}
/* Notifier block whose callback is kasan_die_handler(). */
static struct notifier_block kasan_die_notifier = {
.notifier_call = kasan_die_handler,
};
#endif
void __init kasan_early_init(void) void __init kasan_early_init(void)
{ {
int i; int i;
...@@ -341,10 +324,6 @@ void __init kasan_init(void) ...@@ -341,10 +324,6 @@ void __init kasan_init(void)
int i; int i;
void *shadow_cpu_entry_begin, *shadow_cpu_entry_end; void *shadow_cpu_entry_begin, *shadow_cpu_entry_end;
#ifdef CONFIG_KASAN_INLINE
register_die_notifier(&kasan_die_notifier);
#endif
memcpy(early_top_pgt, init_top_pgt, sizeof(early_top_pgt)); memcpy(early_top_pgt, init_top_pgt, sizeof(early_top_pgt));
/* /*
......
...@@ -228,4 +228,10 @@ static inline void kasan_release_vmalloc(unsigned long start, ...@@ -228,4 +228,10 @@ static inline void kasan_release_vmalloc(unsigned long start,
unsigned long free_region_end) {} unsigned long free_region_end) {}
#endif #endif
#ifdef CONFIG_KASAN_INLINE
/* Only declared here; the definition lives outside this header. */
void kasan_non_canonical_hook(unsigned long addr);
#else /* CONFIG_KASAN_INLINE */
/* Empty stub so callers need no #ifdef when CONFIG_KASAN_INLINE is off. */
static inline void kasan_non_canonical_hook(unsigned long addr) { }
#endif /* CONFIG_KASAN_INLINE */
#endif /* LINUX_KASAN_H */ #endif /* LINUX_KASAN_H */
...@@ -512,3 +512,43 @@ void __kasan_report(unsigned long addr, size_t size, bool is_write, unsigned lon ...@@ -512,3 +512,43 @@ void __kasan_report(unsigned long addr, size_t size, bool is_write, unsigned lon
end_report(&flags); end_report(&flags);
} }
#ifdef CONFIG_KASAN_INLINE
/*
* With CONFIG_KASAN_INLINE, accesses to bogus pointers (outside the high
* canonical half of the address space) cause out-of-bounds shadow memory reads
* before the actual access. For addresses in the low canonical half of the
* address space, as well as most non-canonical addresses, that out-of-bounds
* shadow memory access lands in the non-canonical part of the address space.
* Help the user figure out what the original bogus pointer was.
*/
void kasan_non_canonical_hook(unsigned long addr)
{
	const char *kind = "maybe wild-memory-access";
	unsigned long mem_addr;

	/* Below the shadow offset the subtraction further down would wrap. */
	if (addr < KASAN_SHADOW_OFFSET)
		return;

	/* Undo the shadow mapping to recover the address that was checked. */
	mem_addr = (addr - KASAN_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT;

	/*
	 * Confidence drops as the decoded address grows:
	 *  - within the first page, this is almost certainly the shadow of a
	 *    NULL-ish pointer;
	 *  - below TASK_SIZE, the matching shadow region is a narrow slice of
	 *    the non-canonical space, so a user pointer is still likely;
	 *  - anything else covers a very large part of the address space, so
	 *    the decoded value is printed but labelled as a guess.
	 */
	if (mem_addr < PAGE_SIZE)
		kind = "null-ptr-deref";
	else if (mem_addr < TASK_SIZE)
		kind = "probably user-memory-access";

	pr_alert("KASAN: %s in range [0x%016lx-0x%016lx]\n", kind,
		 mem_addr, mem_addr + KASAN_SHADOW_MASK);
}
#endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment