Commit 5645688f authored by Linus Torvalds

Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 asm updates from Ingo Molnar:
 "The main changes in this development cycle were:

   - a large number of call stack dumping/printing improvements: higher
     robustness, better cross-context dumping, improved output, etc.
     (Josh Poimboeuf)

   - vDSO getcpu() performance improvement for future Intel CPUs with
     the RDPID instruction (Andy Lutomirski)

   - add two new Intel AVX512 features and the CPUID support
     infrastructure for them: AVX512IFMA and AVX512VBMI (Gayatri Kammela,
     He Chen)

   - more copy-user unification (Borislav Petkov)

   - entry code assembly macro simplifications (Alexander Kuleshov)

   - vDSO checkpoint/restore (C/R) support improvements (Dmitry Safonov)

   - misc fixes and cleanups (Borislav Petkov, Paul Bolle)"

* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (40 commits)
  scripts/decode_stacktrace.sh: Fix address line detection on x86
  x86/boot/64: Use defines for page size
  x86/dumpstack: Make stack name tags more comprehensible
  selftests/x86: Add test_vdso to test getcpu()
  x86/vdso: Use RDPID in preference to LSL when available
  x86/dumpstack: Handle NULL stack pointer in show_trace_log_lvl()
  x86/cpufeatures: Enable new AVX512 cpu features
  x86/cpuid: Provide get_scattered_cpuid_leaf()
  x86/cpuid: Cleanup cpuid_regs definitions
  x86/copy_user: Unify the code by removing the 64-bit asm _copy_*_user() variants
  x86/unwind: Ensure stack grows down
  x86/vdso: Set vDSO pointer only after success
  x86/prctl/uapi: Remove #ifdef for CHECKPOINT_RESTORE
  x86/unwind: Detect bad stack return address
  x86/dumpstack: Warn on stack recursion
  x86/unwind: Warn on bad frame pointer
  x86/decoder: Use stderr if insn sanity test fails
  x86/decoder: Use stdout if insn decoder test is successful
  mm/page_alloc: Remove kernel address exposure in free_reserved_area()
  x86/dumpstack: Remove raw stack dump
  ...
parents 4ade5b22 53938ee4
......@@ -1963,9 +1963,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
kmemcheck=2 (one-shot mode)
Default: 2 (one-shot mode)
kstack=N [X86] Print N words from the kernel stack
in oops dumps.
kvm.ignore_msrs=[KVM] Ignore guest accesses to unhandled MSRs.
Default is 0 (don't ignore, but inject #GP)
......
......@@ -40,7 +40,6 @@ show up in /proc/sys/kernel:
- hung_task_warnings
- kexec_load_disabled
- kptr_restrict
- kstack_depth_to_print [ X86 only ]
- l2cr [ PPC only ]
- modprobe ==> Documentation/debugging-modules.txt
- modules_disabled
......@@ -395,13 +394,6 @@ When kptr_restrict is set to (2), kernel pointers printed using
==============================================================
kstack_depth_to_print: (X86 only)
Controls the number of words to print when dumping the raw
kernel stack.
==============================================================
l2cr: (PPC only)
This flag controls the L2 cache of G3 processor boards. If
......
......@@ -277,10 +277,6 @@ IOMMU (input/output memory management unit)
space might stop working. Use this option if you have devices that
are accessed from userspace directly on some PCI host bridge.
Debugging
kstack=N Print N words from the kernel stack in oops dumps.
Miscellaneous
nogbpages
......
......@@ -90,8 +90,8 @@ For 32-bit we have the following conventions - kernel is built with
#define SIZEOF_PTREGS 21*8
.macro ALLOC_PT_GPREGS_ON_STACK addskip=0
addq $-(15*8+\addskip), %rsp
.macro ALLOC_PT_GPREGS_ON_STACK
addq $-(15*8), %rsp
.endm
.macro SAVE_C_REGS_HELPER offset=0 rax=1 rcx=1 r8910=1 r11=1
......@@ -147,15 +147,6 @@ For 32-bit we have the following conventions - kernel is built with
movq 5*8+\offset(%rsp), %rbx
.endm
.macro ZERO_EXTRA_REGS
xorl %r15d, %r15d
xorl %r14d, %r14d
xorl %r13d, %r13d
xorl %r12d, %r12d
xorl %ebp, %ebp
xorl %ebx, %ebx
.endm
.macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1
.if \rstor_r11
movq 6*8(%rsp), %r11
......@@ -201,6 +192,26 @@ For 32-bit we have the following conventions - kernel is built with
.byte 0xf1
.endm
/*
* This is a sneaky trick to help the unwinder find pt_regs on the stack. The
* frame pointer is replaced with an encoded pointer to pt_regs. The encoding
* is just setting the LSB, which makes it an invalid stack address and is also
* a signal to the unwinder that it's a pt_regs pointer in disguise.
*
* NOTE: This macro must be used *after* SAVE_EXTRA_REGS because it corrupts
* the original rbp.
*/
.macro ENCODE_FRAME_POINTER ptregs_offset=0
#ifdef CONFIG_FRAME_POINTER
.if \ptregs_offset
leaq \ptregs_offset(%rsp), %rbp
.else
mov %rsp, %rbp
.endif
orq $0x1, %rbp
#endif
.endm
#endif /* CONFIG_X86_64 */
/*
......
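
Illustrative sketch (not part of the patch): the ENCODE_FRAME_POINTER comment
above describes tagging %rbp with its low bit so the unwinder can recognise an
embedded pt_regs pointer; the in-kernel decoding is decode_frame_pointer() in
the arch/x86/kernel/unwind_frame.c hunk further down. A minimal user-space
sketch of the encode/decode round trip (all names here are illustrative):

#include <stdint.h>
#include <stdio.h>

/* Setting the LSB yields an invalid (unaligned) stack address, which is the
 * unwinder's cue that the "frame pointer" is really a pt_regs pointer. */
static uintptr_t encode_frame_pointer(void *regs)
{
	return (uintptr_t)regs | 0x1;
}

static void *decode_frame_pointer(uintptr_t bp)
{
	return (bp & 0x1) ? (void *)(bp & ~(uintptr_t)0x1) : NULL;
}

int main(void)
{
	long fake_pt_regs[21];	/* stand-in for struct pt_regs */
	uintptr_t bp = encode_frame_pointer(fake_pt_regs);

	printf("encoded rbp = %#lx, decodes to pt_regs at %p\n",
	       (unsigned long)bp, decode_frame_pointer(bp));
	return 0;
}
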
......@@ -38,12 +38,6 @@
#include <asm/export.h>
#include <linux/err.h>
/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
#include <linux/elf-em.h>
#define AUDIT_ARCH_X86_64 (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
#define __AUDIT_ARCH_64BIT 0x80000000
#define __AUDIT_ARCH_LE 0x40000000
.code64
.section .entry.text, "ax"
......@@ -469,6 +463,7 @@ END(irq_entries_start)
ALLOC_PT_GPREGS_ON_STACK
SAVE_C_REGS
SAVE_EXTRA_REGS
ENCODE_FRAME_POINTER
testb $3, CS(%rsp)
jz 1f
......@@ -985,6 +980,7 @@ ENTRY(xen_failsafe_callback)
ALLOC_PT_GPREGS_ON_STACK
SAVE_C_REGS
SAVE_EXTRA_REGS
ENCODE_FRAME_POINTER
jmp error_exit
END(xen_failsafe_callback)
......@@ -1028,6 +1024,7 @@ ENTRY(paranoid_entry)
cld
SAVE_C_REGS 8
SAVE_EXTRA_REGS 8
ENCODE_FRAME_POINTER 8
movl $1, %ebx
movl $MSR_GS_BASE, %ecx
rdmsr
......@@ -1075,6 +1072,7 @@ ENTRY(error_entry)
cld
SAVE_C_REGS 8
SAVE_EXTRA_REGS 8
ENCODE_FRAME_POINTER 8
xorl %ebx, %ebx
testb $3, CS+8(%rsp)
jz .Lerror_kernelspace
......@@ -1257,6 +1255,7 @@ ENTRY(nmi)
pushq %r13 /* pt_regs->r13 */
pushq %r14 /* pt_regs->r14 */
pushq %r15 /* pt_regs->r15 */
ENCODE_FRAME_POINTER
/*
* At this point we no longer need to worry about stack damage
......@@ -1270,11 +1269,10 @@ ENTRY(nmi)
/*
* Return back to user mode. We must *not* do the normal exit
* work, because we don't want to enable interrupts. Fortunately,
* do_nmi doesn't modify pt_regs.
* work, because we don't want to enable interrupts.
*/
SWAPGS
jmp restore_c_regs_and_iret
jmp restore_regs_and_iret
.Lnmi_from_kernel:
/*
......
......@@ -161,8 +161,6 @@ static int map_vdso(const struct vdso_image *image, unsigned long addr)
}
text_start = addr - image->sym_vvar_start;
current->mm->context.vdso = (void __user *)text_start;
current->mm->context.vdso_image = image;
/*
* MAYWRITE to allow gdb to COW and set breakpoints
......@@ -189,14 +187,12 @@ static int map_vdso(const struct vdso_image *image, unsigned long addr)
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
do_munmap(mm, text_start, image->size);
} else {
current->mm->context.vdso = (void __user *)text_start;
current->mm->context.vdso_image = image;
}
up_fail:
if (ret) {
current->mm->context.vdso = NULL;
current->mm->context.vdso_image = NULL;
}
up_write(&mm->mmap_sem);
return ret;
}
......
......@@ -36,13 +36,6 @@ static DEFINE_PER_CPU(struct pt, pt_ctx);
static struct pt_pmu pt_pmu;
enum cpuid_regs {
CR_EAX = 0,
CR_ECX,
CR_EDX,
CR_EBX
};
/*
* Capabilities of Intel PT hardware, such as number of address bits or
* supported output schemes, are cached and exported to userspace as "caps"
......@@ -64,21 +57,21 @@ static struct pt_cap_desc {
u8 reg;
u32 mask;
} pt_caps[] = {
PT_CAP(max_subleaf, 0, CR_EAX, 0xffffffff),
PT_CAP(cr3_filtering, 0, CR_EBX, BIT(0)),
PT_CAP(psb_cyc, 0, CR_EBX, BIT(1)),
PT_CAP(ip_filtering, 0, CR_EBX, BIT(2)),
PT_CAP(mtc, 0, CR_EBX, BIT(3)),
PT_CAP(ptwrite, 0, CR_EBX, BIT(4)),
PT_CAP(power_event_trace, 0, CR_EBX, BIT(5)),
PT_CAP(topa_output, 0, CR_ECX, BIT(0)),
PT_CAP(topa_multiple_entries, 0, CR_ECX, BIT(1)),
PT_CAP(single_range_output, 0, CR_ECX, BIT(2)),
PT_CAP(payloads_lip, 0, CR_ECX, BIT(31)),
PT_CAP(num_address_ranges, 1, CR_EAX, 0x3),
PT_CAP(mtc_periods, 1, CR_EAX, 0xffff0000),
PT_CAP(cycle_thresholds, 1, CR_EBX, 0xffff),
PT_CAP(psb_periods, 1, CR_EBX, 0xffff0000),
PT_CAP(max_subleaf, 0, CPUID_EAX, 0xffffffff),
PT_CAP(cr3_filtering, 0, CPUID_EBX, BIT(0)),
PT_CAP(psb_cyc, 0, CPUID_EBX, BIT(1)),
PT_CAP(ip_filtering, 0, CPUID_EBX, BIT(2)),
PT_CAP(mtc, 0, CPUID_EBX, BIT(3)),
PT_CAP(ptwrite, 0, CPUID_EBX, BIT(4)),
PT_CAP(power_event_trace, 0, CPUID_EBX, BIT(5)),
PT_CAP(topa_output, 0, CPUID_ECX, BIT(0)),
PT_CAP(topa_multiple_entries, 0, CPUID_ECX, BIT(1)),
PT_CAP(single_range_output, 0, CPUID_ECX, BIT(2)),
PT_CAP(payloads_lip, 0, CPUID_ECX, BIT(31)),
PT_CAP(num_address_ranges, 1, CPUID_EAX, 0x3),
PT_CAP(mtc_periods, 1, CPUID_EAX, 0xffff0000),
PT_CAP(cycle_thresholds, 1, CPUID_EBX, 0xffff),
PT_CAP(psb_periods, 1, CPUID_EBX, 0xffff0000),
};
static u32 pt_cap_get(enum pt_capabilities cap)
......@@ -213,10 +206,10 @@ static int __init pt_pmu_hw_init(void)
for (i = 0; i < PT_CPUID_LEAVES; i++) {
cpuid_count(20, i,
&pt_pmu.caps[CR_EAX + i*PT_CPUID_REGS_NUM],
&pt_pmu.caps[CR_EBX + i*PT_CPUID_REGS_NUM],
&pt_pmu.caps[CR_ECX + i*PT_CPUID_REGS_NUM],
&pt_pmu.caps[CR_EDX + i*PT_CPUID_REGS_NUM]);
&pt_pmu.caps[CPUID_EAX + i*PT_CPUID_REGS_NUM],
&pt_pmu.caps[CPUID_EBX + i*PT_CPUID_REGS_NUM],
&pt_pmu.caps[CPUID_ECX + i*PT_CPUID_REGS_NUM],
&pt_pmu.caps[CPUID_EDX + i*PT_CPUID_REGS_NUM]);
}
ret = -ENOMEM;
......
......@@ -227,6 +227,7 @@
#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */
#define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */
#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */
#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */
#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */
#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
......@@ -280,8 +281,10 @@
#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */
#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions */
#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */
#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */
#define X86_FEATURE_RDPID (16*32+ 22) /* RDPID instruction */
/* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */
#define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */
......
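
Illustrative sketch (not part of the patch, function name invented): once CPUID
enumeration has populated the feature bits added above, kernel code would
typically test them with boot_cpu_has(); note that the fpu/xstate hunk further
down clears the AVX-512 bits again when XSAVE support is unavailable.

static void example_report_new_features(void)
{
	if (boot_cpu_has(X86_FEATURE_AVX512IFMA))
		pr_info("AVX512IFMA supported\n");
	if (boot_cpu_has(X86_FEATURE_AVX512VBMI))
		pr_info("AVX512VBMI supported\n");
	if (boot_cpu_has(X86_FEATURE_RDPID))
		pr_info("RDPID supported\n");
}
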
......@@ -21,7 +21,6 @@ enum die_val {
DIE_NMIUNKNOWN,
};
extern void printk_address(unsigned long address);
extern void die(const char *, struct pt_regs *,long);
extern int __must_check __die(const char *, struct pt_regs *, long);
extern void show_stack_regs(struct pt_regs *regs);
......
......@@ -137,6 +137,17 @@ struct cpuinfo_x86 {
u32 microcode;
};
struct cpuid_regs {
u32 eax, ebx, ecx, edx;
};
enum cpuid_regs_idx {
CPUID_EAX = 0,
CPUID_EBX,
CPUID_ECX,
CPUID_EDX,
};
#define X86_VENDOR_INTEL 0
#define X86_VENDOR_CYRIX 1
#define X86_VENDOR_AMD 2
......@@ -178,6 +189,9 @@ extern void identify_secondary_cpu(struct cpuinfo_x86 *);
extern void print_cpu_info(struct cpuinfo_x86 *);
void print_cpu_msr(struct cpuinfo_x86 *);
extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
extern u32 get_scattered_cpuid_leaf(unsigned int level,
unsigned int sub_leaf,
enum cpuid_regs_idx reg);
extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
extern void init_amd_cacheinfo(struct cpuinfo_x86 *c);
......
......@@ -30,8 +30,7 @@ bool in_task_stack(unsigned long *stack, struct task_struct *task,
int get_stack_info(unsigned long *stack, struct task_struct *task,
struct stack_info *info, unsigned long *visit_mask);
void stack_type_str(enum stack_type type, const char **begin,
const char **end);
const char *stack_type_name(enum stack_type type);
static inline bool on_stack(struct stack_info *info, void *addr, size_t len)
{
......@@ -43,8 +42,6 @@ static inline bool on_stack(struct stack_info *info, void *addr, size_t len)
addr + len > begin && addr + len <= end);
}
extern int kstack_depth_to_print;
#ifdef CONFIG_X86_32
#define STACKSLOTS_PER_LINE 8
#else
......@@ -86,9 +83,6 @@ get_stack_pointer(struct task_struct *task, struct pt_regs *regs)
void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
unsigned long *stack, char *log_lvl);
void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
unsigned long *sp, char *log_lvl);
extern unsigned int code_bytes;
/* The form of the top of the frame on the stack */
......
......@@ -13,6 +13,7 @@ struct unwind_state {
int graph_idx;
#ifdef CONFIG_FRAME_POINTER
unsigned long *bp;
struct pt_regs *regs;
#else
unsigned long *sp;
#endif
......@@ -47,7 +48,15 @@ unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
if (unwind_done(state))
return NULL;
return state->bp + 1;
return state->regs ? &state->regs->ip : state->bp + 1;
}
static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
{
if (unwind_done(state))
return NULL;
return state->regs;
}
#else /* !CONFIG_FRAME_POINTER */
......@@ -58,6 +67,11 @@ unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
return NULL;
}
static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
{
return NULL;
}
#endif /* CONFIG_FRAME_POINTER */
#endif /* _ASM_X86_UNWIND_H */
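
Illustrative sketch (not part of the patch, function name invented): how a
caller can walk frames with this API and use the new unwind_get_entry_regs()
to spot entry frames that carry a full pt_regs. The pattern mirrors
show_trace_log_lvl() in the dumpstack.c hunk below.

#include <linux/sched.h>
#include <linux/printk.h>
#include <asm/unwind.h>

static void example_dump_frames(struct task_struct *task, struct pt_regs *regs)
{
	struct unwind_state state;

	for (unwind_start(&state, task, regs, NULL); !unwind_done(&state);
	     unwind_next_frame(&state)) {
		struct pt_regs *entry_regs = unwind_get_entry_regs(&state);

		if (entry_regs)		/* entry code frame: saved pt_regs */
			printk(KERN_DEFAULT " entry regs at ip=%pS\n",
			       (void *)entry_regs->ip);
		else			/* ordinary frame: just a return address */
			printk(KERN_DEFAULT " %pS\n",
			       (void *)unwind_get_return_address(&state));
	}
}
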
......@@ -89,8 +89,13 @@ static inline unsigned int __getcpu(void)
* works on all CPUs. This is volatile so that it orders
* correctly wrt barrier() and to keep gcc from cleverly
* hoisting it out of the calling function.
*
* If RDPID is available, use it.
*/
asm volatile ("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
alternative_io ("lsl %[p],%[seg]",
".byte 0xf3,0x0f,0xc7,0xf8", /* RDPID %eax/rax */
X86_FEATURE_RDPID,
[p] "=a" (p), [seg] "r" (__PER_CPU_SEG));
return p;
}
......
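
Illustrative helper (not part of the patch, names invented): both the LSL path
and the new RDPID path return the same per-CPU value, which by the existing
vgetcpu convention packs the CPU number into the low 12 bits and the NUMA node
into the bits above; the kernel programs the segment limit and TSC_AUX with
this encoding, which is what makes the two instructions interchangeable here.
Assuming that packing, a caller splits the value like so:

#define EXAMPLE_VGETCPU_CPU_MASK 0xfff	/* matches the kernel's VGETCPU_CPU_MASK */

static inline void example_split_getcpu(unsigned int p,
					unsigned int *cpu, unsigned int *node)
{
	if (cpu)
		*cpu = p & EXAMPLE_VGETCPU_CPU_MASK;
	if (node)
		*node = p >> 12;
}
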
......@@ -6,10 +6,8 @@
#define ARCH_GET_FS 0x1003
#define ARCH_GET_GS 0x1004
#ifdef CONFIG_CHECKPOINT_RESTORE
# define ARCH_MAP_VDSO_X32 0x2001
# define ARCH_MAP_VDSO_32 0x2002
# define ARCH_MAP_VDSO_64 0x2003
#endif
#define ARCH_MAP_VDSO_X32 0x2001
#define ARCH_MAP_VDSO_32 0x2002
#define ARCH_MAP_VDSO_64 0x2003
#endif /* _ASM_X86_PRCTL_H */
......@@ -17,11 +17,17 @@ struct cpuid_bit {
u32 sub_leaf;
};
enum cpuid_regs {
CR_EAX = 0,
CR_ECX,
CR_EDX,
CR_EBX
/* Please keep the leaf sorted by cpuid_bit.level for faster search. */
static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 },
{ X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 },
{ X86_FEATURE_INTEL_PT, CPUID_EBX, 25, 0x00000007, 0 },
{ X86_FEATURE_AVX512_4VNNIW, CPUID_EDX, 2, 0x00000007, 0 },
{ X86_FEATURE_AVX512_4FMAPS, CPUID_EDX, 3, 0x00000007, 0 },
{ X86_FEATURE_HW_PSTATE, CPUID_EDX, 7, 0x80000007, 0 },
{ X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 },
{ X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 },
{ 0, 0, 0, 0, 0 }
};
void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
......@@ -30,18 +36,6 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
u32 regs[4];
const struct cpuid_bit *cb;
static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_INTEL_PT, CR_EBX,25, 0x00000007, 0 },
{ X86_FEATURE_AVX512_4VNNIW, CR_EDX, 2, 0x00000007, 0 },
{ X86_FEATURE_AVX512_4FMAPS, CR_EDX, 3, 0x00000007, 0 },
{ X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006, 0 },
{ X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 },
{ X86_FEATURE_HW_PSTATE, CR_EDX, 7, 0x80000007, 0 },
{ X86_FEATURE_CPB, CR_EDX, 9, 0x80000007, 0 },
{ X86_FEATURE_PROC_FEEDBACK, CR_EDX,11, 0x80000007, 0 },
{ 0, 0, 0, 0, 0 }
};
for (cb = cpuid_bits; cb->feature; cb++) {
/* Verify that the level is valid */
......@@ -50,10 +44,35 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
max_level > (cb->level | 0xffff))
continue;
cpuid_count(cb->level, cb->sub_leaf, &regs[CR_EAX],
&regs[CR_EBX], &regs[CR_ECX], &regs[CR_EDX]);
cpuid_count(cb->level, cb->sub_leaf, &regs[CPUID_EAX],
&regs[CPUID_EBX], &regs[CPUID_ECX],
&regs[CPUID_EDX]);
if (regs[cb->reg] & (1 << cb->bit))
set_cpu_cap(c, cb->feature);
}
}
u32 get_scattered_cpuid_leaf(unsigned int level, unsigned int sub_leaf,
enum cpuid_regs_idx reg)
{
const struct cpuid_bit *cb;
u32 cpuid_val = 0;
for (cb = cpuid_bits; cb->feature; cb++) {
if (level > cb->level)
continue;
if (level < cb->level)
break;
if (reg == cb->reg && sub_leaf == cb->sub_leaf) {
if (cpu_has(&boot_cpu_data, cb->feature))
cpuid_val |= BIT(cb->bit);
}
}
return cpuid_val;
}
EXPORT_SYMBOL_GPL(get_scattered_cpuid_leaf);
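
Illustrative usage (not part of the patch, function name invented): query the
synthesised leaf 0x7/0 EBX value and test the Intel PT bit, which per the
cpuid_bits[] entry above lives at bit 25 of that register.

static bool example_have_intel_pt(void)
{
	u32 ebx = get_scattered_cpuid_leaf(0x00000007, 0, CPUID_EBX);

	return ebx & BIT(25);	/* X86_FEATURE_INTEL_PT per cpuid_bits[] */
}
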
......@@ -46,10 +46,6 @@
static struct class *cpuid_class;
struct cpuid_regs {
u32 eax, ebx, ecx, edx;
};
static void cpuid_smp_cpuid(void *cmd_block)
{
struct cpuid_regs *cmd = (struct cpuid_regs *)cmd_block;
......
......@@ -22,7 +22,6 @@
int panic_on_unrecovered_nmi;
int panic_on_io_nmi;
unsigned int code_bytes = 64;
int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
static int die_counter;
bool in_task_stack(unsigned long *stack, struct task_struct *task,
......@@ -46,14 +45,7 @@ static void printk_stack_address(unsigned long address, int reliable,
char *log_lvl)
{
touch_nmi_watchdog();
printk("%s [<%p>] %s%pB\n",
log_lvl, (void *)address, reliable ? "" : "? ",
(void *)address);
}
void printk_address(unsigned long address)
{
pr_cont(" [<%p>] %pS\n", (void *)address, (void *)address);
printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address);
}
void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
......@@ -67,6 +59,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
printk("%sCall Trace:\n", log_lvl);
unwind_start(&state, task, regs, stack);
stack = stack ? : get_stack_pointer(task, regs);
/*
* Iterate through the stacks, starting with the current stack pointer.
......@@ -82,8 +75,8 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
* - softirq stack
* - hardirq stack
*/
for (; stack; stack = stack_info.next_sp) {
const char *str_begin, *str_end;
for (regs = NULL; stack; stack = stack_info.next_sp) {
const char *stack_name;
/*
* If we overflowed the task stack into a guard page, jump back
......@@ -95,9 +88,9 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
if (get_stack_info(stack, task, &stack_info, &visit_mask))
break;
stack_type_str(stack_info.type, &str_begin, &str_end);
if (str_begin)
printk("%s <%s> ", log_lvl, str_begin);
stack_name = stack_type_name(stack_info.type);
if (stack_name)
printk("%s <%s>\n", log_lvl, stack_name);
/*
* Scan the stack, printing any text addresses we find. At the
......@@ -119,6 +112,15 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
if (!__kernel_text_address(addr))
continue;
/*
* Don't print regs->ip again if it was already printed
* by __show_regs() below.
*/
if (regs && stack == &regs->ip) {
unwind_next_frame(&state);
continue;
}
if (stack == ret_addr_p)
reliable = 1;
......@@ -146,10 +148,15 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
* of the addresses will just be printed as unreliable.
*/
unwind_next_frame(&state);
/* if the frame has entry regs, print them */
regs = unwind_get_entry_regs(&state);
if (regs)
__show_regs(regs, 0);
}
if (str_end)
printk("%s <%s> ", log_lvl, str_end);
if (stack_name)
printk("%s </%s>\n", log_lvl, stack_name);
}
}
......@@ -164,12 +171,12 @@ void show_stack(struct task_struct *task, unsigned long *sp)
if (!sp && task == current)
sp = get_stack_pointer(current, NULL);
show_stack_log_lvl(task, NULL, sp, "");
show_trace_log_lvl(task, NULL, sp, KERN_DEFAULT);
}
void show_stack_regs(struct pt_regs *regs)
{
show_stack_log_lvl(current, regs, NULL, "");
show_trace_log_lvl(current, regs, NULL, KERN_DEFAULT);
}
static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED;
......@@ -261,14 +268,11 @@ int __die(const char *str, struct pt_regs *regs, long err)
sp = kernel_stack_pointer(regs);
savesegment(ss, ss);
}
printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
print_symbol("%s", regs->ip);
printk(" SS:ESP %04x:%08lx\n", ss, sp);
printk(KERN_EMERG "EIP: %pS SS:ESP: %04x:%08lx\n",
(void *)regs->ip, ss, sp);
#else
/* Executive summary in case the oops scrolled away */
printk(KERN_ALERT "RIP ");
printk_address(regs->ip);
printk(" RSP <%016lx>\n", regs->sp);
printk(KERN_ALERT "RIP: %pS RSP: %016lx\n", (void *)regs->ip, regs->sp);
#endif
return 0;
}
......@@ -291,22 +295,6 @@ void die(const char *str, struct pt_regs *regs, long err)
oops_end(flags, regs, sig);
}
static int __init kstack_setup(char *s)
{
ssize_t ret;
unsigned long val;
if (!s)
return -EINVAL;
ret = kstrtoul(s, 0, &val);
if (ret)
return ret;
kstack_depth_to_print = val;
return 0;
}
early_param("kstack", kstack_setup);
static int __init code_bytes_setup(char *s)
{
ssize_t ret;
......
......@@ -16,18 +16,15 @@
#include <asm/stacktrace.h>
void stack_type_str(enum stack_type type, const char **begin, const char **end)
const char *stack_type_name(enum stack_type type)
{
switch (type) {
case STACK_TYPE_IRQ:
case STACK_TYPE_SOFTIRQ:
*begin = "IRQ";
*end = "EOI";
break;
default:
*begin = NULL;
*end = NULL;
}
if (type == STACK_TYPE_IRQ)
return "IRQ";
if (type == STACK_TYPE_SOFTIRQ)
return "SOFTIRQ";
return NULL;
}
static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info)
......@@ -109,8 +106,10 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
* just break out and report an unknown stack type.
*/
if (visit_mask) {
if (*visit_mask & (1UL << info->type))
if (*visit_mask & (1UL << info->type)) {
printk_deferred_once(KERN_WARNING "WARNING: stack recursion on stack type %d\n", info->type);
goto unknown;
}
*visit_mask |= 1UL << info->type;
}
......@@ -121,36 +120,6 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
return -EINVAL;
}
void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
unsigned long *sp, char *log_lvl)
{
unsigned long *stack;
int i;
if (!try_get_task_stack(task))
return;
sp = sp ? : get_stack_pointer(task, regs);
stack = sp;
for (i = 0; i < kstack_depth_to_print; i++) {
if (kstack_end(stack))
break;
if ((i % STACKSLOTS_PER_LINE) == 0) {
if (i != 0)
pr_cont("\n");
printk("%s %08lx", log_lvl, *stack++);
} else
pr_cont(" %08lx", *stack++);
touch_nmi_watchdog();
}
pr_cont("\n");
show_trace_log_lvl(task, regs, sp, log_lvl);
put_task_stack(task);
}
void show_regs(struct pt_regs *regs)
{
int i;
......@@ -168,8 +137,7 @@ void show_regs(struct pt_regs *regs)
unsigned char c;
u8 *ip;
pr_emerg("Stack:\n");
show_stack_log_lvl(current, regs, NULL, KERN_EMERG);
show_trace_log_lvl(current, regs, NULL, KERN_EMERG);
pr_emerg("Code:");
......
......@@ -28,23 +28,17 @@ static unsigned long exception_stack_sizes[N_EXCEPTION_STACKS] = {
[DEBUG_STACK - 1] = DEBUG_STKSZ
};
void stack_type_str(enum stack_type type, const char **begin, const char **end)
const char *stack_type_name(enum stack_type type)
{
BUILD_BUG_ON(N_EXCEPTION_STACKS != 4);
switch (type) {
case STACK_TYPE_IRQ:
*begin = "IRQ";
*end = "EOI";
break;
case STACK_TYPE_EXCEPTION ... STACK_TYPE_EXCEPTION_LAST:
*begin = exception_stack_names[type - STACK_TYPE_EXCEPTION];
*end = "EOE";
break;
default:
*begin = NULL;
*end = NULL;
}
if (type == STACK_TYPE_IRQ)
return "IRQ";
if (type >= STACK_TYPE_EXCEPTION && type <= STACK_TYPE_EXCEPTION_LAST)
return exception_stack_names[type - STACK_TYPE_EXCEPTION];
return NULL;
}
static bool in_exception_stack(unsigned long *stack, struct stack_info *info)
......@@ -128,8 +122,10 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
* just break out and report an unknown stack type.
*/
if (visit_mask) {
if (*visit_mask & (1UL << info->type))
if (*visit_mask & (1UL << info->type)) {
printk_deferred_once(KERN_WARNING "WARNING: stack recursion on stack type %d\n", info->type);
goto unknown;
}
*visit_mask |= 1UL << info->type;
}
......@@ -140,56 +136,6 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
return -EINVAL;
}
void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
unsigned long *sp, char *log_lvl)
{
unsigned long *irq_stack_end;
unsigned long *irq_stack;
unsigned long *stack;
int i;
if (!try_get_task_stack(task))
return;
irq_stack_end = (unsigned long *)this_cpu_read(irq_stack_ptr);
irq_stack = irq_stack_end - (IRQ_STACK_SIZE / sizeof(long));
sp = sp ? : get_stack_pointer(task, regs);
stack = sp;
for (i = 0; i < kstack_depth_to_print; i++) {
unsigned long word;
if (stack >= irq_stack && stack <= irq_stack_end) {
if (stack == irq_stack_end) {
stack = (unsigned long *) (irq_stack_end[-1]);
pr_cont(" <EOI> ");
}
} else {
if (kstack_end(stack))
break;
}
if (probe_kernel_address(stack, word))
break;
if ((i % STACKSLOTS_PER_LINE) == 0) {
if (i != 0)
pr_cont("\n");
printk("%s %016lx", log_lvl, word);
} else
pr_cont(" %016lx", word);
stack++;
touch_nmi_watchdog();
}
pr_cont("\n");
show_trace_log_lvl(task, regs, sp, log_lvl);
put_task_stack(task);
}
void show_regs(struct pt_regs *regs)
{
int i;
......@@ -207,8 +153,7 @@ void show_regs(struct pt_regs *regs)
unsigned char c;
u8 *ip;
printk(KERN_DEFAULT "Stack:\n");
show_stack_log_lvl(current, regs, NULL, KERN_DEFAULT);
show_trace_log_lvl(current, regs, NULL, KERN_DEFAULT);
printk(KERN_DEFAULT "Code: ");
......
......@@ -65,6 +65,7 @@ void fpu__xstate_clear_all_cpu_caps(void)
setup_clear_cpu_cap(X86_FEATURE_AVX);
setup_clear_cpu_cap(X86_FEATURE_AVX2);
setup_clear_cpu_cap(X86_FEATURE_AVX512F);
setup_clear_cpu_cap(X86_FEATURE_AVX512IFMA);
setup_clear_cpu_cap(X86_FEATURE_AVX512PF);
setup_clear_cpu_cap(X86_FEATURE_AVX512ER);
setup_clear_cpu_cap(X86_FEATURE_AVX512CD);
......@@ -73,6 +74,7 @@ void fpu__xstate_clear_all_cpu_caps(void)
setup_clear_cpu_cap(X86_FEATURE_AVX512VL);
setup_clear_cpu_cap(X86_FEATURE_MPX);
setup_clear_cpu_cap(X86_FEATURE_XGETBV1);
setup_clear_cpu_cap(X86_FEATURE_AVX512VBMI);
setup_clear_cpu_cap(X86_FEATURE_PKU);
setup_clear_cpu_cap(X86_FEATURE_AVX512_4VNNIW);
setup_clear_cpu_cap(X86_FEATURE_AVX512_4FMAPS);
......
......@@ -63,6 +63,8 @@
#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD)
#endif
#define SIZEOF_PTREGS 17*4
/*
* Number of possible pages in the lowmem region.
*
......@@ -248,19 +250,19 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
#ifdef CONFIG_PARAVIRT
/* This can only trip for a broken bootloader... */
cmpw $0x207, pa(boot_params + BP_version)
jb default_entry
jb .Ldefault_entry
/* Paravirt-compatible boot parameters. Look to see what architecture
we're booting under. */
movl pa(boot_params + BP_hardware_subarch), %eax
cmpl $num_subarch_entries, %eax
jae bad_subarch
jae .Lbad_subarch
movl pa(subarch_entries)(,%eax,4), %eax
subl $__PAGE_OFFSET, %eax
jmp *%eax
bad_subarch:
.Lbad_subarch:
WEAK(lguest_entry)
WEAK(xen_entry)
/* Unknown implementation; there's really
......@@ -270,14 +272,14 @@ WEAK(xen_entry)
__INITDATA
subarch_entries:
.long default_entry /* normal x86/PC */
.long .Ldefault_entry /* normal x86/PC */
.long lguest_entry /* lguest hypervisor */
.long xen_entry /* Xen hypervisor */
.long default_entry /* Moorestown MID */
.long .Ldefault_entry /* Moorestown MID */
num_subarch_entries = (. - subarch_entries) / 4
.previous
#else
jmp default_entry
jmp .Ldefault_entry
#endif /* CONFIG_PARAVIRT */
#ifdef CONFIG_HOTPLUG_CPU
......@@ -289,7 +291,8 @@ num_subarch_entries = (. - subarch_entries) / 4
ENTRY(start_cpu0)
movl initial_stack, %ecx
movl %ecx, %esp
jmp *(initial_code)
call *(initial_code)
1: jmp 1b
ENDPROC(start_cpu0)
#endif
......@@ -317,7 +320,7 @@ ENTRY(startup_32_smp)
call load_ucode_ap
#endif
default_entry:
.Ldefault_entry:
#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
X86_CR0_PG)
......@@ -347,7 +350,7 @@ default_entry:
pushfl
popl %eax # get EFLAGS
testl $X86_EFLAGS_ID,%eax # did EFLAGS.ID remain set?
jz enable_paging # hw disallowed setting of ID bit
jz .Lenable_paging # hw disallowed setting of ID bit
# which means no CPUID and no CR4
xorl %eax,%eax
......@@ -357,13 +360,13 @@ default_entry:
movl $1,%eax
cpuid
andl $~1,%edx # Ignore CPUID.FPU
jz enable_paging # No flags or only CPUID.FPU = no CR4
jz .Lenable_paging # No flags or only CPUID.FPU = no CR4
movl pa(mmu_cr4_features),%eax
movl %eax,%cr4
testb $X86_CR4_PAE, %al # check if PAE is enabled
jz enable_paging
jz .Lenable_paging
/* Check if extended functions are implemented */
movl $0x80000000, %eax
......@@ -371,7 +374,7 @@ default_entry:
/* Value must be in the range 0x80000001 to 0x8000ffff */
subl $0x80000001, %eax
cmpl $(0x8000ffff-0x80000001), %eax
ja enable_paging
ja .Lenable_paging
/* Clear bogus XD_DISABLE bits */
call verify_cpu
......@@ -380,7 +383,7 @@ default_entry:
cpuid
/* Execute Disable bit supported? */
btl $(X86_FEATURE_NX & 31), %edx
jnc enable_paging
jnc .Lenable_paging
/* Setup EFER (Extended Feature Enable Register) */
movl $MSR_EFER, %ecx
......@@ -390,7 +393,7 @@ default_entry:
/* Make changes effective */
wrmsr
enable_paging:
.Lenable_paging:
/*
* Enable paging
......@@ -419,7 +422,7 @@ enable_paging:
*/
movb $4,X86 # at least 486
cmpl $-1,X86_CPUID
je is486
je .Lis486
/* get vendor info */
xorl %eax,%eax # call CPUID with 0 -> return vendor ID
......@@ -430,7 +433,7 @@ enable_paging:
movl %ecx,X86_VENDOR_ID+8 # last 4 chars
orl %eax,%eax # do we have processor info as well?
je is486
je .Lis486
movl $1,%eax # Use the CPUID instruction to get CPU type
cpuid
......@@ -444,7 +447,7 @@ enable_paging:
movb %cl,X86_MASK
movl %edx,X86_CAPABILITY
is486:
.Lis486:
movl $0x50022,%ecx # set AM, WP, NE and MP
movl %cr0,%eax
andl $0x80000011,%eax # Save PG,PE,ET
......@@ -470,8 +473,9 @@ is486:
xorl %eax,%eax # Clear LDT
lldt %ax
pushl $0 # fake return address for unwinder
jmp *(initial_code)
call *(initial_code)
1: jmp 1b
ENDPROC(startup_32_smp)
#include "verify_cpu.S"
......@@ -709,7 +713,12 @@ ENTRY(initial_page_table)
.data
.balign 4
ENTRY(initial_stack)
.long init_thread_union+THREAD_SIZE
/*
* The SIZEOF_PTREGS gap is a convention which helps the in-kernel
* unwinder reliably detect the end of the stack.
*/
.long init_thread_union + THREAD_SIZE - SIZEOF_PTREGS - \
TOP_OF_KERNEL_STACK_PADDING;
__INITRODATA
int_msg:
......
......@@ -66,13 +66,8 @@ startup_64:
* tables and then reload them.
*/
/*
* Setup stack for verify_cpu(). "-8" because initial_stack is defined
* this way, see below. Our best guess is a NULL ptr for stack
* termination heuristics and we don't want to break anything which
* might depend on it (kgdb, ...).
*/
leaq (__end_init_task - 8)(%rip), %rsp
/* Set up the stack for verify_cpu(), similar to initial_stack below */
leaq (__end_init_task - SIZEOF_PTREGS)(%rip), %rsp
/* Sanitize CPU configuration */
call verify_cpu
......@@ -117,20 +112,20 @@ startup_64:
movq %rdi, %rax
shrq $PGDIR_SHIFT, %rax
leaq (4096 + _KERNPG_TABLE)(%rbx), %rdx
leaq (PAGE_SIZE + _KERNPG_TABLE)(%rbx), %rdx
movq %rdx, 0(%rbx,%rax,8)
movq %rdx, 8(%rbx,%rax,8)
addq $4096, %rdx
addq $PAGE_SIZE, %rdx
movq %rdi, %rax
shrq $PUD_SHIFT, %rax
andl $(PTRS_PER_PUD-1), %eax
movq %rdx, 4096(%rbx,%rax,8)
movq %rdx, PAGE_SIZE(%rbx,%rax,8)
incl %eax
andl $(PTRS_PER_PUD-1), %eax
movq %rdx, 4096(%rbx,%rax,8)
movq %rdx, PAGE_SIZE(%rbx,%rax,8)
addq $8192, %rbx
addq $PAGE_SIZE * 2, %rbx
movq %rdi, %rax
shrq $PMD_SHIFT, %rdi
addq $(__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL), %rax
......@@ -270,8 +265,12 @@ ENTRY(secondary_startup_64)
/* rsi is pointer to real mode structure with interesting info.
pass it to C */
movq %rsi, %rdi
jmp start_cpu
ENDPROC(secondary_startup_64)
/* Finally jump to run C code and to be on real kernel address
ENTRY(start_cpu)
/*
* Jump to run C code and to be on a real kernel address.
* Since we are running on identity-mapped space we have to jump
* to the full 64bit address, this is only possible as indirect
* jump. In addition we need to ensure %cs is set so we make this
......@@ -295,12 +294,13 @@ ENTRY(secondary_startup_64)
* REX.W + FF /5 JMP m16:64 Jump far, absolute indirect,
* address given in m16:64.
*/
movq initial_code(%rip),%rax
pushq $0 # fake return address to stop unwinder
call 1f # put return address on stack for unwinder
1: xorq %rbp, %rbp # clear frame pointer
movq initial_code(%rip), %rax
pushq $__KERNEL_CS # set correct cs
pushq %rax # target address in negative space
lretq
ENDPROC(secondary_startup_64)
ENDPROC(start_cpu)
#include "verify_cpu.S"
......@@ -308,15 +308,11 @@ ENDPROC(secondary_startup_64)
/*
* Boot CPU0 entry point. It's called from play_dead(). Everything has been set
* up already except stack. We just set up stack here. Then call
* start_secondary().
* start_secondary() via start_cpu().
*/
ENTRY(start_cpu0)
movq initial_stack(%rip),%rsp
movq initial_code(%rip),%rax
pushq $0 # fake return address to stop unwinder
pushq $__KERNEL_CS # set correct cs
pushq %rax # target address in negative space
lretq
movq initial_stack(%rip), %rsp
jmp start_cpu
ENDPROC(start_cpu0)
#endif
......@@ -328,7 +324,11 @@ ENDPROC(start_cpu0)
GLOBAL(initial_gs)
.quad INIT_PER_CPU_VAR(irq_stack_union)
GLOBAL(initial_stack)
.quad init_thread_union+THREAD_SIZE-8
/*
* The SIZEOF_PTREGS gap is a convention which helps the in-kernel
* unwinder reliably detect the end of the stack.
*/
.quad init_thread_union + THREAD_SIZE - SIZEOF_PTREGS
__FINITDATA
bad_address:
......
......@@ -72,10 +72,9 @@ void __show_regs(struct pt_regs *regs, int all)
savesegment(gs, gs);
}
printk(KERN_DEFAULT "EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n",
(u16)regs->cs, regs->ip, regs->flags,
printk(KERN_DEFAULT "EIP: %pS\n", (void *)regs->ip);
printk(KERN_DEFAULT "EFLAGS: %08lx CPU: %d\n", regs->flags,
smp_processor_id());
print_symbol("EIP is at %s\n", regs->ip);
printk(KERN_DEFAULT "EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
regs->ax, regs->bx, regs->cx, regs->dx);
......
......@@ -61,10 +61,15 @@ void __show_regs(struct pt_regs *regs, int all)
unsigned int fsindex, gsindex;
unsigned int ds, cs, es;
printk(KERN_DEFAULT "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
printk_address(regs->ip);
printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss,
printk(KERN_DEFAULT "RIP: %04lx:%pS\n", regs->cs & 0xffff,
(void *)regs->ip);
printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx", regs->ss,
regs->sp, regs->flags);
if (regs->orig_ax != -1)
pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax);
else
pr_cont("\n");
printk(KERN_DEFAULT "RAX: %016lx RBX: %016lx RCX: %016lx\n",
regs->ax, regs->bx, regs->cx);
printk(KERN_DEFAULT "RDX: %016lx RSI: %016lx RDI: %016lx\n",
......
......@@ -987,9 +987,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
int cpu0_nmi_registered = 0;
unsigned long timeout;
idle->thread.sp = (unsigned long) (((struct pt_regs *)
(THREAD_SIZE + task_stack_page(idle))) - 1);
idle->thread.sp = (unsigned long)task_pt_regs(idle);
early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
initial_code = (unsigned long)start_secondary;
initial_stack = idle->thread.sp;
......
......@@ -14,13 +14,55 @@ unsigned long unwind_get_return_address(struct unwind_state *state)
if (unwind_done(state))
return 0;
if (state->regs && user_mode(state->regs))
return 0;
addr = ftrace_graph_ret_addr(state->task, &state->graph_idx, *addr_p,
addr_p);
return __kernel_text_address(addr) ? addr : 0;
if (!__kernel_text_address(addr)) {
printk_deferred_once(KERN_WARNING
"WARNING: unrecognized kernel stack return address %p at %p in %s:%d\n",
(void *)addr, addr_p, state->task->comm,
state->task->pid);
return 0;
}
return addr;
}
EXPORT_SYMBOL_GPL(unwind_get_return_address);
static size_t regs_size(struct pt_regs *regs)
{
/* x86_32 regs from kernel mode are two words shorter: */
if (IS_ENABLED(CONFIG_X86_32) && !user_mode(regs))
return sizeof(*regs) - 2*sizeof(long);
return sizeof(*regs);
}
static bool is_last_task_frame(struct unwind_state *state)
{
unsigned long bp = (unsigned long)state->bp;
unsigned long regs = (unsigned long)task_pt_regs(state->task);
return bp == regs - FRAME_HEADER_SIZE;
}
/*
* This determines if the frame pointer actually contains an encoded pointer to
* pt_regs on the stack. See ENCODE_FRAME_POINTER.
*/
static struct pt_regs *decode_frame_pointer(unsigned long *bp)
{
unsigned long regs = (unsigned long)bp;
if (!(regs & 0x1))
return NULL;
return (struct pt_regs *)(regs & ~0x1);
}
static bool update_stack_state(struct unwind_state *state, void *addr,
size_t len)
{
......@@ -43,26 +85,117 @@ static bool update_stack_state(struct unwind_state *state, void *addr,
bool unwind_next_frame(struct unwind_state *state)
{
unsigned long *next_bp;
struct pt_regs *regs;
unsigned long *next_bp, *next_frame;
size_t next_len;
enum stack_type prev_type = state->stack_info.type;
if (unwind_done(state))
return false;
/* have we reached the end? */
if (state->regs && user_mode(state->regs))
goto the_end;
if (is_last_task_frame(state)) {
regs = task_pt_regs(state->task);
/*
* kthreads (other than the boot CPU's idle thread) have some
* partial regs at the end of their stack which were placed
* there by copy_thread_tls(). But the regs don't have any
* useful information, so we can skip them.
*
* This user_mode() check is slightly broader than a PF_KTHREAD
* check because it also catches the awkward situation where a
* newly forked kthread transitions into a user task by calling
* do_execve(), which eventually clears PF_KTHREAD.
*/
if (!user_mode(regs))
goto the_end;
/*
* We're almost at the end, but not quite: there's still the
* syscall regs frame. Entry code doesn't encode the regs
* pointer for syscalls, so we have to set it manually.
*/
state->regs = regs;
state->bp = NULL;
return true;
}
/* get the next frame pointer */
if (state->regs)
next_bp = (unsigned long *)state->regs->bp;
else
next_bp = (unsigned long *)*state->bp;
/* is the next frame pointer an encoded pointer to pt_regs? */
regs = decode_frame_pointer(next_bp);
if (regs) {
next_frame = (unsigned long *)regs;
next_len = sizeof(*regs);
} else {
next_frame = next_bp;
next_len = FRAME_HEADER_SIZE;
}
/* make sure the next frame's data is accessible */
if (!update_stack_state(state, next_bp, FRAME_HEADER_SIZE))
return false;
if (!update_stack_state(state, next_frame, next_len)) {
/*
* Don't warn on bad regs->bp. An interrupt in entry code
* might cause a false positive warning.
*/
if (state->regs)
goto the_end;
goto bad_address;
}
/* Make sure it only unwinds up and doesn't overlap the last frame: */
if (state->stack_info.type == prev_type) {
if (state->regs && (void *)next_frame < (void *)state->regs + regs_size(state->regs))
goto bad_address;
if (state->bp && (void *)next_frame < (void *)state->bp + FRAME_HEADER_SIZE)
goto bad_address;
}
/* move to the next frame */
if (regs) {
state->regs = regs;
state->bp = NULL;
} else {
state->bp = next_bp;
state->regs = NULL;
}
return true;
bad_address:
if (state->regs) {
printk_deferred_once(KERN_WARNING
"WARNING: kernel stack regs at %p in %s:%d has bad 'bp' value %p\n",
state->regs, state->task->comm,
state->task->pid, next_frame);
} else {
printk_deferred_once(KERN_WARNING
"WARNING: kernel stack frame pointer at %p in %s:%d has bad value %p\n",
state->bp, state->task->comm,
state->task->pid, next_frame);
}
the_end:
state->stack_info.type = STACK_TYPE_UNKNOWN;
return false;
}
EXPORT_SYMBOL_GPL(unwind_next_frame);
void __unwind_start(struct unwind_state *state, struct task_struct *task,
struct pt_regs *regs, unsigned long *first_frame)
{
unsigned long *bp, *frame;
size_t len;
memset(state, 0, sizeof(*state));
state->task = task;
......@@ -73,12 +206,22 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
}
/* set up the starting stack frame */
state->bp = get_frame_pointer(task, regs);
bp = get_frame_pointer(task, regs);
regs = decode_frame_pointer(bp);
if (regs) {
state->regs = regs;
frame = (unsigned long *)regs;
len = sizeof(*regs);
} else {
state->bp = bp;
frame = bp;
len = FRAME_HEADER_SIZE;
}
/* initialize stack info and make sure the frame data is accessible */
get_stack_info(state->bp, state->task, &state->stack_info,
get_stack_info(frame, state->task, &state->stack_info,
&state->stack_mask);
update_stack_state(state, state->bp, FRAME_HEADER_SIZE);
update_stack_state(state, frame, len);
/*
* The caller can provide the address of the first frame directly
......
......@@ -91,10 +91,10 @@ SECTIONS
/* Text and read-only data */
.text : AT(ADDR(.text) - LOAD_OFFSET) {
_text = .;
_stext = .;
/* bootstrapping code */
HEAD_TEXT
. = ALIGN(8);
_stext = .;
TEXT_TEXT
SCHED_TEXT
CPUIDLE_TEXT
......
......@@ -16,53 +16,6 @@
#include <asm/smap.h>
#include <asm/export.h>
/* Standard copy_to_user with segment limit checking */
ENTRY(_copy_to_user)
mov PER_CPU_VAR(current_task), %rax
movq %rdi,%rcx
addq %rdx,%rcx
jc bad_to_user
cmpq TASK_addr_limit(%rax),%rcx
ja bad_to_user
ALTERNATIVE_2 "jmp copy_user_generic_unrolled", \
"jmp copy_user_generic_string", \
X86_FEATURE_REP_GOOD, \
"jmp copy_user_enhanced_fast_string", \
X86_FEATURE_ERMS
ENDPROC(_copy_to_user)
EXPORT_SYMBOL(_copy_to_user)
/* Standard copy_from_user with segment limit checking */
ENTRY(_copy_from_user)
mov PER_CPU_VAR(current_task), %rax
movq %rsi,%rcx
addq %rdx,%rcx
jc bad_from_user
cmpq TASK_addr_limit(%rax),%rcx
ja bad_from_user
ALTERNATIVE_2 "jmp copy_user_generic_unrolled", \
"jmp copy_user_generic_string", \
X86_FEATURE_REP_GOOD, \
"jmp copy_user_enhanced_fast_string", \
X86_FEATURE_ERMS
ENDPROC(_copy_from_user)
EXPORT_SYMBOL(_copy_from_user)
.section .fixup,"ax"
/* must zero dest */
ENTRY(bad_from_user)
bad_from_user:
movl %edx,%ecx
xorl %eax,%eax
rep
stosb
bad_to_user:
movl %edx,%eax
ret
ENDPROC(bad_from_user)
.previous
/*
* copy_user_generic_unrolled - memory copy with exception handling.
* This version is for CPUs like P4 that don't have efficient micro
......
......@@ -34,3 +34,52 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
return ret;
}
EXPORT_SYMBOL_GPL(copy_from_user_nmi);
/**
* copy_to_user: - Copy a block of data into user space.
* @to: Destination address, in user space.
* @from: Source address, in kernel space.
* @n: Number of bytes to copy.
*
* Context: User context only. This function may sleep if pagefaults are
* enabled.
*
* Copy data from kernel space to user space.
*
* Returns number of bytes that could not be copied.
* On success, this will be zero.
*/
unsigned long _copy_to_user(void __user *to, const void *from, unsigned n)
{
if (access_ok(VERIFY_WRITE, to, n))
n = __copy_to_user(to, from, n);
return n;
}
EXPORT_SYMBOL(_copy_to_user);
/**
* copy_from_user: - Copy a block of data from user space.
* @to: Destination address, in kernel space.
* @from: Source address, in user space.
* @n: Number of bytes to copy.
*
* Context: User context only. This function may sleep if pagefaults are
* enabled.
*
* Copy data from user space to kernel space.
*
* Returns number of bytes that could not be copied.
* On success, this will be zero.
*
* If some data could not be copied, this function will pad the copied
* data to the requested size using zero bytes.
*/
unsigned long _copy_from_user(void *to, const void __user *from, unsigned n)
{
if (access_ok(VERIFY_READ, from, n))
n = __copy_from_user(to, from, n);
else
memset(to, 0, n);
return n;
}
EXPORT_SYMBOL(_copy_from_user);
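
Illustrative caller (not part of the patch, all names invented): a write()
handler pulling a small user buffer into the kernel with copy_from_user().
A non-zero return value means some bytes could not be copied, so the handler
fails with -EFAULT.

static ssize_t example_write(struct file *file, const char __user *ubuf,
			     size_t count, loff_t *ppos)
{
	char kbuf[64];

	if (count > sizeof(kbuf))
		count = sizeof(kbuf);

	if (copy_from_user(kbuf, ubuf, count))
		return -EFAULT;

	/* ... act on kbuf ... */
	return count;
}
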
......@@ -640,52 +640,3 @@ unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *fr
return n;
}
EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero);
/**
* copy_to_user: - Copy a block of data into user space.
* @to: Destination address, in user space.
* @from: Source address, in kernel space.
* @n: Number of bytes to copy.
*
* Context: User context only. This function may sleep if pagefaults are
* enabled.
*
* Copy data from kernel space to user space.
*
* Returns number of bytes that could not be copied.
* On success, this will be zero.
*/
unsigned long _copy_to_user(void __user *to, const void *from, unsigned n)
{
if (access_ok(VERIFY_WRITE, to, n))
n = __copy_to_user(to, from, n);
return n;
}
EXPORT_SYMBOL(_copy_to_user);
/**
* copy_from_user: - Copy a block of data from user space.
* @to: Destination address, in kernel space.
* @from: Source address, in user space.
* @n: Number of bytes to copy.
*
* Context: User context only. This function may sleep if pagefaults are
* enabled.
*
* Copy data from user space to kernel space.
*
* Returns number of bytes that could not be copied.
* On success, this will be zero.
*
* If some data could not be copied, this function will pad the copied
* data to the requested size using zero bytes.
*/
unsigned long _copy_from_user(void *to, const void __user *from, unsigned n)
{
if (access_ok(VERIFY_READ, from, n))
n = __copy_from_user(to, from, n);
else
memset(to, 0, n);
return n;
}
EXPORT_SYMBOL(_copy_from_user);
......@@ -679,8 +679,7 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code,
printk(KERN_CONT "paging request");
printk(KERN_CONT " at %p\n", (void *) address);
printk(KERN_ALERT "IP:");
printk_address(regs->ip);
printk(KERN_ALERT "IP: %pS\n", (void *)regs->ip);
dump_pagetable(address);
}
......
......@@ -387,8 +387,8 @@ static void uv_nmi_dump_cpu_ip_hdr(void)
/* Dump Instruction Pointer info */
static void uv_nmi_dump_cpu_ip(int cpu, struct pt_regs *regs)
{
pr_info("UV: %4d %6d %-32.32s ", cpu, current->pid, current->comm);
printk_address(regs->ip);
pr_info("UV: %4d %6d %-32.32s %pS",
cpu, current->pid, current->comm, (void *)regs->ip);
}
/*
......
......@@ -269,7 +269,8 @@ int main(int argc, char **argv)
insns++;
}
fprintf(stdout, "%s: %s: decoded and checked %d %s instructions with %d errors (seed:0x%x)\n",
fprintf((errors) ? stderr : stdout,
"%s: %s: decoded and checked %d %s instructions with %d errors (seed:0x%x)\n",
prog,
(errors) ? "Failure" : "Success",
insns,
......
......@@ -167,7 +167,7 @@ int main(int argc, char **argv)
fprintf(stderr, "Warning: decoded and checked %d"
" instructions with %d warnings\n", insns, warnings);
else
fprintf(stderr, "Succeed: decoded and checked %d"
fprintf(stdout, "Success: decoded and checked %d"
" instructions\n", insns);
return 0;
}
......@@ -982,13 +982,6 @@ static struct ctl_table kern_table[] = {
.mode = 0444,
.proc_handler = proc_dointvec,
},
{
.procname = "kstack_depth_to_print",
.data = &kstack_depth_to_print,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "io_delay_type",
.data = &io_delay_type,
......
......@@ -6399,8 +6399,8 @@ unsigned long free_reserved_area(void *start, void *end, int poison, char *s)
}
if (pages && s)
pr_info("Freeing %s memory: %ldK (%p - %p)\n",
s, pages << (PAGE_SHIFT - 10), start, end);
pr_info("Freeing %s memory: %ldK\n",
s, pages << (PAGE_SHIFT - 10));
return pages;
}
......
......@@ -139,7 +139,8 @@ handle_line() {
while read line; do
# Let's see if we have an address in the line
if [[ $line =~ \[\<([^]]+)\>\] ]]; then
if [[ $line =~ \[\<([^]]+)\>\] ]] ||
[[ $line =~ [^+\ ]+\+0x[0-9a-f]+/0x[0-9a-f]+ ]]; then
# Translate address to line numbers
handle_line "$line"
# Is it a code line?
......
......@@ -105,9 +105,18 @@ __faddr2line() {
# In rare cases there might be duplicates.
while read symbol; do
local fields=($symbol)
local sym_base=0x${fields[1]}
local sym_size=${fields[2]}
local sym_type=${fields[3]}
local sym_base=0x${fields[0]}
local sym_type=${fields[1]}
local sym_end=0x${fields[3]}
# calculate the size
local sym_size=$(($sym_end - $sym_base))
if [[ -z $sym_size ]] || [[ $sym_size -le 0 ]]; then
warn "bad symbol size: base: $sym_base end: $sym_end"
DONE=1
return
fi
sym_size=0x$(printf %x $sym_size)
# calculate the address
local addr=$(($sym_base + $offset))
......@@ -116,26 +125,26 @@ __faddr2line() {
DONE=1
return
fi
local hexaddr=0x$(printf %x $addr)
addr=0x$(printf %x $addr)
# weed out non-function symbols
if [[ $sym_type != "FUNC" ]]; then
if [[ $sym_type != t ]] && [[ $sym_type != T ]]; then
[[ $print_warnings = 1 ]] &&
echo "skipping $func address at $hexaddr due to non-function symbol"
echo "skipping $func address at $addr due to non-function symbol of type '$sym_type'"
continue
fi
# if the user provided a size, make sure it matches the symbol's size
if [[ -n $size ]] && [[ $size -ne $sym_size ]]; then
[[ $print_warnings = 1 ]] &&
echo "skipping $func address at $hexaddr due to size mismatch ($size != $sym_size)"
echo "skipping $func address at $addr due to size mismatch ($size != $sym_size)"
continue;
fi
# make sure the provided offset is within the symbol's range
if [[ $offset -gt $sym_size ]]; then
[[ $print_warnings = 1 ]] &&
echo "skipping $func address at $hexaddr due to size mismatch ($offset > $sym_size)"
echo "skipping $func address at $addr due to size mismatch ($offset > $sym_size)"
continue
fi
......@@ -143,12 +152,12 @@ __faddr2line() {
[[ $FIRST = 0 ]] && echo
FIRST=0
local hexsize=0x$(printf %x $sym_size)
echo "$func+$offset/$hexsize:"
addr2line -fpie $objfile $hexaddr | sed "s; $dir_prefix\(\./\)*; ;"
# pass real address to addr2line
echo "$func+$offset/$sym_size:"
addr2line -fpie $objfile $addr | sed "s; $dir_prefix\(\./\)*; ;"
DONE=1
done < <(readelf -sW $objfile | awk -v f=$func '$8 == f {print}')
done < <(nm -n $objfile | awk -v fn=$func '$3 == fn { found=1; line=$0; start=$1; next } found == 1 { found=0; print line, $1 }')
}
[[ $# -lt 2 ]] && usage
......
......@@ -6,7 +6,7 @@ include ../lib.mk
TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \
check_initial_reg_state sigreturn ldt_gdt iopl \
protection_keys
protection_keys test_vdso
TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
test_FCMOV test_FCOMI test_FISTTP \
vdso_restorer
......
/*
* ldt_gdt.c - Test cases for LDT and GDT access
* Copyright (c) 2011-2015 Andrew Lutomirski
*/
#define _GNU_SOURCE
#include <stdio.h>
#include <sys/time.h>
#include <time.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <dlfcn.h>
#include <string.h>
#include <errno.h>
#include <sched.h>
#include <stdbool.h>
#ifndef SYS_getcpu
# ifdef __x86_64__
# define SYS_getcpu 309
# else
# define SYS_getcpu 318
# endif
#endif
int nerrs = 0;
#ifdef __x86_64__
# define VSYS(x) (x)
#else
# define VSYS(x) 0
#endif
typedef long (*getcpu_t)(unsigned *, unsigned *, void *);
const getcpu_t vgetcpu = (getcpu_t)VSYS(0xffffffffff600800);
getcpu_t vdso_getcpu;
void fill_function_pointers()
{
void *vdso = dlopen("linux-vdso.so.1",
RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
if (!vdso)
vdso = dlopen("linux-gate.so.1",
RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
if (!vdso) {
printf("[WARN]\tfailed to find vDSO\n");
return;
}
vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu");
if (!vdso_getcpu)
printf("Warning: failed to find getcpu in vDSO\n");
}
static long sys_getcpu(unsigned * cpu, unsigned * node,
void* cache)
{
return syscall(__NR_getcpu, cpu, node, cache);
}
static void test_getcpu(void)
{
printf("[RUN]\tTesting getcpu...\n");
for (int cpu = 0; ; cpu++) {
cpu_set_t cpuset;
CPU_ZERO(&cpuset);
CPU_SET(cpu, &cpuset);
if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
return;
unsigned cpu_sys, cpu_vdso, cpu_vsys,
node_sys, node_vdso, node_vsys;
long ret_sys, ret_vdso = 1, ret_vsys = 1;
unsigned node;
ret_sys = sys_getcpu(&cpu_sys, &node_sys, 0);
if (vdso_getcpu)
ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0);
if (vgetcpu)
ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0);
if (!ret_sys)
node = node_sys;
else if (!ret_vdso)
node = node_vdso;
else if (!ret_vsys)
node = node_vsys;
bool ok = true;
if (!ret_sys && (cpu_sys != cpu || node_sys != node))
ok = false;
if (!ret_vdso && (cpu_vdso != cpu || node_vdso != node))
ok = false;
if (!ret_vsys && (cpu_vsys != cpu || node_vsys != node))
ok = false;
printf("[%s]\tCPU %u:", ok ? "OK" : "FAIL", cpu);
if (!ret_sys)
printf(" syscall: cpu %u, node %u", cpu_sys, node_sys);
if (!ret_vdso)
printf(" vdso: cpu %u, node %u", cpu_vdso, node_vdso);
if (!ret_vsys)
printf(" vsyscall: cpu %u, node %u", cpu_vsys,
node_vsys);
printf("\n");
if (!ok)
nerrs++;
}
}
int main(int argc, char **argv)
{
fill_function_pointers();
test_getcpu();
return nerrs ? 1 : 0;
}