Commit b5aa97e8 authored by Ingo Molnar's avatar Ingo Molnar

Merge branches 'x86/signal' and 'x86/irq' into perfcounters/core

Merge these pending x86 tree changes into the perfcounters tree
to avoid conflicts.
......@@ -242,21 +242,13 @@ config X86_FIND_SMP_CONFIG
def_bool y
depends on X86_MPPARSE || X86_VOYAGER
if ACPI
config X86_MPPARSE
def_bool y
bool "Enable MPS table"
bool "Enable MPS table" if ACPI
default y
depends on X86_LOCAL_APIC
help
For old smp systems that do not have proper acpi support. Newer systems
(esp with 64bit cpus) with acpi support, MADT and DSDT will override it
endif
if !ACPI
config X86_MPPARSE
def_bool y
depends on X86_LOCAL_APIC
endif
choice
prompt "Subarchitecture Type"
......
......@@ -197,23 +197,28 @@ struct rt_sigframe
/* fp state follows here */
};
#define COPY(x) { \
unsigned int reg; \
err |= __get_user(reg, &sc->x); \
regs->x = reg; \
#define COPY(x) { \
err |= __get_user(regs->x, &sc->x); \
}
#define RELOAD_SEG(seg,mask) \
{ unsigned int cur; \
unsigned short pre; \
err |= __get_user(pre, &sc->seg); \
savesegment(seg, cur); \
pre |= mask; \
if (pre != cur) loadsegment(seg, pre); }
#define COPY_SEG_CPL3(seg) { \
unsigned short tmp; \
err |= __get_user(tmp, &sc->seg); \
regs->seg = tmp | 3; \
}
#define RELOAD_SEG(seg) { \
unsigned int cur, pre; \
err |= __get_user(pre, &sc->seg); \
savesegment(seg, cur); \
pre |= 3; \
if (pre != cur) \
loadsegment(seg, pre); \
}
static int ia32_restore_sigcontext(struct pt_regs *regs,
struct sigcontext_ia32 __user *sc,
unsigned int *peax)
unsigned int *pax)
{
unsigned int tmpflags, gs, oldgs, err = 0;
void __user *buf;
......@@ -240,18 +245,16 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
if (gs != oldgs)
load_gs_index(gs);
RELOAD_SEG(fs, 3);
RELOAD_SEG(ds, 3);
RELOAD_SEG(es, 3);
RELOAD_SEG(fs);
RELOAD_SEG(ds);
RELOAD_SEG(es);
COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
COPY(dx); COPY(cx); COPY(ip);
/* Don't touch extended registers */
err |= __get_user(regs->cs, &sc->cs);
regs->cs |= 3;
err |= __get_user(regs->ss, &sc->ss);
regs->ss |= 3;
COPY_SEG_CPL3(cs);
COPY_SEG_CPL3(ss);
err |= __get_user(tmpflags, &sc->flags);
regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
......@@ -262,9 +265,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
buf = compat_ptr(tmp);
err |= restore_i387_xstate_ia32(buf);
err |= __get_user(tmp, &sc->ax);
*peax = tmp;
err |= __get_user(*pax, &sc->ax);
return err;
}
......@@ -359,20 +360,15 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc,
err |= __put_user(regs->dx, &sc->dx);
err |= __put_user(regs->cx, &sc->cx);
err |= __put_user(regs->ax, &sc->ax);
err |= __put_user(regs->cs, &sc->cs);
err |= __put_user(regs->ss, &sc->ss);
err |= __put_user(current->thread.trap_no, &sc->trapno);
err |= __put_user(current->thread.error_code, &sc->err);
err |= __put_user(regs->ip, &sc->ip);
err |= __put_user(regs->cs, (unsigned int __user *)&sc->cs);
err |= __put_user(regs->flags, &sc->flags);
err |= __put_user(regs->sp, &sc->sp_at_signal);
err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss);
tmp = save_i387_xstate_ia32(fpstate);
if (tmp < 0)
err = -EFAULT;
else
err |= __put_user(ptr_to_compat(tmp ? fpstate : NULL),
&sc->fpstate);
err |= __put_user(ptr_to_compat(fpstate), &sc->fpstate);
/* non-iBCS2 extensions.. */
err |= __put_user(mask, &sc->oldmask);
......@@ -408,6 +404,8 @@ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
if (used_math()) {
sp = sp - sig_xstate_ia32_size;
*fpstate = (struct _fpstate_ia32 *) sp;
if (save_i387_xstate_ia32(*fpstate) < 0)
return (void __user *) -1L;
}
sp -= frame_size;
......@@ -430,12 +428,10 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
u16 poplmovl;
u32 val;
u16 int80;
u16 pad;
} __attribute__((packed)) code = {
0xb858, /* popl %eax ; movl $...,%eax */
__NR_ia32_sigreturn,
0x80cd, /* int $0x80 */
0,
};
frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
......@@ -511,8 +507,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
u8 movl;
u32 val;
u16 int80;
u16 pad;
u8 pad2;
u8 pad;
} __attribute__((packed)) code = {
0xb8,
__NR_ia32_rt_sigreturn,
......@@ -572,11 +567,6 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
regs->dx = (unsigned long) &frame->info;
regs->cx = (unsigned long) &frame->uc;
/* Make -mregparm=3 work */
regs->ax = sig;
regs->dx = (unsigned long) &frame->info;
regs->cx = (unsigned long) &frame->uc;
loadsegment(ds, __USER32_DS);
loadsegment(es, __USER32_DS);
......
......@@ -168,7 +168,15 @@ static inline void __change_bit(int nr, volatile unsigned long *addr)
*/
static inline void change_bit(int nr, volatile unsigned long *addr)
{
asm volatile(LOCK_PREFIX "btc %1,%0" : ADDR : "Ir" (nr));
if (IS_IMMEDIATE(nr)) {
asm volatile(LOCK_PREFIX "xorb %1,%0"
: CONST_MASK_ADDR(nr, addr)
: "iq" ((u8)CONST_MASK(nr)));
} else {
asm volatile(LOCK_PREFIX "btc %1,%0"
: BITOP_ADDR(addr)
: "Ir" (nr));
}
}
/**
......
......@@ -4,26 +4,33 @@
#include <asm/types.h>
#include <linux/compiler.h>
#ifdef __GNUC__
#define __LITTLE_ENDIAN
#ifdef __i386__
static inline __attribute_const__ __u32 ___arch__swab32(__u32 x)
static inline __attribute_const__ __u32 __arch_swab32(__u32 val)
{
#ifdef CONFIG_X86_BSWAP
asm("bswap %0" : "=r" (x) : "0" (x));
#else
#ifdef __i386__
# ifdef CONFIG_X86_BSWAP
asm("bswap %0" : "=r" (val) : "0" (val));
# else
asm("xchgb %b0,%h0\n\t" /* swap lower bytes */
"rorl $16,%0\n\t" /* swap words */
"xchgb %b0,%h0" /* swap higher bytes */
: "=q" (x)
: "0" (x));
: "=q" (val)
: "0" (val));
# endif
#else /* __i386__ */
asm("bswapl %0"
: "=r" (val)
: "0" (val));
#endif
return x;
return val;
}
#define __arch_swab32 __arch_swab32
static inline __attribute_const__ __u64 ___arch__swab64(__u64 val)
static inline __attribute_const__ __u64 __arch_swab64(__u64 val)
{
#ifdef __i386__
union {
struct {
__u32 a;
......@@ -32,50 +39,27 @@ static inline __attribute_const__ __u64 ___arch__swab64(__u64 val)
__u64 u;
} v;
v.u = val;
#ifdef CONFIG_X86_BSWAP
# ifdef CONFIG_X86_BSWAP
asm("bswapl %0 ; bswapl %1 ; xchgl %0,%1"
: "=r" (v.s.a), "=r" (v.s.b)
: "0" (v.s.a), "1" (v.s.b));
#else
v.s.a = ___arch__swab32(v.s.a);
v.s.b = ___arch__swab32(v.s.b);
# else
v.s.a = __arch_swab32(v.s.a);
v.s.b = __arch_swab32(v.s.b);
asm("xchgl %0,%1"
: "=r" (v.s.a), "=r" (v.s.b)
: "0" (v.s.a), "1" (v.s.b));
#endif
# endif
return v.u;
}
#else /* __i386__ */
static inline __attribute_const__ __u64 ___arch__swab64(__u64 x)
{
asm("bswapq %0"
: "=r" (x)
: "0" (x));
return x;
}
static inline __attribute_const__ __u32 ___arch__swab32(__u32 x)
{
asm("bswapl %0"
: "=r" (x)
: "0" (x));
return x;
}
: "=r" (val)
: "0" (val));
return val;
#endif
}
#define __arch_swab64 __arch_swab64
/* Do not define swab16. Gcc is smart enough to recognize "C" version and
convert it into rotation or exhange. */
#define __arch__swab64(x) ___arch__swab64(x)
#define __arch__swab32(x) ___arch__swab32(x)
#define __BYTEORDER_HAS_U64__
#endif /* __GNUC__ */
#include <linux/byteorder/little_endian.h>
#include <linux/byteorder.h>
#endif /* _ASM_X86_BYTEORDER_H */
......@@ -6,56 +6,91 @@
#endif
/*
Macros for dwarf2 CFI unwind table entries.
See "as.info" for details on these pseudo ops. Unfortunately
they are only supported in very new binutils, so define them
away for older version.
* Macros for dwarf2 CFI unwind table entries.
* See "as.info" for details on these pseudo ops. Unfortunately
* they are only supported in very new binutils, so define them
* away for older version.
*/
#ifdef CONFIG_AS_CFI
#define CFI_STARTPROC .cfi_startproc
#define CFI_ENDPROC .cfi_endproc
#define CFI_DEF_CFA .cfi_def_cfa
#define CFI_DEF_CFA_REGISTER .cfi_def_cfa_register
#define CFI_DEF_CFA_OFFSET .cfi_def_cfa_offset
#define CFI_ADJUST_CFA_OFFSET .cfi_adjust_cfa_offset
#define CFI_OFFSET .cfi_offset
#define CFI_REL_OFFSET .cfi_rel_offset
#define CFI_REGISTER .cfi_register
#define CFI_RESTORE .cfi_restore
#define CFI_REMEMBER_STATE .cfi_remember_state
#define CFI_RESTORE_STATE .cfi_restore_state
#define CFI_UNDEFINED .cfi_undefined
#define CFI_STARTPROC .cfi_startproc
#define CFI_ENDPROC .cfi_endproc
#define CFI_DEF_CFA .cfi_def_cfa
#define CFI_DEF_CFA_REGISTER .cfi_def_cfa_register
#define CFI_DEF_CFA_OFFSET .cfi_def_cfa_offset
#define CFI_ADJUST_CFA_OFFSET .cfi_adjust_cfa_offset
#define CFI_OFFSET .cfi_offset
#define CFI_REL_OFFSET .cfi_rel_offset
#define CFI_REGISTER .cfi_register
#define CFI_RESTORE .cfi_restore
#define CFI_REMEMBER_STATE .cfi_remember_state
#define CFI_RESTORE_STATE .cfi_restore_state
#define CFI_UNDEFINED .cfi_undefined
#ifdef CONFIG_AS_CFI_SIGNAL_FRAME
#define CFI_SIGNAL_FRAME .cfi_signal_frame
#define CFI_SIGNAL_FRAME .cfi_signal_frame
#else
#define CFI_SIGNAL_FRAME
#endif
#else
/* Due to the structure of pre-exisiting code, don't use assembler line
comment character # to ignore the arguments. Instead, use a dummy macro. */
/*
* Due to the structure of pre-exisiting code, don't use assembler line
* comment character # to ignore the arguments. Instead, use a dummy macro.
*/
.macro cfi_ignore a=0, b=0, c=0, d=0
.endm
#define CFI_STARTPROC cfi_ignore
#define CFI_ENDPROC cfi_ignore
#define CFI_DEF_CFA cfi_ignore
#define CFI_STARTPROC cfi_ignore
#define CFI_ENDPROC cfi_ignore
#define CFI_DEF_CFA cfi_ignore
#define CFI_DEF_CFA_REGISTER cfi_ignore
#define CFI_DEF_CFA_OFFSET cfi_ignore
#define CFI_ADJUST_CFA_OFFSET cfi_ignore
#define CFI_OFFSET cfi_ignore
#define CFI_REL_OFFSET cfi_ignore
#define CFI_REGISTER cfi_ignore
#define CFI_RESTORE cfi_ignore
#define CFI_REMEMBER_STATE cfi_ignore
#define CFI_RESTORE_STATE cfi_ignore
#define CFI_UNDEFINED cfi_ignore
#define CFI_SIGNAL_FRAME cfi_ignore
#define CFI_OFFSET cfi_ignore
#define CFI_REL_OFFSET cfi_ignore
#define CFI_REGISTER cfi_ignore
#define CFI_RESTORE cfi_ignore
#define CFI_REMEMBER_STATE cfi_ignore
#define CFI_RESTORE_STATE cfi_ignore
#define CFI_UNDEFINED cfi_ignore
#define CFI_SIGNAL_FRAME cfi_ignore
#endif
/*
* An attempt to make CFI annotations more or less
* correct and shorter. It is implied that you know
* what you're doing if you use them.
*/
#ifdef __ASSEMBLY__
#ifdef CONFIG_X86_64
.macro pushq_cfi reg
pushq \reg
CFI_ADJUST_CFA_OFFSET 8
.endm
.macro popq_cfi reg
popq \reg
CFI_ADJUST_CFA_OFFSET -8
.endm
.macro movq_cfi reg offset=0
movq %\reg, \offset(%rsp)
CFI_REL_OFFSET \reg, \offset
.endm
.macro movq_cfi_restore offset reg
movq \offset(%rsp), %\reg
CFI_RESTORE \reg
.endm
#else /*!CONFIG_X86_64*/
/* 32bit defenitions are missed yet */
#endif /*!CONFIG_X86_64*/
#endif /*__ASSEMBLY__*/
#endif /* _ASM_X86_DWARF2_H */
......@@ -109,9 +109,7 @@ extern asmlinkage void smp_invalidate_interrupt(struct pt_regs *);
#endif
#endif
#ifdef CONFIG_X86_32
extern void (*const interrupt[NR_VECTORS])(void);
#endif
extern void (*__initconst interrupt[NR_VECTORS-FIRST_EXTERNAL_VECTOR])(void);
typedef int vector_irq_t[NR_VECTORS];
DECLARE_PER_CPU(vector_irq_t, vector_irq);
......
......@@ -31,10 +31,6 @@ static inline int irq_canonicalize(int irq)
# endif
#endif
#ifdef CONFIG_IRQBALANCE
extern int irqbalance_disable(char *str);
#endif
#ifdef CONFIG_HOTPLUG_CPU
#include <linux/cpumask.h>
extern void fixup_irqs(cpumask_t map);
......
......@@ -9,6 +9,8 @@
#include <asm/percpu.h>
#define ARCH_HAS_OWN_IRQ_REGS
DECLARE_PER_CPU(struct pt_regs *, irq_regs);
static inline struct pt_regs *get_irq_regs(void)
......
......@@ -57,5 +57,65 @@
#define __ALIGN_STR ".align 16,0x90"
#endif
/*
* to check ENTRY_X86/END_X86 and
* KPROBE_ENTRY_X86/KPROBE_END_X86
* unbalanced-missed-mixed appearance
*/
#define __set_entry_x86 .set ENTRY_X86_IN, 0
#define __unset_entry_x86 .set ENTRY_X86_IN, 1
#define __set_kprobe_x86 .set KPROBE_X86_IN, 0
#define __unset_kprobe_x86 .set KPROBE_X86_IN, 1
#define __macro_err_x86 .error "ENTRY_X86/KPROBE_X86 unbalanced,missed,mixed"
#define __check_entry_x86 \
.ifdef ENTRY_X86_IN; \
.ifeq ENTRY_X86_IN; \
__macro_err_x86; \
.abort; \
.endif; \
.endif
#define __check_kprobe_x86 \
.ifdef KPROBE_X86_IN; \
.ifeq KPROBE_X86_IN; \
__macro_err_x86; \
.abort; \
.endif; \
.endif
#define __check_entry_kprobe_x86 \
__check_entry_x86; \
__check_kprobe_x86
#define ENTRY_KPROBE_FINAL_X86 __check_entry_kprobe_x86
#define ENTRY_X86(name) \
__check_entry_kprobe_x86; \
__set_entry_x86; \
.globl name; \
__ALIGN; \
name:
#define END_X86(name) \
__unset_entry_x86; \
__check_entry_kprobe_x86; \
.size name, .-name
#define KPROBE_ENTRY_X86(name) \
__check_entry_kprobe_x86; \
__set_kprobe_x86; \
.pushsection .kprobes.text, "ax"; \
.globl name; \
__ALIGN; \
name:
#define KPROBE_END_X86(name) \
__unset_kprobe_x86; \
__check_entry_kprobe_x86; \
.size name, .-name; \
.popsection
#endif /* _ASM_X86_LINKAGE_H */
......@@ -33,7 +33,7 @@ asmlinkage int sys_sigaction(int, const struct old_sigaction __user *,
struct old_sigaction __user *);
asmlinkage int sys_sigaltstack(unsigned long);
asmlinkage unsigned long sys_sigreturn(unsigned long);
asmlinkage int sys_rt_sigreturn(unsigned long);
asmlinkage int sys_rt_sigreturn(struct pt_regs);
/* kernel/ioport.c */
asmlinkage long sys_iopl(unsigned long);
......
......@@ -34,8 +34,6 @@ static inline cycles_t get_cycles(void)
static __always_inline cycles_t vget_cycles(void)
{
cycles_t cycles;
/*
* We only do VDSOs on TSC capable CPUs, so this shouldnt
* access boot_cpu_data (which is not VDSO-safe):
......@@ -44,11 +42,7 @@ static __always_inline cycles_t vget_cycles(void)
if (!cpu_has_tsc)
return 0;
#endif
rdtsc_barrier();
cycles = (cycles_t)__native_read_tsc();
rdtsc_barrier();
return cycles;
return (cycles_t)__native_read_tsc();
}
extern void tsc_init(void);
......
......@@ -12,6 +12,7 @@ CFLAGS_REMOVE_tsc.o = -pg
CFLAGS_REMOVE_rtc.o = -pg
CFLAGS_REMOVE_paravirt-spinlocks.o = -pg
CFLAGS_REMOVE_ftrace.o = -pg
CFLAGS_REMOVE_early_printk.o = -pg
endif
#
......@@ -23,7 +24,7 @@ CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp)
CFLAGS_hpet.o := $(nostackp)
CFLAGS_tsc.o := $(nostackp)
obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o
obj-y := process_$(BITS).o signal.o entry_$(BITS).o
obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
obj-y += time_$(BITS).o ioport.o ldt.o
obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o
......
......@@ -619,28 +619,37 @@ END(syscall_badsys)
27:;
/*
* Build the entry stubs and pointer table with
* some assembler magic.
* Build the entry stubs and pointer table with some assembler magic.
* We pack 7 stubs into a single 32-byte chunk, which will fit in a
* single cache line on all modern x86 implementations.
*/
.section .rodata,"a"
.section .init.rodata,"a"
ENTRY(interrupt)
.text
.p2align 5
.p2align CONFIG_X86_L1_CACHE_SHIFT
ENTRY(irq_entries_start)
RING0_INT_FRAME
vector=0
.rept NR_VECTORS
ALIGN
.if vector
vector=FIRST_EXTERNAL_VECTOR
.rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7
.balign 32
.rept 7
.if vector < NR_VECTORS
.if vector <> FIRST_EXTERNAL_VECTOR
CFI_ADJUST_CFA_OFFSET -4
.endif
1: pushl $~(vector)
.endif
1: pushl $(~vector+0x80) /* Note: always in signed byte range */
CFI_ADJUST_CFA_OFFSET 4
jmp common_interrupt
.previous
.if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
jmp 2f
.endif
.previous
.long 1b
.text
.text
vector=vector+1
.endif
.endr
2: jmp common_interrupt
.endr
END(irq_entries_start)
......@@ -652,8 +661,9 @@ END(interrupt)
* the CPU automatically disables interrupts when executing an IRQ vector,
* so IRQ-flags tracing has to follow that:
*/
ALIGN
.p2align CONFIG_X86_L1_CACHE_SHIFT
common_interrupt:
addl $-0x80,(%esp) /* Adjust vector into the [-256,-1] range */
SAVE_ALL
TRACE_IRQS_OFF
movl %esp,%eax
......@@ -678,65 +688,6 @@ ENDPROC(name)
/* The include is where all of the SMP etc. interrupts come from */
#include "entry_arch.h"
KPROBE_ENTRY(page_fault)
RING0_EC_FRAME
pushl $do_page_fault
CFI_ADJUST_CFA_OFFSET 4
ALIGN
error_code:
/* the function address is in %fs's slot on the stack */
pushl %es
CFI_ADJUST_CFA_OFFSET 4
/*CFI_REL_OFFSET es, 0*/
pushl %ds
CFI_ADJUST_CFA_OFFSET 4
/*CFI_REL_OFFSET ds, 0*/
pushl %eax
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET eax, 0
pushl %ebp
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET ebp, 0
pushl %edi
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET edi, 0
pushl %esi
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET esi, 0
pushl %edx
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET edx, 0
pushl %ecx
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET ecx, 0
pushl %ebx
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET ebx, 0
cld
pushl %fs
CFI_ADJUST_CFA_OFFSET 4
/*CFI_REL_OFFSET fs, 0*/
movl $(__KERNEL_PERCPU), %ecx
movl %ecx, %fs
UNWIND_ESPFIX_STACK
popl %ecx
CFI_ADJUST_CFA_OFFSET -4
/*CFI_REGISTER es, ecx*/
movl PT_FS(%esp), %edi # get the function address
movl PT_ORIG_EAX(%esp), %edx # get the error code
movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
mov %ecx, PT_FS(%esp)
/*CFI_REL_OFFSET fs, ES*/
movl $(__USER_DS), %ecx
movl %ecx, %ds
movl %ecx, %es
TRACE_IRQS_OFF
movl %esp,%eax # pt_regs pointer
call *%edi
jmp ret_from_exception
CFI_ENDPROC
KPROBE_END(page_fault)
ENTRY(coprocessor_error)
RING0_INT_FRAME
pushl $0
......@@ -767,140 +718,6 @@ ENTRY(device_not_available)
CFI_ENDPROC
END(device_not_available)
/*
* Debug traps and NMI can happen at the one SYSENTER instruction
* that sets up the real kernel stack. Check here, since we can't
* allow the wrong stack to be used.
*
* "TSS_sysenter_sp0+12" is because the NMI/debug handler will have
* already pushed 3 words if it hits on the sysenter instruction:
* eflags, cs and eip.
*
* We just load the right stack, and push the three (known) values
* by hand onto the new stack - while updating the return eip past
* the instruction that would have done it for sysenter.
*/
#define FIX_STACK(offset, ok, label) \
cmpw $__KERNEL_CS,4(%esp); \
jne ok; \
label: \
movl TSS_sysenter_sp0+offset(%esp),%esp; \
CFI_DEF_CFA esp, 0; \
CFI_UNDEFINED eip; \
pushfl; \
CFI_ADJUST_CFA_OFFSET 4; \
pushl $__KERNEL_CS; \
CFI_ADJUST_CFA_OFFSET 4; \
pushl $sysenter_past_esp; \
CFI_ADJUST_CFA_OFFSET 4; \
CFI_REL_OFFSET eip, 0
KPROBE_ENTRY(debug)
RING0_INT_FRAME
cmpl $ia32_sysenter_target,(%esp)
jne debug_stack_correct
FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
debug_stack_correct:
pushl $-1 # mark this as an int
CFI_ADJUST_CFA_OFFSET 4
SAVE_ALL
TRACE_IRQS_OFF
xorl %edx,%edx # error code 0
movl %esp,%eax # pt_regs pointer
call do_debug
jmp ret_from_exception
CFI_ENDPROC
KPROBE_END(debug)
/*
* NMI is doubly nasty. It can happen _while_ we're handling
* a debug fault, and the debug fault hasn't yet been able to
* clear up the stack. So we first check whether we got an
* NMI on the sysenter entry path, but after that we need to
* check whether we got an NMI on the debug path where the debug
* fault happened on the sysenter path.
*/
KPROBE_ENTRY(nmi)
RING0_INT_FRAME
pushl %eax
CFI_ADJUST_CFA_OFFSET 4
movl %ss, %eax
cmpw $__ESPFIX_SS, %ax
popl %eax
CFI_ADJUST_CFA_OFFSET -4
je nmi_espfix_stack
cmpl $ia32_sysenter_target,(%esp)
je nmi_stack_fixup
pushl %eax
CFI_ADJUST_CFA_OFFSET 4
movl %esp,%eax
/* Do not access memory above the end of our stack page,
* it might not exist.
*/
andl $(THREAD_SIZE-1),%eax
cmpl $(THREAD_SIZE-20),%eax
popl %eax
CFI_ADJUST_CFA_OFFSET -4
jae nmi_stack_correct
cmpl $ia32_sysenter_target,12(%esp)
je nmi_debug_stack_check
nmi_stack_correct:
/* We have a RING0_INT_FRAME here */
pushl %eax
CFI_ADJUST_CFA_OFFSET 4
SAVE_ALL
TRACE_IRQS_OFF
xorl %edx,%edx # zero error code
movl %esp,%eax # pt_regs pointer
call do_nmi
jmp restore_nocheck_notrace
CFI_ENDPROC
nmi_stack_fixup:
RING0_INT_FRAME
FIX_STACK(12,nmi_stack_correct, 1)
jmp nmi_stack_correct
nmi_debug_stack_check:
/* We have a RING0_INT_FRAME here */
cmpw $__KERNEL_CS,16(%esp)
jne nmi_stack_correct
cmpl $debug,(%esp)
jb nmi_stack_correct
cmpl $debug_esp_fix_insn,(%esp)
ja nmi_stack_correct
FIX_STACK(24,nmi_stack_correct, 1)
jmp nmi_stack_correct
nmi_espfix_stack:
/* We have a RING0_INT_FRAME here.
*
* create the pointer to lss back
*/
pushl %ss
CFI_ADJUST_CFA_OFFSET 4
pushl %esp
CFI_ADJUST_CFA_OFFSET 4
addw $4, (%esp)
/* copy the iret frame of 12 bytes */
.rept 3
pushl 16(%esp)
CFI_ADJUST_CFA_OFFSET 4
.endr
pushl %eax
CFI_ADJUST_CFA_OFFSET 4
SAVE_ALL
TRACE_IRQS_OFF
FIXUP_ESPFIX_STACK # %eax == %esp
xorl %edx,%edx # zero error code
call do_nmi
RESTORE_REGS
lss 12+4(%esp), %esp # back to espfix stack
CFI_ADJUST_CFA_OFFSET -24
jmp irq_return
CFI_ENDPROC
KPROBE_END(nmi)
#ifdef CONFIG_PARAVIRT
ENTRY(native_iret)
iret
......@@ -916,19 +733,6 @@ ENTRY(native_irq_enable_sysexit)
END(native_irq_enable_sysexit)
#endif
KPROBE_ENTRY(int3)
RING0_INT_FRAME
pushl $-1 # mark this as an int
CFI_ADJUST_CFA_OFFSET 4
SAVE_ALL
TRACE_IRQS_OFF
xorl %edx,%edx # zero error code
movl %esp,%eax # pt_regs pointer
call do_int3
jmp ret_from_exception
CFI_ENDPROC
KPROBE_END(int3)
ENTRY(overflow)
RING0_INT_FRAME
pushl $0
......@@ -993,14 +797,6 @@ ENTRY(stack_segment)
CFI_ENDPROC
END(stack_segment)
KPROBE_ENTRY(general_protection)
RING0_EC_FRAME
pushl $do_general_protection
CFI_ADJUST_CFA_OFFSET 4
jmp error_code
CFI_ENDPROC
KPROBE_END(general_protection)
ENTRY(alignment_check)
RING0_EC_FRAME
pushl $do_alignment_check
......@@ -1051,6 +847,7 @@ ENTRY(kernel_thread_helper)
push %eax
CFI_ADJUST_CFA_OFFSET 4
call do_exit
ud2 # padding for call trace
CFI_ENDPROC
ENDPROC(kernel_thread_helper)
......@@ -1210,3 +1007,227 @@ END(mcount)
#include "syscall_table_32.S"
syscall_table_size=(.-sys_call_table)
/*
* Some functions should be protected against kprobes
*/
.pushsection .kprobes.text, "ax"
ENTRY(page_fault)
RING0_EC_FRAME
pushl $do_page_fault
CFI_ADJUST_CFA_OFFSET 4
ALIGN
error_code:
/* the function address is in %fs's slot on the stack */
pushl %es
CFI_ADJUST_CFA_OFFSET 4
/*CFI_REL_OFFSET es, 0*/
pushl %ds
CFI_ADJUST_CFA_OFFSET 4
/*CFI_REL_OFFSET ds, 0*/
pushl %eax
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET eax, 0
pushl %ebp
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET ebp, 0
pushl %edi
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET edi, 0
pushl %esi
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET esi, 0
pushl %edx
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET edx, 0
pushl %ecx
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET ecx, 0
pushl %ebx
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET ebx, 0
cld
pushl %fs
CFI_ADJUST_CFA_OFFSET 4
/*CFI_REL_OFFSET fs, 0*/
movl $(__KERNEL_PERCPU), %ecx
movl %ecx, %fs
UNWIND_ESPFIX_STACK
popl %ecx
CFI_ADJUST_CFA_OFFSET -4
/*CFI_REGISTER es, ecx*/
movl PT_FS(%esp), %edi # get the function address
movl PT_ORIG_EAX(%esp), %edx # get the error code
movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
mov %ecx, PT_FS(%esp)
/*CFI_REL_OFFSET fs, ES*/
movl $(__USER_DS), %ecx
movl %ecx, %ds
movl %ecx, %es
TRACE_IRQS_OFF
movl %esp,%eax # pt_regs pointer
call *%edi
jmp ret_from_exception
CFI_ENDPROC
END(page_fault)
/*
* Debug traps and NMI can happen at the one SYSENTER instruction
* that sets up the real kernel stack. Check here, since we can't
* allow the wrong stack to be used.
*
* "TSS_sysenter_sp0+12" is because the NMI/debug handler will have
* already pushed 3 words if it hits on the sysenter instruction:
* eflags, cs and eip.
*
* We just load the right stack, and push the three (known) values
* by hand onto the new stack - while updating the return eip past
* the instruction that would have done it for sysenter.
*/
#define FIX_STACK(offset, ok, label) \
cmpw $__KERNEL_CS,4(%esp); \
jne ok; \
label: \
movl TSS_sysenter_sp0+offset(%esp),%esp; \
CFI_DEF_CFA esp, 0; \
CFI_UNDEFINED eip; \
pushfl; \
CFI_ADJUST_CFA_OFFSET 4; \
pushl $__KERNEL_CS; \
CFI_ADJUST_CFA_OFFSET 4; \
pushl $sysenter_past_esp; \
CFI_ADJUST_CFA_OFFSET 4; \
CFI_REL_OFFSET eip, 0
ENTRY(debug)
RING0_INT_FRAME
cmpl $ia32_sysenter_target,(%esp)
jne debug_stack_correct
FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
debug_stack_correct:
pushl $-1 # mark this as an int
CFI_ADJUST_CFA_OFFSET 4
SAVE_ALL
TRACE_IRQS_OFF
xorl %edx,%edx # error code 0
movl %esp,%eax # pt_regs pointer
call do_debug
jmp ret_from_exception
CFI_ENDPROC
END(debug)
/*
* NMI is doubly nasty. It can happen _while_ we're handling
* a debug fault, and the debug fault hasn't yet been able to
* clear up the stack. So we first check whether we got an
* NMI on the sysenter entry path, but after that we need to
* check whether we got an NMI on the debug path where the debug
* fault happened on the sysenter path.
*/
ENTRY(nmi)
RING0_INT_FRAME
pushl %eax
CFI_ADJUST_CFA_OFFSET 4
movl %ss, %eax
cmpw $__ESPFIX_SS, %ax
popl %eax
CFI_ADJUST_CFA_OFFSET -4
je nmi_espfix_stack
cmpl $ia32_sysenter_target,(%esp)
je nmi_stack_fixup
pushl %eax
CFI_ADJUST_CFA_OFFSET 4
movl %esp,%eax
/* Do not access memory above the end of our stack page,
* it might not exist.
*/
andl $(THREAD_SIZE-1),%eax
cmpl $(THREAD_SIZE-20),%eax
popl %eax
CFI_ADJUST_CFA_OFFSET -4
jae nmi_stack_correct
cmpl $ia32_sysenter_target,12(%esp)
je nmi_debug_stack_check
nmi_stack_correct:
/* We have a RING0_INT_FRAME here */
pushl %eax
CFI_ADJUST_CFA_OFFSET 4
SAVE_ALL
TRACE_IRQS_OFF
xorl %edx,%edx # zero error code
movl %esp,%eax # pt_regs pointer
call do_nmi
jmp restore_nocheck_notrace
CFI_ENDPROC
nmi_stack_fixup:
RING0_INT_FRAME
FIX_STACK(12,nmi_stack_correct, 1)
jmp nmi_stack_correct
nmi_debug_stack_check:
/* We have a RING0_INT_FRAME here */
cmpw $__KERNEL_CS,16(%esp)
jne nmi_stack_correct
cmpl $debug,(%esp)
jb nmi_stack_correct
cmpl $debug_esp_fix_insn,(%esp)
ja nmi_stack_correct
FIX_STACK(24,nmi_stack_correct, 1)
jmp nmi_stack_correct
nmi_espfix_stack:
/* We have a RING0_INT_FRAME here.
*
* create the pointer to lss back
*/
pushl %ss
CFI_ADJUST_CFA_OFFSET 4
pushl %esp
CFI_ADJUST_CFA_OFFSET 4
addw $4, (%esp)
/* copy the iret frame of 12 bytes */
.rept 3
pushl 16(%esp)
CFI_ADJUST_CFA_OFFSET 4
.endr
pushl %eax
CFI_ADJUST_CFA_OFFSET 4
SAVE_ALL
TRACE_IRQS_OFF
FIXUP_ESPFIX_STACK # %eax == %esp
xorl %edx,%edx # zero error code
call do_nmi
RESTORE_REGS
lss 12+4(%esp), %esp # back to espfix stack
CFI_ADJUST_CFA_OFFSET -24
jmp irq_return
CFI_ENDPROC
END(nmi)
ENTRY(int3)
RING0_INT_FRAME
pushl $-1 # mark this as an int
CFI_ADJUST_CFA_OFFSET 4
SAVE_ALL
TRACE_IRQS_OFF
xorl %edx,%edx # zero error code
movl %esp,%eax # pt_regs pointer
call do_int3
jmp ret_from_exception
CFI_ENDPROC
END(int3)
ENTRY(general_protection)
RING0_EC_FRAME
pushl $do_general_protection
CFI_ADJUST_CFA_OFFSET 4
jmp error_code
CFI_ENDPROC
END(general_protection)
/*
* End of kprobes section
*/
.popsection
......@@ -11,15 +11,15 @@
*
* NOTE: This code handles signal-recognition, which happens every time
* after an interrupt and after each system call.
*
* Normal syscalls and interrupts don't save a full stack frame, this is
*
* Normal syscalls and interrupts don't save a full stack frame, this is
* only done for syscall tracing, signals or fork/exec et.al.
*
* A note on terminology:
* - top of stack: Architecture defined interrupt frame from SS to RIP
* at the top of the kernel process stack.
*
* A note on terminology:
* - top of stack: Architecture defined interrupt frame from SS to RIP
* at the top of the kernel process stack.
* - partial stack frame: partially saved registers upto R11.
* - full stack frame: Like partial stack frame, but all register saved.
* - full stack frame: Like partial stack frame, but all register saved.
*
* Some macro usage:
* - CFI macros are used to generate dwarf2 unwind information for better
......@@ -60,7 +60,6 @@
#define __AUDIT_ARCH_LE 0x40000000
.code64
#ifdef CONFIG_FUNCTION_TRACER
#ifdef CONFIG_DYNAMIC_FTRACE
ENTRY(mcount)
......@@ -142,7 +141,7 @@ END(mcount)
#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif
#endif
#ifdef CONFIG_PARAVIRT
ENTRY(native_usergs_sysret64)
......@@ -161,29 +160,29 @@ ENTRY(native_usergs_sysret64)
.endm
/*
* C code is not supposed to know about undefined top of stack. Every time
* a C function with an pt_regs argument is called from the SYSCALL based
* C code is not supposed to know about undefined top of stack. Every time
* a C function with an pt_regs argument is called from the SYSCALL based
* fast path FIXUP_TOP_OF_STACK is needed.
* RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
* manipulation.
*/
/* %rsp:at FRAMEEND */
.macro FIXUP_TOP_OF_STACK tmp
movq %gs:pda_oldrsp,\tmp
movq \tmp,RSP(%rsp)
movq $__USER_DS,SS(%rsp)
movq $__USER_CS,CS(%rsp)
movq $-1,RCX(%rsp)
movq R11(%rsp),\tmp /* get eflags */
movq \tmp,EFLAGS(%rsp)
*/
/* %rsp:at FRAMEEND */
.macro FIXUP_TOP_OF_STACK tmp offset=0
movq %gs:pda_oldrsp,\tmp
movq \tmp,RSP+\offset(%rsp)
movq $__USER_DS,SS+\offset(%rsp)
movq $__USER_CS,CS+\offset(%rsp)
movq $-1,RCX+\offset(%rsp)
movq R11+\offset(%rsp),\tmp /* get eflags */
movq \tmp,EFLAGS+\offset(%rsp)
.endm
.macro RESTORE_TOP_OF_STACK tmp,offset=0
movq RSP-\offset(%rsp),\tmp
movq \tmp,%gs:pda_oldrsp
movq EFLAGS-\offset(%rsp),\tmp
movq \tmp,R11-\offset(%rsp)
.macro RESTORE_TOP_OF_STACK tmp offset=0
movq RSP+\offset(%rsp),\tmp
movq \tmp,%gs:pda_oldrsp
movq EFLAGS+\offset(%rsp),\tmp
movq \tmp,R11+\offset(%rsp)
.endm
.macro FAKE_STACK_FRAME child_rip
......@@ -195,7 +194,7 @@ ENTRY(native_usergs_sysret64)
pushq %rax /* rsp */
CFI_ADJUST_CFA_OFFSET 8
CFI_REL_OFFSET rsp,0
pushq $(1<<9) /* eflags - interrupts on */
pushq $X86_EFLAGS_IF /* eflags - interrupts on */
CFI_ADJUST_CFA_OFFSET 8
/*CFI_REL_OFFSET rflags,0*/
pushq $__KERNEL_CS /* cs */
......@@ -213,62 +212,184 @@ ENTRY(native_usergs_sysret64)
CFI_ADJUST_CFA_OFFSET -(6*8)
.endm
.macro CFI_DEFAULT_STACK start=1
/*
* initial frame state for interrupts (and exceptions without error code)
*/
.macro EMPTY_FRAME start=1 offset=0
.if \start
CFI_STARTPROC simple
CFI_STARTPROC simple
CFI_SIGNAL_FRAME
CFI_DEF_CFA rsp,SS+8
CFI_DEF_CFA rsp,8+\offset
.else
CFI_DEF_CFA_OFFSET SS+8
CFI_DEF_CFA_OFFSET 8+\offset
.endif
CFI_REL_OFFSET r15,R15
CFI_REL_OFFSET r14,R14
CFI_REL_OFFSET r13,R13
CFI_REL_OFFSET r12,R12
CFI_REL_OFFSET rbp,RBP
CFI_REL_OFFSET rbx,RBX
CFI_REL_OFFSET r11,R11
CFI_REL_OFFSET r10,R10
CFI_REL_OFFSET r9,R9
CFI_REL_OFFSET r8,R8
CFI_REL_OFFSET rax,RAX
CFI_REL_OFFSET rcx,RCX
CFI_REL_OFFSET rdx,RDX
CFI_REL_OFFSET rsi,RSI
CFI_REL_OFFSET rdi,RDI
CFI_REL_OFFSET rip,RIP
/*CFI_REL_OFFSET cs,CS*/
/*CFI_REL_OFFSET rflags,EFLAGS*/
CFI_REL_OFFSET rsp,RSP
/*CFI_REL_OFFSET ss,SS*/
.endm
/*
* initial frame state for interrupts (and exceptions without error code)
*/
.macro INTR_FRAME start=1 offset=0
EMPTY_FRAME \start, SS+8+\offset-RIP
/*CFI_REL_OFFSET ss, SS+\offset-RIP*/
CFI_REL_OFFSET rsp, RSP+\offset-RIP
/*CFI_REL_OFFSET rflags, EFLAGS+\offset-RIP*/
/*CFI_REL_OFFSET cs, CS+\offset-RIP*/
CFI_REL_OFFSET rip, RIP+\offset-RIP
.endm
/*
* initial frame state for exceptions with error code (and interrupts
* with vector already pushed)
*/
.macro XCPT_FRAME start=1 offset=0
INTR_FRAME \start, RIP+\offset-ORIG_RAX
/*CFI_REL_OFFSET orig_rax, ORIG_RAX-ORIG_RAX*/
.endm
/*
* frame that enables calling into C.
*/
.macro PARTIAL_FRAME start=1 offset=0
XCPT_FRAME \start, ORIG_RAX+\offset-ARGOFFSET
CFI_REL_OFFSET rdi, RDI+\offset-ARGOFFSET
CFI_REL_OFFSET rsi, RSI+\offset-ARGOFFSET
CFI_REL_OFFSET rdx, RDX+\offset-ARGOFFSET
CFI_REL_OFFSET rcx, RCX+\offset-ARGOFFSET
CFI_REL_OFFSET rax, RAX+\offset-ARGOFFSET
CFI_REL_OFFSET r8, R8+\offset-ARGOFFSET
CFI_REL_OFFSET r9, R9+\offset-ARGOFFSET
CFI_REL_OFFSET r10, R10+\offset-ARGOFFSET
CFI_REL_OFFSET r11, R11+\offset-ARGOFFSET
.endm
/*
* frame that enables passing a complete pt_regs to a C function.
*/
.macro DEFAULT_FRAME start=1 offset=0
PARTIAL_FRAME \start, R11+\offset-R15
CFI_REL_OFFSET rbx, RBX+\offset
CFI_REL_OFFSET rbp, RBP+\offset
CFI_REL_OFFSET r12, R12+\offset
CFI_REL_OFFSET r13, R13+\offset
CFI_REL_OFFSET r14, R14+\offset
CFI_REL_OFFSET r15, R15+\offset
.endm
/* save partial stack frame */
ENTRY(save_args)
XCPT_FRAME
cld
movq_cfi rdi, RDI+16-ARGOFFSET
movq_cfi rsi, RSI+16-ARGOFFSET
movq_cfi rdx, RDX+16-ARGOFFSET
movq_cfi rcx, RCX+16-ARGOFFSET
movq_cfi rax, RAX+16-ARGOFFSET
movq_cfi r8, R8+16-ARGOFFSET
movq_cfi r9, R9+16-ARGOFFSET
movq_cfi r10, R10+16-ARGOFFSET
movq_cfi r11, R11+16-ARGOFFSET
leaq -ARGOFFSET+16(%rsp),%rdi /* arg1 for handler */
movq_cfi rbp, 8 /* push %rbp */
leaq 8(%rsp), %rbp /* mov %rsp, %ebp */
testl $3, CS(%rdi)
je 1f
SWAPGS
/*
* irqcount is used to check if a CPU is already on an interrupt stack
* or not. While this is essentially redundant with preempt_count it is
* a little cheaper to use a separate counter in the PDA (short of
* moving irq_enter into assembly, which would be too much work)
*/
1: incl %gs:pda_irqcount
jne 2f
popq_cfi %rax /* move return address... */
mov %gs:pda_irqstackptr,%rsp
EMPTY_FRAME 0
pushq_cfi %rax /* ... to the new stack */
/*
* We entered an interrupt context - irqs are off:
*/
2: TRACE_IRQS_OFF
ret
CFI_ENDPROC
END(save_args)
ENTRY(save_rest)
PARTIAL_FRAME 1 REST_SKIP+8
movq 5*8+16(%rsp), %r11 /* save return address */
movq_cfi rbx, RBX+16
movq_cfi rbp, RBP+16
movq_cfi r12, R12+16
movq_cfi r13, R13+16
movq_cfi r14, R14+16
movq_cfi r15, R15+16
movq %r11, 8(%rsp) /* return address */
FIXUP_TOP_OF_STACK %r11, 16
ret
CFI_ENDPROC
END(save_rest)
/* save complete stack frame */
ENTRY(save_paranoid)
XCPT_FRAME 1 RDI+8
cld
movq_cfi rdi, RDI+8
movq_cfi rsi, RSI+8
movq_cfi rdx, RDX+8
movq_cfi rcx, RCX+8
movq_cfi rax, RAX+8
movq_cfi r8, R8+8
movq_cfi r9, R9+8
movq_cfi r10, R10+8
movq_cfi r11, R11+8
movq_cfi rbx, RBX+8
movq_cfi rbp, RBP+8
movq_cfi r12, R12+8
movq_cfi r13, R13+8
movq_cfi r14, R14+8
movq_cfi r15, R15+8
movl $1,%ebx
movl $MSR_GS_BASE,%ecx
rdmsr
testl %edx,%edx
js 1f /* negative -> in kernel */
SWAPGS
xorl %ebx,%ebx
1: ret
CFI_ENDPROC
END(save_paranoid)
/*
* A newly forked process directly context switches into this.
*/
/* rdi: prev */
* A newly forked process directly context switches into this address.
*
* rdi: prev task we switched from
*/
ENTRY(ret_from_fork)
CFI_DEFAULT_STACK
DEFAULT_FRAME
push kernel_eflags(%rip)
CFI_ADJUST_CFA_OFFSET 8
popf # reset kernel eflags
popf # reset kernel eflags
CFI_ADJUST_CFA_OFFSET -8
call schedule_tail
call schedule_tail # rdi: 'prev' task parameter
GET_THREAD_INFO(%rcx)
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
jnz rff_trace
rff_action:
CFI_REMEMBER_STATE
RESTORE_REST
testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread?
je int_ret_from_sys_call
testl $_TIF_IA32,TI_flags(%rcx)
testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET
jnz int_ret_from_sys_call
RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
jmp ret_from_sys_call
rff_trace:
movq %rsp,%rdi
call syscall_trace_leave
GET_THREAD_INFO(%rcx)
jmp rff_action
RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET
jmp ret_from_sys_call # go to the SYSRET fastpath
CFI_RESTORE_STATE
CFI_ENDPROC
END(ret_from_fork)
......@@ -278,20 +399,20 @@ END(ret_from_fork)
* SYSCALL does not save anything on the stack and does not change the
* stack pointer.
*/
/*
* Register setup:
* Register setup:
* rax system call number
* rdi arg0
* rcx return address for syscall/sysret, C arg3
* rcx return address for syscall/sysret, C arg3
* rsi arg1
* rdx arg2
* rdx arg2
* r10 arg3 (--> moved to rcx for C)
* r8 arg4
* r9 arg5
* r11 eflags for syscall/sysret, temporary for C
* r12-r15,rbp,rbx saved by C code, not touched.
*
* r12-r15,rbp,rbx saved by C code, not touched.
*
* Interrupts are off on entry.
* Only called from user space.
*
......@@ -301,7 +422,7 @@ END(ret_from_fork)
* When user can change the frames always force IRET. That is because
* it deals with uncanonical addresses better. SYSRET has trouble
* with them due to bugs in both AMD and Intel CPUs.
*/
*/
ENTRY(system_call)
CFI_STARTPROC simple
......@@ -317,7 +438,7 @@ ENTRY(system_call)
*/
ENTRY(system_call_after_swapgs)
movq %rsp,%gs:pda_oldrsp
movq %rsp,%gs:pda_oldrsp
movq %gs:pda_kernelstack,%rsp
/*
* No need to follow this irqs off/on section - it's straight
......@@ -325,7 +446,7 @@ ENTRY(system_call_after_swapgs)
*/
ENABLE_INTERRUPTS(CLBR_NONE)
SAVE_ARGS 8,1
movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
movq %rcx,RIP-ARGOFFSET(%rsp)
CFI_REL_OFFSET rip,RIP-ARGOFFSET
GET_THREAD_INFO(%rcx)
......@@ -339,19 +460,19 @@ system_call_fastpath:
movq %rax,RAX-ARGOFFSET(%rsp)
/*
* Syscall return path ending with SYSRET (fast path)
* Has incomplete stack frame and undefined top of stack.
*/
* Has incomplete stack frame and undefined top of stack.
*/
ret_from_sys_call:
movl $_TIF_ALLWORK_MASK,%edi
/* edi: flagmask */
sysret_check:
sysret_check:
LOCKDEP_SYS_EXIT
GET_THREAD_INFO(%rcx)
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
movl TI_flags(%rcx),%edx
andl %edi,%edx
jnz sysret_careful
jnz sysret_careful
CFI_REMEMBER_STATE
/*
* sysretq will re-enable interrupts:
......@@ -366,7 +487,7 @@ sysret_check:
CFI_RESTORE_STATE
/* Handle reschedules */
/* edx: work, edi: workmask */
/* edx: work, edi: workmask */
sysret_careful:
bt $TIF_NEED_RESCHED,%edx
jnc sysret_signal
......@@ -379,7 +500,7 @@ sysret_careful:
CFI_ADJUST_CFA_OFFSET -8
jmp sysret_check
/* Handle a signal */
/* Handle a signal */
sysret_signal:
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
......@@ -388,17 +509,20 @@ sysret_signal:
jc sysret_audit
#endif
/* edx: work flags (arg3) */
leaq do_notify_resume(%rip),%rax
leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
xorl %esi,%esi # oldset -> arg2
call ptregscall_common
SAVE_REST
FIXUP_TOP_OF_STACK %r11
call do_notify_resume
RESTORE_TOP_OF_STACK %r11
RESTORE_REST
movl $_TIF_WORK_MASK,%edi
/* Use IRET because user could have changed frame. This
works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
jmp int_with_check
badsys:
movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
jmp ret_from_sys_call
......@@ -437,7 +561,7 @@ sysret_audit:
#endif /* CONFIG_AUDITSYSCALL */
/* Do syscall tracing */
tracesys:
tracesys:
#ifdef CONFIG_AUDITSYSCALL
testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
jz auditsys
......@@ -460,8 +584,8 @@ tracesys:
call *sys_call_table(,%rax,8)
movq %rax,RAX-ARGOFFSET(%rsp)
/* Use IRET because user could have changed frame */
/*
/*
* Syscall return path ending with IRET.
* Has correct top of stack, but partial stack frame.
*/
......@@ -505,18 +629,18 @@ int_very_careful:
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
SAVE_REST
/* Check for syscall exit trace */
/* Check for syscall exit trace */
testl $_TIF_WORK_SYSCALL_EXIT,%edx
jz int_signal
pushq %rdi
CFI_ADJUST_CFA_OFFSET 8
leaq 8(%rsp),%rdi # &ptregs -> arg1
leaq 8(%rsp),%rdi # &ptregs -> arg1
call syscall_trace_leave
popq %rdi
CFI_ADJUST_CFA_OFFSET -8
andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
jmp int_restore_rest
int_signal:
testl $_TIF_DO_NOTIFY_MASK,%edx
jz 1f
......@@ -531,22 +655,24 @@ int_restore_rest:
jmp int_with_check
CFI_ENDPROC
END(system_call)
/*
/*
* Certain special system calls that need to save a complete full stack frame.
*/
*/
.macro PTREGSCALL label,func,arg
.globl \label
\label:
leaq \func(%rip),%rax
leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
jmp ptregscall_common
ENTRY(\label)
PARTIAL_FRAME 1 8 /* offset 8: return address */
subq $REST_SKIP, %rsp
CFI_ADJUST_CFA_OFFSET REST_SKIP
call save_rest
DEFAULT_FRAME 0 8 /* offset 8: return address */
leaq 8(%rsp), \arg /* pt_regs pointer */
call \func
jmp ptregscall_common
CFI_ENDPROC
END(\label)
.endm
CFI_STARTPROC
PTREGSCALL stub_clone, sys_clone, %r8
PTREGSCALL stub_fork, sys_fork, %rdi
PTREGSCALL stub_vfork, sys_vfork, %rdi
......@@ -554,25 +680,18 @@ END(\label)
PTREGSCALL stub_iopl, sys_iopl, %rsi
ENTRY(ptregscall_common)
popq %r11
CFI_ADJUST_CFA_OFFSET -8
CFI_REGISTER rip, r11
SAVE_REST
movq %r11, %r15
CFI_REGISTER rip, r15
FIXUP_TOP_OF_STACK %r11
call *%rax
RESTORE_TOP_OF_STACK %r11
movq %r15, %r11
CFI_REGISTER rip, r11
RESTORE_REST
pushq %r11
CFI_ADJUST_CFA_OFFSET 8
CFI_REL_OFFSET rip, 0
ret
DEFAULT_FRAME 1 8 /* offset 8: return address */
RESTORE_TOP_OF_STACK %r11, 8
movq_cfi_restore R15+8, r15
movq_cfi_restore R14+8, r14
movq_cfi_restore R13+8, r13
movq_cfi_restore R12+8, r12
movq_cfi_restore RBP+8, rbp
movq_cfi_restore RBX+8, rbx
ret $REST_SKIP /* pop extended registers */
CFI_ENDPROC
END(ptregscall_common)
ENTRY(stub_execve)
CFI_STARTPROC
popq %r11
......@@ -588,11 +707,11 @@ ENTRY(stub_execve)
jmp int_ret_from_sys_call
CFI_ENDPROC
END(stub_execve)
/*
* sigreturn is special because it needs to restore all registers on return.
* This cannot be done with SYSRET, so use the IRET return path instead.
*/
*/
ENTRY(stub_rt_sigreturn)
CFI_STARTPROC
addq $8, %rsp
......@@ -608,70 +727,70 @@ ENTRY(stub_rt_sigreturn)
END(stub_rt_sigreturn)
/*
* initial frame state for interrupts and exceptions
* Build the entry stubs and pointer table with some assembler magic.
* We pack 7 stubs into a single 32-byte chunk, which will fit in a
* single cache line on all modern x86 implementations.
*/
.macro _frame ref
CFI_STARTPROC simple
CFI_SIGNAL_FRAME
CFI_DEF_CFA rsp,SS+8-\ref
/*CFI_REL_OFFSET ss,SS-\ref*/
CFI_REL_OFFSET rsp,RSP-\ref
/*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
/*CFI_REL_OFFSET cs,CS-\ref*/
CFI_REL_OFFSET rip,RIP-\ref
.endm
.section .init.rodata,"a"
ENTRY(interrupt)
.text
.p2align 5
.p2align CONFIG_X86_L1_CACHE_SHIFT
ENTRY(irq_entries_start)
INTR_FRAME
vector=FIRST_EXTERNAL_VECTOR
.rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7
.balign 32
.rept 7
.if vector < NR_VECTORS
.if vector <> FIRST_EXTERNAL_VECTOR
CFI_ADJUST_CFA_OFFSET -8
.endif
1: pushq $(~vector+0x80) /* Note: always in signed byte range */
CFI_ADJUST_CFA_OFFSET 8
.if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
jmp 2f
.endif
.previous
.quad 1b
.text
vector=vector+1
.endif
.endr
2: jmp common_interrupt
.endr
CFI_ENDPROC
END(irq_entries_start)
/* initial frame state for interrupts (and exceptions without error code) */
#define INTR_FRAME _frame RIP
/* initial frame state for exceptions with error code (and interrupts with
vector already pushed) */
#define XCPT_FRAME _frame ORIG_RAX
.previous
END(interrupt)
.previous
/*
/*
* Interrupt entry/exit.
*
* Interrupt entry points save only callee clobbered registers in fast path.
*
* Entry runs with interrupts off.
*/
*
* Entry runs with interrupts off.
*/
/* 0(%rsp): interrupt number */
/* 0(%rsp): ~(interrupt number) */
.macro interrupt func
cld
SAVE_ARGS
leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler
pushq %rbp
/*
* Save rbp twice: One is for marking the stack frame, as usual, and the
* other, to fill pt_regs properly. This is because bx comes right
* before the last saved register in that structure, and not bp. If the
* base pointer were in the place bx is today, this would not be needed.
*/
movq %rbp, -8(%rsp)
CFI_ADJUST_CFA_OFFSET 8
CFI_REL_OFFSET rbp, 0
movq %rsp,%rbp
CFI_DEF_CFA_REGISTER rbp
testl $3,CS(%rdi)
je 1f
SWAPGS
/* irqcount is used to check if a CPU is already on an interrupt
stack or not. While this is essentially redundant with preempt_count
it is a little cheaper to use a separate counter in the PDA
(short of moving irq_enter into assembly, which would be too
much work) */
1: incl %gs:pda_irqcount
cmoveq %gs:pda_irqstackptr,%rsp
push %rbp # backlink for old unwinder
/*
* We entered an interrupt context - irqs are off:
*/
TRACE_IRQS_OFF
subq $10*8, %rsp
CFI_ADJUST_CFA_OFFSET 10*8
call save_args
PARTIAL_FRAME 0
call \func
.endm
ENTRY(common_interrupt)
/*
* The interrupt stubs push (~vector+0x80) onto the stack and
* then jump to common_interrupt.
*/
.p2align CONFIG_X86_L1_CACHE_SHIFT
common_interrupt:
XCPT_FRAME
addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */
interrupt do_IRQ
/* 0(%rsp): oldrsp-ARGOFFSET */
ret_from_intr:
......@@ -685,12 +804,12 @@ exit_intr:
GET_THREAD_INFO(%rcx)
testl $3,CS-ARGOFFSET(%rsp)
je retint_kernel
/* Interrupt came from user space */
/*
* Has a correct top of stack, but a partial stack frame
* %rcx: thread info. Interrupts off.
*/
*/
retint_with_reschedule:
movl $_TIF_WORK_MASK,%edi
retint_check:
......@@ -763,20 +882,20 @@ retint_careful:
pushq %rdi
CFI_ADJUST_CFA_OFFSET 8
call schedule
popq %rdi
popq %rdi
CFI_ADJUST_CFA_OFFSET -8
GET_THREAD_INFO(%rcx)
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
jmp retint_check
retint_signal:
testl $_TIF_DO_NOTIFY_MASK,%edx
jz retint_swapgs
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
SAVE_REST
movq $-1,ORIG_RAX(%rsp)
movq $-1,ORIG_RAX(%rsp)
xorl %esi,%esi # oldset
movq %rsp,%rdi # &pt_regs
call do_notify_resume
......@@ -798,324 +917,211 @@ ENTRY(retint_kernel)
jnc retint_restore_args
call preempt_schedule_irq
jmp exit_intr
#endif
#endif
CFI_ENDPROC
END(common_interrupt)
/*
* APIC interrupts.
*/
.macro apicinterrupt num,func
*/
.macro apicinterrupt num sym do_sym
ENTRY(\sym)
INTR_FRAME
pushq $~(\num)
CFI_ADJUST_CFA_OFFSET 8
interrupt \func
interrupt \do_sym
jmp ret_from_intr
CFI_ENDPROC
.endm
ENTRY(thermal_interrupt)
apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
END(thermal_interrupt)
ENTRY(threshold_interrupt)
apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
END(threshold_interrupt)
#ifdef CONFIG_SMP
ENTRY(reschedule_interrupt)
apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
END(reschedule_interrupt)
.macro INVALIDATE_ENTRY num
ENTRY(invalidate_interrupt\num)
apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
END(invalidate_interrupt\num)
.endm
END(\sym)
.endm
INVALIDATE_ENTRY 0
INVALIDATE_ENTRY 1
INVALIDATE_ENTRY 2
INVALIDATE_ENTRY 3
INVALIDATE_ENTRY 4
INVALIDATE_ENTRY 5
INVALIDATE_ENTRY 6
INVALIDATE_ENTRY 7
ENTRY(call_function_interrupt)
apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
END(call_function_interrupt)
ENTRY(call_function_single_interrupt)
apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt
END(call_function_single_interrupt)
ENTRY(irq_move_cleanup_interrupt)
apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
END(irq_move_cleanup_interrupt)
#ifdef CONFIG_SMP
apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \
irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
#endif
ENTRY(apic_timer_interrupt)
apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
END(apic_timer_interrupt)
apicinterrupt UV_BAU_MESSAGE \
uv_bau_message_intr1 uv_bau_message_interrupt
apicinterrupt LOCAL_TIMER_VECTOR \
apic_timer_interrupt smp_apic_timer_interrupt
#ifdef CONFIG_SMP
apicinterrupt INVALIDATE_TLB_VECTOR_START+0 \
invalidate_interrupt0 smp_invalidate_interrupt
apicinterrupt INVALIDATE_TLB_VECTOR_START+1 \
invalidate_interrupt1 smp_invalidate_interrupt
apicinterrupt INVALIDATE_TLB_VECTOR_START+2 \
invalidate_interrupt2 smp_invalidate_interrupt
apicinterrupt INVALIDATE_TLB_VECTOR_START+3 \
invalidate_interrupt3 smp_invalidate_interrupt
apicinterrupt INVALIDATE_TLB_VECTOR_START+4 \
invalidate_interrupt4 smp_invalidate_interrupt
apicinterrupt INVALIDATE_TLB_VECTOR_START+5 \
invalidate_interrupt5 smp_invalidate_interrupt
apicinterrupt INVALIDATE_TLB_VECTOR_START+6 \
invalidate_interrupt6 smp_invalidate_interrupt
apicinterrupt INVALIDATE_TLB_VECTOR_START+7 \
invalidate_interrupt7 smp_invalidate_interrupt
#endif
ENTRY(uv_bau_message_intr1)
apicinterrupt 220,uv_bau_message_interrupt
END(uv_bau_message_intr1)
apicinterrupt THRESHOLD_APIC_VECTOR \
threshold_interrupt mce_threshold_interrupt
apicinterrupt THERMAL_APIC_VECTOR \
thermal_interrupt smp_thermal_interrupt
#ifdef CONFIG_SMP
apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \
call_function_single_interrupt smp_call_function_single_interrupt
apicinterrupt CALL_FUNCTION_VECTOR \
call_function_interrupt smp_call_function_interrupt
apicinterrupt RESCHEDULE_VECTOR \
reschedule_interrupt smp_reschedule_interrupt
#endif
ENTRY(error_interrupt)
apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
END(error_interrupt)
apicinterrupt ERROR_APIC_VECTOR \
error_interrupt smp_error_interrupt
apicinterrupt SPURIOUS_APIC_VECTOR \
spurious_interrupt smp_spurious_interrupt
ENTRY(spurious_interrupt)
apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
END(spurious_interrupt)
/*
* Exception entry points.
*/
.macro zeroentry sym
*/
.macro zeroentry sym do_sym
ENTRY(\sym)
INTR_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
pushq $0 /* push error code/oldrax */
CFI_ADJUST_CFA_OFFSET 8
pushq %rax /* push real oldrax to the rdi slot */
CFI_ADJUST_CFA_OFFSET 8
CFI_REL_OFFSET rax,0
leaq \sym(%rip),%rax
jmp error_entry
pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
subq $15*8,%rsp
CFI_ADJUST_CFA_OFFSET 15*8
call error_entry
DEFAULT_FRAME 0
movq %rsp,%rdi /* pt_regs pointer */
xorl %esi,%esi /* no error code */
call \do_sym
jmp error_exit /* %ebx: no swapgs flag */
CFI_ENDPROC
.endm
END(\sym)
.endm
.macro errorentry sym
XCPT_FRAME
.macro paranoidzeroentry sym do_sym
ENTRY(\sym)
INTR_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
pushq %rax
pushq $-1 /* ORIG_RAX: no syscall to restart */
CFI_ADJUST_CFA_OFFSET 8
CFI_REL_OFFSET rax,0
leaq \sym(%rip),%rax
jmp error_entry
subq $15*8, %rsp
call save_paranoid
TRACE_IRQS_OFF
movq %rsp,%rdi /* pt_regs pointer */
xorl %esi,%esi /* no error code */
call \do_sym
jmp paranoid_exit /* %ebx: no swapgs flag */
CFI_ENDPROC
.endm
END(\sym)
.endm
/* error code is on the stack already */
/* handle NMI like exceptions that can happen everywhere */
.macro paranoidentry sym, ist=0, irqtrace=1
SAVE_ALL
cld
movl $1,%ebx
movl $MSR_GS_BASE,%ecx
rdmsr
testl %edx,%edx
js 1f
SWAPGS
xorl %ebx,%ebx
1:
.if \ist
movq %gs:pda_data_offset, %rbp
.endif
.if \irqtrace
TRACE_IRQS_OFF
.endif
movq %rsp,%rdi
movq ORIG_RAX(%rsp),%rsi
movq $-1,ORIG_RAX(%rsp)
.if \ist
subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
.endif
call \sym
.if \ist
addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
.endif
DISABLE_INTERRUPTS(CLBR_NONE)
.if \irqtrace
.macro paranoidzeroentry_ist sym do_sym ist
ENTRY(\sym)
INTR_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
pushq $-1 /* ORIG_RAX: no syscall to restart */
CFI_ADJUST_CFA_OFFSET 8
subq $15*8, %rsp
call save_paranoid
TRACE_IRQS_OFF
.endif
.endm
movq %rsp,%rdi /* pt_regs pointer */
xorl %esi,%esi /* no error code */
movq %gs:pda_data_offset, %rbp
subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
call \do_sym
addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
jmp paranoid_exit /* %ebx: no swapgs flag */
CFI_ENDPROC
END(\sym)
.endm
/*
* "Paranoid" exit path from exception stack.
* Paranoid because this is used by NMIs and cannot take
* any kernel state for granted.
* We don't do kernel preemption checks here, because only
* NMI should be common and it does not enable IRQs and
* cannot get reschedule ticks.
*
* "trace" is 0 for the NMI handler only, because irq-tracing
* is fundamentally NMI-unsafe. (we cannot change the soft and
* hard flags at once, atomically)
*/
.macro paranoidexit trace=1
/* ebx: no swapgs flag */
paranoid_exit\trace:
testl %ebx,%ebx /* swapgs needed? */
jnz paranoid_restore\trace
testl $3,CS(%rsp)
jnz paranoid_userspace\trace
paranoid_swapgs\trace:
.if \trace
TRACE_IRQS_IRETQ 0
.endif
SWAPGS_UNSAFE_STACK
paranoid_restore\trace:
RESTORE_ALL 8
jmp irq_return
paranoid_userspace\trace:
GET_THREAD_INFO(%rcx)
movl TI_flags(%rcx),%ebx
andl $_TIF_WORK_MASK,%ebx
jz paranoid_swapgs\trace
movq %rsp,%rdi /* &pt_regs */
call sync_regs
movq %rax,%rsp /* switch stack for scheduling */
testl $_TIF_NEED_RESCHED,%ebx
jnz paranoid_schedule\trace
movl %ebx,%edx /* arg3: thread flags */
.if \trace
TRACE_IRQS_ON
.endif
ENABLE_INTERRUPTS(CLBR_NONE)
xorl %esi,%esi /* arg2: oldset */
movq %rsp,%rdi /* arg1: &pt_regs */
call do_notify_resume
DISABLE_INTERRUPTS(CLBR_NONE)
.if \trace
TRACE_IRQS_OFF
.endif
jmp paranoid_userspace\trace
paranoid_schedule\trace:
.if \trace
TRACE_IRQS_ON
.endif
ENABLE_INTERRUPTS(CLBR_ANY)
call schedule
DISABLE_INTERRUPTS(CLBR_ANY)
.if \trace
TRACE_IRQS_OFF
.endif
jmp paranoid_userspace\trace
.macro errorentry sym do_sym
ENTRY(\sym)
XCPT_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
subq $15*8,%rsp
CFI_ADJUST_CFA_OFFSET 15*8
call error_entry
DEFAULT_FRAME 0
movq %rsp,%rdi /* pt_regs pointer */
movq ORIG_RAX(%rsp),%rsi /* get error code */
movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */
call \do_sym
jmp error_exit /* %ebx: no swapgs flag */
CFI_ENDPROC
.endm
END(\sym)
.endm
/*
* Exception entry point. This expects an error code/orig_rax on the stack
* and the exception handler in %rax.
*/
KPROBE_ENTRY(error_entry)
_frame RDI
CFI_REL_OFFSET rax,0
/* rdi slot contains rax, oldrax contains error code */
cld
subq $14*8,%rsp
CFI_ADJUST_CFA_OFFSET (14*8)
movq %rsi,13*8(%rsp)
CFI_REL_OFFSET rsi,RSI
movq 14*8(%rsp),%rsi /* load rax from rdi slot */
CFI_REGISTER rax,rsi
movq %rdx,12*8(%rsp)
CFI_REL_OFFSET rdx,RDX
movq %rcx,11*8(%rsp)
CFI_REL_OFFSET rcx,RCX
movq %rsi,10*8(%rsp) /* store rax */
CFI_REL_OFFSET rax,RAX
movq %r8, 9*8(%rsp)
CFI_REL_OFFSET r8,R8
movq %r9, 8*8(%rsp)
CFI_REL_OFFSET r9,R9
movq %r10,7*8(%rsp)
CFI_REL_OFFSET r10,R10
movq %r11,6*8(%rsp)
CFI_REL_OFFSET r11,R11
movq %rbx,5*8(%rsp)
CFI_REL_OFFSET rbx,RBX
movq %rbp,4*8(%rsp)
CFI_REL_OFFSET rbp,RBP
movq %r12,3*8(%rsp)
CFI_REL_OFFSET r12,R12
movq %r13,2*8(%rsp)
CFI_REL_OFFSET r13,R13
movq %r14,1*8(%rsp)
CFI_REL_OFFSET r14,R14
movq %r15,(%rsp)
CFI_REL_OFFSET r15,R15
xorl %ebx,%ebx
testl $3,CS(%rsp)
je error_kernelspace
error_swapgs:
SWAPGS
error_sti:
TRACE_IRQS_OFF
movq %rdi,RDI(%rsp)
CFI_REL_OFFSET rdi,RDI
movq %rsp,%rdi
movq ORIG_RAX(%rsp),%rsi /* get error code */
movq $-1,ORIG_RAX(%rsp)
call *%rax
/* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
error_exit:
movl %ebx,%eax
RESTORE_REST
DISABLE_INTERRUPTS(CLBR_NONE)
/* error code is on the stack already */
.macro paranoiderrorentry sym do_sym
ENTRY(\sym)
XCPT_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
subq $15*8,%rsp
CFI_ADJUST_CFA_OFFSET 15*8
call save_paranoid
DEFAULT_FRAME 0
TRACE_IRQS_OFF
GET_THREAD_INFO(%rcx)
testl %eax,%eax
jne retint_kernel
LOCKDEP_SYS_EXIT_IRQ
movl TI_flags(%rcx),%edx
movl $_TIF_WORK_MASK,%edi
andl %edi,%edx
jnz retint_careful
jmp retint_swapgs
movq %rsp,%rdi /* pt_regs pointer */
movq ORIG_RAX(%rsp),%rsi /* get error code */
movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */
call \do_sym
jmp paranoid_exit /* %ebx: no swapgs flag */
CFI_ENDPROC
END(\sym)
.endm
error_kernelspace:
incl %ebx
/* There are two places in the kernel that can potentially fault with
usergs. Handle them here. The exception handlers after
iret run with kernel gs again, so don't set the user space flag.
B stepping K8s sometimes report an truncated RIP for IRET
exceptions returning to compat mode. Check for these here too. */
leaq irq_return(%rip),%rcx
cmpq %rcx,RIP(%rsp)
je error_swapgs
movl %ecx,%ecx /* zero extend */
cmpq %rcx,RIP(%rsp)
je error_swapgs
cmpq $gs_change,RIP(%rsp)
je error_swapgs
jmp error_sti
KPROBE_END(error_entry)
/* Reload gs selector with exception handling */
/* edi: new selector */
zeroentry divide_error do_divide_error
zeroentry overflow do_overflow
zeroentry bounds do_bounds
zeroentry invalid_op do_invalid_op
zeroentry device_not_available do_device_not_available
paranoiderrorentry double_fault do_double_fault
zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun
errorentry invalid_TSS do_invalid_TSS
errorentry segment_not_present do_segment_not_present
zeroentry spurious_interrupt_bug do_spurious_interrupt_bug
zeroentry coprocessor_error do_coprocessor_error
errorentry alignment_check do_alignment_check
zeroentry simd_coprocessor_error do_simd_coprocessor_error
/* Reload gs selector with exception handling */
/* edi: new selector */
ENTRY(native_load_gs_index)
CFI_STARTPROC
pushf
CFI_ADJUST_CFA_OFFSET 8
DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI))
SWAPGS
gs_change:
movl %edi,%gs
SWAPGS
gs_change:
movl %edi,%gs
2: mfence /* workaround */
SWAPGS
popf
popf
CFI_ADJUST_CFA_OFFSET -8
ret
ret
CFI_ENDPROC
ENDPROC(native_load_gs_index)
.section __ex_table,"a"
.align 8
.quad gs_change,bad_gs
.previous
.section .fixup,"ax"
END(native_load_gs_index)
.section __ex_table,"a"
.align 8
.quad gs_change,bad_gs
.previous
.section .fixup,"ax"
/* running with kernelgs */
bad_gs:
bad_gs:
SWAPGS /* switch back to user gs */
xorl %eax,%eax
movl %eax,%gs
jmp 2b
.previous
movl %eax,%gs
jmp 2b
.previous
/*
* Create a kernel thread.
*
......@@ -1138,7 +1144,7 @@ ENTRY(kernel_thread)
xorl %r8d,%r8d
xorl %r9d,%r9d
# clone now
call do_fork
movq %rax,RAX(%rsp)
......@@ -1149,15 +1155,15 @@ ENTRY(kernel_thread)
* so internally to the x86_64 port you can rely on kernel_thread()
* not to reschedule the child before returning, this avoids the need
* of hacks for example to fork off the per-CPU idle tasks.
* [Hopefully no generic code relies on the reschedule -AK]
* [Hopefully no generic code relies on the reschedule -AK]
*/
RESTORE_ALL
UNFAKE_STACK_FRAME
ret
CFI_ENDPROC
ENDPROC(kernel_thread)
child_rip:
END(kernel_thread)
ENTRY(child_rip)
pushq $0 # fake return address
CFI_STARTPROC
/*
......@@ -1170,8 +1176,9 @@ child_rip:
# exit
mov %eax, %edi
call do_exit
ud2 # padding for call trace
CFI_ENDPROC
ENDPROC(child_rip)
END(child_rip)
/*
* execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
......@@ -1191,10 +1198,10 @@ ENDPROC(child_rip)
ENTRY(kernel_execve)
CFI_STARTPROC
FAKE_STACK_FRAME $0
SAVE_ALL
SAVE_ALL
movq %rsp,%rcx
call sys_execve
movq %rax, RAX(%rsp)
movq %rax, RAX(%rsp)
RESTORE_REST
testq %rax,%rax
je int_ret_from_sys_call
......@@ -1202,129 +1209,7 @@ ENTRY(kernel_execve)
UNFAKE_STACK_FRAME
ret
CFI_ENDPROC
ENDPROC(kernel_execve)
KPROBE_ENTRY(page_fault)
errorentry do_page_fault
KPROBE_END(page_fault)
ENTRY(coprocessor_error)
zeroentry do_coprocessor_error
END(coprocessor_error)
ENTRY(simd_coprocessor_error)
zeroentry do_simd_coprocessor_error
END(simd_coprocessor_error)
ENTRY(device_not_available)
zeroentry do_device_not_available
END(device_not_available)
/* runs on exception stack */
KPROBE_ENTRY(debug)
INTR_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
pushq $0
CFI_ADJUST_CFA_OFFSET 8
paranoidentry do_debug, DEBUG_STACK
paranoidexit
KPROBE_END(debug)
/* runs on exception stack */
KPROBE_ENTRY(nmi)
INTR_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
pushq $-1
CFI_ADJUST_CFA_OFFSET 8
paranoidentry do_nmi, 0, 0
#ifdef CONFIG_TRACE_IRQFLAGS
paranoidexit 0
#else
jmp paranoid_exit1
CFI_ENDPROC
#endif
KPROBE_END(nmi)
KPROBE_ENTRY(int3)
INTR_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
pushq $0
CFI_ADJUST_CFA_OFFSET 8
paranoidentry do_int3, DEBUG_STACK
jmp paranoid_exit1
CFI_ENDPROC
KPROBE_END(int3)
ENTRY(overflow)
zeroentry do_overflow
END(overflow)
ENTRY(bounds)
zeroentry do_bounds
END(bounds)
ENTRY(invalid_op)
zeroentry do_invalid_op
END(invalid_op)
ENTRY(coprocessor_segment_overrun)
zeroentry do_coprocessor_segment_overrun
END(coprocessor_segment_overrun)
/* runs on exception stack */
ENTRY(double_fault)
XCPT_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
paranoidentry do_double_fault
jmp paranoid_exit1
CFI_ENDPROC
END(double_fault)
ENTRY(invalid_TSS)
errorentry do_invalid_TSS
END(invalid_TSS)
ENTRY(segment_not_present)
errorentry do_segment_not_present
END(segment_not_present)
/* runs on exception stack */
ENTRY(stack_segment)
XCPT_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
paranoidentry do_stack_segment
jmp paranoid_exit1
CFI_ENDPROC
END(stack_segment)
KPROBE_ENTRY(general_protection)
errorentry do_general_protection
KPROBE_END(general_protection)
ENTRY(alignment_check)
errorentry do_alignment_check
END(alignment_check)
ENTRY(divide_error)
zeroentry do_divide_error
END(divide_error)
ENTRY(spurious_interrupt_bug)
zeroentry do_spurious_interrupt_bug
END(spurious_interrupt_bug)
#ifdef CONFIG_X86_MCE
/* runs on exception stack */
ENTRY(machine_check)
INTR_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
pushq $0
CFI_ADJUST_CFA_OFFSET 8
paranoidentry do_machine_check
jmp paranoid_exit1
CFI_ENDPROC
END(machine_check)
#endif
END(kernel_execve)
/* Call softirq on interrupt stack. Interrupts are off. */
ENTRY(call_softirq)
......@@ -1344,40 +1229,33 @@ ENTRY(call_softirq)
decl %gs:pda_irqcount
ret
CFI_ENDPROC
ENDPROC(call_softirq)
KPROBE_ENTRY(ignore_sysret)
CFI_STARTPROC
mov $-ENOSYS,%eax
sysret
CFI_ENDPROC
ENDPROC(ignore_sysret)
END(call_softirq)
#ifdef CONFIG_XEN
ENTRY(xen_hypervisor_callback)
zeroentry xen_do_hypervisor_callback
END(xen_hypervisor_callback)
zeroentry xen_hypervisor_callback xen_do_hypervisor_callback
/*
# A note on the "critical region" in our callback handler.
# We want to avoid stacking callback handlers due to events occurring
# during handling of the last event. To do this, we keep events disabled
# until we've done all processing. HOWEVER, we must enable events before
# popping the stack frame (can't be done atomically) and so it would still
# be possible to get enough handler activations to overflow the stack.
# Although unlikely, bugs of that kind are hard to track down, so we'd
# like to avoid the possibility.
# So, on entry to the handler we detect whether we interrupted an
# existing activation in its critical region -- if so, we pop the current
# activation and restart the handler using the previous one.
*/
* A note on the "critical region" in our callback handler.
* We want to avoid stacking callback handlers due to events occurring
* during handling of the last event. To do this, we keep events disabled
* until we've done all processing. HOWEVER, we must enable events before
* popping the stack frame (can't be done atomically) and so it would still
* be possible to get enough handler activations to overflow the stack.
* Although unlikely, bugs of that kind are hard to track down, so we'd
* like to avoid the possibility.
* So, on entry to the handler we detect whether we interrupted an
* existing activation in its critical region -- if so, we pop the current
* activation and restart the handler using the previous one.
*/
ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs)
CFI_STARTPROC
/* Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will
see the correct pointer to the pt_regs */
/*
* Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will
* see the correct pointer to the pt_regs
*/
movq %rdi, %rsp # we don't return, adjust the stack frame
CFI_ENDPROC
CFI_DEFAULT_STACK
DEFAULT_FRAME
11: incl %gs:pda_irqcount
movq %rsp,%rbp
CFI_DEF_CFA_REGISTER rbp
......@@ -1392,23 +1270,26 @@ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs)
END(do_hypervisor_callback)
/*
# Hypervisor uses this for application faults while it executes.
# We get here for two reasons:
# 1. Fault while reloading DS, ES, FS or GS
# 2. Fault while executing IRET
# Category 1 we do not need to fix up as Xen has already reloaded all segment
# registers that could be reloaded and zeroed the others.
# Category 2 we fix up by killing the current process. We cannot use the
# normal Linux return path in this case because if we use the IRET hypercall
# to pop the stack frame we end up in an infinite loop of failsafe callbacks.
# We distinguish between categories by comparing each saved segment register
# with its current contents: any discrepancy means we in category 1.
*/
* Hypervisor uses this for application faults while it executes.
* We get here for two reasons:
* 1. Fault while reloading DS, ES, FS or GS
* 2. Fault while executing IRET
* Category 1 we do not need to fix up as Xen has already reloaded all segment
* registers that could be reloaded and zeroed the others.
* Category 2 we fix up by killing the current process. We cannot use the
* normal Linux return path in this case because if we use the IRET hypercall
* to pop the stack frame we end up in an infinite loop of failsafe callbacks.
* We distinguish between categories by comparing each saved segment register
* with its current contents: any discrepancy means we in category 1.
*/
ENTRY(xen_failsafe_callback)
framesz = (RIP-0x30) /* workaround buggy gas */
_frame framesz
CFI_REL_OFFSET rcx, 0
CFI_REL_OFFSET r11, 8
INTR_FRAME 1 (6*8)
/*CFI_REL_OFFSET gs,GS*/
/*CFI_REL_OFFSET fs,FS*/
/*CFI_REL_OFFSET es,ES*/
/*CFI_REL_OFFSET ds,DS*/
CFI_REL_OFFSET r11,8
CFI_REL_OFFSET rcx,0
movw %ds,%cx
cmpw %cx,0x10(%rsp)
CFI_REMEMBER_STATE
......@@ -1429,12 +1310,9 @@ ENTRY(xen_failsafe_callback)
CFI_RESTORE r11
addq $0x30,%rsp
CFI_ADJUST_CFA_OFFSET -0x30
pushq $0
CFI_ADJUST_CFA_OFFSET 8
pushq %r11
CFI_ADJUST_CFA_OFFSET 8
pushq %rcx
CFI_ADJUST_CFA_OFFSET 8
pushq_cfi $0 /* RIP */
pushq_cfi %r11
pushq_cfi %rcx
jmp general_protection
CFI_RESTORE_STATE
1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
......@@ -1444,11 +1322,223 @@ ENTRY(xen_failsafe_callback)
CFI_RESTORE r11
addq $0x30,%rsp
CFI_ADJUST_CFA_OFFSET -0x30
pushq $0
CFI_ADJUST_CFA_OFFSET 8
pushq_cfi $0
SAVE_ALL
jmp error_exit
CFI_ENDPROC
END(xen_failsafe_callback)
#endif /* CONFIG_XEN */
/*
* Some functions should be protected against kprobes
*/
.pushsection .kprobes.text, "ax"
paranoidzeroentry_ist debug do_debug DEBUG_STACK
paranoidzeroentry_ist int3 do_int3 DEBUG_STACK
paranoiderrorentry stack_segment do_stack_segment
errorentry general_protection do_general_protection
errorentry page_fault do_page_fault
#ifdef CONFIG_X86_MCE
paranoidzeroentry machine_check do_machine_check
#endif
/*
* "Paranoid" exit path from exception stack.
* Paranoid because this is used by NMIs and cannot take
* any kernel state for granted.
* We don't do kernel preemption checks here, because only
* NMI should be common and it does not enable IRQs and
* cannot get reschedule ticks.
*
* "trace" is 0 for the NMI handler only, because irq-tracing
* is fundamentally NMI-unsafe. (we cannot change the soft and
* hard flags at once, atomically)
*/
/* ebx: no swapgs flag */
ENTRY(paranoid_exit)
INTR_FRAME
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
testl %ebx,%ebx /* swapgs needed? */
jnz paranoid_restore
testl $3,CS(%rsp)
jnz paranoid_userspace
paranoid_swapgs:
TRACE_IRQS_IRETQ 0
SWAPGS_UNSAFE_STACK
paranoid_restore:
RESTORE_ALL 8
jmp irq_return
paranoid_userspace:
GET_THREAD_INFO(%rcx)
movl TI_flags(%rcx),%ebx
andl $_TIF_WORK_MASK,%ebx
jz paranoid_swapgs
movq %rsp,%rdi /* &pt_regs */
call sync_regs
movq %rax,%rsp /* switch stack for scheduling */
testl $_TIF_NEED_RESCHED,%ebx
jnz paranoid_schedule
movl %ebx,%edx /* arg3: thread flags */
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
xorl %esi,%esi /* arg2: oldset */
movq %rsp,%rdi /* arg1: &pt_regs */
call do_notify_resume
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
jmp paranoid_userspace
paranoid_schedule:
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_ANY)
call schedule
DISABLE_INTERRUPTS(CLBR_ANY)
TRACE_IRQS_OFF
jmp paranoid_userspace
CFI_ENDPROC
END(paranoid_exit)
/*
* Exception entry point. This expects an error code/orig_rax on the stack.
* returns in "no swapgs flag" in %ebx.
*/
ENTRY(error_entry)
XCPT_FRAME
CFI_ADJUST_CFA_OFFSET 15*8
/* oldrax contains error code */
cld
movq_cfi rdi, RDI+8
movq_cfi rsi, RSI+8
movq_cfi rdx, RDX+8
movq_cfi rcx, RCX+8
movq_cfi rax, RAX+8
movq_cfi r8, R8+8
movq_cfi r9, R9+8
movq_cfi r10, R10+8
movq_cfi r11, R11+8
movq_cfi rbx, RBX+8
movq_cfi rbp, RBP+8
movq_cfi r12, R12+8
movq_cfi r13, R13+8
movq_cfi r14, R14+8
movq_cfi r15, R15+8
xorl %ebx,%ebx
testl $3,CS+8(%rsp)
je error_kernelspace
error_swapgs:
SWAPGS
error_sti:
TRACE_IRQS_OFF
ret
CFI_ENDPROC
/*
* There are two places in the kernel that can potentially fault with
* usergs. Handle them here. The exception handlers after iret run with
* kernel gs again, so don't set the user space flag. B stepping K8s
* sometimes report an truncated RIP for IRET exceptions returning to
* compat mode. Check for these here too.
*/
error_kernelspace:
incl %ebx
leaq irq_return(%rip),%rcx
cmpq %rcx,RIP+8(%rsp)
je error_swapgs
movl %ecx,%ecx /* zero extend */
cmpq %rcx,RIP+8(%rsp)
je error_swapgs
cmpq $gs_change,RIP+8(%rsp)
je error_swapgs
jmp error_sti
END(error_entry)
/* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
ENTRY(error_exit)
DEFAULT_FRAME
movl %ebx,%eax
RESTORE_REST
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
GET_THREAD_INFO(%rcx)
testl %eax,%eax
jne retint_kernel
LOCKDEP_SYS_EXIT_IRQ
movl TI_flags(%rcx),%edx
movl $_TIF_WORK_MASK,%edi
andl %edi,%edx
jnz retint_careful
jmp retint_swapgs
CFI_ENDPROC
END(error_exit)
/* runs on exception stack */
ENTRY(nmi)
INTR_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
pushq_cfi $-1
subq $15*8, %rsp
CFI_ADJUST_CFA_OFFSET 15*8
call save_paranoid
DEFAULT_FRAME 0
/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
movq %rsp,%rdi
movq $-1,%rsi
call do_nmi
#ifdef CONFIG_TRACE_IRQFLAGS
/* paranoidexit; without TRACE_IRQS_OFF */
/* ebx: no swapgs flag */
DISABLE_INTERRUPTS(CLBR_NONE)
testl %ebx,%ebx /* swapgs needed? */
jnz nmi_restore
testl $3,CS(%rsp)
jnz nmi_userspace
nmi_swapgs:
SWAPGS_UNSAFE_STACK
nmi_restore:
RESTORE_ALL 8
jmp irq_return
nmi_userspace:
GET_THREAD_INFO(%rcx)
movl TI_flags(%rcx),%ebx
andl $_TIF_WORK_MASK,%ebx
jz nmi_swapgs
movq %rsp,%rdi /* &pt_regs */
call sync_regs
movq %rax,%rsp /* switch stack for scheduling */
testl $_TIF_NEED_RESCHED,%ebx
jnz nmi_schedule
movl %ebx,%edx /* arg3: thread flags */
ENABLE_INTERRUPTS(CLBR_NONE)
xorl %esi,%esi /* arg2: oldset */
movq %rsp,%rdi /* arg1: &pt_regs */
call do_notify_resume
DISABLE_INTERRUPTS(CLBR_NONE)
jmp nmi_userspace
nmi_schedule:
ENABLE_INTERRUPTS(CLBR_ANY)
call schedule
DISABLE_INTERRUPTS(CLBR_ANY)
jmp nmi_userspace
CFI_ENDPROC
#else
jmp paranoid_exit
CFI_ENDPROC
#endif
END(nmi)
ENTRY(ignore_sysret)
CFI_STARTPROC
mov $-ENOSYS,%eax
sysret
CFI_ENDPROC
END(ignore_sysret)
/*
* End of kprobes section
*/
.popsection
......@@ -18,7 +18,6 @@
#include <asm/idle.h>
#include <asm/smp.h>
#ifdef CONFIG_DEBUG_STACKOVERFLOW
/*
* Probabilistic stack overflow check:
*
......@@ -28,19 +27,18 @@
*/
static inline void stack_overflow_check(struct pt_regs *regs)
{
#ifdef CONFIG_DEBUG_STACKOVERFLOW
u64 curbase = (u64)task_stack_page(current);
static unsigned long warned = -60*HZ;
if (regs->sp >= curbase && regs->sp <= curbase + THREAD_SIZE &&
regs->sp < curbase + sizeof(struct thread_info) + 128 &&
time_after(jiffies, warned + 60*HZ)) {
printk("do_IRQ: %s near stack overflow (cur:%Lx,sp:%lx)\n",
current->comm, curbase, regs->sp);
show_stack(NULL,NULL);
warned = jiffies;
}
}
WARN_ONCE(regs->sp >= curbase &&
regs->sp <= curbase + THREAD_SIZE &&
regs->sp < curbase + sizeof(struct thread_info) +
sizeof(struct pt_regs) + 128,
"do_IRQ: %s near stack overflow (cur:%Lx,sp:%lx)\n",
current->comm, curbase, regs->sp);
#endif
}
/*
* do_IRQ handles all normal device IRQ's (the special
......@@ -60,9 +58,7 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
irq_enter();
irq = __get_cpu_var(vector_irq)[vector];
#ifdef CONFIG_DEBUG_STACKOVERFLOW
stack_overflow_check(regs);
#endif
desc = irq_to_desc(irq);
if (likely(desc))
......
......@@ -129,7 +129,7 @@ void __init native_init_IRQ(void)
for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) {
/* SYSCALL_VECTOR was reserved in trap_init. */
if (i != SYSCALL_VECTOR)
set_intr_gate(i, interrupt[i]);
set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]);
}
......
......@@ -23,41 +23,6 @@
#include <asm/apic.h>
#include <asm/i8259.h>
/*
* Common place to define all x86 IRQ vectors
*
* This builds up the IRQ handler stubs using some ugly macros in irq.h
*
* These macros create the low-level assembly IRQ routines that save
* register context and call do_IRQ(). do_IRQ() then does all the
* operations that are needed to keep the AT (or SMP IOAPIC)
* interrupt-controller happy.
*/
#define IRQ_NAME2(nr) nr##_interrupt(void)
#define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr)
/*
* SMP has a few special interrupts for IPI messages
*/
#define BUILD_IRQ(nr) \
asmlinkage void IRQ_NAME(nr); \
asm("\n.text\n.p2align\n" \
"IRQ" #nr "_interrupt:\n\t" \
"push $~(" #nr ") ; " \
"jmp common_interrupt\n" \
".previous");
#define BI(x,y) \
BUILD_IRQ(x##y)
#define BUILD_16_IRQS(x) \
BI(x,0) BI(x,1) BI(x,2) BI(x,3) \
BI(x,4) BI(x,5) BI(x,6) BI(x,7) \
BI(x,8) BI(x,9) BI(x,a) BI(x,b) \
BI(x,c) BI(x,d) BI(x,e) BI(x,f)
/*
* ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
* (these are usually mapped to vectors 0x30-0x3f)
......@@ -73,37 +38,6 @@
*
* (these are usually mapped into the 0x30-0xff vector range)
*/
BUILD_16_IRQS(0x2) BUILD_16_IRQS(0x3)
BUILD_16_IRQS(0x4) BUILD_16_IRQS(0x5) BUILD_16_IRQS(0x6) BUILD_16_IRQS(0x7)
BUILD_16_IRQS(0x8) BUILD_16_IRQS(0x9) BUILD_16_IRQS(0xa) BUILD_16_IRQS(0xb)
BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) BUILD_16_IRQS(0xe) BUILD_16_IRQS(0xf)
#undef BUILD_16_IRQS
#undef BI
#define IRQ(x,y) \
IRQ##x##y##_interrupt
#define IRQLIST_16(x) \
IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \
IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \
IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f)
/* for the irq vectors */
static void (*__initdata interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) = {
IRQLIST_16(0x2), IRQLIST_16(0x3),
IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7),
IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb),
IRQLIST_16(0xc), IRQLIST_16(0xd), IRQLIST_16(0xe), IRQLIST_16(0xf)
};
#undef IRQ
#undef IRQLIST_16
/*
* IRQ2 is cascade interrupt to second interrupt controller
......
/*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
*
* 1997-11-28 Modified for POSIX.1b signals by Richard Henderson
* 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes
* 2000-2002 x86-64 support by Andi Kleen
*/
#include <linux/list.h>
#include <linux/personality.h>
#include <linux/binfmts.h>
#include <linux/suspend.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/kernel.h>
#include <linux/ptrace.h>
#include <linux/signal.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/ptrace.h>
#include <linux/tracehook.h>
#include <linux/elf.h>
#include <linux/smp.h>
#include <linux/mm.h>
#include <linux/unistd.h>
#include <linux/stddef.h>
#include <linux/personality.h>
#include <linux/uaccess.h>
#include <asm/processor.h>
#include <asm/ucontext.h>
#include <asm/uaccess.h>
#include <asm/i387.h>
#include <asm/vdso.h>
#ifdef CONFIG_X86_64
#include <asm/proto.h>
#include <asm/ia32_unistd.h>
#include <asm/mce.h>
#endif /* CONFIG_X86_64 */
#include <asm/syscall.h>
#include <asm/syscalls.h>
......@@ -45,74 +50,6 @@
# define FIX_EFLAGS __FIX_EFLAGS
#endif
/*
* Atomically swap in the new signal mask, and wait for a signal.
*/
asmlinkage int
sys_sigsuspend(int history0, int history1, old_sigset_t mask)
{
mask &= _BLOCKABLE;
spin_lock_irq(&current->sighand->siglock);
current->saved_sigmask = current->blocked;
siginitset(&current->blocked, mask);
recalc_sigpending();
spin_unlock_irq(&current->sighand->siglock);
current->state = TASK_INTERRUPTIBLE;
schedule();
set_restore_sigmask();
return -ERESTARTNOHAND;
}
asmlinkage int
sys_sigaction(int sig, const struct old_sigaction __user *act,
struct old_sigaction __user *oact)
{
struct k_sigaction new_ka, old_ka;
int ret;
if (act) {
old_sigset_t mask;
if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
__get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
__get_user(new_ka.sa.sa_restorer, &act->sa_restorer))
return -EFAULT;
__get_user(new_ka.sa.sa_flags, &act->sa_flags);
__get_user(mask, &act->sa_mask);
siginitset(&new_ka.sa.sa_mask, mask);
}
ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
if (!ret && oact) {
if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
__put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
__put_user(old_ka.sa.sa_restorer, &oact->sa_restorer))
return -EFAULT;
__put_user(old_ka.sa.sa_flags, &oact->sa_flags);
__put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
}
return ret;
}
asmlinkage int sys_sigaltstack(unsigned long bx)
{
/*
* This is needed to make gcc realize it doesn't own the
* "struct pt_regs"
*/
struct pt_regs *regs = (struct pt_regs *)&bx;
const stack_t __user *uss = (const stack_t __user *)bx;
stack_t __user *uoss = (stack_t __user *)regs->cx;
return do_sigaltstack(uss, uoss, regs->sp);
}
#define COPY(x) { \
err |= __get_user(regs->x, &sc->x); \
}
......@@ -123,7 +60,7 @@ asmlinkage int sys_sigaltstack(unsigned long bx)
regs->seg = tmp; \
}
#define COPY_SEG_STRICT(seg) { \
#define COPY_SEG_CPL3(seg) { \
unsigned short tmp; \
err |= __get_user(tmp, &sc->seg); \
regs->seg = tmp | 3; \
......@@ -135,9 +72,6 @@ asmlinkage int sys_sigaltstack(unsigned long bx)
loadsegment(seg, tmp); \
}
/*
* Do a signal return; undo the signal stack.
*/
static int
restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
unsigned long *pax)
......@@ -149,14 +83,36 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
/* Always make any pending restarted system calls return -EINTR */
current_thread_info()->restart_block.fn = do_no_restart_syscall;
#ifdef CONFIG_X86_32
GET_SEG(gs);
COPY_SEG(fs);
COPY_SEG(es);
COPY_SEG(ds);
#endif /* CONFIG_X86_32 */
COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
COPY(dx); COPY(cx); COPY(ip);
COPY_SEG_STRICT(cs);
COPY_SEG_STRICT(ss);
#ifdef CONFIG_X86_64
COPY(r8);
COPY(r9);
COPY(r10);
COPY(r11);
COPY(r12);
COPY(r13);
COPY(r14);
COPY(r15);
#endif /* CONFIG_X86_64 */
#ifdef CONFIG_X86_32
COPY_SEG_CPL3(cs);
COPY_SEG_CPL3(ss);
#else /* !CONFIG_X86_32 */
/* Kernel saves and restores only the CS segment register on signals,
* which is the bare minimum needed to allow mixed 32/64-bit code.
* App's signal handler can save/restore other segments if needed. */
COPY_SEG_CPL3(cs);
#endif /* CONFIG_X86_32 */
err |= __get_user(tmpflags, &sc->flags);
regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
......@@ -169,102 +125,24 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
return err;
}
asmlinkage unsigned long sys_sigreturn(unsigned long __unused)
{
struct sigframe __user *frame;
struct pt_regs *regs;
unsigned long ax;
sigset_t set;
regs = (struct pt_regs *) &__unused;
frame = (struct sigframe __user *)(regs->sp - 8);
if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
goto badframe;
if (__get_user(set.sig[0], &frame->sc.oldmask) || (_NSIG_WORDS > 1
&& __copy_from_user(&set.sig[1], &frame->extramask,
sizeof(frame->extramask))))
goto badframe;
sigdelsetmask(&set, ~_BLOCKABLE);
spin_lock_irq(&current->sighand->siglock);
current->blocked = set;
recalc_sigpending();
spin_unlock_irq(&current->sighand->siglock);
if (restore_sigcontext(regs, &frame->sc, &ax))
goto badframe;
return ax;
badframe:
if (show_unhandled_signals && printk_ratelimit()) {
printk("%s%s[%d] bad frame in sigreturn frame:"
"%p ip:%lx sp:%lx oeax:%lx",
task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG,
current->comm, task_pid_nr(current), frame, regs->ip,
regs->sp, regs->orig_ax);
print_vma_addr(" in ", regs->ip);
printk(KERN_CONT "\n");
}
force_sig(SIGSEGV, current);
return 0;
}
static long do_rt_sigreturn(struct pt_regs *regs)
{
struct rt_sigframe __user *frame;
unsigned long ax;
sigset_t set;
frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
goto badframe;
if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
goto badframe;
sigdelsetmask(&set, ~_BLOCKABLE);
spin_lock_irq(&current->sighand->siglock);
current->blocked = set;
recalc_sigpending();
spin_unlock_irq(&current->sighand->siglock);
if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
goto badframe;
if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT)
goto badframe;
return ax;
badframe:
signal_fault(regs, frame, "rt_sigreturn");
return 0;
}
asmlinkage int sys_rt_sigreturn(unsigned long __unused)
{
struct pt_regs *regs = (struct pt_regs *)&__unused;
return do_rt_sigreturn(regs);
}
/*
* Set up a signal frame.
*/
static int
setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
struct pt_regs *regs, unsigned long mask)
{
int tmp, err = 0;
int err = 0;
err |= __put_user(regs->fs, (unsigned int __user *)&sc->fs);
savesegment(gs, tmp);
err |= __put_user(tmp, (unsigned int __user *)&sc->gs);
#ifdef CONFIG_X86_32
{
unsigned int tmp;
savesegment(gs, tmp);
err |= __put_user(tmp, (unsigned int __user *)&sc->gs);
}
err |= __put_user(regs->fs, (unsigned int __user *)&sc->fs);
err |= __put_user(regs->es, (unsigned int __user *)&sc->es);
err |= __put_user(regs->ds, (unsigned int __user *)&sc->ds);
#endif /* CONFIG_X86_32 */
err |= __put_user(regs->di, &sc->di);
err |= __put_user(regs->si, &sc->si);
err |= __put_user(regs->bp, &sc->bp);
......@@ -273,19 +151,33 @@ setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
err |= __put_user(regs->dx, &sc->dx);
err |= __put_user(regs->cx, &sc->cx);
err |= __put_user(regs->ax, &sc->ax);
#ifdef CONFIG_X86_64
err |= __put_user(regs->r8, &sc->r8);
err |= __put_user(regs->r9, &sc->r9);
err |= __put_user(regs->r10, &sc->r10);
err |= __put_user(regs->r11, &sc->r11);
err |= __put_user(regs->r12, &sc->r12);
err |= __put_user(regs->r13, &sc->r13);
err |= __put_user(regs->r14, &sc->r14);
err |= __put_user(regs->r15, &sc->r15);
#endif /* CONFIG_X86_64 */
err |= __put_user(current->thread.trap_no, &sc->trapno);
err |= __put_user(current->thread.error_code, &sc->err);
err |= __put_user(regs->ip, &sc->ip);
#ifdef CONFIG_X86_32
err |= __put_user(regs->cs, (unsigned int __user *)&sc->cs);
err |= __put_user(regs->flags, &sc->flags);
err |= __put_user(regs->sp, &sc->sp_at_signal);
err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss);
#else /* !CONFIG_X86_32 */
err |= __put_user(regs->flags, &sc->flags);
err |= __put_user(regs->cs, &sc->cs);
err |= __put_user(0, &sc->gs);
err |= __put_user(0, &sc->fs);
#endif /* CONFIG_X86_32 */
tmp = save_i387_xstate(fpstate);
if (tmp < 0)
err = 1;
else
err |= __put_user(tmp ? fpstate : NULL, &sc->fpstate);
err |= __put_user(fpstate, &sc->fpstate);
/* non-iBCS2 extensions.. */
err |= __put_user(mask, &sc->oldmask);
......@@ -294,6 +186,32 @@ setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
return err;
}
/*
* Set up a signal frame.
*/
#ifdef CONFIG_X86_32
static const struct {
u16 poplmovl;
u32 val;
u16 int80;
} __attribute__((packed)) retcode = {
0xb858, /* popl %eax; movl $..., %eax */
__NR_sigreturn,
0x80cd, /* int $0x80 */
};
static const struct {
u8 movl;
u32 val;
u16 int80;
u8 pad;
} __attribute__((packed)) rt_retcode = {
0xb8, /* movl $..., %eax */
__NR_rt_sigreturn,
0x80cd, /* int $0x80 */
0
};
/*
* Determine which stack to use..
*/
......@@ -328,6 +246,8 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
if (used_math()) {
sp = sp - sig_xstate_size;
*fpstate = (struct _fpstate *) sp;
if (save_i387_xstate(*fpstate) < 0)
return (void __user *)-1L;
}
sp -= frame_size;
......@@ -383,9 +303,7 @@ __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
* reasons and because gdb uses it as a signature to notice
* signal handler stack frames.
*/
err |= __put_user(0xb858, (short __user *)(frame->retcode+0));
err |= __put_user(__NR_sigreturn, (int __user *)(frame->retcode+2));
err |= __put_user(0x80cd, (short __user *)(frame->retcode+6));
err |= __put_user(*((u64 *)&retcode), (u64 *)frame->retcode);
if (err)
return -EFAULT;
......@@ -454,9 +372,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
* reasons and because gdb uses it as a signature to notice
* signal handler stack frames.
*/
err |= __put_user(0xb8, (char __user *)(frame->retcode+0));
err |= __put_user(__NR_rt_sigreturn, (int __user *)(frame->retcode+1));
err |= __put_user(0x80cd, (short __user *)(frame->retcode+5));
err |= __put_user(*((u64 *)&rt_retcode), (u64 *)frame->retcode);
if (err)
return -EFAULT;
......@@ -475,23 +391,298 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
return 0;
}
#else /* !CONFIG_X86_32 */
/*
* Determine which stack to use..
*/
static void __user *
get_stack(struct k_sigaction *ka, unsigned long sp, unsigned long size)
{
/* Default to using normal stack - redzone*/
sp -= 128;
/* This is the X/Open sanctioned signal stack switching. */
if (ka->sa.sa_flags & SA_ONSTACK) {
if (sas_ss_flags(sp) == 0)
sp = current->sas_ss_sp + current->sas_ss_size;
}
return (void __user *)round_down(sp - size, 64);
}
static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
sigset_t *set, struct pt_regs *regs)
{
struct rt_sigframe __user *frame;
void __user *fp = NULL;
int err = 0;
struct task_struct *me = current;
if (used_math()) {
fp = get_stack(ka, regs->sp, sig_xstate_size);
frame = (void __user *)round_down(
(unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8;
if (save_i387_xstate(fp) < 0)
return -EFAULT;
} else
frame = get_stack(ka, regs->sp, sizeof(struct rt_sigframe)) - 8;
if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
return -EFAULT;
if (ka->sa.sa_flags & SA_SIGINFO) {
if (copy_siginfo_to_user(&frame->info, info))
return -EFAULT;
}
/* Create the ucontext. */
if (cpu_has_xsave)
err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags);
else
err |= __put_user(0, &frame->uc.uc_flags);
err |= __put_user(0, &frame->uc.uc_link);
err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
err |= __put_user(sas_ss_flags(regs->sp),
&frame->uc.uc_stack.ss_flags);
err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]);
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
/* Set up to return from userspace. If provided, use a stub
already in userspace. */
/* x86-64 should always use SA_RESTORER. */
if (ka->sa.sa_flags & SA_RESTORER) {
err |= __put_user(ka->sa.sa_restorer, &frame->pretcode);
} else {
/* could use a vstub here */
return -EFAULT;
}
if (err)
return -EFAULT;
/* Set up registers for signal handler */
regs->di = sig;
/* In case the signal handler was declared without prototypes */
regs->ax = 0;
/* This also works for non SA_SIGINFO handlers because they expect the
next argument after the signal number on the stack. */
regs->si = (unsigned long)&frame->info;
regs->dx = (unsigned long)&frame->uc;
regs->ip = (unsigned long) ka->sa.sa_handler;
regs->sp = (unsigned long)frame;
/* Set up the CS register to run signal handlers in 64-bit mode,
even if the handler happens to be interrupting 32-bit code. */
regs->cs = __USER_CS;
return 0;
}
#endif /* CONFIG_X86_32 */
#ifdef CONFIG_X86_32
/*
* Atomically swap in the new signal mask, and wait for a signal.
*/
asmlinkage int
sys_sigsuspend(int history0, int history1, old_sigset_t mask)
{
mask &= _BLOCKABLE;
spin_lock_irq(&current->sighand->siglock);
current->saved_sigmask = current->blocked;
siginitset(&current->blocked, mask);
recalc_sigpending();
spin_unlock_irq(&current->sighand->siglock);
current->state = TASK_INTERRUPTIBLE;
schedule();
set_restore_sigmask();
return -ERESTARTNOHAND;
}
asmlinkage int
sys_sigaction(int sig, const struct old_sigaction __user *act,
struct old_sigaction __user *oact)
{
struct k_sigaction new_ka, old_ka;
int ret;
if (act) {
old_sigset_t mask;
if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
__get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
__get_user(new_ka.sa.sa_restorer, &act->sa_restorer))
return -EFAULT;
__get_user(new_ka.sa.sa_flags, &act->sa_flags);
__get_user(mask, &act->sa_mask);
siginitset(&new_ka.sa.sa_mask, mask);
}
ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
if (!ret && oact) {
if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
__put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
__put_user(old_ka.sa.sa_restorer, &oact->sa_restorer))
return -EFAULT;
__put_user(old_ka.sa.sa_flags, &oact->sa_flags);
__put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
}
return ret;
}
#endif /* CONFIG_X86_32 */
#ifdef CONFIG_X86_32
asmlinkage int sys_sigaltstack(unsigned long bx)
{
/*
* This is needed to make gcc realize it doesn't own the
* "struct pt_regs"
*/
struct pt_regs *regs = (struct pt_regs *)&bx;
const stack_t __user *uss = (const stack_t __user *)bx;
stack_t __user *uoss = (stack_t __user *)regs->cx;
return do_sigaltstack(uss, uoss, regs->sp);
}
#else /* !CONFIG_X86_32 */
asmlinkage long
sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
struct pt_regs *regs)
{
return do_sigaltstack(uss, uoss, regs->sp);
}
#endif /* CONFIG_X86_32 */
/*
* Do a signal return; undo the signal stack.
*/
#ifdef CONFIG_X86_32
asmlinkage unsigned long sys_sigreturn(unsigned long __unused)
{
struct sigframe __user *frame;
struct pt_regs *regs;
unsigned long ax;
sigset_t set;
regs = (struct pt_regs *) &__unused;
frame = (struct sigframe __user *)(regs->sp - 8);
if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
goto badframe;
if (__get_user(set.sig[0], &frame->sc.oldmask) || (_NSIG_WORDS > 1
&& __copy_from_user(&set.sig[1], &frame->extramask,
sizeof(frame->extramask))))
goto badframe;
sigdelsetmask(&set, ~_BLOCKABLE);
spin_lock_irq(&current->sighand->siglock);
current->blocked = set;
recalc_sigpending();
spin_unlock_irq(&current->sighand->siglock);
if (restore_sigcontext(regs, &frame->sc, &ax))
goto badframe;
return ax;
badframe:
if (show_unhandled_signals && printk_ratelimit()) {
printk("%s%s[%d] bad frame in sigreturn frame:"
"%p ip:%lx sp:%lx oeax:%lx",
task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG,
current->comm, task_pid_nr(current), frame, regs->ip,
regs->sp, regs->orig_ax);
print_vma_addr(" in ", regs->ip);
printk(KERN_CONT "\n");
}
force_sig(SIGSEGV, current);
return 0;
}
#endif /* CONFIG_X86_32 */
static long do_rt_sigreturn(struct pt_regs *regs)
{
struct rt_sigframe __user *frame;
unsigned long ax;
sigset_t set;
frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
goto badframe;
if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
goto badframe;
sigdelsetmask(&set, ~_BLOCKABLE);
spin_lock_irq(&current->sighand->siglock);
current->blocked = set;
recalc_sigpending();
spin_unlock_irq(&current->sighand->siglock);
if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
goto badframe;
if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT)
goto badframe;
return ax;
badframe:
signal_fault(regs, frame, "rt_sigreturn");
return 0;
}
#ifdef CONFIG_X86_32
asmlinkage int sys_rt_sigreturn(struct pt_regs regs)
{
return do_rt_sigreturn(&regs);
}
#else /* !CONFIG_X86_32 */
asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
{
return do_rt_sigreturn(regs);
}
#endif /* CONFIG_X86_32 */
/*
* OK, we're invoking a handler:
*/
static int signr_convert(int sig)
{
#ifdef CONFIG_X86_32
struct thread_info *info = current_thread_info();
if (info->exec_domain && info->exec_domain->signal_invmap && sig < 32)
return info->exec_domain->signal_invmap[sig];
#endif /* CONFIG_X86_32 */
return sig;
}
#ifdef CONFIG_X86_32
#define is_ia32 1
#define ia32_setup_frame __setup_frame
#define ia32_setup_rt_frame __setup_rt_frame
#else /* !CONFIG_X86_32 */
#ifdef CONFIG_IA32_EMULATION
#define is_ia32 test_thread_flag(TIF_IA32)
#else /* !CONFIG_IA32_EMULATION */
#define is_ia32 0
#endif /* CONFIG_IA32_EMULATION */
#endif /* CONFIG_X86_32 */
static int
setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
sigset_t *set, struct pt_regs *regs)
......@@ -592,7 +783,13 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
return 0;
}
#ifdef CONFIG_X86_32
#define NR_restart_syscall __NR_restart_syscall
#else /* !CONFIG_X86_32 */
#define NR_restart_syscall \
test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall : __NR_restart_syscall
#endif /* CONFIG_X86_32 */
/*
* Note that 'init' is a special process: it doesn't get signals it doesn't
* want to handle. Thus you cannot kill init even with a SIGKILL even by
......
/*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
*
* 1997-11-28 Modified for POSIX.1b signals by Richard Henderson
* 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes
* 2000-2002 x86-64 support by Andi Kleen
*/
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/errno.h>
#include <linux/wait.h>
#include <linux/ptrace.h>
#include <linux/tracehook.h>
#include <linux/unistd.h>
#include <linux/stddef.h>
#include <linux/personality.h>
#include <linux/compiler.h>
#include <linux/uaccess.h>
#include <asm/processor.h>
#include <asm/ucontext.h>
#include <asm/i387.h>
#include <asm/proto.h>
#include <asm/ia32_unistd.h>
#include <asm/mce.h>
#include <asm/syscall.h>
#include <asm/syscalls.h>
#include "sigframe.h"
#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
#define __FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | \
X86_EFLAGS_DF | X86_EFLAGS_TF | X86_EFLAGS_SF | \
X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \
X86_EFLAGS_CF)
#ifdef CONFIG_X86_32
# define FIX_EFLAGS (__FIX_EFLAGS | X86_EFLAGS_RF)
#else
# define FIX_EFLAGS __FIX_EFLAGS
#endif
asmlinkage long
sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
struct pt_regs *regs)
{
return do_sigaltstack(uss, uoss, regs->sp);
}
#define COPY(x) { \
err |= __get_user(regs->x, &sc->x); \
}
#define COPY_SEG_STRICT(seg) { \
unsigned short tmp; \
err |= __get_user(tmp, &sc->seg); \
regs->seg = tmp | 3; \
}
/*
* Do a signal return; undo the signal stack.
*/
static int
restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
unsigned long *pax)
{
void __user *buf;
unsigned int tmpflags;
unsigned int err = 0;
/* Always make any pending restarted system calls return -EINTR */
current_thread_info()->restart_block.fn = do_no_restart_syscall;
COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
COPY(dx); COPY(cx); COPY(ip);
COPY(r8);
COPY(r9);
COPY(r10);
COPY(r11);
COPY(r12);
COPY(r13);
COPY(r14);
COPY(r15);
/* Kernel saves and restores only the CS segment register on signals,
* which is the bare minimum needed to allow mixed 32/64-bit code.
* App's signal handler can save/restore other segments if needed. */
COPY_SEG_STRICT(cs);
err |= __get_user(tmpflags, &sc->flags);
regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
regs->orig_ax = -1; /* disable syscall checks */
err |= __get_user(buf, &sc->fpstate);
err |= restore_i387_xstate(buf);
err |= __get_user(*pax, &sc->ax);
return err;
}
static long do_rt_sigreturn(struct pt_regs *regs)
{
struct rt_sigframe __user *frame;
unsigned long ax;
sigset_t set;
frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
goto badframe;
if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
goto badframe;
sigdelsetmask(&set, ~_BLOCKABLE);
spin_lock_irq(&current->sighand->siglock);
current->blocked = set;
recalc_sigpending();
spin_unlock_irq(&current->sighand->siglock);
if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
goto badframe;
if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT)
goto badframe;
return ax;
badframe:
signal_fault(regs, frame, "rt_sigreturn");
return 0;
}
asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
{
return do_rt_sigreturn(regs);
}
/*
* Set up a signal frame.
*/
static inline int
setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
unsigned long mask, struct task_struct *me)
{
int err = 0;
err |= __put_user(regs->cs, &sc->cs);
err |= __put_user(0, &sc->gs);
err |= __put_user(0, &sc->fs);
err |= __put_user(regs->di, &sc->di);
err |= __put_user(regs->si, &sc->si);
err |= __put_user(regs->bp, &sc->bp);
err |= __put_user(regs->sp, &sc->sp);
err |= __put_user(regs->bx, &sc->bx);
err |= __put_user(regs->dx, &sc->dx);
err |= __put_user(regs->cx, &sc->cx);
err |= __put_user(regs->ax, &sc->ax);
err |= __put_user(regs->r8, &sc->r8);
err |= __put_user(regs->r9, &sc->r9);
err |= __put_user(regs->r10, &sc->r10);
err |= __put_user(regs->r11, &sc->r11);
err |= __put_user(regs->r12, &sc->r12);
err |= __put_user(regs->r13, &sc->r13);
err |= __put_user(regs->r14, &sc->r14);
err |= __put_user(regs->r15, &sc->r15);
err |= __put_user(me->thread.trap_no, &sc->trapno);
err |= __put_user(me->thread.error_code, &sc->err);
err |= __put_user(regs->ip, &sc->ip);
err |= __put_user(regs->flags, &sc->flags);
err |= __put_user(mask, &sc->oldmask);
err |= __put_user(me->thread.cr2, &sc->cr2);
return err;
}
/*
* Determine which stack to use..
*/
static void __user *
get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size)
{
unsigned long sp;
/* Default to using normal stack - redzone*/
sp = regs->sp - 128;
/* This is the X/Open sanctioned signal stack switching. */
if (ka->sa.sa_flags & SA_ONSTACK) {
if (sas_ss_flags(sp) == 0)
sp = current->sas_ss_sp + current->sas_ss_size;
}
return (void __user *)round_down(sp - size, 64);
}
static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
sigset_t *set, struct pt_regs *regs)
{
struct rt_sigframe __user *frame;
void __user *fp = NULL;
int err = 0;
struct task_struct *me = current;
if (used_math()) {
fp = get_stack(ka, regs, sig_xstate_size);
frame = (void __user *)round_down(
(unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8;
if (save_i387_xstate(fp) < 0)
return -EFAULT;
} else
frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8;
if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
return -EFAULT;
if (ka->sa.sa_flags & SA_SIGINFO) {
if (copy_siginfo_to_user(&frame->info, info))
return -EFAULT;
}
/* Create the ucontext. */
if (cpu_has_xsave)
err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags);
else
err |= __put_user(0, &frame->uc.uc_flags);
err |= __put_user(0, &frame->uc.uc_link);
err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
err |= __put_user(sas_ss_flags(regs->sp),
&frame->uc.uc_stack.ss_flags);
err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, set->sig[0], me);
err |= __put_user(fp, &frame->uc.uc_mcontext.fpstate);
if (sizeof(*set) == 16) {
__put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]);
__put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]);
} else
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
/* Set up to return from userspace. If provided, use a stub
already in userspace. */
/* x86-64 should always use SA_RESTORER. */
if (ka->sa.sa_flags & SA_RESTORER) {
err |= __put_user(ka->sa.sa_restorer, &frame->pretcode);
} else {
/* could use a vstub here */
return -EFAULT;
}
if (err)
return -EFAULT;
/* Set up registers for signal handler */
regs->di = sig;
/* In case the signal handler was declared without prototypes */
regs->ax = 0;
/* This also works for non SA_SIGINFO handlers because they expect the
next argument after the signal number on the stack. */
regs->si = (unsigned long)&frame->info;
regs->dx = (unsigned long)&frame->uc;
regs->ip = (unsigned long) ka->sa.sa_handler;
regs->sp = (unsigned long)frame;
/* Set up the CS register to run signal handlers in 64-bit mode,
even if the handler happens to be interrupting 32-bit code. */
regs->cs = __USER_CS;
return 0;
}
/*
* OK, we're invoking a handler
*/
static int signr_convert(int sig)
{
return sig;
}
#ifdef CONFIG_IA32_EMULATION
#define is_ia32 test_thread_flag(TIF_IA32)
#else
#define is_ia32 0
#endif
static int
setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
sigset_t *set, struct pt_regs *regs)
{
int usig = signr_convert(sig);
int ret;
/* Set up the stack frame */
if (is_ia32) {
if (ka->sa.sa_flags & SA_SIGINFO)
ret = ia32_setup_rt_frame(usig, ka, info, set, regs);
else
ret = ia32_setup_frame(usig, ka, set, regs);
} else
ret = __setup_rt_frame(sig, ka, info, set, regs);
if (ret) {
force_sigsegv(sig, current);
return -EFAULT;
}
return ret;
}
static int
handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
sigset_t *oldset, struct pt_regs *regs)
{
int ret;
/* Are we from a system call? */
if (syscall_get_nr(current, regs) >= 0) {
/* If so, check system call restarting.. */
switch (syscall_get_error(current, regs)) {
case -ERESTART_RESTARTBLOCK:
case -ERESTARTNOHAND:
regs->ax = -EINTR;
break;
case -ERESTARTSYS:
if (!(ka->sa.sa_flags & SA_RESTART)) {
regs->ax = -EINTR;
break;
}
/* fallthrough */
case -ERESTARTNOINTR:
regs->ax = regs->orig_ax;
regs->ip -= 2;
break;
}
}
/*
* If TF is set due to a debugger (TIF_FORCED_TF), clear the TF
* flag so that register information in the sigcontext is correct.
*/
if (unlikely(regs->flags & X86_EFLAGS_TF) &&
likely(test_and_clear_thread_flag(TIF_FORCED_TF)))
regs->flags &= ~X86_EFLAGS_TF;
ret = setup_rt_frame(sig, ka, info, oldset, regs);
if (ret)
return ret;
#ifdef CONFIG_X86_64
/*
* This has nothing to do with segment registers,
* despite the name. This magic affects uaccess.h
* macros' behavior. Reset it to the normal setting.
*/
set_fs(USER_DS);
#endif
/*
* Clear the direction flag as per the ABI for function entry.
*/
regs->flags &= ~X86_EFLAGS_DF;
/*
* Clear TF when entering the signal handler, but
* notify any tracer that was single-stepping it.
* The tracer may want to single-step inside the
* handler too.
*/
regs->flags &= ~X86_EFLAGS_TF;
spin_lock_irq(&current->sighand->siglock);
sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
if (!(ka->sa.sa_flags & SA_NODEFER))
sigaddset(&current->blocked, sig);
recalc_sigpending();
spin_unlock_irq(&current->sighand->siglock);
tracehook_signal_handler(sig, info, ka, regs,
test_thread_flag(TIF_SINGLESTEP));
return 0;
}
#define NR_restart_syscall \
test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall : __NR_restart_syscall
/*
* Note that 'init' is a special process: it doesn't get signals it doesn't
* want to handle. Thus you cannot kill init even with a SIGKILL even by
* mistake.
*/
static void do_signal(struct pt_regs *regs)
{
struct k_sigaction ka;
siginfo_t info;
int signr;
sigset_t *oldset;
/*
* We want the common case to go fast, which is why we may in certain
* cases get here from kernel mode. Just return without doing anything
* if so.
* X86_32: vm86 regs switched out by assembly code before reaching
* here, so testing against kernel CS suffices.
*/
if (!user_mode(regs))
return;
if (current_thread_info()->status & TS_RESTORE_SIGMASK)
oldset = &current->saved_sigmask;
else
oldset = &current->blocked;
signr = get_signal_to_deliver(&info, &ka, regs, NULL);
if (signr > 0) {
/*
* Re-enable any watchpoints before delivering the
* signal to user space. The processor register will
* have been cleared if the watchpoint triggered
* inside the kernel.
*/
if (current->thread.debugreg7)
set_debugreg(current->thread.debugreg7, 7);
/* Whee! Actually deliver the signal. */
if (handle_signal(signr, &info, &ka, oldset, regs) == 0) {
/*
* A signal was successfully delivered; the saved
* sigmask will have been stored in the signal frame,
* and will be restored by sigreturn, so we can simply
* clear the TS_RESTORE_SIGMASK flag.
*/
current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
}
return;
}
/* Did we come from a system call? */
if (syscall_get_nr(current, regs) >= 0) {
/* Restart the system call - no handlers present */
switch (syscall_get_error(current, regs)) {
case -ERESTARTNOHAND:
case -ERESTARTSYS:
case -ERESTARTNOINTR:
regs->ax = regs->orig_ax;
regs->ip -= 2;
break;
case -ERESTART_RESTARTBLOCK:
regs->ax = NR_restart_syscall;
regs->ip -= 2;
break;
}
}
/*
* If there's no signal to deliver, we just put the saved sigmask
* back.
*/
if (current_thread_info()->status & TS_RESTORE_SIGMASK) {
current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
}
}
/*
* notification of userspace execution resumption
* - triggered by the TIF_WORK_MASK flags
*/
void
do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
{
#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE)
/* notify userspace of pending MCEs */
if (thread_info_flags & _TIF_MCE_NOTIFY)
mce_notify_user();
#endif /* CONFIG_X86_64 && CONFIG_X86_MCE */
/* deal with pending signal delivery */
if (thread_info_flags & _TIF_SIGPENDING)
do_signal(regs);
if (thread_info_flags & _TIF_NOTIFY_RESUME) {
clear_thread_flag(TIF_NOTIFY_RESUME);
tracehook_notify_resume(regs);
}
#ifdef CONFIG_X86_32
clear_thread_flag(TIF_IRET);
#endif /* CONFIG_X86_32 */
}
void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
{
struct task_struct *me = current;
if (show_unhandled_signals && printk_ratelimit()) {
printk(KERN_INFO
"%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx",
me->comm, me->pid, where, frame,
regs->ip, regs->sp, regs->orig_ax);
print_vma_addr(" in ", regs->ip);
printk(KERN_CONT "\n");
}
force_sig(SIGSEGV, me);
}
......@@ -80,6 +80,8 @@ unsigned long __init calibrate_cpu(void)
break;
no_ctr_free = (i == 4);
if (no_ctr_free) {
WARN(1, KERN_WARNING "Warning: AMD perfctrs busy ... "
"cpu_khz value may be incorrect.\n");
i = 3;
rdmsrl(MSR_K7_EVNTSEL3, evntsel3);
wrmsrl(MSR_K7_EVNTSEL3, 0);
......
......@@ -128,7 +128,16 @@ static __always_inline void do_vgettimeofday(struct timeval * tv)
gettimeofday(tv,NULL);
return;
}
/*
* Surround the RDTSC by barriers, to make sure it's not
* speculated to outside the seqlock critical section and
* does not cause time warps:
*/
rdtsc_barrier();
now = vread();
rdtsc_barrier();
base = __vsyscall_gtod_data.clock.cycle_last;
mask = __vsyscall_gtod_data.clock.mask;
mult = __vsyscall_gtod_data.clock.mult;
......
......@@ -590,7 +590,8 @@ static void __init lguest_init_IRQ(void)
* a straightforward 1 to 1 mapping, so force that here. */
__get_cpu_var(vector_irq)[vector] = i;
if (vector != SYSCALL_VECTOR) {
set_intr_gate(vector, interrupt[vector]);
set_intr_gate(vector,
interrupt[vector-FIRST_EXTERNAL_VECTOR]);
set_irq_chip_and_handler_name(i, &lguest_irq_controller,
handle_level_irq,
"level");
......
......@@ -102,6 +102,8 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
pud = pud_offset(pgd, 0);
BUG_ON(pmd_table != pmd_offset(pud, 0));
return pmd_table;
}
#endif
pud = pud_offset(pgd, 0);
......
......@@ -64,14 +64,6 @@
name:
#endif
#define KPROBE_ENTRY(name) \
.pushsection .kprobes.text, "ax"; \
ENTRY(name)
#define KPROBE_END(name) \
END(name); \
.popsection
#ifndef END
#define END(name) \
.size name, .-name
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment