Commit ab639f35 authored by Ingo Molnar's avatar Ingo Molnar

Merge branch 'core/percpu' into x86/core

parents f8a6b2b9 58105ef1
...@@ -532,8 +532,9 @@ KBUILD_CFLAGS += $(call cc-option,-Wframe-larger-than=${CONFIG_FRAME_WARN}) ...@@ -532,8 +532,9 @@ KBUILD_CFLAGS += $(call cc-option,-Wframe-larger-than=${CONFIG_FRAME_WARN})
endif endif
# Force gcc to behave correct even for buggy distributions # Force gcc to behave correct even for buggy distributions
# Arch Makefiles may override this setting ifndef CONFIG_CC_STACKPROTECTOR
KBUILD_CFLAGS += $(call cc-option, -fno-stack-protector) KBUILD_CFLAGS += $(call cc-option, -fno-stack-protector)
endif
ifdef CONFIG_FRAME_POINTER ifdef CONFIG_FRAME_POINTER
KBUILD_CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls KBUILD_CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls
......
...@@ -27,12 +27,12 @@ extern void *per_cpu_init(void); ...@@ -27,12 +27,12 @@ extern void *per_cpu_init(void);
#else /* ! SMP */ #else /* ! SMP */
#define PER_CPU_ATTRIBUTES __attribute__((__section__(".data.percpu")))
#define per_cpu_init() (__phys_per_cpu_start) #define per_cpu_init() (__phys_per_cpu_start)
#endif /* SMP */ #endif /* SMP */
#define PER_CPU_BASE_SECTION ".data.percpu"
/* /*
* Be extremely careful when taking the address of this variable! Due to virtual * Be extremely careful when taking the address of this variable! Due to virtual
* remapping, it is different from the canonical address returned by __get_cpu_var(var)! * remapping, it is different from the canonical address returned by __get_cpu_var(var)!
......
#ifndef _ASM_IA64_UV_UV_H
#define _ASM_IA64_UV_UV_H
#include <asm/system.h>
#include <asm/sn/simulator.h>
/*
 * Tell whether the running machine should be treated as a UV system.
 *
 * Returns 1 when IS_MEDUSA() is true or ia64_platform_is("uv") is true,
 * and 0 otherwise.
 */
static inline int is_uv_system(void)
{
	/* temporary support for running on hardware simulator */
	if (IS_MEDUSA())
		return 1;

	return ia64_platform_is("uv") ? 1 : 0;
}
#endif /* _ASM_IA64_UV_UV_H */
...@@ -194,6 +194,10 @@ config X86_TRAMPOLINE ...@@ -194,6 +194,10 @@ config X86_TRAMPOLINE
depends on SMP || (64BIT && ACPI_SLEEP) depends on SMP || (64BIT && ACPI_SLEEP)
default y default y
config X86_32_LAZY_GS
def_bool y
depends on X86_32 && !CC_STACKPROTECTOR
config KTIME_SCALAR config KTIME_SCALAR
def_bool X86_32 def_bool X86_32
source "init/Kconfig" source "init/Kconfig"
...@@ -1339,7 +1343,6 @@ config CC_STACKPROTECTOR_ALL ...@@ -1339,7 +1343,6 @@ config CC_STACKPROTECTOR_ALL
config CC_STACKPROTECTOR config CC_STACKPROTECTOR
bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)" bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)"
depends on X86_64
select CC_STACKPROTECTOR_ALL select CC_STACKPROTECTOR_ALL
---help--- ---help---
This option turns on the -fstack-protector GCC feature. This This option turns on the -fstack-protector GCC feature. This
......
...@@ -70,14 +70,17 @@ else ...@@ -70,14 +70,17 @@ else
# this works around some issues with generating unwind tables in older gccs # this works around some issues with generating unwind tables in older gccs
# newer gccs do it by default # newer gccs do it by default
KBUILD_CFLAGS += -maccumulate-outgoing-args KBUILD_CFLAGS += -maccumulate-outgoing-args
endif
stackp := $(CONFIG_SHELL) $(srctree)/scripts/gcc-x86_64-has-stack-protector.sh ifdef CONFIG_CC_STACKPROTECTOR
stackp-$(CONFIG_CC_STACKPROTECTOR) := $(shell $(stackp) \ cc_has_sp := $(srctree)/scripts/gcc-x86_$(BITS)-has-stack-protector.sh
"$(CC)" "-fstack-protector -DGCC_HAS_SP" ) ifeq ($(shell $(CONFIG_SHELL) $(cc_has_sp) $(CC)),y)
stackp-$(CONFIG_CC_STACKPROTECTOR_ALL) += $(shell $(stackp) \ stackp-y := -fstack-protector
"$(CC)" -fstack-protector-all ) stackp-$(CONFIG_CC_STACKPROTECTOR_ALL) += -fstack-protector-all
KBUILD_CFLAGS += $(stackp-y)
KBUILD_CFLAGS += $(stackp-y) else
$(warning stack protector enabled but no compiler support)
endif
endif endif
# Stackpointer is addressed different for 32 bit and 64 bit x86 # Stackpointer is addressed different for 32 bit and 64 bit x86
......
...@@ -55,7 +55,7 @@ static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump) ...@@ -55,7 +55,7 @@ static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump)
dump->regs.ds = (u16)regs->ds; dump->regs.ds = (u16)regs->ds;
dump->regs.es = (u16)regs->es; dump->regs.es = (u16)regs->es;
dump->regs.fs = (u16)regs->fs; dump->regs.fs = (u16)regs->fs;
savesegment(gs, dump->regs.gs); dump->regs.gs = get_user_gs(regs);
dump->regs.orig_ax = regs->orig_ax; dump->regs.orig_ax = regs->orig_ax;
dump->regs.ip = regs->ip; dump->regs.ip = regs->ip;
dump->regs.cs = (u16)regs->cs; dump->regs.cs = (u16)regs->cs;
......
...@@ -112,7 +112,7 @@ extern unsigned int vdso_enabled; ...@@ -112,7 +112,7 @@ extern unsigned int vdso_enabled;
* now struct_user_regs, they are different) * now struct_user_regs, they are different)
*/ */
#define ELF_CORE_COPY_REGS(pr_reg, regs) \ #define ELF_CORE_COPY_REGS_COMMON(pr_reg, regs) \
do { \ do { \
pr_reg[0] = regs->bx; \ pr_reg[0] = regs->bx; \
pr_reg[1] = regs->cx; \ pr_reg[1] = regs->cx; \
...@@ -124,7 +124,6 @@ do { \ ...@@ -124,7 +124,6 @@ do { \
pr_reg[7] = regs->ds & 0xffff; \ pr_reg[7] = regs->ds & 0xffff; \
pr_reg[8] = regs->es & 0xffff; \ pr_reg[8] = regs->es & 0xffff; \
pr_reg[9] = regs->fs & 0xffff; \ pr_reg[9] = regs->fs & 0xffff; \
savesegment(gs, pr_reg[10]); \
pr_reg[11] = regs->orig_ax; \ pr_reg[11] = regs->orig_ax; \
pr_reg[12] = regs->ip; \ pr_reg[12] = regs->ip; \
pr_reg[13] = regs->cs & 0xffff; \ pr_reg[13] = regs->cs & 0xffff; \
...@@ -133,6 +132,18 @@ do { \ ...@@ -133,6 +132,18 @@ do { \
pr_reg[16] = regs->ss & 0xffff; \ pr_reg[16] = regs->ss & 0xffff; \
} while (0); } while (0);
/*
 * Fill pr_reg[] from regs: the shared slots come from
 * ELF_CORE_COPY_REGS_COMMON(), and the gs slot (pr_reg[10]) is taken
 * from get_user_gs(regs).
 *
 * Note that the expansion already ends in ';'.
 */
#define ELF_CORE_COPY_REGS(pr_reg, regs) \
do { \
ELF_CORE_COPY_REGS_COMMON(pr_reg, regs);\
pr_reg[10] = get_user_gs(regs); \
} while (0);
/*
 * Same as ELF_CORE_COPY_REGS(), except that the gs slot (pr_reg[10]) is
 * read from the %gs register itself with savesegment() instead of going
 * through get_user_gs(regs).
 *
 * Note that the expansion already ends in ';'.
 */
#define ELF_CORE_COPY_KERNEL_REGS(pr_reg, regs) \
do { \
ELF_CORE_COPY_REGS_COMMON(pr_reg, regs);\
savesegment(gs, pr_reg[10]); \
} while (0);
#define ELF_PLATFORM (utsname()->machine) #define ELF_PLATFORM (utsname()->machine)
#define set_personality_64bit() do { } while (0) #define set_personality_64bit() do { } while (0)
......
...@@ -79,7 +79,7 @@ do { \ ...@@ -79,7 +79,7 @@ do { \
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
#define deactivate_mm(tsk, mm) \ #define deactivate_mm(tsk, mm) \
do { \ do { \
loadsegment(gs, 0); \ lazy_load_gs(0); \
} while (0) } while (0)
#else #else
#define deactivate_mm(tsk, mm) \ #define deactivate_mm(tsk, mm) \
......
...@@ -34,6 +34,12 @@ ...@@ -34,6 +34,12 @@
#define PER_CPU_VAR(var) per_cpu__##var #define PER_CPU_VAR(var) per_cpu__##var
#endif /* SMP */ #endif /* SMP */
/*
 * Symbol name for the "init" view of a per-cpu variable:
 * init_per_cpu__<var> when CONFIG_X86_64_SMP is set, otherwise the
 * ordinary per_cpu__<var> symbol.
 */
#ifdef CONFIG_X86_64_SMP
#define INIT_PER_CPU_VAR(var) init_per_cpu__##var
#else
#define INIT_PER_CPU_VAR(var) per_cpu__##var
#endif
#else /* ...!ASSEMBLY */ #else /* ...!ASSEMBLY */
#include <linux/stringify.h> #include <linux/stringify.h>
...@@ -45,6 +51,22 @@ ...@@ -45,6 +51,22 @@
#define __percpu_arg(x) "%" #x #define __percpu_arg(x) "%" #x
#endif #endif
/*
* Initialized pointers to per-cpu variables needed for the boot
* processor need to use these macros to get the proper address
* offset from __per_cpu_load on SMP.
*
* There also must be an entry in vmlinux_64.lds.S
*/
/*
 * Declare (extern) the init_per_cpu_var() name of @var with the same
 * type as the per-cpu variable itself.
 */
#define DECLARE_INIT_PER_CPU(var) \
extern typeof(per_cpu_var(var)) init_per_cpu_var(var)
/*
 * init_per_cpu_var(var) is a separate init_per_cpu__<var> symbol when
 * CONFIG_X86_64_SMP is set; otherwise it is just per_cpu_var(var).
 */
#ifdef CONFIG_X86_64_SMP
#define init_per_cpu_var(var) init_per_cpu__##var
#else
#define init_per_cpu_var(var) per_cpu_var(var)
#endif
/* For arch-specific code, we can use direct single-insn ops (they /* For arch-specific code, we can use direct single-insn ops (they
* don't give an lvalue though). */ * don't give an lvalue though). */
extern void __bad_percpu_size(void); extern void __bad_percpu_size(void);
......
...@@ -393,8 +393,14 @@ union irq_stack_union { ...@@ -393,8 +393,14 @@ union irq_stack_union {
}; };
DECLARE_PER_CPU(union irq_stack_union, irq_stack_union); DECLARE_PER_CPU(union irq_stack_union, irq_stack_union);
DECLARE_INIT_PER_CPU(irq_stack_union);
DECLARE_PER_CPU(char *, irq_stack_ptr); DECLARE_PER_CPU(char *, irq_stack_ptr);
#else /* X86_64 */
#ifdef CONFIG_CC_STACKPROTECTOR
DECLARE_PER_CPU(unsigned long, stack_canary);
#endif #endif
#endif /* X86_64 */
extern void print_cpu_info(struct cpuinfo_x86 *); extern void print_cpu_info(struct cpuinfo_x86 *);
extern unsigned int xstate_size; extern unsigned int xstate_size;
......
...@@ -28,7 +28,7 @@ struct pt_regs { ...@@ -28,7 +28,7 @@ struct pt_regs {
int xds; int xds;
int xes; int xes;
int xfs; int xfs;
/* int gs; */ int xgs;
long orig_eax; long orig_eax;
long eip; long eip;
int xcs; int xcs;
...@@ -50,7 +50,7 @@ struct pt_regs { ...@@ -50,7 +50,7 @@ struct pt_regs {
unsigned long ds; unsigned long ds;
unsigned long es; unsigned long es;
unsigned long fs; unsigned long fs;
/* int gs; */ unsigned long gs;
unsigned long orig_ax; unsigned long orig_ax;
unsigned long ip; unsigned long ip;
unsigned long cs; unsigned long cs;
......
...@@ -61,7 +61,7 @@ ...@@ -61,7 +61,7 @@
* *
* 26 - ESPFIX small SS * 26 - ESPFIX small SS
* 27 - per-cpu [ offset to per-cpu data area ] * 27 - per-cpu [ offset to per-cpu data area ]
* 28 - unused * 28 - stack_canary-20 [ for stack protector ]
* 29 - unused * 29 - unused
* 30 - unused * 30 - unused
* 31 - TSS for double fault handler * 31 - TSS for double fault handler
...@@ -95,6 +95,13 @@ ...@@ -95,6 +95,13 @@
#define __KERNEL_PERCPU 0 #define __KERNEL_PERCPU 0
#endif #endif
/*
 * GDT slot used for the stack canary segment, and the selector value
 * that refers to it (slot index * 8).  When CONFIG_CC_STACKPROTECTOR is
 * not set the selector is defined as 0, so code that loads
 * __KERNEL_STACK_CANARY into a segment register loads 0 instead.
 */
#define GDT_ENTRY_STACK_CANARY (GDT_ENTRY_KERNEL_BASE + 16)
#ifdef CONFIG_CC_STACKPROTECTOR
#define __KERNEL_STACK_CANARY (GDT_ENTRY_STACK_CANARY * 8)
#else
#define __KERNEL_STACK_CANARY 0
#endif
#define GDT_ENTRY_DOUBLEFAULT_TSS 31 #define GDT_ENTRY_DOUBLEFAULT_TSS 31
/* /*
......
/*
* GCC stack protector support.
*
* Stack protector works by putting a predefined pattern at the start of
* the stack frame and verifying that it hasn't been overwritten when
* returning from the function. The pattern is called the stack canary
* and unfortunately gcc requires it to be at a fixed offset from %gs.
* On x86_64, the offset is 40 bytes and on x86_32 20 bytes. x86_64
* and x86_32 use segment registers differently and thus handle this
* requirement differently.
*
* On x86_64, %gs is shared by percpu area and stack canary. All
* percpu symbols are zero based and %gs points to the base of percpu
* area. The first occupant of the percpu area is always
* irq_stack_union which contains stack_canary at offset 40. Userland
* %gs is always saved and restored on kernel entry and exit using
* swapgs, so stack protector doesn't add any complexity there.
*
* On x86_32, it's slightly more complicated. As in x86_64, %gs is
* used for userland TLS. Unfortunately, some processors are much
* slower at loading segment registers with a different value when
* entering and leaving the kernel, so the kernel uses %fs for the percpu
* area and manages %gs lazily so that %gs is switched only when
* necessary, usually during a task switch.
*
* As gcc requires the stack canary at %gs:20, %gs can't be managed
* lazily if stack protector is enabled, so the kernel saves and
* restores userland %gs on kernel entry and exit. This behavior is
* controlled by CONFIG_X86_32_LAZY_GS and accessors are defined in
* system.h to hide the details.
*/
#ifndef _ASM_STACKPROTECTOR_H #ifndef _ASM_STACKPROTECTOR_H
#define _ASM_STACKPROTECTOR_H 1 #define _ASM_STACKPROTECTOR_H 1
#ifdef CONFIG_CC_STACKPROTECTOR
#include <asm/tsc.h> #include <asm/tsc.h>
#include <asm/processor.h> #include <asm/processor.h>
#include <asm/percpu.h>
#include <asm/system.h>
#include <asm/desc.h>
#include <linux/random.h>
/*
* 24 byte read-only segment initializer for stack canary. Linker
* can't handle the address bit shifting. Address will be set in
* head_32 for boot CPU and setup_per_cpu_areas() for others.
*/
#define GDT_STACK_CANARY_INIT \
[GDT_ENTRY_STACK_CANARY] = { { { 0x00000018, 0x00409000 } } },
/* /*
* Initialize the stackprotector canary value. * Initialize the stackprotector canary value.
...@@ -15,12 +61,9 @@ static __always_inline void boot_init_stack_canary(void) ...@@ -15,12 +61,9 @@ static __always_inline void boot_init_stack_canary(void)
u64 canary; u64 canary;
u64 tsc; u64 tsc;
/* #ifdef CONFIG_X86_64
* Build time only check to make sure the stack_canary is at
* offset 40 in the pda; this is a gcc ABI requirement
*/
BUILD_BUG_ON(offsetof(union irq_stack_union, stack_canary) != 40); BUILD_BUG_ON(offsetof(union irq_stack_union, stack_canary) != 40);
#endif
/* /*
* We both use the random pool and the current TSC as a source * We both use the random pool and the current TSC as a source
* of randomness. The TSC only matters for very early init, * of randomness. The TSC only matters for very early init,
...@@ -32,7 +75,50 @@ static __always_inline void boot_init_stack_canary(void) ...@@ -32,7 +75,50 @@ static __always_inline void boot_init_stack_canary(void)
canary += tsc + (tsc << 32UL); canary += tsc + (tsc << 32UL);
current->stack_canary = canary; current->stack_canary = canary;
#ifdef CONFIG_X86_64
percpu_write(irq_stack_union.stack_canary, canary); percpu_write(irq_stack_union.stack_canary, canary);
#else
percpu_write(stack_canary, canary);
#endif
} }
/*
 * Point the stack canary GDT entry of @cpu at that CPU's stack_canary.
 *
 * Only does anything on CONFIG_X86_32.  The descriptor already present
 * in the GDT is copied, its base fields are rewritten, and the result
 * is written back with write_gdt_entry().
 */
static inline void setup_stack_canary_segment(int cpu)
{
#ifdef CONFIG_X86_32
	struct desc_struct *gdt = get_cpu_gdt_table(cpu);
	struct desc_struct entry = gdt[GDT_ENTRY_STACK_CANARY];
	unsigned long base = (unsigned long)&per_cpu(stack_canary, cpu);

	/* gcc looks for the canary at %gs:20, so bias the base by -20 */
	base -= 20;

	/* scatter the 32-bit base over the three descriptor base fields */
	entry.base0 = base & 0xffff;
	entry.base1 = (base >> 16) & 0xff;
	entry.base2 = (base >> 24) & 0xff;

	write_gdt_entry(gdt, GDT_ENTRY_STACK_CANARY, &entry, DESCTYPE_S);
#endif
}
/*
 * Load the stack canary selector (__KERNEL_STACK_CANARY) into %gs.
 *
 * Only emits code on CONFIG_X86_32; elsewhere the body is empty.
 */
static inline void load_stack_canary_segment(void)
{
#ifdef CONFIG_X86_32
asm("mov %0, %%gs" : : "r" (__KERNEL_STACK_CANARY) : "memory");
#endif
}
#else /* CC_STACKPROTECTOR */
#define GDT_STACK_CANARY_INIT
/* dummy boot_init_stack_canary() is defined in linux/stackprotector.h */
/*
 * Stack protector disabled: there is no canary segment to set up, so
 * this variant is an empty stub that ignores @cpu.
 */
static inline void setup_stack_canary_segment(int cpu)
{ }
static inline void load_stack_canary_segment(void)
{
#ifdef CONFIG_X86_32
asm volatile ("mov %0, %%gs" : : "r" (0));
#endif #endif
}
#endif /* CC_STACKPROTECTOR */
#endif /* _ASM_STACKPROTECTOR_H */
...@@ -29,21 +29,21 @@ asmlinkage int sys_get_thread_area(struct user_desc __user *); ...@@ -29,21 +29,21 @@ asmlinkage int sys_get_thread_area(struct user_desc __user *);
/* X86_32 only */ /* X86_32 only */
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
/* kernel/process_32.c */ /* kernel/process_32.c */
asmlinkage int sys_fork(struct pt_regs); int sys_fork(struct pt_regs *);
asmlinkage int sys_clone(struct pt_regs); int sys_clone(struct pt_regs *);
asmlinkage int sys_vfork(struct pt_regs); int sys_vfork(struct pt_regs *);
asmlinkage int sys_execve(struct pt_regs); int sys_execve(struct pt_regs *);
/* kernel/signal_32.c */ /* kernel/signal_32.c */
asmlinkage int sys_sigsuspend(int, int, old_sigset_t); asmlinkage int sys_sigsuspend(int, int, old_sigset_t);
asmlinkage int sys_sigaction(int, const struct old_sigaction __user *, asmlinkage int sys_sigaction(int, const struct old_sigaction __user *,
struct old_sigaction __user *); struct old_sigaction __user *);
asmlinkage int sys_sigaltstack(unsigned long); int sys_sigaltstack(struct pt_regs *);
asmlinkage unsigned long sys_sigreturn(unsigned long); unsigned long sys_sigreturn(struct pt_regs *);
asmlinkage int sys_rt_sigreturn(unsigned long); long sys_rt_sigreturn(struct pt_regs *);
/* kernel/ioport.c */ /* kernel/ioport.c */
asmlinkage long sys_iopl(unsigned long); long sys_iopl(struct pt_regs *);
/* kernel/sys_i386_32.c */ /* kernel/sys_i386_32.c */
asmlinkage long sys_mmap2(unsigned long, unsigned long, unsigned long, asmlinkage long sys_mmap2(unsigned long, unsigned long, unsigned long,
...@@ -59,8 +59,8 @@ struct oldold_utsname; ...@@ -59,8 +59,8 @@ struct oldold_utsname;
asmlinkage int sys_olduname(struct oldold_utsname __user *); asmlinkage int sys_olduname(struct oldold_utsname __user *);
/* kernel/vm86_32.c */ /* kernel/vm86_32.c */
asmlinkage int sys_vm86old(struct pt_regs); int sys_vm86old(struct pt_regs *);
asmlinkage int sys_vm86(struct pt_regs); int sys_vm86(struct pt_regs *);
#else /* CONFIG_X86_32 */ #else /* CONFIG_X86_32 */
......
...@@ -23,6 +23,20 @@ struct task_struct *__switch_to(struct task_struct *prev, ...@@ -23,6 +23,20 @@ struct task_struct *__switch_to(struct task_struct *prev,
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
/*
 * Pieces spliced into the switch_to() inline asm to keep the per-cpu
 * stack canary in step with the task being switched to:
 *
 *  __switch_canary        - asm text: load next's task_struct
 *                           stack_canary into %ebx, then store %ebx
 *                           into the per-cpu stack_canary
 *  __switch_canary_oparam - extra output operand naming the per-cpu
 *                           stack_canary as [stack_canary]
 *  __switch_canary_iparam - extra input operand giving the offset of
 *                           stack_canary in struct task_struct as
 *                           [task_canary]
 *
 * The operand macros begin with a comma because they are appended to
 * an existing operand list.  Without CONFIG_CC_STACKPROTECTOR all
 * three expand to nothing.
 */
#ifdef CONFIG_CC_STACKPROTECTOR
#define __switch_canary \
"movl %P[task_canary](%[next]), %%ebx\n\t" \
"movl %%ebx, "__percpu_arg([stack_canary])"\n\t"
#define __switch_canary_oparam \
, [stack_canary] "=m" (per_cpu_var(stack_canary))
#define __switch_canary_iparam \
, [task_canary] "i" (offsetof(struct task_struct, stack_canary))
#else /* CC_STACKPROTECTOR */
#define __switch_canary
#define __switch_canary_oparam
#define __switch_canary_iparam
#endif /* CC_STACKPROTECTOR */
/* /*
* Saving eflags is important. It switches not only IOPL between tasks, * Saving eflags is important. It switches not only IOPL between tasks,
* it also protects other tasks from NT leaking through sysenter etc. * it also protects other tasks from NT leaking through sysenter etc.
...@@ -44,6 +58,7 @@ do { \ ...@@ -44,6 +58,7 @@ do { \
"movl %[next_sp],%%esp\n\t" /* restore ESP */ \ "movl %[next_sp],%%esp\n\t" /* restore ESP */ \
"movl $1f,%[prev_ip]\n\t" /* save EIP */ \ "movl $1f,%[prev_ip]\n\t" /* save EIP */ \
"pushl %[next_ip]\n\t" /* restore EIP */ \ "pushl %[next_ip]\n\t" /* restore EIP */ \
__switch_canary \
"jmp __switch_to\n" /* regparm call */ \ "jmp __switch_to\n" /* regparm call */ \
"1:\t" \ "1:\t" \
"popl %%ebp\n\t" /* restore EBP */ \ "popl %%ebp\n\t" /* restore EBP */ \
...@@ -58,6 +73,8 @@ do { \ ...@@ -58,6 +73,8 @@ do { \
"=b" (ebx), "=c" (ecx), "=d" (edx), \ "=b" (ebx), "=c" (ecx), "=d" (edx), \
"=S" (esi), "=D" (edi) \ "=S" (esi), "=D" (edi) \
\ \
__switch_canary_oparam \
\
/* input parameters: */ \ /* input parameters: */ \
: [next_sp] "m" (next->thread.sp), \ : [next_sp] "m" (next->thread.sp), \
[next_ip] "m" (next->thread.ip), \ [next_ip] "m" (next->thread.ip), \
...@@ -66,6 +83,8 @@ do { \ ...@@ -66,6 +83,8 @@ do { \
[prev] "a" (prev), \ [prev] "a" (prev), \
[next] "d" (next) \ [next] "d" (next) \
\ \
__switch_canary_iparam \
\
: /* reloaded segment registers */ \ : /* reloaded segment registers */ \
"memory"); \ "memory"); \
} while (0) } while (0)
...@@ -182,6 +201,25 @@ extern void native_load_gs_index(unsigned); ...@@ -182,6 +201,25 @@ extern void native_load_gs_index(unsigned);
#define savesegment(seg, value) \ #define savesegment(seg, value) \
asm("mov %%" #seg ",%0":"=r" (value) : : "memory") asm("mov %%" #seg ",%0":"=r" (value) : : "memory")
/*
 * x86_32 user gs accessors.
 *
 * With CONFIG_X86_32_LAZY_GS:
 *   get_user_gs()/set_user_gs() read and load the %gs register directly
 *   (the regs argument is not used), task_user_gs() is the task's
 *   thread.gs, and lazy_save_gs()/lazy_load_gs() save and load the
 *   register.
 *
 * Without CONFIG_X86_32_LAZY_GS:
 *   get_user_gs()/set_user_gs() access the gs field of the given
 *   pt_regs, task_user_gs() is the gs field of task_pt_regs(tsk), and
 *   lazy_save_gs()/lazy_load_gs() are empty statements.
 *
 * get_user_gs() yields a u16 in both configurations.
 */
#ifdef CONFIG_X86_32
#ifdef CONFIG_X86_32_LAZY_GS
#define get_user_gs(regs) (u16)({unsigned long v; savesegment(gs, v); v;})
#define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v))
#define task_user_gs(tsk) ((tsk)->thread.gs)
#define lazy_save_gs(v) savesegment(gs, (v))
#define lazy_load_gs(v) loadsegment(gs, (v))
#else /* X86_32_LAZY_GS */
#define get_user_gs(regs) (u16)((regs)->gs)
#define set_user_gs(regs, v) do { (regs)->gs = (v); } while (0)
#define task_user_gs(tsk) (task_pt_regs(tsk)->gs)
#define lazy_save_gs(v) do { } while (0)
#define lazy_load_gs(v) do { } while (0)
#endif /* X86_32_LAZY_GS */
#endif /* X86_32 */
static inline unsigned long get_limit(unsigned long segment) static inline unsigned long get_limit(unsigned long segment)
{ {
unsigned long __limit; unsigned long __limit;
......
...@@ -41,7 +41,7 @@ dotraplinkage void do_int3(struct pt_regs *, long); ...@@ -41,7 +41,7 @@ dotraplinkage void do_int3(struct pt_regs *, long);
dotraplinkage void do_overflow(struct pt_regs *, long); dotraplinkage void do_overflow(struct pt_regs *, long);
dotraplinkage void do_bounds(struct pt_regs *, long); dotraplinkage void do_bounds(struct pt_regs *, long);
dotraplinkage void do_invalid_op(struct pt_regs *, long); dotraplinkage void do_invalid_op(struct pt_regs *, long);
dotraplinkage void do_device_not_available(struct pt_regs); dotraplinkage void do_device_not_available(struct pt_regs *, long);
dotraplinkage void do_coprocessor_segment_overrun(struct pt_regs *, long); dotraplinkage void do_coprocessor_segment_overrun(struct pt_regs *, long);
dotraplinkage void do_invalid_TSS(struct pt_regs *, long); dotraplinkage void do_invalid_TSS(struct pt_regs *, long);
dotraplinkage void do_segment_not_present(struct pt_regs *, long); dotraplinkage void do_segment_not_present(struct pt_regs *, long);
......
...@@ -186,7 +186,7 @@ extern int __get_user_bad(void); ...@@ -186,7 +186,7 @@ extern int __get_user_bad(void);
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
#define __put_user_asm_u64(x, addr, err) \ #define __put_user_asm_u64(x, addr, err, errret) \
asm volatile("1: movl %%eax,0(%2)\n" \ asm volatile("1: movl %%eax,0(%2)\n" \
"2: movl %%edx,4(%2)\n" \ "2: movl %%edx,4(%2)\n" \
"3:\n" \ "3:\n" \
...@@ -197,7 +197,7 @@ extern int __get_user_bad(void); ...@@ -197,7 +197,7 @@ extern int __get_user_bad(void);
_ASM_EXTABLE(1b, 4b) \ _ASM_EXTABLE(1b, 4b) \
_ASM_EXTABLE(2b, 4b) \ _ASM_EXTABLE(2b, 4b) \
: "=r" (err) \ : "=r" (err) \
: "A" (x), "r" (addr), "i" (-EFAULT), "0" (err)) : "A" (x), "r" (addr), "i" (errret), "0" (err))
#define __put_user_asm_ex_u64(x, addr) \ #define __put_user_asm_ex_u64(x, addr) \
asm volatile("1: movl %%eax,0(%1)\n" \ asm volatile("1: movl %%eax,0(%1)\n" \
...@@ -211,8 +211,8 @@ extern int __get_user_bad(void); ...@@ -211,8 +211,8 @@ extern int __get_user_bad(void);
asm volatile("call __put_user_8" : "=a" (__ret_pu) \ asm volatile("call __put_user_8" : "=a" (__ret_pu) \
: "A" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx") : "A" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx")
#else #else
#define __put_user_asm_u64(x, ptr, retval) \ #define __put_user_asm_u64(x, ptr, retval, errret) \
__put_user_asm(x, ptr, retval, "q", "", "Zr", -EFAULT) __put_user_asm(x, ptr, retval, "q", "", "Zr", errret)
#define __put_user_asm_ex_u64(x, addr) \ #define __put_user_asm_ex_u64(x, addr) \
__put_user_asm_ex(x, addr, "q", "", "Zr") __put_user_asm_ex(x, addr, "q", "", "Zr")
#define __put_user_x8(x, ptr, __ret_pu) __put_user_x(8, x, ptr, __ret_pu) #define __put_user_x8(x, ptr, __ret_pu) __put_user_x(8, x, ptr, __ret_pu)
...@@ -289,7 +289,8 @@ do { \ ...@@ -289,7 +289,8 @@ do { \
__put_user_asm(x, ptr, retval, "l", "k", "ir", errret); \ __put_user_asm(x, ptr, retval, "l", "k", "ir", errret); \
break; \ break; \
case 8: \ case 8: \
__put_user_asm_u64((__typeof__(*ptr))(x), ptr, retval); \ __put_user_asm_u64((__typeof__(*ptr))(x), ptr, retval, \
errret); \
break; \ break; \
default: \ default: \
__put_user_bad(); \ __put_user_bad(); \
...@@ -525,8 +526,6 @@ struct __large_struct { unsigned long buf[100]; }; ...@@ -525,8 +526,6 @@ struct __large_struct { unsigned long buf[100]; };
*/ */
#define get_user_try uaccess_try #define get_user_try uaccess_try
#define get_user_catch(err) uaccess_catch(err) #define get_user_catch(err) uaccess_catch(err)
#define put_user_try uaccess_try
#define put_user_catch(err) uaccess_catch(err)
#define get_user_ex(x, ptr) do { \ #define get_user_ex(x, ptr) do { \
unsigned long __gue_val; \ unsigned long __gue_val; \
...@@ -534,9 +533,29 @@ struct __large_struct { unsigned long buf[100]; }; ...@@ -534,9 +533,29 @@ struct __large_struct { unsigned long buf[100]; };
(x) = (__force __typeof__(*(ptr)))__gue_val; \ (x) = (__force __typeof__(*(ptr)))__gue_val; \
} while (0) } while (0)
#ifdef CONFIG_X86_WP_WORKS_OK
#define put_user_try uaccess_try
#define put_user_catch(err) uaccess_catch(err)
#define put_user_ex(x, ptr) \ #define put_user_ex(x, ptr) \
__put_user_size_ex((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) __put_user_size_ex((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
#else /* !CONFIG_X86_WP_WORKS_OK */
/*
 * put_user_try / put_user_ex / put_user_catch built on __put_user():
 *
 *  put_user_try        - opens a do { block and declares a local
 *                        __uaccess_err initialised to 0
 *  put_user_ex(x, ptr) - ORs the result of __put_user(x, ptr) into
 *                        __uaccess_err
 *  put_user_catch(err) - ORs __uaccess_err into (err) and closes the
 *                        block with } while (0)
 *
 * put_user_try and put_user_catch() open and close the same block, so
 * they must be used as a pair with the put_user_ex() calls in between.
 */
#define put_user_try do { \
int __uaccess_err = 0;
#define put_user_catch(err) \
(err) |= __uaccess_err; \
} while (0)
#define put_user_ex(x, ptr) do { \
__uaccess_err |= __put_user(x, ptr); \
} while (0)
#endif /* CONFIG_X86_WP_WORKS_OK */
/* /*
* movsl can be slow when source and dest are not both 8-byte aligned * movsl can be slow when source and dest are not both 8-byte aligned
*/ */
......
...@@ -3,6 +3,9 @@ ...@@ -3,6 +3,9 @@
enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
struct cpumask;
struct mm_struct;
#ifdef CONFIG_X86_UV #ifdef CONFIG_X86_UV
extern enum uv_system_type get_uv_system_type(void); extern enum uv_system_type get_uv_system_type(void);
......
...@@ -75,6 +75,7 @@ void foo(void) ...@@ -75,6 +75,7 @@ void foo(void)
OFFSET(PT_DS, pt_regs, ds); OFFSET(PT_DS, pt_regs, ds);
OFFSET(PT_ES, pt_regs, es); OFFSET(PT_ES, pt_regs, es);
OFFSET(PT_FS, pt_regs, fs); OFFSET(PT_FS, pt_regs, fs);
OFFSET(PT_GS, pt_regs, gs);
OFFSET(PT_ORIG_EAX, pt_regs, orig_ax); OFFSET(PT_ORIG_EAX, pt_regs, orig_ax);
OFFSET(PT_EIP, pt_regs, ip); OFFSET(PT_EIP, pt_regs, ip);
OFFSET(PT_CS, pt_regs, cs); OFFSET(PT_CS, pt_regs, cs);
......
...@@ -39,6 +39,7 @@ ...@@ -39,6 +39,7 @@
#include <asm/sections.h> #include <asm/sections.h>
#include <asm/setup.h> #include <asm/setup.h>
#include <asm/hypervisor.h> #include <asm/hypervisor.h>
#include <asm/stackprotector.h>
#include "cpu.h" #include "cpu.h"
...@@ -122,6 +123,7 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { ...@@ -122,6 +123,7 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
[GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } },
[GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } }, [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } },
GDT_STACK_CANARY_INIT
#endif #endif
} }; } };
EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
...@@ -304,6 +306,7 @@ void load_percpu_segment(int cpu) ...@@ -304,6 +306,7 @@ void load_percpu_segment(int cpu)
loadsegment(gs, 0); loadsegment(gs, 0);
wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)); wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu));
#endif #endif
load_stack_canary_segment();
} }
/* Current gdt points %fs at the "master" per-cpu area: after this, /* Current gdt points %fs at the "master" per-cpu area: after this,
...@@ -938,12 +941,8 @@ struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; ...@@ -938,12 +941,8 @@ struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
DEFINE_PER_CPU_FIRST(union irq_stack_union, DEFINE_PER_CPU_FIRST(union irq_stack_union,
irq_stack_union) __aligned(PAGE_SIZE); irq_stack_union) __aligned(PAGE_SIZE);
#ifdef CONFIG_SMP
DEFINE_PER_CPU(char *, irq_stack_ptr); /* will be set during per cpu init */
#else
DEFINE_PER_CPU(char *, irq_stack_ptr) = DEFINE_PER_CPU(char *, irq_stack_ptr) =
per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
#endif
DEFINE_PER_CPU(unsigned long, kernel_stack) = DEFINE_PER_CPU(unsigned long, kernel_stack) =
(unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE; (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
...@@ -986,16 +985,21 @@ unsigned long kernel_eflags; ...@@ -986,16 +985,21 @@ unsigned long kernel_eflags;
*/ */
DEFINE_PER_CPU(struct orig_ist, orig_ist); DEFINE_PER_CPU(struct orig_ist, orig_ist);
#else #else /* x86_64 */
/* Make sure %fs is initialized properly in idle threads */ #ifdef CONFIG_CC_STACKPROTECTOR
DEFINE_PER_CPU(unsigned long, stack_canary);
#endif
/* Make sure %fs and %gs are initialized properly in idle threads */
struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs)
{ {
memset(regs, 0, sizeof(struct pt_regs)); memset(regs, 0, sizeof(struct pt_regs));
regs->fs = __KERNEL_PERCPU; regs->fs = __KERNEL_PERCPU;
regs->gs = __KERNEL_STACK_CANARY;
return regs; return regs;
} }
#endif #endif /* x86_64 */
/* /*
* cpu_init() initializes state that is per-CPU. Some data is already * cpu_init() initializes state that is per-CPU. Some data is already
...@@ -1157,9 +1161,6 @@ void __cpuinit cpu_init(void) ...@@ -1157,9 +1161,6 @@ void __cpuinit cpu_init(void)
__set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
#endif #endif
/* Clear %gs. */
asm volatile ("mov %0, %%gs" : : "r" (0));
/* Clear all 6 debug registers: */ /* Clear all 6 debug registers: */
set_debugreg(0, 0); set_debugreg(0, 0);
set_debugreg(0, 1); set_debugreg(0, 1);
......
...@@ -30,12 +30,13 @@ ...@@ -30,12 +30,13 @@
* 1C(%esp) - %ds * 1C(%esp) - %ds
* 20(%esp) - %es * 20(%esp) - %es
* 24(%esp) - %fs * 24(%esp) - %fs
* 28(%esp) - orig_eax * 28(%esp) - %gs saved iff !CONFIG_X86_32_LAZY_GS
* 2C(%esp) - %eip * 2C(%esp) - orig_eax
* 30(%esp) - %cs * 30(%esp) - %eip
* 34(%esp) - %eflags * 34(%esp) - %cs
* 38(%esp) - %oldesp * 38(%esp) - %eflags
* 3C(%esp) - %oldss * 3C(%esp) - %oldesp
* 40(%esp) - %oldss
* *
* "current" is in register %ebx during any slow entries. * "current" is in register %ebx during any slow entries.
*/ */
...@@ -101,121 +102,221 @@ ...@@ -101,121 +102,221 @@
#define resume_userspace_sig resume_userspace #define resume_userspace_sig resume_userspace
#endif #endif
#define SAVE_ALL \ /*
cld; \ * User gs save/restore
pushl %fs; \ *
CFI_ADJUST_CFA_OFFSET 4;\ * %gs is used for userland TLS and kernel only uses it for stack
/*CFI_REL_OFFSET fs, 0;*/\ * canary which is required to be at %gs:20 by gcc. Read the comment
pushl %es; \ * at the top of stackprotector.h for more info.
CFI_ADJUST_CFA_OFFSET 4;\ *
/*CFI_REL_OFFSET es, 0;*/\ * Local labels 98 and 99 are used.
pushl %ds; \ */
CFI_ADJUST_CFA_OFFSET 4;\ #ifdef CONFIG_X86_32_LAZY_GS
/*CFI_REL_OFFSET ds, 0;*/\
pushl %eax; \ /* unfortunately push/pop can't be no-op */
CFI_ADJUST_CFA_OFFSET 4;\ .macro PUSH_GS
CFI_REL_OFFSET eax, 0;\ pushl $0
pushl %ebp; \ CFI_ADJUST_CFA_OFFSET 4
CFI_ADJUST_CFA_OFFSET 4;\ .endm
CFI_REL_OFFSET ebp, 0;\ .macro POP_GS pop=0
pushl %edi; \ addl $(4 + \pop), %esp
CFI_ADJUST_CFA_OFFSET 4;\ CFI_ADJUST_CFA_OFFSET -(4 + \pop)
CFI_REL_OFFSET edi, 0;\ .endm
pushl %esi; \ .macro POP_GS_EX
CFI_ADJUST_CFA_OFFSET 4;\ .endm
CFI_REL_OFFSET esi, 0;\
pushl %edx; \ /* all the rest are no-op */
CFI_ADJUST_CFA_OFFSET 4;\ .macro PTGS_TO_GS
CFI_REL_OFFSET edx, 0;\ .endm
pushl %ecx; \ .macro PTGS_TO_GS_EX
CFI_ADJUST_CFA_OFFSET 4;\ .endm
CFI_REL_OFFSET ecx, 0;\ .macro GS_TO_REG reg
pushl %ebx; \ .endm
CFI_ADJUST_CFA_OFFSET 4;\ .macro REG_TO_PTGS reg
CFI_REL_OFFSET ebx, 0;\ .endm
movl $(__USER_DS), %edx; \ .macro SET_KERNEL_GS reg
movl %edx, %ds; \ .endm
movl %edx, %es; \
movl $(__KERNEL_PERCPU), %edx; \ #else /* CONFIG_X86_32_LAZY_GS */
.macro PUSH_GS
pushl %gs
CFI_ADJUST_CFA_OFFSET 4
/*CFI_REL_OFFSET gs, 0*/
.endm
/*
 * POP_GS pop=0: pop %gs from the top of the stack (local label 98),
 * then, if \pop is non-zero, discard \pop more bytes from the stack.
 * CFA offsets are adjusted to match.
 */
.macro POP_GS pop=0
98: popl %gs
CFI_ADJUST_CFA_OFFSET -4
/*CFI_RESTORE gs*/
.if \pop <> 0
add $\pop, %esp
CFI_ADJUST_CFA_OFFSET -\pop
.endif
.endm
/*
 * POP_GS_EX: fixup for the popl at label 98 emitted by POP_GS.  Adds an
 * __ex_table entry pairing 98 with fixup code at 99, which stores 0
 * into the stack slot and jumps back to 98 to retry the pop.
 */
.macro POP_GS_EX
.pushsection .fixup, "ax"
99: movl $0, (%esp)
jmp 98b
.section __ex_table, "a"
.align 4
.long 98b, 99b
.popsection
.endm
/*
 * PTGS_TO_GS: load %gs from the PT_GS slot of the frame at %esp
 * (local label 98) without changing the stack pointer.
 */
.macro PTGS_TO_GS
98: mov PT_GS(%esp), %gs
.endm
/*
 * PTGS_TO_GS_EX: fixup for the load at label 98 emitted by PTGS_TO_GS.
 * Adds an __ex_table entry pairing 98 with fixup code at 99, which
 * stores 0 into PT_GS(%esp) and jumps back to 98 to retry the load.
 */
.macro PTGS_TO_GS_EX
.pushsection .fixup, "ax"
99: movl $0, PT_GS(%esp)
jmp 98b
.section __ex_table, "a"
.align 4
.long 98b, 99b
.popsection
.endm
/* GS_TO_REG reg: copy the current %gs selector into \reg. */
.macro GS_TO_REG reg
movl %gs, \reg
/*CFI_REGISTER gs, \reg*/
.endm
/* REG_TO_PTGS reg: store \reg into the PT_GS slot of the frame at %esp. */
.macro REG_TO_PTGS reg
movl \reg, PT_GS(%esp)
/*CFI_REL_OFFSET gs, PT_GS*/
.endm
/*
 * SET_KERNEL_GS reg: load __KERNEL_STACK_CANARY into %gs, using \reg as
 * a scratch register (its previous value is overwritten).
 */
.macro SET_KERNEL_GS reg
movl $(__KERNEL_STACK_CANARY), \reg
movl \reg, %gs
.endm
#endif /* CONFIG_X86_32_LAZY_GS */
.macro SAVE_ALL
cld
PUSH_GS
pushl %fs
CFI_ADJUST_CFA_OFFSET 4
/*CFI_REL_OFFSET fs, 0;*/
pushl %es
CFI_ADJUST_CFA_OFFSET 4
/*CFI_REL_OFFSET es, 0;*/
pushl %ds
CFI_ADJUST_CFA_OFFSET 4
/*CFI_REL_OFFSET ds, 0;*/
pushl %eax
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET eax, 0
pushl %ebp
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET ebp, 0
pushl %edi
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET edi, 0
pushl %esi
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET esi, 0
pushl %edx
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET edx, 0
pushl %ecx
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET ecx, 0
pushl %ebx
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET ebx, 0
movl $(__USER_DS), %edx
movl %edx, %ds
movl %edx, %es
movl $(__KERNEL_PERCPU), %edx
movl %edx, %fs movl %edx, %fs
SET_KERNEL_GS %edx
.endm
/*
 * RESTORE_INT_REGS - pop the integer registers off the stack.
 *
 * Pops %ebx, %ecx, %edx, %esi, %edi, %ebp and %eax in that order.  After
 * each pop the CFA offset is reduced by 4 and the register is marked as
 * restored in the CFI data.  Both the cpp #define form and the assembler
 * .macro form of the same sequence appear on the lines below.
 */
#define RESTORE_INT_REGS \ .macro RESTORE_INT_REGS
popl %ebx; \ popl %ebx
CFI_ADJUST_CFA_OFFSET -4;\ CFI_ADJUST_CFA_OFFSET -4
CFI_RESTORE ebx;\ CFI_RESTORE ebx
popl %ecx; \ popl %ecx
CFI_ADJUST_CFA_OFFSET -4;\ CFI_ADJUST_CFA_OFFSET -4
CFI_RESTORE ecx;\ CFI_RESTORE ecx
popl %edx; \ popl %edx
CFI_ADJUST_CFA_OFFSET -4;\ CFI_ADJUST_CFA_OFFSET -4
CFI_RESTORE edx;\ CFI_RESTORE edx
popl %esi; \ popl %esi
CFI_ADJUST_CFA_OFFSET -4;\ CFI_ADJUST_CFA_OFFSET -4
CFI_RESTORE esi;\ CFI_RESTORE esi
popl %edi; \ popl %edi
CFI_ADJUST_CFA_OFFSET -4;\ CFI_ADJUST_CFA_OFFSET -4
CFI_RESTORE edi;\ CFI_RESTORE edi
popl %ebp; \ popl %ebp
CFI_ADJUST_CFA_OFFSET -4;\ CFI_ADJUST_CFA_OFFSET -4
CFI_RESTORE ebp;\ CFI_RESTORE ebp
popl %eax; \ popl %eax
CFI_ADJUST_CFA_OFFSET -4;\ CFI_ADJUST_CFA_OFFSET -4
CFI_RESTORE eax CFI_RESTORE eax
.endm
/*
 * RESTORE_REGS - pop the integer registers and the segment registers.
 *
 * Runs RESTORE_INT_REGS, then pops %ds, %es and %fs at local labels 1, 2
 * and 3.  Each of those pops has an __ex_table entry pointing at fixup
 * code (labels 4, 5, 6) that writes 0 to the stack slot at (%esp) and
 * jumps back to retry the pop.
 *
 * The .macro form additionally takes a pop= argument (default 0), invokes
 * "POP_GS \pop" after the %fs pop, and ends with POP_GS_EX to emit the
 * fixup for that %gs pop.
 */
#define RESTORE_REGS \ .macro RESTORE_REGS pop=0
RESTORE_INT_REGS; \ RESTORE_INT_REGS
1: popl %ds; \ 1: popl %ds
CFI_ADJUST_CFA_OFFSET -4;\ CFI_ADJUST_CFA_OFFSET -4
/*CFI_RESTORE ds;*/\ /*CFI_RESTORE ds;*/
2: popl %es; \ 2: popl %es
CFI_ADJUST_CFA_OFFSET -4;\ CFI_ADJUST_CFA_OFFSET -4
/*CFI_RESTORE es;*/\ /*CFI_RESTORE es;*/
3: popl %fs; \ 3: popl %fs
CFI_ADJUST_CFA_OFFSET -4;\ CFI_ADJUST_CFA_OFFSET -4
/*CFI_RESTORE fs;*/\ /*CFI_RESTORE fs;*/
.pushsection .fixup,"ax"; \ POP_GS \pop
4: movl $0,(%esp); \ .pushsection .fixup, "ax"
jmp 1b; \ 4: movl $0, (%esp)
5: movl $0,(%esp); \ jmp 1b
jmp 2b; \ 5: movl $0, (%esp)
6: movl $0,(%esp); \ jmp 2b
jmp 3b; \ 6: movl $0, (%esp)
.section __ex_table,"a";\ jmp 3b
.align 4; \ .section __ex_table, "a"
.long 1b,4b; \ .align 4
.long 2b,5b; \ .long 1b, 4b
.long 3b,6b; \ .long 2b, 5b
.long 3b, 6b
.popsection .popsection
POP_GS_EX
.endm
/*
 * RING0_INT_FRAME - open the CFI description of an entry point.
 *
 * Starts a "simple" CFI procedure marked as a signal frame, defines the
 * CFA as %esp + 3*4 and records the saved eip at CFA - 3*4.  The matching
 * cs annotation is left commented out.
 */
#define RING0_INT_FRAME \ .macro RING0_INT_FRAME
CFI_STARTPROC simple;\ CFI_STARTPROC simple
CFI_SIGNAL_FRAME;\ CFI_SIGNAL_FRAME
CFI_DEF_CFA esp, 3*4;\ CFI_DEF_CFA esp, 3*4
/*CFI_OFFSET cs, -2*4;*/\ /*CFI_OFFSET cs, -2*4;*/
CFI_OFFSET eip, -3*4 CFI_OFFSET eip, -3*4
.endm
/*
 * RING0_EC_FRAME - like RING0_INT_FRAME, but with the CFA at %esp + 4*4,
 * i.e. one more 4-byte word on the stack below the saved eip.
 * NOTE(review): presumably that extra word is a hardware error code
 * ("EC") -- confirm against the entry points that use this macro.
 */
#define RING0_EC_FRAME \ .macro RING0_EC_FRAME
CFI_STARTPROC simple;\ CFI_STARTPROC simple
CFI_SIGNAL_FRAME;\ CFI_SIGNAL_FRAME
CFI_DEF_CFA esp, 4*4;\ CFI_DEF_CFA esp, 4*4
/*CFI_OFFSET cs, -2*4;*/\ /*CFI_OFFSET cs, -2*4;*/
CFI_OFFSET eip, -3*4 CFI_OFFSET eip, -3*4
.endm
/*
 * RING0_PTREGS_FRAME - open the CFI description for code that runs with a
 * full register frame at %esp.
 *
 * Starts a "simple" signal-frame CFI procedure with the CFA at
 * %esp + (PT_OLDESP - PT_EBX), then records the save slots of eip, eax,
 * ebp, edi, esi, edx, ecx and ebx as PT_* offsets relative to PT_OLDESP.
 * The cs, es and ds annotations are left commented out.
 */
#define RING0_PTREGS_FRAME \ .macro RING0_PTREGS_FRAME
CFI_STARTPROC simple;\ CFI_STARTPROC simple
CFI_SIGNAL_FRAME;\ CFI_SIGNAL_FRAME
CFI_DEF_CFA esp, PT_OLDESP-PT_EBX;\ CFI_DEF_CFA esp, PT_OLDESP-PT_EBX
/*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/\ /*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/
CFI_OFFSET eip, PT_EIP-PT_OLDESP;\ CFI_OFFSET eip, PT_EIP-PT_OLDESP
/*CFI_OFFSET es, PT_ES-PT_OLDESP;*/\ /*CFI_OFFSET es, PT_ES-PT_OLDESP;*/
/*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/\ /*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/
CFI_OFFSET eax, PT_EAX-PT_OLDESP;\ CFI_OFFSET eax, PT_EAX-PT_OLDESP
CFI_OFFSET ebp, PT_EBP-PT_OLDESP;\ CFI_OFFSET ebp, PT_EBP-PT_OLDESP
CFI_OFFSET edi, PT_EDI-PT_OLDESP;\ CFI_OFFSET edi, PT_EDI-PT_OLDESP
CFI_OFFSET esi, PT_ESI-PT_OLDESP;\ CFI_OFFSET esi, PT_ESI-PT_OLDESP
CFI_OFFSET edx, PT_EDX-PT_OLDESP;\ CFI_OFFSET edx, PT_EDX-PT_OLDESP
CFI_OFFSET ecx, PT_ECX-PT_OLDESP;\ CFI_OFFSET ecx, PT_ECX-PT_OLDESP
CFI_OFFSET ebx, PT_EBX-PT_OLDESP CFI_OFFSET ebx, PT_EBX-PT_OLDESP
.endm
ENTRY(ret_from_fork) ENTRY(ret_from_fork)
CFI_STARTPROC CFI_STARTPROC
...@@ -362,6 +463,7 @@ sysenter_exit: ...@@ -362,6 +463,7 @@ sysenter_exit:
xorl %ebp,%ebp xorl %ebp,%ebp
TRACE_IRQS_ON TRACE_IRQS_ON
1: mov PT_FS(%esp), %fs 1: mov PT_FS(%esp), %fs
PTGS_TO_GS
ENABLE_INTERRUPTS_SYSEXIT ENABLE_INTERRUPTS_SYSEXIT
#ifdef CONFIG_AUDITSYSCALL #ifdef CONFIG_AUDITSYSCALL
...@@ -410,6 +512,7 @@ sysexit_audit: ...@@ -410,6 +512,7 @@ sysexit_audit:
.align 4 .align 4
.long 1b,2b .long 1b,2b
.popsection .popsection
PTGS_TO_GS_EX
ENDPROC(ia32_sysenter_target) ENDPROC(ia32_sysenter_target)
# system call handler stub # system call handler stub
...@@ -452,8 +555,7 @@ restore_all: ...@@ -452,8 +555,7 @@ restore_all:
restore_nocheck: restore_nocheck:
TRACE_IRQS_IRET TRACE_IRQS_IRET
restore_nocheck_notrace: restore_nocheck_notrace:
RESTORE_REGS RESTORE_REGS 4 # skip orig_eax/error_code
addl $4, %esp # skip orig_eax/error_code
CFI_ADJUST_CFA_OFFSET -4 CFI_ADJUST_CFA_OFFSET -4
irq_return: irq_return:
INTERRUPT_RETURN INTERRUPT_RETURN
...@@ -595,28 +697,50 @@ syscall_badsys: ...@@ -595,28 +697,50 @@ syscall_badsys:
END(syscall_badsys) END(syscall_badsys)
CFI_ENDPROC CFI_ENDPROC
#define FIXUP_ESPFIX_STACK \ /*
/* since we are on a wrong stack, we cant make it a C code :( */ \ * System calls that need a pt_regs pointer.
PER_CPU(gdt_page, %ebx); \ */
GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \ #define PTREGSCALL(name) \
addl %esp, %eax; \ ALIGN; \
pushl $__KERNEL_DS; \ ptregs_##name: \
CFI_ADJUST_CFA_OFFSET 4; \ leal 4(%esp),%eax; \
pushl %eax; \ jmp sys_##name;
CFI_ADJUST_CFA_OFFSET 4; \
lss (%esp), %esp; \ PTREGSCALL(iopl)
CFI_ADJUST_CFA_OFFSET -8; PTREGSCALL(fork)
#define UNWIND_ESPFIX_STACK \ PTREGSCALL(clone)
movl %ss, %eax; \ PTREGSCALL(vfork)
/* see if on espfix stack */ \ PTREGSCALL(execve)
cmpw $__ESPFIX_SS, %ax; \ PTREGSCALL(sigaltstack)
jne 27f; \ PTREGSCALL(sigreturn)
movl $__KERNEL_DS, %eax; \ PTREGSCALL(rt_sigreturn)
movl %eax, %ds; \ PTREGSCALL(vm86)
movl %eax, %es; \ PTREGSCALL(vm86old)
/* switch to normal stack */ \
FIXUP_ESPFIX_STACK; \ .macro FIXUP_ESPFIX_STACK
27:; /* since we are on a wrong stack, we cant make it a C code :( */
PER_CPU(gdt_page, %ebx)
GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah)
addl %esp, %eax
pushl $__KERNEL_DS
CFI_ADJUST_CFA_OFFSET 4
pushl %eax
CFI_ADJUST_CFA_OFFSET 4
lss (%esp), %esp
CFI_ADJUST_CFA_OFFSET -8
.endm
/*
 * UNWIND_ESPFIX_STACK - leave the espfix stack if we are currently on it.
 *
 * Reads %ss into %eax and compares its low 16 bits with __ESPFIX_SS.  If
 * they differ, nothing else is done (branch to local label 27).  Otherwise
 * %ds and %es are reloaded with __KERNEL_DS and FIXUP_ESPFIX_STACK is
 * invoked to switch stacks.  %eax is clobbered in either case.
 */
.macro UNWIND_ESPFIX_STACK
movl %ss, %eax
/* see if on espfix stack */
cmpw $__ESPFIX_SS, %ax
jne 27f
movl $__KERNEL_DS, %eax
movl %eax, %ds
movl %eax, %es
/* switch to normal stack */
FIXUP_ESPFIX_STACK
27:
.endm
/* /*
* Build the entry stubs and pointer table with some assembler magic. * Build the entry stubs and pointer table with some assembler magic.
...@@ -1070,7 +1194,10 @@ ENTRY(page_fault) ...@@ -1070,7 +1194,10 @@ ENTRY(page_fault)
CFI_ADJUST_CFA_OFFSET 4 CFI_ADJUST_CFA_OFFSET 4
ALIGN ALIGN
error_code: error_code:
/* the function address is in %fs's slot on the stack */ /* the function address is in %gs's slot on the stack */
pushl %fs
CFI_ADJUST_CFA_OFFSET 4
/*CFI_REL_OFFSET fs, 0*/
pushl %es pushl %es
CFI_ADJUST_CFA_OFFSET 4 CFI_ADJUST_CFA_OFFSET 4
/*CFI_REL_OFFSET es, 0*/ /*CFI_REL_OFFSET es, 0*/
...@@ -1099,20 +1226,15 @@ error_code: ...@@ -1099,20 +1226,15 @@ error_code:
CFI_ADJUST_CFA_OFFSET 4 CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET ebx, 0 CFI_REL_OFFSET ebx, 0
cld cld
pushl %fs
CFI_ADJUST_CFA_OFFSET 4
/*CFI_REL_OFFSET fs, 0*/
movl $(__KERNEL_PERCPU), %ecx movl $(__KERNEL_PERCPU), %ecx
movl %ecx, %fs movl %ecx, %fs
UNWIND_ESPFIX_STACK UNWIND_ESPFIX_STACK
popl %ecx GS_TO_REG %ecx
CFI_ADJUST_CFA_OFFSET -4 movl PT_GS(%esp), %edi # get the function address
/*CFI_REGISTER es, ecx*/
movl PT_FS(%esp), %edi # get the function address
movl PT_ORIG_EAX(%esp), %edx # get the error code movl PT_ORIG_EAX(%esp), %edx # get the error code
movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
mov %ecx, PT_FS(%esp) REG_TO_PTGS %ecx
/*CFI_REL_OFFSET fs, ES*/ SET_KERNEL_GS %ecx
movl $(__USER_DS), %ecx movl $(__USER_DS), %ecx
movl %ecx, %ds movl %ecx, %ds
movl %ecx, %es movl %ecx, %es
...@@ -1136,26 +1258,27 @@ END(page_fault) ...@@ -1136,26 +1258,27 @@ END(page_fault)
* by hand onto the new stack - while updating the return eip past * by hand onto the new stack - while updating the return eip past
* the instruction that would have done it for sysenter. * the instruction that would have done it for sysenter.
*/ */
/*
 * FIX_STACK - switch to the stack taken from TSS_sysenter_sp0 and rebuild
 * a return frame on it.
 * @offset: added to TSS_sysenter_sp0 when forming the %esp-relative
 *          address the new stack pointer is loaded from.
 * @ok:     label to branch to when no fix is needed.
 * @label:  label emitted on the instruction that reloads %esp.
 *
 * If the word at 4(%esp) is not __KERNEL_CS, control goes to @ok.
 * Otherwise %esp is reloaded and eflags, __KERNEL_CS and the address
 * sysenter_past_esp are pushed, with the CFI state reset (CFA = %esp + 0,
 * eip undefined) and then updated after every push.
 */
#define FIX_STACK(offset, ok, label) \ .macro FIX_STACK offset ok label
cmpw $__KERNEL_CS,4(%esp); \ cmpw $__KERNEL_CS, 4(%esp)
jne ok; \ jne \ok
label: \ \label:
movl TSS_sysenter_sp0+offset(%esp),%esp; \ movl TSS_sysenter_sp0 + \offset(%esp), %esp
CFI_DEF_CFA esp, 0; \ CFI_DEF_CFA esp, 0
CFI_UNDEFINED eip; \ CFI_UNDEFINED eip
pushfl; \ pushfl
CFI_ADJUST_CFA_OFFSET 4; \ CFI_ADJUST_CFA_OFFSET 4
pushl $__KERNEL_CS; \ pushl $__KERNEL_CS
CFI_ADJUST_CFA_OFFSET 4; \ CFI_ADJUST_CFA_OFFSET 4
pushl $sysenter_past_esp; \ pushl $sysenter_past_esp
CFI_ADJUST_CFA_OFFSET 4; \ CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET eip, 0 CFI_REL_OFFSET eip, 0
.endm
ENTRY(debug) ENTRY(debug)
RING0_INT_FRAME RING0_INT_FRAME
cmpl $ia32_sysenter_target,(%esp) cmpl $ia32_sysenter_target,(%esp)
jne debug_stack_correct jne debug_stack_correct
FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn) FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn
debug_stack_correct: debug_stack_correct:
pushl $-1 # mark this as an int pushl $-1 # mark this as an int
CFI_ADJUST_CFA_OFFSET 4 CFI_ADJUST_CFA_OFFSET 4
...@@ -1213,7 +1336,7 @@ nmi_stack_correct: ...@@ -1213,7 +1336,7 @@ nmi_stack_correct:
nmi_stack_fixup: nmi_stack_fixup:
RING0_INT_FRAME RING0_INT_FRAME
FIX_STACK(12,nmi_stack_correct, 1) FIX_STACK 12, nmi_stack_correct, 1
jmp nmi_stack_correct jmp nmi_stack_correct
nmi_debug_stack_check: nmi_debug_stack_check:
...@@ -1224,7 +1347,7 @@ nmi_debug_stack_check: ...@@ -1224,7 +1347,7 @@ nmi_debug_stack_check:
jb nmi_stack_correct jb nmi_stack_correct
cmpl $debug_esp_fix_insn,(%esp) cmpl $debug_esp_fix_insn,(%esp)
ja nmi_stack_correct ja nmi_stack_correct
FIX_STACK(24,nmi_stack_correct, 1) FIX_STACK 24, nmi_stack_correct, 1
jmp nmi_stack_correct jmp nmi_stack_correct
nmi_espfix_stack: nmi_espfix_stack:
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include <asm/asm-offsets.h> #include <asm/asm-offsets.h>
#include <asm/setup.h> #include <asm/setup.h>
#include <asm/processor-flags.h> #include <asm/processor-flags.h>
#include <asm/percpu.h>
/* Physical address */ /* Physical address */
#define pa(X) ((X) - __PAGE_OFFSET) #define pa(X) ((X) - __PAGE_OFFSET)
...@@ -437,8 +438,26 @@ is386: movl $2,%ecx # set MP ...@@ -437,8 +438,26 @@ is386: movl $2,%ecx # set MP
movl $(__KERNEL_PERCPU), %eax movl $(__KERNEL_PERCPU), %eax
movl %eax,%fs # set this cpu's percpu movl %eax,%fs # set this cpu's percpu
xorl %eax,%eax # Clear GS and LDT #ifdef CONFIG_CC_STACKPROTECTOR
/*
* The linker can't handle this by relocation. Manually set
* base address in stack canary segment descriptor.
*/
cmpb $0,ready
jne 1f
movl $per_cpu__gdt_page,%eax
movl $per_cpu__stack_canary,%ecx
subl $20, %ecx
movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax)
shrl $16, %ecx
movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax)
movb %ch, 8 * GDT_ENTRY_STACK_CANARY + 7(%eax)
1:
#endif
movl $(__KERNEL_STACK_CANARY),%eax
movl %eax,%gs movl %eax,%gs
xorl %eax,%eax # Clear LDT
lldt %ax lldt %ax
cld # gcc2 wants the direction flag cleared at all times cld # gcc2 wants the direction flag cleared at all times
......
...@@ -205,19 +205,6 @@ ENTRY(secondary_startup_64) ...@@ -205,19 +205,6 @@ ENTRY(secondary_startup_64)
pushq $0 pushq $0
popfq popfq
#ifdef CONFIG_SMP
/*
* Fix up static pointers that need __per_cpu_load added. The assembler
* is unable to do this directly. This is only needed for the boot cpu.
* These values are set up with the correct base addresses by C code for
* secondary cpus.
*/
movq initial_gs(%rip), %rax
cmpl $0, per_cpu__cpu_number(%rax)
jne 1f
addq %rax, early_gdt_descr_base(%rip)
1:
#endif
/* /*
* We must switch to a new descriptor in kernel space for the GDT * We must switch to a new descriptor in kernel space for the GDT
* because soon the kernel won't have access anymore to the userspace * because soon the kernel won't have access anymore to the userspace
...@@ -275,11 +262,7 @@ ENTRY(secondary_startup_64) ...@@ -275,11 +262,7 @@ ENTRY(secondary_startup_64)
ENTRY(initial_code) ENTRY(initial_code)
.quad x86_64_start_kernel .quad x86_64_start_kernel
ENTRY(initial_gs) ENTRY(initial_gs)
#ifdef CONFIG_SMP .quad INIT_PER_CPU_VAR(irq_stack_union)
.quad __per_cpu_load
#else
.quad PER_CPU_VAR(irq_stack_union)
#endif
__FINITDATA __FINITDATA
ENTRY(stack_start) ENTRY(stack_start)
...@@ -425,7 +408,7 @@ NEXT_PAGE(level2_spare_pgt) ...@@ -425,7 +408,7 @@ NEXT_PAGE(level2_spare_pgt)
early_gdt_descr: early_gdt_descr:
.word GDT_ENTRIES*8-1 .word GDT_ENTRIES*8-1
early_gdt_descr_base: early_gdt_descr_base:
.quad per_cpu__gdt_page .quad INIT_PER_CPU_VAR(gdt_page)
ENTRY(phys_base) ENTRY(phys_base)
/* This must match the first entry in level2_kernel_pgt */ /* This must match the first entry in level2_kernel_pgt */
......
...@@ -131,9 +131,8 @@ static int do_iopl(unsigned int level, struct pt_regs *regs) ...@@ -131,9 +131,8 @@ static int do_iopl(unsigned int level, struct pt_regs *regs)
} }
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
asmlinkage long sys_iopl(unsigned long regsp) long sys_iopl(struct pt_regs *regs)
{ {
struct pt_regs *regs = (struct pt_regs *)&regsp;
unsigned int level = regs->bx; unsigned int level = regs->bx;
struct thread_struct *t = &current->thread; struct thread_struct *t = &current->thread;
int rc; int rc;
......
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
#include <stdarg.h> #include <stdarg.h>
#include <linux/stackprotector.h>
#include <linux/cpu.h> #include <linux/cpu.h>
#include <linux/errno.h> #include <linux/errno.h>
#include <linux/sched.h> #include <linux/sched.h>
...@@ -91,6 +92,15 @@ void cpu_idle(void) ...@@ -91,6 +92,15 @@ void cpu_idle(void)
{ {
int cpu = smp_processor_id(); int cpu = smp_processor_id();
/*
* If we're the non-boot CPU, nothing set the stack canary up
* for us. CPU0 already has it initialized but no harm in
* doing it again. This is a good place for updating it, as
* we wont ever return from this function (so the invalid
* canaries already on the stack wont ever trigger).
*/
boot_init_stack_canary();
current_thread_info()->status |= TS_POLLING; current_thread_info()->status |= TS_POLLING;
/* endless idle loop with no priority at all */ /* endless idle loop with no priority at all */
...@@ -131,7 +141,7 @@ void __show_regs(struct pt_regs *regs, int all) ...@@ -131,7 +141,7 @@ void __show_regs(struct pt_regs *regs, int all)
if (user_mode_vm(regs)) { if (user_mode_vm(regs)) {
sp = regs->sp; sp = regs->sp;
ss = regs->ss & 0xffff; ss = regs->ss & 0xffff;
savesegment(gs, gs); gs = get_user_gs(regs);
} else { } else {
sp = (unsigned long) (&regs->sp); sp = (unsigned long) (&regs->sp);
savesegment(ss, ss); savesegment(ss, ss);
...@@ -212,6 +222,7 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) ...@@ -212,6 +222,7 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
regs.ds = __USER_DS; regs.ds = __USER_DS;
regs.es = __USER_DS; regs.es = __USER_DS;
regs.fs = __KERNEL_PERCPU; regs.fs = __KERNEL_PERCPU;
regs.gs = __KERNEL_STACK_CANARY;
regs.orig_ax = -1; regs.orig_ax = -1;
regs.ip = (unsigned long) kernel_thread_helper; regs.ip = (unsigned long) kernel_thread_helper;
regs.cs = __KERNEL_CS | get_kernel_rpl(); regs.cs = __KERNEL_CS | get_kernel_rpl();
...@@ -304,7 +315,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, ...@@ -304,7 +315,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
p->thread.ip = (unsigned long) ret_from_fork; p->thread.ip = (unsigned long) ret_from_fork;
savesegment(gs, p->thread.gs); task_user_gs(p) = get_user_gs(regs);
tsk = current; tsk = current;
if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
...@@ -342,7 +353,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, ...@@ -342,7 +353,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
void void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{ {
__asm__("movl %0, %%gs" : : "r"(0)); set_user_gs(regs, 0);
regs->fs = 0; regs->fs = 0;
set_fs(USER_DS); set_fs(USER_DS);
regs->ds = __USER_DS; regs->ds = __USER_DS;
...@@ -539,7 +550,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) ...@@ -539,7 +550,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
* used %fs or %gs (it does not today), or if the kernel is * used %fs or %gs (it does not today), or if the kernel is
* running inside of a hypervisor layer. * running inside of a hypervisor layer.
*/ */
savesegment(gs, prev->gs); lazy_save_gs(prev->gs);
/* /*
* Load the per-thread Thread-Local Storage descriptor. * Load the per-thread Thread-Local Storage descriptor.
...@@ -585,31 +596,31 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) ...@@ -585,31 +596,31 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
* Restore %gs if needed (which is common) * Restore %gs if needed (which is common)
*/ */
if (prev->gs | next->gs) if (prev->gs | next->gs)
loadsegment(gs, next->gs); lazy_load_gs(next->gs);
percpu_write(current_task, next_p); percpu_write(current_task, next_p);
return prev_p; return prev_p;
} }
/*
 * sys_fork - fork system call entry.
 *
 * Calls do_fork() with SIGCHLD as the flags, the stack pointer taken from
 * the saved register frame, and that frame itself; returns do_fork()'s
 * result.  The two forms on each line differ only in how the frame is
 * received: by value (asmlinkage) or through a pointer.
 */
asmlinkage int sys_fork(struct pt_regs regs) int sys_fork(struct pt_regs *regs)
{ {
return do_fork(SIGCHLD, regs.sp, &regs, 0, NULL, NULL); return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
} }
/*
 * sys_clone - clone system call entry.
 *
 * Arguments are read out of the saved register frame: bx holds the clone
 * flags, cx the new stack pointer, dx the parent TID pointer and di the
 * child TID pointer.  A zero new stack pointer means "use the current
 * one" and is replaced by the frame's sp.  Returns do_fork()'s result.
 */
asmlinkage int sys_clone(struct pt_regs regs) int sys_clone(struct pt_regs *regs)
{ {
unsigned long clone_flags; unsigned long clone_flags;
unsigned long newsp; unsigned long newsp;
int __user *parent_tidptr, *child_tidptr; int __user *parent_tidptr, *child_tidptr;
clone_flags = regs.bx; clone_flags = regs->bx;
newsp = regs.cx; newsp = regs->cx;
parent_tidptr = (int __user *)regs.dx; parent_tidptr = (int __user *)regs->dx;
child_tidptr = (int __user *)regs.di; child_tidptr = (int __user *)regs->di;
if (!newsp) if (!newsp)
newsp = regs.sp; newsp = regs->sp;
return do_fork(clone_flags, newsp, &regs, 0, parent_tidptr, child_tidptr); return do_fork(clone_flags, newsp, regs, 0, parent_tidptr, child_tidptr);
} }
/* /*
...@@ -622,27 +633,27 @@ asmlinkage int sys_clone(struct pt_regs regs) ...@@ -622,27 +633,27 @@ asmlinkage int sys_clone(struct pt_regs regs)
* do not have enough call-clobbered registers to hold all * do not have enough call-clobbered registers to hold all
* the information you need. * the information you need.
*/ */
/*
 * sys_vfork - vfork system call entry.
 *
 * Same call as sys_fork, but with CLONE_VFORK | CLONE_VM added to the
 * SIGCHLD flags passed to do_fork().  Returns do_fork()'s result.
 */
asmlinkage int sys_vfork(struct pt_regs regs) int sys_vfork(struct pt_regs *regs)
{ {
return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.sp, &regs, 0, NULL, NULL); return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0, NULL, NULL);
} }
/* /*
* sys_execve() executes a new program. * sys_execve() executes a new program.
*/ */
asmlinkage int sys_execve(struct pt_regs regs) int sys_execve(struct pt_regs *regs)
{ {
int error; int error;
char *filename; char *filename;
filename = getname((char __user *) regs.bx); filename = getname((char __user *) regs->bx);
error = PTR_ERR(filename); error = PTR_ERR(filename);
if (IS_ERR(filename)) if (IS_ERR(filename))
goto out; goto out;
error = do_execve(filename, error = do_execve(filename,
(char __user * __user *) regs.cx, (char __user * __user *) regs->cx,
(char __user * __user *) regs.dx, (char __user * __user *) regs->dx,
&regs); regs);
if (error == 0) { if (error == 0) {
/* Make sure we don't return using sysenter.. */ /* Make sure we don't return using sysenter.. */
set_thread_flag(TIF_IRET); set_thread_flag(TIF_IRET);
......
...@@ -120,12 +120,11 @@ void cpu_idle(void) ...@@ -120,12 +120,11 @@ void cpu_idle(void)
current_thread_info()->status |= TS_POLLING; current_thread_info()->status |= TS_POLLING;
/* /*
* If we're the non-boot CPU, nothing set the PDA stack * If we're the non-boot CPU, nothing set the stack canary up
* canary up for us - and if we are the boot CPU we have * for us. CPU0 already has it initialized but no harm in
* a 0 stack canary. This is a good place for updating * doing it again. This is a good place for updating it, as
* it, as we wont ever return from this function (so the * we wont ever return from this function (so the invalid
* invalid canaries already on the stack wont ever * canaries already on the stack wont ever trigger).
* trigger):
*/ */
boot_init_stack_canary(); boot_init_stack_canary();
......
...@@ -75,10 +75,7 @@ static inline bool invalid_selector(u16 value) ...@@ -75,10 +75,7 @@ static inline bool invalid_selector(u16 value)
/*
 * pt_regs_access - map a register offset to its slot in struct pt_regs.
 * @regs:  register frame to index into.
 * @regno: offset of the wanted register; it is shifted right by 2 to
 *         obtain an unsigned long index (presumably a byte offset with
 *         4-byte slots -- confirm against the callers).
 *
 * The BUILD_BUG_ON guarantees that bx is the first member, so indexing
 * from &regs->bx is indexing from the start of the structure.  One of the
 * two forms shown decrements indices greater than FS before indexing; the
 * other indexes directly.
 *
 * Returns a pointer to the selected slot.
 */
static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno) static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
{ {
BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0); BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0);
regno >>= 2; return &regs->bx + (regno >> 2);
if (regno > FS)
--regno;
return &regs->bx + regno;
} }
static u16 get_segment_reg(struct task_struct *task, unsigned long offset) static u16 get_segment_reg(struct task_struct *task, unsigned long offset)
...@@ -90,9 +87,10 @@ static u16 get_segment_reg(struct task_struct *task, unsigned long offset) ...@@ -90,9 +87,10 @@ static u16 get_segment_reg(struct task_struct *task, unsigned long offset)
if (offset != offsetof(struct user_regs_struct, gs)) if (offset != offsetof(struct user_regs_struct, gs))
retval = *pt_regs_access(task_pt_regs(task), offset); retval = *pt_regs_access(task_pt_regs(task), offset);
else { else {
retval = task->thread.gs;
if (task == current) if (task == current)
savesegment(gs, retval); retval = get_user_gs(task_pt_regs(task));
else
retval = task_user_gs(task);
} }
return retval; return retval;
} }
...@@ -126,13 +124,10 @@ static int set_segment_reg(struct task_struct *task, ...@@ -126,13 +124,10 @@ static int set_segment_reg(struct task_struct *task,
break; break;
case offsetof(struct user_regs_struct, gs): case offsetof(struct user_regs_struct, gs):
task->thread.gs = value;
if (task == current) if (task == current)
/* set_user_gs(task_pt_regs(task), value);
* The user-mode %gs is not affected by else
* kernel entry, so we must update the CPU. task_user_gs(task) = value;
*/
loadsegment(gs, value);
} }
return 0; return 0;
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include <asm/proto.h> #include <asm/proto.h>
#include <asm/cpumask.h> #include <asm/cpumask.h>
#include <asm/cpu.h> #include <asm/cpu.h>
#include <asm/stackprotector.h>
#ifdef CONFIG_DEBUG_PER_CPU_MAPS #ifdef CONFIG_DEBUG_PER_CPU_MAPS
# define DBG(x...) printk(KERN_DEBUG x) # define DBG(x...) printk(KERN_DEBUG x)
...@@ -95,6 +96,7 @@ void __init setup_per_cpu_areas(void) ...@@ -95,6 +96,7 @@ void __init setup_per_cpu_areas(void)
per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
per_cpu(cpu_number, cpu) = cpu; per_cpu(cpu_number, cpu) = cpu;
setup_percpu_segment(cpu); setup_percpu_segment(cpu);
setup_stack_canary_segment(cpu);
/* /*
* Copy data used in early init routines from the * Copy data used in early init routines from the
* initial arrays to the per cpu data areas. These * initial arrays to the per cpu data areas. These
......
...@@ -50,27 +50,23 @@ ...@@ -50,27 +50,23 @@
# define FIX_EFLAGS __FIX_EFLAGS # define FIX_EFLAGS __FIX_EFLAGS
#endif #endif
/*
 * Helpers for copying fields out of a user sigcontext.  All of them expect
 * "regs" and "sc" to be in scope at the point of use and go through
 * get_user_ex().
 *
 * COPY(x)             - read sc->x into regs->x.
 * GET_SEG(seg)        - read sc->seg into a 16-bit temporary.  In the
 *                       statement-expression form the temporary is the
 *                       macro's value; in the brace form it is loaded into
 *                       the segment register with loadsegment().
 * COPY_SEG(seg)       - store the 16-bit sc->seg value into regs->seg.
 * COPY_SEG_CPL3(seg)  - as COPY_SEG, with the low two bits forced to 3.
 *
 * The do { } while (0) forms let the macros be used as single statements.
 */
#define COPY(x) { \ #define COPY(x) do { \
get_user_ex(regs->x, &sc->x); \ get_user_ex(regs->x, &sc->x); \
} } while (0)
#define COPY_SEG(seg) { \ #define GET_SEG(seg) ({ \
unsigned short tmp; \ unsigned short tmp; \
get_user_ex(tmp, &sc->seg); \ get_user_ex(tmp, &sc->seg); \
regs->seg = tmp; \ tmp; \
} })
#define COPY_SEG_CPL3(seg) { \ #define COPY_SEG(seg) do { \
unsigned short tmp; \ regs->seg = GET_SEG(seg); \
get_user_ex(tmp, &sc->seg); \ } while (0)
regs->seg = tmp | 3; \
}
#define GET_SEG(seg) { \ #define COPY_SEG_CPL3(seg) do { \
unsigned short tmp; \ regs->seg = GET_SEG(seg) | 3; \
get_user_ex(tmp, &sc->seg); \ } while (0)
loadsegment(seg, tmp); \
}
static int static int
restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
...@@ -86,7 +82,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, ...@@ -86,7 +82,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
get_user_try { get_user_try {
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
GET_SEG(gs); set_user_gs(regs, GET_SEG(gs));
COPY_SEG(fs); COPY_SEG(fs);
COPY_SEG(es); COPY_SEG(es);
COPY_SEG(ds); COPY_SEG(ds);
...@@ -138,12 +134,7 @@ setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, ...@@ -138,12 +134,7 @@ setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
put_user_try { put_user_try {
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
{ put_user_ex(get_user_gs(regs), (unsigned int __user *)&sc->gs);
unsigned int tmp;
savesegment(gs, tmp);
put_user_ex(tmp, (unsigned int __user *)&sc->gs);
}
put_user_ex(regs->fs, (unsigned int __user *)&sc->fs); put_user_ex(regs->fs, (unsigned int __user *)&sc->fs);
put_user_ex(regs->es, (unsigned int __user *)&sc->es); put_user_ex(regs->es, (unsigned int __user *)&sc->es);
put_user_ex(regs->ds, (unsigned int __user *)&sc->ds); put_user_ex(regs->ds, (unsigned int __user *)&sc->ds);
...@@ -558,14 +549,9 @@ sys_sigaction(int sig, const struct old_sigaction __user *act, ...@@ -558,14 +549,9 @@ sys_sigaction(int sig, const struct old_sigaction __user *act,
#endif /* CONFIG_X86_32 */ #endif /* CONFIG_X86_32 */
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
asmlinkage int sys_sigaltstack(unsigned long bx) int sys_sigaltstack(struct pt_regs *regs)
{ {
/* const stack_t __user *uss = (const stack_t __user *)regs->bx;
* This is needed to make gcc realize it doesn't own the
* "struct pt_regs"
*/
struct pt_regs *regs = (struct pt_regs *)&bx;
const stack_t __user *uss = (const stack_t __user *)bx;
stack_t __user *uoss = (stack_t __user *)regs->cx; stack_t __user *uoss = (stack_t __user *)regs->cx;
return do_sigaltstack(uss, uoss, regs->sp); return do_sigaltstack(uss, uoss, regs->sp);
...@@ -583,14 +569,12 @@ sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, ...@@ -583,14 +569,12 @@ sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
* Do a signal return; undo the signal stack. * Do a signal return; undo the signal stack.
*/ */
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
asmlinkage unsigned long sys_sigreturn(unsigned long __unused) unsigned long sys_sigreturn(struct pt_regs *regs)
{ {
struct sigframe __user *frame; struct sigframe __user *frame;
struct pt_regs *regs;
unsigned long ax; unsigned long ax;
sigset_t set; sigset_t set;
regs = (struct pt_regs *) &__unused;
frame = (struct sigframe __user *)(regs->sp - 8); frame = (struct sigframe __user *)(regs->sp - 8);
if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
...@@ -617,7 +601,7 @@ asmlinkage unsigned long sys_sigreturn(unsigned long __unused) ...@@ -617,7 +601,7 @@ asmlinkage unsigned long sys_sigreturn(unsigned long __unused)
} }
#endif /* CONFIG_X86_32 */ #endif /* CONFIG_X86_32 */
static long do_rt_sigreturn(struct pt_regs *regs) long sys_rt_sigreturn(struct pt_regs *regs)
{ {
struct rt_sigframe __user *frame; struct rt_sigframe __user *frame;
unsigned long ax; unsigned long ax;
...@@ -648,25 +632,6 @@ static long do_rt_sigreturn(struct pt_regs *regs) ...@@ -648,25 +632,6 @@ static long do_rt_sigreturn(struct pt_regs *regs)
return 0; return 0;
} }
#ifdef CONFIG_X86_32
/*
* Note: do not pass in pt_regs directly as with tail-call optimization
* GCC will incorrectly stomp on the caller's frame and corrupt user-space
* register state:
*/
asmlinkage int sys_rt_sigreturn(unsigned long __unused)
{
struct pt_regs *regs = (struct pt_regs *)&__unused;
return do_rt_sigreturn(regs);
}
#else /* !CONFIG_X86_32 */
asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
{
return do_rt_sigreturn(regs);
}
#endif /* CONFIG_X86_32 */
/* /*
* OK, we're invoking a handler: * OK, we're invoking a handler:
*/ */
......
ENTRY(sys_call_table) ENTRY(sys_call_table)
.long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */ .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */
.long sys_exit .long sys_exit
.long sys_fork .long ptregs_fork
.long sys_read .long sys_read
.long sys_write .long sys_write
.long sys_open /* 5 */ .long sys_open /* 5 */
...@@ -10,7 +10,7 @@ ENTRY(sys_call_table) ...@@ -10,7 +10,7 @@ ENTRY(sys_call_table)
.long sys_creat .long sys_creat
.long sys_link .long sys_link
.long sys_unlink /* 10 */ .long sys_unlink /* 10 */
.long sys_execve .long ptregs_execve
.long sys_chdir .long sys_chdir
.long sys_time .long sys_time
.long sys_mknod .long sys_mknod
...@@ -109,17 +109,17 @@ ENTRY(sys_call_table) ...@@ -109,17 +109,17 @@ ENTRY(sys_call_table)
.long sys_newlstat .long sys_newlstat
.long sys_newfstat .long sys_newfstat
.long sys_uname .long sys_uname
.long sys_iopl /* 110 */ .long ptregs_iopl /* 110 */
.long sys_vhangup .long sys_vhangup
.long sys_ni_syscall /* old "idle" system call */ .long sys_ni_syscall /* old "idle" system call */
.long sys_vm86old .long ptregs_vm86old
.long sys_wait4 .long sys_wait4
.long sys_swapoff /* 115 */ .long sys_swapoff /* 115 */
.long sys_sysinfo .long sys_sysinfo
.long sys_ipc .long sys_ipc
.long sys_fsync .long sys_fsync
.long sys_sigreturn .long ptregs_sigreturn
.long sys_clone /* 120 */ .long ptregs_clone /* 120 */
.long sys_setdomainname .long sys_setdomainname
.long sys_newuname .long sys_newuname
.long sys_modify_ldt .long sys_modify_ldt
...@@ -165,14 +165,14 @@ ENTRY(sys_call_table) ...@@ -165,14 +165,14 @@ ENTRY(sys_call_table)
.long sys_mremap .long sys_mremap
.long sys_setresuid16 .long sys_setresuid16
.long sys_getresuid16 /* 165 */ .long sys_getresuid16 /* 165 */
.long sys_vm86 .long ptregs_vm86
.long sys_ni_syscall /* Old sys_query_module */ .long sys_ni_syscall /* Old sys_query_module */
.long sys_poll .long sys_poll
.long sys_nfsservctl .long sys_nfsservctl
.long sys_setresgid16 /* 170 */ .long sys_setresgid16 /* 170 */
.long sys_getresgid16 .long sys_getresgid16
.long sys_prctl .long sys_prctl
.long sys_rt_sigreturn .long ptregs_rt_sigreturn
.long sys_rt_sigaction .long sys_rt_sigaction
.long sys_rt_sigprocmask /* 175 */ .long sys_rt_sigprocmask /* 175 */
.long sys_rt_sigpending .long sys_rt_sigpending
...@@ -185,11 +185,11 @@ ENTRY(sys_call_table) ...@@ -185,11 +185,11 @@ ENTRY(sys_call_table)
.long sys_getcwd .long sys_getcwd
.long sys_capget .long sys_capget
.long sys_capset /* 185 */ .long sys_capset /* 185 */
.long sys_sigaltstack .long ptregs_sigaltstack
.long sys_sendfile .long sys_sendfile
.long sys_ni_syscall /* reserved for streams1 */ .long sys_ni_syscall /* reserved for streams1 */
.long sys_ni_syscall /* reserved for streams2 */ .long sys_ni_syscall /* reserved for streams2 */
.long sys_vfork /* 190 */ .long ptregs_vfork /* 190 */
.long sys_getrlimit .long sys_getrlimit
.long sys_mmap2 .long sys_mmap2
.long sys_truncate64 .long sys_truncate64
......
...@@ -905,19 +905,20 @@ void math_emulate(struct math_emu_info *info) ...@@ -905,19 +905,20 @@ void math_emulate(struct math_emu_info *info)
} }
#endif /* CONFIG_MATH_EMULATION */ #endif /* CONFIG_MATH_EMULATION */
dotraplinkage void __kprobes do_device_not_available(struct pt_regs regs) dotraplinkage void __kprobes
do_device_not_available(struct pt_regs *regs, long error_code)
{ {
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
if (read_cr0() & X86_CR0_EM) { if (read_cr0() & X86_CR0_EM) {
struct math_emu_info info = { }; struct math_emu_info info = { };
conditional_sti(&regs); conditional_sti(regs);
info.regs = &regs; info.regs = regs;
math_emulate(&info); math_emulate(&info);
} else { } else {
math_state_restore(); /* interrupts still off */ math_state_restore(); /* interrupts still off */
conditional_sti(&regs); conditional_sti(regs);
} }
#else #else
math_state_restore(); math_state_restore();
......
...@@ -158,7 +158,7 @@ struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs) ...@@ -158,7 +158,7 @@ struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs)
ret = KVM86->regs32; ret = KVM86->regs32;
ret->fs = current->thread.saved_fs; ret->fs = current->thread.saved_fs;
loadsegment(gs, current->thread.saved_gs); set_user_gs(ret, current->thread.saved_gs);
return ret; return ret;
} }
...@@ -197,9 +197,9 @@ static void mark_screen_rdonly(struct mm_struct *mm) ...@@ -197,9 +197,9 @@ static void mark_screen_rdonly(struct mm_struct *mm)
static int do_vm86_irq_handling(int subfunction, int irqnumber); static int do_vm86_irq_handling(int subfunction, int irqnumber);
static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk); static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk);
asmlinkage int sys_vm86old(struct pt_regs regs) int sys_vm86old(struct pt_regs *regs)
{ {
struct vm86_struct __user *v86 = (struct vm86_struct __user *)regs.bx; struct vm86_struct __user *v86 = (struct vm86_struct __user *)regs->bx;
struct kernel_vm86_struct info; /* declare this _on top_, struct kernel_vm86_struct info; /* declare this _on top_,
* this avoids wasting of stack space. * this avoids wasting of stack space.
* This remains on the stack until we * This remains on the stack until we
...@@ -218,7 +218,7 @@ asmlinkage int sys_vm86old(struct pt_regs regs) ...@@ -218,7 +218,7 @@ asmlinkage int sys_vm86old(struct pt_regs regs)
if (tmp) if (tmp)
goto out; goto out;
memset(&info.vm86plus, 0, (int)&info.regs32 - (int)&info.vm86plus); memset(&info.vm86plus, 0, (int)&info.regs32 - (int)&info.vm86plus);
info.regs32 = &regs; info.regs32 = regs;
tsk->thread.vm86_info = v86; tsk->thread.vm86_info = v86;
do_sys_vm86(&info, tsk); do_sys_vm86(&info, tsk);
ret = 0; /* we never return here */ ret = 0; /* we never return here */
...@@ -227,7 +227,7 @@ asmlinkage int sys_vm86old(struct pt_regs regs) ...@@ -227,7 +227,7 @@ asmlinkage int sys_vm86old(struct pt_regs regs)
} }
asmlinkage int sys_vm86(struct pt_regs regs) int sys_vm86(struct pt_regs *regs)
{ {
struct kernel_vm86_struct info; /* declare this _on top_, struct kernel_vm86_struct info; /* declare this _on top_,
* this avoids wasting of stack space. * this avoids wasting of stack space.
...@@ -239,12 +239,12 @@ asmlinkage int sys_vm86(struct pt_regs regs) ...@@ -239,12 +239,12 @@ asmlinkage int sys_vm86(struct pt_regs regs)
struct vm86plus_struct __user *v86; struct vm86plus_struct __user *v86;
tsk = current; tsk = current;
switch (regs.bx) { switch (regs->bx) {
case VM86_REQUEST_IRQ: case VM86_REQUEST_IRQ:
case VM86_FREE_IRQ: case VM86_FREE_IRQ:
case VM86_GET_IRQ_BITS: case VM86_GET_IRQ_BITS:
case VM86_GET_AND_RESET_IRQ: case VM86_GET_AND_RESET_IRQ:
ret = do_vm86_irq_handling(regs.bx, (int)regs.cx); ret = do_vm86_irq_handling(regs->bx, (int)regs->cx);
goto out; goto out;
case VM86_PLUS_INSTALL_CHECK: case VM86_PLUS_INSTALL_CHECK:
/* /*
...@@ -261,14 +261,14 @@ asmlinkage int sys_vm86(struct pt_regs regs) ...@@ -261,14 +261,14 @@ asmlinkage int sys_vm86(struct pt_regs regs)
ret = -EPERM; ret = -EPERM;
if (tsk->thread.saved_sp0) if (tsk->thread.saved_sp0)
goto out; goto out;
v86 = (struct vm86plus_struct __user *)regs.cx; v86 = (struct vm86plus_struct __user *)regs->cx;
tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs, tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs,
offsetof(struct kernel_vm86_struct, regs32) - offsetof(struct kernel_vm86_struct, regs32) -
sizeof(info.regs)); sizeof(info.regs));
ret = -EFAULT; ret = -EFAULT;
if (tmp) if (tmp)
goto out; goto out;
info.regs32 = &regs; info.regs32 = regs;
info.vm86plus.is_vm86pus = 1; info.vm86plus.is_vm86pus = 1;
tsk->thread.vm86_info = (struct vm86_struct __user *)v86; tsk->thread.vm86_info = (struct vm86_struct __user *)v86;
do_sys_vm86(&info, tsk); do_sys_vm86(&info, tsk);
...@@ -323,7 +323,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk ...@@ -323,7 +323,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
info->regs32->ax = 0; info->regs32->ax = 0;
tsk->thread.saved_sp0 = tsk->thread.sp0; tsk->thread.saved_sp0 = tsk->thread.sp0;
tsk->thread.saved_fs = info->regs32->fs; tsk->thread.saved_fs = info->regs32->fs;
savesegment(gs, tsk->thread.saved_gs); tsk->thread.saved_gs = get_user_gs(info->regs32);
tss = &per_cpu(init_tss, get_cpu()); tss = &per_cpu(init_tss, get_cpu());
tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0; tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0;
......
...@@ -257,6 +257,14 @@ SECTIONS ...@@ -257,6 +257,14 @@ SECTIONS
DWARF_DEBUG DWARF_DEBUG
} }
/*
* Per-cpu symbols which need to be offset from __per_cpu_load
* for the boot processor.
*/
#define INIT_PER_CPU(x) init_per_cpu__##x = per_cpu__##x + __per_cpu_load
INIT_PER_CPU(gdt_page);
INIT_PER_CPU(irq_stack_union);
/* /*
* Build-time check on the image size: * Build-time check on the image size:
*/ */
......
...@@ -283,7 +283,7 @@ static void lguest_load_tls(struct thread_struct *t, unsigned int cpu) ...@@ -283,7 +283,7 @@ static void lguest_load_tls(struct thread_struct *t, unsigned int cpu)
/* There's one problem which normal hardware doesn't have: the Host /* There's one problem which normal hardware doesn't have: the Host
* can't handle us removing entries we're currently using. So we clear * can't handle us removing entries we're currently using. So we clear
* the GS register here: if it's needed it'll be reloaded anyway. */ * the GS register here: if it's needed it'll be reloaded anyway. */
loadsegment(gs, 0); lazy_load_gs(0);
lazy_hcall(LHCALL_LOAD_TLS, __pa(&t->tls_array), cpu, 0); lazy_hcall(LHCALL_LOAD_TLS, __pa(&t->tls_array), cpu, 0);
} }
......
...@@ -150,11 +150,9 @@ static long pm_address(u_char FPU_modrm, u_char segment, ...@@ -150,11 +150,9 @@ static long pm_address(u_char FPU_modrm, u_char segment,
#endif /* PARANOID */ #endif /* PARANOID */
switch (segment) { switch (segment) {
/* gs isn't used by the kernel, so it still has its
user-space value. */
case PREFIX_GS_ - 1: case PREFIX_GS_ - 1:
/* N.B. - movl %seg, mem is a 2 byte write regardless of prefix */ /* user gs handling can be lazy, use special accessors */
savesegment(gs, addr->selector); addr->selector = get_user_gs(FPU_info->regs);
break; break;
default: default:
addr->selector = PM_REG_(segment); addr->selector = PM_REG_(segment);
......
...@@ -702,7 +702,7 @@ void __cpuinit numa_set_node(int cpu, int node) ...@@ -702,7 +702,7 @@ void __cpuinit numa_set_node(int cpu, int node)
} }
#ifdef CONFIG_DEBUG_PER_CPU_MAPS #ifdef CONFIG_DEBUG_PER_CPU_MAPS
if (cpu >= nr_cpu_ids || !per_cpu_offset(cpu)) { if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) {
printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu); printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu);
dump_stack(); dump_stack();
return; return;
...@@ -790,7 +790,7 @@ int early_cpu_to_node(int cpu) ...@@ -790,7 +790,7 @@ int early_cpu_to_node(int cpu)
if (early_per_cpu_ptr(x86_cpu_to_node_map)) if (early_per_cpu_ptr(x86_cpu_to_node_map))
return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
if (!per_cpu_offset(cpu)) { if (!cpu_possible(cpu)) {
printk(KERN_WARNING printk(KERN_WARNING
"early_cpu_to_node(%d): no per_cpu area!\n", cpu); "early_cpu_to_node(%d): no per_cpu area!\n", cpu);
dump_stack(); dump_stack();
......
...@@ -38,7 +38,7 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE ...@@ -38,7 +38,7 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE
$(call if_changed,objcopy) $(call if_changed,objcopy)
CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \ CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \
$(filter -g%,$(KBUILD_CFLAGS)) $(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector)
$(vobjs): KBUILD_CFLAGS += $(CFL) $(vobjs): KBUILD_CFLAGS += $(CFL)
......
...@@ -323,13 +323,14 @@ static void load_TLS_descriptor(struct thread_struct *t, ...@@ -323,13 +323,14 @@ static void load_TLS_descriptor(struct thread_struct *t,
static void xen_load_tls(struct thread_struct *t, unsigned int cpu) static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
{ {
/* /*
* XXX sleazy hack: If we're being called in a lazy-cpu zone, * XXX sleazy hack: If we're being called in a lazy-cpu zone
* it means we're in a context switch, and %gs has just been * and lazy gs handling is enabled, it means we're in a
* saved. This means we can zero it out to prevent faults on * context switch, and %gs has just been saved. This means we
* exit from the hypervisor if the next process has no %gs. * can zero it out to prevent faults on exit from the
* Either way, it has been saved, and the new value will get * hypervisor if the next process has no %gs. Either way, it
* loaded properly. This will go away as soon as Xen has been * has been saved, and the new value will get loaded properly.
* modified to not save/restore %gs for normal hypercalls. * This will go away as soon as Xen has been modified to not
* save/restore %gs for normal hypercalls.
* *
* On x86_64, this hack is not used for %gs, because gs points * On x86_64, this hack is not used for %gs, because gs points
* to KERNEL_GS_BASE (and uses it for PDA references), so we * to KERNEL_GS_BASE (and uses it for PDA references), so we
...@@ -341,7 +342,7 @@ static void xen_load_tls(struct thread_struct *t, unsigned int cpu) ...@@ -341,7 +342,7 @@ static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
*/ */
if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) { if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) {
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
loadsegment(gs, 0); lazy_load_gs(0);
#else #else
loadsegment(fs, 0); loadsegment(fs, 0);
#endif #endif
......
/* /*
Asm versions of Xen pv-ops, suitable for either direct use or inlining. * Asm versions of Xen pv-ops, suitable for either direct use or
The inline versions are the same as the direct-use versions, with the * inlining. The inline versions are the same as the direct-use
pre- and post-amble chopped off. * versions, with the pre- and post-amble chopped off.
*
This code is encoded for size rather than absolute efficiency, * This code is encoded for size rather than absolute efficiency, with
with a view to being able to inline as much as possible. * a view to being able to inline as much as possible.
*
We only bother with direct forms (ie, vcpu in percpu data) of * We only bother with direct forms (ie, vcpu in percpu data) of the
the operations here; the indirect forms are better handled in * operations here; the indirect forms are better handled in C, since
C, since they're generally too large to inline anyway. * they're generally too large to inline anyway.
*/ */
#include <asm/asm-offsets.h> #include <asm/asm-offsets.h>
...@@ -18,17 +18,19 @@ ...@@ -18,17 +18,19 @@
#include "xen-asm.h" #include "xen-asm.h"
/* /*
Enable events. This clears the event mask and tests the pending * Enable events. This clears the event mask and tests the pending
event status with one and operation. If there are pending * event status with one and operation. If there are pending events,
events, then enter the hypervisor to get them handled. * then enter the hypervisor to get them handled.
*/ */
ENTRY(xen_irq_enable_direct) ENTRY(xen_irq_enable_direct)
/* Unmask events */ /* Unmask events */
movb $0, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask movb $0, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
/* Preempt here doesn't matter because that will deal with /*
any pending interrupts. The pending check may end up being * Preempt here doesn't matter because that will deal with any
run on the wrong CPU, but that doesn't hurt. */ * pending interrupts. The pending check may end up being run
* on the wrong CPU, but that doesn't hurt.
*/
/* Test for pending */ /* Test for pending */
testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
...@@ -43,8 +45,8 @@ ENDPATCH(xen_irq_enable_direct) ...@@ -43,8 +45,8 @@ ENDPATCH(xen_irq_enable_direct)
/* /*
Disabling events is simply a matter of making the event mask * Disabling events is simply a matter of making the event mask
non-zero. * non-zero.
*/ */
ENTRY(xen_irq_disable_direct) ENTRY(xen_irq_disable_direct)
movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
...@@ -54,18 +56,18 @@ ENDPATCH(xen_irq_disable_direct) ...@@ -54,18 +56,18 @@ ENDPATCH(xen_irq_disable_direct)
RELOC(xen_irq_disable_direct, 0) RELOC(xen_irq_disable_direct, 0)
/* /*
(xen_)save_fl is used to get the current interrupt enable status. * (xen_)save_fl is used to get the current interrupt enable status.
Callers expect the status to be in X86_EFLAGS_IF, and other bits * Callers expect the status to be in X86_EFLAGS_IF, and other bits
may be set in the return value. We take advantage of this by * may be set in the return value. We take advantage of this by
making sure that X86_EFLAGS_IF has the right value (and other bits * making sure that X86_EFLAGS_IF has the right value (and other bits
in that byte are 0), but other bits in the return value are * in that byte are 0), but other bits in the return value are
undefined. We need to toggle the state of the bit, because * undefined. We need to toggle the state of the bit, because Xen and
Xen and x86 use opposite senses (mask vs enable). * x86 use opposite senses (mask vs enable).
*/ */
ENTRY(xen_save_fl_direct) ENTRY(xen_save_fl_direct)
testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
setz %ah setz %ah
addb %ah,%ah addb %ah, %ah
ENDPATCH(xen_save_fl_direct) ENDPATCH(xen_save_fl_direct)
ret ret
ENDPROC(xen_save_fl_direct) ENDPROC(xen_save_fl_direct)
...@@ -73,12 +75,11 @@ ENDPATCH(xen_save_fl_direct) ...@@ -73,12 +75,11 @@ ENDPATCH(xen_save_fl_direct)
/* /*
In principle the caller should be passing us a value return * In principle the caller should be passing us a value return from
from xen_save_fl_direct, but for robustness sake we test only * xen_save_fl_direct, but for robustness sake we test only the
the X86_EFLAGS_IF flag rather than the whole byte. After * X86_EFLAGS_IF flag rather than the whole byte. After setting the
setting the interrupt mask state, it checks for unmasked * interrupt mask state, it checks for unmasked pending events and
pending events and enters the hypervisor to get them delivered * enters the hypervisor to get them delivered if so.
if so.
*/ */
ENTRY(xen_restore_fl_direct) ENTRY(xen_restore_fl_direct)
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
...@@ -87,9 +88,11 @@ ENTRY(xen_restore_fl_direct) ...@@ -87,9 +88,11 @@ ENTRY(xen_restore_fl_direct)
testb $X86_EFLAGS_IF>>8, %ah testb $X86_EFLAGS_IF>>8, %ah
#endif #endif
setz PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask setz PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
/* Preempt here doesn't matter because that will deal with /*
any pending interrupts. The pending check may end up being * Preempt here doesn't matter because that will deal with any
run on the wrong CPU, but that doesn't hurt. */ * pending interrupts. The pending check may end up being run
* on the wrong CPU, but that doesn't hurt.
*/
/* check for unmasked and pending */ /* check for unmasked and pending */
cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
...@@ -103,8 +106,8 @@ ENDPATCH(xen_restore_fl_direct) ...@@ -103,8 +106,8 @@ ENDPATCH(xen_restore_fl_direct)
/* /*
Force an event check by making a hypercall, * Force an event check by making a hypercall, but preserve regs
but preserve regs before making the call. * before making the call.
*/ */
check_events: check_events:
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
...@@ -137,4 +140,3 @@ check_events: ...@@ -137,4 +140,3 @@ check_events:
pop %rax pop %rax
#endif #endif
ret ret
/* /*
Asm versions of Xen pv-ops, suitable for either direct use or inlining. * Asm versions of Xen pv-ops, suitable for either direct use or
The inline versions are the same as the direct-use versions, with the * inlining. The inline versions are the same as the direct-use
pre- and post-amble chopped off. * versions, with the pre- and post-amble chopped off.
*
This code is encoded for size rather than absolute efficiency, * This code is encoded for size rather than absolute efficiency, with
with a view to being able to inline as much as possible. * a view to being able to inline as much as possible.
*
We only bother with direct forms (ie, vcpu in pda) of the operations * We only bother with direct forms (ie, vcpu in pda) of the
here; the indirect forms are better handled in C, since they're * operations here; the indirect forms are better handled in C, since
generally too large to inline anyway. * they're generally too large to inline anyway.
*/ */
//#include <asm/asm-offsets.h>
#include <asm/thread_info.h> #include <asm/thread_info.h>
#include <asm/processor-flags.h> #include <asm/processor-flags.h>
#include <asm/segment.h> #include <asm/segment.h>
...@@ -21,8 +20,8 @@ ...@@ -21,8 +20,8 @@
#include "xen-asm.h" #include "xen-asm.h"
/* /*
Force an event check by making a hypercall, * Force an event check by making a hypercall, but preserve regs
but preserve regs before making the call. * before making the call.
*/ */
check_events: check_events:
push %eax push %eax
...@@ -35,10 +34,10 @@ check_events: ...@@ -35,10 +34,10 @@ check_events:
ret ret
/* /*
We can't use sysexit directly, because we're not running in ring0. * We can't use sysexit directly, because we're not running in ring0.
But we can easily fake it up using iret. Assuming xen_sysexit * But we can easily fake it up using iret. Assuming xen_sysexit is
is jumped to with a standard stack frame, we can just strip it * jumped to with a standard stack frame, we can just strip it back to
back to a standard iret frame and use iret. * a standard iret frame and use iret.
*/ */
ENTRY(xen_sysexit) ENTRY(xen_sysexit)
movl PT_EAX(%esp), %eax /* Shouldn't be necessary? */ movl PT_EAX(%esp), %eax /* Shouldn't be necessary? */
...@@ -49,33 +48,31 @@ ENTRY(xen_sysexit) ...@@ -49,33 +48,31 @@ ENTRY(xen_sysexit)
ENDPROC(xen_sysexit) ENDPROC(xen_sysexit)
/* /*
This is run where a normal iret would be run, with the same stack setup: * This is run where a normal iret would be run, with the same stack setup:
8: eflags * 8: eflags
4: cs * 4: cs
esp-> 0: eip * esp-> 0: eip
*
This attempts to make sure that any pending events are dealt * This attempts to make sure that any pending events are dealt with
with on return to usermode, but there is a small window in * on return to usermode, but there is a small window in which an
which an event can happen just before entering usermode. If * event can happen just before entering usermode. If the nested
the nested interrupt ends up setting one of the TIF_WORK_MASK * interrupt ends up setting one of the TIF_WORK_MASK pending work
pending work flags, they will not be tested again before * flags, they will not be tested again before returning to
returning to usermode. This means that a process can end up * usermode. This means that a process can end up with pending work,
with pending work, which will be unprocessed until the process * which will be unprocessed until the process enters and leaves the
enters and leaves the kernel again, which could be an * kernel again, which could be an unbounded amount of time. This
unbounded amount of time. This means that a pending signal or * means that a pending signal or reschedule event could be
reschedule event could be indefinitely delayed. * indefinitely delayed.
*
The fix is to notice a nested interrupt in the critical * The fix is to notice a nested interrupt in the critical window, and
window, and if one occurs, then fold the nested interrupt into * if one occurs, then fold the nested interrupt into the current
the current interrupt stack frame, and re-process it * interrupt stack frame, and re-process it iteratively rather than
iteratively rather than recursively. This means that it will * recursively. This means that it will exit via the normal path, and
exit via the normal path, and all pending work will be dealt * all pending work will be dealt with appropriately.
with appropriately. *
* Because the nested interrupt handler needs to deal with the current
Because the nested interrupt handler needs to deal with the * stack state in whatever form its in, we keep things simple by only
current stack state in whatever form its in, we keep things * using a single register which is pushed/popped on the stack.
simple by only using a single register which is pushed/popped
on the stack.
*/ */
ENTRY(xen_iret) ENTRY(xen_iret)
/* test eflags for special cases */ /* test eflags for special cases */
...@@ -85,13 +82,15 @@ ENTRY(xen_iret) ...@@ -85,13 +82,15 @@ ENTRY(xen_iret)
push %eax push %eax
ESP_OFFSET=4 # bytes pushed onto stack ESP_OFFSET=4 # bytes pushed onto stack
/* Store vcpu_info pointer for easy access. Do it this /*
way to avoid having to reload %fs */ * Store vcpu_info pointer for easy access. Do it this way to
* avoid having to reload %fs
*/
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
GET_THREAD_INFO(%eax) GET_THREAD_INFO(%eax)
movl TI_cpu(%eax),%eax movl TI_cpu(%eax), %eax
movl __per_cpu_offset(,%eax,4),%eax movl __per_cpu_offset(,%eax,4), %eax
mov per_cpu__xen_vcpu(%eax),%eax mov per_cpu__xen_vcpu(%eax), %eax
#else #else
movl per_cpu__xen_vcpu, %eax movl per_cpu__xen_vcpu, %eax
#endif #endif
...@@ -99,37 +98,46 @@ ENTRY(xen_iret) ...@@ -99,37 +98,46 @@ ENTRY(xen_iret)
/* check IF state we're restoring */ /* check IF state we're restoring */
testb $X86_EFLAGS_IF>>8, 8+1+ESP_OFFSET(%esp) testb $X86_EFLAGS_IF>>8, 8+1+ESP_OFFSET(%esp)
/* Maybe enable events. Once this happens we could get a /*
recursive event, so the critical region starts immediately * Maybe enable events. Once this happens we could get a
afterwards. However, if that happens we don't end up * recursive event, so the critical region starts immediately
resuming the code, so we don't have to be worried about * afterwards. However, if that happens we don't end up
being preempted to another CPU. */ * resuming the code, so we don't have to be worried about
* being preempted to another CPU.
*/
setz XEN_vcpu_info_mask(%eax) setz XEN_vcpu_info_mask(%eax)
xen_iret_start_crit: xen_iret_start_crit:
/* check for unmasked and pending */ /* check for unmasked and pending */
cmpw $0x0001, XEN_vcpu_info_pending(%eax) cmpw $0x0001, XEN_vcpu_info_pending(%eax)
/* If there's something pending, mask events again so we /*
can jump back into xen_hypervisor_callback */ * If there's something pending, mask events again so we can
* jump back into xen_hypervisor_callback
*/
sete XEN_vcpu_info_mask(%eax) sete XEN_vcpu_info_mask(%eax)
popl %eax popl %eax
/* From this point on the registers are restored and the stack /*
updated, so we don't need to worry about it if we're preempted */ * From this point on the registers are restored and the stack
* updated, so we don't need to worry about it if we're
* preempted
*/
iret_restore_end: iret_restore_end:
/* Jump to hypervisor_callback after fixing up the stack. /*
Events are masked, so jumping out of the critical * Jump to hypervisor_callback after fixing up the stack.
region is OK. */ * Events are masked, so jumping out of the critical region is
* OK.
*/
je xen_hypervisor_callback je xen_hypervisor_callback
1: iret 1: iret
xen_iret_end_crit: xen_iret_end_crit:
.section __ex_table,"a" .section __ex_table, "a"
.align 4 .align 4
.long 1b,iret_exc .long 1b, iret_exc
.previous .previous
hyper_iret: hyper_iret:
...@@ -139,55 +147,55 @@ hyper_iret: ...@@ -139,55 +147,55 @@ hyper_iret:
.globl xen_iret_start_crit, xen_iret_end_crit .globl xen_iret_start_crit, xen_iret_end_crit
/* /*
This is called by xen_hypervisor_callback in entry.S when it sees * This is called by xen_hypervisor_callback in entry.S when it sees
that the EIP at the time of interrupt was between xen_iret_start_crit * that the EIP at the time of interrupt was between
and xen_iret_end_crit. We're passed the EIP in %eax so we can do * xen_iret_start_crit and xen_iret_end_crit. We're passed the EIP in
a more refined determination of what to do. * %eax so we can do a more refined determination of what to do.
*
The stack format at this point is: * The stack format at this point is:
---------------- * ----------------
ss : (ss/esp may be present if we came from usermode) * ss : (ss/esp may be present if we came from usermode)
esp : * esp :
eflags } outer exception info * eflags } outer exception info
cs } * cs }
eip } * eip }
---------------- <- edi (copy dest) * ---------------- <- edi (copy dest)
eax : outer eax if it hasn't been restored * eax : outer eax if it hasn't been restored
---------------- * ----------------
eflags } nested exception info * eflags } nested exception info
cs } (no ss/esp because we're nested * cs } (no ss/esp because we're nested
eip } from the same ring) * eip } from the same ring)
orig_eax }<- esi (copy src) * orig_eax }<- esi (copy src)
- - - - - - - - * - - - - - - - -
fs } * fs }
es } * es }
ds } SAVE_ALL state * ds } SAVE_ALL state
eax } * eax }
: : * : :
ebx }<- esp * ebx }<- esp
---------------- * ----------------
*
In order to deliver the nested exception properly, we need to shift * In order to deliver the nested exception properly, we need to shift
everything from the return addr up to the error code so it * everything from the return addr up to the error code so it sits
sits just under the outer exception info. This means that when we * just under the outer exception info. This means that when we
handle the exception, we do it in the context of the outer exception * handle the exception, we do it in the context of the outer
rather than starting a new one. * exception rather than starting a new one.
*
The only caveat is that if the outer eax hasn't been * The only caveat is that if the outer eax hasn't been restored yet
restored yet (ie, it's still on stack), we need to insert * (ie, it's still on stack), we need to insert its value into the
its value into the SAVE_ALL state before going on, since * SAVE_ALL state before going on, since it's usermode state which we
it's usermode state which we eventually need to restore. * eventually need to restore.
*/ */
ENTRY(xen_iret_crit_fixup) ENTRY(xen_iret_crit_fixup)
/* /*
Paranoia: Make sure we're really coming from kernel space. * Paranoia: Make sure we're really coming from kernel space.
One could imagine a case where userspace jumps into the * One could imagine a case where userspace jumps into the
critical range address, but just before the CPU delivers a GP, * critical range address, but just before the CPU delivers a
it decides to deliver an interrupt instead. Unlikely? * GP, it decides to deliver an interrupt instead. Unlikely?
Definitely. Easy to avoid? Yes. The Intel documents * Definitely. Easy to avoid? Yes. The Intel documents
explicitly say that the reported EIP for a bad jump is the * explicitly say that the reported EIP for a bad jump is the
jump instruction itself, not the destination, but some virtual * jump instruction itself, not the destination, but some
environments get this wrong. * virtual environments get this wrong.
*/ */
movl PT_CS(%esp), %ecx movl PT_CS(%esp), %ecx
andl $SEGMENT_RPL_MASK, %ecx andl $SEGMENT_RPL_MASK, %ecx
...@@ -197,15 +205,17 @@ ENTRY(xen_iret_crit_fixup) ...@@ -197,15 +205,17 @@ ENTRY(xen_iret_crit_fixup)
lea PT_ORIG_EAX(%esp), %esi lea PT_ORIG_EAX(%esp), %esi
lea PT_EFLAGS(%esp), %edi lea PT_EFLAGS(%esp), %edi
/* If eip is before iret_restore_end then stack /*
hasn't been restored yet. */ * If eip is before iret_restore_end then stack
* hasn't been restored yet.
*/
cmp $iret_restore_end, %eax cmp $iret_restore_end, %eax
jae 1f jae 1f
movl 0+4(%edi),%eax /* copy EAX (just above top of frame) */ movl 0+4(%edi), %eax /* copy EAX (just above top of frame) */
movl %eax, PT_EAX(%esp) movl %eax, PT_EAX(%esp)
lea ESP_OFFSET(%edi),%edi /* move dest up over saved regs */ lea ESP_OFFSET(%edi), %edi /* move dest up over saved regs */
/* set up the copy */ /* set up the copy */
1: std 1: std
...@@ -213,6 +223,6 @@ ENTRY(xen_iret_crit_fixup) ...@@ -213,6 +223,6 @@ ENTRY(xen_iret_crit_fixup)
rep movsl rep movsl
cld cld
lea 4(%edi),%esp /* point esp to new frame */ lea 4(%edi), %esp /* point esp to new frame */
2: jmp xen_do_upcall 2: jmp xen_do_upcall
/* /*
Asm versions of Xen pv-ops, suitable for either direct use or inlining. * Asm versions of Xen pv-ops, suitable for either direct use or
The inline versions are the same as the direct-use versions, with the * inlining. The inline versions are the same as the direct-use
pre- and post-amble chopped off. * versions, with the pre- and post-amble chopped off.
*
This code is encoded for size rather than absolute efficiency, * This code is encoded for size rather than absolute efficiency, with
with a view to being able to inline as much as possible. * a view to being able to inline as much as possible.
*
We only bother with direct forms (ie, vcpu in pda) of the operations * We only bother with direct forms (ie, vcpu in pda) of the
here; the indirect forms are better handled in C, since they're * operations here; the indirect forms are better handled in C, since
generally too large to inline anyway. * they're generally too large to inline anyway.
*/ */
#include <asm/errno.h> #include <asm/errno.h>
...@@ -21,25 +21,25 @@ ...@@ -21,25 +21,25 @@
#include "xen-asm.h" #include "xen-asm.h"
ENTRY(xen_adjust_exception_frame) ENTRY(xen_adjust_exception_frame)
mov 8+0(%rsp),%rcx mov 8+0(%rsp), %rcx
mov 8+8(%rsp),%r11 mov 8+8(%rsp), %r11
ret $16 ret $16
hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32 hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32
/* /*
Xen64 iret frame: * Xen64 iret frame:
*
ss * ss
rsp * rsp
rflags * rflags
cs * cs
rip <-- standard iret frame * rip <-- standard iret frame
*
flags * flags
*
rcx } * rcx }
r11 }<-- pushed by hypercall page * r11 }<-- pushed by hypercall page
rsp -> rax } * rsp->rax }
*/ */
ENTRY(xen_iret) ENTRY(xen_iret)
pushq $0 pushq $0
...@@ -48,8 +48,8 @@ ENDPATCH(xen_iret) ...@@ -48,8 +48,8 @@ ENDPATCH(xen_iret)
RELOC(xen_iret, 1b+1) RELOC(xen_iret, 1b+1)
/* /*
sysexit is not used for 64-bit processes, so it's * sysexit is not used for 64-bit processes, so it's only ever used to
only ever used to return to 32-bit compat userspace. * return to 32-bit compat userspace.
*/ */
ENTRY(xen_sysexit) ENTRY(xen_sysexit)
pushq $__USER32_DS pushq $__USER32_DS
...@@ -64,10 +64,12 @@ ENDPATCH(xen_sysexit) ...@@ -64,10 +64,12 @@ ENDPATCH(xen_sysexit)
RELOC(xen_sysexit, 1b+1) RELOC(xen_sysexit, 1b+1)
ENTRY(xen_sysret64) ENTRY(xen_sysret64)
/* We're already on the usermode stack at this point, but still /*
with the kernel gs, so we can easily switch back */ * We're already on the usermode stack at this point, but
* still with the kernel gs, so we can easily switch back
*/
movq %rsp, PER_CPU_VAR(old_rsp) movq %rsp, PER_CPU_VAR(old_rsp)
movq PER_CPU_VAR(kernel_stack),%rsp movq PER_CPU_VAR(kernel_stack), %rsp
pushq $__USER_DS pushq $__USER_DS
pushq PER_CPU_VAR(old_rsp) pushq PER_CPU_VAR(old_rsp)
...@@ -81,8 +83,10 @@ ENDPATCH(xen_sysret64) ...@@ -81,8 +83,10 @@ ENDPATCH(xen_sysret64)
RELOC(xen_sysret64, 1b+1) RELOC(xen_sysret64, 1b+1)
ENTRY(xen_sysret32) ENTRY(xen_sysret32)
/* We're already on the usermode stack at this point, but still /*
with the kernel gs, so we can easily switch back */ * We're already on the usermode stack at this point, but
* still with the kernel gs, so we can easily switch back
*/
movq %rsp, PER_CPU_VAR(old_rsp) movq %rsp, PER_CPU_VAR(old_rsp)
movq PER_CPU_VAR(kernel_stack), %rsp movq PER_CPU_VAR(kernel_stack), %rsp
...@@ -98,28 +102,27 @@ ENDPATCH(xen_sysret32) ...@@ -98,28 +102,27 @@ ENDPATCH(xen_sysret32)
RELOC(xen_sysret32, 1b+1) RELOC(xen_sysret32, 1b+1)
/* /*
Xen handles syscall callbacks much like ordinary exceptions, * Xen handles syscall callbacks much like ordinary exceptions, which
which means we have: * means we have:
- kernel gs * - kernel gs
- kernel rsp * - kernel rsp
- an iret-like stack frame on the stack (including rcx and r11): * - an iret-like stack frame on the stack (including rcx and r11):
ss * ss
rsp * rsp
rflags * rflags
cs * cs
rip * rip
r11 * r11
rsp-> rcx * rsp->rcx
*
In all the entrypoints, we undo all that to make it look * In all the entrypoints, we undo all that to make it look like a
like a CPU-generated syscall/sysenter and jump to the normal * CPU-generated syscall/sysenter and jump to the normal entrypoint.
entrypoint.
*/ */
.macro undo_xen_syscall .macro undo_xen_syscall
mov 0*8(%rsp),%rcx mov 0*8(%rsp), %rcx
mov 1*8(%rsp),%r11 mov 1*8(%rsp), %r11
mov 5*8(%rsp),%rsp mov 5*8(%rsp), %rsp
.endm .endm
/* Normal 64-bit system call target */ /* Normal 64-bit system call target */
...@@ -146,7 +149,7 @@ ENDPROC(xen_sysenter_target) ...@@ -146,7 +149,7 @@ ENDPROC(xen_sysenter_target)
ENTRY(xen_syscall32_target) ENTRY(xen_syscall32_target)
ENTRY(xen_sysenter_target) ENTRY(xen_sysenter_target)
lea 16(%rsp), %rsp /* strip %rcx,%r11 */ lea 16(%rsp), %rsp /* strip %rcx, %r11 */
mov $-ENOSYS, %rax mov $-ENOSYS, %rax
pushq $VGCF_in_syscall pushq $VGCF_in_syscall
jmp hypercall_iret jmp hypercall_iret
......
...@@ -19,8 +19,6 @@ ...@@ -19,8 +19,6 @@
#ifndef __GRU_H__ #ifndef __GRU_H__
#define __GRU_H__ #define __GRU_H__
#include <asm/uv/uv.h>
/* /*
* GRU architectural definitions * GRU architectural definitions
*/ */
......
...@@ -36,23 +36,11 @@ ...@@ -36,23 +36,11 @@
#include <linux/interrupt.h> #include <linux/interrupt.h>
#include <linux/proc_fs.h> #include <linux/proc_fs.h>
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include <asm/uv/uv.h>
#include "gru.h" #include "gru.h"
#include "grulib.h" #include "grulib.h"
#include "grutables.h" #include "grutables.h"
#if defined CONFIG_X86_64
#include <asm/genapic.h>
#include <asm/irq.h>
#define IS_UV() is_uv_system()
#elif defined CONFIG_IA64
#include <asm/system.h>
#include <asm/sn/simulator.h>
/* temp support for running on hardware simulator */
#define IS_UV() IS_MEDUSA() || ia64_platform_is("uv")
#else
#define IS_UV() 0
#endif
#include <asm/uv/uv_hub.h> #include <asm/uv/uv_hub.h>
#include <asm/uv/uv_mmrs.h> #include <asm/uv/uv_mmrs.h>
...@@ -381,7 +369,7 @@ static int __init gru_init(void) ...@@ -381,7 +369,7 @@ static int __init gru_init(void)
char id[10]; char id[10];
void *gru_start_vaddr; void *gru_start_vaddr;
if (!IS_UV()) if (!is_uv_system())
return 0; return 0;
#if defined CONFIG_IA64 #if defined CONFIG_IA64
...@@ -451,7 +439,7 @@ static void __exit gru_exit(void) ...@@ -451,7 +439,7 @@ static void __exit gru_exit(void)
int order = get_order(sizeof(struct gru_state) * int order = get_order(sizeof(struct gru_state) *
GRU_CHIPLETS_PER_BLADE); GRU_CHIPLETS_PER_BLADE);
if (!IS_UV()) if (!is_uv_system())
return; return;
for (i = 0; i < GRU_CHIPLETS_PER_BLADE; i++) for (i = 0; i < GRU_CHIPLETS_PER_BLADE; i++)
......
...@@ -15,21 +15,19 @@ ...@@ -15,21 +15,19 @@
#include <linux/mutex.h> #include <linux/mutex.h>
#if defined CONFIG_X86_UV || defined CONFIG_IA64_SGI_UV
#include <asm/uv/uv.h> #include <asm/uv/uv.h>
#define is_uv() is_uv_system()
#endif
#ifndef is_uv
#define is_uv() 0
#endif
#ifdef CONFIG_IA64 #if defined CONFIG_IA64
#include <asm/system.h> #include <asm/system.h>
#include <asm/sn/arch.h> /* defines is_shub1() and is_shub2() */ #include <asm/sn/arch.h> /* defines is_shub1() and is_shub2() */
#define is_shub() ia64_platform_is("sn2") #define is_shub() ia64_platform_is("sn2")
#ifdef CONFIG_IA64_SGI_UV
#define is_uv() ia64_platform_is("uv")
#else
#define is_uv() 0
#endif
#endif
#ifdef CONFIG_X86_64
#include <asm/genapic.h>
#define is_uv() is_uv_system()
#endif #endif
#ifndef is_shub1 #ifndef is_shub1
...@@ -44,10 +42,6 @@ ...@@ -44,10 +42,6 @@
#define is_shub() 0 #define is_shub() 0
#endif #endif
#ifndef is_uv
#define is_uv() 0
#endif
#ifdef USE_DBUG_ON #ifdef USE_DBUG_ON
#define DBUG_ON(condition) BUG_ON(condition) #define DBUG_ON(condition) BUG_ON(condition)
#else #else
......
...@@ -111,6 +111,15 @@ static inline void elf_core_copy_regs(elf_gregset_t *elfregs, struct pt_regs *re ...@@ -111,6 +111,15 @@ static inline void elf_core_copy_regs(elf_gregset_t *elfregs, struct pt_regs *re
#endif #endif
} }
/*
 * Fill *elfregs from the register state in *regs.
 *
 * If the macro ELF_CORE_COPY_KERNEL_REGS is defined it is used to do the
 * copy; otherwise this falls back to elf_core_copy_regs().
 */
static inline void elf_core_copy_kernel_regs(elf_gregset_t *elfregs, struct pt_regs *regs)
{
#ifdef ELF_CORE_COPY_KERNEL_REGS
ELF_CORE_COPY_KERNEL_REGS((*elfregs), regs);
#else
elf_core_copy_regs(elfregs, regs);
#endif
}
static inline int elf_core_copy_task_regs(struct task_struct *t, elf_gregset_t* elfregs) static inline int elf_core_copy_task_regs(struct task_struct *t, elf_gregset_t* elfregs)
{ {
#ifdef ELF_CORE_COPY_TASK_REGS #ifdef ELF_CORE_COPY_TASK_REGS
......
...@@ -8,8 +8,15 @@ ...@@ -8,8 +8,15 @@
#include <asm/percpu.h> #include <asm/percpu.h>
#ifndef PER_CPU_BASE_SECTION
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
#define PER_CPU_BASE_SECTION ".data.percpu" #define PER_CPU_BASE_SECTION ".data.percpu"
#else
#define PER_CPU_BASE_SECTION ".data"
#endif
#endif
#ifdef CONFIG_SMP
#ifdef MODULE #ifdef MODULE
#define PER_CPU_SHARED_ALIGNED_SECTION "" #define PER_CPU_SHARED_ALIGNED_SECTION ""
...@@ -20,7 +27,6 @@ ...@@ -20,7 +27,6 @@
#else #else
#define PER_CPU_BASE_SECTION ".data"
#define PER_CPU_SHARED_ALIGNED_SECTION "" #define PER_CPU_SHARED_ALIGNED_SECTION ""
#define PER_CPU_FIRST_SECTION "" #define PER_CPU_FIRST_SECTION ""
......
...@@ -1130,7 +1130,7 @@ void crash_save_cpu(struct pt_regs *regs, int cpu) ...@@ -1130,7 +1130,7 @@ void crash_save_cpu(struct pt_regs *regs, int cpu)
return; return;
memset(&prstatus, 0, sizeof(prstatus)); memset(&prstatus, 0, sizeof(prstatus));
prstatus.pr_pid = current->pid; prstatus.pr_pid = current->pid;
elf_core_copy_regs(&prstatus.pr_reg, regs); elf_core_copy_kernel_regs(&prstatus.pr_reg, regs);
buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS, buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
&prstatus, sizeof(prstatus)); &prstatus, sizeof(prstatus));
final_note(buf); final_note(buf);
......
...@@ -359,10 +359,6 @@ EXPORT_SYMBOL(warn_slowpath); ...@@ -359,10 +359,6 @@ EXPORT_SYMBOL(warn_slowpath);
#ifdef CONFIG_CC_STACKPROTECTOR #ifdef CONFIG_CC_STACKPROTECTOR
#ifndef GCC_HAS_SP
#warning You have selected the CONFIG_CC_STACKPROTECTOR option, but the gcc used does not support this.
#endif
/* /*
* Called when gcc's -fstack-protector feature is used, and * Called when gcc's -fstack-protector feature is used, and
* gcc detects corruption of the on-stack canary value * gcc detects corruption of the on-stack canary value
......
#!/bin/sh
# Probe a compiler's -fstack-protector output.
#
# All script arguments ($*) are used as the compiler command line. A tiny C
# function with a 200-byte local array is fed to it on stdin and compiled to
# assembly on stdout with -O0 -fstack-protector; compiler errors are discarded.
# Prints "y" if the generated assembly mentions %gs, "n" otherwise.
echo "int foo(void) { char X[200]; return 3; }" | $* -S -xc -c -O0 -fstack-protector - -o - 2> /dev/null | grep -q "%gs"
# $? is grep's exit status: 0 means "%gs" was found in the assembly.
if [ "$?" -eq "0" ] ; then
echo y
else
echo n
fi
#!/bin/sh #!/bin/sh
echo "int foo(void) { char X[200]; return 3; }" | $1 -S -xc -c -O0 -mcmodel=kernel -fstack-protector - -o - 2> /dev/null | grep -q "%gs" echo "int foo(void) { char X[200]; return 3; }" | $* -S -xc -c -O0 -mcmodel=kernel -fstack-protector - -o - 2> /dev/null | grep -q "%gs"
if [ "$?" -eq "0" ] ; then if [ "$?" -eq "0" ] ; then
echo $2 echo y
else
echo n
fi fi
...@@ -415,8 +415,9 @@ static int parse_elf(struct elf_info *info, const char *filename) ...@@ -415,8 +415,9 @@ static int parse_elf(struct elf_info *info, const char *filename)
const char *secstrings const char *secstrings
= (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
const char *secname; const char *secname;
int nobits = sechdrs[i].sh_type == SHT_NOBITS;
if (sechdrs[i].sh_offset > info->size) { if (!nobits && sechdrs[i].sh_offset > info->size) {
fatal("%s is truncated. sechdrs[i].sh_offset=%lu > " fatal("%s is truncated. sechdrs[i].sh_offset=%lu > "
"sizeof(*hrd)=%zu\n", filename, "sizeof(*hrd)=%zu\n", filename,
(unsigned long)sechdrs[i].sh_offset, (unsigned long)sechdrs[i].sh_offset,
...@@ -425,6 +426,8 @@ static int parse_elf(struct elf_info *info, const char *filename) ...@@ -425,6 +426,8 @@ static int parse_elf(struct elf_info *info, const char *filename)
} }
secname = secstrings + sechdrs[i].sh_name; secname = secstrings + sechdrs[i].sh_name;
if (strcmp(secname, ".modinfo") == 0) { if (strcmp(secname, ".modinfo") == 0) {
if (nobits)
fatal("%s has NOBITS .modinfo\n", filename);
info->modinfo = (void *)hdr + sechdrs[i].sh_offset; info->modinfo = (void *)hdr + sechdrs[i].sh_offset;
info->modinfo_len = sechdrs[i].sh_size; info->modinfo_len = sechdrs[i].sh_size;
} else if (strcmp(secname, "__ksymtab") == 0) } else if (strcmp(secname, "__ksymtab") == 0)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment