Commit ab639f35 authored by Ingo Molnar's avatar Ingo Molnar

Merge branch 'core/percpu' into x86/core

parents f8a6b2b9 58105ef1
......@@ -532,8 +532,9 @@ KBUILD_CFLAGS += $(call cc-option,-Wframe-larger-than=${CONFIG_FRAME_WARN})
endif
# Force gcc to behave correct even for buggy distributions
# Arch Makefiles may override this setting
ifndef CONFIG_CC_STACKPROTECTOR
KBUILD_CFLAGS += $(call cc-option, -fno-stack-protector)
endif
ifdef CONFIG_FRAME_POINTER
KBUILD_CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls
......
......@@ -27,12 +27,12 @@ extern void *per_cpu_init(void);
#else /* ! SMP */
#define PER_CPU_ATTRIBUTES __attribute__((__section__(".data.percpu")))
#define per_cpu_init() (__phys_per_cpu_start)
#endif /* SMP */
#define PER_CPU_BASE_SECTION ".data.percpu"
/*
* Be extremely careful when taking the address of this variable! Due to virtual
* remapping, it is different from the canonical address returned by __get_cpu_var(var)!
......
#ifndef _ASM_IA64_UV_UV_H
#define _ASM_IA64_UV_UV_H
#include <asm/system.h>
#include <asm/sn/simulator.h>
static inline int is_uv_system(void)
{
/* temporary support for running on hardware simulator */
return IS_MEDUSA() || ia64_platform_is("uv");
}
#endif /* _ASM_IA64_UV_UV_H */
......@@ -194,6 +194,10 @@ config X86_TRAMPOLINE
depends on SMP || (64BIT && ACPI_SLEEP)
default y
config X86_32_LAZY_GS
def_bool y
depends on X86_32 && !CC_STACKPROTECTOR
config KTIME_SCALAR
def_bool X86_32
source "init/Kconfig"
......@@ -1339,7 +1343,6 @@ config CC_STACKPROTECTOR_ALL
config CC_STACKPROTECTOR
bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)"
depends on X86_64
select CC_STACKPROTECTOR_ALL
---help---
This option turns on the -fstack-protector GCC feature. This
......
......@@ -70,14 +70,17 @@ else
# this works around some issues with generating unwind tables in older gccs
# newer gccs do it by default
KBUILD_CFLAGS += -maccumulate-outgoing-args
endif
stackp := $(CONFIG_SHELL) $(srctree)/scripts/gcc-x86_64-has-stack-protector.sh
stackp-$(CONFIG_CC_STACKPROTECTOR) := $(shell $(stackp) \
"$(CC)" "-fstack-protector -DGCC_HAS_SP" )
stackp-$(CONFIG_CC_STACKPROTECTOR_ALL) += $(shell $(stackp) \
"$(CC)" -fstack-protector-all )
ifdef CONFIG_CC_STACKPROTECTOR
cc_has_sp := $(srctree)/scripts/gcc-x86_$(BITS)-has-stack-protector.sh
ifeq ($(shell $(CONFIG_SHELL) $(cc_has_sp) $(CC)),y)
stackp-y := -fstack-protector
stackp-$(CONFIG_CC_STACKPROTECTOR_ALL) += -fstack-protector-all
KBUILD_CFLAGS += $(stackp-y)
else
$(warning stack protector enabled but no compiler support)
endif
endif
# Stackpointer is addressed different for 32 bit and 64 bit x86
......
......@@ -55,7 +55,7 @@ static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump)
dump->regs.ds = (u16)regs->ds;
dump->regs.es = (u16)regs->es;
dump->regs.fs = (u16)regs->fs;
savesegment(gs, dump->regs.gs);
dump->regs.gs = get_user_gs(regs);
dump->regs.orig_ax = regs->orig_ax;
dump->regs.ip = regs->ip;
dump->regs.cs = (u16)regs->cs;
......
......@@ -112,7 +112,7 @@ extern unsigned int vdso_enabled;
* now struct_user_regs, they are different)
*/
#define ELF_CORE_COPY_REGS(pr_reg, regs) \
#define ELF_CORE_COPY_REGS_COMMON(pr_reg, regs) \
do { \
pr_reg[0] = regs->bx; \
pr_reg[1] = regs->cx; \
......@@ -124,7 +124,6 @@ do { \
pr_reg[7] = regs->ds & 0xffff; \
pr_reg[8] = regs->es & 0xffff; \
pr_reg[9] = regs->fs & 0xffff; \
savesegment(gs, pr_reg[10]); \
pr_reg[11] = regs->orig_ax; \
pr_reg[12] = regs->ip; \
pr_reg[13] = regs->cs & 0xffff; \
......@@ -133,6 +132,18 @@ do { \
pr_reg[16] = regs->ss & 0xffff; \
} while (0);
#define ELF_CORE_COPY_REGS(pr_reg, regs) \
do { \
ELF_CORE_COPY_REGS_COMMON(pr_reg, regs);\
pr_reg[10] = get_user_gs(regs); \
} while (0);
#define ELF_CORE_COPY_KERNEL_REGS(pr_reg, regs) \
do { \
ELF_CORE_COPY_REGS_COMMON(pr_reg, regs);\
savesegment(gs, pr_reg[10]); \
} while (0);
#define ELF_PLATFORM (utsname()->machine)
#define set_personality_64bit() do { } while (0)
......
......@@ -79,7 +79,7 @@ do { \
#ifdef CONFIG_X86_32
#define deactivate_mm(tsk, mm) \
do { \
loadsegment(gs, 0); \
lazy_load_gs(0); \
} while (0)
#else
#define deactivate_mm(tsk, mm) \
......
......@@ -34,6 +34,12 @@
#define PER_CPU_VAR(var) per_cpu__##var
#endif /* SMP */
#ifdef CONFIG_X86_64_SMP
#define INIT_PER_CPU_VAR(var) init_per_cpu__##var
#else
#define INIT_PER_CPU_VAR(var) per_cpu__##var
#endif
#else /* ...!ASSEMBLY */
#include <linux/stringify.h>
......@@ -45,6 +51,22 @@
#define __percpu_arg(x) "%" #x
#endif
/*
* Initialized pointers to per-cpu variables needed for the boot
* processor need to use these macros to get the proper address
* offset from __per_cpu_load on SMP.
*
* There also must be an entry in vmlinux_64.lds.S
*/
#define DECLARE_INIT_PER_CPU(var) \
extern typeof(per_cpu_var(var)) init_per_cpu_var(var)
#ifdef CONFIG_X86_64_SMP
#define init_per_cpu_var(var) init_per_cpu__##var
#else
#define init_per_cpu_var(var) per_cpu_var(var)
#endif
/* For arch-specific code, we can use direct single-insn ops (they
* don't give an lvalue though). */
extern void __bad_percpu_size(void);
......
......@@ -393,8 +393,14 @@ union irq_stack_union {
};
DECLARE_PER_CPU(union irq_stack_union, irq_stack_union);
DECLARE_INIT_PER_CPU(irq_stack_union);
DECLARE_PER_CPU(char *, irq_stack_ptr);
#else /* X86_64 */
#ifdef CONFIG_CC_STACKPROTECTOR
DECLARE_PER_CPU(unsigned long, stack_canary);
#endif
#endif /* X86_64 */
extern void print_cpu_info(struct cpuinfo_x86 *);
extern unsigned int xstate_size;
......
......@@ -28,7 +28,7 @@ struct pt_regs {
int xds;
int xes;
int xfs;
/* int gs; */
int xgs;
long orig_eax;
long eip;
int xcs;
......@@ -50,7 +50,7 @@ struct pt_regs {
unsigned long ds;
unsigned long es;
unsigned long fs;
/* int gs; */
unsigned long gs;
unsigned long orig_ax;
unsigned long ip;
unsigned long cs;
......
......@@ -61,7 +61,7 @@
*
* 26 - ESPFIX small SS
* 27 - per-cpu [ offset to per-cpu data area ]
* 28 - unused
* 28 - stack_canary-20 [ for stack protector ]
* 29 - unused
* 30 - unused
* 31 - TSS for double fault handler
......@@ -95,6 +95,13 @@
#define __KERNEL_PERCPU 0
#endif
#define GDT_ENTRY_STACK_CANARY (GDT_ENTRY_KERNEL_BASE + 16)
#ifdef CONFIG_CC_STACKPROTECTOR
#define __KERNEL_STACK_CANARY (GDT_ENTRY_STACK_CANARY * 8)
#else
#define __KERNEL_STACK_CANARY 0
#endif
#define GDT_ENTRY_DOUBLEFAULT_TSS 31
/*
......
/*
* GCC stack protector support.
*
* Stack protector works by putting predefined pattern at the start of
* the stack frame and verifying that it hasn't been overwritten when
* returning from the function. The pattern is called stack canary
* and unfortunately gcc requires it to be at a fixed offset from %gs.
* On x86_64, the offset is 40 bytes and on x86_32 20 bytes. x86_64
* and x86_32 use segment registers differently and thus handles this
* requirement differently.
*
* On x86_64, %gs is shared by percpu area and stack canary. All
* percpu symbols are zero based and %gs points to the base of percpu
* area. The first occupant of the percpu area is always
* irq_stack_union which contains stack_canary at offset 40. Userland
* %gs is always saved and restored on kernel entry and exit using
* swapgs, so stack protector doesn't add any complexity there.
*
* On x86_32, it's slightly more complicated. As in x86_64, %gs is
* used for userland TLS. Unfortunately, some processors are much
* slower at loading segment registers with different value when
* entering and leaving the kernel, so the kernel uses %fs for percpu
* area and manages %gs lazily so that %gs is switched only when
* necessary, usually during task switch.
*
* As gcc requires the stack canary at %gs:20, %gs can't be managed
* lazily if stack protector is enabled, so the kernel saves and
* restores userland %gs on kernel entry and exit. This behavior is
* controlled by CONFIG_X86_32_LAZY_GS and accessors are defined in
* system.h to hide the details.
*/
#ifndef _ASM_STACKPROTECTOR_H
#define _ASM_STACKPROTECTOR_H 1
#ifdef CONFIG_CC_STACKPROTECTOR
#include <asm/tsc.h>
#include <asm/processor.h>
#include <asm/percpu.h>
#include <asm/system.h>
#include <asm/desc.h>
#include <linux/random.h>
/*
* 24 byte read-only segment initializer for stack canary. Linker
* can't handle the address bit shifting. Address will be set in
* head_32 for boot CPU and setup_per_cpu_areas() for others.
*/
#define GDT_STACK_CANARY_INIT \
[GDT_ENTRY_STACK_CANARY] = { { { 0x00000018, 0x00409000 } } },
/*
* Initialize the stackprotector canary value.
......@@ -15,12 +61,9 @@ static __always_inline void boot_init_stack_canary(void)
u64 canary;
u64 tsc;
/*
* Build time only check to make sure the stack_canary is at
* offset 40 in the pda; this is a gcc ABI requirement
*/
#ifdef CONFIG_X86_64
BUILD_BUG_ON(offsetof(union irq_stack_union, stack_canary) != 40);
#endif
/*
* We both use the random pool and the current TSC as a source
* of randomness. The TSC only matters for very early init,
......@@ -32,7 +75,50 @@ static __always_inline void boot_init_stack_canary(void)
canary += tsc + (tsc << 32UL);
current->stack_canary = canary;
#ifdef CONFIG_X86_64
percpu_write(irq_stack_union.stack_canary, canary);
#else
percpu_write(stack_canary, canary);
#endif
}
static inline void setup_stack_canary_segment(int cpu)
{
#ifdef CONFIG_X86_32
unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu) - 20;
struct desc_struct *gdt_table = get_cpu_gdt_table(cpu);
struct desc_struct desc;
desc = gdt_table[GDT_ENTRY_STACK_CANARY];
desc.base0 = canary & 0xffff;
desc.base1 = (canary >> 16) & 0xff;
desc.base2 = (canary >> 24) & 0xff;
write_gdt_entry(gdt_table, GDT_ENTRY_STACK_CANARY, &desc, DESCTYPE_S);
#endif
}
static inline void load_stack_canary_segment(void)
{
#ifdef CONFIG_X86_32
asm("mov %0, %%gs" : : "r" (__KERNEL_STACK_CANARY) : "memory");
#endif
}
#else /* CC_STACKPROTECTOR */
#define GDT_STACK_CANARY_INIT
/* dummy boot_init_stack_canary() is defined in linux/stackprotector.h */
static inline void setup_stack_canary_segment(int cpu)
{ }
static inline void load_stack_canary_segment(void)
{
#ifdef CONFIG_X86_32
asm volatile ("mov %0, %%gs" : : "r" (0));
#endif
}
#endif /* CC_STACKPROTECTOR */
#endif /* _ASM_STACKPROTECTOR_H */
......@@ -29,21 +29,21 @@ asmlinkage int sys_get_thread_area(struct user_desc __user *);
/* X86_32 only */
#ifdef CONFIG_X86_32
/* kernel/process_32.c */
asmlinkage int sys_fork(struct pt_regs);
asmlinkage int sys_clone(struct pt_regs);
asmlinkage int sys_vfork(struct pt_regs);
asmlinkage int sys_execve(struct pt_regs);
int sys_fork(struct pt_regs *);
int sys_clone(struct pt_regs *);
int sys_vfork(struct pt_regs *);
int sys_execve(struct pt_regs *);
/* kernel/signal_32.c */
asmlinkage int sys_sigsuspend(int, int, old_sigset_t);
asmlinkage int sys_sigaction(int, const struct old_sigaction __user *,
struct old_sigaction __user *);
asmlinkage int sys_sigaltstack(unsigned long);
asmlinkage unsigned long sys_sigreturn(unsigned long);
asmlinkage int sys_rt_sigreturn(unsigned long);
int sys_sigaltstack(struct pt_regs *);
unsigned long sys_sigreturn(struct pt_regs *);
long sys_rt_sigreturn(struct pt_regs *);
/* kernel/ioport.c */
asmlinkage long sys_iopl(unsigned long);
long sys_iopl(struct pt_regs *);
/* kernel/sys_i386_32.c */
asmlinkage long sys_mmap2(unsigned long, unsigned long, unsigned long,
......@@ -59,8 +59,8 @@ struct oldold_utsname;
asmlinkage int sys_olduname(struct oldold_utsname __user *);
/* kernel/vm86_32.c */
asmlinkage int sys_vm86old(struct pt_regs);
asmlinkage int sys_vm86(struct pt_regs);
int sys_vm86old(struct pt_regs *);
int sys_vm86(struct pt_regs *);
#else /* CONFIG_X86_32 */
......
......@@ -23,6 +23,20 @@ struct task_struct *__switch_to(struct task_struct *prev,
#ifdef CONFIG_X86_32
#ifdef CONFIG_CC_STACKPROTECTOR
#define __switch_canary \
"movl %P[task_canary](%[next]), %%ebx\n\t" \
"movl %%ebx, "__percpu_arg([stack_canary])"\n\t"
#define __switch_canary_oparam \
, [stack_canary] "=m" (per_cpu_var(stack_canary))
#define __switch_canary_iparam \
, [task_canary] "i" (offsetof(struct task_struct, stack_canary))
#else /* CC_STACKPROTECTOR */
#define __switch_canary
#define __switch_canary_oparam
#define __switch_canary_iparam
#endif /* CC_STACKPROTECTOR */
/*
* Saving eflags is important. It switches not only IOPL between tasks,
* it also protects other tasks from NT leaking through sysenter etc.
......@@ -44,6 +58,7 @@ do { \
"movl %[next_sp],%%esp\n\t" /* restore ESP */ \
"movl $1f,%[prev_ip]\n\t" /* save EIP */ \
"pushl %[next_ip]\n\t" /* restore EIP */ \
__switch_canary \
"jmp __switch_to\n" /* regparm call */ \
"1:\t" \
"popl %%ebp\n\t" /* restore EBP */ \
......@@ -58,6 +73,8 @@ do { \
"=b" (ebx), "=c" (ecx), "=d" (edx), \
"=S" (esi), "=D" (edi) \
\
__switch_canary_oparam \
\
/* input parameters: */ \
: [next_sp] "m" (next->thread.sp), \
[next_ip] "m" (next->thread.ip), \
......@@ -66,6 +83,8 @@ do { \
[prev] "a" (prev), \
[next] "d" (next) \
\
__switch_canary_iparam \
\
: /* reloaded segment registers */ \
"memory"); \
} while (0)
......@@ -182,6 +201,25 @@ extern void native_load_gs_index(unsigned);
#define savesegment(seg, value) \
asm("mov %%" #seg ",%0":"=r" (value) : : "memory")
/*
* x86_32 user gs accessors.
*/
#ifdef CONFIG_X86_32
#ifdef CONFIG_X86_32_LAZY_GS
#define get_user_gs(regs) (u16)({unsigned long v; savesegment(gs, v); v;})
#define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v))
#define task_user_gs(tsk) ((tsk)->thread.gs)
#define lazy_save_gs(v) savesegment(gs, (v))
#define lazy_load_gs(v) loadsegment(gs, (v))
#else /* X86_32_LAZY_GS */
#define get_user_gs(regs) (u16)((regs)->gs)
#define set_user_gs(regs, v) do { (regs)->gs = (v); } while (0)
#define task_user_gs(tsk) (task_pt_regs(tsk)->gs)
#define lazy_save_gs(v) do { } while (0)
#define lazy_load_gs(v) do { } while (0)
#endif /* X86_32_LAZY_GS */
#endif /* X86_32 */
static inline unsigned long get_limit(unsigned long segment)
{
unsigned long __limit;
......
......@@ -41,7 +41,7 @@ dotraplinkage void do_int3(struct pt_regs *, long);
dotraplinkage void do_overflow(struct pt_regs *, long);
dotraplinkage void do_bounds(struct pt_regs *, long);
dotraplinkage void do_invalid_op(struct pt_regs *, long);
dotraplinkage void do_device_not_available(struct pt_regs);
dotraplinkage void do_device_not_available(struct pt_regs *, long);
dotraplinkage void do_coprocessor_segment_overrun(struct pt_regs *, long);
dotraplinkage void do_invalid_TSS(struct pt_regs *, long);
dotraplinkage void do_segment_not_present(struct pt_regs *, long);
......
......@@ -186,7 +186,7 @@ extern int __get_user_bad(void);
#ifdef CONFIG_X86_32
#define __put_user_asm_u64(x, addr, err) \
#define __put_user_asm_u64(x, addr, err, errret) \
asm volatile("1: movl %%eax,0(%2)\n" \
"2: movl %%edx,4(%2)\n" \
"3:\n" \
......@@ -197,7 +197,7 @@ extern int __get_user_bad(void);
_ASM_EXTABLE(1b, 4b) \
_ASM_EXTABLE(2b, 4b) \
: "=r" (err) \
: "A" (x), "r" (addr), "i" (-EFAULT), "0" (err))
: "A" (x), "r" (addr), "i" (errret), "0" (err))
#define __put_user_asm_ex_u64(x, addr) \
asm volatile("1: movl %%eax,0(%1)\n" \
......@@ -211,8 +211,8 @@ extern int __get_user_bad(void);
asm volatile("call __put_user_8" : "=a" (__ret_pu) \
: "A" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx")
#else
#define __put_user_asm_u64(x, ptr, retval) \
__put_user_asm(x, ptr, retval, "q", "", "Zr", -EFAULT)
#define __put_user_asm_u64(x, ptr, retval, errret) \
__put_user_asm(x, ptr, retval, "q", "", "Zr", errret)
#define __put_user_asm_ex_u64(x, addr) \
__put_user_asm_ex(x, addr, "q", "", "Zr")
#define __put_user_x8(x, ptr, __ret_pu) __put_user_x(8, x, ptr, __ret_pu)
......@@ -289,7 +289,8 @@ do { \
__put_user_asm(x, ptr, retval, "l", "k", "ir", errret); \
break; \
case 8: \
__put_user_asm_u64((__typeof__(*ptr))(x), ptr, retval); \
__put_user_asm_u64((__typeof__(*ptr))(x), ptr, retval, \
errret); \
break; \
default: \
__put_user_bad(); \
......@@ -525,8 +526,6 @@ struct __large_struct { unsigned long buf[100]; };
*/
#define get_user_try uaccess_try
#define get_user_catch(err) uaccess_catch(err)
#define put_user_try uaccess_try
#define put_user_catch(err) uaccess_catch(err)
#define get_user_ex(x, ptr) do { \
unsigned long __gue_val; \
......@@ -534,9 +533,29 @@ struct __large_struct { unsigned long buf[100]; };
(x) = (__force __typeof__(*(ptr)))__gue_val; \
} while (0)
#ifdef CONFIG_X86_WP_WORKS_OK
#define put_user_try uaccess_try
#define put_user_catch(err) uaccess_catch(err)
#define put_user_ex(x, ptr) \
__put_user_size_ex((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
#else /* !CONFIG_X86_WP_WORKS_OK */
#define put_user_try do { \
int __uaccess_err = 0;
#define put_user_catch(err) \
(err) |= __uaccess_err; \
} while (0)
#define put_user_ex(x, ptr) do { \
__uaccess_err |= __put_user(x, ptr); \
} while (0)
#endif /* CONFIG_X86_WP_WORKS_OK */
/*
* movsl can be slow when source and dest are not both 8-byte aligned
*/
......
......@@ -3,6 +3,9 @@
enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
struct cpumask;
struct mm_struct;
#ifdef CONFIG_X86_UV
extern enum uv_system_type get_uv_system_type(void);
......
......@@ -75,6 +75,7 @@ void foo(void)
OFFSET(PT_DS, pt_regs, ds);
OFFSET(PT_ES, pt_regs, es);
OFFSET(PT_FS, pt_regs, fs);
OFFSET(PT_GS, pt_regs, gs);
OFFSET(PT_ORIG_EAX, pt_regs, orig_ax);
OFFSET(PT_EIP, pt_regs, ip);
OFFSET(PT_CS, pt_regs, cs);
......
......@@ -39,6 +39,7 @@
#include <asm/sections.h>
#include <asm/setup.h>
#include <asm/hypervisor.h>
#include <asm/stackprotector.h>
#include "cpu.h"
......@@ -122,6 +123,7 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
[GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } },
[GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } },
GDT_STACK_CANARY_INIT
#endif
} };
EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
......@@ -304,6 +306,7 @@ void load_percpu_segment(int cpu)
loadsegment(gs, 0);
wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu));
#endif
load_stack_canary_segment();
}
/* Current gdt points %fs at the "master" per-cpu area: after this,
......@@ -938,12 +941,8 @@ struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
DEFINE_PER_CPU_FIRST(union irq_stack_union,
irq_stack_union) __aligned(PAGE_SIZE);
#ifdef CONFIG_SMP
DEFINE_PER_CPU(char *, irq_stack_ptr); /* will be set during per cpu init */
#else
DEFINE_PER_CPU(char *, irq_stack_ptr) =
per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
#endif
init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
DEFINE_PER_CPU(unsigned long, kernel_stack) =
(unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
......@@ -986,16 +985,21 @@ unsigned long kernel_eflags;
*/
DEFINE_PER_CPU(struct orig_ist, orig_ist);
#else
#else /* x86_64 */
/* Make sure %fs is initialized properly in idle threads */
#ifdef CONFIG_CC_STACKPROTECTOR
DEFINE_PER_CPU(unsigned long, stack_canary);
#endif
/* Make sure %fs and %gs are initialized properly in idle threads */
struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs)
{
memset(regs, 0, sizeof(struct pt_regs));
regs->fs = __KERNEL_PERCPU;
regs->gs = __KERNEL_STACK_CANARY;
return regs;
}
#endif
#endif /* x86_64 */
/*
* cpu_init() initializes state that is per-CPU. Some data is already
......@@ -1157,9 +1161,6 @@ void __cpuinit cpu_init(void)
__set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
#endif
/* Clear %gs. */
asm volatile ("mov %0, %%gs" : : "r" (0));
/* Clear all 6 debug registers: */
set_debugreg(0, 0);
set_debugreg(0, 1);
......
......@@ -30,12 +30,13 @@
* 1C(%esp) - %ds
* 20(%esp) - %es
* 24(%esp) - %fs
* 28(%esp) - orig_eax
* 2C(%esp) - %eip
* 30(%esp) - %cs
* 34(%esp) - %eflags
* 38(%esp) - %oldesp
* 3C(%esp) - %oldss
* 28(%esp) - %gs saved iff !CONFIG_X86_32_LAZY_GS
* 2C(%esp) - orig_eax
* 30(%esp) - %eip
* 34(%esp) - %cs
* 38(%esp) - %eflags
* 3C(%esp) - %oldesp
* 40(%esp) - %oldss
*
* "current" is in register %ebx during any slow entries.
*/
......@@ -101,121 +102,221 @@
#define resume_userspace_sig resume_userspace
#endif
#define SAVE_ALL \
cld; \
pushl %fs; \
CFI_ADJUST_CFA_OFFSET 4;\
/*CFI_REL_OFFSET fs, 0;*/\
pushl %es; \
CFI_ADJUST_CFA_OFFSET 4;\
/*CFI_REL_OFFSET es, 0;*/\
pushl %ds; \
CFI_ADJUST_CFA_OFFSET 4;\
/*CFI_REL_OFFSET ds, 0;*/\
pushl %eax; \
CFI_ADJUST_CFA_OFFSET 4;\
CFI_REL_OFFSET eax, 0;\
pushl %ebp; \
CFI_ADJUST_CFA_OFFSET 4;\
CFI_REL_OFFSET ebp, 0;\
pushl %edi; \
CFI_ADJUST_CFA_OFFSET 4;\
CFI_REL_OFFSET edi, 0;\
pushl %esi; \
CFI_ADJUST_CFA_OFFSET 4;\
CFI_REL_OFFSET esi, 0;\
pushl %edx; \
CFI_ADJUST_CFA_OFFSET 4;\
CFI_REL_OFFSET edx, 0;\
pushl %ecx; \
CFI_ADJUST_CFA_OFFSET 4;\
CFI_REL_OFFSET ecx, 0;\
pushl %ebx; \
CFI_ADJUST_CFA_OFFSET 4;\
CFI_REL_OFFSET ebx, 0;\
movl $(__USER_DS), %edx; \
movl %edx, %ds; \
movl %edx, %es; \
movl $(__KERNEL_PERCPU), %edx; \
/*
* User gs save/restore
*
* %gs is used for userland TLS and kernel only uses it for stack
* canary which is required to be at %gs:20 by gcc. Read the comment
* at the top of stackprotector.h for more info.
*
* Local labels 98 and 99 are used.
*/
#ifdef CONFIG_X86_32_LAZY_GS
/* unfortunately push/pop can't be no-op */
.macro PUSH_GS
pushl $0
CFI_ADJUST_CFA_OFFSET 4
.endm
.macro POP_GS pop=0
addl $(4 + \pop), %esp
CFI_ADJUST_CFA_OFFSET -(4 + \pop)
.endm
.macro POP_GS_EX
.endm
/* all the rest are no-op */
.macro PTGS_TO_GS
.endm
.macro PTGS_TO_GS_EX
.endm
.macro GS_TO_REG reg
.endm
.macro REG_TO_PTGS reg
.endm
.macro SET_KERNEL_GS reg
.endm
#else /* CONFIG_X86_32_LAZY_GS */
.macro PUSH_GS
pushl %gs
CFI_ADJUST_CFA_OFFSET 4
/*CFI_REL_OFFSET gs, 0*/
.endm
.macro POP_GS pop=0
98: popl %gs
CFI_ADJUST_CFA_OFFSET -4
/*CFI_RESTORE gs*/
.if \pop <> 0
add $\pop, %esp
CFI_ADJUST_CFA_OFFSET -\pop
.endif
.endm
.macro POP_GS_EX
.pushsection .fixup, "ax"
99: movl $0, (%esp)
jmp 98b
.section __ex_table, "a"
.align 4
.long 98b, 99b
.popsection
.endm
.macro PTGS_TO_GS
98: mov PT_GS(%esp), %gs
.endm
.macro PTGS_TO_GS_EX
.pushsection .fixup, "ax"
99: movl $0, PT_GS(%esp)
jmp 98b
.section __ex_table, "a"
.align 4
.long 98b, 99b
.popsection
.endm
.macro GS_TO_REG reg
movl %gs, \reg
/*CFI_REGISTER gs, \reg*/
.endm
.macro REG_TO_PTGS reg
movl \reg, PT_GS(%esp)
/*CFI_REL_OFFSET gs, PT_GS*/
.endm
.macro SET_KERNEL_GS reg
movl $(__KERNEL_STACK_CANARY), \reg
movl \reg, %gs
.endm
#endif /* CONFIG_X86_32_LAZY_GS */
.macro SAVE_ALL
cld
PUSH_GS
pushl %fs
CFI_ADJUST_CFA_OFFSET 4
/*CFI_REL_OFFSET fs, 0;*/
pushl %es
CFI_ADJUST_CFA_OFFSET 4
/*CFI_REL_OFFSET es, 0;*/
pushl %ds
CFI_ADJUST_CFA_OFFSET 4
/*CFI_REL_OFFSET ds, 0;*/
pushl %eax
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET eax, 0
pushl %ebp
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET ebp, 0
pushl %edi
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET edi, 0
pushl %esi
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET esi, 0
pushl %edx
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET edx, 0
pushl %ecx
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET ecx, 0
pushl %ebx
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET ebx, 0
movl $(__USER_DS), %edx
movl %edx, %ds
movl %edx, %es
movl $(__KERNEL_PERCPU), %edx
movl %edx, %fs
SET_KERNEL_GS %edx
.endm
#define RESTORE_INT_REGS \
popl %ebx; \
CFI_ADJUST_CFA_OFFSET -4;\
CFI_RESTORE ebx;\
popl %ecx; \
CFI_ADJUST_CFA_OFFSET -4;\
CFI_RESTORE ecx;\
popl %edx; \
CFI_ADJUST_CFA_OFFSET -4;\
CFI_RESTORE edx;\
popl %esi; \
CFI_ADJUST_CFA_OFFSET -4;\
CFI_RESTORE esi;\
popl %edi; \
CFI_ADJUST_CFA_OFFSET -4;\
CFI_RESTORE edi;\
popl %ebp; \
CFI_ADJUST_CFA_OFFSET -4;\
CFI_RESTORE ebp;\
popl %eax; \
CFI_ADJUST_CFA_OFFSET -4;\
.macro RESTORE_INT_REGS
popl %ebx
CFI_ADJUST_CFA_OFFSET -4
CFI_RESTORE ebx
popl %ecx
CFI_ADJUST_CFA_OFFSET -4
CFI_RESTORE ecx
popl %edx
CFI_ADJUST_CFA_OFFSET -4
CFI_RESTORE edx
popl %esi
CFI_ADJUST_CFA_OFFSET -4
CFI_RESTORE esi
popl %edi
CFI_ADJUST_CFA_OFFSET -4
CFI_RESTORE edi
popl %ebp
CFI_ADJUST_CFA_OFFSET -4
CFI_RESTORE ebp
popl %eax
CFI_ADJUST_CFA_OFFSET -4
CFI_RESTORE eax
.endm
#define RESTORE_REGS \
RESTORE_INT_REGS; \
1: popl %ds; \
CFI_ADJUST_CFA_OFFSET -4;\
/*CFI_RESTORE ds;*/\
2: popl %es; \
CFI_ADJUST_CFA_OFFSET -4;\
/*CFI_RESTORE es;*/\
3: popl %fs; \
CFI_ADJUST_CFA_OFFSET -4;\
/*CFI_RESTORE fs;*/\
.pushsection .fixup,"ax"; \
4: movl $0,(%esp); \
jmp 1b; \
5: movl $0,(%esp); \
jmp 2b; \
6: movl $0,(%esp); \
jmp 3b; \
.section __ex_table,"a";\
.align 4; \
.long 1b,4b; \
.long 2b,5b; \
.long 3b,6b; \
.macro RESTORE_REGS pop=0
RESTORE_INT_REGS
1: popl %ds
CFI_ADJUST_CFA_OFFSET -4
/*CFI_RESTORE ds;*/
2: popl %es
CFI_ADJUST_CFA_OFFSET -4
/*CFI_RESTORE es;*/
3: popl %fs
CFI_ADJUST_CFA_OFFSET -4
/*CFI_RESTORE fs;*/
POP_GS \pop
.pushsection .fixup, "ax"
4: movl $0, (%esp)
jmp 1b
5: movl $0, (%esp)
jmp 2b
6: movl $0, (%esp)
jmp 3b
.section __ex_table, "a"
.align 4
.long 1b, 4b
.long 2b, 5b
.long 3b, 6b
.popsection
POP_GS_EX
.endm
#define RING0_INT_FRAME \
CFI_STARTPROC simple;\
CFI_SIGNAL_FRAME;\
CFI_DEF_CFA esp, 3*4;\
/*CFI_OFFSET cs, -2*4;*/\
.macro RING0_INT_FRAME
CFI_STARTPROC simple
CFI_SIGNAL_FRAME
CFI_DEF_CFA esp, 3*4
/*CFI_OFFSET cs, -2*4;*/
CFI_OFFSET eip, -3*4
.endm
#define RING0_EC_FRAME \
CFI_STARTPROC simple;\
CFI_SIGNAL_FRAME;\
CFI_DEF_CFA esp, 4*4;\
/*CFI_OFFSET cs, -2*4;*/\
.macro RING0_EC_FRAME
CFI_STARTPROC simple
CFI_SIGNAL_FRAME
CFI_DEF_CFA esp, 4*4
/*CFI_OFFSET cs, -2*4;*/
CFI_OFFSET eip, -3*4
.endm
#define RING0_PTREGS_FRAME \
CFI_STARTPROC simple;\
CFI_SIGNAL_FRAME;\
CFI_DEF_CFA esp, PT_OLDESP-PT_EBX;\
/*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/\
CFI_OFFSET eip, PT_EIP-PT_OLDESP;\
/*CFI_OFFSET es, PT_ES-PT_OLDESP;*/\
/*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/\
CFI_OFFSET eax, PT_EAX-PT_OLDESP;\
CFI_OFFSET ebp, PT_EBP-PT_OLDESP;\
CFI_OFFSET edi, PT_EDI-PT_OLDESP;\
CFI_OFFSET esi, PT_ESI-PT_OLDESP;\
CFI_OFFSET edx, PT_EDX-PT_OLDESP;\
CFI_OFFSET ecx, PT_ECX-PT_OLDESP;\
.macro RING0_PTREGS_FRAME
CFI_STARTPROC simple
CFI_SIGNAL_FRAME
CFI_DEF_CFA esp, PT_OLDESP-PT_EBX
/*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/
CFI_OFFSET eip, PT_EIP-PT_OLDESP
/*CFI_OFFSET es, PT_ES-PT_OLDESP;*/
/*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/
CFI_OFFSET eax, PT_EAX-PT_OLDESP
CFI_OFFSET ebp, PT_EBP-PT_OLDESP
CFI_OFFSET edi, PT_EDI-PT_OLDESP
CFI_OFFSET esi, PT_ESI-PT_OLDESP
CFI_OFFSET edx, PT_EDX-PT_OLDESP
CFI_OFFSET ecx, PT_ECX-PT_OLDESP
CFI_OFFSET ebx, PT_EBX-PT_OLDESP
.endm
ENTRY(ret_from_fork)
CFI_STARTPROC
......@@ -362,6 +463,7 @@ sysenter_exit:
xorl %ebp,%ebp
TRACE_IRQS_ON
1: mov PT_FS(%esp), %fs
PTGS_TO_GS
ENABLE_INTERRUPTS_SYSEXIT
#ifdef CONFIG_AUDITSYSCALL
......@@ -410,6 +512,7 @@ sysexit_audit:
.align 4
.long 1b,2b
.popsection
PTGS_TO_GS_EX
ENDPROC(ia32_sysenter_target)
# system call handler stub
......@@ -452,8 +555,7 @@ restore_all:
restore_nocheck:
TRACE_IRQS_IRET
restore_nocheck_notrace:
RESTORE_REGS
addl $4, %esp # skip orig_eax/error_code
RESTORE_REGS 4 # skip orig_eax/error_code
CFI_ADJUST_CFA_OFFSET -4
irq_return:
INTERRUPT_RETURN
......@@ -595,28 +697,50 @@ syscall_badsys:
END(syscall_badsys)
CFI_ENDPROC
#define FIXUP_ESPFIX_STACK \
/* since we are on a wrong stack, we cant make it a C code :( */ \
PER_CPU(gdt_page, %ebx); \
GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \
addl %esp, %eax; \
pushl $__KERNEL_DS; \
CFI_ADJUST_CFA_OFFSET 4; \
pushl %eax; \
CFI_ADJUST_CFA_OFFSET 4; \
lss (%esp), %esp; \
CFI_ADJUST_CFA_OFFSET -8;
#define UNWIND_ESPFIX_STACK \
movl %ss, %eax; \
/* see if on espfix stack */ \
cmpw $__ESPFIX_SS, %ax; \
jne 27f; \
movl $__KERNEL_DS, %eax; \
movl %eax, %ds; \
movl %eax, %es; \
/* switch to normal stack */ \
FIXUP_ESPFIX_STACK; \
27:;
/*
* System calls that need a pt_regs pointer.
*/
#define PTREGSCALL(name) \
ALIGN; \
ptregs_##name: \
leal 4(%esp),%eax; \
jmp sys_##name;
PTREGSCALL(iopl)
PTREGSCALL(fork)
PTREGSCALL(clone)
PTREGSCALL(vfork)
PTREGSCALL(execve)
PTREGSCALL(sigaltstack)
PTREGSCALL(sigreturn)
PTREGSCALL(rt_sigreturn)
PTREGSCALL(vm86)
PTREGSCALL(vm86old)
.macro FIXUP_ESPFIX_STACK
/* since we are on a wrong stack, we cant make it a C code :( */
PER_CPU(gdt_page, %ebx)
GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah)
addl %esp, %eax
pushl $__KERNEL_DS
CFI_ADJUST_CFA_OFFSET 4
pushl %eax
CFI_ADJUST_CFA_OFFSET 4
lss (%esp), %esp
CFI_ADJUST_CFA_OFFSET -8
.endm
.macro UNWIND_ESPFIX_STACK
movl %ss, %eax
/* see if on espfix stack */
cmpw $__ESPFIX_SS, %ax
jne 27f
movl $__KERNEL_DS, %eax
movl %eax, %ds
movl %eax, %es
/* switch to normal stack */
FIXUP_ESPFIX_STACK
27:
.endm
/*
* Build the entry stubs and pointer table with some assembler magic.
......@@ -1070,7 +1194,10 @@ ENTRY(page_fault)
CFI_ADJUST_CFA_OFFSET 4
ALIGN
error_code:
/* the function address is in %fs's slot on the stack */
/* the function address is in %gs's slot on the stack */
pushl %fs
CFI_ADJUST_CFA_OFFSET 4
/*CFI_REL_OFFSET fs, 0*/
pushl %es
CFI_ADJUST_CFA_OFFSET 4
/*CFI_REL_OFFSET es, 0*/
......@@ -1099,20 +1226,15 @@ error_code:
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET ebx, 0
cld
pushl %fs
CFI_ADJUST_CFA_OFFSET 4
/*CFI_REL_OFFSET fs, 0*/
movl $(__KERNEL_PERCPU), %ecx
movl %ecx, %fs
UNWIND_ESPFIX_STACK
popl %ecx
CFI_ADJUST_CFA_OFFSET -4
/*CFI_REGISTER es, ecx*/
movl PT_FS(%esp), %edi # get the function address
GS_TO_REG %ecx
movl PT_GS(%esp), %edi # get the function address
movl PT_ORIG_EAX(%esp), %edx # get the error code
movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
mov %ecx, PT_FS(%esp)
/*CFI_REL_OFFSET fs, ES*/
REG_TO_PTGS %ecx
SET_KERNEL_GS %ecx
movl $(__USER_DS), %ecx
movl %ecx, %ds
movl %ecx, %es
......@@ -1136,26 +1258,27 @@ END(page_fault)
* by hand onto the new stack - while updating the return eip past
* the instruction that would have done it for sysenter.
*/
#define FIX_STACK(offset, ok, label) \
cmpw $__KERNEL_CS,4(%esp); \
jne ok; \
label: \
movl TSS_sysenter_sp0+offset(%esp),%esp; \
CFI_DEF_CFA esp, 0; \
CFI_UNDEFINED eip; \
pushfl; \
CFI_ADJUST_CFA_OFFSET 4; \
pushl $__KERNEL_CS; \
CFI_ADJUST_CFA_OFFSET 4; \
pushl $sysenter_past_esp; \
CFI_ADJUST_CFA_OFFSET 4; \
.macro FIX_STACK offset ok label
cmpw $__KERNEL_CS, 4(%esp)
jne \ok
\label:
movl TSS_sysenter_sp0 + \offset(%esp), %esp
CFI_DEF_CFA esp, 0
CFI_UNDEFINED eip
pushfl
CFI_ADJUST_CFA_OFFSET 4
pushl $__KERNEL_CS
CFI_ADJUST_CFA_OFFSET 4
pushl $sysenter_past_esp
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET eip, 0
.endm
ENTRY(debug)
RING0_INT_FRAME
cmpl $ia32_sysenter_target,(%esp)
jne debug_stack_correct
FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn
debug_stack_correct:
pushl $-1 # mark this as an int
CFI_ADJUST_CFA_OFFSET 4
......@@ -1213,7 +1336,7 @@ nmi_stack_correct:
nmi_stack_fixup:
RING0_INT_FRAME
FIX_STACK(12,nmi_stack_correct, 1)
FIX_STACK 12, nmi_stack_correct, 1
jmp nmi_stack_correct
nmi_debug_stack_check:
......@@ -1224,7 +1347,7 @@ nmi_debug_stack_check:
jb nmi_stack_correct
cmpl $debug_esp_fix_insn,(%esp)
ja nmi_stack_correct
FIX_STACK(24,nmi_stack_correct, 1)
FIX_STACK 24, nmi_stack_correct, 1
jmp nmi_stack_correct
nmi_espfix_stack:
......
......@@ -19,6 +19,7 @@
#include <asm/asm-offsets.h>
#include <asm/setup.h>
#include <asm/processor-flags.h>
#include <asm/percpu.h>
/* Physical address */
#define pa(X) ((X) - __PAGE_OFFSET)
......@@ -437,8 +438,26 @@ is386: movl $2,%ecx # set MP
movl $(__KERNEL_PERCPU), %eax
movl %eax,%fs # set this cpu's percpu
xorl %eax,%eax # Clear GS and LDT
#ifdef CONFIG_CC_STACKPROTECTOR
/*
* The linker can't handle this by relocation. Manually set
* base address in stack canary segment descriptor.
*/
cmpb $0,ready
jne 1f
movl $per_cpu__gdt_page,%eax
movl $per_cpu__stack_canary,%ecx
subl $20, %ecx
movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax)
shrl $16, %ecx
movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax)
movb %ch, 8 * GDT_ENTRY_STACK_CANARY + 7(%eax)
1:
#endif
movl $(__KERNEL_STACK_CANARY),%eax
movl %eax,%gs
xorl %eax,%eax # Clear LDT
lldt %ax
cld # gcc2 wants the direction flag cleared at all times
......
......@@ -205,19 +205,6 @@ ENTRY(secondary_startup_64)
pushq $0
popfq
#ifdef CONFIG_SMP
/*
* Fix up static pointers that need __per_cpu_load added. The assembler
* is unable to do this directly. This is only needed for the boot cpu.
* These values are set up with the correct base addresses by C code for
* secondary cpus.
*/
movq initial_gs(%rip), %rax
cmpl $0, per_cpu__cpu_number(%rax)
jne 1f
addq %rax, early_gdt_descr_base(%rip)
1:
#endif
/*
* We must switch to a new descriptor in kernel space for the GDT
* because soon the kernel won't have access anymore to the userspace
......@@ -275,11 +262,7 @@ ENTRY(secondary_startup_64)
ENTRY(initial_code)
.quad x86_64_start_kernel
ENTRY(initial_gs)
#ifdef CONFIG_SMP
.quad __per_cpu_load
#else
.quad PER_CPU_VAR(irq_stack_union)
#endif
.quad INIT_PER_CPU_VAR(irq_stack_union)
__FINITDATA
ENTRY(stack_start)
......@@ -425,7 +408,7 @@ NEXT_PAGE(level2_spare_pgt)
early_gdt_descr:
.word GDT_ENTRIES*8-1
early_gdt_descr_base:
.quad per_cpu__gdt_page
.quad INIT_PER_CPU_VAR(gdt_page)
ENTRY(phys_base)
/* This must match the first entry in level2_kernel_pgt */
......
......@@ -131,9 +131,8 @@ static int do_iopl(unsigned int level, struct pt_regs *regs)
}
#ifdef CONFIG_X86_32
asmlinkage long sys_iopl(unsigned long regsp)
long sys_iopl(struct pt_regs *regs)
{
struct pt_regs *regs = (struct pt_regs *)&regsp;
unsigned int level = regs->bx;
struct thread_struct *t = &current->thread;
int rc;
......
......@@ -11,6 +11,7 @@
#include <stdarg.h>
#include <linux/stackprotector.h>
#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
......@@ -91,6 +92,15 @@ void cpu_idle(void)
{
int cpu = smp_processor_id();
/*
* If we're the non-boot CPU, nothing set the stack canary up
* for us. CPU0 already has it initialized but no harm in
* doing it again. This is a good place for updating it, as
* we wont ever return from this function (so the invalid
* canaries already on the stack wont ever trigger).
*/
boot_init_stack_canary();
current_thread_info()->status |= TS_POLLING;
/* endless idle loop with no priority at all */
......@@ -131,7 +141,7 @@ void __show_regs(struct pt_regs *regs, int all)
if (user_mode_vm(regs)) {
sp = regs->sp;
ss = regs->ss & 0xffff;
savesegment(gs, gs);
gs = get_user_gs(regs);
} else {
sp = (unsigned long) (&regs->sp);
savesegment(ss, ss);
......@@ -212,6 +222,7 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
regs.ds = __USER_DS;
regs.es = __USER_DS;
regs.fs = __KERNEL_PERCPU;
regs.gs = __KERNEL_STACK_CANARY;
regs.orig_ax = -1;
regs.ip = (unsigned long) kernel_thread_helper;
regs.cs = __KERNEL_CS | get_kernel_rpl();
......@@ -304,7 +315,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
p->thread.ip = (unsigned long) ret_from_fork;
savesegment(gs, p->thread.gs);
task_user_gs(p) = get_user_gs(regs);
tsk = current;
if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
......@@ -342,7 +353,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
__asm__("movl %0, %%gs" : : "r"(0));
set_user_gs(regs, 0);
regs->fs = 0;
set_fs(USER_DS);
regs->ds = __USER_DS;
......@@ -539,7 +550,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
* used %fs or %gs (it does not today), or if the kernel is
* running inside of a hypervisor layer.
*/
savesegment(gs, prev->gs);
lazy_save_gs(prev->gs);
/*
* Load the per-thread Thread-Local Storage descriptor.
......@@ -585,31 +596,31 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
* Restore %gs if needed (which is common)
*/
if (prev->gs | next->gs)
loadsegment(gs, next->gs);
lazy_load_gs(next->gs);
percpu_write(current_task, next_p);
return prev_p;
}
asmlinkage int sys_fork(struct pt_regs regs)
int sys_fork(struct pt_regs *regs)
{
return do_fork(SIGCHLD, regs.sp, &regs, 0, NULL, NULL);
return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
}
asmlinkage int sys_clone(struct pt_regs regs)
int sys_clone(struct pt_regs *regs)
{
unsigned long clone_flags;
unsigned long newsp;
int __user *parent_tidptr, *child_tidptr;
clone_flags = regs.bx;
newsp = regs.cx;
parent_tidptr = (int __user *)regs.dx;
child_tidptr = (int __user *)regs.di;
clone_flags = regs->bx;
newsp = regs->cx;
parent_tidptr = (int __user *)regs->dx;
child_tidptr = (int __user *)regs->di;
if (!newsp)
newsp = regs.sp;
return do_fork(clone_flags, newsp, &regs, 0, parent_tidptr, child_tidptr);
newsp = regs->sp;
return do_fork(clone_flags, newsp, regs, 0, parent_tidptr, child_tidptr);
}
/*
......@@ -622,27 +633,27 @@ asmlinkage int sys_clone(struct pt_regs regs)
* do not have enough call-clobbered registers to hold all
* the information you need.
*/
asmlinkage int sys_vfork(struct pt_regs regs)
int sys_vfork(struct pt_regs *regs)
{
return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.sp, &regs, 0, NULL, NULL);
return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0, NULL, NULL);
}
/*
* sys_execve() executes a new program.
*/
asmlinkage int sys_execve(struct pt_regs regs)
int sys_execve(struct pt_regs *regs)
{
int error;
char *filename;
filename = getname((char __user *) regs.bx);
filename = getname((char __user *) regs->bx);
error = PTR_ERR(filename);
if (IS_ERR(filename))
goto out;
error = do_execve(filename,
(char __user * __user *) regs.cx,
(char __user * __user *) regs.dx,
&regs);
(char __user * __user *) regs->cx,
(char __user * __user *) regs->dx,
regs);
if (error == 0) {
/* Make sure we don't return using sysenter.. */
set_thread_flag(TIF_IRET);
......
......@@ -120,12 +120,11 @@ void cpu_idle(void)
current_thread_info()->status |= TS_POLLING;
/*
* If we're the non-boot CPU, nothing set the PDA stack
* canary up for us - and if we are the boot CPU we have
* a 0 stack canary. This is a good place for updating
* it, as we wont ever return from this function (so the
* invalid canaries already on the stack wont ever
* trigger):
* If we're the non-boot CPU, nothing set the stack canary up
* for us. CPU0 already has it initialized but no harm in
* doing it again. This is a good place for updating it, as
* we wont ever return from this function (so the invalid
* canaries already on the stack wont ever trigger).
*/
boot_init_stack_canary();
......
......@@ -75,10 +75,7 @@ static inline bool invalid_selector(u16 value)
static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
{
BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0);
regno >>= 2;
if (regno > FS)
--regno;
return &regs->bx + regno;
return &regs->bx + (regno >> 2);
}
static u16 get_segment_reg(struct task_struct *task, unsigned long offset)
......@@ -90,9 +87,10 @@ static u16 get_segment_reg(struct task_struct *task, unsigned long offset)
if (offset != offsetof(struct user_regs_struct, gs))
retval = *pt_regs_access(task_pt_regs(task), offset);
else {
retval = task->thread.gs;
if (task == current)
savesegment(gs, retval);
retval = get_user_gs(task_pt_regs(task));
else
retval = task_user_gs(task);
}
return retval;
}
......@@ -126,13 +124,10 @@ static int set_segment_reg(struct task_struct *task,
break;
case offsetof(struct user_regs_struct, gs):
task->thread.gs = value;
if (task == current)
/*
* The user-mode %gs is not affected by
* kernel entry, so we must update the CPU.
*/
loadsegment(gs, value);
set_user_gs(task_pt_regs(task), value);
else
task_user_gs(task) = value;
}
return 0;
......
......@@ -16,6 +16,7 @@
#include <asm/proto.h>
#include <asm/cpumask.h>
#include <asm/cpu.h>
#include <asm/stackprotector.h>
#ifdef CONFIG_DEBUG_PER_CPU_MAPS
# define DBG(x...) printk(KERN_DEBUG x)
......@@ -95,6 +96,7 @@ void __init setup_per_cpu_areas(void)
per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
per_cpu(cpu_number, cpu) = cpu;
setup_percpu_segment(cpu);
setup_stack_canary_segment(cpu);
/*
* Copy data used in early init routines from the
* initial arrays to the per cpu data areas. These
......
......@@ -50,27 +50,23 @@
# define FIX_EFLAGS __FIX_EFLAGS
#endif
#define COPY(x) { \
#define COPY(x) do { \
get_user_ex(regs->x, &sc->x); \
}
} while (0)
#define COPY_SEG(seg) { \
#define GET_SEG(seg) ({ \
unsigned short tmp; \
get_user_ex(tmp, &sc->seg); \
regs->seg = tmp; \
}
tmp; \
})
#define COPY_SEG_CPL3(seg) { \
unsigned short tmp; \
get_user_ex(tmp, &sc->seg); \
regs->seg = tmp | 3; \
}
#define COPY_SEG(seg) do { \
regs->seg = GET_SEG(seg); \
} while (0)
#define GET_SEG(seg) { \
unsigned short tmp; \
get_user_ex(tmp, &sc->seg); \
loadsegment(seg, tmp); \
}
#define COPY_SEG_CPL3(seg) do { \
regs->seg = GET_SEG(seg) | 3; \
} while (0)
static int
restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
......@@ -86,7 +82,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
get_user_try {
#ifdef CONFIG_X86_32
GET_SEG(gs);
set_user_gs(regs, GET_SEG(gs));
COPY_SEG(fs);
COPY_SEG(es);
COPY_SEG(ds);
......@@ -138,12 +134,7 @@ setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
put_user_try {
#ifdef CONFIG_X86_32
{
unsigned int tmp;
savesegment(gs, tmp);
put_user_ex(tmp, (unsigned int __user *)&sc->gs);
}
put_user_ex(get_user_gs(regs), (unsigned int __user *)&sc->gs);
put_user_ex(regs->fs, (unsigned int __user *)&sc->fs);
put_user_ex(regs->es, (unsigned int __user *)&sc->es);
put_user_ex(regs->ds, (unsigned int __user *)&sc->ds);
......@@ -558,14 +549,9 @@ sys_sigaction(int sig, const struct old_sigaction __user *act,
#endif /* CONFIG_X86_32 */
#ifdef CONFIG_X86_32
asmlinkage int sys_sigaltstack(unsigned long bx)
int sys_sigaltstack(struct pt_regs *regs)
{
/*
* This is needed to make gcc realize it doesn't own the
* "struct pt_regs"
*/
struct pt_regs *regs = (struct pt_regs *)&bx;
const stack_t __user *uss = (const stack_t __user *)bx;
const stack_t __user *uss = (const stack_t __user *)regs->bx;
stack_t __user *uoss = (stack_t __user *)regs->cx;
return do_sigaltstack(uss, uoss, regs->sp);
......@@ -583,14 +569,12 @@ sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
* Do a signal return; undo the signal stack.
*/
#ifdef CONFIG_X86_32
asmlinkage unsigned long sys_sigreturn(unsigned long __unused)
unsigned long sys_sigreturn(struct pt_regs *regs)
{
struct sigframe __user *frame;
struct pt_regs *regs;
unsigned long ax;
sigset_t set;
regs = (struct pt_regs *) &__unused;
frame = (struct sigframe __user *)(regs->sp - 8);
if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
......@@ -617,7 +601,7 @@ asmlinkage unsigned long sys_sigreturn(unsigned long __unused)
}
#endif /* CONFIG_X86_32 */
static long do_rt_sigreturn(struct pt_regs *regs)
long sys_rt_sigreturn(struct pt_regs *regs)
{
struct rt_sigframe __user *frame;
unsigned long ax;
......@@ -648,25 +632,6 @@ static long do_rt_sigreturn(struct pt_regs *regs)
return 0;
}
#ifdef CONFIG_X86_32
/*
* Note: do not pass in pt_regs directly as with tail-call optimization
* GCC will incorrectly stomp on the caller's frame and corrupt user-space
* register state:
*/
asmlinkage int sys_rt_sigreturn(unsigned long __unused)
{
struct pt_regs *regs = (struct pt_regs *)&__unused;
return do_rt_sigreturn(regs);
}
#else /* !CONFIG_X86_32 */
asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
{
return do_rt_sigreturn(regs);
}
#endif /* CONFIG_X86_32 */
/*
* OK, we're invoking a handler:
*/
......
ENTRY(sys_call_table)
.long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */
.long sys_exit
.long sys_fork
.long ptregs_fork
.long sys_read
.long sys_write
.long sys_open /* 5 */
......@@ -10,7 +10,7 @@ ENTRY(sys_call_table)
.long sys_creat
.long sys_link
.long sys_unlink /* 10 */
.long sys_execve
.long ptregs_execve
.long sys_chdir
.long sys_time
.long sys_mknod
......@@ -109,17 +109,17 @@ ENTRY(sys_call_table)
.long sys_newlstat
.long sys_newfstat
.long sys_uname
.long sys_iopl /* 110 */
.long ptregs_iopl /* 110 */
.long sys_vhangup
.long sys_ni_syscall /* old "idle" system call */
.long sys_vm86old
.long ptregs_vm86old
.long sys_wait4
.long sys_swapoff /* 115 */
.long sys_sysinfo
.long sys_ipc
.long sys_fsync
.long sys_sigreturn
.long sys_clone /* 120 */
.long ptregs_sigreturn
.long ptregs_clone /* 120 */
.long sys_setdomainname
.long sys_newuname
.long sys_modify_ldt
......@@ -165,14 +165,14 @@ ENTRY(sys_call_table)
.long sys_mremap
.long sys_setresuid16
.long sys_getresuid16 /* 165 */
.long sys_vm86
.long ptregs_vm86
.long sys_ni_syscall /* Old sys_query_module */
.long sys_poll
.long sys_nfsservctl
.long sys_setresgid16 /* 170 */
.long sys_getresgid16
.long sys_prctl
.long sys_rt_sigreturn
.long ptregs_rt_sigreturn
.long sys_rt_sigaction
.long sys_rt_sigprocmask /* 175 */
.long sys_rt_sigpending
......@@ -185,11 +185,11 @@ ENTRY(sys_call_table)
.long sys_getcwd
.long sys_capget
.long sys_capset /* 185 */
.long sys_sigaltstack
.long ptregs_sigaltstack
.long sys_sendfile
.long sys_ni_syscall /* reserved for streams1 */
.long sys_ni_syscall /* reserved for streams2 */
.long sys_vfork /* 190 */
.long ptregs_vfork /* 190 */
.long sys_getrlimit
.long sys_mmap2
.long sys_truncate64
......
......@@ -905,19 +905,20 @@ void math_emulate(struct math_emu_info *info)
}
#endif /* CONFIG_MATH_EMULATION */
dotraplinkage void __kprobes do_device_not_available(struct pt_regs regs)
dotraplinkage void __kprobes
do_device_not_available(struct pt_regs *regs, long error_code)
{
#ifdef CONFIG_X86_32
if (read_cr0() & X86_CR0_EM) {
struct math_emu_info info = { };
conditional_sti(&regs);
conditional_sti(regs);
info.regs = &regs;
info.regs = regs;
math_emulate(&info);
} else {
math_state_restore(); /* interrupts still off */
conditional_sti(&regs);
conditional_sti(regs);
}
#else
math_state_restore();
......
......@@ -158,7 +158,7 @@ struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs)
ret = KVM86->regs32;
ret->fs = current->thread.saved_fs;
loadsegment(gs, current->thread.saved_gs);
set_user_gs(ret, current->thread.saved_gs);
return ret;
}
......@@ -197,9 +197,9 @@ static void mark_screen_rdonly(struct mm_struct *mm)
static int do_vm86_irq_handling(int subfunction, int irqnumber);
static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk);
asmlinkage int sys_vm86old(struct pt_regs regs)
int sys_vm86old(struct pt_regs *regs)
{
struct vm86_struct __user *v86 = (struct vm86_struct __user *)regs.bx;
struct vm86_struct __user *v86 = (struct vm86_struct __user *)regs->bx;
struct kernel_vm86_struct info; /* declare this _on top_,
* this avoids wasting of stack space.
* This remains on the stack until we
......@@ -218,7 +218,7 @@ asmlinkage int sys_vm86old(struct pt_regs regs)
if (tmp)
goto out;
memset(&info.vm86plus, 0, (int)&info.regs32 - (int)&info.vm86plus);
info.regs32 = &regs;
info.regs32 = regs;
tsk->thread.vm86_info = v86;
do_sys_vm86(&info, tsk);
ret = 0; /* we never return here */
......@@ -227,7 +227,7 @@ asmlinkage int sys_vm86old(struct pt_regs regs)
}
asmlinkage int sys_vm86(struct pt_regs regs)
int sys_vm86(struct pt_regs *regs)
{
struct kernel_vm86_struct info; /* declare this _on top_,
* this avoids wasting of stack space.
......@@ -239,12 +239,12 @@ asmlinkage int sys_vm86(struct pt_regs regs)
struct vm86plus_struct __user *v86;
tsk = current;
switch (regs.bx) {
switch (regs->bx) {
case VM86_REQUEST_IRQ:
case VM86_FREE_IRQ:
case VM86_GET_IRQ_BITS:
case VM86_GET_AND_RESET_IRQ:
ret = do_vm86_irq_handling(regs.bx, (int)regs.cx);
ret = do_vm86_irq_handling(regs->bx, (int)regs->cx);
goto out;
case VM86_PLUS_INSTALL_CHECK:
/*
......@@ -261,14 +261,14 @@ asmlinkage int sys_vm86(struct pt_regs regs)
ret = -EPERM;
if (tsk->thread.saved_sp0)
goto out;
v86 = (struct vm86plus_struct __user *)regs.cx;
v86 = (struct vm86plus_struct __user *)regs->cx;
tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs,
offsetof(struct kernel_vm86_struct, regs32) -
sizeof(info.regs));
ret = -EFAULT;
if (tmp)
goto out;
info.regs32 = &regs;
info.regs32 = regs;
info.vm86plus.is_vm86pus = 1;
tsk->thread.vm86_info = (struct vm86_struct __user *)v86;
do_sys_vm86(&info, tsk);
......@@ -323,7 +323,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
info->regs32->ax = 0;
tsk->thread.saved_sp0 = tsk->thread.sp0;
tsk->thread.saved_fs = info->regs32->fs;
savesegment(gs, tsk->thread.saved_gs);
tsk->thread.saved_gs = get_user_gs(info->regs32);
tss = &per_cpu(init_tss, get_cpu());
tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0;
......
......@@ -257,6 +257,14 @@ SECTIONS
DWARF_DEBUG
}
/*
* Per-cpu symbols which need to be offset from __per_cpu_load
* for the boot processor.
*/
#define INIT_PER_CPU(x) init_per_cpu__##x = per_cpu__##x + __per_cpu_load
INIT_PER_CPU(gdt_page);
INIT_PER_CPU(irq_stack_union);
/*
* Build-time check on the image size:
*/
......
......@@ -283,7 +283,7 @@ static void lguest_load_tls(struct thread_struct *t, unsigned int cpu)
/* There's one problem which normal hardware doesn't have: the Host
* can't handle us removing entries we're currently using. So we clear
* the GS register here: if it's needed it'll be reloaded anyway. */
loadsegment(gs, 0);
lazy_load_gs(0);
lazy_hcall(LHCALL_LOAD_TLS, __pa(&t->tls_array), cpu, 0);
}
......
......@@ -150,11 +150,9 @@ static long pm_address(u_char FPU_modrm, u_char segment,
#endif /* PARANOID */
switch (segment) {
/* gs isn't used by the kernel, so it still has its
user-space value. */
case PREFIX_GS_ - 1:
/* N.B. - movl %seg, mem is a 2 byte write regardless of prefix */
savesegment(gs, addr->selector);
/* user gs handling can be lazy, use special accessors */
addr->selector = get_user_gs(FPU_info->regs);
break;
default:
addr->selector = PM_REG_(segment);
......
......@@ -702,7 +702,7 @@ void __cpuinit numa_set_node(int cpu, int node)
}
#ifdef CONFIG_DEBUG_PER_CPU_MAPS
if (cpu >= nr_cpu_ids || !per_cpu_offset(cpu)) {
if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) {
printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu);
dump_stack();
return;
......@@ -790,7 +790,7 @@ int early_cpu_to_node(int cpu)
if (early_per_cpu_ptr(x86_cpu_to_node_map))
return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
if (!per_cpu_offset(cpu)) {
if (!cpu_possible(cpu)) {
printk(KERN_WARNING
"early_cpu_to_node(%d): no per_cpu area!\n", cpu);
dump_stack();
......
......@@ -38,7 +38,7 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE
$(call if_changed,objcopy)
CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \
$(filter -g%,$(KBUILD_CFLAGS))
$(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector)
$(vobjs): KBUILD_CFLAGS += $(CFL)
......
......@@ -323,13 +323,14 @@ static void load_TLS_descriptor(struct thread_struct *t,
static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
{
/*
* XXX sleazy hack: If we're being called in a lazy-cpu zone,
* it means we're in a context switch, and %gs has just been
* saved. This means we can zero it out to prevent faults on
* exit from the hypervisor if the next process has no %gs.
* Either way, it has been saved, and the new value will get
* loaded properly. This will go away as soon as Xen has been
* modified to not save/restore %gs for normal hypercalls.
* XXX sleazy hack: If we're being called in a lazy-cpu zone
* and lazy gs handling is enabled, it means we're in a
* context switch, and %gs has just been saved. This means we
* can zero it out to prevent faults on exit from the
* hypervisor if the next process has no %gs. Either way, it
* has been saved, and the new value will get loaded properly.
* This will go away as soon as Xen has been modified to not
* save/restore %gs for normal hypercalls.
*
* On x86_64, this hack is not used for %gs, because gs points
* to KERNEL_GS_BASE (and uses it for PDA references), so we
......@@ -341,7 +342,7 @@ static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
*/
if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) {
#ifdef CONFIG_X86_32
loadsegment(gs, 0);
lazy_load_gs(0);
#else
loadsegment(fs, 0);
#endif
......
/*
Asm versions of Xen pv-ops, suitable for either direct use or inlining.
The inline versions are the same as the direct-use versions, with the
pre- and post-amble chopped off.
This code is encoded for size rather than absolute efficiency,
with a view to being able to inline as much as possible.
We only bother with direct forms (ie, vcpu in percpu data) of
the operations here; the indirect forms are better handled in
C, since they're generally too large to inline anyway.
* Asm versions of Xen pv-ops, suitable for either direct use or
* inlining. The inline versions are the same as the direct-use
* versions, with the pre- and post-amble chopped off.
*
* This code is encoded for size rather than absolute efficiency, with
* a view to being able to inline as much as possible.
*
* We only bother with direct forms (ie, vcpu in percpu data) of the
* operations here; the indirect forms are better handled in C, since
* they're generally too large to inline anyway.
*/
#include <asm/asm-offsets.h>
......@@ -18,17 +18,19 @@
#include "xen-asm.h"
/*
Enable events. This clears the event mask and tests the pending
event status with one and operation. If there are pending
events, then enter the hypervisor to get them handled.
* Enable events. This clears the event mask and tests the pending
* event status with one and operation. If there are pending events,
* then enter the hypervisor to get them handled.
*/
ENTRY(xen_irq_enable_direct)
/* Unmask events */
movb $0, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
/* Preempt here doesn't matter because that will deal with
any pending interrupts. The pending check may end up being
run on the wrong CPU, but that doesn't hurt. */
/*
* Preempt here doesn't matter because that will deal with any
* pending interrupts. The pending check may end up being run
* on the wrong CPU, but that doesn't hurt.
*/
/* Test for pending */
testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
......@@ -43,8 +45,8 @@ ENDPATCH(xen_irq_enable_direct)
/*
Disabling events is simply a matter of making the event mask
non-zero.
* Disabling events is simply a matter of making the event mask
* non-zero.
*/
ENTRY(xen_irq_disable_direct)
movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
......@@ -54,18 +56,18 @@ ENDPATCH(xen_irq_disable_direct)
RELOC(xen_irq_disable_direct, 0)
/*
(xen_)save_fl is used to get the current interrupt enable status.
Callers expect the status to be in X86_EFLAGS_IF, and other bits
may be set in the return value. We take advantage of this by
making sure that X86_EFLAGS_IF has the right value (and other bits
in that byte are 0), but other bits in the return value are
undefined. We need to toggle the state of the bit, because
Xen and x86 use opposite senses (mask vs enable).
* (xen_)save_fl is used to get the current interrupt enable status.
* Callers expect the status to be in X86_EFLAGS_IF, and other bits
* may be set in the return value. We take advantage of this by
* making sure that X86_EFLAGS_IF has the right value (and other bits
* in that byte are 0), but other bits in the return value are
* undefined. We need to toggle the state of the bit, because Xen and
* x86 use opposite senses (mask vs enable).
*/
ENTRY(xen_save_fl_direct)
testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
setz %ah
addb %ah,%ah
addb %ah, %ah
ENDPATCH(xen_save_fl_direct)
ret
ENDPROC(xen_save_fl_direct)
......@@ -73,12 +75,11 @@ ENDPATCH(xen_save_fl_direct)
/*
In principle the caller should be passing us a value return
from xen_save_fl_direct, but for robustness sake we test only
the X86_EFLAGS_IF flag rather than the whole byte. After
setting the interrupt mask state, it checks for unmasked
pending events and enters the hypervisor to get them delivered
if so.
* In principle the caller should be passing us a value return from
* xen_save_fl_direct, but for robustness sake we test only the
* X86_EFLAGS_IF flag rather than the whole byte. After setting the
* interrupt mask state, it checks for unmasked pending events and
* enters the hypervisor to get them delivered if so.
*/
ENTRY(xen_restore_fl_direct)
#ifdef CONFIG_X86_64
......@@ -87,9 +88,11 @@ ENTRY(xen_restore_fl_direct)
testb $X86_EFLAGS_IF>>8, %ah
#endif
setz PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
/* Preempt here doesn't matter because that will deal with
any pending interrupts. The pending check may end up being
run on the wrong CPU, but that doesn't hurt. */
/*
* Preempt here doesn't matter because that will deal with any
* pending interrupts. The pending check may end up being run
* on the wrong CPU, but that doesn't hurt.
*/
/* check for unmasked and pending */
cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
......@@ -103,8 +106,8 @@ ENDPATCH(xen_restore_fl_direct)
/*
Force an event check by making a hypercall,
but preserve regs before making the call.
* Force an event check by making a hypercall, but preserve regs
* before making the call.
*/
check_events:
#ifdef CONFIG_X86_32
......@@ -137,4 +140,3 @@ check_events:
pop %rax
#endif
ret
/*
Asm versions of Xen pv-ops, suitable for either direct use or inlining.
The inline versions are the same as the direct-use versions, with the
pre- and post-amble chopped off.
This code is encoded for size rather than absolute efficiency,
with a view to being able to inline as much as possible.
We only bother with direct forms (ie, vcpu in pda) of the operations
here; the indirect forms are better handled in C, since they're
generally too large to inline anyway.
* Asm versions of Xen pv-ops, suitable for either direct use or
* inlining. The inline versions are the same as the direct-use
* versions, with the pre- and post-amble chopped off.
*
* This code is encoded for size rather than absolute efficiency, with
* a view to being able to inline as much as possible.
*
* We only bother with direct forms (ie, vcpu in pda) of the
* operations here; the indirect forms are better handled in C, since
* they're generally too large to inline anyway.
*/
//#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/processor-flags.h>
#include <asm/segment.h>
......@@ -21,8 +20,8 @@
#include "xen-asm.h"
/*
Force an event check by making a hypercall,
but preserve regs before making the call.
* Force an event check by making a hypercall, but preserve regs
* before making the call.
*/
check_events:
push %eax
......@@ -35,10 +34,10 @@ check_events:
ret
/*
We can't use sysexit directly, because we're not running in ring0.
But we can easily fake it up using iret. Assuming xen_sysexit
is jumped to with a standard stack frame, we can just strip it
back to a standard iret frame and use iret.
* We can't use sysexit directly, because we're not running in ring0.
* But we can easily fake it up using iret. Assuming xen_sysexit is
* jumped to with a standard stack frame, we can just strip it back to
* a standard iret frame and use iret.
*/
ENTRY(xen_sysexit)
movl PT_EAX(%esp), %eax /* Shouldn't be necessary? */
......@@ -49,33 +48,31 @@ ENTRY(xen_sysexit)
ENDPROC(xen_sysexit)
/*
This is run where a normal iret would be run, with the same stack setup:
8: eflags
4: cs
esp-> 0: eip
This attempts to make sure that any pending events are dealt
with on return to usermode, but there is a small window in
which an event can happen just before entering usermode. If
the nested interrupt ends up setting one of the TIF_WORK_MASK
pending work flags, they will not be tested again before
returning to usermode. This means that a process can end up
with pending work, which will be unprocessed until the process
enters and leaves the kernel again, which could be an
unbounded amount of time. This means that a pending signal or
reschedule event could be indefinitely delayed.
The fix is to notice a nested interrupt in the critical
window, and if one occurs, then fold the nested interrupt into
the current interrupt stack frame, and re-process it
iteratively rather than recursively. This means that it will
exit via the normal path, and all pending work will be dealt
with appropriately.
Because the nested interrupt handler needs to deal with the
current stack state in whatever form its in, we keep things
simple by only using a single register which is pushed/popped
on the stack.
* This is run where a normal iret would be run, with the same stack setup:
* 8: eflags
* 4: cs
* esp-> 0: eip
*
* This attempts to make sure that any pending events are dealt with
* on return to usermode, but there is a small window in which an
* event can happen just before entering usermode. If the nested
* interrupt ends up setting one of the TIF_WORK_MASK pending work
* flags, they will not be tested again before returning to
* usermode. This means that a process can end up with pending work,
* which will be unprocessed until the process enters and leaves the
* kernel again, which could be an unbounded amount of time. This
* means that a pending signal or reschedule event could be
* indefinitely delayed.
*
* The fix is to notice a nested interrupt in the critical window, and
* if one occurs, then fold the nested interrupt into the current
* interrupt stack frame, and re-process it iteratively rather than
* recursively. This means that it will exit via the normal path, and
* all pending work will be dealt with appropriately.
*
* Because the nested interrupt handler needs to deal with the current
* stack state in whatever form its in, we keep things simple by only
* using a single register which is pushed/popped on the stack.
*/
ENTRY(xen_iret)
/* test eflags for special cases */
......@@ -85,13 +82,15 @@ ENTRY(xen_iret)
push %eax
ESP_OFFSET=4 # bytes pushed onto stack
/* Store vcpu_info pointer for easy access. Do it this
way to avoid having to reload %fs */
/*
* Store vcpu_info pointer for easy access. Do it this way to
* avoid having to reload %fs
*/
#ifdef CONFIG_SMP
GET_THREAD_INFO(%eax)
movl TI_cpu(%eax),%eax
movl __per_cpu_offset(,%eax,4),%eax
mov per_cpu__xen_vcpu(%eax),%eax
movl TI_cpu(%eax), %eax
movl __per_cpu_offset(,%eax,4), %eax
mov per_cpu__xen_vcpu(%eax), %eax
#else
movl per_cpu__xen_vcpu, %eax
#endif
......@@ -99,37 +98,46 @@ ENTRY(xen_iret)
/* check IF state we're restoring */
testb $X86_EFLAGS_IF>>8, 8+1+ESP_OFFSET(%esp)
/* Maybe enable events. Once this happens we could get a
recursive event, so the critical region starts immediately
afterwards. However, if that happens we don't end up
resuming the code, so we don't have to be worried about
being preempted to another CPU. */
/*
* Maybe enable events. Once this happens we could get a
* recursive event, so the critical region starts immediately
* afterwards. However, if that happens we don't end up
* resuming the code, so we don't have to be worried about
* being preempted to another CPU.
*/
setz XEN_vcpu_info_mask(%eax)
xen_iret_start_crit:
/* check for unmasked and pending */
cmpw $0x0001, XEN_vcpu_info_pending(%eax)
/* If there's something pending, mask events again so we
can jump back into xen_hypervisor_callback */
/*
* If there's something pending, mask events again so we can
* jump back into xen_hypervisor_callback
*/
sete XEN_vcpu_info_mask(%eax)
popl %eax
/* From this point on the registers are restored and the stack
updated, so we don't need to worry about it if we're preempted */
/*
* From this point on the registers are restored and the stack
* updated, so we don't need to worry about it if we're
* preempted
*/
iret_restore_end:
/* Jump to hypervisor_callback after fixing up the stack.
Events are masked, so jumping out of the critical
region is OK. */
/*
* Jump to hypervisor_callback after fixing up the stack.
* Events are masked, so jumping out of the critical region is
* OK.
*/
je xen_hypervisor_callback
1: iret
xen_iret_end_crit:
.section __ex_table,"a"
.section __ex_table, "a"
.align 4
.long 1b,iret_exc
.long 1b, iret_exc
.previous
hyper_iret:
......@@ -139,55 +147,55 @@ hyper_iret:
.globl xen_iret_start_crit, xen_iret_end_crit
/*
This is called by xen_hypervisor_callback in entry.S when it sees
that the EIP at the time of interrupt was between xen_iret_start_crit
and xen_iret_end_crit. We're passed the EIP in %eax so we can do
a more refined determination of what to do.
The stack format at this point is:
----------------
ss : (ss/esp may be present if we came from usermode)
esp :
eflags } outer exception info
cs }
eip }
---------------- <- edi (copy dest)
eax : outer eax if it hasn't been restored
----------------
eflags } nested exception info
cs } (no ss/esp because we're nested
eip } from the same ring)
orig_eax }<- esi (copy src)
- - - - - - - -
fs }
es }
ds } SAVE_ALL state
eax }
: :
ebx }<- esp
----------------
In order to deliver the nested exception properly, we need to shift
everything from the return addr up to the error code so it
sits just under the outer exception info. This means that when we
handle the exception, we do it in the context of the outer exception
rather than starting a new one.
The only caveat is that if the outer eax hasn't been
restored yet (ie, it's still on stack), we need to insert
its value into the SAVE_ALL state before going on, since
it's usermode state which we eventually need to restore.
* This is called by xen_hypervisor_callback in entry.S when it sees
* that the EIP at the time of interrupt was between
* xen_iret_start_crit and xen_iret_end_crit. We're passed the EIP in
* %eax so we can do a more refined determination of what to do.
*
* The stack format at this point is:
* ----------------
* ss : (ss/esp may be present if we came from usermode)
* esp :
* eflags } outer exception info
* cs }
* eip }
* ---------------- <- edi (copy dest)
* eax : outer eax if it hasn't been restored
* ----------------
* eflags } nested exception info
* cs } (no ss/esp because we're nested
* eip } from the same ring)
* orig_eax }<- esi (copy src)
* - - - - - - - -
* fs }
* es }
* ds } SAVE_ALL state
* eax }
* : :
* ebx }<- esp
* ----------------
*
* In order to deliver the nested exception properly, we need to shift
* everything from the return addr up to the error code so it sits
* just under the outer exception info. This means that when we
* handle the exception, we do it in the context of the outer
* exception rather than starting a new one.
*
* The only caveat is that if the outer eax hasn't been restored yet
* (ie, it's still on stack), we need to insert its value into the
* SAVE_ALL state before going on, since it's usermode state which we
* eventually need to restore.
*/
ENTRY(xen_iret_crit_fixup)
/*
Paranoia: Make sure we're really coming from kernel space.
One could imagine a case where userspace jumps into the
critical range address, but just before the CPU delivers a GP,
it decides to deliver an interrupt instead. Unlikely?
Definitely. Easy to avoid? Yes. The Intel documents
explicitly say that the reported EIP for a bad jump is the
jump instruction itself, not the destination, but some virtual
environments get this wrong.
* Paranoia: Make sure we're really coming from kernel space.
* One could imagine a case where userspace jumps into the
* critical range address, but just before the CPU delivers a
* GP, it decides to deliver an interrupt instead. Unlikely?
* Definitely. Easy to avoid? Yes. The Intel documents
* explicitly say that the reported EIP for a bad jump is the
* jump instruction itself, not the destination, but some
* virtual environments get this wrong.
*/
movl PT_CS(%esp), %ecx
andl $SEGMENT_RPL_MASK, %ecx
......@@ -197,15 +205,17 @@ ENTRY(xen_iret_crit_fixup)
lea PT_ORIG_EAX(%esp), %esi
lea PT_EFLAGS(%esp), %edi
/* If eip is before iret_restore_end then stack
hasn't been restored yet. */
/*
* If eip is before iret_restore_end then stack
* hasn't been restored yet.
*/
cmp $iret_restore_end, %eax
jae 1f
movl 0+4(%edi),%eax /* copy EAX (just above top of frame) */
movl 0+4(%edi), %eax /* copy EAX (just above top of frame) */
movl %eax, PT_EAX(%esp)
lea ESP_OFFSET(%edi),%edi /* move dest up over saved regs */
lea ESP_OFFSET(%edi), %edi /* move dest up over saved regs */
/* set up the copy */
1: std
......@@ -213,6 +223,6 @@ ENTRY(xen_iret_crit_fixup)
rep movsl
cld
lea 4(%edi),%esp /* point esp to new frame */
lea 4(%edi), %esp /* point esp to new frame */
2: jmp xen_do_upcall
/*
Asm versions of Xen pv-ops, suitable for either direct use or inlining.
The inline versions are the same as the direct-use versions, with the
pre- and post-amble chopped off.
This code is encoded for size rather than absolute efficiency,
with a view to being able to inline as much as possible.
We only bother with direct forms (ie, vcpu in pda) of the operations
here; the indirect forms are better handled in C, since they're
generally too large to inline anyway.
* Asm versions of Xen pv-ops, suitable for either direct use or
* inlining. The inline versions are the same as the direct-use
* versions, with the pre- and post-amble chopped off.
*
* This code is encoded for size rather than absolute efficiency, with
* a view to being able to inline as much as possible.
*
* We only bother with direct forms (ie, vcpu in pda) of the
* operations here; the indirect forms are better handled in C, since
* they're generally too large to inline anyway.
*/
#include <asm/errno.h>
......@@ -21,25 +21,25 @@
#include "xen-asm.h"
ENTRY(xen_adjust_exception_frame)
mov 8+0(%rsp),%rcx
mov 8+8(%rsp),%r11
mov 8+0(%rsp), %rcx
mov 8+8(%rsp), %r11
ret $16
hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32
/*
Xen64 iret frame:
ss
rsp
rflags
cs
rip <-- standard iret frame
flags
rcx }
r11 }<-- pushed by hypercall page
rsp -> rax }
* Xen64 iret frame:
*
* ss
* rsp
* rflags
* cs
* rip <-- standard iret frame
*
* flags
*
* rcx }
* r11 }<-- pushed by hypercall page
* rsp->rax }
*/
ENTRY(xen_iret)
pushq $0
......@@ -48,8 +48,8 @@ ENDPATCH(xen_iret)
RELOC(xen_iret, 1b+1)
/*
sysexit is not used for 64-bit processes, so it's
only ever used to return to 32-bit compat userspace.
* sysexit is not used for 64-bit processes, so it's only ever used to
* return to 32-bit compat userspace.
*/
ENTRY(xen_sysexit)
pushq $__USER32_DS
......@@ -64,10 +64,12 @@ ENDPATCH(xen_sysexit)
RELOC(xen_sysexit, 1b+1)
ENTRY(xen_sysret64)
/* We're already on the usermode stack at this point, but still
with the kernel gs, so we can easily switch back */
/*
* We're already on the usermode stack at this point, but
* still with the kernel gs, so we can easily switch back
*/
movq %rsp, PER_CPU_VAR(old_rsp)
movq PER_CPU_VAR(kernel_stack),%rsp
movq PER_CPU_VAR(kernel_stack), %rsp
pushq $__USER_DS
pushq PER_CPU_VAR(old_rsp)
......@@ -81,8 +83,10 @@ ENDPATCH(xen_sysret64)
RELOC(xen_sysret64, 1b+1)
ENTRY(xen_sysret32)
/* We're already on the usermode stack at this point, but still
with the kernel gs, so we can easily switch back */
/*
* We're already on the usermode stack at this point, but
* still with the kernel gs, so we can easily switch back
*/
movq %rsp, PER_CPU_VAR(old_rsp)
movq PER_CPU_VAR(kernel_stack), %rsp
......@@ -98,28 +102,27 @@ ENDPATCH(xen_sysret32)
RELOC(xen_sysret32, 1b+1)
/*
Xen handles syscall callbacks much like ordinary exceptions,
which means we have:
- kernel gs
- kernel rsp
- an iret-like stack frame on the stack (including rcx and r11):
ss
rsp
rflags
cs
rip
r11
rsp-> rcx
In all the entrypoints, we undo all that to make it look
like a CPU-generated syscall/sysenter and jump to the normal
entrypoint.
* Xen handles syscall callbacks much like ordinary exceptions, which
* means we have:
* - kernel gs
* - kernel rsp
* - an iret-like stack frame on the stack (including rcx and r11):
* ss
* rsp
* rflags
* cs
* rip
* r11
* rsp->rcx
*
* In all the entrypoints, we undo all that to make it look like a
* CPU-generated syscall/sysenter and jump to the normal entrypoint.
*/
.macro undo_xen_syscall
mov 0*8(%rsp),%rcx
mov 1*8(%rsp),%r11
mov 5*8(%rsp),%rsp
mov 0*8(%rsp), %rcx
mov 1*8(%rsp), %r11
mov 5*8(%rsp), %rsp
.endm
/* Normal 64-bit system call target */
......@@ -146,7 +149,7 @@ ENDPROC(xen_sysenter_target)
ENTRY(xen_syscall32_target)
ENTRY(xen_sysenter_target)
lea 16(%rsp), %rsp /* strip %rcx,%r11 */
lea 16(%rsp), %rsp /* strip %rcx, %r11 */
mov $-ENOSYS, %rax
pushq $VGCF_in_syscall
jmp hypercall_iret
......
......@@ -19,8 +19,6 @@
#ifndef __GRU_H__
#define __GRU_H__
#include <asm/uv/uv.h>
/*
* GRU architectural definitions
*/
......
......@@ -36,23 +36,11 @@
#include <linux/interrupt.h>
#include <linux/proc_fs.h>
#include <linux/uaccess.h>
#include <asm/uv/uv.h>
#include "gru.h"
#include "grulib.h"
#include "grutables.h"
#if defined CONFIG_X86_64
#include <asm/genapic.h>
#include <asm/irq.h>
#define IS_UV() is_uv_system()
#elif defined CONFIG_IA64
#include <asm/system.h>
#include <asm/sn/simulator.h>
/* temp support for running on hardware simulator */
#define IS_UV() IS_MEDUSA() || ia64_platform_is("uv")
#else
#define IS_UV() 0
#endif
#include <asm/uv/uv_hub.h>
#include <asm/uv/uv_mmrs.h>
......@@ -381,7 +369,7 @@ static int __init gru_init(void)
char id[10];
void *gru_start_vaddr;
if (!IS_UV())
if (!is_uv_system())
return 0;
#if defined CONFIG_IA64
......@@ -451,7 +439,7 @@ static void __exit gru_exit(void)
int order = get_order(sizeof(struct gru_state) *
GRU_CHIPLETS_PER_BLADE);
if (!IS_UV())
if (!is_uv_system())
return;
for (i = 0; i < GRU_CHIPLETS_PER_BLADE; i++)
......
......@@ -15,21 +15,19 @@
#include <linux/mutex.h>
#if defined CONFIG_X86_UV || defined CONFIG_IA64_SGI_UV
#include <asm/uv/uv.h>
#define is_uv() is_uv_system()
#endif
#ifndef is_uv
#define is_uv() 0
#endif
#ifdef CONFIG_IA64
#if defined CONFIG_IA64
#include <asm/system.h>
#include <asm/sn/arch.h> /* defines is_shub1() and is_shub2() */
#define is_shub() ia64_platform_is("sn2")
#ifdef CONFIG_IA64_SGI_UV
#define is_uv() ia64_platform_is("uv")
#else
#define is_uv() 0
#endif
#endif
#ifdef CONFIG_X86_64
#include <asm/genapic.h>
#define is_uv() is_uv_system()
#endif
#ifndef is_shub1
......@@ -44,10 +42,6 @@
#define is_shub() 0
#endif
#ifndef is_uv
#define is_uv() 0
#endif
#ifdef USE_DBUG_ON
#define DBUG_ON(condition) BUG_ON(condition)
#else
......
......@@ -111,6 +111,15 @@ static inline void elf_core_copy_regs(elf_gregset_t *elfregs, struct pt_regs *re
#endif
}
static inline void elf_core_copy_kernel_regs(elf_gregset_t *elfregs, struct pt_regs *regs)
{
#ifdef ELF_CORE_COPY_KERNEL_REGS
ELF_CORE_COPY_KERNEL_REGS((*elfregs), regs);
#else
elf_core_copy_regs(elfregs, regs);
#endif
}
static inline int elf_core_copy_task_regs(struct task_struct *t, elf_gregset_t* elfregs)
{
#ifdef ELF_CORE_COPY_TASK_REGS
......
......@@ -8,8 +8,15 @@
#include <asm/percpu.h>
#ifndef PER_CPU_BASE_SECTION
#ifdef CONFIG_SMP
#define PER_CPU_BASE_SECTION ".data.percpu"
#else
#define PER_CPU_BASE_SECTION ".data"
#endif
#endif
#ifdef CONFIG_SMP
#ifdef MODULE
#define PER_CPU_SHARED_ALIGNED_SECTION ""
......@@ -20,7 +27,6 @@
#else
#define PER_CPU_BASE_SECTION ".data"
#define PER_CPU_SHARED_ALIGNED_SECTION ""
#define PER_CPU_FIRST_SECTION ""
......
......@@ -1130,7 +1130,7 @@ void crash_save_cpu(struct pt_regs *regs, int cpu)
return;
memset(&prstatus, 0, sizeof(prstatus));
prstatus.pr_pid = current->pid;
elf_core_copy_regs(&prstatus.pr_reg, regs);
elf_core_copy_kernel_regs(&prstatus.pr_reg, regs);
buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
&prstatus, sizeof(prstatus));
final_note(buf);
......
......@@ -359,10 +359,6 @@ EXPORT_SYMBOL(warn_slowpath);
#ifdef CONFIG_CC_STACKPROTECTOR
#ifndef GCC_HAS_SP
#warning You have selected the CONFIG_CC_STACKPROTECTOR option, but the gcc used does not support this.
#endif
/*
* Called when gcc's -fstack-protector feature is used, and
* gcc detects corruption of the on-stack canary value
......
#!/bin/sh
echo "int foo(void) { char X[200]; return 3; }" | $* -S -xc -c -O0 -fstack-protector - -o - 2> /dev/null | grep -q "%gs"
if [ "$?" -eq "0" ] ; then
echo y
else
echo n
fi
#!/bin/sh
echo "int foo(void) { char X[200]; return 3; }" | $1 -S -xc -c -O0 -mcmodel=kernel -fstack-protector - -o - 2> /dev/null | grep -q "%gs"
echo "int foo(void) { char X[200]; return 3; }" | $* -S -xc -c -O0 -mcmodel=kernel -fstack-protector - -o - 2> /dev/null | grep -q "%gs"
if [ "$?" -eq "0" ] ; then
echo $2
echo y
else
echo n
fi
......@@ -415,8 +415,9 @@ static int parse_elf(struct elf_info *info, const char *filename)
const char *secstrings
= (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
const char *secname;
int nobits = sechdrs[i].sh_type == SHT_NOBITS;
if (sechdrs[i].sh_offset > info->size) {
if (!nobits && sechdrs[i].sh_offset > info->size) {
fatal("%s is truncated. sechdrs[i].sh_offset=%lu > "
"sizeof(*hrd)=%zu\n", filename,
(unsigned long)sechdrs[i].sh_offset,
......@@ -425,6 +426,8 @@ static int parse_elf(struct elf_info *info, const char *filename)
}
secname = secstrings + sechdrs[i].sh_name;
if (strcmp(secname, ".modinfo") == 0) {
if (nobits)
fatal("%s has NOBITS .modinfo\n", filename);
info->modinfo = (void *)hdr + sechdrs[i].sh_offset;
info->modinfo_len = sechdrs[i].sh_size;
} else if (strcmp(secname, "__ksymtab") == 0)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment