Commit 3100e448 authored by Linus Torvalds

Merge branch 'x86-vdso-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 vdso updates from Ingo Molnar:
 "Various vDSO updates from Andy Lutomirski, mostly cleanups and
  reorganization to improve maintainability, but also some
  micro-optimizations and robustization changes"

* 'x86-vdso-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86_64/vsyscall: Restore orig_ax after vsyscall seccomp
  x86_64: Add a comment explaining the TASK_SIZE_MAX guard page
  x86_64,vsyscall: Make vsyscall emulation configurable
  x86_64, vsyscall: Rewrite comment and clean up headers in vsyscall code
  x86_64, vsyscall: Turn vsyscalls all the way off when vsyscall==none
  x86,vdso: Use LSL unconditionally for vgetcpu
  x86: vdso: Fix build with older gcc
  x86_64/vdso: Clean up vgetcpu init and merge the vdso initcalls
  x86_64/vdso: Remove jiffies from the vvar page
  x86/vdso: Make the PER_CPU segment 32 bits
  x86/vdso: Make the PER_CPU segment start out accessed
  x86/vdso: Change the PER_CPU segment to use struct desc_struct
  x86_64/vdso: Move getcpu code from vsyscall_64.c to vdso/vma.c
  x86_64/vsyscall: Move all of the gate_area code to vsyscall_64.c
parents c9f861c7 26893107
@@ -992,6 +992,24 @@ config X86_ESPFIX64
 	def_bool y
 	depends on X86_16BIT && X86_64
 
+config X86_VSYSCALL_EMULATION
+	bool "Enable vsyscall emulation" if EXPERT
+	default y
+	depends on X86_64
+	---help---
+	 This enables emulation of the legacy vsyscall page. Disabling
+	 it is roughly equivalent to booting with vsyscall=none, except
+	 that it will also disable the helpful warning if a program
+	 tries to use a vsyscall. With this option set to N, offending
+	 programs will just segfault, citing addresses of the form
+	 0xffffffffff600?00.
+
+	 This option is required by many programs built before 2013, and
+	 care should be used even with newer programs if set to N.
+
+	 Disabling this option saves about 7K of kernel size and
+	 possibly 4K of additional runtime pagetable memory.
+
 config TOSHIBA
 	tristate "Toshiba Laptop support"
 	depends on X86_32
...
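For reference, a minimal user-space sketch (not part of this commit) of what the help text above describes: it calls the legacy vsyscall time() entry at its fixed address. With emulation enabled it prints the current time; with CONFIG_X86_VSYSCALL_EMULATION=n or vsyscall=none it segfaults at 0xffffffffff600400.

/* vsyscall_probe.c - hypothetical test program, not part of this commit */
#include <stdio.h>
#include <time.h>

int main(void)
{
	/* Fixed address of the legacy vsyscall time() entry. */
	time_t (*vsys_time)(time_t *) = (time_t (*)(time_t *))0xffffffffff600400UL;

	printf("vsyscall time() returned %ld\n", (long)vsys_time(NULL));
	return 0;
}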
@@ -69,7 +69,9 @@ enum fixed_addresses {
 #ifdef CONFIG_X86_32
 	FIX_HOLE,
 #else
+#ifdef CONFIG_X86_VSYSCALL_EMULATION
 	VSYSCALL_PAGE = (FIXADDR_TOP - VSYSCALL_ADDR) >> PAGE_SHIFT,
+#endif
 #ifdef CONFIG_PARAVIRT_CLOCK
 	PVCLOCK_FIXMAP_BEGIN,
 	PVCLOCK_FIXMAP_END = PVCLOCK_FIXMAP_BEGIN+PVCLOCK_VSYSCALL_NR_PAGES-1,
...
@@ -39,6 +39,8 @@ void copy_page(void *to, void *from);
 #endif	/* !__ASSEMBLY__ */
 
-#define __HAVE_ARCH_GATE_AREA 1
+#ifdef CONFIG_X86_VSYSCALL_EMULATION
+# define __HAVE_ARCH_GATE_AREA 1
+#endif
 
 #endif /* _ASM_X86_PAGE_64_H */
@@ -894,7 +894,13 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
 #else
 /*
- * User space process size. 47bits minus one guard page.
+ * User space process size. 47bits minus one guard page. The guard
+ * page is necessary on Intel CPUs: if a SYSCALL instruction is at
+ * the highest possible canonical userspace address, then that
+ * syscall will enter the kernel with a non-canonical return
+ * address, and SYSRET will explode dangerously. We avoid this
+ * particular problem by preventing anything from being mapped
+ * at the maximum canonical address.
 */
 #define TASK_SIZE_MAX	((1UL << 47) - PAGE_SIZE)
...
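As a quick sanity check on the comment above (my own arithmetic, assuming 4 KiB pages and the 47-bit user half of the canonical range): TASK_SIZE_MAX works out to 0x00007ffffffff000, so the topmost canonical user page can never be mapped and a SYSCALL can never sit right at the canonical boundary.

/* Worked example, not kernel code; assumes PAGE_SIZE == 4096. */
#include <stdio.h>

int main(void)
{
	unsigned long page_size = 4096UL;
	unsigned long task_size_max = (1UL << 47) - page_size;

	printf("TASK_SIZE_MAX       = %#lx\n", task_size_max);   /* 0x7ffffffff000 */
	printf("last canonical byte = %#lx\n", (1UL << 47) - 1); /* 0x7fffffffffff */
	return 0;
}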
@@ -70,4 +70,23 @@ static inline void gtod_write_end(struct vsyscall_gtod_data *s)
 	++s->seq;
 }
 
+#ifdef CONFIG_X86_64
+
+#define VGETCPU_CPU_MASK 0xfff
+
+static inline unsigned int __getcpu(void)
+{
+	unsigned int p;
+
+	/*
+	 * Load per CPU data from GDT. LSL is faster than RDTSCP and
+	 * works on all CPUs.
+	 */
+	asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
+
+	return p;
+}
+
+#endif /* CONFIG_X86_64 */
+
 #endif /* _ASM_X86_VGTOD_H */
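For context, the user-space consumer of __getcpu() is the vDSO's getcpu implementation, which unpacks the segment limit into a CPU and node number. A sketch along the lines of arch/x86/vdso/vgetcpu.c (that file is only lightly touched by this series):

notrace long
__vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused)
{
	unsigned int p = __getcpu();

	if (cpu)
		*cpu = p & VGETCPU_CPU_MASK;	/* low 12 bits: CPU number */
	if (node)
		*node = p >> 12;		/* next 8 bits: NUMA node */
	return 0;
}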
@@ -4,15 +4,7 @@
 #include <linux/seqlock.h>
 #include <uapi/asm/vsyscall.h>
 
-#define VGETCPU_RDTSCP	1
-#define VGETCPU_LSL	2
-
-/* kernel space (writeable) */
-extern int vgetcpu_mode;
-extern struct timezone sys_tz;
-
-#include <asm/vvar.h>
-
+#ifdef CONFIG_X86_VSYSCALL_EMULATION
 extern void map_vsyscall(void);
 
 /*
@@ -20,25 +12,12 @@ extern void map_vsyscall(void);
  * Returns true if handled.
  */
 extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address);
-
-#ifdef CONFIG_X86_64
-
-#define VGETCPU_CPU_MASK 0xfff
-
-static inline unsigned int __getcpu(void)
-{
-	unsigned int p;
-
-	if (VVAR(vgetcpu_mode) == VGETCPU_RDTSCP) {
-		/* Load per CPU data from RDTSCP */
-		native_read_tscp(&p);
-	} else {
-		/* Load per CPU data from GDT */
-		asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
-	}
-	return p;
-}
-#endif /* CONFIG_X86_64 */
+#else
+static inline void map_vsyscall(void) {}
+static inline bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
+{
+	return false;
+}
+#endif
 
 #endif /* _ASM_X86_VSYSCALL_H */
@@ -44,8 +44,6 @@ extern char __vvar_page;
 
 /* DECLARE_VVAR(offset, type, name) */
 
-DECLARE_VVAR(0, volatile unsigned long, jiffies)
-DECLARE_VVAR(16, int, vgetcpu_mode)
 DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data)
 
 #undef DECLARE_VVAR
...
@@ -28,8 +28,7 @@ obj-$(CONFIG_X86_32) += i386_ksyms_32.o
 obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
 obj-$(CONFIG_X86_64) += mcount_64.o
 obj-y += syscall_$(BITS).o vsyscall_gtod.o
-obj-$(CONFIG_X86_64) += vsyscall_64.o
-obj-$(CONFIG_X86_64) += vsyscall_emu_64.o
+obj-$(CONFIG_X86_VSYSCALL_EMULATION) += vsyscall_64.o vsyscall_emu_64.o
 obj-$(CONFIG_X86_ESPFIX64) += espfix_64.o
 obj-$(CONFIG_SYSFS) += ksysfs.o
 obj-y += bootflag.o e820.o
...
@@ -958,14 +958,6 @@ static void identify_cpu(struct cpuinfo_x86 *c)
 }
 
 #ifdef CONFIG_X86_64
-static void vgetcpu_set_mode(void)
-{
-	if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP))
-		vgetcpu_mode = VGETCPU_RDTSCP;
-	else
-		vgetcpu_mode = VGETCPU_LSL;
-}
-
 #ifdef CONFIG_IA32_EMULATION
 /* May not be __init: called during resume */
 static void syscall32_cpu_init(void)
@@ -1008,8 +1000,6 @@ void __init identify_boot_cpu(void)
 #ifdef CONFIG_X86_32
 	sysenter_setup();
 	enable_sep_cpu();
-#else
-	vgetcpu_set_mode();
 #endif
 	cpu_detect_tlb(&boot_cpu_data);
 }
...
@@ -1192,9 +1192,7 @@ void __init setup_arch(char **cmdline_p)
 
 	tboot_probe();
 
-#ifdef CONFIG_X86_64
 	map_vsyscall();
-#endif
 
 	generic_apic_probe();
...
@@ -23,7 +23,7 @@
 #include <asm/time.h>
 
 #ifdef CONFIG_X86_64
-__visible DEFINE_VVAR(volatile unsigned long, jiffies) = INITIAL_JIFFIES;
+__visible volatile unsigned long jiffies __cacheline_aligned = INITIAL_JIFFIES;
 #endif
 
 unsigned long profile_pc(struct pt_regs *regs)
...
 /*
+ * Copyright (c) 2012-2014 Andy Lutomirski <luto@amacapital.net>
+ *
+ * Based on the original implementation which is:
  * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
  * Copyright 2003 Andi Kleen, SuSE Labs.
  *
- * [ NOTE: this mechanism is now deprecated in favor of the vDSO. ]
+ * Parts of the original code have been moved to arch/x86/vdso/vma.c
+ *
+ * This file implements vsyscall emulation. vsyscalls are a legacy ABI:
+ * Userspace can request certain kernel services by calling fixed
+ * addresses. This concept is problematic:
  *
- * Thanks to hpa@transmeta.com for some useful hint.
- * Special thanks to Ingo Molnar for his early experience with
- * a different vsyscall implementation for Linux/IA32 and for the name.
+ * - It interferes with ASLR.
+ * - It's awkward to write code that lives in kernel addresses but is
+ *   callable by userspace at fixed addresses.
+ * - The whole concept is impossible for 32-bit compat userspace.
+ * - UML cannot easily virtualize a vsyscall.
  *
- * vsyscall 1 is located at -10Mbyte, vsyscall 2 is located
- * at virtual address -10Mbyte+1024bytes etc... There are at max 4
- * vsyscalls. One vsyscall can reserve more than 1 slot to avoid
- * jumping out of line if necessary. We cannot add more with this
- * mechanism because older kernels won't return -ENOSYS.
+ * As of mid-2014, I believe that there is no new userspace code that
+ * will use a vsyscall if the vDSO is present. I hope that there will
+ * soon be no new userspace code that will ever use a vsyscall.
  *
- * Note: the concept clashes with user mode linux. UML users should
- * use the vDSO.
+ * The code in this file emulates vsyscalls when notified of a page
+ * fault to a vsyscall address.
  */
 
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/time.h>
-#include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/timer.h>
-#include <linux/seqlock.h>
-#include <linux/jiffies.h>
-#include <linux/sysctl.h>
-#include <linux/topology.h>
-#include <linux/timekeeper_internal.h>
-#include <linux/getcpu.h>
-#include <linux/cpu.h>
-#include <linux/smp.h>
-#include <linux/notifier.h>
 #include <linux/syscalls.h>
 #include <linux/ratelimit.h>
 
 #include <asm/vsyscall.h>
-#include <asm/pgtable.h>
-#include <asm/compat.h>
-#include <asm/page.h>
 #include <asm/unistd.h>
 #include <asm/fixmap.h>
-#include <asm/errno.h>
-#include <asm/io.h>
-#include <asm/segment.h>
-#include <asm/desc.h>
-#include <asm/topology.h>
 #include <asm/traps.h>
 
 #define CREATE_TRACE_POINTS
 #include "vsyscall_trace.h"
 
-DEFINE_VVAR(int, vgetcpu_mode);
-
 static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE;
 static int __init vsyscall_setup(char *str)
@@ -222,6 +206,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
 			      "seccomp tried to change syscall nr or ip");
 		do_exit(SIGSYS);
 	}
+	regs->orig_ax = -1;
 	if (tmp)
 		goto do_ret; /* skip requested */
@@ -284,46 +269,54 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
 }
 
 /*
- * Assume __initcall executes before all user space. Hopefully kmod
- * doesn't violate that. We'll find out if it does.
+ * A pseudo VMA to allow ptrace access for the vsyscall page. This only
+ * covers the 64bit vsyscall page now. 32bit has a real VMA now and does
+ * not need special handling anymore:
  */
-static void vsyscall_set_cpu(int cpu)
+static const char *gate_vma_name(struct vm_area_struct *vma)
 {
-	unsigned long d;
-	unsigned long node = 0;
-#ifdef CONFIG_NUMA
-	node = cpu_to_node(cpu);
-#endif
-	if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP))
-		write_rdtscp_aux((node << 12) | cpu);
-
-	/*
-	 * Store cpu number in limit so that it can be loaded quickly
-	 * in user space in vgetcpu. (12 bits for the CPU and 8 bits for the node)
-	 */
-	d = 0x0f40000000000ULL;
-	d |= cpu;
-	d |= (node & 0xf) << 12;
-	d |= (node >> 4) << 48;
-
-	write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
+	return "[vsyscall]";
 }
-
-static void cpu_vsyscall_init(void *arg)
+static struct vm_operations_struct gate_vma_ops = {
+	.name = gate_vma_name,
+};
+static struct vm_area_struct gate_vma = {
+	.vm_start	= VSYSCALL_ADDR,
+	.vm_end		= VSYSCALL_ADDR + PAGE_SIZE,
+	.vm_page_prot	= PAGE_READONLY_EXEC,
+	.vm_flags	= VM_READ | VM_EXEC,
+	.vm_ops		= &gate_vma_ops,
+};
+
+struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
 {
-	/* preemption should be already off */
-	vsyscall_set_cpu(raw_smp_processor_id());
+#ifdef CONFIG_IA32_EMULATION
+	if (!mm || mm->context.ia32_compat)
+		return NULL;
+#endif
+	if (vsyscall_mode == NONE)
+		return NULL;
+	return &gate_vma;
 }
 
-static int
-cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
+int in_gate_area(struct mm_struct *mm, unsigned long addr)
 {
-	long cpu = (long)arg;
+	struct vm_area_struct *vma = get_gate_vma(mm);
+
+	if (!vma)
+		return 0;
 
-	if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
-		smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 1);
-	return NOTIFY_DONE;
+	return (addr >= vma->vm_start) && (addr < vma->vm_end);
+}
+
+/*
+ * Use this when you have no reliable mm, typically from interrupt
+ * context. It is less reliable than using a task's mm and may give
+ * false positives.
+ */
+int in_gate_area_no_mm(unsigned long addr)
+{
+	return vsyscall_mode != NONE && (addr & PAGE_MASK) == VSYSCALL_ADDR;
 }
 
 void __init map_vsyscall(void)
@@ -331,24 +324,12 @@ void __init map_vsyscall(void)
 	extern char __vsyscall_page;
 	unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
 
-	__set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
-		     vsyscall_mode == NATIVE
-		     ? PAGE_KERNEL_VSYSCALL
-		     : PAGE_KERNEL_VVAR);
+	if (vsyscall_mode != NONE)
+		__set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
+			     vsyscall_mode == NATIVE
+			     ? PAGE_KERNEL_VSYSCALL
+			     : PAGE_KERNEL_VVAR);
+
 	BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
 		     (unsigned long)VSYSCALL_ADDR);
 }
-
-static int __init vsyscall_init(void)
-{
-	cpu_notifier_register_begin();
-	on_each_cpu(cpu_vsyscall_init, NULL, 1);
-	/* notifier priority > KVM */
-	__hotcpu_notifier(cpu_vsyscall_notifier, 30);
-	cpu_notifier_register_done();
-
-	return 0;
-}
-__initcall(vsyscall_init);
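A quick way to observe the effect of the gate VMA above (a user-space sketch, not part of this commit): gate_vma_name() is what makes the "[vsyscall]" entry appear in /proc/<pid>/maps whenever vsyscall_mode != NONE.

#include <stdio.h>
#include <string.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/proc/self/maps", "r");

	if (!f)
		return 1;
	while (fgets(line, sizeof(line), f))
		if (strstr(line, "[vsyscall]"))
			fputs(line, stdout);	/* e.g. ffffffffff600000-ffffffffff601000 ... [vsyscall] */
	fclose(f);
	return 0;
}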
@@ -1204,55 +1204,6 @@ int kern_addr_valid(unsigned long addr)
 	return pfn_valid(pte_pfn(*pte));
 }
 
-/*
- * A pseudo VMA to allow ptrace access for the vsyscall page. This only
- * covers the 64bit vsyscall page now. 32bit has a real VMA now and does
- * not need special handling anymore:
- */
-static const char *gate_vma_name(struct vm_area_struct *vma)
-{
-	return "[vsyscall]";
-}
-static struct vm_operations_struct gate_vma_ops = {
-	.name = gate_vma_name,
-};
-static struct vm_area_struct gate_vma = {
-	.vm_start	= VSYSCALL_ADDR,
-	.vm_end		= VSYSCALL_ADDR + PAGE_SIZE,
-	.vm_page_prot	= PAGE_READONLY_EXEC,
-	.vm_flags	= VM_READ | VM_EXEC,
-	.vm_ops		= &gate_vma_ops,
-};
-
-struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
-{
-#ifdef CONFIG_IA32_EMULATION
-	if (!mm || mm->context.ia32_compat)
-		return NULL;
-#endif
-	return &gate_vma;
-}
-
-int in_gate_area(struct mm_struct *mm, unsigned long addr)
-{
-	struct vm_area_struct *vma = get_gate_vma(mm);
-
-	if (!vma)
-		return 0;
-
-	return (addr >= vma->vm_start) && (addr < vma->vm_end);
-}
-
-/*
- * Use this when you have no reliable mm, typically from interrupt
- * context. It is less reliable than using a task's mm and may give
- * false positives.
- */
-int in_gate_area_no_mm(unsigned long addr)
-{
-	return (addr & PAGE_MASK) == VSYSCALL_ADDR;
-}
-
 static unsigned long probe_memory_block_size(void)
 {
 	/* start from 2g */
...
@@ -7,9 +7,7 @@
 #include <linux/kernel.h>
 #include <linux/getcpu.h>
-#include <linux/jiffies.h>
 #include <linux/time.h>
-#include <asm/vsyscall.h>
 #include <asm/vgtod.h>
 
 notrace long
...
 /*
- * Set up the VMAs to tell the VM about the vDSO.
  * Copyright 2007 Andi Kleen, SUSE Labs.
  * Subject to the GPL, v.2
+ *
+ * This contains most of the x86 vDSO kernel-side code.
  */
 #include <linux/mm.h>
 #include <linux/err.h>
@@ -10,17 +11,17 @@
 #include <linux/init.h>
 #include <linux/random.h>
 #include <linux/elf.h>
-#include <asm/vsyscall.h>
+#include <linux/cpu.h>
 #include <asm/vgtod.h>
 #include <asm/proto.h>
 #include <asm/vdso.h>
+#include <asm/vvar.h>
 #include <asm/page.h>
 #include <asm/hpet.h>
+#include <asm/desc.h>
 
 #if defined(CONFIG_X86_64)
 unsigned int __read_mostly vdso64_enabled = 1;
-
-extern unsigned short vdso_sync_cpuid;
 #endif
 
 void __init init_vdso_image(const struct vdso_image *image)
@@ -38,20 +39,6 @@ void __init init_vdso_image(const struct vdso_image *image)
 			   image->alt_len));
 }
 
-#if defined(CONFIG_X86_64)
-static int __init init_vdso(void)
-{
-	init_vdso_image(&vdso_image_64);
-
-#ifdef CONFIG_X86_X32_ABI
-	init_vdso_image(&vdso_image_x32);
-#endif
-
-	return 0;
-}
-subsys_initcall(init_vdso);
-#endif
-
 struct linux_binprm;
 
 /* Put the vdso above the (randomized) stack with another randomized offset.
@@ -238,3 +225,63 @@ static __init int vdso_setup(char *s)
 }
 __setup("vdso=", vdso_setup);
 #endif
+
+#ifdef CONFIG_X86_64
+static void vgetcpu_cpu_init(void *arg)
+{
+	int cpu = smp_processor_id();
+	struct desc_struct d = { };
+	unsigned long node = 0;
+#ifdef CONFIG_NUMA
+	node = cpu_to_node(cpu);
+#endif
+	if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP))
+		write_rdtscp_aux((node << 12) | cpu);
+
+	/*
+	 * Store cpu number in limit so that it can be loaded
+	 * quickly in user space in vgetcpu. (12 bits for the CPU
+	 * and 8 bits for the node)
+	 */
+	d.limit0 = cpu | ((node & 0xf) << 12);
+	d.limit = node >> 4;
+	d.type = 5;		/* RO data, expand down, accessed */
+	d.dpl = 3;		/* Visible to user code */
+	d.s = 1;		/* Not a system segment */
+	d.p = 1;		/* Present */
+	d.d = 1;		/* 32-bit */
+
+	write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
+}
+
+static int
+vgetcpu_cpu_notifier(struct notifier_block *n, unsigned long action, void *arg)
+{
+	long cpu = (long)arg;
+
+	if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
+		smp_call_function_single(cpu, vgetcpu_cpu_init, NULL, 1);
+
+	return NOTIFY_DONE;
+}
+
+static int __init init_vdso(void)
+{
+	init_vdso_image(&vdso_image_64);
+
+#ifdef CONFIG_X86_X32_ABI
+	init_vdso_image(&vdso_image_x32);
+#endif
+
+	cpu_notifier_register_begin();
+	on_each_cpu(vgetcpu_cpu_init, NULL, 1);
+	/* notifier priority > KVM */
+	__hotcpu_notifier(vgetcpu_cpu_notifier, 30);
+	cpu_notifier_register_done();
+
+	return 0;
+}
+subsys_initcall(init_vdso);
+#endif /* CONFIG_X86_64 */
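To make the descriptor packing above concrete, here is a small stand-alone sketch (my own illustration, with hypothetical cpu/node values) of how limit0 and limit combine into the 20-bit segment limit that LSL hands back to __getcpu(), and how callers unpack it:

#include <assert.h>
#include <stdio.h>

int main(void)
{
	unsigned int cpu = 1234, node = 0x5a;	/* hypothetical values */

	/* What vgetcpu_cpu_init() stores in the descriptor: */
	unsigned int limit0 = (cpu | ((node & 0xf) << 12)) & 0xffff;	/* d.limit0 */
	unsigned int limit_hi = (node >> 4) & 0xf;			/* d.limit  */

	/* What LSL on the PER_CPU segment returns (20-bit limit): */
	unsigned int lsl = limit0 | (limit_hi << 16);

	assert((lsl & 0xfff) == cpu);	/* VGETCPU_CPU_MASK */
	assert((lsl >> 12) == node);
	printf("limit = %#x -> cpu %u, node %#x\n", lsl, lsl & 0xfff, lsl >> 12);
	return 0;
}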
@@ -1412,8 +1412,10 @@ static int xen_pgd_alloc(struct mm_struct *mm)
 		page->private = (unsigned long)user_pgd;
 
 		if (user_pgd != NULL) {
+#ifdef CONFIG_X86_VSYSCALL_EMULATION
 			user_pgd[pgd_index(VSYSCALL_ADDR)] =
 				__pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE);
+#endif
 			ret = 0;
 		}
 
@@ -1976,7 +1978,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
 # ifdef CONFIG_HIGHMEM
 	case FIX_KMAP_BEGIN ... FIX_KMAP_END:
 # endif
-#else
+#elif defined(CONFIG_X86_VSYSCALL_EMULATION)
 	case VSYSCALL_PAGE:
 #endif
 	case FIX_TEXT_POKE0:
@@ -2015,7 +2017,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
 	__native_set_fixmap(idx, pte);
 
-#ifdef CONFIG_X86_64
+#ifdef CONFIG_X86_VSYSCALL_EMULATION
 	/* Replicate changes to map the vsyscall page into the user
 	   pagetable vsyscall mapping. */
 	if (idx == VSYSCALL_PAGE) {
...