Commit 98d0ac38 authored by Andy Lutomirski, committed by H. Peter Anvin

x86-64: Move vread_tsc and vread_hpet into the vDSO

The vsyscall page now consists entirely of trap instructions.

Cc: John Stultz <johnstul@us.ibm.com>
Signed-off-by: Andy Lutomirski <luto@mit.edu>
Link: http://lkml.kernel.org/r/637648f303f2ef93af93bae25186e9a1bea093f5.1310639973.git.luto@mit.edu
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
parent 433bd805
...@@ -7,8 +7,12 @@ ...@@ -7,8 +7,12 @@
#define __ARCH_HAS_CLOCKSOURCE_DATA #define __ARCH_HAS_CLOCKSOURCE_DATA
#define VCLOCK_NONE 0 /* No vDSO clock available. */
#define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */
#define VCLOCK_HPET 2 /* vDSO should use vread_hpet. */
struct arch_clocksource_data { struct arch_clocksource_data {
cycle_t (*vread)(void); int vclock_mode;
}; };
#endif /* CONFIG_X86_64 */ #endif /* CONFIG_X86_64 */
......
...@@ -51,10 +51,6 @@ extern int unsynchronized_tsc(void); ...@@ -51,10 +51,6 @@ extern int unsynchronized_tsc(void);
extern int check_tsc_unstable(void); extern int check_tsc_unstable(void);
extern unsigned long native_calibrate_tsc(void); extern unsigned long native_calibrate_tsc(void);
#ifdef CONFIG_X86_64
extern cycles_t vread_tsc(void);
#endif
/* /*
* Boot-time check whether the TSCs are synchronized across * Boot-time check whether the TSCs are synchronized across
* all CPUs/cores: * all CPUs/cores:
......
...@@ -13,7 +13,7 @@ struct vsyscall_gtod_data { ...@@ -13,7 +13,7 @@ struct vsyscall_gtod_data {
struct timezone sys_tz; struct timezone sys_tz;
struct { /* extract of a clocksource struct */ struct { /* extract of a clocksource struct */
cycle_t (*vread)(void); int vclock_mode;
cycle_t cycle_last; cycle_t cycle_last;
cycle_t mask; cycle_t mask;
u32 mult; u32 mult;
......
...@@ -16,10 +16,6 @@ enum vsyscall_num { ...@@ -16,10 +16,6 @@ enum vsyscall_num {
#ifdef __KERNEL__ #ifdef __KERNEL__
#include <linux/seqlock.h> #include <linux/seqlock.h>
/* Definitions for CONFIG_GENERIC_TIME definitions */
#define __vsyscall_fn \
__attribute__ ((unused, __section__(".vsyscall_fn"))) notrace
#define VGETCPU_RDTSCP 1 #define VGETCPU_RDTSCP 1
#define VGETCPU_LSL 2 #define VGETCPU_LSL 2
......
...@@ -24,17 +24,12 @@ endif ...@@ -24,17 +24,12 @@ endif
nostackp := $(call cc-option, -fno-stack-protector) nostackp := $(call cc-option, -fno-stack-protector)
CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp) CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp)
CFLAGS_hpet.o := $(nostackp) CFLAGS_hpet.o := $(nostackp)
CFLAGS_vread_tsc_64.o := $(nostackp)
CFLAGS_paravirt.o := $(nostackp) CFLAGS_paravirt.o := $(nostackp)
GCOV_PROFILE_vsyscall_64.o := n GCOV_PROFILE_vsyscall_64.o := n
GCOV_PROFILE_hpet.o := n GCOV_PROFILE_hpet.o := n
GCOV_PROFILE_tsc.o := n GCOV_PROFILE_tsc.o := n
GCOV_PROFILE_vread_tsc_64.o := n
GCOV_PROFILE_paravirt.o := n GCOV_PROFILE_paravirt.o := n
# vread_tsc_64 is hot and should be fully optimized:
CFLAGS_REMOVE_vread_tsc_64.o = -pg -fno-optimize-sibling-calls
obj-y := process_$(BITS).o signal.o entry_$(BITS).o obj-y := process_$(BITS).o signal.o entry_$(BITS).o
obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
obj-y += time.o ioport.o ldt.o dumpstack.o obj-y += time.o ioport.o ldt.o dumpstack.o
...@@ -43,7 +38,7 @@ obj-$(CONFIG_IRQ_WORK) += irq_work.o ...@@ -43,7 +38,7 @@ obj-$(CONFIG_IRQ_WORK) += irq_work.o
obj-y += probe_roms.o obj-y += probe_roms.o
obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o vread_tsc_64.o obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o
obj-$(CONFIG_X86_64) += vsyscall_emu_64.o obj-$(CONFIG_X86_64) += vsyscall_emu_64.o
obj-y += bootflag.o e820.o obj-y += bootflag.o e820.o
obj-y += pci-dma.o quirks.o topology.o kdebugfs.o obj-y += pci-dma.o quirks.o topology.o kdebugfs.o
......
...@@ -14,7 +14,6 @@ ...@@ -14,7 +14,6 @@
#include <asm/pgtable.h> #include <asm/pgtable.h>
#include <asm/mce.h> #include <asm/mce.h>
#include <asm/nmi.h> #include <asm/nmi.h>
#include <asm/vsyscall.h>
#include <asm/cacheflush.h> #include <asm/cacheflush.h>
#include <asm/tlbflush.h> #include <asm/tlbflush.h>
#include <asm/io.h> #include <asm/io.h>
...@@ -250,7 +249,6 @@ static void __init_or_module add_nops(void *insns, unsigned int len) ...@@ -250,7 +249,6 @@ static void __init_or_module add_nops(void *insns, unsigned int len)
extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
extern s32 __smp_locks[], __smp_locks_end[]; extern s32 __smp_locks[], __smp_locks_end[];
extern char __vsyscall_0;
void *text_poke_early(void *addr, const void *opcode, size_t len); void *text_poke_early(void *addr, const void *opcode, size_t len);
/* Replace instructions with better alternatives for this CPU type. /* Replace instructions with better alternatives for this CPU type.
...@@ -294,12 +292,6 @@ void __init_or_module apply_alternatives(struct alt_instr *start, ...@@ -294,12 +292,6 @@ void __init_or_module apply_alternatives(struct alt_instr *start,
add_nops(insnbuf + a->replacementlen, add_nops(insnbuf + a->replacementlen,
a->instrlen - a->replacementlen); a->instrlen - a->replacementlen);
#ifdef CONFIG_X86_64
/* vsyscall code is not mapped yet. resolve it manually. */
if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) {
instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0));
}
#endif
text_poke_early(instr, insnbuf, a->instrlen); text_poke_early(instr, insnbuf, a->instrlen);
} }
} }
......
...@@ -738,13 +738,6 @@ static cycle_t read_hpet(struct clocksource *cs) ...@@ -738,13 +738,6 @@ static cycle_t read_hpet(struct clocksource *cs)
return (cycle_t)hpet_readl(HPET_COUNTER); return (cycle_t)hpet_readl(HPET_COUNTER);
} }
#ifdef CONFIG_X86_64
static cycle_t __vsyscall_fn vread_hpet(void)
{
return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + 0xf0);
}
#endif
static struct clocksource clocksource_hpet = { static struct clocksource clocksource_hpet = {
.name = "hpet", .name = "hpet",
.rating = 250, .rating = 250,
...@@ -753,7 +746,7 @@ static struct clocksource clocksource_hpet = { ...@@ -753,7 +746,7 @@ static struct clocksource clocksource_hpet = {
.flags = CLOCK_SOURCE_IS_CONTINUOUS, .flags = CLOCK_SOURCE_IS_CONTINUOUS,
.resume = hpet_resume_counter, .resume = hpet_resume_counter,
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
.archdata = { .vread = vread_hpet }, .archdata = { .vclock_mode = VCLOCK_HPET },
#endif #endif
}; };
......
...@@ -777,7 +777,7 @@ static struct clocksource clocksource_tsc = { ...@@ -777,7 +777,7 @@ static struct clocksource clocksource_tsc = {
.flags = CLOCK_SOURCE_IS_CONTINUOUS | .flags = CLOCK_SOURCE_IS_CONTINUOUS |
CLOCK_SOURCE_MUST_VERIFY, CLOCK_SOURCE_MUST_VERIFY,
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
.archdata = { .vread = vread_tsc }, .archdata = { .vclock_mode = VCLOCK_TSC },
#endif #endif
}; };
......
...@@ -169,9 +169,6 @@ SECTIONS ...@@ -169,9 +169,6 @@ SECTIONS
.vsyscall : AT(VLOAD(.vsyscall)) { .vsyscall : AT(VLOAD(.vsyscall)) {
*(.vsyscall_0) *(.vsyscall_0)
. = ALIGN(L1_CACHE_BYTES);
*(.vsyscall_fn)
. = 1024; . = 1024;
*(.vsyscall_1) *(.vsyscall_1)
......
/* This code runs in userspace. */
#define DISABLE_BRANCH_PROFILING
#include <asm/vgtod.h>
/*
 * vread_tsc() - read the TSC for the vsyscall time code.
 *
 * Issues rdtsc_barrier(), reads the counter with vget_cycles(), and
 * returns that value unless it is below the cycle_last stored in
 * vsyscall_gtod_data; in that case cycle_last is returned instead.
 * The result is therefore never smaller than cycle_last.
 */
notrace cycle_t __vsyscall_fn vread_tsc(void)
{
cycle_t ret;
u64 last;
/*
 * Empirically, a fence (of type that depends on the CPU)
 * before rdtsc is enough to ensure that rdtsc is ordered
 * with respect to loads. The various CPU manuals are unclear
 * as to whether rdtsc can be reordered with later loads,
 * but no one has ever seen it happen.
 */
rdtsc_barrier();
ret = (cycle_t)vget_cycles();
/*
 * cycle_last is read once; both the comparison and the fallback
 * return below use this copy.
 */
last = VVAR(vsyscall_gtod_data).clock.cycle_last;
if (likely(ret >= last))
return ret;
/*
 * GCC likes to generate cmov here, but this branch is extremely
 * predictable (it's just a function of time and the likely is
 * very likely) and there's a data dependence, so force GCC
 * to generate a branch instead. I don't barrier() because
 * we don't actually need a barrier, and if this function
 * ever gets inlined it will generate worse code.
 */
asm volatile ("");
return last;
}
...@@ -74,7 +74,7 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm, ...@@ -74,7 +74,7 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags); write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags);
/* copy vsyscall data */ /* copy vsyscall data */
vsyscall_gtod_data.clock.vread = clock->archdata.vread; vsyscall_gtod_data.clock.vclock_mode = clock->archdata.vclock_mode;
vsyscall_gtod_data.clock.cycle_last = clock->cycle_last; vsyscall_gtod_data.clock.cycle_last = clock->cycle_last;
vsyscall_gtod_data.clock.mask = clock->mask; vsyscall_gtod_data.clock.mask = clock->mask;
vsyscall_gtod_data.clock.mult = mult; vsyscall_gtod_data.clock.mult = mult;
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include <linux/time.h> #include <linux/time.h>
#include <linux/string.h> #include <linux/string.h>
#include <asm/vsyscall.h> #include <asm/vsyscall.h>
#include <asm/fixmap.h>
#include <asm/vgtod.h> #include <asm/vgtod.h>
#include <asm/timex.h> #include <asm/timex.h>
#include <asm/hpet.h> #include <asm/hpet.h>
...@@ -25,6 +26,43 @@ ...@@ -25,6 +26,43 @@
#define gtod (&VVAR(vsyscall_gtod_data)) #define gtod (&VVAR(vsyscall_gtod_data))
/*
 * vread_tsc() - read the TSC for the vDSO time functions.
 *
 * Issues rdtsc_barrier(), reads the counter with vget_cycles(), and
 * returns that value unless it is below the cycle_last stored in
 * vsyscall_gtod_data; in that case cycle_last is returned instead.
 * The result is therefore never smaller than cycle_last.
 */
notrace static cycle_t vread_tsc(void)
{
cycle_t ret;
u64 last;
/*
 * Empirically, a fence (of type that depends on the CPU)
 * before rdtsc is enough to ensure that rdtsc is ordered
 * with respect to loads. The various CPU manuals are unclear
 * as to whether rdtsc can be reordered with later loads,
 * but no one has ever seen it happen.
 */
rdtsc_barrier();
ret = (cycle_t)vget_cycles();
/*
 * cycle_last is read once; both the comparison and the fallback
 * return below use this copy.
 */
last = VVAR(vsyscall_gtod_data).clock.cycle_last;
if (likely(ret >= last))
return ret;
/*
 * GCC likes to generate cmov here, but this branch is extremely
 * predictable (it's just a function of time and the likely is
 * very likely) and there's a data dependence, so force GCC
 * to generate a branch instead. I don't barrier() because
 * we don't actually need a barrier, and if this function
 * ever gets inlined it will generate worse code.
 */
asm volatile ("");
return last;
}
/*
 * vread_hpet() - read the HPET counter for the vDSO time functions.
 *
 * Calls readl() on the address fix_to_virt(VSYSCALL_HPET) plus the
 * counter register offset and returns the result as cycle_t.
 *
 * The offset is spelled HPET_COUNTER rather than a bare 0xf0 so that it
 * is named the same way as in the kernel-side read_hpet(), which calls
 * hpet_readl(HPET_COUNTER).
 * NOTE(review): relies on HPET_COUNTER from <asm/hpet.h> (already
 * included by this file) being 0x0f0 and visible in the vDSO build --
 * confirm.
 */
static notrace cycle_t vread_hpet(void)
{
	return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + HPET_COUNTER);
}
notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{ {
long ret; long ret;
...@@ -36,9 +74,12 @@ notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) ...@@ -36,9 +74,12 @@ notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
notrace static inline long vgetns(void) notrace static inline long vgetns(void)
{ {
long v; long v;
cycles_t (*vread)(void); cycles_t cycles;
vread = gtod->clock.vread; if (gtod->clock.vclock_mode == VCLOCK_TSC)
v = (vread() - gtod->clock.cycle_last) & gtod->clock.mask; cycles = vread_tsc();
else
cycles = vread_hpet();
v = (cycles - gtod->clock.cycle_last) & gtod->clock.mask;
return (v * gtod->clock.mult) >> gtod->clock.shift; return (v * gtod->clock.mult) >> gtod->clock.shift;
} }
...@@ -118,11 +159,11 @@ notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) ...@@ -118,11 +159,11 @@ notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
{ {
switch (clock) { switch (clock) {
case CLOCK_REALTIME: case CLOCK_REALTIME:
if (likely(gtod->clock.vread)) if (likely(gtod->clock.vclock_mode != VCLOCK_NONE))
return do_realtime(ts); return do_realtime(ts);
break; break;
case CLOCK_MONOTONIC: case CLOCK_MONOTONIC:
if (likely(gtod->clock.vread)) if (likely(gtod->clock.vclock_mode != VCLOCK_NONE))
return do_monotonic(ts); return do_monotonic(ts);
break; break;
case CLOCK_REALTIME_COARSE: case CLOCK_REALTIME_COARSE:
...@@ -139,7 +180,7 @@ int clock_gettime(clockid_t, struct timespec *) ...@@ -139,7 +180,7 @@ int clock_gettime(clockid_t, struct timespec *)
notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
{ {
long ret; long ret;
if (likely(gtod->clock.vread)) { if (likely(gtod->clock.vclock_mode != VCLOCK_NONE)) {
if (likely(tv != NULL)) { if (likely(tv != NULL)) {
BUILD_BUG_ON(offsetof(struct timeval, tv_usec) != BUILD_BUG_ON(offsetof(struct timeval, tv_usec) !=
offsetof(struct timespec, tv_nsec) || offsetof(struct timespec, tv_nsec) ||
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment