Commit ba33ea81 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 asm updates from Ingo Molnar:
 "This is another big update. Main changes are:

   - lots of x86 system call (and other traps/exceptions) entry code
     enhancements.  In particular the complex parts of the 64-bit entry
     code have been migrated to C code as well, and a number of dusty
     corners have been refreshed.  (Andy Lutomirski)

   - vDSO special mapping robustification and general cleanups (Andy
     Lutomirski)

   - cpufeature refactoring, cleanups and speedups (Borislav Petkov)

   - lots of other changes ..."

* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (64 commits)
  x86/cpufeature: Enable new AVX-512 features
  x86/entry/traps: Show unhandled signal for i386 in do_trap()
  x86/entry: Call enter_from_user_mode() with IRQs off
  x86/entry/32: Change INT80 to be an interrupt gate
  x86/entry: Improve system call entry comments
  x86/entry: Remove TIF_SINGLESTEP entry work
  x86/entry/32: Add and check a stack canary for the SYSENTER stack
  x86/entry/32: Simplify and fix up the SYSENTER stack #DB/NMI fixup
  x86/entry: Only allocate space for tss_struct::SYSENTER_stack if needed
  x86/entry: Vastly simplify SYSENTER TF (single-step) handling
  x86/entry/traps: Clear DR6 early in do_debug() and improve the comment
  x86/entry/traps: Clear TIF_BLOCKSTEP on all debug exceptions
  x86/entry/32: Restore FLAGS on SYSEXIT
  x86/entry/32: Filter NT and speed up AC filtering in SYSENTER
  x86/entry/compat: In SYSENTER, sink AC clearing below the existing FLAGS test
  selftests/x86: In syscall_nt, test NT|TF as well
  x86/asm-offsets: Remove PARAVIRT_enabled
  x86/entry/32: Introduce and use X86_BUG_ESPFIX instead of paravirt_enabled
  uprobes: __create_xol_area() must nullify xol_mapping.fault
  x86/cpufeature: Create a new synthetic cpu capability for machine check recovery
  ...
parents e23604ed d0500494
......@@ -666,7 +666,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
clearcpuid=BITNUM [X86]
Disable CPUID feature X for the kernel. See
arch/x86/include/asm/cpufeature.h for the valid bit
arch/x86/include/asm/cpufeatures.h for the valid bit
numbers. Note the Linux specific bits are not necessarily
stable over kernel options, but the vendor specific
ones should be.
......
......@@ -60,6 +60,8 @@ Machine check
threshold to 1. Enabling this may make memory predictive failure
analysis less effective if the bios sets thresholds for memory
errors since we will not see details for all errors.
mce=recovery
Force-enable recoverable machine check code paths
nomce (for compatibility with i386): same as mce=off
......
......@@ -338,16 +338,6 @@ config DEBUG_IMR_SELFTEST
If unsure say N here.
config X86_DEBUG_STATIC_CPU_HAS
bool "Debug alternatives"
depends on DEBUG_KERNEL
---help---
This option causes additional code to be generated which
fails if static_cpu_has() is used before alternatives have
run.
If unsure, say N.
config X86_DEBUG_FPU
bool "Debug the x86 FPU code"
depends on DEBUG_KERNEL
......
#ifndef BOOT_CPUFLAGS_H
#define BOOT_CPUFLAGS_H
#include <asm/cpufeature.h>
#include <asm/cpufeatures.h>
#include <asm/processor-flags.h>
struct cpu_features {
......
......@@ -17,7 +17,7 @@
#include "../include/asm/required-features.h"
#include "../include/asm/disabled-features.h"
#include "../include/asm/cpufeature.h"
#include "../include/asm/cpufeatures.h"
#include "../kernel/cpu/capflags.c"
int main(void)
......
......@@ -33,7 +33,7 @@
#include <linux/crc32.h>
#include <crypto/internal/hash.h>
#include <asm/cpufeature.h>
#include <asm/cpufeatures.h>
#include <asm/cpu_device_id.h>
#include <asm/fpu/api.h>
......
......@@ -30,7 +30,7 @@
#include <linux/kernel.h>
#include <crypto/internal/hash.h>
#include <asm/cpufeature.h>
#include <asm/cpufeatures.h>
#include <asm/cpu_device_id.h>
#include <asm/fpu/internal.h>
......
......@@ -30,7 +30,7 @@
#include <linux/string.h>
#include <linux/kernel.h>
#include <asm/fpu/api.h>
#include <asm/cpufeature.h>
#include <asm/cpufeatures.h>
#include <asm/cpu_device_id.h>
asmlinkage __u16 crc_t10dif_pcl(__u16 crc, const unsigned char *buf,
......
......@@ -201,37 +201,6 @@ For 32-bit we have the following conventions - kernel is built with
.byte 0xf1
.endm
#else /* CONFIG_X86_64 */
/*
* For 32bit only simplified versions of SAVE_ALL/RESTORE_ALL. These
* are different from the entry_32.S versions in not changing the segment
* registers. So only suitable for in kernel use, not when transitioning
* from or to user space. The resulting stack frame is not a standard
* pt_regs frame. The main use case is calling C code from assembler
* when all the registers need to be preserved.
*/
.macro SAVE_ALL
pushl %eax
pushl %ebp
pushl %edi
pushl %esi
pushl %edx
pushl %ecx
pushl %ebx
.endm
.macro RESTORE_ALL
popl %ebx
popl %ecx
popl %edx
popl %esi
popl %edi
popl %ebp
popl %eax
.endm
#endif /* CONFIG_X86_64 */
/*
......
......@@ -26,6 +26,7 @@
#include <asm/traps.h>
#include <asm/vdso.h>
#include <asm/uaccess.h>
#include <asm/cpufeature.h>
#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>
......@@ -44,6 +45,8 @@ __visible void enter_from_user_mode(void)
CT_WARN_ON(ct_state() != CONTEXT_USER);
user_exit();
}
#else
static inline void enter_from_user_mode(void) {}
#endif
static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch)
......@@ -84,17 +87,6 @@ unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch)
work = ACCESS_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY;
#ifdef CONFIG_CONTEXT_TRACKING
/*
* If TIF_NOHZ is set, we are required to call user_exit() before
* doing anything that could touch RCU.
*/
if (work & _TIF_NOHZ) {
enter_from_user_mode();
work &= ~_TIF_NOHZ;
}
#endif
#ifdef CONFIG_SECCOMP
/*
* Do seccomp first -- it should minimize exposure of other
......@@ -171,16 +163,6 @@ long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch,
if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
BUG_ON(regs != task_pt_regs(current));
/*
* If we stepped into a sysenter/syscall insn, it trapped in
* kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP.
* If user-mode had set TF itself, then it's still clear from
* do_debug() and we need to set it again to restore the user
* state. If we entered on the slow path, TF was already set.
*/
if (work & _TIF_SINGLESTEP)
regs->flags |= X86_EFLAGS_TF;
#ifdef CONFIG_SECCOMP
/*
* Call seccomp_phase2 before running the other hooks so that
......@@ -268,6 +250,7 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags)
/* Called with IRQs disabled. */
__visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
{
struct thread_info *ti = pt_regs_to_thread_info(regs);
u32 cached_flags;
if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled()))
......@@ -275,12 +258,22 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
lockdep_sys_exit();
cached_flags =
READ_ONCE(pt_regs_to_thread_info(regs)->flags);
cached_flags = READ_ONCE(ti->flags);
if (unlikely(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS))
exit_to_usermode_loop(regs, cached_flags);
#ifdef CONFIG_COMPAT
/*
* Compat syscalls set TS_COMPAT. Make sure we clear it before
* returning to user mode. We need to clear it *after* signal
* handling, because syscall restart has a fixup for compat
* syscalls. The fixup is exercised by the ptrace_syscall_32
* selftest.
*/
ti->status &= ~TS_COMPAT;
#endif
user_enter();
}
......@@ -332,33 +325,45 @@ __visible inline void syscall_return_slowpath(struct pt_regs *regs)
if (unlikely(cached_flags & SYSCALL_EXIT_WORK_FLAGS))
syscall_slow_exit_work(regs, cached_flags);
#ifdef CONFIG_COMPAT
local_irq_disable();
prepare_exit_to_usermode(regs);
}
#ifdef CONFIG_X86_64
__visible void do_syscall_64(struct pt_regs *regs)
{
struct thread_info *ti = pt_regs_to_thread_info(regs);
unsigned long nr = regs->orig_ax;
enter_from_user_mode();
local_irq_enable();
if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY)
nr = syscall_trace_enter(regs);
/*
* Compat syscalls set TS_COMPAT. Make sure we clear it before
* returning to user mode.
* NB: Native and x32 syscalls are dispatched from the same
* table. The only functional difference is the x32 bit in
* regs->orig_ax, which changes the behavior of some syscalls.
*/
ti->status &= ~TS_COMPAT;
#endif
if (likely((nr & __SYSCALL_MASK) < NR_syscalls)) {
regs->ax = sys_call_table[nr & __SYSCALL_MASK](
regs->di, regs->si, regs->dx,
regs->r10, regs->r8, regs->r9);
}
local_irq_disable();
prepare_exit_to_usermode(regs);
syscall_return_slowpath(regs);
}
#endif
#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
/*
* Does a 32-bit syscall. Called with IRQs on and does all entry and
* exit work and returns with IRQs off. This function is extremely hot
* in workloads that use it, and it's usually called from
* Does a 32-bit syscall. Called with IRQs on in CONTEXT_KERNEL. Does
* all entry and exit work and returns with IRQs off. This function is
* extremely hot in workloads that use it, and it's usually called from
* do_fast_syscall_32, so forcibly inline it to improve performance.
*/
#ifdef CONFIG_X86_32
/* 32-bit kernels use a trap gate for INT80, and the asm code calls here. */
__visible
#else
/* 64-bit kernels use do_syscall_32_irqs_off() instead. */
static
#endif
__always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
{
struct thread_info *ti = pt_regs_to_thread_info(regs);
unsigned int nr = (unsigned int)regs->orig_ax;
......@@ -393,14 +398,13 @@ __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
syscall_return_slowpath(regs);
}
#ifdef CONFIG_X86_64
/* Handles INT80 on 64-bit kernels */
__visible void do_syscall_32_irqs_off(struct pt_regs *regs)
/* Handles int $0x80 */
__visible void do_int80_syscall_32(struct pt_regs *regs)
{
enter_from_user_mode();
local_irq_enable();
do_syscall_32_irqs_on(regs);
}
#endif
/* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */
__visible long do_fast_syscall_32(struct pt_regs *regs)
......@@ -420,12 +424,11 @@ __visible long do_fast_syscall_32(struct pt_regs *regs)
*/
regs->ip = landing_pad;
/*
* Fetch EBP from where the vDSO stashed it.
*
* WARNING: We are in CONTEXT_USER and RCU isn't paying attention!
*/
enter_from_user_mode();
local_irq_enable();
/* Fetch EBP from where the vDSO stashed it. */
if (
#ifdef CONFIG_X86_64
/*
......@@ -443,9 +446,6 @@ __visible long do_fast_syscall_32(struct pt_regs *regs)
/* User code screwed up. */
local_irq_disable();
regs->ax = -EFAULT;
#ifdef CONFIG_CONTEXT_TRACKING
enter_from_user_mode();
#endif
prepare_exit_to_usermode(regs);
return 0; /* Keep it simple: use IRET. */
}
......
This diff is collapsed.
This diff is collapsed.
......@@ -19,12 +19,21 @@
.section .entry.text, "ax"
/*
* 32-bit SYSENTER instruction entry.
* 32-bit SYSENTER entry.
*
* SYSENTER loads ss, rsp, cs, and rip from previously programmed MSRs.
* IF and VM in rflags are cleared (IOW: interrupts are off).
* 32-bit system calls through the vDSO's __kernel_vsyscall enter here
* on 64-bit kernels running on Intel CPUs.
*
* The SYSENTER instruction, in principle, should *only* occur in the
* vDSO. In practice, a small number of Android devices were shipped
* with a copy of Bionic that inlined a SYSENTER instruction. This
* never happened in any of Google's Bionic versions -- it only happened
* in a narrow range of Intel-provided versions.
*
* SYSENTER loads SS, RSP, CS, and RIP from previously programmed MSRs.
* IF and VM in RFLAGS are cleared (IOW: interrupts are off).
* SYSENTER does not save anything on the stack,
* and does not save old rip (!!!) and rflags.
* and does not save old RIP (!!!), RSP, or RFLAGS.
*
* Arguments:
* eax system call number
......@@ -35,10 +44,6 @@
* edi arg5
* ebp user stack
* 0(%ebp) arg6
*
* This is purely a fast path. For anything complicated we use the int 0x80
* path below. We set up a complete hardware stack frame to share code
* with the int 0x80 path.
*/
ENTRY(entry_SYSENTER_compat)
/* Interrupts are off on entry. */
......@@ -66,8 +71,6 @@ ENTRY(entry_SYSENTER_compat)
*/
pushfq /* pt_regs->flags (except IF = 0) */
orl $X86_EFLAGS_IF, (%rsp) /* Fix saved flags */
ASM_CLAC /* Clear AC after saving FLAGS */
pushq $__USER32_CS /* pt_regs->cs */
xorq %r8,%r8
pushq %r8 /* pt_regs->ip = 0 (placeholder) */
......@@ -90,19 +93,25 @@ ENTRY(entry_SYSENTER_compat)
cld
/*
* Sysenter doesn't filter flags, so we need to clear NT
* SYSENTER doesn't filter flags, so we need to clear NT and AC
* ourselves. To save a few cycles, we can check whether
* NT was set instead of doing an unconditional popfq.
* either was set instead of doing an unconditional popfq.
* This needs to happen before enabling interrupts so that
* we don't get preempted with NT set.
*
* If TF is set, we will single-step all the way to here -- do_debug
* will ignore all the traps. (Yes, this is slow, but so is
* single-stepping in general. This allows us to avoid having
* a more complicated code to handle the case where a user program
* forces us to single-step through the SYSENTER entry code.)
*
* NB.: .Lsysenter_fix_flags is a label with the code under it moved
* out-of-line as an optimization: NT is unlikely to be set in the
* majority of the cases and instead of polluting the I$ unnecessarily,
* we're keeping that code behind a branch which will predict as
* not-taken and therefore its instructions won't be fetched.
*/
testl $X86_EFLAGS_NT, EFLAGS(%rsp)
testl $X86_EFLAGS_NT|X86_EFLAGS_AC|X86_EFLAGS_TF, EFLAGS(%rsp)
jnz .Lsysenter_fix_flags
.Lsysenter_flags_fixed:
......@@ -123,20 +132,42 @@ ENTRY(entry_SYSENTER_compat)
pushq $X86_EFLAGS_FIXED
popfq
jmp .Lsysenter_flags_fixed
GLOBAL(__end_entry_SYSENTER_compat)
ENDPROC(entry_SYSENTER_compat)
/*
* 32-bit SYSCALL instruction entry.
* 32-bit SYSCALL entry.
*
* 32-bit system calls through the vDSO's __kernel_vsyscall enter here
* on 64-bit kernels running on AMD CPUs.
*
* The SYSCALL instruction, in principle, should *only* occur in the
* vDSO. In practice, it appears that this really is the case.
* As evidence:
*
* - The calling convention for SYSCALL has changed several times without
* anyone noticing.
*
* 32-bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11,
* then loads new ss, cs, and rip from previously programmed MSRs.
* rflags gets masked by a value from another MSR (so CLD and CLAC
* are not needed). SYSCALL does not save anything on the stack
* and does not change rsp.
* - Prior to the in-kernel X86_BUG_SYSRET_SS_ATTRS fixup, anything
* user task that did SYSCALL without immediately reloading SS
* would randomly crash.
*
* Note: rflags saving+masking-with-MSR happens only in Long mode
* - Most programmers do not directly target AMD CPUs, and the 32-bit
* SYSCALL instruction does not exist on Intel CPUs. Even on AMD
* CPUs, Linux disables the SYSCALL instruction on 32-bit kernels
* because the SYSCALL instruction in legacy/native 32-bit mode (as
* opposed to compat mode) is sufficiently poorly designed as to be
* essentially unusable.
*
* 32-bit SYSCALL saves RIP to RCX, clears RFLAGS.RF, then saves
* RFLAGS to R11, then loads new SS, CS, and RIP from previously
* programmed MSRs. RFLAGS gets masked by a value from another MSR
* (so CLD and CLAC are not needed). SYSCALL does not save anything on
* the stack and does not change RSP.
*
* Note: RFLAGS saving+masking-with-MSR happens only in Long mode
* (in legacy 32-bit mode, IF, RF and VM bits are cleared and that's it).
* Don't get confused: rflags saving+masking depends on Long Mode Active bit
* Don't get confused: RFLAGS saving+masking depends on Long Mode Active bit
* (EFER.LMA=1), NOT on bitness of userspace where SYSCALL executes
* or target CS descriptor's L bit (SYSCALL does not read segment descriptors).
*
......@@ -236,7 +267,21 @@ sysret32_from_system_call:
END(entry_SYSCALL_compat)
/*
* Emulated IA32 system calls via int 0x80.
* 32-bit legacy system call entry.
*
* 32-bit x86 Linux system calls traditionally used the INT $0x80
* instruction. INT $0x80 lands here.
*
* This entry point can be used by 32-bit and 64-bit programs to perform
* 32-bit system calls. Instances of INT $0x80 can be found inline in
* various programs and libraries. It is also used by the vDSO's
* __kernel_vsyscall fallback for hardware that doesn't support a faster
* entry method. Restarted 32-bit system calls also fall back to INT
* $0x80 regardless of what instruction was originally used to do the
* system call.
*
* This is considered a slow path. It is not used by most libc
* implementations on modern hardware except during process startup.
*
* Arguments:
* eax system call number
......@@ -245,17 +290,8 @@ END(entry_SYSCALL_compat)
* edx arg3
* esi arg4
* edi arg5
* ebp arg6 (note: not saved in the stack frame, should not be touched)
*
* Notes:
* Uses the same stack frame as the x86-64 version.
* All registers except eax must be saved (but ptrace may violate that).
* Arguments are zero extended. For system calls that want sign extension and
* take long arguments a wrapper is needed. Most calls can just be called
* directly.
* Assumes it is only called from user space and entered with interrupts off.
* ebp arg6
*/
ENTRY(entry_INT80_compat)
/*
* Interrupts are off on entry.
......@@ -300,7 +336,7 @@ ENTRY(entry_INT80_compat)
TRACE_IRQS_OFF
movq %rsp, %rdi
call do_syscall_32_irqs_off
call do_int80_syscall_32
.Lsyscall_32_done:
/* Go back to user mode. */
......
......@@ -6,17 +6,11 @@
#include <asm/asm-offsets.h>
#include <asm/syscall.h>
#ifdef CONFIG_IA32_EMULATION
#define SYM(sym, compat) compat
#else
#define SYM(sym, compat) sym
#endif
#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage long SYM(sym, compat)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
#define __SYSCALL_I386(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
#include <asm/syscalls_32.h>
#undef __SYSCALL_I386
#define __SYSCALL_I386(nr, sym, compat) [nr] = SYM(sym, compat),
#define __SYSCALL_I386(nr, sym, qual) [nr] = sym,
extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
......
......@@ -6,19 +6,14 @@
#include <asm/asm-offsets.h>
#include <asm/syscall.h>
#define __SYSCALL_COMMON(nr, sym, compat) __SYSCALL_64(nr, sym, compat)
#define __SYSCALL_64_QUAL_(sym) sym
#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_##sym
#ifdef CONFIG_X86_X32_ABI
# define __SYSCALL_X32(nr, sym, compat) __SYSCALL_64(nr, sym, compat)
#else
# define __SYSCALL_X32(nr, sym, compat) /* nothing */
#endif
#define __SYSCALL_64(nr, sym, compat) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long __SYSCALL_64_QUAL_##qual(sym)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
#include <asm/syscalls_64.h>
#undef __SYSCALL_64
#define __SYSCALL_64(nr, sym, compat) [nr] = sym,
#define __SYSCALL_64(nr, sym, qual) [nr] = __SYSCALL_64_QUAL_##qual(sym),
extern long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
......
......@@ -21,7 +21,7 @@
12 common brk sys_brk
13 64 rt_sigaction sys_rt_sigaction
14 common rt_sigprocmask sys_rt_sigprocmask
15 64 rt_sigreturn stub_rt_sigreturn
15 64 rt_sigreturn sys_rt_sigreturn/ptregs
16 64 ioctl sys_ioctl
17 common pread64 sys_pread64
18 common pwrite64 sys_pwrite64
......@@ -62,10 +62,10 @@
53 common socketpair sys_socketpair
54 64 setsockopt sys_setsockopt
55 64 getsockopt sys_getsockopt
56 common clone stub_clone
57 common fork stub_fork
58 common vfork stub_vfork
59 64 execve stub_execve
56 common clone sys_clone/ptregs
57 common fork sys_fork/ptregs
58 common vfork sys_vfork/ptregs
59 64 execve sys_execve/ptregs
60 common exit sys_exit
61 common wait4 sys_wait4
62 common kill sys_kill
......@@ -178,7 +178,7 @@
169 common reboot sys_reboot
170 common sethostname sys_sethostname
171 common setdomainname sys_setdomainname
172 common iopl sys_iopl
172 common iopl sys_iopl/ptregs
173 common ioperm sys_ioperm
174 64 create_module
175 common init_module sys_init_module
......@@ -328,7 +328,7 @@
319 common memfd_create sys_memfd_create
320 common kexec_file_load sys_kexec_file_load
321 common bpf sys_bpf
322 64 execveat stub_execveat
322 64 execveat sys_execveat/ptregs
323 common userfaultfd sys_userfaultfd
324 common membarrier sys_membarrier
325 common mlock2 sys_mlock2
......@@ -339,14 +339,14 @@
# for native 64-bit operation.
#
512 x32 rt_sigaction compat_sys_rt_sigaction
513 x32 rt_sigreturn stub_x32_rt_sigreturn
513 x32 rt_sigreturn sys32_x32_rt_sigreturn
514 x32 ioctl compat_sys_ioctl
515 x32 readv compat_sys_readv
516 x32 writev compat_sys_writev
517 x32 recvfrom compat_sys_recvfrom
518 x32 sendmsg compat_sys_sendmsg
519 x32 recvmsg compat_sys_recvmsg
520 x32 execve stub_x32_execve
520 x32 execve compat_sys_execve/ptregs
521 x32 ptrace compat_sys_ptrace
522 x32 rt_sigpending compat_sys_rt_sigpending
523 x32 rt_sigtimedwait compat_sys_rt_sigtimedwait
......@@ -371,4 +371,4 @@
542 x32 getsockopt compat_sys_getsockopt
543 x32 io_setup compat_sys_io_setup
544 x32 io_submit compat_sys_io_submit
545 x32 execveat stub_x32_execveat
545 x32 execveat compat_sys_execveat/ptregs
......@@ -3,13 +3,63 @@
in="$1"
out="$2"
syscall_macro() {
abi="$1"
nr="$2"
entry="$3"
# Entry can be either just a function name or "function/qualifier"
real_entry="${entry%%/*}"
qualifier="${entry:${#real_entry}}" # Strip the function name
qualifier="${qualifier:1}" # Strip the slash, if any
echo "__SYSCALL_${abi}($nr, $real_entry, $qualifier)"
}
emit() {
abi="$1"
nr="$2"
entry="$3"
compat="$4"
if [ "$abi" == "64" -a -n "$compat" ]; then
echo "a compat entry for a 64-bit syscall makes no sense" >&2
exit 1
fi
if [ -z "$compat" ]; then
if [ -n "$entry" ]; then
syscall_macro "$abi" "$nr" "$entry"
fi
else
echo "#ifdef CONFIG_X86_32"
if [ -n "$entry" ]; then
syscall_macro "$abi" "$nr" "$entry"
fi
echo "#else"
syscall_macro "$abi" "$nr" "$compat"
echo "#endif"
fi
}
grep '^[0-9]' "$in" | sort -n | (
while read nr abi name entry compat; do
abi=`echo "$abi" | tr '[a-z]' '[A-Z]'`
if [ -n "$compat" ]; then
echo "__SYSCALL_${abi}($nr, $entry, $compat)"
elif [ -n "$entry" ]; then
echo "__SYSCALL_${abi}($nr, $entry, $entry)"
if [ "$abi" == "COMMON" -o "$abi" == "64" ]; then
# COMMON is the same as 64, except that we don't expect X32
# programs to use it. Our expectation has nothing to do with
# any generated code, so treat them the same.
emit 64 "$nr" "$entry" "$compat"
elif [ "$abi" == "X32" ]; then
# X32 is equivalent to 64 on an X32-compatible kernel.
echo "#ifdef CONFIG_X86_X32_ABI"
emit 64 "$nr" "$entry" "$compat"
echo "#endif"
elif [ "$abi" == "I386" ]; then
emit "$abi" "$nr" "$entry" "$compat"
else
echo "Unknown abi $abi" >&2
exit 1
fi
done
) > "$out"
......@@ -150,16 +150,9 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
}
fprintf(outfile, "\n};\n\n");
fprintf(outfile, "static struct page *pages[%lu];\n\n",
mapping_size / 4096);
fprintf(outfile, "const struct vdso_image %s = {\n", name);
fprintf(outfile, "\t.data = raw_data,\n");
fprintf(outfile, "\t.size = %lu,\n", mapping_size);
fprintf(outfile, "\t.text_mapping = {\n");
fprintf(outfile, "\t\t.name = \"[vdso]\",\n");
fprintf(outfile, "\t\t.pages = pages,\n");
fprintf(outfile, "\t},\n");
if (alt_sec) {
fprintf(outfile, "\t.alt = %lu,\n",
(unsigned long)GET_LE(&alt_sec->sh_offset));
......
......@@ -11,7 +11,6 @@
#include <linux/kernel.h>
#include <linux/mm_types.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/vdso.h>
......
......@@ -3,7 +3,7 @@
*/
#include <asm/dwarf2.h>
#include <asm/cpufeature.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
/*
......
......@@ -20,6 +20,7 @@
#include <asm/page.h>
#include <asm/hpet.h>
#include <asm/desc.h>
#include <asm/cpufeature.h>
#if defined(CONFIG_X86_64)
unsigned int __read_mostly vdso64_enabled = 1;
......@@ -27,13 +28,7 @@ unsigned int __read_mostly vdso64_enabled = 1;
void __init init_vdso_image(const struct vdso_image *image)
{
int i;
int npages = (image->size) / PAGE_SIZE;
BUG_ON(image->size % PAGE_SIZE != 0);
for (i = 0; i < npages; i++)
image->text_mapping.pages[i] =
virt_to_page(image->data + i*PAGE_SIZE);
apply_alternatives((struct alt_instr *)(image->data + image->alt),
(struct alt_instr *)(image->data + image->alt +
......@@ -90,18 +85,87 @@ static unsigned long vdso_addr(unsigned long start, unsigned len)
#endif
}
static int vdso_fault(const struct vm_special_mapping *sm,
struct vm_area_struct *vma, struct vm_fault *vmf)
{
const struct vdso_image *image = vma->vm_mm->context.vdso_image;
if (!image || (vmf->pgoff << PAGE_SHIFT) >= image->size)
return VM_FAULT_SIGBUS;
vmf->page = virt_to_page(image->data + (vmf->pgoff << PAGE_SHIFT));
get_page(vmf->page);
return 0;
}
static const struct vm_special_mapping text_mapping = {
.name = "[vdso]",
.fault = vdso_fault,
};
static int vvar_fault(const struct vm_special_mapping *sm,
struct vm_area_struct *vma, struct vm_fault *vmf)
{
const struct vdso_image *image = vma->vm_mm->context.vdso_image;
long sym_offset;
int ret = -EFAULT;
if (!image)
return VM_FAULT_SIGBUS;
sym_offset = (long)(vmf->pgoff << PAGE_SHIFT) +
image->sym_vvar_start;
/*
* Sanity check: a symbol offset of zero means that the page
* does not exist for this vdso image, not that the page is at
* offset zero relative to the text mapping. This should be
* impossible here, because sym_offset should only be zero for
* the page past the end of the vvar mapping.
*/
if (sym_offset == 0)
return VM_FAULT_SIGBUS;
if (sym_offset == image->sym_vvar_page) {
ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address,
__pa_symbol(&__vvar_page) >> PAGE_SHIFT);
} else if (sym_offset == image->sym_hpet_page) {
#ifdef CONFIG_HPET_TIMER
if (hpet_address && vclock_was_used(VCLOCK_HPET)) {
ret = vm_insert_pfn_prot(
vma,
(unsigned long)vmf->virtual_address,
hpet_address >> PAGE_SHIFT,
pgprot_noncached(PAGE_READONLY));
}
#endif
} else if (sym_offset == image->sym_pvclock_page) {
struct pvclock_vsyscall_time_info *pvti =
pvclock_pvti_cpu0_va();
if (pvti && vclock_was_used(VCLOCK_PVCLOCK)) {
ret = vm_insert_pfn(
vma,
(unsigned long)vmf->virtual_address,
__pa(pvti) >> PAGE_SHIFT);
}
}
if (ret == 0 || ret == -EBUSY)
return VM_FAULT_NOPAGE;
return VM_FAULT_SIGBUS;
}
static int map_vdso(const struct vdso_image *image, bool calculate_addr)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
unsigned long addr, text_start;
int ret = 0;
static struct page *no_pages[] = {NULL};
static struct vm_special_mapping vvar_mapping = {
static const struct vm_special_mapping vvar_mapping = {
.name = "[vvar]",
.pages = no_pages,
.fault = vvar_fault,
};
struct pvclock_vsyscall_time_info *pvti;
if (calculate_addr) {
addr = vdso_addr(current->mm->start_stack,
......@@ -121,6 +185,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
text_start = addr - image->sym_vvar_start;
current->mm->context.vdso = (void __user *)text_start;
current->mm->context.vdso_image = image;
/*
* MAYWRITE to allow gdb to COW and set breakpoints
......@@ -130,7 +195,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
image->size,
VM_READ|VM_EXEC|
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
&image->text_mapping);
&text_mapping);
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
......@@ -140,7 +205,8 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
vma = _install_special_mapping(mm,
addr,
-image->sym_vvar_start,
VM_READ|VM_MAYREAD,
VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP|
VM_PFNMAP,
&vvar_mapping);
if (IS_ERR(vma)) {
......@@ -148,41 +214,6 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
goto up_fail;
}
if (image->sym_vvar_page)
ret = remap_pfn_range(vma,
text_start + image->sym_vvar_page,
__pa_symbol(&__vvar_page) >> PAGE_SHIFT,
PAGE_SIZE,
PAGE_READONLY);
if (ret)
goto up_fail;
#ifdef CONFIG_HPET_TIMER
if (hpet_address && image->sym_hpet_page) {
ret = io_remap_pfn_range(vma,
text_start + image->sym_hpet_page,
hpet_address >> PAGE_SHIFT,
PAGE_SIZE,
pgprot_noncached(PAGE_READONLY));
if (ret)
goto up_fail;
}
#endif
pvti = pvclock_pvti_cpu0_va();
if (pvti && image->sym_pvclock_page) {
ret = remap_pfn_range(vma,
text_start + image->sym_pvclock_page,
__pa(pvti) >> PAGE_SHIFT,
PAGE_SIZE,
PAGE_READONLY);
if (ret)
goto up_fail;
}
up_fail:
if (ret)
current->mm->context.vdso = NULL;
......@@ -254,7 +285,7 @@ static void vgetcpu_cpu_init(void *arg)
#ifdef CONFIG_NUMA
node = cpu_to_node(cpu);
#endif
if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP))
if (static_cpu_has(X86_FEATURE_RDTSCP))
write_rdtscp_aux((node << 12) | cpu);
/*
......
......@@ -16,6 +16,8 @@
#include <asm/vgtod.h>
#include <asm/vvar.h>
int vclocks_used __read_mostly;
DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data);
void update_vsyscall_tz(void)
......@@ -26,12 +28,17 @@ void update_vsyscall_tz(void)
void update_vsyscall(struct timekeeper *tk)
{
int vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data;
/* Mark the new vclock used. */
BUILD_BUG_ON(VCLOCK_MAX >= 32);
WRITE_ONCE(vclocks_used, READ_ONCE(vclocks_used) | (1 << vclock_mode));
gtod_write_begin(vdata);
/* copy vsyscall data */
vdata->vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
vdata->vclock_mode = vclock_mode;
vdata->cycle_last = tk->tkr_mono.cycle_last;
vdata->mask = tk->tkr_mono.mask;
vdata->mult = tk->tkr_mono.mult;
......
......@@ -151,12 +151,6 @@ static inline int alternatives_text_reserved(void *start, void *end)
ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \
".popsection"
/*
* This must be included *after* the definition of ALTERNATIVE due to
* <asm/arch_hweight.h>
*/
#include <asm/cpufeature.h>
/*
* Alternative instructions for different CPU types or capabilities.
*
......
......@@ -6,7 +6,6 @@
#include <asm/alternative.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/apicdef.h>
#include <linux/atomic.h>
#include <asm/fixmap.h>
......
#ifndef _ASM_X86_HWEIGHT_H
#define _ASM_X86_HWEIGHT_H
#include <asm/cpufeatures.h>
#ifdef CONFIG_64BIT
/* popcnt %edi, %eax -- redundant REX prefix for alignment */
#define POPCNT32 ".byte 0xf3,0x40,0x0f,0xb8,0xc7"
......
......@@ -91,7 +91,7 @@ set_bit(long nr, volatile unsigned long *addr)
* If it's called on the same region of memory simultaneously, the effect
* may be that only one operation succeeds.
*/
static inline void __set_bit(long nr, volatile unsigned long *addr)
static __always_inline void __set_bit(long nr, volatile unsigned long *addr)
{
asm volatile("bts %1,%0" : ADDR : "Ir" (nr) : "memory");
}
......@@ -128,13 +128,13 @@ clear_bit(long nr, volatile unsigned long *addr)
* clear_bit() is atomic and implies release semantics before the memory
* operation. It can be used for an unlock.
*/
static inline void clear_bit_unlock(long nr, volatile unsigned long *addr)
static __always_inline void clear_bit_unlock(long nr, volatile unsigned long *addr)
{
barrier();
clear_bit(nr, addr);
}
static inline void __clear_bit(long nr, volatile unsigned long *addr)
static __always_inline void __clear_bit(long nr, volatile unsigned long *addr)
{
asm volatile("btr %1,%0" : ADDR : "Ir" (nr));
}
......@@ -151,7 +151,7 @@ static inline void __clear_bit(long nr, volatile unsigned long *addr)
* No memory barrier is required here, because x86 cannot reorder stores past
* older loads. Same principle as spin_unlock.
*/
static inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
{
barrier();
__clear_bit(nr, addr);
......@@ -166,7 +166,7 @@ static inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
* If it's called on the same region of memory simultaneously, the effect
* may be that only one operation succeeds.
*/
static inline void __change_bit(long nr, volatile unsigned long *addr)
static __always_inline void __change_bit(long nr, volatile unsigned long *addr)
{
asm volatile("btc %1,%0" : ADDR : "Ir" (nr));
}
......@@ -180,7 +180,7 @@ static inline void __change_bit(long nr, volatile unsigned long *addr)
* Note that @nr may be almost arbitrarily large; this function is not
* restricted to acting on a single-word quantity.
*/
static inline void change_bit(long nr, volatile unsigned long *addr)
static __always_inline void change_bit(long nr, volatile unsigned long *addr)
{
if (IS_IMMEDIATE(nr)) {
asm volatile(LOCK_PREFIX "xorb %1,%0"
......@@ -201,7 +201,7 @@ static inline void change_bit(long nr, volatile unsigned long *addr)
* This operation is atomic and cannot be reordered.
* It also implies a memory barrier.
*/
static inline int test_and_set_bit(long nr, volatile unsigned long *addr)
static __always_inline int test_and_set_bit(long nr, volatile unsigned long *addr)
{
GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, "Ir", nr, "%0", "c");
}
......@@ -228,7 +228,7 @@ test_and_set_bit_lock(long nr, volatile unsigned long *addr)
* If two examples of this operation race, one can appear to succeed
* but actually fail. You must protect multiple accesses with a lock.
*/
static inline int __test_and_set_bit(long nr, volatile unsigned long *addr)
static __always_inline int __test_and_set_bit(long nr, volatile unsigned long *addr)
{
int oldbit;
......@@ -247,7 +247,7 @@ static inline int __test_and_set_bit(long nr, volatile unsigned long *addr)
* This operation is atomic and cannot be reordered.
* It also implies a memory barrier.
*/
static inline int test_and_clear_bit(long nr, volatile unsigned long *addr)
static __always_inline int test_and_clear_bit(long nr, volatile unsigned long *addr)
{
GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, "Ir", nr, "%0", "c");
}
......@@ -268,7 +268,7 @@ static inline int test_and_clear_bit(long nr, volatile unsigned long *addr)
* accessed from a hypervisor on the same CPU if running in a VM: don't change
* this without also updating arch/x86/kernel/kvm.c
*/
static inline int __test_and_clear_bit(long nr, volatile unsigned long *addr)
static __always_inline int __test_and_clear_bit(long nr, volatile unsigned long *addr)
{
int oldbit;
......@@ -280,7 +280,7 @@ static inline int __test_and_clear_bit(long nr, volatile unsigned long *addr)
}
/* WARNING: non atomic and it can be reordered! */
static inline int __test_and_change_bit(long nr, volatile unsigned long *addr)
static __always_inline int __test_and_change_bit(long nr, volatile unsigned long *addr)
{
int oldbit;
......@@ -300,7 +300,7 @@ static inline int __test_and_change_bit(long nr, volatile unsigned long *addr)
* This operation is atomic and cannot be reordered.
* It also implies a memory barrier.
*/
static inline int test_and_change_bit(long nr, volatile unsigned long *addr)
static __always_inline int test_and_change_bit(long nr, volatile unsigned long *addr)
{
GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, "Ir", nr, "%0", "c");
}
......@@ -311,7 +311,7 @@ static __always_inline int constant_test_bit(long nr, const volatile unsigned lo
(addr[nr >> _BITOPS_LONG_SHIFT])) != 0;
}
static inline int variable_test_bit(long nr, volatile const unsigned long *addr)
static __always_inline int variable_test_bit(long nr, volatile const unsigned long *addr)
{
int oldbit;
......@@ -343,7 +343,7 @@ static int test_bit(int nr, const volatile unsigned long *addr);
*
* Undefined if no bit exists, so code should check against 0 first.
*/
static inline unsigned long __ffs(unsigned long word)
static __always_inline unsigned long __ffs(unsigned long word)
{
asm("rep; bsf %1,%0"
: "=r" (word)
......@@ -357,7 +357,7 @@ static inline unsigned long __ffs(unsigned long word)
*
* Undefined if no zero exists, so code should check against ~0UL first.
*/
static inline unsigned long ffz(unsigned long word)
static __always_inline unsigned long ffz(unsigned long word)
{
asm("rep; bsf %1,%0"
: "=r" (word)
......@@ -371,7 +371,7 @@ static inline unsigned long ffz(unsigned long word)
*
* Undefined if no set bit exists, so code should check against 0 first.
*/
static inline unsigned long __fls(unsigned long word)
static __always_inline unsigned long __fls(unsigned long word)
{
asm("bsr %1,%0"
: "=r" (word)
......@@ -393,7 +393,7 @@ static inline unsigned long __fls(unsigned long word)
* set bit if value is nonzero. The first (least significant) bit
* is at position 1.
*/
static inline int ffs(int x)
static __always_inline int ffs(int x)
{
int r;
......@@ -434,7 +434,7 @@ static inline int ffs(int x)
* set bit if value is nonzero. The last (most significant) bit is
* at position 32.
*/
static inline int fls(int x)
static __always_inline int fls(int x)
{
int r;
......
......@@ -3,10 +3,11 @@
#ifndef _ASM_X86_CLOCKSOURCE_H
#define _ASM_X86_CLOCKSOURCE_H
#define VCLOCK_NONE 0 /* No vDSO clock available. */
#define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */
#define VCLOCK_HPET 2 /* vDSO should use vread_hpet. */
#define VCLOCK_PVCLOCK 3 /* vDSO should use vread_pvclock. */
#define VCLOCK_NONE 0 /* No vDSO clock available. */
#define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */
#define VCLOCK_HPET 2 /* vDSO should use vread_hpet. */
#define VCLOCK_PVCLOCK 3 /* vDSO should use vread_pvclock. */
#define VCLOCK_MAX 3
struct arch_clocksource_data {
int vclock_mode;
......
......@@ -2,6 +2,7 @@
#define ASM_X86_CMPXCHG_H
#include <linux/compiler.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h> /* Provides LOCK_PREFIX */
/*
......
This diff is collapsed.
This diff is collapsed.
......@@ -98,4 +98,27 @@ struct desc_ptr {
#endif /* !__ASSEMBLY__ */
/* Access rights as returned by LAR */
#define AR_TYPE_RODATA (0 * (1 << 9))
#define AR_TYPE_RWDATA (1 * (1 << 9))
#define AR_TYPE_RODATA_EXPDOWN (2 * (1 << 9))
#define AR_TYPE_RWDATA_EXPDOWN (3 * (1 << 9))
#define AR_TYPE_XOCODE (4 * (1 << 9))
#define AR_TYPE_XRCODE (5 * (1 << 9))
#define AR_TYPE_XOCODE_CONF (6 * (1 << 9))
#define AR_TYPE_XRCODE_CONF (7 * (1 << 9))
#define AR_TYPE_MASK (7 * (1 << 9))
#define AR_DPL0 (0 * (1 << 13))
#define AR_DPL3 (3 * (1 << 13))
#define AR_DPL_MASK (3 * (1 << 13))
#define AR_A (1 << 8) /* "Accessed" */
#define AR_S (1 << 12) /* If clear, "System" segment */
#define AR_P (1 << 15) /* "Present" */
#define AR_AVL (1 << 20) /* "AVaiLable" (no HW effect) */
#define AR_L (1 << 21) /* "Long mode" for code segments */
#define AR_DB (1 << 22) /* D/B, effect depends on type */
#define AR_G (1 << 23) /* "Granularity" (limit in pages) */
#endif /* _ASM_X86_DESC_DEFS_H */
......@@ -17,6 +17,7 @@
#include <asm/user.h>
#include <asm/fpu/api.h>
#include <asm/fpu/xstate.h>
#include <asm/cpufeature.h>
/*
* High level FPU state handling functions:
......@@ -58,22 +59,22 @@ extern u64 fpu__get_supported_xfeatures_mask(void);
*/
static __always_inline __pure bool use_eager_fpu(void)
{
return static_cpu_has_safe(X86_FEATURE_EAGER_FPU);
return static_cpu_has(X86_FEATURE_EAGER_FPU);
}
static __always_inline __pure bool use_xsaveopt(void)
{
return static_cpu_has_safe(X86_FEATURE_XSAVEOPT);
return static_cpu_has(X86_FEATURE_XSAVEOPT);
}
static __always_inline __pure bool use_xsave(void)
{
return static_cpu_has_safe(X86_FEATURE_XSAVE);
return static_cpu_has(X86_FEATURE_XSAVE);
}
static __always_inline __pure bool use_fxsr(void)
{
return static_cpu_has_safe(X86_FEATURE_FXSR);
return static_cpu_has(X86_FEATURE_FXSR);
}
/*
......@@ -300,7 +301,7 @@ static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate)
WARN_ON(system_state != SYSTEM_BOOTING);
if (static_cpu_has_safe(X86_FEATURE_XSAVES))
if (static_cpu_has(X86_FEATURE_XSAVES))
XSTATE_OP(XSAVES, xstate, lmask, hmask, err);
else
XSTATE_OP(XSAVE, xstate, lmask, hmask, err);
......@@ -322,7 +323,7 @@ static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate)
WARN_ON(system_state != SYSTEM_BOOTING);
if (static_cpu_has_safe(X86_FEATURE_XSAVES))
if (static_cpu_has(X86_FEATURE_XSAVES))
XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
else
XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
......@@ -460,7 +461,7 @@ static inline void copy_kernel_to_fpregs(union fpregs_state *fpstate)
* pending. Clear the x87 state here by setting it to fixed values.
* "m" is a random variable that should be in L1.
*/
if (unlikely(static_cpu_has_bug_safe(X86_BUG_FXSAVE_LEAK))) {
if (unlikely(static_cpu_has_bug(X86_BUG_FXSAVE_LEAK))) {
asm volatile(
"fnclex\n\t"
"emms\n\t"
......
#ifdef __ASSEMBLY__
#ifndef _ASM_X86_FRAME_H
#define _ASM_X86_FRAME_H
#include <asm/asm.h>
/* The annotation hides the frame from the unwinder and makes it look
like a ordinary ebp save/restore. This avoids some special cases for
frame pointer later */
/*
* These are stack frame creation macros. They should be used by every
* callable non-leaf asm function to make kernel stack traces more reliable.
*/
#ifdef CONFIG_FRAME_POINTER
.macro FRAME
__ASM_SIZE(push,) %__ASM_REG(bp)
__ASM_SIZE(mov) %__ASM_REG(sp), %__ASM_REG(bp)
.endm
.macro ENDFRAME
__ASM_SIZE(pop,) %__ASM_REG(bp)
.endm
#else
.macro FRAME
.endm
.macro ENDFRAME
.endm
#endif
#endif /* __ASSEMBLY__ */
#ifdef __ASSEMBLY__
.macro FRAME_BEGIN
push %_ASM_BP
_ASM_MOV %_ASM_SP, %_ASM_BP
.endm
.macro FRAME_END
pop %_ASM_BP
.endm
#else /* !__ASSEMBLY__ */
#define FRAME_BEGIN \
"push %" _ASM_BP "\n" \
_ASM_MOV "%" _ASM_SP ", %" _ASM_BP "\n"
#define FRAME_END "pop %" _ASM_BP "\n"
#endif /* __ASSEMBLY__ */
#define FRAME_OFFSET __ASM_SEL(4, 8)
#else /* !CONFIG_FRAME_POINTER */
#define FRAME_BEGIN
#define FRAME_END
#define FRAME_OFFSET 0
#endif /* CONFIG_FRAME_POINTER */
#endif /* _ASM_X86_FRAME_H */
#ifndef _ASM_IRQ_WORK_H
#define _ASM_IRQ_WORK_H
#include <asm/processor.h>
#include <asm/cpufeature.h>
static inline bool arch_irq_work_has_interrupt(void)
{
......
......@@ -135,6 +135,7 @@ struct mca_config {
bool ignore_ce;
bool disabled;
bool ser;
bool recovery;
bool bios_cmci_threshold;
u8 banks;
s8 bootlog;
......
......@@ -19,7 +19,8 @@ typedef struct {
#endif
struct mutex lock;
void __user *vdso;
void __user *vdso; /* vdso base address */
const struct vdso_image *vdso_image; /* vdso image in use */
atomic_t perf_rdpmc_allowed; /* nonzero if rdpmc is allowed */
} mm_context_t;
......
#ifndef _ASM_X86_MSR_INDEX_H
#define _ASM_X86_MSR_INDEX_H
/* CPU model specific register (MSR) numbers */
/*
* CPU model specific register (MSR) numbers.
*
* Do not add new entries to this file unless the definitions are shared
* between multiple compilation units.
*/
/* x86-64 specific MSRs */
#define MSR_EFER 0xc0000080 /* extended feature register */
......
......@@ -3,6 +3,8 @@
#include <linux/sched.h>
#include <asm/cpufeature.h>
#define MWAIT_SUBSTATE_MASK 0xf
#define MWAIT_CSTATE_MASK 0xf
#define MWAIT_SUBSTATE_SIZE 4
......
......@@ -13,7 +13,7 @@ struct vm86;
#include <asm/types.h>
#include <uapi/asm/sigcontext.h>
#include <asm/current.h>
#include <asm/cpufeature.h>
#include <asm/cpufeatures.h>
#include <asm/page.h>
#include <asm/pgtable_types.h>
#include <asm/percpu.h>
......@@ -24,7 +24,6 @@ struct vm86;
#include <asm/fpu/types.h>
#include <linux/personality.h>
#include <linux/cpumask.h>
#include <linux/cache.h>
#include <linux/threads.h>
#include <linux/math64.h>
......@@ -300,10 +299,13 @@ struct tss_struct {
*/
unsigned long io_bitmap[IO_BITMAP_LONGS + 1];
#ifdef CONFIG_X86_32
/*
* Space for the temporary SYSENTER stack:
* Space for the temporary SYSENTER stack.
*/
unsigned long SYSENTER_stack_canary;
unsigned long SYSENTER_stack[64];
#endif
} ____cacheline_aligned;
......
......@@ -7,12 +7,23 @@
void syscall_init(void);
#ifdef CONFIG_X86_64
void entry_SYSCALL_64(void);
void entry_SYSCALL_compat(void);
#endif
#ifdef CONFIG_X86_32
void entry_INT80_32(void);
void entry_INT80_compat(void);
void entry_SYSENTER_32(void);
void __begin_SYSENTER_singlestep_region(void);
void __end_SYSENTER_singlestep_region(void);
#endif
#ifdef CONFIG_IA32_EMULATION
void entry_SYSENTER_compat(void);
void __end_entry_SYSENTER_compat(void);
void entry_SYSCALL_compat(void);
void entry_INT80_compat(void);
#endif
void x86_configure_nx(void);
void x86_report_nx(void);
......
......@@ -13,7 +13,6 @@
X86_EFLAGS_CF | X86_EFLAGS_RF)
void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc);
int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
struct pt_regs *regs, unsigned long mask);
......
......@@ -15,7 +15,7 @@
#include <linux/stringify.h>
#include <asm/nops.h>
#include <asm/cpufeature.h>
#include <asm/cpufeatures.h>
/* "Raw" instruction opcodes */
#define __ASM_CLAC .byte 0x0f,0x01,0xca
......
......@@ -16,7 +16,6 @@
#endif
#include <asm/thread_info.h>
#include <asm/cpumask.h>
#include <asm/cpufeature.h>
extern int smp_num_siblings;
extern unsigned int num_processors;
......
......@@ -49,7 +49,7 @@
*/
#ifndef __ASSEMBLY__
struct task_struct;
#include <asm/processor.h>
#include <asm/cpufeature.h>
#include <linux/atomic.h>
struct thread_info {
......@@ -134,10 +134,13 @@ struct thread_info {
#define _TIF_ADDR32 (1 << TIF_ADDR32)
#define _TIF_X32 (1 << TIF_X32)
/* work to do in syscall_trace_enter() */
/*
* work to do in syscall_trace_enter(). Also includes TIF_NOHZ for
* enter_from_user_mode()
*/
#define _TIF_WORK_SYSCALL_ENTRY \
(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT | \
_TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT | \
_TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \
_TIF_NOHZ)
/* work to do on any return to user space */
......
......@@ -5,6 +5,7 @@
#include <linux/sched.h>
#include <asm/processor.h>
#include <asm/cpufeature.h>
#include <asm/special_insns.h>
#ifdef CONFIG_PARAVIRT
......
......@@ -8,7 +8,7 @@
#include <linux/errno.h>
#include <linux/lockdep.h>
#include <asm/alternative.h>
#include <asm/cpufeature.h>
#include <asm/cpufeatures.h>
#include <asm/page.h>
/*
......
......@@ -13,9 +13,6 @@ struct vdso_image {
void *data;
unsigned long size; /* Always a multiple of PAGE_SIZE */
/* text_mapping.pages is big enough for data/size page pointers */
struct vm_special_mapping text_mapping;
unsigned long alt, alt_len;
long sym_vvar_start; /* Negative offset to the vvar area */
......
......@@ -37,6 +37,12 @@ struct vsyscall_gtod_data {
};
extern struct vsyscall_gtod_data vsyscall_gtod_data;
extern int vclocks_used;
static inline bool vclock_was_used(int vclock)
{
return READ_ONCE(vclocks_used) & (1 << vclock);
}
static inline unsigned gtod_read_begin(const struct vsyscall_gtod_data *s)
{
unsigned ret;
......
......@@ -256,7 +256,7 @@ struct sigcontext_64 {
__u16 cs;
__u16 gs;
__u16 fs;
__u16 __pad0;
__u16 ss;
__u64 err;
__u64 trapno;
__u64 oldmask;
......@@ -341,9 +341,37 @@ struct sigcontext {
__u64 rip;
__u64 eflags; /* RFLAGS */
__u16 cs;
/*
* Prior to 2.5.64 ("[PATCH] x86-64 updates for 2.5.64-bk3"),
* Linux saved and restored fs and gs in these slots. This
* was counterproductive, as fsbase and gsbase were never
* saved, so arch_prctl was presumably unreliable.
*
* These slots should never be reused without extreme caution:
*
* - Some DOSEMU versions stash fs and gs in these slots manually,
* thus overwriting anything the kernel expects to be preserved
* in these slots.
*
* - If these slots are ever needed for any other purpose,
* there is some risk that very old 64-bit binaries could get
* confused. I doubt that many such binaries still work,
* though, since the same patch in 2.5.64 also removed the
* 64-bit set_thread_area syscall, so it appears that there
* is no TLS API beyond modify_ldt that works in both pre-
* and post-2.5.64 kernels.
*
* If the kernel ever adds explicit fs, gs, fsbase, and gsbase
* save/restore, it will most likely need to be opt-in and use
* different context slots.
*/
__u16 gs;
__u16 fs;
__u16 __pad0;
union {
__u16 ss; /* If UC_SIGCONTEXT_SS */
__u16 __pad0; /* Alias name for old (!UC_SIGCONTEXT_SS) user-space */
};
__u64 err;
__u64 trapno;
__u64 oldmask;
......
#ifndef _ASM_X86_UCONTEXT_H
#define _ASM_X86_UCONTEXT_H
#define UC_FP_XSTATE 0x1 /* indicates the presence of extended state
* information in the memory layout pointed
* by the fpstate pointer in the ucontext's
* sigcontext struct (uc_mcontext).
*/
/*
* Indicates the presence of extended state information in the memory
* layout pointed by the fpstate pointer in the ucontext's sigcontext
* struct (uc_mcontext).
*/
#define UC_FP_XSTATE 0x1
#ifdef __x86_64__
/*
* UC_SIGCONTEXT_SS will be set when delivering 64-bit or x32 signals on
* kernels that save SS in the sigcontext. All kernels that set
* UC_SIGCONTEXT_SS will correctly restore at least the low 32 bits of esp
* regardless of SS (i.e. they implement espfix).
*
* Kernels that set UC_SIGCONTEXT_SS will also set UC_STRICT_RESTORE_SS
* when delivering a signal that came from 64-bit code.
*
* Sigreturn restores SS as follows:
*
* if (saved SS is valid || UC_STRICT_RESTORE_SS is set ||
* saved CS is not 64-bit)
* new SS = saved SS (will fail IRET and signal if invalid)
* else
* new SS = a flat 32-bit data segment
*
* This behavior serves three purposes:
*
* - Legacy programs that construct a 64-bit sigcontext from scratch
* with zero or garbage in the SS slot (e.g. old CRIU) and call
* sigreturn will still work.
*
* - Old DOSEMU versions sometimes catch a signal from a segmented
* context, delete the old SS segment (with modify_ldt), and change
* the saved CS to a 64-bit segment. These DOSEMU versions expect
* sigreturn to send them back to 64-bit mode without killing them,
* despite the fact that the SS selector when the signal was raised is
* no longer valid. UC_STRICT_RESTORE_SS will be clear, so the kernel
* will fix up SS for these DOSEMU versions.
*
* - Old and new programs that catch a signal and return without
* modifying the saved context will end up in exactly the state they
* started in, even if they were running in a segmented context when
* the signal was raised.. Old kernels would lose track of the
* previous SS value.
*/
#define UC_SIGCONTEXT_SS 0x2
#define UC_STRICT_RESTORE_SS 0x4
#endif
#include <asm-generic/ucontext.h>
......
......@@ -30,7 +30,7 @@ static unsigned int numachip1_get_apic_id(unsigned long x)
unsigned long value;
unsigned int id = (x >> 24) & 0xff;
if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) {
if (static_cpu_has(X86_FEATURE_NODEID_MSR)) {
rdmsrl(MSR_FAM10H_NODE_ID, value);
id |= (value << 2) & 0xff00;
}
......@@ -178,7 +178,7 @@ static void fixup_cpu_id(struct cpuinfo_x86 *c, int node)
this_cpu_write(cpu_llc_id, node);
/* Account for nodes per socket in multi-core-module processors */
if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) {
if (static_cpu_has(X86_FEATURE_NODEID_MSR)) {
rdmsrl(MSR_FAM10H_NODE_ID, val);
nodes = ((val >> 3) & 7) + 1;
}
......
......@@ -59,7 +59,6 @@ void common(void) {
#ifdef CONFIG_PARAVIRT
BLANK();
OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops);
OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops);
OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
......
......@@ -7,7 +7,7 @@
#include <linux/lguest.h>
#include "../../../drivers/lguest/lg.h"
#define __SYSCALL_I386(nr, sym, compat) [nr] = 1,
#define __SYSCALL_I386(nr, sym, qual) [nr] = 1,
static char syscalls[] = {
#include <asm/syscalls_32.h>
};
......@@ -52,6 +52,11 @@ void foo(void)
DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) -
offsetofend(struct tss_struct, SYSENTER_stack));
/* Offset from cpu_tss to SYSENTER_stack */
OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack);
/* Size of SYSENTER_stack */
DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack));
#if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE)
BLANK();
OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled);
......
......@@ -4,17 +4,11 @@
#include <asm/ia32.h>
#define __SYSCALL_64(nr, sym, compat) [nr] = 1,
#define __SYSCALL_COMMON(nr, sym, compat) [nr] = 1,
#ifdef CONFIG_X86_X32_ABI
# define __SYSCALL_X32(nr, sym, compat) [nr] = 1,
#else
# define __SYSCALL_X32(nr, sym, compat) /* nothing */
#endif
#define __SYSCALL_64(nr, sym, qual) [nr] = 1,
static char syscalls_64[] = {
#include <asm/syscalls_64.h>
};
#define __SYSCALL_I386(nr, sym, compat) [nr] = 1,
#define __SYSCALL_I386(nr, sym, qual) [nr] = 1,
static char syscalls_ia32[] = {
#include <asm/syscalls_32.h>
};
......
......@@ -42,7 +42,7 @@ ifdef CONFIG_X86_FEATURE_NAMES
quiet_cmd_mkcapflags = MKCAP $@
cmd_mkcapflags = $(CONFIG_SHELL) $(srctree)/$(src)/mkcapflags.sh $< $@
cpufeature = $(src)/../../include/asm/cpufeature.h
cpufeature = $(src)/../../include/asm/cpufeatures.h
targets += capflags.c
$(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.sh FORCE
......
#include <linux/bitops.h>
#include <linux/kernel.h>
#include <asm/processor.h>
#include <asm/cpufeature.h>
#include <asm/e820.h>
#include <asm/mtrr.h>
#include <asm/msr.h>
......
......@@ -800,6 +800,31 @@ static void detect_nopl(struct cpuinfo_x86 *c)
clear_cpu_cap(c, X86_FEATURE_NOPL);
#else
set_cpu_cap(c, X86_FEATURE_NOPL);
#endif
/*
* ESPFIX is a strange bug. All real CPUs have it. Paravirt
* systems that run Linux at CPL > 0 may or may not have the
* issue, but, even if they have the issue, there's absolutely
* nothing we can do about it because we can't use the real IRET
* instruction.
*
* NB: For the time being, only 32-bit kernels support
* X86_BUG_ESPFIX as such. 64-bit kernels directly choose
* whether to apply espfix using paravirt hooks. If any
* non-paravirt system ever shows up that does *not* have the
* ESPFIX issue, we can change this.
*/
#ifdef CONFIG_X86_32
#ifdef CONFIG_PARAVIRT
do {
extern void native_iret(void);
if (pv_cpu_ops.iret == native_iret)
set_cpu_bug(c, X86_BUG_ESPFIX);
} while (0);
#else
set_cpu_bug(c, X86_BUG_ESPFIX);
#endif
#endif
}
......@@ -1475,20 +1500,6 @@ void cpu_init(void)
}
#endif
#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS
void warn_pre_alternatives(void)
{
WARN(1, "You're using static_cpu_has before alternatives have run!\n");
}
EXPORT_SYMBOL_GPL(warn_pre_alternatives);
#endif
inline bool __static_cpu_has_safe(u16 bit)
{
return boot_cpu_has(bit);
}
EXPORT_SYMBOL_GPL(__static_cpu_has_safe);
static void bsp_resume(void)
{
if (this_cpu->c_bsp_resume)
......
......@@ -8,6 +8,7 @@
#include <linux/timer.h>
#include <asm/pci-direct.h>
#include <asm/tsc.h>
#include <asm/cpufeature.h>
#include "cpu.h"
......
......@@ -8,7 +8,7 @@
#include <linux/module.h>
#include <linux/uaccess.h>
#include <asm/processor.h>
#include <asm/cpufeature.h>
#include <asm/pgtable.h>
#include <asm/msr.h>
#include <asm/bugs.h>
......
......@@ -14,7 +14,7 @@
#include <linux/sysfs.h>
#include <linux/pci.h>
#include <asm/processor.h>
#include <asm/cpufeature.h>
#include <asm/amd_nb.h>
#include <asm/smp.h>
......
#include <asm/cpu_device_id.h>
#include <asm/processor.h>
#include <asm/cpufeature.h>
#include <linux/cpu.h>
#include <linux/module.h>
#include <linux/slab.h>
......
......@@ -1578,6 +1578,17 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
if (c->x86 == 6 && c->x86_model == 45)
quirk_no_way_out = quirk_sandybridge_ifu;
/*
* MCG_CAP.MCG_SER_P is necessary but not sufficient to know
* whether this processor will actually generate recoverable
* machine checks. Check to see if this is an E7 model Xeon.
* We can't do a model number check because E5 and E7 use the
* same model number. E5 doesn't support recovery, E7 does.
*/
if (mca_cfg.recovery || (mca_cfg.ser &&
!strncmp(c->x86_model_id,
"Intel(R) Xeon(R) CPU E7-", 24)))
set_cpu_cap(c, X86_FEATURE_MCE_RECOVERY);
}
if (cfg->monarch_timeout < 0)
cfg->monarch_timeout = 0;
......@@ -2030,6 +2041,8 @@ static int __init mcheck_enable(char *str)
cfg->bootlog = (str[0] == 'b');
else if (!strcmp(str, "bios_cmci_threshold"))
cfg->bios_cmci_threshold = true;
else if (!strcmp(str, "recovery"))
cfg->recovery = true;
else if (isdigit(str[0])) {
if (get_option(&str, &cfg->tolerant) == 2)
get_option(&str, &(cfg->monarch_timeout));
......
#!/bin/sh
#
# Generate the x86_cap/bug_flags[] arrays from include/asm/cpufeature.h
# Generate the x86_cap/bug_flags[] arrays from include/asm/cpufeatures.h
#
IN=$1
......@@ -49,8 +49,8 @@ dump_array()
trap 'rm "$OUT"' EXIT
(
echo "#ifndef _ASM_X86_CPUFEATURE_H"
echo "#include <asm/cpufeature.h>"
echo "#ifndef _ASM_X86_CPUFEATURES_H"
echo "#include <asm/cpufeatures.h>"
echo "#endif"
echo ""
......
......@@ -47,7 +47,7 @@
#include <linux/smp.h>
#include <linux/syscore_ops.h>
#include <asm/processor.h>
#include <asm/cpufeature.h>
#include <asm/e820.h>
#include <asm/mtrr.h>
#include <asm/msr.h>
......
#include <linux/kernel.h>
#include <linux/mm.h>
#include <asm/processor.h>
#include <asm/cpufeature.h>
#include <asm/msr.h>
#include "cpu.h"
......
......@@ -24,6 +24,7 @@
#include <asm/e820.h>
#include <asm/proto.h>
#include <asm/setup.h>
#include <asm/cpufeature.h>
/*
* The e820 map is the map that gets modified e.g. with command line parameters
......
......@@ -51,6 +51,9 @@ void fpu__xstate_clear_all_cpu_caps(void)
setup_clear_cpu_cap(X86_FEATURE_AVX512PF);
setup_clear_cpu_cap(X86_FEATURE_AVX512ER);
setup_clear_cpu_cap(X86_FEATURE_AVX512CD);
setup_clear_cpu_cap(X86_FEATURE_AVX512DQ);
setup_clear_cpu_cap(X86_FEATURE_AVX512BW);
setup_clear_cpu_cap(X86_FEATURE_AVX512VL);
setup_clear_cpu_cap(X86_FEATURE_MPX);
setup_clear_cpu_cap(X86_FEATURE_XGETBV1);
}
......
......@@ -697,9 +697,8 @@ static inline void tramp_free(void *tramp) { }
#endif
/* Defined as markers to the end of the ftrace default trampolines */
extern void ftrace_caller_end(void);
extern void ftrace_regs_caller_end(void);
extern void ftrace_return(void);
extern void ftrace_epilogue(void);
extern void ftrace_caller_op_ptr(void);
extern void ftrace_regs_caller_op_ptr(void);
......@@ -746,7 +745,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
op_offset = (unsigned long)ftrace_regs_caller_op_ptr;
} else {
start_offset = (unsigned long)ftrace_caller;
end_offset = (unsigned long)ftrace_caller_end;
end_offset = (unsigned long)ftrace_epilogue;
op_offset = (unsigned long)ftrace_caller_op_ptr;
}
......@@ -754,7 +753,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
/*
* Allocate enough size to store the ftrace_caller code,
* the jmp to ftrace_return, as well as the address of
* the jmp to ftrace_epilogue, as well as the address of
* the ftrace_ops this trampoline is used for.
*/
trampoline = alloc_tramp(size + MCOUNT_INSN_SIZE + sizeof(void *));
......@@ -772,8 +771,8 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
ip = (unsigned long)trampoline + size;
/* The trampoline ends with a jmp to ftrace_return */
jmp = ftrace_jmp_replace(ip, (unsigned long)ftrace_return);
/* The trampoline ends with a jmp to ftrace_epilogue */
jmp = ftrace_jmp_replace(ip, (unsigned long)ftrace_epilogue);
memcpy(trampoline + size, jmp, MCOUNT_INSN_SIZE);
/*
......
......@@ -19,7 +19,7 @@
#include <asm/setup.h>
#include <asm/processor-flags.h>
#include <asm/msr-index.h>
#include <asm/cpufeature.h>
#include <asm/cpufeatures.h>
#include <asm/percpu.h>
#include <asm/nops.h>
#include <asm/bootparam.h>
......
......@@ -38,7 +38,6 @@
#define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
L4_PAGE_OFFSET = pgd_index(__PAGE_OFFSET)
L3_PAGE_OFFSET = pud_index(__PAGE_OFFSET)
L4_START_KERNEL = pgd_index(__START_KERNEL_map)
L3_START_KERNEL = pud_index(__START_KERNEL_map)
......
......@@ -12,6 +12,7 @@
#include <linux/pm.h>
#include <linux/io.h>
#include <asm/cpufeature.h>
#include <asm/irqdomain.h>
#include <asm/fixmap.h>
#include <asm/hpet.h>
......
......@@ -168,12 +168,14 @@ GLOBAL(ftrace_call)
restore_mcount_regs
/*
* The copied trampoline must call ftrace_return as it
* The copied trampoline must call ftrace_epilogue as it
* still may need to call the function graph tracer.
*
* The code up to this label is copied into trampolines so
* think twice before adding any new code or changing the
* layout here.
*/
GLOBAL(ftrace_caller_end)
GLOBAL(ftrace_return)
GLOBAL(ftrace_epilogue)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
GLOBAL(ftrace_graph_call)
......@@ -244,14 +246,14 @@ GLOBAL(ftrace_regs_call)
popfq
/*
* As this jmp to ftrace_return can be a short jump
* As this jmp to ftrace_epilogue can be a short jump
* it must not be copied into the trampoline.
* The trampoline will add the code to jump
* to the return.
*/
GLOBAL(ftrace_regs_caller_end)
jmp ftrace_return
jmp ftrace_epilogue
END(ftrace_regs_caller)
......
......@@ -40,7 +40,7 @@
#include <linux/uaccess.h>
#include <linux/gfp.h>
#include <asm/processor.h>
#include <asm/cpufeature.h>
#include <asm/msr.h>
static struct class *msr_class;
......
......@@ -57,6 +57,9 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
*/
.io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 },
#endif
#ifdef CONFIG_X86_32
.SYSENTER_stack_canary = STACK_END_MAGIC,
#endif
};
EXPORT_PER_CPU_SYMBOL(cpu_tss);
......
......@@ -61,7 +61,38 @@
regs->seg = GET_SEG(seg) | 3; \
} while (0)
int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
#ifdef CONFIG_X86_64
/*
* If regs->ss will cause an IRET fault, change it. Otherwise leave it
* alone. Using this generally makes no sense unless
* user_64bit_mode(regs) would return true.
*/
static void force_valid_ss(struct pt_regs *regs)
{
u32 ar;
asm volatile ("lar %[old_ss], %[ar]\n\t"
"jz 1f\n\t" /* If invalid: */
"xorl %[ar], %[ar]\n\t" /* set ar = 0 */
"1:"
: [ar] "=r" (ar)
: [old_ss] "rm" ((u16)regs->ss));
/*
* For a valid 64-bit user context, we need DPL 3, type
* read-write data or read-write exp-down data, and S and P
* set. We can't use VERW because VERW doesn't check the
* P bit.
*/
ar &= AR_DPL_MASK | AR_S | AR_P | AR_TYPE_MASK;
if (ar != (AR_DPL3 | AR_S | AR_P | AR_TYPE_RWDATA) &&
ar != (AR_DPL3 | AR_S | AR_P | AR_TYPE_RWDATA_EXPDOWN))
regs->ss = __USER_DS;
}
#endif
static int restore_sigcontext(struct pt_regs *regs,
struct sigcontext __user *sc,
unsigned long uc_flags)
{
unsigned long buf_val;
void __user *buf;
......@@ -94,15 +125,18 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
COPY(r15);
#endif /* CONFIG_X86_64 */
#ifdef CONFIG_X86_32
COPY_SEG_CPL3(cs);
COPY_SEG_CPL3(ss);
#else /* !CONFIG_X86_32 */
/* Kernel saves and restores only the CS segment register on signals,
* which is the bare minimum needed to allow mixed 32/64-bit code.
* App's signal handler can save/restore other segments if needed. */
COPY_SEG_CPL3(cs);
#endif /* CONFIG_X86_32 */
#ifdef CONFIG_X86_64
/*
* Fix up SS if needed for the benefit of old DOSEMU and
* CRIU.
*/
if (unlikely(!(uc_flags & UC_STRICT_RESTORE_SS) &&
user_64bit_mode(regs)))
force_valid_ss(regs);
#endif
get_user_ex(tmpflags, &sc->flags);
regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
......@@ -165,6 +199,7 @@ int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
put_user_ex(regs->cs, &sc->cs);
put_user_ex(0, &sc->gs);
put_user_ex(0, &sc->fs);
put_user_ex(regs->ss, &sc->ss);
#endif /* CONFIG_X86_32 */
put_user_ex(fpstate, &sc->fpstate);
......@@ -403,6 +438,21 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
return 0;
}
#else /* !CONFIG_X86_32 */
static unsigned long frame_uc_flags(struct pt_regs *regs)
{
unsigned long flags;
if (cpu_has_xsave)
flags = UC_FP_XSTATE | UC_SIGCONTEXT_SS;
else
flags = UC_SIGCONTEXT_SS;
if (likely(user_64bit_mode(regs)))
flags |= UC_STRICT_RESTORE_SS;
return flags;
}
static int __setup_rt_frame(int sig, struct ksignal *ksig,
sigset_t *set, struct pt_regs *regs)
{
......@@ -422,10 +472,7 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
put_user_try {
/* Create the ucontext. */
if (cpu_has_xsave)
put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
else
put_user_ex(0, &frame->uc.uc_flags);
put_user_ex(frame_uc_flags(regs), &frame->uc.uc_flags);
put_user_ex(0, &frame->uc.uc_link);
save_altstack_ex(&frame->uc.uc_stack, regs->sp);
......@@ -459,10 +506,28 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
regs->sp = (unsigned long)frame;
/* Set up the CS register to run signal handlers in 64-bit mode,
even if the handler happens to be interrupting 32-bit code. */
/*
* Set up the CS and SS registers to run signal handlers in
* 64-bit mode, even if the handler happens to be interrupting
* 32-bit or 16-bit code.
*
* SS is subtle. In 64-bit mode, we don't need any particular
* SS descriptor, but we do need SS to be valid. It's possible
* that the old SS is entirely bogus -- this can happen if the
* signal we're trying to deliver is #GP or #SS caused by a bad
* SS value. We also have a compatbility issue here: DOSEMU
* relies on the contents of the SS register indicating the
* SS value at the time of the signal, even though that code in
* DOSEMU predates sigreturn's ability to restore SS. (DOSEMU
* avoids relying on sigreturn to restore SS; instead it uses
* a trampoline.) So we do our best: if the old SS was valid,
* we keep it. Otherwise we replace it.
*/
regs->cs = __USER_CS;
if (unlikely(regs->ss != __USER_DS))
force_valid_ss(regs);
return 0;
}
#endif /* CONFIG_X86_32 */
......@@ -489,10 +554,7 @@ static int x32_setup_rt_frame(struct ksignal *ksig,
put_user_try {
/* Create the ucontext. */
if (cpu_has_xsave)
put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
else
put_user_ex(0, &frame->uc.uc_flags);
put_user_ex(frame_uc_flags(regs), &frame->uc.uc_flags);
put_user_ex(0, &frame->uc.uc_link);
compat_save_altstack_ex(&frame->uc.uc_stack, regs->sp);
put_user_ex(0, &frame->uc.uc__pad0);
......@@ -554,7 +616,11 @@ asmlinkage unsigned long sys_sigreturn(void)
set_current_blocked(&set);
if (restore_sigcontext(regs, &frame->sc))
/*
* x86_32 has no uc_flags bits relevant to restore_sigcontext.
* Save a few cycles by skipping the __get_user.
*/
if (restore_sigcontext(regs, &frame->sc, 0))
goto badframe;
return regs->ax;
......@@ -570,16 +636,19 @@ asmlinkage long sys_rt_sigreturn(void)
struct pt_regs *regs = current_pt_regs();
struct rt_sigframe __user *frame;
sigset_t set;
unsigned long uc_flags;
frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
goto badframe;
if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
goto badframe;
if (__get_user(uc_flags, &frame->uc.uc_flags))
goto badframe;
set_current_blocked(&set);
if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
if (restore_sigcontext(regs, &frame->uc.uc_mcontext, uc_flags))
goto badframe;
if (restore_altstack(&frame->uc.uc_stack))
......@@ -692,12 +761,15 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs)
{
#if defined(CONFIG_X86_32) || !defined(CONFIG_X86_64)
#ifdef CONFIG_X86_64
if (is_ia32_task())
return __NR_ia32_restart_syscall;
#endif
#ifdef CONFIG_X86_X32_ABI
return __NR_restart_syscall | (regs->orig_ax & __X32_SYSCALL_BIT);
#else
return __NR_restart_syscall;
#else /* !CONFIG_X86_32 && CONFIG_X86_64 */
return test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall :
__NR_restart_syscall | (regs->orig_ax & __X32_SYSCALL_BIT);
#endif /* CONFIG_X86_32 || !CONFIG_X86_64 */
#endif
}
/*
......@@ -763,6 +835,7 @@ asmlinkage long sys32_x32_rt_sigreturn(void)
struct pt_regs *regs = current_pt_regs();
struct rt_sigframe_x32 __user *frame;
sigset_t set;
unsigned long uc_flags;
frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8);
......@@ -770,10 +843,12 @@ asmlinkage long sys32_x32_rt_sigreturn(void)
goto badframe;
if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
goto badframe;
if (__get_user(uc_flags, &frame->uc.uc_flags))
goto badframe;
set_current_blocked(&set);
if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
if (restore_sigcontext(regs, &frame->uc.uc_mcontext, uc_flags))
goto badframe;
if (compat_restore_altstack(&frame->uc.uc_stack))
......
......@@ -83,30 +83,16 @@ gate_desc idt_table[NR_VECTORS] __page_aligned_bss;
DECLARE_BITMAP(used_vectors, NR_VECTORS);
EXPORT_SYMBOL_GPL(used_vectors);
static inline void conditional_sti(struct pt_regs *regs)
static inline void cond_local_irq_enable(struct pt_regs *regs)
{
if (regs->flags & X86_EFLAGS_IF)
local_irq_enable();
}
static inline void preempt_conditional_sti(struct pt_regs *regs)
{
preempt_count_inc();
if (regs->flags & X86_EFLAGS_IF)
local_irq_enable();
}
static inline void conditional_cli(struct pt_regs *regs)
{
if (regs->flags & X86_EFLAGS_IF)
local_irq_disable();
}
static inline void preempt_conditional_cli(struct pt_regs *regs)
static inline void cond_local_irq_disable(struct pt_regs *regs)
{
if (regs->flags & X86_EFLAGS_IF)
local_irq_disable();
preempt_count_dec();
}
void ist_enter(struct pt_regs *regs)
......@@ -262,7 +248,6 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
tsk->thread.error_code = error_code;
tsk->thread.trap_nr = trapnr;
#ifdef CONFIG_X86_64
if (show_unhandled_signals && unhandled_signal(tsk, signr) &&
printk_ratelimit()) {
pr_info("%s[%d] trap %s ip:%lx sp:%lx error:%lx",
......@@ -271,7 +256,6 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
print_vma_addr(" in ", regs->ip);
pr_cont("\n");
}
#endif
force_sig_info(signr, info ?: SEND_SIG_PRIV, tsk);
}
......@@ -286,7 +270,7 @@ static void do_error_trap(struct pt_regs *regs, long error_code, char *str,
if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) !=
NOTIFY_STOP) {
conditional_sti(regs);
cond_local_irq_enable(regs);
do_trap(trapnr, signr, str, regs, error_code,
fill_trap_info(regs, signr, trapnr, &info));
}
......@@ -368,7 +352,7 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
if (notify_die(DIE_TRAP, "bounds", regs, error_code,
X86_TRAP_BR, SIGSEGV) == NOTIFY_STOP)
return;
conditional_sti(regs);
cond_local_irq_enable(regs);
if (!user_mode(regs))
die("bounds", regs, error_code);
......@@ -443,7 +427,7 @@ do_general_protection(struct pt_regs *regs, long error_code)
struct task_struct *tsk;
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
conditional_sti(regs);
cond_local_irq_enable(regs);
if (v8086_mode(regs)) {
local_irq_enable();
......@@ -517,9 +501,11 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
* as we may switch to the interrupt stack.
*/
debug_stack_usage_inc();
preempt_conditional_sti(regs);
preempt_disable();
cond_local_irq_enable(regs);
do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL);
preempt_conditional_cli(regs);
cond_local_irq_disable(regs);
preempt_enable_no_resched();
debug_stack_usage_dec();
exit:
ist_exit(regs);
......@@ -571,6 +557,29 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
NOKPROBE_SYMBOL(fixup_bad_iret);
#endif
static bool is_sysenter_singlestep(struct pt_regs *regs)
{
/*
* We don't try for precision here. If we're anywhere in the region of
* code that can be single-stepped in the SYSENTER entry path, then
* assume that this is a useless single-step trap due to SYSENTER
* being invoked with TF set. (We don't know in advance exactly
* which instructions will be hit because BTF could plausibly
* be set.)
*/
#ifdef CONFIG_X86_32
return (regs->ip - (unsigned long)__begin_SYSENTER_singlestep_region) <
(unsigned long)__end_SYSENTER_singlestep_region -
(unsigned long)__begin_SYSENTER_singlestep_region;
#elif defined(CONFIG_IA32_EMULATION)
return (regs->ip - (unsigned long)entry_SYSENTER_compat) <
(unsigned long)__end_entry_SYSENTER_compat -
(unsigned long)entry_SYSENTER_compat;
#else
return false;
#endif
}
/*
* Our handling of the processor debug registers is non-trivial.
* We do not clear them on entry and exit from the kernel. Therefore
......@@ -605,10 +614,41 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
ist_enter(regs);
get_debugreg(dr6, 6);
/*
* The Intel SDM says:
*
* Certain debug exceptions may clear bits 0-3. The remaining
* contents of the DR6 register are never cleared by the
* processor. To avoid confusion in identifying debug
* exceptions, debug handlers should clear the register before
* returning to the interrupted task.
*
* Keep it simple: clear DR6 immediately.
*/
set_debugreg(0, 6);
/* Filter out all the reserved bits which are preset to 1 */
dr6 &= ~DR6_RESERVED;
/*
* The SDM says "The processor clears the BTF flag when it
* generates a debug exception." Clear TIF_BLOCKSTEP to keep
* TIF_BLOCKSTEP in sync with the hardware BTF flag.
*/
clear_tsk_thread_flag(tsk, TIF_BLOCKSTEP);
if (unlikely(!user_mode(regs) && (dr6 & DR_STEP) &&
is_sysenter_singlestep(regs))) {
dr6 &= ~DR_STEP;
if (!dr6)
goto exit;
/*
* else we might have gotten a single-step trap and hit a
* watchpoint at the same time, in which case we should fall
* through and handle the watchpoint.
*/
}
/*
* If dr6 has no reason to give us about the origin of this trap,
* then it's very likely the result of an icebp/int01 trap.
......@@ -617,18 +657,10 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
if (!dr6 && user_mode(regs))
user_icebp = 1;
/* Catch kmemcheck conditions first of all! */
/* Catch kmemcheck conditions! */
if ((dr6 & DR_STEP) && kmemcheck_trap(regs))
goto exit;
/* DR6 may or may not be cleared by the CPU */
set_debugreg(0, 6);
/*
* The processor cleared BTF, so don't mark that we need it set.
*/
clear_tsk_thread_flag(tsk, TIF_BLOCKSTEP);
/* Store the virtualized DR6 value */
tsk->thread.debugreg6 = dr6;
......@@ -648,24 +680,25 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
debug_stack_usage_inc();
/* It's safe to allow irq's after DR6 has been saved */
preempt_conditional_sti(regs);
preempt_disable();
cond_local_irq_enable(regs);
if (v8086_mode(regs)) {
handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code,
X86_TRAP_DB);
preempt_conditional_cli(regs);
cond_local_irq_disable(regs);
preempt_enable_no_resched();
debug_stack_usage_dec();
goto exit;
}
/*
* Single-stepping through system calls: ignore any exceptions in
* kernel space, but re-enable TF when returning to user mode.
*
* We already checked v86 mode above, so we can check for kernel mode
* by just checking the CPL of CS.
*/
if ((dr6 & DR_STEP) && !user_mode(regs)) {
if (WARN_ON_ONCE((dr6 & DR_STEP) && !user_mode(regs))) {
/*
* Historical junk that used to handle SYSENTER single-stepping.
* This should be unreachable now. If we survive for a while
* without anyone hitting this warning, we'll turn this into
* an oops.
*/
tsk->thread.debugreg6 &= ~DR_STEP;
set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
regs->flags &= ~X86_EFLAGS_TF;
......@@ -673,10 +706,19 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
si_code = get_si_code(tsk->thread.debugreg6);
if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
send_sigtrap(tsk, regs, error_code, si_code);
preempt_conditional_cli(regs);
cond_local_irq_disable(regs);
preempt_enable_no_resched();
debug_stack_usage_dec();
exit:
#if defined(CONFIG_X86_32)
/*
* This is the most likely code path that involves non-trivial use
* of the SYSENTER stack. Check that we haven't overrun it.
*/
WARN(this_cpu_read(cpu_tss.SYSENTER_stack_canary) != STACK_END_MAGIC,
"Overran or corrupted SYSENTER stack\n");
#endif
ist_exit(regs);
}
NOKPROBE_SYMBOL(do_debug);
......@@ -696,7 +738,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, SIGFPE) == NOTIFY_STOP)
return;
conditional_sti(regs);
cond_local_irq_enable(regs);
if (!user_mode(regs)) {
if (!fixup_exception(regs, trapnr)) {
......@@ -743,7 +785,7 @@ do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
dotraplinkage void
do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
{
conditional_sti(regs);
cond_local_irq_enable(regs);
}
dotraplinkage void
......@@ -756,7 +798,7 @@ do_device_not_available(struct pt_regs *regs, long error_code)
if (read_cr0() & X86_CR0_EM) {
struct math_emu_info info = { };
conditional_sti(regs);
cond_local_irq_enable(regs);
info.regs = regs;
math_emulate(&info);
......@@ -765,7 +807,7 @@ do_device_not_available(struct pt_regs *regs, long error_code)
#endif
fpu__restore(&current->thread.fpu); /* interrupts still off */
#ifdef CONFIG_X86_32
conditional_sti(regs);
cond_local_irq_enable(regs);
#endif
}
NOKPROBE_SYMBOL(do_device_not_available);
......@@ -868,7 +910,7 @@ void __init trap_init(void)
#endif
#ifdef CONFIG_X86_32
set_system_trap_gate(IA32_SYSCALL_VECTOR, entry_INT80_32);
set_system_intr_gate(IA32_SYSCALL_VECTOR, entry_INT80_32);
set_bit(IA32_SYSCALL_VECTOR, used_vectors);
#endif
......
......@@ -30,7 +30,7 @@
* appropriately. Either display a message or halt.
*/
#include <asm/cpufeature.h>
#include <asm/cpufeatures.h>
#include <asm/msr-index.h>
verify_cpu:
......
......@@ -362,7 +362,7 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
/* make room for real-mode segments */
tsk->thread.sp0 += 16;
if (static_cpu_has_safe(X86_FEATURE_SEP))
if (static_cpu_has(X86_FEATURE_SEP))
tsk->thread.sysenter_cs = 0;
load_sp0(tss, &tsk->thread);
......
......@@ -192,6 +192,17 @@ SECTIONS
:init
#endif
/*
* Section for code used exclusively before alternatives are run. All
* references to such code must be patched out by alternatives, normally
* by using X86_FEATURE_ALWAYS CPU feature bit.
*
* See static_cpu_has() for an example.
*/
.altinstr_aux : AT(ADDR(.altinstr_aux) - LOAD_OFFSET) {
*(.altinstr_aux)
}
INIT_DATA_SECTION(16)
.x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {
......
#include <linux/linkage.h>
#include <asm/cpufeature.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
/*
......
/* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */
#include <linux/linkage.h>
#include <asm/cpufeature.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
/*
......
......@@ -10,7 +10,7 @@
#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeature.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/smap.h>
......
/* Copyright 2002 Andi Kleen */
#include <linux/linkage.h>
#include <asm/cpufeature.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
/*
......
......@@ -6,7 +6,7 @@
* - Copyright 2011 Fenghua Yu <fenghua.yu@intel.com>
*/
#include <linux/linkage.h>
#include <asm/cpufeature.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
#undef memmove
......
/* Copyright 2002 Andi Kleen, SuSE Labs */
#include <linux/linkage.h>
#include <asm/cpufeature.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
.weak memset
......
......@@ -943,7 +943,7 @@ int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
return -EINVAL;
}
*prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) |
*prot = __pgprot((pgprot_val(*prot) & (~_PAGE_CACHE_MASK)) |
cachemode2protval(pcm));
return 0;
......@@ -959,7 +959,7 @@ int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
/* Set prot based on lookup */
pcm = lookup_memtype(pfn_t_to_phys(pfn));
*prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) |
*prot = __pgprot((pgprot_val(*prot) & (~_PAGE_CACHE_MASK)) |
cachemode2protval(pcm));
return 0;
......
......@@ -4,6 +4,7 @@
#include <asm/pgtable.h>
#include <asm/proto.h>
#include <asm/cpufeature.h>
static int disable_nx;
......
......@@ -24,7 +24,6 @@
#include <asm/nmi.h>
#include <asm/apic.h>
#include <asm/processor.h>
#include <asm/cpufeature.h>
#include "op_x86_model.h"
#include "op_counter.h"
......
......@@ -3,7 +3,7 @@
#include <asm/asm.h>
#include <asm/segment.h>
#include <asm/cpufeature.h>
#include <asm/cpufeatures.h>
#include <asm/cmpxchg.h>
#include <asm/nops.h>
......
......@@ -25,11 +25,11 @@
#define old_mmap sys_old_mmap
#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
#define __SYSCALL_I386(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
#include <asm/syscalls_32.h>
#undef __SYSCALL_I386
#define __SYSCALL_I386(nr, sym, compat) [ nr ] = sym,
#define __SYSCALL_I386(nr, sym, qual) [ nr ] = sym,
extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
......
......@@ -35,14 +35,11 @@
#define stub_execveat sys_execveat
#define stub_rt_sigreturn sys_rt_sigreturn
#define __SYSCALL_COMMON(nr, sym, compat) __SYSCALL_64(nr, sym, compat)
#define __SYSCALL_X32(nr, sym, compat) /* Not supported */
#define __SYSCALL_64(nr, sym, compat) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
#include <asm/syscalls_64.h>
#undef __SYSCALL_64
#define __SYSCALL_64(nr, sym, compat) [ nr ] = sym,
#define __SYSCALL_64(nr, sym, qual) [ nr ] = sym,
extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
......
......@@ -9,14 +9,12 @@
#include <asm/types.h>
#ifdef __i386__
#define __SYSCALL_I386(nr, sym, compat) [nr] = 1,
#define __SYSCALL_I386(nr, sym, qual) [nr] = 1,
static char syscalls[] = {
#include <asm/syscalls_32.h>
};
#else
#define __SYSCALL_64(nr, sym, compat) [nr] = 1,
#define __SYSCALL_COMMON(nr, sym, compat) [nr] = 1,
#define __SYSCALL_X32(nr, sym, compat) /* Not supported */
#define __SYSCALL_64(nr, sym, qual) [nr] = 1,
static char syscalls[] = {
#include <asm/syscalls_64.h>
};
......
......@@ -1431,7 +1431,7 @@ static int __init intel_pstate_init(void)
if (!all_cpu_data)
return -ENOMEM;
if (static_cpu_has_safe(X86_FEATURE_HWP) && !no_hwp) {
if (static_cpu_has(X86_FEATURE_HWP) && !no_hwp) {
pr_info("intel_pstate: HWP enabled\n");
hwp_active++;
}
......
......@@ -931,7 +931,7 @@ static int check_async_write(struct inode *inode, unsigned long bio_flags)
if (bio_flags & EXTENT_BIO_TREE_LOG)
return 0;
#ifdef CONFIG_X86
if (static_cpu_has_safe(X86_FEATURE_XMM4_2))
if (static_cpu_has(X86_FEATURE_XMM4_2))
return 0;
#endif
return 1;
......
......@@ -2139,6 +2139,8 @@ int remap_pfn_range(struct vm_area_struct *, unsigned long addr,
int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *);
int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
unsigned long pfn);
int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
unsigned long pfn, pgprot_t pgprot);
int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
pfn_t pfn);
int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len);
......
......@@ -566,10 +566,26 @@ static inline void clear_tlb_flush_pending(struct mm_struct *mm)
}
#endif
struct vm_special_mapping
{
const char *name;
struct vm_fault;
struct vm_special_mapping {
const char *name; /* The name, e.g. "[vdso]". */
/*
* If .fault is not provided, this points to a
* NULL-terminated array of pages that back the special mapping.
*
* This must not be NULL unless .fault is provided.
*/
struct page **pages;
/*
* If non-NULL, then this is called to resolve page faults
* on the special mapping. If used, .pages is not checked.
*/
int (*fault)(const struct vm_special_mapping *sm,
struct vm_area_struct *vma,
struct vm_fault *vmf);
};
enum tlb_flush_reason {
......
......@@ -1178,6 +1178,7 @@ static struct xol_area *__create_xol_area(unsigned long vaddr)
goto free_area;
area->xol_mapping.name = "[uprobes]";
area->xol_mapping.fault = NULL;
area->xol_mapping.pages = area->pages;
area->pages[0] = alloc_page(GFP_HIGHUSER);
if (!area->pages[0])
......
......@@ -17,7 +17,7 @@
#include <linux/atomic.h>
#ifdef CONFIG_X86
#include <asm/processor.h> /* for boot_cpu_has below */
#include <asm/cpufeature.h> /* for boot_cpu_has below */
#endif
#define TEST(bit, op, c_op, val) \
......
......@@ -1550,9 +1550,30 @@ static int insert_pfn(struct vm_area_struct *vma, unsigned long addr,
*/
int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
unsigned long pfn)
{
return vm_insert_pfn_prot(vma, addr, pfn, vma->vm_page_prot);
}
EXPORT_SYMBOL(vm_insert_pfn);
/**
* vm_insert_pfn_prot - insert single pfn into user vma with specified pgprot
* @vma: user vma to map to
* @addr: target user address of this page
* @pfn: source kernel pfn
* @pgprot: pgprot flags for the inserted page
*
* This is exactly like vm_insert_pfn, except that it allows drivers to
* to override pgprot on a per-page basis.
*
* This only makes sense for IO mappings, and it makes no sense for
* cow mappings. In general, using multiple vmas is preferable;
* vm_insert_pfn_prot should only be used if using multiple VMAs is
* impractical.
*/
int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
unsigned long pfn, pgprot_t pgprot)
{
int ret;
pgprot_t pgprot = vma->vm_page_prot;
/*
* Technically, architectures with pte_special can avoid all these
* restrictions (same for remap_pfn_range). However we would like
......@@ -1574,7 +1595,7 @@ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
return ret;
}
EXPORT_SYMBOL(vm_insert_pfn);
EXPORT_SYMBOL(vm_insert_pfn_prot);
int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
pfn_t pfn)
......
......@@ -3066,11 +3066,16 @@ static int special_mapping_fault(struct vm_area_struct *vma,
pgoff_t pgoff;
struct page **pages;
if (vma->vm_ops == &legacy_special_mapping_vmops)
if (vma->vm_ops == &legacy_special_mapping_vmops) {
pages = vma->vm_private_data;
else
pages = ((struct vm_special_mapping *)vma->vm_private_data)->
pages;
} else {
struct vm_special_mapping *sm = vma->vm_private_data;
if (sm->fault)
return sm->fault(sm, vma, vmf);
pages = sm->pages;
}
for (pgoff = vmf->pgoff; pgoff && *pages; ++pages)
pgoff--;
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment