Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
L
linux
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
nexedi
linux
Commits
3c76db70
Commit
3c76db70
authored
Mar 12, 2018
by
Ingo Molnar
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'x86/pti' into x86/mm, to pick up dependencies
Signed-off-by:
Ingo Molnar
<
mingo@kernel.org
>
parents
194a9749
7958b224
Changes
69
Hide whitespace changes
Inline
Side-by-side
Showing
69 changed files
with
910 additions
and
471 deletions
+910
-471
Makefile
Makefile
+5
-0
arch/x86/Kconfig
arch/x86/Kconfig
+2
-10
arch/x86/Makefile
arch/x86/Makefile
+3
-4
arch/x86/entry/calling.h
arch/x86/entry/calling.h
+19
-15
arch/x86/entry/entry_32.S
arch/x86/entry/entry_32.S
+1
-2
arch/x86/entry/entry_64.S
arch/x86/entry/entry_64.S
+92
-61
arch/x86/entry/entry_64_compat.S
arch/x86/entry/entry_64_compat.S
+41
-42
arch/x86/entry/syscalls/syscall_32.tbl
arch/x86/entry/syscalls/syscall_32.tbl
+19
-19
arch/x86/entry/vsyscall/vsyscall_64.c
arch/x86/entry/vsyscall/vsyscall_64.c
+3
-13
arch/x86/ia32/sys_ia32.c
arch/x86/ia32/sys_ia32.c
+44
-30
arch/x86/include/asm/apm.h
arch/x86/include/asm/apm.h
+6
-0
arch/x86/include/asm/asm-prototypes.h
arch/x86/include/asm/asm-prototypes.h
+0
-3
arch/x86/include/asm/cpufeatures.h
arch/x86/include/asm/cpufeatures.h
+3
-0
arch/x86/include/asm/efi.h
arch/x86/include/asm/efi.h
+15
-2
arch/x86/include/asm/microcode.h
arch/x86/include/asm/microcode.h
+7
-2
arch/x86/include/asm/mmu_context.h
arch/x86/include/asm/mmu_context.h
+1
-0
arch/x86/include/asm/nospec-branch.h
arch/x86/include/asm/nospec-branch.h
+118
-20
arch/x86/include/asm/paravirt.h
arch/x86/include/asm/paravirt.h
+13
-4
arch/x86/include/asm/paravirt_types.h
arch/x86/include/asm/paravirt_types.h
+4
-1
arch/x86/include/asm/pgtable.h
arch/x86/include/asm/pgtable.h
+4
-4
arch/x86/include/asm/pgtable_32.h
arch/x86/include/asm/pgtable_32.h
+1
-0
arch/x86/include/asm/pgtable_64.h
arch/x86/include/asm/pgtable_64.h
+1
-0
arch/x86/include/asm/pgtable_types.h
arch/x86/include/asm/pgtable_types.h
+10
-2
arch/x86/include/asm/processor.h
arch/x86/include/asm/processor.h
+1
-0
arch/x86/include/asm/refcount.h
arch/x86/include/asm/refcount.h
+2
-2
arch/x86/include/asm/rmwcc.h
arch/x86/include/asm/rmwcc.h
+8
-8
arch/x86/include/asm/sections.h
arch/x86/include/asm/sections.h
+1
-0
arch/x86/include/asm/sys_ia32.h
arch/x86/include/asm/sys_ia32.h
+30
-18
arch/x86/kernel/apic/io_apic.c
arch/x86/kernel/apic/io_apic.c
+1
-1
arch/x86/kernel/cpu/bugs.c
arch/x86/kernel/cpu/bugs.c
+11
-1
arch/x86/kernel/cpu/common.c
arch/x86/kernel/cpu/common.c
+30
-0
arch/x86/kernel/cpu/intel.c
arch/x86/kernel/cpu/intel.c
+7
-0
arch/x86/kernel/cpu/microcode/amd.c
arch/x86/kernel/cpu/microcode/amd.c
+5
-5
arch/x86/kernel/cpu/microcode/core.c
arch/x86/kernel/cpu/microcode/core.c
+123
-38
arch/x86/kernel/cpu/microcode/intel.c
arch/x86/kernel/cpu/microcode/intel.c
+45
-13
arch/x86/kernel/head_64.S
arch/x86/kernel/head_64.S
+2
-0
arch/x86/kernel/ioport.c
arch/x86/kernel/ioport.c
+1
-1
arch/x86/kernel/kprobes/core.c
arch/x86/kernel/kprobes/core.c
+9
-1
arch/x86/kernel/setup.c
arch/x86/kernel/setup.c
+5
-12
arch/x86/kernel/setup_percpu.c
arch/x86/kernel/setup_percpu.c
+4
-13
arch/x86/kernel/unwind_orc.c
arch/x86/kernel/unwind_orc.c
+1
-2
arch/x86/kernel/vmlinux.lds.S
arch/x86/kernel/vmlinux.lds.S
+2
-0
arch/x86/kvm/svm.c
arch/x86/kvm/svm.c
+5
-4
arch/x86/kvm/vmx.c
arch/x86/kvm/vmx.c
+5
-4
arch/x86/lib/Makefile
arch/x86/lib/Makefile
+0
-1
arch/x86/lib/retpoline.S
arch/x86/lib/retpoline.S
+0
-56
arch/x86/mm/cpu_entry_area.c
arch/x86/mm/cpu_entry_area.c
+6
-0
arch/x86/mm/fault.c
arch/x86/mm/fault.c
+0
-4
arch/x86/mm/init_32.c
arch/x86/mm/init_32.c
+15
-0
arch/x86/mm/mem_encrypt_boot.S
arch/x86/mm/mem_encrypt_boot.S
+2
-0
arch/x86/mm/pti.c
arch/x86/mm/pti.c
+1
-1
arch/x86/realmode/rm/trampoline_64.S
arch/x86/realmode/rm/trampoline_64.S
+1
-1
arch/x86/xen/suspend.c
arch/x86/xen/suspend.c
+16
-0
include/linux/compiler-clang.h
include/linux/compiler-clang.h
+5
-0
include/linux/compiler-gcc.h
include/linux/compiler-gcc.h
+4
-0
include/linux/init.h
include/linux/init.h
+4
-4
include/linux/jump_label.h
include/linux/jump_label.h
+3
-0
include/linux/kernel.h
include/linux/kernel.h
+1
-0
include/linux/nospec.h
include/linux/nospec.h
+3
-23
init/main.c
init/main.c
+2
-0
kernel/extable.c
kernel/extable.c
+1
-1
kernel/jump_label.c
kernel/jump_label.c
+22
-5
scripts/Makefile.build
scripts/Makefile.build
+8
-0
tools/objtool/builtin-check.c
tools/objtool/builtin-check.c
+4
-2
tools/objtool/builtin-orc.c
tools/objtool/builtin-orc.c
+1
-5
tools/objtool/builtin.h
tools/objtool/builtin.h
+5
-0
tools/objtool/check.c
tools/objtool/check.c
+88
-5
tools/objtool/check.h
tools/objtool/check.h
+2
-1
tools/testing/selftests/x86/test_vsyscall.c
tools/testing/selftests/x86/test_vsyscall.c
+6
-5
No files found.
Makefile
View file @
3c76db70
...
...
@@ -489,6 +489,11 @@ KBUILD_CFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC)
KBUILD_AFLAGS
+=
$(CLANG_TARGET)
$(CLANG_GCC_TC)
endif
RETPOLINE_CFLAGS_GCC
:=
-mindirect-branch
=
thunk-extern
-mindirect-branch-register
RETPOLINE_CFLAGS_CLANG
:=
-mretpoline-external-thunk
RETPOLINE_CFLAGS
:=
$(
call
cc-option,
$(RETPOLINE_CFLAGS_GCC)
,
$(
call
cc-option,
$(RETPOLINE_CFLAGS_CLANG)
))
export
RETPOLINE_CFLAGS
ifeq
($(config-targets),1)
# ===========================================================================
# *config targets only - make sure prerequisites are updated, and descend
...
...
arch/x86/Kconfig
View file @
3c76db70
...
...
@@ -430,6 +430,7 @@ config GOLDFISH
config RETPOLINE
bool "Avoid speculative indirect branches in kernel"
default y
select STACK_VALIDATION if HAVE_STACK_VALIDATION
help
Compile kernel with the retpoline compiler options to guard against
kernel-to-user data leaks by avoiding speculative indirect
...
...
@@ -2315,7 +2316,7 @@ choice
it can be used to assist security vulnerability exploitation.
This setting can be changed at boot time via the kernel command
line parameter vsyscall=[
native|
emulate|none].
line parameter vsyscall=[emulate|none].
On a system with recent enough glibc (2.14 or newer) and no
static binaries, you can say None without a performance penalty
...
...
@@ -2323,15 +2324,6 @@ choice
If unsure, select "Emulate".
config LEGACY_VSYSCALL_NATIVE
bool "Native"
help
Actual executable code is located in the fixed vsyscall
address mapping, implementing time() efficiently. Since
this makes the mapping executable, it can be used during
security vulnerability exploitation (traditionally as
ROP gadgets). This configuration is not recommended.
config LEGACY_VSYSCALL_EMULATE
bool "Emulate"
help
...
...
arch/x86/Makefile
View file @
3c76db70
...
...
@@ -232,10 +232,9 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
# Avoid indirect branches in kernel to deal with Spectre
ifdef
CONFIG_RETPOLINE
RETPOLINE_CFLAGS
+=
$(
call
cc-option,-mindirect-branch
=
thunk-extern
-mindirect-branch-register
)
ifneq
($(RETPOLINE_CFLAGS),)
KBUILD_CFLAGS
+=
$(RETPOLINE_CFLAGS)
-DRETPOLINE
endif
ifneq
($(RETPOLINE_CFLAGS),)
KBUILD_CFLAGS
+=
$(RETPOLINE_CFLAGS)
-DRETPOLINE
endif
endif
archscripts
:
scripts_basic
...
...
arch/x86/entry/calling.h
View file @
3c76db70
...
...
@@ -97,7 +97,7 @@ For 32-bit we have the following conventions - kernel is built with
#define SIZEOF_PTREGS 21*8
.
macro
PUSH_AND_CLEAR_REGS
rdx
=%
rdx
rax
=%
rax
.
macro
PUSH_AND_CLEAR_REGS
rdx
=%
rdx
rax
=%
rax
save_ret
=
0
/*
* Push registers and sanitize registers of values that a
* speculation attack might otherwise want to exploit. The
...
...
@@ -105,32 +105,41 @@ For 32-bit we have the following conventions - kernel is built with
* could be put to use in a speculative execution gadget.
* Interleave XOR with PUSH for better uop scheduling:
*/
.
if
\
save_ret
pushq
%
rsi
/* pt_regs->si */
movq
8
(
%
rsp
),
%
rsi
/* temporarily store the return address in %rsi */
movq
%
rdi
,
8
(
%
rsp
)
/* pt_regs->di (overwriting original return address) */
.
else
pushq
%
rdi
/* pt_regs->di */
pushq
%
rsi
/* pt_regs->si */
.
endif
pushq
\
rdx
/* pt_regs->dx */
pushq
%
rcx
/* pt_regs->cx */
pushq
\
rax
/* pt_regs->ax */
pushq
%
r8
/* pt_regs->r8 */
xor
q
%
r8
,
%
r8
/* nospec r8 */
xor
l
%
r8d
,
%
r8d
/* nospec r8 */
pushq
%
r9
/* pt_regs->r9 */
xor
q
%
r9
,
%
r9
/* nospec r9 */
xor
l
%
r9d
,
%
r9d
/* nospec r9 */
pushq
%
r10
/* pt_regs->r10 */
xor
q
%
r10
,
%
r10
/* nospec r10 */
xor
l
%
r10d
,
%
r10d
/* nospec r10 */
pushq
%
r11
/* pt_regs->r11 */
xor
q
%
r11
,
%
r11
/* nospec r11*/
xor
l
%
r11d
,
%
r11d
/* nospec r11*/
pushq
%
rbx
/* pt_regs->rbx */
xorl
%
ebx
,
%
ebx
/* nospec rbx*/
pushq
%
rbp
/* pt_regs->rbp */
xorl
%
ebp
,
%
ebp
/* nospec rbp*/
pushq
%
r12
/* pt_regs->r12 */
xor
q
%
r12
,
%
r12
/* nospec r12*/
xor
l
%
r12d
,
%
r12d
/* nospec r12*/
pushq
%
r13
/* pt_regs->r13 */
xor
q
%
r13
,
%
r13
/* nospec r13*/
xor
l
%
r13d
,
%
r13d
/* nospec r13*/
pushq
%
r14
/* pt_regs->r14 */
xor
q
%
r14
,
%
r14
/* nospec r14*/
xor
l
%
r14d
,
%
r14d
/* nospec r14*/
pushq
%
r15
/* pt_regs->r15 */
xor
q
%
r15
,
%
r15
/* nospec r15*/
xor
l
%
r15d
,
%
r15d
/* nospec r15*/
UNWIND_HINT_REGS
.
if
\
save_ret
pushq
%
rsi
/* return address on top of stack */
.
endif
.
endm
.
macro
POP_REGS
pop_rdi
=
1
skip_r11rcx
=
0
...
...
@@ -172,12 +181,7 @@ For 32-bit we have the following conventions - kernel is built with
*/
.
macro
ENCODE_FRAME_POINTER
ptregs_offset
=
0
#ifdef CONFIG_FRAME_POINTER
.
if
\
ptregs_offset
leaq
\
ptregs_offset
(
%
rsp
),
%
rbp
.
else
mov
%
rsp
,
%
rbp
.
endif
orq
$
0x1
,
%
rbp
leaq
1
+
\
ptregs_offset
(
%
rsp
),
%
rbp
#endif
.
endm
...
...
arch/x86/entry/entry_32.S
View file @
3c76db70
...
...
@@ -252,8 +252,7 @@ ENTRY(__switch_to_asm)
*
exist
,
overwrite
the
RSB
with
entries
which
capture
*
speculative
execution
to
prevent
attack
.
*/
/
*
Clobbers
%
ebx
*/
FILL_RETURN_BUFFER
RSB_CLEAR_LOOPS
,
X86_FEATURE_RSB_CTXSW
FILL_RETURN_BUFFER
%
ebx
,
RSB_CLEAR_LOOPS
,
X86_FEATURE_RSB_CTXSW
#endif
/
*
restore
callee
-
saved
registers
*/
...
...
arch/x86/entry/entry_64.S
View file @
3c76db70
...
...
@@ -369,8 +369,7 @@ ENTRY(__switch_to_asm)
*
exist
,
overwrite
the
RSB
with
entries
which
capture
*
speculative
execution
to
prevent
attack
.
*/
/
*
Clobbers
%
rbx
*/
FILL_RETURN_BUFFER
RSB_CLEAR_LOOPS
,
X86_FEATURE_RSB_CTXSW
FILL_RETURN_BUFFER
%
r12
,
RSB_CLEAR_LOOPS
,
X86_FEATURE_RSB_CTXSW
#endif
/
*
restore
callee
-
saved
registers
*/
...
...
@@ -454,9 +453,19 @@ END(irq_entries_start)
*
*
The
invariant
is
that
,
if
irq_count
!=
-
1
,
then
the
IRQ
stack
is
in
use
.
*/
.
macro
ENTER_IRQ_STACK
regs
=
1
old_rsp
.
macro
ENTER_IRQ_STACK
regs
=
1
old_rsp
save_ret
=
0
DEBUG_ENTRY_ASSERT_IRQS_OFF
.
if
\
save_ret
/
*
*
If
save_ret
is
set
,
the
original
stack
contains
one
additional
*
entry
--
the
return
address
.
Therefore
,
move
the
address
one
*
entry
below
%
rsp
to
\
old_rsp
.
*/
leaq
8
(%
rsp
),
\
old_rsp
.
else
movq
%
rsp
,
\
old_rsp
.
endif
.
if
\
regs
UNWIND_HINT_REGS
base
=
\
old_rsp
...
...
@@ -502,6 +511,15 @@ END(irq_entries_start)
.
if
\
regs
UNWIND_HINT_REGS
indirect
=
1
.
endif
.
if
\
save_ret
/
*
*
Push
the
return
address
to
the
stack
.
This
return
address
can
*
be
found
at
the
"real"
original
RSP
,
which
was
offset
by
8
at
*
the
beginning
of
this
macro
.
*/
pushq
-
8
(
\
old_rsp
)
.
endif
.
endm
/*
...
...
@@ -525,27 +543,65 @@ END(irq_entries_start)
.
endm
/*
*
Interrupt
entry
/
exit
.
*
*
Interrupt
entry
points
save
only
callee
clobbered
registers
in
fast
path
.
*
Interrupt
entry
helper
function
.
*
*
Entry
runs
with
interrupts
off
.
*
Entry
runs
with
interrupts
off
.
Stack
layout
at
entry
:
*
+----------------------------------------------------+
*
| regs->ss |
*
| regs->rsp |
*
| regs->eflags |
*
| regs->cs |
*
| regs->ip |
*
+----------------------------------------------------+
*
| regs->orig_ax = ~(interrupt number) |
*
+----------------------------------------------------+
*
| return address |
*
+----------------------------------------------------+
*/
/*
0(%
rsp
):
~
(
interrupt
number
)
*/
.
macro
interrupt
func
ENTRY
(
interrupt_entry
)
UNWIND_HINT_FUNC
ASM_CLAC
cld
testb
$
3
,
CS
-
ORIG_RAX
(%
rsp
)
testb
$
3
,
CS
-
ORIG_RAX
+
8
(%
rsp
)
jz
1
f
SWAPGS
call
switch_to_thread_stack
/
*
*
Switch
to
the
thread
stack
.
The
IRET
frame
and
orig_ax
are
*
on
the
stack
,
as
well
as
the
return
address
.
RDI
..
R12
are
*
not
(
yet
)
on
the
stack
and
space
has
not
(
yet
)
been
*
allocated
for
them
.
*/
pushq
%
rdi
/
*
Need
to
switch
before
accessing
the
thread
stack
.
*/
SWITCH_TO_KERNEL_CR3
scratch_reg
=%
rdi
movq
%
rsp
,
%
rdi
movq
PER_CPU_VAR
(
cpu_current_top_of_stack
),
%
rsp
/
*
*
We
have
RDI
,
return
address
,
and
orig_ax
on
the
stack
on
*
top
of
the
IRET
frame
.
That
means
offset
=
24
*/
UNWIND_HINT_IRET_REGS
base
=%
rdi
offset
=
24
pushq
7
*
8
(%
rdi
)
/*
regs
->
ss
*/
pushq
6
*
8
(%
rdi
)
/*
regs
->
rsp
*/
pushq
5
*
8
(%
rdi
)
/*
regs
->
eflags
*/
pushq
4
*
8
(%
rdi
)
/*
regs
->
cs
*/
pushq
3
*
8
(%
rdi
)
/*
regs
->
ip
*/
pushq
2
*
8
(%
rdi
)
/*
regs
->
orig_ax
*/
pushq
8
(%
rdi
)
/*
return
address
*/
UNWIND_HINT_FUNC
movq
(%
rdi
),
%
rdi
1
:
PUSH_AND_CLEAR_REGS
ENCODE_FRAME_POINTER
PUSH_AND_CLEAR_REGS
save_ret
=
1
ENCODE_FRAME_POINTER
8
testb
$
3
,
CS
(%
rsp
)
testb
$
3
,
CS
+
8
(%
rsp
)
jz
1
f
/
*
...
...
@@ -553,7 +609,7 @@ END(irq_entries_start)
*
*
We
need
to
tell
lockdep
that
IRQs
are
off
.
We
can
't do this until
*
we
fix
gsbase
,
and
we
should
do
it
before
enter_from_user_mode
*
(
which
can
take
locks
)
.
Since
TRACE_IRQS_OFF
idempotent
,
*
(
which
can
take
locks
)
.
Since
TRACE_IRQS_OFF
i
s
i
dempotent
,
*
the
simplest
way
to
handle
it
is
to
just
call
it
twice
if
*
we
enter
from
user
mode
.
There
's no reason to optimize this since
*
TRACE_IRQS_OFF
is
a
no
-
op
if
lockdep
is
off
.
...
...
@@ -563,12 +619,15 @@ END(irq_entries_start)
CALL_enter_from_user_mode
1
:
ENTER_IRQ_STACK
old_rsp
=%
rdi
ENTER_IRQ_STACK
old_rsp
=%
rdi
save_ret
=
1
/
*
We
entered
an
interrupt
context
-
irqs
are
off
:
*/
TRACE_IRQS_OFF
call
\
func
/*
rdi
points
to
pt_regs
*/
.
endm
ret
END
(
interrupt_entry
)
/*
Interrupt
entry
/
exit
.
*/
/
*
*
The
interrupt
stubs
push
(
~
vector
+
0x80
)
onto
the
stack
and
...
...
@@ -576,9 +635,10 @@ END(irq_entries_start)
*/
.
p2align
CONFIG_X86_L1_CACHE_SHIFT
common_interrupt
:
ASM_CLAC
addq
$
-
0x80
,
(%
rsp
)
/*
Adjust
vector
to
[-
256
,
-
1
]
range
*/
interrupt
do_IRQ
call
interrupt_entry
UNWIND_HINT_REGS
indirect
=
1
call
do_IRQ
/*
rdi
points
to
pt_regs
*/
/
*
0
(%
rsp
)
:
old
RSP
*/
ret_from_intr
:
DISABLE_INTERRUPTS
(
CLBR_ANY
)
...
...
@@ -771,10 +831,11 @@ END(common_interrupt)
.
macro
apicinterrupt3
num
sym
do_sym
ENTRY
(\
sym
)
UNWIND_HINT_IRET_REGS
ASM_CLAC
pushq
$~
(
\
num
)
.
Lcommon_
\
sym
:
interrupt
\
do_sym
call
interrupt_entry
UNWIND_HINT_REGS
indirect
=
1
call
\
do_sym
/*
rdi
points
to
pt_regs
*/
jmp
ret_from_intr
END
(\
sym
)
.
endm
...
...
@@ -837,34 +898,6 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
*/
#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8)
/*
*
Switch
to
the
thread
stack
.
This
is
called
with
the
IRET
frame
and
*
orig_ax
on
the
stack
.
(
That
is
,
RDI
..
R12
are
not
on
the
stack
and
*
space
has
not
been
allocated
for
them
.
)
*/
ENTRY
(
switch_to_thread_stack
)
UNWIND_HINT_FUNC
pushq
%
rdi
/
*
Need
to
switch
before
accessing
the
thread
stack
.
*/
SWITCH_TO_KERNEL_CR3
scratch_reg
=%
rdi
movq
%
rsp
,
%
rdi
movq
PER_CPU_VAR
(
cpu_current_top_of_stack
),
%
rsp
UNWIND_HINT
sp_offset
=
16
sp_reg
=
ORC_REG_DI
pushq
7
*
8
(%
rdi
)
/*
regs
->
ss
*/
pushq
6
*
8
(%
rdi
)
/*
regs
->
rsp
*/
pushq
5
*
8
(%
rdi
)
/*
regs
->
eflags
*/
pushq
4
*
8
(%
rdi
)
/*
regs
->
cs
*/
pushq
3
*
8
(%
rdi
)
/*
regs
->
ip
*/
pushq
2
*
8
(%
rdi
)
/*
regs
->
orig_ax
*/
pushq
8
(%
rdi
)
/*
return
address
*/
UNWIND_HINT_FUNC
movq
(%
rdi
),
%
rdi
ret
END
(
switch_to_thread_stack
)
.
macro
idtentry
sym
do_sym
has_error_code
:
req
paranoid
=
0
shift_ist
=-
1
ENTRY
(\
sym
)
UNWIND_HINT_IRET_REGS
offset
=
\
has_error_code
*
8
...
...
@@ -880,12 +913,8 @@ ENTRY(\sym)
pushq
$
-
1
/*
ORIG_RAX
:
no
syscall
to
restart
*/
.
endif
/
*
Save
all
registers
in
pt_regs
*/
PUSH_AND_CLEAR_REGS
ENCODE_FRAME_POINTER
.
if
\
paranoid
<
2
testb
$
3
,
CS
(%
rsp
)
/*
If
coming
from
userspace
,
switch
stacks
*/
testb
$
3
,
CS
-
ORIG_RAX
(%
rsp
)
/*
If
coming
from
userspace
,
switch
stacks
*/
jnz
.
Lfrom_usermode_switch_stack_
\
@
.
endif
...
...
@@ -1135,13 +1164,15 @@ idtentry machine_check do_mce has_error_code=0 paranoid=1
#endif
/*
*
Switch
gs
if
needed
.
*
S
ave
all
registers
in
pt_regs
,
and
s
witch
gs
if
needed
.
*
Use
slow
,
but
surefire
"are we in kernel?"
check
.
*
Return
:
ebx
=
0
:
need
swapgs
on
exit
,
ebx
=
1
:
otherwise
*/
ENTRY
(
paranoid_entry
)
UNWIND_HINT_FUNC
cld
PUSH_AND_CLEAR_REGS
save_ret
=
1
ENCODE_FRAME_POINTER
8
movl
$
1
,
%
ebx
movl
$MSR_GS_BASE
,
%
ecx
rdmsr
...
...
@@ -1186,12 +1217,14 @@ ENTRY(paranoid_exit)
END
(
paranoid_exit
)
/*
*
S
witch
gs
if
needed
.
*
S
ave
all
registers
in
pt_regs
,
and
switch
GS
if
needed
.
*
Return
:
EBX
=
0
:
came
from
user
mode
; EBX=1: otherwise
*/
ENTRY
(
error_entry
)
UNWIND_HINT_
REGS
offset
=
8
UNWIND_HINT_
FUNC
cld
PUSH_AND_CLEAR_REGS
save_ret
=
1
ENCODE_FRAME_POINTER
8
testb
$
3
,
CS
+
8
(%
rsp
)
jz
.
Lerror_kernelspace
...
...
@@ -1582,8 +1615,6 @@ end_repeat_nmi:
*
frame
to
point
back
to
repeat_nmi
.
*/
pushq
$
-
1
/*
ORIG_RAX
:
no
syscall
to
restart
*/
PUSH_AND_CLEAR_REGS
ENCODE_FRAME_POINTER
/
*
*
Use
paranoid_entry
to
handle
SWAPGS
,
but
no
need
to
use
paranoid_exit
...
...
arch/x86/entry/entry_64_compat.S
View file @
3c76db70
...
...
@@ -85,25 +85,25 @@ ENTRY(entry_SYSENTER_compat)
pushq
%
rcx
/*
pt_regs
->
cx
*/
pushq
$
-
ENOSYS
/*
pt_regs
->
ax
*/
pushq
$
0
/*
pt_regs
->
r8
=
0
*/
xor
q
%
r8
,
%
r8
/*
nospec
r8
*/
xor
l
%
r8d
,
%
r8d
/*
nospec
r8
*/
pushq
$
0
/*
pt_regs
->
r9
=
0
*/
xor
q
%
r9
,
%
r9
/*
nospec
r9
*/
xor
l
%
r9d
,
%
r9d
/*
nospec
r9
*/
pushq
$
0
/*
pt_regs
->
r10
=
0
*/
xor
q
%
r10
,
%
r10
/*
nospec
r10
*/
xor
l
%
r10d
,
%
r10d
/*
nospec
r10
*/
pushq
$
0
/*
pt_regs
->
r11
=
0
*/
xor
q
%
r11
,
%
r11
/*
nospec
r11
*/
xor
l
%
r11d
,
%
r11d
/*
nospec
r11
*/
pushq
%
rbx
/*
pt_regs
->
rbx
*/
xorl
%
ebx
,
%
ebx
/*
nospec
rbx
*/
pushq
%
rbp
/*
pt_regs
->
rbp
(
will
be
overwritten
)
*/
xorl
%
ebp
,
%
ebp
/*
nospec
rbp
*/
pushq
$
0
/*
pt_regs
->
r12
=
0
*/
xor
q
%
r12
,
%
r12
/*
nospec
r12
*/
xor
l
%
r12d
,
%
r12d
/*
nospec
r12
*/
pushq
$
0
/*
pt_regs
->
r13
=
0
*/
xor
q
%
r13
,
%
r13
/*
nospec
r13
*/
xor
l
%
r13d
,
%
r13d
/*
nospec
r13
*/
pushq
$
0
/*
pt_regs
->
r14
=
0
*/
xor
q
%
r14
,
%
r14
/*
nospec
r14
*/
xor
l
%
r14d
,
%
r14d
/*
nospec
r14
*/
pushq
$
0
/*
pt_regs
->
r15
=
0
*/
xor
q
%
r15
,
%
r15
/*
nospec
r15
*/
xor
l
%
r15d
,
%
r15d
/*
nospec
r15
*/
cld
/
*
...
...
@@ -224,25 +224,25 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
pushq
%
rbp
/*
pt_regs
->
cx
(
stashed
in
bp
)
*/
pushq
$
-
ENOSYS
/*
pt_regs
->
ax
*/
pushq
$
0
/*
pt_regs
->
r8
=
0
*/
xor
q
%
r8
,
%
r8
/*
nospec
r8
*/
xor
l
%
r8d
,
%
r8d
/*
nospec
r8
*/
pushq
$
0
/*
pt_regs
->
r9
=
0
*/
xor
q
%
r9
,
%
r9
/*
nospec
r9
*/
xor
l
%
r9d
,
%
r9d
/*
nospec
r9
*/
pushq
$
0
/*
pt_regs
->
r10
=
0
*/
xor
q
%
r10
,
%
r10
/*
nospec
r10
*/
xor
l
%
r10d
,
%
r10d
/*
nospec
r10
*/
pushq
$
0
/*
pt_regs
->
r11
=
0
*/
xor
q
%
r11
,
%
r11
/*
nospec
r11
*/
xor
l
%
r11d
,
%
r11d
/*
nospec
r11
*/
pushq
%
rbx
/*
pt_regs
->
rbx
*/
xorl
%
ebx
,
%
ebx
/*
nospec
rbx
*/
pushq
%
rbp
/*
pt_regs
->
rbp
(
will
be
overwritten
)
*/
xorl
%
ebp
,
%
ebp
/*
nospec
rbp
*/
pushq
$
0
/*
pt_regs
->
r12
=
0
*/
xor
q
%
r12
,
%
r12
/*
nospec
r12
*/
xor
l
%
r12d
,
%
r12d
/*
nospec
r12
*/
pushq
$
0
/*
pt_regs
->
r13
=
0
*/
xor
q
%
r13
,
%
r13
/*
nospec
r13
*/
xor
l
%
r13d
,
%
r13d
/*
nospec
r13
*/
pushq
$
0
/*
pt_regs
->
r14
=
0
*/
xor
q
%
r14
,
%
r14
/*
nospec
r14
*/
xor
l
%
r14d
,
%
r14d
/*
nospec
r14
*/
pushq
$
0
/*
pt_regs
->
r15
=
0
*/
xor
q
%
r15
,
%
r15
/*
nospec
r15
*/
xor
l
%
r15d
,
%
r15d
/*
nospec
r15
*/
/
*
*
User
mode
is
traced
as
though
IRQs
are
on
,
and
SYSENTER
...
...
@@ -298,9 +298,9 @@ sysret32_from_system_call:
*/
SWITCH_TO_USER_CR3_NOSTACK
scratch_reg
=%
r8
scratch_reg2
=%
r9
xor
q
%
r8
,
%
r8
xor
q
%
r9
,
%
r9
xor
q
%
r10
,
%
r10
xor
l
%
r8d
,
%
r8d
xor
l
%
r9d
,
%
r9d
xor
l
%
r10d
,
%
r10d
swapgs
sysretl
END
(
entry_SYSCALL_compat
)
...
...
@@ -347,36 +347,47 @@ ENTRY(entry_INT80_compat)
*/
movl
%
eax
,
%
eax
/
*
switch
to
thread
stack
expects
orig_ax
and
rdi
to
be
pushed
*/
pushq
%
rax
/*
pt_regs
->
orig_ax
*/
pushq
%
rdi
/*
pt_regs
->
di
*/
/
*
switch
to
thread
stack
expects
orig_ax
to
be
pushed
*/
call
switch_to_thread_stack
/
*
Need
to
switch
before
accessing
the
thread
stack
.
*/
SWITCH_TO_KERNEL_CR3
scratch_reg
=%
rdi
movq
%
rsp
,
%
rdi
movq
PER_CPU_VAR
(
cpu_current_top_of_stack
),
%
rsp
pushq
%
rdi
/*
pt_regs
->
di
*/
pushq
6
*
8
(%
rdi
)
/*
regs
->
ss
*/
pushq
5
*
8
(%
rdi
)
/*
regs
->
rsp
*/
pushq
4
*
8
(%
rdi
)
/*
regs
->
eflags
*/
pushq
3
*
8
(%
rdi
)
/*
regs
->
cs
*/
pushq
2
*
8
(%
rdi
)
/*
regs
->
ip
*/
pushq
1
*
8
(%
rdi
)
/*
regs
->
orig_ax
*/
pushq
(%
rdi
)
/*
pt_regs
->
di
*/
pushq
%
rsi
/*
pt_regs
->
si
*/
pushq
%
rdx
/*
pt_regs
->
dx
*/
pushq
%
rcx
/*
pt_regs
->
cx
*/
pushq
$
-
ENOSYS
/*
pt_regs
->
ax
*/
pushq
$
0
/*
pt_regs
->
r8
=
0
*/
xor
q
%
r8
,
%
r8
/*
nospec
r8
*/
xor
l
%
r8d
,
%
r8d
/*
nospec
r8
*/
pushq
$
0
/*
pt_regs
->
r9
=
0
*/
xor
q
%
r9
,
%
r9
/*
nospec
r9
*/
xor
l
%
r9d
,
%
r9d
/*
nospec
r9
*/
pushq
$
0
/*
pt_regs
->
r10
=
0
*/
xor
q
%
r10
,
%
r10
/*
nospec
r10
*/
xor
l
%
r10d
,
%
r10d
/*
nospec
r10
*/
pushq
$
0
/*
pt_regs
->
r11
=
0
*/
xor
q
%
r11
,
%
r11
/*
nospec
r11
*/
xor
l
%
r11d
,
%
r11d
/*
nospec
r11
*/
pushq
%
rbx
/*
pt_regs
->
rbx
*/
xorl
%
ebx
,
%
ebx
/*
nospec
rbx
*/
pushq
%
rbp
/*
pt_regs
->
rbp
*/
xorl
%
ebp
,
%
ebp
/*
nospec
rbp
*/
pushq
%
r12
/*
pt_regs
->
r12
*/
xor
q
%
r12
,
%
r12
/*
nospec
r12
*/
xor
l
%
r12d
,
%
r12d
/*
nospec
r12
*/
pushq
%
r13
/*
pt_regs
->
r13
*/
xor
q
%
r13
,
%
r13
/*
nospec
r13
*/
xor
l
%
r13d
,
%
r13d
/*
nospec
r13
*/
pushq
%
r14
/*
pt_regs
->
r14
*/
xor
q
%
r14
,
%
r14
/*
nospec
r14
*/
xor
l
%
r14d
,
%
r14d
/*
nospec
r14
*/
pushq
%
r15
/*
pt_regs
->
r15
*/
xor
q
%
r15
,
%
r15
/*
nospec
r15
*/
xor
l
%
r15d
,
%
r15d
/*
nospec
r15
*/
cld
/
*
...
...
@@ -393,15 +404,3 @@ ENTRY(entry_INT80_compat)
TRACE_IRQS_ON
jmp
swapgs_restore_regs_and_return_to_usermode
END
(
entry_INT80_compat
)
ENTRY
(
stub32_clone
)
/
*
*
The
32
-
bit
clone
ABI
is
:
clone
(
...
,
int
tls_val
,
int
*
child_tidptr
)
.
*
The
64
-
bit
clone
ABI
is
:
clone
(
...
,
int
*
child_tidptr
,
int
tls_val
)
.
*
*
The
native
64
-
bit
kernel
's sys_clone() implements the latter,
*
so
we
need
to
swap
arguments
here
before
calling
it
:
*/
xchg
%
r8
,
%
rcx
jmp
sys_clone
ENDPROC
(
stub32_clone
)
arch/x86/entry/syscalls/syscall_32.tbl
View file @
3c76db70
...
...
@@ -8,12 +8,12 @@
#
0 i386 restart_syscall sys_restart_syscall
1 i386 exit sys_exit
2 i386 fork sys_fork
sys_fork
2 i386 fork sys_fork
3 i386 read sys_read
4 i386 write sys_write
5 i386 open sys_open compat_sys_open
6 i386 close sys_close
7 i386 waitpid sys_waitpid
sys32
_waitpid
7 i386 waitpid sys_waitpid
compat_sys_x86
_waitpid
8 i386 creat sys_creat
9 i386 link sys_link
10 i386 unlink sys_unlink
...
...
@@ -78,7 +78,7 @@
69 i386 ssetmask sys_ssetmask
70 i386 setreuid sys_setreuid16
71 i386 setregid sys_setregid16
72 i386 sigsuspend sys_sigsuspend
sys_sigsuspend
72 i386 sigsuspend sys_sigsuspend
73 i386 sigpending sys_sigpending compat_sys_sigpending
74 i386 sethostname sys_sethostname
75 i386 setrlimit sys_setrlimit compat_sys_setrlimit
...
...
@@ -96,7 +96,7 @@
87 i386 swapon sys_swapon
88 i386 reboot sys_reboot
89 i386 readdir sys_old_readdir compat_sys_old_readdir
90 i386 mmap sys_old_mmap
sys32
_mmap
90 i386 mmap sys_old_mmap
compat_sys_x86
_mmap
91 i386 munmap sys_munmap
92 i386 truncate sys_truncate compat_sys_truncate
93 i386 ftruncate sys_ftruncate compat_sys_ftruncate
...
...
@@ -126,7 +126,7 @@
117 i386 ipc sys_ipc compat_sys_ipc
118 i386 fsync sys_fsync
119 i386 sigreturn sys_sigreturn sys32_sigreturn
120 i386 clone sys_clone
stub32
_clone
120 i386 clone sys_clone
compat_sys_x86
_clone
121 i386 setdomainname sys_setdomainname
122 i386 uname sys_newuname
123 i386 modify_ldt sys_modify_ldt
...
...
@@ -186,8 +186,8 @@
177 i386 rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait
178 i386 rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo
179 i386 rt_sigsuspend sys_rt_sigsuspend
180 i386 pread64 sys_pread64
sys32
_pread
181 i386 pwrite64 sys_pwrite64
sys32
_pwrite
180 i386 pread64 sys_pread64
compat_sys_x86
_pread
181 i386 pwrite64 sys_pwrite64
compat_sys_x86
_pwrite
182 i386 chown sys_chown16
183 i386 getcwd sys_getcwd
184 i386 capget sys_capget
...
...
@@ -196,14 +196,14 @@
187 i386 sendfile sys_sendfile compat_sys_sendfile
188 i386 getpmsg
189 i386 putpmsg
190 i386 vfork sys_vfork
sys_vfork
190 i386 vfork sys_vfork
191 i386 ugetrlimit sys_getrlimit compat_sys_getrlimit
192 i386 mmap2 sys_mmap_pgoff
193 i386 truncate64 sys_truncate64
sys32
_truncate64
194 i386 ftruncate64 sys_ftruncate64
sys32
_ftruncate64
195 i386 stat64 sys_stat64
sys32
_stat64
196 i386 lstat64 sys_lstat64
sys32
_lstat64
197 i386 fstat64 sys_fstat64
sys32
_fstat64
193 i386 truncate64 sys_truncate64
compat_sys_x86
_truncate64
194 i386 ftruncate64 sys_ftruncate64
compat_sys_x86
_ftruncate64
195 i386 stat64 sys_stat64
compat_sys_x86
_stat64
196 i386 lstat64 sys_lstat64
compat_sys_x86
_lstat64
197 i386 fstat64 sys_fstat64
compat_sys_x86
_fstat64
198 i386 lchown32 sys_lchown
199 i386 getuid32 sys_getuid
200 i386 getgid32 sys_getgid
...
...
@@ -231,7 +231,7 @@
# 222 is unused
# 223 is unused
224 i386 gettid sys_gettid
225 i386 readahead sys_readahead
sys32
_readahead
225 i386 readahead sys_readahead
compat_sys_x86
_readahead
226 i386 setxattr sys_setxattr
227 i386 lsetxattr sys_lsetxattr
228 i386 fsetxattr sys_fsetxattr
...
...
@@ -256,7 +256,7 @@
247 i386 io_getevents sys_io_getevents compat_sys_io_getevents
248 i386 io_submit sys_io_submit compat_sys_io_submit
249 i386 io_cancel sys_io_cancel
250 i386 fadvise64 sys_fadvise64
sys32
_fadvise64
250 i386 fadvise64 sys_fadvise64
compat_sys_x86
_fadvise64
# 251 is available for reuse (was briefly sys_set_zone_reclaim)
252 i386 exit_group sys_exit_group
253 i386 lookup_dcookie sys_lookup_dcookie compat_sys_lookup_dcookie
...
...
@@ -278,7 +278,7 @@
269 i386 fstatfs64 sys_fstatfs64 compat_sys_fstatfs64
270 i386 tgkill sys_tgkill
271 i386 utimes sys_utimes compat_sys_utimes
272 i386 fadvise64_64 sys_fadvise64_64
sys32
_fadvise64_64
272 i386 fadvise64_64 sys_fadvise64_64
compat_sys_x86
_fadvise64_64
273 i386 vserver
274 i386 mbind sys_mbind
275 i386 get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy
...
...
@@ -306,7 +306,7 @@
297 i386 mknodat sys_mknodat
298 i386 fchownat sys_fchownat
299 i386 futimesat sys_futimesat compat_sys_futimesat
300 i386 fstatat64 sys_fstatat64
sys32
_fstatat
300 i386 fstatat64 sys_fstatat64
compat_sys_x86
_fstatat
301 i386 unlinkat sys_unlinkat
302 i386 renameat sys_renameat
303 i386 linkat sys_linkat
...
...
@@ -320,7 +320,7 @@
311 i386 set_robust_list sys_set_robust_list compat_sys_set_robust_list
312 i386 get_robust_list sys_get_robust_list compat_sys_get_robust_list
313 i386 splice sys_splice
314 i386 sync_file_range sys_sync_file_range
sys32
_sync_file_range
314 i386 sync_file_range sys_sync_file_range
compat_sys_x86
_sync_file_range
315 i386 tee sys_tee
316 i386 vmsplice sys_vmsplice compat_sys_vmsplice
317 i386 move_pages sys_move_pages compat_sys_move_pages
...
...
@@ -330,7 +330,7 @@
321 i386 signalfd sys_signalfd compat_sys_signalfd
322 i386 timerfd_create sys_timerfd_create
323 i386 eventfd sys_eventfd
324 i386 fallocate sys_fallocate
sys32
_fallocate
324 i386 fallocate sys_fallocate
compat_sys_x86
_fallocate
325 i386 timerfd_settime sys_timerfd_settime compat_sys_timerfd_settime
326 i386 timerfd_gettime sys_timerfd_gettime compat_sys_timerfd_gettime
327 i386 signalfd4 sys_signalfd4 compat_sys_signalfd4
...
...
arch/x86/entry/vsyscall/vsyscall_64.c
View file @
3c76db70
...
...
@@ -42,10 +42,8 @@
#define CREATE_TRACE_POINTS
#include "vsyscall_trace.h"
static
enum
{
EMULATE
,
NATIVE
,
NONE
}
vsyscall_mode
=
#if defined(CONFIG_LEGACY_VSYSCALL_NATIVE)
NATIVE
;
#elif defined(CONFIG_LEGACY_VSYSCALL_NONE)
static
enum
{
EMULATE
,
NONE
}
vsyscall_mode
=
#ifdef CONFIG_LEGACY_VSYSCALL_NONE
NONE
;
#else
EMULATE
;
...
...
@@ -56,8 +54,6 @@ static int __init vsyscall_setup(char *str)
if
(
str
)
{
if
(
!
strcmp
(
"emulate"
,
str
))
vsyscall_mode
=
EMULATE
;
else
if
(
!
strcmp
(
"native"
,
str
))
vsyscall_mode
=
NATIVE
;
else
if
(
!
strcmp
(
"none"
,
str
))
vsyscall_mode
=
NONE
;
else
...
...
@@ -139,10 +135,6 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
WARN_ON_ONCE
(
address
!=
regs
->
ip
);
/* This should be unreachable in NATIVE mode. */
if
(
WARN_ON
(
vsyscall_mode
==
NATIVE
))
return
false
;
if
(
vsyscall_mode
==
NONE
)
{
warn_bad_vsyscall
(
KERN_INFO
,
regs
,
"vsyscall attempted with vsyscall=none"
);
...
...
@@ -370,9 +362,7 @@ void __init map_vsyscall(void)
if
(
vsyscall_mode
!=
NONE
)
{
__set_fixmap
(
VSYSCALL_PAGE
,
physaddr_vsyscall
,
vsyscall_mode
==
NATIVE
?
PAGE_KERNEL_VSYSCALL
:
PAGE_KERNEL_VVAR
);
PAGE_KERNEL_VVAR
);
set_vsyscall_pgtable_user_bits
(
swapper_pg_dir
);
}
...
...
arch/x86/ia32/sys_ia32.c
View file @
3c76db70
...
...
@@ -51,15 +51,14 @@
#define AA(__x) ((unsigned long)(__x))
asmlinkage
long
sys32_truncate64
(
const
char
__user
*
filename
,
unsigned
long
offset_low
,
unsigned
long
offset_high
)
COMPAT_SYSCALL_DEFINE3
(
x86_truncate64
,
const
char
__user
*
,
filename
,
unsigned
long
,
offset_low
,
unsigned
long
,
offset_high
)
{
return
sys_truncate
(
filename
,
((
loff_t
)
offset_high
<<
32
)
|
offset_low
);
}
asmlinkage
long
sys32_ftruncate64
(
unsigned
int
fd
,
unsigned
long
offset_low
,
unsigned
long
offset_high
)
COMPAT_SYSCALL_DEFINE3
(
x86_ftruncate64
,
unsigned
int
,
fd
,
unsigned
long
,
offset_low
,
unsigned
long
,
offset_high
)
{
return
sys_ftruncate
(
fd
,
((
loff_t
)
offset_high
<<
32
)
|
offset_low
);
}
...
...
@@ -96,8 +95,8 @@ static int cp_stat64(struct stat64 __user *ubuf, struct kstat *stat)
return
0
;
}
asmlinkage
long
sys32_stat64
(
const
char
__user
*
filename
,
struct
stat64
__user
*
statbuf
)
COMPAT_SYSCALL_DEFINE2
(
x86_stat64
,
const
char
__user
*
,
filename
,
struct
stat64
__user
*
,
statbuf
)
{
struct
kstat
stat
;
int
ret
=
vfs_stat
(
filename
,
&
stat
);
...
...
@@ -107,8 +106,8 @@ asmlinkage long sys32_stat64(const char __user *filename,
return
ret
;
}
asmlinkage
long
sys32_lstat64
(
const
char
__user
*
filename
,
struct
stat64
__user
*
statbuf
)
COMPAT_SYSCALL_DEFINE2
(
x86_lstat64
,
const
char
__user
*
,
filename
,
struct
stat64
__user
*
,
statbuf
)
{
struct
kstat
stat
;
int
ret
=
vfs_lstat
(
filename
,
&
stat
);
...
...
@@ -117,7 +116,8 @@ asmlinkage long sys32_lstat64(const char __user *filename,
return
ret
;
}
asmlinkage
long
sys32_fstat64
(
unsigned
int
fd
,
struct
stat64
__user
*
statbuf
)
COMPAT_SYSCALL_DEFINE2
(
x86_fstat64
,
unsigned
int
,
fd
,
struct
stat64
__user
*
,
statbuf
)
{
struct
kstat
stat
;
int
ret
=
vfs_fstat
(
fd
,
&
stat
);
...
...
@@ -126,8 +126,9 @@ asmlinkage long sys32_fstat64(unsigned int fd, struct stat64 __user *statbuf)
return
ret
;
}
asmlinkage
long
sys32_fstatat
(
unsigned
int
dfd
,
const
char
__user
*
filename
,
struct
stat64
__user
*
statbuf
,
int
flag
)
COMPAT_SYSCALL_DEFINE4
(
x86_fstatat
,
unsigned
int
,
dfd
,
const
char
__user
*
,
filename
,
struct
stat64
__user
*
,
statbuf
,
int
,
flag
)
{
struct
kstat
stat
;
int
error
;
...
...
@@ -153,7 +154,7 @@ struct mmap_arg_struct32 {
unsigned
int
offset
;
};
asmlinkage
long
sys32_mmap
(
struct
mmap_arg_struct32
__user
*
arg
)
COMPAT_SYSCALL_DEFINE1
(
x86_mmap
,
struct
mmap_arg_struct32
__user
*
,
arg
)
{
struct
mmap_arg_struct32
a
;
...
...
@@ -167,22 +168,22 @@ asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *arg)
a
.
offset
>>
PAGE_SHIFT
);
}
asmlinkage
long
sys32_waitpid
(
compat_pid_t
pid
,
unsigned
int
__user
*
stat_addr
,
int
options
)
COMPAT_SYSCALL_DEFINE3
(
x86_waitpid
,
compat_pid_t
,
pid
,
unsigned
int
__user
*
,
stat_addr
,
int
,
options
)
{
return
compat_sys_wait4
(
pid
,
stat_addr
,
options
,
NULL
);
}
/* warning: next two assume little endian */
asmlinkage
long
sys32_pread
(
unsigned
int
fd
,
char
__user
*
ubuf
,
u32
count
,
u32
poslo
,
u32
poshi
)
COMPAT_SYSCALL_DEFINE5
(
x86_pread
,
unsigned
int
,
fd
,
char
__user
*
,
ubuf
,
u32
,
count
,
u32
,
poslo
,
u32
,
poshi
)
{
return
sys_pread64
(
fd
,
ubuf
,
count
,
((
loff_t
)
AA
(
poshi
)
<<
32
)
|
AA
(
poslo
));
}
asmlinkage
long
sys32_pwrite
(
unsigned
int
fd
,
const
char
__user
*
ubuf
,
u32
count
,
u32
poslo
,
u32
poshi
)
COMPAT_SYSCALL_DEFINE5
(
x86_pwrite
,
unsigned
int
,
fd
,
const
char
__user
*
,
ubuf
,
u32
,
count
,
u32
,
poslo
,
u32
,
poshi
)
{
return
sys_pwrite64
(
fd
,
ubuf
,
count
,
((
loff_t
)
AA
(
poshi
)
<<
32
)
|
AA
(
poslo
));
...
...
@@ -193,8 +194,9 @@ asmlinkage long sys32_pwrite(unsigned int fd, const char __user *ubuf,
* Some system calls that need sign extended arguments. This could be
* done by a generic wrapper.
*/
long
sys32_fadvise64_64
(
int
fd
,
__u32
offset_low
,
__u32
offset_high
,
__u32
len_low
,
__u32
len_high
,
int
advice
)
COMPAT_SYSCALL_DEFINE6
(
x86_fadvise64_64
,
int
,
fd
,
__u32
,
offset_low
,
__u32
,
offset_high
,
__u32
,
len_low
,
__u32
,
len_high
,
int
,
advice
)
{
return
sys_fadvise64_64
(
fd
,
(((
u64
)
offset_high
)
<<
32
)
|
offset_low
,
...
...
@@ -202,31 +204,43 @@ long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high,
advice
);
}
asmlinkage
ssize_t
sys32_readahead
(
int
fd
,
unsigned
off_lo
,
unsigned
off_hi
,
size_t
count
)
COMPAT_SYSCALL_DEFINE4
(
x86_readahead
,
int
,
fd
,
unsigned
int
,
off_lo
,
unsigned
int
,
off_hi
,
size_t
,
count
)
{
return
sys_readahead
(
fd
,
((
u64
)
off_hi
<<
32
)
|
off_lo
,
count
);
}
asmlinkage
long
sys32_sync_file_range
(
int
fd
,
unsigned
off_low
,
unsigned
off_hi
,
unsigned
n_low
,
unsigned
n_hi
,
int
flags
)
COMPAT_SYSCALL_DEFINE6
(
x86_sync_file_range
,
int
,
fd
,
unsigned
int
,
off_low
,
unsigned
int
,
off_hi
,
unsigned
int
,
n_low
,
unsigned
int
,
n_hi
,
int
,
flags
)
{
return
sys_sync_file_range
(
fd
,
((
u64
)
off_hi
<<
32
)
|
off_low
,
((
u64
)
n_hi
<<
32
)
|
n_low
,
flags
);
}
asmlinkage
long
sys32_fadvise64
(
int
fd
,
unsigned
offset_lo
,
unsigned
offset_hi
,
size_t
len
,
int
advice
)
COMPAT_SYSCALL_DEFINE5
(
x86_fadvise64
,
int
,
fd
,
unsigned
int
,
offset_lo
,
unsigned
int
,
offset_hi
,
size_t
,
len
,
int
,
advice
)
{
return
sys_fadvise64_64
(
fd
,
((
u64
)
offset_hi
<<
32
)
|
offset_lo
,
len
,
advice
);
}
asmlinkage
long
sys32_fallocate
(
int
fd
,
int
mode
,
unsigned
offset_lo
,
unsigned
offset_hi
,
unsigned
len_lo
,
unsigned
len_hi
)
COMPAT_SYSCALL_DEFINE6
(
x86_fallocate
,
int
,
fd
,
int
,
mode
,
unsigned
int
,
offset_lo
,
unsigned
int
,
offset_hi
,
unsigned
int
,
len_lo
,
unsigned
int
,
len_hi
)
{
return
sys_fallocate
(
fd
,
mode
,
((
u64
)
offset_hi
<<
32
)
|
offset_lo
,
((
u64
)
len_hi
<<
32
)
|
len_lo
);
}
/*
* The 32-bit clone ABI is CONFIG_CLONE_BACKWARDS
*/
COMPAT_SYSCALL_DEFINE5
(
x86_clone
,
unsigned
long
,
clone_flags
,
unsigned
long
,
newsp
,
int
__user
*
,
parent_tidptr
,
unsigned
long
,
tls_val
,
int
__user
*
,
child_tidptr
)
{
return
sys_clone
(
clone_flags
,
newsp
,
parent_tidptr
,
child_tidptr
,
tls_val
);
}
arch/x86/include/asm/apm.h
View file @
3c76db70
...
...
@@ -7,6 +7,8 @@
#ifndef _ASM_X86_MACH_DEFAULT_APM_H
#define _ASM_X86_MACH_DEFAULT_APM_H
#include <asm/nospec-branch.h>
#ifdef APM_ZERO_SEGS
# define APM_DO_ZERO_SEGS \
"pushl %%ds\n\t" \
...
...
@@ -32,6 +34,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in,
* N.B. We do NOT need a cld after the BIOS call
* because we always save and restore the flags.
*/
firmware_restrict_branch_speculation_start
();
__asm__
__volatile__
(
APM_DO_ZERO_SEGS
"pushl %%edi
\n\t
"
"pushl %%ebp
\n\t
"
...
...
@@ -44,6 +47,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in,
"=S"
(
*
esi
)
:
"a"
(
func
),
"b"
(
ebx_in
),
"c"
(
ecx_in
)
:
"memory"
,
"cc"
);
firmware_restrict_branch_speculation_end
();
}
static
inline
bool
apm_bios_call_simple_asm
(
u32
func
,
u32
ebx_in
,
...
...
@@ -56,6 +60,7 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in,
* N.B. We do NOT need a cld after the BIOS call
* because we always save and restore the flags.
*/
firmware_restrict_branch_speculation_start
();
__asm__
__volatile__
(
APM_DO_ZERO_SEGS
"pushl %%edi
\n\t
"
"pushl %%ebp
\n\t
"
...
...
@@ -68,6 +73,7 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in,
"=S"
(
si
)
:
"a"
(
func
),
"b"
(
ebx_in
),
"c"
(
ecx_in
)
:
"memory"
,
"cc"
);
firmware_restrict_branch_speculation_end
();
return
error
;
}
...
...
arch/x86/include/asm/asm-prototypes.h
View file @
3c76db70
...
...
@@ -38,7 +38,4 @@ INDIRECT_THUNK(dx)
INDIRECT_THUNK
(
si
)
INDIRECT_THUNK
(
di
)
INDIRECT_THUNK
(
bp
)
asmlinkage
void
__fill_rsb
(
void
);
asmlinkage
void
__clear_rsb
(
void
);
#endif
/* CONFIG_RETPOLINE */
arch/x86/include/asm/cpufeatures.h
View file @
3c76db70
...
...
@@ -213,6 +213,7 @@
#define X86_FEATURE_SEV ( 7*32+20)
/* AMD Secure Encrypted Virtualization */
#define X86_FEATURE_USE_IBPB ( 7*32+21)
/* "" Indirect Branch Prediction Barrier enabled */
#define X86_FEATURE_USE_IBRS_FW ( 7*32+22)
/* "" Use IBRS during runtime firmware calls */
/* Virtualization flags: Linux defined, word 8 */
#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0)
/* Intel TPR Shadow */
...
...
@@ -315,6 +316,7 @@
#define X86_FEATURE_VPCLMULQDQ (16*32+10)
/* Carry-Less Multiplication Double Quadword */
#define X86_FEATURE_AVX512_VNNI (16*32+11)
/* Vector Neural Network Instructions */
#define X86_FEATURE_AVX512_BITALG (16*32+12)
/* Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */
#define X86_FEATURE_TME (16*32+13)
/* Intel Total Memory Encryption */
#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14)
/* POPCNT for vectors of DW/QW */
#define X86_FEATURE_LA57 (16*32+16)
/* 5-level page tables */
#define X86_FEATURE_RDPID (16*32+22)
/* RDPID instruction */
...
...
@@ -327,6 +329,7 @@
/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2)
/* AVX-512 Neural Network Instructions */
#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3)
/* AVX-512 Multiply Accumulation Single precision */
#define X86_FEATURE_PCONFIG (18*32+18)
/* Intel PCONFIG */
#define X86_FEATURE_SPEC_CTRL (18*32+26)
/* "" Speculation Control (IBRS + IBPB) */
#define X86_FEATURE_INTEL_STIBP (18*32+27)
/* "" Single Thread Indirect Branch Predictors */
#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29)
/* IA32_ARCH_CAPABILITIES MSR (Intel) */
...
...
arch/x86/include/asm/efi.h
View file @
3c76db70
...
...
@@ -6,6 +6,7 @@
#include <asm/pgtable.h>
#include <asm/processor-flags.h>
#include <asm/tlb.h>
#include <asm/nospec-branch.h>
/*
* We map the EFI regions needed for runtime services non-contiguously,
...
...
@@ -36,8 +37,18 @@
extern
asmlinkage
unsigned
long
efi_call_phys
(
void
*
,
...);
#define arch_efi_call_virt_setup() kernel_fpu_begin()
#define arch_efi_call_virt_teardown() kernel_fpu_end()
#define arch_efi_call_virt_setup() \
({ \
kernel_fpu_begin(); \
firmware_restrict_branch_speculation_start(); \
})
#define arch_efi_call_virt_teardown() \
({ \
firmware_restrict_branch_speculation_end(); \
kernel_fpu_end(); \
})
/*
* Wrap all the virtual calls in a way that forces the parameters on the stack.
...
...
@@ -73,6 +84,7 @@ struct efi_scratch {
efi_sync_low_kernel_mappings(); \
preempt_disable(); \
__kernel_fpu_begin(); \
firmware_restrict_branch_speculation_start(); \
\
if (efi_scratch.use_pgd) { \
efi_scratch.prev_cr3 = __read_cr3(); \
...
...
@@ -91,6 +103,7 @@ struct efi_scratch {
__flush_tlb_all(); \
} \
\
firmware_restrict_branch_speculation_end(); \
__kernel_fpu_end(); \
preempt_enable(); \
})
...
...
arch/x86/include/asm/microcode.h
View file @
3c76db70
...
...
@@ -37,7 +37,12 @@ struct cpu_signature {
struct
device
;
enum
ucode_state
{
UCODE_ERROR
,
UCODE_OK
,
UCODE_NFOUND
};
enum
ucode_state
{
UCODE_OK
=
0
,
UCODE_UPDATED
,
UCODE_NFOUND
,
UCODE_ERROR
,
};
struct
microcode_ops
{
enum
ucode_state
(
*
request_microcode_user
)
(
int
cpu
,
...
...
@@ -54,7 +59,7 @@ struct microcode_ops {
* are being called.
* See also the "Synchronization" section in microcode_core.c.
*/
int
(
*
apply_microcode
)
(
int
cpu
);
enum
ucode_state
(
*
apply_microcode
)
(
int
cpu
);
int
(
*
collect_cpu_info
)
(
int
cpu
,
struct
cpu_signature
*
csig
);
};
...
...
arch/x86/include/asm/mmu_context.h
View file @
3c76db70
...
...
@@ -74,6 +74,7 @@ static inline void *ldt_slot_va(int slot)
return
(
void
*
)(
LDT_BASE_ADDR
+
LDT_SLOT_STRIDE
*
slot
);
#else
BUG
();
return
(
void
*
)
fix_to_virt
(
FIX_HOLE
);
#endif
}
...
...
arch/x86/include/asm/nospec-branch.h
View file @
3c76db70
...
...
@@ -8,6 +8,50 @@
#include <asm/cpufeatures.h>
#include <asm/msr-index.h>
/*
* Fill the CPU return stack buffer.
*
* Each entry in the RSB, if used for a speculative 'ret', contains an
* infinite 'pause; lfence; jmp' loop to capture speculative execution.
*
* This is required in various cases for retpoline and IBRS-based
* mitigations for the Spectre variant 2 vulnerability. Sometimes to
* eliminate potentially bogus entries from the RSB, and sometimes
* purely to ensure that it doesn't get empty, which on some CPUs would
* allow predictions from other (unwanted!) sources to be used.
*
* We define a CPP macro such that it can be used from both .S files and
* inline assembly. It's possible to do a .macro and then include that
* from C via asm(".include <asm/nospec-branch.h>") but let's not go there.
*/
#define RSB_CLEAR_LOOPS 32
/* To forcibly overwrite all entries */
#define RSB_FILL_LOOPS 16
/* To avoid underflow */
/*
* Google experimented with loop-unrolling and this turned out to be
* the optimal version — two calls, each with their own speculation
* trap should their return address end up getting used, in a loop.
*/
#define __FILL_RETURN_BUFFER(reg, nr, sp) \
mov $(nr/2), reg; \
771: \
call 772f; \
773:
/* speculation trap */
\
pause; \
lfence; \
jmp 773b; \
772: \
call 774f; \
775:
/* speculation trap */
\
pause; \
lfence; \
jmp 775b; \
774: \
dec reg; \
jnz 771b; \
add $(BITS_PER_LONG/8) * nr, sp;
#ifdef __ASSEMBLY__
/*
...
...
@@ -23,6 +67,18 @@
.
popsection
.
endm
/*
* This should be used immediately before an indirect jump/call. It tells
* objtool the subsequent indirect jump/call is vouched safe for retpoline
* builds.
*/
.
macro
ANNOTATE_RETPOLINE_SAFE
.
Lannotate_
\@
:
.
pushsection
.
discard
.
retpoline_safe
_ASM_PTR
.
Lannotate_
\@
.
popsection
.
endm
/*
* These are the bare retpoline primitives for indirect jmp and call.
* Do not use these directly; they only exist to make the ALTERNATIVE
...
...
@@ -59,9 +115,9 @@
.
macro
JMP_NOSPEC
reg
:
req
#ifdef CONFIG_RETPOLINE
ANNOTATE_NOSPEC_ALTERNATIVE
ALTERNATIVE_2
__stringify
(
jmp
*
\
reg
),
\
ALTERNATIVE_2
__stringify
(
ANNOTATE_RETPOLINE_SAFE
;
jmp
*
\
reg
),
\
__stringify
(
RETPOLINE_JMP
\
reg
),
X86_FEATURE_RETPOLINE
,
\
__stringify
(
lfence
;
jmp
*
\
reg
),
X86_FEATURE_RETPOLINE_AMD
__stringify
(
lfence
;
ANNOTATE_RETPOLINE_SAFE
;
jmp
*
\
reg
),
X86_FEATURE_RETPOLINE_AMD
#else
jmp
*
\
reg
#endif
...
...
@@ -70,18 +126,25 @@
.
macro
CALL_NOSPEC
reg
:
req
#ifdef CONFIG_RETPOLINE
ANNOTATE_NOSPEC_ALTERNATIVE
ALTERNATIVE_2
__stringify
(
call
*
\
reg
),
\
ALTERNATIVE_2
__stringify
(
ANNOTATE_RETPOLINE_SAFE
;
call
*
\
reg
),
\
__stringify
(
RETPOLINE_CALL
\
reg
),
X86_FEATURE_RETPOLINE
,
\
__stringify
(
lfence
;
call
*
\
reg
),
X86_FEATURE_RETPOLINE_AMD
__stringify
(
lfence
;
ANNOTATE_RETPOLINE_SAFE
;
call
*
\
reg
),
X86_FEATURE_RETPOLINE_AMD
#else
call
*
\
reg
#endif
.
endm
/* This clobbers the BX register */
.
macro
FILL_RETURN_BUFFER
nr
:
req
ftr
:
req
/*
* A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
* monstrosity above, manually.
*/
.
macro
FILL_RETURN_BUFFER
reg
:
req
nr
:
req
ftr
:
req
#ifdef CONFIG_RETPOLINE
ALTERNATIVE
""
,
"call __clear_rsb"
,
\
ftr
ANNOTATE_NOSPEC_ALTERNATIVE
ALTERNATIVE
"jmp .Lskip_rsb_\@"
,
\
__stringify
(
__FILL_RETURN_BUFFER
(
\
reg
,
\
nr
,
%
_ASM_SP
))
\
\
ftr
.
Lskip_rsb_
\@
:
#endif
.
endm
...
...
@@ -93,6 +156,12 @@
".long 999b - .\n\t" \
".popsection\n\t"
#define ANNOTATE_RETPOLINE_SAFE \
"999:\n\t" \
".pushsection .discard.retpoline_safe\n\t" \
_ASM_PTR " 999b\n\t" \
".popsection\n\t"
#if defined(CONFIG_X86_64) && defined(RETPOLINE)
/*
...
...
@@ -102,6 +171,7 @@
# define CALL_NOSPEC \
ANNOTATE_NOSPEC_ALTERNATIVE \
ALTERNATIVE( \
ANNOTATE_RETPOLINE_SAFE \
"call *%[thunk_target]\n", \
"call __x86_indirect_thunk_%V[thunk_target]\n", \
X86_FEATURE_RETPOLINE)
...
...
@@ -156,26 +226,54 @@ extern char __indirect_thunk_end[];
static
inline
void
vmexit_fill_RSB
(
void
)
{
#ifdef CONFIG_RETPOLINE
alternative_input
(
""
,
"call __fill_rsb"
,
X86_FEATURE_RETPOLINE
,
ASM_NO_INPUT_CLOBBER
(
_ASM_BX
,
"memory"
));
unsigned
long
loops
;
asm
volatile
(
ANNOTATE_NOSPEC_ALTERNATIVE
ALTERNATIVE
(
"jmp 910f"
,
__stringify
(
__FILL_RETURN_BUFFER
(
%
0
,
RSB_CLEAR_LOOPS
,
%
1
)),
X86_FEATURE_RETPOLINE
)
"910:"
:
"=r"
(
loops
),
ASM_CALL_CONSTRAINT
:
:
"memory"
);
#endif
}
#define alternative_msr_write(_msr, _val, _feature) \
asm volatile(ALTERNATIVE("", \
"movl %[msr], %%ecx\n\t" \
"movl %[val], %%eax\n\t" \
"movl $0, %%edx\n\t" \
"wrmsr", \
_feature) \
: : [msr] "i" (_msr), [val] "i" (_val) \
: "eax", "ecx", "edx", "memory")
static
inline
void
indirect_branch_prediction_barrier
(
void
)
{
asm
volatile
(
ALTERNATIVE
(
""
,
"movl %[msr], %%ecx
\n\t
"
"movl %[val], %%eax
\n\t
"
"movl $0, %%edx
\n\t
"
"wrmsr"
,
X86_FEATURE_USE_IBPB
)
:
:
[
msr
]
"i"
(
MSR_IA32_PRED_CMD
),
[
val
]
"i"
(
PRED_CMD_IBPB
)
:
"eax"
,
"ecx"
,
"edx"
,
"memory"
);
alternative_msr_write
(
MSR_IA32_PRED_CMD
,
PRED_CMD_IBPB
,
X86_FEATURE_USE_IBPB
);
}
/*
* With retpoline, we must use IBRS to restrict branch prediction
* before calling into firmware.
*
* (Implemented as CPP macros due to header hell.)
*/
#define firmware_restrict_branch_speculation_start() \
do { \
preempt_disable(); \
alternative_msr_write(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS, \
X86_FEATURE_USE_IBRS_FW); \
} while (0)
#define firmware_restrict_branch_speculation_end() \
do { \
alternative_msr_write(MSR_IA32_SPEC_CTRL, 0, \
X86_FEATURE_USE_IBRS_FW); \
preempt_enable(); \
} while (0)
#endif
/* __ASSEMBLY__ */
/*
...
...
arch/x86/include/asm/paravirt.h
View file @
3c76db70
...
...
@@ -7,6 +7,7 @@
#ifdef CONFIG_PARAVIRT
#include <asm/pgtable_types.h>
#include <asm/asm.h>
#include <asm/nospec-branch.h>
#include <asm/paravirt_types.h>
...
...
@@ -884,23 +885,27 @@ extern void default_banner(void);
#define INTERRUPT_RETURN \
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE, \
jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret))
ANNOTATE_RETPOLINE_SAFE; \
jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret);)
#define DISABLE_INTERRUPTS(clobbers) \
PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \
PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
ANNOTATE_RETPOLINE_SAFE; \
call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable); \
PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
#define ENABLE_INTERRUPTS(clobbers) \
PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers, \
PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
ANNOTATE_RETPOLINE_SAFE; \
call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable); \
PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
#ifdef CONFIG_X86_32
#define GET_CR0_INTO_EAX \
push %ecx; push %edx; \
ANNOTATE_RETPOLINE_SAFE; \
call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0); \
pop %edx; pop %ecx
#else
/* !CONFIG_X86_32 */
...
...
@@ -922,21 +927,25 @@ extern void default_banner(void);
*/
#define SWAPGS \
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \
call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs) \
ANNOTATE_RETPOLINE_SAFE; \
call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs); \
)
#define GET_CR2_INTO_RAX \
call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2)
ANNOTATE_RETPOLINE_SAFE; \
call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2);
#define USERGS_SYSRET64 \
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \
CLBR_NONE, \
jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))
ANNOTATE_RETPOLINE_SAFE; \
jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64);)
#ifdef CONFIG_DEBUG_ENTRY
#define SAVE_FLAGS(clobbers) \
PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_save_fl), clobbers, \
PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
ANNOTATE_RETPOLINE_SAFE; \
call PARA_INDIRECT(pv_irq_ops+PV_IRQ_save_fl); \
PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
#endif
...
...
arch/x86/include/asm/paravirt_types.h
View file @
3c76db70
...
...
@@ -43,6 +43,7 @@
#include <asm/desc_defs.h>
#include <asm/kmap_types.h>
#include <asm/pgtable_types.h>
#include <asm/nospec-branch.h>
struct
page
;
struct
thread_struct
;
...
...
@@ -392,7 +393,9 @@ int paravirt_disable_iospace(void);
* offset into the paravirt_patch_template structure, and can therefore be
* freely converted back into a structure offset.
*/
#define PARAVIRT_CALL "call *%c[paravirt_opptr];"
#define PARAVIRT_CALL \
ANNOTATE_RETPOLINE_SAFE \
"call *%c[paravirt_opptr];"
/*
* These macros are intended to wrap calls through one of the paravirt
...
...
arch/x86/include/asm/pgtable.h
View file @
3c76db70
...
...
@@ -350,14 +350,14 @@ static inline pmd_t pmd_set_flags(pmd_t pmd, pmdval_t set)
{
pmdval_t
v
=
native_pmd_val
(
pmd
);
return
_
_pmd
(
v
|
set
);
return
native_make
_pmd
(
v
|
set
);
}
static
inline
pmd_t
pmd_clear_flags
(
pmd_t
pmd
,
pmdval_t
clear
)
{
pmdval_t
v
=
native_pmd_val
(
pmd
);
return
_
_pmd
(
v
&
~
clear
);
return
native_make
_pmd
(
v
&
~
clear
);
}
static
inline
pmd_t
pmd_mkold
(
pmd_t
pmd
)
...
...
@@ -409,14 +409,14 @@ static inline pud_t pud_set_flags(pud_t pud, pudval_t set)
{
pudval_t
v
=
native_pud_val
(
pud
);
return
_
_pud
(
v
|
set
);
return
native_make
_pud
(
v
|
set
);
}
static
inline
pud_t
pud_clear_flags
(
pud_t
pud
,
pudval_t
clear
)
{
pudval_t
v
=
native_pud_val
(
pud
);
return
_
_pud
(
v
&
~
clear
);
return
native_make
_pud
(
v
&
~
clear
);
}
static
inline
pud_t
pud_mkold
(
pud_t
pud
)
...
...
arch/x86/include/asm/pgtable_32.h
View file @
3c76db70
...
...
@@ -32,6 +32,7 @@ extern pmd_t initial_pg_pmd[];
static
inline
void
pgtable_cache_init
(
void
)
{
}
static
inline
void
check_pgt_cache
(
void
)
{
}
void
paging_init
(
void
);
void
sync_initial_page_table
(
void
);
static
inline
int
pgd_large
(
pgd_t
pgd
)
{
return
0
;
}
...
...
arch/x86/include/asm/pgtable_64.h
View file @
3c76db70
...
...
@@ -28,6 +28,7 @@ extern pgd_t init_top_pgt[];
#define swapper_pg_dir init_top_pgt
extern
void
paging_init
(
void
);
static
inline
void
sync_initial_page_table
(
void
)
{
}
#define pte_ERROR(e) \
pr_err("%s:%d: bad pte %p(%016lx)\n", \
...
...
arch/x86/include/asm/pgtable_types.h
View file @
3c76db70
...
...
@@ -174,7 +174,6 @@ enum page_cache_mode {
#define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW)
#define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW)
#define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_NOCACHE)
#define __PAGE_KERNEL_VSYSCALL (__PAGE_KERNEL_RX | _PAGE_USER)
#define __PAGE_KERNEL_VVAR (__PAGE_KERNEL_RO | _PAGE_USER)
#define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE)
#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE)
...
...
@@ -206,7 +205,6 @@ enum page_cache_mode {
#define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE | _PAGE_ENC)
#define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE | _PAGE_ENC)
#define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC | _PAGE_ENC)
#define PAGE_KERNEL_VSYSCALL __pgprot(__PAGE_KERNEL_VSYSCALL | _PAGE_ENC)
#define PAGE_KERNEL_VVAR __pgprot(__PAGE_KERNEL_VVAR | _PAGE_ENC)
#define PAGE_KERNEL_IO __pgprot(__PAGE_KERNEL_IO)
...
...
@@ -323,6 +321,11 @@ static inline pudval_t native_pud_val(pud_t pud)
#else
#include <asm-generic/pgtable-nopud.h>
static
inline
pud_t
native_make_pud
(
pudval_t
val
)
{
return
(
pud_t
)
{
.
p4d
.
pgd
=
native_make_pgd
(
val
)
};
}
static
inline
pudval_t
native_pud_val
(
pud_t
pud
)
{
return
native_pgd_val
(
pud
.
p4d
.
pgd
);
...
...
@@ -344,6 +347,11 @@ static inline pmdval_t native_pmd_val(pmd_t pmd)
#else
#include <asm-generic/pgtable-nopmd.h>
static
inline
pmd_t
native_make_pmd
(
pmdval_t
val
)
{
return
(
pmd_t
)
{
.
pud
.
p4d
.
pgd
=
native_make_pgd
(
val
)
};
}
static
inline
pmdval_t
native_pmd_val
(
pmd_t
pmd
)
{
return
native_pgd_val
(
pmd
.
pud
.
p4d
.
pgd
);
...
...
arch/x86/include/asm/processor.h
View file @
3c76db70
...
...
@@ -977,4 +977,5 @@ bool xen_set_default_idle(void);
void
stop_this_cpu
(
void
*
dummy
);
void
df_debug
(
struct
pt_regs
*
regs
,
long
error_code
);
void
microcode_check
(
void
);
#endif
/* _ASM_X86_PROCESSOR_H */
arch/x86/include/asm/refcount.h
View file @
3c76db70
...
...
@@ -67,13 +67,13 @@ static __always_inline __must_check
bool
refcount_sub_and_test
(
unsigned
int
i
,
refcount_t
*
r
)
{
GEN_BINARY_SUFFIXED_RMWcc
(
LOCK_PREFIX
"subl"
,
REFCOUNT_CHECK_LT_ZERO
,
r
->
refs
.
counter
,
"er"
,
i
,
"%0"
,
e
);
r
->
refs
.
counter
,
"er"
,
i
,
"%0"
,
e
,
"cx"
);
}
static
__always_inline
__must_check
bool
refcount_dec_and_test
(
refcount_t
*
r
)
{
GEN_UNARY_SUFFIXED_RMWcc
(
LOCK_PREFIX
"decl"
,
REFCOUNT_CHECK_LT_ZERO
,
r
->
refs
.
counter
,
"%0"
,
e
);
r
->
refs
.
counter
,
"%0"
,
e
,
"cx"
);
}
static
__always_inline
__must_check
...
...
arch/x86/include/asm/rmwcc.h
View file @
3c76db70
...
...
@@ -2,8 +2,7 @@
#ifndef _ASM_X86_RMWcc
#define _ASM_X86_RMWcc
#define __CLOBBERS_MEM "memory"
#define __CLOBBERS_MEM_CC_CX "memory", "cc", "cx"
#define __CLOBBERS_MEM(clb...) "memory", ## clb
#if !defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(CC_HAVE_ASM_GOTO)
...
...
@@ -40,18 +39,19 @@ do { \
#endif
/* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */
#define GEN_UNARY_RMWcc(op, var, arg0, cc) \
__GEN_RMWcc(op " " arg0, var, cc, __CLOBBERS_MEM)
__GEN_RMWcc(op " " arg0, var, cc, __CLOBBERS_MEM
()
)
#define GEN_UNARY_SUFFIXED_RMWcc(op, suffix, var, arg0, cc
)
\
#define GEN_UNARY_SUFFIXED_RMWcc(op, suffix, var, arg0, cc
, clobbers...)
\
__GEN_RMWcc(op " " arg0 "\n\t" suffix, var, cc, \
__CLOBBERS_MEM
_CC_CX
)
__CLOBBERS_MEM
(clobbers)
)
#define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc) \
__GEN_RMWcc(op __BINARY_RMWcc_ARG arg0, var, cc, \
__CLOBBERS_MEM, vcon (val))
__CLOBBERS_MEM
()
, vcon (val))
#define GEN_BINARY_SUFFIXED_RMWcc(op, suffix, var, vcon, val, arg0, cc) \
#define GEN_BINARY_SUFFIXED_RMWcc(op, suffix, var, vcon, val, arg0, cc, \
clobbers...) \
__GEN_RMWcc(op __BINARY_RMWcc_ARG arg0 "\n\t" suffix, var, cc, \
__CLOBBERS_MEM
_CC_CX
, vcon (val))
__CLOBBERS_MEM
(clobbers)
, vcon (val))
#endif
/* _ASM_X86_RMWcc */
arch/x86/include/asm/sections.h
View file @
3c76db70
...
...
@@ -10,6 +10,7 @@ extern struct exception_table_entry __stop___ex_table[];
#if defined(CONFIG_X86_64)
extern
char
__end_rodata_hpage_align
[];
extern
char
__entry_trampoline_start
[],
__entry_trampoline_end
[];
#endif
#endif
/* _ASM_X86_SECTIONS_H */
arch/x86/include/asm/sys_ia32.h
View file @
3c76db70
...
...
@@ -20,31 +20,43 @@
#include <asm/ia32.h>
/* ia32/sys_ia32.c */
asmlinkage
long
sys32_truncate64
(
const
char
__user
*
,
unsigned
long
,
unsigned
long
);
asmlinkage
long
sys32_ftruncate64
(
unsigned
int
,
unsigned
long
,
unsigned
long
);
asmlinkage
long
compat_sys_x86_truncate64
(
const
char
__user
*
,
unsigned
long
,
unsigned
long
);
asmlinkage
long
compat_sys_x86_ftruncate64
(
unsigned
int
,
unsigned
long
,
unsigned
long
);
asmlinkage
long
sys32_stat64
(
const
char
__user
*
,
struct
stat64
__user
*
);
asmlinkage
long
sys32_lstat64
(
const
char
__user
*
,
struct
stat64
__user
*
);
asmlinkage
long
sys32_fstat64
(
unsigned
int
,
struct
stat64
__user
*
);
asmlinkage
long
sys32_fstatat
(
unsigned
int
,
const
char
__user
*
,
asmlinkage
long
compat_sys_x86_stat64
(
const
char
__user
*
,
struct
stat64
__user
*
);
asmlinkage
long
compat_sys_x86_lstat64
(
const
char
__user
*
,
struct
stat64
__user
*
);
asmlinkage
long
compat_sys_x86_fstat64
(
unsigned
int
,
struct
stat64
__user
*
);
asmlinkage
long
compat_sys_x86_fstatat
(
unsigned
int
,
const
char
__user
*
,
struct
stat64
__user
*
,
int
);
struct
mmap_arg_struct32
;
asmlinkage
long
sys32
_mmap
(
struct
mmap_arg_struct32
__user
*
);
asmlinkage
long
compat_sys_x86
_mmap
(
struct
mmap_arg_struct32
__user
*
);
asmlinkage
long
sys32_waitpid
(
compat_pid_t
,
unsigned
int
__user
*
,
int
);
asmlinkage
long
compat_sys_x86_waitpid
(
compat_pid_t
,
unsigned
int
__user
*
,
int
);
asmlinkage
long
sys32_pread
(
unsigned
int
,
char
__user
*
,
u32
,
u32
,
u32
);
asmlinkage
long
sys32_pwrite
(
unsigned
int
,
const
char
__user
*
,
u32
,
u32
,
u32
);
asmlinkage
long
compat_sys_x86_pread
(
unsigned
int
,
char
__user
*
,
u32
,
u32
,
u32
);
asmlinkage
long
compat_sys_x86_pwrite
(
unsigned
int
,
const
char
__user
*
,
u32
,
u32
,
u32
);
long
sys32_fadvise64_64
(
int
,
__u32
,
__u32
,
__u32
,
__u32
,
int
);
long
sys32_vm86_warning
(
void
);
asmlinkage
long
compat_sys_x86_fadvise64_64
(
int
,
__u32
,
__u32
,
__u32
,
__u32
,
int
);
asmlinkage
ssize_t
sys32_readahead
(
int
,
unsigned
,
unsigned
,
size_t
);
asmlinkage
long
sys32_sync_file_range
(
int
,
unsigned
,
unsigned
,
unsigned
,
unsigned
,
int
);
asmlinkage
long
sys32_fadvise64
(
int
,
unsigned
,
unsigned
,
size_t
,
int
);
asmlinkage
long
sys32_fallocate
(
int
,
int
,
unsigned
,
unsigned
,
unsigned
,
unsigned
);
asmlinkage
ssize_t
compat_sys_x86_readahead
(
int
,
unsigned
int
,
unsigned
int
,
size_t
);
asmlinkage
long
compat_sys_x86_sync_file_range
(
int
,
unsigned
int
,
unsigned
int
,
unsigned
int
,
unsigned
int
,
int
);
asmlinkage
long
compat_sys_x86_fadvise64
(
int
,
unsigned
int
,
unsigned
int
,
size_t
,
int
);
asmlinkage
long
compat_sys_x86_fallocate
(
int
,
int
,
unsigned
int
,
unsigned
int
,
unsigned
int
,
unsigned
int
);
asmlinkage
long
compat_sys_x86_clone
(
unsigned
long
,
unsigned
long
,
int
__user
*
,
unsigned
long
,
int
__user
*
);
/* ia32/ia32_signal.c */
asmlinkage
long
sys32_sigreturn
(
void
);
...
...
arch/x86/kernel/apic/io_apic.c
View file @
3c76db70
...
...
@@ -1603,7 +1603,7 @@ static void __init delay_with_tsc(void)
do
{
rep_nop
();
now
=
rdtsc
();
}
while
((
now
-
start
)
<
40000000000UL
/
HZ
&&
}
while
((
now
-
start
)
<
40000000000UL
L
/
HZ
&&
time_before_eq
(
jiffies
,
end
));
}
...
...
arch/x86/kernel/cpu/bugs.c
View file @
3c76db70
...
...
@@ -300,6 +300,15 @@ static void __init spectre_v2_select_mitigation(void)
setup_force_cpu_cap
(
X86_FEATURE_USE_IBPB
);
pr_info
(
"Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier
\n
"
);
}
/*
* Retpoline means the kernel is safe because it has no indirect
* branches. But firmware isn't, so use IBRS to protect that.
*/
if
(
boot_cpu_has
(
X86_FEATURE_IBRS
))
{
setup_force_cpu_cap
(
X86_FEATURE_USE_IBRS_FW
);
pr_info
(
"Enabling Restricted Speculation for firmware calls
\n
"
);
}
}
#undef pr_fmt
...
...
@@ -326,8 +335,9 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c
if
(
!
boot_cpu_has_bug
(
X86_BUG_SPECTRE_V2
))
return
sprintf
(
buf
,
"Not affected
\n
"
);
return
sprintf
(
buf
,
"%s%s%s
\n
"
,
spectre_v2_strings
[
spectre_v2_enabled
],
return
sprintf
(
buf
,
"%s%s%s
%s
\n
"
,
spectre_v2_strings
[
spectre_v2_enabled
],
boot_cpu_has
(
X86_FEATURE_USE_IBPB
)
?
", IBPB"
:
""
,
boot_cpu_has
(
X86_FEATURE_USE_IBRS_FW
)
?
", IBRS_FW"
:
""
,
spectre_v2_module_string
());
}
#endif
arch/x86/kernel/cpu/common.c
View file @
3c76db70
...
...
@@ -1749,3 +1749,33 @@ static int __init init_cpu_syscore(void)
return
0
;
}
core_initcall
(
init_cpu_syscore
);
/*
* The microcode loader calls this upon late microcode load to recheck features,
* only when microcode has been updated. Caller holds microcode_mutex and CPU
* hotplug lock.
*/
void
microcode_check
(
void
)
{
struct
cpuinfo_x86
info
;
perf_check_microcode
();
/* Reload CPUID max function as it might've changed. */
info
.
cpuid_level
=
cpuid_eax
(
0
);
/*
* Copy all capability leafs to pick up the synthetic ones so that
* memcmp() below doesn't fail on that. The ones coming from CPUID will
* get overwritten in get_cpu_cap().
*/
memcpy
(
&
info
.
x86_capability
,
&
boot_cpu_data
.
x86_capability
,
sizeof
(
info
.
x86_capability
));
get_cpu_cap
(
&
info
);
if
(
!
memcmp
(
&
info
.
x86_capability
,
&
boot_cpu_data
.
x86_capability
,
sizeof
(
info
.
x86_capability
)))
return
;
pr_warn
(
"x86/CPU: CPU features have changed after loading microcode, but might not take effect.
\n
"
);
pr_warn
(
"x86/CPU: Please consider either early loading through initrd/built-in or a potential BIOS update.
\n
"
);
}
arch/x86/kernel/cpu/intel.c
View file @
3c76db70
...
...
@@ -144,6 +144,13 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
{
int
i
;
/*
* We know that the hypervisor lie to us on the microcode version so
* we may as well hope that it is running the correct version.
*/
if
(
cpu_has
(
c
,
X86_FEATURE_HYPERVISOR
))
return
false
;
for
(
i
=
0
;
i
<
ARRAY_SIZE
(
spectre_bad_microcodes
);
i
++
)
{
if
(
c
->
x86_model
==
spectre_bad_microcodes
[
i
].
model
&&
c
->
x86_stepping
==
spectre_bad_microcodes
[
i
].
stepping
)
...
...
arch/x86/kernel/cpu/microcode/amd.c
View file @
3c76db70
...
...
@@ -498,7 +498,7 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size,
return
patch_size
;
}
static
int
apply_microcode_amd
(
int
cpu
)
static
enum
ucode_state
apply_microcode_amd
(
int
cpu
)
{
struct
cpuinfo_x86
*
c
=
&
cpu_data
(
cpu
);
struct
microcode_amd
*
mc_amd
;
...
...
@@ -512,7 +512,7 @@ static int apply_microcode_amd(int cpu)
p
=
find_patch
(
cpu
);
if
(
!
p
)
return
0
;
return
UCODE_NFOUND
;
mc_amd
=
p
->
data
;
uci
->
mc
=
p
->
data
;
...
...
@@ -523,13 +523,13 @@ static int apply_microcode_amd(int cpu)
if
(
rev
>=
mc_amd
->
hdr
.
patch_id
)
{
c
->
microcode
=
rev
;
uci
->
cpu_sig
.
rev
=
rev
;
return
0
;
return
UCODE_OK
;
}
if
(
__apply_microcode_amd
(
mc_amd
))
{
pr_err
(
"CPU%d: update failed for patch_level=0x%08x
\n
"
,
cpu
,
mc_amd
->
hdr
.
patch_id
);
return
-
1
;
return
UCODE_ERROR
;
}
pr_info
(
"CPU%d: new patch_level=0x%08x
\n
"
,
cpu
,
mc_amd
->
hdr
.
patch_id
);
...
...
@@ -537,7 +537,7 @@ static int apply_microcode_amd(int cpu)
uci
->
cpu_sig
.
rev
=
mc_amd
->
hdr
.
patch_id
;
c
->
microcode
=
mc_amd
->
hdr
.
patch_id
;
return
0
;
return
UCODE_UPDATED
;
}
static
int
install_equiv_cpu_table
(
const
u8
*
buf
)
...
...
arch/x86/kernel/cpu/microcode/core.c
View file @
3c76db70
...
...
@@ -22,13 +22,16 @@
#define pr_fmt(fmt) "microcode: " fmt
#include <linux/platform_device.h>
#include <linux/stop_machine.h>
#include <linux/syscore_ops.h>
#include <linux/miscdevice.h>
#include <linux/capability.h>
#include <linux/firmware.h>
#include <linux/kernel.h>
#include <linux/delay.h>
#include <linux/mutex.h>
#include <linux/cpu.h>
#include <linux/nmi.h>
#include <linux/fs.h>
#include <linux/mm.h>
...
...
@@ -64,6 +67,11 @@ LIST_HEAD(microcode_cache);
*/
static
DEFINE_MUTEX
(
microcode_mutex
);
/*
* Serialize late loading so that CPUs get updated one-by-one.
*/
static
DEFINE_SPINLOCK
(
update_lock
);
struct
ucode_cpu_info
ucode_cpu_info
[
NR_CPUS
];
struct
cpu_info_ctx
{
...
...
@@ -373,26 +381,23 @@ static int collect_cpu_info(int cpu)
return
ret
;
}
struct
apply_microcode_ctx
{
int
err
;
};
static
void
apply_microcode_local
(
void
*
arg
)
{
struct
apply_microcode_ctx
*
ctx
=
arg
;
enum
ucode_state
*
err
=
arg
;
ctx
->
err
=
microcode_ops
->
apply_microcode
(
smp_processor_id
());
*
err
=
microcode_ops
->
apply_microcode
(
smp_processor_id
());
}
static
int
apply_microcode_on_target
(
int
cpu
)
{
struct
apply_microcode_ctx
ctx
=
{
.
err
=
0
}
;
enum
ucode_state
err
;
int
ret
;
ret
=
smp_call_function_single
(
cpu
,
apply_microcode_local
,
&
ctx
,
1
);
if
(
!
ret
)
ret
=
ctx
.
err
;
ret
=
smp_call_function_single
(
cpu
,
apply_microcode_local
,
&
err
,
1
);
if
(
!
ret
)
{
if
(
err
==
UCODE_ERROR
)
ret
=
1
;
}
return
ret
;
}
...
...
@@ -489,31 +494,110 @@ static void __exit microcode_dev_exit(void)
/* fake device for request_firmware */
static
struct
platform_device
*
microcode_pdev
;
static
int
reload_for_cpu
(
int
cpu
)
/*
* Late loading dance. Why the heavy-handed stomp_machine effort?
*
* - HT siblings must be idle and not execute other code while the other sibling
* is loading microcode in order to avoid any negative interactions caused by
* the loading.
*
* - In addition, microcode update on the cores must be serialized until this
* requirement can be relaxed in the future. Right now, this is conservative
* and good.
*/
#define SPINUNIT 100
/* 100 nsec */
static
int
check_online_cpus
(
void
)
{
struct
ucode_cpu_info
*
uci
=
ucode_cpu_info
+
cpu
;
enum
ucode_state
ustate
;
int
err
=
0
;
if
(
num_online_cpus
()
==
num_present_cpus
())
return
0
;
if
(
!
uci
->
valid
)
return
err
;
pr_err
(
"Not all CPUs online, aborting microcode update.
\n
"
);
ustate
=
microcode_ops
->
request_microcode_fw
(
cpu
,
&
microcode_pdev
->
dev
,
true
);
if
(
ustate
==
UCODE_OK
)
apply_microcode_on_target
(
cpu
);
else
if
(
ustate
==
UCODE_ERROR
)
err
=
-
EINVAL
;
return
err
;
return
-
EINVAL
;
}
static
atomic_t
late_cpus
;
/*
* Returns:
* < 0 - on error
* 0 - no update done
* 1 - microcode was updated
*/
static
int
__reload_late
(
void
*
info
)
{
unsigned
int
timeout
=
NSEC_PER_SEC
;
int
all_cpus
=
num_online_cpus
();
int
cpu
=
smp_processor_id
();
enum
ucode_state
err
;
int
ret
=
0
;
atomic_dec
(
&
late_cpus
);
/*
* Wait for all CPUs to arrive. A load will not be attempted unless all
* CPUs show up.
* */
while
(
atomic_read
(
&
late_cpus
))
{
if
(
timeout
<
SPINUNIT
)
{
pr_err
(
"Timeout while waiting for CPUs rendezvous, remaining: %d
\n
"
,
atomic_read
(
&
late_cpus
));
return
-
1
;
}
ndelay
(
SPINUNIT
);
timeout
-=
SPINUNIT
;
touch_nmi_watchdog
();
}
spin_lock
(
&
update_lock
);
apply_microcode_local
(
&
err
);
spin_unlock
(
&
update_lock
);
if
(
err
>
UCODE_NFOUND
)
{
pr_warn
(
"Error reloading microcode on CPU %d
\n
"
,
cpu
);
ret
=
-
1
;
}
else
if
(
err
==
UCODE_UPDATED
)
{
ret
=
1
;
}
atomic_inc
(
&
late_cpus
);
while
(
atomic_read
(
&
late_cpus
)
!=
all_cpus
)
cpu_relax
();
return
ret
;
}
/*
* Reload microcode late on all CPUs. Wait for a sec until they
* all gather together.
*/
static
int
microcode_reload_late
(
void
)
{
int
ret
;
atomic_set
(
&
late_cpus
,
num_online_cpus
());
ret
=
stop_machine_cpuslocked
(
__reload_late
,
NULL
,
cpu_online_mask
);
if
(
ret
<
0
)
return
ret
;
else
if
(
ret
>
0
)
microcode_check
();
return
ret
;
}
static
ssize_t
reload_store
(
struct
device
*
dev
,
struct
device_attribute
*
attr
,
const
char
*
buf
,
size_t
size
)
{
enum
ucode_state
tmp_ret
=
UCODE_OK
;
int
bsp
=
boot_cpu_data
.
cpu_index
;
unsigned
long
val
;
int
cpu
;
ssize_t
ret
=
0
,
tmp_ret
;
ssize_t
ret
=
0
;
ret
=
kstrtoul
(
buf
,
0
,
&
val
);
if
(
ret
)
...
...
@@ -522,23 +606,24 @@ static ssize_t reload_store(struct device *dev,
if
(
val
!=
1
)
return
size
;
tmp_ret
=
microcode_ops
->
request_microcode_fw
(
bsp
,
&
microcode_pdev
->
dev
,
true
);
if
(
tmp_ret
!=
UCODE_OK
)
return
size
;
get_online_cpus
();
mutex_lock
(
&
microcode_mutex
);
for_each_online_cpu
(
cpu
)
{
tmp_ret
=
reload_for_cpu
(
cpu
);
if
(
tmp_ret
!=
0
)
pr_warn
(
"Error reloading microcode on CPU %d
\n
"
,
cpu
);
/* save retval of the first encountered reload error */
if
(
!
ret
)
ret
=
tmp_re
t
;
}
if
(
!
ret
)
perf_check_microcod
e
();
ret
=
check_online_cpus
();
if
(
ret
)
goto
pu
t
;
mutex_lock
(
&
microcode_mutex
);
ret
=
microcode_reload_lat
e
();
mutex_unlock
(
&
microcode_mutex
);
put:
put_online_cpus
();
if
(
!
ret
)
if
(
ret
>=
0
)
ret
=
size
;
return
ret
;
...
...
arch/x86/kernel/cpu/microcode/intel.c
View file @
3c76db70
...
...
@@ -589,6 +589,23 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
if
(
!
mc
)
return
0
;
/*
* Save us the MSR write below - which is a particular expensive
* operation - when the other hyperthread has updated the microcode
* already.
*/
rev
=
intel_get_microcode_revision
();
if
(
rev
>=
mc
->
hdr
.
rev
)
{
uci
->
cpu_sig
.
rev
=
rev
;
return
UCODE_OK
;
}
/*
* Writeback and invalidate caches before updating microcode to avoid
* internal issues depending on what the microcode is updating.
*/
native_wbinvd
();
/* write microcode via MSR 0x79 */
native_wrmsrl
(
MSR_IA32_UCODE_WRITE
,
(
unsigned
long
)
mc
->
bits
);
...
...
@@ -772,27 +789,44 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
return
0
;
}
static
int
apply_microcode_intel
(
int
cpu
)
static
enum
ucode_state
apply_microcode_intel
(
int
cpu
)
{
struct
ucode_cpu_info
*
uci
=
ucode_cpu_info
+
cpu
;
struct
cpuinfo_x86
*
c
=
&
cpu_data
(
cpu
);
struct
microcode_intel
*
mc
;
struct
ucode_cpu_info
*
uci
;
struct
cpuinfo_x86
*
c
;
static
int
prev_rev
;
u32
rev
;
/* We should bind the task to the CPU */
if
(
WARN_ON
(
raw_smp_processor_id
()
!=
cpu
))
return
-
1
;
return
UCODE_ERROR
;
uci
=
ucode_cpu_info
+
cpu
;
mc
=
uci
->
mc
;
/* Look for a newer patch in our cache: */
mc
=
find_patch
(
uci
)
;
if
(
!
mc
)
{
/* Look for a newer patch in our cache: */
mc
=
find_patch
(
uci
);
mc
=
uci
->
mc
;
if
(
!
mc
)
return
0
;
return
UCODE_NFOUND
;
}
/*
* Save us the MSR write below - which is a particular expensive
* operation - when the other hyperthread has updated the microcode
* already.
*/
rev
=
intel_get_microcode_revision
();
if
(
rev
>=
mc
->
hdr
.
rev
)
{
uci
->
cpu_sig
.
rev
=
rev
;
c
->
microcode
=
rev
;
return
UCODE_OK
;
}
/*
* Writeback and invalidate caches before updating microcode to avoid
* internal issues depending on what the microcode is updating.
*/
native_wbinvd
();
/* write microcode via MSR 0x79 */
wrmsrl
(
MSR_IA32_UCODE_WRITE
,
(
unsigned
long
)
mc
->
bits
);
...
...
@@ -801,7 +835,7 @@ static int apply_microcode_intel(int cpu)
if
(
rev
!=
mc
->
hdr
.
rev
)
{
pr_err
(
"CPU%d update to revision 0x%x failed
\n
"
,
cpu
,
mc
->
hdr
.
rev
);
return
-
1
;
return
UCODE_ERROR
;
}
if
(
rev
!=
prev_rev
)
{
...
...
@@ -813,12 +847,10 @@ static int apply_microcode_intel(int cpu)
prev_rev
=
rev
;
}
c
=
&
cpu_data
(
cpu
);
uci
->
cpu_sig
.
rev
=
rev
;
c
->
microcode
=
rev
;
return
0
;
return
UCODE_UPDATED
;
}
static
enum
ucode_state
generic_load_microcode
(
int
cpu
,
void
*
data
,
size_t
size
,
...
...
arch/x86/kernel/head_64.S
View file @
3c76db70
...
...
@@ -23,6 +23,7 @@
#include <asm/nops.h>
#include "../entry/calling.h"
#include <asm/export.h>
#include <asm/nospec-branch.h>
#ifdef CONFIG_PARAVIRT
#include <asm/asm-offsets.h>
...
...
@@ -137,6 +138,7 @@ ENTRY(secondary_startup_64)
/
*
Ensure
I
am
executing
from
virtual
addresses
*/
movq
$
1
f
,
%
rax
ANNOTATE_RETPOLINE_SAFE
jmp
*%
rax
1
:
UNWIND_HINT_EMPTY
...
...
arch/x86/kernel/ioport.c
View file @
3c76db70
...
...
@@ -23,7 +23,7 @@
/*
* this changes the io permissions bitmap in the current task.
*/
asmlinkage
long
sys_ioperm
(
unsigned
long
from
,
unsigned
long
num
,
int
turn_on
)
SYSCALL_DEFINE3
(
ioperm
,
unsigned
long
,
from
,
unsigned
long
,
num
,
int
,
turn_on
)
{
struct
thread_struct
*
t
=
&
current
->
thread
;
struct
tss_struct
*
tss
;
...
...
arch/x86/kernel/kprobes/core.c
View file @
3c76db70
...
...
@@ -1168,10 +1168,18 @@ NOKPROBE_SYMBOL(longjmp_break_handler);
bool
arch_within_kprobe_blacklist
(
unsigned
long
addr
)
{
bool
is_in_entry_trampoline_section
=
false
;
#ifdef CONFIG_X86_64
is_in_entry_trampoline_section
=
(
addr
>=
(
unsigned
long
)
__entry_trampoline_start
&&
addr
<
(
unsigned
long
)
__entry_trampoline_end
);
#endif
return
(
addr
>=
(
unsigned
long
)
__kprobes_text_start
&&
addr
<
(
unsigned
long
)
__kprobes_text_end
)
||
(
addr
>=
(
unsigned
long
)
__entry_text_start
&&
addr
<
(
unsigned
long
)
__entry_text_end
);
addr
<
(
unsigned
long
)
__entry_text_end
)
||
is_in_entry_trampoline_section
;
}
int
__init
arch_init_kprobes
(
void
)
...
...
arch/x86/kernel/setup.c
View file @
3c76db70
...
...
@@ -1203,20 +1203,13 @@ void __init setup_arch(char **cmdline_p)
kasan_init
();
#ifdef CONFIG_X86_32
/* sync back kernel address range */
clone_pgd_range
(
initial_page_table
+
KERNEL_PGD_BOUNDARY
,
swapper_pg_dir
+
KERNEL_PGD_BOUNDARY
,
KERNEL_PGD_PTRS
);
/*
* sync back low identity map too. It is used for example
* in the 32-bit EFI stub.
* Sync back kernel address range.
*
* FIXME: Can the later sync in setup_cpu_entry_areas() replace
* this call?
*/
clone_pgd_range
(
initial_page_table
,
swapper_pg_dir
+
KERNEL_PGD_BOUNDARY
,
min
(
KERNEL_PGD_PTRS
,
KERNEL_PGD_BOUNDARY
));
#endif
sync_initial_page_table
();
tboot_probe
();
...
...
arch/x86/kernel/setup_percpu.c
View file @
3c76db70
...
...
@@ -287,24 +287,15 @@ void __init setup_per_cpu_areas(void)
/* Setup cpu initialized, callin, callout masks */
setup_cpu_local_masks
();
#ifdef CONFIG_X86_32
/*
* Sync back kernel address range again. We already did this in
* setup_arch(), but percpu data also needs to be available in
* the smpboot asm. We can't reliably pick up percpu mappings
* using vmalloc_fault(), because exception dispatch needs
* percpu data.
*
* FIXME: Can the later sync in setup_cpu_entry_areas() replace
* this call?
*/
clone_pgd_range
(
initial_page_table
+
KERNEL_PGD_BOUNDARY
,
swapper_pg_dir
+
KERNEL_PGD_BOUNDARY
,
KERNEL_PGD_PTRS
);
/*
* sync back low identity map too. It is used for example
* in the 32-bit EFI stub.
*/
clone_pgd_range
(
initial_page_table
,
swapper_pg_dir
+
KERNEL_PGD_BOUNDARY
,
min
(
KERNEL_PGD_PTRS
,
KERNEL_PGD_BOUNDARY
));
#endif
sync_initial_page_table
();
}
arch/x86/kernel/unwind_orc.c
View file @
3c76db70
...
...
@@ -5,7 +5,6 @@
#include <asm/unwind.h>
#include <asm/orc_types.h>
#include <asm/orc_lookup.h>
#include <asm/sections.h>
#define orc_warn(fmt, ...) \
printk_deferred_once(KERN_WARNING pr_fmt("WARNING: " fmt), ##__VA_ARGS__)
...
...
@@ -148,7 +147,7 @@ static struct orc_entry *orc_find(unsigned long ip)
}
/* vmlinux .init slow lookup: */
if
(
i
p
>=
(
unsigned
long
)
_sinittext
&&
ip
<
(
unsigned
long
)
_einittext
)
if
(
i
nit_kernel_text
(
ip
)
)
return
__orc_find
(
__start_orc_unwind_ip
,
__start_orc_unwind
,
__stop_orc_unwind_ip
-
__start_orc_unwind_ip
,
ip
);
...
...
arch/x86/kernel/vmlinux.lds.S
View file @
3c76db70
...
...
@@ -118,9 +118,11 @@ SECTIONS
#ifdef CONFIG_X86_64
.
=
ALIGN
(
PAGE_SIZE
)
;
VMLINUX_SYMBOL
(
__entry_trampoline_start
)
=
.
;
_entry_trampoline
=
.
;
*(.
entry_trampoline
)
.
=
ALIGN
(
PAGE_SIZE
)
;
VMLINUX_SYMBOL
(
__entry_trampoline_end
)
=
.
;
ASSERT
(.
-
_entry_trampoline
==
PAGE_SIZE
,
"entry trampoline is too big"
)
;
#endif
...
...
arch/x86/kvm/svm.c
View file @
3c76db70
...
...
@@ -49,6 +49,7 @@
#include <asm/debugreg.h>
#include <asm/kvm_para.h>
#include <asm/irq_remapping.h>
#include <asm/microcode.h>
#include <asm/nospec-branch.h>
#include <asm/virtext.h>
...
...
@@ -5355,7 +5356,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
* being speculatively taken.
*/
if
(
svm
->
spec_ctrl
)
wrmsrl
(
MSR_IA32_SPEC_CTRL
,
svm
->
spec_ctrl
);
native_
wrmsrl
(
MSR_IA32_SPEC_CTRL
,
svm
->
spec_ctrl
);
asm
volatile
(
"push %%"
_ASM_BP
";
\n\t
"
...
...
@@ -5464,11 +5465,11 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
* If the L02 MSR bitmap does not intercept the MSR, then we need to
* save it.
*/
if
(
!
msr_write_intercepted
(
vcpu
,
MSR_IA32_SPEC_CTRL
))
rdmsrl
(
MSR_IA32_SPEC_CTRL
,
svm
->
spec_ctrl
);
if
(
unlikely
(
!
msr_write_intercepted
(
vcpu
,
MSR_IA32_SPEC_CTRL
)
))
svm
->
spec_ctrl
=
native_read_msr
(
MSR_IA32_SPEC_CTRL
);
if
(
svm
->
spec_ctrl
)
wrmsrl
(
MSR_IA32_SPEC_CTRL
,
0
);
native_
wrmsrl
(
MSR_IA32_SPEC_CTRL
,
0
);
/* Eliminate branch target predictions from guest mode */
vmexit_fill_RSB
();
...
...
arch/x86/kvm/vmx.c
View file @
3c76db70
...
...
@@ -51,6 +51,7 @@
#include <asm/apic.h>
#include <asm/irq_remapping.h>
#include <asm/mmu_context.h>
#include <asm/microcode.h>
#include <asm/nospec-branch.h>
#include "trace.h"
...
...
@@ -9452,7 +9453,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
* being speculatively taken.
*/
if
(
vmx
->
spec_ctrl
)
wrmsrl
(
MSR_IA32_SPEC_CTRL
,
vmx
->
spec_ctrl
);
native_
wrmsrl
(
MSR_IA32_SPEC_CTRL
,
vmx
->
spec_ctrl
);
vmx
->
__launched
=
vmx
->
loaded_vmcs
->
launched
;
asm
(
...
...
@@ -9587,11 +9588,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
* If the L02 MSR bitmap does not intercept the MSR, then we need to
* save it.
*/
if
(
!
msr_write_intercepted
(
vcpu
,
MSR_IA32_SPEC_CTRL
))
rdmsrl
(
MSR_IA32_SPEC_CTRL
,
vmx
->
spec_ctrl
);
if
(
unlikely
(
!
msr_write_intercepted
(
vcpu
,
MSR_IA32_SPEC_CTRL
)
))
vmx
->
spec_ctrl
=
native_read_msr
(
MSR_IA32_SPEC_CTRL
);
if
(
vmx
->
spec_ctrl
)
wrmsrl
(
MSR_IA32_SPEC_CTRL
,
0
);
native_
wrmsrl
(
MSR_IA32_SPEC_CTRL
,
0
);
/* Eliminate branch target predictions from guest mode */
vmexit_fill_RSB
();
...
...
arch/x86/lib/Makefile
View file @
3c76db70
...
...
@@ -28,7 +28,6 @@ lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o
lib-$(CONFIG_RANDOMIZE_BASE)
+=
kaslr.o
lib-$(CONFIG_FUNCTION_ERROR_INJECTION)
+=
error-inject.o
lib-$(CONFIG_RETPOLINE)
+=
retpoline.o
OBJECT_FILES_NON_STANDARD_retpoline.o
:=
y
obj-y
+=
msr.o msr-reg.o msr-reg-export.o hweight.o
...
...
arch/x86/lib/retpoline.S
View file @
3c76db70
...
...
@@ -7,7 +7,6 @@
#include <asm/alternative-asm.h>
#include <asm/export.h>
#include <asm/nospec-branch.h>
#include <asm/bitsperlong.h>
.
macro
THUNK
reg
.
section
.
text
.
__x86
.
indirect_thunk
...
...
@@ -47,58 +46,3 @@ GENERATE_THUNK(r13)
GENERATE_THUNK
(
r14
)
GENERATE_THUNK
(
r15
)
#endif
/*
*
Fill
the
CPU
return
stack
buffer
.
*
*
Each
entry
in
the
RSB
,
if
used
for
a
speculative
'ret'
,
contains
an
*
infinite
'pause; lfence; jmp'
loop
to
capture
speculative
execution
.
*
*
This
is
required
in
various
cases
for
retpoline
and
IBRS
-
based
*
mitigations
for
the
Spectre
variant
2
vulnerability
.
Sometimes
to
*
eliminate
potentially
bogus
entries
from
the
RSB
,
and
sometimes
*
purely
to
ensure
that
it
doesn
't get empty, which on some CPUs would
*
allow
predictions
from
other
(
unwanted
!)
sources
to
be
used
.
*
*
Google
experimented
with
loop
-
unrolling
and
this
turned
out
to
be
*
the
optimal
version
-
two
calls
,
each
with
their
own
speculation
*
trap
should
their
return
address
end
up
getting
used
,
in
a
loop
.
*/
.
macro
STUFF_RSB
nr
:
req
sp
:
req
mov
$
(
\
nr
/
2
),
%
_ASM_BX
.
align
16
771
:
call
772
f
773
:
/
*
speculation
trap
*/
pause
lfence
jmp
773
b
.
align
16
772
:
call
774
f
775
:
/
*
speculation
trap
*/
pause
lfence
jmp
775
b
.
align
16
774
:
dec
%
_ASM_BX
jnz
771
b
add
$
((
BITS_PER_LONG
/
8
)
*
\
nr
),
\
sp
.
endm
#define RSB_FILL_LOOPS 16 /* To avoid underflow */
ENTRY
(
__fill_rsb
)
STUFF_RSB
RSB_FILL_LOOPS
,
%
_ASM_SP
ret
END
(
__fill_rsb
)
EXPORT_SYMBOL_GPL
(
__fill_rsb
)
#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
ENTRY
(
__clear_rsb
)
STUFF_RSB
RSB_CLEAR_LOOPS
,
%
_ASM_SP
ret
END
(
__clear_rsb
)
EXPORT_SYMBOL_GPL
(
__clear_rsb
)
arch/x86/mm/cpu_entry_area.c
View file @
3c76db70
...
...
@@ -163,4 +163,10 @@ void __init setup_cpu_entry_areas(void)
for_each_possible_cpu
(
cpu
)
setup_cpu_entry_area
(
cpu
);
/*
* This is the last essential update to swapper_pgdir which needs
* to be synchronized to initial_page_table on 32bit.
*/
sync_initial_page_table
();
}
arch/x86/mm/fault.c
View file @
3c76db70
...
...
@@ -1248,10 +1248,6 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
tsk
=
current
;
mm
=
tsk
->
mm
;
/*
* Detect and handle instructions that would cause a page fault for
* both a tracked kernel page and a userspace page.
*/
prefetchw
(
&
mm
->
mmap_sem
);
if
(
unlikely
(
kmmio_fault
(
regs
,
address
)))
...
...
arch/x86/mm/init_32.c
View file @
3c76db70
...
...
@@ -453,6 +453,21 @@ static inline void permanent_kmaps_init(pgd_t *pgd_base)
}
#endif
/* CONFIG_HIGHMEM */
void
__init
sync_initial_page_table
(
void
)
{
clone_pgd_range
(
initial_page_table
+
KERNEL_PGD_BOUNDARY
,
swapper_pg_dir
+
KERNEL_PGD_BOUNDARY
,
KERNEL_PGD_PTRS
);
/*
* sync back low identity map too. It is used for example
* in the 32-bit EFI stub.
*/
clone_pgd_range
(
initial_page_table
,
swapper_pg_dir
+
KERNEL_PGD_BOUNDARY
,
min
(
KERNEL_PGD_PTRS
,
KERNEL_PGD_BOUNDARY
));
}
void
__init
native_pagetable_init
(
void
)
{
unsigned
long
pfn
,
va
;
...
...
arch/x86/mm/mem_encrypt_boot.S
View file @
3c76db70
...
...
@@ -15,6 +15,7 @@
#include <asm/page.h>
#include <asm/processor-flags.h>
#include <asm/msr-index.h>
#include <asm/nospec-branch.h>
.
text
.
code64
...
...
@@ -59,6 +60,7 @@ ENTRY(sme_encrypt_execute)
movq
%
rax
,
%
r8
/*
Workarea
encryption
routine
*/
addq
$PAGE_SIZE
,
%
r8
/*
Workarea
intermediate
copy
buffer
*/
ANNOTATE_RETPOLINE_SAFE
call
*%
rax
/*
Call
the
encryption
routine
*/
pop
%
r12
...
...
arch/x86/mm/pti.c
View file @
3c76db70
...
...
@@ -332,7 +332,7 @@ static void __init pti_clone_user_shared(void)
}
/*
* Clone the ESPFIX P4D into the user space visi
n
ble page table
* Clone the ESPFIX P4D into the user space visible page table
*/
static
void
__init
pti_setup_espfix64
(
void
)
{
...
...
arch/x86/realmode/rm/trampoline_64.S
View file @
3c76db70
...
...
@@ -102,7 +102,7 @@ ENTRY(startup_32)
*
don
't we'
ll
eventually
crash
trying
to
execute
encrypted
*
instructions
.
*/
bt
$TH_FLAGS_SME_ACTIVE_BIT
,
pa_tr_flags
bt
l
$TH_FLAGS_SME_ACTIVE_BIT
,
pa_tr_flags
jnc
.
Ldone
movl
$MSR_K8_SYSCFG
,
%
ecx
rdmsr
...
...
arch/x86/xen/suspend.c
View file @
3c76db70
// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/tick.h>
#include <linux/percpu-defs.h>
#include <xen/xen.h>
#include <xen/interface/xen.h>
#include <xen/grant_table.h>
#include <xen/events.h>
#include <asm/cpufeatures.h>
#include <asm/msr-index.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/page.h>
#include <asm/fixmap.h>
...
...
@@ -15,6 +18,8 @@
#include "mmu.h"
#include "pmu.h"
static
DEFINE_PER_CPU
(
u64
,
spec_ctrl
);
void
xen_arch_pre_suspend
(
void
)
{
xen_save_time_memory_area
();
...
...
@@ -35,6 +40,9 @@ void xen_arch_post_suspend(int cancelled)
static
void
xen_vcpu_notify_restore
(
void
*
data
)
{
if
(
xen_pv_domain
()
&&
boot_cpu_has
(
X86_FEATURE_SPEC_CTRL
))
wrmsrl
(
MSR_IA32_SPEC_CTRL
,
this_cpu_read
(
spec_ctrl
));
/* Boot processor notified via generic timekeeping_resume() */
if
(
smp_processor_id
()
==
0
)
return
;
...
...
@@ -44,7 +52,15 @@ static void xen_vcpu_notify_restore(void *data)
static
void
xen_vcpu_notify_suspend
(
void
*
data
)
{
u64
tmp
;
tick_suspend_local
();
if
(
xen_pv_domain
()
&&
boot_cpu_has
(
X86_FEATURE_SPEC_CTRL
))
{
rdmsrl
(
MSR_IA32_SPEC_CTRL
,
tmp
);
this_cpu_write
(
spec_ctrl
,
tmp
);
wrmsrl
(
MSR_IA32_SPEC_CTRL
,
0
);
}
}
void
xen_arch_resume
(
void
)
...
...
include/linux/compiler-clang.h
View file @
3c76db70
...
...
@@ -27,3 +27,8 @@
#if __has_feature(address_sanitizer)
#define __SANITIZE_ADDRESS__
#endif
/* Clang doesn't have a way to turn it off per-function, yet. */
#ifdef __noretpoline
#undef __noretpoline
#endif
include/linux/compiler-gcc.h
View file @
3c76db70
...
...
@@ -93,6 +93,10 @@
#define __weak __attribute__((weak))
#define __alias(symbol) __attribute__((alias(#symbol)))
#ifdef RETPOLINE
#define __noretpoline __attribute__((indirect_branch("keep")))
#endif
/*
* it doesn't make sense on ARM (currently the only user of __naked)
* to trace naked functions because then mcount is called without
...
...
include/linux/init.h
View file @
3c76db70
...
...
@@ -6,10 +6,10 @@
#include <linux/types.h>
/* Built-in __init functions needn't be compiled with retpoline */
#if defined(
RETPOLINE
) && !defined(MODULE)
#define __no
retpoline __attribute__((indirect_branch("keep")))
#if defined(
__noretpoline
) && !defined(MODULE)
#define __no
initretpoline __noretpoline
#else
#define __noretpoline
#define __no
init
retpoline
#endif
/* These macros are used to mark some functions or
...
...
@@ -47,7 +47,7 @@
/* These are for everybody (although not all archs will actually
discard it in modules) */
#define __init __section(.init.text) __cold __latent_entropy __noretpoline
#define __init __section(.init.text) __cold __latent_entropy __no
init
retpoline
#define __initdata __section(.init.data)
#define __initconst __section(.init.rodata)
#define __exitdata __section(.exit.data)
...
...
include/linux/jump_label.h
View file @
3c76db70
...
...
@@ -151,6 +151,7 @@ extern struct jump_entry __start___jump_table[];
extern
struct
jump_entry
__stop___jump_table
[];
extern
void
jump_label_init
(
void
);
extern
void
jump_label_invalidate_init
(
void
);
extern
void
jump_label_lock
(
void
);
extern
void
jump_label_unlock
(
void
);
extern
void
arch_jump_label_transform
(
struct
jump_entry
*
entry
,
...
...
@@ -198,6 +199,8 @@ static __always_inline void jump_label_init(void)
static_key_initialized
=
true
;
}
static
inline
void
jump_label_invalidate_init
(
void
)
{}
static
__always_inline
bool
static_key_false
(
struct
static_key
*
key
)
{
if
(
unlikely
(
static_key_count
(
key
)
>
0
))
...
...
include/linux/kernel.h
View file @
3c76db70
...
...
@@ -472,6 +472,7 @@ extern bool parse_option_str(const char *str, const char *option);
extern
char
*
next_arg
(
char
*
args
,
char
**
param
,
char
**
val
);
extern
int
core_kernel_text
(
unsigned
long
addr
);
extern
int
init_kernel_text
(
unsigned
long
addr
);
extern
int
core_kernel_data
(
unsigned
long
addr
);
extern
int
__kernel_text_address
(
unsigned
long
addr
);
extern
int
kernel_text_address
(
unsigned
long
addr
);
...
...
include/linux/nospec.h
View file @
3c76db70
...
...
@@ -5,6 +5,7 @@
#ifndef _LINUX_NOSPEC_H
#define _LINUX_NOSPEC_H
#include <asm/barrier.h>
/**
* array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise
...
...
@@ -29,26 +30,6 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
}
#endif
/*
* Warn developers about inappropriate array_index_nospec() usage.
*
* Even if the CPU speculates past the WARN_ONCE branch, the
* sign bit of @index is taken into account when generating the
* mask.
*
* This warning is compiled out when the compiler can infer that
* @index and @size are less than LONG_MAX.
*/
#define array_index_mask_nospec_check(index, size) \
({ \
if (WARN_ONCE(index > LONG_MAX || size > LONG_MAX, \
"array_index_nospec() limited to range of [0, LONG_MAX]\n")) \
_mask = 0; \
else \
_mask = array_index_mask_nospec(index, size); \
_mask; \
})
/*
* array_index_nospec - sanitize an array index after a bounds check
*
...
...
@@ -67,12 +48,11 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
({ \
typeof(index) _i = (index); \
typeof(size) _s = (size); \
unsigned long _mask = array_index_mask_nospec
_check(_i, _s);
\
unsigned long _mask = array_index_mask_nospec
(_i, _s);
\
\
BUILD_BUG_ON(sizeof(_i) > sizeof(long)); \
BUILD_BUG_ON(sizeof(_s) > sizeof(long)); \
\
_i &= _mask; \
_i; \
(typeof(_i)) (_i & _mask); \
})
#endif
/* _LINUX_NOSPEC_H */
init/main.c
View file @
3c76db70
...
...
@@ -89,6 +89,7 @@
#include <linux/io.h>
#include <linux/cache.h>
#include <linux/rodata_test.h>
#include <linux/jump_label.h>
#include <asm/io.h>
#include <asm/bugs.h>
...
...
@@ -1000,6 +1001,7 @@ static int __ref kernel_init(void *unused)
/* need to finish all async __init code before freeing the memory */
async_synchronize_full
();
ftrace_free_init_mem
();
jump_label_invalidate_init
();
free_initmem
();
mark_readonly
();
system_state
=
SYSTEM_RUNNING
;
...
...
kernel/extable.c
View file @
3c76db70
...
...
@@ -64,7 +64,7 @@ const struct exception_table_entry *search_exception_tables(unsigned long addr)
return
e
;
}
static
inline
int
init_kernel_text
(
unsigned
long
addr
)
int
init_kernel_text
(
unsigned
long
addr
)
{
if
(
addr
>=
(
unsigned
long
)
_sinittext
&&
addr
<
(
unsigned
long
)
_einittext
)
...
...
kernel/jump_label.c
View file @
3c76db70
...
...
@@ -366,12 +366,15 @@ static void __jump_label_update(struct static_key *key,
{
for
(;
(
entry
<
stop
)
&&
(
jump_entry_key
(
entry
)
==
key
);
entry
++
)
{
/*
* entry->code set to 0 invalidates module init text sections
* kernel_text_address() verifies we are not in core kernel
* init code, see jump_label_invalidate_module_init().
* An entry->code of 0 indicates an entry which has been
* disabled because it was in an init text area.
*/
if
(
entry
->
code
&&
kernel_text_address
(
entry
->
code
))
arch_jump_label_transform
(
entry
,
jump_label_type
(
entry
));
if
(
entry
->
code
)
{
if
(
kernel_text_address
(
entry
->
code
))
arch_jump_label_transform
(
entry
,
jump_label_type
(
entry
));
else
WARN_ONCE
(
1
,
"can't patch jump_label at %pS"
,
(
void
*
)
entry
->
code
);
}
}
}
...
...
@@ -417,6 +420,19 @@ void __init jump_label_init(void)
cpus_read_unlock
();
}
/* Disable any jump label entries in __init code */
void
__init
jump_label_invalidate_init
(
void
)
{
struct
jump_entry
*
iter_start
=
__start___jump_table
;
struct
jump_entry
*
iter_stop
=
__stop___jump_table
;
struct
jump_entry
*
iter
;
for
(
iter
=
iter_start
;
iter
<
iter_stop
;
iter
++
)
{
if
(
init_kernel_text
(
iter
->
code
))
iter
->
code
=
0
;
}
}
#ifdef CONFIG_MODULES
static
enum
jump_label_type
jump_label_init_type
(
struct
jump_entry
*
entry
)
...
...
@@ -633,6 +649,7 @@ static void jump_label_del_module(struct module *mod)
}
}
/* Disable any jump label entries in module init code */
static
void
jump_label_invalidate_module_init
(
struct
module
*
mod
)
{
struct
jump_entry
*
iter_start
=
mod
->
jump_entries
;
...
...
scripts/Makefile.build
View file @
3c76db70
...
...
@@ -256,6 +256,8 @@ __objtool_obj := $(objtree)/tools/objtool/objtool
objtool_args
=
$(
if
$(CONFIG_UNWINDER_ORC)
,orc generate,check
)
objtool_args
+=
$(
if
$
(
part-of-module
)
,
--module
,
)
ifndef
CONFIG_FRAME_POINTER
objtool_args
+=
--no-fp
endif
...
...
@@ -264,6 +266,12 @@ objtool_args += --no-unreachable
else
objtool_args
+=
$(
call
cc-ifversion,
-lt
, 0405,
--no-unreachable
)
endif
ifdef
CONFIG_RETPOLINE
ifneq
($(RETPOLINE_CFLAGS),)
objtool_args
+=
--retpoline
endif
endif
ifdef
CONFIG_MODVERSIONS
objtool_o
=
$
(
@D
)
/.tmp_
$
(
@F
)
...
...
tools/objtool/builtin-check.c
View file @
3c76db70
...
...
@@ -29,7 +29,7 @@
#include "builtin.h"
#include "check.h"
bool
no_fp
,
no_unreachable
;
bool
no_fp
,
no_unreachable
,
retpoline
,
module
;
static
const
char
*
const
check_usage
[]
=
{
"objtool check [<options>] file.o"
,
...
...
@@ -39,6 +39,8 @@ static const char * const check_usage[] = {
const
struct
option
check_options
[]
=
{
OPT_BOOLEAN
(
'f'
,
"no-fp"
,
&
no_fp
,
"Skip frame pointer validation"
),
OPT_BOOLEAN
(
'u'
,
"no-unreachable"
,
&
no_unreachable
,
"Skip 'unreachable instruction' warnings"
),
OPT_BOOLEAN
(
'r'
,
"retpoline"
,
&
retpoline
,
"Validate retpoline assumptions"
),
OPT_BOOLEAN
(
'm'
,
"module"
,
&
module
,
"Indicates the object will be part of a kernel module"
),
OPT_END
(),
};
...
...
@@ -53,5 +55,5 @@ int cmd_check(int argc, const char **argv)
objname
=
argv
[
0
];
return
check
(
objname
,
no_fp
,
no_unreachable
,
false
);
return
check
(
objname
,
false
);
}
tools/objtool/builtin-orc.c
View file @
3c76db70
...
...
@@ -25,7 +25,6 @@
*/
#include <string.h>
#include <subcmd/parse-options.h>
#include "builtin.h"
#include "check.h"
...
...
@@ -36,9 +35,6 @@ static const char *orc_usage[] = {
NULL
,
};
extern
const
struct
option
check_options
[];
extern
bool
no_fp
,
no_unreachable
;
int
cmd_orc
(
int
argc
,
const
char
**
argv
)
{
const
char
*
objname
;
...
...
@@ -54,7 +50,7 @@ int cmd_orc(int argc, const char **argv)
objname
=
argv
[
0
];
return
check
(
objname
,
no_fp
,
no_unreachable
,
true
);
return
check
(
objname
,
true
);
}
if
(
!
strcmp
(
argv
[
0
],
"dump"
))
{
...
...
tools/objtool/builtin.h
View file @
3c76db70
...
...
@@ -17,6 +17,11 @@
#ifndef _BUILTIN_H
#define _BUILTIN_H
#include <subcmd/parse-options.h>
extern
const
struct
option
check_options
[];
extern
bool
no_fp
,
no_unreachable
,
retpoline
,
module
;
extern
int
cmd_check
(
int
argc
,
const
char
**
argv
);
extern
int
cmd_orc
(
int
argc
,
const
char
**
argv
);
...
...
tools/objtool/check.c
View file @
3c76db70
...
...
@@ -18,6 +18,7 @@
#include <string.h>
#include <stdlib.h>
#include "builtin.h"
#include "check.h"
#include "elf.h"
#include "special.h"
...
...
@@ -33,7 +34,6 @@ struct alternative {
};
const
char
*
objname
;
static
bool
no_fp
;
struct
cfi_state
initial_func_cfi
;
struct
instruction
*
find_insn
(
struct
objtool_file
*
file
,
...
...
@@ -497,6 +497,7 @@ static int add_jump_destinations(struct objtool_file *file)
* disguise, so convert them accordingly.
*/
insn
->
type
=
INSN_JUMP_DYNAMIC
;
insn
->
retpoline_safe
=
true
;
continue
;
}
else
{
/* sibling call */
...
...
@@ -548,7 +549,8 @@ static int add_call_destinations(struct objtool_file *file)
if
(
!
insn
->
call_dest
&&
!
insn
->
ignore
)
{
WARN_FUNC
(
"unsupported intra-function call"
,
insn
->
sec
,
insn
->
offset
);
WARN
(
"If this is a retpoline, please patch it in with alternatives and annotate it with ANNOTATE_NOSPEC_ALTERNATIVE."
);
if
(
retpoline
)
WARN
(
"If this is a retpoline, please patch it in with alternatives and annotate it with ANNOTATE_NOSPEC_ALTERNATIVE."
);
return
-
1
;
}
...
...
@@ -923,7 +925,11 @@ static struct rela *find_switch_table(struct objtool_file *file,
if
(
find_symbol_containing
(
file
->
rodata
,
text_rela
->
addend
))
continue
;
return
find_rela_by_dest
(
file
->
rodata
,
text_rela
->
addend
);
rodata_rela
=
find_rela_by_dest
(
file
->
rodata
,
text_rela
->
addend
);
if
(
!
rodata_rela
)
continue
;
return
rodata_rela
;
}
return
NULL
;
...
...
@@ -1108,6 +1114,41 @@ static int read_unwind_hints(struct objtool_file *file)
return
0
;
}
static
int
read_retpoline_hints
(
struct
objtool_file
*
file
)
{
struct
section
*
sec
;
struct
instruction
*
insn
;
struct
rela
*
rela
;
sec
=
find_section_by_name
(
file
->
elf
,
".rela.discard.retpoline_safe"
);
if
(
!
sec
)
return
0
;
list_for_each_entry
(
rela
,
&
sec
->
rela_list
,
list
)
{
if
(
rela
->
sym
->
type
!=
STT_SECTION
)
{
WARN
(
"unexpected relocation symbol type in %s"
,
sec
->
name
);
return
-
1
;
}
insn
=
find_insn
(
file
,
rela
->
sym
->
sec
,
rela
->
addend
);
if
(
!
insn
)
{
WARN
(
"bad .discard.retpoline_safe entry"
);
return
-
1
;
}
if
(
insn
->
type
!=
INSN_JUMP_DYNAMIC
&&
insn
->
type
!=
INSN_CALL_DYNAMIC
)
{
WARN_FUNC
(
"retpoline_safe hint not an indirect jump/call"
,
insn
->
sec
,
insn
->
offset
);
return
-
1
;
}
insn
->
retpoline_safe
=
true
;
}
return
0
;
}
static
int
decode_sections
(
struct
objtool_file
*
file
)
{
int
ret
;
...
...
@@ -1146,6 +1187,10 @@ static int decode_sections(struct objtool_file *file)
if
(
ret
)
return
ret
;
ret
=
read_retpoline_hints
(
file
);
if
(
ret
)
return
ret
;
return
0
;
}
...
...
@@ -1891,6 +1936,38 @@ static int validate_unwind_hints(struct objtool_file *file)
return
warnings
;
}
static
int
validate_retpoline
(
struct
objtool_file
*
file
)
{
struct
instruction
*
insn
;
int
warnings
=
0
;
for_each_insn
(
file
,
insn
)
{
if
(
insn
->
type
!=
INSN_JUMP_DYNAMIC
&&
insn
->
type
!=
INSN_CALL_DYNAMIC
)
continue
;
if
(
insn
->
retpoline_safe
)
continue
;
/*
* .init.text code is ran before userspace and thus doesn't
* strictly need retpolines, except for modules which are
* loaded late, they very much do need retpoline in their
* .init.text
*/
if
(
!
strcmp
(
insn
->
sec
->
name
,
".init.text"
)
&&
!
module
)
continue
;
WARN_FUNC
(
"indirect %s found in RETPOLINE build"
,
insn
->
sec
,
insn
->
offset
,
insn
->
type
==
INSN_JUMP_DYNAMIC
?
"jump"
:
"call"
);
warnings
++
;
}
return
warnings
;
}
static
bool
is_kasan_insn
(
struct
instruction
*
insn
)
{
return
(
insn
->
type
==
INSN_CALL
&&
...
...
@@ -2022,13 +2099,12 @@ static void cleanup(struct objtool_file *file)
elf_close
(
file
->
elf
);
}
int
check
(
const
char
*
_objname
,
bool
_no_fp
,
bool
no_unreachable
,
bool
orc
)
int
check
(
const
char
*
_objname
,
bool
orc
)
{
struct
objtool_file
file
;
int
ret
,
warnings
=
0
;
objname
=
_objname
;
no_fp
=
_no_fp
;
file
.
elf
=
elf_open
(
objname
,
orc
?
O_RDWR
:
O_RDONLY
);
if
(
!
file
.
elf
)
...
...
@@ -2052,6 +2128,13 @@ int check(const char *_objname, bool _no_fp, bool no_unreachable, bool orc)
if
(
list_empty
(
&
file
.
insn_list
))
goto
out
;
if
(
retpoline
)
{
ret
=
validate_retpoline
(
&
file
);
if
(
ret
<
0
)
return
ret
;
warnings
+=
ret
;
}
ret
=
validate_functions
(
&
file
);
if
(
ret
<
0
)
goto
out
;
...
...
tools/objtool/check.h
View file @
3c76db70
...
...
@@ -45,6 +45,7 @@ struct instruction {
unsigned
char
type
;
unsigned
long
immediate
;
bool
alt_group
,
visited
,
dead_end
,
ignore
,
hint
,
save
,
restore
,
ignore_alts
;
bool
retpoline_safe
;
struct
symbol
*
call_dest
;
struct
instruction
*
jump_dest
;
struct
instruction
*
first_jump_src
;
...
...
@@ -63,7 +64,7 @@ struct objtool_file {
bool
ignore_unreachables
,
c_file
,
hints
;
};
int
check
(
const
char
*
objname
,
bool
no_fp
,
bool
no_unreachable
,
bool
orc
);
int
check
(
const
char
*
objname
,
bool
orc
);
struct
instruction
*
find_insn
(
struct
objtool_file
*
file
,
struct
section
*
sec
,
unsigned
long
offset
);
...
...
tools/testing/selftests/x86/test_vsyscall.c
View file @
3c76db70
...
...
@@ -450,7 +450,7 @@ static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
num_vsyscall_traps
++
;
}
static
int
test_
native_vsyscall
(
void
)
static
int
test_
emulation
(
void
)
{
time_t
tmp
;
bool
is_native
;
...
...
@@ -458,7 +458,7 @@ static int test_native_vsyscall(void)
if
(
!
vtime
)
return
0
;
printf
(
"[RUN]
\t
checking
for native vsyscall
\n
"
);
printf
(
"[RUN]
\t
checking
that vsyscalls are emulated
\n
"
);
sethandler
(
SIGTRAP
,
sigtrap
,
0
);
set_eflags
(
get_eflags
()
|
X86_EFLAGS_TF
);
vtime
(
&
tmp
);
...
...
@@ -474,11 +474,12 @@ static int test_native_vsyscall(void)
*/
is_native
=
(
num_vsyscall_traps
>
1
);
printf
(
"
\t
vsyscalls are %s (%d instructions in vsyscall page)
\n
"
,
printf
(
"[%s]
\t
vsyscalls are %s (%d instructions in vsyscall page)
\n
"
,
(
is_native
?
"FAIL"
:
"OK"
),
(
is_native
?
"native"
:
"emulated"
),
(
int
)
num_vsyscall_traps
);
return
0
;
return
is_native
;
}
#endif
...
...
@@ -498,7 +499,7 @@ int main(int argc, char **argv)
nerrs
+=
test_vsys_r
();
#ifdef __x86_64__
nerrs
+=
test_
native_vsyscall
();
nerrs
+=
test_
emulation
();
#endif
return
nerrs
?
1
:
0
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment