Commit 86833aec authored by Linus Torvalds

Merge tag 'x86-entry-2024-03-11' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 entry update from Thomas Gleixner:
 "A single update for the x86 entry code:

  The current CR3 handling for kernel page table isolation in the
  paranoid return paths, which are relevant for #NMI, #MCE, #VC, #DB and
  #DF, unconditionally writes CR3 with the value retrieved on
  exception entry.

  In the vast majority of cases when returning to the kernel this is a
  pointless exercise because CR3 was not modified on exception entry.
  The only situation where this is necessary is when the exception
  interrupts an entry from user before switching to kernel CR3 or
  interrupts an exit to user after switching back to user CR3.

  As CR3 writes can be expensive on some systems this becomes measurable
  overhead with high frequency #NMIs such as perf.

  Avoid this overhead by checking the CR3 value, which was saved on
  entry, and write it back to CR3 only when it is a user CR3"

* tag 'x86-entry-2024-03-11' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/entry: Avoid redundant CR3 write on paranoid returns
parents 720c8579 bb998361
...@@ -244,17 +244,19 @@ For 32-bit we have the following conventions - kernel is built with ...@@ -244,17 +244,19 @@ For 32-bit we have the following conventions - kernel is built with
.Ldone_\@: .Ldone_\@:
.endm .endm
.macro RESTORE_CR3 scratch_reg:req save_reg:req /* Restore CR3 from a kernel context. May restore a user CR3 value. */
.macro PARANOID_RESTORE_CR3 scratch_reg:req save_reg:req
ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID
/* /*
* KERNEL pages can always resume with NOFLUSH as we do * If CR3 contained the kernel page tables at the paranoid exception
* explicit flushes. * entry, then there is nothing to restore as CR3 is not modified while
* handling the exception.
*/ */
bt $PTI_USER_PGTABLE_BIT, \save_reg bt $PTI_USER_PGTABLE_BIT, \save_reg
jnc .Lnoflush_\@ jnc .Lend_\@
ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID
/* /*
* Check if there's a pending flush for the user ASID we're * Check if there's a pending flush for the user ASID we're
...@@ -262,20 +264,12 @@ For 32-bit we have the following conventions - kernel is built with ...@@ -262,20 +264,12 @@ For 32-bit we have the following conventions - kernel is built with
*/ */
movq \save_reg, \scratch_reg movq \save_reg, \scratch_reg
andq $(0x7FF), \scratch_reg andq $(0x7FF), \scratch_reg
bt \scratch_reg, THIS_CPU_user_pcid_flush_mask
jnc .Lnoflush_\@
btr \scratch_reg, THIS_CPU_user_pcid_flush_mask btr \scratch_reg, THIS_CPU_user_pcid_flush_mask
jmp .Lwrcr3_\@ jc .Lwrcr3_\@
.Lnoflush_\@:
SET_NOFLUSH_BIT \save_reg SET_NOFLUSH_BIT \save_reg
.Lwrcr3_\@: .Lwrcr3_\@:
/*
* The CR3 write could be avoided when not changing its value,
* but would require a CR3 read *and* a scratch register.
*/
movq \save_reg, %cr3 movq \save_reg, %cr3
.Lend_\@: .Lend_\@:
.endm .endm
...@@ -290,7 +284,7 @@ For 32-bit we have the following conventions - kernel is built with ...@@ -290,7 +284,7 @@ For 32-bit we have the following conventions - kernel is built with
.endm .endm
.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req .macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
.endm .endm
.macro RESTORE_CR3 scratch_reg:req save_reg:req .macro PARANOID_RESTORE_CR3 scratch_reg:req save_reg:req
.endm .endm
#endif #endif
......
...@@ -970,14 +970,14 @@ SYM_CODE_START_LOCAL(paranoid_exit) ...@@ -970,14 +970,14 @@ SYM_CODE_START_LOCAL(paranoid_exit)
IBRS_EXIT save_reg=%r15 IBRS_EXIT save_reg=%r15
/* /*
* The order of operations is important. RESTORE_CR3 requires * The order of operations is important. PARANOID_RESTORE_CR3 requires
* kernel GSBASE. * kernel GSBASE.
* *
* NB to anyone to try to optimize this code: this code does * NB to anyone to try to optimize this code: this code does
* not execute at all for exceptions from user mode. Those * not execute at all for exceptions from user mode. Those
* exceptions go through error_return instead. * exceptions go through error_return instead.
*/ */
RESTORE_CR3 scratch_reg=%rax save_reg=%r14 PARANOID_RESTORE_CR3 scratch_reg=%rax save_reg=%r14
/* Handle the three GSBASE cases */ /* Handle the three GSBASE cases */
ALTERNATIVE "jmp .Lparanoid_exit_checkgs", "", X86_FEATURE_FSGSBASE ALTERNATIVE "jmp .Lparanoid_exit_checkgs", "", X86_FEATURE_FSGSBASE
...@@ -1406,8 +1406,7 @@ end_repeat_nmi: ...@@ -1406,8 +1406,7 @@ end_repeat_nmi:
/* Always restore stashed SPEC_CTRL value (see paranoid_entry) */ /* Always restore stashed SPEC_CTRL value (see paranoid_entry) */
IBRS_EXIT save_reg=%r15 IBRS_EXIT save_reg=%r15
/* Always restore stashed CR3 value (see paranoid_entry) */ PARANOID_RESTORE_CR3 scratch_reg=%r15 save_reg=%r14
RESTORE_CR3 scratch_reg=%r15 save_reg=%r14
/* /*
* The above invocation of paranoid_entry stored the GSBASE * The above invocation of paranoid_entry stored the GSBASE
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment