Commit b1360211 authored by Linus Torvalds

Merge tag 'x86-fred-2024-09-17' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 FRED updates from Thomas Gleixner:

 - Enable FRED right after init_mem_mapping() because at that point the
   early IDT fault handler is replaced by the real fault handler. The
   real fault handler retrieves the faulting address from the stack
   frame and not from CR2 when the FRED feature is set. But that
   obviously only works when FRED is enabled in the CPU as well.

 - Set SS to __KERNEL_DS when enabling FRED to prevent a corner case
   where ERETS can observe an SS mismatch and raise a #GP.

* tag 'x86-fred-2024-09-17' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/entry: Set FRED RSP0 on return to userspace instead of context switch
  x86/msr: Switch between WRMSRNS and WRMSR with the alternatives mechanism
  x86/entry: Test ti_work for zero before processing individual bits
  x86/fred: Set SS to __KERNEL_DS when enabling FRED
  x86/fred: Enable FRED right after init_mem_mapping()
  x86/fred: Move FRED RSP initialization into a separate function
  x86/fred: Parse cmdline param "fred=" in cpu_parse_early_param()
parents c3056a7d fe85ee39
--- a/arch/x86/include/asm/entry-common.h
+++ b/arch/x86/include/asm/entry-common.h
@@ -8,6 +8,7 @@
 #include <asm/nospec-branch.h>
 #include <asm/io_bitmap.h>
 #include <asm/fpu/api.h>
+#include <asm/fred.h>
 
 /* Check that the stack and regs on entry from user mode are sane. */
 static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs)
@@ -44,8 +45,7 @@ static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs)
 }
 #define arch_enter_from_user_mode arch_enter_from_user_mode
 
-static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
-						  unsigned long ti_work)
+static inline void arch_exit_work(unsigned long ti_work)
 {
 	if (ti_work & _TIF_USER_RETURN_NOTIFY)
 		fire_user_return_notifiers();
@@ -56,6 +56,15 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
 	fpregs_assert_state_consistent();
 	if (unlikely(ti_work & _TIF_NEED_FPU_LOAD))
 		switch_fpu_return();
+}
+
+static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
+						  unsigned long ti_work)
+{
+	if (IS_ENABLED(CONFIG_X86_DEBUG_FPU) || unlikely(ti_work))
+		arch_exit_work(ti_work);
+
+	fred_update_rsp0();
 
 #ifdef CONFIG_COMPAT
 	/*
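The split above is the "test ti_work for zero before processing individual bits" change: the exit path now pays one test in the common no-work case instead of one test per _TIF bit. A minimal userspace sketch of the pattern (the flag values and names below are illustrative, not the kernel's):

#include <stdio.h>

#define TIF_USER_RETURN_NOTIFY	(1UL << 0)	/* illustrative values */
#define TIF_IO_BITMAP		(1UL << 1)
#define TIF_NEED_FPU_LOAD	(1UL << 2)

static void exit_work(unsigned long ti_work)
{
	/* Slow path: inspect each bit only when at least one is set. */
	if (ti_work & TIF_USER_RETURN_NOTIFY)
		puts("fire user return notifiers");
	if (ti_work & TIF_IO_BITMAP)
		puts("update io bitmap");
	if (ti_work & TIF_NEED_FPU_LOAD)
		puts("reload FPU state");
}

static void exit_to_user_mode_prepare(unsigned long ti_work)
{
	/* Fast path: one test covers all work bits at once. */
	if (ti_work)
		exit_work(ti_work);
}

int main(void)
{
	exit_to_user_mode_prepare(0);			/* nothing to do */
	exit_to_user_mode_prepare(TIF_IO_BITMAP);	/* one bit set */
	return 0;
}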
--- a/arch/x86/include/asm/fred.h
+++ b/arch/x86/include/asm/fred.h
@@ -36,6 +36,7 @@
 #ifdef CONFIG_X86_FRED
 #include <linux/kernel.h>
+#include <linux/sched/task_stack.h>
 
 #include <asm/ptrace.h>
@@ -84,13 +85,33 @@ static __always_inline void fred_entry_from_kvm(unsigned int type, unsigned int
 }
 
 void cpu_init_fred_exceptions(void);
+void cpu_init_fred_rsps(void);
 void fred_complete_exception_setup(void);
 
+DECLARE_PER_CPU(unsigned long, fred_rsp0);
+
+static __always_inline void fred_sync_rsp0(unsigned long rsp0)
+{
+	__this_cpu_write(fred_rsp0, rsp0);
+}
+
+static __always_inline void fred_update_rsp0(void)
+{
+	unsigned long rsp0 = (unsigned long) task_stack_page(current) + THREAD_SIZE;
+
+	if (cpu_feature_enabled(X86_FEATURE_FRED) && (__this_cpu_read(fred_rsp0) != rsp0)) {
+		wrmsrns(MSR_IA32_FRED_RSP0, rsp0);
+		__this_cpu_write(fred_rsp0, rsp0);
+	}
+}
+
 #else /* CONFIG_X86_FRED */
 static __always_inline unsigned long fred_event_data(struct pt_regs *regs) { return 0; }
 static inline void cpu_init_fred_exceptions(void) { }
+static inline void cpu_init_fred_rsps(void) { }
 static inline void fred_complete_exception_setup(void) { }
-static __always_inline void fred_entry_from_kvm(unsigned int type, unsigned int vector) { }
+static inline void fred_entry_from_kvm(unsigned int type, unsigned int vector) { }
+static inline void fred_sync_rsp0(unsigned long rsp0) { }
+static inline void fred_update_rsp0(void) { }
 #endif /* CONFIG_X86_FRED */
 
 #endif /* !__ASSEMBLY__ */
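fred_update_rsp0() only writes MSR_IA32_FRED_RSP0 when the per-CPU cached value differs, so repeated returns to user space by the same task skip the MSR write entirely. A standalone model of that write-avoidance pattern, with plain variables standing in for the per-CPU cache and the MSR:

#include <stdio.h>

static unsigned long fred_rsp0_cache;	/* stands in for the per-CPU variable */
static unsigned long fred_rsp0_msr;	/* stands in for MSR_IA32_FRED_RSP0 */
static unsigned long msr_writes;

static void wrmsrns_model(unsigned long val)
{
	fred_rsp0_msr = val;
	msr_writes++;
}

static void update_rsp0(unsigned long rsp0)
{
	/* Only touch the (comparatively expensive) MSR when the value changed. */
	if (fred_rsp0_cache != rsp0) {
		wrmsrns_model(rsp0);
		fred_rsp0_cache = rsp0;
	}
}

int main(void)
{
	update_rsp0(0x1000);	/* first return to user space: MSR written */
	update_rsp0(0x1000);	/* same task stack: write skipped */
	update_rsp0(0x2000);	/* after a context switch: MSR written again */
	printf("MSR writes: %lu (expected 2), MSR = %#lx\n",
	       msr_writes, fred_rsp0_msr);
	return 0;
}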
......@@ -99,19 +99,6 @@ static __always_inline void __wrmsr(unsigned int msr, u32 low, u32 high)
: : "c" (msr), "a"(low), "d" (high) : "memory");
}
/*
* WRMSRNS behaves exactly like WRMSR with the only difference being
* that it is not a serializing instruction by default.
*/
static __always_inline void __wrmsrns(u32 msr, u32 low, u32 high)
{
/* Instruction opcode for WRMSRNS; supported in binutils >= 2.40. */
asm volatile("1: .byte 0x0f,0x01,0xc6\n"
"2:\n"
_ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR)
: : "c" (msr), "a"(low), "d" (high));
}
#define native_rdmsr(msr, val1, val2) \
do { \
u64 __val = __rdmsr((msr)); \
......@@ -312,9 +299,19 @@ do { \
#endif /* !CONFIG_PARAVIRT_XXL */
/* Instruction opcode for WRMSRNS supported in binutils >= 2.40 */
#define WRMSRNS _ASM_BYTES(0x0f,0x01,0xc6)
/* Non-serializing WRMSR, when available. Falls back to a serializing WRMSR. */
static __always_inline void wrmsrns(u32 msr, u64 val)
{
__wrmsrns(msr, val, val >> 32);
/*
* WRMSR is 2 bytes. WRMSRNS is 3 bytes. Pad WRMSR with a redundant
* DS prefix to avoid a trailing NOP.
*/
asm volatile("1: " ALTERNATIVE("ds wrmsr", WRMSRNS, X86_FEATURE_WRMSRNS)
"2: " _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR)
: : "c" (msr), "a" ((u32)val), "d" ((u32)(val >> 32)));
}
/*
......
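The padding comment above is about instruction sizes: "ds wrmsr" (3E 0F 30) and WRMSRNS (0F 01 C6) both encode to three bytes, so the alternative can be patched in place with no trailing NOP. As a rough userspace analogue of feature-based selection (a function pointer chosen once stands in for boot-time code patching; this is an analogy to illustrate the dispatch, not how alternatives are actually implemented):

#include <stdbool.h>
#include <stdio.h>

static void wrmsr_serializing(unsigned int msr, unsigned long long val)
{
	printf("WRMSR   %#x <- %#llx (serializing fallback)\n", msr, val);
}

static void wrmsr_non_serializing(unsigned int msr, unsigned long long val)
{
	printf("WRMSRNS %#x <- %#llx (non-serializing)\n", msr, val);
}

static void (*wrmsrns_op)(unsigned int, unsigned long long);

static void init_alternatives(bool cpu_has_wrmsrns)
{
	/* One-time selection, playing the role of alternatives patching. */
	wrmsrns_op = cpu_has_wrmsrns ? wrmsr_non_serializing
				     : wrmsr_serializing;
}

int main(void)
{
	init_alternatives(true);
	wrmsrns_op(0x1cc, 0xffffc90000004000ULL);	/* illustrative MSR/value */
	return 0;
}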
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -582,7 +582,8 @@ extern void switch_gdt_and_percpu_base(int);
 extern void load_direct_gdt(int);
 extern void load_fixmap_gdt(int);
 extern void cpu_init(void);
-extern void cpu_init_exception_handling(void);
+extern void cpu_init_exception_handling(bool boot_cpu);
+extern void cpu_init_replace_early_idt(void);
 extern void cr4_init(void);
 extern void set_task_blockstep(struct task_struct *task, bool on);
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -70,13 +70,9 @@ static inline void update_task_stack(struct task_struct *task)
 #ifdef CONFIG_X86_32
 	this_cpu_write(cpu_tss_rw.x86_tss.sp1, task->thread.sp0);
 #else
-	if (cpu_feature_enabled(X86_FEATURE_FRED)) {
-		/* WRMSRNS is a baseline feature for FRED. */
-		wrmsrns(MSR_IA32_FRED_RSP0, (unsigned long)task_stack_page(task) + THREAD_SIZE);
-	} else if (cpu_feature_enabled(X86_FEATURE_XENPV)) {
+	if (!cpu_feature_enabled(X86_FEATURE_FRED) && cpu_feature_enabled(X86_FEATURE_XENPV))
 		/* Xen PV enters the kernel on the thread stack. */
 		load_sp0(task_top_of_stack(task));
-	}
 #endif
 }
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1510,6 +1510,11 @@ static void __init cpu_parse_early_param(void)
 	if (cmdline_find_option_bool(boot_command_line, "nousershstk"))
 		setup_clear_cpu_cap(X86_FEATURE_USER_SHSTK);
 
+	/* Minimize the gap between FRED is available and available but disabled. */
+	arglen = cmdline_find_option(boot_command_line, "fred", arg, sizeof(arg));
+	if (arglen != 2 || strncmp(arg, "on", 2))
+		setup_clear_cpu_cap(X86_FEATURE_FRED);
+
 	arglen = cmdline_find_option(boot_command_line, "clearcpuid", arg, sizeof(arg));
 	if (arglen <= 0)
 		return;
@@ -2171,7 +2176,7 @@ static inline void tss_setup_io_bitmap(struct tss_struct *tss)
  * Setup everything needed to handle exceptions from the IDT, including the IST
  * exceptions which use paranoid_entry().
  */
-void cpu_init_exception_handling(void)
+void cpu_init_exception_handling(bool boot_cpu)
 {
 	struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw);
 	int cpu = raw_smp_processor_id();
@@ -2190,10 +2195,23 @@ void cpu_init_exception_handling(void)
 	/* GHCB needs to be setup to handle #VC. */
 	setup_ghcb();
 
-	if (cpu_feature_enabled(X86_FEATURE_FRED))
-		cpu_init_fred_exceptions();
-	else
+	if (cpu_feature_enabled(X86_FEATURE_FRED)) {
+		/* The boot CPU has enabled FRED during early boot */
+		if (!boot_cpu)
+			cpu_init_fred_exceptions();
+
+		cpu_init_fred_rsps();
+	} else {
 		load_current_idt();
+	}
+}
+
+void __init cpu_init_replace_early_idt(void)
+{
+	if (cpu_feature_enabled(X86_FEATURE_FRED))
+		cpu_init_fred_exceptions();
+	else
+		idt_setup_early_pf();
+}
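With the early param handled here, FRED stays disabled unless the command line contains exactly "fred=on"; "fred=off", a malformed value, or no option at all clears the feature bit early. A small model of that accept-only-"on" policy (command-line scanning is simplified to strstr() here; the kernel uses cmdline_find_option()):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static bool fred_requested(const char *cmdline)
{
	const char *opt = strstr(cmdline, "fred=");

	if (!opt)
		return false;		/* option absent: keep FRED disabled */
	opt += strlen("fred=");

	/* Mirrors "arglen != 2 || strncmp(arg, \"on\", 2)": exactly "on". */
	return strncmp(opt, "on", 2) == 0 &&
	       (opt[2] == '\0' || opt[2] == ' ');
}

int main(void)
{
	printf("%d\n", fred_requested("console=ttyS0 fred=on"));	/* 1 */
	printf("%d\n", fred_requested("console=ttyS0 fred=off"));	/* 0 */
	printf("%d\n", fred_requested("console=ttyS0"));		/* 0 */
	printf("%d\n", fred_requested("fred=onx"));			/* 0 */
	return 0;
}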
--- a/arch/x86/kernel/cpu/cpuid-deps.c
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
@@ -83,7 +83,6 @@ static const struct cpuid_dep cpuid_deps[] = {
 	{ X86_FEATURE_AMX_TILE,		X86_FEATURE_XFD       },
 	{ X86_FEATURE_SHSTK,		X86_FEATURE_XSAVES    },
 	{ X86_FEATURE_FRED,		X86_FEATURE_LKGS      },
-	{ X86_FEATURE_FRED,		X86_FEATURE_WRMSRNS   },
 	{}
 };
--- a/arch/x86/kernel/fred.c
+++ b/arch/x86/kernel/fred.c
@@ -21,17 +21,53 @@
 
 #define FRED_STKLVL(vector, lvl)	((lvl) << (2 * (vector)))
 
+DEFINE_PER_CPU(unsigned long, fred_rsp0);
+EXPORT_PER_CPU_SYMBOL(fred_rsp0);
+
 void cpu_init_fred_exceptions(void)
 {
 	/* When FRED is enabled by default, remove this log message */
 	pr_info("Initialize FRED on CPU%d\n", smp_processor_id());
 
+	/*
+	 * If a kernel event is delivered before a CPU goes to user level for
+	 * the first time, its SS is NULL thus NULL is pushed into the SS field
+	 * of the FRED stack frame.  But before ERETS is executed, the CPU may
+	 * context switch to another task and go to user level.  Then when the
+	 * CPU comes back to kernel mode, SS is changed to __KERNEL_DS.  Later
+	 * when ERETS is executed to return from the kernel event handler, a #GP
+	 * fault is generated because SS doesn't match the SS saved in the FRED
+	 * stack frame.
+	 *
+	 * Initialize SS to __KERNEL_DS when enabling FRED to avoid such #GPs.
+	 */
+	loadsegment(ss, __KERNEL_DS);
+
 	wrmsrl(MSR_IA32_FRED_CONFIG,
 	       /* Reserve for CALL emulation */
 	       FRED_CONFIG_REDZONE |
 	       FRED_CONFIG_INT_STKLVL(0) |
 	       FRED_CONFIG_ENTRYPOINT(asm_fred_entrypoint_user));
 
+	wrmsrl(MSR_IA32_FRED_STKLVLS, 0);
+	wrmsrl(MSR_IA32_FRED_RSP0, 0);
+	wrmsrl(MSR_IA32_FRED_RSP1, 0);
+	wrmsrl(MSR_IA32_FRED_RSP2, 0);
+	wrmsrl(MSR_IA32_FRED_RSP3, 0);
+
+	/* Enable FRED */
+	cr4_set_bits(X86_CR4_FRED);
+
+	/* Any further IDT use is a bug */
+	idt_invalidate();
+
+	/* Use int $0x80 for 32-bit system calls in FRED mode */
+	setup_clear_cpu_cap(X86_FEATURE_SYSENTER32);
+	setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
+}
+
+/* Must be called after setup_cpu_entry_areas() */
+void cpu_init_fred_rsps(void)
+{
 	/*
 	 * The purpose of separate stacks for NMI, #DB and #MC *in the kernel*
 	 * (remember that user space faults are always taken on stack level 0)
@@ -47,13 +83,4 @@ void cpu_init_fred_exceptions(void)
 	wrmsrl(MSR_IA32_FRED_RSP1, __this_cpu_ist_top_va(DB));
 	wrmsrl(MSR_IA32_FRED_RSP2, __this_cpu_ist_top_va(NMI));
 	wrmsrl(MSR_IA32_FRED_RSP3, __this_cpu_ist_top_va(DF));
-
-	/* Enable FRED */
-	cr4_set_bits(X86_CR4_FRED);
-
-	/* Any further IDT use is a bug */
-	idt_invalidate();
-
-	/* Use int $0x80 for 32-bit system calls in FRED mode */
-	setup_clear_cpu_cap(X86_FEATURE_SYSENTER32);
-	setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
 }
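The SS comment in cpu_init_fred_exceptions() describes an ordering bug; a toy state machine makes the sequence concrete (the selector values below are illustrative, not real segment selectors):

#include <stdio.h>

#define SS_NULL		0x00
#define SS_KERNEL_DS	0x18

static unsigned int live_ss;	/* the CPU's current SS */

static unsigned int fred_event(void)
{
	return live_ss;		/* SS is pushed into the FRED stack frame */
}

static const char *erets(unsigned int frame_ss)
{
	/* ERETS faults if the live SS no longer matches the saved one. */
	return frame_ss == live_ss ? "ok" : "#GP";
}

int main(void)
{
	/* Buggy order: event delivered while SS is still NULL ... */
	live_ss = SS_NULL;
	unsigned int frame_ss = fred_event();
	/* ... the task visits user mode and re-enters, so SS becomes __KERNEL_DS ... */
	live_ss = SS_KERNEL_DS;
	printf("ERETS with stale frame: %s\n", erets(frame_ss));	/* #GP */

	/* Fixed order: SS set to __KERNEL_DS before FRED is enabled. */
	live_ss = SS_KERNEL_DS;
	frame_ss = fred_event();
	printf("ERETS after the fix:    %s\n", erets(frame_ss));	/* ok */
	return 0;
}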
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1039,7 +1039,12 @@ void __init setup_arch(char **cmdline_p)
 
 	init_mem_mapping();
 
-	idt_setup_early_pf();
+	/*
+	 * init_mem_mapping() relies on the early IDT page fault handling.
+	 * Now either enable FRED or install the real page fault handler
+	 * for 64-bit in the IDT.
+	 */
+	cpu_init_replace_early_idt();
 
 	/*
 	 * Update mmu_cr4_features (and, indirectly, trampoline_cr4_features)
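The ordering matters because, as the merge description says, the real fault handler reads the faulting address from the stack frame rather than CR2 once X86_FEATURE_FRED is set, so FRED delivery must be live before that handler can run. A toy model of that delivery/handler agreement (the field names are made up for illustration):

#include <stdio.h>

struct fault_frame { unsigned long event_data; };

static unsigned long cr2;	/* stands in for the CR2 register */
static int fred_enabled;

static void deliver_page_fault(struct fault_frame *f, unsigned long addr)
{
	if (fred_enabled)
		f->event_data = addr;	/* FRED pushes the address with the frame */
	else
		cr2 = addr;		/* IDT delivery leaves it in CR2 */
}

static unsigned long real_pf_handler(const struct fault_frame *f)
{
	/* The real handler trusts the frame only when FRED is enabled. */
	return fred_enabled ? f->event_data : cr2;
}

int main(void)
{
	struct fault_frame f = { 0 };

	fred_enabled = 1;		/* enable FRED first, then take faults */
	deliver_page_fault(&f, 0xdead000);
	printf("faulting address: %#lx\n", real_pf_handler(&f));
	return 0;
}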
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -246,7 +246,7 @@ static void notrace start_secondary(void *unused)
 		__flush_tlb_all();
 	}
 
-	cpu_init_exception_handling();
+	cpu_init_exception_handling(false);
 
 	/*
 	 * Load the microcode before reaching the AP alive synchronization
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -1451,34 +1451,8 @@ DEFINE_IDTENTRY_SW(iret_error)
 }
 #endif
 
-/* Do not enable FRED by default yet. */
-static bool enable_fred __ro_after_init = false;
-
-#ifdef CONFIG_X86_FRED
-static int __init fred_setup(char *str)
-{
-	if (!str)
-		return -EINVAL;
-
-	if (!cpu_feature_enabled(X86_FEATURE_FRED))
-		return 0;
-
-	if (!strcmp(str, "on"))
-		enable_fred = true;
-	else if (!strcmp(str, "off"))
-		enable_fred = false;
-	else
-		pr_warn("invalid FRED option: 'fred=%s'\n", str);
-
-	return 0;
-}
-early_param("fred", fred_setup);
-#endif
-
 void __init trap_init(void)
 {
-	if (cpu_feature_enabled(X86_FEATURE_FRED) && !enable_fred)
-		setup_clear_cpu_cap(X86_FEATURE_FRED);
-
 	/* Init cpu_entry_area before IST entries are set up */
 	setup_cpu_entry_areas();
@@ -1486,7 +1460,7 @@ void __init trap_init(void)
 	sev_es_init_vc_handling();
 
 	/* Initialize TSS before setting up traps so ISTs work */
-	cpu_init_exception_handling();
+	cpu_init_exception_handling(true);
 
 	/* Setup traps as cpu_init() might #GP */
 	if (!cpu_feature_enabled(X86_FEATURE_FRED))