Commit c063a217 authored by Thomas Gleixner, committed by Peter Zijlstra

x86/percpu: Move current_top_of_stack next to current_task

Extend the struct pcpu_hot cacheline with current_top_of_stack,
another very frequently used value.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111145.493038635@infradead.org
parent 7443b296
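Background for the diff: struct pcpu_hot is a single cacheline-sized container for the most frequently touched per-CPU values, so moving the top-of-stack value into it puts it on the same 64-byte line as current_task and preempt_count. Below is a minimal, standalone C sketch of that layout idea; the field names mirror the struct pcpu_hot hunk further down, while the explicit alignment attribute, the size/offset checks, and the main() driver are illustrative assumptions, not the kernel's actual definition.

/*
 * Standalone sketch (not kernel code) of the pcpu_hot idea: keep the
 * hottest per-CPU fields, now including the top-of-stack value, packed
 * into one 64-byte cacheline.
 */
#include <assert.h>
#include <stddef.h>
#include <stdio.h>

struct task_struct;			/* opaque here; a real type in the kernel */

struct pcpu_hot {
	union {
		struct {
			struct task_struct	*current_task;
			int			preempt_count;
			int			cpu_number;
			unsigned long		top_of_stack;
		};
		unsigned char pad[64];	/* reserve one full cacheline */
	};
} __attribute__((aligned(64)));		/* illustrative; alignment assumption */

static struct pcpu_hot pcpu_hot;	/* the kernel defines one instance per CPU */

int main(void)
{
	/* All hot fields must fit inside the single reserved cacheline. */
	static_assert(sizeof(struct pcpu_hot) == 64, "pcpu_hot exceeds one cacheline");

	pcpu_hot.top_of_stack = 0x1000;	/* stand-in for a task's stack top */
	printf("top_of_stack lives at offset %zu within pcpu_hot\n",
	       offsetof(struct pcpu_hot, top_of_stack));
	return 0;
}

On the assembly side, the OFFSET(X86_top_of_stack, pcpu_hot, top_of_stack) line in the hunks below generates the assembler-visible constant that lets entry code address the field as PER_CPU_VAR(pcpu_hot + X86_top_of_stack), replacing the removed cpu_current_top_of_stack per-CPU variable.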
@@ -1181,7 +1181,7 @@ SYM_CODE_START(asm_exc_nmi)
 	 * is using the thread stack right now, so it's safe for us to use it.
 	 */
 	movl %esp, %ebx
-	movl PER_CPU_VAR(cpu_current_top_of_stack), %esp
+	movl PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %esp
 	call exc_nmi
 	movl %ebx, %esp
@@ -1243,7 +1243,7 @@ SYM_CODE_START(rewind_stack_and_make_dead)
 	/* Prevent any naive code from trying to unwind to our caller. */
 	xorl %ebp, %ebp
-	movl PER_CPU_VAR(cpu_current_top_of_stack), %esi
+	movl PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %esi
 	leal -TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%esi), %esp
 	call make_task_dead
...
@@ -92,7 +92,7 @@ SYM_CODE_START(entry_SYSCALL_64)
 	/* tss.sp2 is scratch space. */
 	movq %rsp, PER_CPU_VAR(cpu_tss_rw + TSS_sp2)
 	SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
-	movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+	movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp
 SYM_INNER_LABEL(entry_SYSCALL_64_safe_stack, SYM_L_GLOBAL)
 	ANNOTATE_NOENDBR
@@ -1209,7 +1209,7 @@ SYM_CODE_START(asm_exc_nmi)
 	FENCE_SWAPGS_USER_ENTRY
 	SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx
 	movq %rsp, %rdx
-	movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+	movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp
 	UNWIND_HINT_IRET_REGS base=%rdx offset=8
 	pushq 5*8(%rdx)	/* pt_regs->ss */
 	pushq 4*8(%rdx)	/* pt_regs->rsp */
@@ -1525,7 +1525,7 @@ SYM_CODE_START_NOALIGN(rewind_stack_and_make_dead)
 	/* Prevent any naive code from trying to unwind to our caller. */
 	xorl %ebp, %ebp
-	movq PER_CPU_VAR(cpu_current_top_of_stack), %rax
+	movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rax
 	leaq -PTREGS_SIZE(%rax), %rsp
 	UNWIND_HINT_REGS
...
@@ -58,7 +58,7 @@ SYM_CODE_START(entry_SYSENTER_compat)
 	SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
 	popq %rax
-	movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+	movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp
 	/* Construct struct pt_regs on stack */
 	pushq $__USER32_DS	/* pt_regs->ss */
@@ -191,7 +191,7 @@ SYM_CODE_START(entry_SYSCALL_compat)
 	SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
 	/* Switch to the kernel stack */
-	movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+	movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp
 SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL)
 	ANNOTATE_NOENDBR
@@ -332,7 +332,7 @@ SYM_CODE_START(entry_INT80_compat)
 	ALTERNATIVE "", "jmp .Lint80_keep_stack", X86_FEATURE_XENPV
 	movq %rsp, %rax
-	movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+	movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp
 	pushq 5*8(%rax)	/* regs->ss */
 	pushq 4*8(%rax)	/* regs->rsp */
...
@@ -17,6 +17,7 @@ struct pcpu_hot {
 			struct task_struct	*current_task;
 			int			preempt_count;
 			int			cpu_number;
+			unsigned long		top_of_stack;
 		};
 		u8 pad[64];
 	};
...
@@ -426,8 +426,6 @@ struct irq_stack {
 	char stack[IRQ_STACK_SIZE];
 } __aligned(IRQ_STACK_SIZE);
-DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
 #ifdef CONFIG_X86_64
 struct fixed_percpu_data {
 	/*
@@ -566,7 +564,7 @@ static __always_inline unsigned long current_top_of_stack(void)
 	 * and around vm86 mode and sp0 on x86_64 is special because of the
 	 * entry trampoline.
 	 */
-	return this_cpu_read_stable(cpu_current_top_of_stack);
+	return this_cpu_read_stable(pcpu_hot.top_of_stack);
 }
 static __always_inline bool on_thread_stack(void)
...
@@ -109,6 +109,8 @@ static void __used common(void)
 	OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
 	OFFSET(TSS_sp2, tss_struct, x86_tss.sp2);
+	OFFSET(X86_top_of_stack, pcpu_hot, top_of_stack);
 	if (IS_ENABLED(CONFIG_KVM_INTEL)) {
 		BLANK();
 		OFFSET(VMX_spec_ctrl, vcpu_vmx, spec_ctrl);
...
@@ -2015,6 +2015,7 @@ __setup("clearcpuid=", setup_clearcpuid);
 DEFINE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot) = {
 	.current_task = &init_task,
 	.preempt_count = INIT_PREEMPT_COUNT,
+	.top_of_stack = TOP_OF_INIT_STACK,
 };
 EXPORT_PER_CPU_SYMBOL(pcpu_hot);
@@ -2026,8 +2027,6 @@ EXPORT_PER_CPU_SYMBOL_GPL(fixed_percpu_data);
 DEFINE_PER_CPU(void *, hardirq_stack_ptr);
 DEFINE_PER_CPU(bool, hardirq_stack_inuse);
-DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) = TOP_OF_INIT_STACK;
 static void wrmsrl_cstar(unsigned long val)
 {
 	/*
@@ -2078,15 +2077,6 @@ void syscall_init(void)
 #else /* CONFIG_X86_64 */
-/*
- * On x86_32, vm86 modifies tss.sp0, so sp0 isn't a reliable way to find
- * the top of the kernel stack. Use an extra percpu variable to track the
- * top of the kernel stack directly.
- */
-DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) =
-	(unsigned long)&init_thread_union + THREAD_SIZE;
-EXPORT_PER_CPU_SYMBOL(cpu_current_top_of_stack);
 #ifdef CONFIG_STACKPROTECTOR
 DEFINE_PER_CPU(unsigned long, __stack_chk_guard);
 EXPORT_PER_CPU_SYMBOL(__stack_chk_guard);
...
@@ -191,13 +191,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	arch_end_context_switch(next_p);
 	/*
-	 * Reload esp0 and cpu_current_top_of_stack. This changes
+	 * Reload esp0 and pcpu_hot.top_of_stack. This changes
 	 * current_thread_info(). Refresh the SYSENTER configuration in
 	 * case prev or next is vm86.
 	 */
 	update_task_stack(next_p);
 	refresh_sysenter_cs(next);
-	this_cpu_write(cpu_current_top_of_stack,
+	this_cpu_write(pcpu_hot.top_of_stack,
 		       (unsigned long)task_stack_page(next_p) +
 		       THREAD_SIZE);
...
@@ -618,7 +618,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	 * Switch the PDA and FPU contexts.
 	 */
 	raw_cpu_write(pcpu_hot.current_task, next_p);
-	this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p));
+	raw_cpu_write(pcpu_hot.top_of_stack, task_top_of_stack(next_p));
 	switch_fpu_finish();
...
@@ -1056,7 +1056,7 @@ int common_cpu_up(unsigned int cpu, struct task_struct *idle)
 #ifdef CONFIG_X86_32
 	/* Stack for startup_32 can be just as for start_secondary onwards */
-	per_cpu(cpu_current_top_of_stack, cpu) = task_top_of_stack(idle);
+	per_cpu(pcpu_hot.top_of_stack, cpu) = task_top_of_stack(idle);
 #else
 	initial_gs = per_cpu_offset(cpu);
 #endif
...
@@ -851,7 +851,7 @@ DEFINE_IDTENTRY_RAW(exc_int3)
  */
 asmlinkage __visible noinstr struct pt_regs *sync_regs(struct pt_regs *eregs)
 {
-	struct pt_regs *regs = (struct pt_regs *)this_cpu_read(cpu_current_top_of_stack) - 1;
+	struct pt_regs *regs = (struct pt_regs *)this_cpu_read(pcpu_hot.top_of_stack) - 1;
 	if (regs != eregs)
 		*regs = *eregs;
 	return regs;
@@ -869,7 +869,7 @@ asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *r
 	 * trust it and switch to the current kernel stack
 	 */
 	if (ip_within_syscall_gap(regs)) {
-		sp = this_cpu_read(cpu_current_top_of_stack);
+		sp = this_cpu_read(pcpu_hot.top_of_stack);
 		goto sync;
 	}
...