Commit cb84d11e authored by Dave Martin's avatar Dave Martin Committed by Catalin Marinas

arm64: neon: Remove support for nested or hardirq kernel-mode NEON

Support for kernel-mode NEON to be nested and/or used in hardirq
context adds significant complexity, and the benefits may be
marginal.  In practice, kernel-mode NEON is not used in hardirq
context, and is rarely used in softirq context (by certain mac80211
drivers).

This patch implements an arm64 may_use_simd() function to allow
clients to check whether kernel-mode NEON is usable in the current
context, and simplifies kernel_neon_{begin,end}() to handle only
saving of the task FPSIMD state (if any).  Without nesting, there
is no other state to save.

The partial fpsimd save/restore functions become redundant as a
result of these changes, so they are removed too.

The save/restore model is changed to operate directly on
task_struct without additional percpu storage.  This simplifies the
code and saves a bit of memory, but means that softirqs must now be
disabled when manipulating the task fpsimd state from task context:
correspondingly, preempt_{en,dis}sable() calls are upgraded to
local_bh_{en,dis}able() as appropriate.  fpsimd_thread_switch()
already runs with hardirqs disabled and so is already protected
from softirqs.

These changes should make it easier to support kernel-mode NEON in
the presence of the Scalable Vector extension in the future.
Signed-off-by: default avatarDave Martin <Dave.Martin@arm.com>
Reviewed-by: default avatarArd Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: default avatarCatalin Marinas <catalin.marinas@arm.com>
parent 4328825d
...@@ -41,16 +41,6 @@ struct fpsimd_state { ...@@ -41,16 +41,6 @@ struct fpsimd_state {
unsigned int cpu; unsigned int cpu;
}; };
/*
* Struct for stacking the bottom 'n' FP/SIMD registers.
*/
struct fpsimd_partial_state {
u32 fpsr;
u32 fpcr;
u32 num_regs;
__uint128_t vregs[32];
};
#if defined(__KERNEL__) && defined(CONFIG_COMPAT) #if defined(__KERNEL__) && defined(CONFIG_COMPAT)
/* Masks for extracting the FPSR and FPCR from the FPSCR */ /* Masks for extracting the FPSR and FPCR from the FPSCR */
...@@ -77,10 +67,6 @@ extern void fpsimd_update_current_state(struct fpsimd_state *state); ...@@ -77,10 +67,6 @@ extern void fpsimd_update_current_state(struct fpsimd_state *state);
extern void fpsimd_flush_task_state(struct task_struct *target); extern void fpsimd_flush_task_state(struct task_struct *target);
extern void fpsimd_save_partial_state(struct fpsimd_partial_state *state,
u32 num_regs);
extern void fpsimd_load_partial_state(struct fpsimd_partial_state *state);
/* For use by EFI runtime services calls only */ /* For use by EFI runtime services calls only */
extern void __efi_fpsimd_begin(void); extern void __efi_fpsimd_begin(void);
extern void __efi_fpsimd_end(void); extern void __efi_fpsimd_end(void);
......
...@@ -75,59 +75,3 @@ ...@@ -75,59 +75,3 @@
ldr w\tmpnr, [\state, #16 * 2 + 4] ldr w\tmpnr, [\state, #16 * 2 + 4]
fpsimd_restore_fpcr x\tmpnr, \state fpsimd_restore_fpcr x\tmpnr, \state
.endm .endm
.macro fpsimd_save_partial state, numnr, tmpnr1, tmpnr2
mrs x\tmpnr1, fpsr
str w\numnr, [\state, #8]
mrs x\tmpnr2, fpcr
stp w\tmpnr1, w\tmpnr2, [\state]
adr x\tmpnr1, 0f
add \state, \state, x\numnr, lsl #4
sub x\tmpnr1, x\tmpnr1, x\numnr, lsl #1
br x\tmpnr1
stp q30, q31, [\state, #-16 * 30 - 16]
stp q28, q29, [\state, #-16 * 28 - 16]
stp q26, q27, [\state, #-16 * 26 - 16]
stp q24, q25, [\state, #-16 * 24 - 16]
stp q22, q23, [\state, #-16 * 22 - 16]
stp q20, q21, [\state, #-16 * 20 - 16]
stp q18, q19, [\state, #-16 * 18 - 16]
stp q16, q17, [\state, #-16 * 16 - 16]
stp q14, q15, [\state, #-16 * 14 - 16]
stp q12, q13, [\state, #-16 * 12 - 16]
stp q10, q11, [\state, #-16 * 10 - 16]
stp q8, q9, [\state, #-16 * 8 - 16]
stp q6, q7, [\state, #-16 * 6 - 16]
stp q4, q5, [\state, #-16 * 4 - 16]
stp q2, q3, [\state, #-16 * 2 - 16]
stp q0, q1, [\state, #-16 * 0 - 16]
0:
.endm
.macro fpsimd_restore_partial state, tmpnr1, tmpnr2
ldp w\tmpnr1, w\tmpnr2, [\state]
msr fpsr, x\tmpnr1
fpsimd_restore_fpcr x\tmpnr2, x\tmpnr1
adr x\tmpnr1, 0f
ldr w\tmpnr2, [\state, #8]
add \state, \state, x\tmpnr2, lsl #4
sub x\tmpnr1, x\tmpnr1, x\tmpnr2, lsl #1
br x\tmpnr1
ldp q30, q31, [\state, #-16 * 30 - 16]
ldp q28, q29, [\state, #-16 * 28 - 16]
ldp q26, q27, [\state, #-16 * 26 - 16]
ldp q24, q25, [\state, #-16 * 24 - 16]
ldp q22, q23, [\state, #-16 * 22 - 16]
ldp q20, q21, [\state, #-16 * 20 - 16]
ldp q18, q19, [\state, #-16 * 18 - 16]
ldp q16, q17, [\state, #-16 * 16 - 16]
ldp q14, q15, [\state, #-16 * 14 - 16]
ldp q12, q13, [\state, #-16 * 12 - 16]
ldp q10, q11, [\state, #-16 * 10 - 16]
ldp q8, q9, [\state, #-16 * 8 - 16]
ldp q6, q7, [\state, #-16 * 6 - 16]
ldp q4, q5, [\state, #-16 * 4 - 16]
ldp q2, q3, [\state, #-16 * 2 - 16]
ldp q0, q1, [\state, #-16 * 0 - 16]
0:
.endm
...@@ -16,9 +16,7 @@ ...@@ -16,9 +16,7 @@
#define cpu_has_neon() system_supports_fpsimd() #define cpu_has_neon() system_supports_fpsimd()
#define kernel_neon_begin() kernel_neon_begin_partial(32) void kernel_neon_begin(void);
void kernel_neon_begin_partial(u32 num_regs);
void kernel_neon_end(void); void kernel_neon_end(void);
#endif /* ! __ASM_NEON_H */ #endif /* ! __ASM_NEON_H */
...@@ -9,15 +9,46 @@ ...@@ -9,15 +9,46 @@
#ifndef __ASM_SIMD_H #ifndef __ASM_SIMD_H
#define __ASM_SIMD_H #define __ASM_SIMD_H
#include <linux/compiler.h>
#include <linux/percpu.h>
#include <linux/preempt.h>
#include <linux/types.h> #include <linux/types.h>
#ifdef CONFIG_KERNEL_MODE_NEON
DECLARE_PER_CPU(bool, kernel_neon_busy);
/* /*
* may_use_simd - whether it is allowable at this time to issue SIMD * may_use_simd - whether it is allowable at this time to issue SIMD
* instructions or access the SIMD register file * instructions or access the SIMD register file
*
* Callers must not assume that the result remains true beyond the next
* preempt_enable() or return from softirq context.
*/ */
static __must_check inline bool may_use_simd(void) static __must_check inline bool may_use_simd(void)
{ {
return true; /*
* The raw_cpu_read() is racy if called with preemption enabled.
* This is not a bug: kernel_neon_busy is only set when
* preemption is disabled, so we cannot migrate to another CPU
* while it is set, nor can we migrate to a CPU where it is set.
* So, if we find it clear on some CPU then we're guaranteed to
* find it clear on any CPU we could migrate to.
*
* If we are in between kernel_neon_begin()...kernel_neon_end(),
* the flag will be set, but preemption is also disabled, so we
* can't migrate to another CPU and spuriously see it become
* false.
*/
return !in_irq() && !in_nmi() && !raw_cpu_read(kernel_neon_busy);
} }
#else /* ! CONFIG_KERNEL_MODE_NEON */
static __must_check inline bool may_use_simd(void) {
return false;
}
#endif /* ! CONFIG_KERNEL_MODE_NEON */
#endif #endif
...@@ -41,27 +41,3 @@ ENTRY(fpsimd_load_state) ...@@ -41,27 +41,3 @@ ENTRY(fpsimd_load_state)
fpsimd_restore x0, 8 fpsimd_restore x0, 8
ret ret
ENDPROC(fpsimd_load_state) ENDPROC(fpsimd_load_state)
#ifdef CONFIG_KERNEL_MODE_NEON
/*
* Save the bottom n FP registers.
*
* x0 - pointer to struct fpsimd_partial_state
*/
ENTRY(fpsimd_save_partial_state)
fpsimd_save_partial x0, 1, 8, 9
ret
ENDPROC(fpsimd_save_partial_state)
/*
* Load the bottom n FP registers.
*
* x0 - pointer to struct fpsimd_partial_state
*/
ENTRY(fpsimd_load_partial_state)
fpsimd_restore_partial x0, 8, 9
ret
ENDPROC(fpsimd_load_partial_state)
#endif
...@@ -17,18 +17,18 @@ ...@@ -17,18 +17,18 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#include <linux/bottom_half.h>
#include <linux/cpu.h> #include <linux/cpu.h>
#include <linux/cpu_pm.h> #include <linux/cpu_pm.h>
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/init.h> #include <linux/init.h>
#include <linux/percpu.h>
#include <linux/preempt.h> #include <linux/preempt.h>
#include <linux/sched/signal.h> #include <linux/sched/signal.h>
#include <linux/signal.h> #include <linux/signal.h>
#include <linux/hardirq.h>
#include <asm/fpsimd.h> #include <asm/fpsimd.h>
#include <asm/cputype.h> #include <asm/cputype.h>
#include <asm/neon.h>
#include <asm/simd.h> #include <asm/simd.h>
#define FPEXC_IOF (1 << 0) #define FPEXC_IOF (1 << 0)
...@@ -65,6 +65,13 @@ ...@@ -65,6 +65,13 @@
* CPU currently contain the most recent userland FPSIMD state of the current * CPU currently contain the most recent userland FPSIMD state of the current
* task. * task.
* *
* In order to allow softirq handlers to use FPSIMD, kernel_neon_begin() may
* save the task's FPSIMD context back to task_struct from softirq context.
* To prevent this from racing with the manipulation of the task's FPSIMD state
* from task context and thereby corrupting the state, it is necessary to
* protect any manipulation of a task's fpsimd_state or TIF_FOREIGN_FPSTATE
* flag with local_bh_disable() unless softirqs are already masked.
*
* For a certain task, the sequence may look something like this: * For a certain task, the sequence may look something like this:
* - the task gets scheduled in; if both the task's fpsimd_state.cpu field * - the task gets scheduled in; if both the task's fpsimd_state.cpu field
* contains the id of the current CPU, and the CPU's fpsimd_last_state per-cpu * contains the id of the current CPU, and the CPU's fpsimd_last_state per-cpu
...@@ -164,9 +171,14 @@ void fpsimd_flush_thread(void) ...@@ -164,9 +171,14 @@ void fpsimd_flush_thread(void)
{ {
if (!system_supports_fpsimd()) if (!system_supports_fpsimd())
return; return;
local_bh_disable();
memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state)); memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state));
fpsimd_flush_task_state(current); fpsimd_flush_task_state(current);
set_thread_flag(TIF_FOREIGN_FPSTATE); set_thread_flag(TIF_FOREIGN_FPSTATE);
local_bh_enable();
} }
/* /*
...@@ -177,10 +189,13 @@ void fpsimd_preserve_current_state(void) ...@@ -177,10 +189,13 @@ void fpsimd_preserve_current_state(void)
{ {
if (!system_supports_fpsimd()) if (!system_supports_fpsimd())
return; return;
preempt_disable();
local_bh_disable();
if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) if (!test_thread_flag(TIF_FOREIGN_FPSTATE))
fpsimd_save_state(&current->thread.fpsimd_state); fpsimd_save_state(&current->thread.fpsimd_state);
preempt_enable();
local_bh_enable();
} }
/* /*
...@@ -192,7 +207,9 @@ void fpsimd_restore_current_state(void) ...@@ -192,7 +207,9 @@ void fpsimd_restore_current_state(void)
{ {
if (!system_supports_fpsimd()) if (!system_supports_fpsimd())
return; return;
preempt_disable();
local_bh_disable();
if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
struct fpsimd_state *st = &current->thread.fpsimd_state; struct fpsimd_state *st = &current->thread.fpsimd_state;
...@@ -200,7 +217,8 @@ void fpsimd_restore_current_state(void) ...@@ -200,7 +217,8 @@ void fpsimd_restore_current_state(void)
__this_cpu_write(fpsimd_last_state, st); __this_cpu_write(fpsimd_last_state, st);
st->cpu = smp_processor_id(); st->cpu = smp_processor_id();
} }
preempt_enable();
local_bh_enable();
} }
/* /*
...@@ -212,7 +230,9 @@ void fpsimd_update_current_state(struct fpsimd_state *state) ...@@ -212,7 +230,9 @@ void fpsimd_update_current_state(struct fpsimd_state *state)
{ {
if (!system_supports_fpsimd()) if (!system_supports_fpsimd())
return; return;
preempt_disable();
local_bh_disable();
fpsimd_load_state(state); fpsimd_load_state(state);
if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
struct fpsimd_state *st = &current->thread.fpsimd_state; struct fpsimd_state *st = &current->thread.fpsimd_state;
...@@ -220,7 +240,8 @@ void fpsimd_update_current_state(struct fpsimd_state *state) ...@@ -220,7 +240,8 @@ void fpsimd_update_current_state(struct fpsimd_state *state)
__this_cpu_write(fpsimd_last_state, st); __this_cpu_write(fpsimd_last_state, st);
st->cpu = smp_processor_id(); st->cpu = smp_processor_id();
} }
preempt_enable();
local_bh_enable();
} }
/* /*
...@@ -233,49 +254,69 @@ void fpsimd_flush_task_state(struct task_struct *t) ...@@ -233,49 +254,69 @@ void fpsimd_flush_task_state(struct task_struct *t)
#ifdef CONFIG_KERNEL_MODE_NEON #ifdef CONFIG_KERNEL_MODE_NEON
static DEFINE_PER_CPU(struct fpsimd_partial_state, hardirq_fpsimdstate); DEFINE_PER_CPU(bool, kernel_neon_busy);
static DEFINE_PER_CPU(struct fpsimd_partial_state, softirq_fpsimdstate);
/* /*
* Kernel-side NEON support functions * Kernel-side NEON support functions
*/ */
void kernel_neon_begin_partial(u32 num_regs)
/*
* kernel_neon_begin(): obtain the CPU FPSIMD registers for use by the calling
* context
*
* Must not be called unless may_use_simd() returns true.
* Task context in the FPSIMD registers is saved back to memory as necessary.
*
* A matching call to kernel_neon_end() must be made before returning from the
* calling context.
*
* The caller may freely use the FPSIMD registers until kernel_neon_end() is
* called.
*/
void kernel_neon_begin(void)
{ {
if (WARN_ON(!system_supports_fpsimd())) if (WARN_ON(!system_supports_fpsimd()))
return; return;
if (in_interrupt()) {
struct fpsimd_partial_state *s = this_cpu_ptr(
in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
BUG_ON(num_regs > 32); BUG_ON(!may_use_simd());
fpsimd_save_partial_state(s, roundup(num_regs, 2));
} else { local_bh_disable();
/*
* Save the userland FPSIMD state if we have one and if we __this_cpu_write(kernel_neon_busy, true);
* haven't done so already. Clear fpsimd_last_state to indicate
* that there is no longer userland FPSIMD state in the /* Save unsaved task fpsimd state, if any: */
* registers. if (current->mm && !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
*/
preempt_disable();
if (current->mm &&
!test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
fpsimd_save_state(&current->thread.fpsimd_state); fpsimd_save_state(&current->thread.fpsimd_state);
this_cpu_write(fpsimd_last_state, NULL);
} /* Invalidate any task state remaining in the fpsimd regs: */
__this_cpu_write(fpsimd_last_state, NULL);
preempt_disable();
local_bh_enable();
} }
EXPORT_SYMBOL(kernel_neon_begin_partial); EXPORT_SYMBOL(kernel_neon_begin);
/*
* kernel_neon_end(): give the CPU FPSIMD registers back to the current task
*
* Must be called from a context in which kernel_neon_begin() was previously
* called, with no call to kernel_neon_end() in the meantime.
*
* The caller must not use the FPSIMD registers after this function is called,
* unless kernel_neon_begin() is called again in the meantime.
*/
void kernel_neon_end(void) void kernel_neon_end(void)
{ {
bool busy;
if (!system_supports_fpsimd()) if (!system_supports_fpsimd())
return; return;
if (in_interrupt()) {
struct fpsimd_partial_state *s = this_cpu_ptr( busy = __this_cpu_xchg(kernel_neon_busy, false);
in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate); WARN_ON(!busy); /* No matching kernel_neon_begin()? */
fpsimd_load_partial_state(s);
} else {
preempt_enable(); preempt_enable();
}
} }
EXPORT_SYMBOL(kernel_neon_end); EXPORT_SYMBOL(kernel_neon_end);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment