Commit af7167d6 authored by Mark Brown's avatar Mark Brown Committed by Catalin Marinas

arm64/sme: Implement streaming SVE context switching

When in streaming mode we need to save and restore the streaming mode
SVE register state rather than the regular SVE register state. This uses
the streaming mode vector length and omits FFR (unless the system supports
FA64) but is otherwise identical. If TIF_SVE is enabled when we are in
streaming mode then streaming mode takes precedence.

This does not handle use of streaming SVE state with KVM, ptrace or
signals. This will be updated in further patches.
Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20220419112247.711548-15-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
parent b40c559b
...@@ -47,11 +47,21 @@ extern void fpsimd_update_current_state(struct user_fpsimd_state const *state); ...@@ -47,11 +47,21 @@ extern void fpsimd_update_current_state(struct user_fpsimd_state const *state);
extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state, extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state,
void *sve_state, unsigned int sve_vl, void *sve_state, unsigned int sve_vl,
u64 *svcr); unsigned int sme_vl, u64 *svcr);
extern void fpsimd_flush_task_state(struct task_struct *target); extern void fpsimd_flush_task_state(struct task_struct *target);
extern void fpsimd_save_and_flush_cpu_state(void); extern void fpsimd_save_and_flush_cpu_state(void);
/*
 * Report whether streaming mode is enabled for @thread: true only when the
 * system supports SME and the SM bit is set in the thread's saved SVCR.
 */
static inline bool thread_sm_enabled(struct thread_struct *thread)
{
	/* Without SME the saved SVCR value is never consulted. */
	if (!system_supports_sme())
		return false;

	return (thread->svcr & SYS_SVCR_EL0_SM_MASK) != 0;
}
/*
 * Report whether ZA is enabled for @thread: true only when the system
 * supports SME and the ZA bit is set in the thread's saved SVCR.
 */
static inline bool thread_za_enabled(struct thread_struct *thread)
{
	/* Without SME the saved SVCR value is never consulted. */
	if (!system_supports_sme())
		return false;

	return (thread->svcr & SYS_SVCR_EL0_ZA_MASK) != 0;
}
/* Maximum VL that SVE/SME VL-agnostic software can transparently support */ /* Maximum VL that SVE/SME VL-agnostic software can transparently support */
#define VL_ARCH_MAX 0x100 #define VL_ARCH_MAX 0x100
...@@ -63,7 +73,14 @@ static inline size_t sve_ffr_offset(int vl) ...@@ -63,7 +73,14 @@ static inline size_t sve_ffr_offset(int vl)
static inline void *sve_pffr(struct thread_struct *thread) static inline void *sve_pffr(struct thread_struct *thread)
{ {
return (char *)thread->sve_state + sve_ffr_offset(thread_get_sve_vl(thread)); unsigned int vl;
if (system_supports_sme() && thread_sm_enabled(thread))
vl = thread_get_sme_vl(thread);
else
vl = thread_get_sve_vl(thread);
return (char *)thread->sve_state + sve_ffr_offset(vl);
} }
extern void sve_save_state(void *state, u32 *pfpsr, int save_ffr); extern void sve_save_state(void *state, u32 *pfpsr, int save_ffr);
...@@ -72,6 +89,7 @@ extern void sve_load_state(void const *state, u32 const *pfpsr, ...@@ -72,6 +89,7 @@ extern void sve_load_state(void const *state, u32 const *pfpsr,
extern void sve_flush_live(bool flush_ffr, unsigned long vq_minus_1); extern void sve_flush_live(bool flush_ffr, unsigned long vq_minus_1);
extern unsigned int sve_get_vl(void); extern unsigned int sve_get_vl(void);
extern void sve_set_vq(unsigned long vq_minus_1); extern void sve_set_vq(unsigned long vq_minus_1);
extern void sme_set_vq(unsigned long vq_minus_1);
struct arm64_cpu_capabilities; struct arm64_cpu_capabilities;
extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused); extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused);
......
...@@ -262,6 +262,17 @@ ...@@ -262,6 +262,17 @@
921: 921:
.endm .endm
/*
 * Update SMCR_EL1.LEN with the new VQ.
 *
 * xvqminus1: register holding the new LEN field value (VQ - 1, going by the
 *            parameter name). It is ORed in without masking, so it must not
 *            have bits set outside SMCR_ELx_LEN_MASK.
 * xtmp:      scratch register, overwritten with the current SMCR_EL1 value.
 * xtmp2:     scratch register, overwritten with the updated SMCR_EL1 value.
 *
 * The condition flags are modified by the cmp. SMCR_EL1 is only written
 * when the computed value differs from the one read back.
 */
.macro sme_load_vq xvqminus1, xtmp, xtmp2
// Read the current control register value.
mrs_s \xtmp, SYS_SMCR_EL1
// Clear the LEN field and insert the requested value.
bic \xtmp2, \xtmp, SMCR_ELx_LEN_MASK
orr \xtmp2, \xtmp2, \xvqminus1
// Skip the system register write if nothing would change.
cmp \xtmp2, \xtmp
b.eq 921f
msr_s SYS_SMCR_EL1, \xtmp2 //self-synchronising
921:
.endm
/* Preserve the first 128-bits of Znz and zero the rest. */ /* Preserve the first 128-bits of Znz and zero the rest. */
.macro _sve_flush_z nz .macro _sve_flush_z nz
_sve_check_zreg \nz _sve_check_zreg \nz
......
...@@ -184,6 +184,11 @@ static inline unsigned int thread_get_sve_vl(struct thread_struct *thread) ...@@ -184,6 +184,11 @@ static inline unsigned int thread_get_sve_vl(struct thread_struct *thread)
return thread_get_vl(thread, ARM64_VEC_SVE); return thread_get_vl(thread, ARM64_VEC_SVE);
} }
/* Look up the vector length recorded for @thread under ARM64_VEC_SME. */
static inline unsigned int thread_get_sme_vl(struct thread_struct *thread)
{
	const unsigned int sme_vl = thread_get_vl(thread, ARM64_VEC_SME);

	return sme_vl;
}
unsigned int task_get_vl(const struct task_struct *task, enum vec_type type); unsigned int task_get_vl(const struct task_struct *task, enum vec_type type);
void task_set_vl(struct task_struct *task, enum vec_type type, void task_set_vl(struct task_struct *task, enum vec_type type,
unsigned long vl); unsigned long vl);
...@@ -197,6 +202,11 @@ static inline unsigned int task_get_sve_vl(const struct task_struct *task) ...@@ -197,6 +202,11 @@ static inline unsigned int task_get_sve_vl(const struct task_struct *task)
return task_get_vl(task, ARM64_VEC_SVE); return task_get_vl(task, ARM64_VEC_SVE);
} }
/* Look up the vector length recorded for @task under ARM64_VEC_SME. */
static inline unsigned int task_get_sme_vl(const struct task_struct *task)
{
	const unsigned int sme_vl = task_get_vl(task, ARM64_VEC_SME);

	return sme_vl;
}
static inline void task_set_sve_vl(struct task_struct *task, unsigned long vl) static inline void task_set_sve_vl(struct task_struct *task, unsigned long vl)
{ {
task_set_vl(task, ARM64_VEC_SVE, vl); task_set_vl(task, ARM64_VEC_SVE, vl);
......
...@@ -94,4 +94,9 @@ SYM_FUNC_START(sme_get_vl) ...@@ -94,4 +94,9 @@ SYM_FUNC_START(sme_get_vl)
ret ret
SYM_FUNC_END(sme_get_vl) SYM_FUNC_END(sme_get_vl)
/*
 * sme_set_vq - program SMCR_EL1.LEN via the sme_load_vq macro.
 *
 * x0: value for the LEN field (declared to C as
 *     sme_set_vq(unsigned long vq_minus_1))
 *
 * x1 and x2 are passed to the macro as scratch registers and are overwritten.
 */
SYM_FUNC_START(sme_set_vq)
sme_load_vq x0, x1, x2
ret
SYM_FUNC_END(sme_set_vq)
#endif /* CONFIG_ARM64_SME */ #endif /* CONFIG_ARM64_SME */
...@@ -123,6 +123,7 @@ struct fpsimd_last_state_struct { ...@@ -123,6 +123,7 @@ struct fpsimd_last_state_struct {
void *sve_state; void *sve_state;
u64 *svcr; u64 *svcr;
unsigned int sve_vl; unsigned int sve_vl;
unsigned int sme_vl;
}; };
static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state); static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state);
...@@ -301,17 +302,28 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type, ...@@ -301,17 +302,28 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
task->thread.vl_onexec[type] = vl; task->thread.vl_onexec[type] = vl;
} }
/*
* TIF_SME controls whether a task can use SME without trapping while
* in userspace. When TIF_SME is set we must have storage allocated in
* sve_state and za_state to store the contents of both ZA and the SVE
* registers for both streaming and non-streaming modes.
*
* If both SVCR.ZA and SVCR.SM are disabled then at any point we
* may disable TIF_SME and reenable traps.
*/
/* /*
* TIF_SVE controls whether a task can use SVE without trapping while * TIF_SVE controls whether a task can use SVE without trapping while
* in userspace, and also the way a task's FPSIMD/SVE state is stored * in userspace, and also (together with TIF_SME) the way a task's
* in thread_struct. * FPSIMD/SVE state is stored in thread_struct.
* *
* The kernel uses this flag to track whether a user task is actively * The kernel uses this flag to track whether a user task is actively
* using SVE, and therefore whether full SVE register state needs to * using SVE, and therefore whether full SVE register state needs to
* be tracked. If not, the cheaper FPSIMD context handling code can * be tracked. If not, the cheaper FPSIMD context handling code can
* be used instead of the more costly SVE equivalents. * be used instead of the more costly SVE equivalents.
* *
* * TIF_SVE set: * * TIF_SVE or SVCR.SM set:
* *
* The task can execute SVE instructions while in userspace without * The task can execute SVE instructions while in userspace without
* trapping to the kernel. * trapping to the kernel.
...@@ -319,7 +331,8 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type, ...@@ -319,7 +331,8 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
* When stored, Z0-Z31 (incorporating Vn in bits[127:0] or the * When stored, Z0-Z31 (incorporating Vn in bits[127:0] or the
* corresponding Zn), P0-P15 and FFR are encoded in * corresponding Zn), P0-P15 and FFR are encoded in
* task->thread.sve_state, formatted appropriately for vector * task->thread.sve_state, formatted appropriately for vector
* length task->thread.sve_vl. * length task->thread.sve_vl or, if SVCR.SM is set,
* task->thread.sme_vl.
* *
* task->thread.sve_state must point to a valid buffer at least * task->thread.sve_state must point to a valid buffer at least
* sve_state_size(task) bytes in size. * sve_state_size(task) bytes in size.
...@@ -357,19 +370,40 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type, ...@@ -357,19 +370,40 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
*/ */
static void task_fpsimd_load(void) static void task_fpsimd_load(void)
{ {
bool restore_sve_regs = false;
bool restore_ffr;
WARN_ON(!system_supports_fpsimd()); WARN_ON(!system_supports_fpsimd());
WARN_ON(!have_cpu_fpsimd_context()); WARN_ON(!have_cpu_fpsimd_context());
if (IS_ENABLED(CONFIG_ARM64_SME) && test_thread_flag(TIF_SME)) /* Check if we should restore SVE first */
write_sysreg_s(current->thread.svcr, SYS_SVCR_EL0);
if (IS_ENABLED(CONFIG_ARM64_SVE) && test_thread_flag(TIF_SVE)) { if (IS_ENABLED(CONFIG_ARM64_SVE) && test_thread_flag(TIF_SVE)) {
sve_set_vq(sve_vq_from_vl(task_get_sve_vl(current)) - 1); sve_set_vq(sve_vq_from_vl(task_get_sve_vl(current)) - 1);
restore_sve_regs = true;
restore_ffr = true;
}
/* Restore SME, override SVE register configuration if needed */
if (system_supports_sme()) {
unsigned long sme_vl = task_get_sme_vl(current);
if (test_thread_flag(TIF_SME))
sme_set_vq(sve_vq_from_vl(sme_vl) - 1);
write_sysreg_s(current->thread.svcr, SYS_SVCR_EL0);
if (thread_sm_enabled(&current->thread)) {
restore_sve_regs = true;
restore_ffr = system_supports_fa64();
}
}
if (restore_sve_regs)
sve_load_state(sve_pffr(&current->thread), sve_load_state(sve_pffr(&current->thread),
&current->thread.uw.fpsimd_state.fpsr, true); &current->thread.uw.fpsimd_state.fpsr,
} else { restore_ffr);
else
fpsimd_load_state(&current->thread.uw.fpsimd_state); fpsimd_load_state(&current->thread.uw.fpsimd_state);
}
} }
/* /*
...@@ -387,6 +421,9 @@ static void fpsimd_save(void) ...@@ -387,6 +421,9 @@ static void fpsimd_save(void)
struct fpsimd_last_state_struct const *last = struct fpsimd_last_state_struct const *last =
this_cpu_ptr(&fpsimd_last_state); this_cpu_ptr(&fpsimd_last_state);
/* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */ /* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */
bool save_sve_regs = false;
bool save_ffr;
unsigned int vl;
WARN_ON(!system_supports_fpsimd()); WARN_ON(!system_supports_fpsimd());
WARN_ON(!have_cpu_fpsimd_context()); WARN_ON(!have_cpu_fpsimd_context());
...@@ -394,15 +431,33 @@ static void fpsimd_save(void) ...@@ -394,15 +431,33 @@ static void fpsimd_save(void)
if (test_thread_flag(TIF_FOREIGN_FPSTATE)) if (test_thread_flag(TIF_FOREIGN_FPSTATE))
return; return;
if (IS_ENABLED(CONFIG_ARM64_SME) && if (test_thread_flag(TIF_SVE)) {
test_thread_flag(TIF_SME)) { save_sve_regs = true;
save_ffr = true;
vl = last->sve_vl;
}
if (system_supports_sme()) {
u64 *svcr = last->svcr; u64 *svcr = last->svcr;
*svcr = read_sysreg_s(SYS_SVCR_EL0); *svcr = read_sysreg_s(SYS_SVCR_EL0);
if (thread_za_enabled(&current->thread)) {
/* ZA state management is not implemented yet */
force_signal_inject(SIGKILL, SI_KERNEL, 0, 0);
return;
} }
if (IS_ENABLED(CONFIG_ARM64_SVE) && /* If we are in streaming mode override regular SVE. */
test_thread_flag(TIF_SVE)) { if (*svcr & SYS_SVCR_EL0_SM_MASK) {
if (WARN_ON(sve_get_vl() != last->sve_vl)) { save_sve_regs = true;
save_ffr = system_supports_fa64();
vl = last->sme_vl;
}
}
if (IS_ENABLED(CONFIG_ARM64_SVE) && save_sve_regs) {
/* Get the configured VL from RDVL, will account for SM */
if (WARN_ON(sve_get_vl() != vl)) {
/* /*
* Can't save the user regs, so current would * Can't save the user regs, so current would
* re-enter user with corrupt state. * re-enter user with corrupt state.
...@@ -413,8 +468,8 @@ static void fpsimd_save(void) ...@@ -413,8 +468,8 @@ static void fpsimd_save(void)
} }
sve_save_state((char *)last->sve_state + sve_save_state((char *)last->sve_state +
sve_ffr_offset(last->sve_vl), sve_ffr_offset(vl),
&last->st->fpsr, true); &last->st->fpsr, save_ffr);
} else { } else {
fpsimd_save_state(last->st); fpsimd_save_state(last->st);
} }
...@@ -619,7 +674,14 @@ static void sve_to_fpsimd(struct task_struct *task) ...@@ -619,7 +674,14 @@ static void sve_to_fpsimd(struct task_struct *task)
*/ */
static size_t sve_state_size(struct task_struct const *task) static size_t sve_state_size(struct task_struct const *task)
{ {
return SVE_SIG_REGS_SIZE(sve_vq_from_vl(task_get_sve_vl(task))); unsigned int vl = 0;
if (system_supports_sve())
vl = task_get_sve_vl(task);
if (system_supports_sme())
vl = max(vl, task_get_sme_vl(task));
return SVE_SIG_REGS_SIZE(sve_vq_from_vl(vl));
} }
/* /*
...@@ -748,7 +810,8 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type, ...@@ -748,7 +810,8 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type,
} }
fpsimd_flush_task_state(task); fpsimd_flush_task_state(task);
if (test_and_clear_tsk_thread_flag(task, TIF_SVE)) if (test_and_clear_tsk_thread_flag(task, TIF_SVE) ||
thread_sm_enabled(&task->thread))
sve_to_fpsimd(task); sve_to_fpsimd(task);
if (system_supports_sme() && type == ARM64_VEC_SME) if (system_supports_sme() && type == ARM64_VEC_SME)
...@@ -1375,6 +1438,9 @@ void fpsimd_flush_thread(void) ...@@ -1375,6 +1438,9 @@ void fpsimd_flush_thread(void)
fpsimd_flush_thread_vl(ARM64_VEC_SVE); fpsimd_flush_thread_vl(ARM64_VEC_SVE);
} }
if (system_supports_sme())
fpsimd_flush_thread_vl(ARM64_VEC_SME);
put_cpu_fpsimd_context(); put_cpu_fpsimd_context();
} }
...@@ -1418,6 +1484,7 @@ static void fpsimd_bind_task_to_cpu(void) ...@@ -1418,6 +1484,7 @@ static void fpsimd_bind_task_to_cpu(void)
last->st = &current->thread.uw.fpsimd_state; last->st = &current->thread.uw.fpsimd_state;
last->sve_state = current->thread.sve_state; last->sve_state = current->thread.sve_state;
last->sve_vl = task_get_sve_vl(current); last->sve_vl = task_get_sve_vl(current);
last->sme_vl = task_get_sme_vl(current);
last->svcr = &current->thread.svcr; last->svcr = &current->thread.svcr;
current->thread.fpsimd_cpu = smp_processor_id(); current->thread.fpsimd_cpu = smp_processor_id();
...@@ -1433,7 +1500,8 @@ static void fpsimd_bind_task_to_cpu(void) ...@@ -1433,7 +1500,8 @@ static void fpsimd_bind_task_to_cpu(void)
} }
void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state, void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state,
unsigned int sve_vl, u64 *svcr) unsigned int sve_vl, unsigned int sme_vl,
u64 *svcr)
{ {
struct fpsimd_last_state_struct *last = struct fpsimd_last_state_struct *last =
this_cpu_ptr(&fpsimd_last_state); this_cpu_ptr(&fpsimd_last_state);
...@@ -1445,6 +1513,7 @@ void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state, ...@@ -1445,6 +1513,7 @@ void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state,
last->svcr = svcr; last->svcr = svcr;
last->sve_state = sve_state; last->sve_state = sve_state;
last->sve_vl = sve_vl; last->sve_vl = sve_vl;
last->sme_vl = sme_vl;
} }
/* /*
......
...@@ -116,7 +116,7 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu) ...@@ -116,7 +116,7 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.fp_regs, fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.fp_regs,
vcpu->arch.sve_state, vcpu->arch.sve_state,
vcpu->arch.sve_max_vl, vcpu->arch.sve_max_vl,
NULL); 0, NULL);
clear_thread_flag(TIF_FOREIGN_FPSTATE); clear_thread_flag(TIF_FOREIGN_FPSTATE);
update_thread_flag(TIF_SVE, vcpu_has_sve(vcpu)); update_thread_flag(TIF_SVE, vcpu_has_sve(vcpu));
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment