Commit 61650023 authored by Huacai Chen's avatar Huacai Chen

LoongArch: Add vector extensions support

Add LoongArch's vector extensions support, which including 128bit LSX
(i.e., Loongson SIMD eXtension) and 256bit LASX (i.e., Loongson Advanced
SIMD eXtension).

Linux kernel doesn't use vector itself, it only handle exceptions and
context save/restore. So it only needs a subset of these instructions:

* Vector load/store:   vld vst vldx vstx xvld xvst xvldx xvstx
* 8bit-elements move:  vpickve2gr.b xvpickve2gr.b vinsgr2vr.b xvinsgr2vr.b
* 16bit-elements move: vpickve2gr.h xvpickve2gr.h vinsgr2vr.h xvinsgr2vr.h
* 32bit-elements move: vpickve2gr.w xvpickve2gr.w vinsgr2vr.w xvinsgr2vr.w
* 64bit-elements move: vpickve2gr.d xvpickve2gr.d vinsgr2vr.d xvinsgr2vr.d
* Elements permute:    vpermi.w vpermi.d xvpermi.w xvpermi.d xvpermi.q

Introduce AS_HAS_LSX_EXTENSION and AS_HAS_LASX_EXTENSION to avoid non-
vector toolchains complains unsupported instructions.
Signed-off-by: default avatarHuacai Chen <chenhuacai@loongson.cn>
parent aa5e65dc
......@@ -164,14 +164,6 @@ config 32BIT
config 64BIT
def_bool y
config CPU_HAS_FPU
bool
default y
config CPU_HAS_PREFETCH
bool
default y
config GENERIC_BUG
def_bool y
depends on BUG
......@@ -247,6 +239,12 @@ config AS_HAS_EXPLICIT_RELOCS
config AS_HAS_FCSR_CLASS
def_bool $(as-instr,movfcsr2gr \$t0$(comma)\$fcsr0)
config AS_HAS_LSX_EXTENSION
def_bool $(as-instr,vld \$vr0$(comma)\$a0$(comma)0)
config AS_HAS_LASX_EXTENSION
def_bool $(as-instr,xvld \$xr0$(comma)\$a0$(comma)0)
menu "Kernel type and options"
source "kernel/Kconfig.hz"
......@@ -487,6 +485,43 @@ config ARCH_STRICT_ALIGN
to run kernel only on systems with h/w unaligned access support in
order to optimise for performance.
config CPU_HAS_FPU
bool
default y
config CPU_HAS_LSX
bool "Support for the Loongson SIMD Extension"
depends on AS_HAS_LSX_EXTENSION
help
Loongson SIMD Extension (LSX) introduces 128 bit wide vector registers
and a set of SIMD instructions to operate on them. When this option
is enabled the kernel will support allocating & switching LSX
vector register contexts. If you know that your kernel will only be
running on CPUs which do not support LSX or that your userland will
not be making use of it then you may wish to say N here to reduce
the size & complexity of your kernel.
If unsure, say Y.
config CPU_HAS_LASX
bool "Support for the Loongson Advanced SIMD Extension"
depends on CPU_HAS_LSX
depends on AS_HAS_LASX_EXTENSION
help
Loongson Advanced SIMD Extension (LASX) introduces 256 bit wide vector
registers and a set of SIMD instructions to operate on them. When this
option is enabled the kernel will support allocating & switching LASX
vector register contexts. If you know that your kernel will only be
running on CPUs which do not support LASX or that your userland will
not be making use of it then you may wish to say N here to reduce
the size & complexity of your kernel.
If unsure, say Y.
config CPU_HAS_PREFETCH
bool
default y
config KEXEC
bool "Kexec system call"
select KEXEC_CORE
......
This diff is collapsed.
......@@ -28,6 +28,26 @@ extern void _init_fpu(unsigned int);
extern void _save_fp(struct loongarch_fpu *);
extern void _restore_fp(struct loongarch_fpu *);
extern void _save_lsx(struct loongarch_fpu *fpu);
extern void _restore_lsx(struct loongarch_fpu *fpu);
extern void _init_lsx_upper(void);
extern void _restore_lsx_upper(struct loongarch_fpu *fpu);
extern void _save_lasx(struct loongarch_fpu *fpu);
extern void _restore_lasx(struct loongarch_fpu *fpu);
extern void _init_lasx_upper(void);
extern void _restore_lasx_upper(struct loongarch_fpu *fpu);
static inline void enable_lsx(void);
static inline void disable_lsx(void);
static inline void save_lsx(struct task_struct *t);
static inline void restore_lsx(struct task_struct *t);
static inline void enable_lasx(void);
static inline void disable_lasx(void);
static inline void save_lasx(struct task_struct *t);
static inline void restore_lasx(struct task_struct *t);
/*
* Mask the FCSR Cause bits according to the Enable bits, observing
* that Unimplemented is always enabled.
......@@ -44,6 +64,29 @@ static inline int is_fp_enabled(void)
1 : 0;
}
static inline int is_lsx_enabled(void)
{
if (!cpu_has_lsx)
return 0;
return (csr_read32(LOONGARCH_CSR_EUEN) & CSR_EUEN_LSXEN) ?
1 : 0;
}
static inline int is_lasx_enabled(void)
{
if (!cpu_has_lasx)
return 0;
return (csr_read32(LOONGARCH_CSR_EUEN) & CSR_EUEN_LASXEN) ?
1 : 0;
}
static inline int is_simd_enabled(void)
{
return is_lsx_enabled() | is_lasx_enabled();
}
#define enable_fpu() set_csr_euen(CSR_EUEN_FPEN)
#define disable_fpu() clear_csr_euen(CSR_EUEN_FPEN)
......@@ -81,9 +124,22 @@ static inline void own_fpu(int restore)
static inline void lose_fpu_inatomic(int save, struct task_struct *tsk)
{
if (is_fpu_owner()) {
if (save)
_save_fp(&tsk->thread.fpu);
disable_fpu();
if (!is_simd_enabled()) {
if (save)
_save_fp(&tsk->thread.fpu);
disable_fpu();
} else {
if (save) {
if (!is_lasx_enabled())
save_lsx(tsk);
else
save_lasx(tsk);
}
disable_fpu();
disable_lsx();
disable_lasx();
clear_tsk_thread_flag(tsk, TIF_USEDSIMD);
}
clear_tsk_thread_flag(tsk, TIF_USEDFPU);
}
KSTK_EUEN(tsk) &= ~(CSR_EUEN_FPEN | CSR_EUEN_LSXEN | CSR_EUEN_LASXEN);
......@@ -129,4 +185,127 @@ static inline union fpureg *get_fpu_regs(struct task_struct *tsk)
return tsk->thread.fpu.fpr;
}
static inline int is_simd_owner(void)
{
return test_thread_flag(TIF_USEDSIMD);
}
#ifdef CONFIG_CPU_HAS_LSX
static inline void enable_lsx(void)
{
if (cpu_has_lsx)
csr_xchg32(CSR_EUEN_LSXEN, CSR_EUEN_LSXEN, LOONGARCH_CSR_EUEN);
}
static inline void disable_lsx(void)
{
if (cpu_has_lsx)
csr_xchg32(0, CSR_EUEN_LSXEN, LOONGARCH_CSR_EUEN);
}
static inline void save_lsx(struct task_struct *t)
{
if (cpu_has_lsx)
_save_lsx(&t->thread.fpu);
}
static inline void restore_lsx(struct task_struct *t)
{
if (cpu_has_lsx)
_restore_lsx(&t->thread.fpu);
}
static inline void init_lsx_upper(void)
{
/*
* Check cpu_has_lsx only if it's a constant. This will allow the
* compiler to optimise out code for CPUs without LSX without adding
* an extra redundant check for CPUs with LSX.
*/
if (__builtin_constant_p(cpu_has_lsx) && !cpu_has_lsx)
return;
_init_lsx_upper();
}
static inline void restore_lsx_upper(struct task_struct *t)
{
if (cpu_has_lsx)
_restore_lsx_upper(&t->thread.fpu);
}
#else
static inline void enable_lsx(void) {}
static inline void disable_lsx(void) {}
static inline void save_lsx(struct task_struct *t) {}
static inline void restore_lsx(struct task_struct *t) {}
static inline void init_lsx_upper(void) {}
static inline void restore_lsx_upper(struct task_struct *t) {}
#endif
#ifdef CONFIG_CPU_HAS_LASX
static inline void enable_lasx(void)
{
if (cpu_has_lasx)
csr_xchg32(CSR_EUEN_LASXEN, CSR_EUEN_LASXEN, LOONGARCH_CSR_EUEN);
}
static inline void disable_lasx(void)
{
if (cpu_has_lasx)
csr_xchg32(0, CSR_EUEN_LASXEN, LOONGARCH_CSR_EUEN);
}
static inline void save_lasx(struct task_struct *t)
{
if (cpu_has_lasx)
_save_lasx(&t->thread.fpu);
}
static inline void restore_lasx(struct task_struct *t)
{
if (cpu_has_lasx)
_restore_lasx(&t->thread.fpu);
}
static inline void init_lasx_upper(void)
{
if (cpu_has_lasx)
_init_lasx_upper();
}
static inline void restore_lasx_upper(struct task_struct *t)
{
if (cpu_has_lasx)
_restore_lasx_upper(&t->thread.fpu);
}
#else
static inline void enable_lasx(void) {}
static inline void disable_lasx(void) {}
static inline void save_lasx(struct task_struct *t) {}
static inline void restore_lasx(struct task_struct *t) {}
static inline void init_lasx_upper(void) {}
static inline void restore_lasx_upper(struct task_struct *t) {}
#endif
static inline int thread_lsx_context_live(void)
{
if (__builtin_constant_p(cpu_has_lsx) && !cpu_has_lsx)
return 0;
return test_thread_flag(TIF_LSX_CTX_LIVE);
}
static inline int thread_lasx_context_live(void)
{
if (__builtin_constant_p(cpu_has_lasx) && !cpu_has_lasx)
return 0;
return test_thread_flag(TIF_LASX_CTX_LIVE);
}
#endif /* _ASM_FPU_H */
......@@ -41,9 +41,19 @@ struct user_pt_regs {
} __attribute__((aligned(8)));
struct user_fp_state {
uint64_t fpr[32];
uint64_t fcc;
uint32_t fcsr;
uint64_t fpr[32];
uint64_t fcc;
uint32_t fcsr;
};
struct user_lsx_state {
/* 32 registers, 128 bits width per register. */
uint64_t vregs[32*2];
};
struct user_lasx_state {
/* 32 registers, 256 bits width per register. */
uint64_t vregs[32*4];
};
struct user_watch_state {
......
......@@ -41,4 +41,22 @@ struct fpu_context {
__u32 fcsr;
};
/* LSX context */
#define LSX_CTX_MAGIC 0x53580001
#define LSX_CTX_ALIGN 16
struct lsx_context {
__u64 regs[2*32];
__u64 fcc;
__u32 fcsr;
};
/* LASX context */
#define LASX_CTX_MAGIC 0x41535801
#define LASX_CTX_ALIGN 32
struct lasx_context {
__u64 regs[4*32];
__u64 fcc;
__u32 fcsr;
};
#endif /* _UAPI_ASM_SIGCONTEXT_H */
......@@ -116,6 +116,18 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c)
c->options |= LOONGARCH_CPU_FPU;
elf_hwcap |= HWCAP_LOONGARCH_FPU;
}
#ifdef CONFIG_CPU_HAS_LSX
if (config & CPUCFG2_LSX) {
c->options |= LOONGARCH_CPU_LSX;
elf_hwcap |= HWCAP_LOONGARCH_LSX;
}
#endif
#ifdef CONFIG_CPU_HAS_LASX
if (config & CPUCFG2_LASX) {
c->options |= LOONGARCH_CPU_LASX;
elf_hwcap |= HWCAP_LOONGARCH_LASX;
}
#endif
if (config & CPUCFG2_COMPLEX) {
c->options |= LOONGARCH_CPU_COMPLEX;
elf_hwcap |= HWCAP_LOONGARCH_COMPLEX;
......
......@@ -145,6 +145,154 @@
movgr2fcsr fcsr0, \tmp0
.endm
.macro sc_save_lsx base
#ifdef CONFIG_CPU_HAS_LSX
EX vst $vr0, \base, (0 * LSX_REG_WIDTH)
EX vst $vr1, \base, (1 * LSX_REG_WIDTH)
EX vst $vr2, \base, (2 * LSX_REG_WIDTH)
EX vst $vr3, \base, (3 * LSX_REG_WIDTH)
EX vst $vr4, \base, (4 * LSX_REG_WIDTH)
EX vst $vr5, \base, (5 * LSX_REG_WIDTH)
EX vst $vr6, \base, (6 * LSX_REG_WIDTH)
EX vst $vr7, \base, (7 * LSX_REG_WIDTH)
EX vst $vr8, \base, (8 * LSX_REG_WIDTH)
EX vst $vr9, \base, (9 * LSX_REG_WIDTH)
EX vst $vr10, \base, (10 * LSX_REG_WIDTH)
EX vst $vr11, \base, (11 * LSX_REG_WIDTH)
EX vst $vr12, \base, (12 * LSX_REG_WIDTH)
EX vst $vr13, \base, (13 * LSX_REG_WIDTH)
EX vst $vr14, \base, (14 * LSX_REG_WIDTH)
EX vst $vr15, \base, (15 * LSX_REG_WIDTH)
EX vst $vr16, \base, (16 * LSX_REG_WIDTH)
EX vst $vr17, \base, (17 * LSX_REG_WIDTH)
EX vst $vr18, \base, (18 * LSX_REG_WIDTH)
EX vst $vr19, \base, (19 * LSX_REG_WIDTH)
EX vst $vr20, \base, (20 * LSX_REG_WIDTH)
EX vst $vr21, \base, (21 * LSX_REG_WIDTH)
EX vst $vr22, \base, (22 * LSX_REG_WIDTH)
EX vst $vr23, \base, (23 * LSX_REG_WIDTH)
EX vst $vr24, \base, (24 * LSX_REG_WIDTH)
EX vst $vr25, \base, (25 * LSX_REG_WIDTH)
EX vst $vr26, \base, (26 * LSX_REG_WIDTH)
EX vst $vr27, \base, (27 * LSX_REG_WIDTH)
EX vst $vr28, \base, (28 * LSX_REG_WIDTH)
EX vst $vr29, \base, (29 * LSX_REG_WIDTH)
EX vst $vr30, \base, (30 * LSX_REG_WIDTH)
EX vst $vr31, \base, (31 * LSX_REG_WIDTH)
#endif
.endm
.macro sc_restore_lsx base
#ifdef CONFIG_CPU_HAS_LSX
EX vld $vr0, \base, (0 * LSX_REG_WIDTH)
EX vld $vr1, \base, (1 * LSX_REG_WIDTH)
EX vld $vr2, \base, (2 * LSX_REG_WIDTH)
EX vld $vr3, \base, (3 * LSX_REG_WIDTH)
EX vld $vr4, \base, (4 * LSX_REG_WIDTH)
EX vld $vr5, \base, (5 * LSX_REG_WIDTH)
EX vld $vr6, \base, (6 * LSX_REG_WIDTH)
EX vld $vr7, \base, (7 * LSX_REG_WIDTH)
EX vld $vr8, \base, (8 * LSX_REG_WIDTH)
EX vld $vr9, \base, (9 * LSX_REG_WIDTH)
EX vld $vr10, \base, (10 * LSX_REG_WIDTH)
EX vld $vr11, \base, (11 * LSX_REG_WIDTH)
EX vld $vr12, \base, (12 * LSX_REG_WIDTH)
EX vld $vr13, \base, (13 * LSX_REG_WIDTH)
EX vld $vr14, \base, (14 * LSX_REG_WIDTH)
EX vld $vr15, \base, (15 * LSX_REG_WIDTH)
EX vld $vr16, \base, (16 * LSX_REG_WIDTH)
EX vld $vr17, \base, (17 * LSX_REG_WIDTH)
EX vld $vr18, \base, (18 * LSX_REG_WIDTH)
EX vld $vr19, \base, (19 * LSX_REG_WIDTH)
EX vld $vr20, \base, (20 * LSX_REG_WIDTH)
EX vld $vr21, \base, (21 * LSX_REG_WIDTH)
EX vld $vr22, \base, (22 * LSX_REG_WIDTH)
EX vld $vr23, \base, (23 * LSX_REG_WIDTH)
EX vld $vr24, \base, (24 * LSX_REG_WIDTH)
EX vld $vr25, \base, (25 * LSX_REG_WIDTH)
EX vld $vr26, \base, (26 * LSX_REG_WIDTH)
EX vld $vr27, \base, (27 * LSX_REG_WIDTH)
EX vld $vr28, \base, (28 * LSX_REG_WIDTH)
EX vld $vr29, \base, (29 * LSX_REG_WIDTH)
EX vld $vr30, \base, (30 * LSX_REG_WIDTH)
EX vld $vr31, \base, (31 * LSX_REG_WIDTH)
#endif
.endm
.macro sc_save_lasx base
#ifdef CONFIG_CPU_HAS_LASX
EX xvst $xr0, \base, (0 * LASX_REG_WIDTH)
EX xvst $xr1, \base, (1 * LASX_REG_WIDTH)
EX xvst $xr2, \base, (2 * LASX_REG_WIDTH)
EX xvst $xr3, \base, (3 * LASX_REG_WIDTH)
EX xvst $xr4, \base, (4 * LASX_REG_WIDTH)
EX xvst $xr5, \base, (5 * LASX_REG_WIDTH)
EX xvst $xr6, \base, (6 * LASX_REG_WIDTH)
EX xvst $xr7, \base, (7 * LASX_REG_WIDTH)
EX xvst $xr8, \base, (8 * LASX_REG_WIDTH)
EX xvst $xr9, \base, (9 * LASX_REG_WIDTH)
EX xvst $xr10, \base, (10 * LASX_REG_WIDTH)
EX xvst $xr11, \base, (11 * LASX_REG_WIDTH)
EX xvst $xr12, \base, (12 * LASX_REG_WIDTH)
EX xvst $xr13, \base, (13 * LASX_REG_WIDTH)
EX xvst $xr14, \base, (14 * LASX_REG_WIDTH)
EX xvst $xr15, \base, (15 * LASX_REG_WIDTH)
EX xvst $xr16, \base, (16 * LASX_REG_WIDTH)
EX xvst $xr17, \base, (17 * LASX_REG_WIDTH)
EX xvst $xr18, \base, (18 * LASX_REG_WIDTH)
EX xvst $xr19, \base, (19 * LASX_REG_WIDTH)
EX xvst $xr20, \base, (20 * LASX_REG_WIDTH)
EX xvst $xr21, \base, (21 * LASX_REG_WIDTH)
EX xvst $xr22, \base, (22 * LASX_REG_WIDTH)
EX xvst $xr23, \base, (23 * LASX_REG_WIDTH)
EX xvst $xr24, \base, (24 * LASX_REG_WIDTH)
EX xvst $xr25, \base, (25 * LASX_REG_WIDTH)
EX xvst $xr26, \base, (26 * LASX_REG_WIDTH)
EX xvst $xr27, \base, (27 * LASX_REG_WIDTH)
EX xvst $xr28, \base, (28 * LASX_REG_WIDTH)
EX xvst $xr29, \base, (29 * LASX_REG_WIDTH)
EX xvst $xr30, \base, (30 * LASX_REG_WIDTH)
EX xvst $xr31, \base, (31 * LASX_REG_WIDTH)
#endif
.endm
.macro sc_restore_lasx base
#ifdef CONFIG_CPU_HAS_LASX
EX xvld $xr0, \base, (0 * LASX_REG_WIDTH)
EX xvld $xr1, \base, (1 * LASX_REG_WIDTH)
EX xvld $xr2, \base, (2 * LASX_REG_WIDTH)
EX xvld $xr3, \base, (3 * LASX_REG_WIDTH)
EX xvld $xr4, \base, (4 * LASX_REG_WIDTH)
EX xvld $xr5, \base, (5 * LASX_REG_WIDTH)
EX xvld $xr6, \base, (6 * LASX_REG_WIDTH)
EX xvld $xr7, \base, (7 * LASX_REG_WIDTH)
EX xvld $xr8, \base, (8 * LASX_REG_WIDTH)
EX xvld $xr9, \base, (9 * LASX_REG_WIDTH)
EX xvld $xr10, \base, (10 * LASX_REG_WIDTH)
EX xvld $xr11, \base, (11 * LASX_REG_WIDTH)
EX xvld $xr12, \base, (12 * LASX_REG_WIDTH)
EX xvld $xr13, \base, (13 * LASX_REG_WIDTH)
EX xvld $xr14, \base, (14 * LASX_REG_WIDTH)
EX xvld $xr15, \base, (15 * LASX_REG_WIDTH)
EX xvld $xr16, \base, (16 * LASX_REG_WIDTH)
EX xvld $xr17, \base, (17 * LASX_REG_WIDTH)
EX xvld $xr18, \base, (18 * LASX_REG_WIDTH)
EX xvld $xr19, \base, (19 * LASX_REG_WIDTH)
EX xvld $xr20, \base, (20 * LASX_REG_WIDTH)
EX xvld $xr21, \base, (21 * LASX_REG_WIDTH)
EX xvld $xr22, \base, (22 * LASX_REG_WIDTH)
EX xvld $xr23, \base, (23 * LASX_REG_WIDTH)
EX xvld $xr24, \base, (24 * LASX_REG_WIDTH)
EX xvld $xr25, \base, (25 * LASX_REG_WIDTH)
EX xvld $xr26, \base, (26 * LASX_REG_WIDTH)
EX xvld $xr27, \base, (27 * LASX_REG_WIDTH)
EX xvld $xr28, \base, (28 * LASX_REG_WIDTH)
EX xvld $xr29, \base, (29 * LASX_REG_WIDTH)
EX xvld $xr30, \base, (30 * LASX_REG_WIDTH)
EX xvld $xr31, \base, (31 * LASX_REG_WIDTH)
#endif
.endm
/*
* Save a thread's fp context.
*/
......@@ -166,6 +314,76 @@ SYM_FUNC_START(_restore_fp)
jr ra
SYM_FUNC_END(_restore_fp)
#ifdef CONFIG_CPU_HAS_LSX
/*
* Save a thread's LSX vector context.
*/
SYM_FUNC_START(_save_lsx)
lsx_save_all a0 t1 t2
jr ra
SYM_FUNC_END(_save_lsx)
EXPORT_SYMBOL(_save_lsx)
/*
* Restore a thread's LSX vector context.
*/
SYM_FUNC_START(_restore_lsx)
lsx_restore_all a0 t1 t2
jr ra
SYM_FUNC_END(_restore_lsx)
SYM_FUNC_START(_save_lsx_upper)
lsx_save_all_upper a0 t0 t1
jr ra
SYM_FUNC_END(_save_lsx_upper)
SYM_FUNC_START(_restore_lsx_upper)
lsx_restore_all_upper a0 t0 t1
jr ra
SYM_FUNC_END(_restore_lsx_upper)
SYM_FUNC_START(_init_lsx_upper)
lsx_init_all_upper t1
jr ra
SYM_FUNC_END(_init_lsx_upper)
#endif
#ifdef CONFIG_CPU_HAS_LASX
/*
* Save a thread's LASX vector context.
*/
SYM_FUNC_START(_save_lasx)
lasx_save_all a0 t1 t2
jr ra
SYM_FUNC_END(_save_lasx)
EXPORT_SYMBOL(_save_lasx)
/*
* Restore a thread's LASX vector context.
*/
SYM_FUNC_START(_restore_lasx)
lasx_restore_all a0 t1 t2
jr ra
SYM_FUNC_END(_restore_lasx)
SYM_FUNC_START(_save_lasx_upper)
lasx_save_all_upper a0 t0 t1
jr ra
SYM_FUNC_END(_save_lasx_upper)
SYM_FUNC_START(_restore_lasx_upper)
lasx_restore_all_upper a0 t0 t1
jr ra
SYM_FUNC_END(_restore_lasx_upper)
SYM_FUNC_START(_init_lasx_upper)
lasx_init_all_upper t1
jr ra
SYM_FUNC_END(_init_lasx_upper)
#endif
/*
* Load the FPU with signalling NANS. This bit pattern we're using has
* the property that no matter whether considered as single or as double
......@@ -244,6 +462,58 @@ SYM_FUNC_START(_restore_fp_context)
jr ra
SYM_FUNC_END(_restore_fp_context)
/*
* a0: fpregs
* a1: fcc
* a2: fcsr
*/
SYM_FUNC_START(_save_lsx_context)
sc_save_fcc a1, t0, t1
sc_save_fcsr a2, t0
sc_save_lsx a0
li.w a0, 0 # success
jr ra
SYM_FUNC_END(_save_lsx_context)
/*
* a0: fpregs
* a1: fcc
* a2: fcsr
*/
SYM_FUNC_START(_restore_lsx_context)
sc_restore_lsx a0
sc_restore_fcc a1, t1, t2
sc_restore_fcsr a2, t1
li.w a0, 0 # success
jr ra
SYM_FUNC_END(_restore_lsx_context)
/*
* a0: fpregs
* a1: fcc
* a2: fcsr
*/
SYM_FUNC_START(_save_lasx_context)
sc_save_fcc a1, t0, t1
sc_save_fcsr a2, t0
sc_save_lasx a0
li.w a0, 0 # success
jr ra
SYM_FUNC_END(_save_lasx_context)
/*
* a0: fpregs
* a1: fcc
* a2: fcsr
*/
SYM_FUNC_START(_restore_lasx_context)
sc_restore_lasx a0
sc_restore_fcc a1, t1, t2
sc_restore_fcsr a2, t1
li.w a0, 0 # success
jr ra
SYM_FUNC_END(_restore_lasx_context)
SYM_FUNC_START(fault)
li.w a0, -EFAULT # failure
jr ra
......
......@@ -117,8 +117,14 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
*/
preempt_disable();
if (is_fpu_owner())
save_fp(current);
if (is_fpu_owner()) {
if (is_lasx_enabled())
save_lasx(current);
else if (is_lsx_enabled())
save_lsx(current);
else
save_fp(current);
}
preempt_enable();
......
......@@ -250,6 +250,90 @@ static int cfg_set(struct task_struct *target,
return 0;
}
#ifdef CONFIG_CPU_HAS_LSX
static void copy_pad_fprs(struct task_struct *target,
const struct user_regset *regset,
struct membuf *to, unsigned int live_sz)
{
int i, j;
unsigned long long fill = ~0ull;
unsigned int cp_sz, pad_sz;
cp_sz = min(regset->size, live_sz);
pad_sz = regset->size - cp_sz;
WARN_ON(pad_sz % sizeof(fill));
for (i = 0; i < NUM_FPU_REGS; i++) {
membuf_write(to, &target->thread.fpu.fpr[i], cp_sz);
for (j = 0; j < (pad_sz / sizeof(fill)); j++) {
membuf_store(to, fill);
}
}
}
static int simd_get(struct task_struct *target,
const struct user_regset *regset,
struct membuf to)
{
const unsigned int wr_size = NUM_FPU_REGS * regset->size;
if (!tsk_used_math(target)) {
/* The task hasn't used FP or LSX, fill with 0xff */
copy_pad_fprs(target, regset, &to, 0);
} else if (!test_tsk_thread_flag(target, TIF_LSX_CTX_LIVE)) {
/* Copy scalar FP context, fill the rest with 0xff */
copy_pad_fprs(target, regset, &to, 8);
#ifdef CONFIG_CPU_HAS_LASX
} else if (!test_tsk_thread_flag(target, TIF_LASX_CTX_LIVE)) {
/* Copy LSX 128 Bit context, fill the rest with 0xff */
copy_pad_fprs(target, regset, &to, 16);
#endif
} else if (sizeof(target->thread.fpu.fpr[0]) == regset->size) {
/* Trivially copy the vector registers */
membuf_write(&to, &target->thread.fpu.fpr, wr_size);
} else {
/* Copy as much context as possible, fill the rest with 0xff */
copy_pad_fprs(target, regset, &to, sizeof(target->thread.fpu.fpr[0]));
}
return 0;
}
static int simd_set(struct task_struct *target,
const struct user_regset *regset,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
const unsigned int wr_size = NUM_FPU_REGS * regset->size;
unsigned int cp_sz;
int i, err, start;
init_fp_ctx(target);
if (sizeof(target->thread.fpu.fpr[0]) == regset->size) {
/* Trivially copy the vector registers */
err = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
&target->thread.fpu.fpr,
0, wr_size);
} else {
/* Copy as much context as possible */
cp_sz = min_t(unsigned int, regset->size,
sizeof(target->thread.fpu.fpr[0]));
i = start = err = 0;
for (; i < NUM_FPU_REGS; i++, start += regset->size) {
err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf,
&target->thread.fpu.fpr[i],
start, start + cp_sz);
}
}
return err;
}
#endif /* CONFIG_CPU_HAS_LSX */
#ifdef CONFIG_HAVE_HW_BREAKPOINT
/*
......@@ -708,6 +792,12 @@ enum loongarch_regset {
REGSET_GPR,
REGSET_FPR,
REGSET_CPUCFG,
#ifdef CONFIG_CPU_HAS_LSX
REGSET_LSX,
#endif
#ifdef CONFIG_CPU_HAS_LASX
REGSET_LASX,
#endif
#ifdef CONFIG_HAVE_HW_BREAKPOINT
REGSET_HW_BREAK,
REGSET_HW_WATCH,
......@@ -739,6 +829,26 @@ static const struct user_regset loongarch64_regsets[] = {
.regset_get = cfg_get,
.set = cfg_set,
},
#ifdef CONFIG_CPU_HAS_LSX
[REGSET_LSX] = {
.core_note_type = NT_LOONGARCH_LSX,
.n = NUM_FPU_REGS,
.size = 16,
.align = 16,
.regset_get = simd_get,
.set = simd_set,
},
#endif
#ifdef CONFIG_CPU_HAS_LASX
[REGSET_LASX] = {
.core_note_type = NT_LOONGARCH_LASX,
.n = NUM_FPU_REGS,
.size = 32,
.align = 32,
.regset_get = simd_get,
.set = simd_set,
},
#endif
#ifdef CONFIG_HAVE_HW_BREAKPOINT
[REGSET_HW_BREAK] = {
.core_note_type = NT_LOONGARCH_HW_BREAK,
......
This diff is collapsed.
......@@ -852,12 +852,67 @@ static void init_restore_fp(void)
BUG_ON(!is_fp_enabled());
}
static void init_restore_lsx(void)
{
enable_lsx();
if (!thread_lsx_context_live()) {
/* First time LSX context user */
init_restore_fp();
init_lsx_upper();
set_thread_flag(TIF_LSX_CTX_LIVE);
} else {
if (!is_simd_owner()) {
if (is_fpu_owner()) {
restore_lsx_upper(current);
} else {
__own_fpu();
restore_lsx(current);
}
}
}
set_thread_flag(TIF_USEDSIMD);
BUG_ON(!is_fp_enabled());
BUG_ON(!is_lsx_enabled());
}
static void init_restore_lasx(void)
{
enable_lasx();
if (!thread_lasx_context_live()) {
/* First time LASX context user */
init_restore_lsx();
init_lasx_upper();
set_thread_flag(TIF_LASX_CTX_LIVE);
} else {
if (is_fpu_owner() || is_simd_owner()) {
init_restore_lsx();
restore_lasx_upper(current);
} else {
__own_fpu();
enable_lsx();
restore_lasx(current);
}
}
set_thread_flag(TIF_USEDSIMD);
BUG_ON(!is_fp_enabled());
BUG_ON(!is_lsx_enabled());
BUG_ON(!is_lasx_enabled());
}
asmlinkage void noinstr do_fpu(struct pt_regs *regs)
{
irqentry_state_t state = irqentry_enter(regs);
local_irq_enable();
die_if_kernel("do_fpu invoked from kernel context!", regs);
BUG_ON(is_lsx_enabled());
BUG_ON(is_lasx_enabled());
preempt_disable();
init_restore_fp();
......@@ -872,9 +927,20 @@ asmlinkage void noinstr do_lsx(struct pt_regs *regs)
irqentry_state_t state = irqentry_enter(regs);
local_irq_enable();
force_sig(SIGILL);
local_irq_disable();
if (!cpu_has_lsx) {
force_sig(SIGILL);
goto out;
}
die_if_kernel("do_lsx invoked from kernel context!", regs);
BUG_ON(is_lasx_enabled());
preempt_disable();
init_restore_lsx();
preempt_enable();
out:
local_irq_disable();
irqentry_exit(regs, state);
}
......@@ -883,9 +949,19 @@ asmlinkage void noinstr do_lasx(struct pt_regs *regs)
irqentry_state_t state = irqentry_enter(regs);
local_irq_enable();
force_sig(SIGILL);
local_irq_disable();
if (!cpu_has_lasx) {
force_sig(SIGILL);
goto out;
}
die_if_kernel("do_lasx invoked from kernel context!", regs);
preempt_disable();
init_restore_lasx();
preempt_enable();
out:
local_irq_disable();
irqentry_exit(regs, state);
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment