Commit b5510d9b, authored by Hendrik Brueckner, committed by Martin Schwidefsky

s390/fpu: always enable the vector facility if it is available

If the kernel detects that the s390 hardware supports the vector
facility, it is enabled by default at an early stage.  To force
it off, use the novx kernel parameter.  Note that there is a small
time window in which the vector facility is enabled before it is
forced off.

With the vector facility enabled by default, the FPU save and
restore functions can be improved.  They no longer need to
manage expensive control register updates to enable or disable
the vector enablement control for particular processes.
Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Reviewed-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
parent 395e6aa1
...@@ -46,8 +46,6 @@ static inline void __ctl_clear_bit(unsigned int cr, unsigned int bit) ...@@ -46,8 +46,6 @@ static inline void __ctl_clear_bit(unsigned int cr, unsigned int bit)
__ctl_load(reg, cr, cr); __ctl_load(reg, cr, cr);
} }
void __ctl_set_vx(void);
void smp_ctl_set_bit(int cr, int bit); void smp_ctl_set_bit(int cr, int bit);
void smp_ctl_clear_bit(int cr, int bit); void smp_ctl_clear_bit(int cr, int bit);
......
...@@ -8,10 +8,6 @@ ...@@ -8,10 +8,6 @@
#ifndef _ASM_S390_FPU_INTERNAL_H #ifndef _ASM_S390_FPU_INTERNAL_H
#define _ASM_S390_FPU_INTERNAL_H #define _ASM_S390_FPU_INTERNAL_H
#define FPU_USE_VX 1 /* Vector extension is active */
#ifndef __ASSEMBLY__
#include <linux/errno.h> #include <linux/errno.h>
#include <linux/string.h> #include <linux/string.h>
#include <asm/linkage.h> #include <asm/linkage.h>
...@@ -20,7 +16,6 @@ ...@@ -20,7 +16,6 @@
struct fpu { struct fpu {
__u32 fpc; /* Floating-point control */ __u32 fpc; /* Floating-point control */
__u32 flags;
union { union {
void *regs; void *regs;
freg_t *fprs; /* Floating-point register save area */ freg_t *fprs; /* Floating-point register save area */
...@@ -30,9 +25,6 @@ struct fpu { ...@@ -30,9 +25,6 @@ struct fpu {
void save_fpu_regs(void); void save_fpu_regs(void);
#define is_vx_fpu(fpu) (!!((fpu)->flags & FPU_USE_VX))
#define is_vx_task(tsk) (!!((tsk)->thread.fpu.flags & FPU_USE_VX))
/* VX array structure for address operand constraints in inline assemblies */ /* VX array structure for address operand constraints in inline assemblies */
struct vx_array { __vector128 _[__NUM_VXRS]; }; struct vx_array { __vector128 _[__NUM_VXRS]; };
...@@ -89,7 +81,7 @@ static inline void convert_fp_to_vx(__vector128 *vxrs, freg_t *fprs) ...@@ -89,7 +81,7 @@ static inline void convert_fp_to_vx(__vector128 *vxrs, freg_t *fprs)
static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu) static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu)
{ {
fpregs->pad = 0; fpregs->pad = 0;
if (is_vx_fpu(fpu)) if (MACHINE_HAS_VX)
convert_vx_to_fp((freg_t *)&fpregs->fprs, fpu->vxrs); convert_vx_to_fp((freg_t *)&fpregs->fprs, fpu->vxrs);
else else
memcpy((freg_t *)&fpregs->fprs, fpu->fprs, memcpy((freg_t *)&fpregs->fprs, fpu->fprs,
...@@ -98,13 +90,11 @@ static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu) ...@@ -98,13 +90,11 @@ static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu)
static inline void fpregs_load(_s390_fp_regs *fpregs, struct fpu *fpu) static inline void fpregs_load(_s390_fp_regs *fpregs, struct fpu *fpu)
{ {
if (is_vx_fpu(fpu)) if (MACHINE_HAS_VX)
convert_fp_to_vx(fpu->vxrs, (freg_t *)&fpregs->fprs); convert_fp_to_vx(fpu->vxrs, (freg_t *)&fpregs->fprs);
else else
memcpy(fpu->fprs, (freg_t *)&fpregs->fprs, memcpy(fpu->fprs, (freg_t *)&fpregs->fprs,
sizeof(fpregs->fprs)); sizeof(fpregs->fprs));
} }
#endif
#endif /* _ASM_S390_FPU_INTERNAL_H */ #endif /* _ASM_S390_FPU_INTERNAL_H */
...@@ -29,7 +29,6 @@ int main(void) ...@@ -29,7 +29,6 @@ int main(void)
BLANK(); BLANK();
DEFINE(__THREAD_ksp, offsetof(struct thread_struct, ksp)); DEFINE(__THREAD_ksp, offsetof(struct thread_struct, ksp));
DEFINE(__THREAD_FPU_fpc, offsetof(struct thread_struct, fpu.fpc)); DEFINE(__THREAD_FPU_fpc, offsetof(struct thread_struct, fpu.fpc));
DEFINE(__THREAD_FPU_flags, offsetof(struct thread_struct, fpu.flags));
DEFINE(__THREAD_FPU_regs, offsetof(struct thread_struct, fpu.regs)); DEFINE(__THREAD_FPU_regs, offsetof(struct thread_struct, fpu.regs));
DEFINE(__THREAD_per_cause, offsetof(struct thread_struct, per_event.cause)); DEFINE(__THREAD_per_cause, offsetof(struct thread_struct, per_event.cause));
DEFINE(__THREAD_per_address, offsetof(struct thread_struct, per_event.address)); DEFINE(__THREAD_per_address, offsetof(struct thread_struct, per_event.address));
......
...@@ -249,7 +249,7 @@ static int save_sigregs_ext32(struct pt_regs *regs, ...@@ -249,7 +249,7 @@ static int save_sigregs_ext32(struct pt_regs *regs,
return -EFAULT; return -EFAULT;
/* Save vector registers to signal stack */ /* Save vector registers to signal stack */
if (is_vx_task(current)) { if (MACHINE_HAS_VX) {
for (i = 0; i < __NUM_VXRS_LOW; i++) for (i = 0; i < __NUM_VXRS_LOW; i++)
vxrs[i] = *((__u64 *)(current->thread.fpu.vxrs + i) + 1); vxrs[i] = *((__u64 *)(current->thread.fpu.vxrs + i) + 1);
if (__copy_to_user(&sregs_ext->vxrs_low, vxrs, if (__copy_to_user(&sregs_ext->vxrs_low, vxrs,
...@@ -277,7 +277,7 @@ static int restore_sigregs_ext32(struct pt_regs *regs, ...@@ -277,7 +277,7 @@ static int restore_sigregs_ext32(struct pt_regs *regs,
*(__u32 *)&regs->gprs[i] = gprs_high[i]; *(__u32 *)&regs->gprs[i] = gprs_high[i];
/* Restore vector registers from signal stack */ /* Restore vector registers from signal stack */
if (is_vx_task(current)) { if (MACHINE_HAS_VX) {
if (__copy_from_user(vxrs, &sregs_ext->vxrs_low, if (__copy_from_user(vxrs, &sregs_ext->vxrs_low,
sizeof(sregs_ext->vxrs_low)) || sizeof(sregs_ext->vxrs_low)) ||
__copy_from_user(current->thread.fpu.vxrs + __NUM_VXRS_LOW, __copy_from_user(current->thread.fpu.vxrs + __NUM_VXRS_LOW,
...@@ -470,8 +470,7 @@ static int setup_rt_frame32(struct ksignal *ksig, sigset_t *set, ...@@ -470,8 +470,7 @@ static int setup_rt_frame32(struct ksignal *ksig, sigset_t *set,
*/ */
uc_flags = UC_GPRS_HIGH; uc_flags = UC_GPRS_HIGH;
if (MACHINE_HAS_VX) { if (MACHINE_HAS_VX) {
if (is_vx_task(current)) uc_flags |= UC_VXRS;
uc_flags |= UC_VXRS;
} else } else
frame_size -= sizeof(frame->uc.uc_mcontext_ext.vxrs_low) + frame_size -= sizeof(frame->uc.uc_mcontext_ext.vxrs_low) +
sizeof(frame->uc.uc_mcontext_ext.vxrs_high); sizeof(frame->uc.uc_mcontext_ext.vxrs_high);
......
...@@ -329,9 +329,19 @@ static __init void detect_machine_facilities(void) ...@@ -329,9 +329,19 @@ static __init void detect_machine_facilities(void)
S390_lowcore.machine_flags |= MACHINE_FLAG_TE; S390_lowcore.machine_flags |= MACHINE_FLAG_TE;
if (test_facility(51)) if (test_facility(51))
S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_LC; S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_LC;
if (test_facility(129)) if (test_facility(129)) {
S390_lowcore.machine_flags |= MACHINE_FLAG_VX; S390_lowcore.machine_flags |= MACHINE_FLAG_VX;
__ctl_set_bit(0, 17);
}
}
static int __init disable_vector_extension(char *str)
{
S390_lowcore.machine_flags &= ~MACHINE_FLAG_VX;
__ctl_clear_bit(0, 17);
return 1;
} }
early_param("novx", disable_vector_extension);
static int __init cad_setup(char *str) static int __init cad_setup(char *str)
{ {
......
...@@ -20,7 +20,6 @@ ...@@ -20,7 +20,6 @@
#include <asm/page.h> #include <asm/page.h>
#include <asm/sigp.h> #include <asm/sigp.h>
#include <asm/irq.h> #include <asm/irq.h>
#include <asm/fpu-internal.h>
#include <asm/vx-insn.h> #include <asm/vx-insn.h>
__PT_R0 = __PT_GPRS __PT_R0 = __PT_GPRS
...@@ -748,15 +747,12 @@ ENTRY(psw_idle) ...@@ -748,15 +747,12 @@ ENTRY(psw_idle)
br %r14 br %r14
.Lpsw_idle_end: .Lpsw_idle_end:
/* Store floating-point controls and floating-point or vector extension /*
* registers instead. A critical section cleanup assures that the registers * Store floating-point controls and floating-point or vector register
* are stored even if interrupted for some other work. The register %r2 * depending whether the vector facility is available. A critical section
* designates a struct fpu to store register contents. If the specified * cleanup assures that the registers are stored even if interrupted for
* structure does not contain a register save area, the register store is * some other work. The CIF_FPU flag is set to trigger a lazy restore
* omitted (see also comments in arch_dup_task_struct()). * of the register contents at return from io or a system call.
*
* The CIF_FPU flag is set in any case. The CIF_FPU triggers a lazy restore
* of the register contents at system call or io return.
*/ */
ENTRY(save_fpu_regs) ENTRY(save_fpu_regs)
lg %r2,__LC_CURRENT lg %r2,__LC_CURRENT
...@@ -768,7 +764,7 @@ ENTRY(save_fpu_regs) ...@@ -768,7 +764,7 @@ ENTRY(save_fpu_regs)
lg %r3,__THREAD_FPU_regs(%r2) lg %r3,__THREAD_FPU_regs(%r2)
ltgr %r3,%r3 ltgr %r3,%r3
jz .Lsave_fpu_regs_done # no save area -> set CIF_FPU jz .Lsave_fpu_regs_done # no save area -> set CIF_FPU
tm __THREAD_FPU_flags+3(%r2),FPU_USE_VX tm __LC_MACHINE_FLAGS+5,4 # MACHINE_HAS_VX
jz .Lsave_fpu_regs_fp # no -> store FP regs jz .Lsave_fpu_regs_fp # no -> store FP regs
.Lsave_fpu_regs_vx_low: .Lsave_fpu_regs_vx_low:
VSTM %v0,%v15,0,%r3 # vstm 0,15,0(3) VSTM %v0,%v15,0,%r3 # vstm 0,15,0(3)
...@@ -797,15 +793,15 @@ ENTRY(save_fpu_regs) ...@@ -797,15 +793,15 @@ ENTRY(save_fpu_regs)
br %r14 br %r14
.Lsave_fpu_regs_end: .Lsave_fpu_regs_end:
/* Load floating-point controls and floating-point or vector extension /*
* registers. A critical section cleanup assures that the register contents * Load floating-point controls and floating-point or vector registers.
* are loaded even if interrupted for some other work. Depending on the saved * A critical section cleanup assures that the register contents are
* FP/VX state, the vector-enablement control, CR0.46, is either set or cleared. * loaded even if interrupted for some other work.
* *
* There are special calling conventions to fit into sysc and io return work: * There are special calling conventions to fit into sysc and io return work:
* %r15: <kernel stack> * %r15: <kernel stack>
* The function requires: * The function requires:
* %r4 and __SF_EMPTY+32(%r15) * %r4
*/ */
load_fpu_regs: load_fpu_regs:
lg %r4,__LC_CURRENT lg %r4,__LC_CURRENT
...@@ -813,25 +809,14 @@ load_fpu_regs: ...@@ -813,25 +809,14 @@ load_fpu_regs:
tm __LC_CPU_FLAGS+7,_CIF_FPU tm __LC_CPU_FLAGS+7,_CIF_FPU
bnor %r14 bnor %r14
lfpc __THREAD_FPU_fpc(%r4) lfpc __THREAD_FPU_fpc(%r4)
stctg %c0,%c0,__SF_EMPTY+32(%r15) # store CR0 tm __LC_MACHINE_FLAGS+5,4 # MACHINE_HAS_VX
tm __THREAD_FPU_flags+3(%r4),FPU_USE_VX # VX-enabled task ?
lg %r4,__THREAD_FPU_regs(%r4) # %r4 <- reg save area lg %r4,__THREAD_FPU_regs(%r4) # %r4 <- reg save area
jz .Lload_fpu_regs_fp_ctl # -> no VX, load FP regs jz .Lload_fpu_regs_fp # -> no VX, load FP regs
.Lload_fpu_regs_vx_ctl:
tm __SF_EMPTY+32+5(%r15),2 # test VX control
jo .Lload_fpu_regs_vx
oi __SF_EMPTY+32+5(%r15),2 # set VX control
lctlg %c0,%c0,__SF_EMPTY+32(%r15)
.Lload_fpu_regs_vx: .Lload_fpu_regs_vx:
VLM %v0,%v15,0,%r4 VLM %v0,%v15,0,%r4
.Lload_fpu_regs_vx_high: .Lload_fpu_regs_vx_high:
VLM %v16,%v31,256,%r4 VLM %v16,%v31,256,%r4
j .Lload_fpu_regs_done j .Lload_fpu_regs_done
.Lload_fpu_regs_fp_ctl:
tm __SF_EMPTY+32+5(%r15),2 # test VX control
jz .Lload_fpu_regs_fp
ni __SF_EMPTY+32+5(%r15),253 # clear VX control
lctlg %c0,%c0,__SF_EMPTY+32(%r15)
.Lload_fpu_regs_fp: .Lload_fpu_regs_fp:
ld 0,0(%r4) ld 0,0(%r4)
ld 1,8(%r4) ld 1,8(%r4)
...@@ -854,16 +839,6 @@ load_fpu_regs: ...@@ -854,16 +839,6 @@ load_fpu_regs:
br %r14 br %r14
.Lload_fpu_regs_end: .Lload_fpu_regs_end:
/* Test and set the vector enablement control in CR0.46 */
ENTRY(__ctl_set_vx)
stctg %c0,%c0,__SF_EMPTY(%r15)
tm __SF_EMPTY+5(%r15),2
bor %r14
oi __SF_EMPTY+5(%r15),2
lctlg %c0,%c0,__SF_EMPTY(%r15)
br %r14
.L__ctl_set_vx_end:
.L__critical_end: .L__critical_end:
/* /*
...@@ -1019,10 +994,6 @@ cleanup_critical: ...@@ -1019,10 +994,6 @@ cleanup_critical:
jl 0f jl 0f
clg %r9,BASED(.Lcleanup_table+104) # .Lload_fpu_regs_end clg %r9,BASED(.Lcleanup_table+104) # .Lload_fpu_regs_end
jl .Lcleanup_load_fpu_regs jl .Lcleanup_load_fpu_regs
clg %r9,BASED(.Lcleanup_table+112) # __ctl_set_vx
jl 0f
clg %r9,BASED(.Lcleanup_table+120) # .L__ctl_set_vx_end
jl .Lcleanup___ctl_set_vx
0: br %r14 0: br %r14
.align 8 .align 8
...@@ -1041,8 +1012,6 @@ cleanup_critical: ...@@ -1041,8 +1012,6 @@ cleanup_critical:
.quad .Lsave_fpu_regs_end .quad .Lsave_fpu_regs_end
.quad load_fpu_regs .quad load_fpu_regs
.quad .Lload_fpu_regs_end .quad .Lload_fpu_regs_end
.quad __ctl_set_vx
.quad .L__ctl_set_vx_end
#if IS_ENABLED(CONFIG_KVM) #if IS_ENABLED(CONFIG_KVM)
.Lcleanup_table_sie: .Lcleanup_table_sie:
...@@ -1226,7 +1195,7 @@ cleanup_critical: ...@@ -1226,7 +1195,7 @@ cleanup_critical:
lg %r3,__THREAD_FPU_regs(%r2) lg %r3,__THREAD_FPU_regs(%r2)
ltgr %r3,%r3 ltgr %r3,%r3
jz 5f # no save area -> set CIF_FPU jz 5f # no save area -> set CIF_FPU
tm __THREAD_FPU_flags+3(%r2),FPU_USE_VX tm __LC_MACHINE_FLAGS+5,4 # MACHINE_HAS_VX
jz 4f # no VX -> store FP regs jz 4f # no VX -> store FP regs
2: # Store vector registers (V0-V15) 2: # Store vector registers (V0-V15)
VSTM %v0,%v15,0,%r3 # vstm 0,15,0(3) VSTM %v0,%v15,0,%r3 # vstm 0,15,0(3)
...@@ -1272,37 +1241,21 @@ cleanup_critical: ...@@ -1272,37 +1241,21 @@ cleanup_critical:
jhe 1f jhe 1f
clg %r9,BASED(.Lcleanup_load_fpu_regs_fp) clg %r9,BASED(.Lcleanup_load_fpu_regs_fp)
jhe 2f jhe 2f
clg %r9,BASED(.Lcleanup_load_fpu_regs_fp_ctl)
jhe 3f
clg %r9,BASED(.Lcleanup_load_fpu_regs_vx_high) clg %r9,BASED(.Lcleanup_load_fpu_regs_vx_high)
jhe 4f jhe 3f
clg %r9,BASED(.Lcleanup_load_fpu_regs_vx) clg %r9,BASED(.Lcleanup_load_fpu_regs_vx)
jhe 5f jhe 4f
clg %r9,BASED(.Lcleanup_load_fpu_regs_vx_ctl)
jhe 6f
lg %r4,__LC_CURRENT lg %r4,__LC_CURRENT
aghi %r4,__TASK_thread aghi %r4,__TASK_thread
lfpc __THREAD_FPU_fpc(%r4) lfpc __THREAD_FPU_fpc(%r4)
tm __THREAD_FPU_flags+3(%r4),FPU_USE_VX # VX-enabled task ? tm __LC_MACHINE_FLAGS+5,4 # MACHINE_HAS_VX
lg %r4,__THREAD_FPU_regs(%r4) # %r4 <- reg save area lg %r4,__THREAD_FPU_regs(%r4) # %r4 <- reg save area
jz 3f # -> no VX, load FP regs jz 2f # -> no VX, load FP regs
6: # Set VX-enablement control 4: # Load V0 ..V15 registers
stctg %c0,%c0,__SF_EMPTY+32(%r15) # store CR0
tm __SF_EMPTY+32+5(%r15),2 # test VX control
jo 5f
oi __SF_EMPTY+32+5(%r15),2 # set VX control
lctlg %c0,%c0,__SF_EMPTY+32(%r15)
5: # Load V0 ..V15 registers
VLM %v0,%v15,0,%r4 VLM %v0,%v15,0,%r4
4: # Load V16..V31 registers 3: # Load V16..V31 registers
VLM %v16,%v31,256,%r4 VLM %v16,%v31,256,%r4
j 1f j 1f
3: # Clear VX-enablement control for FP
stctg %c0,%c0,__SF_EMPTY+32(%r15) # store CR0
tm __SF_EMPTY+32+5(%r15),2 # test VX control
jz 2f
ni __SF_EMPTY+32+5(%r15),253 # clear VX control
lctlg %c0,%c0,__SF_EMPTY+32(%r15)
2: # Load floating-point registers 2: # Load floating-point registers
ld 0,0(%r4) ld 0,0(%r4)
ld 1,8(%r4) ld 1,8(%r4)
...@@ -1324,28 +1277,15 @@ cleanup_critical: ...@@ -1324,28 +1277,15 @@ cleanup_critical:
ni __LC_CPU_FLAGS+7,255-_CIF_FPU ni __LC_CPU_FLAGS+7,255-_CIF_FPU
lg %r9,48(%r11) # return from load_fpu_regs lg %r9,48(%r11) # return from load_fpu_regs
br %r14 br %r14
.Lcleanup_load_fpu_regs_vx_ctl:
.quad .Lload_fpu_regs_vx_ctl
.Lcleanup_load_fpu_regs_vx: .Lcleanup_load_fpu_regs_vx:
.quad .Lload_fpu_regs_vx .quad .Lload_fpu_regs_vx
.Lcleanup_load_fpu_regs_vx_high: .Lcleanup_load_fpu_regs_vx_high:
.quad .Lload_fpu_regs_vx_high .quad .Lload_fpu_regs_vx_high
.Lcleanup_load_fpu_regs_fp_ctl:
.quad .Lload_fpu_regs_fp_ctl
.Lcleanup_load_fpu_regs_fp: .Lcleanup_load_fpu_regs_fp:
.quad .Lload_fpu_regs_fp .quad .Lload_fpu_regs_fp
.Lcleanup_load_fpu_regs_done: .Lcleanup_load_fpu_regs_done:
.quad .Lload_fpu_regs_done .quad .Lload_fpu_regs_done
.Lcleanup___ctl_set_vx:
stctg %c0,%c0,__SF_EMPTY(%r15)
tm __SF_EMPTY+5(%r15),2
bor %r14
oi __SF_EMPTY+5(%r15),2
lctlg %c0,%c0,__SF_EMPTY(%r15)
lg %r9,48(%r11) # return from __ctl_set_vx
br %r14
/* /*
* Integer constants * Integer constants
*/ */
......
...@@ -21,8 +21,6 @@ void psw_idle(struct s390_idle_data *, unsigned long); ...@@ -21,8 +21,6 @@ void psw_idle(struct s390_idle_data *, unsigned long);
asmlinkage long do_syscall_trace_enter(struct pt_regs *regs); asmlinkage long do_syscall_trace_enter(struct pt_regs *regs);
asmlinkage void do_syscall_trace_exit(struct pt_regs *regs); asmlinkage void do_syscall_trace_exit(struct pt_regs *regs);
int alloc_vector_registers(struct task_struct *tsk);
void do_protection_exception(struct pt_regs *regs); void do_protection_exception(struct pt_regs *regs);
void do_dat_exception(struct pt_regs *regs); void do_dat_exception(struct pt_regs *regs);
......
...@@ -87,31 +87,29 @@ void arch_release_task_struct(struct task_struct *tsk) ...@@ -87,31 +87,29 @@ void arch_release_task_struct(struct task_struct *tsk)
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{ {
size_t fpu_regs_size;
*dst = *src; *dst = *src;
/* Set up a new floating-point register save area */ /*
dst->thread.fpu.fpc = 0; * If the vector extension is available, it is enabled for all tasks,
dst->thread.fpu.flags = 0; /* Always start with VX disabled */ * and, thus, the FPU register save area must be allocated accordingly.
dst->thread.fpu.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS, */
GFP_KERNEL|__GFP_REPEAT); fpu_regs_size = MACHINE_HAS_VX ? sizeof(__vector128) * __NUM_VXRS
if (!dst->thread.fpu.fprs) : sizeof(freg_t) * __NUM_FPRS;
dst->thread.fpu.regs = kzalloc(fpu_regs_size, GFP_KERNEL|__GFP_REPEAT);
if (!dst->thread.fpu.regs)
return -ENOMEM; return -ENOMEM;
/* /*
* Save the floating-point or vector register state of the current * Save the floating-point or vector register state of the current
* task. The state is not saved for early kernel threads, for example, * task and set the CIF_FPU flag to lazy restore the FPU register
* the init_task, which do not have an allocated save area. * state when returning to user space.
* The CIF_FPU flag is set in any case to lazy clear or restore a saved
* state when switching to a different task or returning to user space.
*/ */
save_fpu_regs(); save_fpu_regs();
dst->thread.fpu.fpc = current->thread.fpu.fpc; dst->thread.fpu.fpc = current->thread.fpu.fpc;
if (is_vx_task(current)) memcpy(dst->thread.fpu.regs, current->thread.fpu.regs, fpu_regs_size);
convert_vx_to_fp(dst->thread.fpu.fprs,
current->thread.fpu.vxrs);
else
memcpy(dst->thread.fpu.fprs, current->thread.fpu.fprs,
sizeof(freg_t) * __NUM_FPRS);
return 0; return 0;
} }
...@@ -199,7 +197,7 @@ int dump_fpu (struct pt_regs * regs, s390_fp_regs *fpregs) ...@@ -199,7 +197,7 @@ int dump_fpu (struct pt_regs * regs, s390_fp_regs *fpregs)
save_fpu_regs(); save_fpu_regs();
fpregs->fpc = current->thread.fpu.fpc; fpregs->fpc = current->thread.fpu.fpc;
fpregs->pad = 0; fpregs->pad = 0;
if (is_vx_task(current)) if (MACHINE_HAS_VX)
convert_vx_to_fp((freg_t *)&fpregs->fprs, convert_vx_to_fp((freg_t *)&fpregs->fprs,
current->thread.fpu.vxrs); current->thread.fpu.vxrs);
else else
......
...@@ -239,7 +239,7 @@ static unsigned long __peek_user(struct task_struct *child, addr_t addr) ...@@ -239,7 +239,7 @@ static unsigned long __peek_user(struct task_struct *child, addr_t addr)
* or the child->thread.fpu.vxrs array * or the child->thread.fpu.vxrs array
*/ */
offset = addr - (addr_t) &dummy->regs.fp_regs.fprs; offset = addr - (addr_t) &dummy->regs.fp_regs.fprs;
if (is_vx_task(child)) if (MACHINE_HAS_VX)
tmp = *(addr_t *) tmp = *(addr_t *)
((addr_t) child->thread.fpu.vxrs + 2*offset); ((addr_t) child->thread.fpu.vxrs + 2*offset);
else else
...@@ -383,7 +383,7 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) ...@@ -383,7 +383,7 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data)
* or the child->thread.fpu.vxrs array * or the child->thread.fpu.vxrs array
*/ */
offset = addr - (addr_t) &dummy->regs.fp_regs.fprs; offset = addr - (addr_t) &dummy->regs.fp_regs.fprs;
if (is_vx_task(child)) if (MACHINE_HAS_VX)
*(addr_t *)((addr_t) *(addr_t *)((addr_t)
child->thread.fpu.vxrs + 2*offset) = data; child->thread.fpu.vxrs + 2*offset) = data;
else else
...@@ -617,7 +617,7 @@ static u32 __peek_user_compat(struct task_struct *child, addr_t addr) ...@@ -617,7 +617,7 @@ static u32 __peek_user_compat(struct task_struct *child, addr_t addr)
* or the child->thread.fpu.vxrs array * or the child->thread.fpu.vxrs array
*/ */
offset = addr - (addr_t) &dummy32->regs.fp_regs.fprs; offset = addr - (addr_t) &dummy32->regs.fp_regs.fprs;
if (is_vx_task(child)) if (MACHINE_HAS_VX)
tmp = *(__u32 *) tmp = *(__u32 *)
((addr_t) child->thread.fpu.vxrs + 2*offset); ((addr_t) child->thread.fpu.vxrs + 2*offset);
else else
...@@ -742,7 +742,7 @@ static int __poke_user_compat(struct task_struct *child, ...@@ -742,7 +742,7 @@ static int __poke_user_compat(struct task_struct *child,
* or the child->thread.fpu.vxrs array * or the child->thread.fpu.vxrs array
*/ */
offset = addr - (addr_t) &dummy32->regs.fp_regs.fprs; offset = addr - (addr_t) &dummy32->regs.fp_regs.fprs;
if (is_vx_task(child)) if (MACHINE_HAS_VX)
*(__u32 *)((addr_t) *(__u32 *)((addr_t)
child->thread.fpu.vxrs + 2*offset) = tmp; child->thread.fpu.vxrs + 2*offset) = tmp;
else else
...@@ -981,7 +981,7 @@ static int s390_fpregs_set(struct task_struct *target, ...@@ -981,7 +981,7 @@ static int s390_fpregs_set(struct task_struct *target,
if (rc) if (rc)
return rc; return rc;
if (is_vx_task(target)) if (MACHINE_HAS_VX)
convert_fp_to_vx(target->thread.fpu.vxrs, fprs); convert_fp_to_vx(target->thread.fpu.vxrs, fprs);
else else
memcpy(target->thread.fpu.fprs, &fprs, sizeof(fprs)); memcpy(target->thread.fpu.fprs, &fprs, sizeof(fprs));
...@@ -1047,13 +1047,10 @@ static int s390_vxrs_low_get(struct task_struct *target, ...@@ -1047,13 +1047,10 @@ static int s390_vxrs_low_get(struct task_struct *target,
if (!MACHINE_HAS_VX) if (!MACHINE_HAS_VX)
return -ENODEV; return -ENODEV;
if (is_vx_task(target)) { if (target == current)
if (target == current) save_fpu_regs();
save_fpu_regs(); for (i = 0; i < __NUM_VXRS_LOW; i++)
for (i = 0; i < __NUM_VXRS_LOW; i++) vxrs[i] = *((__u64 *)(target->thread.fpu.vxrs + i) + 1);
vxrs[i] = *((__u64 *)(target->thread.fpu.vxrs + i) + 1);
} else
memset(vxrs, 0, sizeof(vxrs));
return user_regset_copyout(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1); return user_regset_copyout(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
} }
...@@ -1067,11 +1064,7 @@ static int s390_vxrs_low_set(struct task_struct *target, ...@@ -1067,11 +1064,7 @@ static int s390_vxrs_low_set(struct task_struct *target,
if (!MACHINE_HAS_VX) if (!MACHINE_HAS_VX)
return -ENODEV; return -ENODEV;
if (!is_vx_task(target)) { if (target == current)
rc = alloc_vector_registers(target);
if (rc)
return rc;
} else if (target == current)
save_fpu_regs(); save_fpu_regs();
rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1); rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
...@@ -1091,13 +1084,10 @@ static int s390_vxrs_high_get(struct task_struct *target, ...@@ -1091,13 +1084,10 @@ static int s390_vxrs_high_get(struct task_struct *target,
if (!MACHINE_HAS_VX) if (!MACHINE_HAS_VX)
return -ENODEV; return -ENODEV;
if (is_vx_task(target)) { if (target == current)
if (target == current) save_fpu_regs();
save_fpu_regs(); memcpy(vxrs, target->thread.fpu.vxrs + __NUM_VXRS_LOW, sizeof(vxrs));
memcpy(vxrs, target->thread.fpu.vxrs + __NUM_VXRS_LOW,
sizeof(vxrs));
} else
memset(vxrs, 0, sizeof(vxrs));
return user_regset_copyout(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1); return user_regset_copyout(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
} }
...@@ -1110,11 +1100,7 @@ static int s390_vxrs_high_set(struct task_struct *target, ...@@ -1110,11 +1100,7 @@ static int s390_vxrs_high_set(struct task_struct *target,
if (!MACHINE_HAS_VX) if (!MACHINE_HAS_VX)
return -ENODEV; return -ENODEV;
if (!is_vx_task(target)) { if (target == current)
rc = alloc_vector_registers(target);
if (rc)
return rc;
} else if (target == current)
save_fpu_regs(); save_fpu_regs();
rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
......
...@@ -10,7 +10,6 @@ EXPORT_SYMBOL(_mcount); ...@@ -10,7 +10,6 @@ EXPORT_SYMBOL(_mcount);
EXPORT_SYMBOL(sie64a); EXPORT_SYMBOL(sie64a);
EXPORT_SYMBOL(sie_exit); EXPORT_SYMBOL(sie_exit);
EXPORT_SYMBOL(save_fpu_regs); EXPORT_SYMBOL(save_fpu_regs);
EXPORT_SYMBOL(__ctl_set_vx);
#endif #endif
EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(memset); EXPORT_SYMBOL(memset);
...@@ -179,7 +179,7 @@ static int save_sigregs_ext(struct pt_regs *regs, ...@@ -179,7 +179,7 @@ static int save_sigregs_ext(struct pt_regs *regs,
int i; int i;
/* Save vector registers to signal stack */ /* Save vector registers to signal stack */
if (is_vx_task(current)) { if (MACHINE_HAS_VX) {
for (i = 0; i < __NUM_VXRS_LOW; i++) for (i = 0; i < __NUM_VXRS_LOW; i++)
vxrs[i] = *((__u64 *)(current->thread.fpu.vxrs + i) + 1); vxrs[i] = *((__u64 *)(current->thread.fpu.vxrs + i) + 1);
if (__copy_to_user(&sregs_ext->vxrs_low, vxrs, if (__copy_to_user(&sregs_ext->vxrs_low, vxrs,
...@@ -199,7 +199,7 @@ static int restore_sigregs_ext(struct pt_regs *regs, ...@@ -199,7 +199,7 @@ static int restore_sigregs_ext(struct pt_regs *regs,
int i; int i;
/* Restore vector registers from signal stack */ /* Restore vector registers from signal stack */
if (is_vx_task(current)) { if (MACHINE_HAS_VX) {
if (__copy_from_user(vxrs, &sregs_ext->vxrs_low, if (__copy_from_user(vxrs, &sregs_ext->vxrs_low,
sizeof(sregs_ext->vxrs_low)) || sizeof(sregs_ext->vxrs_low)) ||
__copy_from_user(current->thread.fpu.vxrs + __NUM_VXRS_LOW, __copy_from_user(current->thread.fpu.vxrs + __NUM_VXRS_LOW,
...@@ -381,8 +381,7 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, ...@@ -381,8 +381,7 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
uc_flags = 0; uc_flags = 0;
if (MACHINE_HAS_VX) { if (MACHINE_HAS_VX) {
frame_size += sizeof(_sigregs_ext); frame_size += sizeof(_sigregs_ext);
if (is_vx_task(current)) uc_flags |= UC_VXRS;
uc_flags |= UC_VXRS;
} }
frame = get_sigframe(&ksig->ka, regs, frame_size); frame = get_sigframe(&ksig->ka, regs, frame_size);
if (frame == (void __user *) -1UL) if (frame == (void __user *) -1UL)
......
...@@ -224,29 +224,6 @@ NOKPROBE_SYMBOL(illegal_op); ...@@ -224,29 +224,6 @@ NOKPROBE_SYMBOL(illegal_op);
DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN, DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN,
"specification exception"); "specification exception");
int alloc_vector_registers(struct task_struct *tsk)
{
__vector128 *vxrs;
freg_t *fprs;
/* Allocate vector register save area. */
vxrs = kzalloc(sizeof(__vector128) * __NUM_VXRS,
GFP_KERNEL|__GFP_REPEAT);
if (!vxrs)
return -ENOMEM;
preempt_disable();
if (tsk == current)
save_fpu_regs();
/* Copy the 16 floating point registers */
convert_fp_to_vx(vxrs, tsk->thread.fpu.fprs);
fprs = tsk->thread.fpu.fprs;
tsk->thread.fpu.vxrs = vxrs;
tsk->thread.fpu.flags |= FPU_USE_VX;
kfree(fprs);
preempt_enable();
return 0;
}
void vector_exception(struct pt_regs *regs) void vector_exception(struct pt_regs *regs)
{ {
int si_code, vic; int si_code, vic;
...@@ -281,13 +258,6 @@ void vector_exception(struct pt_regs *regs) ...@@ -281,13 +258,6 @@ void vector_exception(struct pt_regs *regs)
do_trap(regs, SIGFPE, si_code, "vector exception"); do_trap(regs, SIGFPE, si_code, "vector exception");
} }
static int __init disable_vector_extension(char *str)
{
S390_lowcore.machine_flags &= ~MACHINE_FLAG_VX;
return 1;
}
__setup("novx", disable_vector_extension);
void data_exception(struct pt_regs *regs) void data_exception(struct pt_regs *regs)
{ {
__u16 __user *location; __u16 __user *location;
...@@ -296,15 +266,6 @@ void data_exception(struct pt_regs *regs) ...@@ -296,15 +266,6 @@ void data_exception(struct pt_regs *regs)
location = get_trap_ip(regs); location = get_trap_ip(regs);
save_fpu_regs(); save_fpu_regs();
/* Check for vector register enablement */
if (MACHINE_HAS_VX && !is_vx_task(current) &&
(current->thread.fpu.fpc & FPC_DXC_MASK) == 0xfe00) {
alloc_vector_registers(current);
/* Vector data exception is suppressing, rewind psw. */
regs->psw.addr = __rewind_psw(regs->psw, regs->int_code >> 16);
clear_pt_regs_flag(regs, PIF_PER_TRAP);
return;
}
if (current->thread.fpu.fpc & FPC_DXC_MASK) if (current->thread.fpu.fpc & FPC_DXC_MASK)
signal = SIGFPE; signal = SIGFPE;
else else
......
...@@ -1292,7 +1292,6 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) ...@@ -1292,7 +1292,6 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
static inline void save_fpu_to(struct fpu *dst) static inline void save_fpu_to(struct fpu *dst)
{ {
dst->fpc = current->thread.fpu.fpc; dst->fpc = current->thread.fpu.fpc;
dst->flags = current->thread.fpu.flags;
dst->regs = current->thread.fpu.regs; dst->regs = current->thread.fpu.regs;
} }
...@@ -1303,7 +1302,6 @@ static inline void save_fpu_to(struct fpu *dst) ...@@ -1303,7 +1302,6 @@ static inline void save_fpu_to(struct fpu *dst)
static inline void load_fpu_from(struct fpu *from) static inline void load_fpu_from(struct fpu *from)
{ {
current->thread.fpu.fpc = from->fpc; current->thread.fpu.fpc = from->fpc;
current->thread.fpu.flags = from->flags;
current->thread.fpu.regs = from->regs; current->thread.fpu.regs = from->regs;
} }
...@@ -1315,15 +1313,12 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) ...@@ -1315,15 +1313,12 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
if (test_kvm_facility(vcpu->kvm, 129)) { if (test_kvm_facility(vcpu->kvm, 129)) {
current->thread.fpu.fpc = vcpu->run->s.regs.fpc; current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
current->thread.fpu.flags = FPU_USE_VX;
/* /*
* Use the register save area in the SIE-control block * Use the register save area in the SIE-control block
* for register restore and save in kvm_arch_vcpu_put() * for register restore and save in kvm_arch_vcpu_put()
*/ */
current->thread.fpu.vxrs = current->thread.fpu.vxrs =
(__vector128 *)&vcpu->run->s.regs.vrs; (__vector128 *)&vcpu->run->s.regs.vrs;
/* Always enable the vector extension for KVM */
__ctl_set_vx();
} else } else
load_fpu_from(&vcpu->arch.guest_fpregs); load_fpu_from(&vcpu->arch.guest_fpregs);
...@@ -2326,7 +2321,6 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) ...@@ -2326,7 +2321,6 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
* registers and the FPC value and store them in the * registers and the FPC value and store them in the
* guest_fpregs structure. * guest_fpregs structure.
*/ */
WARN_ON(!is_vx_task(current)); /* XXX remove later */
vcpu->arch.guest_fpregs.fpc = current->thread.fpu.fpc; vcpu->arch.guest_fpregs.fpc = current->thread.fpu.fpc;
convert_vx_to_fp(vcpu->arch.guest_fpregs.fprs, convert_vx_to_fp(vcpu->arch.guest_fpregs.fprs,
current->thread.fpu.vxrs); current->thread.fpu.vxrs);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment