Commit b61b1595 authored by Sven Schnelle, committed by Vasily Gorbik

s390: add stack for machine check handler

The previous code used the normal kernel stack for machine checks.
This is problematic when a machine check interrupts a system call
or interrupt handler right at the beginning where registers are set up.

Assume system_call is interrupted at its first instruction by a machine
check. The machine check handler is called and checks the PSW to see
whether it is coming from user space. It finds that the CPU is already in
kernel mode and therefore keeps using %r15 as its stack pointer, but %r15
still contains the user space stack pointer. This would lead to a kernel
crash.

There are basically two ways of fixing that: either use the 'critical
cleanup' approach, which compares the address in the PSW to see whether
it is already at a point where the stack has been set up, or use an extra
stack for the machine check handler.

For simplicity, we will go with the second approach and allocate an extra
stack. This adds some memory overhead on large systems, but large systems
usually have plenty of memory, so this isn't really a concern. In return,
it keeps the mchk stack setup simple and less error-prone.

Fixes: 0b0ed657 ("s390: remove critical section cleanup from entry.S")
Signed-off-by: Sven Schnelle <svens@linux.ibm.com>
Cc: <stable@kernel.org> # v5.8+
Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
parent 64985c3a
...@@ -107,16 +107,15 @@ struct lowcore { ...@@ -107,16 +107,15 @@ struct lowcore {
__u64 async_stack; /* 0x0350 */ __u64 async_stack; /* 0x0350 */
__u64 nodat_stack; /* 0x0358 */ __u64 nodat_stack; /* 0x0358 */
__u64 restart_stack; /* 0x0360 */ __u64 restart_stack; /* 0x0360 */
__u64 mcck_stack; /* 0x0368 */
/* Restart function and parameter. */ /* Restart function and parameter. */
__u64 restart_fn; /* 0x0368 */ __u64 restart_fn; /* 0x0370 */
__u64 restart_data; /* 0x0370 */ __u64 restart_data; /* 0x0378 */
__u64 restart_source; /* 0x0378 */ __u64 restart_source; /* 0x0380 */
/* Address space pointer. */ /* Address space pointer. */
__u64 kernel_asce; /* 0x0380 */ __u64 kernel_asce; /* 0x0388 */
__u64 user_asce; /* 0x0388 */ __u64 user_asce; /* 0x0390 */
__u8 pad_0x0390[0x0398-0x0390]; /* 0x0390 */
/* /*
* The lpp and current_pid fields form a * The lpp and current_pid fields form a
......
...@@ -118,6 +118,7 @@ int main(void) ...@@ -118,6 +118,7 @@ int main(void)
OFFSET(__LC_ASYNC_STACK, lowcore, async_stack); OFFSET(__LC_ASYNC_STACK, lowcore, async_stack);
OFFSET(__LC_NODAT_STACK, lowcore, nodat_stack); OFFSET(__LC_NODAT_STACK, lowcore, nodat_stack);
OFFSET(__LC_RESTART_STACK, lowcore, restart_stack); OFFSET(__LC_RESTART_STACK, lowcore, restart_stack);
OFFSET(__LC_MCCK_STACK, lowcore, mcck_stack);
OFFSET(__LC_RESTART_FN, lowcore, restart_fn); OFFSET(__LC_RESTART_FN, lowcore, restart_fn);
OFFSET(__LC_RESTART_DATA, lowcore, restart_data); OFFSET(__LC_RESTART_DATA, lowcore, restart_data);
OFFSET(__LC_RESTART_SOURCE, lowcore, restart_source); OFFSET(__LC_RESTART_SOURCE, lowcore, restart_source);
......
...@@ -70,6 +70,8 @@ _LPP_OFFSET = __LC_LPP ...@@ -70,6 +70,8 @@ _LPP_OFFSET = __LC_LPP
je \oklabel je \oklabel
clg %r14,__LC_ASYNC_STACK clg %r14,__LC_ASYNC_STACK
je \oklabel je \oklabel
clg %r14,__LC_MCCK_STACK
je \oklabel
clg %r14,__LC_NODAT_STACK clg %r14,__LC_NODAT_STACK
je \oklabel je \oklabel
clg %r14,__LC_RESTART_STACK clg %r14,__LC_RESTART_STACK
...@@ -548,20 +550,16 @@ ENTRY(mcck_int_handler) ...@@ -548,20 +550,16 @@ ENTRY(mcck_int_handler)
jhe .Lmcck_stack jhe .Lmcck_stack
lghi %r11,__LC_GPREGS_SAVE_AREA+64 # inside critical section, do cleanup lghi %r11,__LC_GPREGS_SAVE_AREA+64 # inside critical section, do cleanup
brasl %r14,.Lcleanup_sie brasl %r14,.Lcleanup_sie
.Lmcck_stack:
#endif #endif
CHECK_STACK __LC_GPREGS_SAVE_AREA+64 j .Lmcck_stack
lgr %r11,%r15
aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
stg %r11,__SF_BACKCHAIN(%r15)
j 5f
.Lmcck_user: .Lmcck_user:
BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP
.Lmcck_stack:
lg %r15,__LC_MCCK_STACK
.Lmcck_skip:
la %r11,STACK_FRAME_OVERHEAD(%r15)
lctlg %c1,%c1,__LC_KERNEL_ASCE lctlg %c1,%c1,__LC_KERNEL_ASCE
lg %r15,__LC_KERNEL_STACK
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
5: la %r11,STACK_FRAME_OVERHEAD(%r15)
.Lmcck_skip:
lghi %r14,__LC_GPREGS_SAVE_AREA+64 lghi %r14,__LC_GPREGS_SAVE_AREA+64
stmg %r0,%r7,__PT_R0(%r11) stmg %r0,%r7,__PT_R0(%r11)
# clear user controlled registers to prevent speculative use # clear user controlled registers to prevent speculative use
...@@ -602,7 +600,6 @@ ENTRY(mcck_int_handler) ...@@ -602,7 +600,6 @@ ENTRY(mcck_int_handler)
.Lmcck_panic: .Lmcck_panic:
lg %r15,__LC_NODAT_STACK lg %r15,__LC_NODAT_STACK
la %r11,STACK_FRAME_OVERHEAD(%r15)
j .Lmcck_skip j .Lmcck_skip
ENDPROC(mcck_int_handler) ENDPROC(mcck_int_handler)
......
...@@ -338,7 +338,7 @@ int __init arch_early_irq_init(void) ...@@ -338,7 +338,7 @@ int __init arch_early_irq_init(void)
return 0; return 0;
} }
static int __init async_stack_realloc(void) static int __init stack_realloc(void)
{ {
unsigned long old, new; unsigned long old, new;
...@@ -348,9 +348,16 @@ static int __init async_stack_realloc(void) ...@@ -348,9 +348,16 @@ static int __init async_stack_realloc(void)
panic("Couldn't allocate async stack"); panic("Couldn't allocate async stack");
WRITE_ONCE(S390_lowcore.async_stack, new + STACK_INIT_OFFSET); WRITE_ONCE(S390_lowcore.async_stack, new + STACK_INIT_OFFSET);
free_pages(old, THREAD_SIZE_ORDER); free_pages(old, THREAD_SIZE_ORDER);
old = S390_lowcore.mcck_stack - STACK_INIT_OFFSET;
new = stack_alloc();
if (!new)
panic("Couldn't allocate machine check stack");
WRITE_ONCE(S390_lowcore.mcck_stack, new + STACK_INIT_OFFSET);
memblock_free(old, THREAD_SIZE);
return 0; return 0;
} }
early_initcall(async_stack_realloc); early_initcall(stack_realloc);
void __init arch_call_rest_init(void) void __init arch_call_rest_init(void)
{ {
...@@ -372,6 +379,7 @@ void __init arch_call_rest_init(void) ...@@ -372,6 +379,7 @@ void __init arch_call_rest_init(void)
static void __init setup_lowcore_dat_off(void) static void __init setup_lowcore_dat_off(void)
{ {
unsigned long int_psw_mask = PSW_KERNEL_BITS; unsigned long int_psw_mask = PSW_KERNEL_BITS;
unsigned long mcck_stack;
struct lowcore *lc; struct lowcore *lc;
if (IS_ENABLED(CONFIG_KASAN)) if (IS_ENABLED(CONFIG_KASAN))
...@@ -439,6 +447,12 @@ static void __init setup_lowcore_dat_off(void) ...@@ -439,6 +447,12 @@ static void __init setup_lowcore_dat_off(void)
lc->restart_data = 0; lc->restart_data = 0;
lc->restart_source = -1UL; lc->restart_source = -1UL;
mcck_stack = (unsigned long)memblock_alloc(THREAD_SIZE, THREAD_SIZE);
if (!mcck_stack)
panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
__func__, THREAD_SIZE, THREAD_SIZE);
lc->mcck_stack = mcck_stack + STACK_INIT_OFFSET;
/* Setup absolute zero lowcore */ /* Setup absolute zero lowcore */
mem_assign_absolute(S390_lowcore.restart_stack, lc->restart_stack); mem_assign_absolute(S390_lowcore.restart_stack, lc->restart_stack);
mem_assign_absolute(S390_lowcore.restart_fn, lc->restart_fn); mem_assign_absolute(S390_lowcore.restart_fn, lc->restart_fn);
......
...@@ -189,7 +189,7 @@ static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit) ...@@ -189,7 +189,7 @@ static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit)
static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
{ {
unsigned long async_stack, nodat_stack; unsigned long async_stack, nodat_stack, mcck_stack;
struct lowcore *lc; struct lowcore *lc;
if (pcpu != &pcpu_devices[0]) { if (pcpu != &pcpu_devices[0]) {
...@@ -202,13 +202,15 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) ...@@ -202,13 +202,15 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
nodat_stack = pcpu->lowcore->nodat_stack - STACK_INIT_OFFSET; nodat_stack = pcpu->lowcore->nodat_stack - STACK_INIT_OFFSET;
} }
async_stack = stack_alloc(); async_stack = stack_alloc();
if (!async_stack) mcck_stack = stack_alloc();
goto out; if (!async_stack || !mcck_stack)
goto out_stack;
lc = pcpu->lowcore; lc = pcpu->lowcore;
memcpy(lc, &S390_lowcore, 512); memcpy(lc, &S390_lowcore, 512);
memset((char *) lc + 512, 0, sizeof(*lc) - 512); memset((char *) lc + 512, 0, sizeof(*lc) - 512);
lc->async_stack = async_stack + STACK_INIT_OFFSET; lc->async_stack = async_stack + STACK_INIT_OFFSET;
lc->nodat_stack = nodat_stack + STACK_INIT_OFFSET; lc->nodat_stack = nodat_stack + STACK_INIT_OFFSET;
lc->mcck_stack = mcck_stack + STACK_INIT_OFFSET;
lc->cpu_nr = cpu; lc->cpu_nr = cpu;
lc->spinlock_lockval = arch_spin_lockval(cpu); lc->spinlock_lockval = arch_spin_lockval(cpu);
lc->spinlock_index = 0; lc->spinlock_index = 0;
...@@ -216,12 +218,13 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) ...@@ -216,12 +218,13 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW); lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW);
lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW); lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
if (nmi_alloc_per_cpu(lc)) if (nmi_alloc_per_cpu(lc))
goto out_async; goto out_stack;
lowcore_ptr[cpu] = lc; lowcore_ptr[cpu] = lc;
pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, (u32)(unsigned long) lc); pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, (u32)(unsigned long) lc);
return 0; return 0;
out_async: out_stack:
stack_free(mcck_stack);
stack_free(async_stack); stack_free(async_stack);
out: out:
if (pcpu != &pcpu_devices[0]) { if (pcpu != &pcpu_devices[0]) {
...@@ -233,16 +236,18 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) ...@@ -233,16 +236,18 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
static void pcpu_free_lowcore(struct pcpu *pcpu) static void pcpu_free_lowcore(struct pcpu *pcpu)
{ {
unsigned long async_stack, nodat_stack, lowcore; unsigned long async_stack, nodat_stack, mcck_stack, lowcore;
nodat_stack = pcpu->lowcore->nodat_stack - STACK_INIT_OFFSET; nodat_stack = pcpu->lowcore->nodat_stack - STACK_INIT_OFFSET;
async_stack = pcpu->lowcore->async_stack - STACK_INIT_OFFSET; async_stack = pcpu->lowcore->async_stack - STACK_INIT_OFFSET;
mcck_stack = pcpu->lowcore->mcck_stack - STACK_INIT_OFFSET;
lowcore = (unsigned long) pcpu->lowcore; lowcore = (unsigned long) pcpu->lowcore;
pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0); pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0);
lowcore_ptr[pcpu - pcpu_devices] = NULL; lowcore_ptr[pcpu - pcpu_devices] = NULL;
nmi_free_per_cpu(pcpu->lowcore); nmi_free_per_cpu(pcpu->lowcore);
stack_free(async_stack); stack_free(async_stack);
stack_free(mcck_stack);
if (pcpu == &pcpu_devices[0]) if (pcpu == &pcpu_devices[0])
return; return;
free_pages(nodat_stack, THREAD_SIZE_ORDER); free_pages(nodat_stack, THREAD_SIZE_ORDER);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment