Commit 20a6ff72 authored by Austin Clements

runtime: eliminate uses of BP on amd64

Any place that clobbers BP in the runtime can potentially interfere
with frame pointer unwinding with GOEXPERIMENT=framepointer.  This
change eliminates uses of BP in the runtime to address this problem.
We have spare registers everywhere this occurs, so there's no downside
to eliminating BP.  Where possible, this uses the same new register as
the amd64p32 runtime, which doesn't use BP due to restrictions placed
on it by NaCl.

One nice side effect of this is that it will let perf/VTune unwind the
call stack even through a call to systemstack, which will let us get
really good call graphs from the garbage collector.

Change-Id: I0ffa14cb4dd2b613a7049b8ec59df37c52286212
Reviewed-on: https://go-review.googlesource.com/3390
Reviewed-by: Minux Ma <minux@golang.org>
Reviewed-by: Russ Cox <rsc@golang.org>
parent 28b51184
...@@ -96,8 +96,8 @@ ok: ...@@ -96,8 +96,8 @@ ok:
CALL runtime·schedinit(SB) CALL runtime·schedinit(SB)
// create a new goroutine to start program // create a new goroutine to start program
MOVQ $runtime·main·f(SB), BP // entry MOVQ $runtime·main·f(SB), AX // entry
PUSHQ BP PUSHQ AX
PUSHQ $0 // arg size PUSHQ $0 // arg size
CALL runtime·newproc(SB) CALL runtime·newproc(SB)
POPQ AX POPQ AX
...@@ -213,8 +213,8 @@ TEXT runtime·systemstack(SB), NOSPLIT, $0-8 ...@@ -213,8 +213,8 @@ TEXT runtime·systemstack(SB), NOSPLIT, $0-8
CMPQ AX, DX CMPQ AX, DX
JEQ noswitch JEQ noswitch
MOVQ m_curg(BX), BP MOVQ m_curg(BX), R8
CMPQ AX, BP CMPQ AX, R8
JEQ switch JEQ switch
// Bad: g is not gsignal, not g0, not curg. What is it? // Bad: g is not gsignal, not g0, not curg. What is it?
...@@ -224,8 +224,8 @@ TEXT runtime·systemstack(SB), NOSPLIT, $0-8 ...@@ -224,8 +224,8 @@ TEXT runtime·systemstack(SB), NOSPLIT, $0-8
switch: switch:
// save our state in g->sched. Pretend to // save our state in g->sched. Pretend to
// be systemstack_switch if the G stack is scanned. // be systemstack_switch if the G stack is scanned.
MOVQ $runtime·systemstack_switch(SB), BP MOVQ $runtime·systemstack_switch(SB), SI
MOVQ BP, (g_sched+gobuf_pc)(AX) MOVQ SI, (g_sched+gobuf_pc)(AX)
MOVQ SP, (g_sched+gobuf_sp)(AX) MOVQ SP, (g_sched+gobuf_sp)(AX)
MOVQ AX, (g_sched+gobuf_g)(AX) MOVQ AX, (g_sched+gobuf_g)(AX)
...@@ -305,9 +305,9 @@ TEXT runtime·morestack(SB),NOSPLIT,$0-0 ...@@ -305,9 +305,9 @@ TEXT runtime·morestack(SB),NOSPLIT,$0-0
MOVQ DX, (g_sched+gobuf_ctxt)(SI) MOVQ DX, (g_sched+gobuf_ctxt)(SI)
// Call newstack on m->g0's stack. // Call newstack on m->g0's stack.
MOVQ m_g0(BX), BP MOVQ m_g0(BX), BX
MOVQ BP, g(CX) MOVQ BX, g(CX)
MOVQ (g_sched+gobuf_sp)(BP), SP MOVQ (g_sched+gobuf_sp)(BX), SP
CALL runtime·newstack(SB) CALL runtime·newstack(SB)
MOVQ $0, 0x1003 // crash if newstack returns MOVQ $0, 0x1003 // crash if newstack returns
RET RET
...@@ -619,17 +619,17 @@ TEXT asmcgocall<>(SB),NOSPLIT,$0-0 ...@@ -619,17 +619,17 @@ TEXT asmcgocall<>(SB),NOSPLIT,$0-0
// We get called to create new OS threads too, and those // We get called to create new OS threads too, and those
// come in on the m->g0 stack already. // come in on the m->g0 stack already.
get_tls(CX) get_tls(CX)
MOVQ g(CX), BP MOVQ g(CX), R8
MOVQ g_m(BP), BP MOVQ g_m(R8), R8
MOVQ m_g0(BP), SI MOVQ m_g0(R8), SI
MOVQ g(CX), DI MOVQ g(CX), DI
CMPQ SI, DI CMPQ SI, DI
JEQ nosave JEQ nosave
MOVQ m_gsignal(BP), SI MOVQ m_gsignal(R8), SI
CMPQ SI, DI CMPQ SI, DI
JEQ nosave JEQ nosave
MOVQ m_g0(BP), SI MOVQ m_g0(R8), SI
CALL gosave<>(SB) CALL gosave<>(SB)
MOVQ SI, g(CX) MOVQ SI, g(CX)
MOVQ (g_sched+gobuf_sp)(SI), SP MOVQ (g_sched+gobuf_sp)(SI), SP
...@@ -683,15 +683,15 @@ TEXT ·cgocallback_gofunc(SB),NOSPLIT,$8-24 ...@@ -683,15 +683,15 @@ TEXT ·cgocallback_gofunc(SB),NOSPLIT,$8-24
// the linker analysis by using an indirect call through AX. // the linker analysis by using an indirect call through AX.
get_tls(CX) get_tls(CX)
#ifdef GOOS_windows #ifdef GOOS_windows
MOVL $0, BP MOVL $0, BX
CMPQ CX, $0 CMPQ CX, $0
JEQ 2(PC) JEQ 2(PC)
#endif #endif
MOVQ g(CX), BP MOVQ g(CX), BX
CMPQ BP, $0 CMPQ BX, $0
JEQ needm JEQ needm
MOVQ g_m(BP), BP MOVQ g_m(BX), BX
MOVQ BP, R8 // holds oldm until end of function MOVQ BX, R8 // holds oldm until end of function
JMP havem JMP havem
needm: needm:
MOVQ $0, 0(SP) MOVQ $0, 0(SP)
...@@ -699,8 +699,8 @@ needm: ...@@ -699,8 +699,8 @@ needm:
CALL AX CALL AX
MOVQ 0(SP), R8 MOVQ 0(SP), R8
get_tls(CX) get_tls(CX)
MOVQ g(CX), BP MOVQ g(CX), BX
MOVQ g_m(BP), BP MOVQ g_m(BX), BX
// Set m->sched.sp = SP, so that if a panic happens // Set m->sched.sp = SP, so that if a panic happens
// during the function we are about to execute, it will // during the function we are about to execute, it will
...@@ -713,7 +713,7 @@ needm: ...@@ -713,7 +713,7 @@ needm:
// and then systemstack will try to use it. If we don't set it here, // and then systemstack will try to use it. If we don't set it here,
// that restored SP will be uninitialized (typically 0) and // that restored SP will be uninitialized (typically 0) and
// will not be usable. // will not be usable.
MOVQ m_g0(BP), SI MOVQ m_g0(BX), SI
MOVQ SP, (g_sched+gobuf_sp)(SI) MOVQ SP, (g_sched+gobuf_sp)(SI)
havem: havem:
...@@ -722,7 +722,7 @@ havem: ...@@ -722,7 +722,7 @@ havem:
// Save current sp in m->g0->sched.sp in preparation for // Save current sp in m->g0->sched.sp in preparation for
// switch back to m->curg stack. // switch back to m->curg stack.
// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP). // NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
MOVQ m_g0(BP), SI MOVQ m_g0(BX), SI
MOVQ (g_sched+gobuf_sp)(SI), AX MOVQ (g_sched+gobuf_sp)(SI), AX
MOVQ AX, 0(SP) MOVQ AX, 0(SP)
MOVQ SP, (g_sched+gobuf_sp)(SI) MOVQ SP, (g_sched+gobuf_sp)(SI)
...@@ -742,11 +742,11 @@ havem: ...@@ -742,11 +742,11 @@ havem:
// the earlier calls. // the earlier calls.
// //
// In the new goroutine, 0(SP) holds the saved R8. // In the new goroutine, 0(SP) holds the saved R8.
MOVQ m_curg(BP), SI MOVQ m_curg(BX), SI
MOVQ SI, g(CX) MOVQ SI, g(CX)
MOVQ (g_sched+gobuf_sp)(SI), DI // prepare stack as DI MOVQ (g_sched+gobuf_sp)(SI), DI // prepare stack as DI
MOVQ (g_sched+gobuf_pc)(SI), BP MOVQ (g_sched+gobuf_pc)(SI), BX
MOVQ BP, -8(DI) MOVQ BX, -8(DI)
LEAQ -(8+8)(DI), SP LEAQ -(8+8)(DI), SP
MOVQ R8, 0(SP) MOVQ R8, 0(SP)
CALL runtime·cgocallbackg(SB) CALL runtime·cgocallbackg(SB)
...@@ -755,17 +755,17 @@ havem: ...@@ -755,17 +755,17 @@ havem:
// Restore g->sched (== m->curg->sched) from saved values. // Restore g->sched (== m->curg->sched) from saved values.
get_tls(CX) get_tls(CX)
MOVQ g(CX), SI MOVQ g(CX), SI
MOVQ 8(SP), BP MOVQ 8(SP), BX
MOVQ BP, (g_sched+gobuf_pc)(SI) MOVQ BX, (g_sched+gobuf_pc)(SI)
LEAQ (8+8)(SP), DI LEAQ (8+8)(SP), DI
MOVQ DI, (g_sched+gobuf_sp)(SI) MOVQ DI, (g_sched+gobuf_sp)(SI)
// Switch back to m->g0's stack and restore m->g0->sched.sp. // Switch back to m->g0's stack and restore m->g0->sched.sp.
// (Unlike m->curg, the g0 goroutine never uses sched.pc, // (Unlike m->curg, the g0 goroutine never uses sched.pc,
// so we do not have to restore it.) // so we do not have to restore it.)
MOVQ g(CX), BP MOVQ g(CX), BX
MOVQ g_m(BP), BP MOVQ g_m(BX), BX
MOVQ m_g0(BP), SI MOVQ m_g0(BX), SI
MOVQ SI, g(CX) MOVQ SI, g(CX)
MOVQ (g_sched+gobuf_sp)(SI), SP MOVQ (g_sched+gobuf_sp)(SI), SP
MOVQ 0(SP), AX MOVQ 0(SP), AX
...@@ -915,8 +915,8 @@ aes0to15: ...@@ -915,8 +915,8 @@ aes0to15:
// a page boundary, so we can load it directly. // a page boundary, so we can load it directly.
MOVOU -16(AX), X0 MOVOU -16(AX), X0
ADDQ CX, CX ADDQ CX, CX
MOVQ $masks<>(SB), BP MOVQ $masks<>(SB), AX
PAND (BP)(CX*8), X0 PAND (AX)(CX*8), X0
// scramble 3 times // scramble 3 times
AESENC X6, X0 AESENC X6, X0
...@@ -931,8 +931,8 @@ endofpage: ...@@ -931,8 +931,8 @@ endofpage:
// Then shift bytes down using pshufb. // Then shift bytes down using pshufb.
MOVOU -32(AX)(CX*1), X0 MOVOU -32(AX)(CX*1), X0
ADDQ CX, CX ADDQ CX, CX
MOVQ $shifts<>(SB), BP MOVQ $shifts<>(SB), AX
PSHUFB (BP)(CX*8), X0 PSHUFB (AX)(CX*8), X0
AESENC X6, X0 AESENC X6, X0
AESENC X7, X0 AESENC X7, X0
AESENC X7, X0 AESENC X7, X0
...@@ -1384,13 +1384,13 @@ TEXT runtime·cmpbody(SB),NOSPLIT,$0-0 ...@@ -1384,13 +1384,13 @@ TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
CMPQ SI, DI CMPQ SI, DI
JEQ allsame JEQ allsame
CMPQ BX, DX CMPQ BX, DX
MOVQ DX, BP MOVQ DX, R8
CMOVQLT BX, BP // BP = min(alen, blen) = # of bytes to compare CMOVQLT BX, R8 // R8 = min(alen, blen) = # of bytes to compare
CMPQ BP, $8 CMPQ R8, $8
JB small JB small
loop: loop:
CMPQ BP, $16 CMPQ R8, $16
JBE _0through16 JBE _0through16
MOVOU (SI), X0 MOVOU (SI), X0
MOVOU (DI), X1 MOVOU (DI), X1
...@@ -1400,7 +1400,7 @@ loop: ...@@ -1400,7 +1400,7 @@ loop:
JNE diff16 // branch if at least one byte is not equal JNE diff16 // branch if at least one byte is not equal
ADDQ $16, SI ADDQ $16, SI
ADDQ $16, DI ADDQ $16, DI
SUBQ $16, BP SUBQ $16, R8
JMP loop JMP loop
// AX = bit mask of differences // AX = bit mask of differences
...@@ -1415,15 +1415,15 @@ diff16: ...@@ -1415,15 +1415,15 @@ diff16:
// 0 through 16 bytes left, alen>=8, blen>=8 // 0 through 16 bytes left, alen>=8, blen>=8
_0through16: _0through16:
CMPQ BP, $8 CMPQ R8, $8
JBE _0through8 JBE _0through8
MOVQ (SI), AX MOVQ (SI), AX
MOVQ (DI), CX MOVQ (DI), CX
CMPQ AX, CX CMPQ AX, CX
JNE diff8 JNE diff8
_0through8: _0through8:
MOVQ -8(SI)(BP*1), AX MOVQ -8(SI)(R8*1), AX
MOVQ -8(DI)(BP*1), CX MOVQ -8(DI)(R8*1), CX
CMPQ AX, CX CMPQ AX, CX
JEQ allsame JEQ allsame
...@@ -1440,7 +1440,7 @@ diff8: ...@@ -1440,7 +1440,7 @@ diff8:
// 0-7 bytes in common // 0-7 bytes in common
small: small:
LEAQ (BP*8), CX // bytes left -> bits left LEAQ (R8*8), CX // bytes left -> bits left
NEGQ CX // - bits lift (== 64 - bits left mod 64) NEGQ CX // - bits lift (== 64 - bits left mod 64)
JEQ allsame JEQ allsame
...@@ -1450,7 +1450,7 @@ small: ...@@ -1450,7 +1450,7 @@ small:
MOVQ (SI), SI MOVQ (SI), SI
JMP si_finish JMP si_finish
si_high: si_high:
MOVQ -8(SI)(BP*1), SI MOVQ -8(SI)(R8*1), SI
SHRQ CX, SI SHRQ CX, SI
si_finish: si_finish:
SHLQ CX, SI SHLQ CX, SI
...@@ -1461,7 +1461,7 @@ si_finish: ...@@ -1461,7 +1461,7 @@ si_finish:
MOVQ (DI), DI MOVQ (DI), DI
JMP di_finish JMP di_finish
di_high: di_high:
MOVQ -8(DI)(BP*1), DI MOVQ -8(DI)(R8*1), DI
SHRQ CX, DI SHRQ CX, DI
di_finish: di_finish:
SHLQ CX, DI SHLQ CX, DI
......
...@@ -186,9 +186,9 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$64 ...@@ -186,9 +186,9 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$64
MOVQ R10, 40(SP) MOVQ R10, 40(SP)
// g = m->signal // g = m->signal
MOVQ g_m(R10), BP MOVQ g_m(R10), AX
MOVQ m_gsignal(BP), BP MOVQ m_gsignal(AX), AX
MOVQ BP, g(BX) MOVQ AX, g(BX)
MOVQ DI, 0(SP) MOVQ DI, 0(SP)
MOVQ SI, 8(SP) MOVQ SI, 8(SP)
......
...@@ -180,9 +180,9 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$64 ...@@ -180,9 +180,9 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$64
MOVQ R10, 40(SP) MOVQ R10, 40(SP)
// g = m->signal // g = m->signal
MOVQ g_m(R10), BP MOVQ g_m(R10), AX
MOVQ m_gsignal(BP), BP MOVQ m_gsignal(AX), AX
MOVQ BP, g(BX) MOVQ AX, g(BX)
MOVQ DI, 0(SP) MOVQ DI, 0(SP)
MOVQ SI, 8(SP) MOVQ SI, 8(SP)
......
...@@ -216,9 +216,9 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$64 ...@@ -216,9 +216,9 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$64
MOVQ R10, 40(SP) MOVQ R10, 40(SP)
// g = m->gsignal // g = m->gsignal
MOVQ g_m(R10), BP MOVQ g_m(R10), AX
MOVQ m_gsignal(BP), BP MOVQ m_gsignal(AX), AX
MOVQ BP, g(BX) MOVQ AX, g(BX)
MOVQ DI, 0(SP) MOVQ DI, 0(SP)
MOVQ SI, 8(SP) MOVQ SI, 8(SP)
......
...@@ -236,9 +236,9 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$64 ...@@ -236,9 +236,9 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$64
MOVQ R10, 40(SP) MOVQ R10, 40(SP)
// g = m->signal // g = m->signal
MOVQ g_m(R10), BP MOVQ g_m(R10), AX
MOVQ m_gsignal(BP), BP MOVQ m_gsignal(AX), AX
MOVQ BP, g(BX) MOVQ AX, g(BX)
MOVQ DI, 0(SP) MOVQ DI, 0(SP)
MOVQ SI, 8(SP) MOVQ SI, 8(SP)
......
...@@ -227,9 +227,9 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$64 ...@@ -227,9 +227,9 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$64
MOVQ R10, 40(SP) MOVQ R10, 40(SP)
// g = m->signal // g = m->signal
MOVQ g_m(R10), BP MOVQ g_m(R10), AX
MOVQ m_gsignal(BP), BP MOVQ m_gsignal(AX), AX
MOVQ BP, g(BX) MOVQ AX, g(BX)
MOVQ DI, 0(SP) MOVQ DI, 0(SP)
MOVQ SI, 8(SP) MOVQ SI, 8(SP)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment