Commit a158382b authored by Ian Lance Taylor's avatar Ian Lance Taylor

runtime: call amd64 VDSO entry points on large stack

If the Linux kernel was built with CONFIG_OPTIMIZE_INLINING=n and was
built with hardening options turned on, GCC will insert a stack probe
in the VDSO function that requires a full page of stack space.
The stack probe can corrupt memory if another thread is using it.
Avoid sporadic crashes by calling the VDSO on the g0 or gsignal stack.

While we're at it, align the stack as C code expects. We've been
getting away with a misaligned stack, but it's possible that the VDSO
code will change in the future to break that assumption.

Benchmarks show a 11% hit on time.Now, but it's only 6ns.

name                      old time/op  new time/op  delta
AfterFunc-12              1.66ms ± 0%  1.66ms ± 1%     ~     (p=0.905 n=9+10)
After-12                  1.90ms ± 6%  1.86ms ± 0%   -2.05%  (p=0.012 n=10+8)
Stop-12                    113µs ± 3%   115µs ± 2%   +1.60%  (p=0.017 n=9+10)
SimultaneousAfterFunc-12   145µs ± 1%   144µs ± 0%   -0.68%  (p=0.002 n=10+8)
StartStop-12              39.5µs ± 3%  40.4µs ± 5%   +2.19%  (p=0.023 n=10+10)
Reset-12                  10.2µs ± 0%  10.4µs ± 0%   +2.45%  (p=0.000 n=10+9)
Sleep-12                   190µs ± 1%   190µs ± 1%     ~     (p=0.971 n=10+10)
Ticker-12                 4.68ms ± 2%  4.64ms ± 2%   -0.83%  (p=0.043 n=9+10)
Now-12                    48.4ns ±11%  54.0ns ±11%  +11.42%  (p=0.017 n=10+10)
NowUnixNano-12            48.5ns ±13%  56.9ns ± 8%  +17.30%  (p=0.000 n=10+10)
Format-12                  489ns ±11%   504ns ± 6%     ~     (p=0.289 n=10+10)
FormatNow-12               436ns ±23%   480ns ±13%  +10.25%  (p=0.026 n=9+10)
MarshalJSON-12             656ns ±14%   587ns ±24%     ~     (p=0.063 n=10+10)
MarshalText-12             647ns ± 7%   638ns ± 9%     ~     (p=0.516 n=10+10)
Parse-12                   348ns ± 8%   328ns ± 9%   -5.66%  (p=0.030 n=10+10)
ParseDuration-12           136ns ± 9%   140ns ±11%     ~     (p=0.425 n=10+10)
Hour-12                   14.8ns ± 6%  15.6ns ±11%     ~     (p=0.085 n=10+10)
Second-12                 14.0ns ± 6%  14.3ns ±12%     ~     (p=0.443 n=10+10)
Year-12                   32.4ns ±11%  33.4ns ± 6%     ~     (p=0.492 n=10+10)
Day-12                    41.5ns ± 9%  42.3ns ±12%     ~     (p=0.239 n=10+10)

Fixes #20427

Change-Id: Ia395cbb863215f4499b8e7ef95f4b99f51090911
Reviewed-on: https://go-review.googlesource.com/76990Reviewed-by: default avatarAustin Clements <austin@google.com>
parent fcee1897
...@@ -203,17 +203,34 @@ TEXT runtime·mincore(SB),NOSPLIT,$0-16 ...@@ -203,17 +203,34 @@ TEXT runtime·mincore(SB),NOSPLIT,$0-16
RET RET
// func walltime() (sec int64, nsec int32) // func walltime() (sec int64, nsec int32)
TEXT runtime·walltime(SB), NOSPLIT, $16 TEXT runtime·walltime(SB), NOSPLIT, $0-12
// We don't know how much stack space the VDSO code will need,
// so switch to g0.
MOVL SP, BP // Save old SP; BP unchanged by C code.
get_tls(CX)
MOVL g(CX), AX
MOVL g_m(AX), CX
MOVL m_curg(CX), DX
CMPL AX, DX // Only switch if on curg.
JNE noswitch
MOVL m_g0(CX), DX
MOVL (g_sched+gobuf_sp)(DX), SP // Set SP to g0 stack
noswitch:
SUBL $16, SP // Space for results
ANDL $~15, SP // Align for C code
// Stack layout, depending on call path: // Stack layout, depending on call path:
// x(SP) vDSO INVOKE_SYSCALL // x(SP) vDSO INVOKE_SYSCALL
// 12 ts.tv_nsec ts.tv_nsec // 12 ts.tv_nsec ts.tv_nsec
// 8 ts.tv_sec ts.tv_sec // 8 ts.tv_sec ts.tv_sec
// 4 &ts - // 4 &ts -
// 0 CLOCK_<id> - // 0 CLOCK_<id> -
//
// If we take the vDSO path, we're calling a function with gcc calling convention.
// We're guaranteed 128 bytes on entry. We've taken 16, and the call uses another 4,
// leaving 108 for __vdso_clock_gettime to use.
MOVL runtime·__vdso_clock_gettime_sym(SB), AX MOVL runtime·__vdso_clock_gettime_sym(SB), AX
CMPL AX, $0 CMPL AX, $0
JEQ fallback JEQ fallback
...@@ -234,6 +251,8 @@ finish: ...@@ -234,6 +251,8 @@ finish:
MOVL 8(SP), AX // sec MOVL 8(SP), AX // sec
MOVL 12(SP), BX // nsec MOVL 12(SP), BX // nsec
MOVL BP, SP // Restore real SP
// sec is in AX, nsec in BX // sec is in AX, nsec in BX
MOVL AX, sec_lo+0(FP) MOVL AX, sec_lo+0(FP)
MOVL $0, sec_hi+4(FP) MOVL $0, sec_hi+4(FP)
...@@ -242,8 +261,26 @@ finish: ...@@ -242,8 +261,26 @@ finish:
// int64 nanotime(void) so really // int64 nanotime(void) so really
// void nanotime(int64 *nsec) // void nanotime(int64 *nsec)
TEXT runtime·nanotime(SB), NOSPLIT, $16 TEXT runtime·nanotime(SB), NOSPLIT, $0-8
// See comments above in walltime() about stack space usage and layout. // Switch to g0 stack. See comment above in runtime·walltime.
MOVL SP, BP // Save old SP; BP unchanged by C code.
get_tls(CX)
MOVL g(CX), AX
MOVL g_m(AX), CX
MOVL m_curg(CX), DX
CMPL AX, DX // Only switch if on curg.
JNE noswitch
MOVL m_g0(CX), DX
MOVL (g_sched+gobuf_sp)(DX), SP // Set SP to g0 stack
noswitch:
SUBL $16, SP // Space for results
ANDL $~15, SP // Align for C code
MOVL runtime·__vdso_clock_gettime_sym(SB), AX MOVL runtime·__vdso_clock_gettime_sym(SB), AX
CMPL AX, $0 CMPL AX, $0
JEQ fallback JEQ fallback
...@@ -264,6 +301,8 @@ finish: ...@@ -264,6 +301,8 @@ finish:
MOVL 8(SP), AX // sec MOVL 8(SP), AX // sec
MOVL 12(SP), BX // nsec MOVL 12(SP), BX // nsec
MOVL BP, SP // Restore real SP
// sec is in AX, nsec in BX // sec is in AX, nsec in BX
// convert to DX:AX nsec // convert to DX:AX nsec
MOVL $1000000000, CX MOVL $1000000000, CX
......
...@@ -181,11 +181,31 @@ TEXT runtime·mincore(SB),NOSPLIT,$0-28 ...@@ -181,11 +181,31 @@ TEXT runtime·mincore(SB),NOSPLIT,$0-28
RET RET
// func walltime() (sec int64, nsec int32) // func walltime() (sec int64, nsec int32)
TEXT runtime·walltime(SB),NOSPLIT,$16 TEXT runtime·walltime(SB),NOSPLIT,$0-12
// Be careful. We're calling a function with gcc calling convention here. // We don't know how much stack space the VDSO code will need,
// We're guaranteed 128 bytes on entry, and we've taken 16, and the // so switch to g0.
// call uses another 8. // In particular, a kernel configured with CONFIG_OPTIMIZE_INLINING=n
// That leaves 104 for the gettime code to use. Hope that's enough! // and hardening can use a full page of stack space in gettime_sym
// due to stack probes inserted to avoid stack/heap collisions.
// See issue #20427.
MOVQ SP, BP // Save old SP; BP unchanged by C code.
get_tls(CX)
MOVQ g(CX), AX
MOVQ g_m(AX), CX
MOVQ m_curg(CX), DX
CMPQ AX, DX // Only switch if on curg.
JNE noswitch
MOVQ m_g0(CX), DX
MOVQ (g_sched+gobuf_sp)(DX), SP // Set SP to g0 stack
noswitch:
SUBQ $16, SP // Space for results
ANDQ $~15, SP // Align for C code
MOVQ runtime·__vdso_clock_gettime_sym(SB), AX MOVQ runtime·__vdso_clock_gettime_sym(SB), AX
CMPQ AX, $0 CMPQ AX, $0
JEQ fallback JEQ fallback
...@@ -194,6 +214,7 @@ TEXT runtime·walltime(SB),NOSPLIT,$16 ...@@ -194,6 +214,7 @@ TEXT runtime·walltime(SB),NOSPLIT,$16
CALL AX CALL AX
MOVQ 0(SP), AX // sec MOVQ 0(SP), AX // sec
MOVQ 8(SP), DX // nsec MOVQ 8(SP), DX // nsec
MOVQ BP, SP // Restore real SP
MOVQ AX, sec+0(FP) MOVQ AX, sec+0(FP)
MOVL DX, nsec+8(FP) MOVL DX, nsec+8(FP)
RET RET
...@@ -205,13 +226,31 @@ fallback: ...@@ -205,13 +226,31 @@ fallback:
MOVQ 0(SP), AX // sec MOVQ 0(SP), AX // sec
MOVL 8(SP), DX // usec MOVL 8(SP), DX // usec
IMULQ $1000, DX IMULQ $1000, DX
MOVQ BP, SP // Restore real SP
MOVQ AX, sec+0(FP) MOVQ AX, sec+0(FP)
MOVL DX, nsec+8(FP) MOVL DX, nsec+8(FP)
RET RET
TEXT runtime·nanotime(SB),NOSPLIT,$16 TEXT runtime·nanotime(SB),NOSPLIT,$0-8
// Duplicate time.now here to avoid using up precious stack space. // Switch to g0 stack. See comment above in runtime·walltime.
// See comment above in time.now.
MOVQ SP, BP // Save old SP; BX unchanged by C code.
get_tls(CX)
MOVQ g(CX), AX
MOVQ g_m(AX), CX
MOVQ m_curg(CX), DX
CMPQ AX, DX // Only switch if on curg.
JNE noswitch
MOVQ m_g0(CX), DX
MOVQ (g_sched+gobuf_sp)(DX), SP // Set SP to g0 stack
noswitch:
SUBQ $16, SP // Space for results
ANDQ $~15, SP // Align for C code
MOVQ runtime·__vdso_clock_gettime_sym(SB), AX MOVQ runtime·__vdso_clock_gettime_sym(SB), AX
CMPQ AX, $0 CMPQ AX, $0
JEQ fallback JEQ fallback
...@@ -220,6 +259,7 @@ TEXT runtime·nanotime(SB),NOSPLIT,$16 ...@@ -220,6 +259,7 @@ TEXT runtime·nanotime(SB),NOSPLIT,$16
CALL AX CALL AX
MOVQ 0(SP), AX // sec MOVQ 0(SP), AX // sec
MOVQ 8(SP), DX // nsec MOVQ 8(SP), DX // nsec
MOVQ BP, SP // Restore real SP
// sec is in AX, nsec in DX // sec is in AX, nsec in DX
// return nsec in AX // return nsec in AX
IMULQ $1000000000, AX IMULQ $1000000000, AX
...@@ -233,6 +273,7 @@ fallback: ...@@ -233,6 +273,7 @@ fallback:
CALL AX CALL AX
MOVQ 0(SP), AX // sec MOVQ 0(SP), AX // sec
MOVL 8(SP), DX // usec MOVL 8(SP), DX // usec
MOVQ BP, SP // Restore real SP
IMULQ $1000, DX IMULQ $1000, DX
// sec is in AX, nsec in DX // sec is in AX, nsec in DX
// return nsec in AX // return nsec in AX
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment