Commit f11e4eb5 authored by Austin Clements's avatar Austin Clements

runtime: shrink stacks during concurrent mark

Currently we shrink stacks during STW mark termination because it used
to be unsafe to shrink them concurrently. For some programs, this
significantly increases pause time: stack shrinking costs ~5ms/MB
copied plus 2µs/shrink.

Now that we've made it safe to shrink a stack without the world being
stopped, shrink them during the concurrent mark phase.

This reduces the STW time in the program from issue #12967 by an order
of magnitude and brings it from over the 10ms goal to well under:

name           old 95%ile-markTerm-time  new 95%ile-markTerm-time  delta
Stackshrink-4               23.8ms ±60%               1.80ms ±39%  -92.44%  (p=0.008 n=5+5)

Fixes #12967.

This slows down the go1 and garbage benchmarks overall by < 0.5%.

name              old time/op  new time/op  delta
XBenchGarbage-12  2.48ms ± 1%  2.49ms ± 1%  +0.45%  (p=0.005 n=25+21)

name                      old time/op    new time/op    delta
BinaryTree17-12              2.93s ± 2%     2.97s ± 2%  +1.34%  (p=0.002 n=19+20)
Fannkuch11-12                2.51s ± 1%     2.59s ± 0%  +3.09%  (p=0.000 n=18+18)
FmtFprintfEmpty-12          51.1ns ± 2%    51.5ns ± 1%    ~     (p=0.280 n=20+17)
FmtFprintfString-12          175ns ± 1%     169ns ± 1%  -3.01%  (p=0.000 n=20+20)
FmtFprintfInt-12             160ns ± 1%     160ns ± 0%  +0.53%  (p=0.000 n=20+20)
FmtFprintfIntInt-12          265ns ± 0%     266ns ± 1%  +0.59%  (p=0.000 n=20+20)
FmtFprintfPrefixedInt-12     237ns ± 1%     238ns ± 1%  +0.44%  (p=0.000 n=20+20)
FmtFprintfFloat-12           326ns ± 1%     341ns ± 1%  +4.55%  (p=0.000 n=20+19)
FmtManyArgs-12              1.01µs ± 0%    1.02µs ± 0%  +0.43%  (p=0.000 n=20+19)
GobDecode-12                8.41ms ± 1%    8.30ms ± 2%  -1.22%  (p=0.000 n=20+19)
GobEncode-12                6.66ms ± 1%    6.68ms ± 0%  +0.30%  (p=0.000 n=18+19)
Gzip-12                      322ms ± 1%     322ms ± 1%    ~     (p=1.000 n=20+20)
Gunzip-12                   42.8ms ± 0%    42.9ms ± 0%    ~     (p=0.174 n=20+20)
HTTPClientServer-12         69.7µs ± 1%    70.6µs ± 1%  +1.20%  (p=0.000 n=20+20)
JSONEncode-12               16.8ms ± 0%    16.8ms ± 1%    ~     (p=0.154 n=19+19)
JSONDecode-12               65.1ms ± 0%    65.3ms ± 1%  +0.34%  (p=0.003 n=20+20)
Mandelbrot200-12            3.93ms ± 0%    3.92ms ± 0%    ~     (p=0.396 n=19+20)
GoParse-12                  3.66ms ± 1%    3.65ms ± 1%    ~     (p=0.117 n=16+18)
RegexpMatchEasy0_32-12      85.0ns ± 2%    85.5ns ± 2%    ~     (p=0.143 n=20+20)
RegexpMatchEasy0_1K-12       267ns ± 1%     267ns ± 1%    ~     (p=0.867 n=20+17)
RegexpMatchEasy1_32-12      83.3ns ± 2%    83.8ns ± 1%    ~     (p=0.068 n=20+20)
RegexpMatchEasy1_1K-12       432ns ± 1%     432ns ± 1%    ~     (p=0.804 n=20+19)
RegexpMatchMedium_32-12      133ns ± 0%     133ns ± 0%    ~     (p=1.000 n=20+20)
RegexpMatchMedium_1K-12     40.3µs ± 1%    40.4µs ± 1%    ~     (p=0.319 n=20+19)
RegexpMatchHard_32-12       2.10µs ± 1%    2.10µs ± 1%    ~     (p=0.723 n=20+18)
RegexpMatchHard_1K-12       63.0µs ± 0%    63.0µs ± 0%    ~     (p=0.158 n=19+17)
Revcomp-12                   461ms ± 1%     476ms ± 8%  +3.29%  (p=0.002 n=20+20)
Template-12                 80.1ms ± 1%    79.3ms ± 1%  -1.00%  (p=0.000 n=20+20)
TimeParse-12                 360ns ± 0%     360ns ± 0%    ~     (p=0.802 n=18+19)
TimeFormat-12                374ns ± 1%     372ns ± 0%  -0.77%  (p=0.000 n=20+19)
[Geo mean]                  61.8µs         62.0µs       +0.40%

Change-Id: Ib60cd46b7a4987e07670eb271d22f6cee5802842
Reviewed-on: https://go-review.googlesource.com/20044Reviewed-by: default avatarKeith Randall <khr@golang.org>
parent c14d25c6
...@@ -144,11 +144,10 @@ func markroot(gcw *gcWork, i uint32) { ...@@ -144,11 +144,10 @@ func markroot(gcw *gcWork, i uint32) {
gp.waitsince = work.tstart gp.waitsince = work.tstart
} }
// Shrink a stack if not much of it is being used but not in the scan phase. if gcphase == _GCmarktermination && status == _Gdead {
if gcphase == _GCmarktermination { // Free gp's stack if necessary. Only do this
// Shrink during STW GCmarktermination phase thus avoiding // during mark termination because otherwise
// complications introduced by shrinking during // _Gdead may be transient.
// non-STW phases.
shrinkstack(gp) shrinkstack(gp)
} }
...@@ -599,6 +598,13 @@ func scanstack(gp *g) { ...@@ -599,6 +598,13 @@ func scanstack(gp *g) {
throw("can't scan gchelper stack") throw("can't scan gchelper stack")
} }
// Shrink the stack if not much of it is being used. During
// concurrent GC, we can do this during concurrent mark.
if !work.markrootDone {
shrinkstack(gp)
}
// Prepare for stack barrier insertion/removal.
var sp, barrierOffset, nextBarrier uintptr var sp, barrierOffset, nextBarrier uintptr
if gp.syscallsp != 0 { if gp.syscallsp != 0 {
sp = gp.syscallsp sp = gp.syscallsp
...@@ -647,6 +653,7 @@ func scanstack(gp *g) { ...@@ -647,6 +653,7 @@ func scanstack(gp *g) {
throw("scanstack in wrong phase") throw("scanstack in wrong phase")
} }
// Scan the stack.
var cache pcvalueCache var cache pcvalueCache
gcw := &getg().m.p.ptr().gcw gcw := &getg().m.p.ptr().gcw
n := 0 n := 0
......
...@@ -1069,7 +1069,8 @@ func gostartcallfn(gobuf *gobuf, fv *funcval) { ...@@ -1069,7 +1069,8 @@ func gostartcallfn(gobuf *gobuf, fv *funcval) {
// Called at garbage collection time. // Called at garbage collection time.
// gp must be stopped, but the world need not be. // gp must be stopped, but the world need not be.
func shrinkstack(gp *g) { func shrinkstack(gp *g) {
if readgstatus(gp) == _Gdead { gstatus := readgstatus(gp)
if gstatus&^_Gscan == _Gdead {
if gp.stack.lo != 0 { if gp.stack.lo != 0 {
// Free whole stack - it will get reallocated // Free whole stack - it will get reallocated
// if G is used again. // if G is used again.
...@@ -1084,6 +1085,9 @@ func shrinkstack(gp *g) { ...@@ -1084,6 +1085,9 @@ func shrinkstack(gp *g) {
if gp.stack.lo == 0 { if gp.stack.lo == 0 {
throw("missing stack in shrinkstack") throw("missing stack in shrinkstack")
} }
if gstatus&_Gscan == 0 {
throw("bad status in shrinkstack")
}
if debug.gcshrinkstackoff > 0 { if debug.gcshrinkstackoff > 0 {
return return
...@@ -1119,9 +1123,7 @@ func shrinkstack(gp *g) { ...@@ -1119,9 +1123,7 @@ func shrinkstack(gp *g) {
print("shrinking stack ", oldsize, "->", newsize, "\n") print("shrinking stack ", oldsize, "->", newsize, "\n")
} }
oldstatus := casgcopystack(gp)
copystack(gp, newsize, false) copystack(gp, newsize, false)
casgstatus(gp, _Gcopystack, oldstatus)
} }
// freeStackSpans frees unused stack spans at the end of GC. // freeStackSpans frees unused stack spans at the end of GC.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment