Commit 01826280 authored by Dmitriy Vyukov

runtime: refactor helpgc functionality in preparation for parallel GC

Parallel GC needs to know in advance how many helper threads there will be.
Hopefully this is the last patch before I can tackle the parallel sweep phase.
The benchmarks are unaffected.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/6200064
parent b554fb91
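
The heart of the change is splitting the old runtime·helpgc(bool *extra), which both decided how many helper Ms to use and woke them, into two calls: runtime·gcprocs(), which computes up front how many procs the collection will use, and runtime·helpgc(nproc), which wakes exactly nproc-1 parked Ms. Below is a minimal, self-contained sketch of the clamping logic; the values for gomaxprocs, ncpu and mwait and the MaxGcproc constant are made-up stand-ins for illustration, not the runtime's real state.

#include <stdio.h>

/* Stand-in knobs: the real code reads runtime·gomaxprocs, runtime·ncpu,
   MaxGcproc and runtime·sched.mwait under the scheduler lock. */
enum { MaxGcproc = 4 };
static int gomaxprocs = 8;	/* GOMAXPROCS setting */
static int ncpu = 16;		/* CPUs on the machine */
static int mwait = 2;		/* idle Ms parked in the scheduler */

/* Sketch of the new gcprocs(): clamp the proc count by every limit,
   including how many Ms can actually take part (the idle ones plus
   the one M that is already running the collection). */
static int
gcprocs(void)
{
	int n;

	n = gomaxprocs;
	if(n > ncpu)
		n = ncpu;
	if(n > MaxGcproc)
		n = MaxGcproc;
	if(n > mwait+1)	/* one M is currently running */
		n = mwait+1;
	return n;
}

int
main(void)
{
	/* With the stand-in values: min(8, 16, 4, 2+1) == 3 procs for this GC. */
	printf("GC would use %d procs\n", gcprocs());
	return 0;
}

Because the count is known before any helper is woken, work.nproc can be fixed in advance, which is what the parallel collector will rely on.
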
@@ -414,7 +414,8 @@ enum
 void runtime·MProf_Malloc(void*, uintptr);
 void runtime·MProf_Free(void*, uintptr);
 void runtime·MProf_GC(void);
-int32 runtime·helpgc(bool*);
+int32 runtime·gcprocs(void);
+void runtime·helpgc(int32 nproc);
 void runtime·gchelper(void);
 bool runtime·getfinalizer(void *p, bool del, void (**fn)(void*), int32 *nret);
...
@@ -366,7 +366,6 @@ debug_scanblock(byte *b, int64 n)
 		if(s == nil)
 			continue;
 
-
 		p = (byte*)((uintptr)s->start<<PageShift);
 		if(s->sizeclass == 0) {
 			obj = p;
@@ -925,7 +924,6 @@ runtime·gc(int32 force)
 	int64 t0, t1, t2, t3;
 	uint64 heap0, heap1, obj0, obj1;
 	byte *p;
-	bool extra;
 	GCStats stats;
 
 	// The gc is turned off (via enablegc) until
@@ -966,18 +964,21 @@ runtime·gc(int32 force)
 	m->gcing = 1;
 	runtime·stoptheworld();
 
-	cachestats(nil);
-	heap0 = mstats.heap_alloc;
-	obj0 = mstats.nmalloc - mstats.nfree;
+	heap0 = 0;
+	obj0 = 0;
+	if(gctrace) {
+		cachestats(nil);
+		heap0 = mstats.heap_alloc;
+		obj0 = mstats.nmalloc - mstats.nfree;
+	}
 
 	runtime·lock(&work.markgate);
 	runtime·lock(&work.sweepgate);
 
-	extra = false;
-	work.nproc = 1;
-	if(runtime·gomaxprocs > 1 && runtime·ncpu > 1) {
+	work.nproc = runtime·gcprocs();
+	if(work.nproc > 1) {
 		runtime·noteclear(&work.alldone);
-		work.nproc += runtime·helpgc(&extra);
+		runtime·helpgc(work.nproc);
 	}
 	work.nwait = 0;
 	work.ndone = 0;
@@ -1036,15 +1037,7 @@ runtime·gc(int32 force)
 	runtime·MProf_GC();
 	runtime·semrelease(&runtime·worldsema);
-
-	// If we could have used another helper proc, start one now,
-	// in the hope that it will be available next time.
-	// It would have been even better to start it before the collection,
-	// but doing so requires allocating memory, so it's tricky to
-	// coordinate. This lazy approach works out in practice:
-	// we don't mind if the first couple gc rounds don't have quite
-	// the maximum number of procs.
-	runtime·starttheworld(extra);
+	runtime·starttheworld();
 
 	// give the queued finalizers, if any, a chance to run
 	if(finq != nil)
@@ -1068,7 +1061,7 @@ runtime·ReadMemStats(MStats *stats)
 	*stats = mstats;
 	m->gcing = 0;
 	runtime·semrelease(&runtime·worldsema);
-	runtime·starttheworld(false);
+	runtime·starttheworld();
 }
 
 static void
...
@@ -355,7 +355,7 @@ func Stack(b Slice, all bool) (n int32) {
 	if(all) {
 		m->gcing = 0;
 		runtime·semrelease(&runtime·worldsema);
-		runtime·starttheworld(false);
+		runtime·starttheworld();
 	}
 }
@@ -398,7 +398,7 @@ func GoroutineProfile(b Slice) (n int32, ok bool) {
 		m->gcing = 0;
 		runtime·semrelease(&runtime·worldsema);
-		runtime·starttheworld(false);
+		runtime·starttheworld();
 	}
 }
@@ -646,35 +646,38 @@ top:
 }
 
 int32
-runtime·helpgc(bool *extra)
+runtime·gcprocs(void)
 {
-	M *mp;
-	int32 n, max;
+	int32 n;
 
-	// Figure out how many CPUs to use.
+	// Figure out how many CPUs to use during GC.
 	// Limited by gomaxprocs, number of actual CPUs, and MaxGcproc.
-	max = runtime·gomaxprocs;
-	if(max > runtime·ncpu)
-		max = runtime·ncpu;
-	if(max > MaxGcproc)
-		max = MaxGcproc;
-
-	// We're going to use one CPU no matter what.
-	// Figure out the max number of additional CPUs.
-	max--;
+	n = runtime·gomaxprocs;
+	if(n > runtime·ncpu)
+		n = runtime·ncpu;
+	if(n > MaxGcproc)
+		n = MaxGcproc;
+	if(n > runtime·sched.mwait+1)  // one M is currently running
+		n = runtime·sched.mwait+1;
+	return n;
+}
+
+void
+runtime·helpgc(int32 nproc)
+{
+	M *mp;
+	int32 n;
 
 	runtime·lock(&runtime·sched);
-	n = 0;
-	while(n < max && (mp = mget(nil)) != nil) {
-		n++;
+	for(n = 1; n < nproc; n++) {  // one M is currently running
+		mp = mget(nil);
+		if(mp == nil)
+			runtime·throw("runtime·gcprocs inconsistency");
 		mp->helpgc = 1;
 		mp->waitnextg = 0;
 		runtime·notewakeup(&mp->havenextg);
 	}
 	runtime·unlock(&runtime·sched);
-	if(extra)
-		*extra = n != max;
-	return n;
 }
 
 void
@@ -714,18 +717,30 @@ runtime·stoptheworld(void)
 }
 
 void
-runtime·starttheworld(bool extra)
+runtime·starttheworld(void)
 {
 	M *m;
+	int32 max;
+
+	// Figure out how many CPUs GC could possibly use.
+	max = runtime·gomaxprocs;
+	if(max > runtime·ncpu)
+		max = runtime·ncpu;
+	if(max > MaxGcproc)
+		max = MaxGcproc;
 
 	schedlock();
 	runtime·gcwaiting = 0;
 	setmcpumax(runtime·gomaxprocs);
 	matchmg();
-	if(extra && canaddmcpu()) {
-		// Start a new m that will (we hope) be idle
-		// and so available to help when the next
-		// garbage collection happens.
+	if(runtime·gcprocs() < max && canaddmcpu()) {
+		// If GC could have used another helper proc, start one now,
+		// in the hope that it will be available next time.
+		// It would have been even better to start it before the collection,
+		// but doing so requires allocating memory, so it's tricky to
+		// coordinate. This lazy approach works out in practice:
+		// we don't mind if the first couple gc rounds don't have quite
+		// the maximum number of procs.
 		// canaddmcpu above did mcpu++
 		// (necessary, because m will be doing various
 		// initialization work so is definitely running),
...
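
The long comment that used to sit in runtime·gc moves into runtime·starttheworld: if the collection that just finished could have used more procs than were actually available, the scheduler lazily starts one extra M now so it is parked and ready for the next cycle. A rough sketch of that decision, with canaddmcpu() and the two counts replaced by illustrative stand-ins rather than real scheduler state:

#include <stdbool.h>
#include <stdio.h>

static int procs_available = 2;	/* what gcprocs() could get: idle Ms + 1 */
static int procs_wanted = 4;	/* min(gomaxprocs, ncpu, MaxGcproc) */

/* Stand-in for canaddmcpu(): pretend a CPU slot is free for one more M. */
static bool
canaddmcpu(void)
{
	return true;
}

int
main(void)
{
	/* Lazy helper start: starting the M before the collection would mean
	   allocating memory while stopping the world, so it is deferred to
	   starttheworld and only pays off on the next GC round. */
	if(procs_available < procs_wanted && canaddmcpu())
		printf("start an extra M for the next GC\n");
	else
		printf("enough procs already; nothing to do\n");
	return 0;
}
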
@@ -636,7 +636,7 @@ int64 runtime·cputicks(void);
 #pragma varargck type "S" String
 void runtime·stoptheworld(void);
-void runtime·starttheworld(bool);
+void runtime·starttheworld(void);
 extern uint32 runtime·worldsema;
 
 /*
...