Commit d839a809 authored by Dmitriy Vyukov's avatar Dmitriy Vyukov

runtime: make GC stats per-M

This is factored out part of:
https://golang.org/cl/5279048/
(Parallel GC)

benchmark                             old ns/op    new ns/op    delta
garbage.BenchmarkParser              3999106750   3975026500   -0.60%
garbage.BenchmarkParser-2            3720553750   3719196500   -0.04%
garbage.BenchmarkParser-4            3502857000   3474980500   -0.80%
garbage.BenchmarkParser-8            3375448000   3341310500   -1.01%
garbage.BenchmarkParserLastPause      329401000    324097000   -1.61%
garbage.BenchmarkParserLastPause-2    208953000    214222000   +2.52%
garbage.BenchmarkParserLastPause-4    110933000    111656000   +0.65%
garbage.BenchmarkParserLastPause-8     71969000     78230000   +8.70%
garbage.BenchmarkParserPause          230808842    197237400  -14.55%
garbage.BenchmarkParserPause-2        123674365    125197595   +1.23%
garbage.BenchmarkParserPause-4         80518525     85710333   +6.45%
garbage.BenchmarkParserPause-8         58310243     56940512   -2.35%
garbage.BenchmarkTree2                 31471700     31289400   -0.58%
garbage.BenchmarkTree2-2               21536800     21086300   -2.09%
garbage.BenchmarkTree2-4               11074700     10880000   -1.76%
garbage.BenchmarkTree2-8                7568600      7351400   -2.87%
garbage.BenchmarkTree2LastPause       314664000    312840000   -0.58%
garbage.BenchmarkTree2LastPause-2     215319000    210815000   -2.09%
garbage.BenchmarkTree2LastPause-4     110698000    108751000   -1.76%
garbage.BenchmarkTree2LastPause-8      75635000     73463000   -2.87%
garbage.BenchmarkTree2Pause           174280857    173147571   -0.65%
garbage.BenchmarkTree2Pause-2         131332714    129665761   -1.27%
garbage.BenchmarkTree2Pause-4          93803095     93422904   -0.41%
garbage.BenchmarkTree2Pause-8          86242333     85146761   -1.27%

R=rsc
CC=golang-dev
https://golang.org/cl/5987045
parent 77e1227a
......@@ -67,9 +67,6 @@ enum {
//
uint32 runtime·worldsema = 1;
// TODO: Make these per-M.
static uint64 nhandoff;
static int32 gctrace;
typedef struct Workbuf Workbuf;
......@@ -529,13 +526,17 @@ getfull(Workbuf *b)
}
if(work.nwait == work.nproc)
return nil;
if(i < 10)
if(i < 10) {
m->gcstats.nprocyield++;
runtime·procyield(20);
else if(i < 20)
} else if(i < 20) {
m->gcstats.nosyield++;
runtime·osyield();
else
} else {
m->gcstats.nsleep++;
runtime·usleep(100);
}
}
}
static Workbuf*
......@@ -550,7 +551,8 @@ handoff(Workbuf *b)
b->nobj -= n;
b1->nobj = n;
runtime·memmove(b1->obj, b->obj+b->nobj, n*sizeof b1->obj[0]);
nhandoff += n;
m->gcstats.nhandoff++;
m->gcstats.nhandoffcnt += n;
// Put b on full list - let first half of b get stolen.
runtime·lock(&work.fmu);
......@@ -852,20 +854,30 @@ stealcache(void)
}
static void
cachestats(void)
cachestats(GCStats *stats)
{
M *m;
MCache *c;
int32 i;
uint64 stacks_inuse;
uint64 stacks_sys;
uint64 *src, *dst;
if(stats)
runtime·memclr((byte*)stats, sizeof(*stats));
stacks_inuse = 0;
stacks_sys = 0;
for(m=runtime·allm; m; m=m->alllink) {
runtime·purgecachedstats(m);
stacks_inuse += m->stackalloc->inuse;
stacks_sys += m->stackalloc->sys;
if(stats) {
src = (uint64*)&m->gcstats;
dst = (uint64*)stats;
for(i=0; i<sizeof(*stats)/sizeof(uint64); i++)
dst[i] += src[i];
runtime·memclr((byte*)&m->gcstats, sizeof(m->gcstats));
}
c = m->mcache;
for(i=0; i<nelem(c->local_by_size); i++) {
mstats.by_size[i].nmalloc += c->local_by_size[i].nmalloc;
......@@ -885,6 +897,7 @@ runtime·gc(int32 force)
uint64 heap0, heap1, obj0, obj1;
byte *p;
bool extra;
GCStats stats;
// The gc is turned off (via enablegc) until
// the bootstrap has completed.
......@@ -920,12 +933,11 @@ runtime·gc(int32 force)
}
t0 = runtime·nanotime();
nhandoff = 0;
m->gcing = 1;
runtime·stoptheworld();
cachestats();
cachestats(nil);
heap0 = mstats.heap_alloc;
obj0 = mstats.nmalloc - mstats.nfree;
......@@ -955,13 +967,13 @@ runtime·gc(int32 force)
t2 = runtime·nanotime();
stealcache();
cachestats();
cachestats(&stats);
mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*gcpercent/100;
m->gcing = 0;
m->locks++; // disable gc during the mallocs in newproc
if(finq != nil) {
m->locks++; // disable gc during the mallocs in newproc
// kick off or wake up goroutine to run queued finalizers
if(fing == nil)
fing = runtime·newproc1((byte*)runfinq, nil, 0, 0, runtime·gc);
......@@ -969,10 +981,9 @@ runtime·gc(int32 force)
fingwait = 0;
runtime·ready(fing);
}
}
m->locks--;
}
cachestats();
heap1 = mstats.heap_alloc;
obj1 = mstats.nmalloc - mstats.nfree;
......@@ -985,11 +996,13 @@ runtime·gc(int32 force)
runtime·printf("pause %D\n", t3-t0);
if(gctrace) {
runtime·printf("gc%d(%d): %D+%D+%D ms %D -> %D MB %D -> %D (%D-%D) objects %D handoff\n",
runtime·printf("gc%d(%d): %D+%D+%D ms, %D -> %D MB %D -> %D (%D-%D) objects,"
" %D(%D) handoff, %D/%D/%D yields\n",
mstats.numgc, work.nproc, (t1-t0)/1000000, (t2-t1)/1000000, (t3-t2)/1000000,
heap0>>20, heap1>>20, obj0, obj1,
mstats.nmalloc, mstats.nfree,
nhandoff);
stats.nhandoff, stats.nhandoffcnt,
stats.nprocyield, stats.nosyield, stats.nsleep);
}
runtime·MProf_GC();
......@@ -1022,7 +1035,7 @@ runtime·ReadMemStats(MStats *stats)
runtime·semacquire(&runtime·worldsema);
m->gcing = 1;
runtime·stoptheworld();
cachestats();
cachestats(nil);
*stats = mstats;
m->gcing = 0;
runtime·semrelease(&runtime·worldsema);
......
......@@ -71,6 +71,7 @@ typedef struct Complex128 Complex128;
typedef struct WinCall WinCall;
typedef struct Timers Timers;
typedef struct Timer Timer;
typedef struct GCStats GCStats;
/*
* per-cpu declaration.
......@@ -166,6 +167,16 @@ struct Gobuf
byte* pc;
G* g;
};
struct GCStats
{
// the struct must consist of only uint64's,
// because it is casted to uint64[].
uint64 nhandoff;
uint64 nhandoffcnt;
uint64 nprocyield;
uint64 nosyield;
uint64 nsleep;
};
struct G
{
byte* stackguard; // cannot move - also known to linker, libmach, runtime/cgo
......@@ -243,6 +254,7 @@ struct M
uintptr waitsema; // semaphore for parking on locks
uint32 waitsemacount;
uint32 waitsemalock;
GCStats gcstats;
#ifdef GOOS_windows
void* thread; // thread handle
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment