Commit 779c45a5 authored by Dmitriy Vyukov

runtime: improved scheduler

Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062
parent d17506e5
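
In outline, the change hangs the runnable queue, the MCache and the cache of dead G's off a per-processor P, and an M must acquire a P before it may execute Go code. Below is a rough, illustrative sketch of that association, not code from this CL (the real logic lives in the collapsed proc.c diff further down); helper and field usage are assumptions drawn from the struct P and struct M hunks shown later.

// Illustrative sketch only, not part of the diff: an M attaches itself to a
// P before running Go code and drops it when it blocks, so the hot paths
// touch only per-P state instead of the global scheduler lock.
static void
acquirep(P *p)
{
	if(m->p || m->mcache)
		runtime·throw("acquirep: already executing Go code");
	m->mcache = p->mcache;	// allocate from this P's cache from now on
	m->p = p;
	p->m = m;
	p->status = Prunning;
}

static void
releasep(void)
{
	P *p;

	p = m->p;
	m->p = nil;
	m->mcache = nil;
	p->m = nil;
	p->status = Pidle;
}

Because each running M owns exactly one P, the syscall enter/exit and thread wakeup paths only need to move this association around rather than take the scheduler lock.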
@@ -1633,20 +1633,12 @@ runtime·gchelper(void)
 // extra memory used).
 static int32 gcpercent = GcpercentUnknown;
 
-static void
-stealcache(void)
-{
-	M *mp;
-
-	for(mp=runtime·allm; mp; mp=mp->alllink)
-		runtime·MCache_ReleaseAll(mp->mcache);
-}
-
 static void
 cachestats(GCStats *stats)
 {
 	M *mp;
 	MCache *c;
+	P *p, **pp;
 	int32 i;
 	uint64 stacks_inuse;
 	uint64 *src, *dst;
@@ -1655,8 +1647,6 @@ cachestats(GCStats *stats)
 	runtime·memclr((byte*)stats, sizeof(*stats));
 	stacks_inuse = 0;
 	for(mp=runtime·allm; mp; mp=mp->alllink) {
-		c = mp->mcache;
-		runtime·purgecachedstats(c);
 		stacks_inuse += mp->stackinuse*FixedStack;
 		if(stats) {
 			src = (uint64*)&mp->gcstats;
@@ -1665,6 +1655,12 @@ cachestats(GCStats *stats)
 				dst[i] += src[i];
 			runtime·memclr((byte*)&mp->gcstats, sizeof(mp->gcstats));
 		}
+	}
+	for(pp=runtime·allp; p=*pp; pp++) {
+		c = p->mcache;
+		if(c==nil)
+			continue;
+		runtime·purgecachedstats(c);
 		for(i=0; i<nelem(c->local_by_size); i++) {
 			mstats.by_size[i].nmalloc += c->local_by_size[i].nmalloc;
 			c->local_by_size[i].nmalloc = 0;
@@ -1819,12 +1815,11 @@ gc(struct gc_args *args)
 	runtime·parfordo(work.sweepfor);
 	t3 = runtime·nanotime();
 
-	stealcache();
-	cachestats(&stats);
-
 	if(work.nproc > 1)
 		runtime·notesleep(&work.alldone);
 
+	cachestats(&stats);
+
 	stats.nprocyield += work.sweepfor->nprocyield;
 	stats.nosyield += work.sweepfor->nosyield;
 	stats.nsleep += work.sweepfor->nsleep;
......
This diff is collapsed.
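
The collapsed diff above holds the bulk of the change, the rewritten scheduler itself (likely proc.c). As a hedged sketch of the per-P runnable queue it introduces: assume a circular buffer guarded by the P's own lock, with illustrative runqhead/runqtail/runqsize fields and a runqgrow helper that are not shown in the hunks below.

// Illustrative sketch, not the patch: each P keeps its own queue of
// runnable G's, so enqueue/dequeue normally contends only on that P's
// lock rather than on the global scheduler lock.
static void
runqput(P *p, G *gp)
{
	runtime·lock(p);
	if((p->runqtail+1) % p->runqsize == p->runqhead)
		runqgrow(p);	// queue full: grow and repack the buffer (not shown)
	p->runq[p->runqtail] = gp;
	p->runqtail = (p->runqtail+1) % p->runqsize;
	runtime·unlock(p);
}

static G*
runqget(P *p)
{
	G *gp;

	runtime·lock(p);
	if(p->runqhead == p->runqtail) {
		runtime·unlock(p);
		return nil;	// local queue empty: caller falls back to the
				// global queue or steals from another P
	}
	gp = p->runq[p->runqhead];
	p->runqhead = (p->runqhead+1) % p->runqsize;
	runtime·unlock(p);
	return gp;
}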
@@ -118,10 +118,19 @@ enum
 	Grunning,
 	Gsyscall,
 	Gwaiting,
-	Gmoribund,
+	Gmoribund_unused,	// currently unused, but hardcoded in gdb scripts
 	Gdead,
 };
 enum
+{
+	// P status
+	Pidle,
+	Prunning,
+	Psyscall,
+	Pgcstop,
+	Pdead,
+};
+enum
 {
 	true	= 1,
 	false	= 0,
@@ -214,6 +223,7 @@ struct G
 	Gobuf	sched;
 	uintptr	gcstack;	// if status==Gsyscall, gcstack = stackbase to use during gc
 	uintptr	gcsp;	// if status==Gsyscall, gcsp = sched.sp to use during gc
+	byte*	gcpc;	// if status==Gsyscall, gcpc = sched.pc to use during gc
 	uintptr	gcguard;	// if status==Gsyscall, gcguard = stackguard to use during gc
 	uintptr	stack0;
 	FuncVal*	fnstart;	// initial function
@@ -224,13 +234,11 @@ struct G
 	uint32	selgen;	// valid sudog pointer
 	int8*	waitreason;	// if status==Gwaiting
 	G*	schedlink;
-	bool	readyonstop;
 	bool	ispanic;
 	bool	issystem;
 	int8	raceignore;	// ignore race detection events
 	M*	m;	// for debuggers, but offset not hard-coded
 	M*	lockedm;
-	M*	idlem;
 	int32	sig;
 	int32	writenbuf;
 	byte*	writebuf;
@@ -259,22 +267,24 @@ struct M
 	G*	gsignal;	// signal-handling G
 	uint32	tls[8];	// thread-local storage (for 386 extern register)
 	G*	curg;	// current running goroutine
+	P*	p;	// attached P for executing Go code (nil if not executing Go code)
+	P*	nextp;
 	int32	id;
 	int32	mallocing;
 	int32	throwing;
 	int32	gcing;
 	int32	locks;
 	int32	nomemprof;
-	int32	waitnextg;
 	int32	dying;
 	int32	profilehz;
 	int32	helpgc;
+	bool	blockingsyscall;
+	bool	spinning;
 	uint32	fastrand;
 	uint64	ncgocall;	// number of cgo calls in total
 	int32	ncgo;	// number of cgo calls currently in progress
 	CgoMal*	cgomal;
-	Note	havenextg;
-	G*	nextg;
+	Note	park;
 	M*	alllink;	// on allm
 	M*	schedlink;
 	uint32	machport;	// Return address for Mach IPC (OS X)
@@ -284,7 +294,6 @@ struct M
 	uint32	stackcachecnt;
 	void*	stackcache[StackCacheSize];
 	G*	lockedg;
-	G*	idleg;
 	uintptr	createstack[32];	// Stack that created this thread.
 	uint32	freglo[16];	// D[i] lsb and F[i]
 	uint32	freghi[16];	// D[i] msb and F[i+16]
@@ -298,6 +307,8 @@ struct M
 	bool	racecall;
 	bool	needextram;
 	void*	racepc;
+	void	(*waitunlockf)(Lock*);
+	Lock*	waitlock;
 	uint32	moreframesize_minalloc;
 
 	uintptr	settype_buf[1024];
@@ -317,7 +328,11 @@ struct P
 {
 	Lock;
 
+	uint32	status;	// one of Pidle/Prunning/...
 	P*	link;
+	uint32	tick;	// incremented on every scheduler or system call
+	M*	m;	// back-link to associated M (nil if idle)
+	MCache*	mcache;
 
 	// Queue of runnable goroutines.
 	G**	runq;
@@ -608,6 +623,7 @@ extern uintptr runtime·zerobase;
 extern	G*	runtime·allg;
 extern	G*	runtime·lastg;
 extern	M*	runtime·allm;
+extern	P**	runtime·allp;
 extern	int32	runtime·gomaxprocs;
 extern	bool	runtime·singleproc;
 extern	uint32	runtime·panicking;
......
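
The struct M changes above drop the havenextg/nextg handoff in favour of a single park note plus a nextp field; the commit message's "more conservative worker thread blocking/unblocking" comes down to parking idle M's on that note until someone has work for them. A hedged sketch of that protocol follows, with illustrative helper names (stopm, mput, wakem); the real code is in the collapsed proc.c diff.

// Illustrative sketch, not the patch: an M with nothing to run parks on its
// own note; whoever finds work for it hands it a P via nextp and wakes it.
static void
stopm(void)
{
	runtime·lock(&runtime·sched);
	mput(m);	// put this M on the scheduler's idle list (not shown)
	runtime·unlock(&runtime·sched);

	runtime·notesleep(&m->park);	// block until another thread wakes us
	runtime·noteclear(&m->park);	// re-arm the note for the next park

	acquirep(m->nextp);	// resume running with the P we were handed
	m->nextp = nil;
}

static void
wakem(M *mp, P *p)
{
	mp->nextp = p;	// hand the sleeping M a P to run with
	runtime·notewakeup(&mp->park);	// and let it go
}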