Commit 4cc7bf32 authored by Dmitriy Vyukov

pprof: add goroutine blocking profiling

The profiler collects goroutine blocking information similar to that collected by Google Perf Tools.
An example of the profile (converted to SVG) is attached to
http://code.google.com/p/go/issues/detail?id=3946
The public API changes are:
+pkg runtime, func BlockProfile([]BlockProfileRecord) (int, bool)
+pkg runtime, func SetBlockProfileRate(int)
+pkg runtime, method (*BlockProfileRecord) Stack() []uintptr
+pkg runtime, type BlockProfileRecord struct
+pkg runtime, type BlockProfileRecord struct, Count int64
+pkg runtime, type BlockProfileRecord struct, Cycles int64
+pkg runtime, type BlockProfileRecord struct, embedded StackRecord
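For orientation (not part of this CL), a minimal Go sketch of how the new API fits together; the channel receive is just an arbitrary way to produce a blocking event and the retry handling is simplified:

package main

import (
	"fmt"
	"runtime"
)

func main() {
	// Record every blocking event; a rate <= 0 would disable the profile.
	runtime.SetBlockProfileRate(1)

	// Generate one blocking event: an unbuffered channel receive that waits.
	ch := make(chan int)
	go func() { ch <- 1 }()
	<-ch

	// Two-call protocol: first ask for the record count, then pass a slice
	// that is large enough; ok is false if the profile grew in between.
	n, _ := runtime.BlockProfile(nil)
	records := make([]runtime.BlockProfileRecord, n+10)
	n, ok := runtime.BlockProfile(records)
	if !ok {
		return // a real caller would retry with a bigger slice
	}
	for _, r := range records[:n] {
		fmt.Printf("count=%d cycles=%d stack=%x\n", r.Count, r.Cycles, r.Stack())
	}
}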

R=rsc, dave, minux.ma, r
CC=gobot, golang-dev, r, remyoudompheng
https://golang.org/cl/6443115
parent ebb0e5db
...@@ -112,6 +112,18 @@ directory containing the package sources, has its own flags:
garbage collector, provided the test can run in the available
memory without garbage collection.
-test.blockprofile block.out
Write a goroutine blocking profile to the specified file
when all tests are complete.
-test.blockprofilerate n
Control the detail provided in goroutine blocking profiles by setting
runtime.BlockProfileRate to n. See 'godoc runtime BlockProfileRate'.
The profiler aims to sample, on average, one blocking event every
n nanoseconds the program spends blocked. By default,
if -test.blockprofile is set without this flag, all blocking events
are recorded, equivalent to -test.blockprofilerate=1.
-test.parallel n
Allow parallel execution of test functions that call t.Parallel.
The value of this flag is the maximum number of tests to run
...
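To illustrate the flags documented above (not part of the CL), a hypothetical test that would generate data for them; the package name, test name and the invocation in the comments are invented:

// Hypothetical example_test.go; run with:
//   go test -blockprofile=block.out -blockprofilerate=1
// and inspect the result with go tool pprof and the test binary.
package example

import (
	"sync"
	"testing"
)

func TestLockContention(t *testing.T) {
	var mu sync.Mutex
	var wg sync.WaitGroup
	for i := 0; i < 4; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for j := 0; j < 1000; j++ {
				mu.Lock() // contended Lock calls block and feed the profile
				mu.Unlock()
			}
		}()
	}
	wg.Wait()
}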
...@@ -31,6 +31,8 @@ var usageMessage = `Usage of go test:
-cpuprofile="": passes -test.cpuprofile to test
-memprofile="": passes -test.memprofile to test
-memprofilerate=0: passes -test.memprofilerate to test
-blockprofile="": passes -test.blockprofile to test
-blockprofilerate=0: passes -test.blockprofilerate to test
-parallel=0: passes -test.parallel to test
-run="": passes -test.run to test
-short=false: passes -test.short to test
...@@ -82,6 +84,8 @@ var testFlagDefn = []*testFlagSpec{
{name: "cpuprofile", passToTest: true},
{name: "memprofile", passToTest: true},
{name: "memprofilerate", passToTest: true},
{name: "blockprofile", passToTest: true},
{name: "blockprofilerate", passToTest: true},
{name: "parallel", passToTest: true},
{name: "run", passToTest: true},
{name: "short", boolVar: new(bool), passToTest: true},
...
...@@ -30,6 +30,10 @@
//
// go tool pprof http://localhost:6060/debug/pprof/profile
//
// Or to look at the goroutine blocking profile:
//
// go tool pprof http://localhost:6060/debug/pprof/block
//
// Or to view all available profiles:
//
// go tool pprof http://localhost:6060/debug/pprof/
...
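A sketch of a server that exposes the new endpoint (not part of the CL); the address and the rate of 1 are arbitrary choices, and the block profile stays empty unless a non-zero rate is set:

package main

import (
	"log"
	"net/http"
	_ "net/http/pprof" // registers the /debug/pprof/ handlers, including /debug/pprof/block
	"runtime"
)

func main() {
	runtime.SetBlockProfileRate(1) // without this the block profile records nothing

	// Fetch the profile with:
	//   go tool pprof http://localhost:6060/debug/pprof/block
	log.Fatal(http.ListenAndServe("localhost:6060", nil))
}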
...@@ -22,6 +22,7 @@ struct SudoG
G* g; // g and selgen constitute
uint32 selgen; // a weak pointer to g
SudoG* link;
int64 releasetime;
byte* elem; // data element
};
...@@ -154,6 +155,7 @@ runtime·chansend(ChanType *t, Hchan *c, byte *ep, bool *pres)
SudoG *sg;
SudoG mysg;
G* gp;
int64 t0;
if(c == nil) {
USED(t);
...@@ -174,6 +176,13 @@ runtime·chansend(ChanType *t, Hchan *c, byte *ep, bool *pres)
runtime·prints("\n");
}
t0 = 0;
mysg.releasetime = 0;
if(runtime·blockprofilerate > 0) {
t0 = runtime·cputicks();
mysg.releasetime = -1;
}
runtime·lock(c);
if(c->closed)
goto closed;
...@@ -189,6 +198,8 @@ runtime·chansend(ChanType *t, Hchan *c, byte *ep, bool *pres)
gp->param = sg;
if(sg->elem != nil)
c->elemalg->copy(c->elemsize, sg->elem, ep);
if(sg->releasetime)
sg->releasetime = runtime·cputicks();
runtime·ready(gp);
if(pres != nil)
...@@ -216,6 +227,9 @@ runtime·chansend(ChanType *t, Hchan *c, byte *ep, bool *pres)
goto closed;
}
if(mysg.releasetime > 0)
runtime·blockevent(mysg.releasetime - t0, 2);
return;
asynch:
...@@ -246,11 +260,15 @@ asynch:
if(sg != nil) {
gp = sg->g;
runtime·unlock(c);
if(sg->releasetime)
sg->releasetime = runtime·cputicks();
runtime·ready(gp);
} else
runtime·unlock(c);
if(pres != nil)
*pres = true;
if(mysg.releasetime > 0)
runtime·blockevent(mysg.releasetime - t0, 2);
return;
closed:
...@@ -265,6 +283,7 @@ runtime·chanrecv(ChanType *t, Hchan* c, byte *ep, bool *selected, bool *receive
SudoG *sg;
SudoG mysg;
G *gp;
int64 t0;
if(runtime·gcwaiting)
runtime·gosched();
...@@ -282,6 +301,13 @@ runtime·chanrecv(ChanType *t, Hchan* c, byte *ep, bool *selected, bool *receive
return; // not reached
}
t0 = 0;
mysg.releasetime = 0;
if(runtime·blockprofilerate > 0) {
t0 = runtime·cputicks();
mysg.releasetime = -1;
}
runtime·lock(c);
if(c->dataqsiz > 0)
goto asynch;
...@@ -297,6 +323,8 @@ runtime·chanrecv(ChanType *t, Hchan* c, byte *ep, bool *selected, bool *receive
c->elemalg->copy(c->elemsize, ep, sg->elem);
gp = sg->g;
gp->param = sg;
if(sg->releasetime)
sg->releasetime = runtime·cputicks();
runtime·ready(gp);
if(selected != nil)
...@@ -328,6 +356,8 @@ runtime·chanrecv(ChanType *t, Hchan* c, byte *ep, bool *selected, bool *receive
if(received != nil)
*received = true;
if(mysg.releasetime > 0)
runtime·blockevent(mysg.releasetime - t0, 2);
return;
asynch:
...@@ -362,6 +392,8 @@ asynch:
if(sg != nil) {
gp = sg->g;
runtime·unlock(c);
if(sg->releasetime)
sg->releasetime = runtime·cputicks();
runtime·ready(gp);
} else
runtime·unlock(c);
...@@ -370,6 +402,8 @@ asynch:
*selected = true;
if(received != nil)
*received = true;
if(mysg.releasetime > 0)
runtime·blockevent(mysg.releasetime - t0, 2);
return;
closed:
...@@ -380,6 +414,8 @@ closed:
if(received != nil)
*received = false;
runtime·unlock(c);
if(mysg.releasetime > 0)
runtime·blockevent(mysg.releasetime - t0, 2);
}
// chansend1(hchan *chan any, elem any);
...
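The chansend/chanrecv instrumentation above is what turns an ordinary blocked channel operation into a profile record. A small illustrative program (not part of the CL), assuming a runtime built with this change:

package main

import (
	"fmt"
	"runtime"
	"time"
)

func main() {
	runtime.SetBlockProfileRate(1)

	ch := make(chan struct{}) // unbuffered: the receive below must wait
	go func() {
		time.Sleep(100 * time.Millisecond)
		ch <- struct{}{}
	}()
	<-ch // chanrecv stamps mysg.releasetime and reports the wait via blockevent

	n, _ := runtime.BlockProfile(nil)
	fmt.Println("block profile records:", n) // expected to be at least 1
}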
...@@ -138,6 +138,31 @@ func CPUProfile() []byte
// SetCPUProfileRate directly.
func SetCPUProfileRate(hz int)
// SetBlockProfileRate controls the fraction of goroutine blocking events
// that are reported in the blocking profile. The profiler aims to sample
// an average of one blocking event per rate nanoseconds spent blocked.
//
// To include every blocking event in the profile, pass rate = 1.
// To turn off profiling entirely, pass rate <= 0.
func SetBlockProfileRate(rate int)
// BlockProfileRecord describes blocking events originating
// at a particular call sequence (stack trace).
type BlockProfileRecord struct {
Count int64
Cycles int64
StackRecord
}
// BlockProfile returns n, the number of records in the current blocking profile.
// If len(p) >= n, BlockProfile copies the profile into p and returns n, true.
// If len(p) < n, BlockProfile does not change p and returns n, false.
//
// Most clients should use the runtime/pprof package or
// the testing package's -test.blockprofile flag instead
// of calling BlockProfile directly.
func BlockProfile(p []BlockProfileRecord) (n int, ok bool)
// Stack formats a stack trace of the calling goroutine into buf
// and returns the number of bytes written to buf.
// If all is true, Stack formats stack traces of all other goroutines
...
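To make the rate semantics concrete (an illustrative snippet, not from the CL; the millisecond value is just an example):

package main

import (
	"runtime"
	"time"
)

func main() {
	runtime.SetBlockProfileRate(1)                     // include every blocking event
	runtime.SetBlockProfileRate(int(time.Millisecond)) // ~one sampled event per millisecond spent blocked
	runtime.SetBlockProfileRate(0)                     // rate <= 0 turns off block profiling
}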
...@@ -15,21 +15,35 @@ package runtime
// NOTE(rsc): Everything here could use cas if contention became an issue.
static Lock proflock;
enum { MProf, BProf };  // profile types
// Per-call-stack profiling information.
// Lookup by hashing call stack into a linked-list hash table.
typedef struct Bucket Bucket;
struct Bucket
{
Bucket *next; // next in hash list
Bucket *allnext; // next in list of all mbuckets/bbuckets
int32 typ;
union
{
struct // typ == MProf
{
uintptr allocs;
uintptr frees;
uintptr alloc_bytes;
uintptr free_bytes;
uintptr recent_allocs; // since last gc
uintptr recent_frees;
uintptr recent_alloc_bytes;
uintptr recent_free_bytes;
};
struct // typ == BProf
{
int64 count;
int64 cycles;
};
};
uintptr hash;
uintptr nstk;
uintptr stk[1];
...@@ -38,12 +52,13 @@ enum {
BuckHashSize = 179999,
};
static Bucket **buckhash;
static Bucket *mbuckets; // memory profile buckets
static Bucket *bbuckets; // blocking profile buckets
static uintptr bucketmem;
// Return the bucket for stk[0:nstk], allocating new bucket if needed.
static Bucket*
stkbucket(int32 typ, uintptr *stk, int32 nstk, bool alloc)
{
int32 i;
uintptr h;
...@@ -66,7 +81,7 @@ stkbucket(uintptr *stk, int32 nstk, bool alloc)
i = h%BuckHashSize;
for(b = buckhash[i]; b; b=b->next)
if(b->typ == typ && b->hash == h && b->nstk == nstk &&
runtime·mcmp((byte*)b->stk, (byte*)stk, nstk*sizeof stk[0]) == 0)
return b;
...@@ -76,12 +91,18 @@ stkbucket(uintptr *stk, int32 nstk, bool alloc)
b = runtime·mallocgc(sizeof *b + nstk*sizeof stk[0], FlagNoProfiling, 0, 1);
bucketmem += sizeof *b + nstk*sizeof stk[0];
runtime·memmove(b->stk, stk, nstk*sizeof stk[0]);
b->typ = typ;
b->hash = h;
b->nstk = nstk;
b->next = buckhash[i];
buckhash[i] = b;
if(typ == MProf) {
b->allnext = mbuckets;
mbuckets = b;
} else {
b->allnext = bbuckets;
bbuckets = b;
}
return b;
}
...@@ -92,7 +113,7 @@ runtime·MProf_GC(void)
Bucket *b;
runtime·lock(&proflock);
for(b=mbuckets; b; b=b->allnext) {
b->allocs += b->recent_allocs;
b->frees += b->recent_frees;
b->alloc_bytes += b->recent_alloc_bytes;
...@@ -228,7 +249,7 @@ runtime·MProf_Malloc(void *p, uintptr size)
m->nomemprof++;
nstk = runtime·callers(1, stk, 32);
runtime·lock(&proflock);
b = stkbucket(MProf, stk, nstk, true);
b->recent_allocs++;
b->recent_alloc_bytes += size;
setaddrbucket((uintptr)p, b);
...@@ -256,6 +277,35 @@ runtime·MProf_Free(void *p, uintptr size)
m->nomemprof--;
}
int64 runtime·blockprofilerate; // in CPU ticks
void
runtime·SetBlockProfileRate(intgo rate)
{
runtime·atomicstore64((uint64*)&runtime·blockprofilerate, rate * runtime·tickspersecond() / (1000*1000*1000));
}
void
runtime·blockevent(int64 cycles, int32 skip)
{
int32 nstk;
int64 rate;
uintptr stk[32];
Bucket *b;
if(cycles <= 0)
return;
rate = runtime·atomicload64((uint64*)&runtime·blockprofilerate);
if(rate <= 0 || (rate > cycles && runtime·fastrand1()%rate > cycles))
return;
nstk = runtime·callers(skip, stk, 32);
runtime·lock(&proflock);
b = stkbucket(BProf, stk, nstk, true);
b->count++;
b->cycles += cycles;
runtime·unlock(&proflock);
}
// Go interface to profile data. (Declared in extern.go)
// Assumes Go sizeof(int) == sizeof(int32)
...@@ -290,20 +340,53 @@ func MemProfile(p Slice, include_inuse_zero bool) (n int, ok bool) {
runtime·lock(&proflock);
n = 0;
for(b=mbuckets; b; b=b->allnext)
if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
n++;
ok = false;
if(n <= p.len) {
ok = true;
r = (Record*)p.array;
for(b=mbuckets; b; b=b->allnext)
if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
record(r++, b);
}
runtime·unlock(&proflock);
}
// Must match BlockProfileRecord in debug.go.
typedef struct BRecord BRecord;
struct BRecord {
int64 count;
int64 cycles;
uintptr stk[32];
};
func BlockProfile(p Slice) (n int, ok bool) {
Bucket *b;
BRecord *r;
int32 i;
runtime·lock(&proflock);
n = 0;
for(b=bbuckets; b; b=b->allnext)
n++;
ok = false;
if(n <= p.len) {
ok = true;
r = (BRecord*)p.array;
for(b=bbuckets; b; b=b->allnext, r++) {
r->count = b->count;
r->cycles = b->cycles;
for(i=0; i<b->nstk && i<nelem(r->stk); i++)
r->stk[i] = b->stk[i];
for(; i<nelem(r->stk); i++)
r->stk[i] = 0;
}
}
runtime·unlock(&proflock);
}
// Must match StackRecord in debug.go.
typedef struct TRecord TRecord;
struct TRecord {
...
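The sampling decision in runtime·blockevent can be restated in Go for illustration (math/rand standing in for runtime·fastrand1); this is a sketch of the rule, not runtime code:

package main

import (
	"fmt"
	"math/rand"
)

// keep sketches the rule above: an event blocked for cycles ticks is always
// kept when cycles >= rate, and otherwise kept with probability roughly
// cycles/rate, so on average one event is recorded per rate ticks spent blocked.
func keep(cycles, rate int64) bool {
	if cycles <= 0 || rate <= 0 {
		return false
	}
	return cycles >= rate || rand.Int63n(rate) <= cycles
}

func main() {
	const rate = 1000
	kept := 0
	for i := 0; i < 100000; i++ {
		if keep(100, rate) { // 100-tick events against a 1000-tick rate
			kept++
		}
	}
	fmt.Printf("kept %d of 100000 events (~10%% expected)\n", kept)
}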
...@@ -36,6 +36,7 @@ import (
// goroutine - stack traces of all current goroutines
// heap - a sampling of all heap allocations
// threadcreate - stack traces that led to the creation of new OS threads
// block - stack traces that led to blocking on synchronization primitives
//
// These predefined profiles maintain themselves and panic on an explicit
// Add or Remove method call.
...@@ -76,6 +77,12 @@ var heapProfile = &Profile{
write: writeHeap,
}
var blockProfile = &Profile{
name: "block",
count: countBlock,
write: writeBlock,
}
func lockProfiles() {
profiles.mu.Lock()
if profiles.m == nil {
...@@ -84,6 +91,7 @@ func lockProfiles() {
"goroutine": goroutineProfile,
"threadcreate": threadcreateProfile,
"heap": heapProfile,
"block": blockProfile,
}
}
}
...@@ -600,3 +608,60 @@ func StopCPUProfile() {
runtime.SetCPUProfileRate(0)
<-cpu.done
}
type byCycles []runtime.BlockProfileRecord
func (x byCycles) Len() int { return len(x) }
func (x byCycles) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
func (x byCycles) Less(i, j int) bool { return x[i].Cycles > x[j].Cycles }
// countBlock returns the number of records in the blocking profile.
func countBlock() int {
n, _ := runtime.BlockProfile(nil)
return n
}
// writeBlock writes the current blocking profile to w.
func writeBlock(w io.Writer, debug int) error {
var p []runtime.BlockProfileRecord
n, ok := runtime.BlockProfile(nil)
for {
p = make([]runtime.BlockProfileRecord, n+50)
n, ok = runtime.BlockProfile(p)
if ok {
p = p[:n]
break
}
}
sort.Sort(byCycles(p))
b := bufio.NewWriter(w)
var tw *tabwriter.Writer
w = b
if debug > 0 {
tw = tabwriter.NewWriter(w, 1, 8, 1, '\t', 0)
w = tw
}
fmt.Fprintf(w, "--- contention:\n")
fmt.Fprintf(w, "cycles/second=%v\n", runtime_cyclesPerSecond())
for i := range p {
r := &p[i]
fmt.Fprintf(w, "%v %v @", r.Cycles, r.Count)
for _, pc := range r.Stack() {
fmt.Fprintf(w, " %#x", pc)
}
fmt.Fprint(w, "\n")
if debug > 0 {
printStackRecord(w, r.Stack(), false)
}
}
if tw != nil {
tw.Flush()
}
return b.Flush()
}
func runtime_cyclesPerSecond() int64
...@@ -358,3 +358,40 @@ runtime·fastrand1(void)
m->fastrand = x;
return x;
}
static Lock ticksLock;
static int64 ticks;
int64
runtime·tickspersecond(void)
{
int64 res, t0, t1, c0, c1;
res = (int64)runtime·atomicload64((uint64*)&ticks);
if(res != 0)
return ticks;
runtime·lock(&ticksLock);
res = ticks;
if(res == 0) {
t0 = runtime·nanotime();
c0 = runtime·cputicks();
runtime·usleep(100*1000);
t1 = runtime·nanotime();
c1 = runtime·cputicks();
if(t1 == t0)
t1++;
res = (c1-c0)*1000*1000*1000/(t1-t0);
if(res == 0)
res++;
runtime·atomicstore64((uint64*)&ticks, res);
}
runtime·unlock(&ticksLock);
return res;
}
void
runtimepprof·runtime_cyclesPerSecond(int64 res)
{
res = runtime·tickspersecond();
FLUSH(&res);
}
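The calibration idea behind runtime·tickspersecond can be sketched in Go; cputicks here is a hypothetical stand-in, since the real counter is not exposed to Go code:

package main

import (
	"fmt"
	"time"
)

// cputicks stands in for the runtime's CPU tick counter; using nanotime
// makes the sketch runnable but turns the answer into a trivial ~1e9.
func cputicks() int64 { return time.Now().UnixNano() }

// ticksPerSecond mirrors the calibration above: sample the tick counter and
// the wall clock roughly 100ms apart and divide.
func ticksPerSecond() int64 {
	t0 := time.Now()
	c0 := cputicks()
	time.Sleep(100 * time.Millisecond)
	t1 := time.Now()
	c1 := cputicks()
	elapsed := t1.Sub(t0).Nanoseconds()
	if elapsed == 0 {
		elapsed = 1
	}
	res := (c1 - c0) * 1000 * 1000 * 1000 / elapsed
	if res == 0 {
		res = 1
	}
	return res
}

func main() {
	// SetBlockProfileRate uses this value to convert its nanosecond rate
	// into ticks: rateTicks = rateNanoseconds * ticksPerSecond / 1e9.
	fmt.Println("estimated ticks per second:", ticksPerSecond())
}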
...@@ -642,6 +642,9 @@ void runtime·resetcpuprofiler(int32);
void runtime·setcpuprofilerate(void(*)(uintptr*, int32), int32);
void runtime·usleep(uint32);
int64 runtime·cputicks(void);
int64 runtime·tickspersecond(void);
void runtime·blockevent(int64, int32);
extern int64 runtime·blockprofilerate;
#pragma varargck argpos runtime·printf 1
#pragma varargck type "d" int32
...
...@@ -24,20 +24,21 @@ package sync
typedef struct Sema Sema;
struct Sema
{
uint32 volatile* addr;
G* g;
int64 releasetime;
Sema* prev;
Sema* next;
};
typedef struct SemaRoot SemaRoot;
struct SemaRoot
{
Lock;
Sema* head;
Sema* tail;
// Number of waiters. Read w/o the lock.
uint32 volatile nwait;
};
// Prime to not correlate with any user patterns.
...@@ -97,12 +98,13 @@ cansemacquire(uint32 *addr)
return 0;
}
static void
semacquireimpl(uint32 volatile *addr, int32 profile)
{
Sema s; // Needs to be allocated on stack, otherwise garbage collector could deallocate it
SemaRoot *root;
int64 t0;
// Easy case.
if(cansemacquire(addr))
return;
...@@ -114,6 +116,12 @@ runtime·semacquire(uint32 volatile *addr)
// sleep
// (waiter descriptor is dequeued by signaler)
root = semroot(addr);
t0 = 0;
s.releasetime = 0;
if(profile && runtime·blockprofilerate > 0) {
t0 = runtime·cputicks();
s.releasetime = -1;
}
for(;;) {
runtime·lock(root);
// Add ourselves to nwait to disable "easy case" in semrelease.
...@@ -128,11 +136,20 @@ runtime·semacquire(uint32 volatile *addr)
// (we set nwait above), so go to sleep.
semqueue(root, addr, &s);
runtime·park(runtime·unlock, root, "semacquire");
if(cansemacquire(addr)) {
if(t0)
runtime·blockevent(s.releasetime - t0, 3);
return;
}
}
}
void
runtime·semacquire(uint32 volatile *addr)
{
semacquireimpl(addr, 0);
}
void
runtime·semrelease(uint32 volatile *addr)
{
...@@ -164,12 +181,15 @@ runtime·semrelease(uint32 volatile *addr)
}
}
runtime·unlock(root);
if(s) {
if(s->releasetime)
s->releasetime = runtime·cputicks();
runtime·ready(s->g);
}
}
func runtime_Semacquire(addr *uint32) {
semacquireimpl(addr, 1);
}
func runtime_Semrelease(addr *uint32) {
...
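Since sync.Mutex ultimately parks in runtime_Semacquire, which now routes through semacquireimpl with profiling enabled, mutex waits show up in the block profile. An illustrative program (not part of the CL):

package main

import (
	"os"
	"runtime"
	"runtime/pprof"
	"sync"
	"time"
)

func main() {
	runtime.SetBlockProfileRate(1)

	var mu sync.Mutex
	mu.Lock()
	go func() {
		time.Sleep(50 * time.Millisecond)
		mu.Unlock()
	}()
	// This Lock parks in runtime_Semacquire, which now goes through
	// semacquireimpl with profile=1, so the ~50ms wait is recorded.
	mu.Lock()
	mu.Unlock()

	// debug=1 prints counts, cycles and symbolized stacks.
	pprof.Lookup("block").WriteTo(os.Stdout, 1)
}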
...@@ -206,14 +206,8 @@ runtime·setup_auxv(int32 argc, void *argv_list)
#pragma textflag 7
int64
runtime·cputicks() {
// Currently cputicks() is used in blocking profiler and to seed runtime·fastrand1().
// runtime·nanotime() is a poor approximation of CPU ticks that is enough for the profiler.
// runtime·randomNumber provides better seeding of fastrand1.
return runtime·nanotime() + runtime·randomNumber;
}
...@@ -102,14 +102,16 @@ var (
short = flag.Bool("test.short", false, "run smaller test suite to save time")
// Report as tests are run; default is silent for success.
chatty = flag.Bool("test.v", false, "verbose: print additional output")
match = flag.String("test.run", "", "regular expression to select tests and examples to run")
memProfile = flag.String("test.memprofile", "", "write a memory profile to the named file after execution")
memProfileRate = flag.Int("test.memprofilerate", 0, "if >=0, sets runtime.MemProfileRate")
cpuProfile = flag.String("test.cpuprofile", "", "write a cpu profile to the named file during execution")
blockProfile = flag.String("test.blockprofile", "", "write a goroutine blocking profile to the named file after execution")
blockProfileRate = flag.Int("test.blockprofilerate", 1, "if >= 0, calls runtime.SetBlockProfileRate()")
timeout = flag.Duration("test.timeout", 0, "if positive, sets an aggregate time limit for all tests")
cpuListStr = flag.String("test.cpu", "", "comma-separated list of number of CPUs to use for each test")
parallel = flag.Int("test.parallel", runtime.GOMAXPROCS(0), "maximum test parallelism")
haveExamples bool // are there examples?
...@@ -420,7 +422,9 @@ func before() {
}
// Could save f so after can call f.Close; not worth the effort.
}
if *blockProfile != "" && *blockProfileRate >= 0 {
runtime.SetBlockProfileRate(*blockProfileRate)
}
}
// after runs after all testing.
...@@ -439,6 +443,17 @@ func after() {
}
f.Close()
}
if *blockProfile != "" && *blockProfileRate >= 0 {
f, err := os.Create(*blockProfile)
if err != nil {
fmt.Fprintf(os.Stderr, "testing: %s", err)
return
}
if err = pprof.Lookup("block").WriteTo(f, 0); err != nil {
fmt.Fprintf(os.Stderr, "testing: can't write %s: %s", *blockProfile, err)
}
f.Close()
}
}
var timer *time.Timer
...