Commit f378f300 authored by Keith Randall's avatar Keith Randall

undo CL 101570044 / 2c57aaea79c4

redo stack allocation.  This is mostly the same as
the original CL with a few bug fixes.

1. add racemalloc() for stack allocations
2. fix poolalloc/poolfree to terminate free lists correctly.
3. adjust span ref count correctly.
4. don't use cache for sizes >= StackCacheSize.

Should fix bugs and memory leaks in original changelist.

««« original CL description
undo CL 104200047 / 318b04f28372

Breaks windows and race detector.
TBR=rsc

««« original CL description
runtime: stack allocator, separate from mallocgc

In order to move malloc to Go, we need to have a
separate stack allocator.  If we run out of stack
during malloc, malloc will not be available
to allocate a new stack.

Stacks are the last remaining FlagNoGC objects in the
GC heap.  Once they are out, we can get rid of the
distinction between the allocated/blockboundary bits.
(This will be in a separate change.)

Fixes #7468
Fixes #7424

LGTM=rsc, dvyukov
R=golang-codereviews, dvyukov, khr, dave, rsc
CC=golang-codereviews
https://golang.org/cl/104200047
»»»

TBR=rsc
CC=golang-codereviews
https://golang.org/cl/101570044
»»»

LGTM=dvyukov
R=dvyukov, dave, khr, alex.brainman
CC=golang-codereviews
https://golang.org/cl/112240044
parent 6b2aabee
......@@ -116,6 +116,12 @@ enum
MaxMHeapList = 1<<(20 - PageShift), // Maximum page length for fixed-size list in MHeap.
HeapAllocChunk = 1<<20, // Chunk size for heap growth
// Per-P, per order stack segment cache size.
StackCacheSize = 32*1024,
// Number of orders that get caching. Order 0 is FixedStack
// and each successive order is twice as large.
NumStackOrders = 3,
// Number of bits in page to span calculations (4k pages).
// On Windows 64-bit we limit the arena to 32GB or 35 bits (see below for reason).
// On other 64-bit platforms, we limit the arena to 128GB, or 37 bits.
......@@ -247,8 +253,8 @@ struct MStats
// Statistics about allocation of low-level fixed-size structures.
// Protected by FixAlloc locks.
uint64 stacks_inuse; // bootstrap stacks
uint64 stacks_sys;
uint64 stacks_inuse; // this number is included in heap_inuse above
uint64 stacks_sys; // always 0 in mstats
uint64 mspan_inuse; // MSpan structures
uint64 mspan_sys;
uint64 mcache_inuse; // MCache structures
......@@ -305,6 +311,13 @@ struct MCacheList
uint32 nlist;
};
typedef struct StackFreeList StackFreeList;
struct StackFreeList
{
MLink *list; // linked list of free stacks
uintptr size; // total size of stacks in list
};
// Per-thread (in Go, per-P) cache for small objects.
// No locking needed because it is per-thread (per-P).
struct MCache
......@@ -320,6 +333,9 @@ struct MCache
// The rest is not accessed on every malloc.
MSpan* alloc[NumSizeClasses]; // spans to allocate from
MCacheList free[NumSizeClasses];// lists of explicitly freed objects
StackFreeList stackcache[NumStackOrders];
// Local allocator stats, flushed during GC.
uintptr local_nlookup; // number of pointer lookups
uintptr local_largefree; // bytes freed for large objects (>MaxSmallSize)
......@@ -330,6 +346,7 @@ struct MCache
MSpan* runtime·MCache_Refill(MCache *c, int32 sizeclass);
void runtime·MCache_Free(MCache *c, MLink *p, int32 sizeclass, uintptr size);
void runtime·MCache_ReleaseAll(MCache *c);
void runtime·stackcache_clear(MCache *c);
// MTypes describes the types of blocks allocated within a span.
// The compression field describes the layout of the data.
......@@ -409,7 +426,8 @@ struct SpecialProfile
// An MSpan is a run of pages.
enum
{
MSpanInUse = 0,
MSpanInUse = 0, // allocated for garbage collected heap
MSpanStack, // allocated for use by stack allocator
MSpanFree,
MSpanListHead,
MSpanDead,
......@@ -525,7 +543,9 @@ extern MHeap runtime·mheap;
void runtime·MHeap_Init(MHeap *h);
MSpan* runtime·MHeap_Alloc(MHeap *h, uintptr npage, int32 sizeclass, bool large, bool needzero);
MSpan* runtime·MHeap_AllocStack(MHeap *h, uintptr npage);
void runtime·MHeap_Free(MHeap *h, MSpan *s, int32 acct);
void runtime·MHeap_FreeStack(MHeap *h, MSpan *s);
MSpan* runtime·MHeap_Lookup(MHeap *h, void *v);
MSpan* runtime·MHeap_LookupMaybe(MHeap *h, void *v);
void runtime·MGetSizeClassInfo(int32 sizeclass, uintptr *size, int32 *npages, int32 *nobj);
......@@ -533,7 +553,6 @@ void* runtime·MHeap_SysAlloc(MHeap *h, uintptr n);
void runtime·MHeap_MapBits(MHeap *h);
void runtime·MHeap_MapSpans(MHeap *h);
void runtime·MHeap_Scavenger(void);
void runtime·MHeap_SplitSpan(MHeap *h, MSpan *s);
void* runtime·mallocgc(uintptr size, uintptr typ, uint32 flag);
void* runtime·persistentalloc(uintptr size, uintptr align, uint64 *stat);
......
......@@ -39,16 +39,35 @@ runtime·allocmcache(void)
return c;
}
void
runtime·freemcache(MCache *c)
static void
freemcache(MCache *c)
{
runtime·MCache_ReleaseAll(c);
runtime·stackcache_clear(c);
runtime·lock(&runtime·mheap);
runtime·purgecachedstats(c);
runtime·FixAlloc_Free(&runtime·mheap.cachealloc, c);
runtime·unlock(&runtime·mheap);
}
static void
freemcache_m(G *gp)
{
MCache *c;
c = g->m->ptrarg[0];
g->m->ptrarg[0] = nil;
freemcache(c);
runtime·gogo(&gp->sched);
}
void
runtime·freemcache(MCache *c)
{
g->m->ptrarg[0] = c;
runtime·mcall(freemcache_m);
}
// Gets a span that has a free object in it and assigns it
// to be the cached span for the given sizeclass. Returns this span.
MSpan*
......
......@@ -164,6 +164,8 @@ MCentral_Free(MCentral *c, MLink *v)
s = runtime·MHeap_Lookup(&runtime·mheap, v);
if(s == nil || s->ref == 0)
runtime·throw("invalid free");
if(s->state != MSpanInUse)
runtime·throw("free into stack span");
if(s->sweepgen != runtime·mheap.sweepgen)
runtime·throw("free into unswept span");
......
......@@ -30,7 +30,7 @@ type MemStats struct {
// Low-level fixed-size structure allocator statistics.
// Inuse is bytes used now.
// Sys is bytes obtained from system.
StackInuse uint64 // bootstrap stacks
StackInuse uint64 // bytes used by stack allocator
StackSys uint64
MSpanInuse uint64 // mspan structures
MSpanSys uint64
......
......@@ -1252,12 +1252,12 @@ markroot(ParFor *desc, uint32 i)
SpecialFinalizer *spf;
s = allspans[spanidx];
if(s->state != MSpanInUse)
continue;
if(s->sweepgen != sg) {
runtime·printf("sweep %d %d\n", s->sweepgen, sg);
runtime·throw("gc: unswept span");
}
if(s->state != MSpanInUse)
continue;
// The garbage collector ignores type pointers stored in MSpan.types:
// - Compiler-generated types are stored outside of heap.
// - The reflect package has runtime-generated types cached in its data structures.
......@@ -2119,23 +2119,29 @@ flushallmcaches(void)
if(c==nil)
continue;
runtime·MCache_ReleaseAll(c);
runtime·stackcache_clear(c);
}
}
static void
flushallmcaches_m(G *gp)
{
flushallmcaches();
runtime·gogo(&gp->sched);
}
void
runtime·updatememstats(GCStats *stats)
{
M *mp;
MSpan *s;
int32 i;
uint64 stacks_inuse, smallfree;
uint64 smallfree;
uint64 *src, *dst;
if(stats)
runtime·memclr((byte*)stats, sizeof(*stats));
stacks_inuse = 0;
for(mp=runtime·allm; mp; mp=mp->alllink) {
stacks_inuse += mp->stackinuse*FixedStack;
if(stats) {
src = (uint64*)&mp->gcstats;
dst = (uint64*)stats;
......@@ -2144,7 +2150,6 @@ runtime·updatememstats(GCStats *stats)
runtime·memclr((byte*)&mp->gcstats, sizeof(mp->gcstats));
}
}
mstats.stacks_inuse = stacks_inuse;
mstats.mcache_inuse = runtime·mheap.cachealloc.inuse;
mstats.mspan_inuse = runtime·mheap.spanalloc.inuse;
mstats.sys = mstats.heap_sys + mstats.stacks_sys + mstats.mspan_sys +
......@@ -2167,7 +2172,7 @@ runtime·updatememstats(GCStats *stats)
}
// Flush MCache's to MCentral.
flushallmcaches();
runtime·mcall(flushallmcaches_m);
// Aggregate local stats.
cachestats();
......@@ -2504,6 +2509,12 @@ runtime·ReadMemStats(MStats *stats)
// Size of the trailing by_size array differs between Go and C,
// NumSizeClasses was changed, but we can not change Go struct because of backward compatibility.
runtime·memcopy(runtime·sizeof_C_MStats, stats, &mstats);
// Stack numbers are part of the heap numbers, separate those out for user consumption
stats->stacks_sys = stats->stacks_inuse;
stats->heap_inuse -= stats->stacks_inuse;
stats->heap_sys -= stats->stacks_inuse;
g->m->gcing = 0;
g->m->locks++;
runtime·semrelease(&runtime·worldsema);
......
This diff is collapsed.
......@@ -153,6 +153,7 @@ runtime·schedinit(void)
runtime·precisestack = true; // haveexperiment("precisestack");
runtime·symtabinit();
runtime·stackinit();
runtime·mallocinit();
mcommoninit(g->m);
......@@ -1946,7 +1947,7 @@ gfput(P *p, G *gp)
runtime·throw("gfput: bad stacksize");
}
top = (Stktop*)gp->stackbase;
if(top->malloced) {
if(stksize != FixedStack) {
// non-standard stack size - free it.
runtime·stackfree(gp, (void*)gp->stack0, top);
gp->stack0 = 0;
......@@ -2013,6 +2014,9 @@ retry:
gp->stackbase = (uintptr)stk + FixedStack - sizeof(Stktop);
gp->stackguard = (uintptr)stk + StackGuard;
gp->stackguard0 = gp->stackguard;
} else {
if(raceenabled)
runtime·racemalloc((void*)gp->stack0, gp->stackbase + sizeof(Stktop) - gp->stack0);
}
}
return gp;
......
......@@ -146,13 +146,6 @@ enum
{
PtrSize = sizeof(void*),
};
enum
{
// Per-M stack segment cache size.
StackCacheSize = 32,
// Global <-> per-M stack segment cache transfer batch size.
StackCacheBatch = 16,
};
/*
* structures
*/
......@@ -326,10 +319,6 @@ struct M
M* schedlink;
uint32 machport; // Return address for Mach IPC (OS X)
MCache* mcache;
int32 stackinuse;
uint32 stackcachepos;
uint32 stackcachecnt;
void* stackcache[StackCacheSize];
G* lockedg;
uintptr createstack[32];// Stack that created this thread.
uint32 freglo[16]; // D[i] lsb and F[i]
......@@ -346,6 +335,8 @@ struct M
bool (*waitunlockf)(G*, void*);
void* waitlock;
uintptr forkstackguard;
uintptr scalararg[4]; // scalar argument/return for mcall
void* ptrarg[4]; // pointer argument/return for mcall
#ifdef GOOS_windows
void* thread; // thread handle
// these are here because they are too large to be on the stack
......@@ -428,7 +419,6 @@ struct Stktop
uint8* argp; // pointer to arguments in old frame
bool panic; // is this frame the top of a panic?
bool malloced;
};
struct SigTab
{
......@@ -866,6 +856,7 @@ int32 runtime·funcarglen(Func*, uintptr);
int32 runtime·funcspdelta(Func*, uintptr);
int8* runtime·funcname(Func*);
int32 runtime·pcdatavalue(Func*, int32, uintptr);
void runtime·stackinit(void);
void* runtime·stackalloc(G*, uint32);
void runtime·stackfree(G*, void*, Stktop*);
void runtime·shrinkstack(G*);
......
This diff is collapsed.
......@@ -281,3 +281,52 @@ func TestDeferPtrs(t *testing.T) {
defer set(&y, 42)
growStack()
}
// use about n KB of stack
func useStack(n int) {
if n == 0 {
return
}
var b [1024]byte // makes frame about 1KB
useStack(n - 1 + int(b[99]))
}
func growing(c chan int, done chan struct{}) {
for n := range c {
useStack(n)
done <- struct{}{}
}
done <- struct{}{}
}
func TestStackCache(t *testing.T) {
// Allocate a bunch of goroutines and grow their stacks.
// Repeat a few times to test the stack cache.
const (
R = 4
G = 200
S = 5
)
for i := 0; i < R; i++ {
var reqchans [G]chan int
done := make(chan struct{})
for j := 0; j < G; j++ {
reqchans[j] = make(chan int)
go growing(reqchans[j], done)
}
for s := 0; s < S; s++ {
for j := 0; j < G; j++ {
reqchans[j] <- 1 << uint(s)
}
for j := 0; j < G; j++ {
<-done
}
}
for j := 0; j < G; j++ {
close(reqchans[j])
}
for j := 0; j < G; j++ {
<-done
}
}
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment