Commit 3cf83c18 authored by Keith Randall

undo CL 104200047 / 318b04f28372

Breaks windows and race detector.
TBR=rsc

««« original CL description
runtime: stack allocator, separate from mallocgc

In order to move malloc to Go, we need to have a
separate stack allocator.  If we run out of stack
during malloc, malloc will not be available
to allocate a new stack.

Stacks are the last remaining FlagNoGC objects in the
GC heap.  Once they are out, we can get rid of the
distinction between the allocated/blockboundary bits.
(This will be in a separate change.)

Fixes #7468
Fixes #7424

LGTM=rsc, dvyukov
R=golang-codereviews, dvyukov, khr, dave, rsc
CC=golang-codereviews
https://golang.org/cl/104200047
»»»

TBR=rsc
CC=golang-codereviews
https://golang.org/cl/101570044
parent 7c13860c
@@ -116,12 +116,6 @@ enum
     MaxMHeapList = 1<<(20 - PageShift), // Maximum page length for fixed-size list in MHeap.
     HeapAllocChunk = 1<<20,  // Chunk size for heap growth
 
-    // Per-P, per order stack segment cache size.
-    StackCacheSize = 32*1024,
-    // Number of orders that get caching. Order 0 is StackMin
-    // and each successive order is twice as large.
-    NumStackOrders = 3,
-
     // Number of bits in page to span calculations (4k pages).
     // On Windows 64-bit we limit the arena to 32GB or 35 bits (see below for reason).
     // On other 64-bit platforms, we limit the arena to 128GB, or 37 bits.
@@ -253,8 +247,8 @@ struct MStats
     // Statistics about allocation of low-level fixed-size structures.
     // Protected by FixAlloc locks.
-    uint64  stacks_inuse; // this number is included in heap_inuse above
-    uint64  stacks_sys;   // always 0 in mstats
+    uint64  stacks_inuse; // bootstrap stacks
+    uint64  stacks_sys;
     uint64  mspan_inuse;  // MSpan structures
     uint64  mspan_sys;
     uint64  mcache_inuse; // MCache structures
@@ -311,13 +305,6 @@ struct MCacheList
     uint32 nlist;
 };
 
-typedef struct StackFreeList StackFreeList;
-struct StackFreeList
-{
-    MLink *list;  // linked list of free stacks
-    uintptr size; // total size of stacks in list
-};
-
 // Per-thread (in Go, per-P) cache for small objects.
 // No locking needed because it is per-thread (per-P).
 struct MCache
@@ -333,9 +320,6 @@ struct MCache
     // The rest is not accessed on every malloc.
     MSpan*  alloc[NumSizeClasses];  // spans to allocate from
     MCacheList free[NumSizeClasses];// lists of explicitly freed objects
-
-    StackFreeList stackcache[NumStackOrders];
-
     // Local allocator stats, flushed during GC.
     uintptr local_nlookup;    // number of pointer lookups
     uintptr local_largefree;  // bytes freed for large objects (>MaxSmallSize)
@@ -346,7 +330,6 @@ struct MCache
 MSpan*  runtime·MCache_Refill(MCache *c, int32 sizeclass);
 void    runtime·MCache_Free(MCache *c, MLink *p, int32 sizeclass, uintptr size);
 void    runtime·MCache_ReleaseAll(MCache *c);
-void    runtime·stackcache_clear(MCache *c);
 
 // MTypes describes the types of blocks allocated within a span.
 // The compression field describes the layout of the data.
@@ -426,8 +409,7 @@ struct SpecialProfile
 // An MSpan is a run of pages.
 enum
 {
-    MSpanInUse = 0, // allocated for garbage collected heap
-    MSpanStack,     // allocated for use by stack allocator
+    MSpanInUse = 0,
     MSpanFree,
     MSpanListHead,
     MSpanDead,
@@ -543,9 +525,7 @@ extern MHeap runtime·mheap;
 void    runtime·MHeap_Init(MHeap *h);
 MSpan*  runtime·MHeap_Alloc(MHeap *h, uintptr npage, int32 sizeclass, bool large, bool needzero);
-MSpan*  runtime·MHeap_AllocStack(MHeap *h, uintptr npage);
 void    runtime·MHeap_Free(MHeap *h, MSpan *s, int32 acct);
-void    runtime·MHeap_FreeStack(MHeap *h, MSpan *s);
 MSpan*  runtime·MHeap_Lookup(MHeap *h, void *v);
 MSpan*  runtime·MHeap_LookupMaybe(MHeap *h, void *v);
 void    runtime·MGetSizeClassInfo(int32 sizeclass, uintptr *size, int32 *npages, int32 *nobj);
@@ -553,6 +533,7 @@ void*   runtime·MHeap_SysAlloc(MHeap *h, uintptr n);
 void    runtime·MHeap_MapBits(MHeap *h);
 void    runtime·MHeap_MapSpans(MHeap *h);
 void    runtime·MHeap_Scavenger(void);
+void    runtime·MHeap_SplitSpan(MHeap *h, MSpan *s);
 void*   runtime·mallocgc(uintptr size, uintptr typ, uint32 flag);
 void*   runtime·persistentalloc(uintptr size, uintptr align, uint64 *stat);
......
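The constants removed above define the per-P stack cache geometry: stacks are grouped into NumStackOrders size classes, where order 0 is the minimum stack size and each successive order doubles, and each P caches up to StackCacheSize bytes per order. Below is a minimal Go sketch of that order computation, assuming an 8KB minimum stack (the real FixedStack/StackMin value is platform-dependent); it is an illustration, not runtime code.

```go
package main

import "fmt"

const (
	fixedStack     = 8 << 10  // assumed order-0 stack size
	numStackOrders = 3        // orders 0,1,2 -> 8K, 16K, 32K
	stackCacheSize = 32 << 10 // assumed per-P, per-order cache budget
)

// orderForSize returns the free-list order used for a stack of n bytes,
// or -1 if n is too large to be served from the cache.
func orderForSize(n int) int {
	order := 0
	for s := fixedStack; s < n; s <<= 1 {
		order++
	}
	if order >= numStackOrders {
		return -1
	}
	return order
}

func main() {
	for _, n := range []int{8 << 10, 16 << 10, 32 << 10, 64 << 10} {
		o := orderForSize(n)
		if o < 0 {
			fmt.Printf("%6d bytes: allocated as a dedicated span\n", n)
			continue
		}
		fmt.Printf("%6d bytes: order %d, %d stacks per cache refill\n",
			n, o, stackCacheSize/n)
	}
}
```

Requests at or above FixedStack << NumStackOrders bypass the cache and get a dedicated span, as the stack.c code being removed below explains.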
@@ -43,7 +43,6 @@ void
 runtime·freemcache(MCache *c)
 {
     runtime·MCache_ReleaseAll(c);
-    runtime·stackcache_clear(c);
     runtime·lock(&runtime·mheap);
     runtime·purgecachedstats(c);
     runtime·FixAlloc_Free(&runtime·mheap.cachealloc, c);
......
@@ -263,8 +263,6 @@ MCentral_Grow(MCentral *c)
     runtime·unlock(c);
     runtime·MGetSizeClassInfo(c->sizeclass, &size, &npages, &n);
     s = runtime·MHeap_Alloc(&runtime·mheap, npages, c->sizeclass, 0, 1);
-    if(s->next != nil || s->prev != nil)
-        runtime·throw("internal error: MSpan should not be in a list");
     if(s == nil) {
         // TODO(rsc): Log out of memory
         runtime·lock(c);
......
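For reference, the two lines deleted here ran before the s == nil check that follows them, so they dereferenced the result of MHeap_Alloc before testing it for failure. A small Go sketch of the check-for-failure-first ordering, using stand-in types rather than the runtime's:

```go
package main

import "fmt"

type span struct{ next, prev *span }

// alloc stands in for a heap allocation that can fail and return nil.
func alloc(fail bool) *span {
	if fail {
		return nil
	}
	return new(span)
}

func main() {
	s := alloc(true)
	if s == nil { // test for failure before touching the span
		fmt.Println("out of memory")
		return
	}
	if s.next != nil || s.prev != nil { // only then validate its list links
		panic("internal error: span should not be in a list")
	}
	fmt.Println("span ok")
}
```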
@@ -30,7 +30,7 @@ type MemStats struct {
     // Low-level fixed-size structure allocator statistics.
     // Inuse is bytes used now.
     // Sys is bytes obtained from system.
-    StackInuse  uint64 // bytes used by stack allocator
+    StackInuse  uint64 // bootstrap stacks
     StackSys    uint64
     MSpanInuse  uint64 // mspan structures
     MSpanSys    uint64
......
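With the revert, MemStats.StackInuse goes back to covering the fixed-size "bootstrap" stacks rather than all stack memory carved out of the heap. For reference, a small program that prints the affected fields using the standard runtime API (the API itself is unchanged by this CL):

```go
package main

import (
	"fmt"
	"runtime"
)

func main() {
	var ms runtime.MemStats
	runtime.ReadMemStats(&ms)
	fmt.Printf("StackInuse = %d bytes\n", ms.StackInuse)
	fmt.Printf("StackSys   = %d bytes\n", ms.StackSys)
	fmt.Printf("HeapInuse  = %d bytes\n", ms.HeapInuse)
}
```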
@@ -1252,12 +1252,12 @@ markroot(ParFor *desc, uint32 i)
         SpecialFinalizer *spf;
 
         s = allspans[spanidx];
-        if(s->state != MSpanInUse)
-            continue;
         if(s->sweepgen != sg) {
             runtime·printf("sweep %d %d\n", s->sweepgen, sg);
             runtime·throw("gc: unswept span");
         }
+        if(s->state != MSpanInUse)
+            continue;
         // The garbage collector ignores type pointers stored in MSpan.types:
         //  - Compiler-generated types are stored outside of heap.
         //  - The reflect package has runtime-generated types cached in its data structures.
@@ -2124,7 +2124,6 @@ flushallmcaches(void)
         if(c==nil)
             continue;
         runtime·MCache_ReleaseAll(c);
-        runtime·stackcache_clear(c);
     }
 }
@@ -2134,12 +2133,14 @@ runtime·updatememstats(GCStats *stats)
     M *mp;
     MSpan *s;
     int32 i;
-    uint64 smallfree;
+    uint64 stacks_inuse, smallfree;
     uint64 *src, *dst;
 
     if(stats)
         runtime·memclr((byte*)stats, sizeof(*stats));
+    stacks_inuse = 0;
     for(mp=runtime·allm; mp; mp=mp->alllink) {
+        stacks_inuse += mp->stackinuse*FixedStack;
         if(stats) {
             src = (uint64*)&mp->gcstats;
             dst = (uint64*)stats;
@@ -2148,6 +2149,7 @@ runtime·updatememstats(GCStats *stats)
             runtime·memclr((byte*)&mp->gcstats, sizeof(mp->gcstats));
         }
     }
+    mstats.stacks_inuse = stacks_inuse;
     mstats.mcache_inuse = runtime·mheap.cachealloc.inuse;
     mstats.mspan_inuse = runtime·mheap.spanalloc.inuse;
     mstats.sys = mstats.heap_sys + mstats.stacks_sys + mstats.mspan_sys +
@@ -2507,12 +2509,6 @@ runtime·ReadMemStats(MStats *stats)
     // Size of the trailing by_size array differs between Go and C,
     // NumSizeClasses was changed, but we can not change Go struct because of backward compatibility.
     runtime·memcopy(runtime·sizeof_C_MStats, stats, &mstats);
-    // Stack numbers are part of the heap numbers, separate those out for user consumption
-    stats->stacks_sys = stats->stacks_inuse;
-    stats->heap_inuse -= stats->stacks_inuse;
-    stats->heap_sys -= stats->stacks_inuse;
     g->m->gcing = 0;
     g->m->locks++;
     runtime·semrelease(&runtime·worldsema);
......
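The restored updatememstats recomputes stacks_inuse at GC time by walking every M and summing mp->stackinuse*FixedStack. A hedged Go sketch of that accumulation, with stand-in names (mLike, fixedStack) rather than real runtime identifiers:

```go
package main

import "fmt"

const fixedStack = 8 << 10 // assumed size of one cached stack segment

type mLike struct {
	stackinuse int // stacks currently handed out by this M
	alllink    *mLike
}

// stacksInuse walks the linked list of Ms and totals their stack usage,
// mirroring the bookkeeping in the restored updatememstats.
func stacksInuse(allm *mLike) uint64 {
	var total uint64
	for mp := allm; mp != nil; mp = mp.alllink {
		total += uint64(mp.stackinuse) * fixedStack
	}
	return total
}

func main() {
	m2 := &mLike{stackinuse: 3}
	m1 := &mLike{stackinuse: 5, alllink: m2}
	fmt.Println("stacks_inuse =", stacksInuse(m1)) // (5+3)*8KB = 65536
}
```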
@@ -9,16 +9,16 @@
 // When a MSpan is in the heap free list, state == MSpanFree
 // and heapmap(s->start) == span, heapmap(s->start+s->npages-1) == span.
 //
-// When a MSpan is allocated, state == MSpanInUse or MSpanStack
+// When a MSpan is allocated, state == MSpanInUse
 // and heapmap(i) == span for all s->start <= i < s->start+s->npages.
 
 #include "runtime.h"
 #include "arch_GOARCH.h"
 #include "malloc.h"
 
-static MSpan *MHeap_AllocSpanLocked(MHeap*, uintptr);
-static void MHeap_FreeSpanLocked(MHeap*, MSpan*);
+static MSpan *MHeap_AllocLocked(MHeap*, uintptr, int32);
 static bool MHeap_Grow(MHeap*, uintptr);
+static void MHeap_FreeLocked(MHeap*, MSpan*);
 static MSpan *MHeap_AllocLarge(MHeap*, uintptr);
 static MSpan *BestFit(MSpan*, uintptr, MSpan*);
@@ -165,38 +165,19 @@ MHeap_Reclaim(MHeap *h, uintptr npage)
     runtime·lock(h);
 }
 
-// Allocate a new span of npage pages from the heap for GC'd memory
+// Allocate a new span of npage pages from the heap
 // and record its size class in the HeapMap and HeapMapCache.
-static MSpan*
-mheap_alloc(MHeap *h, uintptr npage, int32 sizeclass, bool large)
+MSpan*
+runtime·MHeap_Alloc(MHeap *h, uintptr npage, int32 sizeclass, bool large, bool needzero)
 {
     MSpan *s;
 
-    if(g != g->m->g0)
-        runtime·throw("mheap_alloc not on M stack");
     runtime·lock(h);
-
-    // To prevent excessive heap growth, before allocating n pages
-    // we need to sweep and reclaim at least n pages.
-    if(!h->sweepdone)
-        MHeap_Reclaim(h, npage);
-
-    // transfer stats from cache to global
     mstats.heap_alloc += g->m->mcache->local_cachealloc;
     g->m->mcache->local_cachealloc = 0;
-
-    s = MHeap_AllocSpanLocked(h, npage);
+    s = MHeap_AllocLocked(h, npage, sizeclass);
     if(s != nil) {
-        // Record span info, because gc needs to be
-        // able to map interior pointer to containing span.
-        s->state = MSpanInUse;
-        s->ref = 0;
-        s->sizeclass = sizeclass;
-        s->elemsize = (sizeclass==0 ? s->npages<<PageShift : runtime·class_to_size[sizeclass]);
-        s->types.compression = MTypes_Empty;
-        s->sweepgen = h->sweepgen;
-
-        // update stats, sweep lists
+        mstats.heap_inuse += npage<<PageShift;
         if(large) {
             mstats.heap_objects++;
             mstats.heap_alloc += npage<<PageShift;
@@ -208,42 +189,6 @@ mheap_alloc(MHeap *h, uintptr npage, int32 sizeclass, bool large)
         }
     }
     runtime·unlock(h);
-    return s;
-}
-
-void
-mheap_alloc_m(G *gp)
-{
-    MHeap *h;
-    MSpan *s;
-
-    h = g->m->ptrarg[0];
-    g->m->ptrarg[0] = nil;
-    s = mheap_alloc(h, g->m->scalararg[0], g->m->scalararg[1], g->m->scalararg[2]);
-    g->m->ptrarg[0] = s;
-
-    runtime·gogo(&gp->sched);
-}
-
-MSpan*
-runtime·MHeap_Alloc(MHeap *h, uintptr npage, int32 sizeclass, bool large, bool needzero)
-{
-    MSpan *s;
-
-    // Don't do any operations that lock the heap on the G stack.
-    // It might trigger stack growth, and the stack growth code needs
-    // to be able to allocate heap.
-    if(g == g->m->g0) {
-        s = mheap_alloc(h, npage, sizeclass, large);
-    } else {
-        g->m->ptrarg[0] = h;
-        g->m->scalararg[0] = npage;
-        g->m->scalararg[1] = sizeclass;
-        g->m->scalararg[2] = large;
-        runtime·mcall(mheap_alloc_m);
-        s = g->m->ptrarg[0];
-        g->m->ptrarg[0] = nil;
-    }
     if(s != nil) {
         if(needzero && s->needzero)
             runtime·memclr((byte*)(s->start<<PageShift), s->npages<<PageShift);
@@ -252,34 +197,18 @@ runtime·MHeap_Alloc(MHeap *h, uintptr npage, int32 sizeclass, bool large, bool
     return s;
 }
 
-MSpan*
-runtime·MHeap_AllocStack(MHeap *h, uintptr npage)
-{
-    MSpan *s;
-
-    if(g != g->m->g0)
-        runtime·throw("mheap_allocstack not on M stack");
-    runtime·lock(h);
-    s = MHeap_AllocSpanLocked(h, npage);
-    if(s != nil) {
-        s->state = MSpanStack;
-        s->ref = 0;
-        mstats.stacks_inuse += s->npages<<PageShift;
-    }
-    runtime·unlock(h);
-    return s;
-}
-
-// Allocates a span of the given size.  h must be locked.
-// The returned span has been removed from the
-// free list, but its state is still MSpanFree.
 static MSpan*
-MHeap_AllocSpanLocked(MHeap *h, uintptr npage)
+MHeap_AllocLocked(MHeap *h, uintptr npage, int32 sizeclass)
 {
     uintptr n;
     MSpan *s, *t;
     PageID p;
 
+    // To prevent excessive heap growth, before allocating n pages
+    // we need to sweep and reclaim at least n pages.
+    if(!h->sweepdone)
+        MHeap_Reclaim(h, npage);
+
     // Try in fixed-size lists up to max.
     for(n=npage; n < nelem(h->free); n++) {
         if(!runtime·MSpanList_IsEmpty(&h->free[n])) {
@@ -303,13 +232,13 @@ HaveSpan:
     if(s->npages < npage)
         runtime·throw("MHeap_AllocLocked - bad npages");
     runtime·MSpanList_Remove(s);
-    if(s->next != nil || s->prev != nil)
-        runtime·throw("still in list");
-    if(s->npreleased > 0) {
+    runtime·atomicstore(&s->sweepgen, h->sweepgen);
+    s->state = MSpanInUse;
+    mstats.heap_idle -= s->npages<<PageShift;
+    mstats.heap_released -= s->npreleased<<PageShift;
+    if(s->npreleased > 0)
         runtime·SysUsed((void*)(s->start<<PageShift), s->npages<<PageShift);
-        mstats.heap_released -= s->npreleased<<PageShift;
-        s->npreleased = 0;
-    }
+    s->npreleased = 0;
 
     if(s->npages > npage) {
         // Trim extra and put it back in the heap.
@@ -323,25 +252,22 @@ HaveSpan:
         h->spans[p] = t;
         h->spans[p+t->npages-1] = t;
         t->needzero = s->needzero;
-        s->state = MSpanStack; // prevent coalescing with s
-        t->state = MSpanStack;
-        MHeap_FreeSpanLocked(h, t);
-        t->unusedsince = s->unusedsince; // preserve age (TODO: wrong: t is possibly merged and/or deallocated at this point)
-        s->state = MSpanFree;
+        runtime·atomicstore(&t->sweepgen, h->sweepgen);
+        t->state = MSpanInUse;
+        MHeap_FreeLocked(h, t);
+        t->unusedsince = s->unusedsince; // preserve age
     }
     s->unusedsince = 0;
 
+    // Record span info, because gc needs to be
+    // able to map interior pointer to containing span.
+    s->sizeclass = sizeclass;
+    s->elemsize = (sizeclass==0 ? s->npages<<PageShift : runtime·class_to_size[sizeclass]);
+    s->types.compression = MTypes_Empty;
     p = s->start;
     p -= ((uintptr)h->arena_start>>PageShift);
     for(n=0; n<npage; n++)
         h->spans[p+n] = s;
-
-    mstats.heap_inuse += npage<<PageShift;
-    mstats.heap_idle -= npage<<PageShift;
-
-    //runtime·printf("spanalloc %p\n", s->start << PageShift);
-    if(s->next != nil || s->prev != nil)
-        runtime·throw("still in list");
     return s;
 }
@@ -412,7 +338,7 @@ MHeap_Grow(MHeap *h, uintptr npage)
     h->spans[p + s->npages - 1] = s;
     runtime·atomicstore(&s->sweepgen, h->sweepgen);
     s->state = MSpanInUse;
-    MHeap_FreeSpanLocked(h, s);
+    MHeap_FreeLocked(h, s);
     return true;
 }
@@ -454,83 +380,34 @@ runtime·MHeap_LookupMaybe(MHeap *h, void *v)
 }
 
 // Free the span back into the heap.
-static void
-mheap_free(MHeap *h, MSpan *s, int32 acct)
+void
+runtime·MHeap_Free(MHeap *h, MSpan *s, int32 acct)
 {
-    if(g != g->m->g0)
-        runtime·throw("mheap_free not on M stack");
     runtime·lock(h);
     mstats.heap_alloc += g->m->mcache->local_cachealloc;
     g->m->mcache->local_cachealloc = 0;
+    mstats.heap_inuse -= s->npages<<PageShift;
     if(acct) {
         mstats.heap_alloc -= s->npages<<PageShift;
         mstats.heap_objects--;
     }
-    s->types.compression = MTypes_Empty;
-    MHeap_FreeSpanLocked(h, s);
+    MHeap_FreeLocked(h, s);
     runtime·unlock(h);
 }
 
 static void
-mheap_free_m(G *gp)
-{
-    MHeap *h;
-    MSpan *s;
-
-    h = g->m->ptrarg[0];
-    s = g->m->ptrarg[1];
-    g->m->ptrarg[0] = nil;
-    g->m->ptrarg[1] = nil;
-    mheap_free(h, s, g->m->scalararg[0]);
-    runtime·gogo(&gp->sched);
-}
-
-void
-runtime·MHeap_Free(MHeap *h, MSpan *s, int32 acct)
-{
-    if(g == g->m->g0) {
-        mheap_free(h, s, acct);
-    } else {
-        g->m->ptrarg[0] = h;
-        g->m->ptrarg[1] = s;
-        g->m->scalararg[0] = acct;
-        runtime·mcall(mheap_free_m);
-    }
-}
-
-void
-runtime·MHeap_FreeStack(MHeap *h, MSpan *s)
-{
-    if(g != g->m->g0)
-        runtime·throw("mheap_freestack not on M stack");
-    s->needzero = 1;
-    runtime·lock(h);
-    MHeap_FreeSpanLocked(h, s);
-    mstats.stacks_inuse -= s->npages<<PageShift;
-    runtime·unlock(h);
-}
-
-static void
-MHeap_FreeSpanLocked(MHeap *h, MSpan *s)
+MHeap_FreeLocked(MHeap *h, MSpan *s)
 {
     MSpan *t;
     PageID p;
 
-    switch(s->state) {
-    case MSpanStack:
-        break;
-    case MSpanInUse:
-        if(s->ref != 0 || s->sweepgen != h->sweepgen) {
-            runtime·printf("MHeap_FreeSpanLocked - span %p ptr %p ref %d sweepgen %d/%d\n",
-                s, s->start<<PageShift, s->ref, s->sweepgen, h->sweepgen);
-            runtime·throw("MHeap_FreeSpanLocked - invalid free");
-        }
-        break;
-    default:
-        runtime·throw("MHeap_FreeSpanLocked - invalid span state");
-        break;
-    }
-    mstats.heap_inuse -= s->npages<<PageShift;
+    s->types.compression = MTypes_Empty;
+
+    if(s->state != MSpanInUse || s->ref != 0 || s->sweepgen != h->sweepgen) {
+        runtime·printf("MHeap_FreeLocked - span %p ptr %p state %d ref %d sweepgen %d/%d\n",
+            s, s->start<<PageShift, s->state, s->ref, s->sweepgen, h->sweepgen);
+        runtime·throw("MHeap_FreeLocked - invalid free");
+    }
     mstats.heap_idle += s->npages<<PageShift;
     s->state = MSpanFree;
     runtime·MSpanList_Remove(s);
@@ -542,7 +419,7 @@ MHeap_FreeSpanLocked(MHeap *h, MSpan *s)
     // Coalesce with earlier, later spans.
     p = s->start;
     p -= (uintptr)h->arena_start >> PageShift;
-    if(p > 0 && (t = h->spans[p-1]) != nil && t->state != MSpanInUse && t->state != MSpanStack) {
+    if(p > 0 && (t = h->spans[p-1]) != nil && t->state != MSpanInUse) {
         s->start = t->start;
         s->npages += t->npages;
         s->npreleased = t->npreleased; // absorb released pages
@@ -553,7 +430,7 @@ MHeap_FreeSpanLocked(MHeap *h, MSpan *s)
         t->state = MSpanDead;
         runtime·FixAlloc_Free(&h->spanalloc, t);
     }
-    if((p+s->npages)*sizeof(h->spans[0]) < h->spans_mapped && (t = h->spans[p+s->npages]) != nil && t->state != MSpanInUse && t->state != MSpanStack) {
+    if((p+s->npages)*sizeof(h->spans[0]) < h->spans_mapped && (t = h->spans[p+s->npages]) != nil && t->state != MSpanInUse) {
         s->npages += t->npages;
         s->npreleased += t->npreleased;
         s->needzero |= t->needzero;
@@ -621,15 +498,6 @@ scavenge(int32 k, uint64 now, uint64 limit)
     }
 }
 
-static void
-scavenge_m(G *gp)
-{
-    runtime·lock(&runtime·mheap);
-    scavenge(g->m->scalararg[0], g->m->scalararg[1], g->m->scalararg[2]);
-    runtime·unlock(&runtime·mheap);
-    runtime·gogo(&gp->sched);
-}
-
 static FuncVal forcegchelperv = {(void(*)(void))forcegchelper};
 
 // Release (part of) unused memory to OS.
@@ -639,7 +507,7 @@ void
 runtime·MHeap_Scavenger(void)
 {
     MHeap *h;
-    uint64 tick, forcegc, limit;
+    uint64 tick, now, forcegc, limit;
     int64 unixnow;
     int32 k;
     Note note, *notep;
@@ -678,11 +546,9 @@ runtime·MHeap_Scavenger(void)
             runtime·printf("scvg%d: GC forced\n", k);
             runtime·lock(h);
         }
+        now = runtime·nanotime();
+        scavenge(k, now, limit);
         runtime·unlock(h);
-        g->m->scalararg[0] = k;
-        g->m->scalararg[1] = runtime·nanotime();
-        g->m->scalararg[2] = limit;
-        runtime·mcall(scavenge_m);
     }
 }
@@ -690,11 +556,9 @@ void
 runtimedebug·freeOSMemory(void)
 {
     runtime·gc(2);  // force GC and do eager sweep
-
-    g->m->scalararg[0] = -1;
-    g->m->scalararg[1] = ~(uintptr)0;
-    g->m->scalararg[2] = 0;
-    runtime·mcall(scavenge_m);
+    runtime·lock(&runtime·mheap);
+    scavenge(-1, ~(uintptr)0, 0);
+    runtime·unlock(&runtime·mheap);
 }
@@ -977,3 +841,92 @@ runtime·freeallspecials(MSpan *span, void *p, uintptr size)
             runtime·throw("can't explicitly free an object with a finalizer");
     }
 }
+
+// Split an allocated span into two equal parts.
+void
+runtime·MHeap_SplitSpan(MHeap *h, MSpan *s)
+{
+    MSpan *t;
+    MCentral *c;
+    uintptr i;
+    uintptr npages;
+    PageID p;
+
+    if(s->state != MSpanInUse)
+        runtime·throw("MHeap_SplitSpan on a free span");
+    if(s->sizeclass != 0 && s->ref != 1)
+        runtime·throw("MHeap_SplitSpan doesn't have an allocated object");
+    npages = s->npages;
+
+    // remove the span from whatever list it is in now
+    if(s->sizeclass > 0) {
+        // must be in h->central[x].empty
+        c = &h->central[s->sizeclass];
+        runtime·lock(c);
+        runtime·MSpanList_Remove(s);
+        runtime·unlock(c);
+        runtime·lock(h);
+    } else {
+        // must be in h->busy/busylarge
+        runtime·lock(h);
+        runtime·MSpanList_Remove(s);
+    }
+    // heap is locked now
+
+    if(npages == 1) {
+        // convert span of 1 PageSize object to a span of 2 PageSize/2 objects.
+        s->ref = 2;
+        s->sizeclass = runtime·SizeToClass(PageSize/2);
+        s->elemsize = PageSize/2;
+    } else {
+        // convert span of n>1 pages into two spans of n/2 pages each.
+        if((s->npages & 1) != 0)
+            runtime·throw("MHeap_SplitSpan on an odd size span");
+
+        // compute position in h->spans
+        p = s->start;
+        p -= (uintptr)h->arena_start >> PageShift;
+
+        // Allocate a new span for the first half.
+        t = runtime·FixAlloc_Alloc(&h->spanalloc);
+        runtime·MSpan_Init(t, s->start, npages/2);
+        t->limit = (byte*)((t->start + npages/2) << PageShift);
+        t->state = MSpanInUse;
+        t->elemsize = npages << (PageShift - 1);
+        t->sweepgen = s->sweepgen;
+        if(t->elemsize <= MaxSmallSize) {
+            t->sizeclass = runtime·SizeToClass(t->elemsize);
+            t->ref = 1;
+        }
+
+        // the old span holds the second half.
+        s->start += npages/2;
+        s->npages = npages/2;
+        s->elemsize = npages << (PageShift - 1);
+        if(s->elemsize <= MaxSmallSize) {
+            s->sizeclass = runtime·SizeToClass(s->elemsize);
+            s->ref = 1;
+        }
+
+        // update span lookup table
+        for(i = p; i < p + npages/2; i++)
+            h->spans[i] = t;
+    }
+
+    // place the span into a new list
+    if(s->sizeclass > 0) {
+        runtime·unlock(h);
+        c = &h->central[s->sizeclass];
+        runtime·lock(c);
+        // swept spans are at the end of the list
+        runtime·MSpanList_InsertBack(&c->empty, s);
+        runtime·unlock(c);
+    } else {
+        // Swept spans are at the end of lists.
+        if(s->npages < nelem(h->free))
+            runtime·MSpanList_InsertBack(&h->busy[s->npages], s);
+        else
+            runtime·MSpanList_InsertBack(&h->busylarge, s);
+        runtime·unlock(h);
+    }
+}
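runtime·MHeap_SplitSpan, restored above, turns one allocated span into two equal halves so that the caller can free the lower half of an in-place-shrunk stack (see runtime·shrinkstack further down). A simplified Go sketch of the multi-page bookkeeping — halving start/npages and repointing the page-to-span lookup table — using stand-in types rather than the runtime's MSpan:

```go
package main

import "fmt"

const pageShift = 12 // assumed 4KB pages

type span struct {
	start  uintptr // first page number
	npages uintptr
}

// splitSpan splits s in half and returns the new span that owns the first
// half; spans[p] maps page index p (relative to the arena) to its span.
func splitSpan(spans []*span, arenaStart uintptr, s *span) *span {
	if s.npages&1 != 0 {
		panic("splitSpan on an odd size span")
	}
	p := s.start - (arenaStart >> pageShift) // position in the lookup table
	t := &span{start: s.start, npages: s.npages / 2}
	s.start += s.npages / 2
	s.npages /= 2
	for i := p; i < p+t.npages; i++ {
		spans[i] = t
	}
	return t
}

func main() {
	arenaStart := uintptr(0x1000000)
	s := &span{start: (arenaStart >> pageShift) + 8, npages: 4}
	spans := make([]*span, 16)
	for i := uintptr(8); i < 12; i++ {
		spans[i] = s
	}
	t := splitSpan(spans, arenaStart, s)
	fmt.Printf("first half: page %d +%d, second half: page %d +%d\n",
		t.start-(arenaStart>>pageShift), t.npages,
		s.start-(arenaStart>>pageShift), s.npages)
}
```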
@@ -152,7 +152,6 @@ runtime·schedinit(void)
     runtime·precisestack = true; // haveexperiment("precisestack");
 
     runtime·symtabinit();
-    runtime·stackinit();
     runtime·mallocinit();
     mcommoninit(g->m);
@@ -1927,7 +1926,7 @@ gfput(P *p, G *gp)
         runtime·throw("gfput: bad stacksize");
     }
     top = (Stktop*)gp->stackbase;
-    if(stksize != FixedStack) {
+    if(top->malloced) {
         // non-standard stack size - free it.
         runtime·stackfree(gp, (void*)gp->stack0, top);
         gp->stack0 = 0;
......
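The gfput change above restores the old policy for caching dead goroutines: a G keeps its standard fixed-size stack for reuse, while a stack that came from malloc (top->malloced) is freed immediately. An illustrative Go sketch with stand-in types (gLike, fixedStack), not the scheduler code itself:

```go
package main

import "fmt"

const fixedStack = 8 << 10 // assumed standard stack size

type gLike struct {
	stack    []byte
	malloced bool // stack came from malloc rather than the fixed-size cache
}

// gfput caches a dead G for reuse, dropping any non-standard stack first.
func gfput(freeList []*gLike, gp *gLike) []*gLike {
	if gp.malloced {
		gp.stack = nil // non-standard stack - free it
	}
	return append(freeList, gp)
}

func main() {
	var free []*gLike
	free = gfput(free, &gLike{stack: make([]byte, fixedStack)})
	free = gfput(free, &gLike{stack: make([]byte, 32<<10), malloced: true})
	fmt.Println(free[0].stack != nil, free[1].stack != nil) // true false
}
```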
@@ -146,6 +146,13 @@ enum
 {
     PtrSize = sizeof(void*),
 };
+enum
+{
+    // Per-M stack segment cache size.
+    StackCacheSize = 32,
+    // Global <-> per-M stack segment cache transfer batch size.
+    StackCacheBatch = 16,
+};
 /*
  * structures
  */
@@ -319,6 +326,10 @@ struct M
     M*      schedlink;
     uint32  machport;   // Return address for Mach IPC (OS X)
     MCache* mcache;
+    int32   stackinuse;
+    uint32  stackcachepos;
+    uint32  stackcachecnt;
+    void*   stackcache[StackCacheSize];
     G*      lockedg;
     uintptr createstack[32];// Stack that created this thread.
     uint32  freglo[16];     // D[i] lsb and F[i]
@@ -335,8 +346,6 @@ struct M
     bool    (*waitunlockf)(G*, void*);
     void*   waitlock;
     uintptr forkstackguard;
-    uintptr scalararg[4];   // scalar argument/return for mcall
-    void*   ptrarg[4];      // pointer argument/return for mcall
 #ifdef GOOS_windows
     void*   thread;         // thread handle
     // these are here because they are too large to be on the stack
@@ -419,6 +428,7 @@ struct Stktop
     uint8*  argp;   // pointer to arguments in old frame
     bool    panic;  // is this frame the top of a panic?
+    bool    malloced;
 };
 struct  SigTab
 {
@@ -856,7 +866,6 @@ int32   runtime·funcarglen(Func*, uintptr);
 int32   runtime·funcspdelta(Func*, uintptr);
 int8*   runtime·funcname(Func*);
 int32   runtime·pcdatavalue(Func*, int32, uintptr);
-void    runtime·stackinit(void);
 void*   runtime·stackalloc(G*, uint32);
 void    runtime·stackfree(G*, void*, Stktop*);
 void    runtime·shrinkstack(G*);
......
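The fields added back to M model a small per-thread stack cache: a fixed ring of StackCacheSize slots with a position and count, refilled from and drained to a global list in StackCacheBatch-sized chunks (see stackcacherefill/stackcacherelease below). A simplified Go model of that ring buffer, not the runtime code itself:

```go
package main

import "fmt"

const (
	stackCacheSize  = 32
	stackCacheBatch = 16
)

type stackCache struct {
	buf [stackCacheSize][]byte
	pos int
	cnt int
}

// put stores a freed stack; a full cache means a batch must first be
// released to the global list (not modeled here).
func (c *stackCache) put(v []byte) bool {
	if c.cnt == stackCacheSize {
		return false
	}
	c.buf[c.pos] = v
	c.pos = (c.pos + 1) % stackCacheSize
	c.cnt++
	return true
}

// get hands out the most recently cached stack; an empty cache means a
// batch must first be refilled from the global list.
func (c *stackCache) get() ([]byte, bool) {
	if c.cnt == 0 {
		return nil, false
	}
	c.pos = (c.pos - 1 + stackCacheSize) % stackCacheSize // avoid negative modulo
	v := c.buf[c.pos]
	c.cnt--
	return v, true
}

func main() {
	var c stackCache
	for i := 0; i < stackCacheBatch; i++ {
		c.put(make([]byte, 8<<10))
	}
	v, ok := c.get()
	fmt.Println(len(v), ok, c.cnt) // 8192 true 15
}
```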
@@ -21,163 +21,76 @@ enum
     StackDebug = 0,
     StackFromSystem = 0,  // allocate stacks from system memory instead of the heap
     StackFaultOnFree = 0, // old stacks are mapped noaccess to detect use after free
-    StackCache = 1,
 };
 
-// Global pool of spans that have free stacks.
-// Stacks are assigned an order according to size.
-//     order = log_2(size/FixedStack)
-// There is a free list for each order.
-static MSpan stackpool[NumStackOrders];
-static Lock stackpoolmu;
-// TODO: one lock per order?
-
-void
-runtime·stackinit(void)
-{
-    int32 i;
-
-    for(i = 0; i < NumStackOrders; i++)
-        runtime·MSpanList_Init(&stackpool[i]);
-}
-
-// Allocates a stack from the free pool.  Must be called with
-// stackpoolmu held.
-static MLink*
-poolalloc(uint8 order)
-{
-    MSpan *list;
-    MSpan *s;
-    MLink *x;
-    uintptr i;
-
-    list = &stackpool[order];
-    s = list->next;
-    if(s == list) {
-        // no free stacks.  Allocate another span worth.
-        s = runtime·MHeap_AllocStack(&runtime·mheap, StackCacheSize >> PageShift);
-        if(s == nil)
-            runtime·throw("out of memory");
-        for(i = 0; i < StackCacheSize; i += FixedStack << order) {
-            x = (MLink*)((s->start << PageShift) + i);
-            x->next = s->freelist;
-            s->freelist = x;
-        }
-    }
-    x = s->freelist;
-    s->freelist = x->next;
-    s->ref--;
-    if(s->ref == 0) {
-        // all stacks in s are allocated.
-        runtime·MSpanList_Remove(s);
-    }
-    return x;
-}
-
-// Adds stack x to the free pool.  Must be called with stackpoolmu held.
-static void
-poolfree(MLink *x, uint8 order)
-{
-    MSpan *s;
-
-    s = runtime·MHeap_Lookup(&runtime·mheap, x);
-    x->next = s->freelist;
-    s->freelist = x;
-    if(s->ref == 0) {
-        // s now has a free stack
-        runtime·MSpanList_Insert(&stackpool[order], s);
-    }
-    s->ref++;
-    if(s->ref == (StackCacheSize / FixedStack) >> order) {
-        // span is completely free - return to heap
-        runtime·MSpanList_Remove(s);
-        runtime·MHeap_FreeStack(&runtime·mheap, s);
-    }
-}
+typedef struct StackCacheNode StackCacheNode;
+struct StackCacheNode
+{
+    StackCacheNode *next;
+    void*   batch[StackCacheBatch-1];
+};
+
+static StackCacheNode *stackcache;
+static Lock stackcachemu;
 
-// stackcacherefill/stackcacherelease implement a global pool of stack segments.
-// The pool is required to prevent unlimited growth of per-thread caches.
+// stackcacherefill/stackcacherelease implement a global cache of stack segments.
+// The cache is required to prevent unlimited growth of per-thread caches.
 static void
-stackcacherefill(MCache *c, uint8 order)
+stackcacherefill(void)
 {
-    MLink *x, *list;
-    uintptr size;
-
-    if(StackDebug >= 1)
-        runtime·printf("stackcacherefill order=%d\n", order);
-
-    // Grab some stacks from the global cache.
-    // Grab half of the allowed capacity (to prevent thrashing).
-    list = nil;
-    size = 0;
-    runtime·lock(&stackpoolmu);
-    while(size < StackCacheSize/2) {
-        x = poolalloc(order);
-        x->next = list;
-        list = x;
-        size += FixedStack << order;
-    }
-    runtime·unlock(&stackpoolmu);
-
-    c->stackcache[order].list = list;
-    c->stackcache[order].size = size;
+    StackCacheNode *n;
+    int32 i, pos;
+
+    runtime·lock(&stackcachemu);
+    n = stackcache;
+    if(n)
+        stackcache = n->next;
+    runtime·unlock(&stackcachemu);
+    if(n == nil) {
+        n = (StackCacheNode*)runtime·SysAlloc(FixedStack*StackCacheBatch, &mstats.stacks_sys);
+        if(n == nil)
+            runtime·throw("out of memory (stackcacherefill)");
+        for(i = 0; i < StackCacheBatch-1; i++)
+            n->batch[i] = (byte*)n + (i+1)*FixedStack;
+    }
+    pos = g->m->stackcachepos;
+    for(i = 0; i < StackCacheBatch-1; i++) {
+        g->m->stackcache[pos] = n->batch[i];
+        pos = (pos + 1) % StackCacheSize;
+    }
+    g->m->stackcache[pos] = n;
+    pos = (pos + 1) % StackCacheSize;
+    g->m->stackcachepos = pos;
+    g->m->stackcachecnt += StackCacheBatch;
 }
 
 static void
-stackcacherelease(MCache *c, uint8 order)
+stackcacherelease(void)
 {
-    MLink *x, *y;
-    uintptr size;
-
-    if(StackDebug >= 1)
-        runtime·printf("stackcacherelease order=%d\n", order);
-    x = c->stackcache[order].list;
-    size = c->stackcache[order].size;
-    runtime·lock(&stackpoolmu);
-    while(size > StackCacheSize/2) {
-        y = x->next;
-        poolfree(x, order);
-        x = y;
-        size -= FixedStack << order;
-    }
-    runtime·unlock(&stackpoolmu);
-    c->stackcache[order].list = x;
-    c->stackcache[order].size = size;
-}
-
-void
-runtime·stackcache_clear(MCache *c)
-{
-    uint8 order;
-    MLink *x, *y;
-
-    if(StackDebug >= 1)
-        runtime·printf("stackcache clear\n");
-    runtime·lock(&stackpoolmu);
-    for(order = 0; order < NumStackOrders; order++) {
-        x = c->stackcache[order].list;
-        while(x != nil) {
-            y = x->next;
-            poolfree(x, order);
-            x = y;
-        }
-        c->stackcache[order].list = nil;
-        c->stackcache[order].size = 0;
-    }
-    runtime·unlock(&stackpoolmu);
+    StackCacheNode *n;
+    uint32 i, pos;
+
+    pos = (g->m->stackcachepos - g->m->stackcachecnt) % StackCacheSize;
+    n = (StackCacheNode*)g->m->stackcache[pos];
+    pos = (pos + 1) % StackCacheSize;
+    for(i = 0; i < StackCacheBatch-1; i++) {
+        n->batch[i] = g->m->stackcache[pos];
+        pos = (pos + 1) % StackCacheSize;
+    }
+    g->m->stackcachecnt -= StackCacheBatch;
+    runtime·lock(&stackcachemu);
+    n->next = stackcache;
+    stackcache = n;
+    runtime·unlock(&stackcachemu);
 }
 
 void*
 runtime·stackalloc(G *gp, uint32 n)
 {
-    uint8 order;
-    uint32 n2;
+    uint32 pos;
     void *v;
+    bool malloced;
     Stktop *top;
-    MLink *x;
-    MSpan *s;
-    MCache *c;
 
     // Stackalloc must be called on scheduler stack, so that we
     // never try to grow the stack during the code that stackalloc runs.
@@ -197,58 +110,41 @@ runtime·stackalloc(G *gp, uint32 n)
         return v;
     }
 
-    // Small stacks are allocated with a fixed-size free-list allocator.
-    // If we need a stack of a bigger size, we fall back on allocating
-    // a dedicated span.
-    if(StackCache && n < FixedStack << NumStackOrders) {
-        order = 0;
-        n2 = n;
-        while(n2 > FixedStack) {
-            order++;
-            n2 >>= 1;
-        }
-        c = g->m->mcache;
-        if(c == nil) {
-            // This can happen in the guts of exitsyscall or
-            // procresize. Just get a stack from the global pool.
-            runtime·lock(&stackpoolmu);
-            x = poolalloc(order);
-            runtime·unlock(&stackpoolmu);
-        } else {
-            x = c->stackcache[order].list;
-            if(x == nil) {
-                stackcacherefill(c, order);
-                x = c->stackcache[order].list;
-            }
-            c->stackcache[order].list = x->next;
-            c->stackcache[order].size -= n;
-        }
-        v = (byte*)x;
-    } else {
-        s = runtime·MHeap_AllocStack(&runtime·mheap, (n+PageSize-1) >> PageShift);
-        if(s == nil)
-            runtime·throw("out of memory");
-        v = (byte*)(s->start<<PageShift);
-    }
+    // Minimum-sized stacks are allocated with a fixed-size free-list allocator,
+    // but if we need a stack of a bigger size, we fall back on malloc
+    // (assuming that inside malloc all the stack frames are small,
+    // so that we do not deadlock).
+    malloced = true;
+    if(n == FixedStack || g->m->mallocing) {
+        if(n != FixedStack) {
+            runtime·printf("stackalloc: in malloc, size=%d want %d\n", FixedStack, n);
+            runtime·throw("stackalloc");
+        }
+        if(g->m->stackcachecnt == 0)
+            stackcacherefill();
+        pos = g->m->stackcachepos;
+        pos = (pos - 1) % StackCacheSize;
+        v = g->m->stackcache[pos];
+        g->m->stackcachepos = pos;
+        g->m->stackcachecnt--;
+        g->m->stackinuse++;
+        malloced = false;
+    } else
+        v = runtime·mallocgc(n, 0, FlagNoProfiling|FlagNoGC|FlagNoZero|FlagNoInvokeGC);
+
     top = (Stktop*)((byte*)v+n-sizeof(Stktop));
     runtime·memclr((byte*)top, sizeof(*top));
-    if(StackDebug >= 1)
-        runtime·printf("  allocated %p\n", v);
+    top->malloced = malloced;
     return v;
 }
 
 void
 runtime·stackfree(G *gp, void *v, Stktop *top)
 {
-    uint8 order;
-    uintptr n, n2;
-    MSpan *s;
-    MLink *x;
-    MCache *c;
+    uint32 pos;
+    uintptr n;
 
     n = (uintptr)(top+1) - (uintptr)v;
-    if(n & (n-1))
-        runtime·throw("stack not a power of 2");
     if(StackDebug >= 1)
         runtime·printf("stackfree %p %d\n", v, (int32)n);
     gp->stacksize -= n;
@@ -259,34 +155,19 @@ runtime·stackfree(G *gp, void *v, Stktop *top)
         runtime·SysFree(v, n, &mstats.stacks_sys);
         return;
     }
-    if(StackCache && n < FixedStack << NumStackOrders) {
-        order = 0;
-        n2 = n;
-        while(n2 > FixedStack) {
-            order++;
-            n2 >>= 1;
-        }
-        x = (MLink*)v;
-        c = g->m->mcache;
-        if(c == nil) {
-            runtime·lock(&stackpoolmu);
-            poolfree(x, order);
-            runtime·unlock(&stackpoolmu);
-        } else {
-            if(c->stackcache[order].size >= StackCacheSize)
-                stackcacherelease(c, order);
-            x->next = c->stackcache[order].list;
-            c->stackcache[order].list = x;
-            c->stackcache[order].size += n;
-        }
-    } else {
-        s = runtime·MHeap_Lookup(&runtime·mheap, v);
-        if(s->state != MSpanStack) {
-            runtime·printf("%p %p\n", s->start<<PageShift, v);
-            runtime·throw("bad span state");
-        }
-        runtime·MHeap_FreeStack(&runtime·mheap, s);
+    if(top->malloced) {
+        runtime·free(v);
+        return;
     }
+    if(n != FixedStack)
+        runtime·throw("stackfree: bad fixed size");
+    if(g->m->stackcachecnt == StackCacheSize)
+        stackcacherelease();
+    pos = g->m->stackcachepos;
+    g->m->stackcache[pos] = v;
+    g->m->stackcachepos = (pos + 1) % StackCacheSize;
+    g->m->stackcachecnt++;
+    g->m->stackinuse--;
 }
 
 // Called from runtime·lessstack when returning from a function which
@@ -718,6 +599,7 @@ copystack(G *gp, uintptr nframes, uintptr newsize)
     uintptr oldsize, used;
     AdjustInfo adjinfo;
     Stktop *oldtop, *newtop;
+    bool malloced;
 
     if(gp->syscallstack != 0)
         runtime·throw("can't handle stack copy in syscall yet");
@@ -731,9 +613,10 @@ copystack(G *gp, uintptr nframes, uintptr newsize)
     newstk = runtime·stackalloc(gp, newsize);
     newbase = newstk + newsize;
     newtop = (Stktop*)(newbase - sizeof(Stktop));
+    malloced = newtop->malloced;
 
     if(StackDebug >= 1)
-        runtime·printf("copystack gp=%p [%p %p]/%d -> [%p %p]/%d\n", gp, oldstk, oldbase, (int32)oldsize, newstk, newbase, (int32)newsize);
+        runtime·printf("copystack [%p %p]/%d -> [%p %p]/%d\n", oldstk, oldbase, (int32)oldsize, newstk, newbase, (int32)newsize);
     USED(oldsize);
 
     // adjust pointers in the to-be-copied frames
@@ -748,6 +631,7 @@ copystack(G *gp, uintptr nframes, uintptr newsize)
     // copy the stack (including Stktop) to the new location
     runtime·memmove(newbase - used, oldbase - used, used);
+    newtop->malloced = malloced;
 
     // Swap out old stack for new one
     gp->stackbase = (uintptr)newtop;
@@ -908,7 +792,7 @@ runtime·newstack(void)
     top = (Stktop*)(stk+framesize-sizeof(*top));
 
     if(StackDebug >= 1) {
-        runtime·printf("\t-> new stack gp=%p [%p, %p]\n", gp, stk, top);
+        runtime·printf("\t-> new stack [%p, %p]\n", stk, top);
     }
 
     top->stackbase = gp->stackbase;
@@ -997,6 +881,7 @@ runtime·shrinkstack(G *gp)
     int32 nframes;
     byte *oldstk, *oldbase;
     uintptr used, oldsize, newsize;
+    MSpan *span;
 
     if(!runtime·copystack)
         return;
@@ -1010,14 +895,53 @@ runtime·shrinkstack(G *gp)
     if(used >= oldsize / 4)
         return; // still using at least 1/4 of the segment.
 
-    if(gp->syscallstack != (uintptr)nil) // TODO: can we handle this case?
-        return;
+    // To shrink to less than 1/2 a page, we need to copy.
+    if(newsize < PageSize/2) {
+        if(gp->syscallstack != (uintptr)nil) // TODO: can we handle this case?
+            return;
 #ifdef GOOS_windows
-    if(gp->m != nil && gp->m->libcallsp != 0)
-        return;
+        if(gp->m != nil && gp->m->libcallsp != 0)
+            return;
 #endif
-    nframes = copyabletopsegment(gp);
-    if(nframes == -1)
+        nframes = copyabletopsegment(gp);
+        if(nframes == -1)
+            return;
+        copystack(gp, nframes, newsize);
         return;
-    copystack(gp, nframes, newsize);
+    }
+
+    // To shrink a stack of one page size or more, we can shrink it
+    // without copying.  Just deallocate the lower half.
+    span = runtime·MHeap_LookupMaybe(&runtime·mheap, oldstk);
+    if(span == nil)
+        return; // stack allocated outside heap. Can't shrink it. Can happen if stack is allocated while inside malloc. TODO: shrink by copying?
+    if(span->elemsize != oldsize)
+        runtime·throw("span element size doesn't match stack size");
+    if((uintptr)oldstk != span->start << PageShift)
+        runtime·throw("stack not at start of span");
+
+    if(StackDebug)
+        runtime·printf("shrinking stack in place %p %X->%X\n", oldstk, oldsize, newsize);
+
+    // new stack guard for smaller stack
+    gp->stackguard = (uintptr)oldstk + newsize + StackGuard;
+    gp->stackguard0 = (uintptr)oldstk + newsize + StackGuard;
+    if(gp->stack0 == (uintptr)oldstk)
+        gp->stack0 = (uintptr)oldstk + newsize;
+    gp->stacksize -= oldsize - newsize;
+
+    // Free bottom half of the stack.
+    if(runtime·debug.efence || StackFromSystem) {
+        if(runtime·debug.efence || StackFaultOnFree)
+            runtime·SysFault(oldstk, newsize);
+        else
+            runtime·SysFree(oldstk, newsize, &mstats.stacks_sys);
+        return;
+    }
+    // First, we trick malloc into thinking
+    // we allocated the stack as two separate half-size allocs.  Then the
+    // free() call does the rest of the work for us.
+    runtime·MSpan_EnsureSwept(span);
+    runtime·MHeap_SplitSpan(&runtime·mheap, span);
+    runtime·free(oldstk);
 }
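The restored runtime·shrinkstack chooses between two shrinking strategies: if less than a quarter of the segment is in use it halves the stack, copying when the result would be under half a page and otherwise shrinking in place by splitting the span and freeing the bottom half. A hedged Go sketch of just that decision logic (constants are illustrative, not the runtime's):

```go
package main

import "fmt"

const pageSize = 4 << 10 // assumed page size

// shrinkDecision is a simplified model of the shrinkstack policy: it returns
// the action and the new stack size (half the old one) when shrinking pays off.
func shrinkDecision(oldSize, used uintptr) (string, uintptr) {
	newSize := oldSize / 2
	if used >= oldSize/4 {
		return "keep", oldSize // still using at least 1/4 of the segment
	}
	if newSize < pageSize/2 {
		return "shrink by copying", newSize // too small to free half a span
	}
	return "shrink in place", newSize // split the span, free the bottom half
}

func main() {
	for _, c := range []struct{ old, used uintptr }{
		{32 << 10, 20 << 10}, // heavily used
		{2 << 10, 256},       // tiny result: copy
		{64 << 10, 4 << 10},  // large and mostly idle: in place
	} {
		action, n := shrinkDecision(c.old, c.used)
		fmt.Printf("%6d used of %6d -> %s (new size %d)\n", c.used, c.old, action, n)
	}
}
```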
@@ -281,52 +281,3 @@ func TestDeferPtrs(t *testing.T) {
 	defer set(&y, 42)
 	growStack()
 }
-
-// use about n KB of stack
-func useStack(n int) {
-	if n == 0 {
-		return
-	}
-	var b [1024]byte // makes frame about 1KB
-	useStack(n - 1 + int(b[99]))
-}
-
-func growing(c chan int, done chan struct{}) {
-	for n := range c {
-		useStack(n)
-		done <- struct{}{}
-	}
-	done <- struct{}{}
-}
-
-func TestStackCache(t *testing.T) {
-	// Allocate a bunch of goroutines and grow their stacks.
-	// Repeat a few times to test the stack cache.
-	const (
-		R = 4
-		G = 200
-		S = 5
-	)
-	for i := 0; i < R; i++ {
-		var reqchans [G]chan int
-		done := make(chan struct{})
-		for j := 0; j < G; j++ {
-			reqchans[j] = make(chan int)
-			go growing(reqchans[j], done)
-		}
-		for s := 0; s < S; s++ {
-			for j := 0; j < G; j++ {
-				reqchans[j] <- 1 << uint(s)
-			}
-			for j := 0; j < G; j++ {
-				<-done
-			}
-		}
-		for j := 0; j < G; j++ {
-			close(reqchans[j])
-		}
-		for j := 0; j < G; j++ {
-			<-done
-		}
-	}
-}