Commit 56b54912 authored by Rick Hudson

Merge remote-tracking branch 'origin/dev.garbage'

This commit moves the GC from free list allocation to
bit mark allocation. Instead of using the bitmaps
generated during the mark phases to build free lists
and then allocating from those free lists, we now
allocate directly from the bitmaps.
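
As a rough illustration of the new allocation fast path (a minimal
sketch with hypothetical, simplified types, not the runtime's actual
mspan or nextFreeFast), the idea is to cache a 64-bit window of the
allocation bitmap and locate the next free object with a
count-trailing-zeros operation:

    package main

    import (
        "fmt"
        "math/bits"
    )

    // span models just enough state to show the idea: nelems objects of
    // size elemsize starting at base, plus a cached 64-bit bitmap window
    // (allocCache) whose lowest bit corresponds to freeindex and in which
    // a set bit means "this slot is free".
    type span struct {
        base       uintptr
        elemsize   uintptr
        nelems     uintptr
        freeindex  uintptr
        allocCache uint64
    }

    // nextFreeFast returns the address of the next free object if one is
    // visible in the cached bitmap window, or 0 if a slow path would be
    // needed to refill the cache.
    func (s *span) nextFreeFast() uintptr {
        theBit := uintptr(bits.TrailingZeros64(s.allocCache)) // 64 if the cache is empty
        if theBit >= 64 {
            return 0
        }
        result := s.freeindex + theBit
        if result >= s.nelems {
            return 0
        }
        s.allocCache >>= theBit + 1
        s.freeindex = result + 1
        return s.base + result*s.elemsize
    }

    func main() {
        s := &span{base: 0x10000, elemsize: 16, nelems: 64, allocCache: ^uint64(0)}
        for i := 0; i < 3; i++ {
            fmt.Printf("allocated object at %#x\n", s.nextFreeFast())
        }
    }

In the runtime the cached word holds the complement of the allocation
bits, so a set bit means "free" and Ctz64 finds the first free object
in constant time; the sketch follows the same convention.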

The change in the garbage benchmark

name              old time/op  new time/op  delta
XBenchGarbage-12  2.22ms ± 1%  2.13ms ± 1%  -3.90%  (p=0.000 n=18+18)

Change-Id: I17f57233336f0ca5ef5404c3be4ecb443ab622aa
parents d8d33514 e9eaa181
Reviving dev.garbage branch for use in new garbage collection experiment.
......@@ -529,7 +529,7 @@ func cgoCheckUnknownPointer(p unsafe.Pointer, msg string) (base, i uintptr) {
return
}
b, hbits, span := heapBitsForObject(uintptr(p), 0, 0)
b, hbits, span, _ := heapBitsForObject(uintptr(p), 0, 0)
base = b
if base == 0 {
return
......
......@@ -447,7 +447,7 @@ func dumproots() {
continue
}
spf := (*specialfinalizer)(unsafe.Pointer(sp))
p := unsafe.Pointer((uintptr(s.start) << _PageShift) + uintptr(spf.special.offset))
p := unsafe.Pointer(s.base() + uintptr(spf.special.offset))
dumpfinalizer(p, spf.fn, spf.fint, spf.ot)
}
}
......@@ -467,15 +467,19 @@ func dumpobjs() {
if s.state != _MSpanInUse {
continue
}
p := uintptr(s.start << _PageShift)
p := s.base()
size := s.elemsize
n := (s.npages << _PageShift) / size
if n > uintptr(len(freemark)) {
throw("freemark array doesn't have enough entries")
}
for l := s.freelist; l.ptr() != nil; l = l.ptr().next {
freemark[(uintptr(l)-p)/size] = true
for freeIndex := s.freeindex; freeIndex < s.nelems; freeIndex++ {
if s.isFree(freeIndex) {
freemark[freeIndex] = true
}
}
for j := uintptr(0); j < n; j, p = j+1, p+size {
if freemark[j] {
freemark[j] = false
......@@ -615,7 +619,7 @@ func dumpmemprof() {
continue
}
spp := (*specialprofile)(unsafe.Pointer(sp))
p := uintptr(s.start<<_PageShift) + uintptr(spp.special.offset)
p := s.base() + uintptr(spp.special.offset)
dumpint(tagAllocSample)
dumpint(uint64(p))
dumpint(uint64(uintptr(unsafe.Pointer(spp.b))))
......@@ -710,7 +714,7 @@ func makeheapobjbv(p uintptr, size uintptr) bitvector {
i := uintptr(0)
hbits := heapBitsForAddr(p)
for ; i < nptr; i++ {
if i >= 2 && !hbits.isMarked() {
if i >= 2 && !hbits.morePointers() {
break // end of object
}
if hbits.isPointer() {
......
......@@ -94,6 +94,9 @@ const (
pageShift = _PageShift
pageSize = _PageSize
pageMask = _PageMask
// By construction, single page spans of the smallest object class
// have the most objects per span.
maxObjsPerSpan = pageSize / 8
mSpanInUse = _MSpanInUse
......@@ -167,9 +170,6 @@ const (
_MaxGcproc = 32
)
// Page number (address>>pageShift)
type pageID uintptr
const _MaxArena32 = 2 << 30
// OS-defined helpers:
......@@ -384,6 +384,10 @@ func sysReserveHigh(n uintptr, reserved *bool) unsafe.Pointer {
return sysReserve(nil, n, reserved)
}
// sysAlloc allocates the next n bytes from the heap arena. The
// returned pointer is always _PageSize aligned and between
// h.arena_start and h.arena_end. sysAlloc returns nil on failure.
// There is no corresponding free function.
func (h *mheap) sysAlloc(n uintptr) unsafe.Pointer {
if n > h.arena_end-h.arena_used {
// We are in 32-bit mode, maybe we didn't use all possible address space yet.
......@@ -484,6 +488,65 @@ func (h *mheap) sysAlloc(n uintptr) unsafe.Pointer {
// base address for all 0-byte allocations
var zerobase uintptr
// nextFreeFast returns the next free object if one is quickly available.
// Otherwise it returns 0.
func nextFreeFast(s *mspan) gclinkptr {
theBit := sys.Ctz64(s.allocCache) // Is there a free object in the allocCache?
if theBit < 64 {
result := s.freeindex + uintptr(theBit)
if result < s.nelems {
freeidx := result + 1
if freeidx%64 == 0 && freeidx != s.nelems {
return 0
}
s.allocCache >>= (theBit + 1)
s.freeindex = freeidx
v := gclinkptr(result*s.elemsize + s.base())
s.allocCount++
return v
}
}
return 0
}
// nextFree returns the next free object from the cached span if one is available.
// Otherwise it refills the cache with a span with an available object and
// returns that object along with a flag indicating that this was a heavy
// weight allocation. If it is a heavy weight allocation the caller must
// determine whether a new GC cycle needs to be started or if the GC is active
// whether this goroutine needs to assist the GC.
func (c *mcache) nextFree(sizeclass int8) (v gclinkptr, s *mspan, shouldhelpgc bool) {
s = c.alloc[sizeclass]
shouldhelpgc = false
freeIndex := s.nextFreeIndex()
if freeIndex == s.nelems {
// The span is full.
if uintptr(s.allocCount) != s.nelems {
println("runtime: s.allocCount=", s.allocCount, "s.nelems=", s.nelems)
throw("s.allocCount != s.nelems && freeIndex == s.nelems")
}
systemstack(func() {
c.refill(int32(sizeclass))
})
shouldhelpgc = true
s = c.alloc[sizeclass]
freeIndex = s.nextFreeIndex()
}
if freeIndex >= s.nelems {
throw("freeIndex is not valid")
}
v = gclinkptr(freeIndex*s.elemsize + s.base())
s.allocCount++
if uintptr(s.allocCount) > s.nelems {
println("s.allocCount=", s.allocCount, "s.nelems=", s.nelems)
throw("s.allocCount > s.nelems")
}
return
}
// Allocate an object of size bytes.
// Small objects are allocated from the per-P cache's free lists.
// Large objects (> 32 kB) are allocated straight from the heap.
......@@ -538,7 +601,6 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
shouldhelpgc := false
dataSize := size
c := gomcache()
var s *mspan
var x unsafe.Pointer
noscan := typ == nil || typ.kind&kindNoPointers != 0
if size <= maxSmallSize {
......@@ -591,20 +653,11 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
return x
}
// Allocate a new maxTinySize block.
s = c.alloc[tinySizeClass]
v := s.freelist
if v.ptr() == nil {
systemstack(func() {
c.refill(tinySizeClass)
})
shouldhelpgc = true
s = c.alloc[tinySizeClass]
v = s.freelist
span := c.alloc[tinySizeClass]
v := nextFreeFast(span)
if v == 0 {
v, _, shouldhelpgc = c.nextFree(tinySizeClass)
}
s.freelist = v.ptr().next
s.ref++
// prefetchnta offers best performance, see change list message.
prefetchnta(uintptr(v.ptr().next))
x = unsafe.Pointer(v)
(*[2]uint64)(x)[0] = 0
(*[2]uint64)(x)[1] = 0
......@@ -623,26 +676,14 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
sizeclass = size_to_class128[(size-1024+127)>>7]
}
size = uintptr(class_to_size[sizeclass])
s = c.alloc[sizeclass]
v := s.freelist
if v.ptr() == nil {
systemstack(func() {
c.refill(int32(sizeclass))
})
shouldhelpgc = true
s = c.alloc[sizeclass]
v = s.freelist
span := c.alloc[sizeclass]
v := nextFreeFast(span)
if v == 0 {
v, span, shouldhelpgc = c.nextFree(sizeclass)
}
s.freelist = v.ptr().next
s.ref++
// prefetchnta offers best performance, see change list message.
prefetchnta(uintptr(v.ptr().next))
x = unsafe.Pointer(v)
if needzero {
v.ptr().next = 0
if size > 2*sys.PtrSize && ((*[2]uintptr)(x))[1] != 0 {
memclr(unsafe.Pointer(v), size)
}
if needzero && span.needzero != 0 {
memclr(unsafe.Pointer(v), size)
}
}
} else {
......@@ -651,13 +692,15 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
systemstack(func() {
s = largeAlloc(size, needzero)
})
x = unsafe.Pointer(uintptr(s.start << pageShift))
s.freeindex = 1
s.allocCount = 1
x = unsafe.Pointer(s.base())
size = s.elemsize
}
var scanSize uintptr
if noscan {
// All objects are pre-marked as noscan. Nothing to do.
heapBitsSetTypeNoScan(uintptr(x), size)
} else {
// If allocating a defer+arg block, now that we've picked a malloc size
// large enough to hold everything, cut the "asked for" size down to
......@@ -701,6 +744,7 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
if raceenabled {
racemalloc(x, size)
}
if msanenabled {
msanmalloc(x, size)
}
......@@ -755,8 +799,8 @@ func largeAlloc(size uintptr, needzero bool) *mspan {
if s == nil {
throw("out of memory")
}
s.limit = uintptr(s.start)<<_PageShift + size
heapBitsForSpan(s.base()).initSpan(s.layout())
s.limit = s.base() + size
heapBitsForSpan(s.base()).initSpan(s)
return s
}
......
This diff is collapsed.
......@@ -108,9 +108,11 @@ func (c *mcache) refill(sizeclass int32) *mspan {
_g_.m.locks++
// Return the current cached span to the central lists.
s := c.alloc[sizeclass]
if s.freelist.ptr() != nil {
throw("refill on a nonempty span")
if uintptr(s.allocCount) != s.nelems {
throw("refill of span with free space remaining")
}
if s != &emptymspan {
s.incache = false
}
......@@ -120,10 +122,11 @@ func (c *mcache) refill(sizeclass int32) *mspan {
if s == nil {
throw("out of memory")
}
if s.freelist.ptr() == nil {
println(s.ref, (s.npages<<_PageShift)/s.elemsize)
throw("empty span")
if uintptr(s.allocCount) == s.nelems {
throw("span has no free space")
}
c.alloc[sizeclass] = s
_g_.m.locks--
return s
......
......@@ -18,7 +18,7 @@ import "runtime/internal/atomic"
type mcentral struct {
lock mutex
sizeclass int32
nonempty mSpanList // list of spans with a free object
nonempty mSpanList // list of spans with a free object, ie a nonempty free list
empty mSpanList // list of spans with no free objects (or cached in an mcache)
}
......@@ -67,7 +67,9 @@ retry:
c.empty.insertBack(s)
unlock(&c.lock)
s.sweep(true)
if s.freelist.ptr() != nil {
freeIndex := s.nextFreeIndex()
if freeIndex != s.nelems {
s.freeindex = freeIndex
goto havespan
}
lock(&c.lock)
......@@ -98,11 +100,11 @@ retry:
// c is unlocked.
havespan:
cap := int32((s.npages << _PageShift) / s.elemsize)
n := cap - int32(s.ref)
if n == 0 {
throw("empty span")
n := cap - int32(s.allocCount)
if n == 0 || s.freeindex == s.nelems || uintptr(s.allocCount) == s.nelems {
throw("span has no free objects")
}
usedBytes := uintptr(s.ref) * s.elemsize
usedBytes := uintptr(s.allocCount) * s.elemsize
if usedBytes > 0 {
reimburseSweepCredit(usedBytes)
}
......@@ -115,10 +117,16 @@ havespan:
// heap_live changed.
gcController.revise()
}
if s.freelist.ptr() == nil {
throw("freelist empty")
}
s.incache = true
freeByteBase := s.freeindex &^ (64 - 1)
whichByte := freeByteBase / 8
// Init alloc bits cache.
s.refillAllocCache(whichByte)
// Adjust the allocCache so that s.freeindex corresponds to the low bit in
// s.allocCache.
s.allocCache >>= s.freeindex % 64
return s
}
......@@ -128,12 +136,12 @@ func (c *mcentral) uncacheSpan(s *mspan) {
s.incache = false
if s.ref == 0 {
throw("uncaching full span")
if s.allocCount == 0 {
throw("uncaching span but s.allocCount == 0")
}
cap := int32((s.npages << _PageShift) / s.elemsize)
n := cap - int32(s.ref)
n := cap - int32(s.allocCount)
if n > 0 {
c.empty.remove(s)
c.nonempty.insert(s)
......@@ -144,22 +152,19 @@ func (c *mcentral) uncacheSpan(s *mspan) {
unlock(&c.lock)
}
// Free n objects from a span s back into the central free list c.
// Called during sweep.
// Returns true if the span was returned to heap. Sets sweepgen to
// the latest generation.
// If preserve=true, don't return the span to heap nor relink in MCentral lists;
// caller takes care of it.
func (c *mcentral) freeSpan(s *mspan, n int32, start gclinkptr, end gclinkptr, preserve bool) bool {
// freeSpan updates c and s after sweeping s.
// It sets s's sweepgen to the latest generation,
// and, based on the number of free objects in s,
// moves s to the appropriate list of c or returns it
// to the heap.
// freeSpan returns true if s was returned to the heap.
// If preserve=true, it does not move s (the caller
// must take care of it).
func (c *mcentral) freeSpan(s *mspan, preserve bool, wasempty bool) bool {
if s.incache {
throw("freespan into cached span")
throw("freeSpan given cached span")
}
// Add the objects back to s's free list.
wasempty := s.freelist.ptr() == nil
end.ptr().next = s.freelist
s.freelist = start
s.ref -= uint16(n)
s.needzero = 1
if preserve {
// preserve is set only when called from MCentral_CacheSpan above,
......@@ -185,21 +190,18 @@ func (c *mcentral) freeSpan(s *mspan, n int32, start gclinkptr, end gclinkptr, p
// lock of c above.)
atomic.Store(&s.sweepgen, mheap_.sweepgen)
if s.ref != 0 {
if s.allocCount != 0 {
unlock(&c.lock)
return false
}
// s is completely freed, return it to the heap.
c.nonempty.remove(s)
s.needzero = 1
s.freelist = 0
unlock(&c.lock)
mheap_.freeSpan(s, 0)
return true
}
// Fetch a new span from the heap and carve into objects for the free list.
// grow allocates a new empty span from the heap and initializes it for c's size class.
func (c *mcentral) grow() *mspan {
npages := uintptr(class_to_allocnpages[c.sizeclass])
size := uintptr(class_to_size[c.sizeclass])
......@@ -210,21 +212,9 @@ func (c *mcentral) grow() *mspan {
return nil
}
p := uintptr(s.start << _PageShift)
p := s.base()
s.limit = p + size*n
head := gclinkptr(p)
tail := gclinkptr(p)
// i==0 iteration already done
for i := uintptr(1); i < n; i++ {
p += size
tail.ptr().next = gclinkptr(p)
tail = gclinkptr(p)
}
if s.freelist.ptr() != nil {
throw("freelist not empty")
}
tail.ptr().next = 0
s.freelist = head
heapBitsForSpan(s.base()).initSpan(s.layout())
heapBitsForSpan(s.base()).initSpan(s)
return s
}
......@@ -402,7 +402,7 @@ func findObject(v unsafe.Pointer) (s *mspan, x unsafe.Pointer, n uintptr) {
if s == nil {
return
}
x = unsafe.Pointer(uintptr(s.start) << pageShift)
x = unsafe.Pointer(s.base())
if uintptr(v) < uintptr(x) || uintptr(v) >= uintptr(unsafe.Pointer(s.limit)) || s.state != mSpanInUse {
s = nil
......
......@@ -360,7 +360,7 @@ func markrootSpans(gcw *gcWork, shard int) {
// retain everything it points to.
spf := (*specialfinalizer)(unsafe.Pointer(sp))
// A finalizer can be set for an inner byte of an object, find object beginning.
p := uintptr(s.start<<_PageShift) + uintptr(spf.special.offset)/s.elemsize*s.elemsize
p := s.base() + uintptr(spf.special.offset)/s.elemsize*s.elemsize
// Mark everything that can be reached from
// the object (but *not* the object itself or
......@@ -962,7 +962,10 @@ func gcDrain(gcw *gcWork, flags gcDrainFlags) {
if blocking {
b = gcw.get()
} else {
b = gcw.tryGet()
b = gcw.tryGetFast()
if b == 0 {
b = gcw.tryGet()
}
}
if b == 0 {
// work barrier reached or tryGet failed.
......@@ -1025,7 +1028,11 @@ func gcDrainN(gcw *gcWork, scanWork int64) int64 {
// PREFETCH(wbuf->obj[wbuf.nobj - 3];
// }
//
b := gcw.tryGet()
b := gcw.tryGetFast()
if b == 0 {
b = gcw.tryGet()
}
if b == 0 {
break
}
......@@ -1075,8 +1082,8 @@ func scanblock(b0, n0 uintptr, ptrmask *uint8, gcw *gcWork) {
// Same work as in scanobject; see comments there.
obj := *(*uintptr)(unsafe.Pointer(b + i))
if obj != 0 && arena_start <= obj && obj < arena_used {
if obj, hbits, span := heapBitsForObject(obj, b, i); obj != 0 {
greyobject(obj, b, i, hbits, span, gcw)
if obj, hbits, span, objIndex := heapBitsForObject(obj, b, i); obj != 0 {
greyobject(obj, b, i, hbits, span, gcw, objIndex)
}
}
}
......@@ -1141,8 +1148,8 @@ func scanobject(b uintptr, gcw *gcWork) {
// Check if it points into heap and not back at the current object.
if obj != 0 && arena_start <= obj && obj < arena_used && obj-b >= n {
// Mark the object.
if obj, hbits, span := heapBitsForObject(obj, b, i); obj != 0 {
greyobject(obj, b, i, hbits, span, gcw)
if obj, hbits, span, objIndex := heapBitsForObject(obj, b, i); obj != 0 {
greyobject(obj, b, i, hbits, span, gcw, objIndex)
}
}
}
......@@ -1155,9 +1162,9 @@ func scanobject(b uintptr, gcw *gcWork) {
// Preemption must be disabled.
//go:nowritebarrier
func shade(b uintptr) {
if obj, hbits, span := heapBitsForObject(b, 0, 0); obj != 0 {
if obj, hbits, span, objIndex := heapBitsForObject(b, 0, 0); obj != 0 {
gcw := &getg().m.p.ptr().gcw
greyobject(obj, 0, 0, hbits, span, gcw)
greyobject(obj, 0, 0, hbits, span, gcw, objIndex)
if gcphase == _GCmarktermination || gcBlackenPromptly {
// Ps aren't allowed to cache work during mark
// termination.
......@@ -1170,14 +1177,15 @@ func shade(b uintptr) {
// If it isn't already marked, mark it and enqueue into gcw.
// base and off are for debugging only and could be removed.
//go:nowritebarrierrec
func greyobject(obj, base, off uintptr, hbits heapBits, span *mspan, gcw *gcWork) {
func greyobject(obj, base, off uintptr, hbits heapBits, span *mspan, gcw *gcWork, objIndex uintptr) {
// obj should be start of allocation, and so must be at least pointer-aligned.
if obj&(sys.PtrSize-1) != 0 {
throw("greyobject: obj not pointer-aligned")
}
mbits := span.markBitsForIndex(objIndex)
if useCheckmark {
if !hbits.isMarked() {
if !mbits.isMarked() {
printlock()
print("runtime:greyobject: checkmarks finds unexpected unmarked object obj=", hex(obj), "\n")
print("runtime: found obj at *(", hex(base), "+", hex(off), ")\n")
......@@ -1199,11 +1207,11 @@ func greyobject(obj, base, off uintptr, hbits heapBits, span *mspan, gcw *gcWork
}
} else {
// If marked we have nothing to do.
if hbits.isMarked() {
if mbits.isMarked() {
return
}
hbits.setMarked()
// mbits.setMarked() // Avoid extra call overhead with manual inlining.
atomic.Or8(mbits.bytep, mbits.mask)
// If this is a noscan object, fast-track it to black
// instead of greying it.
if !hbits.hasPointers(span.elemsize) {
......@@ -1218,8 +1226,9 @@ func greyobject(obj, base, off uintptr, hbits heapBits, span *mspan, gcw *gcWork
// Previously we put the obj in an 8 element buffer that is drained at a rate
// to give the PREFETCH time to do its work.
// Use of PREFETCHNTA might be more appropriate than PREFETCH
gcw.put(obj)
if !gcw.putFast(obj) {
gcw.put(obj)
}
}
// gcDumpObject dumps the contents of obj for debugging and marks the
......@@ -1238,7 +1247,7 @@ func gcDumpObject(label string, obj, off uintptr) {
print(" s=nil\n")
return
}
print(" s.start*_PageSize=", hex(s.start*_PageSize), " s.limit=", hex(s.limit), " s.sizeclass=", s.sizeclass, " s.elemsize=", s.elemsize, "\n")
print(" s.base()=", hex(s.base()), " s.limit=", hex(s.limit), " s.sizeclass=", s.sizeclass, " s.elemsize=", s.elemsize, "\n")
skipped := false
for i := uintptr(0); i < s.elemsize; i += sys.PtrSize {
// For big objects, just print the beginning (because
......@@ -1274,7 +1283,7 @@ func gcmarknewobject(obj, size, scanSize uintptr) {
if useCheckmark && !gcBlackenPromptly { // The world should be stopped so this should not happen.
throw("gcmarknewobject called while doing checkmark")
}
heapBitsForAddr(obj).setMarked()
markBitsForAddr(obj).setMarked()
gcw := &getg().m.p.ptr().gcw
gcw.bytesMarked += uint64(size)
gcw.scanWork += int64(scanSize)
......
......@@ -8,7 +8,6 @@ package runtime
import (
"runtime/internal/atomic"
"runtime/internal/sys"
"unsafe"
)
......@@ -52,6 +51,7 @@ func finishsweep_m(stw bool) {
}
}
}
nextMarkBitArenaEpoch()
}
func bgsweep(c chan int) {
......@@ -187,21 +187,16 @@ func (s *mspan) sweep(preserve bool) bool {
res := false
nfree := 0
var head, end gclinkptr
c := _g_.m.mcache
freeToHeap := false
// Mark any free objects in this span so we don't collect them.
sstart := uintptr(s.start << _PageShift)
for link := s.freelist; link.ptr() != nil; link = link.ptr().next {
if uintptr(link) < sstart || s.limit <= uintptr(link) {
// Free list is corrupted.
dumpFreeList(s)
throw("free list corrupted")
}
heapBitsForAddr(uintptr(link)).setMarkedNonAtomic()
}
// The allocBits indicate which unmarked objects don't need to be
// processed since they were free at the end of the last GC cycle
// and were not allocated since then.
// If the allocBits index is >= s.freeindex and the bit
// is not marked then the object remains unallocated
// since the last GC.
// This situation is analogous to being on a freelist.
// Unlink & free special records for any objects we're about to free.
// Two complications here:
......@@ -215,17 +210,18 @@ func (s *mspan) sweep(preserve bool) bool {
special := *specialp
for special != nil {
// A finalizer can be set for an inner byte of an object, find object beginning.
p := uintptr(s.start<<_PageShift) + uintptr(special.offset)/size*size
hbits := heapBitsForAddr(p)
if !hbits.isMarked() {
objIndex := uintptr(special.offset) / size
p := s.base() + objIndex*size
mbits := s.markBitsForIndex(objIndex)
if !mbits.isMarked() {
// This object is not marked and has at least one special record.
// Pass 1: see if it has at least one finalizer.
hasFin := false
endOffset := p - uintptr(s.start<<_PageShift) + size
endOffset := p - s.base() + size
for tmp := special; tmp != nil && uintptr(tmp.offset) < endOffset; tmp = tmp.next {
if tmp.kind == _KindSpecialFinalizer {
// Stop freeing of object if it has a finalizer.
hbits.setMarkedNonAtomic()
mbits.setMarkedNonAtomic()
hasFin = true
break
}
......@@ -234,7 +230,7 @@ func (s *mspan) sweep(preserve bool) bool {
for special != nil && uintptr(special.offset) < endOffset {
// Find the exact byte for which the special was setup
// (as opposed to object beginning).
p := uintptr(s.start<<_PageShift) + uintptr(special.offset)
p := s.base() + uintptr(special.offset)
if special.kind == _KindSpecialFinalizer || !hasFin {
// Splice out special record.
y := special
......@@ -255,67 +251,67 @@ func (s *mspan) sweep(preserve bool) bool {
}
}
// Sweep through n objects of given size starting at p.
// This thread owns the span now, so it can manipulate
// the block bitmap without atomic operations.
size, n, _ := s.layout()
heapBitsSweepSpan(s.base(), size, n, func(p uintptr) {
// At this point we know that we are looking at garbage object
// that needs to be collected.
if debug.allocfreetrace != 0 {
tracefree(unsafe.Pointer(p), size)
}
if msanenabled {
msanfree(unsafe.Pointer(p), size)
if debug.allocfreetrace != 0 {
// Find all newly freed objects. This doesn't have to
// be efficient; allocfreetrace has massive overhead.
mbits := s.markBitsForBase()
abits := s.allocBitsForIndex(0)
for i := uintptr(0); i < s.nelems; i++ {
if !mbits.isMarked() && (abits.index < s.freeindex || abits.isMarked()) {
x := s.base() + i*s.elemsize
tracefree(unsafe.Pointer(x), size)
}
mbits.advance()
abits.advance()
}
}
// Reset to allocated+noscan.
if cl == 0 {
// Free large span.
if preserve {
throw("can't preserve large span")
}
s.needzero = 1
// Count the number of free objects in this span.
nfree = s.countFree()
if cl == 0 && nfree != 0 {
s.needzero = 1
freeToHeap = true
}
nalloc := uint16(s.nelems) - uint16(nfree)
nfreed := s.allocCount - nalloc
if nalloc > s.allocCount {
print("runtime: nelems=", s.nelems, " nfree=", nfree, " nalloc=", nalloc, " previous allocCount=", s.allocCount, " nfreed=", nfreed, "\n")
throw("sweep increased allocation count")
}
// Free the span after heapBitsSweepSpan
// returns, since it's not done with the span.
freeToHeap = true
} else {
// Free small object.
if size > 2*sys.PtrSize {
*(*uintptr)(unsafe.Pointer(p + sys.PtrSize)) = uintptrMask & 0xdeaddeaddeaddead // mark as "needs to be zeroed"
} else if size > sys.PtrSize {
*(*uintptr)(unsafe.Pointer(p + sys.PtrSize)) = 0
}
if head.ptr() == nil {
head = gclinkptr(p)
} else {
end.ptr().next = gclinkptr(p)
}
end = gclinkptr(p)
end.ptr().next = gclinkptr(0x0bade5)
nfree++
}
})
s.allocCount = nalloc
wasempty := s.nextFreeIndex() == s.nelems
s.freeindex = 0 // reset allocation index to start of span.
// gcmarkBits becomes the allocBits.
// get a fresh cleared gcmarkBits in preparation for next GC
s.allocBits = s.gcmarkBits
s.gcmarkBits = newMarkBits(s.nelems)
// Initialize alloc bits cache.
s.refillAllocCache(0)
// We need to set s.sweepgen = h.sweepgen only when all blocks are swept,
// because of the potential for a concurrent free/SetFinalizer.
// But we need to set it before we make the span available for allocation
// (return it to heap or mcentral), because allocation code assumes that a
// span is already swept if available for allocation.
if freeToHeap || nfree == 0 {
if freeToHeap || nfreed == 0 {
// The span must be in our exclusive ownership until we update sweepgen,
// check for potential races.
if s.state != mSpanInUse || s.sweepgen != sweepgen-1 {
print("MSpan_Sweep: state=", s.state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n")
throw("MSpan_Sweep: bad span state after sweep")
}
// Serialization point.
// At this point the mark bits are cleared and allocation ready
// to go so release the span.
atomic.Store(&s.sweepgen, sweepgen)
}
if nfree > 0 {
c.local_nsmallfree[cl] += uintptr(nfree)
res = mheap_.central[cl].mcentral.freeSpan(s, int32(nfree), head, end, preserve)
if nfreed > 0 && cl != 0 {
c.local_nsmallfree[cl] += uintptr(nfreed)
res = mheap_.central[cl].mcentral.freeSpan(s, preserve, wasempty)
// MCentral_FreeSpan updates sweepgen
} else if freeToHeap {
// Free large span to heap
......@@ -336,7 +332,7 @@ func (s *mspan) sweep(preserve bool) bool {
// implement and then call some kind of MHeap_DeleteSpan.
if debug.efence > 0 {
s.limit = 0 // prevent mlookup from finding this span
sysFault(unsafe.Pointer(uintptr(s.start<<_PageShift)), size)
sysFault(unsafe.Pointer(s.base()), size)
} else {
mheap_.freeSpan(s, 1)
}
......@@ -399,27 +395,3 @@ func reimburseSweepCredit(unusableBytes uintptr) {
throw("spanBytesAlloc underflow")
}
}
func dumpFreeList(s *mspan) {
printlock()
print("runtime: free list of span ", s, ":\n")
sstart := uintptr(s.start << _PageShift)
link := s.freelist
for i := 0; i < int(s.npages*_PageSize/s.elemsize); i++ {
if i != 0 {
print(" -> ")
}
print(hex(link))
if link.ptr() == nil {
break
}
if uintptr(link) < sstart || s.limit <= uintptr(link) {
// Bad link. Stop walking before we crash.
print(" (BAD)")
break
}
link = link.ptr().next
}
print("\n")
printunlock()
}
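
To make the sweep changes above concrete: instead of linking freed
objects onto a free list, sweeping now turns the mark bitmap of the
finished GC cycle into the allocation bitmap and installs a fresh,
cleared mark bitmap for the next cycle. A minimal sketch with
hypothetical, simplified types (not the runtime's actual mspan,
countFree, or newMarkBits):

    package main

    import (
        "fmt"
        "math/bits"
    )

    // span holds one allocation bitmap and one mark bitmap, 1 bit per
    // object (1 = allocated / marked), with any padding bits in the last
    // word left 0.
    type span struct {
        nelems     uintptr
        freeindex  uintptr
        allocBits  []uint64
        gcmarkBits []uint64
    }

    // sweep counts the objects left unmarked by the last GC, makes the
    // mark bitmap the new allocation bitmap, and installs a cleared mark
    // bitmap for the next cycle. No free list is ever built.
    func (s *span) sweep() (nfree int) {
        for _, w := range s.gcmarkBits {
            nfree += 64 - bits.OnesCount64(w)
        }
        nfree -= len(s.gcmarkBits)*64 - int(s.nelems) // don't count padding bits as free
        s.allocBits = s.gcmarkBits
        s.gcmarkBits = make([]uint64, len(s.allocBits))
        s.freeindex = 0
        return nfree
    }

    func main() {
        s := &span{nelems: 128, allocBits: make([]uint64, 2), gcmarkBits: []uint64{0xF0F0, 0}}
        fmt.Println("free objects after sweep:", s.sweep()) // 8 marked out of 128 -> 120 free
    }

Because unmarked objects simply remain 0 in the new allocation bitmap,
they become allocatable again without being touched, which is what
removes the free-list construction pass from sweep.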
......@@ -116,6 +116,22 @@ func (w *gcWork) put(obj uintptr) {
wbuf.nobj++
}
// putFast does a put and returns true if it can be done quickly
// otherwise it returns false and the caller needs to call put.
//go:nowritebarrier
func (w *gcWork) putFast(obj uintptr) bool {
wbuf := w.wbuf1.ptr()
if wbuf == nil {
return false
} else if wbuf.nobj == len(wbuf.obj) {
return false
}
wbuf.obj[wbuf.nobj] = obj
wbuf.nobj++
return true
}
// tryGet dequeues a pointer for the garbage collector to trace.
//
// If there are no pointers remaining in this gcWork or in the global
......@@ -147,6 +163,23 @@ func (w *gcWork) tryGet() uintptr {
return wbuf.obj[wbuf.nobj]
}
// tryGetFast dequeues a pointer for the garbage collector to trace
// if one is readily available. Otherwise it returns 0 and
// the caller is expected to call tryGet().
//go:nowritebarrier
func (w *gcWork) tryGetFast() uintptr {
wbuf := w.wbuf1.ptr()
if wbuf == nil {
return 0
}
if wbuf.nobj == 0 {
return 0
}
wbuf.nobj--
return wbuf.obj[wbuf.nobj]
}
// get dequeues a pointer for the garbage collector to trace, blocking
// if necessary to ensure all pointers from all queues and caches have
// been retrieved. get returns 0 if there are no pointers remaining.
......
This diff is collapsed.
......@@ -55,7 +55,7 @@ var size_to_class128 [(_MaxSmallSize-1024)/128 + 1]int8
func sizeToClass(size int32) int32 {
if size > _MaxSmallSize {
throw("SizeToClass - invalid size")
throw("invalid size")
}
if size > 1024-8 {
return int32(size_to_class128[(size-1024+127)>>7])
......@@ -79,7 +79,7 @@ func initSizes() {
}
}
if align&(align-1) != 0 {
throw("InitSizes - bug")
throw("incorrect alignment")
}
// Make the allocnpages big enough that
......@@ -106,10 +106,18 @@ func initSizes() {
sizeclass++
}
if sizeclass != _NumSizeClasses {
print("sizeclass=", sizeclass, " NumSizeClasses=", _NumSizeClasses, "\n")
throw("InitSizes - bad NumSizeClasses")
print("runtime: sizeclass=", sizeclass, " NumSizeClasses=", _NumSizeClasses, "\n")
throw("bad NumSizeClasses")
}
// Check maxObjsPerSpan => number of objects invariant.
for i, size := range class_to_size {
if size != 0 && class_to_allocnpages[i]*pageSize/size > maxObjsPerSpan {
throw("span contains too many objects")
}
if size == 0 && i != 0 {
throw("size is 0 but class is not 0")
}
}
// Initialize the size_to_class tables.
nextsize := 0
for sizeclass = 1; sizeclass < _NumSizeClasses; sizeclass++ {
......@@ -128,12 +136,12 @@ func initSizes() {
for n := int32(0); n < _MaxSmallSize; n++ {
sizeclass := sizeToClass(n)
if sizeclass < 1 || sizeclass >= _NumSizeClasses || class_to_size[sizeclass] < n {
print("size=", n, " sizeclass=", sizeclass, " runtime·class_to_size=", class_to_size[sizeclass], "\n")
print("runtime: size=", n, " sizeclass=", sizeclass, " runtime·class_to_size=", class_to_size[sizeclass], "\n")
print("incorrect SizeToClass\n")
goto dump
}
if sizeclass > 1 && class_to_size[sizeclass-1] >= n {
print("size=", n, " sizeclass=", sizeclass, " runtime·class_to_size=", class_to_size[sizeclass], "\n")
print("runtime: size=", n, " sizeclass=", sizeclass, " runtime·class_to_size=", class_to_size[sizeclass], "\n")
print("SizeToClass too big\n")
goto dump
}
......@@ -155,18 +163,18 @@ func initSizes() {
dump:
if true {
print("NumSizeClasses=", _NumSizeClasses, "\n")
print("runtime: NumSizeClasses=", _NumSizeClasses, "\n")
print("runtime·class_to_size:")
for sizeclass = 0; sizeclass < _NumSizeClasses; sizeclass++ {
print(" ", class_to_size[sizeclass], "")
}
print("\n\n")
print("size_to_class8:")
print("runtime: size_to_class8:")
for i := 0; i < len(size_to_class8); i++ {
print(" ", i*8, "=>", size_to_class8[i], "(", class_to_size[size_to_class8[i]], ")\n")
}
print("\n")
print("size_to_class128:")
print("runtime: size_to_class128:")
for i := 0; i < len(size_to_class128); i++ {
print(" ", i*128, "=>", size_to_class128[i], "(", class_to_size[size_to_class128[i]], ")\n")
}
......
......@@ -295,9 +295,9 @@ func updatememstats(stats *gcstats) {
memstats.nmalloc++
memstats.alloc += uint64(s.elemsize)
} else {
memstats.nmalloc += uint64(s.ref)
memstats.by_size[s.sizeclass].nmalloc += uint64(s.ref)
memstats.alloc += uint64(s.ref) * uint64(s.elemsize)
memstats.nmalloc += uint64(s.allocCount)
memstats.by_size[s.sizeclass].nmalloc += uint64(s.allocCount)
memstats.alloc += uint64(s.allocCount) * uint64(s.elemsize)
}
}
unlock(&mheap_.lock)
......
......@@ -191,26 +191,26 @@ func stackpoolalloc(order uint8) gclinkptr {
if s == nil {
throw("out of memory")
}
if s.ref != 0 {
throw("bad ref")
if s.allocCount != 0 {
throw("bad allocCount")
}
if s.freelist.ptr() != nil {
throw("bad freelist")
if s.stackfreelist.ptr() != nil {
throw("bad stackfreelist")
}
for i := uintptr(0); i < _StackCacheSize; i += _FixedStack << order {
x := gclinkptr(uintptr(s.start)<<_PageShift + i)
x.ptr().next = s.freelist
s.freelist = x
x := gclinkptr(s.base() + i)
x.ptr().next = s.stackfreelist
s.stackfreelist = x
}
list.insert(s)
}
x := s.freelist
x := s.stackfreelist
if x.ptr() == nil {
throw("span has no free stacks")
}
s.freelist = x.ptr().next
s.ref++
if s.freelist.ptr() == nil {
s.stackfreelist = x.ptr().next
s.allocCount++
if s.stackfreelist.ptr() == nil {
// all stacks in s are allocated.
list.remove(s)
}
......@@ -223,14 +223,14 @@ func stackpoolfree(x gclinkptr, order uint8) {
if s.state != _MSpanStack {
throw("freeing stack not in a stack span")
}
if s.freelist.ptr() == nil {
if s.stackfreelist.ptr() == nil {
// s will now have a free stack
stackpool[order].insert(s)
}
x.ptr().next = s.freelist
s.freelist = x
s.ref--
if gcphase == _GCoff && s.ref == 0 {
x.ptr().next = s.stackfreelist
s.stackfreelist = x
s.allocCount--
if gcphase == _GCoff && s.allocCount == 0 {
// Span is completely free. Return it to the heap
// immediately if we're sweeping.
//
......@@ -247,7 +247,7 @@ func stackpoolfree(x gclinkptr, order uint8) {
//
// By not freeing, we prevent step #4 until GC is done.
stackpool[order].remove(s)
s.freelist = 0
s.stackfreelist = 0
mheap_.freeStack(s)
}
}
......@@ -391,7 +391,7 @@ func stackalloc(n uint32) (stack, []stkbar) {
throw("out of memory")
}
}
v = unsafe.Pointer(s.start << _PageShift)
v = unsafe.Pointer(s.base())
}
if raceenabled {
......@@ -456,7 +456,7 @@ func stackfree(stk stack, n uintptr) {
} else {
s := mheap_.lookup(v)
if s.state != _MSpanStack {
println(hex(s.start<<_PageShift), v)
println(hex(s.base()), v)
throw("bad span state")
}
if gcphase == _GCoff {
......@@ -1136,9 +1136,9 @@ func freeStackSpans() {
list := &stackpool[order]
for s := list.first; s != nil; {
next := s.next
if s.ref == 0 {
if s.allocCount == 0 {
list.remove(s)
s.freelist = 0
s.stackfreelist = 0
mheap_.freeStack(s)
}
s = next
......