Commit 07e738ec authored by Michael Anthony Knyszek, committed by Michael Knyszek

runtime: use only treaps for tracking spans

Currently, mheap tracks spans in both mSpanLists and mTreaps, but
mSpanLists, while they tend to be smaller, complicate the
implementation. Here we simplify the implementation by removing
free and busy from mheap and renaming freelarge -> free and busylarge
-> busy.

This change also slightly changes the reclamation policy. Previously,
for allocations under 1MB we would attempt to find a small span of the
right size. Now, we just try to find any number of spans totaling the
right size. This may increase heap fragmentation, but that will be dealt
with using virtual memory tricks in follow-up CLs.
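For readers unfamiliar with mTreap: it is a treap of free spans keyed by size, so "smallest free span with at least npage pages" is an ordered-tree successor query followed by a delete. The toy program below sketches that idea; every name in it is invented for illustration, and the runtime's real treap (in mgclarge.go at the time of this CL) differs in detail.

package main

import (
	"fmt"
	"math/rand"
)

// node is a toy treap node keyed on span size in pages.
type node struct {
	npages      uintptr // key: span size in pages (BST order)
	prio        uint32  // random priority (min-heap order)
	left, right *node
}

func rotateRight(t *node) *node {
	l := t.left
	t.left, l.right = l.right, t
	return l
}

func rotateLeft(t *node) *node {
	r := t.right
	t.right, r.left = r.left, t
	return r
}

// insert adds a span of the given size, restoring the heap
// property with rotations on the way back up.
func insert(t *node, npages uintptr) *node {
	if t == nil {
		return &node{npages: npages, prio: rand.Uint32()}
	}
	if npages < t.npages {
		t.left = insert(t.left, npages)
		if t.left.prio < t.prio {
			t = rotateRight(t)
		}
	} else {
		t.right = insert(t.right, npages)
		if t.right.prio < t.prio {
			t = rotateLeft(t)
		}
	}
	return t
}

// findAtLeast returns the smallest key >= npages (best fit), or 0 if
// none exists: an ordinary BST successor-or-equal search.
func findAtLeast(t *node, npages uintptr) uintptr {
	var best uintptr
	for t != nil {
		if t.npages >= npages {
			best = t.npages
			t = t.left // look for a smaller span that still fits
		} else {
			t = t.right
		}
	}
	return best
}

// remove deletes one node with the given key by rotating it down
// toward a leaf, promoting the child with the smaller priority.
func remove(t *node, key uintptr) *node {
	if t == nil {
		return nil
	}
	switch {
	case key < t.npages:
		t.left = remove(t.left, key)
	case key > t.npages:
		t.right = remove(t.right, key)
	case t.left == nil:
		return t.right
	case t.right == nil:
		return t.left
	case t.left.prio < t.right.prio:
		t = rotateRight(t)
		t.right = remove(t.right, key)
	default:
		t = rotateLeft(t)
		t.left = remove(t.left, key)
	}
	return t
}

func main() {
	var root *node
	for _, n := range []uintptr{1, 3, 8, 2} {
		root = insert(root, n)
	}
	if best := findAtLeast(root, 4); best != 0 {
		root = remove(root, best)
		fmt.Println("4-page request gets an", best, "page span") // best fit: 8
	}
}

The real mTreap's remove(npage) has the same best-fit contract as findAtLeast followed by remove here.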

For #14045.

Garbage-heavy benchmarks show very little change, except what appears
to be a decrease in STW times and peak RSS.

name                      old STW-ns/GC       new STW-ns/GC       delta
Garbage/benchmem-MB=64-8           263k ±64%           217k ±24%  -17.66%  (p=0.028 n=25+23)

name                      old STW-ns/op       new STW-ns/op       delta
Garbage/benchmem-MB=64-8          9.39k ±65%          7.80k ±24%  -16.88%  (p=0.037 n=25+23)

name                      old peak-RSS-bytes  new peak-RSS-bytes  delta
Garbage/benchmem-MB=64-8           281M ± 0%           249M ± 4%  -11.40%  (p=0.000 n=19+18)

https://perf.golang.org/search?q=upload:20181005.1

Go1 benchmarks perform roughly the same, the most notable regression
being the JSON encode/decode benchmarks, which worsen by ~2%.

name                     old time/op    new time/op    delta
BinaryTree17-8              3.02s ± 2%     2.99s ± 2%  -1.18%  (p=0.000 n=25+24)
Fannkuch11-8                3.05s ± 1%     3.02s ± 2%  -1.20%  (p=0.000 n=25+25)
FmtFprintfEmpty-8          43.6ns ± 5%    43.4ns ± 3%    ~     (p=0.528 n=25+25)
FmtFprintfString-8         74.9ns ± 3%    73.4ns ± 1%  -2.03%  (p=0.001 n=25+24)
FmtFprintfInt-8            79.3ns ± 3%    77.9ns ± 1%  -1.73%  (p=0.003 n=25+25)
FmtFprintfIntInt-8          119ns ± 6%     116ns ± 0%  -2.68%  (p=0.000 n=25+18)
FmtFprintfPrefixedInt-8     134ns ± 4%     132ns ± 1%  -1.52%  (p=0.004 n=25+25)
FmtFprintfFloat-8           240ns ± 1%     241ns ± 1%    ~     (p=0.403 n=24+23)
FmtManyArgs-8               543ns ± 1%     537ns ± 1%  -1.00%  (p=0.000 n=25+25)
GobDecode-8                6.88ms ± 1%    6.92ms ± 4%    ~     (p=0.088 n=24+22)
GobEncode-8                5.92ms ± 1%    5.93ms ± 1%    ~     (p=0.898 n=25+24)
Gzip-8                      267ms ± 2%     266ms ± 2%    ~     (p=0.213 n=25+24)
Gunzip-8                   35.4ms ± 1%    35.6ms ± 1%  +0.70%  (p=0.000 n=25+25)
HTTPClientServer-8          104µs ± 2%     104µs ± 2%    ~     (p=0.686 n=25+25)
JSONEncode-8               9.67ms ± 1%    9.80ms ± 4%  +1.32%  (p=0.000 n=25+25)
JSONDecode-8               47.7ms ± 1%    48.8ms ± 5%  +2.33%  (p=0.000 n=25+25)
Mandelbrot200-8            4.87ms ± 1%    4.91ms ± 1%  +0.79%  (p=0.000 n=25+25)
GoParse-8                  3.59ms ± 4%    3.55ms ± 1%    ~     (p=0.199 n=25+24)
RegexpMatchEasy0_32-8      90.3ns ± 1%    89.9ns ± 1%  -0.47%  (p=0.000 n=25+21)
RegexpMatchEasy0_1K-8       204ns ± 1%     204ns ± 1%    ~     (p=0.914 n=25+24)
RegexpMatchEasy1_32-8      84.9ns ± 0%    84.6ns ± 1%  -0.36%  (p=0.000 n=24+25)
RegexpMatchEasy1_1K-8       350ns ± 1%     348ns ± 3%  -0.59%  (p=0.007 n=25+25)
RegexpMatchMedium_32-8      122ns ± 1%     121ns ± 0%  -1.08%  (p=0.000 n=25+18)
RegexpMatchMedium_1K-8     36.1µs ± 1%    34.6µs ± 1%  -4.02%  (p=0.000 n=25+25)
RegexpMatchHard_32-8       1.69µs ± 2%    1.65µs ± 1%  -2.38%  (p=0.000 n=25+25)
RegexpMatchHard_1K-8       50.8µs ± 1%    49.4µs ± 1%  -2.69%  (p=0.000 n=25+24)
Revcomp-8                   453ms ± 2%     449ms ± 3%  -0.74%  (p=0.022 n=25+24)
Template-8                 63.2ms ± 2%    63.4ms ± 1%    ~     (p=0.127 n=25+24)
TimeParse-8                 313ns ± 1%     315ns ± 3%    ~     (p=0.924 n=24+25)
TimeFormat-8                294ns ± 1%     292ns ± 2%  -0.65%  (p=0.004 n=23+24)
[Geo mean]                 49.9µs         49.6µs       -0.65%

name                     old speed      new speed      delta
GobDecode-8               112MB/s ± 1%   110MB/s ± 4%  -1.00%  (p=0.036 n=24+24)
GobEncode-8               130MB/s ± 1%   129MB/s ± 1%    ~     (p=0.894 n=25+24)
Gzip-8                   72.7MB/s ± 2%  73.0MB/s ± 2%    ~     (p=0.208 n=25+24)
Gunzip-8                  549MB/s ± 1%   545MB/s ± 1%  -0.70%  (p=0.000 n=25+25)
JSONEncode-8              201MB/s ± 1%   198MB/s ± 3%  -1.29%  (p=0.000 n=25+25)
JSONDecode-8             40.7MB/s ± 1%  39.8MB/s ± 5%  -2.23%  (p=0.000 n=25+25)
GoParse-8                16.2MB/s ± 4%  16.3MB/s ± 1%    ~     (p=0.211 n=25+24)
RegexpMatchEasy0_32-8     354MB/s ± 1%   356MB/s ± 1%  +0.47%  (p=0.000 n=25+21)
RegexpMatchEasy0_1K-8    5.00GB/s ± 0%  4.99GB/s ± 1%    ~     (p=0.588 n=24+24)
RegexpMatchEasy1_32-8     377MB/s ± 1%   378MB/s ± 1%  +0.39%  (p=0.000 n=25+25)
RegexpMatchEasy1_1K-8    2.92GB/s ± 1%  2.94GB/s ± 3%  +0.65%  (p=0.008 n=25+25)
RegexpMatchMedium_32-8   8.14MB/s ± 1%  8.22MB/s ± 1%  +0.98%  (p=0.000 n=25+24)
RegexpMatchMedium_1K-8   28.4MB/s ± 1%  29.6MB/s ± 1%  +4.19%  (p=0.000 n=25+25)
RegexpMatchHard_32-8     18.9MB/s ± 2%  19.4MB/s ± 1%  +2.43%  (p=0.000 n=25+25)
RegexpMatchHard_1K-8     20.2MB/s ± 1%  20.7MB/s ± 1%  +2.76%  (p=0.000 n=25+24)
Revcomp-8                 561MB/s ± 2%   566MB/s ± 3%  +0.75%  (p=0.021 n=25+24)
Template-8               30.7MB/s ± 2%  30.6MB/s ± 1%    ~     (p=0.131 n=25+24)
[Geo mean]                120MB/s        121MB/s       +0.48%

https://perf.golang.org/search?q=upload:20181004.6

Change-Id: I97f9fee34577961a116a8ddd445c6272253f0f95
Reviewed-on: https://go-review.googlesource.com/c/139837
Run-TryBot: Michael Knyszek <mknyszek@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Austin Clements <austin@google.com>
parent e508a5f0
@@ -136,8 +136,7 @@ const (
 	_TinySize      = 16
 	_TinySizeClass = int8(2)
 
 	_FixAllocChunk = 16 << 10 // Chunk size for FixAlloc
-	_MaxMHeapList  = 1 << (20 - _PageShift) // Maximum page length for fixed-size list in MHeap.
 
 	// Per-P, per order stack segment cache size.
 	_StackCacheSize = 32 * 1024
...
@@ -30,13 +30,11 @@ const minPhysPageSize = 4096
 //go:notinheap
 type mheap struct {
 	lock      mutex
-	free      [_MaxMHeapList]mSpanList // free lists of given length up to _MaxMHeapList
-	freelarge mTreap                   // free treap of length >= _MaxMHeapList
-	busy      [_MaxMHeapList]mSpanList // busy lists of large spans of given length
-	busylarge mSpanList                // busy lists of large spans length >= _MaxMHeapList
-	sweepgen  uint32                   // sweep generation, see comment in mspan
-	sweepdone uint32                   // all spans are swept
-	sweepers  uint32                   // number of active sweepone calls
+	free      mTreap    // free treap of spans
+	busy      mSpanList // busy list of spans
+	sweepgen  uint32    // sweep generation, see comment in mspan
+	sweepdone uint32    // all spans are swept
+	sweepers  uint32    // number of active sweepone calls
 
 	// allspans is a slice of all mspans ever created. Each mspan
 	// appears exactly once.
@@ -599,12 +597,7 @@ func (h *mheap) init() {
 	h.spanalloc.zero = false
 
 	// h->mapcache needs no init
-	for i := range h.free {
-		h.free[i].init()
-		h.busy[i].init()
-	}
-
-	h.busylarge.init()
+	h.busy.init()
 	for i := range h.central {
 		h.central[i].mcentral.init(spanClass(i))
 	}
@@ -647,30 +640,12 @@ retry:
 // Sweeps and reclaims at least npage pages into heap.
 // Called before allocating npage pages.
 func (h *mheap) reclaim(npage uintptr) {
-	// First try to sweep busy spans with large objects of size >= npage,
-	// this has good chances of reclaiming the necessary space.
-	for i := int(npage); i < len(h.busy); i++ {
-		if h.reclaimList(&h.busy[i], npage) != 0 {
-			return // Bingo!
-		}
-	}
-
-	// Then -- even larger objects.
-	if h.reclaimList(&h.busylarge, npage) != 0 {
+	if h.reclaimList(&h.busy, npage) != 0 {
 		return // Bingo!
 	}
 
-	// Now try smaller objects.
-	// One such object is not enough, so we need to reclaim several of them.
-	reclaimed := uintptr(0)
-	for i := 0; i < int(npage) && i < len(h.busy); i++ {
-		reclaimed += h.reclaimList(&h.busy[i], npage-reclaimed)
-		if reclaimed >= npage {
-			return
-		}
-	}
-
 	// Now sweep everything that is not yet swept.
+	var reclaimed uintptr
 	unlock(&h.lock)
 	for {
 		n := sweepone()
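The new reclaim path above leans on reclaimList doing all the work against the single busy list. Below is a toy model of the contract it assumes — sweep unswept spans until the requested page total is met — with span and spanList as stand-in types invented for this sketch, not the runtime's.

package main

import "fmt"

type span struct {
	npages uintptr
	swept  bool
	next   *span
}

type spanList struct{ first *span }

// reclaimList sweeps unswept spans in list order until at least npages
// pages have been reclaimed, returning the total actually freed. This
// is the behavior the single-list reclaim above depends on.
func reclaimList(list *spanList, npages uintptr) uintptr {
	var n uintptr
	for s := list.first; s != nil && n < npages; s = s.next {
		if !s.swept {
			s.swept = true // stand-in for sweeping and freeing the span
			n += s.npages
		}
	}
	return n
}

func main() {
	b := &span{npages: 5}
	a := &span{npages: 3, next: b}
	list := &spanList{first: a}
	// A 4-page request is satisfied by two smaller spans (3+5 pages),
	// matching the new "any number of spans totaling the right size" policy.
	fmt.Println(reclaimList(list, 4)) // 8
}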
@@ -752,11 +727,7 @@ func (h *mheap) alloc_m(npage uintptr, spanclass spanClass, large bool) *mspan {
 			mheap_.nlargealloc++
 			atomic.Xadd64(&memstats.heap_live, int64(npage<<_PageShift))
 			// Swept spans are at the end of lists.
-			if s.npages < uintptr(len(h.busy)) {
-				h.busy[s.npages].insertBack(s)
-			} else {
-				h.busylarge.insertBack(s)
-			}
+			h.busy.insertBack(s)
 		}
 	}
 	// heap_scan and heap_live were updated.
@@ -867,31 +838,20 @@ func (h *mheap) setSpans(base, npage uintptr, s *mspan) {
 // The returned span has been removed from the
 // free list, but its state is still mSpanFree.
 func (h *mheap) allocSpanLocked(npage uintptr, stat *uint64) *mspan {
-	var list *mSpanList
 	var s *mspan
 
-	// Try in fixed-size lists up to max.
-	for i := int(npage); i < len(h.free); i++ {
-		list = &h.free[i]
-		if !list.isEmpty() {
-			s = list.first
-			list.remove(s)
-			goto HaveSpan
-		}
-	}
-	// Best fit in list of large spans.
-	s = h.allocLarge(npage) // allocLarge removed s from h.freelarge for us
+	// Best fit in the treap of spans.
+	s = h.free.remove(npage)
 	if s == nil {
 		if !h.grow(npage) {
 			return nil
 		}
-		s = h.allocLarge(npage)
+		s = h.free.remove(npage)
 		if s == nil {
 			return nil
 		}
 	}
 
-HaveSpan:
 	// Mark span in use.
 	if s.state != mSpanFree {
 		throw("MHeap_AllocLocked - MSpan not free")
@@ -933,21 +893,6 @@ HaveSpan:
 	return s
 }
 
-// Large spans have a minimum size of 1MByte. The maximum number of large spans to support
-// 1TBytes is 1 million, experimentation using random sizes indicates that the depth of
-// the tree is less that 2x that of a perfectly balanced tree. For 1TByte can be referenced
-// by a perfectly balanced tree with a depth of 20. Twice that is an acceptable 40.
-func (h *mheap) isLargeSpan(npages uintptr) bool {
-	return npages >= uintptr(len(h.free))
-}
-
-// allocLarge allocates a span of at least npage pages from the treap of large spans.
-// Returns nil if no such span currently exists.
-func (h *mheap) allocLarge(npage uintptr) *mspan {
-	// Search treap for smallest span with >= npage pages.
-	return h.freelarge.remove(npage)
-}
-
 // Try to add at least npage pages of memory to the heap,
 // returning whether it worked.
 //
@@ -1023,7 +968,7 @@ func (h *mheap) freeManual(s *mspan, stat *uint64) {
 	unlock(&h.lock)
 }
 
-// s must be on a busy list (h.busy or h.busylarge) or unlinked.
+// s must be on the busy list or unlinked.
 func (h *mheap) freeSpanLocked(s *mspan, acctinuse, acctidle bool, unusedsince int64) {
 	switch s.state {
 	case mSpanManual:
@@ -1048,7 +993,7 @@ func (h *mheap) freeSpanLocked(s *mspan, acctinuse, acctidle bool, unusedsince i
 	}
 	s.state = mSpanFree
 	if s.inList() {
-		h.busyList(s.npages).remove(s)
+		h.busy.remove(s)
 	}
 
 	// Stamp newly unused spans. The scavenger will use that
@@ -1069,12 +1014,7 @@ func (h *mheap) freeSpanLocked(s *mspan, acctinuse, acctidle bool, unusedsince i
 		h.setSpan(before.base(), s)
 		// The size is potentially changing so the treap needs to delete adjacent nodes and
 		// insert back as a combined node.
-		if h.isLargeSpan(before.npages) {
-			// We have a t, it is large so it has to be in the treap so we can remove it.
-			h.freelarge.removeSpan(before)
-		} else {
-			h.freeList(before.npages).remove(before)
-		}
+		h.free.removeSpan(before)
 		before.state = mSpanDead
 		h.spanalloc.free(unsafe.Pointer(before))
 	}
@@ -1085,32 +1025,13 @@ func (h *mheap) freeSpanLocked(s *mspan, acctinuse, acctidle bool, unusedsince i
 		s.npreleased += after.npreleased
 		s.needzero |= after.needzero
 		h.setSpan(s.base()+s.npages*pageSize-1, s)
-		if h.isLargeSpan(after.npages) {
-			h.freelarge.removeSpan(after)
-		} else {
-			h.freeList(after.npages).remove(after)
-		}
+		h.free.removeSpan(after)
 		after.state = mSpanDead
 		h.spanalloc.free(unsafe.Pointer(after))
 	}
 
-	// Insert s into appropriate list or treap.
-	if h.isLargeSpan(s.npages) {
-		h.freelarge.insert(s)
-	} else {
-		h.freeList(s.npages).insert(s)
-	}
-}
-
-func (h *mheap) freeList(npages uintptr) *mSpanList {
-	return &h.free[npages]
-}
-
-func (h *mheap) busyList(npages uintptr) *mSpanList {
-	if npages < uintptr(len(h.busy)) {
-		return &h.busy[npages]
-	}
-	return &h.busylarge
+	// Insert s into the free treap.
+	h.free.insert(s)
 }
 
 func scavengeTreapNode(t *treapNode, now, limit uint64) uintptr {
@@ -1123,21 +1044,6 @@ func scavengeTreapNode(t *treapNode, now, limit uint64) uintptr {
 	return 0
 }
 
-func scavengelist(list *mSpanList, now, limit uint64) uintptr {
-	if list.isEmpty() {
-		return 0
-	}
-
-	var sumreleased uintptr
-	for s := list.first; s != nil; s = s.next {
-		if (now-uint64(s.unusedsince)) <= limit || s.npreleased == s.npages {
-			continue
-		}
-		sumreleased += s.scavenge()
-	}
-	return sumreleased
-}
-
 func (h *mheap) scavenge(k int32, now, limit uint64) {
 	// Disallow malloc or panic while holding the heap lock. We do
 	// this here because this is an non-mallocgc entry-point to
@@ -1145,11 +1051,7 @@ func (h *mheap) scavenge(k int32, now, limit uint64) {
 	gp := getg()
 	gp.m.mallocing++
 	lock(&h.lock)
-	var sumreleased uintptr
-	for i := 0; i < len(h.free); i++ {
-		sumreleased += scavengelist(&h.free[i], now, limit)
-	}
-	sumreleased += scavengetreap(h.freelarge.treap, now, limit)
+	sumreleased := scavengetreap(h.free.treap, now, limit)
 	unlock(&h.lock)
 	gp.m.mallocing--
...
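scavengetreap itself is defined alongside the treap implementation rather than in this file; conceptually it is a whole-treap walk applying the same idle-time test that scavengelist used to apply per list element. Below is a self-contained sketch of that shape; all types and fields here are placeholders, not the runtime's.

package main

import "fmt"

// treapNode here carries only what the walk needs; the real node holds
// an *mspan plus treap links and priorities.
type treapNode struct {
	left, right *treapNode
	unusedsince uint64  // when the span last became idle
	spanPages   uintptr // pages that could be released
}

// scavengeNode mirrors the per-span test in the removed scavengelist:
// release only spans that have sat idle longer than limit.
func scavengeNode(t *treapNode, now, limit uint64) uintptr {
	if now-t.unusedsince > limit {
		return t.spanPages // stand-in for s.scavenge()
	}
	return 0
}

// scavengetreap visits every node and sums the released pages; with a
// single treap, one walk replaces the old per-list scavenging loop.
func scavengetreap(t *treapNode, now, limit uint64) uintptr {
	if t == nil {
		return 0
	}
	return scavengeNode(t, now, limit) +
		scavengetreap(t.left, now, limit) +
		scavengetreap(t.right, now, limit)
}

func main() {
	root := &treapNode{
		left:        &treapNode{unusedsince: 900, spanPages: 2},
		unusedsince: 100,
		spanPages:   4,
	}
	fmt.Println(scavengetreap(root, 1000, 500)) // 4: only the long-idle span
}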