Commit ecfe42ca authored by Russ Cox's avatar Russ Cox

runtime: keep pointer bits set always in 1-word spans

It's dumb to clear them in initSpan, set them in heapBitsSetType,
clear them in heapBitsSweepSpan, set them again in heapBitsSetType,
clear them again in heapBitsSweepSpan, and so on.

Set them in initSpan and be done with it (until the span is reused
for objects of a different size).

This avoids an atomic operation in a common case (one-word allocation).
Suggested by rlh.

name                   old mean              new mean              delta
BinaryTree17            5.87s × (0.97,1.03)   5.93s × (0.98,1.04)              ~    (p=0.056)
Fannkuch11              4.34s × (1.00,1.01)   4.41s × (1.00,1.00)            +1.42% (p=0.000)
FmtFprintfEmpty        86.1ns × (0.98,1.03)  88.9ns × (0.95,1.14)              ~    (p=0.066)
FmtFprintfString        292ns × (0.97,1.04)   284ns × (0.98,1.03)            -2.64% (p=0.000)
FmtFprintfInt           271ns × (0.98,1.06)   274ns × (0.98,1.05)              ~    (p=0.148)
FmtFprintfIntInt        478ns × (0.98,1.05)   487ns × (0.98,1.03)            +1.85% (p=0.004)
FmtFprintfPrefixedInt   397ns × (0.98,1.05)   394ns × (0.98,1.02)              ~    (p=0.184)
FmtFprintfFloat         553ns × (0.99,1.02)   543ns × (0.99,1.01)            -1.71% (p=0.000)
FmtManyArgs            1.90µs × (0.98,1.05)  1.88µs × (0.99,1.01)            -0.97% (p=0.037)
GobDecode              15.1ms × (0.99,1.01)  15.3ms × (0.99,1.01)            +0.78% (p=0.001)
GobEncode              11.7ms × (0.98,1.05)  11.6ms × (0.99,1.02)            -1.39% (p=0.009)
Gzip                    646ms × (1.00,1.01)   647ms × (1.00,1.01)              ~    (p=0.120)
Gunzip                  142ms × (1.00,1.00)   142ms × (1.00,1.00)              ~    (p=0.068)
HTTPClientServer       89.7µs × (0.99,1.01)  90.1µs × (0.98,1.03)              ~    (p=0.224)
JSONEncode             31.3ms × (0.99,1.01)  31.2ms × (0.99,1.02)              ~    (p=0.149)
JSONDecode              113ms × (0.99,1.01)   111ms × (0.99,1.01)            -1.25% (p=0.000)
Mandelbrot200          6.01ms × (1.00,1.00)  6.01ms × (1.00,1.00)            +0.09% (p=0.015)
GoParse                6.63ms × (0.98,1.03)  6.55ms × (0.99,1.02)            -1.10% (p=0.006)
RegexpMatchEasy0_32     161ns × (1.00,1.00)   161ns × (1.00,1.00)  (sample has zero variance)
RegexpMatchEasy0_1K     539ns × (0.99,1.01)   563ns × (0.99,1.01)            +4.51% (p=0.000)
RegexpMatchEasy1_32     140ns × (0.99,1.01)   141ns × (0.99,1.01)            +1.34% (p=0.000)
RegexpMatchEasy1_1K     886ns × (1.00,1.01)   888ns × (1.00,1.00)            +0.20% (p=0.003)
RegexpMatchMedium_32    252ns × (1.00,1.02)   255ns × (0.99,1.01)            +1.32% (p=0.000)
RegexpMatchMedium_1K   72.7µs × (1.00,1.00)  72.6µs × (1.00,1.00)              ~    (p=0.296)
RegexpMatchHard_32     3.84µs × (1.00,1.01)  3.84µs × (1.00,1.00)              ~    (p=0.339)
RegexpMatchHard_1K      117µs × (1.00,1.01)   117µs × (1.00,1.00)            -0.28% (p=0.022)
Revcomp                 914ms × (0.99,1.01)   909ms × (0.99,1.01)            -0.49% (p=0.031)
Template                128ms × (0.99,1.01)   127ms × (0.99,1.01)            -1.10% (p=0.000)
TimeParse               628ns × (0.99,1.01)   639ns × (0.99,1.01)            +1.69% (p=0.000)
TimeFormat              660ns × (0.99,1.01)   662ns × (0.99,1.02)              ~    (p=0.287)

Change-Id: I3127b0ab89708267c74aa7d0eae1db1a1bcdfda5
Reviewed-on: https://go-review.googlesource.com/9884Reviewed-by: default avatarAustin Clements <austin@google.com>
parent 94934f84
...@@ -387,6 +387,18 @@ func (h heapBits) initSpan(size, n, total uintptr) { ...@@ -387,6 +387,18 @@ func (h heapBits) initSpan(size, n, total uintptr) {
throw("initSpan: unaligned length") throw("initSpan: unaligned length")
} }
nbyte := total / heapBitmapScale nbyte := total / heapBitmapScale
if ptrSize == 8 && size == ptrSize {
end := h.bitp
bitp := subtractb(end, nbyte-1)
for {
*bitp = bitPointerAll
if bitp == end {
break
}
bitp = add1(bitp)
}
return
}
memclr(unsafe.Pointer(subtractb(h.bitp, nbyte-1)), nbyte) memclr(unsafe.Pointer(subtractb(h.bitp, nbyte-1)), nbyte)
} }
...@@ -443,33 +455,34 @@ func heapBitsSweepSpan(base, size, n uintptr, f func(uintptr)) { ...@@ -443,33 +455,34 @@ func heapBitsSweepSpan(base, size, n uintptr, f func(uintptr)) {
switch { switch {
default: default:
throw("heapBitsSweepSpan") throw("heapBitsSweepSpan")
case size == ptrSize: case ptrSize == 8 && size == ptrSize:
// Consider mark bits in all four 2-bit entries of each bitmap byte. // Consider mark bits in all four 2-bit entries of each bitmap byte.
bitp := h.bitp bitp := h.bitp
for i := uintptr(0); i < n; i += 4 { for i := uintptr(0); i < n; i += 4 {
x := uint32(*bitp) x := uint32(*bitp)
// Note that unlike the other size cases, we leave the pointer bits set here.
// These are initialized during initSpan when the span is created and left
// in place the whole time the span is used for pointer-sized objects.
// That lets heapBitsSetType avoid an atomic update to set the pointer bit
// during allocation.
if x&bitMarked != 0 { if x&bitMarked != 0 {
x &^= bitMarked x &^= bitMarked
} else { } else {
x &^= bitPointer
f(base + i*ptrSize) f(base + i*ptrSize)
} }
if x&(bitMarked<<heapBitsShift) != 0 { if x&(bitMarked<<heapBitsShift) != 0 {
x &^= bitMarked << heapBitsShift x &^= bitMarked << heapBitsShift
} else { } else {
x &^= bitPointer << heapBitsShift
f(base + (i+1)*ptrSize) f(base + (i+1)*ptrSize)
} }
if x&(bitMarked<<(2*heapBitsShift)) != 0 { if x&(bitMarked<<(2*heapBitsShift)) != 0 {
x &^= bitMarked << (2 * heapBitsShift) x &^= bitMarked << (2 * heapBitsShift)
} else { } else {
x &^= bitPointer << (2 * heapBitsShift)
f(base + (i+2)*ptrSize) f(base + (i+2)*ptrSize)
} }
if x&(bitMarked<<(3*heapBitsShift)) != 0 { if x&(bitMarked<<(3*heapBitsShift)) != 0 {
x &^= bitMarked << (3 * heapBitsShift) x &^= bitMarked << (3 * heapBitsShift)
} else { } else {
x &^= bitPointer << (3 * heapBitsShift)
f(base + (i+3)*ptrSize) f(base + (i+3)*ptrSize)
} }
*bitp = uint8(x) *bitp = uint8(x)
...@@ -570,21 +583,17 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { ...@@ -570,21 +583,17 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
if size == ptrSize { if size == ptrSize {
// It's one word and it has pointers, it must be a pointer. // It's one word and it has pointers, it must be a pointer.
// The bitmap byte is shared with the one-word object // In general we'd need an atomic update here if the
// next to it, and concurrent GC might be marking that // concurrent GC were marking objects in this span,
// object, so we must use an atomic update. // because each bitmap byte describes 3 other objects
// We can skip this if the GC is completely off. // in addition to the one being allocated.
// Note that there is some marking that happens during // However, since all allocated one-word objects are pointers
// gcphase == _GCscan, for completely scalar objects, // (non-pointers are aggregated into tinySize allocations),
// so it is not safe to check just for the marking phases. // initSpan sets the pointer bits for us. Nothing to do here.
// TODO(rsc): It may make sense to set all the pointer bits if doubleCheck {
// when initializing the span, and then the atomicor8 here if !h.isPointer() {
// goes away - heapBitsSetType would be a no-op throw("heapBitsSetType: pointer bit missing")
// in that case. }
if gcphase == _GCoff {
*h.bitp |= bitPointer << h.shift
} else {
atomicor8(h.bitp, bitPointer<<h.shift)
} }
return return
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment