Commit ecfe42ca authored by Russ Cox's avatar Russ Cox

runtime: keep pointer bits set always in 1-word spans

It's dumb to clear them in initSpan, set them in heapBitsSetType,
clear them in heapBitsSweepSpan, set them again in heapBitsSetType,
clear them again in heapBitsSweepSpan, and so on.

Set them in initSpan and be done with it (until the span is reused
for objects of a different size).

This avoids an atomic operation in a common case (one-word allocation).
Suggested by rlh.

name                   old mean              new mean              delta
BinaryTree17            5.87s × (0.97,1.03)   5.93s × (0.98,1.04)              ~    (p=0.056)
Fannkuch11              4.34s × (1.00,1.01)   4.41s × (1.00,1.00)            +1.42% (p=0.000)
FmtFprintfEmpty        86.1ns × (0.98,1.03)  88.9ns × (0.95,1.14)              ~    (p=0.066)
FmtFprintfString        292ns × (0.97,1.04)   284ns × (0.98,1.03)            -2.64% (p=0.000)
FmtFprintfInt           271ns × (0.98,1.06)   274ns × (0.98,1.05)              ~    (p=0.148)
FmtFprintfIntInt        478ns × (0.98,1.05)   487ns × (0.98,1.03)            +1.85% (p=0.004)
FmtFprintfPrefixedInt   397ns × (0.98,1.05)   394ns × (0.98,1.02)              ~    (p=0.184)
FmtFprintfFloat         553ns × (0.99,1.02)   543ns × (0.99,1.01)            -1.71% (p=0.000)
FmtManyArgs            1.90µs × (0.98,1.05)  1.88µs × (0.99,1.01)            -0.97% (p=0.037)
GobDecode              15.1ms × (0.99,1.01)  15.3ms × (0.99,1.01)            +0.78% (p=0.001)
GobEncode              11.7ms × (0.98,1.05)  11.6ms × (0.99,1.02)            -1.39% (p=0.009)
Gzip                    646ms × (1.00,1.01)   647ms × (1.00,1.01)              ~    (p=0.120)
Gunzip                  142ms × (1.00,1.00)   142ms × (1.00,1.00)              ~    (p=0.068)
HTTPClientServer       89.7µs × (0.99,1.01)  90.1µs × (0.98,1.03)              ~    (p=0.224)
JSONEncode             31.3ms × (0.99,1.01)  31.2ms × (0.99,1.02)              ~    (p=0.149)
JSONDecode              113ms × (0.99,1.01)   111ms × (0.99,1.01)            -1.25% (p=0.000)
Mandelbrot200          6.01ms × (1.00,1.00)  6.01ms × (1.00,1.00)            +0.09% (p=0.015)
GoParse                6.63ms × (0.98,1.03)  6.55ms × (0.99,1.02)            -1.10% (p=0.006)
RegexpMatchEasy0_32     161ns × (1.00,1.00)   161ns × (1.00,1.00)  (sample has zero variance)
RegexpMatchEasy0_1K     539ns × (0.99,1.01)   563ns × (0.99,1.01)            +4.51% (p=0.000)
RegexpMatchEasy1_32     140ns × (0.99,1.01)   141ns × (0.99,1.01)            +1.34% (p=0.000)
RegexpMatchEasy1_1K     886ns × (1.00,1.01)   888ns × (1.00,1.00)            +0.20% (p=0.003)
RegexpMatchMedium_32    252ns × (1.00,1.02)   255ns × (0.99,1.01)            +1.32% (p=0.000)
RegexpMatchMedium_1K   72.7µs × (1.00,1.00)  72.6µs × (1.00,1.00)              ~    (p=0.296)
RegexpMatchHard_32     3.84µs × (1.00,1.01)  3.84µs × (1.00,1.00)              ~    (p=0.339)
RegexpMatchHard_1K      117µs × (1.00,1.01)   117µs × (1.00,1.00)            -0.28% (p=0.022)
Revcomp                 914ms × (0.99,1.01)   909ms × (0.99,1.01)            -0.49% (p=0.031)
Template                128ms × (0.99,1.01)   127ms × (0.99,1.01)            -1.10% (p=0.000)
TimeParse               628ns × (0.99,1.01)   639ns × (0.99,1.01)            +1.69% (p=0.000)
TimeFormat              660ns × (0.99,1.01)   662ns × (0.99,1.02)              ~    (p=0.287)

Change-Id: I3127b0ab89708267c74aa7d0eae1db1a1bcdfda5
Reviewed-on: https://go-review.googlesource.com/9884Reviewed-by: default avatarAustin Clements <austin@google.com>
parent 94934f84
......@@ -387,6 +387,18 @@ func (h heapBits) initSpan(size, n, total uintptr) {
throw("initSpan: unaligned length")
}
nbyte := total / heapBitmapScale
if ptrSize == 8 && size == ptrSize {
end := h.bitp
bitp := subtractb(end, nbyte-1)
for {
*bitp = bitPointerAll
if bitp == end {
break
}
bitp = add1(bitp)
}
return
}
memclr(unsafe.Pointer(subtractb(h.bitp, nbyte-1)), nbyte)
}
......@@ -443,33 +455,34 @@ func heapBitsSweepSpan(base, size, n uintptr, f func(uintptr)) {
switch {
default:
throw("heapBitsSweepSpan")
case size == ptrSize:
case ptrSize == 8 && size == ptrSize:
// Consider mark bits in all four 2-bit entries of each bitmap byte.
bitp := h.bitp
for i := uintptr(0); i < n; i += 4 {
x := uint32(*bitp)
// Note that unlike the other size cases, we leave the pointer bits set here.
// These are initialized during initSpan when the span is created and left
// in place the whole time the span is used for pointer-sized objects.
// That lets heapBitsSetType avoid an atomic update to set the pointer bit
// during allocation.
if x&bitMarked != 0 {
x &^= bitMarked
} else {
x &^= bitPointer
f(base + i*ptrSize)
}
if x&(bitMarked<<heapBitsShift) != 0 {
x &^= bitMarked << heapBitsShift
} else {
x &^= bitPointer << heapBitsShift
f(base + (i+1)*ptrSize)
}
if x&(bitMarked<<(2*heapBitsShift)) != 0 {
x &^= bitMarked << (2 * heapBitsShift)
} else {
x &^= bitPointer << (2 * heapBitsShift)
f(base + (i+2)*ptrSize)
}
if x&(bitMarked<<(3*heapBitsShift)) != 0 {
x &^= bitMarked << (3 * heapBitsShift)
} else {
x &^= bitPointer << (3 * heapBitsShift)
f(base + (i+3)*ptrSize)
}
*bitp = uint8(x)
......@@ -570,21 +583,17 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
if size == ptrSize {
// It's one word and it has pointers, it must be a pointer.
// The bitmap byte is shared with the one-word object
// next to it, and concurrent GC might be marking that
// object, so we must use an atomic update.
// We can skip this if the GC is completely off.
// Note that there is some marking that happens during
// gcphase == _GCscan, for completely scalar objects,
// so it is not safe to check just for the marking phases.
// TODO(rsc): It may make sense to set all the pointer bits
// when initializing the span, and then the atomicor8 here
// goes away - heapBitsSetType would be a no-op
// in that case.
if gcphase == _GCoff {
*h.bitp |= bitPointer << h.shift
} else {
atomicor8(h.bitp, bitPointer<<h.shift)
// In general we'd need an atomic update here if the
// concurrent GC were marking objects in this span,
// because each bitmap byte describes 3 other objects
// in addition to the one being allocated.
// However, since all allocated one-word objects are pointers
// (non-pointers are aggregated into tinySize allocations),
// initSpan sets the pointer bits for us. Nothing to do here.
if doubleCheck {
if !h.isPointer() {
throw("heapBitsSetType: pointer bit missing")
}
}
return
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment