Commit c1e267cc authored by Carl Mastrangelo's avatar Carl Mastrangelo Committed by Ian Lance Taylor

runtime: make append only clear uncopied memory

Also add a benchmark that shows off the new behavior.  The
existing benchmarks reuse the same slice, and thus don't ever have
to clear memory.  Running the Append|Grow benchmarks in runtime:

name                              old time/op  new time/op  delta
AppendSliceLarge/1024Bytes-12      265ns ± 1%   265ns ± 3%     ~     (p=0.524 n=17+20)
AppendSliceLarge/4096Bytes-12      807ns ± 3%   772ns ± 1%   -4.38%  (p=0.000 n=20+20)
AppendSliceLarge/16384Bytes-12    3.20µs ± 4%  2.82µs ± 4%  -11.93%  (p=0.000 n=19+20)
AppendSliceLarge/65536Bytes-12    13.0µs ± 4%  11.0µs ± 3%  -15.22%  (p=0.000 n=20+20)
AppendSliceLarge/262144Bytes-12   62.7µs ± 1%  51.6µs ± 1%  -17.67%  (p=0.000 n=19+20)
AppendSliceLarge/1048576Bytes-12   337µs ± 3%   289µs ± 3%  -14.36%  (p=0.000 n=20+20)
GrowSliceBytes-12                 31.2ns ± 4%  31.4ns ±11%     ~     (p=0.308 n=19+18)
GrowSliceInts-12                  53.4ns ±14%  45.0ns ± 6%  -15.74%  (p=0.000 n=20+19)
GrowSlicePtr-12                   87.0ns ± 3%  83.3ns ± 3%   -4.26%  (p=0.000 n=18+17)
GrowSliceStruct24Bytes-12         88.9ns ± 5%  77.8ns ± 2%  -12.45%  (p=0.000 n=20+19)
Append-12                         17.2ns ± 1%  17.3ns ± 2%     ~     (p=0.464 n=18+17)
AppendGrowByte-12                 2.28ms ± 1%  1.92ms ± 2%  -15.65%  (p=0.000 n=20+18)
AppendGrowString-12                255ms ± 3%   253ms ± 4%     ~     (p=0.065 n=19+19)
AppendSlice/1Bytes-12             3.13ns ± 0%  3.11ns ± 1%   -0.65%  (p=0.000 n=17+18)
AppendSlice/4Bytes-12             3.02ns ± 2%  3.11ns ± 1%   +3.27%  (p=0.000 n=18+17)
AppendSlice/7Bytes-12             4.14ns ± 3%  4.13ns ± 2%     ~     (p=0.380 n=19+18)
AppendSlice/8Bytes-12             3.74ns ± 3%  3.68ns ± 1%   -1.76%  (p=0.000 n=19+18)
AppendSlice/15Bytes-12            4.03ns ± 2%  4.04ns ± 2%     ~     (p=0.261 n=19+20)
AppendSlice/16Bytes-12            4.03ns ± 2%  4.03ns ± 0%     ~     (p=0.062 n=18+17)
AppendSlice/32Bytes-12            3.23ns ± 4%  3.43ns ± 1%   +6.10%  (p=0.000 n=17+18)
AppendStr/1Bytes-12               3.51ns ± 1%  3.52ns ± 1%     ~     (p=0.321 n=18+19)
AppendStr/4Bytes-12               3.46ns ± 1%  3.46ns ± 1%     ~     (p=0.977 n=18+20)
AppendStr/8Bytes-12               3.18ns ± 1%  3.19ns ± 1%     ~     (p=0.650 n=16+17)
AppendStr/16Bytes-12              6.08ns ±27%  5.52ns ± 3%   -9.16%  (p=0.002 n=18+19)
AppendStr/32Bytes-12              3.71ns ± 1%  3.53ns ± 1%   -4.73%  (p=0.000 n=20+19)
AppendSpecialCase-12              17.7ns ± 1%  17.8ns ± 3%   +0.86%  (p=0.045 n=17+18)
AppendInPlace/NoGrow/Byte-12       375ns ± 1%   376ns ± 1%   +0.35%  (p=0.021 n=20+18)
AppendInPlace/NoGrow/1Ptr-12      1.01µs ± 1%  1.10µs ± 1%   +9.28%  (p=0.000 n=18+20)
AppendInPlace/NoGrow/2Ptr-12      1.85µs ± 2%  1.71µs ± 1%   -7.51%  (p=0.000 n=19+18)
AppendInPlace/NoGrow/3Ptr-12      2.57µs ± 2%  2.44µs ± 1%   -5.08%  (p=0.000 n=19+19)
AppendInPlace/NoGrow/4Ptr-12      3.52µs ± 2%  3.35µs ± 2%   -4.70%  (p=0.000 n=20+19)
AppendInPlace/Grow/Byte-12         212ns ± 1%   217ns ± 8%   +2.57%  (p=0.000 n=20+20)
AppendInPlace/Grow/1Ptr-12         214ns ± 2%   217ns ± 3%   +1.23%  (p=0.001 n=18+19)
AppendInPlace/Grow/2Ptr-12         298ns ± 2%   300ns ± 2%   +0.55%  (p=0.038 n=19+20)
AppendInPlace/Grow/3Ptr-12         367ns ± 2%   366ns ± 2%     ~     (p=0.452 n=20+18)
AppendInPlace/Grow/4Ptr-12         416ns ± 2%   411ns ± 2%   -1.18%  (p=0.000 n=20+19)
StackGrowth-12                    43.4ns ± 1%  43.4ns ± 0%     ~     (p=1.000 n=16+16)
StackGrowthDeep-12                11.4µs ± 4%  10.3µs ± 4%   -9.65%  (p=0.000 n=20+19)

Change-Id: I69a8afbd942c787c591d95b9d9439bd6db4d1e49
Reviewed-on: https://go-review.googlesource.com/30192Reviewed-by: default avatarIan Lance Taylor <iant@golang.org>
Run-TryBot: Ian Lance Taylor <iant@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
parent 3f952b75
......@@ -100,6 +100,22 @@ func BenchmarkAppendSlice(b *testing.B) {
}
}
var (
blackhole []byte
)
func BenchmarkAppendSliceLarge(b *testing.B) {
for _, length := range []int{1 << 10, 4 << 10, 16 << 10, 64 << 10, 256 << 10, 1024 << 10} {
y := make([]byte, length)
b.Run(fmt.Sprint(length, "Bytes"), func(b *testing.B) {
for i := 0; i < b.N; i++ {
blackhole = nil
blackhole = append(blackhole, y...)
}
})
}
}
func BenchmarkAppendStr(b *testing.B) {
for _, str := range []string{
"1",
......
......@@ -111,19 +111,22 @@ func growslice(et *_type, old slice, cap int) slice {
}
}
var lenmem, capmem uintptr
var lenmem, newlenmem, capmem uintptr
const ptrSize = unsafe.Sizeof((*byte)(nil))
switch et.size {
case 1:
lenmem = uintptr(old.len)
newlenmem = uintptr(cap)
capmem = roundupsize(uintptr(newcap))
newcap = int(capmem)
case ptrSize:
lenmem = uintptr(old.len) * ptrSize
newlenmem = uintptr(cap) * ptrSize
capmem = roundupsize(uintptr(newcap) * ptrSize)
newcap = int(capmem / ptrSize)
default:
lenmem = uintptr(old.len) * et.size
newlenmem = uintptr(cap) * et.size
capmem = roundupsize(uintptr(newcap) * et.size)
newcap = int(capmem / et.size)
}
......@@ -136,7 +139,9 @@ func growslice(et *_type, old slice, cap int) slice {
if et.kind&kindNoPointers != 0 {
p = mallocgc(capmem, nil, false)
memmove(p, old.array, lenmem)
memclr(add(p, lenmem), capmem-lenmem)
// The append() that calls growslice is going to overwrite from old.len to cap (which will be the new length).
// Only clear the part that will not be overwritten.
memclr(add(p, newlenmem), capmem-newlenmem)
} else {
// Note: can't use rawmem (which avoids zeroing of memory), because then GC can scan uninitialized memory.
p = mallocgc(capmem, et, true)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment