Commit aafd9640 authored by Josh Bleecher Snyder

runtime: speed up stack copying

I was surprised to see readvarint show up in a cpu profile.

Use a few simple optimizations to speed up stack copying:

* Avoid making a copy of the cache.entries array or any of its elements.
* Use a shift instead of a signed division in stackmapdata.
* Change readvarint to return the number of bytes consumed
  rather than an updated slice.
* Make some minor optimizations to readvarint to help the compiler.
* Avoid calling readvarint when the value fits in a single byte.

The first and last optimizations are the most significant,
although they all contribute a little.

Add a benchmark for stack copying that includes lots of different
functions in a recursive loop, to bust the cache.

This might speed up other runtime operations as well;
I only benchmarked stack copying.

name                old time/op  new time/op  delta
StackCopy-8         96.4ms ± 2%  82.7ms ± 1%  -14.24%  (p=0.000 n=20+19)
StackCopyNoCache-8   167ms ± 1%   131ms ± 1%  -21.58%  (p=0.000 n=20+20)

Change-Id: I13d5c455c65073c73b656acad86cf8e8e3c9807b
Reviewed-on: https://go-review.googlesource.com/43150
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Austin Clements <austin@google.com>
parent 2f7fbf88
...@@ -453,3 +453,175 @@ func count(n int) int { ...@@ -453,3 +453,175 @@ func count(n int) int {
} }
return 1 + count(n-1) return 1 + count(n-1)
} }
// BenchmarkStackCopyNoCache runs b.N iterations. Each iteration starts a
// new goroutine that recurses one million levels deep through the
// count1..count23 call cycle and then signals completion; the benchmark
// goroutine waits for that signal before starting the next iteration.
func BenchmarkStackCopyNoCache(b *testing.B) {
	done := make(chan bool)
	for remaining := b.N; remaining > 0; remaining-- {
		go func() {
			// Deep recursion across many distinct functions.
			count1(1000000)
			done <- true
		}()
		// Unbuffered channel: block until the goroutine has finished.
		<-done
	}
}
// count1 is the entry link of the count1..count23 call cycle driven by
// BenchmarkStackCopyNoCache. It returns 0 when n is 0; otherwise it
// passes n-1 to count2 and adds 1 to that result.
func count1(n int) int {
	if n != 0 {
		return count2(n-1) + 1
	}
	return 0
}
// count2 is one link in the count1..count23 call cycle driven by
// BenchmarkStackCopyNoCache. It returns 0 when n is 0; otherwise it
// passes n-1 to count3 and adds 1 to that result.
func count2(n int) int {
	if n != 0 {
		return count3(n-1) + 1
	}
	return 0
}
// count3 is one link in the count1..count23 call cycle driven by
// BenchmarkStackCopyNoCache. It returns 0 when n is 0; otherwise it
// passes n-1 to count4 and adds 1 to that result.
func count3(n int) int {
	if n != 0 {
		return count4(n-1) + 1
	}
	return 0
}
// count4 is one link in the count1..count23 call cycle driven by
// BenchmarkStackCopyNoCache. It returns 0 when n is 0; otherwise it
// passes n-1 to count5 and adds 1 to that result.
func count4(n int) int {
	if n != 0 {
		return count5(n-1) + 1
	}
	return 0
}
// count5 is one link in the count1..count23 call cycle driven by
// BenchmarkStackCopyNoCache. It returns 0 when n is 0; otherwise it
// passes n-1 to count6 and adds 1 to that result.
func count5(n int) int {
	if n != 0 {
		return count6(n-1) + 1
	}
	return 0
}
// count6 is one link in the count1..count23 call cycle driven by
// BenchmarkStackCopyNoCache. It returns 0 when n is 0; otherwise it
// passes n-1 to count7 and adds 1 to that result.
func count6(n int) int {
	if n != 0 {
		return count7(n-1) + 1
	}
	return 0
}
// count7 is one link in the count1..count23 call cycle driven by
// BenchmarkStackCopyNoCache. It returns 0 when n is 0; otherwise it
// passes n-1 to count8 and adds 1 to that result.
func count7(n int) int {
	if n != 0 {
		return count8(n-1) + 1
	}
	return 0
}
// count8 is one link in the count1..count23 call cycle driven by
// BenchmarkStackCopyNoCache. It returns 0 when n is 0; otherwise it
// passes n-1 to count9 and adds 1 to that result.
func count8(n int) int {
	if n != 0 {
		return count9(n-1) + 1
	}
	return 0
}
// count9 is one link in the count1..count23 call cycle driven by
// BenchmarkStackCopyNoCache. It returns 0 when n is 0; otherwise it
// passes n-1 to count10 and adds 1 to that result.
func count9(n int) int {
	if n != 0 {
		return count10(n-1) + 1
	}
	return 0
}
// count10 is one link in the count1..count23 call cycle driven by
// BenchmarkStackCopyNoCache. It returns 0 when n is 0; otherwise it
// passes n-1 to count11 and adds 1 to that result.
func count10(n int) int {
	if n != 0 {
		return count11(n-1) + 1
	}
	return 0
}
// count11 is one link in the count1..count23 call cycle driven by
// BenchmarkStackCopyNoCache. It returns 0 when n is 0; otherwise it
// passes n-1 to count12 and adds 1 to that result.
func count11(n int) int {
	if n != 0 {
		return count12(n-1) + 1
	}
	return 0
}
// count12 is one link in the count1..count23 call cycle driven by
// BenchmarkStackCopyNoCache. It returns 0 when n is 0; otherwise it
// passes n-1 to count13 and adds 1 to that result.
func count12(n int) int {
	if n != 0 {
		return count13(n-1) + 1
	}
	return 0
}
// count13 is one link in the count1..count23 call cycle driven by
// BenchmarkStackCopyNoCache. It returns 0 when n is 0; otherwise it
// passes n-1 to count14 and adds 1 to that result.
func count13(n int) int {
	if n != 0 {
		return count14(n-1) + 1
	}
	return 0
}
// count14 is one link in the count1..count23 call cycle driven by
// BenchmarkStackCopyNoCache. It returns 0 when n is 0; otherwise it
// passes n-1 to count15 and adds 1 to that result.
func count14(n int) int {
	if n != 0 {
		return count15(n-1) + 1
	}
	return 0
}
// count15 is one link in the count1..count23 call cycle driven by
// BenchmarkStackCopyNoCache. It returns 0 when n is 0; otherwise it
// passes n-1 to count16 and adds 1 to that result.
func count15(n int) int {
	if n != 0 {
		return count16(n-1) + 1
	}
	return 0
}
// count16 is one link in the count1..count23 call cycle driven by
// BenchmarkStackCopyNoCache. It returns 0 when n is 0; otherwise it
// passes n-1 to count17 and adds 1 to that result.
func count16(n int) int {
	if n != 0 {
		return count17(n-1) + 1
	}
	return 0
}
// count17 is one link in the count1..count23 call cycle driven by
// BenchmarkStackCopyNoCache. It returns 0 when n is 0; otherwise it
// passes n-1 to count18 and adds 1 to that result.
func count17(n int) int {
	if n != 0 {
		return count18(n-1) + 1
	}
	return 0
}
// count18 is one link in the count1..count23 call cycle driven by
// BenchmarkStackCopyNoCache. It returns 0 when n is 0; otherwise it
// passes n-1 to count19 and adds 1 to that result.
func count18(n int) int {
	if n != 0 {
		return count19(n-1) + 1
	}
	return 0
}
// count19 is one link in the count1..count23 call cycle driven by
// BenchmarkStackCopyNoCache. It returns 0 when n is 0; otherwise it
// passes n-1 to count20 and adds 1 to that result.
func count19(n int) int {
	if n != 0 {
		return count20(n-1) + 1
	}
	return 0
}
// count20 is one link in the count1..count23 call cycle driven by
// BenchmarkStackCopyNoCache. It returns 0 when n is 0; otherwise it
// passes n-1 to count21 and adds 1 to that result.
func count20(n int) int {
	if n != 0 {
		return count21(n-1) + 1
	}
	return 0
}
// count21 is one link in the count1..count23 call cycle driven by
// BenchmarkStackCopyNoCache. It returns 0 when n is 0; otherwise it
// passes n-1 to count22 and adds 1 to that result.
func count21(n int) int {
	if n != 0 {
		return count22(n-1) + 1
	}
	return 0
}
// count22 is one link in the count1..count23 call cycle driven by
// BenchmarkStackCopyNoCache. It returns 0 when n is 0; otherwise it
// passes n-1 to count23 and adds 1 to that result.
func count22(n int) int {
	if n != 0 {
		return count23(n-1) + 1
	}
	return 0
}
// count23 is the last link in the count1..count23 call cycle driven by
// BenchmarkStackCopyNoCache. It returns 0 when n is 0; otherwise it
// closes the cycle by passing n-1 back to count1 and adds 1 to that
// result.
func count23(n int) int {
	if n != 0 {
		return count1(n-1) + 1
	}
	return 0
}
...@@ -686,12 +686,13 @@ func pcvalue(f funcInfo, off int32, targetpc uintptr, cache *pcvalueCache, stric ...@@ -686,12 +686,13 @@ func pcvalue(f funcInfo, off int32, targetpc uintptr, cache *pcvalueCache, stric
// cheaper than doing the hashing for a less associative // cheaper than doing the hashing for a less associative
// cache. // cache.
if cache != nil { if cache != nil {
for _, ent := range cache.entries { for i := range cache.entries {
// We check off first because we're more // We check off first because we're more
// likely to have multiple entries with // likely to have multiple entries with
// different offsets for the same targetpc // different offsets for the same targetpc
// than the other way around, so we'll usually // than the other way around, so we'll usually
// fail in the first clause. // fail in the first clause.
ent := &cache.entries[i]
if ent.off == off && ent.targetpc == targetpc { if ent.off == off && ent.targetpc == targetpc {
return ent.val return ent.val
} }
...@@ -836,35 +837,47 @@ func funcdata(f funcInfo, i int32) unsafe.Pointer { ...@@ -836,35 +837,47 @@ func funcdata(f funcInfo, i int32) unsafe.Pointer {
// step advances to the next pc, value pair in the encoded table. // step advances to the next pc, value pair in the encoded table.
func step(p []byte, pc *uintptr, val *int32, first bool) (newp []byte, ok bool) { func step(p []byte, pc *uintptr, val *int32, first bool) (newp []byte, ok bool) {
p, uvdelta := readvarint(p) // For both uvdelta and pcdelta, the common case (~70%)
// is that they are a single byte. If so, avoid calling readvarint.
uvdelta := uint32(p[0])
if uvdelta == 0 && !first { if uvdelta == 0 && !first {
return nil, false return nil, false
} }
n := uint32(1)
if uvdelta&0x80 != 0 {
n, uvdelta = readvarint(p)
}
p = p[n:]
if uvdelta&1 != 0 { if uvdelta&1 != 0 {
uvdelta = ^(uvdelta >> 1) uvdelta = ^(uvdelta >> 1)
} else { } else {
uvdelta >>= 1 uvdelta >>= 1
} }
vdelta := int32(uvdelta) vdelta := int32(uvdelta)
p, pcdelta := readvarint(p) pcdelta := uint32(p[0])
n = 1
if pcdelta&0x80 != 0 {
n, pcdelta = readvarint(p)
}
p = p[n:]
*pc += uintptr(pcdelta * sys.PCQuantum) *pc += uintptr(pcdelta * sys.PCQuantum)
*val += vdelta *val += vdelta
return p, true return p, true
} }
// readvarint reads a varint from p. // readvarint reads a varint from p.
func readvarint(p []byte) (newp []byte, val uint32) { func readvarint(p []byte) (read uint32, val uint32) {
var v, shift uint32 var v, shift, n uint32
for { for {
b := p[0] b := p[n]
p = p[1:] n++
v |= (uint32(b) & 0x7F) << shift v |= uint32(b&0x7F) << (shift & 31)
if b&0x80 == 0 { if b&0x80 == 0 {
break break
} }
shift += 7 shift += 7
} }
return p, v return n, v
} }
type stackmap struct { type stackmap struct {
...@@ -878,7 +891,7 @@ func stackmapdata(stkmap *stackmap, n int32) bitvector { ...@@ -878,7 +891,7 @@ func stackmapdata(stkmap *stackmap, n int32) bitvector {
if n < 0 || n >= stkmap.n { if n < 0 || n >= stkmap.n {
throw("stackmapdata: index out of range") throw("stackmapdata: index out of range")
} }
return bitvector{stkmap.nbit, (*byte)(add(unsafe.Pointer(&stkmap.bytedata), uintptr(n*((stkmap.nbit+7)/8))))} return bitvector{stkmap.nbit, (*byte)(add(unsafe.Pointer(&stkmap.bytedata), uintptr(n*((stkmap.nbit+7)>>3))))}
} }
// inlinedCall is the encoding of entries in the FUNCDATA_InlTree table. // inlinedCall is the encoding of entries in the FUNCDATA_InlTree table.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment