Commit 9c3630f5 authored by Joe Tsai's avatar Joe Tsai Committed by Joe Tsai

compress/flate: avoid large stack growth in fillDeflate

Ranging over an array causes the array to be copied over to the
stack, which cause large re-growths. Instead, we should iterate
over slices of the array.

Also, assigning a large struct literal uses the stack even
though the actual fields being populated are small in comparison
to the entirety of the struct (see #18636).

Fixing the stack growth does not alter CPU-time performance much
since the stack-growth and copying was such a tiny portion of the
compression work:

name                         old time/op    new time/op    delta
Encode/Digits/Default/1e4-8     332µs ± 1%     332µs ± 1%   ~     (p=0.796 n=10+10)
Encode/Digits/Default/1e5-8    5.07ms ± 2%    5.05ms ± 1%   ~       (p=0.815 n=9+8)
Encode/Digits/Default/1e6-8    53.7ms ± 1%    53.9ms ± 1%   ~     (p=0.075 n=10+10)
Encode/Twain/Default/1e4-8      380µs ± 1%     380µs ± 1%   ~     (p=0.684 n=10+10)
Encode/Twain/Default/1e5-8     5.79ms ± 2%    5.79ms ± 1%   ~      (p=0.497 n=9+10)
Encode/Twain/Default/1e6-8     61.5ms ± 1%    61.8ms ± 1%   ~     (p=0.247 n=10+10)

name                         old speed      new speed      delta
Encode/Digits/Default/1e4-8  30.1MB/s ± 1%  30.1MB/s ± 1%   ~     (p=0.753 n=10+10)
Encode/Digits/Default/1e5-8  19.7MB/s ± 2%  19.8MB/s ± 1%   ~       (p=0.795 n=9+8)
Encode/Digits/Default/1e6-8  18.6MB/s ± 1%  18.5MB/s ± 1%   ~     (p=0.072 n=10+10)
Encode/Twain/Default/1e4-8   26.3MB/s ± 1%  26.3MB/s ± 1%   ~     (p=0.616 n=10+10)
Encode/Twain/Default/1e5-8   17.3MB/s ± 2%  17.3MB/s ± 1%   ~      (p=0.484 n=9+10)
Encode/Twain/Default/1e6-8   16.3MB/s ± 1%  16.2MB/s ± 1%   ~     (p=0.238 n=10+10)

Updates #18636
Fixes #18625

Change-Id: I471b20339bf675f63dc56d38b3acdd824fe23328
Reviewed-on: https://go-review.googlesource.com/35122Reviewed-by: default avatarBrad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
parent 4f0aac52
...@@ -136,14 +136,17 @@ func (d *compressor) fillDeflate(b []byte) int { ...@@ -136,14 +136,17 @@ func (d *compressor) fillDeflate(b []byte) int {
delta := d.hashOffset - 1 delta := d.hashOffset - 1
d.hashOffset -= delta d.hashOffset -= delta
d.chainHead -= delta d.chainHead -= delta
for i, v := range d.hashPrev {
// Iterate over slices instead of arrays to avoid copying
// the entire table onto the stack (Issue #18625).
for i, v := range d.hashPrev[:] {
if int(v) > delta { if int(v) > delta {
d.hashPrev[i] = uint32(int(v) - delta) d.hashPrev[i] = uint32(int(v) - delta)
} else { } else {
d.hashPrev[i] = 0 d.hashPrev[i] = 0
} }
} }
for i, v := range d.hashHead { for i, v := range d.hashHead[:] {
if int(v) > delta { if int(v) > delta {
d.hashHead[i] = uint32(int(v) - delta) d.hashHead[i] = uint32(int(v) - delta)
} else { } else {
......
...@@ -12,6 +12,7 @@ import ( ...@@ -12,6 +12,7 @@ import (
"io" "io"
"io/ioutil" "io/ioutil"
"reflect" "reflect"
"runtime/debug"
"sync" "sync"
"testing" "testing"
) )
...@@ -864,3 +865,33 @@ func TestBestSpeedMaxMatchOffset(t *testing.T) { ...@@ -864,3 +865,33 @@ func TestBestSpeedMaxMatchOffset(t *testing.T) {
} }
} }
} }
func TestMaxStackSize(t *testing.T) {
// This test must not run in parallel with other tests as debug.SetMaxStack
// affects all goroutines.
n := debug.SetMaxStack(1 << 16)
defer debug.SetMaxStack(n)
var wg sync.WaitGroup
defer wg.Wait()
b := make([]byte, 1<<20)
for level := HuffmanOnly; level <= BestCompression; level++ {
// Run in separate goroutine to increase probability of stack regrowth.
wg.Add(1)
go func(level int) {
defer wg.Done()
zw, err := NewWriter(ioutil.Discard, level)
if err != nil {
t.Errorf("level %d, NewWriter() = %v, want nil", level, err)
}
if n, err := zw.Write(b); n != len(b) || err != nil {
t.Errorf("level %d, Write() = (%d, %v), want (%d, nil)", level, n, err, len(b))
}
if err := zw.Close(); err != nil {
t.Errorf("level %d, Close() = %v, want nil", level, err)
}
zw.Reset(ioutil.Discard)
}(level)
}
}
...@@ -60,7 +60,7 @@ func newDeflateFast() *deflateFast { ...@@ -60,7 +60,7 @@ func newDeflateFast() *deflateFast {
func (e *deflateFast) encode(dst []token, src []byte) []token { func (e *deflateFast) encode(dst []token, src []byte) []token {
// Ensure that e.cur doesn't wrap. // Ensure that e.cur doesn't wrap.
if e.cur > 1<<30 { if e.cur > 1<<30 {
*e = deflateFast{cur: maxStoreBlockSize, prev: e.prev[:0]} e.resetAll()
} }
// This check isn't in the Snappy implementation, but there, the caller // This check isn't in the Snappy implementation, but there, the caller
...@@ -265,6 +265,21 @@ func (e *deflateFast) reset() { ...@@ -265,6 +265,21 @@ func (e *deflateFast) reset() {
// Protect against e.cur wraparound. // Protect against e.cur wraparound.
if e.cur > 1<<30 { if e.cur > 1<<30 {
*e = deflateFast{cur: maxStoreBlockSize, prev: e.prev[:0]} e.resetAll()
}
}
// resetAll resets the deflateFast struct and is only called in rare
// situations to prevent integer overflow. It manually resets each field
// to avoid causing large stack growth.
//
// See https://golang.org/issue/18636.
func (e *deflateFast) resetAll() {
// This is equivalent to:
// *e = deflateFast{cur: maxStoreBlockSize, prev: e.prev[:0]}
e.cur = maxStoreBlockSize
e.prev = e.prev[:0]
for i := range e.table {
e.table[i] = tableEntry{}
} }
} }
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment