Commit c4b16a38 authored by Ivan Krasin, committed by Russ Cox

compress/flate: make lazy matching work.

R=rsc, imkrasin
CC=golang-dev
https://golang.org/cl/5554066
parent fb3b2732
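
For context: "lazy matching" in DEFLATE means that after finding a match at position i, the encoder holds it for one byte and also searches at i+1; if the later match is longer, it emits window[i] as a single literal and keeps the longer match instead of committing early. The sketch below illustrates only that decision rule; it is not the package's code, and matchLen, encodeLazily, and the sample input are made-up stand-ins (the real search is findMatch over hash chains, as seen in the diff).

package main

import "fmt"

// matchLen is a toy stand-in for the real match search; it fabricates a
// plausible match length so the control flow below can be demonstrated.
func matchLen(i int) int {
	return i%5 + 2
}

// encodeLazily walks the window and, at each position, compares the match
// found here with the match found one byte later. Emitting the literal and
// deferring to the longer match is the "lazy" part.
func encodeLazily(window []byte) {
	for i := 0; i < len(window)-1; {
		cur, next := matchLen(i), matchLen(i+1)
		if next > cur {
			fmt.Printf("literal %q, then match of length %d at %d\n", window[i], next, i+1)
			i += 1 + next
		} else {
			fmt.Printf("match of length %d at %d\n", cur, i)
			i += cur
		}
	}
}

func main() {
	encodeLazily([]byte("abracadabra abracadabra"))
}
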
src/pkg/compress/flate/deflate.go

@@ -31,6 +31,8 @@ const (
 	hashSize  = 1 << hashBits
 	hashMask  = (1 << hashBits) - 1
 	hashShift = (hashBits + minMatchLength - 1) / minMatchLength
+
+	skipNever = math.MaxInt32
 )
 
 type compressionLevel struct {
@@ -45,12 +47,12 @@ var levels = []compressionLevel{
 	{3, 0, 32, 32, 6},
 	// Levels 4-9 use increasingly more lazy matching
 	// and increasingly stringent conditions for "good enough".
-	{4, 4, 16, 16, math.MaxInt32},
-	{8, 16, 32, 32, math.MaxInt32},
-	{8, 16, 128, 128, math.MaxInt32},
-	{8, 32, 128, 256, math.MaxInt32},
-	{32, 128, 258, 1024, math.MaxInt32},
-	{32, 258, 258, 4096, math.MaxInt32},
+	{4, 4, 16, 16, skipNever},
+	{8, 16, 32, 32, skipNever},
+	{8, 16, 128, 128, skipNever},
+	{8, 32, 128, 256, skipNever},
+	{32, 128, 258, 1024, skipNever},
+	{32, 258, 258, 4096, skipNever},
 }
 
 type compressor struct {
@@ -100,7 +102,7 @@ func (d *compressor) fillDeflate(b []byte) int {
 		if d.blockStart >= windowSize {
 			d.blockStart -= windowSize
 		} else {
-			d.blockStart = math.MaxInt32
+			d.blockStart = skipNever
 		}
 		for i, h := range d.hashHead {
 			v := h - windowSize
@@ -273,18 +275,18 @@ Loop:
 		}
 		if d.chainHead >= minIndex &&
-			(d.fastSkipHashing != 0 && lookahead > minMatchLength-1 ||
-				d.fastSkipHashing == 0 && lookahead > prevLength && prevLength < d.lazy) {
+			(d.fastSkipHashing != skipNever && lookahead > minMatchLength-1 ||
+				d.fastSkipHashing == skipNever && lookahead > prevLength && prevLength < d.lazy) {
 			if newLength, newOffset, ok := d.findMatch(d.index, d.chainHead, minMatchLength-1, lookahead); ok {
 				d.length = newLength
 				d.offset = newOffset
 			}
 		}
-		if d.fastSkipHashing != 0 && d.length >= minMatchLength ||
-			d.fastSkipHashing == 0 && prevLength >= minMatchLength && d.length <= prevLength {
+		if d.fastSkipHashing != skipNever && d.length >= minMatchLength ||
+			d.fastSkipHashing == skipNever && prevLength >= minMatchLength && d.length <= prevLength {
 			// There was a match at the previous step, and the current match is
 			// not better. Output the previous match.
-			if d.fastSkipHashing != 0 {
+			if d.fastSkipHashing != skipNever {
 				d.tokens[d.ti] = matchToken(uint32(d.length-minMatchLength), uint32(d.offset-minOffsetSize))
 			} else {
 				d.tokens[d.ti] = matchToken(uint32(prevLength-minMatchLength), uint32(prevOffset-minOffsetSize))
@@ -296,10 +298,10 @@ Loop:
 			// table.
 			if d.length <= d.fastSkipHashing {
 				var newIndex int
-				if d.fastSkipHashing != 0 {
+				if d.fastSkipHashing != skipNever {
 					newIndex = d.index + d.length
 				} else {
-					newIndex = prevLength - 1
+					newIndex = d.index + prevLength - 1
 				}
 				for d.index++; d.index < newIndex; d.index++ {
 					if d.index < d.maxInsertIndex {
@@ -311,7 +313,7 @@ Loop:
 						d.hashHead[d.hash] = d.index
 					}
 				}
-			if d.fastSkipHashing == 0 {
+			if d.fastSkipHashing == skipNever {
 				d.byteAvailable = false
 				d.length = minMatchLength - 1
 			}
@@ -331,9 +333,9 @@ Loop:
 				d.ti = 0
 			}
 		} else {
-			if d.fastSkipHashing != 0 || d.byteAvailable {
+			if d.fastSkipHashing != skipNever || d.byteAvailable {
 				i := d.index - 1
-				if d.fastSkipHashing != 0 {
+				if d.fastSkipHashing != skipNever {
 					i = d.index
 				}
 				d.tokens[d.ti] = literalToken(uint32(d.window[i]))
@@ -346,7 +348,7 @@ Loop:
 				}
 			}
 			d.index++
-			if d.fastSkipHashing == 0 {
+			if d.fastSkipHashing == skipNever {
 				d.byteAvailable = true
 			}
 		}
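
Two changes in the deflate.go hunks above appear to do the actual work. First, the lazy-path conditions now compare d.fastSkipHashing against the named sentinel skipNever instead of against 0; since levels 4-9 store math.MaxInt32 (now skipNever) in that field, the old comparisons meant the lazy branches never executed for those levels. Second, in the lazy path the hash re-insertion bound becomes d.index + prevLength - 1 rather than prevLength - 1. The snippet below only illustrates why the old bound was effectively a no-op; the names mirror the diff, but this is not the package's code and the concrete values are made up.

package main

import "fmt"

func main() {
	// Hypothetical values: a match of length prevLength was found while the
	// compressor was already deep into the window at position index.
	index, prevLength := 4096, 8

	oldBound := prevLength - 1         // 7, far below index: the loop never runs
	newBound := index + prevLength - 1 // 4103: covers the bytes of the deferred match

	insertions := func(bound int) int {
		n := 0
		for i := index + 1; i < bound; i++ {
			n++ // the real code inserts window positions into the hash chains here
		}
		return n
	}
	fmt.Println("insertions with old bound:", insertions(oldBound)) // 0
	fmt.Println("insertions with new bound:", insertions(newBound)) // 6
}
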
src/pkg/compress/flate/deflate_test.go

@@ -225,10 +225,17 @@ func testSync(t *testing.T, level int, input []byte, name string) {
 }
 
 func testToFromWithLevel(t *testing.T, level int, input []byte, name string) error {
+	return testToFromWithLevelAndLimit(t, level, input, name, -1)
+}
+
+func testToFromWithLevelAndLimit(t *testing.T, level int, input []byte, name string, limit int) error {
 	buffer := bytes.NewBuffer(nil)
 	w := NewWriter(buffer, level)
 	w.Write(input)
 	w.Close()
+	if limit > 0 && buffer.Len() > limit {
+		t.Errorf("level: %d, len(compress(data)) = %d > limit = %d", level, buffer.Len(), limit)
+	}
 	r := NewReader(buffer)
 	out, err := ioutil.ReadAll(r)
 	if err != nil {
@@ -244,12 +251,16 @@ func testToFromWithLevel(t *testing.T, level int, input []byte, name string) err
 	return nil
 }
 
-func testToFrom(t *testing.T, input []byte, name string) {
+func testToFromWithLimit(t *testing.T, input []byte, name string, limit [10]int) {
 	for i := 0; i < 10; i++ {
-		testToFromWithLevel(t, i, input, name)
+		testToFromWithLevelAndLimit(t, i, input, name, limit[i])
 	}
 }
 
+func testToFrom(t *testing.T, input []byte, name string) {
+	testToFromWithLimit(t, input, name, [10]int{})
+}
+
 func TestDeflateInflate(t *testing.T) {
 	for i, h := range deflateInflateTests {
 		testToFrom(t, h.in, fmt.Sprintf("#%d", i))
@@ -265,12 +276,33 @@ func TestReverseBits(t *testing.T) {
 	}
 }
 
+type deflateInflateStringTest struct {
+	filename string
+	label    string
+	limit    [10]int
+}
+
+var deflateInflateStringTests = []deflateInflateStringTest{
+	{
+		"../testdata/e.txt",
+		"2.718281828...",
+		[...]int{10013, 5065, 5096, 5115, 5093, 5079, 5079, 5079, 5079, 5079},
+	},
+	{
+		"../testdata/Mark.Twain-Tom.Sawyer.txt",
+		"Mark.Twain-Tom.Sawyer",
+		[...]int{416188, 191483, 185232, 179560, 175233, 171263, 169908, 169758, 169712, 169712},
+	},
+}
+
 func TestDeflateInflateString(t *testing.T) {
-	gold, err := ioutil.ReadFile("../testdata/e.txt")
-	if err != nil {
-		t.Error(err)
-	}
-	testToFromWithLevel(t, 1, gold, "2.718281828...")
+	for _, test := range deflateInflateStringTests {
+		gold, err := ioutil.ReadFile(test.filename)
+		if err != nil {
+			t.Error(err)
+		}
+		testToFromWithLimit(t, gold, test.label, test.limit)
+	}
 }
 
 func TestReaderDict(t *testing.T) {