Commit daf43ba4 authored by Nigel Tao's avatar Nigel Tao

image/jpeg: change block from [64]int to [64]int32.

On 6g/linux:
benchmark                     old ns/op    new ns/op    delta
BenchmarkFDCT                      4606         4241   -7.92%
BenchmarkIDCT                      4187         3923   -6.31%
BenchmarkDecodeBaseline         3154864      3170224   +0.49%
BenchmarkDecodeProgressive      4072812      4017132   -1.37%
BenchmarkEncode                39406920     34596760  -12.21%

Stack requirements before (from 'go tool 6g -S'):
(scan.go:37) TEXT    (*decoder).processSOS+0(SB),$1352-32
(writer.go:448) TEXT    (*encoder).writeSOS+0(SB),$5344-24

after:
(scan.go:37) TEXT    (*decoder).processSOS+0(SB),$1064-32
(writer.go:448) TEXT    (*encoder).writeSOS+0(SB),$2520-24

Also, in encoder.writeSOS, re-use the yBlock scratch buffer for Cb and
Cr. This reduces the stack requirement slightly, but also avoids an
unlucky coincidence where a BenchmarkEncode stack split lands between
encoder.writeByte and bufio.Writer.WriteByte, which occurs very often
during Huffman encoding and is otherwise disasterous for the final
benchmark number. FWIW, the yBlock re-use *without* the s/int/int32/
change does not have a noticable effect on the benchmarks.

R=r
CC=golang-dev, rsc
https://golang.org/cl/6823043
parent 9c775353
......@@ -42,7 +42,7 @@ func TestDCT(t *testing.T) {
b := block{}
n := r.Int() % 64
for j := 0; j < n; j++ {
b[r.Int()%len(b)] = r.Int() % 256
b[r.Int()%len(b)] = r.Int31() % 256
}
blocks = append(blocks, b)
}
......@@ -144,9 +144,9 @@ func slowFDCT(b *block) {
dst[8*v+u] = sum / 8
}
}
// Convert from float64 to int.
// Convert from float64 to int32.
for i := range dst {
b[i] = int(dst[i] + 0.5)
b[i] = int32(dst[i] + 0.5)
}
}
......@@ -174,9 +174,9 @@ func slowIDCT(b *block) {
dst[8*y+x] = sum / 8
}
}
// Convert from float64 to int.
// Convert from float64 to int32.
for i := range dst {
b[i] = int(dst[i] + 0.5)
b[i] = int32(dst[i] + 0.5)
}
}
......
......@@ -61,15 +61,15 @@ func (d *decoder) ensureNBits(n int) error {
}
// The composition of RECEIVE and EXTEND, specified in section F.2.2.1.
func (d *decoder) receiveExtend(t uint8) (int, error) {
func (d *decoder) receiveExtend(t uint8) (int32, error) {
err := d.ensureNBits(int(t))
if err != nil {
return 0, err
}
d.b.n -= int(t)
d.b.m >>= t
s := 1 << t
x := int(d.b.a>>uint8(d.b.n)) & (s - 1)
s := int32(1) << t
x := int32(d.b.a>>uint8(d.b.n)) & (s - 1)
if x < s>>1 {
x += ((-1) << t) + 1
}
......
......@@ -39,7 +39,7 @@ package jpeg
const blockSize = 64 // A DCT block is 8x8.
type block [blockSize]int
type block [blockSize]int32
const (
w1 = 2841 // 2048*sqrt(2)*cos(1*pi/16)
......
......@@ -187,7 +187,7 @@ func (d *decoder) processDQT(n int) error {
return FormatError("bad Tq value")
}
for i := range d.quant[tq] {
d.quant[tq][i] = int(d.tmp[i+1])
d.quant[tq][i] = int32(d.tmp[i+1])
}
}
if n != 0 {
......
......@@ -86,12 +86,12 @@ func (d *decoder) processSOS(n int) error {
// significant bit.
//
// For baseline JPEGs, these parameters are hard-coded to 0/63/0/0.
zigStart, zigEnd, ah, al := 0, blockSize-1, uint(0), uint(0)
zigStart, zigEnd, ah, al := int32(0), int32(blockSize-1), uint32(0), uint32(0)
if d.progressive {
zigStart = int(d.tmp[1+2*nComp])
zigEnd = int(d.tmp[2+2*nComp])
ah = uint(d.tmp[3+2*nComp] >> 4)
al = uint(d.tmp[3+2*nComp] & 0x0f)
zigStart = int32(d.tmp[1+2*nComp])
zigEnd = int32(d.tmp[2+2*nComp])
ah = uint32(d.tmp[3+2*nComp] >> 4)
al = uint32(d.tmp[3+2*nComp] & 0x0f)
if (zigStart == 0 && zigEnd != 0) || zigStart > zigEnd || blockSize <= zigEnd {
return FormatError("bad spectral selection bounds")
}
......@@ -122,7 +122,7 @@ func (d *decoder) processSOS(n int) error {
var (
// b is the decoded coefficients, in natural (not zig-zag) order.
b block
dc [nColorComponent]int
dc [nColorComponent]int32
// mx0 and my0 are the location of the current (in terms of 8x8 blocks).
// For example, with 4:2:0 chroma subsampling, the block whose top left
// pixel co-ordinates are (16, 8) is the third block in the first row:
......@@ -218,7 +218,7 @@ func (d *decoder) processSOS(n int) error {
val0 := value >> 4
val1 := value & 0x0f
if val1 != 0 {
zig += int(val0)
zig += int32(val0)
if zig > zigEnd {
break
}
......@@ -315,7 +315,7 @@ func (d *decoder) processSOS(n int) error {
// Reset the Huffman decoder.
d.b = bits{}
// Reset the DC components, as per section F.2.1.3.1.
dc = [nColorComponent]int{}
dc = [nColorComponent]int32{}
// Reset the progressive decoder state, as per section G.1.2.2.
d.eobRun = 0
}
......@@ -327,7 +327,7 @@ func (d *decoder) processSOS(n int) error {
// refine decodes a successive approximation refinement block, as specified in
// section G.1.2.
func (d *decoder) refine(b *block, h *huffman, zigStart, zigEnd, delta int) error {
func (d *decoder) refine(b *block, h *huffman, zigStart, zigEnd, delta int32) error {
// Refining a DC component is trivial.
if zigStart == 0 {
if zigEnd != 0 {
......@@ -348,7 +348,7 @@ func (d *decoder) refine(b *block, h *huffman, zigStart, zigEnd, delta int) erro
if d.eobRun == 0 {
loop:
for ; zig <= zigEnd; zig++ {
z := 0
z := int32(0)
value, err := d.decodeHuffman(h)
if err != nil {
return err
......@@ -382,7 +382,7 @@ func (d *decoder) refine(b *block, h *huffman, zigStart, zigEnd, delta int) erro
return FormatError("unexpected Huffman code")
}
zig, err = d.refineNonZeroes(b, zig, zigEnd, int(val0), delta)
zig, err = d.refineNonZeroes(b, zig, zigEnd, int32(val0), delta)
if err != nil {
return err
}
......@@ -405,7 +405,7 @@ func (d *decoder) refine(b *block, h *huffman, zigStart, zigEnd, delta int) erro
// refineNonZeroes refines non-zero entries of b in zig-zag order. If nz >= 0,
// the first nz zero entries are skipped over.
func (d *decoder) refineNonZeroes(b *block, zig, zigEnd, nz, delta int) (int, error) {
func (d *decoder) refineNonZeroes(b *block, zig, zigEnd, nz, delta int32) (int32, error) {
for ; zig <= zigEnd; zig++ {
u := unzig[zig]
if b[u] == 0 {
......
......@@ -21,7 +21,7 @@ func min(x, y int) int {
}
// div returns a/b rounded to the nearest integer, instead of rounded to zero.
func div(a int, b int) int {
func div(a, b int32) int32 {
if a >= 0 {
return (a + (b >> 1)) / b
}
......@@ -268,14 +268,14 @@ func (e *encoder) emit(bits, nBits uint32) {
}
// emitHuff emits the given value with the given Huffman encoder.
func (e *encoder) emitHuff(h huffIndex, value int) {
func (e *encoder) emitHuff(h huffIndex, value int32) {
x := theHuffmanLUT[h][value]
e.emit(x&(1<<24-1), x>>24)
}
// emitHuffRLE emits a run of runLength copies of value encoded with the given
// Huffman encoder.
func (e *encoder) emitHuffRLE(h huffIndex, runLength, value int) {
func (e *encoder) emitHuffRLE(h huffIndex, runLength, value int32) {
a, b := value, value
if a < 0 {
a, b = -value, value-1
......@@ -286,7 +286,7 @@ func (e *encoder) emitHuffRLE(h huffIndex, runLength, value int) {
} else {
nBits = 8 + uint32(bitCount[a>>8])
}
e.emitHuff(h, runLength<<4|int(nBits))
e.emitHuff(h, runLength<<4|int32(nBits))
if nBits > 0 {
e.emit(uint32(b)&(1<<nBits-1), nBits)
}
......@@ -347,15 +347,15 @@ func (e *encoder) writeDHT() {
// writeBlock writes a block of pixel data using the given quantization table,
// returning the post-quantized DC value of the DCT-transformed block.
// b is in natural (not zig-zag) order.
func (e *encoder) writeBlock(b *block, q quantIndex, prevDC int) int {
func (e *encoder) writeBlock(b *block, q quantIndex, prevDC int32) int32 {
fdct(b)
// Emit the DC delta.
dc := div(b[0], (8 * int(e.quant[q][0])))
dc := div(b[0], 8*int32(e.quant[q][0]))
e.emitHuffRLE(huffIndex(2*q+0), 0, dc-prevDC)
// Emit the AC components.
h, runLength := huffIndex(2*q+1), 0
h, runLength := huffIndex(2*q+1), int32(0)
for zig := 1; zig < blockSize; zig++ {
ac := div(b[unzig[zig]], (8 * int(e.quant[q][zig])))
ac := div(b[unzig[zig]], 8*int32(e.quant[q][zig]))
if ac == 0 {
runLength++
} else {
......@@ -383,9 +383,9 @@ func toYCbCr(m image.Image, p image.Point, yBlock, cbBlock, crBlock *block) {
for i := 0; i < 8; i++ {
r, g, b, _ := m.At(min(p.X+i, xmax), min(p.Y+j, ymax)).RGBA()
yy, cb, cr := color.RGBToYCbCr(uint8(r>>8), uint8(g>>8), uint8(b>>8))
yBlock[8*j+i] = int(yy)
cbBlock[8*j+i] = int(cb)
crBlock[8*j+i] = int(cr)
yBlock[8*j+i] = int32(yy)
cbBlock[8*j+i] = int32(cb)
crBlock[8*j+i] = int32(cr)
}
}
}
......@@ -408,9 +408,9 @@ func rgbaToYCbCr(m *image.RGBA, p image.Point, yBlock, cbBlock, crBlock *block)
}
pix := m.Pix[offset+sx*4:]
yy, cb, cr := color.RGBToYCbCr(pix[0], pix[1], pix[2])
yBlock[8*j+i] = int(yy)
cbBlock[8*j+i] = int(cb)
crBlock[8*j+i] = int(cr)
yBlock[8*j+i] = int32(yy)
cbBlock[8*j+i] = int32(cb)
crBlock[8*j+i] = int32(cr)
}
}
}
......@@ -450,12 +450,10 @@ func (e *encoder) writeSOS(m image.Image) {
var (
// Scratch buffers to hold the YCbCr values.
// The blocks are in natural (not zig-zag) order.
yBlock block
cbBlock [4]block
crBlock [4]block
cBlock block
b block
cb, cr [4]block
// DC components are delta-encoded.
prevDCY, prevDCCb, prevDCCr int
prevDCY, prevDCCb, prevDCCr int32
)
bounds := m.Bounds()
rgba, _ := m.(*image.RGBA)
......@@ -466,16 +464,16 @@ func (e *encoder) writeSOS(m image.Image) {
yOff := (i & 2) * 4
p := image.Pt(x+xOff, y+yOff)
if rgba != nil {
rgbaToYCbCr(rgba, p, &yBlock, &cbBlock[i], &crBlock[i])
rgbaToYCbCr(rgba, p, &b, &cb[i], &cr[i])
} else {
toYCbCr(m, p, &yBlock, &cbBlock[i], &crBlock[i])
toYCbCr(m, p, &b, &cb[i], &cr[i])
}
prevDCY = e.writeBlock(&yBlock, 0, prevDCY)
prevDCY = e.writeBlock(&b, 0, prevDCY)
}
scale(&cBlock, &cbBlock)
prevDCCb = e.writeBlock(&cBlock, 1, prevDCCb)
scale(&cBlock, &crBlock)
prevDCCr = e.writeBlock(&cBlock, 1, prevDCCr)
scale(&b, &cb)
prevDCCb = e.writeBlock(&b, 1, prevDCCb)
scale(&b, &cr)
prevDCCr = e.writeBlock(&b, 1, prevDCCr)
}
}
// Pad the last byte with 1's.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment