Commit f0c5b8b9, authored and committed by Martin Möhrmann

image/color: optimize YCbCrToRGB

Use one comparison to detect underflow and overflow simultaneously.
Use a shift, bitwise complement and uint8 type conversion to handle
clamping to upper and lower bound without additional branching.

Overall, the new code is faster for a mix of
common-case, underflow, and overflow inputs.

name     old time/op  new time/op  delta
YCbCr-2  1.12ms ± 0%  0.64ms ± 0%  -43.01%  (p=0.000 n=48+47)

name              old time/op  new time/op  delta
YCbCrToRGB/0-2    5.52ns ± 0%  5.77ns ± 0%  +4.48%  (p=0.000 n=50+49)
YCbCrToRGB/128-2  6.05ns ± 0%  5.52ns ± 0%  -8.69%  (p=0.000 n=39+50)
YCbCrToRGB/255-2  5.80ns ± 0%  5.77ns ± 0%  -0.58%  (p=0.000 n=50+49)

Found in collaboration with Josh Bleecher Snyder and Ralph Corderoy.

Change-Id: Ic5020320f704966f545fdc1ae6bc24ddb5d3d09a
Reviewed-on: https://go-review.googlesource.com/21910
Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com>
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
parent 1650ced9
...@@ -44,27 +44,44 @@ func YCbCrToRGB(y, cb, cr uint8) (uint8, uint8, uint8) { ...@@ -44,27 +44,44 @@ func YCbCrToRGB(y, cb, cr uint8) (uint8, uint8, uint8) {
// B = Y' + 1.77200*(Cb-128) // B = Y' + 1.77200*(Cb-128)
// http://www.w3.org/Graphics/JPEG/jfif3.pdf says Y but means Y'. // http://www.w3.org/Graphics/JPEG/jfif3.pdf says Y but means Y'.
yy1 := int32(y) * 0x10100 // Convert 0x12 to 0x121200. yy1 := int32(y) * 0x010100 // Convert 0x12 to 0x121200.
cb1 := int32(cb) - 128 cb1 := int32(cb) - 128
cr1 := int32(cr) - 128 cr1 := int32(cr) - 128
r := (yy1 + 91881*cr1) >> 16
g := (yy1 - 22554*cb1 - 46802*cr1) >> 16 // The bit twiddling below is equivalent to
b := (yy1 + 116130*cb1) >> 16 //
if r < 0 { // r := (yy1 + 91881*cr1) >> 16
r = 0 // if r < 0 {
} else if r > 0xff { // r = 0
r = 0xff // } else if r > 0xff {
// r = ^int32(0)
// }
//
// but uses fewer branches and is faster.
// Note that the uint8 type conversion in the return
// statement will convert ^int32(0) to 0xff.
// The code below to compute b and g uses a similar pattern.
r := yy1 + 91881*cr1
if uint32(r)&0xff000000 == 0 {
r >>= 16
} else {
r = ^(r >> 31)
} }
if g < 0 {
g = 0 b := yy1 + 116130*cb1
} else if g > 0xff { if uint32(b)&0xff000000 == 0 {
g = 0xff b >>= 16
} else {
b = ^(b >> 31)
} }
if b < 0 {
b = 0 g := yy1 - 22554*cb1 - 46802*cr1
} else if b > 0xff { if uint32(g)&0xff000000 == 0 {
b = 0xff g >>= 16
} else {
g = ^(g >> 31)
} }
return uint8(r), uint8(g), uint8(b) return uint8(r), uint8(g), uint8(b)
} }
......
...@@ -95,26 +95,42 @@ const sratioCase = ` ...@@ -95,26 +95,42 @@ const sratioCase = `
%s %s
// This is an inline version of image/color/ycbcr.go's func YCbCrToRGB. // This is an inline version of image/color/ycbcr.go's func YCbCrToRGB.
yy1 := int32(src.Y[yi]) * 0x10100 // Convert 0x12 to 0x121200. yy1 := int32(src.Y[yi]) * 0x010100 // Convert 0x12 to 0x121200.
cb1 := int32(src.Cb[ci]) - 128 cb1 := int32(src.Cb[ci]) - 128
cr1 := int32(src.Cr[ci]) - 128 cr1 := int32(src.Cr[ci]) - 128
r := (yy1 + 91881*cr1) >> 16
g := (yy1 - 22554*cb1 - 46802*cr1) >> 16 // The bit twiddling below is equivalent to
b := (yy1 + 116130*cb1) >> 16 //
if r < 0 { // r := (yy1 + 91881*cr1) >> 16
r = 0 // if r < 0 {
} else if r > 255 { // r = 0
r = 255 // } else if r > 0xff {
// r = ^int32(0)
// }
//
// but uses fewer branches and is faster.
// Note that the uint8 type conversion in the return
// statement will convert ^int32(0) to 0xff.
// The code below to compute b and g uses a similar pattern.
r := yy1 + 91881*cr1
if uint32(r)&0xff000000 == 0 {
r >>= 16
} else {
r = ^(r >> 31)
} }
if g < 0 {
g = 0 b := yy1 + 116130*cb1
} else if g > 255 { if uint32(b)&0xff000000 == 0 {
g = 255 b >>= 16
} else {
b = ^(b >> 31)
} }
if b < 0 {
b = 0 g := yy1 - 22554*cb1 - 46802*cr1
} else if b > 255 { if uint32(g)&0xff000000 == 0 {
b = 255 g >>= 16
} else {
g = ^(g >> 31)
} }
dpix[x+0] = uint8(r) dpix[x+0] = uint8(r)
......
...@@ -44,26 +44,42 @@ func DrawYCbCr(dst *image.RGBA, r image.Rectangle, src *image.YCbCr, sp image.Po ...@@ -44,26 +44,42 @@ func DrawYCbCr(dst *image.RGBA, r image.Rectangle, src *image.YCbCr, sp image.Po
for x := x0; x != x1; x, yi, ci = x+4, yi+1, ci+1 { for x := x0; x != x1; x, yi, ci = x+4, yi+1, ci+1 {
// This is an inline version of image/color/ycbcr.go's func YCbCrToRGB. // This is an inline version of image/color/ycbcr.go's func YCbCrToRGB.
yy1 := int32(src.Y[yi]) * 0x10100 // Convert 0x12 to 0x121200. yy1 := int32(src.Y[yi]) * 0x010100 // Convert 0x12 to 0x121200.
cb1 := int32(src.Cb[ci]) - 128 cb1 := int32(src.Cb[ci]) - 128
cr1 := int32(src.Cr[ci]) - 128 cr1 := int32(src.Cr[ci]) - 128
r := (yy1 + 91881*cr1) >> 16
g := (yy1 - 22554*cb1 - 46802*cr1) >> 16 // The bit twiddling below is equivalent to
b := (yy1 + 116130*cb1) >> 16 //
if r < 0 { // r := (yy1 + 91881*cr1) >> 16
r = 0 // if r < 0 {
} else if r > 255 { // r = 0
r = 255 // } else if r > 0xff {
// r = ^int32(0)
// }
//
// but uses fewer branches and is faster.
// Note that the uint8 type conversion in the return
// statement will convert ^int32(0) to 0xff.
// The code below to compute b and g uses a similar pattern.
r := yy1 + 91881*cr1
if uint32(r)&0xff000000 == 0 {
r >>= 16
} else {
r = ^(r >> 31)
} }
if g < 0 {
g = 0 b := yy1 + 116130*cb1
} else if g > 255 { if uint32(b)&0xff000000 == 0 {
g = 255 b >>= 16
} else {
b = ^(b >> 31)
} }
if b < 0 {
b = 0 g := yy1 - 22554*cb1 - 46802*cr1
} else if b > 255 { if uint32(g)&0xff000000 == 0 {
b = 255 g >>= 16
} else {
g = ^(g >> 31)
} }
dpix[x+0] = uint8(r) dpix[x+0] = uint8(r)
...@@ -83,26 +99,42 @@ func DrawYCbCr(dst *image.RGBA, r image.Rectangle, src *image.YCbCr, sp image.Po ...@@ -83,26 +99,42 @@ func DrawYCbCr(dst *image.RGBA, r image.Rectangle, src *image.YCbCr, sp image.Po
ci := ciBase + sx/2 ci := ciBase + sx/2
// This is an inline version of image/color/ycbcr.go's func YCbCrToRGB. // This is an inline version of image/color/ycbcr.go's func YCbCrToRGB.
yy1 := int32(src.Y[yi]) * 0x10100 // Convert 0x12 to 0x121200. yy1 := int32(src.Y[yi]) * 0x010100 // Convert 0x12 to 0x121200.
cb1 := int32(src.Cb[ci]) - 128 cb1 := int32(src.Cb[ci]) - 128
cr1 := int32(src.Cr[ci]) - 128 cr1 := int32(src.Cr[ci]) - 128
r := (yy1 + 91881*cr1) >> 16
g := (yy1 - 22554*cb1 - 46802*cr1) >> 16 // The bit twiddling below is equivalent to
b := (yy1 + 116130*cb1) >> 16 //
if r < 0 { // r := (yy1 + 91881*cr1) >> 16
r = 0 // if r < 0 {
} else if r > 255 { // r = 0
r = 255 // } else if r > 0xff {
// r = ^int32(0)
// }
//
// but uses fewer branches and is faster.
// Note that the uint8 type conversion in the return
// statement will convert ^int32(0) to 0xff.
// The code below to compute b and g uses a similar pattern.
r := yy1 + 91881*cr1
if uint32(r)&0xff000000 == 0 {
r >>= 16
} else {
r = ^(r >> 31)
} }
if g < 0 {
g = 0 b := yy1 + 116130*cb1
} else if g > 255 { if uint32(b)&0xff000000 == 0 {
g = 255 b >>= 16
} else {
b = ^(b >> 31)
} }
if b < 0 {
b = 0 g := yy1 - 22554*cb1 - 46802*cr1
} else if b > 255 { if uint32(g)&0xff000000 == 0 {
b = 255 g >>= 16
} else {
g = ^(g >> 31)
} }
dpix[x+0] = uint8(r) dpix[x+0] = uint8(r)
...@@ -122,26 +154,42 @@ func DrawYCbCr(dst *image.RGBA, r image.Rectangle, src *image.YCbCr, sp image.Po ...@@ -122,26 +154,42 @@ func DrawYCbCr(dst *image.RGBA, r image.Rectangle, src *image.YCbCr, sp image.Po
ci := ciBase + sx/2 ci := ciBase + sx/2
// This is an inline version of image/color/ycbcr.go's func YCbCrToRGB. // This is an inline version of image/color/ycbcr.go's func YCbCrToRGB.
yy1 := int32(src.Y[yi]) * 0x10100 // Convert 0x12 to 0x121200. yy1 := int32(src.Y[yi]) * 0x010100 // Convert 0x12 to 0x121200.
cb1 := int32(src.Cb[ci]) - 128 cb1 := int32(src.Cb[ci]) - 128
cr1 := int32(src.Cr[ci]) - 128 cr1 := int32(src.Cr[ci]) - 128
r := (yy1 + 91881*cr1) >> 16
g := (yy1 - 22554*cb1 - 46802*cr1) >> 16 // The bit twiddling below is equivalent to
b := (yy1 + 116130*cb1) >> 16 //
if r < 0 { // r := (yy1 + 91881*cr1) >> 16
r = 0 // if r < 0 {
} else if r > 255 { // r = 0
r = 255 // } else if r > 0xff {
// r = ^int32(0)
// }
//
// but uses fewer branches and is faster.
// Note that the uint8 type conversion in the return
// statement will convert ^int32(0) to 0xff.
// The code below to compute b and g uses a similar pattern.
r := yy1 + 91881*cr1
if uint32(r)&0xff000000 == 0 {
r >>= 16
} else {
r = ^(r >> 31)
} }
if g < 0 {
g = 0 b := yy1 + 116130*cb1
} else if g > 255 { if uint32(b)&0xff000000 == 0 {
g = 255 b >>= 16
} else {
b = ^(b >> 31)
} }
if b < 0 {
b = 0 g := yy1 - 22554*cb1 - 46802*cr1
} else if b > 255 { if uint32(g)&0xff000000 == 0 {
b = 255 g >>= 16
} else {
g = ^(g >> 31)
} }
dpix[x+0] = uint8(r) dpix[x+0] = uint8(r)
...@@ -160,26 +208,42 @@ func DrawYCbCr(dst *image.RGBA, r image.Rectangle, src *image.YCbCr, sp image.Po ...@@ -160,26 +208,42 @@ func DrawYCbCr(dst *image.RGBA, r image.Rectangle, src *image.YCbCr, sp image.Po
for x := x0; x != x1; x, yi, ci = x+4, yi+1, ci+1 { for x := x0; x != x1; x, yi, ci = x+4, yi+1, ci+1 {
// This is an inline version of image/color/ycbcr.go's func YCbCrToRGB. // This is an inline version of image/color/ycbcr.go's func YCbCrToRGB.
yy1 := int32(src.Y[yi]) * 0x10100 // Convert 0x12 to 0x121200. yy1 := int32(src.Y[yi]) * 0x010100 // Convert 0x12 to 0x121200.
cb1 := int32(src.Cb[ci]) - 128 cb1 := int32(src.Cb[ci]) - 128
cr1 := int32(src.Cr[ci]) - 128 cr1 := int32(src.Cr[ci]) - 128
r := (yy1 + 91881*cr1) >> 16
g := (yy1 - 22554*cb1 - 46802*cr1) >> 16 // The bit twiddling below is equivalent to
b := (yy1 + 116130*cb1) >> 16 //
if r < 0 { // r := (yy1 + 91881*cr1) >> 16
r = 0 // if r < 0 {
} else if r > 255 { // r = 0
r = 255 // } else if r > 0xff {
} // r = ^int32(0)
if g < 0 { // }
g = 0 //
} else if g > 255 { // but uses fewer branches and is faster.
g = 255 // Note that the uint8 type conversion in the return
} // statement will convert ^int32(0) to 0xff.
if b < 0 { // The code below to compute b and g uses a similar pattern.
b = 0 r := yy1 + 91881*cr1
} else if b > 255 { if uint32(r)&0xff000000 == 0 {
b = 255 r >>= 16
} else {
r = ^(r >> 31)
}
b := yy1 + 116130*cb1
if uint32(b)&0xff000000 == 0 {
b >>= 16
} else {
b = ^(b >> 31)
}
g := yy1 - 22554*cb1 - 46802*cr1
if uint32(g)&0xff000000 == 0 {
g >>= 16
} else {
g = ^(g >> 31)
} }
dpix[x+0] = uint8(r) dpix[x+0] = uint8(r)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment