Commit bd91e356 authored by Kirill Smelkov's avatar Kirill Smelkov Committed by Ilya Tocar

cmd/compile/internal/ssa: generate bswap/store for indexed bigendian byte stores too on AMD64

Commit 10f75748 (CL 32222) added rewrite rules to combine byte loads/stores +
shifts into larger loads/stores + bswap. For loads both MOVBload and
MOVBloadidx1 were handled but for store only MOVBstore was there without
MOVBstoreidx added to rewrite pattern. Fix it.

Here is how generated code changes for the following 2 functions
(ommitting staying the same prologue/epilogue):

    func put32(b []byte, i int, v uint32) {
            binary.BigEndian.PutUint32(b[i:], v)
    }

    func put64(b []byte, i int, v uint64) {
            binary.BigEndian.PutUint64(b[i:], v)
    }

"".put32 t=1 size=100 args=0x28 locals=0x0

	// before
	0x0032 00050 (x.go:5)	MOVL	CX, DX
	0x0034 00052 (x.go:5)	SHRL	$24, CX
	0x0037 00055 (x.go:5)	MOVQ	"".b+8(FP), BX
	0x003c 00060 (x.go:5)	MOVB	CL, (BX)(AX*1)
	0x003f 00063 (x.go:5)	MOVL	DX, CX
	0x0041 00065 (x.go:5)	SHRL	$16, DX
	0x0044 00068 (x.go:5)	MOVB	DL, 1(BX)(AX*1)
	0x0048 00072 (x.go:5)	MOVL	CX, DX
	0x004a 00074 (x.go:5)	SHRL	$8, CX
	0x004d 00077 (x.go:5)	MOVB	CL, 2(BX)(AX*1)
	0x0051 00081 (x.go:5)	MOVB	DL, 3(BX)(AX*1)

	// after
	0x0032 00050 (x.go:5)	BSWAPL	CX
	0x0034 00052 (x.go:5)	MOVQ	"".b+8(FP), DX
	0x0039 00057 (x.go:5)	MOVL	CX, (DX)(AX*1)

"".put64 t=1 size=155 args=0x28 locals=0x0

	// before
	0x0037 00055 (x.go:9)	MOVQ	CX, DX
	0x003a 00058 (x.go:9)	SHRQ	$56, CX
	0x003e 00062 (x.go:9)	MOVQ	"".b+8(FP), BX
	0x0043 00067 (x.go:9)	MOVB	CL, (BX)(AX*1)
	0x0046 00070 (x.go:9)	MOVQ	DX, CX
	0x0049 00073 (x.go:9)	SHRQ	$48, DX
	0x004d 00077 (x.go:9)	MOVB	DL, 1(BX)(AX*1)
	0x0051 00081 (x.go:9)	MOVQ	CX, DX
	0x0054 00084 (x.go:9)	SHRQ	$40, CX
	0x0058 00088 (x.go:9)	MOVB	CL, 2(BX)(AX*1)
	0x005c 00092 (x.go:9)	MOVQ	DX, CX
	0x005f 00095 (x.go:9)	SHRQ	$32, DX
	0x0063 00099 (x.go:9)	MOVB	DL, 3(BX)(AX*1)
	0x0067 00103 (x.go:9)	MOVQ	CX, DX
	0x006a 00106 (x.go:9)	SHRQ	$24, CX
	0x006e 00110 (x.go:9)	MOVB	CL, 4(BX)(AX*1)
	0x0072 00114 (x.go:9)	MOVQ	DX, CX
	0x0075 00117 (x.go:9)	SHRQ	$16, DX
	0x0079 00121 (x.go:9)	MOVB	DL, 5(BX)(AX*1)
	0x007d 00125 (x.go:9)	MOVQ	CX, DX
	0x0080 00128 (x.go:9)	SHRQ	$8, CX
	0x0084 00132 (x.go:9)	MOVB	CL, 6(BX)(AX*1)
	0x0088 00136 (x.go:9)	MOVB	DL, 7(BX)(AX*1)

	// after
	0x0033 00051 (x.go:9)	BSWAPQ	CX
	0x0036 00054 (x.go:9)	MOVQ	"".b+8(FP), DX
	0x003b 00059 (x.go:9)	MOVQ	CX, (DX)(AX*1)

Updates #17151

Change-Id: I3f4a7f28f210e62e153e60da5abd1d39508cc6c4
Reviewed-on: https://go-review.googlesource.com/34635
Run-TryBot: Ilya Tocar <ilya.tocar@intel.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarIlya Tocar <ilya.tocar@intel.com>
parent a0645fca
......@@ -183,6 +183,14 @@ func f(b []byte, v uint64) {
},
{"amd64", "linux", `
import "encoding/binary"
func f(b []byte, i int, v uint64) {
binary.BigEndian.PutUint64(b[i:], v)
}
`,
[]string{"\tBSWAPQ\t"},
},
{"amd64", "linux", `
import "encoding/binary"
func f(b []byte) uint32 {
return binary.BigEndian.Uint32(b)
}
......@@ -202,6 +210,14 @@ import "encoding/binary"
func f(b []byte, v uint32) {
binary.BigEndian.PutUint32(b, v)
}
`,
[]string{"\tBSWAPL\t"},
},
{"amd64", "linux", `
import "encoding/binary"
func f(b []byte, i int, v uint32) {
binary.BigEndian.PutUint32(b[i:], v)
}
`,
[]string{"\tBSWAPL\t"},
},
......
......@@ -1732,6 +1732,18 @@
&& clobber(x2)
-> (MOVLstore [i-3] {s} p (BSWAPL <w.Type> w) mem)
(MOVBstoreidx1 [i] {s} p idx w
x2:(MOVBstoreidx1 [i-1] {s} p idx (SHRLconst [8] w)
x1:(MOVBstoreidx1 [i-2] {s} p idx (SHRLconst [16] w)
x0:(MOVBstoreidx1 [i-3] {s} p idx (SHRLconst [24] w) mem))))
&& x0.Uses == 1
&& x1.Uses == 1
&& x2.Uses == 1
&& clobber(x0)
&& clobber(x1)
&& clobber(x2)
-> (MOVLstoreidx1 [i-3] {s} p idx (BSWAPL <w.Type> w) mem)
(MOVBstore [i] {s} p w
x6:(MOVBstore [i-1] {s} p (SHRQconst [8] w)
x5:(MOVBstore [i-2] {s} p (SHRQconst [16] w)
......@@ -1756,6 +1768,30 @@
&& clobber(x6)
-> (MOVQstore [i-7] {s} p (BSWAPQ <w.Type> w) mem)
(MOVBstoreidx1 [i] {s} p idx w
x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w)
x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w)
x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w)
x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w)
x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w)
x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w)
x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
&& x0.Uses == 1
&& x1.Uses == 1
&& x2.Uses == 1
&& x3.Uses == 1
&& x4.Uses == 1
&& x5.Uses == 1
&& x6.Uses == 1
&& clobber(x0)
&& clobber(x1)
&& clobber(x2)
&& clobber(x3)
&& clobber(x4)
&& clobber(x5)
&& clobber(x6)
-> (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
// Combine constant stores into larger (unaligned) stores.
(MOVBstoreconst [c] {s} p x:(MOVBstoreconst [a] {s} p mem))
&& x.Uses == 1
......
......@@ -4875,6 +4875,314 @@ func rewriteValueAMD64_OpAMD64MOVBstoreidx1(v *Value, config *Config) bool {
v.AddArg(mem)
return true
}
// match: (MOVBstoreidx1 [i] {s} p idx w x2:(MOVBstoreidx1 [i-1] {s} p idx (SHRLconst [8] w) x1:(MOVBstoreidx1 [i-2] {s} p idx (SHRLconst [16] w) x0:(MOVBstoreidx1 [i-3] {s} p idx (SHRLconst [24] w) mem))))
// cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)
// result: (MOVLstoreidx1 [i-3] {s} p idx (BSWAPL <w.Type> w) mem)
for {
i := v.AuxInt
s := v.Aux
p := v.Args[0]
idx := v.Args[1]
w := v.Args[2]
x2 := v.Args[3]
if x2.Op != OpAMD64MOVBstoreidx1 {
break
}
if x2.AuxInt != i-1 {
break
}
if x2.Aux != s {
break
}
if p != x2.Args[0] {
break
}
if idx != x2.Args[1] {
break
}
x2_2 := x2.Args[2]
if x2_2.Op != OpAMD64SHRLconst {
break
}
if x2_2.AuxInt != 8 {
break
}
if w != x2_2.Args[0] {
break
}
x1 := x2.Args[3]
if x1.Op != OpAMD64MOVBstoreidx1 {
break
}
if x1.AuxInt != i-2 {
break
}
if x1.Aux != s {
break
}
if p != x1.Args[0] {
break
}
if idx != x1.Args[1] {
break
}
x1_2 := x1.Args[2]
if x1_2.Op != OpAMD64SHRLconst {
break
}
if x1_2.AuxInt != 16 {
break
}
if w != x1_2.Args[0] {
break
}
x0 := x1.Args[3]
if x0.Op != OpAMD64MOVBstoreidx1 {
break
}
if x0.AuxInt != i-3 {
break
}
if x0.Aux != s {
break
}
if p != x0.Args[0] {
break
}
if idx != x0.Args[1] {
break
}
x0_2 := x0.Args[2]
if x0_2.Op != OpAMD64SHRLconst {
break
}
if x0_2.AuxInt != 24 {
break
}
if w != x0_2.Args[0] {
break
}
mem := x0.Args[3]
if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)) {
break
}
v.reset(OpAMD64MOVLstoreidx1)
v.AuxInt = i - 3
v.Aux = s
v.AddArg(p)
v.AddArg(idx)
v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, w.Type)
v0.AddArg(w)
v.AddArg(v0)
v.AddArg(mem)
return true
}
// match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
// cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
// result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
for {
i := v.AuxInt
s := v.Aux
p := v.Args[0]
idx := v.Args[1]
w := v.Args[2]
x6 := v.Args[3]
if x6.Op != OpAMD64MOVBstoreidx1 {
break
}
if x6.AuxInt != i-1 {
break
}
if x6.Aux != s {
break
}
if p != x6.Args[0] {
break
}
if idx != x6.Args[1] {
break
}
x6_2 := x6.Args[2]
if x6_2.Op != OpAMD64SHRQconst {
break
}
if x6_2.AuxInt != 8 {
break
}
if w != x6_2.Args[0] {
break
}
x5 := x6.Args[3]
if x5.Op != OpAMD64MOVBstoreidx1 {
break
}
if x5.AuxInt != i-2 {
break
}
if x5.Aux != s {
break
}
if p != x5.Args[0] {
break
}
if idx != x5.Args[1] {
break
}
x5_2 := x5.Args[2]
if x5_2.Op != OpAMD64SHRQconst {
break
}
if x5_2.AuxInt != 16 {
break
}
if w != x5_2.Args[0] {
break
}
x4 := x5.Args[3]
if x4.Op != OpAMD64MOVBstoreidx1 {
break
}
if x4.AuxInt != i-3 {
break
}
if x4.Aux != s {
break
}
if p != x4.Args[0] {
break
}
if idx != x4.Args[1] {
break
}
x4_2 := x4.Args[2]
if x4_2.Op != OpAMD64SHRQconst {
break
}
if x4_2.AuxInt != 24 {
break
}
if w != x4_2.Args[0] {
break
}
x3 := x4.Args[3]
if x3.Op != OpAMD64MOVBstoreidx1 {
break
}
if x3.AuxInt != i-4 {
break
}
if x3.Aux != s {
break
}
if p != x3.Args[0] {
break
}
if idx != x3.Args[1] {
break
}
x3_2 := x3.Args[2]
if x3_2.Op != OpAMD64SHRQconst {
break
}
if x3_2.AuxInt != 32 {
break
}
if w != x3_2.Args[0] {
break
}
x2 := x3.Args[3]
if x2.Op != OpAMD64MOVBstoreidx1 {
break
}
if x2.AuxInt != i-5 {
break
}
if x2.Aux != s {
break
}
if p != x2.Args[0] {
break
}
if idx != x2.Args[1] {
break
}
x2_2 := x2.Args[2]
if x2_2.Op != OpAMD64SHRQconst {
break
}
if x2_2.AuxInt != 40 {
break
}
if w != x2_2.Args[0] {
break
}
x1 := x2.Args[3]
if x1.Op != OpAMD64MOVBstoreidx1 {
break
}
if x1.AuxInt != i-6 {
break
}
if x1.Aux != s {
break
}
if p != x1.Args[0] {
break
}
if idx != x1.Args[1] {
break
}
x1_2 := x1.Args[2]
if x1_2.Op != OpAMD64SHRQconst {
break
}
if x1_2.AuxInt != 48 {
break
}
if w != x1_2.Args[0] {
break
}
x0 := x1.Args[3]
if x0.Op != OpAMD64MOVBstoreidx1 {
break
}
if x0.AuxInt != i-7 {
break
}
if x0.Aux != s {
break
}
if p != x0.Args[0] {
break
}
if idx != x0.Args[1] {
break
}
x0_2 := x0.Args[2]
if x0_2.Op != OpAMD64SHRQconst {
break
}
if x0_2.AuxInt != 56 {
break
}
if w != x0_2.Args[0] {
break
}
mem := x0.Args[3]
if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
break
}
v.reset(OpAMD64MOVQstoreidx1)
v.AuxInt = i - 7
v.Aux = s
v.AddArg(p)
v.AddArg(idx)
v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
v0.AddArg(w)
v.AddArg(v0)
v.AddArg(mem)
return true
}
// match: (MOVBstoreidx1 [i] {s} p idx (SHRQconst [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem))
// cond: x.Uses == 1 && clobber(x)
// result: (MOVWstoreidx1 [i-1] {s} p idx w mem)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment