Commit aff222cd authored by Lynn Boger

cmd/compile: improve PPC64.rules to reduce size of rewritePPC64.go

Some rules in PPC64.rules cause an extremely large rewritePPC64.go
file to be generated, due to rules with commutative operations and
many operands. This happens with the existing
rules for combining byte loads in little endian order, and
also happens with the pending change to do the same for bytes
in big endian order.

The change improves the existing rules and reduces the size of
the rewrite file by more than 60%. Once this change is merged,
then the pending change for big endian ordered rules will be
updated to use rules that avoid generating an excessively large
rewrite file.

This also includes a fix to a performance regression for
littleEndian.PutUint16 on ppc64le.

Change-Id: I8d2ea42885fa2b84b30c63aa124b0a9b130564ff
Reviewed-on: https://go-review.googlesource.com/100675
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
parent 7d4d2cb6
......@@ -860,12 +860,11 @@
(MOVHreg (MOVDconst [c])) -> (MOVDconst [int64(int16(c))])
// Lose widening ops fed to stores
(MOVBstore [off] {sym} ptr (MOVBreg x) mem) -> (MOVBstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr (MOVBZreg x) mem) -> (MOVBstore [off] {sym} ptr x mem)
(MOVHstore [off] {sym} ptr (MOVHreg x) mem) -> (MOVHstore [off] {sym} ptr x mem)
(MOVHstore [off] {sym} ptr (MOVHZreg x) mem) -> (MOVHstore [off] {sym} ptr x mem)
(MOVWstore [off] {sym} ptr (MOVWreg x) mem) -> (MOVWstore [off] {sym} ptr x mem)
(MOVWstore [off] {sym} ptr (MOVWZreg x) mem) -> (MOVWstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr (MOV(B|BZ|H|HZ|W|WZ)reg x) mem) -> (MOVBstore [off] {sym} ptr x mem)
(MOVHstore [off] {sym} ptr (MOV(H|HZ|W|WZ)reg x) mem) -> (MOVHstore [off] {sym} ptr x mem)
(MOVWstore [off] {sym} ptr (MOV(W|WZ)reg x) mem) -> (MOVWstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr (SRWconst (MOV(H|HZ)reg x) [c]) mem) && c <= 8 -> (MOVBstore [off] {sym} ptr (SRWconst <typ.UInt32> x [c]) mem)
(MOVBstore [off] {sym} ptr (SRWconst (MOV(W|WZ)reg x) [c]) mem) && c <= 24 -> (MOVBstore [off] {sym} ptr (SRWconst <typ.UInt32> x [c]) mem)
// Lose W-widening ops fed to compare-W
(CMPW x (MOVWreg y)) -> (CMPW x y)
......@@ -898,16 +897,16 @@
(FSUBS (FMULS x y) z) -> (FMSUBS x y z)
// The following statements are found in encoding/binary functions UintXX (load) and PutUintXX (store)
// and convert the statements in these functions from multiple single byte loads or stores to
// the single largest possible load or store. For now only little endian loads and stores on
// little endian machines are implemented. Longer rules make use of the match with shorter rules
// where possible.
// The following rules are intended to match statements as are found in encoding/binary
// functions UintXX (load) and PutUintXX (store), combining multi-byte loads and stores
// into wider loads and stores.
// Initial implementation handles only little endian loads and stores on little endian
// targets.
// TODO implement big endian loads and stores for little endian machines (using byte reverse
// loads and stores).
// b[0] | b[1]<<8 -> load 16-bit Little endian
(OR <t> x0:(MOVBZload [i0] {s} p mem)
o1:(SLWconst x1:(MOVBZload [i1] {s} p mem) [8]))
o1:(SL(W|D)const x1:(MOVBZload [i1] {s} p mem) [8]))
&& !config.BigEndian
&& i1 == i0+1
&& x0.Uses ==1 && x1.Uses == 1
......@@ -917,8 +916,8 @@
-> @mergePoint(b,x0,x1) (MOVHZload <t> {s} [i0] p mem)
// b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24 -> load 32-bit Little endian
(OR <t> s1:(SLWconst x2:(MOVBZload [i3] {s} p mem) [24])
o0:(OR <t> s0:(SLWconst x1:(MOVBZload [i2] {s} p mem) [16]) x0:(MOVHZload [i0] {s} p mem)))
(OR <t> s1:(SL(W|D)const x2:(MOVBZload [i3] {s} p mem) [24])
o0:(OR <t> s0:(SL(W|D)const x1:(MOVBZload [i2] {s} p mem) [16]) x0:(MOVHZload [i0] {s} p mem)))
&& !config.BigEndian
&& i2 == i0+2
&& i3 == i0+3
......@@ -932,37 +931,45 @@
-> @mergePoint(b,x0,x1,x2) (MOVWZload <t> {s} [i0] p mem)
// b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24 | b[4] <<32 | b[5]<<40 | b[6]<<48 | b[7]<<56 -> load 64-bit Little endian
// Can't build on shorter rules because they use SLW instead of SLD
// Note: long rules with commutative ops will result in very large functions in rewritePPC64,
// so shorter rules which make use of previously defined rules are preferred.
// Offset must be multiple of 4 for MOVD
(OR <t> s6:(SLDconst x7:(MOVBZload [i7] {s} p mem) [56])
o5:(OR <t> s5:(SLDconst x6:(MOVBZload [i6] {s} p mem) [48])
o4:(OR <t> s4:(SLDconst x5:(MOVBZload [i5] {s} p mem) [40])
o3:(OR <t> s3:(SLDconst x4:(MOVBZload [i4] {s} p mem) [32])
o2:(OR <t> s2:(SLDconst x3:(MOVBZload [i3] {s} p mem) [24])
o1:(OR <t> s1:(SLDconst x2:(MOVBZload [i2] {s} p mem) [16])
o0:(OR <t> s0:(SLDconst x1:(MOVBZload [i1] {s} p mem) [8]) x0:(MOVBZload [i0] {s} p mem))))))))
x0:(MOVWZload {s} [i0] p mem)))))
&& !config.BigEndian
&& i0%4 == 0
&& i1 == i0+1
&& i2 == i0+2
&& i3 == i0+3
&& i4 == i0+4
&& i5 == i0+5
&& i6 == i0+6
&& i7 == i0+7
&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses ==1 && x7.Uses == 1
&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1
&& s0.Uses == 1 && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && s4.Uses == 1 && s5.Uses == 1 && s6.Uses == 1
&& mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil
&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7)
&& clobber(s0) && clobber(s1) && clobber(s2) && clobber(s3) && clobber(s4) && clobber(s5) && clobber (s6)
&& clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5)
-> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDload <t> {s} [i0] p mem)
&& x0.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses ==1 && x7.Uses == 1
&& o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1
&& s3.Uses == 1 && s4.Uses == 1 && s5.Uses == 1 && s6.Uses == 1
&& mergePoint(b, x0, x4, x5, x6, x7) != nil
&& clobber(x0) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7)
&& clobber(s3) && clobber(s4) && clobber(s5) && clobber (s6)
&& clobber(o3) && clobber(o4) && clobber(o5)
-> @mergePoint(b,x0,x4,x5,x6,x7) (MOVDload <t> {s} [i0] p mem)
// 2 byte store Little endian as in:
// b[0] = byte(v >> 16)
// b[1] = byte(v >> 24)
// Added mainly to use when matching longer rules below
(MOVBstore [i1] {s} p (SR(W|D)const w [24])
x0:(MOVBstore [i0] {s} p (SR(W|D)const w [16]) mem))
&& !config.BigEndian
&& x0.Uses == 1
&& i1 == i0+1
&& clobber(x0)
-> (MOVHstore [i0] {s} p (SRWconst <typ.UInt16> w [16]) mem)
// 2 byte store Little endian as in:
// b[0] = byte(v)
// b[1] = byte(v >> 8)
(MOVBstore [i1] {s} p (SRWconst (MOVHZreg w) [8])
(MOVBstore [i1] {s} p (SR(W|D)const w [8])
x0:(MOVBstore [i0] {s} p w mem))
&& !config.BigEndian
&& x0.Uses == 1
......@@ -971,18 +978,14 @@
-> (MOVHstore [i0] {s} p w mem)
// 4 byte store Little endian as in:
// b[0] = byte(v)
// b[1] = byte(v >> 8)
// b[2] = byte(v >> 16)
// b[3] = byte(v >> 24)
(MOVBstore [i3] {s} p (SRWconst w [24])
x0:(MOVBstore [i2] {s} p (SRWconst w [16])
x1:(MOVBstore [i1] {s} p (SRWconst w [8])
x2:(MOVBstore [i0] {s} p w mem))))
// b[0:1] = uint16(v)
// b[2:3] = uint16(v >> 16)
(MOVHstore [i1] {s} p (SR(W|D)const w [16])
x0:(MOVHstore [i0] {s} p w mem))
&& !config.BigEndian
&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
&& i1 == i0+1 && i2 == i0+2 && i3 == i0+3
&& clobber(x0) && clobber(x1) && clobber(x2)
&& x0.Uses == 1
&& i1 == i0+2
&& clobber(x0)
-> (MOVWstore [i0] {s} p w mem)
// 8 byte store Little endian as in:
......@@ -994,19 +997,16 @@
// b[5] = byte(v >> 40)
// b[6] = byte(v >> 48)
// b[7] = byte(v >> 56)
// Built on previously defined rules
// Offset must be multiple of 4 for MOVDstore
// Can't build on previous rules for 2 or 4 bytes because they use SRW not SRD
(MOVBstore [i7] {s} p (SRDconst w [56])
x0:(MOVBstore [i6] {s} p (SRDconst w [48])
x1:(MOVBstore [i5] {s} p (SRDconst w [40])
x2:(MOVBstore [i4] {s} p (SRDconst w [32])
x3:(MOVBstore [i3] {s} p (SRDconst w [24])
x4:(MOVBstore [i2] {s} p (SRDconst w [16])
x5:(MOVBstore [i1] {s} p (SRDconst w [8])
x6:(MOVBstore [i0] {s} p w mem))))))))
x3:(MOVWstore [i0] {s} p w mem)))))
&& !config.BigEndian
&& i0%4 == 0
&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1
&& i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7
&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
&& i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7
&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)
-> (MOVDstore [i0] {s} p w mem)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment