Commit 1cfb5c3f authored by Michael Munday

cmd/compile: merge loads into operations on s390x

Adds the new canMergeLoad function which can be used by rules to
decide whether a load can be merged into an operation. The function
ensures that the merge will not reorder the load relative to memory
operations (for example, stores) in such a way that the block can no
longer be scheduled.

This new function enables transformations such as:

MOVD 0(R1), R2
ADD  R2, R3

to:

ADD  0(R1), R3

The two-operand form of the following instructions can now read a
single memory operand:

 - ADD
 - ADDC
 - ADDW
 - MULLD
 - MULLW
 - SUB
 - SUBC
 - SUBE
 - SUBW
 - AND
 - ANDW
 - OR
 - ORW
 - XOR
 - XORW

Improves SHA3 performance by 6-8%.

Updates #15054.

Change-Id: Ibcb9122126cd1a26f2c01c0dfdbb42fe5e7b5b94
Reviewed-on: https://go-review.googlesource.com/29272
Run-TryBot: Michael Munday <munday@ca.ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
parent 92221fe8
...@@ -144,6 +144,35 @@ TEXT main·foo(SB),7,$16-0 // TEXT main.foo(SB), 7, $16-0 ...@@ -144,6 +144,35 @@ TEXT main·foo(SB),7,$16-0 // TEXT main.foo(SB), 7, $16-0
XORW $65536, R1 // c01700010000 XORW $65536, R1 // c01700010000
XORW $-2, R1 // c017fffffffe XORW $-2, R1 // c017fffffffe
ADD -524288(R1), R2 // e32010008008
ADD 524287(R3), R4 // e3403fff7f08
ADD -524289(R1), R2 // c0a1fff7ffffe32a10000008
ADD 524288(R3), R4 // c0a100080000e34a30000008
ADD -524289(R1)(R2*1), R3 // c0a1fff7ffff41aa2000e33a10000008
ADD 524288(R3)(R4*1), R5 // c0a10008000041aa4000e35a30000008
ADDC (R1), R2 // e3201000000a
ADDW (R5), R6 // 5a605000
ADDW 4095(R7), R8 // 5a807fff
ADDW -1(R1), R2 // e3201fffff5a
ADDW 4096(R3), R4 // e3403000015a
MULLD (R1)(R2*1), R3 // e3321000000c
MULLW (R3)(R4*1), R5 // 71543000
MULLW 4096(R3), R4 // e34030000151
SUB (R1), R2 // e32010000009
SUBC (R1), R2 // e3201000000b
SUBE (R1), R2 // e32010000089
SUBW (R1), R2 // 5b201000
SUBW -1(R1), R2 // e3201fffff5b
AND (R1), R2 // e32010000080
ANDW (R1), R2 // 54201000
ANDW -1(R1), R2 // e3201fffff54
OR (R1), R2 // e32010000081
ORW (R1), R2 // 56201000
ORW -1(R1), R2 // e3201fffff56
XOR (R1), R2 // e32010000082
XORW (R1), R2 // 57201000
XORW -1(R1), R2 // e3201fffff57
LAA R1, R2, 524287(R3) // eb213fff7ff8 LAA R1, R2, 524287(R3) // eb213fff7ff8
LAAG R4, R5, -524288(R6) // eb54600080e8 LAAG R4, R5, -524288(R6) // eb54600080e8
LAAL R7, R8, 8192(R9) // eb87900002fa LAAL R7, R8, 8192(R9) // eb87900002fa
......
...@@ -333,6 +333,22 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { ...@@ -333,6 +333,22 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.From.Val = math.Float64frombits(uint64(v.AuxInt)) p.From.Val = math.Float64frombits(uint64(v.AuxInt))
p.To.Type = obj.TYPE_REG p.To.Type = obj.TYPE_REG
p.To.Reg = x p.To.Reg = x
case ssa.OpS390XADDWload, ssa.OpS390XADDload,
ssa.OpS390XMULLWload, ssa.OpS390XMULLDload,
ssa.OpS390XSUBWload, ssa.OpS390XSUBload,
ssa.OpS390XANDWload, ssa.OpS390XANDload,
ssa.OpS390XORWload, ssa.OpS390XORload,
ssa.OpS390XXORWload, ssa.OpS390XXORload:
r := v.Reg()
if r != v.Args[0].Reg() {
v.Fatalf("input[0] and output not in same register %s", v.LongString())
}
p := gc.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_MEM
p.From.Reg = v.Args[1].Reg()
gc.AddAux(&p.From, v)
p.To.Type = obj.TYPE_REG
p.To.Reg = r
case ssa.OpS390XMOVDload, case ssa.OpS390XMOVDload,
ssa.OpS390XMOVWZload, ssa.OpS390XMOVHZload, ssa.OpS390XMOVBZload, ssa.OpS390XMOVWZload, ssa.OpS390XMOVHZload, ssa.OpS390XMOVBZload,
ssa.OpS390XMOVDBRload, ssa.OpS390XMOVWBRload, ssa.OpS390XMOVHBRload, ssa.OpS390XMOVDBRload, ssa.OpS390XMOVWBRload, ssa.OpS390XMOVHBRload,
......
...@@ -947,6 +947,77 @@ ...@@ -947,6 +947,77 @@
(XOR x x) -> (MOVDconst [0]) (XOR x x) -> (MOVDconst [0])
(XORW x x) -> (MOVDconst [0]) (XORW x x) -> (MOVDconst [0])
// Fold memory operations into operations.
// Exclude global data (SB) because these instructions cannot handle relative addresses.
// TODO(mundaym): use LARL in the assembler to handle SB?
// TODO(mundaym): indexed versions of these?
(ADD <t> x g:(MOVDload [off] {sym} ptr mem)) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (ADDload <t> [off] {sym} x ptr mem)
(ADD <t> g:(MOVDload [off] {sym} ptr mem) x) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (ADDload <t> [off] {sym} x ptr mem)
(ADDW <t> x g:(MOVWload [off] {sym} ptr mem)) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (ADDWload <t> [off] {sym} x ptr mem)
(ADDW <t> g:(MOVWload [off] {sym} ptr mem) x) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (ADDWload <t> [off] {sym} x ptr mem)
(ADDW <t> x g:(MOVWZload [off] {sym} ptr mem)) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (ADDWload <t> [off] {sym} x ptr mem)
(ADDW <t> g:(MOVWZload [off] {sym} ptr mem) x) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (ADDWload <t> [off] {sym} x ptr mem)
(MULLD <t> x g:(MOVDload [off] {sym} ptr mem)) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (MULLDload <t> [off] {sym} x ptr mem)
(MULLD <t> g:(MOVDload [off] {sym} ptr mem) x) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (MULLDload <t> [off] {sym} x ptr mem)
(MULLW <t> x g:(MOVWload [off] {sym} ptr mem)) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (MULLWload <t> [off] {sym} x ptr mem)
(MULLW <t> g:(MOVWload [off] {sym} ptr mem) x) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (MULLWload <t> [off] {sym} x ptr mem)
(MULLW <t> x g:(MOVWZload [off] {sym} ptr mem)) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (MULLWload <t> [off] {sym} x ptr mem)
(MULLW <t> g:(MOVWZload [off] {sym} ptr mem) x) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (MULLWload <t> [off] {sym} x ptr mem)
(SUB <t> x g:(MOVDload [off] {sym} ptr mem)) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (SUBload <t> [off] {sym} x ptr mem)
(SUBW <t> x g:(MOVWload [off] {sym} ptr mem)) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (SUBWload <t> [off] {sym} x ptr mem)
(SUBW <t> x g:(MOVWZload [off] {sym} ptr mem)) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (SUBWload <t> [off] {sym} x ptr mem)
(AND <t> x g:(MOVDload [off] {sym} ptr mem)) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (ANDload <t> [off] {sym} x ptr mem)
(AND <t> g:(MOVDload [off] {sym} ptr mem) x) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (ANDload <t> [off] {sym} x ptr mem)
(ANDW <t> x g:(MOVWload [off] {sym} ptr mem)) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (ANDWload <t> [off] {sym} x ptr mem)
(ANDW <t> g:(MOVWload [off] {sym} ptr mem) x) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (ANDWload <t> [off] {sym} x ptr mem)
(ANDW <t> x g:(MOVWZload [off] {sym} ptr mem)) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (ANDWload <t> [off] {sym} x ptr mem)
(ANDW <t> g:(MOVWZload [off] {sym} ptr mem) x) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (ANDWload <t> [off] {sym} x ptr mem)
(OR <t> x g:(MOVDload [off] {sym} ptr mem)) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (ORload <t> [off] {sym} x ptr mem)
(OR <t> g:(MOVDload [off] {sym} ptr mem) x) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (ORload <t> [off] {sym} x ptr mem)
(ORW <t> x g:(MOVWload [off] {sym} ptr mem)) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (ORWload <t> [off] {sym} x ptr mem)
(ORW <t> g:(MOVWload [off] {sym} ptr mem) x) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (ORWload <t> [off] {sym} x ptr mem)
(ORW <t> x g:(MOVWZload [off] {sym} ptr mem)) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (ORWload <t> [off] {sym} x ptr mem)
(ORW <t> g:(MOVWZload [off] {sym} ptr mem) x) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (ORWload <t> [off] {sym} x ptr mem)
(XOR <t> x g:(MOVDload [off] {sym} ptr mem)) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (XORload <t> [off] {sym} x ptr mem)
(XOR <t> g:(MOVDload [off] {sym} ptr mem) x) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (XORload <t> [off] {sym} x ptr mem)
(XORW <t> x g:(MOVWload [off] {sym} ptr mem)) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (XORWload <t> [off] {sym} x ptr mem)
(XORW <t> g:(MOVWload [off] {sym} ptr mem) x) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (XORWload <t> [off] {sym} x ptr mem)
(XORW <t> x g:(MOVWZload [off] {sym} ptr mem)) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (XORWload <t> [off] {sym} x ptr mem)
(XORW <t> g:(MOVWZload [off] {sym} ptr mem) x) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (XORWload <t> [off] {sym} x ptr mem)
// Combine constant stores into larger (unaligned) stores. // Combine constant stores into larger (unaligned) stores.
// It doesn't work to global data (based on SB), // It doesn't work to global data (based on SB),
// because STGRL doesn't support unaligned address // because STGRL doesn't support unaligned address
......
...@@ -1265,14 +1265,20 @@ const ( ...@@ -1265,14 +1265,20 @@ const (
OpS390XADDW OpS390XADDW
OpS390XADDconst OpS390XADDconst
OpS390XADDWconst OpS390XADDWconst
OpS390XADDload
OpS390XADDWload
OpS390XSUB OpS390XSUB
OpS390XSUBW OpS390XSUBW
OpS390XSUBconst OpS390XSUBconst
OpS390XSUBWconst OpS390XSUBWconst
OpS390XSUBload
OpS390XSUBWload
OpS390XMULLD OpS390XMULLD
OpS390XMULLW OpS390XMULLW
OpS390XMULLDconst OpS390XMULLDconst
OpS390XMULLWconst OpS390XMULLWconst
OpS390XMULLDload
OpS390XMULLWload
OpS390XMULHD OpS390XMULHD
OpS390XMULHDU OpS390XMULHDU
OpS390XDIVD OpS390XDIVD
...@@ -1287,14 +1293,20 @@ const ( ...@@ -1287,14 +1293,20 @@ const (
OpS390XANDW OpS390XANDW
OpS390XANDconst OpS390XANDconst
OpS390XANDWconst OpS390XANDWconst
OpS390XANDload
OpS390XANDWload
OpS390XOR OpS390XOR
OpS390XORW OpS390XORW
OpS390XORconst OpS390XORconst
OpS390XORWconst OpS390XORWconst
OpS390XORload
OpS390XORWload
OpS390XXOR OpS390XXOR
OpS390XXORW OpS390XXORW
OpS390XXORconst OpS390XXORconst
OpS390XXORWconst OpS390XXORWconst
OpS390XXORload
OpS390XXORWload
OpS390XCMP OpS390XCMP
OpS390XCMPW OpS390XCMPW
OpS390XCMPU OpS390XCMPU
...@@ -15762,6 +15774,42 @@ var opcodeTable = [...]opInfo{ ...@@ -15762,6 +15774,42 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "ADDload",
auxType: auxSymOff,
argLen: 3,
resultInArg0: true,
clobberFlags: true,
faultOnNilArg1: true,
asm: s390x.AADD,
reg: regInfo{
inputs: []inputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
{1, 54270}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP
},
outputs: []outputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
},
},
},
{
name: "ADDWload",
auxType: auxSymOff,
argLen: 3,
resultInArg0: true,
clobberFlags: true,
faultOnNilArg1: true,
asm: s390x.AADDW,
reg: regInfo{
inputs: []inputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
{1, 54270}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP
},
outputs: []outputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
},
},
},
{ {
name: "SUB", name: "SUB",
argLen: 2, argLen: 2,
...@@ -15824,6 +15872,42 @@ var opcodeTable = [...]opInfo{ ...@@ -15824,6 +15872,42 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "SUBload",
auxType: auxSymOff,
argLen: 3,
resultInArg0: true,
clobberFlags: true,
faultOnNilArg1: true,
asm: s390x.ASUB,
reg: regInfo{
inputs: []inputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
{1, 54270}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP
},
outputs: []outputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
},
},
},
{
name: "SUBWload",
auxType: auxSymOff,
argLen: 3,
resultInArg0: true,
clobberFlags: true,
faultOnNilArg1: true,
asm: s390x.ASUBW,
reg: regInfo{
inputs: []inputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
{1, 54270}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP
},
outputs: []outputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
},
},
},
{ {
name: "MULLD", name: "MULLD",
argLen: 2, argLen: 2,
...@@ -15890,6 +15974,42 @@ var opcodeTable = [...]opInfo{ ...@@ -15890,6 +15974,42 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "MULLDload",
auxType: auxSymOff,
argLen: 3,
resultInArg0: true,
clobberFlags: true,
faultOnNilArg1: true,
asm: s390x.AMULLD,
reg: regInfo{
inputs: []inputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
{1, 54270}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP
},
outputs: []outputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
},
},
},
{
name: "MULLWload",
auxType: auxSymOff,
argLen: 3,
resultInArg0: true,
clobberFlags: true,
faultOnNilArg1: true,
asm: s390x.AMULLW,
reg: regInfo{
inputs: []inputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
{1, 54270}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP
},
outputs: []outputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
},
},
},
{ {
name: "MULHD", name: "MULHD",
argLen: 2, argLen: 2,
...@@ -16114,6 +16234,42 @@ var opcodeTable = [...]opInfo{ ...@@ -16114,6 +16234,42 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "ANDload",
auxType: auxSymOff,
argLen: 3,
resultInArg0: true,
clobberFlags: true,
faultOnNilArg1: true,
asm: s390x.AAND,
reg: regInfo{
inputs: []inputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
{1, 54270}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP
},
outputs: []outputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
},
},
},
{
name: "ANDWload",
auxType: auxSymOff,
argLen: 3,
resultInArg0: true,
clobberFlags: true,
faultOnNilArg1: true,
asm: s390x.AANDW,
reg: regInfo{
inputs: []inputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
{1, 54270}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP
},
outputs: []outputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
},
},
},
{ {
name: "OR", name: "OR",
argLen: 2, argLen: 2,
...@@ -16178,6 +16334,42 @@ var opcodeTable = [...]opInfo{ ...@@ -16178,6 +16334,42 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "ORload",
auxType: auxSymOff,
argLen: 3,
resultInArg0: true,
clobberFlags: true,
faultOnNilArg1: true,
asm: s390x.AOR,
reg: regInfo{
inputs: []inputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
{1, 54270}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP
},
outputs: []outputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
},
},
},
{
name: "ORWload",
auxType: auxSymOff,
argLen: 3,
resultInArg0: true,
clobberFlags: true,
faultOnNilArg1: true,
asm: s390x.AORW,
reg: regInfo{
inputs: []inputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
{1, 54270}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP
},
outputs: []outputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
},
},
},
{ {
name: "XOR", name: "XOR",
argLen: 2, argLen: 2,
...@@ -16242,6 +16434,42 @@ var opcodeTable = [...]opInfo{ ...@@ -16242,6 +16434,42 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "XORload",
auxType: auxSymOff,
argLen: 3,
resultInArg0: true,
clobberFlags: true,
faultOnNilArg1: true,
asm: s390x.AXOR,
reg: regInfo{
inputs: []inputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
{1, 54270}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP
},
outputs: []outputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
},
},
},
{
name: "XORWload",
auxType: auxSymOff,
argLen: 3,
resultInArg0: true,
clobberFlags: true,
faultOnNilArg1: true,
asm: s390x.AXORW,
reg: regInfo{
inputs: []inputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
{1, 54270}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP
},
outputs: []outputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
},
},
},
{ {
name: "CMP", name: "CMP",
argLen: 2, argLen: 2,
......
...@@ -149,6 +149,116 @@ func canMergeSym(x, y interface{}) bool { ...@@ -149,6 +149,116 @@ func canMergeSym(x, y interface{}) bool {
return x == nil || y == nil return x == nil || y == nil
} }
// canMergeLoad reports whether load may be folded into target while
// keeping the block schedulable.
//
// The merge is only legal if load's memory argument is still live at
// the point where target executes. That fails when any other argument
// of target (transitively, within the same block) depends on a memory
// state that comes after load's memory argument, e.g.:
//
//	load   = read ... oldmem
//	newmem = write ... oldmem
//	arg0   = read ... newmem
//	target = add arg0 load
//
// Here newmem has already killed oldmem before target is reached, so
// the load cannot be moved down into target.
//
// The search is bounded; when a bound is hit the function
// conservatively answers false.
func canMergeLoad(target, load *Value) bool {
	const (
		maxSteps    = 100 // worklist iterations before giving up
		maxMemChain = 50  // memory states recorded as predecessors of mem
	)

	blockID := target.Block.ID
	if load.Block.ID != blockID {
		// Never merge a load that lives in another block.
		return false
	}

	// The load's memory state is its final argument.
	mem := load.Args[len(load.Args)-1]

	// Seed the worklist with target's other arguments from this block.
	// Arguments defined in a different block can be ignored right away:
	// they must dominate load, which shares a block with target.
	var work []*Value
	for _, arg := range target.Args {
		if arg == load || arg.Block.ID != blockID {
			continue
		}
		work = append(work, arg)
	}

	// preds holds the memory states known to precede (or equal) mem
	// inside this block. It is only built if a memory-typed value is
	// actually encountered during the search.
	var preds map[*Value]bool
	isMemPred := func(v *Value) bool {
		if preds == nil {
			preds = make(map[*Value]bool)
			m := mem
			for n := 0; n < maxMemChain; n++ {
				// Stop at the top of the block (Phi), when the chain
				// leaves the block, or when it stops being memory.
				if m.Op == OpPhi || m.Block.ID != blockID || !m.Type.IsMemory() {
					break
				}
				preds[m] = true
				if len(m.Args) == 0 {
					break
				}
				// Follow the chain through the last argument.
				m = m.Args[len(m.Args)-1]
			}
		}
		return preds[v]
	}

	for steps := 0; len(work) > 0; steps++ {
		if steps >= maxSteps {
			// Too much work; be conservative.
			return false
		}

		// Pop the most recently pushed value.
		top := len(work) - 1
		v := work[top]
		work = work[:top]

		// Leaving the block, or reaching a Phi (the top of the block),
		// ends the search along this path.
		if v.Block.ID != blockID || v.Op == OpPhi {
			continue
		}

		if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
			// Tuple results carrying memory could be supported, but
			// they are expected to be rare, so just refuse.
			return false
		}

		if v.Type.IsMemory() {
			// A memory state is acceptable only if it is a predecessor
			// of mem, as in:
			//
			//	x      = read ... v
			//	mem    = write ... v
			//	load   = read ... mem
			//	target = add x load
			if !isMemPred(v) {
				return false
			}
			continue
		}

		if n := len(v.Args); n > 0 && v.Args[n-1] == mem {
			// v itself consumes mem, so mem is valid at v.
			continue
		}

		// Otherwise keep walking v's same-block arguments.
		for _, arg := range v.Args {
			if arg.Block.ID == blockID {
				work = append(work, arg)
			}
		}
	}
	return true
}
// isArg returns whether s is an arg symbol // isArg returns whether s is an arg symbol
func isArg(s interface{}) bool { func isArg(s interface{}) bool {
_, ok := s.(*ArgSymbol) _, ok := s.(*ArgSymbol)
......
...@@ -142,8 +142,12 @@ var optab = []Optab{ ...@@ -142,8 +142,12 @@ var optab = []Optab{
Optab{AADD, C_REG, C_NONE, C_NONE, C_REG, 2, 0}, Optab{AADD, C_REG, C_NONE, C_NONE, C_REG, 2, 0},
Optab{AADD, C_LCON, C_REG, C_NONE, C_REG, 22, 0}, Optab{AADD, C_LCON, C_REG, C_NONE, C_REG, 22, 0},
Optab{AADD, C_LCON, C_NONE, C_NONE, C_REG, 22, 0}, Optab{AADD, C_LCON, C_NONE, C_NONE, C_REG, 22, 0},
Optab{AADD, C_LOREG, C_NONE, C_NONE, C_REG, 12, 0},
Optab{AADD, C_LAUTO, C_NONE, C_NONE, C_REG, 12, REGSP},
Optab{ASUB, C_LCON, C_REG, C_NONE, C_REG, 21, 0}, Optab{ASUB, C_LCON, C_REG, C_NONE, C_REG, 21, 0},
Optab{ASUB, C_LCON, C_NONE, C_NONE, C_REG, 21, 0}, Optab{ASUB, C_LCON, C_NONE, C_NONE, C_REG, 21, 0},
Optab{ASUB, C_LOREG, C_NONE, C_NONE, C_REG, 12, 0},
Optab{ASUB, C_LAUTO, C_NONE, C_NONE, C_REG, 12, REGSP},
Optab{AMULHD, C_REG, C_NONE, C_NONE, C_REG, 4, 0}, Optab{AMULHD, C_REG, C_NONE, C_NONE, C_REG, 4, 0},
Optab{AMULHD, C_REG, C_REG, C_NONE, C_REG, 4, 0}, Optab{AMULHD, C_REG, C_REG, C_NONE, C_REG, 4, 0},
Optab{ADIVW, C_REG, C_REG, C_NONE, C_REG, 2, 0}, Optab{ADIVW, C_REG, C_REG, C_NONE, C_REG, 2, 0},
...@@ -158,9 +162,13 @@ var optab = []Optab{ ...@@ -158,9 +162,13 @@ var optab = []Optab{
Optab{AAND, C_REG, C_NONE, C_NONE, C_REG, 6, 0}, Optab{AAND, C_REG, C_NONE, C_NONE, C_REG, 6, 0},
Optab{AAND, C_LCON, C_NONE, C_NONE, C_REG, 23, 0}, Optab{AAND, C_LCON, C_NONE, C_NONE, C_REG, 23, 0},
Optab{AAND, C_LCON, C_REG, C_NONE, C_REG, 23, 0}, Optab{AAND, C_LCON, C_REG, C_NONE, C_REG, 23, 0},
Optab{AAND, C_LOREG, C_NONE, C_NONE, C_REG, 12, 0},
Optab{AAND, C_LAUTO, C_NONE, C_NONE, C_REG, 12, REGSP},
Optab{AANDW, C_REG, C_REG, C_NONE, C_REG, 6, 0}, Optab{AANDW, C_REG, C_REG, C_NONE, C_REG, 6, 0},
Optab{AANDW, C_REG, C_NONE, C_NONE, C_REG, 6, 0}, Optab{AANDW, C_REG, C_NONE, C_NONE, C_REG, 6, 0},
Optab{AANDW, C_LCON, C_NONE, C_NONE, C_REG, 24, 0}, Optab{AANDW, C_LCON, C_NONE, C_NONE, C_REG, 24, 0},
Optab{AANDW, C_LOREG, C_NONE, C_NONE, C_REG, 12, 0},
Optab{AANDW, C_LAUTO, C_NONE, C_NONE, C_REG, 12, REGSP},
Optab{ASLD, C_REG, C_NONE, C_NONE, C_REG, 7, 0}, Optab{ASLD, C_REG, C_NONE, C_NONE, C_REG, 7, 0},
Optab{ASLD, C_REG, C_REG, C_NONE, C_REG, 7, 0}, Optab{ASLD, C_REG, C_REG, C_NONE, C_REG, 7, 0},
Optab{ASLD, C_SCON, C_REG, C_NONE, C_REG, 7, 0}, Optab{ASLD, C_SCON, C_REG, C_NONE, C_REG, 7, 0},
...@@ -2884,6 +2892,67 @@ func asmout(ctxt *obj.Link, asm *[]byte) { ...@@ -2884,6 +2892,67 @@ func asmout(ctxt *obj.Link, asm *[]byte) {
} }
} }
case 12:
r1 := p.To.Reg
d2 := vregoff(ctxt, &p.From)
b2 := p.From.Reg
if b2 == 0 {
b2 = o.param
}
x2 := p.From.Index
if -DISP20/2 > d2 || d2 >= DISP20/2 {
zRIL(_a, op_LGFI, REGTMP, uint32(d2), asm)
if x2 != 0 {
zRX(op_LA, REGTMP, REGTMP, uint32(x2), 0, asm)
}
x2 = REGTMP
d2 = 0
}
var opx, opxy uint32
switch p.As {
case AADD:
opxy = op_AG
case AADDC:
opxy = op_ALG
case AADDW:
opx = op_A
opxy = op_AY
case AMULLW:
opx = op_MS
opxy = op_MSY
case AMULLD:
opxy = op_MSG
case ASUB:
opxy = op_SG
case ASUBC:
opxy = op_SLG
case ASUBE:
opxy = op_SLBG
case ASUBW:
opx = op_S
opxy = op_SY
case AAND:
opxy = op_NG
case AANDW:
opx = op_N
opxy = op_NY
case AOR:
opxy = op_OG
case AORW:
opx = op_O
opxy = op_OY
case AXOR:
opxy = op_XG
case AXORW:
opx = op_X
opxy = op_XY
}
if opx != 0 && 0 <= d2 && d2 < DISP12 {
zRX(opx, uint32(r1), uint32(x2), uint32(b2), uint32(d2), asm)
} else {
zRXY(opxy, uint32(r1), uint32(x2), uint32(b2), uint32(d2), asm)
}
case 15: // br/bl (reg) case 15: // br/bl (reg)
r := p.To.Reg r := p.To.Reg
if p.As == ABCL || p.As == ABL { if p.As == ABCL || p.As == ABL {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment