Commit 0004f34c authored by Keith Randall

cmd/compile: regalloc enforces 2-address instructions

Instead of being a hint, resultInArg0 is now enforced by regalloc.
This allows us to delete all the code from amd64/ssa.go which
deals with converting from a semantically three-address instruction
into some copies plus a two-address instruction.

Change-Id: Id4f39a80be4b678718bfd42a229f9094ab6ecd7c
Reviewed-on: https://go-review.googlesource.com/21816
Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com>
parent 6b33b0e9
This diff is collapsed.
...@@ -1273,6 +1273,12 @@ ...@@ -1273,6 +1273,12 @@
(XORWconst [c] x) && int16(c)==0 -> x (XORWconst [c] x) && int16(c)==0 -> x
(XORBconst [c] x) && int8(c)==0 -> x (XORBconst [c] x) && int8(c)==0 -> x
// Convert constant subtracts to constant adds
(SUBQconst [c] x) && c != -(1<<31) -> (ADDQconst [-c] x)
(SUBLconst [c] x) -> (ADDLconst [int64(int32(-c))] x)
(SUBWconst [c] x) -> (ADDWconst [int64(int16(-c))] x)
(SUBBconst [c] x) -> (ADDBconst [int64(int8(-c))] x)
// generic constant folding // generic constant folding
// TODO: more of this // TODO: more of this
(ADDQconst [c] (MOVQconst [d])) -> (MOVQconst [c+d]) (ADDQconst [c] (MOVQconst [d])) -> (MOVQconst [c+d])
......
...@@ -111,12 +111,14 @@ func init() { ...@@ -111,12 +111,14 @@ func init() {
// Common regInfo // Common regInfo
var ( var (
gp01 = regInfo{inputs: []regMask{}, outputs: gponly} gp01 = regInfo{inputs: []regMask{}, outputs: gponly}
gp11 = regInfo{inputs: []regMask{gpsp}, outputs: gponly, clobbers: flags} gp11 = regInfo{inputs: []regMask{gp}, outputs: gponly, clobbers: flags}
gp11sp = regInfo{inputs: []regMask{gpsp}, outputs: gponly, clobbers: flags}
gp11nf = regInfo{inputs: []regMask{gpsp}, outputs: gponly} // nf: no flags clobbered gp11nf = regInfo{inputs: []regMask{gpsp}, outputs: gponly} // nf: no flags clobbered
gp11sb = regInfo{inputs: []regMask{gpspsb}, outputs: gponly} gp11sb = regInfo{inputs: []regMask{gpspsb}, outputs: gponly}
gp21 = regInfo{inputs: []regMask{gpsp, gpsp}, outputs: gponly, clobbers: flags} gp21 = regInfo{inputs: []regMask{gp, gp}, outputs: gponly, clobbers: flags}
gp21sp = regInfo{inputs: []regMask{gpsp, gp}, outputs: gponly, clobbers: flags}
gp21sb = regInfo{inputs: []regMask{gpspsb, gpsp}, outputs: gponly} gp21sb = regInfo{inputs: []regMask{gpspsb, gpsp}, outputs: gponly}
gp21shift = regInfo{inputs: []regMask{gpsp, cx}, outputs: []regMask{gp &^ cx}, clobbers: flags} gp21shift = regInfo{inputs: []regMask{gp, cx}, outputs: []regMask{gp}, clobbers: flags}
gp11div = regInfo{inputs: []regMask{ax, gpsp &^ dx}, outputs: []regMask{ax}, gp11div = regInfo{inputs: []regMask{ax, gpsp &^ dx}, outputs: []regMask{ax},
clobbers: dx | flags} clobbers: dx | flags}
gp11hmul = regInfo{inputs: []regMask{ax, gpsp}, outputs: []regMask{dx}, gp11hmul = regInfo{inputs: []regMask{ax, gpsp}, outputs: []regMask{dx},
...@@ -128,8 +130,8 @@ func init() { ...@@ -128,8 +130,8 @@ func init() {
gp1flags = regInfo{inputs: []regMask{gpsp}, outputs: flagsonly} gp1flags = regInfo{inputs: []regMask{gpsp}, outputs: flagsonly}
flagsgp = regInfo{inputs: flagsonly, outputs: gponly} flagsgp = regInfo{inputs: flagsonly, outputs: gponly}
// for CMOVconst -- uses AX to hold constant temporary. AX input is moved before temp. // for CMOVconst -- uses AX to hold constant temporary.
gp1flagsgp = regInfo{inputs: []regMask{gp, flags}, clobbers: ax | flags, outputs: []regMask{gp &^ ax}} gp1flagsgp = regInfo{inputs: []regMask{gp &^ ax, flags}, clobbers: ax | flags, outputs: []regMask{gp &^ ax}}
readflags = regInfo{inputs: flagsonly, outputs: gponly} readflags = regInfo{inputs: flagsonly, outputs: gponly}
flagsgpax = regInfo{inputs: flagsonly, clobbers: ax | flags, outputs: []regMask{gp &^ ax}} flagsgpax = regInfo{inputs: flagsonly, clobbers: ax | flags, outputs: []regMask{gp &^ ax}}
...@@ -186,14 +188,14 @@ func init() { ...@@ -186,14 +188,14 @@ func init() {
{name: "MOVSDstoreidx8", argLength: 4, reg: fpstoreidx, asm: "MOVSD", aux: "SymOff"}, // fp64 indexed by 8i store {name: "MOVSDstoreidx8", argLength: 4, reg: fpstoreidx, asm: "MOVSD", aux: "SymOff"}, // fp64 indexed by 8i store
// binary ops // binary ops
{name: "ADDQ", argLength: 2, reg: gp21, asm: "ADDQ", commutative: true, resultInArg0: true}, // arg0 + arg1 {name: "ADDQ", argLength: 2, reg: gp21sp, asm: "ADDQ", commutative: true}, // arg0 + arg1
{name: "ADDL", argLength: 2, reg: gp21, asm: "ADDL", commutative: true, resultInArg0: true}, // arg0 + arg1 {name: "ADDL", argLength: 2, reg: gp21sp, asm: "ADDL", commutative: true}, // arg0 + arg1
{name: "ADDW", argLength: 2, reg: gp21, asm: "ADDL", commutative: true, resultInArg0: true}, // arg0 + arg1 {name: "ADDW", argLength: 2, reg: gp21sp, asm: "ADDL", commutative: true}, // arg0 + arg1
{name: "ADDB", argLength: 2, reg: gp21, asm: "ADDL", commutative: true, resultInArg0: true}, // arg0 + arg1 {name: "ADDB", argLength: 2, reg: gp21sp, asm: "ADDL", commutative: true}, // arg0 + arg1
{name: "ADDQconst", argLength: 1, reg: gp11, asm: "ADDQ", aux: "Int64", resultInArg0: true, typ: "UInt64"}, // arg0 + auxint {name: "ADDQconst", argLength: 1, reg: gp11sp, asm: "ADDQ", aux: "Int64", typ: "UInt64"}, // arg0 + auxint
{name: "ADDLconst", argLength: 1, reg: gp11, asm: "ADDL", aux: "Int32", resultInArg0: true}, // arg0 + auxint {name: "ADDLconst", argLength: 1, reg: gp11sp, asm: "ADDL", aux: "Int32"}, // arg0 + auxint
{name: "ADDWconst", argLength: 1, reg: gp11, asm: "ADDL", aux: "Int16", resultInArg0: true}, // arg0 + auxint {name: "ADDWconst", argLength: 1, reg: gp11sp, asm: "ADDL", aux: "Int16"}, // arg0 + auxint
{name: "ADDBconst", argLength: 1, reg: gp11, asm: "ADDL", aux: "Int8", resultInArg0: true}, // arg0 + auxint {name: "ADDBconst", argLength: 1, reg: gp11sp, asm: "ADDL", aux: "Int8"}, // arg0 + auxint
{name: "SUBQ", argLength: 2, reg: gp21, asm: "SUBQ", resultInArg0: true}, // arg0 - arg1 {name: "SUBQ", argLength: 2, reg: gp21, asm: "SUBQ", resultInArg0: true}, // arg0 - arg1
{name: "SUBL", argLength: 2, reg: gp21, asm: "SUBL", resultInArg0: true}, // arg0 - arg1 {name: "SUBL", argLength: 2, reg: gp21, asm: "SUBL", resultInArg0: true}, // arg0 - arg1
......
...@@ -39,7 +39,7 @@ type opData struct { ...@@ -39,7 +39,7 @@ type opData struct {
rematerializeable bool rematerializeable bool
argLength int32 // number of arguments, if -1, then this operation has a variable number of arguments argLength int32 // number of arguments, if -1, then this operation has a variable number of arguments
commutative bool // this operation is commutative (e.g. addition) commutative bool // this operation is commutative (e.g. addition)
resultInArg0 bool // prefer v and v.Args[0] to be allocated to the same register resultInArg0 bool // v and v.Args[0] must be allocated to the same register
} }
type blockData struct { type blockData struct {
...@@ -155,6 +155,12 @@ func genOp() { ...@@ -155,6 +155,12 @@ func genOp() {
} }
if v.resultInArg0 { if v.resultInArg0 {
fmt.Fprintln(w, "resultInArg0: true,") fmt.Fprintln(w, "resultInArg0: true,")
if v.reg.inputs[0] != v.reg.outputs[0] {
log.Fatalf("input[0] and output registers must be equal for %s", v.name)
}
if v.commutative && v.reg.inputs[1] != v.reg.outputs[0] {
log.Fatalf("input[1] and output registers must be equal for %s", v.name)
}
} }
if a.name == "generic" { if a.name == "generic" {
fmt.Fprintln(w, "generic:true,") fmt.Fprintln(w, "generic:true,")
......
...@@ -26,7 +26,7 @@ type opInfo struct { ...@@ -26,7 +26,7 @@ type opInfo struct {
generic bool // this is a generic (arch-independent) opcode generic bool // this is a generic (arch-independent) opcode
rematerializeable bool // this op is rematerializeable rematerializeable bool // this op is rematerializeable
commutative bool // this operation is commutative (e.g. addition) commutative bool // this operation is commutative (e.g. addition)
resultInArg0 bool // prefer v and v.Args[0] to be allocated to the same register resultInArg0 bool // v and v.Args[0] must be allocated to the same register
} }
type inputInfo struct { type inputInfo struct {
......
This diff is collapsed.
...@@ -527,6 +527,18 @@ func (s *regAllocState) advanceUses(v *Value) { ...@@ -527,6 +527,18 @@ func (s *regAllocState) advanceUses(v *Value) {
} }
} }
// liveAfterCurrentInstruction reports whether v still has a use
// once the current instruction has finished executing.
//
// Precondition: v is used by the current instruction, so its use
// list is non-empty and the head record's dist identifies the
// current instruction.
// NOTE(review): presumably the use list is ordered by increasing
// dist, so the first record past the current instruction is the
// next use — confirm against the code that builds the list.
func (s *regAllocState) liveAfterCurrentInstruction(v *Value) bool {
	cur := s.values[v.ID].uses
	here := cur.dist

	// The current instruction may use v more than once; step past
	// every record that shares the head record's distance.
	for cur != nil && cur.dist == here {
		cur = cur.next
	}

	// No records left: nothing uses v after this instruction.
	if cur == nil {
		return false
	}
	return cur.dist > here
}
// Sets the state of the registers to that encoded in regs. // Sets the state of the registers to that encoded in regs.
func (s *regAllocState) setState(regs []endReg) { func (s *regAllocState) setState(regs []endReg) {
s.freeRegs(s.used) s.freeRegs(s.used)
...@@ -891,6 +903,27 @@ func (s *regAllocState) regalloc(f *Func) { ...@@ -891,6 +903,27 @@ func (s *regAllocState) regalloc(f *Func) {
args[i.idx] = s.allocValToReg(v.Args[i.idx], i.regs, true, v.Line) args[i.idx] = s.allocValToReg(v.Args[i.idx], i.regs, true, v.Line)
} }
// If the output clobbers the input register, and the input register is
// live beyond the instruction, make another copy of the input register so
// we don't have to reload the value from the spill location.
if opcodeTable[v.Op].resultInArg0 &&
s.liveAfterCurrentInstruction(v.Args[0]) &&
countRegs(s.values[v.Args[0].ID].regs) == 1 {
if opcodeTable[v.Op].commutative &&
(!s.liveAfterCurrentInstruction(v.Args[1]) ||
countRegs(s.values[v.Args[1].ID].regs) > 1) {
// Input #1 is dead after the instruction, or we have
// more than one copy of it in a register. Either way,
// use that input as the one that is clobbered.
args[0], args[1] = args[1], args[0]
} else {
m := s.compatRegs(v.Args[0].Type)
m &^= s.values[v.Args[0].ID].regs // a register not already holding v.Args[0]
s.allocValToReg(v.Args[0], m, true, v.Line)
}
}
// Now that all args are in regs, we're ready to issue the value itself. // Now that all args are in regs, we're ready to issue the value itself.
// Before we pick a register for the output value, allow input registers // Before we pick a register for the output value, allow input registers
// to be deallocated. We do this here so that the output can use the // to be deallocated. We do this here so that the output can use the
...@@ -908,20 +941,10 @@ func (s *regAllocState) regalloc(f *Func) { ...@@ -908,20 +941,10 @@ func (s *regAllocState) regalloc(f *Func) {
s.f.Fatalf("bad mask %s\n", v.LongString()) s.f.Fatalf("bad mask %s\n", v.LongString())
} }
if opcodeTable[v.Op].resultInArg0 { if opcodeTable[v.Op].resultInArg0 {
// Output must use the same register as input 0.
r := register(s.f.getHome(args[0].ID).(*Register).Num) r := register(s.f.getHome(args[0].ID).(*Register).Num)
if (mask&^s.used)>>r&1 != 0 {
mask = regMask(1) << r
}
if opcodeTable[v.Op].commutative {
r := register(s.f.getHome(args[1].ID).(*Register).Num)
if (mask&^s.used)>>r&1 != 0 {
mask = regMask(1) << r mask = regMask(1) << r
} }
}
// TODO: enforce resultInArg0 always, instead of treating it
// as a hint. Then we don't need the special cases adding
// moves all throughout ssa.go:genValue.
}
r := s.allocReg(v, mask) r := s.allocReg(v, mask)
s.assignReg(r, v, v) s.assignReg(r, v, v)
} }
......
...@@ -16653,6 +16653,17 @@ func rewriteValueAMD64_OpAMD64SUBBconst(v *Value, config *Config) bool { ...@@ -16653,6 +16653,17 @@ func rewriteValueAMD64_OpAMD64SUBBconst(v *Value, config *Config) bool {
v.AddArg(x) v.AddArg(x)
return true return true
} }
// match: (SUBBconst [c] x)
// cond:
// result: (ADDBconst [int64(int8(-c))] x)
for {
c := v.AuxInt
x := v.Args[0]
v.reset(OpAMD64ADDBconst)
v.AuxInt = int64(int8(-c))
v.AddArg(x)
return true
}
// match: (SUBBconst (MOVBconst [d]) [c]) // match: (SUBBconst (MOVBconst [d]) [c])
// cond: // cond:
// result: (MOVBconst [int64(int8(d-c))]) // result: (MOVBconst [int64(int8(d-c))])
...@@ -16751,6 +16762,17 @@ func rewriteValueAMD64_OpAMD64SUBLconst(v *Value, config *Config) bool { ...@@ -16751,6 +16762,17 @@ func rewriteValueAMD64_OpAMD64SUBLconst(v *Value, config *Config) bool {
v.AddArg(x) v.AddArg(x)
return true return true
} }
// match: (SUBLconst [c] x)
// cond:
// result: (ADDLconst [int64(int32(-c))] x)
for {
c := v.AuxInt
x := v.Args[0]
v.reset(OpAMD64ADDLconst)
v.AuxInt = int64(int32(-c))
v.AddArg(x)
return true
}
// match: (SUBLconst (MOVLconst [d]) [c]) // match: (SUBLconst (MOVLconst [d]) [c])
// cond: // cond:
// result: (MOVLconst [int64(int32(d-c))]) // result: (MOVLconst [int64(int32(d-c))])
...@@ -16854,6 +16876,20 @@ func rewriteValueAMD64_OpAMD64SUBQconst(v *Value, config *Config) bool { ...@@ -16854,6 +16876,20 @@ func rewriteValueAMD64_OpAMD64SUBQconst(v *Value, config *Config) bool {
v.AddArg(x) v.AddArg(x)
return true return true
} }
// match: (SUBQconst [c] x)
// cond: c != -(1<<31)
// result: (ADDQconst [-c] x)
for {
c := v.AuxInt
x := v.Args[0]
if !(c != -(1 << 31)) {
break
}
v.reset(OpAMD64ADDQconst)
v.AuxInt = -c
v.AddArg(x)
return true
}
// match: (SUBQconst (MOVQconst [d]) [c]) // match: (SUBQconst (MOVQconst [d]) [c])
// cond: // cond:
// result: (MOVQconst [d-c]) // result: (MOVQconst [d-c])
...@@ -16955,6 +16991,17 @@ func rewriteValueAMD64_OpAMD64SUBWconst(v *Value, config *Config) bool { ...@@ -16955,6 +16991,17 @@ func rewriteValueAMD64_OpAMD64SUBWconst(v *Value, config *Config) bool {
v.AddArg(x) v.AddArg(x)
return true return true
} }
// match: (SUBWconst [c] x)
// cond:
// result: (ADDWconst [int64(int16(-c))] x)
for {
c := v.AuxInt
x := v.Args[0]
v.reset(OpAMD64ADDWconst)
v.AuxInt = int64(int16(-c))
v.AddArg(x)
return true
}
// match: (SUBWconst (MOVWconst [d]) [c]) // match: (SUBWconst (MOVWconst [d]) [c])
// cond: // cond:
// result: (MOVWconst [int64(int16(d-c))]) // result: (MOVWconst [int64(int16(d-c))])
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment