Commit 080187f4 authored by Giovanni Bajo

cmd/compile: implement CMOV on amd64

This builds upon the branchelim pass, activating it for amd64 and
lowering CondSelect. Special care is taken with conditional moves that
follow floating-point comparisons, so that NaN operands are handled correctly.

Benchmark results on Xeon E5630 (Westmere EP):

name                      old time/op    new time/op    delta
BinaryTree17-16              4.99s ± 9%     4.66s ± 2%     ~     (p=0.095 n=5+5)
Fannkuch11-16                4.93s ± 3%     5.04s ± 2%     ~     (p=0.548 n=5+5)
FmtFprintfEmpty-16          58.8ns ± 7%    61.4ns ±14%     ~     (p=0.579 n=5+5)
FmtFprintfString-16          114ns ± 2%     114ns ± 4%     ~     (p=0.603 n=5+5)
FmtFprintfInt-16             181ns ± 4%     125ns ± 3%  -30.90%  (p=0.008 n=5+5)
FmtFprintfIntInt-16          263ns ± 2%     217ns ± 2%  -17.34%  (p=0.008 n=5+5)
FmtFprintfPrefixedInt-16     230ns ± 1%     212ns ± 1%   -7.99%  (p=0.008 n=5+5)
FmtFprintfFloat-16           411ns ± 3%     344ns ± 5%  -16.43%  (p=0.008 n=5+5)
FmtManyArgs-16               828ns ± 4%     790ns ± 2%   -4.59%  (p=0.032 n=5+5)
GobDecode-16                10.9ms ± 4%    10.8ms ± 5%     ~     (p=0.548 n=5+5)
GobEncode-16                9.52ms ± 5%    9.46ms ± 2%     ~     (p=1.000 n=5+5)
Gzip-16                      334ms ± 2%     337ms ± 2%     ~     (p=0.548 n=5+5)
Gunzip-16                   64.4ms ± 1%    65.0ms ± 1%   +1.00%  (p=0.008 n=5+5)
HTTPClientServer-16          156µs ± 3%     155µs ± 3%     ~     (p=0.690 n=5+5)
JSONEncode-16               21.0ms ± 1%    21.8ms ± 0%   +3.76%  (p=0.016 n=5+4)
JSONDecode-16               95.1ms ± 0%    95.7ms ± 1%     ~     (p=0.151 n=5+5)
Mandelbrot200-16            6.38ms ± 1%    6.42ms ± 1%     ~     (p=0.095 n=5+5)
GoParse-16                  5.47ms ± 2%    5.36ms ± 1%   -1.95%  (p=0.016 n=5+5)
RegexpMatchEasy0_32-16       111ns ± 1%     111ns ± 1%     ~     (p=0.635 n=5+4)
RegexpMatchEasy0_1K-16       408ns ± 1%     411ns ± 2%     ~     (p=0.087 n=5+5)
RegexpMatchEasy1_32-16       103ns ± 1%     104ns ± 1%     ~     (p=0.484 n=5+5)
RegexpMatchEasy1_1K-16       659ns ± 2%     652ns ± 1%     ~     (p=0.571 n=5+5)
RegexpMatchMedium_32-16      176ns ± 2%     174ns ± 1%     ~     (p=0.476 n=5+5)
RegexpMatchMedium_1K-16     58.6µs ± 4%    57.7µs ± 4%     ~     (p=0.548 n=5+5)
RegexpMatchHard_32-16       3.07µs ± 3%    3.04µs ± 4%     ~     (p=0.421 n=5+5)
RegexpMatchHard_1K-16       89.2µs ± 1%    87.9µs ± 2%   -1.52%  (p=0.032 n=5+5)
Revcomp-16                   575ms ± 0%     587ms ± 2%   +2.12%  (p=0.032 n=4+5)
Template-16                  110ms ± 1%     107ms ± 3%   -3.00%  (p=0.032 n=5+5)
TimeParse-16                 463ns ± 0%     462ns ± 0%     ~     (p=0.810 n=5+4)
TimeFormat-16                538ns ± 0%     535ns ± 0%   -0.63%  (p=0.024 n=5+5)

name                      old speed      new speed      delta
GobDecode-16              70.7MB/s ± 4%  71.4MB/s ± 5%     ~     (p=0.452 n=5+5)
GobEncode-16              80.7MB/s ± 5%  81.2MB/s ± 2%     ~     (p=1.000 n=5+5)
Gzip-16                   58.2MB/s ± 2%  57.7MB/s ± 2%     ~     (p=0.452 n=5+5)
Gunzip-16                  302MB/s ± 1%   299MB/s ± 1%   -0.99%  (p=0.008 n=5+5)
JSONEncode-16             92.4MB/s ± 1%  89.1MB/s ± 0%   -3.63%  (p=0.016 n=5+4)
JSONDecode-16             20.4MB/s ± 0%  20.3MB/s ± 1%     ~     (p=0.135 n=5+5)
GoParse-16                10.6MB/s ± 2%  10.8MB/s ± 1%   +2.00%  (p=0.016 n=5+5)
RegexpMatchEasy0_32-16     286MB/s ± 1%   285MB/s ± 3%     ~     (p=1.000 n=5+5)
RegexpMatchEasy0_1K-16    2.51GB/s ± 1%  2.49GB/s ± 2%     ~     (p=0.095 n=5+5)
RegexpMatchEasy1_32-16     309MB/s ± 1%   307MB/s ± 1%     ~     (p=0.548 n=5+5)
RegexpMatchEasy1_1K-16    1.55GB/s ± 2%  1.57GB/s ± 1%     ~     (p=0.690 n=5+5)
RegexpMatchMedium_32-16   5.68MB/s ± 2%  5.73MB/s ± 1%     ~     (p=0.579 n=5+5)
RegexpMatchMedium_1K-16   17.5MB/s ± 4%  17.8MB/s ± 4%     ~     (p=0.500 n=5+5)
RegexpMatchHard_32-16     10.4MB/s ± 3%  10.5MB/s ± 4%     ~     (p=0.460 n=5+5)
RegexpMatchHard_1K-16     11.5MB/s ± 1%  11.7MB/s ± 2%   +1.57%  (p=0.032 n=5+5)
Revcomp-16                 442MB/s ± 0%   433MB/s ± 2%   -2.05%  (p=0.032 n=4+5)
Template-16               17.7MB/s ± 1%  18.2MB/s ± 3%   +3.12%  (p=0.032 n=5+5)

Change-Id: Ic7cb7374d07da031e771bdcbfdd832fd1b17159c
Reviewed-on: https://go-review.googlesource.com/98695
Reviewed-by: Ilya Tocar <ilya.tocar@intel.com>
parent fdf5aaf5
...@@ -398,7 +398,18 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { ...@@ -398,7 +398,18 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.To.Type = obj.TYPE_REG p.To.Type = obj.TYPE_REG
p.To.Reg = r p.To.Reg = r
case ssa.OpAMD64CMOVQEQ, ssa.OpAMD64CMOVLEQ: case ssa.OpAMD64CMOVQEQ, ssa.OpAMD64CMOVLEQ, ssa.OpAMD64CMOVWEQ,
ssa.OpAMD64CMOVQLT, ssa.OpAMD64CMOVLLT, ssa.OpAMD64CMOVWLT,
ssa.OpAMD64CMOVQNE, ssa.OpAMD64CMOVLNE, ssa.OpAMD64CMOVWNE,
ssa.OpAMD64CMOVQGT, ssa.OpAMD64CMOVLGT, ssa.OpAMD64CMOVWGT,
ssa.OpAMD64CMOVQLE, ssa.OpAMD64CMOVLLE, ssa.OpAMD64CMOVWLE,
ssa.OpAMD64CMOVQGE, ssa.OpAMD64CMOVLGE, ssa.OpAMD64CMOVWGE,
ssa.OpAMD64CMOVQHI, ssa.OpAMD64CMOVLHI, ssa.OpAMD64CMOVWHI,
ssa.OpAMD64CMOVQLS, ssa.OpAMD64CMOVLLS, ssa.OpAMD64CMOVWLS,
ssa.OpAMD64CMOVQCC, ssa.OpAMD64CMOVLCC, ssa.OpAMD64CMOVWCC,
ssa.OpAMD64CMOVQCS, ssa.OpAMD64CMOVLCS, ssa.OpAMD64CMOVWCS,
ssa.OpAMD64CMOVQGTF, ssa.OpAMD64CMOVLGTF, ssa.OpAMD64CMOVWGTF,
ssa.OpAMD64CMOVQGEF, ssa.OpAMD64CMOVLGEF, ssa.OpAMD64CMOVWGEF:
r := v.Reg() r := v.Reg()
if r != v.Args[0].Reg() { if r != v.Args[0].Reg() {
v.Fatalf("input[0] and output not in same register %s", v.LongString()) v.Fatalf("input[0] and output not in same register %s", v.LongString())
...@@ -409,6 +420,71 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { ...@@ -409,6 +420,71 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.To.Type = obj.TYPE_REG p.To.Type = obj.TYPE_REG
p.To.Reg = r p.To.Reg = r
case ssa.OpAMD64CMOVQNEF, ssa.OpAMD64CMOVLNEF, ssa.OpAMD64CMOVWNEF:
r := v.Reg()
if r != v.Args[0].Reg() {
v.Fatalf("input[0] and output not in same register %s", v.LongString())
}
// Flag condition: ^ZERO || PARITY
// Generate:
// CMOV*NE SRC,DST
// CMOV*PS SRC,DST
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
p.From.Reg = v.Args[1].Reg()
p.To.Type = obj.TYPE_REG
p.To.Reg = r
var q *obj.Prog
if v.Op == ssa.OpAMD64CMOVQNEF {
q = s.Prog(x86.ACMOVQPS)
} else if v.Op == ssa.OpAMD64CMOVLNEF {
q = s.Prog(x86.ACMOVLPS)
} else {
q = s.Prog(x86.ACMOVWPS)
}
q.From.Type = obj.TYPE_REG
q.From.Reg = v.Args[1].Reg()
q.To.Type = obj.TYPE_REG
q.To.Reg = r
case ssa.OpAMD64CMOVQEQF, ssa.OpAMD64CMOVLEQF, ssa.OpAMD64CMOVWEQF:
r := v.Reg()
if r != v.Args[0].Reg() {
v.Fatalf("input[0] and output not in same register %s", v.LongString())
}
// Flag condition: ZERO && !PARITY
// Generate:
// MOV SRC,AX
// CMOV*NE DST,AX
// CMOV*PC AX,DST
//
// TODO(rasky): we could generate:
// CMOV*NE DST,SRC
// CMOV*PC SRC,DST
// But this requires a way for regalloc to know that SRC might be
// clobbered by this instruction.
if v.Args[1].Reg() != x86.REG_AX {
opregreg(s, moveByType(v.Type), x86.REG_AX, v.Args[1].Reg())
}
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
p.From.Reg = r
p.To.Type = obj.TYPE_REG
p.To.Reg = x86.REG_AX
var q *obj.Prog
if v.Op == ssa.OpAMD64CMOVQEQF {
q = s.Prog(x86.ACMOVQPC)
} else if v.Op == ssa.OpAMD64CMOVLEQF {
q = s.Prog(x86.ACMOVLPC)
} else {
q = s.Prog(x86.ACMOVWPC)
}
q.From.Type = obj.TYPE_REG
q.From.Reg = x86.REG_AX
q.To.Type = obj.TYPE_REG
q.To.Reg = r
case ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst: case ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst:
r := v.Reg() r := v.Reg()
if r != v.Args[0].Reg() { if r != v.Args[0].Reg() {
......
...@@ -19,7 +19,10 @@ package ssa ...@@ -19,7 +19,10 @@ package ssa
// rewrite Phis in the postdominator as CondSelects. // rewrite Phis in the postdominator as CondSelects.
func branchelim(f *Func) { func branchelim(f *Func) {
// FIXME: add support for lowering CondSelects on more architectures // FIXME: add support for lowering CondSelects on more architectures
if f.Config.arch != "arm64" { switch f.Config.arch {
case "arm64", "amd64":
// implemented
default:
return return
} }
...@@ -32,10 +35,22 @@ func branchelim(f *Func) { ...@@ -32,10 +35,22 @@ func branchelim(f *Func) {
} }
} }
func canCondSelect(v *Value) bool { func canCondSelect(v *Value, arch string) bool {
// For now, stick to simple scalars that fit in registers // For now, stick to simple scalars that fit in registers
sz := v.Type.Size() switch {
return sz <= v.Block.Func.Config.RegSize && (v.Type.IsInteger() || v.Type.IsPtrShaped()) case v.Type.Size() > v.Block.Func.Config.RegSize:
return false
case v.Type.IsPtrShaped():
return true
case v.Type.IsInteger():
if arch == "amd64" && v.Type.Size() < 2 {
// amd64 doesn't support CMOV with byte registers
return false
}
return true
default:
return false
}
} }
func elimIf(f *Func, dom *Block) bool { func elimIf(f *Func, dom *Block) bool {
...@@ -68,7 +83,7 @@ func elimIf(f *Func, dom *Block) bool { ...@@ -68,7 +83,7 @@ func elimIf(f *Func, dom *Block) bool {
for _, v := range post.Values { for _, v := range post.Values {
if v.Op == OpPhi { if v.Op == OpPhi {
hasphis = true hasphis = true
if !canCondSelect(v) { if !canCondSelect(v, f.Config.arch) {
return false return false
} }
} }
...@@ -169,7 +184,7 @@ func elimIfElse(f *Func, b *Block) bool { ...@@ -169,7 +184,7 @@ func elimIfElse(f *Func, b *Block) bool {
for _, v := range post.Values { for _, v := range post.Values {
if v.Op == OpPhi { if v.Op == OpPhi {
hasphis = true hasphis = true
if !canCondSelect(v) { if !canCondSelect(v, f.Config.arch) {
return false return false
} }
} }
......
...@@ -11,128 +11,162 @@ import ( ...@@ -11,128 +11,162 @@ import (
// Test that a trivial 'if' is eliminated // Test that a trivial 'if' is eliminated
func TestBranchElimIf(t *testing.T) { func TestBranchElimIf(t *testing.T) {
c := testConfig(t) var testData = []struct {
c.config.arch = "arm64" // FIXME arch string
boolType := types.New(types.TBOOL) intType string
intType := types.New(types.TINT32) ok bool
fun := c.Fun("entry", }{
Bloc("entry", {"arm64", "int32", true},
Valu("start", OpInitMem, types.TypeMem, 0, nil), {"amd64", "int32", true},
Valu("sb", OpSB, types.TypeInvalid, 0, nil), {"amd64", "int8", false},
Valu("const1", OpConst32, intType, 1, nil), }
Valu("const2", OpConst32, intType, 2, nil),
Valu("addr", OpAddr, boolType.PtrTo(), 0, nil, "sb"),
Valu("cond", OpLoad, boolType, 0, nil, "addr", "start"),
If("cond", "b2", "b3")),
Bloc("b2",
Goto("b3")),
Bloc("b3",
Valu("phi", OpPhi, intType, 0, nil, "const1", "const2"),
Valu("retstore", OpStore, types.TypeMem, 0, nil, "phi", "sb", "start"),
Exit("retstore")))
CheckFunc(fun.f) for _, data := range testData {
branchelim(fun.f) t.Run(data.arch+"/"+data.intType, func(t *testing.T) {
CheckFunc(fun.f) c := testConfigArch(t, data.arch)
Deadcode(fun.f) boolType := c.config.Types.Bool
CheckFunc(fun.f) var intType *types.Type
switch data.intType {
case "int32":
intType = c.config.Types.Int32
case "int8":
intType = c.config.Types.Int8
default:
t.Fatal("invalid integer type:", data.intType)
}
fun := c.Fun("entry",
Bloc("entry",
Valu("start", OpInitMem, types.TypeMem, 0, nil),
Valu("sb", OpSB, types.TypeInvalid, 0, nil),
Valu("const1", OpConst32, intType, 1, nil),
Valu("const2", OpConst32, intType, 2, nil),
Valu("addr", OpAddr, boolType.PtrTo(), 0, nil, "sb"),
Valu("cond", OpLoad, boolType, 0, nil, "addr", "start"),
If("cond", "b2", "b3")),
Bloc("b2",
Goto("b3")),
Bloc("b3",
Valu("phi", OpPhi, intType, 0, nil, "const1", "const2"),
Valu("retstore", OpStore, types.TypeMem, 0, nil, "phi", "sb", "start"),
Exit("retstore")))
if len(fun.f.Blocks) != 1 { CheckFunc(fun.f)
t.Errorf("expected 1 block after branchelim and deadcode; found %d", len(fun.f.Blocks)) branchelim(fun.f)
} CheckFunc(fun.f)
if fun.values["phi"].Op != OpCondSelect { Deadcode(fun.f)
t.Errorf("expected phi op to be CondSelect; found op %s", fun.values["phi"].Op) CheckFunc(fun.f)
}
if fun.values["phi"].Args[2] != fun.values["cond"] { if data.ok {
t.Errorf("expected CondSelect condition to be %s; found %s", fun.values["cond"], fun.values["phi"].Args[2])
} if len(fun.f.Blocks) != 1 {
if fun.blocks["entry"].Kind != BlockExit { t.Fatalf("expected 1 block after branchelim and deadcode; found %d", len(fun.f.Blocks))
t.Errorf("expected entry to be BlockExit; found kind %s", fun.blocks["entry"].Kind.String()) }
if fun.values["phi"].Op != OpCondSelect {
t.Fatalf("expected phi op to be CondSelect; found op %s", fun.values["phi"].Op)
}
if fun.values["phi"].Args[2] != fun.values["cond"] {
t.Errorf("expected CondSelect condition to be %s; found %s", fun.values["cond"], fun.values["phi"].Args[2])
}
if fun.blocks["entry"].Kind != BlockExit {
t.Errorf("expected entry to be BlockExit; found kind %s", fun.blocks["entry"].Kind.String())
}
} else {
if len(fun.f.Blocks) != 3 {
t.Fatalf("expected 3 block after branchelim and deadcode; found %d", len(fun.f.Blocks))
}
}
})
} }
} }
// Test that a trivial if/else is eliminated // Test that a trivial if/else is eliminated
func TestBranchElimIfElse(t *testing.T) { func TestBranchElimIfElse(t *testing.T) {
c := testConfig(t) for _, arch := range []string{"arm64", "amd64"} {
c.config.arch = "arm64" // FIXME t.Run(arch, func(t *testing.T) {
boolType := types.New(types.TBOOL) c := testConfigArch(t, arch)
intType := types.New(types.TINT32) boolType := c.config.Types.Bool
fun := c.Fun("entry", intType := c.config.Types.Int32
Bloc("entry", fun := c.Fun("entry",
Valu("start", OpInitMem, types.TypeMem, 0, nil), Bloc("entry",
Valu("sb", OpSB, types.TypeInvalid, 0, nil), Valu("start", OpInitMem, types.TypeMem, 0, nil),
Valu("const1", OpConst32, intType, 1, nil), Valu("sb", OpSB, types.TypeInvalid, 0, nil),
Valu("const2", OpConst32, intType, 2, nil), Valu("const1", OpConst32, intType, 1, nil),
Valu("addr", OpAddr, boolType.PtrTo(), 0, nil, "sb"), Valu("const2", OpConst32, intType, 2, nil),
Valu("cond", OpLoad, boolType, 0, nil, "addr", "start"), Valu("addr", OpAddr, boolType.PtrTo(), 0, nil, "sb"),
If("cond", "b2", "b3")), Valu("cond", OpLoad, boolType, 0, nil, "addr", "start"),
Bloc("b2", If("cond", "b2", "b3")),
Goto("b4")), Bloc("b2",
Bloc("b3", Goto("b4")),
Goto("b4")), Bloc("b3",
Bloc("b4", Goto("b4")),
Valu("phi", OpPhi, intType, 0, nil, "const1", "const2"), Bloc("b4",
Valu("retstore", OpStore, types.TypeMem, 0, nil, "phi", "sb", "start"), Valu("phi", OpPhi, intType, 0, nil, "const1", "const2"),
Exit("retstore"))) Valu("retstore", OpStore, types.TypeMem, 0, nil, "phi", "sb", "start"),
Exit("retstore")))
CheckFunc(fun.f) CheckFunc(fun.f)
branchelim(fun.f) branchelim(fun.f)
CheckFunc(fun.f) CheckFunc(fun.f)
Deadcode(fun.f) Deadcode(fun.f)
CheckFunc(fun.f) CheckFunc(fun.f)
if len(fun.f.Blocks) != 1 { if len(fun.f.Blocks) != 1 {
t.Errorf("expected 1 block after branchelim; found %d", len(fun.f.Blocks)) t.Fatalf("expected 1 block after branchelim; found %d", len(fun.f.Blocks))
} }
if fun.values["phi"].Op != OpCondSelect { if fun.values["phi"].Op != OpCondSelect {
t.Errorf("expected phi op to be CondSelect; found op %s", fun.values["phi"].Op) t.Fatalf("expected phi op to be CondSelect; found op %s", fun.values["phi"].Op)
} }
if fun.values["phi"].Args[2] != fun.values["cond"] { if fun.values["phi"].Args[2] != fun.values["cond"] {
t.Errorf("expected CondSelect condition to be %s; found %s", fun.values["cond"], fun.values["phi"].Args[2]) t.Errorf("expected CondSelect condition to be %s; found %s", fun.values["cond"], fun.values["phi"].Args[2])
} }
if fun.blocks["entry"].Kind != BlockExit { if fun.blocks["entry"].Kind != BlockExit {
t.Errorf("expected entry to be BlockExit; found kind %s", fun.blocks["entry"].Kind.String()) t.Errorf("expected entry to be BlockExit; found kind %s", fun.blocks["entry"].Kind.String())
}
})
} }
} }
// Test that an if/else CFG that loops back // Test that an if/else CFG that loops back
// into itself does *not* get eliminated. // into itself does *not* get eliminated.
func TestNoBranchElimLoop(t *testing.T) { func TestNoBranchElimLoop(t *testing.T) {
c := testConfig(t) for _, arch := range []string{"arm64", "amd64"} {
c.config.arch = "arm64" // FIXME t.Run(arch, func(t *testing.T) {
boolType := types.New(types.TBOOL) c := testConfigArch(t, arch)
intType := types.New(types.TINT32) boolType := c.config.Types.Bool
intType := c.config.Types.Int32
// The control flow here is totally bogus, // The control flow here is totally bogus,
// but a dead cycle seems like the only plausible // but a dead cycle seems like the only plausible
// way to arrive at a diamond CFG that is also a loop. // way to arrive at a diamond CFG that is also a loop.
fun := c.Fun("entry", fun := c.Fun("entry",
Bloc("entry", Bloc("entry",
Valu("start", OpInitMem, types.TypeMem, 0, nil), Valu("start", OpInitMem, types.TypeMem, 0, nil),
Valu("sb", OpSB, types.TypeInvalid, 0, nil), Valu("sb", OpSB, types.TypeInvalid, 0, nil),
Valu("const2", OpConst32, intType, 2, nil), Valu("const2", OpConst32, intType, 2, nil),
Valu("const3", OpConst32, intType, 3, nil), Valu("const3", OpConst32, intType, 3, nil),
Goto("b5")), Goto("b5")),
Bloc("b2", Bloc("b2",
Valu("addr", OpAddr, boolType.PtrTo(), 0, nil, "sb"), Valu("addr", OpAddr, boolType.PtrTo(), 0, nil, "sb"),
Valu("cond", OpLoad, boolType, 0, nil, "addr", "start"), Valu("cond", OpLoad, boolType, 0, nil, "addr", "start"),
Valu("phi", OpPhi, intType, 0, nil, "const2", "const3"), Valu("phi", OpPhi, intType, 0, nil, "const2", "const3"),
If("cond", "b3", "b4")), If("cond", "b3", "b4")),
Bloc("b3", Bloc("b3",
Goto("b2")), Goto("b2")),
Bloc("b4", Bloc("b4",
Goto("b2")), Goto("b2")),
Bloc("b5", Bloc("b5",
Exit("start"))) Exit("start")))
CheckFunc(fun.f) CheckFunc(fun.f)
branchelim(fun.f) branchelim(fun.f)
CheckFunc(fun.f) CheckFunc(fun.f)
if len(fun.f.Blocks) != 5 { if len(fun.f.Blocks) != 5 {
t.Errorf("expected 5 block after branchelim; found %d", len(fun.f.Blocks)) t.Errorf("expected 5 block after branchelim; found %d", len(fun.f.Blocks))
} }
if fun.values["phi"].Op != OpPhi { if fun.values["phi"].Op != OpPhi {
t.Errorf("expected phi op to be CondSelect; found op %s", fun.values["phi"].Op) t.Errorf("expected phi op to be CondSelect; found op %s", fun.values["phi"].Op)
}
})
} }
} }
...@@ -7,6 +7,7 @@ package ssa ...@@ -7,6 +7,7 @@ package ssa
import ( import (
"cmd/compile/internal/types" "cmd/compile/internal/types"
"cmd/internal/obj" "cmd/internal/obj"
"cmd/internal/obj/arm64"
"cmd/internal/obj/s390x" "cmd/internal/obj/s390x"
"cmd/internal/obj/x86" "cmd/internal/obj/x86"
"cmd/internal/src" "cmd/internal/src"
...@@ -22,6 +23,7 @@ var Copyelim = copyelim ...@@ -22,6 +23,7 @@ var Copyelim = copyelim
var testCtxts = map[string]*obj.Link{ var testCtxts = map[string]*obj.Link{
"amd64": obj.Linknew(&x86.Linkamd64), "amd64": obj.Linknew(&x86.Linkamd64),
"s390x": obj.Linknew(&s390x.Links390x), "s390x": obj.Linknew(&s390x.Links390x),
"arm64": obj.Linknew(&arm64.Linkarm64),
} }
func testConfig(tb testing.TB) *Conf { return testConfigArch(tb, "amd64") } func testConfig(tb testing.TB) *Conf { return testConfigArch(tb, "amd64") }
......
...@@ -475,6 +475,52 @@ ...@@ -475,6 +475,52 @@
(ClosureCall [argwid] entry closure mem) -> (CALLclosure [argwid] entry closure mem) (ClosureCall [argwid] entry closure mem) -> (CALLclosure [argwid] entry closure mem)
(InterCall [argwid] entry mem) -> (CALLinter [argwid] entry mem) (InterCall [argwid] entry mem) -> (CALLinter [argwid] entry mem)
// Lowering conditional moves
// If the condition is a SETxx, we can just run a CMOV from the comparison that was
// setting the flags.
// Legend: HI=unsigned ABOVE, CS=unsigned BELOW, CC=unsigned ABOVE EQUAL, LS=unsigned BELOW EQUAL
// Note the operand swap: CondSelect x y becomes CMOV y x, because the CMOV ops
// return arg1 when the condition holds and arg0 otherwise.
// The CMOV width (Q/L/W) is chosen from the type of the selected value.
(CondSelect <t> x y (SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) cond)) && (is64BitInt(t) || isPtr(t))
-> (CMOVQ(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) y x cond)
(CondSelect <t> x y (SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) cond)) && is32BitInt(t)
-> (CMOVL(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) y x cond)
(CondSelect <t> x y (SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) cond)) && is16BitInt(t)
-> (CMOVW(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) y x cond)
// If the condition does not set the flags, we need to generate a comparison.
// Conditions narrower than 8 bytes are first zero-extended to 64 bits; the
// rewritten CondSelect is then matched again by the 8-byte rules below.
(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 1
-> (CondSelect <t> x y (MOVBQZX <typ.UInt64> check))
(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 2
-> (CondSelect <t> x y (MOVWQZX <typ.UInt64> check))
(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 4
-> (CondSelect <t> x y (MOVLQZX <typ.UInt64> check))
// An 8-byte condition is compared against zero and x is selected when it is non-zero.
(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 8 && (is64BitInt(t) || isPtr(t))
-> (CMOVQNE y x (CMPQconst [0] check))
(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 8 && is32BitInt(t)
-> (CMOVLNE y x (CMPQconst [0] check))
(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 8 && is16BitInt(t)
-> (CMOVWNE y x (CMPQconst [0] check))
// Absorb InvertFlags
// The condition is mirrored rather than negated: EQ and NE stay as they are,
// while LT<->GT, LE<->GE, HI<->CS and CC<->LS swap.
// The floating-point (*F) variants are not handled by these rules.
(CMOVQ(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS) x y (InvertFlags cond))
-> (CMOVQ(EQ|NE|GT|LT|GE|LE|CS|HI|LS|CC) x y cond)
(CMOVL(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS) x y (InvertFlags cond))
-> (CMOVL(EQ|NE|GT|LT|GE|LE|CS|HI|LS|CC) x y cond)
(CMOVW(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS) x y (InvertFlags cond))
-> (CMOVW(EQ|NE|GT|LT|GE|LE|CS|HI|LS|CC) x y cond)
// Absorb constants generated during lower
// When the flags argument is a known constant, the CMOV folds to one of its
// operands: arg1 if the condition holds for that flag state, arg0 otherwise.
// Each flag constant gets a pair of rules (condition true / condition false)
// that together cover all ten integer conditions.
(CMOVQ(EQ|LE|GE|CC|LS) _ x (FlagEQ)) -> x
(CMOVQ(NE|LT|GT|CS|HI) y _ (FlagEQ)) -> y
(CMOVQ(NE|GT|GE|HI|CC) _ x (FlagGT_UGT)) -> x
(CMOVQ(EQ|LE|LT|LS|CS) y _ (FlagGT_UGT)) -> y
(CMOVQ(NE|GT|GE|LS|CS) _ x (FlagGT_ULT)) -> x
(CMOVQ(EQ|LE|LT|HI|CC) y _ (FlagGT_ULT)) -> y
(CMOVQ(NE|LT|LE|CS|LS) _ x (FlagLT_ULT)) -> x
(CMOVQ(EQ|GT|GE|HI|CC) y _ (FlagLT_ULT)) -> y
(CMOVQ(NE|LT|LE|HI|CC) _ x (FlagLT_UGT)) -> x
(CMOVQ(EQ|GT|GE|CS|LS) y _ (FlagLT_UGT)) -> y
// Miscellaneous // Miscellaneous
(Convert <t> x mem) && config.PtrSize == 8 -> (MOVQconvert <t> x mem) (Convert <t> x mem) && config.PtrSize == 8 -> (MOVQconvert <t> x mem)
(Convert <t> x mem) && config.PtrSize == 4 -> (MOVLconvert <t> x mem) (Convert <t> x mem) && config.PtrSize == 4 -> (MOVLconvert <t> x mem)
...@@ -1353,6 +1399,10 @@ ...@@ -1353,6 +1399,10 @@
(CMPLconst x [0]) -> (TESTL x x) (CMPLconst x [0]) -> (TESTL x x)
(CMPWconst x [0]) -> (TESTW x x) (CMPWconst x [0]) -> (TESTW x x)
(CMPBconst x [0]) -> (TESTB x x) (CMPBconst x [0]) -> (TESTB x x)
(TESTQconst [-1] x) -> (TESTQ x x)
(TESTLconst [-1] x) -> (TESTL x x)
(TESTWconst [-1] x) -> (TESTW x x)
(TESTBconst [-1] x) -> (TESTB x x)
// Combining byte loads into larger (unaligned) loads. // Combining byte loads into larger (unaligned) loads.
// There are many ways these combinations could occur. This is // There are many ways these combinations could occur. This is
......
...@@ -132,6 +132,7 @@ func init() { ...@@ -132,6 +132,7 @@ func init() {
gpload = regInfo{inputs: []regMask{gpspsb, 0}, outputs: gponly} gpload = regInfo{inputs: []regMask{gpspsb, 0}, outputs: gponly}
gp21load = regInfo{inputs: []regMask{gp, gpspsb, 0}, outputs: gponly} gp21load = regInfo{inputs: []regMask{gp, gpspsb, 0}, outputs: gponly}
gploadidx = regInfo{inputs: []regMask{gpspsb, gpsp, 0}, outputs: gponly} gploadidx = regInfo{inputs: []regMask{gpspsb, gpsp, 0}, outputs: gponly}
gp21pax = regInfo{inputs: []regMask{gp &^ ax, gp}, outputs: []regMask{gp &^ ax}, clobbers: ax}
gpstore = regInfo{inputs: []regMask{gpspsb, gpsp, 0}} gpstore = regInfo{inputs: []regMask{gpspsb, gpsp, 0}}
gpstoreconst = regInfo{inputs: []regMask{gpspsb, 0}} gpstoreconst = regInfo{inputs: []regMask{gpspsb, 0}}
...@@ -340,10 +341,57 @@ func init() { ...@@ -340,10 +341,57 @@ func init() {
{name: "BSRQ", argLength: 1, reg: gp11flags, asm: "BSRQ", typ: "(UInt64,Flags)"}, // # of high-order zeroes in 64-bit arg {name: "BSRQ", argLength: 1, reg: gp11flags, asm: "BSRQ", typ: "(UInt64,Flags)"}, // # of high-order zeroes in 64-bit arg
{name: "BSRL", argLength: 1, reg: gp11flags, asm: "BSRL", typ: "(UInt32,Flags)"}, // # of high-order zeroes in 32-bit arg {name: "BSRL", argLength: 1, reg: gp11flags, asm: "BSRL", typ: "(UInt32,Flags)"}, // # of high-order zeroes in 32-bit arg
// Note ASM for ops moves whole register // CMOV instructions: 64, 32 and 16-bit sizes.
// // if arg2 encodes a true result, return arg1, else arg0
{name: "CMOVQEQ", argLength: 3, reg: gp21, asm: "CMOVQEQ", resultInArg0: true}, // if arg2 encodes "equal" return arg1 else arg0 {name: "CMOVQEQ", argLength: 3, reg: gp21, asm: "CMOVQEQ", resultInArg0: true},
{name: "CMOVLEQ", argLength: 3, reg: gp21, asm: "CMOVLEQ", resultInArg0: true}, // if arg2 encodes "equal" return arg1 else arg0 {name: "CMOVQNE", argLength: 3, reg: gp21, asm: "CMOVQNE", resultInArg0: true},
{name: "CMOVQLT", argLength: 3, reg: gp21, asm: "CMOVQLT", resultInArg0: true},
{name: "CMOVQGT", argLength: 3, reg: gp21, asm: "CMOVQGT", resultInArg0: true},
{name: "CMOVQLE", argLength: 3, reg: gp21, asm: "CMOVQLE", resultInArg0: true},
{name: "CMOVQGE", argLength: 3, reg: gp21, asm: "CMOVQGE", resultInArg0: true},
{name: "CMOVQLS", argLength: 3, reg: gp21, asm: "CMOVQLS", resultInArg0: true},
{name: "CMOVQHI", argLength: 3, reg: gp21, asm: "CMOVQHI", resultInArg0: true},
{name: "CMOVQCC", argLength: 3, reg: gp21, asm: "CMOVQCC", resultInArg0: true},
{name: "CMOVQCS", argLength: 3, reg: gp21, asm: "CMOVQCS", resultInArg0: true},
{name: "CMOVLEQ", argLength: 3, reg: gp21, asm: "CMOVLEQ", resultInArg0: true},
{name: "CMOVLNE", argLength: 3, reg: gp21, asm: "CMOVLNE", resultInArg0: true},
{name: "CMOVLLT", argLength: 3, reg: gp21, asm: "CMOVLLT", resultInArg0: true},
{name: "CMOVLGT", argLength: 3, reg: gp21, asm: "CMOVLGT", resultInArg0: true},
{name: "CMOVLLE", argLength: 3, reg: gp21, asm: "CMOVLLE", resultInArg0: true},
{name: "CMOVLGE", argLength: 3, reg: gp21, asm: "CMOVLGE", resultInArg0: true},
{name: "CMOVLLS", argLength: 3, reg: gp21, asm: "CMOVLLS", resultInArg0: true},
{name: "CMOVLHI", argLength: 3, reg: gp21, asm: "CMOVLHI", resultInArg0: true},
{name: "CMOVLCC", argLength: 3, reg: gp21, asm: "CMOVLCC", resultInArg0: true},
{name: "CMOVLCS", argLength: 3, reg: gp21, asm: "CMOVLCS", resultInArg0: true},
{name: "CMOVWEQ", argLength: 3, reg: gp21, asm: "CMOVWEQ", resultInArg0: true},
{name: "CMOVWNE", argLength: 3, reg: gp21, asm: "CMOVWNE", resultInArg0: true},
{name: "CMOVWLT", argLength: 3, reg: gp21, asm: "CMOVWLT", resultInArg0: true},
{name: "CMOVWGT", argLength: 3, reg: gp21, asm: "CMOVWGT", resultInArg0: true},
{name: "CMOVWLE", argLength: 3, reg: gp21, asm: "CMOVWLE", resultInArg0: true},
{name: "CMOVWGE", argLength: 3, reg: gp21, asm: "CMOVWGE", resultInArg0: true},
{name: "CMOVWLS", argLength: 3, reg: gp21, asm: "CMOVWLS", resultInArg0: true},
{name: "CMOVWHI", argLength: 3, reg: gp21, asm: "CMOVWHI", resultInArg0: true},
{name: "CMOVWCC", argLength: 3, reg: gp21, asm: "CMOVWCC", resultInArg0: true},
{name: "CMOVWCS", argLength: 3, reg: gp21, asm: "CMOVWCS", resultInArg0: true},
// CMOV with floating point instructions. We need separate pseudo-op to handle
// InvertFlags correctly, and to generate special code that handles NaN (unordered flag).
// NOTE: the fact that CMOV*EQF here is marked to generate CMOV*NE is not a bug. See
// code generation in amd64/ssa.go.
// Every CMOV*EQF width must use gp21pax: the code generated for these ops uses
// AX as a scratch register, so AX has to be kept out of arg0/result and
// reported as clobbered to the register allocator.
{name: "CMOVQEQF", argLength: 3, reg: gp21pax, asm: "CMOVQNE", resultInArg0: true},
{name: "CMOVQNEF", argLength: 3, reg: gp21, asm: "CMOVQNE", resultInArg0: true},
{name: "CMOVQGTF", argLength: 3, reg: gp21, asm: "CMOVQHI", resultInArg0: true},
{name: "CMOVQGEF", argLength: 3, reg: gp21, asm: "CMOVQCC", resultInArg0: true},
{name: "CMOVLEQF", argLength: 3, reg: gp21pax, asm: "CMOVLNE", resultInArg0: true},
{name: "CMOVLNEF", argLength: 3, reg: gp21, asm: "CMOVLNE", resultInArg0: true},
{name: "CMOVLGTF", argLength: 3, reg: gp21, asm: "CMOVLHI", resultInArg0: true},
{name: "CMOVLGEF", argLength: 3, reg: gp21, asm: "CMOVLCC", resultInArg0: true},
{name: "CMOVWEQF", argLength: 3, reg: gp21pax, asm: "CMOVWNE", resultInArg0: true},
{name: "CMOVWNEF", argLength: 3, reg: gp21, asm: "CMOVWNE", resultInArg0: true},
{name: "CMOVWGTF", argLength: 3, reg: gp21, asm: "CMOVWHI", resultInArg0: true},
{name: "CMOVWGEF", argLength: 3, reg: gp21, asm: "CMOVWCC", resultInArg0: true},
{name: "BSWAPQ", argLength: 1, reg: gp11, asm: "BSWAPQ", resultInArg0: true, clobberFlags: true}, // arg0 swap bytes {name: "BSWAPQ", argLength: 1, reg: gp11, asm: "BSWAPQ", resultInArg0: true, clobberFlags: true}, // arg0 swap bytes
{name: "BSWAPL", argLength: 1, reg: gp11, asm: "BSWAPL", resultInArg0: true, clobberFlags: true}, // arg0 swap bytes {name: "BSWAPL", argLength: 1, reg: gp11, asm: "BSWAPL", resultInArg0: true, clobberFlags: true}, // arg0 swap bytes
...@@ -578,7 +626,6 @@ func init() { ...@@ -578,7 +626,6 @@ func init() {
{name: "LoweredGetCallerSP", reg: gp01, rematerializeable: true}, {name: "LoweredGetCallerSP", reg: gp01, rematerializeable: true},
//arg0=ptr,arg1=mem, returns void. Faults if ptr is nil. //arg0=ptr,arg1=mem, returns void. Faults if ptr is nil.
{name: "LoweredNilCheck", argLength: 2, reg: regInfo{inputs: []regMask{gpsp}}, clobberFlags: true, nilCheck: true, faultOnNilArg0: true}, {name: "LoweredNilCheck", argLength: 2, reg: regInfo{inputs: []regMask{gpsp}}, clobberFlags: true, nilCheck: true, faultOnNilArg0: true},
// LoweredWB invokes runtime.gcWriteBarrier. arg0=destptr, arg1=srcptr, arg2=mem, aux=runtime.gcWriteBarrier // LoweredWB invokes runtime.gcWriteBarrier. arg0=destptr, arg1=srcptr, arg2=mem, aux=runtime.gcWriteBarrier
// It saves all GP registers if necessary, but may clobber others. // It saves all GP registers if necessary, but may clobber others.
{name: "LoweredWB", argLength: 3, reg: regInfo{inputs: []regMask{buildReg("DI"), ax}, clobbers: callerSave &^ gp}, clobberFlags: true, aux: "Sym", symEffect: "None"}, {name: "LoweredWB", argLength: 3, reg: regInfo{inputs: []regMask{buildReg("DI"), ax}, clobbers: callerSave &^ gp}, clobberFlags: true, aux: "Sym", symEffect: "None"},
......
This diff is collapsed.
// asmcheck
package codegen
// cmovint replaces a negative c+4 with 182 using an if without an else.
// The directives expect a signed less-than conditional move/select.
func cmovint(c int) int {
x := c + 4
if x < 0 {
x = 182
}
// amd64:"CMOVQLT"
// arm64:"CSEL\tLT"
return x
}
// cmovchan selects between two channel values based on an inequality test.
// The directives expect a not-equal conditional move/select.
func cmovchan(x, y chan int) chan int {
if x != y {
x = y
}
// amd64:"CMOVQNE"
// arm64:"CSEL\tNE"
return x
}
// cmovuintptr replaces x with -y when x < y, compared as uintptr.
// The directives expect the unsigned-below condition (CS on amd64, LO on arm64).
func cmovuintptr(x, y uintptr) uintptr {
if x < y {
x = -y
}
// amd64:"CMOVQCS"
// arm64:"CSEL\tLO"
return x
}
// cmov32bit is the uint32 version of the unsigned-below select.
// On amd64 the directive expects the 32-bit (L) form of the instruction.
func cmov32bit(x, y uint32) uint32 {
if x < y {
x = -y
}
// amd64:"CMOVLCS"
// arm64:"CSEL\tLO"
return x
}
// cmov16bit is the uint16 version of the unsigned-below select.
// On amd64 the directive expects the 16-bit (W) form of the instruction.
func cmov16bit(x, y uint16) uint16 {
if x < y {
x = -y
}
// amd64:"CMOVWCS"
// arm64:"CSEL\tLO"
return x
}
// Floating point comparison. For EQ/NE, we must
// generate special code to handle NaNs.

// cmovfloateq selects an int based on float64 equality. On amd64 the
// directive expects a pair of moves: not-equal plus parity-clear.
func cmovfloateq(x, y float64) int {
a := 128
if x == y {
a = 256
}
// amd64:"CMOVQNE","CMOVQPC"
// arm64:"CSEL\tEQ"
return a
}
// cmovfloatne selects an int based on float64 inequality. On amd64 the
// directive expects a pair of moves: not-equal plus parity-set.
func cmovfloatne(x, y float64) int {
a := 128
if x != y {
a = 256
}
// amd64:"CMOVQNE","CMOVQPS"
// arm64:"CSEL\tNE"
return a
}
// frexp is a stub that ignores its argument and always returns (1.0, 4).
// It is marked noinline so that calls to it remain real calls.
//go:noinline
func frexp(f float64) (frac float64, exp int) {
return 1.0, 4
}
// ldexp is a stub that ignores its arguments and always returns 1.0.
// It is marked noinline so that calls to it remain real calls.
//go:noinline
func ldexp(frac float64, exp int) float64 {
return 1.0
}
// Generate a CMOV with a floating comparison and integer move.
// The float comparison rfr < yfr guards an update of the integer rexp,
// inside a loop that calls the non-inlinable frexp/ldexp stubs.
func cmovfloatint2(x, y float64) float64 {
yfr, yexp := 4.0, 5
r := x
for r >= y {
rfr, rexp := frexp(r)
if rfr < yfr {
rexp = rexp - 1
}
// amd64:"CMOVQHI"
// arm64:"CSEL\tGT"
r = r - ldexp(y, (rexp-yexp))
}
return r
}
// cmovloaded uses an if/else in which both arms assign y: the loaded
// element x[2] when it is non-zero, otherwise y shifted right by 2.
// The directives expect a not-equal conditional move/select.
func cmovloaded(x [4]int, y int) int {
if x[2] != 0 {
y = x[2]
} else {
y = y >> 2
}
// amd64:"CMOVQNE"
// arm64:"CSEL\tNE"
return y
}
// cmovuintptr2 replaces a zero product x*2 with 256; the parameter y is unused.
// The directives expect an equal conditional move/select.
func cmovuintptr2(x, y uintptr) uintptr {
a := x * 2
if a == 0 {
a = 256
}
// amd64:"CMOVQEQ"
// arm64:"CSEL\tEQ"
return a
}
// Floating point CMOVs are not supported by amd64/arm64
// Here the selected value is a float64 while the comparison is on ints, so
// the negative directives check that no conditional move/select is emitted.
func cmovfloatmove(x, y int) float64 {
a := 1.0
if x <= y {
a = 2.0
}
// amd64:-"CMOV"
// arm64:-"CSEL"
return a
}
// On amd64, the following patterns trigger comparison inversion.
// Test that we correctly invert the CMOV condition
var gsink int64
var gusink uint64
// cmovinvert1 compares x < gsink (a package-level variable); the directive
// expects the mirrored condition GT after the comparison is inverted.
func cmovinvert1(x, y int64) int64 {
if x < gsink {
y = -y
}
// amd64:"CMOVQGT"
return y
}
// cmovinvert2 compares x <= gsink; the directive expects the mirrored
// condition GE after the comparison is inverted.
func cmovinvert2(x, y int64) int64 {
if x <= gsink {
y = -y
}
// amd64:"CMOVQGE"
return y
}
// cmovinvert3 compares x == gsink; equality is symmetric, so the directive
// still expects EQ.
func cmovinvert3(x, y int64) int64 {
if x == gsink {
y = -y
}
// amd64:"CMOVQEQ"
return y
}
// cmovinvert4 compares x != gsink; inequality is symmetric, so the directive
// still expects NE.
func cmovinvert4(x, y int64) int64 {
if x != gsink {
y = -y
}
// amd64:"CMOVQNE"
return y
}
// cmovinvert5 compares x > gusink as unsigned; the directive expects the
// mirrored unsigned-below condition CS.
func cmovinvert5(x, y uint64) uint64 {
if x > gusink {
y = -y
}
// amd64:"CMOVQCS"
return y
}
// cmovinvert6 compares x >= gusink as unsigned; the directive expects the
// mirrored unsigned-below-or-equal condition LS.
func cmovinvert6(x, y uint64) uint64 {
if x >= gusink {
y = -y
}
// amd64:"CMOVQLS"
return y
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment