Commit 080187f4 authored by Giovanni Bajo

cmd/compile: implement CMOV on amd64

This builds upon the branchelim pass, activating it for amd64 and
lowering CondSelect. Special care is taken with floating-point
comparisons so that NaN operands are handled correctly.

Benchmark results on Xeon E5630 (Westmere EP):

name                      old time/op    new time/op    delta
BinaryTree17-16              4.99s ± 9%     4.66s ± 2%     ~     (p=0.095 n=5+5)
Fannkuch11-16                4.93s ± 3%     5.04s ± 2%     ~     (p=0.548 n=5+5)
FmtFprintfEmpty-16          58.8ns ± 7%    61.4ns ±14%     ~     (p=0.579 n=5+5)
FmtFprintfString-16          114ns ± 2%     114ns ± 4%     ~     (p=0.603 n=5+5)
FmtFprintfInt-16             181ns ± 4%     125ns ± 3%  -30.90%  (p=0.008 n=5+5)
FmtFprintfIntInt-16          263ns ± 2%     217ns ± 2%  -17.34%  (p=0.008 n=5+5)
FmtFprintfPrefixedInt-16     230ns ± 1%     212ns ± 1%   -7.99%  (p=0.008 n=5+5)
FmtFprintfFloat-16           411ns ± 3%     344ns ± 5%  -16.43%  (p=0.008 n=5+5)
FmtManyArgs-16               828ns ± 4%     790ns ± 2%   -4.59%  (p=0.032 n=5+5)
GobDecode-16                10.9ms ± 4%    10.8ms ± 5%     ~     (p=0.548 n=5+5)
GobEncode-16                9.52ms ± 5%    9.46ms ± 2%     ~     (p=1.000 n=5+5)
Gzip-16                      334ms ± 2%     337ms ± 2%     ~     (p=0.548 n=5+5)
Gunzip-16                   64.4ms ± 1%    65.0ms ± 1%   +1.00%  (p=0.008 n=5+5)
HTTPClientServer-16          156µs ± 3%     155µs ± 3%     ~     (p=0.690 n=5+5)
JSONEncode-16               21.0ms ± 1%    21.8ms ± 0%   +3.76%  (p=0.016 n=5+4)
JSONDecode-16               95.1ms ± 0%    95.7ms ± 1%     ~     (p=0.151 n=5+5)
Mandelbrot200-16            6.38ms ± 1%    6.42ms ± 1%     ~     (p=0.095 n=5+5)
GoParse-16                  5.47ms ± 2%    5.36ms ± 1%   -1.95%  (p=0.016 n=5+5)
RegexpMatchEasy0_32-16       111ns ± 1%     111ns ± 1%     ~     (p=0.635 n=5+4)
RegexpMatchEasy0_1K-16       408ns ± 1%     411ns ± 2%     ~     (p=0.087 n=5+5)
RegexpMatchEasy1_32-16       103ns ± 1%     104ns ± 1%     ~     (p=0.484 n=5+5)
RegexpMatchEasy1_1K-16       659ns ± 2%     652ns ± 1%     ~     (p=0.571 n=5+5)
RegexpMatchMedium_32-16      176ns ± 2%     174ns ± 1%     ~     (p=0.476 n=5+5)
RegexpMatchMedium_1K-16     58.6µs ± 4%    57.7µs ± 4%     ~     (p=0.548 n=5+5)
RegexpMatchHard_32-16       3.07µs ± 3%    3.04µs ± 4%     ~     (p=0.421 n=5+5)
RegexpMatchHard_1K-16       89.2µs ± 1%    87.9µs ± 2%   -1.52%  (p=0.032 n=5+5)
Revcomp-16                   575ms ± 0%     587ms ± 2%   +2.12%  (p=0.032 n=4+5)
Template-16                  110ms ± 1%     107ms ± 3%   -3.00%  (p=0.032 n=5+5)
TimeParse-16                 463ns ± 0%     462ns ± 0%     ~     (p=0.810 n=5+4)
TimeFormat-16                538ns ± 0%     535ns ± 0%   -0.63%  (p=0.024 n=5+5)

name                      old speed      new speed      delta
GobDecode-16              70.7MB/s ± 4%  71.4MB/s ± 5%     ~     (p=0.452 n=5+5)
GobEncode-16              80.7MB/s ± 5%  81.2MB/s ± 2%     ~     (p=1.000 n=5+5)
Gzip-16                   58.2MB/s ± 2%  57.7MB/s ± 2%     ~     (p=0.452 n=5+5)
Gunzip-16                  302MB/s ± 1%   299MB/s ± 1%   -0.99%  (p=0.008 n=5+5)
JSONEncode-16             92.4MB/s ± 1%  89.1MB/s ± 0%   -3.63%  (p=0.016 n=5+4)
JSONDecode-16             20.4MB/s ± 0%  20.3MB/s ± 1%     ~     (p=0.135 n=5+5)
GoParse-16                10.6MB/s ± 2%  10.8MB/s ± 1%   +2.00%  (p=0.016 n=5+5)
RegexpMatchEasy0_32-16     286MB/s ± 1%   285MB/s ± 3%     ~     (p=1.000 n=5+5)
RegexpMatchEasy0_1K-16    2.51GB/s ± 1%  2.49GB/s ± 2%     ~     (p=0.095 n=5+5)
RegexpMatchEasy1_32-16     309MB/s ± 1%   307MB/s ± 1%     ~     (p=0.548 n=5+5)
RegexpMatchEasy1_1K-16    1.55GB/s ± 2%  1.57GB/s ± 1%     ~     (p=0.690 n=5+5)
RegexpMatchMedium_32-16   5.68MB/s ± 2%  5.73MB/s ± 1%     ~     (p=0.579 n=5+5)
RegexpMatchMedium_1K-16   17.5MB/s ± 4%  17.8MB/s ± 4%     ~     (p=0.500 n=5+5)
RegexpMatchHard_32-16     10.4MB/s ± 3%  10.5MB/s ± 4%     ~     (p=0.460 n=5+5)
RegexpMatchHard_1K-16     11.5MB/s ± 1%  11.7MB/s ± 2%   +1.57%  (p=0.032 n=5+5)
Revcomp-16                 442MB/s ± 0%   433MB/s ± 2%   -2.05%  (p=0.032 n=4+5)
Template-16               17.7MB/s ± 1%  18.2MB/s ± 3%   +3.12%  (p=0.032 n=5+5)

Change-Id: Ic7cb7374d07da031e771bdcbfdd832fd1b17159c
Reviewed-on: https://go-review.googlesource.com/98695
Reviewed-by: Ilya Tocar <ilya.tocar@intel.com>
parent fdf5aaf5
......@@ -398,7 +398,18 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.To.Type = obj.TYPE_REG
p.To.Reg = r
case ssa.OpAMD64CMOVQEQ, ssa.OpAMD64CMOVLEQ:
case ssa.OpAMD64CMOVQEQ, ssa.OpAMD64CMOVLEQ, ssa.OpAMD64CMOVWEQ,
ssa.OpAMD64CMOVQLT, ssa.OpAMD64CMOVLLT, ssa.OpAMD64CMOVWLT,
ssa.OpAMD64CMOVQNE, ssa.OpAMD64CMOVLNE, ssa.OpAMD64CMOVWNE,
ssa.OpAMD64CMOVQGT, ssa.OpAMD64CMOVLGT, ssa.OpAMD64CMOVWGT,
ssa.OpAMD64CMOVQLE, ssa.OpAMD64CMOVLLE, ssa.OpAMD64CMOVWLE,
ssa.OpAMD64CMOVQGE, ssa.OpAMD64CMOVLGE, ssa.OpAMD64CMOVWGE,
ssa.OpAMD64CMOVQHI, ssa.OpAMD64CMOVLHI, ssa.OpAMD64CMOVWHI,
ssa.OpAMD64CMOVQLS, ssa.OpAMD64CMOVLLS, ssa.OpAMD64CMOVWLS,
ssa.OpAMD64CMOVQCC, ssa.OpAMD64CMOVLCC, ssa.OpAMD64CMOVWCC,
ssa.OpAMD64CMOVQCS, ssa.OpAMD64CMOVLCS, ssa.OpAMD64CMOVWCS,
ssa.OpAMD64CMOVQGTF, ssa.OpAMD64CMOVLGTF, ssa.OpAMD64CMOVWGTF,
ssa.OpAMD64CMOVQGEF, ssa.OpAMD64CMOVLGEF, ssa.OpAMD64CMOVWGEF:
r := v.Reg()
if r != v.Args[0].Reg() {
v.Fatalf("input[0] and output not in same register %s", v.LongString())
......@@ -409,6 +420,71 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.To.Type = obj.TYPE_REG
p.To.Reg = r
case ssa.OpAMD64CMOVQNEF, ssa.OpAMD64CMOVLNEF, ssa.OpAMD64CMOVWNEF:
r := v.Reg()
if r != v.Args[0].Reg() {
v.Fatalf("input[0] and output not in same register %s", v.LongString())
}
// Flag condition: ^ZERO || PARITY
// Generate:
// CMOV*NE SRC,DST
// CMOV*PS SRC,DST
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
p.From.Reg = v.Args[1].Reg()
p.To.Type = obj.TYPE_REG
p.To.Reg = r
var q *obj.Prog
if v.Op == ssa.OpAMD64CMOVQNEF {
q = s.Prog(x86.ACMOVQPS)
} else if v.Op == ssa.OpAMD64CMOVLNEF {
q = s.Prog(x86.ACMOVLPS)
} else {
q = s.Prog(x86.ACMOVWPS)
}
q.From.Type = obj.TYPE_REG
q.From.Reg = v.Args[1].Reg()
q.To.Type = obj.TYPE_REG
q.To.Reg = r
case ssa.OpAMD64CMOVQEQF, ssa.OpAMD64CMOVLEQF, ssa.OpAMD64CMOVWEQF:
r := v.Reg()
if r != v.Args[0].Reg() {
v.Fatalf("input[0] and output not in same register %s", v.LongString())
}
// Flag condition: ZERO && !PARITY
// Generate:
// MOV SRC,AX
// CMOV*NE DST,AX
// CMOV*PC AX,DST
//
// TODO(rasky): we could generate:
// CMOV*NE DST,SRC
// CMOV*PC SRC,DST
// But this requires a way for regalloc to know that SRC might be
// clobbered by this instruction.
if v.Args[1].Reg() != x86.REG_AX {
opregreg(s, moveByType(v.Type), x86.REG_AX, v.Args[1].Reg())
}
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
p.From.Reg = r
p.To.Type = obj.TYPE_REG
p.To.Reg = x86.REG_AX
var q *obj.Prog
if v.Op == ssa.OpAMD64CMOVQEQF {
q = s.Prog(x86.ACMOVQPC)
} else if v.Op == ssa.OpAMD64CMOVLEQF {
q = s.Prog(x86.ACMOVLPC)
} else {
q = s.Prog(x86.ACMOVWPC)
}
q.From.Type = obj.TYPE_REG
q.From.Reg = x86.REG_AX
q.To.Type = obj.TYPE_REG
q.To.Reg = r
case ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst:
r := v.Reg()
if r != v.Args[0].Reg() {
......
......@@ -19,7 +19,10 @@ package ssa
// rewrite Phis in the postdominator as CondSelects.
func branchelim(f *Func) {
// FIXME: add support for lowering CondSelects on more architectures
if f.Config.arch != "arm64" {
switch f.Config.arch {
case "arm64", "amd64":
// implemented
default:
return
}
......@@ -32,10 +35,22 @@ func branchelim(f *Func) {
}
}
func canCondSelect(v *Value) bool {
func canCondSelect(v *Value, arch string) bool {
// For now, stick to simple scalars that fit in registers
sz := v.Type.Size()
return sz <= v.Block.Func.Config.RegSize && (v.Type.IsInteger() || v.Type.IsPtrShaped())
switch {
case v.Type.Size() > v.Block.Func.Config.RegSize:
return false
case v.Type.IsPtrShaped():
return true
case v.Type.IsInteger():
if arch == "amd64" && v.Type.Size() < 2 {
// amd64 doesn't support CMOV with byte registers
return false
}
return true
default:
return false
}
}
func elimIf(f *Func, dom *Block) bool {
......@@ -68,7 +83,7 @@ func elimIf(f *Func, dom *Block) bool {
for _, v := range post.Values {
if v.Op == OpPhi {
hasphis = true
if !canCondSelect(v) {
if !canCondSelect(v, f.Config.arch) {
return false
}
}
......@@ -169,7 +184,7 @@ func elimIfElse(f *Func, b *Block) bool {
for _, v := range post.Values {
if v.Op == OpPhi {
hasphis = true
if !canCondSelect(v) {
if !canCondSelect(v, f.Config.arch) {
return false
}
}
......
......@@ -11,128 +11,162 @@ import (
// Test that a trivial 'if' is eliminated
func TestBranchElimIf(t *testing.T) {
c := testConfig(t)
c.config.arch = "arm64" // FIXME
boolType := types.New(types.TBOOL)
intType := types.New(types.TINT32)
fun := c.Fun("entry",
Bloc("entry",
Valu("start", OpInitMem, types.TypeMem, 0, nil),
Valu("sb", OpSB, types.TypeInvalid, 0, nil),
Valu("const1", OpConst32, intType, 1, nil),
Valu("const2", OpConst32, intType, 2, nil),
Valu("addr", OpAddr, boolType.PtrTo(), 0, nil, "sb"),
Valu("cond", OpLoad, boolType, 0, nil, "addr", "start"),
If("cond", "b2", "b3")),
Bloc("b2",
Goto("b3")),
Bloc("b3",
Valu("phi", OpPhi, intType, 0, nil, "const1", "const2"),
Valu("retstore", OpStore, types.TypeMem, 0, nil, "phi", "sb", "start"),
Exit("retstore")))
var testData = []struct {
arch string
intType string
ok bool
}{
{"arm64", "int32", true},
{"amd64", "int32", true},
{"amd64", "int8", false},
}
CheckFunc(fun.f)
branchelim(fun.f)
CheckFunc(fun.f)
Deadcode(fun.f)
CheckFunc(fun.f)
for _, data := range testData {
t.Run(data.arch+"/"+data.intType, func(t *testing.T) {
c := testConfigArch(t, data.arch)
boolType := c.config.Types.Bool
var intType *types.Type
switch data.intType {
case "int32":
intType = c.config.Types.Int32
case "int8":
intType = c.config.Types.Int8
default:
t.Fatal("invalid integer type:", data.intType)
}
fun := c.Fun("entry",
Bloc("entry",
Valu("start", OpInitMem, types.TypeMem, 0, nil),
Valu("sb", OpSB, types.TypeInvalid, 0, nil),
Valu("const1", OpConst32, intType, 1, nil),
Valu("const2", OpConst32, intType, 2, nil),
Valu("addr", OpAddr, boolType.PtrTo(), 0, nil, "sb"),
Valu("cond", OpLoad, boolType, 0, nil, "addr", "start"),
If("cond", "b2", "b3")),
Bloc("b2",
Goto("b3")),
Bloc("b3",
Valu("phi", OpPhi, intType, 0, nil, "const1", "const2"),
Valu("retstore", OpStore, types.TypeMem, 0, nil, "phi", "sb", "start"),
Exit("retstore")))
if len(fun.f.Blocks) != 1 {
t.Errorf("expected 1 block after branchelim and deadcode; found %d", len(fun.f.Blocks))
}
if fun.values["phi"].Op != OpCondSelect {
t.Errorf("expected phi op to be CondSelect; found op %s", fun.values["phi"].Op)
}
if fun.values["phi"].Args[2] != fun.values["cond"] {
t.Errorf("expected CondSelect condition to be %s; found %s", fun.values["cond"], fun.values["phi"].Args[2])
}
if fun.blocks["entry"].Kind != BlockExit {
t.Errorf("expected entry to be BlockExit; found kind %s", fun.blocks["entry"].Kind.String())
CheckFunc(fun.f)
branchelim(fun.f)
CheckFunc(fun.f)
Deadcode(fun.f)
CheckFunc(fun.f)
if data.ok {
if len(fun.f.Blocks) != 1 {
t.Fatalf("expected 1 block after branchelim and deadcode; found %d", len(fun.f.Blocks))
}
if fun.values["phi"].Op != OpCondSelect {
t.Fatalf("expected phi op to be CondSelect; found op %s", fun.values["phi"].Op)
}
if fun.values["phi"].Args[2] != fun.values["cond"] {
t.Errorf("expected CondSelect condition to be %s; found %s", fun.values["cond"], fun.values["phi"].Args[2])
}
if fun.blocks["entry"].Kind != BlockExit {
t.Errorf("expected entry to be BlockExit; found kind %s", fun.blocks["entry"].Kind.String())
}
} else {
if len(fun.f.Blocks) != 3 {
t.Fatalf("expected 3 block after branchelim and deadcode; found %d", len(fun.f.Blocks))
}
}
})
}
}
// Test that a trivial if/else is eliminated
func TestBranchElimIfElse(t *testing.T) {
c := testConfig(t)
c.config.arch = "arm64" // FIXME
boolType := types.New(types.TBOOL)
intType := types.New(types.TINT32)
fun := c.Fun("entry",
Bloc("entry",
Valu("start", OpInitMem, types.TypeMem, 0, nil),
Valu("sb", OpSB, types.TypeInvalid, 0, nil),
Valu("const1", OpConst32, intType, 1, nil),
Valu("const2", OpConst32, intType, 2, nil),
Valu("addr", OpAddr, boolType.PtrTo(), 0, nil, "sb"),
Valu("cond", OpLoad, boolType, 0, nil, "addr", "start"),
If("cond", "b2", "b3")),
Bloc("b2",
Goto("b4")),
Bloc("b3",
Goto("b4")),
Bloc("b4",
Valu("phi", OpPhi, intType, 0, nil, "const1", "const2"),
Valu("retstore", OpStore, types.TypeMem, 0, nil, "phi", "sb", "start"),
Exit("retstore")))
for _, arch := range []string{"arm64", "amd64"} {
t.Run(arch, func(t *testing.T) {
c := testConfigArch(t, arch)
boolType := c.config.Types.Bool
intType := c.config.Types.Int32
fun := c.Fun("entry",
Bloc("entry",
Valu("start", OpInitMem, types.TypeMem, 0, nil),
Valu("sb", OpSB, types.TypeInvalid, 0, nil),
Valu("const1", OpConst32, intType, 1, nil),
Valu("const2", OpConst32, intType, 2, nil),
Valu("addr", OpAddr, boolType.PtrTo(), 0, nil, "sb"),
Valu("cond", OpLoad, boolType, 0, nil, "addr", "start"),
If("cond", "b2", "b3")),
Bloc("b2",
Goto("b4")),
Bloc("b3",
Goto("b4")),
Bloc("b4",
Valu("phi", OpPhi, intType, 0, nil, "const1", "const2"),
Valu("retstore", OpStore, types.TypeMem, 0, nil, "phi", "sb", "start"),
Exit("retstore")))
CheckFunc(fun.f)
branchelim(fun.f)
CheckFunc(fun.f)
Deadcode(fun.f)
CheckFunc(fun.f)
CheckFunc(fun.f)
branchelim(fun.f)
CheckFunc(fun.f)
Deadcode(fun.f)
CheckFunc(fun.f)
if len(fun.f.Blocks) != 1 {
t.Errorf("expected 1 block after branchelim; found %d", len(fun.f.Blocks))
}
if fun.values["phi"].Op != OpCondSelect {
t.Errorf("expected phi op to be CondSelect; found op %s", fun.values["phi"].Op)
}
if fun.values["phi"].Args[2] != fun.values["cond"] {
t.Errorf("expected CondSelect condition to be %s; found %s", fun.values["cond"], fun.values["phi"].Args[2])
}
if fun.blocks["entry"].Kind != BlockExit {
t.Errorf("expected entry to be BlockExit; found kind %s", fun.blocks["entry"].Kind.String())
if len(fun.f.Blocks) != 1 {
t.Fatalf("expected 1 block after branchelim; found %d", len(fun.f.Blocks))
}
if fun.values["phi"].Op != OpCondSelect {
t.Fatalf("expected phi op to be CondSelect; found op %s", fun.values["phi"].Op)
}
if fun.values["phi"].Args[2] != fun.values["cond"] {
t.Errorf("expected CondSelect condition to be %s; found %s", fun.values["cond"], fun.values["phi"].Args[2])
}
if fun.blocks["entry"].Kind != BlockExit {
t.Errorf("expected entry to be BlockExit; found kind %s", fun.blocks["entry"].Kind.String())
}
})
}
}
// Test that an if/else CFG that loops back
// into itself does *not* get eliminated.
func TestNoBranchElimLoop(t *testing.T) {
c := testConfig(t)
c.config.arch = "arm64" // FIXME
boolType := types.New(types.TBOOL)
intType := types.New(types.TINT32)
for _, arch := range []string{"arm64", "amd64"} {
t.Run(arch, func(t *testing.T) {
c := testConfigArch(t, arch)
boolType := c.config.Types.Bool
intType := c.config.Types.Int32
// The control flow here is totally bogus,
// but a dead cycle seems like the only plausible
// way to arrive at a diamond CFG that is also a loop.
fun := c.Fun("entry",
Bloc("entry",
Valu("start", OpInitMem, types.TypeMem, 0, nil),
Valu("sb", OpSB, types.TypeInvalid, 0, nil),
Valu("const2", OpConst32, intType, 2, nil),
Valu("const3", OpConst32, intType, 3, nil),
Goto("b5")),
Bloc("b2",
Valu("addr", OpAddr, boolType.PtrTo(), 0, nil, "sb"),
Valu("cond", OpLoad, boolType, 0, nil, "addr", "start"),
Valu("phi", OpPhi, intType, 0, nil, "const2", "const3"),
If("cond", "b3", "b4")),
Bloc("b3",
Goto("b2")),
Bloc("b4",
Goto("b2")),
Bloc("b5",
Exit("start")))
// The control flow here is totally bogus,
// but a dead cycle seems like the only plausible
// way to arrive at a diamond CFG that is also a loop.
fun := c.Fun("entry",
Bloc("entry",
Valu("start", OpInitMem, types.TypeMem, 0, nil),
Valu("sb", OpSB, types.TypeInvalid, 0, nil),
Valu("const2", OpConst32, intType, 2, nil),
Valu("const3", OpConst32, intType, 3, nil),
Goto("b5")),
Bloc("b2",
Valu("addr", OpAddr, boolType.PtrTo(), 0, nil, "sb"),
Valu("cond", OpLoad, boolType, 0, nil, "addr", "start"),
Valu("phi", OpPhi, intType, 0, nil, "const2", "const3"),
If("cond", "b3", "b4")),
Bloc("b3",
Goto("b2")),
Bloc("b4",
Goto("b2")),
Bloc("b5",
Exit("start")))
CheckFunc(fun.f)
branchelim(fun.f)
CheckFunc(fun.f)
CheckFunc(fun.f)
branchelim(fun.f)
CheckFunc(fun.f)
if len(fun.f.Blocks) != 5 {
t.Errorf("expected 5 block after branchelim; found %d", len(fun.f.Blocks))
}
if fun.values["phi"].Op != OpPhi {
t.Errorf("expected phi op to be CondSelect; found op %s", fun.values["phi"].Op)
if len(fun.f.Blocks) != 5 {
t.Errorf("expected 5 block after branchelim; found %d", len(fun.f.Blocks))
}
// This test requires the looping diamond to be left untouched, so the
// phi must still be a Phi here; report that expectation on failure.
if fun.values["phi"].Op != OpPhi {
t.Errorf("expected phi op to remain Phi; found op %s", fun.values["phi"].Op)
}
})
}
}
......@@ -7,6 +7,7 @@ package ssa
import (
"cmd/compile/internal/types"
"cmd/internal/obj"
"cmd/internal/obj/arm64"
"cmd/internal/obj/s390x"
"cmd/internal/obj/x86"
"cmd/internal/src"
......@@ -22,6 +23,7 @@ var Copyelim = copyelim
var testCtxts = map[string]*obj.Link{
"amd64": obj.Linknew(&x86.Linkamd64),
"s390x": obj.Linknew(&s390x.Links390x),
"arm64": obj.Linknew(&arm64.Linkarm64),
}
func testConfig(tb testing.TB) *Conf { return testConfigArch(tb, "amd64") }
......
......@@ -475,6 +475,52 @@
(ClosureCall [argwid] entry closure mem) -> (CALLclosure [argwid] entry closure mem)
(InterCall [argwid] entry mem) -> (CALLinter [argwid] entry mem)
// Lowering conditional moves
// If the condition is a SETxx, we can just run a CMOV from the comparison that was
// setting the flags.
// Legend: HI=unsigned ABOVE, CS=unsigned BELOW, CC=unsigned ABOVE EQUAL, LS=unsigned BELOW EQUAL
(CondSelect <t> x y (SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) cond)) && (is64BitInt(t) || isPtr(t))
-> (CMOVQ(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) y x cond)
(CondSelect <t> x y (SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) cond)) && is32BitInt(t)
-> (CMOVL(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) y x cond)
(CondSelect <t> x y (SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) cond)) && is16BitInt(t)
-> (CMOVW(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) y x cond)
// If the condition does not set the flags, we need to generate a comparison.
(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 1
-> (CondSelect <t> x y (MOVBQZX <typ.UInt64> check))
(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 2
-> (CondSelect <t> x y (MOVWQZX <typ.UInt64> check))
(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 4
-> (CondSelect <t> x y (MOVLQZX <typ.UInt64> check))
(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 8 && (is64BitInt(t) || isPtr(t))
-> (CMOVQNE y x (CMPQconst [0] check))
(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 8 && is32BitInt(t)
-> (CMOVLNE y x (CMPQconst [0] check))
(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 8 && is16BitInt(t)
-> (CMOVWNE y x (CMPQconst [0] check))
// Absorb InvertFlags
(CMOVQ(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS) x y (InvertFlags cond))
-> (CMOVQ(EQ|NE|GT|LT|GE|LE|CS|HI|LS|CC) x y cond)
(CMOVL(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS) x y (InvertFlags cond))
-> (CMOVL(EQ|NE|GT|LT|GE|LE|CS|HI|LS|CC) x y cond)
(CMOVW(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS) x y (InvertFlags cond))
-> (CMOVW(EQ|NE|GT|LT|GE|LE|CS|HI|LS|CC) x y cond)
// Absorb constants generated during lower.
// A CMOVQ<cc> yields arg1 when the flags satisfy <cc> and arg0 otherwise, so
// with a constant flags argument each op folds to one of its operands.
// Each flag constant has two rules: the first lists the conditions that hold
// (result is arg1), the second the conditions that fail (result is arg0).
(CMOVQ(EQ|LE|GE|CC|LS) _ x (FlagEQ)) -> x
(CMOVQ(NE|LT|GT|CS|HI) y _ (FlagEQ)) -> y
(CMOVQ(NE|GT|GE|HI|CC) _ x (FlagGT_UGT)) -> x
(CMOVQ(EQ|LE|LT|LS|CS) y _ (FlagGT_UGT)) -> y
(CMOVQ(NE|GT|GE|LS|CS) _ x (FlagGT_ULT)) -> x
(CMOVQ(EQ|LE|LT|HI|CC) y _ (FlagGT_ULT)) -> y
(CMOVQ(NE|LT|LE|CS|LS) _ x (FlagLT_ULT)) -> x
(CMOVQ(EQ|GT|GE|HI|CC) y _ (FlagLT_ULT)) -> y
(CMOVQ(NE|LT|LE|HI|CC) _ x (FlagLT_UGT)) -> x
(CMOVQ(EQ|GT|GE|CS|LS) y _ (FlagLT_UGT)) -> y
// Miscellaneous
(Convert <t> x mem) && config.PtrSize == 8 -> (MOVQconvert <t> x mem)
(Convert <t> x mem) && config.PtrSize == 4 -> (MOVLconvert <t> x mem)
......@@ -1353,6 +1399,10 @@
(CMPLconst x [0]) -> (TESTL x x)
(CMPWconst x [0]) -> (TESTW x x)
(CMPBconst x [0]) -> (TESTB x x)
(TESTQconst [-1] x) -> (TESTQ x x)
(TESTLconst [-1] x) -> (TESTL x x)
(TESTWconst [-1] x) -> (TESTW x x)
(TESTBconst [-1] x) -> (TESTB x x)
// Combining byte loads into larger (unaligned) loads.
// There are many ways these combinations could occur. This is
......
......@@ -132,6 +132,7 @@ func init() {
gpload = regInfo{inputs: []regMask{gpspsb, 0}, outputs: gponly}
gp21load = regInfo{inputs: []regMask{gp, gpspsb, 0}, outputs: gponly}
gploadidx = regInfo{inputs: []regMask{gpspsb, gpsp, 0}, outputs: gponly}
gp21pax = regInfo{inputs: []regMask{gp &^ ax, gp}, outputs: []regMask{gp &^ ax}, clobbers: ax}
gpstore = regInfo{inputs: []regMask{gpspsb, gpsp, 0}}
gpstoreconst = regInfo{inputs: []regMask{gpspsb, 0}}
......@@ -340,10 +341,57 @@ func init() {
{name: "BSRQ", argLength: 1, reg: gp11flags, asm: "BSRQ", typ: "(UInt64,Flags)"}, // # of high-order zeroes in 64-bit arg
{name: "BSRL", argLength: 1, reg: gp11flags, asm: "BSRL", typ: "(UInt32,Flags)"}, // # of high-order zeroes in 32-bit arg
// Note ASM for ops moves whole register
//
{name: "CMOVQEQ", argLength: 3, reg: gp21, asm: "CMOVQEQ", resultInArg0: true}, // if arg2 encodes "equal" return arg1 else arg0
{name: "CMOVLEQ", argLength: 3, reg: gp21, asm: "CMOVLEQ", resultInArg0: true}, // if arg2 encodes "equal" return arg1 else arg0
// CMOV instructions: 64, 32 and 16-bit sizes.
// if arg2 encodes a true result, return arg1, else arg0
{name: "CMOVQEQ", argLength: 3, reg: gp21, asm: "CMOVQEQ", resultInArg0: true},
{name: "CMOVQNE", argLength: 3, reg: gp21, asm: "CMOVQNE", resultInArg0: true},
{name: "CMOVQLT", argLength: 3, reg: gp21, asm: "CMOVQLT", resultInArg0: true},
{name: "CMOVQGT", argLength: 3, reg: gp21, asm: "CMOVQGT", resultInArg0: true},
{name: "CMOVQLE", argLength: 3, reg: gp21, asm: "CMOVQLE", resultInArg0: true},
{name: "CMOVQGE", argLength: 3, reg: gp21, asm: "CMOVQGE", resultInArg0: true},
{name: "CMOVQLS", argLength: 3, reg: gp21, asm: "CMOVQLS", resultInArg0: true},
{name: "CMOVQHI", argLength: 3, reg: gp21, asm: "CMOVQHI", resultInArg0: true},
{name: "CMOVQCC", argLength: 3, reg: gp21, asm: "CMOVQCC", resultInArg0: true},
{name: "CMOVQCS", argLength: 3, reg: gp21, asm: "CMOVQCS", resultInArg0: true},
{name: "CMOVLEQ", argLength: 3, reg: gp21, asm: "CMOVLEQ", resultInArg0: true},
{name: "CMOVLNE", argLength: 3, reg: gp21, asm: "CMOVLNE", resultInArg0: true},
{name: "CMOVLLT", argLength: 3, reg: gp21, asm: "CMOVLLT", resultInArg0: true},
{name: "CMOVLGT", argLength: 3, reg: gp21, asm: "CMOVLGT", resultInArg0: true},
{name: "CMOVLLE", argLength: 3, reg: gp21, asm: "CMOVLLE", resultInArg0: true},
{name: "CMOVLGE", argLength: 3, reg: gp21, asm: "CMOVLGE", resultInArg0: true},
{name: "CMOVLLS", argLength: 3, reg: gp21, asm: "CMOVLLS", resultInArg0: true},
{name: "CMOVLHI", argLength: 3, reg: gp21, asm: "CMOVLHI", resultInArg0: true},
{name: "CMOVLCC", argLength: 3, reg: gp21, asm: "CMOVLCC", resultInArg0: true},
{name: "CMOVLCS", argLength: 3, reg: gp21, asm: "CMOVLCS", resultInArg0: true},
{name: "CMOVWEQ", argLength: 3, reg: gp21, asm: "CMOVWEQ", resultInArg0: true},
{name: "CMOVWNE", argLength: 3, reg: gp21, asm: "CMOVWNE", resultInArg0: true},
{name: "CMOVWLT", argLength: 3, reg: gp21, asm: "CMOVWLT", resultInArg0: true},
{name: "CMOVWGT", argLength: 3, reg: gp21, asm: "CMOVWGT", resultInArg0: true},
{name: "CMOVWLE", argLength: 3, reg: gp21, asm: "CMOVWLE", resultInArg0: true},
{name: "CMOVWGE", argLength: 3, reg: gp21, asm: "CMOVWGE", resultInArg0: true},
{name: "CMOVWLS", argLength: 3, reg: gp21, asm: "CMOVWLS", resultInArg0: true},
{name: "CMOVWHI", argLength: 3, reg: gp21, asm: "CMOVWHI", resultInArg0: true},
{name: "CMOVWCC", argLength: 3, reg: gp21, asm: "CMOVWCC", resultInArg0: true},
{name: "CMOVWCS", argLength: 3, reg: gp21, asm: "CMOVWCS", resultInArg0: true},
// CMOV with floating point instructions. We need separate pseudo-op to handle
// InvertFlags correctly, and to generate special code that handles NaN (unordered flag).
// NOTE: the fact that CMOV*EQF here is marked to generate CMOV*NE is not a bug. See
// code generation in amd64/ssa.go.
// All CMOV*EQF widths use gp21pax: the code generated for them stages the result in AX
// whatever the operand size, so AX must be declared clobbered and excluded from
// arg0/the output for the Q, L and W variants alike.
{name: "CMOVQEQF", argLength: 3, reg: gp21pax, asm: "CMOVQNE", resultInArg0: true},
{name: "CMOVQNEF", argLength: 3, reg: gp21, asm: "CMOVQNE", resultInArg0: true},
{name: "CMOVQGTF", argLength: 3, reg: gp21, asm: "CMOVQHI", resultInArg0: true},
{name: "CMOVQGEF", argLength: 3, reg: gp21, asm: "CMOVQCC", resultInArg0: true},
{name: "CMOVLEQF", argLength: 3, reg: gp21pax, asm: "CMOVLNE", resultInArg0: true},
{name: "CMOVLNEF", argLength: 3, reg: gp21, asm: "CMOVLNE", resultInArg0: true},
{name: "CMOVLGTF", argLength: 3, reg: gp21, asm: "CMOVLHI", resultInArg0: true},
{name: "CMOVLGEF", argLength: 3, reg: gp21, asm: "CMOVLCC", resultInArg0: true},
{name: "CMOVWEQF", argLength: 3, reg: gp21pax, asm: "CMOVWNE", resultInArg0: true},
{name: "CMOVWNEF", argLength: 3, reg: gp21, asm: "CMOVWNE", resultInArg0: true},
{name: "CMOVWGTF", argLength: 3, reg: gp21, asm: "CMOVWHI", resultInArg0: true},
{name: "CMOVWGEF", argLength: 3, reg: gp21, asm: "CMOVWCC", resultInArg0: true},
{name: "BSWAPQ", argLength: 1, reg: gp11, asm: "BSWAPQ", resultInArg0: true, clobberFlags: true}, // arg0 swap bytes
{name: "BSWAPL", argLength: 1, reg: gp11, asm: "BSWAPL", resultInArg0: true, clobberFlags: true}, // arg0 swap bytes
......@@ -578,7 +626,6 @@ func init() {
{name: "LoweredGetCallerSP", reg: gp01, rematerializeable: true},
//arg0=ptr,arg1=mem, returns void. Faults if ptr is nil.
{name: "LoweredNilCheck", argLength: 2, reg: regInfo{inputs: []regMask{gpsp}}, clobberFlags: true, nilCheck: true, faultOnNilArg0: true},
// LoweredWB invokes runtime.gcWriteBarrier. arg0=destptr, arg1=srcptr, arg2=mem, aux=runtime.gcWriteBarrier
// It saves all GP registers if necessary, but may clobber others.
{name: "LoweredWB", argLength: 3, reg: regInfo{inputs: []regMask{buildReg("DI"), ax}, clobbers: callerSave &^ gp}, clobberFlags: true, aux: "Sym", symEffect: "None"},
......
This diff is collapsed.
// asmcheck
package codegen
// cmovint adds 4 to c and replaces a negative sum with 182.
// The directives expect the conditional assignment to be emitted as
// CMOVQLT on amd64 and CSEL LT on arm64.
func cmovint(c int) int {
x := c + 4
if x < 0 {
x = 182
}
// amd64:"CMOVQLT"
// arm64:"CSEL\tLT"
return x
}
// cmovchan overwrites x with y when the two channels differ and returns x.
// It checks that a conditional assignment of channel-typed values is emitted
// as CMOVQNE on amd64 and CSEL NE on arm64.
func cmovchan(x, y chan int) chan int {
if x != y {
x = y
}
// amd64:"CMOVQNE"
// arm64:"CSEL\tNE"
return x
}
// cmovuintptr returns -y when x < y (unsigned comparison) and x otherwise.
// The directives expect CMOVQCS on amd64 and CSEL LO on arm64.
func cmovuintptr(x, y uintptr) uintptr {
if x < y {
x = -y
}
// amd64:"CMOVQCS"
// arm64:"CSEL\tLO"
return x
}
// cmov32bit is the uint32 variant of the unsigned "x < y" select: it returns
// -y when x < y and x otherwise. The directives expect the 32-bit CMOVLCS on
// amd64 and CSEL LO on arm64.
func cmov32bit(x, y uint32) uint32 {
if x < y {
x = -y
}
// amd64:"CMOVLCS"
// arm64:"CSEL\tLO"
return x
}
// cmov16bit is the uint16 variant of the unsigned "x < y" select: it returns
// -y when x < y and x otherwise. The directives expect the 16-bit CMOVWCS on
// amd64 and CSEL LO on arm64.
func cmov16bit(x, y uint16) uint16 {
if x < y {
x = -y
}
// amd64:"CMOVWCS"
// arm64:"CSEL\tLO"
return x
}
// Floating point comparison. For EQ/NE, we must
// generate special code to handle NaNs.

// cmovfloateq returns 256 when x == y and 128 otherwise. On amd64 the
// directive expects a two-instruction select (CMOVQNE plus CMOVQPC); on
// arm64 a single CSEL EQ is expected.
func cmovfloateq(x, y float64) int {
a := 128
if x == y {
a = 256
}
// amd64:"CMOVQNE","CMOVQPC"
// arm64:"CSEL\tEQ"
return a
}
// cmovfloatne returns 256 when x != y and 128 otherwise. On amd64 the
// directive expects a two-instruction select (CMOVQNE plus CMOVQPS); on
// arm64 a single CSEL NE is expected.
func cmovfloatne(x, y float64) int {
a := 128
if x != y {
a = 256
}
// amd64:"CMOVQNE","CMOVQPS"
// arm64:"CSEL\tNE"
return a
}
// frexp is a stub that ignores f and always returns (1.0, 4). It is marked
// noinline and is called from cmovfloatint2.
//go:noinline
func frexp(f float64) (frac float64, exp int) {
return 1.0, 4
}
// ldexp is a stub that ignores its arguments and always returns 1.0. It is
// marked noinline and is called from cmovfloatint2.
//go:noinline
func ldexp(frac float64, exp int) float64 {
return 1.0
}
// Generate a CMOV with a floating comparison and integer move.
// Inside the loop, the float comparison rfr < yfr decides whether the integer
// rexp is decremented; the directives expect that select to be emitted as
// CMOVQHI on amd64 and CSEL GT on arm64.
func cmovfloatint2(x, y float64) float64 {
yfr, yexp := 4.0, 5
r := x
for r >= y {
rfr, rexp := frexp(r)
if rfr < yfr {
rexp = rexp - 1
}
// amd64:"CMOVQHI"
// arm64:"CSEL\tGT"
r = r - ldexp(y, (rexp-yexp))
}
return r
}
// cmovloaded returns x[2] when it is non-zero and y >> 2 otherwise, so the
// value tested by the condition is also one of the selected values.
// The directives expect CMOVQNE on amd64 and CSEL NE on arm64.
func cmovloaded(x [4]int, y int) int {
if x[2] != 0 {
y = x[2]
} else {
y = y >> 2
}
// amd64:"CMOVQNE"
// arm64:"CSEL\tNE"
return y
}
// cmovuintptr2 computes x*2 and substitutes 256 when the product is zero;
// y is unused. The directives expect CMOVQEQ on amd64 and CSEL EQ on arm64.
func cmovuintptr2(x, y uintptr) uintptr {
a := x * 2
if a == 0 {
a = 256
}
// amd64:"CMOVQEQ"
// arm64:"CSEL\tEQ"
return a
}
// Floating point CMOVs are not supported by amd64/arm64
// cmovfloatmove selects between the float64 values 1.0 and 2.0 based on an
// integer comparison; the negated directives require that no CMOV (amd64) or
// CSEL (arm64) appears for it.
func cmovfloatmove(x, y int) float64 {
a := 1.0
if x <= y {
a = 2.0
}
// amd64:-"CMOV"
// arm64:-"CSEL"
return a
}
// On amd64, the following patterns trigger comparison inversion.
// Test that we correctly invert the CMOV condition
// gsink and gusink are package-level variables used as the right-hand
// operand of the comparisons in the cmovinvert functions below.
var gsink int64
var gusink uint64
// cmovinvert1 negates y when x < gsink. The amd64 directive expects the
// select to use the swapped-operand condition, CMOVQGT.
func cmovinvert1(x, y int64) int64 {
if x < gsink {
y = -y
}
// amd64:"CMOVQGT"
return y
}
// cmovinvert2 negates y when x <= gsink. The amd64 directive expects the
// select to use the swapped-operand condition, CMOVQGE.
func cmovinvert2(x, y int64) int64 {
if x <= gsink {
y = -y
}
// amd64:"CMOVQGE"
return y
}
// cmovinvert3 negates y when x == gsink. Equality is unaffected by swapping
// the comparison operands, so the amd64 directive still expects CMOVQEQ.
func cmovinvert3(x, y int64) int64 {
if x == gsink {
y = -y
}
// amd64:"CMOVQEQ"
return y
}
// cmovinvert4 negates y when x != gsink. Inequality is unaffected by swapping
// the comparison operands, so the amd64 directive still expects CMOVQNE.
func cmovinvert4(x, y int64) int64 {
if x != gsink {
y = -y
}
// amd64:"CMOVQNE"
return y
}
// cmovinvert5 negates y when x > gusink (unsigned). The amd64 directive
// expects the select to use the swapped-operand condition, CMOVQCS.
func cmovinvert5(x, y uint64) uint64 {
if x > gusink {
y = -y
}
// amd64:"CMOVQCS"
return y
}
// cmovinvert6 negates y when x >= gusink (unsigned). The amd64 directive
// expects the select to use the swapped-operand condition, CMOVQLS.
func cmovinvert6(x, y uint64) uint64 {
if x >= gusink {
y = -y
}
// amd64:"CMOVQLS"
return y
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment