Commit 2756d56c authored by Cherry Zhang

cmd/compile: intrinsify math/big.mulWW, divWW on AMD64

Change-Id: I59f7afa7a5803d19f8b21fe70fc85ef997bb3a85
Reviewed-on: https://go-review.googlesource.com/30542
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
parent 7c431cb7
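For reference, the two math/big routines this CL intrinsifies compute a full 64x64->128 multiply and a 128-by-64 divide, one machine word at a time. A minimal pure-Go sketch of their contracts (the mulWW64/divWW64 names and the use of uint64 for the uintptr-sized big.Word are ours; the multiply is essentially the portable mulWW_g algorithm):

package main

import "fmt"

// mulWW64 mirrors math/big's mulWW: z1:z0 = x*y, the full 128-bit
// unsigned product (z1 is the high word, z0 the low word).
func mulWW64(x, y uint64) (z1, z0 uint64) {
	const mask = 1<<32 - 1
	x0, x1 := x&mask, x>>32
	y0, y1 := y&mask, y>>32
	w0 := x0 * y0
	t := x1*y0 + w0>>32
	w1 := t&mask + x0*y1
	z1 = x1*y1 + t>>32 + w1>>32
	z0 = x * y
	return
}

// divWW64 mirrors math/big's divWW: q = (x1:x0)/y, r = (x1:x0)%y.
// Callers must ensure y != 0 and x1 < y so that q fits in one word;
// that same precondition is what makes a single DIVQ safe on AMD64.
func divWW64(x1, x0, y uint64) (q, r uint64) {
	r = x1
	for i := 0; i < 64; i++ {
		carry := r >> 63 // bit 127 of the shifted remainder
		r = r<<1 | x0>>63
		x0 <<= 1
		q <<= 1
		if carry != 0 || r >= y {
			r -= y
			q |= 1
		}
	}
	return
}

func main() {
	z1, z0 := mulWW64(1<<63+5, 10) // 128-bit product
	q, r := divWW64(z1, z0, 10)    // and back again
	fmt.Println(z1, z0, q, r)      // 5 50 9223372036854775813 0
}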
src/cmd/compile/internal/amd64/ssa.go
@@ -303,6 +303,20 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 			m.To.Reg = x86.REG_DX
 		}
+	case ssa.OpAMD64MULQU2:
+		// Arg[0] is already in AX as it's the only register we allow
+		// results hi in DX, lo in AX
+		p := gc.Prog(v.Op.Asm())
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = v.Args[1].Reg()
+	case ssa.OpAMD64DIVQU2:
+		// Arg[0], Arg[1] are already in Dx, AX, as they're the only registers we allow
+		// results q in AX, r in DX
+		p := gc.Prog(v.Op.Asm())
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = v.Args[2].Reg()
 	case ssa.OpAMD64AVGQU:
 		// compute (x+y)/2 unsigned.
 		// Do a 64-bit add, the overflow goes into the carry.
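The two cases above emit only the one explicit source operand because MULQ and DIVQ hard-wire everything else in AX and DX; the register allocator (see the MULQU2/DIVQU2 regInfo below) guarantees the inputs arrive there. A runnable model of that contract (a sketch; math/bits postdates this CL but matches the hardware semantics exactly):

package main

import (
	"fmt"
	"math/bits"
)

// MULQU2 pins arg0 in AX; "MULQ src" then leaves hi in DX, lo in AX.
func mulqu2(ax, src uint64) (dx, axOut uint64) {
	return bits.Mul64(ax, src)
}

// DIVQU2 pins arg0 in DX and arg1 in AX; "DIVQ src" divides DX:AX by src,
// leaving the quotient in AX and the remainder in DX. Like the instruction,
// bits.Div64 panics if src == 0 or the quotient overflows 64 bits.
func divqu2(dx, ax, src uint64) (q, r uint64) {
	return bits.Div64(dx, ax, src)
}

func main() {
	hi, lo := mulqu2(1<<40, 1<<30) // 2^70 -> hi = 64, lo = 0
	fmt.Println(hi, lo)
	fmt.Println(divqu2(hi, lo, 1<<30)) // 1099511627776 0, i.e. 2^40
}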
src/cmd/compile/internal/gc/ssa.go
@@ -545,6 +545,22 @@ func (s *state) stmt(n *Node) {
 		s.assign(n.List.Second(), resok, false, false, n.Lineno, 0, false)
 		return
+	case OAS2FUNC:
+		// We come here only when it is an intrinsic call returning two values.
+		if !isIntrinsicCall(n.Rlist.First()) {
+			s.Fatalf("non-intrinsic AS2FUNC not expanded %v", n.Rlist.First())
+		}
+		v := s.intrinsicCall(n.Rlist.First())
+		v1 := s.newValue1(ssa.OpSelect0, n.List.First().Type, v)
+		v2 := s.newValue1(ssa.OpSelect1, n.List.Second().Type, v)
+		// Make a fake node to mimic loading return value, ONLY for write barrier test.
+		// This is future-proofing against non-scalar 2-result intrinsics.
+		// Currently we only have scalar ones, which result in no write barrier.
+		fakeret := &Node{Op: OINDREG, Reg: int16(Thearch.REGSP)}
+		s.assign(n.List.First(), v1, needwritebarrier(n.List.First(), fakeret), false, n.Lineno, 0, false)
+		s.assign(n.List.Second(), v2, needwritebarrier(n.List.Second(), fakeret), false, n.Lineno, 0, false)
+		return
+
 	case ODCL:
 		if n.Left.Class == PAUTOHEAP {
 			Fatalf("DCL %v", n)
@@ -2483,23 +2499,15 @@ type sizedIntrinsicKey struct {
 }
 
 // disableForInstrumenting returns nil when instrumenting, fn otherwise
-func disableForInstrumenting(fn func(*state, *Node) *ssa.Value) func(*state, *Node) *ssa.Value {
+func disableForInstrumenting(fn intrinsicBuilder) intrinsicBuilder {
 	if instrumenting {
 		return nil
 	}
 	return fn
 }
 
-// enableForRuntime returns fn when compiling runtime, nil otherwise
-func enableForRuntime(fn func(*state, *Node) *ssa.Value) func(*state, *Node) *ssa.Value {
-	if compiling_runtime {
-		return fn
-	}
-	return nil
-}
-
 // enableOnArch returns fn on given archs, nil otherwise
-func enableOnArch(fn func(*state, *Node) *ssa.Value, archs ...sys.ArchFamily) func(*state, *Node) *ssa.Value {
+func enableOnArch(fn intrinsicBuilder, archs ...sys.ArchFamily) intrinsicBuilder {
 	if Thearch.LinkArch.InFamily(archs...) {
 		return fn
 	}

@@ -2513,9 +2521,7 @@ func intrinsicInit() {
 	// initial set of intrinsics.
 	i.std = map[intrinsicKey]intrinsicBuilder{
 		/******** runtime ********/
-		intrinsicKey{"", "slicebytetostringtmp"}: enableForRuntime(disableForInstrumenting(func(s *state, n *Node) *ssa.Value {
-			// pkg name left empty because intrinsification only should apply
-			// inside the runtime package when non instrumented.
+		intrinsicKey{"runtime", "slicebytetostringtmp"}: disableForInstrumenting(func(s *state, n *Node) *ssa.Value {
 			// Compiler frontend optimizations emit OARRAYBYTESTRTMP nodes
 			// for the backend instead of slicebytetostringtmp calls
 			// when not instrumenting.

@@ -2523,7 +2529,7 @@ func intrinsicInit() {
 			ptr := s.newValue1(ssa.OpSlicePtr, ptrto(Types[TUINT8]), slice)
 			len := s.newValue1(ssa.OpSliceLen, Types[TINT], slice)
 			return s.newValue2(ssa.OpStringMake, n.Type, ptr, len)
-		})),
+		}),
 		intrinsicKey{"runtime", "KeepAlive"}: func(s *state, n *Node) *ssa.Value {
 			data := s.newValue1(ssa.OpIData, ptrto(Types[TUINT8]), s.intrinsicFirstArg(n))
 			s.vars[&memVar] = s.newValue2(ssa.OpKeepAlive, ssa.TypeMem, data, s.mem())

@@ -2717,6 +2723,16 @@ func intrinsicInit() {
 		i.std[intrinsicKey{"runtime/internal/atomic", "Xadd"}]
 	i.ptrSized[sizedIntrinsicKey{"sync/atomic", "AddUintptr", 8}] =
 		i.std[intrinsicKey{"runtime/internal/atomic", "Xadd64"}]
+
+	/******** math/big ********/
+	i.intSized[sizedIntrinsicKey{"math/big", "mulWW", 8}] =
+		enableOnArch(func(s *state, n *Node) *ssa.Value {
+			return s.newValue2(ssa.OpMul64uhilo, ssa.MakeTuple(Types[TUINT64], Types[TUINT64]), s.intrinsicArg(n, 0), s.intrinsicArg(n, 1))
+		}, sys.AMD64)
+	i.intSized[sizedIntrinsicKey{"math/big", "divWW", 8}] =
+		enableOnArch(func(s *state, n *Node) *ssa.Value {
+			return s.newValue3(ssa.OpDiv128u, ssa.MakeTuple(Types[TUINT64], Types[TUINT64]), s.intrinsicArg(n, 0), s.intrinsicArg(n, 1), s.intrinsicArg(n, 2))
+		}, sys.AMD64)
 }
 
 // findIntrinsic returns a function which builds the SSA equivalent of the
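The math/big entries go in the size-keyed table rather than the plain one because big.Word is uintptr-sized: mulWW is only a 64x64->128 multiply on targets where a word is 8 bytes, and enableOnArch then gates the builder to AMD64. A toy of that dispatch (illustrative code mirroring the table shape, not part of the CL):

package main

import "fmt"

type sizedIntrinsicKey struct {
	pkg, fn string
	size    int
}

func main() {
	intSized := map[sizedIntrinsicKey]string{
		{"math/big", "mulWW", 8}: "Mul64uhilo",
		{"math/big", "divWW", 8}: "Div128u",
	}
	const wordSize = 8 // AMD64: int, uintptr, and big.Word are all 8 bytes
	if op, ok := intSized[sizedIntrinsicKey{"math/big", "mulWW", wordSize}]; ok {
		fmt.Println("mulWW lowers to", op) // mulWW lowers to Mul64uhilo
	}
}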
@@ -2732,6 +2748,9 @@ func findIntrinsic(sym *Sym) intrinsicBuilder {
 		intrinsicInit()
 	}
 	pkg := sym.Pkg.Path
+	if sym.Pkg == localpkg {
+		pkg = myimportpath
+	}
 	fn := sym.Name
 	f := intrinsics.std[intrinsicKey{pkg, fn}]
 	if f != nil {
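The findIntrinsic fix is what lets intrinsics fire while compiling a package's own body: symbols of the package being compiled live in localpkg, whose Path is empty, so without the remap the {"math/big", ...} keys could never match during the build of math/big itself. It also replaces the old trick of registering slicebytetostringtmp under the empty package name, removed above. Annotated copy of the new lines (comments ours):

	pkg := sym.Pkg.Path // "" while compiling the package itself
	if sym.Pkg == localpkg {
		pkg = myimportpath // the import path supplied to the compiler, e.g. "math/big"
	}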
src/cmd/compile/internal/gc/walk.go
@@ -813,7 +813,7 @@ opswitch:
 		}
 		n = liststmt(ll)
 
 	// a,b,... = fn()
 	case OAS2FUNC:
 		init.AppendNodes(&n.Ninit)

@@ -821,6 +821,11 @@ opswitch:
 		walkexprlistsafe(n.List.Slice(), init)
 		r = walkexpr(r, init)
+
+		if isIntrinsicCall(r) {
+			n.Rlist.Set1(r)
+			break
+		}
 		ll := ascompatet(n.Op, n.List, r.Type, 0, init)
 		for i, n := range ll {
 			ll[i] = applywritebarrier(n)
src/cmd/compile/internal/ssa/gen/AMD64.rules
@@ -49,6 +49,9 @@
 (Hmul8  x y) -> (HMULB  x y)
 (Hmul8u x y) -> (HMULBU x y)
 
+(Mul64uhilo x y) -> (MULQU2 x y)
+(Div128u xhi xlo y) -> (DIVQU2 xhi xlo y)
+
 (Avg64u x y) -> (AVGQU x y)
 
 (Mod64  x y) -> (Select1 (DIVQ  x y))
src/cmd/compile/internal/ssa/gen/AMD64Ops.go
@@ -211,6 +211,9 @@ func init() {
 		{name: "DIVLU", argLength: 2, reg: gp11div, typ: "(UInt32,UInt32)", asm: "DIVL", clobberFlags: true}, // [arg0 / arg1, arg0 % arg1]
 		{name: "DIVWU", argLength: 2, reg: gp11div, typ: "(UInt16,UInt16)", asm: "DIVW", clobberFlags: true}, // [arg0 / arg1, arg0 % arg1]
 
+		{name: "MULQU2", argLength: 2, reg: regInfo{inputs: []regMask{ax, gpsp}, outputs: []regMask{dx, ax}}, asm: "MULQ", clobberFlags: true},     // arg0 * arg1, returns (hi, lo)
+		{name: "DIVQU2", argLength: 3, reg: regInfo{inputs: []regMask{dx, ax, gpsp}, outputs: []regMask{ax, dx}}, asm: "DIVQ", clobberFlags: true}, // arg0:arg1 / arg2 (128-bit divided by 64-bit), returns (q, r)
+
 		{name: "ANDQ", argLength: 2, reg: gp21, asm: "ANDQ", commutative: true, resultInArg0: true, clobberFlags: true},      // arg0 & arg1
 		{name: "ANDL", argLength: 2, reg: gp21, asm: "ANDL", commutative: true, resultInArg0: true, clobberFlags: true},      // arg0 & arg1
 		{name: "ANDQconst", argLength: 1, reg: gp11, asm: "ANDQ", aux: "Int64", resultInArg0: true, clobberFlags: true},      // arg0 & auxint
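One caveat the DIVQU2 comment leaves implicit: DIVQ raises a divide error not only for a zero divisor but whenever the 128-bit quotient does not fit in 64 bits. The lowering is still safe because the quotient fits exactly when the high word is less than the divisor, a precondition math/big's divWW callers are expected to maintain (a hedged sketch; the helper name is ours):

package main

import "fmt"

// div128uOK reports whether DIVQ on (hi:lo)/y would complete without
// faulting: the quotient fits in 64 bits iff y != 0 and hi < y.
func div128uOK(hi, y uint64) bool {
	return y != 0 && hi < y
}

func main() {
	fmt.Println(div128uOK(5, 10))  // true: quotient fits in one word
	fmt.Println(div128uOK(10, 10)) // false: quotient would overflow 64 bits
}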
src/cmd/compile/internal/ssa/gen/genericOps.go
@@ -58,6 +58,9 @@ var genericOps = []opData{
 	{name: "Hmul64", argLength: 2},
 	{name: "Hmul64u", argLength: 2},
 
+	{name: "Mul32uhilo", argLength: 2, typ: "(UInt32,UInt32)"}, // arg0 * arg1, returns (hi, lo)
+	{name: "Mul64uhilo", argLength: 2, typ: "(UInt64,UInt64)"}, // arg0 * arg1, returns (hi, lo)
+
 	// Weird special instruction for strength reduction of divides.
 	{name: "Avg64u", argLength: 2}, // (uint64(arg0) + uint64(arg1)) / 2, correct to all 64 bits.

@@ -69,6 +72,7 @@ var genericOps = []opData{
 	{name: "Div32u", argLength: 2},
 	{name: "Div64", argLength: 2},
 	{name: "Div64u", argLength: 2},
+	{name: "Div128u", argLength: 3}, // arg0:arg1 / arg2 (128-bit divided by 64-bit), returns (q, r)
 
 	{name: "Mod8", argLength: 2},  // arg0 % arg1, signed
 	{name: "Mod8u", argLength: 2}, // arg0 % arg1, unsigned

@@ -424,8 +428,6 @@ var genericOps = []opData{
 	{name: "Sub32carry", argLength: 2, typ: "(UInt32,Flags)"}, // arg0 - arg1, returns (value, carry)
 	{name: "Sub32withcarry", argLength: 3},                    // arg0 - arg1 - arg2, arg2=carry (0 or 1)
 
-	{name: "Mul32uhilo", argLength: 2, typ: "(UInt32,UInt32)"}, // arg0 * arg1, returns (hi, lo)
-
 	{name: "Signmask", argLength: 1, typ: "Int32"},  // 0 if arg0 >= 0, -1 if arg0 < 0
 	{name: "Zeromask", argLength: 1, typ: "UInt32"}, // 0 if arg0 == 0, 0xffffffff if arg0 != 0
src/cmd/compile/internal/ssa/opGen.go
@@ -442,6 +442,8 @@ const (
 	OpAMD64DIVQU
 	OpAMD64DIVLU
 	OpAMD64DIVWU
+	OpAMD64MULQU2
+	OpAMD64DIVQU2
 	OpAMD64ANDQ
 	OpAMD64ANDL
 	OpAMD64ANDQconst

@@ -1443,6 +1445,8 @@ const (
 	OpHmul32u
 	OpHmul64
 	OpHmul64u
+	OpMul32uhilo
+	OpMul64uhilo
 	OpAvg64u
 	OpDiv8
 	OpDiv8u

@@ -1452,6 +1456,7 @@ const (
 	OpDiv32u
 	OpDiv64
 	OpDiv64u
+	OpDiv128u
 	OpMod8
 	OpMod8u
 	OpMod16

@@ -1702,7 +1707,6 @@ const (
 	OpAdd32withcarry
 	OpSub32carry
 	OpSub32withcarry
-	OpMul32uhilo
 	OpSignmask
 	OpZeromask
 	OpCvt32Uto32F

@@ -4904,6 +4908,39 @@ var opcodeTable = [...]opInfo{
 			},
 		},
 	},
+	{
+		name:         "MULQU2",
+		argLen:       2,
+		clobberFlags: true,
+		asm:          x86.AMULQ,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 1},     // AX
+				{1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+			},
+			outputs: []outputInfo{
+				{0, 4}, // DX
+				{1, 1}, // AX
+			},
+		},
+	},
+	{
+		name:         "DIVQU2",
+		argLen:       3,
+		clobberFlags: true,
+		asm:          x86.ADIVQ,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 4},     // DX
+				{1, 1},     // AX
+				{2, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+			},
+			outputs: []outputInfo{
+				{0, 1}, // AX
+				{1, 4}, // DX
+			},
+		},
+	},
 	{
 		name:   "ANDQ",
 		argLen: 2,

@@ -17959,6 +17996,16 @@ var opcodeTable = [...]opInfo{
 		argLen:  2,
 		generic: true,
 	},
+	{
+		name:    "Mul32uhilo",
+		argLen:  2,
+		generic: true,
+	},
+	{
+		name:    "Mul64uhilo",
+		argLen:  2,
+		generic: true,
+	},
 	{
 		name:    "Avg64u",
 		argLen:  2,

@@ -18004,6 +18051,11 @@ var opcodeTable = [...]opInfo{
 		argLen:  2,
 		generic: true,
 	},
+	{
+		name:    "Div128u",
+		argLen:  3,
+		generic: true,
+	},
 	{
 		name:    "Mod8",
 		argLen:  2,

@@ -19313,11 +19365,6 @@ var opcodeTable = [...]opInfo{
 		argLen:  3,
 		generic: true,
 	},
-	{
-		name:    "Mul32uhilo",
-		argLen:  2,
-		generic: true,
-	},
 	{
 		name:    "Signmask",
 		argLen:  1,
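The regmask numbers in the generated table are bitmaps over the AMD64 register file, in the order the inline comments spell out: bit 0 is AX, bit 2 is DX, and 65535 selects all sixteen general-purpose registers. A small decoder (illustrative, not compiler code):

package main

import "fmt"

// regs decodes an opGen.go register mask: bit i selects the i-th register
// in AMD64 allocation order AX, CX, DX, BX, SP, BP, SI, DI, R8..R15.
func regs(mask uint64) []string {
	names := []string{
		"AX", "CX", "DX", "BX", "SP", "BP", "SI", "DI",
		"R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15",
	}
	var out []string
	for i, n := range names {
		if mask&(1<<uint(i)) != 0 {
			out = append(out, n)
		}
	}
	return out
}

func main() {
	fmt.Println(regs(1))     // [AX]
	fmt.Println(regs(4))     // [DX]
	fmt.Println(regs(65535)) // all sixteen general-purpose registers
}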
src/cmd/compile/internal/ssa/rewriteAMD64.go
@@ -394,6 +394,8 @@ func rewriteValueAMD64(v *Value, config *Config) bool {
 		return rewriteValueAMD64_OpCvt64to64F(v, config)
 	case OpDeferCall:
 		return rewriteValueAMD64_OpDeferCall(v, config)
+	case OpDiv128u:
+		return rewriteValueAMD64_OpDiv128u(v, config)
 	case OpDiv16:
 		return rewriteValueAMD64_OpDiv16(v, config)
 	case OpDiv16u:

@@ -612,6 +614,8 @@ func rewriteValueAMD64(v *Value, config *Config) bool {
 		return rewriteValueAMD64_OpMul64(v, config)
 	case OpMul64F:
 		return rewriteValueAMD64_OpMul64F(v, config)
+	case OpMul64uhilo:
+		return rewriteValueAMD64_OpMul64uhilo(v, config)
 	case OpMul8:
 		return rewriteValueAMD64_OpMul8(v, config)
 	case OpNeg16:

@@ -14268,6 +14272,23 @@ func rewriteValueAMD64_OpDeferCall(v *Value, config *Config) bool {
 		return true
 	}
 }
+func rewriteValueAMD64_OpDiv128u(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (Div128u xhi xlo y)
+	// cond:
+	// result: (DIVQU2 xhi xlo y)
+	for {
+		xhi := v.Args[0]
+		xlo := v.Args[1]
+		y := v.Args[2]
+		v.reset(OpAMD64DIVQU2)
+		v.AddArg(xhi)
+		v.AddArg(xlo)
+		v.AddArg(y)
+		return true
+	}
+}
 func rewriteValueAMD64_OpDiv16(v *Value, config *Config) bool {
 	b := v.Block
 	_ = b

@@ -16667,6 +16688,21 @@ func rewriteValueAMD64_OpMul64F(v *Value, config *Config) bool {
 		return true
 	}
 }
+func rewriteValueAMD64_OpMul64uhilo(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (Mul64uhilo x y)
+	// cond:
+	// result: (MULQU2 x y)
+	for {
+		x := v.Args[0]
+		y := v.Args[1]
+		v.reset(OpAMD64MULQU2)
+		v.AddArg(x)
+		v.AddArg(y)
+		return true
+	}
+}
 func rewriteValueAMD64_OpMul8(v *Value, config *Config) bool {
 	b := v.Block
 	_ = b