Commit 2c1b5130 authored by Michael Munday

cmd/compile: add math/bits.{Add,Sub}64 intrinsics on s390x

This CL adds intrinsics for the 64-bit addition and subtraction
functions in math/bits. These intrinsics use the condition code
to propagate the carry or borrow bit.
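
[Editorial note: a minimal pure-Go model of what bits.Add64 and bits.Sub64
compute, for readers of the lowering below. The carry formulas mirror the
portable math/bits fallback; this is not the s390x code path this CL adds.]

    package main

    import "fmt"

    // add64 models bits.Add64: 64-bit addition with a 0-or-1 carry-in and
    // carry-out. Carry-out is 1 iff the true sum does not fit in 64 bits.
    func add64(x, y, ci uint64) (sum, co uint64) {
    	sum = x + y + ci
    	// Overflow iff both top bits were set (x & y), or one was set and
    	// the carry out of the low 63 bits cleared it in the sum (&^ sum).
    	co = ((x & y) | ((x | y) &^ sum)) >> 63
    	return
    }

    // sub64 models bits.Sub64: borrow-out is 1 iff y+bi exceeds x.
    func sub64(x, y, bi uint64) (diff, bo uint64) {
    	diff = x - y - bi
    	bo = ((^x & y) | (^(x ^ y) & diff)) >> 63
    	return
    }

    func main() {
    	fmt.Println(add64(^uint64(0), 1, 0)) // 0 1: the sum wraps, carry-out 1
    	fmt.Println(sub64(0, 1, 0))          // 18446744073709551615 1: borrows
    }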

To make the carry chains more efficient I've removed the
'clobberFlags' property from most of the load and store
operations. Originally these ops clobbered flags when an offset
didn't fit in a signed 20-bit integer; that is no longer true.

As on other platforms, the intrinsics are faster when executed
in a chain rather than in a loop, because we currently need to
spill and restore the carry bit between loop iterations. We may
be able to reduce the need for this on s390x in the future (e.g.
by using compare-and-branch instructions that do not clobber
flags).
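
[Editorial note: a hypothetical 4-word addition illustrating such a chain,
similar in shape to the Add64multiple benchmark below. Each bits.Add64
consumes the carry produced by the previous one, so with this CL the carry
can stay in the condition code for the whole sequence.]

    package main

    import "math/bits"

    // add256 adds two 256-bit integers held as four 64-bit words each.
    // The carry chains straight through all four additions.
    func add256(x, y [4]uint64) (z [4]uint64, carry uint64) {
    	var c uint64
    	z[0], c = bits.Add64(x[0], y[0], 0)
    	z[1], c = bits.Add64(x[1], y[1], c)
    	z[2], c = bits.Add64(x[2], y[2], c)
    	z[3], carry = bits.Add64(x[3], y[3], c)
    	return
    }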

name           old time/op  new time/op  delta
Add64          1.21ns ± 2%  2.03ns ± 2%  +67.18%  (p=0.000 n=7+10)
Add64multiple  2.98ns ± 3%  1.03ns ± 0%  -65.39%  (p=0.000 n=10+9)
Sub64          1.23ns ± 4%  2.03ns ± 1%  +64.85%  (p=0.000 n=10+10)
Sub64multiple  3.73ns ± 4%  1.04ns ± 1%  -72.28%  (p=0.000 n=10+8)

Change-Id: I913bbd5e19e6b95bef52f5bc4f14d6fe40119083
Reviewed-on: https://go-review.googlesource.com/c/go/+/174303
Run-TryBot: Michael Munday <mike.munday@ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
parent 004fb5cb
src/cmd/asm/internal/asm/testdata/s390x.s:

@@ -66,6 +66,7 @@ TEXT main·foo(SB),DUPOK|NOSPLIT,$16-0 // TEXT main.foo(SB), DUPOK|NOSPLIT, $16-
 	ADD	$32768, R1, R2        // b9040021c22800008000
 	ADDC	R1, R2                // b9ea1022
 	ADDC	$1, R1, R2            // ec21000100db
+	ADDC	$-1, R1, R2           // ec21ffff00db
 	ADDC	R1, R2, R3            // b9ea1032
 	ADDW	R1, R2                // 1a21
 	ADDW	R1, R2, R3            // b9f81032
src/cmd/compile/internal/gc/ssa.go:

@@ -3575,14 +3575,14 @@ func init() {
 		func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
 			return s.newValue3(ssa.OpAdd64carry, types.NewTuple(types.Types[TUINT64], types.Types[TUINT64]), args[0], args[1], args[2])
 		},
-		sys.AMD64, sys.ARM64, sys.PPC64)
-	alias("math/bits", "Add", "math/bits", "Add64", sys.ArchAMD64, sys.ArchARM64, sys.ArchPPC64)
+		sys.AMD64, sys.ARM64, sys.PPC64, sys.S390X)
+	alias("math/bits", "Add", "math/bits", "Add64", sys.ArchAMD64, sys.ArchARM64, sys.ArchPPC64, sys.ArchS390X)
 	addF("math/bits", "Sub64",
 		func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
 			return s.newValue3(ssa.OpSub64borrow, types.NewTuple(types.Types[TUINT64], types.Types[TUINT64]), args[0], args[1], args[2])
 		},
-		sys.AMD64, sys.ARM64)
-	alias("math/bits", "Sub", "math/bits", "Sub64", sys.ArchAMD64, sys.ArchARM64)
+		sys.AMD64, sys.ARM64, sys.S390X)
+	alias("math/bits", "Sub", "math/bits", "Sub64", sys.ArchAMD64, sys.ArchARM64, sys.ArchS390X)
 	addF("math/bits", "Div64",
 		func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
 			// check for divide-by-zero/overflow and panic with appropriate message
src/cmd/compile/internal/s390x/ssa.go:

@@ -184,6 +184,37 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 		if r != r1 {
 			p.Reg = r1
 		}
+	case ssa.OpS390XADDC:
+		r1 := v.Reg0()
+		r2 := v.Args[0].Reg()
+		r3 := v.Args[1].Reg()
+		if r1 == r2 {
+			r2, r3 = r3, r2
+		}
+		p := opregreg(s, v.Op.Asm(), r1, r2)
+		if r3 != r1 {
+			p.Reg = r3
+		}
+	case ssa.OpS390XSUBC:
+		r1 := v.Reg0()
+		r2 := v.Args[0].Reg()
+		r3 := v.Args[1].Reg()
+		p := opregreg(s, v.Op.Asm(), r1, r3)
+		if r1 != r2 {
+			p.Reg = r2
+		}
+	case ssa.OpS390XADDE, ssa.OpS390XSUBE:
+		r1 := v.Reg0()
+		if r1 != v.Args[0].Reg() {
+			v.Fatalf("input[0] and output not in same register %s", v.LongString())
+		}
+		r2 := v.Args[1].Reg()
+		opregreg(s, v.Op.Asm(), r1, r2)
+	case ssa.OpS390XADDCconst:
+		r1 := v.Reg0()
+		r3 := v.Args[0].Reg()
+		i2 := int64(int16(v.AuxInt))
+		opregregimm(s, v.Op.Asm(), r1, r3, i2)
 	// 2-address opcode arithmetic
 	case ssa.OpS390XMULLD, ssa.OpS390XMULLW,
 		ssa.OpS390XMULHD, ssa.OpS390XMULHDU,
@@ -553,7 +584,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 		p.To.Reg = v.Reg()
 	case ssa.OpS390XInvertFlags:
 		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
-	case ssa.OpS390XFlagEQ, ssa.OpS390XFlagLT, ssa.OpS390XFlagGT:
+	case ssa.OpS390XFlagEQ, ssa.OpS390XFlagLT, ssa.OpS390XFlagGT, ssa.OpS390XFlagOV:
 		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
 	case ssa.OpS390XAddTupleFirst32, ssa.OpS390XAddTupleFirst64:
 		v.Fatalf("AddTupleFirst* should never make it to codegen %v", v.LongString())
src/cmd/compile/internal/ssa/gen/S390X.rules:

@@ -119,6 +119,18 @@
 (Bswap64 x) -> (MOVDBR x)
 (Bswap32 x) -> (MOVWBR x)
 
+// add with carry
+(Select0 (Add64carry x y c))
+  -> (Select0 <typ.UInt64> (ADDE x y (Select1 <types.TypeFlags> (ADDCconst c [-1]))))
+(Select1 (Add64carry x y c))
+  -> (Select0 <typ.UInt64> (ADDE (MOVDconst [0]) (MOVDconst [0]) (Select1 <types.TypeFlags> (ADDE x y (Select1 <types.TypeFlags> (ADDCconst c [-1]))))))
+
+// subtract with borrow
+(Select0 (Sub64borrow x y c))
+  -> (Select0 <typ.UInt64> (SUBE x y (Select1 <types.TypeFlags> (SUBC (MOVDconst [0]) c))))
+(Select1 (Sub64borrow x y c))
+  -> (NEG (Select0 <typ.UInt64> (SUBE (MOVDconst [0]) (MOVDconst [0]) (Select1 <types.TypeFlags> (SUBE x y (Select1 <types.TypeFlags> (SUBC (MOVDconst [0]) c)))))))
+
 // math package intrinsics
 (Sqrt x) -> (FSQRT x)
 (Floor x) -> (FIDBR [7] x)
@@ -1121,6 +1133,43 @@
 (MOVBreg (ANDWconst [m] x)) && int8(m)  >= 0 -> (MOVWZreg (ANDWconst <typ.UInt32> [int64( uint8(m))] x))
 (MOVHreg (ANDWconst [m] x)) && int16(m) >= 0 -> (MOVWZreg (ANDWconst <typ.UInt32> [int64(uint16(m))] x))
 
+// carry flag generation
+// (only constant fold carry of zero)
+(Select1 (ADDCconst (MOVDconst [c]) [d]))
+  && uint64(c+d) >= uint64(c) && c+d == 0
+  -> (FlagEQ)
+(Select1 (ADDCconst (MOVDconst [c]) [d]))
+  && uint64(c+d) >= uint64(c) && c+d != 0
+  -> (FlagLT)
+
+// borrow flag generation
+// (only constant fold borrow of zero)
+(Select1 (SUBC (MOVDconst [c]) (MOVDconst [d])))
+  && uint64(d) <= uint64(c) && c-d == 0
+  -> (FlagGT)
+(Select1 (SUBC (MOVDconst [c]) (MOVDconst [d])))
+  && uint64(d) <= uint64(c) && c-d != 0
+  -> (FlagOV)
+
+// add with carry
+(ADDE x y (FlagEQ)) -> (ADDC x y)
+(ADDE x y (FlagLT)) -> (ADDC x y)
+(ADDC x (MOVDconst [c])) && is16Bit(c) -> (ADDCconst x [c])
+(Select0 (ADDCconst (MOVDconst [c]) [d])) -> (MOVDconst [c+d])
+
+// subtract with borrow
+(SUBE x y (FlagGT)) -> (SUBC x y)
+(SUBE x y (FlagOV)) -> (SUBC x y)
+(Select0 (SUBC (MOVDconst [c]) (MOVDconst [d]))) -> (MOVDconst [c-d])
+
+// collapse carry chain
+(ADDE x y (Select1 (ADDCconst [-1] (Select0 (ADDE (MOVDconst [0]) (MOVDconst [0]) c)))))
+  -> (ADDE x y c)
+
+// collapse borrow chain
+(SUBE x y (Select1 (SUBC (MOVDconst [0]) (NEG (Select0 (SUBE (MOVDconst [0]) (MOVDconst [0]) c))))))
+  -> (SUBE x y c)
+
 // fused multiply-add
 (FADD (FMUL y z) x) -> (FMADD x y z)
 (FADDS (FMULS y z) x) -> (FMADDS x y z)
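
[Editorial note: the two identities these rules rely on can be checked in
plain Go (a sketch, not compiler code; it assumes the carry/borrow inputs
are 0 or 1, as bits.Add64/Sub64 require). ADDCconst c [-1] sets the carry
flag exactly when c is nonzero, and SUBC 0, c sets the borrow exactly when
c is nonzero; the Select1 lowerings then read the flag back out as a 0/1
integer via ADDE 0, 0 (or NEG of SUBE 0, 0).]

    package main

    import (
    	"fmt"
    	"math/bits"
    )

    func main() {
    	for _, c := range []uint64{0, 1} {
    		// ADDCconst c [-1]: c + 0xffffffffffffffff overflows iff
    		// c != 0, so the resulting carry flag equals c.
    		_, carry := bits.Add64(c, ^uint64(0), 0)

    		// SUBC (MOVDconst [0]) c: 0 - c borrows iff c != 0, so the
    		// resulting borrow flag equals c.
    		_, borrow := bits.Sub64(0, c, 0)

    		fmt.Println(carry == c, borrow == c) // true true
    	}
    }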
[diff collapsed: large generated file omitted]
src/cmd/internal/obj/s390x/asmz.go:

@@ -3139,14 +3139,12 @@ func (c *ctxtz) asmout(p *obj.Prog, asm *[]byte) {
 			}
 			r = p.To.Reg
 		}
-		if r == p.To.Reg {
-			if opri != 0 && int64(int16(v)) == v {
-				zRI(opri, uint32(p.To.Reg), uint32(v), asm)
-			} else {
-				zRIL(_a, opril, uint32(p.To.Reg), uint32(v), asm)
-			}
-		} else {
+		if opri != 0 && r == p.To.Reg && int64(int16(v)) == v {
+			zRI(opri, uint32(p.To.Reg), uint32(v), asm)
+		} else if oprie != 0 && int64(int16(v)) == v {
 			zRIE(_d, oprie, uint32(p.To.Reg), uint32(r), uint32(v), 0, 0, 0, 0, asm)
+		} else {
+			zRIL(_a, opril, uint32(p.To.Reg), uint32(v), asm)
 		}
 
 	case 23: // 64-bit logical op $constant reg
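
[Editorial note: the asmz.go change flattens the encoding choice for
immediate arithmetic into a single priority order: the 2-operand 16-bit RI
form when the opcode has one and the destination register is also the
source, otherwise the 3-operand 16-bit RIE form when available, otherwise
the 32-bit-immediate RIL form. A standalone sketch of that selection logic;
the function and parameter names are mine, not the compiler's.]

    package main

    import "fmt"

    // pickForm mirrors the rewritten branch above. hasRI/hasRIE are false
    // when the opcode lacks that encoding (opri/oprie == 0 in asmz.go);
    // sameReg is the r == p.To.Reg condition.
    func pickForm(hasRI, hasRIE, sameReg bool, v int64) string {
    	fits16 := int64(int16(v)) == v
    	switch {
    	case hasRI && sameReg && fits16:
    		return "RI (2-operand, 16-bit immediate)"
    	case hasRIE && fits16:
    		return "RIE (3-operand, 16-bit immediate)"
    	default:
    		return "RIL (2-operand, 32-bit immediate)"
    	}
    }

    func main() {
    	fmt.Println(pickForm(true, true, true, -1))      // RI
    	fmt.Println(pickForm(false, true, false, -1))    // RIE
    	fmt.Println(pickForm(true, false, true, 1<<20))  // RIL
    }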
test/codegen/mathbits.go:

@@ -377,32 +377,38 @@ func IterateBits8(n uint8) int {
 func Add(x, y, ci uint) (r, co uint) {
 	// arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP"
 	// amd64:"NEGL","ADCQ","SBBQ","NEGQ"
+	// s390x:"ADDE","ADDC\t[$]-1,"
 	return bits.Add(x, y, ci)
 }
 
 func AddC(x, ci uint) (r, co uint) {
 	// arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP"
 	// amd64:"NEGL","ADCQ","SBBQ","NEGQ"
+	// s390x:"ADDE","ADDC\t[$]-1,"
 	return bits.Add(x, 7, ci)
 }
 
 func AddZ(x, y uint) (r, co uint) {
 	// arm64:"ADDS","ADC",-"ADCS",-"ADD\t",-"CMP"
 	// amd64:"ADDQ","SBBQ","NEGQ",-"NEGL",-"ADCQ"
+	// s390x:"ADDC",-"ADDC\t[$]-1,"
 	return bits.Add(x, y, 0)
 }
 
 func AddR(x, y, ci uint) uint {
 	// arm64:"ADDS","ADCS",-"ADD\t",-"CMP"
 	// amd64:"NEGL","ADCQ",-"SBBQ",-"NEGQ"
+	// s390x:"ADDE","ADDC\t[$]-1,"
 	r, _ := bits.Add(x, y, ci)
 	return r
 }
 
 func AddM(p, q, r *[3]uint) {
 	var c uint
 	r[0], c = bits.Add(p[0], q[0], c)
 	// arm64:"ADCS",-"ADD\t",-"CMP"
 	// amd64:"ADCQ",-"NEGL",-"SBBQ",-"NEGQ"
+	// s390x:"ADDE",-"ADDC\t[$]-1,"
 	r[1], c = bits.Add(p[1], q[1], c)
 	r[2], c = bits.Add(p[2], q[2], c)
 }
@@ -412,6 +418,7 @@ func Add64(x, y, ci uint64) (r, co uint64) {
 	// amd64:"NEGL","ADCQ","SBBQ","NEGQ"
 	// ppc64: "ADDC", "ADDE", "ADDZE"
 	// ppc64le: "ADDC", "ADDE", "ADDZE"
+	// s390x:"ADDE","ADDC\t[$]-1,"
 	return bits.Add64(x, y, ci)
 }
@@ -420,6 +427,7 @@ func Add64C(x, ci uint64) (r, co uint64) {
 	// amd64:"NEGL","ADCQ","SBBQ","NEGQ"
 	// ppc64: "ADDC", "ADDE", "ADDZE"
 	// ppc64le: "ADDC", "ADDE", "ADDZE"
+	// s390x:"ADDE","ADDC\t[$]-1,"
 	return bits.Add64(x, 7, ci)
 }
@@ -428,6 +436,7 @@ func Add64Z(x, y uint64) (r, co uint64) {
 	// amd64:"ADDQ","SBBQ","NEGQ",-"NEGL",-"ADCQ"
 	// ppc64: "ADDC", "ADDE", "ADDZE"
 	// ppc64le: "ADDC", "ADDE", "ADDZE"
+	// s390x:"ADDC",-"ADDC\t[$]-1,"
 	return bits.Add64(x, y, 0)
 }
@@ -436,6 +445,7 @@ func Add64R(x, y, ci uint64) uint64 {
 	// amd64:"NEGL","ADCQ",-"SBBQ",-"NEGQ"
 	// ppc64: "ADDC", "ADDE", "ADDZE"
 	// ppc64le: "ADDC", "ADDE", "ADDZE"
+	// s390x:"ADDE","ADDC\t[$]-1,"
 	r, _ := bits.Add64(x, y, ci)
 	return r
 }
@@ -446,6 +456,7 @@ func Add64M(p, q, r *[3]uint64) {
 	// amd64:"ADCQ",-"NEGL",-"SBBQ",-"NEGQ"
 	// ppc64: "ADDC", "ADDE", "ADDZE"
 	// ppc64le: "ADDC", "ADDE", "ADDZE"
+	// s390x:"ADDE",-"ADDC\t[$]-1,"
 	r[1], c = bits.Add64(p[1], q[1], c)
 	r[2], c = bits.Add64(p[2], q[2], c)
 }
@@ -457,24 +468,28 @@ func Add64M(p, q, r *[3]uint64) {
 func Sub(x, y, ci uint) (r, co uint) {
 	// amd64:"NEGL","SBBQ","NEGQ"
 	// arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP"
+	// s390x:"SUBE"
 	return bits.Sub(x, y, ci)
 }
 
 func SubC(x, ci uint) (r, co uint) {
 	// amd64:"NEGL","SBBQ","NEGQ"
 	// arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP"
+	// s390x:"SUBE"
 	return bits.Sub(x, 7, ci)
 }
 
 func SubZ(x, y uint) (r, co uint) {
 	// amd64:"SUBQ","SBBQ","NEGQ",-"NEGL"
 	// arm64:"SUBS","NGC","NEG",-"SBCS",-"ADD",-"SUB\t",-"CMP"
+	// s390x:"SUBC"
 	return bits.Sub(x, y, 0)
 }
 
 func SubR(x, y, ci uint) uint {
 	// amd64:"NEGL","SBBQ",-"NEGQ"
 	// arm64:"NEGS","SBCS",-"NGC",-"NEG\t",-"ADD",-"SUB",-"CMP"
+	// s390x:"SUBE"
 	r, _ := bits.Sub(x, y, ci)
 	return r
 }
@@ -483,6 +498,7 @@ func SubM(p, q, r *[3]uint) {
 	r[0], c = bits.Sub(p[0], q[0], c)
 	// amd64:"SBBQ",-"NEGL",-"NEGQ"
 	// arm64:"SBCS",-"NEGS",-"NGC",-"NEG",-"ADD",-"SUB",-"CMP"
+	// s390x:"SUBE"
 	r[1], c = bits.Sub(p[1], q[1], c)
 	r[2], c = bits.Sub(p[2], q[2], c)
 }
@@ -490,24 +506,28 @@ func SubM(p, q, r *[3]uint) {
 func Sub64(x, y, ci uint64) (r, co uint64) {
 	// amd64:"NEGL","SBBQ","NEGQ"
 	// arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP"
+	// s390x:"SUBE"
 	return bits.Sub64(x, y, ci)
 }
 
 func Sub64C(x, ci uint64) (r, co uint64) {
 	// amd64:"NEGL","SBBQ","NEGQ"
 	// arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP"
+	// s390x:"SUBE"
 	return bits.Sub64(x, 7, ci)
 }
 
 func Sub64Z(x, y uint64) (r, co uint64) {
 	// amd64:"SUBQ","SBBQ","NEGQ",-"NEGL"
 	// arm64:"SUBS","NGC","NEG",-"SBCS",-"ADD",-"SUB\t",-"CMP"
+	// s390x:"SUBC"
 	return bits.Sub64(x, y, 0)
 }
 
 func Sub64R(x, y, ci uint64) uint64 {
 	// amd64:"NEGL","SBBQ",-"NEGQ"
 	// arm64:"NEGS","SBCS",-"NGC",-"NEG\t",-"ADD",-"SUB",-"CMP"
+	// s390x:"SUBE"
 	r, _ := bits.Sub64(x, y, ci)
 	return r
 }
@@ -516,6 +536,7 @@ func Sub64M(p, q, r *[3]uint64) {
 	r[0], c = bits.Sub64(p[0], q[0], c)
 	// amd64:"SBBQ",-"NEGL",-"NEGQ"
 	// arm64:"SBCS",-"NEGS",-"NGC",-"NEG",-"ADD",-"SUB",-"CMP"
+	// s390x:"SUBE"
 	r[1], c = bits.Sub64(p[1], q[1], c)
 	r[2], c = bits.Sub64(p[2], q[2], c)
 }
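
[Editorial note: in test/codegen each platform comment is a list of regexps
that must match the assembly generated for the enclosing function; a leading
'-' (as in -"ADDC\t[$]-1," above) asserts the pattern must not appear. A
hypothetical test in the same style, not part of this CL, asserting that a
chained add uses the carry-propagating form:]

    // Add64Chain is illustrative only: the checker would match the
    // patterns below against this function's generated s390x assembly.
    func Add64Chain(x, y, z, ci uint64) uint64 {
    	r, c := bits.Add64(x, y, ci)
    	// s390x:"ADDE"
    	r, _ = bits.Add64(r, z, c)
    	return r
    }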