Commit 3a907282 authored by Russ Cox's avatar Russ Cox

math/big: make division faster

- Add new BenchmarkQuoRem.
- Eliminate allocation in divLarge nat pool
- Unroll mulAddVWW body 4x
- Remove some redundant slice loads in divLarge

name      old time/op  new time/op  delta
QuoRem-8  2.18µs ± 1%  1.93µs ± 1%  -11.38%  (p=0.000 n=19+18)

The starting point in the comparison here is Cherry's
pending CL to turn mulWW and divWW into intrinsics.
The optimizations in divLarge work best because all
the function calls are gone. The effect of this CL is not
as large if you don't assume Cherry's CL.

Change-Id: Ia6138907489c5b9168497912e43705634e163b35
Reviewed-on: https://go-review.googlesource.com/30613
Run-TryBot: Russ Cox <rsc@golang.org>
Reviewed-by: default avatarCherry Zhang <cherryyz@google.com>
parent 68331750
...@@ -326,6 +326,41 @@ TEXT ·mulAddVWW(SB),NOSPLIT,$0 ...@@ -326,6 +326,41 @@ TEXT ·mulAddVWW(SB),NOSPLIT,$0
MOVQ r+56(FP), CX // c = r MOVQ r+56(FP), CX // c = r
MOVQ z_len+8(FP), R11 MOVQ z_len+8(FP), R11
MOVQ $0, BX // i = 0 MOVQ $0, BX // i = 0
CMPQ R11, $4
JL E5
U5: // i+4 <= n
// regular loop body unrolled 4x
MOVQ (0*8)(R8)(BX*8), AX
MULQ R9
ADDQ CX, AX
ADCQ $0, DX
MOVQ AX, (0*8)(R10)(BX*8)
MOVQ DX, CX
MOVQ (1*8)(R8)(BX*8), AX
MULQ R9
ADDQ CX, AX
ADCQ $0, DX
MOVQ AX, (1*8)(R10)(BX*8)
MOVQ DX, CX
MOVQ (2*8)(R8)(BX*8), AX
MULQ R9
ADDQ CX, AX
ADCQ $0, DX
MOVQ AX, (2*8)(R10)(BX*8)
MOVQ DX, CX
MOVQ (3*8)(R8)(BX*8), AX
MULQ R9
ADDQ CX, AX
ADCQ $0, DX
MOVQ AX, (3*8)(R10)(BX*8)
MOVQ DX, CX
ADDQ $4, BX // i += 4
LEAQ 4(BX), DX
CMPQ DX, R11
JLE U5
JMP E5 JMP E5
L5: MOVQ (R8)(BX*8), AX L5: MOVQ (R8)(BX*8), AX
......
...@@ -478,6 +478,18 @@ func TestQuoStepD6(t *testing.T) { ...@@ -478,6 +478,18 @@ func TestQuoStepD6(t *testing.T) {
} }
} }
func BenchmarkQuoRem(b *testing.B) {
x, _ := new(Int).SetString("153980389784927331788354528594524332344709972855165340650588877572729725338415474372475094155672066328274535240275856844648695200875763869073572078279316458648124537905600131008790701752441155668003033945258023841165089852359980273279085783159654751552359397986180318708491098942831252291841441726305535546071", 0)
y, _ := new(Int).SetString("7746362281539803897849273317883545285945243323447099728551653406505888775727297253384154743724750941556720663282745352402758568446486952008757638690735720782793164586481245379056001310087907017524411556680030339452580238411650898523599802732790857831596547515523593979861803187084910989428312522918414417263055355460715745539358014631136245887418412633787074173796862711588221766398229333338511838891484974940633857861775630560092874987828057333663969469797013996401149696897591265769095952887917296740109742927689053276850469671231961384715398038978492733178835452859452433234470997285516534065058887757272972533841547437247509415567206632827453524027585684464869520087576386907357207827931645864812453790560013100879070175244115566800303394525802384116508985235998027327908578315965475155235939798618031870849109894283125229184144172630553554607112725169432413343763989564437170644270643461665184965150423819594083121075825", 0)
q := new(Int)
r := new(Int)
b.ResetTimer()
for i := 0; i < b.N; i++ {
q.QuoRem(y, x, r)
}
}
var bitLenTests = []struct { var bitLenTests = []struct {
in string in string
out int out int
...@@ -794,6 +806,17 @@ func TestProbablyPrime(t *testing.T) { ...@@ -794,6 +806,17 @@ func TestProbablyPrime(t *testing.T) {
} }
} }
func BenchmarkProbablyPrime(b *testing.B) {
p, _ := new(Int).SetString("203956878356401977405765866929034577280193993314348263094772646453283062722701277632936616063144088173312372882677123879538709400158306567338328279154499698366071906766440037074217117805690872792848149112022286332144876183376326512083574821647933992961249917319836219304274280243803104015000563790123", 10)
for _, rep := range []int{1, 5, 10, 20} {
b.Run(fmt.Sprintf("Rep=%d", rep), func(b *testing.B) {
for i := 0; i < b.N; i++ {
p.ProbablyPrime(rep)
}
})
}
}
type intShiftTest struct { type intShiftTest struct {
in string in string
shift uint shift uint
......
...@@ -542,16 +542,21 @@ func (z nat) div(z2, u, v nat) (q, r nat) { ...@@ -542,16 +542,21 @@ func (z nat) div(z2, u, v nat) (q, r nat) {
return return
} }
// getNat returns a nat of len n. The contents may not be zero. // getNat returns a *nat of len n. The contents may not be zero.
func getNat(n int) nat { // The pool holds *nat to avoid allocation when converting to interface{}.
var z nat func getNat(n int) *nat {
var z *nat
if v := natPool.Get(); v != nil { if v := natPool.Get(); v != nil {
z = v.(nat) z = v.(*nat)
} }
return z.make(n) if z == nil {
z = new(nat)
}
*z = z.make(n)
return z
} }
func putNat(x nat) { func putNat(x *nat) {
natPool.Put(x) natPool.Put(x)
} }
...@@ -575,7 +580,8 @@ func (z nat) divLarge(u, uIn, v nat) (q, r nat) { ...@@ -575,7 +580,8 @@ func (z nat) divLarge(u, uIn, v nat) (q, r nat) {
} }
q = z.make(m + 1) q = z.make(m + 1)
qhatv := getNat(n + 1) qhatvp := getNat(n + 1)
qhatv := *qhatvp
if alias(u, uIn) || alias(u, v) { if alias(u, uIn) || alias(u, v) {
u = nil // u is an alias for uIn or v - cannot reuse u = nil // u is an alias for uIn or v - cannot reuse
} }
...@@ -583,36 +589,40 @@ func (z nat) divLarge(u, uIn, v nat) (q, r nat) { ...@@ -583,36 +589,40 @@ func (z nat) divLarge(u, uIn, v nat) (q, r nat) {
u.clear() // TODO(gri) no need to clear if we allocated a new u u.clear() // TODO(gri) no need to clear if we allocated a new u
// D1. // D1.
var v1 nat var v1p *nat
shift := nlz(v[n-1]) shift := nlz(v[n-1])
if shift > 0 { if shift > 0 {
// do not modify v, it may be used by another goroutine simultaneously // do not modify v, it may be used by another goroutine simultaneously
v1 = getNat(n) v1p = getNat(n)
v1 := *v1p
shlVU(v1, v, shift) shlVU(v1, v, shift)
v = v1 v = v1
} }
u[len(uIn)] = shlVU(u[0:len(uIn)], uIn, shift) u[len(uIn)] = shlVU(u[0:len(uIn)], uIn, shift)
// D2. // D2.
vn1 := v[n-1]
for j := m; j >= 0; j-- { for j := m; j >= 0; j-- {
// D3. // D3.
qhat := Word(_M) qhat := Word(_M)
if u[j+n] != v[n-1] { if ujn := u[j+n]; ujn != vn1 {
var rhat Word var rhat Word
qhat, rhat = divWW(u[j+n], u[j+n-1], v[n-1]) qhat, rhat = divWW(ujn, u[j+n-1], vn1)
// x1 | x2 = q̂v_{n-2} // x1 | x2 = q̂v_{n-2}
x1, x2 := mulWW(qhat, v[n-2]) vn2 := v[n-2]
x1, x2 := mulWW(qhat, vn2)
// test if q̂v_{n-2} > br̂ + u_{j+n-2} // test if q̂v_{n-2} > br̂ + u_{j+n-2}
for greaterThan(x1, x2, rhat, u[j+n-2]) { ujn2 := u[j+n-2]
for greaterThan(x1, x2, rhat, ujn2) {
qhat-- qhat--
prevRhat := rhat prevRhat := rhat
rhat += v[n-1] rhat += vn1
// v[n-1] >= 0, so this tests for overflow. // v[n-1] >= 0, so this tests for overflow.
if rhat < prevRhat { if rhat < prevRhat {
break break
} }
x1, x2 = mulWW(qhat, v[n-2]) x1, x2 = mulWW(qhat, vn2)
} }
} }
...@@ -628,10 +638,10 @@ func (z nat) divLarge(u, uIn, v nat) (q, r nat) { ...@@ -628,10 +638,10 @@ func (z nat) divLarge(u, uIn, v nat) (q, r nat) {
q[j] = qhat q[j] = qhat
} }
if v1 != nil { if v1p != nil {
putNat(v1) putNat(v1p)
} }
putNat(qhatv) putNat(qhatvp)
q = q.norm() q = q.norm()
shrVU(u, u, shift) shrVU(u, u, shift)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment