Commit e875fe42 authored by Adam Langley

vendor/golang_org/x/crypto/curve25519: new package

This change imports the curve25519 package from x/crypto at revision
594708b89f21ece706681be23d04a6513a22de6e.

Change-Id: I379eaa71492959e404259fc1273d0057573bc243
Reviewed-on: https://go-review.googlesource.com/30822
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
parent 9d88292c
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// This code was translated into a form compatible with 6a from the public
// domain sources in SUPERCOP: http://bench.cr.yp.to/supercop.html
// +build amd64,!gccgo,!appengine
// REDMASK51 is 2^51 - 1: a mask that keeps the low 51 bits of a 64-bit word.
DATA ·REDMASK51(SB)/8, $0x0007FFFFFFFFFFFF
GLOBL ·REDMASK51(SB), 8, $8
// _121666_213 is 121666 * 2^13 (= 996687872).
DATA ·_121666_213(SB)/8, $996687872
GLOBL ·_121666_213(SB), 8, $8
// _2P0 is 2^52 - 38 = 2*(2^51 - 19). Taken as limb 0 together with four
// copies of _2P1234 as limbs 1..4 in radix 2^51, the value is 2*(2^255 - 19).
DATA ·_2P0(SB)/8, $0xFFFFFFFFFFFDA
GLOBL ·_2P0(SB), 8, $8
// _2P1234 is 2^52 - 2 = 2*(2^51 - 1); see _2P0.
DATA ·_2P1234(SB)/8, $0xFFFFFFFFFFFFE
GLOBL ·_2P1234(SB), 8, $8
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// This code was translated into a form compatible with 6a from the public
// domain sources in SUPERCOP: http://bench.cr.yp.to/supercop.html
// +build amd64,!gccgo,!appengine
// func cswap(inout *[5]uint64, v uint64)
//
// cswap conditionally exchanges, in place, the ten quadwords at byte offsets
// 0..72 from inout with the ten quadwords at byte offsets 80..152. The
// exchange takes effect when v == 1; for any other v every word is written
// back with the value it already had. Selection is done with CMOVQEQ rather
// than a branch, so the same instructions and the same loads and stores are
// executed whatever the value of v.
//
// NOTE(review): the body touches 160 bytes starting at inout, more than the
// 40 bytes of the *[5]uint64 named in the prototype comment; presumably the
// caller passes a pointer into a larger array -- confirm against the Go
// declaration and call site.
TEXT ·cswap(SB),7,$0
MOVQ inout+0(FP),DI
MOVQ v+8(FP),SI
// Set the flags once from v == 1. SI is reused as scratch straight away; the
// MOVQ and CMOVQ instructions that follow do not modify the flags, so every
// CMOVQEQ below tests this single comparison.
CMPQ SI,$1
// Words at offsets 0/80 and 8/88. R9 holds the old value of the first
// operand so that both halves of the swap see the pre-swap values.
MOVQ 0(DI),SI
MOVQ 80(DI),DX
MOVQ 8(DI),CX
MOVQ 88(DI),R8
MOVQ SI,R9
CMOVQEQ DX,SI
CMOVQEQ R9,DX
MOVQ CX,R9
CMOVQEQ R8,CX
CMOVQEQ R9,R8
MOVQ SI,0(DI)
MOVQ DX,80(DI)
MOVQ CX,8(DI)
MOVQ R8,88(DI)
// Words at offsets 16/96 and 24/104.
MOVQ 16(DI),SI
MOVQ 96(DI),DX
MOVQ 24(DI),CX
MOVQ 104(DI),R8
MOVQ SI,R9
CMOVQEQ DX,SI
CMOVQEQ R9,DX
MOVQ CX,R9
CMOVQEQ R8,CX
CMOVQEQ R9,R8
MOVQ SI,16(DI)
MOVQ DX,96(DI)
MOVQ CX,24(DI)
MOVQ R8,104(DI)
// Words at offsets 32/112 and 40/120.
MOVQ 32(DI),SI
MOVQ 112(DI),DX
MOVQ 40(DI),CX
MOVQ 120(DI),R8
MOVQ SI,R9
CMOVQEQ DX,SI
CMOVQEQ R9,DX
MOVQ CX,R9
CMOVQEQ R8,CX
CMOVQEQ R9,R8
MOVQ SI,32(DI)
MOVQ DX,112(DI)
MOVQ CX,40(DI)
MOVQ R8,120(DI)
// Words at offsets 48/128 and 56/136.
MOVQ 48(DI),SI
MOVQ 128(DI),DX
MOVQ 56(DI),CX
MOVQ 136(DI),R8
MOVQ SI,R9
CMOVQEQ DX,SI
CMOVQEQ R9,DX
MOVQ CX,R9
CMOVQEQ R8,CX
CMOVQEQ R9,R8
MOVQ SI,48(DI)
MOVQ DX,128(DI)
MOVQ CX,56(DI)
MOVQ R8,136(DI)
// Words at offsets 64/144 and 72/152.
MOVQ 64(DI),SI
MOVQ 144(DI),DX
MOVQ 72(DI),CX
MOVQ 152(DI),R8
MOVQ SI,R9
CMOVQEQ DX,SI
CMOVQEQ R9,DX
MOVQ CX,R9
CMOVQEQ R8,CX
CMOVQEQ R9,R8
MOVQ SI,64(DI)
MOVQ DX,144(DI)
MOVQ CX,72(DI)
MOVQ R8,152(DI)
// DI and SI are copied into AX and DX before returning. The prototype above
// declares no results, so this looks like a leftover from the calling
// convention of the code this was translated from -- TODO confirm.
MOVQ DI,AX
MOVQ SI,DX
RET
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// We have an implementation in amd64 assembly so this code is only run on
// non-amd64 platforms. The amd64 assembly does not support gccgo.
// +build !amd64 gccgo appengine
package curve25519
// This code is a port of the public domain, "ref10" implementation of
// curve25519 from SUPERCOP 20130419 by D. J. Bernstein.
// fieldElement represents an element of the field GF(2^255 - 19). An element
// t, entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77
// t[3]+2^102 t[4]+...+2^230 t[9]. Bounds on each t[i] vary depending on
// context.
//
// The limb weights therefore alternate between 26 bits (even indices) and 25
// bits (odd indices). Limbs are signed, so an element does not have a unique
// representation until it has been reduced.
type fieldElement [10]int32
// feZero sets every limb of fe to zero.
func feZero(fe *fieldElement) {
	*fe = fieldElement{}
}
// feOne sets fe to the field element 1: limb 0 is one, all other limbs zero.
func feOne(fe *fieldElement) {
	*fe = fieldElement{1}
}
// feAdd sets dst = a + b limb by limb, without carrying. dst may alias a or b.
func feAdd(dst, a, b *fieldElement) {
	for i, limb := range a {
		dst[i] = limb + b[i]
	}
}
// feSub sets dst = a - b limb by limb, without carrying. dst may alias a or b.
func feSub(dst, a, b *fieldElement) {
	for i, limb := range a {
		dst[i] = limb - b[i]
	}
}
// feCopy sets dst to a copy of src.
func feCopy(dst, src *fieldElement) {
	*dst = *src
}
// feCSwap exchanges f and g when b == 1 and leaves both untouched when
// b == 0. The choice is made with a bit mask rather than a branch.
//
// Preconditions: b in {0,1}.
func feCSwap(f, g *fieldElement, b int32) {
	mask := -b // 0 when b == 0, all ones when b == 1
	for i := 0; i < len(f); i++ {
		diff := mask & (f[i] ^ g[i])
		f[i] ^= diff
		g[i] ^= diff
	}
}
// load3 decodes the first three bytes of in as a little-endian 24-bit
// unsigned value.
func load3(in []byte) int64 {
	return int64(in[0]) | int64(in[1])<<8 | int64(in[2])<<16
}
// load4 decodes the first four bytes of in as a little-endian 32-bit
// unsigned value.
func load4(in []byte) int64 {
	return int64(in[0]) | int64(in[1])<<8 | int64(in[2])<<16 | int64(in[3])<<24
}
// feFromBytes decodes the 32-byte little-endian string src into dst.
//
// Only the low 255 bits of src are used: the most significant bit of src[31]
// is masked off, as RFC 7748 requires for X25519 u-coordinates. Without the
// mask that bit would be folded into limb 0 as +19 (2^255 = 19 mod p), so two
// inputs differing only in the top bit would decode to different elements.
//
// The bytes are split into limbs of alternating 26 and 25 bits, and a single
// round of carries brings each limb into its nominal signed range.
func feFromBytes(dst *fieldElement, src *[32]byte) {
	t := [10]int64{
		load4(src[:]),
		load3(src[4:]) << 6,
		load3(src[7:]) << 5,
		load3(src[10:]) << 3,
		load3(src[13:]) << 2,
		load4(src[16:]),
		load3(src[20:]) << 7,
		load3(src[23:]) << 5,
		load3(src[26:]) << 4,
		(load3(src[29:]) & 0x7fffff) << 2, // drop bit 255
	}

	// Carry out of the odd (25-bit) limbs. These carries are independent of
	// each other; the carry out of limb 9 wraps around to limb 0 times 19.
	for i := 1; i < 10; i += 2 {
		c := (t[i] + 1<<24) >> 25
		if i == 9 {
			t[0] += 19 * c
		} else {
			t[i+1] += c
		}
		t[i] -= c << 25
	}
	// Carry out of the even (26-bit) limbs into their odd neighbours.
	for i := 0; i < 10; i += 2 {
		c := (t[i] + 1<<25) >> 26
		t[i+1] += c
		t[i] -= c << 26
	}

	for i, limb := range t {
		dst[i] = int32(limb)
	}
}
// feToBytes marshals h to s as the 32-byte little-endian encoding of the
// unique representative of h in [0, 2^255-20]. h itself is not modified: the
// reduction is carried out on a private copy of its limbs.
//
// Preconditions:
// |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
//
// Write p=2^255-19; q=floor(h/p).
// Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
//
// Proof:
// Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
// Also have |h-2^230 h9|<2^230 so |19 2^(-255)(h-2^230 h9)|<1/4.
//
// Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
// Then 0<y<1.
//
// Write r=h-pq.
// Have 0<=r<=p-1=2^255-20.
// Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
//
// Write x=r+19(2^-255)r+y.
// Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
//
// Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
// so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
func feToBytes(s *[32]byte, h *fieldElement) {
	// Work on a copy so that the caller's element is left untouched.
	t := *h

	// Compute q as in the claim above by running the rounding term through
	// all ten limbs; even limbs are 26 bits wide, odd limbs 25.
	q := (19*t[9] + (1 << 24)) >> 25
	for i, limb := range t {
		q = (limb + q) >> uint(26-(i&1))
	}

	// Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20.
	t[0] += 19 * q
	// Goal: Output h-2^255 q, which is between 0 and 2^255-20.

	// Propagate carries upwards so every limb becomes non-negative and fits
	// its nominal width.
	for i := 0; i < 9; i++ {
		width := uint(26 - (i & 1))
		c := t[i] >> width
		t[i+1] += c
		t[i] -= c << width
	}
	// The carry out of limb 9 (h10) equals q, so 2^255 h10 - 2^255 q = 0 and
	// the carry is simply dropped.
	t[9] -= (t[9] >> 25) << 25

	// Goal: Output t[0]+...+2^230 t[9], packed little-endian.
	s[0] = byte(t[0] >> 0)
	s[1] = byte(t[0] >> 8)
	s[2] = byte(t[0] >> 16)
	s[3] = byte((t[0] >> 24) | (t[1] << 2))
	s[4] = byte(t[1] >> 6)
	s[5] = byte(t[1] >> 14)
	s[6] = byte((t[1] >> 22) | (t[2] << 3))
	s[7] = byte(t[2] >> 5)
	s[8] = byte(t[2] >> 13)
	s[9] = byte((t[2] >> 21) | (t[3] << 5))
	s[10] = byte(t[3] >> 3)
	s[11] = byte(t[3] >> 11)
	s[12] = byte((t[3] >> 19) | (t[4] << 6))
	s[13] = byte(t[4] >> 2)
	s[14] = byte(t[4] >> 10)
	s[15] = byte(t[4] >> 18)
	s[16] = byte(t[5] >> 0)
	s[17] = byte(t[5] >> 8)
	s[18] = byte(t[5] >> 16)
	s[19] = byte((t[5] >> 24) | (t[6] << 1))
	s[20] = byte(t[6] >> 7)
	s[21] = byte(t[6] >> 15)
	s[22] = byte((t[6] >> 23) | (t[7] << 3))
	s[23] = byte(t[7] >> 5)
	s[24] = byte(t[7] >> 13)
	s[25] = byte((t[7] >> 21) | (t[8] << 4))
	s[26] = byte(t[8] >> 4)
	s[27] = byte(t[8] >> 12)
	s[28] = byte((t[8] >> 20) | (t[9] << 6))
	s[29] = byte(t[9] >> 2)
	s[30] = byte(t[9] >> 10)
	s[31] = byte(t[9] >> 18)
}
// feMul calculates h = f * g
// Can overlap h with f or g.
//
// Preconditions:
// |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
// |g| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
//
// Postconditions:
// |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
//
// Implementation notes:
//
// This is schoolbook multiplication. The product f[i]*g[j] has weight
// 2^ceil(25.5*(i+j)); relative to the weight of limb i+j that is an extra
// factor of 2 exactly when i and j are both odd. Limbs 10..18 of the raw
// product wrap around to limbs 0..8 with a factor of 19, since 2^255 = 19
// modulo 2^255-19.
//
// The factors 2 and 19 are applied as 32-bit pre-multiplications of f and g
// respectively (under the preconditions 19*g[j] is at most about 1.4*2^30
// and still fits), so each term costs one 32x32->64 multiplication.
//
// The twelve carries at the end run in a fixed order; the carry out of limb
// 9 is folded into limb 0 multiplied by 19.
func feMul(h, f, g *fieldElement) {
	// 32-bit scaled copies of the inputs.
	var fDouble, gTimes19 fieldElement
	for i := range fDouble {
		fDouble[i] = 2 * f[i]
		gTimes19[i] = 19 * g[i]
	}

	// Accumulate all hundred partial products into ten 64-bit limbs.
	var t [10]int64
	for i := 0; i < 10; i++ {
		for j := 0; j < 10; j++ {
			fi, gj := f[i], g[j]
			if i&1 == 1 && j&1 == 1 {
				fi = fDouble[i]
			}
			k := i + j
			if k >= 10 {
				gj = gTimes19[j]
				k -= 10
			}
			t[k] += int64(fi) * int64(gj)
		}
	}

	// |t[even]| <= 1.2*2^59 and |t[odd]| <= 1.5*2^58 at this point. Carry in
	// two interleaved chains (0,1,2,3,4 and 4,5,6,7,8), then 9 -> 0 -> 1.
	// Even limbs keep 26 bits, odd limbs 25; rounding keeps limbs signed.
	for _, i := range [...]int{0, 4, 1, 5, 2, 6, 3, 7, 4, 8, 9, 0} {
		width := uint(26 - i&1)
		c := (t[i] + int64(1)<<(width-1)) >> width
		if i == 9 {
			t[0] += c * 19
		} else {
			t[i+1] += c
		}
		t[i] -= c << width
	}

	// Every limb now fits in an int32. h is only written here, which is what
	// allows it to overlap f or g.
	for i := range h {
		h[i] = int32(t[i])
	}
}
// feSquare calculates h = f*f. Can overlap h with f.
//
// Preconditions:
// |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
//
// Postconditions:
// |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
//
// Only the 55 products f[i]*f[j] with i <= j are formed. Each one carries an
// integer coefficient made of up to three factors: 2 for a cross term
// (i != j), 2 when i and j are both odd, and 19 when i+j >= 10 (the term
// wraps around modulo 2^255-19). The coefficient is applied as 32-bit
// pre-multiplications of the two operands, followed by a single
// 32x32->64 multiplication.
func feSquare(h, f *fieldElement) {
	var t [10]int64
	for i := 0; i < 10; i++ {
		for j := i; j < 10; j++ {
			oddPair := i&1 == 1 && j&1 == 1

			// coeff is the whole coefficient of f[i]*f[j].
			coeff := int32(1)
			if i != j {
				coeff *= 2
			}
			if oddPair {
				coeff *= 2
			}

			// right is the part of coeff applied to f[j]; the remainder
			// (always 1 or 2) is applied to f[i].
			right := int32(1)
			k := i + j
			switch {
			case k >= 10:
				coeff *= 19
				k -= 10
				right = 19
				if j&1 == 1 {
					right = 38
				}
			case oddPair && i != j:
				right = 2
			}
			left := coeff / right

			t[k] += int64(left*f[i]) * int64(right*f[j])
		}
	}

	// Carry in two interleaved chains (0,1,2,3,4 and 4,5,6,7,8), then
	// 9 -> 0 -> 1. Even limbs keep 26 bits, odd limbs 25; the carry out of
	// limb 9 re-enters limb 0 multiplied by 19.
	for _, i := range [...]int{0, 4, 1, 5, 2, 6, 3, 7, 4, 8, 9, 0} {
		width := uint(26 - i&1)
		c := (t[i] + int64(1)<<(width-1)) >> width
		if i == 9 {
			t[0] += c * 19
		} else {
			t[i+1] += c
		}
		t[i] -= c << width
	}

	// h is only written here, which is what allows it to overlap f.
	for i := range h {
		h[i] = int32(t[i])
	}
}
// feMul121666 calculates h = f * 121666. Can overlap h with f.
//
// Preconditions:
// |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
//
// Postconditions:
// |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
func feMul121666(h, f *fieldElement) {
	// Scale every limb in 64 bits.
	var t [10]int64
	for i, limb := range f {
		t[i] = int64(limb) * 121666
	}

	// One round of carries: first out of the odd (25-bit) limbs, then out of
	// the even (26-bit) limbs. Within each pass the carries touch disjoint
	// limbs. The carry out of limb 9 wraps into limb 0 multiplied by 19.
	for i := 1; i < 10; i += 2 {
		c := (t[i] + (1 << 24)) >> 25
		if i == 9 {
			t[0] += c * 19
		} else {
			t[i+1] += c
		}
		t[i] -= c << 25
	}
	for i := 0; i < 10; i += 2 {
		c := (t[i] + (1 << 25)) >> 26
		t[i+1] += c
		t[i] -= c << 26
	}

	// h is only written here, which is what allows it to overlap f.
	for i := range h {
		h[i] = int32(t[i])
	}
}
// feInvert sets out = z^-1.
//
// The inverse is computed as z^(p-2) with p = 2^255-19, using a fixed
// sequence of squarings and multiplications. The comment on each step gives
// the exponent of z held in the destination afterwards.
func feInvert(out, z *fieldElement) {
	var t0, t1, t2, t3 fieldElement

	// pow2k sets dst = src^(2^k) using k >= 1 successive squarings.
	pow2k := func(dst, src *fieldElement, k int) {
		feSquare(dst, src)
		for ; k > 1; k-- {
			feSquare(dst, dst)
		}
	}

	pow2k(&t0, z, 1)       // 2
	pow2k(&t1, &t0, 2)     // 8
	feMul(&t1, z, &t1)     // 9
	feMul(&t0, &t0, &t1)   // 11
	pow2k(&t2, &t0, 1)     // 22
	feMul(&t1, &t1, &t2)   // 31 = 2^5 - 1
	pow2k(&t2, &t1, 5)     // 2^10 - 2^5
	feMul(&t1, &t2, &t1)   // 2^10 - 1
	pow2k(&t2, &t1, 10)    // 2^20 - 2^10
	feMul(&t2, &t2, &t1)   // 2^20 - 1
	pow2k(&t3, &t2, 20)    // 2^40 - 2^20
	feMul(&t2, &t3, &t2)   // 2^40 - 1
	pow2k(&t2, &t2, 10)    // 2^50 - 2^10
	feMul(&t1, &t2, &t1)   // 2^50 - 1
	pow2k(&t2, &t1, 50)    // 2^100 - 2^50
	feMul(&t2, &t2, &t1)   // 2^100 - 1
	pow2k(&t3, &t2, 100)   // 2^200 - 2^100
	feMul(&t2, &t3, &t2)   // 2^200 - 1
	pow2k(&t2, &t2, 50)    // 2^250 - 2^50
	feMul(&t1, &t2, &t1)   // 2^250 - 1
	pow2k(&t1, &t1, 5)     // 2^255 - 2^5
	feMul(out, &t1, &t0)   // 2^255 - 21 = p - 2
}
// scalarMult sets out to the x coordinate of in*base, where base is the
// little-endian x coordinate of a point and in is a little-endian scalar.
//
// The scalar is copied and clamped before use, so in itself is not modified.
// The multiplication is a ladder over scalar bits 254 down to 0 that keeps
// two projective x coordinates (x2:z2) and (x3:z3); they are conditionally
// exchanged with feCSwap rather than by branching on scalar bits.
func scalarMult(out, in, base *[32]byte) {
// Clamp a private copy of the scalar: clear the three lowest bits, clear
// bit 255 and set bit 254.
var e [32]byte
copy(e[:], in[:])
e[0] &= 248
e[31] &= 127
e[31] |= 64
// Initial state: (x2:z2) = (1:0) -- z2 is left at its zero value -- and
// (x3:z3) = (x1:1), where x1 is the decoded base coordinate.
var x1, x2, z2, x3, z3, tmp0, tmp1 fieldElement
feFromBytes(&x1, base)
feOne(&x2)
feCopy(&x3, &x1)
feOne(&z3)
swap := int32(0)
for pos := 254; pos >= 0; pos-- {
// b is bit pos of the clamped scalar.
b := e[pos/8] >> uint(pos&7)
b &= 1
// swap becomes 1 exactly when this bit differs from the previous one, so the
// pairs are exchanged only on a bit transition; swap then remembers b.
swap ^= int32(b)
feCSwap(&x2, &x3, swap)
feCSwap(&z2, &z3, swap)
swap = int32(b)
// One ladder step on (x2:z2) and (x3:z3), using x1 as the fixed difference.
// The statements reuse their operands as scratch space, so their order must
// be preserved.
feSub(&tmp0, &x3, &z3)
feSub(&tmp1, &x2, &z2)
feAdd(&x2, &x2, &z2)
feAdd(&z2, &x3, &z3)
feMul(&z3, &tmp0, &x2)
feMul(&z2, &z2, &tmp1)
feSquare(&tmp0, &tmp1)
feSquare(&tmp1, &x2)
feAdd(&x3, &z3, &z2)
feSub(&z2, &z3, &z2)
feMul(&x2, &tmp1, &tmp0)
feSub(&tmp1, &tmp1, &tmp0)
feSquare(&z2, &z2)
feMul121666(&z3, &tmp1)
feSquare(&x3, &x3)
feAdd(&tmp0, &tmp0, &z3)
feMul(&z3, &x1, &z2)
feMul(&z2, &tmp1, &tmp0)
}
// Apply the exchange still pending from the last bit processed.
feCSwap(&x2, &x3, swap)
feCSwap(&z2, &z3, swap)
// Convert to affine form, out = x2 * z2^-1, and serialize.
feInvert(&z2, &z2)
feMul(&x2, &x2, &z2)
feToBytes(out, &x2)
}
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package curve25519
import (
"fmt"
"testing"
)
// expectedHex is the hex encoding of the value obtained by applying
// ScalarBaseMult 200 times in a row, starting from the scalar 1.
const expectedHex = "89161fde887b2b53de549af483940106ecc114d6982daa98256de23bdf77661a"

// TestBaseScalarMult iterates ScalarBaseMult 200 times, feeding each output
// back in as the next scalar, and compares the final value with expectedHex.
func TestBaseScalarMult(t *testing.T) {
	var bufs [2][32]byte
	bufs[0][0] = 1

	src, dst := &bufs[0], &bufs[1]
	for round := 0; round < 200; round++ {
		ScalarBaseMult(dst, src)
		src, dst = dst, src
	}

	if got := fmt.Sprintf("%x", src[:]); got != expectedHex {
		t.Errorf("incorrect result: got %s, want %s", got, expectedHex)
	}
}
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package curve25519 provides an implementation of scalar multiplication on
// the elliptic curve known as curve25519. See http://cr.yp.to/ecdh.html
package curve25519 // import "golang.org/x/crypto/curve25519"
// basePoint is the little-endian encoding of 9, the x coordinate of the
// generator of the curve.
var basePoint = [32]byte{9}
// ScalarMult sets dst to the product in*base where dst and base are the x
// coordinates of group points and all values are in little-endian form.
//
// in is the 32-byte scalar. ScalarMult is a thin exported wrapper: all of the
// work is done by the unexported scalarMult.
func ScalarMult(dst, in, base *[32]byte) {
scalarMult(dst, in, base)
}
// ScalarBaseMult sets dst to the product in*base where dst and base are the x
// coordinates of group points, base is the standard generator and all values
// are in little-endian form.
//
// It is equivalent to calling ScalarMult with base set to the package's
// basePoint.
func ScalarBaseMult(dst, in *[32]byte) {
ScalarMult(dst, in, &basePoint)
}
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// This code was translated into a form compatible with 6a from the public
// domain sources in SUPERCOP: http://bench.cr.yp.to/supercop.html
// +build amd64,!gccgo,!appengine
// func freeze(inout *[5]uint64)
//
// freeze rewrites the five 64-bit limbs at inout in place. The limbs are
// treated as radix-2^51 digits: three passes of carry propagation bring each
// limb down to at most 51 bits (the carry out of the top limb re-enters limb
// 0 multiplied by 19), after which one conditional, branch-free subtraction
// of (2^51-19, 2^51-1, 2^51-1, 2^51-1, 2^51-1) -- the limbs of 2^255-19 -- is
// applied.
TEXT ·freeze(SB),7,$96-8
MOVQ inout+0(FP), DI
// Keep the incoming SP in R11, then round SP down to a multiple of 32 and add
// 32 to obtain an aligned scratch area inside the declared 96-byte frame.
MOVQ SP,R11
MOVQ $31,CX
NOTQ CX
ANDQ CX,SP
ADDQ $32,SP
// Spill the old SP (in R11) together with R12-R15, BX and BP. All of them are
// restored before returning, although only R11 and R12 are used below.
MOVQ R11,0(SP)
MOVQ R12,8(SP)
MOVQ R13,16(SP)
MOVQ R14,24(SP)
MOVQ R15,32(SP)
MOVQ BX,40(SP)
MOVQ BP,48(SP)
// Load the five limbs into SI, DX, CX, R8, R9.
MOVQ 0(DI),SI
MOVQ 8(DI),DX
MOVQ 16(DI),CX
MOVQ 24(DI),R8
MOVQ 32(DI),R9
// AX = REDMASK51, R10 = REDMASK51 - 18, R11 = pass counter (3).
MOVQ ·REDMASK51(SB),AX
MOVQ AX,R10
SUBQ $18,R10
MOVQ $3,R11
REDUCELOOP:
// One carry pass: the bits above bit 50 of each limb are added to the next
// limb, and the limb is masked with AX.
MOVQ SI,R12
SHRQ $51,R12
ANDQ AX,SI
ADDQ R12,DX
MOVQ DX,R12
SHRQ $51,R12
ANDQ AX,DX
ADDQ R12,CX
MOVQ CX,R12
SHRQ $51,R12
ANDQ AX,CX
ADDQ R12,R8
MOVQ R8,R12
SHRQ $51,R12
ANDQ AX,R8
ADDQ R12,R9
MOVQ R9,R12
SHRQ $51,R12
ANDQ AX,R9
// The carry out of the top limb wraps around to limb 0 multiplied by 19.
IMUL3Q $19,R12,R12
ADDQ R12,SI
// Decrement the counter and repeat while it is non-zero; R11 is 0 on exit.
SUBQ $1,R11
JA REDUCELOOP
// R12 starts at 1 and is replaced by R11 (0 after the loop) if the R10/SI
// comparison reports signed less-than, or if any of limbs 1..4 differs from
// the mask in AX. R12 therefore stays 1 only when limbs 1..4 are all equal to
// the mask and the limb-0 comparison does not report less-than.
// NOTE(review): the intent is presumably "subtract only if limb 0 is at
// least 2^51-19"; confirm the CMPQ operand order against the Go assembler
// documentation.
MOVQ $1,R12
CMPQ R10,SI
CMOVQLT R11,R12
CMPQ AX,DX
CMOVQNE R11,R12
CMPQ AX,CX
CMOVQNE R11,R12
CMPQ AX,R8
CMOVQNE R11,R12
CMPQ AX,R9
CMOVQNE R11,R12
// Turn R12 into a mask (0 or all ones), apply it to the two constants and
// subtract them: R10 from limb 0 and AX from limbs 1..4. With a zero mask
// the subtraction leaves the limbs unchanged.
NEGQ R12
ANDQ R12,AX
ANDQ R12,R10
SUBQ R10,SI
SUBQ AX,DX
SUBQ AX,CX
SUBQ AX,R8
SUBQ AX,R9
// Store the result back through inout.
MOVQ SI,0(DI)
MOVQ DX,8(DI)
MOVQ CX,16(DI)
MOVQ R8,24(DI)
MOVQ R9,32(DI)
// Restore the spilled registers and the original SP.
MOVQ 0(SP),R11
MOVQ 8(SP),R12
MOVQ 16(SP),R13
MOVQ 24(SP),R14
MOVQ 32(SP),R15
MOVQ 40(SP),BX
MOVQ 48(SP),BP
MOVQ R11,SP
// DI and SI are copied into AX and DX before returning. The prototype above
// declares no results, so this looks like a leftover from the calling
// convention of the code this was translated from -- TODO confirm.
MOVQ DI,AX
MOVQ SI,DX
RET
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// This code was translated into a form compatible with 6a from the public
// domain sources in SUPERCOP: http://bench.cr.yp.to/supercop.html
// +build amd64,!gccgo,!appengine
// func ladderstep(inout *[5][5]uint64)
//
// ladderstep updates the array at inout in place. Below, w0..w4 name its five
// 5-limb rows (byte offsets 0, 40, 80, 120 and 160 from DI). Reading the
// loads and stores in order, the routine computes
//
//	a = w1 + w2    b = w1 - w2    c = w3 + w4    d = w3 - w4
//	w3 = (d*a + c*b)^2
//	w4 = (d*a - c*b)^2 * w0
//	w1 = a^2 * b^2
//	w2 = (a^2 - b^2) * (b^2 + 121666*(a^2 - b^2))
//
// w0 is only read. Every subtraction first adds _2P0/_2P1234, whose values
// are twice the radix-2^51 limbs of 2^255-19 (2*(2^51-19) and 2*(2^51-1)).
// Every product is accumulated as five 128-bit column sums in register pairs
// (CX:SI, R9:R8, R11:R10, R13:R12, R15:R14), with the wrap-around terms
// multiplied by 19 (or 38 for doubled terms), and is then carried down using
// REDMASK51.
//
// Stack slots relative to the realigned SP:
//
//	0..48    saved SP, R12-R15, BX, BP
//	56..88   a (later 56 and 64 are reused as scratch)
//	96..128  b, later overwritten with c*b
//	136..168 b^2
//	176..208 a^2
//	216..248 a^2 - b^2
//	256..288 c (later 256 and 264 are reused as scratch)
//	296..328 d
//	336, 344 scratch for 19*limb values
TEXT ·ladderstep(SB),0,$384-8
MOVQ inout+0(FP),DI
// Keep the incoming SP in R11, move SP up to the next 32-byte boundary inside
// the declared frame, then save the original SP and R12-R15, BX, BP.
MOVQ SP,R11
MOVQ $31,CX
NOTQ CX
ANDQ CX,SP
ADDQ $32,SP
MOVQ R11,0(SP)
MOVQ R12,8(SP)
MOVQ R13,16(SP)
MOVQ R14,24(SP)
MOVQ R15,32(SP)
MOVQ BX,40(SP)
MOVQ BP,48(SP)
// a = w1 + w2 -> 56..88(SP); b = w1 + 2p - w2 -> 96..128(SP).
MOVQ 40(DI),SI
MOVQ 48(DI),DX
MOVQ 56(DI),CX
MOVQ 64(DI),R8
MOVQ 72(DI),R9
MOVQ SI,AX
MOVQ DX,R10
MOVQ CX,R11
MOVQ R8,R12
MOVQ R9,R13
ADDQ ·_2P0(SB),AX
ADDQ ·_2P1234(SB),R10
ADDQ ·_2P1234(SB),R11
ADDQ ·_2P1234(SB),R12
ADDQ ·_2P1234(SB),R13
ADDQ 80(DI),SI
ADDQ 88(DI),DX
ADDQ 96(DI),CX
ADDQ 104(DI),R8
ADDQ 112(DI),R9
SUBQ 80(DI),AX
SUBQ 88(DI),R10
SUBQ 96(DI),R11
SUBQ 104(DI),R12
SUBQ 112(DI),R13
MOVQ SI,56(SP)
MOVQ DX,64(SP)
MOVQ CX,72(SP)
MOVQ R8,80(SP)
MOVQ R9,88(SP)
MOVQ AX,96(SP)
MOVQ R10,104(SP)
MOVQ R11,112(SP)
MOVQ R12,120(SP)
MOVQ R13,128(SP)
// Square b (96..128(SP)); the result goes to 136..168(SP).
MOVQ 96(SP),AX
MULQ 96(SP)
MOVQ AX,SI
MOVQ DX,CX
MOVQ 96(SP),AX
SHLQ $1,AX
MULQ 104(SP)
MOVQ AX,R8
MOVQ DX,R9
MOVQ 96(SP),AX
SHLQ $1,AX
MULQ 112(SP)
MOVQ AX,R10
MOVQ DX,R11
MOVQ 96(SP),AX
SHLQ $1,AX
MULQ 120(SP)
MOVQ AX,R12
MOVQ DX,R13
MOVQ 96(SP),AX
SHLQ $1,AX
MULQ 128(SP)
MOVQ AX,R14
MOVQ DX,R15
MOVQ 104(SP),AX
MULQ 104(SP)
ADDQ AX,R10
ADCQ DX,R11
MOVQ 104(SP),AX
SHLQ $1,AX
MULQ 112(SP)
ADDQ AX,R12
ADCQ DX,R13
MOVQ 104(SP),AX
SHLQ $1,AX
MULQ 120(SP)
ADDQ AX,R14
ADCQ DX,R15
MOVQ 104(SP),DX
IMUL3Q $38,DX,AX
MULQ 128(SP)
ADDQ AX,SI
ADCQ DX,CX
MOVQ 112(SP),AX
MULQ 112(SP)
ADDQ AX,R14
ADCQ DX,R15
MOVQ 112(SP),DX
IMUL3Q $38,DX,AX
MULQ 120(SP)
ADDQ AX,SI
ADCQ DX,CX
MOVQ 112(SP),DX
IMUL3Q $38,DX,AX
MULQ 128(SP)
ADDQ AX,R8
ADCQ DX,R9
MOVQ 120(SP),DX
IMUL3Q $19,DX,AX
MULQ 120(SP)
ADDQ AX,R8
ADCQ DX,R9
MOVQ 120(SP),DX
IMUL3Q $38,DX,AX
MULQ 128(SP)
ADDQ AX,R10
ADCQ DX,R11
MOVQ 128(SP),DX
IMUL3Q $19,DX,AX
MULQ 128(SP)
ADDQ AX,R12
ADCQ DX,R13
// Reduce: split each 128-bit column sum at bit 51, add the high part to the
// next column (19 times the top one into column 0), then run a carry chain.
MOVQ ·REDMASK51(SB),DX
SHLQ $13,CX:SI
ANDQ DX,SI
SHLQ $13,R9:R8
ANDQ DX,R8
ADDQ CX,R8
SHLQ $13,R11:R10
ANDQ DX,R10
ADDQ R9,R10
SHLQ $13,R13:R12
ANDQ DX,R12
ADDQ R11,R12
SHLQ $13,R15:R14
ANDQ DX,R14
ADDQ R13,R14
IMUL3Q $19,R15,CX
ADDQ CX,SI
MOVQ SI,CX
SHRQ $51,CX
ADDQ R8,CX
ANDQ DX,SI
MOVQ CX,R8
SHRQ $51,CX
ADDQ R10,CX
ANDQ DX,R8
MOVQ CX,R9
SHRQ $51,CX
ADDQ R12,CX
ANDQ DX,R9
MOVQ CX,AX
SHRQ $51,CX
ADDQ R14,CX
ANDQ DX,AX
MOVQ CX,R10
SHRQ $51,CX
IMUL3Q $19,CX,CX
ADDQ CX,SI
ANDQ DX,R10
MOVQ SI,136(SP)
MOVQ R8,144(SP)
MOVQ R9,152(SP)
MOVQ AX,160(SP)
MOVQ R10,168(SP)
// Square a (56..88(SP)); the result goes to 176..208(SP).
MOVQ 56(SP),AX
MULQ 56(SP)
MOVQ AX,SI
MOVQ DX,CX
MOVQ 56(SP),AX
SHLQ $1,AX
MULQ 64(SP)
MOVQ AX,R8
MOVQ DX,R9
MOVQ 56(SP),AX
SHLQ $1,AX
MULQ 72(SP)
MOVQ AX,R10
MOVQ DX,R11
MOVQ 56(SP),AX
SHLQ $1,AX
MULQ 80(SP)
MOVQ AX,R12
MOVQ DX,R13
MOVQ 56(SP),AX
SHLQ $1,AX
MULQ 88(SP)
MOVQ AX,R14
MOVQ DX,R15
MOVQ 64(SP),AX
MULQ 64(SP)
ADDQ AX,R10
ADCQ DX,R11
MOVQ 64(SP),AX
SHLQ $1,AX
MULQ 72(SP)
ADDQ AX,R12
ADCQ DX,R13
MOVQ 64(SP),AX
SHLQ $1,AX
MULQ 80(SP)
ADDQ AX,R14
ADCQ DX,R15
MOVQ 64(SP),DX
IMUL3Q $38,DX,AX
MULQ 88(SP)
ADDQ AX,SI
ADCQ DX,CX
MOVQ 72(SP),AX
MULQ 72(SP)
ADDQ AX,R14
ADCQ DX,R15
MOVQ 72(SP),DX
IMUL3Q $38,DX,AX
MULQ 80(SP)
ADDQ AX,SI
ADCQ DX,CX
MOVQ 72(SP),DX
IMUL3Q $38,DX,AX
MULQ 88(SP)
ADDQ AX,R8
ADCQ DX,R9
MOVQ 80(SP),DX
IMUL3Q $19,DX,AX
MULQ 80(SP)
ADDQ AX,R8
ADCQ DX,R9
MOVQ 80(SP),DX
IMUL3Q $38,DX,AX
MULQ 88(SP)
ADDQ AX,R10
ADCQ DX,R11
MOVQ 88(SP),DX
IMUL3Q $19,DX,AX
MULQ 88(SP)
ADDQ AX,R12
ADCQ DX,R13
// Reduce the column sums as above.
MOVQ ·REDMASK51(SB),DX
SHLQ $13,CX:SI
ANDQ DX,SI
SHLQ $13,R9:R8
ANDQ DX,R8
ADDQ CX,R8
SHLQ $13,R11:R10
ANDQ DX,R10
ADDQ R9,R10
SHLQ $13,R13:R12
ANDQ DX,R12
ADDQ R11,R12
SHLQ $13,R15:R14
ANDQ DX,R14
ADDQ R13,R14
IMUL3Q $19,R15,CX
ADDQ CX,SI
MOVQ SI,CX
SHRQ $51,CX
ADDQ R8,CX
ANDQ DX,SI
MOVQ CX,R8
SHRQ $51,CX
ADDQ R10,CX
ANDQ DX,R8
MOVQ CX,R9
SHRQ $51,CX
ADDQ R12,CX
ANDQ DX,R9
MOVQ CX,AX
SHRQ $51,CX
ADDQ R14,CX
ANDQ DX,AX
MOVQ CX,R10
SHRQ $51,CX
IMUL3Q $19,CX,CX
ADDQ CX,SI
ANDQ DX,R10
MOVQ SI,176(SP)
MOVQ R8,184(SP)
MOVQ R9,192(SP)
MOVQ AX,200(SP)
MOVQ R10,208(SP)
// a^2 + 2p - b^2 -> 216..248(SP).
MOVQ SI,SI
MOVQ R8,DX
MOVQ R9,CX
MOVQ AX,R8
MOVQ R10,R9
ADDQ ·_2P0(SB),SI
ADDQ ·_2P1234(SB),DX
ADDQ ·_2P1234(SB),CX
ADDQ ·_2P1234(SB),R8
ADDQ ·_2P1234(SB),R9
SUBQ 136(SP),SI
SUBQ 144(SP),DX
SUBQ 152(SP),CX
SUBQ 160(SP),R8
SUBQ 168(SP),R9
MOVQ SI,216(SP)
MOVQ DX,224(SP)
MOVQ CX,232(SP)
MOVQ R8,240(SP)
MOVQ R9,248(SP)
// c = w3 + w4 -> 256..288(SP); d = w3 + 2p - w4 -> 296..328(SP).
MOVQ 120(DI),SI
MOVQ 128(DI),DX
MOVQ 136(DI),CX
MOVQ 144(DI),R8
MOVQ 152(DI),R9
MOVQ SI,AX
MOVQ DX,R10
MOVQ CX,R11
MOVQ R8,R12
MOVQ R9,R13
ADDQ ·_2P0(SB),AX
ADDQ ·_2P1234(SB),R10
ADDQ ·_2P1234(SB),R11
ADDQ ·_2P1234(SB),R12
ADDQ ·_2P1234(SB),R13
ADDQ 160(DI),SI
ADDQ 168(DI),DX
ADDQ 176(DI),CX
ADDQ 184(DI),R8
ADDQ 192(DI),R9
SUBQ 160(DI),AX
SUBQ 168(DI),R10
SUBQ 176(DI),R11
SUBQ 184(DI),R12
SUBQ 192(DI),R13
MOVQ SI,256(SP)
MOVQ DX,264(SP)
MOVQ CX,272(SP)
MOVQ R8,280(SP)
MOVQ R9,288(SP)
MOVQ AX,296(SP)
MOVQ R10,304(SP)
MOVQ R11,312(SP)
MOVQ R12,320(SP)
MOVQ R13,328(SP)
// Multiply c (256..288(SP)) by b (96..128(SP)); 336(SP) and 344(SP) hold
// 19*c[3] and 19*c[4]. The product overwrites b at 96..128(SP).
MOVQ 280(SP),SI
IMUL3Q $19,SI,AX
MOVQ AX,336(SP)
MULQ 112(SP)
MOVQ AX,SI
MOVQ DX,CX
MOVQ 288(SP),DX
IMUL3Q $19,DX,AX
MOVQ AX,344(SP)
MULQ 104(SP)
ADDQ AX,SI
ADCQ DX,CX
MOVQ 256(SP),AX
MULQ 96(SP)
ADDQ AX,SI
ADCQ DX,CX
MOVQ 256(SP),AX
MULQ 104(SP)
MOVQ AX,R8
MOVQ DX,R9
MOVQ 256(SP),AX
MULQ 112(SP)
MOVQ AX,R10
MOVQ DX,R11
MOVQ 256(SP),AX
MULQ 120(SP)
MOVQ AX,R12
MOVQ DX,R13
MOVQ 256(SP),AX
MULQ 128(SP)
MOVQ AX,R14
MOVQ DX,R15
MOVQ 264(SP),AX
MULQ 96(SP)
ADDQ AX,R8
ADCQ DX,R9
MOVQ 264(SP),AX
MULQ 104(SP)
ADDQ AX,R10
ADCQ DX,R11
MOVQ 264(SP),AX
MULQ 112(SP)
ADDQ AX,R12
ADCQ DX,R13
MOVQ 264(SP),AX
MULQ 120(SP)
ADDQ AX,R14
ADCQ DX,R15
MOVQ 264(SP),DX
IMUL3Q $19,DX,AX
MULQ 128(SP)
ADDQ AX,SI
ADCQ DX,CX
MOVQ 272(SP),AX
MULQ 96(SP)
ADDQ AX,R10
ADCQ DX,R11
MOVQ 272(SP),AX
MULQ 104(SP)
ADDQ AX,R12
ADCQ DX,R13
MOVQ 272(SP),AX
MULQ 112(SP)
ADDQ AX,R14
ADCQ DX,R15
MOVQ 272(SP),DX
IMUL3Q $19,DX,AX
MULQ 120(SP)
ADDQ AX,SI
ADCQ DX,CX
MOVQ 272(SP),DX
IMUL3Q $19,DX,AX
MULQ 128(SP)
ADDQ AX,R8
ADCQ DX,R9
MOVQ 280(SP),AX
MULQ 96(SP)
ADDQ AX,R12
ADCQ DX,R13
MOVQ 280(SP),AX
MULQ 104(SP)
ADDQ AX,R14
ADCQ DX,R15
MOVQ 336(SP),AX
MULQ 120(SP)
ADDQ AX,R8
ADCQ DX,R9
MOVQ 336(SP),AX
MULQ 128(SP)
ADDQ AX,R10
ADCQ DX,R11
MOVQ 288(SP),AX
MULQ 96(SP)
ADDQ AX,R14
ADCQ DX,R15
MOVQ 344(SP),AX
MULQ 112(SP)
ADDQ AX,R8
ADCQ DX,R9
MOVQ 344(SP),AX
MULQ 120(SP)
ADDQ AX,R10
ADCQ DX,R11
MOVQ 344(SP),AX
MULQ 128(SP)
ADDQ AX,R12
ADCQ DX,R13
// Reduce the column sums and store c*b at 96..128(SP).
MOVQ ·REDMASK51(SB),DX
SHLQ $13,CX:SI
ANDQ DX,SI
SHLQ $13,R9:R8
ANDQ DX,R8
ADDQ CX,R8
SHLQ $13,R11:R10
ANDQ DX,R10
ADDQ R9,R10
SHLQ $13,R13:R12
ANDQ DX,R12
ADDQ R11,R12
SHLQ $13,R15:R14
ANDQ DX,R14
ADDQ R13,R14
IMUL3Q $19,R15,CX
ADDQ CX,SI
MOVQ SI,CX
SHRQ $51,CX
ADDQ R8,CX
MOVQ CX,R8
SHRQ $51,CX
ANDQ DX,SI
ADDQ R10,CX
MOVQ CX,R9
SHRQ $51,CX
ANDQ DX,R8
ADDQ R12,CX
MOVQ CX,AX
SHRQ $51,CX
ANDQ DX,R9
ADDQ R14,CX
MOVQ CX,R10
SHRQ $51,CX
ANDQ DX,AX
IMUL3Q $19,CX,CX
ADDQ CX,SI
ANDQ DX,R10
MOVQ SI,96(SP)
MOVQ R8,104(SP)
MOVQ R9,112(SP)
MOVQ AX,120(SP)
MOVQ R10,128(SP)
// Multiply d (296..328(SP)) by a (56..88(SP)); 256(SP) and 264(SP) are reused
// for 19*d[3] and 19*d[4]. The product stays in SI, R8, R9, AX, R10.
MOVQ 320(SP),SI
IMUL3Q $19,SI,AX
MOVQ AX,256(SP)
MULQ 72(SP)
MOVQ AX,SI
MOVQ DX,CX
MOVQ 328(SP),DX
IMUL3Q $19,DX,AX
MOVQ AX,264(SP)
MULQ 64(SP)
ADDQ AX,SI
ADCQ DX,CX
MOVQ 296(SP),AX
MULQ 56(SP)
ADDQ AX,SI
ADCQ DX,CX
MOVQ 296(SP),AX
MULQ 64(SP)
MOVQ AX,R8
MOVQ DX,R9
MOVQ 296(SP),AX
MULQ 72(SP)
MOVQ AX,R10
MOVQ DX,R11
MOVQ 296(SP),AX
MULQ 80(SP)
MOVQ AX,R12
MOVQ DX,R13
MOVQ 296(SP),AX
MULQ 88(SP)
MOVQ AX,R14
MOVQ DX,R15
MOVQ 304(SP),AX
MULQ 56(SP)
ADDQ AX,R8
ADCQ DX,R9
MOVQ 304(SP),AX
MULQ 64(SP)
ADDQ AX,R10
ADCQ DX,R11
MOVQ 304(SP),AX
MULQ 72(SP)
ADDQ AX,R12
ADCQ DX,R13
MOVQ 304(SP),AX
MULQ 80(SP)
ADDQ AX,R14
ADCQ DX,R15
MOVQ 304(SP),DX
IMUL3Q $19,DX,AX
MULQ 88(SP)
ADDQ AX,SI
ADCQ DX,CX
MOVQ 312(SP),AX
MULQ 56(SP)
ADDQ AX,R10
ADCQ DX,R11
MOVQ 312(SP),AX
MULQ 64(SP)
ADDQ AX,R12
ADCQ DX,R13
MOVQ 312(SP),AX
MULQ 72(SP)
ADDQ AX,R14
ADCQ DX,R15
MOVQ 312(SP),DX
IMUL3Q $19,DX,AX
MULQ 80(SP)
ADDQ AX,SI
ADCQ DX,CX
MOVQ 312(SP),DX
IMUL3Q $19,DX,AX
MULQ 88(SP)
ADDQ AX,R8
ADCQ DX,R9
MOVQ 320(SP),AX
MULQ 56(SP)
ADDQ AX,R12
ADCQ DX,R13
MOVQ 320(SP),AX
MULQ 64(SP)
ADDQ AX,R14
ADCQ DX,R15
MOVQ 256(SP),AX
MULQ 80(SP)
ADDQ AX,R8
ADCQ DX,R9
MOVQ 256(SP),AX
MULQ 88(SP)
ADDQ AX,R10
ADCQ DX,R11
MOVQ 328(SP),AX
MULQ 56(SP)
ADDQ AX,R14
ADCQ DX,R15
MOVQ 264(SP),AX
MULQ 72(SP)
ADDQ AX,R8
ADCQ DX,R9
MOVQ 264(SP),AX
MULQ 80(SP)
ADDQ AX,R10
ADCQ DX,R11
MOVQ 264(SP),AX
MULQ 88(SP)
ADDQ AX,R12
ADCQ DX,R13
// Reduce the column sums; d*a is left in SI, R8, R9, AX, R10.
MOVQ ·REDMASK51(SB),DX
SHLQ $13,CX:SI
ANDQ DX,SI
SHLQ $13,R9:R8
ANDQ DX,R8
ADDQ CX,R8
SHLQ $13,R11:R10
ANDQ DX,R10
ADDQ R9,R10
SHLQ $13,R13:R12
ANDQ DX,R12
ADDQ R11,R12
SHLQ $13,R15:R14
ANDQ DX,R14
ADDQ R13,R14
IMUL3Q $19,R15,CX
ADDQ CX,SI
MOVQ SI,CX
SHRQ $51,CX
ADDQ R8,CX
MOVQ CX,R8
SHRQ $51,CX
ANDQ DX,SI
ADDQ R10,CX
MOVQ CX,R9
SHRQ $51,CX
ANDQ DX,R8
ADDQ R12,CX
MOVQ CX,AX
SHRQ $51,CX
ANDQ DX,R9
ADDQ R14,CX
MOVQ CX,R10
SHRQ $51,CX
ANDQ DX,AX
IMUL3Q $19,CX,CX
ADDQ CX,SI
ANDQ DX,R10
// w3 = d*a + c*b; w4 = d*a + 2p - c*b.
MOVQ SI,DX
MOVQ R8,CX
MOVQ R9,R11
MOVQ AX,R12
MOVQ R10,R13
ADDQ ·_2P0(SB),DX
ADDQ ·_2P1234(SB),CX
ADDQ ·_2P1234(SB),R11
ADDQ ·_2P1234(SB),R12
ADDQ ·_2P1234(SB),R13
ADDQ 96(SP),SI
ADDQ 104(SP),R8
ADDQ 112(SP),R9
ADDQ 120(SP),AX
ADDQ 128(SP),R10
SUBQ 96(SP),DX
SUBQ 104(SP),CX
SUBQ 112(SP),R11
SUBQ 120(SP),R12
SUBQ 128(SP),R13
MOVQ SI,120(DI)
MOVQ R8,128(DI)
MOVQ R9,136(DI)
MOVQ AX,144(DI)
MOVQ R10,152(DI)
MOVQ DX,160(DI)
MOVQ CX,168(DI)
MOVQ R11,176(DI)
MOVQ R12,184(DI)
MOVQ R13,192(DI)
// Square w3 in place (120..152(DI)).
MOVQ 120(DI),AX
MULQ 120(DI)
MOVQ AX,SI
MOVQ DX,CX
MOVQ 120(DI),AX
SHLQ $1,AX
MULQ 128(DI)
MOVQ AX,R8
MOVQ DX,R9
MOVQ 120(DI),AX
SHLQ $1,AX
MULQ 136(DI)
MOVQ AX,R10
MOVQ DX,R11
MOVQ 120(DI),AX
SHLQ $1,AX
MULQ 144(DI)
MOVQ AX,R12
MOVQ DX,R13
MOVQ 120(DI),AX
SHLQ $1,AX
MULQ 152(DI)
MOVQ AX,R14
MOVQ DX,R15
MOVQ 128(DI),AX
MULQ 128(DI)
ADDQ AX,R10
ADCQ DX,R11
MOVQ 128(DI),AX
SHLQ $1,AX
MULQ 136(DI)
ADDQ AX,R12
ADCQ DX,R13
MOVQ 128(DI),AX
SHLQ $1,AX
MULQ 144(DI)
ADDQ AX,R14
ADCQ DX,R15
MOVQ 128(DI),DX
IMUL3Q $38,DX,AX
MULQ 152(DI)
ADDQ AX,SI
ADCQ DX,CX
MOVQ 136(DI),AX
MULQ 136(DI)
ADDQ AX,R14
ADCQ DX,R15
MOVQ 136(DI),DX
IMUL3Q $38,DX,AX
MULQ 144(DI)
ADDQ AX,SI
ADCQ DX,CX
MOVQ 136(DI),DX
IMUL3Q $38,DX,AX
MULQ 152(DI)
ADDQ AX,R8
ADCQ DX,R9
MOVQ 144(DI),DX
IMUL3Q $19,DX,AX
MULQ 144(DI)
ADDQ AX,R8
ADCQ DX,R9
MOVQ 144(DI),DX
IMUL3Q $38,DX,AX
MULQ 152(DI)
ADDQ AX,R10
ADCQ DX,R11
MOVQ 152(DI),DX
IMUL3Q $19,DX,AX
MULQ 152(DI)
ADDQ AX,R12
ADCQ DX,R13
// Reduce the column sums and store w3.
MOVQ ·REDMASK51(SB),DX
SHLQ $13,CX:SI
ANDQ DX,SI
SHLQ $13,R9:R8
ANDQ DX,R8
ADDQ CX,R8
SHLQ $13,R11:R10
ANDQ DX,R10
ADDQ R9,R10
SHLQ $13,R13:R12
ANDQ DX,R12
ADDQ R11,R12
SHLQ $13,R15:R14
ANDQ DX,R14
ADDQ R13,R14
IMUL3Q $19,R15,CX
ADDQ CX,SI
MOVQ SI,CX
SHRQ $51,CX
ADDQ R8,CX
ANDQ DX,SI
MOVQ CX,R8
SHRQ $51,CX
ADDQ R10,CX
ANDQ DX,R8
MOVQ CX,R9
SHRQ $51,CX
ADDQ R12,CX
ANDQ DX,R9
MOVQ CX,AX
SHRQ $51,CX
ADDQ R14,CX
ANDQ DX,AX
MOVQ CX,R10
SHRQ $51,CX
IMUL3Q $19,CX,CX
ADDQ CX,SI
ANDQ DX,R10
MOVQ SI,120(DI)
MOVQ R8,128(DI)
MOVQ R9,136(DI)
MOVQ AX,144(DI)
MOVQ R10,152(DI)
// Square w4 in place (160..192(DI)).
MOVQ 160(DI),AX
MULQ 160(DI)
MOVQ AX,SI
MOVQ DX,CX
MOVQ 160(DI),AX
SHLQ $1,AX
MULQ 168(DI)
MOVQ AX,R8
MOVQ DX,R9
MOVQ 160(DI),AX
SHLQ $1,AX
MULQ 176(DI)
MOVQ AX,R10
MOVQ DX,R11
MOVQ 160(DI),AX
SHLQ $1,AX
MULQ 184(DI)
MOVQ AX,R12
MOVQ DX,R13
MOVQ 160(DI),AX
SHLQ $1,AX
MULQ 192(DI)
MOVQ AX,R14
MOVQ DX,R15
MOVQ 168(DI),AX
MULQ 168(DI)
ADDQ AX,R10
ADCQ DX,R11
MOVQ 168(DI),AX
SHLQ $1,AX
MULQ 176(DI)
ADDQ AX,R12
ADCQ DX,R13
MOVQ 168(DI),AX
SHLQ $1,AX
MULQ 184(DI)
ADDQ AX,R14
ADCQ DX,R15
MOVQ 168(DI),DX
IMUL3Q $38,DX,AX
MULQ 192(DI)
ADDQ AX,SI
ADCQ DX,CX
MOVQ 176(DI),AX
MULQ 176(DI)
ADDQ AX,R14
ADCQ DX,R15
MOVQ 176(DI),DX
IMUL3Q $38,DX,AX
MULQ 184(DI)
ADDQ AX,SI
ADCQ DX,CX
MOVQ 176(DI),DX
IMUL3Q $38,DX,AX
MULQ 192(DI)
ADDQ AX,R8
ADCQ DX,R9
MOVQ 184(DI),DX
IMUL3Q $19,DX,AX
MULQ 184(DI)
ADDQ AX,R8
ADCQ DX,R9
MOVQ 184(DI),DX
IMUL3Q $38,DX,AX
MULQ 192(DI)
ADDQ AX,R10
ADCQ DX,R11
MOVQ 192(DI),DX
IMUL3Q $19,DX,AX
MULQ 192(DI)
ADDQ AX,R12
ADCQ DX,R13
// Reduce the column sums and store w4.
MOVQ ·REDMASK51(SB),DX
SHLQ $13,CX:SI
ANDQ DX,SI
SHLQ $13,R9:R8
ANDQ DX,R8
ADDQ CX,R8
SHLQ $13,R11:R10
ANDQ DX,R10
ADDQ R9,R10
SHLQ $13,R13:R12
ANDQ DX,R12
ADDQ R11,R12
SHLQ $13,R15:R14
ANDQ DX,R14
ADDQ R13,R14
IMUL3Q $19,R15,CX
ADDQ CX,SI
MOVQ SI,CX
SHRQ $51,CX
ADDQ R8,CX
ANDQ DX,SI
MOVQ CX,R8
SHRQ $51,CX
ADDQ R10,CX
ANDQ DX,R8
MOVQ CX,R9
SHRQ $51,CX
ADDQ R12,CX
ANDQ DX,R9
MOVQ CX,AX
SHRQ $51,CX
ADDQ R14,CX
ANDQ DX,AX
MOVQ CX,R10
SHRQ $51,CX
IMUL3Q $19,CX,CX
ADDQ CX,SI
ANDQ DX,R10
MOVQ SI,160(DI)
MOVQ R8,168(DI)
MOVQ R9,176(DI)
MOVQ AX,184(DI)
MOVQ R10,192(DI)
// Multiply w4 (160..192(DI)) by w0 (0..32(DI)); 56(SP) and 64(SP) are reused
// for 19*w4[3] and 19*w4[4]. The product is written back to w4.
MOVQ 184(DI),SI
IMUL3Q $19,SI,AX
MOVQ AX,56(SP)
MULQ 16(DI)
MOVQ AX,SI
MOVQ DX,CX
MOVQ 192(DI),DX
IMUL3Q $19,DX,AX
MOVQ AX,64(SP)
MULQ 8(DI)
ADDQ AX,SI
ADCQ DX,CX
MOVQ 160(DI),AX
MULQ 0(DI)
ADDQ AX,SI
ADCQ DX,CX
MOVQ 160(DI),AX
MULQ 8(DI)
MOVQ AX,R8
MOVQ DX,R9
MOVQ 160(DI),AX
MULQ 16(DI)
MOVQ AX,R10
MOVQ DX,R11
MOVQ 160(DI),AX
MULQ 24(DI)
MOVQ AX,R12
MOVQ DX,R13
MOVQ 160(DI),AX
MULQ 32(DI)
MOVQ AX,R14
MOVQ DX,R15
MOVQ 168(DI),AX
MULQ 0(DI)
ADDQ AX,R8
ADCQ DX,R9
MOVQ 168(DI),AX
MULQ 8(DI)
ADDQ AX,R10
ADCQ DX,R11
MOVQ 168(DI),AX
MULQ 16(DI)
ADDQ AX,R12
ADCQ DX,R13
MOVQ 168(DI),AX
MULQ 24(DI)
ADDQ AX,R14
ADCQ DX,R15
MOVQ 168(DI),DX
IMUL3Q $19,DX,AX
MULQ 32(DI)
ADDQ AX,SI
ADCQ DX,CX
MOVQ 176(DI),AX
MULQ 0(DI)
ADDQ AX,R10
ADCQ DX,R11
MOVQ 176(DI),AX
MULQ 8(DI)
ADDQ AX,R12
ADCQ DX,R13
MOVQ 176(DI),AX
MULQ 16(DI)
ADDQ AX,R14
ADCQ DX,R15
MOVQ 176(DI),DX
IMUL3Q $19,DX,AX
MULQ 24(DI)
ADDQ AX,SI
ADCQ DX,CX
MOVQ 176(DI),DX
IMUL3Q $19,DX,AX
MULQ 32(DI)
ADDQ AX,R8
ADCQ DX,R9
MOVQ 184(DI),AX
MULQ 0(DI)
ADDQ AX,R12
ADCQ DX,R13
MOVQ 184(DI),AX
MULQ 8(DI)
ADDQ AX,R14
ADCQ DX,R15
MOVQ 56(SP),AX
MULQ 24(DI)
ADDQ AX,R8
ADCQ DX,R9
MOVQ 56(SP),AX
MULQ 32(DI)
ADDQ AX,R10
ADCQ DX,R11
MOVQ 192(DI),AX
MULQ 0(DI)
ADDQ AX,R14
ADCQ DX,R15
MOVQ 64(SP),AX
MULQ 16(DI)
ADDQ AX,R8
ADCQ DX,R9
MOVQ 64(SP),AX
MULQ 24(DI)
ADDQ AX,R10
ADCQ DX,R11
MOVQ 64(SP),AX
MULQ 32(DI)
ADDQ AX,R12
ADCQ DX,R13
// Reduce the column sums and store w4.
MOVQ ·REDMASK51(SB),DX
SHLQ $13,CX:SI
ANDQ DX,SI
SHLQ $13,R9:R8
ANDQ DX,R8
ADDQ CX,R8
SHLQ $13,R11:R10
ANDQ DX,R10
ADDQ R9,R10
SHLQ $13,R13:R12
ANDQ DX,R12
ADDQ R11,R12
SHLQ $13,R15:R14
ANDQ DX,R14
ADDQ R13,R14
IMUL3Q $19,R15,CX
ADDQ CX,SI
MOVQ SI,CX
SHRQ $51,CX
ADDQ R8,CX
MOVQ CX,R8
SHRQ $51,CX
ANDQ DX,SI
ADDQ R10,CX
MOVQ CX,R9
SHRQ $51,CX
ANDQ DX,R8
ADDQ R12,CX
MOVQ CX,AX
SHRQ $51,CX
ANDQ DX,R9
ADDQ R14,CX
MOVQ CX,R10
SHRQ $51,CX
ANDQ DX,AX
IMUL3Q $19,CX,CX
ADDQ CX,SI
ANDQ DX,R10
MOVQ SI,160(DI)
MOVQ R8,168(DI)
MOVQ R9,176(DI)
MOVQ AX,184(DI)
MOVQ R10,192(DI)
// Multiply a^2 (176..208(SP)) by b^2 (136..168(SP)); 56(SP) and 64(SP) are
// reused for 19*a^2[3] and 19*a^2[4]. The product is written to w1.
MOVQ 200(SP),SI
IMUL3Q $19,SI,AX
MOVQ AX,56(SP)
MULQ 152(SP)
MOVQ AX,SI
MOVQ DX,CX
MOVQ 208(SP),DX
IMUL3Q $19,DX,AX
MOVQ AX,64(SP)
MULQ 144(SP)
ADDQ AX,SI
ADCQ DX,CX
MOVQ 176(SP),AX
MULQ 136(SP)
ADDQ AX,SI
ADCQ DX,CX
MOVQ 176(SP),AX
MULQ 144(SP)
MOVQ AX,R8
MOVQ DX,R9
MOVQ 176(SP),AX
MULQ 152(SP)
MOVQ AX,R10
MOVQ DX,R11
MOVQ 176(SP),AX
MULQ 160(SP)
MOVQ AX,R12
MOVQ DX,R13
MOVQ 176(SP),AX
MULQ 168(SP)
MOVQ AX,R14
MOVQ DX,R15
MOVQ 184(SP),AX
MULQ 136(SP)
ADDQ AX,R8
ADCQ DX,R9
MOVQ 184(SP),AX
MULQ 144(SP)
ADDQ AX,R10
ADCQ DX,R11
MOVQ 184(SP),AX
MULQ 152(SP)
ADDQ AX,R12
ADCQ DX,R13
MOVQ 184(SP),AX
MULQ 160(SP)
ADDQ AX,R14
ADCQ DX,R15
MOVQ 184(SP),DX
IMUL3Q $19,DX,AX
MULQ 168(SP)
ADDQ AX,SI
ADCQ DX,CX
MOVQ 192(SP),AX
MULQ 136(SP)
ADDQ AX,R10
ADCQ DX,R11
MOVQ 192(SP),AX
MULQ 144(SP)
ADDQ AX,R12
ADCQ DX,R13
MOVQ 192(SP),AX
MULQ 152(SP)
ADDQ AX,R14
ADCQ DX,R15
MOVQ 192(SP),DX
IMUL3Q $19,DX,AX
MULQ 160(SP)
ADDQ AX,SI
ADCQ DX,CX
MOVQ 192(SP),DX
IMUL3Q $19,DX,AX
MULQ 168(SP)
ADDQ AX,R8
ADCQ DX,R9
MOVQ 200(SP),AX
MULQ 136(SP)
ADDQ AX,R12
ADCQ DX,R13
MOVQ 200(SP),AX
MULQ 144(SP)
ADDQ AX,R14
ADCQ DX,R15
MOVQ 56(SP),AX
MULQ 160(SP)
ADDQ AX,R8
ADCQ DX,R9
MOVQ 56(SP),AX
MULQ 168(SP)
ADDQ AX,R10
ADCQ DX,R11
MOVQ 208(SP),AX
MULQ 136(SP)
ADDQ AX,R14
ADCQ DX,R15
MOVQ 64(SP),AX
MULQ 152(SP)
ADDQ AX,R8
ADCQ DX,R9
MOVQ 64(SP),AX
MULQ 160(SP)
ADDQ AX,R10
ADCQ DX,R11
MOVQ 64(SP),AX
MULQ 168(SP)
ADDQ AX,R12
ADCQ DX,R13
// Reduce the column sums and store w1.
MOVQ ·REDMASK51(SB),DX
SHLQ $13,CX:SI
ANDQ DX,SI
SHLQ $13,R9:R8
ANDQ DX,R8
ADDQ CX,R8
SHLQ $13,R11:R10
ANDQ DX,R10
ADDQ R9,R10
SHLQ $13,R13:R12
ANDQ DX,R12
ADDQ R11,R12
SHLQ $13,R15:R14
ANDQ DX,R14
ADDQ R13,R14
IMUL3Q $19,R15,CX
ADDQ CX,SI
MOVQ SI,CX
SHRQ $51,CX
ADDQ R8,CX
MOVQ CX,R8
SHRQ $51,CX
ANDQ DX,SI
ADDQ R10,CX
MOVQ CX,R9
SHRQ $51,CX
ANDQ DX,R8
ADDQ R12,CX
MOVQ CX,AX
SHRQ $51,CX
ANDQ DX,R9
ADDQ R14,CX
MOVQ CX,R10
SHRQ $51,CX
ANDQ DX,AX
IMUL3Q $19,CX,CX
ADDQ CX,SI
ANDQ DX,R10
MOVQ SI,40(DI)
MOVQ R8,48(DI)
MOVQ R9,56(DI)
MOVQ AX,64(DI)
MOVQ R10,72(DI)
// Scale (a^2 - b^2) at 216..248(SP) by 121666. _121666_213 holds 996687872,
// which is 121666 * 2^13, so after each MULQ the high word DX is the part of
// limb*121666 at bit 51 and above, and AX >> 13 is the part below bit 51. The
// high part is carried into the next limb, and 19 times the last one into
// limb 0.
MOVQ 216(SP),AX
MULQ ·_121666_213(SB)
SHRQ $13,AX
MOVQ AX,SI
MOVQ DX,CX
MOVQ 224(SP),AX
MULQ ·_121666_213(SB)
SHRQ $13,AX
ADDQ AX,CX
MOVQ DX,R8
MOVQ 232(SP),AX
MULQ ·_121666_213(SB)
SHRQ $13,AX
ADDQ AX,R8
MOVQ DX,R9
MOVQ 240(SP),AX
MULQ ·_121666_213(SB)
SHRQ $13,AX
ADDQ AX,R9
MOVQ DX,R10
MOVQ 248(SP),AX
MULQ ·_121666_213(SB)
SHRQ $13,AX
ADDQ AX,R10
IMUL3Q $19,DX,DX
ADDQ DX,SI
// Add b^2 (136..168(SP)) and park the sum in w2.
ADDQ 136(SP),SI
ADDQ 144(SP),CX
ADDQ 152(SP),R8
ADDQ 160(SP),R9
ADDQ 168(SP),R10
MOVQ SI,80(DI)
MOVQ CX,88(DI)
MOVQ R8,96(DI)
MOVQ R9,104(DI)
MOVQ R10,112(DI)
// Multiply w2 (80..112(DI)) by (a^2 - b^2) at 216..248(SP); 56(SP) and
// 64(SP) are reused for 19*w2[3] and 19*w2[4]. The product is written back to
// w2.
MOVQ 104(DI),SI
IMUL3Q $19,SI,AX
MOVQ AX,56(SP)
MULQ 232(SP)
MOVQ AX,SI
MOVQ DX,CX
MOVQ 112(DI),DX
IMUL3Q $19,DX,AX
MOVQ AX,64(SP)
MULQ 224(SP)
ADDQ AX,SI
ADCQ DX,CX
MOVQ 80(DI),AX
MULQ 216(SP)
ADDQ AX,SI
ADCQ DX,CX
MOVQ 80(DI),AX
MULQ 224(SP)
MOVQ AX,R8
MOVQ DX,R9
MOVQ 80(DI),AX
MULQ 232(SP)
MOVQ AX,R10
MOVQ DX,R11
MOVQ 80(DI),AX
MULQ 240(SP)
MOVQ AX,R12
MOVQ DX,R13
MOVQ 80(DI),AX
MULQ 248(SP)
MOVQ AX,R14
MOVQ DX,R15
MOVQ 88(DI),AX
MULQ 216(SP)
ADDQ AX,R8
ADCQ DX,R9
MOVQ 88(DI),AX
MULQ 224(SP)
ADDQ AX,R10
ADCQ DX,R11
MOVQ 88(DI),AX
MULQ 232(SP)
ADDQ AX,R12
ADCQ DX,R13
MOVQ 88(DI),AX
MULQ 240(SP)
ADDQ AX,R14
ADCQ DX,R15
MOVQ 88(DI),DX
IMUL3Q $19,DX,AX
MULQ 248(SP)
ADDQ AX,SI
ADCQ DX,CX
MOVQ 96(DI),AX
MULQ 216(SP)
ADDQ AX,R10
ADCQ DX,R11
MOVQ 96(DI),AX
MULQ 224(SP)
ADDQ AX,R12
ADCQ DX,R13
MOVQ 96(DI),AX
MULQ 232(SP)
ADDQ AX,R14
ADCQ DX,R15
MOVQ 96(DI),DX
IMUL3Q $19,DX,AX
MULQ 240(SP)
ADDQ AX,SI
ADCQ DX,CX
MOVQ 96(DI),DX
IMUL3Q $19,DX,AX
MULQ 248(SP)
ADDQ AX,R8
ADCQ DX,R9
MOVQ 104(DI),AX
MULQ 216(SP)
ADDQ AX,R12
ADCQ DX,R13
MOVQ 104(DI),AX
MULQ 224(SP)
ADDQ AX,R14
ADCQ DX,R15
MOVQ 56(SP),AX
MULQ 240(SP)
ADDQ AX,R8
ADCQ DX,R9
MOVQ 56(SP),AX
MULQ 248(SP)
ADDQ AX,R10
ADCQ DX,R11
MOVQ 112(DI),AX
MULQ 216(SP)
ADDQ AX,R14
ADCQ DX,R15
MOVQ 64(SP),AX
MULQ 232(SP)
ADDQ AX,R8
ADCQ DX,R9
MOVQ 64(SP),AX
MULQ 240(SP)
ADDQ AX,R10
ADCQ DX,R11
MOVQ 64(SP),AX
MULQ 248(SP)
ADDQ AX,R12
ADCQ DX,R13
// Reduce the column sums and store w2.
MOVQ ·REDMASK51(SB),DX
SHLQ $13,CX:SI
ANDQ DX,SI
SHLQ $13,R9:R8
ANDQ DX,R8
ADDQ CX,R8
SHLQ $13,R11:R10
ANDQ DX,R10
ADDQ R9,R10
SHLQ $13,R13:R12
ANDQ DX,R12
ADDQ R11,R12
SHLQ $13,R15:R14
ANDQ DX,R14
ADDQ R13,R14
IMUL3Q $19,R15,CX
ADDQ CX,SI
MOVQ SI,CX
SHRQ $51,CX
ADDQ R8,CX
MOVQ CX,R8
SHRQ $51,CX
ANDQ DX,SI
ADDQ R10,CX
MOVQ CX,R9
SHRQ $51,CX
ANDQ DX,R8
ADDQ R12,CX
MOVQ CX,AX
SHRQ $51,CX
ANDQ DX,R9
ADDQ R14,CX
MOVQ CX,R10
SHRQ $51,CX
ANDQ DX,AX
IMUL3Q $19,CX,CX
ADDQ CX,SI
ANDQ DX,R10
MOVQ SI,80(DI)
MOVQ R8,88(DI)
MOVQ R9,96(DI)
MOVQ AX,104(DI)
MOVQ R10,112(DI)
// Restore the saved registers and the original SP.
MOVQ 0(SP),R11
MOVQ 8(SP),R12
MOVQ 16(SP),R13
MOVQ 24(SP),R14
MOVQ 32(SP),R15
MOVQ 40(SP),BX
MOVQ 48(SP),BP
MOVQ R11,SP
// The Go declaration of ladderstep has no results; these two moves look like
// a leftover of the translated original's return convention (TODO confirm).
MOVQ DI,AX
MOVQ SI,DX
RET
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build amd64,!gccgo,!appengine
package curve25519
// These functions are implemented in the .s files. The names of the functions
// in the rest of the file are also taken from the SUPERCOP sources to help
// people following along.
//
// cswap compares v with 1 and, when they are equal, exchanges 64-bit words of
// inout with the words 80 bytes further on; otherwise the same values are
// written back unchanged. The assembly therefore touches memory beyond the
// five words named by the Go type: mladder passes &work[1] so that the access
// lands in later rows of work. The full extent of the swap is defined by the
// .s file (NOTE(review): confirm there before changing the caller).
//go:noescape
func cswap(inout *[5]uint64, v uint64)
// ladderstep updates the five 5-limb rows of inout in place; row 0 is only
// read. mladder calls it once per scalar bit.
//go:noescape
func ladderstep(inout *[5][5]uint64)
// freeze carries the limbs of inout down to 51 bits each and conditionally
// subtracts the limbs of 2^255-19, in place. pack calls it before serialising.
//go:noescape
func freeze(inout *[5]uint64)
// mul stores the reduced product of the 5-limb values a and b in dest. It is
// called in this file with dest aliasing a (for example mul(&t, &t, &z)).
//go:noescape
func mul(dest, a, b *[5]uint64)
// square stores the reduced square of the 5-limb value in at out. It is
// called in this file with out aliasing in (for example square(&t, &t)).
//go:noescape
func square(out, in *[5]uint64)
// mladder runs a Montgomery ladder to calculate (xr/zr) *= s.
//
// The scalar is consumed from bit 6 of s[31] down to bit 0 of s[0], which is
// 255 ladder steps. Before every step, rows 1.. of the work area are
// conditionally swapped when the current bit differs from the previous one.
// On return *xr and *zr hold rows 1 and 2 of the work area.
func mladder(xr, zr *[5]uint64, s *[32]byte) {
	var work [5][5]uint64
	work[0], work[3] = *xr, *xr
	setint(&work[1], 1)
	setint(&work[2], 0)
	setint(&work[4], 1)

	var prev byte
	// Only the top byte starts below bit 7; every later byte uses all 8 bits.
	top := 6
	for idx := len(s) - 1; idx >= 0; idx-- {
		for shift := top; shift >= 0; shift-- {
			cur := (s[idx] >> uint(shift)) & 1
			cswap(&work[1], uint64(cur^prev))
			prev = cur
			ladderstep(&work)
		}
		top = 7
	}

	*xr, *zr = work[1], work[2]
}
// scalarMult sets out to the little-endian encoding of the x coordinate
// obtained by running the ladder over base with a clamped copy of in: the low
// three bits of byte 0 and the top bit of byte 31 are cleared and bit 6 of
// byte 31 is set. in itself is not modified.
func scalarMult(out, in, base *[32]byte) {
	scalar := *in
	scalar[0] &= 248
	scalar[31] = (scalar[31] & 127) | 64

	var x, z [5]uint64
	unpack(&x, base)
	mladder(&x, &z, &scalar)

	// Turn the projective pair (x, z) into the affine value x * z^-1.
	invert(&z, &z)
	mul(&x, &x, &z)
	pack(out, &x)
}
// setint sets r to the small integer v: limb 0 becomes v and the other four
// limbs become zero.
func setint(r *[5]uint64, v uint64) {
	*r = [5]uint64{v}
}
// unpack sets r = x, where r is made of five 51-bit limbs in little-endian
// order and x is a 32-byte little-endian value. Bit 255 of x (the top bit of
// x[31]) is discarded.
func unpack(r *[5]uint64, x *[32]byte) {
	const limbMask = 1<<51 - 1

	// word returns the little-endian 64-bit value stored in x[off : off+8].
	word := func(off int) uint64 {
		var w uint64
		for k := 7; k >= 0; k-- {
			w = w<<8 | uint64(x[off+k])
		}
		return w
	}
	w0, w1, w2, w3 := word(0), word(8), word(16), word(24)

	// Limb k covers bits 51k .. 51k+50 of the 256-bit input.
	r[0] = w0 & limbMask
	r[1] = (w0>>51 | w1<<13) & limbMask
	r[2] = (w1>>38 | w2<<26) & limbMask
	r[3] = (w2>>25 | w3<<39) & limbMask
	r[4] = (w3 >> 12) & limbMask
}
// pack sets out = x, where out is the usual little-endian byte form of the
// five 51-bit limbs in x. x is copied and the copy is passed through freeze
// before serialising, so the caller's limbs are left untouched.
func pack(out *[32]byte, x *[5]uint64) {
	t := *x
	freeze(&t)

	// emit writes count successive bytes of limb>>shift starting at
	// out[first].
	emit := func(first int, limb uint64, shift uint, count int) {
		for k := 0; k < count; k++ {
			out[first+k] = byte(limb >> (shift + 8*uint(k)))
		}
	}

	// Limb boundaries fall inside bytes 6, 12, 19 and 25; the low bits of the
	// next limb are merged into the byte that the previous limb ended in.
	emit(0, t[0], 0, 7)
	out[6] ^= byte(t[1]<<3) & 0xf8
	emit(7, t[1], 5, 6)
	out[12] ^= byte(t[2]<<6) & 0xc0
	emit(13, t[2], 2, 7)
	out[19] ^= byte(t[3]<<1) & 0xfe
	emit(20, t[3], 7, 6)
	out[25] ^= byte(t[4]<<4) & 0xf0
	emit(26, t[4], 4, 6)
}
// invert calculates r = x^-1 mod p using Fermat's little theorem, i.e. it
// raises x to the power p-2 = 2^255 - 21 with a fixed chain of squarings and
// multiplications. r may alias x: x is only read before r is written.
//
// The trailing comments give the exponent of x held by the destination.
func invert(r *[5]uint64, x *[5]uint64) {
	var x2, x9, x11, e5, e10, e20, e50, e100, acc [5]uint64

	// sqrN sets dst = src^(2^n) using n squarings (n >= 1).
	sqrN := func(dst, src *[5]uint64, n int) {
		square(dst, src)
		for done := 1; done < n; done++ {
			square(dst, dst)
		}
	}

	sqrN(&x2, x, 1)        // 2
	sqrN(&acc, &x2, 2)     // 8
	mul(&x9, &acc, x)      // 9
	mul(&x11, &x9, &x2)    // 11
	sqrN(&acc, &x11, 1)    // 22
	mul(&e5, &acc, &x9)    // 2^5 - 2^0 = 31
	sqrN(&acc, &e5, 5)     // 2^10 - 2^5
	mul(&e10, &acc, &e5)   // 2^10 - 2^0
	sqrN(&acc, &e10, 10)   // 2^20 - 2^10
	mul(&e20, &acc, &e10)  // 2^20 - 2^0
	sqrN(&acc, &e20, 20)   // 2^40 - 2^20
	mul(&acc, &acc, &e20)  // 2^40 - 2^0
	sqrN(&acc, &acc, 10)   // 2^50 - 2^10
	mul(&e50, &acc, &e10)  // 2^50 - 2^0
	sqrN(&acc, &e50, 50)   // 2^100 - 2^50
	mul(&e100, &acc, &e50) // 2^100 - 2^0
	sqrN(&acc, &e100, 100) // 2^200 - 2^100
	mul(&acc, &acc, &e100) // 2^200 - 2^0
	sqrN(&acc, &acc, 50)   // 2^250 - 2^50
	mul(&acc, &acc, &e50)  // 2^250 - 2^0
	sqrN(&acc, &acc, 5)    // 2^255 - 2^5
	mul(r, &acc, &x11)     // 2^255 - 21
}
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// This code was translated into a form compatible with 6a from the public
// domain sources in SUPERCOP: http://bench.cr.yp.to/supercop.html
// +build amd64,!gccgo,!appengine
// func mul(dest, a, b *[5]uint64)
//
// mul multiplies the 5-limb values at a and b and stores five limbs at dest.
// The product is accumulated as five 128-bit column sums in the register
// pairs R9:R8, R11:R10, R13:R12, R15:R14 and BP:BX. Terms that would land in
// columns 5..8 are folded into columns 0..3 by multiplying one operand by 19
// first. The column sums are then split at bit 51, carried into each other,
// and the top carry is added to column 0 multiplied by 19. All reads of a and
// b happen before the first store to dest.
TEXT ·mul(SB),0,$128-24
MOVQ dest+0(FP), DI
MOVQ a+8(FP), SI
MOVQ b+16(FP), DX
// Keep the incoming SP in R11, move SP up to the next 32-byte boundary inside
// the declared frame, then save the original SP and R12-R15, BX, BP.
MOVQ SP,R11
MOVQ $31,CX
NOTQ CX
ANDQ CX,SP
ADDQ $32,SP
MOVQ R11,0(SP)
MOVQ R12,8(SP)
MOVQ R13,16(SP)
MOVQ R14,24(SP)
MOVQ R15,32(SP)
MOVQ BX,40(SP)
MOVQ BP,48(SP)
// dest is also spilled to 56(SP); this routine never reloads it.
MOVQ DI,56(SP)
// MULQ overwrites DX, so b is addressed through CX from here on.
MOVQ DX,CX
// 64(SP) = 19*a[3], 72(SP) = 19*a[4].
MOVQ 24(SI),DX
IMUL3Q $19,DX,AX
MOVQ AX,64(SP)
MULQ 16(CX)
MOVQ AX,R8
MOVQ DX,R9
MOVQ 32(SI),DX
IMUL3Q $19,DX,AX
MOVQ AX,72(SP)
MULQ 8(CX)
ADDQ AX,R8
ADCQ DX,R9
// Partial products of a[0].
MOVQ 0(SI),AX
MULQ 0(CX)
ADDQ AX,R8
ADCQ DX,R9
MOVQ 0(SI),AX
MULQ 8(CX)
MOVQ AX,R10
MOVQ DX,R11
MOVQ 0(SI),AX
MULQ 16(CX)
MOVQ AX,R12
MOVQ DX,R13
MOVQ 0(SI),AX
MULQ 24(CX)
MOVQ AX,R14
MOVQ DX,R15
MOVQ 0(SI),AX
MULQ 32(CX)
MOVQ AX,BX
MOVQ DX,BP
// Partial products of a[1].
MOVQ 8(SI),AX
MULQ 0(CX)
ADDQ AX,R10
ADCQ DX,R11
MOVQ 8(SI),AX
MULQ 8(CX)
ADDQ AX,R12
ADCQ DX,R13
MOVQ 8(SI),AX
MULQ 16(CX)
ADDQ AX,R14
ADCQ DX,R15
MOVQ 8(SI),AX
MULQ 24(CX)
ADDQ AX,BX
ADCQ DX,BP
MOVQ 8(SI),DX
IMUL3Q $19,DX,AX
MULQ 32(CX)
ADDQ AX,R8
ADCQ DX,R9
// Partial products of a[2].
MOVQ 16(SI),AX
MULQ 0(CX)
ADDQ AX,R12
ADCQ DX,R13
MOVQ 16(SI),AX
MULQ 8(CX)
ADDQ AX,R14
ADCQ DX,R15
MOVQ 16(SI),AX
MULQ 16(CX)
ADDQ AX,BX
ADCQ DX,BP
MOVQ 16(SI),DX
IMUL3Q $19,DX,AX
MULQ 24(CX)
ADDQ AX,R8
ADCQ DX,R9
MOVQ 16(SI),DX
IMUL3Q $19,DX,AX
MULQ 32(CX)
ADDQ AX,R10
ADCQ DX,R11
// Remaining partial products of a[3] (using 64(SP) = 19*a[3]).
MOVQ 24(SI),AX
MULQ 0(CX)
ADDQ AX,R14
ADCQ DX,R15
MOVQ 24(SI),AX
MULQ 8(CX)
ADDQ AX,BX
ADCQ DX,BP
MOVQ 64(SP),AX
MULQ 24(CX)
ADDQ AX,R10
ADCQ DX,R11
MOVQ 64(SP),AX
MULQ 32(CX)
ADDQ AX,R12
ADCQ DX,R13
// Remaining partial products of a[4] (using 72(SP) = 19*a[4]).
MOVQ 32(SI),AX
MULQ 0(CX)
ADDQ AX,BX
ADCQ DX,BP
MOVQ 72(SP),AX
MULQ 16(CX)
ADDQ AX,R10
ADCQ DX,R11
MOVQ 72(SP),AX
MULQ 24(CX)
ADDQ AX,R12
ADCQ DX,R13
MOVQ 72(SP),AX
MULQ 32(CX)
ADDQ AX,R14
ADCQ DX,R15
// Split each column sum at bit 51: the double shift leaves bits 51 and up in
// the high register, the mask keeps the low 51 bits, and the high part is
// added to the next column (19 times the top one into column 0).
MOVQ ·REDMASK51(SB),SI
SHLQ $13,R9:R8
ANDQ SI,R8
SHLQ $13,R11:R10
ANDQ SI,R10
ADDQ R9,R10
SHLQ $13,R13:R12
ANDQ SI,R12
ADDQ R11,R12
SHLQ $13,R15:R14
ANDQ SI,R14
ADDQ R13,R14
SHLQ $13,BP:BX
ANDQ SI,BX
ADDQ R15,BX
IMUL3Q $19,BP,DX
ADDQ DX,R8
// Carry chain; the result limbs end up in R8, CX, R9, AX, R10.
MOVQ R8,DX
SHRQ $51,DX
ADDQ R10,DX
MOVQ DX,CX
SHRQ $51,DX
ANDQ SI,R8
ADDQ R12,DX
MOVQ DX,R9
SHRQ $51,DX
ANDQ SI,CX
ADDQ R14,DX
MOVQ DX,AX
SHRQ $51,DX
ANDQ SI,R9
ADDQ BX,DX
MOVQ DX,R10
SHRQ $51,DX
ANDQ SI,AX
IMUL3Q $19,DX,DX
ADDQ DX,R8
ANDQ SI,R10
MOVQ R8,0(DI)
MOVQ CX,8(DI)
MOVQ R9,16(DI)
MOVQ AX,24(DI)
MOVQ R10,32(DI)
// Restore the saved registers and the original SP.
MOVQ 0(SP),R11
MOVQ 8(SP),R12
MOVQ 16(SP),R13
MOVQ 24(SP),R14
MOVQ 32(SP),R15
MOVQ 40(SP),BX
MOVQ 48(SP),BP
MOVQ R11,SP
// The Go declaration of mul has no results; these two moves look like a
// leftover of the translated original's return convention (TODO confirm).
MOVQ DI,AX
MOVQ SI,DX
RET
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// This code was translated into a form compatible with 6a from the public
// domain sources in SUPERCOP: http://bench.cr.yp.to/supercop.html
// +build amd64,!gccgo,!appengine
// func square(out, in *[5]uint64)
//
// square multiplies the 5-limb value at in by itself and stores five limbs
// at out. The five 128-bit column sums live in the register pairs R8:CX,
// R10:R9, R12:R11, R14:R13 and BX:R15. Cross terms are computed once with one
// operand doubled (SHLQ $1), and terms that wrap around are multiplied by 19,
// or by 38 when they are also doubled. The column sums are then split at bit
// 51, carried into each other, and the top carry is added to column 0
// multiplied by 19. All reads of in happen before the first store to out.
TEXT ·square(SB),7,$96-16
MOVQ out+0(FP), DI
MOVQ in+8(FP), SI
// Keep the incoming SP in R11, move SP up to the next 32-byte boundary inside
// the declared frame, then save the original SP and R12-R15, BX, BP.
MOVQ SP,R11
MOVQ $31,CX
NOTQ CX
ANDQ CX,SP
ADDQ $32, SP
MOVQ R11,0(SP)
MOVQ R12,8(SP)
MOVQ R13,16(SP)
MOVQ R14,24(SP)
MOVQ R15,32(SP)
MOVQ BX,40(SP)
MOVQ BP,48(SP)
// Terms involving in[0].
MOVQ 0(SI),AX
MULQ 0(SI)
MOVQ AX,CX
MOVQ DX,R8
MOVQ 0(SI),AX
SHLQ $1,AX
MULQ 8(SI)
MOVQ AX,R9
MOVQ DX,R10
MOVQ 0(SI),AX
SHLQ $1,AX
MULQ 16(SI)
MOVQ AX,R11
MOVQ DX,R12
MOVQ 0(SI),AX
SHLQ $1,AX
MULQ 24(SI)
MOVQ AX,R13
MOVQ DX,R14
MOVQ 0(SI),AX
SHLQ $1,AX
MULQ 32(SI)
MOVQ AX,R15
MOVQ DX,BX
// Remaining terms involving in[1].
MOVQ 8(SI),AX
MULQ 8(SI)
ADDQ AX,R11
ADCQ DX,R12
MOVQ 8(SI),AX
SHLQ $1,AX
MULQ 16(SI)
ADDQ AX,R13
ADCQ DX,R14
MOVQ 8(SI),AX
SHLQ $1,AX
MULQ 24(SI)
ADDQ AX,R15
ADCQ DX,BX
MOVQ 8(SI),DX
IMUL3Q $38,DX,AX
MULQ 32(SI)
ADDQ AX,CX
ADCQ DX,R8
// Remaining terms involving in[2].
MOVQ 16(SI),AX
MULQ 16(SI)
ADDQ AX,R15
ADCQ DX,BX
MOVQ 16(SI),DX
IMUL3Q $38,DX,AX
MULQ 24(SI)
ADDQ AX,CX
ADCQ DX,R8
MOVQ 16(SI),DX
IMUL3Q $38,DX,AX
MULQ 32(SI)
ADDQ AX,R9
ADCQ DX,R10
// Remaining terms involving in[3] and in[4].
MOVQ 24(SI),DX
IMUL3Q $19,DX,AX
MULQ 24(SI)
ADDQ AX,R9
ADCQ DX,R10
MOVQ 24(SI),DX
IMUL3Q $38,DX,AX
MULQ 32(SI)
ADDQ AX,R11
ADCQ DX,R12
MOVQ 32(SI),DX
IMUL3Q $19,DX,AX
MULQ 32(SI)
ADDQ AX,R13
ADCQ DX,R14
// in is no longer needed; SI now holds the 51-bit limb mask. Split each
// column sum at bit 51 and add the high part to the next column (19 times
// the top one into column 0).
MOVQ ·REDMASK51(SB),SI
SHLQ $13,R8:CX
ANDQ SI,CX
SHLQ $13,R10:R9
ANDQ SI,R9
ADDQ R8,R9
SHLQ $13,R12:R11
ANDQ SI,R11
ADDQ R10,R11
SHLQ $13,R14:R13
ANDQ SI,R13
ADDQ R12,R13
SHLQ $13,BX:R15
ANDQ SI,R15
ADDQ R14,R15
IMUL3Q $19,BX,DX
ADDQ DX,CX
// Carry chain; the result limbs end up in CX, R8, R9, AX, R10.
MOVQ CX,DX
SHRQ $51,DX
ADDQ R9,DX
ANDQ SI,CX
MOVQ DX,R8
SHRQ $51,DX
ADDQ R11,DX
ANDQ SI,R8
MOVQ DX,R9
SHRQ $51,DX
ADDQ R13,DX
ANDQ SI,R9
MOVQ DX,AX
SHRQ $51,DX
ADDQ R15,DX
ANDQ SI,AX
MOVQ DX,R10
SHRQ $51,DX
IMUL3Q $19,DX,DX
ADDQ DX,CX
ANDQ SI,R10
MOVQ CX,0(DI)
MOVQ R8,8(DI)
MOVQ R9,16(DI)
MOVQ AX,24(DI)
MOVQ R10,32(DI)
// Restore the saved registers and the original SP.
MOVQ 0(SP),R11
MOVQ 8(SP),R12
MOVQ 16(SP),R13
MOVQ 24(SP),R14
MOVQ 32(SP),R15
MOVQ 40(SP),BX
MOVQ 48(SP),BP
MOVQ R11,SP
// The Go declaration of square has no results; these two moves look like a
// leftover of the translated original's return convention (TODO confirm).
MOVQ DI,AX
MOVQ SI,DX
RET
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment