Commit 948db4e0 authored by Shenghou Ma's avatar Shenghou Ma

crypto/aes: speed up using AES-NI on amd64

This CL requires CL 5970055.

benchmark           old ns/op    new ns/op    delta
BenchmarkEncrypt          161           23  -85.71%
BenchmarkDecrypt          158           24  -84.24%
BenchmarkExpand           526           62  -88.21%

benchmark            old MB/s     new MB/s  speedup
BenchmarkEncrypt        99.32       696.19    7.01x
BenchmarkDecrypt       100.93       641.56    6.36x

R=golang-dev, bradfitz, dave, rsc
CC=golang-dev
https://golang.org/cl/6549055
parent e039c405
...@@ -221,7 +221,10 @@ L: ...@@ -221,7 +221,10 @@ L:
if tt.dec != nil { if tt.dec != nil {
dec = make([]uint32, len(tt.dec)) dec = make([]uint32, len(tt.dec))
} }
expandKey(tt.key, enc, dec) // This test could only test Go version of expandKey because asm
// version might use different memory layout for expanded keys
// This is OK because we don't expose expanded keys to the outside
expandKeyGo(tt.key, enc, dec)
for j, v := range enc { for j, v := range enc {
if v != tt.enc[j] { if v != tt.enc[j] {
t.Errorf("key %d: enc[%d] = %#x, want %#x", i, j, v, tt.enc[j]) t.Errorf("key %d: enc[%d] = %#x, want %#x", i, j, v, tt.enc[j])
......
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// func hasAsm() bool
// returns whether AES-NI is supported
TEXT ·hasAsm(SB),7,$0
XORQ AX, AX
INCL AX
CPUID
SHRQ $25, CX
ANDQ $1, CX
MOVB CX, ret+0(FP)
RET
// func encryptBlockAsm(nr int, xk *uint32, dst, src *byte)
TEXT ·encryptBlockAsm(SB),7,$0
MOVQ nr+0(FP), CX
MOVQ xk+8(FP), AX
MOVQ dst+16(FP), DX
MOVQ src+24(FP), BX
MOVUPS 0(AX), X1
MOVUPS 0(BX), X0
ADDQ $16, AX
PXOR X1, X0
SUBQ $12, CX
JE Lenc196
JB Lenc128
Lenc256:
MOVUPS 0(AX), X1
AESENC X1, X0
MOVUPS 16(AX), X1
AESENC X1, X0
ADDQ $32, AX
Lenc196:
MOVUPS 0(AX), X1
AESENC X1, X0
MOVUPS 16(AX), X1
AESENC X1, X0
ADDQ $32, AX
Lenc128:
MOVUPS 0(AX), X1
AESENC X1, X0
MOVUPS 16(AX), X1
AESENC X1, X0
MOVUPS 32(AX), X1
AESENC X1, X0
MOVUPS 48(AX), X1
AESENC X1, X0
MOVUPS 64(AX), X1
AESENC X1, X0
MOVUPS 80(AX), X1
AESENC X1, X0
MOVUPS 96(AX), X1
AESENC X1, X0
MOVUPS 112(AX), X1
AESENC X1, X0
MOVUPS 128(AX), X1
AESENC X1, X0
MOVUPS 144(AX), X1
AESENCLAST X1, X0
MOVUPS X0, 0(DX)
RET
// func decryptBlockAsm(nr int, xk *uint32, dst, src *byte)
TEXT ·decryptBlockAsm(SB),7,$0
MOVQ nr+0(FP), CX
MOVQ xk+8(FP), AX
MOVQ dst+16(FP), DX
MOVQ src+24(FP), BX
MOVUPS 0(AX), X1
MOVUPS 0(BX), X0
ADDQ $16, AX
PXOR X1, X0
SUBQ $12, CX
JE Ldec196
JB Ldec128
Ldec256:
MOVUPS 0(AX), X1
AESDEC X1, X0
MOVUPS 16(AX), X1
AESDEC X1, X0
ADDQ $32, AX
Ldec196:
MOVUPS 0(AX), X1
AESDEC X1, X0
MOVUPS 16(AX), X1
AESDEC X1, X0
ADDQ $32, AX
Ldec128:
MOVUPS 0(AX), X1
AESDEC X1, X0
MOVUPS 16(AX), X1
AESDEC X1, X0
MOVUPS 32(AX), X1
AESDEC X1, X0
MOVUPS 48(AX), X1
AESDEC X1, X0
MOVUPS 64(AX), X1
AESDEC X1, X0
MOVUPS 80(AX), X1
AESDEC X1, X0
MOVUPS 96(AX), X1
AESDEC X1, X0
MOVUPS 112(AX), X1
AESDEC X1, X0
MOVUPS 128(AX), X1
AESDEC X1, X0
MOVUPS 144(AX), X1
AESDECLAST X1, X0
MOVUPS X0, 0(DX)
RET
// func expandKeyAsm(nr int, key *byte, enc, dec *uint32) {
// Note that round keys are stored in uint128 format, not uint32
TEXT ·expandKeyAsm(SB),7,$0
MOVQ nr+0(FP), CX
MOVQ key+8(FP), AX
MOVQ enc+16(FP), BX
MOVQ dec+24(FP), DX
MOVUPS (AX), X0
// enc
MOVUPS X0, (BX)
ADDQ $16, BX
PXOR X4, X4 // _expand_key_* expect X4 to be zero
CMPL CX, $12
JE Lexp_enc196
JB Lexp_enc128
Lexp_enc256:
MOVUPS 16(AX), X2
MOVUPS X2, (BX)
ADDQ $16, BX
AESKEYGENASSIST $0x01, X2, X1
CALL _expand_key_256a<>(SB)
AESKEYGENASSIST $0x01, X0, X1
CALL _expand_key_256b<>(SB)
AESKEYGENASSIST $0x02, X2, X1
CALL _expand_key_256a<>(SB)
AESKEYGENASSIST $0x02, X0, X1
CALL _expand_key_256b<>(SB)
AESKEYGENASSIST $0x04, X2, X1
CALL _expand_key_256a<>(SB)
AESKEYGENASSIST $0x04, X0, X1
CALL _expand_key_256b<>(SB)
AESKEYGENASSIST $0x08, X2, X1
CALL _expand_key_256a<>(SB)
AESKEYGENASSIST $0x08, X0, X1
CALL _expand_key_256b<>(SB)
AESKEYGENASSIST $0x10, X2, X1
CALL _expand_key_256a<>(SB)
AESKEYGENASSIST $0x10, X0, X1
CALL _expand_key_256b<>(SB)
AESKEYGENASSIST $0x20, X2, X1
CALL _expand_key_256a<>(SB)
AESKEYGENASSIST $0x20, X0, X1
CALL _expand_key_256b<>(SB)
AESKEYGENASSIST $0x40, X2, X1
CALL _expand_key_256a<>(SB)
JMP Lexp_dec
Lexp_enc196:
MOVQ 16(AX), X2
AESKEYGENASSIST $0x01, X2, X1
CALL _expand_key_192a<>(SB)
AESKEYGENASSIST $0x02, X2, X1
CALL _expand_key_192b<>(SB)
AESKEYGENASSIST $0x04, X2, X1
CALL _expand_key_192a<>(SB)
AESKEYGENASSIST $0x08, X2, X1
CALL _expand_key_192b<>(SB)
AESKEYGENASSIST $0x10, X2, X1
CALL _expand_key_192a<>(SB)
AESKEYGENASSIST $0x20, X2, X1
CALL _expand_key_192b<>(SB)
AESKEYGENASSIST $0x40, X2, X1
CALL _expand_key_192a<>(SB)
AESKEYGENASSIST $0x80, X2, X1
CALL _expand_key_192b<>(SB)
JMP Lexp_dec
Lexp_enc128:
AESKEYGENASSIST $0x01, X0, X1
CALL _expand_key_128<>(SB)
AESKEYGENASSIST $0x02, X0, X1
CALL _expand_key_128<>(SB)
AESKEYGENASSIST $0x04, X0, X1
CALL _expand_key_128<>(SB)
AESKEYGENASSIST $0x08, X0, X1
CALL _expand_key_128<>(SB)
AESKEYGENASSIST $0x10, X0, X1
CALL _expand_key_128<>(SB)
AESKEYGENASSIST $0x20, X0, X1
CALL _expand_key_128<>(SB)
AESKEYGENASSIST $0x40, X0, X1
CALL _expand_key_128<>(SB)
AESKEYGENASSIST $0x80, X0, X1
CALL _expand_key_128<>(SB)
AESKEYGENASSIST $0x1b, X0, X1
CALL _expand_key_128<>(SB)
AESKEYGENASSIST $0x36, X0, X1
CALL _expand_key_128<>(SB)
Lexp_dec:
// dec
SUBQ $16, BX
MOVUPS (BX), X1
MOVUPS X1, (DX)
DECQ CX
Lexp_dec_loop:
MOVUPS -16(BX), X1
AESIMC X1, X0
MOVUPS X0, 16(DX)
SUBQ $16, BX
ADDQ $16, DX
DECQ CX
JNZ Lexp_dec_loop
MOVUPS -16(BX), X0
MOVUPS X0, 16(DX)
RET
#define PSHUFD_X0_X0_ BYTE $0x66; BYTE $0x0f; BYTE $0x70; BYTE $0xc0
#define PSHUFD_X1_X1_ BYTE $0x66; BYTE $0x0f; BYTE $0x70; BYTE $0xc9
TEXT _expand_key_128<>(SB),7,$0
PSHUFD $0xff, X1, X1
SHUFPS $0x10, X0, X4
PXOR X4, X0
SHUFPS $0x8c, X0, X4
PXOR X4, X0
PXOR X1, X0
MOVUPS X0, (BX)
ADDQ $16, BX
RET
#define PSLLDQ_X5_ BYTE $0x66; BYTE $0x0f; BYTE $0x73; BYTE $0xfd
#define PSHUFD_X0_X3_ BYTE $0x66; BYTE $0x0f; BYTE $0x70; BYTE $0xd8
TEXT _expand_key_192a<>(SB),7,$0
PSHUFD $0x55, X1, X1
SHUFPS $0x10, X0, X4
PXOR X4, X0
SHUFPS $0x8c, X0, X4
PXOR X4, X0
PXOR X1, X0
MOVAPS X2, X5
MOVAPS X2, X6
PSLLDQ_X5_; BYTE $0x4
PSHUFD $0xff, X0, X3
PXOR X3, X2
PXOR X5, X2
MOVAPS X0, X1
SHUFPS $0x44, X0, X6
MOVUPS X6, (BX)
SHUFPS $0x4e, X2, X1
MOVUPS X1, 16(BX)
ADDQ $32, BX
RET
TEXT _expand_key_192b<>(SB),7,$0
PSHUFD $0x55, X1, X1
SHUFPS $0x10, X0, X4
PXOR X4, X0
SHUFPS $0x8c, X0, X4
PXOR X4, X0
PXOR X1, X0
MOVAPS X2, X5
PSLLDQ_X5_; BYTE $0x4
PSHUFD $0xff, X0, X3
PXOR X3, X2
PXOR X5, X2
MOVUPS X0, (BX)
ADDQ $16, BX
RET
TEXT _expand_key_256a<>(SB),7,$0
JMP _expand_key_128<>(SB)
TEXT _expand_key_256b<>(SB),7,$0
PSHUFD $0xaa, X1, X1
SHUFPS $0x10, X2, X4
PXOR X4, X2
SHUFPS $0x8c, X2, X4
PXOR X4, X2
PXOR X1, X2
MOVUPS X2, (BX)
ADDQ $16, BX
RET
...@@ -37,7 +37,7 @@ ...@@ -37,7 +37,7 @@
package aes package aes
// Encrypt one block from src into dst, using the expanded key xk. // Encrypt one block from src into dst, using the expanded key xk.
func encryptBlock(xk []uint32, dst, src []byte) { func encryptBlockGo(xk []uint32, dst, src []byte) {
var s0, s1, s2, s3, t0, t1, t2, t3 uint32 var s0, s1, s2, s3, t0, t1, t2, t3 uint32
s0 = uint32(src[0])<<24 | uint32(src[1])<<16 | uint32(src[2])<<8 | uint32(src[3]) s0 = uint32(src[0])<<24 | uint32(src[1])<<16 | uint32(src[2])<<8 | uint32(src[3])
...@@ -82,7 +82,7 @@ func encryptBlock(xk []uint32, dst, src []byte) { ...@@ -82,7 +82,7 @@ func encryptBlock(xk []uint32, dst, src []byte) {
} }
// Decrypt one block from src into dst, using the expanded key xk. // Decrypt one block from src into dst, using the expanded key xk.
func decryptBlock(xk []uint32, dst, src []byte) { func decryptBlockGo(xk []uint32, dst, src []byte) {
var s0, s1, s2, s3, t0, t1, t2, t3 uint32 var s0, s1, s2, s3, t0, t1, t2, t3 uint32
s0 = uint32(src[0])<<24 | uint32(src[1])<<16 | uint32(src[2])<<8 | uint32(src[3]) s0 = uint32(src[0])<<24 | uint32(src[1])<<16 | uint32(src[2])<<8 | uint32(src[3])
...@@ -139,7 +139,7 @@ func rotw(w uint32) uint32 { return w<<8 | w>>24 } ...@@ -139,7 +139,7 @@ func rotw(w uint32) uint32 { return w<<8 | w>>24 }
// Key expansion algorithm. See FIPS-197, Figure 11. // Key expansion algorithm. See FIPS-197, Figure 11.
// Their rcon[i] is our powx[i-1] << 24. // Their rcon[i] is our powx[i-1] << 24.
func expandKey(key []byte, enc, dec []uint32) { func expandKeyGo(key []byte, enc, dec []uint32) {
// Encryption key setup. // Encryption key setup.
var i int var i int
nk := len(key) / 4 nk := len(key) / 4
......
...@@ -45,6 +45,10 @@ func NewCipher(key []byte) (cipher.Block, error) { ...@@ -45,6 +45,10 @@ func NewCipher(key []byte) (cipher.Block, error) {
func (c *aesCipher) BlockSize() int { return BlockSize } func (c *aesCipher) BlockSize() int { return BlockSize }
func (c *aesCipher) Encrypt(dst, src []byte) { encryptBlock(c.enc, dst, src) } func (c *aesCipher) Encrypt(dst, src []byte) {
encryptBlock(c.enc, dst, src)
}
func (c *aesCipher) Decrypt(dst, src []byte) { decryptBlock(c.dec, dst, src) } func (c *aesCipher) Decrypt(dst, src []byte) {
decryptBlock(c.dec, dst, src)
}
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build amd64
package aes
// defined in asm_$GOARCH.s
func hasAsm() bool
func encryptBlockAsm(nr int, xk *uint32, dst, src *byte)
func decryptBlockAsm(nr int, xk *uint32, dst, src *byte)
func expandKeyAsm(nr int, key *byte, enc *uint32, dec *uint32)
var useAsm = hasAsm()
func encryptBlock(xk []uint32, dst, src []byte) {
if useAsm {
encryptBlockAsm(len(xk)/4-1, &xk[0], &dst[0], &src[0])
} else {
encryptBlockGo(xk, dst, src)
}
}
func decryptBlock(xk []uint32, dst, src []byte) {
if useAsm {
decryptBlockAsm(len(xk)/4-1, &xk[0], &dst[0], &src[0])
} else {
decryptBlockGo(xk, dst, src)
}
}
func expandKey(key []byte, enc, dec []uint32) {
if useAsm {
rounds := 10
switch len(key) {
case 128 / 8:
rounds = 10
case 192 / 8:
rounds = 12
case 256 / 8:
rounds = 14
}
expandKeyAsm(rounds, &key[0], &enc[0], &dec[0])
} else {
expandKeyGo(key, enc, dec)
}
}
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !amd64
package aes
func encryptBlock(xk []uint32, dst, src []byte) {
encryptBlockGo(xk, dst, src)
}
func decryptBlock(xk []uint32, dst, src []byte) {
decryptBlockGo(xk, dst, src)
}
func expandKey(key []byte, enc, dec []uint32) {
expandKeyGo(key, enc, dec)
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment