Commit 251496db authored by Jussi Kivilinna's avatar Jussi Kivilinna Committed by Herbert Xu

crypto: serpent - add 4-way parallel i586/SSE2 assembler implementation

Patch adds i586/SSE2 assembler implementation of serpent cipher. Assembler
functions crypt data in four block chunks.

Patch has been tested with tcrypt and automated filesystem tests.

Tcrypt benchmarks results (serpent-sse2/serpent_generic speed ratios):

Intel Atom N270:

size    ecb-enc ecb-dec cbc-enc cbc-dec ctr-enc ctr-dec
16      0.95x   1.12x   1.02x   1.07x   0.97x   0.98x
64      1.73x   1.82x   1.08x   1.82x   1.72x   1.73x
256     2.08x   2.00x   1.04x   2.07x   1.99x   2.01x
1024    2.28x   2.18x   1.05x   2.23x   2.17x   2.20x
8192    2.28x   2.13x   1.05x   2.23x   2.18x   2.20x

Full output:
 http://koti.mbnet.fi/axh/kernel/crypto/atom-n270/serpent-generic.txt
 http://koti.mbnet.fi/axh/kernel/crypto/atom-n270/serpent-sse2.txt

Userspace test results:

Encryption/decryption of sse2-i586 vs generic on Intel Atom N270:
 encrypt: 2.35x
 decrypt: 2.54x

Encryption/decryption of sse2-i586 vs generic on AMD Phenom II:
 encrypt: 1.82x
 decrypt: 2.51x

Encryption/decryption of sse2-i586 vs generic on Intel Xeon E7330:
 encrypt: 2.99x
 decrypt: 3.48x
Signed-off-by: default avatarJussi Kivilinna <jussi.kivilinna@mbnet.fi>
Signed-off-by: default avatarHerbert Xu <herbert@gondor.apana.org.au>
parent 937c30d7
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o
obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o
obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o
obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
...@@ -21,6 +22,7 @@ obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o ...@@ -21,6 +22,7 @@ obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o
aes-i586-y := aes-i586-asm_32.o aes_glue.o aes-i586-y := aes-i586-asm_32.o aes_glue.o
twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o
salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o
serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o
aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
......
This diff is collapsed.
...@@ -4,6 +4,35 @@ ...@@ -4,6 +4,35 @@
#include <linux/crypto.h> #include <linux/crypto.h>
#include <crypto/serpent.h> #include <crypto/serpent.h>
#ifdef CONFIG_X86_32
#define SERPENT_PARALLEL_BLOCKS 4
asmlinkage void __serpent_enc_blk_4way(struct serpent_ctx *ctx, u8 *dst,
const u8 *src, bool xor);
asmlinkage void serpent_dec_blk_4way(struct serpent_ctx *ctx, u8 *dst,
const u8 *src);
static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst,
const u8 *src)
{
__serpent_enc_blk_4way(ctx, dst, src, false);
}
static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst,
const u8 *src)
{
__serpent_enc_blk_4way(ctx, dst, src, true);
}
static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst,
const u8 *src)
{
serpent_dec_blk_4way(ctx, dst, src);
}
#else
#define SERPENT_PARALLEL_BLOCKS 8 #define SERPENT_PARALLEL_BLOCKS 8
asmlinkage void __serpent_enc_blk_8way(struct serpent_ctx *ctx, u8 *dst, asmlinkage void __serpent_enc_blk_8way(struct serpent_ctx *ctx, u8 *dst,
...@@ -30,3 +59,5 @@ static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst, ...@@ -30,3 +59,5 @@ static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst,
} }
#endif #endif
#endif
...@@ -783,6 +783,23 @@ config CRYPTO_SERPENT_SSE2_X86_64 ...@@ -783,6 +783,23 @@ config CRYPTO_SERPENT_SSE2_X86_64
See also: See also:
<http://www.cl.cam.ac.uk/~rja14/serpent.html> <http://www.cl.cam.ac.uk/~rja14/serpent.html>
config CRYPTO_SERPENT_SSE2_586
tristate "Serpent cipher algorithm (i586/SSE2)"
depends on X86 && !64BIT
select CRYPTO_ALGAPI
select CRYPTO_SERPENT
help
Serpent cipher algorithm, by Anderson, Biham & Knudsen.
Keys are allowed to be from 0 to 256 bits in length, in steps
of 8 bits.
This module provides Serpent cipher algorithm that processes four
blocks parallel using SSE2 instruction set.
See also:
<http://www.cl.cam.ac.uk/~rja14/serpent.html>
config CRYPTO_TEA config CRYPTO_TEA
tristate "TEA, XTEA and XETA cipher algorithms" tristate "TEA, XTEA and XETA cipher algorithms"
select CRYPTO_ALGAPI select CRYPTO_ALGAPI
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment