Commit f3f935a7 authored by Jussi Kivilinna, committed by Herbert Xu

crypto: camellia - add AVX2/AES-NI/x86_64 assembler implementation of camellia cipher

This patch adds an AVX2/AES-NI/x86-64 implementation of the Camellia cipher,
requiring 32 parallel blocks of input (512 bytes). Compared to the AVX
implementation, this version is extended to use the 256-bit wide YMM registers.
For the AES-NI instructions, data is split into two 128-bit registers and merged
afterwards. Even with this additional handling, performance should be higher
than that of the AES-NI/AVX implementation.
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
parent 56d76c96
...@@ -43,6 +43,7 @@ endif ...@@ -43,6 +43,7 @@ endif
# These modules require assembler to support AVX2. # These modules require assembler to support AVX2.
ifeq ($(avx2_supported),yes) ifeq ($(avx2_supported),yes)
obj-$(CONFIG_CRYPTO_BLOWFISH_AVX2_X86_64) += blowfish-avx2.o obj-$(CONFIG_CRYPTO_BLOWFISH_AVX2_X86_64) += blowfish-avx2.o
obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64) += camellia-aesni-avx2.o
obj-$(CONFIG_CRYPTO_SERPENT_AVX2_X86_64) += serpent-avx2.o obj-$(CONFIG_CRYPTO_SERPENT_AVX2_X86_64) += serpent-avx2.o
obj-$(CONFIG_CRYPTO_TWOFISH_AVX2_X86_64) += twofish-avx2.o obj-$(CONFIG_CRYPTO_TWOFISH_AVX2_X86_64) += twofish-avx2.o
endif endif
...@@ -73,6 +74,7 @@ endif ...@@ -73,6 +74,7 @@ endif
ifeq ($(avx2_supported),yes) ifeq ($(avx2_supported),yes)
blowfish-avx2-y := blowfish-avx2-asm_64.o blowfish_avx2_glue.o blowfish-avx2-y := blowfish-avx2-asm_64.o blowfish_avx2_glue.o
camellia-aesni-avx2-y := camellia-aesni-avx2-asm_64.o camellia_aesni_avx2_glue.o
serpent-avx2-y := serpent-avx2-asm_64.o serpent_avx2_glue.o serpent-avx2-y := serpent-avx2-asm_64.o serpent_avx2_glue.o
twofish-avx2-y := twofish-avx2-asm_64.o twofish_avx2_glue.o twofish-avx2-y := twofish-avx2-asm_64.o twofish_avx2_glue.o
endif endif
......
This diff is collapsed.
This diff is collapsed.
...@@ -26,33 +26,44 @@ ...@@ -26,33 +26,44 @@
#define CAMELLIA_AESNI_PARALLEL_BLOCKS 16 #define CAMELLIA_AESNI_PARALLEL_BLOCKS 16
/* 16-way AES-NI parallel cipher functions */ /* 16-way parallel cipher functions (avx/aes-ni) */
asmlinkage void camellia_ecb_enc_16way(struct camellia_ctx *ctx, u8 *dst, asmlinkage void camellia_ecb_enc_16way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src); const u8 *src);
EXPORT_SYMBOL_GPL(camellia_ecb_enc_16way);
asmlinkage void camellia_ecb_dec_16way(struct camellia_ctx *ctx, u8 *dst, asmlinkage void camellia_ecb_dec_16way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src); const u8 *src);
EXPORT_SYMBOL_GPL(camellia_ecb_dec_16way);
asmlinkage void camellia_cbc_dec_16way(struct camellia_ctx *ctx, u8 *dst, asmlinkage void camellia_cbc_dec_16way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src); const u8 *src);
EXPORT_SYMBOL_GPL(camellia_cbc_dec_16way);
asmlinkage void camellia_ctr_16way(struct camellia_ctx *ctx, u8 *dst, asmlinkage void camellia_ctr_16way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv); const u8 *src, le128 *iv);
EXPORT_SYMBOL_GPL(camellia_ctr_16way);
asmlinkage void camellia_xts_enc_16way(struct camellia_ctx *ctx, u8 *dst, asmlinkage void camellia_xts_enc_16way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv); const u8 *src, le128 *iv);
EXPORT_SYMBOL_GPL(camellia_xts_enc_16way);
asmlinkage void camellia_xts_dec_16way(struct camellia_ctx *ctx, u8 *dst, asmlinkage void camellia_xts_dec_16way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv); const u8 *src, le128 *iv);
EXPORT_SYMBOL_GPL(camellia_xts_dec_16way);
static void camellia_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv) void camellia_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{ {
glue_xts_crypt_128bit_one(ctx, dst, src, iv, glue_xts_crypt_128bit_one(ctx, dst, src, iv,
GLUE_FUNC_CAST(camellia_enc_blk)); GLUE_FUNC_CAST(camellia_enc_blk));
} }
EXPORT_SYMBOL_GPL(camellia_xts_enc);
static void camellia_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv) void camellia_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{ {
glue_xts_crypt_128bit_one(ctx, dst, src, iv, glue_xts_crypt_128bit_one(ctx, dst, src, iv,
GLUE_FUNC_CAST(camellia_dec_blk)); GLUE_FUNC_CAST(camellia_dec_blk));
} }
EXPORT_SYMBOL_GPL(camellia_xts_dec);
static const struct common_glue_ctx camellia_enc = { static const struct common_glue_ctx camellia_enc = {
.num_funcs = 3, .num_funcs = 3,
......
...@@ -48,6 +48,22 @@ asmlinkage void __camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst, ...@@ -48,6 +48,22 @@ asmlinkage void __camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst,
asmlinkage void camellia_dec_blk_2way(struct camellia_ctx *ctx, u8 *dst, asmlinkage void camellia_dec_blk_2way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src); const u8 *src);
/* 16-way parallel cipher functions (avx/aes-ni) */
asmlinkage void camellia_ecb_enc_16way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void camellia_ecb_dec_16way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void camellia_cbc_dec_16way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void camellia_ctr_16way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
asmlinkage void camellia_xts_enc_16way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
asmlinkage void camellia_xts_dec_16way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
static inline void camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst, static inline void camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst,
const u8 *src) const u8 *src)
{ {
...@@ -79,4 +95,7 @@ extern void camellia_crypt_ctr(void *ctx, u128 *dst, const u128 *src, ...@@ -79,4 +95,7 @@ extern void camellia_crypt_ctr(void *ctx, u128 *dst, const u128 *src,
extern void camellia_crypt_ctr_2way(void *ctx, u128 *dst, const u128 *src, extern void camellia_crypt_ctr_2way(void *ctx, u128 *dst, const u128 *src,
le128 *iv); le128 *iv);
extern void camellia_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv);
extern void camellia_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv);
#endif /* ASM_X86_CAMELLIA_H */ #endif /* ASM_X86_CAMELLIA_H */
...@@ -894,6 +894,29 @@ config CRYPTO_CAMELLIA_AESNI_AVX_X86_64 ...@@ -894,6 +894,29 @@ config CRYPTO_CAMELLIA_AESNI_AVX_X86_64
See also: See also:
<https://info.isl.ntt.co.jp/crypt/eng/camellia/index_s.html> <https://info.isl.ntt.co.jp/crypt/eng/camellia/index_s.html>
config CRYPTO_CAMELLIA_AESNI_AVX2_X86_64
tristate "Camellia cipher algorithm (x86_64/AES-NI/AVX2)"
depends on X86 && 64BIT
depends on CRYPTO
select CRYPTO_ALGAPI
select CRYPTO_CRYPTD
select CRYPTO_ABLK_HELPER_X86
select CRYPTO_GLUE_HELPER_X86
select CRYPTO_CAMELLIA_X86_64
select CRYPTO_CAMELLIA_AESNI_AVX_X86_64
select CRYPTO_LRW
select CRYPTO_XTS
help
Camellia cipher algorithm module (x86_64/AES-NI/AVX2).
Camellia is a symmetric key block cipher developed jointly
at NTT and Mitsubishi Electric Corporation.
The Camellia specifies three key sizes: 128, 192 and 256 bits.
See also:
<https://info.isl.ntt.co.jp/crypt/eng/camellia/index_s.html>
config CRYPTO_CAMELLIA_SPARC64 config CRYPTO_CAMELLIA_SPARC64
tristate "Camellia cipher algorithm (SPARC64)" tristate "Camellia cipher algorithm (SPARC64)"
depends on SPARC64 depends on SPARC64
......
...@@ -1666,6 +1666,9 @@ static const struct alg_test_desc alg_test_descs[] = { ...@@ -1666,6 +1666,9 @@ static const struct alg_test_desc alg_test_descs[] = {
}, { }, {
.alg = "__driver-cbc-camellia-aesni", .alg = "__driver-cbc-camellia-aesni",
.test = alg_test_null, .test = alg_test_null,
}, {
.alg = "__driver-cbc-camellia-aesni-avx2",
.test = alg_test_null,
}, { }, {
.alg = "__driver-cbc-cast5-avx", .alg = "__driver-cbc-cast5-avx",
.test = alg_test_null, .test = alg_test_null,
...@@ -1697,6 +1700,9 @@ static const struct alg_test_desc alg_test_descs[] = { ...@@ -1697,6 +1700,9 @@ static const struct alg_test_desc alg_test_descs[] = {
}, { }, {
.alg = "__driver-ecb-camellia-aesni", .alg = "__driver-ecb-camellia-aesni",
.test = alg_test_null, .test = alg_test_null,
}, {
.alg = "__driver-ecb-camellia-aesni-avx2",
.test = alg_test_null,
}, { }, {
.alg = "__driver-ecb-cast5-avx", .alg = "__driver-ecb-cast5-avx",
.test = alg_test_null, .test = alg_test_null,
...@@ -1977,6 +1983,9 @@ static const struct alg_test_desc alg_test_descs[] = { ...@@ -1977,6 +1983,9 @@ static const struct alg_test_desc alg_test_descs[] = {
}, { }, {
.alg = "cryptd(__driver-cbc-camellia-aesni)", .alg = "cryptd(__driver-cbc-camellia-aesni)",
.test = alg_test_null, .test = alg_test_null,
}, {
.alg = "cryptd(__driver-cbc-camellia-aesni-avx2)",
.test = alg_test_null,
}, { }, {
.alg = "cryptd(__driver-cbc-serpent-avx2)", .alg = "cryptd(__driver-cbc-serpent-avx2)",
.test = alg_test_null, .test = alg_test_null,
...@@ -1990,6 +1999,9 @@ static const struct alg_test_desc alg_test_descs[] = { ...@@ -1990,6 +1999,9 @@ static const struct alg_test_desc alg_test_descs[] = {
}, { }, {
.alg = "cryptd(__driver-ecb-camellia-aesni)", .alg = "cryptd(__driver-ecb-camellia-aesni)",
.test = alg_test_null, .test = alg_test_null,
}, {
.alg = "cryptd(__driver-ecb-camellia-aesni-avx2)",
.test = alg_test_null,
}, { }, {
.alg = "cryptd(__driver-ecb-cast5-avx)", .alg = "cryptd(__driver-ecb-cast5-avx)",
.test = alg_test_null, .test = alg_test_null,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment