Commit 1ed55eac authored by Linus Torvalds's avatar Linus Torvalds

Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6

Pull crypto update from Herbert Xu:

 - Added aesni/avx/x86_64 implementations for camellia.

 - Optimised AVX code for cast5/serpent/twofish/cast6.

 - Fixed vmac bug with unaligned input.

 - Allow compression algorithms in FIPS mode.

 - Optimised crc32c implementation for Intel.

 - Misc fixes.

* git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (32 commits)
  crypto: caam - Updated SEC-4.0 device tree binding for ERA information.
  crypto: testmgr - remove superfluous initializers for xts(aes)
  crypto: testmgr - allow compression algs in fips mode
  crypto: testmgr - add larger crc32c test vector to test FPU path in crc32c_intel
  crypto: testmgr - clean alg_test_null entries in alg_test_descs[]
  crypto: testmgr - remove fips_allowed flag from camellia-aesni null-tests
  crypto: cast5/cast6 - move lookup tables to shared module
  padata: use __this_cpu_read per-cpu helper
  crypto: s5p-sss - Fix compilation error
  crypto: picoxcell - Add terminating entry for platform_device_id table
  crypto: omap-aes - select BLKCIPHER2
  crypto: camellia - add AES-NI/AVX/x86_64 assembler implementation of camellia cipher
  crypto: camellia-x86_64 - share common functions and move structures and function definitions to header file
  crypto: tcrypt - add async speed test for camellia cipher
  crypto: tegra-aes - fix error-valued pointer dereference
  crypto: tegra - fix missing unlock on error case
  crypto: cast5/avx - avoid using temporary stack buffers
  crypto: serpent/avx - avoid using temporary stack buffers
  crypto: twofish/avx - avoid using temporary stack buffers
  crypto: cast6/avx - avoid using temporary stack buffers
  ...
parents 08242bc2 a2c0911c
......@@ -54,7 +54,8 @@ PROPERTIES
- compatible
Usage: required
Value type: <string>
Definition: Must include "fsl,sec-v4.0"
Definition: Must include "fsl,sec-v4.0". Also includes SEC
ERA versions (optional) with which the device is compatible.
- #address-cells
Usage: required
......@@ -106,7 +107,7 @@ PROPERTIES
EXAMPLE
crypto@300000 {
compatible = "fsl,sec-v4.0";
compatible = "fsl,sec-v4.0", "fsl,sec-era-v2.0";
#address-cells = <1>;
#size-cells = <1>;
reg = <0x300000 0x10000>;
......
......@@ -12,6 +12,7 @@ obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o
obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o
obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX_X86_64) += camellia-aesni-avx-x86_64.o
obj-$(CONFIG_CRYPTO_CAST5_AVX_X86_64) += cast5-avx-x86_64.o
obj-$(CONFIG_CRYPTO_CAST6_AVX_X86_64) += cast6-avx-x86_64.o
obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
......@@ -34,6 +35,8 @@ serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o
aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o
camellia-aesni-avx-x86_64-y := camellia-aesni-avx-asm_64.o \
camellia_aesni_avx_glue.o
cast5-avx-x86_64-y := cast5-avx-x86_64-asm_64.o cast5_avx_glue.o
cast6-avx-x86_64-y := cast6-avx-x86_64-asm_64.o cast6_avx_glue.o
blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
......@@ -47,3 +50,5 @@ serpent-avx-x86_64-y := serpent-avx-x86_64-asm_64.o serpent_avx_glue.o
aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o
crc32c-intel-y := crc32c-intel_glue.o
crc32c-intel-$(CONFIG_CRYPTO_CRC32C_X86_64) += crc32c-pcl-intel-asm_64.o
/*
* x86_64/AVX/AES-NI assembler implementation of Camellia
*
* Copyright © 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
*/
/*
* Version licensed under 2-clause BSD License is available at:
* http://koti.mbnet.fi/axh/crypto/camellia-BSD-1.2.0-aesni1.tar.xz
*/
#define CAMELLIA_TABLE_BYTE_LEN 272
/* struct camellia_ctx: */
#define key_table 0
#define key_length CAMELLIA_TABLE_BYTE_LEN
/* register macros */
#define CTX %rdi
/**********************************************************************
16-way camellia
**********************************************************************/
#define filter_8bit(x, lo_t, hi_t, mask4bit, tmp0) \
vpand x, mask4bit, tmp0; \
vpandn x, mask4bit, x; \
vpsrld $4, x, x; \
\
vpshufb tmp0, lo_t, tmp0; \
vpshufb x, hi_t, x; \
vpxor tmp0, x, x;
/*
* IN:
* x0..x7: byte-sliced AB state
* mem_cd: register pointer storing CD state
* key: index for key material
* OUT:
* x0..x7: new byte-sliced CD state
*/
#define roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, t0, t1, t2, t3, t4, t5, t6, \
t7, mem_cd, key) \
/* \
* S-function with AES subbytes \
*/ \
vmovdqa .Linv_shift_row, t4; \
vbroadcastss .L0f0f0f0f, t7; \
vmovdqa .Lpre_tf_lo_s1, t0; \
vmovdqa .Lpre_tf_hi_s1, t1; \
\
/* AES inverse shift rows */ \
vpshufb t4, x0, x0; \
vpshufb t4, x7, x7; \
vpshufb t4, x1, x1; \
vpshufb t4, x4, x4; \
vpshufb t4, x2, x2; \
vpshufb t4, x5, x5; \
vpshufb t4, x3, x3; \
vpshufb t4, x6, x6; \
\
/* prefilter sboxes 1, 2 and 3 */ \
vmovdqa .Lpre_tf_lo_s4, t2; \
vmovdqa .Lpre_tf_hi_s4, t3; \
filter_8bit(x0, t0, t1, t7, t6); \
filter_8bit(x7, t0, t1, t7, t6); \
filter_8bit(x1, t0, t1, t7, t6); \
filter_8bit(x4, t0, t1, t7, t6); \
filter_8bit(x2, t0, t1, t7, t6); \
filter_8bit(x5, t0, t1, t7, t6); \
\
/* prefilter sbox 4 */ \
vpxor t4, t4, t4; \
filter_8bit(x3, t2, t3, t7, t6); \
filter_8bit(x6, t2, t3, t7, t6); \
\
/* AES subbytes + AES shift rows */ \
vmovdqa .Lpost_tf_lo_s1, t0; \
vmovdqa .Lpost_tf_hi_s1, t1; \
vaesenclast t4, x0, x0; \
vaesenclast t4, x7, x7; \
vaesenclast t4, x1, x1; \
vaesenclast t4, x4, x4; \
vaesenclast t4, x2, x2; \
vaesenclast t4, x5, x5; \
vaesenclast t4, x3, x3; \
vaesenclast t4, x6, x6; \
\
/* postfilter sboxes 1 and 4 */ \
vmovdqa .Lpost_tf_lo_s3, t2; \
vmovdqa .Lpost_tf_hi_s3, t3; \
filter_8bit(x0, t0, t1, t7, t6); \
filter_8bit(x7, t0, t1, t7, t6); \
filter_8bit(x3, t0, t1, t7, t6); \
filter_8bit(x6, t0, t1, t7, t6); \
\
/* postfilter sbox 3 */ \
vmovdqa .Lpost_tf_lo_s2, t4; \
vmovdqa .Lpost_tf_hi_s2, t5; \
filter_8bit(x2, t2, t3, t7, t6); \
filter_8bit(x5, t2, t3, t7, t6); \
\
vpxor t6, t6, t6; \
vmovq key, t0; \
\
/* postfilter sbox 2 */ \
filter_8bit(x1, t4, t5, t7, t2); \
filter_8bit(x4, t4, t5, t7, t2); \
\
vpsrldq $5, t0, t5; \
vpsrldq $1, t0, t1; \
vpsrldq $2, t0, t2; \
vpsrldq $3, t0, t3; \
vpsrldq $4, t0, t4; \
vpshufb t6, t0, t0; \
vpshufb t6, t1, t1; \
vpshufb t6, t2, t2; \
vpshufb t6, t3, t3; \
vpshufb t6, t4, t4; \
vpsrldq $2, t5, t7; \
vpshufb t6, t7, t7; \
\
/* \
* P-function \
*/ \
vpxor x5, x0, x0; \
vpxor x6, x1, x1; \
vpxor x7, x2, x2; \
vpxor x4, x3, x3; \
\
vpxor x2, x4, x4; \
vpxor x3, x5, x5; \
vpxor x0, x6, x6; \
vpxor x1, x7, x7; \
\
vpxor x7, x0, x0; \
vpxor x4, x1, x1; \
vpxor x5, x2, x2; \
vpxor x6, x3, x3; \
\
vpxor x3, x4, x4; \
vpxor x0, x5, x5; \
vpxor x1, x6, x6; \
vpxor x2, x7, x7; /* note: high and low parts swapped */ \
\
/* \
* Add key material and result to CD (x becomes new CD) \
*/ \
\
vpxor t3, x4, x4; \
vpxor 0 * 16(mem_cd), x4, x4; \
\
vpxor t2, x5, x5; \
vpxor 1 * 16(mem_cd), x5, x5; \
\
vpsrldq $1, t5, t3; \
vpshufb t6, t5, t5; \
vpshufb t6, t3, t6; \
\
vpxor t1, x6, x6; \
vpxor 2 * 16(mem_cd), x6, x6; \
\
vpxor t0, x7, x7; \
vpxor 3 * 16(mem_cd), x7, x7; \
\
vpxor t7, x0, x0; \
vpxor 4 * 16(mem_cd), x0, x0; \
\
vpxor t6, x1, x1; \
vpxor 5 * 16(mem_cd), x1, x1; \
\
vpxor t5, x2, x2; \
vpxor 6 * 16(mem_cd), x2, x2; \
\
vpxor t4, x3, x3; \
vpxor 7 * 16(mem_cd), x3, x3;
/*
* Size optimization... with inlined roundsm16, binary would be over 5 times
* larger and would only be 0.5% faster (on sandy-bridge).
*/
.align 8
roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd:
roundsm16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
%xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15,
%rcx, (%r9));
ret;
.align 8
roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab:
roundsm16(%xmm4, %xmm5, %xmm6, %xmm7, %xmm0, %xmm1, %xmm2, %xmm3,
%xmm12, %xmm13, %xmm14, %xmm15, %xmm8, %xmm9, %xmm10, %xmm11,
%rax, (%r9));
ret;
/*
* IN/OUT:
* x0..x7: byte-sliced AB state preloaded
* mem_ab: byte-sliced AB state in memory
* mem_cb: byte-sliced CD state in memory
*/
#define two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
y6, y7, mem_ab, mem_cd, i, dir, store_ab) \
leaq (key_table + (i) * 8)(CTX), %r9; \
call roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd; \
\
vmovdqu x4, 0 * 16(mem_cd); \
vmovdqu x5, 1 * 16(mem_cd); \
vmovdqu x6, 2 * 16(mem_cd); \
vmovdqu x7, 3 * 16(mem_cd); \
vmovdqu x0, 4 * 16(mem_cd); \
vmovdqu x1, 5 * 16(mem_cd); \
vmovdqu x2, 6 * 16(mem_cd); \
vmovdqu x3, 7 * 16(mem_cd); \
\
leaq (key_table + ((i) + (dir)) * 8)(CTX), %r9; \
call roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab; \
\
store_ab(x0, x1, x2, x3, x4, x5, x6, x7, mem_ab);
#define dummy_store(x0, x1, x2, x3, x4, x5, x6, x7, mem_ab) /* do nothing */
#define store_ab_state(x0, x1, x2, x3, x4, x5, x6, x7, mem_ab) \
/* Store new AB state */ \
vmovdqu x0, 0 * 16(mem_ab); \
vmovdqu x1, 1 * 16(mem_ab); \
vmovdqu x2, 2 * 16(mem_ab); \
vmovdqu x3, 3 * 16(mem_ab); \
vmovdqu x4, 4 * 16(mem_ab); \
vmovdqu x5, 5 * 16(mem_ab); \
vmovdqu x6, 6 * 16(mem_ab); \
vmovdqu x7, 7 * 16(mem_ab);
#define enc_rounds16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
y6, y7, mem_ab, mem_cd, i) \
two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
y6, y7, mem_ab, mem_cd, (i) + 2, 1, store_ab_state); \
two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
y6, y7, mem_ab, mem_cd, (i) + 4, 1, store_ab_state); \
two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
y6, y7, mem_ab, mem_cd, (i) + 6, 1, dummy_store);
#define dec_rounds16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
y6, y7, mem_ab, mem_cd, i) \
two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
y6, y7, mem_ab, mem_cd, (i) + 7, -1, store_ab_state); \
two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
y6, y7, mem_ab, mem_cd, (i) + 5, -1, store_ab_state); \
two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
y6, y7, mem_ab, mem_cd, (i) + 3, -1, dummy_store);
/*
* IN:
* v0..3: byte-sliced 32-bit integers
* OUT:
* v0..3: (IN <<< 1)
*/
#define rol32_1_16(v0, v1, v2, v3, t0, t1, t2, zero) \
vpcmpgtb v0, zero, t0; \
vpaddb v0, v0, v0; \
vpabsb t0, t0; \
\
vpcmpgtb v1, zero, t1; \
vpaddb v1, v1, v1; \
vpabsb t1, t1; \
\
vpcmpgtb v2, zero, t2; \
vpaddb v2, v2, v2; \
vpabsb t2, t2; \
\
vpor t0, v1, v1; \
\
vpcmpgtb v3, zero, t0; \
vpaddb v3, v3, v3; \
vpabsb t0, t0; \
\
vpor t1, v2, v2; \
vpor t2, v3, v3; \
vpor t0, v0, v0;
/*
* IN:
* r: byte-sliced AB state in memory
* l: byte-sliced CD state in memory
* OUT:
* x0..x7: new byte-sliced CD state
*/
#define fls16(l, l0, l1, l2, l3, l4, l5, l6, l7, r, t0, t1, t2, t3, tt0, \
tt1, tt2, tt3, kll, klr, krl, krr) \
/* \
* t0 = kll; \
* t0 &= ll; \
* lr ^= rol32(t0, 1); \
*/ \
vpxor tt0, tt0, tt0; \
vmovd kll, t0; \
vpshufb tt0, t0, t3; \
vpsrldq $1, t0, t0; \
vpshufb tt0, t0, t2; \
vpsrldq $1, t0, t0; \
vpshufb tt0, t0, t1; \
vpsrldq $1, t0, t0; \
vpshufb tt0, t0, t0; \
\
vpand l0, t0, t0; \
vpand l1, t1, t1; \
vpand l2, t2, t2; \
vpand l3, t3, t3; \
\
rol32_1_16(t3, t2, t1, t0, tt1, tt2, tt3, tt0); \
\
vpxor l4, t0, l4; \
vmovdqu l4, 4 * 16(l); \
vpxor l5, t1, l5; \
vmovdqu l5, 5 * 16(l); \
vpxor l6, t2, l6; \
vmovdqu l6, 6 * 16(l); \
vpxor l7, t3, l7; \
vmovdqu l7, 7 * 16(l); \
\
/* \
* t2 = krr; \
* t2 |= rr; \
* rl ^= t2; \
*/ \
\
vmovd krr, t0; \
vpshufb tt0, t0, t3; \
vpsrldq $1, t0, t0; \
vpshufb tt0, t0, t2; \
vpsrldq $1, t0, t0; \
vpshufb tt0, t0, t1; \
vpsrldq $1, t0, t0; \
vpshufb tt0, t0, t0; \
\
vpor 4 * 16(r), t0, t0; \
vpor 5 * 16(r), t1, t1; \
vpor 6 * 16(r), t2, t2; \
vpor 7 * 16(r), t3, t3; \
\
vpxor 0 * 16(r), t0, t0; \
vpxor 1 * 16(r), t1, t1; \
vpxor 2 * 16(r), t2, t2; \
vpxor 3 * 16(r), t3, t3; \
vmovdqu t0, 0 * 16(r); \
vmovdqu t1, 1 * 16(r); \
vmovdqu t2, 2 * 16(r); \
vmovdqu t3, 3 * 16(r); \
\
/* \
* t2 = krl; \
* t2 &= rl; \
* rr ^= rol32(t2, 1); \
*/ \
vmovd krl, t0; \
vpshufb tt0, t0, t3; \
vpsrldq $1, t0, t0; \
vpshufb tt0, t0, t2; \
vpsrldq $1, t0, t0; \
vpshufb tt0, t0, t1; \
vpsrldq $1, t0, t0; \
vpshufb tt0, t0, t0; \
\
vpand 0 * 16(r), t0, t0; \
vpand 1 * 16(r), t1, t1; \
vpand 2 * 16(r), t2, t2; \
vpand 3 * 16(r), t3, t3; \
\
rol32_1_16(t3, t2, t1, t0, tt1, tt2, tt3, tt0); \
\
vpxor 4 * 16(r), t0, t0; \
vpxor 5 * 16(r), t1, t1; \
vpxor 6 * 16(r), t2, t2; \
vpxor 7 * 16(r), t3, t3; \
vmovdqu t0, 4 * 16(r); \
vmovdqu t1, 5 * 16(r); \
vmovdqu t2, 6 * 16(r); \
vmovdqu t3, 7 * 16(r); \
\
/* \
* t0 = klr; \
* t0 |= lr; \
* ll ^= t0; \
*/ \
\
vmovd klr, t0; \
vpshufb tt0, t0, t3; \
vpsrldq $1, t0, t0; \
vpshufb tt0, t0, t2; \
vpsrldq $1, t0, t0; \
vpshufb tt0, t0, t1; \
vpsrldq $1, t0, t0; \
vpshufb tt0, t0, t0; \
\
vpor l4, t0, t0; \
vpor l5, t1, t1; \
vpor l6, t2, t2; \
vpor l7, t3, t3; \
\
vpxor l0, t0, l0; \
vmovdqu l0, 0 * 16(l); \
vpxor l1, t1, l1; \
vmovdqu l1, 1 * 16(l); \
vpxor l2, t2, l2; \
vmovdqu l2, 2 * 16(l); \
vpxor l3, t3, l3; \
vmovdqu l3, 3 * 16(l);
#define transpose_4x4(x0, x1, x2, x3, t1, t2) \
vpunpckhdq x1, x0, t2; \
vpunpckldq x1, x0, x0; \
\
vpunpckldq x3, x2, t1; \
vpunpckhdq x3, x2, x2; \
\
vpunpckhqdq t1, x0, x1; \
vpunpcklqdq t1, x0, x0; \
\
vpunpckhqdq x2, t2, x3; \
vpunpcklqdq x2, t2, x2;
#define byteslice_16x16b(a0, b0, c0, d0, a1, b1, c1, d1, a2, b2, c2, d2, a3, \
b3, c3, d3, st0, st1) \
vmovdqu d2, st0; \
vmovdqu d3, st1; \
transpose_4x4(a0, a1, a2, a3, d2, d3); \
transpose_4x4(b0, b1, b2, b3, d2, d3); \
vmovdqu st0, d2; \
vmovdqu st1, d3; \
\
vmovdqu a0, st0; \
vmovdqu a1, st1; \
transpose_4x4(c0, c1, c2, c3, a0, a1); \
transpose_4x4(d0, d1, d2, d3, a0, a1); \
\
vmovdqu .Lshufb_16x16b, a0; \
vmovdqu st1, a1; \
vpshufb a0, a2, a2; \
vpshufb a0, a3, a3; \
vpshufb a0, b0, b0; \
vpshufb a0, b1, b1; \
vpshufb a0, b2, b2; \
vpshufb a0, b3, b3; \
vpshufb a0, a1, a1; \
vpshufb a0, c0, c0; \
vpshufb a0, c1, c1; \
vpshufb a0, c2, c2; \
vpshufb a0, c3, c3; \
vpshufb a0, d0, d0; \
vpshufb a0, d1, d1; \
vpshufb a0, d2, d2; \
vpshufb a0, d3, d3; \
vmovdqu d3, st1; \
vmovdqu st0, d3; \
vpshufb a0, d3, a0; \
vmovdqu d2, st0; \
\
transpose_4x4(a0, b0, c0, d0, d2, d3); \
transpose_4x4(a1, b1, c1, d1, d2, d3); \
vmovdqu st0, d2; \
vmovdqu st1, d3; \
\
vmovdqu b0, st0; \
vmovdqu b1, st1; \
transpose_4x4(a2, b2, c2, d2, b0, b1); \
transpose_4x4(a3, b3, c3, d3, b0, b1); \
vmovdqu st0, b0; \
vmovdqu st1, b1; \
/* does not adjust output bytes inside vectors */
/* load blocks to registers and apply pre-whitening */
#define inpack16_pre(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
y6, y7, rio, key) \
vmovq key, x0; \
vpshufb .Lpack_bswap, x0, x0; \
\
vpxor 0 * 16(rio), x0, y7; \
vpxor 1 * 16(rio), x0, y6; \
vpxor 2 * 16(rio), x0, y5; \
vpxor 3 * 16(rio), x0, y4; \
vpxor 4 * 16(rio), x0, y3; \
vpxor 5 * 16(rio), x0, y2; \
vpxor 6 * 16(rio), x0, y1; \
vpxor 7 * 16(rio), x0, y0; \
vpxor 8 * 16(rio), x0, x7; \
vpxor 9 * 16(rio), x0, x6; \
vpxor 10 * 16(rio), x0, x5; \
vpxor 11 * 16(rio), x0, x4; \
vpxor 12 * 16(rio), x0, x3; \
vpxor 13 * 16(rio), x0, x2; \
vpxor 14 * 16(rio), x0, x1; \
vpxor 15 * 16(rio), x0, x0;
/* byteslice pre-whitened blocks and store to temporary memory */
#define inpack16_post(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
y6, y7, mem_ab, mem_cd) \
byteslice_16x16b(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, \
y5, y6, y7, (mem_ab), (mem_cd)); \
\
vmovdqu x0, 0 * 16(mem_ab); \
vmovdqu x1, 1 * 16(mem_ab); \
vmovdqu x2, 2 * 16(mem_ab); \
vmovdqu x3, 3 * 16(mem_ab); \
vmovdqu x4, 4 * 16(mem_ab); \
vmovdqu x5, 5 * 16(mem_ab); \
vmovdqu x6, 6 * 16(mem_ab); \
vmovdqu x7, 7 * 16(mem_ab); \
vmovdqu y0, 0 * 16(mem_cd); \
vmovdqu y1, 1 * 16(mem_cd); \
vmovdqu y2, 2 * 16(mem_cd); \
vmovdqu y3, 3 * 16(mem_cd); \
vmovdqu y4, 4 * 16(mem_cd); \
vmovdqu y5, 5 * 16(mem_cd); \
vmovdqu y6, 6 * 16(mem_cd); \
vmovdqu y7, 7 * 16(mem_cd);
/* de-byteslice, apply post-whitening and store blocks */
#define outunpack16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, \
y5, y6, y7, key, stack_tmp0, stack_tmp1) \
byteslice_16x16b(y0, y4, x0, x4, y1, y5, x1, x5, y2, y6, x2, x6, y3, \
y7, x3, x7, stack_tmp0, stack_tmp1); \
\
vmovdqu x0, stack_tmp0; \
\
vmovq key, x0; \
vpshufb .Lpack_bswap, x0, x0; \
\
vpxor x0, y7, y7; \
vpxor x0, y6, y6; \
vpxor x0, y5, y5; \
vpxor x0, y4, y4; \
vpxor x0, y3, y3; \
vpxor x0, y2, y2; \
vpxor x0, y1, y1; \
vpxor x0, y0, y0; \
vpxor x0, x7, x7; \
vpxor x0, x6, x6; \
vpxor x0, x5, x5; \
vpxor x0, x4, x4; \
vpxor x0, x3, x3; \
vpxor x0, x2, x2; \
vpxor x0, x1, x1; \
vpxor stack_tmp0, x0, x0;
#define write_output(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
y6, y7, rio) \
vmovdqu x0, 0 * 16(rio); \
vmovdqu x1, 1 * 16(rio); \
vmovdqu x2, 2 * 16(rio); \
vmovdqu x3, 3 * 16(rio); \
vmovdqu x4, 4 * 16(rio); \
vmovdqu x5, 5 * 16(rio); \
vmovdqu x6, 6 * 16(rio); \
vmovdqu x7, 7 * 16(rio); \
vmovdqu y0, 8 * 16(rio); \
vmovdqu y1, 9 * 16(rio); \
vmovdqu y2, 10 * 16(rio); \
vmovdqu y3, 11 * 16(rio); \
vmovdqu y4, 12 * 16(rio); \
vmovdqu y5, 13 * 16(rio); \
vmovdqu y6, 14 * 16(rio); \
vmovdqu y7, 15 * 16(rio);
.data
.align 16
#define SHUFB_BYTES(idx) \
0 + (idx), 4 + (idx), 8 + (idx), 12 + (idx)
.Lshufb_16x16b:
.byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3);
.Lpack_bswap:
.long 0x00010203
.long 0x04050607
.long 0x80808080
.long 0x80808080
/* For CTR-mode IV byteswap */
.Lbswap128_mask:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
/*
* pre-SubByte transform
*
* pre-lookup for sbox1, sbox2, sbox3:
* swap_bitendianness(
* isom_map_camellia_to_aes(
* camellia_f(
* swap_bitendianess(in)
* )
* )
* )
*
* (note: '⊕ 0xc5' inside camellia_f())
*/
.Lpre_tf_lo_s1:
.byte 0x45, 0xe8, 0x40, 0xed, 0x2e, 0x83, 0x2b, 0x86
.byte 0x4b, 0xe6, 0x4e, 0xe3, 0x20, 0x8d, 0x25, 0x88
.Lpre_tf_hi_s1:
.byte 0x00, 0x51, 0xf1, 0xa0, 0x8a, 0xdb, 0x7b, 0x2a
.byte 0x09, 0x58, 0xf8, 0xa9, 0x83, 0xd2, 0x72, 0x23
/*
* pre-SubByte transform
*
* pre-lookup for sbox4:
* swap_bitendianness(
* isom_map_camellia_to_aes(
* camellia_f(
* swap_bitendianess(in <<< 1)
* )
* )
* )
*
* (note: '⊕ 0xc5' inside camellia_f())
*/
.Lpre_tf_lo_s4:
.byte 0x45, 0x40, 0x2e, 0x2b, 0x4b, 0x4e, 0x20, 0x25
.byte 0x14, 0x11, 0x7f, 0x7a, 0x1a, 0x1f, 0x71, 0x74
.Lpre_tf_hi_s4:
.byte 0x00, 0xf1, 0x8a, 0x7b, 0x09, 0xf8, 0x83, 0x72
.byte 0xad, 0x5c, 0x27, 0xd6, 0xa4, 0x55, 0x2e, 0xdf
/*
* post-SubByte transform
*
* post-lookup for sbox1, sbox4:
* swap_bitendianness(
* camellia_h(
* isom_map_aes_to_camellia(
* swap_bitendianness(
* aes_inverse_affine_transform(in)
* )
* )
* )
* )
*
* (note: '⊕ 0x6e' inside camellia_h())
*/
.Lpost_tf_lo_s1:
.byte 0x3c, 0xcc, 0xcf, 0x3f, 0x32, 0xc2, 0xc1, 0x31
.byte 0xdc, 0x2c, 0x2f, 0xdf, 0xd2, 0x22, 0x21, 0xd1
.Lpost_tf_hi_s1:
.byte 0x00, 0xf9, 0x86, 0x7f, 0xd7, 0x2e, 0x51, 0xa8
.byte 0xa4, 0x5d, 0x22, 0xdb, 0x73, 0x8a, 0xf5, 0x0c
/*
* post-SubByte transform
*
* post-lookup for sbox2:
* swap_bitendianness(
* camellia_h(
* isom_map_aes_to_camellia(
* swap_bitendianness(
* aes_inverse_affine_transform(in)
* )
* )
* )
* ) <<< 1
*
* (note: '⊕ 0x6e' inside camellia_h())
*/
.Lpost_tf_lo_s2:
.byte 0x78, 0x99, 0x9f, 0x7e, 0x64, 0x85, 0x83, 0x62
.byte 0xb9, 0x58, 0x5e, 0xbf, 0xa5, 0x44, 0x42, 0xa3
.Lpost_tf_hi_s2:
.byte 0x00, 0xf3, 0x0d, 0xfe, 0xaf, 0x5c, 0xa2, 0x51
.byte 0x49, 0xba, 0x44, 0xb7, 0xe6, 0x15, 0xeb, 0x18
/*
* post-SubByte transform
*
* post-lookup for sbox3:
* swap_bitendianness(
* camellia_h(
* isom_map_aes_to_camellia(
* swap_bitendianness(
* aes_inverse_affine_transform(in)
* )
* )
* )
* ) >>> 1
*
* (note: '⊕ 0x6e' inside camellia_h())
*/
.Lpost_tf_lo_s3:
.byte 0x1e, 0x66, 0xe7, 0x9f, 0x19, 0x61, 0xe0, 0x98
.byte 0x6e, 0x16, 0x97, 0xef, 0x69, 0x11, 0x90, 0xe8
.Lpost_tf_hi_s3:
.byte 0x00, 0xfc, 0x43, 0xbf, 0xeb, 0x17, 0xa8, 0x54
.byte 0x52, 0xae, 0x11, 0xed, 0xb9, 0x45, 0xfa, 0x06
/* For isolating SubBytes from AESENCLAST, inverse shift row */
.Linv_shift_row:
.byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b
.byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03
/* 4-bit mask */
.align 4
.L0f0f0f0f:
.long 0x0f0f0f0f
.text
.align 8
.type __camellia_enc_blk16,@function;
__camellia_enc_blk16:
/* input:
* %rdi: ctx, CTX
* %rax: temporary storage, 256 bytes
* %xmm0..%xmm15: 16 plaintext blocks
* output:
* %xmm0..%xmm15: 16 encrypted blocks, order swapped:
* 7, 8, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8
*/
leaq 8 * 16(%rax), %rcx;
inpack16_post(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
%xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
%xmm15, %rax, %rcx);
enc_rounds16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
%xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
%xmm15, %rax, %rcx, 0);
fls16(%rax, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
%rcx, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
%xmm15,
((key_table + (8) * 8) + 0)(CTX),
((key_table + (8) * 8) + 4)(CTX),
((key_table + (8) * 8) + 8)(CTX),
((key_table + (8) * 8) + 12)(CTX));
enc_rounds16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
%xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
%xmm15, %rax, %rcx, 8);
fls16(%rax, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
%rcx, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
%xmm15,
((key_table + (16) * 8) + 0)(CTX),
((key_table + (16) * 8) + 4)(CTX),
((key_table + (16) * 8) + 8)(CTX),
((key_table + (16) * 8) + 12)(CTX));
enc_rounds16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
%xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
%xmm15, %rax, %rcx, 16);
movl $24, %r8d;
cmpl $16, key_length(CTX);
jne .Lenc_max32;
.Lenc_done:
/* load CD for output */
vmovdqu 0 * 16(%rcx), %xmm8;
vmovdqu 1 * 16(%rcx), %xmm9;
vmovdqu 2 * 16(%rcx), %xmm10;
vmovdqu 3 * 16(%rcx), %xmm11;
vmovdqu 4 * 16(%rcx), %xmm12;
vmovdqu 5 * 16(%rcx), %xmm13;
vmovdqu 6 * 16(%rcx), %xmm14;
vmovdqu 7 * 16(%rcx), %xmm15;
outunpack16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
%xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
%xmm15, (key_table)(CTX, %r8, 8), (%rax), 1 * 16(%rax));
ret;
.align 8
.Lenc_max32:
movl $32, %r8d;
fls16(%rax, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
%rcx, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
%xmm15,
((key_table + (24) * 8) + 0)(CTX),
((key_table + (24) * 8) + 4)(CTX),
((key_table + (24) * 8) + 8)(CTX),
((key_table + (24) * 8) + 12)(CTX));
enc_rounds16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
%xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
%xmm15, %rax, %rcx, 24);
jmp .Lenc_done;
.align 8
.type __camellia_dec_blk16,@function;
__camellia_dec_blk16:
/* input:
* %rdi: ctx, CTX
* %rax: temporary storage, 256 bytes
* %r8d: 24 for 16 byte key, 32 for larger
* %xmm0..%xmm15: 16 encrypted blocks
* output:
* %xmm0..%xmm15: 16 plaintext blocks, order swapped:
* 7, 8, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8
*/
leaq 8 * 16(%rax), %rcx;
inpack16_post(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
%xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
%xmm15, %rax, %rcx);
cmpl $32, %r8d;
je .Ldec_max32;
.Ldec_max24:
dec_rounds16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
%xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
%xmm15, %rax, %rcx, 16);
fls16(%rax, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
%rcx, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
%xmm15,
((key_table + (16) * 8) + 8)(CTX),
((key_table + (16) * 8) + 12)(CTX),
((key_table + (16) * 8) + 0)(CTX),
((key_table + (16) * 8) + 4)(CTX));
dec_rounds16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
%xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
%xmm15, %rax, %rcx, 8);
fls16(%rax, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
%rcx, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
%xmm15,
((key_table + (8) * 8) + 8)(CTX),
((key_table + (8) * 8) + 12)(CTX),
((key_table + (8) * 8) + 0)(CTX),
((key_table + (8) * 8) + 4)(CTX));
dec_rounds16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
%xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
%xmm15, %rax, %rcx, 0);
/* load CD for output */
vmovdqu 0 * 16(%rcx), %xmm8;
vmovdqu 1 * 16(%rcx), %xmm9;
vmovdqu 2 * 16(%rcx), %xmm10;
vmovdqu 3 * 16(%rcx), %xmm11;
vmovdqu 4 * 16(%rcx), %xmm12;
vmovdqu 5 * 16(%rcx), %xmm13;
vmovdqu 6 * 16(%rcx), %xmm14;
vmovdqu 7 * 16(%rcx), %xmm15;
outunpack16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
%xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
%xmm15, (key_table)(CTX), (%rax), 1 * 16(%rax));
ret;
.align 8
.Ldec_max32:
dec_rounds16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
%xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
%xmm15, %rax, %rcx, 24);
fls16(%rax, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
%rcx, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
%xmm15,
((key_table + (24) * 8) + 8)(CTX),
((key_table + (24) * 8) + 12)(CTX),
((key_table + (24) * 8) + 0)(CTX),
((key_table + (24) * 8) + 4)(CTX));
jmp .Ldec_max24;
.align 8
.global camellia_ecb_enc_16way
.type camellia_ecb_enc_16way,@function;
camellia_ecb_enc_16way:
/* input:
* %rdi: ctx, CTX
* %rsi: dst (16 blocks)
* %rdx: src (16 blocks)
*/
inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
%xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
%xmm15, %rdx, (key_table)(CTX));
/* now dst can be used as temporary buffer (even in src == dst case) */
movq %rsi, %rax;
call __camellia_enc_blk16;
write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0,
%xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9,
%xmm8, %rsi);
ret;
.align 8
.global camellia_ecb_dec_16way
.type camellia_ecb_dec_16way,@function;
camellia_ecb_dec_16way:
/* input:
* %rdi: ctx, CTX
* %rsi: dst (16 blocks)
* %rdx: src (16 blocks)
*/
cmpl $16, key_length(CTX);
movl $32, %r8d;
movl $24, %eax;
cmovel %eax, %r8d; /* max */
inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
%xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
%xmm15, %rdx, (key_table)(CTX, %r8, 8));
/* now dst can be used as temporary buffer (even in src == dst case) */
movq %rsi, %rax;
call __camellia_dec_blk16;
write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0,
%xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9,
%xmm8, %rsi);
ret;
.align 8
.global camellia_cbc_dec_16way
.type camellia_cbc_dec_16way,@function;
camellia_cbc_dec_16way:
/* input:
* %rdi: ctx, CTX
* %rsi: dst (16 blocks)
* %rdx: src (16 blocks)
*/
cmpl $16, key_length(CTX);
movl $32, %r8d;
movl $24, %eax;
cmovel %eax, %r8d; /* max */
inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
%xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
%xmm15, %rdx, (key_table)(CTX, %r8, 8));
/*
* dst might still be in-use (in case dst == src), so use stack for
* temporary storage.
*/
subq $(16 * 16), %rsp;
movq %rsp, %rax;
call __camellia_dec_blk16;
addq $(16 * 16), %rsp;
vpxor (0 * 16)(%rdx), %xmm6, %xmm6;
vpxor (1 * 16)(%rdx), %xmm5, %xmm5;
vpxor (2 * 16)(%rdx), %xmm4, %xmm4;
vpxor (3 * 16)(%rdx), %xmm3, %xmm3;
vpxor (4 * 16)(%rdx), %xmm2, %xmm2;
vpxor (5 * 16)(%rdx), %xmm1, %xmm1;
vpxor (6 * 16)(%rdx), %xmm0, %xmm0;
vpxor (7 * 16)(%rdx), %xmm15, %xmm15;
vpxor (8 * 16)(%rdx), %xmm14, %xmm14;
vpxor (9 * 16)(%rdx), %xmm13, %xmm13;
vpxor (10 * 16)(%rdx), %xmm12, %xmm12;
vpxor (11 * 16)(%rdx), %xmm11, %xmm11;
vpxor (12 * 16)(%rdx), %xmm10, %xmm10;
vpxor (13 * 16)(%rdx), %xmm9, %xmm9;
vpxor (14 * 16)(%rdx), %xmm8, %xmm8;
write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0,
%xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9,
%xmm8, %rsi);
ret;
#define inc_le128(x, minus_one, tmp) \
vpcmpeqq minus_one, x, tmp; \
vpsubq minus_one, x, x; \
vpslldq $8, tmp, tmp; \
vpsubq tmp, x, x;
.align 8
.global camellia_ctr_16way
.type camellia_ctr_16way,@function;
camellia_ctr_16way:
/* input:
* %rdi: ctx, CTX
* %rsi: dst (16 blocks)
* %rdx: src (16 blocks)
* %rcx: iv (little endian, 128bit)
*/
subq $(16 * 16), %rsp;
movq %rsp, %rax;
vmovdqa .Lbswap128_mask, %xmm14;
/* load IV and byteswap */
vmovdqu (%rcx), %xmm0;
vpshufb %xmm14, %xmm0, %xmm15;
vmovdqu %xmm15, 15 * 16(%rax);
vpcmpeqd %xmm15, %xmm15, %xmm15;
vpsrldq $8, %xmm15, %xmm15; /* low: -1, high: 0 */
/* construct IVs */
inc_le128(%xmm0, %xmm15, %xmm13);
vpshufb %xmm14, %xmm0, %xmm13;
vmovdqu %xmm13, 14 * 16(%rax);
inc_le128(%xmm0, %xmm15, %xmm13);
vpshufb %xmm14, %xmm0, %xmm13;
vmovdqu %xmm13, 13 * 16(%rax);
inc_le128(%xmm0, %xmm15, %xmm13);
vpshufb %xmm14, %xmm0, %xmm12;
inc_le128(%xmm0, %xmm15, %xmm13);
vpshufb %xmm14, %xmm0, %xmm11;
inc_le128(%xmm0, %xmm15, %xmm13);
vpshufb %xmm14, %xmm0, %xmm10;
inc_le128(%xmm0, %xmm15, %xmm13);
vpshufb %xmm14, %xmm0, %xmm9;
inc_le128(%xmm0, %xmm15, %xmm13);
vpshufb %xmm14, %xmm0, %xmm8;
inc_le128(%xmm0, %xmm15, %xmm13);
vpshufb %xmm14, %xmm0, %xmm7;
inc_le128(%xmm0, %xmm15, %xmm13);
vpshufb %xmm14, %xmm0, %xmm6;
inc_le128(%xmm0, %xmm15, %xmm13);
vpshufb %xmm14, %xmm0, %xmm5;
inc_le128(%xmm0, %xmm15, %xmm13);
vpshufb %xmm14, %xmm0, %xmm4;
inc_le128(%xmm0, %xmm15, %xmm13);
vpshufb %xmm14, %xmm0, %xmm3;
inc_le128(%xmm0, %xmm15, %xmm13);
vpshufb %xmm14, %xmm0, %xmm2;
inc_le128(%xmm0, %xmm15, %xmm13);
vpshufb %xmm14, %xmm0, %xmm1;
inc_le128(%xmm0, %xmm15, %xmm13);
vmovdqa %xmm0, %xmm13;
vpshufb %xmm14, %xmm0, %xmm0;
inc_le128(%xmm13, %xmm15, %xmm14);
vmovdqu %xmm13, (%rcx);
/* inpack16_pre: */
vmovq (key_table)(CTX), %xmm15;
vpshufb .Lpack_bswap, %xmm15, %xmm15;
vpxor %xmm0, %xmm15, %xmm0;
vpxor %xmm1, %xmm15, %xmm1;
vpxor %xmm2, %xmm15, %xmm2;
vpxor %xmm3, %xmm15, %xmm3;
vpxor %xmm4, %xmm15, %xmm4;
vpxor %xmm5, %xmm15, %xmm5;
vpxor %xmm6, %xmm15, %xmm6;
vpxor %xmm7, %xmm15, %xmm7;
vpxor %xmm8, %xmm15, %xmm8;
vpxor %xmm9, %xmm15, %xmm9;
vpxor %xmm10, %xmm15, %xmm10;
vpxor %xmm11, %xmm15, %xmm11;
vpxor %xmm12, %xmm15, %xmm12;
vpxor 13 * 16(%rax), %xmm15, %xmm13;
vpxor 14 * 16(%rax), %xmm15, %xmm14;
vpxor 15 * 16(%rax), %xmm15, %xmm15;
call __camellia_enc_blk16;
addq $(16 * 16), %rsp;
vpxor 0 * 16(%rdx), %xmm7, %xmm7;
vpxor 1 * 16(%rdx), %xmm6, %xmm6;
vpxor 2 * 16(%rdx), %xmm5, %xmm5;
vpxor 3 * 16(%rdx), %xmm4, %xmm4;
vpxor 4 * 16(%rdx), %xmm3, %xmm3;
vpxor 5 * 16(%rdx), %xmm2, %xmm2;
vpxor 6 * 16(%rdx), %xmm1, %xmm1;
vpxor 7 * 16(%rdx), %xmm0, %xmm0;
vpxor 8 * 16(%rdx), %xmm15, %xmm15;
vpxor 9 * 16(%rdx), %xmm14, %xmm14;
vpxor 10 * 16(%rdx), %xmm13, %xmm13;
vpxor 11 * 16(%rdx), %xmm12, %xmm12;
vpxor 12 * 16(%rdx), %xmm11, %xmm11;
vpxor 13 * 16(%rdx), %xmm10, %xmm10;
vpxor 14 * 16(%rdx), %xmm9, %xmm9;
vpxor 15 * 16(%rdx), %xmm8, %xmm8;
write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0,
%xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9,
%xmm8, %rsi);
ret;
/*
* Glue Code for x86_64/AVX/AES-NI assembler optimized version of Camellia
*
* Copyright © 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
*/
#include <linux/module.h>
#include <linux/types.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <crypto/algapi.h>
#include <crypto/ctr.h>
#include <crypto/lrw.h>
#include <crypto/xts.h>
#include <asm/xcr.h>
#include <asm/xsave.h>
#include <asm/crypto/camellia.h>
#include <asm/crypto/ablk_helper.h>
#include <asm/crypto/glue_helper.h>
#define CAMELLIA_AESNI_PARALLEL_BLOCKS 16
/* 16-way AES-NI parallel cipher functions */
asmlinkage void camellia_ecb_enc_16way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void camellia_ecb_dec_16way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void camellia_cbc_dec_16way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void camellia_ctr_16way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
static const struct common_glue_ctx camellia_enc = {
.num_funcs = 3,
.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_enc_16way) }
}, {
.num_blocks = 2,
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk_2way) }
}, {
.num_blocks = 1,
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk) }
} }
};
static const struct common_glue_ctx camellia_ctr = {
.num_funcs = 3,
.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_ctr_16way) }
}, {
.num_blocks = 2,
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr_2way) }
}, {
.num_blocks = 1,
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr) }
} }
};
static const struct common_glue_ctx camellia_dec = {
.num_funcs = 3,
.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_dec_16way) }
}, {
.num_blocks = 2,
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk_2way) }
}, {
.num_blocks = 1,
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk) }
} }
};
static const struct common_glue_ctx camellia_dec_cbc = {
.num_funcs = 3,
.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_cbc_dec_16way) }
}, {
.num_blocks = 2,
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_decrypt_cbc_2way) }
}, {
.num_blocks = 1,
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_dec_blk) }
} }
};
static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
return glue_ecb_crypt_128bit(&camellia_enc, desc, dst, src, nbytes);
}
static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
return glue_ecb_crypt_128bit(&camellia_dec, desc, dst, src, nbytes);
}
static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(camellia_enc_blk), desc,
dst, src, nbytes);
}
static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
return glue_cbc_decrypt_128bit(&camellia_dec_cbc, desc, dst, src,
nbytes);
}
static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
return glue_ctr_crypt_128bit(&camellia_ctr, desc, dst, src, nbytes);
}
static inline bool camellia_fpu_begin(bool fpu_enabled, unsigned int nbytes)
{
return glue_fpu_begin(CAMELLIA_BLOCK_SIZE,
CAMELLIA_AESNI_PARALLEL_BLOCKS, NULL, fpu_enabled,
nbytes);
}
static inline void camellia_fpu_end(bool fpu_enabled)
{
glue_fpu_end(fpu_enabled);
}
static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key,
unsigned int key_len)
{
return __camellia_setkey(crypto_tfm_ctx(tfm), in_key, key_len,
&tfm->crt_flags);
}
struct crypt_priv {
struct camellia_ctx *ctx;
bool fpu_enabled;
};
static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
{
const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
struct crypt_priv *ctx = priv;
int i;
ctx->fpu_enabled = camellia_fpu_begin(ctx->fpu_enabled, nbytes);
if (nbytes >= CAMELLIA_AESNI_PARALLEL_BLOCKS * bsize) {
camellia_ecb_enc_16way(ctx->ctx, srcdst, srcdst);
srcdst += bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
nbytes -= bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
}
while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) {
camellia_enc_blk_2way(ctx->ctx, srcdst, srcdst);
srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS;
nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS;
}
for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
camellia_enc_blk(ctx->ctx, srcdst, srcdst);
}
static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
{
const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
struct crypt_priv *ctx = priv;
int i;
ctx->fpu_enabled = camellia_fpu_begin(ctx->fpu_enabled, nbytes);
if (nbytes >= CAMELLIA_AESNI_PARALLEL_BLOCKS * bsize) {
camellia_ecb_dec_16way(ctx->ctx, srcdst, srcdst);
srcdst += bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
nbytes -= bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
}
while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) {
camellia_dec_blk_2way(ctx->ctx, srcdst, srcdst);
srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS;
nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS;
}
for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
camellia_dec_blk(ctx->ctx, srcdst, srcdst);
}
static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
be128 buf[CAMELLIA_AESNI_PARALLEL_BLOCKS];
struct crypt_priv crypt_ctx = {
.ctx = &ctx->camellia_ctx,
.fpu_enabled = false,
};
struct lrw_crypt_req req = {
.tbuf = buf,
.tbuflen = sizeof(buf),
.table_ctx = &ctx->lrw_table,
.crypt_ctx = &crypt_ctx,
.crypt_fn = encrypt_callback,
};
int ret;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
ret = lrw_crypt(desc, dst, src, nbytes, &req);
camellia_fpu_end(crypt_ctx.fpu_enabled);
return ret;
}
static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
be128 buf[CAMELLIA_AESNI_PARALLEL_BLOCKS];
struct crypt_priv crypt_ctx = {
.ctx = &ctx->camellia_ctx,
.fpu_enabled = false,
};
struct lrw_crypt_req req = {
.tbuf = buf,
.tbuflen = sizeof(buf),
.table_ctx = &ctx->lrw_table,
.crypt_ctx = &crypt_ctx,
.crypt_fn = decrypt_callback,
};
int ret;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
ret = lrw_crypt(desc, dst, src, nbytes, &req);
camellia_fpu_end(crypt_ctx.fpu_enabled);
return ret;
}
static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
be128 buf[CAMELLIA_AESNI_PARALLEL_BLOCKS];
struct crypt_priv crypt_ctx = {
.ctx = &ctx->crypt_ctx,
.fpu_enabled = false,
};
struct xts_crypt_req req = {
.tbuf = buf,
.tbuflen = sizeof(buf),
.tweak_ctx = &ctx->tweak_ctx,
.tweak_fn = XTS_TWEAK_CAST(camellia_enc_blk),
.crypt_ctx = &crypt_ctx,
.crypt_fn = encrypt_callback,
};
int ret;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
ret = xts_crypt(desc, dst, src, nbytes, &req);
camellia_fpu_end(crypt_ctx.fpu_enabled);
return ret;
}
static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
be128 buf[CAMELLIA_AESNI_PARALLEL_BLOCKS];
struct crypt_priv crypt_ctx = {
.ctx = &ctx->crypt_ctx,
.fpu_enabled = false,
};
struct xts_crypt_req req = {
.tbuf = buf,
.tbuflen = sizeof(buf),
.tweak_ctx = &ctx->tweak_ctx,
.tweak_fn = XTS_TWEAK_CAST(camellia_enc_blk),
.crypt_ctx = &crypt_ctx,
.crypt_fn = decrypt_callback,
};
int ret;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
ret = xts_crypt(desc, dst, src, nbytes, &req);
camellia_fpu_end(crypt_ctx.fpu_enabled);
return ret;
}
static struct crypto_alg cmll_algs[10] = { {
.cra_name = "__ecb-camellia-aesni",
.cra_driver_name = "__driver-ecb-camellia-aesni",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct camellia_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = CAMELLIA_MIN_KEY_SIZE,
.max_keysize = CAMELLIA_MAX_KEY_SIZE,
.setkey = camellia_setkey,
.encrypt = ecb_encrypt,
.decrypt = ecb_decrypt,
},
},
}, {
.cra_name = "__cbc-camellia-aesni",
.cra_driver_name = "__driver-cbc-camellia-aesni",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct camellia_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = CAMELLIA_MIN_KEY_SIZE,
.max_keysize = CAMELLIA_MAX_KEY_SIZE,
.setkey = camellia_setkey,
.encrypt = cbc_encrypt,
.decrypt = cbc_decrypt,
},
},
}, {
.cra_name = "__ctr-camellia-aesni",
.cra_driver_name = "__driver-ctr-camellia-aesni",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct camellia_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = CAMELLIA_MIN_KEY_SIZE,
.max_keysize = CAMELLIA_MAX_KEY_SIZE,
.ivsize = CAMELLIA_BLOCK_SIZE,
.setkey = camellia_setkey,
.encrypt = ctr_crypt,
.decrypt = ctr_crypt,
},
},
}, {
.cra_name = "__lrw-camellia-aesni",
.cra_driver_name = "__driver-lrw-camellia-aesni",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct camellia_lrw_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_exit = lrw_camellia_exit_tfm,
.cra_u = {
.blkcipher = {
.min_keysize = CAMELLIA_MIN_KEY_SIZE +
CAMELLIA_BLOCK_SIZE,
.max_keysize = CAMELLIA_MAX_KEY_SIZE +
CAMELLIA_BLOCK_SIZE,
.ivsize = CAMELLIA_BLOCK_SIZE,
.setkey = lrw_camellia_setkey,
.encrypt = lrw_encrypt,
.decrypt = lrw_decrypt,
},
},
}, {
.cra_name = "__xts-camellia-aesni",
.cra_driver_name = "__driver-xts-camellia-aesni",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct camellia_xts_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = CAMELLIA_MIN_KEY_SIZE * 2,
.max_keysize = CAMELLIA_MAX_KEY_SIZE * 2,
.ivsize = CAMELLIA_BLOCK_SIZE,
.setkey = xts_camellia_setkey,
.encrypt = xts_encrypt,
.decrypt = xts_decrypt,
},
},
}, {
.cra_name = "ecb(camellia)",
.cra_driver_name = "ecb-camellia-aesni",
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = CAMELLIA_MIN_KEY_SIZE,
.max_keysize = CAMELLIA_MAX_KEY_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_decrypt,
},
},
}, {
.cra_name = "cbc(camellia)",
.cra_driver_name = "cbc-camellia-aesni",
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = CAMELLIA_MIN_KEY_SIZE,
.max_keysize = CAMELLIA_MAX_KEY_SIZE,
.ivsize = CAMELLIA_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = __ablk_encrypt,
.decrypt = ablk_decrypt,
},
},
}, {
.cra_name = "ctr(camellia)",
.cra_driver_name = "ctr-camellia-aesni",
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = CAMELLIA_MIN_KEY_SIZE,
.max_keysize = CAMELLIA_MAX_KEY_SIZE,
.ivsize = CAMELLIA_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_encrypt,
.geniv = "chainiv",
},
},
}, {
.cra_name = "lrw(camellia)",
.cra_driver_name = "lrw-camellia-aesni",
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = CAMELLIA_MIN_KEY_SIZE +
CAMELLIA_BLOCK_SIZE,
.max_keysize = CAMELLIA_MAX_KEY_SIZE +
CAMELLIA_BLOCK_SIZE,
.ivsize = CAMELLIA_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_decrypt,
},
},
}, {
.cra_name = "xts(camellia)",
.cra_driver_name = "xts-camellia-aesni",
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = CAMELLIA_MIN_KEY_SIZE * 2,
.max_keysize = CAMELLIA_MAX_KEY_SIZE * 2,
.ivsize = CAMELLIA_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_decrypt,
},
},
} };
static int __init camellia_aesni_init(void)
{
u64 xcr0;
if (!cpu_has_avx || !cpu_has_aes || !cpu_has_osxsave) {
pr_info("AVX or AES-NI instructions are not detected.\n");
return -ENODEV;
}
xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
pr_info("AVX detected but unusable.\n");
return -ENODEV;
}
return crypto_register_algs(cmll_algs, ARRAY_SIZE(cmll_algs));
}
static void __exit camellia_aesni_fini(void)
{
crypto_unregister_algs(cmll_algs, ARRAY_SIZE(cmll_algs));
}
module_init(camellia_aesni_init);
module_exit(camellia_aesni_fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Camellia Cipher Algorithm, AES-NI/AVX optimized");
MODULE_ALIAS("camellia");
MODULE_ALIAS("camellia-asm");
......@@ -32,53 +32,24 @@
#include <crypto/algapi.h>
#include <crypto/lrw.h>
#include <crypto/xts.h>
#include <asm/crypto/camellia.h>
#include <asm/crypto/glue_helper.h>
#define CAMELLIA_MIN_KEY_SIZE 16
#define CAMELLIA_MAX_KEY_SIZE 32
#define CAMELLIA_BLOCK_SIZE 16
#define CAMELLIA_TABLE_BYTE_LEN 272
struct camellia_ctx {
u64 key_table[CAMELLIA_TABLE_BYTE_LEN / sizeof(u64)];
u32 key_length;
};
/* regular block cipher functions */
asmlinkage void __camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst,
const u8 *src, bool xor);
EXPORT_SYMBOL_GPL(__camellia_enc_blk);
asmlinkage void camellia_dec_blk(struct camellia_ctx *ctx, u8 *dst,
const u8 *src);
EXPORT_SYMBOL_GPL(camellia_dec_blk);
/* 2-way parallel cipher functions */
asmlinkage void __camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src, bool xor);
EXPORT_SYMBOL_GPL(__camellia_enc_blk_2way);
asmlinkage void camellia_dec_blk_2way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src);
static inline void camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst,
const u8 *src)
{
__camellia_enc_blk(ctx, dst, src, false);
}
static inline void camellia_enc_blk_xor(struct camellia_ctx *ctx, u8 *dst,
const u8 *src)
{
__camellia_enc_blk(ctx, dst, src, true);
}
static inline void camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src)
{
__camellia_enc_blk_2way(ctx, dst, src, false);
}
static inline void camellia_enc_blk_xor_2way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src)
{
__camellia_enc_blk_2way(ctx, dst, src, true);
}
EXPORT_SYMBOL_GPL(camellia_dec_blk_2way);
static void camellia_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
......@@ -1275,8 +1246,7 @@ static void camellia_setup192(const unsigned char *key, u64 *subkey)
camellia_setup256(kk, subkey);
}
static int __camellia_setkey(struct camellia_ctx *cctx,
const unsigned char *key,
int __camellia_setkey(struct camellia_ctx *cctx, const unsigned char *key,
unsigned int key_len, u32 *flags)
{
if (key_len != 16 && key_len != 24 && key_len != 32) {
......@@ -1300,6 +1270,7 @@ static int __camellia_setkey(struct camellia_ctx *cctx,
return 0;
}
EXPORT_SYMBOL_GPL(__camellia_setkey);
static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key,
unsigned int key_len)
......@@ -1308,7 +1279,7 @@ static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key,
&tfm->crt_flags);
}
static void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src)
void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src)
{
u128 iv = *src;
......@@ -1316,22 +1287,23 @@ static void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src)
u128_xor(&dst[1], &dst[1], &iv);
}
EXPORT_SYMBOL_GPL(camellia_decrypt_cbc_2way);
static void camellia_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv)
void camellia_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
be128 ctrblk;
if (dst != src)
*dst = *src;
u128_to_be128(&ctrblk, iv);
u128_inc(iv);
le128_to_be128(&ctrblk, iv);
le128_inc(iv);
camellia_enc_blk_xor(ctx, (u8 *)dst, (u8 *)&ctrblk);
}
EXPORT_SYMBOL_GPL(camellia_crypt_ctr);
static void camellia_crypt_ctr_2way(void *ctx, u128 *dst, const u128 *src,
u128 *iv)
void camellia_crypt_ctr_2way(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
be128 ctrblks[2];
......@@ -1340,13 +1312,14 @@ static void camellia_crypt_ctr_2way(void *ctx, u128 *dst, const u128 *src,
dst[1] = src[1];
}
u128_to_be128(&ctrblks[0], iv);
u128_inc(iv);
u128_to_be128(&ctrblks[1], iv);
u128_inc(iv);
le128_to_be128(&ctrblks[0], iv);
le128_inc(iv);
le128_to_be128(&ctrblks[1], iv);
le128_inc(iv);
camellia_enc_blk_xor_2way(ctx, (u8 *)dst, (u8 *)ctrblks);
}
EXPORT_SYMBOL_GPL(camellia_crypt_ctr_2way);
static const struct common_glue_ctx camellia_enc = {
.num_funcs = 2,
......@@ -1464,12 +1437,7 @@ static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
camellia_dec_blk(ctx, srcdst, srcdst);
}
struct camellia_lrw_ctx {
struct lrw_table_ctx lrw_table;
struct camellia_ctx camellia_ctx;
};
static int lrw_camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
int lrw_camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
unsigned int keylen)
{
struct camellia_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
......@@ -1484,6 +1452,7 @@ static int lrw_camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
return lrw_init_table(&ctx->lrw_table,
key + keylen - CAMELLIA_BLOCK_SIZE);
}
EXPORT_SYMBOL_GPL(lrw_camellia_setkey);
static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
......@@ -1519,19 +1488,15 @@ static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
return lrw_crypt(desc, dst, src, nbytes, &req);
}
static void lrw_exit_tfm(struct crypto_tfm *tfm)
void lrw_camellia_exit_tfm(struct crypto_tfm *tfm)
{
struct camellia_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
lrw_free_table(&ctx->lrw_table);
}
EXPORT_SYMBOL_GPL(lrw_camellia_exit_tfm);
struct camellia_xts_ctx {
struct camellia_ctx tweak_ctx;
struct camellia_ctx crypt_ctx;
};
static int xts_camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
int xts_camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
unsigned int keylen)
{
struct camellia_xts_ctx *ctx = crypto_tfm_ctx(tfm);
......@@ -1555,6 +1520,7 @@ static int xts_camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
return __camellia_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2,
flags);
}
EXPORT_SYMBOL_GPL(xts_camellia_setkey);
static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
......@@ -1679,7 +1645,7 @@ static struct crypto_alg camellia_algs[6] = { {
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_exit = lrw_exit_tfm,
.cra_exit = lrw_camellia_exit_tfm,
.cra_u = {
.blkcipher = {
.min_keysize = CAMELLIA_MIN_KEY_SIZE +
......
......@@ -25,10 +25,10 @@
.file "cast5-avx-x86_64-asm_64.S"
.extern cast5_s1
.extern cast5_s2
.extern cast5_s3
.extern cast5_s4
.extern cast_s1
.extern cast_s2
.extern cast_s3
.extern cast_s4
/* structure of crypto context */
#define km 0
......@@ -36,10 +36,10 @@
#define rr ((16*4)+16)
/* s-boxes */
#define s1 cast5_s1
#define s2 cast5_s2
#define s3 cast5_s3
#define s4 cast5_s4
#define s1 cast_s1
#define s2 cast_s2
#define s3 cast_s3
#define s4 cast_s4
/**********************************************************************
16-way AVX cast5
......@@ -180,31 +180,17 @@
vpunpcklqdq t1, t0, x0; \
vpunpckhqdq t1, t0, x1;
#define inpack_blocks(in, x0, x1, t0, t1, rmask) \
vmovdqu (0*4*4)(in), x0; \
vmovdqu (1*4*4)(in), x1; \
#define inpack_blocks(x0, x1, t0, t1, rmask) \
vpshufb rmask, x0, x0; \
vpshufb rmask, x1, x1; \
\
transpose_2x4(x0, x1, t0, t1)
#define outunpack_blocks(out, x0, x1, t0, t1, rmask) \
#define outunpack_blocks(x0, x1, t0, t1, rmask) \
transpose_2x4(x0, x1, t0, t1) \
\
vpshufb rmask, x0, x0; \
vpshufb rmask, x1, x1; \
vmovdqu x0, (0*4*4)(out); \
vmovdqu x1, (1*4*4)(out);
#define outunpack_xor_blocks(out, x0, x1, t0, t1, rmask) \
transpose_2x4(x0, x1, t0, t1) \
\
vpshufb rmask, x0, x0; \
vpshufb rmask, x1, x1; \
vpxor (0*4*4)(out), x0, x0; \
vmovdqu x0, (0*4*4)(out); \
vpxor (1*4*4)(out), x1, x1; \
vmovdqu x1, (1*4*4)(out);
vpshufb rmask, x1, x1;
.data
......@@ -213,6 +199,8 @@
.byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
.Lbswap128_mask:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
.Lbswap_iv_mask:
.byte 7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2, 1, 0
.L16_mask:
.byte 16, 16, 16, 16
.L32_mask:
......@@ -223,35 +211,42 @@
.text
.align 16
.global __cast5_enc_blk_16way
.type __cast5_enc_blk_16way,@function;
.type __cast5_enc_blk16,@function;
__cast5_enc_blk_16way:
__cast5_enc_blk16:
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
* %rcx: bool, if true: xor output
* RL1: blocks 1 and 2
* RR1: blocks 3 and 4
* RL2: blocks 5 and 6
* RR2: blocks 7 and 8
* RL3: blocks 9 and 10
* RR3: blocks 11 and 12
* RL4: blocks 13 and 14
* RR4: blocks 15 and 16
* output:
* RL1: encrypted blocks 1 and 2
* RR1: encrypted blocks 3 and 4
* RL2: encrypted blocks 5 and 6
* RR2: encrypted blocks 7 and 8
* RL3: encrypted blocks 9 and 10
* RR3: encrypted blocks 11 and 12
* RL4: encrypted blocks 13 and 14
* RR4: encrypted blocks 15 and 16
*/
pushq %rbp;
pushq %rbx;
pushq %rcx;
vmovdqa .Lbswap_mask, RKM;
vmovd .Lfirst_mask, R1ST;
vmovd .L32_mask, R32;
enc_preload_rkr();
leaq 1*(2*4*4)(%rdx), %rax;
inpack_blocks(%rdx, RL1, RR1, RTMP, RX, RKM);
inpack_blocks(%rax, RL2, RR2, RTMP, RX, RKM);
leaq 2*(2*4*4)(%rdx), %rax;
inpack_blocks(%rax, RL3, RR3, RTMP, RX, RKM);
leaq 3*(2*4*4)(%rdx), %rax;
inpack_blocks(%rax, RL4, RR4, RTMP, RX, RKM);
movq %rsi, %r11;
inpack_blocks(RL1, RR1, RTMP, RX, RKM);
inpack_blocks(RL2, RR2, RTMP, RX, RKM);
inpack_blocks(RL3, RR3, RTMP, RX, RKM);
inpack_blocks(RL4, RR4, RTMP, RX, RKM);
round(RL, RR, 0, 1);
round(RR, RL, 1, 2);
......@@ -276,44 +271,41 @@ __cast5_enc_blk_16way:
round(RR, RL, 15, 1);
__skip_enc:
popq %rcx;
popq %rbx;
popq %rbp;
vmovdqa .Lbswap_mask, RKM;
leaq 1*(2*4*4)(%r11), %rax;
testb %cl, %cl;
jnz __enc_xor16;
outunpack_blocks(%r11, RR1, RL1, RTMP, RX, RKM);
outunpack_blocks(%rax, RR2, RL2, RTMP, RX, RKM);
leaq 2*(2*4*4)(%r11), %rax;
outunpack_blocks(%rax, RR3, RL3, RTMP, RX, RKM);
leaq 3*(2*4*4)(%r11), %rax;
outunpack_blocks(%rax, RR4, RL4, RTMP, RX, RKM);
ret;
__enc_xor16:
outunpack_xor_blocks(%r11, RR1, RL1, RTMP, RX, RKM);
outunpack_xor_blocks(%rax, RR2, RL2, RTMP, RX, RKM);
leaq 2*(2*4*4)(%r11), %rax;
outunpack_xor_blocks(%rax, RR3, RL3, RTMP, RX, RKM);
leaq 3*(2*4*4)(%r11), %rax;
outunpack_xor_blocks(%rax, RR4, RL4, RTMP, RX, RKM);
outunpack_blocks(RR1, RL1, RTMP, RX, RKM);
outunpack_blocks(RR2, RL2, RTMP, RX, RKM);
outunpack_blocks(RR3, RL3, RTMP, RX, RKM);
outunpack_blocks(RR4, RL4, RTMP, RX, RKM);
ret;
.align 16
.global cast5_dec_blk_16way
.type cast5_dec_blk_16way,@function;
.type __cast5_dec_blk16,@function;
cast5_dec_blk_16way:
__cast5_dec_blk16:
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
* RL1: encrypted blocks 1 and 2
* RR1: encrypted blocks 3 and 4
* RL2: encrypted blocks 5 and 6
* RR2: encrypted blocks 7 and 8
* RL3: encrypted blocks 9 and 10
* RR3: encrypted blocks 11 and 12
* RL4: encrypted blocks 13 and 14
* RR4: encrypted blocks 15 and 16
* output:
* RL1: decrypted blocks 1 and 2
* RR1: decrypted blocks 3 and 4
* RL2: decrypted blocks 5 and 6
* RR2: decrypted blocks 7 and 8
* RL3: decrypted blocks 9 and 10
* RR3: decrypted blocks 11 and 12
* RL4: decrypted blocks 13 and 14
* RR4: decrypted blocks 15 and 16
*/
pushq %rbp;
......@@ -324,15 +316,10 @@ cast5_dec_blk_16way:
vmovd .L32_mask, R32;
dec_preload_rkr();
leaq 1*(2*4*4)(%rdx), %rax;
inpack_blocks(%rdx, RL1, RR1, RTMP, RX, RKM);
inpack_blocks(%rax, RL2, RR2, RTMP, RX, RKM);
leaq 2*(2*4*4)(%rdx), %rax;
inpack_blocks(%rax, RL3, RR3, RTMP, RX, RKM);
leaq 3*(2*4*4)(%rdx), %rax;
inpack_blocks(%rax, RL4, RR4, RTMP, RX, RKM);
movq %rsi, %r11;
inpack_blocks(RL1, RR1, RTMP, RX, RKM);
inpack_blocks(RL2, RR2, RTMP, RX, RKM);
inpack_blocks(RL3, RR3, RTMP, RX, RKM);
inpack_blocks(RL4, RR4, RTMP, RX, RKM);
movzbl rr(CTX), %eax;
testl %eax, %eax;
......@@ -361,16 +348,211 @@ __dec_tail:
popq %rbx;
popq %rbp;
leaq 1*(2*4*4)(%r11), %rax;
outunpack_blocks(%r11, RR1, RL1, RTMP, RX, RKM);
outunpack_blocks(%rax, RR2, RL2, RTMP, RX, RKM);
leaq 2*(2*4*4)(%r11), %rax;
outunpack_blocks(%rax, RR3, RL3, RTMP, RX, RKM);
leaq 3*(2*4*4)(%r11), %rax;
outunpack_blocks(%rax, RR4, RL4, RTMP, RX, RKM);
outunpack_blocks(RR1, RL1, RTMP, RX, RKM);
outunpack_blocks(RR2, RL2, RTMP, RX, RKM);
outunpack_blocks(RR3, RL3, RTMP, RX, RKM);
outunpack_blocks(RR4, RL4, RTMP, RX, RKM);
ret;
__skip_dec:
vpsrldq $4, RKR, RKR;
jmp __dec_tail;
.align 16
.global cast5_ecb_enc_16way
.type cast5_ecb_enc_16way,@function;
cast5_ecb_enc_16way:
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
*/
movq %rsi, %r11;
vmovdqu (0*4*4)(%rdx), RL1;
vmovdqu (1*4*4)(%rdx), RR1;
vmovdqu (2*4*4)(%rdx), RL2;
vmovdqu (3*4*4)(%rdx), RR2;
vmovdqu (4*4*4)(%rdx), RL3;
vmovdqu (5*4*4)(%rdx), RR3;
vmovdqu (6*4*4)(%rdx), RL4;
vmovdqu (7*4*4)(%rdx), RR4;
call __cast5_enc_blk16;
vmovdqu RR1, (0*4*4)(%r11);
vmovdqu RL1, (1*4*4)(%r11);
vmovdqu RR2, (2*4*4)(%r11);
vmovdqu RL2, (3*4*4)(%r11);
vmovdqu RR3, (4*4*4)(%r11);
vmovdqu RL3, (5*4*4)(%r11);
vmovdqu RR4, (6*4*4)(%r11);
vmovdqu RL4, (7*4*4)(%r11);
ret;
.align 16
.global cast5_ecb_dec_16way
.type cast5_ecb_dec_16way,@function;
cast5_ecb_dec_16way:
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
*/
movq %rsi, %r11;
vmovdqu (0*4*4)(%rdx), RL1;
vmovdqu (1*4*4)(%rdx), RR1;
vmovdqu (2*4*4)(%rdx), RL2;
vmovdqu (3*4*4)(%rdx), RR2;
vmovdqu (4*4*4)(%rdx), RL3;
vmovdqu (5*4*4)(%rdx), RR3;
vmovdqu (6*4*4)(%rdx), RL4;
vmovdqu (7*4*4)(%rdx), RR4;
call __cast5_dec_blk16;
vmovdqu RR1, (0*4*4)(%r11);
vmovdqu RL1, (1*4*4)(%r11);
vmovdqu RR2, (2*4*4)(%r11);
vmovdqu RL2, (3*4*4)(%r11);
vmovdqu RR3, (4*4*4)(%r11);
vmovdqu RL3, (5*4*4)(%r11);
vmovdqu RR4, (6*4*4)(%r11);
vmovdqu RL4, (7*4*4)(%r11);
ret;
.align 16
.global cast5_cbc_dec_16way
.type cast5_cbc_dec_16way,@function;
cast5_cbc_dec_16way:
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
*/
pushq %r12;
movq %rsi, %r11;
movq %rdx, %r12;
vmovdqu (0*16)(%rdx), RL1;
vmovdqu (1*16)(%rdx), RR1;
vmovdqu (2*16)(%rdx), RL2;
vmovdqu (3*16)(%rdx), RR2;
vmovdqu (4*16)(%rdx), RL3;
vmovdqu (5*16)(%rdx), RR3;
vmovdqu (6*16)(%rdx), RL4;
vmovdqu (7*16)(%rdx), RR4;
call __cast5_dec_blk16;
/* xor with src */
vmovq (%r12), RX;
vpshufd $0x4f, RX, RX;
vpxor RX, RR1, RR1;
vpxor 0*16+8(%r12), RL1, RL1;
vpxor 1*16+8(%r12), RR2, RR2;
vpxor 2*16+8(%r12), RL2, RL2;
vpxor 3*16+8(%r12), RR3, RR3;
vpxor 4*16+8(%r12), RL3, RL3;
vpxor 5*16+8(%r12), RR4, RR4;
vpxor 6*16+8(%r12), RL4, RL4;
vmovdqu RR1, (0*16)(%r11);
vmovdqu RL1, (1*16)(%r11);
vmovdqu RR2, (2*16)(%r11);
vmovdqu RL2, (3*16)(%r11);
vmovdqu RR3, (4*16)(%r11);
vmovdqu RL3, (5*16)(%r11);
vmovdqu RR4, (6*16)(%r11);
vmovdqu RL4, (7*16)(%r11);
popq %r12;
ret;
.align 16
.global cast5_ctr_16way
.type cast5_ctr_16way,@function;
cast5_ctr_16way:
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
* %rcx: iv (big endian, 64bit)
*/
pushq %r12;
movq %rsi, %r11;
movq %rdx, %r12;
vpcmpeqd RTMP, RTMP, RTMP;
vpsrldq $8, RTMP, RTMP; /* low: -1, high: 0 */
vpcmpeqd RKR, RKR, RKR;
vpaddq RKR, RKR, RKR; /* low: -2, high: -2 */
vmovdqa .Lbswap_iv_mask, R1ST;
vmovdqa .Lbswap128_mask, RKM;
/* load IV and byteswap */
vmovq (%rcx), RX;
vpshufb R1ST, RX, RX;
/* construct IVs */
vpsubq RTMP, RX, RX; /* le: IV1, IV0 */
vpshufb RKM, RX, RL1; /* be: IV0, IV1 */
vpsubq RKR, RX, RX;
vpshufb RKM, RX, RR1; /* be: IV2, IV3 */
vpsubq RKR, RX, RX;
vpshufb RKM, RX, RL2; /* be: IV4, IV5 */
vpsubq RKR, RX, RX;
vpshufb RKM, RX, RR2; /* be: IV6, IV7 */
vpsubq RKR, RX, RX;
vpshufb RKM, RX, RL3; /* be: IV8, IV9 */
vpsubq RKR, RX, RX;
vpshufb RKM, RX, RR3; /* be: IV10, IV11 */
vpsubq RKR, RX, RX;
vpshufb RKM, RX, RL4; /* be: IV12, IV13 */
vpsubq RKR, RX, RX;
vpshufb RKM, RX, RR4; /* be: IV14, IV15 */
/* store last IV */
vpsubq RTMP, RX, RX; /* le: IV16, IV14 */
vpshufb R1ST, RX, RX; /* be: IV16, IV16 */
vmovq RX, (%rcx);
call __cast5_enc_blk16;
/* dst = src ^ iv */
vpxor (0*16)(%r12), RR1, RR1;
vpxor (1*16)(%r12), RL1, RL1;
vpxor (2*16)(%r12), RR2, RR2;
vpxor (3*16)(%r12), RL2, RL2;
vpxor (4*16)(%r12), RR3, RR3;
vpxor (5*16)(%r12), RL3, RL3;
vpxor (6*16)(%r12), RR4, RR4;
vpxor (7*16)(%r12), RL4, RL4;
vmovdqu RR1, (0*16)(%r11);
vmovdqu RL1, (1*16)(%r11);
vmovdqu RR2, (2*16)(%r11);
vmovdqu RL2, (3*16)(%r11);
vmovdqu RR3, (4*16)(%r11);
vmovdqu RL3, (5*16)(%r11);
vmovdqu RR4, (6*16)(%r11);
vmovdqu RL4, (7*16)(%r11);
popq %r12;
ret;
......@@ -37,29 +37,14 @@
#define CAST5_PARALLEL_BLOCKS 16
asmlinkage void __cast5_enc_blk_16way(struct cast5_ctx *ctx, u8 *dst,
const u8 *src, bool xor);
asmlinkage void cast5_dec_blk_16way(struct cast5_ctx *ctx, u8 *dst,
asmlinkage void cast5_ecb_enc_16way(struct cast5_ctx *ctx, u8 *dst,
const u8 *src);
static inline void cast5_enc_blk_xway(struct cast5_ctx *ctx, u8 *dst,
const u8 *src)
{
__cast5_enc_blk_16way(ctx, dst, src, false);
}
static inline void cast5_enc_blk_xway_xor(struct cast5_ctx *ctx, u8 *dst,
const u8 *src)
{
__cast5_enc_blk_16way(ctx, dst, src, true);
}
static inline void cast5_dec_blk_xway(struct cast5_ctx *ctx, u8 *dst,
const u8 *src)
{
cast5_dec_blk_16way(ctx, dst, src);
}
asmlinkage void cast5_ecb_dec_16way(struct cast5_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void cast5_cbc_dec_16way(struct cast5_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void cast5_ctr_16way(struct cast5_ctx *ctx, u8 *dst, const u8 *src,
__be64 *iv);
static inline bool cast5_fpu_begin(bool fpu_enabled, unsigned int nbytes)
{
......@@ -79,8 +64,11 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
const unsigned int bsize = CAST5_BLOCK_SIZE;
unsigned int nbytes;
void (*fn)(struct cast5_ctx *ctx, u8 *dst, const u8 *src);
int err;
fn = (enc) ? cast5_ecb_enc_16way : cast5_ecb_dec_16way;
err = blkcipher_walk_virt(desc, walk);
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
......@@ -93,10 +81,7 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
/* Process multi-block batch */
if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
do {
if (enc)
cast5_enc_blk_xway(ctx, wdst, wsrc);
else
cast5_dec_blk_xway(ctx, wdst, wsrc);
fn(ctx, wdst, wsrc);
wsrc += bsize * CAST5_PARALLEL_BLOCKS;
wdst += bsize * CAST5_PARALLEL_BLOCKS;
......@@ -107,12 +92,11 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
goto done;
}
fn = (enc) ? __cast5_encrypt : __cast5_decrypt;
/* Handle leftovers */
do {
if (enc)
__cast5_encrypt(ctx, wdst, wsrc);
else
__cast5_decrypt(ctx, wdst, wsrc);
fn(ctx, wdst, wsrc);
wsrc += bsize;
wdst += bsize;
......@@ -194,9 +178,7 @@ static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
unsigned int nbytes = walk->nbytes;
u64 *src = (u64 *)walk->src.virt.addr;
u64 *dst = (u64 *)walk->dst.virt.addr;
u64 ivs[CAST5_PARALLEL_BLOCKS - 1];
u64 last_iv;
int i;
/* Start of the last block. */
src += nbytes / bsize - 1;
......@@ -211,13 +193,7 @@ static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
src -= CAST5_PARALLEL_BLOCKS - 1;
dst -= CAST5_PARALLEL_BLOCKS - 1;
for (i = 0; i < CAST5_PARALLEL_BLOCKS - 1; i++)
ivs[i] = src[i];
cast5_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src);
for (i = 0; i < CAST5_PARALLEL_BLOCKS - 1; i++)
*(dst + (i + 1)) ^= *(ivs + i);
cast5_cbc_dec_16way(ctx, (u8 *)dst, (u8 *)src);
nbytes -= bsize;
if (nbytes < bsize)
......@@ -298,23 +274,12 @@ static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
unsigned int nbytes = walk->nbytes;
u64 *src = (u64 *)walk->src.virt.addr;
u64 *dst = (u64 *)walk->dst.virt.addr;
u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv);
__be64 ctrblocks[CAST5_PARALLEL_BLOCKS];
int i;
/* Process multi-block batch */
if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
do {
/* create ctrblks for parallel encrypt */
for (i = 0; i < CAST5_PARALLEL_BLOCKS; i++) {
if (dst != src)
dst[i] = src[i];
ctrblocks[i] = cpu_to_be64(ctrblk++);
}
cast5_enc_blk_xway_xor(ctx, (u8 *)dst,
(u8 *)ctrblocks);
cast5_ctr_16way(ctx, (u8 *)dst, (u8 *)src,
(__be64 *)walk->iv);
src += CAST5_PARALLEL_BLOCKS;
dst += CAST5_PARALLEL_BLOCKS;
......@@ -327,13 +292,16 @@ static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
/* Handle leftovers */
do {
u64 ctrblk;
if (dst != src)
*dst = *src;
ctrblocks[0] = cpu_to_be64(ctrblk++);
ctrblk = *(u64 *)walk->iv;
be64_add_cpu((__be64 *)walk->iv, 1);
__cast5_encrypt(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks);
*dst ^= ctrblocks[0];
__cast5_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
*dst ^= ctrblk;
src += 1;
dst += 1;
......@@ -341,7 +309,6 @@ static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
} while (nbytes >= bsize);
done:
*(__be64 *)walk->iv = cpu_to_be64(ctrblk);
return nbytes;
}
......
......@@ -23,22 +23,24 @@
*
*/
#include "glue_helper-asm-avx.S"
.file "cast6-avx-x86_64-asm_64.S"
.extern cast6_s1
.extern cast6_s2
.extern cast6_s3
.extern cast6_s4
.extern cast_s1
.extern cast_s2
.extern cast_s3
.extern cast_s4
/* structure of crypto context */
#define km 0
#define kr (12*4*4)
/* s-boxes */
#define s1 cast6_s1
#define s2 cast6_s2
#define s3 cast6_s3
#define s4 cast6_s4
#define s1 cast_s1
#define s2 cast_s2
#define s3 cast_s3
#define s4 cast_s4
/**********************************************************************
8-way AVX cast6
......@@ -205,11 +207,7 @@
vpunpcklqdq x3, t2, x2; \
vpunpckhqdq x3, t2, x3;
#define inpack_blocks(in, x0, x1, x2, x3, t0, t1, t2, rmask) \
vmovdqu (0*4*4)(in), x0; \
vmovdqu (1*4*4)(in), x1; \
vmovdqu (2*4*4)(in), x2; \
vmovdqu (3*4*4)(in), x3; \
#define inpack_blocks(x0, x1, x2, x3, t0, t1, t2, rmask) \
vpshufb rmask, x0, x0; \
vpshufb rmask, x1, x1; \
vpshufb rmask, x2, x2; \
......@@ -217,39 +215,21 @@
\
transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
#define outunpack_blocks(out, x0, x1, x2, x3, t0, t1, t2, rmask) \
#define outunpack_blocks(x0, x1, x2, x3, t0, t1, t2, rmask) \
transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
\
vpshufb rmask, x0, x0; \
vpshufb rmask, x1, x1; \
vpshufb rmask, x2, x2; \
vpshufb rmask, x3, x3; \
vmovdqu x0, (0*4*4)(out); \
vmovdqu x1, (1*4*4)(out); \
vmovdqu x2, (2*4*4)(out); \
vmovdqu x3, (3*4*4)(out);
#define outunpack_xor_blocks(out, x0, x1, x2, x3, t0, t1, t2, rmask) \
transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
\
vpshufb rmask, x0, x0; \
vpshufb rmask, x1, x1; \
vpshufb rmask, x2, x2; \
vpshufb rmask, x3, x3; \
vpxor (0*4*4)(out), x0, x0; \
vmovdqu x0, (0*4*4)(out); \
vpxor (1*4*4)(out), x1, x1; \
vmovdqu x1, (1*4*4)(out); \
vpxor (2*4*4)(out), x2, x2; \
vmovdqu x2, (2*4*4)(out); \
vpxor (3*4*4)(out), x3, x3; \
vmovdqu x3, (3*4*4)(out);
vpshufb rmask, x3, x3;
.data
.align 16
.Lbswap_mask:
.byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
.Lbswap128_mask:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
.Lrkr_enc_Q_Q_QBAR_QBAR:
.byte 0, 1, 2, 3, 4, 5, 6, 7, 11, 10, 9, 8, 15, 14, 13, 12
.Lrkr_enc_QBAR_QBAR_QBAR_QBAR:
......@@ -269,31 +249,26 @@
.text
.align 16
.global __cast6_enc_blk_8way
.type __cast6_enc_blk_8way,@function;
.align 8
.type __cast6_enc_blk8,@function;
__cast6_enc_blk_8way:
__cast6_enc_blk8:
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
* %rcx: bool, if true: xor output
* RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: blocks
* output:
* RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks
*/
pushq %rbp;
pushq %rbx;
pushq %rcx;
vmovdqa .Lbswap_mask, RKM;
vmovd .Lfirst_mask, R1ST;
vmovd .L32_mask, R32;
leaq (4*4*4)(%rdx), %rax;
inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
inpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
movq %rsi, %r11;
inpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
inpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
preload_rkr(0, dummy, none);
Q(0);
......@@ -311,36 +286,25 @@ __cast6_enc_blk_8way:
QBAR(10);
QBAR(11);
popq %rcx;
popq %rbx;
popq %rbp;
vmovdqa .Lbswap_mask, RKM;
leaq (4*4*4)(%r11), %rax;
testb %cl, %cl;
jnz __enc_xor8;
outunpack_blocks(%r11, RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
ret;
__enc_xor8:
outunpack_xor_blocks(%r11, RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
outunpack_xor_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
.align 8
.type __cast6_dec_blk8,@function;
ret;
.align 16
.global cast6_dec_blk_8way
.type cast6_dec_blk_8way,@function;
cast6_dec_blk_8way:
__cast6_dec_blk8:
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
* RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks
* output:
* RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: decrypted blocks
*/
pushq %rbp;
......@@ -350,11 +314,8 @@ cast6_dec_blk_8way:
vmovd .Lfirst_mask, R1ST;
vmovd .L32_mask, R32;
leaq (4*4*4)(%rdx), %rax;
inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
inpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
movq %rsi, %r11;
inpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
inpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
preload_rkr(2, shuffle, .Lrkr_dec_Q_Q_Q_Q);
Q(11);
......@@ -376,8 +337,103 @@ cast6_dec_blk_8way:
popq %rbp;
vmovdqa .Lbswap_mask, RKM;
leaq (4*4*4)(%r11), %rax;
outunpack_blocks(%r11, RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
ret;
.align 8
.global cast6_ecb_enc_8way
.type cast6_ecb_enc_8way,@function;
cast6_ecb_enc_8way:
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
*/
movq %rsi, %r11;
load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
call __cast6_enc_blk8;
store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
ret;
.align 8
.global cast6_ecb_dec_8way
.type cast6_ecb_dec_8way,@function;
cast6_ecb_dec_8way:
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
*/
movq %rsi, %r11;
load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
call __cast6_dec_blk8;
store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
ret;
.align 8
.global cast6_cbc_dec_8way
.type cast6_cbc_dec_8way,@function;
cast6_cbc_dec_8way:
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
*/
pushq %r12;
movq %rsi, %r11;
movq %rdx, %r12;
load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
call __cast6_dec_blk8;
store_cbc_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
popq %r12;
ret;
.align 8
.global cast6_ctr_8way
.type cast6_ctr_8way,@function;
cast6_ctr_8way:
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
* %rcx: iv (little endian, 128bit)
*/
pushq %r12;
movq %rsi, %r11;
movq %rdx, %r12;
load_ctr_8way(%rcx, .Lbswap128_mask, RA1, RB1, RC1, RD1, RA2, RB2, RC2,
RD2, RX, RKR, RKM);
call __cast6_enc_blk8;
store_ctr_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
popq %r12;
ret;
......@@ -40,79 +40,34 @@
#define CAST6_PARALLEL_BLOCKS 8
asmlinkage void __cast6_enc_blk_8way(struct cast6_ctx *ctx, u8 *dst,
const u8 *src, bool xor);
asmlinkage void cast6_dec_blk_8way(struct cast6_ctx *ctx, u8 *dst,
asmlinkage void cast6_ecb_enc_8way(struct cast6_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void cast6_ecb_dec_8way(struct cast6_ctx *ctx, u8 *dst,
const u8 *src);
static inline void cast6_enc_blk_xway(struct cast6_ctx *ctx, u8 *dst,
const u8 *src)
{
__cast6_enc_blk_8way(ctx, dst, src, false);
}
static inline void cast6_enc_blk_xway_xor(struct cast6_ctx *ctx, u8 *dst,
const u8 *src)
{
__cast6_enc_blk_8way(ctx, dst, src, true);
}
static inline void cast6_dec_blk_xway(struct cast6_ctx *ctx, u8 *dst,
const u8 *src)
{
cast6_dec_blk_8way(ctx, dst, src);
}
static void cast6_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src)
{
u128 ivs[CAST6_PARALLEL_BLOCKS - 1];
unsigned int j;
for (j = 0; j < CAST6_PARALLEL_BLOCKS - 1; j++)
ivs[j] = src[j];
cast6_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src);
for (j = 0; j < CAST6_PARALLEL_BLOCKS - 1; j++)
u128_xor(dst + (j + 1), dst + (j + 1), ivs + j);
}
asmlinkage void cast6_cbc_dec_8way(struct cast6_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void cast6_ctr_8way(struct cast6_ctx *ctx, u8 *dst, const u8 *src,
le128 *iv);
static void cast6_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv)
static void cast6_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
be128 ctrblk;
u128_to_be128(&ctrblk, iv);
u128_inc(iv);
le128_to_be128(&ctrblk, iv);
le128_inc(iv);
__cast6_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
u128_xor(dst, src, (u128 *)&ctrblk);
}
static void cast6_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src,
u128 *iv)
{
be128 ctrblks[CAST6_PARALLEL_BLOCKS];
unsigned int i;
for (i = 0; i < CAST6_PARALLEL_BLOCKS; i++) {
if (dst != src)
dst[i] = src[i];
u128_to_be128(&ctrblks[i], iv);
u128_inc(iv);
}
cast6_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks);
}
static const struct common_glue_ctx cast6_enc = {
.num_funcs = 2,
.fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = CAST6_PARALLEL_BLOCKS,
.fn_u = { .ecb = GLUE_FUNC_CAST(cast6_enc_blk_xway) }
.fn_u = { .ecb = GLUE_FUNC_CAST(cast6_ecb_enc_8way) }
}, {
.num_blocks = 1,
.fn_u = { .ecb = GLUE_FUNC_CAST(__cast6_encrypt) }
......@@ -125,7 +80,7 @@ static const struct common_glue_ctx cast6_ctr = {
.funcs = { {
.num_blocks = CAST6_PARALLEL_BLOCKS,
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(cast6_crypt_ctr_xway) }
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(cast6_ctr_8way) }
}, {
.num_blocks = 1,
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(cast6_crypt_ctr) }
......@@ -138,7 +93,7 @@ static const struct common_glue_ctx cast6_dec = {
.funcs = { {
.num_blocks = CAST6_PARALLEL_BLOCKS,
.fn_u = { .ecb = GLUE_FUNC_CAST(cast6_dec_blk_xway) }
.fn_u = { .ecb = GLUE_FUNC_CAST(cast6_ecb_dec_8way) }
}, {
.num_blocks = 1,
.fn_u = { .ecb = GLUE_FUNC_CAST(__cast6_decrypt) }
......@@ -151,7 +106,7 @@ static const struct common_glue_ctx cast6_dec_cbc = {
.funcs = { {
.num_blocks = CAST6_PARALLEL_BLOCKS,
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(cast6_decrypt_cbc_xway) }
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(cast6_cbc_dec_8way) }
}, {
.num_blocks = 1,
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__cast6_decrypt) }
......@@ -215,7 +170,7 @@ static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes);
if (nbytes == bsize * CAST6_PARALLEL_BLOCKS) {
cast6_enc_blk_xway(ctx->ctx, srcdst, srcdst);
cast6_ecb_enc_8way(ctx->ctx, srcdst, srcdst);
return;
}
......@@ -232,7 +187,7 @@ static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes);
if (nbytes == bsize * CAST6_PARALLEL_BLOCKS) {
cast6_dec_blk_xway(ctx->ctx, srcdst, srcdst);
cast6_ecb_dec_8way(ctx->ctx, srcdst, srcdst);
return;
}
......
......@@ -32,6 +32,8 @@
#include <asm/cpufeature.h>
#include <asm/cpu_device_id.h>
#include <asm/i387.h>
#include <asm/fpu-internal.h>
#define CHKSUM_BLOCK_SIZE 1
#define CHKSUM_DIGEST_SIZE 4
......@@ -44,6 +46,31 @@
#define REX_PRE
#endif
#ifdef CONFIG_X86_64
/*
* use carryless multiply version of crc32c when buffer
* size is >= 512 (when eager fpu is enabled) or
* >= 1024 (when eager fpu is disabled) to account
* for fpu state save/restore overhead.
*/
#define CRC32C_PCL_BREAKEVEN_EAGERFPU 512
#define CRC32C_PCL_BREAKEVEN_NOEAGERFPU 1024
asmlinkage unsigned int crc_pcl(const u8 *buffer, int len,
unsigned int crc_init);
static int crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_EAGERFPU;
#if defined(X86_FEATURE_EAGER_FPU)
#define set_pcl_breakeven_point() \
do { \
if (!use_eager_fpu()) \
crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_NOEAGERFPU; \
} while (0)
#else
#define set_pcl_breakeven_point() \
(crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_NOEAGERFPU)
#endif
#endif /* CONFIG_X86_64 */
static u32 crc32c_intel_le_hw_byte(u32 crc, unsigned char const *data, size_t length)
{
while (length--) {
......@@ -154,6 +181,52 @@ static int crc32c_intel_cra_init(struct crypto_tfm *tfm)
return 0;
}
#ifdef CONFIG_X86_64
static int crc32c_pcl_intel_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
u32 *crcp = shash_desc_ctx(desc);
/*
* use faster PCL version if datasize is large enough to
* overcome kernel fpu state save/restore overhead
*/
if (len >= crc32c_pcl_breakeven && irq_fpu_usable()) {
kernel_fpu_begin();
*crcp = crc_pcl(data, len, *crcp);
kernel_fpu_end();
} else
*crcp = crc32c_intel_le_hw(*crcp, data, len);
return 0;
}
static int __crc32c_pcl_intel_finup(u32 *crcp, const u8 *data, unsigned int len,
u8 *out)
{
if (len >= crc32c_pcl_breakeven && irq_fpu_usable()) {
kernel_fpu_begin();
*(__le32 *)out = ~cpu_to_le32(crc_pcl(data, len, *crcp));
kernel_fpu_end();
} else
*(__le32 *)out =
~cpu_to_le32(crc32c_intel_le_hw(*crcp, data, len));
return 0;
}
static int crc32c_pcl_intel_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
return __crc32c_pcl_intel_finup(shash_desc_ctx(desc), data, len, out);
}
static int crc32c_pcl_intel_digest(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
return __crc32c_pcl_intel_finup(crypto_shash_ctx(desc->tfm), data, len,
out);
}
#endif /* CONFIG_X86_64 */
static struct shash_alg alg = {
.setkey = crc32c_intel_setkey,
.init = crc32c_intel_init,
......@@ -184,6 +257,14 @@ static int __init crc32c_intel_mod_init(void)
{
if (!x86_match_cpu(crc32c_cpu_id))
return -ENODEV;
#ifdef CONFIG_X86_64
if (cpu_has_pclmulqdq) {
alg.update = crc32c_pcl_intel_update;
alg.finup = crc32c_pcl_intel_finup;
alg.digest = crc32c_pcl_intel_digest;
set_pcl_breakeven_point();
}
#endif
return crypto_register_shash(&alg);
}
......
/*
* Implement fast CRC32C with PCLMULQDQ instructions. (x86_64)
*
* The white paper on CRC32C calculations with PCLMULQDQ instruction can be
* downloaded from:
* http://download.intel.com/design/intarch/papers/323405.pdf
*
* Copyright (C) 2012 Intel Corporation.
*
* Authors:
* Wajdi Feghali <wajdi.k.feghali@intel.com>
* James Guilford <james.guilford@intel.com>
* David Cote <david.m.cote@intel.com>
* Tim Chen <tim.c.chen@linux.intel.com>
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction
.macro LABEL prefix n
\prefix\n\():
.endm
.macro JMPTBL_ENTRY i
.word crc_\i - crc_array
.endm
.macro JNC_LESS_THAN j
jnc less_than_\j
.endm
# Define threshold where buffers are considered "small" and routed to more
# efficient "by-1" code. This "by-1" code only handles up to 255 bytes, so
# SMALL_SIZE can be no larger than 255.
#define SMALL_SIZE 200
.if (SMALL_SIZE > 255)
.error "SMALL_ SIZE must be < 256"
.endif
# unsigned int crc_pcl(u8 *buffer, int len, unsigned int crc_init);
.global crc_pcl
crc_pcl:
#define bufp %rdi
#define bufp_dw %edi
#define bufp_w %di
#define bufp_b %dil
#define bufptmp %rcx
#define block_0 %rcx
#define block_1 %rdx
#define block_2 %r11
#define len %rsi
#define len_dw %esi
#define len_w %si
#define len_b %sil
#define crc_init_arg %rdx
#define tmp %rbx
#define crc_init %r8
#define crc_init_dw %r8d
#define crc1 %r9
#define crc2 %r10
pushq %rbx
pushq %rdi
pushq %rsi
## Move crc_init for Linux to a different
mov crc_init_arg, crc_init
################################################################
## 1) ALIGN:
################################################################
mov bufp, bufptmp # rdi = *buf
neg bufp
and $7, bufp # calculate the unalignment amount of
# the address
je proc_block # Skip if aligned
## If len is less than 8 and we're unaligned, we need to jump
## to special code to avoid reading beyond the end of the buffer
cmp $8, len
jae do_align
# less_than_8 expects length in upper 3 bits of len_dw
# less_than_8_post_shl1 expects length = carryflag * 8 + len_dw[31:30]
shl $32-3+1, len_dw
jmp less_than_8_post_shl1
do_align:
#### Calculate CRC of unaligned bytes of the buffer (if any)
movq (bufptmp), tmp # load a quadward from the buffer
add bufp, bufptmp # align buffer pointer for quadword
# processing
sub bufp, len # update buffer length
align_loop:
crc32b %bl, crc_init_dw # compute crc32 of 1-byte
shr $8, tmp # get next byte
dec bufp
jne align_loop
proc_block:
################################################################
## 2) PROCESS BLOCKS:
################################################################
## compute num of bytes to be processed
movq len, tmp # save num bytes in tmp
cmpq $128*24, len
jae full_block
continue_block:
cmpq $SMALL_SIZE, len
jb small
## len < 128*24
movq $2731, %rax # 2731 = ceil(2^16 / 24)
mul len_dw
shrq $16, %rax
## eax contains floor(bytes / 24) = num 24-byte chunks to do
## process rax 24-byte chunks (128 >= rax >= 0)
## compute end address of each block
## block 0 (base addr + RAX * 8)
## block 1 (base addr + RAX * 16)
## block 2 (base addr + RAX * 24)
lea (bufptmp, %rax, 8), block_0
lea (block_0, %rax, 8), block_1
lea (block_1, %rax, 8), block_2
xor crc1, crc1
xor crc2, crc2
## branch into array
lea jump_table(%rip), bufp
movzxw (bufp, %rax, 2), len
offset=crc_array-jump_table
lea offset(bufp, len, 1), bufp
jmp *bufp
################################################################
## 2a) PROCESS FULL BLOCKS:
################################################################
full_block:
movq $128,%rax
lea 128*8*2(block_0), block_1
lea 128*8*3(block_0), block_2
add $128*8*1, block_0
xor crc1,crc1
xor crc2,crc2
# Fall thruogh into top of crc array (crc_128)
################################################################
## 3) CRC Array:
################################################################
crc_array:
i=128
.rept 128-1
.altmacro
LABEL crc_ %i
.noaltmacro
crc32q -i*8(block_0), crc_init
crc32q -i*8(block_1), crc1
crc32q -i*8(block_2), crc2
i=(i-1)
.endr
.altmacro
LABEL crc_ %i
.noaltmacro
crc32q -i*8(block_0), crc_init
crc32q -i*8(block_1), crc1
# SKIP crc32 -i*8(block_2), crc2 ; Don't do this one yet
mov block_2, block_0
################################################################
## 4) Combine three results:
################################################################
lea (K_table-16)(%rip), bufp # first entry is for idx 1
shlq $3, %rax # rax *= 8
subq %rax, tmp # tmp -= rax*8
shlq $1, %rax
subq %rax, tmp # tmp -= rax*16
# (total tmp -= rax*24)
addq %rax, bufp
movdqa (bufp), %xmm0 # 2 consts: K1:K2
movq crc_init, %xmm1 # CRC for block 1
pclmulqdq $0x00,%xmm0,%xmm1 # Multiply by K2
movq crc1, %xmm2 # CRC for block 2
pclmulqdq $0x10, %xmm0, %xmm2 # Multiply by K1
pxor %xmm2,%xmm1
movq %xmm1, %rax
xor -i*8(block_2), %rax
mov crc2, crc_init
crc32 %rax, crc_init
################################################################
## 5) Check for end:
################################################################
LABEL crc_ 0
mov tmp, len
cmp $128*24, tmp
jae full_block
cmp $24, tmp
jae continue_block
less_than_24:
shl $32-4, len_dw # less_than_16 expects length
# in upper 4 bits of len_dw
jnc less_than_16
crc32q (bufptmp), crc_init
crc32q 8(bufptmp), crc_init
jz do_return
add $16, bufptmp
# len is less than 8 if we got here
# less_than_8 expects length in upper 3 bits of len_dw
# less_than_8_post_shl1 expects length = carryflag * 8 + len_dw[31:30]
shl $2, len_dw
jmp less_than_8_post_shl1
#######################################################################
## 6) LESS THAN 256-bytes REMAIN AT THIS POINT (8-bits of len are full)
#######################################################################
small:
shl $32-8, len_dw # Prepare len_dw for less_than_256
j=256
.rept 5 # j = {256, 128, 64, 32, 16}
.altmacro
LABEL less_than_ %j # less_than_j: Length should be in
# upper lg(j) bits of len_dw
j=(j/2)
shl $1, len_dw # Get next MSB
JNC_LESS_THAN %j
.noaltmacro
i=0
.rept (j/8)
crc32q i(bufptmp), crc_init # Compute crc32 of 8-byte data
i=i+8
.endr
jz do_return # Return if remaining length is zero
add $j, bufptmp # Advance buf
.endr
less_than_8: # Length should be stored in
# upper 3 bits of len_dw
shl $1, len_dw
less_than_8_post_shl1:
jnc less_than_4
crc32l (bufptmp), crc_init_dw # CRC of 4 bytes
jz do_return # return if remaining data is zero
add $4, bufptmp
less_than_4: # Length should be stored in
# upper 2 bits of len_dw
shl $1, len_dw
jnc less_than_2
crc32w (bufptmp), crc_init_dw # CRC of 2 bytes
jz do_return # return if remaining data is zero
add $2, bufptmp
less_than_2: # Length should be stored in the MSB
# of len_dw
shl $1, len_dw
jnc less_than_1
crc32b (bufptmp), crc_init_dw # CRC of 1 byte
less_than_1: # Length should be zero
do_return:
movq crc_init, %rax
popq %rsi
popq %rdi
popq %rbx
ret
################################################################
## jump table Table is 129 entries x 2 bytes each
################################################################
.align 4
jump_table:
i=0
.rept 129
.altmacro
JMPTBL_ENTRY %i
.noaltmacro
i=i+1
.endr
################################################################
## PCLMULQDQ tables
## Table is 128 entries x 2 quad words each
################################################################
.data
.align 64
K_table:
.quad 0x14cd00bd6,0x105ec76f0
.quad 0x0ba4fc28e,0x14cd00bd6
.quad 0x1d82c63da,0x0f20c0dfe
.quad 0x09e4addf8,0x0ba4fc28e
.quad 0x039d3b296,0x1384aa63a
.quad 0x102f9b8a2,0x1d82c63da
.quad 0x14237f5e6,0x01c291d04
.quad 0x00d3b6092,0x09e4addf8
.quad 0x0c96cfdc0,0x0740eef02
.quad 0x18266e456,0x039d3b296
.quad 0x0daece73e,0x0083a6eec
.quad 0x0ab7aff2a,0x102f9b8a2
.quad 0x1248ea574,0x1c1733996
.quad 0x083348832,0x14237f5e6
.quad 0x12c743124,0x02ad91c30
.quad 0x0b9e02b86,0x00d3b6092
.quad 0x018b33a4e,0x06992cea2
.quad 0x1b331e26a,0x0c96cfdc0
.quad 0x17d35ba46,0x07e908048
.quad 0x1bf2e8b8a,0x18266e456
.quad 0x1a3e0968a,0x11ed1f9d8
.quad 0x0ce7f39f4,0x0daece73e
.quad 0x061d82e56,0x0f1d0f55e
.quad 0x0d270f1a2,0x0ab7aff2a
.quad 0x1c3f5f66c,0x0a87ab8a8
.quad 0x12ed0daac,0x1248ea574
.quad 0x065863b64,0x08462d800
.quad 0x11eef4f8e,0x083348832
.quad 0x1ee54f54c,0x071d111a8
.quad 0x0b3e32c28,0x12c743124
.quad 0x0064f7f26,0x0ffd852c6
.quad 0x0dd7e3b0c,0x0b9e02b86
.quad 0x0f285651c,0x0dcb17aa4
.quad 0x010746f3c,0x018b33a4e
.quad 0x1c24afea4,0x0f37c5aee
.quad 0x0271d9844,0x1b331e26a
.quad 0x08e766a0c,0x06051d5a2
.quad 0x093a5f730,0x17d35ba46
.quad 0x06cb08e5c,0x11d5ca20e
.quad 0x06b749fb2,0x1bf2e8b8a
.quad 0x1167f94f2,0x021f3d99c
.quad 0x0cec3662e,0x1a3e0968a
.quad 0x19329634a,0x08f158014
.quad 0x0e6fc4e6a,0x0ce7f39f4
.quad 0x08227bb8a,0x1a5e82106
.quad 0x0b0cd4768,0x061d82e56
.quad 0x13c2b89c4,0x188815ab2
.quad 0x0d7a4825c,0x0d270f1a2
.quad 0x10f5ff2ba,0x105405f3e
.quad 0x00167d312,0x1c3f5f66c
.quad 0x0f6076544,0x0e9adf796
.quad 0x026f6a60a,0x12ed0daac
.quad 0x1a2adb74e,0x096638b34
.quad 0x19d34af3a,0x065863b64
.quad 0x049c3cc9c,0x1e50585a0
.quad 0x068bce87a,0x11eef4f8e
.quad 0x1524fa6c6,0x19f1c69dc
.quad 0x16cba8aca,0x1ee54f54c
.quad 0x042d98888,0x12913343e
.quad 0x1329d9f7e,0x0b3e32c28
.quad 0x1b1c69528,0x088f25a3a
.quad 0x02178513a,0x0064f7f26
.quad 0x0e0ac139e,0x04e36f0b0
.quad 0x0170076fa,0x0dd7e3b0c
.quad 0x141a1a2e2,0x0bd6f81f8
.quad 0x16ad828b4,0x0f285651c
.quad 0x041d17b64,0x19425cbba
.quad 0x1fae1cc66,0x010746f3c
.quad 0x1a75b4b00,0x18db37e8a
.quad 0x0f872e54c,0x1c24afea4
.quad 0x01e41e9fc,0x04c144932
.quad 0x086d8e4d2,0x0271d9844
.quad 0x160f7af7a,0x052148f02
.quad 0x05bb8f1bc,0x08e766a0c
.quad 0x0a90fd27a,0x0a3c6f37a
.quad 0x0b3af077a,0x093a5f730
.quad 0x04984d782,0x1d22c238e
.quad 0x0ca6ef3ac,0x06cb08e5c
.quad 0x0234e0b26,0x063ded06a
.quad 0x1d88abd4a,0x06b749fb2
.quad 0x04597456a,0x04d56973c
.quad 0x0e9e28eb4,0x1167f94f2
.quad 0x07b3ff57a,0x19385bf2e
.quad 0x0c9c8b782,0x0cec3662e
.quad 0x13a9cba9e,0x0e417f38a
.quad 0x093e106a4,0x19329634a
.quad 0x167001a9c,0x14e727980
.quad 0x1ddffc5d4,0x0e6fc4e6a
.quad 0x00df04680,0x0d104b8fc
.quad 0x02342001e,0x08227bb8a
.quad 0x00a2a8d7e,0x05b397730
.quad 0x168763fa6,0x0b0cd4768
.quad 0x1ed5a407a,0x0e78eb416
.quad 0x0d2c3ed1a,0x13c2b89c4
.quad 0x0995a5724,0x1641378f0
.quad 0x19b1afbc4,0x0d7a4825c
.quad 0x109ffedc0,0x08d96551c
.quad 0x0f2271e60,0x10f5ff2ba
.quad 0x00b0bf8ca,0x00bf80dd2
.quad 0x123888b7a,0x00167d312
.quad 0x1e888f7dc,0x18dcddd1c
.quad 0x002ee03b2,0x0f6076544
.quad 0x183e8d8fe,0x06a45d2b2
.quad 0x133d7a042,0x026f6a60a
.quad 0x116b0f50c,0x1dd3e10e8
.quad 0x05fabe670,0x1a2adb74e
.quad 0x130004488,0x0de87806c
.quad 0x000bcf5f6,0x19d34af3a
.quad 0x18f0c7078,0x014338754
.quad 0x017f27698,0x049c3cc9c
.quad 0x058ca5f00,0x15e3e77ee
.quad 0x1af900c24,0x068bce87a
.quad 0x0b5cfca28,0x0dd07448e
.quad 0x0ded288f8,0x1524fa6c6
.quad 0x059f229bc,0x1d8048348
.quad 0x06d390dec,0x16cba8aca
.quad 0x037170390,0x0a3e3e02c
.quad 0x06353c1cc,0x042d98888
.quad 0x0c4584f5c,0x0d73c7bea
.quad 0x1f16a3418,0x1329d9f7e
.quad 0x0531377e2,0x185137662
.quad 0x1d8d9ca7c,0x1b1c69528
.quad 0x0b25b29f2,0x18a08b5bc
.quad 0x19fb2a8b0,0x02178513a
.quad 0x1a08fe6ac,0x1da758ae0
.quad 0x045cddf4e,0x0e0ac139e
.quad 0x1a91647f2,0x169cf9eb0
.quad 0x1a0f717c4,0x0170076fa
/*
* Shared glue code for 128bit block ciphers, AVX assembler macros
*
* Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#define load_8way(src, x0, x1, x2, x3, x4, x5, x6, x7) \
vmovdqu (0*16)(src), x0; \
vmovdqu (1*16)(src), x1; \
vmovdqu (2*16)(src), x2; \
vmovdqu (3*16)(src), x3; \
vmovdqu (4*16)(src), x4; \
vmovdqu (5*16)(src), x5; \
vmovdqu (6*16)(src), x6; \
vmovdqu (7*16)(src), x7;
#define store_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7) \
vmovdqu x0, (0*16)(dst); \
vmovdqu x1, (1*16)(dst); \
vmovdqu x2, (2*16)(dst); \
vmovdqu x3, (3*16)(dst); \
vmovdqu x4, (4*16)(dst); \
vmovdqu x5, (5*16)(dst); \
vmovdqu x6, (6*16)(dst); \
vmovdqu x7, (7*16)(dst);
#define store_cbc_8way(src, dst, x0, x1, x2, x3, x4, x5, x6, x7) \
vpxor (0*16)(src), x1, x1; \
vpxor (1*16)(src), x2, x2; \
vpxor (2*16)(src), x3, x3; \
vpxor (3*16)(src), x4, x4; \
vpxor (4*16)(src), x5, x5; \
vpxor (5*16)(src), x6, x6; \
vpxor (6*16)(src), x7, x7; \
store_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7);
#define inc_le128(x, minus_one, tmp) \
vpcmpeqq minus_one, x, tmp; \
vpsubq minus_one, x, x; \
vpslldq $8, tmp, tmp; \
vpsubq tmp, x, x;
#define load_ctr_8way(iv, bswap, x0, x1, x2, x3, x4, x5, x6, x7, t0, t1, t2) \
vpcmpeqd t0, t0, t0; \
vpsrldq $8, t0, t0; /* low: -1, high: 0 */ \
vmovdqa bswap, t1; \
\
/* load IV and byteswap */ \
vmovdqu (iv), x7; \
vpshufb t1, x7, x0; \
\
/* construct IVs */ \
inc_le128(x7, t0, t2); \
vpshufb t1, x7, x1; \
inc_le128(x7, t0, t2); \
vpshufb t1, x7, x2; \
inc_le128(x7, t0, t2); \
vpshufb t1, x7, x3; \
inc_le128(x7, t0, t2); \
vpshufb t1, x7, x4; \
inc_le128(x7, t0, t2); \
vpshufb t1, x7, x5; \
inc_le128(x7, t0, t2); \
vpshufb t1, x7, x6; \
inc_le128(x7, t0, t2); \
vmovdqa x7, t2; \
vpshufb t1, x7, x7; \
inc_le128(t2, t0, t1); \
vmovdqu t2, (iv);
#define store_ctr_8way(src, dst, x0, x1, x2, x3, x4, x5, x6, x7) \
vpxor (0*16)(src), x0, x0; \
vpxor (1*16)(src), x1, x1; \
vpxor (2*16)(src), x2, x2; \
vpxor (3*16)(src), x3, x3; \
vpxor (4*16)(src), x4, x4; \
vpxor (5*16)(src), x5, x5; \
vpxor (6*16)(src), x6, x6; \
vpxor (7*16)(src), x7, x7; \
store_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7);
......@@ -221,16 +221,16 @@ static void glue_ctr_crypt_final_128bit(const common_glue_ctr_func_t fn_ctr,
u8 *src = (u8 *)walk->src.virt.addr;
u8 *dst = (u8 *)walk->dst.virt.addr;
unsigned int nbytes = walk->nbytes;
u128 ctrblk;
le128 ctrblk;
u128 tmp;
be128_to_u128(&ctrblk, (be128 *)walk->iv);
be128_to_le128(&ctrblk, (be128 *)walk->iv);
memcpy(&tmp, src, nbytes);
fn_ctr(ctx, &tmp, &tmp, &ctrblk);
memcpy(dst, &tmp, nbytes);
u128_to_be128((be128 *)walk->iv, &ctrblk);
le128_to_be128((be128 *)walk->iv, &ctrblk);
}
EXPORT_SYMBOL_GPL(glue_ctr_crypt_final_128bit);
......@@ -243,11 +243,11 @@ static unsigned int __glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
unsigned int nbytes = walk->nbytes;
u128 *src = (u128 *)walk->src.virt.addr;
u128 *dst = (u128 *)walk->dst.virt.addr;
u128 ctrblk;
le128 ctrblk;
unsigned int num_blocks, func_bytes;
unsigned int i;
be128_to_u128(&ctrblk, (be128 *)walk->iv);
be128_to_le128(&ctrblk, (be128 *)walk->iv);
/* Process multi-block batch */
for (i = 0; i < gctx->num_funcs; i++) {
......@@ -269,7 +269,7 @@ static unsigned int __glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
}
done:
u128_to_be128((be128 *)walk->iv, &ctrblk);
le128_to_be128((be128 *)walk->iv, &ctrblk);
return nbytes;
}
......
......@@ -24,7 +24,16 @@
*
*/
#include "glue_helper-asm-avx.S"
.file "serpent-avx-x86_64-asm_64.S"
.data
.align 16
.Lbswap128_mask:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
.text
#define CTX %rdi
......@@ -550,51 +559,27 @@
vpunpcklqdq x3, t2, x2; \
vpunpckhqdq x3, t2, x3;
#define read_blocks(in, x0, x1, x2, x3, t0, t1, t2) \
vmovdqu (0*4*4)(in), x0; \
vmovdqu (1*4*4)(in), x1; \
vmovdqu (2*4*4)(in), x2; \
vmovdqu (3*4*4)(in), x3; \
\
#define read_blocks(x0, x1, x2, x3, t0, t1, t2) \
transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
#define write_blocks(out, x0, x1, x2, x3, t0, t1, t2) \
transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
\
vmovdqu x0, (0*4*4)(out); \
vmovdqu x1, (1*4*4)(out); \
vmovdqu x2, (2*4*4)(out); \
vmovdqu x3, (3*4*4)(out);
#define xor_blocks(out, x0, x1, x2, x3, t0, t1, t2) \
transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
\
vpxor (0*4*4)(out), x0, x0; \
vmovdqu x0, (0*4*4)(out); \
vpxor (1*4*4)(out), x1, x1; \
vmovdqu x1, (1*4*4)(out); \
vpxor (2*4*4)(out), x2, x2; \
vmovdqu x2, (2*4*4)(out); \
vpxor (3*4*4)(out), x3, x3; \
vmovdqu x3, (3*4*4)(out);
#define write_blocks(x0, x1, x2, x3, t0, t1, t2) \
transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
.align 8
.global __serpent_enc_blk_8way_avx
.type __serpent_enc_blk_8way_avx,@function;
.type __serpent_enc_blk8_avx,@function;
__serpent_enc_blk_8way_avx:
__serpent_enc_blk8_avx:
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
* %rcx: bool, if true: xor output
* RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: blocks
* output:
* RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks
*/
vpcmpeqd RNOT, RNOT, RNOT;
leaq (4*4*4)(%rdx), %rax;
read_blocks(%rdx, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
read_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
read_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2);
read_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2);
K2(RA, RB, RC, RD, RE, 0);
S(S0, RA, RB, RC, RD, RE); LK2(RC, RB, RD, RA, RE, 1);
......@@ -630,38 +615,26 @@ __serpent_enc_blk_8way_avx:
S(S6, RA, RB, RD, RC, RE); LK2(RD, RE, RB, RC, RA, 31);
S(S7, RD, RE, RB, RC, RA); K2(RA, RB, RC, RD, RE, 32);
leaq (4*4*4)(%rsi), %rax;
testb %cl, %cl;
jnz __enc_xor8;
write_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
write_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
ret;
__enc_xor8:
xor_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
xor_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
write_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2);
write_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2);
ret;
.align 8
.global serpent_dec_blk_8way_avx
.type serpent_dec_blk_8way_avx,@function;
.type __serpent_dec_blk8_avx,@function;
serpent_dec_blk_8way_avx:
__serpent_dec_blk8_avx:
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
* RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks
* output:
* RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2: decrypted blocks
*/
vpcmpeqd RNOT, RNOT, RNOT;
leaq (4*4*4)(%rdx), %rax;
read_blocks(%rdx, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
read_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
read_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2);
read_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2);
K2(RA, RB, RC, RD, RE, 32);
SP(SI7, RA, RB, RC, RD, RE, 31); KL2(RB, RD, RA, RE, RC, 31);
......@@ -697,8 +670,85 @@ serpent_dec_blk_8way_avx:
SP(SI1, RD, RB, RC, RA, RE, 1); KL2(RE, RB, RC, RA, RD, 1);
S(SI0, RE, RB, RC, RA, RD); K2(RC, RD, RB, RE, RA, 0);
leaq (4*4*4)(%rsi), %rax;
write_blocks(%rsi, RC1, RD1, RB1, RE1, RK0, RK1, RK2);
write_blocks(%rax, RC2, RD2, RB2, RE2, RK0, RK1, RK2);
write_blocks(RC1, RD1, RB1, RE1, RK0, RK1, RK2);
write_blocks(RC2, RD2, RB2, RE2, RK0, RK1, RK2);
ret;
.align 8
.global serpent_ecb_enc_8way_avx
.type serpent_ecb_enc_8way_avx,@function;
serpent_ecb_enc_8way_avx:
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
*/
load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
call __serpent_enc_blk8_avx;
store_8way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
ret;
.align 8
.global serpent_ecb_dec_8way_avx
.type serpent_ecb_dec_8way_avx,@function;
serpent_ecb_dec_8way_avx:
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
*/
load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
call __serpent_dec_blk8_avx;
store_8way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2);
ret;
.align 8
.global serpent_cbc_dec_8way_avx
.type serpent_cbc_dec_8way_avx,@function;
serpent_cbc_dec_8way_avx:
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
*/
load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
call __serpent_dec_blk8_avx;
store_cbc_8way(%rdx, %rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2);
ret;
.align 8
.global serpent_ctr_8way_avx
.type serpent_ctr_8way_avx,@function;
serpent_ctr_8way_avx:
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
* %rcx: iv (little endian, 128bit)
*/
load_ctr_8way(%rcx, .Lbswap128_mask, RA1, RB1, RC1, RD1, RA2, RB2, RC2,
RD2, RK0, RK1, RK2);
call __serpent_enc_blk8_avx;
store_ctr_8way(%rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
ret;
......@@ -42,55 +42,24 @@
#include <asm/crypto/ablk_helper.h>
#include <asm/crypto/glue_helper.h>
static void serpent_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src)
{
u128 ivs[SERPENT_PARALLEL_BLOCKS - 1];
unsigned int j;
for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++)
ivs[j] = src[j];
serpent_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src);
for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++)
u128_xor(dst + (j + 1), dst + (j + 1), ivs + j);
}
static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv)
static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
be128 ctrblk;
u128_to_be128(&ctrblk, iv);
u128_inc(iv);
le128_to_be128(&ctrblk, iv);
le128_inc(iv);
__serpent_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
u128_xor(dst, src, (u128 *)&ctrblk);
}
static void serpent_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src,
u128 *iv)
{
be128 ctrblks[SERPENT_PARALLEL_BLOCKS];
unsigned int i;
for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) {
if (dst != src)
dst[i] = src[i];
u128_to_be128(&ctrblks[i], iv);
u128_inc(iv);
}
serpent_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks);
}
static const struct common_glue_ctx serpent_enc = {
.num_funcs = 2,
.fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = SERPENT_PARALLEL_BLOCKS,
.fn_u = { .ecb = GLUE_FUNC_CAST(serpent_enc_blk_xway) }
.fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_enc_8way_avx) }
}, {
.num_blocks = 1,
.fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_encrypt) }
......@@ -103,7 +72,7 @@ static const struct common_glue_ctx serpent_ctr = {
.funcs = { {
.num_blocks = SERPENT_PARALLEL_BLOCKS,
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr_xway) }
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_ctr_8way_avx) }
}, {
.num_blocks = 1,
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr) }
......@@ -116,7 +85,7 @@ static const struct common_glue_ctx serpent_dec = {
.funcs = { {
.num_blocks = SERPENT_PARALLEL_BLOCKS,
.fn_u = { .ecb = GLUE_FUNC_CAST(serpent_dec_blk_xway) }
.fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_dec_8way_avx) }
}, {
.num_blocks = 1,
.fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_decrypt) }
......@@ -129,7 +98,7 @@ static const struct common_glue_ctx serpent_dec_cbc = {
.funcs = { {
.num_blocks = SERPENT_PARALLEL_BLOCKS,
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_decrypt_cbc_xway) }
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_cbc_dec_8way_avx) }
}, {
.num_blocks = 1,
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__serpent_decrypt) }
......@@ -193,7 +162,7 @@ static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) {
serpent_enc_blk_xway(ctx->ctx, srcdst, srcdst);
serpent_ecb_enc_8way_avx(ctx->ctx, srcdst, srcdst);
return;
}
......@@ -210,7 +179,7 @@ static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) {
serpent_dec_blk_xway(ctx->ctx, srcdst, srcdst);
serpent_ecb_dec_8way_avx(ctx->ctx, srcdst, srcdst);
return;
}
......
......@@ -59,19 +59,19 @@ static void serpent_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src)
u128_xor(dst + (j + 1), dst + (j + 1), ivs + j);
}
static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv)
static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
be128 ctrblk;
u128_to_be128(&ctrblk, iv);
u128_inc(iv);
le128_to_be128(&ctrblk, iv);
le128_inc(iv);
__serpent_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
u128_xor(dst, src, (u128 *)&ctrblk);
}
static void serpent_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src,
u128 *iv)
le128 *iv)
{
be128 ctrblks[SERPENT_PARALLEL_BLOCKS];
unsigned int i;
......@@ -80,8 +80,8 @@ static void serpent_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src,
if (dst != src)
dst[i] = src[i];
u128_to_be128(&ctrblks[i], iv);
u128_inc(iv);
le128_to_be128(&ctrblks[i], iv);
le128_inc(iv);
}
serpent_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks);
......
......@@ -23,7 +23,16 @@
*
*/
#include "glue_helper-asm-avx.S"
.file "twofish-avx-x86_64-asm_64.S"
.data
.align 16
.Lbswap128_mask:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
.text
/* structure of crypto context */
......@@ -217,69 +226,45 @@
vpunpcklqdq x3, t2, x2; \
vpunpckhqdq x3, t2, x3;
#define inpack_blocks(in, x0, x1, x2, x3, wkey, t0, t1, t2) \
vpxor (0*4*4)(in), wkey, x0; \
vpxor (1*4*4)(in), wkey, x1; \
vpxor (2*4*4)(in), wkey, x2; \
vpxor (3*4*4)(in), wkey, x3; \
\
transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
#define outunpack_blocks(out, x0, x1, x2, x3, wkey, t0, t1, t2) \
transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
\
#define inpack_blocks(x0, x1, x2, x3, wkey, t0, t1, t2) \
vpxor x0, wkey, x0; \
vmovdqu x0, (0*4*4)(out); \
vpxor x1, wkey, x1; \
vmovdqu x1, (1*4*4)(out); \
vpxor x2, wkey, x2; \
vmovdqu x2, (2*4*4)(out); \
vpxor x3, wkey, x3; \
vmovdqu x3, (3*4*4)(out);
\
transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
#define outunpack_xor_blocks(out, x0, x1, x2, x3, wkey, t0, t1, t2) \
#define outunpack_blocks(x0, x1, x2, x3, wkey, t0, t1, t2) \
transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
\
vpxor x0, wkey, x0; \
vpxor (0*4*4)(out), x0, x0; \
vmovdqu x0, (0*4*4)(out); \
vpxor x1, wkey, x1; \
vpxor (1*4*4)(out), x1, x1; \
vmovdqu x1, (1*4*4)(out); \
vpxor x2, wkey, x2; \
vpxor (2*4*4)(out), x2, x2; \
vmovdqu x2, (2*4*4)(out); \
vpxor x3, wkey, x3; \
vpxor (3*4*4)(out), x3, x3; \
vmovdqu x3, (3*4*4)(out);
vpxor x3, wkey, x3;
.align 8
.global __twofish_enc_blk_8way
.type __twofish_enc_blk_8way,@function;
.type __twofish_enc_blk8,@function;
__twofish_enc_blk_8way:
__twofish_enc_blk8:
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
* %rcx: bool, if true: xor output
* RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: blocks
* output:
* RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2: encrypted blocks
*/
vmovdqu w(CTX), RK1;
pushq %rbp;
pushq %rbx;
pushq %rcx;
vmovdqu w(CTX), RK1;
leaq (4*4*4)(%rdx), %rax;
inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RK1, RX0, RY0, RK2);
inpack_blocks(RA1, RB1, RC1, RD1, RK1, RX0, RY0, RK2);
preload_rgi(RA1);
rotate_1l(RD1);
inpack_blocks(%rax, RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2);
inpack_blocks(RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2);
rotate_1l(RD2);
movq %rsi, %r11;
encrypt_cycle(0);
encrypt_cycle(1);
encrypt_cycle(2);
......@@ -295,47 +280,33 @@ __twofish_enc_blk_8way:
popq %rbx;
popq %rbp;
leaq (4*4*4)(%r11), %rax;
testb %cl, %cl;
jnz __enc_xor8;
outunpack_blocks(%r11, RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2);
outunpack_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2);
ret;
__enc_xor8:
outunpack_xor_blocks(%r11, RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2);
outunpack_xor_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2);
outunpack_blocks(RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2);
outunpack_blocks(RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2);
ret;
.align 8
.global twofish_dec_blk_8way
.type twofish_dec_blk_8way,@function;
.type __twofish_dec_blk8,@function;
twofish_dec_blk_8way:
__twofish_dec_blk8:
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
* RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2: encrypted blocks
* output:
* RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: decrypted blocks
*/
vmovdqu (w+4*4)(CTX), RK1;
pushq %rbp;
pushq %rbx;
vmovdqu (w+4*4)(CTX), RK1;
leaq (4*4*4)(%rdx), %rax;
inpack_blocks(%rdx, RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2);
inpack_blocks(RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2);
preload_rgi(RC1);
rotate_1l(RA1);
inpack_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2);
inpack_blocks(RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2);
rotate_1l(RA2);
movq %rsi, %r11;
decrypt_cycle(7);
decrypt_cycle(6);
decrypt_cycle(5);
......@@ -350,8 +321,103 @@ twofish_dec_blk_8way:
popq %rbx;
popq %rbp;
leaq (4*4*4)(%r11), %rax;
outunpack_blocks(%r11, RA1, RB1, RC1, RD1, RK1, RX0, RY0, RK2);
outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2);
outunpack_blocks(RA1, RB1, RC1, RD1, RK1, RX0, RY0, RK2);
outunpack_blocks(RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2);
ret;
.align 8
.global twofish_ecb_enc_8way
.type twofish_ecb_enc_8way,@function;
twofish_ecb_enc_8way:
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
*/
movq %rsi, %r11;
load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
call __twofish_enc_blk8;
store_8way(%r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2);
ret;
.align 8
.global twofish_ecb_dec_8way
.type twofish_ecb_dec_8way,@function;
twofish_ecb_dec_8way:
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
*/
movq %rsi, %r11;
load_8way(%rdx, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2);
call __twofish_dec_blk8;
store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
ret;
.align 8
.global twofish_cbc_dec_8way
.type twofish_cbc_dec_8way,@function;
twofish_cbc_dec_8way:
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
*/
pushq %r12;
movq %rsi, %r11;
movq %rdx, %r12;
load_8way(%rdx, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2);
call __twofish_dec_blk8;
store_cbc_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
popq %r12;
ret;
.align 8
.global twofish_ctr_8way
.type twofish_ctr_8way,@function;
twofish_ctr_8way:
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
* %rcx: iv (little endian, 128bit)
*/
pushq %r12;
movq %rsi, %r11;
movq %rdx, %r12;
load_ctr_8way(%rcx, .Lbswap128_mask, RA1, RB1, RC1, RD1, RA2, RB2, RC2,
RD2, RX0, RX1, RY0);
call __twofish_enc_blk8;
store_ctr_8way(%r12, %r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2);
popq %r12;
ret;
......@@ -45,66 +45,23 @@
#define TWOFISH_PARALLEL_BLOCKS 8
static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src)
{
__twofish_enc_blk_3way(ctx, dst, src, false);
}
/* 8-way parallel cipher functions */
asmlinkage void __twofish_enc_blk_8way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src, bool xor);
asmlinkage void twofish_dec_blk_8way(struct twofish_ctx *ctx, u8 *dst,
asmlinkage void twofish_ecb_enc_8way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void twofish_ecb_dec_8way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src);
static inline void twofish_enc_blk_xway(struct twofish_ctx *ctx, u8 *dst,
const u8 *src)
{
__twofish_enc_blk_8way(ctx, dst, src, false);
}
static inline void twofish_enc_blk_xway_xor(struct twofish_ctx *ctx, u8 *dst,
const u8 *src)
{
__twofish_enc_blk_8way(ctx, dst, src, true);
}
asmlinkage void twofish_cbc_dec_8way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void twofish_ctr_8way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
static inline void twofish_dec_blk_xway(struct twofish_ctx *ctx, u8 *dst,
static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src)
{
twofish_dec_blk_8way(ctx, dst, src);
}
static void twofish_dec_blk_cbc_xway(void *ctx, u128 *dst, const u128 *src)
{
u128 ivs[TWOFISH_PARALLEL_BLOCKS - 1];
unsigned int j;
for (j = 0; j < TWOFISH_PARALLEL_BLOCKS - 1; j++)
ivs[j] = src[j];
twofish_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src);
for (j = 0; j < TWOFISH_PARALLEL_BLOCKS - 1; j++)
u128_xor(dst + (j + 1), dst + (j + 1), ivs + j);
__twofish_enc_blk_3way(ctx, dst, src, false);
}
static void twofish_enc_blk_ctr_xway(void *ctx, u128 *dst, const u128 *src,
u128 *iv)
{
be128 ctrblks[TWOFISH_PARALLEL_BLOCKS];
unsigned int i;
for (i = 0; i < TWOFISH_PARALLEL_BLOCKS; i++) {
if (dst != src)
dst[i] = src[i];
u128_to_be128(&ctrblks[i], iv);
u128_inc(iv);
}
twofish_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks);
}
static const struct common_glue_ctx twofish_enc = {
.num_funcs = 3,
......@@ -112,7 +69,7 @@ static const struct common_glue_ctx twofish_enc = {
.funcs = { {
.num_blocks = TWOFISH_PARALLEL_BLOCKS,
.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_xway) }
.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_enc_8way) }
}, {
.num_blocks = 3,
.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) }
......@@ -128,7 +85,7 @@ static const struct common_glue_ctx twofish_ctr = {
.funcs = { {
.num_blocks = TWOFISH_PARALLEL_BLOCKS,
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr_xway) }
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_ctr_8way) }
}, {
.num_blocks = 3,
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr_3way) }
......@@ -144,7 +101,7 @@ static const struct common_glue_ctx twofish_dec = {
.funcs = { {
.num_blocks = TWOFISH_PARALLEL_BLOCKS,
.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_xway) }
.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_dec_8way) }
}, {
.num_blocks = 3,
.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) }
......@@ -160,7 +117,7 @@ static const struct common_glue_ctx twofish_dec_cbc = {
.funcs = { {
.num_blocks = TWOFISH_PARALLEL_BLOCKS,
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_xway) }
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_cbc_dec_8way) }
}, {
.num_blocks = 3,
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) }
......@@ -227,7 +184,7 @@ static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes);
if (nbytes == bsize * TWOFISH_PARALLEL_BLOCKS) {
twofish_enc_blk_xway(ctx->ctx, srcdst, srcdst);
twofish_ecb_enc_8way(ctx->ctx, srcdst, srcdst);
return;
}
......@@ -249,7 +206,7 @@ static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes);
if (nbytes == bsize * TWOFISH_PARALLEL_BLOCKS) {
twofish_dec_blk_xway(ctx->ctx, srcdst, srcdst);
twofish_ecb_dec_8way(ctx->ctx, srcdst, srcdst);
return;
}
......
......@@ -62,15 +62,15 @@ void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src)
}
EXPORT_SYMBOL_GPL(twofish_dec_blk_cbc_3way);
void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv)
void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
be128 ctrblk;
if (dst != src)
*dst = *src;
u128_to_be128(&ctrblk, iv);
u128_inc(iv);
le128_to_be128(&ctrblk, iv);
le128_inc(iv);
twofish_enc_blk(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
u128_xor(dst, dst, (u128 *)&ctrblk);
......@@ -78,7 +78,7 @@ void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv)
EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr);
void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src,
u128 *iv)
le128 *iv)
{
be128 ctrblks[3];
......@@ -88,12 +88,12 @@ void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src,
dst[2] = src[2];
}
u128_to_be128(&ctrblks[0], iv);
u128_inc(iv);
u128_to_be128(&ctrblks[1], iv);
u128_inc(iv);
u128_to_be128(&ctrblks[2], iv);
u128_inc(iv);
le128_to_be128(&ctrblks[0], iv);
le128_inc(iv);
le128_to_be128(&ctrblks[1], iv);
le128_inc(iv);
le128_to_be128(&ctrblks[2], iv);
le128_inc(iv);
twofish_enc_blk_xor_3way(ctx, (u8 *)dst, (u8 *)ctrblks);
}
......
#ifndef ASM_X86_CAMELLIA_H
#define ASM_X86_CAMELLIA_H
#include <linux/kernel.h>
#include <linux/crypto.h>
#define CAMELLIA_MIN_KEY_SIZE 16
#define CAMELLIA_MAX_KEY_SIZE 32
#define CAMELLIA_BLOCK_SIZE 16
#define CAMELLIA_TABLE_BYTE_LEN 272
#define CAMELLIA_PARALLEL_BLOCKS 2
struct camellia_ctx {
u64 key_table[CAMELLIA_TABLE_BYTE_LEN / sizeof(u64)];
u32 key_length;
};
struct camellia_lrw_ctx {
struct lrw_table_ctx lrw_table;
struct camellia_ctx camellia_ctx;
};
struct camellia_xts_ctx {
struct camellia_ctx tweak_ctx;
struct camellia_ctx crypt_ctx;
};
extern int __camellia_setkey(struct camellia_ctx *cctx,
const unsigned char *key,
unsigned int key_len, u32 *flags);
extern int lrw_camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
unsigned int keylen);
extern void lrw_camellia_exit_tfm(struct crypto_tfm *tfm);
extern int xts_camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
unsigned int keylen);
/* regular block cipher functions */
asmlinkage void __camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst,
const u8 *src, bool xor);
asmlinkage void camellia_dec_blk(struct camellia_ctx *ctx, u8 *dst,
const u8 *src);
/* 2-way parallel cipher functions */
asmlinkage void __camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src, bool xor);
asmlinkage void camellia_dec_blk_2way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src);
static inline void camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst,
const u8 *src)
{
__camellia_enc_blk(ctx, dst, src, false);
}
static inline void camellia_enc_blk_xor(struct camellia_ctx *ctx, u8 *dst,
const u8 *src)
{
__camellia_enc_blk(ctx, dst, src, true);
}
static inline void camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src)
{
__camellia_enc_blk_2way(ctx, dst, src, false);
}
static inline void camellia_enc_blk_xor_2way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src)
{
__camellia_enc_blk_2way(ctx, dst, src, true);
}
/* glue helpers */
extern void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src);
extern void camellia_crypt_ctr(void *ctx, u128 *dst, const u128 *src,
le128 *iv);
extern void camellia_crypt_ctr_2way(void *ctx, u128 *dst, const u128 *src,
le128 *iv);
#endif /* ASM_X86_CAMELLIA_H */
......@@ -13,7 +13,7 @@
typedef void (*common_glue_func_t)(void *ctx, u8 *dst, const u8 *src);
typedef void (*common_glue_cbc_func_t)(void *ctx, u128 *dst, const u128 *src);
typedef void (*common_glue_ctr_func_t)(void *ctx, u128 *dst, const u128 *src,
u128 *iv);
le128 *iv);
#define GLUE_FUNC_CAST(fn) ((common_glue_func_t)(fn))
#define GLUE_CBC_FUNC_CAST(fn) ((common_glue_cbc_func_t)(fn))
......@@ -71,23 +71,29 @@ static inline void glue_fpu_end(bool fpu_enabled)
kernel_fpu_end();
}
static inline void u128_to_be128(be128 *dst, const u128 *src)
static inline void le128_to_be128(be128 *dst, const le128 *src)
{
dst->a = cpu_to_be64(src->a);
dst->b = cpu_to_be64(src->b);
dst->a = cpu_to_be64(le64_to_cpu(src->a));
dst->b = cpu_to_be64(le64_to_cpu(src->b));
}
static inline void be128_to_u128(u128 *dst, const be128 *src)
static inline void be128_to_le128(le128 *dst, const be128 *src)
{
dst->a = be64_to_cpu(src->a);
dst->b = be64_to_cpu(src->b);
dst->a = cpu_to_le64(be64_to_cpu(src->a));
dst->b = cpu_to_le64(be64_to_cpu(src->b));
}
static inline void u128_inc(u128 *i)
static inline void le128_inc(le128 *i)
{
i->b++;
if (!i->b)
i->a++;
u64 a = le64_to_cpu(i->a);
u64 b = le64_to_cpu(i->b);
b++;
if (!b)
a++;
i->a = cpu_to_le64(a);
i->b = cpu_to_le64(b);
}
extern int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
......
......@@ -6,27 +6,14 @@
#define SERPENT_PARALLEL_BLOCKS 8
asmlinkage void __serpent_enc_blk_8way_avx(struct serpent_ctx *ctx, u8 *dst,
const u8 *src, bool xor);
asmlinkage void serpent_dec_blk_8way_avx(struct serpent_ctx *ctx, u8 *dst,
asmlinkage void serpent_ecb_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void serpent_ecb_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst,
const u8 *src);
static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst,
const u8 *src)
{
__serpent_enc_blk_8way_avx(ctx, dst, src, false);
}
static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst,
const u8 *src)
{
__serpent_enc_blk_8way_avx(ctx, dst, src, true);
}
static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst,
const u8 *src)
{
serpent_dec_blk_8way_avx(ctx, dst, src);
}
asmlinkage void serpent_cbc_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void serpent_ctr_8way_avx(struct serpent_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
#endif
......@@ -31,9 +31,9 @@ asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst,
/* helpers from twofish_x86_64-3way module */
extern void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src);
extern void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src,
u128 *iv);
le128 *iv);
extern void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src,
u128 *iv);
le128 *iv);
extern int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
unsigned int keylen);
......
......@@ -324,9 +324,19 @@ config CRYPTO_CRC32C
by iSCSI for header and data digests and by others.
See Castagnoli93. Module will be crc32c.
config CRYPTO_CRC32C_X86_64
bool
depends on X86 && 64BIT
select CRYPTO_HASH
help
In Intel processor with SSE4.2 supported, the processor will
support CRC32C calculation using hardware accelerated CRC32
instruction optimized with PCLMULQDQ instruction when available.
config CRYPTO_CRC32C_INTEL
tristate "CRC32c INTEL hardware acceleration"
depends on X86
select CRYPTO_CRC32C_X86_64 if 64BIT
select CRYPTO_HASH
help
In Intel processor with SSE4.2 supported, the processor will
......@@ -793,6 +803,28 @@ config CRYPTO_CAMELLIA_X86_64
See also:
<https://info.isl.ntt.co.jp/crypt/eng/camellia/index_s.html>
config CRYPTO_CAMELLIA_AESNI_AVX_X86_64
tristate "Camellia cipher algorithm (x86_64/AES-NI/AVX)"
depends on X86 && 64BIT
depends on CRYPTO
select CRYPTO_ALGAPI
select CRYPTO_CRYPTD
select CRYPTO_ABLK_HELPER_X86
select CRYPTO_GLUE_HELPER_X86
select CRYPTO_CAMELLIA_X86_64
select CRYPTO_LRW
select CRYPTO_XTS
help
Camellia cipher algorithm module (x86_64/AES-NI/AVX).
Camellia is a symmetric key block cipher developed jointly
at NTT and Mitsubishi Electric Corporation.
The Camellia specifies three key sizes: 128, 192 and 256 bits.
See also:
<https://info.isl.ntt.co.jp/crypt/eng/camellia/index_s.html>
config CRYPTO_CAMELLIA_SPARC64
tristate "Camellia cipher algorithm (SPARC64)"
depends on SPARC64
......@@ -809,9 +841,16 @@ config CRYPTO_CAMELLIA_SPARC64
See also:
<https://info.isl.ntt.co.jp/crypt/eng/camellia/index_s.html>
config CRYPTO_CAST_COMMON
tristate
help
Common parts of the CAST cipher algorithms shared by the
generic c and the assembler implementations.
config CRYPTO_CAST5
tristate "CAST5 (CAST-128) cipher algorithm"
select CRYPTO_ALGAPI
select CRYPTO_CAST_COMMON
help
The CAST5 encryption algorithm (synonymous with CAST-128) is
described in RFC2144.
......@@ -822,6 +861,7 @@ config CRYPTO_CAST5_AVX_X86_64
select CRYPTO_ALGAPI
select CRYPTO_CRYPTD
select CRYPTO_ABLK_HELPER_X86
select CRYPTO_CAST_COMMON
select CRYPTO_CAST5
help
The CAST5 encryption algorithm (synonymous with CAST-128) is
......@@ -833,6 +873,7 @@ config CRYPTO_CAST5_AVX_X86_64
config CRYPTO_CAST6
tristate "CAST6 (CAST-256) cipher algorithm"
select CRYPTO_ALGAPI
select CRYPTO_CAST_COMMON
help
The CAST6 encryption algorithm (synonymous with CAST-256) is
described in RFC2612.
......@@ -844,6 +885,7 @@ config CRYPTO_CAST6_AVX_X86_64
select CRYPTO_CRYPTD
select CRYPTO_ABLK_HELPER_X86
select CRYPTO_GLUE_HELPER_X86
select CRYPTO_CAST_COMMON
select CRYPTO_CAST6
select CRYPTO_LRW
select CRYPTO_XTS
......
......@@ -68,6 +68,7 @@ obj-$(CONFIG_CRYPTO_TWOFISH_COMMON) += twofish_common.o
obj-$(CONFIG_CRYPTO_SERPENT) += serpent_generic.o
obj-$(CONFIG_CRYPTO_AES) += aes_generic.o
obj-$(CONFIG_CRYPTO_CAMELLIA) += camellia_generic.o
obj-$(CONFIG_CRYPTO_CAST_COMMON) += cast_common.o
obj-$(CONFIG_CRYPTO_CAST5) += cast5_generic.o
obj-$(CONFIG_CRYPTO_CAST6) += cast6_generic.o
obj-$(CONFIG_CRYPTO_ARC4) += arc4.o
......
......@@ -30,275 +30,6 @@
#include <linux/types.h>
#include <crypto/cast5.h>
const u32 cast5_s1[256] = {
0x30fb40d4, 0x9fa0ff0b, 0x6beccd2f, 0x3f258c7a, 0x1e213f2f,
0x9c004dd3, 0x6003e540, 0xcf9fc949,
0xbfd4af27, 0x88bbbdb5, 0xe2034090, 0x98d09675, 0x6e63a0e0,
0x15c361d2, 0xc2e7661d, 0x22d4ff8e,
0x28683b6f, 0xc07fd059, 0xff2379c8, 0x775f50e2, 0x43c340d3,
0xdf2f8656, 0x887ca41a, 0xa2d2bd2d,
0xa1c9e0d6, 0x346c4819, 0x61b76d87, 0x22540f2f, 0x2abe32e1,
0xaa54166b, 0x22568e3a, 0xa2d341d0,
0x66db40c8, 0xa784392f, 0x004dff2f, 0x2db9d2de, 0x97943fac,
0x4a97c1d8, 0x527644b7, 0xb5f437a7,
0xb82cbaef, 0xd751d159, 0x6ff7f0ed, 0x5a097a1f, 0x827b68d0,
0x90ecf52e, 0x22b0c054, 0xbc8e5935,
0x4b6d2f7f, 0x50bb64a2, 0xd2664910, 0xbee5812d, 0xb7332290,
0xe93b159f, 0xb48ee411, 0x4bff345d,
0xfd45c240, 0xad31973f, 0xc4f6d02e, 0x55fc8165, 0xd5b1caad,
0xa1ac2dae, 0xa2d4b76d, 0xc19b0c50,
0x882240f2, 0x0c6e4f38, 0xa4e4bfd7, 0x4f5ba272, 0x564c1d2f,
0xc59c5319, 0xb949e354, 0xb04669fe,
0xb1b6ab8a, 0xc71358dd, 0x6385c545, 0x110f935d, 0x57538ad5,
0x6a390493, 0xe63d37e0, 0x2a54f6b3,
0x3a787d5f, 0x6276a0b5, 0x19a6fcdf, 0x7a42206a, 0x29f9d4d5,
0xf61b1891, 0xbb72275e, 0xaa508167,
0x38901091, 0xc6b505eb, 0x84c7cb8c, 0x2ad75a0f, 0x874a1427,
0xa2d1936b, 0x2ad286af, 0xaa56d291,
0xd7894360, 0x425c750d, 0x93b39e26, 0x187184c9, 0x6c00b32d,
0x73e2bb14, 0xa0bebc3c, 0x54623779,
0x64459eab, 0x3f328b82, 0x7718cf82, 0x59a2cea6, 0x04ee002e,
0x89fe78e6, 0x3fab0950, 0x325ff6c2,
0x81383f05, 0x6963c5c8, 0x76cb5ad6, 0xd49974c9, 0xca180dcf,
0x380782d5, 0xc7fa5cf6, 0x8ac31511,
0x35e79e13, 0x47da91d0, 0xf40f9086, 0xa7e2419e, 0x31366241,
0x051ef495, 0xaa573b04, 0x4a805d8d,
0x548300d0, 0x00322a3c, 0xbf64cddf, 0xba57a68e, 0x75c6372b,
0x50afd341, 0xa7c13275, 0x915a0bf5,
0x6b54bfab, 0x2b0b1426, 0xab4cc9d7, 0x449ccd82, 0xf7fbf265,
0xab85c5f3, 0x1b55db94, 0xaad4e324,
0xcfa4bd3f, 0x2deaa3e2, 0x9e204d02, 0xc8bd25ac, 0xeadf55b3,
0xd5bd9e98, 0xe31231b2, 0x2ad5ad6c,
0x954329de, 0xadbe4528, 0xd8710f69, 0xaa51c90f, 0xaa786bf6,
0x22513f1e, 0xaa51a79b, 0x2ad344cc,
0x7b5a41f0, 0xd37cfbad, 0x1b069505, 0x41ece491, 0xb4c332e6,
0x032268d4, 0xc9600acc, 0xce387e6d,
0xbf6bb16c, 0x6a70fb78, 0x0d03d9c9, 0xd4df39de, 0xe01063da,
0x4736f464, 0x5ad328d8, 0xb347cc96,
0x75bb0fc3, 0x98511bfb, 0x4ffbcc35, 0xb58bcf6a, 0xe11f0abc,
0xbfc5fe4a, 0xa70aec10, 0xac39570a,
0x3f04442f, 0x6188b153, 0xe0397a2e, 0x5727cb79, 0x9ceb418f,
0x1cacd68d, 0x2ad37c96, 0x0175cb9d,
0xc69dff09, 0xc75b65f0, 0xd9db40d8, 0xec0e7779, 0x4744ead4,
0xb11c3274, 0xdd24cb9e, 0x7e1c54bd,
0xf01144f9, 0xd2240eb1, 0x9675b3fd, 0xa3ac3755, 0xd47c27af,
0x51c85f4d, 0x56907596, 0xa5bb15e6,
0x580304f0, 0xca042cf1, 0x011a37ea, 0x8dbfaadb, 0x35ba3e4a,
0x3526ffa0, 0xc37b4d09, 0xbc306ed9,
0x98a52666, 0x5648f725, 0xff5e569d, 0x0ced63d0, 0x7c63b2cf,
0x700b45e1, 0xd5ea50f1, 0x85a92872,
0xaf1fbda7, 0xd4234870, 0xa7870bf3, 0x2d3b4d79, 0x42e04198,
0x0cd0ede7, 0x26470db8, 0xf881814c,
0x474d6ad7, 0x7c0c5e5c, 0xd1231959, 0x381b7298, 0xf5d2f4db,
0xab838653, 0x6e2f1e23, 0x83719c9e,
0xbd91e046, 0x9a56456e, 0xdc39200c, 0x20c8c571, 0x962bda1c,
0xe1e696ff, 0xb141ab08, 0x7cca89b9,
0x1a69e783, 0x02cc4843, 0xa2f7c579, 0x429ef47d, 0x427b169c,
0x5ac9f049, 0xdd8f0f00, 0x5c8165bf
};
EXPORT_SYMBOL_GPL(cast5_s1);
const u32 cast5_s2[256] = {
0x1f201094, 0xef0ba75b, 0x69e3cf7e, 0x393f4380, 0xfe61cf7a,
0xeec5207a, 0x55889c94, 0x72fc0651,
0xada7ef79, 0x4e1d7235, 0xd55a63ce, 0xde0436ba, 0x99c430ef,
0x5f0c0794, 0x18dcdb7d, 0xa1d6eff3,
0xa0b52f7b, 0x59e83605, 0xee15b094, 0xe9ffd909, 0xdc440086,
0xef944459, 0xba83ccb3, 0xe0c3cdfb,
0xd1da4181, 0x3b092ab1, 0xf997f1c1, 0xa5e6cf7b, 0x01420ddb,
0xe4e7ef5b, 0x25a1ff41, 0xe180f806,
0x1fc41080, 0x179bee7a, 0xd37ac6a9, 0xfe5830a4, 0x98de8b7f,
0x77e83f4e, 0x79929269, 0x24fa9f7b,
0xe113c85b, 0xacc40083, 0xd7503525, 0xf7ea615f, 0x62143154,
0x0d554b63, 0x5d681121, 0xc866c359,
0x3d63cf73, 0xcee234c0, 0xd4d87e87, 0x5c672b21, 0x071f6181,
0x39f7627f, 0x361e3084, 0xe4eb573b,
0x602f64a4, 0xd63acd9c, 0x1bbc4635, 0x9e81032d, 0x2701f50c,
0x99847ab4, 0xa0e3df79, 0xba6cf38c,
0x10843094, 0x2537a95e, 0xf46f6ffe, 0xa1ff3b1f, 0x208cfb6a,
0x8f458c74, 0xd9e0a227, 0x4ec73a34,
0xfc884f69, 0x3e4de8df, 0xef0e0088, 0x3559648d, 0x8a45388c,
0x1d804366, 0x721d9bfd, 0xa58684bb,
0xe8256333, 0x844e8212, 0x128d8098, 0xfed33fb4, 0xce280ae1,
0x27e19ba5, 0xd5a6c252, 0xe49754bd,
0xc5d655dd, 0xeb667064, 0x77840b4d, 0xa1b6a801, 0x84db26a9,
0xe0b56714, 0x21f043b7, 0xe5d05860,
0x54f03084, 0x066ff472, 0xa31aa153, 0xdadc4755, 0xb5625dbf,
0x68561be6, 0x83ca6b94, 0x2d6ed23b,
0xeccf01db, 0xa6d3d0ba, 0xb6803d5c, 0xaf77a709, 0x33b4a34c,
0x397bc8d6, 0x5ee22b95, 0x5f0e5304,
0x81ed6f61, 0x20e74364, 0xb45e1378, 0xde18639b, 0x881ca122,
0xb96726d1, 0x8049a7e8, 0x22b7da7b,
0x5e552d25, 0x5272d237, 0x79d2951c, 0xc60d894c, 0x488cb402,
0x1ba4fe5b, 0xa4b09f6b, 0x1ca815cf,
0xa20c3005, 0x8871df63, 0xb9de2fcb, 0x0cc6c9e9, 0x0beeff53,
0xe3214517, 0xb4542835, 0x9f63293c,
0xee41e729, 0x6e1d2d7c, 0x50045286, 0x1e6685f3, 0xf33401c6,
0x30a22c95, 0x31a70850, 0x60930f13,
0x73f98417, 0xa1269859, 0xec645c44, 0x52c877a9, 0xcdff33a6,
0xa02b1741, 0x7cbad9a2, 0x2180036f,
0x50d99c08, 0xcb3f4861, 0xc26bd765, 0x64a3f6ab, 0x80342676,
0x25a75e7b, 0xe4e6d1fc, 0x20c710e6,
0xcdf0b680, 0x17844d3b, 0x31eef84d, 0x7e0824e4, 0x2ccb49eb,
0x846a3bae, 0x8ff77888, 0xee5d60f6,
0x7af75673, 0x2fdd5cdb, 0xa11631c1, 0x30f66f43, 0xb3faec54,
0x157fd7fa, 0xef8579cc, 0xd152de58,
0xdb2ffd5e, 0x8f32ce19, 0x306af97a, 0x02f03ef8, 0x99319ad5,
0xc242fa0f, 0xa7e3ebb0, 0xc68e4906,
0xb8da230c, 0x80823028, 0xdcdef3c8, 0xd35fb171, 0x088a1bc8,
0xbec0c560, 0x61a3c9e8, 0xbca8f54d,
0xc72feffa, 0x22822e99, 0x82c570b4, 0xd8d94e89, 0x8b1c34bc,
0x301e16e6, 0x273be979, 0xb0ffeaa6,
0x61d9b8c6, 0x00b24869, 0xb7ffce3f, 0x08dc283b, 0x43daf65a,
0xf7e19798, 0x7619b72f, 0x8f1c9ba4,
0xdc8637a0, 0x16a7d3b1, 0x9fc393b7, 0xa7136eeb, 0xc6bcc63e,
0x1a513742, 0xef6828bc, 0x520365d6,
0x2d6a77ab, 0x3527ed4b, 0x821fd216, 0x095c6e2e, 0xdb92f2fb,
0x5eea29cb, 0x145892f5, 0x91584f7f,
0x5483697b, 0x2667a8cc, 0x85196048, 0x8c4bacea, 0x833860d4,
0x0d23e0f9, 0x6c387e8a, 0x0ae6d249,
0xb284600c, 0xd835731d, 0xdcb1c647, 0xac4c56ea, 0x3ebd81b3,
0x230eabb0, 0x6438bc87, 0xf0b5b1fa,
0x8f5ea2b3, 0xfc184642, 0x0a036b7a, 0x4fb089bd, 0x649da589,
0xa345415e, 0x5c038323, 0x3e5d3bb9,
0x43d79572, 0x7e6dd07c, 0x06dfdf1e, 0x6c6cc4ef, 0x7160a539,
0x73bfbe70, 0x83877605, 0x4523ecf1
};
EXPORT_SYMBOL_GPL(cast5_s2);
const u32 cast5_s3[256] = {
0x8defc240, 0x25fa5d9f, 0xeb903dbf, 0xe810c907, 0x47607fff,
0x369fe44b, 0x8c1fc644, 0xaececa90,
0xbeb1f9bf, 0xeefbcaea, 0xe8cf1950, 0x51df07ae, 0x920e8806,
0xf0ad0548, 0xe13c8d83, 0x927010d5,
0x11107d9f, 0x07647db9, 0xb2e3e4d4, 0x3d4f285e, 0xb9afa820,
0xfade82e0, 0xa067268b, 0x8272792e,
0x553fb2c0, 0x489ae22b, 0xd4ef9794, 0x125e3fbc, 0x21fffcee,
0x825b1bfd, 0x9255c5ed, 0x1257a240,
0x4e1a8302, 0xbae07fff, 0x528246e7, 0x8e57140e, 0x3373f7bf,
0x8c9f8188, 0xa6fc4ee8, 0xc982b5a5,
0xa8c01db7, 0x579fc264, 0x67094f31, 0xf2bd3f5f, 0x40fff7c1,
0x1fb78dfc, 0x8e6bd2c1, 0x437be59b,
0x99b03dbf, 0xb5dbc64b, 0x638dc0e6, 0x55819d99, 0xa197c81c,
0x4a012d6e, 0xc5884a28, 0xccc36f71,
0xb843c213, 0x6c0743f1, 0x8309893c, 0x0feddd5f, 0x2f7fe850,
0xd7c07f7e, 0x02507fbf, 0x5afb9a04,
0xa747d2d0, 0x1651192e, 0xaf70bf3e, 0x58c31380, 0x5f98302e,
0x727cc3c4, 0x0a0fb402, 0x0f7fef82,
0x8c96fdad, 0x5d2c2aae, 0x8ee99a49, 0x50da88b8, 0x8427f4a0,
0x1eac5790, 0x796fb449, 0x8252dc15,
0xefbd7d9b, 0xa672597d, 0xada840d8, 0x45f54504, 0xfa5d7403,
0xe83ec305, 0x4f91751a, 0x925669c2,
0x23efe941, 0xa903f12e, 0x60270df2, 0x0276e4b6, 0x94fd6574,
0x927985b2, 0x8276dbcb, 0x02778176,
0xf8af918d, 0x4e48f79e, 0x8f616ddf, 0xe29d840e, 0x842f7d83,
0x340ce5c8, 0x96bbb682, 0x93b4b148,
0xef303cab, 0x984faf28, 0x779faf9b, 0x92dc560d, 0x224d1e20,
0x8437aa88, 0x7d29dc96, 0x2756d3dc,
0x8b907cee, 0xb51fd240, 0xe7c07ce3, 0xe566b4a1, 0xc3e9615e,
0x3cf8209d, 0x6094d1e3, 0xcd9ca341,
0x5c76460e, 0x00ea983b, 0xd4d67881, 0xfd47572c, 0xf76cedd9,
0xbda8229c, 0x127dadaa, 0x438a074e,
0x1f97c090, 0x081bdb8a, 0x93a07ebe, 0xb938ca15, 0x97b03cff,
0x3dc2c0f8, 0x8d1ab2ec, 0x64380e51,
0x68cc7bfb, 0xd90f2788, 0x12490181, 0x5de5ffd4, 0xdd7ef86a,
0x76a2e214, 0xb9a40368, 0x925d958f,
0x4b39fffa, 0xba39aee9, 0xa4ffd30b, 0xfaf7933b, 0x6d498623,
0x193cbcfa, 0x27627545, 0x825cf47a,
0x61bd8ba0, 0xd11e42d1, 0xcead04f4, 0x127ea392, 0x10428db7,
0x8272a972, 0x9270c4a8, 0x127de50b,
0x285ba1c8, 0x3c62f44f, 0x35c0eaa5, 0xe805d231, 0x428929fb,
0xb4fcdf82, 0x4fb66a53, 0x0e7dc15b,
0x1f081fab, 0x108618ae, 0xfcfd086d, 0xf9ff2889, 0x694bcc11,
0x236a5cae, 0x12deca4d, 0x2c3f8cc5,
0xd2d02dfe, 0xf8ef5896, 0xe4cf52da, 0x95155b67, 0x494a488c,
0xb9b6a80c, 0x5c8f82bc, 0x89d36b45,
0x3a609437, 0xec00c9a9, 0x44715253, 0x0a874b49, 0xd773bc40,
0x7c34671c, 0x02717ef6, 0x4feb5536,
0xa2d02fff, 0xd2bf60c4, 0xd43f03c0, 0x50b4ef6d, 0x07478cd1,
0x006e1888, 0xa2e53f55, 0xb9e6d4bc,
0xa2048016, 0x97573833, 0xd7207d67, 0xde0f8f3d, 0x72f87b33,
0xabcc4f33, 0x7688c55d, 0x7b00a6b0,
0x947b0001, 0x570075d2, 0xf9bb88f8, 0x8942019e, 0x4264a5ff,
0x856302e0, 0x72dbd92b, 0xee971b69,
0x6ea22fde, 0x5f08ae2b, 0xaf7a616d, 0xe5c98767, 0xcf1febd2,
0x61efc8c2, 0xf1ac2571, 0xcc8239c2,
0x67214cb8, 0xb1e583d1, 0xb7dc3e62, 0x7f10bdce, 0xf90a5c38,
0x0ff0443d, 0x606e6dc6, 0x60543a49,
0x5727c148, 0x2be98a1d, 0x8ab41738, 0x20e1be24, 0xaf96da0f,
0x68458425, 0x99833be5, 0x600d457d,
0x282f9350, 0x8334b362, 0xd91d1120, 0x2b6d8da0, 0x642b1e31,
0x9c305a00, 0x52bce688, 0x1b03588a,
0xf7baefd5, 0x4142ed9c, 0xa4315c11, 0x83323ec5, 0xdfef4636,
0xa133c501, 0xe9d3531c, 0xee353783
};
EXPORT_SYMBOL_GPL(cast5_s3);
const u32 cast5_s4[256] = {
0x9db30420, 0x1fb6e9de, 0xa7be7bef, 0xd273a298, 0x4a4f7bdb,
0x64ad8c57, 0x85510443, 0xfa020ed1,
0x7e287aff, 0xe60fb663, 0x095f35a1, 0x79ebf120, 0xfd059d43,
0x6497b7b1, 0xf3641f63, 0x241e4adf,
0x28147f5f, 0x4fa2b8cd, 0xc9430040, 0x0cc32220, 0xfdd30b30,
0xc0a5374f, 0x1d2d00d9, 0x24147b15,
0xee4d111a, 0x0fca5167, 0x71ff904c, 0x2d195ffe, 0x1a05645f,
0x0c13fefe, 0x081b08ca, 0x05170121,
0x80530100, 0xe83e5efe, 0xac9af4f8, 0x7fe72701, 0xd2b8ee5f,
0x06df4261, 0xbb9e9b8a, 0x7293ea25,
0xce84ffdf, 0xf5718801, 0x3dd64b04, 0xa26f263b, 0x7ed48400,
0x547eebe6, 0x446d4ca0, 0x6cf3d6f5,
0x2649abdf, 0xaea0c7f5, 0x36338cc1, 0x503f7e93, 0xd3772061,
0x11b638e1, 0x72500e03, 0xf80eb2bb,
0xabe0502e, 0xec8d77de, 0x57971e81, 0xe14f6746, 0xc9335400,
0x6920318f, 0x081dbb99, 0xffc304a5,
0x4d351805, 0x7f3d5ce3, 0xa6c866c6, 0x5d5bcca9, 0xdaec6fea,
0x9f926f91, 0x9f46222f, 0x3991467d,
0xa5bf6d8e, 0x1143c44f, 0x43958302, 0xd0214eeb, 0x022083b8,
0x3fb6180c, 0x18f8931e, 0x281658e6,
0x26486e3e, 0x8bd78a70, 0x7477e4c1, 0xb506e07c, 0xf32d0a25,
0x79098b02, 0xe4eabb81, 0x28123b23,
0x69dead38, 0x1574ca16, 0xdf871b62, 0x211c40b7, 0xa51a9ef9,
0x0014377b, 0x041e8ac8, 0x09114003,
0xbd59e4d2, 0xe3d156d5, 0x4fe876d5, 0x2f91a340, 0x557be8de,
0x00eae4a7, 0x0ce5c2ec, 0x4db4bba6,
0xe756bdff, 0xdd3369ac, 0xec17b035, 0x06572327, 0x99afc8b0,
0x56c8c391, 0x6b65811c, 0x5e146119,
0x6e85cb75, 0xbe07c002, 0xc2325577, 0x893ff4ec, 0x5bbfc92d,
0xd0ec3b25, 0xb7801ab7, 0x8d6d3b24,
0x20c763ef, 0xc366a5fc, 0x9c382880, 0x0ace3205, 0xaac9548a,
0xeca1d7c7, 0x041afa32, 0x1d16625a,
0x6701902c, 0x9b757a54, 0x31d477f7, 0x9126b031, 0x36cc6fdb,
0xc70b8b46, 0xd9e66a48, 0x56e55a79,
0x026a4ceb, 0x52437eff, 0x2f8f76b4, 0x0df980a5, 0x8674cde3,
0xedda04eb, 0x17a9be04, 0x2c18f4df,
0xb7747f9d, 0xab2af7b4, 0xefc34d20, 0x2e096b7c, 0x1741a254,
0xe5b6a035, 0x213d42f6, 0x2c1c7c26,
0x61c2f50f, 0x6552daf9, 0xd2c231f8, 0x25130f69, 0xd8167fa2,
0x0418f2c8, 0x001a96a6, 0x0d1526ab,
0x63315c21, 0x5e0a72ec, 0x49bafefd, 0x187908d9, 0x8d0dbd86,
0x311170a7, 0x3e9b640c, 0xcc3e10d7,
0xd5cad3b6, 0x0caec388, 0xf73001e1, 0x6c728aff, 0x71eae2a1,
0x1f9af36e, 0xcfcbd12f, 0xc1de8417,
0xac07be6b, 0xcb44a1d8, 0x8b9b0f56, 0x013988c3, 0xb1c52fca,
0xb4be31cd, 0xd8782806, 0x12a3a4e2,
0x6f7de532, 0x58fd7eb6, 0xd01ee900, 0x24adffc2, 0xf4990fc5,
0x9711aac5, 0x001d7b95, 0x82e5e7d2,
0x109873f6, 0x00613096, 0xc32d9521, 0xada121ff, 0x29908415,
0x7fbb977f, 0xaf9eb3db, 0x29c9ed2a,
0x5ce2a465, 0xa730f32c, 0xd0aa3fe8, 0x8a5cc091, 0xd49e2ce7,
0x0ce454a9, 0xd60acd86, 0x015f1919,
0x77079103, 0xdea03af6, 0x78a8565e, 0xdee356df, 0x21f05cbe,
0x8b75e387, 0xb3c50651, 0xb8a5c3ef,
0xd8eeb6d2, 0xe523be77, 0xc2154529, 0x2f69efdf, 0xafe67afb,
0xf470c4b2, 0xf3e0eb5b, 0xd6cc9876,
0x39e4460c, 0x1fda8538, 0x1987832f, 0xca007367, 0xa99144f8,
0x296b299e, 0x492fc295, 0x9266beab,
0xb5676e69, 0x9bd3ddda, 0xdf7e052f, 0xdb25701c, 0x1b5e51ee,
0xf65324e6, 0x6afce36c, 0x0316cc04,
0x8644213e, 0xb7dc59d0, 0x7965291f, 0xccd6fd43, 0x41823979,
0x932bcdf6, 0xb657c34d, 0x4edfd282,
0x7ae5290c, 0x3cb9536b, 0x851e20fe, 0x9833557e, 0x13ecf0b0,
0xd3ffb372, 0x3f85c5c1, 0x0aef7ed2
};
EXPORT_SYMBOL_GPL(cast5_s4);
static const u32 s5[256] = {
0x7ec90c04, 0x2c6e74b9, 0x9b0e66df, 0xa6337911, 0xb86a7fff,
0x1dd358f5, 0x44dd9d44, 0x1731167f,
......@@ -564,10 +295,10 @@ static const u32 sb8[256] = {
0xeaee6801, 0x8db2a283, 0xea8bf59e
};
#define s1 cast5_s1
#define s2 cast5_s2
#define s3 cast5_s3
#define s4 cast5_s4
#define s1 cast_s1
#define s2 cast_s2
#define s3 cast_s3
#define s4 cast_s4
#define F1(D, m, r) ((I = ((m) + (D))), (I = rol32(I, (r))), \
(((s1[I >> 24] ^ s2[(I>>16)&0xff]) - s3[(I>>8)&0xff]) + s4[I&0xff]))
......
......@@ -27,10 +27,10 @@
#include <linux/types.h>
#include <crypto/cast6.h>
#define s1 cast6_s1
#define s2 cast6_s2
#define s3 cast6_s3
#define s4 cast6_s4
#define s1 cast_s1
#define s2 cast_s2
#define s3 cast_s3
#define s4 cast_s4
#define F1(D, r, m) ((I = ((m) + (D))), (I = rol32(I, (r))), \
(((s1[I >> 24] ^ s2[(I>>16)&0xff]) - s3[(I>>8)&0xff]) + s4[I&0xff]))
......@@ -39,278 +39,6 @@
#define F3(D, r, m) ((I = ((m) - (D))), (I = rol32(I, (r))), \
(((s1[I >> 24] + s2[(I>>16)&0xff]) ^ s3[(I>>8)&0xff]) - s4[I&0xff]))
const u32 cast6_s1[256] = {
0x30fb40d4, 0x9fa0ff0b, 0x6beccd2f, 0x3f258c7a, 0x1e213f2f,
0x9c004dd3, 0x6003e540, 0xcf9fc949,
0xbfd4af27, 0x88bbbdb5, 0xe2034090, 0x98d09675, 0x6e63a0e0,
0x15c361d2, 0xc2e7661d, 0x22d4ff8e,
0x28683b6f, 0xc07fd059, 0xff2379c8, 0x775f50e2, 0x43c340d3,
0xdf2f8656, 0x887ca41a, 0xa2d2bd2d,
0xa1c9e0d6, 0x346c4819, 0x61b76d87, 0x22540f2f, 0x2abe32e1,
0xaa54166b, 0x22568e3a, 0xa2d341d0,
0x66db40c8, 0xa784392f, 0x004dff2f, 0x2db9d2de, 0x97943fac,
0x4a97c1d8, 0x527644b7, 0xb5f437a7,
0xb82cbaef, 0xd751d159, 0x6ff7f0ed, 0x5a097a1f, 0x827b68d0,
0x90ecf52e, 0x22b0c054, 0xbc8e5935,
0x4b6d2f7f, 0x50bb64a2, 0xd2664910, 0xbee5812d, 0xb7332290,
0xe93b159f, 0xb48ee411, 0x4bff345d,
0xfd45c240, 0xad31973f, 0xc4f6d02e, 0x55fc8165, 0xd5b1caad,
0xa1ac2dae, 0xa2d4b76d, 0xc19b0c50,
0x882240f2, 0x0c6e4f38, 0xa4e4bfd7, 0x4f5ba272, 0x564c1d2f,
0xc59c5319, 0xb949e354, 0xb04669fe,
0xb1b6ab8a, 0xc71358dd, 0x6385c545, 0x110f935d, 0x57538ad5,
0x6a390493, 0xe63d37e0, 0x2a54f6b3,
0x3a787d5f, 0x6276a0b5, 0x19a6fcdf, 0x7a42206a, 0x29f9d4d5,
0xf61b1891, 0xbb72275e, 0xaa508167,
0x38901091, 0xc6b505eb, 0x84c7cb8c, 0x2ad75a0f, 0x874a1427,
0xa2d1936b, 0x2ad286af, 0xaa56d291,
0xd7894360, 0x425c750d, 0x93b39e26, 0x187184c9, 0x6c00b32d,
0x73e2bb14, 0xa0bebc3c, 0x54623779,
0x64459eab, 0x3f328b82, 0x7718cf82, 0x59a2cea6, 0x04ee002e,
0x89fe78e6, 0x3fab0950, 0x325ff6c2,
0x81383f05, 0x6963c5c8, 0x76cb5ad6, 0xd49974c9, 0xca180dcf,
0x380782d5, 0xc7fa5cf6, 0x8ac31511,
0x35e79e13, 0x47da91d0, 0xf40f9086, 0xa7e2419e, 0x31366241,
0x051ef495, 0xaa573b04, 0x4a805d8d,
0x548300d0, 0x00322a3c, 0xbf64cddf, 0xba57a68e, 0x75c6372b,
0x50afd341, 0xa7c13275, 0x915a0bf5,
0x6b54bfab, 0x2b0b1426, 0xab4cc9d7, 0x449ccd82, 0xf7fbf265,
0xab85c5f3, 0x1b55db94, 0xaad4e324,
0xcfa4bd3f, 0x2deaa3e2, 0x9e204d02, 0xc8bd25ac, 0xeadf55b3,
0xd5bd9e98, 0xe31231b2, 0x2ad5ad6c,
0x954329de, 0xadbe4528, 0xd8710f69, 0xaa51c90f, 0xaa786bf6,
0x22513f1e, 0xaa51a79b, 0x2ad344cc,
0x7b5a41f0, 0xd37cfbad, 0x1b069505, 0x41ece491, 0xb4c332e6,
0x032268d4, 0xc9600acc, 0xce387e6d,
0xbf6bb16c, 0x6a70fb78, 0x0d03d9c9, 0xd4df39de, 0xe01063da,
0x4736f464, 0x5ad328d8, 0xb347cc96,
0x75bb0fc3, 0x98511bfb, 0x4ffbcc35, 0xb58bcf6a, 0xe11f0abc,
0xbfc5fe4a, 0xa70aec10, 0xac39570a,
0x3f04442f, 0x6188b153, 0xe0397a2e, 0x5727cb79, 0x9ceb418f,
0x1cacd68d, 0x2ad37c96, 0x0175cb9d,
0xc69dff09, 0xc75b65f0, 0xd9db40d8, 0xec0e7779, 0x4744ead4,
0xb11c3274, 0xdd24cb9e, 0x7e1c54bd,
0xf01144f9, 0xd2240eb1, 0x9675b3fd, 0xa3ac3755, 0xd47c27af,
0x51c85f4d, 0x56907596, 0xa5bb15e6,
0x580304f0, 0xca042cf1, 0x011a37ea, 0x8dbfaadb, 0x35ba3e4a,
0x3526ffa0, 0xc37b4d09, 0xbc306ed9,
0x98a52666, 0x5648f725, 0xff5e569d, 0x0ced63d0, 0x7c63b2cf,
0x700b45e1, 0xd5ea50f1, 0x85a92872,
0xaf1fbda7, 0xd4234870, 0xa7870bf3, 0x2d3b4d79, 0x42e04198,
0x0cd0ede7, 0x26470db8, 0xf881814c,
0x474d6ad7, 0x7c0c5e5c, 0xd1231959, 0x381b7298, 0xf5d2f4db,
0xab838653, 0x6e2f1e23, 0x83719c9e,
0xbd91e046, 0x9a56456e, 0xdc39200c, 0x20c8c571, 0x962bda1c,
0xe1e696ff, 0xb141ab08, 0x7cca89b9,
0x1a69e783, 0x02cc4843, 0xa2f7c579, 0x429ef47d, 0x427b169c,
0x5ac9f049, 0xdd8f0f00, 0x5c8165bf
};
EXPORT_SYMBOL_GPL(cast6_s1);
const u32 cast6_s2[256] = {
0x1f201094, 0xef0ba75b, 0x69e3cf7e, 0x393f4380, 0xfe61cf7a,
0xeec5207a, 0x55889c94, 0x72fc0651,
0xada7ef79, 0x4e1d7235, 0xd55a63ce, 0xde0436ba, 0x99c430ef,
0x5f0c0794, 0x18dcdb7d, 0xa1d6eff3,
0xa0b52f7b, 0x59e83605, 0xee15b094, 0xe9ffd909, 0xdc440086,
0xef944459, 0xba83ccb3, 0xe0c3cdfb,
0xd1da4181, 0x3b092ab1, 0xf997f1c1, 0xa5e6cf7b, 0x01420ddb,
0xe4e7ef5b, 0x25a1ff41, 0xe180f806,
0x1fc41080, 0x179bee7a, 0xd37ac6a9, 0xfe5830a4, 0x98de8b7f,
0x77e83f4e, 0x79929269, 0x24fa9f7b,
0xe113c85b, 0xacc40083, 0xd7503525, 0xf7ea615f, 0x62143154,
0x0d554b63, 0x5d681121, 0xc866c359,
0x3d63cf73, 0xcee234c0, 0xd4d87e87, 0x5c672b21, 0x071f6181,
0x39f7627f, 0x361e3084, 0xe4eb573b,
0x602f64a4, 0xd63acd9c, 0x1bbc4635, 0x9e81032d, 0x2701f50c,
0x99847ab4, 0xa0e3df79, 0xba6cf38c,
0x10843094, 0x2537a95e, 0xf46f6ffe, 0xa1ff3b1f, 0x208cfb6a,
0x8f458c74, 0xd9e0a227, 0x4ec73a34,
0xfc884f69, 0x3e4de8df, 0xef0e0088, 0x3559648d, 0x8a45388c,
0x1d804366, 0x721d9bfd, 0xa58684bb,
0xe8256333, 0x844e8212, 0x128d8098, 0xfed33fb4, 0xce280ae1,
0x27e19ba5, 0xd5a6c252, 0xe49754bd,
0xc5d655dd, 0xeb667064, 0x77840b4d, 0xa1b6a801, 0x84db26a9,
0xe0b56714, 0x21f043b7, 0xe5d05860,
0x54f03084, 0x066ff472, 0xa31aa153, 0xdadc4755, 0xb5625dbf,
0x68561be6, 0x83ca6b94, 0x2d6ed23b,
0xeccf01db, 0xa6d3d0ba, 0xb6803d5c, 0xaf77a709, 0x33b4a34c,
0x397bc8d6, 0x5ee22b95, 0x5f0e5304,
0x81ed6f61, 0x20e74364, 0xb45e1378, 0xde18639b, 0x881ca122,
0xb96726d1, 0x8049a7e8, 0x22b7da7b,
0x5e552d25, 0x5272d237, 0x79d2951c, 0xc60d894c, 0x488cb402,
0x1ba4fe5b, 0xa4b09f6b, 0x1ca815cf,
0xa20c3005, 0x8871df63, 0xb9de2fcb, 0x0cc6c9e9, 0x0beeff53,
0xe3214517, 0xb4542835, 0x9f63293c,
0xee41e729, 0x6e1d2d7c, 0x50045286, 0x1e6685f3, 0xf33401c6,
0x30a22c95, 0x31a70850, 0x60930f13,
0x73f98417, 0xa1269859, 0xec645c44, 0x52c877a9, 0xcdff33a6,
0xa02b1741, 0x7cbad9a2, 0x2180036f,
0x50d99c08, 0xcb3f4861, 0xc26bd765, 0x64a3f6ab, 0x80342676,
0x25a75e7b, 0xe4e6d1fc, 0x20c710e6,
0xcdf0b680, 0x17844d3b, 0x31eef84d, 0x7e0824e4, 0x2ccb49eb,
0x846a3bae, 0x8ff77888, 0xee5d60f6,
0x7af75673, 0x2fdd5cdb, 0xa11631c1, 0x30f66f43, 0xb3faec54,
0x157fd7fa, 0xef8579cc, 0xd152de58,
0xdb2ffd5e, 0x8f32ce19, 0x306af97a, 0x02f03ef8, 0x99319ad5,
0xc242fa0f, 0xa7e3ebb0, 0xc68e4906,
0xb8da230c, 0x80823028, 0xdcdef3c8, 0xd35fb171, 0x088a1bc8,
0xbec0c560, 0x61a3c9e8, 0xbca8f54d,
0xc72feffa, 0x22822e99, 0x82c570b4, 0xd8d94e89, 0x8b1c34bc,
0x301e16e6, 0x273be979, 0xb0ffeaa6,
0x61d9b8c6, 0x00b24869, 0xb7ffce3f, 0x08dc283b, 0x43daf65a,
0xf7e19798, 0x7619b72f, 0x8f1c9ba4,
0xdc8637a0, 0x16a7d3b1, 0x9fc393b7, 0xa7136eeb, 0xc6bcc63e,
0x1a513742, 0xef6828bc, 0x520365d6,
0x2d6a77ab, 0x3527ed4b, 0x821fd216, 0x095c6e2e, 0xdb92f2fb,
0x5eea29cb, 0x145892f5, 0x91584f7f,
0x5483697b, 0x2667a8cc, 0x85196048, 0x8c4bacea, 0x833860d4,
0x0d23e0f9, 0x6c387e8a, 0x0ae6d249,
0xb284600c, 0xd835731d, 0xdcb1c647, 0xac4c56ea, 0x3ebd81b3,
0x230eabb0, 0x6438bc87, 0xf0b5b1fa,
0x8f5ea2b3, 0xfc184642, 0x0a036b7a, 0x4fb089bd, 0x649da589,
0xa345415e, 0x5c038323, 0x3e5d3bb9,
0x43d79572, 0x7e6dd07c, 0x06dfdf1e, 0x6c6cc4ef, 0x7160a539,
0x73bfbe70, 0x83877605, 0x4523ecf1
};
EXPORT_SYMBOL_GPL(cast6_s2);
const u32 cast6_s3[256] = {
0x8defc240, 0x25fa5d9f, 0xeb903dbf, 0xe810c907, 0x47607fff,
0x369fe44b, 0x8c1fc644, 0xaececa90,
0xbeb1f9bf, 0xeefbcaea, 0xe8cf1950, 0x51df07ae, 0x920e8806,
0xf0ad0548, 0xe13c8d83, 0x927010d5,
0x11107d9f, 0x07647db9, 0xb2e3e4d4, 0x3d4f285e, 0xb9afa820,
0xfade82e0, 0xa067268b, 0x8272792e,
0x553fb2c0, 0x489ae22b, 0xd4ef9794, 0x125e3fbc, 0x21fffcee,
0x825b1bfd, 0x9255c5ed, 0x1257a240,
0x4e1a8302, 0xbae07fff, 0x528246e7, 0x8e57140e, 0x3373f7bf,
0x8c9f8188, 0xa6fc4ee8, 0xc982b5a5,
0xa8c01db7, 0x579fc264, 0x67094f31, 0xf2bd3f5f, 0x40fff7c1,
0x1fb78dfc, 0x8e6bd2c1, 0x437be59b,
0x99b03dbf, 0xb5dbc64b, 0x638dc0e6, 0x55819d99, 0xa197c81c,
0x4a012d6e, 0xc5884a28, 0xccc36f71,
0xb843c213, 0x6c0743f1, 0x8309893c, 0x0feddd5f, 0x2f7fe850,
0xd7c07f7e, 0x02507fbf, 0x5afb9a04,
0xa747d2d0, 0x1651192e, 0xaf70bf3e, 0x58c31380, 0x5f98302e,
0x727cc3c4, 0x0a0fb402, 0x0f7fef82,
0x8c96fdad, 0x5d2c2aae, 0x8ee99a49, 0x50da88b8, 0x8427f4a0,
0x1eac5790, 0x796fb449, 0x8252dc15,
0xefbd7d9b, 0xa672597d, 0xada840d8, 0x45f54504, 0xfa5d7403,
0xe83ec305, 0x4f91751a, 0x925669c2,
0x23efe941, 0xa903f12e, 0x60270df2, 0x0276e4b6, 0x94fd6574,
0x927985b2, 0x8276dbcb, 0x02778176,
0xf8af918d, 0x4e48f79e, 0x8f616ddf, 0xe29d840e, 0x842f7d83,
0x340ce5c8, 0x96bbb682, 0x93b4b148,
0xef303cab, 0x984faf28, 0x779faf9b, 0x92dc560d, 0x224d1e20,
0x8437aa88, 0x7d29dc96, 0x2756d3dc,
0x8b907cee, 0xb51fd240, 0xe7c07ce3, 0xe566b4a1, 0xc3e9615e,
0x3cf8209d, 0x6094d1e3, 0xcd9ca341,
0x5c76460e, 0x00ea983b, 0xd4d67881, 0xfd47572c, 0xf76cedd9,
0xbda8229c, 0x127dadaa, 0x438a074e,
0x1f97c090, 0x081bdb8a, 0x93a07ebe, 0xb938ca15, 0x97b03cff,
0x3dc2c0f8, 0x8d1ab2ec, 0x64380e51,
0x68cc7bfb, 0xd90f2788, 0x12490181, 0x5de5ffd4, 0xdd7ef86a,
0x76a2e214, 0xb9a40368, 0x925d958f,
0x4b39fffa, 0xba39aee9, 0xa4ffd30b, 0xfaf7933b, 0x6d498623,
0x193cbcfa, 0x27627545, 0x825cf47a,
0x61bd8ba0, 0xd11e42d1, 0xcead04f4, 0x127ea392, 0x10428db7,
0x8272a972, 0x9270c4a8, 0x127de50b,
0x285ba1c8, 0x3c62f44f, 0x35c0eaa5, 0xe805d231, 0x428929fb,
0xb4fcdf82, 0x4fb66a53, 0x0e7dc15b,
0x1f081fab, 0x108618ae, 0xfcfd086d, 0xf9ff2889, 0x694bcc11,
0x236a5cae, 0x12deca4d, 0x2c3f8cc5,
0xd2d02dfe, 0xf8ef5896, 0xe4cf52da, 0x95155b67, 0x494a488c,
0xb9b6a80c, 0x5c8f82bc, 0x89d36b45,
0x3a609437, 0xec00c9a9, 0x44715253, 0x0a874b49, 0xd773bc40,
0x7c34671c, 0x02717ef6, 0x4feb5536,
0xa2d02fff, 0xd2bf60c4, 0xd43f03c0, 0x50b4ef6d, 0x07478cd1,
0x006e1888, 0xa2e53f55, 0xb9e6d4bc,
0xa2048016, 0x97573833, 0xd7207d67, 0xde0f8f3d, 0x72f87b33,
0xabcc4f33, 0x7688c55d, 0x7b00a6b0,
0x947b0001, 0x570075d2, 0xf9bb88f8, 0x8942019e, 0x4264a5ff,
0x856302e0, 0x72dbd92b, 0xee971b69,
0x6ea22fde, 0x5f08ae2b, 0xaf7a616d, 0xe5c98767, 0xcf1febd2,
0x61efc8c2, 0xf1ac2571, 0xcc8239c2,
0x67214cb8, 0xb1e583d1, 0xb7dc3e62, 0x7f10bdce, 0xf90a5c38,
0x0ff0443d, 0x606e6dc6, 0x60543a49,
0x5727c148, 0x2be98a1d, 0x8ab41738, 0x20e1be24, 0xaf96da0f,
0x68458425, 0x99833be5, 0x600d457d,
0x282f9350, 0x8334b362, 0xd91d1120, 0x2b6d8da0, 0x642b1e31,
0x9c305a00, 0x52bce688, 0x1b03588a,
0xf7baefd5, 0x4142ed9c, 0xa4315c11, 0x83323ec5, 0xdfef4636,
0xa133c501, 0xe9d3531c, 0xee353783
};
EXPORT_SYMBOL_GPL(cast6_s3);
const u32 cast6_s4[256] = {
0x9db30420, 0x1fb6e9de, 0xa7be7bef, 0xd273a298, 0x4a4f7bdb,
0x64ad8c57, 0x85510443, 0xfa020ed1,
0x7e287aff, 0xe60fb663, 0x095f35a1, 0x79ebf120, 0xfd059d43,
0x6497b7b1, 0xf3641f63, 0x241e4adf,
0x28147f5f, 0x4fa2b8cd, 0xc9430040, 0x0cc32220, 0xfdd30b30,
0xc0a5374f, 0x1d2d00d9, 0x24147b15,
0xee4d111a, 0x0fca5167, 0x71ff904c, 0x2d195ffe, 0x1a05645f,
0x0c13fefe, 0x081b08ca, 0x05170121,
0x80530100, 0xe83e5efe, 0xac9af4f8, 0x7fe72701, 0xd2b8ee5f,
0x06df4261, 0xbb9e9b8a, 0x7293ea25,
0xce84ffdf, 0xf5718801, 0x3dd64b04, 0xa26f263b, 0x7ed48400,
0x547eebe6, 0x446d4ca0, 0x6cf3d6f5,
0x2649abdf, 0xaea0c7f5, 0x36338cc1, 0x503f7e93, 0xd3772061,
0x11b638e1, 0x72500e03, 0xf80eb2bb,
0xabe0502e, 0xec8d77de, 0x57971e81, 0xe14f6746, 0xc9335400,
0x6920318f, 0x081dbb99, 0xffc304a5,
0x4d351805, 0x7f3d5ce3, 0xa6c866c6, 0x5d5bcca9, 0xdaec6fea,
0x9f926f91, 0x9f46222f, 0x3991467d,
0xa5bf6d8e, 0x1143c44f, 0x43958302, 0xd0214eeb, 0x022083b8,
0x3fb6180c, 0x18f8931e, 0x281658e6,
0x26486e3e, 0x8bd78a70, 0x7477e4c1, 0xb506e07c, 0xf32d0a25,
0x79098b02, 0xe4eabb81, 0x28123b23,
0x69dead38, 0x1574ca16, 0xdf871b62, 0x211c40b7, 0xa51a9ef9,
0x0014377b, 0x041e8ac8, 0x09114003,
0xbd59e4d2, 0xe3d156d5, 0x4fe876d5, 0x2f91a340, 0x557be8de,
0x00eae4a7, 0x0ce5c2ec, 0x4db4bba6,
0xe756bdff, 0xdd3369ac, 0xec17b035, 0x06572327, 0x99afc8b0,
0x56c8c391, 0x6b65811c, 0x5e146119,
0x6e85cb75, 0xbe07c002, 0xc2325577, 0x893ff4ec, 0x5bbfc92d,
0xd0ec3b25, 0xb7801ab7, 0x8d6d3b24,
0x20c763ef, 0xc366a5fc, 0x9c382880, 0x0ace3205, 0xaac9548a,
0xeca1d7c7, 0x041afa32, 0x1d16625a,
0x6701902c, 0x9b757a54, 0x31d477f7, 0x9126b031, 0x36cc6fdb,
0xc70b8b46, 0xd9e66a48, 0x56e55a79,
0x026a4ceb, 0x52437eff, 0x2f8f76b4, 0x0df980a5, 0x8674cde3,
0xedda04eb, 0x17a9be04, 0x2c18f4df,
0xb7747f9d, 0xab2af7b4, 0xefc34d20, 0x2e096b7c, 0x1741a254,
0xe5b6a035, 0x213d42f6, 0x2c1c7c26,
0x61c2f50f, 0x6552daf9, 0xd2c231f8, 0x25130f69, 0xd8167fa2,
0x0418f2c8, 0x001a96a6, 0x0d1526ab,
0x63315c21, 0x5e0a72ec, 0x49bafefd, 0x187908d9, 0x8d0dbd86,
0x311170a7, 0x3e9b640c, 0xcc3e10d7,
0xd5cad3b6, 0x0caec388, 0xf73001e1, 0x6c728aff, 0x71eae2a1,
0x1f9af36e, 0xcfcbd12f, 0xc1de8417,
0xac07be6b, 0xcb44a1d8, 0x8b9b0f56, 0x013988c3, 0xb1c52fca,
0xb4be31cd, 0xd8782806, 0x12a3a4e2,
0x6f7de532, 0x58fd7eb6, 0xd01ee900, 0x24adffc2, 0xf4990fc5,
0x9711aac5, 0x001d7b95, 0x82e5e7d2,
0x109873f6, 0x00613096, 0xc32d9521, 0xada121ff, 0x29908415,
0x7fbb977f, 0xaf9eb3db, 0x29c9ed2a,
0x5ce2a465, 0xa730f32c, 0xd0aa3fe8, 0x8a5cc091, 0xd49e2ce7,
0x0ce454a9, 0xd60acd86, 0x015f1919,
0x77079103, 0xdea03af6, 0x78a8565e, 0xdee356df, 0x21f05cbe,
0x8b75e387, 0xb3c50651, 0xb8a5c3ef,
0xd8eeb6d2, 0xe523be77, 0xc2154529, 0x2f69efdf, 0xafe67afb,
0xf470c4b2, 0xf3e0eb5b, 0xd6cc9876,
0x39e4460c, 0x1fda8538, 0x1987832f, 0xca007367, 0xa99144f8,
0x296b299e, 0x492fc295, 0x9266beab,
0xb5676e69, 0x9bd3ddda, 0xdf7e052f, 0xdb25701c, 0x1b5e51ee,
0xf65324e6, 0x6afce36c, 0x0316cc04,
0x8644213e, 0xb7dc59d0, 0x7965291f, 0xccd6fd43, 0x41823979,
0x932bcdf6, 0xb657c34d, 0x4edfd282,
0x7ae5290c, 0x3cb9536b, 0x851e20fe, 0x9833557e, 0x13ecf0b0,
0xd3ffb372, 0x3f85c5c1, 0x0aef7ed2
};
EXPORT_SYMBOL_GPL(cast6_s4);
static const u32 Tm[24][8] = {
{ 0x5a827999, 0xc95c653a, 0x383650db, 0xa7103c7c, 0x15ea281d,
0x84c413be, 0xf39dff5f, 0x6277eb00 } ,
......
/*
* Common lookup tables for CAST-128 (cast5) and CAST-256 (cast6)
*
* Copyright © 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
* Copyright © 2003 Kartikey Mahendra Bhatt <kartik_me@hotmail.com>
* Copyright © 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
*/
#include <linux/module.h>
#include <crypto/cast_common.h>
const u32 cast_s1[256] = {
0x30fb40d4, 0x9fa0ff0b, 0x6beccd2f, 0x3f258c7a, 0x1e213f2f,
0x9c004dd3, 0x6003e540, 0xcf9fc949,
0xbfd4af27, 0x88bbbdb5, 0xe2034090, 0x98d09675, 0x6e63a0e0,
0x15c361d2, 0xc2e7661d, 0x22d4ff8e,
0x28683b6f, 0xc07fd059, 0xff2379c8, 0x775f50e2, 0x43c340d3,
0xdf2f8656, 0x887ca41a, 0xa2d2bd2d,
0xa1c9e0d6, 0x346c4819, 0x61b76d87, 0x22540f2f, 0x2abe32e1,
0xaa54166b, 0x22568e3a, 0xa2d341d0,
0x66db40c8, 0xa784392f, 0x004dff2f, 0x2db9d2de, 0x97943fac,
0x4a97c1d8, 0x527644b7, 0xb5f437a7,
0xb82cbaef, 0xd751d159, 0x6ff7f0ed, 0x5a097a1f, 0x827b68d0,
0x90ecf52e, 0x22b0c054, 0xbc8e5935,
0x4b6d2f7f, 0x50bb64a2, 0xd2664910, 0xbee5812d, 0xb7332290,
0xe93b159f, 0xb48ee411, 0x4bff345d,
0xfd45c240, 0xad31973f, 0xc4f6d02e, 0x55fc8165, 0xd5b1caad,
0xa1ac2dae, 0xa2d4b76d, 0xc19b0c50,
0x882240f2, 0x0c6e4f38, 0xa4e4bfd7, 0x4f5ba272, 0x564c1d2f,
0xc59c5319, 0xb949e354, 0xb04669fe,
0xb1b6ab8a, 0xc71358dd, 0x6385c545, 0x110f935d, 0x57538ad5,
0x6a390493, 0xe63d37e0, 0x2a54f6b3,
0x3a787d5f, 0x6276a0b5, 0x19a6fcdf, 0x7a42206a, 0x29f9d4d5,
0xf61b1891, 0xbb72275e, 0xaa508167,
0x38901091, 0xc6b505eb, 0x84c7cb8c, 0x2ad75a0f, 0x874a1427,
0xa2d1936b, 0x2ad286af, 0xaa56d291,
0xd7894360, 0x425c750d, 0x93b39e26, 0x187184c9, 0x6c00b32d,
0x73e2bb14, 0xa0bebc3c, 0x54623779,
0x64459eab, 0x3f328b82, 0x7718cf82, 0x59a2cea6, 0x04ee002e,
0x89fe78e6, 0x3fab0950, 0x325ff6c2,
0x81383f05, 0x6963c5c8, 0x76cb5ad6, 0xd49974c9, 0xca180dcf,
0x380782d5, 0xc7fa5cf6, 0x8ac31511,
0x35e79e13, 0x47da91d0, 0xf40f9086, 0xa7e2419e, 0x31366241,
0x051ef495, 0xaa573b04, 0x4a805d8d,
0x548300d0, 0x00322a3c, 0xbf64cddf, 0xba57a68e, 0x75c6372b,
0x50afd341, 0xa7c13275, 0x915a0bf5,
0x6b54bfab, 0x2b0b1426, 0xab4cc9d7, 0x449ccd82, 0xf7fbf265,
0xab85c5f3, 0x1b55db94, 0xaad4e324,
0xcfa4bd3f, 0x2deaa3e2, 0x9e204d02, 0xc8bd25ac, 0xeadf55b3,
0xd5bd9e98, 0xe31231b2, 0x2ad5ad6c,
0x954329de, 0xadbe4528, 0xd8710f69, 0xaa51c90f, 0xaa786bf6,
0x22513f1e, 0xaa51a79b, 0x2ad344cc,
0x7b5a41f0, 0xd37cfbad, 0x1b069505, 0x41ece491, 0xb4c332e6,
0x032268d4, 0xc9600acc, 0xce387e6d,
0xbf6bb16c, 0x6a70fb78, 0x0d03d9c9, 0xd4df39de, 0xe01063da,
0x4736f464, 0x5ad328d8, 0xb347cc96,
0x75bb0fc3, 0x98511bfb, 0x4ffbcc35, 0xb58bcf6a, 0xe11f0abc,
0xbfc5fe4a, 0xa70aec10, 0xac39570a,
0x3f04442f, 0x6188b153, 0xe0397a2e, 0x5727cb79, 0x9ceb418f,
0x1cacd68d, 0x2ad37c96, 0x0175cb9d,
0xc69dff09, 0xc75b65f0, 0xd9db40d8, 0xec0e7779, 0x4744ead4,
0xb11c3274, 0xdd24cb9e, 0x7e1c54bd,
0xf01144f9, 0xd2240eb1, 0x9675b3fd, 0xa3ac3755, 0xd47c27af,
0x51c85f4d, 0x56907596, 0xa5bb15e6,
0x580304f0, 0xca042cf1, 0x011a37ea, 0x8dbfaadb, 0x35ba3e4a,
0x3526ffa0, 0xc37b4d09, 0xbc306ed9,
0x98a52666, 0x5648f725, 0xff5e569d, 0x0ced63d0, 0x7c63b2cf,
0x700b45e1, 0xd5ea50f1, 0x85a92872,
0xaf1fbda7, 0xd4234870, 0xa7870bf3, 0x2d3b4d79, 0x42e04198,
0x0cd0ede7, 0x26470db8, 0xf881814c,
0x474d6ad7, 0x7c0c5e5c, 0xd1231959, 0x381b7298, 0xf5d2f4db,
0xab838653, 0x6e2f1e23, 0x83719c9e,
0xbd91e046, 0x9a56456e, 0xdc39200c, 0x20c8c571, 0x962bda1c,
0xe1e696ff, 0xb141ab08, 0x7cca89b9,
0x1a69e783, 0x02cc4843, 0xa2f7c579, 0x429ef47d, 0x427b169c,
0x5ac9f049, 0xdd8f0f00, 0x5c8165bf
};
EXPORT_SYMBOL_GPL(cast_s1);
const u32 cast_s2[256] = {
0x1f201094, 0xef0ba75b, 0x69e3cf7e, 0x393f4380, 0xfe61cf7a,
0xeec5207a, 0x55889c94, 0x72fc0651,
0xada7ef79, 0x4e1d7235, 0xd55a63ce, 0xde0436ba, 0x99c430ef,
0x5f0c0794, 0x18dcdb7d, 0xa1d6eff3,
0xa0b52f7b, 0x59e83605, 0xee15b094, 0xe9ffd909, 0xdc440086,
0xef944459, 0xba83ccb3, 0xe0c3cdfb,
0xd1da4181, 0x3b092ab1, 0xf997f1c1, 0xa5e6cf7b, 0x01420ddb,
0xe4e7ef5b, 0x25a1ff41, 0xe180f806,
0x1fc41080, 0x179bee7a, 0xd37ac6a9, 0xfe5830a4, 0x98de8b7f,
0x77e83f4e, 0x79929269, 0x24fa9f7b,
0xe113c85b, 0xacc40083, 0xd7503525, 0xf7ea615f, 0x62143154,
0x0d554b63, 0x5d681121, 0xc866c359,
0x3d63cf73, 0xcee234c0, 0xd4d87e87, 0x5c672b21, 0x071f6181,
0x39f7627f, 0x361e3084, 0xe4eb573b,
0x602f64a4, 0xd63acd9c, 0x1bbc4635, 0x9e81032d, 0x2701f50c,
0x99847ab4, 0xa0e3df79, 0xba6cf38c,
0x10843094, 0x2537a95e, 0xf46f6ffe, 0xa1ff3b1f, 0x208cfb6a,
0x8f458c74, 0xd9e0a227, 0x4ec73a34,
0xfc884f69, 0x3e4de8df, 0xef0e0088, 0x3559648d, 0x8a45388c,
0x1d804366, 0x721d9bfd, 0xa58684bb,
0xe8256333, 0x844e8212, 0x128d8098, 0xfed33fb4, 0xce280ae1,
0x27e19ba5, 0xd5a6c252, 0xe49754bd,
0xc5d655dd, 0xeb667064, 0x77840b4d, 0xa1b6a801, 0x84db26a9,
0xe0b56714, 0x21f043b7, 0xe5d05860,
0x54f03084, 0x066ff472, 0xa31aa153, 0xdadc4755, 0xb5625dbf,
0x68561be6, 0x83ca6b94, 0x2d6ed23b,
0xeccf01db, 0xa6d3d0ba, 0xb6803d5c, 0xaf77a709, 0x33b4a34c,
0x397bc8d6, 0x5ee22b95, 0x5f0e5304,
0x81ed6f61, 0x20e74364, 0xb45e1378, 0xde18639b, 0x881ca122,
0xb96726d1, 0x8049a7e8, 0x22b7da7b,
0x5e552d25, 0x5272d237, 0x79d2951c, 0xc60d894c, 0x488cb402,
0x1ba4fe5b, 0xa4b09f6b, 0x1ca815cf,
0xa20c3005, 0x8871df63, 0xb9de2fcb, 0x0cc6c9e9, 0x0beeff53,
0xe3214517, 0xb4542835, 0x9f63293c,
0xee41e729, 0x6e1d2d7c, 0x50045286, 0x1e6685f3, 0xf33401c6,
0x30a22c95, 0x31a70850, 0x60930f13,
0x73f98417, 0xa1269859, 0xec645c44, 0x52c877a9, 0xcdff33a6,
0xa02b1741, 0x7cbad9a2, 0x2180036f,
0x50d99c08, 0xcb3f4861, 0xc26bd765, 0x64a3f6ab, 0x80342676,
0x25a75e7b, 0xe4e6d1fc, 0x20c710e6,
0xcdf0b680, 0x17844d3b, 0x31eef84d, 0x7e0824e4, 0x2ccb49eb,
0x846a3bae, 0x8ff77888, 0xee5d60f6,
0x7af75673, 0x2fdd5cdb, 0xa11631c1, 0x30f66f43, 0xb3faec54,
0x157fd7fa, 0xef8579cc, 0xd152de58,
0xdb2ffd5e, 0x8f32ce19, 0x306af97a, 0x02f03ef8, 0x99319ad5,
0xc242fa0f, 0xa7e3ebb0, 0xc68e4906,
0xb8da230c, 0x80823028, 0xdcdef3c8, 0xd35fb171, 0x088a1bc8,
0xbec0c560, 0x61a3c9e8, 0xbca8f54d,
0xc72feffa, 0x22822e99, 0x82c570b4, 0xd8d94e89, 0x8b1c34bc,
0x301e16e6, 0x273be979, 0xb0ffeaa6,
0x61d9b8c6, 0x00b24869, 0xb7ffce3f, 0x08dc283b, 0x43daf65a,
0xf7e19798, 0x7619b72f, 0x8f1c9ba4,
0xdc8637a0, 0x16a7d3b1, 0x9fc393b7, 0xa7136eeb, 0xc6bcc63e,
0x1a513742, 0xef6828bc, 0x520365d6,
0x2d6a77ab, 0x3527ed4b, 0x821fd216, 0x095c6e2e, 0xdb92f2fb,
0x5eea29cb, 0x145892f5, 0x91584f7f,
0x5483697b, 0x2667a8cc, 0x85196048, 0x8c4bacea, 0x833860d4,
0x0d23e0f9, 0x6c387e8a, 0x0ae6d249,
0xb284600c, 0xd835731d, 0xdcb1c647, 0xac4c56ea, 0x3ebd81b3,
0x230eabb0, 0x6438bc87, 0xf0b5b1fa,
0x8f5ea2b3, 0xfc184642, 0x0a036b7a, 0x4fb089bd, 0x649da589,
0xa345415e, 0x5c038323, 0x3e5d3bb9,
0x43d79572, 0x7e6dd07c, 0x06dfdf1e, 0x6c6cc4ef, 0x7160a539,
0x73bfbe70, 0x83877605, 0x4523ecf1
};
EXPORT_SYMBOL_GPL(cast_s2);
const u32 cast_s3[256] = {
0x8defc240, 0x25fa5d9f, 0xeb903dbf, 0xe810c907, 0x47607fff,
0x369fe44b, 0x8c1fc644, 0xaececa90,
0xbeb1f9bf, 0xeefbcaea, 0xe8cf1950, 0x51df07ae, 0x920e8806,
0xf0ad0548, 0xe13c8d83, 0x927010d5,
0x11107d9f, 0x07647db9, 0xb2e3e4d4, 0x3d4f285e, 0xb9afa820,
0xfade82e0, 0xa067268b, 0x8272792e,
0x553fb2c0, 0x489ae22b, 0xd4ef9794, 0x125e3fbc, 0x21fffcee,
0x825b1bfd, 0x9255c5ed, 0x1257a240,
0x4e1a8302, 0xbae07fff, 0x528246e7, 0x8e57140e, 0x3373f7bf,
0x8c9f8188, 0xa6fc4ee8, 0xc982b5a5,
0xa8c01db7, 0x579fc264, 0x67094f31, 0xf2bd3f5f, 0x40fff7c1,
0x1fb78dfc, 0x8e6bd2c1, 0x437be59b,
0x99b03dbf, 0xb5dbc64b, 0x638dc0e6, 0x55819d99, 0xa197c81c,
0x4a012d6e, 0xc5884a28, 0xccc36f71,
0xb843c213, 0x6c0743f1, 0x8309893c, 0x0feddd5f, 0x2f7fe850,
0xd7c07f7e, 0x02507fbf, 0x5afb9a04,
0xa747d2d0, 0x1651192e, 0xaf70bf3e, 0x58c31380, 0x5f98302e,
0x727cc3c4, 0x0a0fb402, 0x0f7fef82,
0x8c96fdad, 0x5d2c2aae, 0x8ee99a49, 0x50da88b8, 0x8427f4a0,
0x1eac5790, 0x796fb449, 0x8252dc15,
0xefbd7d9b, 0xa672597d, 0xada840d8, 0x45f54504, 0xfa5d7403,
0xe83ec305, 0x4f91751a, 0x925669c2,
0x23efe941, 0xa903f12e, 0x60270df2, 0x0276e4b6, 0x94fd6574,
0x927985b2, 0x8276dbcb, 0x02778176,
0xf8af918d, 0x4e48f79e, 0x8f616ddf, 0xe29d840e, 0x842f7d83,
0x340ce5c8, 0x96bbb682, 0x93b4b148,
0xef303cab, 0x984faf28, 0x779faf9b, 0x92dc560d, 0x224d1e20,
0x8437aa88, 0x7d29dc96, 0x2756d3dc,
0x8b907cee, 0xb51fd240, 0xe7c07ce3, 0xe566b4a1, 0xc3e9615e,
0x3cf8209d, 0x6094d1e3, 0xcd9ca341,
0x5c76460e, 0x00ea983b, 0xd4d67881, 0xfd47572c, 0xf76cedd9,
0xbda8229c, 0x127dadaa, 0x438a074e,
0x1f97c090, 0x081bdb8a, 0x93a07ebe, 0xb938ca15, 0x97b03cff,
0x3dc2c0f8, 0x8d1ab2ec, 0x64380e51,
0x68cc7bfb, 0xd90f2788, 0x12490181, 0x5de5ffd4, 0xdd7ef86a,
0x76a2e214, 0xb9a40368, 0x925d958f,
0x4b39fffa, 0xba39aee9, 0xa4ffd30b, 0xfaf7933b, 0x6d498623,
0x193cbcfa, 0x27627545, 0x825cf47a,
0x61bd8ba0, 0xd11e42d1, 0xcead04f4, 0x127ea392, 0x10428db7,
0x8272a972, 0x9270c4a8, 0x127de50b,
0x285ba1c8, 0x3c62f44f, 0x35c0eaa5, 0xe805d231, 0x428929fb,
0xb4fcdf82, 0x4fb66a53, 0x0e7dc15b,
0x1f081fab, 0x108618ae, 0xfcfd086d, 0xf9ff2889, 0x694bcc11,
0x236a5cae, 0x12deca4d, 0x2c3f8cc5,
0xd2d02dfe, 0xf8ef5896, 0xe4cf52da, 0x95155b67, 0x494a488c,
0xb9b6a80c, 0x5c8f82bc, 0x89d36b45,
0x3a609437, 0xec00c9a9, 0x44715253, 0x0a874b49, 0xd773bc40,
0x7c34671c, 0x02717ef6, 0x4feb5536,
0xa2d02fff, 0xd2bf60c4, 0xd43f03c0, 0x50b4ef6d, 0x07478cd1,
0x006e1888, 0xa2e53f55, 0xb9e6d4bc,
0xa2048016, 0x97573833, 0xd7207d67, 0xde0f8f3d, 0x72f87b33,
0xabcc4f33, 0x7688c55d, 0x7b00a6b0,
0x947b0001, 0x570075d2, 0xf9bb88f8, 0x8942019e, 0x4264a5ff,
0x856302e0, 0x72dbd92b, 0xee971b69,
0x6ea22fde, 0x5f08ae2b, 0xaf7a616d, 0xe5c98767, 0xcf1febd2,
0x61efc8c2, 0xf1ac2571, 0xcc8239c2,
0x67214cb8, 0xb1e583d1, 0xb7dc3e62, 0x7f10bdce, 0xf90a5c38,
0x0ff0443d, 0x606e6dc6, 0x60543a49,
0x5727c148, 0x2be98a1d, 0x8ab41738, 0x20e1be24, 0xaf96da0f,
0x68458425, 0x99833be5, 0x600d457d,
0x282f9350, 0x8334b362, 0xd91d1120, 0x2b6d8da0, 0x642b1e31,
0x9c305a00, 0x52bce688, 0x1b03588a,
0xf7baefd5, 0x4142ed9c, 0xa4315c11, 0x83323ec5, 0xdfef4636,
0xa133c501, 0xe9d3531c, 0xee353783
};
EXPORT_SYMBOL_GPL(cast_s3);
const u32 cast_s4[256] = {
0x9db30420, 0x1fb6e9de, 0xa7be7bef, 0xd273a298, 0x4a4f7bdb,
0x64ad8c57, 0x85510443, 0xfa020ed1,
0x7e287aff, 0xe60fb663, 0x095f35a1, 0x79ebf120, 0xfd059d43,
0x6497b7b1, 0xf3641f63, 0x241e4adf,
0x28147f5f, 0x4fa2b8cd, 0xc9430040, 0x0cc32220, 0xfdd30b30,
0xc0a5374f, 0x1d2d00d9, 0x24147b15,
0xee4d111a, 0x0fca5167, 0x71ff904c, 0x2d195ffe, 0x1a05645f,
0x0c13fefe, 0x081b08ca, 0x05170121,
0x80530100, 0xe83e5efe, 0xac9af4f8, 0x7fe72701, 0xd2b8ee5f,
0x06df4261, 0xbb9e9b8a, 0x7293ea25,
0xce84ffdf, 0xf5718801, 0x3dd64b04, 0xa26f263b, 0x7ed48400,
0x547eebe6, 0x446d4ca0, 0x6cf3d6f5,
0x2649abdf, 0xaea0c7f5, 0x36338cc1, 0x503f7e93, 0xd3772061,
0x11b638e1, 0x72500e03, 0xf80eb2bb,
0xabe0502e, 0xec8d77de, 0x57971e81, 0xe14f6746, 0xc9335400,
0x6920318f, 0x081dbb99, 0xffc304a5,
0x4d351805, 0x7f3d5ce3, 0xa6c866c6, 0x5d5bcca9, 0xdaec6fea,
0x9f926f91, 0x9f46222f, 0x3991467d,
0xa5bf6d8e, 0x1143c44f, 0x43958302, 0xd0214eeb, 0x022083b8,
0x3fb6180c, 0x18f8931e, 0x281658e6,
0x26486e3e, 0x8bd78a70, 0x7477e4c1, 0xb506e07c, 0xf32d0a25,
0x79098b02, 0xe4eabb81, 0x28123b23,
0x69dead38, 0x1574ca16, 0xdf871b62, 0x211c40b7, 0xa51a9ef9,
0x0014377b, 0x041e8ac8, 0x09114003,
0xbd59e4d2, 0xe3d156d5, 0x4fe876d5, 0x2f91a340, 0x557be8de,
0x00eae4a7, 0x0ce5c2ec, 0x4db4bba6,
0xe756bdff, 0xdd3369ac, 0xec17b035, 0x06572327, 0x99afc8b0,
0x56c8c391, 0x6b65811c, 0x5e146119,
0x6e85cb75, 0xbe07c002, 0xc2325577, 0x893ff4ec, 0x5bbfc92d,
0xd0ec3b25, 0xb7801ab7, 0x8d6d3b24,
0x20c763ef, 0xc366a5fc, 0x9c382880, 0x0ace3205, 0xaac9548a,
0xeca1d7c7, 0x041afa32, 0x1d16625a,
0x6701902c, 0x9b757a54, 0x31d477f7, 0x9126b031, 0x36cc6fdb,
0xc70b8b46, 0xd9e66a48, 0x56e55a79,
0x026a4ceb, 0x52437eff, 0x2f8f76b4, 0x0df980a5, 0x8674cde3,
0xedda04eb, 0x17a9be04, 0x2c18f4df,
0xb7747f9d, 0xab2af7b4, 0xefc34d20, 0x2e096b7c, 0x1741a254,
0xe5b6a035, 0x213d42f6, 0x2c1c7c26,
0x61c2f50f, 0x6552daf9, 0xd2c231f8, 0x25130f69, 0xd8167fa2,
0x0418f2c8, 0x001a96a6, 0x0d1526ab,
0x63315c21, 0x5e0a72ec, 0x49bafefd, 0x187908d9, 0x8d0dbd86,
0x311170a7, 0x3e9b640c, 0xcc3e10d7,
0xd5cad3b6, 0x0caec388, 0xf73001e1, 0x6c728aff, 0x71eae2a1,
0x1f9af36e, 0xcfcbd12f, 0xc1de8417,
0xac07be6b, 0xcb44a1d8, 0x8b9b0f56, 0x013988c3, 0xb1c52fca,
0xb4be31cd, 0xd8782806, 0x12a3a4e2,
0x6f7de532, 0x58fd7eb6, 0xd01ee900, 0x24adffc2, 0xf4990fc5,
0x9711aac5, 0x001d7b95, 0x82e5e7d2,
0x109873f6, 0x00613096, 0xc32d9521, 0xada121ff, 0x29908415,
0x7fbb977f, 0xaf9eb3db, 0x29c9ed2a,
0x5ce2a465, 0xa730f32c, 0xd0aa3fe8, 0x8a5cc091, 0xd49e2ce7,
0x0ce454a9, 0xd60acd86, 0x015f1919,
0x77079103, 0xdea03af6, 0x78a8565e, 0xdee356df, 0x21f05cbe,
0x8b75e387, 0xb3c50651, 0xb8a5c3ef,
0xd8eeb6d2, 0xe523be77, 0xc2154529, 0x2f69efdf, 0xafe67afb,
0xf470c4b2, 0xf3e0eb5b, 0xd6cc9876,
0x39e4460c, 0x1fda8538, 0x1987832f, 0xca007367, 0xa99144f8,
0x296b299e, 0x492fc295, 0x9266beab,
0xb5676e69, 0x9bd3ddda, 0xdf7e052f, 0xdb25701c, 0x1b5e51ee,
0xf65324e6, 0x6afce36c, 0x0316cc04,
0x8644213e, 0xb7dc59d0, 0x7965291f, 0xccd6fd43, 0x41823979,
0x932bcdf6, 0xb657c34d, 0x4edfd282,
0x7ae5290c, 0x3cb9536b, 0x851e20fe, 0x9833557e, 0x13ecf0b0,
0xd3ffb372, 0x3f85c5c1, 0x0aef7ed2
};
EXPORT_SYMBOL_GPL(cast_s4);
MODULE_LICENSE("GPL");
......@@ -971,11 +971,13 @@ static int do_test(int m)
case 3:
ret += tcrypt_test("ecb(des)");
ret += tcrypt_test("cbc(des)");
ret += tcrypt_test("ctr(des)");
break;
case 4:
ret += tcrypt_test("ecb(des3_ede)");
ret += tcrypt_test("cbc(des3_ede)");
ret += tcrypt_test("ctr(des3_ede)");
break;
case 5:
......@@ -1479,6 +1481,10 @@ static int do_test(int m)
test_hash_speed("ghash-generic", sec, hash_speed_template_16);
if (mode > 300 && mode < 400) break;
case 319:
test_hash_speed("crc32c", sec, generic_hash_speed_template);
if (mode > 300 && mode < 400) break;
case 399:
break;
......@@ -1722,6 +1728,29 @@ static int do_test(int m)
speed_template_32_64);
break;
case 508:
test_acipher_speed("ecb(camellia)", ENCRYPT, sec, NULL, 0,
speed_template_16_32);
test_acipher_speed("ecb(camellia)", DECRYPT, sec, NULL, 0,
speed_template_16_32);
test_acipher_speed("cbc(camellia)", ENCRYPT, sec, NULL, 0,
speed_template_16_32);
test_acipher_speed("cbc(camellia)", DECRYPT, sec, NULL, 0,
speed_template_16_32);
test_acipher_speed("ctr(camellia)", ENCRYPT, sec, NULL, 0,
speed_template_16_32);
test_acipher_speed("ctr(camellia)", DECRYPT, sec, NULL, 0,
speed_template_16_32);
test_acipher_speed("lrw(camellia)", ENCRYPT, sec, NULL, 0,
speed_template_32_48);
test_acipher_speed("lrw(camellia)", DECRYPT, sec, NULL, 0,
speed_template_32_48);
test_acipher_speed("xts(camellia)", ENCRYPT, sec, NULL, 0,
speed_template_32_64);
test_acipher_speed("xts(camellia)", DECRYPT, sec, NULL, 0,
speed_template_32_64);
break;
case 1000:
test_available();
break;
......
......@@ -1638,270 +1638,66 @@ static const struct alg_test_desc alg_test_descs[] = {
{
.alg = "__cbc-cast5-avx",
.test = alg_test_null,
.suite = {
.cipher = {
.enc = {
.vecs = NULL,
.count = 0
},
.dec = {
.vecs = NULL,
.count = 0
}
}
}
}, {
.alg = "__cbc-cast6-avx",
.test = alg_test_null,
.suite = {
.cipher = {
.enc = {
.vecs = NULL,
.count = 0
},
.dec = {
.vecs = NULL,
.count = 0
}
}
}
}, {
.alg = "__cbc-serpent-avx",
.test = alg_test_null,
.suite = {
.cipher = {
.enc = {
.vecs = NULL,
.count = 0
},
.dec = {
.vecs = NULL,
.count = 0
}
}
}
}, {
.alg = "__cbc-serpent-sse2",
.test = alg_test_null,
.suite = {
.cipher = {
.enc = {
.vecs = NULL,
.count = 0
},
.dec = {
.vecs = NULL,
.count = 0
}
}
}
}, {
.alg = "__cbc-twofish-avx",
.test = alg_test_null,
.suite = {
.cipher = {
.enc = {
.vecs = NULL,
.count = 0
},
.dec = {
.vecs = NULL,
.count = 0
}
}
}
}, {
.alg = "__driver-cbc-aes-aesni",
.test = alg_test_null,
.fips_allowed = 1,
.suite = {
.cipher = {
.enc = {
.vecs = NULL,
.count = 0
},
.dec = {
.vecs = NULL,
.count = 0
}
}
}
}, {
.alg = "__driver-cbc-camellia-aesni",
.test = alg_test_null,
}, {
.alg = "__driver-cbc-cast5-avx",
.test = alg_test_null,
.suite = {
.cipher = {
.enc = {
.vecs = NULL,
.count = 0
},
.dec = {
.vecs = NULL,
.count = 0
}
}
}
}, {
.alg = "__driver-cbc-cast6-avx",
.test = alg_test_null,
.suite = {
.cipher = {
.enc = {
.vecs = NULL,
.count = 0
},
.dec = {
.vecs = NULL,
.count = 0
}
}
}
}, {
.alg = "__driver-cbc-serpent-avx",
.test = alg_test_null,
.suite = {
.cipher = {
.enc = {
.vecs = NULL,
.count = 0
},
.dec = {
.vecs = NULL,
.count = 0
}
}
}
}, {
.alg = "__driver-cbc-serpent-sse2",
.test = alg_test_null,
.suite = {
.cipher = {
.enc = {
.vecs = NULL,
.count = 0
},
.dec = {
.vecs = NULL,
.count = 0
}
}
}
}, {
.alg = "__driver-cbc-twofish-avx",
.test = alg_test_null,
.suite = {
.cipher = {
.enc = {
.vecs = NULL,
.count = 0
},
.dec = {
.vecs = NULL,
.count = 0
}
}
}
}, {
.alg = "__driver-ecb-aes-aesni",
.test = alg_test_null,
.fips_allowed = 1,
.suite = {
.cipher = {
.enc = {
.vecs = NULL,
.count = 0
},
.dec = {
.vecs = NULL,
.count = 0
}
}
}
}, {
.alg = "__driver-ecb-camellia-aesni",
.test = alg_test_null,
}, {
.alg = "__driver-ecb-cast5-avx",
.test = alg_test_null,
.suite = {
.cipher = {
.enc = {
.vecs = NULL,
.count = 0
},
.dec = {
.vecs = NULL,
.count = 0
}
}
}
}, {
.alg = "__driver-ecb-cast6-avx",
.test = alg_test_null,
.suite = {
.cipher = {
.enc = {
.vecs = NULL,
.count = 0
},
.dec = {
.vecs = NULL,
.count = 0
}
}
}
}, {
.alg = "__driver-ecb-serpent-avx",
.test = alg_test_null,
.suite = {
.cipher = {
.enc = {
.vecs = NULL,
.count = 0
},
.dec = {
.vecs = NULL,
.count = 0
}
}
}
}, {
.alg = "__driver-ecb-serpent-sse2",
.test = alg_test_null,
.suite = {
.cipher = {
.enc = {
.vecs = NULL,
.count = 0
},
.dec = {
.vecs = NULL,
.count = 0
}
}
}
}, {
.alg = "__driver-ecb-twofish-avx",
.test = alg_test_null,
.suite = {
.cipher = {
.enc = {
.vecs = NULL,
.count = 0
},
.dec = {
.vecs = NULL,
.count = 0
}
}
}
}, {
.alg = "__ghash-pclmulqdqni",
.test = alg_test_null,
.fips_allowed = 1,
.suite = {
.hash = {
.vecs = NULL,
.count = 0
}
}
}, {
.alg = "ansi_cprng",
.test = alg_test_cprng,
......@@ -2130,135 +1926,39 @@ static const struct alg_test_desc alg_test_descs[] = {
.alg = "cryptd(__driver-cbc-aes-aesni)",
.test = alg_test_null,
.fips_allowed = 1,
.suite = {
.cipher = {
.enc = {
.vecs = NULL,
.count = 0
},
.dec = {
.vecs = NULL,
.count = 0
}
}
}
}, {
.alg = "cryptd(__driver-cbc-camellia-aesni)",
.test = alg_test_null,
}, {
.alg = "cryptd(__driver-ecb-aes-aesni)",
.test = alg_test_null,
.fips_allowed = 1,
.suite = {
.cipher = {
.enc = {
.vecs = NULL,
.count = 0
},
.dec = {
.vecs = NULL,
.count = 0
}
}
}
}, {
.alg = "cryptd(__driver-ecb-camellia-aesni)",
.test = alg_test_null,
}, {
.alg = "cryptd(__driver-ecb-cast5-avx)",
.test = alg_test_null,
.suite = {
.cipher = {
.enc = {
.vecs = NULL,
.count = 0
},
.dec = {
.vecs = NULL,
.count = 0
}
}
}
}, {
.alg = "cryptd(__driver-ecb-cast6-avx)",
.test = alg_test_null,
.suite = {
.cipher = {
.enc = {
.vecs = NULL,
.count = 0
},
.dec = {
.vecs = NULL,
.count = 0
}
}
}
}, {
.alg = "cryptd(__driver-ecb-serpent-avx)",
.test = alg_test_null,
.suite = {
.cipher = {
.enc = {
.vecs = NULL,
.count = 0
},
.dec = {
.vecs = NULL,
.count = 0
}
}
}
}, {
.alg = "cryptd(__driver-ecb-serpent-sse2)",
.test = alg_test_null,
.suite = {
.cipher = {
.enc = {
.vecs = NULL,
.count = 0
},
.dec = {
.vecs = NULL,
.count = 0
}
}
}
}, {
.alg = "cryptd(__driver-ecb-twofish-avx)",
.test = alg_test_null,
.suite = {
.cipher = {
.enc = {
.vecs = NULL,
.count = 0
},
.dec = {
.vecs = NULL,
.count = 0
}
}
}
}, {
.alg = "cryptd(__driver-gcm-aes-aesni)",
.test = alg_test_null,
.fips_allowed = 1,
.suite = {
.cipher = {
.enc = {
.vecs = NULL,
.count = 0
},
.dec = {
.vecs = NULL,
.count = 0
}
}
}
}, {
.alg = "cryptd(__ghash-pclmulqdqni)",
.test = alg_test_null,
.fips_allowed = 1,
.suite = {
.hash = {
.vecs = NULL,
.count = 0
}
}
}, {
.alg = "ctr(aes)",
.test = alg_test_skcipher,
......@@ -2335,6 +2035,36 @@ static const struct alg_test_desc alg_test_descs[] = {
}
}
}
}, {
.alg = "ctr(des)",
.test = alg_test_skcipher,
.suite = {
.cipher = {
.enc = {
.vecs = des_ctr_enc_tv_template,
.count = DES_CTR_ENC_TEST_VECTORS
},
.dec = {
.vecs = des_ctr_dec_tv_template,
.count = DES_CTR_DEC_TEST_VECTORS
}
}
}
}, {
.alg = "ctr(des3_ede)",
.test = alg_test_skcipher,
.suite = {
.cipher = {
.enc = {
.vecs = des3_ede_ctr_enc_tv_template,
.count = DES3_EDE_CTR_ENC_TEST_VECTORS
},
.dec = {
.vecs = des3_ede_ctr_dec_tv_template,
.count = DES3_EDE_CTR_DEC_TEST_VECTORS
}
}
}
}, {
.alg = "ctr(serpent)",
.test = alg_test_skcipher,
......@@ -2383,6 +2113,7 @@ static const struct alg_test_desc alg_test_descs[] = {
}, {
.alg = "deflate",
.test = alg_test_comp,
.fips_allowed = 1,
.suite = {
.comp = {
.comp = {
......@@ -2399,18 +2130,6 @@ static const struct alg_test_desc alg_test_descs[] = {
.alg = "ecb(__aes-aesni)",
.test = alg_test_null,
.fips_allowed = 1,
.suite = {
.cipher = {
.enc = {
.vecs = NULL,
.count = 0
},
.dec = {
.vecs = NULL,
.count = 0
}
}
}
}, {
.alg = "ecb(aes)",
.test = alg_test_skcipher,
......@@ -2859,6 +2578,7 @@ static const struct alg_test_desc alg_test_descs[] = {
}, {
.alg = "lzo",
.test = alg_test_comp,
.fips_allowed = 1,
.suite = {
.comp = {
.comp = {
......@@ -3226,6 +2946,7 @@ static const struct alg_test_desc alg_test_descs[] = {
}, {
.alg = "zlib",
.test = alg_test_pcomp,
.fips_allowed = 1,
.suite = {
.pcomp = {
.comp = {
......
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -375,6 +375,11 @@ static void vhash_update(const unsigned char *m,
u64 pkh = ctx->polykey[0];
u64 pkl = ctx->polykey[1];
if (!mbytes)
return;
BUG_ON(mbytes % VMAC_NHBYTES);
mptr = (u64 *)m;
i = mbytes / VMAC_NHBYTES; /* Must be non-zero */
......@@ -454,7 +459,7 @@ static u64 vhash(unsigned char m[], unsigned int mbytes,
}
static u64 vmac(unsigned char m[], unsigned int mbytes,
unsigned char n[16], u64 *tagl,
const unsigned char n[16], u64 *tagl,
struct vmac_ctx_t *ctx)
{
u64 *in_n, *out_p;
......@@ -559,8 +564,33 @@ static int vmac_update(struct shash_desc *pdesc, const u8 *p,
{
struct crypto_shash *parent = pdesc->tfm;
struct vmac_ctx_t *ctx = crypto_shash_ctx(parent);
int expand;
int min;
expand = VMAC_NHBYTES - ctx->partial_size > 0 ?
VMAC_NHBYTES - ctx->partial_size : 0;
min = len < expand ? len : expand;
memcpy(ctx->partial + ctx->partial_size, p, min);
ctx->partial_size += min;
if (len < expand)
return 0;
vhash_update(p, len, &ctx->__vmac_ctx);
vhash_update(ctx->partial, VMAC_NHBYTES, &ctx->__vmac_ctx);
ctx->partial_size = 0;
len -= expand;
p += expand;
if (len % VMAC_NHBYTES) {
memcpy(ctx->partial, p + len - (len % VMAC_NHBYTES),
len % VMAC_NHBYTES);
ctx->partial_size = len % VMAC_NHBYTES;
}
vhash_update(p, len - len % VMAC_NHBYTES, &ctx->__vmac_ctx);
return 0;
}
......@@ -572,10 +602,20 @@ static int vmac_final(struct shash_desc *pdesc, u8 *out)
vmac_t mac;
u8 nonce[16] = {};
mac = vmac(NULL, 0, nonce, NULL, ctx);
/* vmac() ends up accessing outside the array bounds that
* we specify. In appears to access up to the next 2-word
* boundary. We'll just be uber cautious and zero the
* unwritten bytes in the buffer.
*/
if (ctx->partial_size) {
memset(ctx->partial + ctx->partial_size, 0,
VMAC_NHBYTES - ctx->partial_size);
}
mac = vmac(ctx->partial, ctx->partial_size, nonce, NULL, ctx);
memcpy(out, &mac, sizeof(vmac_t));
memset(&mac, 0, sizeof(vmac_t));
memset(&ctx->__vmac_ctx, 0, sizeof(struct vmac_ctx));
ctx->partial_size = 0;
return 0;
}
......@@ -673,4 +713,3 @@ module_exit(vmac_module_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("VMAC hash algorithm");
......@@ -254,6 +254,7 @@ config CRYPTO_DEV_OMAP_AES
tristate "Support for OMAP AES hw engine"
depends on ARCH_OMAP2 || ARCH_OMAP3
select CRYPTO_AES
select CRYPTO_BLKCIPHER2
help
OMAP processors have AES module accelerator. Select this if you
want to use the OMAP module for AES algorithms.
......
......@@ -1863,6 +1863,7 @@ static int __devexit spacc_remove(struct platform_device *pdev)
static const struct platform_device_id spacc_id_table[] = {
{ "picochip,spacc-ipsec", },
{ "picochip,spacc-l2", },
{ }
};
static struct platform_driver spacc_driver = {
......
......@@ -30,7 +30,7 @@
#include <crypto/ctr.h>
#include <plat/cpu.h>
#include <plat/dma.h>
#include <mach/dma.h>
#define _SBF(s, v) ((v) << (s))
#define _BIT(b) _SBF(b, 1)
......
......@@ -936,8 +936,7 @@ static int sg_to_link_tbl(struct scatterlist *sg, int sg_count,
sg_count--;
link_tbl_ptr--;
}
link_tbl_ptr->len = cpu_to_be16(be16_to_cpu(link_tbl_ptr->len)
+ cryptlen);
be16_add_cpu(&link_tbl_ptr->len, cryptlen);
/* tag end of link table */
link_tbl_ptr->j_extent = DESC_PTR_LNKTBL_RETURN;
......
......@@ -672,8 +672,10 @@ static int tegra_aes_get_random(struct crypto_rng *tfm, u8 *rdata,
mutex_lock(&aes_lock);
ret = clk_prepare_enable(dd->aes_clk);
if (ret)
if (ret) {
mutex_unlock(&aes_lock);
return ret;
}
ctx->dd = dd;
dd->ctx = ctx;
......@@ -757,8 +759,10 @@ static int tegra_aes_rng_reset(struct crypto_rng *tfm, u8 *seed,
dd->flags = FLAGS_ENCRYPT | FLAGS_RNG;
ret = clk_prepare_enable(dd->aes_clk);
if (ret)
if (ret) {
mutex_unlock(&aes_lock);
return ret;
}
aes_set_key(dd);
......@@ -1029,7 +1033,7 @@ static int tegra_aes_probe(struct platform_device *pdev)
if (dd->buf_out)
dma_free_coherent(dev, AES_HW_DMA_BUFFER_SIZE_BYTES,
dd->buf_out, dd->dma_buf_out);
if (IS_ERR(dd->aes_clk))
if (!IS_ERR(dd->aes_clk))
clk_put(dd->aes_clk);
if (aes_wq)
destroy_workqueue(aes_wq);
......
......@@ -3,6 +3,7 @@
#include <linux/types.h>
#include <linux/crypto.h>
#include <crypto/cast_common.h>
#define CAST5_BLOCK_SIZE 8
#define CAST5_MIN_KEY_SIZE 5
......@@ -19,9 +20,4 @@ int cast5_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen);
void __cast5_encrypt(struct cast5_ctx *ctx, u8 *dst, const u8 *src);
void __cast5_decrypt(struct cast5_ctx *ctx, u8 *dst, const u8 *src);
extern const u32 cast5_s1[256];
extern const u32 cast5_s2[256];
extern const u32 cast5_s3[256];
extern const u32 cast5_s4[256];
#endif
......@@ -3,6 +3,7 @@
#include <linux/types.h>
#include <linux/crypto.h>
#include <crypto/cast_common.h>
#define CAST6_BLOCK_SIZE 16
#define CAST6_MIN_KEY_SIZE 16
......@@ -20,9 +21,4 @@ int cast6_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen);
void __cast6_encrypt(struct cast6_ctx *ctx, u8 *dst, const u8 *src);
void __cast6_decrypt(struct cast6_ctx *ctx, u8 *dst, const u8 *src);
extern const u32 cast6_s1[256];
extern const u32 cast6_s2[256];
extern const u32 cast6_s3[256];
extern const u32 cast6_s4[256];
#endif
#ifndef _CRYPTO_CAST_COMMON_H
#define _CRYPTO_CAST_COMMON_H
extern const u32 cast_s1[256];
extern const u32 cast_s2[256];
extern const u32 cast_s3[256];
extern const u32 cast_s4[256];
#endif
......@@ -56,6 +56,8 @@ typedef u64 vmac_t;
struct vmac_ctx_t {
struct crypto_cipher *child;
struct vmac_ctx __vmac_ctx;
u8 partial[VMAC_NHBYTES]; /* partial block */
int partial_size; /* size of the partial block */
};
#endif /* __CRYPTO_VMAC_H */
......@@ -171,7 +171,7 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd)
{
int cpu, num_cpus;
unsigned int next_nr, next_index;
struct padata_parallel_queue *queue, *next_queue;
struct padata_parallel_queue *next_queue;
struct padata_priv *padata;
struct padata_list *reorder;
......@@ -204,8 +204,7 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd)
goto out;
}
queue = per_cpu_ptr(pd->pqueue, smp_processor_id());
if (queue->cpu_index == next_queue->cpu_index) {
if (__this_cpu_read(pd->pqueue->cpu_index) == next_queue->cpu_index) {
padata = ERR_PTR(-ENODATA);
goto out;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment