Commit e2174139 authored by Ard Biesheuvel, committed by Herbert Xu

crypto: arm64/aes-ce - add 5 way interleave routines

In preparation of tweaking the accelerated AES chaining mode routines
to be able to use a 5-way stride, implement the core routines to
support processing 5 blocks of input at a time. While at it, drop
the 2 way versions, which have been unused for a while now.
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
parent d45b1714
...@@ -52,7 +52,7 @@ ...@@ -52,7 +52,7 @@
load_round_keys \rounds, \temp load_round_keys \rounds, \temp
.endm .endm
.macro do_enc_Nx, de, mc, k, i0, i1, i2, i3 .macro do_enc_Nx, de, mc, k, i0, i1, i2, i3, i4
aes\de \i0\().16b, \k\().16b aes\de \i0\().16b, \k\().16b
aes\mc \i0\().16b, \i0\().16b aes\mc \i0\().16b, \i0\().16b
.ifnb \i1 .ifnb \i1
...@@ -63,27 +63,34 @@ ...@@ -63,27 +63,34 @@
aes\mc \i2\().16b, \i2\().16b aes\mc \i2\().16b, \i2\().16b
aes\de \i3\().16b, \k\().16b aes\de \i3\().16b, \k\().16b
aes\mc \i3\().16b, \i3\().16b aes\mc \i3\().16b, \i3\().16b
.ifnb \i4
aes\de \i4\().16b, \k\().16b
aes\mc \i4\().16b, \i4\().16b
.endif
.endif .endif
.endif .endif
.endm .endm
/* up to 4 interleaved encryption rounds with the same round key */ /* up to 5 interleaved encryption rounds with the same round key */
.macro round_Nx, enc, k, i0, i1, i2, i3 .macro round_Nx, enc, k, i0, i1, i2, i3, i4
.ifc \enc, e .ifc \enc, e
do_enc_Nx e, mc, \k, \i0, \i1, \i2, \i3 do_enc_Nx e, mc, \k, \i0, \i1, \i2, \i3, \i4
.else .else
do_enc_Nx d, imc, \k, \i0, \i1, \i2, \i3 do_enc_Nx d, imc, \k, \i0, \i1, \i2, \i3, \i4
.endif .endif
.endm .endm
/* up to 4 interleaved final rounds */ /* up to 5 interleaved final rounds */
.macro fin_round_Nx, de, k, k2, i0, i1, i2, i3 .macro fin_round_Nx, de, k, k2, i0, i1, i2, i3, i4
aes\de \i0\().16b, \k\().16b aes\de \i0\().16b, \k\().16b
.ifnb \i1 .ifnb \i1
aes\de \i1\().16b, \k\().16b aes\de \i1\().16b, \k\().16b
.ifnb \i3 .ifnb \i3
aes\de \i2\().16b, \k\().16b aes\de \i2\().16b, \k\().16b
aes\de \i3\().16b, \k\().16b aes\de \i3\().16b, \k\().16b
.ifnb \i4
aes\de \i4\().16b, \k\().16b
.endif
.endif .endif
.endif .endif
eor \i0\().16b, \i0\().16b, \k2\().16b eor \i0\().16b, \i0\().16b, \k2\().16b
...@@ -92,47 +99,52 @@ ...@@ -92,47 +99,52 @@
.ifnb \i3 .ifnb \i3
eor \i2\().16b, \i2\().16b, \k2\().16b eor \i2\().16b, \i2\().16b, \k2\().16b
eor \i3\().16b, \i3\().16b, \k2\().16b eor \i3\().16b, \i3\().16b, \k2\().16b
.ifnb \i4
eor \i4\().16b, \i4\().16b, \k2\().16b
.endif
.endif .endif
.endif .endif
.endm .endm
/* up to 4 interleaved blocks */ /* up to 5 interleaved blocks */
.macro do_block_Nx, enc, rounds, i0, i1, i2, i3 .macro do_block_Nx, enc, rounds, i0, i1, i2, i3, i4
cmp \rounds, #12 cmp \rounds, #12
blo 2222f /* 128 bits */ blo 2222f /* 128 bits */
beq 1111f /* 192 bits */ beq 1111f /* 192 bits */
round_Nx \enc, v17, \i0, \i1, \i2, \i3 round_Nx \enc, v17, \i0, \i1, \i2, \i3, \i4
round_Nx \enc, v18, \i0, \i1, \i2, \i3 round_Nx \enc, v18, \i0, \i1, \i2, \i3, \i4
1111: round_Nx \enc, v19, \i0, \i1, \i2, \i3 1111: round_Nx \enc, v19, \i0, \i1, \i2, \i3, \i4
round_Nx \enc, v20, \i0, \i1, \i2, \i3 round_Nx \enc, v20, \i0, \i1, \i2, \i3, \i4
2222: .irp key, v21, v22, v23, v24, v25, v26, v27, v28, v29 2222: .irp key, v21, v22, v23, v24, v25, v26, v27, v28, v29
round_Nx \enc, \key, \i0, \i1, \i2, \i3 round_Nx \enc, \key, \i0, \i1, \i2, \i3, \i4
.endr .endr
fin_round_Nx \enc, v30, v31, \i0, \i1, \i2, \i3 fin_round_Nx \enc, v30, v31, \i0, \i1, \i2, \i3, \i4
.endm .endm
.macro encrypt_block, in, rounds, t0, t1, t2 .macro encrypt_block, in, rounds, t0, t1, t2
do_block_Nx e, \rounds, \in do_block_Nx e, \rounds, \in
.endm .endm
.macro encrypt_block2x, i0, i1, rounds, t0, t1, t2
do_block_Nx e, \rounds, \i0, \i1
.endm
.macro encrypt_block4x, i0, i1, i2, i3, rounds, t0, t1, t2 .macro encrypt_block4x, i0, i1, i2, i3, rounds, t0, t1, t2
do_block_Nx e, \rounds, \i0, \i1, \i2, \i3 do_block_Nx e, \rounds, \i0, \i1, \i2, \i3
.endm .endm
.macro decrypt_block, in, rounds, t0, t1, t2 .macro encrypt_block5x, i0, i1, i2, i3, i4, rounds, t0, t1, t2
do_block_Nx d, \rounds, \in do_block_Nx e, \rounds, \i0, \i1, \i2, \i3, \i4
.endm .endm
.macro decrypt_block2x, i0, i1, rounds, t0, t1, t2 .macro decrypt_block, in, rounds, t0, t1, t2
do_block_Nx d, \rounds, \i0, \i1 do_block_Nx d, \rounds, \in
.endm .endm
.macro decrypt_block4x, i0, i1, i2, i3, rounds, t0, t1, t2 .macro decrypt_block4x, i0, i1, i2, i3, rounds, t0, t1, t2
do_block_Nx d, \rounds, \i0, \i1, \i2, \i3 do_block_Nx d, \rounds, \i0, \i1, \i2, \i3
.endm .endm
.macro decrypt_block5x, i0, i1, i2, i3, i4, rounds, t0, t1, t2
do_block_Nx d, \rounds, \i0, \i1, \i2, \i3, \i4
.endm
#define MAX_STRIDE 5
#include "aes-modes.S" #include "aes-modes.S"
...@@ -13,6 +13,10 @@ ...@@ -13,6 +13,10 @@
.text .text
.align 4 .align 4
#ifndef MAX_STRIDE
#define MAX_STRIDE 4
#endif
aes_encrypt_block4x: aes_encrypt_block4x:
encrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7 encrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7
ret ret
...@@ -23,6 +27,18 @@ aes_decrypt_block4x: ...@@ -23,6 +27,18 @@ aes_decrypt_block4x:
ret ret
ENDPROC(aes_decrypt_block4x) ENDPROC(aes_decrypt_block4x)
#if MAX_STRIDE == 5
aes_encrypt_block5x:
encrypt_block5x v0, v1, v2, v3, v4, w3, x2, x8, w7
ret
ENDPROC(aes_encrypt_block5x)
aes_decrypt_block5x:
decrypt_block5x v0, v1, v2, v3, v4, w3, x2, x8, w7
ret
ENDPROC(aes_decrypt_block5x)
#endif
/* /*
* aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
* int blocks) * int blocks)
......
...@@ -117,26 +117,9 @@ ...@@ -117,26 +117,9 @@
/* /*
* Interleaved versions: functionally equivalent to the * Interleaved versions: functionally equivalent to the
* ones above, but applied to 2 or 4 AES states in parallel. * ones above, but applied to AES states in parallel.
*/ */
.macro sub_bytes_2x, in0, in1
sub v8.16b, \in0\().16b, v15.16b
tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b
sub v9.16b, \in1\().16b, v15.16b
tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b
sub v10.16b, v8.16b, v15.16b
tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b
sub v11.16b, v9.16b, v15.16b
tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b
sub v8.16b, v10.16b, v15.16b
tbx \in0\().16b, {v24.16b-v27.16b}, v10.16b
sub v9.16b, v11.16b, v15.16b
tbx \in1\().16b, {v24.16b-v27.16b}, v11.16b
tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b
tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b
.endm
.macro sub_bytes_4x, in0, in1, in2, in3 .macro sub_bytes_4x, in0, in1, in2, in3
sub v8.16b, \in0\().16b, v15.16b sub v8.16b, \in0\().16b, v15.16b
tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b
...@@ -215,25 +198,6 @@ ...@@ -215,25 +198,6 @@
eor \in1\().16b, \in1\().16b, v11.16b eor \in1\().16b, \in1\().16b, v11.16b
.endm .endm
.macro do_block_2x, enc, in0, in1, rounds, rk, rkp, i
ld1 {v15.4s}, [\rk]
add \rkp, \rk, #16
mov \i, \rounds
1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
movi v15.16b, #0x40
tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */
tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */
sub_bytes_2x \in0, \in1
subs \i, \i, #1
ld1 {v15.4s}, [\rkp], #16
beq 2222f
mix_columns_2x \in0, \in1, \enc
b 1111b
2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
.endm
.macro do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i .macro do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i
ld1 {v15.4s}, [\rk] ld1 {v15.4s}, [\rk]
add \rkp, \rk, #16 add \rkp, \rk, #16
...@@ -260,14 +224,6 @@ ...@@ -260,14 +224,6 @@
eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */ eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */
.endm .endm
.macro encrypt_block2x, in0, in1, rounds, rk, rkp, i
do_block_2x 1, \in0, \in1, \rounds, \rk, \rkp, \i
.endm
.macro decrypt_block2x, in0, in1, rounds, rk, rkp, i
do_block_2x 0, \in0, \in1, \rounds, \rk, \rkp, \i
.endm
.macro encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i .macro encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
do_block_4x 1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i do_block_4x 1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
.endm .endm
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment