Commit bd17e2d3 authored by Ard Biesheuvel's avatar Ard Biesheuvel Committed by Greg Kroah-Hartman

crypto: arm64/aes-ccm-ce: fix for big endian

commit 56e4e76c upstream.

The AES-CCM implementation that uses ARMv8 Crypto Extensions instructions
refers to the AES round keys as pairs of 64-bit quantities, which causes
failures when building the code for big endian. In addition, it byte swaps
the input counter unconditionally, while this is only required for little
endian builds. So fix both issues.

Fixes: 12ac3efe ("arm64/crypto: use crypto instructions to generate AES key schedule")
Signed-off-by: default avatarArd Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: default avatarHerbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
parent bed5c787
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
*/ */
#include <linux/linkage.h> #include <linux/linkage.h>
#include <asm/assembler.h>
.text .text
.arch armv8-a+crypto .arch armv8-a+crypto
...@@ -19,7 +20,7 @@ ...@@ -19,7 +20,7 @@
*/ */
ENTRY(ce_aes_ccm_auth_data) ENTRY(ce_aes_ccm_auth_data)
ldr w8, [x3] /* leftover from prev round? */ ldr w8, [x3] /* leftover from prev round? */
ld1 {v0.2d}, [x0] /* load mac */ ld1 {v0.16b}, [x0] /* load mac */
cbz w8, 1f cbz w8, 1f
sub w8, w8, #16 sub w8, w8, #16
eor v1.16b, v1.16b, v1.16b eor v1.16b, v1.16b, v1.16b
...@@ -31,7 +32,7 @@ ENTRY(ce_aes_ccm_auth_data) ...@@ -31,7 +32,7 @@ ENTRY(ce_aes_ccm_auth_data)
beq 8f /* out of input? */ beq 8f /* out of input? */
cbnz w8, 0b cbnz w8, 0b
eor v0.16b, v0.16b, v1.16b eor v0.16b, v0.16b, v1.16b
1: ld1 {v3.2d}, [x4] /* load first round key */ 1: ld1 {v3.16b}, [x4] /* load first round key */
prfm pldl1strm, [x1] prfm pldl1strm, [x1]
cmp w5, #12 /* which key size? */ cmp w5, #12 /* which key size? */
add x6, x4, #16 add x6, x4, #16
...@@ -41,17 +42,17 @@ ENTRY(ce_aes_ccm_auth_data) ...@@ -41,17 +42,17 @@ ENTRY(ce_aes_ccm_auth_data)
mov v5.16b, v3.16b mov v5.16b, v3.16b
b 4f b 4f
2: mov v4.16b, v3.16b 2: mov v4.16b, v3.16b
ld1 {v5.2d}, [x6], #16 /* load 2nd round key */ ld1 {v5.16b}, [x6], #16 /* load 2nd round key */
3: aese v0.16b, v4.16b 3: aese v0.16b, v4.16b
aesmc v0.16b, v0.16b aesmc v0.16b, v0.16b
4: ld1 {v3.2d}, [x6], #16 /* load next round key */ 4: ld1 {v3.16b}, [x6], #16 /* load next round key */
aese v0.16b, v5.16b aese v0.16b, v5.16b
aesmc v0.16b, v0.16b aesmc v0.16b, v0.16b
5: ld1 {v4.2d}, [x6], #16 /* load next round key */ 5: ld1 {v4.16b}, [x6], #16 /* load next round key */
subs w7, w7, #3 subs w7, w7, #3
aese v0.16b, v3.16b aese v0.16b, v3.16b
aesmc v0.16b, v0.16b aesmc v0.16b, v0.16b
ld1 {v5.2d}, [x6], #16 /* load next round key */ ld1 {v5.16b}, [x6], #16 /* load next round key */
bpl 3b bpl 3b
aese v0.16b, v4.16b aese v0.16b, v4.16b
subs w2, w2, #16 /* last data? */ subs w2, w2, #16 /* last data? */
...@@ -60,7 +61,7 @@ ENTRY(ce_aes_ccm_auth_data) ...@@ -60,7 +61,7 @@ ENTRY(ce_aes_ccm_auth_data)
ld1 {v1.16b}, [x1], #16 /* load next input block */ ld1 {v1.16b}, [x1], #16 /* load next input block */
eor v0.16b, v0.16b, v1.16b /* xor with mac */ eor v0.16b, v0.16b, v1.16b /* xor with mac */
bne 1b bne 1b
6: st1 {v0.2d}, [x0] /* store mac */ 6: st1 {v0.16b}, [x0] /* store mac */
beq 10f beq 10f
adds w2, w2, #16 adds w2, w2, #16
beq 10f beq 10f
...@@ -79,7 +80,7 @@ ENTRY(ce_aes_ccm_auth_data) ...@@ -79,7 +80,7 @@ ENTRY(ce_aes_ccm_auth_data)
adds w7, w7, #1 adds w7, w7, #1
bne 9b bne 9b
eor v0.16b, v0.16b, v1.16b eor v0.16b, v0.16b, v1.16b
st1 {v0.2d}, [x0] st1 {v0.16b}, [x0]
10: str w8, [x3] 10: str w8, [x3]
ret ret
ENDPROC(ce_aes_ccm_auth_data) ENDPROC(ce_aes_ccm_auth_data)
...@@ -89,27 +90,27 @@ ENDPROC(ce_aes_ccm_auth_data) ...@@ -89,27 +90,27 @@ ENDPROC(ce_aes_ccm_auth_data)
* u32 rounds); * u32 rounds);
*/ */
ENTRY(ce_aes_ccm_final) ENTRY(ce_aes_ccm_final)
ld1 {v3.2d}, [x2], #16 /* load first round key */ ld1 {v3.16b}, [x2], #16 /* load first round key */
ld1 {v0.2d}, [x0] /* load mac */ ld1 {v0.16b}, [x0] /* load mac */
cmp w3, #12 /* which key size? */ cmp w3, #12 /* which key size? */
sub w3, w3, #2 /* modified # of rounds */ sub w3, w3, #2 /* modified # of rounds */
ld1 {v1.2d}, [x1] /* load 1st ctriv */ ld1 {v1.16b}, [x1] /* load 1st ctriv */
bmi 0f bmi 0f
bne 3f bne 3f
mov v5.16b, v3.16b mov v5.16b, v3.16b
b 2f b 2f
0: mov v4.16b, v3.16b 0: mov v4.16b, v3.16b
1: ld1 {v5.2d}, [x2], #16 /* load next round key */ 1: ld1 {v5.16b}, [x2], #16 /* load next round key */
aese v0.16b, v4.16b aese v0.16b, v4.16b
aesmc v0.16b, v0.16b aesmc v0.16b, v0.16b
aese v1.16b, v4.16b aese v1.16b, v4.16b
aesmc v1.16b, v1.16b aesmc v1.16b, v1.16b
2: ld1 {v3.2d}, [x2], #16 /* load next round key */ 2: ld1 {v3.16b}, [x2], #16 /* load next round key */
aese v0.16b, v5.16b aese v0.16b, v5.16b
aesmc v0.16b, v0.16b aesmc v0.16b, v0.16b
aese v1.16b, v5.16b aese v1.16b, v5.16b
aesmc v1.16b, v1.16b aesmc v1.16b, v1.16b
3: ld1 {v4.2d}, [x2], #16 /* load next round key */ 3: ld1 {v4.16b}, [x2], #16 /* load next round key */
subs w3, w3, #3 subs w3, w3, #3
aese v0.16b, v3.16b aese v0.16b, v3.16b
aesmc v0.16b, v0.16b aesmc v0.16b, v0.16b
...@@ -120,47 +121,47 @@ ENTRY(ce_aes_ccm_final) ...@@ -120,47 +121,47 @@ ENTRY(ce_aes_ccm_final)
aese v1.16b, v4.16b aese v1.16b, v4.16b
/* final round key cancels out */ /* final round key cancels out */
eor v0.16b, v0.16b, v1.16b /* en-/decrypt the mac */ eor v0.16b, v0.16b, v1.16b /* en-/decrypt the mac */
st1 {v0.2d}, [x0] /* store result */ st1 {v0.16b}, [x0] /* store result */
ret ret
ENDPROC(ce_aes_ccm_final) ENDPROC(ce_aes_ccm_final)
.macro aes_ccm_do_crypt,enc .macro aes_ccm_do_crypt,enc
ldr x8, [x6, #8] /* load lower ctr */ ldr x8, [x6, #8] /* load lower ctr */
ld1 {v0.2d}, [x5] /* load mac */ ld1 {v0.16b}, [x5] /* load mac */
rev x8, x8 /* keep swabbed ctr in reg */ CPU_LE( rev x8, x8 ) /* keep swabbed ctr in reg */
0: /* outer loop */ 0: /* outer loop */
ld1 {v1.1d}, [x6] /* load upper ctr */ ld1 {v1.8b}, [x6] /* load upper ctr */
prfm pldl1strm, [x1] prfm pldl1strm, [x1]
add x8, x8, #1 add x8, x8, #1
rev x9, x8 rev x9, x8
cmp w4, #12 /* which key size? */ cmp w4, #12 /* which key size? */
sub w7, w4, #2 /* get modified # of rounds */ sub w7, w4, #2 /* get modified # of rounds */
ins v1.d[1], x9 /* no carry in lower ctr */ ins v1.d[1], x9 /* no carry in lower ctr */
ld1 {v3.2d}, [x3] /* load first round key */ ld1 {v3.16b}, [x3] /* load first round key */
add x10, x3, #16 add x10, x3, #16
bmi 1f bmi 1f
bne 4f bne 4f
mov v5.16b, v3.16b mov v5.16b, v3.16b
b 3f b 3f
1: mov v4.16b, v3.16b 1: mov v4.16b, v3.16b
ld1 {v5.2d}, [x10], #16 /* load 2nd round key */ ld1 {v5.16b}, [x10], #16 /* load 2nd round key */
2: /* inner loop: 3 rounds, 2x interleaved */ 2: /* inner loop: 3 rounds, 2x interleaved */
aese v0.16b, v4.16b aese v0.16b, v4.16b
aesmc v0.16b, v0.16b aesmc v0.16b, v0.16b
aese v1.16b, v4.16b aese v1.16b, v4.16b
aesmc v1.16b, v1.16b aesmc v1.16b, v1.16b
3: ld1 {v3.2d}, [x10], #16 /* load next round key */ 3: ld1 {v3.16b}, [x10], #16 /* load next round key */
aese v0.16b, v5.16b aese v0.16b, v5.16b
aesmc v0.16b, v0.16b aesmc v0.16b, v0.16b
aese v1.16b, v5.16b aese v1.16b, v5.16b
aesmc v1.16b, v1.16b aesmc v1.16b, v1.16b
4: ld1 {v4.2d}, [x10], #16 /* load next round key */ 4: ld1 {v4.16b}, [x10], #16 /* load next round key */
subs w7, w7, #3 subs w7, w7, #3
aese v0.16b, v3.16b aese v0.16b, v3.16b
aesmc v0.16b, v0.16b aesmc v0.16b, v0.16b
aese v1.16b, v3.16b aese v1.16b, v3.16b
aesmc v1.16b, v1.16b aesmc v1.16b, v1.16b
ld1 {v5.2d}, [x10], #16 /* load next round key */ ld1 {v5.16b}, [x10], #16 /* load next round key */
bpl 2b bpl 2b
aese v0.16b, v4.16b aese v0.16b, v4.16b
aese v1.16b, v4.16b aese v1.16b, v4.16b
...@@ -177,14 +178,14 @@ ENDPROC(ce_aes_ccm_final) ...@@ -177,14 +178,14 @@ ENDPROC(ce_aes_ccm_final)
eor v0.16b, v0.16b, v2.16b /* xor mac with pt ^ rk[last] */ eor v0.16b, v0.16b, v2.16b /* xor mac with pt ^ rk[last] */
st1 {v1.16b}, [x0], #16 /* write output block */ st1 {v1.16b}, [x0], #16 /* write output block */
bne 0b bne 0b
rev x8, x8 CPU_LE( rev x8, x8 )
st1 {v0.2d}, [x5] /* store mac */ st1 {v0.16b}, [x5] /* store mac */
str x8, [x6, #8] /* store lsb end of ctr (BE) */ str x8, [x6, #8] /* store lsb end of ctr (BE) */
5: ret 5: ret
6: eor v0.16b, v0.16b, v5.16b /* final round mac */ 6: eor v0.16b, v0.16b, v5.16b /* final round mac */
eor v1.16b, v1.16b, v5.16b /* final round enc */ eor v1.16b, v1.16b, v5.16b /* final round enc */
st1 {v0.2d}, [x5] /* store mac */ st1 {v0.16b}, [x5] /* store mac */
add w2, w2, #16 /* process partial tail block */ add w2, w2, #16 /* process partial tail block */
7: ldrb w9, [x1], #1 /* get 1 byte of input */ 7: ldrb w9, [x1], #1 /* get 1 byte of input */
umov w6, v1.b[0] /* get top crypted ctr byte */ umov w6, v1.b[0] /* get top crypted ctr byte */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment