Commit ad8f5b88 authored by Ard Biesheuvel's avatar Ard Biesheuvel Committed by Herbert Xu

crypto: x86/poly1305 - unify Poly1305 state struct with generic code

In preparation of exposing a Poly1305 library interface directly from
the accelerated x86 driver, align the state descriptor of the x86 code
with the one used by the generic driver. This is needed to make the
library interface unified between all implementations.
Signed-off-by: default avatarArd Biesheuvel <ardb@kernel.org>
Signed-off-by: default avatarHerbert Xu <herbert@gondor.apana.org.au>
parent 48ea8c6e
...@@ -14,40 +14,14 @@ ...@@ -14,40 +14,14 @@
#include <linux/module.h> #include <linux/module.h>
#include <asm/simd.h> #include <asm/simd.h>
struct poly1305_simd_desc_ctx {
struct poly1305_desc_ctx base;
/* derived key u set? */
bool uset;
#ifdef CONFIG_AS_AVX2
/* derived keys r^3, r^4 set? */
bool wset;
#endif
/* derived Poly1305 key r^2 */
u32 u[5];
/* ... silently appended r^3 and r^4 when using AVX2 */
};
asmlinkage void poly1305_block_sse2(u32 *h, const u8 *src, asmlinkage void poly1305_block_sse2(u32 *h, const u8 *src,
const u32 *r, unsigned int blocks); const u32 *r, unsigned int blocks);
asmlinkage void poly1305_2block_sse2(u32 *h, const u8 *src, const u32 *r, asmlinkage void poly1305_2block_sse2(u32 *h, const u8 *src, const u32 *r,
unsigned int blocks, const u32 *u); unsigned int blocks, const u32 *u);
#ifdef CONFIG_AS_AVX2
asmlinkage void poly1305_4block_avx2(u32 *h, const u8 *src, const u32 *r, asmlinkage void poly1305_4block_avx2(u32 *h, const u8 *src, const u32 *r,
unsigned int blocks, const u32 *u); unsigned int blocks, const u32 *u);
static bool poly1305_use_avx2;
#endif
static int poly1305_simd_init(struct shash_desc *desc) static bool poly1305_use_avx2 __ro_after_init;
{
struct poly1305_simd_desc_ctx *sctx = shash_desc_ctx(desc);
sctx->uset = false;
#ifdef CONFIG_AS_AVX2
sctx->wset = false;
#endif
return crypto_poly1305_init(desc);
}
static void poly1305_simd_mult(u32 *a, const u32 *b) static void poly1305_simd_mult(u32 *a, const u32 *b)
{ {
...@@ -63,53 +37,49 @@ static void poly1305_simd_mult(u32 *a, const u32 *b) ...@@ -63,53 +37,49 @@ static void poly1305_simd_mult(u32 *a, const u32 *b)
static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx, static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx,
const u8 *src, unsigned int srclen) const u8 *src, unsigned int srclen)
{ {
struct poly1305_simd_desc_ctx *sctx;
unsigned int blocks, datalen; unsigned int blocks, datalen;
BUILD_BUG_ON(offsetof(struct poly1305_simd_desc_ctx, base));
sctx = container_of(dctx, struct poly1305_simd_desc_ctx, base);
if (unlikely(!dctx->sset)) { if (unlikely(!dctx->sset)) {
datalen = crypto_poly1305_setdesckey(dctx, src, srclen); datalen = crypto_poly1305_setdesckey(dctx, src, srclen);
src += srclen - datalen; src += srclen - datalen;
srclen = datalen; srclen = datalen;
} }
#ifdef CONFIG_AS_AVX2 if (IS_ENABLED(CONFIG_AS_AVX2) &&
if (poly1305_use_avx2 && srclen >= POLY1305_BLOCK_SIZE * 4) { poly1305_use_avx2 &&
if (unlikely(!sctx->wset)) { srclen >= POLY1305_BLOCK_SIZE * 4) {
if (!sctx->uset) { if (unlikely(dctx->rset < 4)) {
memcpy(sctx->u, dctx->r.r, sizeof(sctx->u)); if (dctx->rset < 2) {
poly1305_simd_mult(sctx->u, dctx->r.r); dctx->r[1] = dctx->r[0];
sctx->uset = true; poly1305_simd_mult(dctx->r[1].r, dctx->r[0].r);
} }
memcpy(sctx->u + 5, sctx->u, sizeof(sctx->u)); dctx->r[2] = dctx->r[1];
poly1305_simd_mult(sctx->u + 5, dctx->r.r); poly1305_simd_mult(dctx->r[2].r, dctx->r[0].r);
memcpy(sctx->u + 10, sctx->u + 5, sizeof(sctx->u)); dctx->r[3] = dctx->r[2];
poly1305_simd_mult(sctx->u + 10, dctx->r.r); poly1305_simd_mult(dctx->r[3].r, dctx->r[0].r);
sctx->wset = true; dctx->rset = 4;
} }
blocks = srclen / (POLY1305_BLOCK_SIZE * 4); blocks = srclen / (POLY1305_BLOCK_SIZE * 4);
poly1305_4block_avx2(dctx->h.h, src, dctx->r.r, blocks, poly1305_4block_avx2(dctx->h.h, src, dctx->r[0].r, blocks,
sctx->u); dctx->r[1].r);
src += POLY1305_BLOCK_SIZE * 4 * blocks; src += POLY1305_BLOCK_SIZE * 4 * blocks;
srclen -= POLY1305_BLOCK_SIZE * 4 * blocks; srclen -= POLY1305_BLOCK_SIZE * 4 * blocks;
} }
#endif
if (likely(srclen >= POLY1305_BLOCK_SIZE * 2)) { if (likely(srclen >= POLY1305_BLOCK_SIZE * 2)) {
if (unlikely(!sctx->uset)) { if (unlikely(dctx->rset < 2)) {
memcpy(sctx->u, dctx->r.r, sizeof(sctx->u)); dctx->r[1] = dctx->r[0];
poly1305_simd_mult(sctx->u, dctx->r.r); poly1305_simd_mult(dctx->r[1].r, dctx->r[0].r);
sctx->uset = true; dctx->rset = 2;
} }
blocks = srclen / (POLY1305_BLOCK_SIZE * 2); blocks = srclen / (POLY1305_BLOCK_SIZE * 2);
poly1305_2block_sse2(dctx->h.h, src, dctx->r.r, blocks, poly1305_2block_sse2(dctx->h.h, src, dctx->r[0].r,
sctx->u); blocks, dctx->r[1].r);
src += POLY1305_BLOCK_SIZE * 2 * blocks; src += POLY1305_BLOCK_SIZE * 2 * blocks;
srclen -= POLY1305_BLOCK_SIZE * 2 * blocks; srclen -= POLY1305_BLOCK_SIZE * 2 * blocks;
} }
if (srclen >= POLY1305_BLOCK_SIZE) { if (srclen >= POLY1305_BLOCK_SIZE) {
poly1305_block_sse2(dctx->h.h, src, dctx->r.r, 1); poly1305_block_sse2(dctx->h.h, src, dctx->r[0].r, 1);
srclen -= POLY1305_BLOCK_SIZE; srclen -= POLY1305_BLOCK_SIZE;
} }
return srclen; return srclen;
...@@ -159,10 +129,10 @@ static int poly1305_simd_update(struct shash_desc *desc, ...@@ -159,10 +129,10 @@ static int poly1305_simd_update(struct shash_desc *desc,
static struct shash_alg alg = { static struct shash_alg alg = {
.digestsize = POLY1305_DIGEST_SIZE, .digestsize = POLY1305_DIGEST_SIZE,
.init = poly1305_simd_init, .init = crypto_poly1305_init,
.update = poly1305_simd_update, .update = poly1305_simd_update,
.final = crypto_poly1305_final, .final = crypto_poly1305_final,
.descsize = sizeof(struct poly1305_simd_desc_ctx), .descsize = sizeof(struct poly1305_desc_ctx),
.base = { .base = {
.cra_name = "poly1305", .cra_name = "poly1305",
.cra_driver_name = "poly1305-simd", .cra_driver_name = "poly1305-simd",
...@@ -177,14 +147,14 @@ static int __init poly1305_simd_mod_init(void) ...@@ -177,14 +147,14 @@ static int __init poly1305_simd_mod_init(void)
if (!boot_cpu_has(X86_FEATURE_XMM2)) if (!boot_cpu_has(X86_FEATURE_XMM2))
return -ENODEV; return -ENODEV;
#ifdef CONFIG_AS_AVX2 poly1305_use_avx2 = IS_ENABLED(CONFIG_AS_AVX2) &&
poly1305_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_AVX) &&
boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_AVX2) &&
cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL); cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
alg.descsize = sizeof(struct poly1305_simd_desc_ctx); alg.descsize = sizeof(struct poly1305_desc_ctx) + 5 * sizeof(u32);
if (poly1305_use_avx2) if (poly1305_use_avx2)
alg.descsize += 10 * sizeof(u32); alg.descsize += 10 * sizeof(u32);
#endif
return crypto_register_shash(&alg); return crypto_register_shash(&alg);
} }
......
...@@ -25,7 +25,7 @@ int crypto_poly1305_init(struct shash_desc *desc) ...@@ -25,7 +25,7 @@ int crypto_poly1305_init(struct shash_desc *desc)
poly1305_core_init(&dctx->h); poly1305_core_init(&dctx->h);
dctx->buflen = 0; dctx->buflen = 0;
dctx->rset = false; dctx->rset = 0;
dctx->sset = false; dctx->sset = false;
return 0; return 0;
...@@ -43,7 +43,7 @@ static void poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src, ...@@ -43,7 +43,7 @@ static void poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
srclen = datalen; srclen = datalen;
} }
poly1305_core_blocks(&dctx->h, &dctx->r, src, poly1305_core_blocks(&dctx->h, dctx->r, src,
srclen / POLY1305_BLOCK_SIZE, 1); srclen / POLY1305_BLOCK_SIZE, 1);
} }
...@@ -95,7 +95,7 @@ int crypto_poly1305_final(struct shash_desc *desc, u8 *dst) ...@@ -95,7 +95,7 @@ int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
dctx->buf[dctx->buflen++] = 1; dctx->buf[dctx->buflen++] = 1;
memset(dctx->buf + dctx->buflen, 0, memset(dctx->buf + dctx->buflen, 0,
POLY1305_BLOCK_SIZE - dctx->buflen); POLY1305_BLOCK_SIZE - dctx->buflen);
poly1305_core_blocks(&dctx->h, &dctx->r, dctx->buf, 1, 0); poly1305_core_blocks(&dctx->h, dctx->r, dctx->buf, 1, 0);
} }
poly1305_core_emit(&dctx->h, digest); poly1305_core_emit(&dctx->h, digest);
......
...@@ -46,10 +46,10 @@ unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx, ...@@ -46,10 +46,10 @@ unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx,
{ {
if (!dctx->sset) { if (!dctx->sset) {
if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) { if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) {
poly1305_core_setkey(&dctx->r, src); poly1305_core_setkey(dctx->r, src);
src += POLY1305_BLOCK_SIZE; src += POLY1305_BLOCK_SIZE;
srclen -= POLY1305_BLOCK_SIZE; srclen -= POLY1305_BLOCK_SIZE;
dctx->rset = true; dctx->rset = 1;
} }
if (srclen >= POLY1305_BLOCK_SIZE) { if (srclen >= POLY1305_BLOCK_SIZE) {
dctx->s[0] = get_unaligned_le32(src + 0); dctx->s[0] = get_unaligned_le32(src + 0);
......
...@@ -22,20 +22,20 @@ struct poly1305_state { ...@@ -22,20 +22,20 @@ struct poly1305_state {
}; };
struct poly1305_desc_ctx { struct poly1305_desc_ctx {
/* key */
struct poly1305_key r;
/* finalize key */
u32 s[4];
/* accumulator */
struct poly1305_state h;
/* partial buffer */ /* partial buffer */
u8 buf[POLY1305_BLOCK_SIZE]; u8 buf[POLY1305_BLOCK_SIZE];
/* bytes used in partial buffer */ /* bytes used in partial buffer */
unsigned int buflen; unsigned int buflen;
/* r key has been set */ /* how many keys have been set in r[] */
bool rset; unsigned short rset;
/* s key has been set */ /* whether s[] has been set */
bool sset; bool sset;
/* finalize key */
u32 s[4];
/* accumulator */
struct poly1305_state h;
/* key */
struct poly1305_key r[1];
}; };
#endif #endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment