Commit ae1b83c7 authored by Tianjia Zhang's avatar Tianjia Zhang Committed by Herbert Xu

crypto: arm64/sm4 - add CE implementation for GCM mode

This patch is a CE-optimized assembly implementation for GCM mode.

Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 224 and 224
modes of tcrypt, and compared the performance before and after this patch (the
driver used before this patch is gcm_base(ctr-sm4-ce,ghash-generic)).
The abscissas are blocks of different lengths. The data is tabulated and the
unit is Mb/s:

Before (gcm_base(ctr-sm4-ce,ghash-generic)):

gcm(sm4)     |     16      64      256      512     1024     1420     4096     8192
-------------+---------------------------------------------------------------------
  GCM enc    |  25.24   64.65   104.66   116.69   123.81   125.12   129.67   130.62
  GCM dec    |  25.40   64.80   104.74   116.70   123.81   125.21   129.68   130.59
  GCM mb enc |  24.95   64.06   104.20   116.38   123.55   124.97   129.63   130.61
  GCM mb dec |  24.92   64.00   104.13   116.34   123.55   124.98   129.56   130.48

After:

gcm-sm4-ce   |     16      64      256      512     1024     1420     4096     8192
-------------+---------------------------------------------------------------------
  GCM enc    | 108.62  397.18   971.60  1283.92  1522.77  1513.39  1777.00  1806.96
  GCM dec    | 116.36  398.14  1004.27  1319.11  1624.21  1635.43  1932.54  1974.20
  GCM mb enc | 107.13  391.79   962.05  1274.94  1514.76  1508.57  1769.07  1801.58
  GCM mb dec | 113.40  389.36   988.51  1307.68  1619.10  1631.55  1931.70  1970.86
Signed-off-by: default avatarTianjia Zhang <tianjia.zhang@linux.alibaba.com>
Signed-off-by: default avatarHerbert Xu <herbert@gondor.apana.org.au>
parent 67fa3a7f
...@@ -297,6 +297,22 @@ config CRYPTO_SM4_ARM64_CE_CCM ...@@ -297,6 +297,22 @@ config CRYPTO_SM4_ARM64_CE_CCM
- ARMv8 Crypto Extensions - ARMv8 Crypto Extensions
- NEON (Advanced SIMD) extensions - NEON (Advanced SIMD) extensions
config CRYPTO_SM4_ARM64_CE_GCM
tristate "AEAD cipher: SM4 in GCM mode (ARMv8 Crypto Extensions)"
depends on KERNEL_MODE_NEON
select CRYPTO_ALGAPI
select CRYPTO_AEAD
select CRYPTO_SM4
select CRYPTO_SM4_ARM64_CE_BLK
help
AEAD cipher: SM4 cipher algorithms (OSCCA GB/T 32907-2016) with
GCM (Galois/Counter Mode) authenticated encryption mode (NIST SP800-38D)
Architecture: arm64 using:
- ARMv8 Crypto Extensions
- PMULL (Polynomial Multiply Long) instructions
- NEON (Advanced SIMD) extensions
config CRYPTO_CRCT10DIF_ARM64_CE config CRYPTO_CRCT10DIF_ARM64_CE
tristate "CRCT10DIF (PMULL)" tristate "CRCT10DIF (PMULL)"
depends on KERNEL_MODE_NEON && CRC_T10DIF depends on KERNEL_MODE_NEON && CRC_T10DIF
......
...@@ -32,6 +32,9 @@ sm4-ce-y := sm4-ce-glue.o sm4-ce-core.o ...@@ -32,6 +32,9 @@ sm4-ce-y := sm4-ce-glue.o sm4-ce-core.o
obj-$(CONFIG_CRYPTO_SM4_ARM64_CE_CCM) += sm4-ce-ccm.o obj-$(CONFIG_CRYPTO_SM4_ARM64_CE_CCM) += sm4-ce-ccm.o
sm4-ce-ccm-y := sm4-ce-ccm-glue.o sm4-ce-ccm-core.o sm4-ce-ccm-y := sm4-ce-ccm-glue.o sm4-ce-ccm-core.o
obj-$(CONFIG_CRYPTO_SM4_ARM64_CE_GCM) += sm4-ce-gcm.o
sm4-ce-gcm-y := sm4-ce-gcm-glue.o sm4-ce-gcm-core.o
obj-$(CONFIG_CRYPTO_SM4_ARM64_NEON_BLK) += sm4-neon.o obj-$(CONFIG_CRYPTO_SM4_ARM64_NEON_BLK) += sm4-neon.o
sm4-neon-y := sm4-neon-glue.o sm4-neon-core.o sm4-neon-y := sm4-neon-glue.o sm4-neon-core.o
......
This diff is collapsed.
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* SM4-GCM AEAD Algorithm using ARMv8 Crypto Extensions
* as specified in rfc8998
* https://datatracker.ietf.org/doc/html/rfc8998
*
* Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
*/
#include <linux/module.h>
#include <linux/crypto.h>
#include <linux/kernel.h>
#include <linux/cpufeature.h>
#include <asm/neon.h>
#include <crypto/b128ops.h>
#include <crypto/scatterwalk.h>
#include <crypto/internal/aead.h>
#include <crypto/internal/skcipher.h>
#include <crypto/sm4.h>
#include "sm4-ce.h"
asmlinkage void sm4_ce_pmull_ghash_setup(const u32 *rkey_enc, u8 *ghash_table);
asmlinkage void pmull_ghash_update(const u8 *ghash_table, u8 *ghash,
const u8 *src, unsigned int nblocks);
asmlinkage void sm4_ce_pmull_gcm_enc(const u32 *rkey_enc, u8 *dst,
const u8 *src, u8 *iv,
unsigned int nbytes, u8 *ghash,
const u8 *ghash_table, const u8 *lengths);
asmlinkage void sm4_ce_pmull_gcm_dec(const u32 *rkey_enc, u8 *dst,
const u8 *src, u8 *iv,
unsigned int nbytes, u8 *ghash,
const u8 *ghash_table, const u8 *lengths);
#define GHASH_BLOCK_SIZE 16
#define GCM_IV_SIZE 12
struct sm4_gcm_ctx {
struct sm4_ctx key;
u8 ghash_table[16 * 4];
};
static int gcm_setkey(struct crypto_aead *tfm, const u8 *key,
unsigned int key_len)
{
struct sm4_gcm_ctx *ctx = crypto_aead_ctx(tfm);
if (key_len != SM4_KEY_SIZE)
return -EINVAL;
kernel_neon_begin();
sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec,
crypto_sm4_fk, crypto_sm4_ck);
sm4_ce_pmull_ghash_setup(ctx->key.rkey_enc, ctx->ghash_table);
kernel_neon_end();
return 0;
}
static int gcm_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
{
switch (authsize) {
case 4:
case 8:
case 12 ... 16:
return 0;
default:
return -EINVAL;
}
}
static void gcm_calculate_auth_mac(struct aead_request *req, u8 ghash[])
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct sm4_gcm_ctx *ctx = crypto_aead_ctx(aead);
u8 __aligned(8) buffer[GHASH_BLOCK_SIZE];
u32 assoclen = req->assoclen;
struct scatter_walk walk;
unsigned int buflen = 0;
scatterwalk_start(&walk, req->src);
do {
u32 n = scatterwalk_clamp(&walk, assoclen);
u8 *p, *ptr;
if (!n) {
scatterwalk_start(&walk, sg_next(walk.sg));
n = scatterwalk_clamp(&walk, assoclen);
}
p = ptr = scatterwalk_map(&walk);
assoclen -= n;
scatterwalk_advance(&walk, n);
if (n + buflen < GHASH_BLOCK_SIZE) {
memcpy(&buffer[buflen], ptr, n);
buflen += n;
} else {
unsigned int nblocks;
if (buflen) {
unsigned int l = GHASH_BLOCK_SIZE - buflen;
memcpy(&buffer[buflen], ptr, l);
ptr += l;
n -= l;
pmull_ghash_update(ctx->ghash_table, ghash,
buffer, 1);
}
nblocks = n / GHASH_BLOCK_SIZE;
if (nblocks) {
pmull_ghash_update(ctx->ghash_table, ghash,
ptr, nblocks);
ptr += nblocks * GHASH_BLOCK_SIZE;
}
buflen = n % GHASH_BLOCK_SIZE;
if (buflen)
memcpy(&buffer[0], ptr, buflen);
}
scatterwalk_unmap(p);
scatterwalk_done(&walk, 0, assoclen);
} while (assoclen);
/* padding with '0' */
if (buflen) {
memset(&buffer[buflen], 0, GHASH_BLOCK_SIZE - buflen);
pmull_ghash_update(ctx->ghash_table, ghash, buffer, 1);
}
}
static int gcm_crypt(struct aead_request *req, struct skcipher_walk *walk,
struct sm4_gcm_ctx *ctx, u8 ghash[],
void (*sm4_ce_pmull_gcm_crypt)(const u32 *rkey_enc,
u8 *dst, const u8 *src, u8 *iv,
unsigned int nbytes, u8 *ghash,
const u8 *ghash_table, const u8 *lengths))
{
u8 __aligned(8) iv[SM4_BLOCK_SIZE];
be128 __aligned(8) lengths;
int err;
memset(ghash, 0, SM4_BLOCK_SIZE);
lengths.a = cpu_to_be64(req->assoclen * 8);
lengths.b = cpu_to_be64(walk->total * 8);
memcpy(iv, walk->iv, GCM_IV_SIZE);
put_unaligned_be32(2, iv + GCM_IV_SIZE);
kernel_neon_begin();
if (req->assoclen)
gcm_calculate_auth_mac(req, ghash);
do {
unsigned int tail = walk->nbytes % SM4_BLOCK_SIZE;
const u8 *src = walk->src.virt.addr;
u8 *dst = walk->dst.virt.addr;
if (walk->nbytes == walk->total) {
tail = 0;
sm4_ce_pmull_gcm_crypt(ctx->key.rkey_enc, dst, src, iv,
walk->nbytes, ghash,
ctx->ghash_table,
(const u8 *)&lengths);
} else if (walk->nbytes - tail) {
sm4_ce_pmull_gcm_crypt(ctx->key.rkey_enc, dst, src, iv,
walk->nbytes - tail, ghash,
ctx->ghash_table, NULL);
}
kernel_neon_end();
err = skcipher_walk_done(walk, tail);
if (err)
return err;
if (walk->nbytes)
kernel_neon_begin();
} while (walk->nbytes > 0);
return 0;
}
static int gcm_encrypt(struct aead_request *req)
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct sm4_gcm_ctx *ctx = crypto_aead_ctx(aead);
u8 __aligned(8) ghash[SM4_BLOCK_SIZE];
struct skcipher_walk walk;
int err;
err = skcipher_walk_aead_encrypt(&walk, req, false);
if (err)
return err;
err = gcm_crypt(req, &walk, ctx, ghash, sm4_ce_pmull_gcm_enc);
if (err)
return err;
/* copy authtag to end of dst */
scatterwalk_map_and_copy(ghash, req->dst, req->assoclen + req->cryptlen,
crypto_aead_authsize(aead), 1);
return 0;
}
static int gcm_decrypt(struct aead_request *req)
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
unsigned int authsize = crypto_aead_authsize(aead);
struct sm4_gcm_ctx *ctx = crypto_aead_ctx(aead);
u8 __aligned(8) ghash[SM4_BLOCK_SIZE];
u8 authtag[SM4_BLOCK_SIZE];
struct skcipher_walk walk;
int err;
err = skcipher_walk_aead_decrypt(&walk, req, false);
if (err)
return err;
err = gcm_crypt(req, &walk, ctx, ghash, sm4_ce_pmull_gcm_dec);
if (err)
return err;
/* compare calculated auth tag with the stored one */
scatterwalk_map_and_copy(authtag, req->src,
req->assoclen + req->cryptlen - authsize,
authsize, 0);
if (crypto_memneq(authtag, ghash, authsize))
return -EBADMSG;
return 0;
}
static struct aead_alg sm4_gcm_alg = {
.base = {
.cra_name = "gcm(sm4)",
.cra_driver_name = "gcm-sm4-ce",
.cra_priority = 400,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct sm4_gcm_ctx),
.cra_module = THIS_MODULE,
},
.ivsize = GCM_IV_SIZE,
.chunksize = SM4_BLOCK_SIZE,
.maxauthsize = SM4_BLOCK_SIZE,
.setkey = gcm_setkey,
.setauthsize = gcm_setauthsize,
.encrypt = gcm_encrypt,
.decrypt = gcm_decrypt,
};
static int __init sm4_ce_gcm_init(void)
{
if (!cpu_have_named_feature(PMULL))
return -ENODEV;
return crypto_register_aead(&sm4_gcm_alg);
}
static void __exit sm4_ce_gcm_exit(void)
{
crypto_unregister_aead(&sm4_gcm_alg);
}
static const struct cpu_feature sm4_ce_gcm_cpu_feature[] = {
{ cpu_feature(PMULL) },
{}
};
MODULE_DEVICE_TABLE(cpu, sm4_ce_gcm_cpu_feature);
module_cpu_feature_match(SM4, sm4_ce_gcm_init);
module_exit(sm4_ce_gcm_exit);
MODULE_DESCRIPTION("Synchronous SM4 in GCM mode using ARMv8 Crypto Extensions");
MODULE_ALIAS_CRYPTO("gcm(sm4)");
MODULE_AUTHOR("Tianjia Zhang <tianjia.zhang@linux.alibaba.com>");
MODULE_LICENSE("GPL v2");
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment