Commit 930ab34d authored by Tianjia Zhang's avatar Tianjia Zhang Committed by Herbert Xu

crypto: x86/sm3 - add AVX assembly implementation

This patch adds AVX assembly accelerated implementation of SM3 secure
hash algorithm. From the benchmark data, compared to pure software
implementation sm3-generic, the performance increase is up to 38%.

The main algorithm implementation is based on the SM3 AVX/BMI2
accelerated work by libgcrypt at:
https://gnupg.org/software/libgcrypt/index.html

Benchmark on Intel i5-6200U 2.30GHz, performance data of two
implementations, pure software sm3-generic and sm3-avx acceleration.
The data comes from the 326 mode and 422 mode of tcrypt. The columns
are the different lengths of each update. The data is tabulated and the
unit is Mb/s:

update-size |     16      64     256    1024    2048    4096    8192
------------+-------------------------------------------------------
sm3-generic | 105.97  129.60  182.12  189.62  188.06  193.66  194.88
sm3-avx     | 119.87  163.05  244.44  260.92  257.60  264.87  265.88
Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
parent b4784a45
......@@ -90,6 +90,9 @@ nhpoly1305-avx2-y := nh-avx2-x86_64.o nhpoly1305-avx2-glue.o
obj-$(CONFIG_CRYPTO_CURVE25519_X86) += curve25519-x86_64.o
obj-$(CONFIG_CRYPTO_SM3_AVX_X86_64) += sm3-avx-x86_64.o
sm3-avx-x86_64-y := sm3-avx-asm_64.o sm3_avx_glue.o
obj-$(CONFIG_CRYPTO_SM4_AESNI_AVX_X86_64) += sm4-aesni-avx-x86_64.o
sm4-aesni-avx-x86_64-y := sm4-aesni-avx-asm_64.o sm4_aesni_avx_glue.o
......
This diff is collapsed.
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* SM3 Secure Hash Algorithm, AVX assembler accelerated.
* specified in: https://datatracker.ietf.org/doc/html/draft-sca-cfrg-sm3-02
*
* Copyright (C) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/types.h>
#include <crypto/sm3.h>
#include <crypto/sm3_base.h>
#include <asm/simd.h>
/*
 * Block transform implemented outside this file (the Makefile links this
 * glue code together with sm3-avx-asm_64.o). It is handed to the
 * sm3_base_do_update()/sm3_base_do_finalize() helpers as the block function
 * and is only called here between kernel_fpu_begin() and kernel_fpu_end().
 *
 * @state:   hash context; the asm code expects the SM3 internal state at
 *           offset 0 (enforced by a BUILD_BUG_ON in sm3_avx_update()).
 * @data:    input bytes to absorb.
 * @nblocks: presumably the number of SM3_BLOCK_SIZE-byte blocks in @data --
 *           TODO confirm against the assembly source.
 */
asmlinkage void sm3_transform_avx(struct sm3_state *state,
const u8 *data, int nblocks);
/*
 * shash .update handler.
 *
 * The AVX transform is used only when crypto_simd_usable() allows it and
 * the pending remainder (count modulo the block size) plus @len reaches at
 * least one full block. In every other case the data is passed to
 * sm3_update() instead, so the FPU section is never entered for input that
 * would not complete a block.
 *
 * Always returns 0.
 */
static int sm3_avx_update(struct shash_desc *desc, const u8 *data,
			  unsigned int len)
{
	struct sm3_state *sctx = shash_desc_ctx(desc);

	/*
	 * The assembly routine is given a pointer to struct sm3_state and
	 * expects the 256-bit SM3 internal state to sit at its very start.
	 * Fail the build if the layout ever changes.
	 */
	BUILD_BUG_ON(offsetof(struct sm3_state, state) != 0);

	if (crypto_simd_usable() &&
	    (sctx->count % SM3_BLOCK_SIZE) + len >= SM3_BLOCK_SIZE) {
		/* At least one whole block is available: run the AVX code. */
		kernel_fpu_begin();
		sm3_base_do_update(desc, data, len, sm3_transform_avx);
		kernel_fpu_end();
	} else {
		/* SIMD not usable, or the input does not complete a block. */
		sm3_update(sctx, data, len);
	}

	return 0;
}
/*
 * shash .finup handler: absorb the trailing @len bytes of @data (if any)
 * and finish the hash into @out.
 *
 * When crypto_simd_usable() permits it, update and finalization both run
 * with the AVX transform inside a single kernel_fpu_begin()/kernel_fpu_end()
 * section and the result of sm3_base_finish() is returned. Otherwise the
 * work is done through sm3_update()/sm3_final() and 0 is returned.
 */
static int sm3_avx_finup(struct shash_desc *desc, const u8 *data,
			 unsigned int len, u8 *out)
{
	struct sm3_state *sctx = shash_desc_ctx(desc);

	if (crypto_simd_usable()) {
		kernel_fpu_begin();
		if (len)
			sm3_base_do_update(desc, data, len, sm3_transform_avx);
		sm3_base_do_finalize(desc, sm3_transform_avx);
		kernel_fpu_end();

		return sm3_base_finish(desc, out);
	}

	/* SIMD may not be used here: take the non-AVX path. */
	if (len)
		sm3_update(sctx, data, len);
	sm3_final(sctx, out);

	return 0;
}
/*
 * shash .final handler: finish the hash into @out without absorbing any
 * further input.
 *
 * With SIMD usable, finalization runs with the AVX transform between
 * kernel_fpu_begin() and kernel_fpu_end(), and the result of
 * sm3_base_finish() is returned. Otherwise sm3_final() is called on the
 * descriptor context and 0 is returned.
 */
static int sm3_avx_final(struct shash_desc *desc, u8 *out)
{
	if (crypto_simd_usable()) {
		kernel_fpu_begin();
		sm3_base_do_finalize(desc, sm3_transform_avx);
		kernel_fpu_end();

		return sm3_base_finish(desc, out);
	}

	/* SIMD may not be used here: take the non-AVX path. */
	sm3_final(shash_desc_ctx(desc), out);

	return 0;
}
/*
 * Algorithm descriptor registered by sm3_avx_mod_init() and removed again
 * by sm3_avx_mod_exit().
 */
static struct shash_alg sm3_avx_alg = {
	/* Per-request operations. */
	.init		= sm3_base_init,
	.update		= sm3_avx_update,
	.finup		= sm3_avx_finup,
	.final		= sm3_avx_final,

	/* Output size and size of the per-request context. */
	.digestsize	= SM3_DIGEST_SIZE,
	.descsize	= sizeof(struct sm3_state),

	.base		= {
		.cra_driver_name = "sm3-avx",
		.cra_name	 = "sm3",
		/*
		 * NOTE(review): presumably chosen to outrank the generic
		 * "sm3" implementation -- confirm against its priority.
		 */
		.cra_priority	 = 300,
		.cra_blocksize	 = SM3_BLOCK_SIZE,
		.cra_module	 = THIS_MODULE,
	},
};
/*
 * Module init: register the "sm3-avx" shash only when the boot CPU reports
 * AVX and BMI2 and the SSE and YMM xfeatures are available.
 *
 * Returns -ENODEV (after logging which requirement is missing) when any
 * check fails, otherwise the result of crypto_register_shash().
 */
static int __init sm3_avx_mod_init(void)
{
	const char *missing_xfeature;

	if (!boot_cpu_has(X86_FEATURE_AVX)) {
		pr_info("AVX instruction are not detected.\n");
		goto unsupported;
	}

	if (!boot_cpu_has(X86_FEATURE_BMI2)) {
		pr_info("BMI2 instruction are not detected.\n");
		goto unsupported;
	}

	/* cpu_has_xfeatures() reports the missing feature's name on failure. */
	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
			       &missing_xfeature)) {
		pr_info("CPU feature '%s' is not supported.\n", missing_xfeature);
		goto unsupported;
	}

	return crypto_register_shash(&sm3_avx_alg);

unsupported:
	return -ENODEV;
}
/*
 * Module exit: unregister the shash that sm3_avx_mod_init() registered.
 */
static void __exit sm3_avx_mod_exit(void)
{
crypto_unregister_shash(&sm3_avx_alg);
}
/* Hook the init/exit routines into module load and unload. */
module_init(sm3_avx_mod_init);
module_exit(sm3_avx_mod_exit);
/*
 * NOTE(review): the SPDX tag at the top of this file says
 * GPL-2.0-or-later while the module license string is "GPL v2" --
 * confirm that the two are meant to differ.
 */
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Tianjia Zhang <tianjia.zhang@linux.alibaba.com>");
MODULE_DESCRIPTION("SM3 Secure Hash Algorithm, AVX assembler accelerated");
/*
 * Aliases matching the cra_name ("sm3") and cra_driver_name ("sm3-avx")
 * of sm3_avx_alg; presumably used to load this module on demand when
 * either name is requested -- TODO confirm.
 */
MODULE_ALIAS_CRYPTO("sm3");
MODULE_ALIAS_CRYPTO("sm3-avx");
......@@ -1008,6 +1008,19 @@ config CRYPTO_SM3
http://www.oscca.gov.cn/UpFile/20101222141857786.pdf
https://datatracker.ietf.org/doc/html/draft-shen-sm3-hash
config CRYPTO_SM3_AVX_X86_64
tristate "SM3 digest algorithm (x86_64/AVX)"
depends on X86 && 64BIT
select CRYPTO_HASH
select CRYPTO_LIB_SM3
help
SM3 secure hash function as defined by OSCCA GM/T 0004-2012 (SM3).
It is part of the Chinese Commercial Cryptography suite. This is
an SM3 implementation optimized using Advanced Vector Extensions
(AVX) when available.
If unsure, say N.
config CRYPTO_STREEBOG
tristate "Streebog Hash Function"
select CRYPTO_HASH
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment