Commit aa2197f5 authored by Eric Biggers's avatar Eric Biggers Committed by Herbert Xu

crypto: x86/aes-xts - wire up VAES + AVX10/512 implementation

Add an AES-XTS implementation "xts-aes-vaes-avx10_512" for x86_64 CPUs
with the VAES, VPCLMULQDQ, and either AVX10/512 or AVX512BW + AVX512VL
extensions.  This implementation uses zmm registers to operate on four
AES blocks at a time.  The assembly code is instantiated using a macro
so that most of the source code is shared with other implementations.

To avoid downclocking on older Intel CPU models, an exclusion list is
used to prevent this 512-bit implementation from being used by default
on some CPU models.  They will use xts-aes-vaes-avx10_256 instead.  For
now, this exclusion list is simply coded into aesni-intel_glue.c.  It
may make sense to eventually move it into a more central location.

xts-aes-vaes-avx10_512 is slightly faster than xts-aes-vaes-avx10_256 on
some current CPUs.  E.g., on AMD Zen 4, AES-256-XTS decryption
throughput increases by 13% with 4096-byte inputs, or 14% with 512-byte
inputs.  On Intel Sapphire Rapids, AES-256-XTS decryption throughput
increases by 2% with 4096-byte inputs, or 3% with 512-byte inputs.

Future CPUs may provide stronger 512-bit support, in which case a larger
benefit should be seen.
Signed-off-by: default avatarEric Biggers <ebiggers@google.com>
Signed-off-by: default avatarHerbert Xu <herbert@gondor.apana.org.au>
parent ee63fea0
...@@ -826,4 +826,13 @@ SYM_FUNC_END(aes_xts_encrypt_vaes_avx10_256) ...@@ -826,4 +826,13 @@ SYM_FUNC_END(aes_xts_encrypt_vaes_avx10_256)
SYM_TYPED_FUNC_START(aes_xts_decrypt_vaes_avx10_256) SYM_TYPED_FUNC_START(aes_xts_decrypt_vaes_avx10_256)
_aes_xts_crypt 0 _aes_xts_crypt 0
SYM_FUNC_END(aes_xts_decrypt_vaes_avx10_256) SYM_FUNC_END(aes_xts_decrypt_vaes_avx10_256)
.set VL, 64
.set USE_AVX10, 1
SYM_TYPED_FUNC_START(aes_xts_encrypt_vaes_avx10_512)
_aes_xts_crypt 1
SYM_FUNC_END(aes_xts_encrypt_vaes_avx10_512)
SYM_TYPED_FUNC_START(aes_xts_decrypt_vaes_avx10_512)
_aes_xts_crypt 0
SYM_FUNC_END(aes_xts_decrypt_vaes_avx10_512)
#endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */ #endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */
...@@ -1298,8 +1298,29 @@ DEFINE_XTS_ALG(aesni_avx, "xts-aes-aesni-avx", 500); ...@@ -1298,8 +1298,29 @@ DEFINE_XTS_ALG(aesni_avx, "xts-aes-aesni-avx", 500);
#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ) #if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ)
DEFINE_XTS_ALG(vaes_avx2, "xts-aes-vaes-avx2", 600); DEFINE_XTS_ALG(vaes_avx2, "xts-aes-vaes-avx2", 600);
DEFINE_XTS_ALG(vaes_avx10_256, "xts-aes-vaes-avx10_256", 700); DEFINE_XTS_ALG(vaes_avx10_256, "xts-aes-vaes-avx10_256", 700);
DEFINE_XTS_ALG(vaes_avx10_512, "xts-aes-vaes-avx10_512", 800);
#endif #endif
/*
* This is a list of CPU models that are known to suffer from downclocking when
* zmm registers (512-bit vectors) are used. On these CPUs, the AES-XTS
* implementation with zmm registers won't be used by default. An
* implementation with ymm registers (256-bit vectors) will be used instead.
*/
static const struct x86_cpu_id zmm_exclusion_list[] = {
{ .vendor = X86_VENDOR_INTEL, .family = 6, .model = INTEL_FAM6_SKYLAKE_X },
{ .vendor = X86_VENDOR_INTEL, .family = 6, .model = INTEL_FAM6_ICELAKE_X },
{ .vendor = X86_VENDOR_INTEL, .family = 6, .model = INTEL_FAM6_ICELAKE_D },
{ .vendor = X86_VENDOR_INTEL, .family = 6, .model = INTEL_FAM6_ICELAKE },
{ .vendor = X86_VENDOR_INTEL, .family = 6, .model = INTEL_FAM6_ICELAKE_L },
{ .vendor = X86_VENDOR_INTEL, .family = 6, .model = INTEL_FAM6_ICELAKE_NNPI },
{ .vendor = X86_VENDOR_INTEL, .family = 6, .model = INTEL_FAM6_TIGERLAKE_L },
{ .vendor = X86_VENDOR_INTEL, .family = 6, .model = INTEL_FAM6_TIGERLAKE },
/* Allow Rocket Lake and later, and Sapphire Rapids and later. */
/* Also allow AMD CPUs (starting with Zen 4, the first with AVX-512). */
{},
};
static int __init register_xts_algs(void) static int __init register_xts_algs(void)
{ {
int err; int err;
...@@ -1333,6 +1354,14 @@ static int __init register_xts_algs(void) ...@@ -1333,6 +1354,14 @@ static int __init register_xts_algs(void)
&aes_xts_simdalg_vaes_avx10_256); &aes_xts_simdalg_vaes_avx10_256);
if (err) if (err)
return err; return err;
if (x86_match_cpu(zmm_exclusion_list))
aes_xts_alg_vaes_avx10_512.base.cra_priority = 1;
err = simd_register_skciphers_compat(&aes_xts_alg_vaes_avx10_512, 1,
&aes_xts_simdalg_vaes_avx10_512);
if (err)
return err;
#endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */ #endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */
return 0; return 0;
} }
...@@ -1349,6 +1378,9 @@ static void unregister_xts_algs(void) ...@@ -1349,6 +1378,9 @@ static void unregister_xts_algs(void)
if (aes_xts_simdalg_vaes_avx10_256) if (aes_xts_simdalg_vaes_avx10_256)
simd_unregister_skciphers(&aes_xts_alg_vaes_avx10_256, 1, simd_unregister_skciphers(&aes_xts_alg_vaes_avx10_256, 1,
&aes_xts_simdalg_vaes_avx10_256); &aes_xts_simdalg_vaes_avx10_256);
if (aes_xts_simdalg_vaes_avx10_512)
simd_unregister_skciphers(&aes_xts_alg_vaes_avx10_512, 1,
&aes_xts_simdalg_vaes_avx10_512);
#endif #endif
} }
#else /* CONFIG_X86_64 */ #else /* CONFIG_X86_64 */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment