Commit 596f674d authored by Herbert Xu

crypto: p10-aes-gcm - Revert implementation

Revert the changes that added p10-aes-gcm:

	0781bbd7 ("crypto: p10-aes-gcm - A perl script to process PowerPC assembler source")
	41a6437a ("crypto: p10-aes-gcm - Supporting functions for ghash")
	3b47ecca ("crypto: p10-aes-gcm - Supporting functions for AES")
	ca68a96c ("crypto: p10-aes-gcm - An accelerated AES/GCM stitched implementation")
	cc40379b ("crypto: p10-aes-gcm - Glue code for AES/GCM stitched implementation")
	3c657e86 ("crypto: p10-aes-gcm - Update Kconfig and Makefile")

These changes fail to build in many configurations and are not ready
for prime time.
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
parent f81c1d4a
@@ -94,15 +94,4 @@ config CRYPTO_AES_PPC_SPE
 	  architecture specific assembler implementations that work on 1KB
 	  tables or 256 bytes S-boxes.
 
-config CRYPTO_P10_AES_GCM
-	tristate "Stitched AES/GCM acceleration support on P10+ CPU (PPC)"
-	depends on PPC64
-	select CRYPTO_LIB_AES
-	select CRYPTO_ALGAPI
-	select CRYPTO_AEAD
-	default m
-	help
-	  Support for cryptographic acceleration instructions on Power10+ CPU.
-	  This module supports stitched acceleration for AES/GCM in hardware.
-
 endmenu
@@ -13,7 +13,6 @@ obj-$(CONFIG_CRYPTO_SHA256_PPC_SPE) += sha256-ppc-spe.o
 obj-$(CONFIG_CRYPTO_CRC32C_VPMSUM) += crc32c-vpmsum.o
 obj-$(CONFIG_CRYPTO_CRCT10DIF_VPMSUM) += crct10dif-vpmsum.o
 obj-$(CONFIG_CRYPTO_VPMSUM_TESTER) += crc-vpmsum_test.o
-obj-$(CONFIG_CRYPTO_P10_AES_GCM) += p10-aes-gcm-crypto.o
 
 aes-ppc-spe-y := aes-spe-core.o aes-spe-keys.o aes-tab-4k.o aes-spe-modes.o aes-spe-glue.o
 md5-ppc-y := md5-asm.o md5-glue.o
@@ -22,12 +21,3 @@ sha1-ppc-spe-y := sha1-spe-asm.o sha1-spe-glue.o
 sha256-ppc-spe-y := sha256-spe-asm.o sha256-spe-glue.o
 crc32c-vpmsum-y := crc32c-vpmsum_asm.o crc32c-vpmsum_glue.o
 crct10dif-vpmsum-y := crct10dif-vpmsum_asm.o crct10dif-vpmsum_glue.o
-p10-aes-gcm-crypto-y := p10-aes-gcm-glue.o p10_aes_gcm.o ghashp8-ppc.o aesp8-ppc.o
-
-quiet_cmd_perl = PERL $@
-      cmd_perl = $(PERL) $< $(if $(CONFIG_CPU_LITTLE_ENDIAN), linux-ppc64le, linux-ppc64) > $@
-
-targets += aesp8-ppc.S ghashp8-ppc.S
-
-$(obj)/aesp8-ppc.S $(obj)/ghashp8-ppc.S: $(obj)/%.S: $(src)/%.pl FORCE
-	$(call if_changed,perl)
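
For reference, with CONFIG_CPU_LITTLE_ENDIAN=y the removed rule expanded to roughly:

	perl ghashp8-ppc.pl linux-ppc64le > ghashp8-ppc.S

and likewise for aesp8-ppc.S; if_changed reruns the generation whenever the script or the command line changes.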
[collapsed diff omitted]
#!/usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0
# This code is taken from the OpenSSL project but the author (Andy Polyakov)
# has relicensed it under the GPLv2. Therefore this program is free software;
# you can redistribute it and/or modify it under the terms of the GNU General
# Public License version 2 as published by the Free Software Foundation.
#
# The original headers, including the original license headers, are
# included below for completeness.
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see https://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# GHASH for PowerISA v2.07.
#
# July 2014
#
# Accurate performance measurements are problematic, because it's
# always virtualized setup with possibly throttled processor.
# Relative comparison is therefore more informative. This initial
# version is ~2.1x slower than hardware-assisted AES-128-CTR, ~12x
# faster than "4-bit" integer-only compiler-generated 64-bit code.
# "Initial version" means that there is room for futher improvement.
$flavour=shift;
$output =shift;
if ($flavour =~ /64/) {
$SIZE_T=8;
$LRSAVE=2*$SIZE_T;
$STU="stdu";
$POP="ld";
$PUSH="std";
} elsif ($flavour =~ /32/) {
$SIZE_T=4;
$LRSAVE=$SIZE_T;
$STU="stwu";
$POP="lwz";
$PUSH="stw";
} else { die "nonsense $flavour"; }
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";
open STDOUT,"| $^X $xlate $flavour $output" or die "can't call $xlate: $!";
my ($Xip,$Htbl,$inp,$len)=map("r$_",(3..6)); # argument block
my ($Xl,$Xm,$Xh,$IN)=map("v$_",(0..3));
my ($zero,$t0,$t1,$t2,$xC2,$H,$Hh,$Hl,$lemask)=map("v$_",(4..12));
my ($Xl1,$Xm1,$Xh1,$IN1,$H2,$H2h,$H2l)=map("v$_",(13..19));
my $vrsave="r12";
my ($t4,$t5,$t6) = ($Hl,$H,$Hh);
$code=<<___;
.machine "any"
.text
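# gcm_init_p8(Htable, H): compute the "twisted" (left-shifted, reduced)
# form of the hash key loaded from r4 and store it at r3, split into
# low/whole/high doublewords alongside the 0xc2... reduction constant.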
.globl .gcm_init_p8
lis r0,0xfff0
li r8,0x10
mfspr $vrsave,256
li r9,0x20
mtspr 256,r0
li r10,0x30
lvx_u $H,0,r4 # load H
le?xor r7,r7,r7
le?addi r7,r7,0x8 # need a vperm start with 08
le?lvsr 5,0,r7
le?vspltisb 6,0x0f
le?vxor 5,5,6 # set a b-endian mask
le?vperm $H,$H,$H,5
vspltisb $xC2,-16 # 0xf0
vspltisb $t0,1 # one
vaddubm $xC2,$xC2,$xC2 # 0xe0
vxor $zero,$zero,$zero
vor $xC2,$xC2,$t0 # 0xe1
vsldoi $xC2,$xC2,$zero,15 # 0xe1...
vsldoi $t1,$zero,$t0,1 # ...1
vaddubm $xC2,$xC2,$xC2 # 0xc2...
vspltisb $t2,7
vor $xC2,$xC2,$t1 # 0xc2....01
vspltb $t1,$H,0 # most significant byte
vsl $H,$H,$t0 # H<<=1
vsrab $t1,$t1,$t2 # broadcast carry bit
vand $t1,$t1,$xC2
vxor $H,$H,$t1 # twisted H
vsldoi $H,$H,$H,8 # twist even more ...
vsldoi $xC2,$zero,$xC2,8 # 0xc2.0
vsldoi $Hl,$zero,$H,8 # ... and split
vsldoi $Hh,$H,$zero,8
stvx_u $xC2,0,r3 # save pre-computed table
stvx_u $Hl,r8,r3
stvx_u $H, r9,r3
stvx_u $Hh,r10,r3
mtspr 256,$vrsave
blr
.long 0
.byte 0,12,0x14,0,0,0,2,0
.long 0
.size .gcm_init_p8,.-.gcm_init_p8
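# gcm_init_htable(Htable, Xi) repeats the setup above and additionally
# computes H^2, H^3 and H^4, storing each power split the same way (the
# stores fill the table from offset 0x00 through 0xc0), so a stitched
# multi-block GHASH loop can fold several blocks per iteration.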
.globl .gcm_init_htable
lis r0,0xfff0
li r8,0x10
mfspr $vrsave,256
li r9,0x20
mtspr 256,r0
li r10,0x30
lvx_u $H,0,r4 # load H
vspltisb $xC2,-16 # 0xf0
vspltisb $t0,1 # one
vaddubm $xC2,$xC2,$xC2 # 0xe0
vxor $zero,$zero,$zero
vor $xC2,$xC2,$t0 # 0xe1
vsldoi $xC2,$xC2,$zero,15 # 0xe1...
vsldoi $t1,$zero,$t0,1 # ...1
vaddubm $xC2,$xC2,$xC2 # 0xc2...
vspltisb $t2,7
vor $xC2,$xC2,$t1 # 0xc2....01
vspltb $t1,$H,0 # most significant byte
vsl $H,$H,$t0 # H<<=1
vsrab $t1,$t1,$t2 # broadcast carry bit
vand $t1,$t1,$xC2
vxor $IN,$H,$t1 # twisted H
vsldoi $H,$IN,$IN,8 # twist even more ...
vsldoi $xC2,$zero,$xC2,8 # 0xc2.0
vsldoi $Hl,$zero,$H,8 # ... and split
vsldoi $Hh,$H,$zero,8
stvx_u $xC2,0,r3 # save pre-computed table
stvx_u $Hl,r8,r3
li r8,0x40
stvx_u $H, r9,r3
li r9,0x50
stvx_u $Hh,r10,r3
li r10,0x60
vpmsumd $Xl,$IN,$Hl # H.lo·H.lo
vpmsumd $Xm,$IN,$H # H.hi·H.lo+H.lo·H.hi
vpmsumd $Xh,$IN,$Hh # H.hi·H.hi
vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
vsldoi $t0,$Xm,$zero,8
vsldoi $t1,$zero,$Xm,8
vxor $Xl,$Xl,$t0
vxor $Xh,$Xh,$t1
vsldoi $Xl,$Xl,$Xl,8
vxor $Xl,$Xl,$t2
vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
vpmsumd $Xl,$Xl,$xC2
vxor $t1,$t1,$Xh
vxor $IN1,$Xl,$t1
vsldoi $H2,$IN1,$IN1,8
vsldoi $H2l,$zero,$H2,8
vsldoi $H2h,$H2,$zero,8
stvx_u $H2l,r8,r3 # save H^2
li r8,0x70
stvx_u $H2,r9,r3
li r9,0x80
stvx_u $H2h,r10,r3
li r10,0x90
vpmsumd $Xl,$IN,$H2l # H.lo·H^2.lo
vpmsumd $Xl1,$IN1,$H2l # H^2.lo·H^2.lo
vpmsumd $Xm,$IN,$H2 # H.hi·H^2.lo+H.lo·H^2.hi
vpmsumd $Xm1,$IN1,$H2 # H^2.hi·H^2.lo+H^2.lo·H^2.hi
vpmsumd $Xh,$IN,$H2h # H.hi·H^2.hi
vpmsumd $Xh1,$IN1,$H2h # H^2.hi·H^2.hi
vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
vpmsumd $t6,$Xl1,$xC2 # 1st reduction phase
vsldoi $t0,$Xm,$zero,8
vsldoi $t1,$zero,$Xm,8
vsldoi $t4,$Xm1,$zero,8
vsldoi $t5,$zero,$Xm1,8
vxor $Xl,$Xl,$t0
vxor $Xh,$Xh,$t1
vxor $Xl1,$Xl1,$t4
vxor $Xh1,$Xh1,$t5
vsldoi $Xl,$Xl,$Xl,8
vsldoi $Xl1,$Xl1,$Xl1,8
vxor $Xl,$Xl,$t2
vxor $Xl1,$Xl1,$t6
vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
vsldoi $t5,$Xl1,$Xl1,8 # 2nd reduction phase
vpmsumd $Xl,$Xl,$xC2
vpmsumd $Xl1,$Xl1,$xC2
vxor $t1,$t1,$Xh
vxor $t5,$t5,$Xh1
vxor $Xl,$Xl,$t1
vxor $Xl1,$Xl1,$t5
vsldoi $H,$Xl,$Xl,8
vsldoi $H2,$Xl1,$Xl1,8
vsldoi $Hl,$zero,$H,8
vsldoi $Hh,$H,$zero,8
vsldoi $H2l,$zero,$H2,8
vsldoi $H2h,$H2,$zero,8
stvx_u $Hl,r8,r3 # save H^3
li r8,0xa0
stvx_u $H,r9,r3
li r9,0xb0
stvx_u $Hh,r10,r3
li r10,0xc0
stvx_u $H2l,r8,r3 # save H^4
stvx_u $H2,r9,r3
stvx_u $H2h,r10,r3
mtspr 256,$vrsave
blr
.long 0
.byte 0,12,0x14,0,0,0,2,0
.long 0
.size .gcm_init_htable,.-.gcm_init_htable
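# gcm_gmult_p8(Xi, Htable): a single GHASH multiply, Xi = Xi * H, formed
# from three vpmsumd partial products followed by the two-phase reduction
# with the 0xc2 constant.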
.globl .gcm_gmult_p8
lis r0,0xfff8
li r8,0x10
mfspr $vrsave,256
li r9,0x20
mtspr 256,r0
li r10,0x30
lvx_u $IN,0,$Xip # load Xi
lvx_u $Hl,r8,$Htbl # load pre-computed table
le?lvsl $lemask,r0,r0
lvx_u $H, r9,$Htbl
le?vspltisb $t0,0x07
lvx_u $Hh,r10,$Htbl
le?vxor $lemask,$lemask,$t0
lvx_u $xC2,0,$Htbl
le?vperm $IN,$IN,$IN,$lemask
vxor $zero,$zero,$zero
vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
vpmsumd $t2,$Xl,$xC2 # 1st phase
vsldoi $t0,$Xm,$zero,8
vsldoi $t1,$zero,$Xm,8
vxor $Xl,$Xl,$t0
vxor $Xh,$Xh,$t1
vsldoi $Xl,$Xl,$Xl,8
vxor $Xl,$Xl,$t2
vsldoi $t1,$Xl,$Xl,8 # 2nd phase
vpmsumd $Xl,$Xl,$xC2
vxor $t1,$t1,$Xh
vxor $Xl,$Xl,$t1
le?vperm $Xl,$Xl,$Xl,$lemask
stvx_u $Xl,0,$Xip # write out Xi
mtspr 256,$vrsave
blr
.long 0
.byte 0,12,0x14,0,0,0,2,0
.long 0
.size .gcm_gmult_p8,.-.gcm_gmult_p8
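# gcm_ghash_p8(Xi, Htable, inp, len): fold len bytes of input (a multiple
# of 16) into the running hash, Xi = (...((Xi ^ in[0])*H ^ in[1])*H ...)*H.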
.globl .gcm_ghash_p8
lis r0,0xfff8
li r8,0x10
mfspr $vrsave,256
li r9,0x20
mtspr 256,r0
li r10,0x30
lvx_u $Xl,0,$Xip # load Xi
lvx_u $Hl,r8,$Htbl # load pre-computed table
le?lvsl $lemask,r0,r0
lvx_u $H, r9,$Htbl
le?vspltisb $t0,0x07
lvx_u $Hh,r10,$Htbl
le?vxor $lemask,$lemask,$t0
lvx_u $xC2,0,$Htbl
le?vperm $Xl,$Xl,$Xl,$lemask
vxor $zero,$zero,$zero
lvx_u $IN,0,$inp
addi $inp,$inp,16
subi $len,$len,16
le?vperm $IN,$IN,$IN,$lemask
vxor $IN,$IN,$Xl
b Loop
.align 5
Loop:
subic $len,$len,16
vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
subfe. r0,r0,r0 # borrow?-1:0
vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
and r0,r0,$len
vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
add $inp,$inp,r0
vpmsumd $t2,$Xl,$xC2 # 1st phase
vsldoi $t0,$Xm,$zero,8
vsldoi $t1,$zero,$Xm,8
vxor $Xl,$Xl,$t0
vxor $Xh,$Xh,$t1
vsldoi $Xl,$Xl,$Xl,8
vxor $Xl,$Xl,$t2
lvx_u $IN,0,$inp
addi $inp,$inp,16
vsldoi $t1,$Xl,$Xl,8 # 2nd phase
vpmsumd $Xl,$Xl,$xC2
le?vperm $IN,$IN,$IN,$lemask
vxor $t1,$t1,$Xh
vxor $IN,$IN,$t1
vxor $IN,$IN,$Xl
beq Loop # did $len-=16 borrow?
vxor $Xl,$Xl,$t1
le?vperm $Xl,$Xl,$Xl,$lemask
stvx_u $Xl,0,$Xip # write out Xi
mtspr 256,$vrsave
blr
.long 0
.byte 0,12,0x14,0,0,0,4,0
.long 0
.size .gcm_ghash_p8,.-.gcm_ghash_p8
.asciz "GHASH for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
.align 2
___
foreach (split("\n",$code)) {
if ($flavour =~ /le$/o) { # little-endian
s/le\?//o or
s/be\?/#be#/o;
} else {
s/le\?/#le#/o or
s/be\?//o;
}
print $_,"\n";
}
close STDOUT; # enforce flush
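
For orientation, the operation these routines accelerate is ordinary GF(2^128) multiplication as GHASH defines it. Below is a bit-at-a-time plain-C sketch of the same product (illustrative only; ghash_mul is our name, not kernel code):

#include <stdint.h>
#include <string.h>

/*
 * Xi = Xi * H in GHASH's GF(2^128), bit-at-a-time as in NIST SP 800-38D.
 * The assembler above computes the same product with three vpmsumd
 * multiplies and a two-phase reduction; its 0xc2... constant plays the
 * role of this 0xe1 polynomial, adjusted for the pre-shifted ("twisted")
 * H that gcm_init_p8 stores.
 */
static void ghash_mul(uint8_t Xi[16], const uint8_t H[16])
{
	uint8_t Z[16] = { 0 }, V[16];
	int i, j, k, lsb;

	memcpy(V, H, 16);
	for (i = 0; i < 16; i++) {
		for (j = 7; j >= 0; j--) {
			if ((Xi[i] >> j) & 1)	/* bit i*8+j of Xi set? */
				for (k = 0; k < 16; k++)
					Z[k] ^= V[k];
			lsb = V[15] & 1;	/* V = V * x ... */
			for (k = 15; k > 0; k--)
				V[k] = (V[k] >> 1) | (V[k - 1] << 7);
			V[0] >>= 1;
			if (lsb)		/* ... mod the GHASH polynomial */
				V[0] ^= 0xe1;
		}
	}
	memcpy(Xi, Z, 16);
}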
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Glue code for accelerated AES-GCM stitched implementation for ppc64le.
*
* Copyright 2022- IBM Inc. All rights reserved
*/
#include <asm/unaligned.h>
#include <asm/simd.h>
#include <asm/switch_to.h>
#include <crypto/algapi.h>
#include <crypto/aes.h>
#include <crypto/b128ops.h>
#include <crypto/gf128mul.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/aead.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/skcipher.h>
#include <crypto/scatterwalk.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <linux/module.h>
#include <linux/types.h>
#define PPC_MODULE_FEATURE_P10 (32 + ilog2(PPC_FEATURE2_ARCH_3_1))
#define PPC_ALIGN 16
#define GCM_IV_SIZE 12
MODULE_DESCRIPTION("PPC64le AES-GCM with Stitched implementation");
MODULE_AUTHOR("Danny Tsen <dtsen@linux.ibm.com>");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("aes");
asmlinkage int aes_p8_set_encrypt_key(const u8 *userKey, const int bits,
void *key);
asmlinkage void aes_p8_encrypt(const u8 *in, u8 *out, const void *key);
asmlinkage void aes_p10_gcm_encrypt(u8 *in, u8 *out, size_t len,
void *rkey, u8 *iv, void *Xi);
asmlinkage void aes_p10_gcm_decrypt(u8 *in, u8 *out, size_t len,
void *rkey, u8 *iv, void *Xi);
asmlinkage void gcm_init_htable(unsigned char htable[256], unsigned char Xi[16]);
asmlinkage void gcm_ghash_p8(unsigned char *Xi, unsigned char *Htable,
unsigned char *aad, unsigned int alen);
struct aes_key {
u8 key[AES_MAX_KEYLENGTH];
u64 rounds;
};
struct gcm_ctx {
u8 iv[16];
u8 ivtag[16];
u8 aad_hash[16];
u64 aadLen;
u64 Plen; /* offset 56 - used in aes_p10_gcm_{en/de}crypt */
};
struct Hash_ctx {
u8 H[16]; /* subkey */
u8 Htable[256]; /* Xi, Hash table(offset 32) */
};
struct p10_aes_gcm_ctx {
struct aes_key enc_key;
};
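/*
 * The assembler uses VSX registers, which kernel code may only touch
 * between enable_kernel_vsx()/disable_kernel_vsx() with preemption
 * disabled; every call into the asm below is bracketed by this pair.
 */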
static void vsx_begin(void)
{
preempt_disable();
enable_kernel_vsx();
}
static void vsx_end(void)
{
disable_kernel_vsx();
preempt_enable();
}
static void set_subkey(unsigned char *hash)
{
*(u64 *)&hash[0] = be64_to_cpup((__be64 *)&hash[0]);
*(u64 *)&hash[8] = be64_to_cpup((__be64 *)&hash[8]);
}
/*
* Compute aad if any.
* - Hash aad and copy to Xi.
*/
static void set_aad(struct gcm_ctx *gctx, struct Hash_ctx *hash,
unsigned char *aad, int alen)
{
int i;
u8 nXi[16] = {0, };
gctx->aadLen = alen;
i = alen & ~0xf;
if (i) {
gcm_ghash_p8(nXi, hash->Htable+32, aad, i);
aad += i;
alen -= i;
}
if (alen) {
for (i = 0; i < alen; i++)
nXi[i] ^= aad[i];
memset(gctx->aad_hash, 0, 16);
gcm_ghash_p8(gctx->aad_hash, hash->Htable+32, nXi, 16);
} else {
memcpy(gctx->aad_hash, nXi, 16);
}
memcpy(hash->Htable, gctx->aad_hash, 16);
}
static void gcmp10_init(struct gcm_ctx *gctx, u8 *iv, unsigned char *rdkey,
struct Hash_ctx *hash, u8 *assoc, unsigned int assoclen)
{
__be32 counter = cpu_to_be32(1);
aes_p8_encrypt(hash->H, hash->H, rdkey);
set_subkey(hash->H);
gcm_init_htable(hash->Htable+32, hash->H);
*((__be32 *)(iv+12)) = counter;
gctx->Plen = 0;
/*
* Encrypt counter vector as iv tag and increment counter.
*/
aes_p8_encrypt(iv, gctx->ivtag, rdkey);
counter = cpu_to_be32(2);
*((__be32 *)(iv+12)) = counter;
memcpy(gctx->iv, iv, 16);
gctx->aadLen = assoclen;
memset(gctx->aad_hash, 0, 16);
if (assoclen)
set_aad(gctx, hash, assoc, assoclen);
}
static void finish_tag(struct gcm_ctx *gctx, struct Hash_ctx *hash, int len)
{
int i;
unsigned char len_ac[16 + PPC_ALIGN];
unsigned char *aclen = PTR_ALIGN((void *)len_ac, PPC_ALIGN);
__be64 clen = cpu_to_be64(len << 3);
__be64 alen = cpu_to_be64(gctx->aadLen << 3);
if (len == 0 && gctx->aadLen == 0) {
memcpy(hash->Htable, gctx->ivtag, 16);
return;
}
/*
* Len is in bits.
*/
*((__be64 *)(aclen)) = alen;
*((__be64 *)(aclen+8)) = clen;
/*
* hash (AAD len and len)
*/
gcm_ghash_p8(hash->Htable, hash->Htable+32, aclen, 16);
for (i = 0; i < 16; i++)
hash->Htable[i] ^= gctx->ivtag[i];
}
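/*
 * GCM permits truncated tags; accept the sizes SP 800-38D allows
 * (128 down to 96 bits, plus the special 64- and 32-bit lengths).
 */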
static int set_authsize(struct crypto_aead *tfm, unsigned int authsize)
{
switch (authsize) {
case 4:
case 8:
case 12:
case 13:
case 14:
case 15:
case 16:
break;
default:
return -EINVAL;
}
return 0;
}
static int p10_aes_gcm_setkey(struct crypto_aead *aead, const u8 *key,
unsigned int keylen)
{
struct crypto_tfm *tfm = crypto_aead_tfm(aead);
struct p10_aes_gcm_ctx *ctx = crypto_tfm_ctx(tfm);
int ret;
vsx_begin();
ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key);
vsx_end();
return ret ? -EINVAL : 0;
}
static int p10_aes_gcm_crypt(struct aead_request *req, int enc)
{
struct crypto_tfm *tfm = req->base.tfm;
struct p10_aes_gcm_ctx *ctx = crypto_tfm_ctx(tfm);
u8 databuf[sizeof(struct gcm_ctx) + PPC_ALIGN];
struct gcm_ctx *gctx = PTR_ALIGN((void *)databuf, PPC_ALIGN);
u8 hashbuf[sizeof(struct Hash_ctx) + PPC_ALIGN];
struct Hash_ctx *hash = PTR_ALIGN((void *)hashbuf, PPC_ALIGN);
struct scatter_walk assoc_sg_walk;
struct skcipher_walk walk;
u8 *assocmem = NULL;
u8 *assoc;
unsigned int assoclen = req->assoclen;
unsigned int cryptlen = req->cryptlen;
unsigned char ivbuf[AES_BLOCK_SIZE+PPC_ALIGN];
unsigned char *iv = PTR_ALIGN((void *)ivbuf, PPC_ALIGN);
int ret;
unsigned long auth_tag_len = crypto_aead_authsize(__crypto_aead_cast(tfm));
u8 otag[16];
int total_processed = 0;
memset(databuf, 0, sizeof(databuf));
memset(hashbuf, 0, sizeof(hashbuf));
memset(ivbuf, 0, sizeof(ivbuf));
memcpy(iv, req->iv, GCM_IV_SIZE);
/* Linearize assoc, if not already linear */
if (req->src->length >= assoclen && req->src->length) {
scatterwalk_start(&assoc_sg_walk, req->src);
assoc = scatterwalk_map(&assoc_sg_walk);
} else {
gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
GFP_KERNEL : GFP_ATOMIC;
/* assoc can be any length, so must be on heap */
assocmem = kmalloc(assoclen, flags);
if (unlikely(!assocmem))
return -ENOMEM;
assoc = assocmem;
scatterwalk_map_and_copy(assoc, req->src, 0, assoclen, 0);
}
vsx_begin();
gcmp10_init(gctx, iv, (unsigned char *) &ctx->enc_key, hash, assoc, assoclen);
vsx_end();
if (!assocmem)
scatterwalk_unmap(assoc);
else
kfree(assocmem);
if (enc)
ret = skcipher_walk_aead_encrypt(&walk, req, false);
else
ret = skcipher_walk_aead_decrypt(&walk, req, false);
if (ret)
return ret;
while (walk.nbytes > 0 && ret == 0) {
vsx_begin();
if (enc)
aes_p10_gcm_encrypt(walk.src.virt.addr,
walk.dst.virt.addr,
walk.nbytes,
&ctx->enc_key, gctx->iv, hash->Htable);
else
aes_p10_gcm_decrypt(walk.src.virt.addr,
walk.dst.virt.addr,
walk.nbytes,
&ctx->enc_key, gctx->iv, hash->Htable);
vsx_end();
total_processed += walk.nbytes;
ret = skcipher_walk_done(&walk, 0);
}
if (ret)
return ret;
/* Finalize hash */
vsx_begin();
finish_tag(gctx, hash, total_processed);
vsx_end();
/* copy Xi to end of dst */
if (enc)
scatterwalk_map_and_copy(hash->Htable, req->dst, req->assoclen + cryptlen,
auth_tag_len, 1);
else {
scatterwalk_map_and_copy(otag, req->src,
req->assoclen + cryptlen - auth_tag_len,
auth_tag_len, 0);
if (crypto_memneq(otag, hash->Htable, auth_tag_len)) {
memzero_explicit(hash->Htable, 16);
return -EBADMSG;
}
}
return 0;
}
static int p10_aes_gcm_encrypt(struct aead_request *req)
{
return p10_aes_gcm_crypt(req, 1);
}
static int p10_aes_gcm_decrypt(struct aead_request *req)
{
return p10_aes_gcm_crypt(req, 0);
}
static struct aead_alg gcm_aes_alg = {
.ivsize = GCM_IV_SIZE,
.maxauthsize = 16,
.setauthsize = set_authsize,
.setkey = p10_aes_gcm_setkey,
.encrypt = p10_aes_gcm_encrypt,
.decrypt = p10_aes_gcm_decrypt,
.base.cra_name = "gcm(aes)",
.base.cra_driver_name = "p10_aes_gcm",
.base.cra_priority = 2100,
.base.cra_blocksize = 1,
.base.cra_ctxsize = sizeof(struct p10_aes_gcm_ctx),
.base.cra_module = THIS_MODULE,
};
static int __init p10_init(void)
{
return crypto_register_aead(&gcm_aes_alg);
}
static void __exit p10_exit(void)
{
crypto_unregister_aead(&gcm_aes_alg);
}
module_cpu_feature_match(PPC_MODULE_FEATURE_P10, p10_init);
module_exit(p10_exit);
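
Once registered, the transform is reachable under "gcm(aes)" through the normal kernel AEAD API. A hedged usage sketch (demo_gcm_encrypt is our name; a single linear buffer laid out as assoc || plaintext, with room for the 16-byte tag, is assumed):

#include <crypto/aead.h>
#include <linux/scatterlist.h>

static int demo_gcm_encrypt(u8 *buf, unsigned int assoclen,
			    unsigned int ptlen, u8 iv[12],
			    const u8 *key, unsigned int keylen)
{
	struct crypto_aead *tfm;
	struct aead_request *req;
	struct scatterlist sg;
	DECLARE_CRYPTO_WAIT(wait);
	int err;

	tfm = crypto_alloc_aead("gcm(aes)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_aead_setkey(tfm, key, keylen) ?:
	      crypto_aead_setauthsize(tfm, 16);
	if (err)
		goto out;

	req = aead_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		err = -ENOMEM;
		goto out;
	}

	/* buf covers assoclen + ptlen bytes plus 16 bytes for the tag */
	sg_init_one(&sg, buf, assoclen + ptlen + 16);
	aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP,
				  crypto_req_done, &wait);
	aead_request_set_ad(req, assoclen);
	aead_request_set_crypt(req, &sg, &sg, ptlen, iv);
	err = crypto_wait_req(crypto_aead_encrypt(req), &wait);

	aead_request_free(req);
out:
	crypto_free_aead(tfm);
	return err;
}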
[collapsed diff omitted]
#!/usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0
# PowerPC assembler distiller by <appro>.
my $flavour = shift;
my $output = shift;
open STDOUT,">$output" or die "can't open $output: $!";
my %GLOBALS;
my $dotinlocallabels=($flavour=~/linux/)?1:0;
################################################################
# directives which need special treatment on different platforms
################################################################
my $globl = sub {
my $junk = shift;
my $name = shift;
my $global = \$GLOBALS{$name};
my $ret;
$name =~ s|^[\.\_]||;
SWITCH: for ($flavour) {
/aix/ && do { $name = ".$name";
last;
};
/osx/ && do { $name = "_$name";
last;
};
/linux/
&& do { $ret = "_GLOBAL($name)";
last;
};
}
$ret = ".globl $name\nalign 5\n$name:" if (!$ret);
$$global = $name;
$ret;
};
my $text = sub {
my $ret = ($flavour =~ /aix/) ? ".csect\t.text[PR],7" : ".text";
$ret = ".abiversion 2\n".$ret if ($flavour =~ /linux.*64le/);
$ret;
};
my $machine = sub {
my $junk = shift;
my $arch = shift;
if ($flavour =~ /osx/)
{ $arch =~ s/\"//g;
$arch = ($flavour=~/64/) ? "ppc970-64" : "ppc970" if ($arch eq "any");
}
".machine $arch";
};
my $size = sub {
if ($flavour =~ /linux/)
{ shift;
my $name = shift; $name =~ s|^[\.\_]||;
my $ret = ".size $name,.-".($flavour=~/64$/?".":"").$name;
$ret .= "\n.size .$name,.-.$name" if ($flavour=~/64$/);
$ret;
}
else
{ ""; }
};
my $asciz = sub {
shift;
my $line = join(",",@_);
if ($line =~ /^"(.*)"$/)
{ ".byte " . join(",",unpack("C*",$1),0) . "\n.align 2"; }
else
{ ""; }
};
my $quad = sub {
shift;
my @ret;
my ($hi,$lo);
for (@_) {
if (/^0x([0-9a-f]*?)([0-9a-f]{1,8})$/io)
{ $hi=$1?"0x$1":"0"; $lo="0x$2"; }
elsif (/^([0-9]+)$/o)
{ $hi=$1>>32; $lo=$1&0xffffffff; } # error-prone with 32-bit perl
else
{ $hi=undef; $lo=$_; }
if (defined($hi))
{ push(@ret,$flavour=~/le$/o?".long\t$lo,$hi":".long\t$hi,$lo"); }
else
{ push(@ret,".quad $lo"); }
}
join("\n",@ret);
};
################################################################
# simplified mnemonics not handled by at least one assembler
################################################################
my $cmplw = sub {
my $f = shift;
my $cr = 0; $cr = shift if ($#_>1);
# Some out-of-date 32-bit GNU assembler just can't handle cmplw...
($flavour =~ /linux.*32/) ?
" .long ".sprintf "0x%x",31<<26|$cr<<23|$_[0]<<16|$_[1]<<11|64 :
" cmplw ".join(',',$cr,@_);
};
my $bdnz = sub {
my $f = shift;
my $bo = $f=~/[\+\-]/ ? 16+9 : 16; # optional "to be taken" hint
" bc $bo,0,".shift;
} if ($flavour!~/linux/);
my $bltlr = sub {
my $f = shift;
my $bo = $f=~/\-/ ? 12+2 : 12; # optional "not to be taken" hint
($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints
" .long ".sprintf "0x%x",19<<26|$bo<<21|16<<1 :
" bclr $bo,0";
};
my $bnelr = sub {
my $f = shift;
my $bo = $f=~/\-/ ? 4+2 : 4; # optional "not to be taken" hint
($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints
" .long ".sprintf "0x%x",19<<26|$bo<<21|2<<16|16<<1 :
" bclr $bo,2";
};
my $beqlr = sub {
my $f = shift;
my $bo = $f=~/-/ ? 12+2 : 12; # optional "not to be taken" hint
($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints
" .long ".sprintf "0x%X",19<<26|$bo<<21|2<<16|16<<1 :
" bclr $bo,2";
};
# GNU assembler can't handle extrdi rA,rS,16,48, or when sum of last two
# arguments is 64, with "operand out of range" error.
my $extrdi = sub {
my ($f,$ra,$rs,$n,$b) = @_;
$b = ($b+$n)&63; $n = 64-$n;
" rldicl $ra,$rs,$b,$n";
};
my $vmr = sub {
my ($f,$vx,$vy) = @_;
" vor $vx,$vy,$vy";
};
# Some ABIs specify vrsave, special-purpose register #256, as reserved
# for system use.
my $no_vrsave = ($flavour =~ /linux-ppc64le/);
my $mtspr = sub {
my ($f,$idx,$ra) = @_;
if ($idx == 256 && $no_vrsave) {
" or $ra,$ra,$ra";
} else {
" mtspr $idx,$ra";
}
};
my $mfspr = sub {
my ($f,$rd,$idx) = @_;
if ($idx == 256 && $no_vrsave) {
" li $rd,-1";
} else {
" mfspr $rd,$idx";
}
};
# PowerISA 2.06 stuff
sub vsxmem_op {
my ($f, $vrt, $ra, $rb, $op) = @_;
" .long ".sprintf "0x%X",(31<<26)|($vrt<<21)|($ra<<16)|($rb<<11)|($op*2+1);
}
# made-up unaligned memory reference AltiVec/VMX instructions
my $lvx_u = sub { vsxmem_op(@_, 844); }; # lxvd2x
my $stvx_u = sub { vsxmem_op(@_, 972); }; # stxvd2x
my $lvdx_u = sub { vsxmem_op(@_, 588); }; # lxsdx
my $stvdx_u = sub { vsxmem_op(@_, 716); }; # stxsdx
my $lvx_4w = sub { vsxmem_op(@_, 780); }; # lxvw4x
my $stvx_4w = sub { vsxmem_op(@_, 908); }; # stxvw4x
# PowerISA 2.07 stuff
sub vcrypto_op {
my ($f, $vrt, $vra, $vrb, $op) = @_;
" .long ".sprintf "0x%X",(4<<26)|($vrt<<21)|($vra<<16)|($vrb<<11)|$op;
}
my $vcipher = sub { vcrypto_op(@_, 1288); };
my $vcipherlast = sub { vcrypto_op(@_, 1289); };
my $vncipher = sub { vcrypto_op(@_, 1352); };
my $vncipherlast= sub { vcrypto_op(@_, 1353); };
my $vsbox = sub { vcrypto_op(@_, 0, 1480); };
my $vshasigmad = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1730); };
my $vshasigmaw = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1666); };
my $vpmsumb = sub { vcrypto_op(@_, 1032); };
my $vpmsumd = sub { vcrypto_op(@_, 1224); };
my $vpmsubh = sub { vcrypto_op(@_, 1096); };
my $vpmsumw = sub { vcrypto_op(@_, 1160); };
my $vaddudm = sub { vcrypto_op(@_, 192); };
my $vadduqm = sub { vcrypto_op(@_, 256); };
my $mtsle = sub {
my ($f, $arg) = @_;
" .long ".sprintf "0x%X",(31<<26)|($arg<<21)|(147*2);
};
print "#include <asm/ppc_asm.h>\n" if $flavour =~ /linux/;
while($line=<>) {
$line =~ s|[#!;].*$||; # get rid of asm-style comments...
$line =~ s|/\*.*\*/||; # ... and C-style comments...
$line =~ s|^\s+||; # ... and skip white spaces in beginning...
$line =~ s|\s+$||; # ... and at the end
{
$line =~ s|\b\.L(\w+)|L$1|g; # common denominator for Locallabel
$line =~ s|\bL(\w+)|\.L$1|g if ($dotinlocallabels);
}
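	# Dispatch: peel off an optional leading dot, the mnemonic and a
	# +/- branch hint; if a closure of the same name exists above, it
	# rewrites the line, otherwise the instruction is re-emitted
	# verbatim after register names (rN/vN/crN) are reduced to bare
	# numbers on non-OSX flavours.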
{
$line =~ s|^\s*(\.?)(\w+)([\.\+\-]?)\s*||;
my $c = $1; $c = "\t" if ($c eq "");
my $mnemonic = $2;
my $f = $3;
my $opcode = eval("\$$mnemonic");
$line =~ s/\b(c?[rf]|v|vs)([0-9]+)\b/$2/g if ($c ne "." and $flavour !~ /osx/);
if (ref($opcode) eq 'CODE') { $line = &$opcode($f,split(',',$line)); }
elsif ($mnemonic) { $line = $c.$mnemonic.$f."\t".$line; }
}
print $line if ($line);
print "\n";
}
close STDOUT;
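
As a worked illustration (derived from the closures above, for flavour linux-ppc64le), the distiller rewrites lines such as:

	.globl .gcm_init_p8   ->  _GLOBAL(gcm_init_p8)
	mfspr  r12,256        ->  li 12,-1      (vrsave read faked; this ABI reserves SPR 256)
	mtspr  256,r0         ->  or 0,0,0      (vrsave write dropped)
	lvx_u  v9,0,r4        ->  .long 0x...   (lxvd2x encoded by vsxmem_op)

while ordinary mnemonics pass through unchanged apart from the register-name substitution.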