Commit 596f674d authored by Herbert Xu

crypto: p10-aes-gcm - Revert implementation

Revert the changes that added p10-aes-gcm:

	0781bbd7 ("crypto: p10-aes-gcm - A perl script to process PowerPC assembler source")
	41a6437a ("crypto: p10-aes-gcm - Supporting functions for ghash")
	3b47ecca ("crypto: p10-aes-gcm - Supporting functions for AES")
	ca68a96c ("crypto: p10-aes-gcm - An accelerated AES/GCM stitched implementation")
	cc40379b ("crypto: p10-aes-gcm - Glue code for AES/GCM stitched implementation")
	3c657e86 ("crypto: p10-aes-gcm - Update Kconfig and Makefile")

These changes fail to build in many configurations and are not ready
for prime time.
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
parent f81c1d4a
@@ -94,15 +94,4 @@ config CRYPTO_AES_PPC_SPE
 	  architecture specific assembler implementations that work on 1KB
 	  tables or 256 bytes S-boxes.
 
-config CRYPTO_P10_AES_GCM
-	tristate "Stitched AES/GCM acceleration support on P10+ CPU (PPC)"
-	depends on PPC64
-	select CRYPTO_LIB_AES
-	select CRYPTO_ALGAPI
-	select CRYPTO_AEAD
-	default m
-	help
-	  Support for cryptographic acceleration instructions on Power10+ CPU.
-	  This module supports stitched acceleration for AES/GCM in hardware.
-
 endmenu
@@ -13,7 +13,6 @@ obj-$(CONFIG_CRYPTO_SHA256_PPC_SPE) += sha256-ppc-spe.o
 obj-$(CONFIG_CRYPTO_CRC32C_VPMSUM) += crc32c-vpmsum.o
 obj-$(CONFIG_CRYPTO_CRCT10DIF_VPMSUM) += crct10dif-vpmsum.o
 obj-$(CONFIG_CRYPTO_VPMSUM_TESTER) += crc-vpmsum_test.o
-obj-$(CONFIG_CRYPTO_P10_AES_GCM) += p10-aes-gcm-crypto.o
 
 aes-ppc-spe-y := aes-spe-core.o aes-spe-keys.o aes-tab-4k.o aes-spe-modes.o aes-spe-glue.o
 md5-ppc-y := md5-asm.o md5-glue.o
@@ -22,12 +21,3 @@ sha1-ppc-spe-y := sha1-spe-asm.o sha1-spe-glue.o
 sha256-ppc-spe-y := sha256-spe-asm.o sha256-spe-glue.o
 crc32c-vpmsum-y := crc32c-vpmsum_asm.o crc32c-vpmsum_glue.o
 crct10dif-vpmsum-y := crct10dif-vpmsum_asm.o crct10dif-vpmsum_glue.o
-p10-aes-gcm-crypto-y := p10-aes-gcm-glue.o p10_aes_gcm.o ghashp8-ppc.o aesp8-ppc.o
-
-quiet_cmd_perl = PERL $@
-      cmd_perl = $(PERL) $< $(if $(CONFIG_CPU_LITTLE_ENDIAN), linux-ppc64le, linux-ppc64) > $@
-
-targets += aesp8-ppc.S ghashp8-ppc.S
-
-$(obj)/aesp8-ppc.S $(obj)/ghashp8-ppc.S: $(obj)/%.S: $(src)/%.pl FORCE
-	$(call if_changed,perl)
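
For reference, with CONFIG_CPU_LITTLE_ENDIAN=y the removed rule expanded to roughly:

	perl ghashp8-ppc.pl linux-ppc64le > ghashp8-ppc.S

and likewise for aesp8-ppc.S; if_changed reruns the generation whenever the script or the command line changes.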
[collapsed diff omitted]
#!/usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0
# This code is taken from the OpenSSL project but the author (Andy Polyakov)
# has relicensed it under the GPLv2. Therefore this program is free software;
# you can redistribute it and/or modify it under the terms of the GNU General
# Public License version 2 as published by the Free Software Foundation.
#
# The original headers, including the original license headers, are
# included below for completeness.
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see https://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# GHASH for PowerISA v2.07.
#
# July 2014
#
# Accurate performance measurements are problematic, because it's
# always virtualized setup with possibly throttled processor.
# Relative comparison is therefore more informative. This initial
# version is ~2.1x slower than hardware-assisted AES-128-CTR, ~12x
# faster than "4-bit" integer-only compiler-generated 64-bit code.
# "Initial version" means that there is room for futher improvement.
$flavour=shift;
$output =shift;
if ($flavour =~ /64/) {
$SIZE_T=8;
$LRSAVE=2*$SIZE_T;
$STU="stdu";
$POP="ld";
$PUSH="std";
} elsif ($flavour =~ /32/) {
$SIZE_T=4;
$LRSAVE=$SIZE_T;
$STU="stwu";
$POP="lwz";
$PUSH="stw";
} else { die "nonsense $flavour"; }
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";
open STDOUT,"| $^X $xlate $flavour $output" or die "can't call $xlate: $!";
my ($Xip,$Htbl,$inp,$len)=map("r$_",(3..6)); # argument block
my ($Xl,$Xm,$Xh,$IN)=map("v$_",(0..3));
my ($zero,$t0,$t1,$t2,$xC2,$H,$Hh,$Hl,$lemask)=map("v$_",(4..12));
my ($Xl1,$Xm1,$Xh1,$IN1,$H2,$H2h,$H2l)=map("v$_",(13..19));
my $vrsave="r12";
my ($t4,$t5,$t6) = ($Hl,$H,$Hh);
$code=<<___;
.machine "any"
.text
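# gcm_init_p8(Htable, H): compute the "twisted" (left-shifted, reduced)
# form of the hash key loaded from r4 and store it at r3, split into
# low/whole/high doublewords alongside the 0xc2... reduction constant.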
.globl .gcm_init_p8
lis r0,0xfff0
li r8,0x10
mfspr $vrsave,256
li r9,0x20
mtspr 256,r0
li r10,0x30
lvx_u $H,0,r4 # load H
le?xor r7,r7,r7
le?addi r7,r7,0x8 # need a vperm start with 08
le?lvsr 5,0,r7
le?vspltisb 6,0x0f
le?vxor 5,5,6 # set a b-endian mask
le?vperm $H,$H,$H,5
vspltisb $xC2,-16 # 0xf0
vspltisb $t0,1 # one
vaddubm $xC2,$xC2,$xC2 # 0xe0
vxor $zero,$zero,$zero
vor $xC2,$xC2,$t0 # 0xe1
vsldoi $xC2,$xC2,$zero,15 # 0xe1...
vsldoi $t1,$zero,$t0,1 # ...1
vaddubm $xC2,$xC2,$xC2 # 0xc2...
vspltisb $t2,7
vor $xC2,$xC2,$t1 # 0xc2....01
vspltb $t1,$H,0 # most significant byte
vsl $H,$H,$t0 # H<<=1
vsrab $t1,$t1,$t2 # broadcast carry bit
vand $t1,$t1,$xC2
vxor $H,$H,$t1 # twisted H
vsldoi $H,$H,$H,8 # twist even more ...
vsldoi $xC2,$zero,$xC2,8 # 0xc2.0
vsldoi $Hl,$zero,$H,8 # ... and split
vsldoi $Hh,$H,$zero,8
stvx_u $xC2,0,r3 # save pre-computed table
stvx_u $Hl,r8,r3
stvx_u $H, r9,r3
stvx_u $Hh,r10,r3
mtspr 256,$vrsave
blr
.long 0
.byte 0,12,0x14,0,0,0,2,0
.long 0
.size .gcm_init_p8,.-.gcm_init_p8
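# gcm_init_htable(Htable, Xi) repeats the setup above and additionally
# computes H^2, H^3 and H^4, storing each power split the same way (the
# stores fill the table from offset 0x00 through 0xc0), so a stitched
# multi-block GHASH loop can fold several blocks per iteration.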
.globl .gcm_init_htable
lis r0,0xfff0
li r8,0x10
mfspr $vrsave,256
li r9,0x20
mtspr 256,r0
li r10,0x30
lvx_u $H,0,r4 # load H
vspltisb $xC2,-16 # 0xf0
vspltisb $t0,1 # one
vaddubm $xC2,$xC2,$xC2 # 0xe0
vxor $zero,$zero,$zero
vor $xC2,$xC2,$t0 # 0xe1
vsldoi $xC2,$xC2,$zero,15 # 0xe1...
vsldoi $t1,$zero,$t0,1 # ...1
vaddubm $xC2,$xC2,$xC2 # 0xc2...
vspltisb $t2,7
vor $xC2,$xC2,$t1 # 0xc2....01
vspltb $t1,$H,0 # most significant byte
vsl $H,$H,$t0 # H<<=1
vsrab $t1,$t1,$t2 # broadcast carry bit
vand $t1,$t1,$xC2
vxor $IN,$H,$t1 # twisted H
vsldoi $H,$IN,$IN,8 # twist even more ...
vsldoi $xC2,$zero,$xC2,8 # 0xc2.0
vsldoi $Hl,$zero,$H,8 # ... and split
vsldoi $Hh,$H,$zero,8
stvx_u $xC2,0,r3 # save pre-computed table
stvx_u $Hl,r8,r3
li r8,0x40
stvx_u $H, r9,r3
li r9,0x50
stvx_u $Hh,r10,r3
li r10,0x60
vpmsumd $Xl,$IN,$Hl # H.lo·H.lo
vpmsumd $Xm,$IN,$H # H.hi·H.lo+H.lo·H.hi
vpmsumd $Xh,$IN,$Hh # H.hi·H.hi
vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
vsldoi $t0,$Xm,$zero,8
vsldoi $t1,$zero,$Xm,8
vxor $Xl,$Xl,$t0
vxor $Xh,$Xh,$t1
vsldoi $Xl,$Xl,$Xl,8
vxor $Xl,$Xl,$t2
vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
vpmsumd $Xl,$Xl,$xC2
vxor $t1,$t1,$Xh
vxor $IN1,$Xl,$t1
vsldoi $H2,$IN1,$IN1,8
vsldoi $H2l,$zero,$H2,8
vsldoi $H2h,$H2,$zero,8
stvx_u $H2l,r8,r3 # save H^2
li r8,0x70
stvx_u $H2,r9,r3
li r9,0x80
stvx_u $H2h,r10,r3
li r10,0x90
vpmsumd $Xl,$IN,$H2l # H.lo·H^2.lo
vpmsumd $Xl1,$IN1,$H2l # H^2.lo·H^2.lo
vpmsumd $Xm,$IN,$H2 # H.hi·H^2.lo+H.lo·H^2.hi
vpmsumd $Xm1,$IN1,$H2 # H^2.hi·H^2.lo+H^2.lo·H^2.hi
vpmsumd $Xh,$IN,$H2h # H.hi·H^2.hi
vpmsumd $Xh1,$IN1,$H2h # H^2.hi·H^2.hi
vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
vpmsumd $t6,$Xl1,$xC2 # 1st reduction phase
vsldoi $t0,$Xm,$zero,8
vsldoi $t1,$zero,$Xm,8
vsldoi $t4,$Xm1,$zero,8
vsldoi $t5,$zero,$Xm1,8
vxor $Xl,$Xl,$t0
vxor $Xh,$Xh,$t1
vxor $Xl1,$Xl1,$t4
vxor $Xh1,$Xh1,$t5
vsldoi $Xl,$Xl,$Xl,8
vsldoi $Xl1,$Xl1,$Xl1,8
vxor $Xl,$Xl,$t2
vxor $Xl1,$Xl1,$t6
vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
vsldoi $t5,$Xl1,$Xl1,8 # 2nd reduction phase
vpmsumd $Xl,$Xl,$xC2
vpmsumd $Xl1,$Xl1,$xC2
vxor $t1,$t1,$Xh
vxor $t5,$t5,$Xh1
vxor $Xl,$Xl,$t1
vxor $Xl1,$Xl1,$t5
vsldoi $H,$Xl,$Xl,8
vsldoi $H2,$Xl1,$Xl1,8
vsldoi $Hl,$zero,$H,8
vsldoi $Hh,$H,$zero,8
vsldoi $H2l,$zero,$H2,8
vsldoi $H2h,$H2,$zero,8
stvx_u $Hl,r8,r3 # save H^3
li r8,0xa0
stvx_u $H,r9,r3
li r9,0xb0
stvx_u $Hh,r10,r3
li r10,0xc0
stvx_u $H2l,r8,r3 # save H^4
stvx_u $H2,r9,r3
stvx_u $H2h,r10,r3
mtspr 256,$vrsave
blr
.long 0
.byte 0,12,0x14,0,0,0,2,0
.long 0
.size .gcm_init_htable,.-.gcm_init_htable
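# gcm_gmult_p8(Xi, Htable): a single GHASH multiply, Xi = Xi * H, formed
# from three vpmsumd partial products followed by the two-phase reduction
# with the 0xc2 constant.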
.globl .gcm_gmult_p8
lis r0,0xfff8
li r8,0x10
mfspr $vrsave,256
li r9,0x20
mtspr 256,r0
li r10,0x30
lvx_u $IN,0,$Xip # load Xi
lvx_u $Hl,r8,$Htbl # load pre-computed table
le?lvsl $lemask,r0,r0
lvx_u $H, r9,$Htbl
le?vspltisb $t0,0x07
lvx_u $Hh,r10,$Htbl
le?vxor $lemask,$lemask,$t0
lvx_u $xC2,0,$Htbl
le?vperm $IN,$IN,$IN,$lemask
vxor $zero,$zero,$zero
vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
vpmsumd $t2,$Xl,$xC2 # 1st phase
vsldoi $t0,$Xm,$zero,8
vsldoi $t1,$zero,$Xm,8
vxor $Xl,$Xl,$t0
vxor $Xh,$Xh,$t1
vsldoi $Xl,$Xl,$Xl,8
vxor $Xl,$Xl,$t2
vsldoi $t1,$Xl,$Xl,8 # 2nd phase
vpmsumd $Xl,$Xl,$xC2
vxor $t1,$t1,$Xh
vxor $Xl,$Xl,$t1
le?vperm $Xl,$Xl,$Xl,$lemask
stvx_u $Xl,0,$Xip # write out Xi
mtspr 256,$vrsave
blr
.long 0
.byte 0,12,0x14,0,0,0,2,0
.long 0
.size .gcm_gmult_p8,.-.gcm_gmult_p8
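# gcm_ghash_p8(Xi, Htable, inp, len): fold len bytes of input (a multiple
# of 16) into the running hash, Xi = (...((Xi ^ in[0])*H ^ in[1])*H ...)*H.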
.globl .gcm_ghash_p8
lis r0,0xfff8
li r8,0x10
mfspr $vrsave,256
li r9,0x20
mtspr 256,r0
li r10,0x30
lvx_u $Xl,0,$Xip # load Xi
lvx_u $Hl,r8,$Htbl # load pre-computed table
le?lvsl $lemask,r0,r0
lvx_u $H, r9,$Htbl
le?vspltisb $t0,0x07
lvx_u $Hh,r10,$Htbl
le?vxor $lemask,$lemask,$t0
lvx_u $xC2,0,$Htbl
le?vperm $Xl,$Xl,$Xl,$lemask
vxor $zero,$zero,$zero
lvx_u $IN,0,$inp
addi $inp,$inp,16
subi $len,$len,16
le?vperm $IN,$IN,$IN,$lemask
vxor $IN,$IN,$Xl
b Loop
.align 5
Loop:
subic $len,$len,16
vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
subfe. r0,r0,r0 # borrow?-1:0
vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
and r0,r0,$len
vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
add $inp,$inp,r0
vpmsumd $t2,$Xl,$xC2 # 1st phase
vsldoi $t0,$Xm,$zero,8
vsldoi $t1,$zero,$Xm,8
vxor $Xl,$Xl,$t0
vxor $Xh,$Xh,$t1
vsldoi $Xl,$Xl,$Xl,8
vxor $Xl,$Xl,$t2
lvx_u $IN,0,$inp
addi $inp,$inp,16
vsldoi $t1,$Xl,$Xl,8 # 2nd phase
vpmsumd $Xl,$Xl,$xC2
le?vperm $IN,$IN,$IN,$lemask
vxor $t1,$t1,$Xh
vxor $IN,$IN,$t1
vxor $IN,$IN,$Xl
beq Loop # did $len-=16 borrow?
vxor $Xl,$Xl,$t1
le?vperm $Xl,$Xl,$Xl,$lemask
stvx_u $Xl,0,$Xip # write out Xi
mtspr 256,$vrsave
blr
.long 0
.byte 0,12,0x14,0,0,0,4,0
.long 0
.size .gcm_ghash_p8,.-.gcm_ghash_p8
.asciz "GHASH for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
.align 2
___
foreach (split("\n",$code)) {
if ($flavour =~ /le$/o) { # little-endian
s/le\?//o or
s/be\?/#be#/o;
} else {
s/le\?/#le#/o or
s/be\?//o;
}
print $_,"\n";
}
close STDOUT; # enforce flush
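
For orientation, the operation these routines accelerate is ordinary GF(2^128) multiplication as GHASH defines it. Below is a bit-at-a-time plain-C sketch of the same product (illustrative only; ghash_mul is our name, not kernel code):

#include <stdint.h>
#include <string.h>

/*
 * Xi = Xi * H in GHASH's GF(2^128), bit-at-a-time as in NIST SP 800-38D.
 * The assembler above computes the same product with three vpmsumd
 * multiplies and a two-phase reduction; its 0xc2... constant plays the
 * role of this 0xe1 polynomial, adjusted for the pre-shifted ("twisted")
 * H that gcm_init_p8 stores.
 */
static void ghash_mul(uint8_t Xi[16], const uint8_t H[16])
{
	uint8_t Z[16] = { 0 }, V[16];
	int i, j, k, lsb;

	memcpy(V, H, 16);
	for (i = 0; i < 16; i++) {
		for (j = 7; j >= 0; j--) {
			if ((Xi[i] >> j) & 1)	/* bit i*8+j of Xi set? */
				for (k = 0; k < 16; k++)
					Z[k] ^= V[k];
			lsb = V[15] & 1;	/* V = V * x ... */
			for (k = 15; k > 0; k--)
				V[k] = (V[k] >> 1) | (V[k - 1] << 7);
			V[0] >>= 1;
			if (lsb)		/* ... mod the GHASH polynomial */
				V[0] ^= 0xe1;
		}
	}
	memcpy(Xi, Z, 16);
}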
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Glue code for accelerated AES-GCM stitched implementation for ppc64le.
*
* Copyright 2022- IBM Inc. All rights reserved
*/
#include <asm/unaligned.h>
#include <asm/simd.h>
#include <asm/switch_to.h>
#include <crypto/algapi.h>
#include <crypto/aes.h>
#include <crypto/b128ops.h>
#include <crypto/gf128mul.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/aead.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/skcipher.h>
#include <crypto/scatterwalk.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <linux/module.h>
#include <linux/types.h>
#define PPC_MODULE_FEATURE_P10 (32 + ilog2(PPC_FEATURE2_ARCH_3_1))
#define PPC_ALIGN 16
#define GCM_IV_SIZE 12
MODULE_DESCRIPTION("PPC64le AES-GCM with Stitched implementation");
MODULE_AUTHOR("Danny Tsen <dtsen@linux.ibm.com>");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("aes");
asmlinkage int aes_p8_set_encrypt_key(const u8 *userKey, const int bits,
void *key);
asmlinkage void aes_p8_encrypt(const u8 *in, u8 *out, const void *key);
asmlinkage void aes_p10_gcm_encrypt(u8 *in, u8 *out, size_t len,
void *rkey, u8 *iv, void *Xi);
asmlinkage void aes_p10_gcm_decrypt(u8 *in, u8 *out, size_t len,
void *rkey, u8 *iv, void *Xi);
asmlinkage void gcm_init_htable(unsigned char htable[256], unsigned char Xi[16]);
asmlinkage void gcm_ghash_p8(unsigned char *Xi, unsigned char *Htable,
unsigned char *aad, unsigned int alen);
struct aes_key {
u8 key[AES_MAX_KEYLENGTH];
u64 rounds;
};
struct gcm_ctx {
u8 iv[16];
u8 ivtag[16];
u8 aad_hash[16];
u64 aadLen;
u64 Plen; /* offset 56 - used in aes_p10_gcm_{en/de}crypt */
};
struct Hash_ctx {
u8 H[16]; /* subkey */
u8 Htable[256]; /* Xi, Hash table(offset 32) */
};
struct p10_aes_gcm_ctx {
struct aes_key enc_key;
};
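/*
 * The assembler uses VSX registers, which kernel code may only touch
 * between enable_kernel_vsx()/disable_kernel_vsx() with preemption
 * disabled; every call into the asm below is bracketed by this pair.
 */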
static void vsx_begin(void)
{
preempt_disable();
enable_kernel_vsx();
}
static void vsx_end(void)
{
disable_kernel_vsx();
preempt_enable();
}
static void set_subkey(unsigned char *hash)
{
*(u64 *)&hash[0] = be64_to_cpup((__be64 *)&hash[0]);
*(u64 *)&hash[8] = be64_to_cpup((__be64 *)&hash[8]);
}
/*
* Compute aad if any.
* - Hash aad and copy to Xi.
*/
static void set_aad(struct gcm_ctx *gctx, struct Hash_ctx *hash,
unsigned char *aad, int alen)
{
int i;
u8 nXi[16] = {0, };
gctx->aadLen = alen;
i = alen & ~0xf;
if (i) {
gcm_ghash_p8(nXi, hash->Htable+32, aad, i);
aad += i;
alen -= i;
}
if (alen) {
for (i = 0; i < alen; i++)
nXi[i] ^= aad[i];
memset(gctx->aad_hash, 0, 16);
gcm_ghash_p8(gctx->aad_hash, hash->Htable+32, nXi, 16);
} else {
memcpy(gctx->aad_hash, nXi, 16);
}
memcpy(hash->Htable, gctx->aad_hash, 16);
}
static void gcmp10_init(struct gcm_ctx *gctx, u8 *iv, unsigned char *rdkey,
struct Hash_ctx *hash, u8 *assoc, unsigned int assoclen)
{
__be32 counter = cpu_to_be32(1);
aes_p8_encrypt(hash->H, hash->H, rdkey);
set_subkey(hash->H);
gcm_init_htable(hash->Htable+32, hash->H);
*((__be32 *)(iv+12)) = counter;
gctx->Plen = 0;
/*
* Encrypt counter vector as iv tag and increment counter.
*/
aes_p8_encrypt(iv, gctx->ivtag, rdkey);
counter = cpu_to_be32(2);
*((__be32 *)(iv+12)) = counter;
memcpy(gctx->iv, iv, 16);
gctx->aadLen = assoclen;
memset(gctx->aad_hash, 0, 16);
if (assoclen)
set_aad(gctx, hash, assoc, assoclen);
}
static void finish_tag(struct gcm_ctx *gctx, struct Hash_ctx *hash, int len)
{
int i;
unsigned char len_ac[16 + PPC_ALIGN];
unsigned char *aclen = PTR_ALIGN((void *)len_ac, PPC_ALIGN);
__be64 clen = cpu_to_be64(len << 3);
__be64 alen = cpu_to_be64(gctx->aadLen << 3);
if (len == 0 && gctx->aadLen == 0) {
memcpy(hash->Htable, gctx->ivtag, 16);
return;
}
/*
* Len is in bits.
*/
*((__be64 *)(aclen)) = alen;
*((__be64 *)(aclen+8)) = clen;
/*
* hash (AAD len and len)
*/
gcm_ghash_p8(hash->Htable, hash->Htable+32, aclen, 16);
for (i = 0; i < 16; i++)
hash->Htable[i] ^= gctx->ivtag[i];
}
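/*
 * GCM permits truncated tags; accept the sizes SP 800-38D allows
 * (128 down to 96 bits, plus the special 64- and 32-bit lengths).
 */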
static int set_authsize(struct crypto_aead *tfm, unsigned int authsize)
{
switch (authsize) {
case 4:
case 8:
case 12:
case 13:
case 14:
case 15:
case 16:
break;
default:
return -EINVAL;
}
return 0;
}
static int p10_aes_gcm_setkey(struct crypto_aead *aead, const u8 *key,
unsigned int keylen)
{
struct crypto_tfm *tfm = crypto_aead_tfm(aead);
struct p10_aes_gcm_ctx *ctx = crypto_tfm_ctx(tfm);
int ret;
vsx_begin();
ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key);
vsx_end();
return ret ? -EINVAL : 0;
}
static int p10_aes_gcm_crypt(struct aead_request *req, int enc)
{
struct crypto_tfm *tfm = req->base.tfm;
struct p10_aes_gcm_ctx *ctx = crypto_tfm_ctx(tfm);
u8 databuf[sizeof(struct gcm_ctx) + PPC_ALIGN];
struct gcm_ctx *gctx = PTR_ALIGN((void *)databuf, PPC_ALIGN);
u8 hashbuf[sizeof(struct Hash_ctx) + PPC_ALIGN];
struct Hash_ctx *hash = PTR_ALIGN((void *)hashbuf, PPC_ALIGN);
struct scatter_walk assoc_sg_walk;
struct skcipher_walk walk;
u8 *assocmem = NULL;
u8 *assoc;
unsigned int assoclen = req->assoclen;
unsigned int cryptlen = req->cryptlen;
unsigned char ivbuf[AES_BLOCK_SIZE+PPC_ALIGN];
unsigned char *iv = PTR_ALIGN((void *)ivbuf, PPC_ALIGN);
int ret;
unsigned long auth_tag_len = crypto_aead_authsize(__crypto_aead_cast(tfm));
u8 otag[16];
int total_processed = 0;
memset(databuf, 0, sizeof(databuf));
memset(hashbuf, 0, sizeof(hashbuf));
memset(ivbuf, 0, sizeof(ivbuf));
memcpy(iv, req->iv, GCM_IV_SIZE);
/* Linearize assoc, if not already linear */
if (req->src->length >= assoclen && req->src->length) {
scatterwalk_start(&assoc_sg_walk, req->src);
assoc = scatterwalk_map(&assoc_sg_walk);
} else {
gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
GFP_KERNEL : GFP_ATOMIC;
/* assoc can be any length, so must be on heap */
assocmem = kmalloc(assoclen, flags);
if (unlikely(!assocmem))
return -ENOMEM;
assoc = assocmem;
scatterwalk_map_and_copy(assoc, req->src, 0, assoclen, 0);
}
vsx_begin();
gcmp10_init(gctx, iv, (unsigned char *) &ctx->enc_key, hash, assoc, assoclen);
vsx_end();
if (!assocmem)
scatterwalk_unmap(assoc);
else
kfree(assocmem);
if (enc)
ret = skcipher_walk_aead_encrypt(&walk, req, false);
else
ret = skcipher_walk_aead_decrypt(&walk, req, false);
if (ret)
return ret;
while (walk.nbytes > 0 && ret == 0) {
vsx_begin();
if (enc)
aes_p10_gcm_encrypt(walk.src.virt.addr,
walk.dst.virt.addr,
walk.nbytes,
&ctx->enc_key, gctx->iv, hash->Htable);
else
aes_p10_gcm_decrypt(walk.src.virt.addr,
walk.dst.virt.addr,
walk.nbytes,
&ctx->enc_key, gctx->iv, hash->Htable);
vsx_end();
total_processed += walk.nbytes;
ret = skcipher_walk_done(&walk, 0);
}
if (ret)
return ret;
/* Finalize hash */
vsx_begin();
finish_tag(gctx, hash, total_processed);
vsx_end();
/* copy Xi to end of dst */
if (enc)
scatterwalk_map_and_copy(hash->Htable, req->dst, req->assoclen + cryptlen,
auth_tag_len, 1);
else {
scatterwalk_map_and_copy(otag, req->src,
req->assoclen + cryptlen - auth_tag_len,
auth_tag_len, 0);
if (crypto_memneq(otag, hash->Htable, auth_tag_len)) {
memzero_explicit(hash->Htable, 16);
return -EBADMSG;
}
}
return 0;
}
static int p10_aes_gcm_encrypt(struct aead_request *req)
{
return p10_aes_gcm_crypt(req, 1);
}
static int p10_aes_gcm_decrypt(struct aead_request *req)
{
return p10_aes_gcm_crypt(req, 0);
}
static struct aead_alg gcm_aes_alg = {
.ivsize = GCM_IV_SIZE,
.maxauthsize = 16,
.setauthsize = set_authsize,
.setkey = p10_aes_gcm_setkey,
.encrypt = p10_aes_gcm_encrypt,
.decrypt = p10_aes_gcm_decrypt,
.base.cra_name = "gcm(aes)",
.base.cra_driver_name = "p10_aes_gcm",
.base.cra_priority = 2100,
.base.cra_blocksize = 1,
.base.cra_ctxsize = sizeof(struct p10_aes_gcm_ctx),
.base.cra_module = THIS_MODULE,
};
static int __init p10_init(void)
{
return crypto_register_aead(&gcm_aes_alg);
}
static void __exit p10_exit(void)
{
crypto_unregister_aead(&gcm_aes_alg);
}
module_cpu_feature_match(PPC_MODULE_FEATURE_P10, p10_init);
module_exit(p10_exit);
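
Once registered, the transform is reachable under "gcm(aes)" through the normal kernel AEAD API. A hedged usage sketch (demo_gcm_encrypt is our name; a single linear buffer laid out as assoc || plaintext, with room for the 16-byte tag, is assumed):

#include <crypto/aead.h>
#include <linux/scatterlist.h>

static int demo_gcm_encrypt(u8 *buf, unsigned int assoclen,
			    unsigned int ptlen, u8 iv[12],
			    const u8 *key, unsigned int keylen)
{
	struct crypto_aead *tfm;
	struct aead_request *req;
	struct scatterlist sg;
	DECLARE_CRYPTO_WAIT(wait);
	int err;

	tfm = crypto_alloc_aead("gcm(aes)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_aead_setkey(tfm, key, keylen) ?:
	      crypto_aead_setauthsize(tfm, 16);
	if (err)
		goto out;

	req = aead_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		err = -ENOMEM;
		goto out;
	}

	/* buf covers assoclen + ptlen bytes plus 16 bytes for the tag */
	sg_init_one(&sg, buf, assoclen + ptlen + 16);
	aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP,
				  crypto_req_done, &wait);
	aead_request_set_ad(req, assoclen);
	aead_request_set_crypt(req, &sg, &sg, ptlen, iv);
	err = crypto_wait_req(crypto_aead_encrypt(req), &wait);

	aead_request_free(req);
out:
	crypto_free_aead(tfm);
	return err;
}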
[collapsed diff omitted]
#!/usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0
# PowerPC assembler distiller by <appro>.
my $flavour = shift;
my $output = shift;
open STDOUT,">$output" or die "can't open $output: $!";
my %GLOBALS;
my $dotinlocallabels=($flavour=~/linux/)?1:0;
################################################################
# directives which need special treatment on different platforms
################################################################
my $globl = sub {
my $junk = shift;
my $name = shift;
my $global = \$GLOBALS{$name};
my $ret;
$name =~ s|^[\.\_]||;
SWITCH: for ($flavour) {
/aix/ && do { $name = ".$name";
last;
};
/osx/ && do { $name = "_$name";
last;
};
/linux/
&& do { $ret = "_GLOBAL($name)";
last;
};
}
$ret = ".globl $name\nalign 5\n$name:" if (!$ret);
$$global = $name;
$ret;
};
my $text = sub {
my $ret = ($flavour =~ /aix/) ? ".csect\t.text[PR],7" : ".text";
$ret = ".abiversion 2\n".$ret if ($flavour =~ /linux.*64le/);
$ret;
};
my $machine = sub {
my $junk = shift;
my $arch = shift;
if ($flavour =~ /osx/)
{ $arch =~ s/\"//g;
$arch = ($flavour=~/64/) ? "ppc970-64" : "ppc970" if ($arch eq "any");
}
".machine $arch";
};
my $size = sub {
if ($flavour =~ /linux/)
{ shift;
my $name = shift; $name =~ s|^[\.\_]||;
my $ret = ".size $name,.-".($flavour=~/64$/?".":"").$name;
$ret .= "\n.size .$name,.-.$name" if ($flavour=~/64$/);
$ret;
}
else
{ ""; }
};
my $asciz = sub {
shift;
my $line = join(",",@_);
if ($line =~ /^"(.*)"$/)
{ ".byte " . join(",",unpack("C*",$1),0) . "\n.align 2"; }
else
{ ""; }
};
my $quad = sub {
shift;
my @ret;
my ($hi,$lo);
for (@_) {
if (/^0x([0-9a-f]*?)([0-9a-f]{1,8})$/io)
{ $hi=$1?"0x$1":"0"; $lo="0x$2"; }
elsif (/^([0-9]+)$/o)
{ $hi=$1>>32; $lo=$1&0xffffffff; } # error-prone with 32-bit perl
else
{ $hi=undef; $lo=$_; }
if (defined($hi))
{ push(@ret,$flavour=~/le$/o?".long\t$lo,$hi":".long\t$hi,$lo"); }
else
{ push(@ret,".quad $lo"); }
}
join("\n",@ret);
};
################################################################
# simplified mnemonics not handled by at least one assembler
################################################################
my $cmplw = sub {
my $f = shift;
my $cr = 0; $cr = shift if ($#_>1);
# Some out-of-date 32-bit GNU assembler just can't handle cmplw...
($flavour =~ /linux.*32/) ?
" .long ".sprintf "0x%x",31<<26|$cr<<23|$_[0]<<16|$_[1]<<11|64 :
" cmplw ".join(',',$cr,@_);
};
my $bdnz = sub {
my $f = shift;
my $bo = $f=~/[\+\-]/ ? 16+9 : 16; # optional "to be taken" hint
" bc $bo,0,".shift;
} if ($flavour!~/linux/);
my $bltlr = sub {
my $f = shift;
my $bo = $f=~/\-/ ? 12+2 : 12; # optional "not to be taken" hint
($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints
" .long ".sprintf "0x%x",19<<26|$bo<<21|16<<1 :
" bclr $bo,0";
};
my $bnelr = sub {
my $f = shift;
my $bo = $f=~/\-/ ? 4+2 : 4; # optional "not to be taken" hint
($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints
" .long ".sprintf "0x%x",19<<26|$bo<<21|2<<16|16<<1 :
" bclr $bo,2";
};
my $beqlr = sub {
my $f = shift;
my $bo = $f=~/-/ ? 12+2 : 12; # optional "not to be taken" hint
($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints
" .long ".sprintf "0x%X",19<<26|$bo<<21|2<<16|16<<1 :
" bclr $bo,2";
};
# GNU assembler can't handle extrdi rA,rS,16,48, or when sum of last two
# arguments is 64, with "operand out of range" error.
my $extrdi = sub {
my ($f,$ra,$rs,$n,$b) = @_;
$b = ($b+$n)&63; $n = 64-$n;
" rldicl $ra,$rs,$b,$n";
};
my $vmr = sub {
my ($f,$vx,$vy) = @_;
" vor $vx,$vy,$vy";
};
# Some ABIs specify vrsave, special-purpose register #256, as reserved
# for system use.
my $no_vrsave = ($flavour =~ /linux-ppc64le/);
my $mtspr = sub {
my ($f,$idx,$ra) = @_;
if ($idx == 256 && $no_vrsave) {
" or $ra,$ra,$ra";
} else {
" mtspr $idx,$ra";
}
};
my $mfspr = sub {
my ($f,$rd,$idx) = @_;
if ($idx == 256 && $no_vrsave) {
" li $rd,-1";
} else {
" mfspr $rd,$idx";
}
};
# PowerISA 2.06 stuff
sub vsxmem_op {
my ($f, $vrt, $ra, $rb, $op) = @_;
" .long ".sprintf "0x%X",(31<<26)|($vrt<<21)|($ra<<16)|($rb<<11)|($op*2+1);
}
# made-up unaligned memory reference AltiVec/VMX instructions
my $lvx_u = sub { vsxmem_op(@_, 844); }; # lxvd2x
my $stvx_u = sub { vsxmem_op(@_, 972); }; # stxvd2x
my $lvdx_u = sub { vsxmem_op(@_, 588); }; # lxsdx
my $stvdx_u = sub { vsxmem_op(@_, 716); }; # stxsdx
my $lvx_4w = sub { vsxmem_op(@_, 780); }; # lxvw4x
my $stvx_4w = sub { vsxmem_op(@_, 908); }; # stxvw4x
# PowerISA 2.07 stuff
sub vcrypto_op {
my ($f, $vrt, $vra, $vrb, $op) = @_;
" .long ".sprintf "0x%X",(4<<26)|($vrt<<21)|($vra<<16)|($vrb<<11)|$op;
}
my $vcipher = sub { vcrypto_op(@_, 1288); };
my $vcipherlast = sub { vcrypto_op(@_, 1289); };
my $vncipher = sub { vcrypto_op(@_, 1352); };
my $vncipherlast= sub { vcrypto_op(@_, 1353); };
my $vsbox = sub { vcrypto_op(@_, 0, 1480); };
my $vshasigmad = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1730); };
my $vshasigmaw = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1666); };
my $vpmsumb = sub { vcrypto_op(@_, 1032); };
my $vpmsumd = sub { vcrypto_op(@_, 1224); };
my $vpmsubh = sub { vcrypto_op(@_, 1096); };
my $vpmsumw = sub { vcrypto_op(@_, 1160); };
my $vaddudm = sub { vcrypto_op(@_, 192); };
my $vadduqm = sub { vcrypto_op(@_, 256); };
my $mtsle = sub {
my ($f, $arg) = @_;
" .long ".sprintf "0x%X",(31<<26)|($arg<<21)|(147*2);
};
print "#include <asm/ppc_asm.h>\n" if $flavour =~ /linux/;
while($line=<>) {
$line =~ s|[#!;].*$||; # get rid of asm-style comments...
$line =~ s|/\*.*\*/||; # ... and C-style comments...
$line =~ s|^\s+||; # ... and skip white spaces in beginning...
$line =~ s|\s+$||; # ... and at the end
{
$line =~ s|\b\.L(\w+)|L$1|g; # common denominator for Locallabel
$line =~ s|\bL(\w+)|\.L$1|g if ($dotinlocallabels);
}
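	# Dispatch: peel off an optional leading dot, the mnemonic and a
	# +/- branch hint; if a closure of the same name exists above, it
	# rewrites the line, otherwise the instruction is re-emitted
	# verbatim after register names (rN/vN/crN) are reduced to bare
	# numbers on non-OSX flavours.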
{
$line =~ s|^\s*(\.?)(\w+)([\.\+\-]?)\s*||;
my $c = $1; $c = "\t" if ($c eq "");
my $mnemonic = $2;
my $f = $3;
my $opcode = eval("\$$mnemonic");
$line =~ s/\b(c?[rf]|v|vs)([0-9]+)\b/$2/g if ($c ne "." and $flavour !~ /osx/);
if (ref($opcode) eq 'CODE') { $line = &$opcode($f,split(',',$line)); }
elsif ($mnemonic) { $line = $c.$mnemonic.$f."\t".$line; }
}
print $line if ($line);
print "\n";
}
close STDOUT;
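
As a worked illustration (derived from the closures above, for flavour linux-ppc64le), the distiller rewrites lines such as:

	.globl .gcm_init_p8   ->  _GLOBAL(gcm_init_p8)
	mfspr  r12,256        ->  li 12,-1      (vrsave read faked; this ABI reserves SPR 256)
	mtspr  256,r0         ->  or 0,0,0      (vrsave write dropped)
	lvx_u  v9,0,r4        ->  .long 0x...   (lxvd2x encoded by vsxmem_op)

while ordinary mnemonics pass through unchanged apart from the register-name substitution.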