Commit 596f674d authored by Herbert Xu

crypto: p10-aes-gcm - Revert implementation

Revert the changes that added p10-aes-gcm:

	0781bbd7 ("crypto: p10-aes-gcm - A perl script to process PowerPC assembler source")
	41a6437a ("crypto: p10-aes-gcm - Supporting functions for ghash")
	3b47ecca ("crypto: p10-aes-gcm - Supporting functions for AES")
	ca68a96c ("crypto: p10-aes-gcm - An accelerated AES/GCM stitched implementation")
	cc40379b ("crypto: p10-aes-gcm - Glue code for AES/GCM stitched implementation")
	3c657e86 ("crypto: p10-aes-gcm - Update Kconfig and Makefile")

These changes fail to build in many configurations and are not ready
for prime time.
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
parent f81c1d4a
......@@ -94,15 +94,4 @@ config CRYPTO_AES_PPC_SPE
architecture specific assembler implementations that work on 1KB
tables or 256 bytes S-boxes.
config CRYPTO_P10_AES_GCM
tristate "Stitched AES/GCM acceleration support on P10+ CPU (PPC)"
depends on PPC64
select CRYPTO_LIB_AES
select CRYPTO_ALGAPI
select CRYPTO_AEAD
default m
help
Support for cryptographic acceleration instructions on Power10+ CPU.
This module supports stitched acceleration for AES/GCM in hardware.
endmenu
......@@ -13,7 +13,6 @@ obj-$(CONFIG_CRYPTO_SHA256_PPC_SPE) += sha256-ppc-spe.o
obj-$(CONFIG_CRYPTO_CRC32C_VPMSUM) += crc32c-vpmsum.o
obj-$(CONFIG_CRYPTO_CRCT10DIF_VPMSUM) += crct10dif-vpmsum.o
obj-$(CONFIG_CRYPTO_VPMSUM_TESTER) += crc-vpmsum_test.o
obj-$(CONFIG_CRYPTO_P10_AES_GCM) += p10-aes-gcm-crypto.o
aes-ppc-spe-y := aes-spe-core.o aes-spe-keys.o aes-tab-4k.o aes-spe-modes.o aes-spe-glue.o
md5-ppc-y := md5-asm.o md5-glue.o
......@@ -22,12 +21,3 @@ sha1-ppc-spe-y := sha1-spe-asm.o sha1-spe-glue.o
sha256-ppc-spe-y := sha256-spe-asm.o sha256-spe-glue.o
crc32c-vpmsum-y := crc32c-vpmsum_asm.o crc32c-vpmsum_glue.o
crct10dif-vpmsum-y := crct10dif-vpmsum_asm.o crct10dif-vpmsum_glue.o
p10-aes-gcm-crypto-y := p10-aes-gcm-glue.o p10_aes_gcm.o ghashp8-ppc.o aesp8-ppc.o
quiet_cmd_perl = PERL $@
cmd_perl = $(PERL) $< $(if $(CONFIG_CPU_LITTLE_ENDIAN), linux-ppc64le, linux-ppc64) > $@
targets += aesp8-ppc.S ghashp8-ppc.S
$(obj)/aesp8-ppc.S $(obj)/ghashp8-ppc.S: $(obj)/%.S: $(src)/%.pl FORCE
$(call if_changed,perl)
#! /usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0
# This code is taken from CRYPTOGAMs[1] and is included here using the option
# in the license to distribute the code under the GPL. Therefore this program
# is free software; you can redistribute it and/or modify it under the terms of
# the GNU General Public License version 2 as published by the Free Software
# Foundation.
#
# [1] https://www.openssl.org/~appro/cryptogams/
# Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain copyright notices,
# this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials
# provided with the distribution.
#
# * Neither the name of the CRYPTOGAMS nor the names of its
# copyright holder and contributors may be used to endorse or
# promote products derived from this software without specific
# prior written permission.
#
# ALTERNATIVELY, provided that this notice is retained in full, this
# product may be distributed under the terms of the GNU General Public
# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
# those given above.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see https://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements support for AES instructions as per PowerISA
# specification version 2.07, first implemented by POWER8 processor.
# The module is endian-agnostic in sense that it supports both big-
# and little-endian cases. Data alignment in parallelizable modes is
# handled with VSX loads and stores, which implies MSR.VSX flag being
# set. It should also be noted that ISA specification doesn't prohibit
# alignment exceptions for these instructions on page boundaries.
# Initially alignment was handled in pure AltiVec/VMX way [when data
# is aligned programmatically, which in turn guarantees exception-
# free execution], but it turned to hamper performance when vcipher
# instructions are interleaved. It's reckoned that eventual
# misalignment penalties at page boundaries are in average lower
# than additional overhead in pure AltiVec approach.
#
# May 2016
#
# Add XTS subroutine, 9x on little- and 12x improvement on big-endian
# systems were measured.
#
######################################################################
# Current large-block performance in cycles per byte processed with
# 128-bit key (less is better).
#
# CBC en-/decrypt CTR XTS
# POWER8[le] 3.96/0.72 0.74 1.1
# POWER8[be] 3.75/0.65 0.66 1.0
# Command-line configuration: the first argument selects the ABI
# "flavour" (e.g. linux-ppc64, linux-ppc64le), which picks 64- vs
# 32-bit mnemonics and stack-layout constants used by the generated
# assembler.  Dies on an unrecognized flavour.
$flavour = shift;

if ($flavour =~ /64/) {
	$SIZE_T	=8;		# pointer/GPR size in bytes
	$LRSAVE	=2*$SIZE_T;	# LR save-slot offset per 64-bit ABI
	$STU	="stdu";	# store-with-update (frame push)
	$POP	="ld";
	$PUSH	="std";
	$UCMP	="cmpld";	# unsigned compare
	$SHL	="sldi";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
	$UCMP	="cmplw";
	$SHL	="slwi";
} else { die "nonsense $flavour"; }

# Non-zero when the flavour targets little-endian (name ends in "le").
$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

# Locate the ppc-xlate.pl post-processor next to this script or in the
# perlasm directory, then pipe everything we print on STDOUT through it.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# BUGFIX: use low-precedence "or" instead of "||" here.  "||" binds to
# the concatenated command string ("..." . shift), which is always
# true, so a failed open() could never reach the die.
open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!";

$FRAME=8*$SIZE_T;	# minimal stack-frame size for this ABI
$prefix="aes_p8";	# symbol prefix for all generated entry points

$sp="r1";		# stack pointer
$vrsave="r12";		# scratch GPR used to save/restore VRSAVE (SPR 256)
#########################################################################
{{{ # Key setup procedures #
# Emits .${prefix}_set_encrypt_key and .${prefix}_set_decrypt_key.
# Encrypt-key setup expands a 128/192/256-bit user key into the round
# key schedule (return value in r3: 0 ok, -1 null pointer, -2 bad key
# length, per the Lenc_key_abort paths below).  Decrypt-key setup calls
# Lset_encrypt_key and then swaps the schedule end-for-end (Ldeckey loop).
# Register roles for the generated code:
my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));
# NOTE: the heredoc below is string content consumed by ppc-xlate.pl
# ("?"-prefixed and "le?"-prefixed lines are conditional on endianness);
# it is emitted verbatim and intentionally left untouched here.
$code.=<<___;
.machine "any"
.text
.align 7
rcon:
.long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev
.long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev
.long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev
.long 0,0,0,0 ?asis
Lconsts:
mflr r0
bcl 20,31,\$+4
mflr $ptr #vvvvv "distance between . and rcon
addi $ptr,$ptr,-0x48
mtlr r0
blr
.long 0
.byte 0,12,0x14,0,0,0,0,0
.asciz "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
.globl .${prefix}_set_encrypt_key
Lset_encrypt_key:
mflr r11
$PUSH r11,$LRSAVE($sp)
li $ptr,-1
${UCMP}i $inp,0
beq- Lenc_key_abort # if ($inp==0) return -1;
${UCMP}i $out,0
beq- Lenc_key_abort # if ($out==0) return -1;
li $ptr,-2
cmpwi $bits,128
blt- Lenc_key_abort
cmpwi $bits,256
bgt- Lenc_key_abort
andi. r0,$bits,0x3f
bne- Lenc_key_abort
lis r0,0xfff0
mfspr $vrsave,256
mtspr 256,r0
bl Lconsts
mtlr r11
neg r9,$inp
lvx $in0,0,$inp
addi $inp,$inp,15 # 15 is not typo
lvsr $key,0,r9 # borrow $key
li r8,0x20
cmpwi $bits,192
lvx $in1,0,$inp
le?vspltisb $mask,0x0f # borrow $mask
lvx $rcon,0,$ptr
le?vxor $key,$key,$mask # adjust for byte swap
lvx $mask,r8,$ptr
addi $ptr,$ptr,0x10
vperm $in0,$in0,$in1,$key # align [and byte swap in LE]
li $cnt,8
vxor $zero,$zero,$zero
mtctr $cnt
?lvsr $outperm,0,$out
vspltisb $outmask,-1
lvx $outhead,0,$out
?vperm $outmask,$zero,$outmask,$outperm
blt Loop128
addi $inp,$inp,8
beq L192
addi $inp,$inp,8
b L256
.align 4
Loop128:
vperm $key,$in0,$in0,$mask # rotate-n-splat
vsldoi $tmp,$zero,$in0,12 # >>32
vperm $outtail,$in0,$in0,$outperm # rotate
vsel $stage,$outhead,$outtail,$outmask
vmr $outhead,$outtail
vcipherlast $key,$key,$rcon
stvx $stage,0,$out
addi $out,$out,16
vxor $in0,$in0,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vxor $in0,$in0,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vxor $in0,$in0,$tmp
vadduwm $rcon,$rcon,$rcon
vxor $in0,$in0,$key
bdnz Loop128
lvx $rcon,0,$ptr # last two round keys
vperm $key,$in0,$in0,$mask # rotate-n-splat
vsldoi $tmp,$zero,$in0,12 # >>32
vperm $outtail,$in0,$in0,$outperm # rotate
vsel $stage,$outhead,$outtail,$outmask
vmr $outhead,$outtail
vcipherlast $key,$key,$rcon
stvx $stage,0,$out
addi $out,$out,16
vxor $in0,$in0,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vxor $in0,$in0,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vxor $in0,$in0,$tmp
vadduwm $rcon,$rcon,$rcon
vxor $in0,$in0,$key
vperm $key,$in0,$in0,$mask # rotate-n-splat
vsldoi $tmp,$zero,$in0,12 # >>32
vperm $outtail,$in0,$in0,$outperm # rotate
vsel $stage,$outhead,$outtail,$outmask
vmr $outhead,$outtail
vcipherlast $key,$key,$rcon
stvx $stage,0,$out
addi $out,$out,16
vxor $in0,$in0,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vxor $in0,$in0,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vxor $in0,$in0,$tmp
vxor $in0,$in0,$key
vperm $outtail,$in0,$in0,$outperm # rotate
vsel $stage,$outhead,$outtail,$outmask
vmr $outhead,$outtail
stvx $stage,0,$out
addi $inp,$out,15 # 15 is not typo
addi $out,$out,0x50
li $rounds,10
b Ldone
.align 4
L192:
lvx $tmp,0,$inp
li $cnt,4
vperm $outtail,$in0,$in0,$outperm # rotate
vsel $stage,$outhead,$outtail,$outmask
vmr $outhead,$outtail
stvx $stage,0,$out
addi $out,$out,16
vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
vspltisb $key,8 # borrow $key
mtctr $cnt
vsububm $mask,$mask,$key # adjust the mask
Loop192:
vperm $key,$in1,$in1,$mask # roate-n-splat
vsldoi $tmp,$zero,$in0,12 # >>32
vcipherlast $key,$key,$rcon
vxor $in0,$in0,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vxor $in0,$in0,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vxor $in0,$in0,$tmp
vsldoi $stage,$zero,$in1,8
vspltw $tmp,$in0,3
vxor $tmp,$tmp,$in1
vsldoi $in1,$zero,$in1,12 # >>32
vadduwm $rcon,$rcon,$rcon
vxor $in1,$in1,$tmp
vxor $in0,$in0,$key
vxor $in1,$in1,$key
vsldoi $stage,$stage,$in0,8
vperm $key,$in1,$in1,$mask # rotate-n-splat
vsldoi $tmp,$zero,$in0,12 # >>32
vperm $outtail,$stage,$stage,$outperm # rotate
vsel $stage,$outhead,$outtail,$outmask
vmr $outhead,$outtail
vcipherlast $key,$key,$rcon
stvx $stage,0,$out
addi $out,$out,16
vsldoi $stage,$in0,$in1,8
vxor $in0,$in0,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vperm $outtail,$stage,$stage,$outperm # rotate
vsel $stage,$outhead,$outtail,$outmask
vmr $outhead,$outtail
vxor $in0,$in0,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vxor $in0,$in0,$tmp
stvx $stage,0,$out
addi $out,$out,16
vspltw $tmp,$in0,3
vxor $tmp,$tmp,$in1
vsldoi $in1,$zero,$in1,12 # >>32
vadduwm $rcon,$rcon,$rcon
vxor $in1,$in1,$tmp
vxor $in0,$in0,$key
vxor $in1,$in1,$key
vperm $outtail,$in0,$in0,$outperm # rotate
vsel $stage,$outhead,$outtail,$outmask
vmr $outhead,$outtail
stvx $stage,0,$out
addi $inp,$out,15 # 15 is not typo
addi $out,$out,16
bdnz Loop192
li $rounds,12
addi $out,$out,0x20
b Ldone
.align 4
L256:
lvx $tmp,0,$inp
li $cnt,7
li $rounds,14
vperm $outtail,$in0,$in0,$outperm # rotate
vsel $stage,$outhead,$outtail,$outmask
vmr $outhead,$outtail
stvx $stage,0,$out
addi $out,$out,16
vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
mtctr $cnt
Loop256:
vperm $key,$in1,$in1,$mask # rotate-n-splat
vsldoi $tmp,$zero,$in0,12 # >>32
vperm $outtail,$in1,$in1,$outperm # rotate
vsel $stage,$outhead,$outtail,$outmask
vmr $outhead,$outtail
vcipherlast $key,$key,$rcon
stvx $stage,0,$out
addi $out,$out,16
vxor $in0,$in0,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vxor $in0,$in0,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vxor $in0,$in0,$tmp
vadduwm $rcon,$rcon,$rcon
vxor $in0,$in0,$key
vperm $outtail,$in0,$in0,$outperm # rotate
vsel $stage,$outhead,$outtail,$outmask
vmr $outhead,$outtail
stvx $stage,0,$out
addi $inp,$out,15 # 15 is not typo
addi $out,$out,16
bdz Ldone
vspltw $key,$in0,3 # just splat
vsldoi $tmp,$zero,$in1,12 # >>32
vsbox $key,$key
vxor $in1,$in1,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vxor $in1,$in1,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vxor $in1,$in1,$tmp
vxor $in1,$in1,$key
b Loop256
.align 4
Ldone:
lvx $in1,0,$inp # redundant in aligned case
vsel $in1,$outhead,$in1,$outmask
stvx $in1,0,$inp
li $ptr,0
mtspr 256,$vrsave
stw $rounds,0($out)
Lenc_key_abort:
mr r3,$ptr
blr
.long 0
.byte 0,12,0x14,1,0,0,3,0
.long 0
.size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key
.globl .${prefix}_set_decrypt_key
$STU $sp,-$FRAME($sp)
mflr r10
$PUSH r10,$FRAME+$LRSAVE($sp)
bl Lset_encrypt_key
mtlr r10
cmpwi r3,0
bne- Ldec_key_abort
slwi $cnt,$rounds,4
subi $inp,$out,240 # first round key
srwi $rounds,$rounds,1
add $out,$inp,$cnt # last round key
mtctr $rounds
Ldeckey:
lwz r0, 0($inp)
lwz r6, 4($inp)
lwz r7, 8($inp)
lwz r8, 12($inp)
addi $inp,$inp,16
lwz r9, 0($out)
lwz r10,4($out)
lwz r11,8($out)
lwz r12,12($out)
stw r0, 0($out)
stw r6, 4($out)
stw r7, 8($out)
stw r8, 12($out)
subi $out,$out,16
stw r9, -16($inp)
stw r10,-12($inp)
stw r11,-8($inp)
stw r12,-4($inp)
bdnz Ldeckey
xor r3,r3,r3 # return value
Ldec_key_abort:
addi $sp,$sp,$FRAME
blr
.long 0
.byte 0,12,4,1,0x80,0,3,0
.long 0
.size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
___
}}}
#########################################################################
{{{ # Single block en- and decrypt procedures #
# gen_block($dir): emits a single-block AES routine.  $dir is "en" or
# "de"; for "de" the $n prefix becomes "n", turning vcipher/vcipherlast
# into vncipher/vncipherlast in the template below.  Called twice at
# the bottom of this section to produce both directions.
sub gen_block () {
my $dir = shift;
my $n = $dir eq "de" ? "n" : "";
# Argument registers of the generated function (inp, out, key, plus
# scratch for the round count and a running offset):
my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));
# Heredoc is emitted verbatim — string content, not perl code.
$code.=<<___;
.globl .${prefix}_${dir}crypt
lwz $rounds,240($key)
lis r0,0xfc00
mfspr $vrsave,256
li $idx,15 # 15 is not typo
mtspr 256,r0
lvx v0,0,$inp
neg r11,$out
lvx v1,$idx,$inp
lvsl v2,0,$inp # inpperm
le?vspltisb v4,0x0f
?lvsl v3,0,r11 # outperm
le?vxor v2,v2,v4
li $idx,16
vperm v0,v0,v1,v2 # align [and byte swap in LE]
lvx v1,0,$key
?lvsl v5,0,$key # keyperm
srwi $rounds,$rounds,1
lvx v2,$idx,$key
addi $idx,$idx,16
subi $rounds,$rounds,1
?vperm v1,v1,v2,v5 # align round key
vxor v0,v0,v1
lvx v1,$idx,$key
addi $idx,$idx,16
mtctr $rounds
Loop_${dir}c:
?vperm v2,v2,v1,v5
v${n}cipher v0,v0,v2
lvx v2,$idx,$key
addi $idx,$idx,16
?vperm v1,v1,v2,v5
v${n}cipher v0,v0,v1
lvx v1,$idx,$key
addi $idx,$idx,16
bdnz Loop_${dir}c
?vperm v2,v2,v1,v5
v${n}cipher v0,v0,v2
lvx v2,$idx,$key
?vperm v1,v1,v2,v5
v${n}cipherlast v0,v0,v1
vspltisb v2,-1
vxor v1,v1,v1
li $idx,15 # 15 is not typo
?vperm v2,v1,v2,v3 # outmask
le?vxor v3,v3,v4
lvx v1,0,$out # outhead
vperm v0,v0,v0,v3 # rotate [and byte swap in LE]
vsel v1,v1,v0,v2
lvx v4,$idx,$out
stvx v1,0,$out
vsel v0,v0,v4,v2
stvx v0,$idx,$out
mtspr 256,$vrsave
blr
.long 0
.byte 0,12,0x14,0,0,0,3,0
.long 0
.size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
___
}
# The "&" call form is required here: it bypasses the empty "()"
# prototype on gen_block, which would otherwise reject arguments.
&gen_block("en");
&gen_block("de");
}}}
#########################################################################
{{{ # CBC en- and decrypt procedures #
# Register map for the CBC routines emitted below: GPR argument
# registers (inp, out, len, key, ivp, enc flag, plus scratch), and the
# VR working set for round keys, the in-flight block, and the
# alignment/permutation helpers.
my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
map("v$_",(4..10));
# Emits .${prefix}_cbc_encrypt: one-block-at-a-time CBC encryption
# (Lcbc_enc) and decryption (Lcbc_dec); decryption of >=128 bytes
# branches to the 8x-unrolled _aesp8_cbc_decrypt8x emitted in the next
# section.  Heredoc is emitted verbatim — string content, not perl code.
$code.=<<___;
.globl .${prefix}_cbc_encrypt
${UCMP}i $len,16
bltlr-
cmpwi $enc,0 # test direction
lis r0,0xffe0
mfspr $vrsave,256
mtspr 256,r0
li $idx,15
vxor $rndkey0,$rndkey0,$rndkey0
le?vspltisb $tmp,0x0f
lvx $ivec,0,$ivp # load [unaligned] iv
lvsl $inpperm,0,$ivp
lvx $inptail,$idx,$ivp
le?vxor $inpperm,$inpperm,$tmp
vperm $ivec,$ivec,$inptail,$inpperm
neg r11,$inp
?lvsl $keyperm,0,$key # prepare for unaligned key
lwz $rounds,240($key)
lvsr $inpperm,0,r11 # prepare for unaligned load
lvx $inptail,0,$inp
addi $inp,$inp,15 # 15 is not typo
le?vxor $inpperm,$inpperm,$tmp
?lvsr $outperm,0,$out # prepare for unaligned store
vspltisb $outmask,-1
lvx $outhead,0,$out
?vperm $outmask,$rndkey0,$outmask,$outperm
le?vxor $outperm,$outperm,$tmp
srwi $rounds,$rounds,1
li $idx,16
subi $rounds,$rounds,1
beq Lcbc_dec
Lcbc_enc:
vmr $inout,$inptail
lvx $inptail,0,$inp
addi $inp,$inp,16
mtctr $rounds
subi $len,$len,16 # len-=16
lvx $rndkey0,0,$key
vperm $inout,$inout,$inptail,$inpperm
lvx $rndkey1,$idx,$key
addi $idx,$idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vxor $inout,$inout,$rndkey0
lvx $rndkey0,$idx,$key
addi $idx,$idx,16
vxor $inout,$inout,$ivec
Loop_cbc_enc:
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
vcipher $inout,$inout,$rndkey1
lvx $rndkey1,$idx,$key
addi $idx,$idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vcipher $inout,$inout,$rndkey0
lvx $rndkey0,$idx,$key
addi $idx,$idx,16
bdnz Loop_cbc_enc
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
vcipher $inout,$inout,$rndkey1
lvx $rndkey1,$idx,$key
li $idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vcipherlast $ivec,$inout,$rndkey0
${UCMP}i $len,16
vperm $tmp,$ivec,$ivec,$outperm
vsel $inout,$outhead,$tmp,$outmask
vmr $outhead,$tmp
stvx $inout,0,$out
addi $out,$out,16
bge Lcbc_enc
b Lcbc_done
.align 4
Lcbc_dec:
${UCMP}i $len,128
bge _aesp8_cbc_decrypt8x
vmr $tmp,$inptail
lvx $inptail,0,$inp
addi $inp,$inp,16
mtctr $rounds
subi $len,$len,16 # len-=16
lvx $rndkey0,0,$key
vperm $tmp,$tmp,$inptail,$inpperm
lvx $rndkey1,$idx,$key
addi $idx,$idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vxor $inout,$tmp,$rndkey0
lvx $rndkey0,$idx,$key
addi $idx,$idx,16
Loop_cbc_dec:
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
vncipher $inout,$inout,$rndkey1
lvx $rndkey1,$idx,$key
addi $idx,$idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vncipher $inout,$inout,$rndkey0
lvx $rndkey0,$idx,$key
addi $idx,$idx,16
bdnz Loop_cbc_dec
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
vncipher $inout,$inout,$rndkey1
lvx $rndkey1,$idx,$key
li $idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vncipherlast $inout,$inout,$rndkey0
${UCMP}i $len,16
vxor $inout,$inout,$ivec
vmr $ivec,$tmp
vperm $tmp,$inout,$inout,$outperm
vsel $inout,$outhead,$tmp,$outmask
vmr $outhead,$tmp
stvx $inout,0,$out
addi $out,$out,16
bge Lcbc_dec
Lcbc_done:
addi $out,$out,-1
lvx $inout,0,$out # redundant in aligned case
vsel $inout,$outhead,$inout,$outmask
stvx $inout,0,$out
neg $enc,$ivp # write [unaligned] iv
li $idx,15 # 15 is not typo
vxor $rndkey0,$rndkey0,$rndkey0
vspltisb $outmask,-1
le?vspltisb $tmp,0x0f
?lvsl $outperm,0,$enc
?vperm $outmask,$rndkey0,$outmask,$outperm
le?vxor $outperm,$outperm,$tmp
lvx $outhead,0,$ivp
vperm $ivec,$ivec,$ivec,$outperm
vsel $inout,$outhead,$ivec,$outmask
lvx $inptail,$idx,$ivp
stvx $inout,0,$ivp
vsel $inout,$ivec,$inptail,$outmask
stvx $inout,$idx,$ivp
mtspr 256,$vrsave
blr
.long 0
.byte 0,12,0x14,0,0,0,6,0
.long 0
___
#########################################################################
{{ # Optimized CBC decrypt procedure #
# Register map for the 8x-unrolled decrypt path: $key_ points at the
# on-stack copy of the key schedule; $x00..$x70 hold the 0x00..0x70
# block offsets; in0-7/out0-7 carry eight blocks in flight.
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
# v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
# Emits _aesp8_cbc_decrypt8x, the bulk (>=128-byte) CBC decrypt path
# reached from Lcbc_dec above.  It saves v20-v31 and r26-r31 per ABI,
# off-loads permuted round keys to the stack, decrypts eight blocks per
# iteration, handles a 1..7-block tail via the Lcbc_dec8x_* switch, then
# wipes the on-stack key copies and restores registers.  Heredoc is
# emitted verbatim — string content, not perl code.
$code.=<<___;
.align 5
_aesp8_cbc_decrypt8x:
$STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
li r10,`$FRAME+8*16+15`
li r11,`$FRAME+8*16+31`
stvx v20,r10,$sp # ABI says so
addi r10,r10,32
stvx v21,r11,$sp
addi r11,r11,32
stvx v22,r10,$sp
addi r10,r10,32
stvx v23,r11,$sp
addi r11,r11,32
stvx v24,r10,$sp
addi r10,r10,32
stvx v25,r11,$sp
addi r11,r11,32
stvx v26,r10,$sp
addi r10,r10,32
stvx v27,r11,$sp
addi r11,r11,32
stvx v28,r10,$sp
addi r10,r10,32
stvx v29,r11,$sp
addi r11,r11,32
stvx v30,r10,$sp
stvx v31,r11,$sp
li r0,-1
stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
li $x10,0x10
$PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
li $x20,0x20
$PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
li $x30,0x30
$PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
li $x40,0x40
$PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
li $x50,0x50
$PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
li $x60,0x60
$PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
li $x70,0x70
mtspr 256,r0
subi $rounds,$rounds,3 # -4 in total
subi $len,$len,128 # bias
lvx $rndkey0,$x00,$key # load key schedule
lvx v30,$x10,$key
addi $key,$key,0x20
lvx v31,$x00,$key
?vperm $rndkey0,$rndkey0,v30,$keyperm
addi $key_,$sp,$FRAME+15
mtctr $rounds
Load_cbc_dec_key:
?vperm v24,v30,v31,$keyperm
lvx v30,$x10,$key
addi $key,$key,0x20
stvx v24,$x00,$key_ # off-load round[1]
?vperm v25,v31,v30,$keyperm
lvx v31,$x00,$key
stvx v25,$x10,$key_ # off-load round[2]
addi $key_,$key_,0x20
bdnz Load_cbc_dec_key
lvx v26,$x10,$key
?vperm v24,v30,v31,$keyperm
lvx v27,$x20,$key
stvx v24,$x00,$key_ # off-load round[3]
?vperm v25,v31,v26,$keyperm
lvx v28,$x30,$key
stvx v25,$x10,$key_ # off-load round[4]
addi $key_,$sp,$FRAME+15 # rewind $key_
?vperm v26,v26,v27,$keyperm
lvx v29,$x40,$key
?vperm v27,v27,v28,$keyperm
lvx v30,$x50,$key
?vperm v28,v28,v29,$keyperm
lvx v31,$x60,$key
?vperm v29,v29,v30,$keyperm
lvx $out0,$x70,$key # borrow $out0
?vperm v30,v30,v31,$keyperm
lvx v24,$x00,$key_ # pre-load round[1]
?vperm v31,v31,$out0,$keyperm
lvx v25,$x10,$key_ # pre-load round[2]
#lvx $inptail,0,$inp # "caller" already did this
#addi $inp,$inp,15 # 15 is not typo
subi $inp,$inp,15 # undo "caller"
le?li $idx,8
lvx_u $in0,$x00,$inp # load first 8 "words"
le?lvsl $inpperm,0,$idx
le?vspltisb $tmp,0x0f
lvx_u $in1,$x10,$inp
le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
lvx_u $in2,$x20,$inp
le?vperm $in0,$in0,$in0,$inpperm
lvx_u $in3,$x30,$inp
le?vperm $in1,$in1,$in1,$inpperm
lvx_u $in4,$x40,$inp
le?vperm $in2,$in2,$in2,$inpperm
vxor $out0,$in0,$rndkey0
lvx_u $in5,$x50,$inp
le?vperm $in3,$in3,$in3,$inpperm
vxor $out1,$in1,$rndkey0
lvx_u $in6,$x60,$inp
le?vperm $in4,$in4,$in4,$inpperm
vxor $out2,$in2,$rndkey0
lvx_u $in7,$x70,$inp
addi $inp,$inp,0x80
le?vperm $in5,$in5,$in5,$inpperm
vxor $out3,$in3,$rndkey0
le?vperm $in6,$in6,$in6,$inpperm
vxor $out4,$in4,$rndkey0
le?vperm $in7,$in7,$in7,$inpperm
vxor $out5,$in5,$rndkey0
vxor $out6,$in6,$rndkey0
vxor $out7,$in7,$rndkey0
mtctr $rounds
b Loop_cbc_dec8x
.align 5
Loop_cbc_dec8x:
vncipher $out0,$out0,v24
vncipher $out1,$out1,v24
vncipher $out2,$out2,v24
vncipher $out3,$out3,v24
vncipher $out4,$out4,v24
vncipher $out5,$out5,v24
vncipher $out6,$out6,v24
vncipher $out7,$out7,v24
lvx v24,$x20,$key_ # round[3]
addi $key_,$key_,0x20
vncipher $out0,$out0,v25
vncipher $out1,$out1,v25
vncipher $out2,$out2,v25
vncipher $out3,$out3,v25
vncipher $out4,$out4,v25
vncipher $out5,$out5,v25
vncipher $out6,$out6,v25
vncipher $out7,$out7,v25
lvx v25,$x10,$key_ # round[4]
bdnz Loop_cbc_dec8x
subic $len,$len,128 # $len-=128
vncipher $out0,$out0,v24
vncipher $out1,$out1,v24
vncipher $out2,$out2,v24
vncipher $out3,$out3,v24
vncipher $out4,$out4,v24
vncipher $out5,$out5,v24
vncipher $out6,$out6,v24
vncipher $out7,$out7,v24
subfe. r0,r0,r0 # borrow?-1:0
vncipher $out0,$out0,v25
vncipher $out1,$out1,v25
vncipher $out2,$out2,v25
vncipher $out3,$out3,v25
vncipher $out4,$out4,v25
vncipher $out5,$out5,v25
vncipher $out6,$out6,v25
vncipher $out7,$out7,v25
and r0,r0,$len
vncipher $out0,$out0,v26
vncipher $out1,$out1,v26
vncipher $out2,$out2,v26
vncipher $out3,$out3,v26
vncipher $out4,$out4,v26
vncipher $out5,$out5,v26
vncipher $out6,$out6,v26
vncipher $out7,$out7,v26
add $inp,$inp,r0 # $inp is adjusted in such
# way that at exit from the
# loop inX-in7 are loaded
# with last "words"
vncipher $out0,$out0,v27
vncipher $out1,$out1,v27
vncipher $out2,$out2,v27
vncipher $out3,$out3,v27
vncipher $out4,$out4,v27
vncipher $out5,$out5,v27
vncipher $out6,$out6,v27
vncipher $out7,$out7,v27
addi $key_,$sp,$FRAME+15 # rewind $key_
vncipher $out0,$out0,v28
vncipher $out1,$out1,v28
vncipher $out2,$out2,v28
vncipher $out3,$out3,v28
vncipher $out4,$out4,v28
vncipher $out5,$out5,v28
vncipher $out6,$out6,v28
vncipher $out7,$out7,v28
lvx v24,$x00,$key_ # re-pre-load round[1]
vncipher $out0,$out0,v29
vncipher $out1,$out1,v29
vncipher $out2,$out2,v29
vncipher $out3,$out3,v29
vncipher $out4,$out4,v29
vncipher $out5,$out5,v29
vncipher $out6,$out6,v29
vncipher $out7,$out7,v29
lvx v25,$x10,$key_ # re-pre-load round[2]
vncipher $out0,$out0,v30
vxor $ivec,$ivec,v31 # xor with last round key
vncipher $out1,$out1,v30
vxor $in0,$in0,v31
vncipher $out2,$out2,v30
vxor $in1,$in1,v31
vncipher $out3,$out3,v30
vxor $in2,$in2,v31
vncipher $out4,$out4,v30
vxor $in3,$in3,v31
vncipher $out5,$out5,v30
vxor $in4,$in4,v31
vncipher $out6,$out6,v30
vxor $in5,$in5,v31
vncipher $out7,$out7,v30
vxor $in6,$in6,v31
vncipherlast $out0,$out0,$ivec
vncipherlast $out1,$out1,$in0
lvx_u $in0,$x00,$inp # load next input block
vncipherlast $out2,$out2,$in1
lvx_u $in1,$x10,$inp
vncipherlast $out3,$out3,$in2
le?vperm $in0,$in0,$in0,$inpperm
lvx_u $in2,$x20,$inp
vncipherlast $out4,$out4,$in3
le?vperm $in1,$in1,$in1,$inpperm
lvx_u $in3,$x30,$inp
vncipherlast $out5,$out5,$in4
le?vperm $in2,$in2,$in2,$inpperm
lvx_u $in4,$x40,$inp
vncipherlast $out6,$out6,$in5
le?vperm $in3,$in3,$in3,$inpperm
lvx_u $in5,$x50,$inp
vncipherlast $out7,$out7,$in6
le?vperm $in4,$in4,$in4,$inpperm
lvx_u $in6,$x60,$inp
vmr $ivec,$in7
le?vperm $in5,$in5,$in5,$inpperm
lvx_u $in7,$x70,$inp
addi $inp,$inp,0x80
le?vperm $out0,$out0,$out0,$inpperm
le?vperm $out1,$out1,$out1,$inpperm
stvx_u $out0,$x00,$out
le?vperm $in6,$in6,$in6,$inpperm
vxor $out0,$in0,$rndkey0
le?vperm $out2,$out2,$out2,$inpperm
stvx_u $out1,$x10,$out
le?vperm $in7,$in7,$in7,$inpperm
vxor $out1,$in1,$rndkey0
le?vperm $out3,$out3,$out3,$inpperm
stvx_u $out2,$x20,$out
vxor $out2,$in2,$rndkey0
le?vperm $out4,$out4,$out4,$inpperm
stvx_u $out3,$x30,$out
vxor $out3,$in3,$rndkey0
le?vperm $out5,$out5,$out5,$inpperm
stvx_u $out4,$x40,$out
vxor $out4,$in4,$rndkey0
le?vperm $out6,$out6,$out6,$inpperm
stvx_u $out5,$x50,$out
vxor $out5,$in5,$rndkey0
le?vperm $out7,$out7,$out7,$inpperm
stvx_u $out6,$x60,$out
vxor $out6,$in6,$rndkey0
stvx_u $out7,$x70,$out
addi $out,$out,0x80
vxor $out7,$in7,$rndkey0
mtctr $rounds
beq Loop_cbc_dec8x # did $len-=128 borrow?
addic. $len,$len,128
beq Lcbc_dec8x_done
nop
nop
Loop_cbc_dec8x_tail: # up to 7 "words" tail...
vncipher $out1,$out1,v24
vncipher $out2,$out2,v24
vncipher $out3,$out3,v24
vncipher $out4,$out4,v24
vncipher $out5,$out5,v24
vncipher $out6,$out6,v24
vncipher $out7,$out7,v24
lvx v24,$x20,$key_ # round[3]
addi $key_,$key_,0x20
vncipher $out1,$out1,v25
vncipher $out2,$out2,v25
vncipher $out3,$out3,v25
vncipher $out4,$out4,v25
vncipher $out5,$out5,v25
vncipher $out6,$out6,v25
vncipher $out7,$out7,v25
lvx v25,$x10,$key_ # round[4]
bdnz Loop_cbc_dec8x_tail
vncipher $out1,$out1,v24
vncipher $out2,$out2,v24
vncipher $out3,$out3,v24
vncipher $out4,$out4,v24
vncipher $out5,$out5,v24
vncipher $out6,$out6,v24
vncipher $out7,$out7,v24
vncipher $out1,$out1,v25
vncipher $out2,$out2,v25
vncipher $out3,$out3,v25
vncipher $out4,$out4,v25
vncipher $out5,$out5,v25
vncipher $out6,$out6,v25
vncipher $out7,$out7,v25
vncipher $out1,$out1,v26
vncipher $out2,$out2,v26
vncipher $out3,$out3,v26
vncipher $out4,$out4,v26
vncipher $out5,$out5,v26
vncipher $out6,$out6,v26
vncipher $out7,$out7,v26
vncipher $out1,$out1,v27
vncipher $out2,$out2,v27
vncipher $out3,$out3,v27
vncipher $out4,$out4,v27
vncipher $out5,$out5,v27
vncipher $out6,$out6,v27
vncipher $out7,$out7,v27
vncipher $out1,$out1,v28
vncipher $out2,$out2,v28
vncipher $out3,$out3,v28
vncipher $out4,$out4,v28
vncipher $out5,$out5,v28
vncipher $out6,$out6,v28
vncipher $out7,$out7,v28
vncipher $out1,$out1,v29
vncipher $out2,$out2,v29
vncipher $out3,$out3,v29
vncipher $out4,$out4,v29
vncipher $out5,$out5,v29
vncipher $out6,$out6,v29
vncipher $out7,$out7,v29
vncipher $out1,$out1,v30
vxor $ivec,$ivec,v31 # last round key
vncipher $out2,$out2,v30
vxor $in1,$in1,v31
vncipher $out3,$out3,v30
vxor $in2,$in2,v31
vncipher $out4,$out4,v30
vxor $in3,$in3,v31
vncipher $out5,$out5,v30
vxor $in4,$in4,v31
vncipher $out6,$out6,v30
vxor $in5,$in5,v31
vncipher $out7,$out7,v30
vxor $in6,$in6,v31
cmplwi $len,32 # switch($len)
blt Lcbc_dec8x_one
nop
beq Lcbc_dec8x_two
cmplwi $len,64
blt Lcbc_dec8x_three
nop
beq Lcbc_dec8x_four
cmplwi $len,96
blt Lcbc_dec8x_five
nop
beq Lcbc_dec8x_six
Lcbc_dec8x_seven:
vncipherlast $out1,$out1,$ivec
vncipherlast $out2,$out2,$in1
vncipherlast $out3,$out3,$in2
vncipherlast $out4,$out4,$in3
vncipherlast $out5,$out5,$in4
vncipherlast $out6,$out6,$in5
vncipherlast $out7,$out7,$in6
vmr $ivec,$in7
le?vperm $out1,$out1,$out1,$inpperm
le?vperm $out2,$out2,$out2,$inpperm
stvx_u $out1,$x00,$out
le?vperm $out3,$out3,$out3,$inpperm
stvx_u $out2,$x10,$out
le?vperm $out4,$out4,$out4,$inpperm
stvx_u $out3,$x20,$out
le?vperm $out5,$out5,$out5,$inpperm
stvx_u $out4,$x30,$out
le?vperm $out6,$out6,$out6,$inpperm
stvx_u $out5,$x40,$out
le?vperm $out7,$out7,$out7,$inpperm
stvx_u $out6,$x50,$out
stvx_u $out7,$x60,$out
addi $out,$out,0x70
b Lcbc_dec8x_done
.align 5
Lcbc_dec8x_six:
vncipherlast $out2,$out2,$ivec
vncipherlast $out3,$out3,$in2
vncipherlast $out4,$out4,$in3
vncipherlast $out5,$out5,$in4
vncipherlast $out6,$out6,$in5
vncipherlast $out7,$out7,$in6
vmr $ivec,$in7
le?vperm $out2,$out2,$out2,$inpperm
le?vperm $out3,$out3,$out3,$inpperm
stvx_u $out2,$x00,$out
le?vperm $out4,$out4,$out4,$inpperm
stvx_u $out3,$x10,$out
le?vperm $out5,$out5,$out5,$inpperm
stvx_u $out4,$x20,$out
le?vperm $out6,$out6,$out6,$inpperm
stvx_u $out5,$x30,$out
le?vperm $out7,$out7,$out7,$inpperm
stvx_u $out6,$x40,$out
stvx_u $out7,$x50,$out
addi $out,$out,0x60
b Lcbc_dec8x_done
.align 5
Lcbc_dec8x_five:
vncipherlast $out3,$out3,$ivec
vncipherlast $out4,$out4,$in3
vncipherlast $out5,$out5,$in4
vncipherlast $out6,$out6,$in5
vncipherlast $out7,$out7,$in6
vmr $ivec,$in7
le?vperm $out3,$out3,$out3,$inpperm
le?vperm $out4,$out4,$out4,$inpperm
stvx_u $out3,$x00,$out
le?vperm $out5,$out5,$out5,$inpperm
stvx_u $out4,$x10,$out
le?vperm $out6,$out6,$out6,$inpperm
stvx_u $out5,$x20,$out
le?vperm $out7,$out7,$out7,$inpperm
stvx_u $out6,$x30,$out
stvx_u $out7,$x40,$out
addi $out,$out,0x50
b Lcbc_dec8x_done
.align 5
Lcbc_dec8x_four:
vncipherlast $out4,$out4,$ivec
vncipherlast $out5,$out5,$in4
vncipherlast $out6,$out6,$in5
vncipherlast $out7,$out7,$in6
vmr $ivec,$in7
le?vperm $out4,$out4,$out4,$inpperm
le?vperm $out5,$out5,$out5,$inpperm
stvx_u $out4,$x00,$out
le?vperm $out6,$out6,$out6,$inpperm
stvx_u $out5,$x10,$out
le?vperm $out7,$out7,$out7,$inpperm
stvx_u $out6,$x20,$out
stvx_u $out7,$x30,$out
addi $out,$out,0x40
b Lcbc_dec8x_done
.align 5
Lcbc_dec8x_three:
vncipherlast $out5,$out5,$ivec
vncipherlast $out6,$out6,$in5
vncipherlast $out7,$out7,$in6
vmr $ivec,$in7
le?vperm $out5,$out5,$out5,$inpperm
le?vperm $out6,$out6,$out6,$inpperm
stvx_u $out5,$x00,$out
le?vperm $out7,$out7,$out7,$inpperm
stvx_u $out6,$x10,$out
stvx_u $out7,$x20,$out
addi $out,$out,0x30
b Lcbc_dec8x_done
.align 5
Lcbc_dec8x_two:
vncipherlast $out6,$out6,$ivec
vncipherlast $out7,$out7,$in6
vmr $ivec,$in7
le?vperm $out6,$out6,$out6,$inpperm
le?vperm $out7,$out7,$out7,$inpperm
stvx_u $out6,$x00,$out
stvx_u $out7,$x10,$out
addi $out,$out,0x20
b Lcbc_dec8x_done
.align 5
Lcbc_dec8x_one:
vncipherlast $out7,$out7,$ivec
vmr $ivec,$in7
le?vperm $out7,$out7,$out7,$inpperm
stvx_u $out7,0,$out
addi $out,$out,0x10
Lcbc_dec8x_done:
le?vperm $ivec,$ivec,$ivec,$inpperm
stvx_u $ivec,0,$ivp # write [unaligned] iv
li r10,`$FRAME+15`
li r11,`$FRAME+31`
stvx $inpperm,r10,$sp # wipe copies of round keys
addi r10,r10,32
stvx $inpperm,r11,$sp
addi r11,r11,32
stvx $inpperm,r10,$sp
addi r10,r10,32
stvx $inpperm,r11,$sp
addi r11,r11,32
stvx $inpperm,r10,$sp
addi r10,r10,32
stvx $inpperm,r11,$sp
addi r11,r11,32
stvx $inpperm,r10,$sp
addi r10,r10,32
stvx $inpperm,r11,$sp
addi r11,r11,32
mtspr 256,$vrsave
lvx v20,r10,$sp # ABI says so
addi r10,r10,32
lvx v21,r11,$sp
addi r11,r11,32
lvx v22,r10,$sp
addi r10,r10,32
lvx v23,r11,$sp
addi r11,r11,32
lvx v24,r10,$sp
addi r10,r10,32
lvx v25,r11,$sp
addi r11,r11,32
lvx v26,r10,$sp
addi r10,r10,32
lvx v27,r11,$sp
addi r11,r11,32
lvx v28,r10,$sp
addi r10,r10,32
lvx v29,r11,$sp
addi r11,r11,32
lvx v30,r10,$sp
lvx v31,r11,$sp
$POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
$POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
$POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
$POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
$POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
$POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
blr
.long 0
.byte 0,12,0x14,0,0x80,6,6,0
.long 0
.size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
___
# Close the optimized-decrypt scope and the whole CBC section.
}} }}}
#########################################################################
{{{ # CTR procedure[s] #
####################### WARNING: Here be dragons! #######################
#
# This code is written as 'ctr32', based on a 32-bit counter used
# upstream. The kernel does *not* use a 32-bit counter. The kernel uses
# a 128-bit counter.
#
# This leads to subtle changes from the upstream code: the counter
# is incremented with vaddu_q_m rather than vaddu_w_m. This occurs in
# both the bulk (8 blocks at a time) path, and in the individual block
# path. Be aware of this when doing updates.
#
# See:
# 1d4aa0b4c181 ("crypto: vmx - Fixing AES-CTR counter bug")
# 009b30ac7444 ("crypto: vmx - CTR: always increment IV as quadword")
# https://github.com/openssl/openssl/pull/8942
#
#########################################################################
# Register map for the scalar CTR path.  r3..r10 carry the C calling
# convention arguments (inp, out, len, key, ivp); the remaining GPRs and
# VRs are scratch.  $one holds the 128-bit constant 1 used to advance the
# counter ($ivec) with vadduqm (quadword add -- see the warning above).
my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
map("v$_",(4..11));
my $dat=$tmp;
# Emit the scalar (one block per Loop_ctr32_enc iteration) entry point.
# When 8 or more blocks remain it branches straight into the bulk routine
# _aesp8_ctr32_encrypt8x emitted below.  The lvsl/lvsr + vperm sequences
# implement the classic AltiVec unaligned load/store dance, and the
# le?/be? prefixes select little-/big-endian variants at assembly time.
$code.=<<___;
.globl .${prefix}_ctr32_encrypt_blocks
${UCMP}i $len,1
bltlr-
lis r0,0xfff0
mfspr $vrsave,256
mtspr 256,r0
li $idx,15
vxor $rndkey0,$rndkey0,$rndkey0
le?vspltisb $tmp,0x0f
lvx $ivec,0,$ivp # load [unaligned] iv
lvsl $inpperm,0,$ivp
lvx $inptail,$idx,$ivp
vspltisb $one,1
le?vxor $inpperm,$inpperm,$tmp
vperm $ivec,$ivec,$inptail,$inpperm
vsldoi $one,$rndkey0,$one,1
neg r11,$inp
?lvsl $keyperm,0,$key # prepare for unaligned key
lwz $rounds,240($key)
lvsr $inpperm,0,r11 # prepare for unaligned load
lvx $inptail,0,$inp
addi $inp,$inp,15 # 15 is not typo
le?vxor $inpperm,$inpperm,$tmp
srwi $rounds,$rounds,1
li $idx,16
subi $rounds,$rounds,1
${UCMP}i $len,8
bge _aesp8_ctr32_encrypt8x
?lvsr $outperm,0,$out # prepare for unaligned store
vspltisb $outmask,-1
lvx $outhead,0,$out
?vperm $outmask,$rndkey0,$outmask,$outperm
le?vxor $outperm,$outperm,$tmp
lvx $rndkey0,0,$key
mtctr $rounds
lvx $rndkey1,$idx,$key
addi $idx,$idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vxor $inout,$ivec,$rndkey0
lvx $rndkey0,$idx,$key
addi $idx,$idx,16
b Loop_ctr32_enc
.align 5
Loop_ctr32_enc:
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
vcipher $inout,$inout,$rndkey1
lvx $rndkey1,$idx,$key
addi $idx,$idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vcipher $inout,$inout,$rndkey0
lvx $rndkey0,$idx,$key
addi $idx,$idx,16
bdnz Loop_ctr32_enc
vadduqm $ivec,$ivec,$one # Kernel change for 128-bit
vmr $dat,$inptail
lvx $inptail,0,$inp
addi $inp,$inp,16
subic. $len,$len,1 # blocks--
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
vcipher $inout,$inout,$rndkey1
lvx $rndkey1,$idx,$key
vperm $dat,$dat,$inptail,$inpperm
li $idx,16
?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm
lvx $rndkey0,0,$key
vxor $dat,$dat,$rndkey1 # last round key
vcipherlast $inout,$inout,$dat
lvx $rndkey1,$idx,$key
addi $idx,$idx,16
vperm $inout,$inout,$inout,$outperm
vsel $dat,$outhead,$inout,$outmask
mtctr $rounds
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vmr $outhead,$inout
vxor $inout,$ivec,$rndkey0
lvx $rndkey0,$idx,$key
addi $idx,$idx,16
stvx $dat,0,$out
addi $out,$out,16
bne Loop_ctr32_enc
addi $out,$out,-1
lvx $inout,0,$out # redundant in aligned case
vsel $inout,$outhead,$inout,$outmask
stvx $inout,0,$out
mtspr 256,$vrsave
blr
.long 0
.byte 0,12,0x14,0,0,0,6,0
.long 0
___
#########################################################################
{{ # Optimized CTR procedure #
# Register map for the bulk (8-blocks-per-iteration) CTR path.  $key_
# points at an on-stack copy of the aligned round keys; $x00..$x70 are
# the 0x00..0x70 byte offsets used with the indexed vector loads/stores.
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
# v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
my ($two,$three,$four)=($outhead,$outperm,$outmask);
# Emit the bulk routine reached from ctr32_encrypt_blocks when $len>=8
# blocks.  It spills the nonvolatile VRs/GPRs per the ABI, pre-aligns the
# key schedule onto the stack, keeps 8 counter values in flight, and at
# the end dispatches on the residual count (Lctr32_enc8x_one..eight)
# before wiping the on-stack key copies and restoring state.  Counter
# increments use vadduqm (128-bit) -- see the kernel warning above.
$code.=<<___;
.align 5
_aesp8_ctr32_encrypt8x:
$STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
li r10,`$FRAME+8*16+15`
li r11,`$FRAME+8*16+31`
stvx v20,r10,$sp # ABI says so
addi r10,r10,32
stvx v21,r11,$sp
addi r11,r11,32
stvx v22,r10,$sp
addi r10,r10,32
stvx v23,r11,$sp
addi r11,r11,32
stvx v24,r10,$sp
addi r10,r10,32
stvx v25,r11,$sp
addi r11,r11,32
stvx v26,r10,$sp
addi r10,r10,32
stvx v27,r11,$sp
addi r11,r11,32
stvx v28,r10,$sp
addi r10,r10,32
stvx v29,r11,$sp
addi r11,r11,32
stvx v30,r10,$sp
stvx v31,r11,$sp
li r0,-1
stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
li $x10,0x10
$PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
li $x20,0x20
$PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
li $x30,0x30
$PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
li $x40,0x40
$PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
li $x50,0x50
$PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
li $x60,0x60
$PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
li $x70,0x70
mtspr 256,r0
subi $rounds,$rounds,3 # -4 in total
lvx $rndkey0,$x00,$key # load key schedule
lvx v30,$x10,$key
addi $key,$key,0x20
lvx v31,$x00,$key
?vperm $rndkey0,$rndkey0,v30,$keyperm
addi $key_,$sp,$FRAME+15
mtctr $rounds
Load_ctr32_enc_key:
?vperm v24,v30,v31,$keyperm
lvx v30,$x10,$key
addi $key,$key,0x20
stvx v24,$x00,$key_ # off-load round[1]
?vperm v25,v31,v30,$keyperm
lvx v31,$x00,$key
stvx v25,$x10,$key_ # off-load round[2]
addi $key_,$key_,0x20
bdnz Load_ctr32_enc_key
lvx v26,$x10,$key
?vperm v24,v30,v31,$keyperm
lvx v27,$x20,$key
stvx v24,$x00,$key_ # off-load round[3]
?vperm v25,v31,v26,$keyperm
lvx v28,$x30,$key
stvx v25,$x10,$key_ # off-load round[4]
addi $key_,$sp,$FRAME+15 # rewind $key_
?vperm v26,v26,v27,$keyperm
lvx v29,$x40,$key
?vperm v27,v27,v28,$keyperm
lvx v30,$x50,$key
?vperm v28,v28,v29,$keyperm
lvx v31,$x60,$key
?vperm v29,v29,v30,$keyperm
lvx $out0,$x70,$key # borrow $out0
?vperm v30,v30,v31,$keyperm
lvx v24,$x00,$key_ # pre-load round[1]
?vperm v31,v31,$out0,$keyperm
lvx v25,$x10,$key_ # pre-load round[2]
vadduqm $two,$one,$one
subi $inp,$inp,15 # undo "caller"
$SHL $len,$len,4
vadduqm $out1,$ivec,$one # counter values ...
vadduqm $out2,$ivec,$two # (do all ctr adds as 128-bit)
vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
le?li $idx,8
vadduqm $out3,$out1,$two
vxor $out1,$out1,$rndkey0
le?lvsl $inpperm,0,$idx
vadduqm $out4,$out2,$two
vxor $out2,$out2,$rndkey0
le?vspltisb $tmp,0x0f
vadduqm $out5,$out3,$two
vxor $out3,$out3,$rndkey0
le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
vadduqm $out6,$out4,$two
vxor $out4,$out4,$rndkey0
vadduqm $out7,$out5,$two
vxor $out5,$out5,$rndkey0
vadduqm $ivec,$out6,$two # next counter value
vxor $out6,$out6,$rndkey0
vxor $out7,$out7,$rndkey0
mtctr $rounds
b Loop_ctr32_enc8x
.align 5
Loop_ctr32_enc8x:
vcipher $out0,$out0,v24
vcipher $out1,$out1,v24
vcipher $out2,$out2,v24
vcipher $out3,$out3,v24
vcipher $out4,$out4,v24
vcipher $out5,$out5,v24
vcipher $out6,$out6,v24
vcipher $out7,$out7,v24
Loop_ctr32_enc8x_middle:
lvx v24,$x20,$key_ # round[3]
addi $key_,$key_,0x20
vcipher $out0,$out0,v25
vcipher $out1,$out1,v25
vcipher $out2,$out2,v25
vcipher $out3,$out3,v25
vcipher $out4,$out4,v25
vcipher $out5,$out5,v25
vcipher $out6,$out6,v25
vcipher $out7,$out7,v25
lvx v25,$x10,$key_ # round[4]
bdnz Loop_ctr32_enc8x
subic r11,$len,256 # $len-256, borrow $key_
vcipher $out0,$out0,v24
vcipher $out1,$out1,v24
vcipher $out2,$out2,v24
vcipher $out3,$out3,v24
vcipher $out4,$out4,v24
vcipher $out5,$out5,v24
vcipher $out6,$out6,v24
vcipher $out7,$out7,v24
subfe r0,r0,r0 # borrow?-1:0
vcipher $out0,$out0,v25
vcipher $out1,$out1,v25
vcipher $out2,$out2,v25
vcipher $out3,$out3,v25
vcipher $out4,$out4,v25
vcipher $out5,$out5,v25
vcipher $out6,$out6,v25
vcipher $out7,$out7,v25
and r0,r0,r11
addi $key_,$sp,$FRAME+15 # rewind $key_
vcipher $out0,$out0,v26
vcipher $out1,$out1,v26
vcipher $out2,$out2,v26
vcipher $out3,$out3,v26
vcipher $out4,$out4,v26
vcipher $out5,$out5,v26
vcipher $out6,$out6,v26
vcipher $out7,$out7,v26
lvx v24,$x00,$key_ # re-pre-load round[1]
subic $len,$len,129 # $len-=129
vcipher $out0,$out0,v27
addi $len,$len,1 # $len-=128 really
vcipher $out1,$out1,v27
vcipher $out2,$out2,v27
vcipher $out3,$out3,v27
vcipher $out4,$out4,v27
vcipher $out5,$out5,v27
vcipher $out6,$out6,v27
vcipher $out7,$out7,v27
lvx v25,$x10,$key_ # re-pre-load round[2]
vcipher $out0,$out0,v28
lvx_u $in0,$x00,$inp # load input
vcipher $out1,$out1,v28
lvx_u $in1,$x10,$inp
vcipher $out2,$out2,v28
lvx_u $in2,$x20,$inp
vcipher $out3,$out3,v28
lvx_u $in3,$x30,$inp
vcipher $out4,$out4,v28
lvx_u $in4,$x40,$inp
vcipher $out5,$out5,v28
lvx_u $in5,$x50,$inp
vcipher $out6,$out6,v28
lvx_u $in6,$x60,$inp
vcipher $out7,$out7,v28
lvx_u $in7,$x70,$inp
addi $inp,$inp,0x80
vcipher $out0,$out0,v29
le?vperm $in0,$in0,$in0,$inpperm
vcipher $out1,$out1,v29
le?vperm $in1,$in1,$in1,$inpperm
vcipher $out2,$out2,v29
le?vperm $in2,$in2,$in2,$inpperm
vcipher $out3,$out3,v29
le?vperm $in3,$in3,$in3,$inpperm
vcipher $out4,$out4,v29
le?vperm $in4,$in4,$in4,$inpperm
vcipher $out5,$out5,v29
le?vperm $in5,$in5,$in5,$inpperm
vcipher $out6,$out6,v29
le?vperm $in6,$in6,$in6,$inpperm
vcipher $out7,$out7,v29
le?vperm $in7,$in7,$in7,$inpperm
add $inp,$inp,r0 # $inp is adjusted in such
# way that at exit from the
# loop inX-in7 are loaded
# with last "words"
subfe. r0,r0,r0 # borrow?-1:0
vcipher $out0,$out0,v30
vxor $in0,$in0,v31 # xor with last round key
vcipher $out1,$out1,v30
vxor $in1,$in1,v31
vcipher $out2,$out2,v30
vxor $in2,$in2,v31
vcipher $out3,$out3,v30
vxor $in3,$in3,v31
vcipher $out4,$out4,v30
vxor $in4,$in4,v31
vcipher $out5,$out5,v30
vxor $in5,$in5,v31
vcipher $out6,$out6,v30
vxor $in6,$in6,v31
vcipher $out7,$out7,v30
vxor $in7,$in7,v31
bne Lctr32_enc8x_break # did $len-129 borrow?
vcipherlast $in0,$out0,$in0
vcipherlast $in1,$out1,$in1
vadduqm $out1,$ivec,$one # counter values ...
vcipherlast $in2,$out2,$in2
vadduqm $out2,$ivec,$two
vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
vcipherlast $in3,$out3,$in3
vadduqm $out3,$out1,$two
vxor $out1,$out1,$rndkey0
vcipherlast $in4,$out4,$in4
vadduqm $out4,$out2,$two
vxor $out2,$out2,$rndkey0
vcipherlast $in5,$out5,$in5
vadduqm $out5,$out3,$two
vxor $out3,$out3,$rndkey0
vcipherlast $in6,$out6,$in6
vadduqm $out6,$out4,$two
vxor $out4,$out4,$rndkey0
vcipherlast $in7,$out7,$in7
vadduqm $out7,$out5,$two
vxor $out5,$out5,$rndkey0
le?vperm $in0,$in0,$in0,$inpperm
vadduqm $ivec,$out6,$two # next counter value
vxor $out6,$out6,$rndkey0
le?vperm $in1,$in1,$in1,$inpperm
vxor $out7,$out7,$rndkey0
mtctr $rounds
vcipher $out0,$out0,v24
stvx_u $in0,$x00,$out
le?vperm $in2,$in2,$in2,$inpperm
vcipher $out1,$out1,v24
stvx_u $in1,$x10,$out
le?vperm $in3,$in3,$in3,$inpperm
vcipher $out2,$out2,v24
stvx_u $in2,$x20,$out
le?vperm $in4,$in4,$in4,$inpperm
vcipher $out3,$out3,v24
stvx_u $in3,$x30,$out
le?vperm $in5,$in5,$in5,$inpperm
vcipher $out4,$out4,v24
stvx_u $in4,$x40,$out
le?vperm $in6,$in6,$in6,$inpperm
vcipher $out5,$out5,v24
stvx_u $in5,$x50,$out
le?vperm $in7,$in7,$in7,$inpperm
vcipher $out6,$out6,v24
stvx_u $in6,$x60,$out
vcipher $out7,$out7,v24
stvx_u $in7,$x70,$out
addi $out,$out,0x80
b Loop_ctr32_enc8x_middle
.align 5
Lctr32_enc8x_break:
cmpwi $len,-0x60
blt Lctr32_enc8x_one
nop
beq Lctr32_enc8x_two
cmpwi $len,-0x40
blt Lctr32_enc8x_three
nop
beq Lctr32_enc8x_four
cmpwi $len,-0x20
blt Lctr32_enc8x_five
nop
beq Lctr32_enc8x_six
cmpwi $len,0x00
blt Lctr32_enc8x_seven
Lctr32_enc8x_eight:
vcipherlast $out0,$out0,$in0
vcipherlast $out1,$out1,$in1
vcipherlast $out2,$out2,$in2
vcipherlast $out3,$out3,$in3
vcipherlast $out4,$out4,$in4
vcipherlast $out5,$out5,$in5
vcipherlast $out6,$out6,$in6
vcipherlast $out7,$out7,$in7
le?vperm $out0,$out0,$out0,$inpperm
le?vperm $out1,$out1,$out1,$inpperm
stvx_u $out0,$x00,$out
le?vperm $out2,$out2,$out2,$inpperm
stvx_u $out1,$x10,$out
le?vperm $out3,$out3,$out3,$inpperm
stvx_u $out2,$x20,$out
le?vperm $out4,$out4,$out4,$inpperm
stvx_u $out3,$x30,$out
le?vperm $out5,$out5,$out5,$inpperm
stvx_u $out4,$x40,$out
le?vperm $out6,$out6,$out6,$inpperm
stvx_u $out5,$x50,$out
le?vperm $out7,$out7,$out7,$inpperm
stvx_u $out6,$x60,$out
stvx_u $out7,$x70,$out
addi $out,$out,0x80
b Lctr32_enc8x_done
.align 5
Lctr32_enc8x_seven:
vcipherlast $out0,$out0,$in1
vcipherlast $out1,$out1,$in2
vcipherlast $out2,$out2,$in3
vcipherlast $out3,$out3,$in4
vcipherlast $out4,$out4,$in5
vcipherlast $out5,$out5,$in6
vcipherlast $out6,$out6,$in7
le?vperm $out0,$out0,$out0,$inpperm
le?vperm $out1,$out1,$out1,$inpperm
stvx_u $out0,$x00,$out
le?vperm $out2,$out2,$out2,$inpperm
stvx_u $out1,$x10,$out
le?vperm $out3,$out3,$out3,$inpperm
stvx_u $out2,$x20,$out
le?vperm $out4,$out4,$out4,$inpperm
stvx_u $out3,$x30,$out
le?vperm $out5,$out5,$out5,$inpperm
stvx_u $out4,$x40,$out
le?vperm $out6,$out6,$out6,$inpperm
stvx_u $out5,$x50,$out
stvx_u $out6,$x60,$out
addi $out,$out,0x70
b Lctr32_enc8x_done
.align 5
Lctr32_enc8x_six:
vcipherlast $out0,$out0,$in2
vcipherlast $out1,$out1,$in3
vcipherlast $out2,$out2,$in4
vcipherlast $out3,$out3,$in5
vcipherlast $out4,$out4,$in6
vcipherlast $out5,$out5,$in7
le?vperm $out0,$out0,$out0,$inpperm
le?vperm $out1,$out1,$out1,$inpperm
stvx_u $out0,$x00,$out
le?vperm $out2,$out2,$out2,$inpperm
stvx_u $out1,$x10,$out
le?vperm $out3,$out3,$out3,$inpperm
stvx_u $out2,$x20,$out
le?vperm $out4,$out4,$out4,$inpperm
stvx_u $out3,$x30,$out
le?vperm $out5,$out5,$out5,$inpperm
stvx_u $out4,$x40,$out
stvx_u $out5,$x50,$out
addi $out,$out,0x60
b Lctr32_enc8x_done
.align 5
Lctr32_enc8x_five:
vcipherlast $out0,$out0,$in3
vcipherlast $out1,$out1,$in4
vcipherlast $out2,$out2,$in5
vcipherlast $out3,$out3,$in6
vcipherlast $out4,$out4,$in7
le?vperm $out0,$out0,$out0,$inpperm
le?vperm $out1,$out1,$out1,$inpperm
stvx_u $out0,$x00,$out
le?vperm $out2,$out2,$out2,$inpperm
stvx_u $out1,$x10,$out
le?vperm $out3,$out3,$out3,$inpperm
stvx_u $out2,$x20,$out
le?vperm $out4,$out4,$out4,$inpperm
stvx_u $out3,$x30,$out
stvx_u $out4,$x40,$out
addi $out,$out,0x50
b Lctr32_enc8x_done
.align 5
Lctr32_enc8x_four:
vcipherlast $out0,$out0,$in4
vcipherlast $out1,$out1,$in5
vcipherlast $out2,$out2,$in6
vcipherlast $out3,$out3,$in7
le?vperm $out0,$out0,$out0,$inpperm
le?vperm $out1,$out1,$out1,$inpperm
stvx_u $out0,$x00,$out
le?vperm $out2,$out2,$out2,$inpperm
stvx_u $out1,$x10,$out
le?vperm $out3,$out3,$out3,$inpperm
stvx_u $out2,$x20,$out
stvx_u $out3,$x30,$out
addi $out,$out,0x40
b Lctr32_enc8x_done
.align 5
Lctr32_enc8x_three:
vcipherlast $out0,$out0,$in5
vcipherlast $out1,$out1,$in6
vcipherlast $out2,$out2,$in7
le?vperm $out0,$out0,$out0,$inpperm
le?vperm $out1,$out1,$out1,$inpperm
stvx_u $out0,$x00,$out
le?vperm $out2,$out2,$out2,$inpperm
stvx_u $out1,$x10,$out
stvx_u $out2,$x20,$out
addi $out,$out,0x30
b Lctr32_enc8x_done
.align 5
Lctr32_enc8x_two:
vcipherlast $out0,$out0,$in6
vcipherlast $out1,$out1,$in7
le?vperm $out0,$out0,$out0,$inpperm
le?vperm $out1,$out1,$out1,$inpperm
stvx_u $out0,$x00,$out
stvx_u $out1,$x10,$out
addi $out,$out,0x20
b Lctr32_enc8x_done
.align 5
Lctr32_enc8x_one:
vcipherlast $out0,$out0,$in7
le?vperm $out0,$out0,$out0,$inpperm
stvx_u $out0,0,$out
addi $out,$out,0x10
Lctr32_enc8x_done:
li r10,`$FRAME+15`
li r11,`$FRAME+31`
stvx $inpperm,r10,$sp # wipe copies of round keys
addi r10,r10,32
stvx $inpperm,r11,$sp
addi r11,r11,32
stvx $inpperm,r10,$sp
addi r10,r10,32
stvx $inpperm,r11,$sp
addi r11,r11,32
stvx $inpperm,r10,$sp
addi r10,r10,32
stvx $inpperm,r11,$sp
addi r11,r11,32
stvx $inpperm,r10,$sp
addi r10,r10,32
stvx $inpperm,r11,$sp
addi r11,r11,32
mtspr 256,$vrsave
lvx v20,r10,$sp # ABI says so
addi r10,r10,32
lvx v21,r11,$sp
addi r11,r11,32
lvx v22,r10,$sp
addi r10,r10,32
lvx v23,r11,$sp
addi r11,r11,32
lvx v24,r10,$sp
addi r10,r10,32
lvx v25,r11,$sp
addi r11,r11,32
lvx v26,r10,$sp
addi r10,r10,32
lvx v27,r11,$sp
addi r11,r11,32
lvx v28,r10,$sp
addi r10,r10,32
lvx v29,r11,$sp
addi r11,r11,32
lvx v30,r10,$sp
lvx v31,r11,$sp
$POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
$POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
$POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
$POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
$POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
$POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
blr
.long 0
.byte 0,12,0x14,0,0x80,6,6,0
.long 0
.size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
___
}} }}}
#########################################################################
{{{ # XTS procedures #
# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len, #
# const AES_KEY *key1, const AES_KEY *key2, #
# [const] unsigned char iv[16]); #
# If $key2 is NULL, then a "tweak chaining" mode is engaged, in which #
# input tweak value is assumed to be encrypted already, and last tweak #
# value, one suitable for consecutive call on same chunk of data, is #
# written back to original buffer. In addition, in "tweak chaining" #
# mode only complete input blocks are processed. #
# Register map for the scalar XTS paths.  $taillen reuses $key2's GPR
# once the tweak has been computed; $eighty7 holds the GF(2^128)
# reduction constant 0x870101..01 used to advance the tweak.
my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2));
my ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7));
my ($tweak,$seven,$eighty7,$tmp,$tweak1) = map("v$_",(8..12));
my $taillen = $key2;
($inp,$idx) = ($idx,$inp); # reassign
# Emit the scalar XTS encrypt and decrypt entry points.  Each encrypts
# the IV under $key2 to form the initial tweak (unless $key2 is NULL,
# i.e. "tweak chaining"), processes one block per loop iteration, and
# implements ciphertext stealing for a partial final block via the
# Loop_xts_*_steal byte-copy loops.  Lengths of 96 bytes or more branch
# to the 6x-unrolled routines emitted further below.
$code.=<<___;
.globl .${prefix}_xts_encrypt
mr $inp,r3 # reassign
li r3,-1
${UCMP}i $len,16
bltlr-
lis r0,0xfff0
mfspr r12,256 # save vrsave
li r11,0
mtspr 256,r0
vspltisb $seven,0x07 # 0x070707..07
le?lvsl $leperm,r11,r11
le?vspltisb $tmp,0x0f
le?vxor $leperm,$leperm,$seven
li $idx,15
lvx $tweak,0,$ivp # load [unaligned] iv
lvsl $inpperm,0,$ivp
lvx $inptail,$idx,$ivp
le?vxor $inpperm,$inpperm,$tmp
vperm $tweak,$tweak,$inptail,$inpperm
neg r11,$inp
lvsr $inpperm,0,r11 # prepare for unaligned load
lvx $inout,0,$inp
addi $inp,$inp,15 # 15 is not typo
le?vxor $inpperm,$inpperm,$tmp
${UCMP}i $key2,0 # key2==NULL?
beq Lxts_enc_no_key2
?lvsl $keyperm,0,$key2 # prepare for unaligned key
lwz $rounds,240($key2)
srwi $rounds,$rounds,1
subi $rounds,$rounds,1
li $idx,16
lvx $rndkey0,0,$key2
lvx $rndkey1,$idx,$key2
addi $idx,$idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vxor $tweak,$tweak,$rndkey0
lvx $rndkey0,$idx,$key2
addi $idx,$idx,16
mtctr $rounds
Ltweak_xts_enc:
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
vcipher $tweak,$tweak,$rndkey1
lvx $rndkey1,$idx,$key2
addi $idx,$idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vcipher $tweak,$tweak,$rndkey0
lvx $rndkey0,$idx,$key2
addi $idx,$idx,16
bdnz Ltweak_xts_enc
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
vcipher $tweak,$tweak,$rndkey1
lvx $rndkey1,$idx,$key2
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vcipherlast $tweak,$tweak,$rndkey0
li $ivp,0 # don't chain the tweak
b Lxts_enc
Lxts_enc_no_key2:
li $idx,-16
and $len,$len,$idx # in "tweak chaining"
# mode only complete
# blocks are processed
Lxts_enc:
lvx $inptail,0,$inp
addi $inp,$inp,16
?lvsl $keyperm,0,$key1 # prepare for unaligned key
lwz $rounds,240($key1)
srwi $rounds,$rounds,1
subi $rounds,$rounds,1
li $idx,16
vslb $eighty7,$seven,$seven # 0x808080..80
vor $eighty7,$eighty7,$seven # 0x878787..87
vspltisb $tmp,1 # 0x010101..01
vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01
${UCMP}i $len,96
bge _aesp8_xts_encrypt6x
andi. $taillen,$len,15
subic r0,$len,32
subi $taillen,$taillen,16
subfe r0,r0,r0
and r0,r0,$taillen
add $inp,$inp,r0
lvx $rndkey0,0,$key1
lvx $rndkey1,$idx,$key1
addi $idx,$idx,16
vperm $inout,$inout,$inptail,$inpperm
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vxor $inout,$inout,$tweak
vxor $inout,$inout,$rndkey0
lvx $rndkey0,$idx,$key1
addi $idx,$idx,16
mtctr $rounds
b Loop_xts_enc
.align 5
Loop_xts_enc:
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
vcipher $inout,$inout,$rndkey1
lvx $rndkey1,$idx,$key1
addi $idx,$idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vcipher $inout,$inout,$rndkey0
lvx $rndkey0,$idx,$key1
addi $idx,$idx,16
bdnz Loop_xts_enc
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
vcipher $inout,$inout,$rndkey1
lvx $rndkey1,$idx,$key1
li $idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vxor $rndkey0,$rndkey0,$tweak
vcipherlast $output,$inout,$rndkey0
le?vperm $tmp,$output,$output,$leperm
be?nop
le?stvx_u $tmp,0,$out
be?stvx_u $output,0,$out
addi $out,$out,16
subic. $len,$len,16
beq Lxts_enc_done
vmr $inout,$inptail
lvx $inptail,0,$inp
addi $inp,$inp,16
lvx $rndkey0,0,$key1
lvx $rndkey1,$idx,$key1
addi $idx,$idx,16
subic r0,$len,32
subfe r0,r0,r0
and r0,r0,$taillen
add $inp,$inp,r0
vsrab $tmp,$tweak,$seven # next tweak value
vaddubm $tweak,$tweak,$tweak
vsldoi $tmp,$tmp,$tmp,15
vand $tmp,$tmp,$eighty7
vxor $tweak,$tweak,$tmp
vperm $inout,$inout,$inptail,$inpperm
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vxor $inout,$inout,$tweak
vxor $output,$output,$rndkey0 # just in case $len<16
vxor $inout,$inout,$rndkey0
lvx $rndkey0,$idx,$key1
addi $idx,$idx,16
mtctr $rounds
${UCMP}i $len,16
bge Loop_xts_enc
vxor $output,$output,$tweak
lvsr $inpperm,0,$len # $inpperm is no longer needed
vxor $inptail,$inptail,$inptail # $inptail is no longer needed
vspltisb $tmp,-1
vperm $inptail,$inptail,$tmp,$inpperm
vsel $inout,$inout,$output,$inptail
subi r11,$out,17
subi $out,$out,16
mtctr $len
li $len,16
Loop_xts_enc_steal:
lbzu r0,1(r11)
stb r0,16(r11)
bdnz Loop_xts_enc_steal
mtctr $rounds
b Loop_xts_enc # one more time...
Lxts_enc_done:
${UCMP}i $ivp,0
beq Lxts_enc_ret
vsrab $tmp,$tweak,$seven # next tweak value
vaddubm $tweak,$tweak,$tweak
vsldoi $tmp,$tmp,$tmp,15
vand $tmp,$tmp,$eighty7
vxor $tweak,$tweak,$tmp
le?vperm $tweak,$tweak,$tweak,$leperm
stvx_u $tweak,0,$ivp
Lxts_enc_ret:
mtspr 256,r12 # restore vrsave
li r3,0
blr
.long 0
.byte 0,12,0x04,0,0x80,6,6,0
.long 0
.size .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt
.globl .${prefix}_xts_decrypt
mr $inp,r3 # reassign
li r3,-1
${UCMP}i $len,16
bltlr-
lis r0,0xfff8
mfspr r12,256 # save vrsave
li r11,0
mtspr 256,r0
andi. r0,$len,15
neg r0,r0
andi. r0,r0,16
sub $len,$len,r0
vspltisb $seven,0x07 # 0x070707..07
le?lvsl $leperm,r11,r11
le?vspltisb $tmp,0x0f
le?vxor $leperm,$leperm,$seven
li $idx,15
lvx $tweak,0,$ivp # load [unaligned] iv
lvsl $inpperm,0,$ivp
lvx $inptail,$idx,$ivp
le?vxor $inpperm,$inpperm,$tmp
vperm $tweak,$tweak,$inptail,$inpperm
neg r11,$inp
lvsr $inpperm,0,r11 # prepare for unaligned load
lvx $inout,0,$inp
addi $inp,$inp,15 # 15 is not typo
le?vxor $inpperm,$inpperm,$tmp
${UCMP}i $key2,0 # key2==NULL?
beq Lxts_dec_no_key2
?lvsl $keyperm,0,$key2 # prepare for unaligned key
lwz $rounds,240($key2)
srwi $rounds,$rounds,1
subi $rounds,$rounds,1
li $idx,16
lvx $rndkey0,0,$key2
lvx $rndkey1,$idx,$key2
addi $idx,$idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vxor $tweak,$tweak,$rndkey0
lvx $rndkey0,$idx,$key2
addi $idx,$idx,16
mtctr $rounds
Ltweak_xts_dec:
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
vcipher $tweak,$tweak,$rndkey1
lvx $rndkey1,$idx,$key2
addi $idx,$idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vcipher $tweak,$tweak,$rndkey0
lvx $rndkey0,$idx,$key2
addi $idx,$idx,16
bdnz Ltweak_xts_dec
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
vcipher $tweak,$tweak,$rndkey1
lvx $rndkey1,$idx,$key2
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vcipherlast $tweak,$tweak,$rndkey0
li $ivp,0 # don't chain the tweak
b Lxts_dec
Lxts_dec_no_key2:
neg $idx,$len
andi. $idx,$idx,15
add $len,$len,$idx # in "tweak chaining"
# mode only complete
# blocks are processed
Lxts_dec:
lvx $inptail,0,$inp
addi $inp,$inp,16
?lvsl $keyperm,0,$key1 # prepare for unaligned key
lwz $rounds,240($key1)
srwi $rounds,$rounds,1
subi $rounds,$rounds,1
li $idx,16
vslb $eighty7,$seven,$seven # 0x808080..80
vor $eighty7,$eighty7,$seven # 0x878787..87
vspltisb $tmp,1 # 0x010101..01
vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01
${UCMP}i $len,96
bge _aesp8_xts_decrypt6x
lvx $rndkey0,0,$key1
lvx $rndkey1,$idx,$key1
addi $idx,$idx,16
vperm $inout,$inout,$inptail,$inpperm
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vxor $inout,$inout,$tweak
vxor $inout,$inout,$rndkey0
lvx $rndkey0,$idx,$key1
addi $idx,$idx,16
mtctr $rounds
${UCMP}i $len,16
blt Ltail_xts_dec
be?b Loop_xts_dec
.align 5
Loop_xts_dec:
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
vncipher $inout,$inout,$rndkey1
lvx $rndkey1,$idx,$key1
addi $idx,$idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vncipher $inout,$inout,$rndkey0
lvx $rndkey0,$idx,$key1
addi $idx,$idx,16
bdnz Loop_xts_dec
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
vncipher $inout,$inout,$rndkey1
lvx $rndkey1,$idx,$key1
li $idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vxor $rndkey0,$rndkey0,$tweak
vncipherlast $output,$inout,$rndkey0
le?vperm $tmp,$output,$output,$leperm
be?nop
le?stvx_u $tmp,0,$out
be?stvx_u $output,0,$out
addi $out,$out,16
subic. $len,$len,16
beq Lxts_dec_done
vmr $inout,$inptail
lvx $inptail,0,$inp
addi $inp,$inp,16
lvx $rndkey0,0,$key1
lvx $rndkey1,$idx,$key1
addi $idx,$idx,16
vsrab $tmp,$tweak,$seven # next tweak value
vaddubm $tweak,$tweak,$tweak
vsldoi $tmp,$tmp,$tmp,15
vand $tmp,$tmp,$eighty7
vxor $tweak,$tweak,$tmp
vperm $inout,$inout,$inptail,$inpperm
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vxor $inout,$inout,$tweak
vxor $inout,$inout,$rndkey0
lvx $rndkey0,$idx,$key1
addi $idx,$idx,16
mtctr $rounds
${UCMP}i $len,16
bge Loop_xts_dec
Ltail_xts_dec:
vsrab $tmp,$tweak,$seven # next tweak value
vaddubm $tweak1,$tweak,$tweak
vsldoi $tmp,$tmp,$tmp,15
vand $tmp,$tmp,$eighty7
vxor $tweak1,$tweak1,$tmp
subi $inp,$inp,16
add $inp,$inp,$len
vxor $inout,$inout,$tweak # :-(
vxor $inout,$inout,$tweak1 # :-)
Loop_xts_dec_short:
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
vncipher $inout,$inout,$rndkey1
lvx $rndkey1,$idx,$key1
addi $idx,$idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vncipher $inout,$inout,$rndkey0
lvx $rndkey0,$idx,$key1
addi $idx,$idx,16
bdnz Loop_xts_dec_short
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
vncipher $inout,$inout,$rndkey1
lvx $rndkey1,$idx,$key1
li $idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vxor $rndkey0,$rndkey0,$tweak1
vncipherlast $output,$inout,$rndkey0
le?vperm $tmp,$output,$output,$leperm
be?nop
le?stvx_u $tmp,0,$out
be?stvx_u $output,0,$out
vmr $inout,$inptail
lvx $inptail,0,$inp
#addi $inp,$inp,16
lvx $rndkey0,0,$key1
lvx $rndkey1,$idx,$key1
addi $idx,$idx,16
vperm $inout,$inout,$inptail,$inpperm
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
lvsr $inpperm,0,$len # $inpperm is no longer needed
vxor $inptail,$inptail,$inptail # $inptail is no longer needed
vspltisb $tmp,-1
vperm $inptail,$inptail,$tmp,$inpperm
vsel $inout,$inout,$output,$inptail
vxor $rndkey0,$rndkey0,$tweak
vxor $inout,$inout,$rndkey0
lvx $rndkey0,$idx,$key1
addi $idx,$idx,16
subi r11,$out,1
mtctr $len
li $len,16
Loop_xts_dec_steal:
lbzu r0,1(r11)
stb r0,16(r11)
bdnz Loop_xts_dec_steal
mtctr $rounds
b Loop_xts_dec # one more time...
Lxts_dec_done:
${UCMP}i $ivp,0
beq Lxts_dec_ret
vsrab $tmp,$tweak,$seven # next tweak value
vaddubm $tweak,$tweak,$tweak
vsldoi $tmp,$tmp,$tmp,15
vand $tmp,$tmp,$eighty7
vxor $tweak,$tweak,$tmp
le?vperm $tweak,$tweak,$tweak,$leperm
stvx_u $tweak,0,$ivp
Lxts_dec_ret:
mtspr 256,r12 # restore vrsave
li r3,0
blr
.long 0
.byte 0,12,0x04,0,0x80,6,6,0
.long 0
.size .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
___
#########################################################################
{{ # Optimized XTS procedures #
# Register allocation for the 6x-interleaved XTS encrypt/decrypt paths.
# NOTE(review): these lexicals are interpolated into the $code heredoc
# that follows; changing any value changes the emitted assembly.
my $key_=$key2;
# GPRs holding the byte offsets 0x00..0x70 used to address eight
# consecutive 16-byte blocks relative to $inp/$out/$key.
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
$x00=0 if ($flavour =~ /osx/); # OS X assembler: literal 0, not r0
# Six input blocks, six output blocks, six tweak values — one VR each.
my ($in0, $in1, $in2, $in3, $in4, $in5 )=map("v$_",(0..5));
my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
# v26-v31 last 6 round keys
my ($keyperm)=($out0); # aliases with "caller", redundant assignment
my $taillen=$x70; # tail length ($len & 15) for ciphertext stealing
$code.=<<___;
.align 5
_aesp8_xts_encrypt6x:
$STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
mflr r11
li r7,`$FRAME+8*16+15`
li r3,`$FRAME+8*16+31`
$PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
stvx v20,r7,$sp # ABI says so
addi r7,r7,32
stvx v21,r3,$sp
addi r3,r3,32
stvx v22,r7,$sp
addi r7,r7,32
stvx v23,r3,$sp
addi r3,r3,32
stvx v24,r7,$sp
addi r7,r7,32
stvx v25,r3,$sp
addi r3,r3,32
stvx v26,r7,$sp
addi r7,r7,32
stvx v27,r3,$sp
addi r3,r3,32
stvx v28,r7,$sp
addi r7,r7,32
stvx v29,r3,$sp
addi r3,r3,32
stvx v30,r7,$sp
stvx v31,r3,$sp
li r0,-1
stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
li $x10,0x10
$PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
li $x20,0x20
$PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
li $x30,0x30
$PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
li $x40,0x40
$PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
li $x50,0x50
$PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
li $x60,0x60
$PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
li $x70,0x70
mtspr 256,r0
subi $rounds,$rounds,3 # -4 in total
lvx $rndkey0,$x00,$key1 # load key schedule
lvx v30,$x10,$key1
addi $key1,$key1,0x20
lvx v31,$x00,$key1
?vperm $rndkey0,$rndkey0,v30,$keyperm
addi $key_,$sp,$FRAME+15
mtctr $rounds
Load_xts_enc_key:
?vperm v24,v30,v31,$keyperm
lvx v30,$x10,$key1
addi $key1,$key1,0x20
stvx v24,$x00,$key_ # off-load round[1]
?vperm v25,v31,v30,$keyperm
lvx v31,$x00,$key1
stvx v25,$x10,$key_ # off-load round[2]
addi $key_,$key_,0x20
bdnz Load_xts_enc_key
lvx v26,$x10,$key1
?vperm v24,v30,v31,$keyperm
lvx v27,$x20,$key1
stvx v24,$x00,$key_ # off-load round[3]
?vperm v25,v31,v26,$keyperm
lvx v28,$x30,$key1
stvx v25,$x10,$key_ # off-load round[4]
addi $key_,$sp,$FRAME+15 # rewind $key_
?vperm v26,v26,v27,$keyperm
lvx v29,$x40,$key1
?vperm v27,v27,v28,$keyperm
lvx v30,$x50,$key1
?vperm v28,v28,v29,$keyperm
lvx v31,$x60,$key1
?vperm v29,v29,v30,$keyperm
lvx $twk5,$x70,$key1 # borrow $twk5
?vperm v30,v30,v31,$keyperm
lvx v24,$x00,$key_ # pre-load round[1]
?vperm v31,v31,$twk5,$keyperm
lvx v25,$x10,$key_ # pre-load round[2]
vperm $in0,$inout,$inptail,$inpperm
subi $inp,$inp,31 # undo "caller"
vxor $twk0,$tweak,$rndkey0
vsrab $tmp,$tweak,$seven # next tweak value
vaddubm $tweak,$tweak,$tweak
vsldoi $tmp,$tmp,$tmp,15
vand $tmp,$tmp,$eighty7
vxor $out0,$in0,$twk0
vxor $tweak,$tweak,$tmp
lvx_u $in1,$x10,$inp
vxor $twk1,$tweak,$rndkey0
vsrab $tmp,$tweak,$seven # next tweak value
vaddubm $tweak,$tweak,$tweak
vsldoi $tmp,$tmp,$tmp,15
le?vperm $in1,$in1,$in1,$leperm
vand $tmp,$tmp,$eighty7
vxor $out1,$in1,$twk1
vxor $tweak,$tweak,$tmp
lvx_u $in2,$x20,$inp
andi. $taillen,$len,15
vxor $twk2,$tweak,$rndkey0
vsrab $tmp,$tweak,$seven # next tweak value
vaddubm $tweak,$tweak,$tweak
vsldoi $tmp,$tmp,$tmp,15
le?vperm $in2,$in2,$in2,$leperm
vand $tmp,$tmp,$eighty7
vxor $out2,$in2,$twk2
vxor $tweak,$tweak,$tmp
lvx_u $in3,$x30,$inp
sub $len,$len,$taillen
vxor $twk3,$tweak,$rndkey0
vsrab $tmp,$tweak,$seven # next tweak value
vaddubm $tweak,$tweak,$tweak
vsldoi $tmp,$tmp,$tmp,15
le?vperm $in3,$in3,$in3,$leperm
vand $tmp,$tmp,$eighty7
vxor $out3,$in3,$twk3
vxor $tweak,$tweak,$tmp
lvx_u $in4,$x40,$inp
subi $len,$len,0x60
vxor $twk4,$tweak,$rndkey0
vsrab $tmp,$tweak,$seven # next tweak value
vaddubm $tweak,$tweak,$tweak
vsldoi $tmp,$tmp,$tmp,15
le?vperm $in4,$in4,$in4,$leperm
vand $tmp,$tmp,$eighty7
vxor $out4,$in4,$twk4
vxor $tweak,$tweak,$tmp
lvx_u $in5,$x50,$inp
addi $inp,$inp,0x60
vxor $twk5,$tweak,$rndkey0
vsrab $tmp,$tweak,$seven # next tweak value
vaddubm $tweak,$tweak,$tweak
vsldoi $tmp,$tmp,$tmp,15
le?vperm $in5,$in5,$in5,$leperm
vand $tmp,$tmp,$eighty7
vxor $out5,$in5,$twk5
vxor $tweak,$tweak,$tmp
vxor v31,v31,$rndkey0
mtctr $rounds
b Loop_xts_enc6x
.align 5
Loop_xts_enc6x:
vcipher $out0,$out0,v24
vcipher $out1,$out1,v24
vcipher $out2,$out2,v24
vcipher $out3,$out3,v24
vcipher $out4,$out4,v24
vcipher $out5,$out5,v24
lvx v24,$x20,$key_ # round[3]
addi $key_,$key_,0x20
vcipher $out0,$out0,v25
vcipher $out1,$out1,v25
vcipher $out2,$out2,v25
vcipher $out3,$out3,v25
vcipher $out4,$out4,v25
vcipher $out5,$out5,v25
lvx v25,$x10,$key_ # round[4]
bdnz Loop_xts_enc6x
subic $len,$len,96 # $len-=96
vxor $in0,$twk0,v31 # xor with last round key
vcipher $out0,$out0,v24
vcipher $out1,$out1,v24
vsrab $tmp,$tweak,$seven # next tweak value
vxor $twk0,$tweak,$rndkey0
vaddubm $tweak,$tweak,$tweak
vcipher $out2,$out2,v24
vcipher $out3,$out3,v24
vsldoi $tmp,$tmp,$tmp,15
vcipher $out4,$out4,v24
vcipher $out5,$out5,v24
subfe. r0,r0,r0 # borrow?-1:0
vand $tmp,$tmp,$eighty7
vcipher $out0,$out0,v25
vcipher $out1,$out1,v25
vxor $tweak,$tweak,$tmp
vcipher $out2,$out2,v25
vcipher $out3,$out3,v25
vxor $in1,$twk1,v31
vsrab $tmp,$tweak,$seven # next tweak value
vxor $twk1,$tweak,$rndkey0
vcipher $out4,$out4,v25
vcipher $out5,$out5,v25
and r0,r0,$len
vaddubm $tweak,$tweak,$tweak
vsldoi $tmp,$tmp,$tmp,15
vcipher $out0,$out0,v26
vcipher $out1,$out1,v26
vand $tmp,$tmp,$eighty7
vcipher $out2,$out2,v26
vcipher $out3,$out3,v26
vxor $tweak,$tweak,$tmp
vcipher $out4,$out4,v26
vcipher $out5,$out5,v26
add $inp,$inp,r0 # $inp is adjusted in such
# way that at exit from the
# loop inX-in5 are loaded
# with last "words"
vxor $in2,$twk2,v31
vsrab $tmp,$tweak,$seven # next tweak value
vxor $twk2,$tweak,$rndkey0
vaddubm $tweak,$tweak,$tweak
vcipher $out0,$out0,v27
vcipher $out1,$out1,v27
vsldoi $tmp,$tmp,$tmp,15
vcipher $out2,$out2,v27
vcipher $out3,$out3,v27
vand $tmp,$tmp,$eighty7
vcipher $out4,$out4,v27
vcipher $out5,$out5,v27
addi $key_,$sp,$FRAME+15 # rewind $key_
vxor $tweak,$tweak,$tmp
vcipher $out0,$out0,v28
vcipher $out1,$out1,v28
vxor $in3,$twk3,v31
vsrab $tmp,$tweak,$seven # next tweak value
vxor $twk3,$tweak,$rndkey0
vcipher $out2,$out2,v28
vcipher $out3,$out3,v28
vaddubm $tweak,$tweak,$tweak
vsldoi $tmp,$tmp,$tmp,15
vcipher $out4,$out4,v28
vcipher $out5,$out5,v28
lvx v24,$x00,$key_ # re-pre-load round[1]
vand $tmp,$tmp,$eighty7
vcipher $out0,$out0,v29
vcipher $out1,$out1,v29
vxor $tweak,$tweak,$tmp
vcipher $out2,$out2,v29
vcipher $out3,$out3,v29
vxor $in4,$twk4,v31
vsrab $tmp,$tweak,$seven # next tweak value
vxor $twk4,$tweak,$rndkey0
vcipher $out4,$out4,v29
vcipher $out5,$out5,v29
lvx v25,$x10,$key_ # re-pre-load round[2]
vaddubm $tweak,$tweak,$tweak
vsldoi $tmp,$tmp,$tmp,15
vcipher $out0,$out0,v30
vcipher $out1,$out1,v30
vand $tmp,$tmp,$eighty7
vcipher $out2,$out2,v30
vcipher $out3,$out3,v30
vxor $tweak,$tweak,$tmp
vcipher $out4,$out4,v30
vcipher $out5,$out5,v30
vxor $in5,$twk5,v31
vsrab $tmp,$tweak,$seven # next tweak value
vxor $twk5,$tweak,$rndkey0
vcipherlast $out0,$out0,$in0
lvx_u $in0,$x00,$inp # load next input block
vaddubm $tweak,$tweak,$tweak
vsldoi $tmp,$tmp,$tmp,15
vcipherlast $out1,$out1,$in1
lvx_u $in1,$x10,$inp
vcipherlast $out2,$out2,$in2
le?vperm $in0,$in0,$in0,$leperm
lvx_u $in2,$x20,$inp
vand $tmp,$tmp,$eighty7
vcipherlast $out3,$out3,$in3
le?vperm $in1,$in1,$in1,$leperm
lvx_u $in3,$x30,$inp
vcipherlast $out4,$out4,$in4
le?vperm $in2,$in2,$in2,$leperm
lvx_u $in4,$x40,$inp
vxor $tweak,$tweak,$tmp
vcipherlast $tmp,$out5,$in5 # last block might be needed
# in stealing mode
le?vperm $in3,$in3,$in3,$leperm
lvx_u $in5,$x50,$inp
addi $inp,$inp,0x60
le?vperm $in4,$in4,$in4,$leperm
le?vperm $in5,$in5,$in5,$leperm
le?vperm $out0,$out0,$out0,$leperm
le?vperm $out1,$out1,$out1,$leperm
stvx_u $out0,$x00,$out # store output
vxor $out0,$in0,$twk0
le?vperm $out2,$out2,$out2,$leperm
stvx_u $out1,$x10,$out
vxor $out1,$in1,$twk1
le?vperm $out3,$out3,$out3,$leperm
stvx_u $out2,$x20,$out
vxor $out2,$in2,$twk2
le?vperm $out4,$out4,$out4,$leperm
stvx_u $out3,$x30,$out
vxor $out3,$in3,$twk3
le?vperm $out5,$tmp,$tmp,$leperm
stvx_u $out4,$x40,$out
vxor $out4,$in4,$twk4
le?stvx_u $out5,$x50,$out
be?stvx_u $tmp, $x50,$out
vxor $out5,$in5,$twk5
addi $out,$out,0x60
mtctr $rounds
beq Loop_xts_enc6x # did $len-=96 borrow?
addic. $len,$len,0x60
beq Lxts_enc6x_zero
cmpwi $len,0x20
blt Lxts_enc6x_one
nop
beq Lxts_enc6x_two
cmpwi $len,0x40
blt Lxts_enc6x_three
nop
beq Lxts_enc6x_four
Lxts_enc6x_five:
vxor $out0,$in1,$twk0
vxor $out1,$in2,$twk1
vxor $out2,$in3,$twk2
vxor $out3,$in4,$twk3
vxor $out4,$in5,$twk4
bl _aesp8_xts_enc5x
le?vperm $out0,$out0,$out0,$leperm
vmr $twk0,$twk5 # unused tweak
le?vperm $out1,$out1,$out1,$leperm
stvx_u $out0,$x00,$out # store output
le?vperm $out2,$out2,$out2,$leperm
stvx_u $out1,$x10,$out
le?vperm $out3,$out3,$out3,$leperm
stvx_u $out2,$x20,$out
vxor $tmp,$out4,$twk5 # last block prep for stealing
le?vperm $out4,$out4,$out4,$leperm
stvx_u $out3,$x30,$out
stvx_u $out4,$x40,$out
addi $out,$out,0x50
bne Lxts_enc6x_steal
b Lxts_enc6x_done
.align 4
Lxts_enc6x_four:
vxor $out0,$in2,$twk0
vxor $out1,$in3,$twk1
vxor $out2,$in4,$twk2
vxor $out3,$in5,$twk3
vxor $out4,$out4,$out4
bl _aesp8_xts_enc5x
le?vperm $out0,$out0,$out0,$leperm
vmr $twk0,$twk4 # unused tweak
le?vperm $out1,$out1,$out1,$leperm
stvx_u $out0,$x00,$out # store output
le?vperm $out2,$out2,$out2,$leperm
stvx_u $out1,$x10,$out
vxor $tmp,$out3,$twk4 # last block prep for stealing
le?vperm $out3,$out3,$out3,$leperm
stvx_u $out2,$x20,$out
stvx_u $out3,$x30,$out
addi $out,$out,0x40
bne Lxts_enc6x_steal
b Lxts_enc6x_done
.align 4
Lxts_enc6x_three:
vxor $out0,$in3,$twk0
vxor $out1,$in4,$twk1
vxor $out2,$in5,$twk2
vxor $out3,$out3,$out3
vxor $out4,$out4,$out4
bl _aesp8_xts_enc5x
le?vperm $out0,$out0,$out0,$leperm
vmr $twk0,$twk3 # unused tweak
le?vperm $out1,$out1,$out1,$leperm
stvx_u $out0,$x00,$out # store output
vxor $tmp,$out2,$twk3 # last block prep for stealing
le?vperm $out2,$out2,$out2,$leperm
stvx_u $out1,$x10,$out
stvx_u $out2,$x20,$out
addi $out,$out,0x30
bne Lxts_enc6x_steal
b Lxts_enc6x_done
.align 4
Lxts_enc6x_two:
vxor $out0,$in4,$twk0
vxor $out1,$in5,$twk1
vxor $out2,$out2,$out2
vxor $out3,$out3,$out3
vxor $out4,$out4,$out4
bl _aesp8_xts_enc5x
le?vperm $out0,$out0,$out0,$leperm
vmr $twk0,$twk2 # unused tweak
vxor $tmp,$out1,$twk2 # last block prep for stealing
le?vperm $out1,$out1,$out1,$leperm
stvx_u $out0,$x00,$out # store output
stvx_u $out1,$x10,$out
addi $out,$out,0x20
bne Lxts_enc6x_steal
b Lxts_enc6x_done
.align 4
Lxts_enc6x_one:
vxor $out0,$in5,$twk0
nop
Loop_xts_enc1x:
vcipher $out0,$out0,v24
lvx v24,$x20,$key_ # round[3]
addi $key_,$key_,0x20
vcipher $out0,$out0,v25
lvx v25,$x10,$key_ # round[4]
bdnz Loop_xts_enc1x
add $inp,$inp,$taillen
cmpwi $taillen,0
vcipher $out0,$out0,v24
subi $inp,$inp,16
vcipher $out0,$out0,v25
lvsr $inpperm,0,$taillen
vcipher $out0,$out0,v26
lvx_u $in0,0,$inp
vcipher $out0,$out0,v27
addi $key_,$sp,$FRAME+15 # rewind $key_
vcipher $out0,$out0,v28
lvx v24,$x00,$key_ # re-pre-load round[1]
vcipher $out0,$out0,v29
lvx v25,$x10,$key_ # re-pre-load round[2]
vxor $twk0,$twk0,v31
le?vperm $in0,$in0,$in0,$leperm
vcipher $out0,$out0,v30
vperm $in0,$in0,$in0,$inpperm
vcipherlast $out0,$out0,$twk0
vmr $twk0,$twk1 # unused tweak
vxor $tmp,$out0,$twk1 # last block prep for stealing
le?vperm $out0,$out0,$out0,$leperm
stvx_u $out0,$x00,$out # store output
addi $out,$out,0x10
bne Lxts_enc6x_steal
b Lxts_enc6x_done
.align 4
Lxts_enc6x_zero:
cmpwi $taillen,0
beq Lxts_enc6x_done
add $inp,$inp,$taillen
subi $inp,$inp,16
lvx_u $in0,0,$inp
lvsr $inpperm,0,$taillen # $in5 is no more
le?vperm $in0,$in0,$in0,$leperm
vperm $in0,$in0,$in0,$inpperm
vxor $tmp,$tmp,$twk0
Lxts_enc6x_steal:
vxor $in0,$in0,$twk0
vxor $out0,$out0,$out0
vspltisb $out1,-1
vperm $out0,$out0,$out1,$inpperm
vsel $out0,$in0,$tmp,$out0 # $tmp is last block, remember?
subi r30,$out,17
subi $out,$out,16
mtctr $taillen
Loop_xts_enc6x_steal:
lbzu r0,1(r30)
stb r0,16(r30)
bdnz Loop_xts_enc6x_steal
li $taillen,0
mtctr $rounds
b Loop_xts_enc1x # one more time...
.align 4
Lxts_enc6x_done:
${UCMP}i $ivp,0
beq Lxts_enc6x_ret
vxor $tweak,$twk0,$rndkey0
le?vperm $tweak,$tweak,$tweak,$leperm
stvx_u $tweak,0,$ivp
Lxts_enc6x_ret:
mtlr r11
li r10,`$FRAME+15`
li r11,`$FRAME+31`
stvx $seven,r10,$sp # wipe copies of round keys
addi r10,r10,32
stvx $seven,r11,$sp
addi r11,r11,32
stvx $seven,r10,$sp
addi r10,r10,32
stvx $seven,r11,$sp
addi r11,r11,32
stvx $seven,r10,$sp
addi r10,r10,32
stvx $seven,r11,$sp
addi r11,r11,32
stvx $seven,r10,$sp
addi r10,r10,32
stvx $seven,r11,$sp
addi r11,r11,32
mtspr 256,$vrsave
lvx v20,r10,$sp # ABI says so
addi r10,r10,32
lvx v21,r11,$sp
addi r11,r11,32
lvx v22,r10,$sp
addi r10,r10,32
lvx v23,r11,$sp
addi r11,r11,32
lvx v24,r10,$sp
addi r10,r10,32
lvx v25,r11,$sp
addi r11,r11,32
lvx v26,r10,$sp
addi r10,r10,32
lvx v27,r11,$sp
addi r11,r11,32
lvx v28,r10,$sp
addi r10,r10,32
lvx v29,r11,$sp
addi r11,r11,32
lvx v30,r10,$sp
lvx v31,r11,$sp
$POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
$POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
$POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
$POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
$POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
$POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
blr
.long 0
.byte 0,12,0x04,1,0x80,6,6,0
.long 0
.align 5
_aesp8_xts_enc5x:
vcipher $out0,$out0,v24
vcipher $out1,$out1,v24
vcipher $out2,$out2,v24
vcipher $out3,$out3,v24
vcipher $out4,$out4,v24
lvx v24,$x20,$key_ # round[3]
addi $key_,$key_,0x20
vcipher $out0,$out0,v25
vcipher $out1,$out1,v25
vcipher $out2,$out2,v25
vcipher $out3,$out3,v25
vcipher $out4,$out4,v25
lvx v25,$x10,$key_ # round[4]
bdnz _aesp8_xts_enc5x
add $inp,$inp,$taillen
cmpwi $taillen,0
vcipher $out0,$out0,v24
vcipher $out1,$out1,v24
vcipher $out2,$out2,v24
vcipher $out3,$out3,v24
vcipher $out4,$out4,v24
subi $inp,$inp,16
vcipher $out0,$out0,v25
vcipher $out1,$out1,v25
vcipher $out2,$out2,v25
vcipher $out3,$out3,v25
vcipher $out4,$out4,v25
vxor $twk0,$twk0,v31
vcipher $out0,$out0,v26
lvsr $inpperm,r0,$taillen # $in5 is no more
vcipher $out1,$out1,v26
vcipher $out2,$out2,v26
vcipher $out3,$out3,v26
vcipher $out4,$out4,v26
vxor $in1,$twk1,v31
vcipher $out0,$out0,v27
lvx_u $in0,0,$inp
vcipher $out1,$out1,v27
vcipher $out2,$out2,v27
vcipher $out3,$out3,v27
vcipher $out4,$out4,v27
vxor $in2,$twk2,v31
addi $key_,$sp,$FRAME+15 # rewind $key_
vcipher $out0,$out0,v28
vcipher $out1,$out1,v28
vcipher $out2,$out2,v28
vcipher $out3,$out3,v28
vcipher $out4,$out4,v28
lvx v24,$x00,$key_ # re-pre-load round[1]
vxor $in3,$twk3,v31
vcipher $out0,$out0,v29
le?vperm $in0,$in0,$in0,$leperm
vcipher $out1,$out1,v29
vcipher $out2,$out2,v29
vcipher $out3,$out3,v29
vcipher $out4,$out4,v29
lvx v25,$x10,$key_ # re-pre-load round[2]
vxor $in4,$twk4,v31
vcipher $out0,$out0,v30
vperm $in0,$in0,$in0,$inpperm
vcipher $out1,$out1,v30
vcipher $out2,$out2,v30
vcipher $out3,$out3,v30
vcipher $out4,$out4,v30
vcipherlast $out0,$out0,$twk0
vcipherlast $out1,$out1,$in1
vcipherlast $out2,$out2,$in2
vcipherlast $out3,$out3,$in3
vcipherlast $out4,$out4,$in4
blr
.long 0
.byte 0,12,0x14,0,0,0,0,0
.align 5
_aesp8_xts_decrypt6x:
$STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
mflr r11
li r7,`$FRAME+8*16+15`
li r3,`$FRAME+8*16+31`
$PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
stvx v20,r7,$sp # ABI says so
addi r7,r7,32
stvx v21,r3,$sp
addi r3,r3,32
stvx v22,r7,$sp
addi r7,r7,32
stvx v23,r3,$sp
addi r3,r3,32
stvx v24,r7,$sp
addi r7,r7,32
stvx v25,r3,$sp
addi r3,r3,32
stvx v26,r7,$sp
addi r7,r7,32
stvx v27,r3,$sp
addi r3,r3,32
stvx v28,r7,$sp
addi r7,r7,32
stvx v29,r3,$sp
addi r3,r3,32
stvx v30,r7,$sp
stvx v31,r3,$sp
li r0,-1
stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
li $x10,0x10
$PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
li $x20,0x20
$PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
li $x30,0x30
$PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
li $x40,0x40
$PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
li $x50,0x50
$PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
li $x60,0x60
$PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
li $x70,0x70
mtspr 256,r0
subi $rounds,$rounds,3 # -4 in total
lvx $rndkey0,$x00,$key1 # load key schedule
lvx v30,$x10,$key1
addi $key1,$key1,0x20
lvx v31,$x00,$key1
?vperm $rndkey0,$rndkey0,v30,$keyperm
addi $key_,$sp,$FRAME+15
mtctr $rounds
Load_xts_dec_key:
?vperm v24,v30,v31,$keyperm
lvx v30,$x10,$key1
addi $key1,$key1,0x20
stvx v24,$x00,$key_ # off-load round[1]
?vperm v25,v31,v30,$keyperm
lvx v31,$x00,$key1
stvx v25,$x10,$key_ # off-load round[2]
addi $key_,$key_,0x20
bdnz Load_xts_dec_key
lvx v26,$x10,$key1
?vperm v24,v30,v31,$keyperm
lvx v27,$x20,$key1
stvx v24,$x00,$key_ # off-load round[3]
?vperm v25,v31,v26,$keyperm
lvx v28,$x30,$key1
stvx v25,$x10,$key_ # off-load round[4]
addi $key_,$sp,$FRAME+15 # rewind $key_
?vperm v26,v26,v27,$keyperm
lvx v29,$x40,$key1
?vperm v27,v27,v28,$keyperm
lvx v30,$x50,$key1
?vperm v28,v28,v29,$keyperm
lvx v31,$x60,$key1
?vperm v29,v29,v30,$keyperm
lvx $twk5,$x70,$key1 # borrow $twk5
?vperm v30,v30,v31,$keyperm
lvx v24,$x00,$key_ # pre-load round[1]
?vperm v31,v31,$twk5,$keyperm
lvx v25,$x10,$key_ # pre-load round[2]
vperm $in0,$inout,$inptail,$inpperm
subi $inp,$inp,31 # undo "caller"
vxor $twk0,$tweak,$rndkey0
vsrab $tmp,$tweak,$seven # next tweak value
vaddubm $tweak,$tweak,$tweak
vsldoi $tmp,$tmp,$tmp,15
vand $tmp,$tmp,$eighty7
vxor $out0,$in0,$twk0
vxor $tweak,$tweak,$tmp
lvx_u $in1,$x10,$inp
vxor $twk1,$tweak,$rndkey0
vsrab $tmp,$tweak,$seven # next tweak value
vaddubm $tweak,$tweak,$tweak
vsldoi $tmp,$tmp,$tmp,15
le?vperm $in1,$in1,$in1,$leperm
vand $tmp,$tmp,$eighty7
vxor $out1,$in1,$twk1
vxor $tweak,$tweak,$tmp
lvx_u $in2,$x20,$inp
andi. $taillen,$len,15
vxor $twk2,$tweak,$rndkey0
vsrab $tmp,$tweak,$seven # next tweak value
vaddubm $tweak,$tweak,$tweak
vsldoi $tmp,$tmp,$tmp,15
le?vperm $in2,$in2,$in2,$leperm
vand $tmp,$tmp,$eighty7
vxor $out2,$in2,$twk2
vxor $tweak,$tweak,$tmp
lvx_u $in3,$x30,$inp
sub $len,$len,$taillen
vxor $twk3,$tweak,$rndkey0
vsrab $tmp,$tweak,$seven # next tweak value
vaddubm $tweak,$tweak,$tweak
vsldoi $tmp,$tmp,$tmp,15
le?vperm $in3,$in3,$in3,$leperm
vand $tmp,$tmp,$eighty7
vxor $out3,$in3,$twk3
vxor $tweak,$tweak,$tmp
lvx_u $in4,$x40,$inp
subi $len,$len,0x60
vxor $twk4,$tweak,$rndkey0
vsrab $tmp,$tweak,$seven # next tweak value
vaddubm $tweak,$tweak,$tweak
vsldoi $tmp,$tmp,$tmp,15
le?vperm $in4,$in4,$in4,$leperm
vand $tmp,$tmp,$eighty7
vxor $out4,$in4,$twk4
vxor $tweak,$tweak,$tmp
lvx_u $in5,$x50,$inp
addi $inp,$inp,0x60
vxor $twk5,$tweak,$rndkey0
vsrab $tmp,$tweak,$seven # next tweak value
vaddubm $tweak,$tweak,$tweak
vsldoi $tmp,$tmp,$tmp,15
le?vperm $in5,$in5,$in5,$leperm
vand $tmp,$tmp,$eighty7
vxor $out5,$in5,$twk5
vxor $tweak,$tweak,$tmp
vxor v31,v31,$rndkey0
mtctr $rounds
b Loop_xts_dec6x
.align 5
Loop_xts_dec6x:
vncipher $out0,$out0,v24
vncipher $out1,$out1,v24
vncipher $out2,$out2,v24
vncipher $out3,$out3,v24
vncipher $out4,$out4,v24
vncipher $out5,$out5,v24
lvx v24,$x20,$key_ # round[3]
addi $key_,$key_,0x20
vncipher $out0,$out0,v25
vncipher $out1,$out1,v25
vncipher $out2,$out2,v25
vncipher $out3,$out3,v25
vncipher $out4,$out4,v25
vncipher $out5,$out5,v25
lvx v25,$x10,$key_ # round[4]
bdnz Loop_xts_dec6x
subic $len,$len,96 # $len-=96
vxor $in0,$twk0,v31 # xor with last round key
vncipher $out0,$out0,v24
vncipher $out1,$out1,v24
vsrab $tmp,$tweak,$seven # next tweak value
vxor $twk0,$tweak,$rndkey0
vaddubm $tweak,$tweak,$tweak
vncipher $out2,$out2,v24
vncipher $out3,$out3,v24
vsldoi $tmp,$tmp,$tmp,15
vncipher $out4,$out4,v24
vncipher $out5,$out5,v24
subfe. r0,r0,r0 # borrow?-1:0
vand $tmp,$tmp,$eighty7
vncipher $out0,$out0,v25
vncipher $out1,$out1,v25
vxor $tweak,$tweak,$tmp
vncipher $out2,$out2,v25
vncipher $out3,$out3,v25
vxor $in1,$twk1,v31
vsrab $tmp,$tweak,$seven # next tweak value
vxor $twk1,$tweak,$rndkey0
vncipher $out4,$out4,v25
vncipher $out5,$out5,v25
and r0,r0,$len
vaddubm $tweak,$tweak,$tweak
vsldoi $tmp,$tmp,$tmp,15
vncipher $out0,$out0,v26
vncipher $out1,$out1,v26
vand $tmp,$tmp,$eighty7
vncipher $out2,$out2,v26
vncipher $out3,$out3,v26
vxor $tweak,$tweak,$tmp
vncipher $out4,$out4,v26
vncipher $out5,$out5,v26
add $inp,$inp,r0 # $inp is adjusted in such
# way that at exit from the
# loop inX-in5 are loaded
# with last "words"
vxor $in2,$twk2,v31
vsrab $tmp,$tweak,$seven # next tweak value
vxor $twk2,$tweak,$rndkey0
vaddubm $tweak,$tweak,$tweak
vncipher $out0,$out0,v27
vncipher $out1,$out1,v27
vsldoi $tmp,$tmp,$tmp,15
vncipher $out2,$out2,v27
vncipher $out3,$out3,v27
vand $tmp,$tmp,$eighty7
vncipher $out4,$out4,v27
vncipher $out5,$out5,v27
addi $key_,$sp,$FRAME+15 # rewind $key_
vxor $tweak,$tweak,$tmp
vncipher $out0,$out0,v28
vncipher $out1,$out1,v28
vxor $in3,$twk3,v31
vsrab $tmp,$tweak,$seven # next tweak value
vxor $twk3,$tweak,$rndkey0
vncipher $out2,$out2,v28
vncipher $out3,$out3,v28
vaddubm $tweak,$tweak,$tweak
vsldoi $tmp,$tmp,$tmp,15
vncipher $out4,$out4,v28
vncipher $out5,$out5,v28
lvx v24,$x00,$key_ # re-pre-load round[1]
vand $tmp,$tmp,$eighty7
vncipher $out0,$out0,v29
vncipher $out1,$out1,v29
vxor $tweak,$tweak,$tmp
vncipher $out2,$out2,v29
vncipher $out3,$out3,v29
vxor $in4,$twk4,v31
vsrab $tmp,$tweak,$seven # next tweak value
vxor $twk4,$tweak,$rndkey0
vncipher $out4,$out4,v29
vncipher $out5,$out5,v29
lvx v25,$x10,$key_ # re-pre-load round[2]
vaddubm $tweak,$tweak,$tweak
vsldoi $tmp,$tmp,$tmp,15
vncipher $out0,$out0,v30
vncipher $out1,$out1,v30
vand $tmp,$tmp,$eighty7
vncipher $out2,$out2,v30
vncipher $out3,$out3,v30
vxor $tweak,$tweak,$tmp
vncipher $out4,$out4,v30
vncipher $out5,$out5,v30
vxor $in5,$twk5,v31
vsrab $tmp,$tweak,$seven # next tweak value
vxor $twk5,$tweak,$rndkey0
vncipherlast $out0,$out0,$in0
lvx_u $in0,$x00,$inp # load next input block
vaddubm $tweak,$tweak,$tweak
vsldoi $tmp,$tmp,$tmp,15
vncipherlast $out1,$out1,$in1
lvx_u $in1,$x10,$inp
vncipherlast $out2,$out2,$in2
le?vperm $in0,$in0,$in0,$leperm
lvx_u $in2,$x20,$inp
vand $tmp,$tmp,$eighty7
vncipherlast $out3,$out3,$in3
le?vperm $in1,$in1,$in1,$leperm
lvx_u $in3,$x30,$inp
vncipherlast $out4,$out4,$in4
le?vperm $in2,$in2,$in2,$leperm
lvx_u $in4,$x40,$inp
vxor $tweak,$tweak,$tmp
vncipherlast $out5,$out5,$in5
le?vperm $in3,$in3,$in3,$leperm
lvx_u $in5,$x50,$inp
addi $inp,$inp,0x60
le?vperm $in4,$in4,$in4,$leperm
le?vperm $in5,$in5,$in5,$leperm
le?vperm $out0,$out0,$out0,$leperm
le?vperm $out1,$out1,$out1,$leperm
stvx_u $out0,$x00,$out # store output
vxor $out0,$in0,$twk0
le?vperm $out2,$out2,$out2,$leperm
stvx_u $out1,$x10,$out
vxor $out1,$in1,$twk1
le?vperm $out3,$out3,$out3,$leperm
stvx_u $out2,$x20,$out
vxor $out2,$in2,$twk2
le?vperm $out4,$out4,$out4,$leperm
stvx_u $out3,$x30,$out
vxor $out3,$in3,$twk3
le?vperm $out5,$out5,$out5,$leperm
stvx_u $out4,$x40,$out
vxor $out4,$in4,$twk4
stvx_u $out5,$x50,$out
vxor $out5,$in5,$twk5
addi $out,$out,0x60
mtctr $rounds
beq Loop_xts_dec6x # did $len-=96 borrow?
addic. $len,$len,0x60
beq Lxts_dec6x_zero
cmpwi $len,0x20
blt Lxts_dec6x_one
nop
beq Lxts_dec6x_two
cmpwi $len,0x40
blt Lxts_dec6x_three
nop
beq Lxts_dec6x_four
Lxts_dec6x_five:
vxor $out0,$in1,$twk0
vxor $out1,$in2,$twk1
vxor $out2,$in3,$twk2
vxor $out3,$in4,$twk3
vxor $out4,$in5,$twk4
bl _aesp8_xts_dec5x
le?vperm $out0,$out0,$out0,$leperm
vmr $twk0,$twk5 # unused tweak
vxor $twk1,$tweak,$rndkey0
le?vperm $out1,$out1,$out1,$leperm
stvx_u $out0,$x00,$out # store output
vxor $out0,$in0,$twk1
le?vperm $out2,$out2,$out2,$leperm
stvx_u $out1,$x10,$out
le?vperm $out3,$out3,$out3,$leperm
stvx_u $out2,$x20,$out
le?vperm $out4,$out4,$out4,$leperm
stvx_u $out3,$x30,$out
stvx_u $out4,$x40,$out
addi $out,$out,0x50
bne Lxts_dec6x_steal
b Lxts_dec6x_done
.align 4
Lxts_dec6x_four:
vxor $out0,$in2,$twk0
vxor $out1,$in3,$twk1
vxor $out2,$in4,$twk2
vxor $out3,$in5,$twk3
vxor $out4,$out4,$out4
bl _aesp8_xts_dec5x
le?vperm $out0,$out0,$out0,$leperm
vmr $twk0,$twk4 # unused tweak
vmr $twk1,$twk5
le?vperm $out1,$out1,$out1,$leperm
stvx_u $out0,$x00,$out # store output
vxor $out0,$in0,$twk5
le?vperm $out2,$out2,$out2,$leperm
stvx_u $out1,$x10,$out
le?vperm $out3,$out3,$out3,$leperm
stvx_u $out2,$x20,$out
stvx_u $out3,$x30,$out
addi $out,$out,0x40
bne Lxts_dec6x_steal
b Lxts_dec6x_done
.align 4
Lxts_dec6x_three:
vxor $out0,$in3,$twk0
vxor $out1,$in4,$twk1
vxor $out2,$in5,$twk2
vxor $out3,$out3,$out3
vxor $out4,$out4,$out4
bl _aesp8_xts_dec5x
le?vperm $out0,$out0,$out0,$leperm
vmr $twk0,$twk3 # unused tweak
vmr $twk1,$twk4
le?vperm $out1,$out1,$out1,$leperm
stvx_u $out0,$x00,$out # store output
vxor $out0,$in0,$twk4
le?vperm $out2,$out2,$out2,$leperm
stvx_u $out1,$x10,$out
stvx_u $out2,$x20,$out
addi $out,$out,0x30
bne Lxts_dec6x_steal
b Lxts_dec6x_done
.align 4
Lxts_dec6x_two:
vxor $out0,$in4,$twk0
vxor $out1,$in5,$twk1
vxor $out2,$out2,$out2
vxor $out3,$out3,$out3
vxor $out4,$out4,$out4
bl _aesp8_xts_dec5x
le?vperm $out0,$out0,$out0,$leperm
vmr $twk0,$twk2 # unused tweak
vmr $twk1,$twk3
le?vperm $out1,$out1,$out1,$leperm
stvx_u $out0,$x00,$out # store output
vxor $out0,$in0,$twk3
stvx_u $out1,$x10,$out
addi $out,$out,0x20
bne Lxts_dec6x_steal
b Lxts_dec6x_done
.align 4
Lxts_dec6x_one:
vxor $out0,$in5,$twk0
nop
Loop_xts_dec1x:
vncipher $out0,$out0,v24
lvx v24,$x20,$key_ # round[3]
addi $key_,$key_,0x20
vncipher $out0,$out0,v25
lvx v25,$x10,$key_ # round[4]
bdnz Loop_xts_dec1x
subi r0,$taillen,1
vncipher $out0,$out0,v24
andi. r0,r0,16
cmpwi $taillen,0
vncipher $out0,$out0,v25
sub $inp,$inp,r0
vncipher $out0,$out0,v26
lvx_u $in0,0,$inp
vncipher $out0,$out0,v27
addi $key_,$sp,$FRAME+15 # rewind $key_
vncipher $out0,$out0,v28
lvx v24,$x00,$key_ # re-pre-load round[1]
vncipher $out0,$out0,v29
lvx v25,$x10,$key_ # re-pre-load round[2]
vxor $twk0,$twk0,v31
le?vperm $in0,$in0,$in0,$leperm
vncipher $out0,$out0,v30
mtctr $rounds
vncipherlast $out0,$out0,$twk0
vmr $twk0,$twk1 # unused tweak
vmr $twk1,$twk2
le?vperm $out0,$out0,$out0,$leperm
stvx_u $out0,$x00,$out # store output
addi $out,$out,0x10
vxor $out0,$in0,$twk2
bne Lxts_dec6x_steal
b Lxts_dec6x_done
.align 4
Lxts_dec6x_zero:
cmpwi $taillen,0
beq Lxts_dec6x_done
lvx_u $in0,0,$inp
le?vperm $in0,$in0,$in0,$leperm
vxor $out0,$in0,$twk1
Lxts_dec6x_steal:
vncipher $out0,$out0,v24
lvx v24,$x20,$key_ # round[3]
addi $key_,$key_,0x20
vncipher $out0,$out0,v25
lvx v25,$x10,$key_ # round[4]
bdnz Lxts_dec6x_steal
add $inp,$inp,$taillen
vncipher $out0,$out0,v24
cmpwi $taillen,0
vncipher $out0,$out0,v25
lvx_u $in0,0,$inp
vncipher $out0,$out0,v26
lvsr $inpperm,0,$taillen # $in5 is no more
vncipher $out0,$out0,v27
addi $key_,$sp,$FRAME+15 # rewind $key_
vncipher $out0,$out0,v28
lvx v24,$x00,$key_ # re-pre-load round[1]
vncipher $out0,$out0,v29
lvx v25,$x10,$key_ # re-pre-load round[2]
vxor $twk1,$twk1,v31
le?vperm $in0,$in0,$in0,$leperm
vncipher $out0,$out0,v30
vperm $in0,$in0,$in0,$inpperm
vncipherlast $tmp,$out0,$twk1
le?vperm $out0,$tmp,$tmp,$leperm
le?stvx_u $out0,0,$out
be?stvx_u $tmp,0,$out
vxor $out0,$out0,$out0
vspltisb $out1,-1
vperm $out0,$out0,$out1,$inpperm
vsel $out0,$in0,$tmp,$out0
vxor $out0,$out0,$twk0
subi r30,$out,1
mtctr $taillen
Loop_xts_dec6x_steal:
lbzu r0,1(r30)
stb r0,16(r30)
bdnz Loop_xts_dec6x_steal
li $taillen,0
mtctr $rounds
b Loop_xts_dec1x # one more time...
.align 4
Lxts_dec6x_done:
${UCMP}i $ivp,0
beq Lxts_dec6x_ret
vxor $tweak,$twk0,$rndkey0
le?vperm $tweak,$tweak,$tweak,$leperm
stvx_u $tweak,0,$ivp
Lxts_dec6x_ret:
mtlr r11
li r10,`$FRAME+15`
li r11,`$FRAME+31`
stvx $seven,r10,$sp # wipe copies of round keys
addi r10,r10,32
stvx $seven,r11,$sp
addi r11,r11,32
stvx $seven,r10,$sp
addi r10,r10,32
stvx $seven,r11,$sp
addi r11,r11,32
stvx $seven,r10,$sp
addi r10,r10,32
stvx $seven,r11,$sp
addi r11,r11,32
stvx $seven,r10,$sp
addi r10,r10,32
stvx $seven,r11,$sp
addi r11,r11,32
mtspr 256,$vrsave
lvx v20,r10,$sp # ABI says so
addi r10,r10,32
lvx v21,r11,$sp
addi r11,r11,32
lvx v22,r10,$sp
addi r10,r10,32
lvx v23,r11,$sp
addi r11,r11,32
lvx v24,r10,$sp
addi r10,r10,32
lvx v25,r11,$sp
addi r11,r11,32
lvx v26,r10,$sp
addi r10,r10,32
lvx v27,r11,$sp
addi r11,r11,32
lvx v28,r10,$sp
addi r10,r10,32
lvx v29,r11,$sp
addi r11,r11,32
lvx v30,r10,$sp
lvx v31,r11,$sp
$POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
$POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
$POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
$POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
$POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
$POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
blr
.long 0
.byte 0,12,0x04,1,0x80,6,6,0
.long 0
.align 5
_aesp8_xts_dec5x:
vncipher $out0,$out0,v24
vncipher $out1,$out1,v24
vncipher $out2,$out2,v24
vncipher $out3,$out3,v24
vncipher $out4,$out4,v24
lvx v24,$x20,$key_ # round[3]
addi $key_,$key_,0x20
vncipher $out0,$out0,v25
vncipher $out1,$out1,v25
vncipher $out2,$out2,v25
vncipher $out3,$out3,v25
vncipher $out4,$out4,v25
lvx v25,$x10,$key_ # round[4]
bdnz _aesp8_xts_dec5x
subi r0,$taillen,1
vncipher $out0,$out0,v24
vncipher $out1,$out1,v24
vncipher $out2,$out2,v24
vncipher $out3,$out3,v24
vncipher $out4,$out4,v24
andi. r0,r0,16
cmpwi $taillen,0
vncipher $out0,$out0,v25
vncipher $out1,$out1,v25
vncipher $out2,$out2,v25
vncipher $out3,$out3,v25
vncipher $out4,$out4,v25
vxor $twk0,$twk0,v31
sub $inp,$inp,r0
vncipher $out0,$out0,v26
vncipher $out1,$out1,v26
vncipher $out2,$out2,v26
vncipher $out3,$out3,v26
vncipher $out4,$out4,v26
vxor $in1,$twk1,v31
vncipher $out0,$out0,v27
lvx_u $in0,0,$inp
vncipher $out1,$out1,v27
vncipher $out2,$out2,v27
vncipher $out3,$out3,v27
vncipher $out4,$out4,v27
vxor $in2,$twk2,v31
addi $key_,$sp,$FRAME+15 # rewind $key_
vncipher $out0,$out0,v28
vncipher $out1,$out1,v28
vncipher $out2,$out2,v28
vncipher $out3,$out3,v28
vncipher $out4,$out4,v28
lvx v24,$x00,$key_ # re-pre-load round[1]
vxor $in3,$twk3,v31
vncipher $out0,$out0,v29
le?vperm $in0,$in0,$in0,$leperm
vncipher $out1,$out1,v29
vncipher $out2,$out2,v29
vncipher $out3,$out3,v29
vncipher $out4,$out4,v29
lvx v25,$x10,$key_ # re-pre-load round[2]
vxor $in4,$twk4,v31
vncipher $out0,$out0,v30
vncipher $out1,$out1,v30
vncipher $out2,$out2,v30
vncipher $out3,$out3,v30
vncipher $out4,$out4,v30
vncipherlast $out0,$out0,$twk0
vncipherlast $out1,$out1,$in1
vncipherlast $out2,$out2,$in2
vncipherlast $out3,$out3,$in3
vncipherlast $out4,$out4,$in4
mtctr $rounds
blr
.long 0
.byte 0,12,0x14,0,0,0,0,0
___
}} }}}
# Post-process the accumulated $code line by line: evaluate backtick
# arithmetic placeholders, emit the constants table as endian-correct
# .byte data, and rewrite '?'-prefixed endian-sensitive mnemonics for
# the target $flavour. Output goes to STDOUT (the generated .s file).
my $consts=1;
foreach(split("\n",$code)) {
s/\`([^\`]*)\`/eval($1)/geo;
# constants table endian-specific conversion
if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
my $conv=$3;
my @bytes=();
# convert to endian-agnostic format
if ($1 eq "long") {
foreach (split(/,\s*/,$2)) {
my $l = /^0/?oct:int;
push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
}
} else {
@bytes = map(/^0/?oct:int,split(/,\s*/,$2));
}
# little-endian conversion: ?inv XORs each byte with 0xf,
# ?rev reverses the byte order of the constant
if ($flavour =~ /le$/o) {
SWITCH: for($conv) {
/\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
/\?rev/ && do { @bytes=reverse(@bytes); last; };
}
}
# emit as raw bytes so the constant is correct on either endianness
print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
next;
}
$consts=0 if (m/Lconsts:/o); # end of table
# instructions prefixed with '?' are endian-specific and need
# to be adjusted accordingly...
# The s/// chain is first-match-wins via 'or'; exactly one rewrite
# applies per line, so the order of the alternatives matters.
if ($flavour =~ /le$/o) { # little-endian
s/le\?//o or
s/be\?/#be#/o or
s/\?lvsr/lvsl/o or
s/\?lvsl/lvsr/o or
s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
} else { # big-endian
# keep be? lines, comment out le? lines, strip the bare '?' marker
s/le\?/#le#/o or
s/be\?//o or
s/\?([a-z]+)/$1/o;
}
print $_,"\n";
}
close STDOUT;
#!/usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0
# This code is taken from the OpenSSL project but the author (Andy Polyakov)
# has relicensed it under the GPLv2. Therefore this program is free software;
# you can redistribute it and/or modify it under the terms of the GNU General
# Public License version 2 as published by the Free Software Foundation.
#
# The original headers, including the original license headers, are
# included below for completeness.
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see https://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# GHASH for PowerISA v2.07.
#
# July 2014
#
# Accurate performance measurements are problematic, because it's
# always virtualized setup with possibly throttled processor.
# Relative comparison is therefore more informative. This initial
# version is ~2.1x slower than hardware-assisted AES-128-CTR, ~12x
# faster than "4-bit" integer-only compiler-generated 64-bit code.
# "Initial version" means that there is room for further improvement.
# Command-line arguments: assembler flavour (e.g. linux64le) and output
# file name, both forwarded to the ppc-xlate.pl back-end.
$flavour=shift;
$output =shift;

# Pointer-size-dependent conventions for the generated code.
if ($flavour =~ /64/) {
	$SIZE_T=8;
	$LRSAVE=2*$SIZE_T;
	$STU="stdu";
	$POP="ld";
	$PUSH="std";
} elsif ($flavour =~ /32/) {
	$SIZE_T=4;
	$LRSAVE=$SIZE_T;
	$STU="stwu";
	$POP="lwz";
	$PUSH="stw";
} else { die "nonsense $flavour"; }

# Locate the ppc-xlate.pl pre-processor next to this script or in the
# OpenSSL perlasm directory.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Pipe all of our output through the translator.  Use low-precedence
# "or" here: with "||" the check binds to the (always true) string
# argument, so a failed open would go completely unnoticed.
open STDOUT,"| $^X $xlate $flavour $output" or die "can't call $xlate: $!";

my ($Xip,$Htbl,$inp,$len)=map("r$_",(3..6));	# argument block

my ($Xl,$Xm,$Xh,$IN)=map("v$_",(0..3));
my ($zero,$t0,$t1,$t2,$xC2,$H,$Hh,$Hl,$lemask)=map("v$_",(4..12));
my ($Xl1,$Xm1,$Xh1,$IN1,$H2,$H2h,$H2l)=map("v$_",(13..19));
my $vrsave="r12";
my ($t4,$t5,$t6) = ($Hl,$H,$Hh);
# Assembler template.  Everything between here and the closing "___" is
# a string emitted through ppc-xlate.pl, so no lines may be inserted
# inside it.  le?/be? prefixes mark endian-specific instructions that
# the output loop below resolves.  Entry points generated:
#   gcm_init_p8     - compute table {0xc2.0, H.lo, H, H.hi} from H
#   gcm_init_htable - as above plus H^2..H^4 powers (256-byte table)
#   gcm_gmult_p8    - Xi = Xi * H mod P
#   gcm_ghash_p8    - Xi = (Xi ^ inp[0..len)) * H mod P, 16 bytes/iter
$code=<<___;
.machine "any"
.text
.globl .gcm_init_p8
lis r0,0xfff0
li r8,0x10
mfspr $vrsave,256
li r9,0x20
mtspr 256,r0
li r10,0x30
lvx_u $H,0,r4 # load H
le?xor r7,r7,r7
le?addi r7,r7,0x8 # need a vperm start with 08
le?lvsr 5,0,r7
le?vspltisb 6,0x0f
le?vxor 5,5,6 # set a b-endian mask
le?vperm $H,$H,$H,5
vspltisb $xC2,-16 # 0xf0
vspltisb $t0,1 # one
vaddubm $xC2,$xC2,$xC2 # 0xe0
vxor $zero,$zero,$zero
vor $xC2,$xC2,$t0 # 0xe1
vsldoi $xC2,$xC2,$zero,15 # 0xe1...
vsldoi $t1,$zero,$t0,1 # ...1
vaddubm $xC2,$xC2,$xC2 # 0xc2...
vspltisb $t2,7
vor $xC2,$xC2,$t1 # 0xc2....01
vspltb $t1,$H,0 # most significant byte
vsl $H,$H,$t0 # H<<=1
vsrab $t1,$t1,$t2 # broadcast carry bit
vand $t1,$t1,$xC2
vxor $H,$H,$t1 # twisted H
vsldoi $H,$H,$H,8 # twist even more ...
vsldoi $xC2,$zero,$xC2,8 # 0xc2.0
vsldoi $Hl,$zero,$H,8 # ... and split
vsldoi $Hh,$H,$zero,8
stvx_u $xC2,0,r3 # save pre-computed table
stvx_u $Hl,r8,r3
stvx_u $H, r9,r3
stvx_u $Hh,r10,r3
mtspr 256,$vrsave
blr
.long 0
.byte 0,12,0x14,0,0,0,2,0
.long 0
.size .gcm_init_p8,.-.gcm_init_p8
.globl .gcm_init_htable
lis r0,0xfff0
li r8,0x10
mfspr $vrsave,256
li r9,0x20
mtspr 256,r0
li r10,0x30
lvx_u $H,0,r4 # load H
vspltisb $xC2,-16 # 0xf0
vspltisb $t0,1 # one
vaddubm $xC2,$xC2,$xC2 # 0xe0
vxor $zero,$zero,$zero
vor $xC2,$xC2,$t0 # 0xe1
vsldoi $xC2,$xC2,$zero,15 # 0xe1...
vsldoi $t1,$zero,$t0,1 # ...1
vaddubm $xC2,$xC2,$xC2 # 0xc2...
vspltisb $t2,7
vor $xC2,$xC2,$t1 # 0xc2....01
vspltb $t1,$H,0 # most significant byte
vsl $H,$H,$t0 # H<<=1
vsrab $t1,$t1,$t2 # broadcast carry bit
vand $t1,$t1,$xC2
vxor $IN,$H,$t1 # twisted H
vsldoi $H,$IN,$IN,8 # twist even more ...
vsldoi $xC2,$zero,$xC2,8 # 0xc2.0
vsldoi $Hl,$zero,$H,8 # ... and split
vsldoi $Hh,$H,$zero,8
stvx_u $xC2,0,r3 # save pre-computed table
stvx_u $Hl,r8,r3
li r8,0x40
stvx_u $H, r9,r3
li r9,0x50
stvx_u $Hh,r10,r3
li r10,0x60
vpmsumd $Xl,$IN,$Hl # H.lo·H.lo
vpmsumd $Xm,$IN,$H # H.hi·H.lo+H.lo·H.hi
vpmsumd $Xh,$IN,$Hh # H.hi·H.hi
vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
vsldoi $t0,$Xm,$zero,8
vsldoi $t1,$zero,$Xm,8
vxor $Xl,$Xl,$t0
vxor $Xh,$Xh,$t1
vsldoi $Xl,$Xl,$Xl,8
vxor $Xl,$Xl,$t2
vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
vpmsumd $Xl,$Xl,$xC2
vxor $t1,$t1,$Xh
vxor $IN1,$Xl,$t1
vsldoi $H2,$IN1,$IN1,8
vsldoi $H2l,$zero,$H2,8
vsldoi $H2h,$H2,$zero,8
stvx_u $H2l,r8,r3 # save H^2
li r8,0x70
stvx_u $H2,r9,r3
li r9,0x80
stvx_u $H2h,r10,r3
li r10,0x90
vpmsumd $Xl,$IN,$H2l # H.lo·H^2.lo
vpmsumd $Xl1,$IN1,$H2l # H^2.lo·H^2.lo
vpmsumd $Xm,$IN,$H2 # H.hi·H^2.lo+H.lo·H^2.hi
vpmsumd $Xm1,$IN1,$H2 # H^2.hi·H^2.lo+H^2.lo·H^2.hi
vpmsumd $Xh,$IN,$H2h # H.hi·H^2.hi
vpmsumd $Xh1,$IN1,$H2h # H^2.hi·H^2.hi
vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
vpmsumd $t6,$Xl1,$xC2 # 1st reduction phase
vsldoi $t0,$Xm,$zero,8
vsldoi $t1,$zero,$Xm,8
vsldoi $t4,$Xm1,$zero,8
vsldoi $t5,$zero,$Xm1,8
vxor $Xl,$Xl,$t0
vxor $Xh,$Xh,$t1
vxor $Xl1,$Xl1,$t4
vxor $Xh1,$Xh1,$t5
vsldoi $Xl,$Xl,$Xl,8
vsldoi $Xl1,$Xl1,$Xl1,8
vxor $Xl,$Xl,$t2
vxor $Xl1,$Xl1,$t6
vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
vsldoi $t5,$Xl1,$Xl1,8 # 2nd reduction phase
vpmsumd $Xl,$Xl,$xC2
vpmsumd $Xl1,$Xl1,$xC2
vxor $t1,$t1,$Xh
vxor $t5,$t5,$Xh1
vxor $Xl,$Xl,$t1
vxor $Xl1,$Xl1,$t5
vsldoi $H,$Xl,$Xl,8
vsldoi $H2,$Xl1,$Xl1,8
vsldoi $Hl,$zero,$H,8
vsldoi $Hh,$H,$zero,8
vsldoi $H2l,$zero,$H2,8
vsldoi $H2h,$H2,$zero,8
stvx_u $Hl,r8,r3 # save H^3
li r8,0xa0
stvx_u $H,r9,r3
li r9,0xb0
stvx_u $Hh,r10,r3
li r10,0xc0
stvx_u $H2l,r8,r3 # save H^4
stvx_u $H2,r9,r3
stvx_u $H2h,r10,r3
mtspr 256,$vrsave
blr
.long 0
.byte 0,12,0x14,0,0,0,2,0
.long 0
.size .gcm_init_htable,.-.gcm_init_htable
.globl .gcm_gmult_p8
lis r0,0xfff8
li r8,0x10
mfspr $vrsave,256
li r9,0x20
mtspr 256,r0
li r10,0x30
lvx_u $IN,0,$Xip # load Xi
lvx_u $Hl,r8,$Htbl # load pre-computed table
le?lvsl $lemask,r0,r0
lvx_u $H, r9,$Htbl
le?vspltisb $t0,0x07
lvx_u $Hh,r10,$Htbl
le?vxor $lemask,$lemask,$t0
lvx_u $xC2,0,$Htbl
le?vperm $IN,$IN,$IN,$lemask
vxor $zero,$zero,$zero
vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
vpmsumd $t2,$Xl,$xC2 # 1st phase
vsldoi $t0,$Xm,$zero,8
vsldoi $t1,$zero,$Xm,8
vxor $Xl,$Xl,$t0
vxor $Xh,$Xh,$t1
vsldoi $Xl,$Xl,$Xl,8
vxor $Xl,$Xl,$t2
vsldoi $t1,$Xl,$Xl,8 # 2nd phase
vpmsumd $Xl,$Xl,$xC2
vxor $t1,$t1,$Xh
vxor $Xl,$Xl,$t1
le?vperm $Xl,$Xl,$Xl,$lemask
stvx_u $Xl,0,$Xip # write out Xi
mtspr 256,$vrsave
blr
.long 0
.byte 0,12,0x14,0,0,0,2,0
.long 0
.size .gcm_gmult_p8,.-.gcm_gmult_p8
.globl .gcm_ghash_p8
lis r0,0xfff8
li r8,0x10
mfspr $vrsave,256
li r9,0x20
mtspr 256,r0
li r10,0x30
lvx_u $Xl,0,$Xip # load Xi
lvx_u $Hl,r8,$Htbl # load pre-computed table
le?lvsl $lemask,r0,r0
lvx_u $H, r9,$Htbl
le?vspltisb $t0,0x07
lvx_u $Hh,r10,$Htbl
le?vxor $lemask,$lemask,$t0
lvx_u $xC2,0,$Htbl
le?vperm $Xl,$Xl,$Xl,$lemask
vxor $zero,$zero,$zero
lvx_u $IN,0,$inp
addi $inp,$inp,16
subi $len,$len,16
le?vperm $IN,$IN,$IN,$lemask
vxor $IN,$IN,$Xl
b Loop
.align 5
Loop:
subic $len,$len,16
vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
subfe. r0,r0,r0 # borrow?-1:0
vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
and r0,r0,$len
vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
add $inp,$inp,r0
vpmsumd $t2,$Xl,$xC2 # 1st phase
vsldoi $t0,$Xm,$zero,8
vsldoi $t1,$zero,$Xm,8
vxor $Xl,$Xl,$t0
vxor $Xh,$Xh,$t1
vsldoi $Xl,$Xl,$Xl,8
vxor $Xl,$Xl,$t2
lvx_u $IN,0,$inp
addi $inp,$inp,16
vsldoi $t1,$Xl,$Xl,8 # 2nd phase
vpmsumd $Xl,$Xl,$xC2
le?vperm $IN,$IN,$IN,$lemask
vxor $t1,$t1,$Xh
vxor $IN,$IN,$t1
vxor $IN,$IN,$Xl
beq Loop # did $len-=16 borrow?
vxor $Xl,$Xl,$t1
le?vperm $Xl,$Xl,$Xl,$lemask
stvx_u $Xl,0,$Xip # write out Xi
mtspr 256,$vrsave
blr
.long 0
.byte 0,12,0x14,0,0,0,4,0
.long 0
.size .gcm_ghash_p8,.-.gcm_ghash_p8
.asciz "GHASH for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
.align 2
___
# Emit the generated code, resolving the le?/be? endian-specific
# prefixes: the prefix matching the target flavour is stripped, the
# other one turns its line into an assembler comment.
foreach (split("\n",$code)) {
	if ($flavour =~ /le$/o) {	# little-endian
		s/le\?//o		or
		s/be\?/#be#/o;
	} else {
		s/le\?/#le#/o	or
		s/be\?//o;
	}
	print $_,"\n";
}

# Output goes through a pipe to ppc-xlate.pl, so a write error may only
# surface when the buffers are flushed at close time — check for it.
close STDOUT or die "error closing STDOUT: $!";	# enforce flush
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Glue code for accelerated AES-GCM stitched implementation for ppc64le.
*
* Copyright 2022- IBM Inc. All rights reserved
*/
#include <asm/unaligned.h>
#include <asm/simd.h>
#include <asm/switch_to.h>
#include <crypto/aes.h>
#include <crypto/algapi.h>	/* was included twice; once is enough */
#include <crypto/b128ops.h>
#include <crypto/gf128mul.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/aead.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/skcipher.h>
#include <crypto/scatterwalk.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <linux/module.h>
#include <linux/types.h>

/* HWCAP2 bit number for Power10 (ISA 3.1) used by module_cpu_feature_match. */
#define	PPC_MODULE_FEATURE_P10	(32 + ilog2(PPC_FEATURE2_ARCH_3_1))
#define	PPC_ALIGN		16	/* alignment for VSX-accessed buffers */
#define	GCM_IV_SIZE		12	/* GCM nonce length in bytes */

MODULE_DESCRIPTION("PPC64le AES-GCM with Stitched implementation");
/* Fixed: the author string was missing the closing '>' of the address. */
MODULE_AUTHOR("Danny Tsen <dtsen@linux.ibm.com>");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("aes");
/* Entry points provided by the CRYPTOGAMS/IBM assembler files. */
asmlinkage int aes_p8_set_encrypt_key(const u8 *userKey, const int bits,
				      void *key);
asmlinkage void aes_p8_encrypt(const u8 *in, u8 *out, const void *key);
/* Stitched AES-CTR + GHASH over a contiguous buffer of len bytes. */
asmlinkage void aes_p10_gcm_encrypt(u8 *in, u8 *out, size_t len,
				    void *rkey, u8 *iv, void *Xi);
asmlinkage void aes_p10_gcm_decrypt(u8 *in, u8 *out, size_t len,
				    void *rkey, u8 *iv, void *Xi);
/* Build the H, H^2..H^4 power table used by the stitched code. */
asmlinkage void gcm_init_htable(unsigned char htable[256], unsigned char Xi[16]);
/* GHASH alen bytes (multiple of 16) of aad into Xi using Htable. */
asmlinkage void gcm_ghash_p8(unsigned char *Xi, unsigned char *Htable,
		unsigned char *aad, unsigned int alen);

/* Expanded AES round keys; layout shared with the assembler. */
struct aes_key {
	u8 key[AES_MAX_KEYLENGTH];
	u64 rounds;
};

/* Per-request GCM bookkeeping; field offsets are relied on by the asm. */
struct gcm_ctx {
	u8 iv[16];		/* current counter block */
	u8 ivtag[16];		/* E(K, IV|1), xored into the final tag */
	u8 aad_hash[16];	/* GHASH of the associated data */
	u64 aadLen;
	u64 Plen;	/* offset 56 - used in aes_p10_gcm_{en/de}crypt */
};

struct Hash_ctx {
	u8 H[16];	/* subkey */
	u8 Htable[256];	/* Xi, Hash table(offset 32) */
};

/* Transform context: just the expanded encryption key (CTR mode). */
struct p10_aes_gcm_ctx {
	struct aes_key enc_key;
};
/* Enter a VSX region: the vector unit must not be preempted while in use. */
static void vsx_begin(void)
{
	preempt_disable();
	enable_kernel_vsx();
}
/* Leave a VSX region opened by vsx_begin(). */
static void vsx_end(void)
{
	disable_kernel_vsx();
	preempt_enable();
}
static void set_subkey(unsigned char *hash)
{
*(u64 *)&hash[0] = be64_to_cpup((__be64 *)&hash[0]);
*(u64 *)&hash[8] = be64_to_cpup((__be64 *)&hash[8]);
}
/*
 * Compute aad if any.
 * - Hash aad and copy to Xi.
 *
 * Whole 16-byte blocks are hashed directly; a trailing partial block is
 * zero-padded (by XOR into nXi) and hashed as one final block.  The
 * result lands in both gctx->aad_hash and Xi (hash->Htable[0..15]).
 */
static void set_aad(struct gcm_ctx *gctx, struct Hash_ctx *hash,
	unsigned char *aad, int alen)
{
	int i;
	u8 nXi[16] = {0, };

	gctx->aadLen = alen;
	i = alen & ~0xf;	/* bytes contained in whole blocks */
	if (i) {
		gcm_ghash_p8(nXi, hash->Htable+32, aad, i);
		aad += i;
		alen -= i;
	}
	if (alen) {
		/* Fold the zero-padded tail into the running hash. */
		for (i = 0; i < alen; i++)
			nXi[i] ^= aad[i];
		memset(gctx->aad_hash, 0, 16);
		gcm_ghash_p8(gctx->aad_hash, hash->Htable+32, nXi, 16);
	} else {
		memcpy(gctx->aad_hash, nXi, 16);
	}

	memcpy(hash->Htable, gctx->aad_hash, 16);
}
/*
 * Initialize per-request GCM state:
 * - derive the hash subkey H = E(K, 0) (hash->H is zeroed by the
 *   caller) and build its power table at Htable+32,
 * - form J0 = IV || 0x00000001 and precompute ivtag = E(K, J0),
 * - store the counter block for the first data block (counter = 2),
 * - GHASH any associated data into Xi.
 */
static void gcmp10_init(struct gcm_ctx *gctx, u8 *iv, unsigned char *rdkey,
	struct Hash_ctx *hash, u8 *assoc, unsigned int assoclen)
{
	__be32 counter = cpu_to_be32(1);

	aes_p8_encrypt(hash->H, hash->H, rdkey);	/* H = E(K, 0^128) */
	set_subkey(hash->H);
	gcm_init_htable(hash->Htable+32, hash->H);

	*((__be32 *)(iv+12)) = counter;

	gctx->Plen = 0;

	/*
	 * Encrypt counter vector as iv tag and increment counter.
	 */
	aes_p8_encrypt(iv, gctx->ivtag, rdkey);

	counter = cpu_to_be32(2);
	*((__be32 *)(iv+12)) = counter;
	memcpy(gctx->iv, iv, 16);

	gctx->aadLen = assoclen;
	memset(gctx->aad_hash, 0, 16);
	if (assoclen)
		set_aad(gctx, hash, assoc, assoclen);
}
static void finish_tag(struct gcm_ctx *gctx, struct Hash_ctx *hash, int len)
{
int i;
unsigned char len_ac[16 + PPC_ALIGN];
unsigned char *aclen = PTR_ALIGN((void *)len_ac, PPC_ALIGN);
__be64 clen = cpu_to_be64(len << 3);
__be64 alen = cpu_to_be64(gctx->aadLen << 3);
if (len == 0 && gctx->aadLen == 0) {
memcpy(hash->Htable, gctx->ivtag, 16);
return;
}
/*
* Len is in bits.
*/
*((__be64 *)(aclen)) = alen;
*((__be64 *)(aclen+8)) = clen;
/*
* hash (AAD len and len)
*/
gcm_ghash_p8(hash->Htable, hash->Htable+32, aclen, 16);
for (i = 0; i < 16; i++)
hash->Htable[i] ^= gctx->ivtag[i];
}
/*
 * setauthsize hook: validate the requested ICV length.  GCM permits
 * tags of 4, 8 and 12..16 bytes; everything else is -EINVAL.
 * (@tfm is unused.)
 */
static int set_authsize(struct crypto_aead *tfm, unsigned int authsize)
{
	if (authsize == 4 || authsize == 8)
		return 0;
	if (authsize >= 12 && authsize <= 16)
		return 0;
	return -EINVAL;
}
/*
 * setkey hook: expand the AES key with the P8 assembler helper.  Any
 * nonzero return from the helper is reported as -EINVAL.
 */
static int p10_aes_gcm_setkey(struct crypto_aead *aead, const u8 *key,
			      unsigned int keylen)
{
	struct crypto_tfm *tfm = crypto_aead_tfm(aead);
	struct p10_aes_gcm_ctx *ctx = crypto_tfm_ctx(tfm);
	int ret;

	vsx_begin();	/* key expansion uses the vector unit */
	ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key);
	vsx_end();

	return ret ? -EINVAL : 0;
}
/*
 * Shared body for the AEAD .encrypt/.decrypt hooks (@enc: 1 = encrypt,
 * 0 = decrypt).  Linearizes the AAD, initializes per-request GCM state
 * on the stack, walks the payload with the skcipher walk API running
 * the stitched assembler over each span, then finalizes the tag: on
 * encrypt it is appended to dst, on decrypt it is compared against the
 * tag at the end of src and -EBADMSG returned on mismatch.
 */
static int p10_aes_gcm_crypt(struct aead_request *req, int enc)
{
	struct crypto_tfm *tfm = req->base.tfm;
	struct p10_aes_gcm_ctx *ctx = crypto_tfm_ctx(tfm);
	u8 databuf[sizeof(struct gcm_ctx) + PPC_ALIGN];
	struct gcm_ctx *gctx = PTR_ALIGN((void *)databuf, PPC_ALIGN);
	u8 hashbuf[sizeof(struct Hash_ctx) + PPC_ALIGN];
	struct Hash_ctx *hash = PTR_ALIGN((void *)hashbuf, PPC_ALIGN);
	struct scatter_walk assoc_sg_walk;
	struct skcipher_walk walk;
	u8 *assocmem = NULL;
	u8 *assoc;
	unsigned int assoclen = req->assoclen;
	unsigned int cryptlen = req->cryptlen;
	unsigned char ivbuf[AES_BLOCK_SIZE+PPC_ALIGN];
	unsigned char *iv = PTR_ALIGN((void *)ivbuf, PPC_ALIGN);
	int ret;
	unsigned long auth_tag_len = crypto_aead_authsize(__crypto_aead_cast(tfm));
	u8 otag[16];
	int total_processed = 0;

	memset(databuf, 0, sizeof(databuf));
	memset(hashbuf, 0, sizeof(hashbuf));	/* gcmp10_init needs H == 0 */
	memset(ivbuf, 0, sizeof(ivbuf));
	memcpy(iv, req->iv, GCM_IV_SIZE);

	/* Linearize assoc, if not already linear */
	/*
	 * NOTE(review): this only checks the first sg entry's length, so
	 * it presumably means "whole AAD fits in the first entry"; the
	 * second conjunct makes the test true for any non-empty first
	 * entry — verify against callers with multi-entry AAD.
	 */
	if (req->src->length >= assoclen && req->src->length) {
		scatterwalk_start(&assoc_sg_walk, req->src);
		assoc = scatterwalk_map(&assoc_sg_walk);
	} else {
		gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
			      GFP_KERNEL : GFP_ATOMIC;

		/* assoc can be any length, so must be on heap */
		assocmem = kmalloc(assoclen, flags);
		if (unlikely(!assocmem))
			return -ENOMEM;
		assoc = assocmem;

		scatterwalk_map_and_copy(assoc, req->src, 0, assoclen, 0);
	}

	vsx_begin();
	gcmp10_init(gctx, iv, (unsigned char *) &ctx->enc_key, hash, assoc, assoclen);
	vsx_end();

	/* AAD has been hashed; release the mapping or the bounce buffer. */
	if (!assocmem)
		scatterwalk_unmap(assoc);
	else
		kfree(assocmem);

	if (enc)
		ret = skcipher_walk_aead_encrypt(&walk, req, false);
	else
		ret = skcipher_walk_aead_decrypt(&walk, req, false);
	if (ret)
		return ret;

	while (walk.nbytes > 0 && ret == 0) {
		vsx_begin();
		if (enc)
			aes_p10_gcm_encrypt(walk.src.virt.addr,
					    walk.dst.virt.addr,
					    walk.nbytes,
					    &ctx->enc_key, gctx->iv, hash->Htable);
		else
			aes_p10_gcm_decrypt(walk.src.virt.addr,
					    walk.dst.virt.addr,
					    walk.nbytes,
					    &ctx->enc_key, gctx->iv, hash->Htable);
		vsx_end();

		total_processed += walk.nbytes;
		ret = skcipher_walk_done(&walk, 0);
	}
	if (ret)
		return ret;

	/* Finalize hash */
	vsx_begin();
	finish_tag(gctx, hash, total_processed);
	vsx_end();

	/* copy Xi to end of dst */
	if (enc)
		scatterwalk_map_and_copy(hash->Htable, req->dst, req->assoclen + cryptlen,
					 auth_tag_len, 1);
	else {
		/* Decrypt: fetch the transmitted tag from the end of src. */
		scatterwalk_map_and_copy(otag, req->src,
					 req->assoclen + cryptlen - auth_tag_len,
					 auth_tag_len, 0);

		/* Constant-time comparison; wipe Xi on failure. */
		if (crypto_memneq(otag, hash->Htable, auth_tag_len)) {
			memzero_explicit(hash->Htable, 16);
			return -EBADMSG;
		}
	}

	return 0;
}
/* AEAD .encrypt hook: thin wrapper around the shared crypt path. */
static int p10_aes_gcm_encrypt(struct aead_request *req)
{
	return p10_aes_gcm_crypt(req, 1);
}
/* AEAD .decrypt hook: thin wrapper around the shared crypt path. */
static int p10_aes_gcm_decrypt(struct aead_request *req)
{
	return p10_aes_gcm_crypt(req, 0);
}
/* AEAD algorithm descriptor registered with the crypto API. */
static struct aead_alg gcm_aes_alg = {
	.ivsize			= GCM_IV_SIZE,
	.maxauthsize		= 16,

	.setauthsize		= set_authsize,
	.setkey			= p10_aes_gcm_setkey,
	.encrypt		= p10_aes_gcm_encrypt,
	.decrypt		= p10_aes_gcm_decrypt,

	.base.cra_name		= "gcm(aes)",
	.base.cra_driver_name	= "p10_aes_gcm",
	.base.cra_priority	= 2100,	/* above generic and P8 drivers */
	.base.cra_blocksize	= 1,	/* stream-cipher-like (CTR) */
	.base.cra_ctxsize	= sizeof(struct p10_aes_gcm_ctx),
	.base.cra_module	= THIS_MODULE,
};
/* Module init: register the AEAD implementation. */
static int __init p10_init(void)
{
	return crypto_register_aead(&gcm_aes_alg);
}
/* Module exit: unregister the AEAD implementation. */
static void __exit p10_exit(void)
{
	crypto_unregister_aead(&gcm_aes_alg);
}

/* Only load on CPUs advertising ISA 3.1 (Power10). */
module_cpu_feature_match(PPC_MODULE_FEATURE_P10, p10_init);
module_exit(p10_exit);
/* SPDX-License-Identifier: GPL-2.0-or-later */
#
# Accelerated AES-GCM stitched implementation for ppc64le.
#
# Copyright 2022- IBM Inc. All rights reserved
#
#===================================================================================
# Written by Danny Tsen <dtsen@linux.ibm.com>
#
# GHASH is based on the Karatsuba multiplication method.
#
# Xi xor X1
#
# X1 * H^4 + X2 * H^3 + x3 * H^2 + X4 * H =
# (X1.h * H4.h + X1.l * H4.l + X1 * H4) +
# (X2.h * H3.h + X2.l * H3.l + X2 * H3) +
# (X3.h * H2.h + X3.l * H2.l + X3 * H2) +
# (X4.h * H.h + X4.l * H.l + X4 * H)
#
# Xi = v0
# H Poly = v2
# Hash keys = v3 - v14
# ( H.l, H, H.h)
# ( H^2.l, H^2, H^2.h)
# ( H^3.l, H^3, H^3.h)
# ( H^4.l, H^4, H^4.h)
#
# v30 is IV
# v31 - counter 1
#
# AES used,
# vs0 - vs14 for round keys
# v15, v16, v17, v18, v19, v20, v21, v22 for 8 blocks (encrypted)
#
# This implementation uses stitched AES-GCM approach to improve overall performance.
# AES is implemented with 8x blocks and GHASH is using 2 4x blocks.
#
# ===================================================================================
#
.machine "any"
.abiversion 1
.text
# 4x loops
# v15 - v18 - input states
# vs1 - vs9 - round keys
#
# Middle AES rounds (1..9) for 4 blocks (v15-v18); round keys are read
# from vs1-vs9 four at a time via xxlor into v19-v22/v23.
.macro Loop_aes_middle4x
# rounds 1-4
xxlor 19+32, 1, 1
xxlor 20+32, 2, 2
xxlor 21+32, 3, 3
xxlor 22+32, 4, 4
vcipher 15, 15, 19
vcipher 16, 16, 19
vcipher 17, 17, 19
vcipher 18, 18, 19
vcipher 15, 15, 20
vcipher 16, 16, 20
vcipher 17, 17, 20
vcipher 18, 18, 20
vcipher 15, 15, 21
vcipher 16, 16, 21
vcipher 17, 17, 21
vcipher 18, 18, 21
vcipher 15, 15, 22
vcipher 16, 16, 22
vcipher 17, 17, 22
vcipher 18, 18, 22
# rounds 5-8
xxlor 19+32, 5, 5
xxlor 20+32, 6, 6
xxlor 21+32, 7, 7
xxlor 22+32, 8, 8
vcipher 15, 15, 19
vcipher 16, 16, 19
vcipher 17, 17, 19
vcipher 18, 18, 19
vcipher 15, 15, 20
vcipher 16, 16, 20
vcipher 17, 17, 20
vcipher 18, 18, 20
vcipher 15, 15, 21
vcipher 16, 16, 21
vcipher 17, 17, 21
vcipher 18, 18, 21
vcipher 15, 15, 22
vcipher 16, 16, 22
vcipher 17, 17, 22
vcipher 18, 18, 22
# round 9
xxlor 23+32, 9, 9
vcipher 15, 15, 23
vcipher 16, 16, 23
vcipher 17, 17, 23
vcipher 18, 18, 23
.endm
# 8x loops
# v15 - v22 - input states
# vs1 - vs9 - round keys
#
# Middle AES rounds (1..9) for 8 blocks; final/extra rounds for 192- and
# 256-bit keys are handled by the caller.
.macro Loop_aes_middle8x
# rounds 1-4
xxlor 23+32, 1, 1
xxlor 24+32, 2, 2
xxlor 25+32, 3, 3
xxlor 26+32, 4, 4
vcipher 15, 15, 23
vcipher 16, 16, 23
vcipher 17, 17, 23
vcipher 18, 18, 23
vcipher 19, 19, 23
vcipher 20, 20, 23
vcipher 21, 21, 23
vcipher 22, 22, 23
vcipher 15, 15, 24
vcipher 16, 16, 24
vcipher 17, 17, 24
vcipher 18, 18, 24
vcipher 19, 19, 24
vcipher 20, 20, 24
vcipher 21, 21, 24
vcipher 22, 22, 24
vcipher 15, 15, 25
vcipher 16, 16, 25
vcipher 17, 17, 25
vcipher 18, 18, 25
vcipher 19, 19, 25
vcipher 20, 20, 25
vcipher 21, 21, 25
vcipher 22, 22, 25
vcipher 15, 15, 26
vcipher 16, 16, 26
vcipher 17, 17, 26
vcipher 18, 18, 26
vcipher 19, 19, 26
vcipher 20, 20, 26
vcipher 21, 21, 26
vcipher 22, 22, 26
# rounds 5-8
xxlor 23+32, 5, 5
xxlor 24+32, 6, 6
xxlor 25+32, 7, 7
xxlor 26+32, 8, 8
vcipher 15, 15, 23
vcipher 16, 16, 23
vcipher 17, 17, 23
vcipher 18, 18, 23
vcipher 19, 19, 23
vcipher 20, 20, 23
vcipher 21, 21, 23
vcipher 22, 22, 23
vcipher 15, 15, 24
vcipher 16, 16, 24
vcipher 17, 17, 24
vcipher 18, 18, 24
vcipher 19, 19, 24
vcipher 20, 20, 24
vcipher 21, 21, 24
vcipher 22, 22, 24
vcipher 15, 15, 25
vcipher 16, 16, 25
vcipher 17, 17, 25
vcipher 18, 18, 25
vcipher 19, 19, 25
vcipher 20, 20, 25
vcipher 21, 21, 25
vcipher 22, 22, 25
vcipher 15, 15, 26
vcipher 16, 16, 26
vcipher 17, 17, 26
vcipher 18, 18, 26
vcipher 19, 19, 26
vcipher 20, 20, 26
vcipher 21, 21, 26
vcipher 22, 22, 26
# round 9
xxlor 23+32, 9, 9
vcipher 15, 15, 23
vcipher 16, 16, 23
vcipher 17, 17, 23
vcipher 18, 18, 23
vcipher 19, 19, 23
vcipher 20, 20, 23
vcipher 21, 21, 23
vcipher 22, 22, 23
.endm
# Middle AES rounds (1..9) for a single block in v15; round keys are
# read from vs1-vs9.
.macro Loop_aes_middle_1x
xxlor 19+32, 1, 1
xxlor 20+32, 2, 2
xxlor 21+32, 3, 3
xxlor 22+32, 4, 4
vcipher 15, 15, 19
vcipher 15, 15, 20
vcipher 15, 15, 21
vcipher 15, 15, 22
xxlor 19+32, 5, 5
xxlor 20+32, 6, 6
xxlor 21+32, 7, 7
xxlor 22+32, 8, 8
vcipher 15, 15, 19
vcipher 15, 15, 20
vcipher 15, 15, 21
vcipher 15, 15, 22
xxlor 19+32, 9, 9
vcipher 15, 15, 19
.endm
#
# Compute 4x hash values based on Karatsuba method.
#
# In:  v0 = Xi, v15-v18 = 4 input blocks, v2 = H poly,
#      v3-v14 = hash key powers (H^4 used with v15 ... H with v18).
# Out: v0 = updated Xi.  Clobbers v23-v29.
#
ppc_aes_gcm_ghash:
vxor 15, 15, 0
# low products
vpmsumd 23, 12, 15 # H4.L * X.L
vpmsumd 24, 9, 16
vpmsumd 25, 6, 17
vpmsumd 26, 3, 18
vxor 23, 23, 24
vxor 23, 23, 25
vxor 23, 23, 26 # L
# middle products
vpmsumd 24, 13, 15 # H4.L * X.H + H4.H * X.L
vpmsumd 25, 10, 16 # H3.L * X1.H + H3.H * X1.L
vpmsumd 26, 7, 17
vpmsumd 27, 4, 18
vxor 24, 24, 25
vxor 24, 24, 26
vxor 24, 24, 27 # M
# sum hash and reduction with H Poly
vpmsumd 28, 23, 2 # reduction
vxor 29, 29, 29
vsldoi 26, 24, 29, 8 # mL
vsldoi 29, 29, 24, 8 # mH
vxor 23, 23, 26 # mL + L
vsldoi 23, 23, 23, 8 # swap
vxor 23, 23, 28
# high products
vpmsumd 24, 14, 15 # H4.H * X.H
vpmsumd 25, 11, 16
vpmsumd 26, 8, 17
vpmsumd 27, 5, 18
vxor 24, 24, 25
vxor 24, 24, 26
vxor 24, 24, 27
vxor 24, 24, 29
# sum hash and reduction with H Poly
vsldoi 27, 23, 23, 8 # swap
vpmsumd 23, 23, 2
vxor 27, 27, 24
vxor 23, 23, 27
xxlor 32, 23+32, 23+32 # update hash
blr
#
# Combine two 4x ghash
# v15 - v22 - input blocks
#
# Runs the 4x Karatsuba GHASH twice, feeding the first result as Xi
# into the second group.  In: v0 = Xi; Out: v0 = updated Xi.
# Clobbers v23-v29.
#
.macro ppc_aes_gcm_ghash2_4x
# first 4x hash
vxor 15, 15, 0 # Xi + X
vpmsumd 23, 12, 15 # H4.L * X.L
vpmsumd 24, 9, 16
vpmsumd 25, 6, 17
vpmsumd 26, 3, 18
vxor 23, 23, 24
vxor 23, 23, 25
vxor 23, 23, 26 # L
vpmsumd 24, 13, 15 # H4.L * X.H + H4.H * X.L
vpmsumd 25, 10, 16 # H3.L * X1.H + H3.H * X1.L
vpmsumd 26, 7, 17
vpmsumd 27, 4, 18
vxor 24, 24, 25
vxor 24, 24, 26
# sum hash and reduction with H Poly
vpmsumd 28, 23, 2 # reduction
vxor 29, 29, 29
vxor 24, 24, 27 # M
vsldoi 26, 24, 29, 8 # mL
vsldoi 29, 29, 24, 8 # mH
vxor 23, 23, 26 # mL + L
vsldoi 23, 23, 23, 8 # swap
vxor 23, 23, 28
vpmsumd 24, 14, 15 # H4.H * X.H
vpmsumd 25, 11, 16
vpmsumd 26, 8, 17
vpmsumd 27, 5, 18
vxor 24, 24, 25
vxor 24, 24, 26
vxor 24, 24, 27 # H
vxor 24, 24, 29 # H + mH
# sum hash and reduction with H Poly
vsldoi 27, 23, 23, 8 # swap
vpmsumd 23, 23, 2
vxor 27, 27, 24
vxor 27, 23, 27 # 1st Xi
# 2nd 4x hash
vpmsumd 24, 9, 20
vpmsumd 25, 6, 21
vpmsumd 26, 3, 22
vxor 19, 19, 27 # Xi + X
vpmsumd 23, 12, 19 # H4.L * X.L
vxor 23, 23, 24
vxor 23, 23, 25
vxor 23, 23, 26 # L
vpmsumd 24, 13, 19 # H4.L * X.H + H4.H * X.L
vpmsumd 25, 10, 20 # H3.L * X1.H + H3.H * X1.L
vpmsumd 26, 7, 21
vpmsumd 27, 4, 22
vxor 24, 24, 25
vxor 24, 24, 26
# sum hash and reduction with H Poly
vpmsumd 28, 23, 2 # reduction
vxor 29, 29, 29
vxor 24, 24, 27 # M
vsldoi 26, 24, 29, 8 # mL
vsldoi 29, 29, 24, 8 # mH
vxor 23, 23, 26 # mL + L
vsldoi 23, 23, 23, 8 # swap
vxor 23, 23, 28
vpmsumd 24, 14, 19 # H4.H * X.H
vpmsumd 25, 11, 20
vpmsumd 26, 8, 21
vpmsumd 27, 5, 22
vxor 24, 24, 25
vxor 24, 24, 26
vxor 24, 24, 27 # H
vxor 24, 24, 29 # H + mH
# sum hash and reduction with H Poly
vsldoi 27, 23, 23, 8 # swap
vpmsumd 23, 23, 2
vxor 27, 27, 24
vxor 23, 23, 27
xxlor 32, 23+32, 23+32 # update hash
.endm
#
# Compute update single hash
#
# GHASH one block: v28 = input block, v0 = Xi in/out, v3-v5 = H.l/H/H.h,
# v2 = H poly.  Clobbers v19-v27.
#
.macro ppc_update_hash_1x
vxor 28, 28, 0
vxor 19, 19, 19
vpmsumd 22, 3, 28 # L
vpmsumd 23, 4, 28 # M
vpmsumd 24, 5, 28 # H
# 1st reduction phase
vpmsumd 27, 22, 2 # reduction
vsldoi 25, 23, 19, 8 # mL
vsldoi 26, 19, 23, 8 # mH
vxor 22, 22, 25 # LL + LL
vxor 24, 24, 26 # HH + HH
vsldoi 22, 22, 22, 8 # swap
vxor 22, 22, 27
# 2nd reduction phase
vsldoi 20, 22, 22, 8 # swap
vpmsumd 22, 22, 2 # reduction
vxor 20, 20, 24
vxor 22, 22, 20
vmr 0, 22 # update hash
.endm
# Allocate a 640-byte frame and save the non-volatile state used by the
# stitched routines: GPRs r14-r21, VRs v20-v31 (at offset 256) and
# VSRs vs14-vs22 (at offset 464); LR is stored at 656(1).
.macro SAVE_REGS
stdu 1,-640(1)
mflr 0
std 14,112(1)
std 15,120(1)
std 16,128(1)
std 17,136(1)
std 18,144(1)
std 19,152(1)
std 20,160(1)
std 21,168(1)
li 9, 256
stvx 20, 9, 1
addi 9, 9, 16
stvx 21, 9, 1
addi 9, 9, 16
stvx 22, 9, 1
addi 9, 9, 16
stvx 23, 9, 1
addi 9, 9, 16
stvx 24, 9, 1
addi 9, 9, 16
stvx 25, 9, 1
addi 9, 9, 16
stvx 26, 9, 1
addi 9, 9, 16
stvx 27, 9, 1
addi 9, 9, 16
stvx 28, 9, 1
addi 9, 9, 16
stvx 29, 9, 1
addi 9, 9, 16
stvx 30, 9, 1
addi 9, 9, 16
stvx 31, 9, 1
stxv 14, 464(1)
stxv 15, 480(1)
stxv 16, 496(1)
stxv 17, 512(1)
stxv 18, 528(1)
stxv 19, 544(1)
stxv 20, 560(1)
stxv 21, 576(1)
stxv 22, 592(1)
std 0, 656(1)
.endm
# Restore everything stored by SAVE_REGS (in reverse: VSRs, VRs, LR,
# GPRs) and pop the 640-byte frame.
.macro RESTORE_REGS
lxv 14, 464(1)
lxv 15, 480(1)
lxv 16, 496(1)
lxv 17, 512(1)
lxv 18, 528(1)
lxv 19, 544(1)
lxv 20, 560(1)
lxv 21, 576(1)
lxv 22, 592(1)
li 9, 256
lvx 20, 9, 1
addi 9, 9, 16
lvx 21, 9, 1
addi 9, 9, 16
lvx 22, 9, 1
addi 9, 9, 16
lvx 23, 9, 1
addi 9, 9, 16
lvx 24, 9, 1
addi 9, 9, 16
lvx 25, 9, 1
addi 9, 9, 16
lvx 26, 9, 1
addi 9, 9, 16
lvx 27, 9, 1
addi 9, 9, 16
lvx 28, 9, 1
addi 9, 9, 16
lvx 29, 9, 1
addi 9, 9, 16
lvx 30, 9, 1
addi 9, 9, 16
lvx 31, 9, 1
ld 0, 656(1)
ld 14,112(1)
ld 15,120(1)
ld 16,128(1)
ld 17,136(1)
ld 18,144(1)
ld 19,152(1)
ld 20,160(1)
ld 21,168(1)
mtlr 0
addi 1, 1, 640
.endm
# Load Xi (v0), the H polynomial (v2) and the hash key powers H..H^4
# (v3-v14, each as .l/mid/.h triple) from the gcm_table pointed to by r8.
.macro LOAD_HASH_TABLE
# Load Xi
lxvb16x 32, 0, 8 # load Xi
# load Hash - h^4, h^3, h^2, h
li 10, 32
lxvd2x 2+32, 10, 8 # H Poli
li 10, 48
lxvd2x 3+32, 10, 8 # Hl
li 10, 64
lxvd2x 4+32, 10, 8 # H
li 10, 80
lxvd2x 5+32, 10, 8 # Hh
li 10, 96
lxvd2x 6+32, 10, 8 # H^2l
li 10, 112
lxvd2x 7+32, 10, 8 # H^2
li 10, 128
lxvd2x 8+32, 10, 8 # H^2h
li 10, 144
lxvd2x 9+32, 10, 8 # H^3l
li 10, 160
lxvd2x 10+32, 10, 8 # H^3
li 10, 176
lxvd2x 11+32, 10, 8 # H^3h
li 10, 192
lxvd2x 12+32, 10, 8 # H^4l
li 10, 208
lxvd2x 13+32, 10, 8 # H^4
li 10, 224
lxvd2x 14+32, 10, 8 # H^4h
.endm
#
# aes_p10_gcm_encrypt (const void *inp, void *out, size_t len,
# const char *rk, unsigned char iv[16], void *Xip);
#
# r3 - inp
# r4 - out
# r5 - len
# r6 - AES round keys
# r7 - iv and other data
# r8 - Xi, HPoli, hash keys
#
# rounds is at offset 240 in rk
# Xi is at 0 in gcm_table (Xip).
#
# Main path processes 8 blocks per iteration (AES-CTR stitched with a
# 2x4 GHASH), then single 16-byte blocks, then a masked partial block.
# Partial-block state is carried in the u64 at 56(r7) (gcm_ctx.Plen).
#
.global aes_p10_gcm_encrypt
.align 5
aes_p10_gcm_encrypt:
SAVE_REGS
LOAD_HASH_TABLE
# initialize ICB: GHASH( IV ), IV - r7
lxvb16x 30+32, 0, 7 # load IV - v30
mr 12, 5 # length
li 11, 0 # block index
# counter 1
vxor 31, 31, 31
vspltisb 22, 1
vsldoi 31, 31, 22,1 # counter 1
# load round key to VSR
lxv 0, 0(6)
lxv 1, 0x10(6)
lxv 2, 0x20(6)
lxv 3, 0x30(6)
lxv 4, 0x40(6)
lxv 5, 0x50(6)
lxv 6, 0x60(6)
lxv 7, 0x70(6)
lxv 8, 0x80(6)
lxv 9, 0x90(6)
lxv 10, 0xa0(6)
# load rounds - 10 (128), 12 (192), 14 (256)
lwz 9,240(6)
#
# vxor state, state, w # addroundkey
xxlor 32+29, 0, 0
vxor 15, 30, 29 # IV + round key - add round key 0
cmpdi 9, 10
beq Loop_aes_gcm_8x
# load 2 more round keys (v11, v12)
lxv 11, 0xb0(6)
lxv 12, 0xc0(6)
cmpdi 9, 12
beq Loop_aes_gcm_8x
# load 2 more round keys (v11, v12, v13, v14)
lxv 13, 0xd0(6)
lxv 14, 0xe0(6)
cmpdi 9, 14
beq Loop_aes_gcm_8x
b aes_gcm_out
.align 5
Loop_aes_gcm_8x:
mr 14, 3
mr 9, 4
#
# check partial block
#
Continue_partial_check:
ld 15, 56(7)
cmpdi 15, 0
beq Continue
bgt Final_block
cmpdi 15, 16
blt Final_block
Continue:
# n blocks
li 10, 128
divdu 10, 12, 10 # n 128 bytes-blocks
cmpdi 10, 0
beq Loop_last_block
# prepare 8 counter blocks, pre-xored with round key 0 (v29)
vaddudm 30, 30, 31 # IV + counter
vxor 16, 30, 29
vaddudm 30, 30, 31
vxor 17, 30, 29
vaddudm 30, 30, 31
vxor 18, 30, 29
vaddudm 30, 30, 31
vxor 19, 30, 29
vaddudm 30, 30, 31
vxor 20, 30, 29
vaddudm 30, 30, 31
vxor 21, 30, 29
vaddudm 30, 30, 31
vxor 22, 30, 29
mtctr 10
# byte offsets of blocks 1-7 within a 128-byte chunk
li 15, 16
li 16, 32
li 17, 48
li 18, 64
li 19, 80
li 20, 96
li 21, 112
lwz 10, 240(6)
Loop_8x_block:
lxvb16x 15, 0, 14 # load block
lxvb16x 16, 15, 14 # load block
lxvb16x 17, 16, 14 # load block
lxvb16x 18, 17, 14 # load block
lxvb16x 19, 18, 14 # load block
lxvb16x 20, 19, 14 # load block
lxvb16x 21, 20, 14 # load block
lxvb16x 22, 21, 14 # load block
addi 14, 14, 128
Loop_aes_middle8x
xxlor 23+32, 10, 10
cmpdi 10, 10
beq Do_next_ghash
# 192 bits
xxlor 24+32, 11, 11
vcipher 15, 15, 23
vcipher 16, 16, 23
vcipher 17, 17, 23
vcipher 18, 18, 23
vcipher 19, 19, 23
vcipher 20, 20, 23
vcipher 21, 21, 23
vcipher 22, 22, 23
vcipher 15, 15, 24
vcipher 16, 16, 24
vcipher 17, 17, 24
vcipher 18, 18, 24
vcipher 19, 19, 24
vcipher 20, 20, 24
vcipher 21, 21, 24
vcipher 22, 22, 24
xxlor 23+32, 12, 12
cmpdi 10, 12
beq Do_next_ghash
# 256 bits
xxlor 24+32, 13, 13
vcipher 15, 15, 23
vcipher 16, 16, 23
vcipher 17, 17, 23
vcipher 18, 18, 23
vcipher 19, 19, 23
vcipher 20, 20, 23
vcipher 21, 21, 23
vcipher 22, 22, 23
vcipher 15, 15, 24
vcipher 16, 16, 24
vcipher 17, 17, 24
vcipher 18, 18, 24
vcipher 19, 19, 24
vcipher 20, 20, 24
vcipher 21, 21, 24
vcipher 22, 22, 24
xxlor 23+32, 14, 14
cmpdi 10, 14
beq Do_next_ghash
b aes_gcm_out
Do_next_ghash:
#
# last round
vcipherlast 15, 15, 23
vcipherlast 16, 16, 23
# xor keystream with plaintext and store ciphertext
xxlxor 47, 47, 15
stxvb16x 47, 0, 9 # store output
xxlxor 48, 48, 16
stxvb16x 48, 15, 9 # store output
vcipherlast 17, 17, 23
vcipherlast 18, 18, 23
xxlxor 49, 49, 17
stxvb16x 49, 16, 9 # store output
xxlxor 50, 50, 18
stxvb16x 50, 17, 9 # store output
vcipherlast 19, 19, 23
vcipherlast 20, 20, 23
xxlxor 51, 51, 19
stxvb16x 51, 18, 9 # store output
xxlxor 52, 52, 20
stxvb16x 52, 19, 9 # store output
vcipherlast 21, 21, 23
vcipherlast 22, 22, 23
xxlxor 53, 53, 21
stxvb16x 53, 20, 9 # store output
xxlxor 54, 54, 22
stxvb16x 54, 21, 9 # store output
addi 9, 9, 128
# ghash here
# hash the 8 ciphertext blocks (still in v15-v22 after the xor)
ppc_aes_gcm_ghash2_4x
# set up the next 8 counter blocks
xxlor 27+32, 0, 0
vaddudm 30, 30, 31 # IV + counter
vmr 29, 30
vxor 15, 30, 27 # add round key
vaddudm 30, 30, 31
vxor 16, 30, 27
vaddudm 30, 30, 31
vxor 17, 30, 27
vaddudm 30, 30, 31
vxor 18, 30, 27
vaddudm 30, 30, 31
vxor 19, 30, 27
vaddudm 30, 30, 31
vxor 20, 30, 27
vaddudm 30, 30, 31
vxor 21, 30, 27
vaddudm 30, 30, 31
vxor 22, 30, 27
addi 12, 12, -128
addi 11, 11, 128
bdnz Loop_8x_block
vmr 30, 29
stxvb16x 30+32, 0, 7 # update IV
Loop_last_block:
cmpdi 12, 0
beq aes_gcm_out
# loop last few blocks
li 10, 16
divdu 10, 12, 10
mtctr 10
lwz 10, 240(6)
cmpdi 12, 16
blt Final_block
Next_rem_block:
lxvb16x 15, 0, 14 # load block
Loop_aes_middle_1x
xxlor 23+32, 10, 10
cmpdi 10, 10
beq Do_next_1x
# 192 bits
xxlor 24+32, 11, 11
vcipher 15, 15, 23
vcipher 15, 15, 24
xxlor 23+32, 12, 12
cmpdi 10, 12
beq Do_next_1x
# 256 bits
xxlor 24+32, 13, 13
vcipher 15, 15, 23
vcipher 15, 15, 24
xxlor 23+32, 14, 14
cmpdi 10, 14
beq Do_next_1x
# falls through for 14 rounds
Do_next_1x:
vcipherlast 15, 15, 23
xxlxor 47, 47, 15
stxvb16x 47, 0, 9 # store output
addi 14, 14, 16
addi 9, 9, 16
# hash the single ciphertext block
vmr 28, 15
ppc_update_hash_1x
addi 12, 12, -16
addi 11, 11, 16
xxlor 19+32, 0, 0
vaddudm 30, 30, 31 # IV + counter
vxor 15, 30, 19 # add round key
bdnz Next_rem_block
li 15, 0
std 15, 56(7) # clear partial?
stxvb16x 30+32, 0, 7 # update IV
cmpdi 12, 0
beq aes_gcm_out
Final_block:
# fewer than 16 bytes left: encrypt one masked partial block
lwz 10, 240(6)
Loop_aes_middle_1x
xxlor 23+32, 10, 10
cmpdi 10, 10
beq Do_final_1x
# 192 bits
xxlor 24+32, 11, 11
vcipher 15, 15, 23
vcipher 15, 15, 24
xxlor 23+32, 12, 12
cmpdi 10, 12
beq Do_final_1x
# 256 bits
xxlor 24+32, 13, 13
vcipher 15, 15, 23
vcipher 15, 15, 24
xxlor 23+32, 14, 14
cmpdi 10, 14
beq Do_final_1x
Do_final_1x:
vcipherlast 15, 15, 23
# check partial block
li 21, 0 # encrypt
ld 15, 56(7) # partial?
cmpdi 15, 0
beq Normal_block
bl Do_partial_block
cmpdi 12, 0
ble aes_gcm_out
b Continue_partial_check
Normal_block:
lxvb16x 15, 0, 14 # load last block
xxlxor 47, 47, 15
# create partial block mask
li 15, 16
sub 15, 15, 12 # index to the mask
vspltisb 16, -1 # first 16 bytes - 0xffff...ff
vspltisb 17, 0 # second 16 bytes - 0x0000...00
li 10, 192
stvx 16, 10, 1
addi 10, 10, 16
stvx 17, 10, 1
addi 10, 1, 192
lxvb16x 16, 15, 10 # load partial block mask
xxland 47, 47, 16
# hash the masked final block
vmr 28, 15
ppc_update_hash_1x
# * should store only the remaining bytes.
bl Write_partial_block
stxvb16x 30+32, 0, 7 # update IV
std 12, 56(7) # update partial?
li 16, 16
stxvb16x 32, 0, 8 # write out Xi
stxvb16x 32, 16, 8 # write out Xi
b aes_gcm_out
#
# Compute data mask
#
# Builds a byte mask in \_mask that is 0x00 for the first \_start
# bytes, 0xff for the next \_end bytes, 0x00 after; uses the scratch
# area at 192(r1).  Clobbers v16, v17, r10.
#
.macro GEN_MASK _mask _start _end
vspltisb 16, -1 # first 16 bytes - 0xffff...ff
vspltisb 17, 0 # second 16 bytes - 0x0000...00
li 10, 192
stxvb16x 17+32, 10, 1
add 10, 10, \_start
stxvb16x 16+32, 10, 1
add 10, 10, \_end
stxvb16x 17+32, 10, 1
addi 10, 1, 192
lxvb16x \_mask, 0, 10 # load partial block mask
.endm
#
# Handle multiple partial blocks for encrypt and decrypt
# operations.
#
# In: r15 = bytes already in the partial block, r5/r12 = lengths,
#     v15 = keystream block, r21 = 0 for encrypt / nonzero for decrypt.
# Continues an unfinished 16-byte block across calls, updating Xi,
# the IV and the partial count at 56(r7).
#
Do_partial_block:
add 17, 15, 5
cmpdi 17, 16
bgt Big_block
GEN_MASK 18, 15, 5
b _Partial
Big_block:
li 16, 16
GEN_MASK 18, 15, 16
_Partial:
lxvb16x 17+32, 0, 14 # load last block
# shift the input right by the existing partial length
sldi 16, 15, 3
mtvsrdd 32+16, 0, 16
vsro 17, 17, 16
xxlxor 47, 47, 17+32
xxland 47, 47, 18
vxor 0, 0, 0 # clear Xi
vmr 28, 15
cmpdi 21, 0 # encrypt/decrypt ops?
beq Skip_decrypt
# for decrypt, hash the masked ciphertext input instead
xxland 32+28, 32+17, 18
Skip_decrypt:
ppc_update_hash_1x
# fold the new partial hash into the stored Xi copy
li 16, 16
lxvb16x 32+29, 16, 8
vxor 0, 0, 29
stxvb16x 32, 0, 8 # save Xi
stxvb16x 32, 16, 8 # save Xi
# store partial block
# loop the rest of the stream if any
sldi 16, 15, 3
mtvsrdd 32+16, 0, 16
vslo 15, 15, 16
#stxvb16x 15+32, 0, 9 # last block
li 16, 16
sub 17, 16, 15 # 16 - partial
add 16, 15, 5
cmpdi 16, 16
bgt Larger_16
mr 17, 5
Larger_16:
# write partial
li 10, 192
stxvb16x 15+32, 10, 1 # save current block
# byte-by-byte copy of r17 output bytes
addi 10, 9, -1
addi 16, 1, 191
mtctr 17 # move partial byte count
Write_last_partial:
lbzu 18, 1(16)
stbu 18, 1(10)
bdnz Write_last_partial
# Complete loop partial
add 14, 14, 17
add 9, 9, 17
sub 12, 12, 17
add 11, 11, 17
add 15, 15, 5
cmpdi 15, 16
blt Save_partial
# the 16-byte block is complete: advance the counter, reset partial
vaddudm 30, 30, 31
stxvb16x 30+32, 0, 7 # update IV
xxlor 32+29, 0, 0
vxor 15, 30, 29 # IV + round key - add round key 0
li 15, 0
std 15, 56(7) # partial done - clear
b Partial_done
Save_partial:
std 15, 56(7) # partial
Partial_done:
blr
#
# Write partial block
# r9 - output
# r12 - remaining bytes
# v15 - partial input data
#
# Stages v15 at 192(r1) and byte-copies r12 bytes to the output buffer.
# NOTE(review): clobbers r10, r14, r15, r16 and ctr -- callers must not
# rely on those afterwards (r14 is the input pointer elsewhere).
Write_partial_block:
li 10, 192
stxvb16x 15+32, 10, 1 # last block
# pre-increment copy loop, hence the -1 / 191 pointer bias
addi 10, 9, -1
addi 16, 1, 191
mtctr 12 # remaining bytes
li 15, 0
Write_last_byte:
lbzu 14, 1(16)
stbu 14, 1(10)
bdnz Write_last_byte
blr
# Common exit path: store the GHASH state (v0) to the Xi buffer (r8),
# return the number of bytes processed (blocks done r11 + tail r12)
# in r3, and restore the non-volatile registers saved by SAVE_REGS.
aes_gcm_out:
# out = state
stxvb16x 32, 0, 8 # write out Xi
add 3, 11, 12 # return count
RESTORE_REGS
blr
#
# 8x Decrypt
#
# aes_p10_gcm_decrypt -- entry point for the 8x-stitched AES/GCM decrypt.
# Register use (inferred from the loads/stores below, mirroring encrypt):
#   r3 - input, r4 - output, r5 - length in bytes
#   r6 - expanded round keys, round count at 240(r6)
#   r7 - IV/ICB state, partial-block count at 56(r7)
#   r8 - Xi (GHASH accumulator)
.global aes_p10_gcm_decrypt
.align 5
aes_p10_gcm_decrypt:
SAVE_REGS
LOAD_HASH_TABLE
# initialize ICB: GHASH( IV ), IV - r7
lxvb16x 30+32, 0, 7 # load IV - v30
mr 12, 5 # length
li 11, 0 # block index
# counter 1
vxor 31, 31, 31
vspltisb 22, 1
vsldoi 31, 31, 22,1 # counter 1
# load round key to VSR
lxv 0, 0(6)
lxv 1, 0x10(6)
lxv 2, 0x20(6)
lxv 3, 0x30(6)
lxv 4, 0x40(6)
lxv 5, 0x50(6)
lxv 6, 0x60(6)
lxv 7, 0x70(6)
lxv 8, 0x80(6)
lxv 9, 0x90(6)
lxv 10, 0xa0(6)
# load rounds - 10 (128), 12 (192), 14 (256)
lwz 9,240(6)
#
# vxor state, state, w # addroundkey
xxlor 32+29, 0, 0
vxor 15, 30, 29 # IV + round key - add round key 0
# pick the key schedule length by round count; any other value
# falls through to aes_gcm_out without processing
cmpdi 9, 10
beq Loop_aes_gcm_8x_dec
# load 2 more round keys (v11, v12)
lxv 11, 0xb0(6)
lxv 12, 0xc0(6)
cmpdi 9, 12
beq Loop_aes_gcm_8x_dec
# load 2 more round keys (v11, v12, v13, v14)
lxv 13, 0xd0(6)
lxv 14, 0xe0(6)
cmpdi 9, 14
beq Loop_aes_gcm_8x_dec
b aes_gcm_out
.align 5
# Main decrypt loop: processes 8 blocks (128 bytes) per iteration in
# CTR mode, then folds the ciphertext into GHASH 8 blocks at a time.
Loop_aes_gcm_8x_dec:
mr 14, 3
mr 9, 4
#
# check partial block
#
Continue_partial_check_dec:
ld 15, 56(7)
cmpdi 15, 0
beq Continue_dec
bgt Final_block_dec
# NOTE(review): the partial count is 0..15, so any non-zero value is
# caught by the bgt above; this signed compare/blt looks unreachable --
# kept as-is, matches the encrypt path.
cmpdi 15, 16
blt Final_block_dec
Continue_dec:
# n blcoks
li 10, 128
divdu 10, 12, 10 # n 128 bytes-blocks
cmpdi 10, 0
beq Loop_last_block_dec
# pre-compute 8 counter blocks (v15..v22), each already XORed with
# round key 0 held in v29
vaddudm 30, 30, 31 # IV + counter
vxor 16, 30, 29
vaddudm 30, 30, 31
vxor 17, 30, 29
vaddudm 30, 30, 31
vxor 18, 30, 29
vaddudm 30, 30, 31
vxor 19, 30, 29
vaddudm 30, 30, 31
vxor 20, 30, 29
vaddudm 30, 30, 31
vxor 21, 30, 29
vaddudm 30, 30, 31
vxor 22, 30, 29
mtctr 10
# byte offsets for the 8-block loads/stores
li 15, 16
li 16, 32
li 17, 48
li 18, 64
li 19, 80
li 20, 96
li 21, 112
# r10 now holds the round count again (ctr holds the loop count)
lwz 10, 240(6)
Loop_8x_block_dec:
# load 8 ciphertext blocks into VSR 15..22
lxvb16x 15, 0, 14 # load block
lxvb16x 16, 15, 14 # load block
lxvb16x 17, 16, 14 # load block
lxvb16x 18, 17, 14 # load block
lxvb16x 19, 18, 14 # load block
lxvb16x 20, 19, 14 # load block
lxvb16x 21, 20, 14 # load block
lxvb16x 22, 21, 14 # load block
addi 14, 14, 128
# rounds 1..9 for all 8 counter blocks
Loop_aes_middle8x
xxlor 23+32, 10, 10
cmpdi 10, 10
beq Do_next_ghash_dec
# 192 bits
xxlor 24+32, 11, 11
vcipher 15, 15, 23
vcipher 16, 16, 23
vcipher 17, 17, 23
vcipher 18, 18, 23
vcipher 19, 19, 23
vcipher 20, 20, 23
vcipher 21, 21, 23
vcipher 22, 22, 23
vcipher 15, 15, 24
vcipher 16, 16, 24
vcipher 17, 17, 24
vcipher 18, 18, 24
vcipher 19, 19, 24
vcipher 20, 20, 24
vcipher 21, 21, 24
vcipher 22, 22, 24
xxlor 23+32, 12, 12
cmpdi 10, 12
beq Do_next_ghash_dec
# 256 bits
xxlor 24+32, 13, 13
vcipher 15, 15, 23
vcipher 16, 16, 23
vcipher 17, 17, 23
vcipher 18, 18, 23
vcipher 19, 19, 23
vcipher 20, 20, 23
vcipher 21, 21, 23
vcipher 22, 22, 23
vcipher 15, 15, 24
vcipher 16, 16, 24
vcipher 17, 17, 24
vcipher 18, 18, 24
vcipher 19, 19, 24
vcipher 20, 20, 24
vcipher 21, 21, 24
vcipher 22, 22, 24
xxlor 23+32, 14, 14
cmpdi 10, 14
beq Do_next_ghash_dec
# unexpected round count: bail out
b aes_gcm_out
Do_next_ghash_dec:
#
# last round
# finish the keystream and XOR it with the loaded ciphertext
# (still in VSR 47..54) to produce plaintext output
vcipherlast 15, 15, 23
vcipherlast 16, 16, 23
xxlxor 47, 47, 15
stxvb16x 47, 0, 9 # store output
xxlxor 48, 48, 16
stxvb16x 48, 15, 9 # store output
vcipherlast 17, 17, 23
vcipherlast 18, 18, 23
xxlxor 49, 49, 17
stxvb16x 49, 16, 9 # store output
xxlxor 50, 50, 18
stxvb16x 50, 17, 9 # store output
vcipherlast 19, 19, 23
vcipherlast 20, 20, 23
xxlxor 51, 51, 19
stxvb16x 51, 18, 9 # store output
xxlxor 52, 52, 20
stxvb16x 52, 19, 9 # store output
vcipherlast 21, 21, 23
vcipherlast 22, 22, 23
xxlxor 53, 53, 21
stxvb16x 53, 20, 9 # store output
xxlxor 54, 54, 22
stxvb16x 54, 21, 9 # store output
addi 9, 9, 128
# decrypt hashes the ciphertext: copy the saved input blocks
# (VSR 15..22) back into v15..v22 for GHASH
xxlor 15+32, 15, 15
xxlor 16+32, 16, 16
xxlor 17+32, 17, 17
xxlor 18+32, 18, 18
xxlor 19+32, 19, 19
xxlor 20+32, 20, 20
xxlor 21+32, 21, 21
xxlor 22+32, 22, 22
# ghash here
ppc_aes_gcm_ghash2_4x
# refresh the next 8 counter blocks; v29 keeps the last counter so
# the IV can be rolled back to it after the loop
xxlor 27+32, 0, 0
vaddudm 30, 30, 31 # IV + counter
vmr 29, 30
vxor 15, 30, 27 # add round key
vaddudm 30, 30, 31
vxor 16, 30, 27
vaddudm 30, 30, 31
vxor 17, 30, 27
vaddudm 30, 30, 31
vxor 18, 30, 27
vaddudm 30, 30, 31
vxor 19, 30, 27
vaddudm 30, 30, 31
vxor 20, 30, 27
vaddudm 30, 30, 31
vxor 21, 30, 27
vaddudm 30, 30, 31
vxor 22, 30, 27
addi 12, 12, -128
addi 11, 11, 128
bdnz Loop_8x_block_dec
vmr 30, 29
stxvb16x 30+32, 0, 7 # update IV
# Process the remaining whole 16-byte blocks one at a time.
Loop_last_block_dec:
cmpdi 12, 0
beq aes_gcm_out
# loop last few blocks
li 10, 16
divdu 10, 12, 10
mtctr 10
# r10 re-used for the round count from here on
lwz 10, 240(6)
cmpdi 12, 16
blt Final_block_dec
Next_rem_block_dec:
lxvb16x 15, 0, 14 # load block
# rounds 1..9 on the single counter block in v15
Loop_aes_middle_1x
xxlor 23+32, 10, 10
cmpdi 10, 10
beq Do_next_1x_dec
# 192 bits
xxlor 24+32, 11, 11
vcipher 15, 15, 23
vcipher 15, 15, 24
xxlor 23+32, 12, 12
cmpdi 10, 12
beq Do_next_1x_dec
# 256 bits
xxlor 24+32, 13, 13
vcipher 15, 15, 23
vcipher 15, 15, 24
xxlor 23+32, 14, 14
cmpdi 10, 14
beq Do_next_1x_dec
Do_next_1x_dec:
vcipherlast 15, 15, 23
# plaintext = keystream XOR ciphertext (ciphertext still in VSR 15)
xxlxor 47, 47, 15
stxvb16x 47, 0, 9 # store output
addi 14, 14, 16
addi 9, 9, 16
# hash the ciphertext block (v28) rather than the plaintext
xxlor 28+32, 15, 15
#vmr 28, 15
ppc_update_hash_1x
addi 12, 12, -16
addi 11, 11, 16
# next counter block, pre-XORed with round key 0
xxlor 19+32, 0, 0
vaddudm 30, 30, 31 # IV + counter
vxor 15, 30, 19 # add round key
bdnz Next_rem_block_dec
li 15, 0
std 15, 56(7) # clear partial?
stxvb16x 30+32, 0, 7 # update IV
cmpdi 12, 0
beq aes_gcm_out
# Handle the final sub-16-byte tail (and any buffered partial block).
Final_block_dec:
lwz 10, 240(6)
Loop_aes_middle_1x
xxlor 23+32, 10, 10
cmpdi 10, 10
beq Do_final_1x_dec
# 192 bits
xxlor 24+32, 11, 11
vcipher 15, 15, 23
vcipher 15, 15, 24
xxlor 23+32, 12, 12
cmpdi 10, 12
beq Do_final_1x_dec
# 256 bits
xxlor 24+32, 13, 13
vcipher 15, 15, 23
vcipher 15, 15, 24
xxlor 23+32, 14, 14
cmpdi 10, 14
beq Do_final_1x_dec
Do_final_1x_dec:
vcipherlast 15, 15, 23
# check partial block
li 21, 1 # decrypt
ld 15, 56(7) # partial?
cmpdi 15, 0
beq Normal_block_dec
# continue an existing partial block, then resume the main loop if
# bytes remain
bl Do_partial_block
cmpdi 12, 0
ble aes_gcm_out
b Continue_partial_check_dec
Normal_block_dec:
lxvb16x 15, 0, 14 # load last block
xxlxor 47, 47, 15
# create partial block mask
li 15, 16
sub 15, 15, 12 # index to the mask
vspltisb 16, -1 # first 16 bytes - 0xffff...ff
vspltisb 17, 0 # second 16 bytes - 0x0000...00
# build 0xff..ff00..00 at 192(r1) and load at the byte offset so the
# mask keeps only the first r12 valid bytes
li 10, 192
stvx 16, 10, 1
addi 10, 10, 16
stvx 17, 10, 1
addi 10, 1, 192
lxvb16x 16, 15, 10 # load partial block mask
xxland 47, 47, 16
# decrypt: hash the masked ciphertext (VSR 15), not the plaintext
xxland 32+28, 15, 16
#vmr 28, 15
ppc_update_hash_1x
# * should store only the remaining bytes.
bl Write_partial_block
stxvb16x 30+32, 0, 7 # update IV
std 12, 56(7) # update partial?
li 16, 16
stxvb16x 32, 0, 8 # write out Xi
stxvb16x 32, 16, 8 # write out Xi
b aes_gcm_out
#!/usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0
# PowerPC assembler distiller by <appro>.
#
# Usage: ppc-xlate.pl <flavour> <output>
#   Reads perlasm-style PowerPC source from ARGV/STDIN and writes
#   flavour-specific assembler (linux/aix/osx, 32/64, le/be) to <output>.
my $flavour = shift;   # target flavour string, e.g. "linux64le"
my $output  = shift;   # destination file; STDOUT is redirected into it

# Bug fix: the original `open STDOUT,">$output" || die ...` never died on
# failure -- `||` binds to the filename string, not to open().  Use
# three-arg open (no mode injection via the filename) with low-precedence
# `or` so a failed open actually aborts.
open STDOUT, '>', $output or die "can't open $output: $!";

my %GLOBALS;                     # original symbol name -> mangled name
my $dotinlocallabels = ($flavour =~ /linux/) ? 1 : 0;   # Lfoo -> .Lfoo
################################################################
# directives which need special treatment on different platforms
################################################################
# .globl handler: mangle the symbol per platform ABI and record the
# mapping in %GLOBALS (keyed by the *unmangled* name) for later use.
my $globl = sub {
    my ($dir, $name) = @_;            # the directive itself is ignored
    my $slot = \$GLOBALS{$name};      # record under the original name
    (my $sym = $name) =~ s|^[\.\_]||; # strip one leading dot/underscore
    my $out;
    if ($flavour =~ /aix/) {
        $sym = ".$sym";
    }
    elsif ($flavour =~ /osx/) {
        $sym = "_$sym";
    }
    elsif ($flavour =~ /linux/) {
        $out = "_GLOBAL($sym)";
    }
    $out = ".globl $sym\nalign 5\n$sym:" unless $out;
    $$slot = $sym;
    $out;
};
# .text handler: AIX uses a csect; 64-bit LE Linux additionally needs
# the ELFv2 .abiversion marker.
my $text = sub {
    my $out = ($flavour =~ /aix/) ? ".csect\t.text[PR],7" : ".text";
    if ($flavour =~ /linux.*64le/) {
        $out = ".abiversion 2\n" . $out;
    }
    $out;
};
# .machine handler: the OS X assembler wants a concrete CPU instead of
# "any", and the quotes stripped.
my $machine = sub {
    my ($dir, $arch) = @_;
    if ($flavour =~ /osx/) {
        $arch =~ s/\"//g;
        if ($arch eq "any") {
            $arch = ($flavour =~ /64/) ? "ppc970-64" : "ppc970";
        }
    }
    ".machine $arch";
};
# .size handler: only ELF (Linux) uses it; 64-bit ABIs also get a size
# record for the dot-prefixed entry symbol.
my $size = sub {
    return "" unless $flavour =~ /linux/;
    shift;
    (my $name = shift) =~ s|^[\.\_]||;
    my $dot = ($flavour =~ /64$/) ? "." : "";
    my $out = ".size $name,.-" . $dot . $name;
    $out .= "\n.size .$name,.-.$name" if $flavour =~ /64$/;
    $out;
};
# .asciz handler: expand a quoted string to NUL-terminated .byte data;
# anything that is not a single quoted string collapses to nothing.
my $asciz = sub {
    shift;
    my $text = join(",", @_);
    return "" unless $text =~ /^"(.*)"$/;
    ".byte " . join(",", unpack("C*", $1), 0) . "\n.align 2";
};
# .quad handler: split 64-bit constants into endianness-ordered .long
# pairs (32-bit assemblers choke on .quad); symbolic operands are kept
# as .quad and left for the assembler to resolve.
my $quad = sub {
    shift;
    my @out;
    for my $val (@_) {
        my ($hi, $lo);
        if ($val =~ /^0x([0-9a-f]*?)([0-9a-f]{1,8})$/io) {
            $hi = $1 ? "0x$1" : "0";
            $lo = "0x$2";
        }
        elsif ($val =~ /^([0-9]+)$/o) {
            $hi = $1 >> 32;             # error-prone with 32-bit perl
            $lo = $1 & 0xffffffff;
        }
        else {
            ($hi, $lo) = (undef, $val); # symbolic operand
        }
        if (defined($hi)) {
            push @out, ($flavour =~ /le$/o) ? ".long\t$lo,$hi"
                                            : ".long\t$hi,$lo";
        }
        else {
            push @out, ".quad $lo";
        }
    }
    join("\n", @out);
};
################################################################
# simplified mnemonics not handled by at least one assembler
################################################################
# cmplw: raw-encode for old 32-bit GNU as, pass through elsewhere.
my $cmplw = sub {
    my $f  = shift;
    my $cr = ($#_ > 1) ? shift : 0;   # optional leading CR field
    if ($flavour =~ /linux.*32/) {
        # Some out-of-date 32-bit GNU assembler just can't handle cmplw...
        return " .long " . sprintf "0x%x", 31<<26|$cr<<23|$_[0]<<16|$_[1]<<11|64;
    }
    " cmplw " . join(',', $cr, @_);
};
# bdnz: only overridden for non-Linux flavours (GNU as handles the
# mnemonic natively, so on Linux the handler must stay undef and the
# line passes through untranslated).
#
# Fix: the original `my $bdnz = sub {...} if (cond);` relied on a `my`
# declaration with a statement modifier, which perlsyn documents as
# having undefined behaviour.  Declare unconditionally and assign under
# the condition -- $bdnz is deterministically undef on Linux, exactly
# the behaviour the eval-based mnemonic dispatch expects.
my $bdnz;
if ($flavour !~ /linux/) {
    $bdnz = sub {
        my $f = shift;
        my $bo = $f=~/[\+\-]/ ? 16+9 : 16; # optional "to be taken" hint
        " bc $bo,0,".shift;
    };
}
# Conditional branch-to-LR forms carrying static prediction hints.
# GNU as rejects the most recent hint syntax, so on Linux the raw
# instruction word is emitted instead of the mnemonic.
my $bltlr = sub {
    my $f  = shift;
    my $bo = ($f =~ /\-/) ? 12+2 : 12;   # optional "not to be taken" hint
    return " .long " . sprintf "0x%x", 19<<26|$bo<<21|16<<1
        if $flavour =~ /linux/;          # GNU as doesn't allow most recent hints
    " bclr $bo,0";
};
my $bnelr = sub {
    my $f  = shift;
    my $bo = ($f =~ /\-/) ? 4+2 : 4;     # optional "not to be taken" hint
    return " .long " . sprintf "0x%x", 19<<26|$bo<<21|2<<16|16<<1
        if $flavour =~ /linux/;          # GNU as doesn't allow most recent hints
    " bclr $bo,2";
};
my $beqlr = sub {
    my $f  = shift;
    my $bo = ($f =~ /-/) ? 12+2 : 12;    # optional "not to be taken" hint
    return " .long " . sprintf "0x%X", 19<<26|$bo<<21|2<<16|16<<1
        if $flavour =~ /linux/;          # GNU as doesn't allow most recent hints
    " bclr $bo,2";
};
# GNU assembler can't handle extrdi rA,rS,16,48, or when sum of last two
# arguments is 64, with "operand out of range" error.
# Emit the canonical rldicl form that extrdi abbreviates instead.
my $extrdi = sub {
    my ($f, $ra, $rs, $n, $b) = @_;
    my $shift = ($b + $n) & 63;
    my $mbits = 64 - $n;
    " rldicl $ra,$rs,$shift,$mbits";
};
# vmr: expressed via vor dst,src,src for assemblers missing the alias.
my $vmr = sub {
    my ($f, $dst, $src) = @_;
    " vor $dst,$src,$src";
};
# Some ABIs specify vrsave, special-purpose register #256, as reserved
# for system use.
my $no_vrsave = ($flavour =~ /linux-ppc64le/);
# mtspr: on ABIs where vrsave is reserved, replace writes to SPR 256
# with a no-op (or rA,rA,rA); otherwise pass through.
my $mtspr = sub {
    my ($f, $idx, $ra) = @_;
    return " or $ra,$ra,$ra" if $idx == 256 && $no_vrsave;
    " mtspr $idx,$ra";
};
# mfspr: reads of the reserved vrsave yield an all-ones value instead.
my $mfspr = sub {
    my ($f, $rd, $idx) = @_;
    return " li $rd,-1" if $idx == 256 && $no_vrsave;
    " mfspr $rd,$idx";
};
# PowerISA 2.06 stuff
# Raw-encode an X-form VSX memory access: primary opcode 31, the
# VRT/RA/RB register fields, and the extended opcode doubled with the
# low bit set ($op*2+1).
sub vsxmem_op {
    my ($f, $vrt, $ra, $rb, $op) = @_;
    my $word = (31<<26) | ($vrt<<21) | ($ra<<16) | ($rb<<11) | ($op*2+1);
    " .long " . sprintf "0x%X", $word;
}
# made-up unaligned memory reference AltiVec/VMX instructions
my $lvx_u   = sub { vsxmem_op(@_, 844); };  # lxvd2x
my $stvx_u  = sub { vsxmem_op(@_, 972); };  # stxvd2x
my $lvdx_u  = sub { vsxmem_op(@_, 588); };  # lxsdx
my $stvdx_u = sub { vsxmem_op(@_, 716); };  # stxsdx
my $lvx_4w  = sub { vsxmem_op(@_, 780); };  # lxvw4x
my $stvx_4w = sub { vsxmem_op(@_, 908); };  # stxvw4x
# PowerISA 2.07 stuff
# Raw-encode a VX-form vector instruction: primary opcode 4, the
# VRT/VRA/VRB register fields, and the extended opcode in the low bits.
sub vcrypto_op {
    my ($f, $vrt, $vra, $vrb, $op) = @_;
    " .long ".sprintf "0x%X",(4<<26)|($vrt<<21)|($vra<<16)|($vrb<<11)|$op;
}
my $vcipher = sub { vcrypto_op(@_, 1288); };
my $vcipherlast = sub { vcrypto_op(@_, 1289); };
my $vncipher = sub { vcrypto_op(@_, 1352); };
my $vncipherlast= sub { vcrypto_op(@_, 1353); };
my $vsbox = sub { vcrypto_op(@_, 0, 1480); };   # VRB field is always 0
my $vshasigmad = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1730); };
my $vshasigmaw = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1666); };
my $vpmsumb = sub { vcrypto_op(@_, 1032); };
my $vpmsumd = sub { vcrypto_op(@_, 1224); };
# Fix: this handler was misspelled "$vpmsubh".  The real mnemonic is
# vpmsumh (Vector Polynomial Multiply-Sum Halfword, extended opcode
# 1096); since the main loop dispatches by eval("\$$mnemonic"), the
# typo'd variable could never match any instruction and vpmsumh lines
# would have been passed through to the assembler untranslated.
my $vpmsumh = sub { vcrypto_op(@_, 1096); };
my $vpmsumw = sub { vcrypto_op(@_, 1160); };
my $vaddudm = sub { vcrypto_op(@_, 192); };
my $vadduqm = sub { vcrypto_op(@_, 256); };
# mtsle: raw-encode the set-little-endian instruction.
my $mtsle = sub {
    my ($f, $arg) = @_;
    " .long ".sprintf "0x%X",(31<<26)|($arg<<21)|(147*2);
};
print "#include <asm/ppc_asm.h>\n" if $flavour =~ /linux/;
# Main translation loop: strip comments/whitespace, normalise local
# labels, then dispatch the mnemonic (looked up by name via eval) to a
# handler closure above; unhandled mnemonics pass through verbatim.
while($line=<>) {
$line =~ s|[#!;].*$||; # get rid of asm-style comments...
$line =~ s|/\*.*\*/||; # ... and C-style comments...
$line =~ s|^\s+||; # ... and skip white spaces in beginning...
$line =~ s|\s+$||; # ... and at the end
{
# canonicalise local labels, then re-dot them if the target wants it
$line =~ s|\b\.L(\w+)|L$1|g; # common denominator for Locallabel
$line =~ s|\bL(\w+)|\.L$1|g if ($dotinlocallabels);
}
{
# split off "[.]mnemonic[./+/-]" -- $c is the leading dot (directive
# marker), $f a trailing predict-hint/record suffix, rest stays in $line
$line =~ s|^\s*(\.?)(\w+)([\.\+\-]?)\s*||;
my $c = $1; $c = "\t" if ($c eq "");
my $mnemonic = $2;
my $f = $3;
# look up a handler closure by the mnemonic's name (e.g. $vcipher)
my $opcode = eval("\$$mnemonic");
# strip register-name prefixes (r3 -> 3 etc.), except for directives
# and on OS X whose assembler wants the names
$line =~ s/\b(c?[rf]|v|vs)([0-9]+)\b/$2/g if ($c ne "." and $flavour !~ /osx/);
if (ref($opcode) eq 'CODE') { $line = &$opcode($f,split(',',$line)); }
elsif ($mnemonic) { $line = $c.$mnemonic.$f."\t".$line; }
}
print $line if ($line);
print "\n";
}
close STDOUT;
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment