Commit d5d0c35f authored by Jan Lindström

Merge pull request #160 from grooverdan/crc32_power_abi_fix

MDEV-9699: power8 crc32: Per the PPC64 ABI, v20-v31 are non-volatile registers
parents 46089d76 5ea894a7
......@@ -67,14 +67,13 @@
#define off96 r30
#define off112 r31
#define const1 v25
#define const2 v26
#define const1 v24
#define const2 v25
#define byteswap v27
#define mask_32bit v28
#define mask_64bit v29
#define zeroes v30
#define ones v31
#define byteswap v26
#define mask_32bit v27
#define mask_64bit v28
#define zeroes v29
#ifdef BYTESWAP_DATA
#define VPERM(A, B, C, D) vperm A, B, C, D
......@@ -101,13 +100,28 @@ FUNC_START(__crc32_vpmsum)
li off112,112
li r0,0
/* Enough room for saving 10 non volatile VMX registers */
subi r6,r1,56+10*16
subi r7,r1,56+2*16
stvx v20,0,r6
stvx v21,off16,r6
stvx v22,off32,r6
stvx v23,off48,r6
stvx v24,off64,r6
stvx v25,off80,r6
stvx v26,off96,r6
stvx v27,off112,r6
stvx v28,0,r7
stvx v29,off16,r7
mr r10,r3
vxor zeroes,zeroes,zeroes
vspltisw ones,-1
vspltisw v0,-1
vsldoi mask_32bit,zeroes,ones,4
vsldoi mask_64bit,zeroes,ones,8
vsldoi mask_32bit,zeroes,v0,4
vsldoi mask_64bit,zeroes,v0,8
/* Get the initial value into v8 */
vxor v8,v8,v8
......@@ -570,6 +584,21 @@ FUNC_START(__crc32_vpmsum)
vsldoi v0,v0,zeroes,4 /* shift result into top 64 bits of */
#endif
.Lout:
subi r6,r1,56+10*16
subi r7,r1,56+2*16
lvx v20,0,r6
lvx v21,off16,r6
lvx v22,off32,r6
lvx v23,off48,r6
lvx v24,off64,r6
lvx v25,off80,r6
lvx v26,off96,r6
lvx v27,off112,r6
lvx v28,0,r7
lvx v29,off16,r7
/* Get it into r3 */
MFVRD(r3, v0)
......@@ -739,6 +768,8 @@ FUNC_START(__crc32_vpmsum)
.Lzero:
mr r3,r10
blr
b .Lout
FUNC_END(__crc32_vpmsum)
#endif /* __powerpc__ */
......@@ -67,14 +67,13 @@
#define off96 r30
#define off112 r31
#define const1 v25
#define const2 v26
#define const1 v24
#define const2 v25
#define byteswap v27
#define mask_32bit v28
#define mask_64bit v29
#define zeroes v30
#define ones v31
#define byteswap v26
#define mask_32bit v27
#define mask_64bit v28
#define zeroes v29
#ifdef BYTESWAP_DATA
#define VPERM(A, B, C, D) vperm A, B, C, D
......@@ -101,13 +100,28 @@ FUNC_START(__crc32_vpmsum)
li off112,112
li r0,0
/* Enough room for saving 10 non volatile VMX registers */
subi r6,r1,56+10*16
subi r7,r1,56+2*16
stvx v20,0,r6
stvx v21,off16,r6
stvx v22,off32,r6
stvx v23,off48,r6
stvx v24,off64,r6
stvx v25,off80,r6
stvx v26,off96,r6
stvx v27,off112,r6
stvx v28,0,r7
stvx v29,off16,r7
mr r10,r3
vxor zeroes,zeroes,zeroes
vspltisw ones,-1
vspltisw v0,-1
vsldoi mask_32bit,zeroes,ones,4
vsldoi mask_64bit,zeroes,ones,8
vsldoi mask_32bit,zeroes,v0,4
vsldoi mask_64bit,zeroes,v0,8
/* Get the initial value into v8 */
vxor v8,v8,v8
......@@ -570,6 +584,21 @@ FUNC_START(__crc32_vpmsum)
vsldoi v0,v0,zeroes,4 /* shift result into top 64 bits of */
#endif
.Lout:
subi r6,r1,56+10*16
subi r7,r1,56+2*16
lvx v20,0,r6
lvx v21,off16,r6
lvx v22,off32,r6
lvx v23,off48,r6
lvx v24,off64,r6
lvx v25,off80,r6
lvx v26,off96,r6
lvx v27,off112,r6
lvx v28,0,r7
lvx v29,off16,r7
/* Get it into r3 */
MFVRD(r3, v0)
......@@ -739,6 +768,8 @@ FUNC_START(__crc32_vpmsum)
.Lzero:
mr r3,r10
blr
b .Lout
FUNC_END(__crc32_vpmsum)
#endif /* __powerpc__ */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment