Commit 59f87247, authored by Paul Mackerras, committed by Linus Torvalds

[PATCH] Better memset

Anton noticed in some traces that we were spending an awfully long time
doing a memset.  The ppc64 memset is basically unchanged from the ppc32
version, and it only does 4-byte stores and doesn't unroll the loop. 
Here's a memset that performs a bit better.

I have been using it for 3 weeks now, and Anton has tested it on a
variety of machines, without problems. 
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent fe3a3a65
...@@ -66,28 +66,69 @@ _GLOBAL(strlen) ...@@ -66,28 +66,69 @@ _GLOBAL(strlen)
blr blr
/*
 * memset for 64-bit PowerPC: fill r5 bytes starting at r3 with the low
 * byte of r4, using 8-byte stores once the destination is aligned.
 *
 * In:    r3 = destination pointer
 *        r4 = fill value (only the low byte is used)
 *        r5 = number of bytes to fill
 * Out:   r3 = destination pointer (never written by this routine)
 * Uses:  r0, r4, r5, r6, ctr, cr0, cr1, cr7
 *
 * Outline:
 *   1. Replicate the fill byte into all 8 bytes of r4.
 *   2. Store 1/2/4 bytes as needed so that r6 becomes 8-byte aligned.
 *   3. Store 64 bytes per iteration while at least 64 bytes remain.
 *   4. Store the remaining 32/16/8-byte pieces, then the final 4/2/1.
 *
 * "mtcrf 1,rX" copies the low four bits of rX into CR field 7, so that
 * CR bit 31 mirrors the 1s bit, bit 30 the 2s bit and bit 29 the 4s
 * bit of rX.  Each "bf <bit>,<label>" skips a store whose size bit is
 * clear.
 */
_GLOBAL(memset)
	neg	r0,r3			/* alignment comes from the pointer, not the length */
	rlwimi	r4,r4,8,16,23		/* byte -> halfword */
	andi.	r0,r0,7			/* # bytes to be 8-byte aligned; cr0.eq if none */
	rlwimi	r4,r4,16,0,15		/* halfword -> word */
	cmplw	cr1,r5,r0		/* do we get that far? */
	rldimi	r4,r4,32,0		/* word -> doubleword */
	mtcrf	1,r0			/* cr7 = low bits of the alignment count */
	mr	r6,r3			/* r6 = running store pointer; r3 stays intact */
	blt	cr1,8f			/* fewer bytes than the alignment gap: tail only */
	beq+	3f			/* if already 8-byte aligned */
	subf	r5,r0,r5		/* r5 = bytes left after the alignment stores */
	bf	31,1f
	stb	r4,0(r6)
	addi	r6,r6,1
1:	bf	30,2f
	sth	r4,0(r6)
	addi	r6,r6,2
2:	bf	29,3f
	stw	r4,0(r6)
	addi	r6,r6,4

	/* r6 is 8-byte aligned from here on. */
3:	srdi.	r0,r5,6			/* r0 = number of whole 64-byte chunks */
	clrldi	r5,r5,58		/* r5 = bytes left over (0..63) */
	mtctr	r0
	beq	5f			/* no whole 64-byte chunk */
4:	std	r4,0(r6)		/* unrolled: 8 doublewords per iteration */
	std	r4,8(r6)
	std	r4,16(r6)
	std	r4,24(r6)
	std	r4,32(r6)
	std	r4,40(r6)
	std	r4,48(r6)
	std	r4,56(r6)
	addi	r6,r6,64
	bdnz	4b

5:	srwi.	r0,r5,3			/* r0 = remaining doublewords (0..7) */
	clrlwi	r5,r5,29		/* r5 = remaining bytes (0..7) */
	mtcrf	1,r0			/* cr7 bits 29/30/31 = 4/2/1 doublewords */
	beq	8f			/* no whole doubleword left */
	bf	29,6f
	std	r4,0(r6)
	std	r4,8(r6)
	std	r4,16(r6)
	std	r4,24(r6)
	addi	r6,r6,32
6:	bf	30,7f
	std	r4,0(r6)
	std	r4,8(r6)
	addi	r6,r6,16
7:	bf	31,8f
	std	r4,0(r6)
	addi	r6,r6,8

	/* Tail: fewer than 8 bytes remain in r5. */
8:	cmpwi	r5,0
	mtcrf	1,r5			/* cr7 bits 29/30/31 = 4/2/1 bytes */
	beqlr+				/* nothing left: return */
	bf	29,9f
	stw	r4,0(r6)
	addi	r6,r6,4
9:	bf	30,10f
	sth	r4,0(r6)
	addi	r6,r6,2
10:	bflr	31			/* no odd byte: return */
	stb	r4,0(r6)
	blr
_GLOBAL(memmove) _GLOBAL(memmove)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment