Commit 12cff028 authored by David S. Miller

[SPARC64]: Use saner local label names in Ultra3 copies.

This makes the kernel profiles look much more
meaningful.
Signed-off-by: David S. Miller <davem@redhat.com>
parent 5feed8ed
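
Why the renaming helps profiling: the GNU assembler keeps every named label such as "enter", "begin" or "loop" as a symbol in the object file, so kernel profiles credited time spent inside the copy loops to those internal labels rather than to U3memcpy, U3copy_from_user or U3copy_to_user. Numeric local labels ("1:", referenced as "1b" backward or "1f" forward) are discarded by the assembler instead of being kept as symbols, so every sample falls under the enclosing function. The fragment below is only an illustrative sketch of that difference, not code from this commit; the my_func/my_loop names and the trivial count-down loop are invented for the example.

	/* Named internal label: "my_loop" ends up in the symbol table,
	 * so a profiler splits this routine into my_func + my_loop.
	 */
	.globl my_func
my_func:
my_loop:
	subcc %o2, 1, %o2
	bne my_loop
	 nop
	retl
	 nop

	/* Numeric local label: "1b" branches back to the nearest preceding
	 * "1:", no extra symbol is emitted, and all samples land on my_func2.
	 */
	.globl my_func2
my_func2:
1:
	subcc %o2, 1, %o2
	bne 1b
	 nop
	retl
	 nop
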
@@ -140,24 +140,24 @@
 	.globl U3copy_from_user
 U3copy_from_user: /* %o0=dst, %o1=src, %o2=len */
 	cmp %o2, 0
-	be,pn %XCC, out
+	be,pn %XCC, 85f
 	or %o0, %o1, %o3
 	cmp %o2, 16
-	bleu,a,pn %XCC, small_copy
+	bleu,a,pn %XCC, 80f
 	or %o3, %o2, %o3
 	cmp %o2, 256
-	blu,pt %XCC, medium_copy
+	blu,pt %XCC, 70f
 	andcc %o3, 0x7, %g0
-	ba,pt %xcc, enter
+	ba,pt %xcc, 1f
 	andcc %o0, 0x3f, %g2
 	/* Here len >= 256 and condition codes reflect execution
 	 * of "andcc %o0, 0x7, %g2", done by caller.
 	 */
 	.align 64
-enter:
+1:
 	/* Is 'dst' already aligned on an 64-byte boundary? */
 	be,pt %XCC, 2f
@@ -180,11 +180,11 @@ enter:
 2: VISEntryHalf
 	and %o1, 0x7, %g1
-	ba,pt %xcc, begin
+	ba,pt %xcc, 1f
 	alignaddr %o1, %g0, %o1
 	.align 64
-begin:
+1:
 	membar #StoreLoad | #StoreStore | #LoadStore
 	prefetcha [%o1 + 0x000] %asi, #one_read
 	prefetcha [%o1 + 0x040] %asi, #one_read
@@ -213,11 +213,11 @@ begin:
 	sub %o4, 0x80, %o4
 	add %o1, 0x40, %o1
-	ba,pt %xcc, loop
+	ba,pt %xcc, 1f
 	srl %o4, 6, %o3
 	.align 64
-loop:
+1:
 	EX3(ldda [%o1 + 0x008] %asi, %f2)
 	faligndata %f12, %f14, %f28
 	EX3(ldda [%o1 + 0x010] %asi, %f4)
@@ -240,11 +240,10 @@ loop:
 	faligndata %f10, %f12, %f26
 	subcc %o3, 0x01, %o3
 	add %o1, 0x40, %o1
-	bg,pt %XCC, loop
+	bg,pt %XCC, 1b
 	add %o0, 0x40, %o0
 	/* Finally we copy the last full 64-byte block. */
-loopfini:
 	EX3(ldda [%o1 + 0x008] %asi, %f2)
 	faligndata %f12, %f14, %f28
 	EX3(ldda [%o1 + 0x010] %asi, %f4)
@@ -279,12 +278,11 @@ loopfini:
 	 * Also notice how this code is careful not to perform a
 	 * load past the end of the src buffer.
 	 */
-loopend:
 	and %o2, 0x3f, %o2
 	andcc %o2, 0x38, %g2
-	be,pn %XCC, endcruft
+	be,pn %XCC, 10f
 	subcc %g2, 0x8, %g2
-	be,pn %XCC, endcruft
+	be,pn %XCC, 10f
 	cmp %g1, 0
 	be,a,pt %XCC, 1f
@@ -296,7 +294,7 @@ loopend:
 	subcc %g2, 0x8, %g2
 	faligndata %f0, %f2, %f8
 	std %f8, [%o0 + 0x00]
-	be,pn %XCC, endcruft
+	be,pn %XCC, 10f
 	add %o0, 0x8, %o0
 	EX(ldda [%o1 + 0x08] %asi, %f0, add %o2, %g0)
 	add %o1, 0x8, %o1
@@ -311,15 +309,15 @@ loopend:
 	 * Note that %g1 is (src & 0x3) saved above before the
 	 * alignaddr was performed.
 	 */
-endcruft:
+10:
 	cmp %o2, 0
 	add %o1, %g1, %o1
 	VISExitHalf
-	be,pn %XCC, out
+	be,pn %XCC, 85f
 	sub %o0, %o1, %o3
 	andcc %g1, 0x7, %g0
-	bne,pn %icc, small_copy_unaligned
+	bne,pn %icc, 90f
 	andcc %o2, 0x8, %g0
 	be,pt %icc, 1f
 	nop
@@ -342,17 +340,16 @@ endcruft:
 	add %o1, 0x2, %o1
 1: andcc %o2, 0x1, %g0
-	be,pt %icc, out
+	be,pt %icc, 85f
 	nop
 	EXNV(lduba [%o1] %asi, %o5, and %o2, 0x1)
-	ba,pt %xcc, out
+	ba,pt %xcc, 85f
 	stb %o5, [%o1 + %o3]
-medium_copy: /* 16 < len <= 64 */
-	bne,pn %XCC, small_copy_unaligned
+70: /* 16 < len <= 64 */
+	bne,pn %XCC, 90f
 	sub %o0, %o1, %o3
-medium_copy_aligned:
 	andn %o2, 0x7, %o4
 	and %o2, 0x7, %o2
 1: subcc %o4, 0x8, %o4
@@ -368,32 +365,32 @@ medium_copy_aligned:
 	stw %o5, [%o1 + %o3]
 	add %o1, 0x4, %o1
 1: cmp %o2, 0
-	be,pt %XCC, out
+	be,pt %XCC, 85f
 	nop
-	ba,pt %xcc, small_copy_unaligned
+	ba,pt %xcc, 90f
 	nop
-small_copy: /* 0 < len <= 16 */
+80: /* 0 < len <= 16 */
 	andcc %o3, 0x3, %g0
-	bne,pn %XCC, small_copy_unaligned
+	bne,pn %XCC, 90f
 	sub %o0, %o1, %o3
-small_copy_aligned:
+1:
 	subcc %o2, 4, %o2
 	EXNV(lduwa [%o1] %asi, %g1, add %o2, %g0)
 	stw %g1, [%o1 + %o3]
-	bgu,pt %XCC, small_copy_aligned
+	bgu,pt %XCC, 1b
 	add %o1, 4, %o1
-out: retl
+85: retl
 	clr %o0
 	.align 32
-small_copy_unaligned:
+90:
 	subcc %o2, 1, %o2
 	EXNV(lduba [%o1] %asi, %g1, add %o2, %g0)
 	stb %g1, [%o1 + %o3]
-	bgu,pt %XCC, small_copy_unaligned
+	bgu,pt %XCC, 90b
 	add %o1, 1, %o1
 	retl
 	clr %o0
...
@@ -159,24 +159,24 @@ U3copy_to_user: /* %o0=dst, %o1=src, %o2=len */
 	nop
 	cmp %o2, 0
-	be,pn %XCC, out
+	be,pn %XCC, 85f
 	or %o0, %o1, %o3
 	cmp %o2, 16
-	bleu,a,pn %XCC, small_copy
+	bleu,a,pn %XCC, 80f
 	or %o3, %o2, %o3
 	cmp %o2, 256
-	blu,pt %XCC, medium_copy
+	blu,pt %XCC, 70f
 	andcc %o3, 0x7, %g0
-	ba,pt %xcc, enter
+	ba,pt %xcc, 1f
 	andcc %o0, 0x3f, %g2
 	/* Here len >= 256 and condition codes reflect execution
 	 * of "andcc %o0, 0x7, %g2", done by caller.
 	 */
 	.align 64
-enter:
+1:
 	/* Is 'dst' already aligned on an 64-byte boundary? */
 	be,pt %XCC, 2f
@@ -199,11 +199,11 @@ enter:
 2: VISEntryHalf
 	and %o1, 0x7, %g1
-	ba,pt %xcc, begin
+	ba,pt %xcc, 1f
 	alignaddr %o1, %g0, %o1
 	.align 64
-begin:
+1:
 	membar #StoreLoad | #StoreStore | #LoadStore
 	prefetch [%o1 + 0x000], #one_read
 	prefetch [%o1 + 0x040], #one_read
@@ -232,11 +232,11 @@ begin:
 	sub %o4, 0x80, %o4
 	add %o1, 0x40, %o1
-	ba,pt %xcc, loop
+	ba,pt %xcc, 1f
 	srl %o4, 6, %o3
 	.align 64
-loop:
+1:
 	ldd [%o1 + 0x008], %f2
 	faligndata %f12, %f14, %f28
 	ldd [%o1 + 0x010], %f4
@@ -259,11 +259,10 @@ loop:
 	faligndata %f10, %f12, %f26
 	subcc %o3, 0x01, %o3
 	add %o1, 0x40, %o1
-	bg,pt %XCC, loop
+	bg,pt %XCC, 1b
 	add %o0, 0x40, %o0
 	/* Finally we copy the last full 64-byte block. */
-loopfini:
 	ldd [%o1 + 0x008], %f2
 	faligndata %f12, %f14, %f28
 	ldd [%o1 + 0x010], %f4
@@ -298,12 +297,11 @@ loopfini:
 	 * Also notice how this code is careful not to perform a
 	 * load past the end of the src buffer.
 	 */
-loopend:
 	and %o2, 0x3f, %o2
 	andcc %o2, 0x38, %g2
-	be,pn %XCC, endcruft
+	be,pn %XCC, 2f
 	subcc %g2, 0x8, %g2
-	be,pn %XCC, endcruft
+	be,pn %XCC, 2f
 	cmp %g1, 0
 	be,a,pt %XCC, 1f
@@ -315,7 +313,7 @@ loopend:
 	subcc %g2, 0x8, %g2
 	faligndata %f0, %f2, %f8
 	EX(stda %f8, [%o0 + 0x00] %asi, add %o2, 0x8)
-	be,pn %XCC, endcruft
+	be,pn %XCC, 2f
 	add %o0, 0x8, %o0
 	ldd [%o1 + 0x08], %f0
 	add %o1, 0x8, %o1
@@ -330,15 +328,15 @@ loopend:
 	 * Note that %g1 is (src & 0x3) saved above before the
 	 * alignaddr was performed.
 	 */
-endcruft:
+2:
 	cmp %o2, 0
 	add %o1, %g1, %o1
 	VISExitHalf
-	be,pn %XCC, out
+	be,pn %XCC, 85f
 	sub %o0, %o1, %o3
 	andcc %g1, 0x7, %g0
-	bne,pn %icc, small_copy_unaligned
+	bne,pn %icc, 90f
 	andcc %o2, 0x8, %g0
 	be,pt %icc, 1f
 	nop
@@ -361,17 +359,16 @@ endcruft:
 	add %o1, 0x2, %o1
 1: andcc %o2, 0x1, %g0
-	be,pt %icc, out
+	be,pt %icc, 85f
 	nop
 	ldub [%o1], %o5
-	ba,pt %xcc, out
+	ba,pt %xcc, 85f
 	EXNV(stba %o5, [%o1 + %o3] ASI_AIUS, and %o2, 0x1)
-medium_copy: /* 16 < len <= 64 */
-	bne,pn %XCC, small_copy_unaligned
+70: /* 16 < len <= 64 */
+	bne,pn %XCC, 90f
 	sub %o0, %o1, %o3
-medium_copy_aligned:
 	andn %o2, 0x7, %o4
 	and %o2, 0x7, %o2
 1: subcc %o4, 0x8, %o4
@@ -387,32 +384,32 @@ medium_copy_aligned:
 	EXNV3(stwa %o5, [%o1 + %o3] ASI_AIUS, add %o2, %g0)
 	add %o1, 0x4, %o1
 1: cmp %o2, 0
-	be,pt %XCC, out
+	be,pt %XCC, 85f
 	nop
-	ba,pt %xcc, small_copy_unaligned
+	ba,pt %xcc, 90f
 	nop
-small_copy: /* 0 < len <= 16 */
+80: /* 0 < len <= 16 */
 	andcc %o3, 0x3, %g0
-	bne,pn %XCC, small_copy_unaligned
+	bne,pn %XCC, 90f
 	sub %o0, %o1, %o3
-small_copy_aligned:
+1:
 	subcc %o2, 4, %o2
 	lduw [%o1], %g1
 	EXNV3(stwa %g1, [%o1 + %o3] ASI_AIUS, add %o2, %g0)
-	bgu,pt %XCC, small_copy_aligned
+	bgu,pt %XCC, 1b
 	add %o1, 4, %o1
-out: retl
+85: retl
 	clr %o0
 	.align 32
-small_copy_unaligned:
+90:
 	subcc %o2, 1, %o2
 	ldub [%o1], %g1
 	EXNV2(stba %g1, [%o1 + %o3] ASI_AIUS, add %o2, %g0)
-	bgu,pt %XCC, small_copy_unaligned
+	bgu,pt %XCC, 90b
 	add %o1, 1, %o1
 	retl
 	clr %o0
@@ -45,24 +45,24 @@
 U3memcpy: /* %o0=dst, %o1=src, %o2=len */
 	mov %o0, %g5
 	cmp %o2, 0
-	be,pn %XCC, out
+	be,pn %XCC, 85f
 	or %o0, %o1, %o3
 	cmp %o2, 16
-	bleu,a,pn %XCC, small_copy
+	bleu,a,pn %XCC, 70f
 	or %o3, %o2, %o3
 	cmp %o2, 256
-	blu,pt %XCC, medium_copy
+	blu,pt %XCC, 80f
 	andcc %o3, 0x7, %g0
-	ba,pt %xcc, enter
+	ba,pt %xcc, 1f
 	andcc %o0, 0x3f, %g2
 	/* Here len >= 256 and condition codes reflect execution
 	 * of "andcc %o0, 0x7, %g2", done by caller.
 	 */
 	.align 64
-enter:
+1:
 	/* Is 'dst' already aligned on an 64-byte boundary? */
 	be,pt %XCC, 2f
@@ -85,11 +85,11 @@ enter:
 2: VISEntryHalf
 	and %o1, 0x7, %g1
-	ba,pt %xcc, begin
+	ba,pt %xcc, 1f
 	alignaddr %o1, %g0, %o1
 	.align 64
-begin:
+1:
 	membar #StoreLoad | #StoreStore | #LoadStore
 	prefetch [%o1 + 0x000], #one_read
 	prefetch [%o1 + 0x040], #one_read
@@ -118,11 +118,11 @@ begin:
 	sub %o4, 0x80, %o4
 	add %o1, 0x40, %o1
-	ba,pt %xcc, loop
+	ba,pt %xcc, 1f
 	srl %o4, 6, %o3
 	.align 64
-loop:
+1:
 	ldd [%o1 + 0x008], %f2
 	faligndata %f12, %f14, %f28
 	ldd [%o1 + 0x010], %f4
@@ -145,11 +145,10 @@ loop:
 	faligndata %f10, %f12, %f26
 	subcc %o3, 0x01, %o3
 	add %o1, 0x40, %o1
-	bg,pt %XCC, loop
+	bg,pt %XCC, 1b
 	add %o0, 0x40, %o0
 	/* Finally we copy the last full 64-byte block. */
-loopfini:
 	ldd [%o1 + 0x008], %f2
 	faligndata %f12, %f14, %f28
 	ldd [%o1 + 0x010], %f4
@@ -183,12 +182,11 @@ loopfini:
 	 * Also notice how this code is careful not to perform a
 	 * load past the end of the src buffer.
 	 */
-loopend:
 	and %o2, 0x3f, %o2
 	andcc %o2, 0x38, %g2
-	be,pn %XCC, endcruft
+	be,pn %XCC, 2f
 	subcc %g2, 0x8, %g2
-	be,pn %XCC, endcruft
+	be,pn %XCC, 2f
 	cmp %g1, 0
 	be,a,pt %XCC, 1f
@@ -200,7 +198,7 @@ loopend:
 	subcc %g2, 0x8, %g2
 	faligndata %f0, %f2, %f8
 	std %f8, [%o0 + 0x00]
-	be,pn %XCC, endcruft
+	be,pn %XCC, 2f
 	add %o0, 0x8, %o0
 	ldd [%o1 + 0x08], %f0
 	add %o1, 0x8, %o1
@@ -215,15 +213,15 @@ loopend:
 	 * Note that %g1 is (src & 0x3) saved above before the
 	 * alignaddr was performed.
 	 */
-endcruft:
+2:
 	cmp %o2, 0
 	add %o1, %g1, %o1
 	VISExitHalf
-	be,pn %XCC, out
+	be,pn %XCC, 85f
 	sub %o0, %o1, %o3
 	andcc %g1, 0x7, %g0
-	bne,pn %icc, small_copy_unaligned
+	bne,pn %icc, 90f
 	andcc %o2, 0x8, %g0
 	be,pt %icc, 1f
 	nop
@@ -246,17 +244,16 @@ endcruft:
 	add %o1, 0x2, %o1
 1: andcc %o2, 0x1, %g0
-	be,pt %icc, out
+	be,pt %icc, 85f
 	nop
 	ldub [%o1], %o5
-	ba,pt %xcc, out
+	ba,pt %xcc, 85f
 	stb %o5, [%o1 + %o3]
-medium_copy: /* 16 < len <= 64 */
-	bne,pn %XCC, small_copy_unaligned
+70: /* 16 < len <= 64 */
+	bne,pn %XCC, 90f
 	sub %o0, %o1, %o3
-medium_copy_aligned:
 	andn %o2, 0x7, %o4
 	and %o2, 0x7, %o2
 1: subcc %o4, 0x8, %o4
@@ -272,32 +269,32 @@ medium_copy_aligned:
 	stw %o5, [%o1 + %o3]
 	add %o1, 0x4, %o1
 1: cmp %o2, 0
-	be,pt %XCC, out
+	be,pt %XCC, 85f
 	nop
-	ba,pt %xcc, small_copy_unaligned
+	ba,pt %xcc, 90f
 	nop
-small_copy: /* 0 < len <= 16 */
+80: /* 0 < len <= 16 */
 	andcc %o3, 0x3, %g0
-	bne,pn %XCC, small_copy_unaligned
+	bne,pn %XCC, 90f
 	sub %o0, %o1, %o3
-small_copy_aligned:
+1:
 	subcc %o2, 4, %o2
 	lduw [%o1], %g1
 	stw %g1, [%o1 + %o3]
-	bgu,pt %XCC, small_copy_aligned
+	bgu,pt %XCC, 1b
 	add %o1, 4, %o1
-out: retl
+85: retl
 	mov %g5, %o0
 	.align 32
-small_copy_unaligned:
+90:
 	subcc %o2, 1, %o2
 	ldub [%o1], %g1
 	stb %g1, [%o1 + %o3]
-	bgu,pt %XCC, small_copy_unaligned
+	bgu,pt %XCC, 90b
 	add %o1, 1, %o1
 	retl
 	mov %g5, %o0
...