Commit da09d2f1 authored Jul 28, 2004 by Linus Torvalds
Merge bk://kernel.bkbits.net/davem/sparc-2.6
into ppc970.osdl.org:/home/torvalds/v2.6/linux

Parents: ee61f1e4 a6df905f
Showing 16 changed files with 859 additions and 1724 deletions
arch/sparc64/defconfig                   +3    -9
arch/sparc64/kernel/sparc64_ksyms.c      +1    -1
arch/sparc64/kernel/sys_sparc32.c        +0    -132
arch/sparc64/kernel/systbls.S            +2    -2
arch/sparc64/lib/U3copy_from_user.S      +269  -358
arch/sparc64/lib/U3copy_in_user.S        +76   -467
arch/sparc64/lib/U3copy_to_user.S        +248  -362
arch/sparc64/lib/U3memcpy.S              +241  -336
arch/sparc64/lib/VIScopy.S               +0    -4
arch/sparc64/lib/splock.S                +12   -0
drivers/sbus/char/bbc_envctrl.c          +2    -3
drivers/sbus/char/envctrl.c              +1    -2
include/asm-sparc/pci.h                  +0    -6
include/asm-sparc64/page.h               +1    -1
include/asm-sparc64/spinlock.h           +2    -16
include/asm-sparc64/string.h             +1    -25
arch/sparc64/defconfig
...
...
@@ -90,7 +90,6 @@ CONFIG_BINFMT_ELF32=y
# CONFIG_BINFMT_AOUT32 is not set
CONFIG_BINFMT_ELF=y
CONFIG_BINFMT_MISC=m
# CONFIG_SUNOS_EMUL is not set
CONFIG_SOLARIS_EMUL=m
#
...
...
@@ -122,7 +121,6 @@ CONFIG_FB=y
# CONFIG_FB_CIRRUS is not set
CONFIG_FB_PM2=y
# CONFIG_FB_PM2_FIFO_DISCONNECT is not set
# CONFIG_FB_CYBER2000 is not set
# CONFIG_FB_ASILIANT is not set
# CONFIG_FB_IMSTT is not set
# CONFIG_FB_BW2 is not set
...
...
@@ -180,7 +178,6 @@ CONFIG_LOGO_SUN_CLUT224=y
#
# Serial drivers
#
# CONFIG_SERIAL_8250 is not set
#
# Non-8250 serial port support
...
...
@@ -342,7 +339,6 @@ CONFIG_SCSI_SATA_SIL=m
CONFIG_SCSI_SATA_SIS=m
CONFIG_SCSI_SATA_VIA=m
CONFIG_SCSI_SATA_VITESSE=m
# CONFIG_SCSI_BUSLOGIC is not set
CONFIG_SCSI_DMX3191D=m
CONFIG_SCSI_EATA_PIO=m
# CONFIG_SCSI_FUTURE_DOMAIN is not set
...
...
@@ -679,6 +675,9 @@ CONFIG_NET_DIVERT=y
# QoS and/or fair queueing
#
CONFIG_NET_SCHED=y
# CONFIG_NET_SCH_CLK_JIFFIES is not set
# CONFIG_NET_SCH_CLK_GETTIMEOFDAY is not set
CONFIG_NET_SCH_CLK_CPU=y
CONFIG_NET_SCH_CBQ=m
CONFIG_NET_SCH_HTB=m
CONFIG_NET_SCH_HFSC=m
...
...
@@ -1522,11 +1521,6 @@ CONFIG_SND_VX222=m
CONFIG_SND_SUN_AMD7930=m
CONFIG_SND_SUN_CS4231=m
#
# Open Sound System
#
# CONFIG_SOUND_PRIME is not set
#
# USB support
#
...
...
arch/sparc64/kernel/sparc64_ksyms.c

...
@@ -135,6 +135,7 @@ EXPORT_SYMBOL(__write_lock);
EXPORT_SYMBOL(__write_unlock);
EXPORT_SYMBOL(__write_trylock);
/* Out of line spin-locking implementation. */
EXPORT_SYMBOL(_raw_spin_lock);
EXPORT_SYMBOL(_raw_spin_lock_flags);
#endif
...
@@ -333,7 +334,6 @@ EXPORT_SYMBOL(sys_close);
#endif
/* Special internal versions of library functions. */
EXPORT_SYMBOL(__memcpy);
EXPORT_SYMBOL(__memset);
EXPORT_SYMBOL(_clear_page);
EXPORT_SYMBOL(clear_user_page);
...
arch/sparc64/kernel/sys_sparc32.c

...
@@ -867,138 +867,6 @@ asmlinkage long sys32_ftruncate64(unsigned int fd, unsigned long high, unsigned
	return sys_ftruncate(fd, (high << 32) | low);
}

/* readdir & getdents */

#define NAME_OFFSET(de) ((int) ((de)->d_name - (char __user *) (de)))
#define ROUND_UP(x) (((x)+sizeof(u32)-1) & ~(sizeof(u32)-1))

struct old_linux_dirent32 {
	u32		d_ino;
	u32		d_offset;
	unsigned short	d_namlen;
	char		d_name[1];
};

struct readdir_callback32 {
	struct old_linux_dirent32 __user *dirent;
	int count;
};

static int fillonedir(void *__buf, const char *name, int namlen,
		      loff_t offset, ino_t ino, unsigned int d_type)
{
	struct readdir_callback32 *buf = (struct readdir_callback32 *) __buf;
	struct old_linux_dirent32 __user *dirent;

	if (buf->count)
		return -EINVAL;
	buf->count++;
	dirent = buf->dirent;
	put_user(ino, &dirent->d_ino);
	put_user(offset, &dirent->d_offset);
	put_user(namlen, &dirent->d_namlen);
	copy_to_user(dirent->d_name, name, namlen);
	put_user(0, dirent->d_name + namlen);
	return 0;
}

asmlinkage long old32_readdir(unsigned int fd, struct old_linux_dirent32 __user *dirent, unsigned int count)
{
	int error = -EBADF;
	struct file *file;
	struct readdir_callback32 buf;

	file = fget(fd);
	if (!file)
		goto out;

	buf.count = 0;
	buf.dirent = dirent;

	error = vfs_readdir(file, fillonedir, &buf);
	if (error < 0)
		goto out_putf;

	error = buf.count;

out_putf:
	fput(file);
out:
	return error;
}

struct linux_dirent32 {
	u32		d_ino;
	u32		d_off;
	unsigned short	d_reclen;
	char		d_name[1];
};

struct getdents_callback32 {
	struct linux_dirent32 __user *current_dir;
	struct linux_dirent32 __user *previous;
	int count;
	int error;
};

static int filldir(void *__buf, const char *name, int namlen,
		   loff_t offset, ino_t ino, unsigned int d_type)
{
	struct linux_dirent32 __user *dirent;
	struct getdents_callback32 *buf = (struct getdents_callback32 *) __buf;
	int reclen = ROUND_UP(NAME_OFFSET(dirent) + namlen + 2);

	buf->error = -EINVAL;	/* only used if we fail.. */
	if (reclen > buf->count)
		return -EINVAL;
	dirent = buf->previous;
	if (dirent)
		put_user(offset, &dirent->d_off);
	dirent = buf->current_dir;
	buf->previous = dirent;
	put_user(ino, &dirent->d_ino);
	put_user(reclen, &dirent->d_reclen);
	copy_to_user(dirent->d_name, name, namlen);
	put_user(0, dirent->d_name + namlen);
	put_user(d_type, (char __user *) dirent + reclen - 1);
	dirent = (void __user *) dirent + reclen;
	buf->current_dir = dirent;
	buf->count -= reclen;
	return 0;
}

asmlinkage long sys32_getdents(unsigned int fd, struct linux_dirent32 __user *dirent, unsigned int count)
{
	struct file *file;
	struct linux_dirent32 __user *lastdirent;
	struct getdents_callback32 buf;
	int error = -EBADF;

	file = fget(fd);
	if (!file)
		goto out;

	buf.current_dir = dirent;
	buf.previous = NULL;
	buf.count = count;
	buf.error = 0;

	error = vfs_readdir(file, filldir, &buf);
	if (error < 0)
		goto out_putf;
	lastdirent = buf.previous;
	error = buf.error;
	if (lastdirent) {
		put_user(file->f_pos, &lastdirent->d_off);
		error = count - buf.count;
	}
out_putf:
	fput(file);
out:
	return error;
}

/* end of readdir & getdents */

int cp_compat_stat(struct kstat *stat, struct compat_stat __user *statbuf)
{
	int err;
...
arch/sparc64/kernel/systbls.S

...
@@ -54,13 +54,13 @@ sys_call_table32:
	.word compat_sys_fcntl64, sys_ni_syscall, compat_sys_statfs, compat_sys_fstatfs, sys_oldumount
/*160*/	.word compat_sys_sched_setaffinity, compat_sys_sched_getaffinity, sys32_getdomainname, sys32_setdomainname, sys_nis_syscall
	.word sys_quotactl, sys_set_tid_address, compat_sys_mount, sys_ustat, sys32_setxattr
-/*170*/	.word sys32_lsetxattr, sys32_fsetxattr, sys_getxattr, sys_lgetxattr, sys32_getdents
+/*170*/	.word sys32_lsetxattr, sys32_fsetxattr, sys_getxattr, sys_lgetxattr, compat_sys_getdents
	.word sys_setsid, sys_fchdir, sys32_fgetxattr, sys_listxattr, sys_llistxattr
/*180*/	.word sys32_flistxattr, sys_removexattr, sys_lremovexattr, compat_sys_sigpending, sys_ni_syscall
	.word sys32_setpgid, sys32_fremovexattr, sys32_tkill, sys32_exit_group, sparc64_newuname
/*190*/	.word sys32_init_module, sparc64_personality, sys_remap_file_pages, sys32_epoll_create, sys32_epoll_ctl
	.word sys32_epoll_wait, sys_nis_syscall, sys_getppid, sys32_sigaction, sys_sgetmask
-/*200*/	.word sys32_ssetmask, sys_sigsuspend, compat_sys_newlstat, sys_uselib, old32_readdir
+/*200*/	.word sys32_ssetmask, sys_sigsuspend, compat_sys_newlstat, sys_uselib, compat_old_readdir
	.word sys32_readahead, sys32_socketcall, sys32_syslog, sys32_lookup_dcookie, sys32_fadvise64
/*210*/	.word sys32_fadvise64_64, sys32_tgkill, sys32_waitpid, sys_swapoff, sys32_sysinfo
	.word sys32_ipc, sys32_sigreturn, sys_clone, sys_nis_syscall, sys32_adjtimex
...
arch/sparc64/lib/U3copy_from_user.S

-/* $Id: U3copy_from_user.S,v 1.4 2002/01/15 07:16:26 davem Exp $
- * U3memcpy.S: UltraSparc-III optimized copy from userspace.
+/* U3copy_from_user.S: UltraSparc-III optimized copy from userspace.
  *
- * Copyright (C) 1999, 2000 David S. Miller (davem@redhat.com)
+ * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
  */
#ifdef __KERNEL__
#include <asm/visasm.h>
#include <asm/asi.h>
#include <asm/dcu.h>
#include <asm/spitfire.h>
#undef SMALL_COPY_USES_FPU
#define XCC xcc
#define EXNV_RAW(x,y,a,b)	\
98:	x,y;			\
	.section .fixup;	\
	.align 4;		\
99:	ba U3cfu_fixup;		\
	 a, b, %o1;		\
	.section __ex_table;	\
	.align 4;		\
	.word 98b, 99b;		\
	.text;			\
	.align 4;
#define EXNV(x,y,a,b)		\
98:	x,y;			\
	.section .fixup;	\
	.align 4;		\
-99:	VISExitHalf;		\
+99:	add %o1, %o3, %o0;	\
	ba U3cfu_fixup;		\
	 a, b, %o1;		\
	.section __ex_table;	\
...
...
@@ -22,6 +33,32 @@
	.word 98b, 99b;		\
	.text;			\
	.align 4;
#define EXNV4(x,y,a,b)		\
98:	x,y;			\
	.section .fixup;	\
	.align 4;		\
99:	add %o1, %o3, %o0;	\
	a, b, %o1;		\
	ba U3cfu_fixup;		\
	 add %o1, 4, %o1;	\
	.section __ex_table;	\
	.align 4;		\
	.word 98b, 99b;		\
	.text;			\
	.align 4;
#define EXNV8(x,y,a,b)		\
98:	x,y;			\
	.section .fixup;	\
	.align 4;		\
99:	add %o1, %o3, %o0;	\
	a, b, %o1;		\
	ba U3cfu_fixup;		\
	 add %o1, 8, %o1;	\
	.section __ex_table;	\
	.align 4;		\
	.word 98b, 99b;		\
	.text;			\
	.align 4;
#define EX(x,y,a,b)		\
98:	x,y;			\
	.section .fixup;	\
...
...
@@ -77,18 +114,9 @@
	.word 98b, 99b;		\
	.text;			\
	.align 4;
#else
#define ASI_BLK_P 0xf0
#define FPRS_FEF 0x04
#define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs
#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
#define SMALL_COPY_USES_FPU
#define EXNV(x,y,a,b) x,y;
#define EX(x,y,a,b) x,y;
#define EX2(x,y) x,y;
#define EX3(x,y) x,y;
#define EX4(x,y) x,y;
#endif
	.register	%g2, #scratch
	.register	%g3, #scratch
/* Special/non-trivial issues of this code:
 *
...
...
@@ -111,79 +139,53 @@
.
globl
U3copy_from_user
U3copy_from_user
:
/
*
%
o0
=
dst
,
%
o1
=
src
,
%
o2
=
len
*/
#ifndef __KERNEL__
/
*
Save
away
original
'dst'
for
memcpy
return
value
.
*/
mov
%
o0
,
%
g3
!
A0
Group
#endif
/
*
Anything
to
copy
at
all
?
*/
cmp
%
o2
,
0
!
A1
ble
,
pn
%
icc
,
U3copy_from_user_short_ret
!
BR
/
*
Extremely
small
copy
?
*/
cmp
%
o2
,
31
!
A0
Group
ble
,
pn
%
icc
,
U3copy_from_user_short
!
BR
/
*
Large
enough
to
use
unrolled
prefetch
loops
?
*/
cmp
%
o2
,
0x100
!
A1
bge
,
a
,
pt
%
icc
,
U3copy_from_user_enter
!
BR
Group
andcc
%
o0
,
0x3f
,
%
g2
!
A0
ba
,
pt
%
xcc
,
U3copy_from_user_toosmall
!
BR
Group
andcc
%
o0
,
0x7
,
%
g2
!
A0
cmp
%
o2
,
0
be
,
pn
%
XCC
,
out
or
%
o0
,
%
o1
,
%
o3
cmp
%
o2
,
16
bleu
,
a
,
pn
%
XCC
,
small_copy
or
%
o3
,
%
o2
,
%
o3
.
align
32
U3copy_from_user_short
:
/
*
Copy
%
o2
bytes
from
src
to
dst
,
one
byte
at
a
time
.
*/
EXNV
(
lduba
[%
o1
+
0x00
]
%
asi
,
%
o3
,
add
%
o2
,
%
g0
)!
MS
Group
add
%
o1
,
0x1
,
%
o1
!
A0
add
%
o0
,
0x1
,
%
o0
!
A1
subcc
%
o2
,
1
,
%
o2
!
A0
Group
bg
,
pt
%
icc
,
U3copy_from_user_short
!
BR
stb
%
o3
,
[%
o0
+
-
1
]
!
MS
Group
(
1
-
cycle
stall
)
U3copy_from_user_short_ret
:
#ifdef __KERNEL__
retl
!
BR
Group
(
0
-
4
cycle
stall
)
clr
%
o0
!
A0
#else
retl
!
BR
Group
(
0
-
4
cycle
stall
)
mov
%
g3
,
%
o0
!
A0
#endif
/
*
Here
len
>=
(
6
*
64
)
and
condition
codes
reflect
execution
cmp
%
o2
,
256
blu
,
pt
%
XCC
,
medium_copy
andcc
%
o3
,
0x7
,
%
g0
ba
,
pt
%
xcc
,
enter
andcc
%
o0
,
0x3f
,
%
g2
/
*
Here
len
>=
256
and
condition
codes
reflect
execution
*
of
"andcc %o0, 0x7, %g2"
,
done
by
caller
.
*/
.
align
64
U3copy_from_user_
enter
:
enter
:
/
*
Is
'dst'
already
aligned
on
an
64
-
byte
boundary
?
*/
be
,
pt
%
xcc
,
2
f
!
BR
be
,
pt
%
XCC
,
2
f
/
*
Compute
abs
((
dst
&
0x3f
)
-
0x40
)
into
%
g2
.
This
is
the
number
*
of
bytes
to
copy
to
make
'dst'
64
-
byte
aligned
.
We
pre
-
*
subtract
this
from
'len'
.
*/
sub
%
g2
,
0x40
,
%
g2
!
A0
Group
sub
%
g0
,
%
g2
,
%
g2
!
A0
Group
sub
%
o2
,
%
g2
,
%
o2
!
A0
Group
sub
%
g2
,
0x40
,
%
g2
sub
%
g0
,
%
g2
,
%
g2
sub
%
o2
,
%
g2
,
%
o2
/
*
Copy
%
g2
bytes
from
src
to
dst
,
one
byte
at
a
time
.
*/
1
:
EXNV
(
lduba
[%
o1
+
0x00
]
%
asi
,
%
o3
,
add
%
o2
,
%
g2
)!
MS
(
Group
)
add
%
o1
,
0x1
,
%
o1
!
A1
add
%
o0
,
0x1
,
%
o0
!
A0
Group
subcc
%
g2
,
0x1
,
%
g2
!
A1
1
:
EXNV
_RAW
(
lduba
[%
o1
+
0x00
]
%
asi
,
%
o3
,
add
%
o2
,
%
g2
)
add
%
o1
,
0x1
,
%
o1
add
%
o0
,
0x1
,
%
o0
subcc
%
g2
,
0x1
,
%
g2
bg
,
pt
%
icc
,
1
b
!
BR
Group
stb
%
o3
,
[%
o0
+
-
1
]
!
MS
Group
bg
,
pt
%
XCC
,
1
b
stb
%
o3
,
[%
o0
+
-
1
]
2
:
VISEntryHalf
!
MS
+
MS
and
%
o1
,
0x7
,
%
g1
!
A1
ba
,
pt
%
xcc
,
U3copy_from_user_begin
!
BR
alignaddr
%
o1
,
%
g0
,
%
o1
!
MS
(
Break
-
after
)
2
:
VISEntryHalf
and
%
o1
,
0x7
,
%
g1
ba
,
pt
%
xcc
,
begin
alignaddr
%
o1
,
%
g0
,
%
o1
.
align
64
U3copy_from_user_
begin
:
#ifdef __KERNEL__
begin
:
.
globl
U3copy_from_user_nop_1_6
U3copy_from_user_nop_1_6
:
ldxa
[%
g0
]
ASI_DCU_CONTROL_REG
,
%
g3
...
...
@@ -192,315 +194,225 @@ U3copy_from_user_nop_1_6:
or
%
g3
,
%
o3
,
%
o3
stxa
%
o3
,
[%
g0
]
ASI_DCU_CONTROL_REG
!
Enable
P
-
cache
membar
#
Sync
#endif
prefetcha
[%
o1
+
0x000
]
%
asi
,
#
one_read
!
MS
Group1
prefetcha
[%
o1
+
0x040
]
%
asi
,
#
one_read
!
MS
Group2
andn
%
o2
,
(
0x40
-
1
),
%
o4
!
A0
prefetcha
[%
o1
+
0x080
]
%
asi
,
#
one_read
!
MS
Group3
cmp
%
o4
,
0x140
!
A0
prefetcha
[%
o1
+
0x0c0
]
%
asi
,
#
one_read
!
MS
Group4
EX
(
ldda
[%
o1
+
0x000
]
%
asi
,
%f0
,
add
%
o2
,
%
g0
)
!
MS
Group5
(
%f0
results
at
G8
)
bge
,
a
,
pt
%
icc
,
1
f
!
BR
prefetcha
[%
o1
+
0x100
]
%
asi
,
#
one_read
!
MS
Group6
1
:
EX
(
ldda
[%
o1
+
0x008
]
%
asi
,
%f2
,
add
%
o2
,
%
g0
)
!
AX
(
%f2
results
at
G9
)
cmp
%
o4
,
0x180
!
A1
bge
,
a
,
pt
%
icc
,
1
f
!
BR
prefetcha
[%
o1
+
0x140
]
%
asi
,
#
one_read
!
MS
Group7
1
:
EX
(
ldda
[%
o1
+
0x010
]
%
asi
,
%f4
,
add
%
o2
,
%
g0
)
!
AX
(
%f4
results
at
G10
)
cmp
%
o4
,
0x1c0
!
A1
bge
,
a
,
pt
%
icc
,
1
f
!
BR
prefetcha
[%
o1
+
0x180
]
%
asi
,
#
one_read
!
MS
Group8
1
:
faligndata
%f0
,
%f2
,
%f16
!
FGA
Group9
(
%f16
at
G12
)
EX
(
ldda
[%
o1
+
0x018
]
%
asi
,
%f6
,
add
%
o2
,
%
g0
)
!
AX
(
%f6
results
at
G12
)
faligndata
%f2
,
%f4
,
%f18
!
FGA
Group10
(
%f18
results
at
G13
)
EX
(
ldda
[%
o1
+
0x020
]
%
asi
,
%f8
,
add
%
o2
,
%
g0
)
!
MS
(
%f8
results
at
G13
)
faligndata
%f4
,
%f6
,
%f20
!
FGA
Group12
(
1
-
cycle
stall
,
%f20
at
G15
)
EX
(
ldda
[%
o1
+
0x028
]
%
asi
,
%f10
,
add
%
o2
,
%
g0
)
!
MS
(
%f10
results
at
G15
)
faligndata
%f6
,
%f8
,
%f22
!
FGA
Group13
(
%f22
results
at
G16
)
EX
(
ldda
[%
o1
+
0x030
]
%
asi
,
%f12
,
add
%
o2
,
%
g0
)
!
MS
(
%f12
results
at
G16
)
faligndata
%f8
,
%f10
,
%f24
!
FGA
Group15
(
1
-
cycle
stall
,
%f24
at
G18
)
EX
(
ldda
[%
o1
+
0x038
]
%
asi
,
%f14
,
add
%
o2
,
%
g0
)
!
MS
(
%f14
results
at
G18
)
faligndata
%f10
,
%f12
,
%f26
!
FGA
Group16
(
%f26
results
at
G19
)
EX
(
ldda
[%
o1
+
0x040
]
%
asi
,
%f0
,
add
%
o2
,
%
g0
)
!
MS
(
%f0
results
at
G19
)
/
*
We
only
use
the
first
loop
if
len
>
(
7
*
64
)
.
*/
subcc
%
o4
,
0x1c0
,
%
o4
!
A0
Group17
bg
,
pt
%
icc
,
U3copy_from_user_loop1
!
BR
add
%
o1
,
0x40
,
%
o1
!
A1
add
%
o4
,
0x140
,
%
o4
!
A0
Group18
ba
,
pt
%
xcc
,
U3copy_from_user_loop2
!
BR
srl
%
o4
,
6
,
%
o3
!
A0
Group19
nop
nop
nop
nop
nop
nop
nop
prefetcha
[%
o1
+
0x000
]
%
asi
,
#
one_read
prefetcha
[%
o1
+
0x040
]
%
asi
,
#
one_read
andn
%
o2
,
(
0x40
-
1
),
%
o4
prefetcha
[%
o1
+
0x080
]
%
asi
,
#
one_read
prefetcha
[%
o1
+
0x0c0
]
%
asi
,
#
one_read
EX
(
ldda
[%
o1
+
0x000
]
%
asi
,
%f0
,
add
%
o2
,
%
g0
)
prefetcha
[%
o1
+
0x100
]
%
asi
,
#
one_read
EX
(
ldda
[%
o1
+
0x008
]
%
asi
,
%f2
,
add
%
o2
,
%
g0
)
prefetcha
[%
o1
+
0x140
]
%
asi
,
#
one_read
EX
(
ldda
[%
o1
+
0x010
]
%
asi
,
%f4
,
add
%
o2
,
%
g0
)
prefetcha
[%
o1
+
0x180
]
%
asi
,
#
one_read
faligndata
%f0
,
%f2
,
%f16
EX
(
ldda
[%
o1
+
0x018
]
%
asi
,
%f6
,
add
%
o2
,
%
g0
)
faligndata
%f2
,
%f4
,
%f18
EX
(
ldda
[%
o1
+
0x020
]
%
asi
,
%f8
,
add
%
o2
,
%
g0
)
faligndata
%f4
,
%f6
,
%f20
EX
(
ldda
[%
o1
+
0x028
]
%
asi
,
%f10
,
add
%
o2
,
%
g0
)
faligndata
%f6
,
%f8
,
%f22
EX
(
ldda
[%
o1
+
0x030
]
%
asi
,
%f12
,
add
%
o2
,
%
g0
)
faligndata
%f8
,
%f10
,
%f24
EX
(
ldda
[%
o1
+
0x038
]
%
asi
,
%f14
,
add
%
o2
,
%
g0
)
faligndata
%f10
,
%f12
,
%f26
EX
(
ldda
[%
o1
+
0x040
]
%
asi
,
%f0
,
add
%
o2
,
%
g0
)
sub
%
o4
,
0x80
,
%
o4
add
%
o1
,
0x40
,
%
o1
ba
,
pt
%
xcc
,
loop
srl
%
o4
,
6
,
%
o3
/
*
This
loop
performs
the
copy
and
queues
new
prefetches
.
*
We
drop
into
the
second
loop
when
len
<=
(
5
*
64
)
.
Note
*
that
this
(
5
*
64
)
factor
has
been
subtracted
from
len
*
already
.
*/
U3copy_from_user_loop1
:
EX2
(
ldda
[%
o1
+
0x008
]
%
asi
,
%f2
)
!
MS
Group2
(
%f2
results
at
G5
)
faligndata
%f12
,
%f14
,
%f28
!
FGA
(
%f28
results
at
G5
)
EX2
(
ldda
[%
o1
+
0x010
]
%
asi
,
%f4
)
!
MS
Group3
(
%f4
results
at
G6
)
faligndata
%f14
,
%f0
,
%f30
!
FGA
Group4
(
1
-
cycle
stall
,
%f30
at
G7
)
stda
%f16
,
[%
o0
]
ASI_BLK_P
!
MS
EX2
(
ldda
[%
o1
+
0x018
]
%
asi
,
%f6
)
!
AX
(
%f6
results
at
G7
)
faligndata
%f0
,
%f2
,
%f16
!
FGA
Group12
(
7
-
cycle
stall
)
EX2
(
ldda
[%
o1
+
0x020
]
%
asi
,
%f8
)
!
MS
(
%f8
results
at
G15
)
faligndata
%f2
,
%f4
,
%f18
!
FGA
Group13
(
%f18
results
at
G16
)
EX2
(
ldda
[%
o1
+
0x028
]
%
asi
,
%f10
)
!
MS
(
%f10
results
at
G16
)
faligndata
%f4
,
%f6
,
%f20
!
FGA
Group14
(
%f20
results
at
G17
)
EX2
(
ldda
[%
o1
+
0x030
]
%
asi
,
%f12
)
!
MS
(
%f12
results
at
G17
)
faligndata
%f6
,
%f8
,
%f22
!
FGA
Group15
(
%f22
results
at
G18
)
EX2
(
ldda
[%
o1
+
0x038
]
%
asi
,
%f14
)
!
MS
(
%f14
results
at
G18
)
faligndata
%f8
,
%f10
,
%f24
!
FGA
Group16
(
%f24
results
at
G19
)
EX2
(
ldda
[%
o1
+
0x040
]
%
asi
,
%f0
)
!
AX
(
%f0
results
at
G19
)
prefetcha
[%
o1
+
0x180
]
%
asi
,
#
one_read
!
MS
faligndata
%f10
,
%f12
,
%f26
!
FGA
Group17
(
%f26
results
at
G20
)
subcc
%
o4
,
0x40
,
%
o4
!
A0
add
%
o1
,
0x40
,
%
o1
!
A1
bg
,
pt
%
xcc
,
U3copy_from_user_loop1
!
BR
add
%
o0
,
0x40
,
%
o0
!
A0
Group18
U3copy_from_user_loop2_enter
:
mov
5
,
%
o3
!
A1
/
*
This
loop
performs
on
the
copy
,
no
new
prefetches
are
*
queued
.
We
do
things
this
way
so
that
we
do
not
perform
*
any
spurious
prefetches
past
the
end
of
the
src
buffer
.
*/
U3copy_from_user_loop2
:
EX3
(
ldda
[%
o1
+
0x008
]
%
asi
,
%f2
)
!
MS
faligndata
%f12
,
%f14
,
%f28
!
FGA
Group2
EX3
(
ldda
[%
o1
+
0x010
]
%
asi
,
%f4
)
!
MS
faligndata
%f14
,
%f0
,
%f30
!
FGA
Group4
(
1
-
cycle
stall
)
stda
%f16
,
[%
o0
]
ASI_BLK_P
!
MS
EX3
(
ldda
[%
o1
+
0x018
]
%
asi
,
%f6
)
!
AX
faligndata
%f0
,
%f2
,
%f16
!
FGA
Group12
(
7
-
cycle
stall
)
EX3
(
ldda
[%
o1
+
0x020
]
%
asi
,
%f8
)
!
MS
faligndata
%f2
,
%f4
,
%f18
!
FGA
Group13
EX3
(
ldda
[%
o1
+
0x028
]
%
asi
,
%f10
)
!
MS
faligndata
%f4
,
%f6
,
%f20
!
FGA
Group14
EX3
(
ldda
[%
o1
+
0x030
]
%
asi
,
%f12
)
!
MS
faligndata
%f6
,
%f8
,
%f22
!
FGA
Group15
EX3
(
ldda
[%
o1
+
0x038
]
%
asi
,
%f14
)
!
MS
faligndata
%f8
,
%f10
,
%f24
!
FGA
Group16
EX3
(
ldda
[%
o1
+
0x040
]
%
asi
,
%f0
)
!
AX
faligndata
%f10
,
%f12
,
%f26
!
FGA
Group17
subcc
%
o3
,
0x01
,
%
o3
!
A0
add
%
o1
,
0x40
,
%
o1
!
A1
bg
,
pt
%
xcc
,
U3copy_from_user_loop2
!
BR
add
%
o0
,
0x40
,
%
o0
!
A0
Group18
.
align
64
loop
:
EX3
(
ldda
[%
o1
+
0x008
]
%
asi
,
%f2
)
faligndata
%f12
,
%f14
,
%f28
EX3
(
ldda
[%
o1
+
0x010
]
%
asi
,
%f4
)
faligndata
%f14
,
%f0
,
%f30
stda
%f16
,
[%
o0
]
ASI_BLK_P
EX3
(
ldda
[%
o1
+
0x018
]
%
asi
,
%f6
)
faligndata
%f0
,
%f2
,
%f16
EX3
(
ldda
[%
o1
+
0x020
]
%
asi
,
%f8
)
faligndata
%f2
,
%f4
,
%f18
EX3
(
ldda
[%
o1
+
0x028
]
%
asi
,
%f10
)
faligndata
%f4
,
%f6
,
%f20
EX3
(
ldda
[%
o1
+
0x030
]
%
asi
,
%f12
)
faligndata
%f6
,
%f8
,
%f22
EX3
(
ldda
[%
o1
+
0x038
]
%
asi
,
%f14
)
faligndata
%f8
,
%f10
,
%f24
EX3
(
ldda
[%
o1
+
0x040
]
%
asi
,
%f0
)
prefetcha
[%
o1
+
0x180
]
%
asi
,
#
one_read
faligndata
%f10
,
%f12
,
%f26
subcc
%
o3
,
0x01
,
%
o3
add
%
o1
,
0x40
,
%
o1
bg
,
pt
%
XCC
,
loop
add
%
o0
,
0x40
,
%
o0
/
*
Finally
we
copy
the
last
full
64
-
byte
block
.
*/
U3copy_from_user_
loopfini
:
EX3
(
ldda
[%
o1
+
0x008
]
%
asi
,
%f2
)
!
MS
faligndata
%f12
,
%f14
,
%f28
!
FGA
EX3
(
ldda
[%
o1
+
0x010
]
%
asi
,
%f4
)
!
MS
Group19
faligndata
%f14
,
%f0
,
%f30
!
FGA
stda
%f16
,
[%
o0
]
ASI_BLK_P
!
MS
Group20
EX3
(
ldda
[%
o1
+
0x018
]
%
asi
,
%f6
)
!
AX
faligndata
%f0
,
%f2
,
%f16
!
FGA
Group11
(
7
-
cycle
stall
)
EX3
(
ldda
[%
o1
+
0x020
]
%
asi
,
%f8
)
!
MS
faligndata
%f2
,
%f4
,
%f18
!
FGA
Group12
EX3
(
ldda
[%
o1
+
0x028
]
%
asi
,
%f10
)
!
MS
faligndata
%f4
,
%f6
,
%f20
!
FGA
Group13
EX3
(
ldda
[%
o1
+
0x030
]
%
asi
,
%f12
)
!
MS
faligndata
%f6
,
%f8
,
%f22
!
FGA
Group14
EX3
(
ldda
[%
o1
+
0x038
]
%
asi
,
%f14
)
!
MS
faligndata
%f8
,
%f10
,
%f24
!
FGA
Group15
cmp
%
g1
,
0
!
A0
be
,
pt
%
icc
,
1
f
!
BR
add
%
o0
,
0x40
,
%
o0
!
A1
EX4
(
ldda
[%
o1
+
0x040
]
%
asi
,
%f0
)
!
MS
1
:
faligndata
%f10
,
%f12
,
%f26
!
FGA
Group16
faligndata
%f12
,
%f14
,
%f28
!
FGA
Group17
faligndata
%f14
,
%f0
,
%f30
!
FGA
Group18
stda
%f16
,
[%
o0
]
ASI_BLK_P
!
MS
add
%
o0
,
0x40
,
%
o0
!
A0
add
%
o1
,
0x40
,
%
o1
!
A1
#ifdef __KERNEL__
loopfini
:
EX3
(
ldda
[%
o1
+
0x008
]
%
asi
,
%f2
)
faligndata
%f12
,
%f14
,
%f28
EX3
(
ldda
[%
o1
+
0x010
]
%
asi
,
%f4
)
faligndata
%f14
,
%f0
,
%f30
stda
%f16
,
[%
o0
]
ASI_BLK_P
EX3
(
ldda
[%
o1
+
0x018
]
%
asi
,
%f6
)
faligndata
%f0
,
%f2
,
%f16
EX3
(
ldda
[%
o1
+
0x020
]
%
asi
,
%f8
)
faligndata
%f2
,
%f4
,
%f18
EX3
(
ldda
[%
o1
+
0x028
]
%
asi
,
%f10
)
faligndata
%f4
,
%f6
,
%f20
EX3
(
ldda
[%
o1
+
0x030
]
%
asi
,
%f12
)
faligndata
%f6
,
%f8
,
%f22
EX3
(
ldda
[%
o1
+
0x038
]
%
asi
,
%f14
)
faligndata
%f8
,
%f10
,
%f24
cmp
%
g1
,
0
be
,
pt
%
XCC
,
1
f
add
%
o0
,
0x40
,
%
o0
EX4
(
ldda
[%
o1
+
0x040
]
%
asi
,
%f0
)
1
:
faligndata
%f10
,
%f12
,
%f26
faligndata
%f12
,
%f14
,
%f28
faligndata
%f14
,
%f0
,
%f30
stda
%f16
,
[%
o0
]
ASI_BLK_P
add
%
o0
,
0x40
,
%
o0
add
%
o1
,
0x40
,
%
o1
.
globl
U3copy_from_user_nop_2_3
U3copy_from_user_nop_2_3
:
mov
PRIMARY_CONTEXT
,
%
o3
stxa
%
g0
,
[%
o3
]
ASI_DMMU
!
Flush
P
-
cache
stxa
%
g3
,
[%
g0
]
ASI_DCU_CONTROL_REG
!
Disable
P
-
cache
#endif
membar
#
Sync
!
MS
Group26
(
7
-
cycle
stall
)
membar
#
Sync
/
*
Now
we
copy
the
(
len
modulo
64
)
bytes
at
the
end
.
*
Note
how
we
borrow
the
%f0
loaded
above
.
*
*
Also
notice
how
this
code
is
careful
not
to
perform
a
*
load
past
the
end
of
the
src
buffer
just
like
similar
*
code
found
in
U3copy_from_user_toosmall
processing
.
*
load
past
the
end
of
the
src
buffer
.
*/
U3copy_from_user_
loopend
:
and
%
o2
,
0x3f
,
%
o2
!
A0
Group
andcc
%
o2
,
0x38
,
%
g2
!
A0
Group
be
,
pn
%
icc
,
U3copy_from_user_endcruft
!
BR
subcc
%
g2
,
0x8
,
%
g2
!
A1
be
,
pn
%
icc
,
U3copy_from_user_endcruft
!
BR
Group
cmp
%
g1
,
0
!
A0
be
,
a
,
pt
%
icc
,
1
f
!
BR
Group
EX
(
ldda
[%
o1
+
0x00
]
%
asi
,
%f0
,
add
%
o2
,
%
g0
)
!
MS
1
:
EX
(
ldda
[%
o1
+
0x08
]
%
asi
,
%f2
,
add
%
o2
,
%
g0
)
!
MS
Group
add
%
o1
,
0x8
,
%
o1
!
A0
sub
%
o2
,
0x8
,
%
o2
!
A1
subcc
%
g2
,
0x8
,
%
g2
!
A0
Group
faligndata
%f0
,
%f2
,
%f8
!
FGA
Group
std
%f8
,
[%
o0
+
0x00
]
!
MS
(
XXX
does
it
stall
here
?
XXX
)
be
,
pn
%
icc
,
U3copy_from_user_endcruft
!
BR
add
%
o0
,
0x8
,
%
o0
!
A0
EX
(
ldda
[%
o1
+
0x08
]
%
asi
,
%f0
,
add
%
o2
,
%
g0
)
!
MS
Group
add
%
o1
,
0x8
,
%
o1
!
A0
sub
%
o2
,
0x8
,
%
o2
!
A1
subcc
%
g2
,
0x8
,
%
g2
!
A0
Group
faligndata
%f2
,
%f0
,
%f8
!
FGA
std
%f8
,
[%
o0
+
0x00
]
!
MS
(
XXX
does
it
stall
here
?
XXX
)
bne
,
pn
%
icc
,
1
b
!
BR
add
%
o0
,
0x8
,
%
o0
!
A0
Group
loopend
:
and
%
o2
,
0x3f
,
%
o2
andcc
%
o2
,
0x38
,
%
g2
be
,
pn
%
XCC
,
endcruft
subcc
%
g2
,
0x8
,
%
g2
be
,
pn
%
XCC
,
endcruft
cmp
%
g1
,
0
be
,
a
,
pt
%
XCC
,
1
f
EX
(
ldda
[%
o1
+
0x00
]
%
asi
,
%f0
,
add
%
o2
,
%
g0
)
1
:
EX
(
ldda
[%
o1
+
0x08
]
%
asi
,
%f2
,
add
%
o2
,
%
g0
)
add
%
o1
,
0x8
,
%
o1
sub
%
o2
,
0x8
,
%
o2
subcc
%
g2
,
0x8
,
%
g2
faligndata
%f0
,
%f2
,
%f8
std
%f8
,
[%
o0
+
0x00
]
be
,
pn
%
XCC
,
endcruft
add
%
o0
,
0x8
,
%
o0
EX
(
ldda
[%
o1
+
0x08
]
%
asi
,
%f0
,
add
%
o2
,
%
g0
)
add
%
o1
,
0x8
,
%
o1
sub
%
o2
,
0x8
,
%
o2
subcc
%
g2
,
0x8
,
%
g2
faligndata
%f2
,
%f0
,
%f8
std
%f8
,
[%
o0
+
0x00
]
bne
,
pn
%
XCC
,
1
b
add
%
o0
,
0x8
,
%
o0
/
*
If
anything
is
left
,
we
copy
it
one
byte
at
a
time
.
*
Note
that
%
g1
is
(
src
&
0x3
)
saved
above
before
the
*
alignaddr
was
performed
.
*/
U3copy_from_user_
endcruft
:
endcruft
:
cmp
%
o2
,
0
add
%
o1
,
%
g1
,
%
o1
VISExitHalf
be
,
pn
%
icc
,
U3copy_from_user_short_ret
nop
ba
,
a
,
pt
%
xcc
,
U3copy_from_user_short
/
*
If
we
get
here
,
then
32
<=
len
<
(
6
*
64
)
*/
U3copy_from_user_toosmall
:
#ifdef SMALL_COPY_USES_FPU
/
*
Is
'dst'
already
aligned
on
an
8
-
byte
boundary
?
*/
be
,
pt
%
xcc
,
2
f
!
BR
Group
/
*
Compute
abs
((
dst
&
7
)
-
8
)
into
%
g2
.
This
is
the
number
*
of
bytes
to
copy
to
make
'dst'
8
-
byte
aligned
.
We
pre
-
*
subtract
this
from
'len'
.
*/
sub
%
g2
,
0x8
,
%
g2
!
A0
sub
%
g0
,
%
g2
,
%
g2
!
A0
Group
(
reg
-
dep
)
sub
%
o2
,
%
g2
,
%
o2
!
A0
Group
(
reg
-
dep
)
/
*
Copy
%
g2
bytes
from
src
to
dst
,
one
byte
at
a
time
.
*/
1
:
EXNV
(
lduba
[%
o1
+
0x00
]
%
asi
,
%
o3
,
add
%
o2
,
%
g2
)!
MS
(
Group
)
(%
o3
in
3
cycles
)
add
%
o1
,
0x1
,
%
o1
!
A1
add
%
o0
,
0x1
,
%
o0
!
A0
Group
subcc
%
g2
,
0x1
,
%
g2
!
A1
be
,
pn
%
XCC
,
out
sub
%
o0
,
%
o1
,
%
o3
bg
,
pt
%
icc
,
1
b
!
BR
Group
stb
%
o3
,
[%
o0
+
-
1
]
!
MS
Group
andcc
%
g1
,
0x7
,
%
g0
bne
,
pn
%
icc
,
small_copy_unaligned
andcc
%
o2
,
0x8
,
%
g0
be
,
pt
%
icc
,
1
f
nop
EXNV
(
ldxa
[%
o1
]
%
asi
,
%
o5
,
add
%
o2
,
%
g0
)
stx
%
o5
,
[%
o1
+
%
o3
]
add
%
o1
,
0x8
,
%
o1
2
:
VISEntryHalf
!
MS
+
MS
1
:
andcc
%
o2
,
0x4
,
%
g0
be
,
pt
%
icc
,
1
f
nop
EXNV
(
lduwa
[%
o1
]
%
asi
,
%
o5
,
and
%
o2
,
0x7
)
stw
%
o5
,
[%
o1
+
%
o3
]
add
%
o1
,
0x4
,
%
o1
/
*
Compute
(
len
-
(
len
%
8
))
into
%
g2
.
This
is
guaranteed
*
to
be
nonzero
.
*/
andn
%
o2
,
0x7
,
%
g2
!
A0
Group
/
*
You
may
read
this
and
believe
that
it
allows
reading
*
one
8
-
byte
longword
past
the
end
of
src
.
It
actually
*
does
not
,
as
%
g2
is
subtracted
as
loads
are
done
from
*
src
,
so
we
always
stop
before
running
off
the
end
.
*
Also
,
we
are
guaranteed
to
have
at
least
0x10
bytes
*
to
move
here
.
*/
sub
%
g2
,
0x8
,
%
g2
!
A0
Group
(
reg
-
dep
)
alignaddr
%
o1
,
%
g0
,
%
g1
!
MS
(
Break
-
after
)
EX
(
ldda
[%
g1
+
0x00
]
%
asi
,
%f0
,
add
%
o2
,
%
g0
)
!
MS
Group
(
1
-
cycle
stall
)
add
%
g1
,
0x8
,
%
g1
!
A0
1
:
EX
(
ldda
[%
g1
+
0x00
]
%
asi
,
%f2
,
add
%
o2
,
%
g0
)
!
MS
Group
add
%
g1
,
0x8
,
%
g1
!
A0
sub
%
o2
,
0x8
,
%
o2
!
A1
subcc
%
g2
,
0x8
,
%
g2
!
A0
Group
faligndata
%f0
,
%f2
,
%f8
!
FGA
Group
(
1
-
cycle
stall
)
std
%f8
,
[%
o0
+
0x00
]
!
MS
Group
(
2
-
cycle
stall
)
add
%
o1
,
0x8
,
%
o1
!
A0
be
,
pn
%
icc
,
2
f
!
BR
add
%
o0
,
0x8
,
%
o0
!
A1
EX
(
ldda
[%
g1
+
0x00
]
%
asi
,
%f0
,
add
%
o2
,
%
g0
)
!
MS
Group
add
%
g1
,
0x8
,
%
g1
!
A0
sub
%
o2
,
0x8
,
%
o2
!
A1
subcc
%
g2
,
0x8
,
%
g2
!
A0
Group
faligndata
%f2
,
%f0
,
%f8
!
FGA
Group
(
1
-
cycle
stall
)
std
%f8
,
[%
o0
+
0x00
]
!
MS
Group
(
2
-
cycle
stall
)
add
%
o1
,
0x8
,
%
o1
!
A0
bne
,
pn
%
icc
,
1
b
!
BR
add
%
o0
,
0x8
,
%
o0
!
A1
/
*
Nothing
left
to
copy
?
*/
2
:
cmp
%
o2
,
0
!
A0
Group
VISExitHalf
!
A0
+
MS
be
,
pn
%
icc
,
U3copy_from_user_short_ret
!
BR
Group
nop
!
A0
ba
,
a
,
pt
%
xcc
,
U3copy_from_user_short
!
BR
Group
#else /* !(SMALL_COPY_USES_FPU) */
xor
%
o1
,
%
o0
,
%
g2
andcc
%
g2
,
0x7
,
%
g0
bne
,
pn
%
icc
,
U3copy_from_user_short
andcc
%
o1
,
0x7
,
%
g2
be
,
pt
%
xcc
,
2
f
sub
%
g2
,
0x8
,
%
g2
sub
%
g0
,
%
g2
,
%
g2
sub
%
o2
,
%
g2
,
%
o2
1
:
andcc
%
o2
,
0x2
,
%
g0
be
,
pt
%
icc
,
1
f
nop
EXNV
(
lduha
[%
o1
]
%
asi
,
%
o5
,
and
%
o2
,
0x3
)
sth
%
o5
,
[%
o1
+
%
o3
]
add
%
o1
,
0x2
,
%
o1
1
:
EXNV
(
lduba
[%
o1
+
0x00
]
%
asi
,
%
o3
,
add
%
o2
,
%
g2
)
add
%
o1
,
0x1
,
%
o1
add
%
o0
,
0x1
,
%
o0
subcc
%
g2
,
0x1
,
%
g2
bg
,
pt
%
icc
,
1
b
stb
%
o3
,
[%
o0
+
-
1
]
1
:
andcc
%
o2
,
0x1
,
%
g0
be
,
pt
%
icc
,
out
nop
EXNV
(
lduba
[%
o1
]
%
asi
,
%
o5
,
and
%
o2
,
0x1
)
ba
,
pt
%
xcc
,
out
stb
%
o5
,
[%
o1
+
%
o3
]
medium_copy
:
/
*
16
<
len
<=
64
*/
bne
,
pn
%
XCC
,
small_copy_unaligned
sub
%
o0
,
%
o1
,
%
o3
medium_copy_aligned
:
andn
%
o2
,
0x7
,
%
o4
and
%
o2
,
0x7
,
%
o2
1
:
subcc
%
o4
,
0x8
,
%
o4
EXNV8
(
ldxa
[%
o1
]
%
asi
,
%
o5
,
add
%
o2
,
%
o4
)
stx
%
o5
,
[%
o1
+
%
o3
]
bgu
,
pt
%
XCC
,
1
b
add
%
o1
,
0x8
,
%
o1
andcc
%
o2
,
0x4
,
%
g0
be
,
pt
%
XCC
,
1
f
nop
sub
%
o2
,
0x4
,
%
o2
EXNV4
(
lduwa
[%
o1
]
%
asi
,
%
o5
,
add
%
o2
,
%
g0
)
stw
%
o5
,
[%
o1
+
%
o3
]
add
%
o1
,
0x4
,
%
o1
1
:
cmp
%
o2
,
0
be
,
pt
%
XCC
,
out
nop
ba
,
pt
%
xcc
,
small_copy_unaligned
nop
2
:
andn
%
o2
,
0x7
,
%
g2
sub
%
o2
,
%
g2
,
%
o2
small_copy
:
/
*
0
<
len
<=
16
*/
andcc
%
o3
,
0x3
,
%
g0
bne
,
pn
%
XCC
,
small_copy_unaligned
sub
%
o0
,
%
o1
,
%
o3
3
:
EXNV
(
ldxa
[%
o1
+
0x00
]
%
asi
,
%
o3
,
add
%
o2
,
%
g2
)
add
%
o1
,
0x8
,
%
o1
add
%
o0
,
0x8
,
%
o0
s
ubcc
%
g2
,
0x8
,
%
g2
bg
,
pt
%
icc
,
3
b
stx
%
o3
,
[%
o0
+
-
8
]
small_copy_aligned
:
subcc
%
o2
,
4
,
%
o2
EXNV
(
lduwa
[%
o1
]
%
asi
,
%
g1
,
add
%
o2
,
%
g0
)
s
tw
%
g1
,
[%
o1
+
%
o3
]
bg
u
,
pt
%
XCC
,
small_copy_aligned
add
%
o1
,
4
,
%
o1
cmp
%
o2
,
0
bne
,
pn
%
icc
,
U3copy_from_user_short
nop
ba
,
a
,
pt
%
xcc
,
U3copy_from_user_short_ret
out
:
retl
clr
%
o0
#endif /* !(SMALL_COPY_USES_FPU) */
.
align
32
small_copy_unaligned
:
subcc
%
o2
,
1
,
%
o2
EXNV
(
lduba
[%
o1
]
%
asi
,
%
g1
,
add
%
o2
,
%
g0
)
stb
%
g1
,
[%
o1
+
%
o3
]
bgu
,
pt
%
XCC
,
small_copy_unaligned
add
%
o1
,
1
,
%
o1
retl
clr
%
o0
#ifdef __KERNEL__
	.globl	U3cfu_fixup
U3cfu_fixup:
	/* Since this is copy_from_user(), zero out the rest of the
	 * kernel buffer.
...
...
@@ -516,4 +428,3 @@ U3cfu_fixup:
2:	retl
	 mov	%o1, %o0
#endif
arch/sparc64/lib/U3copy_in_user.S

-/* $Id: U3copy_in_user.S,v 1.4 2001/03/21 05:58:47 davem Exp $
- * U3memcpy.S: UltraSparc-III optimized copy within userspace.
+/* U3copy_in_user.S: UltraSparc-III optimized memcpy.
  *
- * Copyright (C) 1999, 2000 David S. Miller (davem@redhat.com)
+ * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
  */
#ifdef __KERNEL__
#include <asm/visasm.h>
#include <asm/asi.h>
#undef SMALL_COPY_USES_FPU
#include <asm/dcu.h>
#include <asm/spitfire.h>
#define XCC xcc
#define EXNV(x,y,a,b) \
98:	x,y;			\
	.section .fixup;	\
...
...
@@ -19,7 +21,7 @@
.
word
98
b
,
99
b
; \
.
text
; \
.
align
4
;
#define EXNV
2(x,y,a,b)
\
#define EXNV
1(x,y,a,b)
\
98
:
x
,
y
; \
.
section
.
fixup
; \
.
align
4
; \
...
...
@@ -31,122 +33,41 @@
.
word
98
b
,
99
b
; \
.
text
; \
.
align
4
;
#define EXNV
3(x,y,a,b)
\
#define EXNV
4(x,y,a,b)
\
98
:
x
,
y
; \
.
section
.
fixup
; \
.
align
4
; \
99
:
a
,
b
,
%
o0
; \
retl
; \
add
%
o0
,
8
,
%
o0
; \
.
section
__ex_table
; \
.
align
4
; \
.
word
98
b
,
99
b
; \
.
text
; \
.
align
4
;
#define EX(x,y,a,b) \
98
:
x
,
y
; \
.
section
.
fixup
; \
.
align
4
; \
99
:
VISExitHalf
; \
retl
; \
a
,
b
,
%
o0
; \
.
section
__ex_table
; \
.
align
4
; \
.
word
98
b
,
99
b
; \
.
text
; \
.
align
4
;
#define EXBLK1(x,y) \
98
:
x
,
y
; \
.
section
.
fixup
; \
.
align
4
; \
99
:
VISExitHalf
; \
add
%
o4
,
0x1c0
,
%
o1
; \
and
%
o2
,
(
0x40
-
1
),
%
o2
; \
retl
; \
add
%
o1
,
%
o2
,
%
o0
; \
.
section
__ex_table
; \
.
align
4
; \
.
word
98
b
,
99
b
; \
.
text
; \
.
align
4
;
#define EXBLK2(x,y) \
98
:
x
,
y
; \
.
section
.
fixup
; \
.
align
4
; \
99
:
VISExitHalf
; \
sll
%
o3
,
6
,
%
o3
; \
and
%
o2
,
(
0x40
-
1
),
%
o2
; \
add
%
o3
,
0x80
,
%
o1
; \
retl
; \
add
%
o1
,
%
o2
,
%
o0
; \
add
%
o0
,
4
,
%
o0
; \
.
section
__ex_table
; \
.
align
4
; \
.
word
98
b
,
99
b
; \
.
text
; \
.
align
4
;
#define EX
BLK3(x,y
) \
#define EX
NV8(x,y,a,b
) \
98
:
x
,
y
; \
.
section
.
fixup
; \
.
align
4
; \
99
:
VISExitHalf
; \
and
%
o2
,
(
0x40
-
1
),
%
o2
; \
retl
; \
add
%
o2
,
0x80
,
%
o0
; \
.
section
__ex_table
; \
.
align
4
; \
.
word
98
b
,
99
b
; \
.
text
; \
.
align
4
;
#define EXBLK4(x,y) \
98
:
x
,
y
; \
.
section
.
fixup
; \
.
align
4
; \
99
:
VISExitHalf
; \
and
%
o2
,
(
0x40
-
1
),
%
o2
; \
99
:
a
,
b
,
%
o0
; \
retl
; \
add
%
o
2
,
0x40
,
%
o0
; \
add
%
o
0
,
8
,
%
o0
; \
.
section
__ex_table
; \
.
align
4
; \
.
word
98
b
,
99
b
; \
.
text
; \
.
align
4
;
#else
#define ASI_AIUS 0x80
#define ASI_BLK_AIUS 0xf0
#define FPRS_FEF 0x04
#define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs
#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
#define SMALL_COPY_USES_FPU
#define EXNV(x,y,a,b) x,y;
#define EXNV2(x,y,a,b) x,y;
#define EXNV3(x,y,a,b) x,y;
#define EX(x,y,a,b) x,y;
#define EXBLK1(x,y) x,y;
#define EXBLK2(x,y) x,y;
#define EXBLK3(x,y) x,y;
#define EXBLK4(x,y) x,y;
#endif
/* Special/non-trivial issues of this code:
 *
 * 1) %o5 is preserved from VISEntryHalf to VISExitHalf
 * 2) Only low 32 FPU registers are used so that only the
 *    lower half of the FPU register set is dirtied by this
 *    code.  This is especially important in the kernel.
 * 3) This code never prefetches cachelines past the end
 *    of the source buffer.
 *
 * XXX Actually, Cheetah can buffer up to 8 concurrent
 * XXX prefetches, revisit this...
 */

	.register	%g2, #scratch
	.register	%g3, #scratch

	.text
	.align	32

-/* The cheetah's flexible spine, oversized liver, enlarged heart,
- * slender muscular body, and claws make it the swiftest hunter
- * in Africa and the fastest animal on land.  Can reach speeds
- * of up to 2.4GB per second.
+/* Don't try to get too fancy here, just nice and
+ * simple.  This is predominantly used for well aligned
+ * small copies in the compat layer.  It is also used
+ * to copy register windows around during thread cloning.
  */

	.globl	U3copy_in_user
...
...
@@ -155,377 +76,65 @@ U3copy_in_user: /* %o0=dst, %o1=src, %o2=len */
*
Reading
%
asi
to
check
for
KERNEL_DS
is
comparatively
*
cheap
.
*/
rd
%
asi
,
%
g1
!
MS
Group
(
4
cycles
)
cmp
%
g1
,
ASI_AIUS
!
A0
Group
bne
U3memcpy
!
BR
nop
!
A1
#ifndef __KERNEL__
/
*
Save
away
original
'dst'
for
memcpy
return
value
.
*/
mov
%
o0
,
%
g3
!
A0
Group
#endif
/
*
Anything
to
copy
at
all
?
*/
cmp
%
o2
,
0
!
A1
ble
,
pn
%
icc
,
U3copy_in_user_short_ret
!
BR
/
*
Extremely
small
copy
?
*/
cmp
%
o2
,
31
!
A0
Group
ble
,
pn
%
icc
,
U3copy_in_user_short
!
BR
/
*
Large
enough
to
use
unrolled
prefetch
loops
?
*/
cmp
%
o2
,
0x100
!
A1
bge
,
a
,
pt
%
icc
,
U3copy_in_user_enter
!
BR
Group
andcc
%
o0
,
0x3f
,
%
g2
!
A0
ba
,
pt
%
xcc
,
U3copy_in_user_toosmall
!
BR
Group
andcc
%
o0
,
0x7
,
%
g2
!
A0
.
align
32
U3copy_in_user_short
:
/
*
Copy
%
o2
bytes
from
src
to
dst
,
one
byte
at
a
time
.
*/
EXNV
(
lduba
[%
o1
+
0x00
]
%
asi
,
%
o3
,
add
%
o2
,
%
g0
)!
MS
Group
add
%
o1
,
0x1
,
%
o1
!
A0
add
%
o0
,
0x1
,
%
o0
!
A1
subcc
%
o2
,
1
,
%
o2
!
A0
Group
bg
,
pt
%
icc
,
U3copy_in_user_short
!
BR
EXNV
(
stba
%
o3
,
[%
o0
+
-
1
]
%
asi
,
add
%
o2
,
1
)
!
MS
Group
(
1
-
cycle
stall
)
U3copy_in_user_short_ret
:
#ifdef __KERNEL__
retl
!
BR
Group
(
0
-
4
cycle
stall
)
clr
%
o0
!
A0
#else
retl
!
BR
Group
(
0
-
4
cycle
stall
)
mov
%
g3
,
%
o0
!
A0
#endif
/
*
Here
len
>=
(
6
*
64
)
and
condition
codes
reflect
execution
*
of
"andcc %o0, 0x7, %g2"
,
done
by
caller
.
*/
.
align
64
U3copy_in_user_enter
:
/
*
Is
'dst'
already
aligned
on
an
64
-
byte
boundary
?
*/
be
,
pt
%
xcc
,
2
f
!
BR
/
*
Compute
abs
((
dst
&
0x3f
)
-
0x40
)
into
%
g2
.
This
is
the
number
*
of
bytes
to
copy
to
make
'dst'
64
-
byte
aligned
.
We
pre
-
*
subtract
this
from
'len'
.
*/
sub
%
g2
,
0x40
,
%
g2
!
A0
Group
sub
%
g0
,
%
g2
,
%
g2
!
A0
Group
sub
%
o2
,
%
g2
,
%
o2
!
A0
Group
/
*
Copy
%
g2
bytes
from
src
to
dst
,
one
byte
at
a
time
.
*/
1
:
EXNV
(
lduba
[%
o1
+
0x00
]
%
asi
,
%
o3
,
add
%
o2
,
%
g2
)!
MS
(
Group
)
add
%
o1
,
0x1
,
%
o1
!
A1
add
%
o0
,
0x1
,
%
o0
!
A0
Group
subcc
%
g2
,
0x1
,
%
g2
!
A1
bg
,
pt
%
icc
,
1
b
!
BR
Group
EXNV2
(
stba
%
o3
,
[%
o0
+
-
1
]
%
asi
,
add
%
o2
,
%
g2
)
!
MS
Group
2
:
VISEntryHalf
!
MS
+
MS
and
%
o1
,
0x7
,
%
g1
!
A1
ba
,
pt
%
xcc
,
U3copy_in_user_begin
!
BR
alignaddr
%
o1
,
%
g0
,
%
o1
!
MS
(
Break
-
after
)
.
align
64
U3copy_in_user_begin
:
prefetcha
[%
o1
+
0x000
]
%
asi
,
#
one_read
!
MS
Group1
prefetcha
[%
o1
+
0x040
]
%
asi
,
#
one_read
!
MS
Group2
andn
%
o2
,
(
0x40
-
1
),
%
o4
!
A0
prefetcha
[%
o1
+
0x080
]
%
asi
,
#
one_read
!
MS
Group3
cmp
%
o4
,
0x140
!
A0
prefetcha
[%
o1
+
0x0c0
]
%
asi
,
#
one_read
!
MS
Group4
EX
(
ldda
[%
o1
+
0x000
]
%
asi
,
%f0
,
add
%
o2
,
%
g0
)
!
MS
Group5
(
%f0
results
at
G8
)
bge
,
a
,
pt
%
icc
,
1
f
!
BR
prefetcha
[%
o1
+
0x100
]
%
asi
,
#
one_read
!
MS
Group6
1
:
EX
(
ldda
[%
o1
+
0x008
]
%
asi
,
%f2
,
add
%
o2
,
%
g0
)
!
AX
(
%f2
results
at
G9
)
cmp
%
o4
,
0x180
!
A1
bge
,
a
,
pt
%
icc
,
1
f
!
BR
prefetcha
[%
o1
+
0x140
]
%
asi
,
#
one_read
!
MS
Group7
1
:
EX
(
ldda
[%
o1
+
0x010
]
%
asi
,
%f4
,
add
%
o2
,
%
g0
)
!
AX
(
%f4
results
at
G10
)
cmp
%
o4
,
0x1c0
!
A1
bge
,
a
,
pt
%
icc
,
1
f
!
BR
prefetcha
[%
o1
+
0x180
]
%
asi
,
#
one_read
!
MS
Group8
1
:
faligndata
%f0
,
%f2
,
%f16
!
FGA
Group9
(
%f16
at
G12
)
EX
(
ldda
[%
o1
+
0x018
]
%
asi
,
%f6
,
add
%
o2
,
%
g0
)
!
AX
(
%f6
results
at
G12
)
faligndata
%f2
,
%f4
,
%f18
!
FGA
Group10
(
%f18
results
at
G13
)
EX
(
ldda
[%
o1
+
0x020
]
%
asi
,
%f8
,
add
%
o2
,
%
g0
)
!
MS
(
%f8
results
at
G13
)
faligndata
%f4
,
%f6
,
%f20
!
FGA
Group12
(
1
-
cycle
stall
,
%f20
at
G15
)
EX
(
ldda
[%
o1
+
0x028
]
%
asi
,
%f10
,
add
%
o2
,
%
g0
)
!
MS
(
%f10
results
at
G15
)
faligndata
%f6
,
%f8
,
%f22
!
FGA
Group13
(
%f22
results
at
G16
)
EX
(
ldda
[%
o1
+
0x030
]
%
asi
,
%f12
,
add
%
o2
,
%
g0
)
!
MS
(
%f12
results
at
G16
)
faligndata
%f8
,
%f10
,
%f24
!
FGA
Group15
(
1
-
cycle
stall
,
%f24
at
G18
)
EX
(
ldda
[%
o1
+
0x038
]
%
asi
,
%f14
,
add
%
o2
,
%
g0
)
!
MS
(
%f14
results
at
G18
)
faligndata
%f10
,
%f12
,
%f26
!
FGA
Group16
(
%f26
results
at
G19
)
EX
(
ldda
[%
o1
+
0x040
]
%
asi
,
%f0
,
add
%
o2
,
%
g0
)
!
MS
(
%f0
results
at
G19
)
/
*
We
only
use
the
first
loop
if
len
>
(
7
*
64
)
.
*/
subcc
%
o4
,
0x1c0
,
%
o4
!
A0
Group17
bg
,
pt
%
icc
,
U3copy_in_user_loop1
!
BR
add
%
o1
,
0x40
,
%
o1
!
A1
add
%
o4
,
0x140
,
%
o4
!
A0
Group18
ba
,
pt
%
xcc
,
U3copy_in_user_loop2
!
BR
srl
%
o4
,
6
,
%
o3
!
A0
Group19
nop
nop
nop
nop
rd
%
asi
,
%
g1
cmp
%
g1
,
ASI_AIUS
bne
,
pn
%
icc
,
U3memcpy_user_stub
nop
cmp
%
o2
,
0
be
,
pn
%
XCC
,
out
or
%
o0
,
%
o1
,
%
o3
cmp
%
o2
,
16
bleu
,
a
,
pn
%
XCC
,
small_copy
or
%
o3
,
%
o2
,
%
o3
medium_copy
:
/
*
16
<
len
<=
64
*/
andcc
%
o3
,
0x7
,
%
g0
bne
,
pn
%
XCC
,
small_copy_unaligned
sub
%
o0
,
%
o1
,
%
o3
medium_copy_aligned
:
andn
%
o2
,
0x7
,
%
o4
and
%
o2
,
0x7
,
%
o2
1
:
subcc
%
o4
,
0x8
,
%
o4
EXNV8
(
ldxa
[%
o1
]
%
asi
,
%
o5
,
add
%
o4
,
%
o2
)
EXNV8
(
stxa
%
o5
,
[%
o1
+
%
o3
]
ASI_AIUS
,
add
%
o4
,
%
o2
)
bgu
,
pt
%
XCC
,
1
b
add
%
o1
,
0x8
,
%
o1
andcc
%
o2
,
0x4
,
%
g0
be
,
pt
%
XCC
,
1
f
nop
sub
%
o2
,
0x4
,
%
o2
EXNV4
(
lduwa
[%
o1
]
%
asi
,
%
o5
,
add
%
o4
,
%
o2
)
EXNV4
(
stwa
%
o5
,
[%
o1
+
%
o3
]
ASI_AIUS
,
add
%
o4
,
%
o2
)
add
%
o1
,
0x4
,
%
o1
1
:
cmp
%
o2
,
0
be
,
pt
%
XCC
,
out
nop
/
*
This
loop
performs
the
copy
and
queues
new
prefetches
.
*
We
drop
into
the
second
loop
when
len
<=
(
5
*
64
)
.
Note
*
that
this
(
5
*
64
)
factor
has
been
subtracted
from
len
*
already
.
*/
U3copy_in_user_loop1
:
EXBLK1
(
ldda
[%
o1
+
0x008
]
%
asi
,
%f2
)
!
MS
Group2
(
%f2
results
at
G5
)
faligndata
%f12
,
%f14
,
%f28
!
FGA
(
%f28
results
at
G5
)
EXBLK1
(
ldda
[%
o1
+
0x010
]
%
asi
,
%f4
)
!
MS
Group3
(
%f4
results
at
G6
)
faligndata
%f14
,
%f0
,
%f30
!
FGA
Group4
(
1
-
cycle
stall
,
%f30
at
G7
)
EXBLK1
(
stda
%f16
,
[%
o0
]
ASI_BLK_AIUS
)
!
MS
EXBLK1
(
ldda
[%
o1
+
0x018
]
%
asi
,
%f6
)
!
AX
(
%f6
results
at
G7
)
faligndata
%f0
,
%f2
,
%f16
!
FGA
Group12
(
7
-
cycle
stall
)
EXBLK1
(
ldda
[%
o1
+
0x020
]
%
asi
,
%f8
)
!
MS
(
%f8
results
at
G15
)
faligndata
%f2
,
%f4
,
%f18
!
FGA
Group13
(
%f18
results
at
G16
)
EXBLK1
(
ldda
[%
o1
+
0x028
]
%
asi
,
%f10
)
!
MS
(
%f10
results
at
G16
)
faligndata
%f4
,
%f6
,
%f20
!
FGA
Group14
(
%f20
results
at
G17
)
EXBLK1
(
ldda
[%
o1
+
0x030
]
%
asi
,
%f12
)
!
MS
(
%f12
results
at
G17
)
faligndata
%f6
,
%f8
,
%f22
!
FGA
Group15
(
%f22
results
at
G18
)
EXBLK1
(
ldda
[%
o1
+
0x038
]
%
asi
,
%f14
)
!
MS
(
%f14
results
at
G18
)
faligndata
%f8
,
%f10
,
%f24
!
FGA
Group16
(
%f24
results
at
G19
)
EXBLK1
(
ldda
[%
o1
+
0x040
]
%
asi
,
%f0
)
!
AX
(
%f0
results
at
G19
)
prefetcha
[%
o1
+
0x180
]
%
asi
,
#
one_read
!
MS
faligndata
%f10
,
%f12
,
%f26
!
FGA
Group17
(
%f26
results
at
G20
)
subcc
%
o4
,
0x40
,
%
o4
!
A0
add
%
o1
,
0x40
,
%
o1
!
A1
bg
,
pt
%
xcc
,
U3copy_in_user_loop1
!
BR
add
%
o0
,
0x40
,
%
o0
!
A0
Group18
U3copy_in_user_loop2_enter
:
mov
5
,
%
o3
!
A1
/
*
This
loop
performs
on
the
copy
,
no
new
prefetches
are
*
queued
.
We
do
things
this
way
so
that
we
do
not
perform
*
any
spurious
prefetches
past
the
end
of
the
src
buffer
.
*/
U3copy_in_user_loop2
:
EXBLK2
(
ldda
[%
o1
+
0x008
]
%
asi
,
%f2
)
!
MS
faligndata
%f12
,
%f14
,
%f28
!
FGA
Group2
EXBLK2
(
ldda
[%
o1
+
0x010
]
%
asi
,
%f4
)
!
MS
faligndata
%f14
,
%f0
,
%f30
!
FGA
Group4
(
1
-
cycle
stall
)
EXBLK2
(
stda
%f16
,
[%
o0
]
ASI_BLK_AIUS
)
!
MS
EXBLK2
(
ldda
[%
o1
+
0x018
]
%
asi
,
%f6
)
!
AX
faligndata
%f0
,
%f2
,
%f16
!
FGA
Group12
(
7
-
cycle
stall
)
EXBLK2
(
ldda
[%
o1
+
0x020
]
%
asi
,
%f8
)
!
MS
faligndata
%f2
,
%f4
,
%f18
!
FGA
Group13
EXBLK2
(
ldda
[%
o1
+
0x028
]
%
asi
,
%f10
)
!
MS
faligndata
%f4
,
%f6
,
%f20
!
FGA
Group14
EXBLK2
(
ldda
[%
o1
+
0x030
]
%
asi
,
%f12
)
!
MS
faligndata
%f6
,
%f8
,
%f22
!
FGA
Group15
EXBLK2
(
ldda
[%
o1
+
0x038
]
%
asi
,
%f14
)
!
MS
faligndata
%f8
,
%f10
,
%f24
!
FGA
Group16
EXBLK2
(
ldda
[%
o1
+
0x040
]
%
asi
,
%f0
)
!
AX
faligndata
%f10
,
%f12
,
%f26
!
FGA
Group17
subcc
%
o3
,
0x01
,
%
o3
!
A0
add
%
o1
,
0x40
,
%
o1
!
A1
bg
,
pt
%
xcc
,
U3copy_in_user_loop2
!
BR
add
%
o0
,
0x40
,
%
o0
!
A0
Group18
/
*
Finally
we
copy
the
last
full
64
-
byte
block
.
*/
U3copy_in_user_loopfini
:
EXBLK3
(
ldda
[%
o1
+
0x008
]
%
asi
,
%f2
)
!
MS
faligndata
%f12
,
%f14
,
%f28
!
FGA
EXBLK3
(
ldda
[%
o1
+
0x010
]
%
asi
,
%f4
)
!
MS
Group19
faligndata
%f14
,
%f0
,
%f30
!
FGA
EXBLK3
(
stda
%f16
,
[%
o0
]
ASI_BLK_AIUS
)
!
MS
Group20
EXBLK4
(
ldda
[%
o1
+
0x018
]
%
asi
,
%f6
)
!
AX
faligndata
%f0
,
%f2
,
%f16
!
FGA
Group11
(
7
-
cycle
stall
)
EXBLK4
(
ldda
[%
o1
+
0x020
]
%
asi
,
%f8
)
!
MS
faligndata
%f2
,
%f4
,
%f18
!
FGA
Group12
EXBLK4
(
ldda
[%
o1
+
0x028
]
%
asi
,
%f10
)
!
MS
faligndata
%f4
,
%f6
,
%f20
!
FGA
Group13
EXBLK4
(
ldda
[%
o1
+
0x030
]
%
asi
,
%f12
)
!
MS
faligndata
%f6
,
%f8
,
%f22
!
FGA
Group14
EXBLK4
(
ldda
[%
o1
+
0x038
]
%
asi
,
%f14
)
!
MS
faligndata
%f8
,
%f10
,
%f24
!
FGA
Group15
cmp
%
g1
,
0
!
A0
be
,
pt
%
icc
,
1
f
!
BR
add
%
o0
,
0x40
,
%
o0
!
A1
EXBLK4
(
ldda
[%
o1
+
0x040
]
%
asi
,
%f0
)
!
MS
1
:
faligndata
%f10
,
%f12
,
%f26
!
FGA
Group16
faligndata
%f12
,
%f14
,
%f28
!
FGA
Group17
faligndata
%f14
,
%f0
,
%f30
!
FGA
Group18
EXBLK4
(
stda
%f16
,
[%
o0
]
ASI_BLK_AIUS
)
!
MS
add
%
o0
,
0x40
,
%
o0
!
A0
add
%
o1
,
0x40
,
%
o1
!
A1
membar
#
Sync
!
MS
Group26
(
7
-
cycle
stall
)
/
*
Now
we
copy
the
(
len
modulo
64
)
bytes
at
the
end
.
*
Note
how
we
borrow
the
%f0
loaded
above
.
*
*
Also
notice
how
this
code
is
careful
not
to
perform
a
*
load
past
the
end
of
the
src
buffer
just
like
similar
*
code
found
in
U3copy_in_user_toosmall
processing
.
*/
U3copy_in_user_loopend
:
and
%
o2
,
0x3f
,
%
o2
!
A0
Group
andcc
%
o2
,
0x38
,
%
g2
!
A0
Group
be
,
pn
%
icc
,
U3copy_in_user_endcruft
!
BR
subcc
%
g2
,
0x8
,
%
g2
!
A1
be
,
pn
%
icc
,
U3copy_in_user_endcruft
!
BR
Group
cmp
%
g1
,
0
!
A0
be
,
a
,
pt
%
icc
,
1
f
!
BR
Group
EX
(
ldda
[%
o1
+
0x00
]
%
asi
,
%f0
,
add
%
o2
,
%
g0
)
!
MS
1
:
EX
(
ldda
[%
o1
+
0x08
]
%
asi
,
%f2
,
add
%
o2
,
%
g0
)
!
MS
Group
add
%
o1
,
0x8
,
%
o1
!
A0
sub
%
o2
,
0x8
,
%
o2
!
A1
subcc
%
g2
,
0x8
,
%
g2
!
A0
Group
faligndata
%f0
,
%f2
,
%f8
!
FGA
Group
EX
(
stda
%f8
,
[%
o0
+
0x00
]
%
asi
,
add
%
o2
,
0x8
)
!
MS
(
XXX
does
it
stall
here
?
XXX
)
be
,
pn
%
icc
,
U3copy_in_user_endcruft
!
BR
add
%
o0
,
0x8
,
%
o0
!
A0
EX
(
ldda
[%
o1
+
0x08
]
%
asi
,
%f0
,
add
%
o2
,
%
g0
)
!
MS
Group
add
%
o1
,
0x8
,
%
o1
!
A0
sub
%
o2
,
0x8
,
%
o2
!
A1
subcc
%
g2
,
0x8
,
%
g2
!
A0
Group
faligndata
%f2
,
%f0
,
%f8
!
FGA
EX
(
stda
%f8
,
[%
o0
+
0x00
]
%
asi
,
add
%
o2
,
0x8
)
!
MS
(
XXX
does
it
stall
here
?
XXX
)
bne
,
pn
%
icc
,
1
b
!
BR
add
%
o0
,
0x8
,
%
o0
!
A0
Group
/
*
If
anything
is
left
,
we
copy
it
one
byte
at
a
time
.
*
Note
that
%
g1
is
(
src
&
0x3
)
saved
above
before
the
*
alignaddr
was
performed
.
*/
U3copy_in_user_endcruft
:
cmp
%
o2
,
0
add
%
o1
,
%
g1
,
%
o1
VISExitHalf
be
,
pn
%
icc
,
U3copy_in_user_short_ret
ba
,
pt
%
xcc
,
small_copy_unaligned
nop
ba
,
a
,
pt
%
xcc
,
U3copy_in_user_short
/
*
If
we
get
here
,
then
32
<=
len
<
(
6
*
64
)
*/
U3copy_in_user_toosmall
:
#ifdef SMALL_COPY_USES_FPU
/
*
Is
'dst'
already
aligned
on
an
8
-
byte
boundary
?
*/
be
,
pt
%
xcc
,
2
f
!
BR
Group
/
*
Compute
abs
((
dst
&
7
)
-
8
)
into
%
g2
.
This
is
the
number
*
of
bytes
to
copy
to
make
'dst'
8
-
byte
aligned
.
We
pre
-
*
subtract
this
from
'len'
.
*/
sub
%
g2
,
0x8
,
%
g2
!
A0
sub
%
g0
,
%
g2
,
%
g2
!
A0
Group
(
reg
-
dep
)
sub
%
o2
,
%
g2
,
%
o2
!
A0
Group
(
reg
-
dep
)
/
*
Copy
%
g2
bytes
from
src
to
dst
,
one
byte
at
a
time
.
*/
1
:
EXNV2
(
lduba
[%
o1
+
0x00
]
%
asi
,
%
o3
,
add
%
o2
,
%
g2
)!
MS
(
Group
)
(%
o3
in
3
cycles
)
add
%
o1
,
0x1
,
%
o1
!
A1
add
%
o0
,
0x1
,
%
o0
!
A0
Group
subcc
%
g2
,
0x1
,
%
g2
!
A1
bg
,
pt
%
icc
,
1
b
!
BR
Group
EXNV2
(
stba
%
o3
,
[%
o0
+
-
1
]
%
asi
,
add
%
o2
,
%
g2
)
!
MS
Group
2
:
VISEntryHalf
!
MS
+
MS
/
*
Compute
(
len
-
(
len
%
8
))
into
%
g2
.
This
is
guaranteed
*
to
be
nonzero
.
*/
andn
%
o2
,
0x7
,
%
g2
!
A0
Group
/
*
You
may
read
this
and
believe
that
it
allows
reading
*
one
8
-
byte
longword
past
the
end
of
src
.
It
actually
*
does
not
,
as
%
g2
is
subtracted
as
loads
are
done
from
*
src
,
so
we
always
stop
before
running
off
the
end
.
*
Also
,
we
are
guaranteed
to
have
at
least
0x10
bytes
*
to
move
here
.
*/
sub
%
g2
,
0x8
,
%
g2
!
A0
Group
(
reg
-
dep
)
alignaddr
%
o1
,
%
g0
,
%
g1
!
MS
(
Break
-
after
)
EX
(
ldda
[%
g1
+
0x00
]
%
asi
,
%f0
,
add
%
o2
,
%
g0
)
!
MS
Group
(
1
-
cycle
stall
)
add
%
g1
,
0x8
,
%
g1
!
A0
1
:
EX
(
ldda
[%
g1
+
0x00
]
%
asi
,
%f2
,
add
%
o2
,
%
g0
)
!
MS
Group
a
dd
%
g1
,
0x8
,
%
g1
!
A
0
sub
%
o2
,
0x8
,
%
o2
!
A1
subcc
%
g2
,
0x8
,
%
g2
!
A0
Group
small_copy
:
/
*
0
<
len
<=
16
*/
a
ndcc
%
o3
,
0x3
,
%
g
0
bne
,
pn
%
XCC
,
small_copy_unaligned
sub
%
o0
,
%
o1
,
%
o3
faligndata
%f0
,
%f2
,
%f8
!
FGA
Group
(
1
-
cycle
stall
)
EX
(
stda
%f8
,
[%
o0
+
0x00
]
%
asi
,
add
%
o2
,
0x8
)
!
MS
Group
(
2
-
cycle
stall
)
add
%
o1
,
0x8
,
%
o1
!
A0
be
,
pn
%
icc
,
2
f
!
BR
small_copy_aligned
:
subcc
%
o2
,
4
,
%
o2
EXNV4
(
lduwa
[%
o1
]
%
asi
,
%
g1
,
add
%
o2
,
%
g0
)
EXNV4
(
stwa
%
g1
,
[%
o1
+
%
o3
]
ASI_AIUS
,
add
%
o2
,
%
g0
)
bgu
,
pt
%
XCC
,
small_copy_aligned
add
%
o1
,
4
,
%
o1
add
%
o0
,
0x8
,
%
o0
!
A1
EX
(
ldda
[%
g1
+
0x00
]
%
asi
,
%f0
,
add
%
o2
,
%
g0
)
!
MS
Group
add
%
g1
,
0x8
,
%
g1
!
A0
sub
%
o2
,
0x8
,
%
o2
!
A1
out
:
retl
clr
%
o0
subcc
%
g2
,
0x8
,
%
g2
!
A0
Group
faligndata
%f2
,
%f0
,
%f8
!
FGA
Group
(
1
-
cycle
stall
)
EX
(
stda
%f8
,
[%
o0
+
0x00
]
%
asi
,
add
%
o2
,
0x8
)
!
MS
Group
(
2
-
cycle
stall
)
add
%
o1
,
0x8
,
%
o1
!
A0
bne
,
pn
%
icc
,
1
b
!
BR
add
%
o0
,
0x8
,
%
o0
!
A1
/
*
Nothing
left
to
copy
?
*/
2
:
cmp
%
o2
,
0
!
A0
Group
VISExitHalf
!
A0
+
MS
be
,
pn
%
icc
,
U3copy_in_user_short_ret
!
BR
Group
nop
!
A0
ba
,
a
,
pt
%
xcc
,
U3copy_in_user_short
!
BR
Group
#else /* !(SMALL_COPY_USES_FPU) */
xor
%
o1
,
%
o0
,
%
g2
andcc
%
g2
,
0x7
,
%
g0
bne
,
pn
%
icc
,
U3copy_in_user_short
andcc
%
o1
,
0x7
,
%
g2
be
,
pt
%
xcc
,
2
f
sub
%
g2
,
0x8
,
%
g2
sub
%
g0
,
%
g2
,
%
g2
sub
%
o2
,
%
g2
,
%
o2
1
:
EXNV2
(
lduba
[%
o1
+
0x00
]
%
asi
,
%
o3
,
add
%
o2
,
%
g2
)
add
%
o1
,
0x1
,
%
o1
add
%
o0
,
0x1
,
%
o0
subcc
%
g2
,
0x1
,
%
g2
bg
,
pt
%
icc
,
1
b
EXNV2
(
stba
%
o3
,
[%
o0
+
-
1
]
%
asi
,
add
%
o2
,
%
g2
)
2
:
andn
%
o2
,
0x7
,
%
g2
sub
%
o2
,
%
g2
,
%
o2
3
:
EXNV3
(
ldxa
[%
o1
+
0x00
]
%
asi
,
%
o3
,
add
%
o2
,
%
g2
)
add
%
o1
,
0x8
,
%
o1
add
%
o0
,
0x8
,
%
o0
subcc
%
g2
,
0x8
,
%
g2
bg
,
pt
%
icc
,
3
b
EXNV3
(
stxa
%
o3
,
[%
o0
+
-
8
]
%
asi
,
add
%
o2
,
%
g2
)
cmp
%
o2
,
0
bne
,
pn
%
icc
,
U3copy_in_user_short
nop
ba
,
a
,
pt
%
xcc
,
U3copy_in_user_short_ret
#endif /* !(SMALL_COPY_USES_FPU) */
.
align
32
small_copy_unaligned
:
subcc
%
o2
,
1
,
%
o2
EXNV1
(
lduba
[%
o1
]
%
asi
,
%
g1
,
add
%
o2
,
%
g0
)
EXNV1
(
stba
%
g1
,
[%
o1
+
%
o3
]
ASI_AIUS
,
add
%
o2
,
%
g0
)
bgu
,
pt
%
XCC
,
small_copy_unaligned
add
%
o1
,
1
,
%
o1
retl
clr
%
o0
arch/sparc64/lib/U3copy_to_user.S

-/* $Id: U3copy_to_user.S,v 1.3 2000/11/01 09:29:19 davem Exp $
- * U3memcpy.S: UltraSparc-III optimized copy to userspace.
+/* U3copy_to_user.S: UltraSparc-III optimized memcpy.
  *
- * Copyright (C) 1999, 2000 David S. Miller (davem@redhat.com)
+ * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
  */
#ifdef __KERNEL__
#include <asm/visasm.h>
#include <asm/asi.h>
#include <asm/dcu.h>
#include <asm/spitfire.h>
#undef SMALL_COPY_USES_FPU
#define XCC xcc
#define EXNV(x,y,a,b) \
98:	x,y;			\
	.section .fixup;	\
...
...
@@ -34,6 +34,18 @@
	.text;			\
	.align 4;
#define EXNV3(x,y,a,b)		\
98:	x,y;			\
	.section .fixup;	\
	.align 4;		\
99:	a, b, %o0;		\
	retl;			\
	 add %o0, 4, %o0;	\
	.section __ex_table;	\
	.align 4;		\
	.word 98b, 99b;		\
	.text;			\
	.align 4;
#define EXNV4(x,y,a,b)		\
98:	x,y;			\
	.section .fixup;	\
	.align 4;		\
...
...
@@ -112,22 +124,9 @@
	.word 98b, 99b;		\
	.text;			\
	.align 4;
#else
#define ASI_AIUS 0x80
#define ASI_BLK_AIUS 0xf0
#define FPRS_FEF 0x04
#define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs
#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
#define SMALL_COPY_USES_FPU
#define EXNV(x,y,a,b) x,y;
#define EXNV2(x,y,a,b) x,y;
#define EXNV3(x,y,a,b) x,y;
#define EX(x,y,a,b) x,y;
#define EXBLK1(x,y) x,y;
#define EXBLK2(x,y) x,y;
#define EXBLK3(x,y) x,y;
#define EXBLK4(x,y) x,y;
#endif
	.register	%g2, #scratch
	.register	%g3, #scratch
/* Special/non-trivial issues of this code:
 *
...
...
@@ -154,83 +153,58 @@ U3copy_to_user: /* %o0=dst, %o1=src, %o2=len */
*
Reading
%
asi
to
check
for
KERNEL_DS
is
comparatively
*
cheap
.
*/
rd
%
asi
,
%
g1
!
MS
Group
(
4
cycles
)
cmp
%
g1
,
ASI_AIUS
!
A0
Group
bne
U3memcpy
!
BR
nop
!
A1
#ifndef __KERNEL__
/
*
Save
away
original
'dst'
for
memcpy
return
value
.
*/
mov
%
o0
,
%
g3
!
A0
Group
#endif
/
*
Anything
to
copy
at
all
?
*/
cmp
%
o2
,
0
!
A1
ble
,
pn
%
icc
,
U3copy_to_user_short_ret
!
BR
/
*
Extremely
small
copy
?
*/
cmp
%
o2
,
31
!
A0
Group
ble
,
pn
%
icc
,
U3copy_to_user_short
!
BR
/
*
Large
enough
to
use
unrolled
prefetch
loops
?
*/
cmp
%
o2
,
0x100
!
A1
bge
,
a
,
pt
%
icc
,
U3copy_to_user_enter
!
BR
Group
andcc
%
o0
,
0x3f
,
%
g2
!
A0
ba
,
pt
%
xcc
,
U3copy_to_user_toosmall
!
BR
Group
andcc
%
o0
,
0x7
,
%
g2
!
A0
rd
%
asi
,
%
g1
cmp
%
g1
,
ASI_AIUS
bne
,
pn
%
icc
,
U3memcpy_user_stub
nop
.
align
32
U3copy_to_user_short
:
/
*
Copy
%
o2
bytes
from
src
to
dst
,
one
byte
at
a
time
.
*/
ldub
[%
o1
+
0x00
],
%
o3
!
MS
Group
add
%
o1
,
0x1
,
%
o1
!
A0
add
%
o0
,
0x1
,
%
o0
!
A1
subcc
%
o2
,
1
,
%
o2
!
A0
Group
bg
,
pt
%
icc
,
U3copy_to_user_short
!
BR
EXNV
(
stba
%
o3
,
[%
o0
+
-
1
]
%
asi
,
add
%
o2
,
1
)
!
MS
Group
(
1
-
cycle
stall
)
U3copy_to_user_short_ret
:
#ifdef __KERNEL__
retl
!
BR
Group
(
0
-
4
cycle
stall
)
clr
%
o0
!
A0
#else
retl
!
BR
Group
(
0
-
4
cycle
stall
)
mov
%
g3
,
%
o0
!
A0
#endif
/
*
Here
len
>=
(
6
*
64
)
and
condition
codes
reflect
execution
cmp
%
o2
,
0
be
,
pn
%
XCC
,
out
or
%
o0
,
%
o1
,
%
o3
cmp
%
o2
,
16
bleu
,
a
,
pn
%
XCC
,
small_copy
or
%
o3
,
%
o2
,
%
o3
cmp
%
o2
,
256
blu
,
pt
%
XCC
,
medium_copy
andcc
%
o3
,
0x7
,
%
g0
ba
,
pt
%
xcc
,
enter
andcc
%
o0
,
0x3f
,
%
g2
/
*
Here
len
>=
256
and
condition
codes
reflect
execution
*
of
"andcc %o0, 0x7, %g2"
,
done
by
caller
.
*/
.
align
64
U3copy_to_user_
enter
:
enter
:
/
*
Is
'dst'
already
aligned
on
an
64
-
byte
boundary
?
*/
be
,
pt
%
xcc
,
2
f
!
BR
be
,
pt
%
XCC
,
2
f
/
*
Compute
abs
((
dst
&
0x3f
)
-
0x40
)
into
%
g2
.
This
is
the
number
*
of
bytes
to
copy
to
make
'dst'
64
-
byte
aligned
.
We
pre
-
*
subtract
this
from
'len'
.
*/
sub
%
g2
,
0x40
,
%
g2
!
A0
Group
sub
%
g0
,
%
g2
,
%
g2
!
A0
Group
sub
%
o2
,
%
g2
,
%
o2
!
A0
Group
sub
%
g2
,
0x40
,
%
g2
sub
%
g0
,
%
g2
,
%
g2
sub
%
o2
,
%
g2
,
%
o2
/
*
Copy
%
g2
bytes
from
src
to
dst
,
one
byte
at
a
time
.
*/
1
:
ldub
[%
o1
+
0x00
],
%
o3
!
MS
(
Group
)
add
%
o1
,
0x1
,
%
o1
!
A1
add
%
o0
,
0x1
,
%
o0
!
A0
Group
subcc
%
g2
,
0x1
,
%
g2
!
A1
1
:
ldub
[%
o1
+
0x00
],
%
o3
add
%
o1
,
0x1
,
%
o1
add
%
o0
,
0x1
,
%
o0
subcc
%
g2
,
0x1
,
%
g2
bg
,
pt
%
icc
,
1
b
!
BR
Group
EXNV2
(
stba
%
o3
,
[%
o0
+
-
1
]
%
asi
,
add
%
o2
,
%
g2
)
!
MS
Group
bg
,
pt
%
XCC
,
1
b
EXNV2
(
stba
%
o3
,
[%
o0
+
-
1
]
%
asi
,
add
%
o2
,
%
g2
)
2
:
VISEntryHalf
!
MS
+
MS
and
%
o1
,
0x7
,
%
g1
!
A1
ba
,
pt
%
xcc
,
U3copy_to_user_begin
!
BR
alignaddr
%
o1
,
%
g0
,
%
o1
!
MS
(
Break
-
after
)
2
:
VISEntryHalf
and
%
o1
,
0x7
,
%
g1
ba
,
pt
%
xcc
,
begin
alignaddr
%
o1
,
%
g0
,
%
o1
.
align
64
U3copy_to_user_
begin
:
#ifdef __KERNEL__
begin
:
.
globl
U3copy_to_user_nop_1_6
U3copy_to_user_nop_1_6
:
ldxa
[%
g0
]
ASI_DCU_CONTROL_REG
,
%
g3
...
...
@@ -239,309 +213,221 @@ U3copy_to_user_nop_1_6:
	or		%g3, %o3, %o3
	stxa		%o3, [%g0] ASI_DCU_CONTROL_REG	! Enable P-cache
	membar		#Sync
#endif
	prefetch	[%o1 + 0x000], #one_read	! MS	Group1
	prefetch	[%o1 + 0x040], #one_read	! MS	Group2
	andn		%o2, (0x40 - 1), %o4		! A0
	prefetch	[%o1 + 0x080], #one_read	! MS	Group3
	cmp		%o4, 0x140			! A0
	prefetch	[%o1 + 0x0c0], #one_read	! MS	Group4
	ldd		[%o1 + 0x000], %f0		! MS	Group5	(%f0 results at G8)
	bge,a,pt	%icc, 1f			! BR
	 prefetch	[%o1 + 0x100], #one_read	! MS	Group6
1:	ldd		[%o1 + 0x008], %f2		! AX	(%f2 results at G9)
	cmp		%o4, 0x180			! A1
	bge,a,pt	%icc, 1f			! BR
	 prefetch	[%o1 + 0x140], #one_read	! MS	Group7
1:	ldd		[%o1 + 0x010], %f4		! AX	(%f4 results at G10)
	cmp		%o4, 0x1c0			! A1
	bge,a,pt	%icc, 1f			! BR
	 prefetch	[%o1 + 0x180], #one_read	! MS	Group8
1:	faligndata	%f0, %f2, %f16			! FGA	Group9	(%f16 at G12)
	ldd		[%o1 + 0x018], %f6		! AX	(%f6 results at G12)
	faligndata	%f2, %f4, %f18			! FGA	Group10	(%f18 results at G13)
	ldd		[%o1 + 0x020], %f8		! MS	(%f8 results at G13)
	faligndata	%f4, %f6, %f20			! FGA	Group12	(1-cycle stall, %f20 at G15)
	ldd		[%o1 + 0x028], %f10		! MS	(%f10 results at G15)
	faligndata	%f6, %f8, %f22			! FGA	Group13	(%f22 results at G16)
	ldd		[%o1 + 0x030], %f12		! MS	(%f12 results at G16)
	faligndata	%f8, %f10, %f24			! FGA	Group15	(1-cycle stall, %f24 at G18)
	ldd		[%o1 + 0x038], %f14		! MS	(%f14 results at G18)
	faligndata	%f10, %f12, %f26		! FGA	Group16	(%f26 results at G19)
	ldd		[%o1 + 0x040], %f0		! MS	(%f0 results at G19)

	/* We only use the first loop if len > (7 * 64). */
	subcc		%o4, 0x1c0, %o4			! A0	Group17
	bg,pt		%icc, U3copy_to_user_loop1	! BR
	 add		%o1, 0x40, %o1			! A1

	add		%o4, 0x140, %o4			! A0	Group18
	ba,pt		%xcc, U3copy_to_user_loop2	! BR
	 srl		%o4, 6, %o3			! A0	Group19
	nop
	nop
	nop
	nop
	nop
	nop
	nop
	prefetch	[%o1 + 0x000], #one_read
	prefetch	[%o1 + 0x040], #one_read
	andn		%o2, (0x40 - 1), %o4
	prefetch	[%o1 + 0x080], #one_read
	prefetch	[%o1 + 0x0c0], #one_read
	ldd		[%o1 + 0x000], %f0
	prefetch	[%o1 + 0x100], #one_read
	ldd		[%o1 + 0x008], %f2
	prefetch	[%o1 + 0x140], #one_read
	ldd		[%o1 + 0x010], %f4
	prefetch	[%o1 + 0x180], #one_read
	faligndata	%f0, %f2, %f16
	ldd		[%o1 + 0x018], %f6
	faligndata	%f2, %f4, %f18
	ldd		[%o1 + 0x020], %f8
	faligndata	%f4, %f6, %f20
	ldd		[%o1 + 0x028], %f10
	faligndata	%f6, %f8, %f22
	ldd		[%o1 + 0x030], %f12
	faligndata	%f8, %f10, %f24
	ldd		[%o1 + 0x038], %f14
	faligndata	%f10, %f12, %f26
	ldd		[%o1 + 0x040], %f0
	sub		%o4, 0x80, %o4
	add		%o1, 0x40, %o1
	ba,pt		%xcc, loop
	 srl		%o4, 6, %o3

	/* This loop performs the copy and queues new prefetches.
	 * We drop into the second loop when len <= (5 * 64).  Note
	 * that this (5 * 64) factor has been subtracted from len
	 * already.
	 */
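A rough C-level picture of the two copy loops this comment describes, as a sketch under
the assumption of 64-byte blocks and the 0x180-byte prefetch distance used above (helper
structure is illustrative, not the kernel code):

	/* Sketch: loop1 keeps prefetching ahead, loop2 drains the remaining
	 * blocks without issuing prefetches past the end of the source buffer. */
	static void copy_blocks(unsigned char *dst, const unsigned char *src,
				unsigned long len)
	{
		unsigned long i;

		while (len > 5 * 64) {			/* loop1 */
			__builtin_prefetch(src + 0x180, 0, 0);
			for (i = 0; i < 64; i++)
				dst[i] = src[i];	/* one 64-byte block */
			dst += 64; src += 64; len -= 64;
		}
		while (len >= 64) {			/* loop2 */
			for (i = 0; i < 64; i++)
				dst[i] = src[i];
			dst += 64; src += 64; len -= 64;
		}
	}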
U3copy_to_user_loop1:
ldd
[%
o1
+
0x008
],
%f2
!
MS
Group2
(
%f2
results
at
G5
)
faligndata
%f12
,
%f14
,
%f28
!
FGA
(
%f28
results
at
G5
)
ldd
[%
o1
+
0x010
],
%f4
!
MS
Group3
(
%f4
results
at
G6
)
faligndata
%f14
,
%f0
,
%f30
!
FGA
Group4
(
1
-
cycle
stall
,
%f30
at
G7
)
EXBLK1
(
stda
%f16
,
[%
o0
]
ASI_BLK_AIUS
)
!
MS
ldd
[%
o1
+
0x018
],
%f6
!
AX
(
%f6
results
at
G7
)
faligndata
%f0
,
%f2
,
%f16
!
FGA
Group12
(
7
-
cycle
stall
)
ldd
[%
o1
+
0x020
],
%f8
!
MS
(
%f8
results
at
G15
)
faligndata
%f2
,
%f4
,
%f18
!
FGA
Group13
(
%f18
results
at
G16
)
ldd
[%
o1
+
0x028
],
%f10
!
MS
(
%f10
results
at
G16
)
faligndata
%f4
,
%f6
,
%f20
!
FGA
Group14
(
%f20
results
at
G17
)
ldd
[%
o1
+
0x030
],
%f12
!
MS
(
%f12
results
at
G17
)
faligndata
%f6
,
%f8
,
%f22
!
FGA
Group15
(
%f22
results
at
G18
)
ldd
[%
o1
+
0x038
],
%f14
!
MS
(
%f14
results
at
G18
)
faligndata
%f8
,
%f10
,
%f24
!
FGA
Group16
(
%f24
results
at
G19
)
ldd
[%
o1
+
0x040
],
%f0
!
AX
(
%f0
results
at
G19
)
prefetch
[%
o1
+
0x180
],
#
one_read
!
MS
faligndata
%f10
,
%f12
,
%f26
!
FGA
Group17
(
%f26
results
at
G20
)
subcc
%
o4
,
0x40
,
%
o4
!
A0
add
%
o1
,
0x40
,
%
o1
!
A1
bg
,
pt
%
xcc
,
U3copy_to_user_loop1
!
BR
add
%
o0
,
0x40
,
%
o0
!
A0
Group18
U3copy_to_user_loop2_enter
:
mov
5
,
%
o3
!
A1
/
*
This
loop
performs
on
the
copy
,
no
new
prefetches
are
*
queued
.
We
do
things
this
way
so
that
we
do
not
perform
*
any
spurious
prefetches
past
the
end
of
the
src
buffer
.
*/
U3copy_to_user_loop2
:
ldd
[%
o1
+
0x008
],
%f2
!
MS
faligndata
%f12
,
%f14
,
%f28
!
FGA
Group2
ldd
[%
o1
+
0x010
],
%f4
!
MS
faligndata
%f14
,
%f0
,
%f30
!
FGA
Group4
(
1
-
cycle
stall
)
EXBLK2
(
stda
%f16
,
[%
o0
]
ASI_BLK_AIUS
)
!
MS
ldd
[%
o1
+
0x018
],
%f6
!
AX
faligndata
%f0
,
%f2
,
%f16
!
FGA
Group12
(
7
-
cycle
stall
)
ldd
[%
o1
+
0x020
],
%f8
!
MS
faligndata
%f2
,
%f4
,
%f18
!
FGA
Group13
ldd
[%
o1
+
0x028
],
%f10
!
MS
faligndata
%f4
,
%f6
,
%f20
!
FGA
Group14
ldd
[%
o1
+
0x030
],
%f12
!
MS
faligndata
%f6
,
%f8
,
%f22
!
FGA
Group15
ldd
[%
o1
+
0x038
],
%f14
!
MS
faligndata
%f8
,
%f10
,
%f24
!
FGA
Group16
ldd
[%
o1
+
0x040
],
%f0
!
AX
faligndata
%f10
,
%f12
,
%f26
!
FGA
Group17
subcc
%
o3
,
0x01
,
%
o3
!
A0
add
%
o1
,
0x40
,
%
o1
!
A1
bg
,
pt
%
xcc
,
U3copy_to_user_loop2
!
BR
add
%
o0
,
0x40
,
%
o0
!
A0
Group18
.
align
64
loop
:
ldd
[%
o1
+
0x008
],
%f2
faligndata
%f12
,
%f14
,
%f28
ldd
[%
o1
+
0x010
],
%f4
faligndata
%f14
,
%f0
,
%f30
EXBLK2
(
stda
%f16
,
[%
o0
]
ASI_BLK_AIUS
)
ldd
[%
o1
+
0x018
],
%f6
faligndata
%f0
,
%f2
,
%f16
ldd
[%
o1
+
0x020
],
%f8
faligndata
%f2
,
%f4
,
%f18
ldd
[%
o1
+
0x028
],
%f10
faligndata
%f4
,
%f6
,
%f20
ldd
[%
o1
+
0x030
],
%f12
faligndata
%f6
,
%f8
,
%f22
ldd
[%
o1
+
0x038
],
%f14
faligndata
%f8
,
%f10
,
%f24
ldd
[%
o1
+
0x040
],
%f0
prefetch
[%
o1
+
0x180
],
#
one_read
faligndata
%f10
,
%f12
,
%f26
subcc
%
o3
,
0x01
,
%
o3
add
%
o1
,
0x40
,
%
o1
bg
,
pt
%
XCC
,
loop
add
%
o0
,
0x40
,
%
o0
/
*
Finally
we
copy
the
last
full
64
-
byte
block
.
*/
U3copy_to_user_
loopfini
:
ldd
[%
o1
+
0x008
],
%f2
!
MS
faligndata
%f12
,
%f14
,
%f28
!
FGA
ldd
[%
o1
+
0x010
],
%f4
!
MS
Group19
faligndata
%f14
,
%f0
,
%f30
!
FGA
EXBLK3
(
stda
%f16
,
[%
o0
]
ASI_BLK_AIUS
)
!
MS
Group20
ldd
[%
o1
+
0x018
],
%f6
!
AX
faligndata
%f0
,
%f2
,
%f16
!
FGA
Group11
(
7
-
cycle
stall
)
ldd
[%
o1
+
0x020
],
%f8
!
MS
faligndata
%f2
,
%f4
,
%f18
!
FGA
Group12
ldd
[%
o1
+
0x028
],
%f10
!
MS
faligndata
%f4
,
%f6
,
%f20
!
FGA
Group13
ldd
[%
o1
+
0x030
],
%f12
!
MS
faligndata
%f6
,
%f8
,
%f22
!
FGA
Group14
ldd
[%
o1
+
0x038
],
%f14
!
MS
faligndata
%f8
,
%f10
,
%f24
!
FGA
Group15
cmp
%
g1
,
0
!
A0
be
,
pt
%
icc
,
1
f
!
BR
add
%
o0
,
0x40
,
%
o0
!
A1
ldd
[%
o1
+
0x040
],
%f0
!
MS
1
:
faligndata
%f10
,
%f12
,
%f26
!
FGA
Group16
faligndata
%f12
,
%f14
,
%f28
!
FGA
Group17
faligndata
%f14
,
%f0
,
%f30
!
FGA
Group18
EXBLK4
(
stda
%f16
,
[%
o0
]
ASI_BLK_AIUS
)
!
MS
add
%
o0
,
0x40
,
%
o0
!
A0
add
%
o1
,
0x40
,
%
o1
!
A1
#ifdef __KERNEL__
loopfini
:
ldd
[%
o1
+
0x008
],
%f2
faligndata
%f12
,
%f14
,
%f28
ldd
[%
o1
+
0x010
],
%f4
faligndata
%f14
,
%f0
,
%f30
EXBLK3
(
stda
%f16
,
[%
o0
]
ASI_BLK_AIUS
)
ldd
[%
o1
+
0x018
],
%f6
faligndata
%f0
,
%f2
,
%f16
ldd
[%
o1
+
0x020
],
%f8
faligndata
%f2
,
%f4
,
%f18
ldd
[%
o1
+
0x028
],
%f10
faligndata
%f4
,
%f6
,
%f20
ldd
[%
o1
+
0x030
],
%f12
faligndata
%f6
,
%f8
,
%f22
ldd
[%
o1
+
0x038
],
%f14
faligndata
%f8
,
%f10
,
%f24
cmp
%
g1
,
0
be
,
pt
%
XCC
,
1
f
add
%
o0
,
0x40
,
%
o0
ldd
[%
o1
+
0x040
],
%f0
1
:
faligndata
%f10
,
%f12
,
%f26
faligndata
%f12
,
%f14
,
%f28
faligndata
%f14
,
%f0
,
%f30
EXBLK4
(
stda
%f16
,
[%
o0
]
ASI_BLK_AIUS
)
add
%
o0
,
0x40
,
%
o0
add
%
o1
,
0x40
,
%
o1
.
globl
U3copy_to_user_nop_2_3
U3copy_to_user_nop_2_3
:
mov
PRIMARY_CONTEXT
,
%
o3
stxa
%
g0
,
[%
o3
]
ASI_DMMU
!
Flush
P
-
cache
stxa
%
g3
,
[%
g0
]
ASI_DCU_CONTROL_REG
!
Disable
P
-
cache
#endif
membar
#
Sync
!
MS
Group26
(
7
-
cycle
stall
)
membar
#
Sync
/
*
Now
we
copy
the
(
len
modulo
64
)
bytes
at
the
end
.
*
Note
how
we
borrow
the
%f0
loaded
above
.
*
*
Also
notice
how
this
code
is
careful
not
to
perform
a
*
load
past
the
end
of
the
src
buffer
just
like
similar
*
code
found
in
U3copy_to_user_toosmall
processing
.
*
load
past
the
end
of
the
src
buffer
.
*/
U3copy_to_user_
loopend
:
and
%
o2
,
0x3f
,
%
o2
!
A0
Group
andcc
%
o2
,
0x38
,
%
g2
!
A0
Group
be
,
pn
%
icc
,
U3copy_to_user_endcruft
!
BR
subcc
%
g2
,
0x8
,
%
g2
!
A1
be
,
pn
%
icc
,
U3copy_to_user_endcruft
!
BR
Group
cmp
%
g1
,
0
!
A0
be
,
a
,
pt
%
icc
,
1
f
!
BR
Group
ldd
[%
o1
+
0x00
],
%f0
!
MS
1
:
ldd
[%
o1
+
0x08
],
%f2
!
MS
Group
add
%
o1
,
0x8
,
%
o1
!
A0
sub
%
o2
,
0x8
,
%
o2
!
A1
subcc
%
g2
,
0x8
,
%
g2
!
A0
Group
faligndata
%f0
,
%f2
,
%f8
!
FGA
Group
EX
(
stda
%f8
,
[%
o0
+
0x00
]
%
asi
,
add
%
o2
,
0x8
)
!
MS
(
XXX
does
it
stall
here
?
XXX
)
be
,
pn
%
icc
,
U3copy_to_user_endcruft
!
BR
add
%
o0
,
0x8
,
%
o0
!
A0
ldd
[%
o1
+
0x08
],
%f0
!
MS
Group
add
%
o1
,
0x8
,
%
o1
!
A0
sub
%
o2
,
0x8
,
%
o2
!
A1
subcc
%
g2
,
0x8
,
%
g2
!
A0
Group
faligndata
%f2
,
%f0
,
%f8
!
FGA
EX
(
stda
%f8
,
[%
o0
+
0x00
]
%
asi
,
add
%
o2
,
0x8
)
!
MS
(
XXX
does
it
stall
here
?
XXX
)
bne
,
pn
%
icc
,
1
b
!
BR
add
%
o0
,
0x8
,
%
o0
!
A0
Group
loopend
:
and
%
o2
,
0x3f
,
%
o2
andcc
%
o2
,
0x38
,
%
g2
be
,
pn
%
XCC
,
endcruft
subcc
%
g2
,
0x8
,
%
g2
be
,
pn
%
XCC
,
endcruft
cmp
%
g1
,
0
be
,
a
,
pt
%
XCC
,
1
f
ldd
[%
o1
+
0x00
],
%f0
1
:
ldd
[%
o1
+
0x08
],
%f2
add
%
o1
,
0x8
,
%
o1
sub
%
o2
,
0x8
,
%
o2
subcc
%
g2
,
0x8
,
%
g2
faligndata
%f0
,
%f2
,
%f8
EX
(
stda
%f8
,
[%
o0
+
0x00
]
%
asi
,
add
%
o2
,
0x8
)
be
,
pn
%
XCC
,
endcruft
add
%
o0
,
0x8
,
%
o0
ldd
[%
o1
+
0x08
],
%f0
add
%
o1
,
0x8
,
%
o1
sub
%
o2
,
0x8
,
%
o2
subcc
%
g2
,
0x8
,
%
g2
faligndata
%f2
,
%f0
,
%f8
EX
(
stda
%f8
,
[%
o0
+
0x00
]
%
asi
,
add
%
o2
,
0x8
)
bne
,
pn
%
XCC
,
1
b
add
%
o0
,
0x8
,
%
o0
	/* If anything is left, we copy it one byte at a time.
	 * Note that %g1 is (src & 0x3) saved above before the
	 * alignaddr was performed.
	 */
U3copy_to_user_endcruft:
endcruft:
	cmp		%o2, 0
	add		%o1, %g1, %o1
	VISExitHalf
	be,pn		%icc, U3copy_to_user_short_ret
	 nop
	ba,a,pt		%xcc, U3copy_to_user_short

	/* If we get here, then 32 <= len < (6 * 64) */
U3copy_to_user_toosmall:

#ifdef SMALL_COPY_USES_FPU

	/* Is 'dst' already aligned on an 8-byte boundary? */
	be,pt		%xcc, 2f			! BR	Group
	be,pn		%XCC, out
	 sub		%o0, %o1, %o3

	/* Compute abs((dst & 7) - 8) into %g2.  This is the number
	 * of bytes to copy to make 'dst' 8-byte aligned.  We pre-
	 * subtract this from 'len'.
	 */
	sub		%g2, 0x8, %g2			! A0
	sub		%g0, %g2, %g2			! A0	Group (reg-dep)
	sub		%o2, %g2, %o2			! A0	Group (reg-dep)

	/* Copy %g2 bytes from src to dst, one byte at a time. */
1:	ldub		[%o1 + 0x00], %o3		! MS	(Group) (%o3 in 3 cycles)
	add		%o1, 0x1, %o1			! A1
	add		%o0, 0x1, %o0			! A0	Group
	subcc		%g2, 0x1, %g2			! A1
	bg,pt		%icc, 1b			! BR	Group
	 EXNV2(stba	%o3, [%o0 + -1] %asi, add %o2, %g2)	! MS	Group
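The "abs((dst & 7) - 8)" computation above is just the distance to the next 8-byte
boundary; a hedged C equivalent of those two sub instructions:

	/* Sketch: bytes needed to 8-byte-align dst, valid because this path is
	 * only reached when (dst & 7) is nonzero. */
	static unsigned long align_prefix_len(unsigned long dst)
	{
		return 0x8 - (dst & 0x7);	/* == abs((dst & 7) - 8) */
	}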
	andcc		%g1, 0x7, %g0
	bne,pn		%icc, small_copy_unaligned
	 andcc		%o2, 0x8, %g0
	be,pt		%icc, 1f
	 nop
	ldx		[%o1], %o5
	EXNV(stxa	%o5, [%o1 + %o3] ASI_AIUS, add %o2, %g0)
	add		%o1, 0x8, %o1

2:	VISEntryHalf				! MS+MS
1:	andcc		%o2, 0x4, %g0
	be,pt		%icc, 1f
	 nop
	lduw		[%o1], %o5
	EXNV(stwa	%o5, [%o1 + %o3] ASI_AIUS, and %o2, 0x7)
	add		%o1, 0x4, %o1

	/* Compute (len - (len % 8)) into %g2.  This is guaranteed
	 * to be nonzero.
	 */
	andn		%o2, 0x7, %g2			! A0	Group

	/* You may read this and believe that it allows reading
	 * one 8-byte longword past the end of src.  It actually
	 * does not, as %g2 is subtracted as loads are done from
	 * src, so we always stop before running off the end.
	 * Also, we are guaranteed to have at least 0x10 bytes
	 * to move here.
	 */
	sub		%g2, 0x8, %g2			! A0	Group (reg-dep)
	alignaddr	%o1, %g0, %g1			! MS	(Break-after)
	ldd		[%g1 + 0x00], %f0		! MS	Group (1-cycle stall)
	add		%g1, 0x8, %g1			! A0

1:	ldd		[%g1 + 0x00], %f2		! MS	Group
	add		%g1, 0x8, %g1			! A0
	sub		%o2, 0x8, %o2			! A1
	subcc		%g2, 0x8, %g2			! A0	Group
	faligndata	%f0, %f2, %f8			! FGA	Group (1-cycle stall)
	EX(stda		%f8, [%o0 + 0x00] %asi, add %o2, 0x8)	! MS	Group (2-cycle stall)
	add		%o1, 0x8, %o1			! A0
	be,pn		%icc, 2f			! BR
	 add		%o0, 0x8, %o0			! A1
	ldd		[%g1 + 0x00], %f0		! MS	Group
	add		%g1, 0x8, %g1			! A0
	sub		%o2, 0x8, %o2			! A1
	subcc		%g2, 0x8, %g2			! A0	Group
	faligndata	%f2, %f0, %f8			! FGA	Group (1-cycle stall)
	EX(stda		%f8, [%o0 + 0x00] %asi, add %o2, 0x8)	! MS	Group (2-cycle stall)
	add		%o1, 0x8, %o1			! A0
	bne,pn		%icc, 1b			! BR
	 add		%o0, 0x8, %o0			! A1

	/* Nothing left to copy? */
2:	cmp		%o2, 0				! A0	Group
	VISExitHalf					! A0+MS
	be,pn		%icc, U3copy_to_user_short_ret	! BR	Group
	 nop						! A0
	ba,a,pt		%xcc, U3copy_to_user_short	! BR	Group
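The bound argued for in the "You may read this and believe..." comment above can be
written out in C; this is a sketch of the counting only (the real loop reads misaligned
doublewords through faligndata):

	/* Sketch: g2 = len rounded down to a multiple of 8 is consumed by the
	 * 8-byte loads, so the source pointer never advances past src + len. */
	static void copy_doublewords(unsigned long *dst, const unsigned long *src,
				     unsigned long len_bytes)
	{
		unsigned long g2 = len_bytes & ~0x7UL;	/* andn %o2, 0x7, %g2 */

		while (g2) {
			*dst++ = *src++;
			g2 -= 8;
		}
	}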
#else /* !(SMALL_COPY_USES_FPU) */
	xor		%o1, %o0, %g2
	andcc		%g2, 0x7, %g0
	bne,pn		%icc, U3copy_to_user_short
	 andcc		%o1, 0x7, %g2
	be,pt		%xcc, 2f
	 sub		%g2, 0x8, %g2
	sub		%g0, %g2, %g2
	sub		%o2, %g2, %o2

1:	andcc		%o2, 0x2, %g0
	be,pt		%icc, 1f
	 nop
	lduh		[%o1], %o5
	EXNV(stha	%o5, [%o1 + %o3] ASI_AIUS, and %o2, 0x3)
	add		%o1, 0x2, %o1

1:	ldub		[%o1 + 0x00], %o3
	add		%o1, 0x1, %o1
	add		%o0, 0x1, %o0
	subcc		%g2, 0x1, %g2
	bg,pt		%icc, 1b
	 EXNV2(stba	%o3, [%o0 + -1] %asi, add %o2, %g2)

1:	andcc		%o2, 0x1, %g0
	be,pt		%icc, out
	 nop
	ldub		[%o1], %o5
	ba,pt		%xcc, out
	 EXNV(stba	%o5, [%o1 + %o3] ASI_AIUS, and %o2, 0x1)

medium_copy: /* 16 < len <= 64 */
	bne,pn		%XCC, small_copy_unaligned
	 sub		%o0, %o1, %o3
medium_copy_aligned:
	andn		%o2, 0x7, %o4
	and		%o2, 0x7, %o2
1:	subcc		%o4, 0x8, %o4
	ldx		[%o1], %o5
	EXNV4(stxa	%o5, [%o1 + %o3] ASI_AIUS, add %o2, %o4)
	bgu,pt		%XCC, 1b
	 add		%o1, 0x8, %o1
	andcc		%o2, 0x4, %g0
	be,pt		%XCC, 1f
	 nop
	sub		%o2, 0x4, %o2
	lduw		[%o1], %o5
	EXNV3(stwa	%o5, [%o1 + %o3] ASI_AIUS, add %o2, %g0)
	add		%o1, 0x4, %o1
1:	cmp		%o2, 0
	be,pt		%XCC, out
	 nop
	ba,pt		%xcc, small_copy_unaligned
	 nop

2:	andn		%o2, 0x7, %g2
	sub		%o2, %g2, %o2

small_copy: /* 0 < len <= 16 */
	andcc		%o3, 0x3, %g0
	bne,pn		%XCC, small_copy_unaligned
	 sub		%o0, %o1, %o3

3:	ldx		[%o1 + 0x00], %o3
	add		%o1, 0x8, %o1
	add		%o0, 0x8, %o0
	subcc		%g2, 0x8, %g2
	bg,pt		%icc, 3b
	 EXNV3(stxa	%o3, [%o0 + -8] %asi, add %o2, %g2)

small_copy_aligned:
	subcc		%o2, 4, %o2
	lduw		[%o1], %g1
	EXNV3(stwa	%g1, [%o1 + %o3] ASI_AIUS, add %o2, %g0)
	bgu,pt		%XCC, small_copy_aligned
	 add		%o1, 4, %o1

	cmp		%o2, 0
	bne,pn		%icc, U3copy_to_user_short
	 nop
	ba,a,pt		%xcc, U3copy_to_user_short_ret

out:	retl
	 clr		%o0
#endif /* !(SMALL_COPY_USES_FPU) */

	.align		32
small_copy_unaligned:
	subcc		%o2, 1, %o2
	ldub		[%o1], %g1
	EXNV2(stba	%g1, [%o1 + %o3] ASI_AIUS, add %o2, %g0)
	bgu,pt		%XCC, small_copy_unaligned
	 add		%o1, 1, %o1
	retl
	 clr		%o0
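For reference, the EX/EXNV* wrappers used throughout this file guard the user-space
stores with exception-table fixups; when a store faults, the fixup has to report how
many bytes were still uncopied, which is what the second macro argument (e.g.
"add %o2, %g2") rebuilds. A hedged sketch of that return convention:

	/* Sketch of the copy_to_user() contract assumed above: the routine
	 * returns the number of bytes NOT copied, 0 on full success. */
	static unsigned long bytes_not_copied(unsigned long len, unsigned long done)
	{
		return len - done;
	}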
arch/sparc64/lib/U3memcpy.S
View file @
da09d2f1
/* $Id: U3memcpy.S,v 1.2 2000/11/01 09:29:19 davem Exp $
 * U3memcpy.S: UltraSparc-III optimized memcpy.
/* U3memcpy.S: UltraSparc-III optimized memcpy.
 *
 * Copyright (C) 1999, 2000 David S. Miller (davem@redhat.com)
 * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
 */

#ifdef __KERNEL__
...
...
@@ -9,15 +8,20 @@
#include <asm/asi.h>
#include <asm/dcu.h>
#include <asm/spitfire.h>
#undef SMALL_COPY_USES_FPU
#else
#define ASI_BLK_P 0xf0
#define FPRS_FEF  0x04
#define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs
#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
#define SMALL_COPY_USES_FPU
#endif

#ifndef XCC
#define XCC xcc
#endif

	.register	%g2, #scratch
	.register	%g3, #scratch

	/* Special/non-trivial issues of this code:
	 *
	 * 1) %o5 is preserved from VISEntryHalf to VISExitHalf
...
...
@@ -39,78 +43,53 @@
	.globl	U3memcpy
U3memcpy:
	/* %o0 = dst, %o1 = src, %o2 = len */
#ifndef __KERNEL__
	/* Save away original 'dst' for memcpy return value. */
	mov		%o0, %g3			! A0	Group
#endif

	/* Anything to copy at all? */
	cmp		%o2, 0				! A1
	ble,pn		%icc, U3memcpy_short_ret	! BR

	/* Extremely small copy? */
	cmp		%o2, 31				! A0	Group
	ble,pn		%icc, U3memcpy_short		! BR

	/* Large enough to use unrolled prefetch loops? */
	cmp		%o2, 0x100			! A1
	bge,a,pt	%icc, U3memcpy_enter		! BR	Group
	 andcc		%o0, 0x3f, %g2			! A0
	mov		%o0, %g5
	cmp		%o2, 0
	be,pn		%XCC, out
	 or		%o0, %o1, %o3
	cmp		%o2, 16
	bleu,a,pn	%XCC, small_copy
	 or		%o3, %o2, %o3

	ba,pt		%xcc, U3memcpy_toosmall		! BR	Group
	 andcc		%o0, 0x7, %g2			! A0
	cmp		%o2, 256
	blu,pt		%XCC, medium_copy
	 andcc		%o3, 0x7, %g0

	.align		32
U3memcpy_short:
	/* Copy %o2 bytes from src to dst, one byte at a time. */
	ldub		[%o1 + 0x00], %o3		! MS	Group
	add		%o1, 0x1, %o1			! A0
	add		%o0, 0x1, %o0			! A1
	subcc		%o2, 1, %o2			! A0	Group
	ba,pt		%xcc, enter
	 andcc		%o0, 0x3f, %g2
	bg,pt		%icc, U3memcpy_short		! BR
	 stb		%o3, [%o0 + -1]			! MS	Group (1-cycle stall)

U3memcpy_short_ret:
#ifdef __KERNEL__
	retl						! BR	Group (0-4 cycle stall)
	 clr		%o0				! A0
#else
	retl						! BR	Group (0-4 cycle stall)
	 mov		%g3, %o0			! A0
#endif

	/* Here len >= (6 * 64) and condition codes reflect execution
	/* Here len >= 256 and condition codes reflect execution
	 * of "andcc %o0, 0x7, %g2", done by caller.
	 */
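The rewritten entry above dispatches purely on length before falling into the unrolled
prefetch loops; a sketch of that dispatch (thresholds 16 and 256 match the cmp
instructions, per-path alignment checks are omitted):

	/* Sketch of the new U3memcpy length dispatch. */
	enum copy_path { PATH_OUT, PATH_SMALL, PATH_MEDIUM, PATH_BIG };

	static enum copy_path pick_path(unsigned long len)
	{
		if (len == 0)
			return PATH_OUT;	/* be,pn %XCC, out */
		if (len <= 16)
			return PATH_SMALL;	/* small_copy */
		if (len < 256)
			return PATH_MEDIUM;	/* medium_copy */
		return PATH_BIG;		/* "enter": unrolled prefetch loops */
	}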
.
align
64
U3memcpy_
enter
:
enter
:
/
*
Is
'dst'
already
aligned
on
an
64
-
byte
boundary
?
*/
be
,
pt
%
xcc
,
2
f
!
BR
be
,
pt
%
XCC
,
2
f
/
*
Compute
abs
((
dst
&
0x3f
)
-
0x40
)
into
%
g2
.
This
is
the
number
*
of
bytes
to
copy
to
make
'dst'
64
-
byte
aligned
.
We
pre
-
*
subtract
this
from
'len'
.
*/
sub
%
g2
,
0x40
,
%
g2
!
A0
Group
sub
%
g0
,
%
g2
,
%
g2
!
A0
Group
sub
%
o2
,
%
g2
,
%
o2
!
A0
Group
sub
%
g2
,
0x40
,
%
g2
sub
%
g0
,
%
g2
,
%
g2
sub
%
o2
,
%
g2
,
%
o2
/
*
Copy
%
g2
bytes
from
src
to
dst
,
one
byte
at
a
time
.
*/
1
:
ldub
[%
o1
+
0x00
],
%
o3
!
MS
(
Group
)
add
%
o1
,
0x1
,
%
o1
!
A1
add
%
o0
,
0x1
,
%
o0
!
A0
Group
subcc
%
g2
,
0x1
,
%
g2
!
A1
1
:
ldub
[%
o1
+
0x00
],
%
o3
add
%
o1
,
0x1
,
%
o1
add
%
o0
,
0x1
,
%
o0
subcc
%
g2
,
0x1
,
%
g2
bg
,
pt
%
icc
,
1
b
!
BR
Group
stb
%
o3
,
[%
o0
+
-
1
]
!
MS
Group
bg
,
pt
%
XCC
,
1
b
stb
%
o3
,
[%
o0
+
-
1
]
2
:
VISEntryHalf
!
MS
+
MS
and
%
o1
,
0x7
,
%
g1
!
A1
ba
,
pt
%
xcc
,
U3memcpy_begin
!
BR
alignaddr
%
o1
,
%
g0
,
%
o1
!
MS
(
Break
-
after
)
2
:
VISEntryHalf
and
%
o1
,
0x7
,
%
g1
ba
,
pt
%
xcc
,
begin
alignaddr
%
o1
,
%
g0
,
%
o1
.
align
64
U3memcpy_
begin
:
begin
:
#ifdef __KERNEL__
.
globl
U3memcpy_nop_1_6
U3memcpy_nop_1_6
:
...
...
@@ -121,146 +100,90 @@ U3memcpy_nop_1_6:
stxa
%
o3
,
[%
g0
]
ASI_DCU_CONTROL_REG
!
Enable
P
-
cache
membar
#
Sync
#endif
prefetch
[%
o1
+
0x000
],
#
one_read
!
MS
Group1
prefetch
[%
o1
+
0x040
],
#
one_read
!
MS
Group2
andn
%
o2
,
(
0x40
-
1
),
%
o4
!
A0
prefetch
[%
o1
+
0x080
],
#
one_read
!
MS
Group3
cmp
%
o4
,
0x140
!
A0
prefetch
[%
o1
+
0x0c0
],
#
one_read
!
MS
Group4
ldd
[%
o1
+
0x000
],
%f0
!
MS
Group5
(
%f0
results
at
G8
)
bge
,
a
,
pt
%
icc
,
1
f
!
BR
prefetch
[%
o1
+
0x100
],
#
one_read
!
MS
Group6
1
:
ldd
[%
o1
+
0x008
],
%f2
!
AX
(
%f2
results
at
G9
)
cmp
%
o4
,
0x180
!
A1
bge
,
a
,
pt
%
icc
,
1
f
!
BR
prefetch
[%
o1
+
0x140
],
#
one_read
!
MS
Group7
1
:
ldd
[%
o1
+
0x010
],
%f4
!
AX
(
%f4
results
at
G10
)
cmp
%
o4
,
0x1c0
!
A1
bge
,
a
,
pt
%
icc
,
1
f
!
BR
prefetch
[%
o1
+
0x180
],
#
one_read
!
MS
Group8
1
:
faligndata
%f0
,
%f2
,
%f16
!
FGA
Group9
(
%f16
at
G12
)
ldd
[%
o1
+
0x018
],
%f6
!
AX
(
%f6
results
at
G12
)
faligndata
%f2
,
%f4
,
%f18
!
FGA
Group10
(
%f18
results
at
G13
)
ldd
[%
o1
+
0x020
],
%f8
!
MS
(
%f8
results
at
G13
)
faligndata
%f4
,
%f6
,
%f20
!
FGA
Group12
(
1
-
cycle
stall
,
%f20
at
G15
)
ldd
[%
o1
+
0x028
],
%f10
!
MS
(
%f10
results
at
G15
)
faligndata
%f6
,
%f8
,
%f22
!
FGA
Group13
(
%f22
results
at
G16
)
ldd
[%
o1
+
0x030
],
%f12
!
MS
(
%f12
results
at
G16
)
faligndata
%f8
,
%f10
,
%f24
!
FGA
Group15
(
1
-
cycle
stall
,
%f24
at
G18
)
ldd
[%
o1
+
0x038
],
%f14
!
MS
(
%f14
results
at
G18
)
faligndata
%f10
,
%f12
,
%f26
!
FGA
Group16
(
%f26
results
at
G19
)
ldd
[%
o1
+
0x040
],
%f0
!
MS
(
%f0
results
at
G19
)
/
*
We
only
use
the
first
loop
if
len
>
(
7
*
64
)
.
*/
subcc
%
o4
,
0x1c0
,
%
o4
!
A0
Group17
bg
,
pt
%
icc
,
U3memcpy_loop1
!
BR
add
%
o1
,
0x40
,
%
o1
!
A1
add
%
o4
,
0x140
,
%
o4
!
A0
Group18
ba
,
pt
%
xcc
,
U3memcpy_loop2
!
BR
srl
%
o4
,
6
,
%
o3
!
A0
Group19
nop
nop
nop
nop
nop
nop
nop
prefetch
[%
o1
+
0x000
],
#
one_read
prefetch
[%
o1
+
0x040
],
#
one_read
andn
%
o2
,
(
0x40
-
1
),
%
o4
prefetch
[%
o1
+
0x080
],
#
one_read
prefetch
[%
o1
+
0x0c0
],
#
one_read
ldd
[%
o1
+
0x000
],
%f0
prefetch
[%
o1
+
0x100
],
#
one_read
ldd
[%
o1
+
0x008
],
%f2
prefetch
[%
o1
+
0x140
],
#
one_read
ldd
[%
o1
+
0x010
],
%f4
prefetch
[%
o1
+
0x180
],
#
one_read
faligndata
%f0
,
%f2
,
%f16
ldd
[%
o1
+
0x018
],
%f6
faligndata
%f2
,
%f4
,
%f18
ldd
[%
o1
+
0x020
],
%f8
faligndata
%f4
,
%f6
,
%f20
ldd
[%
o1
+
0x028
],
%f10
faligndata
%f6
,
%f8
,
%f22
ldd
[%
o1
+
0x030
],
%f12
faligndata
%f8
,
%f10
,
%f24
ldd
[%
o1
+
0x038
],
%f14
faligndata
%f10
,
%f12
,
%f26
ldd
[%
o1
+
0x040
],
%f0
sub
%
o4
,
0x80
,
%
o4
add
%
o1
,
0x40
,
%
o1
ba
,
pt
%
xcc
,
loop
srl
%
o4
,
6
,
%
o3
/
*
This
loop
performs
the
copy
and
queues
new
prefetches
.
*
We
drop
into
the
second
loop
when
len
<=
(
5
*
64
)
.
Note
*
that
this
(
5
*
64
)
factor
has
been
subtracted
from
len
*
already
.
*/
U3memcpy_loop1
:
ldd
[%
o1
+
0x008
],
%f2
!
MS
Group2
(
%f2
results
at
G5
)
faligndata
%f12
,
%f14
,
%f28
!
FGA
(
%f28
results
at
G5
)
ldd
[%
o1
+
0x010
],
%f4
!
MS
Group3
(
%f4
results
at
G6
)
faligndata
%f14
,
%f0
,
%f30
!
FGA
Group4
(
1
-
cycle
stall
,
%f30
at
G7
)
stda
%f16
,
[%
o0
]
ASI_BLK_P
!
MS
ldd
[%
o1
+
0x018
],
%f6
!
AX
(
%f6
results
at
G7
)
faligndata
%f0
,
%f2
,
%f16
!
FGA
Group12
(
7
-
cycle
stall
)
ldd
[%
o1
+
0x020
],
%f8
!
MS
(
%f8
results
at
G15
)
faligndata
%f2
,
%f4
,
%f18
!
FGA
Group13
(
%f18
results
at
G16
)
ldd
[%
o1
+
0x028
],
%f10
!
MS
(
%f10
results
at
G16
)
faligndata
%f4
,
%f6
,
%f20
!
FGA
Group14
(
%f20
results
at
G17
)
ldd
[%
o1
+
0x030
],
%f12
!
MS
(
%f12
results
at
G17
)
faligndata
%f6
,
%f8
,
%f22
!
FGA
Group15
(
%f22
results
at
G18
)
ldd
[%
o1
+
0x038
],
%f14
!
MS
(
%f14
results
at
G18
)
faligndata
%f8
,
%f10
,
%f24
!
FGA
Group16
(
%f24
results
at
G19
)
ldd
[%
o1
+
0x040
],
%f0
!
AX
(
%f0
results
at
G19
)
prefetch
[%
o1
+
0x180
],
#
one_read
!
MS
faligndata
%f10
,
%f12
,
%f26
!
FGA
Group17
(
%f26
results
at
G20
)
subcc
%
o4
,
0x40
,
%
o4
!
A0
add
%
o1
,
0x40
,
%
o1
!
A1
bg
,
pt
%
xcc
,
U3memcpy_loop1
!
BR
add
%
o0
,
0x40
,
%
o0
!
A0
Group18
U3memcpy_loop2_enter
:
mov
5
,
%
o3
!
A1
/
*
This
loop
performs
on
the
copy
,
no
new
prefetches
are
*
queued
.
We
do
things
this
way
so
that
we
do
not
perform
*
any
spurious
prefetches
past
the
end
of
the
src
buffer
.
*/
U3memcpy_loop2
:
ldd
[%
o1
+
0x008
],
%f2
!
MS
faligndata
%f12
,
%f14
,
%f28
!
FGA
Group2
ldd
[%
o1
+
0x010
],
%f4
!
MS
faligndata
%f14
,
%f0
,
%f30
!
FGA
Group4
(
1
-
cycle
stall
)
stda
%f16
,
[%
o0
]
ASI_BLK_P
!
MS
ldd
[%
o1
+
0x018
],
%f6
!
AX
faligndata
%f0
,
%f2
,
%f16
!
FGA
Group12
(
7
-
cycle
stall
)
ldd
[%
o1
+
0x020
],
%f8
!
MS
faligndata
%f2
,
%f4
,
%f18
!
FGA
Group13
ldd
[%
o1
+
0x028
],
%f10
!
MS
faligndata
%f4
,
%f6
,
%f20
!
FGA
Group14
ldd
[%
o1
+
0x030
],
%f12
!
MS
faligndata
%f6
,
%f8
,
%f22
!
FGA
Group15
ldd
[%
o1
+
0x038
],
%f14
!
MS
faligndata
%f8
,
%f10
,
%f24
!
FGA
Group16
ldd
[%
o1
+
0x040
],
%f0
!
AX
faligndata
%f10
,
%f12
,
%f26
!
FGA
Group17
subcc
%
o3
,
0x01
,
%
o3
!
A0
add
%
o1
,
0x40
,
%
o1
!
A1
bg
,
pt
%
xcc
,
U3memcpy_loop2
!
BR
add
%
o0
,
0x40
,
%
o0
!
A0
Group18
.
align
64
loop
:
ldd
[%
o1
+
0x008
],
%f2
faligndata
%f12
,
%f14
,
%f28
ldd
[%
o1
+
0x010
],
%f4
faligndata
%f14
,
%f0
,
%f30
stda
%f16
,
[%
o0
]
ASI_BLK_P
ldd
[%
o1
+
0x018
],
%f6
faligndata
%f0
,
%f2
,
%f16
ldd
[%
o1
+
0x020
],
%f8
faligndata
%f2
,
%f4
,
%f18
ldd
[%
o1
+
0x028
],
%f10
faligndata
%f4
,
%f6
,
%f20
ldd
[%
o1
+
0x030
],
%f12
faligndata
%f6
,
%f8
,
%f22
ldd
[%
o1
+
0x038
],
%f14
faligndata
%f8
,
%f10
,
%f24
ldd
[%
o1
+
0x040
],
%f0
prefetch
[%
o1
+
0x180
],
#
one_read
faligndata
%f10
,
%f12
,
%f26
subcc
%
o3
,
0x01
,
%
o3
add
%
o1
,
0x40
,
%
o1
bg
,
pt
%
XCC
,
loop
add
%
o0
,
0x40
,
%
o0
/
*
Finally
we
copy
the
last
full
64
-
byte
block
.
*/
U3memcpy_
loopfini
:
ldd
[%
o1
+
0x008
],
%f2
!
MS
faligndata
%f12
,
%f14
,
%f28
!
FGA
ldd
[%
o1
+
0x010
],
%f4
!
MS
Group19
faligndata
%f14
,
%f0
,
%f30
!
FGA
stda
%f16
,
[%
o0
]
ASI_BLK_P
!
MS
Group20
ldd
[%
o1
+
0x018
],
%f6
!
AX
faligndata
%f0
,
%f2
,
%f16
!
FGA
Group11
(
7
-
cycle
stall
)
ldd
[%
o1
+
0x020
],
%f8
!
MS
faligndata
%f2
,
%f4
,
%f18
!
FGA
Group12
ldd
[%
o1
+
0x028
],
%f10
!
MS
faligndata
%f4
,
%f6
,
%f20
!
FGA
Group13
ldd
[%
o1
+
0x030
],
%f12
!
MS
faligndata
%f6
,
%f8
,
%f22
!
FGA
Group14
ldd
[%
o1
+
0x038
],
%f14
!
MS
faligndata
%f8
,
%f10
,
%f24
!
FGA
Group15
cmp
%
g1
,
0
!
A0
be
,
pt
%
icc
,
1
f
!
BR
add
%
o0
,
0x40
,
%
o0
!
A1
ldd
[%
o1
+
0x040
],
%f0
!
MS
1
:
faligndata
%f10
,
%f12
,
%f26
!
FGA
Group16
faligndata
%f12
,
%f14
,
%f28
!
FGA
Group17
faligndata
%f14
,
%f0
,
%f30
!
FGA
Group18
stda
%f16
,
[%
o0
]
ASI_BLK_P
!
MS
add
%
o0
,
0x40
,
%
o0
!
A0
add
%
o1
,
0x40
,
%
o1
!
A1
loopfini
:
ldd
[%
o1
+
0x008
],
%f2
faligndata
%f12
,
%f14
,
%f28
ldd
[%
o1
+
0x010
],
%f4
faligndata
%f14
,
%f0
,
%f30
stda
%f16
,
[%
o0
]
ASI_BLK_P
ldd
[%
o1
+
0x018
],
%f6
faligndata
%f0
,
%f2
,
%f16
ldd
[%
o1
+
0x020
],
%f8
faligndata
%f2
,
%f4
,
%f18
ldd
[%
o1
+
0x028
],
%f10
faligndata
%f4
,
%f6
,
%f20
ldd
[%
o1
+
0x030
],
%f12
faligndata
%f6
,
%f8
,
%f22
ldd
[%
o1
+
0x038
],
%f14
faligndata
%f8
,
%f10
,
%f24
cmp
%
g1
,
0
be
,
pt
%
XCC
,
1
f
add
%
o0
,
0x40
,
%
o0
ldd
[%
o1
+
0x040
],
%f0
1
:
faligndata
%f10
,
%f12
,
%f26
faligndata
%f12
,
%f14
,
%f28
faligndata
%f14
,
%f0
,
%f30
stda
%f16
,
[%
o0
]
ASI_BLK_P
add
%
o0
,
0x40
,
%
o0
add
%
o1
,
0x40
,
%
o1
#ifdef __KERNEL__
.
globl
U3memcpy_nop_2_3
U3memcpy_nop_2_3
:
...
...
@@ -268,161 +191,143 @@ U3memcpy_nop_2_3:
stxa
%
g0
,
[%
o3
]
ASI_DMMU
!
Flush
P
-
cache
stxa
%
g3
,
[%
g0
]
ASI_DCU_CONTROL_REG
!
Disable
P
-
cache
#endif
membar
#
Sync
!
MS
Group26
(
7
-
cycle
stall
)
membar
#
Sync
/
*
Now
we
copy
the
(
len
modulo
64
)
bytes
at
the
end
.
*
Note
how
we
borrow
the
%f0
loaded
above
.
*
*
Also
notice
how
this
code
is
careful
not
to
perform
a
*
load
past
the
end
of
the
src
buffer
just
like
similar
*
code
found
in
U3memcpy_toosmall
processing
.
*
load
past
the
end
of
the
src
buffer
.
*/
U3memcpy_
loopend
:
and
%
o2
,
0x3f
,
%
o2
!
A0
Group
andcc
%
o2
,
0x38
,
%
g2
!
A0
Group
be
,
pn
%
icc
,
U3memcpy_endcruft
!
BR
subcc
%
g2
,
0x8
,
%
g2
!
A1
be
,
pn
%
icc
,
U3memcpy_endcruft
!
BR
Group
cmp
%
g1
,
0
!
A0
be
,
a
,
pt
%
icc
,
1
f
!
BR
Group
ldd
[%
o1
+
0x00
],
%f0
!
MS
1
:
ldd
[%
o1
+
0x08
],
%f2
!
MS
Group
add
%
o1
,
0x8
,
%
o1
!
A0
sub
%
o2
,
0x8
,
%
o2
!
A1
subcc
%
g2
,
0x8
,
%
g2
!
A0
Group
faligndata
%f0
,
%f2
,
%f8
!
FGA
Group
std
%f8
,
[%
o0
+
0x00
]
!
MS
(
XXX
does
it
stall
here
?
XXX
)
be
,
pn
%
icc
,
U3memcpy_endcruft
!
BR
add
%
o0
,
0x8
,
%
o0
!
A0
ldd
[%
o1
+
0x08
],
%f0
!
MS
Group
add
%
o1
,
0x8
,
%
o1
!
A0
sub
%
o2
,
0x8
,
%
o2
!
A1
subcc
%
g2
,
0x8
,
%
g2
!
A0
Group
faligndata
%f2
,
%f0
,
%f8
!
FGA
std
%f8
,
[%
o0
+
0x00
]
!
MS
(
XXX
does
it
stall
here
?
XXX
)
bne
,
pn
%
icc
,
1
b
!
BR
add
%
o0
,
0x8
,
%
o0
!
A0
Group
loopend
:
and
%
o2
,
0x3f
,
%
o2
andcc
%
o2
,
0x38
,
%
g2
be
,
pn
%
XCC
,
endcruft
subcc
%
g2
,
0x8
,
%
g2
be
,
pn
%
XCC
,
endcruft
cmp
%
g1
,
0
be
,
a
,
pt
%
XCC
,
1
f
ldd
[%
o1
+
0x00
],
%f0
1
:
ldd
[%
o1
+
0x08
],
%f2
add
%
o1
,
0x8
,
%
o1
sub
%
o2
,
0x8
,
%
o2
subcc
%
g2
,
0x8
,
%
g2
faligndata
%f0
,
%f2
,
%f8
std
%f8
,
[%
o0
+
0x00
]
be
,
pn
%
XCC
,
endcruft
add
%
o0
,
0x8
,
%
o0
ldd
[%
o1
+
0x08
],
%f0
add
%
o1
,
0x8
,
%
o1
sub
%
o2
,
0x8
,
%
o2
subcc
%
g2
,
0x8
,
%
g2
faligndata
%f2
,
%f0
,
%f8
std
%f8
,
[%
o0
+
0x00
]
bne
,
pn
%
XCC
,
1
b
add
%
o0
,
0x8
,
%
o0
/
*
If
anything
is
left
,
we
copy
it
one
byte
at
a
time
.
*
Note
that
%
g1
is
(
src
&
0x3
)
saved
above
before
the
*
alignaddr
was
performed
.
*/
U3memcpy_
endcruft
:
endcruft
:
cmp
%
o2
,
0
add
%
o1
,
%
g1
,
%
o1
VISExitHalf
be
,
pn
%
icc
,
U3memcpy_short_ret
nop
ba
,
a
,
pt
%
xcc
,
U3memcpy_short
be
,
pn
%
XCC
,
out
sub
%
o0
,
%
o1
,
%
o3
/
*
If
we
get
here
,
then
32
<=
len
<
(
6
*
64
)
*/
U3memcpy_toosmall
:
andcc
%
g1
,
0x7
,
%
g0
bne
,
pn
%
icc
,
small_copy_unaligned
andcc
%
o2
,
0x8
,
%
g0
be
,
pt
%
icc
,
1
f
nop
ldx
[%
o1
],
%
o5
stx
%
o5
,
[%
o1
+
%
o3
]
add
%
o1
,
0x8
,
%
o1
#ifdef SMALL_COPY_USES_FPU
1
:
andcc
%
o2
,
0x4
,
%
g0
be
,
pt
%
icc
,
1
f
nop
lduw
[%
o1
],
%
o5
stw
%
o5
,
[%
o1
+
%
o3
]
add
%
o1
,
0x4
,
%
o1
/
*
Is
'dst'
already
aligned
on
an
8
-
byte
boundary
?
*/
be
,
pt
%
xcc
,
2
f
!
BR
Group
1
:
andcc
%
o2
,
0x2
,
%
g0
be
,
pt
%
icc
,
1
f
nop
lduh
[%
o1
],
%
o5
sth
%
o5
,
[%
o1
+
%
o3
]
add
%
o1
,
0x2
,
%
o1
/
*
Compute
abs
((
dst
&
7
)
-
8
)
into
%
g2
.
This
is
the
number
*
of
bytes
to
copy
to
make
'dst'
8
-
byte
aligned
.
We
pre
-
*
subtract
this
from
'len'
.
*/
sub
%
g2
,
0x8
,
%
g2
!
A0
sub
%
g0
,
%
g2
,
%
g2
!
A0
Group
(
reg
-
dep
)
sub
%
o2
,
%
g2
,
%
o2
!
A0
Group
(
reg
-
dep
)
1
:
andcc
%
o2
,
0x1
,
%
g0
be
,
pt
%
icc
,
out
nop
ldub
[%
o1
],
%
o5
ba
,
pt
%
xcc
,
out
stb
%
o5
,
[%
o1
+
%
o3
]
medium_copy
:
/
*
16
<
len
<=
64
*/
bne
,
pn
%
XCC
,
small_copy_unaligned
sub
%
o0
,
%
o1
,
%
o3
medium_copy_aligned
:
andn
%
o2
,
0x7
,
%
o4
and
%
o2
,
0x7
,
%
o2
1
:
subcc
%
o4
,
0x8
,
%
o4
ldx
[%
o1
],
%
o5
stx
%
o5
,
[%
o1
+
%
o3
]
bgu
,
pt
%
XCC
,
1
b
add
%
o1
,
0x8
,
%
o1
andcc
%
o2
,
0x4
,
%
g0
be
,
pt
%
XCC
,
1
f
nop
sub
%
o2
,
0x4
,
%
o2
lduw
[%
o1
],
%
o5
stw
%
o5
,
[%
o1
+
%
o3
]
add
%
o1
,
0x4
,
%
o1
1
:
cmp
%
o2
,
0
be
,
pt
%
XCC
,
out
nop
ba
,
pt
%
xcc
,
small_copy_unaligned
nop
/
*
Copy
%
g2
bytes
from
src
to
dst
,
one
byte
at
a
time
.
*/
1
:
ldub
[%
o1
+
0x00
],
%
o3
!
MS
(
Group
)
(%
o3
in
3
cycles
)
add
%
o1
,
0x1
,
%
o1
!
A1
add
%
o0
,
0x1
,
%
o0
!
A0
Group
subcc
%
g2
,
0x1
,
%
g2
!
A1
small_copy
:
/
*
0
<
len
<=
16
*/
andcc
%
o3
,
0x3
,
%
g0
bne
,
pn
%
XCC
,
small_copy_unaligned
sub
%
o0
,
%
o1
,
%
o3
bg
,
pt
%
icc
,
1
b
!
BR
Group
stb
%
o3
,
[%
o0
+
-
1
]
!
MS
Group
small_copy_aligned
:
subcc
%
o2
,
4
,
%
o2
lduw
[%
o1
],
%
g1
stw
%
g1
,
[%
o1
+
%
o3
]
bgu
,
pt
%
XCC
,
small_copy_aligned
add
%
o1
,
4
,
%
o1
2
:
VISEntryHalf
!
MS
+
MS
out
:
retl
mov
%
g5
,
%
o0
/
*
Compute
(
len
-
(
len
%
8
))
into
%
g2
.
This
is
guaranteed
*
to
be
nonzero
.
*/
andn
%
o2
,
0x7
,
%
g2
!
A0
Group
/
*
You
may
read
this
and
believe
that
it
allows
reading
*
one
8
-
byte
longword
past
the
end
of
src
.
It
actually
*
does
not
,
as
%
g2
is
subtracted
as
loads
are
done
from
*
src
,
so
we
always
stop
before
running
off
the
end
.
*
Also
,
we
are
guaranteed
to
have
at
least
0x10
bytes
*
to
move
here
.
.
align
32
small_copy_unaligned
:
subcc
%
o2
,
1
,
%
o2
ldub
[%
o1
],
%
g1
stb
%
g1
,
[%
o1
+
%
o3
]
bgu
,
pt
%
XCC
,
small_copy_unaligned
add
%
o1
,
1
,
%
o1
retl
mov
%
g5
,
%
o0
	/* Act like copy_{to,in}_user(), ie. return zero instead
	 * of original destination pointer.  This is invoked when
	 * copy_{to,in}_user() finds that %asi is kernel space.
	 */
	sub		%g2, 0x8, %g2			! A0	Group (reg-dep)
	alignaddr	%o1, %g0, %g1			! MS	(Break-after)
	ldd		[%g1 + 0x00], %f0		! MS	Group (1-cycle stall)
	add		%g1, 0x8, %g1			! A0

1:	ldd		[%g1 + 0x00], %f2		! MS	Group
	add		%g1, 0x8, %g1			! A0
	sub		%o2, 0x8, %o2			! A1
	subcc		%g2, 0x8, %g2			! A0	Group
	faligndata	%f0, %f2, %f8			! FGA	Group (1-cycle stall)
	std		%f8, [%o0 + 0x00]		! MS	Group (2-cycle stall)
	add		%o1, 0x8, %o1			! A0
	be,pn		%icc, 2f			! BR
	 add		%o0, 0x8, %o0			! A1
	ldd		[%g1 + 0x00], %f0		! MS	Group
	add		%g1, 0x8, %g1			! A0
	sub		%o2, 0x8, %o2			! A1
	subcc		%g2, 0x8, %g2			! A0	Group
	faligndata	%f2, %f0, %f8			! FGA	Group (1-cycle stall)
	std		%f8, [%o0 + 0x00]		! MS	Group (2-cycle stall)
	add		%o1, 0x8, %o1			! A0
	bne,pn		%icc, 1b			! BR
	 add		%o0, 0x8, %o0			! A1

	/* Nothing left to copy? */
2:	cmp		%o2, 0				! A0	Group
	VISExitHalf					! A0+MS
	be,pn		%icc, U3memcpy_short_ret	! BR	Group
	 nop						! A0
	ba,a,pt		%xcc, U3memcpy_short		! BR	Group

#else /* !(SMALL_COPY_USES_FPU) */
	xor		%o1, %o0, %g2
	andcc		%g2, 0x7, %g0
	bne,pn		%icc, U3memcpy_short
	 andcc		%o1, 0x7, %g2
	be,pt		%xcc, 2f
	 sub		%g2, 0x8, %g2
	sub		%g0, %g2, %g2
	sub		%o2, %g2, %o2

1:	ldub		[%o1 + 0x00], %o3
	add		%o1, 0x1, %o1
	add		%o0, 0x1, %o0
	subcc		%g2, 0x1, %g2
	bg,pt		%icc, 1b
	 stb		%o3, [%o0 + -1]

2:	andn		%o2, 0x7, %g2
	sub		%o2, %g2, %o2

3:	ldx		[%o1 + 0x00], %o3
	add		%o1, 0x8, %o1
	add		%o0, 0x8, %o0
	subcc		%g2, 0x8, %g2
	bg,pt		%icc, 3b
	 stx		%o3, [%o0 + -8]

	cmp		%o2, 0
	bne,pn		%icc, U3memcpy_short
	 nop
	ba,a,pt		%xcc, U3memcpy_short_ret
#endif /* !(SMALL_COPY_USES_FPU) */

	.globl		U3memcpy_user_stub
U3memcpy_user_stub:
	save		%sp, -192, %sp
	mov		%i0, %o0
	mov		%i1, %o1
	call		U3memcpy
	 mov		%i2, %o2
	ret
	 restore	%g0, %g0, %o0
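U3memcpy_user_stub above simply runs the plain memcpy and then reports success in the
copy_{to,in}_user() convention described in its comment; a hedged C rendering:

	#include <string.h>

	/* Sketch: perform the copy and return "0 bytes left uncopied". */
	static unsigned long user_stub_sketch(void *dst, const void *src, size_t len)
	{
		memcpy(dst, src, len);
		return 0;
	}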
arch/sparc64/lib/VIScopy.S
View file @ da09d2f1
...
...
@@ -306,11 +306,7 @@
	.globl		__memcpy_begin
__memcpy_begin:

	.globl		__memcpy
	.type		__memcpy,@function
memcpy_private:
__memcpy:
memcpy:		mov		ASI_P, asi_src			! IEU0	Group
		brnz,pt		%o2, __memcpy_entry		! CTI
		 mov		ASI_P, asi_dest			! IEU1
...
...
arch/sparc64/lib/splock.S
View file @ da09d2f1
...
...
@@ -6,6 +6,18 @@
	.text
	.align	64

	.globl		_raw_spin_lock
_raw_spin_lock:		/* %o0 = lock_ptr */
1:	ldstub		[%o0], %g7
	brnz,pn		%g7, 2f
	 membar		#StoreLoad | #StoreStore
	retl
	 nop
2:	ldub		[%o0], %g7
	brnz,pt		%g7, 2b
	 membar		#LoadLoad
	ba,a,pt		%xcc, 1b

	.globl		_raw_spin_lock_flags
_raw_spin_lock_flags:	/* %o0 = lock_ptr, %o1 = irq_flags */
1:	ldstub		[%o0], %g7
...
...
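The new out-of-line _raw_spin_lock above is the classic test-and-test-and-set loop:
grab the lock byte with ldstub, and if it was already held spin on plain loads before
retrying. A hedged C sketch using a GCC builtin in place of ldstub:

	/* Sketch of the acquire loop; memory barriers are simplified away. */
	static void spin_lock_sketch(volatile unsigned char *lock)
	{
		while (__sync_lock_test_and_set(lock, 1)) {	/* 1: ldstub */
			while (*lock)				/* 2: ldub; brnz,pt 2b */
				;
		}
		/* the assembly issues membar #StoreLoad | #StoreStore here */
	}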
drivers/sbus/char/bbc_envctrl.c
View file @ da09d2f1
...
...
@@ -7,6 +7,7 @@
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/delay.h>
#include <asm/oplib.h>
#include <asm/ebus.h>
#define __KERNEL_SYSCALLS__
...
...
@@ -622,9 +623,7 @@ void bbc_envctrl_cleanup(void)
		read_unlock(&tasklist_lock);
		if (!found)
			break;
		current->state = TASK_INTERRUPTIBLE;
		schedule_timeout(HZ);
		current->state = TASK_RUNNING;
		msleep(1000);
	}
	kenvctrld_task = NULL;
}
...
...
drivers/sbus/char/envctrl.c
View file @ da09d2f1
...
...
@@ -1181,8 +1181,7 @@ static void __exit envctrl_cleanup(void)
		if (!found)
			break;
		current->state = TASK_INTERRUPTIBLE;
		schedule_timeout(HZ);
		msleep(1000);
	}
	kenvctrld_task = NULL;
}
...
...
include/asm-sparc/pci.h
View file @ da09d2f1
...
...
@@ -87,12 +87,6 @@ extern dma_addr_t pci_map_page(struct pci_dev *hwdev, struct page *page,
extern void pci_unmap_page(struct pci_dev *hwdev,
			dma_addr_t dma_address, size_t size, int direction);

/* map_page and map_single cannot fail */
static inline int pci_dma_mapping_error(dma_addr_t dma_addr)
{
	return 0;
}

/* Map a set of buffers described by scatterlist in streaming
 * mode for DMA.  This is the scather-gather version of the
 * above pci_map_single interface.  Here the scatter gather list
...
...
include/asm-sparc64/page.h
View file @ da09d2f1
...
...
@@ -18,7 +18,7 @@ extern void _clear_page(void *page);
#define clear_page(X)	_clear_page((void *)(X))
struct page;
extern void clear_user_page(void *addr, unsigned long vaddr, struct page *page);
#define copy_page(X,Y)	__memcpy((void *)(X), (void *)(Y), PAGE_SIZE)
#define copy_page(X,Y)	memcpy((void *)(X), (void *)(Y), PAGE_SIZE)
extern void copy_user_page(void *to, void *from, unsigned long vaddr, struct page *topage);

/* GROSS, defining this makes gcc pass these types as aggregates,
...
...
include/asm-sparc64/spinlock.h
View file @ da09d2f1
...
...
@@ -41,22 +41,8 @@ typedef unsigned char spinlock_t;
do { membar("#LoadLoad"); \
} while(*((volatile unsigned char *)lock))

static __inline__ void _raw_spin_lock(spinlock_t *lock)
{
	__asm__ __volatile__(
"1:	ldstub		[%0], %%g7\n"
"	brnz,pn		%%g7, 2f\n"
"	membar		#StoreLoad | #StoreStore\n"
"	.subsection	2\n"
"2:	ldub		[%0], %%g7\n"
"	brnz,pt		%%g7, 2b\n"
"	membar		#LoadLoad\n"
"	b,a,pt		%%xcc, 1b\n"
"	.previous\n"
	: /* no outputs */
	: "r" (lock)
	: "g7", "memory");
}
/* arch/sparc64/lib/spinlock.S */
extern void _raw_spin_lock(spinlock_t *lock);

static __inline__ int _raw_spin_trylock(spinlock_t *lock)
{
...
...
include/asm-sparc64/string.h
View file @ da09d2f1
...
...
@@ -16,9 +16,7 @@
#include <asm/asi.h>

extern void __memmove(void *, const void *, __kernel_size_t);
extern __kernel_size_t __memcpy(void *, const void *, __kernel_size_t);
extern void *__memset(void *, int, __kernel_size_t);
extern void *__builtin_memcpy(void *, const void *, __kernel_size_t);
extern void *__builtin_memset(void *, int, __kernel_size_t);

#ifndef EXPORT_SYMTAB_STROPS
...
...
@@ -37,29 +35,7 @@ extern void *__builtin_memset(void *,int,__kernel_size_t);
#define __HAVE_ARCH_MEMCPY

static inline void *__constant_memcpy(void *to, const void *from, __kernel_size_t n)
{
	if (n) {
		if (n <= 32) {
			__builtin_memcpy(to, from, n);
		} else {
			__memcpy(to, from, n);
		}
	}
	return to;
}

static inline void *__nonconstant_memcpy(void *to, const void *from, __kernel_size_t n)
{
	__memcpy(to, from, n);
	return to;
}

#undef memcpy
#define memcpy(t, f, n) \
	(__builtin_constant_p(n) ? \
	 __constant_memcpy((t),(f),(n)) : \
	 __nonconstant_memcpy((t),(f),(n)))
extern void *memcpy(void *, const void *, __kernel_size_t);

#define __HAVE_ARCH_MEMSET
...
...