Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
L
linux
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
linux
Commits
da09d2f1
Commit
da09d2f1
authored
Jul 28, 2004
by
Linus Torvalds
Browse files
Options
Browse Files
Download
Plain Diff
Merge
bk://kernel.bkbits.net/davem/sparc-2.6
into ppc970.osdl.org:/home/torvalds/v2.6/linux
parents
ee61f1e4
a6df905f
Changes
16
Hide whitespace changes
Inline
Side-by-side
Showing
16 changed files
with
859 additions
and
1724 deletions
+859
-1724
arch/sparc64/defconfig
arch/sparc64/defconfig
+3
-9
arch/sparc64/kernel/sparc64_ksyms.c
arch/sparc64/kernel/sparc64_ksyms.c
+1
-1
arch/sparc64/kernel/sys_sparc32.c
arch/sparc64/kernel/sys_sparc32.c
+0
-132
arch/sparc64/kernel/systbls.S
arch/sparc64/kernel/systbls.S
+2
-2
arch/sparc64/lib/U3copy_from_user.S
arch/sparc64/lib/U3copy_from_user.S
+269
-358
arch/sparc64/lib/U3copy_in_user.S
arch/sparc64/lib/U3copy_in_user.S
+76
-467
arch/sparc64/lib/U3copy_to_user.S
arch/sparc64/lib/U3copy_to_user.S
+248
-362
arch/sparc64/lib/U3memcpy.S
arch/sparc64/lib/U3memcpy.S
+241
-336
arch/sparc64/lib/VIScopy.S
arch/sparc64/lib/VIScopy.S
+0
-4
arch/sparc64/lib/splock.S
arch/sparc64/lib/splock.S
+12
-0
drivers/sbus/char/bbc_envctrl.c
drivers/sbus/char/bbc_envctrl.c
+2
-3
drivers/sbus/char/envctrl.c
drivers/sbus/char/envctrl.c
+1
-2
include/asm-sparc/pci.h
include/asm-sparc/pci.h
+0
-6
include/asm-sparc64/page.h
include/asm-sparc64/page.h
+1
-1
include/asm-sparc64/spinlock.h
include/asm-sparc64/spinlock.h
+2
-16
include/asm-sparc64/string.h
include/asm-sparc64/string.h
+1
-25
No files found.
arch/sparc64/defconfig
View file @
da09d2f1
...
@@ -90,7 +90,6 @@ CONFIG_BINFMT_ELF32=y
...
@@ -90,7 +90,6 @@ CONFIG_BINFMT_ELF32=y
# CONFIG_BINFMT_AOUT32 is not set
# CONFIG_BINFMT_AOUT32 is not set
CONFIG_BINFMT_ELF=y
CONFIG_BINFMT_ELF=y
CONFIG_BINFMT_MISC=m
CONFIG_BINFMT_MISC=m
# CONFIG_SUNOS_EMUL is not set
CONFIG_SOLARIS_EMUL=m
CONFIG_SOLARIS_EMUL=m
#
#
...
@@ -122,7 +121,6 @@ CONFIG_FB=y
...
@@ -122,7 +121,6 @@ CONFIG_FB=y
# CONFIG_FB_CIRRUS is not set
# CONFIG_FB_CIRRUS is not set
CONFIG_FB_PM2=y
CONFIG_FB_PM2=y
# CONFIG_FB_PM2_FIFO_DISCONNECT is not set
# CONFIG_FB_PM2_FIFO_DISCONNECT is not set
# CONFIG_FB_CYBER2000 is not set
# CONFIG_FB_ASILIANT is not set
# CONFIG_FB_ASILIANT is not set
# CONFIG_FB_IMSTT is not set
# CONFIG_FB_IMSTT is not set
# CONFIG_FB_BW2 is not set
# CONFIG_FB_BW2 is not set
...
@@ -180,7 +178,6 @@ CONFIG_LOGO_SUN_CLUT224=y
...
@@ -180,7 +178,6 @@ CONFIG_LOGO_SUN_CLUT224=y
#
#
# Serial drivers
# Serial drivers
#
#
# CONFIG_SERIAL_8250 is not set
#
#
# Non-8250 serial port support
# Non-8250 serial port support
...
@@ -342,7 +339,6 @@ CONFIG_SCSI_SATA_SIL=m
...
@@ -342,7 +339,6 @@ CONFIG_SCSI_SATA_SIL=m
CONFIG_SCSI_SATA_SIS=m
CONFIG_SCSI_SATA_SIS=m
CONFIG_SCSI_SATA_VIA=m
CONFIG_SCSI_SATA_VIA=m
CONFIG_SCSI_SATA_VITESSE=m
CONFIG_SCSI_SATA_VITESSE=m
# CONFIG_SCSI_BUSLOGIC is not set
CONFIG_SCSI_DMX3191D=m
CONFIG_SCSI_DMX3191D=m
CONFIG_SCSI_EATA_PIO=m
CONFIG_SCSI_EATA_PIO=m
# CONFIG_SCSI_FUTURE_DOMAIN is not set
# CONFIG_SCSI_FUTURE_DOMAIN is not set
...
@@ -679,6 +675,9 @@ CONFIG_NET_DIVERT=y
...
@@ -679,6 +675,9 @@ CONFIG_NET_DIVERT=y
# QoS and/or fair queueing
# QoS and/or fair queueing
#
#
CONFIG_NET_SCHED=y
CONFIG_NET_SCHED=y
# CONFIG_NET_SCH_CLK_JIFFIES is not set
# CONFIG_NET_SCH_CLK_GETTIMEOFDAY is not set
CONFIG_NET_SCH_CLK_CPU=y
CONFIG_NET_SCH_CBQ=m
CONFIG_NET_SCH_CBQ=m
CONFIG_NET_SCH_HTB=m
CONFIG_NET_SCH_HTB=m
CONFIG_NET_SCH_HFSC=m
CONFIG_NET_SCH_HFSC=m
...
@@ -1522,11 +1521,6 @@ CONFIG_SND_VX222=m
...
@@ -1522,11 +1521,6 @@ CONFIG_SND_VX222=m
CONFIG_SND_SUN_AMD7930=m
CONFIG_SND_SUN_AMD7930=m
CONFIG_SND_SUN_CS4231=m
CONFIG_SND_SUN_CS4231=m
#
# Open Sound System
#
# CONFIG_SOUND_PRIME is not set
#
#
# USB support
# USB support
#
#
...
...
arch/sparc64/kernel/sparc64_ksyms.c
View file @
da09d2f1
...
@@ -135,6 +135,7 @@ EXPORT_SYMBOL(__write_lock);
...
@@ -135,6 +135,7 @@ EXPORT_SYMBOL(__write_lock);
EXPORT_SYMBOL
(
__write_unlock
);
EXPORT_SYMBOL
(
__write_unlock
);
EXPORT_SYMBOL
(
__write_trylock
);
EXPORT_SYMBOL
(
__write_trylock
);
/* Out of line spin-locking implementation. */
/* Out of line spin-locking implementation. */
EXPORT_SYMBOL
(
_raw_spin_lock
);
EXPORT_SYMBOL
(
_raw_spin_lock_flags
);
EXPORT_SYMBOL
(
_raw_spin_lock_flags
);
#endif
#endif
...
@@ -333,7 +334,6 @@ EXPORT_SYMBOL(sys_close);
...
@@ -333,7 +334,6 @@ EXPORT_SYMBOL(sys_close);
#endif
#endif
/* Special internal versions of library functions. */
/* Special internal versions of library functions. */
EXPORT_SYMBOL
(
__memcpy
);
EXPORT_SYMBOL
(
__memset
);
EXPORT_SYMBOL
(
__memset
);
EXPORT_SYMBOL
(
_clear_page
);
EXPORT_SYMBOL
(
_clear_page
);
EXPORT_SYMBOL
(
clear_user_page
);
EXPORT_SYMBOL
(
clear_user_page
);
...
...
arch/sparc64/kernel/sys_sparc32.c
View file @
da09d2f1
...
@@ -867,138 +867,6 @@ asmlinkage long sys32_ftruncate64(unsigned int fd, unsigned long high, unsigned
...
@@ -867,138 +867,6 @@ asmlinkage long sys32_ftruncate64(unsigned int fd, unsigned long high, unsigned
return
sys_ftruncate
(
fd
,
(
high
<<
32
)
|
low
);
return
sys_ftruncate
(
fd
,
(
high
<<
32
)
|
low
);
}
}
/* readdir & getdents */
#define NAME_OFFSET(de) ((int) ((de)->d_name - (char __user *) (de)))
#define ROUND_UP(x) (((x)+sizeof(u32)-1) & ~(sizeof(u32)-1))
struct
old_linux_dirent32
{
u32
d_ino
;
u32
d_offset
;
unsigned
short
d_namlen
;
char
d_name
[
1
];
};
struct
readdir_callback32
{
struct
old_linux_dirent32
__user
*
dirent
;
int
count
;
};
static
int
fillonedir
(
void
*
__buf
,
const
char
*
name
,
int
namlen
,
loff_t
offset
,
ino_t
ino
,
unsigned
int
d_type
)
{
struct
readdir_callback32
*
buf
=
(
struct
readdir_callback32
*
)
__buf
;
struct
old_linux_dirent32
__user
*
dirent
;
if
(
buf
->
count
)
return
-
EINVAL
;
buf
->
count
++
;
dirent
=
buf
->
dirent
;
put_user
(
ino
,
&
dirent
->
d_ino
);
put_user
(
offset
,
&
dirent
->
d_offset
);
put_user
(
namlen
,
&
dirent
->
d_namlen
);
copy_to_user
(
dirent
->
d_name
,
name
,
namlen
);
put_user
(
0
,
dirent
->
d_name
+
namlen
);
return
0
;
}
asmlinkage
long
old32_readdir
(
unsigned
int
fd
,
struct
old_linux_dirent32
__user
*
dirent
,
unsigned
int
count
)
{
int
error
=
-
EBADF
;
struct
file
*
file
;
struct
readdir_callback32
buf
;
file
=
fget
(
fd
);
if
(
!
file
)
goto
out
;
buf
.
count
=
0
;
buf
.
dirent
=
dirent
;
error
=
vfs_readdir
(
file
,
fillonedir
,
&
buf
);
if
(
error
<
0
)
goto
out_putf
;
error
=
buf
.
count
;
out_putf:
fput
(
file
);
out:
return
error
;
}
struct
linux_dirent32
{
u32
d_ino
;
u32
d_off
;
unsigned
short
d_reclen
;
char
d_name
[
1
];
};
struct
getdents_callback32
{
struct
linux_dirent32
__user
*
current_dir
;
struct
linux_dirent32
__user
*
previous
;
int
count
;
int
error
;
};
static
int
filldir
(
void
*
__buf
,
const
char
*
name
,
int
namlen
,
loff_t
offset
,
ino_t
ino
,
unsigned
int
d_type
)
{
struct
linux_dirent32
__user
*
dirent
;
struct
getdents_callback32
*
buf
=
(
struct
getdents_callback32
*
)
__buf
;
int
reclen
=
ROUND_UP
(
NAME_OFFSET
(
dirent
)
+
namlen
+
2
);
buf
->
error
=
-
EINVAL
;
/* only used if we fail.. */
if
(
reclen
>
buf
->
count
)
return
-
EINVAL
;
dirent
=
buf
->
previous
;
if
(
dirent
)
put_user
(
offset
,
&
dirent
->
d_off
);
dirent
=
buf
->
current_dir
;
buf
->
previous
=
dirent
;
put_user
(
ino
,
&
dirent
->
d_ino
);
put_user
(
reclen
,
&
dirent
->
d_reclen
);
copy_to_user
(
dirent
->
d_name
,
name
,
namlen
);
put_user
(
0
,
dirent
->
d_name
+
namlen
);
put_user
(
d_type
,
(
char
__user
*
)
dirent
+
reclen
-
1
);
dirent
=
(
void
__user
*
)
dirent
+
reclen
;
buf
->
current_dir
=
dirent
;
buf
->
count
-=
reclen
;
return
0
;
}
asmlinkage
long
sys32_getdents
(
unsigned
int
fd
,
struct
linux_dirent32
__user
*
dirent
,
unsigned
int
count
)
{
struct
file
*
file
;
struct
linux_dirent32
__user
*
lastdirent
;
struct
getdents_callback32
buf
;
int
error
=
-
EBADF
;
file
=
fget
(
fd
);
if
(
!
file
)
goto
out
;
buf
.
current_dir
=
dirent
;
buf
.
previous
=
NULL
;
buf
.
count
=
count
;
buf
.
error
=
0
;
error
=
vfs_readdir
(
file
,
filldir
,
&
buf
);
if
(
error
<
0
)
goto
out_putf
;
lastdirent
=
buf
.
previous
;
error
=
buf
.
error
;
if
(
lastdirent
)
{
put_user
(
file
->
f_pos
,
&
lastdirent
->
d_off
);
error
=
count
-
buf
.
count
;
}
out_putf:
fput
(
file
);
out:
return
error
;
}
/* end of readdir & getdents */
int
cp_compat_stat
(
struct
kstat
*
stat
,
struct
compat_stat
__user
*
statbuf
)
int
cp_compat_stat
(
struct
kstat
*
stat
,
struct
compat_stat
__user
*
statbuf
)
{
{
int
err
;
int
err
;
...
...
arch/sparc64/kernel/systbls.S
View file @
da09d2f1
...
@@ -54,13 +54,13 @@ sys_call_table32:
...
@@ -54,13 +54,13 @@ sys_call_table32:
.
word
compat_sys_fcntl64
,
sys_ni_syscall
,
compat_sys_statfs
,
compat_sys_fstatfs
,
sys_oldumount
.
word
compat_sys_fcntl64
,
sys_ni_syscall
,
compat_sys_statfs
,
compat_sys_fstatfs
,
sys_oldumount
/*
160
*/
.
word
compat_sys_sched_setaffinity
,
compat_sys_sched_getaffinity
,
sys32_getdomainname
,
sys32_setdomainname
,
sys_nis_syscall
/*
160
*/
.
word
compat_sys_sched_setaffinity
,
compat_sys_sched_getaffinity
,
sys32_getdomainname
,
sys32_setdomainname
,
sys_nis_syscall
.
word
sys_quotactl
,
sys_set_tid_address
,
compat_sys_mount
,
sys_ustat
,
sys32_setxattr
.
word
sys_quotactl
,
sys_set_tid_address
,
compat_sys_mount
,
sys_ustat
,
sys32_setxattr
/*
170
*/
.
word
sys32_lsetxattr
,
sys32_fsetxattr
,
sys_getxattr
,
sys_lgetxattr
,
sys32
_getdents
/*
170
*/
.
word
sys32_lsetxattr
,
sys32_fsetxattr
,
sys_getxattr
,
sys_lgetxattr
,
compat_sys
_getdents
.
word
sys_setsid
,
sys_fchdir
,
sys32_fgetxattr
,
sys_listxattr
,
sys_llistxattr
.
word
sys_setsid
,
sys_fchdir
,
sys32_fgetxattr
,
sys_listxattr
,
sys_llistxattr
/*
180
*/
.
word
sys32_flistxattr
,
sys_removexattr
,
sys_lremovexattr
,
compat_sys_sigpending
,
sys_ni_syscall
/*
180
*/
.
word
sys32_flistxattr
,
sys_removexattr
,
sys_lremovexattr
,
compat_sys_sigpending
,
sys_ni_syscall
.
word
sys32_setpgid
,
sys32_fremovexattr
,
sys32_tkill
,
sys32_exit_group
,
sparc64_newuname
.
word
sys32_setpgid
,
sys32_fremovexattr
,
sys32_tkill
,
sys32_exit_group
,
sparc64_newuname
/*
190
*/
.
word
sys32_init_module
,
sparc64_personality
,
sys_remap_file_pages
,
sys32_epoll_create
,
sys32_epoll_ctl
/*
190
*/
.
word
sys32_init_module
,
sparc64_personality
,
sys_remap_file_pages
,
sys32_epoll_create
,
sys32_epoll_ctl
.
word
sys32_epoll_wait
,
sys_nis_syscall
,
sys_getppid
,
sys32_sigaction
,
sys_sgetmask
.
word
sys32_epoll_wait
,
sys_nis_syscall
,
sys_getppid
,
sys32_sigaction
,
sys_sgetmask
/*
200
*/
.
word
sys32_ssetmask
,
sys_sigsuspend
,
compat_sys_newlstat
,
sys_uselib
,
old32
_readdir
/*
200
*/
.
word
sys32_ssetmask
,
sys_sigsuspend
,
compat_sys_newlstat
,
sys_uselib
,
compat_old
_readdir
.
word
sys32_readahead
,
sys32_socketcall
,
sys32_syslog
,
sys32_lookup_dcookie
,
sys32_fadvise64
.
word
sys32_readahead
,
sys32_socketcall
,
sys32_syslog
,
sys32_lookup_dcookie
,
sys32_fadvise64
/*
210
*/
.
word
sys32_fadvise64_64
,
sys32_tgkill
,
sys32_waitpid
,
sys_swapoff
,
sys32_sysinfo
/*
210
*/
.
word
sys32_fadvise64_64
,
sys32_tgkill
,
sys32_waitpid
,
sys_swapoff
,
sys32_sysinfo
.
word
sys32_ipc
,
sys32_sigreturn
,
sys_clone
,
sys_nis_syscall
,
sys32_adjtimex
.
word
sys32_ipc
,
sys32_sigreturn
,
sys_clone
,
sys_nis_syscall
,
sys32_adjtimex
...
...
arch/sparc64/lib/U3copy_from_user.S
View file @
da09d2f1
/*
$Id
:
U3copy_from_user
.
S
,
v
1
.4
2002
/
01
/
15
07
:
16
:
26
davem
Exp
$
/*
U3copy_from_user
.
S
:
UltraSparc
-
III
optimized
copy
from
userspace
.
*
U3memcpy
.
S
:
UltraSparc
-
III
optimized
copy
from
userspace
.
*
*
*
Copyright
(
C
)
1999
,
2000
David
S
.
Miller
(
davem
@
redhat
.
com
)
*
Copyright
(
C
)
1999
,
2000
,
2004
David
S
.
Miller
(
davem
@
redhat
.
com
)
*/
*/
#ifdef __KERNEL__
#include <asm/visasm.h>
#include <asm/visasm.h>
#include <asm/asi.h>
#include <asm/asi.h>
#include <asm/dcu.h>
#include <asm/dcu.h>
#include <asm/spitfire.h>
#include <asm/spitfire.h>
#undef SMALL_COPY_USES_FPU
#define XCC xcc
#define EXNV_RAW(x,y,a,b) \
98
:
x
,
y
; \
.
section
.
fixup
; \
.
align
4
; \
99
:
ba
U3cfu_fixup
; \
a
,
b
,
%
o1
; \
.
section
__ex_table
; \
.
align
4
; \
.
word
98
b
,
99
b
; \
.
text
; \
.
align
4
;
#define EXNV(x,y,a,b) \
#define EXNV(x,y,a,b) \
98
:
x
,
y
; \
98
:
x
,
y
; \
.
section
.
fixup
; \
.
section
.
fixup
; \
.
align
4
; \
.
align
4
; \
99
:
VISExitHalf
;
\
99
:
add
%
o1
,
%
o3
,
%
o0
;
\
ba
U3cfu_fixup
; \
ba
U3cfu_fixup
; \
a
,
b
,
%
o1
; \
a
,
b
,
%
o1
; \
.
section
__ex_table
; \
.
section
__ex_table
; \
...
@@ -22,6 +33,32 @@
...
@@ -22,6 +33,32 @@
.
word
98
b
,
99
b
; \
.
word
98
b
,
99
b
; \
.
text
; \
.
text
; \
.
align
4
;
.
align
4
;
#define EXNV4(x,y,a,b) \
98
:
x
,
y
; \
.
section
.
fixup
; \
.
align
4
; \
99
:
add
%
o1
,
%
o3
,
%
o0
; \
a
,
b
,
%
o1
; \
ba
U3cfu_fixup
; \
add
%
o1
,
4
,
%
o1
; \
.
section
__ex_table
; \
.
align
4
; \
.
word
98
b
,
99
b
; \
.
text
; \
.
align
4
;
#define EXNV8(x,y,a,b) \
98
:
x
,
y
; \
.
section
.
fixup
; \
.
align
4
; \
99
:
add
%
o1
,
%
o3
,
%
o0
; \
a
,
b
,
%
o1
; \
ba
U3cfu_fixup
; \
add
%
o1
,
8
,
%
o1
; \
.
section
__ex_table
; \
.
align
4
; \
.
word
98
b
,
99
b
; \
.
text
; \
.
align
4
;
#define EX(x,y,a,b) \
#define EX(x,y,a,b) \
98
:
x
,
y
; \
98
:
x
,
y
; \
.
section
.
fixup
; \
.
section
.
fixup
; \
...
@@ -77,18 +114,9 @@
...
@@ -77,18 +114,9 @@
.
word
98
b
,
99
b
; \
.
word
98
b
,
99
b
; \
.
text
; \
.
text
; \
.
align
4
;
.
align
4
;
#else
#define ASI_BLK_P 0xf0
.
register
%
g2
,#
scratch
#define FPRS_FEF 0x04
.
register
%
g3
,#
scratch
#define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs
#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
#define SMALL_COPY_USES_FPU
#define EXNV(x,y,a,b) x,y;
#define EX(x,y,a,b) x,y;
#define EX2(x,y) x,y;
#define EX3(x,y) x,y;
#define EX4(x,y) x,y;
#endif
/
*
Special
/
non
-
trivial
issues
of
this
code
:
/
*
Special
/
non
-
trivial
issues
of
this
code
:
*
*
...
@@ -109,81 +137,55 @@
...
@@ -109,81 +137,55 @@
*
of
up
to
2
.4
GB
per
second
.
*
of
up
to
2
.4
GB
per
second
.
*/
*/
.
globl
U3copy_from_user
.
globl
U3copy_from_user
U3copy_from_user
:
/
*
%
o0
=
dst
,
%
o1
=
src
,
%
o2
=
len
*/
U3copy_from_user
:
/
*
%
o0
=
dst
,
%
o1
=
src
,
%
o2
=
len
*/
#ifndef __KERNEL__
cmp
%
o2
,
0
/
*
Save
away
original
'dst'
for
memcpy
return
value
.
*/
be
,
pn
%
XCC
,
out
mov
%
o0
,
%
g3
!
A0
Group
or
%
o0
,
%
o1
,
%
o3
#endif
cmp
%
o2
,
16
/
*
Anything
to
copy
at
all
?
*/
bleu
,
a
,
pn
%
XCC
,
small_copy
cmp
%
o2
,
0
!
A1
or
%
o3
,
%
o2
,
%
o3
ble
,
pn
%
icc
,
U3copy_from_user_short_ret
!
BR
cmp
%
o2
,
256
/
*
Extremely
small
copy
?
*/
blu
,
pt
%
XCC
,
medium_copy
cmp
%
o2
,
31
!
A0
Group
andcc
%
o3
,
0x7
,
%
g0
ble
,
pn
%
icc
,
U3copy_from_user_short
!
BR
ba
,
pt
%
xcc
,
enter
/
*
Large
enough
to
use
unrolled
prefetch
loops
?
*/
andcc
%
o0
,
0x3f
,
%
g2
cmp
%
o2
,
0x100
!
A1
bge
,
a
,
pt
%
icc
,
U3copy_from_user_enter
!
BR
Group
/
*
Here
len
>=
256
and
condition
codes
reflect
execution
andcc
%
o0
,
0x3f
,
%
g2
!
A0
ba
,
pt
%
xcc
,
U3copy_from_user_toosmall
!
BR
Group
andcc
%
o0
,
0x7
,
%
g2
!
A0
.
align
32
U3copy_from_user_short
:
/
*
Copy
%
o2
bytes
from
src
to
dst
,
one
byte
at
a
time
.
*/
EXNV
(
lduba
[%
o1
+
0x00
]
%
asi
,
%
o3
,
add
%
o2
,
%
g0
)!
MS
Group
add
%
o1
,
0x1
,
%
o1
!
A0
add
%
o0
,
0x1
,
%
o0
!
A1
subcc
%
o2
,
1
,
%
o2
!
A0
Group
bg
,
pt
%
icc
,
U3copy_from_user_short
!
BR
stb
%
o3
,
[%
o0
+
-
1
]
!
MS
Group
(
1
-
cycle
stall
)
U3copy_from_user_short_ret
:
#ifdef __KERNEL__
retl
!
BR
Group
(
0
-
4
cycle
stall
)
clr
%
o0
!
A0
#else
retl
!
BR
Group
(
0
-
4
cycle
stall
)
mov
%
g3
,
%
o0
!
A0
#endif
/
*
Here
len
>=
(
6
*
64
)
and
condition
codes
reflect
execution
*
of
"andcc %o0, 0x7, %g2"
,
done
by
caller
.
*
of
"andcc %o0, 0x7, %g2"
,
done
by
caller
.
*/
*/
.
align
64
.
align
64
U3copy_from_user_
enter
:
enter
:
/
*
Is
'dst'
already
aligned
on
an
64
-
byte
boundary
?
*/
/
*
Is
'dst'
already
aligned
on
an
64
-
byte
boundary
?
*/
be
,
pt
%
xcc
,
2
f
!
BR
be
,
pt
%
XCC
,
2
f
/
*
Compute
abs
((
dst
&
0x3f
)
-
0x40
)
into
%
g2
.
This
is
the
number
/
*
Compute
abs
((
dst
&
0x3f
)
-
0x40
)
into
%
g2
.
This
is
the
number
*
of
bytes
to
copy
to
make
'dst'
64
-
byte
aligned
.
We
pre
-
*
of
bytes
to
copy
to
make
'dst'
64
-
byte
aligned
.
We
pre
-
*
subtract
this
from
'len'
.
*
subtract
this
from
'len'
.
*/
*/
sub
%
g2
,
0x40
,
%
g2
!
A0
Group
sub
%
g2
,
0x40
,
%
g2
sub
%
g0
,
%
g2
,
%
g2
!
A0
Group
sub
%
g0
,
%
g2
,
%
g2
sub
%
o2
,
%
g2
,
%
o2
!
A0
Group
sub
%
o2
,
%
g2
,
%
o2
/
*
Copy
%
g2
bytes
from
src
to
dst
,
one
byte
at
a
time
.
*/
/
*
Copy
%
g2
bytes
from
src
to
dst
,
one
byte
at
a
time
.
*/
1
:
EXNV
(
lduba
[%
o1
+
0x00
]
%
asi
,
%
o3
,
add
%
o2
,
%
g2
)!
MS
(
Group
)
1
:
EXNV
_RAW
(
lduba
[%
o1
+
0x00
]
%
asi
,
%
o3
,
add
%
o2
,
%
g2
)
add
%
o1
,
0x1
,
%
o1
!
A1
add
%
o1
,
0x1
,
%
o1
add
%
o0
,
0x1
,
%
o0
!
A0
Group
add
%
o0
,
0x1
,
%
o0
subcc
%
g2
,
0x1
,
%
g2
!
A1
subcc
%
g2
,
0x1
,
%
g2
bg
,
pt
%
icc
,
1
b
!
BR
Group
bg
,
pt
%
XCC
,
1
b
stb
%
o3
,
[%
o0
+
-
1
]
!
MS
Group
stb
%
o3
,
[%
o0
+
-
1
]
2
:
VISEntryHalf
!
MS
+
MS
2
:
VISEntryHalf
and
%
o1
,
0x7
,
%
g1
!
A1
and
%
o1
,
0x7
,
%
g1
ba
,
pt
%
xcc
,
U3copy_from_user_begin
!
BR
ba
,
pt
%
xcc
,
begin
alignaddr
%
o1
,
%
g0
,
%
o1
!
MS
(
Break
-
after
)
alignaddr
%
o1
,
%
g0
,
%
o1
.
align
64
.
align
64
U3copy_from_user_
begin
:
begin
:
#ifdef __KERNEL__
.
globl
U3copy_from_user_nop_1_6
.
globl
U3copy_from_user_nop_1_6
U3copy_from_user_nop_1_6
:
U3copy_from_user_nop_1_6
:
ldxa
[%
g0
]
ASI_DCU_CONTROL_REG
,
%
g3
ldxa
[%
g0
]
ASI_DCU_CONTROL_REG
,
%
g3
...
@@ -192,315 +194,225 @@ U3copy_from_user_nop_1_6:
...
@@ -192,315 +194,225 @@ U3copy_from_user_nop_1_6:
or
%
g3
,
%
o3
,
%
o3
or
%
g3
,
%
o3
,
%
o3
stxa
%
o3
,
[%
g0
]
ASI_DCU_CONTROL_REG
!
Enable
P
-
cache
stxa
%
o3
,
[%
g0
]
ASI_DCU_CONTROL_REG
!
Enable
P
-
cache
membar
#
Sync
membar
#
Sync
#endif
prefetcha
[%
o1
+
0x000
]
%
asi
,
#
one_read
!
MS
Group1
prefetcha
[%
o1
+
0x000
]
%
asi
,
#
one_read
prefetcha
[%
o1
+
0x040
]
%
asi
,
#
one_read
!
MS
Group2
prefetcha
[%
o1
+
0x040
]
%
asi
,
#
one_read
andn
%
o2
,
(
0x40
-
1
),
%
o4
!
A0
andn
%
o2
,
(
0x40
-
1
),
%
o4
prefetcha
[%
o1
+
0x080
]
%
asi
,
#
one_read
!
MS
Group3
prefetcha
[%
o1
+
0x080
]
%
asi
,
#
one_read
cmp
%
o4
,
0x140
!
A0
prefetcha
[%
o1
+
0x0c0
]
%
asi
,
#
one_read
prefetcha
[%
o1
+
0x0c0
]
%
asi
,
#
one_read
!
MS
Group4
EX
(
ldda
[%
o1
+
0x000
]
%
asi
,
%f0
,
add
%
o2
,
%
g0
)
EX
(
ldda
[%
o1
+
0x000
]
%
asi
,
%f0
,
add
%
o2
,
%
g0
)
!
MS
Group5
(
%f0
results
at
G8
)
prefetcha
[%
o1
+
0x100
]
%
asi
,
#
one_read
bge
,
a
,
pt
%
icc
,
1
f
!
BR
EX
(
ldda
[%
o1
+
0x008
]
%
asi
,
%f2
,
add
%
o2
,
%
g0
)
prefetcha
[%
o1
+
0x140
]
%
asi
,
#
one_read
prefetcha
[%
o1
+
0x100
]
%
asi
,
#
one_read
!
MS
Group6
EX
(
ldda
[%
o1
+
0x010
]
%
asi
,
%f4
,
add
%
o2
,
%
g0
)
1
:
EX
(
ldda
[%
o1
+
0x008
]
%
asi
,
%f2
,
add
%
o2
,
%
g0
)
!
AX
(
%f2
results
at
G9
)
prefetcha
[%
o1
+
0x180
]
%
asi
,
#
one_read
cmp
%
o4
,
0x180
!
A1
faligndata
%f0
,
%f2
,
%f16
bge
,
a
,
pt
%
icc
,
1
f
!
BR
EX
(
ldda
[%
o1
+
0x018
]
%
asi
,
%f6
,
add
%
o2
,
%
g0
)
prefetcha
[%
o1
+
0x140
]
%
asi
,
#
one_read
!
MS
Group7
faligndata
%f2
,
%f4
,
%f18
1
:
EX
(
ldda
[%
o1
+
0x010
]
%
asi
,
%f4
,
add
%
o2
,
%
g0
)
!
AX
(
%f4
results
at
G10
)
EX
(
ldda
[%
o1
+
0x020
]
%
asi
,
%f8
,
add
%
o2
,
%
g0
)
cmp
%
o4
,
0x1c0
!
A1
faligndata
%f4
,
%f6
,
%f20
bge
,
a
,
pt
%
icc
,
1
f
!
BR
EX
(
ldda
[%
o1
+
0x028
]
%
asi
,
%f10
,
add
%
o2
,
%
g0
)
faligndata
%f6
,
%f8
,
%f22
prefetcha
[%
o1
+
0x180
]
%
asi
,
#
one_read
!
MS
Group8
1
:
faligndata
%f0
,
%f2
,
%f16
!
FGA
Group9
(
%f16
at
G12
)
EX
(
ldda
[%
o1
+
0x030
]
%
asi
,
%f12
,
add
%
o2
,
%
g0
)
EX
(
ldda
[%
o1
+
0x018
]
%
asi
,
%f6
,
add
%
o2
,
%
g0
)
!
AX
(
%f6
results
at
G12
)
faligndata
%f8
,
%f10
,
%f24
faligndata
%f2
,
%f4
,
%f18
!
FGA
Group10
(
%f18
results
at
G13
)
EX
(
ldda
[%
o1
+
0x038
]
%
asi
,
%f14
,
add
%
o2
,
%
g0
)
EX
(
ldda
[%
o1
+
0x020
]
%
asi
,
%f8
,
add
%
o2
,
%
g0
)
!
MS
(
%f8
results
at
G13
)
faligndata
%f10
,
%f12
,
%f26
faligndata
%f4
,
%f6
,
%f20
!
FGA
Group12
(
1
-
cycle
stall
,
%f20
at
G15
)
EX
(
ldda
[%
o1
+
0x040
]
%
asi
,
%f0
,
add
%
o2
,
%
g0
)
EX
(
ldda
[%
o1
+
0x028
]
%
asi
,
%f10
,
add
%
o2
,
%
g0
)
!
MS
(
%f10
results
at
G15
)
faligndata
%f6
,
%f8
,
%f22
!
FGA
Group13
(
%f22
results
at
G16
)
sub
%
o4
,
0x80
,
%
o4
add
%
o1
,
0x40
,
%
o1
EX
(
ldda
[%
o1
+
0x030
]
%
asi
,
%f12
,
add
%
o2
,
%
g0
)
!
MS
(
%f12
results
at
G16
)
ba
,
pt
%
xcc
,
loop
faligndata
%f8
,
%f10
,
%f24
!
FGA
Group15
(
1
-
cycle
stall
,
%f24
at
G18
)
srl
%
o4
,
6
,
%
o3
EX
(
ldda
[%
o1
+
0x038
]
%
asi
,
%f14
,
add
%
o2
,
%
g0
)
!
MS
(
%f14
results
at
G18
)
faligndata
%f10
,
%f12
,
%f26
!
FGA
Group16
(
%f26
results
at
G19
)
.
align
64
EX
(
ldda
[%
o1
+
0x040
]
%
asi
,
%f0
,
add
%
o2
,
%
g0
)
!
MS
(
%f0
results
at
G19
)
loop
:
EX3
(
ldda
[%
o1
+
0x008
]
%
asi
,
%f2
)
/
*
We
only
use
the
first
loop
if
len
>
(
7
*
64
)
.
*/
faligndata
%f12
,
%f14
,
%f28
subcc
%
o4
,
0x1c0
,
%
o4
!
A0
Group17
EX3
(
ldda
[%
o1
+
0x010
]
%
asi
,
%f4
)
bg
,
pt
%
icc
,
U3copy_from_user_loop1
!
BR
faligndata
%f14
,
%f0
,
%f30
add
%
o1
,
0x40
,
%
o1
!
A1
stda
%f16
,
[%
o0
]
ASI_BLK_P
EX3
(
ldda
[%
o1
+
0x018
]
%
asi
,
%f6
)
add
%
o4
,
0x140
,
%
o4
!
A0
Group18
faligndata
%f0
,
%f2
,
%f16
ba
,
pt
%
xcc
,
U3copy_from_user_loop2
!
BR
srl
%
o4
,
6
,
%
o3
!
A0
Group19
EX3
(
ldda
[%
o1
+
0x020
]
%
asi
,
%f8
)
nop
faligndata
%f2
,
%f4
,
%f18
nop
EX3
(
ldda
[%
o1
+
0x028
]
%
asi
,
%f10
)
nop
faligndata
%f4
,
%f6
,
%f20
nop
EX3
(
ldda
[%
o1
+
0x030
]
%
asi
,
%f12
)
nop
faligndata
%f6
,
%f8
,
%f22
EX3
(
ldda
[%
o1
+
0x038
]
%
asi
,
%f14
)
nop
faligndata
%f8
,
%f10
,
%f24
nop
EX3
(
ldda
[%
o1
+
0x040
]
%
asi
,
%f0
)
/
*
This
loop
performs
the
copy
and
queues
new
prefetches
.
prefetcha
[%
o1
+
0x180
]
%
asi
,
#
one_read
*
We
drop
into
the
second
loop
when
len
<=
(
5
*
64
)
.
Note
faligndata
%f10
,
%f12
,
%f26
*
that
this
(
5
*
64
)
factor
has
been
subtracted
from
len
subcc
%
o3
,
0x01
,
%
o3
*
already
.
add
%
o1
,
0x40
,
%
o1
*/
bg
,
pt
%
XCC
,
loop
U3copy_from_user_loop1
:
add
%
o0
,
0x40
,
%
o0
EX2
(
ldda
[%
o1
+
0x008
]
%
asi
,
%f2
)
!
MS
Group2
(
%f2
results
at
G5
)
faligndata
%f12
,
%f14
,
%f28
!
FGA
(
%f28
results
at
G5
)
EX2
(
ldda
[%
o1
+
0x010
]
%
asi
,
%f4
)
!
MS
Group3
(
%f4
results
at
G6
)
faligndata
%f14
,
%f0
,
%f30
!
FGA
Group4
(
1
-
cycle
stall
,
%f30
at
G7
)
stda
%f16
,
[%
o0
]
ASI_BLK_P
!
MS
EX2
(
ldda
[%
o1
+
0x018
]
%
asi
,
%f6
)
!
AX
(
%f6
results
at
G7
)
faligndata
%f0
,
%f2
,
%f16
!
FGA
Group12
(
7
-
cycle
stall
)
EX2
(
ldda
[%
o1
+
0x020
]
%
asi
,
%f8
)
!
MS
(
%f8
results
at
G15
)
faligndata
%f2
,
%f4
,
%f18
!
FGA
Group13
(
%f18
results
at
G16
)
EX2
(
ldda
[%
o1
+
0x028
]
%
asi
,
%f10
)
!
MS
(
%f10
results
at
G16
)
faligndata
%f4
,
%f6
,
%f20
!
FGA
Group14
(
%f20
results
at
G17
)
EX2
(
ldda
[%
o1
+
0x030
]
%
asi
,
%f12
)
!
MS
(
%f12
results
at
G17
)
faligndata
%f6
,
%f8
,
%f22
!
FGA
Group15
(
%f22
results
at
G18
)
EX2
(
ldda
[%
o1
+
0x038
]
%
asi
,
%f14
)
!
MS
(
%f14
results
at
G18
)
faligndata
%f8
,
%f10
,
%f24
!
FGA
Group16
(
%f24
results
at
G19
)
EX2
(
ldda
[%
o1
+
0x040
]
%
asi
,
%f0
)
!
AX
(
%f0
results
at
G19
)
prefetcha
[%
o1
+
0x180
]
%
asi
,
#
one_read
!
MS
faligndata
%f10
,
%f12
,
%f26
!
FGA
Group17
(
%f26
results
at
G20
)
subcc
%
o4
,
0x40
,
%
o4
!
A0
add
%
o1
,
0x40
,
%
o1
!
A1
bg
,
pt
%
xcc
,
U3copy_from_user_loop1
!
BR
add
%
o0
,
0x40
,
%
o0
!
A0
Group18
U3copy_from_user_loop2_enter
:
mov
5
,
%
o3
!
A1
/
*
This
loop
performs
on
the
copy
,
no
new
prefetches
are
*
queued
.
We
do
things
this
way
so
that
we
do
not
perform
*
any
spurious
prefetches
past
the
end
of
the
src
buffer
.
*/
U3copy_from_user_loop2
:
EX3
(
ldda
[%
o1
+
0x008
]
%
asi
,
%f2
)
!
MS
faligndata
%f12
,
%f14
,
%f28
!
FGA
Group2
EX3
(
ldda
[%
o1
+
0x010
]
%
asi
,
%f4
)
!
MS
faligndata
%f14
,
%f0
,
%f30
!
FGA
Group4
(
1
-
cycle
stall
)
stda
%f16
,
[%
o0
]
ASI_BLK_P
!
MS
EX3
(
ldda
[%
o1
+
0x018
]
%
asi
,
%f6
)
!
AX
faligndata
%f0
,
%f2
,
%f16
!
FGA
Group12
(
7
-
cycle
stall
)
EX3
(
ldda
[%
o1
+
0x020
]
%
asi
,
%f8
)
!
MS
faligndata
%f2
,
%f4
,
%f18
!
FGA
Group13
EX3
(
ldda
[%
o1
+
0x028
]
%
asi
,
%f10
)
!
MS
faligndata
%f4
,
%f6
,
%f20
!
FGA
Group14
EX3
(
ldda
[%
o1
+
0x030
]
%
asi
,
%f12
)
!
MS
faligndata
%f6
,
%f8
,
%f22
!
FGA
Group15
EX3
(
ldda
[%
o1
+
0x038
]
%
asi
,
%f14
)
!
MS
faligndata
%f8
,
%f10
,
%f24
!
FGA
Group16
EX3
(
ldda
[%
o1
+
0x040
]
%
asi
,
%f0
)
!
AX
faligndata
%f10
,
%f12
,
%f26
!
FGA
Group17
subcc
%
o3
,
0x01
,
%
o3
!
A0
add
%
o1
,
0x40
,
%
o1
!
A1
bg
,
pt
%
xcc
,
U3copy_from_user_loop2
!
BR
add
%
o0
,
0x40
,
%
o0
!
A0
Group18
/
*
Finally
we
copy
the
last
full
64
-
byte
block
.
*/
/
*
Finally
we
copy
the
last
full
64
-
byte
block
.
*/
U3copy_from_user_
loopfini
:
loopfini
:
EX3
(
ldda
[%
o1
+
0x008
]
%
asi
,
%f2
)
!
MS
EX3
(
ldda
[%
o1
+
0x008
]
%
asi
,
%f2
)
faligndata
%f12
,
%f14
,
%f28
!
FGA
faligndata
%f12
,
%f14
,
%f28
EX3
(
ldda
[%
o1
+
0x010
]
%
asi
,
%f4
)
!
MS
Group19
EX3
(
ldda
[%
o1
+
0x010
]
%
asi
,
%f4
)
faligndata
%f14
,
%f0
,
%f30
!
FGA
faligndata
%f14
,
%f0
,
%f30
stda
%f16
,
[%
o0
]
ASI_BLK_P
!
MS
Group20
stda
%f16
,
[%
o0
]
ASI_BLK_P
EX3
(
ldda
[%
o1
+
0x018
]
%
asi
,
%f6
)
!
AX
EX3
(
ldda
[%
o1
+
0x018
]
%
asi
,
%f6
)
faligndata
%f0
,
%f2
,
%f16
!
FGA
Group11
(
7
-
cycle
stall
)
faligndata
%f0
,
%f2
,
%f16
EX3
(
ldda
[%
o1
+
0x020
]
%
asi
,
%f8
)
!
MS
EX3
(
ldda
[%
o1
+
0x020
]
%
asi
,
%f8
)
faligndata
%f2
,
%f4
,
%f18
!
FGA
Group12
faligndata
%f2
,
%f4
,
%f18
EX3
(
ldda
[%
o1
+
0x028
]
%
asi
,
%f10
)
!
MS
EX3
(
ldda
[%
o1
+
0x028
]
%
asi
,
%f10
)
faligndata
%f4
,
%f6
,
%f20
!
FGA
Group13
faligndata
%f4
,
%f6
,
%f20
EX3
(
ldda
[%
o1
+
0x030
]
%
asi
,
%f12
)
!
MS
EX3
(
ldda
[%
o1
+
0x030
]
%
asi
,
%f12
)
faligndata
%f6
,
%f8
,
%f22
!
FGA
Group14
faligndata
%f6
,
%f8
,
%f22
EX3
(
ldda
[%
o1
+
0x038
]
%
asi
,
%f14
)
!
MS
EX3
(
ldda
[%
o1
+
0x038
]
%
asi
,
%f14
)
faligndata
%f8
,
%f10
,
%f24
!
FGA
Group15
faligndata
%f8
,
%f10
,
%f24
cmp
%
g1
,
0
!
A0
cmp
%
g1
,
0
be
,
pt
%
icc
,
1
f
!
BR
be
,
pt
%
XCC
,
1
f
add
%
o0
,
0x40
,
%
o0
!
A1
add
%
o0
,
0x40
,
%
o0
EX4
(
ldda
[%
o1
+
0x040
]
%
asi
,
%f0
)
!
MS
EX4
(
ldda
[%
o1
+
0x040
]
%
asi
,
%f0
)
1
:
faligndata
%f10
,
%f12
,
%f26
!
FGA
Group16
1
:
faligndata
%f10
,
%f12
,
%f26
faligndata
%f12
,
%f14
,
%f28
!
FGA
Group17
faligndata
%f12
,
%f14
,
%f28
faligndata
%f14
,
%f0
,
%f30
!
FGA
Group18
faligndata
%f14
,
%f0
,
%f30
stda
%f16
,
[%
o0
]
ASI_BLK_P
!
MS
stda
%f16
,
[%
o0
]
ASI_BLK_P
add
%
o0
,
0x40
,
%
o0
!
A0
add
%
o0
,
0x40
,
%
o0
add
%
o1
,
0x40
,
%
o1
!
A1
add
%
o1
,
0x40
,
%
o1
#ifdef __KERNEL__
.
globl
U3copy_from_user_nop_2_3
.
globl
U3copy_from_user_nop_2_3
U3copy_from_user_nop_2_3
:
U3copy_from_user_nop_2_3
:
mov
PRIMARY_CONTEXT
,
%
o3
mov
PRIMARY_CONTEXT
,
%
o3
stxa
%
g0
,
[%
o3
]
ASI_DMMU
!
Flush
P
-
cache
stxa
%
g0
,
[%
o3
]
ASI_DMMU
!
Flush
P
-
cache
stxa
%
g3
,
[%
g0
]
ASI_DCU_CONTROL_REG
!
Disable
P
-
cache
stxa
%
g3
,
[%
g0
]
ASI_DCU_CONTROL_REG
!
Disable
P
-
cache
#endif
membar
#
Sync
!
MS
Group26
(
7
-
cycle
stall
)
membar
#
Sync
/
*
Now
we
copy
the
(
len
modulo
64
)
bytes
at
the
end
.
/
*
Now
we
copy
the
(
len
modulo
64
)
bytes
at
the
end
.
*
Note
how
we
borrow
the
%f0
loaded
above
.
*
Note
how
we
borrow
the
%f0
loaded
above
.
*
*
*
Also
notice
how
this
code
is
careful
not
to
perform
a
*
Also
notice
how
this
code
is
careful
not
to
perform
a
*
load
past
the
end
of
the
src
buffer
just
like
similar
*
load
past
the
end
of
the
src
buffer
.
*
code
found
in
U3copy_from_user_toosmall
processing
.
*/
*/
U3copy_from_user_
loopend
:
loopend
:
and
%
o2
,
0x3f
,
%
o2
!
A0
Group
and
%
o2
,
0x3f
,
%
o2
andcc
%
o2
,
0x38
,
%
g2
!
A0
Group
andcc
%
o2
,
0x38
,
%
g2
be
,
pn
%
icc
,
U3copy_from_user_endcruft
!
BR
be
,
pn
%
XCC
,
endcruft
subcc
%
g2
,
0x8
,
%
g2
!
A1
subcc
%
g2
,
0x8
,
%
g2
be
,
pn
%
icc
,
U3copy_from_user_endcruft
!
BR
Group
be
,
pn
%
XCC
,
endcruft
cmp
%
g1
,
0
!
A0
cmp
%
g1
,
0
be
,
a
,
pt
%
icc
,
1
f
!
BR
Group
be
,
a
,
pt
%
XCC
,
1
f
EX
(
ldda
[%
o1
+
0x00
]
%
asi
,
%f0
,
add
%
o2
,
%
g0
)
!
MS
EX
(
ldda
[%
o1
+
0x00
]
%
asi
,
%f0
,
add
%
o2
,
%
g0
)
1
:
EX
(
ldda
[%
o1
+
0x08
]
%
asi
,
%f2
,
add
%
o2
,
%
g0
)
!
MS
Group
1
:
EX
(
ldda
[%
o1
+
0x08
]
%
asi
,
%f2
,
add
%
o2
,
%
g0
)
add
%
o1
,
0x8
,
%
o1
!
A0
add
%
o1
,
0x8
,
%
o1
sub
%
o2
,
0x8
,
%
o2
!
A1
sub
%
o2
,
0x8
,
%
o2
subcc
%
g2
,
0x8
,
%
g2
!
A0
Group
subcc
%
g2
,
0x8
,
%
g2
faligndata
%f0
,
%f2
,
%f8
!
FGA
Group
faligndata
%f0
,
%f2
,
%f8
std
%f8
,
[%
o0
+
0x00
]
!
MS
(
XXX
does
it
stall
here
?
XXX
)
std
%f8
,
[%
o0
+
0x00
]
be
,
pn
%
icc
,
U3copy_from_user_endcruft
!
BR
be
,
pn
%
XCC
,
endcruft
add
%
o0
,
0x8
,
%
o0
!
A0
add
%
o0
,
0x8
,
%
o0
EX
(
ldda
[%
o1
+
0x08
]
%
asi
,
%f0
,
add
%
o2
,
%
g0
)
!
MS
Group
EX
(
ldda
[%
o1
+
0x08
]
%
asi
,
%f0
,
add
%
o2
,
%
g0
)
add
%
o1
,
0x8
,
%
o1
!
A0
add
%
o1
,
0x8
,
%
o1
sub
%
o2
,
0x8
,
%
o2
!
A1
sub
%
o2
,
0x8
,
%
o2
subcc
%
g2
,
0x8
,
%
g2
!
A0
Group
subcc
%
g2
,
0x8
,
%
g2
faligndata
%f2
,
%f0
,
%f8
!
FGA
faligndata
%f2
,
%f0
,
%f8
std
%f8
,
[%
o0
+
0x00
]
!
MS
(
XXX
does
it
stall
here
?
XXX
)
std
%f8
,
[%
o0
+
0x00
]
bne
,
pn
%
icc
,
1
b
!
BR
bne
,
pn
%
XCC
,
1
b
add
%
o0
,
0x8
,
%
o0
!
A0
Group
add
%
o0
,
0x8
,
%
o0
/
*
If
anything
is
left
,
we
copy
it
one
byte
at
a
time
.
/
*
If
anything
is
left
,
we
copy
it
one
byte
at
a
time
.
*
Note
that
%
g1
is
(
src
&
0x3
)
saved
above
before
the
*
Note
that
%
g1
is
(
src
&
0x3
)
saved
above
before
the
*
alignaddr
was
performed
.
*
alignaddr
was
performed
.
*/
*/
U3copy_from_user_
endcruft
:
endcruft
:
cmp
%
o2
,
0
cmp
%
o2
,
0
add
%
o1
,
%
g1
,
%
o1
add
%
o1
,
%
g1
,
%
o1
VISExitHalf
VISExitHalf
be
,
pn
%
icc
,
U3copy_from_user_short_ret
be
,
pn
%
XCC
,
out
nop
sub
%
o0
,
%
o1
,
%
o3
ba
,
a
,
pt
%
xcc
,
U3copy_from_user_short
/
*
If
we
get
here
,
then
32
<=
len
<
(
6
*
64
)
*/
U3copy_from_user_toosmall
:
#ifdef SMALL_COPY_USES_FPU
/
*
Is
'dst'
already
aligned
on
an
8
-
byte
boundary
?
*/
be
,
pt
%
xcc
,
2
f
!
BR
Group
/
*
Compute
abs
((
dst
&
7
)
-
8
)
into
%
g2
.
This
is
the
number
*
of
bytes
to
copy
to
make
'dst'
8
-
byte
aligned
.
We
pre
-
*
subtract
this
from
'len'
.
*/
sub
%
g2
,
0x8
,
%
g2
!
A0
sub
%
g0
,
%
g2
,
%
g2
!
A0
Group
(
reg
-
dep
)
sub
%
o2
,
%
g2
,
%
o2
!
A0
Group
(
reg
-
dep
)
/
*
Copy
%
g2
bytes
from
src
to
dst
,
one
byte
at
a
time
.
*/
andcc
%
g1
,
0x7
,
%
g0
1
:
EXNV
(
lduba
[%
o1
+
0x00
]
%
asi
,
%
o3
,
add
%
o2
,
%
g2
)!
MS
(
Group
)
(%
o3
in
3
cycles
)
bne
,
pn
%
icc
,
small_copy_unaligned
add
%
o1
,
0x1
,
%
o1
!
A1
andcc
%
o2
,
0x8
,
%
g0
add
%
o0
,
0x1
,
%
o0
!
A0
Group
be
,
pt
%
icc
,
1
f
subcc
%
g2
,
0x1
,
%
g2
!
A1
nop
EXNV
(
ldxa
[%
o1
]
%
asi
,
%
o5
,
add
%
o2
,
%
g0
)
bg
,
pt
%
icc
,
1
b
!
BR
Group
stx
%
o5
,
[%
o1
+
%
o3
]
stb
%
o3
,
[%
o0
+
-
1
]
!
MS
Group
add
%
o1
,
0x8
,
%
o1
2
:
VISEntryHalf
!
MS
+
MS
1
:
andcc
%
o2
,
0x4
,
%
g0
be
,
pt
%
icc
,
1
f
nop
EXNV
(
lduwa
[%
o1
]
%
asi
,
%
o5
,
and
%
o2
,
0x7
)
stw
%
o5
,
[%
o1
+
%
o3
]
add
%
o1
,
0x4
,
%
o1
/
*
Compute
(
len
-
(
len
%
8
))
into
%
g2
.
This
is
guaranteed
1
:
andcc
%
o2
,
0x2
,
%
g0
*
to
be
nonzero
.
be
,
pt
%
icc
,
1
f
*/
nop
andn
%
o2
,
0x7
,
%
g2
!
A0
Group
EXNV
(
lduha
[%
o1
]
%
asi
,
%
o5
,
and
%
o2
,
0x3
)
sth
%
o5
,
[%
o1
+
%
o3
]
/
*
You
may
read
this
and
believe
that
it
allows
reading
add
%
o1
,
0x2
,
%
o1
*
one
8
-
byte
longword
past
the
end
of
src
.
It
actually
*
does
not
,
as
%
g2
is
subtracted
as
loads
are
done
from
*
src
,
so
we
always
stop
before
running
off
the
end
.
*
Also
,
we
are
guaranteed
to
have
at
least
0x10
bytes
*
to
move
here
.
*/
sub
%
g2
,
0x8
,
%
g2
!
A0
Group
(
reg
-
dep
)
alignaddr
%
o1
,
%
g0
,
%
g1
!
MS
(
Break
-
after
)
EX
(
ldda
[%
g1
+
0x00
]
%
asi
,
%f0
,
add
%
o2
,
%
g0
)
!
MS
Group
(
1
-
cycle
stall
)
add
%
g1
,
0x8
,
%
g1
!
A0
1
:
EX
(
ldda
[%
g1
+
0x00
]
%
asi
,
%f2
,
add
%
o2
,
%
g0
)
!
MS
Group
add
%
g1
,
0x8
,
%
g1
!
A0
sub
%
o2
,
0x8
,
%
o2
!
A1
subcc
%
g2
,
0x8
,
%
g2
!
A0
Group
faligndata
%f0
,
%f2
,
%f8
!
FGA
Group
(
1
-
cycle
stall
)
std
%f8
,
[%
o0
+
0x00
]
!
MS
Group
(
2
-
cycle
stall
)
add
%
o1
,
0x8
,
%
o1
!
A0
be
,
pn
%
icc
,
2
f
!
BR
add
%
o0
,
0x8
,
%
o0
!
A1
EX
(
ldda
[%
g1
+
0x00
]
%
asi
,
%f0
,
add
%
o2
,
%
g0
)
!
MS
Group
add
%
g1
,
0x8
,
%
g1
!
A0
sub
%
o2
,
0x8
,
%
o2
!
A1
subcc
%
g2
,
0x8
,
%
g2
!
A0
Group
faligndata
%f2
,
%f0
,
%f8
!
FGA
Group
(
1
-
cycle
stall
)
std
%f8
,
[%
o0
+
0x00
]
!
MS
Group
(
2
-
cycle
stall
)
add
%
o1
,
0x8
,
%
o1
!
A0
bne
,
pn
%
icc
,
1
b
!
BR
add
%
o0
,
0x8
,
%
o0
!
A1
/
*
Nothing
left
to
copy
?
*/
2
:
cmp
%
o2
,
0
!
A0
Group
VISExitHalf
!
A0
+
MS
be
,
pn
%
icc
,
U3copy_from_user_short_ret
!
BR
Group
nop
!
A0
ba
,
a
,
pt
%
xcc
,
U3copy_from_user_short
!
BR
Group
#else /* !(SMALL_COPY_USES_FPU) */
xor
%
o1
,
%
o0
,
%
g2
andcc
%
g2
,
0x7
,
%
g0
bne
,
pn
%
icc
,
U3copy_from_user_short
andcc
%
o1
,
0x7
,
%
g2
be
,
pt
%
xcc
,
2
f
sub
%
g2
,
0x8
,
%
g2
sub
%
g0
,
%
g2
,
%
g2
sub
%
o2
,
%
g2
,
%
o2
1
:
EXNV
(
lduba
[%
o1
+
0x00
]
%
asi
,
%
o3
,
add
%
o2
,
%
g2
)
1
:
andcc
%
o2
,
0x1
,
%
g0
add
%
o1
,
0x1
,
%
o1
be
,
pt
%
icc
,
out
add
%
o0
,
0x1
,
%
o0
nop
subcc
%
g2
,
0x1
,
%
g2
EXNV
(
lduba
[%
o1
]
%
asi
,
%
o5
,
and
%
o2
,
0x1
)
bg
,
pt
%
icc
,
1
b
ba
,
pt
%
xcc
,
out
stb
%
o3
,
[%
o0
+
-
1
]
stb
%
o5
,
[%
o1
+
%
o3
]
medium_copy
:
/
*
16
<
len
<=
64
*/
bne
,
pn
%
XCC
,
small_copy_unaligned
sub
%
o0
,
%
o1
,
%
o3
medium_copy_aligned
:
andn
%
o2
,
0x7
,
%
o4
and
%
o2
,
0x7
,
%
o2
1
:
subcc
%
o4
,
0x8
,
%
o4
EXNV8
(
ldxa
[%
o1
]
%
asi
,
%
o5
,
add
%
o2
,
%
o4
)
stx
%
o5
,
[%
o1
+
%
o3
]
bgu
,
pt
%
XCC
,
1
b
add
%
o1
,
0x8
,
%
o1
andcc
%
o2
,
0x4
,
%
g0
be
,
pt
%
XCC
,
1
f
nop
sub
%
o2
,
0x4
,
%
o2
EXNV4
(
lduwa
[%
o1
]
%
asi
,
%
o5
,
add
%
o2
,
%
g0
)
stw
%
o5
,
[%
o1
+
%
o3
]
add
%
o1
,
0x4
,
%
o1
1
:
cmp
%
o2
,
0
be
,
pt
%
XCC
,
out
nop
ba
,
pt
%
xcc
,
small_copy_unaligned
nop
2
:
andn
%
o2
,
0x7
,
%
g2
small_copy
:
/
*
0
<
len
<=
16
*/
sub
%
o2
,
%
g2
,
%
o2
andcc
%
o3
,
0x3
,
%
g0
bne
,
pn
%
XCC
,
small_copy_unaligned
sub
%
o0
,
%
o1
,
%
o3
3
:
EXNV
(
ldxa
[%
o1
+
0x00
]
%
asi
,
%
o3
,
add
%
o2
,
%
g2
)
small_copy_aligned
:
add
%
o1
,
0x8
,
%
o1
subcc
%
o2
,
4
,
%
o2
add
%
o0
,
0x8
,
%
o0
EXNV
(
lduwa
[%
o1
]
%
asi
,
%
g1
,
add
%
o2
,
%
g0
)
s
ubcc
%
g2
,
0x8
,
%
g2
s
tw
%
g1
,
[%
o1
+
%
o3
]
bg
,
pt
%
icc
,
3
b
bg
u
,
pt
%
XCC
,
small_copy_aligned
stx
%
o3
,
[%
o0
+
-
8
]
add
%
o1
,
4
,
%
o1
cmp
%
o2
,
0
out
:
retl
bne
,
pn
%
icc
,
U3copy_from_user_short
clr
%
o0
nop
ba
,
a
,
pt
%
xcc
,
U3copy_from_user_short_ret
#endif /* !(SMALL_COPY_USES_FPU) */
.
align
32
small_copy_unaligned
:
subcc
%
o2
,
1
,
%
o2
EXNV
(
lduba
[%
o1
]
%
asi
,
%
g1
,
add
%
o2
,
%
g0
)
stb
%
g1
,
[%
o1
+
%
o3
]
bgu
,
pt
%
XCC
,
small_copy_unaligned
add
%
o1
,
1
,
%
o1
retl
clr
%
o0
#ifdef __KERNEL__
.
globl
U3cfu_fixup
U3cfu_fixup
:
U3cfu_fixup
:
/
*
Since
this
is
copy_from_user
(),
zero
out
the
rest
of
the
/
*
Since
this
is
copy_from_user
(),
zero
out
the
rest
of
the
*
kernel
buffer
.
*
kernel
buffer
.
...
@@ -516,4 +428,3 @@ U3cfu_fixup:
...
@@ -516,4 +428,3 @@ U3cfu_fixup:
2
:
retl
2
:
retl
mov
%
o1
,
%
o0
mov
%
o1
,
%
o0
#endif
arch/sparc64/lib/U3copy_in_user.S
View file @
da09d2f1
/*
$Id
:
U3copy_in_user
.
S
,
v
1
.4
2001
/
03
/
21
05
:
58
:
47
davem
Exp
$
/*
U3copy_in_user
.
S
:
UltraSparc
-
III
optimized
memcpy
.
*
U3memcpy
.
S
:
UltraSparc
-
III
optimized
copy
within
userspace
.
*
*
*
Copyright
(
C
)
1999
,
2000
David
S
.
Miller
(
davem
@
redhat
.
com
)
*
Copyright
(
C
)
1999
,
2000
,
2004
David
S
.
Miller
(
davem
@
redhat
.
com
)
*/
*/
#ifdef __KERNEL__
#include <asm/visasm.h>
#include <asm/visasm.h>
#include <asm/asi.h>
#include <asm/asi.h>
#undef SMALL_COPY_USES_FPU
#include <asm/dcu.h>
#include <asm/spitfire.h>
#define XCC xcc
#define EXNV(x,y,a,b) \
#define EXNV(x,y,a,b) \
98
:
x
,
y
; \
98
:
x
,
y
; \
.
section
.
fixup
; \
.
section
.
fixup
; \
...
@@ -19,7 +21,7 @@
...
@@ -19,7 +21,7 @@
.
word
98
b
,
99
b
; \
.
word
98
b
,
99
b
; \
.
text
; \
.
text
; \
.
align
4
;
.
align
4
;
#define EXNV
2(x,y,a,b)
\
#define EXNV
1(x,y,a,b)
\
98
:
x
,
y
; \
98
:
x
,
y
; \
.
section
.
fixup
; \
.
section
.
fixup
; \
.
align
4
; \
.
align
4
; \
...
@@ -31,501 +33,108 @@
...
@@ -31,501 +33,108 @@
.
word
98
b
,
99
b
; \
.
word
98
b
,
99
b
; \
.
text
; \
.
text
; \
.
align
4
;
.
align
4
;
#define EXNV
3(x,y,a,b)
\
#define EXNV
4(x,y,a,b)
\
98
:
x
,
y
; \
98
:
x
,
y
; \
.
section
.
fixup
; \
.
section
.
fixup
; \
.
align
4
; \
.
align
4
; \
99
:
a
,
b
,
%
o0
; \
99
:
a
,
b
,
%
o0
; \
retl
; \
retl
; \
add
%
o0
,
8
,
%
o0
; \
add
%
o0
,
4
,
%
o0
; \
.
section
__ex_table
; \
.
align
4
; \
.
word
98
b
,
99
b
; \
.
text
; \
.
align
4
;
#define EX(x,y,a,b) \
98
:
x
,
y
; \
.
section
.
fixup
; \
.
align
4
; \
99
:
VISExitHalf
; \
retl
; \
a
,
b
,
%
o0
; \
.
section
__ex_table
; \
.
section
__ex_table
; \
.
align
4
; \
.
align
4
; \
.
word
98
b
,
99
b
; \
.
word
98
b
,
99
b
; \
.
text
; \
.
text
; \
.
align
4
;
.
align
4
;
#define EX
BLK1(x,y
) \
#define EX
NV8(x,y,a,b
) \
98
:
x
,
y
; \
98
:
x
,
y
; \
.
section
.
fixup
; \
.
section
.
fixup
; \
.
align
4
; \
.
align
4
; \
99
:
VISExitHalf
; \
99
:
a
,
b
,
%
o0
; \
add
%
o4
,
0x1c0
,
%
o1
; \
and
%
o2
,
(
0x40
-
1
),
%
o2
; \
retl
; \
add
%
o1
,
%
o2
,
%
o0
; \
.
section
__ex_table
; \
.
align
4
; \
.
word
98
b
,
99
b
; \
.
text
; \
.
align
4
;
#define EXBLK2(x,y) \
98
:
x
,
y
; \
.
section
.
fixup
; \
.
align
4
; \
99
:
VISExitHalf
; \
sll
%
o3
,
6
,
%
o3
; \
and
%
o2
,
(
0x40
-
1
),
%
o2
; \
add
%
o3
,
0x80
,
%
o1
; \
retl
; \
add
%
o1
,
%
o2
,
%
o0
; \
.
section
__ex_table
; \
.
align
4
; \
.
word
98
b
,
99
b
; \
.
text
; \
.
align
4
;
#define EXBLK3(x,y) \
98
:
x
,
y
; \
.
section
.
fixup
; \
.
align
4
; \
99
:
VISExitHalf
; \
and
%
o2
,
(
0x40
-
1
),
%
o2
; \
retl
; \
add
%
o2
,
0x80
,
%
o0
; \
.
section
__ex_table
; \
.
align
4
; \
.
word
98
b
,
99
b
; \
.
text
; \
.
align
4
;
#define EXBLK4(x,y) \
98
:
x
,
y
; \
.
section
.
fixup
; \
.
align
4
; \
99
:
VISExitHalf
; \
and
%
o2
,
(
0x40
-
1
),
%
o2
; \
retl
; \
retl
; \
add
%
o
2
,
0x40
,
%
o0
; \
add
%
o
0
,
8
,
%
o0
; \
.
section
__ex_table
; \
.
section
__ex_table
; \
.
align
4
; \
.
align
4
; \
.
word
98
b
,
99
b
; \
.
word
98
b
,
99
b
; \
.
text
; \
.
text
; \
.
align
4
;
.
align
4
;
#else
#define ASI_AIUS 0x80
#define ASI_BLK_AIUS 0xf0
#define FPRS_FEF 0x04
#define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs
#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
#define SMALL_COPY_USES_FPU
#define EXNV(x,y,a,b) x,y;
#define EXNV2(x,y,a,b) x,y;
#define EXNV3(x,y,a,b) x,y;
#define EX(x,y,a,b) x,y;
#define EXBLK1(x,y) x,y;
#define EXBLK2(x,y) x,y;
#define EXBLK3(x,y) x,y;
#define EXBLK4(x,y) x,y;
#endif
/
*
Special
/
non
-
trivial
issues
of
this
code
:
.
register
%
g2
,#
scratch
*
.
register
%
g3
,#
scratch
*
1
)
%
o5
is
preserved
from
VISEntryHalf
to
VISExitHalf
*
2
)
Only
low
32
FPU
registers
are
used
so
that
only
the
*
lower
half
of
the
FPU
register
set
is
dirtied
by
this
*
code
.
This
is
especially
important
in
the
kernel
.
*
3
)
This
code
never
prefetches
cachelines
past
the
end
*
of
the
source
buffer
.
*
*
XXX
Actually
,
Cheetah
can
buffer
up
to
8
concurrent
*
XXX
prefetches
,
revisit
this
...
*/
.
text
.
text
.
align
32
.
align
32
/
*
The
cheetah
's flexible spine, oversized liver, enlarged heart,
/
*
Don
't try to get too fancy here, just nice and
*
s
lender
muscular
body
,
and
claws
make
it
the
swiftest
hunter
*
s
imple
.
This
is
predominantly
used
for
well
aligned
*
in
Africa
and
the
fastest
animal
on
land
.
Can
reach
speeds
*
small
copies
in
the
compat
layer
.
It
is
also
used
*
of
up
to
2
.4
GB
per
second
.
*
to
copy
register
windows
around
during
thread
cloning
.
*/
*/
.
globl
U3copy_in_user
.
globl
U3copy_in_user
U3copy_in_user
:
/
*
%
o0
=
dst
,
%
o1
=
src
,
%
o2
=
len
*/
U3copy_in_user
:
/
*
%
o0
=
dst
,
%
o1
=
src
,
%
o2
=
len
*/
/
*
Writing
to
%
asi
is
_expensive_
so
we
hardcode
it
.
/
*
Writing
to
%
asi
is
_expensive_
so
we
hardcode
it
.
*
Reading
%
asi
to
check
for
KERNEL_DS
is
comparatively
*
Reading
%
asi
to
check
for
KERNEL_DS
is
comparatively
*
cheap
.
*
cheap
.
*/
*/
rd
%
asi
,
%
g1
!
MS
Group
(
4
cycles
)
rd
%
asi
,
%
g1
cmp
%
g1
,
ASI_AIUS
!
A0
Group
cmp
%
g1
,
ASI_AIUS
bne
U3memcpy
!
BR
bne
,
pn
%
icc
,
U3memcpy_user_stub
nop
!
A1
nop
#ifndef __KERNEL__
/
*
Save
away
original
'dst'
for
memcpy
return
value
.
*/
mov
%
o0
,
%
g3
!
A0
Group
#endif
/
*
Anything
to
copy
at
all
?
*/
cmp
%
o2
,
0
!
A1
ble
,
pn
%
icc
,
U3copy_in_user_short_ret
!
BR
/
*
Extremely
small
copy
?
*/
cmp
%
o2
,
31
!
A0
Group
ble
,
pn
%
icc
,
U3copy_in_user_short
!
BR
/
*
Large
enough
to
use
unrolled
prefetch
loops
?
*/
cmp
%
o2
,
0x100
!
A1
bge
,
a
,
pt
%
icc
,
U3copy_in_user_enter
!
BR
Group
andcc
%
o0
,
0x3f
,
%
g2
!
A0
ba
,
pt
%
xcc
,
U3copy_in_user_toosmall
!
BR
Group
andcc
%
o0
,
0x7
,
%
g2
!
A0
.
align
32
U3copy_in_user_short
:
/
*
Copy
%
o2
bytes
from
src
to
dst
,
one
byte
at
a
time
.
*/
EXNV
(
lduba
[%
o1
+
0x00
]
%
asi
,
%
o3
,
add
%
o2
,
%
g0
)!
MS
Group
add
%
o1
,
0x1
,
%
o1
!
A0
add
%
o0
,
0x1
,
%
o0
!
A1
subcc
%
o2
,
1
,
%
o2
!
A0
Group
bg
,
pt
%
icc
,
U3copy_in_user_short
!
BR
EXNV
(
stba
%
o3
,
[%
o0
+
-
1
]
%
asi
,
add
%
o2
,
1
)
!
MS
Group
(
1
-
cycle
stall
)
U3copy_in_user_short_ret
:
#ifdef __KERNEL__
retl
!
BR
Group
(
0
-
4
cycle
stall
)
clr
%
o0
!
A0
#else
retl
!
BR
Group
(
0
-
4
cycle
stall
)
mov
%
g3
,
%
o0
!
A0
#endif
/
*
Here
len
>=
(
6
*
64
)
and
condition
codes
reflect
execution
*
of
"andcc %o0, 0x7, %g2"
,
done
by
caller
.
*/
.
align
64
U3copy_in_user_enter
:
/
*
Is
'dst'
already
aligned
on
an
64
-
byte
boundary
?
*/
be
,
pt
%
xcc
,
2
f
!
BR
/
*
Compute
abs
((
dst
&
0x3f
)
-
0x40
)
into
%
g2
.
This
is
the
number
*
of
bytes
to
copy
to
make
'dst'
64
-
byte
aligned
.
We
pre
-
*
subtract
this
from
'len'
.
*/
sub
%
g2
,
0x40
,
%
g2
!
A0
Group
sub
%
g0
,
%
g2
,
%
g2
!
A0
Group
sub
%
o2
,
%
g2
,
%
o2
!
A0
Group
/
*
Copy
%
g2
bytes
from
src
to
dst
,
one
byte
at
a
time
.
*/
1
:
EXNV
(
lduba
[%
o1
+
0x00
]
%
asi
,
%
o3
,
add
%
o2
,
%
g2
)!
MS
(
Group
)
add
%
o1
,
0x1
,
%
o1
!
A1
add
%
o0
,
0x1
,
%
o0
!
A0
Group
subcc
%
g2
,
0x1
,
%
g2
!
A1
bg
,
pt
%
icc
,
1
b
!
BR
Group
EXNV2
(
stba
%
o3
,
[%
o0
+
-
1
]
%
asi
,
add
%
o2
,
%
g2
)
!
MS
Group
2
:
VISEntryHalf
!
MS
+
MS
and
%
o1
,
0x7
,
%
g1
!
A1
ba
,
pt
%
xcc
,
U3copy_in_user_begin
!
BR
alignaddr
%
o1
,
%
g0
,
%
o1
!
MS
(
Break
-
after
)
.
align
64
U3copy_in_user_begin
:
prefetcha
[%
o1
+
0x000
]
%
asi
,
#
one_read
!
MS
Group1
prefetcha
[%
o1
+
0x040
]
%
asi
,
#
one_read
!
MS
Group2
andn
%
o2
,
(
0x40
-
1
),
%
o4
!
A0
prefetcha
[%
o1
+
0x080
]
%
asi
,
#
one_read
!
MS
Group3
cmp
%
o4
,
0x140
!
A0
prefetcha
[%
o1
+
0x0c0
]
%
asi
,
#
one_read
!
MS
Group4
EX
(
ldda
[%
o1
+
0x000
]
%
asi
,
%f0
,
add
%
o2
,
%
g0
)
!
MS
Group5
(
%f0
results
at
G8
)
bge
,
a
,
pt
%
icc
,
1
f
!
BR
prefetcha
[%
o1
+
0x100
]
%
asi
,
#
one_read
!
MS
Group6
1
:
EX
(
ldda
[%
o1
+
0x008
]
%
asi
,
%f2
,
add
%
o2
,
%
g0
)
!
AX
(
%f2
results
at
G9
)
cmp
%
o4
,
0x180
!
A1
bge
,
a
,
pt
%
icc
,
1
f
!
BR
prefetcha
[%
o1
+
0x140
]
%
asi
,
#
one_read
!
MS
Group7
1
:
EX
(
ldda
[%
o1
+
0x010
]
%
asi
,
%f4
,
add
%
o2
,
%
g0
)
!
AX
(
%f4
results
at
G10
)
cmp
%
o4
,
0x1c0
!
A1
bge
,
a
,
pt
%
icc
,
1
f
!
BR
prefetcha
[%
o1
+
0x180
]
%
asi
,
#
one_read
!
MS
Group8
1
:
faligndata
%f0
,
%f2
,
%f16
!
FGA
Group9
(
%f16
at
G12
)
EX
(
ldda
[%
o1
+
0x018
]
%
asi
,
%f6
,
add
%
o2
,
%
g0
)
!
AX
(
%f6
results
at
G12
)
faligndata
%f2
,
%f4
,
%f18
!
FGA
Group10
(
%f18
results
at
G13
)
EX
(
ldda
[%
o1
+
0x020
]
%
asi
,
%f8
,
add
%
o2
,
%
g0
)
!
MS
(
%f8
results
at
G13
)
faligndata
%f4
,
%f6
,
%f20
!
FGA
Group12
(
1
-
cycle
stall
,
%f20
at
G15
)
EX
(
ldda
[%
o1
+
0x028
]
%
asi
,
%f10
,
add
%
o2
,
%
g0
)
!
MS
(
%f10
results
at
G15
)
faligndata
%f6
,
%f8
,
%f22
!
FGA
Group13
(
%f22
results
at
G16
)
EX
(
ldda
[%
o1
+
0x030
]
%
asi
,
%f12
,
add
%
o2
,
%
g0
)
!
MS
(
%f12
results
at
G16
)
faligndata
%f8
,
%f10
,
%f24
!
FGA
Group15
(
1
-
cycle
stall
,
%f24
at
G18
)
EX
(
ldda
[%
o1
+
0x038
]
%
asi
,
%f14
,
add
%
o2
,
%
g0
)
!
MS
(
%f14
results
at
G18
)
faligndata
%f10
,
%f12
,
%f26
!
FGA
Group16
(
%f26
results
at
G19
)
EX
(
ldda
[%
o1
+
0x040
]
%
asi
,
%f0
,
add
%
o2
,
%
g0
)
!
MS
(
%f0
results
at
G19
)
/
*
We
only
use
the
first
loop
if
len
>
(
7
*
64
)
.
*/
subcc
%
o4
,
0x1c0
,
%
o4
!
A0
Group17
bg
,
pt
%
icc
,
U3copy_in_user_loop1
!
BR
add
%
o1
,
0x40
,
%
o1
!
A1
add
%
o4
,
0x140
,
%
o4
!
A0
Group18
ba
,
pt
%
xcc
,
U3copy_in_user_loop2
!
BR
srl
%
o4
,
6
,
%
o3
!
A0
Group19
nop
nop
nop
nop
nop
nop
nop
/
*
This
loop
performs
the
copy
and
queues
new
prefetches
.
*
We
drop
into
the
second
loop
when
len
<=
(
5
*
64
)
.
Note
*
that
this
(
5
*
64
)
factor
has
been
subtracted
from
len
*
already
.
*/
U3copy_in_user_loop1
:
EXBLK1
(
ldda
[%
o1
+
0x008
]
%
asi
,
%f2
)
!
MS
Group2
(
%f2
results
at
G5
)
faligndata
%f12
,
%f14
,
%f28
!
FGA
(
%f28
results
at
G5
)
EXBLK1
(
ldda
[%
o1
+
0x010
]
%
asi
,
%f4
)
!
MS
Group3
(
%f4
results
at
G6
)
faligndata
%f14
,
%f0
,
%f30
!
FGA
Group4
(
1
-
cycle
stall
,
%f30
at
G7
)
EXBLK1
(
stda
%f16
,
[%
o0
]
ASI_BLK_AIUS
)
!
MS
EXBLK1
(
ldda
[%
o1
+
0x018
]
%
asi
,
%f6
)
!
AX
(
%f6
results
at
G7
)
faligndata
%f0
,
%f2
,
%f16
!
FGA
Group12
(
7
-
cycle
stall
)
EXBLK1
(
ldda
[%
o1
+
0x020
]
%
asi
,
%f8
)
!
MS
(
%f8
results
at
G15
)
faligndata
%f2
,
%f4
,
%f18
!
FGA
Group13
(
%f18
results
at
G16
)
EXBLK1
(
ldda
[%
o1
+
0x028
]
%
asi
,
%f10
)
!
MS
(
%f10
results
at
G16
)
faligndata
%f4
,
%f6
,
%f20
!
FGA
Group14
(
%f20
results
at
G17
)
EXBLK1
(
ldda
[%
o1
+
0x030
]
%
asi
,
%f12
)
!
MS
(
%f12
results
at
G17
)
faligndata
%f6
,
%f8
,
%f22
!
FGA
Group15
(
%f22
results
at
G18
)
EXBLK1
(
ldda
[%
o1
+
0x038
]
%
asi
,
%f14
)
!
MS
(
%f14
results
at
G18
)
faligndata
%f8
,
%f10
,
%f24
!
FGA
Group16
(
%f24
results
at
G19
)
EXBLK1
(
ldda
[%
o1
+
0x040
]
%
asi
,
%f0
)
!
AX
(
%f0
results
at
G19
)
prefetcha
[%
o1
+
0x180
]
%
asi
,
#
one_read
!
MS
faligndata
%f10
,
%f12
,
%f26
!
FGA
Group17
(
%f26
results
at
G20
)
subcc
%
o4
,
0x40
,
%
o4
!
A0
add
%
o1
,
0x40
,
%
o1
!
A1
bg
,
pt
%
xcc
,
U3copy_in_user_loop1
!
BR
add
%
o0
,
0x40
,
%
o0
!
A0
Group18
U3copy_in_user_loop2_enter
:
mov
5
,
%
o3
!
A1
/
*
This
loop
performs
on
the
copy
,
no
new
prefetches
are
*
queued
.
We
do
things
this
way
so
that
we
do
not
perform
*
any
spurious
prefetches
past
the
end
of
the
src
buffer
.
*/
U3copy_in_user_loop2
:
EXBLK2
(
ldda
[%
o1
+
0x008
]
%
asi
,
%f2
)
!
MS
faligndata
%f12
,
%f14
,
%f28
!
FGA
Group2
EXBLK2
(
ldda
[%
o1
+
0x010
]
%
asi
,
%f4
)
!
MS
faligndata
%f14
,
%f0
,
%f30
!
FGA
Group4
(
1
-
cycle
stall
)
EXBLK2
(
stda
%f16
,
[%
o0
]
ASI_BLK_AIUS
)
!
MS
EXBLK2
(
ldda
[%
o1
+
0x018
]
%
asi
,
%f6
)
!
AX
faligndata
%f0
,
%f2
,
%f16
!
FGA
Group12
(
7
-
cycle
stall
)
EXBLK2
(
ldda
[%
o1
+
0x020
]
%
asi
,
%f8
)
!
MS
faligndata
%f2
,
%f4
,
%f18
!
FGA
Group13
EXBLK2
(
ldda
[%
o1
+
0x028
]
%
asi
,
%f10
)
!
MS
faligndata
%f4
,
%f6
,
%f20
!
FGA
Group14
EXBLK2
(
ldda
[%
o1
+
0x030
]
%
asi
,
%f12
)
!
MS
faligndata
%f6
,
%f8
,
%f22
!
FGA
Group15
EXBLK2
(
ldda
[%
o1
+
0x038
]
%
asi
,
%f14
)
!
MS
faligndata
%f8
,
%f10
,
%f24
!
FGA
Group16
EXBLK2
(
ldda
[%
o1
+
0x040
]
%
asi
,
%f0
)
!
AX
faligndata
%f10
,
%f12
,
%f26
!
FGA
Group17
subcc
%
o3
,
0x01
,
%
o3
!
A0
add
%
o1
,
0x40
,
%
o1
!
A1
bg
,
pt
%
xcc
,
U3copy_in_user_loop2
!
BR
add
%
o0
,
0x40
,
%
o0
!
A0
Group18
/
*
Finally
we
copy
the
last
full
64
-
byte
block
.
*/
U3copy_in_user_loopfini
:
EXBLK3
(
ldda
[%
o1
+
0x008
]
%
asi
,
%f2
)
!
MS
faligndata
%f12
,
%f14
,
%f28
!
FGA
EXBLK3
(
ldda
[%
o1
+
0x010
]
%
asi
,
%f4
)
!
MS
Group19
faligndata
%f14
,
%f0
,
%f30
!
FGA
EXBLK3
(
stda
%f16
,
[%
o0
]
ASI_BLK_AIUS
)
!
MS
Group20
EXBLK4
(
ldda
[%
o1
+
0x018
]
%
asi
,
%f6
)
!
AX
faligndata
%f0
,
%f2
,
%f16
!
FGA
Group11
(
7
-
cycle
stall
)
EXBLK4
(
ldda
[%
o1
+
0x020
]
%
asi
,
%f8
)
!
MS
faligndata
%f2
,
%f4
,
%f18
!
FGA
Group12
EXBLK4
(
ldda
[%
o1
+
0x028
]
%
asi
,
%f10
)
!
MS
faligndata
%f4
,
%f6
,
%f20
!
FGA
Group13
EXBLK4
(
ldda
[%
o1
+
0x030
]
%
asi
,
%f12
)
!
MS
faligndata
%f6
,
%f8
,
%f22
!
FGA
Group14
EXBLK4
(
ldda
[%
o1
+
0x038
]
%
asi
,
%f14
)
!
MS
faligndata
%f8
,
%f10
,
%f24
!
FGA
Group15
cmp
%
g1
,
0
!
A0
be
,
pt
%
icc
,
1
f
!
BR
add
%
o0
,
0x40
,
%
o0
!
A1
EXBLK4
(
ldda
[%
o1
+
0x040
]
%
asi
,
%f0
)
!
MS
1
:
faligndata
%f10
,
%f12
,
%f26
!
FGA
Group16
faligndata
%f12
,
%f14
,
%f28
!
FGA
Group17
faligndata
%f14
,
%f0
,
%f30
!
FGA
Group18
EXBLK4
(
stda
%f16
,
[%
o0
]
ASI_BLK_AIUS
)
!
MS
add
%
o0
,
0x40
,
%
o0
!
A0
add
%
o1
,
0x40
,
%
o1
!
A1
membar
#
Sync
!
MS
Group26
(
7
-
cycle
stall
)
/
*
Now
we
copy
the
(
len
modulo
64
)
bytes
at
the
end
.
*
Note
how
we
borrow
the
%f0
loaded
above
.
*
*
Also
notice
how
this
code
is
careful
not
to
perform
a
*
load
past
the
end
of
the
src
buffer
just
like
similar
*
code
found
in
U3copy_in_user_toosmall
processing
.
*/
U3copy_in_user_loopend
:
and
%
o2
,
0x3f
,
%
o2
!
A0
Group
andcc
%
o2
,
0x38
,
%
g2
!
A0
Group
be
,
pn
%
icc
,
U3copy_in_user_endcruft
!
BR
subcc
%
g2
,
0x8
,
%
g2
!
A1
be
,
pn
%
icc
,
U3copy_in_user_endcruft
!
BR
Group
cmp
%
g1
,
0
!
A0
be
,
a
,
pt
%
icc
,
1
f
!
BR
Group
EX
(
ldda
[%
o1
+
0x00
]
%
asi
,
%f0
,
add
%
o2
,
%
g0
)
!
MS
1
:
EX
(
ldda
[%
o1
+
0x08
]
%
asi
,
%f2
,
add
%
o2
,
%
g0
)
!
MS
Group
add
%
o1
,
0x8
,
%
o1
!
A0
sub
%
o2
,
0x8
,
%
o2
!
A1
subcc
%
g2
,
0x8
,
%
g2
!
A0
Group
faligndata
%f0
,
%f2
,
%f8
!
FGA
Group
EX
(
stda
%f8
,
[%
o0
+
0x00
]
%
asi
,
add
%
o2
,
0x8
)
!
MS
(
XXX
does
it
stall
here
?
XXX
)
be
,
pn
%
icc
,
U3copy_in_user_endcruft
!
BR
add
%
o0
,
0x8
,
%
o0
!
A0
EX
(
ldda
[%
o1
+
0x08
]
%
asi
,
%f0
,
add
%
o2
,
%
g0
)
!
MS
Group
add
%
o1
,
0x8
,
%
o1
!
A0
sub
%
o2
,
0x8
,
%
o2
!
A1
subcc
%
g2
,
0x8
,
%
g2
!
A0
Group
faligndata
%f2
,
%f0
,
%f8
!
FGA
EX
(
stda
%f8
,
[%
o0
+
0x00
]
%
asi
,
add
%
o2
,
0x8
)
!
MS
(
XXX
does
it
stall
here
?
XXX
)
bne
,
pn
%
icc
,
1
b
!
BR
add
%
o0
,
0x8
,
%
o0
!
A0
Group
/
*
If
anything
is
left
,
we
copy
it
one
byte
at
a
time
.
*
Note
that
%
g1
is
(
src
&
0x3
)
saved
above
before
the
*
alignaddr
was
performed
.
*/
U3copy_in_user_endcruft
:
cmp
%
o2
,
0
cmp
%
o2
,
0
add
%
o1
,
%
g1
,
%
o1
be
,
pn
%
XCC
,
out
VISExitHalf
or
%
o0
,
%
o1
,
%
o3
be
,
pn
%
icc
,
U3copy_in_user_short_ret
cmp
%
o2
,
16
bleu
,
a
,
pn
%
XCC
,
small_copy
or
%
o3
,
%
o2
,
%
o3
medium_copy
:
/
*
16
<
len
<=
64
*/
andcc
%
o3
,
0x7
,
%
g0
bne
,
pn
%
XCC
,
small_copy_unaligned
sub
%
o0
,
%
o1
,
%
o3
medium_copy_aligned
:
andn
%
o2
,
0x7
,
%
o4
and
%
o2
,
0x7
,
%
o2
1
:
subcc
%
o4
,
0x8
,
%
o4
EXNV8
(
ldxa
[%
o1
]
%
asi
,
%
o5
,
add
%
o4
,
%
o2
)
EXNV8
(
stxa
%
o5
,
[%
o1
+
%
o3
]
ASI_AIUS
,
add
%
o4
,
%
o2
)
bgu
,
pt
%
XCC
,
1
b
add
%
o1
,
0x8
,
%
o1
andcc
%
o2
,
0x4
,
%
g0
be
,
pt
%
XCC
,
1
f
nop
sub
%
o2
,
0x4
,
%
o2
EXNV4
(
lduwa
[%
o1
]
%
asi
,
%
o5
,
add
%
o4
,
%
o2
)
EXNV4
(
stwa
%
o5
,
[%
o1
+
%
o3
]
ASI_AIUS
,
add
%
o4
,
%
o2
)
add
%
o1
,
0x4
,
%
o1
1
:
cmp
%
o2
,
0
be
,
pt
%
XCC
,
out
nop
ba
,
pt
%
xcc
,
small_copy_unaligned
nop
nop
ba
,
a
,
pt
%
xcc
,
U3copy_in_user_short
/
*
If
we
get
here
,
then
32
<=
len
<
(
6
*
64
)
*/
U3copy_in_user_toosmall
:
#ifdef SMALL_COPY_USES_FPU
/
*
Is
'dst'
already
aligned
on
an
8
-
byte
boundary
?
*/
be
,
pt
%
xcc
,
2
f
!
BR
Group
/
*
Compute
abs
((
dst
&
7
)
-
8
)
into
%
g2
.
This
is
the
number
*
of
bytes
to
copy
to
make
'dst'
8
-
byte
aligned
.
We
pre
-
*
subtract
this
from
'len'
.
*/
sub
%
g2
,
0x8
,
%
g2
!
A0
sub
%
g0
,
%
g2
,
%
g2
!
A0
Group
(
reg
-
dep
)
sub
%
o2
,
%
g2
,
%
o2
!
A0
Group
(
reg
-
dep
)
/
*
Copy
%
g2
bytes
from
src
to
dst
,
one
byte
at
a
time
.
*/
1
:
EXNV2
(
lduba
[%
o1
+
0x00
]
%
asi
,
%
o3
,
add
%
o2
,
%
g2
)!
MS
(
Group
)
(%
o3
in
3
cycles
)
add
%
o1
,
0x1
,
%
o1
!
A1
add
%
o0
,
0x1
,
%
o0
!
A0
Group
subcc
%
g2
,
0x1
,
%
g2
!
A1
bg
,
pt
%
icc
,
1
b
!
BR
Group
EXNV2
(
stba
%
o3
,
[%
o0
+
-
1
]
%
asi
,
add
%
o2
,
%
g2
)
!
MS
Group
2
:
VISEntryHalf
!
MS
+
MS
/
*
Compute
(
len
-
(
len
%
8
))
into
%
g2
.
This
is
guaranteed
*
to
be
nonzero
.
*/
andn
%
o2
,
0x7
,
%
g2
!
A0
Group
/
*
You
may
read
this
and
believe
that
it
allows
reading
*
one
8
-
byte
longword
past
the
end
of
src
.
It
actually
*
does
not
,
as
%
g2
is
subtracted
as
loads
are
done
from
*
src
,
so
we
always
stop
before
running
off
the
end
.
*
Also
,
we
are
guaranteed
to
have
at
least
0x10
bytes
*
to
move
here
.
*/
sub
%
g2
,
0x8
,
%
g2
!
A0
Group
(
reg
-
dep
)
alignaddr
%
o1
,
%
g0
,
%
g1
!
MS
(
Break
-
after
)
EX
(
ldda
[%
g1
+
0x00
]
%
asi
,
%f0
,
add
%
o2
,
%
g0
)
!
MS
Group
(
1
-
cycle
stall
)
add
%
g1
,
0x8
,
%
g1
!
A0
1
:
EX
(
ldda
[%
g1
+
0x00
]
%
asi
,
%f2
,
add
%
o2
,
%
g0
)
!
MS
Group
add
%
g1
,
0x8
,
%
g1
!
A0
sub
%
o2
,
0x8
,
%
o2
!
A1
subcc
%
g2
,
0x8
,
%
g2
!
A0
Group
faligndata
%f0
,
%f2
,
%f8
!
FGA
Group
(
1
-
cycle
stall
)
EX
(
stda
%f8
,
[%
o0
+
0x00
]
%
asi
,
add
%
o2
,
0x8
)
!
MS
Group
(
2
-
cycle
stall
)
add
%
o1
,
0x8
,
%
o1
!
A0
be
,
pn
%
icc
,
2
f
!
BR
add
%
o0
,
0x8
,
%
o0
!
A1
EX
(
ldda
[%
g1
+
0x00
]
%
asi
,
%f0
,
add
%
o2
,
%
g0
)
!
MS
Group
add
%
g1
,
0x8
,
%
g1
!
A0
sub
%
o2
,
0x8
,
%
o2
!
A1
subcc
%
g2
,
0x8
,
%
g2
!
A0
Group
faligndata
%f2
,
%f0
,
%f8
!
FGA
Group
(
1
-
cycle
stall
)
EX
(
stda
%f8
,
[%
o0
+
0x00
]
%
asi
,
add
%
o2
,
0x8
)
!
MS
Group
(
2
-
cycle
stall
)
add
%
o1
,
0x8
,
%
o1
!
A0
bne
,
pn
%
icc
,
1
b
!
BR
add
%
o0
,
0x8
,
%
o0
!
A1
/
*
Nothing
left
to
copy
?
*/
2
:
cmp
%
o2
,
0
!
A0
Group
VISExitHalf
!
A0
+
MS
be
,
pn
%
icc
,
U3copy_in_user_short_ret
!
BR
Group
nop
!
A0
ba
,
a
,
pt
%
xcc
,
U3copy_in_user_short
!
BR
Group
#else /* !(SMALL_COPY_USES_FPU) */
xor
%
o1
,
%
o0
,
%
g2
andcc
%
g2
,
0x7
,
%
g0
bne
,
pn
%
icc
,
U3copy_in_user_short
andcc
%
o1
,
0x7
,
%
g2
be
,
pt
%
xcc
,
2
f
sub
%
g2
,
0x8
,
%
g2
sub
%
g0
,
%
g2
,
%
g2
sub
%
o2
,
%
g2
,
%
o2
1
:
EXNV2
(
lduba
[%
o1
+
0x00
]
%
asi
,
%
o3
,
add
%
o2
,
%
g2
)
add
%
o1
,
0x1
,
%
o1
add
%
o0
,
0x1
,
%
o0
subcc
%
g2
,
0x1
,
%
g2
bg
,
pt
%
icc
,
1
b
EXNV2
(
stba
%
o3
,
[%
o0
+
-
1
]
%
asi
,
add
%
o2
,
%
g2
)
2
:
andn
%
o2
,
0x7
,
%
g2
small_copy
:
/
*
0
<
len
<=
16
*/
sub
%
o2
,
%
g2
,
%
o2
andcc
%
o3
,
0x3
,
%
g0
bne
,
pn
%
XCC
,
small_copy_unaligned
sub
%
o0
,
%
o1
,
%
o3
3
:
EXNV3
(
ldxa
[%
o1
+
0x00
]
%
asi
,
%
o3
,
add
%
o2
,
%
g2
)
small_copy_aligned
:
add
%
o1
,
0x8
,
%
o1
subcc
%
o2
,
4
,
%
o2
add
%
o0
,
0x8
,
%
o0
EXNV4
(
lduwa
[%
o1
]
%
asi
,
%
g1
,
add
%
o2
,
%
g0
)
subcc
%
g2
,
0x8
,
%
g2
EXNV4
(
stwa
%
g1
,
[%
o1
+
%
o3
]
ASI_AIUS
,
add
%
o2
,
%
g0
)
bg
,
pt
%
icc
,
3
b
bg
u
,
pt
%
XCC
,
small_copy_aligned
EXNV3
(
stxa
%
o3
,
[%
o0
+
-
8
]
%
asi
,
add
%
o2
,
%
g2
)
add
%
o1
,
4
,
%
o1
cmp
%
o2
,
0
out
:
retl
bne
,
pn
%
icc
,
U3copy_in_user_short
clr
%
o0
nop
ba
,
a
,
pt
%
xcc
,
U3copy_in_user_short_ret
#endif /* !(SMALL_COPY_USES_FPU) */
.
align
32
small_copy_unaligned
:
subcc
%
o2
,
1
,
%
o2
EXNV1
(
lduba
[%
o1
]
%
asi
,
%
g1
,
add
%
o2
,
%
g0
)
EXNV1
(
stba
%
g1
,
[%
o1
+
%
o3
]
ASI_AIUS
,
add
%
o2
,
%
g0
)
bgu
,
pt
%
XCC
,
small_copy_unaligned
add
%
o1
,
1
,
%
o1
retl
clr
%
o0
arch/sparc64/lib/U3copy_to_user.S
View file @
da09d2f1
/*
$Id
:
U3copy_to_user
.
S
,
v
1
.3
2000
/
11
/
01
09
:
29
:
19
davem
Exp
$
/*
U3copy_to_user
.
S
:
UltraSparc
-
III
optimized
memcpy
.
*
U3memcpy
.
S
:
UltraSparc
-
III
optimized
copy
to
userspace
.
*
*
*
Copyright
(
C
)
1999
,
2000
David
S
.
Miller
(
davem
@
redhat
.
com
)
*
Copyright
(
C
)
1999
,
2000
,
2004
David
S
.
Miller
(
davem
@
redhat
.
com
)
*/
*/
#ifdef __KERNEL__
#include <asm/visasm.h>
#include <asm/visasm.h>
#include <asm/asi.h>
#include <asm/asi.h>
#include <asm/dcu.h>
#include <asm/dcu.h>
#include <asm/spitfire.h>
#include <asm/spitfire.h>
#undef SMALL_COPY_USES_FPU
#define XCC xcc
#define EXNV(x,y,a,b) \
#define EXNV(x,y,a,b) \
98
:
x
,
y
; \
98
:
x
,
y
; \
.
section
.
fixup
; \
.
section
.
fixup
; \
...
@@ -34,6 +34,18 @@
...
@@ -34,6 +34,18 @@
.
text
; \
.
text
; \
.
align
4
;
.
align
4
;
#define EXNV3(x,y,a,b) \
#define EXNV3(x,y,a,b) \
98
:
x
,
y
; \
.
section
.
fixup
; \
.
align
4
; \
99
:
a
,
b
,
%
o0
; \
retl
; \
add
%
o0
,
4
,
%
o0
; \
.
section
__ex_table
; \
.
align
4
; \
.
word
98
b
,
99
b
; \
.
text
; \
.
align
4
;
#define EXNV4(x,y,a,b) \
98
:
x
,
y
; \
98
:
x
,
y
; \
.
section
.
fixup
; \
.
section
.
fixup
; \
.
align
4
; \
.
align
4
; \
...
@@ -112,22 +124,9 @@
...
@@ -112,22 +124,9 @@
.
word
98
b
,
99
b
; \
.
word
98
b
,
99
b
; \
.
text
; \
.
text
; \
.
align
4
;
.
align
4
;
#else
#define ASI_AIUS 0x80
.
register
%
g2
,#
scratch
#define ASI_BLK_AIUS 0xf0
.
register
%
g3
,#
scratch
#define FPRS_FEF 0x04
#define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs
#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
#define SMALL_COPY_USES_FPU
#define EXNV(x,y,a,b) x,y;
#define EXNV2(x,y,a,b) x,y;
#define EXNV3(x,y,a,b) x,y;
#define EX(x,y,a,b) x,y;
#define EXBLK1(x,y) x,y;
#define EXBLK2(x,y) x,y;
#define EXBLK3(x,y) x,y;
#define EXBLK4(x,y) x,y;
#endif
/
*
Special
/
non
-
trivial
issues
of
this
code
:
/
*
Special
/
non
-
trivial
issues
of
this
code
:
*
*
...
@@ -148,89 +147,64 @@
...
@@ -148,89 +147,64 @@
*
of
up
to
2
.4
GB
per
second
.
*
of
up
to
2
.4
GB
per
second
.
*/
*/
.
globl
U3copy_to_user
.
globl
U3copy_to_user
U3copy_to_user
:
/
*
%
o0
=
dst
,
%
o1
=
src
,
%
o2
=
len
*/
U3copy_to_user
:
/
*
%
o0
=
dst
,
%
o1
=
src
,
%
o2
=
len
*/
/
*
Writing
to
%
asi
is
_expensive_
so
we
hardcode
it
.
/
*
Writing
to
%
asi
is
_expensive_
so
we
hardcode
it
.
*
Reading
%
asi
to
check
for
KERNEL_DS
is
comparatively
*
Reading
%
asi
to
check
for
KERNEL_DS
is
comparatively
*
cheap
.
*
cheap
.
*/
*/
rd
%
asi
,
%
g1
!
MS
Group
(
4
cycles
)
rd
%
asi
,
%
g1
cmp
%
g1
,
ASI_AIUS
!
A0
Group
cmp
%
g1
,
ASI_AIUS
bne
U3memcpy
!
BR
bne
,
pn
%
icc
,
U3memcpy_user_stub
nop
!
A1
nop
#ifndef __KERNEL__
/
*
Save
away
original
'dst'
for
memcpy
return
value
.
*/
cmp
%
o2
,
0
mov
%
o0
,
%
g3
!
A0
Group
be
,
pn
%
XCC
,
out
#endif
or
%
o0
,
%
o1
,
%
o3
/
*
Anything
to
copy
at
all
?
*/
cmp
%
o2
,
16
cmp
%
o2
,
0
!
A1
bleu
,
a
,
pn
%
XCC
,
small_copy
ble
,
pn
%
icc
,
U3copy_to_user_short_ret
!
BR
or
%
o3
,
%
o2
,
%
o3
/
*
Extremely
small
copy
?
*/
cmp
%
o2
,
256
cmp
%
o2
,
31
!
A0
Group
blu
,
pt
%
XCC
,
medium_copy
ble
,
pn
%
icc
,
U3copy_to_user_short
!
BR
andcc
%
o3
,
0x7
,
%
g0
/
*
Large
enough
to
use
unrolled
prefetch
loops
?
*/
ba
,
pt
%
xcc
,
enter
cmp
%
o2
,
0x100
!
A1
andcc
%
o0
,
0x3f
,
%
g2
bge
,
a
,
pt
%
icc
,
U3copy_to_user_enter
!
BR
Group
andcc
%
o0
,
0x3f
,
%
g2
!
A0
/
*
Here
len
>=
256
and
condition
codes
reflect
execution
ba
,
pt
%
xcc
,
U3copy_to_user_toosmall
!
BR
Group
andcc
%
o0
,
0x7
,
%
g2
!
A0
.
align
32
U3copy_to_user_short
:
/
*
Copy
%
o2
bytes
from
src
to
dst
,
one
byte
at
a
time
.
*/
ldub
[%
o1
+
0x00
],
%
o3
!
MS
Group
add
%
o1
,
0x1
,
%
o1
!
A0
add
%
o0
,
0x1
,
%
o0
!
A1
subcc
%
o2
,
1
,
%
o2
!
A0
Group
bg
,
pt
%
icc
,
U3copy_to_user_short
!
BR
EXNV
(
stba
%
o3
,
[%
o0
+
-
1
]
%
asi
,
add
%
o2
,
1
)
!
MS
Group
(
1
-
cycle
stall
)
U3copy_to_user_short_ret
:
#ifdef __KERNEL__
retl
!
BR
Group
(
0
-
4
cycle
stall
)
clr
%
o0
!
A0
#else
retl
!
BR
Group
(
0
-
4
cycle
stall
)
mov
%
g3
,
%
o0
!
A0
#endif
/
*
Here
len
>=
(
6
*
64
)
and
condition
codes
reflect
execution
*
of
"andcc %o0, 0x7, %g2"
,
done
by
caller
.
*
of
"andcc %o0, 0x7, %g2"
,
done
by
caller
.
*/
*/
.
align
64
.
align
64
U3copy_to_user_
enter
:
enter
:
/
*
Is
'dst'
already
aligned
on
an
64
-
byte
boundary
?
*/
/
*
Is
'dst'
already
aligned
on
an
64
-
byte
boundary
?
*/
be
,
pt
%
xcc
,
2
f
!
BR
be
,
pt
%
XCC
,
2
f
/
*
Compute
abs
((
dst
&
0x3f
)
-
0x40
)
into
%
g2
.
This
is
the
number
/
*
Compute
abs
((
dst
&
0x3f
)
-
0x40
)
into
%
g2
.
This
is
the
number
*
of
bytes
to
copy
to
make
'dst'
64
-
byte
aligned
.
We
pre
-
*
of
bytes
to
copy
to
make
'dst'
64
-
byte
aligned
.
We
pre
-
*
subtract
this
from
'len'
.
*
subtract
this
from
'len'
.
*/
*/
sub
%
g2
,
0x40
,
%
g2
!
A0
Group
sub
%
g2
,
0x40
,
%
g2
sub
%
g0
,
%
g2
,
%
g2
!
A0
Group
sub
%
g0
,
%
g2
,
%
g2
sub
%
o2
,
%
g2
,
%
o2
!
A0
Group
sub
%
o2
,
%
g2
,
%
o2
/
*
Copy
%
g2
bytes
from
src
to
dst
,
one
byte
at
a
time
.
*/
/
*
Copy
%
g2
bytes
from
src
to
dst
,
one
byte
at
a
time
.
*/
1
:
ldub
[%
o1
+
0x00
],
%
o3
!
MS
(
Group
)
1
:
ldub
[%
o1
+
0x00
],
%
o3
add
%
o1
,
0x1
,
%
o1
!
A1
add
%
o1
,
0x1
,
%
o1
add
%
o0
,
0x1
,
%
o0
!
A0
Group
add
%
o0
,
0x1
,
%
o0
subcc
%
g2
,
0x1
,
%
g2
!
A1
subcc
%
g2
,
0x1
,
%
g2
bg
,
pt
%
icc
,
1
b
!
BR
Group
bg
,
pt
%
XCC
,
1
b
EXNV2
(
stba
%
o3
,
[%
o0
+
-
1
]
%
asi
,
add
%
o2
,
%
g2
)
!
MS
Group
EXNV2
(
stba
%
o3
,
[%
o0
+
-
1
]
%
asi
,
add
%
o2
,
%
g2
)
2
:
VISEntryHalf
!
MS
+
MS
2
:
VISEntryHalf
and
%
o1
,
0x7
,
%
g1
!
A1
and
%
o1
,
0x7
,
%
g1
ba
,
pt
%
xcc
,
U3copy_to_user_begin
!
BR
ba
,
pt
%
xcc
,
begin
alignaddr
%
o1
,
%
g0
,
%
o1
!
MS
(
Break
-
after
)
alignaddr
%
o1
,
%
g0
,
%
o1
.
align
64
.
align
64
U3copy_to_user_
begin
:
begin
:
#ifdef __KERNEL__
.
globl
U3copy_to_user_nop_1_6
.
globl
U3copy_to_user_nop_1_6
U3copy_to_user_nop_1_6
:
U3copy_to_user_nop_1_6
:
ldxa
[%
g0
]
ASI_DCU_CONTROL_REG
,
%
g3
ldxa
[%
g0
]
ASI_DCU_CONTROL_REG
,
%
g3
...
@@ -239,309 +213,221 @@ U3copy_to_user_nop_1_6:
...
@@ -239,309 +213,221 @@ U3copy_to_user_nop_1_6:
or
%
g3
,
%
o3
,
%
o3
or
%
g3
,
%
o3
,
%
o3
stxa
%
o3
,
[%
g0
]
ASI_DCU_CONTROL_REG
!
Enable
P
-
cache
stxa
%
o3
,
[%
g0
]
ASI_DCU_CONTROL_REG
!
Enable
P
-
cache
membar
#
Sync
membar
#
Sync
#endif
prefetch
[%
o1
+
0x000
],
#
one_read
!
MS
Group1
prefetch
[%
o1
+
0x000
],
#
one_read
prefetch
[%
o1
+
0x040
],
#
one_read
!
MS
Group2
prefetch
[%
o1
+
0x040
],
#
one_read
andn
%
o2
,
(
0x40
-
1
),
%
o4
!
A0
andn
%
o2
,
(
0x40
-
1
),
%
o4
prefetch
[%
o1
+
0x080
],
#
one_read
!
MS
Group3
prefetch
[%
o1
+
0x080
],
#
one_read
cmp
%
o4
,
0x140
!
A0
prefetch
[%
o1
+
0x0c0
],
#
one_read
prefetch
[%
o1
+
0x0c0
],
#
one_read
!
MS
Group4
ldd
[%
o1
+
0x000
],
%f0
ldd
[%
o1
+
0x000
],
%f0
!
MS
Group5
(
%f0
results
at
G8
)
prefetch
[%
o1
+
0x100
],
#
one_read
bge
,
a
,
pt
%
icc
,
1
f
!
BR
ldd
[%
o1
+
0x008
],
%f2
prefetch
[%
o1
+
0x140
],
#
one_read
prefetch
[%
o1
+
0x100
],
#
one_read
!
MS
Group6
ldd
[%
o1
+
0x010
],
%f4
1
:
ldd
[%
o1
+
0x008
],
%f2
!
AX
(
%f2
results
at
G9
)
prefetch
[%
o1
+
0x180
],
#
one_read
cmp
%
o4
,
0x180
!
A1
faligndata
%f0
,
%f2
,
%f16
bge
,
a
,
pt
%
icc
,
1
f
!
BR
ldd
[%
o1
+
0x018
],
%f6
prefetch
[%
o1
+
0x140
],
#
one_read
!
MS
Group7
faligndata
%f2
,
%f4
,
%f18
1
:
ldd
[%
o1
+
0x010
],
%f4
!
AX
(
%f4
results
at
G10
)
ldd
[%
o1
+
0x020
],
%f8
cmp
%
o4
,
0x1c0
!
A1
faligndata
%f4
,
%f6
,
%f20
bge
,
a
,
pt
%
icc
,
1
f
!
BR
ldd
[%
o1
+
0x028
],
%f10
faligndata
%f6
,
%f8
,
%f22
prefetch
[%
o1
+
0x180
],
#
one_read
!
MS
Group8
1
:
faligndata
%f0
,
%f2
,
%f16
!
FGA
Group9
(
%f16
at
G12
)
ldd
[%
o1
+
0x030
],
%f12
ldd
[%
o1
+
0x018
],
%f6
!
AX
(
%f6
results
at
G12
)
faligndata
%f8
,
%f10
,
%f24
faligndata
%f2
,
%f4
,
%f18
!
FGA
Group10
(
%f18
results
at
G13
)
ldd
[%
o1
+
0x038
],
%f14
ldd
[%
o1
+
0x020
],
%f8
!
MS
(
%f8
results
at
G13
)
faligndata
%f10
,
%f12
,
%f26
faligndata
%f4
,
%f6
,
%f20
!
FGA
Group12
(
1
-
cycle
stall
,
%f20
at
G15
)
ldd
[%
o1
+
0x040
],
%f0
ldd
[%
o1
+
0x028
],
%f10
!
MS
(
%f10
results
at
G15
)
faligndata
%f6
,
%f8
,
%f22
!
FGA
Group13
(
%f22
results
at
G16
)
sub
%
o4
,
0x80
,
%
o4
add
%
o1
,
0x40
,
%
o1
ldd
[%
o1
+
0x030
],
%f12
!
MS
(
%f12
results
at
G16
)
ba
,
pt
%
xcc
,
loop
faligndata
%f8
,
%f10
,
%f24
!
FGA
Group15
(
1
-
cycle
stall
,
%f24
at
G18
)
srl
%
o4
,
6
,
%
o3
ldd
[%
o1
+
0x038
],
%f14
!
MS
(
%f14
results
at
G18
)
faligndata
%f10
,
%f12
,
%f26
!
FGA
Group16
(
%f26
results
at
G19
)
.
align
64
ldd
[%
o1
+
0x040
],
%f0
!
MS
(
%f0
results
at
G19
)
loop
:
ldd
[%
o1
+
0x008
],
%f2
/
*
We
only
use
the
first
loop
if
len
>
(
7
*
64
)
.
*/
faligndata
%f12
,
%f14
,
%f28
subcc
%
o4
,
0x1c0
,
%
o4
!
A0
Group17
ldd
[%
o1
+
0x010
],
%f4
bg
,
pt
%
icc
,
U3copy_to_user_loop1
!
BR
faligndata
%f14
,
%f0
,
%f30
add
%
o1
,
0x40
,
%
o1
!
A1
EXBLK2
(
stda
%f16
,
[%
o0
]
ASI_BLK_AIUS
)
ldd
[%
o1
+
0x018
],
%f6
add
%
o4
,
0x140
,
%
o4
!
A0
Group18
faligndata
%f0
,
%f2
,
%f16
ba
,
pt
%
xcc
,
U3copy_to_user_loop2
!
BR
srl
%
o4
,
6
,
%
o3
!
A0
Group19
ldd
[%
o1
+
0x020
],
%f8
nop
faligndata
%f2
,
%f4
,
%f18
nop
ldd
[%
o1
+
0x028
],
%f10
nop
faligndata
%f4
,
%f6
,
%f20
nop
ldd
[%
o1
+
0x030
],
%f12
nop
faligndata
%f6
,
%f8
,
%f22
ldd
[%
o1
+
0x038
],
%f14
nop
faligndata
%f8
,
%f10
,
%f24
nop
ldd
[%
o1
+
0x040
],
%f0
/
*
This
loop
performs
the
copy
and
queues
new
prefetches
.
prefetch
[%
o1
+
0x180
],
#
one_read
*
We
drop
into
the
second
loop
when
len
<=
(
5
*
64
)
.
Note
faligndata
%f10
,
%f12
,
%f26
*
that
this
(
5
*
64
)
factor
has
been
subtracted
from
len
subcc
%
o3
,
0x01
,
%
o3
*
already
.
add
%
o1
,
0x40
,
%
o1
*/
bg
,
pt
%
XCC
,
loop
U3copy_to_user_loop1
:
add
%
o0
,
0x40
,
%
o0
ldd
[%
o1
+
0x008
],
%f2
!
MS
Group2
(
%f2
results
at
G5
)
faligndata
%f12
,
%f14
,
%f28
!
FGA
(
%f28
results
at
G5
)
ldd
[%
o1
+
0x010
],
%f4
!
MS
Group3
(
%f4
results
at
G6
)
faligndata
%f14
,
%f0
,
%f30
!
FGA
Group4
(
1
-
cycle
stall
,
%f30
at
G7
)
EXBLK1
(
stda
%f16
,
[%
o0
]
ASI_BLK_AIUS
)
!
MS
ldd
[%
o1
+
0x018
],
%f6
!
AX
(
%f6
results
at
G7
)
faligndata
%f0
,
%f2
,
%f16
!
FGA
Group12
(
7
-
cycle
stall
)
ldd
[%
o1
+
0x020
],
%f8
!
MS
(
%f8
results
at
G15
)
faligndata
%f2
,
%f4
,
%f18
!
FGA
Group13
(
%f18
results
at
G16
)
ldd
[%
o1
+
0x028
],
%f10
!
MS
(
%f10
results
at
G16
)
faligndata
%f4
,
%f6
,
%f20
!
FGA
Group14
(
%f20
results
at
G17
)
ldd
[%
o1
+
0x030
],
%f12
!
MS
(
%f12
results
at
G17
)
faligndata
%f6
,
%f8
,
%f22
!
FGA
Group15
(
%f22
results
at
G18
)
ldd
[%
o1
+
0x038
],
%f14
!
MS
(
%f14
results
at
G18
)
faligndata
%f8
,
%f10
,
%f24
!
FGA
Group16
(
%f24
results
at
G19
)
ldd
[%
o1
+
0x040
],
%f0
!
AX
(
%f0
results
at
G19
)
prefetch
[%
o1
+
0x180
],
#
one_read
!
MS
faligndata
%f10
,
%f12
,
%f26
!
FGA
Group17
(
%f26
results
at
G20
)
subcc
%
o4
,
0x40
,
%
o4
!
A0
add
%
o1
,
0x40
,
%
o1
!
A1
bg
,
pt
%
xcc
,
U3copy_to_user_loop1
!
BR
add
%
o0
,
0x40
,
%
o0
!
A0
Group18
U3copy_to_user_loop2_enter
:
mov
5
,
%
o3
!
A1
/
*
This
loop
performs
on
the
copy
,
no
new
prefetches
are
*
queued
.
We
do
things
this
way
so
that
we
do
not
perform
*
any
spurious
prefetches
past
the
end
of
the
src
buffer
.
*/
U3copy_to_user_loop2
:
ldd
[%
o1
+
0x008
],
%f2
!
MS
faligndata
%f12
,
%f14
,
%f28
!
FGA
Group2
ldd
[%
o1
+
0x010
],
%f4
!
MS
faligndata
%f14
,
%f0
,
%f30
!
FGA
Group4
(
1
-
cycle
stall
)
EXBLK2
(
stda
%f16
,
[%
o0
]
ASI_BLK_AIUS
)
!
MS
ldd
[%
o1
+
0x018
],
%f6
!
AX
faligndata
%f0
,
%f2
,
%f16
!
FGA
Group12
(
7
-
cycle
stall
)
ldd
[%
o1
+
0x020
],
%f8
!
MS
faligndata
%f2
,
%f4
,
%f18
!
FGA
Group13
ldd
[%
o1
+
0x028
],
%f10
!
MS
faligndata
%f4
,
%f6
,
%f20
!
FGA
Group14
ldd
[%
o1
+
0x030
],
%f12
!
MS
faligndata
%f6
,
%f8
,
%f22
!
FGA
Group15
ldd
[%
o1
+
0x038
],
%f14
!
MS
faligndata
%f8
,
%f10
,
%f24
!
FGA
Group16
ldd
[%
o1
+
0x040
],
%f0
!
AX
faligndata
%f10
,
%f12
,
%f26
!
FGA
Group17
subcc
%
o3
,
0x01
,
%
o3
!
A0
add
%
o1
,
0x40
,
%
o1
!
A1
bg
,
pt
%
xcc
,
U3copy_to_user_loop2
!
BR
add
%
o0
,
0x40
,
%
o0
!
A0
Group18
/
*
Finally
we
copy
the
last
full
64
-
byte
block
.
*/
/
*
Finally
we
copy
the
last
full
64
-
byte
block
.
*/
U3copy_to_user_
loopfini
:
loopfini
:
ldd
[%
o1
+
0x008
],
%f2
!
MS
ldd
[%
o1
+
0x008
],
%f2
faligndata
%f12
,
%f14
,
%f28
!
FGA
faligndata
%f12
,
%f14
,
%f28
ldd
[%
o1
+
0x010
],
%f4
!
MS
Group19
ldd
[%
o1
+
0x010
],
%f4
faligndata
%f14
,
%f0
,
%f30
!
FGA
faligndata
%f14
,
%f0
,
%f30
EXBLK3
(
stda
%f16
,
[%
o0
]
ASI_BLK_AIUS
)
!
MS
Group20
EXBLK3
(
stda
%f16
,
[%
o0
]
ASI_BLK_AIUS
)
ldd
[%
o1
+
0x018
],
%f6
!
AX
ldd
[%
o1
+
0x018
],
%f6
faligndata
%f0
,
%f2
,
%f16
!
FGA
Group11
(
7
-
cycle
stall
)
faligndata
%f0
,
%f2
,
%f16
ldd
[%
o1
+
0x020
],
%f8
!
MS
ldd
[%
o1
+
0x020
],
%f8
faligndata
%f2
,
%f4
,
%f18
!
FGA
Group12
faligndata
%f2
,
%f4
,
%f18
ldd
[%
o1
+
0x028
],
%f10
!
MS
ldd
[%
o1
+
0x028
],
%f10
faligndata
%f4
,
%f6
,
%f20
!
FGA
Group13
faligndata
%f4
,
%f6
,
%f20
ldd
[%
o1
+
0x030
],
%f12
!
MS
ldd
[%
o1
+
0x030
],
%f12
faligndata
%f6
,
%f8
,
%f22
!
FGA
Group14
faligndata
%f6
,
%f8
,
%f22
ldd
[%
o1
+
0x038
],
%f14
!
MS
ldd
[%
o1
+
0x038
],
%f14
faligndata
%f8
,
%f10
,
%f24
!
FGA
Group15
faligndata
%f8
,
%f10
,
%f24
cmp
%
g1
,
0
!
A0
cmp
%
g1
,
0
be
,
pt
%
icc
,
1
f
!
BR
be
,
pt
%
XCC
,
1
f
add
%
o0
,
0x40
,
%
o0
!
A1
add
%
o0
,
0x40
,
%
o0
ldd
[%
o1
+
0x040
],
%f0
!
MS
ldd
[%
o1
+
0x040
],
%f0
1
:
faligndata
%f10
,
%f12
,
%f26
!
FGA
Group16
1
:
faligndata
%f10
,
%f12
,
%f26
faligndata
%f12
,
%f14
,
%f28
!
FGA
Group17
faligndata
%f12
,
%f14
,
%f28
faligndata
%f14
,
%f0
,
%f30
!
FGA
Group18
faligndata
%f14
,
%f0
,
%f30
EXBLK4
(
stda
%f16
,
[%
o0
]
ASI_BLK_AIUS
)
!
MS
EXBLK4
(
stda
%f16
,
[%
o0
]
ASI_BLK_AIUS
)
add
%
o0
,
0x40
,
%
o0
!
A0
add
%
o0
,
0x40
,
%
o0
add
%
o1
,
0x40
,
%
o1
!
A1
add
%
o1
,
0x40
,
%
o1
#ifdef __KERNEL__
.
globl
U3copy_to_user_nop_2_3
.
globl
U3copy_to_user_nop_2_3
U3copy_to_user_nop_2_3
:
U3copy_to_user_nop_2_3
:
mov
PRIMARY_CONTEXT
,
%
o3
mov
PRIMARY_CONTEXT
,
%
o3
stxa
%
g0
,
[%
o3
]
ASI_DMMU
!
Flush
P
-
cache
stxa
%
g0
,
[%
o3
]
ASI_DMMU
!
Flush
P
-
cache
stxa
%
g3
,
[%
g0
]
ASI_DCU_CONTROL_REG
!
Disable
P
-
cache
stxa
%
g3
,
[%
g0
]
ASI_DCU_CONTROL_REG
!
Disable
P
-
cache
#endif
membar
#
Sync
!
MS
Group26
(
7
-
cycle
stall
)
membar
#
Sync
/
*
Now
we
copy
the
(
len
modulo
64
)
bytes
at
the
end
.
/
*
Now
we
copy
the
(
len
modulo
64
)
bytes
at
the
end
.
*
Note
how
we
borrow
the
%f0
loaded
above
.
*
Note
how
we
borrow
the
%f0
loaded
above
.
*
*
*
Also
notice
how
this
code
is
careful
not
to
perform
a
*
Also
notice
how
this
code
is
careful
not
to
perform
a
*
load
past
the
end
of
the
src
buffer
just
like
similar
*
load
past
the
end
of
the
src
buffer
.
*
code
found
in
U3copy_to_user_toosmall
processing
.
*/
*/
U3copy_to_user_
loopend
:
loopend
:
and
%
o2
,
0x3f
,
%
o2
!
A0
Group
and
%
o2
,
0x3f
,
%
o2
andcc
%
o2
,
0x38
,
%
g2
!
A0
Group
andcc
%
o2
,
0x38
,
%
g2
be
,
pn
%
icc
,
U3copy_to_user_endcruft
!
BR
be
,
pn
%
XCC
,
endcruft
subcc
%
g2
,
0x8
,
%
g2
!
A1
subcc
%
g2
,
0x8
,
%
g2
be
,
pn
%
icc
,
U3copy_to_user_endcruft
!
BR
Group
be
,
pn
%
XCC
,
endcruft
cmp
%
g1
,
0
!
A0
cmp
%
g1
,
0
be
,
a
,
pt
%
icc
,
1
f
!
BR
Group
be
,
a
,
pt
%
XCC
,
1
f
ldd
[%
o1
+
0x00
],
%f0
!
MS
ldd
[%
o1
+
0x00
],
%f0
1
:
ldd
[%
o1
+
0x08
],
%f2
!
MS
Group
1
:
ldd
[%
o1
+
0x08
],
%f2
add
%
o1
,
0x8
,
%
o1
!
A0
add
%
o1
,
0x8
,
%
o1
sub
%
o2
,
0x8
,
%
o2
!
A1
sub
%
o2
,
0x8
,
%
o2
subcc
%
g2
,
0x8
,
%
g2
!
A0
Group
subcc
%
g2
,
0x8
,
%
g2
faligndata
%f0
,
%f2
,
%f8
!
FGA
Group
faligndata
%f0
,
%f2
,
%f8
EX
(
stda
%f8
,
[%
o0
+
0x00
]
%
asi
,
add
%
o2
,
0x8
)
!
MS
(
XXX
does
it
stall
here
?
XXX
)
EX
(
stda
%f8
,
[%
o0
+
0x00
]
%
asi
,
add
%
o2
,
0x8
)
be
,
pn
%
icc
,
U3copy_to_user_endcruft
!
BR
be
,
pn
%
XCC
,
endcruft
add
%
o0
,
0x8
,
%
o0
!
A0
add
%
o0
,
0x8
,
%
o0
ldd
[%
o1
+
0x08
],
%f0
!
MS
Group
ldd
[%
o1
+
0x08
],
%f0
add
%
o1
,
0x8
,
%
o1
!
A0
add
%
o1
,
0x8
,
%
o1
sub
%
o2
,
0x8
,
%
o2
!
A1
sub
%
o2
,
0x8
,
%
o2
subcc
%
g2
,
0x8
,
%
g2
!
A0
Group
subcc
%
g2
,
0x8
,
%
g2
faligndata
%f2
,
%f0
,
%f8
!
FGA
faligndata
%f2
,
%f0
,
%f8
EX
(
stda
%f8
,
[%
o0
+
0x00
]
%
asi
,
add
%
o2
,
0x8
)
!
MS
(
XXX
does
it
stall
here
?
XXX
)
EX
(
stda
%f8
,
[%
o0
+
0x00
]
%
asi
,
add
%
o2
,
0x8
)
bne
,
pn
%
icc
,
1
b
!
BR
bne
,
pn
%
XCC
,
1
b
add
%
o0
,
0x8
,
%
o0
!
A0
Group
add
%
o0
,
0x8
,
%
o0
/
*
If
anything
is
left
,
we
copy
it
one
byte
at
a
time
.
/
*
If
anything
is
left
,
we
copy
it
one
byte
at
a
time
.
*
Note
that
%
g1
is
(
src
&
0x3
)
saved
above
before
the
*
Note
that
%
g1
is
(
src
&
0x3
)
saved
above
before
the
*
alignaddr
was
performed
.
*
alignaddr
was
performed
.
*/
*/
U3copy_to_user_
endcruft
:
endcruft
:
cmp
%
o2
,
0
cmp
%
o2
,
0
add
%
o1
,
%
g1
,
%
o1
add
%
o1
,
%
g1
,
%
o1
VISExitHalf
VISExitHalf
be
,
pn
%
icc
,
U3copy_to_user_short_ret
be
,
pn
%
XCC
,
out
nop
sub
%
o0
,
%
o1
,
%
o3
ba
,
a
,
pt
%
xcc
,
U3copy_to_user_short
/
*
If
we
get
here
,
then
32
<=
len
<
(
6
*
64
)
*/
U3copy_to_user_toosmall
:
#ifdef SMALL_COPY_USES_FPU
/
*
Is
'dst'
already
aligned
on
an
8
-
byte
boundary
?
*/
be
,
pt
%
xcc
,
2
f
!
BR
Group
/
*
Compute
abs
((
dst
&
7
)
-
8
)
into
%
g2
.
This
is
the
number
*
of
bytes
to
copy
to
make
'dst'
8
-
byte
aligned
.
We
pre
-
*
subtract
this
from
'len'
.
*/
sub
%
g2
,
0x8
,
%
g2
!
A0
sub
%
g0
,
%
g2
,
%
g2
!
A0
Group
(
reg
-
dep
)
sub
%
o2
,
%
g2
,
%
o2
!
A0
Group
(
reg
-
dep
)
/
*
Copy
%
g2
bytes
from
src
to
dst
,
one
byte
at
a
time
.
*/
1
:
ldub
[%
o1
+
0x00
],
%
o3
!
MS
(
Group
)
(%
o3
in
3
cycles
)
add
%
o1
,
0x1
,
%
o1
!
A1
add
%
o0
,
0x1
,
%
o0
!
A0
Group
subcc
%
g2
,
0x1
,
%
g2
!
A1
bg
,
pt
%
icc
,
1
b
!
BR
Group
andcc
%
g1
,
0x7
,
%
g0
EXNV2
(
stba
%
o3
,
[%
o0
+
-
1
]
%
asi
,
add
%
o2
,
%
g2
)
!
MS
Group
bne
,
pn
%
icc
,
small_copy_unaligned
andcc
%
o2
,
0x8
,
%
g0
be
,
pt
%
icc
,
1
f
nop
ldx
[%
o1
],
%
o5
EXNV
(
stxa
%
o5
,
[%
o1
+
%
o3
]
ASI_AIUS
,
add
%
o2
,
%
g0
)
add
%
o1
,
0x8
,
%
o1
2
:
VISEntryHalf
!
MS
+
MS
1
:
andcc
%
o2
,
0x4
,
%
g0
be
,
pt
%
icc
,
1
f
nop
lduw
[%
o1
],
%
o5
EXNV
(
stwa
%
o5
,
[%
o1
+
%
o3
]
ASI_AIUS
,
and
%
o2
,
0x7
)
add
%
o1
,
0x4
,
%
o1
/
*
Compute
(
len
-
(
len
%
8
))
into
%
g2
.
This
is
guaranteed
1
:
andcc
%
o2
,
0x2
,
%
g0
*
to
be
nonzero
.
be
,
pt
%
icc
,
1
f
*/
nop
andn
%
o2
,
0x7
,
%
g2
!
A0
Group
lduh
[%
o1
],
%
o5
EXNV
(
stha
%
o5
,
[%
o1
+
%
o3
]
ASI_AIUS
,
and
%
o2
,
0x3
)
/
*
You
may
read
this
and
believe
that
it
allows
reading
add
%
o1
,
0x2
,
%
o1
*
one
8
-
byte
longword
past
the
end
of
src
.
It
actually
*
does
not
,
as
%
g2
is
subtracted
as
loads
are
done
from
*
src
,
so
we
always
stop
before
running
off
the
end
.
*
Also
,
we
are
guaranteed
to
have
at
least
0x10
bytes
*
to
move
here
.
*/
sub
%
g2
,
0x8
,
%
g2
!
A0
Group
(
reg
-
dep
)
alignaddr
%
o1
,
%
g0
,
%
g1
!
MS
(
Break
-
after
)
ldd
[%
g1
+
0x00
],
%f0
!
MS
Group
(
1
-
cycle
stall
)
add
%
g1
,
0x8
,
%
g1
!
A0
1
:
ldd
[%
g1
+
0x00
],
%f2
!
MS
Group
add
%
g1
,
0x8
,
%
g1
!
A0
sub
%
o2
,
0x8
,
%
o2
!
A1
subcc
%
g2
,
0x8
,
%
g2
!
A0
Group
faligndata
%f0
,
%f2
,
%f8
!
FGA
Group
(
1
-
cycle
stall
)
EX
(
stda
%f8
,
[%
o0
+
0x00
]
%
asi
,
add
%
o2
,
0x8
)
!
MS
Group
(
2
-
cycle
stall
)
add
%
o1
,
0x8
,
%
o1
!
A0
be
,
pn
%
icc
,
2
f
!
BR
add
%
o0
,
0x8
,
%
o0
!
A1
ldd
[%
g1
+
0x00
],
%f0
!
MS
Group
add
%
g1
,
0x8
,
%
g1
!
A0
sub
%
o2
,
0x8
,
%
o2
!
A1
subcc
%
g2
,
0x8
,
%
g2
!
A0
Group
faligndata
%f2
,
%f0
,
%f8
!
FGA
Group
(
1
-
cycle
stall
)
EX
(
stda
%f8
,
[%
o0
+
0x00
]
%
asi
,
add
%
o2
,
0x8
)
!
MS
Group
(
2
-
cycle
stall
)
add
%
o1
,
0x8
,
%
o1
!
A0
bne
,
pn
%
icc
,
1
b
!
BR
add
%
o0
,
0x8
,
%
o0
!
A1
/
*
Nothing
left
to
copy
?
*/
2
:
cmp
%
o2
,
0
!
A0
Group
VISExitHalf
!
A0
+
MS
be
,
pn
%
icc
,
U3copy_to_user_short_ret
!
BR
Group
nop
!
A0
ba
,
a
,
pt
%
xcc
,
U3copy_to_user_short
!
BR
Group
#else /* !(SMALL_COPY_USES_FPU) */
xor
%
o1
,
%
o0
,
%
g2
andcc
%
g2
,
0x7
,
%
g0
bne
,
pn
%
icc
,
U3copy_to_user_short
andcc
%
o1
,
0x7
,
%
g2
be
,
pt
%
xcc
,
2
f
sub
%
g2
,
0x8
,
%
g2
sub
%
g0
,
%
g2
,
%
g2
sub
%
o2
,
%
g2
,
%
o2
1
:
ldub
[%
o1
+
0x00
],
%
o3
1
:
andcc
%
o2
,
0x1
,
%
g0
add
%
o1
,
0x1
,
%
o1
be
,
pt
%
icc
,
out
add
%
o0
,
0x1
,
%
o0
nop
subcc
%
g2
,
0x1
,
%
g2
ldub
[%
o1
],
%
o5
bg
,
pt
%
icc
,
1
b
ba
,
pt
%
xcc
,
out
EXNV2
(
stba
%
o3
,
[%
o0
+
-
1
]
%
asi
,
add
%
o2
,
%
g2
)
EXNV
(
stba
%
o5
,
[%
o1
+
%
o3
]
ASI_AIUS
,
and
%
o2
,
0x1
)
medium_copy
:
/
*
16
<
len
<=
64
*/
bne
,
pn
%
XCC
,
small_copy_unaligned
sub
%
o0
,
%
o1
,
%
o3
medium_copy_aligned
:
andn
%
o2
,
0x7
,
%
o4
and
%
o2
,
0x7
,
%
o2
1
:
subcc
%
o4
,
0x8
,
%
o4
ldx
[%
o1
],
%
o5
EXNV4
(
stxa
%
o5
,
[%
o1
+
%
o3
]
ASI_AIUS
,
add
%
o2
,
%
o4
)
bgu
,
pt
%
XCC
,
1
b
add
%
o1
,
0x8
,
%
o1
andcc
%
o2
,
0x4
,
%
g0
be
,
pt
%
XCC
,
1
f
nop
sub
%
o2
,
0x4
,
%
o2
lduw
[%
o1
],
%
o5
EXNV3
(
stwa
%
o5
,
[%
o1
+
%
o3
]
ASI_AIUS
,
add
%
o2
,
%
g0
)
add
%
o1
,
0x4
,
%
o1
1
:
cmp
%
o2
,
0
be
,
pt
%
XCC
,
out
nop
ba
,
pt
%
xcc
,
small_copy_unaligned
nop
2
:
andn
%
o2
,
0x7
,
%
g2
small_copy
:
/
*
0
<
len
<=
16
*/
sub
%
o2
,
%
g2
,
%
o2
andcc
%
o3
,
0x3
,
%
g0
bne
,
pn
%
XCC
,
small_copy_unaligned
sub
%
o0
,
%
o1
,
%
o3
3
:
ldx
[%
o1
+
0x00
],
%
o3
small_copy_aligned
:
add
%
o1
,
0x8
,
%
o1
subcc
%
o2
,
4
,
%
o2
add
%
o0
,
0x8
,
%
o0
lduw
[%
o1
],
%
g1
subcc
%
g2
,
0x8
,
%
g2
EXNV3
(
stwa
%
g1
,
[%
o1
+
%
o3
]
ASI_AIUS
,
add
%
o2
,
%
g0
)
bg
,
pt
%
icc
,
3
b
bg
u
,
pt
%
XCC
,
small_copy_aligned
EXNV3
(
stxa
%
o3
,
[%
o0
+
-
8
]
%
asi
,
add
%
o2
,
%
g2
)
add
%
o1
,
4
,
%
o1
cmp
%
o2
,
0
out
:
retl
bne
,
pn
%
icc
,
U3copy_to_user_short
clr
%
o0
nop
ba
,
a
,
pt
%
xcc
,
U3copy_to_user_short_ret
#endif /* !(SMALL_COPY_USES_FPU) */
.
align
32
small_copy_unaligned
:
subcc
%
o2
,
1
,
%
o2
ldub
[%
o1
],
%
g1
EXNV2
(
stba
%
g1
,
[%
o1
+
%
o3
]
ASI_AIUS
,
add
%
o2
,
%
g0
)
bgu
,
pt
%
XCC
,
small_copy_unaligned
add
%
o1
,
1
,
%
o1
retl
clr
%
o0
arch/sparc64/lib/U3memcpy.S
View file @
da09d2f1
/*
$Id
:
U3memcpy
.
S
,
v
1
.2
2000
/
11
/
01
09
:
29
:
19
davem
Exp
$
/*
U3memcpy
.
S
:
UltraSparc
-
III
optimized
memcpy
.
*
U3memcpy
.
S
:
UltraSparc
-
III
optimized
memcpy
.
*
*
*
Copyright
(
C
)
1999
,
2000
David
S
.
Miller
(
davem
@
redhat
.
com
)
*
Copyright
(
C
)
1999
,
2000
,
2004
David
S
.
Miller
(
davem
@
redhat
.
com
)
*/
*/
#ifdef __KERNEL__
#ifdef __KERNEL__
...
@@ -9,15 +8,20 @@
...
@@ -9,15 +8,20 @@
#include <asm/asi.h>
#include <asm/asi.h>
#include <asm/dcu.h>
#include <asm/dcu.h>
#include <asm/spitfire.h>
#include <asm/spitfire.h>
#undef SMALL_COPY_USES_FPU
#else
#else
#define ASI_BLK_P 0xf0
#define ASI_BLK_P 0xf0
#define FPRS_FEF 0x04
#define FPRS_FEF 0x04
#define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs
#define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs
#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
#define SMALL_COPY_USES_FPU
#endif
#endif
#ifndef XCC
#define XCC xcc
#endif
.
register
%
g2
,#
scratch
.
register
%
g3
,#
scratch
/
*
Special
/
non
-
trivial
issues
of
this
code
:
/
*
Special
/
non
-
trivial
issues
of
this
code
:
*
*
*
1
)
%
o5
is
preserved
from
VISEntryHalf
to
VISExitHalf
*
1
)
%
o5
is
preserved
from
VISEntryHalf
to
VISExitHalf
...
@@ -37,80 +41,55 @@
...
@@ -37,80 +41,55 @@
*
of
up
to
2
.4
GB
per
second
.
*
of
up
to
2
.4
GB
per
second
.
*/
*/
.
globl
U3memcpy
.
globl
U3memcpy
U3memcpy
:
/
*
%
o0
=
dst
,
%
o1
=
src
,
%
o2
=
len
*/
U3memcpy
:
/
*
%
o0
=
dst
,
%
o1
=
src
,
%
o2
=
len
*/
#ifndef __KERNEL__
mov
%
o0
,
%
g5
/
*
Save
away
original
'dst'
for
memcpy
return
value
.
*/
cmp
%
o2
,
0
mov
%
o0
,
%
g3
!
A0
Group
be
,
pn
%
XCC
,
out
#endif
or
%
o0
,
%
o1
,
%
o3
/
*
Anything
to
copy
at
all
?
*/
cmp
%
o2
,
16
cmp
%
o2
,
0
!
A1
bleu
,
a
,
pn
%
XCC
,
small_copy
ble
,
pn
%
icc
,
U3memcpy_short_ret
!
BR
or
%
o3
,
%
o2
,
%
o3
/
*
Extremely
small
copy
?
*/
cmp
%
o2
,
31
!
A0
Group
ble
,
pn
%
icc
,
U3memcpy_short
!
BR
/
*
Large
enough
to
use
unrolled
prefetch
loops
?
*/
cmp
%
o2
,
0x100
!
A1
bge
,
a
,
pt
%
icc
,
U3memcpy_enter
!
BR
Group
andcc
%
o0
,
0x3f
,
%
g2
!
A0
ba
,
pt
%
xcc
,
U3memcpy_toosmall
!
BR
Group
andcc
%
o0
,
0x7
,
%
g2
!
A0
.
align
32
U3memcpy_short
:
/
*
Copy
%
o2
bytes
from
src
to
dst
,
one
byte
at
a
time
.
*/
ldub
[%
o1
+
0x00
],
%
o3
!
MS
Group
add
%
o1
,
0x1
,
%
o1
!
A0
add
%
o0
,
0x1
,
%
o0
!
A1
subcc
%
o2
,
1
,
%
o2
!
A0
Group
bg
,
pt
%
icc
,
U3memcpy_short
!
BR
stb
%
o3
,
[%
o0
+
-
1
]
!
MS
Group
(
1
-
cycle
stall
)
U3memcpy_short_ret
:
#ifdef __KERNEL__
retl
!
BR
Group
(
0
-
4
cycle
stall
)
clr
%
o0
!
A0
#else
retl
!
BR
Group
(
0
-
4
cycle
stall
)
mov
%
g3
,
%
o0
!
A0
#endif
/
*
Here
len
>=
(
6
*
64
)
and
condition
codes
reflect
execution
cmp
%
o2
,
256
blu
,
pt
%
XCC
,
medium_copy
andcc
%
o3
,
0x7
,
%
g0
ba
,
pt
%
xcc
,
enter
andcc
%
o0
,
0x3f
,
%
g2
/
*
Here
len
>=
256
and
condition
codes
reflect
execution
*
of
"andcc %o0, 0x7, %g2"
,
done
by
caller
.
*
of
"andcc %o0, 0x7, %g2"
,
done
by
caller
.
*/
*/
.
align
64
.
align
64
U3memcpy_
enter
:
enter
:
/
*
Is
'dst'
already
aligned
on
an
64
-
byte
boundary
?
*/
/
*
Is
'dst'
already
aligned
on
an
64
-
byte
boundary
?
*/
be
,
pt
%
xcc
,
2
f
!
BR
be
,
pt
%
XCC
,
2
f
/
*
Compute
abs
((
dst
&
0x3f
)
-
0x40
)
into
%
g2
.
This
is
the
number
/
*
Compute
abs
((
dst
&
0x3f
)
-
0x40
)
into
%
g2
.
This
is
the
number
*
of
bytes
to
copy
to
make
'dst'
64
-
byte
aligned
.
We
pre
-
*
of
bytes
to
copy
to
make
'dst'
64
-
byte
aligned
.
We
pre
-
*
subtract
this
from
'len'
.
*
subtract
this
from
'len'
.
*/
*/
sub
%
g2
,
0x40
,
%
g2
!
A0
Group
sub
%
g2
,
0x40
,
%
g2
sub
%
g0
,
%
g2
,
%
g2
!
A0
Group
sub
%
g0
,
%
g2
,
%
g2
sub
%
o2
,
%
g2
,
%
o2
!
A0
Group
sub
%
o2
,
%
g2
,
%
o2
/
*
Copy
%
g2
bytes
from
src
to
dst
,
one
byte
at
a
time
.
*/
/
*
Copy
%
g2
bytes
from
src
to
dst
,
one
byte
at
a
time
.
*/
1
:
ldub
[%
o1
+
0x00
],
%
o3
!
MS
(
Group
)
1
:
ldub
[%
o1
+
0x00
],
%
o3
add
%
o1
,
0x1
,
%
o1
!
A1
add
%
o1
,
0x1
,
%
o1
add
%
o0
,
0x1
,
%
o0
!
A0
Group
add
%
o0
,
0x1
,
%
o0
subcc
%
g2
,
0x1
,
%
g2
!
A1
subcc
%
g2
,
0x1
,
%
g2
bg
,
pt
%
icc
,
1
b
!
BR
Group
bg
,
pt
%
XCC
,
1
b
stb
%
o3
,
[%
o0
+
-
1
]
!
MS
Group
stb
%
o3
,
[%
o0
+
-
1
]
2
:
VISEntryHalf
!
MS
+
MS
2
:
VISEntryHalf
and
%
o1
,
0x7
,
%
g1
!
A1
and
%
o1
,
0x7
,
%
g1
ba
,
pt
%
xcc
,
U3memcpy_begin
!
BR
ba
,
pt
%
xcc
,
begin
alignaddr
%
o1
,
%
g0
,
%
o1
!
MS
(
Break
-
after
)
alignaddr
%
o1
,
%
g0
,
%
o1
.
align
64
.
align
64
U3memcpy_
begin
:
begin
:
#ifdef __KERNEL__
#ifdef __KERNEL__
.
globl
U3memcpy_nop_1_6
.
globl
U3memcpy_nop_1_6
U3memcpy_nop_1_6
:
U3memcpy_nop_1_6
:
...
@@ -121,146 +100,90 @@ U3memcpy_nop_1_6:
...
@@ -121,146 +100,90 @@ U3memcpy_nop_1_6:
stxa
%
o3
,
[%
g0
]
ASI_DCU_CONTROL_REG
!
Enable
P
-
cache
stxa
%
o3
,
[%
g0
]
ASI_DCU_CONTROL_REG
!
Enable
P
-
cache
membar
#
Sync
membar
#
Sync
#endif
#endif
prefetch
[%
o1
+
0x000
],
#
one_read
!
MS
Group1
prefetch
[%
o1
+
0x000
],
#
one_read
prefetch
[%
o1
+
0x040
],
#
one_read
!
MS
Group2
prefetch
[%
o1
+
0x040
],
#
one_read
andn
%
o2
,
(
0x40
-
1
),
%
o4
!
A0
andn
%
o2
,
(
0x40
-
1
),
%
o4
prefetch
[%
o1
+
0x080
],
#
one_read
!
MS
Group3
prefetch
[%
o1
+
0x080
],
#
one_read
cmp
%
o4
,
0x140
!
A0
prefetch
[%
o1
+
0x0c0
],
#
one_read
prefetch
[%
o1
+
0x0c0
],
#
one_read
!
MS
Group4
ldd
[%
o1
+
0x000
],
%f0
ldd
[%
o1
+
0x000
],
%f0
!
MS
Group5
(
%f0
results
at
G8
)
prefetch
[%
o1
+
0x100
],
#
one_read
bge
,
a
,
pt
%
icc
,
1
f
!
BR
ldd
[%
o1
+
0x008
],
%f2
prefetch
[%
o1
+
0x140
],
#
one_read
prefetch
[%
o1
+
0x100
],
#
one_read
!
MS
Group6
ldd
[%
o1
+
0x010
],
%f4
1
:
ldd
[%
o1
+
0x008
],
%f2
!
AX
(
%f2
results
at
G9
)
prefetch
[%
o1
+
0x180
],
#
one_read
cmp
%
o4
,
0x180
!
A1
faligndata
%f0
,
%f2
,
%f16
bge
,
a
,
pt
%
icc
,
1
f
!
BR
ldd
[%
o1
+
0x018
],
%f6
prefetch
[%
o1
+
0x140
],
#
one_read
!
MS
Group7
faligndata
%f2
,
%f4
,
%f18
1
:
ldd
[%
o1
+
0x010
],
%f4
!
AX
(
%f4
results
at
G10
)
ldd
[%
o1
+
0x020
],
%f8
cmp
%
o4
,
0x1c0
!
A1
faligndata
%f4
,
%f6
,
%f20
bge
,
a
,
pt
%
icc
,
1
f
!
BR
ldd
[%
o1
+
0x028
],
%f10
faligndata
%f6
,
%f8
,
%f22
prefetch
[%
o1
+
0x180
],
#
one_read
!
MS
Group8
1
:
faligndata
%f0
,
%f2
,
%f16
!
FGA
Group9
(
%f16
at
G12
)
ldd
[%
o1
+
0x030
],
%f12
ldd
[%
o1
+
0x018
],
%f6
!
AX
(
%f6
results
at
G12
)
faligndata
%f8
,
%f10
,
%f24
faligndata
%f2
,
%f4
,
%f18
!
FGA
Group10
(
%f18
results
at
G13
)
ldd
[%
o1
+
0x038
],
%f14
ldd
[%
o1
+
0x020
],
%f8
!
MS
(
%f8
results
at
G13
)
faligndata
%f10
,
%f12
,
%f26
faligndata
%f4
,
%f6
,
%f20
!
FGA
Group12
(
1
-
cycle
stall
,
%f20
at
G15
)
ldd
[%
o1
+
0x040
],
%f0
ldd
[%
o1
+
0x028
],
%f10
!
MS
(
%f10
results
at
G15
)
faligndata
%f6
,
%f8
,
%f22
!
FGA
Group13
(
%f22
results
at
G16
)
sub
%
o4
,
0x80
,
%
o4
add
%
o1
,
0x40
,
%
o1
ldd
[%
o1
+
0x030
],
%f12
!
MS
(
%f12
results
at
G16
)
ba
,
pt
%
xcc
,
loop
faligndata
%f8
,
%f10
,
%f24
!
FGA
Group15
(
1
-
cycle
stall
,
%f24
at
G18
)
srl
%
o4
,
6
,
%
o3
ldd
[%
o1
+
0x038
],
%f14
!
MS
(
%f14
results
at
G18
)
faligndata
%f10
,
%f12
,
%f26
!
FGA
Group16
(
%f26
results
at
G19
)
.
align
64
ldd
[%
o1
+
0x040
],
%f0
!
MS
(
%f0
results
at
G19
)
loop
:
ldd
[%
o1
+
0x008
],
%f2
/
*
We
only
use
the
first
loop
if
len
>
(
7
*
64
)
.
*/
faligndata
%f12
,
%f14
,
%f28
subcc
%
o4
,
0x1c0
,
%
o4
!
A0
Group17
ldd
[%
o1
+
0x010
],
%f4
bg
,
pt
%
icc
,
U3memcpy_loop1
!
BR
faligndata
%f14
,
%f0
,
%f30
add
%
o1
,
0x40
,
%
o1
!
A1
stda
%f16
,
[%
o0
]
ASI_BLK_P
ldd
[%
o1
+
0x018
],
%f6
add
%
o4
,
0x140
,
%
o4
!
A0
Group18
faligndata
%f0
,
%f2
,
%f16
ba
,
pt
%
xcc
,
U3memcpy_loop2
!
BR
srl
%
o4
,
6
,
%
o3
!
A0
Group19
ldd
[%
o1
+
0x020
],
%f8
nop
faligndata
%f2
,
%f4
,
%f18
nop
ldd
[%
o1
+
0x028
],
%f10
nop
faligndata
%f4
,
%f6
,
%f20
nop
ldd
[%
o1
+
0x030
],
%f12
nop
faligndata
%f6
,
%f8
,
%f22
ldd
[%
o1
+
0x038
],
%f14
nop
faligndata
%f8
,
%f10
,
%f24
nop
ldd
[%
o1
+
0x040
],
%f0
/
*
This
loop
performs
the
copy
and
queues
new
prefetches
.
prefetch
[%
o1
+
0x180
],
#
one_read
*
We
drop
into
the
second
loop
when
len
<=
(
5
*
64
)
.
Note
faligndata
%f10
,
%f12
,
%f26
*
that
this
(
5
*
64
)
factor
has
been
subtracted
from
len
subcc
%
o3
,
0x01
,
%
o3
*
already
.
add
%
o1
,
0x40
,
%
o1
*/
bg
,
pt
%
XCC
,
loop
U3memcpy_loop1
:
add
%
o0
,
0x40
,
%
o0
ldd
[%
o1
+
0x008
],
%f2
!
MS
Group2
(
%f2
results
at
G5
)
faligndata
%f12
,
%f14
,
%f28
!
FGA
(
%f28
results
at
G5
)
ldd
[%
o1
+
0x010
],
%f4
!
MS
Group3
(
%f4
results
at
G6
)
faligndata
%f14
,
%f0
,
%f30
!
FGA
Group4
(
1
-
cycle
stall
,
%f30
at
G7
)
stda
%f16
,
[%
o0
]
ASI_BLK_P
!
MS
ldd
[%
o1
+
0x018
],
%f6
!
AX
(
%f6
results
at
G7
)
faligndata
%f0
,
%f2
,
%f16
!
FGA
Group12
(
7
-
cycle
stall
)
ldd
[%
o1
+
0x020
],
%f8
!
MS
(
%f8
results
at
G15
)
faligndata
%f2
,
%f4
,
%f18
!
FGA
Group13
(
%f18
results
at
G16
)
ldd
[%
o1
+
0x028
],
%f10
!
MS
(
%f10
results
at
G16
)
faligndata
%f4
,
%f6
,
%f20
!
FGA
Group14
(
%f20
results
at
G17
)
ldd
[%
o1
+
0x030
],
%f12
!
MS
(
%f12
results
at
G17
)
faligndata
%f6
,
%f8
,
%f22
!
FGA
Group15
(
%f22
results
at
G18
)
ldd
[%
o1
+
0x038
],
%f14
!
MS
(
%f14
results
at
G18
)
faligndata
%f8
,
%f10
,
%f24
!
FGA
Group16
(
%f24
results
at
G19
)
ldd
[%
o1
+
0x040
],
%f0
!
AX
(
%f0
results
at
G19
)
prefetch
[%
o1
+
0x180
],
#
one_read
!
MS
faligndata
%f10
,
%f12
,
%f26
!
FGA
Group17
(
%f26
results
at
G20
)
subcc
%
o4
,
0x40
,
%
o4
!
A0
add
%
o1
,
0x40
,
%
o1
!
A1
bg
,
pt
%
xcc
,
U3memcpy_loop1
!
BR
add
%
o0
,
0x40
,
%
o0
!
A0
Group18
U3memcpy_loop2_enter
:
mov
5
,
%
o3
!
A1
/
*
This
loop
performs
on
the
copy
,
no
new
prefetches
are
*
queued
.
We
do
things
this
way
so
that
we
do
not
perform
*
any
spurious
prefetches
past
the
end
of
the
src
buffer
.
*/
U3memcpy_loop2
:
ldd
[%
o1
+
0x008
],
%f2
!
MS
faligndata
%f12
,
%f14
,
%f28
!
FGA
Group2
ldd
[%
o1
+
0x010
],
%f4
!
MS
faligndata
%f14
,
%f0
,
%f30
!
FGA
Group4
(
1
-
cycle
stall
)
stda
%f16
,
[%
o0
]
ASI_BLK_P
!
MS
ldd
[%
o1
+
0x018
],
%f6
!
AX
faligndata
%f0
,
%f2
,
%f16
!
FGA
Group12
(
7
-
cycle
stall
)
ldd
[%
o1
+
0x020
],
%f8
!
MS
faligndata
%f2
,
%f4
,
%f18
!
FGA
Group13
ldd
[%
o1
+
0x028
],
%f10
!
MS
faligndata
%f4
,
%f6
,
%f20
!
FGA
Group14
ldd
[%
o1
+
0x030
],
%f12
!
MS
faligndata
%f6
,
%f8
,
%f22
!
FGA
Group15
ldd
[%
o1
+
0x038
],
%f14
!
MS
faligndata
%f8
,
%f10
,
%f24
!
FGA
Group16
ldd
[%
o1
+
0x040
],
%f0
!
AX
faligndata
%f10
,
%f12
,
%f26
!
FGA
Group17
subcc
%
o3
,
0x01
,
%
o3
!
A0
add
%
o1
,
0x40
,
%
o1
!
A1
bg
,
pt
%
xcc
,
U3memcpy_loop2
!
BR
add
%
o0
,
0x40
,
%
o0
!
A0
Group18
/
*
Finally
we
copy
the
last
full
64
-
byte
block
.
*/
/
*
Finally
we
copy
the
last
full
64
-
byte
block
.
*/
U3memcpy_
loopfini
:
loopfini
:
ldd
[%
o1
+
0x008
],
%f2
!
MS
ldd
[%
o1
+
0x008
],
%f2
faligndata
%f12
,
%f14
,
%f28
!
FGA
faligndata
%f12
,
%f14
,
%f28
ldd
[%
o1
+
0x010
],
%f4
!
MS
Group19
ldd
[%
o1
+
0x010
],
%f4
faligndata
%f14
,
%f0
,
%f30
!
FGA
faligndata
%f14
,
%f0
,
%f30
stda
%f16
,
[%
o0
]
ASI_BLK_P
!
MS
Group20
stda
%f16
,
[%
o0
]
ASI_BLK_P
ldd
[%
o1
+
0x018
],
%f6
!
AX
ldd
[%
o1
+
0x018
],
%f6
faligndata
%f0
,
%f2
,
%f16
!
FGA
Group11
(
7
-
cycle
stall
)
faligndata
%f0
,
%f2
,
%f16
ldd
[%
o1
+
0x020
],
%f8
!
MS
ldd
[%
o1
+
0x020
],
%f8
faligndata
%f2
,
%f4
,
%f18
!
FGA
Group12
faligndata
%f2
,
%f4
,
%f18
ldd
[%
o1
+
0x028
],
%f10
!
MS
ldd
[%
o1
+
0x028
],
%f10
faligndata
%f4
,
%f6
,
%f20
!
FGA
Group13
faligndata
%f4
,
%f6
,
%f20
ldd
[%
o1
+
0x030
],
%f12
!
MS
ldd
[%
o1
+
0x030
],
%f12
faligndata
%f6
,
%f8
,
%f22
!
FGA
Group14
faligndata
%f6
,
%f8
,
%f22
ldd
[%
o1
+
0x038
],
%f14
!
MS
ldd
[%
o1
+
0x038
],
%f14
faligndata
%f8
,
%f10
,
%f24
!
FGA
Group15
faligndata
%f8
,
%f10
,
%f24
cmp
%
g1
,
0
!
A0
cmp
%
g1
,
0
be
,
pt
%
icc
,
1
f
!
BR
be
,
pt
%
XCC
,
1
f
add
%
o0
,
0x40
,
%
o0
!
A1
add
%
o0
,
0x40
,
%
o0
ldd
[%
o1
+
0x040
],
%f0
!
MS
ldd
[%
o1
+
0x040
],
%f0
1
:
faligndata
%f10
,
%f12
,
%f26
!
FGA
Group16
1
:
faligndata
%f10
,
%f12
,
%f26
faligndata
%f12
,
%f14
,
%f28
!
FGA
Group17
faligndata
%f12
,
%f14
,
%f28
faligndata
%f14
,
%f0
,
%f30
!
FGA
Group18
faligndata
%f14
,
%f0
,
%f30
stda
%f16
,
[%
o0
]
ASI_BLK_P
!
MS
stda
%f16
,
[%
o0
]
ASI_BLK_P
add
%
o0
,
0x40
,
%
o0
!
A0
add
%
o0
,
0x40
,
%
o0
add
%
o1
,
0x40
,
%
o1
!
A1
add
%
o1
,
0x40
,
%
o1
#ifdef __KERNEL__
#ifdef __KERNEL__
.
globl
U3memcpy_nop_2_3
.
globl
U3memcpy_nop_2_3
U3memcpy_nop_2_3
:
U3memcpy_nop_2_3
:
...
@@ -268,161 +191,143 @@ U3memcpy_nop_2_3:
...
@@ -268,161 +191,143 @@ U3memcpy_nop_2_3:
stxa
%
g0
,
[%
o3
]
ASI_DMMU
!
Flush
P
-
cache
stxa
%
g0
,
[%
o3
]
ASI_DMMU
!
Flush
P
-
cache
stxa
%
g3
,
[%
g0
]
ASI_DCU_CONTROL_REG
!
Disable
P
-
cache
stxa
%
g3
,
[%
g0
]
ASI_DCU_CONTROL_REG
!
Disable
P
-
cache
#endif
#endif
membar
#
Sync
!
MS
Group26
(
7
-
cycle
stall
)
membar
#
Sync
/
*
Now
we
copy
the
(
len
modulo
64
)
bytes
at
the
end
.
/
*
Now
we
copy
the
(
len
modulo
64
)
bytes
at
the
end
.
*
Note
how
we
borrow
the
%f0
loaded
above
.
*
Note
how
we
borrow
the
%f0
loaded
above
.
*
*
*
Also
notice
how
this
code
is
careful
not
to
perform
a
*
Also
notice
how
this
code
is
careful
not
to
perform
a
*
load
past
the
end
of
the
src
buffer
just
like
similar
*
load
past
the
end
of
the
src
buffer
.
*
code
found
in
U3memcpy_toosmall
processing
.
*/
*/
U3memcpy_
loopend
:
loopend
:
and
%
o2
,
0x3f
,
%
o2
!
A0
Group
and
%
o2
,
0x3f
,
%
o2
andcc
%
o2
,
0x38
,
%
g2
!
A0
Group
andcc
%
o2
,
0x38
,
%
g2
be
,
pn
%
icc
,
U3memcpy_endcruft
!
BR
be
,
pn
%
XCC
,
endcruft
subcc
%
g2
,
0x8
,
%
g2
!
A1
subcc
%
g2
,
0x8
,
%
g2
be
,
pn
%
icc
,
U3memcpy_endcruft
!
BR
Group
be
,
pn
%
XCC
,
endcruft
cmp
%
g1
,
0
!
A0
cmp
%
g1
,
0
be
,
a
,
pt
%
icc
,
1
f
!
BR
Group
be
,
a
,
pt
%
XCC
,
1
f
ldd
[%
o1
+
0x00
],
%f0
!
MS
ldd
[%
o1
+
0x00
],
%f0
1
:
ldd
[%
o1
+
0x08
],
%f2
!
MS
Group
1
:
ldd
[%
o1
+
0x08
],
%f2
add
%
o1
,
0x8
,
%
o1
!
A0
add
%
o1
,
0x8
,
%
o1
sub
%
o2
,
0x8
,
%
o2
!
A1
sub
%
o2
,
0x8
,
%
o2
subcc
%
g2
,
0x8
,
%
g2
!
A0
Group
subcc
%
g2
,
0x8
,
%
g2
faligndata
%f0
,
%f2
,
%f8
!
FGA
Group
faligndata
%f0
,
%f2
,
%f8
std
%f8
,
[%
o0
+
0x00
]
!
MS
(
XXX
does
it
stall
here
?
XXX
)
std
%f8
,
[%
o0
+
0x00
]
be
,
pn
%
icc
,
U3memcpy_endcruft
!
BR
be
,
pn
%
XCC
,
endcruft
add
%
o0
,
0x8
,
%
o0
!
A0
add
%
o0
,
0x8
,
%
o0
ldd
[%
o1
+
0x08
],
%f0
!
MS
Group
ldd
[%
o1
+
0x08
],
%f0
add
%
o1
,
0x8
,
%
o1
!
A0
add
%
o1
,
0x8
,
%
o1
sub
%
o2
,
0x8
,
%
o2
!
A1
sub
%
o2
,
0x8
,
%
o2
subcc
%
g2
,
0x8
,
%
g2
!
A0
Group
subcc
%
g2
,
0x8
,
%
g2
faligndata
%f2
,
%f0
,
%f8
!
FGA
faligndata
%f2
,
%f0
,
%f8
std
%f8
,
[%
o0
+
0x00
]
!
MS
(
XXX
does
it
stall
here
?
XXX
)
std
%f8
,
[%
o0
+
0x00
]
bne
,
pn
%
icc
,
1
b
!
BR
bne
,
pn
%
XCC
,
1
b
add
%
o0
,
0x8
,
%
o0
!
A0
Group
add
%
o0
,
0x8
,
%
o0
/
*
If
anything
is
left
,
we
copy
it
one
byte
at
a
time
.
/
*
If
anything
is
left
,
we
copy
it
one
byte
at
a
time
.
*
Note
that
%
g1
is
(
src
&
0x3
)
saved
above
before
the
*
Note
that
%
g1
is
(
src
&
0x3
)
saved
above
before
the
*
alignaddr
was
performed
.
*
alignaddr
was
performed
.
*/
*/
U3memcpy_
endcruft
:
endcruft
:
cmp
%
o2
,
0
cmp
%
o2
,
0
add
%
o1
,
%
g1
,
%
o1
add
%
o1
,
%
g1
,
%
o1
VISExitHalf
VISExitHalf
be
,
pn
%
icc
,
U3memcpy_short_ret
be
,
pn
%
XCC
,
out
nop
sub
%
o0
,
%
o1
,
%
o3
ba
,
a
,
pt
%
xcc
,
U3memcpy_short
/
*
If
we
get
here
,
then
32
<=
len
<
(
6
*
64
)
*/
andcc
%
g1
,
0x7
,
%
g0
U3memcpy_toosmall
:
bne
,
pn
%
icc
,
small_copy_unaligned
andcc
%
o2
,
0x8
,
%
g0
be
,
pt
%
icc
,
1
f
nop
ldx
[%
o1
],
%
o5
stx
%
o5
,
[%
o1
+
%
o3
]
add
%
o1
,
0x8
,
%
o1
#ifdef SMALL_COPY_USES_FPU
1
:
andcc
%
o2
,
0x4
,
%
g0
be
,
pt
%
icc
,
1
f
nop
lduw
[%
o1
],
%
o5
stw
%
o5
,
[%
o1
+
%
o3
]
add
%
o1
,
0x4
,
%
o1
/
*
Is
'dst'
already
aligned
on
an
8
-
byte
boundary
?
*/
1
:
andcc
%
o2
,
0x2
,
%
g0
be
,
pt
%
xcc
,
2
f
!
BR
Group
be
,
pt
%
icc
,
1
f
nop
lduh
[%
o1
],
%
o5
sth
%
o5
,
[%
o1
+
%
o3
]
add
%
o1
,
0x2
,
%
o1
/
*
Compute
abs
((
dst
&
7
)
-
8
)
into
%
g2
.
This
is
the
number
1
:
andcc
%
o2
,
0x1
,
%
g0
*
of
bytes
to
copy
to
make
'dst'
8
-
byte
aligned
.
We
pre
-
be
,
pt
%
icc
,
out
*
subtract
this
from
'len'
.
nop
*/
ldub
[%
o1
],
%
o5
sub
%
g2
,
0x8
,
%
g2
!
A0
ba
,
pt
%
xcc
,
out
sub
%
g0
,
%
g2
,
%
g2
!
A0
Group
(
reg
-
dep
)
stb
%
o5
,
[%
o1
+
%
o3
]
sub
%
o2
,
%
g2
,
%
o2
!
A0
Group
(
reg
-
dep
)
medium_copy
:
/
*
16
<
len
<=
64
*/
bne
,
pn
%
XCC
,
small_copy_unaligned
sub
%
o0
,
%
o1
,
%
o3
medium_copy_aligned
:
andn
%
o2
,
0x7
,
%
o4
and
%
o2
,
0x7
,
%
o2
1
:
subcc
%
o4
,
0x8
,
%
o4
ldx
[%
o1
],
%
o5
stx
%
o5
,
[%
o1
+
%
o3
]
bgu
,
pt
%
XCC
,
1
b
add
%
o1
,
0x8
,
%
o1
andcc
%
o2
,
0x4
,
%
g0
be
,
pt
%
XCC
,
1
f
nop
sub
%
o2
,
0x4
,
%
o2
lduw
[%
o1
],
%
o5
stw
%
o5
,
[%
o1
+
%
o3
]
add
%
o1
,
0x4
,
%
o1
1
:
cmp
%
o2
,
0
be
,
pt
%
XCC
,
out
nop
ba
,
pt
%
xcc
,
small_copy_unaligned
nop
/
*
Copy
%
g2
bytes
from
src
to
dst
,
one
byte
at
a
time
.
*/
small_copy
:
/
*
0
<
len
<=
16
*/
1
:
ldub
[%
o1
+
0x00
],
%
o3
!
MS
(
Group
)
(%
o3
in
3
cycles
)
andcc
%
o3
,
0x3
,
%
g0
add
%
o1
,
0x1
,
%
o1
!
A1
bne
,
pn
%
XCC
,
small_copy_unaligned
add
%
o0
,
0x1
,
%
o0
!
A0
Group
sub
%
o0
,
%
o1
,
%
o3
subcc
%
g2
,
0x1
,
%
g2
!
A1
bg
,
pt
%
icc
,
1
b
!
BR
Group
small_copy_aligned
:
stb
%
o3
,
[%
o0
+
-
1
]
!
MS
Group
subcc
%
o2
,
4
,
%
o2
lduw
[%
o1
],
%
g1
stw
%
g1
,
[%
o1
+
%
o3
]
bgu
,
pt
%
XCC
,
small_copy_aligned
add
%
o1
,
4
,
%
o1
2
:
VISEntryHalf
!
MS
+
MS
out
:
retl
mov
%
g5
,
%
o0
/
*
Compute
(
len
-
(
len
%
8
))
into
%
g2
.
This
is
guaranteed
.
align
32
*
to
be
nonzero
.
small_copy_unaligned
:
*/
subcc
%
o2
,
1
,
%
o2
andn
%
o2
,
0x7
,
%
g2
!
A0
Group
ldub
[%
o1
],
%
g1
stb
%
g1
,
[%
o1
+
%
o3
]
/
*
You
may
read
this
and
believe
that
it
allows
reading
bgu
,
pt
%
XCC
,
small_copy_unaligned
*
one
8
-
byte
longword
past
the
end
of
src
.
It
actually
add
%
o1
,
1
,
%
o1
*
does
not
,
as
%
g2
is
subtracted
as
loads
are
done
from
retl
*
src
,
so
we
always
stop
before
running
off
the
end
.
mov
%
g5
,
%
o0
*
Also
,
we
are
guaranteed
to
have
at
least
0x10
bytes
*
to
move
here
.
/
*
Act
like
copy_
{
to
,
in
}
_user
(),
ie
.
return
zero
instead
*
of
original
destination
pointer
.
This
is
invoked
when
*
copy_
{
to
,
in
}
_user
()
finds
that
%
asi
is
kernel
space
.
*/
*/
sub
%
g2
,
0x8
,
%
g2
!
A0
Group
(
reg
-
dep
)
.
globl
U3memcpy_user_stub
alignaddr
%
o1
,
%
g0
,
%
g1
!
MS
(
Break
-
after
)
U3memcpy_user_stub
:
ldd
[%
g1
+
0x00
],
%f0
!
MS
Group
(
1
-
cycle
stall
)
save
%
sp
,
-
192
,
%
sp
add
%
g1
,
0x8
,
%
g1
!
A0
mov
%
i0
,
%
o0
mov
%
i1
,
%
o1
1
:
ldd
[%
g1
+
0x00
],
%f2
!
MS
Group
call
U3memcpy
add
%
g1
,
0x8
,
%
g1
!
A0
mov
%
i2
,
%
o2
sub
%
o2
,
0x8
,
%
o2
!
A1
ret
subcc
%
g2
,
0x8
,
%
g2
!
A0
Group
restore
%
g0
,
%
g0
,
%
o0
faligndata
%f0
,
%f2
,
%f8
!
FGA
Group
(
1
-
cycle
stall
)
std
%f8
,
[%
o0
+
0x00
]
!
MS
Group
(
2
-
cycle
stall
)
add
%
o1
,
0x8
,
%
o1
!
A0
be
,
pn
%
icc
,
2
f
!
BR
add
%
o0
,
0x8
,
%
o0
!
A1
ldd
[%
g1
+
0x00
],
%f0
!
MS
Group
add
%
g1
,
0x8
,
%
g1
!
A0
sub
%
o2
,
0x8
,
%
o2
!
A1
subcc
%
g2
,
0x8
,
%
g2
!
A0
Group
faligndata
%f2
,
%f0
,
%f8
!
FGA
Group
(
1
-
cycle
stall
)
std
%f8
,
[%
o0
+
0x00
]
!
MS
Group
(
2
-
cycle
stall
)
add
%
o1
,
0x8
,
%
o1
!
A0
bne
,
pn
%
icc
,
1
b
!
BR
add
%
o0
,
0x8
,
%
o0
!
A1
/
*
Nothing
left
to
copy
?
*/
2
:
cmp
%
o2
,
0
!
A0
Group
VISExitHalf
!
A0
+
MS
be
,
pn
%
icc
,
U3memcpy_short_ret
!
BR
Group
nop
!
A0
ba
,
a
,
pt
%
xcc
,
U3memcpy_short
!
BR
Group
#else /* !(SMALL_COPY_USES_FPU) */
xor
%
o1
,
%
o0
,
%
g2
andcc
%
g2
,
0x7
,
%
g0
bne
,
pn
%
icc
,
U3memcpy_short
andcc
%
o1
,
0x7
,
%
g2
be
,
pt
%
xcc
,
2
f
sub
%
g2
,
0x8
,
%
g2
sub
%
g0
,
%
g2
,
%
g2
sub
%
o2
,
%
g2
,
%
o2
1
:
ldub
[%
o1
+
0x00
],
%
o3
add
%
o1
,
0x1
,
%
o1
add
%
o0
,
0x1
,
%
o0
subcc
%
g2
,
0x1
,
%
g2
bg
,
pt
%
icc
,
1
b
stb
%
o3
,
[%
o0
+
-
1
]
2
:
andn
%
o2
,
0x7
,
%
g2
sub
%
o2
,
%
g2
,
%
o2
3
:
ldx
[%
o1
+
0x00
],
%
o3
add
%
o1
,
0x8
,
%
o1
add
%
o0
,
0x8
,
%
o0
subcc
%
g2
,
0x8
,
%
g2
bg
,
pt
%
icc
,
3
b
stx
%
o3
,
[%
o0
+
-
8
]
cmp
%
o2
,
0
bne
,
pn
%
icc
,
U3memcpy_short
nop
ba
,
a
,
pt
%
xcc
,
U3memcpy_short_ret
#endif /* !(SMALL_COPY_USES_FPU) */
arch/sparc64/lib/VIScopy.S
View file @
da09d2f1
...
@@ -306,11 +306,7 @@
...
@@ -306,11 +306,7 @@
.
globl
__memcpy_begin
.
globl
__memcpy_begin
__memcpy_begin
:
__memcpy_begin
:
.
globl
__memcpy
.
type
__memcpy
,
@
function
memcpy_private
:
memcpy_private
:
__memcpy
:
memcpy
:
mov
ASI_P
,
asi_src
!
IEU0
Group
memcpy
:
mov
ASI_P
,
asi_src
!
IEU0
Group
brnz
,
pt
%
o2
,
__memcpy_entry
!
CTI
brnz
,
pt
%
o2
,
__memcpy_entry
!
CTI
mov
ASI_P
,
asi_dest
!
IEU1
mov
ASI_P
,
asi_dest
!
IEU1
...
...
arch/sparc64/lib/splock.S
View file @
da09d2f1
...
@@ -6,6 +6,18 @@
...
@@ -6,6 +6,18 @@
.
text
.
text
.
align
64
.
align
64
.
globl
_raw_spin_lock
_raw_spin_lock
:
/
*
%
o0
=
lock_ptr
*/
1
:
ldstub
[%
o0
],
%
g7
brnz
,
pn
%
g7
,
2
f
membar
#
StoreLoad
|
#
StoreStore
retl
nop
2
:
ldub
[%
o0
],
%
g7
brnz
,
pt
%
g7
,
2
b
membar
#
LoadLoad
ba
,
a
,
pt
%
xcc
,
1
b
.
globl
_raw_spin_lock_flags
.
globl
_raw_spin_lock_flags
_raw_spin_lock_flags
:
/
*
%
o0
=
lock_ptr
,
%
o1
=
irq_flags
*/
_raw_spin_lock_flags
:
/
*
%
o0
=
lock_ptr
,
%
o1
=
irq_flags
*/
1
:
ldstub
[%
o0
],
%
g7
1
:
ldstub
[%
o0
],
%
g7
...
...
drivers/sbus/char/bbc_envctrl.c
View file @
da09d2f1
...
@@ -7,6 +7,7 @@
...
@@ -7,6 +7,7 @@
#include <linux/kernel.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/slab.h>
#include <linux/delay.h>
#include <asm/oplib.h>
#include <asm/oplib.h>
#include <asm/ebus.h>
#include <asm/ebus.h>
#define __KERNEL_SYSCALLS__
#define __KERNEL_SYSCALLS__
...
@@ -622,9 +623,7 @@ void bbc_envctrl_cleanup(void)
...
@@ -622,9 +623,7 @@ void bbc_envctrl_cleanup(void)
read_unlock
(
&
tasklist_lock
);
read_unlock
(
&
tasklist_lock
);
if
(
!
found
)
if
(
!
found
)
break
;
break
;
current
->
state
=
TASK_INTERRUPTIBLE
;
msleep
(
1000
);
schedule_timeout
(
HZ
);
current
->
state
=
TASK_RUNNING
;
}
}
kenvctrld_task
=
NULL
;
kenvctrld_task
=
NULL
;
}
}
...
...
drivers/sbus/char/envctrl.c
View file @
da09d2f1
...
@@ -1181,8 +1181,7 @@ static void __exit envctrl_cleanup(void)
...
@@ -1181,8 +1181,7 @@ static void __exit envctrl_cleanup(void)
if
(
!
found
)
if
(
!
found
)
break
;
break
;
current
->
state
=
TASK_INTERRUPTIBLE
;
msleep
(
1000
);
schedule_timeout
(
HZ
);
}
}
kenvctrld_task
=
NULL
;
kenvctrld_task
=
NULL
;
}
}
...
...
include/asm-sparc/pci.h
View file @
da09d2f1
...
@@ -87,12 +87,6 @@ extern dma_addr_t pci_map_page(struct pci_dev *hwdev, struct page *page,
...
@@ -87,12 +87,6 @@ extern dma_addr_t pci_map_page(struct pci_dev *hwdev, struct page *page,
extern
void
pci_unmap_page
(
struct
pci_dev
*
hwdev
,
extern
void
pci_unmap_page
(
struct
pci_dev
*
hwdev
,
dma_addr_t
dma_address
,
size_t
size
,
int
direction
);
dma_addr_t
dma_address
,
size_t
size
,
int
direction
);
/* map_page and map_single cannot fail */
static
inline
int
pci_dma_mapping_error
(
dma_addr_t
dma_addr
)
{
return
0
;
}
/* Map a set of buffers described by scatterlist in streaming
/* Map a set of buffers described by scatterlist in streaming
* mode for DMA. This is the scather-gather version of the
* mode for DMA. This is the scather-gather version of the
* above pci_map_single interface. Here the scatter gather list
* above pci_map_single interface. Here the scatter gather list
...
...
include/asm-sparc64/page.h
View file @
da09d2f1
...
@@ -18,7 +18,7 @@ extern void _clear_page(void *page);
...
@@ -18,7 +18,7 @@ extern void _clear_page(void *page);
#define clear_page(X) _clear_page((void *)(X))
#define clear_page(X) _clear_page((void *)(X))
struct
page
;
struct
page
;
extern
void
clear_user_page
(
void
*
addr
,
unsigned
long
vaddr
,
struct
page
*
page
);
extern
void
clear_user_page
(
void
*
addr
,
unsigned
long
vaddr
,
struct
page
*
page
);
#define copy_page(X,Y)
__
memcpy((void *)(X), (void *)(Y), PAGE_SIZE)
#define copy_page(X,Y) memcpy((void *)(X), (void *)(Y), PAGE_SIZE)
extern
void
copy_user_page
(
void
*
to
,
void
*
from
,
unsigned
long
vaddr
,
struct
page
*
topage
);
extern
void
copy_user_page
(
void
*
to
,
void
*
from
,
unsigned
long
vaddr
,
struct
page
*
topage
);
/* GROSS, defining this makes gcc pass these types as aggregates,
/* GROSS, defining this makes gcc pass these types as aggregates,
...
...
include/asm-sparc64/spinlock.h
View file @
da09d2f1
...
@@ -41,22 +41,8 @@ typedef unsigned char spinlock_t;
...
@@ -41,22 +41,8 @@ typedef unsigned char spinlock_t;
do { membar("#LoadLoad"); \
do { membar("#LoadLoad"); \
} while(*((volatile unsigned char *)lock))
} while(*((volatile unsigned char *)lock))
static
__inline__
void
_raw_spin_lock
(
spinlock_t
*
lock
)
/* arch/sparc64/lib/spinlock.S */
{
extern
void
_raw_spin_lock
(
spinlock_t
*
lock
);
__asm__
__volatile__
(
"1: ldstub [%0], %%g7
\n
"
" brnz,pn %%g7, 2f
\n
"
" membar #StoreLoad | #StoreStore
\n
"
" .subsection 2
\n
"
"2: ldub [%0], %%g7
\n
"
" brnz,pt %%g7, 2b
\n
"
" membar #LoadLoad
\n
"
" b,a,pt %%xcc, 1b
\n
"
" .previous
\n
"
:
/* no outputs */
:
"r"
(
lock
)
:
"g7"
,
"memory"
);
}
static
__inline__
int
_raw_spin_trylock
(
spinlock_t
*
lock
)
static
__inline__
int
_raw_spin_trylock
(
spinlock_t
*
lock
)
{
{
...
...
include/asm-sparc64/string.h
View file @
da09d2f1
...
@@ -16,9 +16,7 @@
...
@@ -16,9 +16,7 @@
#include <asm/asi.h>
#include <asm/asi.h>
extern
void
__memmove
(
void
*
,
const
void
*
,
__kernel_size_t
);
extern
void
__memmove
(
void
*
,
const
void
*
,
__kernel_size_t
);
extern
__kernel_size_t
__memcpy
(
void
*
,
const
void
*
,
__kernel_size_t
);
extern
void
*
__memset
(
void
*
,
int
,
__kernel_size_t
);
extern
void
*
__memset
(
void
*
,
int
,
__kernel_size_t
);
extern
void
*
__builtin_memcpy
(
void
*
,
const
void
*
,
__kernel_size_t
);
extern
void
*
__builtin_memset
(
void
*
,
int
,
__kernel_size_t
);
extern
void
*
__builtin_memset
(
void
*
,
int
,
__kernel_size_t
);
#ifndef EXPORT_SYMTAB_STROPS
#ifndef EXPORT_SYMTAB_STROPS
...
@@ -37,29 +35,7 @@ extern void *__builtin_memset(void *,int,__kernel_size_t);
...
@@ -37,29 +35,7 @@ extern void *__builtin_memset(void *,int,__kernel_size_t);
#define __HAVE_ARCH_MEMCPY
#define __HAVE_ARCH_MEMCPY
static
inline
void
*
__constant_memcpy
(
void
*
to
,
const
void
*
from
,
__kernel_size_t
n
)
extern
void
*
memcpy
(
void
*
,
const
void
*
,
__kernel_size_t
);
{
if
(
n
)
{
if
(
n
<=
32
)
{
__builtin_memcpy
(
to
,
from
,
n
);
}
else
{
__memcpy
(
to
,
from
,
n
);
}
}
return
to
;
}
static
inline
void
*
__nonconstant_memcpy
(
void
*
to
,
const
void
*
from
,
__kernel_size_t
n
)
{
__memcpy
(
to
,
from
,
n
);
return
to
;
}
#undef memcpy
#define memcpy(t, f, n) \
(__builtin_constant_p(n) ? \
__constant_memcpy((t),(f),(n)) : \
__nonconstant_memcpy((t),(f),(n)))
#define __HAVE_ARCH_MEMSET
#define __HAVE_ARCH_MEMSET
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment