Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
L
linux
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
linux
Commits
da09d2f1
Commit
da09d2f1
authored
Jul 28, 2004
by
Linus Torvalds
Browse files
Options
Browse Files
Download
Plain Diff
Merge
bk://kernel.bkbits.net/davem/sparc-2.6
into ppc970.osdl.org:/home/torvalds/v2.6/linux
parents
ee61f1e4
a6df905f
Changes
16
Show whitespace changes
Inline
Side-by-side
Showing
16 changed files
with
859 additions
and
1724 deletions
+859
-1724
arch/sparc64/defconfig
arch/sparc64/defconfig
+3
-9
arch/sparc64/kernel/sparc64_ksyms.c
arch/sparc64/kernel/sparc64_ksyms.c
+1
-1
arch/sparc64/kernel/sys_sparc32.c
arch/sparc64/kernel/sys_sparc32.c
+0
-132
arch/sparc64/kernel/systbls.S
arch/sparc64/kernel/systbls.S
+2
-2
arch/sparc64/lib/U3copy_from_user.S
arch/sparc64/lib/U3copy_from_user.S
+269
-358
arch/sparc64/lib/U3copy_in_user.S
arch/sparc64/lib/U3copy_in_user.S
+76
-467
arch/sparc64/lib/U3copy_to_user.S
arch/sparc64/lib/U3copy_to_user.S
+248
-362
arch/sparc64/lib/U3memcpy.S
arch/sparc64/lib/U3memcpy.S
+241
-336
arch/sparc64/lib/VIScopy.S
arch/sparc64/lib/VIScopy.S
+0
-4
arch/sparc64/lib/splock.S
arch/sparc64/lib/splock.S
+12
-0
drivers/sbus/char/bbc_envctrl.c
drivers/sbus/char/bbc_envctrl.c
+2
-3
drivers/sbus/char/envctrl.c
drivers/sbus/char/envctrl.c
+1
-2
include/asm-sparc/pci.h
include/asm-sparc/pci.h
+0
-6
include/asm-sparc64/page.h
include/asm-sparc64/page.h
+1
-1
include/asm-sparc64/spinlock.h
include/asm-sparc64/spinlock.h
+2
-16
include/asm-sparc64/string.h
include/asm-sparc64/string.h
+1
-25
No files found.
arch/sparc64/defconfig
View file @
da09d2f1
...
...
@@ -90,7 +90,6 @@ CONFIG_BINFMT_ELF32=y
# CONFIG_BINFMT_AOUT32 is not set
CONFIG_BINFMT_ELF=y
CONFIG_BINFMT_MISC=m
# CONFIG_SUNOS_EMUL is not set
CONFIG_SOLARIS_EMUL=m
#
...
...
@@ -122,7 +121,6 @@ CONFIG_FB=y
# CONFIG_FB_CIRRUS is not set
CONFIG_FB_PM2=y
# CONFIG_FB_PM2_FIFO_DISCONNECT is not set
# CONFIG_FB_CYBER2000 is not set
# CONFIG_FB_ASILIANT is not set
# CONFIG_FB_IMSTT is not set
# CONFIG_FB_BW2 is not set
...
...
@@ -180,7 +178,6 @@ CONFIG_LOGO_SUN_CLUT224=y
#
# Serial drivers
#
# CONFIG_SERIAL_8250 is not set
#
# Non-8250 serial port support
...
...
@@ -342,7 +339,6 @@ CONFIG_SCSI_SATA_SIL=m
CONFIG_SCSI_SATA_SIS=m
CONFIG_SCSI_SATA_VIA=m
CONFIG_SCSI_SATA_VITESSE=m
# CONFIG_SCSI_BUSLOGIC is not set
CONFIG_SCSI_DMX3191D=m
CONFIG_SCSI_EATA_PIO=m
# CONFIG_SCSI_FUTURE_DOMAIN is not set
...
...
@@ -679,6 +675,9 @@ CONFIG_NET_DIVERT=y
# QoS and/or fair queueing
#
CONFIG_NET_SCHED=y
# CONFIG_NET_SCH_CLK_JIFFIES is not set
# CONFIG_NET_SCH_CLK_GETTIMEOFDAY is not set
CONFIG_NET_SCH_CLK_CPU=y
CONFIG_NET_SCH_CBQ=m
CONFIG_NET_SCH_HTB=m
CONFIG_NET_SCH_HFSC=m
...
...
@@ -1522,11 +1521,6 @@ CONFIG_SND_VX222=m
CONFIG_SND_SUN_AMD7930=m
CONFIG_SND_SUN_CS4231=m
#
# Open Sound System
#
# CONFIG_SOUND_PRIME is not set
#
# USB support
#
...
...
arch/sparc64/kernel/sparc64_ksyms.c
View file @
da09d2f1
...
...
@@ -135,6 +135,7 @@ EXPORT_SYMBOL(__write_lock);
EXPORT_SYMBOL
(
__write_unlock
);
EXPORT_SYMBOL
(
__write_trylock
);
/* Out of line spin-locking implementation. */
EXPORT_SYMBOL
(
_raw_spin_lock
);
EXPORT_SYMBOL
(
_raw_spin_lock_flags
);
#endif
...
...
@@ -333,7 +334,6 @@ EXPORT_SYMBOL(sys_close);
#endif
/* Special internal versions of library functions. */
EXPORT_SYMBOL
(
__memcpy
);
EXPORT_SYMBOL
(
__memset
);
EXPORT_SYMBOL
(
_clear_page
);
EXPORT_SYMBOL
(
clear_user_page
);
...
...
arch/sparc64/kernel/sys_sparc32.c
View file @
da09d2f1
...
...
@@ -867,138 +867,6 @@ asmlinkage long sys32_ftruncate64(unsigned int fd, unsigned long high, unsigned
return
sys_ftruncate
(
fd
,
(
high
<<
32
)
|
low
);
}
/* readdir & getdents */
#define NAME_OFFSET(de) ((int) ((de)->d_name - (char __user *) (de)))
#define ROUND_UP(x) (((x)+sizeof(u32)-1) & ~(sizeof(u32)-1))
struct
old_linux_dirent32
{
u32
d_ino
;
u32
d_offset
;
unsigned
short
d_namlen
;
char
d_name
[
1
];
};
struct
readdir_callback32
{
struct
old_linux_dirent32
__user
*
dirent
;
int
count
;
};
static
int
fillonedir
(
void
*
__buf
,
const
char
*
name
,
int
namlen
,
loff_t
offset
,
ino_t
ino
,
unsigned
int
d_type
)
{
struct
readdir_callback32
*
buf
=
(
struct
readdir_callback32
*
)
__buf
;
struct
old_linux_dirent32
__user
*
dirent
;
if
(
buf
->
count
)
return
-
EINVAL
;
buf
->
count
++
;
dirent
=
buf
->
dirent
;
put_user
(
ino
,
&
dirent
->
d_ino
);
put_user
(
offset
,
&
dirent
->
d_offset
);
put_user
(
namlen
,
&
dirent
->
d_namlen
);
copy_to_user
(
dirent
->
d_name
,
name
,
namlen
);
put_user
(
0
,
dirent
->
d_name
+
namlen
);
return
0
;
}
asmlinkage
long
old32_readdir
(
unsigned
int
fd
,
struct
old_linux_dirent32
__user
*
dirent
,
unsigned
int
count
)
{
int
error
=
-
EBADF
;
struct
file
*
file
;
struct
readdir_callback32
buf
;
file
=
fget
(
fd
);
if
(
!
file
)
goto
out
;
buf
.
count
=
0
;
buf
.
dirent
=
dirent
;
error
=
vfs_readdir
(
file
,
fillonedir
,
&
buf
);
if
(
error
<
0
)
goto
out_putf
;
error
=
buf
.
count
;
out_putf:
fput
(
file
);
out:
return
error
;
}
struct
linux_dirent32
{
u32
d_ino
;
u32
d_off
;
unsigned
short
d_reclen
;
char
d_name
[
1
];
};
struct
getdents_callback32
{
struct
linux_dirent32
__user
*
current_dir
;
struct
linux_dirent32
__user
*
previous
;
int
count
;
int
error
;
};
static
int
filldir
(
void
*
__buf
,
const
char
*
name
,
int
namlen
,
loff_t
offset
,
ino_t
ino
,
unsigned
int
d_type
)
{
struct
linux_dirent32
__user
*
dirent
;
struct
getdents_callback32
*
buf
=
(
struct
getdents_callback32
*
)
__buf
;
int
reclen
=
ROUND_UP
(
NAME_OFFSET
(
dirent
)
+
namlen
+
2
);
buf
->
error
=
-
EINVAL
;
/* only used if we fail.. */
if
(
reclen
>
buf
->
count
)
return
-
EINVAL
;
dirent
=
buf
->
previous
;
if
(
dirent
)
put_user
(
offset
,
&
dirent
->
d_off
);
dirent
=
buf
->
current_dir
;
buf
->
previous
=
dirent
;
put_user
(
ino
,
&
dirent
->
d_ino
);
put_user
(
reclen
,
&
dirent
->
d_reclen
);
copy_to_user
(
dirent
->
d_name
,
name
,
namlen
);
put_user
(
0
,
dirent
->
d_name
+
namlen
);
put_user
(
d_type
,
(
char
__user
*
)
dirent
+
reclen
-
1
);
dirent
=
(
void
__user
*
)
dirent
+
reclen
;
buf
->
current_dir
=
dirent
;
buf
->
count
-=
reclen
;
return
0
;
}
asmlinkage
long
sys32_getdents
(
unsigned
int
fd
,
struct
linux_dirent32
__user
*
dirent
,
unsigned
int
count
)
{
struct
file
*
file
;
struct
linux_dirent32
__user
*
lastdirent
;
struct
getdents_callback32
buf
;
int
error
=
-
EBADF
;
file
=
fget
(
fd
);
if
(
!
file
)
goto
out
;
buf
.
current_dir
=
dirent
;
buf
.
previous
=
NULL
;
buf
.
count
=
count
;
buf
.
error
=
0
;
error
=
vfs_readdir
(
file
,
filldir
,
&
buf
);
if
(
error
<
0
)
goto
out_putf
;
lastdirent
=
buf
.
previous
;
error
=
buf
.
error
;
if
(
lastdirent
)
{
put_user
(
file
->
f_pos
,
&
lastdirent
->
d_off
);
error
=
count
-
buf
.
count
;
}
out_putf:
fput
(
file
);
out:
return
error
;
}
/* end of readdir & getdents */
int
cp_compat_stat
(
struct
kstat
*
stat
,
struct
compat_stat
__user
*
statbuf
)
{
int
err
;
...
...
arch/sparc64/kernel/systbls.S
View file @
da09d2f1
...
...
@@ -54,13 +54,13 @@ sys_call_table32:
.
word
compat_sys_fcntl64
,
sys_ni_syscall
,
compat_sys_statfs
,
compat_sys_fstatfs
,
sys_oldumount
/*
160
*/
.
word
compat_sys_sched_setaffinity
,
compat_sys_sched_getaffinity
,
sys32_getdomainname
,
sys32_setdomainname
,
sys_nis_syscall
.
word
sys_quotactl
,
sys_set_tid_address
,
compat_sys_mount
,
sys_ustat
,
sys32_setxattr
/*
170
*/
.
word
sys32_lsetxattr
,
sys32_fsetxattr
,
sys_getxattr
,
sys_lgetxattr
,
sys32
_getdents
/*
170
*/
.
word
sys32_lsetxattr
,
sys32_fsetxattr
,
sys_getxattr
,
sys_lgetxattr
,
compat_sys
_getdents
.
word
sys_setsid
,
sys_fchdir
,
sys32_fgetxattr
,
sys_listxattr
,
sys_llistxattr
/*
180
*/
.
word
sys32_flistxattr
,
sys_removexattr
,
sys_lremovexattr
,
compat_sys_sigpending
,
sys_ni_syscall
.
word
sys32_setpgid
,
sys32_fremovexattr
,
sys32_tkill
,
sys32_exit_group
,
sparc64_newuname
/*
190
*/
.
word
sys32_init_module
,
sparc64_personality
,
sys_remap_file_pages
,
sys32_epoll_create
,
sys32_epoll_ctl
.
word
sys32_epoll_wait
,
sys_nis_syscall
,
sys_getppid
,
sys32_sigaction
,
sys_sgetmask
/*
200
*/
.
word
sys32_ssetmask
,
sys_sigsuspend
,
compat_sys_newlstat
,
sys_uselib
,
old32
_readdir
/*
200
*/
.
word
sys32_ssetmask
,
sys_sigsuspend
,
compat_sys_newlstat
,
sys_uselib
,
compat_old
_readdir
.
word
sys32_readahead
,
sys32_socketcall
,
sys32_syslog
,
sys32_lookup_dcookie
,
sys32_fadvise64
/*
210
*/
.
word
sys32_fadvise64_64
,
sys32_tgkill
,
sys32_waitpid
,
sys_swapoff
,
sys32_sysinfo
.
word
sys32_ipc
,
sys32_sigreturn
,
sys_clone
,
sys_nis_syscall
,
sys32_adjtimex
...
...
arch/sparc64/lib/U3copy_from_user.S
View file @
da09d2f1
/*
$Id
:
U3copy_from_user
.
S
,
v
1
.4
2002
/
01
/
15
07
:
16
:
26
davem
Exp
$
*
U3memcpy
.
S
:
UltraSparc
-
III
optimized
copy
from
userspace
.
/*
U3copy_from_user
.
S
:
UltraSparc
-
III
optimized
copy
from
userspace
.
*
*
Copyright
(
C
)
1999
,
2000
David
S
.
Miller
(
davem
@
redhat
.
com
)
*
Copyright
(
C
)
1999
,
2000
,
2004
David
S
.
Miller
(
davem
@
redhat
.
com
)
*/
#ifdef __KERNEL__
#include <asm/visasm.h>
#include <asm/asi.h>
#include <asm/dcu.h>
#include <asm/spitfire.h>
#undef SMALL_COPY_USES_FPU
#define XCC xcc
#define EXNV_RAW(x,y,a,b) \
98
:
x
,
y
; \
.
section
.
fixup
; \
.
align
4
; \
99
:
ba
U3cfu_fixup
; \
a
,
b
,
%
o1
; \
.
section
__ex_table
; \
.
align
4
; \
.
word
98
b
,
99
b
; \
.
text
; \
.
align
4
;
#define EXNV(x,y,a,b) \
98
:
x
,
y
; \
.
section
.
fixup
; \
.
align
4
; \
99
:
VISExitHalf
;
\
99
:
add
%
o1
,
%
o3
,
%
o0
;
\
ba
U3cfu_fixup
; \
a
,
b
,
%
o1
; \
.
section
__ex_table
; \
...
...
@@ -22,6 +33,32 @@
.
word
98
b
,
99
b
; \
.
text
; \
.
align
4
;
#define EXNV4(x,y,a,b) \
98
:
x
,
y
; \
.
section
.
fixup
; \
.
align
4
; \
99
:
add
%
o1
,
%
o3
,
%
o0
; \
a
,
b
,
%
o1
; \
ba
U3cfu_fixup
; \
add
%
o1
,
4
,
%
o1
; \
.
section
__ex_table
; \
.
align
4
; \
.
word
98
b
,
99
b
; \
.
text
; \
.
align
4
;
#define EXNV8(x,y,a,b) \
98
:
x
,
y
; \
.
section
.
fixup
; \
.
align
4
; \
99
:
add
%
o1
,
%
o3
,
%
o0
; \
a
,
b
,
%
o1
; \
ba
U3cfu_fixup
; \
add
%
o1
,
8
,
%
o1
; \
.
section
__ex_table
; \
.
align
4
; \
.
word
98
b
,
99
b
; \
.
text
; \
.
align
4
;
#define EX(x,y,a,b) \
98
:
x
,
y
; \
.
section
.
fixup
; \
...
...
@@ -77,18 +114,9 @@
.
word
98
b
,
99
b
; \
.
text
; \
.
align
4
;
#else
#define ASI_BLK_P 0xf0
#define FPRS_FEF 0x04
#define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs
#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
#define SMALL_COPY_USES_FPU
#define EXNV(x,y,a,b) x,y;
#define EX(x,y,a,b) x,y;
#define EX2(x,y) x,y;
#define EX3(x,y) x,y;
#define EX4(x,y) x,y;
#endif
.
register
%
g2
,#
scratch
.
register
%
g3
,#
scratch
/
*
Special
/
non
-
trivial
issues
of
this
code
:
*
...
...
@@ -111,79 +139,53 @@
.
globl
U3copy_from_user
U3copy_from_user
:
/
*
%
o0
=
dst
,
%
o1
=
src
,
%
o2
=
len
*/
#ifndef __KERNEL__
/
*
Save
away
original
'dst'
for
memcpy
return
value
.
*/
mov
%
o0
,
%
g3
!
A0
Group
#endif
/
*
Anything
to
copy
at
all
?
*/
cmp
%
o2
,
0
!
A1
ble
,
pn
%
icc
,
U3copy_from_user_short_ret
!
BR
/
*
Extremely
small
copy
?
*/
cmp
%
o2
,
31
!
A0
Group
ble
,
pn
%
icc
,
U3copy_from_user_short
!
BR
/
*
Large
enough
to
use
unrolled
prefetch
loops
?
*/
cmp
%
o2
,
0x100
!
A1
bge
,
a
,
pt
%
icc
,
U3copy_from_user_enter
!
BR
Group
andcc
%
o0
,
0x3f
,
%
g2
!
A0
ba
,
pt
%
xcc
,
U3copy_from_user_toosmall
!
BR
Group
andcc
%
o0
,
0x7
,
%
g2
!
A0
cmp
%
o2
,
0
be
,
pn
%
XCC
,
out
or
%
o0
,
%
o1
,
%
o3
cmp
%
o2
,
16
bleu
,
a
,
pn
%
XCC
,
small_copy
or
%
o3
,
%
o2
,
%
o3
.
align
32
U3copy_from_user_short
:
/
*
Copy
%
o2
bytes
from
src
to
dst
,
one
byte
at
a
time
.
*/
EXNV
(
lduba
[%
o1
+
0x00
]
%
asi
,
%
o3
,
add
%
o2
,
%
g0
)!
MS
Group
add
%
o1
,
0x1
,
%
o1
!
A0
add
%
o0
,
0x1
,
%
o0
!
A1
subcc
%
o2
,
1
,
%
o2
!
A0
Group
bg
,
pt
%
icc
,
U3copy_from_user_short
!
BR
stb
%
o3
,
[%
o0
+
-
1
]
!
MS
Group
(
1
-
cycle
stall
)
U3copy_from_user_short_ret
:
#ifdef __KERNEL__
retl
!
BR
Group
(
0
-
4
cycle
stall
)
clr
%
o0
!
A0
#else
retl
!
BR
Group
(
0
-
4
cycle
stall
)
mov
%
g3
,
%
o0
!
A0
#endif
/
*
Here
len
>=
(
6
*
64
)
and
condition
codes
reflect
execution
cmp
%
o2
,
256
blu
,
pt
%
XCC
,
medium_copy
andcc
%
o3
,
0x7
,
%
g0
ba
,
pt
%
xcc
,
enter
andcc
%
o0
,
0x3f
,
%
g2
/
*
Here
len
>=
256
and
condition
codes
reflect
execution
*
of
"andcc %o0, 0x7, %g2"
,
done
by
caller
.
*/
.
align
64
U3copy_from_user_
enter
:
enter
:
/
*
Is
'dst'
already
aligned
on
an
64
-
byte
boundary
?
*/
be
,
pt
%
xcc
,
2
f
!
BR
be
,
pt
%
XCC
,
2
f
/
*
Compute
abs
((
dst
&
0x3f
)
-
0x40
)
into
%
g2
.
This
is
the
number
*
of
bytes
to
copy
to
make
'dst'
64
-
byte
aligned
.
We
pre
-
*
subtract
this
from
'len'
.
*/
sub
%
g2
,
0x40
,
%
g2
!
A0
Group
sub
%
g0
,
%
g2
,
%
g2
!
A0
Group
sub
%
o2
,
%
g2
,
%
o2
!
A0
Group
sub
%
g2
,
0x40
,
%
g2
sub
%
g0
,
%
g2
,
%
g2
sub
%
o2
,
%
g2
,
%
o2
/
*
Copy
%
g2
bytes
from
src
to
dst
,
one
byte
at
a
time
.
*/
1
:
EXNV
(
lduba
[%
o1
+
0x00
]
%
asi
,
%
o3
,
add
%
o2
,
%
g2
)!
MS
(
Group
)
add
%
o1
,
0x1
,
%
o1
!
A1
add
%
o0
,
0x1
,
%
o0
!
A0
Group
subcc
%
g2
,
0x1
,
%
g2
!
A1
1
:
EXNV
_RAW
(
lduba
[%
o1
+
0x00
]
%
asi
,
%
o3
,
add
%
o2
,
%
g2
)
add
%
o1
,
0x1
,
%
o1
add
%
o0
,
0x1
,
%
o0
subcc
%
g2
,
0x1
,
%
g2
bg
,
pt
%
icc
,
1
b
!
BR
Group
stb
%
o3
,
[%
o0
+
-
1
]
!
MS
Group
bg
,
pt
%
XCC
,
1
b
stb
%
o3
,
[%
o0
+
-
1
]
2
:
VISEntryHalf
!
MS
+
MS
and
%
o1
,
0x7
,
%
g1
!
A1
ba
,
pt
%
xcc
,
U3copy_from_user_begin
!
BR
alignaddr
%
o1
,
%
g0
,
%
o1
!
MS
(
Break
-
after
)
2
:
VISEntryHalf
and
%
o1
,
0x7
,
%
g1
ba
,
pt
%
xcc
,
begin
alignaddr
%
o1
,
%
g0
,
%
o1
.
align
64
U3copy_from_user_
begin
:
#ifdef __KERNEL__
begin
:
.
globl
U3copy_from_user_nop_1_6
U3copy_from_user_nop_1_6
:
ldxa
[%
g0
]
ASI_DCU_CONTROL_REG
,
%
g3
...
...
@@ -192,315 +194,225 @@ U3copy_from_user_nop_1_6:
or
%
g3
,
%
o3
,
%
o3
stxa
%
o3
,
[%
g0
]
ASI_DCU_CONTROL_REG
!
Enable
P
-
cache
membar
#
Sync
#endif
prefetcha
[%
o1
+
0x000
]
%
asi
,
#
one_read
!
MS
Group1
prefetcha
[%
o1
+
0x040
]
%
asi
,
#
one_read
!
MS
Group2
andn
%
o2
,
(
0x40
-
1
),
%
o4
!
A0
prefetcha
[%
o1
+
0x080
]
%
asi
,
#
one_read
!
MS
Group3
cmp
%
o4
,
0x140
!
A0
prefetcha
[%
o1
+
0x0c0
]
%
asi
,
#
one_read
!
MS
Group4
EX
(
ldda
[%
o1
+
0x000
]
%
asi
,
%f0
,
add
%
o2
,
%
g0
)
!
MS
Group5
(
%f0
results
at
G8
)
bge
,
a
,
pt
%
icc
,
1
f
!
BR
prefetcha
[%
o1
+
0x100
]
%
asi
,
#
one_read
!
MS
Group6
1
:
EX
(
ldda
[%
o1
+
0x008
]
%
asi
,
%f2
,
add
%
o2
,
%
g0
)
!
AX
(
%f2
results
at
G9
)
cmp
%
o4
,
0x180
!
A1
bge
,
a
,
pt
%
icc
,
1
f
!
BR
prefetcha
[%
o1
+
0x140
]
%
asi
,
#
one_read
!
MS
Group7
1
:
EX
(
ldda
[%
o1
+
0x010
]
%
asi
,
%f4
,
add
%
o2
,
%
g0
)
!
AX
(
%f4
results
at
G10
)
cmp
%
o4
,
0x1c0
!
A1
bge
,
a
,
pt
%
icc
,
1
f
!
BR
prefetcha
[%
o1
+
0x180
]
%
asi
,
#
one_read
!
MS
Group8
1
:
faligndata
%f0
,
%f2
,
%f16
!
FGA
Group9
(
%f16
at
G12
)
EX
(
ldda
[%
o1
+
0x018
]
%
asi
,
%f6
,
add
%
o2
,
%
g0
)
!
AX
(
%f6
results
at
G12
)
faligndata
%f2
,
%f4
,
%f18
!
FGA
Group10
(
%f18
results
at
G13
)
EX
(
ldda
[%
o1
+
0x020
]
%
asi
,
%f8
,
add
%
o2
,
%
g0
)
!
MS
(
%f8
results
at
G13
)
faligndata
%f4
,
%f6
,
%f20
!
FGA
Group12
(
1
-
cycle
stall
,
%f20
at
G15
)
EX
(
ldda
[%
o1
+
0x028
]
%
asi
,
%f10
,
add
%
o2
,
%
g0
)
!
MS
(
%f10
results
at
G15
)
faligndata
%f6
,
%f8
,
%f22
!
FGA
Group13
(
%f22
results
at
G16
)
EX
(
ldda
[%
o1
+
0x030
]
%
asi
,
%f12
,
add
%
o2
,
%
g0
)
!
MS
(
%f12
results
at
G16
)
faligndata
%f8
,
%f10
,
%f24
!
FGA
Group15
(
1
-
cycle
stall
,
%f24
at
G18
)
EX
(
ldda
[%
o1
+
0x038
]
%
asi
,
%f14
,
add
%
o2
,
%
g0
)
!
MS
(
%f14
results
at
G18
)
faligndata
%f10
,
%f12
,
%f26
!
FGA
Group16
(
%f26
results
at
G19
)
EX
(
ldda
[%
o1
+
0x040
]
%
asi
,
%f0
,
add
%
o2
,
%
g0
)
!
MS
(
%f0
results
at
G19
)
/
*
We
only
use
the
first
loop
if
len
>
(
7
*
64
)
.
*/
subcc
%
o4
,
0x1c0
,
%
o4
!
A0
Group17
bg
,
pt
%
icc
,
U3copy_from_user_loop1
!
BR
add
%
o1
,
0x40
,
%
o1
!
A1
add
%
o4
,
0x140
,
%
o4
!
A0
Group18
ba
,
pt
%
xcc
,
U3copy_from_user_loop2
!
BR
srl
%
o4
,
6
,
%
o3
!
A0
Group19
nop
nop
nop
nop
nop
nop
nop
prefetcha
[%
o1
+
0x000
]
%
asi
,
#
one_read
prefetcha
[%
o1
+
0x040
]
%
asi
,
#
one_read
andn
%
o2
,
(
0x40
-
1
),
%
o4
prefetcha
[%
o1
+
0x080
]
%
asi
,
#
one_read
prefetcha
[%
o1
+
0x0c0
]
%
asi
,
#
one_read
EX
(
ldda
[%
o1
+
0x000
]
%
asi
,
%f0
,
add
%
o2
,
%
g0
)
prefetcha
[%
o1
+
0x100
]
%
asi
,
#
one_read
EX
(
ldda
[%
o1
+
0x008
]
%
asi
,
%f2
,
add
%
o2
,
%
g0
)
prefetcha
[%
o1
+
0x140
]
%
asi
,
#
one_read
EX
(
ldda
[%
o1
+
0x010
]
%
asi
,
%f4
,
add
%
o2
,
%
g0
)
prefetcha
[%
o1
+
0x180
]
%
asi
,
#
one_read
faligndata
%f0
,
%f2
,
%f16
EX
(
ldda
[%
o1
+
0x018
]
%
asi
,
%f6
,
add
%
o2
,
%
g0
)
faligndata
%f2
,
%f4
,
%f18
EX
(
ldda
[%
o1
+
0x020
]
%
asi
,
%f8
,
add
%
o2
,
%
g0
)
faligndata
%f4
,
%f6
,
%f20
EX
(
ldda
[%
o1
+
0x028
]
%
asi
,
%f10
,
add
%
o2
,
%
g0
)
faligndata
%f6
,
%f8
,
%f22
EX
(
ldda
[%
o1
+
0x030
]
%
asi
,
%f12
,
add
%
o2
,
%
g0
)
faligndata
%f8
,
%f10
,
%f24
EX
(
ldda
[%
o1
+
0x038
]
%
asi
,
%f14
,
add
%
o2
,
%
g0
)
faligndata
%f10
,
%f12
,
%f26
EX
(
ldda
[%
o1
+
0x040
]
%
asi
,
%f0
,
add
%
o2
,
%
g0
)
sub
%
o4
,
0x80
,
%
o4
add
%
o1
,
0x40
,
%
o1
ba
,
pt
%
xcc
,
loop
srl
%
o4
,
6
,
%
o3
/
*
This
loop
performs
the
copy
and
queues
new
prefetches
.
*
We
drop
into
the
second
loop
when
len
<=
(
5
*
64
)
.
Note
*
that
this
(
5
*
64
)
factor
has
been
subtracted
from
len
*
already
.
*/
U3copy_from_user_loop1
:
EX2
(
ldda
[%
o1
+
0x008
]
%
asi
,
%f2
)
!
MS
Group2
(
%f2
results
at
G5
)
faligndata
%f12
,
%f14
,
%f28
!
FGA
(
%f28
results
at
G5
)
EX2
(
ldda
[%
o1
+
0x010
]
%
asi
,
%f4
)
!
MS
Group3
(
%f4
results
at
G6
)
faligndata
%f14
,
%f0
,
%f30
!
FGA
Group4
(
1
-
cycle
stall
,
%f30
at
G7
)
stda
%f16
,
[%
o0
]
ASI_BLK_P
!
MS
EX2
(
ldda
[%
o1
+
0x018
]
%
asi
,
%f6
)
!
AX
(
%f6
results
at
G7
)
faligndata
%f0
,
%f2
,
%f16
!
FGA
Group12
(
7
-
cycle
stall
)
EX2
(
ldda
[%
o1
+
0x020
]
%
asi
,
%f8
)
!
MS
(
%f8
results
at
G15
)
faligndata
%f2
,
%f4
,
%f18
!
FGA
Group13
(
%f18
results
at
G16
)
EX2
(
ldda
[%
o1
+
0x028
]
%
asi
,
%f10
)
!
MS
(
%f10
results
at
G16
)
faligndata
%f4
,
%f6
,
%f20
!
FGA
Group14
(
%f20
results
at
G17
)
EX2
(
ldda
[%
o1
+
0x030
]
%
asi
,
%f12
)
!
MS
(
%f12
results
at
G17
)
faligndata
%f6
,
%f8
,
%f22
!
FGA
Group15
(
%f22
results
at
G18
)
EX2
(
ldda
[%
o1
+
0x038
]
%
asi
,
%f14
)
!
MS
(
%f14
results
at
G18
)
faligndata
%f8
,
%f10
,
%f24
!
FGA
Group16
(
%f24
results
at
G19
)
EX2
(
ldda
[%
o1
+
0x040
]
%
asi
,
%f0
)
!
AX
(
%f0
results
at
G19
)
prefetcha
[%
o1
+
0x180
]
%
asi
,
#
one_read
!
MS
faligndata
%f10
,
%f12
,
%f26
!
FGA
Group17
(
%f26
results
at
G20
)
subcc
%
o4
,
0x40
,
%
o4
!
A0
add
%
o1
,
0x40
,
%
o1
!
A1
bg
,
pt
%
xcc
,
U3copy_from_user_loop1
!
BR
add
%
o0
,
0x40
,
%
o0
!
A0
Group18
U3copy_from_user_loop2_enter
:
mov
5
,
%
o3
!
A1
/
*
This
loop
performs
on
the
copy
,
no
new
prefetches
are
*
queued
.
We
do
things
this
way
so
that
we
do
not
perform
*
any
spurious
prefetches
past
the
end
of
the
src
buffer
.
*/
U3copy_from_user_loop2
:
EX3
(
ldda
[%
o1
+
0x008
]
%
asi
,
%f2
)
!
MS
faligndata
%f12
,
%f14
,
%f28
!
FGA
Group2
EX3
(
ldda
[%
o1
+
0x010
]
%
asi
,
%f4
)
!
MS
faligndata
%f14
,
%f0
,
%f30
!
FGA
Group4
(
1
-
cycle
stall
)
stda
%f16
,
[%
o0
]
ASI_BLK_P
!
MS
EX3
(
ldda
[%
o1
+
0x018
]
%
asi
,
%f6
)
!
AX
faligndata
%f0
,
%f2
,
%f16
!
FGA
Group12
(
7
-
cycle
stall
)
EX3
(
ldda
[%
o1
+
0x020
]
%
asi
,
%f8
)
!
MS
faligndata
%f2
,
%f4
,
%f18
!
FGA
Group13
EX3
(
ldda
[%
o1
+
0x028
]
%
asi
,
%f10
)
!
MS
faligndata
%f4
,
%f6
,
%f20
!
FGA
Group14
EX3
(
ldda
[%
o1
+
0x030
]
%
asi
,
%f12
)
!
MS
faligndata
%f6
,
%f8
,
%f22
!
FGA
Group15
EX3
(
ldda
[%
o1
+
0x038
]
%
asi
,
%f14
)
!
MS
faligndata
%f8
,
%f10
,
%f24
!
FGA
Group16
EX3
(
ldda
[%
o1
+
0x040
]
%
asi
,
%f0
)
!
AX
faligndata
%f10
,
%f12
,
%f26
!
FGA
Group17
subcc
%
o3
,
0x01
,
%
o3
!
A0
add
%
o1
,
0x40
,
%
o1
!
A1
bg
,
pt
%
xcc
,
U3copy_from_user_loop2
!
BR
add
%
o0
,
0x40
,
%
o0
!
A0
Group18
.
align
64
loop
:
EX3
(
ldda
[%
o1
+
0x008
]
%
asi
,
%f2
)
faligndata
%f12
,
%f14
,
%f28
EX3
(
ldda
[%
o1
+
0x010
]
%
asi
,
%f4
)
faligndata
%f14
,
%f0
,
%f30
stda
%f16
,
[%
o0
]
ASI_BLK_P
EX3
(
ldda
[%
o1
+
0x018
]
%
asi
,
%f6
)
faligndata
%f0
,
%f2
,
%f16
EX3
(
ldda
[%
o1
+
0x020
]
%
asi
,
%f8
)
faligndata
%f2
,
%f4
,
%f18
EX3
(
ldda
[%
o1
+
0x028
]
%
asi
,
%f10
)
faligndata
%f4
,
%f6
,
%f20
EX3
(
ldda
[%
o1
+
0x030
]
%
asi
,
%f12
)
faligndata
%f6
,
%f8
,
%f22
EX3
(
ldda
[%
o1
+
0x038
]
%
asi
,
%f14
)
faligndata
%f8
,
%f10
,
%f24
EX3
(
ldda
[%
o1
+
0x040
]
%
asi
,
%f0
)
prefetcha
[%
o1
+
0x180
]
%
asi
,
#
one_read
faligndata
%f10
,
%f12
,
%f26
subcc
%
o3
,
0x01
,
%
o3
add
%
o1
,
0x40
,
%
o1
bg
,
pt
%
XCC
,
loop
add
%
o0
,
0x40
,
%
o0
/
*
Finally
we
copy
the
last
full
64
-
byte
block
.
*/
U3copy_from_user_
loopfini
:
EX3
(
ldda
[%
o1
+
0x008
]
%
asi
,
%f2
)
!
MS
faligndata
%f12
,
%f14
,
%f28
!
FGA
EX3
(
ldda
[%
o1
+
0x010
]
%
asi
,
%f4
)
!
MS
Group19
faligndata
%f14
,
%f0
,
%f30
!
FGA
stda
%f16
,
[%
o0
]
ASI_BLK_P
!
MS
Group20
EX3
(
ldda
[%
o1
+
0x018
]
%
asi
,
%f6
)
!
AX
faligndata
%f0
,
%f2
,
%f16
!
FGA
Group11
(
7
-
cycle
stall
)
EX3
(
ldda
[%
o1
+
0x020
]
%
asi
,
%f8
)
!
MS
faligndata
%f2
,
%f4
,
%f18
!
FGA
Group12
EX3
(
ldda
[%
o1
+
0x028
]
%
asi
,
%f10
)
!
MS
faligndata
%f4
,
%f6
,
%f20
!
FGA
Group13
EX3
(
ldda
[%
o1
+
0x030
]
%
asi
,
%f12
)
!
MS
faligndata
%f6
,
%f8
,
%f22
!
FGA
Group14
EX3
(
ldda
[%
o1
+
0x038
]
%
asi
,
%f14
)
!
MS
faligndata
%f8
,
%f10
,
%f24
!
FGA
Group15
cmp
%
g1
,
0
!
A0
be
,
pt
%
icc
,
1
f
!
BR
add
%
o0
,
0x40
,
%
o0
!
A1
EX4
(
ldda
[%
o1
+
0x040
]
%
asi
,
%f0
)
!
MS
1
:
faligndata
%f10
,
%f12
,
%f26
!
FGA
Group16
faligndata
%f12
,
%f14
,
%f28
!
FGA
Group17
faligndata
%f14
,
%f0
,
%f30
!
FGA
Group18
stda
%f16
,
[%
o0
]
ASI_BLK_P
!
MS
add
%
o0
,
0x40
,
%
o0
!
A0
add
%
o1
,
0x40
,
%
o1
!
A1
#ifdef __KERNEL__
loopfini
:
EX3
(
ldda
[%
o1
+
0x008
]
%
asi
,
%f2
)
faligndata
%f12
,
%f14
,
%f28
EX3
(
ldda
[%
o1
+
0x010
]
%
asi
,
%f4
)
faligndata
%f14
,
%f0
,
%f30
stda
%f16
,
[%
o0
]
ASI_BLK_P
EX3
(
ldda
[%
o1
+
0x018
]
%
asi
,
%f6
)
faligndata
%f0
,
%f2
,
%f16
EX3
(
ldda
[%
o1
+
0x020
]
%
asi
,
%f8
)
faligndata
%f2
,
%f4
,
%f18
EX3
(
ldda
[%
o1
+
0x028
]
%
asi
,
%f10
)
faligndata
%f4
,
%f6
,
%f20
EX3
(
ldda
[%
o1
+
0x030
]
%
asi
,
%f12
)
faligndata
%f6
,
%f8
,
%f22
EX3
(
ldda
[%
o1
+
0x038
]
%
asi
,
%f14
)
faligndata
%f8
,
%f10
,
%f24
cmp
%
g1
,
0
be
,
pt
%
XCC
,
1
f
add
%
o0
,
0x40
,
%
o0
EX4
(
ldda
[%
o1
+
0x040
]
%
asi
,
%f0
)
1
:
faligndata
%f10
,
%f12
,
%f26
faligndata
%f12
,
%f14
,
%f28
faligndata
%f14
,
%f0
,
%f30
stda
%f16
,
[%
o0
]
ASI_BLK_P
add
%
o0
,
0x40
,
%
o0
add
%
o1
,
0x40
,
%
o1
.
globl
U3copy_from_user_nop_2_3
U3copy_from_user_nop_2_3
:
mov
PRIMARY_CONTEXT
,
%
o3
stxa
%
g0
,
[%
o3
]
ASI_DMMU
!
Flush
P
-
cache
stxa
%
g3
,
[%
g0
]
ASI_DCU_CONTROL_REG
!
Disable
P
-
cache
#endif
membar
#
Sync
!
MS
Group26
(
7
-
cycle
stall
)
membar
#
Sync
/
*
Now
we
copy
the
(
len
modulo
64
)
bytes
at
the
end
.
*
Note
how
we
borrow
the
%f0
loaded
above
.
*
*
Also
notice
how
this
code
is
careful
not
to
perform
a
*
load
past
the
end
of
the
src
buffer
just
like
similar
*
code
found
in
U3copy_from_user_toosmall
processing
.
*
load
past
the
end
of
the
src
buffer
.
*/
U3copy_from_user_
loopend
:
and
%
o2
,
0x3f
,
%
o2
!
A0
Group
andcc
%
o2
,
0x38
,
%
g2
!
A0
Group
be
,
pn
%
icc
,
U3copy_from_user_endcruft
!
BR
subcc
%
g2
,
0x8
,
%
g2
!
A1
be
,
pn
%
icc
,
U3copy_from_user_endcruft
!
BR
Group
cmp
%
g1
,
0
!
A0
be
,
a
,
pt
%
icc
,
1
f
!
BR
Group
EX
(
ldda
[%
o1
+
0x00
]
%
asi
,
%f0
,
add
%
o2
,
%
g0
)
!
MS
1
:
EX
(
ldda
[%
o1
+
0x08
]
%
asi
,
%f2
,
add
%
o2
,
%
g0
)
!
MS
Group
add
%
o1
,
0x8
,
%
o1
!
A0
sub
%
o2
,
0x8
,
%
o2
!
A1
subcc
%
g2
,
0x8
,
%
g2
!
A0
Group
faligndata
%f0
,
%f2
,
%f8
!
FGA
Group
std
%f8
,
[%
o0
+
0x00
]
!
MS
(
XXX
does
it
stall
here
?
XXX
)
be
,
pn
%
icc
,
U3copy_from_user_endcruft
!
BR
add
%
o0
,
0x8
,
%
o0
!
A0
EX
(
ldda
[%
o1
+
0x08
]
%
asi
,
%f0
,
add
%
o2
,
%
g0
)
!
MS
Group
add
%
o1
,
0x8
,
%
o1
!
A0
sub
%
o2
,
0x8
,
%
o2
!
A1
subcc
%
g2
,
0x8
,
%
g2
!
A0
Group
faligndata
%f2
,
%f0
,
%f8
!
FGA
std
%f8
,
[%
o0
+
0x00
]
!
MS
(
XXX
does
it
stall
here
?
XXX
)
bne
,
pn
%
icc
,
1
b
!
BR
add
%
o0
,
0x8
,
%
o0
!
A0
Group
loopend
:
and
%
o2
,
0x3f
,
%
o2
andcc
%
o2
,
0x38
,
%
g2
be
,
pn
%
XCC
,
endcruft
subcc
%
g2
,
0x8
,
%
g2
be
,
pn
%
XCC
,
endcruft
cmp
%
g1
,
0
be
,
a
,
pt
%
XCC
,
1
f
EX
(
ldda
[%
o1
+
0x00
]
%
asi
,
%f0
,
add
%
o2
,
%
g0
)
1
:
EX
(
ldda
[%
o1
+
0x08
]
%
asi
,
%f2
,
add
%
o2
,
%
g0
)
add
%
o1
,
0x8
,
%
o1
sub
%
o2
,
0x8
,
%
o2
subcc
%
g2
,
0x8
,
%
g2
faligndata
%f0
,
%f2
,
%f8
std
%f8
,
[%
o0
+
0x00
]
be
,
pn
%
XCC
,
endcruft
add
%
o0
,
0x8
,
%
o0
EX
(
ldda
[%
o1
+
0x08
]
%
asi
,
%f0
,
add
%
o2
,
%
g0
)
add
%
o1
,
0x8
,
%
o1
sub
%
o2
,
0x8
,
%
o2
subcc
%
g2
,
0x8
,
%
g2
faligndata
%f2
,
%f0
,
%f8
std
%f8
,
[%
o0
+
0x00
]
bne
,
pn
%
XCC
,
1
b
add
%
o0
,
0x8
,
%
o0
/
*
If
anything
is
left
,
we
copy
it
one
byte
at
a
time
.
*
Note
that
%
g1
is
(
src
&
0x3
)
saved
above
before
the
*
alignaddr
was
performed
.
*/
U3copy_from_user_
endcruft
:
endcruft
:
cmp
%
o2
,
0
add
%
o1
,
%
g1
,
%
o1
VISExitHalf
be
,
pn
%
icc
,
U3copy_from_user_short_ret
nop
ba
,
a
,
pt
%
xcc
,
U3copy_from_user_short
/
*
If
we
get
here
,
then
32
<=
len
<
(
6
*
64
)
*/
U3copy_from_user_toosmall
:
#ifdef SMALL_COPY_USES_FPU
/
*
Is
'dst'
already
aligned
on
an
8
-
byte
boundary
?
*/
be
,
pt
%
xcc
,
2
f
!
BR
Group
/
*
Compute
abs
((
dst
&
7
)
-
8
)
into
%
g2
.
This
is
the
number
*
of
bytes
to
copy
to
make
'dst'
8
-
byte
aligned
.
We
pre
-
*
subtract
this
from
'len'
.
*/
sub
%
g2
,
0x8
,
%
g2
!
A0
sub
%
g0
,
%
g2
,
%
g2
!
A0
Group
(
reg
-
dep
)
sub
%
o2
,
%
g2
,
%
o2
!
A0
Group
(
reg
-
dep
)
/
*
Copy
%
g2
bytes
from
src
to
dst
,
one
byte
at
a
time
.
*/
1
:
EXNV
(
lduba
[%
o1
+
0x00
]
%
asi
,
%
o3
,
add
%
o2
,
%
g2
)!
MS
(
Group
)
(%
o3
in
3
cycles
)
add
%
o1
,
0x1
,
%
o1
!
A1
add
%
o0
,
0x1
,
%
o0
!
A0
Group
subcc
%
g2
,
0x1
,
%
g2
!
A1
be
,
pn
%
XCC
,
out
sub
%
o0
,
%
o1
,
%
o3
bg
,
pt
%
icc
,
1
b
!
BR
Group
stb
%
o3
,
[%
o0
+
-
1
]
!
MS
Group
andcc
%
g1
,
0x7
,
%
g0
bne
,
pn
%
icc
,
small_copy_unaligned
andcc
%
o2
,
0x8
,
%
g0
be
,
pt
%
icc
,
1
f
nop
EXNV
(
ldxa
[%
o1
]
%
asi
,
%
o5
,
add
%
o2
,
%
g0
)
stx
%
o5
,
[%
o1
+
%
o3
]
add
%
o1
,
0x8
,
%
o1
2
:
VISEntryHalf
!
MS
+
MS
1
:
andcc
%
o2
,
0x4
,
%
g0
be
,
pt
%
icc
,
1
f
nop
EXNV
(
lduwa
[%
o1
]
%
asi
,
%
o5
,
and
%
o2
,
0x7
)
stw
%
o5
,
[%
o1
+
%
o3
]
add
%
o1
,
0x4
,
%
o1
/
*
Compute
(
len
-
(
len
%
8
))
into
%
g2
.
This
is
guaranteed
*
to
be
nonzero
.
*/
andn
%
o2
,
0x7
,
%
g2
!
A0
Group
/
*
You
may
read
this
and
believe
that
it
allows
reading
*
one
8
-
byte
longword
past
the
end
of
src
.
It
actually
*
does
not
,
as
%
g2
is
subtracted
as
loads
are
done
from
*
src
,
so
we
always
stop
before
running
off
the
end
.
*
Also
,
we
are
guaranteed
to
have
at
least
0x10
bytes
*
to
move
here
.
*/
sub
%
g2
,
0x8
,
%
g2
!
A0
Group
(
reg
-
dep
)
alignaddr
%
o1
,
%
g0
,
%
g1
!
MS
(
Break
-
after
)
EX
(
ldda
[%
g1
+
0x00
]
%
asi
,
%f0
,
add
%
o2
,
%
g0
)
!
MS
Group
(
1
-
cycle
stall
)
add
%
g1
,
0x8
,
%
g1
!
A0
1
:
EX
(
ldda
[%
g1
+
0x00
]
%
asi
,
%f2
,
add
%
o2
,
%
g0
)
!
MS
Group
add
%
g1
,
0x8
,
%
g1
!
A0
sub
%
o2
,
0x8
,
%
o2
!
A1
subcc
%
g2
,
0x8
,
%
g2
!
A0
Group
faligndata
%f0
,
%f2
,
%f8
!
FGA
Group
(
1
-
cycle
stall
)
std
%f8
,
[%
o0
+
0x00
]
!
MS
Group
(
2
-
cycle
stall
)
add
%
o1
,
0x8
,
%
o1
!
A0
be
,
pn
%
icc
,
2
f
!
BR
add
%
o0
,
0x8
,
%
o0
!
A1
EX
(
ldda
[%
g1
+
0x00
]
%
asi
,
%f0
,
add
%
o2
,
%
g0
)
!
MS
Group
add
%
g1
,
0x8
,
%
g1
!
A0
sub
%
o2
,
0x8
,
%
o2
!
A1
subcc
%
g2
,
0x8
,
%
g2
!
A0
Group
faligndata
%f2
,
%f0
,
%f8
!
FGA
Group
(
1
-
cycle
stall
)
std
%f8
,
[%
o0
+
0x00
]
!
MS
Group
(
2
-
cycle
stall
)
add
%
o1
,
0x8
,
%
o1
!
A0
bne
,
pn
%
icc
,
1
b
!
BR
add
%
o0
,
0x8
,
%
o0
!
A1
/
*
Nothing
left
to
copy
?
*/
2
:
cmp
%
o2
,
0
!
A0
Group
VISExitHalf
!
A0
+
MS
be
,
pn
%
icc
,
U3copy_from_user_short_ret
!
BR
Group
nop
!
A0
ba
,
a
,
pt
%
xcc
,
U3copy_from_user_short
!
BR
Group
#else /* !(SMALL_COPY_USES_FPU) */
xor
%
o1
,
%
o0
,
%
g2
andcc
%
g2
,
0x7
,
%
g0
bne
,
pn
%
icc
,
U3copy_from_user_short
andcc
%
o1
,
0x7
,
%
g2
be
,
pt
%
xcc
,
2
f
sub
%
g2
,
0x8
,
%
g2
sub
%
g0
,
%
g2
,
%
g2
sub
%
o2
,
%
g2
,
%
o2
1
:
andcc
%
o2
,
0x2
,
%
g0
be
,
pt
%
icc
,
1
f
nop
EXNV
(
lduha
[%
o1
]
%
asi
,
%
o5
,
and
%
o2
,
0x3
)
sth
%
o5
,
[%
o1
+
%
o3
]
add
%
o1
,
0x2
,
%
o1
1
:
EXNV
(
lduba
[%
o1
+
0x00
]
%
asi
,
%
o3
,
add
%
o2
,
%
g2
)
add
%
o1
,
0x1
,
%
o1
add
%
o0
,
0x1
,
%
o0
subcc
%
g2
,
0x1
,
%
g2
bg
,
pt
%
icc
,
1
b
stb
%
o3
,
[%
o0
+
-
1
]
1
:
andcc
%
o2
,
0x1
,
%
g0
be
,
pt
%
icc
,
out
nop
EXNV
(
lduba
[%
o1
]
%
asi
,
%
o5
,
and
%
o2
,
0x1
)
ba
,
pt
%
xcc
,
out
stb
%
o5
,
[%
o1
+
%
o3
]
medium_copy
:
/
*
16
<
len
<=
64
*/
bne
,
pn
%
XCC
,
small_copy_unaligned
sub
%
o0
,
%
o1
,
%
o3
medium_copy_aligned
:
andn
%
o2
,
0x7
,
%
o4
and
%
o2
,
0x7
,
%
o2
1
:
subcc
%
o4
,
0x8
,
%
o4
EXNV8
(
ldxa
[%
o1
]
%
asi
,
%
o5
,
add
%
o2
,
%
o4
)
stx
%
o5
,
[%
o1
+
%
o3
]
bgu
,
pt
%
XCC
,
1
b
add
%
o1
,
0x8
,
%
o1
andcc
%
o2
,
0x4
,
%
g0
be
,
pt
%
XCC
,
1
f
nop
sub
%
o2
,
0x4
,
%
o2
EXNV4
(
lduwa
[%
o1
]
%
asi
,
%
o5
,
add
%
o2
,
%
g0
)
stw
%
o5
,
[%
o1
+
%
o3
]
add
%
o1
,
0x4
,
%
o1
1
:
cmp
%
o2
,
0
be
,
pt
%
XCC
,
out
nop
ba
,
pt
%
xcc
,
small_copy_unaligned
nop
2
:
andn
%
o2
,
0x7
,
%
g2
sub
%
o2
,
%
g2
,
%
o2
small_copy
:
/
*
0
<
len
<=
16
*/
andcc
%
o3
,
0x3
,
%
g0
bne
,
pn
%
XCC
,
small_copy_unaligned
sub
%
o0
,
%
o1
,
%
o3
3
:
EXNV
(
ldxa
[%
o1
+
0x00
]
%
asi
,
%
o3
,
add
%
o2
,
%
g2
)
add
%
o1
,
0x8
,
%
o1
add
%
o0
,
0x8
,
%
o0
s
ubcc
%
g2
,
0x8
,
%
g2
bg
,
pt
%
icc
,
3
b
stx
%
o3
,
[%
o0
+
-
8
]
small_copy_aligned
:
subcc
%
o2
,
4
,
%
o2
EXNV
(
lduwa
[%
o1
]
%
asi
,
%
g1
,
add
%
o2
,
%
g0
)
s
tw
%
g1
,
[%
o1
+
%
o3
]
bg
u
,
pt
%
XCC
,
small_copy_aligned
add
%
o1
,
4
,
%
o1
cmp
%
o2
,
0
bne
,
pn
%
icc
,
U3copy_from_user_short
nop
ba
,
a
,
pt
%
xcc
,
U3copy_from_user_short_ret
out
:
retl
clr
%
o0
#endif /* !(SMALL_COPY_USES_FPU) */
.
align
32
small_copy_unaligned
:
subcc
%
o2
,
1
,
%
o2
EXNV
(
lduba
[%
o1
]
%
asi
,
%
g1
,
add
%
o2
,
%
g0
)
stb
%
g1
,
[%
o1
+
%
o3
]
bgu
,
pt
%
XCC
,
small_copy_unaligned
add
%
o1
,
1
,
%
o1
retl
clr
%
o0
#ifdef __KERNEL__
.
globl
U3cfu_fixup
U3cfu_fixup
:
/
*
Since
this
is
copy_from_user
(),
zero
out
the
rest
of
the
*
kernel
buffer
.
...
...
@@ -516,4 +428,3 @@ U3cfu_fixup:
2
:
retl
mov
%
o1
,
%
o0
#endif
arch/sparc64/lib/U3copy_in_user.S
View file @
da09d2f1
/*
$Id
:
U3copy_in_user
.
S
,
v
1
.4
2001
/
03
/
21
05
:
58
:
47
davem
Exp
$
*
U3memcpy
.
S
:
UltraSparc
-
III
optimized
copy
within
userspace
.
/*
U3copy_in_user
.
S
:
UltraSparc
-
III
optimized
memcpy
.
*
*
Copyright
(
C
)
1999
,
2000
David
S
.
Miller
(
davem
@
redhat
.
com
)
*
Copyright
(
C
)
1999
,
2000
,
2004
David
S
.
Miller
(
davem
@
redhat
.
com
)
*/
#ifdef __KERNEL__
#include <asm/visasm.h>
#include <asm/asi.h>
#undef SMALL_COPY_USES_FPU
#include <asm/dcu.h>
#include <asm/spitfire.h>
#define XCC xcc
#define EXNV(x,y,a,b) \
98
:
x
,
y
; \
.
section
.
fixup
; \
...
...
@@ -19,7 +21,7 @@
.
word
98
b
,
99
b
; \
.
text
; \
.
align
4
;
#define EXNV
2(x,y,a,b)
\
#define EXNV
1(x,y,a,b)
\
98
:
x
,
y
; \
.
section
.
fixup
; \
.
align
4
; \
...
...
@@ -31,122 +33,41 @@
.
word
98
b
,
99
b
; \
.
text
; \
.
align
4
;
#define EXNV
3(x,y,a,b)
\
#define EXNV
4(x,y,a,b)
\
98
:
x
,
y
; \
.
section
.
fixup
; \
.
align
4
; \
99
:
a
,
b
,
%
o0
; \
retl
; \
add
%
o0
,
8
,
%
o0
; \
.
section
__ex_table
; \
.
align
4
; \
.
word
98
b
,
99
b
; \
.
text
; \
.
align
4
;
#define EX(x,y,a,b) \
98
:
x
,
y
; \
.
section
.
fixup
; \
.
align
4
; \
99
:
VISExitHalf
; \
retl
; \
a
,
b
,
%
o0
; \
.
section
__ex_table
; \
.
align
4
; \
.
word
98
b
,
99
b
; \
.
text
; \
.
align
4
;
#define EXBLK1(x,y) \
98
:
x
,
y
; \
.
section
.
fixup
; \
.
align
4
; \
99
:
VISExitHalf
; \
add
%
o4
,
0x1c0
,
%
o1
; \
and
%
o2
,
(
0x40
-
1
),
%
o2
; \
retl
; \
add
%
o1
,
%
o2
,
%
o0
; \
.
section
__ex_table
; \
.
align
4
; \
.
word
98
b
,
99
b
; \
.
text
; \
.
align
4
;
#define EXBLK2(x,y) \
98
:
x
,
y
; \
.
section
.
fixup
; \
.
align
4
; \
99
:
VISExitHalf
; \
sll
%
o3
,
6
,
%
o3
; \
and
%
o2
,
(
0x40
-
1
),
%
o2
; \
add
%
o3
,
0x80
,
%
o1
; \
retl
; \
add
%
o1
,
%
o2
,
%
o0
; \
add
%
o0
,
4
,
%
o0
; \
.
section
__ex_table
; \
.
align
4
; \
.
word
98
b
,
99
b
; \
.
text
; \
.
align
4
;
#define EX
BLK3(x,y
) \
#define EX
NV8(x,y,a,b
) \
98
:
x
,
y
; \
.
section
.
fixup
; \
.
align
4
; \
99
:
VISExitHalf
; \
and
%
o2
,
(
0x40
-
1
),
%
o2
; \
retl
; \
add
%
o2
,
0x80
,
%
o0
; \
.
section
__ex_table
; \
.
align
4
; \
.
word
98
b
,
99
b
; \
.
text
; \
.
align
4
;
#define EXBLK4(x,y) \
98
:
x
,
y
; \
.
section
.
fixup
; \
.
align
4
; \
99
:
VISExitHalf
; \
and
%
o2
,
(
0x40
-
1
),
%
o2
; \
99
:
a
,
b
,
%
o0
; \
retl
; \
add
%
o
2
,
0x40
,
%
o0
; \
add
%
o
0
,
8
,
%
o0
; \
.
section
__ex_table
; \
.
align
4
; \
.
word
98
b
,
99
b
; \
.
text
; \
.
align
4
;
#else
#define ASI_AIUS 0x80
#define ASI_BLK_AIUS 0xf0
#define FPRS_FEF 0x04
#define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs
#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
#define SMALL_COPY_USES_FPU
#define EXNV(x,y,a,b) x,y;
#define EXNV2(x,y,a,b) x,y;
#define EXNV3(x,y,a,b) x,y;
#define EX(x,y,a,b) x,y;
#define EXBLK1(x,y) x,y;
#define EXBLK2(x,y) x,y;
#define EXBLK3(x,y) x,y;
#define EXBLK4(x,y) x,y;
#endif
/
*
Special
/
non
-
trivial
issues
of
this
code
:
*
*
1
)
%
o5
is
preserved
from
VISEntryHalf
to
VISExitHalf
*
2
)
Only
low
32
FPU
registers
are
used
so
that
only
the
*
lower
half
of
the
FPU
register
set
is
dirtied
by
this
*
code
.
This
is
especially
important
in
the
kernel
.
*
3
)
This
code
never
prefetches
cachelines
past
the
end
*
of
the
source
buffer
.
*
*
XXX
Actually
,
Cheetah
can
buffer
up
to
8
concurrent
*
XXX
prefetches
,
revisit
this
...
*/
.
register
%
g2
,#
scratch
.
register
%
g3
,#
scratch
.
text
.
align
32
/
*
The
cheetah
's flexible spine, oversized liver, enlarged heart,
*
s
lender
muscular
body
,
and
claws
make
it
the
swiftest
hunter
*
in
Africa
and
the
fastest
animal
on
land
.
Can
reach
speeds
*
of
up
to
2
.4
GB
per
second
.
/
*
Don
't try to get too fancy here, just nice and
*
s
imple
.
This
is
predominantly
used
for
well
aligned
*
small
copies
in
the
compat
layer
.
It
is
also
used
*
to
copy
register
windows
around
during
thread
cloning
.
*/
.
globl
U3copy_in_user
...
...
@@ -155,377 +76,65 @@ U3copy_in_user: /* %o0=dst, %o1=src, %o2=len */
*
Reading
%
asi
to
check
for
KERNEL_DS
is
comparatively
*
cheap
.
*/
rd
%
asi
,
%
g1
!
MS
Group
(
4
cycles
)
cmp
%
g1
,
ASI_AIUS
!
A0
Group
bne
U3memcpy
!
BR
nop
!
A1
#ifndef __KERNEL__
/
*
Save
away
original
'dst'
for
memcpy
return
value
.
*/
mov
%
o0
,
%
g3
!
A0
Group
#endif
/
*
Anything
to
copy
at
all
?
*/
cmp
%
o2
,
0
!
A1
ble
,
pn
%
icc
,
U3copy_in_user_short_ret
!
BR
/
*
Extremely
small
copy
?
*/
cmp
%
o2
,
31
!
A0
Group
ble
,
pn
%
icc
,
U3copy_in_user_short
!
BR
/
*
Large
enough
to
use
unrolled
prefetch
loops
?
*/
cmp
%
o2
,
0x100
!
A1
bge
,
a
,
pt
%
icc
,
U3copy_in_user_enter
!
BR
Group
andcc
%
o0
,
0x3f
,
%
g2
!
A0
ba
,
pt
%
xcc
,
U3copy_in_user_toosmall
!
BR
Group
andcc
%
o0
,
0x7
,
%
g2
!
A0
.
align
32
U3copy_in_user_short
:
/
*
Copy
%
o2
bytes
from
src
to
dst
,
one
byte
at
a
time
.
*/
EXNV
(
lduba
[%
o1
+
0x00
]
%
asi
,
%
o3
,
add
%
o2
,
%
g0
)!
MS
Group
add
%
o1
,
0x1
,
%
o1
!
A0
add
%
o0
,
0x1
,
%
o0
!
A1
subcc
%
o2
,
1
,
%
o2
!
A0
Group
bg
,
pt
%
icc
,
U3copy_in_user_short
!
BR
EXNV
(
stba
%
o3
,
[%
o0
+
-
1
]
%
asi
,
add
%
o2
,
1
)
!
MS
Group
(
1
-
cycle
stall
)
U3copy_in_user_short_ret
:
#ifdef __KERNEL__
retl
!
BR
Group
(
0
-
4
cycle
stall
)
clr
%
o0
!
A0
#else
retl
!
BR
Group
(
0
-
4
cycle
stall
)
mov
%
g3
,
%
o0
!
A0
#endif
/
*
Here
len
>=
(
6
*
64
)
and
condition
codes
reflect
execution
*
of
"andcc %o0, 0x7, %g2"
,
done
by
caller
.
*/
.
align
64
U3copy_in_user_enter
:
/
*
Is
'dst'
already
aligned
on
an
64
-
byte
boundary
?
*/
be
,
pt
%
xcc
,
2
f
!
BR
/
*
Compute
abs
((
dst
&
0x3f
)
-
0x40
)
into
%
g2
.
This
is
the
number
*
of
bytes
to
copy
to
make
'dst'
64
-
byte
aligned
.
We
pre
-
*
subtract
this
from
'len'
.
*/
sub
%
g2
,
0x40
,
%
g2
!
A0
Group
sub
%
g0
,
%
g2
,
%
g2
!
A0
Group
sub
%
o2
,
%
g2
,
%
o2
!
A0
Group
/
*
Copy
%
g2
bytes
from
src
to
dst
,
one
byte
at
a
time
.
*/
1
:
EXNV
(
lduba
[%
o1
+
0x00
]
%
asi
,
%
o3
,
add
%
o2
,
%
g2
)!
MS
(
Group
)
add
%
o1
,
0x1
,
%
o1
!
A1
add
%
o0
,
0x1
,
%
o0
!
A0
Group
subcc
%
g2
,
0x1
,
%
g2
!
A1
bg
,
pt
%
icc
,
1
b
!
BR
Group
EXNV2
(
stba
%
o3
,
[%
o0
+
-
1
]
%
asi
,
add
%
o2
,
%
g2
)
!
MS
Group
2
:
VISEntryHalf
!
MS
+
MS
and
%
o1
,
0x7
,
%
g1
!
A1
ba
,
pt
%
xcc
,
U3copy_in_user_begin
!
BR
alignaddr
%
o1
,
%
g0
,
%
o1
!
MS
(
Break
-
after
)
.
align
64
U3copy_in_user_begin
:
prefetcha
[%
o1
+
0x000
]
%
asi
,
#
one_read
!
MS
Group1
prefetcha
[%
o1
+
0x040
]
%
asi
,
#
one_read
!
MS
Group2
andn
%
o2
,
(
0x40
-
1
),
%
o4
!
A0
prefetcha
[%
o1
+
0x080
]
%
asi
,
#
one_read
!
MS
Group3
cmp
%
o4
,
0x140
!
A0
prefetcha
[%
o1
+
0x0c0
]
%
asi
,
#
one_read
!
MS
Group4
EX
(
ldda
[%
o1
+
0x000
]
%
asi
,
%f0
,
add
%
o2
,
%
g0
)
!
MS
Group5
(
%f0
results
at
G8
)
bge
,
a
,
pt
%
icc
,
1
f
!
BR
prefetcha
[%
o1
+
0x100
]
%
asi
,
#
one_read
!
MS
Group6
1
:
EX
(
ldda
[%
o1
+
0x008
]
%
asi
,
%f2
,
add
%
o2
,
%
g0
)
!
AX
(
%f2
results
at
G9
)
cmp
%
o4
,
0x180
!
A1
bge
,
a
,
pt
%
icc
,
1
f
!
BR
prefetcha
[%
o1
+
0x140
]
%
asi
,
#
one_read
!
MS
Group7
1
:
EX
(
ldda
[%
o1
+
0x010
]
%
asi
,
%f4
,
add
%
o2
,
%
g0
)
!
AX
(
%f4
results
at
G10
)
cmp
%
o4
,
0x1c0
!
A1
bge
,
a
,
pt
%
icc
,
1
f
!
BR
prefetcha
[%
o1
+
0x180
]
%
asi
,
#
one_read
!
MS
Group8
1
:
faligndata
%f0
,
%f2
,
%f16
!
FGA
Group9
(
%f16
at
G12
)
EX
(
ldda
[%
o1
+
0x018
]
%
asi
,
%f6
,
add
%
o2
,
%
g0
)
!
AX
(
%f6
results
at
G12
)
faligndata
%f2
,
%f4
,
%f18
!
FGA
Group10
(
%f18
results
at
G13
)
EX
(
ldda
[%
o1
+
0x020
]
%
asi
,
%f8
,
add
%
o2
,
%
g0
)
!
MS
(
%f8
results
at
G13
)
faligndata
%f4
,
%f6
,
%f20
!
FGA
Group12
(
1
-
cycle
stall
,
%f20
at
G15
)
EX
(
ldda
[%
o1
+
0x028
]
%
asi
,
%f10
,
add
%
o2
,
%
g0
)
!
MS
(
%f10
results
at
G15
)
faligndata
%f6
,
%f8
,
%f22
!
FGA
Group13
(
%f22
results
at
G16
)
EX
(
ldda
[%
o1
+
0x030
]
%
asi
,
%f12
,
add
%
o2
,
%
g0
)
!
MS
(
%f12
results
at
G16
)
faligndata
%f8
,
%f10
,
%f24
!
FGA
Group15
(
1
-
cycle
stall
,
%f24
at
G18
)
EX
(
ldda
[%
o1
+
0x038
]
%
asi
,
%f14
,
add
%
o2
,
%
g0
)
!
MS
(
%f14
results
at
G18
)
faligndata
%f10
,
%f12
,
%f26
!
FGA
Group16
(
%f26
results
at
G19
)
EX
(
ldda
[%
o1
+
0x040
]
%
asi
,
%f0
,
add
%
o2
,
%
g0
)
!
MS
(
%f0
results
at
G19
)
/
*
We
only
use
the
first
loop
if
len
>
(
7
*
64
)
.
*/
subcc
%
o4
,
0x1c0
,
%
o4
!
A0
Group17
bg
,
pt
%
icc
,
U3copy_in_user_loop1
!
BR
add
%
o1
,
0x40
,
%
o1
!
A1
add
%
o4
,
0x140
,
%
o4
!
A0
Group18
ba
,
pt
%
xcc
,
U3copy_in_user_loop2
!
BR
srl
%
o4
,
6
,
%
o3
!
A0
Group19
nop
nop
nop
nop
rd
%
asi
,
%
g1
cmp
%
g1
,
ASI_AIUS
bne
,
pn
%
icc
,
U3memcpy_user_stub
nop
cmp
%
o2
,
0
be
,
pn
%
XCC
,
out
or
%
o0
,
%
o1
,
%
o3
cmp
%
o2
,
16
bleu
,
a
,
pn
%
XCC
,
small_copy
or
%
o3
,
%
o2
,
%
o3
medium_copy
:
/
*
16
<
len
<=
64
*/
andcc
%
o3
,
0x7
,
%
g0
bne
,
pn
%
XCC
,
small_copy_unaligned
sub
%
o0
,
%
o1
,
%
o3
medium_copy_aligned
:
andn
%
o2
,
0x7
,
%
o4
and
%
o2
,
0x7
,
%
o2
1
:
subcc
%
o4
,
0x8
,
%
o4
EXNV8
(
ldxa
[%
o1
]
%
asi
,
%
o5
,
add
%
o4
,
%
o2
)
EXNV8
(
stxa
%
o5
,
[%
o1
+
%
o3
]
ASI_AIUS
,
add
%
o4
,
%
o2
)
bgu
,
pt
%
XCC
,
1
b
add
%
o1
,
0x8
,
%
o1
andcc
%
o2
,
0x4
,
%
g0
be
,
pt
%
XCC
,
1
f
nop
sub
%
o2
,
0x4
,
%
o2
EXNV4
(
lduwa
[%
o1
]
%
asi
,
%
o5
,
add
%
o4
,
%
o2
)
EXNV4
(
stwa
%
o5
,
[%
o1
+
%
o3
]
ASI_AIUS
,
add
%
o4
,
%
o2
)
add
%
o1
,
0x4
,
%
o1
1
:
cmp
%
o2
,
0
be
,
pt
%
XCC
,
out
nop
/
*
This
loop
performs
the
copy
and
queues
new
prefetches
.
*
We
drop
into
the
second
loop
when
len
<=
(
5
*
64
)
.
Note
*
that
this
(
5
*
64
)
factor
has
been
subtracted
from
len
*
already
.
*/
U3copy_in_user_loop1
:
EXBLK1
(
ldda
[%
o1
+
0x008
]
%
asi
,
%f2
)
!
MS
Group2
(
%f2
results
at
G5
)
faligndata
%f12
,
%f14
,
%f28
!
FGA
(
%f28
results
at
G5
)
EXBLK1
(
ldda
[%
o1
+
0x010
]
%
asi
,
%f4
)
!
MS
Group3
(
%f4
results
at
G6
)
faligndata
%f14
,
%f0
,
%f30
!
FGA
Group4
(
1
-
cycle
stall
,
%f30
at
G7
)
EXBLK1
(
stda
%f16
,
[%
o0
]
ASI_BLK_AIUS
)
!
MS
EXBLK1
(
ldda
[%
o1
+
0x018
]
%
asi
,
%f6
)
!
AX
(
%f6
results
at
G7
)
faligndata
%f0
,
%f2
,
%f16
!
FGA
Group12
(
7
-
cycle
stall
)
EXBLK1
(
ldda
[%
o1
+
0x020
]
%
asi
,
%f8
)
!
MS
(
%f8
results
at
G15
)
faligndata
%f2
,
%f4
,
%f18
!
FGA
Group13
(
%f18
results
at
G16
)
EXBLK1
(
ldda
[%
o1
+
0x028
]
%
asi
,
%f10
)
!
MS
(
%f10
results
at
G16
)
faligndata
%f4
,
%f6
,
%f20
!
FGA
Group14
(
%f20
results
at
G17
)
EXBLK1
(
ldda
[%
o1
+
0x030
]
%
asi
,
%f12
)
!
MS
(
%f12
results
at
G17
)
faligndata
%f6
,
%f8
,
%f22
!
FGA
Group15
(
%f22
results
at
G18
)
EXBLK1
(
ldda
[%
o1
+
0x038
]
%
asi
,
%f14
)
!
MS
(
%f14
results
at
G18
)
faligndata
%f8
,
%f10
,
%f24
!
FGA
Group16
(
%f24
results
at
G19
)
EXBLK1
(
ldda
[%
o1
+
0x040
]
%
asi
,
%f0
)
!
AX
(
%f0
results
at
G19
)
prefetcha
[%
o1
+
0x180
]
%
asi
,
#
one_read
!
MS
faligndata
%f10
,
%f12
,
%f26
!
FGA
Group17
(
%f26
results
at
G20
)
subcc
%
o4
,
0x40
,
%
o4
!
A0
add
%
o1
,
0x40
,
%
o1
!
A1
bg
,
pt
%
xcc
,
U3copy_in_user_loop1
!
BR
add
%
o0
,
0x40
,
%
o0
!
A0
Group18
U3copy_in_user_loop2_enter
:
mov
5
,
%
o3
!
A1
/
*
This
loop
performs
on
the
copy
,
no
new
prefetches
are
*
queued
.
We
do
things
this
way
so
that
we
do
not
perform
*
any
spurious
prefetches
past
the
end
of
the
src
buffer
.
*/
U3copy_in_user_loop2
:
EXBLK2
(
ldda
[%
o1
+
0x008
]
%
asi
,
%f2
)
!
MS
faligndata
%f12
,
%f14
,
%f28
!
FGA
Group2
EXBLK2
(
ldda
[%
o1
+
0x010
]
%
asi
,
%f4
)
!
MS
faligndata
%f14
,
%f0
,
%f30
!
FGA
Group4
(
1
-
cycle
stall
)
EXBLK2
(
stda
%f16
,
[%
o0
]
ASI_BLK_AIUS
)
!
MS
EXBLK2
(
ldda
[%
o1
+
0x018
]
%
asi
,
%f6
)
!
AX
faligndata
%f0
,
%f2
,
%f16
!
FGA
Group12
(
7
-
cycle
stall
)
EXBLK2
(
ldda
[%
o1
+
0x020
]
%
asi
,
%f8
)
!
MS
faligndata
%f2
,
%f4
,
%f18
!
FGA
Group13
EXBLK2
(
ldda
[%
o1
+
0x028
]
%
asi
,
%f10
)
!
MS
faligndata
%f4
,
%f6
,
%f20
!
FGA
Group14
EXBLK2
(
ldda
[%
o1
+
0x030
]
%
asi
,
%f12
)
!
MS
faligndata
%f6
,
%f8
,
%f22
!
FGA
Group15
EXBLK2
(
ldda
[%
o1
+
0x038
]
%
asi
,
%f14
)
!
MS
faligndata
%f8
,
%f10
,
%f24
!
FGA
Group16
EXBLK2
(
ldda
[%
o1
+
0x040
]
%
asi
,
%f0
)
!
AX
faligndata
%f10
,
%f12
,
%f26
!
FGA
Group17
subcc
%
o3
,
0x01
,
%
o3
!
A0
add
%
o1
,
0x40
,
%
o1
!
A1
bg
,
pt
%
xcc
,
U3copy_in_user_loop2
!
BR
add
%
o0
,
0x40
,
%
o0
!
A0
Group18
/
*
Finally
we
copy
the
last
full
64
-
byte
block
.
*/
U3copy_in_user_loopfini
:
EXBLK3
(
ldda
[%
o1
+
0x008
]
%
asi
,
%f2
)
!
MS
faligndata
%f12
,
%f14
,
%f28
!
FGA
EXBLK3
(
ldda
[%
o1
+
0x010
]
%
asi
,
%f4
)
!
MS
Group19
faligndata
%f14
,
%f0
,
%f30
!
FGA
EXBLK3
(
stda
%f16
,
[%
o0
]
ASI_BLK_AIUS
)
!
MS
Group20
EXBLK4
(
ldda
[%
o1
+
0x018
]
%
asi
,
%f6
)
!
AX
faligndata
%f0
,
%f2
,
%f16
!
FGA
Group11
(
7
-
cycle
stall
)
EXBLK4
(
ldda
[%
o1
+
0x020
]
%
asi
,
%f8
)
!
MS
faligndata
%f2
,
%f4
,
%f18
!
FGA
Group12
EXBLK4
(
ldda
[%
o1
+
0x028
]
%
asi
,
%f10
)
!
MS
faligndata
%f4
,
%f6
,
%f20
!
FGA
Group13
EXBLK4
(
ldda
[%
o1
+
0x030
]
%
asi
,
%f12
)
!
MS
faligndata
%f6
,
%f8
,
%f22
!
FGA
Group14
EXBLK4
(
ldda
[%
o1
+
0x038
]
%
asi
,
%f14
)
!
MS
faligndata
%f8
,
%f10
,
%f24
!
FGA
Group15
cmp
%
g1
,
0
!
A0
be
,
pt
%
icc
,
1
f
!
BR
add
%
o0
,
0x40
,
%
o0
!
A1
EXBLK4
(
ldda
[%
o1
+
0x040
]
%
asi
,
%f0
)
!
MS
1
:
faligndata
%f10
,
%f12
,
%f26
!
FGA
Group16
faligndata
%f12
,
%f14
,
%f28
!
FGA
Group17
faligndata
%f14
,
%f0
,
%f30
!
FGA
Group18
EXBLK4
(
stda
%f16
,
[%
o0
]
ASI_BLK_AIUS
)
!
MS
add
%
o0
,
0x40
,
%
o0
!
A0
add
%
o1
,
0x40
,
%
o1
!
A1
membar
#
Sync
!
MS
Group26
(
7
-
cycle
stall
)
/
*
Now
we
copy
the
(
len
modulo
64
)
bytes
at
the
end
.
*
Note
how
we
borrow
the
%f0
loaded
above
.
*
*
Also
notice
how
this
code
is
careful
not
to
perform
a
*
load
past
the
end
of
the
src
buffer
just
like
similar
*
code
found
in
U3copy_in_user_toosmall
processing
.
*/
U3copy_in_user_loopend
:
and
%
o2
,
0x3f
,
%
o2
!
A0
Group
andcc
%
o2
,
0x38
,
%
g2
!
A0
Group
be
,
pn
%
icc
,
U3copy_in_user_endcruft
!
BR
subcc
%
g2
,
0x8
,
%
g2
!
A1
be
,
pn
%
icc
,
U3copy_in_user_endcruft
!
BR
Group
cmp
%
g1
,
0
!
A0
be
,
a
,
pt
%
icc
,
1
f
!
BR
Group
EX
(
ldda
[%
o1
+
0x00
]
%
asi
,
%f0
,
add
%
o2
,
%
g0
)
!
MS
1
:
EX
(
ldda
[%
o1
+
0x08
]
%
asi
,
%f2
,
add
%
o2
,
%
g0
)
!
MS
Group
add
%
o1
,
0x8
,
%
o1
!
A0
sub
%
o2
,
0x8
,
%
o2
!
A1
subcc
%
g2
,
0x8
,
%
g2
!
A0
Group
faligndata
%f0
,
%f2
,
%f8
!
FGA
Group
EX
(
stda
%f8
,
[%
o0
+
0x00
]
%
asi
,
add
%
o2
,
0x8
)
!
MS
(
XXX
does
it
stall
here
?
XXX
)
be
,
pn
%
icc
,
U3copy_in_user_endcruft
!
BR
add
%
o0
,
0x8
,
%
o0
!
A0
EX
(
ldda
[%
o1
+
0x08
]
%
asi
,
%f0
,
add
%
o2
,
%
g0
)
!
MS
Group
add
%
o1
,
0x8
,
%
o1
!
A0
sub
%
o2
,
0x8
,
%
o2
!
A1
subcc
%
g2
,
0x8
,
%
g2
!
A0
Group
faligndata
%f2
,
%f0
,
%f8
!
FGA
EX
(
stda
%f8
,
[%
o0
+
0x00
]
%
asi
,
add
%
o2
,
0x8
)
!
MS
(
XXX
does
it
stall
here
?
XXX
)
bne
,
pn
%
icc
,
1
b
!
BR
add
%
o0
,
0x8
,
%
o0
!
A0
Group
/
*
If
anything
is
left
,
we
copy
it
one
byte
at
a
time
.
*
Note
that
%
g1
is
(
src
&
0x3
)
saved
above
before
the
*
alignaddr
was
performed
.
*/
U3copy_in_user_endcruft
:
cmp
%
o2
,
0
add
%
o1
,
%
g1
,
%
o1
VISExitHalf
be
,
pn
%
icc
,
U3copy_in_user_short_ret
ba
,
pt
%
xcc
,
small_copy_unaligned
nop
ba
,
a
,
pt
%
xcc
,
U3copy_in_user_short
/
*
If
we
get
here
,
then
32
<=
len
<
(
6
*
64
)
*/
U3copy_in_user_toosmall
:
#ifdef SMALL_COPY_USES_FPU
/
*
Is
'dst'
already
aligned
on
an
8
-
byte
boundary
?
*/
be
,
pt
%
xcc
,
2
f
!
BR
Group
/
*
Compute
abs
((
dst
&
7
)
-
8
)
into
%
g2
.
This
is
the
number
*
of
bytes
to
copy
to
make
'dst'
8
-
byte
aligned
.
We
pre
-
*
subtract
this
from
'len'
.
*/
sub
%
g2
,
0x8
,
%
g2
!
A0
sub
%
g0
,
%
g2
,
%
g2
!
A0
Group
(
reg
-
dep
)
sub
%
o2
,
%
g2
,
%
o2
!
A0
Group
(
reg
-
dep
)
/
*
Copy
%
g2
bytes
from
src
to
dst
,
one
byte
at
a
time
.
*/
1
:
EXNV2
(
lduba
[%
o1
+
0x00
]
%
asi
,
%
o3
,
add
%
o2
,
%
g2
)!
MS
(
Group
)
(%
o3
in
3
cycles
)
add
%
o1
,
0x1
,
%
o1
!
A1
add
%
o0
,
0x1
,
%
o0
!
A0
Group
subcc
%
g2
,
0x1
,
%
g2
!
A1
bg
,
pt
%
icc
,
1
b
!
BR
Group
EXNV2
(
stba
%
o3
,
[%
o0
+
-
1
]
%
asi
,
add
%
o2
,
%
g2
)
!
MS
Group
2
:
VISEntryHalf
!
MS
+
MS
/
*
Compute
(
len
-
(
len
%
8
))
into
%
g2
.
This
is
guaranteed
*
to
be
nonzero
.
*/
andn
%
o2
,
0x7
,
%
g2
!
A0
Group
/
*
You
may
read
this
and
believe
that
it
allows
reading
*
one
8
-
byte
longword
past
the
end
of
src
.
It
actually
*
does
not
,
as
%
g2
is
subtracted
as
loads
are
done
from
*
src
,
so
we
always
stop
before
running
off
the
end
.
*
Also
,
we
are
guaranteed
to
have
at
least
0x10
bytes
*
to
move
here
.
*/
sub
%
g2
,
0x8
,
%
g2
!
A0
Group
(
reg
-
dep
)
alignaddr
%
o1
,
%
g0
,
%
g1
!
MS
(
Break
-
after
)
EX
(
ldda
[%
g1
+
0x00
]
%
asi
,
%f0
,
add
%
o2
,
%
g0
)
!
MS
Group
(
1
-
cycle
stall
)
add
%
g1
,
0x8
,
%
g1
!
A0
1
:
EX
(
ldda
[%
g1
+
0x00
]
%
asi
,
%f2
,
add
%
o2
,
%
g0
)
!
MS
Group
a
dd
%
g1
,
0x8
,
%
g1
!
A
0
sub
%
o2
,
0x8
,
%
o2
!
A1
subcc
%
g2
,
0x8
,
%
g2
!
A0
Group
small_copy
:
/
*
0
<
len
<=
16
*/
a
ndcc
%
o3
,
0x3
,
%
g
0
bne
,
pn
%
XCC
,
small_copy_unaligned
sub
%
o0
,
%
o1
,
%
o3
faligndata
%f0
,
%f2
,
%f8
!
FGA
Group
(
1
-
cycle
stall
)
EX
(
stda
%f8
,
[%
o0
+
0x00
]
%
asi
,
add
%
o2
,
0x8
)
!
MS
Group
(
2
-
cycle
stall
)
add
%
o1
,
0x8
,
%
o1
!
A0
be
,
pn
%
icc
,
2
f
!
BR
small_copy_aligned
:
subcc
%
o2
,
4
,
%
o2
EXNV4
(
lduwa
[%
o1
]
%
asi
,
%
g1
,
add
%
o2
,
%
g0
)
EXNV4
(
stwa
%
g1
,
[%
o1
+
%
o3
]
ASI_AIUS
,
add
%
o2
,
%
g0
)
bgu
,
pt
%
XCC
,
small_copy_aligned
add
%
o1
,
4
,
%
o1
add
%
o0
,
0x8
,
%
o0
!
A1
EX
(
ldda
[%
g1
+
0x00
]
%
asi
,
%f0
,
add
%
o2
,
%
g0
)
!
MS
Group
add
%
g1
,
0x8
,
%
g1
!
A0
sub
%
o2
,
0x8
,
%
o2
!
A1
out
:
retl
clr
%
o0
subcc
%
g2
,
0x8
,
%
g2
!
A0
Group
faligndata
%f2
,
%f0
,
%f8
!
FGA
Group
(
1
-
cycle
stall
)
EX
(
stda
%f8
,
[%
o0
+
0x00
]
%
asi
,
add
%
o2
,
0x8
)
!
MS
Group
(
2
-
cycle
stall
)
add
%
o1
,
0x8
,
%
o1
!
A0
bne
,
pn
%
icc
,
1
b
!
BR
add
%
o0
,
0x8
,
%
o0
!
A1
/
*
Nothing
left
to
copy
?
*/
2
:
cmp
%
o2
,
0
!
A0
Group
VISExitHalf
!
A0
+
MS
be
,
pn
%
icc
,
U3copy_in_user_short_ret
!
BR
Group
nop
!
A0
ba
,
a
,
pt
%
xcc
,
U3copy_in_user_short
!
BR
Group
#else /* !(SMALL_COPY_USES_FPU) */
xor
%
o1
,
%
o0
,
%
g2
andcc
%
g2
,
0x7
,
%
g0
bne
,
pn
%
icc
,
U3copy_in_user_short
andcc
%
o1
,
0x7
,
%
g2
be
,
pt
%
xcc
,
2
f
sub
%
g2
,
0x8
,
%
g2
sub
%
g0
,
%
g2
,
%
g2
sub
%
o2
,
%
g2
,
%
o2
1
:
EXNV2
(
lduba
[%
o1
+
0x00
]
%
asi
,
%
o3
,
add
%
o2
,
%
g2
)
add
%
o1
,
0x1
,
%
o1
add
%
o0
,
0x1
,
%
o0
subcc
%
g2
,
0x1
,
%
g2
bg
,
pt
%
icc
,
1
b
EXNV2
(
stba
%
o3
,
[%
o0
+
-
1
]
%
asi
,
add
%
o2
,
%
g2
)
2
:
andn
%
o2
,
0x7
,
%
g2
sub
%
o2
,
%
g2
,
%
o2
3
:
EXNV3
(
ldxa
[%
o1
+
0x00
]
%
asi
,
%
o3
,
add
%
o2
,
%
g2
)
add
%
o1
,
0x8
,
%
o1
add
%
o0
,
0x8
,
%
o0
subcc
%
g2
,
0x8
,
%
g2
bg
,
pt
%
icc
,
3
b
EXNV3
(
stxa
%
o3
,
[%
o0
+
-
8
]
%
asi
,
add
%
o2
,
%
g2
)
cmp
%
o2
,
0
bne
,
pn
%
icc
,
U3copy_in_user_short
nop
ba
,
a
,
pt
%
xcc
,
U3copy_in_user_short_ret
#endif /* !(SMALL_COPY_USES_FPU) */
.
align
32
small_copy_unaligned
:
subcc
%
o2
,
1
,
%
o2
EXNV1
(
lduba
[%
o1
]
%
asi
,
%
g1
,
add
%
o2
,
%
g0
)
EXNV1
(
stba
%
g1
,
[%
o1
+
%
o3
]
ASI_AIUS
,
add
%
o2
,
%
g0
)
bgu
,
pt
%
XCC
,
small_copy_unaligned
add
%
o1
,
1
,
%
o1
retl
clr
%
o0
arch/sparc64/lib/U3copy_to_user.S
View file @
da09d2f1
/*
$Id
:
U3copy_to_user
.
S
,
v
1
.3
2000
/
11
/
01
09
:
29
:
19
davem
Exp
$
*
U3memcpy
.
S
:
UltraSparc
-
III
optimized
copy
to
userspace
.
/*
U3copy_to_user
.
S
:
UltraSparc
-
III
optimized
memcpy
.
*
*
Copyright
(
C
)
1999
,
2000
David
S
.
Miller
(
davem
@
redhat
.
com
)
*
Copyright
(
C
)
1999
,
2000
,
2004
David
S
.
Miller
(
davem
@
redhat
.
com
)
*/
#ifdef __KERNEL__
#include <asm/visasm.h>
#include <asm/asi.h>
#include <asm/dcu.h>
#include <asm/spitfire.h>
#undef SMALL_COPY_USES_FPU
#define XCC xcc
#define EXNV(x,y,a,b) \
98
:
x
,
y
; \
.
section
.
fixup
; \
...
...
@@ -34,6 +34,18 @@
.
text
; \
.
align
4
;
#define EXNV3(x,y,a,b) \
98
:
x
,
y
; \
.
section
.
fixup
; \
.
align
4
; \
99
:
a
,
b
,
%
o0
; \
retl
; \
add
%
o0
,
4
,
%
o0
; \
.
section
__ex_table
; \
.
align
4
; \
.
word
98
b
,
99
b
; \
.
text
; \
.
align
4
;
#define EXNV4(x,y,a,b) \
98
:
x
,
y
; \
.
section
.
fixup
; \
.
align
4
; \
...
...
@@ -112,22 +124,9 @@
.
word
98
b
,
99
b
; \
.
text
; \
.
align
4
;
#else
#define ASI_AIUS 0x80
#define ASI_BLK_AIUS 0xf0
#define FPRS_FEF 0x04
#define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs
#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
#define SMALL_COPY_USES_FPU
#define EXNV(x,y,a,b) x,y;
#define EXNV2(x,y,a,b) x,y;
#define EXNV3(x,y,a,b) x,y;
#define EX(x,y,a,b) x,y;
#define EXBLK1(x,y) x,y;
#define EXBLK2(x,y) x,y;
#define EXBLK3(x,y) x,y;
#define EXBLK4(x,y) x,y;
#endif
.
register
%
g2
,#
scratch
.
register
%
g3
,#
scratch
/
*
Special
/
non
-
trivial
issues
of
this
code
:
*
...
...
@@ -154,83 +153,58 @@ U3copy_to_user: /* %o0=dst, %o1=src, %o2=len */
*
Reading
%
asi
to
check
for
KERNEL_DS
is
comparatively
*
cheap
.
*/
rd
%
asi
,
%
g1
!
MS
Group
(
4
cycles
)
cmp
%
g1
,
ASI_AIUS
!
A0
Group
bne
U3memcpy
!
BR
nop
!
A1
#ifndef __KERNEL__
/
*
Save
away
original
'dst'
for
memcpy
return
value
.
*/
mov
%
o0
,
%
g3
!
A0
Group
#endif
/
*
Anything
to
copy
at
all
?
*/
cmp
%
o2
,
0
!
A1
ble
,
pn
%
icc
,
U3copy_to_user_short_ret
!
BR
/
*
Extremely
small
copy
?
*/
cmp
%
o2
,
31
!
A0
Group
ble
,
pn
%
icc
,
U3copy_to_user_short
!
BR
/
*
Large
enough
to
use
unrolled
prefetch
loops
?
*/
cmp
%
o2
,
0x100
!
A1
bge
,
a
,
pt
%
icc
,
U3copy_to_user_enter
!
BR
Group
andcc
%
o0
,
0x3f
,
%
g2
!
A0
ba
,
pt
%
xcc
,
U3copy_to_user_toosmall
!
BR
Group
andcc
%
o0
,
0x7
,
%
g2
!
A0
rd
%
asi
,
%
g1
cmp
%
g1
,
ASI_AIUS
bne
,
pn
%
icc
,
U3memcpy_user_stub
nop
.
align
32
U3copy_to_user_short
:
/
*
Copy
%
o2
bytes
from
src
to
dst
,
one
byte
at
a
time
.
*/
ldub
[%
o1
+
0x00
],
%
o3
!
MS
Group
add
%
o1
,
0x1
,
%
o1
!
A0
add
%
o0
,
0x1
,
%
o0
!
A1
subcc
%
o2
,
1
,
%
o2
!
A0
Group
bg
,
pt
%
icc
,
U3copy_to_user_short
!
BR
EXNV
(
stba
%
o3
,
[%
o0
+
-
1
]
%
asi
,
add
%
o2
,
1
)
!
MS
Group
(
1
-
cycle
stall
)
U3copy_to_user_short_ret
:
#ifdef __KERNEL__
retl
!
BR
Group
(
0
-
4
cycle
stall
)
clr
%
o0
!
A0
#else
retl
!
BR
Group
(
0
-
4
cycle
stall
)
mov
%
g3
,
%
o0
!
A0
#endif
/
*
Here
len
>=
(
6
*
64
)
and
condition
codes
reflect
execution
cmp
%
o2
,
0
be
,
pn
%
XCC
,
out
or
%
o0
,
%
o1
,
%
o3
cmp
%
o2
,
16
bleu
,
a
,
pn
%
XCC
,
small_copy
or
%
o3
,
%
o2
,
%
o3
cmp
%
o2
,
256
blu
,
pt
%
XCC
,
medium_copy
andcc
%
o3
,
0x7
,
%
g0
ba
,
pt
%
xcc
,
enter
andcc
%
o0
,
0x3f
,
%
g2
/
*
Here
len
>=
256
and
condition
codes
reflect
execution
*
of
"andcc %o0, 0x7, %g2"
,
done
by
caller
.
*/
.
align
64
U3copy_to_user_
enter
:
enter
:
/
*
Is
'dst'
already
aligned
on
an
64
-
byte
boundary
?
*/
be
,
pt
%
xcc
,
2
f
!
BR
be
,
pt
%
XCC
,
2
f
/
*
Compute
abs
((
dst
&
0x3f
)
-
0x40
)
into
%
g2
.
This
is
the
number
*
of
bytes
to
copy
to
make
'dst'
64
-
byte
aligned
.
We
pre
-
*
subtract
this
from
'len'
.
*/
sub
%
g2
,
0x40
,
%
g2
!
A0
Group
sub
%
g0
,
%
g2
,
%
g2
!
A0
Group
sub
%
o2
,
%
g2
,
%
o2
!
A0
Group
sub
%
g2
,
0x40
,
%
g2
sub
%
g0
,
%
g2
,
%
g2
sub
%
o2
,
%
g2
,
%
o2
/
*
Copy
%
g2
bytes
from
src
to
dst
,
one
byte
at
a
time
.
*/
1
:
ldub
[%
o1
+
0x00
],
%
o3
!
MS
(
Group
)
add
%
o1
,
0x1
,
%
o1
!
A1
add
%
o0
,
0x1
,
%
o0
!
A0
Group
subcc
%
g2
,
0x1
,
%
g2
!
A1
1
:
ldub
[%
o1
+
0x00
],
%
o3
add
%
o1
,
0x1
,
%
o1
add
%
o0
,
0x1
,
%
o0
subcc
%
g2
,
0x1
,
%
g2
bg
,
pt
%
icc
,
1
b
!
BR
Group
EXNV2
(
stba
%
o3
,
[%
o0
+
-
1
]
%
asi
,
add
%
o2
,
%
g2
)
!
MS
Group
bg
,
pt
%
XCC
,
1
b
EXNV2
(
stba
%
o3
,
[%
o0
+
-
1
]
%
asi
,
add
%
o2
,
%
g2
)
2
:
VISEntryHalf
!
MS
+
MS
and
%
o1
,
0x7
,
%
g1
!
A1
ba
,
pt
%
xcc
,
U3copy_to_user_begin
!
BR
alignaddr
%
o1
,
%
g0
,
%
o1
!
MS
(
Break
-
after
)
2
:
VISEntryHalf
and
%
o1
,
0x7
,
%
g1
ba
,
pt
%
xcc
,
begin
alignaddr
%
o1
,
%
g0
,
%
o1
.
align
64
U3copy_to_user_
begin
:
#ifdef __KERNEL__
begin
:
.
globl
U3copy_to_user_nop_1_6
U3copy_to_user_nop_1_6
:
ldxa
[%
g0
]
ASI_DCU_CONTROL_REG
,
%
g3
...
...
@@ -239,309 +213,221 @@ U3copy_to_user_nop_1_6:
or
%
g3
,
%
o3
,
%
o3
stxa
%
o3
,
[%
g0
]
ASI_DCU_CONTROL_REG
!
Enable
P
-
cache
membar
#
Sync
#endif
prefetch
[%
o1
+
0x000
],
#
one_read
!
MS
Group1
prefetch
[%
o1
+
0x040
],
#
one_read
!
MS
Group2
andn
%
o2
,
(
0x40
-
1
),
%
o4
!
A0
prefetch
[%
o1
+
0x080
],
#
one_read
!
MS
Group3
cmp
%
o4
,
0x140
!
A0
prefetch
[%
o1
+
0x0c0
],
#
one_read
!
MS
Group4
ldd
[%
o1
+
0x000
],
%f0
!
MS
Group5
(
%f0
results
at
G8
)
bge
,
a
,
pt
%
icc
,
1
f
!
BR
prefetch
[%
o1
+
0x100
],
#
one_read
!
MS
Group6
1
:
ldd
[%
o1
+
0x008
],
%f2
!
AX
(
%f2
results
at
G9
)
cmp
%
o4
,
0x180
!
A1
bge
,
a
,
pt
%
icc
,
1
f
!
BR
prefetch
[%
o1
+
0x140
],
#
one_read
!
MS
Group7
1
:
ldd
[%
o1
+
0x010
],
%f4
!
AX
(
%f4
results
at
G10
)
cmp
%
o4
,
0x1c0
!
A1
bge
,
a
,
pt
%
icc
,
1
f
!
BR
prefetch
[%
o1
+
0x180
],
#
one_read
!
MS
Group8
1
:
faligndata
%f0
,
%f2
,
%f16
!
FGA
Group9
(
%f16
at
G12
)
ldd
[%
o1
+
0x018
],
%f6
!
AX
(
%f6
results
at
G12
)
faligndata
%f2
,
%f4
,
%f18
!
FGA
Group10
(
%f18
results
at
G13
)
ldd
[%
o1
+
0x020
],
%f8
!
MS
(
%f8
results
at
G13
)
faligndata
%f4
,
%f6
,
%f20
!
FGA
Group12
(
1
-
cycle
stall
,
%f20
at
G15
)
ldd
[%
o1
+
0x028
],
%f10
!
MS
(
%f10
results
at
G15
)
faligndata
%f6
,
%f8
,
%f22
!
FGA
Group13
(
%f22
results
at
G16
)
ldd
[%
o1
+
0x030
],
%f12
!
MS
(
%f12
results
at
G16
)
faligndata
%f8
,
%f10
,
%f24
!
FGA
Group15
(
1
-
cycle
stall
,
%f24
at
G18
)
ldd
[%
o1
+
0x038
],
%f14
!
MS
(
%f14
results
at
G18
)
faligndata
%f10
,
%f12
,
%f26
!
FGA
Group16
(
%f26
results
at
G19
)
ldd
[%
o1
+
0x040
],
%f0
!
MS
(
%f0
results
at
G19
)
/
*
We
only
use
the
first
loop
if
len
>
(
7
*
64
)
.
*/
subcc
%
o4
,
0x1c0
,
%
o4
!
A0
Group17
bg
,
pt
%
icc
,
U3copy_to_user_loop1
!
BR
add
%
o1
,
0x40
,
%
o1
!
A1
add
%
o4
,
0x140
,
%
o4
!
A0
Group18
ba
,
pt
%
xcc
,
U3copy_to_user_loop2
!
BR
srl
%
o4
,
6
,
%
o3
!
A0
Group19
nop
nop
nop
nop
nop
nop
nop
prefetch
[%
o1
+
0x000
],
#
one_read
prefetch
[%
o1
+
0x040
],
#
one_read
andn
%
o2
,
(
0x40
-
1
),
%
o4
prefetch
[%
o1
+
0x080
],
#
one_read
prefetch
[%
o1
+
0x0c0
],
#
one_read
ldd
[%
o1
+
0x000
],
%f0
prefetch
[%
o1
+
0x100
],
#
one_read
ldd
[%
o1
+
0x008
],
%f2
prefetch
[%
o1
+
0x140
],
#
one_read
ldd
[%
o1
+
0x010
],
%f4
prefetch
[%
o1
+
0x180
],
#
one_read
faligndata
%f0
,
%f2
,
%f16
ldd
[%
o1
+
0x018
],
%f6
faligndata
%f2
,
%f4
,
%f18
ldd
[%
o1
+
0x020
],
%f8
faligndata
%f4
,
%f6
,
%f20
ldd
[%
o1
+
0x028
],
%f10
faligndata
%f6
,
%f8
,
%f22
ldd
[%
o1
+
0x030
],
%f12
faligndata
%f8
,
%f10
,
%f24
ldd
[%
o1
+
0x038
],
%f14
faligndata
%f10
,
%f12
,
%f26
ldd
[%
o1
+
0x040
],
%f0
sub
%
o4
,
0x80
,
%
o4
add
%
o1
,
0x40
,
%
o1
ba
,
pt
%
xcc
,
loop
srl
%
o4
,
6
,
%
o3
/
*
This
loop
performs
the
copy
and
queues
new
prefetches
.
*
We
drop
into
the
second
loop
when
len
<=
(
5
*
64
)
.
Note
*
that
this
(
5
*
64
)
factor
has
been
subtracted
from
len
*
already
.
*/
U3copy_to_user_loop1
:
ldd
[%
o1
+
0x008
],
%f2
!
MS
Group2
(
%f2
results
at
G5
)
faligndata
%f12
,
%f14
,
%f28
!
FGA
(
%f28
results
at
G5
)
ldd
[%
o1
+
0x010
],
%f4
!
MS
Group3
(
%f4
results
at
G6
)
faligndata
%f14
,
%f0
,
%f30
!
FGA
Group4
(
1
-
cycle
stall
,
%f30
at
G7
)
EXBLK1
(
stda
%f16
,
[%
o0
]
ASI_BLK_AIUS
)
!
MS
ldd
[%
o1
+
0x018
],
%f6
!
AX
(
%f6
results
at
G7
)
faligndata
%f0
,
%f2
,
%f16
!
FGA
Group12
(
7
-
cycle
stall
)
ldd
[%
o1
+
0x020
],
%f8
!
MS
(
%f8
results
at
G15
)
faligndata
%f2
,
%f4
,
%f18
!
FGA
Group13
(
%f18
results
at
G16
)
ldd
[%
o1
+
0x028
],
%f10
!
MS
(
%f10
results
at
G16
)
faligndata
%f4
,
%f6
,
%f20
!
FGA
Group14
(
%f20
results
at
G17
)
ldd
[%
o1
+
0x030
],
%f12
!
MS
(
%f12
results
at
G17
)
faligndata
%f6
,
%f8
,
%f22
!
FGA
Group15
(
%f22
results
at
G18
)
ldd
[%
o1
+
0x038
],
%f14
!
MS
(
%f14
results
at
G18
)
faligndata
%f8
,
%f10
,
%f24
!
FGA
Group16
(
%f24
results
at
G19
)
ldd
[%
o1
+
0x040
],
%f0
!
AX
(
%f0
results
at
G19
)
prefetch
[%
o1
+
0x180
],
#
one_read
!
MS
faligndata
%f10
,
%f12
,
%f26
!
FGA
Group17
(
%f26
results
at
G20
)
subcc
%
o4
,
0x40
,
%
o4
!
A0
add
%
o1
,
0x40
,
%
o1
!
A1
bg
,
pt
%
xcc
,
U3copy_to_user_loop1
!
BR
add
%
o0
,
0x40
,
%
o0
!
A0
Group18
U3copy_to_user_loop2_enter
:
mov
5
,
%
o3
!
A1
/
*
This
loop
performs
on
the
copy
,
no
new
prefetches
are
*
queued
.
We
do
things
this
way
so
that
we
do
not
perform
*
any
spurious
prefetches
past
the
end
of
the
src
buffer
.
*/
U3copy_to_user_loop2
:
ldd
[%
o1
+
0x008
],
%f2
!
MS
faligndata
%f12
,
%f14
,
%f28
!
FGA
Group2
ldd
[%
o1
+
0x010
],
%f4
!
MS
faligndata
%f14
,
%f0
,
%f30
!
FGA
Group4
(
1
-
cycle
stall
)
EXBLK2
(
stda
%f16
,
[%
o0
]
ASI_BLK_AIUS
)
!
MS
ldd
[%
o1
+
0x018
],
%f6
!
AX
faligndata
%f0
,
%f2
,
%f16
!
FGA
Group12
(
7
-
cycle
stall
)
ldd
[%
o1
+
0x020
],
%f8
!
MS
faligndata
%f2
,
%f4
,
%f18
!
FGA
Group13
ldd
[%
o1
+
0x028
],
%f10
!
MS
faligndata
%f4
,
%f6
,
%f20
!
FGA
Group14
ldd
[%
o1
+
0x030
],
%f12
!
MS
faligndata
%f6
,
%f8
,
%f22
!
FGA
Group15
ldd
[%
o1
+
0x038
],
%f14
!
MS
faligndata
%f8
,
%f10
,
%f24
!
FGA
Group16
ldd
[%
o1
+
0x040
],
%f0
!
AX
faligndata
%f10
,
%f12
,
%f26
!
FGA
Group17
subcc
%
o3
,
0x01
,
%
o3
!
A0
add
%
o1
,
0x40
,
%
o1
!
A1
bg
,
pt
%
xcc
,
U3copy_to_user_loop2
!
BR
add
%
o0
,
0x40
,
%
o0
!
A0
Group18
.
align
64
loop
:
ldd
[%
o1
+
0x008
],
%f2
faligndata
%f12
,
%f14
,
%f28
ldd
[%
o1
+
0x010
],
%f4
faligndata
%f14
,
%f0
,
%f30
EXBLK2
(
stda
%f16
,
[%
o0
]
ASI_BLK_AIUS
)
ldd
[%
o1
+
0x018
],
%f6
faligndata
%f0
,
%f2
,
%f16
ldd
[%
o1
+
0x020
],
%f8
faligndata
%f2
,
%f4
,
%f18
ldd
[%
o1
+
0x028
],
%f10
faligndata
%f4
,
%f6
,
%f20
ldd
[%
o1
+
0x030
],
%f12
faligndata
%f6
,
%f8
,
%f22
ldd
[%
o1
+
0x038
],
%f14
faligndata
%f8
,
%f10
,
%f24
ldd
[%
o1
+
0x040
],
%f0
prefetch
[%
o1
+
0x180
],
#
one_read
faligndata
%f10
,
%f12
,
%f26
subcc
%
o3
,
0x01
,
%
o3
add
%
o1
,
0x40
,
%
o1
bg
,
pt
%
XCC
,
loop
add
%
o0
,
0x40
,
%
o0
/
*
Finally
we
copy
the
last
full
64
-
byte
block
.
*/
U3copy_to_user_
loopfini
:
ldd
[%
o1
+
0x008
],
%f2
!
MS
faligndata
%f12
,
%f14
,
%f28
!
FGA
ldd
[%
o1
+
0x010
],
%f4
!
MS
Group19
faligndata
%f14
,
%f0
,
%f30
!
FGA
EXBLK3
(
stda
%f16
,
[%
o0
]
ASI_BLK_AIUS
)
!
MS
Group20
ldd
[%
o1
+
0x018
],
%f6
!
AX
faligndata
%f0
,
%f2
,
%f16
!
FGA
Group11
(
7
-
cycle
stall
)
ldd
[%
o1
+
0x020
],
%f8
!
MS
faligndata
%f2
,
%f4
,
%f18
!
FGA
Group12
ldd
[%
o1
+
0x028
],
%f10
!
MS
faligndata
%f4
,
%f6
,
%f20
!
FGA
Group13
ldd
[%
o1
+
0x030
],
%f12
!
MS
faligndata
%f6
,
%f8
,
%f22
!
FGA
Group14
ldd
[%
o1
+
0x038
],
%f14
!
MS
faligndata
%f8
,
%f10
,
%f24
!
FGA
Group15
cmp
%
g1
,
0
!
A0
be
,
pt
%
icc
,
1
f
!
BR
add
%
o0
,
0x40
,
%
o0
!
A1
ldd
[%
o1
+
0x040
],
%f0
!
MS
1
:
faligndata
%f10
,
%f12
,
%f26
!
FGA
Group16
faligndata
%f12
,
%f14
,
%f28
!
FGA
Group17
faligndata
%f14
,
%f0
,
%f30
!
FGA
Group18
EXBLK4
(
stda
%f16
,
[%
o0
]
ASI_BLK_AIUS
)
!
MS
add
%
o0
,
0x40
,
%
o0
!
A0
add
%
o1
,
0x40
,
%
o1
!
A1
#ifdef __KERNEL__
loopfini
:
ldd
[%
o1
+
0x008
],
%f2
faligndata
%f12
,
%f14
,
%f28
ldd
[%
o1
+
0x010
],
%f4
faligndata
%f14
,
%f0
,
%f30
EXBLK3
(
stda
%f16
,
[%
o0
]
ASI_BLK_AIUS
)
ldd
[%
o1
+
0x018
],
%f6
faligndata
%f0
,
%f2
,
%f16
ldd
[%
o1
+
0x020
],
%f8
faligndata
%f2
,
%f4
,
%f18
ldd
[%
o1
+
0x028
],
%f10
faligndata
%f4
,
%f6
,
%f20
ldd
[%
o1
+
0x030
],
%f12
faligndata
%f6
,
%f8
,
%f22
ldd
[%
o1
+
0x038
],
%f14
faligndata
%f8
,
%f10
,
%f24
cmp
%
g1
,
0
be
,
pt
%
XCC
,
1
f
add
%
o0
,
0x40
,
%
o0
ldd
[%
o1
+
0x040
],
%f0
1
:
faligndata
%f10
,
%f12
,
%f26
faligndata
%f12
,
%f14
,
%f28
faligndata
%f14
,
%f0
,
%f30
EXBLK4
(
stda
%f16
,
[%
o0
]
ASI_BLK_AIUS
)
add
%
o0
,
0x40
,
%
o0
add
%
o1
,
0x40
,
%
o1
.
globl
U3copy_to_user_nop_2_3
U3copy_to_user_nop_2_3
:
mov
PRIMARY_CONTEXT
,
%
o3
stxa
%
g0
,
[%
o3
]
ASI_DMMU
!
Flush
P
-
cache
stxa
%
g3
,
[%
g0
]
ASI_DCU_CONTROL_REG
!
Disable
P
-
cache
#endif
membar
#
Sync
!
MS
Group26
(
7
-
cycle
stall
)
membar
#
Sync
/
*
Now
we
copy
the
(
len
modulo
64
)
bytes
at
the
end
.
*
Note
how
we
borrow
the
%f0
loaded
above
.
*
*
Also
notice
how
this
code
is
careful
not
to
perform
a
*
load
past
the
end
of
the
src
buffer
just
like
similar
*
code
found
in
U3copy_to_user_toosmall
processing
.
*
load
past
the
end
of
the
src
buffer
.
*/
U3copy_to_user_
loopend
:
and
%
o2
,
0x3f
,
%
o2
!
A0
Group
andcc
%
o2
,
0x38
,
%
g2
!
A0
Group
be
,
pn
%
icc
,
U3copy_to_user_endcruft
!
BR
subcc
%
g2
,
0x8
,
%
g2
!
A1
be
,
pn
%
icc
,
U3copy_to_user_endcruft
!
BR
Group
cmp
%
g1
,
0
!
A0
be
,
a
,
pt
%
icc
,
1
f
!
BR
Group
ldd
[%
o1
+
0x00
],
%f0
!
MS
1
:
ldd
[%
o1
+
0x08
],
%f2
!
MS
Group
add
%
o1
,
0x8
,
%
o1
!
A0
sub
%
o2
,
0x8
,
%
o2
!
A1
subcc
%
g2
,
0x8
,
%
g2
!
A0
Group
faligndata
%f0
,
%f2
,
%f8
!
FGA
Group
EX
(
stda
%f8
,
[%
o0
+
0x00
]
%
asi
,
add
%
o2
,
0x8
)
!
MS
(
XXX
does
it
stall
here
?
XXX
)
be
,
pn
%
icc
,
U3copy_to_user_endcruft
!
BR
add
%
o0
,
0x8
,
%
o0
!
A0
ldd
[%
o1
+
0x08
],
%f0
!
MS
Group
add
%
o1
,
0x8
,
%
o1
!
A0
sub
%
o2
,
0x8
,
%
o2
!
A1
subcc
%
g2
,
0x8
,
%
g2
!
A0
Group
faligndata
%f2
,
%f0
,
%f8
!
FGA
EX
(
stda
%f8
,
[%
o0
+
0x00
]
%
asi
,
add
%
o2
,
0x8
)
!
MS
(
XXX
does
it
stall
here
?
XXX
)
bne
,
pn
%
icc
,
1
b
!
BR
add
%
o0
,
0x8
,
%
o0
!
A0
Group
loopend
:
and
%
o2
,
0x3f
,
%
o2
andcc
%
o2
,
0x38
,
%
g2
be
,
pn
%
XCC
,
endcruft
subcc
%
g2
,
0x8
,
%
g2
be
,
pn
%
XCC
,
endcruft
cmp
%
g1
,
0
be
,
a
,
pt
%
XCC
,
1
f
ldd
[%
o1
+
0x00
],
%f0
1
:
ldd
[%
o1
+
0x08
],
%f2
add
%
o1
,
0x8
,
%
o1
sub
%
o2
,
0x8
,
%
o2
subcc
%
g2
,
0x8
,
%
g2
faligndata
%f0
,
%f2
,
%f8
EX
(
stda
%f8
,
[%
o0
+
0x00
]
%
asi
,
add
%
o2
,
0x8
)
be
,
pn
%
XCC
,
endcruft
add
%
o0
,
0x8
,
%
o0
ldd
[%
o1
+
0x08
],
%f0
add
%
o1
,
0x8
,
%
o1
sub
%
o2
,
0x8
,
%
o2
subcc
%
g2
,
0x8
,
%
g2
faligndata
%f2
,
%f0
,
%f8
EX
(
stda
%f8
,
[%
o0
+
0x00
]
%
asi
,
add
%
o2
,
0x8
)
bne
,
pn
%
XCC
,
1
b
add
%
o0
,
0x8
,
%
o0
/
*
If
anything
is
left
,
we
copy
it
one
byte
at
a
time
.
*
Note
that
%
g1
is
(
src
&
0x3
)
saved
above
before
the
*
alignaddr
was
performed
.
*/
U3copy_to_user_
endcruft
:
endcruft
:
cmp
%
o2
,
0
add
%
o1
,
%
g1
,
%
o1
VISExitHalf
be
,
pn
%
icc
,
U3copy_to_user_short_ret
nop
ba
,
a
,
pt
%
xcc
,
U3copy_to_user_short
/
*
If
we
get
here
,
then
32
<=
len
<
(
6
*
64
)
*/
U3copy_to_user_toosmall
:
#ifdef SMALL_COPY_USES_FPU
/
*
Is
'dst'
already
aligned
on
an
8
-
byte
boundary
?
*/
be
,
pt
%
xcc
,
2
f
!
BR
Group
be
,
pn
%
XCC
,
out
sub
%
o0
,
%
o1
,
%
o3
/
*
Compute
abs
((
dst
&
7
)
-
8
)
into
%
g2
.
This
is
the
number
*
of
bytes
to
copy
to
make
'dst'
8
-
byte
aligned
.
We
pre
-
*
subtract
this
from
'len'
.
*/
sub
%
g2
,
0x8
,
%
g2
!
A0
sub
%
g0
,
%
g2
,
%
g2
!
A0
Group
(
reg
-
dep
)
sub
%
o2
,
%
g2
,
%
o2
!
A0
Group
(
reg
-
dep
)
/
*
Copy
%
g2
bytes
from
src
to
dst
,
one
byte
at
a
time
.
*/
1
:
ldub
[%
o1
+
0x00
],
%
o3
!
MS
(
Group
)
(%
o3
in
3
cycles
)
add
%
o1
,
0x1
,
%
o1
!
A1
add
%
o0
,
0x1
,
%
o0
!
A0
Group
subcc
%
g2
,
0x1
,
%
g2
!
A1
bg
,
pt
%
icc
,
1
b
!
BR
Group
EXNV2
(
stba
%
o3
,
[%
o0
+
-
1
]
%
asi
,
add
%
o2
,
%
g2
)
!
MS
Group
andcc
%
g1
,
0x7
,
%
g0
bne
,
pn
%
icc
,
small_copy_unaligned
andcc
%
o2
,
0x8
,
%
g0
be
,
pt
%
icc
,
1
f
nop
ldx
[%
o1
],
%
o5
EXNV
(
stxa
%
o5
,
[%
o1
+
%
o3
]
ASI_AIUS
,
add
%
o2
,
%
g0
)
add
%
o1
,
0x8
,
%
o1
2
:
VISEntryHalf
!
MS
+
MS
1
:
andcc
%
o2
,
0x4
,
%
g0
be
,
pt
%
icc
,
1
f
nop
lduw
[%
o1
],
%
o5
EXNV
(
stwa
%
o5
,
[%
o1
+
%
o3
]
ASI_AIUS
,
and
%
o2
,
0x7
)
add
%
o1
,
0x4
,
%
o1
/
*
Compute
(
len
-
(
len
%
8
))
into
%
g2
.
This
is
guaranteed
*
to
be
nonzero
.
*/
andn
%
o2
,
0x7
,
%
g2
!
A0
Group
/
*
You
may
read
this
and
believe
that
it
allows
reading
*
one
8
-
byte
longword
past
the
end
of
src
.
It
actually
*
does
not
,
as
%
g2
is
subtracted
as
loads
are
done
from
*
src
,
so
we
always
stop
before
running
off
the
end
.
*
Also
,
we
are
guaranteed
to
have
at
least
0x10
bytes
*
to
move
here
.
*/
sub
%
g2
,
0x8
,
%
g2
!
A0
Group
(
reg
-
dep
)
alignaddr
%
o1
,
%
g0
,
%
g1
!
MS
(
Break
-
after
)
ldd
[%
g1
+
0x00
],
%f0
!
MS
Group
(
1
-
cycle
stall
)
add
%
g1
,
0x8
,
%
g1
!
A0
1
:
ldd
[%
g1
+
0x00
],
%f2
!
MS
Group
add
%
g1
,
0x8
,
%
g1
!
A0
sub
%
o2
,
0x8
,
%
o2
!
A1
subcc
%
g2
,
0x8
,
%
g2
!
A0
Group
faligndata
%f0
,
%f2
,
%f8
!
FGA
Group
(
1
-
cycle
stall
)
EX
(
stda
%f8
,
[%
o0
+
0x00
]
%
asi
,
add
%
o2
,
0x8
)
!
MS
Group
(
2
-
cycle
stall
)
add
%
o1
,
0x8
,
%
o1
!
A0
be
,
pn
%
icc
,
2
f
!
BR
add
%
o0
,
0x8
,
%
o0
!
A1
ldd
[%
g1
+
0x00
],
%f0
!
MS
Group
add
%
g1
,
0x8
,
%
g1
!
A0
sub
%
o2
,
0x8
,
%
o2
!
A1
subcc
%
g2
,
0x8
,
%
g2
!
A0
Group
faligndata
%f2
,
%f0
,
%f8
!
FGA
Group
(
1
-
cycle
stall
)
EX
(
stda
%f8
,
[%
o0
+
0x00
]
%
asi
,
add
%
o2
,
0x8
)
!
MS
Group
(
2
-
cycle
stall
)
add
%
o1
,
0x8
,
%
o1
!
A0
bne
,
pn
%
icc
,
1
b
!
BR
add
%
o0
,
0x8
,
%
o0
!
A1
/
*
Nothing
left
to
copy
?
*/
2
:
cmp
%
o2
,
0
!
A0
Group
VISExitHalf
!
A0
+
MS
be
,
pn
%
icc
,
U3copy_to_user_short_ret
!
BR
Group
nop
!
A0
ba
,
a
,
pt
%
xcc
,
U3copy_to_user_short
!
BR
Group
#else /* !(SMALL_COPY_USES_FPU) */
xor
%
o1
,
%
o0
,
%
g2
andcc
%
g2
,
0x7
,
%
g0
bne
,
pn
%
icc
,
U3copy_to_user_short
andcc
%
o1
,
0x7
,
%
g2
be
,
pt
%
xcc
,
2
f
sub
%
g2
,
0x8
,
%
g2
sub
%
g0
,
%
g2
,
%
g2
sub
%
o2
,
%
g2
,
%
o2
1
:
andcc
%
o2
,
0x2
,
%
g0
be
,
pt
%
icc
,
1
f
nop
lduh
[%
o1
],
%
o5
EXNV
(
stha
%
o5
,
[%
o1
+
%
o3
]
ASI_AIUS
,
and
%
o2
,
0x3
)
add
%
o1
,
0x2
,
%
o1
1
:
ldub
[%
o1
+
0x00
],
%
o3
add
%
o1
,
0x1
,
%
o1
add
%
o0
,
0x1
,
%
o0
subcc
%
g2
,
0x1
,
%
g2
bg
,
pt
%
icc
,
1
b
EXNV2
(
stba
%
o3
,
[%
o0
+
-
1
]
%
asi
,
add
%
o2
,
%
g2
)
1
:
andcc
%
o2
,
0x1
,
%
g0
be
,
pt
%
icc
,
out
nop
ldub
[%
o1
],
%
o5
ba
,
pt
%
xcc
,
out
EXNV
(
stba
%
o5
,
[%
o1
+
%
o3
]
ASI_AIUS
,
and
%
o2
,
0x1
)
medium_copy
:
/
*
16
<
len
<=
64
*/
bne
,
pn
%
XCC
,
small_copy_unaligned
sub
%
o0
,
%
o1
,
%
o3
medium_copy_aligned
:
andn
%
o2
,
0x7
,
%
o4
and
%
o2
,
0x7
,
%
o2
1
:
subcc
%
o4
,
0x8
,
%
o4
ldx
[%
o1
],
%
o5
EXNV4
(
stxa
%
o5
,
[%
o1
+
%
o3
]
ASI_AIUS
,
add
%
o2
,
%
o4
)
bgu
,
pt
%
XCC
,
1
b
add
%
o1
,
0x8
,
%
o1
andcc
%
o2
,
0x4
,
%
g0
be
,
pt
%
XCC
,
1
f
nop
sub
%
o2
,
0x4
,
%
o2
lduw
[%
o1
],
%
o5
EXNV3
(
stwa
%
o5
,
[%
o1
+
%
o3
]
ASI_AIUS
,
add
%
o2
,
%
g0
)
add
%
o1
,
0x4
,
%
o1
1
:
cmp
%
o2
,
0
be
,
pt
%
XCC
,
out
nop
ba
,
pt
%
xcc
,
small_copy_unaligned
nop
2
:
andn
%
o2
,
0x7
,
%
g2
sub
%
o2
,
%
g2
,
%
o2
small_copy
:
/
*
0
<
len
<=
16
*/
andcc
%
o3
,
0x3
,
%
g0
bne
,
pn
%
XCC
,
small_copy_unaligned
sub
%
o0
,
%
o1
,
%
o3
3
:
ldx
[%
o1
+
0x00
],
%
o3
add
%
o1
,
0x8
,
%
o1
add
%
o0
,
0x8
,
%
o0
subcc
%
g2
,
0x8
,
%
g2
bg
,
pt
%
icc
,
3
b
EXNV3
(
stxa
%
o3
,
[%
o0
+
-
8
]
%
asi
,
add
%
o2
,
%
g2
)
small_copy_aligned
:
subcc
%
o2
,
4
,
%
o2
lduw
[%
o1
],
%
g1
EXNV3
(
stwa
%
g1
,
[%
o1
+
%
o3
]
ASI_AIUS
,
add
%
o2
,
%
g0
)
bg
u
,
pt
%
XCC
,
small_copy_aligned
add
%
o1
,
4
,
%
o1
cmp
%
o2
,
0
bne
,
pn
%
icc
,
U3copy_to_user_short
nop
ba
,
a
,
pt
%
xcc
,
U3copy_to_user_short_ret
out
:
retl
clr
%
o0
#endif /* !(SMALL_COPY_USES_FPU) */
.
align
32
small_copy_unaligned
:
subcc
%
o2
,
1
,
%
o2
ldub
[%
o1
],
%
g1
EXNV2
(
stba
%
g1
,
[%
o1
+
%
o3
]
ASI_AIUS
,
add
%
o2
,
%
g0
)
bgu
,
pt
%
XCC
,
small_copy_unaligned
add
%
o1
,
1
,
%
o1
retl
clr
%
o0
arch/sparc64/lib/U3memcpy.S
View file @
da09d2f1
/*
$Id
:
U3memcpy
.
S
,
v
1
.2
2000
/
11
/
01
09
:
29
:
19
davem
Exp
$
*
U3memcpy
.
S
:
UltraSparc
-
III
optimized
memcpy
.
/*
U3memcpy
.
S
:
UltraSparc
-
III
optimized
memcpy
.
*
*
Copyright
(
C
)
1999
,
2000
David
S
.
Miller
(
davem
@
redhat
.
com
)
*
Copyright
(
C
)
1999
,
2000
,
2004
David
S
.
Miller
(
davem
@
redhat
.
com
)
*/
#ifdef __KERNEL__
...
...
@@ -9,15 +8,20 @@
#include <asm/asi.h>
#include <asm/dcu.h>
#include <asm/spitfire.h>
#undef SMALL_COPY_USES_FPU
#else
#define ASI_BLK_P 0xf0
#define FPRS_FEF 0x04
#define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs
#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
#define SMALL_COPY_USES_FPU
#endif
#ifndef XCC
#define XCC xcc
#endif
.
register
%
g2
,#
scratch
.
register
%
g3
,#
scratch
/
*
Special
/
non
-
trivial
issues
of
this
code
:
*
*
1
)
%
o5
is
preserved
from
VISEntryHalf
to
VISExitHalf
...
...
@@ -39,78 +43,53 @@
.
globl
U3memcpy
U3memcpy
:
/
*
%
o0
=
dst
,
%
o1
=
src
,
%
o2
=
len
*/
#ifndef __KERNEL__
/
*
Save
away
original
'dst'
for
memcpy
return
value
.
*/
mov
%
o0
,
%
g3
!
A0
Group
#endif
/
*
Anything
to
copy
at
all
?
*/
cmp
%
o2
,
0
!
A1
ble
,
pn
%
icc
,
U3memcpy_short_ret
!
BR
/
*
Extremely
small
copy
?
*/
cmp
%
o2
,
31
!
A0
Group
ble
,
pn
%
icc
,
U3memcpy_short
!
BR
/
*
Large
enough
to
use
unrolled
prefetch
loops
?
*/
cmp
%
o2
,
0x100
!
A1
bge
,
a
,
pt
%
icc
,
U3memcpy_enter
!
BR
Group
andcc
%
o0
,
0x3f
,
%
g2
!
A0
mov
%
o0
,
%
g5
cmp
%
o2
,
0
be
,
pn
%
XCC
,
out
or
%
o0
,
%
o1
,
%
o3
cmp
%
o2
,
16
bleu
,
a
,
pn
%
XCC
,
small_copy
or
%
o3
,
%
o2
,
%
o3
ba
,
pt
%
xcc
,
U3memcpy_toosmall
!
BR
Group
andcc
%
o0
,
0x7
,
%
g2
!
A0
cmp
%
o2
,
256
blu
,
pt
%
XCC
,
medium_copy
andcc
%
o3
,
0x7
,
%
g0
.
align
32
U3memcpy_short
:
/
*
Copy
%
o2
bytes
from
src
to
dst
,
one
byte
at
a
time
.
*/
ldub
[%
o1
+
0x00
],
%
o3
!
MS
Group
add
%
o1
,
0x1
,
%
o1
!
A0
add
%
o0
,
0x1
,
%
o0
!
A1
subcc
%
o2
,
1
,
%
o2
!
A0
Group
ba
,
pt
%
xcc
,
enter
andcc
%
o0
,
0x3f
,
%
g2
bg
,
pt
%
icc
,
U3memcpy_short
!
BR
stb
%
o3
,
[%
o0
+
-
1
]
!
MS
Group
(
1
-
cycle
stall
)
U3memcpy_short_ret
:
#ifdef __KERNEL__
retl
!
BR
Group
(
0
-
4
cycle
stall
)
clr
%
o0
!
A0
#else
retl
!
BR
Group
(
0
-
4
cycle
stall
)
mov
%
g3
,
%
o0
!
A0
#endif
/
*
Here
len
>=
(
6
*
64
)
and
condition
codes
reflect
execution
/
*
Here
len
>=
256
and
condition
codes
reflect
execution
*
of
"andcc %o0, 0x7, %g2"
,
done
by
caller
.
*/
.
align
64
U3memcpy_
enter
:
enter
:
/
*
Is
'dst'
already
aligned
on
an
64
-
byte
boundary
?
*/
be
,
pt
%
xcc
,
2
f
!
BR
be
,
pt
%
XCC
,
2
f
/
*
Compute
abs
((
dst
&
0x3f
)
-
0x40
)
into
%
g2
.
This
is
the
number
*
of
bytes
to
copy
to
make
'dst'
64
-
byte
aligned
.
We
pre
-
*
subtract
this
from
'len'
.
*/
sub
%
g2
,
0x40
,
%
g2
!
A0
Group
sub
%
g0
,
%
g2
,
%
g2
!
A0
Group
sub
%
o2
,
%
g2
,
%
o2
!
A0
Group
sub
%
g2
,
0x40
,
%
g2
sub
%
g0
,
%
g2
,
%
g2
sub
%
o2
,
%
g2
,
%
o2
/
*
Copy
%
g2
bytes
from
src
to
dst
,
one
byte
at
a
time
.
*/
1
:
ldub
[%
o1
+
0x00
],
%
o3
!
MS
(
Group
)
add
%
o1
,
0x1
,
%
o1
!
A1
add
%
o0
,
0x1
,
%
o0
!
A0
Group
subcc
%
g2
,
0x1
,
%
g2
!
A1
1
:
ldub
[%
o1
+
0x00
],
%
o3
add
%
o1
,
0x1
,
%
o1
add
%
o0
,
0x1
,
%
o0
subcc
%
g2
,
0x1
,
%
g2
bg
,
pt
%
icc
,
1
b
!
BR
Group
stb
%
o3
,
[%
o0
+
-
1
]
!
MS
Group
bg
,
pt
%
XCC
,
1
b
stb
%
o3
,
[%
o0
+
-
1
]
2
:
VISEntryHalf
!
MS
+
MS
and
%
o1
,
0x7
,
%
g1
!
A1
ba
,
pt
%
xcc
,
U3memcpy_begin
!
BR
alignaddr
%
o1
,
%
g0
,
%
o1
!
MS
(
Break
-
after
)
2
:
VISEntryHalf
and
%
o1
,
0x7
,
%
g1
ba
,
pt
%
xcc
,
begin
alignaddr
%
o1
,
%
g0
,
%
o1
.
align
64
U3memcpy_
begin
:
begin
:
#ifdef __KERNEL__
.
globl
U3memcpy_nop_1_6
U3memcpy_nop_1_6
:
...
...
@@ -121,146 +100,90 @@ U3memcpy_nop_1_6:
stxa
%
o3
,
[%
g0
]
ASI_DCU_CONTROL_REG
!
Enable
P
-
cache
membar
#
Sync
#endif
prefetch
[%
o1
+
0x000
],
#
one_read
!
MS
Group1
prefetch
[%
o1
+
0x040
],
#
one_read
!
MS
Group2
andn
%
o2
,
(
0x40
-
1
),
%
o4
!
A0
prefetch
[%
o1
+
0x080
],
#
one_read
!
MS
Group3
cmp
%
o4
,
0x140
!
A0
prefetch
[%
o1
+
0x0c0
],
#
one_read
!
MS
Group4
ldd
[%
o1
+
0x000
],
%f0
!
MS
Group5
(
%f0
results
at
G8
)
bge
,
a
,
pt
%
icc
,
1
f
!
BR
prefetch
[%
o1
+
0x100
],
#
one_read
!
MS
Group6
1
:
ldd
[%
o1
+
0x008
],
%f2
!
AX
(
%f2
results
at
G9
)
cmp
%
o4
,
0x180
!
A1
bge
,
a
,
pt
%
icc
,
1
f
!
BR
prefetch
[%
o1
+
0x140
],
#
one_read
!
MS
Group7
1
:
ldd
[%
o1
+
0x010
],
%f4
!
AX
(
%f4
results
at
G10
)
cmp
%
o4
,
0x1c0
!
A1
bge
,
a
,
pt
%
icc
,
1
f
!
BR
prefetch
[%
o1
+
0x180
],
#
one_read
!
MS
Group8
1
:
faligndata
%f0
,
%f2
,
%f16
!
FGA
Group9
(
%f16
at
G12
)
ldd
[%
o1
+
0x018
],
%f6
!
AX
(
%f6
results
at
G12
)
faligndata
%f2
,
%f4
,
%f18
!
FGA
Group10
(
%f18
results
at
G13
)
ldd
[%
o1
+
0x020
],
%f8
!
MS
(
%f8
results
at
G13
)
faligndata
%f4
,
%f6
,
%f20
!
FGA
Group12
(
1
-
cycle
stall
,
%f20
at
G15
)
ldd
[%
o1
+
0x028
],
%f10
!
MS
(
%f10
results
at
G15
)
faligndata
%f6
,
%f8
,
%f22
!
FGA
Group13
(
%f22
results
at
G16
)
ldd
[%
o1
+
0x030
],
%f12
!
MS
(
%f12
results
at
G16
)
faligndata
%f8
,
%f10
,
%f24
!
FGA
Group15
(
1
-
cycle
stall
,
%f24
at
G18
)
ldd
[%
o1
+
0x038
],
%f14
!
MS
(
%f14
results
at
G18
)
faligndata
%f10
,
%f12
,
%f26
!
FGA
Group16
(
%f26
results
at
G19
)
ldd
[%
o1
+
0x040
],
%f0
!
MS
(
%f0
results
at
G19
)
/
*
We
only
use
the
first
loop
if
len
>
(
7
*
64
)
.
*/
subcc
%
o4
,
0x1c0
,
%
o4
!
A0
Group17
bg
,
pt
%
icc
,
U3memcpy_loop1
!
BR
add
%
o1
,
0x40
,
%
o1
!
A1
add
%
o4
,
0x140
,
%
o4
!
A0
Group18
ba
,
pt
%
xcc
,
U3memcpy_loop2
!
BR
srl
%
o4
,
6
,
%
o3
!
A0
Group19
nop
nop
nop
nop
nop
nop
nop
prefetch
[%
o1
+
0x000
],
#
one_read
prefetch
[%
o1
+
0x040
],
#
one_read
andn
%
o2
,
(
0x40
-
1
),
%
o4
prefetch
[%
o1
+
0x080
],
#
one_read
prefetch
[%
o1
+
0x0c0
],
#
one_read
ldd
[%
o1
+
0x000
],
%f0
prefetch
[%
o1
+
0x100
],
#
one_read
ldd
[%
o1
+
0x008
],
%f2
prefetch
[%
o1
+
0x140
],
#
one_read
ldd
[%
o1
+
0x010
],
%f4
prefetch
[%
o1
+
0x180
],
#
one_read
faligndata
%f0
,
%f2
,
%f16
ldd
[%
o1
+
0x018
],
%f6
faligndata
%f2
,
%f4
,
%f18
ldd
[%
o1
+
0x020
],
%f8
faligndata
%f4
,
%f6
,
%f20
ldd
[%
o1
+
0x028
],
%f10
faligndata
%f6
,
%f8
,
%f22
ldd
[%
o1
+
0x030
],
%f12
faligndata
%f8
,
%f10
,
%f24
ldd
[%
o1
+
0x038
],
%f14
faligndata
%f10
,
%f12
,
%f26
ldd
[%
o1
+
0x040
],
%f0
sub
%
o4
,
0x80
,
%
o4
add
%
o1
,
0x40
,
%
o1
ba
,
pt
%
xcc
,
loop
srl
%
o4
,
6
,
%
o3
/
*
This
loop
performs
the
copy
and
queues
new
prefetches
.
*
We
drop
into
the
second
loop
when
len
<=
(
5
*
64
)
.
Note
*
that
this
(
5
*
64
)
factor
has
been
subtracted
from
len
*
already
.
*/
U3memcpy_loop1
:
ldd
[%
o1
+
0x008
],
%f2
!
MS
Group2
(
%f2
results
at
G5
)
faligndata
%f12
,
%f14
,
%f28
!
FGA
(
%f28
results
at
G5
)
ldd
[%
o1
+
0x010
],
%f4
!
MS
Group3
(
%f4
results
at
G6
)
faligndata
%f14
,
%f0
,
%f30
!
FGA
Group4
(
1
-
cycle
stall
,
%f30
at
G7
)
stda
%f16
,
[%
o0
]
ASI_BLK_P
!
MS
ldd
[%
o1
+
0x018
],
%f6
!
AX
(
%f6
results
at
G7
)
faligndata
%f0
,
%f2
,
%f16
!
FGA
Group12
(
7
-
cycle
stall
)
ldd
[%
o1
+
0x020
],
%f8
!
MS
(
%f8
results
at
G15
)
faligndata
%f2
,
%f4
,
%f18
!
FGA
Group13
(
%f18
results
at
G16
)
ldd
[%
o1
+
0x028
],
%f10
!
MS
(
%f10
results
at
G16
)
faligndata
%f4
,
%f6
,
%f20
!
FGA
Group14
(
%f20
results
at
G17
)
ldd
[%
o1
+
0x030
],
%f12
!
MS
(
%f12
results
at
G17
)
faligndata
%f6
,
%f8
,
%f22
!
FGA
Group15
(
%f22
results
at
G18
)
ldd
[%
o1
+
0x038
],
%f14
!
MS
(
%f14
results
at
G18
)
faligndata
%f8
,
%f10
,
%f24
!
FGA
Group16
(
%f24
results
at
G19
)
ldd
[%
o1
+
0x040
],
%f0
!
AX
(
%f0
results
at
G19
)
prefetch
[%
o1
+
0x180
],
#
one_read
!
MS
faligndata
%f10
,
%f12
,
%f26
!
FGA
Group17
(
%f26
results
at
G20
)
subcc
%
o4
,
0x40
,
%
o4
!
A0
add
%
o1
,
0x40
,
%
o1
!
A1
bg
,
pt
%
xcc
,
U3memcpy_loop1
!
BR
add
%
o0
,
0x40
,
%
o0
!
A0
Group18
U3memcpy_loop2_enter
:
mov
5
,
%
o3
!
A1
/
*
This
loop
performs
on
the
copy
,
no
new
prefetches
are
*
queued
.
We
do
things
this
way
so
that
we
do
not
perform
*
any
spurious
prefetches
past
the
end
of
the
src
buffer
.
*/
U3memcpy_loop2
:
ldd
[%
o1
+
0x008
],
%f2
!
MS
faligndata
%f12
,
%f14
,
%f28
!
FGA
Group2
ldd
[%
o1
+
0x010
],
%f4
!
MS
faligndata
%f14
,
%f0
,
%f30
!
FGA
Group4
(
1
-
cycle
stall
)
stda
%f16
,
[%
o0
]
ASI_BLK_P
!
MS
ldd
[%
o1
+
0x018
],
%f6
!
AX
faligndata
%f0
,
%f2
,
%f16
!
FGA
Group12
(
7
-
cycle
stall
)
ldd
[%
o1
+
0x020
],
%f8
!
MS
faligndata
%f2
,
%f4
,
%f18
!
FGA
Group13
ldd
[%
o1
+
0x028
],
%f10
!
MS
faligndata
%f4
,
%f6
,
%f20
!
FGA
Group14
ldd
[%
o1
+
0x030
],
%f12
!
MS
faligndata
%f6
,
%f8
,
%f22
!
FGA
Group15
ldd
[%
o1
+
0x038
],
%f14
!
MS
faligndata
%f8
,
%f10
,
%f24
!
FGA
Group16
ldd
[%
o1
+
0x040
],
%f0
!
AX
faligndata
%f10
,
%f12
,
%f26
!
FGA
Group17
subcc
%
o3
,
0x01
,
%
o3
!
A0
add
%
o1
,
0x40
,
%
o1
!
A1
bg
,
pt
%
xcc
,
U3memcpy_loop2
!
BR
add
%
o0
,
0x40
,
%
o0
!
A0
Group18
.
align
64
loop
:
ldd
[%
o1
+
0x008
],
%f2
faligndata
%f12
,
%f14
,
%f28
ldd
[%
o1
+
0x010
],
%f4
faligndata
%f14
,
%f0
,
%f30
stda
%f16
,
[%
o0
]
ASI_BLK_P
ldd
[%
o1
+
0x018
],
%f6
faligndata
%f0
,
%f2
,
%f16
ldd
[%
o1
+
0x020
],
%f8
faligndata
%f2
,
%f4
,
%f18
ldd
[%
o1
+
0x028
],
%f10
faligndata
%f4
,
%f6
,
%f20
ldd
[%
o1
+
0x030
],
%f12
faligndata
%f6
,
%f8
,
%f22
ldd
[%
o1
+
0x038
],
%f14
faligndata
%f8
,
%f10
,
%f24
ldd
[%
o1
+
0x040
],
%f0
prefetch
[%
o1
+
0x180
],
#
one_read
faligndata
%f10
,
%f12
,
%f26
subcc
%
o3
,
0x01
,
%
o3
add
%
o1
,
0x40
,
%
o1
bg
,
pt
%
XCC
,
loop
add
%
o0
,
0x40
,
%
o0
/
*
Finally
we
copy
the
last
full
64
-
byte
block
.
*/
U3memcpy_
loopfini
:
ldd
[%
o1
+
0x008
],
%f2
!
MS
faligndata
%f12
,
%f14
,
%f28
!
FGA
ldd
[%
o1
+
0x010
],
%f4
!
MS
Group19
faligndata
%f14
,
%f0
,
%f30
!
FGA
stda
%f16
,
[%
o0
]
ASI_BLK_P
!
MS
Group20
ldd
[%
o1
+
0x018
],
%f6
!
AX
faligndata
%f0
,
%f2
,
%f16
!
FGA
Group11
(
7
-
cycle
stall
)
ldd
[%
o1
+
0x020
],
%f8
!
MS
faligndata
%f2
,
%f4
,
%f18
!
FGA
Group12
ldd
[%
o1
+
0x028
],
%f10
!
MS
faligndata
%f4
,
%f6
,
%f20
!
FGA
Group13
ldd
[%
o1
+
0x030
],
%f12
!
MS
faligndata
%f6
,
%f8
,
%f22
!
FGA
Group14
ldd
[%
o1
+
0x038
],
%f14
!
MS
faligndata
%f8
,
%f10
,
%f24
!
FGA
Group15
cmp
%
g1
,
0
!
A0
be
,
pt
%
icc
,
1
f
!
BR
add
%
o0
,
0x40
,
%
o0
!
A1
ldd
[%
o1
+
0x040
],
%f0
!
MS
1
:
faligndata
%f10
,
%f12
,
%f26
!
FGA
Group16
faligndata
%f12
,
%f14
,
%f28
!
FGA
Group17
faligndata
%f14
,
%f0
,
%f30
!
FGA
Group18
stda
%f16
,
[%
o0
]
ASI_BLK_P
!
MS
add
%
o0
,
0x40
,
%
o0
!
A0
add
%
o1
,
0x40
,
%
o1
!
A1
loopfini
:
ldd
[%
o1
+
0x008
],
%f2
faligndata
%f12
,
%f14
,
%f28
ldd
[%
o1
+
0x010
],
%f4
faligndata
%f14
,
%f0
,
%f30
stda
%f16
,
[%
o0
]
ASI_BLK_P
ldd
[%
o1
+
0x018
],
%f6
faligndata
%f0
,
%f2
,
%f16
ldd
[%
o1
+
0x020
],
%f8
faligndata
%f2
,
%f4
,
%f18
ldd
[%
o1
+
0x028
],
%f10
faligndata
%f4
,
%f6
,
%f20
ldd
[%
o1
+
0x030
],
%f12
faligndata
%f6
,
%f8
,
%f22
ldd
[%
o1
+
0x038
],
%f14
faligndata
%f8
,
%f10
,
%f24
cmp
%
g1
,
0
be
,
pt
%
XCC
,
1
f
add
%
o0
,
0x40
,
%
o0
ldd
[%
o1
+
0x040
],
%f0
1
:
faligndata
%f10
,
%f12
,
%f26
faligndata
%f12
,
%f14
,
%f28
faligndata
%f14
,
%f0
,
%f30
stda
%f16
,
[%
o0
]
ASI_BLK_P
add
%
o0
,
0x40
,
%
o0
add
%
o1
,
0x40
,
%
o1
#ifdef __KERNEL__
.
globl
U3memcpy_nop_2_3
U3memcpy_nop_2_3
:
...
...
@@ -268,161 +191,143 @@ U3memcpy_nop_2_3:
stxa
%
g0
,
[%
o3
]
ASI_DMMU
!
Flush
P
-
cache
stxa
%
g3
,
[%
g0
]
ASI_DCU_CONTROL_REG
!
Disable
P
-
cache
#endif
membar
#
Sync
!
MS
Group26
(
7
-
cycle
stall
)
membar
#
Sync
/
*
Now
we
copy
the
(
len
modulo
64
)
bytes
at
the
end
.
*
Note
how
we
borrow
the
%f0
loaded
above
.
*
*
Also
notice
how
this
code
is
careful
not
to
perform
a
*
load
past
the
end
of
the
src
buffer
just
like
similar
*
code
found
in
U3memcpy_toosmall
processing
.
*
load
past
the
end
of
the
src
buffer
.
*/
U3memcpy_
loopend
:
and
%
o2
,
0x3f
,
%
o2
!
A0
Group
andcc
%
o2
,
0x38
,
%
g2
!
A0
Group
be
,
pn
%
icc
,
U3memcpy_endcruft
!
BR
subcc
%
g2
,
0x8
,
%
g2
!
A1
be
,
pn
%
icc
,
U3memcpy_endcruft
!
BR
Group
cmp
%
g1
,
0
!
A0
be
,
a
,
pt
%
icc
,
1
f
!
BR
Group
ldd
[%
o1
+
0x00
],
%f0
!
MS
1
:
ldd
[%
o1
+
0x08
],
%f2
!
MS
Group
add
%
o1
,
0x8
,
%
o1
!
A0
sub
%
o2
,
0x8
,
%
o2
!
A1
subcc
%
g2
,
0x8
,
%
g2
!
A0
Group
faligndata
%f0
,
%f2
,
%f8
!
FGA
Group
std
%f8
,
[%
o0
+
0x00
]
!
MS
(
XXX
does
it
stall
here
?
XXX
)
be
,
pn
%
icc
,
U3memcpy_endcruft
!
BR
add
%
o0
,
0x8
,
%
o0
!
A0
ldd
[%
o1
+
0x08
],
%f0
!
MS
Group
add
%
o1
,
0x8
,
%
o1
!
A0
sub
%
o2
,
0x8
,
%
o2
!
A1
subcc
%
g2
,
0x8
,
%
g2
!
A0
Group
faligndata
%f2
,
%f0
,
%f8
!
FGA
std
%f8
,
[%
o0
+
0x00
]
!
MS
(
XXX
does
it
stall
here
?
XXX
)
bne
,
pn
%
icc
,
1
b
!
BR
add
%
o0
,
0x8
,
%
o0
!
A0
Group
loopend
:
and
%
o2
,
0x3f
,
%
o2
andcc
%
o2
,
0x38
,
%
g2
be
,
pn
%
XCC
,
endcruft
subcc
%
g2
,
0x8
,
%
g2
be
,
pn
%
XCC
,
endcruft
cmp
%
g1
,
0
be
,
a
,
pt
%
XCC
,
1
f
ldd
[%
o1
+
0x00
],
%f0
1
:
ldd
[%
o1
+
0x08
],
%f2
add
%
o1
,
0x8
,
%
o1
sub
%
o2
,
0x8
,
%
o2
subcc
%
g2
,
0x8
,
%
g2
faligndata
%f0
,
%f2
,
%f8
std
%f8
,
[%
o0
+
0x00
]
be
,
pn
%
XCC
,
endcruft
add
%
o0
,
0x8
,
%
o0
ldd
[%
o1
+
0x08
],
%f0
add
%
o1
,
0x8
,
%
o1
sub
%
o2
,
0x8
,
%
o2
subcc
%
g2
,
0x8
,
%
g2
faligndata
%f2
,
%f0
,
%f8
std
%f8
,
[%
o0
+
0x00
]
bne
,
pn
%
XCC
,
1
b
add
%
o0
,
0x8
,
%
o0
/
*
If
anything
is
left
,
we
copy
it
one
byte
at
a
time
.
*
Note
that
%
g1
is
(
src
&
0x3
)
saved
above
before
the
*
alignaddr
was
performed
.
*/
U3memcpy_
endcruft
:
endcruft
:
cmp
%
o2
,
0
add
%
o1
,
%
g1
,
%
o1
VISExitHalf
be
,
pn
%
icc
,
U3memcpy_short_ret
nop
ba
,
a
,
pt
%
xcc
,
U3memcpy_short
be
,
pn
%
XCC
,
out
sub
%
o0
,
%
o1
,
%
o3
/
*
If
we
get
here
,
then
32
<=
len
<
(
6
*
64
)
*/
U3memcpy_toosmall
:
andcc
%
g1
,
0x7
,
%
g0
bne
,
pn
%
icc
,
small_copy_unaligned
andcc
%
o2
,
0x8
,
%
g0
be
,
pt
%
icc
,
1
f
nop
ldx
[%
o1
],
%
o5
stx
%
o5
,
[%
o1
+
%
o3
]
add
%
o1
,
0x8
,
%
o1
#ifdef SMALL_COPY_USES_FPU
1
:
andcc
%
o2
,
0x4
,
%
g0
be
,
pt
%
icc
,
1
f
nop
lduw
[%
o1
],
%
o5
stw
%
o5
,
[%
o1
+
%
o3
]
add
%
o1
,
0x4
,
%
o1
/
*
Is
'dst'
already
aligned
on
an
8
-
byte
boundary
?
*/
be
,
pt
%
xcc
,
2
f
!
BR
Group
1
:
andcc
%
o2
,
0x2
,
%
g0
be
,
pt
%
icc
,
1
f
nop
lduh
[%
o1
],
%
o5
sth
%
o5
,
[%
o1
+
%
o3
]
add
%
o1
,
0x2
,
%
o1
/
*
Compute
abs
((
dst
&
7
)
-
8
)
into
%
g2
.
This
is
the
number
*
of
bytes
to
copy
to
make
'dst'
8
-
byte
aligned
.
We
pre
-
*
subtract
this
from
'len'
.
*/
sub
%
g2
,
0x8
,
%
g2
!
A0
sub
%
g0
,
%
g2
,
%
g2
!
A0
Group
(
reg
-
dep
)
sub
%
o2
,
%
g2
,
%
o2
!
A0
Group
(
reg
-
dep
)
1
:
andcc
%
o2
,
0x1
,
%
g0
be
,
pt
%
icc
,
out
nop
ldub
[%
o1
],
%
o5
ba
,
pt
%
xcc
,
out
stb
%
o5
,
[%
o1
+
%
o3
]
medium_copy
:
/
*
16
<
len
<=
64
*/
bne
,
pn
%
XCC
,
small_copy_unaligned
sub
%
o0
,
%
o1
,
%
o3
medium_copy_aligned
:
andn
%
o2
,
0x7
,
%
o4
and
%
o2
,
0x7
,
%
o2
1
:
subcc
%
o4
,
0x8
,
%
o4
ldx
[%
o1
],
%
o5
stx
%
o5
,
[%
o1
+
%
o3
]
bgu
,
pt
%
XCC
,
1
b
add
%
o1
,
0x8
,
%
o1
andcc
%
o2
,
0x4
,
%
g0
be
,
pt
%
XCC
,
1
f
nop
sub
%
o2
,
0x4
,
%
o2
lduw
[%
o1
],
%
o5
stw
%
o5
,
[%
o1
+
%
o3
]
add
%
o1
,
0x4
,
%
o1
1
:
cmp
%
o2
,
0
be
,
pt
%
XCC
,
out
nop
ba
,
pt
%
xcc
,
small_copy_unaligned
nop
/
*
Copy
%
g2
bytes
from
src
to
dst
,
one
byte
at
a
time
.
*/
1
:
ldub
[%
o1
+
0x00
],
%
o3
!
MS
(
Group
)
(%
o3
in
3
cycles
)
add
%
o1
,
0x1
,
%
o1
!
A1
add
%
o0
,
0x1
,
%
o0
!
A0
Group
subcc
%
g2
,
0x1
,
%
g2
!
A1
small_copy
:
/
*
0
<
len
<=
16
*/
andcc
%
o3
,
0x3
,
%
g0
bne
,
pn
%
XCC
,
small_copy_unaligned
sub
%
o0
,
%
o1
,
%
o3
bg
,
pt
%
icc
,
1
b
!
BR
Group
stb
%
o3
,
[%
o0
+
-
1
]
!
MS
Group
small_copy_aligned
:
subcc
%
o2
,
4
,
%
o2
lduw
[%
o1
],
%
g1
stw
%
g1
,
[%
o1
+
%
o3
]
bgu
,
pt
%
XCC
,
small_copy_aligned
add
%
o1
,
4
,
%
o1
2
:
VISEntryHalf
!
MS
+
MS
out
:
retl
mov
%
g5
,
%
o0
/
*
Compute
(
len
-
(
len
%
8
))
into
%
g2
.
This
is
guaranteed
*
to
be
nonzero
.
*/
andn
%
o2
,
0x7
,
%
g2
!
A0
Group
/
*
You
may
read
this
and
believe
that
it
allows
reading
*
one
8
-
byte
longword
past
the
end
of
src
.
It
actually
*
does
not
,
as
%
g2
is
subtracted
as
loads
are
done
from
*
src
,
so
we
always
stop
before
running
off
the
end
.
*
Also
,
we
are
guaranteed
to
have
at
least
0x10
bytes
*
to
move
here
.
.
align
32
small_copy_unaligned
:
subcc
%
o2
,
1
,
%
o2
ldub
[%
o1
],
%
g1
stb
%
g1
,
[%
o1
+
%
o3
]
bgu
,
pt
%
XCC
,
small_copy_unaligned
add
%
o1
,
1
,
%
o1
retl
mov
%
g5
,
%
o0
/
*
Act
like
copy_
{
to
,
in
}
_user
(),
ie
.
return
zero
instead
*
of
original
destination
pointer
.
This
is
invoked
when
*
copy_
{
to
,
in
}
_user
()
finds
that
%
asi
is
kernel
space
.
*/
sub
%
g2
,
0x8
,
%
g2
!
A0
Group
(
reg
-
dep
)
alignaddr
%
o1
,
%
g0
,
%
g1
!
MS
(
Break
-
after
)
ldd
[%
g1
+
0x00
],
%f0
!
MS
Group
(
1
-
cycle
stall
)
add
%
g1
,
0x8
,
%
g1
!
A0
1
:
ldd
[%
g1
+
0x00
],
%f2
!
MS
Group
add
%
g1
,
0x8
,
%
g1
!
A0
sub
%
o2
,
0x8
,
%
o2
!
A1
subcc
%
g2
,
0x8
,
%
g2
!
A0
Group
faligndata
%f0
,
%f2
,
%f8
!
FGA
Group
(
1
-
cycle
stall
)
std
%f8
,
[%
o0
+
0x00
]
!
MS
Group
(
2
-
cycle
stall
)
add
%
o1
,
0x8
,
%
o1
!
A0
be
,
pn
%
icc
,
2
f
!
BR
add
%
o0
,
0x8
,
%
o0
!
A1
ldd
[%
g1
+
0x00
],
%f0
!
MS
Group
add
%
g1
,
0x8
,
%
g1
!
A0
sub
%
o2
,
0x8
,
%
o2
!
A1
subcc
%
g2
,
0x8
,
%
g2
!
A0
Group
faligndata
%f2
,
%f0
,
%f8
!
FGA
Group
(
1
-
cycle
stall
)
std
%f8
,
[%
o0
+
0x00
]
!
MS
Group
(
2
-
cycle
stall
)
add
%
o1
,
0x8
,
%
o1
!
A0
bne
,
pn
%
icc
,
1
b
!
BR
add
%
o0
,
0x8
,
%
o0
!
A1
/
*
Nothing
left
to
copy
?
*/
2
:
cmp
%
o2
,
0
!
A0
Group
VISExitHalf
!
A0
+
MS
be
,
pn
%
icc
,
U3memcpy_short_ret
!
BR
Group
nop
!
A0
ba
,
a
,
pt
%
xcc
,
U3memcpy_short
!
BR
Group
#else /* !(SMALL_COPY_USES_FPU) */
xor
%
o1
,
%
o0
,
%
g2
andcc
%
g2
,
0x7
,
%
g0
bne
,
pn
%
icc
,
U3memcpy_short
andcc
%
o1
,
0x7
,
%
g2
be
,
pt
%
xcc
,
2
f
sub
%
g2
,
0x8
,
%
g2
sub
%
g0
,
%
g2
,
%
g2
sub
%
o2
,
%
g2
,
%
o2
1
:
ldub
[%
o1
+
0x00
],
%
o3
add
%
o1
,
0x1
,
%
o1
add
%
o0
,
0x1
,
%
o0
subcc
%
g2
,
0x1
,
%
g2
bg
,
pt
%
icc
,
1
b
stb
%
o3
,
[%
o0
+
-
1
]
2
:
andn
%
o2
,
0x7
,
%
g2
sub
%
o2
,
%
g2
,
%
o2
3
:
ldx
[%
o1
+
0x00
],
%
o3
add
%
o1
,
0x8
,
%
o1
add
%
o0
,
0x8
,
%
o0
subcc
%
g2
,
0x8
,
%
g2
bg
,
pt
%
icc
,
3
b
stx
%
o3
,
[%
o0
+
-
8
]
cmp
%
o2
,
0
bne
,
pn
%
icc
,
U3memcpy_short
nop
ba
,
a
,
pt
%
xcc
,
U3memcpy_short_ret
#endif /* !(SMALL_COPY_USES_FPU) */
.
globl
U3memcpy_user_stub
U3memcpy_user_stub
:
save
%
sp
,
-
192
,
%
sp
mov
%
i0
,
%
o0
mov
%
i1
,
%
o1
call
U3memcpy
mov
%
i2
,
%
o2
ret
restore
%
g0
,
%
g0
,
%
o0
arch/sparc64/lib/VIScopy.S
View file @
da09d2f1
...
...
@@ -306,11 +306,7 @@
.
globl
__memcpy_begin
__memcpy_begin
:
.
globl
__memcpy
.
type
__memcpy
,
@
function
memcpy_private
:
__memcpy
:
memcpy
:
mov
ASI_P
,
asi_src
!
IEU0
Group
brnz
,
pt
%
o2
,
__memcpy_entry
!
CTI
mov
ASI_P
,
asi_dest
!
IEU1
...
...
arch/sparc64/lib/splock.S
View file @
da09d2f1
...
...
@@ -6,6 +6,18 @@
.
text
.
align
64
.
globl
_raw_spin_lock
_raw_spin_lock
:
/
*
%
o0
=
lock_ptr
*/
1
:
ldstub
[%
o0
],
%
g7
brnz
,
pn
%
g7
,
2
f
membar
#
StoreLoad
|
#
StoreStore
retl
nop
2
:
ldub
[%
o0
],
%
g7
brnz
,
pt
%
g7
,
2
b
membar
#
LoadLoad
ba
,
a
,
pt
%
xcc
,
1
b
.
globl
_raw_spin_lock_flags
_raw_spin_lock_flags
:
/
*
%
o0
=
lock_ptr
,
%
o1
=
irq_flags
*/
1
:
ldstub
[%
o0
],
%
g7
...
...
drivers/sbus/char/bbc_envctrl.c
View file @
da09d2f1
...
...
@@ -7,6 +7,7 @@
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/delay.h>
#include <asm/oplib.h>
#include <asm/ebus.h>
#define __KERNEL_SYSCALLS__
...
...
@@ -622,9 +623,7 @@ void bbc_envctrl_cleanup(void)
read_unlock
(
&
tasklist_lock
);
if
(
!
found
)
break
;
current
->
state
=
TASK_INTERRUPTIBLE
;
schedule_timeout
(
HZ
);
current
->
state
=
TASK_RUNNING
;
msleep
(
1000
);
}
kenvctrld_task
=
NULL
;
}
...
...
drivers/sbus/char/envctrl.c
View file @
da09d2f1
...
...
@@ -1181,8 +1181,7 @@ static void __exit envctrl_cleanup(void)
if
(
!
found
)
break
;
current
->
state
=
TASK_INTERRUPTIBLE
;
schedule_timeout
(
HZ
);
msleep
(
1000
);
}
kenvctrld_task
=
NULL
;
}
...
...
include/asm-sparc/pci.h
View file @
da09d2f1
...
...
@@ -87,12 +87,6 @@ extern dma_addr_t pci_map_page(struct pci_dev *hwdev, struct page *page,
extern
void
pci_unmap_page
(
struct
pci_dev
*
hwdev
,
dma_addr_t
dma_address
,
size_t
size
,
int
direction
);
/* map_page and map_single cannot fail */
static
inline
int
pci_dma_mapping_error
(
dma_addr_t
dma_addr
)
{
return
0
;
}
/* Map a set of buffers described by scatterlist in streaming
* mode for DMA. This is the scather-gather version of the
* above pci_map_single interface. Here the scatter gather list
...
...
include/asm-sparc64/page.h
View file @
da09d2f1
...
...
@@ -18,7 +18,7 @@ extern void _clear_page(void *page);
#define clear_page(X) _clear_page((void *)(X))
struct
page
;
extern
void
clear_user_page
(
void
*
addr
,
unsigned
long
vaddr
,
struct
page
*
page
);
#define copy_page(X,Y)
__
memcpy((void *)(X), (void *)(Y), PAGE_SIZE)
#define copy_page(X,Y) memcpy((void *)(X), (void *)(Y), PAGE_SIZE)
extern
void
copy_user_page
(
void
*
to
,
void
*
from
,
unsigned
long
vaddr
,
struct
page
*
topage
);
/* GROSS, defining this makes gcc pass these types as aggregates,
...
...
include/asm-sparc64/spinlock.h
View file @
da09d2f1
...
...
@@ -41,22 +41,8 @@ typedef unsigned char spinlock_t;
do { membar("#LoadLoad"); \
} while(*((volatile unsigned char *)lock))
static
__inline__
void
_raw_spin_lock
(
spinlock_t
*
lock
)
{
__asm__
__volatile__
(
"1: ldstub [%0], %%g7
\n
"
" brnz,pn %%g7, 2f
\n
"
" membar #StoreLoad | #StoreStore
\n
"
" .subsection 2
\n
"
"2: ldub [%0], %%g7
\n
"
" brnz,pt %%g7, 2b
\n
"
" membar #LoadLoad
\n
"
" b,a,pt %%xcc, 1b
\n
"
" .previous
\n
"
:
/* no outputs */
:
"r"
(
lock
)
:
"g7"
,
"memory"
);
}
/* arch/sparc64/lib/spinlock.S */
extern
void
_raw_spin_lock
(
spinlock_t
*
lock
);
static
__inline__
int
_raw_spin_trylock
(
spinlock_t
*
lock
)
{
...
...
include/asm-sparc64/string.h
View file @
da09d2f1
...
...
@@ -16,9 +16,7 @@
#include <asm/asi.h>
extern
void
__memmove
(
void
*
,
const
void
*
,
__kernel_size_t
);
extern
__kernel_size_t
__memcpy
(
void
*
,
const
void
*
,
__kernel_size_t
);
extern
void
*
__memset
(
void
*
,
int
,
__kernel_size_t
);
extern
void
*
__builtin_memcpy
(
void
*
,
const
void
*
,
__kernel_size_t
);
extern
void
*
__builtin_memset
(
void
*
,
int
,
__kernel_size_t
);
#ifndef EXPORT_SYMTAB_STROPS
...
...
@@ -37,29 +35,7 @@ extern void *__builtin_memset(void *,int,__kernel_size_t);
#define __HAVE_ARCH_MEMCPY
static
inline
void
*
__constant_memcpy
(
void
*
to
,
const
void
*
from
,
__kernel_size_t
n
)
{
if
(
n
)
{
if
(
n
<=
32
)
{
__builtin_memcpy
(
to
,
from
,
n
);
}
else
{
__memcpy
(
to
,
from
,
n
);
}
}
return
to
;
}
static
inline
void
*
__nonconstant_memcpy
(
void
*
to
,
const
void
*
from
,
__kernel_size_t
n
)
{
__memcpy
(
to
,
from
,
n
);
return
to
;
}
#undef memcpy
#define memcpy(t, f, n) \
(__builtin_constant_p(n) ? \
__constant_memcpy((t),(f),(n)) : \
__nonconstant_memcpy((t),(f),(n)))
extern
void
*
memcpy
(
void
*
,
const
void
*
,
__kernel_size_t
);
#define __HAVE_ARCH_MEMSET
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment