Commit 0013aceb authored by Max Filippov

xtensa: clean up fixups in assembly code

Remove duplicate definitions of EX() and the similar TRY/CATCH and SRC/DST
macros from assembly sources and put a single definition into asm/asmmacro.h.
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
parent 2da03d41
......@@ -150,5 +150,12 @@
__endl \ar \as
.endm
/*
 * Load or store instructions that may cause exceptions use the EX macro.
 *
 * EX(handler) is written directly in front of the instruction it covers,
 * for example:
 *
 *	EX(10f) l32i a9, a2, 0
 *
 * The macro switches to the __ex_table section, emits one pair of words
 * (the address of local label 97, then the address of handler), switches
 * back with .previous and finally defines local label 97.  Since nothing
 * follows the label inside the macro, 97 ends up on the instruction that
 * comes right after the macro invocation, so each table entry pairs that
 * instruction's address with its handler.
 *
 * handler is pasted verbatim into the .word directive; the callers pass
 * either a numeric local label reference (10f, 11f) or a named label
 * (.Leac).
 *
 * NOTE(review): the macro reserves numeric local label 97 for itself;
 * presumably code using EX should not use 97b/97f for its own purposes --
 * confirm against the users of this header.
 */
#define EX(handler) \
.section __ex_table, "a"; \
.word 97f, handler; \
.previous \
97:
#endif /* _XTENSA_ASMMACRO_H */
......@@ -14,6 +14,7 @@
#include <linux/linkage.h>
#include <asm/asm-offsets.h>
#include <asm/asmmacro.h>
#include <asm/processor.h>
#include <asm/coprocessor.h>
#include <asm/thread_info.h>
......@@ -1094,35 +1095,12 @@ ENDPROC(fast_syscall_unrecoverable)
* < VALID_DOUBLE_EXCEPTION_ADDRESS: regular exception
*
* Note: we don't have to save a2; a2 holds the return value
*
* We use the two macros TRY and CATCH:
*
* TRY adds an entry to the __ex_table fixup table for the immediately
* following instruction.
*
* CATCH catches any exception that occurred at one of the preceding TRY
* statements and continues from there
*
* Usage TRY l32i a0, a1, 0
* <other code>
* done: rfe
* CATCH <set return code>
* j done
*/
.literal_position
#ifdef CONFIG_FAST_SYSCALL_XTENSA
#define TRY \
.section __ex_table, "a"; \
.word 66f, 67f; \
.text; \
66:
#define CATCH \
67:
ENTRY(fast_syscall_xtensa)
s32i a7, a2, PT_AREG7 # we need an additional register
......@@ -1136,9 +1114,9 @@ ENTRY(fast_syscall_xtensa)
.Lswp: /* Atomic compare and swap */
TRY l32i a0, a3, 0 # read old value
EX(.Leac) l32i a0, a3, 0 # read old value
bne a0, a4, 1f # same as old value? jump
TRY s32i a5, a3, 0 # different, modify value
EX(.Leac) s32i a5, a3, 0 # different, modify value
l32i a7, a2, PT_AREG7 # restore a7
l32i a0, a2, PT_AREG0 # restore a0
movi a2, 1 # and return 1
......@@ -1151,12 +1129,12 @@ TRY s32i a5, a3, 0 # different, modify value
.Lnswp: /* Atomic set, add, and exg_add. */
TRY l32i a7, a3, 0 # orig
EX(.Leac) l32i a7, a3, 0 # orig
addi a6, a6, -SYS_XTENSA_ATOMIC_SET
add a0, a4, a7 # + arg
moveqz a0, a4, a6 # set
addi a6, a6, SYS_XTENSA_ATOMIC_SET
TRY s32i a0, a3, 0 # write new value
EX(.Leac) s32i a0, a3, 0 # write new value
mov a0, a2
mov a2, a7
......@@ -1164,7 +1142,6 @@ TRY s32i a0, a3, 0 # write new value
l32i a0, a0, PT_AREG0 # restore a0
rfe
CATCH
.Leac: l32i a7, a2, PT_AREG7 # restore a7
l32i a0, a2, PT_AREG0 # restore a0
movi a2, -EFAULT
......
......@@ -14,9 +14,10 @@
* 2 of the License, or (at your option) any later version.
*/
#include <asm/errno.h>
#include <linux/errno.h>
#include <linux/linkage.h>
#include <variant/core.h>
#include <asm/asmmacro.h>
/*
* computes a partial checksum, e.g. for TCP/UDP fragments
......@@ -175,23 +176,8 @@ ENDPROC(csum_partial)
/*
* Copy from ds while checksumming, otherwise like csum_partial
*
* The macros SRC and DST specify the type of access for the instruction.
* thus we can call a custom exception handler for each access type.
*/
#define SRC(y...) \
9999: y; \
.section __ex_table, "a"; \
.long 9999b, 6001f ; \
.previous
#define DST(y...) \
9999: y; \
.section __ex_table, "a"; \
.long 9999b, 6002f ; \
.previous
/*
unsigned int csum_partial_copy_generic (const char *src, char *dst, int len,
int sum, int *src_err_ptr, int *dst_err_ptr)
......@@ -244,28 +230,28 @@ ENTRY(csum_partial_copy_generic)
add a10, a10, a2 /* a10 = end of last 32-byte src chunk */
.Loop5:
#endif
SRC( l32i a9, a2, 0 )
SRC( l32i a8, a2, 4 )
DST( s32i a9, a3, 0 )
DST( s32i a8, a3, 4 )
EX(10f) l32i a9, a2, 0
EX(10f) l32i a8, a2, 4
EX(11f) s32i a9, a3, 0
EX(11f) s32i a8, a3, 4
ONES_ADD(a5, a9)
ONES_ADD(a5, a8)
SRC( l32i a9, a2, 8 )
SRC( l32i a8, a2, 12 )
DST( s32i a9, a3, 8 )
DST( s32i a8, a3, 12 )
EX(10f) l32i a9, a2, 8
EX(10f) l32i a8, a2, 12
EX(11f) s32i a9, a3, 8
EX(11f) s32i a8, a3, 12
ONES_ADD(a5, a9)
ONES_ADD(a5, a8)
SRC( l32i a9, a2, 16 )
SRC( l32i a8, a2, 20 )
DST( s32i a9, a3, 16 )
DST( s32i a8, a3, 20 )
EX(10f) l32i a9, a2, 16
EX(10f) l32i a8, a2, 20
EX(11f) s32i a9, a3, 16
EX(11f) s32i a8, a3, 20
ONES_ADD(a5, a9)
ONES_ADD(a5, a8)
SRC( l32i a9, a2, 24 )
SRC( l32i a8, a2, 28 )
DST( s32i a9, a3, 24 )
DST( s32i a8, a3, 28 )
EX(10f) l32i a9, a2, 24
EX(10f) l32i a8, a2, 28
EX(11f) s32i a9, a3, 24
EX(11f) s32i a8, a3, 28
ONES_ADD(a5, a9)
ONES_ADD(a5, a8)
addi a2, a2, 32
......@@ -284,8 +270,8 @@ DST( s32i a8, a3, 28 )
add a10, a10, a2 /* a10 = end of last 4-byte src chunk */
.Loop6:
#endif
SRC( l32i a9, a2, 0 )
DST( s32i a9, a3, 0 )
EX(10f) l32i a9, a2, 0
EX(11f) s32i a9, a3, 0
ONES_ADD(a5, a9)
addi a2, a2, 4
addi a3, a3, 4
......@@ -315,8 +301,8 @@ DST( s32i a9, a3, 0 )
add a10, a10, a2 /* a10 = end of last 2-byte src chunk */
.Loop7:
#endif
SRC( l16ui a9, a2, 0 )
DST( s16i a9, a3, 0 )
EX(10f) l16ui a9, a2, 0
EX(11f) s16i a9, a3, 0
ONES_ADD(a5, a9)
addi a2, a2, 2
addi a3, a3, 2
......@@ -326,8 +312,8 @@ DST( s16i a9, a3, 0 )
4:
/* This section processes a possible trailing odd byte. */
_bbci.l a4, 0, 8f /* 1-byte chunk */
SRC( l8ui a9, a2, 0 )
DST( s8i a9, a3, 0 )
EX(10f) l8ui a9, a2, 0
EX(11f) s8i a9, a3, 0
#ifdef __XTENSA_EB__
slli a9, a9, 8 /* shift byte to bits 8..15 */
#endif
......@@ -350,10 +336,10 @@ DST( s8i a9, a3, 0 )
add a10, a10, a2 /* a10 = end of last odd-aligned, 2-byte src chunk */
.Loop8:
#endif
SRC( l8ui a9, a2, 0 )
SRC( l8ui a8, a2, 1 )
DST( s8i a9, a3, 0 )
DST( s8i a8, a3, 1 )
EX(10f) l8ui a9, a2, 0
EX(10f) l8ui a8, a2, 1
EX(11f) s8i a9, a3, 0
EX(11f) s8i a8, a3, 1
#ifdef __XTENSA_EB__
slli a9, a9, 8 /* combine into a single 16-bit value */
#else /* for checksum computation */
......@@ -381,7 +367,7 @@ ENDPROC(csum_partial_copy_generic)
a12 = original dst for exception handling
*/
6001:
10:
_movi a2, -EFAULT
s32i a2, a6, 0 /* src_err_ptr */
......@@ -403,7 +389,7 @@ ENDPROC(csum_partial_copy_generic)
2:
retw
6002:
11:
movi a2, -EFAULT
s32i a2, a7, 0 /* dst_err_ptr */
movi a2, 0
......
......@@ -12,6 +12,7 @@
*/
#include <variant/core.h>
#include <asm/asmmacro.h>
/*
* void *memset(void *dst, int c, size_t length)
......@@ -28,15 +29,6 @@
* the alignment labels).
*/
/* Load or store instructions that may cause exceptions use the EX macro. */
#define EX(insn,reg1,reg2,offset,handler) \
9: insn reg1, reg2, offset; \
.section __ex_table, "a"; \
.word 9b, handler; \
.previous
.text
.align 4
.global memset
......@@ -73,10 +65,10 @@ memset:
add a6, a6, a5 # a6 = end of last 16B chunk
#endif /* !XCHAL_HAVE_LOOPS */
.Loop1:
EX(s32i, a3, a5, 0, memset_fixup)
EX(s32i, a3, a5, 4, memset_fixup)
EX(s32i, a3, a5, 8, memset_fixup)
EX(s32i, a3, a5, 12, memset_fixup)
EX(10f) s32i a3, a5, 0
EX(10f) s32i a3, a5, 4
EX(10f) s32i a3, a5, 8
EX(10f) s32i a3, a5, 12
addi a5, a5, 16
#if !XCHAL_HAVE_LOOPS
blt a5, a6, .Loop1
......@@ -84,23 +76,23 @@ memset:
.Loop1done:
bbci.l a4, 3, .L2
# set 8 bytes
EX(s32i, a3, a5, 0, memset_fixup)
EX(s32i, a3, a5, 4, memset_fixup)
EX(10f) s32i a3, a5, 0
EX(10f) s32i a3, a5, 4
addi a5, a5, 8
.L2:
bbci.l a4, 2, .L3
# set 4 bytes
EX(s32i, a3, a5, 0, memset_fixup)
EX(10f) s32i a3, a5, 0
addi a5, a5, 4
.L3:
bbci.l a4, 1, .L4
# set 2 bytes
EX(s16i, a3, a5, 0, memset_fixup)
EX(10f) s16i a3, a5, 0
addi a5, a5, 2
.L4:
bbci.l a4, 0, .L5
# set 1 byte
EX(s8i, a3, a5, 0, memset_fixup)
EX(10f) s8i a3, a5, 0
.L5:
.Lret1:
retw
......@@ -114,7 +106,7 @@ memset:
bbci.l a5, 0, .L20 # branch if dst alignment half-aligned
# dst is only byte aligned
# set 1 byte
EX(s8i, a3, a5, 0, memset_fixup)
EX(10f) s8i a3, a5, 0
addi a5, a5, 1
addi a4, a4, -1
# now retest if dst aligned
......@@ -122,7 +114,7 @@ memset:
.L20:
# dst half-aligned
# set 2 bytes
EX(s16i, a3, a5, 0, memset_fixup)
EX(10f) s16i a3, a5, 0
addi a5, a5, 2
addi a4, a4, -2
j .L0 # dst is now aligned, return to main algorithm
......@@ -141,7 +133,7 @@ memset:
add a6, a5, a4 # a6 = ending address
#endif /* !XCHAL_HAVE_LOOPS */
.Lbyteloop:
EX(s8i, a3, a5, 0, memset_fixup)
EX(10f) s8i a3, a5, 0
addi a5, a5, 1
#if !XCHAL_HAVE_LOOPS
blt a5, a6, .Lbyteloop
......@@ -155,6 +147,6 @@ memset:
/* We return zero if a failure occurred. */
memset_fixup:
10:
movi a2, 0
retw
......@@ -11,16 +11,9 @@
* Copyright (C) 2002 Tensilica Inc.
*/
#include <variant/core.h>
#include <linux/errno.h>
/* Load or store instructions that may cause exceptions use the EX macro. */
#define EX(insn,reg1,reg2,offset,handler) \
9: insn reg1, reg2, offset; \
.section __ex_table, "a"; \
.word 9b, handler; \
.previous
#include <variant/core.h>
#include <asm/asmmacro.h>
/*
* char *__strncpy_user(char *dst, const char *src, size_t len)
......@@ -75,9 +68,9 @@ __strncpy_user:
j .Ldstunaligned
.Lsrc1mod2: # src address is odd
EX(l8ui, a9, a3, 0, fixup_l) # get byte 0
EX(11f) l8ui a9, a3, 0 # get byte 0
addi a3, a3, 1 # advance src pointer
EX(s8i, a9, a11, 0, fixup_s) # store byte 0
EX(10f) s8i a9, a11, 0 # store byte 0
beqz a9, .Lret # if byte 0 is zero
addi a11, a11, 1 # advance dst pointer
addi a4, a4, -1 # decrement len
......@@ -85,16 +78,16 @@ __strncpy_user:
bbci.l a3, 1, .Lsrcaligned # if src is now word-aligned
.Lsrc2mod4: # src address is 2 mod 4
EX(l8ui, a9, a3, 0, fixup_l) # get byte 0
EX(11f) l8ui a9, a3, 0 # get byte 0
/* 1-cycle interlock */
EX(s8i, a9, a11, 0, fixup_s) # store byte 0
EX(10f) s8i a9, a11, 0 # store byte 0
beqz a9, .Lret # if byte 0 is zero
addi a11, a11, 1 # advance dst pointer
addi a4, a4, -1 # decrement len
beqz a4, .Lret # if len is zero
EX(l8ui, a9, a3, 1, fixup_l) # get byte 0
EX(11f) l8ui a9, a3, 1 # get byte 0
addi a3, a3, 2 # advance src pointer
EX(s8i, a9, a11, 0, fixup_s) # store byte 0
EX(10f) s8i a9, a11, 0 # store byte 0
beqz a9, .Lret # if byte 0 is zero
addi a11, a11, 1 # advance dst pointer
addi a4, a4, -1 # decrement len
......@@ -117,12 +110,12 @@ __strncpy_user:
add a12, a12, a11 # a12 = end of last 4B chunck
#endif
.Loop1:
EX(l32i, a9, a3, 0, fixup_l) # get word from src
EX(11f) l32i a9, a3, 0 # get word from src
addi a3, a3, 4 # advance src pointer
bnone a9, a5, .Lz0 # if byte 0 is zero
bnone a9, a6, .Lz1 # if byte 1 is zero
bnone a9, a7, .Lz2 # if byte 2 is zero
EX(s32i, a9, a11, 0, fixup_s) # store word to dst
EX(10f) s32i a9, a11, 0 # store word to dst
bnone a9, a8, .Lz3 # if byte 3 is zero
addi a11, a11, 4 # advance dst pointer
#if !XCHAL_HAVE_LOOPS
......@@ -132,7 +125,7 @@ __strncpy_user:
.Loop1done:
bbci.l a4, 1, .L100
# copy 2 bytes
EX(l16ui, a9, a3, 0, fixup_l)
EX(11f) l16ui a9, a3, 0
addi a3, a3, 2 # advance src pointer
#ifdef __XTENSA_EB__
bnone a9, a7, .Lz0 # if byte 2 is zero
......@@ -141,13 +134,13 @@ __strncpy_user:
bnone a9, a5, .Lz0 # if byte 0 is zero
bnone a9, a6, .Lz1 # if byte 1 is zero
#endif
EX(s16i, a9, a11, 0, fixup_s)
EX(10f) s16i a9, a11, 0
addi a11, a11, 2 # advance dst pointer
.L100:
bbci.l a4, 0, .Lret
EX(l8ui, a9, a3, 0, fixup_l)
EX(11f) l8ui a9, a3, 0
/* slot */
EX(s8i, a9, a11, 0, fixup_s)
EX(10f) s8i a9, a11, 0
beqz a9, .Lret # if byte is zero
addi a11, a11, 1-3 # advance dst ptr 1, but also cancel
# the effect of adding 3 in .Lz3 code
......@@ -161,14 +154,14 @@ __strncpy_user:
#ifdef __XTENSA_EB__
movi a9, 0
#endif /* __XTENSA_EB__ */
EX(s8i, a9, a11, 0, fixup_s)
EX(10f) s8i a9, a11, 0
sub a2, a11, a2 # compute strlen
retw
.Lz1: # byte 1 is zero
#ifdef __XTENSA_EB__
extui a9, a9, 16, 16
#endif /* __XTENSA_EB__ */
EX(s16i, a9, a11, 0, fixup_s)
EX(10f) s16i a9, a11, 0
addi a11, a11, 1 # advance dst pointer
sub a2, a11, a2 # compute strlen
retw
......@@ -176,9 +169,9 @@ __strncpy_user:
#ifdef __XTENSA_EB__
extui a9, a9, 16, 16
#endif /* __XTENSA_EB__ */
EX(s16i, a9, a11, 0, fixup_s)
EX(10f) s16i a9, a11, 0
movi a9, 0
EX(s8i, a9, a11, 2, fixup_s)
EX(10f) s8i a9, a11, 2
addi a11, a11, 2 # advance dst pointer
sub a2, a11, a2 # compute strlen
retw
......@@ -196,9 +189,9 @@ __strncpy_user:
add a12, a11, a4 # a12 = ending address
#endif /* XCHAL_HAVE_LOOPS */
.Lnextbyte:
EX(l8ui, a9, a3, 0, fixup_l)
EX(11f) l8ui a9, a3, 0
addi a3, a3, 1
EX(s8i, a9, a11, 0, fixup_s)
EX(10f) s8i a9, a11, 0
beqz a9, .Lunalignedend
addi a11, a11, 1
#if !XCHAL_HAVE_LOOPS
......@@ -218,8 +211,7 @@ __strncpy_user:
* implementation in memset(). Thus, we differentiate between
* load/store fixups. */
fixup_s:
fixup_l:
10:
11:
movi a2, -EFAULT
retw
......@@ -12,14 +12,7 @@
*/
#include <variant/core.h>
/* Load or store instructions that may cause exceptions use the EX macro. */
#define EX(insn,reg1,reg2,offset,handler) \
9: insn reg1, reg2, offset; \
.section __ex_table, "a"; \
.word 9b, handler; \
.previous
#include <asm/asmmacro.h>
/*
* size_t __strnlen_user(const char *s, size_t len)
......@@ -77,7 +70,7 @@ __strnlen_user:
add a10, a10, a4 # a10 = end of last 4B chunk
#endif /* XCHAL_HAVE_LOOPS */
.Loop:
EX(l32i, a9, a4, 4, lenfixup) # get next word of string
EX(10f) l32i a9, a4, 4 # get next word of string
addi a4, a4, 4 # advance string pointer
bnone a9, a5, .Lz0 # if byte 0 is zero
bnone a9, a6, .Lz1 # if byte 1 is zero
......@@ -88,7 +81,7 @@ __strnlen_user:
#endif
.Ldone:
EX(l32i, a9, a4, 4, lenfixup) # load 4 bytes for remaining checks
EX(10f) l32i a9, a4, 4 # load 4 bytes for remaining checks
bbci.l a3, 1, .L100
# check two more bytes (bytes 0, 1 of word)
......@@ -125,14 +118,14 @@ __strnlen_user:
retw
.L1mod2: # address is odd
EX(l8ui, a9, a4, 4, lenfixup) # get byte 0
EX(10f) l8ui a9, a4, 4 # get byte 0
addi a4, a4, 1 # advance string pointer
beqz a9, .Lz3 # if byte 0 is zero
bbci.l a4, 1, .Laligned # if string pointer is now word-aligned
.L2mod4: # address is 2 mod 4
addi a4, a4, 2 # advance ptr for aligned access
EX(l32i, a9, a4, 0, lenfixup) # get word with first two bytes of string
EX(10f) l32i a9, a4, 0 # get word with first two bytes of string
bnone a9, a7, .Lz2 # if byte 2 (of word, not string) is zero
bany a9, a8, .Laligned # if byte 3 (of word, not string) is nonzero
# byte 3 is zero
......@@ -142,6 +135,6 @@ __strnlen_user:
.section .fixup, "ax"
.align 4
lenfixup:
10:
movi a2, 0
retw
......@@ -54,6 +54,7 @@
*/
#include <variant/core.h>
#include <asm/asmmacro.h>
#ifdef __XTENSA_EB__
#define ALIGN(R, W0, W1) src R, W0, W1
......@@ -63,15 +64,6 @@
#define SSA8(R) ssa8l R
#endif
/* Load or store instructions that may cause exceptions use the EX macro. */
#define EX(insn,reg1,reg2,offset,handler) \
9: insn reg1, reg2, offset; \
.section __ex_table, "a"; \
.word 9b, handler; \
.previous
.text
.align 4
.global __xtensa_copy_user
......@@ -102,9 +94,9 @@ __xtensa_copy_user:
bltui a4, 7, .Lbytecopy # do short copies byte by byte
# copy 1 byte
EX(l8ui, a6, a3, 0, fixup)
EX(10f) l8ui a6, a3, 0
addi a3, a3, 1
EX(s8i, a6, a5, 0, fixup)
EX(10f) s8i a6, a5, 0
addi a5, a5, 1
addi a4, a4, -1
bbci.l a5, 1, .Ldstaligned # if dst is now aligned, then
......@@ -112,11 +104,11 @@ __xtensa_copy_user:
.Ldst2mod4: # dst 16-bit aligned
# copy 2 bytes
bltui a4, 6, .Lbytecopy # do short copies byte by byte
EX(l8ui, a6, a3, 0, fixup)
EX(l8ui, a7, a3, 1, fixup)
EX(10f) l8ui a6, a3, 0
EX(10f) l8ui a7, a3, 1
addi a3, a3, 2
EX(s8i, a6, a5, 0, fixup)
EX(s8i, a7, a5, 1, fixup)
EX(10f) s8i a6, a5, 0
EX(10f) s8i a7, a5, 1
addi a5, a5, 2
addi a4, a4, -2
j .Ldstaligned # dst is now aligned, return to main algorithm
......@@ -135,9 +127,9 @@ __xtensa_copy_user:
add a7, a3, a4 # a7 = end address for source
#endif /* !XCHAL_HAVE_LOOPS */
.Lnextbyte:
EX(l8ui, a6, a3, 0, fixup)
EX(10f) l8ui a6, a3, 0
addi a3, a3, 1
EX(s8i, a6, a5, 0, fixup)
EX(10f) s8i a6, a5, 0
addi a5, a5, 1
#if !XCHAL_HAVE_LOOPS
blt a3, a7, .Lnextbyte
......@@ -161,15 +153,15 @@ __xtensa_copy_user:
add a8, a8, a3 # a8 = end of last 16B source chunk
#endif /* !XCHAL_HAVE_LOOPS */
.Loop1:
EX(l32i, a6, a3, 0, fixup)
EX(l32i, a7, a3, 4, fixup)
EX(s32i, a6, a5, 0, fixup)
EX(l32i, a6, a3, 8, fixup)
EX(s32i, a7, a5, 4, fixup)
EX(l32i, a7, a3, 12, fixup)
EX(s32i, a6, a5, 8, fixup)
EX(10f) l32i a6, a3, 0
EX(10f) l32i a7, a3, 4
EX(10f) s32i a6, a5, 0
EX(10f) l32i a6, a3, 8
EX(10f) s32i a7, a5, 4
EX(10f) l32i a7, a3, 12
EX(10f) s32i a6, a5, 8
addi a3, a3, 16
EX(s32i, a7, a5, 12, fixup)
EX(10f) s32i a7, a5, 12
addi a5, a5, 16
#if !XCHAL_HAVE_LOOPS
blt a3, a8, .Loop1
......@@ -177,31 +169,31 @@ __xtensa_copy_user:
.Loop1done:
bbci.l a4, 3, .L2
# copy 8 bytes
EX(l32i, a6, a3, 0, fixup)
EX(l32i, a7, a3, 4, fixup)
EX(10f) l32i a6, a3, 0
EX(10f) l32i a7, a3, 4
addi a3, a3, 8
EX(s32i, a6, a5, 0, fixup)
EX(s32i, a7, a5, 4, fixup)
EX(10f) s32i a6, a5, 0
EX(10f) s32i a7, a5, 4
addi a5, a5, 8
.L2:
bbci.l a4, 2, .L3
# copy 4 bytes
EX(l32i, a6, a3, 0, fixup)
EX(10f) l32i a6, a3, 0
addi a3, a3, 4
EX(s32i, a6, a5, 0, fixup)
EX(10f) s32i a6, a5, 0
addi a5, a5, 4
.L3:
bbci.l a4, 1, .L4
# copy 2 bytes
EX(l16ui, a6, a3, 0, fixup)
EX(10f) l16ui a6, a3, 0
addi a3, a3, 2
EX(s16i, a6, a5, 0, fixup)
EX(10f) s16i a6, a5, 0
addi a5, a5, 2
.L4:
bbci.l a4, 0, .L5
# copy 1 byte
EX(l8ui, a6, a3, 0, fixup)
EX(s8i, a6, a5, 0, fixup)
EX(10f) l8ui a6, a3, 0
EX(10f) s8i a6, a5, 0
.L5:
movi a2, 0 # return success for len bytes copied
retw
......@@ -217,7 +209,7 @@ __xtensa_copy_user:
# copy 16 bytes per iteration for word-aligned dst and unaligned src
and a10, a3, a8 # save unalignment offset for below
sub a3, a3, a10 # align a3 (to avoid sim warnings only; not needed for hardware)
EX(l32i, a6, a3, 0, fixup) # load first word
EX(10f) l32i a6, a3, 0 # load first word
#if XCHAL_HAVE_LOOPS
loopnez a7, .Loop2done
#else /* !XCHAL_HAVE_LOOPS */
......@@ -226,19 +218,19 @@ __xtensa_copy_user:
add a12, a12, a3 # a12 = end of last 16B source chunk
#endif /* !XCHAL_HAVE_LOOPS */
.Loop2:
EX(l32i, a7, a3, 4, fixup)
EX(l32i, a8, a3, 8, fixup)
EX(10f) l32i a7, a3, 4
EX(10f) l32i a8, a3, 8
ALIGN( a6, a6, a7)
EX(s32i, a6, a5, 0, fixup)
EX(l32i, a9, a3, 12, fixup)
EX(10f) s32i a6, a5, 0
EX(10f) l32i a9, a3, 12
ALIGN( a7, a7, a8)
EX(s32i, a7, a5, 4, fixup)
EX(l32i, a6, a3, 16, fixup)
EX(10f) s32i a7, a5, 4
EX(10f) l32i a6, a3, 16
ALIGN( a8, a8, a9)
EX(s32i, a8, a5, 8, fixup)
EX(10f) s32i a8, a5, 8
addi a3, a3, 16
ALIGN( a9, a9, a6)
EX(s32i, a9, a5, 12, fixup)
EX(10f) s32i a9, a5, 12
addi a5, a5, 16
#if !XCHAL_HAVE_LOOPS
blt a3, a12, .Loop2
......@@ -246,39 +238,39 @@ __xtensa_copy_user:
.Loop2done:
bbci.l a4, 3, .L12
# copy 8 bytes
EX(l32i, a7, a3, 4, fixup)
EX(l32i, a8, a3, 8, fixup)
EX(10f) l32i a7, a3, 4
EX(10f) l32i a8, a3, 8
ALIGN( a6, a6, a7)
EX(s32i, a6, a5, 0, fixup)
EX(10f) s32i a6, a5, 0
addi a3, a3, 8
ALIGN( a7, a7, a8)
EX(s32i, a7, a5, 4, fixup)
EX(10f) s32i a7, a5, 4
addi a5, a5, 8
mov a6, a8
.L12:
bbci.l a4, 2, .L13
# copy 4 bytes
EX(l32i, a7, a3, 4, fixup)
EX(10f) l32i a7, a3, 4
addi a3, a3, 4
ALIGN( a6, a6, a7)
EX(s32i, a6, a5, 0, fixup)
EX(10f) s32i a6, a5, 0
addi a5, a5, 4
mov a6, a7
.L13:
add a3, a3, a10 # readjust a3 with correct misalignment
bbci.l a4, 1, .L14
# copy 2 bytes
EX(l8ui, a6, a3, 0, fixup)
EX(l8ui, a7, a3, 1, fixup)
EX(10f) l8ui a6, a3, 0
EX(10f) l8ui a7, a3, 1
addi a3, a3, 2
EX(s8i, a6, a5, 0, fixup)
EX(s8i, a7, a5, 1, fixup)
EX(10f) s8i a6, a5, 0
EX(10f) s8i a7, a5, 1
addi a5, a5, 2
.L14:
bbci.l a4, 0, .L15
# copy 1 byte
EX(l8ui, a6, a3, 0, fixup)
EX(s8i, a6, a5, 0, fixup)
EX(10f) l8ui a6, a3, 0
EX(10f) s8i a6, a5, 0
.L15:
movi a2, 0 # return success for len bytes copied
retw
......@@ -294,7 +286,7 @@ __xtensa_copy_user:
*/
fixup:
10:
sub a2, a5, a2 /* a2 <-- bytes copied */
sub a2, a11, a2 /* a2 <-- bytes not copied */
retw
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment