Commit 68dec269 authored by Maciej W. Rozycki, committed by Paul Burton

MIPS: memset: Limit excessive `noreorder' assembly mode use

Rewrite to use the `reorder' assembly mode and remove manually scheduled
delay slots except where GAS cannot schedule a delay-slot instruction
due to a data dependency or a section switch (as is the case with the EX
macro).  No change in machine code produced.
Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
[paul.burton@mips.com:
  Fix conflict with commit 932afdee ("MIPS: Add Kconfig variable for
  CPUs with unaligned load/store instructions")]
Signed-off-by: Paul Burton <paul.burton@mips.com>
Patchwork: https://patchwork.linux-mips.org/patch/20834/
Cc: Ralf Baechle <ralf@linux-mips.org>
parent 2f7619ae
...@@ -78,7 +78,6 @@ ...@@ -78,7 +78,6 @@
#endif #endif
.endm .endm
.set noreorder
.align 5 .align 5
/* /*
...@@ -94,13 +93,16 @@ ...@@ -94,13 +93,16 @@
.endif .endif
sltiu t0, a2, STORSIZE /* very small region? */ sltiu t0, a2, STORSIZE /* very small region? */
.set noreorder
bnez t0, .Lsmall_memset\@ bnez t0, .Lsmall_memset\@
andi t0, a0, STORMASK /* aligned? */ andi t0, a0, STORMASK /* aligned? */
.set reorder
#ifdef CONFIG_CPU_MICROMIPS #ifdef CONFIG_CPU_MICROMIPS
move t8, a1 /* used by 'swp' instruction */ move t8, a1 /* used by 'swp' instruction */
move t9, a1 move t9, a1
#endif #endif
.set noreorder
#ifndef CONFIG_CPU_DADDI_WORKAROUNDS #ifndef CONFIG_CPU_DADDI_WORKAROUNDS
beqz t0, 1f beqz t0, 1f
PTR_SUBU t0, STORSIZE /* alignment in bytes */ PTR_SUBU t0, STORSIZE /* alignment in bytes */
...@@ -111,6 +113,7 @@ ...@@ -111,6 +113,7 @@
PTR_SUBU t0, AT /* alignment in bytes */ PTR_SUBU t0, AT /* alignment in bytes */
.set at .set at
#endif #endif
.set reorder
#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR #ifdef CONFIG_CPU_HAS_LOAD_STORE_LR
R10KCBARRIER(0(ra)) R10KCBARRIER(0(ra))
...@@ -125,8 +128,10 @@ ...@@ -125,8 +128,10 @@
#else /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ #else /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
#define STORE_BYTE(N) \ #define STORE_BYTE(N) \
EX(sb, a1, N(a0), .Lbyte_fixup\@); \ EX(sb, a1, N(a0), .Lbyte_fixup\@); \
.set noreorder; \
beqz t0, 0f; \ beqz t0, 0f; \
PTR_ADDU t0, 1; PTR_ADDU t0, 1; \
.set reorder;
PTR_ADDU a2, t0 /* correct size */ PTR_ADDU a2, t0 /* correct size */
PTR_ADDU t0, 1 PTR_ADDU t0, 1
...@@ -148,16 +153,14 @@ ...@@ -148,16 +153,14 @@
#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ #endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
1: ori t1, a2, 0x3f /* # of full blocks */ 1: ori t1, a2, 0x3f /* # of full blocks */
xori t1, 0x3f xori t1, 0x3f
beqz t1, .Lmemset_partial\@ /* no block to fill */
andi t0, a2, 0x40-STORSIZE andi t0, a2, 0x40-STORSIZE
beqz t1, .Lmemset_partial\@ /* no block to fill */
PTR_ADDU t1, a0 /* end address */ PTR_ADDU t1, a0 /* end address */
.set reorder
1: PTR_ADDIU a0, 64 1: PTR_ADDIU a0, 64
R10KCBARRIER(0(ra)) R10KCBARRIER(0(ra))
f_fill64 a0, -64, FILL64RG, .Lfwd_fixup\@, \mode f_fill64 a0, -64, FILL64RG, .Lfwd_fixup\@, \mode
bne t1, a0, 1b bne t1, a0, 1b
.set noreorder
.Lmemset_partial\@: .Lmemset_partial\@:
R10KCBARRIER(0(ra)) R10KCBARRIER(0(ra))
...@@ -173,20 +176,18 @@ ...@@ -173,20 +176,18 @@
PTR_SUBU t1, AT PTR_SUBU t1, AT
.set at .set at
#endif #endif
jr t1
PTR_ADDU a0, t0 /* dest ptr */ PTR_ADDU a0, t0 /* dest ptr */
jr t1
.set push
.set noreorder
.set nomacro
/* ... but first do longs ... */ /* ... but first do longs ... */
f_fill64 a0, -64, FILL64RG, .Lpartial_fixup\@, \mode f_fill64 a0, -64, FILL64RG, .Lpartial_fixup\@, \mode
2: .set pop 2: andi a2, STORMASK /* At most one long to go */
andi a2, STORMASK /* At most one long to go */
.set noreorder
beqz a2, 1f beqz a2, 1f
#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR #ifdef CONFIG_CPU_HAS_LOAD_STORE_LR
PTR_ADDU a0, a2 /* What's left */ PTR_ADDU a0, a2 /* What's left */
.set reorder
R10KCBARRIER(0(ra)) R10KCBARRIER(0(ra))
#ifdef __MIPSEB__ #ifdef __MIPSEB__
EX(LONG_S_R, a1, -1(a0), .Llast_fixup\@) EX(LONG_S_R, a1, -1(a0), .Llast_fixup\@)
...@@ -195,6 +196,7 @@ ...@@ -195,6 +196,7 @@
#endif #endif
#else #else
PTR_SUBU t0, $0, a2 PTR_SUBU t0, $0, a2
.set reorder
move a2, zero /* No remaining longs */ move a2, zero /* No remaining longs */
PTR_ADDIU t0, 1 PTR_ADDIU t0, 1
STORE_BYTE(0) STORE_BYTE(0)
...@@ -210,20 +212,22 @@ ...@@ -210,20 +212,22 @@
#endif #endif
0: 0:
#endif #endif
1: jr ra 1: move a2, zero
move a2, zero jr ra
.Lsmall_memset\@: .Lsmall_memset\@:
beqz a2, 2f
PTR_ADDU t1, a0, a2 PTR_ADDU t1, a0, a2
beqz a2, 2f
1: PTR_ADDIU a0, 1 /* fill bytewise */ 1: PTR_ADDIU a0, 1 /* fill bytewise */
R10KCBARRIER(0(ra)) R10KCBARRIER(0(ra))
.set noreorder
bne t1, a0, 1b bne t1, a0, 1b
EX(sb, a1, -1(a0), .Lsmall_fixup\@) EX(sb, a1, -1(a0), .Lsmall_fixup\@)
.set reorder
2: jr ra /* done */ 2: move a2, zero
move a2, zero jr ra /* done */
.if __memset == 1 .if __memset == 1
END(memset) END(memset)
.set __memset, 0 .set __memset, 0
...@@ -237,14 +241,13 @@ ...@@ -237,14 +241,13 @@
* a2 = a2 - t0 + 1 * a2 = a2 - t0 + 1
*/ */
PTR_SUBU a2, t0 PTR_SUBU a2, t0
jr ra
PTR_ADDIU a2, 1 PTR_ADDIU a2, 1
jr ra
#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ #endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
.Lfirst_fixup\@: .Lfirst_fixup\@:
/* unset_bytes already in a2 */ /* unset_bytes already in a2 */
jr ra jr ra
nop
.Lfwd_fixup\@: .Lfwd_fixup\@:
/* /*
...@@ -255,8 +258,8 @@ ...@@ -255,8 +258,8 @@
andi a2, 0x3f andi a2, 0x3f
LONG_L t0, THREAD_BUADDR(t0) LONG_L t0, THREAD_BUADDR(t0)
LONG_ADDU a2, t1 LONG_ADDU a2, t1
jr ra
LONG_SUBU a2, t0 LONG_SUBU a2, t0
jr ra
.Lpartial_fixup\@: .Lpartial_fixup\@:
/* /*
...@@ -267,24 +270,21 @@ ...@@ -267,24 +270,21 @@
andi a2, STORMASK andi a2, STORMASK
LONG_L t0, THREAD_BUADDR(t0) LONG_L t0, THREAD_BUADDR(t0)
LONG_ADDU a2, a0 LONG_ADDU a2, a0
jr ra
LONG_SUBU a2, t0 LONG_SUBU a2, t0
jr ra
.Llast_fixup\@: .Llast_fixup\@:
/* unset_bytes already in a2 */ /* unset_bytes already in a2 */
jr ra jr ra
nop
.Lsmall_fixup\@: .Lsmall_fixup\@:
/* /*
* unset_bytes = end_addr - current_addr + 1 * unset_bytes = end_addr - current_addr + 1
* a2 = t1 - a0 + 1 * a2 = t1 - a0 + 1
*/ */
.set reorder
PTR_SUBU a2, t1, a0 PTR_SUBU a2, t1, a0
PTR_ADDIU a2, 1 PTR_ADDIU a2, 1
jr ra jr ra
.set noreorder
.endm .endm
...@@ -298,8 +298,8 @@ ...@@ -298,8 +298,8 @@
LEAF(memset) LEAF(memset)
EXPORT_SYMBOL(memset) EXPORT_SYMBOL(memset)
beqz a1, 1f
move v0, a0 /* result */ move v0, a0 /* result */
beqz a1, 1f
andi a1, 0xff /* spread fillword */ andi a1, 0xff /* spread fillword */
LONG_SLL t1, a1, 8 LONG_SLL t1, a1, 8
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment