Commit 26c5e07d, authored by Steven J. Hill and committed by Ralf Baechle

MIPS: microMIPS: Optimise 'memset' core library function.

Optimise 'memset' to use microMIPS instructions and/or optimisations
for binary size reduction. When the microMIPS ISA is not being used,
the library function compiles to the original binary code.
Signed-off-by: Steven J. Hill <Steven.Hill@imgtec.com>
parent bce86083
...@@ -296,6 +296,7 @@ symbol = value ...@@ -296,6 +296,7 @@ symbol = value
#define LONG_SUBU subu #define LONG_SUBU subu
#define LONG_L lw #define LONG_L lw
#define LONG_S sw #define LONG_S sw
#define LONG_SP swp
#define LONG_SLL sll #define LONG_SLL sll
#define LONG_SLLV sllv #define LONG_SLLV sllv
#define LONG_SRL srl #define LONG_SRL srl
...@@ -318,6 +319,7 @@ symbol = value ...@@ -318,6 +319,7 @@ symbol = value
#define LONG_SUBU dsubu #define LONG_SUBU dsubu
#define LONG_L ld #define LONG_L ld
#define LONG_S sd #define LONG_S sd
#define LONG_SP sdp
#define LONG_SLL dsll #define LONG_SLL dsll
#define LONG_SLLV dsllv #define LONG_SLLV dsllv
#define LONG_SRL dsrl #define LONG_SRL dsrl
......
...@@ -5,7 +5,8 @@ ...@@ -5,7 +5,8 @@
* *
* Copyright (C) 1998, 1999, 2000 by Ralf Baechle * Copyright (C) 1998, 1999, 2000 by Ralf Baechle
* Copyright (C) 1999, 2000 Silicon Graphics, Inc. * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
* Copyright (C) 2007 Maciej W. Rozycki * Copyright (C) 2007 by Maciej W. Rozycki
* Copyright (C) 2011, 2012 MIPS Technologies, Inc.
*/ */
#include <asm/asm.h> #include <asm/asm.h>
#include <asm/asm-offsets.h> #include <asm/asm-offsets.h>
...@@ -19,6 +20,20 @@ ...@@ -19,6 +20,20 @@
#define LONG_S_R sdr #define LONG_S_R sdr
#endif #endif
#ifdef CONFIG_CPU_MICROMIPS
#define STORSIZE (LONGSIZE * 2)
#define STORMASK (STORSIZE - 1)
#define FILL64RG t8
#define FILLPTRG t7
#undef LONG_S
#define LONG_S LONG_SP
#else
#define STORSIZE LONGSIZE
#define STORMASK LONGMASK
#define FILL64RG a1
#define FILLPTRG t0
#endif
#define EX(insn,reg,addr,handler) \ #define EX(insn,reg,addr,handler) \
9: insn reg, addr; \ 9: insn reg, addr; \
.section __ex_table,"a"; \ .section __ex_table,"a"; \
...@@ -26,23 +41,25 @@ ...@@ -26,23 +41,25 @@
.previous .previous
.macro f_fill64 dst, offset, val, fixup .macro f_fill64 dst, offset, val, fixup
EX(LONG_S, \val, (\offset + 0 * LONGSIZE)(\dst), \fixup) EX(LONG_S, \val, (\offset + 0 * STORSIZE)(\dst), \fixup)
EX(LONG_S, \val, (\offset + 1 * LONGSIZE)(\dst), \fixup) EX(LONG_S, \val, (\offset + 1 * STORSIZE)(\dst), \fixup)
EX(LONG_S, \val, (\offset + 2 * LONGSIZE)(\dst), \fixup) EX(LONG_S, \val, (\offset + 2 * STORSIZE)(\dst), \fixup)
EX(LONG_S, \val, (\offset + 3 * LONGSIZE)(\dst), \fixup) EX(LONG_S, \val, (\offset + 3 * STORSIZE)(\dst), \fixup)
EX(LONG_S, \val, (\offset + 4 * LONGSIZE)(\dst), \fixup) #if ((defined(CONFIG_CPU_MICROMIPS) && (LONGSIZE == 4)) || !defined(CONFIG_CPU_MICROMIPS))
EX(LONG_S, \val, (\offset + 5 * LONGSIZE)(\dst), \fixup) EX(LONG_S, \val, (\offset + 4 * STORSIZE)(\dst), \fixup)
EX(LONG_S, \val, (\offset + 6 * LONGSIZE)(\dst), \fixup) EX(LONG_S, \val, (\offset + 5 * STORSIZE)(\dst), \fixup)
EX(LONG_S, \val, (\offset + 7 * LONGSIZE)(\dst), \fixup) EX(LONG_S, \val, (\offset + 6 * STORSIZE)(\dst), \fixup)
#if LONGSIZE == 4 EX(LONG_S, \val, (\offset + 7 * STORSIZE)(\dst), \fixup)
EX(LONG_S, \val, (\offset + 8 * LONGSIZE)(\dst), \fixup) #endif
EX(LONG_S, \val, (\offset + 9 * LONGSIZE)(\dst), \fixup) #if (!defined(CONFIG_CPU_MICROMIPS) && (LONGSIZE == 4))
EX(LONG_S, \val, (\offset + 10 * LONGSIZE)(\dst), \fixup) EX(LONG_S, \val, (\offset + 8 * STORSIZE)(\dst), \fixup)
EX(LONG_S, \val, (\offset + 11 * LONGSIZE)(\dst), \fixup) EX(LONG_S, \val, (\offset + 9 * STORSIZE)(\dst), \fixup)
EX(LONG_S, \val, (\offset + 12 * LONGSIZE)(\dst), \fixup) EX(LONG_S, \val, (\offset + 10 * STORSIZE)(\dst), \fixup)
EX(LONG_S, \val, (\offset + 13 * LONGSIZE)(\dst), \fixup) EX(LONG_S, \val, (\offset + 11 * STORSIZE)(\dst), \fixup)
EX(LONG_S, \val, (\offset + 14 * LONGSIZE)(\dst), \fixup) EX(LONG_S, \val, (\offset + 12 * STORSIZE)(\dst), \fixup)
EX(LONG_S, \val, (\offset + 15 * LONGSIZE)(\dst), \fixup) EX(LONG_S, \val, (\offset + 13 * STORSIZE)(\dst), \fixup)
EX(LONG_S, \val, (\offset + 14 * STORSIZE)(\dst), \fixup)
EX(LONG_S, \val, (\offset + 15 * STORSIZE)(\dst), \fixup)
#endif #endif
.endm .endm
...@@ -71,16 +88,20 @@ LEAF(memset) ...@@ -71,16 +88,20 @@ LEAF(memset)
1: 1:
FEXPORT(__bzero) FEXPORT(__bzero)
sltiu t0, a2, LONGSIZE /* very small region? */ sltiu t0, a2, STORSIZE /* very small region? */
bnez t0, .Lsmall_memset bnez t0, .Lsmall_memset
andi t0, a0, LONGMASK /* aligned? */ andi t0, a0, STORMASK /* aligned? */
#ifdef CONFIG_CPU_MICROMIPS
move t8, a1 /* used by 'swp' instruction */
move t9, a1
#endif
#ifndef CONFIG_CPU_DADDI_WORKAROUNDS #ifndef CONFIG_CPU_DADDI_WORKAROUNDS
beqz t0, 1f beqz t0, 1f
PTR_SUBU t0, LONGSIZE /* alignment in bytes */ PTR_SUBU t0, STORSIZE /* alignment in bytes */
#else #else
.set noat .set noat
li AT, LONGSIZE li AT, STORSIZE
beqz t0, 1f beqz t0, 1f
PTR_SUBU t0, AT /* alignment in bytes */ PTR_SUBU t0, AT /* alignment in bytes */
.set at .set at
...@@ -99,24 +120,27 @@ FEXPORT(__bzero) ...@@ -99,24 +120,27 @@ FEXPORT(__bzero)
1: ori t1, a2, 0x3f /* # of full blocks */ 1: ori t1, a2, 0x3f /* # of full blocks */
xori t1, 0x3f xori t1, 0x3f
beqz t1, .Lmemset_partial /* no block to fill */ beqz t1, .Lmemset_partial /* no block to fill */
andi t0, a2, 0x40-LONGSIZE andi t0, a2, 0x40-STORSIZE
PTR_ADDU t1, a0 /* end address */ PTR_ADDU t1, a0 /* end address */
.set reorder .set reorder
1: PTR_ADDIU a0, 64 1: PTR_ADDIU a0, 64
R10KCBARRIER(0(ra)) R10KCBARRIER(0(ra))
f_fill64 a0, -64, a1, .Lfwd_fixup f_fill64 a0, -64, FILL64RG, .Lfwd_fixup
bne t1, a0, 1b bne t1, a0, 1b
.set noreorder .set noreorder
.Lmemset_partial: .Lmemset_partial:
R10KCBARRIER(0(ra)) R10KCBARRIER(0(ra))
PTR_LA t1, 2f /* where to start */ PTR_LA t1, 2f /* where to start */
#ifdef CONFIG_CPU_MICROMIPS
LONG_SRL t7, t0, 1
#endif
#if LONGSIZE == 4 #if LONGSIZE == 4
PTR_SUBU t1, t0 PTR_SUBU t1, FILLPTRG
#else #else
.set noat .set noat
LONG_SRL AT, t0, 1 LONG_SRL AT, FILLPTRG, 1
PTR_SUBU t1, AT PTR_SUBU t1, AT
.set at .set at
#endif #endif
...@@ -126,9 +150,9 @@ FEXPORT(__bzero) ...@@ -126,9 +150,9 @@ FEXPORT(__bzero)
.set push .set push
.set noreorder .set noreorder
.set nomacro .set nomacro
f_fill64 a0, -64, a1, .Lpartial_fixup /* ... but first do longs ... */ f_fill64 a0, -64, FILL64RG, .Lpartial_fixup /* ... but first do longs ... */
2: .set pop 2: .set pop
andi a2, LONGMASK /* At most one long to go */ andi a2, STORMASK /* At most one long to go */
beqz a2, 1f beqz a2, 1f
PTR_ADDU a0, a2 /* What's left */ PTR_ADDU a0, a2 /* What's left */
...@@ -169,7 +193,7 @@ FEXPORT(__bzero) ...@@ -169,7 +193,7 @@ FEXPORT(__bzero)
.Lpartial_fixup: .Lpartial_fixup:
PTR_L t0, TI_TASK($28) PTR_L t0, TI_TASK($28)
andi a2, LONGMASK andi a2, STORMASK
LONG_L t0, THREAD_BUADDR(t0) LONG_L t0, THREAD_BUADDR(t0)
LONG_ADDU a2, t1 LONG_ADDU a2, t1
jr ra jr ra
...@@ -177,4 +201,4 @@ FEXPORT(__bzero) ...@@ -177,4 +201,4 @@ FEXPORT(__bzero)
.Llast_fixup: .Llast_fixup:
jr ra jr ra
andi v1, a2, LONGMASK andi v1, a2, STORMASK
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment