Commit a16036e9 authored by David S. Miller

Merge branch 'sparc64-optimized-fls'

Vijay Kumar says:

====================
sparc64: Optimize fls and __fls

SPARC provides the lzcnt instruction (with VIS3), which can be used to
optimize the fls, __fls and fls64 functions. On systems that support
lzcnt, we now boot-time patch in the SPARC-optimized fls, __fls and
fls64 functions.

v3->v4:
 -  Fixed a typo.
v2->v3:
 -  Using ENTRY(), ENDPROC() for assembler functions.
 -  Removed BITS_PER_LONG from __fls.
 -  Using generic fls64().
 -  Replaced lzcnt instruction with .word directive.
v1->v2:
 - Fixed delay slot issue.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 9a08862a 46ad8d2d
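
For reference, fls(word) returns one plus the index of the most significant
set bit (so fls(0) == 0 and fls(1) == 1), while __fls(word) returns the
0-based index of that bit and is undefined for word == 0. A minimal C sketch
of these semantics (illustrative only; fls_ref/__fls_ref are hypothetical
names, not part of this patch):

    /* fls(0) == 0, fls(1) == 1, fls(0x80000000) == 32 */
    static int fls_ref(unsigned int word)
    {
            return word ? 32 - __builtin_clz(word) : 0;
    }

    /* __fls(1) == 0, __fls(1UL << 63) == 63; undefined for word == 0 */
    static unsigned long __fls_ref(unsigned long word)
    {
            return 63 - __builtin_clzl(word);
    }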
@@ -22,10 +22,11 @@ void set_bit(unsigned long nr, volatile unsigned long *addr);
void clear_bit(unsigned long nr, volatile unsigned long *addr);
void change_bit(unsigned long nr, volatile unsigned long *addr);
int fls(unsigned int word);
int __fls(unsigned long word);
#include <asm-generic/bitops/non-atomic.h>
#include <asm-generic/bitops/fls.h>
#include <asm-generic/bitops/__fls.h>
#include <asm-generic/bitops/fls64.h>
#ifdef __KERNEL__
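
With fls() and __fls() now declared as real functions, only the generic
fls64() include remains (see the v2->v3 note above). On 64-bit the generic
fls64() is a thin wrapper over __fls(), roughly:

    /* the BITS_PER_LONG == 64 variant from asm-generic/bitops/fls64.h */
    static __always_inline int fls64(__u64 x)
    {
            if (x == 0)
                    return 0;
            return __fls(x) + 1;
    }

so boot-patching __fls() transparently accelerates fls64() as well.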
@@ -640,6 +640,8 @@ niagara4_patch:
        nop
        call    niagara4_patch_pageops
         nop
        call    niagara4_patch_fls
         nop
        ba,a,pt %xcc, 80f
         nop
@@ -16,6 +16,9 @@ lib-$(CONFIG_SPARC64) += atomic_64.o
lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o
lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o
lib-$(CONFIG_SPARC64) += multi3.o
lib-$(CONFIG_SPARC64) += fls.o
lib-$(CONFIG_SPARC64) += fls64.o
obj-$(CONFIG_SPARC64) += NG4fls.o
lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o
lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o
/* NG4fls.S: SPARC optimized fls and __fls for T4 and above.
 *
 * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
 */

#include <linux/linkage.h>

#define LZCNT_O0_G2 \
        .word   0x85b002e8
        .text
        .register       %g2, #scratch
        .register       %g3, #scratch

ENTRY(NG4fls)
        LZCNT_O0_G2     !lzcnt  %o0, %g2
        mov     64, %g3
        retl
         sub    %g3, %g2, %o0
ENDPROC(NG4fls)

ENTRY(__NG4fls)
        brz,pn  %o0, 1f
         LZCNT_O0_G2    !lzcnt  %o0, %g2
        mov     63, %g3
        sub     %g3, %g2, %o0
1:
        retl
         nop
ENDPROC(__NG4fls)
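
The .word 0x85b002e8 emits the raw encoding of "lzcnt %o0, %g2" so the file
assembles even with toolchains whose assembler does not know the VIS3
mnemonic (the v2->v3 note above). Since lzcnt of a 64-bit register returns
64 for zero input, and the 32-bit fls() argument is expected zero-extended
in %o0, the two routines reduce to simple identities; a C sketch with
hypothetical names, using __builtin_clzl where the hardware uses lzcnt:

    /* NG4fls: fls(word) == 64 - lzcnt(word); lzcnt(0) == 64 gives fls(0) == 0 */
    static int NG4fls_ref(unsigned long word)
    {
            return word ? 64 - __builtin_clzl(word) : 0;
    }

    /* __NG4fls: __fls(word) == 63 - lzcnt(word) for word != 0;
     * the zero case just falls through and returns %o0 unchanged. */
    static unsigned long __NG4fls_ref(unsigned long word)
    {
            return word ? 63 - __builtin_clzl(word) : 0;
    }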
@@ -3,6 +3,8 @@
 * Copyright (C) 2012 David S. Miller <davem@davemloft.net>
 */

#include <linux/linkage.h>

#define BRANCH_ALWAYS   0x10680000
#define NOP             0x01000000
#define NG_DO_PATCH(OLD, NEW) \
@@ -52,3 +54,10 @@ niagara4_patch_pageops:
        retl
         nop
        .size   niagara4_patch_pageops,.-niagara4_patch_pageops

ENTRY(niagara4_patch_fls)
        NG_DO_PATCH(fls, NG4fls)
        NG_DO_PATCH(__fls, __NG4fls)
        retl
         nop
ENDPROC(niagara4_patch_fls)
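
niagara4_patch_fls reuses the existing NG_DO_PATCH macro, which overwrites
the first two instructions of the old routine with a branch-always to the new
one plus a nop in the delay slot, then flushes the patched line from the
instruction cache. The displacement arithmetic, sketched in C (assuming the
19-bit word-displacement branch format that BRANCH_ALWAYS encodes;
ng_do_patch_ref is a hypothetical name):

    #include <stdint.h>

    static void ng_do_patch_ref(uint32_t *old_fn, uint32_t *new_fn)
    {
            long disp = (char *)new_fn - (char *)old_fn;  /* byte offset */

            /* BRANCH_ALWAYS | ((disp / 4) masked to 19 bits) */
            old_fn[0] = 0x10680000 | (((uint32_t)disp >> 2) & 0x7ffff);
            old_fn[1] = 0x01000000;  /* NOP in the delay slot */
            /* the assembly then issues "flush" on the patched address */
    }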
/* fls.S: SPARC default fls definition.
 *
 * SPARC default fls definition, which follows the same algorithm as
 * in generic fls(). This function will be boot time patched on T4
 * and onward.
 */

#include <linux/linkage.h>
#include <asm/export.h>
        .text
        .register       %g2, #scratch
        .register       %g3, #scratch

ENTRY(fls)
        brz,pn  %o0, 6f
         mov    0, %o1
        sethi   %hi(0xffff0000), %g3
        mov     %o0, %g2
        andcc   %o0, %g3, %g0
        be,pt   %icc, 8f
         mov    32, %o1
        sethi   %hi(0xff000000), %g3
        andcc   %g2, %g3, %g0
        bne,pt  %icc, 3f
         sethi  %hi(0xf0000000), %g3
        sll     %o0, 8, %o0
1:
        add     %o1, -8, %o1
        sra     %o0, 0, %o0
        mov     %o0, %g2
2:
        sethi   %hi(0xf0000000), %g3
3:
        andcc   %g2, %g3, %g0
        bne,pt  %icc, 4f
         sethi  %hi(0xc0000000), %g3
        sll     %o0, 4, %o0
        add     %o1, -4, %o1
        sra     %o0, 0, %o0
        mov     %o0, %g2
4:
        andcc   %g2, %g3, %g0
        be,a,pt %icc, 7f
         sll    %o0, 2, %o0
5:
        xnor    %g0, %o0, %o0
        srl     %o0, 31, %o0
        sub     %o1, %o0, %o1
6:
        jmp     %o7 + 8
         sra    %o1, 0, %o0
7:
        add     %o1, -2, %o1
        ba,pt   %xcc, 5b
         sra    %o0, 0, %o0
8:
        sll     %o0, 16, %o0
        sethi   %hi(0xff000000), %g3
        sra     %o0, 0, %o0
        mov     %o0, %g2
        andcc   %g2, %g3, %g0
        bne,pt  %icc, 2b
         mov    16, %o1
        ba,pt   %xcc, 1b
         sll    %o0, 8, %o0
ENDPROC(fls)
EXPORT_SYMBOL(fls)
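
This is the same binary search as the generic fls() in
include/asm-generic/bitops/fls.h: probe successively narrower masks at the
top of the word, shifting the value up and lowering the running result on
each miss. Restated in C for comparison (fls_generic is a hypothetical name):

    static inline int fls_generic(unsigned int x)
    {
            int r = 32;

            if (!x)
                    return 0;
            if (!(x & 0xffff0000u)) { x <<= 16; r -= 16; }
            if (!(x & 0xff000000u)) { x <<= 8;  r -= 8;  }
            if (!(x & 0xf0000000u)) { x <<= 4;  r -= 4;  }
            if (!(x & 0xc0000000u)) { x <<= 2;  r -= 2;  }
            if (!(x & 0x80000000u)) {           r -= 1;  }
            return r;
    }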
/* fls64.S: SPARC default __fls definition.
 *
 * SPARC default __fls definition, which follows the same algorithm as
 * in generic __fls(). This function will be boot time patched on T4
 * and onward.
 */

#include <linux/linkage.h>
#include <asm/export.h>
        .text
        .register       %g2, #scratch
        .register       %g3, #scratch

ENTRY(__fls)
        mov     -1, %g2
        sllx    %g2, 32, %g2
        and     %o0, %g2, %g2
        brnz,pt %g2, 1f
         mov    63, %g1
        sllx    %o0, 32, %o0
        mov     31, %g1
1:
        mov     -1, %g2
        sllx    %g2, 48, %g2
        and     %o0, %g2, %g2
        brnz,pt %g2, 2f
         mov    -1, %g2
        sllx    %o0, 16, %o0
        add     %g1, -16, %g1
2:
        mov     -1, %g2
        sllx    %g2, 56, %g2
        and     %o0, %g2, %g2
        brnz,pt %g2, 3f
         mov    -1, %g2
        sllx    %o0, 8, %o0
        add     %g1, -8, %g1
3:
        sllx    %g2, 60, %g2
        and     %o0, %g2, %g2
        brnz,pt %g2, 4f
         mov    -1, %g2
        sllx    %o0, 4, %o0
        add     %g1, -4, %g1
4:
        sllx    %g2, 62, %g2
        and     %o0, %g2, %g2
        brnz,pt %g2, 5f
         mov    -1, %g3
        sllx    %o0, 2, %o0
        add     %g1, -2, %g1
5:
        mov     0, %g2
        sllx    %g3, 63, %g3
        and     %o0, %g3, %o0
        movre   %o0, 1, %g2
        sub     %g1, %g2, %g1
        jmp     %o7 + 8
         sra    %g1, 0, %o0
ENDPROC(__fls)
EXPORT_SYMBOL(__fls)
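
Likewise, this follows the generic __fls() narrowing search, restated here
in C for a 64-bit long (__fls_generic is a hypothetical name):

    static inline unsigned long __fls_generic(unsigned long word)
    {
            int num = 63;

            if (!(word & (~0ul << 32))) { num -= 32; word <<= 32; }
            if (!(word & (~0ul << 48))) { num -= 16; word <<= 16; }
            if (!(word & (~0ul << 56))) { num -= 8;  word <<= 8;  }
            if (!(word & (~0ul << 60))) { num -= 4;  word <<= 4;  }
            if (!(word & (~0ul << 62))) { num -= 2;  word <<= 2;  }
            if (!(word & (~0ul << 63)))   num -= 1;
            return num;
    }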