Commit 500c2e1f authored by David Daney, committed by Ralf Baechle

MIPS: Optimize spinlocks.

The current locking mechanism uses a ll/sc sequence to release a
spinlock.  This is slower than a wmb() followed by a store to unlock.

The branching forward to .subsection 2 on sc failure slows down the
contended case.  So we get rid of that part too.

Since we are now working on naturally aligned u16 values, we can get
rid of a masking operation as the LHU already does the right thing.
The ANDI instructions are reversed for better scheduling on multi-issue CPUs.

On a 12 CPU 750MHz Octeon cn5750 this patch improves ipv4 UDP packet
forwarding rates from 3.58*10^6 PPS to 3.99*10^6 PPS, or about 11%.
Signed-off-by: David Daney <ddaney@caviumnetworks.com>
To: linux-mips@linux-mips.org
Patchwork: http://patchwork.linux-mips.org/patch/937/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
parent e275ed5e
...@@ -168,8 +168,14 @@ ...@@ -168,8 +168,14 @@
#ifdef CONFIG_CPU_CAVIUM_OCTEON #ifdef CONFIG_CPU_CAVIUM_OCTEON
#define smp_mb__before_llsc() smp_wmb() #define smp_mb__before_llsc() smp_wmb()
/* Cause previous writes to become visible on all CPUs as soon as possible */
#define nudge_writes() __asm__ __volatile__(".set push\n\t" \
".set arch=octeon\n\t" \
"syncw\n\t" \
".set pop" : : : "memory")
#else #else
#define smp_mb__before_llsc() smp_llsc_mb() #define smp_mb__before_llsc() smp_llsc_mb()
#define nudge_writes() mb()
#endif #endif
#endif /* __ASM_BARRIER_H */ #endif /* __ASM_BARRIER_H */
...@@ -36,9 +36,9 @@ ...@@ -36,9 +36,9 @@
static inline int arch_spin_is_locked(arch_spinlock_t *lock) static inline int arch_spin_is_locked(arch_spinlock_t *lock)
{ {
unsigned int counters = ACCESS_ONCE(lock->lock); u32 counters = ACCESS_ONCE(lock->lock);
return ((counters >> 14) ^ counters) & 0x1fff; return ((counters >> 16) ^ counters) & 0xffff;
} }
#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
...@@ -47,9 +47,9 @@ static inline int arch_spin_is_locked(arch_spinlock_t *lock) ...@@ -47,9 +47,9 @@ static inline int arch_spin_is_locked(arch_spinlock_t *lock)
static inline int arch_spin_is_contended(arch_spinlock_t *lock) static inline int arch_spin_is_contended(arch_spinlock_t *lock)
{ {
unsigned int counters = ACCESS_ONCE(lock->lock); u32 counters = ACCESS_ONCE(lock->lock);
return (((counters >> 14) - counters) & 0x1fff) > 1; return (((counters >> 16) - counters) & 0xffff) > 1;
} }
#define arch_spin_is_contended arch_spin_is_contended #define arch_spin_is_contended arch_spin_is_contended
...@@ -57,6 +57,7 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) ...@@ -57,6 +57,7 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
{ {
int my_ticket; int my_ticket;
int tmp; int tmp;
int inc = 0x10000;
if (R10000_LLSC_WAR) { if (R10000_LLSC_WAR) {
__asm__ __volatile__ ( __asm__ __volatile__ (
...@@ -64,25 +65,24 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) ...@@ -64,25 +65,24 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
" .set noreorder \n" " .set noreorder \n"
" \n" " \n"
"1: ll %[ticket], %[ticket_ptr] \n" "1: ll %[ticket], %[ticket_ptr] \n"
" addiu %[my_ticket], %[ticket], 0x4000 \n" " addu %[my_ticket], %[ticket], %[inc] \n"
" sc %[my_ticket], %[ticket_ptr] \n" " sc %[my_ticket], %[ticket_ptr] \n"
" beqzl %[my_ticket], 1b \n" " beqzl %[my_ticket], 1b \n"
" nop \n" " nop \n"
" srl %[my_ticket], %[ticket], 14 \n" " srl %[my_ticket], %[ticket], 16 \n"
" andi %[my_ticket], %[my_ticket], 0x1fff \n" " andi %[ticket], %[ticket], 0xffff \n"
" andi %[ticket], %[ticket], 0x1fff \n" " andi %[my_ticket], %[my_ticket], 0xffff \n"
" bne %[ticket], %[my_ticket], 4f \n" " bne %[ticket], %[my_ticket], 4f \n"
" subu %[ticket], %[my_ticket], %[ticket] \n" " subu %[ticket], %[my_ticket], %[ticket] \n"
"2: \n" "2: \n"
" .subsection 2 \n" " .subsection 2 \n"
"4: andi %[ticket], %[ticket], 0x1fff \n" "4: andi %[ticket], %[ticket], 0xffff \n"
" sll %[ticket], 5 \n" " sll %[ticket], 5 \n"
" \n" " \n"
"6: bnez %[ticket], 6b \n" "6: bnez %[ticket], 6b \n"
" subu %[ticket], 1 \n" " subu %[ticket], 1 \n"
" \n" " \n"
" lw %[ticket], %[ticket_ptr] \n" " lhu %[ticket], %[serving_now_ptr] \n"
" andi %[ticket], %[ticket], 0x1fff \n"
" beq %[ticket], %[my_ticket], 2b \n" " beq %[ticket], %[my_ticket], 2b \n"
" subu %[ticket], %[my_ticket], %[ticket] \n" " subu %[ticket], %[my_ticket], %[ticket] \n"
" b 4b \n" " b 4b \n"
...@@ -90,36 +90,33 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) ...@@ -90,36 +90,33 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
" .previous \n" " .previous \n"
" .set pop \n" " .set pop \n"
: [ticket_ptr] "+m" (lock->lock), : [ticket_ptr] "+m" (lock->lock),
[serving_now_ptr] "+m" (lock->h.serving_now),
[ticket] "=&r" (tmp), [ticket] "=&r" (tmp),
[my_ticket] "=&r" (my_ticket)); [my_ticket] "=&r" (my_ticket)
: [inc] "r" (inc));
} else { } else {
__asm__ __volatile__ ( __asm__ __volatile__ (
" .set push # arch_spin_lock \n" " .set push # arch_spin_lock \n"
" .set noreorder \n" " .set noreorder \n"
" \n" " \n"
" ll %[ticket], %[ticket_ptr] \n" "1: ll %[ticket], %[ticket_ptr] \n"
"1: addiu %[my_ticket], %[ticket], 0x4000 \n" " addu %[my_ticket], %[ticket], %[inc] \n"
" sc %[my_ticket], %[ticket_ptr] \n" " sc %[my_ticket], %[ticket_ptr] \n"
" beqz %[my_ticket], 3f \n" " beqz %[my_ticket], 1b \n"
" nop \n" " srl %[my_ticket], %[ticket], 16 \n"
" srl %[my_ticket], %[ticket], 14 \n" " andi %[ticket], %[ticket], 0xffff \n"
" andi %[my_ticket], %[my_ticket], 0x1fff \n" " andi %[my_ticket], %[my_ticket], 0xffff \n"
" andi %[ticket], %[ticket], 0x1fff \n"
" bne %[ticket], %[my_ticket], 4f \n" " bne %[ticket], %[my_ticket], 4f \n"
" subu %[ticket], %[my_ticket], %[ticket] \n" " subu %[ticket], %[my_ticket], %[ticket] \n"
"2: \n" "2: \n"
" .subsection 2 \n" " .subsection 2 \n"
"3: b 1b \n"
" ll %[ticket], %[ticket_ptr] \n"
" \n"
"4: andi %[ticket], %[ticket], 0x1fff \n" "4: andi %[ticket], %[ticket], 0x1fff \n"
" sll %[ticket], 5 \n" " sll %[ticket], 5 \n"
" \n" " \n"
"6: bnez %[ticket], 6b \n" "6: bnez %[ticket], 6b \n"
" subu %[ticket], 1 \n" " subu %[ticket], 1 \n"
" \n" " \n"
" lw %[ticket], %[ticket_ptr] \n" " lhu %[ticket], %[serving_now_ptr] \n"
" andi %[ticket], %[ticket], 0x1fff \n"
" beq %[ticket], %[my_ticket], 2b \n" " beq %[ticket], %[my_ticket], 2b \n"
" subu %[ticket], %[my_ticket], %[ticket] \n" " subu %[ticket], %[my_ticket], %[ticket] \n"
" b 4b \n" " b 4b \n"
...@@ -127,8 +124,10 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) ...@@ -127,8 +124,10 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
" .previous \n" " .previous \n"
" .set pop \n" " .set pop \n"
: [ticket_ptr] "+m" (lock->lock), : [ticket_ptr] "+m" (lock->lock),
[serving_now_ptr] "+m" (lock->h.serving_now),
[ticket] "=&r" (tmp), [ticket] "=&r" (tmp),
[my_ticket] "=&r" (my_ticket)); [my_ticket] "=&r" (my_ticket)
: [inc] "r" (inc));
} }
smp_llsc_mb(); smp_llsc_mb();
...@@ -136,47 +135,16 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) ...@@ -136,47 +135,16 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
static inline void arch_spin_unlock(arch_spinlock_t *lock) static inline void arch_spin_unlock(arch_spinlock_t *lock)
{ {
int tmp; unsigned int serving_now = lock->h.serving_now + 1;
wmb();
smp_mb__before_llsc(); lock->h.serving_now = (u16)serving_now;
nudge_writes();
if (R10000_LLSC_WAR) {
__asm__ __volatile__ (
" # arch_spin_unlock \n"
"1: ll %[ticket], %[ticket_ptr] \n"
" addiu %[ticket], %[ticket], 1 \n"
" ori %[ticket], %[ticket], 0x2000 \n"
" xori %[ticket], %[ticket], 0x2000 \n"
" sc %[ticket], %[ticket_ptr] \n"
" beqzl %[ticket], 1b \n"
: [ticket_ptr] "+m" (lock->lock),
[ticket] "=&r" (tmp));
} else {
__asm__ __volatile__ (
" .set push # arch_spin_unlock \n"
" .set noreorder \n"
" \n"
" ll %[ticket], %[ticket_ptr] \n"
"1: addiu %[ticket], %[ticket], 1 \n"
" ori %[ticket], %[ticket], 0x2000 \n"
" xori %[ticket], %[ticket], 0x2000 \n"
" sc %[ticket], %[ticket_ptr] \n"
" beqz %[ticket], 2f \n"
" nop \n"
" \n"
" .subsection 2 \n"
"2: b 1b \n"
" ll %[ticket], %[ticket_ptr] \n"
" .previous \n"
" .set pop \n"
: [ticket_ptr] "+m" (lock->lock),
[ticket] "=&r" (tmp));
}
} }
static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock) static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock)
{ {
int tmp, tmp2, tmp3; int tmp, tmp2, tmp3;
int inc = 0x10000;
if (R10000_LLSC_WAR) { if (R10000_LLSC_WAR) {
__asm__ __volatile__ ( __asm__ __volatile__ (
...@@ -184,11 +152,11 @@ static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock) ...@@ -184,11 +152,11 @@ static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock)
" .set noreorder \n" " .set noreorder \n"
" \n" " \n"
"1: ll %[ticket], %[ticket_ptr] \n" "1: ll %[ticket], %[ticket_ptr] \n"
" srl %[my_ticket], %[ticket], 14 \n" " srl %[my_ticket], %[ticket], 16 \n"
" andi %[my_ticket], %[my_ticket], 0x1fff \n" " andi %[my_ticket], %[my_ticket], 0xffff \n"
" andi %[now_serving], %[ticket], 0x1fff \n" " andi %[now_serving], %[ticket], 0xffff \n"
" bne %[my_ticket], %[now_serving], 3f \n" " bne %[my_ticket], %[now_serving], 3f \n"
" addiu %[ticket], %[ticket], 0x4000 \n" " addu %[ticket], %[ticket], %[inc] \n"
" sc %[ticket], %[ticket_ptr] \n" " sc %[ticket], %[ticket_ptr] \n"
" beqzl %[ticket], 1b \n" " beqzl %[ticket], 1b \n"
" li %[ticket], 1 \n" " li %[ticket], 1 \n"
...@@ -201,33 +169,33 @@ static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock) ...@@ -201,33 +169,33 @@ static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock)
: [ticket_ptr] "+m" (lock->lock), : [ticket_ptr] "+m" (lock->lock),
[ticket] "=&r" (tmp), [ticket] "=&r" (tmp),
[my_ticket] "=&r" (tmp2), [my_ticket] "=&r" (tmp2),
[now_serving] "=&r" (tmp3)); [now_serving] "=&r" (tmp3)
: [inc] "r" (inc));
} else { } else {
__asm__ __volatile__ ( __asm__ __volatile__ (
" .set push # arch_spin_trylock \n" " .set push # arch_spin_trylock \n"
" .set noreorder \n" " .set noreorder \n"
" \n" " \n"
" ll %[ticket], %[ticket_ptr] \n" "1: ll %[ticket], %[ticket_ptr] \n"
"1: srl %[my_ticket], %[ticket], 14 \n" " srl %[my_ticket], %[ticket], 16 \n"
" andi %[my_ticket], %[my_ticket], 0x1fff \n" " andi %[my_ticket], %[my_ticket], 0xffff \n"
" andi %[now_serving], %[ticket], 0x1fff \n" " andi %[now_serving], %[ticket], 0xffff \n"
" bne %[my_ticket], %[now_serving], 3f \n" " bne %[my_ticket], %[now_serving], 3f \n"
" addiu %[ticket], %[ticket], 0x4000 \n" " addu %[ticket], %[ticket], %[inc] \n"
" sc %[ticket], %[ticket_ptr] \n" " sc %[ticket], %[ticket_ptr] \n"
" beqz %[ticket], 4f \n" " beqz %[ticket], 1b \n"
" li %[ticket], 1 \n" " li %[ticket], 1 \n"
"2: \n" "2: \n"
" .subsection 2 \n" " .subsection 2 \n"
"3: b 2b \n" "3: b 2b \n"
" li %[ticket], 0 \n" " li %[ticket], 0 \n"
"4: b 1b \n"
" ll %[ticket], %[ticket_ptr] \n"
" .previous \n" " .previous \n"
" .set pop \n" " .set pop \n"
: [ticket_ptr] "+m" (lock->lock), : [ticket_ptr] "+m" (lock->lock),
[ticket] "=&r" (tmp), [ticket] "=&r" (tmp),
[my_ticket] "=&r" (tmp2), [my_ticket] "=&r" (tmp2),
[now_serving] "=&r" (tmp3)); [now_serving] "=&r" (tmp3)
: [inc] "r" (inc));
} }
smp_llsc_mb(); smp_llsc_mb();
......
...@@ -5,16 +5,28 @@ ...@@ -5,16 +5,28 @@
# error "please don't include this file directly" # error "please don't include this file directly"
#endif #endif
typedef struct { #include <linux/types.h>
#include <asm/byteorder.h>
typedef union {
/* /*
* bits 0..13: serving_now * bits 0..15 : serving_now
* bits 14 : junk data * bits 16..31 : ticket
* bits 15..28: ticket
*/ */
unsigned int lock; u32 lock;
struct {
#ifdef __BIG_ENDIAN
u16 ticket;
u16 serving_now;
#else
u16 serving_now;
u16 ticket;
#endif
} h;
} arch_spinlock_t; } arch_spinlock_t;
#define __ARCH_SPIN_LOCK_UNLOCKED { 0 } #define __ARCH_SPIN_LOCK_UNLOCKED { .lock = 0 }
typedef struct { typedef struct {
volatile unsigned int lock; volatile unsigned int lock;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment