Commit 44230282 authored by Heiko Carstens, committed by Martin Schwidefsky

s390/spinlock: optimize spin_unlock code

Use a memory barrier + store sequence instead of a load + compare-and-swap
sequence to unlock a spinlock and an rwlock.
For the spinlock case this saves us two memory reads and avoids an unneeded
CPU serialization after the compare-and-swap instruction has stored the new value.

The kernel size (performance_defconfig) gets reduced by ~14k.

Average execution time of a tight inlined spin_unlock loop drops from
5.8ns to 0.7ns on a zEC12 machine.

An artificial stress test, in which several counters are protected by a single
spinlock and incremented only while that spinlock is held, shows ~30%
improvement on a 4-CPU machine (a rough user-space approximation of such a
test is sketched after the diff below).
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
parent 3d1e220d
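
To picture the change outside the diff context, here is a conceptual sketch of
the old compare-and-swap based unlock next to the new barrier-plus-store
unlock. This is illustrative only, not code from the commit: the helper names
are invented, the GCC __sync builtin stands in for the kernel's
_raw_compare_and_swap(), and the inline assembly assumes an s390 machine with
fast BCR (z196 or newer).

/* Conceptual sketch only, not taken from the kernel tree.  Compiles only
 * for s390; unlock_via_cs()/unlock_via_store() are invented names. */

/* Old scheme: give the lock back with a compare-and-swap.  The cs
 * instruction first loads the lock word, and the CPU is serialized again
 * after it has stored the new value. */
static inline void unlock_via_cs(unsigned int *lock, unsigned int lockval)
{
	(void) __sync_bool_compare_and_swap(lock, lockval, 0);
}

/* New scheme: one memory barrier, then a plain store of zero.  The lock
 * word is never read and no serialization happens after the store. */
static inline void unlock_via_store(unsigned int *lock)
{
	asm volatile(
		"	bcr	14,0\n"		/* fast-BCR barrier (z196+) */
		"	st	%1,%0\n"	/* *lock = 0 */
		: "+Q" (*lock)
		: "d" (0)
		: "cc", "memory");
}
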
@@ -15,11 +15,13 @@
 
 #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
 /* Fast-BCR without checkpoint synchronization */
-#define mb() do { asm volatile("bcr 14,0" : : : "memory"); } while (0)
+#define __ASM_BARRIER "bcr 14,0\n"
 #else
-#define mb() do { asm volatile("bcr 15,0" : : : "memory"); } while (0)
+#define __ASM_BARRIER "bcr 15,0\n"
 #endif
 
+#define mb() do { asm volatile(__ASM_BARRIER : : : "memory"); } while (0)
+
 #define rmb() mb()
 #define wmb() mb()
 #define read_barrier_depends() do { } while(0)
@@ -64,11 +64,6 @@ static inline int arch_spin_trylock_once(arch_spinlock_t *lp)
 		_raw_compare_and_swap(&lp->lock, 0, SPINLOCK_LOCKVAL));
 }
 
-static inline int arch_spin_tryrelease_once(arch_spinlock_t *lp)
-{
-	return _raw_compare_and_swap(&lp->lock, SPINLOCK_LOCKVAL, 0);
-}
-
 static inline void arch_spin_lock(arch_spinlock_t *lp)
 {
 	if (!arch_spin_trylock_once(lp))
@@ -91,7 +86,13 @@ static inline int arch_spin_trylock(arch_spinlock_t *lp)
 
 static inline void arch_spin_unlock(arch_spinlock_t *lp)
 {
-	arch_spin_tryrelease_once(lp);
+	typecheck(unsigned int, lp->lock);
+	asm volatile(
+		__ASM_BARRIER
+		"st	%1,%0\n"
+		: "+Q" (lp->lock)
+		: "d" (0)
+		: "cc", "memory");
 }
 
 static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
@@ -179,7 +180,13 @@ static inline void arch_write_lock_flags(arch_rwlock_t *rw, unsigned long flags)
 
 static inline void arch_write_unlock(arch_rwlock_t *rw)
 {
-	_raw_compare_and_swap(&rw->lock, 0x80000000, 0);
+	typecheck(unsigned int, rw->lock);
+	asm volatile(
+		__ASM_BARRIER
+		"st	%1,%0\n"
+		: "+Q" (rw->lock)
+		: "d" (0)
+		: "cc", "memory");
 }
 
 static inline int arch_read_trylock(arch_rwlock_t *rw)
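
For reference, the artificial stress test mentioned in the commit message
(several counters behind one spinlock, incremented only with the lock held)
can be approximated in user space roughly as follows. This is a hypothetical
sketch using POSIX spinlocks rather than the in-kernel arch_spin_lock/unlock
paths, so absolute numbers will differ; all names and parameters (NR_THREADS,
NR_COUNTERS, ITERATIONS) are invented and it only reproduces the lock/unlock
pattern being measured.

/* Hypothetical user-space approximation of the described stress test.
 * Build with: gcc -O2 -pthread. */
#include <pthread.h>
#include <stdio.h>

#define NR_THREADS	4
#define NR_COUNTERS	8
#define ITERATIONS	(1UL << 22)

static pthread_spinlock_t lock;
static unsigned long counters[NR_COUNTERS];

static void *worker(void *arg)
{
	unsigned long i;
	int c;

	(void) arg;
	for (i = 0; i < ITERATIONS; i++) {
		pthread_spin_lock(&lock);
		for (c = 0; c < NR_COUNTERS; c++)
			counters[c]++;		/* only touched under the lock */
		pthread_spin_unlock(&lock);	/* unlock path analogous to the one optimized here */
	}
	return NULL;
}

int main(void)
{
	pthread_t t[NR_THREADS];
	int i;

	pthread_spin_init(&lock, PTHREAD_PROCESS_PRIVATE);
	for (i = 0; i < NR_THREADS; i++)
		pthread_create(&t[i], NULL, worker, NULL);
	for (i = 0; i < NR_THREADS; i++)
		pthread_join(t[i], NULL);
	printf("counters[0] = %lu\n", counters[0]);
	return 0;
}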