Commit 1bf7067c authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull locking updates from Ingo Molnar:
 "The main changes are:

   - 'qspinlock' support, enabled on x86: queued spinlocks - these are
     now the spinlock variant used by x86 as they outperform ticket
     spinlocks in every category.  (Waiman Long)

   - 'pvqspinlock' support on x86: paravirtualized variant of queued
     spinlocks.  (Waiman Long, Peter Zijlstra)

   - 'qrwlock' support, enabled on x86: queued rwlocks.  Similar to
     queued spinlocks, they are now the variant used by x86:

       CONFIG_ARCH_USE_QUEUED_SPINLOCKS=y
       CONFIG_QUEUED_SPINLOCKS=y
       CONFIG_ARCH_USE_QUEUED_RWLOCKS=y
       CONFIG_QUEUED_RWLOCKS=y

   - various lockdep fixlets

   - various locking primitives cleanups, further WRITE_ONCE()
     propagation"

* 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (24 commits)
  locking/lockdep: Remove hard coded array size dependency
  locking/qrwlock: Don't contend with readers when setting _QW_WAITING
  lockdep: Do not break user-visible string
  locking/arch: Rename set_mb() to smp_store_mb()
  locking/arch: Add WRITE_ONCE() to set_mb()
  rtmutex: Warn if trylock is called from hard/softirq context
  arch: Remove __ARCH_HAVE_CMPXCHG
  locking/rtmutex: Drop usage of __HAVE_ARCH_CMPXCHG
  locking/qrwlock: Rename QUEUE_RWLOCK to QUEUED_RWLOCKS
  locking/pvqspinlock: Rename QUEUED_SPINLOCK to QUEUED_SPINLOCKS
  locking/pvqspinlock: Replace xchg() by the more descriptive set_mb()
  locking/pvqspinlock, x86: Enable PV qspinlock for Xen
  locking/pvqspinlock, x86: Enable PV qspinlock for KVM
  locking/pvqspinlock, x86: Implement the paravirt qspinlock call patching
  locking/pvqspinlock: Implement simple paravirt support for the qspinlock
  locking/qspinlock: Revert to test-and-set on hypervisors
  locking/qspinlock: Use a simple write to grab the lock
  locking/qspinlock: Optimize for smaller NR_CPUS
  locking/qspinlock: Extract out code snippets for the next patch
  locking/qspinlock: Add pending bit
  ...
parents fc934d40 68722101
...@@ -1673,7 +1673,7 @@ CPU from reordering them. ...@@ -1673,7 +1673,7 @@ CPU from reordering them.
There are some more advanced barrier functions: There are some more advanced barrier functions:
(*) set_mb(var, value) (*) smp_store_mb(var, value)
This assigns the value to the variable and then inserts a full memory This assigns the value to the variable and then inserts a full memory
barrier after it, depending on the function. It isn't guaranteed to barrier after it, depending on the function. It isn't guaranteed to
...@@ -1985,7 +1985,7 @@ after it has altered the task state: ...@@ -1985,7 +1985,7 @@ after it has altered the task state:
CPU 1 CPU 1
=============================== ===============================
set_current_state(); set_current_state();
set_mb(); smp_store_mb();
STORE current->state STORE current->state
<general barrier> <general barrier>
LOAD event_indicated LOAD event_indicated
...@@ -2026,7 +2026,7 @@ between the STORE to indicate the event and the STORE to set TASK_RUNNING: ...@@ -2026,7 +2026,7 @@ between the STORE to indicate the event and the STORE to set TASK_RUNNING:
CPU 1 CPU 2 CPU 1 CPU 2
=============================== =============================== =============================== ===============================
set_current_state(); STORE event_indicated set_current_state(); STORE event_indicated
set_mb(); wake_up(); smp_store_mb(); wake_up();
STORE current->state <write barrier> STORE current->state <write barrier>
<general barrier> STORE current->state <general barrier> STORE current->state
LOAD event_indicated LOAD event_indicated
......
...@@ -66,6 +66,4 @@ ...@@ -66,6 +66,4 @@
#undef __ASM__MB #undef __ASM__MB
#undef ____cmpxchg #undef ____cmpxchg
#define __HAVE_ARCH_CMPXCHG 1
#endif /* _ALPHA_CMPXCHG_H */ #endif /* _ALPHA_CMPXCHG_H */
...@@ -81,7 +81,7 @@ do { \ ...@@ -81,7 +81,7 @@ do { \
#define read_barrier_depends() do { } while(0) #define read_barrier_depends() do { } while(0)
#define smp_read_barrier_depends() do { } while(0) #define smp_read_barrier_depends() do { } while(0)
#define set_mb(var, value) do { var = value; smp_mb(); } while (0) #define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0)
#define smp_mb__before_atomic() smp_mb() #define smp_mb__before_atomic() smp_mb()
#define smp_mb__after_atomic() smp_mb() #define smp_mb__after_atomic() smp_mb()
......
...@@ -114,7 +114,7 @@ do { \ ...@@ -114,7 +114,7 @@ do { \
#define read_barrier_depends() do { } while(0) #define read_barrier_depends() do { } while(0)
#define smp_read_barrier_depends() do { } while(0) #define smp_read_barrier_depends() do { } while(0)
#define set_mb(var, value) do { var = value; smp_mb(); } while (0) #define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0)
#define nop() asm volatile("nop"); #define nop() asm volatile("nop");
#define smp_mb__before_atomic() smp_mb() #define smp_mb__before_atomic() smp_mb()
......
...@@ -70,8 +70,6 @@ extern unsigned long __cmpxchg_u64_unsupported_on_32bit_kernels( ...@@ -70,8 +70,6 @@ extern unsigned long __cmpxchg_u64_unsupported_on_32bit_kernels(
if something tries to do an invalid cmpxchg(). */ if something tries to do an invalid cmpxchg(). */
extern void __cmpxchg_called_with_bad_pointer(void); extern void __cmpxchg_called_with_bad_pointer(void);
#define __HAVE_ARCH_CMPXCHG 1
static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
unsigned long new, int size) unsigned long new, int size)
{ {
......
...@@ -64,7 +64,6 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, ...@@ -64,7 +64,6 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr,
* looks just like atomic_cmpxchg on our arch currently with a bunch of * looks just like atomic_cmpxchg on our arch currently with a bunch of
* variable casting. * variable casting.
*/ */
#define __HAVE_ARCH_CMPXCHG 1
#define cmpxchg(ptr, old, new) \ #define cmpxchg(ptr, old, new) \
({ \ ({ \
......
...@@ -77,12 +77,7 @@ do { \ ...@@ -77,12 +77,7 @@ do { \
___p1; \ ___p1; \
}) })
/* #define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0)
* XXX check on this ---I suspect what Linus really wants here is
* acquire vs release semantics but we can't discuss this stuff with
* Linus just yet. Grrr...
*/
#define set_mb(var, value) do { (var) = (value); mb(); } while (0)
/* /*
* The group barrier in front of the rsm & ssm are necessary to ensure * The group barrier in front of the rsm & ssm are necessary to ensure
......
...@@ -61,8 +61,6 @@ extern void ia64_xchg_called_with_bad_pointer(void); ...@@ -61,8 +61,6 @@ extern void ia64_xchg_called_with_bad_pointer(void);
* indicated by comparing RETURN with OLD. * indicated by comparing RETURN with OLD.
*/ */
#define __HAVE_ARCH_CMPXCHG 1
/* /*
* This function doesn't exist, so you'll get a linker error * This function doesn't exist, so you'll get a linker error
* if something tries to do an invalid cmpxchg(). * if something tries to do an invalid cmpxchg().
......
...@@ -107,8 +107,6 @@ __xchg_local(unsigned long x, volatile void *ptr, int size) ...@@ -107,8 +107,6 @@ __xchg_local(unsigned long x, volatile void *ptr, int size)
((__typeof__(*(ptr)))__xchg_local((unsigned long)(x), (ptr), \ ((__typeof__(*(ptr)))__xchg_local((unsigned long)(x), (ptr), \
sizeof(*(ptr)))) sizeof(*(ptr))))
#define __HAVE_ARCH_CMPXCHG 1
static inline unsigned long static inline unsigned long
__cmpxchg_u32(volatile unsigned int *p, unsigned int old, unsigned int new) __cmpxchg_u32(volatile unsigned int *p, unsigned int old, unsigned int new)
{ {
......
...@@ -90,7 +90,6 @@ extern unsigned long __invalid_cmpxchg_size(volatile void *, ...@@ -90,7 +90,6 @@ extern unsigned long __invalid_cmpxchg_size(volatile void *,
* indicated by comparing RETURN with OLD. * indicated by comparing RETURN with OLD.
*/ */
#ifdef CONFIG_RMW_INSNS #ifdef CONFIG_RMW_INSNS
#define __HAVE_ARCH_CMPXCHG 1
static inline unsigned long __cmpxchg(volatile void *p, unsigned long old, static inline unsigned long __cmpxchg(volatile void *p, unsigned long old,
unsigned long new, int size) unsigned long new, int size)
......
...@@ -84,7 +84,7 @@ static inline void fence(void) ...@@ -84,7 +84,7 @@ static inline void fence(void)
#define read_barrier_depends() do { } while (0) #define read_barrier_depends() do { } while (0)
#define smp_read_barrier_depends() do { } while (0) #define smp_read_barrier_depends() do { } while (0)
#define set_mb(var, value) do { var = value; smp_mb(); } while (0) #define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0)
#define smp_store_release(p, v) \ #define smp_store_release(p, v) \
do { \ do { \
......
...@@ -51,8 +51,6 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, ...@@ -51,8 +51,6 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
return old; return old;
} }
#define __HAVE_ARCH_CMPXCHG 1
#define cmpxchg(ptr, o, n) \ #define cmpxchg(ptr, o, n) \
({ \ ({ \
__typeof__(*(ptr)) _o_ = (o); \ __typeof__(*(ptr)) _o_ = (o); \
......
...@@ -112,8 +112,8 @@ ...@@ -112,8 +112,8 @@
#define __WEAK_LLSC_MB " \n" #define __WEAK_LLSC_MB " \n"
#endif #endif
#define set_mb(var, value) \ #define smp_store_mb(var, value) \
do { var = value; smp_mb(); } while (0) do { WRITE_ONCE(var, value); smp_mb(); } while (0)
#define smp_llsc_mb() __asm__ __volatile__(__WEAK_LLSC_MB : : :"memory") #define smp_llsc_mb() __asm__ __volatile__(__WEAK_LLSC_MB : : :"memory")
......
...@@ -138,8 +138,6 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz ...@@ -138,8 +138,6 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz
__xchg((unsigned long)(x), (ptr), sizeof(*(ptr)))); \ __xchg((unsigned long)(x), (ptr), sizeof(*(ptr)))); \
}) })
#define __HAVE_ARCH_CMPXCHG 1
#define __cmpxchg_asm(ld, st, m, old, new) \ #define __cmpxchg_asm(ld, st, m, old, new) \
({ \ ({ \
__typeof(*(m)) __ret; \ __typeof(*(m)) __ret; \
......
...@@ -46,8 +46,6 @@ __xchg(unsigned long x, __volatile__ void *ptr, int size) ...@@ -46,8 +46,6 @@ __xchg(unsigned long x, __volatile__ void *ptr, int size)
#define xchg(ptr, x) \ #define xchg(ptr, x) \
((__typeof__(*(ptr)))__xchg((unsigned long)(x), (ptr), sizeof(*(ptr)))) ((__typeof__(*(ptr)))__xchg((unsigned long)(x), (ptr), sizeof(*(ptr))))
#define __HAVE_ARCH_CMPXCHG 1
/* bug catcher for when unsupported size is used - won't link */ /* bug catcher for when unsupported size is used - won't link */
extern void __cmpxchg_called_with_bad_pointer(void); extern void __cmpxchg_called_with_bad_pointer(void);
......
...@@ -34,7 +34,7 @@ ...@@ -34,7 +34,7 @@
#define rmb() __asm__ __volatile__ ("sync" : : : "memory") #define rmb() __asm__ __volatile__ ("sync" : : : "memory")
#define wmb() __asm__ __volatile__ ("sync" : : : "memory") #define wmb() __asm__ __volatile__ ("sync" : : : "memory")
#define set_mb(var, value) do { var = value; mb(); } while (0) #define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0)
#ifdef __SUBARCH_HAS_LWSYNC #ifdef __SUBARCH_HAS_LWSYNC
# define SMPWMB LWSYNC # define SMPWMB LWSYNC
......
...@@ -144,7 +144,6 @@ __xchg_local(volatile void *ptr, unsigned long x, unsigned int size) ...@@ -144,7 +144,6 @@ __xchg_local(volatile void *ptr, unsigned long x, unsigned int size)
* Compare and exchange - if *p == old, set it to new, * Compare and exchange - if *p == old, set it to new,
* and return the old value of *p. * and return the old value of *p.
*/ */
#define __HAVE_ARCH_CMPXCHG 1
static __always_inline unsigned long static __always_inline unsigned long
__cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new) __cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new)
......
...@@ -36,7 +36,7 @@ ...@@ -36,7 +36,7 @@
#define smp_mb__before_atomic() smp_mb() #define smp_mb__before_atomic() smp_mb()
#define smp_mb__after_atomic() smp_mb() #define smp_mb__after_atomic() smp_mb()
#define set_mb(var, value) do { var = value; mb(); } while (0) #define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0)
#define smp_store_release(p, v) \ #define smp_store_release(p, v) \
do { \ do { \
......
...@@ -32,8 +32,6 @@ ...@@ -32,8 +32,6 @@
__old; \ __old; \
}) })
#define __HAVE_ARCH_CMPXCHG
#define __cmpxchg_double_op(p1, p2, o1, o2, n1, n2, insn) \ #define __cmpxchg_double_op(p1, p2, o1, o2, n1, n2, insn) \
({ \ ({ \
register __typeof__(*(p1)) __old1 asm("2") = (o1); \ register __typeof__(*(p1)) __old1 asm("2") = (o1); \
......
...@@ -42,8 +42,6 @@ static inline unsigned long __cmpxchg(volatile unsigned long *m, ...@@ -42,8 +42,6 @@ static inline unsigned long __cmpxchg(volatile unsigned long *m,
(unsigned long)(o), \ (unsigned long)(o), \
(unsigned long)(n))) (unsigned long)(n)))
#define __HAVE_ARCH_CMPXCHG 1
#include <asm-generic/cmpxchg-local.h> #include <asm-generic/cmpxchg-local.h>
#endif /* _ASM_SCORE_CMPXCHG_H */ #endif /* _ASM_SCORE_CMPXCHG_H */
...@@ -32,7 +32,7 @@ ...@@ -32,7 +32,7 @@
#define ctrl_barrier() __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop") #define ctrl_barrier() __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop")
#endif #endif
#define set_mb(var, value) do { (void)xchg(&var, value); } while (0) #define smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0)
#include <asm-generic/barrier.h> #include <asm-generic/barrier.h>
......
...@@ -46,8 +46,6 @@ extern void __xchg_called_with_bad_pointer(void); ...@@ -46,8 +46,6 @@ extern void __xchg_called_with_bad_pointer(void);
* if something tries to do an invalid cmpxchg(). */ * if something tries to do an invalid cmpxchg(). */
extern void __cmpxchg_called_with_bad_pointer(void); extern void __cmpxchg_called_with_bad_pointer(void);
#define __HAVE_ARCH_CMPXCHG 1
static inline unsigned long __cmpxchg(volatile void * ptr, unsigned long old, static inline unsigned long __cmpxchg(volatile void * ptr, unsigned long old,
unsigned long new, int size) unsigned long new, int size)
{ {
......
...@@ -40,8 +40,8 @@ do { __asm__ __volatile__("ba,pt %%xcc, 1f\n\t" \ ...@@ -40,8 +40,8 @@ do { __asm__ __volatile__("ba,pt %%xcc, 1f\n\t" \
#define dma_rmb() rmb() #define dma_rmb() rmb()
#define dma_wmb() wmb() #define dma_wmb() wmb()
#define set_mb(__var, __value) \ #define smp_store_mb(__var, __value) \
do { __var = __value; membar_safe("#StoreLoad"); } while(0) do { WRITE_ONCE(__var, __value); membar_safe("#StoreLoad"); } while(0)
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
#define smp_mb() mb() #define smp_mb() mb()
......
...@@ -34,7 +34,6 @@ static inline unsigned long __xchg(unsigned long x, __volatile__ void * ptr, int ...@@ -34,7 +34,6 @@ static inline unsigned long __xchg(unsigned long x, __volatile__ void * ptr, int
* *
* Cribbed from <asm-parisc/atomic.h> * Cribbed from <asm-parisc/atomic.h>
*/ */
#define __HAVE_ARCH_CMPXCHG 1
/* bug catcher for when unsupported size is used - won't link */ /* bug catcher for when unsupported size is used - won't link */
void __cmpxchg_called_with_bad_pointer(void); void __cmpxchg_called_with_bad_pointer(void);
......
...@@ -65,8 +65,6 @@ static inline unsigned long __xchg(unsigned long x, __volatile__ void * ptr, ...@@ -65,8 +65,6 @@ static inline unsigned long __xchg(unsigned long x, __volatile__ void * ptr,
#include <asm-generic/cmpxchg-local.h> #include <asm-generic/cmpxchg-local.h>
#define __HAVE_ARCH_CMPXCHG 1
static inline unsigned long static inline unsigned long
__cmpxchg_u32(volatile int *m, int old, int new) __cmpxchg_u32(volatile int *m, int old, int new)
{ {
......
...@@ -105,9 +105,6 @@ static inline long atomic64_add_unless(atomic64_t *v, long a, long u) ...@@ -105,9 +105,6 @@ static inline long atomic64_add_unless(atomic64_t *v, long a, long u)
#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
/* Define this to indicate that cmpxchg is an efficient operation. */
#define __HAVE_ARCH_CMPXCHG
#endif /* !__ASSEMBLY__ */ #endif /* !__ASSEMBLY__ */
#endif /* _ASM_TILE_ATOMIC_64_H */ #endif /* _ASM_TILE_ATOMIC_64_H */
...@@ -127,7 +127,8 @@ config X86 ...@@ -127,7 +127,8 @@ config X86
select MODULES_USE_ELF_RELA if X86_64 select MODULES_USE_ELF_RELA if X86_64
select CLONE_BACKWARDS if X86_32 select CLONE_BACKWARDS if X86_32
select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_QUEUE_RWLOCK select ARCH_USE_QUEUED_SPINLOCKS
select ARCH_USE_QUEUED_RWLOCKS
select OLD_SIGSUSPEND3 if X86_32 || IA32_EMULATION select OLD_SIGSUSPEND3 if X86_32 || IA32_EMULATION
select OLD_SIGACTION if X86_32 select OLD_SIGACTION if X86_32
select COMPAT_OLD_SIGACTION if IA32_EMULATION select COMPAT_OLD_SIGACTION if IA32_EMULATION
...@@ -666,7 +667,7 @@ config PARAVIRT_DEBUG ...@@ -666,7 +667,7 @@ config PARAVIRT_DEBUG
config PARAVIRT_SPINLOCKS config PARAVIRT_SPINLOCKS
bool "Paravirtualization layer for spinlocks" bool "Paravirtualization layer for spinlocks"
depends on PARAVIRT && SMP depends on PARAVIRT && SMP
select UNINLINE_SPIN_UNLOCK select UNINLINE_SPIN_UNLOCK if !QUEUED_SPINLOCKS
---help--- ---help---
Paravirtualized spinlocks allow a pvops backend to replace the Paravirtualized spinlocks allow a pvops backend to replace the
spinlock implementation with something virtualization-friendly spinlock implementation with something virtualization-friendly
......
...@@ -35,12 +35,12 @@ ...@@ -35,12 +35,12 @@
#define smp_mb() mb() #define smp_mb() mb()
#define smp_rmb() dma_rmb() #define smp_rmb() dma_rmb()
#define smp_wmb() barrier() #define smp_wmb() barrier()
#define set_mb(var, value) do { (void)xchg(&var, value); } while (0) #define smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0)
#else /* !SMP */ #else /* !SMP */
#define smp_mb() barrier() #define smp_mb() barrier()
#define smp_rmb() barrier() #define smp_rmb() barrier()
#define smp_wmb() barrier() #define smp_wmb() barrier()
#define set_mb(var, value) do { var = value; barrier(); } while (0) #define smp_store_mb(var, value) do { WRITE_ONCE(var, value); barrier(); } while (0)
#endif /* SMP */ #endif /* SMP */
#define read_barrier_depends() do { } while (0) #define read_barrier_depends() do { } while (0)
......
...@@ -4,8 +4,6 @@ ...@@ -4,8 +4,6 @@
#include <linux/compiler.h> #include <linux/compiler.h>
#include <asm/alternative.h> /* Provides LOCK_PREFIX */ #include <asm/alternative.h> /* Provides LOCK_PREFIX */
#define __HAVE_ARCH_CMPXCHG 1
/* /*
* Non-existant functions to indicate usage errors at link time * Non-existant functions to indicate usage errors at link time
* (or compile-time if the compiler implements __compiletime_error(). * (or compile-time if the compiler implements __compiletime_error().
......
...@@ -712,6 +712,31 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, ...@@ -712,6 +712,31 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
#if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS) #if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS)
#ifdef CONFIG_QUEUED_SPINLOCKS
static __always_inline void pv_queued_spin_lock_slowpath(struct qspinlock *lock,
u32 val)
{
PVOP_VCALL2(pv_lock_ops.queued_spin_lock_slowpath, lock, val);
}
static __always_inline void pv_queued_spin_unlock(struct qspinlock *lock)
{
PVOP_VCALLEE1(pv_lock_ops.queued_spin_unlock, lock);
}
static __always_inline void pv_wait(u8 *ptr, u8 val)
{
PVOP_VCALL2(pv_lock_ops.wait, ptr, val);
}
static __always_inline void pv_kick(int cpu)
{
PVOP_VCALL1(pv_lock_ops.kick, cpu);
}
#else /* !CONFIG_QUEUED_SPINLOCKS */
static __always_inline void __ticket_lock_spinning(struct arch_spinlock *lock, static __always_inline void __ticket_lock_spinning(struct arch_spinlock *lock,
__ticket_t ticket) __ticket_t ticket)
{ {
...@@ -724,7 +749,9 @@ static __always_inline void __ticket_unlock_kick(struct arch_spinlock *lock, ...@@ -724,7 +749,9 @@ static __always_inline void __ticket_unlock_kick(struct arch_spinlock *lock,
PVOP_VCALL2(pv_lock_ops.unlock_kick, lock, ticket); PVOP_VCALL2(pv_lock_ops.unlock_kick, lock, ticket);
} }
#endif #endif /* CONFIG_QUEUED_SPINLOCKS */
#endif /* SMP && PARAVIRT_SPINLOCKS */
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
#define PV_SAVE_REGS "pushl %ecx; pushl %edx;" #define PV_SAVE_REGS "pushl %ecx; pushl %edx;"
......
...@@ -333,9 +333,19 @@ struct arch_spinlock; ...@@ -333,9 +333,19 @@ struct arch_spinlock;
typedef u16 __ticket_t; typedef u16 __ticket_t;
#endif #endif
struct qspinlock;
struct pv_lock_ops { struct pv_lock_ops {
#ifdef CONFIG_QUEUED_SPINLOCKS
void (*queued_spin_lock_slowpath)(struct qspinlock *lock, u32 val);
struct paravirt_callee_save queued_spin_unlock;
void (*wait)(u8 *ptr, u8 val);
void (*kick)(int cpu);
#else /* !CONFIG_QUEUED_SPINLOCKS */
struct paravirt_callee_save lock_spinning; struct paravirt_callee_save lock_spinning;
void (*unlock_kick)(struct arch_spinlock *lock, __ticket_t ticket); void (*unlock_kick)(struct arch_spinlock *lock, __ticket_t ticket);
#endif /* !CONFIG_QUEUED_SPINLOCKS */
}; };
/* This contains all the paravirt structures: we get a convenient /* This contains all the paravirt structures: we get a convenient
......
#ifndef _ASM_X86_QSPINLOCK_H
#define _ASM_X86_QSPINLOCK_H
#include <asm/cpufeature.h>
#include <asm-generic/qspinlock_types.h>
#include <asm/paravirt.h>
#define queued_spin_unlock queued_spin_unlock
/**
* queued_spin_unlock - release a queued spinlock
* @lock : Pointer to queued spinlock structure
*
* A smp_store_release() on the least-significant byte.
*/
static inline void native_queued_spin_unlock(struct qspinlock *lock)
{
smp_store_release((u8 *)lock, 0);
}
#ifdef CONFIG_PARAVIRT_SPINLOCKS
extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
extern void __pv_init_lock_hash(void);
extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
extern void __raw_callee_save___pv_queued_spin_unlock(struct qspinlock *lock);
static inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
{
pv_queued_spin_lock_slowpath(lock, val);
}
static inline void queued_spin_unlock(struct qspinlock *lock)
{
pv_queued_spin_unlock(lock);
}
#else
static inline void queued_spin_unlock(struct qspinlock *lock)
{
native_queued_spin_unlock(lock);
}
#endif
#define virt_queued_spin_lock virt_queued_spin_lock
static inline bool virt_queued_spin_lock(struct qspinlock *lock)
{
if (!static_cpu_has(X86_FEATURE_HYPERVISOR))
return false;
while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0)
cpu_relax();
return true;
}
#include <asm-generic/qspinlock.h>
#endif /* _ASM_X86_QSPINLOCK_H */
#ifndef __ASM_QSPINLOCK_PARAVIRT_H
#define __ASM_QSPINLOCK_PARAVIRT_H
PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock);
#endif
...@@ -42,6 +42,10 @@ ...@@ -42,6 +42,10 @@
extern struct static_key paravirt_ticketlocks_enabled; extern struct static_key paravirt_ticketlocks_enabled;
static __always_inline bool static_key_false(struct static_key *key); static __always_inline bool static_key_false(struct static_key *key);
#ifdef CONFIG_QUEUED_SPINLOCKS
#include <asm/qspinlock.h>
#else
#ifdef CONFIG_PARAVIRT_SPINLOCKS #ifdef CONFIG_PARAVIRT_SPINLOCKS
static inline void __ticket_enter_slowpath(arch_spinlock_t *lock) static inline void __ticket_enter_slowpath(arch_spinlock_t *lock)
...@@ -196,6 +200,7 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) ...@@ -196,6 +200,7 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
cpu_relax(); cpu_relax();
} }
} }
#endif /* CONFIG_QUEUED_SPINLOCKS */
/* /*
* Read-write spinlocks, allowing multiple readers * Read-write spinlocks, allowing multiple readers
......
...@@ -23,6 +23,9 @@ typedef u32 __ticketpair_t; ...@@ -23,6 +23,9 @@ typedef u32 __ticketpair_t;
#define TICKET_SHIFT (sizeof(__ticket_t) * 8) #define TICKET_SHIFT (sizeof(__ticket_t) * 8)
#ifdef CONFIG_QUEUED_SPINLOCKS
#include <asm-generic/qspinlock_types.h>
#else
typedef struct arch_spinlock { typedef struct arch_spinlock {
union { union {
__ticketpair_t head_tail; __ticketpair_t head_tail;
...@@ -33,6 +36,7 @@ typedef struct arch_spinlock { ...@@ -33,6 +36,7 @@ typedef struct arch_spinlock {
} arch_spinlock_t; } arch_spinlock_t;
#define __ARCH_SPIN_LOCK_UNLOCKED { { 0 } } #define __ARCH_SPIN_LOCK_UNLOCKED { { 0 } }
#endif /* CONFIG_QUEUED_SPINLOCKS */
#include <asm-generic/qrwlock_types.h> #include <asm-generic/qrwlock_types.h>
......
...@@ -584,6 +584,39 @@ static void kvm_kick_cpu(int cpu) ...@@ -584,6 +584,39 @@ static void kvm_kick_cpu(int cpu)
kvm_hypercall2(KVM_HC_KICK_CPU, flags, apicid); kvm_hypercall2(KVM_HC_KICK_CPU, flags, apicid);
} }
#ifdef CONFIG_QUEUED_SPINLOCKS
#include <asm/qspinlock.h>
static void kvm_wait(u8 *ptr, u8 val)
{
unsigned long flags;
if (in_nmi())
return;
local_irq_save(flags);
if (READ_ONCE(*ptr) != val)
goto out;
/*
* halt until it's our turn and kicked. Note that we do safe halt
* for irq enabled case to avoid hang when lock info is overwritten
* in irq spinlock slowpath and no spurious interrupt occur to save us.
*/
if (arch_irqs_disabled_flags(flags))
halt();
else
safe_halt();
out:
local_irq_restore(flags);
}
#else /* !CONFIG_QUEUED_SPINLOCKS */
enum kvm_contention_stat { enum kvm_contention_stat {
TAKEN_SLOW, TAKEN_SLOW,
TAKEN_SLOW_PICKUP, TAKEN_SLOW_PICKUP,
...@@ -817,6 +850,8 @@ static void kvm_unlock_kick(struct arch_spinlock *lock, __ticket_t ticket) ...@@ -817,6 +850,8 @@ static void kvm_unlock_kick(struct arch_spinlock *lock, __ticket_t ticket)
} }
} }
#endif /* !CONFIG_QUEUED_SPINLOCKS */
/* /*
* Setup pv_lock_ops to exploit KVM_FEATURE_PV_UNHALT if present. * Setup pv_lock_ops to exploit KVM_FEATURE_PV_UNHALT if present.
*/ */
...@@ -828,8 +863,16 @@ void __init kvm_spinlock_init(void) ...@@ -828,8 +863,16 @@ void __init kvm_spinlock_init(void)
if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT)) if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
return; return;
#ifdef CONFIG_QUEUED_SPINLOCKS
__pv_init_lock_hash();
pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock);
pv_lock_ops.wait = kvm_wait;
pv_lock_ops.kick = kvm_kick_cpu;
#else /* !CONFIG_QUEUED_SPINLOCKS */
pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(kvm_lock_spinning); pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(kvm_lock_spinning);
pv_lock_ops.unlock_kick = kvm_unlock_kick; pv_lock_ops.unlock_kick = kvm_unlock_kick;
#endif
} }
static __init int kvm_spinlock_init_jump(void) static __init int kvm_spinlock_init_jump(void)
......
...@@ -8,11 +8,33 @@ ...@@ -8,11 +8,33 @@
#include <asm/paravirt.h> #include <asm/paravirt.h>
#ifdef CONFIG_QUEUED_SPINLOCKS
__visible void __native_queued_spin_unlock(struct qspinlock *lock)
{
native_queued_spin_unlock(lock);
}
PV_CALLEE_SAVE_REGS_THUNK(__native_queued_spin_unlock);
bool pv_is_native_spin_unlock(void)
{
return pv_lock_ops.queued_spin_unlock.func ==
__raw_callee_save___native_queued_spin_unlock;
}
#endif
struct pv_lock_ops pv_lock_ops = { struct pv_lock_ops pv_lock_ops = {
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
#ifdef CONFIG_QUEUED_SPINLOCKS
.queued_spin_lock_slowpath = native_queued_spin_lock_slowpath,
.queued_spin_unlock = PV_CALLEE_SAVE(__native_queued_spin_unlock),
.wait = paravirt_nop,
.kick = paravirt_nop,
#else /* !CONFIG_QUEUED_SPINLOCKS */
.lock_spinning = __PV_IS_CALLEE_SAVE(paravirt_nop), .lock_spinning = __PV_IS_CALLEE_SAVE(paravirt_nop),
.unlock_kick = paravirt_nop, .unlock_kick = paravirt_nop,
#endif #endif /* !CONFIG_QUEUED_SPINLOCKS */
#endif /* SMP */
}; };
EXPORT_SYMBOL(pv_lock_ops); EXPORT_SYMBOL(pv_lock_ops);
......
...@@ -12,6 +12,10 @@ DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax"); ...@@ -12,6 +12,10 @@ DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
DEF_NATIVE(pv_cpu_ops, clts, "clts"); DEF_NATIVE(pv_cpu_ops, clts, "clts");
DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc"); DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc");
#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%eax)");
#endif
unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len) unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len)
{ {
/* arg in %eax, return in %eax */ /* arg in %eax, return in %eax */
...@@ -24,6 +28,8 @@ unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len) ...@@ -24,6 +28,8 @@ unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
return 0; return 0;
} }
extern bool pv_is_native_spin_unlock(void);
unsigned native_patch(u8 type, u16 clobbers, void *ibuf, unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
unsigned long addr, unsigned len) unsigned long addr, unsigned len)
{ {
...@@ -47,14 +53,22 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, ...@@ -47,14 +53,22 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
PATCH_SITE(pv_mmu_ops, write_cr3); PATCH_SITE(pv_mmu_ops, write_cr3);
PATCH_SITE(pv_cpu_ops, clts); PATCH_SITE(pv_cpu_ops, clts);
PATCH_SITE(pv_cpu_ops, read_tsc); PATCH_SITE(pv_cpu_ops, read_tsc);
#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
patch_site: case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
ret = paravirt_patch_insns(ibuf, len, start, end); if (pv_is_native_spin_unlock()) {
break; start = start_pv_lock_ops_queued_spin_unlock;
end = end_pv_lock_ops_queued_spin_unlock;
goto patch_site;
}
#endif
default: default:
ret = paravirt_patch_default(type, clobbers, ibuf, addr, len); ret = paravirt_patch_default(type, clobbers, ibuf, addr, len);
break; break;
patch_site:
ret = paravirt_patch_insns(ibuf, len, start, end);
break;
} }
#undef PATCH_SITE #undef PATCH_SITE
return ret; return ret;
......
...@@ -21,6 +21,10 @@ DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs"); ...@@ -21,6 +21,10 @@ DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs");
DEF_NATIVE(, mov32, "mov %edi, %eax"); DEF_NATIVE(, mov32, "mov %edi, %eax");
DEF_NATIVE(, mov64, "mov %rdi, %rax"); DEF_NATIVE(, mov64, "mov %rdi, %rax");
#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%rdi)");
#endif
unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len) unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len)
{ {
return paravirt_patch_insns(insnbuf, len, return paravirt_patch_insns(insnbuf, len,
...@@ -33,6 +37,8 @@ unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len) ...@@ -33,6 +37,8 @@ unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
start__mov64, end__mov64); start__mov64, end__mov64);
} }
extern bool pv_is_native_spin_unlock(void);
unsigned native_patch(u8 type, u16 clobbers, void *ibuf, unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
unsigned long addr, unsigned len) unsigned long addr, unsigned len)
{ {
...@@ -59,14 +65,22 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, ...@@ -59,14 +65,22 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
PATCH_SITE(pv_cpu_ops, clts); PATCH_SITE(pv_cpu_ops, clts);
PATCH_SITE(pv_mmu_ops, flush_tlb_single); PATCH_SITE(pv_mmu_ops, flush_tlb_single);
PATCH_SITE(pv_cpu_ops, wbinvd); PATCH_SITE(pv_cpu_ops, wbinvd);
#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
patch_site: case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
ret = paravirt_patch_insns(ibuf, len, start, end); if (pv_is_native_spin_unlock()) {
break; start = start_pv_lock_ops_queued_spin_unlock;
end = end_pv_lock_ops_queued_spin_unlock;
goto patch_site;
}
#endif
default: default:
ret = paravirt_patch_default(type, clobbers, ibuf, addr, len); ret = paravirt_patch_default(type, clobbers, ibuf, addr, len);
break; break;
patch_site:
ret = paravirt_patch_insns(ibuf, len, start, end);
break;
} }
#undef PATCH_SITE #undef PATCH_SITE
return ret; return ret;
......
...@@ -39,7 +39,8 @@ ...@@ -39,7 +39,8 @@
#define smp_mb() barrier() #define smp_mb() barrier()
#define smp_rmb() barrier() #define smp_rmb() barrier()
#define smp_wmb() barrier() #define smp_wmb() barrier()
#define set_mb(var, value) do { var = value; barrier(); } while (0)
#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); barrier(); } while (0)
#define read_barrier_depends() do { } while (0) #define read_barrier_depends() do { } while (0)
#define smp_read_barrier_depends() do { } while (0) #define smp_read_barrier_depends() do { } while (0)
......
...@@ -17,6 +17,56 @@ ...@@ -17,6 +17,56 @@
#include "xen-ops.h" #include "xen-ops.h"
#include "debugfs.h" #include "debugfs.h"
static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
static DEFINE_PER_CPU(char *, irq_name);
static bool xen_pvspin = true;
#ifdef CONFIG_QUEUED_SPINLOCKS
#include <asm/qspinlock.h>
static void xen_qlock_kick(int cpu)
{
xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
}
/*
* Halt the current CPU & release it back to the host
*/
static void xen_qlock_wait(u8 *byte, u8 val)
{
int irq = __this_cpu_read(lock_kicker_irq);
/* If kicker interrupts not initialized yet, just spin */
if (irq == -1)
return;
/* clear pending */
xen_clear_irq_pending(irq);
barrier();
/*
* We check the byte value after clearing pending IRQ to make sure
* that we won't miss a wakeup event because of the clearing.
*
* The sync_clear_bit() call in xen_clear_irq_pending() is atomic.
* So it is effectively a memory barrier for x86.
*/
if (READ_ONCE(*byte) != val)
return;
/*
* If an interrupt happens here, it will leave the wakeup irq
* pending, which will cause xen_poll_irq() to return
* immediately.
*/
/* Block until irq becomes pending (or perhaps a spurious wakeup) */
xen_poll_irq(irq);
}
#else /* CONFIG_QUEUED_SPINLOCKS */
enum xen_contention_stat { enum xen_contention_stat {
TAKEN_SLOW, TAKEN_SLOW,
TAKEN_SLOW_PICKUP, TAKEN_SLOW_PICKUP,
...@@ -100,12 +150,9 @@ struct xen_lock_waiting { ...@@ -100,12 +150,9 @@ struct xen_lock_waiting {
__ticket_t want; __ticket_t want;
}; };
static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
static DEFINE_PER_CPU(char *, irq_name);
static DEFINE_PER_CPU(struct xen_lock_waiting, lock_waiting); static DEFINE_PER_CPU(struct xen_lock_waiting, lock_waiting);
static cpumask_t waiting_cpus; static cpumask_t waiting_cpus;
static bool xen_pvspin = true;
__visible void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want) __visible void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
{ {
int irq = __this_cpu_read(lock_kicker_irq); int irq = __this_cpu_read(lock_kicker_irq);
...@@ -217,6 +264,7 @@ static void xen_unlock_kick(struct arch_spinlock *lock, __ticket_t next) ...@@ -217,6 +264,7 @@ static void xen_unlock_kick(struct arch_spinlock *lock, __ticket_t next)
} }
} }
} }
#endif /* CONFIG_QUEUED_SPINLOCKS */
static irqreturn_t dummy_handler(int irq, void *dev_id) static irqreturn_t dummy_handler(int irq, void *dev_id)
{ {
...@@ -280,8 +328,16 @@ void __init xen_init_spinlocks(void) ...@@ -280,8 +328,16 @@ void __init xen_init_spinlocks(void)
return; return;
} }
printk(KERN_DEBUG "xen: PV spinlocks enabled\n"); printk(KERN_DEBUG "xen: PV spinlocks enabled\n");
#ifdef CONFIG_QUEUED_SPINLOCKS
__pv_init_lock_hash();
pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock);
pv_lock_ops.wait = xen_qlock_wait;
pv_lock_ops.kick = xen_qlock_kick;
#else
pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(xen_lock_spinning); pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(xen_lock_spinning);
pv_lock_ops.unlock_kick = xen_unlock_kick; pv_lock_ops.unlock_kick = xen_unlock_kick;
#endif
} }
/* /*
...@@ -310,7 +366,7 @@ static __init int xen_parse_nopvspin(char *arg) ...@@ -310,7 +366,7 @@ static __init int xen_parse_nopvspin(char *arg)
} }
early_param("xen_nopvspin", xen_parse_nopvspin); early_param("xen_nopvspin", xen_parse_nopvspin);
#ifdef CONFIG_XEN_DEBUG_FS #if defined(CONFIG_XEN_DEBUG_FS) && !defined(CONFIG_QUEUED_SPINLOCKS)
static struct dentry *d_spin_debug; static struct dentry *d_spin_debug;
......
...@@ -189,7 +189,7 @@ static int __pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key) ...@@ -189,7 +189,7 @@ static int __pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
* doesn't imply write barrier and the users expect write * doesn't imply write barrier and the users expect write
* barrier semantics on wakeup functions. The following * barrier semantics on wakeup functions. The following
* smp_wmb() is equivalent to smp_wmb() in try_to_wake_up() * smp_wmb() is equivalent to smp_wmb() in try_to_wake_up()
* and is paired with set_mb() in poll_schedule_timeout. * and is paired with smp_store_mb() in poll_schedule_timeout.
*/ */
smp_wmb(); smp_wmb();
pwq->triggered = 1; pwq->triggered = 1;
...@@ -244,7 +244,7 @@ int poll_schedule_timeout(struct poll_wqueues *pwq, int state, ...@@ -244,7 +244,7 @@ int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
/* /*
* Prepare for the next iteration. * Prepare for the next iteration.
* *
* The following set_mb() serves two purposes. First, it's * The following smp_store_mb() serves two purposes. First, it's
* the counterpart rmb of the wmb in pollwake() such that data * the counterpart rmb of the wmb in pollwake() such that data
* written before wake up is always visible after wake up. * written before wake up is always visible after wake up.
* Second, the full barrier guarantees that triggered clearing * Second, the full barrier guarantees that triggered clearing
...@@ -252,7 +252,7 @@ int poll_schedule_timeout(struct poll_wqueues *pwq, int state, ...@@ -252,7 +252,7 @@ int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
* this problem doesn't exist for the first iteration as * this problem doesn't exist for the first iteration as
* add_wait_queue() has full barrier semantics. * add_wait_queue() has full barrier semantics.
*/ */
set_mb(pwq->triggered, 0); smp_store_mb(pwq->triggered, 0);
return rc; return rc;
} }
......
...@@ -66,8 +66,8 @@ ...@@ -66,8 +66,8 @@
#define smp_read_barrier_depends() do { } while (0) #define smp_read_barrier_depends() do { } while (0)
#endif #endif
#ifndef set_mb #ifndef smp_store_mb
#define set_mb(var, value) do { (var) = (value); mb(); } while (0) #define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0)
#endif #endif
#ifndef smp_mb__before_atomic #ifndef smp_mb__before_atomic
......
...@@ -86,9 +86,6 @@ unsigned long __xchg(unsigned long x, volatile void *ptr, int size) ...@@ -86,9 +86,6 @@ unsigned long __xchg(unsigned long x, volatile void *ptr, int size)
/* /*
* Atomic compare and exchange. * Atomic compare and exchange.
*
* Do not define __HAVE_ARCH_CMPXCHG because we want to use it to check whether
* a cmpxchg primitive faster than repeated local irq save/restore exists.
*/ */
#include <asm-generic/cmpxchg-local.h> #include <asm-generic/cmpxchg-local.h>
......
/*
* Queued spinlock
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* (C) Copyright 2013-2015 Hewlett-Packard Development Company, L.P.
*
* Authors: Waiman Long <waiman.long@hp.com>
*/
#ifndef __ASM_GENERIC_QSPINLOCK_H
#define __ASM_GENERIC_QSPINLOCK_H
#include <asm-generic/qspinlock_types.h>
/**
* queued_spin_is_locked - is the spinlock locked?
* @lock: Pointer to queued spinlock structure
* Return: 1 if it is locked, 0 otherwise
*/
static __always_inline int queued_spin_is_locked(struct qspinlock *lock)
{
return atomic_read(&lock->val);
}
/**
* queued_spin_value_unlocked - is the spinlock structure unlocked?
* @lock: queued spinlock structure
* Return: 1 if it is unlocked, 0 otherwise
*
* N.B. Whenever there are tasks waiting for the lock, it is considered
* locked wrt the lockref code to avoid lock stealing by the lockref
* code and change things underneath the lock. This also allows some
* optimizations to be applied without conflict with lockref.
*/
static __always_inline int queued_spin_value_unlocked(struct qspinlock lock)
{
return !atomic_read(&lock.val);
}
/**
* queued_spin_is_contended - check if the lock is contended
* @lock : Pointer to queued spinlock structure
* Return: 1 if lock contended, 0 otherwise
*/
static __always_inline int queued_spin_is_contended(struct qspinlock *lock)
{
return atomic_read(&lock->val) & ~_Q_LOCKED_MASK;
}
/**
* queued_spin_trylock - try to acquire the queued spinlock
* @lock : Pointer to queued spinlock structure
* Return: 1 if lock acquired, 0 if failed
*/
static __always_inline int queued_spin_trylock(struct qspinlock *lock)
{
if (!atomic_read(&lock->val) &&
(atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) == 0))
return 1;
return 0;
}
extern void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
/**
* queued_spin_lock - acquire a queued spinlock
* @lock: Pointer to queued spinlock structure
*/
static __always_inline void queued_spin_lock(struct qspinlock *lock)
{
u32 val;
val = atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL);
if (likely(val == 0))
return;
queued_spin_lock_slowpath(lock, val);
}
#ifndef queued_spin_unlock
/**
* queued_spin_unlock - release a queued spinlock
* @lock : Pointer to queued spinlock structure
*/
static __always_inline void queued_spin_unlock(struct qspinlock *lock)
{
/*
* smp_mb__before_atomic() in order to guarantee release semantics
*/
smp_mb__before_atomic_dec();
atomic_sub(_Q_LOCKED_VAL, &lock->val);
}
#endif
/**
* queued_spin_unlock_wait - wait until current lock holder releases the lock
* @lock : Pointer to queued spinlock structure
*
* There is a very slight possibility of live-lock if the lockers keep coming
* and the waiter is just unfortunate enough to not see any unlock state.
*/
static inline void queued_spin_unlock_wait(struct qspinlock *lock)
{
while (atomic_read(&lock->val) & _Q_LOCKED_MASK)
cpu_relax();
}
#ifndef virt_queued_spin_lock
static __always_inline bool virt_queued_spin_lock(struct qspinlock *lock)
{
return false;
}
#endif
/*
* Initializier
*/
#define __ARCH_SPIN_LOCK_UNLOCKED { ATOMIC_INIT(0) }
/*
* Remapping spinlock architecture specific functions to the corresponding
* queued spinlock functions.
*/
#define arch_spin_is_locked(l) queued_spin_is_locked(l)
#define arch_spin_is_contended(l) queued_spin_is_contended(l)
#define arch_spin_value_unlocked(l) queued_spin_value_unlocked(l)
#define arch_spin_lock(l) queued_spin_lock(l)
#define arch_spin_trylock(l) queued_spin_trylock(l)
#define arch_spin_unlock(l) queued_spin_unlock(l)
#define arch_spin_lock_flags(l, f) queued_spin_lock(l)
#define arch_spin_unlock_wait(l) queued_spin_unlock_wait(l)
#endif /* __ASM_GENERIC_QSPINLOCK_H */
/*
* Queued spinlock
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* (C) Copyright 2013-2015 Hewlett-Packard Development Company, L.P.
*
* Authors: Waiman Long <waiman.long@hp.com>
*/
#ifndef __ASM_GENERIC_QSPINLOCK_TYPES_H
#define __ASM_GENERIC_QSPINLOCK_TYPES_H
/*
* Including atomic.h with PARAVIRT on will cause compilation errors because
* of recursive header file incluson via paravirt_types.h. So don't include
* it if PARAVIRT is on.
*/
#ifndef CONFIG_PARAVIRT
#include <linux/types.h>
#include <linux/atomic.h>
#endif
typedef struct qspinlock {
atomic_t val;
} arch_spinlock_t;
/*
* Bitfields in the atomic value:
*
* When NR_CPUS < 16K
* 0- 7: locked byte
* 8: pending
* 9-15: not used
* 16-17: tail index
* 18-31: tail cpu (+1)
*
* When NR_CPUS >= 16K
* 0- 7: locked byte
* 8: pending
* 9-10: tail index
* 11-31: tail cpu (+1)
*/
#define _Q_SET_MASK(type) (((1U << _Q_ ## type ## _BITS) - 1)\
<< _Q_ ## type ## _OFFSET)
#define _Q_LOCKED_OFFSET 0
#define _Q_LOCKED_BITS 8
#define _Q_LOCKED_MASK _Q_SET_MASK(LOCKED)
#define _Q_PENDING_OFFSET (_Q_LOCKED_OFFSET + _Q_LOCKED_BITS)
#if CONFIG_NR_CPUS < (1U << 14)
#define _Q_PENDING_BITS 8
#else
#define _Q_PENDING_BITS 1
#endif
#define _Q_PENDING_MASK _Q_SET_MASK(PENDING)
#define _Q_TAIL_IDX_OFFSET (_Q_PENDING_OFFSET + _Q_PENDING_BITS)
#define _Q_TAIL_IDX_BITS 2
#define _Q_TAIL_IDX_MASK _Q_SET_MASK(TAIL_IDX)
#define _Q_TAIL_CPU_OFFSET (_Q_TAIL_IDX_OFFSET + _Q_TAIL_IDX_BITS)
#define _Q_TAIL_CPU_BITS (32 - _Q_TAIL_CPU_OFFSET)
#define _Q_TAIL_CPU_MASK _Q_SET_MASK(TAIL_CPU)
#define _Q_TAIL_OFFSET _Q_TAIL_IDX_OFFSET
#define _Q_TAIL_MASK (_Q_TAIL_IDX_MASK | _Q_TAIL_CPU_MASK)
#define _Q_LOCKED_VAL (1U << _Q_LOCKED_OFFSET)
#define _Q_PENDING_VAL (1U << _Q_PENDING_OFFSET)
#endif /* __ASM_GENERIC_QSPINLOCK_TYPES_H */
...@@ -250,7 +250,7 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s ...@@ -250,7 +250,7 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
({ union { typeof(x) __val; char __c[1]; } __u; __read_once_size(&(x), __u.__c, sizeof(x)); __u.__val; }) ({ union { typeof(x) __val; char __c[1]; } __u; __read_once_size(&(x), __u.__c, sizeof(x)); __u.__val; })
#define WRITE_ONCE(x, val) \ #define WRITE_ONCE(x, val) \
({ typeof(x) __val = (val); __write_once_size(&(x), &__val, sizeof(__val)); __val; }) ({ union { typeof(x) __val; char __c[1]; } __u = { .__val = (val) }; __write_once_size(&(x), __u.__c, sizeof(x)); __u.__val; })
/** /**
* READ_ONCE_CTRL - Read a value heading a control dependency * READ_ONCE_CTRL - Read a value heading a control dependency
...@@ -466,7 +466,7 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s ...@@ -466,7 +466,7 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
* with an explicit memory barrier or atomic instruction that provides the * with an explicit memory barrier or atomic instruction that provides the
* required ordering. * required ordering.
* *
* If possible use READ_ONCE/ASSIGN_ONCE instead. * If possible use READ_ONCE()/WRITE_ONCE() instead.
*/ */
#define __ACCESS_ONCE(x) ({ \ #define __ACCESS_ONCE(x) ({ \
__maybe_unused typeof(x) __var = (__force typeof(x)) 0; \ __maybe_unused typeof(x) __var = (__force typeof(x)) 0; \
......
...@@ -130,8 +130,8 @@ enum bounce_type { ...@@ -130,8 +130,8 @@ enum bounce_type {
}; };
struct lock_class_stats { struct lock_class_stats {
unsigned long contention_point[4]; unsigned long contention_point[LOCKSTAT_POINTS];
unsigned long contending_point[4]; unsigned long contending_point[LOCKSTAT_POINTS];
struct lock_time read_waittime; struct lock_time read_waittime;
struct lock_time write_waittime; struct lock_time write_waittime;
struct lock_time read_holdtime; struct lock_time read_holdtime;
......
...@@ -32,4 +32,9 @@ static inline void osq_lock_init(struct optimistic_spin_queue *lock) ...@@ -32,4 +32,9 @@ static inline void osq_lock_init(struct optimistic_spin_queue *lock)
extern bool osq_lock(struct optimistic_spin_queue *lock); extern bool osq_lock(struct optimistic_spin_queue *lock);
extern void osq_unlock(struct optimistic_spin_queue *lock); extern void osq_unlock(struct optimistic_spin_queue *lock);
static inline bool osq_is_locked(struct optimistic_spin_queue *lock)
{
return atomic_read(&lock->tail) != OSQ_UNLOCKED_VAL;
}
#endif #endif
...@@ -253,7 +253,7 @@ extern char ___assert_task_state[1 - 2*!!( ...@@ -253,7 +253,7 @@ extern char ___assert_task_state[1 - 2*!!(
#define set_task_state(tsk, state_value) \ #define set_task_state(tsk, state_value) \
do { \ do { \
(tsk)->task_state_change = _THIS_IP_; \ (tsk)->task_state_change = _THIS_IP_; \
set_mb((tsk)->state, (state_value)); \ smp_store_mb((tsk)->state, (state_value)); \
} while (0) } while (0)
/* /*
...@@ -275,7 +275,7 @@ extern char ___assert_task_state[1 - 2*!!( ...@@ -275,7 +275,7 @@ extern char ___assert_task_state[1 - 2*!!(
#define set_current_state(state_value) \ #define set_current_state(state_value) \
do { \ do { \
current->task_state_change = _THIS_IP_; \ current->task_state_change = _THIS_IP_; \
set_mb(current->state, (state_value)); \ smp_store_mb(current->state, (state_value)); \
} while (0) } while (0)
#else #else
...@@ -283,7 +283,7 @@ extern char ___assert_task_state[1 - 2*!!( ...@@ -283,7 +283,7 @@ extern char ___assert_task_state[1 - 2*!!(
#define __set_task_state(tsk, state_value) \ #define __set_task_state(tsk, state_value) \
do { (tsk)->state = (state_value); } while (0) do { (tsk)->state = (state_value); } while (0)
#define set_task_state(tsk, state_value) \ #define set_task_state(tsk, state_value) \
set_mb((tsk)->state, (state_value)) smp_store_mb((tsk)->state, (state_value))
/* /*
* set_current_state() includes a barrier so that the write of current->state * set_current_state() includes a barrier so that the write of current->state
...@@ -299,7 +299,7 @@ extern char ___assert_task_state[1 - 2*!!( ...@@ -299,7 +299,7 @@ extern char ___assert_task_state[1 - 2*!!(
#define __set_current_state(state_value) \ #define __set_current_state(state_value) \
do { current->state = (state_value); } while (0) do { current->state = (state_value); } while (0)
#define set_current_state(state_value) \ #define set_current_state(state_value) \
set_mb(current->state, (state_value)) smp_store_mb(current->state, (state_value))
#endif #endif
......
...@@ -235,9 +235,16 @@ config LOCK_SPIN_ON_OWNER ...@@ -235,9 +235,16 @@ config LOCK_SPIN_ON_OWNER
def_bool y def_bool y
depends on MUTEX_SPIN_ON_OWNER || RWSEM_SPIN_ON_OWNER depends on MUTEX_SPIN_ON_OWNER || RWSEM_SPIN_ON_OWNER
config ARCH_USE_QUEUE_RWLOCK config ARCH_USE_QUEUED_SPINLOCKS
bool bool
config QUEUE_RWLOCK config QUEUED_SPINLOCKS
def_bool y if ARCH_USE_QUEUE_RWLOCK def_bool y if ARCH_USE_QUEUED_SPINLOCKS
depends on SMP
config ARCH_USE_QUEUED_RWLOCKS
bool
config QUEUED_RWLOCKS
def_bool y if ARCH_USE_QUEUED_RWLOCKS
depends on SMP depends on SMP
...@@ -2055,7 +2055,7 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, ...@@ -2055,7 +2055,7 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
{ {
/* /*
* The task state is guaranteed to be set before another task can * The task state is guaranteed to be set before another task can
* wake it. set_current_state() is implemented using set_mb() and * wake it. set_current_state() is implemented using smp_store_mb() and
* queue_me() calls spin_unlock() upon completion, both serializing * queue_me() calls spin_unlock() upon completion, both serializing
* access to the hash list and forcing another memory barrier. * access to the hash list and forcing another memory barrier.
*/ */
......
...@@ -17,6 +17,7 @@ obj-$(CONFIG_SMP) += spinlock.o ...@@ -17,6 +17,7 @@ obj-$(CONFIG_SMP) += spinlock.o
obj-$(CONFIG_LOCK_SPIN_ON_OWNER) += osq_lock.o obj-$(CONFIG_LOCK_SPIN_ON_OWNER) += osq_lock.o
obj-$(CONFIG_SMP) += lglock.o obj-$(CONFIG_SMP) += lglock.o
obj-$(CONFIG_PROVE_LOCKING) += spinlock.o obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
obj-$(CONFIG_QUEUED_SPINLOCKS) += qspinlock.o
obj-$(CONFIG_RT_MUTEXES) += rtmutex.o obj-$(CONFIG_RT_MUTEXES) += rtmutex.o
obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
...@@ -25,5 +26,5 @@ obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o ...@@ -25,5 +26,5 @@ obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o
obj-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o obj-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o
obj-$(CONFIG_QUEUE_RWLOCK) += qrwlock.o obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o
obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o
...@@ -4067,8 +4067,7 @@ void __init lockdep_info(void) ...@@ -4067,8 +4067,7 @@ void __init lockdep_info(void)
#ifdef CONFIG_DEBUG_LOCKDEP #ifdef CONFIG_DEBUG_LOCKDEP
if (lockdep_init_error) { if (lockdep_init_error) {
printk("WARNING: lockdep init error! lock-%s was acquired" printk("WARNING: lockdep init error: lock '%s' was acquired before lockdep_init().\n", lock_init_error);
"before lockdep_init\n", lock_init_error);
printk("Call stack leading to lockdep invocation was:\n"); printk("Call stack leading to lockdep invocation was:\n");
print_stack_trace(&lockdep_init_trace, 0); print_stack_trace(&lockdep_init_trace, 0);
} }
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
struct mcs_spinlock { struct mcs_spinlock {
struct mcs_spinlock *next; struct mcs_spinlock *next;
int locked; /* 1 if lock acquired */ int locked; /* 1 if lock acquired */
int count; /* nesting count, see qspinlock.c */
}; };
#ifndef arch_mcs_spin_lock_contended #ifndef arch_mcs_spin_lock_contended
......
/* /*
* Queue read/write lock * Queued read/write locks
* *
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
...@@ -22,6 +22,26 @@ ...@@ -22,6 +22,26 @@
#include <linux/hardirq.h> #include <linux/hardirq.h>
#include <asm/qrwlock.h> #include <asm/qrwlock.h>
/*
* This internal data structure is used for optimizing access to some of
* the subfields within the atomic_t cnts.
*/
struct __qrwlock {
union {
atomic_t cnts;
struct {
#ifdef __LITTLE_ENDIAN
u8 wmode; /* Writer mode */
u8 rcnts[3]; /* Reader counts */
#else
u8 rcnts[3]; /* Reader counts */
u8 wmode; /* Writer mode */
#endif
};
};
arch_spinlock_t lock;
};
/** /**
* rspin_until_writer_unlock - inc reader count & spin until writer is gone * rspin_until_writer_unlock - inc reader count & spin until writer is gone
* @lock : Pointer to queue rwlock structure * @lock : Pointer to queue rwlock structure
...@@ -107,10 +127,10 @@ void queue_write_lock_slowpath(struct qrwlock *lock) ...@@ -107,10 +127,10 @@ void queue_write_lock_slowpath(struct qrwlock *lock)
* or wait for a previous writer to go away. * or wait for a previous writer to go away.
*/ */
for (;;) { for (;;) {
cnts = atomic_read(&lock->cnts); struct __qrwlock *l = (struct __qrwlock *)lock;
if (!(cnts & _QW_WMASK) &&
(atomic_cmpxchg(&lock->cnts, cnts, if (!READ_ONCE(l->wmode) &&
cnts | _QW_WAITING) == cnts)) (cmpxchg(&l->wmode, 0, _QW_WAITING) == 0))
break; break;
cpu_relax_lowlatency(); cpu_relax_lowlatency();
......
This diff is collapsed.
#ifndef _GEN_PV_LOCK_SLOWPATH
#error "do not include this file"
#endif
#include <linux/hash.h>
#include <linux/bootmem.h>
/*
* Implement paravirt qspinlocks; the general idea is to halt the vcpus instead
* of spinning them.
*
* This relies on the architecture to provide two paravirt hypercalls:
*
* pv_wait(u8 *ptr, u8 val) -- suspends the vcpu if *ptr == val
* pv_kick(cpu) -- wakes a suspended vcpu
*
* Using these we implement __pv_queued_spin_lock_slowpath() and
* __pv_queued_spin_unlock() to replace native_queued_spin_lock_slowpath() and
* native_queued_spin_unlock().
*/
#define _Q_SLOW_VAL (3U << _Q_LOCKED_OFFSET)
enum vcpu_state {
vcpu_running = 0,
vcpu_halted,
};
struct pv_node {
struct mcs_spinlock mcs;
struct mcs_spinlock __res[3];
int cpu;
u8 state;
};
/*
* Lock and MCS node addresses hash table for fast lookup
*
* Hashing is done on a per-cacheline basis to minimize the need to access
* more than one cacheline.
*
* Dynamically allocate a hash table big enough to hold at least 4X the
* number of possible cpus in the system. Allocation is done on page
* granularity. So the minimum number of hash buckets should be at least
* 256 (64-bit) or 512 (32-bit) to fully utilize a 4k page.
*
* Since we should not be holding locks from NMI context (very rare indeed) the
* max load factor is 0.75, which is around the point where open addressing
* breaks down.
*
*/
struct pv_hash_entry {
struct qspinlock *lock;
struct pv_node *node;
};
#define PV_HE_PER_LINE (SMP_CACHE_BYTES / sizeof(struct pv_hash_entry))
#define PV_HE_MIN (PAGE_SIZE / sizeof(struct pv_hash_entry))
static struct pv_hash_entry *pv_lock_hash;
static unsigned int pv_lock_hash_bits __read_mostly;
/*
* Allocate memory for the PV qspinlock hash buckets
*
* This function should be called from the paravirt spinlock initialization
* routine.
*/
void __init __pv_init_lock_hash(void)
{
int pv_hash_size = ALIGN(4 * num_possible_cpus(), PV_HE_PER_LINE);
if (pv_hash_size < PV_HE_MIN)
pv_hash_size = PV_HE_MIN;
/*
* Allocate space from bootmem which should be page-size aligned
* and hence cacheline aligned.
*/
pv_lock_hash = alloc_large_system_hash("PV qspinlock",
sizeof(struct pv_hash_entry),
pv_hash_size, 0, HASH_EARLY,
&pv_lock_hash_bits, NULL,
pv_hash_size, pv_hash_size);
}
#define for_each_hash_entry(he, offset, hash) \
for (hash &= ~(PV_HE_PER_LINE - 1), he = &pv_lock_hash[hash], offset = 0; \
offset < (1 << pv_lock_hash_bits); \
offset++, he = &pv_lock_hash[(hash + offset) & ((1 << pv_lock_hash_bits) - 1)])
static struct qspinlock **pv_hash(struct qspinlock *lock, struct pv_node *node)
{
unsigned long offset, hash = hash_ptr(lock, pv_lock_hash_bits);
struct pv_hash_entry *he;
for_each_hash_entry(he, offset, hash) {
if (!cmpxchg(&he->lock, NULL, lock)) {
WRITE_ONCE(he->node, node);
return &he->lock;
}
}
/*
* Hard assume there is a free entry for us.
*
* This is guaranteed by ensuring every blocked lock only ever consumes
* a single entry, and since we only have 4 nesting levels per CPU
* and allocated 4*nr_possible_cpus(), this must be so.
*
* The single entry is guaranteed by having the lock owner unhash
* before it releases.
*/
BUG();
}
static struct pv_node *pv_unhash(struct qspinlock *lock)
{
unsigned long offset, hash = hash_ptr(lock, pv_lock_hash_bits);
struct pv_hash_entry *he;
struct pv_node *node;
for_each_hash_entry(he, offset, hash) {
if (READ_ONCE(he->lock) == lock) {
node = READ_ONCE(he->node);
WRITE_ONCE(he->lock, NULL);
return node;
}
}
/*
* Hard assume we'll find an entry.
*
* This guarantees a limited lookup time and is itself guaranteed by
* having the lock owner do the unhash -- IFF the unlock sees the
* SLOW flag, there MUST be a hash entry.
*/
BUG();
}
/*
* Initialize the PV part of the mcs_spinlock node.
*/
static void pv_init_node(struct mcs_spinlock *node)
{
struct pv_node *pn = (struct pv_node *)node;
BUILD_BUG_ON(sizeof(struct pv_node) > 5*sizeof(struct mcs_spinlock));
pn->cpu = smp_processor_id();
pn->state = vcpu_running;
}
/*
* Wait for node->locked to become true, halt the vcpu after a short spin.
* pv_kick_node() is used to wake the vcpu again.
*/
static void pv_wait_node(struct mcs_spinlock *node)
{
struct pv_node *pn = (struct pv_node *)node;
int loop;
for (;;) {
for (loop = SPIN_THRESHOLD; loop; loop--) {
if (READ_ONCE(node->locked))
return;
cpu_relax();
}
/*
* Order pn->state vs pn->locked thusly:
*
* [S] pn->state = vcpu_halted [S] next->locked = 1
* MB MB
* [L] pn->locked [RmW] pn->state = vcpu_running
*
* Matches the xchg() from pv_kick_node().
*/
smp_store_mb(pn->state, vcpu_halted);
if (!READ_ONCE(node->locked))
pv_wait(&pn->state, vcpu_halted);
/*
* Reset the vCPU state to avoid unncessary CPU kicking
*/
WRITE_ONCE(pn->state, vcpu_running);
/*
* If the locked flag is still not set after wakeup, it is a
* spurious wakeup and the vCPU should wait again. However,
* there is a pretty high overhead for CPU halting and kicking.
* So it is better to spin for a while in the hope that the
* MCS lock will be released soon.
*/
}
/*
* By now our node->locked should be 1 and our caller will not actually
* spin-wait for it. We do however rely on our caller to do a
* load-acquire for us.
*/
}
/*
* Called after setting next->locked = 1, used to wake those stuck in
* pv_wait_node().
*/
static void pv_kick_node(struct mcs_spinlock *node)
{
struct pv_node *pn = (struct pv_node *)node;
/*
* Note that because node->locked is already set, this actual
* mcs_spinlock entry could be re-used already.
*
* This should be fine however, kicking people for no reason is
* harmless.
*
* See the comment in pv_wait_node().
*/
if (xchg(&pn->state, vcpu_running) == vcpu_halted)
pv_kick(pn->cpu);
}
/*
* Wait for l->locked to become clear; halt the vcpu after a short spin.
* __pv_queued_spin_unlock() will wake us.
*/
static void pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node)
{
struct pv_node *pn = (struct pv_node *)node;
struct __qspinlock *l = (void *)lock;
struct qspinlock **lp = NULL;
int loop;
for (;;) {
for (loop = SPIN_THRESHOLD; loop; loop--) {
if (!READ_ONCE(l->locked))
return;
cpu_relax();
}
WRITE_ONCE(pn->state, vcpu_halted);
if (!lp) { /* ONCE */
lp = pv_hash(lock, pn);
/*
* lp must be set before setting _Q_SLOW_VAL
*
* [S] lp = lock [RmW] l = l->locked = 0
* MB MB
* [S] l->locked = _Q_SLOW_VAL [L] lp
*
* Matches the cmpxchg() in __pv_queued_spin_unlock().
*/
if (!cmpxchg(&l->locked, _Q_LOCKED_VAL, _Q_SLOW_VAL)) {
/*
* The lock is free and _Q_SLOW_VAL has never
* been set. Therefore we need to unhash before
* getting the lock.
*/
WRITE_ONCE(*lp, NULL);
return;
}
}
pv_wait(&l->locked, _Q_SLOW_VAL);
/*
* The unlocker should have freed the lock before kicking the
* CPU. So if the lock is still not free, it is a spurious
* wakeup and so the vCPU should wait again after spinning for
* a while.
*/
}
/*
* Lock is unlocked now; the caller will acquire it without waiting.
* As with pv_wait_node() we rely on the caller to do a load-acquire
* for us.
*/
}
/*
* PV version of the unlock function to be used in stead of
* queued_spin_unlock().
*/
__visible void __pv_queued_spin_unlock(struct qspinlock *lock)
{
struct __qspinlock *l = (void *)lock;
struct pv_node *node;
/*
* We must not unlock if SLOW, because in that case we must first
* unhash. Otherwise it would be possible to have multiple @lock
* entries, which would be BAD.
*/
if (likely(cmpxchg(&l->locked, _Q_LOCKED_VAL, 0) == _Q_LOCKED_VAL))
return;
/*
* Since the above failed to release, this must be the SLOW path.
* Therefore start by looking up the blocked node and unhashing it.
*/
node = pv_unhash(lock);
/*
* Now that we have a reference to the (likely) blocked pv_node,
* release the lock.
*/
smp_store_release(&l->locked, 0);
/*
* At this point the memory pointed at by lock can be freed/reused,
* however we can still use the pv_node to kick the CPU.
*/
if (READ_ONCE(node->state) == vcpu_halted)
pv_kick(node->cpu);
}
/*
* Include the architecture specific callee-save thunk of the
* __pv_queued_spin_unlock(). This thunk is put together with
* __pv_queued_spin_unlock() near the top of the file to make sure
* that the callee-save thunk and the real unlock function are close
* to each other sharing consecutive instruction cachelines.
*/
#include <asm/qspinlock_paravirt.h>
...@@ -70,10 +70,10 @@ static void fixup_rt_mutex_waiters(struct rt_mutex *lock) ...@@ -70,10 +70,10 @@ static void fixup_rt_mutex_waiters(struct rt_mutex *lock)
} }
/* /*
* We can speed up the acquire/release, if the architecture * We can speed up the acquire/release, if there's no debugging state to be
* supports cmpxchg and if there's no debugging state to be set up * set up.
*/ */
#if defined(__HAVE_ARCH_CMPXCHG) && !defined(CONFIG_DEBUG_RT_MUTEXES) #ifndef CONFIG_DEBUG_RT_MUTEXES
# define rt_mutex_cmpxchg(l,c,n) (cmpxchg(&l->owner, c, n) == c) # define rt_mutex_cmpxchg(l,c,n) (cmpxchg(&l->owner, c, n) == c)
static inline void mark_rt_mutex_waiters(struct rt_mutex *lock) static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
{ {
...@@ -1443,10 +1443,17 @@ EXPORT_SYMBOL_GPL(rt_mutex_timed_lock); ...@@ -1443,10 +1443,17 @@ EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
* *
* @lock: the rt_mutex to be locked * @lock: the rt_mutex to be locked
* *
* This function can only be called in thread context. It's safe to
* call it from atomic regions, but not from hard interrupt or soft
* interrupt context.
*
* Returns 1 on success and 0 on contention * Returns 1 on success and 0 on contention
*/ */
int __sched rt_mutex_trylock(struct rt_mutex *lock) int __sched rt_mutex_trylock(struct rt_mutex *lock)
{ {
if (WARN_ON(in_irq() || in_nmi() || in_serving_softirq()))
return 0;
return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock); return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock);
} }
EXPORT_SYMBOL_GPL(rt_mutex_trylock); EXPORT_SYMBOL_GPL(rt_mutex_trylock);
......
...@@ -409,11 +409,24 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem) ...@@ -409,11 +409,24 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
return taken; return taken;
} }
/*
* Return true if the rwsem has active spinner
*/
static inline bool rwsem_has_spinner(struct rw_semaphore *sem)
{
return osq_is_locked(&sem->osq);
}
#else #else
static bool rwsem_optimistic_spin(struct rw_semaphore *sem) static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
{ {
return false; return false;
} }
static inline bool rwsem_has_spinner(struct rw_semaphore *sem)
{
return false;
}
#endif #endif
/* /*
...@@ -496,7 +509,38 @@ struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem) ...@@ -496,7 +509,38 @@ struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
{ {
unsigned long flags; unsigned long flags;
/*
* If a spinner is present, it is not necessary to do the wakeup.
* Try to do wakeup only if the trylock succeeds to minimize
* spinlock contention which may introduce too much delay in the
* unlock operation.
*
* spinning writer up_write/up_read caller
* --------------- -----------------------
* [S] osq_unlock() [L] osq
* MB RMB
* [RmW] rwsem_try_write_lock() [RmW] spin_trylock(wait_lock)
*
* Here, it is important to make sure that there won't be a missed
* wakeup while the rwsem is free and the only spinning writer goes
* to sleep without taking the rwsem. Even when the spinning writer
* is just going to break out of the waiting loop, it will still do
* a trylock in rwsem_down_write_failed() before sleeping. IOW, if
* rwsem_has_spinner() is true, it will guarantee at least one
* trylock attempt on the rwsem later on.
*/
if (rwsem_has_spinner(sem)) {
/*
* The smp_rmb() here is to make sure that the spinner
* state is consulted before reading the wait_lock.
*/
smp_rmb();
if (!raw_spin_trylock_irqsave(&sem->wait_lock, flags))
return sem;
goto locked;
}
raw_spin_lock_irqsave(&sem->wait_lock, flags); raw_spin_lock_irqsave(&sem->wait_lock, flags);
locked:
/* do nothing if list empty */ /* do nothing if list empty */
if (!list_empty(&sem->wait_list)) if (!list_empty(&sem->wait_list))
......
...@@ -341,7 +341,7 @@ long wait_woken(wait_queue_t *wait, unsigned mode, long timeout) ...@@ -341,7 +341,7 @@ long wait_woken(wait_queue_t *wait, unsigned mode, long timeout)
* condition being true _OR_ WQ_FLAG_WOKEN such that we will not miss * condition being true _OR_ WQ_FLAG_WOKEN such that we will not miss
* an event. * an event.
*/ */
set_mb(wait->flags, wait->flags & ~WQ_FLAG_WOKEN); /* B */ smp_store_mb(wait->flags, wait->flags & ~WQ_FLAG_WOKEN); /* B */
return timeout; return timeout;
} }
...@@ -354,7 +354,7 @@ int woken_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key) ...@@ -354,7 +354,7 @@ int woken_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
* doesn't imply write barrier and the users expects write * doesn't imply write barrier and the users expects write
* barrier semantics on wakeup functions. The following * barrier semantics on wakeup functions. The following
* smp_wmb() is equivalent to smp_wmb() in try_to_wake_up() * smp_wmb() is equivalent to smp_wmb() in try_to_wake_up()
* and is paired with set_mb() in wait_woken(). * and is paired with smp_store_mb() in wait_woken().
*/ */
smp_wmb(); /* C */ smp_wmb(); /* C */
wait->flags |= WQ_FLAG_WOKEN; wait->flags |= WQ_FLAG_WOKEN;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment