Commit d63a9788 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull locking changes from Ingo Molnar:
 "The main changes in this cycle were:

   - More gradual enhancements to atomic ops: new atomic*_read_ctrl()
     ops, synchronize atomic_{read,set}() ordering requirements between
     architectures, add atomic_long_t bitops.  (Peter Zijlstra)

   - Add _{relaxed|acquire|release}() variants for inc/dec atomics and
     use them in various locking primitives: mutex, rtmutex, mcs, rwsem.
     This enables weakly ordered architectures (such as arm64) to make
     use of more locking related optimizations.  (Davidlohr Bueso)

   - Implement atomic[64]_{inc,dec}_relaxed() on ARM.  (Will Deacon)

   - Futex kernel data cache footprint micro-optimization.  (Rasmus
     Villemoes)

   - pvqspinlock runtime overhead micro-optimization.  (Waiman Long)

   - misc smaller fixlets"

* 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  ARM, locking/atomics: Implement _relaxed variants of atomic[64]_{inc,dec}
  locking/rwsem: Use acquire/release semantics
  locking/mcs: Use acquire/release semantics
  locking/rtmutex: Use acquire/release semantics
  locking/mutex: Use acquire/release semantics
  locking/asm-generic: Add _{relaxed|acquire|release}() variants for inc/dec atomics
  atomic: Implement atomic_read_ctrl()
  atomic, arch: Audit atomic_{read,set}()
  atomic: Add atomic_long_t bitops
  futex: Force hot variables into a single cache line
  locking/pvqspinlock: Kick the PV CPU unconditionally when _Q_SLOW_VAL
  locking/osq: Relax atomic semantics
  locking/qrwlock: Rename ->lock to ->wait_lock
  locking/Documentation/lockstat: Fix typo - lokcing -> locking
  locking/atomics, cmpxchg: Privatize the inclusion of asm/cmpxchg.h
parents 28142286 6e490b01
......@@ -542,6 +542,10 @@ The routines xchg() and cmpxchg() must provide the same exact
memory-barrier semantics as the atomic and bit operations returning
values.
Note: If someone wants to use xchg(), cmpxchg() and their variants,
linux/atomic.h should be included rather than asm/cmpxchg.h, unless
the code is in arch/* and can take care of itself.
Spinlocks and rwlocks have memory barrier expectations as well.
The rule to follow is simple:
......
......@@ -12,7 +12,7 @@ Because things like lock contention can severely impact performance.
- HOW
Lockdep already has hooks in the lock functions and maps lock instances to
lock classes. We build on that (see Documentation/lokcing/lockdep-design.txt).
lock classes. We build on that (see Documentation/locking/lockdep-design.txt).
The graph below shows the relation between the lock functions and the various
hooks therein.
......
......@@ -637,7 +637,8 @@ as follows:
b = p; /* BUG: Compiler and CPU can both reorder!!! */
Finally, the READ_ONCE_CTRL() includes an smp_read_barrier_depends()
that DEC Alpha needs in order to respect control depedencies.
that DEC Alpha needs in order to respect control depedencies. Alternatively
use one of atomic{,64}_read_ctrl().
So don't leave out the READ_ONCE_CTRL().
......@@ -796,9 +797,9 @@ site: https://www.cl.cam.ac.uk/~pes20/ppcmem/index.html.
In summary:
(*) Control dependencies must be headed by READ_ONCE_CTRL().
Or, as a much less preferable alternative, interpose
smp_read_barrier_depends() between a READ_ONCE() and the
(*) Control dependencies must be headed by READ_ONCE_CTRL(),
atomic{,64}_read_ctrl(). Or, as a much less preferable alternative,
interpose smp_read_barrier_depends() between a READ_ONCE() and the
control-dependent write.
(*) Control dependencies can order prior loads against later stores.
......@@ -820,10 +821,10 @@ In summary:
and WRITE_ONCE() can help to preserve the needed conditional.
(*) Control dependencies require that the compiler avoid reordering the
dependency into nonexistence. Careful use of READ_ONCE_CTRL()
or smp_read_barrier_depends() can help to preserve your control
dependency. Please see the Compiler Barrier section for more
information.
dependency into nonexistence. Careful use of READ_ONCE_CTRL(),
atomic{,64}_read_ctrl() or smp_read_barrier_depends() can help to
preserve your control dependency. Please see the Compiler Barrier
section for more information.
(*) Control dependencies pair normally with other types of barriers.
......
......@@ -17,11 +17,11 @@
#define ATOMIC_INIT(i) { (i) }
#define ATOMIC64_INIT(i) { (i) }
#define atomic_read(v) ACCESS_ONCE((v)->counter)
#define atomic64_read(v) ACCESS_ONCE((v)->counter)
#define atomic_read(v) READ_ONCE((v)->counter)
#define atomic64_read(v) READ_ONCE((v)->counter)
#define atomic_set(v,i) ((v)->counter = (i))
#define atomic64_set(v,i) ((v)->counter = (i))
#define atomic_set(v,i) WRITE_ONCE((v)->counter, (i))
#define atomic64_set(v,i) WRITE_ONCE((v)->counter, (i))
/*
* To get proper branch prediction for the main line, we must branch
......
......@@ -17,11 +17,11 @@
#include <asm/barrier.h>
#include <asm/smp.h>
#define atomic_read(v) ((v)->counter)
#define atomic_read(v) READ_ONCE((v)->counter)
#ifdef CONFIG_ARC_HAS_LLSC
#define atomic_set(v, i) (((v)->counter) = (i))
#define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
#ifdef CONFIG_ARC_STAR_9000923308
......@@ -107,7 +107,7 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \
#ifndef CONFIG_SMP
/* violating atomic_xxx API locking protocol in UP for optimization sake */
#define atomic_set(v, i) (((v)->counter) = (i))
#define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
#else
......@@ -125,7 +125,7 @@ static inline void atomic_set(atomic_t *v, int i)
unsigned long flags;
atomic_ops_lock(flags);
v->counter = i;
WRITE_ONCE(v->counter, i);
atomic_ops_unlock(flags);
}
......
......@@ -27,8 +27,8 @@
* strex/ldrex monitor on some implementations. The reason we can use it for
* atomic_set() is the clrex or dummy strex done on every exception return.
*/
#define atomic_read(v) ACCESS_ONCE((v)->counter)
#define atomic_set(v,i) (((v)->counter) = (i))
#define atomic_read(v) READ_ONCE((v)->counter)
#define atomic_set(v,i) WRITE_ONCE(((v)->counter), (i))
#if __LINUX_ARM_ARCH__ >= 6
......@@ -210,8 +210,8 @@ ATOMIC_OP(xor, ^=, eor)
#define atomic_inc_and_test(v) (atomic_add_return(1, v) == 0)
#define atomic_dec_and_test(v) (atomic_sub_return(1, v) == 0)
#define atomic_inc_return(v) (atomic_add_return(1, v))
#define atomic_dec_return(v) (atomic_sub_return(1, v))
#define atomic_inc_return_relaxed(v) (atomic_add_return_relaxed(1, v))
#define atomic_dec_return_relaxed(v) (atomic_sub_return_relaxed(1, v))
#define atomic_sub_and_test(i, v) (atomic_sub_return(i, v) == 0)
#define atomic_add_negative(i,v) (atomic_add_return(i, v) < 0)
......@@ -442,11 +442,11 @@ static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u)
#define atomic64_add_negative(a, v) (atomic64_add_return((a), (v)) < 0)
#define atomic64_inc(v) atomic64_add(1LL, (v))
#define atomic64_inc_return(v) atomic64_add_return(1LL, (v))
#define atomic64_inc_return_relaxed(v) atomic64_add_return_relaxed(1LL, (v))
#define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0)
#define atomic64_sub_and_test(a, v) (atomic64_sub_return((a), (v)) == 0)
#define atomic64_dec(v) atomic64_sub(1LL, (v))
#define atomic64_dec_return(v) atomic64_sub_return(1LL, (v))
#define atomic64_dec_return_relaxed(v) atomic64_sub_return_relaxed(1LL, (v))
#define atomic64_dec_and_test(v) (atomic64_dec_return((v)) == 0)
#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1LL, 0LL)
......
......@@ -54,7 +54,7 @@
#define ATOMIC_INIT(i) { (i) }
#define atomic_read(v) READ_ONCE((v)->counter)
#define atomic_set(v, i) (((v)->counter) = (i))
#define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
#define atomic_xchg(v, new) xchg(&((v)->counter), (new))
#define atomic_cmpxchg(v, old, new) cmpxchg(&((v)->counter), (old), (new))
......
......@@ -19,8 +19,8 @@
#define ATOMIC_INIT(i) { (i) }
#define atomic_read(v) ACCESS_ONCE((v)->counter)
#define atomic_set(v, i) (((v)->counter) = i)
#define atomic_read(v) READ_ONCE((v)->counter)
#define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
#define ATOMIC_OP_RETURN(op, asm_op, asm_con) \
static inline int __atomic_##op##_return(int i, atomic_t *v) \
......
......@@ -32,8 +32,8 @@
*/
#define ATOMIC_INIT(i) { (i) }
#define atomic_read(v) ACCESS_ONCE((v)->counter)
#define atomic_set(v, i) (((v)->counter) = (i))
#define atomic_read(v) READ_ONCE((v)->counter)
#define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
static inline int atomic_inc_return(atomic_t *v)
{
......
......@@ -11,8 +11,8 @@
#define ATOMIC_INIT(i) { (i) }
#define atomic_read(v) ACCESS_ONCE((v)->counter)
#define atomic_set(v, i) (((v)->counter) = i)
#define atomic_read(v) READ_ONCE((v)->counter)
#define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
#include <linux/kernel.h>
......
......@@ -48,7 +48,7 @@ static inline void atomic_set(atomic_t *v, int new)
*
* Assumes all word reads on our architecture are atomic.
*/
#define atomic_read(v) ((v)->counter)
#define atomic_read(v) READ_ONCE((v)->counter)
/**
* atomic_xchg - atomic
......
......@@ -21,11 +21,11 @@
#define ATOMIC_INIT(i) { (i) }
#define ATOMIC64_INIT(i) { (i) }
#define atomic_read(v) ACCESS_ONCE((v)->counter)
#define atomic64_read(v) ACCESS_ONCE((v)->counter)
#define atomic_read(v) READ_ONCE((v)->counter)
#define atomic64_read(v) READ_ONCE((v)->counter)
#define atomic_set(v,i) (((v)->counter) = (i))
#define atomic64_set(v,i) (((v)->counter) = (i))
#define atomic_set(v,i) WRITE_ONCE(((v)->counter), (i))
#define atomic64_set(v,i) WRITE_ONCE(((v)->counter), (i))
#define ATOMIC_OP(op, c_op) \
static __inline__ int \
......
......@@ -28,7 +28,7 @@
*
* Atomically reads the value of @v.
*/
#define atomic_read(v) ACCESS_ONCE((v)->counter)
#define atomic_read(v) READ_ONCE((v)->counter)
/**
* atomic_set - set atomic variable
......@@ -37,7 +37,7 @@
*
* Atomically sets the value of @v to @i.
*/
#define atomic_set(v,i) (((v)->counter) = (i))
#define atomic_set(v,i) WRITE_ONCE(((v)->counter), (i))
#ifdef CONFIG_CHIP_M32700_TS1
#define __ATOMIC_CLOBBER , "r4"
......
......@@ -17,8 +17,8 @@
#define ATOMIC_INIT(i) { (i) }
#define atomic_read(v) ACCESS_ONCE((v)->counter)
#define atomic_set(v, i) (((v)->counter) = i)
#define atomic_read(v) READ_ONCE((v)->counter)
#define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
/*
* The ColdFire parts cannot do some immediate to memory operations,
......
......@@ -3,7 +3,7 @@
#define ATOMIC_INIT(i) { (i) }
#define atomic_set(v, i) ((v)->counter = (i))
#define atomic_set(v, i) WRITE_ONCE((v)->counter, (i))
#include <linux/compiler.h>
......
......@@ -10,7 +10,7 @@
static inline int atomic_read(const atomic_t *v)
{
return (v)->counter;
return READ_ONCE((v)->counter);
}
/*
......
......@@ -30,7 +30,7 @@
*
* Atomically reads the value of @v.
*/
#define atomic_read(v) ACCESS_ONCE((v)->counter)
#define atomic_read(v) READ_ONCE((v)->counter)
/*
* atomic_set - set atomic variable
......@@ -39,7 +39,7 @@
*
* Atomically sets the value of @v to @i.
*/
#define atomic_set(v, i) ((v)->counter = (i))
#define atomic_set(v, i) WRITE_ONCE((v)->counter, (i))
#define ATOMIC_OP(op, c_op, asm_op) \
static __inline__ void atomic_##op(int i, atomic_t * v) \
......@@ -315,14 +315,14 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
* @v: pointer of type atomic64_t
*
*/
#define atomic64_read(v) ACCESS_ONCE((v)->counter)
#define atomic64_read(v) READ_ONCE((v)->counter)
/*
* atomic64_set - set atomic variable
* @v: pointer of type atomic64_t
* @i: required value
*/
#define atomic64_set(v, i) ((v)->counter = (i))
#define atomic64_set(v, i) WRITE_ONCE((v)->counter, (i))
#define ATOMIC64_OP(op, c_op, asm_op) \
static __inline__ void atomic64_##op(long i, atomic64_t * v) \
......
......@@ -34,7 +34,7 @@
*
* Atomically reads the value of @v. Note that the guaranteed
*/
#define atomic_read(v) (ACCESS_ONCE((v)->counter))
#define atomic_read(v) READ_ONCE((v)->counter)
/**
* atomic_set - set atomic variable
......@@ -43,7 +43,7 @@
*
* Atomically sets the value of @v to @i. Note that the guaranteed
*/
#define atomic_set(v, i) (((v)->counter) = (i))
#define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
#define ATOMIC_OP(op) \
static inline void atomic_##op(int i, atomic_t *v) \
......
......@@ -67,7 +67,7 @@ static __inline__ void atomic_set(atomic_t *v, int i)
static __inline__ int atomic_read(const atomic_t *v)
{
return ACCESS_ONCE((v)->counter);
return READ_ONCE((v)->counter);
}
/* exported interface */
......
......@@ -14,8 +14,8 @@
#define ATOMIC_INIT(i) { (i) }
#define atomic_read(v) ACCESS_ONCE((v)->counter)
#define atomic_set(v,i) ((v)->counter = (i))
#define atomic_read(v) READ_ONCE((v)->counter)
#define atomic_set(v,i) WRITE_ONCE((v)->counter, (i))
#if defined(CONFIG_GUSA_RB)
#include <asm/atomic-grb.h>
......
......@@ -14,11 +14,11 @@
#define ATOMIC_INIT(i) { (i) }
#define ATOMIC64_INIT(i) { (i) }
#define atomic_read(v) ACCESS_ONCE((v)->counter)
#define atomic64_read(v) ACCESS_ONCE((v)->counter)
#define atomic_read(v) READ_ONCE((v)->counter)
#define atomic64_read(v) READ_ONCE((v)->counter)
#define atomic_set(v, i) (((v)->counter) = i)
#define atomic64_set(v, i) (((v)->counter) = i)
#define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
#define atomic64_set(v, i) WRITE_ONCE(((v)->counter), (i))
#define ATOMIC_OP(op) \
void atomic_##op(int, atomic_t *); \
......
......@@ -34,7 +34,7 @@
*/
static inline int atomic_read(const atomic_t *v)
{
return ACCESS_ONCE(v->counter);
return READ_ONCE(v->counter);
}
/**
......
......@@ -24,7 +24,7 @@
/* First, the 32-bit atomic ops that are "real" on our 64-bit platform. */
#define atomic_set(v, i) ((v)->counter = (i))
#define atomic_set(v, i) WRITE_ONCE((v)->counter, (i))
/*
* The smp_mb() operations throughout are to support the fact that
......@@ -82,8 +82,8 @@ static inline void atomic_xor(int i, atomic_t *v)
#define ATOMIC64_INIT(i) { (i) }
#define atomic64_read(v) ((v)->counter)
#define atomic64_set(v, i) ((v)->counter = (i))
#define atomic64_read(v) READ_ONCE((v)->counter)
#define atomic64_set(v, i) WRITE_ONCE((v)->counter, (i))
static inline void atomic64_add(long i, atomic64_t *v)
{
......
......@@ -24,7 +24,7 @@
*/
static __always_inline int atomic_read(const atomic_t *v)
{
return ACCESS_ONCE((v)->counter);
return READ_ONCE((v)->counter);
}
/**
......@@ -36,7 +36,7 @@ static __always_inline int atomic_read(const atomic_t *v)
*/
static __always_inline void atomic_set(atomic_t *v, int i)
{
v->counter = i;
WRITE_ONCE(v->counter, i);
}
/**
......
......@@ -18,7 +18,7 @@
*/
static inline long atomic64_read(const atomic64_t *v)
{
return ACCESS_ONCE((v)->counter);
return READ_ONCE((v)->counter);
}
/**
......@@ -30,7 +30,7 @@ static inline long atomic64_read(const atomic64_t *v)
*/
static inline void atomic64_set(atomic64_t *v, long i)
{
v->counter = i;
WRITE_ONCE(v->counter, i);
}
/**
......
......@@ -47,7 +47,7 @@
*
* Atomically reads the value of @v.
*/
#define atomic_read(v) ACCESS_ONCE((v)->counter)
#define atomic_read(v) READ_ONCE((v)->counter)
/**
* atomic_set - set atomic variable
......@@ -56,7 +56,7 @@
*
* Atomically sets the value of @v to @i.
*/
#define atomic_set(v,i) ((v)->counter = (i))
#define atomic_set(v,i) WRITE_ONCE((v)->counter, (i))
#if XCHAL_HAVE_S32C1I
#define ATOMIC_OP(op) \
......
......@@ -9,7 +9,7 @@
#include <linux/delay.h>
#include <linux/moduleparam.h>
#include <asm/cmpxchg.h>
#include <linux/atomic.h>
#include "net_driver.h"
#include "nic.h"
#include "io.h"
......
......@@ -17,8 +17,7 @@
#include <linux/phy/phy.h>
#include <linux/platform_device.h>
#include <linux/spinlock.h>
#include <asm/cmpxchg.h>
#include <linux/atomic.h>
#define USBHS_LPSTS 0x02
#define USBHS_UGCTRL 0x80
......
......@@ -7,7 +7,7 @@
#include <linux/workqueue.h>
#include <linux/tty.h>
#include <linux/tty_flip.h>
#include <asm/cmpxchg.h>
#include <linux/atomic.h>
#include "speakup.h"
......
......@@ -35,7 +35,7 @@ typedef atomic_t atomic_long_t;
#endif
#define ATOMIC_LONG_READ_OP(mo) \
static inline long atomic_long_read##mo(atomic_long_t *l) \
static inline long atomic_long_read##mo(const atomic_long_t *l) \
{ \
ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; \
\
......@@ -43,6 +43,7 @@ static inline long atomic_long_read##mo(atomic_long_t *l) \
}
ATOMIC_LONG_READ_OP()
ATOMIC_LONG_READ_OP(_acquire)
ATOMIC_LONG_READ_OP(_ctrl)
#undef ATOMIC_LONG_READ_OP
......@@ -112,19 +113,23 @@ static inline void atomic_long_dec(atomic_long_t *l)
ATOMIC_LONG_PFX(_dec)(v);
}
static inline void atomic_long_add(long i, atomic_long_t *l)
{
ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
ATOMIC_LONG_PFX(_add)(i, v);
#define ATOMIC_LONG_OP(op) \
static inline void \
atomic_long_##op(long i, atomic_long_t *l) \
{ \
ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; \
\
ATOMIC_LONG_PFX(_##op)(i, v); \
}
static inline void atomic_long_sub(long i, atomic_long_t *l)
{
ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
ATOMIC_LONG_OP(add)
ATOMIC_LONG_OP(sub)
ATOMIC_LONG_OP(and)
ATOMIC_LONG_OP(or)
ATOMIC_LONG_OP(xor)
ATOMIC_LONG_OP(andnot)
ATOMIC_LONG_PFX(_sub)(i, v);
}
#undef ATOMIC_LONG_OP
static inline int atomic_long_sub_and_test(long i, atomic_long_t *l)
{
......@@ -154,19 +159,24 @@ static inline int atomic_long_add_negative(long i, atomic_long_t *l)
return ATOMIC_LONG_PFX(_add_negative)(i, v);
}
static inline long atomic_long_inc_return(atomic_long_t *l)
{
ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
return (long)ATOMIC_LONG_PFX(_inc_return)(v);
}
static inline long atomic_long_dec_return(atomic_long_t *l)
{
ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
return (long)ATOMIC_LONG_PFX(_dec_return)(v);
#define ATOMIC_LONG_INC_DEC_OP(op, mo) \
static inline long \
atomic_long_##op##_return##mo(atomic_long_t *l) \
{ \
ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; \
\
return (long)ATOMIC_LONG_PFX(_##op##_return##mo)(v); \
}
ATOMIC_LONG_INC_DEC_OP(inc,)
ATOMIC_LONG_INC_DEC_OP(inc, _relaxed)
ATOMIC_LONG_INC_DEC_OP(inc, _acquire)
ATOMIC_LONG_INC_DEC_OP(inc, _release)
ATOMIC_LONG_INC_DEC_OP(dec,)
ATOMIC_LONG_INC_DEC_OP(dec, _relaxed)
ATOMIC_LONG_INC_DEC_OP(dec, _acquire)
ATOMIC_LONG_INC_DEC_OP(dec, _release)
#undef ATOMIC_LONG_INC_DEC_OP
static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u)
{
......
......@@ -127,7 +127,7 @@ ATOMIC_OP(xor, ^)
* Atomically reads the value of @v.
*/
#ifndef atomic_read
#define atomic_read(v) ACCESS_ONCE((v)->counter)
#define atomic_read(v) READ_ONCE((v)->counter)
#endif
/**
......@@ -137,7 +137,7 @@ ATOMIC_OP(xor, ^)
*
* Atomically sets the value of @v to @i.
*/
#define atomic_set(v, i) (((v)->counter) = (i))
#define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
#include <linux/irqflags.h>
......
......@@ -20,7 +20,7 @@
static inline void
__mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
{
if (unlikely(atomic_dec_return(count) < 0))
if (unlikely(atomic_dec_return_acquire(count) < 0))
fail_fn(count);
}
......@@ -35,7 +35,7 @@ __mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
static inline int
__mutex_fastpath_lock_retval(atomic_t *count)
{
if (unlikely(atomic_dec_return(count) < 0))
if (unlikely(atomic_dec_return_acquire(count) < 0))
return -1;
return 0;
}
......@@ -56,7 +56,7 @@ __mutex_fastpath_lock_retval(atomic_t *count)
static inline void
__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
{
if (unlikely(atomic_inc_return(count) <= 0))
if (unlikely(atomic_inc_return_release(count) <= 0))
fail_fn(count);
}
......@@ -80,7 +80,7 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
static inline int
__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
{
if (likely(atomic_cmpxchg(count, 1, 0) == 1))
if (likely(atomic_cmpxchg_acquire(count, 1, 0) == 1))
return 1;
return 0;
}
......
......@@ -31,7 +31,7 @@ __mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
* to ensure that any waiting tasks are woken up by the
* unlock slow path.
*/
if (likely(atomic_xchg(count, -1) != 1))
if (likely(atomic_xchg_acquire(count, -1) != 1))
fail_fn(count);
}
......@@ -46,7 +46,7 @@ __mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
static inline int
__mutex_fastpath_lock_retval(atomic_t *count)
{
if (unlikely(atomic_xchg(count, 0) != 1))
if (unlikely(atomic_xchg_acquire(count, 0) != 1))
if (likely(atomic_xchg(count, -1) != 1))
return -1;
return 0;
......@@ -67,7 +67,7 @@ __mutex_fastpath_lock_retval(atomic_t *count)
static inline void
__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
{
if (unlikely(atomic_xchg(count, 1) != 0))
if (unlikely(atomic_xchg_release(count, 1) != 0))
fail_fn(count);
}
......@@ -91,7 +91,7 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
static inline int
__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
{
int prev = atomic_xchg(count, 0);
int prev = atomic_xchg_acquire(count, 0);
if (unlikely(prev < 0)) {
/*
......@@ -105,7 +105,7 @@ __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
* owner's unlock path needlessly, but that's not a problem
* in practice. ]
*/
prev = atomic_xchg(count, prev);
prev = atomic_xchg_acquire(count, prev);
if (prev < 0)
prev = 0;
}
......
......@@ -10,12 +10,12 @@
typedef struct qrwlock {
atomic_t cnts;
arch_spinlock_t lock;
arch_spinlock_t wait_lock;
} arch_rwlock_t;
#define __ARCH_RW_LOCK_UNLOCKED { \
.cnts = ATOMIC_INIT(0), \
.lock = __ARCH_SPIN_LOCK_UNLOCKED, \
.wait_lock = __ARCH_SPIN_LOCK_UNLOCKED, \
}
#endif /* __ASM_GENERIC_QRWLOCK_TYPES_H */
......@@ -33,7 +33,7 @@
*/
static inline void __down_read(struct rw_semaphore *sem)
{
if (unlikely(atomic_long_inc_return((atomic_long_t *)&sem->count) <= 0))
if (unlikely(atomic_long_inc_return_acquire((atomic_long_t *)&sem->count) <= 0))
rwsem_down_read_failed(sem);
}
......@@ -42,7 +42,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem)
long tmp;
while ((tmp = sem->count) >= 0) {
if (tmp == cmpxchg(&sem->count, tmp,
if (tmp == cmpxchg_acquire(&sem->count, tmp,
tmp + RWSEM_ACTIVE_READ_BIAS)) {
return 1;
}
......@@ -57,7 +57,7 @@ static inline void __down_write_nested(struct rw_semaphore *sem, int subclass)
{
long tmp;
tmp = atomic_long_add_return(RWSEM_ACTIVE_WRITE_BIAS,
tmp = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS,
(atomic_long_t *)&sem->count);
if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS))
rwsem_down_write_failed(sem);
......@@ -72,7 +72,7 @@ static inline int __down_write_trylock(struct rw_semaphore *sem)
{
long tmp;
tmp = cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE,
tmp = cmpxchg_acquire(&sem->count, RWSEM_UNLOCKED_VALUE,
RWSEM_ACTIVE_WRITE_BIAS);
return tmp == RWSEM_UNLOCKED_VALUE;
}
......@@ -84,7 +84,7 @@ static inline void __up_read(struct rw_semaphore *sem)
{
long tmp;
tmp = atomic_long_dec_return((atomic_long_t *)&sem->count);
tmp = atomic_long_dec_return_release((atomic_long_t *)&sem->count);
if (unlikely(tmp < -1 && (tmp & RWSEM_ACTIVE_MASK) == 0))
rwsem_wake(sem);
}
......@@ -94,7 +94,7 @@ static inline void __up_read(struct rw_semaphore *sem)
*/
static inline void __up_write(struct rw_semaphore *sem)
{
if (unlikely(atomic_long_sub_return(RWSEM_ACTIVE_WRITE_BIAS,
if (unlikely(atomic_long_sub_return_release(RWSEM_ACTIVE_WRITE_BIAS,
(atomic_long_t *)&sem->count) < 0))
rwsem_wake(sem);
}
......@@ -114,7 +114,14 @@ static inline void __downgrade_write(struct rw_semaphore *sem)
{
long tmp;
tmp = atomic_long_add_return(-RWSEM_WAITING_BIAS,
/*
* When downgrading from exclusive to shared ownership,
* anything inside the write-locked region cannot leak
* into the read side. In contrast, anything in the
* read-locked region is ok to be re-ordered into the
* write side. As such, rely on RELEASE semantics.
*/
tmp = atomic_long_add_return_release(-RWSEM_WAITING_BIAS,
(atomic_long_t *)&sem->count);
if (tmp < 0)
rwsem_downgrade_wake(sem);
......
......@@ -4,6 +4,15 @@
#include <asm/atomic.h>
#include <asm/barrier.h>
#ifndef atomic_read_ctrl
static inline int atomic_read_ctrl(const atomic_t *v)
{
int val = atomic_read(v);
smp_read_barrier_depends(); /* Enforce control dependency. */
return val;
}
#endif
/*
* Relaxed variants of xchg, cmpxchg and some atomic operations.
*
......@@ -81,6 +90,30 @@
#endif
#endif /* atomic_add_return_relaxed */
/* atomic_inc_return_relaxed */
#ifndef atomic_inc_return_relaxed
#define atomic_inc_return_relaxed atomic_inc_return
#define atomic_inc_return_acquire atomic_inc_return
#define atomic_inc_return_release atomic_inc_return
#else /* atomic_inc_return_relaxed */
#ifndef atomic_inc_return_acquire
#define atomic_inc_return_acquire(...) \
__atomic_op_acquire(atomic_inc_return, __VA_ARGS__)
#endif
#ifndef atomic_inc_return_release
#define atomic_inc_return_release(...) \
__atomic_op_release(atomic_inc_return, __VA_ARGS__)
#endif
#ifndef atomic_inc_return
#define atomic_inc_return(...) \
__atomic_op_fence(atomic_inc_return, __VA_ARGS__)
#endif
#endif /* atomic_inc_return_relaxed */
/* atomic_sub_return_relaxed */
#ifndef atomic_sub_return_relaxed
#define atomic_sub_return_relaxed atomic_sub_return
......@@ -105,6 +138,30 @@
#endif
#endif /* atomic_sub_return_relaxed */
/* atomic_dec_return_relaxed */
#ifndef atomic_dec_return_relaxed
#define atomic_dec_return_relaxed atomic_dec_return
#define atomic_dec_return_acquire atomic_dec_return
#define atomic_dec_return_release atomic_dec_return
#else /* atomic_dec_return_relaxed */
#ifndef atomic_dec_return_acquire
#define atomic_dec_return_acquire(...) \
__atomic_op_acquire(atomic_dec_return, __VA_ARGS__)
#endif
#ifndef atomic_dec_return_release
#define atomic_dec_return_release(...) \
__atomic_op_release(atomic_dec_return, __VA_ARGS__)
#endif
#ifndef atomic_dec_return
#define atomic_dec_return(...) \
__atomic_op_fence(atomic_dec_return, __VA_ARGS__)
#endif
#endif /* atomic_dec_return_relaxed */
/* atomic_xchg_relaxed */
#ifndef atomic_xchg_relaxed
#define atomic_xchg_relaxed atomic_xchg
......@@ -185,6 +242,31 @@
#endif
#endif /* atomic64_add_return_relaxed */
/* atomic64_inc_return_relaxed */
#ifndef atomic64_inc_return_relaxed
#define atomic64_inc_return_relaxed atomic64_inc_return
#define atomic64_inc_return_acquire atomic64_inc_return
#define atomic64_inc_return_release atomic64_inc_return
#else /* atomic64_inc_return_relaxed */
#ifndef atomic64_inc_return_acquire
#define atomic64_inc_return_acquire(...) \
__atomic_op_acquire(atomic64_inc_return, __VA_ARGS__)
#endif
#ifndef atomic64_inc_return_release
#define atomic64_inc_return_release(...) \
__atomic_op_release(atomic64_inc_return, __VA_ARGS__)
#endif
#ifndef atomic64_inc_return
#define atomic64_inc_return(...) \
__atomic_op_fence(atomic64_inc_return, __VA_ARGS__)
#endif
#endif /* atomic64_inc_return_relaxed */
/* atomic64_sub_return_relaxed */
#ifndef atomic64_sub_return_relaxed
#define atomic64_sub_return_relaxed atomic64_sub_return
......@@ -209,6 +291,30 @@
#endif
#endif /* atomic64_sub_return_relaxed */
/* atomic64_dec_return_relaxed */
#ifndef atomic64_dec_return_relaxed
#define atomic64_dec_return_relaxed atomic64_dec_return
#define atomic64_dec_return_acquire atomic64_dec_return
#define atomic64_dec_return_release atomic64_dec_return
#else /* atomic64_dec_return_relaxed */
#ifndef atomic64_dec_return_acquire
#define atomic64_dec_return_acquire(...) \
__atomic_op_acquire(atomic64_dec_return, __VA_ARGS__)
#endif
#ifndef atomic64_dec_return_release
#define atomic64_dec_return_release(...) \
__atomic_op_release(atomic64_dec_return, __VA_ARGS__)
#endif
#ifndef atomic64_dec_return
#define atomic64_dec_return(...) \
__atomic_op_fence(atomic64_dec_return, __VA_ARGS__)
#endif
#endif /* atomic64_dec_return_relaxed */
/* atomic64_xchg_relaxed */
#ifndef atomic64_xchg_relaxed
#define atomic64_xchg_relaxed atomic64_xchg
......@@ -451,11 +557,19 @@ static inline int atomic_dec_if_positive(atomic_t *v)
}
#endif
#include <asm-generic/atomic-long.h>
#ifdef CONFIG_GENERIC_ATOMIC64
#include <asm-generic/atomic64.h>
#endif
#ifndef atomic64_read_ctrl
static inline long long atomic64_read_ctrl(const atomic64_t *v)
{
long long val = atomic64_read(v);
smp_read_barrier_depends(); /* Enforce control dependency. */
return val;
}
#endif
#ifndef atomic64_andnot
static inline void atomic64_andnot(long long i, atomic64_t *v)
{
......@@ -463,4 +577,6 @@ static inline void atomic64_andnot(long long i, atomic64_t *v)
}
#endif
#include <asm-generic/atomic-long.h>
#endif /* _LINUX_ATOMIC_H */
......@@ -255,9 +255,18 @@ struct futex_hash_bucket {
struct plist_head chain;
} ____cacheline_aligned_in_smp;
static unsigned long __read_mostly futex_hashsize;
/*
* The base of the bucket array and its size are always used together
* (after initialization only in hash_futex()), so ensure that they
* reside in the same cacheline.
*/
static struct {
struct futex_hash_bucket *queues;
unsigned long hashsize;
} __futex_data __read_mostly __aligned(2*sizeof(long));
#define futex_queues (__futex_data.queues)
#define futex_hashsize (__futex_data.hashsize)
static struct futex_hash_bucket *futex_queues;
/*
* Fault injections for futexes.
......
......@@ -67,7 +67,7 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
node->locked = 0;
node->next = NULL;
prev = xchg(lock, node);
prev = xchg_acquire(lock, node);
if (likely(prev == NULL)) {
/*
* Lock acquired, don't need to set node->locked to 1. Threads
......@@ -98,7 +98,7 @@ void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
/*
* Release the lock by setting it to NULL
*/
if (likely(cmpxchg(lock, node, NULL) == node))
if (likely(cmpxchg_release(lock, node, NULL) == node))
return;
/* Wait until the next pointer is set */
while (!(next = READ_ONCE(node->next)))
......
......@@ -277,7 +277,7 @@ static inline int mutex_can_spin_on_owner(struct mutex *lock)
static inline bool mutex_try_to_acquire(struct mutex *lock)
{
return !mutex_is_locked(lock) &&
(atomic_cmpxchg(&lock->count, 1, 0) == 1);
(atomic_cmpxchg_acquire(&lock->count, 1, 0) == 1);
}
/*
......@@ -529,7 +529,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
* Once more, try to acquire the lock. Only try-lock the mutex if
* it is unlocked to reduce unnecessary xchg() operations.
*/
if (!mutex_is_locked(lock) && (atomic_xchg(&lock->count, 0) == 1))
if (!mutex_is_locked(lock) &&
(atomic_xchg_acquire(&lock->count, 0) == 1))
goto skip_wait;
debug_mutex_lock_common(lock, &waiter);
......@@ -553,7 +554,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
* non-negative in order to avoid unnecessary xchg operations:
*/
if (atomic_read(&lock->count) >= 0 &&
(atomic_xchg(&lock->count, -1) == 1))
(atomic_xchg_acquire(&lock->count, -1) == 1))
break;
/*
......@@ -867,7 +868,7 @@ static inline int __mutex_trylock_slowpath(atomic_t *lock_count)
spin_lock_mutex(&lock->wait_lock, flags);
prev = atomic_xchg(&lock->count, -1);
prev = atomic_xchg_acquire(&lock->count, -1);
if (likely(prev == 1)) {
mutex_set_owner(lock);
mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
......
......@@ -50,7 +50,7 @@ osq_wait_next(struct optimistic_spin_queue *lock,
for (;;) {
if (atomic_read(&lock->tail) == curr &&
atomic_cmpxchg(&lock->tail, curr, old) == curr) {
atomic_cmpxchg_acquire(&lock->tail, curr, old) == curr) {
/*
* We were the last queued, we moved @lock back. @prev
* will now observe @lock and will complete its
......@@ -92,7 +92,11 @@ bool osq_lock(struct optimistic_spin_queue *lock)
node->next = NULL;
node->cpu = curr;
old = atomic_xchg(&lock->tail, curr);
/*
* ACQUIRE semantics, pairs with corresponding RELEASE
* in unlock() uncontended, or fastpath.
*/
old = atomic_xchg_acquire(&lock->tail, curr);
if (old == OSQ_UNLOCKED_VAL)
return true;
......@@ -184,7 +188,8 @@ void osq_unlock(struct optimistic_spin_queue *lock)
/*
* Fast path for the uncontended case.
*/
if (likely(atomic_cmpxchg(&lock->tail, curr, OSQ_UNLOCKED_VAL) == curr))
if (likely(atomic_cmpxchg_release(&lock->tail, curr,
OSQ_UNLOCKED_VAL) == curr))
return;
/*
......
......@@ -86,7 +86,7 @@ void queued_read_lock_slowpath(struct qrwlock *lock, u32 cnts)
/*
* Put the reader into the wait queue
*/
arch_spin_lock(&lock->lock);
arch_spin_lock(&lock->wait_lock);
/*
* The ACQUIRE semantics of the following spinning code ensure
......@@ -99,7 +99,7 @@ void queued_read_lock_slowpath(struct qrwlock *lock, u32 cnts)
/*
* Signal the next one in queue to become queue head
*/
arch_spin_unlock(&lock->lock);
arch_spin_unlock(&lock->wait_lock);
}
EXPORT_SYMBOL(queued_read_lock_slowpath);
......@@ -112,7 +112,7 @@ void queued_write_lock_slowpath(struct qrwlock *lock)
u32 cnts;
/* Put the writer into the wait queue */
arch_spin_lock(&lock->lock);
arch_spin_lock(&lock->wait_lock);
/* Try to acquire the lock directly if no reader is present */
if (!atomic_read(&lock->cnts) &&
......@@ -144,6 +144,6 @@ void queued_write_lock_slowpath(struct qrwlock *lock)
cpu_relax_lowlatency();
}
unlock:
arch_spin_unlock(&lock->lock);
arch_spin_unlock(&lock->wait_lock);
}
EXPORT_SYMBOL(queued_write_lock_slowpath);
......@@ -267,7 +267,6 @@ static void pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node)
}
if (!lp) { /* ONCE */
WRITE_ONCE(pn->state, vcpu_hashed);
lp = pv_hash(lock, pn);
/*
......@@ -275,11 +274,9 @@ static void pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node)
* when we observe _Q_SLOW_VAL in __pv_queued_spin_unlock()
* we'll be sure to be able to observe our hash entry.
*
* [S] pn->state
* [S] <hash> [Rmw] l->locked == _Q_SLOW_VAL
* MB RMB
* [RmW] l->locked = _Q_SLOW_VAL [L] <unhash>
* [L] pn->state
*
* Matches the smp_rmb() in __pv_queued_spin_unlock().
*/
......@@ -364,7 +361,6 @@ __visible void __pv_queued_spin_unlock(struct qspinlock *lock)
* vCPU is harmless other than the additional latency in completing
* the unlock.
*/
if (READ_ONCE(node->state) == vcpu_hashed)
pv_kick(node->cpu);
}
/*
......
......@@ -74,14 +74,23 @@ static void fixup_rt_mutex_waiters(struct rt_mutex *lock)
* set up.
*/
#ifndef CONFIG_DEBUG_RT_MUTEXES
# define rt_mutex_cmpxchg(l,c,n) (cmpxchg(&l->owner, c, n) == c)
# define rt_mutex_cmpxchg_relaxed(l,c,n) (cmpxchg_relaxed(&l->owner, c, n) == c)
# define rt_mutex_cmpxchg_acquire(l,c,n) (cmpxchg_acquire(&l->owner, c, n) == c)
# define rt_mutex_cmpxchg_release(l,c,n) (cmpxchg_release(&l->owner, c, n) == c)
/*
* Callers must hold the ->wait_lock -- which is the whole purpose as we force
* all future threads that attempt to [Rmw] the lock to the slowpath. As such
* relaxed semantics suffice.
*/
static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
{
unsigned long owner, *p = (unsigned long *) &lock->owner;
do {
owner = *p;
} while (cmpxchg(p, owner, owner | RT_MUTEX_HAS_WAITERS) != owner);
} while (cmpxchg_relaxed(p, owner,
owner | RT_MUTEX_HAS_WAITERS) != owner);
}
/*
......@@ -121,11 +130,14 @@ static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
* lock(wait_lock);
* acquire(lock);
*/
return rt_mutex_cmpxchg(lock, owner, NULL);
return rt_mutex_cmpxchg_release(lock, owner, NULL);
}
#else
# define rt_mutex_cmpxchg(l,c,n) (0)
# define rt_mutex_cmpxchg_relaxed(l,c,n) (0)
# define rt_mutex_cmpxchg_acquire(l,c,n) (0)
# define rt_mutex_cmpxchg_release(l,c,n) (0)
static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
{
lock->owner = (struct task_struct *)
......@@ -1321,7 +1333,7 @@ rt_mutex_fastlock(struct rt_mutex *lock, int state,
struct hrtimer_sleeper *timeout,
enum rtmutex_chainwalk chwalk))
{
if (likely(rt_mutex_cmpxchg(lock, NULL, current))) {
if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) {
rt_mutex_deadlock_account_lock(lock, current);
return 0;
} else
......@@ -1337,7 +1349,7 @@ rt_mutex_timed_fastlock(struct rt_mutex *lock, int state,
enum rtmutex_chainwalk chwalk))
{
if (chwalk == RT_MUTEX_MIN_CHAINWALK &&
likely(rt_mutex_cmpxchg(lock, NULL, current))) {
likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) {
rt_mutex_deadlock_account_lock(lock, current);
return 0;
} else
......@@ -1348,7 +1360,7 @@ static inline int
rt_mutex_fasttrylock(struct rt_mutex *lock,
int (*slowfn)(struct rt_mutex *lock))
{
if (likely(rt_mutex_cmpxchg(lock, NULL, current))) {
if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) {
rt_mutex_deadlock_account_lock(lock, current);
return 1;
}
......@@ -1362,7 +1374,7 @@ rt_mutex_fastunlock(struct rt_mutex *lock,
{
WAKE_Q(wake_q);
if (likely(rt_mutex_cmpxchg(lock, current, NULL))) {
if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) {
rt_mutex_deadlock_account_unlock(current);
} else {
......@@ -1484,7 +1496,7 @@ EXPORT_SYMBOL_GPL(rt_mutex_unlock);
bool __sched rt_mutex_futex_unlock(struct rt_mutex *lock,
struct wake_q_head *wqh)
{
if (likely(rt_mutex_cmpxchg(lock, current, NULL))) {
if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) {
rt_mutex_deadlock_account_unlock(current);
return false;
}
......
......@@ -262,7 +262,7 @@ static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
* to reduce unnecessary expensive cmpxchg() operations.
*/
if (count == RWSEM_WAITING_BIAS &&
cmpxchg(&sem->count, RWSEM_WAITING_BIAS,
cmpxchg_acquire(&sem->count, RWSEM_WAITING_BIAS,
RWSEM_ACTIVE_WRITE_BIAS) == RWSEM_WAITING_BIAS) {
if (!list_is_singular(&sem->wait_list))
rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);
......@@ -285,7 +285,8 @@ static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
if (!(count == 0 || count == RWSEM_WAITING_BIAS))
return false;
old = cmpxchg(&sem->count, count, count + RWSEM_ACTIVE_WRITE_BIAS);
old = cmpxchg_acquire(&sem->count, count,
count + RWSEM_ACTIVE_WRITE_BIAS);
if (old == count) {
rwsem_set_owner(sem);
return true;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment