Commit 69d927bb authored by Peter Zijlstra's avatar Peter Zijlstra Committed by Ingo Molnar

x86/atomic: Fix smp_mb__{before,after}_atomic()

Recent probing at the Linux Kernel Memory Model uncovered a
'surprise'. Strongly ordered architectures where the atomic RmW
primitive implies full memory ordering and
smp_mb__{before,after}_atomic() are a simple barrier() (such as x86)
fail for:

	*x = 1;
	atomic_inc(u);
	smp_mb__after_atomic();
	r0 = *y;

Because, while the atomic_inc() implies memory order, it
(surprisingly) does not provide a compiler barrier. This then allows
the compiler to re-order like so:

	atomic_inc(u);
	*x = 1;
	smp_mb__after_atomic();
	r0 = *y;

Which the CPU is then allowed to re-order (under TSO rules) like:

	atomic_inc(u);
	r0 = *y;
	*x = 1;

And this very much was not intended. Therefore strengthen the atomic
RmW ops to include a compiler barrier.

NOTE: atomic_{or,and,xor} and the bitops already had the compiler
barrier.
Signed-off-by: default avatarPeter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
parent dd471efe
...@@ -196,6 +196,9 @@ These helper barriers exist because architectures have varying implicit ...@@ -196,6 +196,9 @@ These helper barriers exist because architectures have varying implicit
ordering on their SMP atomic primitives. For example our TSO architectures ordering on their SMP atomic primitives. For example our TSO architectures
provide full ordered atomics and these barriers are no-ops. provide full ordered atomics and these barriers are no-ops.
NOTE: when the atomic RmW ops are fully ordered, they should also imply a
compiler barrier.
Thus: Thus:
atomic_fetch_add(); atomic_fetch_add();
......
...@@ -54,7 +54,7 @@ static __always_inline void arch_atomic_add(int i, atomic_t *v) ...@@ -54,7 +54,7 @@ static __always_inline void arch_atomic_add(int i, atomic_t *v)
{ {
asm volatile(LOCK_PREFIX "addl %1,%0" asm volatile(LOCK_PREFIX "addl %1,%0"
: "+m" (v->counter) : "+m" (v->counter)
: "ir" (i)); : "ir" (i) : "memory");
} }
/** /**
...@@ -68,7 +68,7 @@ static __always_inline void arch_atomic_sub(int i, atomic_t *v) ...@@ -68,7 +68,7 @@ static __always_inline void arch_atomic_sub(int i, atomic_t *v)
{ {
asm volatile(LOCK_PREFIX "subl %1,%0" asm volatile(LOCK_PREFIX "subl %1,%0"
: "+m" (v->counter) : "+m" (v->counter)
: "ir" (i)); : "ir" (i) : "memory");
} }
/** /**
...@@ -95,7 +95,7 @@ static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v) ...@@ -95,7 +95,7 @@ static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v)
static __always_inline void arch_atomic_inc(atomic_t *v) static __always_inline void arch_atomic_inc(atomic_t *v)
{ {
asm volatile(LOCK_PREFIX "incl %0" asm volatile(LOCK_PREFIX "incl %0"
: "+m" (v->counter)); : "+m" (v->counter) :: "memory");
} }
#define arch_atomic_inc arch_atomic_inc #define arch_atomic_inc arch_atomic_inc
...@@ -108,7 +108,7 @@ static __always_inline void arch_atomic_inc(atomic_t *v) ...@@ -108,7 +108,7 @@ static __always_inline void arch_atomic_inc(atomic_t *v)
static __always_inline void arch_atomic_dec(atomic_t *v) static __always_inline void arch_atomic_dec(atomic_t *v)
{ {
asm volatile(LOCK_PREFIX "decl %0" asm volatile(LOCK_PREFIX "decl %0"
: "+m" (v->counter)); : "+m" (v->counter) :: "memory");
} }
#define arch_atomic_dec arch_atomic_dec #define arch_atomic_dec arch_atomic_dec
......
...@@ -45,7 +45,7 @@ static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v) ...@@ -45,7 +45,7 @@ static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v)
{ {
asm volatile(LOCK_PREFIX "addq %1,%0" asm volatile(LOCK_PREFIX "addq %1,%0"
: "=m" (v->counter) : "=m" (v->counter)
: "er" (i), "m" (v->counter)); : "er" (i), "m" (v->counter) : "memory");
} }
/** /**
...@@ -59,7 +59,7 @@ static inline void arch_atomic64_sub(s64 i, atomic64_t *v) ...@@ -59,7 +59,7 @@ static inline void arch_atomic64_sub(s64 i, atomic64_t *v)
{ {
asm volatile(LOCK_PREFIX "subq %1,%0" asm volatile(LOCK_PREFIX "subq %1,%0"
: "=m" (v->counter) : "=m" (v->counter)
: "er" (i), "m" (v->counter)); : "er" (i), "m" (v->counter) : "memory");
} }
/** /**
...@@ -87,7 +87,7 @@ static __always_inline void arch_atomic64_inc(atomic64_t *v) ...@@ -87,7 +87,7 @@ static __always_inline void arch_atomic64_inc(atomic64_t *v)
{ {
asm volatile(LOCK_PREFIX "incq %0" asm volatile(LOCK_PREFIX "incq %0"
: "=m" (v->counter) : "=m" (v->counter)
: "m" (v->counter)); : "m" (v->counter) : "memory");
} }
#define arch_atomic64_inc arch_atomic64_inc #define arch_atomic64_inc arch_atomic64_inc
...@@ -101,7 +101,7 @@ static __always_inline void arch_atomic64_dec(atomic64_t *v) ...@@ -101,7 +101,7 @@ static __always_inline void arch_atomic64_dec(atomic64_t *v)
{ {
asm volatile(LOCK_PREFIX "decq %0" asm volatile(LOCK_PREFIX "decq %0"
: "=m" (v->counter) : "=m" (v->counter)
: "m" (v->counter)); : "m" (v->counter) : "memory");
} }
#define arch_atomic64_dec arch_atomic64_dec #define arch_atomic64_dec arch_atomic64_dec
......
...@@ -80,8 +80,8 @@ do { \ ...@@ -80,8 +80,8 @@ do { \
}) })
/* Atomic operations are already serializing on x86 */ /* Atomic operations are already serializing on x86 */
#define __smp_mb__before_atomic() barrier() #define __smp_mb__before_atomic() do { } while (0)
#define __smp_mb__after_atomic() barrier() #define __smp_mb__after_atomic() do { } while (0)
#include <asm-generic/barrier.h> #include <asm-generic/barrier.h>
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment