Merge bk://kernel.bkbits.net/davem/sparc-2.6

into ppc970.osdl.org:/home/torvalds/v2.6/linux

Merge bk://kernel.bkbits.net/davem/sparc-2.6
into ppc970.osdl.org:/home/torvalds/v2.6/linux
747df425 · Linus Torvalds · 72aa1a62 · 190bd54f · 747df425 · 747df425
Commit 747df425 authored Feb 06, 2005 by Linus Torvalds
4 changed files
--- a/Documentation/atomic_ops.txt
+++ b/Documentation/atomic_ops.txt
@@ -4,8 +4,8 @@
 			  David S. Miller	 
 	This document is intended to serve as a guide to Linux port
-maintainers on how to implement atomic counter and bitops interfaces
+maintainers on how to implement atomic counter, bitops, and spinlock
-properly.
+interfaces properly.
 	The atomic_t type should be defined as a signed integer.
 Also, it should be made opaque such that any kind of cast to a normal
@@ -242,6 +242,19 @@ happen.  Specifically, in the above case the atomic_dec_and_test()
 counter decrement would not become globally visible until the
 obj->active update does.
+As a historical note, 32-bit Sparc used to only allow usage of
+24-bits of its atomic_t type.  This was because it used 8 bits
+as a spinlock for SMP safety.  Sparc32 lacked a "compare and swap"
+type instruction.  However, 32-bit Sparc has since been moved over
+to a "hash table of spinlocks" scheme, that allows the full 32-bit
+counter to be realized.  Essentially, an array of spinlocks is
+indexed into based upon the address of the atomic_t being operated
+on, and that lock protects the atomic operation.  Parisc uses the
+same scheme.
+Another note is that the atomic_t operations returning values are
+extremely slow on an old 386.
 We will now cover the atomic bitmask operations.  You will find that
 their SMP and memory barrier semantics are similar in shape and scope
 to the atomic_t ops above.
@@ -345,3 +358,99 @@ except that two underscores are prefixed to the interface name.
 These non-atomic variants also do not require any special memory
 barrier semantics.
+The routines xchg() and cmpxchg() need the same exact memory barriers
+as the atomic and bit operations returning values.
+Spinlocks and rwlocks have memory barrier expectations as well.
+The rule to follow is simple:
+1) When acquiring a lock, the implementation must make it globally
+   visible before any subsequent memory operation.
+2) When releasing a lock, the implementation must make it such that
+   all previous memory operations are globally visible before the
+   lock release.
+Which finally brings us to _atomic_dec_and_lock().  There is an
+architecture-neutral version implemented in lib/dec_and_lock.c,
+but most platforms will wish to optimize this in assembler.
+	int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock);
+Atomically decrement the given counter, and if it will drop to zero
+atomically acquire the given spinlock and perform the decrement
+of the counter to zero.  If it does not drop to zero, do nothing
+with the spinlock.
+It is actually pretty simple to get the memory barrier correct.
+Simply satisfy the spinlock grab requirements, which is to make
+sure the spinlock operation is globally visible before any
+subsequent memory operation.
+We can demonstrate this operation more clearly if we define
+an abstract atomic operation:
+	long cas(long *mem, long old, long new);
+"cas" stands for "compare and swap".  It atomically:
+1) Compares "old" with the value currently at "mem".
+2) If they are equal, "new" is written to "mem".
+3) Regardless, the current value at "mem" is returned.
+As an example usage, here is what an atomic counter update
+might look like:
+void example_atomic_inc(long *counter)
+{
+	long old, new, ret;
+	while (1) {
+		old = *counter;
+		new = old + 1;
+		ret = cas(counter, old, new);
+		if (ret == old)
+			break;
+	}
+}
+Let's use cas() in order to build a pseudo-C atomic_dec_and_lock():
+int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock)
+{
+	long old, new, ret;
+	int went_to_zero;
+	went_to_zero = 0;
+	while (1) {
+		old = atomic_read(atomic);
+		new = old - 1;
+		if (new == 0) {
+			went_to_zero = 1;
+			spin_lock(lock);
+		}
+		ret = cas(atomic, old, new);
+		if (ret == old)
+			break;
+		if (went_to_zero) {
+			spin_unlock(lock);
+			went_to_zero = 0;
+		}
+	}
+	return went_to_zero;
+}
+Now, as far as memory barriers go, as long as spin_lock()
+strictly orders all subsequent memory operations (including
+the cas()) with respect to itself, things will be fine.
+Said another way, _atomic_dec_and_lock() must guarantee that
+a counter dropping to zero is never made visible before the
+spinlock being acquired.
+Note that this also means that for the case where the counter
+is not dropping to zero, there are no memory ordering
+requirements.
--- a/arch/sparc64/lib/debuglocks.c
+++ b/arch/sparc64/lib/debuglocks.c
@@ -172,6 +172,7 @@ void _do_read_unlock (rwlock_t *rw, char *str)
 runlock_again:
 	/* Spin trying to decrement the counter using casx.  */
 	__asm__ __volatile__(
+"	membar	#StoreLoad | #LoadLoad\n"
 "	ldx	[%0], %%g5\n"
 "	sub	%%g5, 1, %%g7\n"
 "	casx	[%0], %%g5, %%g7\n"
@@ -290,6 +291,7 @@ void _do_write_unlock(rwlock_t *rw)
 	current->thread.smp_lock_count--;
 wlock_again:
 	__asm__ __volatile__(
+"	membar	#StoreLoad | #LoadLoad\n"
 "	mov	1, %%g3\n"
 "	sllx	%%g3, 63, %%g3\n"
 "	ldx	[%0], %%g5\n"

--- a/include/asm-sparc64/spinlock.h
+++ b/include/asm-sparc64/spinlock.h
@@ -171,12 +171,13 @@ static void inline __read_unlock(rwlock_t *lock)
 	unsigned long tmp1, tmp2;
 	__asm__ __volatile__(
+"	membar	#StoreLoad | #LoadLoad\n"
 "1:	lduw	[%2], %0\n"
 "	sub	%0, 1, %1\n"
 "	cas	[%2], %0, %1\n"
 "	cmp	%0, %1\n"
 "	bne,pn	%%xcc, 1b\n"
-"	 membar	#StoreLoad | #StoreStore"
+"	 nop"
 	: "=&r" (tmp1), "=&r" (tmp2)
 	: "r" (lock)
 	: "memory");

--- a/include/asm-sparc64/system.h
+++ b/include/asm-sparc64/system.h
@@ -229,6 +229,7 @@ do {	if (test_thread_flag(TIF_PERFCTR)) {				\
 static __inline__ unsigned long xchg32(__volatile__ unsigned int *m, unsigned int val)
 {
 	__asm__ __volatile__(
+"	membar		#StoreLoad | #LoadLoad\n"
 "	mov		%0, %%g5\n"
 "1:	lduw		[%2], %%g7\n"
 "	cas		[%2], %%g7, %0\n"
@@ -245,6 +246,7 @@ static __inline__ unsigned long xchg32(__volatile__ unsigned int *m, unsigned in
 static __inline__ unsigned long xchg64(__volatile__ unsigned long *m, unsigned long val)
 {
 	__asm__ __volatile__(
+"	membar		#StoreLoad | #LoadLoad\n"
 "	mov		%0, %%g5\n"
 "1:	ldx		[%2], %%g7\n"
 "	casx		[%2], %%g7, %0\n"
@@ -289,7 +291,8 @@ extern void die_if_kernel(char *str, struct pt_regs *regs) __attribute__ ((noret
 static __inline__ unsigned long
 __cmpxchg_u32(volatile int *m, int old, int new)
 {
-	__asm__ __volatile__("cas [%2], %3, %0\n\t"
+	__asm__ __volatile__("membar #StoreLoad | #LoadLoad\n"
+			     "cas [%2], %3, %0\n\t"
 			     "membar #StoreLoad | #StoreStore"
 			     : "=&r" (new)
 			     : "0" (new), "r" (m), "r" (old)
@@ -301,7 +304,8 @@ __cmpxchg_u32(volatile int *m, int old, int new)
 static __inline__ unsigned long
 __cmpxchg_u64(volatile long *m, unsigned long old, unsigned long new)
 {
-	__asm__ __volatile__("casx [%2], %3, %0\n\t"
+	__asm__ __volatile__("membar #StoreLoad | #LoadLoad\n"
+			     "casx [%2], %3, %0\n\t"
 			     "membar #StoreLoad | #StoreStore"
 			     : "=&r" (new)
 			     : "0" (new), "r" (m), "r" (old)