Commit 24ba3c53 authored by Eric Dumazet, committed by Willy Tarreau

af_unix: fix a fatal race with bit fields

commit 60bc851a upstream.

Using bit fields is dangerous on ppc64/sparc64, as the compiler [1]
uses 64bit instructions to manipulate them.
If the 64bit word includes any atomic_t or spinlock_t, we can lose
critical concurrent changes.

This is happening in af_unix, where unix_sk(sk)->gc_candidate/
gc_maybe_cycle/lock share the same 64bit word.

This leads to fatal deadlock, as one/several cpus spin forever
on a spinlock that will never be available again.

A safer way would be to use a long to store flags.
This way we are sure the compiler/arch won't do bad things.

As we own the unix_gc_lock spinlock when clearing or setting bits,
we can use the non-atomic __set_bit()/__clear_bit().

recursion_level can share the same 64bit location with the spinlock,
as it is set only with this spinlock held.

[1] bug fixed in gcc-4.8.0 :
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=52080

Reported-by: Ambrose Feinstein <ambrose@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
(cherry picked from commit 2ee9cbe7)
[wt: adjusted context]
Signed-off-by: Willy Tarreau <w@1wt.eu>
parent 7e0e67b0
...@@ -55,9 +55,10 @@ struct unix_sock { ...@@ -55,9 +55,10 @@ struct unix_sock {
struct list_head link; struct list_head link;
atomic_long_t inflight; atomic_long_t inflight;
spinlock_t lock; spinlock_t lock;
unsigned int gc_candidate : 1;
unsigned int gc_maybe_cycle : 1;
unsigned char recursion_level; unsigned char recursion_level;
unsigned long gc_flags;
#define UNIX_GC_CANDIDATE 0
#define UNIX_GC_MAYBE_CYCLE 1
wait_queue_head_t peer_wait; wait_queue_head_t peer_wait;
wait_queue_t peer_wake; wait_queue_t peer_wake;
}; };
......
...@@ -195,7 +195,7 @@ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *), ...@@ -195,7 +195,7 @@ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
* have been added to the queues after * have been added to the queues after
* starting the garbage collection * starting the garbage collection
*/ */
if (u->gc_candidate) { if (test_bit(UNIX_GC_CANDIDATE, &u->gc_flags)) {
hit = true; hit = true;
func(u); func(u);
} }
...@@ -264,7 +264,7 @@ static void inc_inflight_move_tail(struct unix_sock *u) ...@@ -264,7 +264,7 @@ static void inc_inflight_move_tail(struct unix_sock *u)
* of the list, so that it's checked even if it was already * of the list, so that it's checked even if it was already
* passed over * passed over
*/ */
if (u->gc_maybe_cycle) if (test_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags))
list_move_tail(&u->link, &gc_candidates); list_move_tail(&u->link, &gc_candidates);
} }
...@@ -325,8 +325,8 @@ void unix_gc(void) ...@@ -325,8 +325,8 @@ void unix_gc(void)
BUG_ON(total_refs < inflight_refs); BUG_ON(total_refs < inflight_refs);
if (total_refs == inflight_refs) { if (total_refs == inflight_refs) {
list_move_tail(&u->link, &gc_candidates); list_move_tail(&u->link, &gc_candidates);
u->gc_candidate = 1; __set_bit(UNIX_GC_CANDIDATE, &u->gc_flags);
u->gc_maybe_cycle = 1; __set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
} }
} }
...@@ -354,7 +354,7 @@ void unix_gc(void) ...@@ -354,7 +354,7 @@ void unix_gc(void)
if (atomic_long_read(&u->inflight) > 0) { if (atomic_long_read(&u->inflight) > 0) {
list_move_tail(&u->link, &not_cycle_list); list_move_tail(&u->link, &not_cycle_list);
u->gc_maybe_cycle = 0; __clear_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
scan_children(&u->sk, inc_inflight_move_tail, NULL); scan_children(&u->sk, inc_inflight_move_tail, NULL);
} }
} }
...@@ -366,7 +366,7 @@ void unix_gc(void) ...@@ -366,7 +366,7 @@ void unix_gc(void)
*/ */
while (!list_empty(&not_cycle_list)) { while (!list_empty(&not_cycle_list)) {
u = list_entry(not_cycle_list.next, struct unix_sock, link); u = list_entry(not_cycle_list.next, struct unix_sock, link);
u->gc_candidate = 0; __clear_bit(UNIX_GC_CANDIDATE, &u->gc_flags);
list_move_tail(&u->link, &gc_inflight_list); list_move_tail(&u->link, &gc_inflight_list);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment