Commit f064af1e authored by Eric Dumazet, committed by David S. Miller

net: fix a lockdep splat

We have, for each socket:

One spinlock (sk_lock.slock)
One rwlock (sk_callback_lock)
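
For reference, a simplified sketch of where these two locks live in struct sock (abridged from include/net/sock.h; surrounding fields omitted):

struct sock {
	/* ... */
	socket_lock_t	sk_lock;		/* sk_lock.slock is the spinlock, taken via bh_lock_sock() */
	rwlock_t	sk_callback_lock;	/* protects sk_user_data and the sk_*_ready/state_change callbacks */
	/* ... */
};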

Possible scenarios are:

(A) (this is used in net/sunrpc/xprtsock.c)
read_lock(&sk->sk_callback_lock) (without blocking BH)
<BH>
spin_lock(&sk->sk_lock.slock);
...
read_lock(&sk->sk_callback_lock);
...

(B)
write_lock_bh(&sk->sk_callback_lock)
stuff
write_unlock_bh(&sk->sk_callback_lock)

(C)
spin_lock_bh(&sk->sk_lock.slock)
...
write_lock_bh(&sk->sk_callback_lock)
stuff
write_unlock_bh(&sk->sk_callback_lock)
spin_unlock_bh(&sk->sk_lock.slock)

Case (C) conflicts with (A):

CPU1 [A]                         CPU2 [C]
read_lock(callback_lock)
<BH>                             spin_lock_bh(slock)
<wait to spin_lock(slock)>
                                 <wait to write_lock_bh(callback_lock)>

CPU1 read-holds callback_lock when a BH arrives and spins on slock,
already held by CPU2; CPU2 in turn spins waiting to write-lock
callback_lock, which cannot be granted while CPU1 keeps the read side:
each CPU waits on the other, a deadlock.

We have one problematic (C) use case in inet_csk_listen_stop():

local_bh_disable();
bh_lock_sock(child); // spin_lock(&sk->sk_lock.slock), with BHs already disabled
WARN_ON(sock_owned_by_user(child));
...
sock_orphan(child); // write_lock_bh(&sk->sk_callback_lock)
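
For reference, sock_orphan() is the step that needs the write side; a simplified sketch of its definition (abridged from include/net/sock.h of this era):

static inline void sock_orphan(struct sock *sk)
{
	write_lock_bh(&sk->sk_callback_lock);	/* the write_lock_bh of scenario (C) */
	sock_set_flag(sk, SOCK_DEAD);
	sk_set_socket(sk, NULL);
	sk->sk_wq = NULL;
	write_unlock_bh(&sk->sk_callback_lock);
}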

lockdep is not happy with this, as reported by Tetsuo Handa.

It seems the only way to deal with this is to use
read_lock_bh(&sk->sk_callback_lock) everywhere.
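
A minimal sketch of the resulting pattern (example_sk_data_ready is a hypothetical callback standing in for the rds/sunrpc handlers this patch converts): the _bh variant keeps bottom halves off the local CPU for the whole section, so the BH of scenario (A) can no longer interleave and the (A)/(C) lock-order conflict disappears.

static void example_sk_data_ready(struct sock *sk, int bytes)
{
	/* was: read_lock(&sk->sk_callback_lock);
	 * read_lock_bh() also disables BHs on this CPU, so no BH can
	 * spin on sk_lock.slock while we read-hold sk_callback_lock.
	 */
	read_lock_bh(&sk->sk_callback_lock);

	/* safe to look at sk->sk_user_data and call the saved callback */

	read_unlock_bh(&sk->sk_callback_lock);
}

This is also why the xs_tcp_state_change() hunks below can relax spin_lock_bh(&xprt->transport_lock) to plain spin_lock(): that code now runs with BHs already disabled by read_lock_bh().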

Thanks to Jarek for pointing out a bug in my first attempt and for
suggesting this solution.
Reported-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Tested-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Jarek Poplawski <jarkao2@gmail.com>
Tested-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 605c82ba
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1351,9 +1351,9 @@ int sock_i_uid(struct sock *sk)
 {
 	int uid;
 
-	read_lock(&sk->sk_callback_lock);
+	read_lock_bh(&sk->sk_callback_lock);
 	uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0;
-	read_unlock(&sk->sk_callback_lock);
+	read_unlock_bh(&sk->sk_callback_lock);
 	return uid;
 }
 EXPORT_SYMBOL(sock_i_uid);
@@ -1362,9 +1362,9 @@ unsigned long sock_i_ino(struct sock *sk)
 {
 	unsigned long ino;
 
-	read_lock(&sk->sk_callback_lock);
+	read_lock_bh(&sk->sk_callback_lock);
 	ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
-	read_unlock(&sk->sk_callback_lock);
+	read_unlock_bh(&sk->sk_callback_lock);
 	return ino;
 }
 EXPORT_SYMBOL(sock_i_ino);
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -43,7 +43,7 @@ void rds_tcp_state_change(struct sock *sk)
 	struct rds_connection *conn;
 	struct rds_tcp_connection *tc;
 
-	read_lock(&sk->sk_callback_lock);
+	read_lock_bh(&sk->sk_callback_lock);
 	conn = sk->sk_user_data;
 	if (conn == NULL) {
 		state_change = sk->sk_state_change;
@@ -68,7 +68,7 @@ void rds_tcp_state_change(struct sock *sk)
 		break;
 	}
 out:
-	read_unlock(&sk->sk_callback_lock);
+	read_unlock_bh(&sk->sk_callback_lock);
 	state_change(sk);
 }
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -114,7 +114,7 @@ void rds_tcp_listen_data_ready(struct sock *sk, int bytes)
 
 	rdsdebug("listen data ready sk %p\n", sk);
 
-	read_lock(&sk->sk_callback_lock);
+	read_lock_bh(&sk->sk_callback_lock);
 	ready = sk->sk_user_data;
 	if (ready == NULL) { /* check for teardown race */
 		ready = sk->sk_data_ready;
@@ -131,7 +131,7 @@ void rds_tcp_listen_data_ready(struct sock *sk, int bytes)
 		queue_work(rds_wq, &rds_tcp_listen_work);
 
 out:
-	read_unlock(&sk->sk_callback_lock);
+	read_unlock_bh(&sk->sk_callback_lock);
 	ready(sk, bytes);
 }
--- a/net/rds/tcp_recv.c
+++ b/net/rds/tcp_recv.c
@@ -324,7 +324,7 @@ void rds_tcp_data_ready(struct sock *sk, int bytes)
 
 	rdsdebug("data ready sk %p bytes %d\n", sk, bytes);
 
-	read_lock(&sk->sk_callback_lock);
+	read_lock_bh(&sk->sk_callback_lock);
 	conn = sk->sk_user_data;
 	if (conn == NULL) { /* check for teardown race */
 		ready = sk->sk_data_ready;
@@ -338,7 +338,7 @@ void rds_tcp_data_ready(struct sock *sk, int bytes)
 	if (rds_tcp_read_sock(conn, GFP_ATOMIC, KM_SOFTIRQ0) == -ENOMEM)
 		queue_delayed_work(rds_wq, &conn->c_recv_w, 0);
 out:
-	read_unlock(&sk->sk_callback_lock);
+	read_unlock_bh(&sk->sk_callback_lock);
 	ready(sk, bytes);
 }
--- a/net/rds/tcp_send.c
+++ b/net/rds/tcp_send.c
@@ -224,7 +224,7 @@ void rds_tcp_write_space(struct sock *sk)
 	struct rds_connection *conn;
 	struct rds_tcp_connection *tc;
 
-	read_lock(&sk->sk_callback_lock);
+	read_lock_bh(&sk->sk_callback_lock);
 	conn = sk->sk_user_data;
 	if (conn == NULL) {
 		write_space = sk->sk_write_space;
@@ -244,7 +244,7 @@ void rds_tcp_write_space(struct sock *sk)
 		queue_delayed_work(rds_wq, &conn->c_send_w, 0);
 
 out:
-	read_unlock(&sk->sk_callback_lock);
+	read_unlock_bh(&sk->sk_callback_lock);
 
 	/*
 	 * write_space is only called when data leaves tcp's send queue if
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -800,7 +800,7 @@ static void xs_udp_data_ready(struct sock *sk, int len)
 	u32 _xid;
 	__be32 *xp;
 
-	read_lock(&sk->sk_callback_lock);
+	read_lock_bh(&sk->sk_callback_lock);
 	dprintk("RPC: xs_udp_data_ready...\n");
 	if (!(xprt = xprt_from_sock(sk)))
 		goto out;
@@ -852,7 +852,7 @@ static void xs_udp_data_ready(struct sock *sk, int len)
 dropit:
 	skb_free_datagram(sk, skb);
 out:
-	read_unlock(&sk->sk_callback_lock);
+	read_unlock_bh(&sk->sk_callback_lock);
 }
 
 static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, struct xdr_skb_reader *desc)
@@ -1229,7 +1229,7 @@ static void xs_tcp_data_ready(struct sock *sk, int bytes)
 
 	dprintk("RPC: xs_tcp_data_ready...\n");
 
-	read_lock(&sk->sk_callback_lock);
+	read_lock_bh(&sk->sk_callback_lock);
 	if (!(xprt = xprt_from_sock(sk)))
 		goto out;
 	if (xprt->shutdown)
@@ -1248,7 +1248,7 @@ static void xs_tcp_data_ready(struct sock *sk, int bytes)
 		read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv);
 	} while (read > 0);
 out:
-	read_unlock(&sk->sk_callback_lock);
+	read_unlock_bh(&sk->sk_callback_lock);
 }
 
 /*
@@ -1301,7 +1301,7 @@ static void xs_tcp_state_change(struct sock *sk)
 {
 	struct rpc_xprt *xprt;
 
-	read_lock(&sk->sk_callback_lock);
+	read_lock_bh(&sk->sk_callback_lock);
 	if (!(xprt = xprt_from_sock(sk)))
 		goto out;
 	dprintk("RPC: xs_tcp_state_change client %p...\n", xprt);
@@ -1313,7 +1313,7 @@ static void xs_tcp_state_change(struct sock *sk)
 
 	switch (sk->sk_state) {
 	case TCP_ESTABLISHED:
-		spin_lock_bh(&xprt->transport_lock);
+		spin_lock(&xprt->transport_lock);
 		if (!xprt_test_and_set_connected(xprt)) {
 			struct sock_xprt *transport = container_of(xprt,
 					struct sock_xprt, xprt);
@@ -1327,7 +1327,7 @@ static void xs_tcp_state_change(struct sock *sk)
 
 			xprt_wake_pending_tasks(xprt, -EAGAIN);
 		}
-		spin_unlock_bh(&xprt->transport_lock);
+		spin_unlock(&xprt->transport_lock);
 		break;
 	case TCP_FIN_WAIT1:
 		/* The client initiated a shutdown of the socket */
@@ -1365,7 +1365,7 @@ static void xs_tcp_state_change(struct sock *sk)
 		xs_sock_mark_closed(xprt);
 	}
 out:
-	read_unlock(&sk->sk_callback_lock);
+	read_unlock_bh(&sk->sk_callback_lock);
 }
 
 /**
@@ -1376,7 +1376,7 @@ static void xs_error_report(struct sock *sk)
 {
 	struct rpc_xprt *xprt;
 
-	read_lock(&sk->sk_callback_lock);
+	read_lock_bh(&sk->sk_callback_lock);
 	if (!(xprt = xprt_from_sock(sk)))
 		goto out;
 	dprintk("RPC: %s client %p...\n"
@@ -1384,7 +1384,7 @@ static void xs_error_report(struct sock *sk)
 			__func__, xprt, sk->sk_err);
 	xprt_wake_pending_tasks(xprt, -EAGAIN);
 out:
-	read_unlock(&sk->sk_callback_lock);
+	read_unlock_bh(&sk->sk_callback_lock);
 }
 
 static void xs_write_space(struct sock *sk)
@@ -1416,13 +1416,13 @@ static void xs_write_space(struct sock *sk)
  */
 static void xs_udp_write_space(struct sock *sk)
 {
-	read_lock(&sk->sk_callback_lock);
+	read_lock_bh(&sk->sk_callback_lock);
 
 	/* from net/core/sock.c:sock_def_write_space */
 	if (sock_writeable(sk))
 		xs_write_space(sk);
 
-	read_unlock(&sk->sk_callback_lock);
+	read_unlock_bh(&sk->sk_callback_lock);
 }
 
 /**
@@ -1437,13 +1437,13 @@ static void xs_udp_write_space(struct sock *sk)
  */
 static void xs_tcp_write_space(struct sock *sk)
 {
-	read_lock(&sk->sk_callback_lock);
+	read_lock_bh(&sk->sk_callback_lock);
 
 	/* from net/core/stream.c:sk_stream_write_space */
 	if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
 		xs_write_space(sk);
 
-	read_unlock(&sk->sk_callback_lock);
+	read_unlock_bh(&sk->sk_callback_lock);
 }
 
 static void xs_udp_do_set_buffer_size(struct rpc_xprt *xprt)