Commit 6997fbd7, authored by Tetsuo Handa, committed by Jakub Kicinski

net: rds: use maybe_get_net() when acquiring refcount on TCP sockets

Eric Dumazet reported a refcount "addition on 0" problem in rds_tcp_tune():
delayed work queued in rds_wq might be invoked after a net namespace's
refcount has already reached 0.

Since rds_tcp_exit_net(), which is called from cleanup_net(), calls
flush_workqueue(rds_wq), delayed work functions are guaranteed to be able to
safely call maybe_get_net() until rds_tcp_exit_net() returns, so use
maybe_get_net() there instead.

Note that I'm not convinced that all works which might access a net
namespace are already queued in rds_wq by the moment rds_tcp_exit_net()
calls flush_workqueue(rds_wq). If some race is there, rds_tcp_exit_net()
will fail to wait for work functions, and kmem_cache_free() could be
called from net_free() before maybe_get_net() is called from
rds_tcp_tune().
Reported-by: Eric Dumazet <edumazet@google.com>
Fixes: 3a58f13a ("net: rds: acquire refcount on TCP sockets")
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/r/41d09faf-bc78-1a87-dfd1-c6d1b5984b61@I-love.SAKURA.ne.jp
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent 68533eb1
...@@ -487,11 +487,11 @@ struct rds_tcp_net { ...@@ -487,11 +487,11 @@ struct rds_tcp_net {
/* All module specific customizations to the RDS-TCP socket should be done in /* All module specific customizations to the RDS-TCP socket should be done in
* rds_tcp_tune() and applied after socket creation. * rds_tcp_tune() and applied after socket creation.
*/ */
void rds_tcp_tune(struct socket *sock) bool rds_tcp_tune(struct socket *sock)
{ {
struct sock *sk = sock->sk; struct sock *sk = sock->sk;
struct net *net = sock_net(sk); struct net *net = sock_net(sk);
struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); struct rds_tcp_net *rtn;
tcp_sock_set_nodelay(sock->sk); tcp_sock_set_nodelay(sock->sk);
lock_sock(sk); lock_sock(sk);
...@@ -499,10 +499,15 @@ void rds_tcp_tune(struct socket *sock) ...@@ -499,10 +499,15 @@ void rds_tcp_tune(struct socket *sock)
* a process which created this net namespace terminated. * a process which created this net namespace terminated.
*/ */
if (!sk->sk_net_refcnt) { if (!sk->sk_net_refcnt) {
if (!maybe_get_net(net)) {
release_sock(sk);
return false;
}
sk->sk_net_refcnt = 1; sk->sk_net_refcnt = 1;
get_net_track(net, &sk->ns_tracker, GFP_KERNEL); netns_tracker_alloc(net, &sk->ns_tracker, GFP_KERNEL);
sock_inuse_add(net, 1); sock_inuse_add(net, 1);
} }
rtn = net_generic(net, rds_tcp_netid);
if (rtn->sndbuf_size > 0) { if (rtn->sndbuf_size > 0) {
sk->sk_sndbuf = rtn->sndbuf_size; sk->sk_sndbuf = rtn->sndbuf_size;
sk->sk_userlocks |= SOCK_SNDBUF_LOCK; sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
...@@ -512,6 +517,7 @@ void rds_tcp_tune(struct socket *sock) ...@@ -512,6 +517,7 @@ void rds_tcp_tune(struct socket *sock)
sk->sk_userlocks |= SOCK_RCVBUF_LOCK; sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
} }
release_sock(sk); release_sock(sk);
return true;
} }
static void rds_tcp_accept_worker(struct work_struct *work) static void rds_tcp_accept_worker(struct work_struct *work)
......
...@@ -49,7 +49,7 @@ struct rds_tcp_statistics { ...@@ -49,7 +49,7 @@ struct rds_tcp_statistics {
}; };
/* tcp.c */ /* tcp.c */
void rds_tcp_tune(struct socket *sock); bool rds_tcp_tune(struct socket *sock);
void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp); void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp);
void rds_tcp_reset_callbacks(struct socket *sock, struct rds_conn_path *cp); void rds_tcp_reset_callbacks(struct socket *sock, struct rds_conn_path *cp);
void rds_tcp_restore_callbacks(struct socket *sock, void rds_tcp_restore_callbacks(struct socket *sock,
......
...@@ -124,7 +124,10 @@ int rds_tcp_conn_path_connect(struct rds_conn_path *cp) ...@@ -124,7 +124,10 @@ int rds_tcp_conn_path_connect(struct rds_conn_path *cp)
if (ret < 0) if (ret < 0)
goto out; goto out;
rds_tcp_tune(sock); if (!rds_tcp_tune(sock)) {
ret = -EINVAL;
goto out;
}
if (isv6) { if (isv6) {
sin6.sin6_family = AF_INET6; sin6.sin6_family = AF_INET6;
......
...@@ -133,7 +133,10 @@ int rds_tcp_accept_one(struct socket *sock) ...@@ -133,7 +133,10 @@ int rds_tcp_accept_one(struct socket *sock)
__module_get(new_sock->ops->owner); __module_get(new_sock->ops->owner);
rds_tcp_keepalive(new_sock); rds_tcp_keepalive(new_sock);
rds_tcp_tune(new_sock); if (!rds_tcp_tune(new_sock)) {
ret = -EINVAL;
goto out;
}
inet = inet_sk(new_sock->sk); inet = inet_sk(new_sock->sk);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment