Commit 68bf33f4 authored by David S. Miller

Merge branch 'rds-tcp-netns-delete-related-fixes'

Sowmini Varadhan says:

====================
rds-tcp netns delete related fixes

Patchset contains cleanup and bug fixes. Patch 1 is the removal
of some redundant code/functions. Patch 2 and 3 are fixes for
corner cases identified by syzkaller. I've not been able to
reproduce the actual use-after-free race flagged in the syzkaller
reports, thus these fixes are based on code inspection plus
manual testing to make sure the modified code paths are executed
without problems in the commonly encountered timing cases.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 4c94cc2d f10b4cff
...@@ -366,6 +366,8 @@ void rds_conn_shutdown(struct rds_conn_path *cp) ...@@ -366,6 +366,8 @@ void rds_conn_shutdown(struct rds_conn_path *cp)
* to the conn hash, so we never trigger a reconnect on this * to the conn hash, so we never trigger a reconnect on this
* conn - the reconnect is always triggered by the active peer. */ * conn - the reconnect is always triggered by the active peer. */
cancel_delayed_work_sync(&cp->cp_conn_w); cancel_delayed_work_sync(&cp->cp_conn_w);
if (conn->c_destroy_in_prog)
return;
rcu_read_lock(); rcu_read_lock();
if (!hlist_unhashed(&conn->c_hash_node)) { if (!hlist_unhashed(&conn->c_hash_node)) {
rcu_read_unlock(); rcu_read_unlock();
...@@ -445,7 +447,6 @@ void rds_conn_destroy(struct rds_connection *conn) ...@@ -445,7 +447,6 @@ void rds_conn_destroy(struct rds_connection *conn)
*/ */
rds_cong_remove_conn(conn); rds_cong_remove_conn(conn);
put_net(conn->c_net);
kfree(conn->c_path); kfree(conn->c_path);
kmem_cache_free(rds_conn_slab, conn); kmem_cache_free(rds_conn_slab, conn);
......
...@@ -150,7 +150,7 @@ struct rds_connection { ...@@ -150,7 +150,7 @@ struct rds_connection {
/* Protocol version */ /* Protocol version */
unsigned int c_version; unsigned int c_version;
struct net *c_net; possible_net_t c_net;
struct list_head c_map_item; struct list_head c_map_item;
unsigned long c_map_queued; unsigned long c_map_queued;
...@@ -165,13 +165,13 @@ struct rds_connection { ...@@ -165,13 +165,13 @@ struct rds_connection {
static inline static inline
struct net *rds_conn_net(struct rds_connection *conn) struct net *rds_conn_net(struct rds_connection *conn)
{ {
return conn->c_net; return read_pnet(&conn->c_net);
} }
static inline static inline
void rds_conn_net_set(struct rds_connection *conn, struct net *net) void rds_conn_net_set(struct rds_connection *conn, struct net *net)
{ {
conn->c_net = get_net(net); write_pnet(&conn->c_net, net);
} }
#define RDS_FLAG_CONG_BITMAP 0x01 #define RDS_FLAG_CONG_BITMAP 0x01
......
...@@ -306,7 +306,8 @@ static void rds_tcp_conn_free(void *arg) ...@@ -306,7 +306,8 @@ static void rds_tcp_conn_free(void *arg)
rdsdebug("freeing tc %p\n", tc); rdsdebug("freeing tc %p\n", tc);
spin_lock_irqsave(&rds_tcp_conn_lock, flags); spin_lock_irqsave(&rds_tcp_conn_lock, flags);
list_del(&tc->t_tcp_node); if (!tc->t_tcp_node_detached)
list_del(&tc->t_tcp_node);
spin_unlock_irqrestore(&rds_tcp_conn_lock, flags); spin_unlock_irqrestore(&rds_tcp_conn_lock, flags);
kmem_cache_free(rds_tcp_conn_slab, tc); kmem_cache_free(rds_tcp_conn_slab, tc);
...@@ -495,27 +496,6 @@ static struct pernet_operations rds_tcp_net_ops = { ...@@ -495,27 +496,6 @@ static struct pernet_operations rds_tcp_net_ops = {
.size = sizeof(struct rds_tcp_net), .size = sizeof(struct rds_tcp_net),
}; };
/* explicitly send a RST on each socket, thereby releasing any socket refcnts
* that may otherwise hold up netns deletion.
*/
static void rds_tcp_conn_paths_destroy(struct rds_connection *conn)
{
struct rds_conn_path *cp;
struct rds_tcp_connection *tc;
int i;
struct sock *sk;
for (i = 0; i < RDS_MPATH_WORKERS; i++) {
cp = &conn->c_path[i];
tc = cp->cp_transport_data;
if (!tc->t_sock)
continue;
sk = tc->t_sock->sk;
sk->sk_prot->disconnect(sk, 0);
tcp_done(sk);
}
}
static void rds_tcp_kill_sock(struct net *net) static void rds_tcp_kill_sock(struct net *net)
{ {
struct rds_tcp_connection *tc, *_tc; struct rds_tcp_connection *tc, *_tc;
...@@ -527,18 +507,20 @@ static void rds_tcp_kill_sock(struct net *net) ...@@ -527,18 +507,20 @@ static void rds_tcp_kill_sock(struct net *net)
rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w); rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w);
spin_lock_irq(&rds_tcp_conn_lock); spin_lock_irq(&rds_tcp_conn_lock);
list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
struct net *c_net = tc->t_cpath->cp_conn->c_net; struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
if (net != c_net || !tc->t_sock) if (net != c_net || !tc->t_sock)
continue; continue;
if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn)) if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn)) {
list_move_tail(&tc->t_tcp_node, &tmp_list); list_move_tail(&tc->t_tcp_node, &tmp_list);
} else {
list_del(&tc->t_tcp_node);
tc->t_tcp_node_detached = true;
}
} }
spin_unlock_irq(&rds_tcp_conn_lock); spin_unlock_irq(&rds_tcp_conn_lock);
list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) { list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node)
rds_tcp_conn_paths_destroy(tc->t_cpath->cp_conn);
rds_conn_destroy(tc->t_cpath->cp_conn); rds_conn_destroy(tc->t_cpath->cp_conn);
}
} }
void *rds_tcp_listen_sock_def_readable(struct net *net) void *rds_tcp_listen_sock_def_readable(struct net *net)
...@@ -586,7 +568,7 @@ static void rds_tcp_sysctl_reset(struct net *net) ...@@ -586,7 +568,7 @@ static void rds_tcp_sysctl_reset(struct net *net)
spin_lock_irq(&rds_tcp_conn_lock); spin_lock_irq(&rds_tcp_conn_lock);
list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
struct net *c_net = tc->t_cpath->cp_conn->c_net; struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
if (net != c_net || !tc->t_sock) if (net != c_net || !tc->t_sock)
continue; continue;
......
...@@ -12,6 +12,7 @@ struct rds_tcp_incoming { ...@@ -12,6 +12,7 @@ struct rds_tcp_incoming {
struct rds_tcp_connection { struct rds_tcp_connection {
struct list_head t_tcp_node; struct list_head t_tcp_node;
bool t_tcp_node_detached;
struct rds_conn_path *t_cpath; struct rds_conn_path *t_cpath;
/* t_conn_path_lock synchronizes the connection establishment between /* t_conn_path_lock synchronizes the connection establishment between
* rds_tcp_accept_one and rds_tcp_conn_path_connect * rds_tcp_accept_one and rds_tcp_conn_path_connect
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment