Commit 0b943d90 authored by David S. Miller's avatar David S. Miller

Merge branch 'mptcp-fix-races-on-accept'

Paolo Abeni says:

====================
mptcp: fix races on accept()

This series includes some fixes for accept() races which may cause inconsistent
MPTCP socket status and oops. Please see the individual patches for the
technical details.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents bd019427 fca5c82c
...@@ -1332,7 +1332,9 @@ static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk) ...@@ -1332,7 +1332,9 @@ static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk)
} }
#endif #endif
struct sock *mptcp_sk_clone(const struct sock *sk, struct request_sock *req) struct sock *mptcp_sk_clone(const struct sock *sk,
const struct tcp_options_received *opt_rx,
struct request_sock *req)
{ {
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
struct sock *nsk = sk_clone_lock(sk, GFP_ATOMIC); struct sock *nsk = sk_clone_lock(sk, GFP_ATOMIC);
...@@ -1370,14 +1372,15 @@ struct sock *mptcp_sk_clone(const struct sock *sk, struct request_sock *req) ...@@ -1370,14 +1372,15 @@ struct sock *mptcp_sk_clone(const struct sock *sk, struct request_sock *req)
msk->write_seq = subflow_req->idsn + 1; msk->write_seq = subflow_req->idsn + 1;
atomic64_set(&msk->snd_una, msk->write_seq); atomic64_set(&msk->snd_una, msk->write_seq);
if (subflow_req->remote_key_valid) { if (opt_rx->mptcp.mp_capable) {
msk->can_ack = true; msk->can_ack = true;
msk->remote_key = subflow_req->remote_key; msk->remote_key = opt_rx->mptcp.sndr_key;
mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq); mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq);
ack_seq++; ack_seq++;
msk->ack_seq = ack_seq; msk->ack_seq = ack_seq;
} }
sock_reset_flag(nsk, SOCK_RCU_FREE);
/* will be fully established after successful MPC subflow creation */ /* will be fully established after successful MPC subflow creation */
inet_sk_state_store(nsk, TCP_SYN_RECV); inet_sk_state_store(nsk, TCP_SYN_RECV);
bh_unlock_sock(nsk); bh_unlock_sock(nsk);
...@@ -1779,6 +1782,8 @@ static int mptcp_listen(struct socket *sock, int backlog) ...@@ -1779,6 +1782,8 @@ static int mptcp_listen(struct socket *sock, int backlog)
goto unlock; goto unlock;
} }
sock_set_flag(sock->sk, SOCK_RCU_FREE);
err = ssock->ops->listen(ssock, backlog); err = ssock->ops->listen(ssock, backlog);
inet_sk_state_store(sock->sk, inet_sk_state_load(ssock->sk)); inet_sk_state_store(sock->sk, inet_sk_state_load(ssock->sk));
if (!err) if (!err)
......
...@@ -206,12 +206,10 @@ struct mptcp_subflow_request_sock { ...@@ -206,12 +206,10 @@ struct mptcp_subflow_request_sock {
struct tcp_request_sock sk; struct tcp_request_sock sk;
u16 mp_capable : 1, u16 mp_capable : 1,
mp_join : 1, mp_join : 1,
backup : 1, backup : 1;
remote_key_valid : 1;
u8 local_id; u8 local_id;
u8 remote_id; u8 remote_id;
u64 local_key; u64 local_key;
u64 remote_key;
u64 idsn; u64 idsn;
u32 token; u32 token;
u32 ssn_offset; u32 ssn_offset;
...@@ -332,7 +330,9 @@ void mptcp_proto_init(void); ...@@ -332,7 +330,9 @@ void mptcp_proto_init(void);
int mptcp_proto_v6_init(void); int mptcp_proto_v6_init(void);
#endif #endif
struct sock *mptcp_sk_clone(const struct sock *sk, struct request_sock *req); struct sock *mptcp_sk_clone(const struct sock *sk,
const struct tcp_options_received *opt_rx,
struct request_sock *req);
void mptcp_get_options(const struct sk_buff *skb, void mptcp_get_options(const struct sk_buff *skb,
struct tcp_options_received *opt_rx); struct tcp_options_received *opt_rx);
......
...@@ -133,7 +133,6 @@ static void subflow_init_req(struct request_sock *req, ...@@ -133,7 +133,6 @@ static void subflow_init_req(struct request_sock *req,
subflow_req->mp_capable = 0; subflow_req->mp_capable = 0;
subflow_req->mp_join = 0; subflow_req->mp_join = 0;
subflow_req->remote_key_valid = 0;
#ifdef CONFIG_TCP_MD5SIG #ifdef CONFIG_TCP_MD5SIG
/* no MPTCP if MD5SIG is enabled on this socket or we may run out of /* no MPTCP if MD5SIG is enabled on this socket or we may run out of
...@@ -376,6 +375,17 @@ static void mptcp_force_close(struct sock *sk) ...@@ -376,6 +375,17 @@ static void mptcp_force_close(struct sock *sk)
sk_common_release(sk); sk_common_release(sk);
} }
/* Revert a subflow socket from MPTCP back to plain TCP.
 *
 * Restores the TCP callbacks via mptcp_subflow_tcp_fallback(), then
 * detaches the ULP state: clears icsk_ulp_ops and (RCU-safely, since
 * readers may dereference it under rcu_read_lock) icsk_ulp_data, and
 * finally clears the is_mptcp flag so the stack treats @sk as TCP.
 *
 * NOTE(review): @old_ctx is not freed here — the caller appears to own
 * it and frees it with kfree_rcu() after this returns; confirm at the
 * call sites.
 */
static void subflow_ulp_fallback(struct sock *sk,
struct mptcp_subflow_context *old_ctx)
{
struct inet_connection_sock *icsk = inet_csk(sk);
mptcp_subflow_tcp_fallback(sk, old_ctx);
icsk->icsk_ulp_ops = NULL;
rcu_assign_pointer(icsk->icsk_ulp_data, NULL);
tcp_sk(sk)->is_mptcp = 0;
}
static struct sock *subflow_syn_recv_sock(const struct sock *sk, static struct sock *subflow_syn_recv_sock(const struct sock *sk,
struct sk_buff *skb, struct sk_buff *skb,
struct request_sock *req, struct request_sock *req,
...@@ -388,10 +398,12 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, ...@@ -388,10 +398,12 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
struct tcp_options_received opt_rx; struct tcp_options_received opt_rx;
bool fallback_is_fatal = false; bool fallback_is_fatal = false;
struct sock *new_msk = NULL; struct sock *new_msk = NULL;
bool fallback = false;
struct sock *child; struct sock *child;
pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn); pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn);
opt_rx.mptcp.mp_capable = 0;
if (tcp_rsk(req)->is_mptcp == 0) if (tcp_rsk(req)->is_mptcp == 0)
goto create_child; goto create_child;
...@@ -406,20 +418,16 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, ...@@ -406,20 +418,16 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
goto create_msk; goto create_msk;
} }
opt_rx.mptcp.mp_capable = 0;
mptcp_get_options(skb, &opt_rx); mptcp_get_options(skb, &opt_rx);
if (opt_rx.mptcp.mp_capable) { if (!opt_rx.mptcp.mp_capable) {
subflow_req->remote_key = opt_rx.mptcp.sndr_key; fallback = true;
subflow_req->remote_key_valid = 1;
} else {
subflow_req->mp_capable = 0;
goto create_child; goto create_child;
} }
create_msk: create_msk:
new_msk = mptcp_sk_clone(listener->conn, req); new_msk = mptcp_sk_clone(listener->conn, &opt_rx, req);
if (!new_msk) if (!new_msk)
subflow_req->mp_capable = 0; fallback = true;
} else if (subflow_req->mp_join) { } else if (subflow_req->mp_join) {
fallback_is_fatal = true; fallback_is_fatal = true;
opt_rx.mptcp.mp_join = 0; opt_rx.mptcp.mp_join = 0;
...@@ -438,12 +446,18 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, ...@@ -438,12 +446,18 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
if (child && *own_req) { if (child && *own_req) {
struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(child); struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(child);
/* we have null ctx on TCP fallback, which is fatal on /* we need to fallback on ctx allocation failure and on pre-reqs
* MPJ handshake * checking above. In the latter scenario we additionally need
* to reset the context to non MPTCP status.
*/ */
if (!ctx) { if (!ctx || fallback) {
if (fallback_is_fatal) if (fallback_is_fatal)
goto close_child; goto close_child;
if (ctx) {
subflow_ulp_fallback(child, ctx);
kfree_rcu(ctx, rcu);
}
goto out; goto out;
} }
...@@ -455,6 +469,13 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, ...@@ -455,6 +469,13 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
mptcp_pm_new_connection(mptcp_sk(new_msk), 1); mptcp_pm_new_connection(mptcp_sk(new_msk), 1);
ctx->conn = new_msk; ctx->conn = new_msk;
new_msk = NULL; new_msk = NULL;
/* with OoO packets we can reach here without ingress
* mpc option
*/
ctx->remote_key = opt_rx.mptcp.sndr_key;
ctx->fully_established = opt_rx.mptcp.mp_capable;
ctx->can_ack = opt_rx.mptcp.mp_capable;
} else if (ctx->mp_join) { } else if (ctx->mp_join) {
struct mptcp_sock *owner; struct mptcp_sock *owner;
...@@ -474,6 +495,13 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, ...@@ -474,6 +495,13 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
/* dispose of the left over mptcp master, if any */ /* dispose of the left over mptcp master, if any */
if (unlikely(new_msk)) if (unlikely(new_msk))
mptcp_force_close(new_msk); mptcp_force_close(new_msk);
/* check for expected invariant - should never trigger, just help /* check for expected invariant - should never trigger, just help
* catching earlier subtle bugs * catching earlier subtle bugs
*/ */
WARN_ON_ONCE(*own_req && child && tcp_sk(child)->is_mptcp &&
(!mptcp_subflow_ctx(child) ||
!mptcp_subflow_ctx(child)->conn));
return child; return child;
close_child: close_child:
...@@ -1076,17 +1104,6 @@ static void subflow_ulp_release(struct sock *sk) ...@@ -1076,17 +1104,6 @@ static void subflow_ulp_release(struct sock *sk)
kfree_rcu(ctx, rcu); kfree_rcu(ctx, rcu);
} }
/* Revert a subflow socket from MPTCP back to plain TCP.
 *
 * Restores the TCP callbacks via mptcp_subflow_tcp_fallback(), then
 * detaches the ULP state: clears icsk_ulp_ops and (RCU-safely, since
 * readers may dereference it under rcu_read_lock) icsk_ulp_data, and
 * finally clears the is_mptcp flag so the stack treats @sk as TCP.
 */
static void subflow_ulp_fallback(struct sock *sk,
struct mptcp_subflow_context *old_ctx)
{
struct inet_connection_sock *icsk = inet_csk(sk);
mptcp_subflow_tcp_fallback(sk, old_ctx);
icsk->icsk_ulp_ops = NULL;
rcu_assign_pointer(icsk->icsk_ulp_data, NULL);
tcp_sk(sk)->is_mptcp = 0;
}
static void subflow_ulp_clone(const struct request_sock *req, static void subflow_ulp_clone(const struct request_sock *req,
struct sock *newsk, struct sock *newsk,
const gfp_t priority) const gfp_t priority)
...@@ -1120,9 +1137,6 @@ static void subflow_ulp_clone(const struct request_sock *req, ...@@ -1120,9 +1137,6 @@ static void subflow_ulp_clone(const struct request_sock *req,
* is fully established only after we receive the remote key * is fully established only after we receive the remote key
*/ */
new_ctx->mp_capable = 1; new_ctx->mp_capable = 1;
new_ctx->fully_established = subflow_req->remote_key_valid;
new_ctx->can_ack = subflow_req->remote_key_valid;
new_ctx->remote_key = subflow_req->remote_key;
new_ctx->local_key = subflow_req->local_key; new_ctx->local_key = subflow_req->local_key;
new_ctx->token = subflow_req->token; new_ctx->token = subflow_req->token;
new_ctx->ssn_offset = subflow_req->ssn_offset; new_ctx->ssn_offset = subflow_req->ssn_offset;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment