Commit 27e5ccc2 authored by Paolo Abeni's avatar Paolo Abeni Committed by David S. Miller

mptcp: fix dangling connection hang-up

According to RFC 8684 section 3.3:

  A connection is not closed unless [...] or an implementation-specific
  connection-level send timeout.

Currently, the MPTCP protocol does not implement such a timeout, and
connections timing out at the TCP level never move to the close state.

Introduce a catch-up condition at subflow close time that moves the
MPTCP socket to the closed state, too.

That additionally allows removing similar existing checks inside the worker.

Finally, allow some additional timeout for plain ESTABLISHED mptcp
sockets, as the protocol allows creating new subflows even at that
point and making the connection functional again.

This issue is actually present since the beginning, but it is basically
impossible to solve without a long chain of functional pre-requisites
topped by commit bbd49d11 ("mptcp: consolidate transition to
TCP_CLOSE in mptcp_do_fastclose()"). When backporting this current
patch, please also backport this other commit as well.

Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/430
Fixes: e16163b6 ("mptcp: refactor shutdown and close")
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Reviewed-by: Mat Martineau <martineau@kernel.org>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent f6909dc1
...@@ -892,6 +892,7 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk) ...@@ -892,6 +892,7 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
mptcp_subflow_ctx(ssk)->subflow_id = msk->subflow_id++; mptcp_subflow_ctx(ssk)->subflow_id = msk->subflow_id++;
mptcp_sockopt_sync_locked(msk, ssk); mptcp_sockopt_sync_locked(msk, ssk);
mptcp_subflow_joined(msk, ssk); mptcp_subflow_joined(msk, ssk);
mptcp_stop_tout_timer(sk);
return true; return true;
} }
...@@ -2369,18 +2370,14 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, ...@@ -2369,18 +2370,14 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
bool dispose_it, need_push = false; bool dispose_it, need_push = false;
/* If the first subflow moved to a close state before accept, e.g. due /* If the first subflow moved to a close state before accept, e.g. due
* to an incoming reset, mptcp either: * to an incoming reset or listener shutdown, the subflow socket is
* - if either the subflow or the msk are dead, destroy the context * already deleted by inet_child_forget() and the mptcp socket can't
* (the subflow socket is deleted by inet_child_forget) and the msk * survive too.
* - otherwise do nothing at the moment and take action at accept and/or
* listener shutdown - user-space must be able to accept() the closed
* socket.
*/ */
if (msk->in_accept_queue && msk->first == ssk) { if (msk->in_accept_queue && msk->first == ssk &&
if (!sock_flag(sk, SOCK_DEAD) && !sock_flag(ssk, SOCK_DEAD)) (sock_flag(sk, SOCK_DEAD) || sock_flag(ssk, SOCK_DEAD))) {
return;
/* ensure later check in mptcp_worker() will dispose the msk */ /* ensure later check in mptcp_worker() will dispose the msk */
mptcp_set_close_tout(sk, tcp_jiffies32 - (TCP_TIMEWAIT_LEN + 1));
sock_set_flag(sk, SOCK_DEAD); sock_set_flag(sk, SOCK_DEAD);
lock_sock_nested(ssk, SINGLE_DEPTH_NESTING); lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
mptcp_subflow_drop_ctx(ssk); mptcp_subflow_drop_ctx(ssk);
...@@ -2443,6 +2440,22 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, ...@@ -2443,6 +2440,22 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
out: out:
if (need_push) if (need_push)
__mptcp_push_pending(sk, 0); __mptcp_push_pending(sk, 0);
/* Catch every 'all subflows closed' scenario, including peers silently
* closing them, e.g. due to timeout.
* For established sockets, allow an additional timeout before closing,
* as the protocol can still create more subflows.
*/
if (list_is_singular(&msk->conn_list) && msk->first &&
inet_sk_state_load(msk->first) == TCP_CLOSE) {
if (sk->sk_state != TCP_ESTABLISHED ||
msk->in_accept_queue || sock_flag(sk, SOCK_DEAD)) {
inet_sk_state_store(sk, TCP_CLOSE);
mptcp_close_wake_up(sk);
} else {
mptcp_start_tout_timer(sk);
}
}
} }
void mptcp_close_ssk(struct sock *sk, struct sock *ssk, void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
...@@ -2486,23 +2499,14 @@ static void __mptcp_close_subflow(struct sock *sk) ...@@ -2486,23 +2499,14 @@ static void __mptcp_close_subflow(struct sock *sk)
} }
static bool mptcp_should_close(const struct sock *sk) static bool mptcp_close_tout_expired(const struct sock *sk)
{ {
s32 delta = tcp_jiffies32 - inet_csk(sk)->icsk_mtup.probe_timestamp; if (!inet_csk(sk)->icsk_mtup.probe_timestamp ||
struct mptcp_subflow_context *subflow; sk->sk_state == TCP_CLOSE)
return false;
if (delta >= TCP_TIMEWAIT_LEN || mptcp_sk(sk)->in_accept_queue)
return true;
/* if all subflows are in closed status don't bother with additional return time_after32(tcp_jiffies32,
* timeout inet_csk(sk)->icsk_mtup.probe_timestamp + TCP_TIMEWAIT_LEN);
*/
mptcp_for_each_subflow(mptcp_sk(sk), subflow) {
if (inet_sk_state_load(mptcp_subflow_tcp_sock(subflow)) !=
TCP_CLOSE)
return false;
}
return true;
} }
static void mptcp_check_fastclose(struct mptcp_sock *msk) static void mptcp_check_fastclose(struct mptcp_sock *msk)
...@@ -2641,15 +2645,16 @@ void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout) ...@@ -2641,15 +2645,16 @@ void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout)
struct sock *sk = (struct sock *)msk; struct sock *sk = (struct sock *)msk;
unsigned long timeout, close_timeout; unsigned long timeout, close_timeout;
if (!fail_tout && !sock_flag(sk, SOCK_DEAD)) if (!fail_tout && !inet_csk(sk)->icsk_mtup.probe_timestamp)
return; return;
close_timeout = inet_csk(sk)->icsk_mtup.probe_timestamp - tcp_jiffies32 + jiffies + TCP_TIMEWAIT_LEN; close_timeout = inet_csk(sk)->icsk_mtup.probe_timestamp - tcp_jiffies32 + jiffies +
TCP_TIMEWAIT_LEN;
/* the close timeout takes precedence on the fail one, and here at least one of /* the close timeout takes precedence on the fail one, and here at least one of
* them is active * them is active
*/ */
timeout = sock_flag(sk, SOCK_DEAD) ? close_timeout : fail_tout; timeout = inet_csk(sk)->icsk_mtup.probe_timestamp ? close_timeout : fail_tout;
sk_reset_timer(sk, &sk->sk_timer, timeout); sk_reset_timer(sk, &sk->sk_timer, timeout);
} }
...@@ -2668,8 +2673,6 @@ static void mptcp_mp_fail_no_response(struct mptcp_sock *msk) ...@@ -2668,8 +2673,6 @@ static void mptcp_mp_fail_no_response(struct mptcp_sock *msk)
mptcp_subflow_reset(ssk); mptcp_subflow_reset(ssk);
WRITE_ONCE(mptcp_subflow_ctx(ssk)->fail_tout, 0); WRITE_ONCE(mptcp_subflow_ctx(ssk)->fail_tout, 0);
unlock_sock_fast(ssk, slow); unlock_sock_fast(ssk, slow);
mptcp_reset_tout_timer(msk, 0);
} }
static void mptcp_do_fastclose(struct sock *sk) static void mptcp_do_fastclose(struct sock *sk)
...@@ -2706,18 +2709,14 @@ static void mptcp_worker(struct work_struct *work) ...@@ -2706,18 +2709,14 @@ static void mptcp_worker(struct work_struct *work)
if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
__mptcp_close_subflow(sk); __mptcp_close_subflow(sk);
/* There is no point in keeping around an orphaned sk timedout or if (mptcp_close_tout_expired(sk)) {
* closed, but we need the msk around to reply to incoming DATA_FIN, mptcp_do_fastclose(sk);
* even if it is orphaned and in FIN_WAIT2 state mptcp_close_wake_up(sk);
*/ }
if (sock_flag(sk, SOCK_DEAD)) {
if (mptcp_should_close(sk))
mptcp_do_fastclose(sk);
if (sk->sk_state == TCP_CLOSE) { if (sock_flag(sk, SOCK_DEAD) && sk->sk_state == TCP_CLOSE) {
__mptcp_destroy_sock(sk); __mptcp_destroy_sock(sk);
goto unlock; goto unlock;
}
} }
if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags)) if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
...@@ -3016,7 +3015,6 @@ bool __mptcp_close(struct sock *sk, long timeout) ...@@ -3016,7 +3015,6 @@ bool __mptcp_close(struct sock *sk, long timeout)
cleanup: cleanup:
/* orphan all the subflows */ /* orphan all the subflows */
inet_csk(sk)->icsk_mtup.probe_timestamp = tcp_jiffies32;
mptcp_for_each_subflow(msk, subflow) { mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow); struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
bool slow = lock_sock_fast_nested(ssk); bool slow = lock_sock_fast_nested(ssk);
...@@ -3053,7 +3051,7 @@ bool __mptcp_close(struct sock *sk, long timeout) ...@@ -3053,7 +3051,7 @@ bool __mptcp_close(struct sock *sk, long timeout)
__mptcp_destroy_sock(sk); __mptcp_destroy_sock(sk);
do_cancel_work = true; do_cancel_work = true;
} else { } else {
mptcp_reset_tout_timer(msk, 0); mptcp_start_tout_timer(sk);
} }
return do_cancel_work; return do_cancel_work;
...@@ -3117,7 +3115,7 @@ static int mptcp_disconnect(struct sock *sk, int flags) ...@@ -3117,7 +3115,7 @@ static int mptcp_disconnect(struct sock *sk, int flags)
inet_sk_state_store(sk, TCP_CLOSE); inet_sk_state_store(sk, TCP_CLOSE);
mptcp_stop_rtx_timer(sk); mptcp_stop_rtx_timer(sk);
sk_stop_timer(sk, &sk->sk_timer); mptcp_stop_tout_timer(sk);
if (msk->token) if (msk->token)
mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL); mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL);
......
...@@ -719,6 +719,28 @@ void mptcp_get_options(const struct sk_buff *skb, ...@@ -719,6 +719,28 @@ void mptcp_get_options(const struct sk_buff *skb,
void mptcp_finish_connect(struct sock *sk); void mptcp_finish_connect(struct sock *sk);
void __mptcp_set_connected(struct sock *sk); void __mptcp_set_connected(struct sock *sk);
void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout); void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout);
/* Cancel a pending connection-level close timeout, if any.
 * A zero icsk_mtup.probe_timestamp marks "no close timeout armed"
 * (see mptcp_set_close_tout()), so in that case there is nothing to stop.
 * NOTE(review): icsk_mtup.probe_timestamp is reused here as the
 * close-timeout start stamp — confirm no MTU-probing user conflicts.
 */
static inline void mptcp_stop_tout_timer(struct sock *sk)
{
if (!inet_csk(sk)->icsk_mtup.probe_timestamp)
return;
/* Stop sk_timer and clear the stamp so later checks see no timeout. */
sk_stop_timer(sk, &sk->sk_timer);
inet_csk(sk)->icsk_mtup.probe_timestamp = 0;
}
/* Record @tout (a tcp_jiffies32 value) as the start of the
 * connection-level close timeout window.
 */
static inline void mptcp_set_close_tout(struct sock *sk, unsigned long tout)
{
/* avoid 0 timestamp, as that means no close timeout */
inet_csk(sk)->icsk_mtup.probe_timestamp = tout ? : 1;
}
/* Arm the connection-level close timeout: stamp the current time and
 * (re)schedule sk_timer via mptcp_reset_tout_timer(), which derives the
 * expiry from the stamp plus TCP_TIMEWAIT_LEN.
 */
static inline void mptcp_start_tout_timer(struct sock *sk)
{
mptcp_set_close_tout(sk, tcp_jiffies32);
mptcp_reset_tout_timer(mptcp_sk(sk), 0);
}
static inline bool mptcp_is_fully_established(struct sock *sk) static inline bool mptcp_is_fully_established(struct sock *sk)
{ {
return inet_sk_state_load(sk) == TCP_ESTABLISHED && return inet_sk_state_load(sk) == TCP_ESTABLISHED &&
......
...@@ -1552,6 +1552,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, ...@@ -1552,6 +1552,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
mptcp_sock_graft(ssk, sk->sk_socket); mptcp_sock_graft(ssk, sk->sk_socket);
iput(SOCK_INODE(sf)); iput(SOCK_INODE(sf));
WRITE_ONCE(msk->allow_infinite_fallback, false); WRITE_ONCE(msk->allow_infinite_fallback, false);
mptcp_stop_tout_timer(sk);
return 0; return 0;
failed_unlink: failed_unlink:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment