Commit 972983fc authored by David S. Miller

Merge branch 'mptcp-stalled-connections-fix'

Matthieu Baerts says:

====================
mptcp: fix stalled connections

Daire reported a few issues with MPTCP where some connections were
stalled in different states. Paolo did a great job fixing them.

Patch 1 fixes bogus receive window shrinkage with multiple subflows. Due
to a race condition and unlucky circumstances, that may lead to
TCP-level window shrinkage, and the connection being stalled on the
sender end.

Patch 2 is a preparation for patch 3 which processes pending subflow
errors on close. Without that, and under specific circumstances, the
MPTCP-level socket might never switch to the CLOSE state and would stall.

Patch 4 is also a preparation patch for the next one. Patch 5 fixes
MPTCP connections not switching to the CLOSE state when all subflows
have been closed but no DATA_FIN has been exchanged to explicitly close
the MPTCP connection. Now connections in such state will switch to the
CLOSE state after a timeout, still allowing the "make-after-break"
feature but making sure connections don't stall forever. It will be
possible to modify this timeout -- currently matching TCP TIMEWAIT value
(60 seconds) -- in a future version.
====================
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
parents 8a47558a 27e5ccc2
...@@ -1269,12 +1269,13 @@ static void mptcp_set_rwin(struct tcp_sock *tp, struct tcphdr *th) ...@@ -1269,12 +1269,13 @@ static void mptcp_set_rwin(struct tcp_sock *tp, struct tcphdr *th)
if (rcv_wnd == rcv_wnd_old) if (rcv_wnd == rcv_wnd_old)
break; break;
if (before64(rcv_wnd_new, rcv_wnd)) {
rcv_wnd_old = rcv_wnd;
if (before64(rcv_wnd_new, rcv_wnd_old)) {
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDCONFLICTUPDATE); MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDCONFLICTUPDATE);
goto raise_win; goto raise_win;
} }
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDCONFLICT); MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDCONFLICT);
rcv_wnd_old = rcv_wnd;
} }
return; return;
} }
......
This diff is collapsed.
...@@ -718,7 +718,29 @@ void mptcp_get_options(const struct sk_buff *skb, ...@@ -718,7 +718,29 @@ void mptcp_get_options(const struct sk_buff *skb,
void mptcp_finish_connect(struct sock *sk); void mptcp_finish_connect(struct sock *sk);
void __mptcp_set_connected(struct sock *sk); void __mptcp_set_connected(struct sock *sk);
void mptcp_reset_timeout(struct mptcp_sock *msk, unsigned long fail_tout); void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout);
/* Stop the timer at sk->sk_timer and clear the close-timeout marker.
 *
 * icsk_mtup.probe_timestamp is used here as the close-timeout marker: a
 * zero value means no close timeout is recorded, in which case this
 * returns without touching the timer.
 */
static inline void mptcp_stop_tout_timer(struct sock *sk)
{
if (!inet_csk(sk)->icsk_mtup.probe_timestamp)
return;
sk_stop_timer(sk, &sk->sk_timer);
/* reset the marker so a later call takes the early return above */
inet_csk(sk)->icsk_mtup.probe_timestamp = 0;
}
/* Record @tout as the close-timeout timestamp of @sk.
 *
 * The value is stored in icsk_mtup.probe_timestamp. A zero @tout is
 * stored as 1 instead, so the stored value is never zero.
 */
static inline void mptcp_set_close_tout(struct sock *sk, unsigned long tout)
{
/* avoid 0 timestamp, as that means no close timeout */
/* `a ? : b` is the GNU omitted-operand conditional: tout if non-zero, else 1 */
inet_csk(sk)->icsk_mtup.probe_timestamp = tout ? : 1;
}
/* Start the close timeout for @sk.
 *
 * Stamps the close timeout with the current tcp_jiffies32 value, then
 * calls mptcp_reset_tout_timer() on the MPTCP socket with a zero
 * fail_tout argument.
 */
static inline void mptcp_start_tout_timer(struct sock *sk)
{
mptcp_set_close_tout(sk, tcp_jiffies32);
mptcp_reset_tout_timer(mptcp_sk(sk), 0);
}
static inline bool mptcp_is_fully_established(struct sock *sk) static inline bool mptcp_is_fully_established(struct sock *sk)
{ {
return inet_sk_state_load(sk) == TCP_ESTABLISHED && return inet_sk_state_load(sk) == TCP_ESTABLISHED &&
......
...@@ -1226,7 +1226,7 @@ static void mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk) ...@@ -1226,7 +1226,7 @@ static void mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk)
WRITE_ONCE(subflow->fail_tout, fail_tout); WRITE_ONCE(subflow->fail_tout, fail_tout);
tcp_send_ack(ssk); tcp_send_ack(ssk);
mptcp_reset_timeout(msk, subflow->fail_tout); mptcp_reset_tout_timer(msk, subflow->fail_tout);
} }
static bool subflow_check_data_avail(struct sock *ssk) static bool subflow_check_data_avail(struct sock *ssk)
...@@ -1362,42 +1362,6 @@ void mptcp_space(const struct sock *ssk, int *space, int *full_space) ...@@ -1362,42 +1362,6 @@ void mptcp_space(const struct sock *ssk, int *space, int *full_space)
*full_space = mptcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf)); *full_space = mptcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf));
} }
/* Propagate a pending subflow error to the MPTCP-level socket @sk.
 *
 * Walks the subflows of @sk. For the first subflow whose sock_error()
 * result is non-zero and eligible for propagation, the error is stored in
 * sk->sk_err, reported through sk_error_report(), and the walk stops.
 * Nothing is reported when no subflow qualifies.
 */
void __mptcp_error_report(struct sock *sk)
{
struct mptcp_subflow_context *subflow;
struct mptcp_sock *msk = mptcp_sk(sk);
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
/* NOTE(review): sock_error() presumably also clears the subflow's
 * pending error, even for subflows skipped below - confirm
 */
int err = sock_error(ssk);
int ssk_state;
if (!err)
continue;
/* only propagate errors on fallen-back sockets or
 * on MPC connect
 */
if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(msk))
continue;
/* We need to propagate only transition to CLOSE state.
 * Orphaned socket will see such state change via
 * subflow_sched_work_if_closed() and that path will properly
 * destroy the msk as needed.
 */
ssk_state = inet_sk_state_load(ssk);
if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD))
inet_sk_state_store(sk, ssk_state);
/* the sock_error() result is negated before being stored */
WRITE_ONCE(sk->sk_err, -err);
/* This barrier is coupled with smp_rmb() in mptcp_poll() */
smp_wmb();
sk_error_report(sk);
/* only the first eligible subflow error is reported */
break;
}
}
static void subflow_error_report(struct sock *ssk) static void subflow_error_report(struct sock *ssk)
{ {
struct sock *sk = mptcp_subflow_ctx(ssk)->conn; struct sock *sk = mptcp_subflow_ctx(ssk)->conn;
...@@ -1588,6 +1552,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, ...@@ -1588,6 +1552,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
mptcp_sock_graft(ssk, sk->sk_socket); mptcp_sock_graft(ssk, sk->sk_socket);
iput(SOCK_INODE(sf)); iput(SOCK_INODE(sf));
WRITE_ONCE(msk->allow_infinite_fallback, false); WRITE_ONCE(msk->allow_infinite_fallback, false);
mptcp_stop_tout_timer(sk);
return 0; return 0;
failed_unlink: failed_unlink:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment