Commit 323410ef authored by David S. Miller

Merge branch 'mptcp-Exchange-MPTCP-DATA_FIN-DATA_ACK-before-TCP-FIN'

Mat Martineau says:

====================
mptcp: Exchange MPTCP DATA_FIN/DATA_ACK before TCP FIN

This series allows the MPTCP-level connection to be closed with the
peers exchanging DATA_FIN and DATA_ACK according to the state machine in
appendix D of RFC 8684. The process is very similar to the TCP
disconnect state machine.

The prior code sent DATA_FIN only when TCP FIN packets were sent, and
did not allow the MPTCP-level connection to be half-closed.

Patch 8 ("mptcp: Use full MPTCP-level disconnect state machine") is the
core of the series. Earlier patches in the series have some small fixes
and helpers in preparation, and the final four small patches do some
cleanup.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 0003041e 721e9089
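
What the series enables, shown as a user-space illustration (not part of the commit): with DATA_FIN/DATA_ACK decoupled from the subflow TCP FINs, an application can half-close the MPTCP connection with shutdown(SHUT_WR) and keep reading until the peer's DATA_FIN surfaces as EOF. IPPROTO_MPTCP (262) is assumed to be available in the target headers; error handling is abbreviated.

```c
/* Illustrative sketch only -- not from this commit. */
#include <unistd.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <sys/socket.h>

#ifndef IPPROTO_MPTCP
#define IPPROTO_MPTCP 262	/* assumed uapi value */
#endif

int half_close_demo(const char *ip, unsigned short port)
{
	struct sockaddr_in addr = { .sin_family = AF_INET,
				    .sin_port = htons(port) };
	char buf[4096];
	int fd;

	inet_pton(AF_INET, ip, &addr.sin_addr);
	fd = socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP);
	if (fd < 0 || connect(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
		return -1;

	if (write(fd, "request", 7) < 0)
		return -1;
	shutdown(fd, SHUT_WR);	/* sends an MPTCP-level DATA_FIN, not just a TCP FIN */

	/* Half-closed: the peer can keep sending until its own DATA_FIN,
	 * which read() reports as EOF (0).
	 */
	while (read(fd, buf, sizeof(buf)) > 0)
		;

	return close(fd);
}
```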
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -451,6 +451,8 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
 static void mptcp_write_data_fin(struct mptcp_subflow_context *subflow,
 				 struct sk_buff *skb, struct mptcp_ext *ext)
 {
+	u64 data_fin_tx_seq = READ_ONCE(mptcp_sk(subflow->conn)->write_seq);
+
 	if (!ext->use_map || !skb->len) {
 		/* RFC6824 requires a DSS mapping with specific values
 		 * if DATA_FIN is set but no data payload is mapped
@@ -458,10 +460,13 @@ static void mptcp_write_data_fin(struct mptcp_subflow_context *subflow,
 		ext->data_fin = 1;
 		ext->use_map = 1;
 		ext->dsn64 = 1;
-		ext->data_seq = subflow->data_fin_tx_seq;
+		/* The write_seq value has already been incremented, so
+		 * the actual sequence number for the DATA_FIN is one less.
+		 */
+		ext->data_seq = data_fin_tx_seq - 1;
 		ext->subflow_seq = 0;
 		ext->data_len = 1;
-	} else if (ext->data_seq + ext->data_len == subflow->data_fin_tx_seq) {
+	} else if (ext->data_seq + ext->data_len == data_fin_tx_seq) {
 		/* If there's an existing DSS mapping and it is the
 		 * final mapping, DATA_FIN consumes 1 additional byte of
 		 * mapping space.
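
A standalone model (illustrative names, not kernel code) of the two mapping cases handled by mptcp_write_data_fin() above: a bare DATA_FIN gets a dedicated mapping at write_seq - 1 with subflow_seq 0 and data_len 1, while a DATA_FIN riding on the final data mapping simply consumes one extra byte of that mapping. For example, if write_seq is 1001 after the close-time increment, a bare DATA_FIN carries data_seq 1000.

```c
/* Standalone model of the DATA_FIN mapping rules above; field and
 * function names are illustrative, not the kernel's.
 */
#include <stdint.h>
#include <stdbool.h>

struct dss_map {
	uint64_t data_seq;	/* MPTCP-level sequence of the mapping */
	uint32_t subflow_seq;	/* relative subflow sequence (0 = no payload) */
	uint16_t data_len;
	bool	 data_fin;
	bool	 use_map;
};

/* write_seq has already been incremented for the DATA_FIN, as in the patch */
static void model_write_data_fin(struct dss_map *m, uint64_t write_seq,
				 bool have_payload)
{
	if (!have_payload) {
		/* Case 1: bare DATA_FIN gets its own RFC 8684-style mapping */
		m->data_fin = true;
		m->use_map = true;
		m->data_seq = write_seq - 1;	/* one less than write_seq */
		m->subflow_seq = 0;
		m->data_len = 1;
	} else if (m->data_seq + m->data_len == write_seq) {
		/* Case 2: the mapping runs right up to the DATA_FIN's
		 * sequence space, so DATA_FIN consumes 1 extra byte of it.
		 */
		m->data_fin = true;
		m->data_len++;
	}
	/* Otherwise: not the final mapping; DATA_FIN is sent separately. */
}
```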
@@ -477,22 +482,17 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
 					  struct mptcp_out_options *opts)
 {
 	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+	struct mptcp_sock *msk = mptcp_sk(subflow->conn);
 	unsigned int dss_size = 0;
+	u64 snd_data_fin_enable;
 	struct mptcp_ext *mpext;
-	struct mptcp_sock *msk;
 	unsigned int ack_size;
 	bool ret = false;
-	u8 tcp_fin;
 
-	if (skb) {
-		mpext = mptcp_get_ext(skb);
-		tcp_fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
-	} else {
-		mpext = NULL;
-		tcp_fin = 0;
-	}
+	mpext = skb ? mptcp_get_ext(skb) : NULL;
+	snd_data_fin_enable = READ_ONCE(msk->snd_data_fin_enable);
 
-	if (!skb || (mpext && mpext->use_map) || tcp_fin) {
+	if (!skb || (mpext && mpext->use_map) || snd_data_fin_enable) {
 		unsigned int map_size;
 
 		map_size = TCPOLEN_MPTCP_DSS_BASE + TCPOLEN_MPTCP_DSS_MAP64;
@@ -502,7 +502,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
 		if (mpext)
 			opts->ext_copy = *mpext;
 
-		if (skb && tcp_fin && subflow->data_fin_tx_enable)
+		if (skb && snd_data_fin_enable)
 			mptcp_write_data_fin(subflow, skb, &opts->ext_copy);
 		ret = true;
 	}
@@ -511,7 +511,6 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
 	 * if the first subflow may have the already the remote key handy
 	 */
 	opts->ext_copy.use_ack = 0;
-	msk = mptcp_sk(subflow->conn);
 	if (!READ_ONCE(msk->can_ack)) {
 		*size = ALIGN(dss_size, 4);
 		return ret;
@@ -783,6 +782,22 @@ static void update_una(struct mptcp_sock *msk,
 	}
 }
 
+bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq)
+{
+	/* Skip if DATA_FIN was already received.
+	 * If updating simultaneously with the recvmsg loop, values
+	 * should match. If they mismatch, the peer is misbehaving and
+	 * we will prefer the most recent information.
+	 */
+	if (READ_ONCE(msk->rcv_data_fin) || !READ_ONCE(msk->first))
+		return false;
+
+	WRITE_ONCE(msk->rcv_data_fin_seq, data_fin_seq);
+	WRITE_ONCE(msk->rcv_data_fin, 1);
+
+	return true;
+}
+
 static bool add_addr_hmac_valid(struct mptcp_sock *msk,
 				struct mptcp_options_received *mp_opt)
 {
@@ -853,6 +868,20 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb,
 	if (mp_opt.use_ack)
 		update_una(msk, &mp_opt);
 
+	/* Zero-data-length packets are dropped by the caller and not
+	 * propagated to the MPTCP layer, so the skb extension does not
+	 * need to be allocated or populated. DATA_FIN information, if
+	 * present, needs to be updated here before the skb is freed.
+	 */
+	if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) {
+		if (mp_opt.data_fin && mp_opt.data_len == 1 &&
+		    mptcp_update_rcv_data_fin(msk, mp_opt.data_seq) &&
+		    schedule_work(&msk->work))
+			sock_hold(subflow->conn);
+
+		return;
+	}
+
 	mpext = skb_ext_add(skb, SKB_EXT_MPTCP);
 	if (!mpext)
 		return;
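
The hunk above covers packets whose TCP payload is empty (seq == end_seq): they are dropped before reaching the MPTCP receive path, so a DATA_FIN carried in their DSS option has to be latched here. A standalone model of that decision (types and names are illustrative):

```c
/* Standalone model of the zero-length-packet case above; types and
 * names are illustrative, not the kernel's.
 */
#include <stdint.h>
#include <stdbool.h>

struct rx_opts {
	bool	 data_fin;
	uint16_t data_len;
	uint64_t data_seq;
};

struct conn {
	bool	 rcv_data_fin;		/* latched DATA_FIN flag */
	uint64_t rcv_data_fin_seq;	/* MPTCP-level FIN sequence */
};

/* Returns true when a bare DATA_FIN was latched from a packet that will
 * never reach the MPTCP receive queue (its TCP payload is empty).
 */
static bool model_incoming_options(struct conn *c, const struct rx_opts *o,
				   uint32_t seq, uint32_t end_seq)
{
	if (seq != end_seq)
		return false;	/* has payload: handled via the skb extension */

	if (o->data_fin && o->data_len == 1 && !c->rcv_data_fin) {
		c->rcv_data_fin_seq = o->data_seq;
		c->rcv_data_fin = true;	/* worker will ack and change state */
		return true;
	}
	return false;
}
```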
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -16,6 +16,7 @@
 #include <net/inet_hashtables.h>
 #include <net/protocol.h>
 #include <net/tcp.h>
+#include <net/tcp_states.h>
 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
 #include <net/transp_v6.h>
 #endif
@@ -142,6 +143,14 @@ static void __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
 	MPTCP_SKB_CB(skb)->offset = offset;
 }
 
+static void mptcp_stop_timer(struct sock *sk)
+{
+	struct inet_connection_sock *icsk = inet_csk(sk);
+
+	sk_stop_timer(sk, &icsk->icsk_retransmit_timer);
+	mptcp_sk(sk)->timer_ival = 0;
+}
+
 /* both sockets must be locked */
 static bool mptcp_subflow_dsn_valid(const struct mptcp_sock *msk,
 				    struct sock *ssk)
@@ -163,6 +172,139 @@ static bool mptcp_subflow_dsn_valid(const struct mptcp_sock *msk,
 	return mptcp_subflow_data_available(ssk);
 }
 
+static void mptcp_check_data_fin_ack(struct sock *sk)
+{
+	struct mptcp_sock *msk = mptcp_sk(sk);
+
+	if (__mptcp_check_fallback(msk))
+		return;
+
+	/* Look for an acknowledged DATA_FIN */
+	if (((1 << sk->sk_state) &
+	     (TCPF_FIN_WAIT1 | TCPF_CLOSING | TCPF_LAST_ACK)) &&
+	    msk->write_seq == atomic64_read(&msk->snd_una)) {
+		mptcp_stop_timer(sk);
+
+		WRITE_ONCE(msk->snd_data_fin_enable, 0);
+
+		switch (sk->sk_state) {
+		case TCP_FIN_WAIT1:
+			inet_sk_state_store(sk, TCP_FIN_WAIT2);
+			sk->sk_state_change(sk);
+			break;
+		case TCP_CLOSING:
+			fallthrough;
+		case TCP_LAST_ACK:
+			inet_sk_state_store(sk, TCP_CLOSE);
+			sk->sk_state_change(sk);
+			break;
+		}
+
+		if (sk->sk_shutdown == SHUTDOWN_MASK ||
+		    sk->sk_state == TCP_CLOSE)
+			sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
+		else
+			sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
+	}
+}
+
+static bool mptcp_pending_data_fin(struct sock *sk, u64 *seq)
+{
+	struct mptcp_sock *msk = mptcp_sk(sk);
+
+	if (READ_ONCE(msk->rcv_data_fin) &&
+	    ((1 << sk->sk_state) &
+	     (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2))) {
+		u64 rcv_data_fin_seq = READ_ONCE(msk->rcv_data_fin_seq);
+
+		if (msk->ack_seq == rcv_data_fin_seq) {
+			if (seq)
+				*seq = rcv_data_fin_seq;
+
+			return true;
+		}
+	}
+
+	return false;
+}
+
+static void mptcp_set_timeout(const struct sock *sk, const struct sock *ssk)
+{
+	long tout = ssk && inet_csk(ssk)->icsk_pending ?
+		    inet_csk(ssk)->icsk_timeout - jiffies : 0;
+
+	if (tout <= 0)
+		tout = mptcp_sk(sk)->timer_ival;
+	mptcp_sk(sk)->timer_ival = tout > 0 ? tout : TCP_RTO_MIN;
+}
+
+static void mptcp_check_data_fin(struct sock *sk)
+{
+	struct mptcp_sock *msk = mptcp_sk(sk);
+	u64 rcv_data_fin_seq;
+
+	if (__mptcp_check_fallback(msk) || !msk->first)
+		return;
+
+	/* Need to ack a DATA_FIN received from a peer while this side
+	 * of the connection is in ESTABLISHED, FIN_WAIT1, or FIN_WAIT2.
+	 * msk->rcv_data_fin was set when parsing the incoming options
+	 * at the subflow level and the msk lock was not held, so this
+	 * is the first opportunity to act on the DATA_FIN and change
+	 * the msk state.
+	 *
+	 * If we are caught up to the sequence number of the incoming
+	 * DATA_FIN, send the DATA_ACK now and do state transition. If
+	 * not caught up, do nothing and let the recv code send DATA_ACK
+	 * when catching up.
+	 */
+
+	if (mptcp_pending_data_fin(sk, &rcv_data_fin_seq)) {
+		struct mptcp_subflow_context *subflow;
+
+		msk->ack_seq++;
+		WRITE_ONCE(msk->rcv_data_fin, 0);
+
+		sk->sk_shutdown |= RCV_SHUTDOWN;
+		smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
+		set_bit(MPTCP_DATA_READY, &msk->flags);
+
+		switch (sk->sk_state) {
+		case TCP_ESTABLISHED:
+			inet_sk_state_store(sk, TCP_CLOSE_WAIT);
+			break;
+		case TCP_FIN_WAIT1:
+			inet_sk_state_store(sk, TCP_CLOSING);
+			break;
+		case TCP_FIN_WAIT2:
+			inet_sk_state_store(sk, TCP_CLOSE);
+			// @@ Close subflows now?
+			break;
+		default:
+			/* Other states not expected */
+			WARN_ON_ONCE(1);
+			break;
+		}
+
+		mptcp_set_timeout(sk, NULL);
+		mptcp_for_each_subflow(msk, subflow) {
+			struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+
+			lock_sock(ssk);
+			tcp_send_ack(ssk);
+			release_sock(ssk);
+		}
+
+		sk->sk_state_change(sk);
+
+		if (sk->sk_shutdown == SHUTDOWN_MASK ||
+		    sk->sk_state == TCP_CLOSE)
+			sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
+		else
+			sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
+	}
+}
+
 static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
 					   struct sock *ssk,
 					   unsigned int *bytes)
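
mptcp_check_data_fin() above applies the same receive-side transitions TCP uses for a FIN, keyed off the DATA_FIN once ack_seq has caught up to it. A compact model of just the transition table (illustrative, not kernel code):

```c
/* Standalone model of the receive-side transitions in
 * mptcp_check_data_fin() above; names are illustrative.
 */
enum st { ESTABLISHED, FIN_WAIT1, FIN_WAIT2, CLOSE_WAIT, CLOSING, CLOSED };

/* The peer's DATA_FIN is acted on only after all in-sequence data has
 * been consumed; then state moves exactly as in TCP's FIN handling and
 * RFC 8684 appendix D.
 */
static enum st on_data_fin(enum st s)
{
	switch (s) {
	case ESTABLISHED:	return CLOSE_WAIT;	/* peer closed first */
	case FIN_WAIT1:		return CLOSING;		/* simultaneous close */
	case FIN_WAIT2:		return CLOSED;		/* our FIN already acked */
	default:		return s;		/* unexpected; kernel WARNs */
	}
}
```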
@@ -239,6 +381,15 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
 
 	*bytes = moved;
 
+	/* If the moves have caught up with the DATA_FIN sequence number
+	 * it's time to ack the DATA_FIN and change socket state, but
+	 * this is not a good place to change state. Let the workqueue
+	 * do it.
+	 */
+	if (mptcp_pending_data_fin(sk, NULL) &&
+	    schedule_work(&msk->work))
+		sock_hold(sk);
+
 	return done;
 }
@@ -303,16 +454,6 @@ static void __mptcp_flush_join_list(struct mptcp_sock *msk)
 	spin_unlock_bh(&msk->join_list_lock);
 }
 
-static void mptcp_set_timeout(const struct sock *sk, const struct sock *ssk)
-{
-	long tout = ssk && inet_csk(ssk)->icsk_pending ?
-		    inet_csk(ssk)->icsk_timeout - jiffies : 0;
-
-	if (tout <= 0)
-		tout = mptcp_sk(sk)->timer_ival;
-	mptcp_sk(sk)->timer_ival = tout > 0 ? tout : TCP_RTO_MIN;
-}
-
 static bool mptcp_timer_pending(struct sock *sk)
 {
 	return timer_pending(&inet_csk(sk)->icsk_retransmit_timer);
@@ -334,7 +475,8 @@ void mptcp_data_acked(struct sock *sk)
 {
 	mptcp_reset_timer(sk);
 
-	if (!sk_stream_is_writeable(sk) &&
+	if ((!sk_stream_is_writeable(sk) ||
+	     (inet_sk_state_load(sk) != TCP_ESTABLISHED)) &&
 	    schedule_work(&mptcp_sk(sk)->work))
 		sock_hold(sk);
 }
@@ -369,14 +511,6 @@ static void mptcp_check_for_eof(struct mptcp_sock *msk)
 	}
 }
 
-static void mptcp_stop_timer(struct sock *sk)
-{
-	struct inet_connection_sock *icsk = inet_csk(sk);
-
-	sk_stop_timer(sk, &icsk->icsk_retransmit_timer);
-	mptcp_sk(sk)->timer_ival = 0;
-}
-
 static bool mptcp_ext_cache_refill(struct mptcp_sock *msk)
 {
 	const struct sock *sk = (const struct sock *)msk;
@@ -659,7 +793,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
 out:
 	if (!retransmission)
 		pfrag->offset += frag_truesize;
-	*write_seq += ret;
+	WRITE_ONCE(*write_seq, *write_seq + ret);
 	mptcp_subflow_ctx(ssk)->rel_write_seq += ret;
 
 	return ret;
@@ -748,6 +882,11 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 restart:
 	mptcp_clean_una(sk);
 
+	if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) {
+		ret = -EPIPE;
+		goto out;
+	}
+
 wait_for_sndbuf:
 	__mptcp_flush_join_list(msk);
 	ssk = mptcp_subflow_get_send(msk);
@@ -1130,7 +1269,7 @@ static void mptcp_retransmit_handler(struct sock *sk)
 {
 	struct mptcp_sock *msk = mptcp_sk(sk);
 
-	if (atomic64_read(&msk->snd_una) == msk->write_seq) {
+	if (atomic64_read(&msk->snd_una) == READ_ONCE(msk->write_seq)) {
 		mptcp_stop_timer(sk);
 	} else {
 		set_bit(MPTCP_WORK_RTX, &msk->flags);
@@ -1255,6 +1394,7 @@ static void mptcp_worker(struct work_struct *work)
 
 	lock_sock(sk);
 	mptcp_clean_una(sk);
+	mptcp_check_data_fin_ack(sk);
 	__mptcp_flush_join_list(msk);
 	__mptcp_move_skbs(msk);
 
@@ -1264,6 +1404,8 @@ static void mptcp_worker(struct work_struct *work)
 	if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags))
 		mptcp_check_for_eof(msk);
 
+	mptcp_check_data_fin(sk);
+
 	if (!test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
 		goto unlock;
@@ -1386,8 +1528,7 @@ static void mptcp_cancel_work(struct sock *sk)
 	sock_put(sk);
 }
 
-static void mptcp_subflow_shutdown(struct sock *ssk, int how,
-				   bool data_fin_tx_enable, u64 data_fin_tx_seq)
+static void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how)
 {
 	lock_sock(ssk);
@@ -1400,34 +1541,84 @@ static void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how)
 		tcp_disconnect(ssk, O_NONBLOCK);
 		break;
 	default:
-		if (data_fin_tx_enable) {
-			struct mptcp_subflow_context *subflow;
-
-			subflow = mptcp_subflow_ctx(ssk);
-			subflow->data_fin_tx_seq = data_fin_tx_seq;
-			subflow->data_fin_tx_enable = 1;
+		if (__mptcp_check_fallback(mptcp_sk(sk))) {
+			pr_debug("Fallback");
+			ssk->sk_shutdown |= how;
+			tcp_shutdown(ssk, how);
+		} else {
+			pr_debug("Sending DATA_FIN on subflow %p", ssk);
+			mptcp_set_timeout(sk, ssk);
+			tcp_send_ack(ssk);
 		}
-
-		ssk->sk_shutdown |= how;
-		tcp_shutdown(ssk, how);
 		break;
 	}
 
 	release_sock(ssk);
 }
 
-/* Called with msk lock held, releases such lock before returning */
+static const unsigned char new_state[16] = {
+	/* current state:     new state:      action:	*/
+	[0 /* (Invalid) */] = TCP_CLOSE,
+	[TCP_ESTABLISHED] = TCP_FIN_WAIT1 | TCP_ACTION_FIN,
+	[TCP_SYN_SENT] = TCP_CLOSE,
+	[TCP_SYN_RECV] = TCP_FIN_WAIT1 | TCP_ACTION_FIN,
+	[TCP_FIN_WAIT1] = TCP_FIN_WAIT1,
+	[TCP_FIN_WAIT2] = TCP_FIN_WAIT2,
+	[TCP_TIME_WAIT] = TCP_CLOSE,	/* should not happen ! */
+	[TCP_CLOSE] = TCP_CLOSE,
+	[TCP_CLOSE_WAIT] = TCP_LAST_ACK | TCP_ACTION_FIN,
+	[TCP_LAST_ACK] = TCP_LAST_ACK,
+	[TCP_LISTEN] = TCP_CLOSE,
+	[TCP_CLOSING] = TCP_CLOSING,
+	[TCP_NEW_SYN_RECV] = TCP_CLOSE,	/* should not happen ! */
+};
+
+static int mptcp_close_state(struct sock *sk)
+{
+	int next = (int)new_state[sk->sk_state];
+	int ns = next & TCP_STATE_MASK;
+
+	inet_sk_state_store(sk, ns);
+
+	return next & TCP_ACTION_FIN;
+}
+
 static void mptcp_close(struct sock *sk, long timeout)
 {
 	struct mptcp_subflow_context *subflow, *tmp;
 	struct mptcp_sock *msk = mptcp_sk(sk);
 	LIST_HEAD(conn_list);
-	u64 data_fin_tx_seq;
 
 	lock_sock(sk);
+	sk->sk_shutdown = SHUTDOWN_MASK;
+
+	if (sk->sk_state == TCP_LISTEN) {
+		inet_sk_state_store(sk, TCP_CLOSE);
+		goto cleanup;
+	} else if (sk->sk_state == TCP_CLOSE) {
+		goto cleanup;
+	}
+
+	if (__mptcp_check_fallback(msk)) {
+		goto update_state;
+	} else if (mptcp_close_state(sk)) {
+		pr_debug("Sending DATA_FIN sk=%p", sk);
+		WRITE_ONCE(msk->write_seq, msk->write_seq + 1);
+		WRITE_ONCE(msk->snd_data_fin_enable, 1);
+
+		mptcp_for_each_subflow(msk, subflow) {
+			struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow);
+
+			mptcp_subflow_shutdown(sk, tcp_sk, SHUTDOWN_MASK);
+		}
+	}
+
+	sk_stream_wait_close(sk, timeout);
+
+update_state:
 	inet_sk_state_store(sk, TCP_CLOSE);
 
+cleanup:
 	/* be sure to always acquire the join list lock, to sync vs
 	 * mptcp_finish_join().
 	 */
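
The new_state[] table above mirrors the one TCP uses in tcp_close_state(): the low bits of each entry encode the next state, and TCP_ACTION_FIN flags whether a (DATA_)FIN must be sent when leaving the current state. A standalone model of the encoding (the constants here are illustrative, not the kernel's):

```c
/* Standalone model of the new_state[] technique above, borrowed from
 * TCP's tcp_close_state(); constants are illustrative.
 */
#include <stdbool.h>

#define STATE_MASK	0x0f	/* low bits: next state */
#define ACTION_FIN	0x10	/* high bit: send a (DATA_)FIN now */

enum { S_CLOSE, S_ESTABLISHED, S_FIN_WAIT1, S_CLOSE_WAIT, S_LAST_ACK };

static const unsigned char close_map[] = {
	[S_ESTABLISHED]	= S_FIN_WAIT1 | ACTION_FIN,	/* active close */
	[S_CLOSE_WAIT]	= S_LAST_ACK | ACTION_FIN,	/* passive close */
	[S_FIN_WAIT1]	= S_FIN_WAIT1,	/* FIN already sent: no action */
	[S_LAST_ACK]	= S_LAST_ACK,
	[S_CLOSE]	= S_CLOSE,
};

/* Store the next state and report whether a DATA_FIN must be sent;
 * one table lookup replaces a per-state switch on every close path.
 */
static bool close_state(int *state)
{
	int next = close_map[*state];

	*state = next & STATE_MASK;
	return next & ACTION_FIN;
}
```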
@@ -1436,17 +1627,12 @@ static void mptcp_close(struct sock *sk, long timeout)
 	spin_unlock_bh(&msk->join_list_lock);
 	list_splice_init(&msk->conn_list, &conn_list);
 
-	data_fin_tx_seq = msk->write_seq;
-
 	__mptcp_clear_xmit(sk);
 
 	release_sock(sk);
 
 	list_for_each_entry_safe(subflow, tmp, &conn_list, node) {
 		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
-
-		subflow->data_fin_tx_seq = data_fin_tx_seq;
-		subflow->data_fin_tx_enable = 1;
 		__mptcp_close_ssk(sk, ssk, subflow, timeout);
 	}
@@ -2123,11 +2309,8 @@ static int mptcp_shutdown(struct socket *sock, int how)
 	pr_debug("sk=%p, how=%d", msk, how);
 
 	lock_sock(sock->sk);
-	if (how == SHUT_WR || how == SHUT_RDWR)
-		inet_sk_state_store(sock->sk, TCP_FIN_WAIT1);
-
 	how++;
 	if ((how & ~SHUTDOWN_MASK) || !how) {
 		ret = -EINVAL;
 		goto out_unlock;
@@ -2141,11 +2324,31 @@ static int mptcp_shutdown(struct socket *sock, int how)
 		sock->state = SS_CONNECTED;
 	}
 
-	__mptcp_flush_join_list(msk);
-	mptcp_for_each_subflow(msk, subflow) {
-		struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow);
-
-		mptcp_subflow_shutdown(tcp_sk, how, 1, msk->write_seq);
+	/* If we've already sent a FIN, or it's a closed state, skip this. */
+	if (__mptcp_check_fallback(msk)) {
+		if (how == SHUT_WR || how == SHUT_RDWR)
+			inet_sk_state_store(sock->sk, TCP_FIN_WAIT1);
+
+		mptcp_for_each_subflow(msk, subflow) {
+			struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow);
+
+			mptcp_subflow_shutdown(sock->sk, tcp_sk, how);
+		}
+	} else if ((how & SEND_SHUTDOWN) &&
+		   ((1 << sock->sk->sk_state) &
+		    (TCPF_ESTABLISHED | TCPF_SYN_SENT |
+		     TCPF_SYN_RECV | TCPF_CLOSE_WAIT)) &&
+		   mptcp_close_state(sock->sk)) {
+		__mptcp_flush_join_list(msk);
+
+		WRITE_ONCE(msk->write_seq, msk->write_seq + 1);
+		WRITE_ONCE(msk->snd_data_fin_enable, 1);
+
+		mptcp_for_each_subflow(msk, subflow) {
+			struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow);
+
+			mptcp_subflow_shutdown(sock->sk, tcp_sk, how);
+		}
 	}
 
 	/* Wake up anyone sleeping in poll. */
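
Several hunks above also convert plain accesses to msk->write_seq into READ_ONCE()/WRITE_ONCE() pairs, because mptcp_write_data_fin() now reads write_seq from subflow context without the msk socket lock held. A simplified sketch of what that pairing buys (the macro definitions below capture the essence in GNU C, not the kernel's exact implementation):

```c
/* Simplified essence of the READ_ONCE/WRITE_ONCE pairing used for
 * msk->write_seq: the volatile accesses force a single load/store and
 * stop the compiler from tearing, caching, or refetching the value
 * while a lockless reader (the options code) races a writer.
 */
#define READ_ONCE(x)	 (*(const volatile typeof(x) *)&(x))
#define WRITE_ONCE(x, v) (*(volatile typeof(x) *)&(x) = (v))

struct counter { unsigned long long seq; };

static void writer_add(struct counter *c, unsigned long long n)
{
	/* never plain "c->seq += n": a reader could observe a torn update */
	WRITE_ONCE(c->seq, READ_ONCE(c->seq) + n);
}

static unsigned long long reader_snapshot(const struct counter *c)
{
	return READ_ONCE(c->seq);	/* single, untorn load */
}
```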
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -193,12 +193,15 @@ struct mptcp_sock {
 	u64		remote_key;
 	u64		write_seq;
 	u64		ack_seq;
+	u64		rcv_data_fin_seq;
 	atomic64_t	snd_una;
 	unsigned long	timer_ival;
 	u32		token;
 	unsigned long	flags;
 	bool		can_ack;
 	bool		fully_established;
+	bool		rcv_data_fin;
+	bool		snd_data_fin_enable;
 	spinlock_t	join_list_lock;
 	struct work_struct work;
 	struct list_head conn_list;
@@ -291,10 +294,8 @@ struct mptcp_subflow_context {
 		backup : 1,
 		data_avail : 1,
 		rx_eof : 1,
-		data_fin_tx_enable : 1,
 		use_64bit_ack : 1,  /* Set when we received a 64-bit DSN */
 		can_ack : 1;	    /* only after processing the remote a key */
-	u64	data_fin_tx_seq;
 	u32	remote_nonce;
 	u64	thmac;
 	u32	local_nonce;
@@ -386,6 +387,7 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk);
 bool mptcp_finish_join(struct sock *sk);
 void mptcp_data_acked(struct sock *sk);
 void mptcp_subflow_eof(struct sock *sk);
+bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq);
 
 void __init mptcp_token_init(void);
 static inline void mptcp_token_init_request(struct request_sock *req)
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -598,7 +598,8 @@ static bool validate_mapping(struct sock *ssk, struct sk_buff *skb)
 	return true;
 }
 
-static enum mapping_status get_mapping_status(struct sock *ssk)
+static enum mapping_status get_mapping_status(struct sock *ssk,
+					      struct mptcp_sock *msk)
 {
 	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
 	struct mptcp_ext *mpext;
@@ -648,7 +649,8 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
 	if (mpext->data_fin == 1) {
 		if (data_len == 1) {
-			pr_debug("DATA_FIN with no payload");
+			mptcp_update_rcv_data_fin(msk, mpext->data_seq);
+			pr_debug("DATA_FIN with no payload seq=%llu", mpext->data_seq);
 			if (subflow->map_valid) {
 				/* A DATA_FIN might arrive in a DSS
 				 * option before the previous mapping
@@ -660,6 +662,9 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
 			} else {
 				return MAPPING_DATA_FIN;
 			}
+		} else {
+			mptcp_update_rcv_data_fin(msk, mpext->data_seq + data_len);
+			pr_debug("DATA_FIN with mapping seq=%llu", mpext->data_seq + data_len);
 		}
 
 		/* Adjust for DATA_FIN using 1 byte of sequence space */
@@ -748,7 +753,7 @@ static bool subflow_check_data_avail(struct sock *ssk)
 		u64 ack_seq;
 		u64 old_ack;
 
-		status = get_mapping_status(ssk);
+		status = get_mapping_status(ssk, msk);
 		pr_debug("msk=%p ssk=%p status=%d", msk, ssk, status);
 		if (status == MAPPING_INVALID) {
 			ssk->sk_err = EBADMSG;
@@ -1154,7 +1159,8 @@ static void subflow_state_change(struct sock *sk)
 	if (mptcp_subflow_data_available(sk))
 		mptcp_data_ready(parent, sk);
 
-	if (!(parent->sk_shutdown & RCV_SHUTDOWN) &&
+	if (__mptcp_check_fallback(mptcp_sk(parent)) &&
+	    !(parent->sk_shutdown & RCV_SHUTDOWN) &&
 	    !subflow->rx_eof && subflow_is_done(sk)) {
 		subflow->rx_eof = 1;
 		mptcp_subflow_eof(parent);