Commit fa3fe2b1, authored by Florian Westphal, committed by Jakub Kicinski

mptcp: track window announced to peer

Out-of-order (OoO) handling attempts to detect when a packet is out-of-window
by testing the current ack sequence and remaining space against the packet's
sequence number.

This doesn't work reliably. Instead, store the highest allowed sequence number
that we've announced to the peer and use it to detect out-of-window (oow) packets.

Do this when the MPTCP options get written to the packet (wire format).
For this to work, the write_options call needs to be moved until after the
stack has selected a new TCP window.
Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent 523514ed
...@@ -88,7 +88,8 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, ...@@ -88,7 +88,8 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
struct mptcp_out_options *opts); struct mptcp_out_options *opts);
void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb); void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb);
void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts); void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
struct mptcp_out_options *opts);
/* move the skb extension owership, with the assumption that 'to' is /* move the skb extension owership, with the assumption that 'to' is
* newly allocated * newly allocated
......
...@@ -445,11 +445,12 @@ struct tcp_out_options { ...@@ -445,11 +445,12 @@ struct tcp_out_options {
struct mptcp_out_options mptcp; struct mptcp_out_options mptcp;
}; };
static void mptcp_options_write(__be32 *ptr, struct tcp_out_options *opts) static void mptcp_options_write(__be32 *ptr, const struct tcp_sock *tp,
struct tcp_out_options *opts)
{ {
#if IS_ENABLED(CONFIG_MPTCP) #if IS_ENABLED(CONFIG_MPTCP)
if (unlikely(OPTION_MPTCP & opts->options)) if (unlikely(OPTION_MPTCP & opts->options))
mptcp_write_options(ptr, &opts->mptcp); mptcp_write_options(ptr, tp, &opts->mptcp);
#endif #endif
} }
...@@ -701,7 +702,7 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, ...@@ -701,7 +702,7 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
smc_options_write(ptr, &options); smc_options_write(ptr, &options);
mptcp_options_write(ptr, opts); mptcp_options_write(ptr, tp, opts);
} }
static void smc_set_option(const struct tcp_sock *tp, static void smc_set_option(const struct tcp_sock *tp,
...@@ -1346,7 +1347,6 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, ...@@ -1346,7 +1347,6 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
} }
} }
tcp_options_write((__be32 *)(th + 1), tp, &opts);
skb_shinfo(skb)->gso_type = sk->sk_gso_type; skb_shinfo(skb)->gso_type = sk->sk_gso_type;
if (likely(!(tcb->tcp_flags & TCPHDR_SYN))) { if (likely(!(tcb->tcp_flags & TCPHDR_SYN))) {
th->window = htons(tcp_select_window(sk)); th->window = htons(tcp_select_window(sk));
...@@ -1357,6 +1357,9 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, ...@@ -1357,6 +1357,9 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
*/ */
th->window = htons(min(tp->rcv_wnd, 65535U)); th->window = htons(min(tp->rcv_wnd, 65535U));
} }
tcp_options_write((__be32 *)(th + 1), tp, &opts);
#ifdef CONFIG_TCP_MD5SIG #ifdef CONFIG_TCP_MD5SIG
/* Calculate the MD5 hash, as we have all we need now */ /* Calculate the MD5 hash, as we have all we need now */
if (md5) { if (md5) {
......
...@@ -1010,7 +1010,24 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) ...@@ -1010,7 +1010,24 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
} }
} }
void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts) static void mptcp_set_rwin(const struct tcp_sock *tp)
{
/* mptcp_set_rwin: remember the highest sequence number announced to the
 * peer as acceptable, so that receive-side code can compare incoming data
 * against it (msk->rcv_wnd_sent).
 *
 * @tp: TCP socket of the subflow whose options are being written; it is
 *      cast to struct sock to look up the subflow context and, through
 *      subflow->conn, the owning MPTCP socket.
 */
const struct sock *ssk = (const struct sock *)tp;
const struct mptcp_subflow_context *subflow;
struct mptcp_sock *msk;
u64 ack_seq;
subflow = mptcp_subflow_ctx(ssk);
msk = mptcp_sk(subflow->conn);
/* Candidate right edge: MPTCP-level ack_seq plus this TCP socket's
 * receive window (tp->rcv_wnd).
 */
ack_seq = READ_ONCE(msk->ack_seq) + tp->rcv_wnd;
/* Only store the candidate when after64() reports it is beyond the value
 * already recorded, i.e. rcv_wnd_sent is never moved backwards here.
 * NOTE(review): after64() is presumably a wrap-safe 64-bit "later than"
 * sequence comparison -- confirm against its definition.
 */
if (after64(ack_seq, READ_ONCE(msk->rcv_wnd_sent)))
WRITE_ONCE(msk->rcv_wnd_sent, ack_seq);
}
void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
struct mptcp_out_options *opts)
{ {
if ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK | if ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK |
OPTION_MPTCP_MPC_ACK) & opts->suboptions) { OPTION_MPTCP_MPC_ACK) & opts->suboptions) {
...@@ -1167,4 +1184,7 @@ void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts) ...@@ -1167,4 +1184,7 @@ void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts)
TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
} }
} }
if (tp)
mptcp_set_rwin(tp);
} }
...@@ -168,19 +168,19 @@ static void mptcp_data_queue_ofo(struct mptcp_sock *msk, struct sk_buff *skb) ...@@ -168,19 +168,19 @@ static void mptcp_data_queue_ofo(struct mptcp_sock *msk, struct sk_buff *skb)
struct rb_node **p, *parent; struct rb_node **p, *parent;
u64 seq, end_seq, max_seq; u64 seq, end_seq, max_seq;
struct sk_buff *skb1; struct sk_buff *skb1;
int space;
seq = MPTCP_SKB_CB(skb)->map_seq; seq = MPTCP_SKB_CB(skb)->map_seq;
end_seq = MPTCP_SKB_CB(skb)->end_seq; end_seq = MPTCP_SKB_CB(skb)->end_seq;
space = tcp_space(sk); max_seq = READ_ONCE(msk->rcv_wnd_sent);
max_seq = space > 0 ? space + msk->ack_seq : msk->ack_seq;
pr_debug("msk=%p seq=%llx limit=%llx empty=%d", msk, seq, max_seq, pr_debug("msk=%p seq=%llx limit=%llx empty=%d", msk, seq, max_seq,
RB_EMPTY_ROOT(&msk->out_of_order_queue)); RB_EMPTY_ROOT(&msk->out_of_order_queue));
if (after64(seq, max_seq)) { if (after64(end_seq, max_seq)) {
/* out of window */ /* out of window */
mptcp_drop(sk, skb); mptcp_drop(sk, skb);
pr_debug("oow by %ld", (unsigned long)seq - (unsigned long)max_seq); pr_debug("oow by %lld, rcv_wnd_sent %llu\n",
(unsigned long long)end_seq - (unsigned long)max_seq,
(unsigned long long)msk->rcv_wnd_sent);
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_NODSSWINDOW); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_NODSSWINDOW);
return; return;
} }
...@@ -2295,6 +2295,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk, ...@@ -2295,6 +2295,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq); mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq);
ack_seq++; ack_seq++;
WRITE_ONCE(msk->ack_seq, ack_seq); WRITE_ONCE(msk->ack_seq, ack_seq);
WRITE_ONCE(msk->rcv_wnd_sent, ack_seq);
} }
sock_reset_flag(nsk, SOCK_RCU_FREE); sock_reset_flag(nsk, SOCK_RCU_FREE);
...@@ -2587,6 +2588,7 @@ void mptcp_finish_connect(struct sock *ssk) ...@@ -2587,6 +2588,7 @@ void mptcp_finish_connect(struct sock *ssk)
WRITE_ONCE(msk->write_seq, subflow->idsn + 1); WRITE_ONCE(msk->write_seq, subflow->idsn + 1);
WRITE_ONCE(msk->snd_nxt, msk->write_seq); WRITE_ONCE(msk->snd_nxt, msk->write_seq);
WRITE_ONCE(msk->ack_seq, ack_seq); WRITE_ONCE(msk->ack_seq, ack_seq);
WRITE_ONCE(msk->rcv_wnd_sent, ack_seq);
WRITE_ONCE(msk->can_ack, 1); WRITE_ONCE(msk->can_ack, 1);
atomic64_set(&msk->snd_una, msk->write_seq); atomic64_set(&msk->snd_una, msk->write_seq);
......
...@@ -216,6 +216,7 @@ struct mptcp_sock { ...@@ -216,6 +216,7 @@ struct mptcp_sock {
u64 write_seq; u64 write_seq;
u64 snd_nxt; u64 snd_nxt;
u64 ack_seq; u64 ack_seq;
u64 rcv_wnd_sent;
u64 rcv_data_fin_seq; u64 rcv_data_fin_seq;
struct sock *last_snd; struct sock *last_snd;
int snd_burst; int snd_burst;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment