Commit 7788174e authored by Yuchung Cheng's avatar Yuchung Cheng Committed by David S. Miller

tcp: change IPv6 flow-label upon receiving spurious retransmission

Currently a Linux IPv6 TCP sender will change the flow label upon
timeouts to potentially steer away from a data path that has gone
bad. However this does not help if the problem is on the ACK path
and the data path is healthy. In this case the receiver is likely
to receive repeated spurious retransmission because the sender
couldn't get the ACKs in time and has recurring timeouts.

This patch adds another feature to mitigate this problem. It
leverages the DSACK states in the receiver to change the flow
label of the ACKs to speculatively re-route the ACK packets.
In order to allow triggering on the second consecutive spurious
RTO, the receiver changes the flow label upon sending a second
consecutive DSACK for a sequence number below RCV.NXT.
Signed-off-by: default avatarYuchung Cheng <ycheng@google.com>
Signed-off-by: default avatarNeal Cardwell <ncardwell@google.com>
Signed-off-by: default avatarEric Dumazet <edumazet@google.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 506a03aa
...@@ -2595,6 +2595,8 @@ int tcp_disconnect(struct sock *sk, int flags) ...@@ -2595,6 +2595,8 @@ int tcp_disconnect(struct sock *sk, int flags)
tp->compressed_ack = 0; tp->compressed_ack = 0;
tp->bytes_sent = 0; tp->bytes_sent = 0;
tp->bytes_retrans = 0; tp->bytes_retrans = 0;
tp->duplicate_sack[0].start_seq = 0;
tp->duplicate_sack[0].end_seq = 0;
tp->dsack_dups = 0; tp->dsack_dups = 0;
tp->reord_seen = 0; tp->reord_seen = 0;
......
...@@ -4199,6 +4199,17 @@ static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq) ...@@ -4199,6 +4199,17 @@ static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq)
tcp_sack_extend(tp->duplicate_sack, seq, end_seq); tcp_sack_extend(tp->duplicate_sack, seq, end_seq);
} }
static void tcp_rcv_spurious_retrans(struct sock *sk, const struct sk_buff *skb)
{
/* When the ACK path fails or drops most ACKs, the sender would
* timeout and spuriously retransmit the same segment repeatedly.
* The receiver remembers and reflects via DSACKs. Leverage the
* DSACK state and change the txhash to re-route speculatively.
*/
if (TCP_SKB_CB(skb)->seq == tcp_sk(sk)->duplicate_sack[0].start_seq)
sk_rethink_txhash(sk);
}
static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb) static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
{ {
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
...@@ -4211,6 +4222,7 @@ static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb) ...@@ -4211,6 +4222,7 @@ static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) { if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
u32 end_seq = TCP_SKB_CB(skb)->end_seq; u32 end_seq = TCP_SKB_CB(skb)->end_seq;
tcp_rcv_spurious_retrans(sk, skb);
if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
end_seq = tp->rcv_nxt; end_seq = tp->rcv_nxt;
tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, end_seq); tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, end_seq);
...@@ -4755,6 +4767,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) ...@@ -4755,6 +4767,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
} }
if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) { if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
tcp_rcv_spurious_retrans(sk, skb);
/* A retransmit, 2nd most common case. Force an immediate ack. */ /* A retransmit, 2nd most common case. Force an immediate ack. */
NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST); NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment