Commit d4761754 authored by Yousuk Seung's avatar Yousuk Seung Committed by David S. Miller

tcp: invalidate rate samples during SACK reneging

Mark tcp_sock during a SACK reneging event and invalidate rate samples
while marked. Such rate samples may overestimate bw by including packets
that were SACKed before reneging.

< ack 6001 win 10000 sack 7001:38001
< ack 7001 win 0 sack 8001:38001 // Reneg detected
> seq 7001:8001 // RTO, SACK cleared.
< ack 38001 win 10000

In above example the rate sample taken after the last ack will count
7001-38001 as delivered while the actual delivery rate likely could
be much lower i.e. 7001-8001.

This patch adds a new field tcp_sock.sack_reneg and marks it when we
declare SACK reneging and entering TCP_CA_Loss, and unmarks it after
the last rate sample was taken before moving back to TCP_CA_Open. This
patch also invalidates rate samples taken while tcp_sock.is_sack_reneg
is set.

Fixes: b9f64820 ("tcp: track data delivery rate for a TCP connection")
Signed-off-by: default avatarYousuk Seung <ysseung@google.com>
Signed-off-by: default avatarNeal Cardwell <ncardwell@google.com>
Signed-off-by: default avatarYuchung Cheng <ycheng@google.com>
Acked-by: default avatarSoheil Hassas Yeganeh <soheil@google.com>
Acked-by: default avatarEric Dumazet <edumazet@google.com>
Acked-by: default avatarPriyaranjan Jha <priyarjha@google.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 195bd525
...@@ -224,7 +224,8 @@ struct tcp_sock { ...@@ -224,7 +224,8 @@ struct tcp_sock {
rate_app_limited:1, /* rate_{delivered,interval_us} limited? */ rate_app_limited:1, /* rate_{delivered,interval_us} limited? */
fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */ fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */
fastopen_no_cookie:1, /* Allow send/recv SYN+data without a cookie */ fastopen_no_cookie:1, /* Allow send/recv SYN+data without a cookie */
unused:3; is_sack_reneg:1, /* in recovery from loss with SACK reneg? */
unused:2;
u8 nonagle : 4,/* Disable Nagle algorithm? */ u8 nonagle : 4,/* Disable Nagle algorithm? */
thin_lto : 1,/* Use linear timeouts for thin streams */ thin_lto : 1,/* Use linear timeouts for thin streams */
unused1 : 1, unused1 : 1,
......
...@@ -1055,7 +1055,7 @@ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb); ...@@ -1055,7 +1055,7 @@ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb);
void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
struct rate_sample *rs); struct rate_sample *rs);
void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
struct rate_sample *rs); bool is_sack_reneg, struct rate_sample *rs);
void tcp_rate_check_app_limited(struct sock *sk); void tcp_rate_check_app_limited(struct sock *sk);
/* These functions determine how the current flow behaves in respect of SACK /* These functions determine how the current flow behaves in respect of SACK
......
...@@ -2412,6 +2412,7 @@ int tcp_disconnect(struct sock *sk, int flags) ...@@ -2412,6 +2412,7 @@ int tcp_disconnect(struct sock *sk, int flags)
tp->snd_cwnd_cnt = 0; tp->snd_cwnd_cnt = 0;
tp->window_clamp = 0; tp->window_clamp = 0;
tcp_set_ca_state(sk, TCP_CA_Open); tcp_set_ca_state(sk, TCP_CA_Open);
tp->is_sack_reneg = 0;
tcp_clear_retrans(tp); tcp_clear_retrans(tp);
inet_csk_delack_init(sk); inet_csk_delack_init(sk);
/* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0 /* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0
......
...@@ -1942,6 +1942,8 @@ void tcp_enter_loss(struct sock *sk) ...@@ -1942,6 +1942,8 @@ void tcp_enter_loss(struct sock *sk)
if (is_reneg) { if (is_reneg) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
tp->sacked_out = 0; tp->sacked_out = 0;
/* Mark SACK reneging until we recover from this loss event. */
tp->is_sack_reneg = 1;
} }
tcp_clear_all_retrans_hints(tp); tcp_clear_all_retrans_hints(tp);
...@@ -2365,6 +2367,7 @@ static bool tcp_try_undo_recovery(struct sock *sk) ...@@ -2365,6 +2367,7 @@ static bool tcp_try_undo_recovery(struct sock *sk)
return true; return true;
} }
tcp_set_ca_state(sk, TCP_CA_Open); tcp_set_ca_state(sk, TCP_CA_Open);
tp->is_sack_reneg = 0;
return false; return false;
} }
...@@ -2398,8 +2401,10 @@ static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo) ...@@ -2398,8 +2401,10 @@ static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
NET_INC_STATS(sock_net(sk), NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPSPURIOUSRTOS); LINUX_MIB_TCPSPURIOUSRTOS);
inet_csk(sk)->icsk_retransmits = 0; inet_csk(sk)->icsk_retransmits = 0;
if (frto_undo || tcp_is_sack(tp)) if (frto_undo || tcp_is_sack(tp)) {
tcp_set_ca_state(sk, TCP_CA_Open); tcp_set_ca_state(sk, TCP_CA_Open);
tp->is_sack_reneg = 0;
}
return true; return true;
} }
return false; return false;
...@@ -3496,6 +3501,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) ...@@ -3496,6 +3501,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
struct tcp_sacktag_state sack_state; struct tcp_sacktag_state sack_state;
struct rate_sample rs = { .prior_delivered = 0 }; struct rate_sample rs = { .prior_delivered = 0 };
u32 prior_snd_una = tp->snd_una; u32 prior_snd_una = tp->snd_una;
bool is_sack_reneg = tp->is_sack_reneg;
u32 ack_seq = TCP_SKB_CB(skb)->seq; u32 ack_seq = TCP_SKB_CB(skb)->seq;
u32 ack = TCP_SKB_CB(skb)->ack_seq; u32 ack = TCP_SKB_CB(skb)->ack_seq;
bool is_dupack = false; bool is_dupack = false;
...@@ -3612,7 +3618,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) ...@@ -3612,7 +3618,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */ delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */
lost = tp->lost - lost; /* freshly marked lost */ lost = tp->lost - lost; /* freshly marked lost */
tcp_rate_gen(sk, delivered, lost, sack_state.rate); tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate);
tcp_cong_control(sk, ack, delivered, flag, sack_state.rate); tcp_cong_control(sk, ack, delivered, flag, sack_state.rate);
tcp_xmit_recovery(sk, rexmit); tcp_xmit_recovery(sk, rexmit);
return 1; return 1;
......
...@@ -106,7 +106,7 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, ...@@ -106,7 +106,7 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
/* Update the connection delivery information and generate a rate sample. */ /* Update the connection delivery information and generate a rate sample. */
void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
struct rate_sample *rs) bool is_sack_reneg, struct rate_sample *rs)
{ {
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
u32 snd_us, ack_us; u32 snd_us, ack_us;
...@@ -124,8 +124,12 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, ...@@ -124,8 +124,12 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
rs->acked_sacked = delivered; /* freshly ACKed or SACKed */ rs->acked_sacked = delivered; /* freshly ACKed or SACKed */
rs->losses = lost; /* freshly marked lost */ rs->losses = lost; /* freshly marked lost */
/* Return an invalid sample if no timing information is available. */ /* Return an invalid sample if no timing information is available or
if (!rs->prior_mstamp) { * in recovery from loss with SACK reneging. Rate samples taken during
* a SACK reneging event may overestimate bw by including packets that
* were SACKed before the reneg.
*/
if (!rs->prior_mstamp || is_sack_reneg) {
rs->delivered = -1; rs->delivered = -1;
rs->interval_us = -1; rs->interval_us = -1;
return; return;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment