Commit 59c9af42 authored by Yuchung Cheng, committed by David S. Miller

tcp: measure RTT from new SACK

Take an RTT sample if an ACK selectively acks some sequences that
have never been retransmitted. Karn's algorithm does not apply
even if that ACK (s)acks other retransmitted sequences, because the
ACK must have been generated by an original, but perhaps out-of-order,
packet. There is no ambiguity. When multiple blocks are newly
sacked because of ACK losses, the earliest block is used to
measure RTT, similar to cumulative ACKs.
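
The rule can be pictured outside the kernel as a minimal sketch (not
kernel code): struct seg, its fields and sack_rtt_sample() are made up
for illustration, and segments are assumed to be walked in ascending
sequence order, as tcp_sacktag_one() effectively is.

/* Minimal userspace sketch of the sampling rule above; all names are
 * illustrative, not kernel identifiers.
 */
#include <stdint.h>

struct seg {
	uint32_t xmit_time;     /* time the segment was originally sent */
	int      retransmitted; /* Karn: retransmitted data gives no sample */
	int      newly_sacked;  /* selectively acked by this incoming ACK */
};

/* Segments assumed to be in ascending sequence order.  Returns the RTT
 * sample taken from this ACK, or -1 if no sample can be taken.
 */
static int32_t sack_rtt_sample(const struct seg *segs, int n, uint32_t now)
{
	int32_t rtt = -1;

	for (int i = 0; i < n; i++) {
		if (!segs[i].newly_sacked || segs[i].retransmitted)
			continue;
		/* Keep only the earliest newly sacked segment, mirroring
		 * the "if (state->rtt < 0)" check in the patch below.
		 */
		if (rtt < 0)
			rtt = (int32_t)(now - segs[i].xmit_time);
	}
	return rtt;
}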

Such RTT samples allow the sender to estimate the RTO during loss
recovery and packet reordering events. They are still useful even with
TCP timestamps, because during these events SND.UNA may not advance,
preventing RTT samples from being taken from the TS ECR (hence the
FLAG_ACKED check before calling tcp_ack_update_rtt()). Therefore this
new RTT source is complementary to the existing ACK and TS RTT
mechanisms.
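
This patch only exports the sample (the sack_rtt variable added below
stays unused for now). Purely as a hypothetical illustration of that
complementarity, and not the actual follow-up patch, a caller could
prefer the cumulative-ACK/timestamp sample and fall back to the SACK
sample when SND.UNA did not advance:

/* Hypothetical illustration only.  seq_rtt stands for a sample from a
 * cumulative ACK or TS ECR (only available when SND.UNA advances);
 * sack_rtt is the new SACK-based sample.  Either may be -1 ("no sample").
 */
static int pick_rtt_sample(int seq_rtt, int sack_rtt)
{
	if (seq_rtt < 0)
		return sack_rtt;	/* may also be -1: no sample at all */
	return seq_rtt;			/* prefer the cumulative-ACK sample */
}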

This patch does not update the RTO; that is done in the next patch.
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 5b08e47c
@@ -1048,6 +1048,7 @@ struct tcp_sacktag_state {
 	int reord;
 	int fack_count;
 	int flag;
+	s32 rtt; /* RTT measured by SACKing never-retransmitted data */
 };
 
 /* Check if skb is fully within the SACK block. In presence of GSO skbs,
@@ -1108,7 +1109,7 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
 static u8 tcp_sacktag_one(struct sock *sk,
 			  struct tcp_sacktag_state *state, u8 sacked,
 			  u32 start_seq, u32 end_seq,
-			  bool dup_sack, int pcount)
+			  int dup_sack, int pcount, u32 xmit_time)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	int fack_count = state->fack_count;
@@ -1148,6 +1149,9 @@ static u8 tcp_sacktag_one(struct sock *sk,
 							   state->reord);
 				if (!after(end_seq, tp->high_seq))
 					state->flag |= FLAG_ORIG_SACK_ACKED;
+				/* Pick the earliest sequence sacked for RTT */
+				if (state->rtt < 0)
+					state->rtt = tcp_time_stamp - xmit_time;
 			}
 
 			if (sacked & TCPCB_LOST) {
@@ -1205,7 +1209,8 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 	 * tcp_highest_sack_seq() when skb is highest_sack.
 	 */
 	tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
-			start_seq, end_seq, dup_sack, pcount);
+			start_seq, end_seq, dup_sack, pcount,
+			TCP_SKB_CB(skb)->when);
 
 	if (skb == tp->lost_skb_hint)
 		tp->lost_cnt_hint += pcount;
@@ -1479,7 +1484,8 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
 						TCP_SKB_CB(skb)->seq,
 						TCP_SKB_CB(skb)->end_seq,
 						dup_sack,
-						tcp_skb_pcount(skb));
+						tcp_skb_pcount(skb),
+						TCP_SKB_CB(skb)->when);
 
 			if (!before(TCP_SKB_CB(skb)->seq,
 				    tcp_highest_sack_seq(tp)))
@@ -1536,7 +1542,7 @@ static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_bl
 
 static int
 tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
-			u32 prior_snd_una)
+			u32 prior_snd_una, s32 *sack_rtt)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	const unsigned char *ptr = (skb_transport_header(ack_skb) +
@@ -1554,6 +1560,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
 
 	state.flag = 0;
 	state.reord = tp->packets_out;
+	state.rtt = -1;
 
 	if (!tp->sacked_out) {
 		if (WARN_ON(tp->fackets_out))
@@ -1737,6 +1744,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
 	WARN_ON((int)tp->retrans_out < 0);
 	WARN_ON((int)tcp_packets_in_flight(tp) < 0);
 #endif
+	*sack_rtt = state.rtt;
 	return state.flag;
 }
 
@@ -3254,6 +3262,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	int prior_packets = tp->packets_out;
 	const int prior_unsacked = tp->packets_out - tp->sacked_out;
 	int acked = 0; /* Number of packets newly acked */
+	s32 sack_rtt = -1;
 
 	/* If the ack is older than previous acks
 	 * then we can probably ignore it.
@@ -3310,7 +3319,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 		flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
 
 		if (TCP_SKB_CB(skb)->sacked)
-			flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
+			flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
+							&sack_rtt);
 
 		if (TCP_ECN_rcv_ecn_echo(tp, tcp_hdr(skb)))
 			flag |= FLAG_ECE;
@@ -3382,7 +3392,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	 * If data was DSACKed, see if we can undo a cwnd reduction.
 	 */
 	if (TCP_SKB_CB(skb)->sacked) {
-		flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
+		flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
+						&sack_rtt);
 		tcp_fastretrans_alert(sk, acked, prior_unsacked,
 				      is_dupack, flag);
 	}