Commit 7f12422c authored by Yuchung Cheng's avatar Yuchung Cheng Committed by David S. Miller

tcp: always timestamp on every skb transmission

Previously TCP skbs are not always timestamped if the transmission
failed due to memory or other local issues. This makes deciding
when to abort a socket tricky and complicated because the first
unacknowledged skb's timestamp may be 0 on TCP timeout.

The straight-forward fix is to always timestamp skb on every
transmission attempt. Also every skb retransmission needs to be
flagged properly to avoid RTT under-estimation. This can happen
upon receiving an ACK for the original packet and the a previous
(spurious) retransmission has failed.

It's worth noting that this reverts to the old time-stamping
style before commit 8c72c65b ("tcp: update skb->skb_mstamp more
carefully") which addresses a problem in computing the elapsed time
of a stalled window-probing socket. The problem will be addressed
differently in the next patches with a simpler approach.
Signed-off-by: default avatarYuchung Cheng <ycheng@google.com>
Signed-off-by: default avatarEric Dumazet <edumazet@google.com>
Reviewed-by: default avatarNeal Cardwell <ncardwell@google.com>
Reviewed-by: default avatarSoheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 88f8598d
...@@ -980,7 +980,6 @@ static void tcp_update_skb_after_send(struct sock *sk, struct sk_buff *skb, ...@@ -980,7 +980,6 @@ static void tcp_update_skb_after_send(struct sock *sk, struct sk_buff *skb,
{ {
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
if (sk->sk_pacing_status != SK_PACING_NONE) { if (sk->sk_pacing_status != SK_PACING_NONE) {
unsigned long rate = sk->sk_pacing_rate; unsigned long rate = sk->sk_pacing_rate;
...@@ -1028,7 +1027,9 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, ...@@ -1028,7 +1027,9 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
BUG_ON(!skb || !tcp_skb_pcount(skb)); BUG_ON(!skb || !tcp_skb_pcount(skb));
tp = tcp_sk(sk); tp = tcp_sk(sk);
prior_wstamp = tp->tcp_wstamp_ns;
tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache);
skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
if (clone_it) { if (clone_it) {
TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
- tp->snd_una; - tp->snd_una;
...@@ -1045,11 +1046,6 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, ...@@ -1045,11 +1046,6 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
return -ENOBUFS; return -ENOBUFS;
} }
prior_wstamp = tp->tcp_wstamp_ns;
tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache);
skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
inet = inet_sk(sk); inet = inet_sk(sk);
tcb = TCP_SKB_CB(skb); tcb = TCP_SKB_CB(skb);
memset(&opts, 0, sizeof(opts)); memset(&opts, 0, sizeof(opts));
...@@ -2937,12 +2933,16 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) ...@@ -2937,12 +2933,16 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
} }
/* To avoid taking spuriously low RTT samples based on a timestamp
* for a transmit that never happened, always mark EVER_RETRANS
*/
TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RETRANS_CB_FLAG)) if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RETRANS_CB_FLAG))
tcp_call_bpf_3arg(sk, BPF_SOCK_OPS_RETRANS_CB, tcp_call_bpf_3arg(sk, BPF_SOCK_OPS_RETRANS_CB,
TCP_SKB_CB(skb)->seq, segs, err); TCP_SKB_CB(skb)->seq, segs, err);
if (likely(!err)) { if (likely(!err)) {
TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
trace_tcp_retransmit_skb(sk, skb); trace_tcp_retransmit_skb(sk, skb);
} else if (err != -EBUSY) { } else if (err != -EBUSY) {
NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL, segs); NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL, segs);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment