Commit 645f4c6f authored by Eric Dumazet's avatar Eric Dumazet Committed by David S. Miller

tcp: switch rcv_rtt_est and rcvq_space to high resolution timestamps

Some devices or distributions use HZ=100 or HZ=250

TCP receive buffer autotuning has poor behavior caused by this choice.
Since autotuning happens after 4 ms or 10 ms, short distance flows
get their receive buffer tuned to a very high value, but after an initial
period where it was frozen to (too small) initial value.

With tp->tcp_mstamp introduction, we can switch to high resolution
timestamps almost for free (at the expense of 8 additional bytes per
TCP structure)

Note that some TCP stacks use usec TCP timestamps where this
patch makes even more sense : Many TCP flows have < 500 usec RTT.
Hopefully this finer TS option can be standardized soon.

Tested:
 HZ=100 kernel
 ./netperf -H lpaa24 -t TCP_RR -l 1000 -- -r 10000,10000 &

 Peer without patch :
 lpaa24:~# ss -tmi dst lpaa23
 ...
 skmem:(r0,rb8388608,...)
 rcv_rtt:10 rcv_space:3210000 minrtt:0.017

 Peer with the patch :
 lpaa23:~# ss -tmi dst lpaa24
 ...
 skmem:(r0,rb428800,...)
 rcv_rtt:0.069 rcv_space:30000 minrtt:0.017

We can see saner RCVBUF, and more precise rcv_rtt information.
Signed-off-by: default avatarEric Dumazet <edumazet@google.com>
Acked-by: default avatarSoheil Hassas Yeganeh <soheil@google.com>
Acked-by: default avatarNeal Cardwell <ncardwell@google.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent a6db50b8
......@@ -333,16 +333,16 @@ struct tcp_sock {
/* Receiver side RTT estimation */
struct {
u32 rtt;
u32 seq;
u32 time;
u32 rtt_us;
u32 seq;
struct skb_mstamp time;
} rcv_rtt_est;
/* Receiver queue space */
struct {
int space;
u32 seq;
u32 time;
int space;
u32 seq;
struct skb_mstamp time;
} rcvq_space;
/* TCP-specific MTU probe information. */
......
......@@ -2853,7 +2853,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_snd_ssthresh = tp->snd_ssthresh;
info->tcpi_advmss = tp->advmss;
info->tcpi_rcv_rtt = jiffies_to_usecs(tp->rcv_rtt_est.rtt)>>3;
info->tcpi_rcv_rtt = tp->rcv_rtt_est.rtt_us >> 3;
info->tcpi_rcv_space = tp->rcvq_space.space;
info->tcpi_total_retrans = tp->total_retrans;
......
......@@ -442,7 +442,8 @@ void tcp_init_buffer_space(struct sock *sk)
tcp_sndbuf_expand(sk);
tp->rcvq_space.space = tp->rcv_wnd;
tp->rcvq_space.time = tcp_time_stamp;
skb_mstamp_get(&tp->tcp_mstamp);
tp->rcvq_space.time = tp->tcp_mstamp;
tp->rcvq_space.seq = tp->copied_seq;
maxwin = tcp_full_space(sk);
......@@ -518,7 +519,7 @@ EXPORT_SYMBOL(tcp_initialize_rcv_mss);
*/
static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
{
u32 new_sample = tp->rcv_rtt_est.rtt;
u32 new_sample = tp->rcv_rtt_est.rtt_us;
long m = sample;
if (m == 0)
......@@ -548,21 +549,23 @@ static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
new_sample = m << 3;
}
if (tp->rcv_rtt_est.rtt != new_sample)
tp->rcv_rtt_est.rtt = new_sample;
tp->rcv_rtt_est.rtt_us = new_sample;
}
static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp)
{
if (tp->rcv_rtt_est.time == 0)
u32 delta_us;
if (tp->rcv_rtt_est.time.v64 == 0)
goto new_measure;
if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
return;
tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rcv_rtt_est.time, 1);
delta_us = skb_mstamp_us_delta(&tp->tcp_mstamp, &tp->rcv_rtt_est.time);
tcp_rcv_rtt_update(tp, delta_us, 1);
new_measure:
tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd;
tp->rcv_rtt_est.time = tcp_time_stamp;
tp->rcv_rtt_est.time = tp->tcp_mstamp;
}
static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
......@@ -572,7 +575,10 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
if (tp->rx_opt.rcv_tsecr &&
(TCP_SKB_CB(skb)->end_seq -
TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss))
tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rx_opt.rcv_tsecr, 0);
tcp_rcv_rtt_update(tp,
jiffies_to_usecs(tcp_time_stamp -
tp->rx_opt.rcv_tsecr),
0);
}
/*
......@@ -585,8 +591,8 @@ void tcp_rcv_space_adjust(struct sock *sk)
int time;
int copied;
time = tcp_time_stamp - tp->rcvq_space.time;
if (time < (tp->rcv_rtt_est.rtt >> 3) || tp->rcv_rtt_est.rtt == 0)
time = skb_mstamp_us_delta(&tp->tcp_mstamp, &tp->rcvq_space.time);
if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0)
return;
/* Number of bytes copied to user in last RTT */
......@@ -642,7 +648,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
new_measure:
tp->rcvq_space.seq = tp->copied_seq;
tp->rcvq_space.time = tcp_time_stamp;
tp->rcvq_space.time = tp->tcp_mstamp;
}
/* There is something which you must keep in mind when you analyze the
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment