Commit a58598a4 authored by David S. Miller

Merge branch 'tcp_bbr-TCP-BBR-changes-for-EDT-pacing-model'

Neal Cardwell says:

====================
tcp_bbr: TCP BBR changes for EDT pacing model

Two small patches for TCP BBR to follow up with Eric's recent work to change
the TCP and fq pacing machinery to an "earliest departure time" (EDT) model:

- The first patch adjusts the TCP BBR logic to work with the new
  "earliest departure time" (EDT) pacing model.

- The second patch adjusts the TCP BBR logic to centralize the setting
  of gain values, to simplify the code and prepare for future changes.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents cb10c7c0 cf33e25c
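
For context, under the EDT model this series adapts BBR to, the TCP sender stamps each skb with an earliest departure time derived from the pacing rate, and the fq qdisc simply holds each skb until its stamp rather than computing inter-packet gaps itself. The short userspace sketch below illustrates only that stamping idea; the names fake_skb and edt_stamp_skb and the simplified rate handling are illustrative assumptions, not the kernel code.

/* Minimal sketch of departure-time stamping under the EDT model.
 * Hypothetical names; the real logic lives in the kernel TCP/fq code. */
#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000ULL

struct fake_skb {
        uint32_t len;       /* bytes to send */
        uint64_t tstamp_ns; /* earliest departure time assigned by TCP */
};

/* Stamp the skb with the connection's write stamp and advance the stamp by
 * len / pacing_rate, so the next skb departs that much later. */
static uint64_t edt_stamp_skb(struct fake_skb *skb, uint64_t wstamp_ns,
                              uint64_t now_ns, uint64_t pacing_rate_bps)
{
        if (wstamp_ns < now_ns)      /* never schedule in the past */
                wstamp_ns = now_ns;
        skb->tstamp_ns = wstamp_ns;  /* fq releases the skb at this time */
        return wstamp_ns + skb->len * 8ULL * NSEC_PER_SEC / pacing_rate_bps;
}

int main(void)
{
        struct fake_skb skb = { .len = 1500 };
        /* 100 Mbit/s pacing: a 1500-byte skb occupies 120 usec of "air time" */
        uint64_t next = edt_stamp_skb(&skb, 0, 0, 100 * 1000 * 1000ULL);

        printf("skb departs at %llu ns, next skb at %llu ns\n",
               (unsigned long long)skb.tstamp_ns, (unsigned long long)next);
        return 0;
}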
@@ -369,6 +369,39 @@ static u32 bbr_target_cwnd(struct sock *sk, u32 bw, int gain)
 	return cwnd;
 }
 
+/* With pacing at lower layers, there's often less data "in the network" than
+ * "in flight". With TSQ and departure time pacing at lower layers (e.g. fq),
+ * we often have several skbs queued in the pacing layer with a pre-scheduled
+ * earliest departure time (EDT). BBR adapts its pacing rate based on the
+ * inflight level that it estimates has already been "baked in" by previous
+ * departure time decisions. We calculate a rough estimate of the number of our
+ * packets that might be in the network at the earliest departure time for the
+ * next skb scheduled:
+ *   in_network_at_edt = inflight_at_edt - (EDT - now) * bw
+ * If we're increasing inflight, then we want to know if the transmit of the
+ * EDT skb will push inflight above the target, so inflight_at_edt includes
+ * bbr_tso_segs_goal() from the skb departing at EDT. If decreasing inflight,
+ * then estimate if inflight will sink too low just before the EDT transmit.
+ */
+static u32 bbr_packets_in_net_at_edt(struct sock *sk, u32 inflight_now)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *bbr = inet_csk_ca(sk);
+	u64 now_ns, edt_ns, interval_us;
+	u32 interval_delivered, inflight_at_edt;
+
+	now_ns = tp->tcp_clock_cache;
+	edt_ns = max(tp->tcp_wstamp_ns, now_ns);
+	interval_us = div_u64(edt_ns - now_ns, NSEC_PER_USEC);
+	interval_delivered = (u64)bbr_bw(sk) * interval_us >> BW_SCALE;
+	inflight_at_edt = inflight_now;
+	if (bbr->pacing_gain > BBR_UNIT)              /* increasing inflight */
+		inflight_at_edt += bbr_tso_segs_goal(sk);  /* include EDT skb */
+	if (interval_delivered >= inflight_at_edt)
+		return 0;
+	return inflight_at_edt - interval_delivered;
+}
+
 /* An optimization in BBR to reduce losses: On the first round of recovery, we
  * follow the packet conservation principle: send P packets per P packets acked.
  * After that, we slow-start and send at most 2*P packets per P packets acked.
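
To make the fixed-point estimate above concrete, here is a standalone userspace sketch that mirrors the arithmetic of bbr_packets_in_net_at_edt() with example numbers plugged in. BW_SCALE matches tcp_bbr.c's packets-per-usec fixed-point bandwidth encoding; the chosen bandwidth, EDT offset, and inflight values are purely illustrative, and the function itself is a simplification, not kernel code.

/* Userspace sketch of the in_network_at_edt estimate, with made-up values. */
#include <stdint.h>
#include <stdio.h>

#define BW_SCALE      24      /* bw is packets per usec << BW_SCALE */
#define NSEC_PER_USEC 1000ULL

static uint32_t packets_in_net_at_edt(uint64_t now_ns, uint64_t wstamp_ns,
                                      uint64_t bw, uint32_t inflight_now,
                                      int probing_up, uint32_t tso_segs_goal)
{
        uint64_t edt_ns = wstamp_ns > now_ns ? wstamp_ns : now_ns;
        uint64_t interval_us = (edt_ns - now_ns) / NSEC_PER_USEC;
        uint64_t interval_delivered = (bw * interval_us) >> BW_SCALE;
        uint32_t inflight_at_edt = inflight_now;

        if (probing_up)                        /* pacing_gain > BBR_UNIT */
                inflight_at_edt += tso_segs_goal;  /* count the EDT skb itself */
        if (interval_delivered >= inflight_at_edt)
                return 0;
        return inflight_at_edt - (uint32_t)interval_delivered;
}

int main(void)
{
        /* bw = 1 pkt/usec (~12 Gbit/s at 1500B), EDT 50 usec in the future,
         * 80 packets in flight, probing upward with a 2-segment TSO goal:
         * (80 + 2) - 50 = 32 packets still in the network at the EDT. */
        uint64_t bw = 1ULL << BW_SCALE;
        uint32_t in_net = packets_in_net_at_edt(0, 50 * NSEC_PER_USEC, bw,
                                                80, 1, 2);

        printf("packets in net at EDT: %u\n", in_net);  /* prints 32 */
        return 0;
}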
@@ -460,7 +493,7 @@ static bool bbr_is_next_cycle_phase(struct sock *sk,
 	if (bbr->pacing_gain == BBR_UNIT)
 		return is_full_length;		/* just use wall clock time */
 
-	inflight = rs->prior_in_flight;  /* what was in-flight before ACK? */
+	inflight = bbr_packets_in_net_at_edt(sk, rs->prior_in_flight);
 	bw = bbr_max_bw(sk);
 
 	/* A pacing_gain > 1.0 probes for bw by trying to raise inflight to at
@@ -488,8 +521,6 @@ static void bbr_advance_cycle_phase(struct sock *sk)
 	bbr->cycle_idx = (bbr->cycle_idx + 1) & (CYCLE_LEN - 1);
 	bbr->cycle_mstamp = tp->delivered_mstamp;
-	bbr->pacing_gain = bbr->lt_use_bw ? BBR_UNIT :
-					    bbr_pacing_gain[bbr->cycle_idx];
 }
 
 /* Gain cycling: cycle pacing gain to converge to fair share of available bw. */
@@ -507,8 +538,6 @@ static void bbr_reset_startup_mode(struct sock *sk)
 	struct bbr *bbr = inet_csk_ca(sk);
 
 	bbr->mode = BBR_STARTUP;
-	bbr->pacing_gain = bbr_high_gain;
-	bbr->cwnd_gain = bbr_high_gain;
 }
 
 static void bbr_reset_probe_bw_mode(struct sock *sk)
@@ -516,8 +545,6 @@ static void bbr_reset_probe_bw_mode(struct sock *sk)
 	struct bbr *bbr = inet_csk_ca(sk);
 
 	bbr->mode = BBR_PROBE_BW;
-	bbr->pacing_gain = BBR_UNIT;
-	bbr->cwnd_gain = bbr_cwnd_gain;
 	bbr->cycle_idx = CYCLE_LEN - 1 - prandom_u32_max(bbr_cycle_rand);
 	bbr_advance_cycle_phase(sk);	/* flip to next phase of gain cycle */
 }
@@ -735,13 +762,11 @@ static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs)
 	if (bbr->mode == BBR_STARTUP && bbr_full_bw_reached(sk)) {
 		bbr->mode = BBR_DRAIN;	/* drain queue we created */
-		bbr->pacing_gain = bbr_drain_gain;	/* pace slow to drain */
-		bbr->cwnd_gain = bbr_high_gain;	/* maintain cwnd */
 		tcp_sk(sk)->snd_ssthresh =
 				bbr_target_cwnd(sk, bbr_max_bw(sk), BBR_UNIT);
 	}	/* fall through to check if in-flight is already small: */
 	if (bbr->mode == BBR_DRAIN &&
-	    tcp_packets_in_flight(tcp_sk(sk)) <=
+	    bbr_packets_in_net_at_edt(sk, tcp_packets_in_flight(tcp_sk(sk))) <=
 	    bbr_target_cwnd(sk, bbr_max_bw(sk), BBR_UNIT))
 		bbr_reset_probe_bw_mode(sk);  /* we estimate queue is drained */
 }
@@ -798,8 +823,6 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs)
 	if (bbr_probe_rtt_mode_ms > 0 && filter_expired &&
 	    !bbr->idle_restart && bbr->mode != BBR_PROBE_RTT) {
 		bbr->mode = BBR_PROBE_RTT;  /* dip, drain queue */
-		bbr->pacing_gain = BBR_UNIT;
-		bbr->cwnd_gain = BBR_UNIT;
 		bbr_save_cwnd(sk);  /* note cwnd so we can restore it */
 		bbr->probe_rtt_done_stamp = 0;
 	}
@@ -827,6 +850,35 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs)
 	bbr->idle_restart = 0;
 }
 
+static void bbr_update_gains(struct sock *sk)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	switch (bbr->mode) {
+	case BBR_STARTUP:
+		bbr->pacing_gain = bbr_high_gain;
+		bbr->cwnd_gain = bbr_high_gain;
+		break;
+	case BBR_DRAIN:
+		bbr->pacing_gain = bbr_drain_gain;	/* slow, to drain */
+		bbr->cwnd_gain = bbr_high_gain;	/* keep cwnd */
+		break;
+	case BBR_PROBE_BW:
+		bbr->pacing_gain = (bbr->lt_use_bw ?
+				    BBR_UNIT :
+				    bbr_pacing_gain[bbr->cycle_idx]);
+		bbr->cwnd_gain = bbr_cwnd_gain;
+		break;
+	case BBR_PROBE_RTT:
+		bbr->pacing_gain = BBR_UNIT;
+		bbr->cwnd_gain = BBR_UNIT;
+		break;
+	default:
+		WARN_ONCE(1, "BBR bad mode: %u\n", bbr->mode);
+		break;
+	}
+}
+
 static void bbr_update_model(struct sock *sk, const struct rate_sample *rs)
 {
 	bbr_update_bw(sk, rs);
@@ -834,6 +886,7 @@ static void bbr_update_model(struct sock *sk, const struct rate_sample *rs)
 	bbr_check_full_bw_reached(sk, rs);
 	bbr_check_drain(sk, rs);
 	bbr_update_min_rtt(sk, rs);
+	bbr_update_gains(sk);
 }
 
 static void bbr_main(struct sock *sk, const struct rate_sample *rs)