Commit c0402760 authored by Yuchung Cheng's avatar Yuchung Cheng Committed by David S. Miller

tcp: new CC hook to set sending rate with rate_sample in any CA state

This commit introduces an optional new "omnipotent" hook,
cong_control(), for congestion control modules. The cong_control()
function is called at the end of processing an ACK (i.e., after
updating sequence numbers, the SACK scoreboard, and loss
detection). At that moment we have precise delivery rate information
the congestion control module can use to control the sending behavior
(using cwnd, TSO skb size, and pacing rate) in any CA state.

This function can also be used by a congestion control that prefers
not to use the default cwnd reduction approach (i.e., the PRR
algorithm) during CA_Recovery to control the cwnd and sending rate
during loss recovery.

We take advantage of the fact that recent changes defer the
retransmission or transmission of new data (e.g. by F-RTO) in recovery
until the new tcp_cong_control() function is run.

With this commit, we only run tcp_update_pacing_rate() if the
congestion control is not using this new API. New congestion controls
which use the new API do not want the TCP stack to run the default
pacing rate calculation and overwrite whatever pacing rate they have
chosen at initialization time.
Signed-off-by: default avatarVan Jacobson <vanj@google.com>
Signed-off-by: default avatarNeal Cardwell <ncardwell@google.com>
Signed-off-by: default avatarYuchung Cheng <ycheng@google.com>
Signed-off-by: default avatarNandita Dukkipati <nanditad@google.com>
Signed-off-by: default avatarEric Dumazet <edumazet@google.com>
Signed-off-by: default avatarSoheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 77bfc174
...@@ -919,6 +919,10 @@ struct tcp_congestion_ops { ...@@ -919,6 +919,10 @@ struct tcp_congestion_ops {
u32 (*tso_segs_goal)(struct sock *sk); u32 (*tso_segs_goal)(struct sock *sk);
/* returns the multiplier used in tcp_sndbuf_expand (optional) */ /* returns the multiplier used in tcp_sndbuf_expand (optional) */
u32 (*sndbuf_expand)(struct sock *sk); u32 (*sndbuf_expand)(struct sock *sk);
/* call when packets are delivered to update cwnd and pacing rate,
* after all the ca_state processing. (optional)
*/
void (*cong_control)(struct sock *sk, const struct rate_sample *rs);
/* get info for inet_diag (optional) */ /* get info for inet_diag (optional) */
size_t (*get_info)(struct sock *sk, u32 ext, int *attr, size_t (*get_info)(struct sock *sk, u32 ext, int *attr,
union tcp_cc_info *info); union tcp_cc_info *info);
......
...@@ -69,7 +69,7 @@ int tcp_register_congestion_control(struct tcp_congestion_ops *ca) ...@@ -69,7 +69,7 @@ int tcp_register_congestion_control(struct tcp_congestion_ops *ca)
int ret = 0; int ret = 0;
/* all algorithms must implement ssthresh and cong_avoid ops */ /* all algorithms must implement ssthresh and cong_avoid ops */
if (!ca->ssthresh || !ca->cong_avoid) { if (!ca->ssthresh || !(ca->cong_avoid || ca->cong_control)) {
pr_err("%s does not implement required ops\n", ca->name); pr_err("%s does not implement required ops\n", ca->name);
return -EINVAL; return -EINVAL;
} }
......
...@@ -2536,6 +2536,9 @@ static inline void tcp_end_cwnd_reduction(struct sock *sk) ...@@ -2536,6 +2536,9 @@ static inline void tcp_end_cwnd_reduction(struct sock *sk)
{ {
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
if (inet_csk(sk)->icsk_ca_ops->cong_control)
return;
/* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */ /* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */
if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR ||
(tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) { (tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) {
...@@ -3312,8 +3315,15 @@ static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag) ...@@ -3312,8 +3315,15 @@ static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
* information. All transmission or retransmission are delayed afterwards. * information. All transmission or retransmission are delayed afterwards.
*/ */
static void tcp_cong_control(struct sock *sk, u32 ack, u32 acked_sacked, static void tcp_cong_control(struct sock *sk, u32 ack, u32 acked_sacked,
int flag) int flag, const struct rate_sample *rs)
{ {
const struct inet_connection_sock *icsk = inet_csk(sk);
if (icsk->icsk_ca_ops->cong_control) {
icsk->icsk_ca_ops->cong_control(sk, rs);
return;
}
if (tcp_in_cwnd_reduction(sk)) { if (tcp_in_cwnd_reduction(sk)) {
/* Reduce cwnd if state mandates */ /* Reduce cwnd if state mandates */
tcp_cwnd_reduction(sk, acked_sacked, flag); tcp_cwnd_reduction(sk, acked_sacked, flag);
...@@ -3683,7 +3693,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) ...@@ -3683,7 +3693,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */ delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */
lost = tp->lost - lost; /* freshly marked lost */ lost = tp->lost - lost; /* freshly marked lost */
tcp_rate_gen(sk, delivered, lost, &now, &rs); tcp_rate_gen(sk, delivered, lost, &now, &rs);
tcp_cong_control(sk, ack, delivered, flag); tcp_cong_control(sk, ack, delivered, flag, &rs);
tcp_xmit_recovery(sk, rexmit); tcp_xmit_recovery(sk, rexmit);
return 1; return 1;
...@@ -5982,7 +5992,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) ...@@ -5982,7 +5992,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
} else } else
tcp_init_metrics(sk); tcp_init_metrics(sk);
tcp_update_pacing_rate(sk); if (!inet_csk(sk)->icsk_ca_ops->cong_control)
tcp_update_pacing_rate(sk);
/* Prevent spurious tcp_cwnd_restart() on first data packet */ /* Prevent spurious tcp_cwnd_restart() on first data packet */
tp->lsndtime = tcp_time_stamp; tp->lsndtime = tcp_time_stamp;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment