Commit 16a76bdb authored by David S. Miller

[TCP]: Add vegas congestion avoidance support.

A forward port of an old 2.3.x kernel hack done
years ago.  I (DaveM) did the first rough port,
Stephen Hemminger actually cleaned it up and
made it usable.
parent 91a79387
......@@ -327,6 +327,10 @@ enum
NET_TCP_WESTWOOD=95,
NET_IPV4_IGMP_MAX_MSF=96,
NET_TCP_NO_METRICS_SAVE=97,
NET_TCP_VEGAS=98,
NET_TCP_VEGAS_ALPHA=99,
NET_TCP_VEGAS_BETA=100,
NET_TCP_VEGAS_GAMMA=101,
};
enum {
......
......@@ -388,6 +388,18 @@ struct tcp_opt {
__u32 rtt;
__u32 rtt_min; /* minimum observed RTT */
} westwood;
/* Vegas variables: per-connection state for Vegas congestion avoidance.
 * tcp_vegas_enable() re-arms doing_vegas_now, beg_snd_nxt, cntRTT and
 * minRTT every time sampling is (re)started.
 */
struct {
__u32 beg_snd_nxt; /* right edge during last RTT; set to snd_nxt when sampling restarts */
__u32 beg_snd_una; /* left edge during last RTT */
__u32 beg_snd_cwnd; /* saves the size of the cwnd */
__u8 do_vegas; /* do vegas for this connection; tested by tcp_is_vegas() */
__u8 doing_vegas_now;/* if true, do vegas for this RTT; tested by tcp_vegas_enabled() */
__u16 cntRTT; /* # of RTTs measured within last RTT; zeroed when sampling restarts */
__u32 minRTT; /* min of RTTs measured within last RTT (in usec); reset to 0x7fffffff when sampling restarts */
__u32 baseRTT; /* the min of all Vegas RTT measurements seen (in usec) */
} vegas;
};
/* WARNING: don't change the layout of the members in tcp_sock! */
......
......@@ -583,6 +583,10 @@ extern int sysctl_tcp_tw_reuse;
extern int sysctl_tcp_frto;
extern int sysctl_tcp_low_latency;
extern int sysctl_tcp_westwood;
extern int sysctl_tcp_vegas_cong_avoid;
extern int sysctl_tcp_vegas_alpha;
extern int sysctl_tcp_vegas_beta;
extern int sysctl_tcp_vegas_gamma;
extern int sysctl_tcp_nometrics_save;
extern atomic_t tcp_memory_allocated;
......@@ -1212,8 +1216,56 @@ static inline __u32 tcp_recalc_ssthresh(struct tcp_opt *tp)
return max(tp->snd_cwnd >> 1U, 2U);
}
/* Suspend Vegas sampling by clearing the per-RTT "sampling active" flag. */
#define tcp_vegas_disable(tp) ((tp)->vegas.doing_vegas_now = 0)
/* Non-zero when Vegas has been selected for this connection.  This says
 * nothing about whether Vegas measurements are being taken right now.
 */
#define tcp_is_vegas(tp) ((tp)->vegas.do_vegas)
/* (Re)start Vegas sampling for @tp.
 *
 * Vegas has to be restarted in each of the following cases:
 *
 *   o a connection has just been established
 *   o an RTO has fired
 *   o fast recovery has finished
 *   o a packet is sent while nothing is outstanding and unacknowledged
 *     (an idle connection is starting up again)
 *
 * In every one of them both the saved cwnd and the congestion feedback
 * are stale, so a Vegas calculation at the end of the first RTT would be
 * based on stale information.  We therefore hold off until an RTT has
 * completed during which ACKs were actually received.
 */
static inline void tcp_vegas_enable(struct tcp_opt *tp)
{
	/* Throw away the RTT samples collected so far. */
	tp->vegas.minRTT = 0x7fffffff;
	tp->vegas.cntRTT = 0;

	/* The next send window begins at the current snd_nxt. */
	tp->vegas.beg_snd_nxt = tp->snd_nxt;

	/* Sampling resumes the next time we send something. */
	tp->vegas.doing_vegas_now = 1;
}
/* Non-zero while Vegas samples should be collected for this connection. */
#define tcp_vegas_enabled(tp) ((tp)->vegas.doing_vegas_now)
/* Definition is not in this header.  Called from the connection set-up
 * paths: tcp_connect_init(), tcp_connect() and tcp_create_openreq_child().
 */
extern void tcp_vegas_init(struct tcp_opt *tp);
/* Record a new congestion-avoidance state for @tp.
 *
 * On a Vegas connection the sampling state follows the transition:
 * entering TCP_CA_Open restarts sampling, any other state pauses it.
 * Connections that do not use Vegas only get the state stored.
 */
static inline void tcp_set_ca_state(struct tcp_opt *tp, u8 ca_state)
{
	if (tcp_is_vegas(tp)) {
		if (ca_state != TCP_CA_Open)
			tcp_vegas_disable(tp);
		else
			tcp_vegas_enable(tp);
	}

	tp->ca_state = ca_state;
}
......
......@@ -609,7 +609,38 @@ ctl_table ipv4_table[] = {
.mode = 0644,
.proc_handler = &proc_dointvec,
},
/* Vegas congestion avoidance sysctls.  Each entry exposes one int
 * variable (mode 0644) through the plain proc_dointvec handler; no
 * extra1/extra2 bounds are set here, so values are not range-checked
 * at this layer.
 */
{
.ctl_name = NET_TCP_VEGAS,
.procname = "tcp_vegas_cong_avoid",
.data = &sysctl_tcp_vegas_cong_avoid,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
{
.ctl_name = NET_TCP_VEGAS_ALPHA,
.procname = "tcp_vegas_alpha",
.data = &sysctl_tcp_vegas_alpha,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
{
.ctl_name = NET_TCP_VEGAS_BETA,
.procname = "tcp_vegas_beta",
.data = &sysctl_tcp_vegas_beta,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
{
.ctl_name = NET_TCP_VEGAS_GAMMA,
.procname = "tcp_vegas_gamma",
.data = &sysctl_tcp_vegas_gamma,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
{ .ctl_name = 0 }
};
......
This diff is collapsed.
......@@ -841,6 +841,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
if (newtp->ecn_flags&TCP_ECN_OK)
newsk->sk_no_largesend = 1;
tcp_vegas_init(newtp);
TCP_INC_STATS_BH(TcpPassiveOpens);
}
return newsk;
......
......@@ -106,6 +106,9 @@ static void tcp_cwnd_restart(struct tcp_opt *tp, struct dst_entry *dst)
u32 restart_cwnd = tcp_init_cwnd(tp, dst);
u32 cwnd = tp->snd_cwnd;
if (tcp_is_vegas(tp))
tcp_vegas_enable(tp);
tp->snd_ssthresh = tcp_current_ssthresh(tp);
restart_cwnd = min(restart_cwnd, cwnd);
......@@ -225,6 +228,19 @@ int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
tcp_header_size += (TCPOLEN_SACK_BASE_ALIGNED +
(tp->eff_sacks * TCPOLEN_SACK_PERBLOCK));
}
/*
* If the connection is idle and we are restarting,
* then we don't want to do any Vegas calculations
* until we get fresh RTT samples. So when we
* restart, we reset our Vegas state to a clean
* slate. After we get acks for this flight of
* packets, _then_ we can make Vegas calculations
* again.
*/
if (tcp_is_vegas(tp) && tcp_packets_in_flight(tp) == 0)
tcp_vegas_enable(tp);
th = (struct tcphdr *) skb_push(skb, tcp_header_size);
skb->h.th = th;
skb_set_owner_w(skb, sk);
......@@ -1268,6 +1284,7 @@ static inline void tcp_connect_init(struct sock *sk)
tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
tp->advmss = dst_metric(dst, RTAX_ADVMSS);
tcp_initialize_rcv_mss(sk);
tcp_vegas_init(tp);
tcp_select_initial_window(tcp_full_space(sk),
tp->advmss - (tp->ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
......@@ -1318,6 +1335,7 @@ int tcp_connect(struct sock *sk)
TCP_SKB_CB(buff)->end_seq = tp->write_seq;
tp->snd_nxt = tp->write_seq;
tp->pushed_seq = tp->write_seq;
tcp_vegas_init(tp);
/* Send it off. */
TCP_SKB_CB(buff)->when = tcp_time_stamp;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment