Commit c6932918 authored by Stephen Hemminger, committed by David S. Miller

[TCP]: Choose congestion algorithm at initialization.

The choice of congestion algorithm needs to be made when the connection
is set up, to avoid problems when the sysctl values change later and the
necessary data hasn't been collected.
Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 0ed667a9
...@@ -205,6 +205,13 @@ typedef struct tcp_pcount { ...@@ -205,6 +205,13 @@ typedef struct tcp_pcount {
__u32 val; __u32 val;
} tcp_pcount_t; } tcp_pcount_t;
enum tcp_congestion_algo {
TCP_RENO=0,
TCP_VEGAS,
TCP_WESTWOOD,
TCP_BIC,
};
struct tcp_opt { struct tcp_opt {
int tcp_header_len; /* Bytes of tcp header to send */ int tcp_header_len; /* Bytes of tcp header to send */
...@@ -265,7 +272,7 @@ struct tcp_opt { ...@@ -265,7 +272,7 @@ struct tcp_opt {
__u8 frto_counter; /* Number of new acks after RTO */ __u8 frto_counter; /* Number of new acks after RTO */
__u32 frto_highmark; /* snd_nxt when RTO occurred */ __u32 frto_highmark; /* snd_nxt when RTO occurred */
__u8 unused_pad; __u8 adv_cong; /* Using Vegas, Westwood, or BIC */
__u8 defer_accept; /* User waits for some data after accept() */ __u8 defer_accept; /* User waits for some data after accept() */
/* one byte hole, try to pack */ /* one byte hole, try to pack */
...@@ -412,7 +419,6 @@ struct tcp_opt { ...@@ -412,7 +419,6 @@ struct tcp_opt {
__u32 beg_snd_nxt; /* right edge during last RTT */ __u32 beg_snd_nxt; /* right edge during last RTT */
__u32 beg_snd_una; /* left edge during last RTT */ __u32 beg_snd_una; /* left edge during last RTT */
__u32 beg_snd_cwnd; /* saves the size of the cwnd */ __u32 beg_snd_cwnd; /* saves the size of the cwnd */
__u8 do_vegas; /* do vegas for this connection */
__u8 doing_vegas_now;/* if true, do vegas for this RTT */ __u8 doing_vegas_now;/* if true, do vegas for this RTT */
__u16 cntRTT; /* # of RTTs measured within last RTT */ __u16 cntRTT; /* # of RTTs measured within last RTT */
__u32 minRTT; /* min of RTTs measured within last RTT (in usec) */ __u32 minRTT; /* min of RTTs measured within last RTT (in usec) */
......
...@@ -1271,6 +1271,13 @@ static __inline__ unsigned int tcp_packets_in_flight(struct tcp_opt *tp) ...@@ -1271,6 +1271,13 @@ static __inline__ unsigned int tcp_packets_in_flight(struct tcp_opt *tp)
tcp_get_pcount(&tp->retrans_out)); tcp_get_pcount(&tp->retrans_out));
} }
/*
* Which congestion algorithm is in use on the connection.
*/
#define tcp_is_vegas(__tp) ((__tp)->adv_cong == TCP_VEGAS)
#define tcp_is_westwood(__tp) ((__tp)->adv_cong == TCP_WESTWOOD)
#define tcp_is_bic(__tp) ((__tp)->adv_cong == TCP_BIC)
/* Recalculate snd_ssthresh, we want to set it to: /* Recalculate snd_ssthresh, we want to set it to:
* *
* Reno: * Reno:
...@@ -1283,7 +1290,7 @@ static __inline__ unsigned int tcp_packets_in_flight(struct tcp_opt *tp) ...@@ -1283,7 +1290,7 @@ static __inline__ unsigned int tcp_packets_in_flight(struct tcp_opt *tp)
*/ */
static inline __u32 tcp_recalc_ssthresh(struct tcp_opt *tp) static inline __u32 tcp_recalc_ssthresh(struct tcp_opt *tp)
{ {
if (sysctl_tcp_bic) { if (tcp_is_bic(tp)) {
if (sysctl_tcp_bic_fast_convergence && if (sysctl_tcp_bic_fast_convergence &&
tp->snd_cwnd < tp->bictcp.last_max_cwnd) tp->snd_cwnd < tp->bictcp.last_max_cwnd)
tp->bictcp.last_max_cwnd tp->bictcp.last_max_cwnd
...@@ -1302,11 +1309,6 @@ static inline __u32 tcp_recalc_ssthresh(struct tcp_opt *tp) ...@@ -1302,11 +1309,6 @@ static inline __u32 tcp_recalc_ssthresh(struct tcp_opt *tp)
/* Stop taking Vegas samples for now. */ /* Stop taking Vegas samples for now. */
#define tcp_vegas_disable(__tp) ((__tp)->vegas.doing_vegas_now = 0) #define tcp_vegas_disable(__tp) ((__tp)->vegas.doing_vegas_now = 0)
/* Is this TCP connection using Vegas (regardless of whether it is taking
* Vegas measurements at the current time)?
*/
#define tcp_is_vegas(__tp) ((__tp)->vegas.do_vegas)
static inline void tcp_vegas_enable(struct tcp_opt *tp) static inline void tcp_vegas_enable(struct tcp_opt *tp)
{ {
...@@ -1340,7 +1342,7 @@ static inline void tcp_vegas_enable(struct tcp_opt *tp) ...@@ -1340,7 +1342,7 @@ static inline void tcp_vegas_enable(struct tcp_opt *tp)
/* Should we be taking Vegas samples right now? */ /* Should we be taking Vegas samples right now? */
#define tcp_vegas_enabled(__tp) ((__tp)->vegas.doing_vegas_now) #define tcp_vegas_enabled(__tp) ((__tp)->vegas.doing_vegas_now)
extern void tcp_vegas_init(struct tcp_opt *tp); extern void tcp_ca_init(struct tcp_opt *tp);
static inline void tcp_set_ca_state(struct tcp_opt *tp, u8 ca_state) static inline void tcp_set_ca_state(struct tcp_opt *tp, u8 ca_state)
{ {
...@@ -2024,7 +2026,7 @@ extern void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo); ...@@ -2024,7 +2026,7 @@ extern void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo);
static inline void tcp_westwood_update_rtt(struct tcp_opt *tp, __u32 rtt_seq) static inline void tcp_westwood_update_rtt(struct tcp_opt *tp, __u32 rtt_seq)
{ {
if (sysctl_tcp_westwood) if (tcp_is_westwood(tp))
tp->westwood.rtt = rtt_seq; tp->westwood.rtt = rtt_seq;
} }
...@@ -2033,13 +2035,13 @@ void __tcp_westwood_slow_bw(struct sock *, struct sk_buff *); ...@@ -2033,13 +2035,13 @@ void __tcp_westwood_slow_bw(struct sock *, struct sk_buff *);
static inline void tcp_westwood_fast_bw(struct sock *sk, struct sk_buff *skb) static inline void tcp_westwood_fast_bw(struct sock *sk, struct sk_buff *skb)
{ {
if (sysctl_tcp_westwood) if (tcp_is_westwood(tcp_sk(sk)))
__tcp_westwood_fast_bw(sk, skb); __tcp_westwood_fast_bw(sk, skb);
} }
static inline void tcp_westwood_slow_bw(struct sock *sk, struct sk_buff *skb) static inline void tcp_westwood_slow_bw(struct sock *sk, struct sk_buff *skb)
{ {
if (sysctl_tcp_westwood) if (tcp_is_westwood(tcp_sk(sk)))
__tcp_westwood_slow_bw(sk, skb); __tcp_westwood_slow_bw(sk, skb);
} }
...@@ -2052,14 +2054,14 @@ static inline __u32 __tcp_westwood_bw_rttmin(const struct tcp_opt *tp) ...@@ -2052,14 +2054,14 @@ static inline __u32 __tcp_westwood_bw_rttmin(const struct tcp_opt *tp)
static inline __u32 tcp_westwood_bw_rttmin(const struct tcp_opt *tp) static inline __u32 tcp_westwood_bw_rttmin(const struct tcp_opt *tp)
{ {
return sysctl_tcp_westwood ? __tcp_westwood_bw_rttmin(tp) : 0; return tcp_is_westwood(tp) ? __tcp_westwood_bw_rttmin(tp) : 0;
} }
static inline int tcp_westwood_ssthresh(struct tcp_opt *tp) static inline int tcp_westwood_ssthresh(struct tcp_opt *tp)
{ {
__u32 ssthresh = 0; __u32 ssthresh = 0;
if (sysctl_tcp_westwood) { if (tcp_is_westwood(tp)) {
ssthresh = __tcp_westwood_bw_rttmin(tp); ssthresh = __tcp_westwood_bw_rttmin(tp);
if (ssthresh) if (ssthresh)
tp->snd_ssthresh = ssthresh; tp->snd_ssthresh = ssthresh;
...@@ -2072,7 +2074,7 @@ static inline int tcp_westwood_cwnd(struct tcp_opt *tp) ...@@ -2072,7 +2074,7 @@ static inline int tcp_westwood_cwnd(struct tcp_opt *tp)
{ {
__u32 cwnd = 0; __u32 cwnd = 0;
if (sysctl_tcp_westwood) { if (tcp_is_westwood(tp)) {
cwnd = __tcp_westwood_bw_rttmin(tp); cwnd = __tcp_westwood_bw_rttmin(tp);
if (cwnd) if (cwnd)
tp->snd_cwnd = cwnd; tp->snd_cwnd = cwnd;
......
...@@ -555,17 +555,20 @@ static void tcp_event_data_recv(struct sock *sk, struct tcp_opt *tp, struct sk_b ...@@ -555,17 +555,20 @@ static void tcp_event_data_recv(struct sock *sk, struct tcp_opt *tp, struct sk_b
tcp_grow_window(sk, tp, skb); tcp_grow_window(sk, tp, skb);
} }
/* Set up a new TCP connection, depending on whether it should be /* When starting a new connection, pin down the current choice of
* using Vegas or not. * congestion algorithm.
*/ */
void tcp_vegas_init(struct tcp_opt *tp) void tcp_ca_init(struct tcp_opt *tp)
{ {
if (sysctl_tcp_vegas_cong_avoid) { if (sysctl_tcp_westwood)
tp->vegas.do_vegas = 1; tp->adv_cong = TCP_WESTWOOD;
else if (sysctl_tcp_bic)
tp->adv_cong = TCP_BIC;
else if (sysctl_tcp_vegas_cong_avoid) {
tp->adv_cong = TCP_VEGAS;
tp->vegas.baseRTT = 0x7fffffff; tp->vegas.baseRTT = 0x7fffffff;
tcp_vegas_enable(tp); tcp_vegas_enable(tp);
} else }
tcp_vegas_disable(tp);
} }
/* Do RTT sampling needed for Vegas. /* Do RTT sampling needed for Vegas.
...@@ -2039,7 +2042,7 @@ tcp_ack_update_rtt(struct tcp_opt *tp, int flag, s32 seq_rtt) ...@@ -2039,7 +2042,7 @@ tcp_ack_update_rtt(struct tcp_opt *tp, int flag, s32 seq_rtt)
static inline __u32 bictcp_cwnd(struct tcp_opt *tp) static inline __u32 bictcp_cwnd(struct tcp_opt *tp)
{ {
/* original Reno behaviour */ /* original Reno behaviour */
if (!sysctl_tcp_bic) if (!tcp_is_bic(tp))
return tp->snd_cwnd; return tp->snd_cwnd;
if (tp->bictcp.last_cwnd == tp->snd_cwnd && if (tp->bictcp.last_cwnd == tp->snd_cwnd &&
......
...@@ -841,7 +841,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req, ...@@ -841,7 +841,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
if (newtp->ecn_flags&TCP_ECN_OK) if (newtp->ecn_flags&TCP_ECN_OK)
newsk->sk_no_largesend = 1; newsk->sk_no_largesend = 1;
tcp_vegas_init(newtp); tcp_ca_init(newtp);
TCP_INC_STATS_BH(TCP_MIB_PASSIVEOPENS); TCP_INC_STATS_BH(TCP_MIB_PASSIVEOPENS);
} }
return newsk; return newsk;
......
...@@ -1359,7 +1359,7 @@ static inline void tcp_connect_init(struct sock *sk) ...@@ -1359,7 +1359,7 @@ static inline void tcp_connect_init(struct sock *sk)
tp->window_clamp = dst_metric(dst, RTAX_WINDOW); tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
tp->advmss = dst_metric(dst, RTAX_ADVMSS); tp->advmss = dst_metric(dst, RTAX_ADVMSS);
tcp_initialize_rcv_mss(sk); tcp_initialize_rcv_mss(sk);
tcp_vegas_init(tp); tcp_ca_init(tp);
tcp_select_initial_window(tcp_full_space(sk), tcp_select_initial_window(tcp_full_space(sk),
tp->advmss - (tp->ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0), tp->advmss - (tp->ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
...@@ -1411,7 +1411,7 @@ int tcp_connect(struct sock *sk) ...@@ -1411,7 +1411,7 @@ int tcp_connect(struct sock *sk)
TCP_SKB_CB(buff)->end_seq = tp->write_seq; TCP_SKB_CB(buff)->end_seq = tp->write_seq;
tp->snd_nxt = tp->write_seq; tp->snd_nxt = tp->write_seq;
tp->pushed_seq = tp->write_seq; tp->pushed_seq = tp->write_seq;
tcp_vegas_init(tp); tcp_ca_init(tp);
/* Send it off. */ /* Send it off. */
TCP_SKB_CB(buff)->when = tcp_time_stamp; TCP_SKB_CB(buff)->when = tcp_time_stamp;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment