Commit 21e99435 authored by Stephen Hemminger, committed by Linus Torvalds

[TCP]: Fix BIC max_cwnd calculation error.

The BIC TCP cwnd problem, as identified by Yee-Ting Li and Doug Leith,
is that the computation in tcp_recalc_ssthresh is incorrect:
BICTCP_1_OVER_BETA/2 should be BICTCP_1_OVER_BETA*2.

My fix is to implement the code from BIC TCP 1.1, which uses a sysctl
to set the beta.  There are a few variable name changes from the 1.1
code, and the scaling factor is now a #define instead of being hardcoded.

I validated this using netem and kprobes; for more details see
http://developer.osdl.org/shemminger/bic-beta-patch.pdf

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent fce0deb5
...@@ -344,6 +344,7 @@ enum ...@@ -344,6 +344,7 @@ enum
NET_TCP_DEFAULT_WIN_SCALE=105, NET_TCP_DEFAULT_WIN_SCALE=105,
NET_TCP_MODERATE_RCVBUF=106, NET_TCP_MODERATE_RCVBUF=106,
NET_TCP_TSO_WIN_DIVISOR=107, NET_TCP_TSO_WIN_DIVISOR=107,
NET_TCP_BIC_BETA=108,
}; };
enum { enum {
......
...@@ -505,9 +505,8 @@ static __inline__ int tcp_sk_listen_hashfn(struct sock *sk) ...@@ -505,9 +505,8 @@ static __inline__ int tcp_sk_listen_hashfn(struct sock *sk)
# define TCP_TW_RECYCLE_TICK (12+2-TCP_TW_RECYCLE_SLOTS_LOG) # define TCP_TW_RECYCLE_TICK (12+2-TCP_TW_RECYCLE_SLOTS_LOG)
#endif #endif
#define BICTCP_1_OVER_BETA 8 /* #define BICTCP_BETA_SCALE 1024 /* Scale factor beta calculation
* Fast recovery * max_cwnd = snd_cwnd * beta
* multiplicative decrease factor
*/ */
#define BICTCP_MAX_INCREMENT 32 /* #define BICTCP_MAX_INCREMENT 32 /*
* Limit on the amount of * Limit on the amount of
...@@ -606,6 +605,7 @@ extern int sysctl_tcp_nometrics_save; ...@@ -606,6 +605,7 @@ extern int sysctl_tcp_nometrics_save;
extern int sysctl_tcp_bic; extern int sysctl_tcp_bic;
extern int sysctl_tcp_bic_fast_convergence; extern int sysctl_tcp_bic_fast_convergence;
extern int sysctl_tcp_bic_low_window; extern int sysctl_tcp_bic_low_window;
extern int sysctl_tcp_bic_beta;
extern int sysctl_tcp_moderate_rcvbuf; extern int sysctl_tcp_moderate_rcvbuf;
extern int sysctl_tcp_tso_win_divisor; extern int sysctl_tcp_tso_win_divisor;
...@@ -1244,15 +1244,16 @@ static inline __u32 tcp_recalc_ssthresh(struct tcp_sock *tp) ...@@ -1244,15 +1244,16 @@ static inline __u32 tcp_recalc_ssthresh(struct tcp_sock *tp)
if (tcp_is_bic(tp)) { if (tcp_is_bic(tp)) {
if (sysctl_tcp_bic_fast_convergence && if (sysctl_tcp_bic_fast_convergence &&
tp->snd_cwnd < tp->bictcp.last_max_cwnd) tp->snd_cwnd < tp->bictcp.last_max_cwnd)
tp->bictcp.last_max_cwnd tp->bictcp.last_max_cwnd = (tp->snd_cwnd *
= (tp->snd_cwnd * (2*BICTCP_1_OVER_BETA-1)) (BICTCP_BETA_SCALE
/ (BICTCP_1_OVER_BETA/2); + sysctl_tcp_bic_beta))
/ (2 * BICTCP_BETA_SCALE);
else else
tp->bictcp.last_max_cwnd = tp->snd_cwnd; tp->bictcp.last_max_cwnd = tp->snd_cwnd;
if (tp->snd_cwnd > sysctl_tcp_bic_low_window) if (tp->snd_cwnd > sysctl_tcp_bic_low_window)
return max(tp->snd_cwnd - (tp->snd_cwnd/BICTCP_1_OVER_BETA), return max((tp->snd_cwnd * sysctl_tcp_bic_beta)
2U); / BICTCP_BETA_SCALE, 2U);
} }
return max(tp->snd_cwnd >> 1U, 2U); return max(tp->snd_cwnd >> 1U, 2U);
......
...@@ -682,6 +682,14 @@ ctl_table ipv4_table[] = { ...@@ -682,6 +682,14 @@ ctl_table ipv4_table[] = {
.mode = 0644, .mode = 0644,
.proc_handler = &proc_dointvec, .proc_handler = &proc_dointvec,
}, },
{
.ctl_name = NET_TCP_BIC_BETA,
.procname = "tcp_bic_beta",
.data = &sysctl_tcp_bic_beta,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
{ .ctl_name = 0 } { .ctl_name = 0 }
}; };
......
...@@ -102,6 +102,7 @@ int sysctl_tcp_vegas_gamma = 1<<V_PARAM_SHIFT; ...@@ -102,6 +102,7 @@ int sysctl_tcp_vegas_gamma = 1<<V_PARAM_SHIFT;
int sysctl_tcp_bic = 1; int sysctl_tcp_bic = 1;
int sysctl_tcp_bic_fast_convergence = 1; int sysctl_tcp_bic_fast_convergence = 1;
int sysctl_tcp_bic_low_window = 14; int sysctl_tcp_bic_low_window = 14;
int sysctl_tcp_bic_beta = 819; /* = 819/1024 (BICTCP_BETA_SCALE) */
#define FLAG_DATA 0x01 /* Incoming frame contained data. */ #define FLAG_DATA 0x01 /* Incoming frame contained data. */
#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment