Commit 6687e988 authored by Arnaldo Carvalho de Melo, committed by David S. Miller

[ICSK]: Move TCP congestion avoidance members to icsk

This changeset basically moves tcp_sk()->{ca_ops,ca_state,etc} to inet_csk(),
minimal renaming/moving done in this changeset to ease review.

Most of it is just changes of struct tcp_sock * to struct sock * parameters.

With this we move to a state closer to two interesting goals:

1. Generalisation of net/ipv4/tcp_diag.c, becoming inet_diag.c, being used
   for any INET transport protocol that has struct inet_hashinfo and is
   derived from struct inet_connection_sock. Keeps the userspace API, which will
   just not display DCCP sockets, while newer versions of tools can support
   DCCP.

2. INET generic transport pluggable Congestion Avoidance infrastructure, using
   the current TCP CA infrastructure with DCCP.
Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 64ce2073
...@@ -258,19 +258,15 @@ struct tcp_sock { ...@@ -258,19 +258,15 @@ struct tcp_sock {
__u32 mss_cache; /* Cached effective mss, not including SACKS */ __u32 mss_cache; /* Cached effective mss, not including SACKS */
__u16 xmit_size_goal; /* Goal for segmenting output packets */ __u16 xmit_size_goal; /* Goal for segmenting output packets */
__u16 ext_header_len; /* Network protocol overhead (IP/IPv6 options) */ __u16 ext_header_len; /* Network protocol overhead (IP/IPv6 options) */
__u8 ca_state; /* State of fast-retransmit machine */
__u8 keepalive_probes; /* num of allowed keep alive probes */
__u16 advmss; /* Advertised MSS */
__u32 window_clamp; /* Maximal window to advertise */ __u32 window_clamp; /* Maximal window to advertise */
__u32 rcv_ssthresh; /* Current window clamp */ __u32 rcv_ssthresh; /* Current window clamp */
__u32 frto_highmark; /* snd_nxt when RTO occurred */ __u32 frto_highmark; /* snd_nxt when RTO occurred */
__u8 reordering; /* Packet reordering metric. */ __u8 reordering; /* Packet reordering metric. */
__u8 frto_counter; /* Number of new acks after RTO */ __u8 frto_counter; /* Number of new acks after RTO */
__u8 nonagle; /* Disable Nagle algorithm? */ __u8 nonagle; /* Disable Nagle algorithm? */
/* ONE BYTE HOLE, TRY TO PACK */ __u8 keepalive_probes; /* num of allowed keep alive probes */
/* RTT measurement */ /* RTT measurement */
__u32 srtt; /* smoothed round trip time << 3 */ __u32 srtt; /* smoothed round trip time << 3 */
...@@ -311,8 +307,7 @@ struct tcp_sock { ...@@ -311,8 +307,7 @@ struct tcp_sock {
struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */ struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */
struct tcp_sack_block selective_acks[4]; /* The SACKS themselves*/ struct tcp_sack_block selective_acks[4]; /* The SACKS themselves*/
__u8 probes_out; /* unanswered 0 window probes */ __u16 advmss; /* Advertised MSS */
__u8 ecn_flags; /* ECN status bits. */
__u16 prior_ssthresh; /* ssthresh saved at recovery start */ __u16 prior_ssthresh; /* ssthresh saved at recovery start */
__u32 lost_out; /* Lost packets */ __u32 lost_out; /* Lost packets */
__u32 sacked_out; /* SACK'd packets */ __u32 sacked_out; /* SACK'd packets */
...@@ -327,7 +322,7 @@ struct tcp_sock { ...@@ -327,7 +322,7 @@ struct tcp_sock {
__u32 urg_seq; /* Seq of received urgent pointer */ __u32 urg_seq; /* Seq of received urgent pointer */
__u16 urg_data; /* Saved octet of OOB data and control flags */ __u16 urg_data; /* Saved octet of OOB data and control flags */
__u8 urg_mode; /* In urgent mode */ __u8 urg_mode; /* In urgent mode */
/* ONE BYTE HOLE, TRY TO PACK! */ __u8 ecn_flags; /* ECN status bits. */
__u32 snd_up; /* Urgent pointer */ __u32 snd_up; /* Urgent pointer */
__u32 total_retrans; /* Total retransmits for entire connection */ __u32 total_retrans; /* Total retransmits for entire connection */
...@@ -351,11 +346,6 @@ struct tcp_sock { ...@@ -351,11 +346,6 @@ struct tcp_sock {
__u32 seq; __u32 seq;
__u32 time; __u32 time;
} rcvq_space; } rcvq_space;
/* Pluggable TCP congestion control hook */
struct tcp_congestion_ops *ca_ops;
u32 ca_priv[16];
#define TCP_CA_PRIV_SIZE (16*sizeof(u32))
}; };
static inline struct tcp_sock *tcp_sk(const struct sock *sk) static inline struct tcp_sock *tcp_sk(const struct sock *sk)
...@@ -377,11 +367,6 @@ static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk) ...@@ -377,11 +367,6 @@ static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk)
return (struct tcp_timewait_sock *)sk; return (struct tcp_timewait_sock *)sk;
} }
static inline void *tcp_ca(const struct tcp_sock *tp)
{
return (void *) tp->ca_priv;
}
#endif #endif
#endif /* _LINUX_TCP_H */ #endif /* _LINUX_TCP_H */
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
struct inet_bind_bucket; struct inet_bind_bucket;
struct inet_hashinfo; struct inet_hashinfo;
struct tcp_congestion_ops;
/** inet_connection_sock - INET connection oriented sock /** inet_connection_sock - INET connection oriented sock
* *
...@@ -35,10 +36,13 @@ struct inet_hashinfo; ...@@ -35,10 +36,13 @@ struct inet_hashinfo;
* @icsk_timeout: Timeout * @icsk_timeout: Timeout
* @icsk_retransmit_timer: Resend (no ack) * @icsk_retransmit_timer: Resend (no ack)
* @icsk_rto: Retransmit timeout * @icsk_rto: Retransmit timeout
* @icsk_ca_ops Pluggable congestion control hook
* @icsk_ca_state: Congestion control state
* @icsk_retransmits: Number of unrecovered [RTO] timeouts * @icsk_retransmits: Number of unrecovered [RTO] timeouts
* @icsk_pending: Scheduled timer event * @icsk_pending: Scheduled timer event
* @icsk_backoff: Backoff * @icsk_backoff: Backoff
* @icsk_syn_retries: Number of allowed SYN (or equivalent) retries * @icsk_syn_retries: Number of allowed SYN (or equivalent) retries
* @icsk_probes_out: unanswered 0 window probes
* @icsk_ack: Delayed ACK control data * @icsk_ack: Delayed ACK control data
*/ */
struct inet_connection_sock { struct inet_connection_sock {
...@@ -50,10 +54,14 @@ struct inet_connection_sock { ...@@ -50,10 +54,14 @@ struct inet_connection_sock {
struct timer_list icsk_retransmit_timer; struct timer_list icsk_retransmit_timer;
struct timer_list icsk_delack_timer; struct timer_list icsk_delack_timer;
__u32 icsk_rto; __u32 icsk_rto;
struct tcp_congestion_ops *icsk_ca_ops;
__u8 icsk_ca_state;
__u8 icsk_retransmits; __u8 icsk_retransmits;
__u8 icsk_pending; __u8 icsk_pending;
__u8 icsk_backoff; __u8 icsk_backoff;
__u8 icsk_syn_retries; __u8 icsk_syn_retries;
__u8 icsk_probes_out;
/* 2 BYTES HOLE, TRY TO PACK! */
struct { struct {
__u8 pending; /* ACK is pending */ __u8 pending; /* ACK is pending */
__u8 quick; /* Scheduled number of quick acks */ __u8 quick; /* Scheduled number of quick acks */
...@@ -65,6 +73,8 @@ struct inet_connection_sock { ...@@ -65,6 +73,8 @@ struct inet_connection_sock {
__u16 last_seg_size; /* Size of last incoming segment */ __u16 last_seg_size; /* Size of last incoming segment */
__u16 rcv_mss; /* MSS used for delayed ACK decisions */ __u16 rcv_mss; /* MSS used for delayed ACK decisions */
} icsk_ack; } icsk_ack;
u32 icsk_ca_priv[16];
#define ICSK_CA_PRIV_SIZE (16 * sizeof(u32))
}; };
#define ICSK_TIME_RETRANS 1 /* Retransmit timer */ #define ICSK_TIME_RETRANS 1 /* Retransmit timer */
...@@ -77,6 +87,11 @@ static inline struct inet_connection_sock *inet_csk(const struct sock *sk) ...@@ -77,6 +87,11 @@ static inline struct inet_connection_sock *inet_csk(const struct sock *sk)
return (struct inet_connection_sock *)sk; return (struct inet_connection_sock *)sk;
} }
static inline void *inet_csk_ca(const struct sock *sk)
{
return (void *)inet_csk(sk)->icsk_ca_priv;
}
extern struct sock *inet_csk_clone(struct sock *sk, extern struct sock *inet_csk_clone(struct sock *sk,
const struct request_sock *req, const struct request_sock *req,
const unsigned int __nocast priority); const unsigned int __nocast priority);
......
...@@ -669,29 +669,29 @@ struct tcp_congestion_ops { ...@@ -669,29 +669,29 @@ struct tcp_congestion_ops {
struct list_head list; struct list_head list;
/* initialize private data (optional) */ /* initialize private data (optional) */
void (*init)(struct tcp_sock *tp); void (*init)(struct sock *sk);
/* cleanup private data (optional) */ /* cleanup private data (optional) */
void (*release)(struct tcp_sock *tp); void (*release)(struct sock *sk);
/* return slow start threshold (required) */ /* return slow start threshold (required) */
u32 (*ssthresh)(struct tcp_sock *tp); u32 (*ssthresh)(struct sock *sk);
/* lower bound for congestion window (optional) */ /* lower bound for congestion window (optional) */
u32 (*min_cwnd)(struct tcp_sock *tp); u32 (*min_cwnd)(struct sock *sk);
/* do new cwnd calculation (required) */ /* do new cwnd calculation (required) */
void (*cong_avoid)(struct tcp_sock *tp, u32 ack, void (*cong_avoid)(struct sock *sk, u32 ack,
u32 rtt, u32 in_flight, int good_ack); u32 rtt, u32 in_flight, int good_ack);
/* round trip time sample per acked packet (optional) */ /* round trip time sample per acked packet (optional) */
void (*rtt_sample)(struct tcp_sock *tp, u32 usrtt); void (*rtt_sample)(struct sock *sk, u32 usrtt);
/* call before changing ca_state (optional) */ /* call before changing ca_state (optional) */
void (*set_state)(struct tcp_sock *tp, u8 new_state); void (*set_state)(struct sock *sk, u8 new_state);
/* call when cwnd event occurs (optional) */ /* call when cwnd event occurs (optional) */
void (*cwnd_event)(struct tcp_sock *tp, enum tcp_ca_event ev); void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev);
/* new value of cwnd after loss (optional) */ /* new value of cwnd after loss (optional) */
u32 (*undo_cwnd)(struct tcp_sock *tp); u32 (*undo_cwnd)(struct sock *sk);
/* hook for packet ack accounting (optional) */ /* hook for packet ack accounting (optional) */
void (*pkts_acked)(struct tcp_sock *tp, u32 num_acked); void (*pkts_acked)(struct sock *sk, u32 num_acked);
/* get info for tcp_diag (optional) */ /* get info for tcp_diag (optional) */
void (*get_info)(struct tcp_sock *tp, u32 ext, struct sk_buff *skb); void (*get_info)(struct sock *sk, u32 ext, struct sk_buff *skb);
char name[TCP_CA_NAME_MAX]; char name[TCP_CA_NAME_MAX];
struct module *owner; struct module *owner;
...@@ -700,30 +700,34 @@ struct tcp_congestion_ops { ...@@ -700,30 +700,34 @@ struct tcp_congestion_ops {
extern int tcp_register_congestion_control(struct tcp_congestion_ops *type); extern int tcp_register_congestion_control(struct tcp_congestion_ops *type);
extern void tcp_unregister_congestion_control(struct tcp_congestion_ops *type); extern void tcp_unregister_congestion_control(struct tcp_congestion_ops *type);
extern void tcp_init_congestion_control(struct tcp_sock *tp); extern void tcp_init_congestion_control(struct sock *sk);
extern void tcp_cleanup_congestion_control(struct tcp_sock *tp); extern void tcp_cleanup_congestion_control(struct sock *sk);
extern int tcp_set_default_congestion_control(const char *name); extern int tcp_set_default_congestion_control(const char *name);
extern void tcp_get_default_congestion_control(char *name); extern void tcp_get_default_congestion_control(char *name);
extern int tcp_set_congestion_control(struct tcp_sock *tp, const char *name); extern int tcp_set_congestion_control(struct sock *sk, const char *name);
extern struct tcp_congestion_ops tcp_init_congestion_ops; extern struct tcp_congestion_ops tcp_init_congestion_ops;
extern u32 tcp_reno_ssthresh(struct tcp_sock *tp); extern u32 tcp_reno_ssthresh(struct sock *sk);
extern void tcp_reno_cong_avoid(struct tcp_sock *tp, u32 ack, extern void tcp_reno_cong_avoid(struct sock *sk, u32 ack,
u32 rtt, u32 in_flight, int flag); u32 rtt, u32 in_flight, int flag);
extern u32 tcp_reno_min_cwnd(struct tcp_sock *tp); extern u32 tcp_reno_min_cwnd(struct sock *sk);
extern struct tcp_congestion_ops tcp_reno; extern struct tcp_congestion_ops tcp_reno;
static inline void tcp_set_ca_state(struct tcp_sock *tp, u8 ca_state) static inline void tcp_set_ca_state(struct sock *sk, const u8 ca_state)
{ {
if (tp->ca_ops->set_state) struct inet_connection_sock *icsk = inet_csk(sk);
tp->ca_ops->set_state(tp, ca_state);
tp->ca_state = ca_state; if (icsk->icsk_ca_ops->set_state)
icsk->icsk_ca_ops->set_state(sk, ca_state);
icsk->icsk_ca_state = ca_state;
} }
static inline void tcp_ca_event(struct tcp_sock *tp, enum tcp_ca_event event) static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event)
{ {
if (tp->ca_ops->cwnd_event) const struct inet_connection_sock *icsk = inet_csk(sk);
tp->ca_ops->cwnd_event(tp, event);
if (icsk->icsk_ca_ops->cwnd_event)
icsk->icsk_ca_ops->cwnd_event(sk, event);
} }
/* This determines how many packets are "in the network" to the best /* This determines how many packets are "in the network" to the best
...@@ -749,9 +753,10 @@ static __inline__ unsigned int tcp_packets_in_flight(const struct tcp_sock *tp) ...@@ -749,9 +753,10 @@ static __inline__ unsigned int tcp_packets_in_flight(const struct tcp_sock *tp)
* The exception is rate halving phase, when cwnd is decreasing towards * The exception is rate halving phase, when cwnd is decreasing towards
* ssthresh. * ssthresh.
*/ */
static inline __u32 tcp_current_ssthresh(struct tcp_sock *tp) static inline __u32 tcp_current_ssthresh(const struct sock *sk)
{ {
if ((1<<tp->ca_state)&(TCPF_CA_CWR|TCPF_CA_Recovery)) const struct tcp_sock *tp = tcp_sk(sk);
if ((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_CWR | TCPF_CA_Recovery))
return tp->snd_ssthresh; return tp->snd_ssthresh;
else else
return max(tp->snd_ssthresh, return max(tp->snd_ssthresh,
...@@ -768,10 +773,13 @@ static inline void tcp_sync_left_out(struct tcp_sock *tp) ...@@ -768,10 +773,13 @@ static inline void tcp_sync_left_out(struct tcp_sock *tp)
} }
/* Set slow start threshold and cwnd not falling to slow start */ /* Set slow start threshold and cwnd not falling to slow start */
static inline void __tcp_enter_cwr(struct tcp_sock *tp) static inline void __tcp_enter_cwr(struct sock *sk)
{ {
const struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
tp->undo_marker = 0; tp->undo_marker = 0;
tp->snd_ssthresh = tp->ca_ops->ssthresh(tp); tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_cwnd = min(tp->snd_cwnd,
tcp_packets_in_flight(tp) + 1U); tcp_packets_in_flight(tp) + 1U);
tp->snd_cwnd_cnt = 0; tp->snd_cwnd_cnt = 0;
...@@ -780,12 +788,14 @@ static inline void __tcp_enter_cwr(struct tcp_sock *tp) ...@@ -780,12 +788,14 @@ static inline void __tcp_enter_cwr(struct tcp_sock *tp)
TCP_ECN_queue_cwr(tp); TCP_ECN_queue_cwr(tp);
} }
static inline void tcp_enter_cwr(struct tcp_sock *tp) static inline void tcp_enter_cwr(struct sock *sk)
{ {
struct tcp_sock *tp = tcp_sk(sk);
tp->prior_ssthresh = 0; tp->prior_ssthresh = 0;
if (tp->ca_state < TCP_CA_CWR) { if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
__tcp_enter_cwr(tp); __tcp_enter_cwr(sk);
tcp_set_ca_state(tp, TCP_CA_CWR); tcp_set_ca_state(sk, TCP_CA_CWR);
} }
} }
......
...@@ -508,7 +508,8 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, ...@@ -508,7 +508,8 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req,
newsk->sk_write_space = sk_stream_write_space; newsk->sk_write_space = sk_stream_write_space;
newicsk->icsk_retransmits = 0; newicsk->icsk_retransmits = 0;
newicsk->icsk_backoff = 0; newicsk->icsk_backoff = 0;
newicsk->icsk_probes_out = 0;
/* Deinitialize accept_queue to trap illegal accesses. */ /* Deinitialize accept_queue to trap illegal accesses. */
memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue)); memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue));
......
...@@ -1671,11 +1671,11 @@ int tcp_disconnect(struct sock *sk, int flags) ...@@ -1671,11 +1671,11 @@ int tcp_disconnect(struct sock *sk, int flags)
tp->write_seq = 1; tp->write_seq = 1;
icsk->icsk_backoff = 0; icsk->icsk_backoff = 0;
tp->snd_cwnd = 2; tp->snd_cwnd = 2;
tp->probes_out = 0; icsk->icsk_probes_out = 0;
tp->packets_out = 0; tp->packets_out = 0;
tp->snd_ssthresh = 0x7fffffff; tp->snd_ssthresh = 0x7fffffff;
tp->snd_cwnd_cnt = 0; tp->snd_cwnd_cnt = 0;
tcp_set_ca_state(tp, TCP_CA_Open); tcp_set_ca_state(sk, TCP_CA_Open);
tcp_clear_retrans(tp); tcp_clear_retrans(tp);
inet_csk_delack_init(sk); inet_csk_delack_init(sk);
sk->sk_send_head = NULL; sk->sk_send_head = NULL;
...@@ -1718,7 +1718,7 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, ...@@ -1718,7 +1718,7 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
name[val] = 0; name[val] = 0;
lock_sock(sk); lock_sock(sk);
err = tcp_set_congestion_control(tp, name); err = tcp_set_congestion_control(sk, name);
release_sock(sk); release_sock(sk);
return err; return err;
} }
...@@ -1886,9 +1886,9 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) ...@@ -1886,9 +1886,9 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
memset(info, 0, sizeof(*info)); memset(info, 0, sizeof(*info));
info->tcpi_state = sk->sk_state; info->tcpi_state = sk->sk_state;
info->tcpi_ca_state = tp->ca_state; info->tcpi_ca_state = icsk->icsk_ca_state;
info->tcpi_retransmits = icsk->icsk_retransmits; info->tcpi_retransmits = icsk->icsk_retransmits;
info->tcpi_probes = tp->probes_out; info->tcpi_probes = icsk->icsk_probes_out;
info->tcpi_backoff = icsk->icsk_backoff; info->tcpi_backoff = icsk->icsk_backoff;
if (tp->rx_opt.tstamp_ok) if (tp->rx_opt.tstamp_ok)
...@@ -2016,7 +2016,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, ...@@ -2016,7 +2016,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
len = min_t(unsigned int, len, TCP_CA_NAME_MAX); len = min_t(unsigned int, len, TCP_CA_NAME_MAX);
if (put_user(len, optlen)) if (put_user(len, optlen))
return -EFAULT; return -EFAULT;
if (copy_to_user(optval, tp->ca_ops->name, len)) if (copy_to_user(optval, icsk->icsk_ca_ops->name, len))
return -EFAULT; return -EFAULT;
return 0; return 0;
default: default:
......
...@@ -86,11 +86,11 @@ static inline void bictcp_reset(struct bictcp *ca) ...@@ -86,11 +86,11 @@ static inline void bictcp_reset(struct bictcp *ca)
ca->delayed_ack = 2 << ACK_RATIO_SHIFT; ca->delayed_ack = 2 << ACK_RATIO_SHIFT;
} }
static void bictcp_init(struct tcp_sock *tp) static void bictcp_init(struct sock *sk)
{ {
bictcp_reset(tcp_ca(tp)); bictcp_reset(inet_csk_ca(sk));
if (initial_ssthresh) if (initial_ssthresh)
tp->snd_ssthresh = initial_ssthresh; tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
} }
/* /*
...@@ -156,9 +156,10 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) ...@@ -156,9 +156,10 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
/* Detect low utilization in congestion avoidance */ /* Detect low utilization in congestion avoidance */
static inline void bictcp_low_utilization(struct tcp_sock *tp, int flag) static inline void bictcp_low_utilization(struct sock *sk, int flag)
{ {
struct bictcp *ca = tcp_ca(tp); const struct tcp_sock *tp = tcp_sk(sk);
struct bictcp *ca = inet_csk_ca(sk);
u32 dist, delay; u32 dist, delay;
/* No time stamp */ /* No time stamp */
...@@ -208,12 +209,13 @@ static inline void bictcp_low_utilization(struct tcp_sock *tp, int flag) ...@@ -208,12 +209,13 @@ static inline void bictcp_low_utilization(struct tcp_sock *tp, int flag)
} }
static void bictcp_cong_avoid(struct tcp_sock *tp, u32 ack, static void bictcp_cong_avoid(struct sock *sk, u32 ack,
u32 seq_rtt, u32 in_flight, int data_acked) u32 seq_rtt, u32 in_flight, int data_acked)
{ {
struct bictcp *ca = tcp_ca(tp); struct tcp_sock *tp = tcp_sk(sk);
struct bictcp *ca = inet_csk_ca(sk);
bictcp_low_utilization(tp, data_acked); bictcp_low_utilization(sk, data_acked);
if (in_flight < tp->snd_cwnd) if (in_flight < tp->snd_cwnd)
return; return;
...@@ -242,9 +244,10 @@ static void bictcp_cong_avoid(struct tcp_sock *tp, u32 ack, ...@@ -242,9 +244,10 @@ static void bictcp_cong_avoid(struct tcp_sock *tp, u32 ack,
* behave like Reno until low_window is reached, * behave like Reno until low_window is reached,
* then increase congestion window slowly * then increase congestion window slowly
*/ */
static u32 bictcp_recalc_ssthresh(struct tcp_sock *tp) static u32 bictcp_recalc_ssthresh(struct sock *sk)
{ {
struct bictcp *ca = tcp_ca(tp); const struct tcp_sock *tp = tcp_sk(sk);
struct bictcp *ca = inet_csk_ca(sk);
ca->epoch_start = 0; /* end of epoch */ ca->epoch_start = 0; /* end of epoch */
...@@ -269,31 +272,34 @@ static u32 bictcp_recalc_ssthresh(struct tcp_sock *tp) ...@@ -269,31 +272,34 @@ static u32 bictcp_recalc_ssthresh(struct tcp_sock *tp)
return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U); return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U);
} }
static u32 bictcp_undo_cwnd(struct tcp_sock *tp) static u32 bictcp_undo_cwnd(struct sock *sk)
{ {
struct bictcp *ca = tcp_ca(tp); const struct tcp_sock *tp = tcp_sk(sk);
const struct bictcp *ca = inet_csk_ca(sk);
return max(tp->snd_cwnd, ca->last_max_cwnd); return max(tp->snd_cwnd, ca->last_max_cwnd);
} }
static u32 bictcp_min_cwnd(struct tcp_sock *tp) static u32 bictcp_min_cwnd(struct sock *sk)
{ {
const struct tcp_sock *tp = tcp_sk(sk);
return tp->snd_ssthresh; return tp->snd_ssthresh;
} }
static void bictcp_state(struct tcp_sock *tp, u8 new_state) static void bictcp_state(struct sock *sk, u8 new_state)
{ {
if (new_state == TCP_CA_Loss) if (new_state == TCP_CA_Loss)
bictcp_reset(tcp_ca(tp)); bictcp_reset(inet_csk_ca(sk));
} }
/* Track delayed acknowledgement ratio using sliding window /* Track delayed acknowledgement ratio using sliding window
* ratio = (15*ratio + sample) / 16 * ratio = (15*ratio + sample) / 16
*/ */
static void bictcp_acked(struct tcp_sock *tp, u32 cnt) static void bictcp_acked(struct sock *sk, u32 cnt)
{ {
if (cnt > 0 && tp->ca_state == TCP_CA_Open) { const struct inet_connection_sock *icsk = inet_csk(sk);
struct bictcp *ca = tcp_ca(tp);
if (cnt > 0 && icsk->icsk_ca_state == TCP_CA_Open) {
struct bictcp *ca = inet_csk_ca(sk);
cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT; cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT;
ca->delayed_ack += cnt; ca->delayed_ack += cnt;
} }
...@@ -314,7 +320,7 @@ static struct tcp_congestion_ops bictcp = { ...@@ -314,7 +320,7 @@ static struct tcp_congestion_ops bictcp = {
static int __init bictcp_register(void) static int __init bictcp_register(void)
{ {
BUG_ON(sizeof(struct bictcp) > TCP_CA_PRIV_SIZE); BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE);
return tcp_register_congestion_control(&bictcp); return tcp_register_congestion_control(&bictcp);
} }
......
...@@ -73,33 +73,36 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca) ...@@ -73,33 +73,36 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca)
EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control); EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control);
/* Assign choice of congestion control. */ /* Assign choice of congestion control. */
void tcp_init_congestion_control(struct tcp_sock *tp) void tcp_init_congestion_control(struct sock *sk)
{ {
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_congestion_ops *ca; struct tcp_congestion_ops *ca;
if (tp->ca_ops != &tcp_init_congestion_ops) if (icsk->icsk_ca_ops != &tcp_init_congestion_ops)
return; return;
rcu_read_lock(); rcu_read_lock();
list_for_each_entry_rcu(ca, &tcp_cong_list, list) { list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
if (try_module_get(ca->owner)) { if (try_module_get(ca->owner)) {
tp->ca_ops = ca; icsk->icsk_ca_ops = ca;
break; break;
} }
} }
rcu_read_unlock(); rcu_read_unlock();
if (tp->ca_ops->init) if (icsk->icsk_ca_ops->init)
tp->ca_ops->init(tp); icsk->icsk_ca_ops->init(sk);
} }
/* Manage refcounts on socket close. */ /* Manage refcounts on socket close. */
void tcp_cleanup_congestion_control(struct tcp_sock *tp) void tcp_cleanup_congestion_control(struct sock *sk)
{ {
if (tp->ca_ops->release) struct inet_connection_sock *icsk = inet_csk(sk);
tp->ca_ops->release(tp);
module_put(tp->ca_ops->owner); if (icsk->icsk_ca_ops->release)
icsk->icsk_ca_ops->release(sk);
module_put(icsk->icsk_ca_ops->owner);
} }
/* Used by sysctl to change default congestion control */ /* Used by sysctl to change default congestion control */
...@@ -143,14 +146,15 @@ void tcp_get_default_congestion_control(char *name) ...@@ -143,14 +146,15 @@ void tcp_get_default_congestion_control(char *name)
} }
/* Change congestion control for socket */ /* Change congestion control for socket */
int tcp_set_congestion_control(struct tcp_sock *tp, const char *name) int tcp_set_congestion_control(struct sock *sk, const char *name)
{ {
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_congestion_ops *ca; struct tcp_congestion_ops *ca;
int err = 0; int err = 0;
rcu_read_lock(); rcu_read_lock();
ca = tcp_ca_find(name); ca = tcp_ca_find(name);
if (ca == tp->ca_ops) if (ca == icsk->icsk_ca_ops)
goto out; goto out;
if (!ca) if (!ca)
...@@ -160,10 +164,10 @@ int tcp_set_congestion_control(struct tcp_sock *tp, const char *name) ...@@ -160,10 +164,10 @@ int tcp_set_congestion_control(struct tcp_sock *tp, const char *name)
err = -EBUSY; err = -EBUSY;
else { else {
tcp_cleanup_congestion_control(tp); tcp_cleanup_congestion_control(sk);
tp->ca_ops = ca; icsk->icsk_ca_ops = ca;
if (tp->ca_ops->init) if (icsk->icsk_ca_ops->init)
tp->ca_ops->init(tp); icsk->icsk_ca_ops->init(sk);
} }
out: out:
rcu_read_unlock(); rcu_read_unlock();
...@@ -177,9 +181,11 @@ int tcp_set_congestion_control(struct tcp_sock *tp, const char *name) ...@@ -177,9 +181,11 @@ int tcp_set_congestion_control(struct tcp_sock *tp, const char *name)
/* This is Jacobson's slow start and congestion avoidance. /* This is Jacobson's slow start and congestion avoidance.
* SIGCOMM '88, p. 328. * SIGCOMM '88, p. 328.
*/ */
void tcp_reno_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, u32 in_flight, void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight,
int flag) int flag)
{ {
struct tcp_sock *tp = tcp_sk(sk);
if (in_flight < tp->snd_cwnd) if (in_flight < tp->snd_cwnd)
return; return;
...@@ -202,15 +208,17 @@ void tcp_reno_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, u32 in_flight, ...@@ -202,15 +208,17 @@ void tcp_reno_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, u32 in_flight,
EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid); EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid);
/* Slow start threshold is half the congestion window (min 2) */ /* Slow start threshold is half the congestion window (min 2) */
u32 tcp_reno_ssthresh(struct tcp_sock *tp) u32 tcp_reno_ssthresh(struct sock *sk)
{ {
const struct tcp_sock *tp = tcp_sk(sk);
return max(tp->snd_cwnd >> 1U, 2U); return max(tp->snd_cwnd >> 1U, 2U);
} }
EXPORT_SYMBOL_GPL(tcp_reno_ssthresh); EXPORT_SYMBOL_GPL(tcp_reno_ssthresh);
/* Lower bound on congestion window. */ /* Lower bound on congestion window. */
u32 tcp_reno_min_cwnd(struct tcp_sock *tp) u32 tcp_reno_min_cwnd(struct sock *sk)
{ {
const struct tcp_sock *tp = tcp_sk(sk);
return tp->snd_ssthresh/2; return tp->snd_ssthresh/2;
} }
EXPORT_SYMBOL_GPL(tcp_reno_min_cwnd); EXPORT_SYMBOL_GPL(tcp_reno_min_cwnd);
......
...@@ -66,10 +66,10 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, ...@@ -66,10 +66,10 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk,
if (ext & (1<<(TCPDIAG_INFO-1))) if (ext & (1<<(TCPDIAG_INFO-1)))
info = TCPDIAG_PUT(skb, TCPDIAG_INFO, sizeof(*info)); info = TCPDIAG_PUT(skb, TCPDIAG_INFO, sizeof(*info));
if (ext & (1<<(TCPDIAG_CONG-1))) { if ((ext & (1 << (TCPDIAG_CONG - 1))) && icsk->icsk_ca_ops) {
size_t len = strlen(tp->ca_ops->name); size_t len = strlen(icsk->icsk_ca_ops->name);
strcpy(TCPDIAG_PUT(skb, TCPDIAG_CONG, len+1), strcpy(TCPDIAG_PUT(skb, TCPDIAG_CONG, len+1),
tp->ca_ops->name); icsk->icsk_ca_ops->name);
} }
} }
r->tcpdiag_family = sk->sk_family; r->tcpdiag_family = sk->sk_family;
...@@ -136,18 +136,17 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, ...@@ -136,18 +136,17 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk,
r->tcpdiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout); r->tcpdiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout);
} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
r->tcpdiag_timer = 4; r->tcpdiag_timer = 4;
r->tcpdiag_retrans = tp->probes_out; r->tcpdiag_retrans = icsk->icsk_probes_out;
r->tcpdiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout); r->tcpdiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout);
} else if (timer_pending(&sk->sk_timer)) { } else if (timer_pending(&sk->sk_timer)) {
r->tcpdiag_timer = 2; r->tcpdiag_timer = 2;
r->tcpdiag_retrans = tp->probes_out; r->tcpdiag_retrans = icsk->icsk_probes_out;
r->tcpdiag_expires = EXPIRES_IN_MS(sk->sk_timer.expires); r->tcpdiag_expires = EXPIRES_IN_MS(sk->sk_timer.expires);
} else { } else {
r->tcpdiag_timer = 0; r->tcpdiag_timer = 0;
r->tcpdiag_expires = 0; r->tcpdiag_expires = 0;
} }
#undef EXPIRES_IN_MS #undef EXPIRES_IN_MS
r->tcpdiag_rqueue = tp->rcv_nxt - tp->copied_seq; r->tcpdiag_rqueue = tp->rcv_nxt - tp->copied_seq;
r->tcpdiag_wqueue = tp->write_seq - tp->snd_una; r->tcpdiag_wqueue = tp->write_seq - tp->snd_una;
r->tcpdiag_uid = sock_i_uid(sk); r->tcpdiag_uid = sock_i_uid(sk);
...@@ -163,8 +162,9 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, ...@@ -163,8 +162,9 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk,
if (info) if (info)
tcp_get_info(sk, info); tcp_get_info(sk, info);
if (sk->sk_state < TCP_TIME_WAIT && tp->ca_ops->get_info) if (sk->sk_state < TCP_TIME_WAIT &&
tp->ca_ops->get_info(tp, ext, skb); icsk->icsk_ca_ops && icsk->icsk_ca_ops->get_info)
icsk->icsk_ca_ops->get_info(sk, ext, skb);
nlh->nlmsg_len = skb->tail - b; nlh->nlmsg_len = skb->tail - b;
return skb->len; return skb->len;
......
...@@ -98,9 +98,10 @@ struct hstcp { ...@@ -98,9 +98,10 @@ struct hstcp {
u32 ai; u32 ai;
}; };
static void hstcp_init(struct tcp_sock *tp) static void hstcp_init(struct sock *sk)
{ {
struct hstcp *ca = tcp_ca(tp); struct tcp_sock *tp = tcp_sk(sk);
struct hstcp *ca = inet_csk_ca(sk);
ca->ai = 0; ca->ai = 0;
...@@ -109,10 +110,11 @@ static void hstcp_init(struct tcp_sock *tp) ...@@ -109,10 +110,11 @@ static void hstcp_init(struct tcp_sock *tp)
tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128); tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128);
} }
static void hstcp_cong_avoid(struct tcp_sock *tp, u32 adk, u32 rtt, static void hstcp_cong_avoid(struct sock *sk, u32 adk, u32 rtt,
u32 in_flight, int good) u32 in_flight, int good)
{ {
struct hstcp *ca = tcp_ca(tp); struct tcp_sock *tp = tcp_sk(sk);
struct hstcp *ca = inet_csk_ca(sk);
if (in_flight < tp->snd_cwnd) if (in_flight < tp->snd_cwnd)
return; return;
...@@ -143,9 +145,10 @@ static void hstcp_cong_avoid(struct tcp_sock *tp, u32 adk, u32 rtt, ...@@ -143,9 +145,10 @@ static void hstcp_cong_avoid(struct tcp_sock *tp, u32 adk, u32 rtt,
} }
} }
static u32 hstcp_ssthresh(struct tcp_sock *tp) static u32 hstcp_ssthresh(struct sock *sk)
{ {
struct hstcp *ca = tcp_ca(tp); const struct tcp_sock *tp = tcp_sk(sk);
const struct hstcp *ca = inet_csk_ca(sk);
/* Do multiplicative decrease */ /* Do multiplicative decrease */
return max(tp->snd_cwnd - ((tp->snd_cwnd * hstcp_aimd_vals[ca->ai].md) >> 8), 2U); return max(tp->snd_cwnd - ((tp->snd_cwnd * hstcp_aimd_vals[ca->ai].md) >> 8), 2U);
...@@ -164,7 +167,7 @@ static struct tcp_congestion_ops tcp_highspeed = { ...@@ -164,7 +167,7 @@ static struct tcp_congestion_ops tcp_highspeed = {
static int __init hstcp_register(void) static int __init hstcp_register(void)
{ {
BUG_ON(sizeof(struct hstcp) > TCP_CA_PRIV_SIZE); BUG_ON(sizeof(struct hstcp) > ICSK_CA_PRIV_SIZE);
return tcp_register_congestion_control(&tcp_highspeed); return tcp_register_congestion_control(&tcp_highspeed);
} }
......
...@@ -55,18 +55,21 @@ static inline void htcp_reset(struct htcp *ca) ...@@ -55,18 +55,21 @@ static inline void htcp_reset(struct htcp *ca)
ca->snd_cwnd_cnt2 = 0; ca->snd_cwnd_cnt2 = 0;
} }
static u32 htcp_cwnd_undo(struct tcp_sock *tp) static u32 htcp_cwnd_undo(struct sock *sk)
{ {
struct htcp *ca = tcp_ca(tp); const struct tcp_sock *tp = tcp_sk(sk);
struct htcp *ca = inet_csk_ca(sk);
ca->ccount = ca->undo_ccount; ca->ccount = ca->undo_ccount;
ca->maxRTT = ca->undo_maxRTT; ca->maxRTT = ca->undo_maxRTT;
ca->old_maxB = ca->undo_old_maxB; ca->old_maxB = ca->undo_old_maxB;
return max(tp->snd_cwnd, (tp->snd_ssthresh<<7)/ca->beta); return max(tp->snd_cwnd, (tp->snd_ssthresh<<7)/ca->beta);
} }
static inline void measure_rtt(struct tcp_sock *tp) static inline void measure_rtt(struct sock *sk)
{ {
struct htcp *ca = tcp_ca(tp); const struct inet_connection_sock *icsk = inet_csk(sk);
const struct tcp_sock *tp = tcp_sk(sk);
struct htcp *ca = inet_csk_ca(sk);
u32 srtt = tp->srtt>>3; u32 srtt = tp->srtt>>3;
/* keep track of minimum RTT seen so far, minRTT is zero at first */ /* keep track of minimum RTT seen so far, minRTT is zero at first */
...@@ -74,7 +77,7 @@ static inline void measure_rtt(struct tcp_sock *tp) ...@@ -74,7 +77,7 @@ static inline void measure_rtt(struct tcp_sock *tp)
ca->minRTT = srtt; ca->minRTT = srtt;
/* max RTT */ /* max RTT */
if (tp->ca_state == TCP_CA_Open && tp->snd_ssthresh < 0xFFFF && ca->ccount > 3) { if (icsk->icsk_ca_state == TCP_CA_Open && tp->snd_ssthresh < 0xFFFF && ca->ccount > 3) {
if (ca->maxRTT < ca->minRTT) if (ca->maxRTT < ca->minRTT)
ca->maxRTT = ca->minRTT; ca->maxRTT = ca->minRTT;
if (ca->maxRTT < srtt && srtt <= ca->maxRTT+HZ/50) if (ca->maxRTT < srtt && srtt <= ca->maxRTT+HZ/50)
...@@ -82,13 +85,16 @@ static inline void measure_rtt(struct tcp_sock *tp) ...@@ -82,13 +85,16 @@ static inline void measure_rtt(struct tcp_sock *tp)
} }
} }
static void measure_achieved_throughput(struct tcp_sock *tp, u32 pkts_acked) static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked)
{ {
struct htcp *ca = tcp_ca(tp); const struct inet_connection_sock *icsk = inet_csk(sk);
const struct tcp_sock *tp = tcp_sk(sk);
struct htcp *ca = inet_csk_ca(sk);
u32 now = tcp_time_stamp; u32 now = tcp_time_stamp;
/* achieved throughput calculations */ /* achieved throughput calculations */
if (tp->ca_state != TCP_CA_Open && tp->ca_state != TCP_CA_Disorder) { if (icsk->icsk_ca_state != TCP_CA_Open &&
icsk->icsk_ca_state != TCP_CA_Disorder) {
ca->packetcount = 0; ca->packetcount = 0;
ca->lasttime = now; ca->lasttime = now;
return; return;
...@@ -173,9 +179,9 @@ static inline void htcp_alpha_update(struct htcp *ca) ...@@ -173,9 +179,9 @@ static inline void htcp_alpha_update(struct htcp *ca)
* that point do we really have a real sense of maxRTT (the queues en route * that point do we really have a real sense of maxRTT (the queues en route
* were getting just too full now). * were getting just too full now).
*/ */
static void htcp_param_update(struct tcp_sock *tp) static void htcp_param_update(struct sock *sk)
{ {
struct htcp *ca = tcp_ca(tp); struct htcp *ca = inet_csk_ca(sk);
u32 minRTT = ca->minRTT; u32 minRTT = ca->minRTT;
u32 maxRTT = ca->maxRTT; u32 maxRTT = ca->maxRTT;
...@@ -187,17 +193,19 @@ static void htcp_param_update(struct tcp_sock *tp) ...@@ -187,17 +193,19 @@ static void htcp_param_update(struct tcp_sock *tp)
ca->maxRTT = minRTT + ((maxRTT-minRTT)*95)/100; ca->maxRTT = minRTT + ((maxRTT-minRTT)*95)/100;
} }
static u32 htcp_recalc_ssthresh(struct tcp_sock *tp) static u32 htcp_recalc_ssthresh(struct sock *sk)
{ {
struct htcp *ca = tcp_ca(tp); const struct tcp_sock *tp = tcp_sk(sk);
htcp_param_update(tp); const struct htcp *ca = inet_csk_ca(sk);
htcp_param_update(sk);
return max((tp->snd_cwnd * ca->beta) >> 7, 2U); return max((tp->snd_cwnd * ca->beta) >> 7, 2U);
} }
static void htcp_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
u32 in_flight, int data_acked) u32 in_flight, int data_acked)
{ {
struct htcp *ca = tcp_ca(tp); struct tcp_sock *tp = tcp_sk(sk);
struct htcp *ca = inet_csk_ca(sk);
if (in_flight < tp->snd_cwnd) if (in_flight < tp->snd_cwnd)
return; return;
...@@ -207,7 +215,7 @@ static void htcp_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, ...@@ -207,7 +215,7 @@ static void htcp_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt,
if (tp->snd_cwnd < tp->snd_cwnd_clamp) if (tp->snd_cwnd < tp->snd_cwnd_clamp)
tp->snd_cwnd++; tp->snd_cwnd++;
} else { } else {
measure_rtt(tp); measure_rtt(sk);
/* keep track of number of round-trip times since last backoff event */ /* keep track of number of round-trip times since last backoff event */
if (ca->snd_cwnd_cnt2++ > tp->snd_cwnd) { if (ca->snd_cwnd_cnt2++ > tp->snd_cwnd) {
...@@ -229,28 +237,29 @@ static void htcp_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, ...@@ -229,28 +237,29 @@ static void htcp_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt,
} }
/* Lower bound on congestion window. */ /* Lower bound on congestion window. */
static u32 htcp_min_cwnd(struct tcp_sock *tp) static u32 htcp_min_cwnd(struct sock *sk)
{ {
const struct tcp_sock *tp = tcp_sk(sk);
return tp->snd_ssthresh; return tp->snd_ssthresh;
} }
static void htcp_init(struct tcp_sock *tp) static void htcp_init(struct sock *sk)
{ {
struct htcp *ca = tcp_ca(tp); struct htcp *ca = inet_csk_ca(sk);
memset(ca, 0, sizeof(struct htcp)); memset(ca, 0, sizeof(struct htcp));
ca->alpha = ALPHA_BASE; ca->alpha = ALPHA_BASE;
ca->beta = BETA_MIN; ca->beta = BETA_MIN;
} }
static void htcp_state(struct tcp_sock *tp, u8 new_state) static void htcp_state(struct sock *sk, u8 new_state)
{ {
switch (new_state) { switch (new_state) {
case TCP_CA_CWR: case TCP_CA_CWR:
case TCP_CA_Recovery: case TCP_CA_Recovery:
case TCP_CA_Loss: case TCP_CA_Loss:
htcp_reset(tcp_ca(tp)); htcp_reset(inet_csk_ca(sk));
break; break;
} }
} }
...@@ -269,7 +278,7 @@ static struct tcp_congestion_ops htcp = { ...@@ -269,7 +278,7 @@ static struct tcp_congestion_ops htcp = {
static int __init htcp_register(void) static int __init htcp_register(void)
{ {
BUG_ON(sizeof(struct htcp) > TCP_CA_PRIV_SIZE); BUG_ON(sizeof(struct htcp) > ICSK_CA_PRIV_SIZE);
BUILD_BUG_ON(BETA_MIN >= BETA_MAX); BUILD_BUG_ON(BETA_MIN >= BETA_MAX);
if (!use_bandwidth_switch) if (!use_bandwidth_switch)
htcp.pkts_acked = NULL; htcp.pkts_acked = NULL;
......
...@@ -33,19 +33,20 @@ MODULE_PARM_DESC(rtt0, "reference rout trip time (ms)"); ...@@ -33,19 +33,20 @@ MODULE_PARM_DESC(rtt0, "reference rout trip time (ms)");
/* This is called to refresh values for hybla parameters */ /* This is called to refresh values for hybla parameters */
static inline void hybla_recalc_param (struct tcp_sock *tp) static inline void hybla_recalc_param (struct sock *sk)
{ {
struct hybla *ca = tcp_ca(tp); struct hybla *ca = inet_csk_ca(sk);
ca->rho_3ls = max_t(u32, tp->srtt / msecs_to_jiffies(rtt0), 8); ca->rho_3ls = max_t(u32, tcp_sk(sk)->srtt / msecs_to_jiffies(rtt0), 8);
ca->rho = ca->rho_3ls >> 3; ca->rho = ca->rho_3ls >> 3;
ca->rho2_7ls = (ca->rho_3ls * ca->rho_3ls) << 1; ca->rho2_7ls = (ca->rho_3ls * ca->rho_3ls) << 1;
ca->rho2 = ca->rho2_7ls >>7; ca->rho2 = ca->rho2_7ls >>7;
} }
static void hybla_init(struct tcp_sock *tp) static void hybla_init(struct sock *sk)
{ {
struct hybla *ca = tcp_ca(tp); struct tcp_sock *tp = tcp_sk(sk);
struct hybla *ca = inet_csk_ca(sk);
ca->rho = 0; ca->rho = 0;
ca->rho2 = 0; ca->rho2 = 0;
...@@ -57,17 +58,16 @@ static void hybla_init(struct tcp_sock *tp) ...@@ -57,17 +58,16 @@ static void hybla_init(struct tcp_sock *tp)
tp->snd_cwnd_clamp = 65535; tp->snd_cwnd_clamp = 65535;
/* 1st Rho measurement based on initial srtt */ /* 1st Rho measurement based on initial srtt */
hybla_recalc_param(tp); hybla_recalc_param(sk);
/* set minimum rtt as this is the 1st ever seen */ /* set minimum rtt as this is the 1st ever seen */
ca->minrtt = tp->srtt; ca->minrtt = tp->srtt;
tp->snd_cwnd = ca->rho; tp->snd_cwnd = ca->rho;
} }
static void hybla_state(struct tcp_sock *tp, u8 ca_state) static void hybla_state(struct sock *sk, u8 ca_state)
{ {
struct hybla *ca = tcp_ca(tp); struct hybla *ca = inet_csk_ca(sk);
ca->hybla_en = (ca_state == TCP_CA_Open); ca->hybla_en = (ca_state == TCP_CA_Open);
} }
...@@ -86,27 +86,28 @@ static inline u32 hybla_fraction(u32 odds) ...@@ -86,27 +86,28 @@ static inline u32 hybla_fraction(u32 odds)
* o Give cwnd a new value based on the model proposed * o Give cwnd a new value based on the model proposed
* o remember increments <1 * o remember increments <1
*/ */
static void hybla_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
u32 in_flight, int flag) u32 in_flight, int flag)
{ {
struct hybla *ca = tcp_ca(tp); struct tcp_sock *tp = tcp_sk(sk);
struct hybla *ca = inet_csk_ca(sk);
u32 increment, odd, rho_fractions; u32 increment, odd, rho_fractions;
int is_slowstart = 0; int is_slowstart = 0;
/* Recalculate rho only if this srtt is the lowest */ /* Recalculate rho only if this srtt is the lowest */
if (tp->srtt < ca->minrtt){ if (tp->srtt < ca->minrtt){
hybla_recalc_param(tp); hybla_recalc_param(sk);
ca->minrtt = tp->srtt; ca->minrtt = tp->srtt;
} }
if (!ca->hybla_en) if (!ca->hybla_en)
return tcp_reno_cong_avoid(tp, ack, rtt, in_flight, flag); return tcp_reno_cong_avoid(sk, ack, rtt, in_flight, flag);
if (in_flight < tp->snd_cwnd) if (in_flight < tp->snd_cwnd)
return; return;
if (ca->rho == 0) if (ca->rho == 0)
hybla_recalc_param(tp); hybla_recalc_param(sk);
rho_fractions = ca->rho_3ls - (ca->rho << 3); rho_fractions = ca->rho_3ls - (ca->rho << 3);
...@@ -170,7 +171,7 @@ static struct tcp_congestion_ops tcp_hybla = { ...@@ -170,7 +171,7 @@ static struct tcp_congestion_ops tcp_hybla = {
static int __init hybla_register(void) static int __init hybla_register(void)
{ {
BUG_ON(sizeof(struct hybla) > TCP_CA_PRIV_SIZE); BUG_ON(sizeof(struct hybla) > ICSK_CA_PRIV_SIZE);
return tcp_register_congestion_control(&tcp_hybla); return tcp_register_congestion_control(&tcp_hybla);
} }
......
This diff is collapsed.
...@@ -1409,13 +1409,14 @@ struct tcp_func ipv4_specific = { ...@@ -1409,13 +1409,14 @@ struct tcp_func ipv4_specific = {
*/ */
static int tcp_v4_init_sock(struct sock *sk) static int tcp_v4_init_sock(struct sock *sk)
{ {
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
skb_queue_head_init(&tp->out_of_order_queue); skb_queue_head_init(&tp->out_of_order_queue);
tcp_init_xmit_timers(sk); tcp_init_xmit_timers(sk);
tcp_prequeue_init(tp); tcp_prequeue_init(tp);
inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT; icsk->icsk_rto = TCP_TIMEOUT_INIT;
tp->mdev = TCP_TIMEOUT_INIT; tp->mdev = TCP_TIMEOUT_INIT;
/* So many TCP implementations out there (incorrectly) count the /* So many TCP implementations out there (incorrectly) count the
...@@ -1433,7 +1434,7 @@ static int tcp_v4_init_sock(struct sock *sk) ...@@ -1433,7 +1434,7 @@ static int tcp_v4_init_sock(struct sock *sk)
tp->mss_cache = 536; tp->mss_cache = 536;
tp->reordering = sysctl_tcp_reordering; tp->reordering = sysctl_tcp_reordering;
tp->ca_ops = &tcp_init_congestion_ops; icsk->icsk_ca_ops = &tcp_init_congestion_ops;
sk->sk_state = TCP_CLOSE; sk->sk_state = TCP_CLOSE;
...@@ -1456,7 +1457,7 @@ int tcp_v4_destroy_sock(struct sock *sk) ...@@ -1456,7 +1457,7 @@ int tcp_v4_destroy_sock(struct sock *sk)
tcp_clear_xmit_timers(sk); tcp_clear_xmit_timers(sk);
tcp_cleanup_congestion_control(tp); tcp_cleanup_congestion_control(sk);
/* Cleanup up the write buffer. */ /* Cleanup up the write buffer. */
sk_stream_writequeue_purge(sk); sk_stream_writequeue_purge(sk);
...@@ -1883,7 +1884,7 @@ static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i) ...@@ -1883,7 +1884,7 @@ static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i)
jiffies_to_clock_t(timer_expires - jiffies), jiffies_to_clock_t(timer_expires - jiffies),
icsk->icsk_retransmits, icsk->icsk_retransmits,
sock_i_uid(sp), sock_i_uid(sp),
tp->probes_out, icsk->icsk_probes_out,
sock_i_ino(sp), sock_i_ino(sp),
atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_refcnt), sp,
icsk->icsk_rto, icsk->icsk_rto,
......
...@@ -384,9 +384,9 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, ...@@ -384,9 +384,9 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
newtp->frto_counter = 0; newtp->frto_counter = 0;
newtp->frto_highmark = 0; newtp->frto_highmark = 0;
newtp->ca_ops = &tcp_reno; newicsk->icsk_ca_ops = &tcp_reno;
tcp_set_ca_state(newtp, TCP_CA_Open); tcp_set_ca_state(newsk, TCP_CA_Open);
tcp_init_xmit_timers(newsk); tcp_init_xmit_timers(newsk);
skb_queue_head_init(&newtp->out_of_order_queue); skb_queue_head_init(&newtp->out_of_order_queue);
newtp->rcv_wup = treq->rcv_isn + 1; newtp->rcv_wup = treq->rcv_isn + 1;
...@@ -399,7 +399,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, ...@@ -399,7 +399,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
newtp->rx_opt.dsack = 0; newtp->rx_opt.dsack = 0;
newtp->rx_opt.eff_sacks = 0; newtp->rx_opt.eff_sacks = 0;
newtp->probes_out = 0;
newtp->rx_opt.num_sacks = 0; newtp->rx_opt.num_sacks = 0;
newtp->urg_data = 0; newtp->urg_data = 0;
......
...@@ -112,9 +112,9 @@ static void tcp_cwnd_restart(struct sock *sk, struct dst_entry *dst) ...@@ -112,9 +112,9 @@ static void tcp_cwnd_restart(struct sock *sk, struct dst_entry *dst)
u32 restart_cwnd = tcp_init_cwnd(tp, dst); u32 restart_cwnd = tcp_init_cwnd(tp, dst);
u32 cwnd = tp->snd_cwnd; u32 cwnd = tp->snd_cwnd;
tcp_ca_event(tp, CA_EVENT_CWND_RESTART); tcp_ca_event(sk, CA_EVENT_CWND_RESTART);
tp->snd_ssthresh = tcp_current_ssthresh(tp); tp->snd_ssthresh = tcp_current_ssthresh(sk);
restart_cwnd = min(restart_cwnd, cwnd); restart_cwnd = min(restart_cwnd, cwnd);
while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd) while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd)
...@@ -265,6 +265,7 @@ static __inline__ u16 tcp_select_window(struct sock *sk) ...@@ -265,6 +265,7 @@ static __inline__ u16 tcp_select_window(struct sock *sk)
static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb) static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
{ {
if (skb != NULL) { if (skb != NULL) {
const struct inet_connection_sock *icsk = inet_csk(sk);
struct inet_sock *inet = inet_sk(sk); struct inet_sock *inet = inet_sk(sk);
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
...@@ -280,7 +281,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb) ...@@ -280,7 +281,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
#define SYSCTL_FLAG_SACK 0x4 #define SYSCTL_FLAG_SACK 0x4
/* If congestion control is doing timestamping */ /* If congestion control is doing timestamping */
if (tp->ca_ops->rtt_sample) if (icsk->icsk_ca_ops->rtt_sample)
do_gettimeofday(&skb->stamp); do_gettimeofday(&skb->stamp);
sysctl_flags = 0; sysctl_flags = 0;
...@@ -308,7 +309,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb) ...@@ -308,7 +309,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
} }
if (tcp_packets_in_flight(tp) == 0) if (tcp_packets_in_flight(tp) == 0)
tcp_ca_event(tp, CA_EVENT_TX_START); tcp_ca_event(sk, CA_EVENT_TX_START);
th = (struct tcphdr *) skb_push(skb, tcp_header_size); th = (struct tcphdr *) skb_push(skb, tcp_header_size);
skb->h.th = th; skb->h.th = th;
...@@ -366,7 +367,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb) ...@@ -366,7 +367,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
if (err <= 0) if (err <= 0)
return err; return err;
tcp_enter_cwr(tp); tcp_enter_cwr(sk);
/* NET_XMIT_CN is special. It does not guarantee, /* NET_XMIT_CN is special. It does not guarantee,
* that this packet is lost. It tells that device * that this packet is lost. It tells that device
...@@ -905,12 +906,13 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, ...@@ -905,12 +906,13 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
*/ */
static int tcp_tso_should_defer(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb) static int tcp_tso_should_defer(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb)
{ {
const struct inet_connection_sock *icsk = inet_csk(sk);
u32 send_win, cong_win, limit, in_flight; u32 send_win, cong_win, limit, in_flight;
if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)
return 0; return 0;
if (tp->ca_state != TCP_CA_Open) if (icsk->icsk_ca_state != TCP_CA_Open)
return 0; return 0;
in_flight = tcp_packets_in_flight(tp); in_flight = tcp_packets_in_flight(tp);
...@@ -1287,6 +1289,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m ...@@ -1287,6 +1289,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
*/ */
void tcp_simple_retransmit(struct sock *sk) void tcp_simple_retransmit(struct sock *sk)
{ {
const struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb; struct sk_buff *skb;
unsigned int mss = tcp_current_mss(sk, 0); unsigned int mss = tcp_current_mss(sk, 0);
...@@ -1317,12 +1320,12 @@ void tcp_simple_retransmit(struct sock *sk) ...@@ -1317,12 +1320,12 @@ void tcp_simple_retransmit(struct sock *sk)
* in network, but units changed and effective * in network, but units changed and effective
* cwnd/ssthresh really reduced now. * cwnd/ssthresh really reduced now.
*/ */
if (tp->ca_state != TCP_CA_Loss) { if (icsk->icsk_ca_state != TCP_CA_Loss) {
tp->high_seq = tp->snd_nxt; tp->high_seq = tp->snd_nxt;
tp->snd_ssthresh = tcp_current_ssthresh(tp); tp->snd_ssthresh = tcp_current_ssthresh(sk);
tp->prior_ssthresh = 0; tp->prior_ssthresh = 0;
tp->undo_marker = 0; tp->undo_marker = 0;
tcp_set_ca_state(tp, TCP_CA_Loss); tcp_set_ca_state(sk, TCP_CA_Loss);
} }
tcp_xmit_retransmit_queue(sk); tcp_xmit_retransmit_queue(sk);
} }
...@@ -1462,6 +1465,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) ...@@ -1462,6 +1465,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
*/ */
void tcp_xmit_retransmit_queue(struct sock *sk) void tcp_xmit_retransmit_queue(struct sock *sk)
{ {
const struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb; struct sk_buff *skb;
int packet_cnt = tp->lost_out; int packet_cnt = tp->lost_out;
...@@ -1485,7 +1489,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) ...@@ -1485,7 +1489,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
if (!(sacked&(TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) { if (!(sacked&(TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) {
if (tcp_retransmit_skb(sk, skb)) if (tcp_retransmit_skb(sk, skb))
return; return;
if (tp->ca_state != TCP_CA_Loss) if (icsk->icsk_ca_state != TCP_CA_Loss)
NET_INC_STATS_BH(LINUX_MIB_TCPFASTRETRANS); NET_INC_STATS_BH(LINUX_MIB_TCPFASTRETRANS);
else else
NET_INC_STATS_BH(LINUX_MIB_TCPSLOWSTARTRETRANS); NET_INC_STATS_BH(LINUX_MIB_TCPSLOWSTARTRETRANS);
...@@ -1507,7 +1511,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) ...@@ -1507,7 +1511,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
/* OK, demanded retransmission is finished. */ /* OK, demanded retransmission is finished. */
/* Forward retransmissions are possible only during Recovery. */ /* Forward retransmissions are possible only during Recovery. */
if (tp->ca_state != TCP_CA_Recovery) if (icsk->icsk_ca_state != TCP_CA_Recovery)
return; return;
/* No forward retransmissions in Reno are possible. */ /* No forward retransmissions in Reno are possible. */
...@@ -2028,7 +2032,7 @@ void tcp_send_probe0(struct sock *sk) ...@@ -2028,7 +2032,7 @@ void tcp_send_probe0(struct sock *sk)
if (tp->packets_out || !sk->sk_send_head) { if (tp->packets_out || !sk->sk_send_head) {
/* Cancel probe timer, if it is not required. */ /* Cancel probe timer, if it is not required. */
tp->probes_out = 0; icsk->icsk_probes_out = 0;
icsk->icsk_backoff = 0; icsk->icsk_backoff = 0;
return; return;
} }
...@@ -2036,19 +2040,19 @@ void tcp_send_probe0(struct sock *sk) ...@@ -2036,19 +2040,19 @@ void tcp_send_probe0(struct sock *sk)
if (err <= 0) { if (err <= 0) {
if (icsk->icsk_backoff < sysctl_tcp_retries2) if (icsk->icsk_backoff < sysctl_tcp_retries2)
icsk->icsk_backoff++; icsk->icsk_backoff++;
tp->probes_out++; icsk->icsk_probes_out++;
inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX), min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX),
TCP_RTO_MAX); TCP_RTO_MAX);
} else { } else {
/* If packet was not sent due to local congestion, /* If packet was not sent due to local congestion,
* do not backoff and do not remember probes_out. * do not backoff and do not remember icsk_probes_out.
* Let local senders to fight for local resources. * Let local senders to fight for local resources.
* *
* Use accumulated backoff yet. * Use accumulated backoff yet.
*/ */
if (!tp->probes_out) if (!icsk->icsk_probes_out)
tp->probes_out=1; icsk->icsk_probes_out = 1;
inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
min(icsk->icsk_rto << icsk->icsk_backoff, min(icsk->icsk_rto << icsk->icsk_backoff,
TCP_RESOURCE_PROBE_INTERVAL), TCP_RESOURCE_PROBE_INTERVAL),
......
...@@ -16,9 +16,10 @@ ...@@ -16,9 +16,10 @@
#define TCP_SCALABLE_AI_CNT 50U #define TCP_SCALABLE_AI_CNT 50U
#define TCP_SCALABLE_MD_SCALE 3 #define TCP_SCALABLE_MD_SCALE 3
static void tcp_scalable_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
u32 in_flight, int flag) u32 in_flight, int flag)
{ {
struct tcp_sock *tp = tcp_sk(sk);
if (in_flight < tp->snd_cwnd) if (in_flight < tp->snd_cwnd)
return; return;
...@@ -35,8 +36,9 @@ static void tcp_scalable_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, ...@@ -35,8 +36,9 @@ static void tcp_scalable_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt,
tp->snd_cwnd_stamp = tcp_time_stamp; tp->snd_cwnd_stamp = tcp_time_stamp;
} }
static u32 tcp_scalable_ssthresh(struct tcp_sock *tp) static u32 tcp_scalable_ssthresh(struct sock *sk)
{ {
const struct tcp_sock *tp = tcp_sk(sk);
return max(tp->snd_cwnd - (tp->snd_cwnd>>TCP_SCALABLE_MD_SCALE), 2U); return max(tp->snd_cwnd - (tp->snd_cwnd>>TCP_SCALABLE_MD_SCALE), 2U);
} }
......
...@@ -233,11 +233,12 @@ static void tcp_delack_timer(unsigned long data) ...@@ -233,11 +233,12 @@ static void tcp_delack_timer(unsigned long data)
static void tcp_probe_timer(struct sock *sk) static void tcp_probe_timer(struct sock *sk)
{ {
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
int max_probes; int max_probes;
if (tp->packets_out || !sk->sk_send_head) { if (tp->packets_out || !sk->sk_send_head) {
tp->probes_out = 0; icsk->icsk_probes_out = 0;
return; return;
} }
...@@ -248,7 +249,7 @@ static void tcp_probe_timer(struct sock *sk) ...@@ -248,7 +249,7 @@ static void tcp_probe_timer(struct sock *sk)
* FIXME: We ought not to do it, Solaris 2.5 actually has fixing * FIXME: We ought not to do it, Solaris 2.5 actually has fixing
* this behaviour in Solaris down as a bug fix. [AC] * this behaviour in Solaris down as a bug fix. [AC]
* *
* Let me to explain. probes_out is zeroed by incoming ACKs * Let me to explain. icsk_probes_out is zeroed by incoming ACKs
* even if they advertise zero window. Hence, connection is killed only * even if they advertise zero window. Hence, connection is killed only
* if we received no ACKs for normal connection timeout. It is not killed * if we received no ACKs for normal connection timeout. It is not killed
* only because window stays zero for some time, window may be zero * only because window stays zero for some time, window may be zero
...@@ -259,16 +260,15 @@ static void tcp_probe_timer(struct sock *sk) ...@@ -259,16 +260,15 @@ static void tcp_probe_timer(struct sock *sk)
max_probes = sysctl_tcp_retries2; max_probes = sysctl_tcp_retries2;
if (sock_flag(sk, SOCK_DEAD)) { if (sock_flag(sk, SOCK_DEAD)) {
const struct inet_connection_sock *icsk = inet_csk(sk);
const int alive = ((icsk->icsk_rto << icsk->icsk_backoff) < TCP_RTO_MAX); const int alive = ((icsk->icsk_rto << icsk->icsk_backoff) < TCP_RTO_MAX);
max_probes = tcp_orphan_retries(sk, alive); max_probes = tcp_orphan_retries(sk, alive);
if (tcp_out_of_resources(sk, alive || tp->probes_out <= max_probes)) if (tcp_out_of_resources(sk, alive || icsk->icsk_probes_out <= max_probes))
return; return;
} }
if (tp->probes_out > max_probes) { if (icsk->icsk_probes_out > max_probes) {
tcp_write_err(sk); tcp_write_err(sk);
} else { } else {
/* Only send another probe if we didn't close things up. */ /* Only send another probe if we didn't close things up. */
...@@ -319,19 +319,20 @@ static void tcp_retransmit_timer(struct sock *sk) ...@@ -319,19 +319,20 @@ static void tcp_retransmit_timer(struct sock *sk)
goto out; goto out;
if (icsk->icsk_retransmits == 0) { if (icsk->icsk_retransmits == 0) {
if (tp->ca_state == TCP_CA_Disorder || tp->ca_state == TCP_CA_Recovery) { if (icsk->icsk_ca_state == TCP_CA_Disorder ||
icsk->icsk_ca_state == TCP_CA_Recovery) {
if (tp->rx_opt.sack_ok) { if (tp->rx_opt.sack_ok) {
if (tp->ca_state == TCP_CA_Recovery) if (icsk->icsk_ca_state == TCP_CA_Recovery)
NET_INC_STATS_BH(LINUX_MIB_TCPSACKRECOVERYFAIL); NET_INC_STATS_BH(LINUX_MIB_TCPSACKRECOVERYFAIL);
else else
NET_INC_STATS_BH(LINUX_MIB_TCPSACKFAILURES); NET_INC_STATS_BH(LINUX_MIB_TCPSACKFAILURES);
} else { } else {
if (tp->ca_state == TCP_CA_Recovery) if (icsk->icsk_ca_state == TCP_CA_Recovery)
NET_INC_STATS_BH(LINUX_MIB_TCPRENORECOVERYFAIL); NET_INC_STATS_BH(LINUX_MIB_TCPRENORECOVERYFAIL);
else else
NET_INC_STATS_BH(LINUX_MIB_TCPRENOFAILURES); NET_INC_STATS_BH(LINUX_MIB_TCPRENOFAILURES);
} }
} else if (tp->ca_state == TCP_CA_Loss) { } else if (icsk->icsk_ca_state == TCP_CA_Loss) {
NET_INC_STATS_BH(LINUX_MIB_TCPLOSSFAILURES); NET_INC_STATS_BH(LINUX_MIB_TCPLOSSFAILURES);
} else { } else {
NET_INC_STATS_BH(LINUX_MIB_TCPTIMEOUTS); NET_INC_STATS_BH(LINUX_MIB_TCPTIMEOUTS);
...@@ -449,6 +450,7 @@ void tcp_set_keepalive(struct sock *sk, int val) ...@@ -449,6 +450,7 @@ void tcp_set_keepalive(struct sock *sk, int val)
static void tcp_keepalive_timer (unsigned long data) static void tcp_keepalive_timer (unsigned long data)
{ {
struct sock *sk = (struct sock *) data; struct sock *sk = (struct sock *) data;
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
__u32 elapsed; __u32 elapsed;
...@@ -490,14 +492,14 @@ static void tcp_keepalive_timer (unsigned long data) ...@@ -490,14 +492,14 @@ static void tcp_keepalive_timer (unsigned long data)
elapsed = tcp_time_stamp - tp->rcv_tstamp; elapsed = tcp_time_stamp - tp->rcv_tstamp;
if (elapsed >= keepalive_time_when(tp)) { if (elapsed >= keepalive_time_when(tp)) {
if ((!tp->keepalive_probes && tp->probes_out >= sysctl_tcp_keepalive_probes) || if ((!tp->keepalive_probes && icsk->icsk_probes_out >= sysctl_tcp_keepalive_probes) ||
(tp->keepalive_probes && tp->probes_out >= tp->keepalive_probes)) { (tp->keepalive_probes && icsk->icsk_probes_out >= tp->keepalive_probes)) {
tcp_send_active_reset(sk, GFP_ATOMIC); tcp_send_active_reset(sk, GFP_ATOMIC);
tcp_write_err(sk); tcp_write_err(sk);
goto out; goto out;
} }
if (tcp_write_wakeup(sk) <= 0) { if (tcp_write_wakeup(sk) <= 0) {
tp->probes_out++; icsk->icsk_probes_out++;
elapsed = keepalive_intvl_when(tp); elapsed = keepalive_intvl_when(tp);
} else { } else {
/* If keepalive was lost due to local congestion, /* If keepalive was lost due to local congestion,
......
...@@ -82,9 +82,10 @@ struct vegas { ...@@ -82,9 +82,10 @@ struct vegas {
* Instead we must wait until the completion of an RTT during * Instead we must wait until the completion of an RTT during
* which we actually receive ACKs. * which we actually receive ACKs.
*/ */
static inline void vegas_enable(struct tcp_sock *tp) static inline void vegas_enable(struct sock *sk)
{ {
struct vegas *vegas = tcp_ca(tp); const struct tcp_sock *tp = tcp_sk(sk);
struct vegas *vegas = inet_csk_ca(sk);
/* Begin taking Vegas samples next time we send something. */ /* Begin taking Vegas samples next time we send something. */
vegas->doing_vegas_now = 1; vegas->doing_vegas_now = 1;
...@@ -97,19 +98,19 @@ static inline void vegas_enable(struct tcp_sock *tp) ...@@ -97,19 +98,19 @@ static inline void vegas_enable(struct tcp_sock *tp)
} }
/* Stop taking Vegas samples for now. */ /* Stop taking Vegas samples for now. */
static inline void vegas_disable(struct tcp_sock *tp) static inline void vegas_disable(struct sock *sk)
{ {
struct vegas *vegas = tcp_ca(tp); struct vegas *vegas = inet_csk_ca(sk);
vegas->doing_vegas_now = 0; vegas->doing_vegas_now = 0;
} }
static void tcp_vegas_init(struct tcp_sock *tp) static void tcp_vegas_init(struct sock *sk)
{ {
struct vegas *vegas = tcp_ca(tp); struct vegas *vegas = inet_csk_ca(sk);
vegas->baseRTT = 0x7fffffff; vegas->baseRTT = 0x7fffffff;
vegas_enable(tp); vegas_enable(sk);
} }
/* Do RTT sampling needed for Vegas. /* Do RTT sampling needed for Vegas.
...@@ -120,9 +121,9 @@ static void tcp_vegas_init(struct tcp_sock *tp) ...@@ -120,9 +121,9 @@ static void tcp_vegas_init(struct tcp_sock *tp)
* o min-filter RTT samples from a much longer window (forever for now) * o min-filter RTT samples from a much longer window (forever for now)
* to find the propagation delay (baseRTT) * to find the propagation delay (baseRTT)
*/ */
static void tcp_vegas_rtt_calc(struct tcp_sock *tp, u32 usrtt) static void tcp_vegas_rtt_calc(struct sock *sk, u32 usrtt)
{ {
struct vegas *vegas = tcp_ca(tp); struct vegas *vegas = inet_csk_ca(sk);
u32 vrtt = usrtt + 1; /* Never allow zero rtt or baseRTT */ u32 vrtt = usrtt + 1; /* Never allow zero rtt or baseRTT */
/* Filter to find propagation delay: */ /* Filter to find propagation delay: */
...@@ -136,13 +137,13 @@ static void tcp_vegas_rtt_calc(struct tcp_sock *tp, u32 usrtt) ...@@ -136,13 +137,13 @@ static void tcp_vegas_rtt_calc(struct tcp_sock *tp, u32 usrtt)
vegas->cntRTT++; vegas->cntRTT++;
} }
static void tcp_vegas_state(struct tcp_sock *tp, u8 ca_state) static void tcp_vegas_state(struct sock *sk, u8 ca_state)
{ {
if (ca_state == TCP_CA_Open) if (ca_state == TCP_CA_Open)
vegas_enable(tp); vegas_enable(sk);
else else
vegas_disable(tp); vegas_disable(sk);
} }
/* /*
...@@ -154,20 +155,21 @@ static void tcp_vegas_state(struct tcp_sock *tp, u8 ca_state) ...@@ -154,20 +155,21 @@ static void tcp_vegas_state(struct tcp_sock *tp, u8 ca_state)
* packets, _then_ we can make Vegas calculations * packets, _then_ we can make Vegas calculations
* again. * again.
*/ */
static void tcp_vegas_cwnd_event(struct tcp_sock *tp, enum tcp_ca_event event) static void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event)
{ {
if (event == CA_EVENT_CWND_RESTART || if (event == CA_EVENT_CWND_RESTART ||
event == CA_EVENT_TX_START) event == CA_EVENT_TX_START)
tcp_vegas_init(tp); tcp_vegas_init(sk);
} }
static void tcp_vegas_cong_avoid(struct tcp_sock *tp, u32 ack, static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
u32 seq_rtt, u32 in_flight, int flag) u32 seq_rtt, u32 in_flight, int flag)
{ {
struct vegas *vegas = tcp_ca(tp); struct tcp_sock *tp = tcp_sk(sk);
struct vegas *vegas = inet_csk_ca(sk);
if (!vegas->doing_vegas_now) if (!vegas->doing_vegas_now)
return tcp_reno_cong_avoid(tp, ack, seq_rtt, in_flight, flag); return tcp_reno_cong_avoid(sk, ack, seq_rtt, in_flight, flag);
/* The key players are v_beg_snd_una and v_beg_snd_nxt. /* The key players are v_beg_snd_una and v_beg_snd_nxt.
* *
...@@ -219,7 +221,7 @@ static void tcp_vegas_cong_avoid(struct tcp_sock *tp, u32 ack, ...@@ -219,7 +221,7 @@ static void tcp_vegas_cong_avoid(struct tcp_sock *tp, u32 ack,
* but that's not too awful, since we're taking the min, * but that's not too awful, since we're taking the min,
* rather than averaging. * rather than averaging.
*/ */
tcp_vegas_rtt_calc(tp, seq_rtt*1000); tcp_vegas_rtt_calc(sk, seq_rtt * 1000);
/* We do the Vegas calculations only if we got enough RTT /* We do the Vegas calculations only if we got enough RTT
* samples that we can be reasonably sure that we got * samples that we can be reasonably sure that we got
...@@ -359,10 +361,10 @@ static void tcp_vegas_cong_avoid(struct tcp_sock *tp, u32 ack, ...@@ -359,10 +361,10 @@ static void tcp_vegas_cong_avoid(struct tcp_sock *tp, u32 ack,
} }
/* Extract info for Tcp socket info provided via netlink. */ /* Extract info for Tcp socket info provided via netlink. */
static void tcp_vegas_get_info(struct tcp_sock *tp, u32 ext, static void tcp_vegas_get_info(struct sock *sk, u32 ext,
struct sk_buff *skb) struct sk_buff *skb)
{ {
const struct vegas *ca = tcp_ca(tp); const struct vegas *ca = inet_csk_ca(sk);
if (ext & (1<<(TCPDIAG_VEGASINFO-1))) { if (ext & (1<<(TCPDIAG_VEGASINFO-1))) {
struct tcpvegas_info *info; struct tcpvegas_info *info;
...@@ -393,7 +395,7 @@ static struct tcp_congestion_ops tcp_vegas = { ...@@ -393,7 +395,7 @@ static struct tcp_congestion_ops tcp_vegas = {
static int __init tcp_vegas_register(void) static int __init tcp_vegas_register(void)
{ {
BUG_ON(sizeof(struct vegas) > TCP_CA_PRIV_SIZE); BUG_ON(sizeof(struct vegas) > ICSK_CA_PRIV_SIZE);
tcp_register_congestion_control(&tcp_vegas); tcp_register_congestion_control(&tcp_vegas);
return 0; return 0;
} }
......
...@@ -40,9 +40,9 @@ struct westwood { ...@@ -40,9 +40,9 @@ struct westwood {
* way as soon as possible. It will reasonably happen within the first * way as soon as possible. It will reasonably happen within the first
* RTT period of the connection lifetime. * RTT period of the connection lifetime.
*/ */
static void tcp_westwood_init(struct tcp_sock *tp) static void tcp_westwood_init(struct sock *sk)
{ {
struct westwood *w = tcp_ca(tp); struct westwood *w = inet_csk_ca(sk);
w->bk = 0; w->bk = 0;
w->bw_ns_est = 0; w->bw_ns_est = 0;
...@@ -51,7 +51,7 @@ static void tcp_westwood_init(struct tcp_sock *tp) ...@@ -51,7 +51,7 @@ static void tcp_westwood_init(struct tcp_sock *tp)
w->cumul_ack = 0; w->cumul_ack = 0;
w->rtt_min = w->rtt = TCP_WESTWOOD_INIT_RTT; w->rtt_min = w->rtt = TCP_WESTWOOD_INIT_RTT;
w->rtt_win_sx = tcp_time_stamp; w->rtt_win_sx = tcp_time_stamp;
w->snd_una = tp->snd_una; w->snd_una = tcp_sk(sk)->snd_una;
} }
/* /*
...@@ -74,11 +74,11 @@ static inline void westwood_filter(struct westwood *w, u32 delta) ...@@ -74,11 +74,11 @@ static inline void westwood_filter(struct westwood *w, u32 delta)
* Called after processing group of packets. * Called after processing group of packets.
* but all westwood needs is the last sample of srtt. * but all westwood needs is the last sample of srtt.
*/ */
static void tcp_westwood_pkts_acked(struct tcp_sock *tp, u32 cnt) static void tcp_westwood_pkts_acked(struct sock *sk, u32 cnt)
{ {
struct westwood *w = tcp_ca(tp); struct westwood *w = inet_csk_ca(sk);
if (cnt > 0) if (cnt > 0)
w->rtt = tp->srtt >> 3; w->rtt = tcp_sk(sk)->srtt >> 3;
} }
/* /*
...@@ -86,9 +86,9 @@ static void tcp_westwood_pkts_acked(struct tcp_sock *tp, u32 cnt) ...@@ -86,9 +86,9 @@ static void tcp_westwood_pkts_acked(struct tcp_sock *tp, u32 cnt)
* It updates RTT evaluation window if it is the right moment to do * It updates RTT evaluation window if it is the right moment to do
* it. If so it calls filter for evaluating bandwidth. * it. If so it calls filter for evaluating bandwidth.
*/ */
static void westwood_update_window(struct tcp_sock *tp) static void westwood_update_window(struct sock *sk)
{ {
struct westwood *w = tcp_ca(tp); struct westwood *w = inet_csk_ca(sk);
s32 delta = tcp_time_stamp - w->rtt_win_sx; s32 delta = tcp_time_stamp - w->rtt_win_sx;
/* /*
...@@ -114,11 +114,12 @@ static void westwood_update_window(struct tcp_sock *tp) ...@@ -114,11 +114,12 @@ static void westwood_update_window(struct tcp_sock *tp)
* header prediction is successful. In such case in fact update is * header prediction is successful. In such case in fact update is
* straight forward and doesn't need any particular care. * straight forward and doesn't need any particular care.
*/ */
static inline void westwood_fast_bw(struct tcp_sock *tp) static inline void westwood_fast_bw(struct sock *sk)
{ {
struct westwood *w = tcp_ca(tp); const struct tcp_sock *tp = tcp_sk(sk);
struct westwood *w = inet_csk_ca(sk);
westwood_update_window(tp); westwood_update_window(sk);
w->bk += tp->snd_una - w->snd_una; w->bk += tp->snd_una - w->snd_una;
w->snd_una = tp->snd_una; w->snd_una = tp->snd_una;
...@@ -130,9 +131,10 @@ static inline void westwood_fast_bw(struct tcp_sock *tp) ...@@ -130,9 +131,10 @@ static inline void westwood_fast_bw(struct tcp_sock *tp)
* This function evaluates cumul_ack for evaluating bk in case of * This function evaluates cumul_ack for evaluating bk in case of
* delayed or partial acks. * delayed or partial acks.
*/ */
static inline u32 westwood_acked_count(struct tcp_sock *tp) static inline u32 westwood_acked_count(struct sock *sk)
{ {
struct westwood *w = tcp_ca(tp); const struct tcp_sock *tp = tcp_sk(sk);
struct westwood *w = inet_csk_ca(sk);
w->cumul_ack = tp->snd_una - w->snd_una; w->cumul_ack = tp->snd_una - w->snd_una;
...@@ -160,9 +162,10 @@ static inline u32 westwood_acked_count(struct tcp_sock *tp) ...@@ -160,9 +162,10 @@ static inline u32 westwood_acked_count(struct tcp_sock *tp)
return w->cumul_ack; return w->cumul_ack;
} }
static inline u32 westwood_bw_rttmin(const struct tcp_sock *tp) static inline u32 westwood_bw_rttmin(const struct sock *sk)
{ {
struct westwood *w = tcp_ca(tp); const struct tcp_sock *tp = tcp_sk(sk);
const struct westwood *w = inet_csk_ca(sk);
return max_t(u32, (w->bw_est * w->rtt_min) / tp->mss_cache, 2); return max_t(u32, (w->bw_est * w->rtt_min) / tp->mss_cache, 2);
} }
...@@ -172,31 +175,32 @@ static inline u32 westwood_bw_rttmin(const struct tcp_sock *tp) ...@@ -172,31 +175,32 @@ static inline u32 westwood_bw_rttmin(const struct tcp_sock *tp)
* in packets we use mss_cache). Rttmin is guaranteed to be >= 2 * in packets we use mss_cache). Rttmin is guaranteed to be >= 2
* so avoids ever returning 0. * so avoids ever returning 0.
*/ */
static u32 tcp_westwood_cwnd_min(struct tcp_sock *tp) static u32 tcp_westwood_cwnd_min(struct sock *sk)
{ {
return westwood_bw_rttmin(tp); return westwood_bw_rttmin(sk);
} }
static void tcp_westwood_event(struct tcp_sock *tp, enum tcp_ca_event event) static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event)
{ {
struct westwood *w = tcp_ca(tp); struct tcp_sock *tp = tcp_sk(sk);
struct westwood *w = inet_csk_ca(sk);
switch(event) { switch(event) {
case CA_EVENT_FAST_ACK: case CA_EVENT_FAST_ACK:
westwood_fast_bw(tp); westwood_fast_bw(sk);
break; break;
case CA_EVENT_COMPLETE_CWR: case CA_EVENT_COMPLETE_CWR:
tp->snd_cwnd = tp->snd_ssthresh = westwood_bw_rttmin(tp); tp->snd_cwnd = tp->snd_ssthresh = westwood_bw_rttmin(sk);
break; break;
case CA_EVENT_FRTO: case CA_EVENT_FRTO:
tp->snd_ssthresh = westwood_bw_rttmin(tp); tp->snd_ssthresh = westwood_bw_rttmin(sk);
break; break;
case CA_EVENT_SLOW_ACK: case CA_EVENT_SLOW_ACK:
westwood_update_window(tp); westwood_update_window(sk);
w->bk += westwood_acked_count(tp); w->bk += westwood_acked_count(sk);
w->rtt_min = min(w->rtt, w->rtt_min); w->rtt_min = min(w->rtt, w->rtt_min);
break; break;
...@@ -208,10 +212,10 @@ static void tcp_westwood_event(struct tcp_sock *tp, enum tcp_ca_event event) ...@@ -208,10 +212,10 @@ static void tcp_westwood_event(struct tcp_sock *tp, enum tcp_ca_event event)
/* Extract info for Tcp socket info provided via netlink. */ /* Extract info for Tcp socket info provided via netlink. */
static void tcp_westwood_info(struct tcp_sock *tp, u32 ext, static void tcp_westwood_info(struct sock *sk, u32 ext,
struct sk_buff *skb) struct sk_buff *skb)
{ {
const struct westwood *ca = tcp_ca(tp); const struct westwood *ca = inet_csk_ca(sk);
if (ext & (1<<(TCPDIAG_VEGASINFO-1))) { if (ext & (1<<(TCPDIAG_VEGASINFO-1))) {
struct rtattr *rta; struct rtattr *rta;
struct tcpvegas_info *info; struct tcpvegas_info *info;
...@@ -242,7 +246,7 @@ static struct tcp_congestion_ops tcp_westwood = { ...@@ -242,7 +246,7 @@ static struct tcp_congestion_ops tcp_westwood = {
static int __init tcp_westwood_register(void) static int __init tcp_westwood_register(void)
{ {
BUG_ON(sizeof(struct westwood) > TCP_CA_PRIV_SIZE); BUG_ON(sizeof(struct westwood) > ICSK_CA_PRIV_SIZE);
return tcp_register_congestion_control(&tcp_westwood); return tcp_register_congestion_control(&tcp_westwood);
} }
......
...@@ -2010,13 +2010,14 @@ static struct tcp_func ipv6_mapped = { ...@@ -2010,13 +2010,14 @@ static struct tcp_func ipv6_mapped = {
*/ */
static int tcp_v6_init_sock(struct sock *sk) static int tcp_v6_init_sock(struct sock *sk)
{ {
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
skb_queue_head_init(&tp->out_of_order_queue); skb_queue_head_init(&tp->out_of_order_queue);
tcp_init_xmit_timers(sk); tcp_init_xmit_timers(sk);
tcp_prequeue_init(tp); tcp_prequeue_init(tp);
inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT; icsk->icsk_rto = TCP_TIMEOUT_INIT;
tp->mdev = TCP_TIMEOUT_INIT; tp->mdev = TCP_TIMEOUT_INIT;
/* So many TCP implementations out there (incorrectly) count the /* So many TCP implementations out there (incorrectly) count the
...@@ -2038,7 +2039,7 @@ static int tcp_v6_init_sock(struct sock *sk) ...@@ -2038,7 +2039,7 @@ static int tcp_v6_init_sock(struct sock *sk)
sk->sk_state = TCP_CLOSE; sk->sk_state = TCP_CLOSE;
tp->af_specific = &ipv6_specific; tp->af_specific = &ipv6_specific;
tp->ca_ops = &tcp_init_congestion_ops; icsk->icsk_ca_ops = &tcp_init_congestion_ops;
sk->sk_write_space = sk_stream_write_space; sk->sk_write_space = sk_stream_write_space;
sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
...@@ -2135,7 +2136,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) ...@@ -2135,7 +2136,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
jiffies_to_clock_t(timer_expires - jiffies), jiffies_to_clock_t(timer_expires - jiffies),
icsk->icsk_retransmits, icsk->icsk_retransmits,
sock_i_uid(sp), sock_i_uid(sp),
tp->probes_out, icsk->icsk_probes_out,
sock_i_ino(sp), sock_i_ino(sp),
atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_refcnt), sp,
icsk->icsk_rto, icsk->icsk_rto,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment