Commit 14a1f445 authored by David S. Miller, committed by Patrick McHardy

[TCP]: Make TSO play nice with congestion window.

Previously TSO would not abide by the congestion
window properly.  Essentially, each TSO packet would
be treated just like 1 normal packet, even though a TSO
packet generates more than 1 normal packet.  This
violates congestion window rules entirely.

So now we record the TSO factor, a count of how many
real packets a TSO packet will generate, and include
this in all the packet counting routines.

This initial version has a bug in that skb_entail() is
not the correct time to figure out the TSO factor for
the SKB, and tp->mss_tso_factor is not necessarily the
right value for a given SKB.  Will fix this up next.
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 10bc9563
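
Before the diff, the accounting idea in the commit message can be illustrated with a small standalone sketch. This is plain C with simplified stand-ins (pcount, skb, may_send) that are illustrative only and are not the kernel's structures or API; it shows why counting a TSO super-packet by its real-packet factor, rather than as one segment, keeps the congestion window honest.

    #include <stdio.h>

    /* Simplified stand-ins for the kernel structures; illustrative only. */
    struct pcount { unsigned int val; };           /* like tcp_pcount_t            */
    struct skb    { unsigned int tso_factor; };    /* real packets in this frame   */

    static unsigned int pcount_get(struct pcount *c)          { return c->val; }
    static void pcount_add(struct pcount *c, struct skb *skb) { c->val += skb->tso_factor; }

    /* May this frame be sent without overshooting the congestion window?
     * Mirrors the (in_flight + (pkts - 1)) < snd_cwnd test added by the patch.
     */
    static int may_send(unsigned int in_flight, unsigned int snd_cwnd, struct skb *skb)
    {
        return (in_flight + (skb->tso_factor - 1)) < snd_cwnd;
    }

    int main(void)
    {
        struct pcount packets_out = { 0 };
        struct skb tso = { .tso_factor = 8 };      /* one 8-segment TSO frame      */
        unsigned int snd_cwnd = 10;

        /* Old behaviour: the frame counts as 1, so ten such frames (80 real
         * packets) would fit a 10-packet window.  New behaviour: it counts as 8. */
        pcount_add(&packets_out, &tso);
        printf("packets_out = %u\n", pcount_get(&packets_out));   /* 8, not 1 */

        printf("may send another 8-segment frame: %s\n",
               may_send(pcount_get(&packets_out), snd_cwnd, &tso) ? "yes" : "no");
        return 0;
    }

With a factor of 8 against a 10-segment window, a second 8-segment frame is refused (8 + 7 is not below 10), whereas the old one-packet accounting would have admitted roughly ten such frames, i.e. eighty real packets, into flight.
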
......@@ -201,6 +201,10 @@ struct tcp_sack_block {
__u32 end_seq;
};
typedef struct tcp_pcount {
__u32 val;
} tcp_pcount_t;
struct tcp_opt {
int tcp_header_len; /* Bytes of tcp header to send */
......@@ -250,6 +254,7 @@ struct tcp_opt {
__u32 max_window; /* Maximal window ever seen from peer */
__u32 pmtu_cookie; /* Last pmtu seen by socket */
__u32 mss_cache; /* Cached effective mss, not including SACKS */
__u32 mss_tso_factor; /* Real packets per TSO packet */
__u16 mss_cache_std; /* Like mss_cache, but without TSO */
__u16 mss_clamp; /* Maximal mss, negotiated at connection setup */
__u16 ext_header_len; /* Network protocol overhead (IP/IPv6 options) */
......@@ -274,9 +279,9 @@ struct tcp_opt {
__u32 rtt_seq; /* sequence number to update rttvar */
__u32 rto; /* retransmit timeout */
__u32 packets_out; /* Packets which are "in flight" */
__u32 left_out; /* Packets which leaved network */
__u32 retrans_out; /* Retransmitted packets out */
tcp_pcount_t packets_out; /* Packets which are "in flight" */
tcp_pcount_t left_out; /* Packets which leaved network */
tcp_pcount_t retrans_out; /* Retransmitted packets out */
/*
......@@ -337,9 +342,9 @@ struct tcp_opt {
__u8 syn_retries; /* num of allowed syn retries */
__u8 ecn_flags; /* ECN status bits. */
__u16 prior_ssthresh; /* ssthresh saved at recovery start */
__u32 lost_out; /* Lost packets */
__u32 sacked_out; /* SACK'd packets */
__u32 fackets_out; /* FACK'd packets */
tcp_pcount_t lost_out; /* Lost packets */
tcp_pcount_t sacked_out;/* SACK'd packets */
tcp_pcount_t fackets_out;/* FACK'd packets */
__u32 high_seq; /* snd_nxt at onset of congestion */
__u32 retrans_stamp; /* Timestamp of the last retransmit,
......
......@@ -1047,13 +1047,18 @@ static inline void tcp_reset_xmit_timer(struct sock *sk, int what, unsigned long
* is not a big flaw.
*/
static __inline__ unsigned int tcp_current_mss(struct sock *sk, int large)
static inline unsigned int tcp_current_mss(struct sock *sk, int large, int *factor)
{
struct tcp_opt *tp = tcp_sk(sk);
struct dst_entry *dst = __sk_dst_get(sk);
int mss_now = large && (sk->sk_route_caps & NETIF_F_TSO) &&
!tp->urg_mode ?
tp->mss_cache : tp->mss_cache_std;
int do_large, mss_now;
do_large = (large &&
(sk->sk_route_caps & NETIF_F_TSO) &&
!tp->urg_mode);
mss_now = do_large ? tp->mss_cache : tp->mss_cache_std;
if (factor)
*factor = do_large ? tp->mss_tso_factor : 1;
if (dst) {
u32 mtu = dst_pmtu(dst);
......@@ -1181,12 +1186,76 @@ struct tcp_skb_cb {
__u16 urg_ptr; /* Valid w/URG flags is set. */
__u32 ack_seq; /* Sequence number ACK'd */
__u32 tso_factor;
};
#define TCP_SKB_CB(__skb) ((struct tcp_skb_cb *)&((__skb)->cb[0]))
#include <net/tcp_ecn.h>
/* Due to TSO, an SKB can be composed of multiple actual
* packets. To keep these tracked properly, we use this.
*/
static inline int tcp_skb_pcount(struct sk_buff *skb)
{
return TCP_SKB_CB(skb)->tso_factor;
}
static inline void tcp_inc_pcount(tcp_pcount_t *count, struct sk_buff *skb)
{
count->val += tcp_skb_pcount(skb);
}
static inline void tcp_inc_pcount_explicit(tcp_pcount_t *count, int amt)
{
count->val += amt;
}
static inline void tcp_dec_pcount_explicit(tcp_pcount_t *count, int amt)
{
count->val -= amt;
}
static inline void tcp_dec_pcount(tcp_pcount_t *count, struct sk_buff *skb)
{
count->val -= tcp_skb_pcount(skb);
}
static inline void tcp_dec_pcount_approx(tcp_pcount_t *count,
struct sk_buff *skb)
{
if (count->val) {
count->val -= tcp_skb_pcount(skb);
if ((int)count->val < 0)
count->val = 0;
}
}
static inline __u32 tcp_get_pcount(tcp_pcount_t *count)
{
return count->val;
}
static inline void tcp_set_pcount(tcp_pcount_t *count, __u32 val)
{
count->val = val;
}
static inline void tcp_packets_out_inc(struct sock *sk, struct tcp_opt *tp,
struct sk_buff *skb)
{
int orig = tcp_get_pcount(&tp->packets_out);
tcp_inc_pcount(&tp->packets_out, skb);
if (!orig)
tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
}
static inline void tcp_packets_out_dec(struct tcp_opt *tp, struct sk_buff *skb)
{
tcp_dec_pcount(&tp->packets_out, skb);
}
/* This determines how many packets are "in the network" to the best
* of our knowledge. In many cases it is conservative, but where
* detailed information is available from the receiver (via SACK
......@@ -1203,7 +1272,9 @@ struct tcp_skb_cb {
*/
static __inline__ unsigned int tcp_packets_in_flight(struct tcp_opt *tp)
{
return tp->packets_out - tp->left_out + tp->retrans_out;
return (tcp_get_pcount(&tp->packets_out) -
tcp_get_pcount(&tp->left_out) +
tcp_get_pcount(&tp->retrans_out));
}
/* Recalculate snd_ssthresh, we want to set it to:
......@@ -1304,9 +1375,15 @@ static inline __u32 tcp_current_ssthresh(struct tcp_opt *tp)
static inline void tcp_sync_left_out(struct tcp_opt *tp)
{
if (tp->sack_ok && tp->sacked_out >= tp->packets_out - tp->lost_out)
tp->sacked_out = tp->packets_out - tp->lost_out;
tp->left_out = tp->sacked_out + tp->lost_out;
if (tp->sack_ok &&
(tcp_get_pcount(&tp->sacked_out) >=
tcp_get_pcount(&tp->packets_out) - tcp_get_pcount(&tp->lost_out)))
tcp_set_pcount(&tp->sacked_out,
(tcp_get_pcount(&tp->packets_out) -
tcp_get_pcount(&tp->lost_out)));
tcp_set_pcount(&tp->left_out,
(tcp_get_pcount(&tp->sacked_out) +
tcp_get_pcount(&tp->lost_out)));
}
extern void tcp_cwnd_application_limited(struct sock *sk);
......@@ -1315,14 +1392,16 @@ extern void tcp_cwnd_application_limited(struct sock *sk);
static inline void tcp_cwnd_validate(struct sock *sk, struct tcp_opt *tp)
{
if (tp->packets_out >= tp->snd_cwnd) {
__u32 packets_out = tcp_get_pcount(&tp->packets_out);
if (packets_out >= tp->snd_cwnd) {
/* Network is feed fully. */
tp->snd_cwnd_used = 0;
tp->snd_cwnd_stamp = tcp_time_stamp;
} else {
/* Network starves. */
if (tp->packets_out > tp->snd_cwnd_used)
tp->snd_cwnd_used = tp->packets_out;
if (tcp_get_pcount(&tp->packets_out) > tp->snd_cwnd_used)
tp->snd_cwnd_used = tcp_get_pcount(&tp->packets_out);
if ((s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= tp->rto)
tcp_cwnd_application_limited(sk);
......@@ -1388,7 +1467,7 @@ tcp_nagle_check(struct tcp_opt *tp, struct sk_buff *skb, unsigned mss_now, int n
!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) &&
((nonagle&TCP_NAGLE_CORK) ||
(!nonagle &&
tp->packets_out &&
tcp_get_pcount(&tp->packets_out) &&
tcp_minshall_check(tp))));
}
......@@ -1398,6 +1477,8 @@ tcp_nagle_check(struct tcp_opt *tp, struct sk_buff *skb, unsigned mss_now, int n
static __inline__ int tcp_snd_test(struct tcp_opt *tp, struct sk_buff *skb,
unsigned cur_mss, int nonagle)
{
int pkts = TCP_SKB_CB(skb)->tso_factor;
/* RFC 1122 - section 4.2.3.4
*
* We must queue if
......@@ -1424,14 +1505,14 @@ static __inline__ int tcp_snd_test(struct tcp_opt *tp, struct sk_buff *skb,
*/
return (((nonagle&TCP_NAGLE_PUSH) || tp->urg_mode
|| !tcp_nagle_check(tp, skb, cur_mss, nonagle)) &&
((tcp_packets_in_flight(tp) < tp->snd_cwnd) ||
(((tcp_packets_in_flight(tp) + (pkts-1)) < tp->snd_cwnd) ||
(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)) &&
!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una + tp->snd_wnd));
}
static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_opt *tp)
{
if (!tp->packets_out && !tp->pending)
if (!tcp_get_pcount(&tp->packets_out) && !tp->pending)
tcp_reset_xmit_timer(sk, TCP_TIME_PROBE0, tp->rto);
}
......@@ -1464,7 +1545,7 @@ static __inline__ void __tcp_push_pending_frames(struct sock *sk,
static __inline__ void tcp_push_pending_frames(struct sock *sk,
struct tcp_opt *tp)
{
__tcp_push_pending_frames(sk, tp, tcp_current_mss(sk, 1), tp->nonagle);
__tcp_push_pending_frames(sk, tp, tcp_current_mss(sk, 1, NULL), tp->nonagle);
}
static __inline__ int tcp_may_send_now(struct sock *sk, struct tcp_opt *tp)
......@@ -1472,7 +1553,7 @@ static __inline__ int tcp_may_send_now(struct sock *sk, struct tcp_opt *tp)
struct sk_buff *skb = sk->sk_send_head;
return (skb &&
tcp_snd_test(tp, skb, tcp_current_mss(sk, 1),
tcp_snd_test(tp, skb, tcp_current_mss(sk, 1, NULL),
tcp_skb_is_last(sk, skb) ? TCP_NAGLE_PUSH : tp->nonagle));
}
......@@ -1964,7 +2045,7 @@ static inline void tcp_westwood_slow_bw(struct sock *sk, struct sk_buff *skb)
static inline __u32 __tcp_westwood_bw_rttmin(const struct tcp_opt *tp)
{
return max((tp->westwood.bw_est) * (tp->westwood.rtt_min) /
(__u32) (tp->mss_cache),
(__u32) (tp->mss_cache_std),
2U);
}
......
......@@ -590,13 +590,14 @@ static inline int forced_push(struct tcp_opt *tp)
}
static inline void skb_entail(struct sock *sk, struct tcp_opt *tp,
struct sk_buff *skb)
struct sk_buff *skb, int tso_factor)
{
skb->csum = 0;
TCP_SKB_CB(skb)->seq = tp->write_seq;
TCP_SKB_CB(skb)->end_seq = tp->write_seq;
TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK;
TCP_SKB_CB(skb)->sacked = 0;
TCP_SKB_CB(skb)->tso_factor = tso_factor;
__skb_queue_tail(&sk->sk_write_queue, skb);
sk_charge_skb(sk, skb);
if (!sk->sk_send_head)
......@@ -632,7 +633,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse
size_t psize, int flags)
{
struct tcp_opt *tp = tcp_sk(sk);
int mss_now;
int mss_now, mss_factor_now;
int err;
ssize_t copied;
long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
......@@ -644,7 +645,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse
clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
mss_now = tcp_current_mss(sk, !(flags&MSG_OOB), &mss_factor_now);
copied = 0;
err = -EPIPE;
......@@ -668,7 +669,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse
if (!skb)
goto wait_for_memory;
skb_entail(sk, tp, skb);
skb_entail(sk, tp, skb, mss_factor_now);
copy = mss_now;
}
......@@ -719,7 +720,8 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse
if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
goto do_error;
mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
mss_now = tcp_current_mss(sk, !(flags&MSG_OOB),
&mss_factor_now);
}
out:
......@@ -780,7 +782,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
struct tcp_opt *tp = tcp_sk(sk);
struct sk_buff *skb;
int iovlen, flags;
int mss_now;
int mss_now, mss_factor_now;
int err, copied;
long timeo;
......@@ -798,7 +800,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
/* This should be in poll */
clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
mss_now = tcp_current_mss(sk, !(flags&MSG_OOB), &mss_factor_now);
/* Ok commence sending. */
iovlen = msg->msg_iovlen;
......@@ -843,7 +845,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
NETIF_F_HW_CSUM))
skb->ip_summed = CHECKSUM_HW;
skb_entail(sk, tp, skb);
skb_entail(sk, tp, skb, mss_factor_now);
copy = mss_now;
}
......@@ -962,7 +964,8 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
goto do_error;
mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
mss_now = tcp_current_mss(sk, !(flags&MSG_OOB),
&mss_factor_now);
}
}
......@@ -1818,7 +1821,7 @@ int tcp_disconnect(struct sock *sk, int flags)
tp->backoff = 0;
tp->snd_cwnd = 2;
tp->probes_out = 0;
tp->packets_out = 0;
tcp_set_pcount(&tp->packets_out, 0);
tp->snd_ssthresh = 0x7fffffff;
tp->snd_cwnd_cnt = 0;
tcp_set_ca_state(tp, TCP_CA_Open);
......
......@@ -70,14 +70,14 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_rto = (1000000*tp->rto)/HZ;
info->tcpi_ato = (1000000*tp->ack.ato)/HZ;
info->tcpi_snd_mss = tp->mss_cache;
info->tcpi_snd_mss = tp->mss_cache_std;
info->tcpi_rcv_mss = tp->ack.rcv_mss;
info->tcpi_unacked = tp->packets_out;
info->tcpi_sacked = tp->sacked_out;
info->tcpi_lost = tp->lost_out;
info->tcpi_retrans = tp->retrans_out;
info->tcpi_fackets = tp->fackets_out;
info->tcpi_unacked = tcp_get_pcount(&tp->packets_out);
info->tcpi_sacked = tcp_get_pcount(&tp->sacked_out);
info->tcpi_lost = tcp_get_pcount(&tp->lost_out);
info->tcpi_retrans = tcp_get_pcount(&tp->retrans_out);
info->tcpi_fackets = tcp_get_pcount(&tp->fackets_out);
info->tcpi_last_data_sent = ((now - tp->lsndtime)*1000)/HZ;
info->tcpi_last_data_recv = ((now - tp->ack.lrcvtime)*1000)/HZ;
......
......@@ -897,7 +897,9 @@ static void tcp_update_reordering(struct tcp_opt *tp, int metric, int ts)
#if FASTRETRANS_DEBUG > 1
printk(KERN_DEBUG "Disorder%d %d %u f%u s%u rr%d\n",
tp->sack_ok, tp->ca_state,
tp->reordering, tp->fackets_out, tp->sacked_out,
tp->reordering,
tcp_get_pcount(&tp->fackets_out),
tcp_get_pcount(&tp->sacked_out),
tp->undo_marker ? tp->undo_retrans : 0);
#endif
/* Disable FACK yet. */
......@@ -960,7 +962,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
unsigned char *ptr = ack_skb->h.raw + TCP_SKB_CB(ack_skb)->sacked;
struct tcp_sack_block *sp = (struct tcp_sack_block *)(ptr+2);
int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3;
int reord = tp->packets_out;
int reord = tcp_get_pcount(&tp->packets_out);
int prior_fackets;
u32 lost_retrans = 0;
int flag = 0;
......@@ -972,11 +974,12 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
sk->sk_route_caps &= ~NETIF_F_TSO;
sk->sk_no_largesend = 1;
tp->mss_cache = tp->mss_cache_std;
tp->mss_tso_factor = 1;
}
if (!tp->sacked_out)
tp->fackets_out = 0;
prior_fackets = tp->fackets_out;
if (!tcp_get_pcount(&tp->sacked_out))
tcp_set_pcount(&tp->fackets_out, 0);
prior_fackets = tcp_get_pcount(&tp->fackets_out);
for (i=0; i<num_sacks; i++, sp++) {
struct sk_buff *skb;
......@@ -1074,8 +1077,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
*/
if (sacked & TCPCB_LOST) {
TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
tp->lost_out--;
tp->retrans_out--;
tcp_dec_pcount(&tp->lost_out, skb);
tcp_dec_pcount(&tp->retrans_out, skb);
}
} else {
/* New sack for not retransmitted frame,
......@@ -1087,16 +1090,16 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
if (sacked & TCPCB_LOST) {
TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
tp->lost_out--;
tcp_dec_pcount(&tp->lost_out, skb);
}
}
TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED;
flag |= FLAG_DATA_SACKED;
tp->sacked_out++;
tcp_inc_pcount(&tp->sacked_out, skb);
if (fack_count > tp->fackets_out)
tp->fackets_out = fack_count;
if (fack_count > tcp_get_pcount(&tp->fackets_out))
tcp_set_pcount(&tp->fackets_out, fack_count);
} else {
if (dup_sack && (sacked&TCPCB_RETRANS))
reord = min(fack_count, reord);
......@@ -1110,7 +1113,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
if (dup_sack &&
(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS)) {
TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
tp->retrans_out--;
tcp_dec_pcount(&tp->retrans_out, skb);
}
}
}
......@@ -1134,12 +1137,12 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
(IsFack(tp) ||
!before(lost_retrans,
TCP_SKB_CB(skb)->ack_seq + tp->reordering *
tp->mss_cache))) {
tp->mss_cache_std))) {
TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
tp->retrans_out--;
tcp_dec_pcount(&tp->retrans_out, skb);
if (!(TCP_SKB_CB(skb)->sacked&(TCPCB_LOST|TCPCB_SACKED_ACKED))) {
tp->lost_out++;
tcp_inc_pcount(&tp->lost_out, skb);
TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
flag |= FLAG_DATA_SACKED;
NET_INC_STATS_BH(LINUX_MIB_TCPLOSTRETRANSMIT);
......@@ -1148,15 +1151,20 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
}
}
tp->left_out = tp->sacked_out + tp->lost_out;
tcp_set_pcount(&tp->left_out,
(tcp_get_pcount(&tp->sacked_out) +
tcp_get_pcount(&tp->lost_out)));
if (reord < tp->fackets_out && tp->ca_state != TCP_CA_Loss)
tcp_update_reordering(tp, (tp->fackets_out + 1) - reord, 0);
if ((reord < tcp_get_pcount(&tp->fackets_out)) &&
tp->ca_state != TCP_CA_Loss)
tcp_update_reordering(tp,
((tcp_get_pcount(&tp->fackets_out) + 1) -
reord), 0);
#if FASTRETRANS_DEBUG > 0
BUG_TRAP((int)tp->sacked_out >= 0);
BUG_TRAP((int)tp->lost_out >= 0);
BUG_TRAP((int)tp->retrans_out >= 0);
BUG_TRAP((int)tcp_get_pcount(&tp->sacked_out) >= 0);
BUG_TRAP((int)tcp_get_pcount(&tp->lost_out) >= 0);
BUG_TRAP((int)tcp_get_pcount(&tp->retrans_out) >= 0);
BUG_TRAP((int)tcp_packets_in_flight(tp) >= 0);
#endif
return flag;
......@@ -1186,7 +1194,7 @@ void tcp_enter_frto(struct sock *sk)
* If something was really lost, it is eventually caught up
* in tcp_enter_frto_loss.
*/
tp->retrans_out = 0;
tcp_set_pcount(&tp->retrans_out, 0);
tp->undo_marker = tp->snd_una;
tp->undo_retrans = 0;
......@@ -1209,26 +1217,26 @@ static void tcp_enter_frto_loss(struct sock *sk)
struct sk_buff *skb;
int cnt = 0;
tp->sacked_out = 0;
tp->lost_out = 0;
tp->fackets_out = 0;
tcp_set_pcount(&tp->sacked_out, 0);
tcp_set_pcount(&tp->lost_out, 0);
tcp_set_pcount(&tp->fackets_out, 0);
sk_stream_for_retrans_queue(skb, sk) {
cnt++;
cnt += TCP_SKB_CB(skb)->tso_factor;
TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) {
/* Do not mark those segments lost that were
* forward transmitted after RTO
*/
if(!after(TCP_SKB_CB(skb)->end_seq,
if (!after(TCP_SKB_CB(skb)->end_seq,
tp->frto_highmark)) {
TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
tp->lost_out++;
tcp_inc_pcount(&tp->lost_out, skb);
}
} else {
tp->sacked_out++;
tp->fackets_out = cnt;
tcp_inc_pcount(&tp->sacked_out, skb);
tcp_set_pcount(&tp->fackets_out, cnt);
}
}
tcp_sync_left_out(tp);
......@@ -1250,12 +1258,12 @@ static void tcp_enter_frto_loss(struct sock *sk)
void tcp_clear_retrans(struct tcp_opt *tp)
{
tp->left_out = 0;
tp->retrans_out = 0;
tcp_set_pcount(&tp->left_out, 0);
tcp_set_pcount(&tp->retrans_out, 0);
tp->fackets_out = 0;
tp->sacked_out = 0;
tp->lost_out = 0;
tcp_set_pcount(&tp->fackets_out, 0);
tcp_set_pcount(&tp->sacked_out, 0);
tcp_set_pcount(&tp->lost_out, 0);
tp->undo_marker = 0;
tp->undo_retrans = 0;
......@@ -1289,17 +1297,17 @@ void tcp_enter_loss(struct sock *sk, int how)
tp->undo_marker = tp->snd_una;
sk_stream_for_retrans_queue(skb, sk) {
cnt++;
cnt += TCP_SKB_CB(skb)->tso_factor;
if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS)
tp->undo_marker = 0;
TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED;
if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || how) {
TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
tp->lost_out++;
tcp_inc_pcount(&tp->lost_out, skb);
} else {
tp->sacked_out++;
tp->fackets_out = cnt;
tcp_inc_pcount(&tp->sacked_out, skb);
tcp_set_pcount(&tp->fackets_out, cnt);
}
}
tcp_sync_left_out(tp);
......@@ -1336,7 +1344,8 @@ static int tcp_check_sack_reneging(struct sock *sk, struct tcp_opt *tp)
static inline int tcp_fackets_out(struct tcp_opt *tp)
{
return IsReno(tp) ? tp->sacked_out+1 : tp->fackets_out;
return IsReno(tp) ? tcp_get_pcount(&tp->sacked_out)+1 :
tcp_get_pcount(&tp->fackets_out);
}
static inline int tcp_skb_timedout(struct tcp_opt *tp, struct sk_buff *skb)
......@@ -1346,7 +1355,7 @@ static inline int tcp_skb_timedout(struct tcp_opt *tp, struct sk_buff *skb)
static inline int tcp_head_timedout(struct sock *sk, struct tcp_opt *tp)
{
return tp->packets_out &&
return tcp_get_pcount(&tp->packets_out) &&
tcp_skb_timedout(tp, skb_peek(&sk->sk_write_queue));
}
......@@ -1446,8 +1455,10 @@ static inline int tcp_head_timedout(struct sock *sk, struct tcp_opt *tp)
static int
tcp_time_to_recover(struct sock *sk, struct tcp_opt *tp)
{
__u32 packets_out;
/* Trick#1: The loss is proven. */
if (tp->lost_out)
if (tcp_get_pcount(&tp->lost_out))
return 1;
/* Not-A-Trick#2 : Classic rule... */
......@@ -1463,8 +1474,9 @@ tcp_time_to_recover(struct sock *sk, struct tcp_opt *tp)
/* Trick#4: It is still not OK... But will it be useful to delay
* recovery more?
*/
if (tp->packets_out <= tp->reordering &&
tp->sacked_out >= max_t(__u32, tp->packets_out/2, sysctl_tcp_reordering) &&
packets_out = tcp_get_pcount(&tp->packets_out);
if (packets_out <= tp->reordering &&
tcp_get_pcount(&tp->sacked_out) >= max_t(__u32, packets_out/2, sysctl_tcp_reordering) &&
!tcp_may_send_now(sk, tp)) {
/* We have nothing to send. This connection is limited
* either by receiver window or by application.
......@@ -1483,12 +1495,16 @@ static void tcp_check_reno_reordering(struct tcp_opt *tp, int addend)
{
u32 holes;
holes = max(tp->lost_out, 1U);
holes = min(holes, tp->packets_out);
holes = max(tcp_get_pcount(&tp->lost_out), 1U);
holes = min(holes, tcp_get_pcount(&tp->packets_out));
if (tp->sacked_out + holes > tp->packets_out) {
tp->sacked_out = tp->packets_out - holes;
tcp_update_reordering(tp, tp->packets_out+addend, 0);
if ((tcp_get_pcount(&tp->sacked_out) + holes) >
tcp_get_pcount(&tp->packets_out)) {
tcp_set_pcount(&tp->sacked_out,
(tcp_get_pcount(&tp->packets_out) - holes));
tcp_update_reordering(tp,
tcp_get_pcount(&tp->packets_out)+addend,
0);
}
}
......@@ -1496,7 +1512,7 @@ static void tcp_check_reno_reordering(struct tcp_opt *tp, int addend)
static void tcp_add_reno_sack(struct tcp_opt *tp)
{
++tp->sacked_out;
tcp_inc_pcount_explicit(&tp->sacked_out, 1);
tcp_check_reno_reordering(tp, 0);
tcp_sync_left_out(tp);
}
......@@ -1507,10 +1523,10 @@ static void tcp_remove_reno_sacks(struct sock *sk, struct tcp_opt *tp, int acked
{
if (acked > 0) {
/* One ACK acked hole. The rest eat duplicate ACKs. */
if (acked-1 >= tp->sacked_out)
tp->sacked_out = 0;
if (acked-1 >= tcp_get_pcount(&tp->sacked_out))
tcp_set_pcount(&tp->sacked_out, 0);
else
tp->sacked_out -= acked-1;
tcp_dec_pcount_explicit(&tp->sacked_out, acked-1);
}
tcp_check_reno_reordering(tp, acked);
tcp_sync_left_out(tp);
......@@ -1518,8 +1534,8 @@ static void tcp_remove_reno_sacks(struct sock *sk, struct tcp_opt *tp, int acked
static inline void tcp_reset_reno_sack(struct tcp_opt *tp)
{
tp->sacked_out = 0;
tp->left_out = tp->lost_out;
tcp_set_pcount(&tp->sacked_out, 0);
tcp_set_pcount(&tp->left_out, tcp_get_pcount(&tp->lost_out));
}
/* Mark head of queue up as lost. */
......@@ -1529,14 +1545,15 @@ tcp_mark_head_lost(struct sock *sk, struct tcp_opt *tp, int packets, u32 high_se
struct sk_buff *skb;
int cnt = packets;
BUG_TRAP(cnt <= tp->packets_out);
BUG_TRAP(cnt <= tcp_get_pcount(&tp->packets_out));
sk_stream_for_retrans_queue(skb, sk) {
if (--cnt < 0 || after(TCP_SKB_CB(skb)->end_seq, high_seq))
cnt -= TCP_SKB_CB(skb)->tso_factor;
if (cnt < 0 || after(TCP_SKB_CB(skb)->end_seq, high_seq))
break;
if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) {
TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
tp->lost_out++;
tcp_inc_pcount(&tp->lost_out, skb);
}
}
tcp_sync_left_out(tp);
......@@ -1547,7 +1564,7 @@ tcp_mark_head_lost(struct sock *sk, struct tcp_opt *tp, int packets, u32 high_se
static void tcp_update_scoreboard(struct sock *sk, struct tcp_opt *tp)
{
if (IsFack(tp)) {
int lost = tp->fackets_out - tp->reordering;
int lost = tcp_get_pcount(&tp->fackets_out) - tp->reordering;
if (lost <= 0)
lost = 1;
tcp_mark_head_lost(sk, tp, lost, tp->high_seq);
......@@ -1567,7 +1584,7 @@ static void tcp_update_scoreboard(struct sock *sk, struct tcp_opt *tp)
if (tcp_skb_timedout(tp, skb) &&
!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) {
TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
tp->lost_out++;
tcp_inc_pcount(&tp->lost_out, skb);
}
}
tcp_sync_left_out(tp);
......@@ -1632,8 +1649,9 @@ static void DBGUNDO(struct sock *sk, struct tcp_opt *tp, const char *msg)
printk(KERN_DEBUG "Undo %s %u.%u.%u.%u/%u c%u l%u ss%u/%u p%u\n",
msg,
NIPQUAD(inet->daddr), ntohs(inet->dport),
tp->snd_cwnd, tp->left_out,
tp->snd_ssthresh, tp->prior_ssthresh, tp->packets_out);
tp->snd_cwnd, tcp_get_pcount(&tp->left_out),
tp->snd_ssthresh, tp->prior_ssthresh,
tcp_get_pcount(&tp->packets_out));
}
#else
#define DBGUNDO(x...) do { } while (0)
......@@ -1703,13 +1721,13 @@ static void tcp_try_undo_dsack(struct sock *sk, struct tcp_opt *tp)
static int tcp_try_undo_partial(struct sock *sk, struct tcp_opt *tp, int acked)
{
/* Partial ACK arrived. Force Hoe's retransmit. */
int failed = IsReno(tp) || tp->fackets_out>tp->reordering;
int failed = IsReno(tp) || tcp_get_pcount(&tp->fackets_out)>tp->reordering;
if (tcp_may_undo(tp)) {
/* Plain luck! Hole if filled with delayed
* packet, rather than with a retransmit.
*/
if (tp->retrans_out == 0)
if (tcp_get_pcount(&tp->retrans_out) == 0)
tp->retrans_stamp = 0;
tcp_update_reordering(tp, tcp_fackets_out(tp)+acked, 1);
......@@ -1736,8 +1754,8 @@ static int tcp_try_undo_loss(struct sock *sk, struct tcp_opt *tp)
TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
}
DBGUNDO(sk, tp, "partial loss");
tp->lost_out = 0;
tp->left_out = tp->sacked_out;
tcp_set_pcount(&tp->lost_out, 0);
tcp_set_pcount(&tp->left_out, tcp_get_pcount(&tp->sacked_out));
tcp_undo_cwr(tp, 1);
NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO);
tp->retransmits = 0;
......@@ -1760,9 +1778,9 @@ static __inline__ void tcp_complete_cwr(struct tcp_opt *tp)
static void tcp_try_to_open(struct sock *sk, struct tcp_opt *tp, int flag)
{
tp->left_out = tp->sacked_out;
tcp_set_pcount(&tp->left_out, tcp_get_pcount(&tp->sacked_out));
if (tp->retrans_out == 0)
if (tcp_get_pcount(&tp->retrans_out) == 0)
tp->retrans_stamp = 0;
if (flag&FLAG_ECE)
......@@ -1771,8 +1789,8 @@ static void tcp_try_to_open(struct sock *sk, struct tcp_opt *tp, int flag)
if (tp->ca_state != TCP_CA_CWR) {
int state = TCP_CA_Open;
if (tp->left_out ||
tp->retrans_out ||
if (tcp_get_pcount(&tp->left_out) ||
tcp_get_pcount(&tp->retrans_out) ||
tp->undo_marker)
state = TCP_CA_Disorder;
......@@ -1806,11 +1824,11 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
/* Some technical things:
* 1. Reno does not count dupacks (sacked_out) automatically. */
if (!tp->packets_out)
tp->sacked_out = 0;
if (!tcp_get_pcount(&tp->packets_out))
tcp_set_pcount(&tp->sacked_out, 0);
/* 2. SACK counts snd_fack in packets inaccurately. */
if (tp->sacked_out == 0)
tp->fackets_out = 0;
if (tcp_get_pcount(&tp->sacked_out) == 0)
tcp_set_pcount(&tp->fackets_out, 0);
/* Now state machine starts.
* A. ECE, hence prohibit cwnd undoing, the reduction is required. */
......@@ -1818,15 +1836,15 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
tp->prior_ssthresh = 0;
/* B. In all the states check for reneging SACKs. */
if (tp->sacked_out && tcp_check_sack_reneging(sk, tp))
if (tcp_get_pcount(&tp->sacked_out) && tcp_check_sack_reneging(sk, tp))
return;
/* C. Process data loss notification, provided it is valid. */
if ((flag&FLAG_DATA_LOST) &&
before(tp->snd_una, tp->high_seq) &&
tp->ca_state != TCP_CA_Open &&
tp->fackets_out > tp->reordering) {
tcp_mark_head_lost(sk, tp, tp->fackets_out-tp->reordering, tp->high_seq);
tcp_get_pcount(&tp->fackets_out) > tp->reordering) {
tcp_mark_head_lost(sk, tp, tcp_get_pcount(&tp->fackets_out)-tp->reordering, tp->high_seq);
NET_INC_STATS_BH(LINUX_MIB_TCPLOSS);
}
......@@ -1837,7 +1855,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
* when high_seq is ACKed. */
if (tp->ca_state == TCP_CA_Open) {
if (!sysctl_tcp_frto)
BUG_TRAP(tp->retrans_out == 0);
BUG_TRAP(tcp_get_pcount(&tp->retrans_out) == 0);
tp->retrans_stamp = 0;
} else if (!before(tp->snd_una, tp->high_seq)) {
switch (tp->ca_state) {
......@@ -1884,7 +1902,8 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
if (IsReno(tp) && is_dupack)
tcp_add_reno_sack(tp);
} else {
int acked = prior_packets - tp->packets_out;
int acked = prior_packets -
tcp_get_pcount(&tp->packets_out);
if (IsReno(tp))
tcp_remove_reno_sacks(sk, tp, acked);
is_dupack = tcp_try_undo_partial(sk, tp, acked);
......@@ -1927,7 +1946,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
tp->high_seq = tp->snd_nxt;
tp->prior_ssthresh = 0;
tp->undo_marker = tp->snd_una;
tp->undo_retrans = tp->retrans_out;
tp->undo_retrans = tcp_get_pcount(&tp->retrans_out);
if (tp->ca_state < TCP_CA_CWR) {
if (!(flag&FLAG_ECE))
......@@ -2156,7 +2175,7 @@ static void vegas_cong_avoid(struct tcp_opt *tp, u32 ack, u32 seq_rtt)
* is the cwnd during the previous RTT.
*/
old_wnd = (tp->vegas.beg_snd_nxt - tp->vegas.beg_snd_una) /
tp->mss_cache;
tp->mss_cache_std;
old_snd_cwnd = tp->vegas.beg_snd_cwnd;
/* Save the extent of the current window so we can use this
......@@ -2327,7 +2346,7 @@ static inline void tcp_cong_avoid(struct tcp_opt *tp, u32 ack, u32 seq_rtt)
static __inline__ void tcp_ack_packets_out(struct sock *sk, struct tcp_opt *tp)
{
if (tp->packets_out==0) {
if (!tcp_get_pcount(&tp->packets_out)) {
tcp_clear_xmit_timer(sk, TCP_TIME_RETRANS);
} else {
tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
......@@ -2343,7 +2362,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
int acked = 0;
__s32 seq_rtt = -1;
while ((skb = skb_peek(&sk->sk_write_queue)) && skb != sk->sk_send_head) {
while ((skb = skb_peek(&sk->sk_write_queue)) &&
skb != sk->sk_send_head) {
struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
__u8 sacked = scb->sacked;
......@@ -2361,7 +2381,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
* connection startup slow start one packet too
* quickly. This is severely frowned upon behavior.
*/
if(!(scb->flags & TCPCB_FLAG_SYN)) {
if (!(scb->flags & TCPCB_FLAG_SYN)) {
acked |= FLAG_DATA_ACKED;
} else {
acked |= FLAG_SYN_ACKED;
......@@ -2369,27 +2389,26 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
}
if (sacked) {
if(sacked & TCPCB_RETRANS) {
if (sacked & TCPCB_RETRANS) {
if(sacked & TCPCB_SACKED_RETRANS)
tp->retrans_out--;
tcp_dec_pcount(&tp->retrans_out, skb);
acked |= FLAG_RETRANS_DATA_ACKED;
seq_rtt = -1;
} else if (seq_rtt < 0)
seq_rtt = now - scb->when;
if(sacked & TCPCB_SACKED_ACKED)
tp->sacked_out--;
if(sacked & TCPCB_LOST)
tp->lost_out--;
if(sacked & TCPCB_URG) {
if (sacked & TCPCB_SACKED_ACKED)
tcp_dec_pcount(&tp->sacked_out, skb);
if (sacked & TCPCB_LOST)
tcp_dec_pcount(&tp->lost_out, skb);
if (sacked & TCPCB_URG) {
if (tp->urg_mode &&
!before(scb->end_seq, tp->snd_up))
tp->urg_mode = 0;
}
} else if (seq_rtt < 0)
seq_rtt = now - scb->when;
if (tp->fackets_out)
tp->fackets_out--;
tp->packets_out--;
tcp_dec_pcount_approx(&tp->fackets_out, skb);
tcp_packets_out_dec(tp, skb);
__skb_unlink(skb, skb->list);
sk_stream_free_skb(sk, skb);
}
......@@ -2400,24 +2419,27 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
}
#if FASTRETRANS_DEBUG > 0
BUG_TRAP((int)tp->sacked_out >= 0);
BUG_TRAP((int)tp->lost_out >= 0);
BUG_TRAP((int)tp->retrans_out >= 0);
if (!tp->packets_out && tp->sack_ok) {
if (tp->lost_out) {
printk(KERN_DEBUG "Leak l=%u %d\n", tp->lost_out,
tp->ca_state);
tp->lost_out = 0;
BUG_TRAP((int)tcp_get_pcount(&tp->sacked_out) >= 0);
BUG_TRAP((int)tcp_get_pcount(&tp->lost_out) >= 0);
BUG_TRAP((int)tcp_get_pcount(&tp->retrans_out) >= 0);
if (!tcp_get_pcount(&tp->packets_out) && tp->sack_ok) {
if (tcp_get_pcount(&tp->lost_out)) {
printk(KERN_DEBUG "Leak l=%u %d\n",
tcp_get_pcount(&tp->lost_out),
tp->ca_state);
tcp_set_pcount(&tp->lost_out, 0);
}
if (tp->sacked_out) {
printk(KERN_DEBUG "Leak s=%u %d\n", tp->sacked_out,
tp->ca_state);
tp->sacked_out = 0;
if (tcp_get_pcount(&tp->sacked_out)) {
printk(KERN_DEBUG "Leak s=%u %d\n",
tcp_get_pcount(&tp->sacked_out),
tp->ca_state);
tcp_set_pcount(&tp->sacked_out, 0);
}
if (tp->retrans_out) {
printk(KERN_DEBUG "Leak r=%u %d\n", tp->retrans_out,
tp->ca_state);
tp->retrans_out = 0;
if (tcp_get_pcount(&tp->retrans_out)) {
printk(KERN_DEBUG "Leak r=%u %d\n",
tcp_get_pcount(&tp->retrans_out),
tp->ca_state);
tcp_set_pcount(&tp->retrans_out, 0);
}
}
#endif
......@@ -2712,19 +2734,19 @@ static void westwood_dupack_update(struct sock *sk)
{
struct tcp_opt *tp = tcp_sk(sk);
tp->westwood.accounted += tp->mss_cache;
tp->westwood.cumul_ack = tp->mss_cache;
tp->westwood.accounted += tp->mss_cache_std;
tp->westwood.cumul_ack = tp->mss_cache_std;
}
static inline int westwood_may_change_cumul(struct tcp_opt *tp)
{
return ((tp->westwood.cumul_ack) > tp->mss_cache);
return ((tp->westwood.cumul_ack) > tp->mss_cache_std);
}
static inline void westwood_partial_update(struct tcp_opt *tp)
{
tp->westwood.accounted -= tp->westwood.cumul_ack;
tp->westwood.cumul_ack = tp->mss_cache;
tp->westwood.cumul_ack = tp->mss_cache_std;
}
static inline void westwood_complete_update(struct tcp_opt *tp)
......@@ -2835,7 +2857,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
*/
sk->sk_err_soft = 0;
tp->rcv_tstamp = tcp_time_stamp;
prior_packets = tp->packets_out;
prior_packets = tcp_get_pcount(&tp->packets_out);
if (!prior_packets)
goto no_queue;
......@@ -3857,11 +3879,11 @@ static void tcp_new_space(struct sock *sk)
{
struct tcp_opt *tp = tcp_sk(sk);
if (tp->packets_out < tp->snd_cwnd &&
if (tcp_get_pcount(&tp->packets_out) < tp->snd_cwnd &&
!(sk->sk_userlocks & SOCK_SNDBUF_LOCK) &&
!tcp_memory_pressure &&
atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
int sndmem = max_t(u32, tp->mss_clamp, tp->mss_cache) +
int sndmem = max_t(u32, tp->mss_clamp, tp->mss_cache_std) +
MAX_TCP_HEADER + 16 + sizeof(struct sk_buff),
demanded = max_t(unsigned int, tp->snd_cwnd,
tp->reordering + 1);
......
......@@ -2075,7 +2075,8 @@ static int tcp_v4_init_sock(struct sock *sk)
*/
tp->snd_ssthresh = 0x7fffffff; /* Infinity */
tp->snd_cwnd_clamp = ~0;
tp->mss_cache = 536;
tp->mss_cache_std = tp->mss_cache = 536;
tp->mss_tso_factor = 1;
tp->reordering = sysctl_tcp_reordering;
......
......@@ -752,11 +752,11 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
newtp->mdev = TCP_TIMEOUT_INIT;
newtp->rto = TCP_TIMEOUT_INIT;
newtp->packets_out = 0;
newtp->left_out = 0;
newtp->retrans_out = 0;
newtp->sacked_out = 0;
newtp->fackets_out = 0;
tcp_set_pcount(&newtp->packets_out, 0);
tcp_set_pcount(&newtp->left_out, 0);
tcp_set_pcount(&newtp->retrans_out, 0);
tcp_set_pcount(&newtp->sacked_out, 0);
tcp_set_pcount(&newtp->fackets_out, 0);
newtp->snd_ssthresh = 0x7fffffff;
/* So many TCP implementations out there (incorrectly) count the
......
......@@ -52,8 +52,7 @@ void update_send_head(struct sock *sk, struct tcp_opt *tp, struct sk_buff *skb)
if (sk->sk_send_head == (struct sk_buff *)&sk->sk_write_queue)
sk->sk_send_head = NULL;
tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
if (tp->packets_out++ == 0)
tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
tcp_packets_out_inc(sk, tp, skb);
}
/* SND.NXT, if window was not shrunk.
......@@ -123,7 +122,8 @@ static __inline__ void tcp_event_data_sent(struct tcp_opt *tp, struct sk_buff *s
{
u32 now = tcp_time_stamp;
if (!tp->packets_out && (s32)(now - tp->lsndtime) > tp->rto)
if (!tcp_get_pcount(&tp->packets_out) &&
(s32)(now - tp->lsndtime) > tp->rto)
tcp_cwnd_restart(tp, __sk_dst_get(sk));
tp->lsndtime = now;
......@@ -259,7 +259,7 @@ static __inline__ u16 tcp_select_window(struct sock *sk)
*/
int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
{
if(skb != NULL) {
if (skb != NULL) {
struct inet_opt *inet = inet_sk(sk);
struct tcp_opt *tp = tcp_sk(sk);
struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
......@@ -268,6 +268,8 @@ int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
int sysctl_flags;
int err;
BUG_ON(!TCP_SKB_CB(skb)->tso_factor);
#define SYSCTL_FLAG_TSTAMPS 0x1
#define SYSCTL_FLAG_WSCALE 0x2
#define SYSCTL_FLAG_SACK 0x4
......@@ -414,8 +416,7 @@ void tcp_push_one(struct sock *sk, unsigned cur_mss)
if (!tcp_transmit_skb(sk, skb_clone(skb, sk->sk_allocation))) {
sk->sk_send_head = NULL;
tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
if (tp->packets_out++ == 0)
tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
tcp_packets_out_inc(sk, tp, skb);
return;
}
}
......@@ -453,10 +454,13 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len)
flags = TCP_SKB_CB(skb)->flags;
TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH);
TCP_SKB_CB(buff)->flags = flags;
TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked&(TCPCB_LOST|TCPCB_EVER_RETRANS|TCPCB_AT_TAIL);
TCP_SKB_CB(buff)->sacked =
(TCP_SKB_CB(skb)->sacked &
(TCPCB_LOST | TCPCB_EVER_RETRANS | TCPCB_AT_TAIL));
TCP_SKB_CB(buff)->tso_factor = tp->mss_tso_factor;
if (TCP_SKB_CB(buff)->sacked&TCPCB_LOST) {
tp->lost_out++;
tp->left_out++;
tcp_inc_pcount(&tp->lost_out, buff);
tcp_inc_pcount(&tp->left_out, buff);
}
TCP_SKB_CB(skb)->sacked &= ~TCPCB_AT_TAIL;
......@@ -594,9 +598,10 @@ int tcp_sync_mss(struct sock *sk, u32 pmtu)
/* And store cached results */
tp->pmtu_cookie = pmtu;
tp->mss_cache = tp->mss_cache_std = mss_now;
tp->mss_tso_factor = 1;
if (sk->sk_route_caps & NETIF_F_TSO) {
int large_mss;
int large_mss, factor;
large_mss = 65535 - tp->af_specific->net_header_len -
tp->ext_header_len - tp->ext2_header_len - tp->tcp_header_len;
......@@ -604,8 +609,15 @@ int tcp_sync_mss(struct sock *sk, u32 pmtu)
if (tp->max_window && large_mss > (tp->max_window>>1))
large_mss = max((tp->max_window>>1), 68U - tp->tcp_header_len);
/* Always keep large mss multiple of real mss. */
tp->mss_cache = mss_now*(large_mss/mss_now);
/* Always keep large mss multiple of real mss, but
* do not exceed congestion window.
*/
factor = large_mss / mss_now;
if (factor > tp->snd_cwnd)
factor = tp->snd_cwnd;
tp->mss_cache = mss_now * factor;
tp->mss_tso_factor = factor;
}
return mss_now;
......@@ -637,7 +649,7 @@ int tcp_write_xmit(struct sock *sk, int nonagle)
* We also handle things correctly when the user adds some
* IP options mid-stream. Silly to do, but cover it.
*/
mss_now = tcp_current_mss(sk, 1);
mss_now = tcp_current_mss(sk, 1, NULL);
while ((skb = sk->sk_send_head) &&
tcp_snd_test(tp, skb, mss_now,
......@@ -662,7 +674,7 @@ int tcp_write_xmit(struct sock *sk, int nonagle)
return 0;
}
return !tp->packets_out && sk->sk_send_head;
return !tcp_get_pcount(&tp->packets_out) && sk->sk_send_head;
}
return 0;
}
......@@ -788,7 +800,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
/* The first test we must make is that neither of these two
* SKB's are still referenced by someone else.
*/
if(!skb_cloned(skb) && !skb_cloned(next_skb)) {
if (!skb_cloned(skb) && !skb_cloned(next_skb)) {
int skb_size = skb->len, next_skb_size = next_skb->len;
u16 flags = TCP_SKB_CB(skb)->flags;
......@@ -831,24 +843,23 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
*/
TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked&(TCPCB_EVER_RETRANS|TCPCB_AT_TAIL);
if (TCP_SKB_CB(next_skb)->sacked&TCPCB_SACKED_RETRANS)
tp->retrans_out--;
tcp_dec_pcount(&tp->retrans_out, next_skb);
if (TCP_SKB_CB(next_skb)->sacked&TCPCB_LOST) {
tp->lost_out--;
tp->left_out--;
tcp_dec_pcount(&tp->lost_out, next_skb);
tcp_dec_pcount(&tp->left_out, next_skb);
}
/* Reno case is special. Sigh... */
if (!tp->sack_ok && tp->sacked_out) {
tp->sacked_out--;
tp->left_out--;
if (!tp->sack_ok && tcp_get_pcount(&tp->sacked_out)) {
tcp_dec_pcount_approx(&tp->sacked_out, next_skb);
tcp_dec_pcount(&tp->left_out, next_skb);
}
/* Not quite right: it can be > snd.fack, but
* it is better to underestimate fackets.
*/
if (tp->fackets_out)
tp->fackets_out--;
tcp_dec_pcount_approx(&tp->fackets_out, next_skb);
tcp_packets_out_dec(tp, next_skb);
sk_stream_free_skb(sk, next_skb);
tp->packets_out--;
}
}
......@@ -860,7 +871,7 @@ void tcp_simple_retransmit(struct sock *sk)
{
struct tcp_opt *tp = tcp_sk(sk);
struct sk_buff *skb;
unsigned int mss = tcp_current_mss(sk, 0);
unsigned int mss = tcp_current_mss(sk, 0, NULL);
int lost = 0;
sk_stream_for_retrans_queue(skb, sk) {
......@@ -868,11 +879,11 @@ void tcp_simple_retransmit(struct sock *sk)
!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) {
if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) {
TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
tp->retrans_out--;
tcp_dec_pcount(&tp->retrans_out, skb);
}
if (!(TCP_SKB_CB(skb)->sacked&TCPCB_LOST)) {
TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
tp->lost_out++;
tcp_inc_pcount(&tp->lost_out, skb);
lost = 1;
}
}
......@@ -905,7 +916,7 @@ void tcp_simple_retransmit(struct sock *sk)
int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
{
struct tcp_opt *tp = tcp_sk(sk);
unsigned int cur_mss = tcp_current_mss(sk, 0);
unsigned int cur_mss = tcp_current_mss(sk, 0, NULL);
int err;
/* Do not sent more than we queued. 1/4 is reserved for possible
......@@ -923,6 +934,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
sk->sk_route_caps &= ~NETIF_F_TSO;
sk->sk_no_largesend = 1;
tp->mss_cache = tp->mss_cache_std;
tp->mss_tso_factor = 1;
}
if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
......@@ -938,12 +950,12 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
&& TCP_SKB_CB(skb)->seq != tp->snd_una)
return -EAGAIN;
if(skb->len > cur_mss) {
if(tcp_fragment(sk, skb, cur_mss))
if (skb->len > cur_mss) {
if (tcp_fragment(sk, skb, cur_mss))
return -ENOMEM; /* We'll try again later. */
/* New SKB created, account for it. */
tp->packets_out++;
tcp_inc_pcount(&tp->packets_out, skb);
}
/* Collapse two adjacent packets if worthwhile and we can. */
......@@ -992,7 +1004,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
}
#endif
TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS;
tp->retrans_out++;
tcp_inc_pcount(&tp->retrans_out, skb);
/* Save stamp of the first retransmit. */
if (!tp->retrans_stamp)
......@@ -1020,14 +1032,16 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
{
struct tcp_opt *tp = tcp_sk(sk);
struct sk_buff *skb;
int packet_cnt = tp->lost_out;
int packet_cnt = tcp_get_pcount(&tp->lost_out);
/* First pass: retransmit lost packets. */
if (packet_cnt) {
sk_stream_for_retrans_queue(skb, sk) {
__u8 sacked = TCP_SKB_CB(skb)->sacked;
int pkts = TCP_SKB_CB(skb)->tso_factor;
if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
if ((tcp_packets_in_flight(tp) + (pkts-1)) >=
tp->snd_cwnd)
return;
if (sacked&TCPCB_LOST) {
......@@ -1044,7 +1058,8 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
}
if (--packet_cnt <= 0)
packet_cnt -= TCP_SKB_CB(skb)->tso_factor;
if (packet_cnt <= 0)
break;
}
}
......@@ -1073,17 +1088,20 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
packet_cnt = 0;
sk_stream_for_retrans_queue(skb, sk) {
if(++packet_cnt > tp->fackets_out)
int pkts = TCP_SKB_CB(skb)->tso_factor;
packet_cnt += pkts;
if (packet_cnt > tcp_get_pcount(&tp->fackets_out))
break;
if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
if ((tcp_packets_in_flight(tp) + (pkts-1)) >= tp->snd_cwnd)
break;
if(TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS)
if (TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS)
continue;
/* Ok, retransmit it. */
if(tcp_retransmit_skb(sk, skb))
if (tcp_retransmit_skb(sk, skb))
break;
if (skb == skb_peek(&sk->sk_write_queue))
......@@ -1101,13 +1119,13 @@ void tcp_send_fin(struct sock *sk)
{
struct tcp_opt *tp = tcp_sk(sk);
struct sk_buff *skb = skb_peek_tail(&sk->sk_write_queue);
unsigned int mss_now;
int mss_now;
/* Optimization, tack on the FIN if we have a queue of
* unsent frames. But be careful about outgoing SACKS
* and IP options.
*/
mss_now = tcp_current_mss(sk, 1);
mss_now = tcp_current_mss(sk, 1, NULL);
if (sk->sk_send_head != NULL) {
TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN;
......@@ -1127,6 +1145,7 @@ void tcp_send_fin(struct sock *sk)
skb->csum = 0;
TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_FIN);
TCP_SKB_CB(skb)->sacked = 0;
TCP_SKB_CB(skb)->tso_factor = 1;
/* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
TCP_SKB_CB(skb)->seq = tp->write_seq;
......@@ -1158,6 +1177,7 @@ void tcp_send_active_reset(struct sock *sk, int priority)
skb->csum = 0;
TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_RST);
TCP_SKB_CB(skb)->sacked = 0;
TCP_SKB_CB(skb)->tso_factor = 1;
/* Send it off. */
TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk, tp);
......@@ -1237,6 +1257,8 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
th->dest = req->rmt_port;
TCP_SKB_CB(skb)->seq = req->snt_isn;
TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
TCP_SKB_CB(skb)->sacked = 0;
TCP_SKB_CB(skb)->tso_factor = 1;
th->seq = htonl(TCP_SKB_CB(skb)->seq);
th->ack_seq = htonl(req->rcv_isn + 1);
if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
......@@ -1338,6 +1360,7 @@ int tcp_connect(struct sock *sk)
TCP_SKB_CB(buff)->flags = TCPCB_FLAG_SYN;
TCP_ECN_send_syn(sk, tp, buff);
TCP_SKB_CB(buff)->sacked = 0;
TCP_SKB_CB(buff)->tso_factor = 1;
buff->csum = 0;
TCP_SKB_CB(buff)->seq = tp->write_seq++;
TCP_SKB_CB(buff)->end_seq = tp->write_seq;
......@@ -1350,7 +1373,7 @@ int tcp_connect(struct sock *sk)
tp->retrans_stamp = TCP_SKB_CB(buff)->when;
__skb_queue_tail(&sk->sk_write_queue, buff);
sk_charge_skb(sk, buff);
tp->packets_out++;
tcp_inc_pcount(&tp->packets_out, buff);
tcp_transmit_skb(sk, skb_clone(buff, GFP_KERNEL));
TCP_INC_STATS(TCP_MIB_ACTIVEOPENS);
......@@ -1437,6 +1460,7 @@ void tcp_send_ack(struct sock *sk)
buff->csum = 0;
TCP_SKB_CB(buff)->flags = TCPCB_FLAG_ACK;
TCP_SKB_CB(buff)->sacked = 0;
TCP_SKB_CB(buff)->tso_factor = 1;
/* Send it off, this clears delayed acks for us. */
TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk, tp);
......@@ -1471,6 +1495,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
skb->csum = 0;
TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK;
TCP_SKB_CB(skb)->sacked = urgent;
TCP_SKB_CB(skb)->tso_factor = 1;
/* Use a previous sequence. This should cause the other
* end to send an ack. Don't queue or clone SKB, just
......@@ -1491,7 +1516,7 @@ int tcp_write_wakeup(struct sock *sk)
if ((skb = sk->sk_send_head) != NULL &&
before(TCP_SKB_CB(skb)->seq, tp->snd_una+tp->snd_wnd)) {
int err;
int mss = tcp_current_mss(sk, 0);
int mss = tcp_current_mss(sk, 0, NULL);
int seg_size = tp->snd_una+tp->snd_wnd-TCP_SKB_CB(skb)->seq;
if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq))
......@@ -1513,6 +1538,7 @@ int tcp_write_wakeup(struct sock *sk)
sk->sk_no_largesend = 1;
sk->sk_route_caps &= ~NETIF_F_TSO;
tp->mss_cache = tp->mss_cache_std;
tp->mss_tso_factor = 1;
}
}
TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
......@@ -1542,7 +1568,7 @@ void tcp_send_probe0(struct sock *sk)
err = tcp_write_wakeup(sk);
if (tp->packets_out || !sk->sk_send_head) {
if (tcp_get_pcount(&tp->packets_out) || !sk->sk_send_head) {
/* Cancel probe timer, if it is not required. */
tp->probes_out = 0;
tp->backoff = 0;
......
......@@ -121,7 +121,7 @@ static int tcp_out_of_resources(struct sock *sk, int do_reset)
* 1. Last segment was sent recently. */
if ((s32)(tcp_time_stamp - tp->lsndtime) <= TCP_TIMEWAIT_LEN ||
/* 2. Window is closed. */
(!tp->snd_wnd && !tp->packets_out))
(!tp->snd_wnd && !tcp_get_pcount(&tp->packets_out)))
do_reset = 1;
if (do_reset)
tcp_send_active_reset(sk, GFP_ATOMIC);
......@@ -269,7 +269,7 @@ static void tcp_probe_timer(struct sock *sk)
struct tcp_opt *tp = tcp_sk(sk);
int max_probes;
if (tp->packets_out || !sk->sk_send_head) {
if (tcp_get_pcount(&tp->packets_out) || !sk->sk_send_head) {
tp->probes_out = 0;
return;
}
......@@ -316,7 +316,7 @@ static void tcp_retransmit_timer(struct sock *sk)
{
struct tcp_opt *tp = tcp_sk(sk);
if (tp->packets_out == 0)
if (!tcp_get_pcount(&tp->packets_out))
goto out;
BUG_TRAP(!skb_queue_empty(&sk->sk_write_queue));
......@@ -606,7 +606,7 @@ static void tcp_keepalive_timer (unsigned long data)
elapsed = keepalive_time_when(tp);
/* It is alive without keepalive 8) */
if (tp->packets_out || sk->sk_send_head)
if (tcp_get_pcount(&tp->packets_out) || sk->sk_send_head)
goto resched;
elapsed = tcp_time_stamp - tp->rcv_tstamp;
......
......@@ -1929,7 +1929,8 @@ static int tcp_v6_init_sock(struct sock *sk)
*/
tp->snd_ssthresh = 0x7fffffff;
tp->snd_cwnd_clamp = ~0;
tp->mss_cache = 536;
tp->mss_cache_std = tp->mss_cache = 536;
tp->mss_tso_factor = 1;
tp->reordering = sysctl_tcp_reordering;
......
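
As a sanity check on the tcp_sync_mss() hunk above, here is a small standalone computation of mss_tso_factor under assumed values (a 1448-byte MSS as on Ethernet with TCP timestamps, a 10-segment congestion window, plain IPv4 with no extension headers). The numbers are illustrative and not taken from the patch.

    #include <stdio.h>

    int main(void)
    {
        unsigned int mss_now  = 1448;                 /* assumed effective MSS     */
        unsigned int snd_cwnd = 10;                   /* assumed congestion window */
        /* 65535 minus assumed IPv4 (20) and TCP-with-timestamps (32) headers. */
        unsigned int large_mss = 65535 - 20 - 32;

        unsigned int factor = large_mss / mss_now;    /* 45 real packets would fit */
        if (factor > snd_cwnd)
            factor = snd_cwnd;                        /* capped at the cwnd: 10    */

        printf("mss_tso_factor = %u, mss_cache = %u\n", factor, mss_now * factor);
        return 0;   /* prints: mss_tso_factor = 10, mss_cache = 14480 */
    }

So a TSO frame built at this point carries at most one congestion window's worth of segments, and tcp_current_mss(sk, 1, &factor) hands that same factor back to skb_entail() for the per-skb tso_factor field, which (as the commit message notes) is the part this initial version still gets wrong for individual SKBs.
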