Commit 363c9612 authored by David S. Miller

[TCP]: Fix congestion window expansion when using TSO.

We only do congestion window expansion on full packet
ACKs.  We should do it for ACKs of sub-packets of a
TSO frame as well.
Signed-off-by: David S. Miller <davem@davemloft.net>
parent e76e8754
...@@ -1180,7 +1180,8 @@ struct tcp_skb_cb { ...@@ -1180,7 +1180,8 @@ struct tcp_skb_cb {
__u16 urg_ptr; /* Valid w/URG flags is set. */ __u16 urg_ptr; /* Valid w/URG flags is set. */
__u32 ack_seq; /* Sequence number ACK'd */ __u32 ack_seq; /* Sequence number ACK'd */
__u32 tso_factor; __u16 tso_factor; /* If > 1, TSO frame */
__u16 tso_mss; /* MSS that FACTOR's in terms of*/
}; };
#define TCP_SKB_CB(__skb) ((struct tcp_skb_cb *)&((__skb)->cb[0])) #define TCP_SKB_CB(__skb) ((struct tcp_skb_cb *)&((__skb)->cb[0]))
......
...@@ -2355,6 +2355,86 @@ static __inline__ void tcp_ack_packets_out(struct sock *sk, struct tcp_opt *tp) ...@@ -2355,6 +2355,86 @@ static __inline__ void tcp_ack_packets_out(struct sock *sk, struct tcp_opt *tp)
} }
} }
/* Handle an ACK that covers only part of a TSO frame.
 *
 * Counts how many whole tso_mss-sized sub-packets, starting at the
 * frame's current sequence number, lie at or below tp->snd_una,
 * advances TCP_SKB_CB(skb)->seq past them and lowers the per-socket
 * packet counters (and the frame's tso_factor) by that count.
 *
 * @tp:      TCP state of the connection owning @skb.
 * @skb:     TSO frame whose end_seq is still after tp->snd_una.
 * @now:     current timestamp, used for the RTT sample.
 * @seq_rtt: in/out RTT sample; forced to -1 when retransmitted data
 *           is acked, filled in with (now - when) if still negative.
 *
 * Returns 0 when no complete sub-packet was acked, otherwise
 * FLAG_DATA_ACKED, possibly OR'ed with FLAG_RETRANS_DATA_ACKED.
 *
 * Known limitation: the ACK clock, the packet counters and the
 * congestion window all move forward, but no socket send buffer
 * space is released.  Adjusting skb->truesize and sk->sk_wmem_alloc
 * and then issuing a write-space wakeup is a possible future
 * enhancement.  WARNING: that is not trivial to get right.
 */
static int tcp_tso_acked(struct tcp_opt *tp, struct sk_buff *skb,
			 __u32 now, __s32 *seq_rtt)
{
	struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
	__u32 seg_len = scb->tso_mss;
	__u32 una = tp->snd_una;
	__u32 new_seq = scb->seq;
	__u32 nr_acked = 0;
	__u8 sacked;
	int flag;

	/* The caller only gets here when the TSO frame as a whole
	 * has not been acknowledged yet.
	 */
	BUG_ON(!after(scb->end_seq, una));

	/* Step over every sub-packet that ends at or before snd_una. */
	for (; !after(new_seq + seg_len, una); new_seq += seg_len)
		nr_acked++;

	if (!nr_acked)
		return 0;

	sacked = scb->sacked;

	/* Only the sequence number moves; the SKB data is left alone
	 * (no pskb_pull() here).  tcp_retransmit_skb() compares
	 * skb->len with the sequence span and trims the head itself,
	 * so that work is done only if the SKB really has to be
	 * retransmitted.
	 */
	scb->seq = new_seq;
	flag = FLAG_DATA_ACKED;

	if (sacked & TCPCB_RETRANS) {
		if (sacked & TCPCB_SACKED_RETRANS)
			tcp_dec_pcount_explicit(&tp->retrans_out, nr_acked);
		flag |= FLAG_RETRANS_DATA_ACKED;
		*seq_rtt = -1;
	} else if (*seq_rtt < 0) {
		/* Also covers sacked == 0: no RTT sample taken so far. */
		*seq_rtt = now - scb->when;
	}

	if (sacked & TCPCB_SACKED_ACKED)
		tcp_dec_pcount_explicit(&tp->sacked_out, nr_acked);

	if (sacked & TCPCB_LOST)
		tcp_dec_pcount_explicit(&tp->lost_out, nr_acked);

	if ((sacked & TCPCB_URG) &&
	    tp->urg_mode &&
	    !before(scb->seq, tp->snd_up))
		tp->urg_mode = 0;

	/* fackets_out is lowered by at most its current value. */
	if (tcp_get_pcount(&tp->fackets_out)) {
		__u32 dval = min(tcp_get_pcount(&tp->fackets_out),
				 nr_acked);

		tcp_dec_pcount_explicit(&tp->fackets_out, dval);
	}

	tcp_dec_pcount_explicit(&tp->packets_out, nr_acked);
	scb->tso_factor -= nr_acked;

	/* A partial ACK must leave at least one sub-packet behind. */
	BUG_ON(scb->tso_factor == 0);
	BUG_ON(!before(scb->seq, scb->end_seq));

	return flag;
}
/* Remove acknowledged frames from the retransmission queue. */ /* Remove acknowledged frames from the retransmission queue. */
static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
{ {
...@@ -2373,8 +2453,12 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) ...@@ -2373,8 +2453,12 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
* discard it as it's confirmed to have arrived at * discard it as it's confirmed to have arrived at
* the other end. * the other end.
*/ */
if (after(scb->end_seq, tp->snd_una)) if (after(scb->end_seq, tp->snd_una)) {
if (scb->tso_factor > 1)
acked |= tcp_tso_acked(tp, skb,
now, &seq_rtt);
break; break;
}
/* Initial outgoing SYN's get put onto the write_queue /* Initial outgoing SYN's get put onto the write_queue
* just like anything else we transmit. It is not * just like anything else we transmit. It is not
......
...@@ -436,6 +436,7 @@ void tcp_set_skb_tso_factor(struct sk_buff *skb, unsigned int mss_std) ...@@ -436,6 +436,7 @@ void tcp_set_skb_tso_factor(struct sk_buff *skb, unsigned int mss_std)
factor /= mss_std; factor /= mss_std;
TCP_SKB_CB(skb)->tso_factor = factor; TCP_SKB_CB(skb)->tso_factor = factor;
} }
TCP_SKB_CB(skb)->tso_mss = mss_std;
} }
/* Function to create two new TCP segments. Shrinks the given segment /* Function to create two new TCP segments. Shrinks the given segment
...@@ -552,7 +553,7 @@ unsigned char * __pskb_trim_head(struct sk_buff *skb, int len) ...@@ -552,7 +553,7 @@ unsigned char * __pskb_trim_head(struct sk_buff *skb, int len)
return skb->tail; return skb->tail;
} }
static int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) static int __tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
{ {
if (skb_cloned(skb) && if (skb_cloned(skb) &&
pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
...@@ -565,11 +566,20 @@ static int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) ...@@ -565,11 +566,20 @@ static int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
return -ENOMEM; return -ENOMEM;
} }
TCP_SKB_CB(skb)->seq += len;
skb->ip_summed = CHECKSUM_HW; skb->ip_summed = CHECKSUM_HW;
return 0; return 0;
} }
/* Trim @len bytes from the head of @skb via __tcp_trim_head() and,
 * only if that succeeded, advance the SKB's starting sequence number
 * by the same amount.
 *
 * Returns 0 on success, or the error reported by __tcp_trim_head(),
 * in which case TCP_SKB_CB(skb)->seq is left unchanged.
 */
static inline int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
{
	int rc = __tcp_trim_head(sk, skb, len);

	if (rc != 0)
		return rc;

	TCP_SKB_CB(skb)->seq += len;
	return 0;
}
/* This function synchronize snd mss to current pmtu/exthdr set. /* This function synchronize snd mss to current pmtu/exthdr set.
tp->user_mss is mss set by user by TCP_MAXSEG. It does NOT counts tp->user_mss is mss set by user by TCP_MAXSEG. It does NOT counts
...@@ -949,6 +959,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) ...@@ -949,6 +959,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
{ {
struct tcp_opt *tp = tcp_sk(sk); struct tcp_opt *tp = tcp_sk(sk);
unsigned int cur_mss = tcp_current_mss(sk, 0); unsigned int cur_mss = tcp_current_mss(sk, 0);
__u32 data_seq, data_end_seq;
int err; int err;
/* Do not sent more than we queued. 1/4 is reserved for possible /* Do not sent more than we queued. 1/4 is reserved for possible
...@@ -958,6 +969,22 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) ...@@ -958,6 +969,22 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf)) min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf))
return -EAGAIN; return -EAGAIN;
/* What is going on here? When TSO packets are partially ACK'd,
* we adjust the TCP_SKB_CB(skb)->seq value forward but we do
* not adjust the data area of the SKB. We defer that to here
* so that we can avoid the work unless we really retransmit
* the packet.
*/
data_seq = TCP_SKB_CB(skb)->seq;
data_end_seq = TCP_SKB_CB(skb)->end_seq;
if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)
data_end_seq--;
if (skb->len != (data_end_seq - data_seq)) {
if (__tcp_trim_head(sk, skb, data_end_seq - data_seq))
return -ENOMEM;
}
if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) { if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
BUG(); BUG();
...@@ -1191,6 +1218,7 @@ void tcp_send_fin(struct sock *sk) ...@@ -1191,6 +1218,7 @@ void tcp_send_fin(struct sock *sk)
TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_FIN); TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_FIN);
TCP_SKB_CB(skb)->sacked = 0; TCP_SKB_CB(skb)->sacked = 0;
TCP_SKB_CB(skb)->tso_factor = 1; TCP_SKB_CB(skb)->tso_factor = 1;
TCP_SKB_CB(skb)->tso_mss = tp->mss_cache_std;
/* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */ /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
TCP_SKB_CB(skb)->seq = tp->write_seq; TCP_SKB_CB(skb)->seq = tp->write_seq;
...@@ -1223,6 +1251,7 @@ void tcp_send_active_reset(struct sock *sk, int priority) ...@@ -1223,6 +1251,7 @@ void tcp_send_active_reset(struct sock *sk, int priority)
TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_RST); TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_RST);
TCP_SKB_CB(skb)->sacked = 0; TCP_SKB_CB(skb)->sacked = 0;
TCP_SKB_CB(skb)->tso_factor = 1; TCP_SKB_CB(skb)->tso_factor = 1;
TCP_SKB_CB(skb)->tso_mss = tp->mss_cache_std;
/* Send it off. */ /* Send it off. */
TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk, tp); TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk, tp);
...@@ -1304,6 +1333,7 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst, ...@@ -1304,6 +1333,7 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1; TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
TCP_SKB_CB(skb)->sacked = 0; TCP_SKB_CB(skb)->sacked = 0;
TCP_SKB_CB(skb)->tso_factor = 1; TCP_SKB_CB(skb)->tso_factor = 1;
TCP_SKB_CB(skb)->tso_mss = tp->mss_cache_std;
th->seq = htonl(TCP_SKB_CB(skb)->seq); th->seq = htonl(TCP_SKB_CB(skb)->seq);
th->ack_seq = htonl(req->rcv_isn + 1); th->ack_seq = htonl(req->rcv_isn + 1);
if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */ if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
...@@ -1406,6 +1436,7 @@ int tcp_connect(struct sock *sk) ...@@ -1406,6 +1436,7 @@ int tcp_connect(struct sock *sk)
TCP_ECN_send_syn(sk, tp, buff); TCP_ECN_send_syn(sk, tp, buff);
TCP_SKB_CB(buff)->sacked = 0; TCP_SKB_CB(buff)->sacked = 0;
TCP_SKB_CB(buff)->tso_factor = 1; TCP_SKB_CB(buff)->tso_factor = 1;
TCP_SKB_CB(buff)->tso_mss = tp->mss_cache_std;
buff->csum = 0; buff->csum = 0;
TCP_SKB_CB(buff)->seq = tp->write_seq++; TCP_SKB_CB(buff)->seq = tp->write_seq++;
TCP_SKB_CB(buff)->end_seq = tp->write_seq; TCP_SKB_CB(buff)->end_seq = tp->write_seq;
...@@ -1506,6 +1537,7 @@ void tcp_send_ack(struct sock *sk) ...@@ -1506,6 +1537,7 @@ void tcp_send_ack(struct sock *sk)
TCP_SKB_CB(buff)->flags = TCPCB_FLAG_ACK; TCP_SKB_CB(buff)->flags = TCPCB_FLAG_ACK;
TCP_SKB_CB(buff)->sacked = 0; TCP_SKB_CB(buff)->sacked = 0;
TCP_SKB_CB(buff)->tso_factor = 1; TCP_SKB_CB(buff)->tso_factor = 1;
TCP_SKB_CB(buff)->tso_mss = tp->mss_cache_std;
/* Send it off, this clears delayed acks for us. */ /* Send it off, this clears delayed acks for us. */
TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk, tp); TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk, tp);
...@@ -1541,6 +1573,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent) ...@@ -1541,6 +1573,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK; TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK;
TCP_SKB_CB(skb)->sacked = urgent; TCP_SKB_CB(skb)->sacked = urgent;
TCP_SKB_CB(skb)->tso_factor = 1; TCP_SKB_CB(skb)->tso_factor = 1;
TCP_SKB_CB(skb)->tso_mss = tp->mss_cache_std;
/* Use a previous sequence. This should cause the other /* Use a previous sequence. This should cause the other
* end to send an ack. Don't queue or clone SKB, just * end to send an ack. Don't queue or clone SKB, just
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment