Commit 074d9880 authored by Pasi Sarolahti, committed by David S. Miller

[TCP]: Add F-RTO support.

The motivation for this change is that, especially on some wireless network
technologies, delay spikes can trigger an RTO even though no packets have been
lost. In that case an F-RTO sender continues by transmitting new data after the
RTO retransmission, which avoids unnecessary retransmissions. If the sender sees
any duplicate ACKs after the RTO retransmission, it reverts to traditional
slow-start retransmissions. If new ACKs arrive after the forward transmissions,
they very likely indicate that the RTO was indeed spurious, and the sender can
keep sending new data (only the one segment retransmitted at RTO time has been
resent).
parent 583bcb77
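
For orientation before the patch itself, here is a small standalone sketch of the
ACK-handling idea the commit message describes. It is not the kernel code below;
the names (struct frto_sketch, frto_on_ack, acks_new_data) are invented purely
for illustration:

#include <stdio.h>

struct frto_sketch {
	int frto_counter;	/* 0 = not in F-RTO, 1..2 = new ACKs awaited after RTO */
};

/* Decide how to react to an ACK that arrives after the RTO retransmission. */
static const char *frto_on_ack(struct frto_sketch *s, int acks_new_data)
{
	if (!s->frto_counter)
		return "not in F-RTO, normal processing";

	if (!acks_new_data) {
		/* Duplicate ACK: something really was lost, fall back to the
		 * conventional go-back-N slow-start recovery. */
		s->frto_counter = 0;
		return "revert to conventional RTO recovery";
	}

	if (s->frto_counter == 1) {
		/* First new ACK after the RTO: transmit two previously unsent
		 * segments instead of retransmitting old data. */
		s->frto_counter = 2;
		return "send two new segments";
	}

	/* Second new ACK: only the one segment retransmitted at RTO time was
	 * resent, so the timeout was very likely spurious; carry on. */
	s->frto_counter = 0;
	return "RTO was spurious, continue sending new data";
}

int main(void)
{
	struct frto_sketch s = { .frto_counter = 1 };	/* RTO just fired */

	/* A delay spike ends: the delayed ACKs arrive and acknowledge new data. */
	printf("%s\n", frto_on_ack(&s, 1));
	printf("%s\n", frto_on_ack(&s, 1));
	return 0;
}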
@@ -301,7 +301,8 @@ enum
 	NET_IPV4_NONLOCAL_BIND=88,
 	NET_IPV4_ICMP_RATELIMIT=89,
 	NET_IPV4_ICMP_RATEMASK=90,
-	NET_TCP_TW_REUSE=91
+	NET_TCP_TW_REUSE=91,
+	NET_TCP_FRTO=92
 };
 
 enum {
...
@@ -366,6 +366,9 @@ struct tcp_opt
 	unsigned int		keepalive_intvl;  /* time interval between keep alive probes */
 	int			linger2;
 
+	int			frto_counter;	/* Number of new acks after RTO */
+	__u32			frto_highmark;	/* snd_nxt when RTO occurred */
+
 	unsigned long		last_synq_overflow;
 };
...
@@ -472,6 +472,7 @@ extern int sysctl_tcp_rmem[3];
 extern int sysctl_tcp_app_win;
 extern int sysctl_tcp_adv_win_scale;
 extern int sysctl_tcp_tw_reuse;
+extern int sysctl_tcp_frto;
 
 extern atomic_t tcp_memory_allocated;
 extern atomic_t tcp_sockets_allocated;
@@ -1855,4 +1856,17 @@ static inline void tcp_v4_setup_caps(struct sock *sk, struct dst_entry *dst)
 #define TCP_CHECK_TIMER(sk) do { } while (0)
 
+static inline int tcp_use_frto(const struct sock *sk)
+{
+	const struct tcp_opt *tp = tcp_sk(sk);
+
+	/* F-RTO must be activated in sysctl and there must be some
+	 * unsent new data, and the advertised window should allow
+	 * sending it.
+	 */
+	return (sysctl_tcp_frto && tp->send_head &&
+		!after(TCP_SKB_CB(tp->send_head)->end_seq,
+		       tp->snd_una + tp->snd_wnd));
+}
+
 #endif /* _TCP_H */
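
The window test in tcp_use_frto() relies on wrap-safe sequence-number comparison.
As a quick standalone illustration (the helpers below are stand-ins mirroring the
signed-subtraction trick used by the kernel's before()/after(), and the values are
hypothetical, chosen near the 32-bit wrap point):

#include <stdint.h>
#include <stdio.h>

/* Signed 32-bit subtraction gives the right ordering even across wraparound. */
static int seq_before(uint32_t seq1, uint32_t seq2)
{
	return (int32_t)(seq1 - seq2) < 0;
}

static int seq_after(uint32_t seq1, uint32_t seq2)
{
	return seq_before(seq2, seq1);
}

int main(void)
{
	uint32_t snd_una = 0xffffff00u;	/* oldest unacknowledged byte */
	uint32_t snd_wnd = 0x2000;	/* advertised window */
	uint32_t end_seq = 0x00000800u;	/* end of the first unsent segment, wrapped */

	/* Same shape as the check in tcp_use_frto(): the unsent segment must
	 * still fit inside the advertised window for F-RTO to be usable. */
	printf("segment fits window: %d\n",
	       !seq_after(end_seq, snd_una + snd_wnd));
	return 0;
}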
@@ -221,6 +221,8 @@ ctl_table ipv4_table[] = {
 	 &sysctl_icmp_ratemask, sizeof(int), 0644, NULL, &proc_dointvec},
 	{NET_TCP_TW_REUSE, "tcp_tw_reuse",
 	 &sysctl_tcp_tw_reuse, sizeof(int), 0644, NULL, &proc_dointvec},
+	{NET_TCP_FRTO, "tcp_frto",
+	 &sysctl_tcp_frto, sizeof(int), 0644, NULL, &proc_dointvec},
 	{0}
 };
...
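
Because the entry added above uses proc_dointvec with mode 0644, the new knob is
exposed as /proc/sys/net/ipv4/tcp_frto. A minimal userspace sketch for toggling it
(error handling trimmed, assumes sufficient privileges):

#include <stdio.h>

int main(void)
{
	/* Enable (1) or disable (0) F-RTO via the proc interface added above. */
	FILE *f = fopen("/proc/sys/net/ipv4/tcp_frto", "w");

	if (!f) {
		perror("tcp_frto");
		return 1;
	}
	fputs("1\n", f);
	fclose(f);
	return 0;
}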
@@ -60,6 +60,7 @@
  *		Pasi Sarolahti,
  *		Panu Kuhlberg:		Experimental audit of TCP (re)transmission
  *					engine. Lots of bugs are found.
+ *		Pasi Sarolahti:		F-RTO for dealing with spurious RTOs
  */
 
 #include <linux/config.h>
@@ -86,6 +87,7 @@ int sysctl_tcp_adv_win_scale = 2;
 int sysctl_tcp_stdurg = 0;
 int sysctl_tcp_rfc1337 = 0;
 int sysctl_tcp_max_orphans = NR_FILE;
+int sysctl_tcp_frto = 1;
 
 #define FLAG_DATA		0x01	/* Incoming frame contained data. */
 #define FLAG_WIN_UPDATE		0x02	/* Incoming ACK was a window update. */
@@ -968,6 +970,89 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_una)
 	return flag;
 }
 
+/* RTO occurred, but do not yet enter loss state. Instead, transmit two new
+ * segments to see from the next ACKs whether any data was really missing.
+ * If the RTO was spurious, new ACKs should arrive.
+ */
+void tcp_enter_frto(struct sock *sk)
+{
+	struct tcp_opt *tp = tcp_sk(sk);
+	struct sk_buff *skb;
+
+	tp->frto_counter = 1;
+
+	if (tp->ca_state <= TCP_CA_Disorder ||
+	    tp->snd_una == tp->high_seq ||
+	    (tp->ca_state == TCP_CA_Loss && !tp->retransmits)) {
+		tp->prior_ssthresh = tcp_current_ssthresh(tp);
+		tp->snd_ssthresh = tcp_recalc_ssthresh(tp);
+	}
+
+	/* Have to clear retransmission markers here to keep the bookkeeping
+	 * in shape, even though we are not yet in Loss state.
+	 * If something was really lost, it is eventually caught up
+	 * in tcp_enter_frto_loss.
+	 */
+	tp->retrans_out = 0;
+	tp->undo_marker = tp->snd_una;
+	tp->undo_retrans = 0;
+
+	for_retrans_queue(skb, sk, tp) {
+		TCP_SKB_CB(skb)->sacked &= ~TCPCB_RETRANS;
+	}
+	tcp_sync_left_out(tp);
+
+	tp->ca_state = TCP_CA_Open;
+	tp->frto_highmark = tp->snd_nxt;
+}
+
+/* Enter Loss state after F-RTO was applied. Dupack arrived after RTO,
+ * which indicates that we should follow the traditional RTO recovery,
+ * i.e. mark everything lost and do go-back-N retransmission.
+ */
+void tcp_enter_frto_loss(struct sock *sk)
+{
+	struct tcp_opt *tp = tcp_sk(sk);
+	struct sk_buff *skb;
+	int cnt = 0;
+
+	tp->sacked_out = 0;
+	tp->lost_out = 0;
+	tp->fackets_out = 0;
+
+	for_retrans_queue(skb, sk, tp) {
+		cnt++;
+		TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
+		if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
+			/* Do not mark those segments lost that were
+			 * forward transmitted after RTO
+			 */
+			if (!after(TCP_SKB_CB(skb)->end_seq,
+				   tp->frto_highmark)) {
+				TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
+				tp->lost_out++;
+			}
+		} else {
+			tp->sacked_out++;
+			tp->fackets_out = cnt;
+		}
+	}
+	tcp_sync_left_out(tp);
+
+	tp->snd_cwnd = tp->frto_counter + tcp_packets_in_flight(tp) + 1;
+	tp->snd_cwnd_cnt = 0;
+	tp->snd_cwnd_stamp = tcp_time_stamp;
+	tp->undo_marker = 0;
+	tp->frto_counter = 0;
+
+	tp->reordering = min_t(unsigned int, tp->reordering,
+			       sysctl_tcp_reordering);
+	tp->ca_state = TCP_CA_Loss;
+	tp->high_seq = tp->frto_highmark;
+	TCP_ECN_queue_cwr(tp);
+}
+
 void tcp_clear_retrans(struct tcp_opt *tp)
 {
 	tp->left_out = 0;
@@ -1539,7 +1624,8 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
 	/* E. Check state exit conditions. State can be terminated
 	 *    when high_seq is ACKed. */
 	if (tp->ca_state == TCP_CA_Open) {
-		BUG_TRAP(tp->retrans_out == 0);
+		if (!sysctl_tcp_frto)
+			BUG_TRAP(tp->retrans_out == 0);
 		tp->retrans_stamp = 0;
 	} else if (!before(tp->snd_una, tp->high_seq)) {
 		switch (tp->ca_state) {
@@ -1910,6 +1996,41 @@ static int tcp_ack_update_window(struct sock *sk, struct tcp_opt *tp,
 	return flag;
 }
 
+static void tcp_process_frto(struct sock *sk, u32 prior_snd_una)
+{
+	struct tcp_opt *tp = tcp_sk(sk);
+
+	tcp_sync_left_out(tp);
+
+	if (tp->snd_una == prior_snd_una ||
+	    !before(tp->snd_una, tp->frto_highmark)) {
+		/* RTO was caused by loss, start retransmitting in
+		 * go-back-N slow start
+		 */
+		tcp_enter_frto_loss(sk);
+		return;
+	}
+
+	if (tp->frto_counter == 1) {
+		/* First ACK after RTO advances the window: allow two new
+		 * segments out.
+		 */
+		tp->snd_cwnd = tcp_packets_in_flight(tp) + 2;
+	} else {
+		/* Also the second ACK after RTO advances the window.
+		 * The RTO was likely spurious. Reduce cwnd and continue
+		 * in congestion avoidance.
+		 */
+		tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
+		tcp_moderate_cwnd(tp);
+	}
+
+	/* F-RTO affects two new ACKs following the RTO.
+	 * At latest on the third ACK the TCP behavior is back to normal.
+	 */
+	tp->frto_counter = (tp->frto_counter + 1) % 3;
+}
+
 /* This routine deals with incoming acks, but not outgoing ones. */
 static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 {
@@ -1968,6 +2089,9 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 	/* See if we can take anything off of the retransmit queue. */
 	flag |= tcp_clean_rtx_queue(sk);
 
+	if (tp->frto_counter)
+		tcp_process_frto(sk, prior_snd_una);
+
 	if (tcp_ack_is_dubious(tp, flag)) {
 		/* Advanve CWND, if state allows this. */
 		if ((flag & FLAG_DATA_ACKED) &&
...
@@ -2032,6 +2032,9 @@ static int tcp_v4_init_sock(struct sock *sk)
 	 */
 	tp->snd_cwnd = 2;
 
+	tp->frto_counter = 0;
+	tp->frto_highmark = 0;
+
 	/* See draft-stevens-tcpca-spec-01 for discussion of the
 	 * initialization of these values.
 	 */
...
@@ -718,6 +718,9 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
 		newtp->snd_cwnd = 2;
 		newtp->snd_cwnd_cnt = 0;
 
+		newtp->frto_counter = 0;
+		newtp->frto_highmark = 0;
+
 		newtp->ca_state = TCP_CA_Open;
 		tcp_init_xmit_timers(newsk);
 		skb_queue_head_init(&newtp->out_of_order_queue);
...
@@ -374,7 +374,11 @@ static void tcp_retransmit_timer(struct sock *sk)
 		}
 	}
 
-	tcp_enter_loss(sk, 0);
+	if (tcp_use_frto(sk)) {
+		tcp_enter_frto(sk);
+	} else {
+		tcp_enter_loss(sk, 0);
+	}
 
 	if (tcp_retransmit_skb(sk, skb_peek(&sk->write_queue)) > 0) {
 		/* Retransmission failed because of local congestion,
...
@@ -1849,6 +1849,9 @@ static int tcp_v6_init_sock(struct sock *sk)
 	 */
 	tp->snd_cwnd = 2;
 
+	tp->frto_counter = 0;
+	tp->frto_highmark = 0;
+
 	/* See draft-stevens-tcpca-spec-01 for discussion of the
 	 * initialization of these values.
 	 */