Commit 889865cf authored by David S. Miller

Merge branch 'tcp-change-pingpong-to-3-in-delayed-ack-logic'

Wei Wang says:

====================
tcp: change pingpong to 3 in delayed ack logic

The TCP receiver today tries not to delay ACKs in order to speed up the
initial slow start (a.k.a. the QUICK ACK mechanism). However, the previous
design does not work well with modern TCP applications that start with an
application-level handshake. For example, an HTTPS server often
receives the SSL hello and responds right away, which triggers the TCP
stack to stop the quick ack and start delaying the ACKs based on only one
instance of ping-pong. This patchset changes the threshold from 1 to 3
ping-pong transactions, so that we only start to delay the ACKs after
the receiver responds with data quickly three times.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents fb1b6999 4a41f453
...@@ -314,4 +314,29 @@ int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname, ...@@ -314,4 +314,29 @@ int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname,
char __user *optval, unsigned int optlen); char __user *optval, unsigned int optlen);
struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu); struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu);
#define TCP_PINGPONG_THRESH 3
/* Enter pingpong mode: set the socket's pingpong counter to
 * TCP_PINGPONG_THRESH, the value at which inet_csk_in_pingpong_mode()
 * starts returning true.
 */
static inline void inet_csk_enter_pingpong_mode(struct sock *sk)
{
inet_csk(sk)->icsk_ack.pingpong = TCP_PINGPONG_THRESH;
}
/* Leave pingpong mode: reset the socket's pingpong counter to 0, which is
 * below TCP_PINGPONG_THRESH, so inet_csk_in_pingpong_mode() returns false
 * until the counter is raised again.
 */
static inline void inet_csk_exit_pingpong_mode(struct sock *sk)
{
inet_csk(sk)->icsk_ack.pingpong = 0;
}
/* Report whether @sk is in pingpong mode, i.e. whether its pingpong
 * counter has reached TCP_PINGPONG_THRESH.
 */
static inline bool inet_csk_in_pingpong_mode(struct sock *sk)
{
	/* Still below the threshold: not in pingpong mode yet. */
	if (inet_csk(sk)->icsk_ack.pingpong < TCP_PINGPONG_THRESH)
		return false;

	return true;
}
/* Bump the pingpong counter of @sk by one. The counter stops increasing
 * once it has reached U8_MAX.
 */
static inline void inet_csk_inc_pingpong_cnt(struct sock *sk)
{
	struct inet_connection_sock *csk = inet_csk(sk);

	/* Already at the ceiling: leave the counter untouched. */
	if (csk->icsk_ack.pingpong >= U8_MAX)
		return;

	csk->icsk_ack.pingpong++;
}
#endif /* _INET_CONNECTION_SOCK_H */ #endif /* _INET_CONNECTION_SOCK_H */
...@@ -480,7 +480,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, ...@@ -480,7 +480,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT); sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
} }
if (sk->sk_write_pending || icsk->icsk_ack.pingpong || if (sk->sk_write_pending || inet_csk_in_pingpong_mode(sk) ||
icsk->icsk_accept_queue.rskq_defer_accept) { icsk->icsk_accept_queue.rskq_defer_accept) {
/* Save one ACK. Data will be ready after /* Save one ACK. Data will be ready after
* several ticks, if write_pending is set. * several ticks, if write_pending is set.
......
...@@ -199,7 +199,7 @@ static void dccp_delack_timer(struct timer_list *t) ...@@ -199,7 +199,7 @@ static void dccp_delack_timer(struct timer_list *t)
icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER; icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER;
if (inet_csk_ack_scheduled(sk)) { if (inet_csk_ack_scheduled(sk)) {
if (!icsk->icsk_ack.pingpong) { if (!inet_csk_in_pingpong_mode(sk)) {
/* Delayed ACK missed: inflate ATO. */ /* Delayed ACK missed: inflate ATO. */
icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1,
icsk->icsk_rto); icsk->icsk_rto);
...@@ -207,7 +207,7 @@ static void dccp_delack_timer(struct timer_list *t) ...@@ -207,7 +207,7 @@ static void dccp_delack_timer(struct timer_list *t)
/* Delayed ACK missed: leave pingpong mode and /* Delayed ACK missed: leave pingpong mode and
* deflate ATO. * deflate ATO.
*/ */
icsk->icsk_ack.pingpong = 0; inet_csk_exit_pingpong_mode(sk);
icsk->icsk_ack.ato = TCP_ATO_MIN; icsk->icsk_ack.ato = TCP_ATO_MIN;
} }
dccp_send_ack(sk); dccp_send_ack(sk);
......
...@@ -1551,7 +1551,7 @@ static void tcp_cleanup_rbuf(struct sock *sk, int copied) ...@@ -1551,7 +1551,7 @@ static void tcp_cleanup_rbuf(struct sock *sk, int copied)
(copied > 0 && (copied > 0 &&
((icsk->icsk_ack.pending & ICSK_ACK_PUSHED2) || ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED2) ||
((icsk->icsk_ack.pending & ICSK_ACK_PUSHED) && ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED) &&
!icsk->icsk_ack.pingpong)) && !inet_csk_in_pingpong_mode(sk))) &&
!atomic_read(&sk->sk_rmem_alloc))) !atomic_read(&sk->sk_rmem_alloc)))
time_to_ack = true; time_to_ack = true;
} }
...@@ -2984,16 +2984,16 @@ static int do_tcp_setsockopt(struct sock *sk, int level, ...@@ -2984,16 +2984,16 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
case TCP_QUICKACK: case TCP_QUICKACK:
if (!val) { if (!val) {
icsk->icsk_ack.pingpong = 1; inet_csk_enter_pingpong_mode(sk);
} else { } else {
icsk->icsk_ack.pingpong = 0; inet_csk_exit_pingpong_mode(sk);
if ((1 << sk->sk_state) & if ((1 << sk->sk_state) &
(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) && (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) &&
inet_csk_ack_scheduled(sk)) { inet_csk_ack_scheduled(sk)) {
icsk->icsk_ack.pending |= ICSK_ACK_PUSHED; icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
tcp_cleanup_rbuf(sk, 1); tcp_cleanup_rbuf(sk, 1);
if (!(val & 1)) if (!(val & 1))
icsk->icsk_ack.pingpong = 1; inet_csk_enter_pingpong_mode(sk);
} }
} }
break; break;
...@@ -3407,7 +3407,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level, ...@@ -3407,7 +3407,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
return 0; return 0;
} }
case TCP_QUICKACK: case TCP_QUICKACK:
val = !icsk->icsk_ack.pingpong; val = !inet_csk_in_pingpong_mode(sk);
break; break;
case TCP_CONGESTION: case TCP_CONGESTION:
......
...@@ -221,7 +221,7 @@ void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks) ...@@ -221,7 +221,7 @@ void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
struct inet_connection_sock *icsk = inet_csk(sk); struct inet_connection_sock *icsk = inet_csk(sk);
tcp_incr_quickack(sk, max_quickacks); tcp_incr_quickack(sk, max_quickacks);
icsk->icsk_ack.pingpong = 0; inet_csk_exit_pingpong_mode(sk);
icsk->icsk_ack.ato = TCP_ATO_MIN; icsk->icsk_ack.ato = TCP_ATO_MIN;
} }
EXPORT_SYMBOL(tcp_enter_quickack_mode); EXPORT_SYMBOL(tcp_enter_quickack_mode);
...@@ -236,7 +236,7 @@ static bool tcp_in_quickack_mode(struct sock *sk) ...@@ -236,7 +236,7 @@ static bool tcp_in_quickack_mode(struct sock *sk)
const struct dst_entry *dst = __sk_dst_get(sk); const struct dst_entry *dst = __sk_dst_get(sk);
return (dst && dst_metric(dst, RTAX_QUICKACK)) || return (dst && dst_metric(dst, RTAX_QUICKACK)) ||
(icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong); (icsk->icsk_ack.quick && !inet_csk_in_pingpong_mode(sk));
} }
static void tcp_ecn_queue_cwr(struct tcp_sock *tp) static void tcp_ecn_queue_cwr(struct tcp_sock *tp)
...@@ -4094,7 +4094,7 @@ void tcp_fin(struct sock *sk) ...@@ -4094,7 +4094,7 @@ void tcp_fin(struct sock *sk)
case TCP_ESTABLISHED: case TCP_ESTABLISHED:
/* Move to CLOSE_WAIT */ /* Move to CLOSE_WAIT */
tcp_set_state(sk, TCP_CLOSE_WAIT); tcp_set_state(sk, TCP_CLOSE_WAIT);
inet_csk(sk)->icsk_ack.pingpong = 1; inet_csk_enter_pingpong_mode(sk);
break; break;
case TCP_CLOSE_WAIT: case TCP_CLOSE_WAIT:
...@@ -5889,7 +5889,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, ...@@ -5889,7 +5889,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
return -1; return -1;
if (sk->sk_write_pending || if (sk->sk_write_pending ||
icsk->icsk_accept_queue.rskq_defer_accept || icsk->icsk_accept_queue.rskq_defer_accept ||
icsk->icsk_ack.pingpong) { inet_csk_in_pingpong_mode(sk)) {
/* Save one ACK. Data will be ready after /* Save one ACK. Data will be ready after
* several ticks, if write_pending is set. * several ticks, if write_pending is set.
* *
......
...@@ -2437,7 +2437,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i) ...@@ -2437,7 +2437,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
refcount_read(&sk->sk_refcnt), sk, refcount_read(&sk->sk_refcnt), sk,
jiffies_to_clock_t(icsk->icsk_rto), jiffies_to_clock_t(icsk->icsk_rto),
jiffies_to_clock_t(icsk->icsk_ack.ato), jiffies_to_clock_t(icsk->icsk_ack.ato),
(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sk),
tp->snd_cwnd, tp->snd_cwnd,
state == TCP_LISTEN ? state == TCP_LISTEN ?
fastopenq->max_qlen : fastopenq->max_qlen :
......
...@@ -165,13 +165,16 @@ static void tcp_event_data_sent(struct tcp_sock *tp, ...@@ -165,13 +165,16 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
if (tcp_packets_in_flight(tp) == 0) if (tcp_packets_in_flight(tp) == 0)
tcp_ca_event(sk, CA_EVENT_TX_START); tcp_ca_event(sk, CA_EVENT_TX_START);
tp->lsndtime = now; /* If this is the first data packet sent in response to the
* previous received data,
/* If it is a reply for ato after last received * and it is a reply for ato after last received packet,
* packet, enter pingpong mode. * increase pingpong count.
*/ */
if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato) if (before(tp->lsndtime, icsk->icsk_ack.lrcvtime) &&
icsk->icsk_ack.pingpong = 1; (u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
inet_csk_inc_pingpong_cnt(sk);
tp->lsndtime = now;
} }
/* Account for an ACK we sent. */ /* Account for an ACK we sent. */
...@@ -3569,7 +3572,7 @@ void tcp_send_delayed_ack(struct sock *sk) ...@@ -3569,7 +3572,7 @@ void tcp_send_delayed_ack(struct sock *sk)
const struct tcp_sock *tp = tcp_sk(sk); const struct tcp_sock *tp = tcp_sk(sk);
int max_ato = HZ / 2; int max_ato = HZ / 2;
if (icsk->icsk_ack.pingpong || if (inet_csk_in_pingpong_mode(sk) ||
(icsk->icsk_ack.pending & ICSK_ACK_PUSHED)) (icsk->icsk_ack.pending & ICSK_ACK_PUSHED))
max_ato = TCP_DELACK_MAX; max_ato = TCP_DELACK_MAX;
......
...@@ -277,14 +277,14 @@ void tcp_delack_timer_handler(struct sock *sk) ...@@ -277,14 +277,14 @@ void tcp_delack_timer_handler(struct sock *sk)
icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER; icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER;
if (inet_csk_ack_scheduled(sk)) { if (inet_csk_ack_scheduled(sk)) {
if (!icsk->icsk_ack.pingpong) { if (!inet_csk_in_pingpong_mode(sk)) {
/* Delayed ACK missed: inflate ATO. */ /* Delayed ACK missed: inflate ATO. */
icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, icsk->icsk_rto); icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, icsk->icsk_rto);
} else { } else {
/* Delayed ACK missed: leave pingpong mode and /* Delayed ACK missed: leave pingpong mode and
* deflate ATO. * deflate ATO.
*/ */
icsk->icsk_ack.pingpong = 0; inet_csk_exit_pingpong_mode(sk);
icsk->icsk_ack.ato = TCP_ATO_MIN; icsk->icsk_ack.ato = TCP_ATO_MIN;
} }
tcp_mstamp_refresh(tcp_sk(sk)); tcp_mstamp_refresh(tcp_sk(sk));
......
...@@ -1864,7 +1864,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) ...@@ -1864,7 +1864,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
refcount_read(&sp->sk_refcnt), sp, refcount_read(&sp->sk_refcnt), sp,
jiffies_to_clock_t(icsk->icsk_rto), jiffies_to_clock_t(icsk->icsk_rto),
jiffies_to_clock_t(icsk->icsk_ack.ato), jiffies_to_clock_t(icsk->icsk_ack.ato),
(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
tp->snd_cwnd, tp->snd_cwnd,
state == TCP_LISTEN ? state == TCP_LISTEN ?
fastopenq->max_qlen : fastopenq->max_qlen :
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment