Commit 477a6ccb authored by Alexey Kuznetsov, committed by David S. Miller

tcp_output.c, tcp.c, tcp.h:

  reconciling TCP_CORK and TCP_NODELAY
parent 040f3e3e
...@@ -450,6 +450,11 @@ static __inline__ int tcp_sk_listen_hashfn(struct sock *sk) ...@@ -450,6 +450,11 @@ static __inline__ int tcp_sk_listen_hashfn(struct sock *sk)
#define TCP_TIME_PROBE0 3 /* Zero window probe timer */ #define TCP_TIME_PROBE0 3 /* Zero window probe timer */
#define TCP_TIME_KEEPOPEN 4 /* Keepalive timer */ #define TCP_TIME_KEEPOPEN 4 /* Keepalive timer */
/* Flags in tp->nonagle */
#define TCP_NAGLE_OFF 1 /* Nagle's algo is disabled */
#define TCP_NAGLE_CORK 2 /* Socket is corked */
#define TCP_NAGLE_PUSH 4 /* Cork is overridden for already queued data */
/* sysctl variables for tcp */ /* sysctl variables for tcp */
extern int sysctl_max_syn_backlog; extern int sysctl_max_syn_backlog;
extern int sysctl_tcp_timestamps; extern int sysctl_tcp_timestamps;
...@@ -1216,7 +1221,7 @@ tcp_nagle_check(struct tcp_opt *tp, struct sk_buff *skb, unsigned mss_now, int n ...@@ -1216,7 +1221,7 @@ tcp_nagle_check(struct tcp_opt *tp, struct sk_buff *skb, unsigned mss_now, int n
{ {
return (skb->len < mss_now && return (skb->len < mss_now &&
!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) && !(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) &&
(nonagle == 2 || ((nonagle&TCP_NAGLE_CORK) ||
(!nonagle && (!nonagle &&
tp->packets_out && tp->packets_out &&
tcp_minshall_check(tp)))); tcp_minshall_check(tp))));
...@@ -1252,7 +1257,7 @@ static __inline__ int tcp_snd_test(struct tcp_opt *tp, struct sk_buff *skb, ...@@ -1252,7 +1257,7 @@ static __inline__ int tcp_snd_test(struct tcp_opt *tp, struct sk_buff *skb,
/* Don't be strict about the congestion window for the /* Don't be strict about the congestion window for the
* final FIN frame. -DaveM * final FIN frame. -DaveM
*/ */
return ((nonagle==1 || tp->urg_mode return (((nonagle&TCP_NAGLE_PUSH) || tp->urg_mode
|| !tcp_nagle_check(tp, skb, cur_mss, nonagle)) && || !tcp_nagle_check(tp, skb, cur_mss, nonagle)) &&
((tcp_packets_in_flight(tp) < tp->snd_cwnd) || ((tcp_packets_in_flight(tp) < tp->snd_cwnd) ||
(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)) && (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)) &&
...@@ -1283,7 +1288,7 @@ static __inline__ void __tcp_push_pending_frames(struct sock *sk, ...@@ -1283,7 +1288,7 @@ static __inline__ void __tcp_push_pending_frames(struct sock *sk,
if (skb) { if (skb) {
if (!tcp_skb_is_last(sk, skb)) if (!tcp_skb_is_last(sk, skb))
nonagle = 1; nonagle = TCP_NAGLE_PUSH;
if (!tcp_snd_test(tp, skb, cur_mss, nonagle) || if (!tcp_snd_test(tp, skb, cur_mss, nonagle) ||
tcp_write_xmit(sk, nonagle)) tcp_write_xmit(sk, nonagle))
tcp_check_probe_timer(sk, tp); tcp_check_probe_timer(sk, tp);
...@@ -1303,7 +1308,7 @@ static __inline__ int tcp_may_send_now(struct sock *sk, struct tcp_opt *tp) ...@@ -1303,7 +1308,7 @@ static __inline__ int tcp_may_send_now(struct sock *sk, struct tcp_opt *tp)
return (skb && return (skb &&
tcp_snd_test(tp, skb, tcp_current_mss(sk, 1), tcp_snd_test(tp, skb, tcp_current_mss(sk, 1),
tcp_skb_is_last(sk, skb) ? 1 : tp->nonagle)); tcp_skb_is_last(sk, skb) ? TCP_NAGLE_PUSH : tp->nonagle));
} }
static __inline__ void tcp_init_wl(struct tcp_opt *tp, u32 ack, u32 seq) static __inline__ void tcp_init_wl(struct tcp_opt *tp, u32 ack, u32 seq)
......
...@@ -800,6 +800,8 @@ static inline void skb_entail(struct sock *sk, struct tcp_opt *tp, ...@@ -800,6 +800,8 @@ static inline void skb_entail(struct sock *sk, struct tcp_opt *tp,
tcp_charge_skb(sk, skb); tcp_charge_skb(sk, skb);
if (!tp->send_head) if (!tp->send_head)
tp->send_head = skb; tp->send_head = skb;
else if (tp->nonagle&TCP_NAGLE_PUSH)
tp->nonagle &= ~TCP_NAGLE_PUSH;
} }
static inline void tcp_mark_urg(struct tcp_opt *tp, int flags, static inline void tcp_mark_urg(struct tcp_opt *tp, int flags,
...@@ -821,7 +823,7 @@ static inline void tcp_push(struct sock *sk, struct tcp_opt *tp, int flags, ...@@ -821,7 +823,7 @@ static inline void tcp_push(struct sock *sk, struct tcp_opt *tp, int flags,
tcp_mark_push(tp, skb); tcp_mark_push(tp, skb);
tcp_mark_urg(tp, flags, skb); tcp_mark_urg(tp, flags, skb);
__tcp_push_pending_frames(sk, tp, mss_now, __tcp_push_pending_frames(sk, tp, mss_now,
(flags & MSG_MORE) ? 2 : nonagle); (flags & MSG_MORE) ? TCP_NAGLE_CORK : nonagle);
} }
} }
...@@ -911,7 +913,7 @@ ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, ...@@ -911,7 +913,7 @@ ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset,
if (forced_push(tp)) { if (forced_push(tp)) {
tcp_mark_push(tp, skb); tcp_mark_push(tp, skb);
__tcp_push_pending_frames(sk, tp, mss_now, 1); __tcp_push_pending_frames(sk, tp, mss_now, TCP_NAGLE_PUSH);
} else if (skb == tp->send_head) } else if (skb == tp->send_head)
tcp_push_one(sk, mss_now); tcp_push_one(sk, mss_now);
continue; continue;
...@@ -920,7 +922,7 @@ ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, ...@@ -920,7 +922,7 @@ ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset,
set_bit(SOCK_NOSPACE, &sk->socket->flags); set_bit(SOCK_NOSPACE, &sk->socket->flags);
wait_for_memory: wait_for_memory:
if (copied) if (copied)
tcp_push(sk, tp, flags & ~MSG_MORE, mss_now, 1); tcp_push(sk, tp, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
if ((err = wait_for_tcp_memory(sk, &timeo)) != 0) if ((err = wait_for_tcp_memory(sk, &timeo)) != 0)
goto do_error; goto do_error;
...@@ -1199,7 +1201,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, ...@@ -1199,7 +1201,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
if (forced_push(tp)) { if (forced_push(tp)) {
tcp_mark_push(tp, skb); tcp_mark_push(tp, skb);
__tcp_push_pending_frames(sk, tp, mss_now, 1); __tcp_push_pending_frames(sk, tp, mss_now, TCP_NAGLE_PUSH);
} else if (skb == tp->send_head) } else if (skb == tp->send_head)
tcp_push_one(sk, mss_now); tcp_push_one(sk, mss_now);
continue; continue;
...@@ -1208,7 +1210,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, ...@@ -1208,7 +1210,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
set_bit(SOCK_NOSPACE, &sk->socket->flags); set_bit(SOCK_NOSPACE, &sk->socket->flags);
wait_for_memory: wait_for_memory:
if (copied) if (copied)
tcp_push(sk, tp, flags & ~MSG_MORE, mss_now, 1); tcp_push(sk, tp, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
if ((err = wait_for_tcp_memory(sk, &timeo)) != 0) if ((err = wait_for_tcp_memory(sk, &timeo)) != 0)
goto do_error; goto do_error;
...@@ -2300,16 +2302,20 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char *optval, ...@@ -2300,16 +2302,20 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char *optval,
break; break;
case TCP_NODELAY: case TCP_NODELAY:
/* You cannot try to use this and TCP_CORK in if (val) {
* tandem, so let the user know. /* TCP_NODELAY is weaker than TCP_CORK, so that
*/ * this option on corked socket is remembered, but
if (tp->nonagle == 2) { * it is not activated until cork is cleared.
err = -EINVAL; *
break; * However, when TCP_NODELAY is set we make
} * an explicit push, which overrides even TCP_CORK
tp->nonagle = !val ? 0 : 1; * for currently queued segments.
if (val) */
tp->nonagle |= TCP_NAGLE_OFF|TCP_NAGLE_PUSH;
tcp_push_pending_frames(sk, tp); tcp_push_pending_frames(sk, tp);
} else {
tp->nonagle &= ~TCP_NAGLE_OFF;
}
break; break;
case TCP_CORK: case TCP_CORK:
...@@ -2321,18 +2327,15 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char *optval, ...@@ -2321,18 +2327,15 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char *optval,
* out headers with a write() call first and then use * out headers with a write() call first and then use
* sendfile to send out the data parts. * sendfile to send out the data parts.
* *
* You cannot try to use TCP_NODELAY and this mechanism * TCP_CORK can be set together with TCP_NODELAY and it is
* at the same time, so let the user know. * stronger than TCP_NODELAY.
*/ */
if (tp->nonagle == 1) { if (val) {
err = -EINVAL; tp->nonagle |= TCP_NAGLE_CORK;
break;
}
if (val != 0) {
tp->nonagle = 2;
} else { } else {
tp->nonagle = 0; tp->nonagle &= ~TCP_NAGLE_CORK;
if (tp->nonagle&TCP_NAGLE_OFF)
tp->nonagle |= TCP_NAGLE_PUSH;
tcp_push_pending_frames(sk, tp); tcp_push_pending_frames(sk, tp);
} }
break; break;
...@@ -2455,10 +2458,10 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char *optval, ...@@ -2455,10 +2458,10 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char *optval,
val = tp->user_mss; val = tp->user_mss;
break; break;
case TCP_NODELAY: case TCP_NODELAY:
val = (tp->nonagle == 1); val = !!(tp->nonagle&TCP_NAGLE_OFF);
break; break;
case TCP_CORK: case TCP_CORK:
val = (tp->nonagle == 2); val = !!(tp->nonagle&TCP_NAGLE_CORK);
break; break;
case TCP_KEEPIDLE: case TCP_KEEPIDLE:
val = (tp->keepalive_time ? : sysctl_tcp_keepalive_time) / HZ; val = (tp->keepalive_time ? : sysctl_tcp_keepalive_time) / HZ;
......
...@@ -336,7 +336,7 @@ void tcp_push_one(struct sock *sk, unsigned cur_mss) ...@@ -336,7 +336,7 @@ void tcp_push_one(struct sock *sk, unsigned cur_mss)
struct tcp_opt *tp = tcp_sk(sk); struct tcp_opt *tp = tcp_sk(sk);
struct sk_buff *skb = tp->send_head; struct sk_buff *skb = tp->send_head;
if (tcp_snd_test(tp, skb, cur_mss, 1)) { if (tcp_snd_test(tp, skb, cur_mss, TCP_NAGLE_PUSH)) {
/* Send it out now. */ /* Send it out now. */
TCP_SKB_CB(skb)->when = tcp_time_stamp; TCP_SKB_CB(skb)->when = tcp_time_stamp;
if (tcp_transmit_skb(sk, skb_clone(skb, sk->allocation)) == 0) { if (tcp_transmit_skb(sk, skb_clone(skb, sk->allocation)) == 0) {
...@@ -632,7 +632,7 @@ int tcp_write_xmit(struct sock *sk, int nonagle) ...@@ -632,7 +632,7 @@ int tcp_write_xmit(struct sock *sk, int nonagle)
mss_now = tcp_current_mss(sk, 1); mss_now = tcp_current_mss(sk, 1);
while((skb = tp->send_head) && while((skb = tp->send_head) &&
tcp_snd_test(tp, skb, mss_now, tcp_skb_is_last(sk, skb) ? nonagle : 1)) { tcp_snd_test(tp, skb, mss_now, tcp_skb_is_last(sk, skb) ? nonagle : TCP_NAGLE_PUSH)) {
if (skb->len > mss_now) { if (skb->len > mss_now) {
if (tcp_fragment(sk, skb, mss_now)) if (tcp_fragment(sk, skb, mss_now))
break; break;
...@@ -1106,7 +1106,7 @@ void tcp_send_fin(struct sock *sk) ...@@ -1106,7 +1106,7 @@ void tcp_send_fin(struct sock *sk)
TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1; TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
tcp_send_skb(sk, skb, 1, mss_now); tcp_send_skb(sk, skb, 1, mss_now);
} }
__tcp_push_pending_frames(sk, tp, mss_now, 1); __tcp_push_pending_frames(sk, tp, mss_now, TCP_NAGLE_OFF);
} }
/* We get here when a process closes a file descriptor (either due to /* We get here when a process closes a file descriptor (either due to
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment