Commit d1361840 authored by Eric Dumazet's avatar Eric Dumazet Committed by David S. Miller

tcp: fix SO_RCVLOWAT and RCVBUF autotuning

Applications might use SO_RCVLOWAT on TCP socket hoping to receive
one [E]POLLIN event only when a given amount of bytes are ready in socket
receive queue.

Problem is that receive autotuning is not aware of this constraint,
meaning sk_rcvbuf might be too small to allow all bytes to be stored.

Add a new (struct proto_ops)->set_rcvlowat method so that a protocol
can override the default setsockopt(SO_RCVLOWAT) behavior.
Signed-off-by: default avatarEric Dumazet <edumazet@google.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 10b19aea
......@@ -197,6 +197,7 @@ struct proto_ops {
int offset, size_t size, int flags);
int (*sendmsg_locked)(struct sock *sk, struct msghdr *msg,
size_t size);
int (*set_rcvlowat)(struct sock *sk, int val);
};
#define DECLARE_SOCKADDR(type, dst, src) \
......
......@@ -402,6 +402,7 @@ void tcp_set_keepalive(struct sock *sk, int val);
void tcp_syn_ack_timeout(const struct request_sock *req);
int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
int flags, int *addr_len);
int tcp_set_rcvlowat(struct sock *sk, int val);
void tcp_parse_options(const struct net *net, const struct sk_buff *skb,
struct tcp_options_received *opt_rx,
int estab, struct tcp_fastopen_cookie *foc);
......
......@@ -905,7 +905,10 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
case SO_RCVLOWAT:
if (val < 0)
val = INT_MAX;
sk->sk_rcvlowat = val ? : 1;
if (sock->ops->set_rcvlowat)
ret = sock->ops->set_rcvlowat(sk, val);
else
sk->sk_rcvlowat = val ? : 1;
break;
case SO_RCVTIMEO:
......
......@@ -1006,6 +1006,7 @@ const struct proto_ops inet_stream_ops = {
.compat_getsockopt = compat_sock_common_getsockopt,
.compat_ioctl = inet_compat_ioctl,
#endif
.set_rcvlowat = tcp_set_rcvlowat,
};
EXPORT_SYMBOL(inet_stream_ops);
......
......@@ -1701,6 +1701,27 @@ int tcp_peek_len(struct socket *sock)
}
EXPORT_SYMBOL(tcp_peek_len);
/* Make sure sk_rcvbuf is big enough to satisfy SO_RCVLOWAT hint */
int tcp_set_rcvlowat(struct sock *sk, int val)
{
sk->sk_rcvlowat = val ? : 1;
if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
return 0;
/* val comes from user space and might be close to INT_MAX */
val <<= 1;
if (val < 0)
val = INT_MAX;
val = min(val, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
if (val > sk->sk_rcvbuf) {
sk->sk_rcvbuf = val;
tcp_sk(sk)->window_clamp = tcp_win_from_space(sk, val);
}
return 0;
}
EXPORT_SYMBOL(tcp_set_rcvlowat);
static void tcp_update_recv_tstamps(struct sk_buff *skb,
struct scm_timestamping *tss)
{
......
......@@ -590,6 +590,7 @@ const struct proto_ops inet6_stream_ops = {
.compat_setsockopt = compat_sock_common_setsockopt,
.compat_getsockopt = compat_sock_common_getsockopt,
#endif
.set_rcvlowat = tcp_set_rcvlowat,
};
const struct proto_ops inet6_dgram_ops = {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment