Commit 2472186f authored by David S. Miller

Merge branch 'tcp-listener-fixes-and-improvement'

Eric Dumazet says:

====================
tcp: lockless listener fixes and improvement

This series fixes issues with TCP FastOpen vs. lockless listeners,
and with SYNACK messages being attached to request sockets.

The last patch then brings a performance improvement to syncookie
generation and validation.

Tested under a 4.3 Mpps SYN flood attack, the new perf profile
looks like:
    12.11%  [kernel]  [k] sha_transform
     5.83%  [kernel]  [k] tcp_conn_request
     4.59%  [kernel]  [k] __inet_lookup_listener
     4.11%  [kernel]  [k] ipt_do_table
     3.91%  [kernel]  [k] tcp_make_synack
     3.05%  [kernel]  [k] fib_table_lookup
     2.74%  [kernel]  [k] sock_wfree
     2.66%  [kernel]  [k] memcpy_erms
     2.12%  [kernel]  [k] tcp_v4_rcv
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 3e087caa a1a5344d
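The heart of the series is a new attach_listener argument threaded through
reqsk_alloc() and inet_reqsk_alloc(): a request socket created while
syncookies are in effect is never linked back to the listener, so the
SYN-flood path no longer takes and drops a listener reference for every
incoming SYN. Condensed from the request_sock.h hunk below:

        if (attach_listener) {
                sock_hold(sk_listener);          /* req holds the listener */
                req->rsk_listener = sk_listener;
        } else {
                req->rsk_listener = NULL;        /* syncookie path: no refcount */
        }

Callers pass true on the regular request path (DCCP, and tcp_conn_request()
when !want_cookie) and false from cookie_v4_check() and cookie_v6_check(),
where the request never outlives cookie validation.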
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -264,9 +264,9 @@ struct tcp6_timewait_sock {
 };

 #if IS_ENABLED(CONFIG_IPV6)
-static inline struct ipv6_pinfo * inet6_sk(const struct sock *__sk)
+static inline struct ipv6_pinfo *inet6_sk(const struct sock *__sk)
 {
-	return inet_sk(__sk)->pinet6;
+	return sk_fullsock(__sk) ? inet_sk(__sk)->pinet6 : NULL;
 }

 static inline struct raw6_sock *raw6_sk(const struct sock *sk)
...
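The sk_fullsock() checks added in this hunk and the ones below are needed
because, with SYNACK messages now attached to request sockets, skb->sk may
point at a request_sock minisock rather than a full socket, and dereferencing
full-socket fields such as pinet6 through it would be wrong. For reference,
the helper (defined in include/net/sock.h in kernels of this era, not part of
this diff) simply tests the socket state:

        static inline bool sk_fullsock(const struct sock *sk)
        {
                return (1 << sk->sk_state) & ~(TCPF_TIME_WAIT | TCPF_NEW_SYN_RECV);
        }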
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -245,7 +245,8 @@ static inline unsigned int __inet_ehashfn(const __be32 laddr,
 }

 struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
-				      struct sock *sk_listener);
+				      struct sock *sk_listener,
+				      bool attach_listener);

 static inline __u8 inet_sk_flowi_flags(const struct sock *sk)
 {
...
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -323,12 +323,15 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst,

 static inline unsigned int ip_skb_dst_mtu(const struct sk_buff *skb)
 {
-	if (!skb->sk || ip_sk_use_pmtu(skb->sk)) {
+	struct sock *sk = skb->sk;
+
+	if (!sk || !sk_fullsock(sk) || ip_sk_use_pmtu(sk)) {
 		bool forwarding = IPCB(skb)->flags & IPSKB_FORWARDED;
+
 		return ip_dst_mtu_maybe_forward(skb_dst(skb), forwarding);
-	} else {
-		return min(skb_dst(skb)->dev->mtu, IP_MAX_MTU);
 	}
+
+	return min(skb_dst(skb)->dev->mtu, IP_MAX_MTU);
 }

 u32 ip_idents_reserve(u32 hash, int segs);
...
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -80,7 +80,8 @@ static inline struct sock *req_to_sk(struct request_sock *req)
 }

 static inline struct request_sock *
-reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener)
+reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener,
+	    bool attach_listener)
 {
 	struct request_sock *req;
@@ -88,10 +89,15 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener)
 	if (req) {
 		req->rsk_ops = ops;
-		sock_hold(sk_listener);
-		req->rsk_listener = sk_listener;
+		if (attach_listener) {
+			sock_hold(sk_listener);
+			req->rsk_listener = sk_listener;
+		} else {
+			req->rsk_listener = NULL;
+		}
 		req_to_sk(req)->sk_prot = sk_listener->sk_prot;
 		sk_node_init(&req_to_sk(req)->sk_node);
+		sk_tx_queue_clear(req_to_sk(req));
 		req->saved_syn = NULL;
 		/* Following is temporary. It is coupled with debugging
 		 * helpers in reqsk_put() & reqsk_free()
...
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2974,6 +2974,7 @@ static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
 		new_index = skb_tx_hash(dev, skb);

 		if (queue_index != new_index && sk &&
+		    sk_fullsock(sk) &&
 		    rcu_access_pointer(sk->sk_dst_cache))
 			sk_tx_queue_set(sk, new_index);
...
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -595,7 +595,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
 		goto drop;

-	req = inet_reqsk_alloc(&dccp_request_sock_ops, sk);
+	req = inet_reqsk_alloc(&dccp_request_sock_ops, sk, true);
 	if (req == NULL)
 		goto drop;
...
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -319,7 +319,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
 		goto drop;

-	req = inet_reqsk_alloc(&dccp6_request_sock_ops, sk);
+	req = inet_reqsk_alloc(&dccp6_request_sock_ops, sk, true);
 	if (req == NULL)
 		goto drop;
...
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -326,7 +326,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
 		goto out;

 	ret = NULL;
-	req = inet_reqsk_alloc(&tcp_request_sock_ops, sk); /* for safety */
+	req = inet_reqsk_alloc(&tcp_request_sock_ops, sk, false); /* for safety */
 	if (!req)
 		goto out;
...
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -168,8 +168,6 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
 					  TCP_TIMEOUT_INIT, TCP_RTO_MAX);

 	atomic_set(&req->rsk_refcnt, 2);
-	/* Add the child socket directly into the accept queue */
-	inet_csk_reqsk_queue_add(sk, req, child);

 	/* Now finish processing the fastopen child socket. */
 	inet_csk(child)->icsk_af_ops->rebuild_header(child);
@@ -178,12 +176,10 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
 	tcp_init_metrics(child);
 	tcp_init_buffer_space(child);

-	/* Queue the data carried in the SYN packet. We need to first
-	 * bump skb's refcnt because the caller will attempt to free it.
-	 * Note that IPv6 might also have used skb_get() trick
-	 * in tcp_v6_conn_request() to keep this SYN around (treq->pktopts)
-	 * So we need to eventually get a clone of the packet,
-	 * before inserting it in sk_receive_queue.
+	/* Queue the data carried in the SYN packet.
+	 * We used to play tricky games with skb_get().
+	 * With lockless listener, it is a dead end.
+	 * Do not think about it.
 	 *
 	 * XXX (TFO) - we honor a zero-payload TFO request for now,
 	 * (any reason not to?) but no need to queue the skb since
@@ -191,12 +187,7 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
 	 */
 	end_seq = TCP_SKB_CB(skb)->end_seq;
 	if (end_seq != TCP_SKB_CB(skb)->seq + 1) {
-		struct sk_buff *skb2;
-
-		if (unlikely(skb_shared(skb)))
-			skb2 = skb_clone(skb, GFP_ATOMIC);
-		else
-			skb2 = skb_get(skb);
+		struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);

 		if (likely(skb2)) {
 			skb_dst_drop(skb2);
@@ -214,12 +205,9 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
 		}
 	}
 	tcp_rsk(req)->rcv_nxt = tp->rcv_nxt = end_seq;
-	sk->sk_data_ready(sk);
-	bh_unlock_sock(child);
-	/* Note: sock_put(child) will be done by tcp_conn_request()
-	 * after SYNACK packet is sent.
+	/* tcp_conn_request() is sending the SYNACK,
+	 * and queues the child into listener accept queue.
 	 */
-	WARN_ON(!req->sk);
 	return child;
 }
...
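The removals above pair with the additions to tcp_conn_request() in the
tcp_input.c hunk below: the fastopen child is now published to the accept
queue, and the listener woken, only after the SYNACK has been sent. The
resulting order (a sketch of the new code path, with arguments elided):

        af_ops->send_synack(fastopen_sk, ...);           /* 1: SYNACK goes out  */
        inet_csk_reqsk_queue_add(sk, req, fastopen_sk);  /* 2: publish child    */
        sk->sk_data_ready(sk);                           /* 3: wake up accept() */
        bh_unlock_sock(fastopen_sk);
        sock_put(fastopen_sk);

With a lockless listener, queueing the child before its SYNACK was sent left
a window in which another CPU could accept() and even close() the child while
it was still being set up; the skb_get() games are dropped for the same
reason, and an unconditional skb_clone() is taken instead.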
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6042,9 +6042,11 @@ static void tcp_openreq_init(struct request_sock *req,
 }

 struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
-				      struct sock *sk_listener)
+				      struct sock *sk_listener,
+				      bool attach_listener)
 {
-	struct request_sock *req = reqsk_alloc(ops, sk_listener);
+	struct request_sock *req = reqsk_alloc(ops, sk_listener,
+					       attach_listener);

 	if (req) {
 		struct inet_request_sock *ireq = inet_rsk(req);
@@ -6143,7 +6145,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 		goto drop;
 	}

-	req = inet_reqsk_alloc(rsk_ops, sk);
+	req = inet_reqsk_alloc(rsk_ops, sk, !want_cookie);
 	if (!req)
 		goto drop;
@@ -6229,12 +6231,16 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 	tcp_rsk(req)->txhash = net_tx_rndhash();
 	tcp_openreq_init_rwin(req, sk, dst);
 	if (!want_cookie) {
-		fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
 		tcp_reqsk_record_syn(sk, req, skb);
+		fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
 	}
 	if (fastopen_sk) {
 		af_ops->send_synack(fastopen_sk, dst, &fl, req,
 				    skb_get_queue_mapping(skb), &foc, false);
+		/* Add the child socket directly into the accept queue */
+		inet_csk_reqsk_queue_add(sk, req, fastopen_sk);
+		sk->sk_data_ready(sk);
+		bh_unlock_sock(fastopen_sk);
 		sock_put(fastopen_sk);
 	} else {
 		tcp_rsk(req)->tfo_listener = false;
...
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -170,7 +170,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 		goto out;

 	ret = NULL;
-	req = inet_reqsk_alloc(&tcp6_request_sock_ops, sk);
+	req = inet_reqsk_alloc(&tcp6_request_sock_ops, sk, false);
 	if (!req)
 		goto out;
...