Commit e0f9759f authored by Eric Dumazet's avatar Eric Dumazet Committed by David S. Miller

tcp: try to keep packet if SYN_RCV race is lost

배석진 reported that in some situations, packets for a given 5-tuple
end up being processed by different CPUS.

This involves RPS, and fragmentation.

배석진 is seeing packet drops when a SYN_RECV request socket is
moved into ESTABLISH state. Other states are protected by socket lock.

This is caused by a CPU losing the race, and simply not caring enough.

Since this seems to occur frequently, we can do better and perform
a second lookup.

Note that all needed memory barriers are already in the existing code,
thanks to the spin_lock()/spin_unlock() pair in inet_ehash_insert()
and reqsk_put(). The second lookup must find the new socket,
unless it has already been accepted and closed by another cpu.

Note that the fragmentation could be avoided in the first place by
use of a correct TCP MSS option in the SYN{ACK} packet, but this
does not mean we can not be more robust.

Many thanks to 배석진 for a very detailed analysis.
Reported-by: default avatar배석진 <soukjin.bae@samsung.com>
Signed-off-by: default avatarEric Dumazet <edumazet@google.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 1d631583
...@@ -374,7 +374,8 @@ enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw, ...@@ -374,7 +374,8 @@ enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw,
struct sk_buff *skb, struct sk_buff *skb,
const struct tcphdr *th); const struct tcphdr *th);
struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
struct request_sock *req, bool fastopen); struct request_sock *req, bool fastopen,
bool *lost_race);
int tcp_child_process(struct sock *parent, struct sock *child, int tcp_child_process(struct sock *parent, struct sock *child,
struct sk_buff *skb); struct sk_buff *skb);
void tcp_enter_loss(struct sock *sk); void tcp_enter_loss(struct sock *sk);
......
...@@ -5870,10 +5870,12 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) ...@@ -5870,10 +5870,12 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
tp->rx_opt.saw_tstamp = 0; tp->rx_opt.saw_tstamp = 0;
req = tp->fastopen_rsk; req = tp->fastopen_rsk;
if (req) { if (req) {
bool req_stolen;
WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV && WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
sk->sk_state != TCP_FIN_WAIT1); sk->sk_state != TCP_FIN_WAIT1);
if (!tcp_check_req(sk, skb, req, true)) if (!tcp_check_req(sk, skb, req, true, &req_stolen))
goto discard; goto discard;
} }
......
...@@ -1672,6 +1672,7 @@ int tcp_v4_rcv(struct sk_buff *skb) ...@@ -1672,6 +1672,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
if (sk->sk_state == TCP_NEW_SYN_RECV) { if (sk->sk_state == TCP_NEW_SYN_RECV) {
struct request_sock *req = inet_reqsk(sk); struct request_sock *req = inet_reqsk(sk);
bool req_stolen = false;
struct sock *nsk; struct sock *nsk;
sk = req->rsk_listener; sk = req->rsk_listener;
...@@ -1694,10 +1695,20 @@ int tcp_v4_rcv(struct sk_buff *skb) ...@@ -1694,10 +1695,20 @@ int tcp_v4_rcv(struct sk_buff *skb)
th = (const struct tcphdr *)skb->data; th = (const struct tcphdr *)skb->data;
iph = ip_hdr(skb); iph = ip_hdr(skb);
tcp_v4_fill_cb(skb, iph, th); tcp_v4_fill_cb(skb, iph, th);
nsk = tcp_check_req(sk, skb, req, false); nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
} }
if (!nsk) { if (!nsk) {
reqsk_put(req); reqsk_put(req);
if (req_stolen) {
/* Another cpu got exclusive access to req
* and created a full blown socket.
* Try to feed this packet to this socket
* instead of discarding it.
*/
tcp_v4_restore_cb(skb);
sock_put(sk);
goto lookup;
}
goto discard_and_relse; goto discard_and_relse;
} }
if (nsk == sk) { if (nsk == sk) {
......
...@@ -578,7 +578,7 @@ EXPORT_SYMBOL(tcp_create_openreq_child); ...@@ -578,7 +578,7 @@ EXPORT_SYMBOL(tcp_create_openreq_child);
struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
struct request_sock *req, struct request_sock *req,
bool fastopen) bool fastopen, bool *req_stolen)
{ {
struct tcp_options_received tmp_opt; struct tcp_options_received tmp_opt;
struct sock *child; struct sock *child;
...@@ -785,6 +785,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, ...@@ -785,6 +785,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
sock_rps_save_rxhash(child, skb); sock_rps_save_rxhash(child, skb);
tcp_synack_rtt_meas(child, req); tcp_synack_rtt_meas(child, req);
*req_stolen = !own_req;
return inet_csk_complete_hashdance(sk, child, req, own_req); return inet_csk_complete_hashdance(sk, child, req, own_req);
listen_overflow: listen_overflow:
......
...@@ -1451,6 +1451,7 @@ static int tcp_v6_rcv(struct sk_buff *skb) ...@@ -1451,6 +1451,7 @@ static int tcp_v6_rcv(struct sk_buff *skb)
if (sk->sk_state == TCP_NEW_SYN_RECV) { if (sk->sk_state == TCP_NEW_SYN_RECV) {
struct request_sock *req = inet_reqsk(sk); struct request_sock *req = inet_reqsk(sk);
bool req_stolen = false;
struct sock *nsk; struct sock *nsk;
sk = req->rsk_listener; sk = req->rsk_listener;
...@@ -1470,10 +1471,20 @@ static int tcp_v6_rcv(struct sk_buff *skb) ...@@ -1470,10 +1471,20 @@ static int tcp_v6_rcv(struct sk_buff *skb)
th = (const struct tcphdr *)skb->data; th = (const struct tcphdr *)skb->data;
hdr = ipv6_hdr(skb); hdr = ipv6_hdr(skb);
tcp_v6_fill_cb(skb, hdr, th); tcp_v6_fill_cb(skb, hdr, th);
nsk = tcp_check_req(sk, skb, req, false); nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
} }
if (!nsk) { if (!nsk) {
reqsk_put(req); reqsk_put(req);
if (req_stolen) {
/* Another cpu got exclusive access to req
* and created a full blown socket.
* Try to feed this packet to this socket
* instead of discarding it.
*/
tcp_v6_restore_cb(skb);
sock_put(sk);
goto lookup;
}
goto discard_and_relse; goto discard_and_relse;
} }
if (nsk == sk) { if (nsk == sk) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment