Commit e66f33bd authored by David S. Miller

Merge branch 'tcp-active-reset'

Jason Xing says:

===================
tcp: completely support active reset

This time the patch series finally covers all the cases in the active
reset logic. After this, the exact reason for each active reset is known.

v4
Link:
1. revise the changelog to avoid future confusion in patch [5/7] (Eric)
2. revise the changelog of patch [6/7] like above.
3. add reviewed-by tags (Eric)

v3
Link: https://lore.kernel.org/all/20240731120955.23542-1-kerneljasonxing@gmail.com/
1. introduce TCP_DISCONNECT_WITH_DATA reason (Eric)
2. use a better name 'TCP_KEEPALIVE_TIMEOUT' (Eric)
3. add three reviewed-by tags (Eric)

v2
Link: https://lore.kernel.org/all/20240730133513.99986-1-kerneljasonxing@gmail.com/
1. use RFC 9293 in the comment and changelog instead of old RFC 793
2. correct the comment and changelog in patch 5
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 2c14119a ba0ca286
...@@ -17,6 +17,12 @@ ...@@ -17,6 +17,12 @@
FN(TCP_ABORT_ON_DATA) \ FN(TCP_ABORT_ON_DATA) \
FN(TCP_TIMEWAIT_SOCKET) \ FN(TCP_TIMEWAIT_SOCKET) \
FN(INVALID_SYN) \ FN(INVALID_SYN) \
FN(TCP_ABORT_ON_CLOSE) \
FN(TCP_ABORT_ON_LINGER) \
FN(TCP_ABORT_ON_MEMORY) \
FN(TCP_STATE) \
FN(TCP_KEEPALIVE_TIMEOUT) \
FN(TCP_DISCONNECT_WITH_DATA) \
FN(MPTCP_RST_EUNSPEC) \ FN(MPTCP_RST_EUNSPEC) \
FN(MPTCP_RST_EMPTCP) \ FN(MPTCP_RST_EMPTCP) \
FN(MPTCP_RST_ERESOURCE) \ FN(MPTCP_RST_ERESOURCE) \
...@@ -84,6 +90,39 @@ enum sk_rst_reason { ...@@ -84,6 +90,39 @@ enum sk_rst_reason {
* an error, send a reset" * an error, send a reset"
*/ */
SK_RST_REASON_INVALID_SYN, SK_RST_REASON_INVALID_SYN,
/**
* @SK_RST_REASON_TCP_ABORT_ON_CLOSE: abort on close
* corresponding to LINUX_MIB_TCPABORTONCLOSE
*/
SK_RST_REASON_TCP_ABORT_ON_CLOSE,
/**
* @SK_RST_REASON_TCP_ABORT_ON_LINGER: abort on linger
* corresponding to LINUX_MIB_TCPABORTONLINGER
*/
SK_RST_REASON_TCP_ABORT_ON_LINGER,
/**
* @SK_RST_REASON_TCP_ABORT_ON_MEMORY: abort on memory
* corresponding to LINUX_MIB_TCPABORTONMEMORY
*/
SK_RST_REASON_TCP_ABORT_ON_MEMORY,
/**
* @SK_RST_REASON_TCP_STATE: abort on tcp state
* Please see RFC 9293 for all possible reset conditions
*/
SK_RST_REASON_TCP_STATE,
/**
* @SK_RST_REASON_TCP_KEEPALIVE_TIMEOUT: time to timeout
* When we have already run out of all the chances, which means
* keepalive timeout, we have to reset the connection
*/
SK_RST_REASON_TCP_KEEPALIVE_TIMEOUT,
/**
* @SK_RST_REASON_TCP_DISCONNECT_WITH_DATA: disconnect when write
* queue is not empty
* It means user has written data into the write queue when doing
* disconnecting, so we have to send an RST.
*/
SK_RST_REASON_TCP_DISCONNECT_WITH_DATA,
/* Copy from include/uapi/linux/mptcp.h. /* Copy from include/uapi/linux/mptcp.h.
* These reset fields will not be changed since they adhere to * These reset fields will not be changed since they adhere to
......
...@@ -2833,7 +2833,7 @@ void __tcp_close(struct sock *sk, long timeout) ...@@ -2833,7 +2833,7 @@ void __tcp_close(struct sock *sk, long timeout)
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE);
tcp_set_state(sk, TCP_CLOSE); tcp_set_state(sk, TCP_CLOSE);
tcp_send_active_reset(sk, sk->sk_allocation, tcp_send_active_reset(sk, sk->sk_allocation,
SK_RST_REASON_NOT_SPECIFIED); SK_RST_REASON_TCP_ABORT_ON_CLOSE);
} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) { } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
/* Check zero linger _after_ checking for unread data. */ /* Check zero linger _after_ checking for unread data. */
sk->sk_prot->disconnect(sk, 0); sk->sk_prot->disconnect(sk, 0);
...@@ -2908,7 +2908,7 @@ void __tcp_close(struct sock *sk, long timeout) ...@@ -2908,7 +2908,7 @@ void __tcp_close(struct sock *sk, long timeout)
if (READ_ONCE(tp->linger2) < 0) { if (READ_ONCE(tp->linger2) < 0) {
tcp_set_state(sk, TCP_CLOSE); tcp_set_state(sk, TCP_CLOSE);
tcp_send_active_reset(sk, GFP_ATOMIC, tcp_send_active_reset(sk, GFP_ATOMIC,
SK_RST_REASON_NOT_SPECIFIED); SK_RST_REASON_TCP_ABORT_ON_LINGER);
__NET_INC_STATS(sock_net(sk), __NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPABORTONLINGER); LINUX_MIB_TCPABORTONLINGER);
} else { } else {
...@@ -2927,7 +2927,7 @@ void __tcp_close(struct sock *sk, long timeout) ...@@ -2927,7 +2927,7 @@ void __tcp_close(struct sock *sk, long timeout)
if (tcp_check_oom(sk, 0)) { if (tcp_check_oom(sk, 0)) {
tcp_set_state(sk, TCP_CLOSE); tcp_set_state(sk, TCP_CLOSE);
tcp_send_active_reset(sk, GFP_ATOMIC, tcp_send_active_reset(sk, GFP_ATOMIC,
SK_RST_REASON_NOT_SPECIFIED); SK_RST_REASON_TCP_ABORT_ON_MEMORY);
__NET_INC_STATS(sock_net(sk), __NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPABORTONMEMORY); LINUX_MIB_TCPABORTONMEMORY);
} else if (!check_net(sock_net(sk))) { } else if (!check_net(sock_net(sk))) {
...@@ -3025,13 +3025,16 @@ int tcp_disconnect(struct sock *sk, int flags) ...@@ -3025,13 +3025,16 @@ int tcp_disconnect(struct sock *sk, int flags)
inet_csk_listen_stop(sk); inet_csk_listen_stop(sk);
} else if (unlikely(tp->repair)) { } else if (unlikely(tp->repair)) {
WRITE_ONCE(sk->sk_err, ECONNABORTED); WRITE_ONCE(sk->sk_err, ECONNABORTED);
} else if (tcp_need_reset(old_state) || } else if (tcp_need_reset(old_state)) {
(tp->snd_nxt != tp->write_seq && tcp_send_active_reset(sk, gfp_any(), SK_RST_REASON_TCP_STATE);
(1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) { WRITE_ONCE(sk->sk_err, ECONNRESET);
} else if (tp->snd_nxt != tp->write_seq &&
(1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK)) {
/* The last check adjusts for discrepancy of Linux wrt. RFC /* The last check adjusts for discrepancy of Linux wrt. RFC
* states * states
*/ */
tcp_send_active_reset(sk, gfp_any(), SK_RST_REASON_NOT_SPECIFIED); tcp_send_active_reset(sk, gfp_any(),
SK_RST_REASON_TCP_DISCONNECT_WITH_DATA);
WRITE_ONCE(sk->sk_err, ECONNRESET); WRITE_ONCE(sk->sk_err, ECONNRESET);
} else if (old_state == TCP_SYN_SENT) } else if (old_state == TCP_SYN_SENT)
WRITE_ONCE(sk->sk_err, ECONNRESET); WRITE_ONCE(sk->sk_err, ECONNRESET);
...@@ -4649,7 +4652,7 @@ int tcp_abort(struct sock *sk, int err) ...@@ -4649,7 +4652,7 @@ int tcp_abort(struct sock *sk, int err)
if (!sock_flag(sk, SOCK_DEAD)) { if (!sock_flag(sk, SOCK_DEAD)) {
if (tcp_need_reset(sk->sk_state)) if (tcp_need_reset(sk->sk_state))
tcp_send_active_reset(sk, GFP_ATOMIC, tcp_send_active_reset(sk, GFP_ATOMIC,
SK_RST_REASON_NOT_SPECIFIED); SK_RST_REASON_TCP_STATE);
tcp_done_with_error(sk, err); tcp_done_with_error(sk, err);
} }
......
...@@ -3649,7 +3649,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority, ...@@ -3649,7 +3649,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority,
/* skb of trace_tcp_send_reset() keeps the skb that caused RST, /* skb of trace_tcp_send_reset() keeps the skb that caused RST,
* skb here is different to the troublesome skb, so use NULL * skb here is different to the troublesome skb, so use NULL
*/ */
trace_tcp_send_reset(sk, NULL, SK_RST_REASON_NOT_SPECIFIED); trace_tcp_send_reset(sk, NULL, reason);
} }
/* Send a crossed SYN-ACK during socket establishment. /* Send a crossed SYN-ACK during socket establishment.
......
...@@ -125,7 +125,7 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset) ...@@ -125,7 +125,7 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset)
do_reset = true; do_reset = true;
if (do_reset) if (do_reset)
tcp_send_active_reset(sk, GFP_ATOMIC, tcp_send_active_reset(sk, GFP_ATOMIC,
SK_RST_REASON_NOT_SPECIFIED); SK_RST_REASON_TCP_ABORT_ON_MEMORY);
tcp_done(sk); tcp_done(sk);
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY); __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY);
return 1; return 1;
...@@ -779,7 +779,7 @@ static void tcp_keepalive_timer (struct timer_list *t) ...@@ -779,7 +779,7 @@ static void tcp_keepalive_timer (struct timer_list *t)
goto out; goto out;
} }
} }
tcp_send_active_reset(sk, GFP_ATOMIC, SK_RST_REASON_NOT_SPECIFIED); tcp_send_active_reset(sk, GFP_ATOMIC, SK_RST_REASON_TCP_STATE);
goto death; goto death;
} }
...@@ -807,7 +807,7 @@ static void tcp_keepalive_timer (struct timer_list *t) ...@@ -807,7 +807,7 @@ static void tcp_keepalive_timer (struct timer_list *t)
(user_timeout == 0 && (user_timeout == 0 &&
icsk->icsk_probes_out >= keepalive_probes(tp))) { icsk->icsk_probes_out >= keepalive_probes(tp))) {
tcp_send_active_reset(sk, GFP_ATOMIC, tcp_send_active_reset(sk, GFP_ATOMIC,
SK_RST_REASON_NOT_SPECIFIED); SK_RST_REASON_TCP_KEEPALIVE_TIMEOUT);
tcp_write_err(sk); tcp_write_err(sk);
goto out; goto out;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment