Commit 23729ff2 authored by Stanislav Fomichev's avatar Stanislav Fomichev Committed by Daniel Borkmann

bpf: add BPF_CGROUP_SOCK_OPS callback that is executed on every RTT

Performance impact should be minimal because it's under a new
BPF_SOCK_OPS_RTT_CB_FLAG flag that has to be explicitly enabled.
Suggested-by: default avatarEric Dumazet <edumazet@google.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Priyaranjan Jha <priyarjha@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: default avatarSoheil Hassas Yeganeh <soheil@google.com>
Acked-by: default avatarYuchung Cheng <ycheng@google.com>
Signed-off-by: default avatarStanislav Fomichev <sdf@google.com>
Signed-off-by: default avatarDaniel Borkmann <daniel@iogearbox.net>
parent d2f5bbbc
...@@ -2221,6 +2221,14 @@ static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk) ...@@ -2221,6 +2221,14 @@ static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk)
return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN, 0, NULL) == 1); return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN, 0, NULL) == 1);
} }
static inline void tcp_bpf_rtt(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RTT_CB_FLAG))
tcp_call_bpf(sk, BPF_SOCK_OPS_RTT_CB, 0, NULL);
}
#if IS_ENABLED(CONFIG_SMC) #if IS_ENABLED(CONFIG_SMC)
extern struct static_key_false tcp_have_smc; extern struct static_key_false tcp_have_smc;
#endif #endif
......
...@@ -1770,6 +1770,7 @@ union bpf_attr { ...@@ -1770,6 +1770,7 @@ union bpf_attr {
* * **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out) * * **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out)
* * **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission) * * **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission)
* * **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change) * * **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change)
* * **BPF_SOCK_OPS_RTT_CB_FLAG** (every RTT)
* *
* Therefore, this function can be used to clear a callback flag by * Therefore, this function can be used to clear a callback flag by
* setting the appropriate bit to zero. e.g. to disable the RTO * setting the appropriate bit to zero. e.g. to disable the RTO
...@@ -3314,7 +3315,8 @@ struct bpf_sock_ops { ...@@ -3314,7 +3315,8 @@ struct bpf_sock_ops {
#define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0) #define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0)
#define BPF_SOCK_OPS_RETRANS_CB_FLAG (1<<1) #define BPF_SOCK_OPS_RETRANS_CB_FLAG (1<<1)
#define BPF_SOCK_OPS_STATE_CB_FLAG (1<<2) #define BPF_SOCK_OPS_STATE_CB_FLAG (1<<2)
#define BPF_SOCK_OPS_ALL_CB_FLAGS 0x7 /* Mask of all currently #define BPF_SOCK_OPS_RTT_CB_FLAG (1<<3)
#define BPF_SOCK_OPS_ALL_CB_FLAGS 0xF /* Mask of all currently
* supported cb flags * supported cb flags
*/ */
...@@ -3369,6 +3371,8 @@ enum { ...@@ -3369,6 +3371,8 @@ enum {
BPF_SOCK_OPS_TCP_LISTEN_CB, /* Called on listen(2), right after BPF_SOCK_OPS_TCP_LISTEN_CB, /* Called on listen(2), right after
* socket transition to LISTEN state. * socket transition to LISTEN state.
*/ */
BPF_SOCK_OPS_RTT_CB, /* Called on every RTT.
*/
}; };
/* List of TCP states. There is a build check in net/ipv4/tcp.c to detect /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
......
...@@ -778,6 +778,8 @@ static void tcp_rtt_estimator(struct sock *sk, long mrtt_us) ...@@ -778,6 +778,8 @@ static void tcp_rtt_estimator(struct sock *sk, long mrtt_us)
tp->rttvar_us -= (tp->rttvar_us - tp->mdev_max_us) >> 2; tp->rttvar_us -= (tp->rttvar_us - tp->mdev_max_us) >> 2;
tp->rtt_seq = tp->snd_nxt; tp->rtt_seq = tp->snd_nxt;
tp->mdev_max_us = tcp_rto_min_us(sk); tp->mdev_max_us = tcp_rto_min_us(sk);
tcp_bpf_rtt(sk);
} }
} else { } else {
/* no previous measure. */ /* no previous measure. */
...@@ -786,6 +788,8 @@ static void tcp_rtt_estimator(struct sock *sk, long mrtt_us) ...@@ -786,6 +788,8 @@ static void tcp_rtt_estimator(struct sock *sk, long mrtt_us)
tp->rttvar_us = max(tp->mdev_us, tcp_rto_min_us(sk)); tp->rttvar_us = max(tp->mdev_us, tcp_rto_min_us(sk));
tp->mdev_max_us = tp->rttvar_us; tp->mdev_max_us = tp->rttvar_us;
tp->rtt_seq = tp->snd_nxt; tp->rtt_seq = tp->snd_nxt;
tcp_bpf_rtt(sk);
} }
tp->srtt_us = max(1U, srtt); tp->srtt_us = max(1U, srtt);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment