Commit cbe35adf authored by Alexei Starovoitov

Merge branch 'selftests-bpf-retire-bpf_tcp_helpers-h'

Martin KaFai Lau says:

====================
selftests/bpf: Retire bpf_tcp_helpers.h

From: Martin KaFai Lau <martin.lau@kernel.org>

The earlier commit 8e6d9ae2 ("selftests/bpf: Use bpf_tracing.h instead of bpf_tcp_helpers.h")
removed the bpf_tcp_helpers.h usages from the non-networking tests.

This patch set continues that effort and retires bpf_tcp_helpers.h
from the networking tests (mostly tcp-cc related).

The main use of bpf_tcp_helpers.h is its partial kernel socket
definitions (e.g. sock, tcp_sock). New fields keep getting added to
those partial socket definitions even though everything is already
available in vmlinux.h. The recent bpf_cc_cubic.c test initially tried
to extend bpf_tcp_helpers.h but eventually used vmlinux.h instead. To avoid
this unnecessary detour for new tests and to have one consistent way
of using the kernel sockets, this patch set retires the bpf_tcp_helpers.h
usages and consolidates the tests on vmlinux.h.
====================

Link: https://lore.kernel.org/r/20240509175026.3423614-1-martin.lau@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents 00936709 6a650816
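For orientation, the consolidated style that this series converts the tcp-cc tests to looks roughly like the following minimal sketch. It is not part of the commit: the "example_ca" name and its ops are hypothetical, while tcp_reno_cong_avoid() is the kernel kfunc the dctcp test also calls, and bpf_tracing_net.h (after this series) pulls in vmlinux.h plus the tcp_sk()/inet_csk_ca() cast helpers.

#include "bpf_tracing_net.h"	/* vmlinux.h types + tcp_sk() etc. */
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char _license[] SEC("license") = "GPL";

extern void tcp_reno_cong_avoid(struct sock *sk, __u32 ack, __u32 acked) __ksym;

SEC("struct_ops")
__u32 BPF_PROG(example_ssthresh, struct sock *sk)
{
	__u32 half = tcp_sk(sk)->snd_cwnd >> 1;

	/* halve cwnd on loss, but never go below 2 packets */
	return half > 2 ? half : 2;
}

SEC("struct_ops")
void BPF_PROG(example_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
{
	/* delegate cwnd growth to the kernel's reno implementation */
	tcp_reno_cong_avoid(sk, ack, acked);
}

SEC("struct_ops")
__u32 BPF_PROG(example_undo_cwnd, struct sock *sk)
{
	return tcp_sk(sk)->snd_cwnd;
}

SEC(".struct_ops")
struct tcp_congestion_ops example_ca = {
	.ssthresh	= (void *)example_ssthresh,
	.cong_avoid	= (void *)example_cong_avoid,
	.undo_cwnd	= (void *)example_undo_cwnd,
	.name		= "bpf_example_ca",
};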
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __BPF_TCP_HELPERS_H
#define __BPF_TCP_HELPERS_H
#include <stdbool.h>
#include <linux/types.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_tracing.h>
#define BPF_STRUCT_OPS(name, args...) \
SEC("struct_ops/"#name) \
BPF_PROG(name, args)
#ifndef SOL_TCP
#define SOL_TCP 6
#endif
#ifndef TCP_CA_NAME_MAX
#define TCP_CA_NAME_MAX 16
#endif
#define tcp_jiffies32 ((__u32)bpf_jiffies64())
struct sock_common {
	unsigned char	skc_state;
	__u16		skc_num;
} __attribute__((preserve_access_index));

enum sk_pacing {
	SK_PACING_NONE		= 0,
	SK_PACING_NEEDED	= 1,
	SK_PACING_FQ		= 2,
};

struct sock {
	struct sock_common	__sk_common;
#define sk_state		__sk_common.skc_state
	unsigned long		sk_pacing_rate;
	__u32			sk_pacing_status; /* see enum sk_pacing */
} __attribute__((preserve_access_index));

struct inet_sock {
	struct sock		sk;
} __attribute__((preserve_access_index));

struct inet_connection_sock {
	struct inet_sock	icsk_inet;
	__u8			icsk_ca_state:6,
				icsk_ca_setsockopt:1,
				icsk_ca_dst_locked:1;
	struct {
		__u8		pending;
	} icsk_ack;
	__u64			icsk_ca_priv[104 / sizeof(__u64)];
} __attribute__((preserve_access_index));

struct request_sock {
	struct sock_common	__req_common;
} __attribute__((preserve_access_index));

struct tcp_sock {
	struct inet_connection_sock	inet_conn;

	__u32	rcv_nxt;
	__u32	snd_nxt;
	__u32	snd_una;
	__u32	window_clamp;
	__u8	ecn_flags;
	__u32	delivered;
	__u32	delivered_ce;
	__u32	snd_cwnd;
	__u32	snd_cwnd_cnt;
	__u32	snd_cwnd_clamp;
	__u32	snd_ssthresh;
	__u8	syn_data:1,	/* SYN includes data */
		syn_fastopen:1,	/* SYN includes Fast Open option */
		syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */
		syn_fastopen_ch:1, /* Active TFO re-enabling probe */
		syn_data_acked:1,/* data in SYN is acked by SYN-ACK */
		save_syn:1,	/* Save headers of SYN packet */
		is_cwnd_limited:1,/* forward progress limited by snd_cwnd? */
		syn_smc:1;	/* SYN includes SMC */
	__u32	max_packets_out;
	__u32	lsndtime;
	__u32	prior_cwnd;
	__u64	tcp_mstamp;	/* most recent packet received/sent */
	bool	is_mptcp;
} __attribute__((preserve_access_index));

static __always_inline struct inet_connection_sock *inet_csk(const struct sock *sk)
{
	return (struct inet_connection_sock *)sk;
}

static __always_inline void *inet_csk_ca(const struct sock *sk)
{
	return (void *)inet_csk(sk)->icsk_ca_priv;
}

static __always_inline struct tcp_sock *tcp_sk(const struct sock *sk)
{
	return (struct tcp_sock *)sk;
}

static __always_inline bool before(__u32 seq1, __u32 seq2)
{
	return (__s32)(seq1-seq2) < 0;
}
#define after(seq2, seq1) before(seq1, seq2)
#define TCP_ECN_OK 1
#define TCP_ECN_QUEUE_CWR 2
#define TCP_ECN_DEMAND_CWR 4
#define TCP_ECN_SEEN 8
enum inet_csk_ack_state_t {
	ICSK_ACK_SCHED	= 1,
	ICSK_ACK_TIMER	= 2,
	ICSK_ACK_PUSHED = 4,
	ICSK_ACK_PUSHED2 = 8,
	ICSK_ACK_NOW = 16	/* Send the next ACK immediately (once) */
};

enum tcp_ca_event {
	CA_EVENT_TX_START = 0,
	CA_EVENT_CWND_RESTART = 1,
	CA_EVENT_COMPLETE_CWR = 2,
	CA_EVENT_LOSS = 3,
	CA_EVENT_ECN_NO_CE = 4,
	CA_EVENT_ECN_IS_CE = 5,
};

struct ack_sample {
	__u32 pkts_acked;
	__s32 rtt_us;
	__u32 in_flight;
} __attribute__((preserve_access_index));

struct rate_sample {
	__u64 prior_mstamp;	/* starting timestamp for interval */
	__u32 prior_delivered;	/* tp->delivered at "prior_mstamp" */
	__s32 delivered;	/* number of packets delivered over interval */
	long interval_us;	/* time for tp->delivered to incr "delivered" */
	__u32 snd_interval_us;	/* snd interval for delivered packets */
	__u32 rcv_interval_us;	/* rcv interval for delivered packets */
	long rtt_us;		/* RTT of last (S)ACKed packet (or -1) */
	int losses;		/* number of packets marked lost upon ACK */
	__u32 acked_sacked;	/* number of packets newly (S)ACKed upon ACK */
	__u32 prior_in_flight;	/* in flight before this ACK */
	bool is_app_limited;	/* is sample from packet with bubble in pipe? */
	bool is_retrans;	/* is sample from retransmission? */
	bool is_ack_delayed;	/* is this (likely) a delayed ACK? */
} __attribute__((preserve_access_index));
#define TCP_CA_NAME_MAX 16
#define TCP_CONG_NEEDS_ECN 0x2
struct tcp_congestion_ops {
	char name[TCP_CA_NAME_MAX];
	__u32 flags;

	/* initialize private data (optional) */
	void (*init)(struct sock *sk);
	/* cleanup private data (optional) */
	void (*release)(struct sock *sk);

	/* return slow start threshold (required) */
	__u32 (*ssthresh)(struct sock *sk);
	/* do new cwnd calculation (required) */
	void (*cong_avoid)(struct sock *sk, __u32 ack, __u32 acked);
	/* call before changing ca_state (optional) */
	void (*set_state)(struct sock *sk, __u8 new_state);
	/* call when cwnd event occurs (optional) */
	void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev);
	/* call when ack arrives (optional) */
	void (*in_ack_event)(struct sock *sk, __u32 flags);
	/* new value of cwnd after loss (required) */
	__u32 (*undo_cwnd)(struct sock *sk);
	/* hook for packet ack accounting (optional) */
	void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample);
	/* override sysctl_tcp_min_tso_segs */
	__u32 (*min_tso_segs)(struct sock *sk);
	/* returns the multiplier used in tcp_sndbuf_expand (optional) */
	__u32 (*sndbuf_expand)(struct sock *sk);
	/* call when packets are delivered to update cwnd and pacing rate,
	 * after all the ca_state processing. (optional)
	 */
	void (*cong_control)(struct sock *sk, const struct rate_sample *rs);
	void *owner;
};
#define min(a, b) ((a) < (b) ? (a) : (b))
#define max(a, b) ((a) > (b) ? (a) : (b))
#define min_not_zero(x, y) ({			\
	typeof(x) __x = (x);			\
	typeof(y) __y = (y);			\
	__x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); })
static __always_inline bool tcp_in_slow_start(const struct tcp_sock *tp)
{
	return tp->snd_cwnd < tp->snd_ssthresh;
}

static __always_inline bool tcp_is_cwnd_limited(const struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);

	/* If in slow start, ensure cwnd grows to twice what was ACKed. */
	if (tcp_in_slow_start(tp))
		return tp->snd_cwnd < 2 * tp->max_packets_out;

	return !!BPF_CORE_READ_BITFIELD(tp, is_cwnd_limited);
}

static __always_inline bool tcp_cc_eq(const char *a, const char *b)
{
	int i;

	for (i = 0; i < TCP_CA_NAME_MAX; i++) {
		if (a[i] != b[i])
			return false;
		if (!a[i])
			break;
	}

	return true;
}
extern __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked) __ksym;
extern void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked) __ksym;
struct mptcp_sock {
	struct inet_connection_sock	sk;

	__u32		token;
	struct sock	*first;
	char		ca_name[TCP_CA_NAME_MAX];
} __attribute__((preserve_access_index));
#endif
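Before this series, a tcp-cc test built on the header above looked roughly like the following hypothetical fragment: the partial CO-RE definitions supply the socket types, and the BPF_STRUCT_OPS macro hides the SEC()/BPF_PROG boilerplate. "simple_ca" is illustrative only, not one of the selftests; tcp_slow_start() and tcp_cong_avoid_ai() are the kfuncs the header declares.

#include "bpf_tcp_helpers.h"

char _license[] SEC("license") = "GPL";

__u32 BPF_STRUCT_OPS(simple_ssthresh, struct sock *sk)
{
	return max(tcp_sk(sk)->snd_cwnd >> 1, 2U);
}

void BPF_STRUCT_OPS(simple_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (!tcp_is_cwnd_limited(sk))
		return;

	/* reno-style growth via the kfuncs declared in the header */
	if (tcp_in_slow_start(tp))
		acked = tcp_slow_start(tp, acked);
	if (acked)
		tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked);
}

__u32 BPF_STRUCT_OPS(simple_undo_cwnd, struct sock *sk)
{
	return max(tcp_sk(sk)->snd_cwnd, tcp_sk(sk)->prior_cwnd);
}

SEC(".struct_ops")
struct tcp_congestion_ops simple_ca = {
	.ssthresh	= (void *)simple_ssthresh,
	.cong_avoid	= (void *)simple_cong_avoid,
	.undo_cwnd	= (void *)simple_undo_cwnd,
	.name		= "bpf_simple_ca",
};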
@@ -13,15 +9,9 @@
  * kernel functions.
  */
-#include "vmlinux.h"
+#include "bpf_tracing_net.h"
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
-#include "bpf_tracing_net.h"
-
-#define BPF_STRUCT_OPS(name, args...) \
-SEC("struct_ops/"#name) \
-BPF_PROG(name, args)
 
 #define USEC_PER_SEC 1000000UL
 #define TCP_PACING_SS_RATIO (200)
@@ -40,16 +34,6 @@ extern __u32 tcp_reno_undo_cwnd(struct sock *sk) __ksym;
 extern void cubictcp_acked(struct sock *sk, const struct ack_sample *sample) __ksym;
 extern void cubictcp_cong_avoid(struct sock *sk, __u32 ack, __u32 acked) __ksym;
 
-static struct inet_connection_sock *inet_csk(const struct sock *sk)
-{
-	return (struct inet_connection_sock *)sk;
-}
-
-static struct tcp_sock *tcp_sk(const struct sock *sk)
-{
-	return (struct tcp_sock *)sk;
-}
-
 static bool before(__u32 seq1, __u32 seq2)
 {
 	return (__s32)(seq1-seq2) < 0;
@@ -126,18 +110,21 @@ static bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
 	return flag & FLAG_DATA_ACKED;
 }
 
-void BPF_STRUCT_OPS(bpf_cubic_init, struct sock *sk)
+SEC("struct_ops")
+void BPF_PROG(bpf_cubic_init, struct sock *sk)
 {
 	cubictcp_init(sk);
 }
 
-void BPF_STRUCT_OPS(bpf_cubic_cwnd_event, struct sock *sk, enum tcp_ca_event event)
+SEC("struct_ops")
+void BPF_PROG(bpf_cubic_cwnd_event, struct sock *sk, enum tcp_ca_event event)
 {
 	cubictcp_cwnd_event(sk, event);
 }
 
-void BPF_STRUCT_OPS(bpf_cubic_cong_control, struct sock *sk, __u32 ack, int flag,
-		    const struct rate_sample *rs)
+SEC("struct_ops")
+void BPF_PROG(bpf_cubic_cong_control, struct sock *sk, __u32 ack, int flag,
+	      const struct rate_sample *rs)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
@@ -163,23 +150,26 @@ void BPF_STRUCT_OPS(bpf_cubic_cong_control, struct sock *sk, __u32 ack, int flag
 	tcp_update_pacing_rate(sk);
 }
 
-__u32 BPF_STRUCT_OPS(bpf_cubic_recalc_ssthresh, struct sock *sk)
+SEC("struct_ops")
+__u32 BPF_PROG(bpf_cubic_recalc_ssthresh, struct sock *sk)
 {
 	return cubictcp_recalc_ssthresh(sk);
 }
 
-void BPF_STRUCT_OPS(bpf_cubic_state, struct sock *sk, __u8 new_state)
+SEC("struct_ops")
+void BPF_PROG(bpf_cubic_state, struct sock *sk, __u8 new_state)
 {
 	cubictcp_state(sk, new_state);
 }
 
-void BPF_STRUCT_OPS(bpf_cubic_acked, struct sock *sk,
-		    const struct ack_sample *sample)
+SEC("struct_ops")
+void BPF_PROG(bpf_cubic_acked, struct sock *sk, const struct ack_sample *sample)
 {
 	cubictcp_acked(sk, sample);
 }
 
-__u32 BPF_STRUCT_OPS(bpf_cubic_undo_cwnd, struct sock *sk)
+SEC("struct_ops")
+__u32 BPF_PROG(bpf_cubic_undo_cwnd, struct sock *sk)
 {
 	return tcp_reno_undo_cwnd(sk);
 }
...
@@ -14,14 +14,22 @@
  * "ca->ack_cnt / delta" operation.
  */
-#include <linux/bpf.h>
-#include <linux/stddef.h>
-#include <linux/tcp.h>
-#include "bpf_tcp_helpers.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 
 #define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi)
+#define min(a, b) ((a) < (b) ? (a) : (b))
+#define max(a, b) ((a) > (b) ? (a) : (b))
+
+static bool before(__u32 seq1, __u32 seq2)
+{
+	return (__s32)(seq1-seq2) < 0;
+}
+#define after(seq2, seq1) before(seq1, seq2)
+
+extern __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked) __ksym;
+extern void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked) __ksym;
 
 #define BICTCP_BETA_SCALE    1024	/* Scale factor beta calculation
 					 * max_cwnd = snd_cwnd * beta
@@ -70,7 +78,7 @@ static const __u64 cube_factor = (__u64)(1ull << (10+3*BICTCP_HZ))
 				/ (bic_scale * 10);
 
 /* BIC TCP Parameters */
-struct bictcp {
+struct bpf_bictcp {
 	__u32	cnt;		/* increase cwnd by 1 after ACKs */
 	__u32	last_max_cwnd;	/* last maximum snd_cwnd */
 	__u32	last_cwnd;	/* the last snd_cwnd */
@@ -91,7 +99,7 @@ struct bictcp {
 	__u32	curr_rtt;	/* the minimum rtt of current round */
 };
 
-static inline void bictcp_reset(struct bictcp *ca)
+static void bictcp_reset(struct bpf_bictcp *ca)
 {
 	ca->cnt = 0;
 	ca->last_max_cwnd = 0;
@@ -112,7 +120,7 @@ extern unsigned long CONFIG_HZ __kconfig;
 #define USEC_PER_SEC 1000000UL
 #define USEC_PER_JIFFY (USEC_PER_SEC / HZ)
 
-static __always_inline __u64 div64_u64(__u64 dividend, __u64 divisor)
+static __u64 div64_u64(__u64 dividend, __u64 divisor)
 {
 	return dividend / divisor;
 }
@@ -120,7 +128,7 @@ static __always_inline __u64 div64_u64(__u64 dividend, __u64 divisor)
 #define div64_ul div64_u64
 
 #define BITS_PER_U64 (sizeof(__u64) * 8)
-static __always_inline int fls64(__u64 x)
+static int fls64(__u64 x)
 {
 	int num = BITS_PER_U64 - 1;
 
@@ -153,15 +161,15 @@ static __always_inline int fls64(__u64 x)
 	return num + 1;
 }
 
-static __always_inline __u32 bictcp_clock_us(const struct sock *sk)
+static __u32 bictcp_clock_us(const struct sock *sk)
 {
 	return tcp_sk(sk)->tcp_mstamp;
 }
 
-static __always_inline void bictcp_hystart_reset(struct sock *sk)
+static void bictcp_hystart_reset(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct bictcp *ca = inet_csk_ca(sk);
+	struct bpf_bictcp *ca = inet_csk_ca(sk);
 
 	ca->round_start = ca->last_ack = bictcp_clock_us(sk);
 	ca->end_seq = tp->snd_nxt;
@@ -169,11 +177,10 @@ static __always_inline void bictcp_hystart_reset(struct sock *sk)
 	ca->sample_cnt = 0;
 }
 
-/* "struct_ops/" prefix is a requirement */
-SEC("struct_ops/bpf_cubic_init")
+SEC("struct_ops")
 void BPF_PROG(bpf_cubic_init, struct sock *sk)
 {
-	struct bictcp *ca = inet_csk_ca(sk);
+	struct bpf_bictcp *ca = inet_csk_ca(sk);
 
 	bictcp_reset(ca);
 
@@ -184,12 +191,11 @@ void BPF_PROG(bpf_cubic_init, struct sock *sk)
 		tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
 }
 
-/* "struct_ops" prefix is a requirement */
-SEC("struct_ops/bpf_cubic_cwnd_event")
+SEC("struct_ops")
 void BPF_PROG(bpf_cubic_cwnd_event, struct sock *sk, enum tcp_ca_event event)
 {
 	if (event == CA_EVENT_TX_START) {
-		struct bictcp *ca = inet_csk_ca(sk);
+		struct bpf_bictcp *ca = inet_csk_ca(sk);
 		__u32 now = tcp_jiffies32;
 		__s32 delta;
 
@@ -230,7 +236,7 @@ static const __u8 v[] = {
  * Newton-Raphson iteration.
  * Avg err ~= 0.195%
  */
-static __always_inline __u32 cubic_root(__u64 a)
+static __u32 cubic_root(__u64 a)
 {
 	__u32 x, b, shift;
 
@@ -263,8 +269,7 @@ static __always_inline __u32 cubic_root(__u64 a)
 /*
  * Compute congestion window to use.
  */
-static __always_inline void bictcp_update(struct bictcp *ca, __u32 cwnd,
-					  __u32 acked)
+static void bictcp_update(struct bpf_bictcp *ca, __u32 cwnd, __u32 acked)
 {
 	__u32 delta, bic_target, max_cnt;
 	__u64 offs, t;
@@ -377,11 +382,11 @@ static __always_inline void bictcp_update(struct bictcp *ca, __u32 cwnd,
 	ca->cnt = max(ca->cnt, 2U);
 }
 
-/* Or simply use the BPF_STRUCT_OPS to avoid the SEC boiler plate. */
-void BPF_STRUCT_OPS(bpf_cubic_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
+SEC("struct_ops")
+void BPF_PROG(bpf_cubic_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct bictcp *ca = inet_csk_ca(sk);
+	struct bpf_bictcp *ca = inet_csk_ca(sk);
 
 	if (!tcp_is_cwnd_limited(sk))
 		return;
@@ -397,10 +402,11 @@ void BPF_STRUCT_OPS(bpf_cubic_cong_avoid, struct sock *sk, __u32 ack, __u32 acke
 	tcp_cong_avoid_ai(tp, ca->cnt, acked);
 }
 
-__u32 BPF_STRUCT_OPS(bpf_cubic_recalc_ssthresh, struct sock *sk)
+SEC("struct_ops")
+__u32 BPF_PROG(bpf_cubic_recalc_ssthresh, struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
-	struct bictcp *ca = inet_csk_ca(sk);
+	struct bpf_bictcp *ca = inet_csk_ca(sk);
 
 	ca->epoch_start = 0;	/* end of epoch */
 
@@ -414,7 +420,8 @@ __u32 BPF_STRUCT_OPS(bpf_cubic_recalc_ssthresh, struct sock *sk)
 	return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U);
 }
 
-void BPF_STRUCT_OPS(bpf_cubic_state, struct sock *sk, __u8 new_state)
+SEC("struct_ops")
+void BPF_PROG(bpf_cubic_state, struct sock *sk, __u8 new_state)
 {
 	if (new_state == TCP_CA_Loss) {
 		bictcp_reset(inet_csk_ca(sk));
@@ -433,7 +440,7 @@ void BPF_STRUCT_OPS(bpf_cubic_state, struct sock *sk, __u8 new_state)
  * We apply another 100% factor because @rate is doubled at this point.
  * We cap the cushion to 1ms.
  */
-static __always_inline __u32 hystart_ack_delay(struct sock *sk)
+static __u32 hystart_ack_delay(struct sock *sk)
 {
 	unsigned long rate;
 
@@ -444,10 +451,10 @@ static __always_inline __u32 hystart_ack_delay(struct sock *sk)
 		    div64_ul((__u64)GSO_MAX_SIZE * 4 * USEC_PER_SEC, rate));
 }
 
-static __always_inline void hystart_update(struct sock *sk, __u32 delay)
+static void hystart_update(struct sock *sk, __u32 delay)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct bictcp *ca = inet_csk_ca(sk);
+	struct bpf_bictcp *ca = inet_csk_ca(sk);
 	__u32 threshold;
 
 	if (hystart_detect & HYSTART_ACK_TRAIN) {
@@ -492,11 +499,11 @@ static __always_inline void hystart_update(struct sock *sk, __u32 delay)
 
 int bpf_cubic_acked_called = 0;
 
-void BPF_STRUCT_OPS(bpf_cubic_acked, struct sock *sk,
-		    const struct ack_sample *sample)
+SEC("struct_ops")
+void BPF_PROG(bpf_cubic_acked, struct sock *sk, const struct ack_sample *sample)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
-	struct bictcp *ca = inet_csk_ca(sk);
+	struct bpf_bictcp *ca = inet_csk_ca(sk);
 	__u32 delay;
 
 	bpf_cubic_acked_called = 1;
@@ -524,7 +531,8 @@ void BPF_STRUCT_OPS(bpf_cubic_acked, struct sock *sk,
 
 extern __u32 tcp_reno_undo_cwnd(struct sock *sk) __ksym;
 
-__u32 BPF_STRUCT_OPS(bpf_cubic_undo_cwnd, struct sock *sk)
+SEC("struct_ops")
+__u32 BPF_PROG(bpf_cubic_undo_cwnd, struct sock *sk)
 {
 	return tcp_reno_undo_cwnd(sk);
 }
...
@@ -6,15 +6,23 @@
  * the kernel BPF logic.
  */
 
-#include <stddef.h>
-#include <linux/bpf.h>
-#include <linux/types.h>
-#include <linux/stddef.h>
-#include <linux/tcp.h>
-#include <errno.h>
+#include "bpf_tracing_net.h"
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
-#include "bpf_tcp_helpers.h"
+
+#ifndef EBUSY
+#define EBUSY 16
+#endif
+
+#define min(a, b) ((a) < (b) ? (a) : (b))
+#define max(a, b) ((a) > (b) ? (a) : (b))
+#define min_not_zero(x, y) ({			\
+	typeof(x) __x = (x);			\
+	typeof(y) __y = (y);			\
+	__x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); })
+
+static bool before(__u32 seq1, __u32 seq2)
+{
+	return (__s32)(seq1-seq2) < 0;
+}
 
 char _license[] SEC("license") = "GPL";
 
@@ -35,7 +43,7 @@ struct {
 
 #define DCTCP_MAX_ALPHA	1024U
 
-struct dctcp {
+struct bpf_dctcp {
 	__u32 old_delivered;
 	__u32 old_delivered_ce;
 	__u32 prior_rcv_nxt;
@@ -48,8 +56,7 @@ struct dctcp {
 static unsigned int dctcp_shift_g = 4; /* g = 1/2^4 */
 static unsigned int dctcp_alpha_on_init = DCTCP_MAX_ALPHA;
 
-static __always_inline void dctcp_reset(const struct tcp_sock *tp,
-					struct dctcp *ca)
+static void dctcp_reset(const struct tcp_sock *tp, struct bpf_dctcp *ca)
 {
 	ca->next_seq = tp->snd_nxt;
 
@@ -57,11 +64,11 @@ static __always_inline void dctcp_reset(const struct tcp_sock *tp,
 	ca->old_delivered_ce = tp->delivered_ce;
 }
 
-SEC("struct_ops/dctcp_init")
+SEC("struct_ops")
 void BPF_PROG(dctcp_init, struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
-	struct dctcp *ca = inet_csk_ca(sk);
+	struct bpf_dctcp *ca = inet_csk_ca(sk);
 	int *stg;
 
 	if (!(tp->ecn_flags & TCP_ECN_OK) && fallback[0]) {
@@ -104,21 +111,21 @@ void BPF_PROG(dctcp_init, struct sock *sk)
 	dctcp_reset(tp, ca);
 }
 
-SEC("struct_ops/dctcp_ssthresh")
+SEC("struct_ops")
 __u32 BPF_PROG(dctcp_ssthresh, struct sock *sk)
 {
-	struct dctcp *ca = inet_csk_ca(sk);
+	struct bpf_dctcp *ca = inet_csk_ca(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	ca->loss_cwnd = tp->snd_cwnd;
 	return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->dctcp_alpha) >> 11U), 2U);
 }
 
-SEC("struct_ops/dctcp_update_alpha")
+SEC("struct_ops")
 void BPF_PROG(dctcp_update_alpha, struct sock *sk, __u32 flags)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
-	struct dctcp *ca = inet_csk_ca(sk);
+	struct bpf_dctcp *ca = inet_csk_ca(sk);
 
 	/* Expired RTT */
 	if (!before(tp->snd_una, ca->next_seq)) {
@@ -144,16 +151,16 @@ void BPF_PROG(dctcp_update_alpha, struct sock *sk, __u32 flags)
 	}
 }
 
-static __always_inline void dctcp_react_to_loss(struct sock *sk)
+static void dctcp_react_to_loss(struct sock *sk)
 {
-	struct dctcp *ca = inet_csk_ca(sk);
+	struct bpf_dctcp *ca = inet_csk_ca(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	ca->loss_cwnd = tp->snd_cwnd;
 	tp->snd_ssthresh = max(tp->snd_cwnd >> 1U, 2U);
 }
 
-SEC("struct_ops/dctcp_state")
+SEC("struct_ops")
 void BPF_PROG(dctcp_state, struct sock *sk, __u8 new_state)
 {
 	if (new_state == TCP_CA_Recovery &&
@@ -164,7 +171,7 @@ void BPF_PROG(dctcp_state, struct sock *sk, __u8 new_state)
 	 */
 }
 
-static __always_inline void dctcp_ece_ack_cwr(struct sock *sk, __u32 ce_state)
+static void dctcp_ece_ack_cwr(struct sock *sk, __u32 ce_state)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
@@ -179,9 +186,8 @@ static __always_inline void dctcp_ece_ack_cwr(struct sock *sk, __u32 ce_state)
  * S:	0 <- last pkt was non-CE
  *	1 <- last pkt was CE
  */
-static __always_inline
-void dctcp_ece_ack_update(struct sock *sk, enum tcp_ca_event evt,
-			  __u32 *prior_rcv_nxt, __u32 *ce_state)
+static void dctcp_ece_ack_update(struct sock *sk, enum tcp_ca_event evt,
+				 __u32 *prior_rcv_nxt, __u32 *ce_state)
 {
 	__u32 new_ce_state = (evt == CA_EVENT_ECN_IS_CE) ? 1 : 0;
 
@@ -201,10 +207,10 @@ void dctcp_ece_ack_update(struct sock *sk, enum tcp_ca_event evt,
 		dctcp_ece_ack_cwr(sk, new_ce_state);
 }
 
-SEC("struct_ops/dctcp_cwnd_event")
+SEC("struct_ops")
 void BPF_PROG(dctcp_cwnd_event, struct sock *sk, enum tcp_ca_event ev)
 {
-	struct dctcp *ca = inet_csk_ca(sk);
+	struct bpf_dctcp *ca = inet_csk_ca(sk);
 
 	switch (ev) {
 	case CA_EVENT_ECN_IS_CE:
@@ -220,17 +226,17 @@ void BPF_PROG(dctcp_cwnd_event, struct sock *sk, enum tcp_ca_event ev)
 	}
 }
 
-SEC("struct_ops/dctcp_cwnd_undo")
+SEC("struct_ops")
 __u32 BPF_PROG(dctcp_cwnd_undo, struct sock *sk)
 {
-	const struct dctcp *ca = inet_csk_ca(sk);
+	const struct bpf_dctcp *ca = inet_csk_ca(sk);
 
 	return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
 }
 
 extern void tcp_reno_cong_avoid(struct sock *sk, __u32 ack, __u32 acked) __ksym;
 
-SEC("struct_ops/dctcp_reno_cong_avoid")
+SEC("struct_ops")
 void BPF_PROG(dctcp_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
 {
 	tcp_reno_cong_avoid(sk, ack, acked);
...
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2021 Facebook */
-#include <stddef.h>
-#include <linux/bpf.h>
-#include <linux/types.h>
-#include <linux/stddef.h>
-#include <linux/tcp.h>
+#include "bpf_tracing_net.h"
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
-#include "bpf_tcp_helpers.h"
 
 char _license[] SEC("license") = "GPL";
 
 const char cubic[] = "cubic";
 
-void BPF_STRUCT_OPS(dctcp_nouse_release, struct sock *sk)
+SEC("struct_ops")
+void BPF_PROG(dctcp_nouse_release, struct sock *sk)
 {
 	bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
 		       (void *)cubic, sizeof(cubic));
...
 // SPDX-License-Identifier: GPL-2.0
-#include <linux/bpf.h>
-#include <linux/types.h>
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
-#include "bpf_tcp_helpers.h"
 
 char _license[] SEC("license") = "X";
 
-void BPF_STRUCT_OPS(nogpltcp_init, struct sock *sk)
+SEC("struct_ops")
+void BPF_PROG(nogpltcp_init, struct sock *sk)
 {
 }
...
@@ -2,6 +2,9 @@
 #ifndef __BPF_TRACING_NET_H__
 #define __BPF_TRACING_NET_H__
 
+#include <vmlinux.h>
+#include <bpf/bpf_core_read.h>
+
 #define AF_INET			2
 #define AF_INET6		10
 
@@ -22,6 +25,7 @@
 #define IP_TOS			1
 
+#define SOL_IPV6		41
 #define IPV6_TCLASS		67
 #define IPV6_AUTOFLOWLABEL	70
 
@@ -46,6 +50,13 @@
 #define TCP_CA_NAME_MAX		16
 #define TCP_NAGLE_OFF		1
 
+#define TCP_ECN_OK		1
+#define TCP_ECN_QUEUE_CWR	2
+#define TCP_ECN_DEMAND_CWR	4
+#define TCP_ECN_SEEN		8
+
+#define TCP_CONG_NEEDS_ECN	0x2
+
 #define ICSK_TIME_RETRANS	1
 #define ICSK_TIME_PROBE0	3
 #define ICSK_TIME_LOSS_PROBE	5
@@ -129,4 +140,35 @@
 #define tcp_jiffies32 ((__u32)bpf_jiffies64())
 
+static inline struct inet_connection_sock *inet_csk(const struct sock *sk)
+{
+	return (struct inet_connection_sock *)sk;
+}
+
+static inline void *inet_csk_ca(const struct sock *sk)
+{
+	return (void *)inet_csk(sk)->icsk_ca_priv;
+}
+
+static inline struct tcp_sock *tcp_sk(const struct sock *sk)
+{
+	return (struct tcp_sock *)sk;
+}
+
+static inline bool tcp_in_slow_start(const struct tcp_sock *tp)
+{
+	return tp->snd_cwnd < tp->snd_ssthresh;
+}
+
+static inline bool tcp_is_cwnd_limited(const struct sock *sk)
+{
+	const struct tcp_sock *tp = tcp_sk(sk);
+
+	/* If in slow start, ensure cwnd grows to twice what was ACKed. */
+	if (tcp_in_slow_start(tp))
+		return tp->snd_cwnd < 2 * tp->max_packets_out;
+
+	return !!BPF_CORE_READ_BITFIELD(tp, is_cwnd_limited);
+}
+
 #endif
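With the hunk above, bpf_tracing_net.h becomes the single place the tcp-cc tests get both the vmlinux.h types and the small cast helpers. Note that tcp_is_cwnd_limited() reads is_cwnd_limited through BPF_CORE_READ_BITFIELD() because C bitfields cannot be dereferenced through a plain CO-RE-relocated pointer. A hypothetical caller (not one of the selftests) looks like this:

#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char _license[] SEC("license") = "GPL";

SEC("struct_ops")
void BPF_PROG(sample_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
{
	const struct tcp_sock *tp = tcp_sk(sk);

	/* plain field loads are CO-RE relocated against the running kernel */
	__u32 cwnd = tp->snd_cwnd;

	/* bitfields need the explicit CO-RE accessor */
	if (BPF_CORE_READ_BITFIELD(tp, is_cwnd_limited))
		bpf_printk("flow is cwnd-limited at cwnd=%u", cwnd);
}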
@@ -14,8 +14,6 @@
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_endian.h>
 
-#include "bpf_tcp_helpers.h"
-
 #define SRC_REWRITE_IP4		0x7f000004U
 #define DST_REWRITE_IP4		0x7f000001U
 #define DST_REWRITE_PORT4	4444
@@ -32,6 +30,10 @@
 #define IFNAMSIZ 16
 #endif
 
+#ifndef SOL_TCP
+#define SOL_TCP 6
+#endif
+
 __attribute__ ((noinline)) __weak
 int do_bind(struct bpf_sock_addr *ctx)
 {
...
@@ -3,8 +3,8 @@
 #include <linux/types.h>
 #include <linux/bpf.h>
+#include <linux/pkt_cls.h>
 #include <bpf/bpf_helpers.h>
-#include "bpf_tracing_net.h"
 
 struct bpf_fib_lookup fib_params = {};
 int fib_lookup_ret = 0;
...
@@ -2,9 +2,9 @@
 /* Copyright (c) 2020, Tessares SA. */
 /* Copyright (c) 2022, SUSE. */
 
-#include <linux/bpf.h>
+#include "bpf_tracing_net.h"
 #include <bpf/bpf_helpers.h>
-#include "bpf_tcp_helpers.h"
+#include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 __u32 token = 0;
...
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2021 Facebook */
-#include <string.h>
-#include <linux/tcp.h>
-#include <netinet/in.h>
-#include <linux/bpf.h>
-#include "bpf_tcp_helpers.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
 
 char _license[] SEC("license") = "GPL";
 
 __s32 page_size = 0;
 
+const char cc_reno[TCP_CA_NAME_MAX] = "reno";
+const char cc_cubic[TCP_CA_NAME_MAX] = "cubic";
+
 SEC("cgroup/setsockopt")
 int sockopt_qos_to_cc(struct bpf_sockopt *ctx)
 {
 	void *optval_end = ctx->optval_end;
 	int *optval = ctx->optval;
 	char buf[TCP_CA_NAME_MAX];
-	char cc_reno[TCP_CA_NAME_MAX] = "reno";
-	char cc_cubic[TCP_CA_NAME_MAX] = "cubic";
 
 	if (ctx->level != SOL_IPV6 || ctx->optname != IPV6_TCLASS)
 		goto out;
@@ -29,11 +25,11 @@ int sockopt_qos_to_cc(struct bpf_sockopt *ctx)
 	if (bpf_getsockopt(ctx->sk, SOL_TCP, TCP_CONGESTION, &buf, sizeof(buf)))
 		return 0;
 
-	if (!tcp_cc_eq(buf, cc_cubic))
+	if (bpf_strncmp(buf, sizeof(buf), cc_cubic))
 		return 0;
 
 	if (*optval == 0x2d) {
-		if (bpf_setsockopt(ctx->sk, SOL_TCP, TCP_CONGESTION, &cc_reno,
+		if (bpf_setsockopt(ctx->sk, SOL_TCP, TCP_CONGESTION, (void *)&cc_reno,
 				   sizeof(cc_reno)))
 			return 0;
 	}
...
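The bpf_strncmp() helper that replaces the open-coded tcp_cc_eq() loop behaves like strncmp(): it returns 0 on a match. A self-contained sketch of the pattern follows; the program name is hypothetical, and the SOL_TCP/TCP_CONGESTION/TCP_CA_NAME_MAX values are assumed to come from bpf_tracing_net.h as in the hunk above.

#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>

char _license[] SEC("license") = "GPL";

SEC("cgroup/setsockopt")
int check_cc(struct bpf_sockopt *ctx)
{
	char buf[TCP_CA_NAME_MAX] = {};

	if (bpf_getsockopt(ctx->sk, SOL_TCP, TCP_CONGESTION, &buf, sizeof(buf)))
		return 1;

	/* bpf_strncmp() == 0 means the two strings match */
	if (bpf_strncmp(buf, sizeof(buf), "cubic") == 0)
		bpf_printk("socket is running cubic");

	return 1;	/* 1 == allow the setsockopt() to proceed */
}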
 // SPDX-License-Identifier: GPL-2.0
 
-#include "vmlinux.h"
+#include "bpf_tracing_net.h"
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 
-static inline struct tcp_sock *tcp_sk(const struct sock *sk)
-{
-	return (struct tcp_sock *)sk;
-}
-
-SEC("struct_ops/incompl_cong_ops_ssthresh")
+SEC("struct_ops")
 __u32 BPF_PROG(incompl_cong_ops_ssthresh, struct sock *sk)
 {
 	return tcp_sk(sk)->snd_ssthresh;
 }
 
-SEC("struct_ops/incompl_cong_ops_undo_cwnd")
+SEC("struct_ops")
 __u32 BPF_PROG(incompl_cong_ops_undo_cwnd, struct sock *sk)
 {
 	return tcp_sk(sk)->snd_cwnd;
...
@@ -27,7 +27,7 @@ extern void cubictcp_state(struct sock *sk, u8 new_state) __ksym;
 extern void cubictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event) __ksym;
 extern void cubictcp_acked(struct sock *sk, const struct ack_sample *sample) __ksym;
 
-SEC("struct_ops/init")
+SEC("struct_ops")
 void BPF_PROG(init, struct sock *sk)
 {
 	bbr_init(sk);
@@ -35,38 +35,38 @@ void BPF_PROG(init, struct sock *sk)
 	cubictcp_init(sk);
 }
 
-SEC("struct_ops/in_ack_event")
+SEC("struct_ops")
 void BPF_PROG(in_ack_event, struct sock *sk, u32 flags)
 {
 	dctcp_update_alpha(sk, flags);
 }
 
-SEC("struct_ops/cong_control")
+SEC("struct_ops")
 void BPF_PROG(cong_control, struct sock *sk, u32 ack, int flag, const struct rate_sample *rs)
 {
 	bbr_main(sk, ack, flag, rs);
 }
 
-SEC("struct_ops/cong_avoid")
+SEC("struct_ops")
 void BPF_PROG(cong_avoid, struct sock *sk, u32 ack, u32 acked)
 {
 	cubictcp_cong_avoid(sk, ack, acked);
 }
 
-SEC("struct_ops/sndbuf_expand")
+SEC("struct_ops")
 u32 BPF_PROG(sndbuf_expand, struct sock *sk)
 {
 	return bbr_sndbuf_expand(sk);
 }
 
-SEC("struct_ops/undo_cwnd")
+SEC("struct_ops")
 u32 BPF_PROG(undo_cwnd, struct sock *sk)
 {
 	bbr_undo_cwnd(sk);
 	return dctcp_cwnd_undo(sk);
 }
 
-SEC("struct_ops/cwnd_event")
+SEC("struct_ops")
 void BPF_PROG(cwnd_event, struct sock *sk, enum tcp_ca_event event)
 {
 	bbr_cwnd_event(sk, event);
@@ -74,7 +74,7 @@ void BPF_PROG(cwnd_event, struct sock *sk, enum tcp_ca_event event)
 	cubictcp_cwnd_event(sk, event);
 }
 
-SEC("struct_ops/ssthresh")
+SEC("struct_ops")
 u32 BPF_PROG(ssthresh, struct sock *sk)
 {
 	bbr_ssthresh(sk);
@@ -82,13 +82,13 @@ u32 BPF_PROG(ssthresh, struct sock *sk)
 	return cubictcp_recalc_ssthresh(sk);
 }
 
-SEC("struct_ops/min_tso_segs")
+SEC("struct_ops")
 u32 BPF_PROG(min_tso_segs, struct sock *sk)
 {
 	return bbr_min_tso_segs(sk);
 }
 
-SEC("struct_ops/set_state")
+SEC("struct_ops")
 void BPF_PROG(set_state, struct sock *sk, u8 new_state)
 {
 	bbr_set_state(sk, new_state);
@@ -96,7 +96,7 @@ void BPF_PROG(set_state, struct sock *sk, u8 new_state)
 	cubictcp_state(sk, new_state);
 }
 
-SEC("struct_ops/pkts_acked")
+SEC("struct_ops")
 void BPF_PROG(pkts_acked, struct sock *sk, const struct ack_sample *sample)
 {
 	cubictcp_acked(sk, sample);
...
@@ -7,7 +7,7 @@
 
 char _license[] SEC("license") = "GPL";
 
-SEC("struct_ops/unsupp_cong_op_get_info")
+SEC("struct_ops")
 size_t BPF_PROG(unsupp_cong_op_get_info, struct sock *sk, u32 ext, int *attr,
 		union tcp_cc_info *info)
 {
...
 // SPDX-License-Identifier: GPL-2.0
 
-#include "vmlinux.h"
+#include "bpf_tracing_net.h"
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 
@@ -10,36 +9,31 @@ char _license[] SEC("license") = "GPL";
 int ca1_cnt = 0;
 int ca2_cnt = 0;
 
-static inline struct tcp_sock *tcp_sk(const struct sock *sk)
-{
-	return (struct tcp_sock *)sk;
-}
-
-SEC("struct_ops/ca_update_1_init")
+SEC("struct_ops")
 void BPF_PROG(ca_update_1_init, struct sock *sk)
 {
 	ca1_cnt++;
 }
 
-SEC("struct_ops/ca_update_2_init")
+SEC("struct_ops")
 void BPF_PROG(ca_update_2_init, struct sock *sk)
 {
 	ca2_cnt++;
 }
 
-SEC("struct_ops/ca_update_cong_control")
+SEC("struct_ops")
 void BPF_PROG(ca_update_cong_control, struct sock *sk,
 	      const struct rate_sample *rs)
 {
 }
 
-SEC("struct_ops/ca_update_ssthresh")
+SEC("struct_ops")
 __u32 BPF_PROG(ca_update_ssthresh, struct sock *sk)
 {
 	return tcp_sk(sk)->snd_ssthresh;
 }
 
-SEC("struct_ops/ca_update_undo_cwnd")
+SEC("struct_ops")
 __u32 BPF_PROG(ca_update_undo_cwnd, struct sock *sk)
 {
 	return tcp_sk(sk)->snd_cwnd;
...
 // SPDX-License-Identifier: GPL-2.0
 
-#include "vmlinux.h"
+#include "bpf_tracing_net.h"
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 
@@ -11,22 +10,17 @@ char _license[] SEC("license") = "GPL";
 
 #define min(a, b) ((a) < (b) ? (a) : (b))
 
-static inline struct tcp_sock *tcp_sk(const struct sock *sk)
-{
-	return (struct tcp_sock *)sk;
-}
-
-static inline unsigned int tcp_left_out(const struct tcp_sock *tp)
+static unsigned int tcp_left_out(const struct tcp_sock *tp)
 {
 	return tp->sacked_out + tp->lost_out;
 }
 
-static inline unsigned int tcp_packets_in_flight(const struct tcp_sock *tp)
+static unsigned int tcp_packets_in_flight(const struct tcp_sock *tp)
 {
 	return tp->packets_out - tcp_left_out(tp) + tp->retrans_out;
 }
 
-SEC("struct_ops/write_sk_pacing_init")
+SEC("struct_ops")
 void BPF_PROG(write_sk_pacing_init, struct sock *sk)
 {
 #ifdef ENABLE_ATOMICS_TESTS
@@ -37,7 +31,7 @@ void BPF_PROG(write_sk_pacing_init, struct sock *sk)
 #endif
 }
 
-SEC("struct_ops/write_sk_pacing_cong_control")
+SEC("struct_ops")
 void BPF_PROG(write_sk_pacing_cong_control, struct sock *sk,
 	      const struct rate_sample *rs)
 {
@@ -49,13 +43,13 @@ void BPF_PROG(write_sk_pacing_cong_control, struct sock *sk,
 	tp->app_limited = (tp->delivered + tcp_packets_in_flight(tp)) ?: 1;
 }
 
-SEC("struct_ops/write_sk_pacing_ssthresh")
+SEC("struct_ops")
 __u32 BPF_PROG(write_sk_pacing_ssthresh, struct sock *sk)
 {
 	return tcp_sk(sk)->snd_ssthresh;
 }
 
-SEC("struct_ops/write_sk_pacing_undo_cwnd")
+SEC("struct_ops")
 __u32 BPF_PROG(write_sk_pacing_undo_cwnd, struct sock *sk)
 {
 	return tcp_sk(sk)->snd_cwnd;
...
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2020 Facebook */
 
-#include <string.h>
-#include <errno.h>
-#include <netinet/in.h>
-#include <linux/stddef.h>
-#include <linux/bpf.h>
-#include <linux/ipv6.h>
-#include <linux/tcp.h>
-#include <linux/if_ether.h>
-#include <linux/pkt_cls.h>
+#include "bpf_tracing_net.h"
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_endian.h>
-#include "bpf_tcp_helpers.h"
+
+#ifndef ENOENT
+#define ENOENT 2
+#endif
 
 struct sockaddr_in6 srv_sa6 = {};
 __u16 listen_tp_sport = 0;
...
@@ -3,7 +3,7 @@
 #include <bpf/bpf_endian.h>
 #include <bpf/bpf_helpers.h>
 #include <linux/ip.h>
-#include "bpf_tracing_net.h"
+#include <linux/if_ether.h>
 
 /* We don't care about whether the packet can be received by network stack.
  * Just care if the packet is sent to the correct device at correct direction
...
@@ -7,7 +7,6 @@
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_endian.h>
-#include "bpf_tcp_helpers.h"
 
 enum bpf_linum_array_idx {
 	EGRESS_LINUM_IDX,
@@ -42,6 +41,10 @@ struct {
 	__type(value, struct bpf_spinlock_cnt);
 } sk_pkt_out_cnt10 SEC(".maps");
 
+struct tcp_sock {
+	__u32 lsndtime;
+} __attribute__((preserve_access_index));
+
 struct bpf_tcp_sock listen_tp = {};
 struct sockaddr_in6 srv_sa6 = {};
 struct bpf_tcp_sock cli_tp = {};
...
 // SPDX-License-Identifier: GPL-2.0
-#include <stddef.h>
-#include <string.h>
-#include <netinet/in.h>
-#include <linux/bpf.h>
-#include <linux/if_ether.h>
-#include <linux/if_packet.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <linux/types.h>
-#include <linux/socket.h>
-#include <linux/tcp.h>
+#include "bpf_tracing_net.h"
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_endian.h>
-#include "bpf_tcp_helpers.h"
 #include "test_tcpbpf.h"
 
 struct tcpbpf_globals global = {};
...