Commit cbe35adf authored by Alexei Starovoitov

Merge branch 'selftests-bpf-retire-bpf_tcp_helpers-h'

Martin KaFai Lau says:

====================
selftests/bpf: Retire bpf_tcp_helpers.h

From: Martin KaFai Lau <martin.lau@kernel.org>

The earlier commit 8e6d9ae2 ("selftests/bpf: Use bpf_tracing.h instead of bpf_tcp_helpers.h")
removed the bpf_tcp_helpers.h usages from the non-networking tests.

This patch set is a continuation of this effort to retire
the bpf_tcp_helpers.h from the networking tests (mostly tcp-cc related).

The main usage of bpf_tcp_helpers.h is its partial kernel
socket definitions (e.g. sock, tcp_sock). New fields keep being added
to those partial socket definitions even though everything is available
in vmlinux.h. The recent bpf_cc_cubic.c test tried to extend
bpf_tcp_helpers.h but eventually used vmlinux.h instead. To avoid
this unnecessary detour for new tests and to have one consistent way
of using the kernel sockets, this patch set retires the bpf_tcp_helpers.h
usages and consolidates the tests to use vmlinux.h instead.
====================

Link: https://lore.kernel.org/r/20240509175026.3423614-1-martin.lau@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents 00936709 6a650816
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __BPF_TCP_HELPERS_H
#define __BPF_TCP_HELPERS_H
#include <stdbool.h>
#include <linux/types.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_tracing.h>
/*
 * Declare a struct_ops BPF program in one step: the program is tagged with
 * SEC("struct_ops/<name>") and its name and typed arguments are handed to
 * BPF_PROG().  Usage: <ret-type> BPF_STRUCT_OPS(name, args...) { ... }
 */
#define BPF_STRUCT_OPS(name, args...) \
SEC("struct_ops/"#name) \
BPF_PROG(name, args)
#ifndef SOL_TCP
#define SOL_TCP 6
#endif
#ifndef TCP_CA_NAME_MAX
#define TCP_CA_NAME_MAX 16
#endif
/* bpf_jiffies64() truncated to 32 bits. */
#define tcp_jiffies32 ((__u32)bpf_jiffies64())
/*
 * Partial view of the kernel's struct sock_common: only the fields used by
 * the selftests are declared.  preserve_access_index makes clang record
 * field accesses as CO-RE relocations instead of fixed offsets.
 */
struct sock_common {
unsigned char skc_state;
__u16 skc_num;
} __attribute__((preserve_access_index));
/* Values for sock.sk_pacing_status (see struct sock below). */
enum sk_pacing {
SK_PACING_NONE = 0,
SK_PACING_NEEDED = 1,
SK_PACING_FQ = 2,
};
/*
 * Partial view of the kernel's struct sock (CO-RE relocated through
 * preserve_access_index).  sk_state is a shorthand macro for the
 * skc_state field of the embedded sock_common.
 */
struct sock {
struct sock_common __sk_common;
#define sk_state __sk_common.skc_state
unsigned long sk_pacing_rate;
__u32 sk_pacing_status; /* see enum sk_pacing */
} __attribute__((preserve_access_index));
/* Partial view of the kernel's struct inet_sock: only the embedded sock is declared. */
struct inet_sock {
struct sock sk;
} __attribute__((preserve_access_index));
/*
 * Partial view of the kernel's struct inet_connection_sock.
 * icsk_ca_priv is the 104-byte area (stored as __u64 words) that
 * inet_csk_ca() returns to congestion-control code as its private state.
 */
struct inet_connection_sock {
struct inet_sock icsk_inet;
__u8 icsk_ca_state:6,
icsk_ca_setsockopt:1,
icsk_ca_dst_locked:1;
struct {
__u8 pending;
} icsk_ack;
__u64 icsk_ca_priv[104 / sizeof(__u64)];
} __attribute__((preserve_access_index));
/* Partial view of the kernel's struct request_sock: only the embedded sock_common is declared. */
struct request_sock {
struct sock_common __req_common;
} __attribute__((preserve_access_index));
/*
 * Partial view of the kernel's struct tcp_sock, listing only the fields the
 * TCP congestion-control selftests read or write.  Field accesses are CO-RE
 * relocated (preserve_access_index); the is_cwnd_limited bitfield is read
 * through BPF_CORE_READ_BITFIELD() in tcp_is_cwnd_limited().
 */
struct tcp_sock {
struct inet_connection_sock inet_conn;
__u32 rcv_nxt;
__u32 snd_nxt;
__u32 snd_una;
__u32 window_clamp;
__u8 ecn_flags;
__u32 delivered;
__u32 delivered_ce;
__u32 snd_cwnd;
__u32 snd_cwnd_cnt;
__u32 snd_cwnd_clamp;
__u32 snd_ssthresh;
__u8 syn_data:1, /* SYN includes data */
syn_fastopen:1, /* SYN includes Fast Open option */
syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */
syn_fastopen_ch:1, /* Active TFO re-enabling probe */
syn_data_acked:1,/* data in SYN is acked by SYN-ACK */
save_syn:1, /* Save headers of SYN packet */
is_cwnd_limited:1,/* forward progress limited by snd_cwnd? */
syn_smc:1; /* SYN includes SMC */
__u32 max_packets_out;
__u32 lsndtime;
__u32 prior_cwnd;
__u64 tcp_mstamp; /* most recent packet received/sent */
bool is_mptcp;
} __attribute__((preserve_access_index));
/* View @sk as a struct inet_connection_sock (plain pointer cast, no checks). */
static __always_inline struct inet_connection_sock *inet_csk(const struct sock *sk)
{
	struct inet_connection_sock *icsk = (struct inet_connection_sock *)sk;

	return icsk;
}
static __always_inline void *inet_csk_ca(const struct sock *sk)
{
return (void *)inet_csk(sk)->icsk_ca_priv;
}
/* View @sk as a struct tcp_sock (plain pointer cast, no checks). */
static __always_inline struct tcp_sock *tcp_sk(const struct sock *sk)
{
	struct tcp_sock *tp = (struct tcp_sock *)sk;

	return tp;
}
/*
 * Wrap-around aware comparison of 32-bit sequence numbers: true when the
 * unsigned difference seq1 - seq2, reinterpreted as signed, is negative.
 */
static __always_inline bool before(__u32 seq1, __u32 seq2)
{
	__s32 delta = (__s32)(seq1 - seq2);

	return delta < 0;
}
/* after(a, b) is before(b, a). */
#define after(seq2, seq1) before(seq1, seq2)
/* Single-bit flag values; TCP_ECN_OK is tested against tcp_sock.ecn_flags by the dctcp selftest. */
#define TCP_ECN_OK 1
#define TCP_ECN_QUEUE_CWR 2
#define TCP_ECN_DEMAND_CWR 4
#define TCP_ECN_SEEN 8
/*
 * ACK state bits; every value is a distinct power of two so they can be OR-ed.
 * NOTE(review): presumably stored in inet_connection_sock.icsk_ack.pending —
 * confirm against the kernel header.
 */
enum inet_csk_ack_state_t {
ICSK_ACK_SCHED = 1,
ICSK_ACK_TIMER = 2,
ICSK_ACK_PUSHED = 4,
ICSK_ACK_PUSHED2 = 8,
ICSK_ACK_NOW = 16 /* Send the next ACK immediately (once) */
};
/* Event codes passed as the second argument of tcp_congestion_ops.cwnd_event(). */
enum tcp_ca_event {
CA_EVENT_TX_START = 0,
CA_EVENT_CWND_RESTART = 1,
CA_EVENT_COMPLETE_CWR = 2,
CA_EVENT_LOSS = 3,
CA_EVENT_ECN_NO_CE = 4,
CA_EVENT_ECN_IS_CE = 5,
};
/* Sample handed to tcp_congestion_ops.pkts_acked(); partial, CO-RE relocated view. */
struct ack_sample {
__u32 pkts_acked;
__s32 rtt_us;
__u32 in_flight;
} __attribute__((preserve_access_index));
/* Delivery-rate sample handed to tcp_congestion_ops.cong_control(); CO-RE relocated view. */
struct rate_sample {
__u64 prior_mstamp; /* starting timestamp for interval */
__u32 prior_delivered; /* tp->delivered at "prior_mstamp" */
__s32 delivered; /* number of packets delivered over interval */
long interval_us; /* time for tp->delivered to incr "delivered" */
__u32 snd_interval_us; /* snd interval for delivered packets */
__u32 rcv_interval_us; /* rcv interval for delivered packets */
long rtt_us; /* RTT of last (S)ACKed packet (or -1) */
int losses; /* number of packets marked lost upon ACK */
__u32 acked_sacked; /* number of packets newly (S)ACKed upon ACK */
__u32 prior_in_flight; /* in flight before this ACK */
bool is_app_limited; /* is sample from packet with bubble in pipe? */
bool is_retrans; /* is sample from retransmission? */
bool is_ack_delayed; /* is this (likely) a delayed ACK? */
} __attribute__((preserve_access_index));
/*
 * Size of tcp_congestion_ops.name.  Guarded so that a definition supplied by
 * an earlier header is respected instead of being redefined here.
 */
#ifndef TCP_CA_NAME_MAX
#define TCP_CA_NAME_MAX 16
#endif
/*
 * Bit for tcp_congestion_ops.flags.
 * NOTE(review): the name suggests "algorithm requires ECN" — confirm against
 * the kernel header.
 */
#ifndef TCP_CONG_NEEDS_ECN
#define TCP_CONG_NEEDS_ECN 0x2
#endif
/*
 * Local declaration of the TCP congestion-control operations table that the
 * struct_ops selftests implement.  Each callback receives the socket it
 * operates on; the per-member comments say which callbacks are required
 * and which are optional.
 */
struct tcp_congestion_ops {
char name[TCP_CA_NAME_MAX];
__u32 flags;
/* initialize private data (optional) */
void (*init)(struct sock *sk);
/* cleanup private data (optional) */
void (*release)(struct sock *sk);
/* return slow start threshold (required) */
__u32 (*ssthresh)(struct sock *sk);
/* do new cwnd calculation (required) */
void (*cong_avoid)(struct sock *sk, __u32 ack, __u32 acked);
/* call before changing ca_state (optional) */
void (*set_state)(struct sock *sk, __u8 new_state);
/* call when cwnd event occurs (optional) */
void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev);
/* call when ack arrives (optional) */
void (*in_ack_event)(struct sock *sk, __u32 flags);
/* new value of cwnd after loss (required) */
__u32 (*undo_cwnd)(struct sock *sk);
/* hook for packet ack accounting (optional) */
void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample);
/* override sysctl_tcp_min_tso_segs */
__u32 (*min_tso_segs)(struct sock *sk);
/* returns the multiplier used in tcp_sndbuf_expand (optional) */
__u32 (*sndbuf_expand)(struct sock *sk);
/* call when packets are delivered to update cwnd and pacing rate,
 * after all the ca_state processing. (optional)
 */
void (*cong_control)(struct sock *sk, const struct rate_sample *rs);
/* opaque owner pointer; declared untyped in this local copy */
void *owner;
};
/*
 * Plain ternary min/max: an argument can be evaluated twice, so do not pass
 * expressions with side effects.
 */
#define min(a, b) ((a) < (b) ? (a) : (b))
#define max(a, b) ((a) > (b) ? (a) : (b))
/*
 * Smaller of x and y, ignoring zeros: if one operand is 0 the other is
 * returned (0 only when both are 0).  x and y are evaluated once each into
 * typeof() temporaries.
 */
#define min_not_zero(x, y) ({ \
typeof(x) __x = (x); \
typeof(y) __y = (y); \
__x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); })
/* True while the congestion window is still below the slow-start threshold. */
static __always_inline bool tcp_in_slow_start(const struct tcp_sock *tp)
{
	__u32 cwnd = tp->snd_cwnd;
	__u32 ssthresh = tp->snd_ssthresh;

	return cwnd < ssthresh;
}
/*
 * Report whether the connection is currently limited by its congestion
 * window.  Outside slow start this is the socket's is_cwnd_limited bit;
 * in slow start it is "cwnd < 2 * max_packets_out".
 */
static __always_inline bool tcp_is_cwnd_limited(const struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);

	if (!tcp_in_slow_start(tp))
		return !!BPF_CORE_READ_BITFIELD(tp, is_cwnd_limited);

	/* In slow start, make sure cwnd can grow to twice what was ACKed. */
	return tp->snd_cwnd < 2 * tp->max_packets_out;
}
/*
 * Compare two congestion-control names.  At most TCP_CA_NAME_MAX bytes are
 * examined; the names are equal when no byte differs before the first NUL
 * (or before the limit is reached).
 */
static __always_inline bool tcp_cc_eq(const char *a, const char *b)
{
	int pos;

	for (pos = 0; pos < TCP_CA_NAME_MAX; pos++) {
		char ch = a[pos];

		if (ch != b[pos])
			return false;
		/* Both names ended at the same position. */
		if (ch == '\0')
			return true;
	}
	return true;
}
/* Functions declared with __ksym: they are not defined in this header. */
extern __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked) __ksym;
extern void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked) __ksym;
/* Partial, CO-RE relocated view of the kernel's struct mptcp_sock. */
struct mptcp_sock {
struct inet_connection_sock sk;
__u32 token;
struct sock *first;
char ca_name[TCP_CA_NAME_MAX];
} __attribute__((preserve_access_index));
#endif
......@@ -13,15 +13,9 @@
* kernel functions.
*/
#include "vmlinux.h"
#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "bpf_tracing_net.h"
#define BPF_STRUCT_OPS(name, args...) \
SEC("struct_ops/"#name) \
BPF_PROG(name, args)
#define USEC_PER_SEC 1000000UL
#define TCP_PACING_SS_RATIO (200)
......@@ -40,16 +34,6 @@ extern __u32 tcp_reno_undo_cwnd(struct sock *sk) __ksym;
extern void cubictcp_acked(struct sock *sk, const struct ack_sample *sample) __ksym;
extern void cubictcp_cong_avoid(struct sock *sk, __u32 ack, __u32 acked) __ksym;
static struct inet_connection_sock *inet_csk(const struct sock *sk)
{
return (struct inet_connection_sock *)sk;
}
static struct tcp_sock *tcp_sk(const struct sock *sk)
{
return (struct tcp_sock *)sk;
}
static bool before(__u32 seq1, __u32 seq2)
{
return (__s32)(seq1-seq2) < 0;
......@@ -126,18 +110,21 @@ static bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
return flag & FLAG_DATA_ACKED;
}
void BPF_STRUCT_OPS(bpf_cubic_init, struct sock *sk)
SEC("struct_ops")
void BPF_PROG(bpf_cubic_init, struct sock *sk)
{
cubictcp_init(sk);
}
void BPF_STRUCT_OPS(bpf_cubic_cwnd_event, struct sock *sk, enum tcp_ca_event event)
SEC("struct_ops")
void BPF_PROG(bpf_cubic_cwnd_event, struct sock *sk, enum tcp_ca_event event)
{
cubictcp_cwnd_event(sk, event);
}
void BPF_STRUCT_OPS(bpf_cubic_cong_control, struct sock *sk, __u32 ack, int flag,
const struct rate_sample *rs)
SEC("struct_ops")
void BPF_PROG(bpf_cubic_cong_control, struct sock *sk, __u32 ack, int flag,
const struct rate_sample *rs)
{
struct tcp_sock *tp = tcp_sk(sk);
......@@ -163,23 +150,26 @@ void BPF_STRUCT_OPS(bpf_cubic_cong_control, struct sock *sk, __u32 ack, int flag
tcp_update_pacing_rate(sk);
}
__u32 BPF_STRUCT_OPS(bpf_cubic_recalc_ssthresh, struct sock *sk)
SEC("struct_ops")
__u32 BPF_PROG(bpf_cubic_recalc_ssthresh, struct sock *sk)
{
return cubictcp_recalc_ssthresh(sk);
}
void BPF_STRUCT_OPS(bpf_cubic_state, struct sock *sk, __u8 new_state)
SEC("struct_ops")
void BPF_PROG(bpf_cubic_state, struct sock *sk, __u8 new_state)
{
cubictcp_state(sk, new_state);
}
void BPF_STRUCT_OPS(bpf_cubic_acked, struct sock *sk,
const struct ack_sample *sample)
SEC("struct_ops")
void BPF_PROG(bpf_cubic_acked, struct sock *sk, const struct ack_sample *sample)
{
cubictcp_acked(sk, sample);
}
__u32 BPF_STRUCT_OPS(bpf_cubic_undo_cwnd, struct sock *sk)
SEC("struct_ops")
__u32 BPF_PROG(bpf_cubic_undo_cwnd, struct sock *sk)
{
return tcp_reno_undo_cwnd(sk);
}
......
......@@ -14,14 +14,22 @@
* "ca->ack_cnt / delta" operation.
*/
#include <linux/bpf.h>
#include <linux/stddef.h>
#include <linux/tcp.h>
#include "bpf_tcp_helpers.h"
#include "bpf_tracing_net.h"
#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
/* clamp(val, lo, hi) expands to min(max(val, lo), hi), with the max() result cast to val's type. */
#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi)
/* Plain ternary min/max: an argument can be evaluated twice, so avoid side effects. */
#define min(a, b) ((a) < (b) ? (a) : (b))
#define max(a, b) ((a) > (b) ? (a) : (b))
/*
 * Wrap-around aware comparison of 32-bit sequence numbers: true when the
 * unsigned difference seq1 - seq2, reinterpreted as signed, is negative.
 */
static bool before(__u32 seq1, __u32 seq2)
{
	__s32 delta = (__s32)(seq1 - seq2);

	return delta < 0;
}
/* after(a, b) is before(b, a). */
#define after(seq2, seq1) before(seq1, seq2)
extern __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked) __ksym;
extern void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked) __ksym;
#define BICTCP_BETA_SCALE 1024 /* Scale factor beta calculation
* max_cwnd = snd_cwnd * beta
......@@ -70,7 +78,7 @@ static const __u64 cube_factor = (__u64)(1ull << (10+3*BICTCP_HZ))
/ (bic_scale * 10);
/* BIC TCP Parameters */
struct bictcp {
struct bpf_bictcp {
__u32 cnt; /* increase cwnd by 1 after ACKs */
__u32 last_max_cwnd; /* last maximum snd_cwnd */
__u32 last_cwnd; /* the last snd_cwnd */
......@@ -91,7 +99,7 @@ struct bictcp {
__u32 curr_rtt; /* the minimum rtt of current round */
};
static inline void bictcp_reset(struct bictcp *ca)
static void bictcp_reset(struct bpf_bictcp *ca)
{
ca->cnt = 0;
ca->last_max_cwnd = 0;
......@@ -112,7 +120,7 @@ extern unsigned long CONFIG_HZ __kconfig;
#define USEC_PER_SEC 1000000UL
#define USEC_PER_JIFFY (USEC_PER_SEC / HZ)
static __always_inline __u64 div64_u64(__u64 dividend, __u64 divisor)
static __u64 div64_u64(__u64 dividend, __u64 divisor)
{
return dividend / divisor;
}
......@@ -120,7 +128,7 @@ static __always_inline __u64 div64_u64(__u64 dividend, __u64 divisor)
#define div64_ul div64_u64
#define BITS_PER_U64 (sizeof(__u64) * 8)
static __always_inline int fls64(__u64 x)
static int fls64(__u64 x)
{
int num = BITS_PER_U64 - 1;
......@@ -153,15 +161,15 @@ static __always_inline int fls64(__u64 x)
return num + 1;
}
static __always_inline __u32 bictcp_clock_us(const struct sock *sk)
static __u32 bictcp_clock_us(const struct sock *sk)
{
return tcp_sk(sk)->tcp_mstamp;
}
static __always_inline void bictcp_hystart_reset(struct sock *sk)
static void bictcp_hystart_reset(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bictcp *ca = inet_csk_ca(sk);
struct bpf_bictcp *ca = inet_csk_ca(sk);
ca->round_start = ca->last_ack = bictcp_clock_us(sk);
ca->end_seq = tp->snd_nxt;
......@@ -169,11 +177,10 @@ static __always_inline void bictcp_hystart_reset(struct sock *sk)
ca->sample_cnt = 0;
}
/* "struct_ops/" prefix is a requirement */
SEC("struct_ops/bpf_cubic_init")
SEC("struct_ops")
void BPF_PROG(bpf_cubic_init, struct sock *sk)
{
struct bictcp *ca = inet_csk_ca(sk);
struct bpf_bictcp *ca = inet_csk_ca(sk);
bictcp_reset(ca);
......@@ -184,12 +191,11 @@ void BPF_PROG(bpf_cubic_init, struct sock *sk)
tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
}
/* "struct_ops" prefix is a requirement */
SEC("struct_ops/bpf_cubic_cwnd_event")
SEC("struct_ops")
void BPF_PROG(bpf_cubic_cwnd_event, struct sock *sk, enum tcp_ca_event event)
{
if (event == CA_EVENT_TX_START) {
struct bictcp *ca = inet_csk_ca(sk);
struct bpf_bictcp *ca = inet_csk_ca(sk);
__u32 now = tcp_jiffies32;
__s32 delta;
......@@ -230,7 +236,7 @@ static const __u8 v[] = {
* Newton-Raphson iteration.
* Avg err ~= 0.195%
*/
static __always_inline __u32 cubic_root(__u64 a)
static __u32 cubic_root(__u64 a)
{
__u32 x, b, shift;
......@@ -263,8 +269,7 @@ static __always_inline __u32 cubic_root(__u64 a)
/*
* Compute congestion window to use.
*/
static __always_inline void bictcp_update(struct bictcp *ca, __u32 cwnd,
__u32 acked)
static void bictcp_update(struct bpf_bictcp *ca, __u32 cwnd, __u32 acked)
{
__u32 delta, bic_target, max_cnt;
__u64 offs, t;
......@@ -377,11 +382,11 @@ static __always_inline void bictcp_update(struct bictcp *ca, __u32 cwnd,
ca->cnt = max(ca->cnt, 2U);
}
/* Or simply use the BPF_STRUCT_OPS to avoid the SEC boiler plate. */
void BPF_STRUCT_OPS(bpf_cubic_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
SEC("struct_ops")
void BPF_PROG(bpf_cubic_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bictcp *ca = inet_csk_ca(sk);
struct bpf_bictcp *ca = inet_csk_ca(sk);
if (!tcp_is_cwnd_limited(sk))
return;
......@@ -397,10 +402,11 @@ void BPF_STRUCT_OPS(bpf_cubic_cong_avoid, struct sock *sk, __u32 ack, __u32 acke
tcp_cong_avoid_ai(tp, ca->cnt, acked);
}
__u32 BPF_STRUCT_OPS(bpf_cubic_recalc_ssthresh, struct sock *sk)
SEC("struct_ops")
__u32 BPF_PROG(bpf_cubic_recalc_ssthresh, struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
struct bictcp *ca = inet_csk_ca(sk);
struct bpf_bictcp *ca = inet_csk_ca(sk);
ca->epoch_start = 0; /* end of epoch */
......@@ -414,7 +420,8 @@ __u32 BPF_STRUCT_OPS(bpf_cubic_recalc_ssthresh, struct sock *sk)
return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U);
}
void BPF_STRUCT_OPS(bpf_cubic_state, struct sock *sk, __u8 new_state)
SEC("struct_ops")
void BPF_PROG(bpf_cubic_state, struct sock *sk, __u8 new_state)
{
if (new_state == TCP_CA_Loss) {
bictcp_reset(inet_csk_ca(sk));
......@@ -433,7 +440,7 @@ void BPF_STRUCT_OPS(bpf_cubic_state, struct sock *sk, __u8 new_state)
* We apply another 100% factor because @rate is doubled at this point.
* We cap the cushion to 1ms.
*/
static __always_inline __u32 hystart_ack_delay(struct sock *sk)
static __u32 hystart_ack_delay(struct sock *sk)
{
unsigned long rate;
......@@ -444,10 +451,10 @@ static __always_inline __u32 hystart_ack_delay(struct sock *sk)
div64_ul((__u64)GSO_MAX_SIZE * 4 * USEC_PER_SEC, rate));
}
static __always_inline void hystart_update(struct sock *sk, __u32 delay)
static void hystart_update(struct sock *sk, __u32 delay)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bictcp *ca = inet_csk_ca(sk);
struct bpf_bictcp *ca = inet_csk_ca(sk);
__u32 threshold;
if (hystart_detect & HYSTART_ACK_TRAIN) {
......@@ -492,11 +499,11 @@ static __always_inline void hystart_update(struct sock *sk, __u32 delay)
int bpf_cubic_acked_called = 0;
void BPF_STRUCT_OPS(bpf_cubic_acked, struct sock *sk,
const struct ack_sample *sample)
SEC("struct_ops")
void BPF_PROG(bpf_cubic_acked, struct sock *sk, const struct ack_sample *sample)
{
const struct tcp_sock *tp = tcp_sk(sk);
struct bictcp *ca = inet_csk_ca(sk);
struct bpf_bictcp *ca = inet_csk_ca(sk);
__u32 delay;
bpf_cubic_acked_called = 1;
......@@ -524,7 +531,8 @@ void BPF_STRUCT_OPS(bpf_cubic_acked, struct sock *sk,
extern __u32 tcp_reno_undo_cwnd(struct sock *sk) __ksym;
__u32 BPF_STRUCT_OPS(bpf_cubic_undo_cwnd, struct sock *sk)
SEC("struct_ops")
__u32 BPF_PROG(bpf_cubic_undo_cwnd, struct sock *sk)
{
return tcp_reno_undo_cwnd(sk);
}
......
......@@ -6,15 +6,23 @@
* the kernel BPF logic.
*/
#include <stddef.h>
#include <linux/bpf.h>
#include <linux/types.h>
#include <linux/stddef.h>
#include <linux/tcp.h>
#include <errno.h>
#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "bpf_tcp_helpers.h"
#ifndef EBUSY
#define EBUSY 16
#endif
/*
 * Plain ternary min/max: an argument can be evaluated twice, so do not pass
 * expressions with side effects.
 */
#define min(a, b) ((a) < (b) ? (a) : (b))
#define max(a, b) ((a) > (b) ? (a) : (b))
/*
 * Smaller of x and y, ignoring zeros: if one operand is 0 the other is
 * returned (0 only when both are 0).  x and y are evaluated once each into
 * typeof() temporaries.
 */
#define min_not_zero(x, y) ({ \
typeof(x) __x = (x); \
typeof(y) __y = (y); \
__x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); })
/*
 * Wrap-around aware comparison of 32-bit sequence numbers: true when the
 * unsigned difference seq1 - seq2, reinterpreted as signed, is negative.
 */
static bool before(__u32 seq1, __u32 seq2)
{
	__s32 delta = (__s32)(seq1 - seq2);

	return delta < 0;
}
char _license[] SEC("license") = "GPL";
......@@ -35,7 +43,7 @@ struct {
#define DCTCP_MAX_ALPHA 1024U
struct dctcp {
struct bpf_dctcp {
__u32 old_delivered;
__u32 old_delivered_ce;
__u32 prior_rcv_nxt;
......@@ -48,8 +56,7 @@ struct dctcp {
static unsigned int dctcp_shift_g = 4; /* g = 1/2^4 */
static unsigned int dctcp_alpha_on_init = DCTCP_MAX_ALPHA;
static __always_inline void dctcp_reset(const struct tcp_sock *tp,
struct dctcp *ca)
static void dctcp_reset(const struct tcp_sock *tp, struct bpf_dctcp *ca)
{
ca->next_seq = tp->snd_nxt;
......@@ -57,11 +64,11 @@ static __always_inline void dctcp_reset(const struct tcp_sock *tp,
ca->old_delivered_ce = tp->delivered_ce;
}
SEC("struct_ops/dctcp_init")
SEC("struct_ops")
void BPF_PROG(dctcp_init, struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
struct dctcp *ca = inet_csk_ca(sk);
struct bpf_dctcp *ca = inet_csk_ca(sk);
int *stg;
if (!(tp->ecn_flags & TCP_ECN_OK) && fallback[0]) {
......@@ -104,21 +111,21 @@ void BPF_PROG(dctcp_init, struct sock *sk)
dctcp_reset(tp, ca);
}
SEC("struct_ops/dctcp_ssthresh")
SEC("struct_ops")
__u32 BPF_PROG(dctcp_ssthresh, struct sock *sk)
{
struct dctcp *ca = inet_csk_ca(sk);
struct bpf_dctcp *ca = inet_csk_ca(sk);
struct tcp_sock *tp = tcp_sk(sk);
ca->loss_cwnd = tp->snd_cwnd;
return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->dctcp_alpha) >> 11U), 2U);
}
SEC("struct_ops/dctcp_update_alpha")
SEC("struct_ops")
void BPF_PROG(dctcp_update_alpha, struct sock *sk, __u32 flags)
{
const struct tcp_sock *tp = tcp_sk(sk);
struct dctcp *ca = inet_csk_ca(sk);
struct bpf_dctcp *ca = inet_csk_ca(sk);
/* Expired RTT */
if (!before(tp->snd_una, ca->next_seq)) {
......@@ -144,16 +151,16 @@ void BPF_PROG(dctcp_update_alpha, struct sock *sk, __u32 flags)
}
}
static __always_inline void dctcp_react_to_loss(struct sock *sk)
static void dctcp_react_to_loss(struct sock *sk)
{
struct dctcp *ca = inet_csk_ca(sk);
struct bpf_dctcp *ca = inet_csk_ca(sk);
struct tcp_sock *tp = tcp_sk(sk);
ca->loss_cwnd = tp->snd_cwnd;
tp->snd_ssthresh = max(tp->snd_cwnd >> 1U, 2U);
}
SEC("struct_ops/dctcp_state")
SEC("struct_ops")
void BPF_PROG(dctcp_state, struct sock *sk, __u8 new_state)
{
if (new_state == TCP_CA_Recovery &&
......@@ -164,7 +171,7 @@ void BPF_PROG(dctcp_state, struct sock *sk, __u8 new_state)
*/
}
static __always_inline void dctcp_ece_ack_cwr(struct sock *sk, __u32 ce_state)
static void dctcp_ece_ack_cwr(struct sock *sk, __u32 ce_state)
{
struct tcp_sock *tp = tcp_sk(sk);
......@@ -179,9 +186,8 @@ static __always_inline void dctcp_ece_ack_cwr(struct sock *sk, __u32 ce_state)
* S: 0 <- last pkt was non-CE
* 1 <- last pkt was CE
*/
static __always_inline
void dctcp_ece_ack_update(struct sock *sk, enum tcp_ca_event evt,
__u32 *prior_rcv_nxt, __u32 *ce_state)
static void dctcp_ece_ack_update(struct sock *sk, enum tcp_ca_event evt,
__u32 *prior_rcv_nxt, __u32 *ce_state)
{
__u32 new_ce_state = (evt == CA_EVENT_ECN_IS_CE) ? 1 : 0;
......@@ -201,10 +207,10 @@ void dctcp_ece_ack_update(struct sock *sk, enum tcp_ca_event evt,
dctcp_ece_ack_cwr(sk, new_ce_state);
}
SEC("struct_ops/dctcp_cwnd_event")
SEC("struct_ops")
void BPF_PROG(dctcp_cwnd_event, struct sock *sk, enum tcp_ca_event ev)
{
struct dctcp *ca = inet_csk_ca(sk);
struct bpf_dctcp *ca = inet_csk_ca(sk);
switch (ev) {
case CA_EVENT_ECN_IS_CE:
......@@ -220,17 +226,17 @@ void BPF_PROG(dctcp_cwnd_event, struct sock *sk, enum tcp_ca_event ev)
}
}
SEC("struct_ops/dctcp_cwnd_undo")
SEC("struct_ops")
__u32 BPF_PROG(dctcp_cwnd_undo, struct sock *sk)
{
const struct dctcp *ca = inet_csk_ca(sk);
const struct bpf_dctcp *ca = inet_csk_ca(sk);
return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
}
extern void tcp_reno_cong_avoid(struct sock *sk, __u32 ack, __u32 acked) __ksym;
SEC("struct_ops/dctcp_reno_cong_avoid")
SEC("struct_ops")
void BPF_PROG(dctcp_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
{
tcp_reno_cong_avoid(sk, ack, acked);
......
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021 Facebook */
#include <stddef.h>
#include <linux/bpf.h>
#include <linux/types.h>
#include <linux/stddef.h>
#include <linux/tcp.h>
#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "bpf_tcp_helpers.h"
char _license[] SEC("license") = "GPL";
const char cubic[] = "cubic";
void BPF_STRUCT_OPS(dctcp_nouse_release, struct sock *sk)
SEC("struct_ops")
void BPF_PROG(dctcp_nouse_release, struct sock *sk)
{
bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
(void *)cubic, sizeof(cubic));
......
// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <linux/types.h>
#include <bpf/bpf_helpers.h>
#include "bpf_tracing_net.h"
#include <bpf/bpf_tracing.h>
#include "bpf_tcp_helpers.h"
char _license[] SEC("license") = "X";
void BPF_STRUCT_OPS(nogpltcp_init, struct sock *sk)
SEC("struct_ops")
void BPF_PROG(nogpltcp_init, struct sock *sk)
{
}
......
......@@ -2,6 +2,9 @@
#ifndef __BPF_TRACING_NET_H__
#define __BPF_TRACING_NET_H__
#include <vmlinux.h>
#include <bpf/bpf_core_read.h>
#define AF_INET 2
#define AF_INET6 10
......@@ -22,6 +25,7 @@
#define IP_TOS 1
#define SOL_IPV6 41
#define IPV6_TCLASS 67
#define IPV6_AUTOFLOWLABEL 70
......@@ -46,6 +50,13 @@
#define TCP_CA_NAME_MAX 16
#define TCP_NAGLE_OFF 1
#define TCP_ECN_OK 1
#define TCP_ECN_QUEUE_CWR 2
#define TCP_ECN_DEMAND_CWR 4
#define TCP_ECN_SEEN 8
#define TCP_CONG_NEEDS_ECN 0x2
#define ICSK_TIME_RETRANS 1
#define ICSK_TIME_PROBE0 3
#define ICSK_TIME_LOSS_PROBE 5
......@@ -129,4 +140,35 @@
#define tcp_jiffies32 ((__u32)bpf_jiffies64())
/* View @sk as a struct inet_connection_sock (plain pointer cast, no checks). */
static inline struct inet_connection_sock *inet_csk(const struct sock *sk)
{
	struct inet_connection_sock *icsk = (struct inet_connection_sock *)sk;

	return icsk;
}
static inline void *inet_csk_ca(const struct sock *sk)
{
return (void *)inet_csk(sk)->icsk_ca_priv;
}
/* View @sk as a struct tcp_sock (plain pointer cast, no checks). */
static inline struct tcp_sock *tcp_sk(const struct sock *sk)
{
	struct tcp_sock *tp = (struct tcp_sock *)sk;

	return tp;
}
/* True while the congestion window is still below the slow-start threshold. */
static inline bool tcp_in_slow_start(const struct tcp_sock *tp)
{
	if (tp->snd_cwnd < tp->snd_ssthresh)
		return true;

	return false;
}
/*
 * Report whether the connection is currently limited by its congestion
 * window.  Outside slow start this is the socket's is_cwnd_limited bit;
 * in slow start it is "cwnd < 2 * max_packets_out".
 */
static inline bool tcp_is_cwnd_limited(const struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);

	if (!tcp_in_slow_start(tp))
		return !!BPF_CORE_READ_BITFIELD(tp, is_cwnd_limited);

	/* In slow start, make sure cwnd can grow to twice what was ACKed. */
	return tp->snd_cwnd < 2 * tp->max_packets_out;
}
#endif
......@@ -14,8 +14,6 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#include "bpf_tcp_helpers.h"
#define SRC_REWRITE_IP4 0x7f000004U
#define DST_REWRITE_IP4 0x7f000001U
#define DST_REWRITE_PORT4 4444
......@@ -32,6 +30,10 @@
#define IFNAMSIZ 16
#endif
#ifndef SOL_TCP
#define SOL_TCP 6
#endif
__attribute__ ((noinline)) __weak
int do_bind(struct bpf_sock_addr *ctx)
{
......
......@@ -3,8 +3,8 @@
#include <linux/types.h>
#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>
#include "bpf_tracing_net.h"
struct bpf_fib_lookup fib_params = {};
int fib_lookup_ret = 0;
......
......@@ -2,9 +2,9 @@
/* Copyright (c) 2020, Tessares SA. */
/* Copyright (c) 2022, SUSE. */
#include <linux/bpf.h>
#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
#include "bpf_tcp_helpers.h"
#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
__u32 token = 0;
......
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021 Facebook */
#include <string.h>
#include <linux/tcp.h>
#include <netinet/in.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_tcp_helpers.h"
#include "bpf_tracing_net.h"
char _license[] SEC("license") = "GPL";
__s32 page_size = 0;
const char cc_reno[TCP_CA_NAME_MAX] = "reno";
const char cc_cubic[TCP_CA_NAME_MAX] = "cubic";
SEC("cgroup/setsockopt")
int sockopt_qos_to_cc(struct bpf_sockopt *ctx)
{
void *optval_end = ctx->optval_end;
int *optval = ctx->optval;
char buf[TCP_CA_NAME_MAX];
char cc_reno[TCP_CA_NAME_MAX] = "reno";
char cc_cubic[TCP_CA_NAME_MAX] = "cubic";
if (ctx->level != SOL_IPV6 || ctx->optname != IPV6_TCLASS)
goto out;
......@@ -29,11 +25,11 @@ int sockopt_qos_to_cc(struct bpf_sockopt *ctx)
if (bpf_getsockopt(ctx->sk, SOL_TCP, TCP_CONGESTION, &buf, sizeof(buf)))
return 0;
if (!tcp_cc_eq(buf, cc_cubic))
if (bpf_strncmp(buf, sizeof(buf), cc_cubic))
return 0;
if (*optval == 0x2d) {
if (bpf_setsockopt(ctx->sk, SOL_TCP, TCP_CONGESTION, &cc_reno,
if (bpf_setsockopt(ctx->sk, SOL_TCP, TCP_CONGESTION, (void *)&cc_reno,
sizeof(cc_reno)))
return 0;
}
......
// SPDX-License-Identifier: GPL-2.0
#include "vmlinux.h"
#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
static inline struct tcp_sock *tcp_sk(const struct sock *sk)
{
return (struct tcp_sock *)sk;
}
SEC("struct_ops/incompl_cong_ops_ssthresh")
SEC("struct_ops")
__u32 BPF_PROG(incompl_cong_ops_ssthresh, struct sock *sk)
{
return tcp_sk(sk)->snd_ssthresh;
}
SEC("struct_ops/incompl_cong_ops_undo_cwnd")
SEC("struct_ops")
__u32 BPF_PROG(incompl_cong_ops_undo_cwnd, struct sock *sk)
{
return tcp_sk(sk)->snd_cwnd;
......
......@@ -27,7 +27,7 @@ extern void cubictcp_state(struct sock *sk, u8 new_state) __ksym;
extern void cubictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event) __ksym;
extern void cubictcp_acked(struct sock *sk, const struct ack_sample *sample) __ksym;
SEC("struct_ops/init")
SEC("struct_ops")
void BPF_PROG(init, struct sock *sk)
{
bbr_init(sk);
......@@ -35,38 +35,38 @@ void BPF_PROG(init, struct sock *sk)
cubictcp_init(sk);
}
SEC("struct_ops/in_ack_event")
SEC("struct_ops")
void BPF_PROG(in_ack_event, struct sock *sk, u32 flags)
{
dctcp_update_alpha(sk, flags);
}
SEC("struct_ops/cong_control")
SEC("struct_ops")
void BPF_PROG(cong_control, struct sock *sk, u32 ack, int flag, const struct rate_sample *rs)
{
bbr_main(sk, ack, flag, rs);
}
SEC("struct_ops/cong_avoid")
SEC("struct_ops")
void BPF_PROG(cong_avoid, struct sock *sk, u32 ack, u32 acked)
{
cubictcp_cong_avoid(sk, ack, acked);
}
SEC("struct_ops/sndbuf_expand")
SEC("struct_ops")
u32 BPF_PROG(sndbuf_expand, struct sock *sk)
{
return bbr_sndbuf_expand(sk);
}
SEC("struct_ops/undo_cwnd")
SEC("struct_ops")
u32 BPF_PROG(undo_cwnd, struct sock *sk)
{
bbr_undo_cwnd(sk);
return dctcp_cwnd_undo(sk);
}
SEC("struct_ops/cwnd_event")
SEC("struct_ops")
void BPF_PROG(cwnd_event, struct sock *sk, enum tcp_ca_event event)
{
bbr_cwnd_event(sk, event);
......@@ -74,7 +74,7 @@ void BPF_PROG(cwnd_event, struct sock *sk, enum tcp_ca_event event)
cubictcp_cwnd_event(sk, event);
}
SEC("struct_ops/ssthresh")
SEC("struct_ops")
u32 BPF_PROG(ssthresh, struct sock *sk)
{
bbr_ssthresh(sk);
......@@ -82,13 +82,13 @@ u32 BPF_PROG(ssthresh, struct sock *sk)
return cubictcp_recalc_ssthresh(sk);
}
SEC("struct_ops/min_tso_segs")
SEC("struct_ops")
u32 BPF_PROG(min_tso_segs, struct sock *sk)
{
return bbr_min_tso_segs(sk);
}
SEC("struct_ops/set_state")
SEC("struct_ops")
void BPF_PROG(set_state, struct sock *sk, u8 new_state)
{
bbr_set_state(sk, new_state);
......@@ -96,7 +96,7 @@ void BPF_PROG(set_state, struct sock *sk, u8 new_state)
cubictcp_state(sk, new_state);
}
SEC("struct_ops/pkts_acked")
SEC("struct_ops")
void BPF_PROG(pkts_acked, struct sock *sk, const struct ack_sample *sample)
{
cubictcp_acked(sk, sample);
......
......@@ -7,7 +7,7 @@
char _license[] SEC("license") = "GPL";
SEC("struct_ops/unsupp_cong_op_get_info")
SEC("struct_ops")
size_t BPF_PROG(unsupp_cong_op_get_info, struct sock *sk, u32 ext, int *attr,
union tcp_cc_info *info)
{
......
// SPDX-License-Identifier: GPL-2.0
#include "vmlinux.h"
#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
......@@ -10,36 +9,31 @@ char _license[] SEC("license") = "GPL";
int ca1_cnt = 0;
int ca2_cnt = 0;
static inline struct tcp_sock *tcp_sk(const struct sock *sk)
{
return (struct tcp_sock *)sk;
}
SEC("struct_ops/ca_update_1_init")
SEC("struct_ops")
void BPF_PROG(ca_update_1_init, struct sock *sk)
{
ca1_cnt++;
}
SEC("struct_ops/ca_update_2_init")
SEC("struct_ops")
void BPF_PROG(ca_update_2_init, struct sock *sk)
{
ca2_cnt++;
}
SEC("struct_ops/ca_update_cong_control")
SEC("struct_ops")
void BPF_PROG(ca_update_cong_control, struct sock *sk,
const struct rate_sample *rs)
{
}
SEC("struct_ops/ca_update_ssthresh")
SEC("struct_ops")
__u32 BPF_PROG(ca_update_ssthresh, struct sock *sk)
{
return tcp_sk(sk)->snd_ssthresh;
}
SEC("struct_ops/ca_update_undo_cwnd")
SEC("struct_ops")
__u32 BPF_PROG(ca_update_undo_cwnd, struct sock *sk)
{
return tcp_sk(sk)->snd_cwnd;
......
// SPDX-License-Identifier: GPL-2.0
#include "vmlinux.h"
#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
......@@ -11,22 +10,17 @@ char _license[] SEC("license") = "GPL";
#define min(a, b) ((a) < (b) ? (a) : (b))
static inline struct tcp_sock *tcp_sk(const struct sock *sk)
{
return (struct tcp_sock *)sk;
}
static inline unsigned int tcp_left_out(const struct tcp_sock *tp)
static unsigned int tcp_left_out(const struct tcp_sock *tp)
{
return tp->sacked_out + tp->lost_out;
}
static inline unsigned int tcp_packets_in_flight(const struct tcp_sock *tp)
static unsigned int tcp_packets_in_flight(const struct tcp_sock *tp)
{
return tp->packets_out - tcp_left_out(tp) + tp->retrans_out;
}
SEC("struct_ops/write_sk_pacing_init")
SEC("struct_ops")
void BPF_PROG(write_sk_pacing_init, struct sock *sk)
{
#ifdef ENABLE_ATOMICS_TESTS
......@@ -37,7 +31,7 @@ void BPF_PROG(write_sk_pacing_init, struct sock *sk)
#endif
}
SEC("struct_ops/write_sk_pacing_cong_control")
SEC("struct_ops")
void BPF_PROG(write_sk_pacing_cong_control, struct sock *sk,
const struct rate_sample *rs)
{
......@@ -49,13 +43,13 @@ void BPF_PROG(write_sk_pacing_cong_control, struct sock *sk,
tp->app_limited = (tp->delivered + tcp_packets_in_flight(tp)) ?: 1;
}
SEC("struct_ops/write_sk_pacing_ssthresh")
SEC("struct_ops")
__u32 BPF_PROG(write_sk_pacing_ssthresh, struct sock *sk)
{
return tcp_sk(sk)->snd_ssthresh;
}
SEC("struct_ops/write_sk_pacing_undo_cwnd")
SEC("struct_ops")
__u32 BPF_PROG(write_sk_pacing_undo_cwnd, struct sock *sk)
{
return tcp_sk(sk)->snd_cwnd;
......
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
#include <string.h>
#include <errno.h>
#include <netinet/in.h>
#include <linux/stddef.h>
#include <linux/bpf.h>
#include <linux/ipv6.h>
#include <linux/tcp.h>
#include <linux/if_ether.h>
#include <linux/pkt_cls.h>
#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#include "bpf_tcp_helpers.h"
#ifndef ENOENT
#define ENOENT 2
#endif
struct sockaddr_in6 srv_sa6 = {};
__u16 listen_tp_sport = 0;
......
......@@ -3,7 +3,7 @@
#include <bpf/bpf_endian.h>
#include <bpf/bpf_helpers.h>
#include <linux/ip.h>
#include "bpf_tracing_net.h"
#include <linux/if_ether.h>
/* We don't care about whether the packet can be received by network stack.
* Just care if the packet is sent to the correct device at correct direction
......
......@@ -7,7 +7,6 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#include "bpf_tcp_helpers.h"
enum bpf_linum_array_idx {
EGRESS_LINUM_IDX,
......@@ -42,6 +41,10 @@ struct {
__type(value, struct bpf_spinlock_cnt);
} sk_pkt_out_cnt10 SEC(".maps");
/*
 * Minimal local view of the kernel's struct tcp_sock: only lsndtime is
 * declared, and preserve_access_index makes accesses to it CO-RE relocated.
 */
struct tcp_sock {
__u32 lsndtime;
} __attribute__((preserve_access_index));
struct bpf_tcp_sock listen_tp = {};
struct sockaddr_in6 srv_sa6 = {};
struct bpf_tcp_sock cli_tp = {};
......
// SPDX-License-Identifier: GPL-2.0
#include <stddef.h>
#include <string.h>
#include <netinet/in.h>
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/tcp.h>
#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#include "bpf_tcp_helpers.h"
#include "test_tcpbpf.h"
struct tcpbpf_globals global = {};
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment