Commit 248c7f9c authored by Magnus Karlsson, committed by Daniel Borkmann

samples/bpf: convert xdpsock to use libbpf for AF_XDP access

This commit converts the xdpsock sample application to use the AF_XDP
functions present in libbpf. This cuts down the size of it by nearly
300 lines of code.

The default ring sizes and the batch size have been increased, and the
size of the umem area has been decreased, so that the sample application
provides higher throughput. Note also that the shared umem code
has been removed from the sample, as this is not supported by libbpf
at this point in time.
Tested-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
parent 1cad0788
...@@ -163,7 +163,6 @@ always += xdp2skb_meta_kern.o ...@@ -163,7 +163,6 @@ always += xdp2skb_meta_kern.o
always += syscall_tp_kern.o always += syscall_tp_kern.o
always += cpustat_kern.o always += cpustat_kern.o
always += xdp_adjust_tail_kern.o always += xdp_adjust_tail_kern.o
always += xdpsock_kern.o
always += xdp_fwd_kern.o always += xdp_fwd_kern.o
always += task_fd_query_kern.o always += task_fd_query_kern.o
always += xdp_sample_pkts_kern.o always += xdp_sample_pkts_kern.o
......
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef XDPSOCK_H_
#define XDPSOCK_H_
/*
 * Compile-time settings included by both the XDP program and the
 * user-space part of the xdpsock sample.
 */
/*
 * Power-of-2 number of sockets. This is the capacity of xsks_map, and the
 * round-robin index in the XDP program is wrapped with
 * "& (MAX_SOCKS - 1)", which is only a valid modulo for powers of two.
 */
#define MAX_SOCKS 4
/*
 * Round-robin receive. Non-zero makes the XDP program rotate through the
 * socket slots; with 0 every packet is redirected to slot 0.
 */
#define RR_LB 0
#endif /* XDPSOCK_H_ */
// SPDX-License-Identifier: GPL-2.0
#define KBUILD_MODNAME "foo"
#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"
#include "xdpsock.h"
/*
 * One-entry array holding the RX queue id the sample is bound to.
 * The XDP program looks up key 0 and compares the stored value with
 * ctx->rx_queue_index; user space stores the chosen queue under key 0.
 */
struct bpf_map_def SEC("maps") qidconf_map = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(int),
.value_size = sizeof(int),
.max_entries = 1,
};
/*
 * XSKMAP with MAX_SOCKS slots; the target of bpf_redirect_map() in the
 * XDP program. User space inserts one socket fd per slot index.
 */
struct bpf_map_def SEC("maps") xsks_map = {
.type = BPF_MAP_TYPE_XSKMAP,
.key_size = sizeof(int),
.value_size = sizeof(int),
.max_entries = MAX_SOCKS,
};
/*
 * One-entry per-CPU array holding the round-robin counter. It is only
 * read and advanced by the XDP program when RR_LB is non-zero.
 */
struct bpf_map_def SEC("maps") rr_map = {
.type = BPF_MAP_TYPE_PERCPU_ARRAY,
.key_size = sizeof(int),
.value_size = sizeof(unsigned int),
.max_entries = 1,
};
/*
 * XDP entry point of the sample.
 *
 * Looks up the configured queue id (slot 0 of qidconf_map). Packets that
 * arrived on a different RX queue are returned with XDP_PASS; packets on
 * the configured queue are redirected into xsks_map. The target slot is 0
 * unless RR_LB is enabled, in which case the per-CPU counter in rr_map is
 * advanced and used as the slot.
 *
 * Returns XDP_ABORTED if a map lookup fails, otherwise XDP_PASS or the
 * result of bpf_redirect_map().
 */
SEC("xdp_sock")
int xdp_sock_prog(struct xdp_md *ctx)
{
	int slot = 0, sock_idx = 0;
	unsigned int *rr_state;
	int *wanted_queue;

	wanted_queue = bpf_map_lookup_elem(&qidconf_map, &slot);
	if (!wanted_queue)
		return XDP_ABORTED;

	/* Only the configured RX queue is steered to the sockets. */
	if (ctx->rx_queue_index != *wanted_queue)
		return XDP_PASS;

#if RR_LB /* NB! RR_LB is configured in xdpsock.h */
	rr_state = bpf_map_lookup_elem(&rr_map, &slot);
	if (!rr_state)
		return XDP_ABORTED;

	/* MAX_SOCKS is a power of two, so the mask wraps the counter. */
	*rr_state = (*rr_state + 1) & (MAX_SOCKS - 1);
	sock_idx = *rr_state;
#endif

	return bpf_redirect_map(&xsks_map, sock_idx, 0);
}
char _license[] SEC("license") = "GPL";
// SPDX-License-Identifier: GPL-2.0 // SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2017 - 2018 Intel Corporation. */ /* Copyright(c) 2017 - 2018 Intel Corporation. */
#include <assert.h> #include <asm/barrier.h>
#include <errno.h> #include <errno.h>
#include <getopt.h> #include <getopt.h>
#include <libgen.h> #include <libgen.h>
#include <linux/bpf.h> #include <linux/bpf.h>
#include <linux/compiler.h>
#include <linux/if_link.h> #include <linux/if_link.h>
#include <linux/if_xdp.h> #include <linux/if_xdp.h>
#include <linux/if_ether.h> #include <linux/if_ether.h>
#include <locale.h>
#include <net/ethernet.h>
#include <net/if.h> #include <net/if.h>
#include <poll.h>
#include <pthread.h>
#include <signal.h> #include <signal.h>
#include <stdbool.h> #include <stdbool.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <net/ethernet.h> #include <sys/mman.h>
#include <sys/resource.h> #include <sys/resource.h>
#include <sys/socket.h> #include <sys/socket.h>
#include <sys/mman.h> #include <sys/types.h>
#include <time.h> #include <time.h>
#include <unistd.h> #include <unistd.h>
#include <pthread.h>
#include <locale.h>
#include <sys/types.h>
#include <poll.h>
#include "bpf/libbpf.h" #include "bpf/libbpf.h"
#include "bpf_util.h" #include "bpf/xsk.h"
#include <bpf/bpf.h> #include <bpf/bpf.h>
#include "xdpsock.h"
#ifndef SOL_XDP #ifndef SOL_XDP
#define SOL_XDP 283 #define SOL_XDP 283
#endif #endif
...@@ -44,17 +43,11 @@ ...@@ -44,17 +43,11 @@
#define PF_XDP AF_XDP #define PF_XDP AF_XDP
#endif #endif
#define NUM_FRAMES 131072 #define NUM_FRAMES (4 * 1024)
#define FRAME_HEADROOM 0 #define BATCH_SIZE 64
#define FRAME_SHIFT 11
#define FRAME_SIZE 2048
#define NUM_DESCS 1024
#define BATCH_SIZE 16
#define FQ_NUM_DESCS 1024
#define CQ_NUM_DESCS 1024
#define DEBUG_HEXDUMP 0 #define DEBUG_HEXDUMP 0
#define MAX_SOCKS 8
typedef __u64 u64; typedef __u64 u64;
typedef __u32 u32; typedef __u32 u32;
...@@ -73,54 +66,31 @@ static const char *opt_if = ""; ...@@ -73,54 +66,31 @@ static const char *opt_if = "";
static int opt_ifindex; static int opt_ifindex;
static int opt_queue; static int opt_queue;
static int opt_poll; static int opt_poll;
static int opt_shared_packet_buffer;
static int opt_interval = 1; static int opt_interval = 1;
static u32 opt_xdp_bind_flags; static u32 opt_xdp_bind_flags;
static __u32 prog_id; static __u32 prog_id;
struct xdp_umem_uqueue { struct xsk_umem_info {
u32 cached_prod; struct xsk_ring_prod fq;
u32 cached_cons; struct xsk_ring_cons cq;
u32 mask; struct xsk_umem *umem;
u32 size; void *buffer;
u32 *producer;
u32 *consumer;
u64 *ring;
void *map;
}; };
struct xdp_umem { struct xsk_socket_info {
char *frames; struct xsk_ring_cons rx;
struct xdp_umem_uqueue fq; struct xsk_ring_prod tx;
struct xdp_umem_uqueue cq; struct xsk_umem_info *umem;
int fd; struct xsk_socket *xsk;
};
struct xdp_uqueue {
u32 cached_prod;
u32 cached_cons;
u32 mask;
u32 size;
u32 *producer;
u32 *consumer;
struct xdp_desc *ring;
void *map;
};
struct xdpsock {
struct xdp_uqueue rx;
struct xdp_uqueue tx;
int sfd;
struct xdp_umem *umem;
u32 outstanding_tx;
unsigned long rx_npkts; unsigned long rx_npkts;
unsigned long tx_npkts; unsigned long tx_npkts;
unsigned long prev_rx_npkts; unsigned long prev_rx_npkts;
unsigned long prev_tx_npkts; unsigned long prev_tx_npkts;
u32 outstanding_tx;
}; };
static int num_socks; static int num_socks;
struct xdpsock *xsks[MAX_SOCKS]; struct xsk_socket_info *xsks[MAX_SOCKS];
static unsigned long get_nsecs(void) static unsigned long get_nsecs(void)
{ {
...@@ -130,225 +100,124 @@ static unsigned long get_nsecs(void) ...@@ -130,225 +100,124 @@ static unsigned long get_nsecs(void)
return ts.tv_sec * 1000000000UL + ts.tv_nsec; return ts.tv_sec * 1000000000UL + ts.tv_nsec;
} }
static void dump_stats(void); static void print_benchmark(bool running)
/*
 * lassert(expr) - fatal runtime check.
 *
 * Evaluates expr; when it is false, prints the file, function and line,
 * the stringified expression and the current errno (number and
 * strerror() text) to stderr, calls dump_stats() and exits with
 * EXIT_FAILURE. The expression is always evaluated, so call sites may
 * (and do) place the checked call itself inside the macro argument.
 */
#define lassert(expr) \
do { \
if (!(expr)) { \
fprintf(stderr, "%s:%s:%i: Assertion failed: " \
#expr ": errno: %d/\"%s\"\n", \
__FILE__, __func__, __LINE__, \
errno, strerror(errno)); \
dump_stats(); \
exit(EXIT_FAILURE); \
} \
} while (0)
#define barrier() __asm__ __volatile__("": : :"memory")
#ifdef __aarch64__
#define u_smp_rmb() __asm__ __volatile__("dmb ishld": : :"memory")
#define u_smp_wmb() __asm__ __volatile__("dmb ishst": : :"memory")
#else
#define u_smp_rmb() barrier()
#define u_smp_wmb() barrier()
#endif
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
/*
 * Template frame copied into the umem frames for transmission.
 * Users take sizeof(pkt_data) - 1 as the frame length so the NUL that the
 * string literal appends is not sent.
 * NOTE(review): the bytes look like an Ethernet + IPv4 + UDP packet -
 * confirm before relying on specific header fields.
 */
static const char pkt_data[] =
"\x3c\xfd\xfe\x9e\x7f\x71\xec\xb1\xd7\x98\x3a\xc0\x08\x00\x45\x00"
"\x00\x2e\x00\x00\x00\x00\x40\x11\x88\x97\x05\x08\x07\x08\xc8\x14"
"\x1e\x04\x10\x92\x10\x92\x00\x1a\x6d\xa3\x34\x33\x1f\x69\x40\x6b"
"\x54\x59\xb6\x14\x2d\x11\x44\xbf\xaf\xd9\xbe\xaa";
/*
 * Return how many entries can still be produced into umem ring @q.
 *
 * The answer is first computed from the cached indices. Only when that is
 * smaller than the @nb entries the caller wants is the cached consumer
 * refreshed from the shared consumer index (offset by the ring size) and
 * the count recomputed.
 */
static inline u32 umem_nb_free(struct xdp_umem_uqueue *q, u32 nb)
{
	u32 room = q->cached_cons - q->cached_prod;

	if (room < nb) {
		/* Refresh the local tail pointer */
		q->cached_cons = *q->consumer + q->size;
		room = q->cached_cons - q->cached_prod;
	}

	return room;
}
static inline u32 xq_nb_free(struct xdp_uqueue *q, u32 ndescs)
{ {
u32 free_entries = q->cached_cons - q->cached_prod; const char *bench_str = "INVALID";
if (free_entries >= ndescs) if (opt_bench == BENCH_RXDROP)
return free_entries; bench_str = "rxdrop";
else if (opt_bench == BENCH_TXONLY)
bench_str = "txonly";
else if (opt_bench == BENCH_L2FWD)
bench_str = "l2fwd";
/* Refresh the local tail pointer */ printf("%s:%d %s ", opt_if, opt_queue, bench_str);
q->cached_cons = *q->consumer + q->size; if (opt_xdp_flags & XDP_FLAGS_SKB_MODE)
return q->cached_cons - q->cached_prod; printf("xdp-skb ");
} else if (opt_xdp_flags & XDP_FLAGS_DRV_MODE)
printf("xdp-drv ");
else
printf(" ");
static inline u32 umem_nb_avail(struct xdp_umem_uqueue *q, u32 nb) if (opt_poll)
{ printf("poll() ");
u32 entries = q->cached_prod - q->cached_cons;
if (entries == 0) { if (running) {
q->cached_prod = *q->producer; printf("running...");
entries = q->cached_prod - q->cached_cons; fflush(stdout);
} }
return (entries > nb) ? nb : entries;
} }
static inline u32 xq_nb_avail(struct xdp_uqueue *q, u32 ndescs) static void dump_stats(void)
{ {
u32 entries = q->cached_prod - q->cached_cons; unsigned long now = get_nsecs();
long dt = now - prev_time;
int i;
if (entries == 0) { prev_time = now;
q->cached_prod = *q->producer;
entries = q->cached_prod - q->cached_cons;
}
return (entries > ndescs) ? ndescs : entries; for (i = 0; i < num_socks && xsks[i]; i++) {
} char *fmt = "%-15s %'-11.0f %'-11lu\n";
double rx_pps, tx_pps;
static inline int umem_fill_to_kernel_ex(struct xdp_umem_uqueue *fq, rx_pps = (xsks[i]->rx_npkts - xsks[i]->prev_rx_npkts) *
struct xdp_desc *d, 1000000000. / dt;
size_t nb) tx_pps = (xsks[i]->tx_npkts - xsks[i]->prev_tx_npkts) *
{ 1000000000. / dt;
u32 i;
if (umem_nb_free(fq, nb) < nb) printf("\n sock%d@", i);
return -ENOSPC; print_benchmark(false);
printf("\n");
for (i = 0; i < nb; i++) { printf("%-15s %-11s %-11s %-11.2f\n", "", "pps", "pkts",
u32 idx = fq->cached_prod++ & fq->mask; dt / 1000000000.);
printf(fmt, "rx", rx_pps, xsks[i]->rx_npkts);
printf(fmt, "tx", tx_pps, xsks[i]->tx_npkts);
fq->ring[idx] = d[i].addr; xsks[i]->prev_rx_npkts = xsks[i]->rx_npkts;
xsks[i]->prev_tx_npkts = xsks[i]->tx_npkts;
} }
u_smp_wmb();
*fq->producer = fq->cached_prod;
return 0;
} }
static inline int umem_fill_to_kernel(struct xdp_umem_uqueue *fq, u64 *d, static void *poller(void *arg)
size_t nb)
{ {
u32 i; (void)arg;
for (;;) {
if (umem_nb_free(fq, nb) < nb) sleep(opt_interval);
return -ENOSPC; dump_stats();
for (i = 0; i < nb; i++) {
u32 idx = fq->cached_prod++ & fq->mask;
fq->ring[idx] = d[i];
} }
u_smp_wmb(); return NULL;
*fq->producer = fq->cached_prod;
return 0;
} }
static inline size_t umem_complete_from_kernel(struct xdp_umem_uqueue *cq, static void remove_xdp_program(void)
u64 *d, size_t nb)
{ {
u32 idx, i, entries = umem_nb_avail(cq, nb); __u32 curr_prog_id = 0;
u_smp_rmb();
for (i = 0; i < entries; i++) {
idx = cq->cached_cons++ & cq->mask;
d[i] = cq->ring[idx];
}
if (entries > 0) {
u_smp_wmb();
*cq->consumer = cq->cached_cons; if (bpf_get_link_xdp_id(opt_ifindex, &curr_prog_id, opt_xdp_flags)) {
printf("bpf_get_link_xdp_id failed\n");
exit(EXIT_FAILURE);
} }
if (prog_id == curr_prog_id)
return entries; bpf_set_link_xdp_fd(opt_ifindex, -1, opt_xdp_flags);
} else if (!curr_prog_id)
printf("couldn't find a prog id on a given interface\n");
static inline void *xq_get_data(struct xdpsock *xsk, u64 addr) else
{ printf("program on interface changed, not removing\n");
return &xsk->umem->frames[addr];
} }
static inline int xq_enq(struct xdp_uqueue *uq, static void int_exit(int sig)
const struct xdp_desc *descs,
unsigned int ndescs)
{ {
struct xdp_desc *r = uq->ring; struct xsk_umem *umem = xsks[0]->umem->umem;
unsigned int i;
if (xq_nb_free(uq, ndescs) < ndescs) (void)sig;
return -ENOSPC;
for (i = 0; i < ndescs; i++) {
u32 idx = uq->cached_prod++ & uq->mask;
r[idx].addr = descs[i].addr;
r[idx].len = descs[i].len;
}
u_smp_wmb(); dump_stats();
xsk_socket__delete(xsks[0]->xsk);
(void)xsk_umem__delete(umem);
remove_xdp_program();
*uq->producer = uq->cached_prod; exit(EXIT_SUCCESS);
return 0;
} }
static inline int xq_enq_tx_only(struct xdp_uqueue *uq, static void __exit_with_error(int error, const char *file, const char *func,
unsigned int id, unsigned int ndescs) int line)
{ {
struct xdp_desc *r = uq->ring; fprintf(stderr, "%s:%s:%i: errno: %d/\"%s\"\n", file, func,
unsigned int i; line, error, strerror(error));
dump_stats();
if (xq_nb_free(uq, ndescs) < ndescs) remove_xdp_program();
return -ENOSPC; exit(EXIT_FAILURE);
for (i = 0; i < ndescs; i++) {
u32 idx = uq->cached_prod++ & uq->mask;
r[idx].addr = (id + i) << FRAME_SHIFT;
r[idx].len = sizeof(pkt_data) - 1;
}
u_smp_wmb();
*uq->producer = uq->cached_prod;
return 0;
} }
static inline int xq_deq(struct xdp_uqueue *uq, #define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, \
struct xdp_desc *descs, __LINE__)
int ndescs)
{
struct xdp_desc *r = uq->ring;
unsigned int idx;
int i, entries;
entries = xq_nb_avail(uq, ndescs);
u_smp_rmb();
for (i = 0; i < entries; i++) {
idx = uq->cached_cons++ & uq->mask;
descs[i] = r[idx];
}
if (entries > 0) {
u_smp_wmb();
*uq->consumer = uq->cached_cons; static const char pkt_data[] =
} "\x3c\xfd\xfe\x9e\x7f\x71\xec\xb1\xd7\x98\x3a\xc0\x08\x00\x45\x00"
"\x00\x2e\x00\x00\x00\x00\x40\x11\x88\x97\x05\x08\x07\x08\xc8\x14"
return entries; "\x1e\x04\x10\x92\x10\x92\x00\x1a\x6d\xa3\x34\x33\x1f\x69\x40\x6b"
} "\x54\x59\xb6\x14\x2d\x11\x44\xbf\xaf\xd9\xbe\xaa";
static void swap_mac_addresses(void *data) static void swap_mac_addresses(void *data)
{ {
...@@ -397,258 +266,74 @@ static void hex_dump(void *pkt, size_t length, u64 addr) ...@@ -397,258 +266,74 @@ static void hex_dump(void *pkt, size_t length, u64 addr)
printf("\n"); printf("\n");
} }
static size_t gen_eth_frame(char *frame) static size_t gen_eth_frame(struct xsk_umem_info *umem, u64 addr)
{ {
memcpy(frame, pkt_data, sizeof(pkt_data) - 1); memcpy(xsk_umem__get_data(umem->buffer, addr), pkt_data,
sizeof(pkt_data) - 1);
return sizeof(pkt_data) - 1; return sizeof(pkt_data) - 1;
} }
static struct xdp_umem *xdp_umem_configure(int sfd) static struct xsk_umem_info *xsk_configure_umem(void *buffer, u64 size)
{ {
int fq_size = FQ_NUM_DESCS, cq_size = CQ_NUM_DESCS; struct xsk_umem_info *umem;
struct xdp_mmap_offsets off; int ret;
struct xdp_umem_reg mr;
struct xdp_umem *umem;
socklen_t optlen;
void *bufs;
umem = calloc(1, sizeof(*umem)); umem = calloc(1, sizeof(*umem));
lassert(umem); if (!umem)
exit_with_error(errno);
lassert(posix_memalign(&bufs, getpagesize(), /* PAGE_SIZE aligned */
NUM_FRAMES * FRAME_SIZE) == 0);
mr.addr = (__u64)bufs;
mr.len = NUM_FRAMES * FRAME_SIZE;
mr.chunk_size = FRAME_SIZE;
mr.headroom = FRAME_HEADROOM;
lassert(setsockopt(sfd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr)) == 0);
lassert(setsockopt(sfd, SOL_XDP, XDP_UMEM_FILL_RING, &fq_size,
sizeof(int)) == 0);
lassert(setsockopt(sfd, SOL_XDP, XDP_UMEM_COMPLETION_RING, &cq_size,
sizeof(int)) == 0);
optlen = sizeof(off);
lassert(getsockopt(sfd, SOL_XDP, XDP_MMAP_OFFSETS, &off,
&optlen) == 0);
umem->fq.map = mmap(0, off.fr.desc +
FQ_NUM_DESCS * sizeof(u64),
PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_POPULATE, sfd,
XDP_UMEM_PGOFF_FILL_RING);
lassert(umem->fq.map != MAP_FAILED);
umem->fq.mask = FQ_NUM_DESCS - 1;
umem->fq.size = FQ_NUM_DESCS;
umem->fq.producer = umem->fq.map + off.fr.producer;
umem->fq.consumer = umem->fq.map + off.fr.consumer;
umem->fq.ring = umem->fq.map + off.fr.desc;
umem->fq.cached_cons = FQ_NUM_DESCS;
umem->cq.map = mmap(0, off.cr.desc +
CQ_NUM_DESCS * sizeof(u64),
PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_POPULATE, sfd,
XDP_UMEM_PGOFF_COMPLETION_RING);
lassert(umem->cq.map != MAP_FAILED);
umem->cq.mask = CQ_NUM_DESCS - 1;
umem->cq.size = CQ_NUM_DESCS;
umem->cq.producer = umem->cq.map + off.cr.producer;
umem->cq.consumer = umem->cq.map + off.cr.consumer;
umem->cq.ring = umem->cq.map + off.cr.desc;
umem->frames = bufs;
umem->fd = sfd;
if (opt_bench == BENCH_TXONLY) { ret = xsk_umem__create(&umem->umem, buffer, size, &umem->fq, &umem->cq,
int i; NULL);
if (ret)
for (i = 0; i < NUM_FRAMES * FRAME_SIZE; i += FRAME_SIZE) exit_with_error(-ret);
(void)gen_eth_frame(&umem->frames[i]);
}
umem->buffer = buffer;
return umem; return umem;
} }
static struct xdpsock *xsk_configure(struct xdp_umem *umem) static struct xsk_socket_info *xsk_configure_socket(struct xsk_umem_info *umem)
{ {
struct sockaddr_xdp sxdp = {}; struct xsk_socket_config cfg;
struct xdp_mmap_offsets off; struct xsk_socket_info *xsk;
int sfd, ndescs = NUM_DESCS; int ret;
struct xdpsock *xsk; u32 idx;
bool shared = true; int i;
socklen_t optlen;
u64 i;
sfd = socket(PF_XDP, SOCK_RAW, 0);
lassert(sfd >= 0);
xsk = calloc(1, sizeof(*xsk)); xsk = calloc(1, sizeof(*xsk));
lassert(xsk); if (!xsk)
exit_with_error(errno);
xsk->sfd = sfd;
xsk->outstanding_tx = 0;
if (!umem) {
shared = false;
xsk->umem = xdp_umem_configure(sfd);
} else {
xsk->umem = umem; xsk->umem = umem;
} cfg.rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
lassert(setsockopt(sfd, SOL_XDP, XDP_RX_RING, cfg.libbpf_flags = 0;
&ndescs, sizeof(int)) == 0); cfg.xdp_flags = opt_xdp_flags;
lassert(setsockopt(sfd, SOL_XDP, XDP_TX_RING, cfg.bind_flags = opt_xdp_bind_flags;
&ndescs, sizeof(int)) == 0); ret = xsk_socket__create(&xsk->xsk, opt_if, opt_queue, umem->umem,
optlen = sizeof(off); &xsk->rx, &xsk->tx, &cfg);
lassert(getsockopt(sfd, SOL_XDP, XDP_MMAP_OFFSETS, &off, if (ret)
&optlen) == 0); exit_with_error(-ret);
/* Rx */ ret = bpf_get_link_xdp_id(opt_ifindex, &prog_id, opt_xdp_flags);
xsk->rx.map = mmap(NULL, if (ret)
off.rx.desc + exit_with_error(-ret);
NUM_DESCS * sizeof(struct xdp_desc),
PROT_READ | PROT_WRITE, ret = xsk_ring_prod__reserve(&xsk->umem->fq,
MAP_SHARED | MAP_POPULATE, sfd, XSK_RING_PROD__DEFAULT_NUM_DESCS,
XDP_PGOFF_RX_RING); &idx);
lassert(xsk->rx.map != MAP_FAILED); if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS)
exit_with_error(-ret);
if (!shared) { for (i = 0;
for (i = 0; i < NUM_DESCS * FRAME_SIZE; i += FRAME_SIZE) i < XSK_RING_PROD__DEFAULT_NUM_DESCS *
lassert(umem_fill_to_kernel(&xsk->umem->fq, &i, 1) XSK_UMEM__DEFAULT_FRAME_SIZE;
== 0); i += XSK_UMEM__DEFAULT_FRAME_SIZE)
} *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx++) = i;
xsk_ring_prod__submit(&xsk->umem->fq,
/* Tx */ XSK_RING_PROD__DEFAULT_NUM_DESCS);
xsk->tx.map = mmap(NULL,
off.tx.desc +
NUM_DESCS * sizeof(struct xdp_desc),
PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_POPULATE, sfd,
XDP_PGOFF_TX_RING);
lassert(xsk->tx.map != MAP_FAILED);
xsk->rx.mask = NUM_DESCS - 1;
xsk->rx.size = NUM_DESCS;
xsk->rx.producer = xsk->rx.map + off.rx.producer;
xsk->rx.consumer = xsk->rx.map + off.rx.consumer;
xsk->rx.ring = xsk->rx.map + off.rx.desc;
xsk->tx.mask = NUM_DESCS - 1;
xsk->tx.size = NUM_DESCS;
xsk->tx.producer = xsk->tx.map + off.tx.producer;
xsk->tx.consumer = xsk->tx.map + off.tx.consumer;
xsk->tx.ring = xsk->tx.map + off.tx.desc;
xsk->tx.cached_cons = NUM_DESCS;
sxdp.sxdp_family = PF_XDP;
sxdp.sxdp_ifindex = opt_ifindex;
sxdp.sxdp_queue_id = opt_queue;
if (shared) {
sxdp.sxdp_flags = XDP_SHARED_UMEM;
sxdp.sxdp_shared_umem_fd = umem->fd;
} else {
sxdp.sxdp_flags = opt_xdp_bind_flags;
}
lassert(bind(sfd, (struct sockaddr *)&sxdp, sizeof(sxdp)) == 0);
return xsk; return xsk;
} }
/*
 * Print a one-line description of the current run: interface, queue,
 * benchmark name, XDP attach mode and whether poll() is used.
 *
 * @running: when true, additionally print "running..." and flush stdout.
 */
static void print_benchmark(bool running)
{
	const char *bench_str;
	const char *mode_str;

	switch (opt_bench) {
	case BENCH_RXDROP:
		bench_str = "rxdrop";
		break;
	case BENCH_TXONLY:
		bench_str = "txonly";
		break;
	case BENCH_L2FWD:
		bench_str = "l2fwd";
		break;
	default:
		bench_str = "INVALID";
		break;
	}

	/* SKB mode takes precedence when both flags happen to be set. */
	if (opt_xdp_flags & XDP_FLAGS_SKB_MODE)
		mode_str = "xdp-skb ";
	else if (opt_xdp_flags & XDP_FLAGS_DRV_MODE)
		mode_str = "xdp-drv ";
	else
		mode_str = " ";

	printf("%s:%d %s ", opt_if, opt_queue, bench_str);
	printf("%s", mode_str);

	if (opt_poll)
		printf("poll() ");

	if (!running)
		return;

	printf("running...");
	fflush(stdout);
}
/*
 * Print RX/TX packet rates and totals for every configured socket,
 * covering the time since the previous call, then store the current
 * totals as the baseline for the next call.
 *
 * Iteration stops at num_socks or at the first NULL entry of xsks[].
 */
static void dump_stats(void)
{
	unsigned long now = get_nsecs();
	long dt = now - prev_time;
	int i;

	prev_time = now;

	for (i = 0; i < num_socks && xsks[i]; i++) {
		const char *fmt = "%-15s %'-11.0f %'-11lu\n";
		/*
		 * Read each counter exactly once. This function runs from
		 * the poller thread while the benchmark loop keeps
		 * incrementing the counters; re-reading them for the rate,
		 * the printed total and the new baseline would let packets
		 * that arrive in between disappear from both intervals.
		 */
		unsigned long rx_npkts = xsks[i]->rx_npkts;
		unsigned long tx_npkts = xsks[i]->tx_npkts;
		double rx_pps, tx_pps;

		rx_pps = (rx_npkts - xsks[i]->prev_rx_npkts) *
			 1000000000. / dt;
		tx_pps = (tx_npkts - xsks[i]->prev_tx_npkts) *
			 1000000000. / dt;

		printf("\n sock%d@", i);
		print_benchmark(false);
		printf("\n");

		/* Header row; the last column is the interval in seconds. */
		printf("%-15s %-11s %-11s %-11.2f\n", "", "pps", "pkts",
		       dt / 1000000000.);
		printf(fmt, "rx", rx_pps, rx_npkts);
		printf(fmt, "tx", tx_pps, tx_npkts);

		xsks[i]->prev_rx_npkts = rx_npkts;
		xsks[i]->prev_tx_npkts = tx_npkts;
	}
}
/*
 * Statistics thread body: forever sleep opt_interval seconds, then print
 * the statistics. @arg is unused. The loop never exits, so the trailing
 * return only satisfies the thread-function signature.
 */
static void *poller(void *arg)
{
	(void)arg;

	while (1) {
		sleep(opt_interval);
		dump_stats();
	}

	return NULL;
}
/*
 * Signal handler used for shutdown: print final statistics, then detach
 * the XDP program from the interface, but only if the program currently
 * attached is still the one this process recorded in prog_id.
 *
 * Exits with EXIT_FAILURE when the attached program id cannot be queried,
 * otherwise with EXIT_SUCCESS. @sig is unused.
 */
static void int_exit(int sig)
{
	__u32 attached_id = 0;
	const char *why_kept;

	(void)sig;

	dump_stats();

	if (bpf_get_link_xdp_id(opt_ifindex, &attached_id, opt_xdp_flags)) {
		printf("bpf_get_link_xdp_id failed\n");
		exit(EXIT_FAILURE);
	}

	if (attached_id != prog_id) {
		/* Not ours (or nothing attached): leave the interface alone. */
		why_kept = attached_id ?
			"program on interface changed, not removing\n" :
			"couldn't find a prog id on a given interface\n";
		printf("%s", why_kept);
	} else {
		bpf_set_link_xdp_fd(opt_ifindex, -1, opt_xdp_flags);
	}

	exit(EXIT_SUCCESS);
}
static struct option long_options[] = { static struct option long_options[] = {
{"rxdrop", no_argument, 0, 'r'}, {"rxdrop", no_argument, 0, 'r'},
{"txonly", no_argument, 0, 't'}, {"txonly", no_argument, 0, 't'},
...@@ -656,7 +341,6 @@ static struct option long_options[] = { ...@@ -656,7 +341,6 @@ static struct option long_options[] = {
{"interface", required_argument, 0, 'i'}, {"interface", required_argument, 0, 'i'},
{"queue", required_argument, 0, 'q'}, {"queue", required_argument, 0, 'q'},
{"poll", no_argument, 0, 'p'}, {"poll", no_argument, 0, 'p'},
{"shared-buffer", no_argument, 0, 's'},
{"xdp-skb", no_argument, 0, 'S'}, {"xdp-skb", no_argument, 0, 'S'},
{"xdp-native", no_argument, 0, 'N'}, {"xdp-native", no_argument, 0, 'N'},
{"interval", required_argument, 0, 'n'}, {"interval", required_argument, 0, 'n'},
...@@ -676,7 +360,6 @@ static void usage(const char *prog) ...@@ -676,7 +360,6 @@ static void usage(const char *prog)
" -i, --interface=n Run on interface n\n" " -i, --interface=n Run on interface n\n"
" -q, --queue=n Use queue n (default 0)\n" " -q, --queue=n Use queue n (default 0)\n"
" -p, --poll Use poll syscall\n" " -p, --poll Use poll syscall\n"
" -s, --shared-buffer Use shared packet buffer\n"
" -S, --xdp-skb=n Use XDP skb-mod\n" " -S, --xdp-skb=n Use XDP skb-mod\n"
" -N, --xdp-native=n Enfore XDP native mode\n" " -N, --xdp-native=n Enfore XDP native mode\n"
" -n, --interval=n Specify statistics update interval (default 1 sec).\n" " -n, --interval=n Specify statistics update interval (default 1 sec).\n"
...@@ -715,9 +398,6 @@ static void parse_command_line(int argc, char **argv) ...@@ -715,9 +398,6 @@ static void parse_command_line(int argc, char **argv)
case 'q': case 'q':
opt_queue = atoi(optarg); opt_queue = atoi(optarg);
break; break;
case 's':
opt_shared_packet_buffer = 1;
break;
case 'p': case 'p':
opt_poll = 1; opt_poll = 1;
break; break;
...@@ -751,75 +431,104 @@ static void parse_command_line(int argc, char **argv) ...@@ -751,75 +431,104 @@ static void parse_command_line(int argc, char **argv)
opt_if); opt_if);
usage(basename(argv[0])); usage(basename(argv[0]));
} }
} }
static void kick_tx(int fd) static void kick_tx(struct xsk_socket_info *xsk)
{ {
int ret; int ret;
ret = sendto(fd, NULL, 0, MSG_DONTWAIT, NULL, 0); ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
if (ret >= 0 || errno == ENOBUFS || errno == EAGAIN || errno == EBUSY) if (ret >= 0 || errno == ENOBUFS || errno == EAGAIN || errno == EBUSY)
return; return;
lassert(0); exit_with_error(errno);
} }
static inline void complete_tx_l2fwd(struct xdpsock *xsk) static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk)
{ {
u64 descs[BATCH_SIZE]; u32 idx_cq, idx_fq;
unsigned int rcvd; unsigned int rcvd;
size_t ndescs; size_t ndescs;
if (!xsk->outstanding_tx) if (!xsk->outstanding_tx)
return; return;
kick_tx(xsk->sfd); kick_tx(xsk);
ndescs = (xsk->outstanding_tx > BATCH_SIZE) ? BATCH_SIZE : ndescs = (xsk->outstanding_tx > BATCH_SIZE) ? BATCH_SIZE :
xsk->outstanding_tx; xsk->outstanding_tx;
/* re-add completed Tx buffers */ /* re-add completed Tx buffers */
rcvd = umem_complete_from_kernel(&xsk->umem->cq, descs, ndescs); rcvd = xsk_ring_cons__peek(&xsk->umem->cq, ndescs, &idx_cq);
if (rcvd > 0) { if (rcvd > 0) {
umem_fill_to_kernel(&xsk->umem->fq, descs, rcvd); unsigned int i;
int ret;
ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
while (ret != rcvd) {
if (ret < 0)
exit_with_error(-ret);
ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd,
&idx_fq);
}
for (i = 0; i < rcvd; i++)
*xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) =
*xsk_ring_cons__comp_addr(&xsk->umem->cq,
idx_cq++);
xsk_ring_prod__submit(&xsk->umem->fq, rcvd);
xsk_ring_cons__release(&xsk->umem->cq, rcvd);
xsk->outstanding_tx -= rcvd; xsk->outstanding_tx -= rcvd;
xsk->tx_npkts += rcvd; xsk->tx_npkts += rcvd;
} }
} }
static inline void complete_tx_only(struct xdpsock *xsk) static inline void complete_tx_only(struct xsk_socket_info *xsk)
{ {
u64 descs[BATCH_SIZE];
unsigned int rcvd; unsigned int rcvd;
u32 idx;
if (!xsk->outstanding_tx) if (!xsk->outstanding_tx)
return; return;
kick_tx(xsk->sfd); kick_tx(xsk);
rcvd = umem_complete_from_kernel(&xsk->umem->cq, descs, BATCH_SIZE); rcvd = xsk_ring_cons__peek(&xsk->umem->cq, BATCH_SIZE, &idx);
if (rcvd > 0) { if (rcvd > 0) {
xsk_ring_cons__release(&xsk->umem->cq, rcvd);
xsk->outstanding_tx -= rcvd; xsk->outstanding_tx -= rcvd;
xsk->tx_npkts += rcvd; xsk->tx_npkts += rcvd;
} }
} }
static void rx_drop(struct xdpsock *xsk) static void rx_drop(struct xsk_socket_info *xsk)
{ {
struct xdp_desc descs[BATCH_SIZE];
unsigned int rcvd, i; unsigned int rcvd, i;
u32 idx_rx, idx_fq = 0;
int ret;
rcvd = xq_deq(&xsk->rx, descs, BATCH_SIZE); rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
if (!rcvd) if (!rcvd)
return; return;
ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
while (ret != rcvd) {
if (ret < 0)
exit_with_error(-ret);
ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
}
for (i = 0; i < rcvd; i++) { for (i = 0; i < rcvd; i++) {
char *pkt = xq_get_data(xsk, descs[i].addr); u64 addr = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx)->addr;
u32 len = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++)->len;
char *pkt = xsk_umem__get_data(xsk->umem->buffer, addr);
hex_dump(pkt, descs[i].len, descs[i].addr); hex_dump(pkt, len, addr);
*xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) = addr;
} }
xsk_ring_prod__submit(&xsk->umem->fq, rcvd);
xsk_ring_cons__release(&xsk->rx, rcvd);
xsk->rx_npkts += rcvd; xsk->rx_npkts += rcvd;
umem_fill_to_kernel_ex(&xsk->umem->fq, descs, rcvd);
} }
static void rx_drop_all(void) static void rx_drop_all(void)
...@@ -830,7 +539,7 @@ static void rx_drop_all(void) ...@@ -830,7 +539,7 @@ static void rx_drop_all(void)
memset(fds, 0, sizeof(fds)); memset(fds, 0, sizeof(fds));
for (i = 0; i < num_socks; i++) { for (i = 0; i < num_socks; i++) {
fds[i].fd = xsks[i]->sfd; fds[i].fd = xsk_socket__fd(xsks[i]->xsk);
fds[i].events = POLLIN; fds[i].events = POLLIN;
timeout = 1000; /* 1sn */ timeout = 1000; /* 1sn */
} }
...@@ -847,14 +556,14 @@ static void rx_drop_all(void) ...@@ -847,14 +556,14 @@ static void rx_drop_all(void)
} }
} }
static void tx_only(struct xdpsock *xsk) static void tx_only(struct xsk_socket_info *xsk)
{ {
int timeout, ret, nfds = 1; int timeout, ret, nfds = 1;
struct pollfd fds[nfds + 1]; struct pollfd fds[nfds + 1];
unsigned int idx = 0; u32 idx, frame_nb = 0;
memset(fds, 0, sizeof(fds)); memset(fds, 0, sizeof(fds));
fds[0].fd = xsk->sfd; fds[0].fd = xsk_socket__fd(xsk->xsk);
fds[0].events = POLLOUT; fds[0].events = POLLOUT;
timeout = 1000; /* 1sn */ timeout = 1000; /* 1sn */
...@@ -864,50 +573,73 @@ static void tx_only(struct xdpsock *xsk) ...@@ -864,50 +573,73 @@ static void tx_only(struct xdpsock *xsk)
if (ret <= 0) if (ret <= 0)
continue; continue;
if (fds[0].fd != xsk->sfd || if (!(fds[0].revents & POLLOUT))
!(fds[0].revents & POLLOUT))
continue; continue;
} }
if (xq_nb_free(&xsk->tx, BATCH_SIZE) >= BATCH_SIZE) { if (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) ==
lassert(xq_enq_tx_only(&xsk->tx, idx, BATCH_SIZE) == 0); BATCH_SIZE) {
unsigned int i;
for (i = 0; i < BATCH_SIZE; i++) {
xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->addr
= (frame_nb + i) <<
XSK_UMEM__DEFAULT_FRAME_SHIFT;
xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->len =
sizeof(pkt_data) - 1;
}
xsk_ring_prod__submit(&xsk->tx, BATCH_SIZE);
xsk->outstanding_tx += BATCH_SIZE; xsk->outstanding_tx += BATCH_SIZE;
idx += BATCH_SIZE; frame_nb += BATCH_SIZE;
idx %= NUM_FRAMES; frame_nb %= NUM_FRAMES;
} }
complete_tx_only(xsk); complete_tx_only(xsk);
} }
} }
static void l2fwd(struct xdpsock *xsk) static void l2fwd(struct xsk_socket_info *xsk)
{ {
for (;;) { for (;;) {
struct xdp_desc descs[BATCH_SIZE];
unsigned int rcvd, i; unsigned int rcvd, i;
u32 idx_rx, idx_tx = 0;
int ret; int ret;
for (;;) { for (;;) {
complete_tx_l2fwd(xsk); complete_tx_l2fwd(xsk);
rcvd = xq_deq(&xsk->rx, descs, BATCH_SIZE); rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE,
&idx_rx);
if (rcvd > 0) if (rcvd > 0)
break; break;
} }
ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx);
while (ret != rcvd) {
if (ret < 0)
exit_with_error(-ret);
ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx);
}
for (i = 0; i < rcvd; i++) { for (i = 0; i < rcvd; i++) {
char *pkt = xq_get_data(xsk, descs[i].addr); u64 addr = xsk_ring_cons__rx_desc(&xsk->rx,
idx_rx)->addr;
u32 len = xsk_ring_cons__rx_desc(&xsk->rx,
idx_rx++)->len;
char *pkt = xsk_umem__get_data(xsk->umem->buffer, addr);
swap_mac_addresses(pkt); swap_mac_addresses(pkt);
hex_dump(pkt, descs[i].len, descs[i].addr); hex_dump(pkt, len, addr);
xsk_ring_prod__tx_desc(&xsk->tx, idx_tx)->addr = addr;
xsk_ring_prod__tx_desc(&xsk->tx, idx_tx++)->len = len;
} }
xsk->rx_npkts += rcvd; xsk_ring_prod__submit(&xsk->tx, rcvd);
xsk_ring_cons__release(&xsk->rx, rcvd);
ret = xq_enq(&xsk->tx, descs, rcvd); xsk->rx_npkts += rcvd;
lassert(ret == 0);
xsk->outstanding_tx += rcvd; xsk->outstanding_tx += rcvd;
} }
} }
...@@ -915,17 +647,10 @@ static void l2fwd(struct xdpsock *xsk) ...@@ -915,17 +647,10 @@ static void l2fwd(struct xdpsock *xsk)
int main(int argc, char **argv) int main(int argc, char **argv)
{ {
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
struct bpf_prog_load_attr prog_load_attr = { struct xsk_umem_info *umem;
.prog_type = BPF_PROG_TYPE_XDP,
};
int prog_fd, qidconf_map, xsks_map;
struct bpf_prog_info info = {};
__u32 info_len = sizeof(info);
struct bpf_object *obj;
char xdp_filename[256];
struct bpf_map *map;
int i, ret, key = 0;
pthread_t pt; pthread_t pt;
void *bufs;
int ret;
parse_command_line(argc, argv); parse_command_line(argc, argv);
...@@ -935,67 +660,22 @@ int main(int argc, char **argv) ...@@ -935,67 +660,22 @@ int main(int argc, char **argv)
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
snprintf(xdp_filename, sizeof(xdp_filename), "%s_kern.o", argv[0]); ret = posix_memalign(&bufs, getpagesize(), /* PAGE_SIZE aligned */
prog_load_attr.file = xdp_filename; NUM_FRAMES * XSK_UMEM__DEFAULT_FRAME_SIZE);
if (ret)
if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) exit_with_error(ret);
exit(EXIT_FAILURE);
if (prog_fd < 0) {
fprintf(stderr, "ERROR: no program found: %s\n",
strerror(prog_fd));
exit(EXIT_FAILURE);
}
map = bpf_object__find_map_by_name(obj, "qidconf_map");
qidconf_map = bpf_map__fd(map);
if (qidconf_map < 0) {
fprintf(stderr, "ERROR: no qidconf map found: %s\n",
strerror(qidconf_map));
exit(EXIT_FAILURE);
}
map = bpf_object__find_map_by_name(obj, "xsks_map");
xsks_map = bpf_map__fd(map);
if (xsks_map < 0) {
fprintf(stderr, "ERROR: no xsks map found: %s\n",
strerror(xsks_map));
exit(EXIT_FAILURE);
}
if (bpf_set_link_xdp_fd(opt_ifindex, prog_fd, opt_xdp_flags) < 0) {
fprintf(stderr, "ERROR: link set xdp fd failed\n");
exit(EXIT_FAILURE);
}
ret = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
if (ret) {
printf("can't get prog info - %s\n", strerror(errno));
return 1;
}
prog_id = info.id;
ret = bpf_map_update_elem(qidconf_map, &key, &opt_queue, 0);
if (ret) {
fprintf(stderr, "ERROR: bpf_map_update_elem qidconf\n");
exit(EXIT_FAILURE);
}
/* Create sockets... */ /* Create sockets... */
xsks[num_socks++] = xsk_configure(NULL); umem = xsk_configure_umem(bufs,
NUM_FRAMES * XSK_UMEM__DEFAULT_FRAME_SIZE);
xsks[num_socks++] = xsk_configure_socket(umem);
#if RR_LB if (opt_bench == BENCH_TXONLY) {
for (i = 0; i < MAX_SOCKS - 1; i++) int i;
xsks[num_socks++] = xsk_configure(xsks[0]->umem);
#endif
/* ...and insert them into the map. */ for (i = 0; i < NUM_FRAMES * XSK_UMEM__DEFAULT_FRAME_SIZE;
for (i = 0; i < num_socks; i++) { i += XSK_UMEM__DEFAULT_FRAME_SIZE)
key = i; (void)gen_eth_frame(umem, i);
ret = bpf_map_update_elem(xsks_map, &key, &xsks[i]->sfd, 0);
if (ret) {
fprintf(stderr, "ERROR: bpf_map_update_elem %d\n", i);
exit(EXIT_FAILURE);
}
} }
signal(SIGINT, int_exit); signal(SIGINT, int_exit);
...@@ -1005,7 +685,8 @@ int main(int argc, char **argv) ...@@ -1005,7 +685,8 @@ int main(int argc, char **argv)
setlocale(LC_ALL, ""); setlocale(LC_ALL, "");
ret = pthread_create(&pt, NULL, poller, NULL); ret = pthread_create(&pt, NULL, poller, NULL);
lassert(ret == 0); if (ret)
exit_with_error(ret);
prev_time = get_nsecs(); prev_time = get_nsecs();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment