Commit e0724d73 authored by Jan Rüth's avatar Jan Rüth

This adds XDP support to BCC as currently supported in net-next.

Concretely, it adds two functions to bcc, namely:
`attach_xdp` and `remove_xdp`
which allows to attach an XDP program to a device (given via its name, e.g., en0) (in the future this might change to a specific queue on a device once the kernel offers this interface)
and `remove_xdp` removes a XDP program from a device. Please note that there can currently be only one program attached to the device and attaching another program replaces the previous.

One example is available to test XDP, in networking/xdp which drops all packets an counts for which protocol a packet was dropped (this is taken from the kernel xdp1 example). Please note that you cannot use the network headers defined in <bcc/proto.h> as they cause llvm/clang to generate instructions not available on XDP layer. On XDP layer you do not have an skb yet, so you are operating on the bare packet data.

XDP support is currently limited to only some network adapters, there is the `mlx4` and there is also a patch available for the `e1000` driver.
parent 60b082e0
...@@ -84,6 +84,7 @@ enum bpf_map_type { ...@@ -84,6 +84,7 @@ enum bpf_map_type {
BPF_MAP_TYPE_PERCPU_HASH, BPF_MAP_TYPE_PERCPU_HASH,
BPF_MAP_TYPE_PERCPU_ARRAY, BPF_MAP_TYPE_PERCPU_ARRAY,
BPF_MAP_TYPE_STACK_TRACE, BPF_MAP_TYPE_STACK_TRACE,
BPF_MAP_TYPE_CGROUP_ARRAY,
}; };
enum bpf_prog_type { enum bpf_prog_type {
...@@ -92,6 +93,8 @@ enum bpf_prog_type { ...@@ -92,6 +93,8 @@ enum bpf_prog_type {
BPF_PROG_TYPE_KPROBE, BPF_PROG_TYPE_KPROBE,
BPF_PROG_TYPE_SCHED_CLS, BPF_PROG_TYPE_SCHED_CLS,
BPF_PROG_TYPE_SCHED_ACT, BPF_PROG_TYPE_SCHED_ACT,
BPF_PROG_TYPE_TRACEPOINT,
BPF_PROG_TYPE_XDP,
}; };
#define BPF_PSEUDO_MAP_FD 1 #define BPF_PSEUDO_MAP_FD 1
...@@ -312,6 +315,66 @@ enum bpf_func_id { ...@@ -312,6 +315,66 @@ enum bpf_func_id {
*/ */
BPF_FUNC_skb_get_tunnel_opt, BPF_FUNC_skb_get_tunnel_opt,
BPF_FUNC_skb_set_tunnel_opt, BPF_FUNC_skb_set_tunnel_opt,
/**
* bpf_skb_change_proto(skb, proto, flags)
* Change protocol of the skb. Currently supported is
* v4 -> v6, v6 -> v4 transitions. The helper will also
* resize the skb. eBPF program is expected to fill the
* new headers via skb_store_bytes and lX_csum_replace.
* @skb: pointer to skb
* @proto: new skb->protocol type
* @flags: reserved
* Return: 0 on success or negative error
*/
BPF_FUNC_skb_change_proto,
/**
* bpf_skb_change_type(skb, type)
* Change packet type of skb.
* @skb: pointer to skb
* @type: new skb->pkt_type type
* Return: 0 on success or negative error
*/
BPF_FUNC_skb_change_type,
/**
* bpf_skb_in_cgroup(skb, map, index) - Check cgroup2 membership of skb
* @skb: pointer to skb
* @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
* @index: index of the cgroup in the bpf_map
* Return:
* == 0 skb failed the cgroup2 descendant test
* == 1 skb succeeded the cgroup2 descendant test
* < 0 error
*/
BPF_FUNC_skb_in_cgroup,
/**
* bpf_get_hash_recalc(skb)
* Retrieve and possibly recalculate skb->hash.
* @skb: pointer to skb
* Return: hash
*/
BPF_FUNC_get_hash_recalc,
/**
* u64 bpf_get_current_task(void)
* Returns current task_struct
* Return: current
*/
BPF_FUNC_get_current_task,
/**
* bpf_probe_write_user(void *dst, void *src, int len)
* safely attempt to write to a location
* @dst: destination address in userspace
* @src: source address on stack
* @len: number of bytes to copy
* Return: 0 on success or negative error
*/
BPF_FUNC_probe_write_user,
__BPF_FUNC_MAX_ID, __BPF_FUNC_MAX_ID,
}; };
...@@ -346,6 +409,12 @@ enum bpf_func_id { ...@@ -346,6 +409,12 @@ enum bpf_func_id {
#define BPF_F_ZERO_CSUM_TX (1ULL << 1) #define BPF_F_ZERO_CSUM_TX (1ULL << 1)
#define BPF_F_DONT_FRAGMENT (1ULL << 2) #define BPF_F_DONT_FRAGMENT (1ULL << 2)
/* BPF_FUNC_perf_event_output and BPF_FUNC_perf_event_read flags. */
#define BPF_F_INDEX_MASK 0xffffffffULL
#define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK
/* BPF_FUNC_perf_event_output for sk_buff input context. */
#define BPF_F_CTXLEN_MASK (0xfffffULL << 32)
/* user accessible mirror of in-kernel sk_buff. /* user accessible mirror of in-kernel sk_buff.
* new fields can only be added to the end of this structure * new fields can only be added to the end of this structure
*/ */
...@@ -365,6 +434,8 @@ struct __sk_buff { ...@@ -365,6 +434,8 @@ struct __sk_buff {
__u32 cb[5]; __u32 cb[5];
__u32 hash; __u32 hash;
__u32 tc_classid; __u32 tc_classid;
__u32 data;
__u32 data_end;
}; };
struct bpf_tunnel_key { struct bpf_tunnel_key {
...@@ -379,4 +450,24 @@ struct bpf_tunnel_key { ...@@ -379,4 +450,24 @@ struct bpf_tunnel_key {
__u32 tunnel_label; __u32 tunnel_label;
}; };
/* User return codes for XDP prog type.
* A valid XDP program must return one of these defined values. All other
* return codes are reserved for future use. Unknown return codes will result
* in packet drop.
*/
enum xdp_action {
XDP_ABORTED = 0,
XDP_DROP,
XDP_PASS,
XDP_TX,
};
/* user accessible metadata for XDP packet hook
* new fields must be added to the end of this structure
*/
struct xdp_md {
__u32 data;
__u32 data_end;
};
#endif /* _UAPI__LINUX_BPF_H__ */ #endif /* _UAPI__LINUX_BPF_H__ */
...@@ -85,6 +85,7 @@ enum bpf_map_type { ...@@ -85,6 +85,7 @@ enum bpf_map_type {
BPF_MAP_TYPE_PERCPU_HASH, BPF_MAP_TYPE_PERCPU_HASH,
BPF_MAP_TYPE_PERCPU_ARRAY, BPF_MAP_TYPE_PERCPU_ARRAY,
BPF_MAP_TYPE_STACK_TRACE, BPF_MAP_TYPE_STACK_TRACE,
BPF_MAP_TYPE_CGROUP_ARRAY,
}; };
enum bpf_prog_type { enum bpf_prog_type {
...@@ -93,6 +94,8 @@ enum bpf_prog_type { ...@@ -93,6 +94,8 @@ enum bpf_prog_type {
BPF_PROG_TYPE_KPROBE, BPF_PROG_TYPE_KPROBE,
BPF_PROG_TYPE_SCHED_CLS, BPF_PROG_TYPE_SCHED_CLS,
BPF_PROG_TYPE_SCHED_ACT, BPF_PROG_TYPE_SCHED_ACT,
BPF_PROG_TYPE_TRACEPOINT,
BPF_PROG_TYPE_XDP,
}; };
#define BPF_PSEUDO_MAP_FD 1 #define BPF_PSEUDO_MAP_FD 1
...@@ -313,6 +316,66 @@ enum bpf_func_id { ...@@ -313,6 +316,66 @@ enum bpf_func_id {
*/ */
BPF_FUNC_skb_get_tunnel_opt, BPF_FUNC_skb_get_tunnel_opt,
BPF_FUNC_skb_set_tunnel_opt, BPF_FUNC_skb_set_tunnel_opt,
/**
* bpf_skb_change_proto(skb, proto, flags)
* Change protocol of the skb. Currently supported is
* v4 -> v6, v6 -> v4 transitions. The helper will also
* resize the skb. eBPF program is expected to fill the
* new headers via skb_store_bytes and lX_csum_replace.
* @skb: pointer to skb
* @proto: new skb->protocol type
* @flags: reserved
* Return: 0 on success or negative error
*/
BPF_FUNC_skb_change_proto,
/**
* bpf_skb_change_type(skb, type)
* Change packet type of skb.
* @skb: pointer to skb
* @type: new skb->pkt_type type
* Return: 0 on success or negative error
*/
BPF_FUNC_skb_change_type,
/**
* bpf_skb_in_cgroup(skb, map, index) - Check cgroup2 membership of skb
* @skb: pointer to skb
* @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
* @index: index of the cgroup in the bpf_map
* Return:
* == 0 skb failed the cgroup2 descendant test
* == 1 skb succeeded the cgroup2 descendant test
* < 0 error
*/
BPF_FUNC_skb_in_cgroup,
/**
* bpf_get_hash_recalc(skb)
* Retrieve and possibly recalculate skb->hash.
* @skb: pointer to skb
* Return: hash
*/
BPF_FUNC_get_hash_recalc,
/**
* u64 bpf_get_current_task(void)
* Returns current task_struct
* Return: current
*/
BPF_FUNC_get_current_task,
/**
* bpf_probe_write_user(void *dst, void *src, int len)
* safely attempt to write to a location
* @dst: destination address in userspace
* @src: source address on stack
* @len: number of bytes to copy
* Return: 0 on success or negative error
*/
BPF_FUNC_probe_write_user,
__BPF_FUNC_MAX_ID, __BPF_FUNC_MAX_ID,
}; };
...@@ -347,6 +410,12 @@ enum bpf_func_id { ...@@ -347,6 +410,12 @@ enum bpf_func_id {
#define BPF_F_ZERO_CSUM_TX (1ULL << 1) #define BPF_F_ZERO_CSUM_TX (1ULL << 1)
#define BPF_F_DONT_FRAGMENT (1ULL << 2) #define BPF_F_DONT_FRAGMENT (1ULL << 2)
/* BPF_FUNC_perf_event_output and BPF_FUNC_perf_event_read flags. */
#define BPF_F_INDEX_MASK 0xffffffffULL
#define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK
/* BPF_FUNC_perf_event_output for sk_buff input context. */
#define BPF_F_CTXLEN_MASK (0xfffffULL << 32)
/* user accessible mirror of in-kernel sk_buff. /* user accessible mirror of in-kernel sk_buff.
* new fields can only be added to the end of this structure * new fields can only be added to the end of this structure
*/ */
...@@ -366,6 +435,8 @@ struct __sk_buff { ...@@ -366,6 +435,8 @@ struct __sk_buff {
__u32 cb[5]; __u32 cb[5];
__u32 hash; __u32 hash;
__u32 tc_classid; __u32 tc_classid;
__u32 data;
__u32 data_end;
}; };
struct bpf_tunnel_key { struct bpf_tunnel_key {
...@@ -380,5 +451,25 @@ struct bpf_tunnel_key { ...@@ -380,5 +451,25 @@ struct bpf_tunnel_key {
__u32 tunnel_label; __u32 tunnel_label;
}; };
/* User return codes for XDP prog type.
* A valid XDP program must return one of these defined values. All other
* return codes are reserved for future use. Unknown return codes will result
* in packet drop.
*/
enum xdp_action {
XDP_ABORTED = 0,
XDP_DROP,
XDP_PASS,
XDP_TX,
};
/* user accessible metadata for XDP packet hook
* new fields must be added to the end of this structure
*/
struct xdp_md {
__u32 data;
__u32 data_end;
};
#endif /* _UAPI__LINUX_BPF_H__ */ #endif /* _UAPI__LINUX_BPF_H__ */
)********" )********"
...@@ -408,3 +408,104 @@ error: ...@@ -408,3 +408,104 @@ error:
return NULL; return NULL;
} }
int bpf_attach_xdp(const char *dev_name, int progfd) {
struct sockaddr_nl sa;
int sock, seq = 0, len, ret = -1;
char buf[4096];
struct nlattr *nla, *nla_xdp;
struct {
struct nlmsghdr nh;
struct ifinfomsg ifinfo;
char attrbuf[64];
} req;
struct nlmsghdr *nh;
struct nlmsgerr *err;
memset(&sa, 0, sizeof(sa));
sa.nl_family = AF_NETLINK;
sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
if (sock < 0) {
fprintf(stderr, "bpf: opening a netlink socket: %s\n", strerror(errno));
return -1;
}
if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
fprintf(stderr, "bpf: bind to netlink: %s\n", strerror(errno));
goto cleanup;
}
memset(&req, 0, sizeof(req));
req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
req.nh.nlmsg_type = RTM_SETLINK;
req.nh.nlmsg_pid = 0;
req.nh.nlmsg_seq = ++seq;
req.ifinfo.ifi_family = AF_UNSPEC;
req.ifinfo.ifi_index = if_nametoindex(dev_name);
if (req.ifinfo.ifi_index == 0) {
fprintf(stderr, "bpf: Resolving device name to index: %s\n", strerror(errno));
goto cleanup;
}
nla = (struct nlattr *)(((char *)&req)
+ NLMSG_ALIGN(req.nh.nlmsg_len));
nla->nla_type = NLA_F_NESTED | 43/*IFLA_XDP*/;
nla_xdp = (struct nlattr *)((char *)nla + NLA_HDRLEN);
// we specify the FD passed over by the user
nla_xdp->nla_type = 1/*IFLA_XDP_FD*/;
nla_xdp->nla_len = NLA_HDRLEN + sizeof(int);
memcpy((char *)nla_xdp + NLA_HDRLEN, &progfd, sizeof(progfd));
nla->nla_len = NLA_HDRLEN + nla_xdp->nla_len;
req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len);
if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
fprintf(stderr, "bpf: send to netlink: %s\n", strerror(errno));
goto cleanup;
}
len = recv(sock, buf, sizeof(buf), 0);
if (len < 0) {
fprintf(stderr, "bpf: recv from netlink: %s\n", strerror(errno));
goto cleanup;
}
for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
nh = NLMSG_NEXT(nh, len)) {
if (nh->nlmsg_pid != getpid()) {
fprintf(stderr, "bpf: Wrong pid %d, expected %d\n",
nh->nlmsg_pid, getpid());
errno = EBADMSG;
goto cleanup;
}
if (nh->nlmsg_seq != seq) {
fprintf(stderr, "bpf: Wrong seq %d, expected %d\n",
nh->nlmsg_seq, seq);
errno = EBADMSG;
goto cleanup;
}
switch (nh->nlmsg_type) {
case NLMSG_ERROR:
err = (struct nlmsgerr *)NLMSG_DATA(nh);
if (!err->error)
continue;
fprintf(stderr, "bpf: nlmsg error %s\n", strerror(-err->error));
errno = -err->error;
goto cleanup;
case NLMSG_DONE:
break;
}
}
ret = 0;
cleanup:
close(sock);
return ret;
}
...@@ -61,6 +61,9 @@ int bpf_detach_tracepoint(const char *tp_category, const char *tp_name); ...@@ -61,6 +61,9 @@ int bpf_detach_tracepoint(const char *tp_category, const char *tp_name);
void * bpf_open_perf_buffer(perf_reader_raw_cb raw_cb, void *cb_cookie, int pid, int cpu); void * bpf_open_perf_buffer(perf_reader_raw_cb raw_cb, void *cb_cookie, int pid, int cpu);
/* attached a prog expressed by progfd to the device specified in dev_name */
int bpf_attach_xdp(const char *dev_name, int progfd);
#define LOG_BUF_SIZE 65536 #define LOG_BUF_SIZE 65536
extern char bpf_log_buf[LOG_BUF_SIZE]; extern char bpf_log_buf[LOG_BUF_SIZE];
......
...@@ -21,6 +21,7 @@ import multiprocessing ...@@ -21,6 +21,7 @@ import multiprocessing
import os import os
import re import re
import struct import struct
import errno
import sys import sys
basestring = (unicode if sys.version_info[0] < 3 else str) basestring = (unicode if sys.version_info[0] < 3 else str)
...@@ -67,6 +68,7 @@ class BPF(object): ...@@ -67,6 +68,7 @@ class BPF(object):
SCHED_CLS = 3 SCHED_CLS = 3
SCHED_ACT = 4 SCHED_ACT = 4
TRACEPOINT = 5 TRACEPOINT = 5
XDP = 6
_probe_repl = re.compile("[^a-zA-Z0-9_]") _probe_repl = re.compile("[^a-zA-Z0-9_]")
_sym_caches = {} _sym_caches = {}
...@@ -439,6 +441,39 @@ class BPF(object): ...@@ -439,6 +441,39 @@ class BPF(object):
raise Exception("Failed to detach BPF from kprobe") raise Exception("Failed to detach BPF from kprobe")
self._del_kprobe(ev_name) self._del_kprobe(ev_name)
@staticmethod
def attach_xdp(dev, fn):
'''
This function attaches a BPF function to a device on the device
driver level (XDP)
'''
if not isinstance(fn, BPF.Function):
raise Exception("arg 1 must be of type BPF.Function")
res = lib.bpf_attach_xdp(dev.encode("ascii"), fn.fd)
if res < 0:
err_no = ct.get_errno()
if err_no == errno.EBADMSG:
raise Exception("Internal error while attaching BFP to device,"+
" try increasing the debug level!")
else:
errstr = os.strerror(err_no)
raise Exception("Failed to attach BPF to device %s: %s"
% (dev, errstr))
@staticmethod
def remove_xdp(dev):
'''
This function removes any BPF function from a device on the
device driver level (XDP)
'''
res = lib.bpf_attach_xdp(dev.encode("ascii"), -1)
if res < 0:
errstr = os.strerror(ct.get_errno())
raise Exception("Failed to detach BPF from device %s: %s"
% (dev, errstr))
@classmethod @classmethod
def _check_path_symbol(cls, module, symname, addr): def _check_path_symbol(cls, module, symname, addr):
sym = bcc_symbol() sym = bcc_symbol()
......
...@@ -108,6 +108,9 @@ lib.perf_reader_free.argtypes = [ct.c_void_p] ...@@ -108,6 +108,9 @@ lib.perf_reader_free.argtypes = [ct.c_void_p]
lib.perf_reader_fd.restype = int lib.perf_reader_fd.restype = int
lib.perf_reader_fd.argtypes = [ct.c_void_p] lib.perf_reader_fd.argtypes = [ct.c_void_p]
lib.bpf_attach_xdp.restype = ct.c_int;
lib.bpf_attach_xdp.argtypes = [ct.c_char_p, ct.c_int]
# bcc symbol helpers # bcc symbol helpers
class bcc_symbol(ct.Structure): class bcc_symbol(ct.Structure):
_fields_ = [ _fields_ = [
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment