Commit bdad3840 authored by Martin KaFai Lau

Add perf_submit_skb

For BPF_PROG_TYPE_SCHED_CLS/ACT, the upstream kernel has recently added a
feature to efficiently output skb + meta data:
commit 555c8a8623a3 ("bpf: avoid stack copy and use skb ctx for event output")

This patch adds perf_submit_skb to the BPF_PERF_OUTPUT macro.  It takes
an extra u32 argument.  perf_submit_skb is then expanded to a
bpf_perf_event_output call that passes the newly added u32 argument
as the skb's len (in the upper 32 bits of the flags argument).

Other than the above described changes, perf_submit_skb is almost
a carbon copy of the perf_submit except the removal of the 'string name'
variable since I cannot find a specific use of it.

Note that the 3rd param type of bpf_perf_event_output has also been
changed from u32 to u64.

Added a sample program tc_perf_event.py.  Here is what the output
looks like:
[root@arch-fb-vm1 networking]# ./tc_perf_event.py
Try: "ping -6 ff02::1%me"

CPU SRC IP                           DST IP       Magic
0   fe80::982f:5dff:fec1:e52b        ff02::1      0xfaceb00c
0   fe80::982f:5dff:fec1:e52b        ff02::1      0xfaceb00c
0   fe80::982f:5dff:fec1:e52b        ff02::1      0xfaceb00c
1   fe80::982f:5dff:fec1:e52b        ff02::1      0xfaceb00c
1   fe80::982f:5dff:fec1:e52b        ff02::1      0xfaceb00c
1   fe80::982f:5dff:fec1:e52b        ff02::1      0xfaceb00c
parent a47a3b15
set(EXAMPLE_FILES simulation.py) set(EXAMPLE_FILES simulation.py)
set(EXAMPLE_PROGRAMS simple_tc.py) set(EXAMPLE_PROGRAMS simple_tc.py)
# List every runnable example in a single set(): a second set() on the same
# variable replaces its value, which would drop simple_tc.py from the install.
set(EXAMPLE_PROGRAMS simple_tc.py tc_perf_event.py)
install(FILES ${EXAMPLE_FILES} DESTINATION share/bcc/examples/networking) install(FILES ${EXAMPLE_FILES} DESTINATION share/bcc/examples/networking)
install(PROGRAMS ${EXAMPLE_PROGRAMS} DESTINATION share/bcc/examples/networking) install(PROGRAMS ${EXAMPLE_PROGRAMS} DESTINATION share/bcc/examples/networking)
......
#!/usr/bin/env python
#
# tc_perf_event.py Output skb and meta data through perf event
#
# Copyright (c) 2016-present, Facebook, Inc.
# Licensed under the Apache License, Version 2.0 (the "License")
from bcc import BPF
import ctypes as ct
import pyroute2
import socket
# BPF C source compiled by bcc at startup.  handle_egress() bounds-checks the
# Ethernet + IPv6 headers against data_end and, for IPv6 packets whose next
# header is ICMPv6, submits a perf event through skb_events: the 32-bit
# `magic` value is passed as meta data together with skb->len as the skb
# length.  The program always returns TC_ACT_OK.
bpf_txt = """
#include <uapi/linux/if_ether.h>
#include <uapi/linux/in6.h>
#include <uapi/linux/ipv6.h>
#include <uapi/linux/pkt_cls.h>
#include <uapi/linux/bpf.h>
BPF_PERF_OUTPUT(skb_events);
struct eth_hdr {
unsigned char h_dest[ETH_ALEN];
unsigned char h_source[ETH_ALEN];
unsigned short h_proto;
};
int handle_egress(struct __sk_buff *skb)
{
void *data = (void *)(long)skb->data;
void *data_end = (void *)(long)skb->data_end;
struct eth_hdr *eth = data;
struct ipv6hdr *ip6h = data + sizeof(*eth);
u32 magic = 0xfaceb00c;
/* single length check */
if (data + sizeof(*eth) + sizeof(*ip6h) > data_end)
return TC_ACT_OK;
if (eth->h_proto == htons(ETH_P_IPV6) &&
ip6h->nexthdr == IPPROTO_ICMPV6)
skb_events.perf_submit_skb(skb, skb->len, &magic, sizeof(magic));
return TC_ACT_OK;
}"""
def print_skb_event(cpu, data, size):
    """Perf-buffer callback: print one line per ICMPv6 echo request.

    Each event is laid out as the 32-bit ``magic`` meta data followed by the
    raw packet bytes.

    Args:
        cpu: CPU the event was emitted on; printed verbatim.
        data: Pointer to the start of the event buffer.
        size: Total size of the event in bytes (meta data plus packet).

    Events too short to contain the ICMPv6 type byte are ignored instead of
    raising ``ValueError``/``IndexError`` from inside the callback.
    """
    # Offsets into the raw packet: a 14-byte Ethernet header, then the IPv6
    # header with the source and destination addresses, then ICMPv6.
    src_ip_off = 22
    dst_ip_off = 38
    icmp6_type_off = 54
    icmp6_echo_request = 128

    raw_len = size - ct.sizeof(ct.c_uint32)
    if raw_len <= icmp6_type_off:
        # Truncated event: there is no ICMPv6 type byte to inspect.
        return

    # The packet length differs per event, so the ctypes layout is built
    # per call.
    class SkbEvent(ct.Structure):
        _fields_ = [("magic", ct.c_uint32),
                    ("raw", ct.c_ubyte * raw_len)]

    skb_event = ct.cast(data, ct.POINTER(SkbEvent)).contents

    # Only print for echo request
    if int(skb_event.raw[icmp6_type_off]) != icmp6_echo_request:
        return

    # Go through bytearray(): slicing a c_ubyte array yields a list of ints,
    # and on Python 2 bytes(list) is str(list), not a packed address.
    src_ip = bytes(bytearray(skb_event.raw[src_ip_off:dst_ip_off]))
    dst_ip = bytes(bytearray(skb_event.raw[dst_ip_off:icmp6_type_off]))
    print("%-3s %-32s %-12s 0x%08x" %
          (cpu, socket.inet_ntop(socket.AF_INET6, src_ip),
           socket.inet_ntop(socket.AF_INET6, dst_ip),
           skb_event.magic))
# Load the classifier, attach it to a scratch veth pair, and stream perf
# events to print_skb_event until the process is interrupted.
try:
    b = BPF(text=bpf_txt)
    fn = b.load_func("handle_egress", BPF.SCHED_CLS)

    # Create the veth pair "me" <-> "you" and bring both ends up.
    ipr = pyroute2.IPRoute()
    ipr.link("add", ifname="me", kind="veth", peer="you")
    me = ipr.link_lookup(ifname="me")[0]
    you = ipr.link_lookup(ifname="you")[0]
    ipr.link("set", index=me, state="up")
    ipr.link("set", index=you, state="up")

    # Attach the program as a direct-action bpf filter on the clsact qdisc
    # of "me" (parent ffff:fff3 is presumably the egress hook, matching the
    # handle_egress name -- confirm against the tc clsact documentation).
    ipr.tc("add", "clsact", me)
    ipr.tc("add-filter", "bpf", me, ":1",
           fd=fn.fd, name=fn.name,
           parent="ffff:fff3", classid=1, direct_action=True)

    b["skb_events"].open_perf_buffer(print_skb_event)

    print('Try: "ping -6 ff02::1%me"\n')
    print("%-3s %-32s %-12s %-10s" % ("CPU", "SRC IP", "DST IP", "Magic"))

    # Poll forever; every event invokes print_skb_event.
    while True:
        b.kprobe_poll()
finally:
    # Delete the link only if setup got far enough to look up "me".
    if "me" in locals():
        ipr.link("del", index=me)
...@@ -67,6 +67,7 @@ struct _name##_table_t { \ ...@@ -67,6 +67,7 @@ struct _name##_table_t { \
u32 leaf; \ u32 leaf; \
/* map.perf_submit(ctx, data, data_size) */ \ /* map.perf_submit(ctx, data, data_size) */ \
int (*perf_submit) (void *, void *, u32); \ int (*perf_submit) (void *, void *, u32); \
int (*perf_submit_skb) (void *, u32, void *, u32); \
u32 data[0]; \ u32 data[0]; \
}; \ }; \
__attribute__((section("maps/perf_output"))) \ __attribute__((section("maps/perf_output"))) \
...@@ -171,7 +172,7 @@ static int (*bpf_redirect)(int ifindex, u32 flags) = ...@@ -171,7 +172,7 @@ static int (*bpf_redirect)(int ifindex, u32 flags) =
(void *) BPF_FUNC_redirect; (void *) BPF_FUNC_redirect;
static u32 (*bpf_get_route_realm)(void *ctx) = static u32 (*bpf_get_route_realm)(void *ctx) =
(void *) BPF_FUNC_get_route_realm; (void *) BPF_FUNC_get_route_realm;
static int (*bpf_perf_event_output)(void *ctx, void *map, u32 index, void *data, u32 size) = static int (*bpf_perf_event_output)(void *ctx, void *map, u64 index, void *data, u32 size) =
(void *) BPF_FUNC_perf_event_output; (void *) BPF_FUNC_perf_event_output;
static int (*bpf_skb_load_bytes)(void *ctx, int offset, void *to, u32 len) = static int (*bpf_skb_load_bytes)(void *ctx, int offset, void *to, u32 len) =
(void *) BPF_FUNC_skb_load_bytes; (void *) BPF_FUNC_skb_load_bytes;
......
...@@ -396,6 +396,21 @@ bool BTypeVisitor::VisitCallExpr(CallExpr *Call) { ...@@ -396,6 +396,21 @@ bool BTypeVisitor::VisitCallExpr(CallExpr *Call) {
Call->getArg(2)->getLocEnd())); Call->getArg(2)->getLocEnd()));
txt = "bpf_perf_event_output(" + arg0 + ", bpf_pseudo_fd(1, " + fd + ")"; txt = "bpf_perf_event_output(" + arg0 + ", bpf_pseudo_fd(1, " + fd + ")";
txt += ", bpf_get_smp_processor_id(), " + args_other + ")"; txt += ", bpf_get_smp_processor_id(), " + args_other + ")";
} else if (memb_name == "perf_submit_skb") {
string skb = rewriter_.getRewrittenText(SourceRange(Call->getArg(0)->getLocStart(),
Call->getArg(0)->getLocEnd()));
string skb_len = rewriter_.getRewrittenText(SourceRange(Call->getArg(1)->getLocStart(),
Call->getArg(1)->getLocEnd()));
string meta = rewriter_.getRewrittenText(SourceRange(Call->getArg(2)->getLocStart(),
Call->getArg(2)->getLocEnd()));
string meta_len = rewriter_.getRewrittenText(SourceRange(Call->getArg(3)->getLocStart(),
Call->getArg(3)->getLocEnd()));
txt = "bpf_perf_event_output(" +
skb + ", " +
"bpf_pseudo_fd(1, " + fd + "), " +
"((__u64)" + skb_len + " << 32) | BPF_F_CURRENT_CPU, " +
meta + ", " +
meta_len + ");";
} else if (memb_name == "get_stackid") { } else if (memb_name == "get_stackid") {
if (table_it->type == BPF_MAP_TYPE_STACK_TRACE) { if (table_it->type == BPF_MAP_TYPE_STACK_TRACE) {
string arg0 = rewriter_.getRewrittenText(SourceRange(Call->getArg(0)->getLocStart(), string arg0 = rewriter_.getRewrittenText(SourceRange(Call->getArg(0)->getLocStart(),
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment