Commit 4d97f7fa authored by 4ast

Merge pull request #255 from iovisor/bblanco_dev

Add ability to consume perf events in python
parents b262e26d c61e75b4
...@@ -35,7 +35,7 @@ endif() ...@@ -35,7 +35,7 @@ endif()
# tell the shared library where it is being installed so it can find shared header files # tell the shared library where it is being installed so it can find shared header files
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DBCC_INSTALL_PREFIX='\"${CMAKE_INSTALL_PREFIX}\"'") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DBCC_INSTALL_PREFIX='\"${CMAKE_INSTALL_PREFIX}\"'")
add_library(bcc SHARED bpf_common.cc bpf_module.cc libbpf.c) add_library(bcc SHARED bpf_common.cc bpf_module.cc libbpf.c perf_reader.c)
set_target_properties(bcc PROPERTIES VERSION ${REVISION_LAST} SOVERSION 0) set_target_properties(bcc PROPERTIES VERSION ${REVISION_LAST} SOVERSION 0)
# BPF is still experimental otherwise it should be available # BPF is still experimental otherwise it should be available
......
...@@ -33,6 +33,7 @@ ...@@ -33,6 +33,7 @@
#include <unistd.h> #include <unistd.h>
#include "libbpf.h" #include "libbpf.h"
#include "perf_reader.h"
// TODO: remove these defines when linux-libc-dev exports them properly // TODO: remove these defines when linux-libc-dev exports them properly
...@@ -175,8 +176,8 @@ int bpf_attach_socket(int sock, int prog) { ...@@ -175,8 +176,8 @@ int bpf_attach_socket(int sock, int prog) {
return setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog, sizeof(prog)); return setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog, sizeof(prog));
} }
static int bpf_attach_tracing_event(int progfd, const char *event_path, pid_t pid, int cpu, int group_fd) static int bpf_attach_tracing_event(int progfd, const char *event_path,
{ struct perf_reader *reader, int pid, int cpu, int group_fd) {
int efd = -1, rc = -1, pfd = -1; int efd = -1, rc = -1, pfd = -1;
ssize_t bytes = -1; ssize_t bytes = -1;
char buf[256]; char buf[256];
...@@ -197,7 +198,7 @@ static int bpf_attach_tracing_event(int progfd, const char *event_path, pid_t pi ...@@ -197,7 +198,7 @@ static int bpf_attach_tracing_event(int progfd, const char *event_path, pid_t pi
buf[bytes] = '\0'; buf[bytes] = '\0';
attr.config = strtol(buf, NULL, 0); attr.config = strtol(buf, NULL, 0);
attr.type = PERF_TYPE_TRACEPOINT; attr.type = PERF_TYPE_TRACEPOINT;
attr.sample_type = PERF_SAMPLE_RAW; attr.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN;
attr.sample_period = 1; attr.sample_period = 1;
attr.wakeup_events = 1; attr.wakeup_events = 1;
pfd = syscall(__NR_perf_event_open, &attr, pid, cpu, group_fd, PERF_FLAG_FD_CLOEXEC); pfd = syscall(__NR_perf_event_open, &attr, pid, cpu, group_fd, PERF_FLAG_FD_CLOEXEC);
...@@ -205,6 +206,10 @@ static int bpf_attach_tracing_event(int progfd, const char *event_path, pid_t pi ...@@ -205,6 +206,10 @@ static int bpf_attach_tracing_event(int progfd, const char *event_path, pid_t pi
perror("perf_event_open"); perror("perf_event_open");
goto cleanup; goto cleanup;
} }
if (perf_reader_mmap(reader, pfd, attr.sample_type) < 0)
goto cleanup;
if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, progfd) < 0) { if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, progfd) < 0) {
perror("ioctl(PERF_EVENT_IOC_SET_BPF)"); perror("ioctl(PERF_EVENT_IOC_SET_BPF)");
goto cleanup; goto cleanup;
...@@ -226,11 +231,17 @@ cleanup: ...@@ -226,11 +231,17 @@ cleanup:
return rc; return rc;
} }
int bpf_attach_kprobe(int progfd, const char *event, void * bpf_attach_kprobe(int progfd, const char *event,
const char *event_desc, pid_t pid, const char *event_desc, pid_t pid,
int cpu, int group_fd) { int cpu, int group_fd, perf_reader_cb cb,
void *cb_cookie) {
int rc = -1, kfd = -1; int rc = -1, kfd = -1;
char buf[256]; char buf[256];
struct perf_reader *reader = NULL;
reader = perf_reader_new(-1, 8, cb, cb_cookie);
if (!reader)
goto cleanup;
kfd = open("/sys/kernel/debug/tracing/kprobe_events", O_WRONLY | O_APPEND, 0); kfd = open("/sys/kernel/debug/tracing/kprobe_events", O_WRONLY | O_APPEND, 0);
if (kfd < 0) { if (kfd < 0) {
...@@ -246,13 +257,17 @@ int bpf_attach_kprobe(int progfd, const char *event, ...@@ -246,13 +257,17 @@ int bpf_attach_kprobe(int progfd, const char *event,
} }
snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/kprobes/%s", event); snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/kprobes/%s", event);
rc = bpf_attach_tracing_event(progfd, buf, -1/*pid*/, 0/*cpu*/, -1/*group_fd*/); rc = bpf_attach_tracing_event(progfd, buf, reader, pid, cpu, group_fd);
cleanup: cleanup:
if (kfd >= 0) if (kfd >= 0)
close(kfd); close(kfd);
if (reader && rc < 0) {
perf_reader_free(reader);
reader = NULL;
}
return rc; return reader;
} }
int bpf_detach_kprobe(const char *event_desc) { int bpf_detach_kprobe(const char *event_desc) {
......
/*
* Copyright (c) 2015 PLUMgrid, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <linux/perf_event.h>
#include <poll.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>
#include "libbpf.h"
#include "perf_reader.h"
// Bookkeeping for one perf event file descriptor, its mmap'ed ring buffer and
// the user callback that receives the samples parsed out of that ring.
struct perf_reader {
// invoked by sample_parse() once per successfully parsed sample; when NULL,
// perf_reader_mmap() skips the mapping altogether
perf_reader_cb cb;
void *cb_cookie; // opaque caller value, handed back as the first cb argument
void *buf; // scratch copy of a record that wraps around the end of the ring
// zeroed at allocation; not read or written by the functions in this file
size_t buf_size;
// address returned by mmap(): one perf_event_mmap_page header page followed
// by page_cnt data pages
void *base;
// value of getpagesize() captured in perf_reader_new()
int page_size;
// number of data pages in the ring (the mapping is page_cnt + 1 pages long)
int page_cnt;
// perf event fd; polled by perf_reader_poll() and closed by
// perf_reader_free() when it is >= 0
int fd;
// PERF_SAMPLE_* mask passed to perf_reader_mmap(); tells sample_parse()
// which optional fields each sample record carries
uint64_t sample_type;
};
// Allocate a zero-initialised reader and record the caller's settings.
//
// fd        perf event fd to own, or a negative value when none is known yet
// page_cnt  number of data pages the ring buffer will have once mapped
// cb        callback to run for every parsed sample (may be NULL)
// cb_cookie opaque pointer passed back to cb
//
// Returns the new reader, or NULL when the allocation fails. The result is
// released with perf_reader_free().
struct perf_reader * perf_reader_new(int fd, int page_cnt, perf_reader_cb cb, void *cb_cookie) {
  struct perf_reader *r = calloc(1, sizeof(*r));

  if (r != NULL) {
    r->fd = fd;
    r->page_cnt = page_cnt;
    r->page_size = getpagesize();
    r->cb_cookie = cb_cookie;
    r->cb = cb;
  }
  return r;
}
// Release everything owned by a reader created with perf_reader_new().
//
// ptr is a struct perf_reader * (typed void * so that it can be called
// through untyped bindings); NULL is accepted and ignored.
//
// The ring buffer is unmapped only when a mapping actually exists: base stays
// NULL when perf_reader_mmap() was never called or returned early, and may
// hold MAP_FAILED after a failed mmap(). The fd is closed when it is >= 0.
void perf_reader_free(void *ptr) {
  struct perf_reader *reader = ptr;

  if (!reader)
    return;
  if (reader->base && reader->base != MAP_FAILED) {
    // header page + data pages, computed in size_t to avoid int overflow
    size_t mmap_size = (size_t)reader->page_size * ((size_t)reader->page_cnt + 1);
    munmap(reader->base, mmap_size);
  }
  if (reader->fd >= 0)
    close(reader->fd);
  free(reader->buf);
  free(reader);
}
// Map the ring buffer of perf event `fd` into the reader.
//
// reader       reader created by perf_reader_new()
// fd           perf event fd to map; on success the reader records it and
//              perf_reader_free() will close it
// sample_type  PERF_SAMPLE_* mask the event was opened with
//
// Returns 0 on success and -1 when mmap() fails (the error is reported with
// perror). When the reader has no callback nothing is mapped, the fd is not
// recorded in the reader, and 0 is returned.
int perf_reader_mmap(struct perf_reader *reader, int fd, uint64_t sample_type) {
  // one header page followed by page_cnt data pages
  size_t mmap_size = (size_t)reader->page_size * ((size_t)reader->page_cnt + 1);
  void *base;

  if (!reader->cb)
    return 0;

  base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
  if (base == MAP_FAILED) {
    perror("mmap");
    // leave reader->base untouched so MAP_FAILED is never mistaken for a
    // usable mapping later on
    return -1;
  }

  reader->base = base;
  reader->fd = fd;
  reader->sample_type = sample_type;
  return 0;
}
// Leading fields of the raw payload of a sample, as interpreted by
// sample_parse(); only `pid` is consumed there.
// NOTE(review): presumably mirrors the kernel's common tracepoint entry
// header -- confirm against the event format exported by the running kernel.
struct perf_sample_trace_common {
uint16_t id;
uint8_t flags;
uint8_t preempt_count;
int pid; // reported to the user callback as the sample's pid
};
// Raw payload of a kprobe sample: the common data structure followed by the
// instruction pointer. sample_parse() overlays this on the raw data and reads
// common.pid from it.
struct perf_sample_trace_kprobe {
struct perf_sample_trace_common common;
uint64_t ip;
};
// Decode one PERF_RECORD_SAMPLE record and hand the result to the callback.
//
// reader  reader whose sample_type says which optional fields are present
// data    start of the record (its perf_event_header), contiguous in memory
// size    total record length in bytes, header included
//
// The callback receives the pid found in the raw payload (-1 when there is no
// usable raw payload), the number of call chain entries and a pointer to the
// first one. Malformed records are reported on stderr and dropped.
//
// Every length field is checked to lie inside the record *before* it is
// dereferenced, and the checks are done on the remaining byte count rather
// than by advancing the pointer first, so a bogus length can neither be read
// out of bounds nor overflow the pointer arithmetic.
static void sample_parse(struct perf_reader *reader, void *data, int size) {
  uint8_t *ptr = data;
  uint8_t *end;
  struct perf_sample_trace_kprobe *tk = NULL;
  uint64_t *callchain = NULL;
  uint64_t num_callchain = 0;

  if (size < 0 || (size_t)size < sizeof(struct perf_event_header)) {
    fprintf(stderr, "%s: corrupt sample header\n", __FUNCTION__);
    return;
  }
  end = ptr + size;
  ptr += sizeof(struct perf_event_header);

  if (reader->sample_type & PERF_SAMPLE_CALLCHAIN) {
    struct {
      uint64_t nr;
      uint64_t ips[];
    } *cc = (void *)ptr;
    size_t avail = (size_t)(end - ptr);
    uint64_t i;

    // size sanity check: first the counter itself, then the entries it claims
    if (avail < sizeof(cc->nr) ||
        cc->nr > (avail - sizeof(cc->nr)) / sizeof(cc->ips[0])) {
      fprintf(stderr, "%s: corrupt callchain sample\n", __FUNCTION__);
      return;
    }
    ptr += sizeof(cc->nr) + sizeof(cc->ips[0]) * cc->nr;

    // don't include magic numbers in the call chain, and stop at the marker
    // that introduces the user-space part of the chain
    for (i = 0; i < cc->nr; ++i) {
      if (cc->ips[i] == PERF_CONTEXT_USER)
        break;
      if (cc->ips[i] >= PERF_CONTEXT_MAX)
        continue;
      if (!callchain)
        callchain = &cc->ips[i];
      ++num_callchain;
    }
  }

  // for kprobes, raw samples just include the common data structure and the
  // instruction pointer
  if (reader->sample_type & PERF_SAMPLE_RAW) {
    struct {
      uint32_t size;
      char data[];
    } *raw = (void *)ptr;
    size_t avail = (size_t)(end - ptr);

    if (avail < sizeof(raw->size) || raw->size > avail - sizeof(raw->size)) {
      fprintf(stderr, "%s: corrupt raw sample\n", __FUNCTION__);
      return;
    }
    ptr += sizeof(raw->size) + raw->size;
    // only the common header is read below; ignore payloads too short for it
    if (raw->size >= sizeof(tk->common))
      tk = (void *)raw->data;
  }

  // sanity check
  if (ptr != end) {
    fprintf(stderr, "%s: extra data at end of sample\n", __FUNCTION__);
    return;
  }

  // call out to the user with the parsed data
  if (reader->cb)
    reader->cb(reader->cb_cookie, tk ? tk->common.pid : -1, num_callchain, callchain);
}
// Read the kernel's producer position (data_head) from the ring header.
//
// The field is read through a volatile view so the load is not cached across
// calls, and is followed by an acquire fence so that the record bytes are not
// read before the head value that announces them. A plain compiler barrier is
// only sufficient on strongly ordered CPUs.
static uint64_t read_data_head(struct perf_event_mmap_page *perf_header) {
  const volatile struct perf_event_mmap_page *hdr = perf_header;
  uint64_t data_head = hdr->data_head;
  __atomic_thread_fence(__ATOMIC_ACQUIRE);
  return data_head;
}
// Publish the consumer position (data_tail) in the ring header.
//
// The release fence keeps every earlier read of record bytes ahead of the
// store, so the space is not handed back while it is still being read. A
// plain compiler barrier is only sufficient on strongly ordered CPUs.
static void write_data_tail(struct perf_event_mmap_page *perf_header, uint64_t data_tail) {
  volatile struct perf_event_mmap_page *hdr = perf_header;
  __atomic_thread_fence(__ATOMIC_RELEASE);
  hdr->data_tail = data_tail;
}
// Consume all the events currently on the reader's ring, dispatching each
// record by type:
//   PERF_RECORD_SAMPLE -> sample_parse(), which calls the user callback
//   PERF_RECORD_LOST   -> the number of dropped samples is printed to stderr
//   anything else      -> reported to stderr as unknown
//
// A record may straddle the end of the ring; it is then copied into
// reader->buf (grown on demand, size tracked in reader->buf_size) so that it
// can be parsed as one contiguous block. After each record the consumer
// position is advanced so the kernel can reuse the space.
static void event_read(struct perf_reader *reader) {
  struct perf_event_mmap_page *perf_header = reader->base;
  uint64_t buffer_size = (uint64_t)reader->page_size * reader->page_cnt;
  uint64_t data_head;
  uint8_t *base, *sentinel;

  // nothing to read when no ring was ever mapped for this reader
  if (!reader->base || reader->base == MAP_FAILED || buffer_size == 0)
    return;

  // data pages start right after the header page
  base = (uint8_t *)reader->base + reader->page_size;
  sentinel = base + buffer_size;

  for (data_head = read_data_head(perf_header); perf_header->data_tail != data_head;
       data_head = read_data_head(perf_header)) {
    uint64_t data_tail = perf_header->data_tail;
    uint8_t *begin = base + data_tail % buffer_size;
    // event header is u64, won't wrap
    struct perf_event_header *e = (void *)begin;
    uint8_t *ptr = begin;
    size_t size = e->size;

    // a record shorter than its own header (would never advance the tail) or
    // longer than the ring cannot be valid: resynchronise with the producer
    if (size < sizeof(*e) || size > buffer_size) {
      fprintf(stderr, "%s: corrupt record size %zu\n", __FUNCTION__, size);
      write_data_tail(perf_header, data_head);
      continue;
    }

    if (size > (size_t)(sentinel - begin)) {
      // perf event wraps around the ring, make a contiguous copy
      size_t len = sentinel - begin;
      if (size > reader->buf_size) {
        void *grown = realloc(reader->buf, size);
        if (!grown) {
          // the old buffer is still owned by the reader; skip this record
          fprintf(stderr, "%s: out of memory, dropping event\n", __FUNCTION__);
          write_data_tail(perf_header, data_tail + size);
          continue;
        }
        reader->buf = grown;
        reader->buf_size = size;
      }
      memcpy(reader->buf, begin, len);
      memcpy((uint8_t *)reader->buf + len, base, size - len);
      ptr = reader->buf;
    }

    if (e->type == PERF_RECORD_LOST) {
      // record body is { u64 id; u64 lost; }: the count is the second field
      struct {
        struct perf_event_header header;
        uint64_t id;
        uint64_t lost;
      } *lost = (void *)ptr;
      if (size >= sizeof(*lost))
        fprintf(stderr, "Lost %llu samples\n", (unsigned long long)lost->lost);
      else
        fprintf(stderr, "%s: corrupt lost record\n", __FUNCTION__);
    } else if (e->type == PERF_RECORD_SAMPLE) {
      sample_parse(reader, ptr, e->size);
    } else {
      fprintf(stderr, "%s: unknown sample type %d\n", __FUNCTION__, e->type);
    }

    write_data_tail(perf_header, data_tail + size);
  }
}
// Block until at least one of the readers has data, then drain every reader
// that became readable (which runs the user callbacks).
//
// num_readers  number of entries in readers
// readers      array of readers to wait on
//
// Returns 0, or -1 when the poll set cannot be allocated. With no readers
// there is nothing to wait for and the call returns 0 immediately.
//
// One pollfd is built per reader: polling a single-entry array with
// nfds == num_readers would read past the array and would only ever watch
// the first reader.
int perf_reader_poll(int num_readers, struct perf_reader **readers) {
  struct pollfd *pfds;
  int i;

  if (num_readers <= 0 || !readers)
    return 0;

  pfds = calloc((size_t)num_readers, sizeof(*pfds));
  if (!pfds) {
    perror("calloc");
    return -1;
  }
  for (i = 0; i < num_readers; ++i) {
    pfds[i].fd = readers[i]->fd;  // negative fds are ignored by poll()
    pfds[i].events = POLLIN;
  }

  // no timeout: wait until an event arrives or a signal interrupts the call
  if (poll(pfds, (nfds_t)num_readers, -1) > 0) {
    for (i = 0; i < num_readers; ++i) {
      if (pfds[i].revents & POLLIN)
        event_read(readers[i]);
    }
  }

  free(pfds);
  return 0;
}
/*
* Copyright (c) 2015 PLUMgrid, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <stdint.h>

// Opaque handle for one perf event ring buffer and its consumer callback.
struct perf_reader;

// Allocate a reader for `fd` (may be negative when not known yet) with a ring
// of `page_cnt` data pages; `cb` is called with `cb_cookie` for every sample.
// Returns NULL on allocation failure.
struct perf_reader * perf_reader_new(int fd, int page_cnt, perf_reader_cb cb, void *cb_cookie);

// Unmap the ring, close the fd and free the reader; `ptr` is a
// struct perf_reader * and may be NULL.
void perf_reader_free(void *ptr);

// Map the ring buffer of perf event `fd` into `reader`. `sample_type` is the
// PERF_SAMPLE_* mask of the event; it is declared uint64_t to match both the
// definition and the width of perf_event_attr.sample_type on every target.
// Returns 0 on success, -1 on failure.
int perf_reader_mmap(struct perf_reader *reader, int fd, uint64_t sample_type);

// Wait for data on any of the `num_readers` readers and dispatch the pending
// events to their callbacks.
int perf_reader_poll(int num_readers, struct perf_reader **readers);
...@@ -40,7 +40,12 @@ int bpf_attach_socket(int sockfd, int progfd); ...@@ -40,7 +40,12 @@ int bpf_attach_socket(int sockfd, int progfd);
/* create RAW socket and bind to interface 'name' */ /* create RAW socket and bind to interface 'name' */
int bpf_open_raw_sock(const char *name); int bpf_open_raw_sock(const char *name);
int bpf_attach_kprobe(int progfd, const char *event, const char *event_desc, int pid, int cpu, int group_fd); typedef void (*perf_reader_cb)(void *cb_cookie, int pid, uint64_t callchain_num,
void *callchain);
void * bpf_attach_kprobe(int progfd, const char *event, const char *event_desc,
int pid, int cpu, int group_fd, perf_reader_cb cb,
void *cb_cookie);
int bpf_detach_kprobe(const char *event_desc); int bpf_detach_kprobe(const char *event_desc);
#define LOG_BUF_SIZE 65536 #define LOG_BUF_SIZE 65536
......
...@@ -86,11 +86,17 @@ lib.bpf_attach_socket.argtypes = [ct.c_int, ct.c_int] ...@@ -86,11 +86,17 @@ lib.bpf_attach_socket.argtypes = [ct.c_int, ct.c_int]
lib.bpf_prog_load.restype = ct.c_int lib.bpf_prog_load.restype = ct.c_int
lib.bpf_prog_load.argtypes = [ct.c_int, ct.c_void_p, ct.c_size_t, lib.bpf_prog_load.argtypes = [ct.c_int, ct.c_void_p, ct.c_size_t,
ct.c_char_p, ct.c_uint, ct.c_char_p, ct.c_uint] ct.c_char_p, ct.c_uint, ct.c_char_p, ct.c_uint]
lib.bpf_attach_kprobe.restype = ct.c_int lib.bpf_attach_kprobe.restype = ct.c_void_p
lib.bpf_attach_kprobe.argtypes = [ct.c_int, ct.c_char_p, _CB_TYPE = ct.CFUNCTYPE(None, ct.py_object, ct.c_int,
ct.c_char_p, ct.c_int, ct.c_int, ct.c_int] ct.c_ulonglong, ct.POINTER(ct.c_ulonglong))
lib.bpf_attach_kprobe.argtypes = [ct.c_int, ct.c_char_p, ct.c_char_p, ct.c_int,
ct.c_int, ct.c_int, _CB_TYPE, ct.py_object]
lib.bpf_detach_kprobe.restype = ct.c_int lib.bpf_detach_kprobe.restype = ct.c_int
lib.bpf_detach_kprobe.argtypes = [ct.c_char_p] lib.bpf_detach_kprobe.argtypes = [ct.c_char_p]
lib.perf_reader_poll.restype = ct.c_int
lib.perf_reader_poll.argtypes = [ct.c_int, ct.POINTER(ct.c_void_p)]
lib.perf_reader_free.restype = None
lib.perf_reader_free.argtypes = [ct.c_void_p]
open_kprobes = {} open_kprobes = {}
tracefile = None tracefile = None
...@@ -104,9 +110,10 @@ stars_max = 40 ...@@ -104,9 +110,10 @@ stars_max = 40
@atexit.register @atexit.register
def cleanup_kprobes(): def cleanup_kprobes():
for k, v in open_kprobes.items(): for k, v in open_kprobes.items():
os.close(v) lib.perf_reader_free(v)
desc = "-:kprobes/%s" % k desc = "-:kprobes/%s" % k
lib.bpf_detach_kprobe(desc.encode("ascii")) lib.bpf_detach_kprobe(desc.encode("ascii"))
open_kprobes.clear()
if tracefile: if tracefile:
tracefile.close() tracefile.close()
...@@ -344,7 +351,7 @@ class BPF(object): ...@@ -344,7 +351,7 @@ class BPF(object):
raise Exception("Could not find file %s" % filename) raise Exception("Could not find file %s" % filename)
return filename return filename
def __init__(self, src_file="", hdr_file="", text=None, debug=0): def __init__(self, src_file="", hdr_file="", text=None, cb=None, debug=0):
"""Create a new BPF module with the given source code. """Create a new BPF module with the given source code.
Note: Note:
...@@ -360,6 +367,8 @@ class BPF(object): ...@@ -360,6 +367,8 @@ class BPF(object):
0x2: print BPF bytecode to stderr 0x2: print BPF bytecode to stderr
""" """
self._reader_cb_impl = _CB_TYPE(BPF._reader_cb)
self._user_cb = cb
self.debug = debug self.debug = debug
self.funcs = {} self.funcs = {}
self.tables = {} self.tables = {}
...@@ -502,6 +511,11 @@ class BPF(object): ...@@ -502,6 +511,11 @@ class BPF(object):
def __iter__(self): def __iter__(self):
return self.tables.__iter__() return self.tables.__iter__()
def _reader_cb(self, pid, callchain_num, callchain):
if self._user_cb:
cc = tuple(callchain[i] for i in range(0, callchain_num))
self._user_cb(pid, cc)
@staticmethod @staticmethod
def attach_raw_socket(fn, dev): def attach_raw_socket(fn, dev):
if not isinstance(fn, BPF.Function): if not isinstance(fn, BPF.Function):
...@@ -528,7 +542,7 @@ class BPF(object): ...@@ -528,7 +542,7 @@ class BPF(object):
(line != "\n" and line not in blacklist)] (line != "\n" and line not in blacklist)]
def attach_kprobe(self, event="", fn_name="", event_re="", def attach_kprobe(self, event="", fn_name="", event_re="",
pid=0, cpu=-1, group_fd=-1): pid=-1, cpu=0, group_fd=-1):
# allow the caller to glob multiple functions together # allow the caller to glob multiple functions together
if event_re: if event_re:
...@@ -544,8 +558,10 @@ class BPF(object): ...@@ -544,8 +558,10 @@ class BPF(object):
ev_name = "p_" + event.replace("+", "_").replace(".", "_") ev_name = "p_" + event.replace("+", "_").replace(".", "_")
desc = "p:kprobes/%s %s" % (ev_name, event) desc = "p:kprobes/%s %s" % (ev_name, event)
res = lib.bpf_attach_kprobe(fn.fd, ev_name.encode("ascii"), res = lib.bpf_attach_kprobe(fn.fd, ev_name.encode("ascii"),
desc.encode("ascii"), pid, cpu, group_fd) desc.encode("ascii"), pid, cpu, group_fd,
if res < 0: self._reader_cb_impl, ct.cast(id(self), ct.py_object))
res = ct.cast(res, ct.c_void_p)
if res == None:
raise Exception("Failed to attach BPF to kprobe") raise Exception("Failed to attach BPF to kprobe")
open_kprobes[ev_name] = res open_kprobes[ev_name] = res
return self return self
...@@ -579,8 +595,10 @@ class BPF(object): ...@@ -579,8 +595,10 @@ class BPF(object):
ev_name = "r_" + event.replace("+", "_").replace(".", "_") ev_name = "r_" + event.replace("+", "_").replace(".", "_")
desc = "r:kprobes/%s %s" % (ev_name, event) desc = "r:kprobes/%s %s" % (ev_name, event)
res = lib.bpf_attach_kprobe(fn.fd, ev_name.encode("ascii"), res = lib.bpf_attach_kprobe(fn.fd, ev_name.encode("ascii"),
desc.encode("ascii"), pid, cpu, group_fd) desc.encode("ascii"), pid, cpu, group_fd,
if res < 0: self._reader_cb_impl, ct.cast(id(self), ct.py_object))
res = ct.cast(res, ct.c_void_p)
if res == None:
raise Exception("Failed to attach BPF to kprobe") raise Exception("Failed to attach BPF to kprobe")
open_kprobes[ev_name] = res open_kprobes[ev_name] = res
return self return self
...@@ -751,3 +769,18 @@ class BPF(object): ...@@ -751,3 +769,18 @@ class BPF(object):
event_re is used while attaching and detaching probes event_re is used while attaching and detaching probes
""" """
return len(open_kprobes) return len(open_kprobes)
def kprobe_poll(self):
"""kprobe_poll(self)
Poll from the ring buffers for all of the open kprobes, calling the
cb() that was given in the BPF constructor for each entry.
"""
readers = (ct.c_void_p * len(open_kprobes))()
for i, v in enumerate(open_kprobes.values()):
readers[i] = v
try:
lib.perf_reader_poll(len(open_kprobes), readers)
except KeyboardInterrupt:
pass
...@@ -42,3 +42,5 @@ add_test(NAME py_test_clang WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} ...@@ -42,3 +42,5 @@ add_test(NAME py_test_clang WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
COMMAND ${TEST_WRAPPER} py_clang sudo ${CMAKE_CURRENT_SOURCE_DIR}/test_clang.py) COMMAND ${TEST_WRAPPER} py_clang sudo ${CMAKE_CURRENT_SOURCE_DIR}/test_clang.py)
add_test(NAME py_test_histogram WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} add_test(NAME py_test_histogram WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
COMMAND ${TEST_WRAPPER} py_histogram sudo ${CMAKE_CURRENT_SOURCE_DIR}/test_histogram.py) COMMAND ${TEST_WRAPPER} py_histogram sudo ${CMAKE_CURRENT_SOURCE_DIR}/test_histogram.py)
add_test(NAME py_test_callchain WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
COMMAND ${TEST_WRAPPER} py_callchain sudo ${CMAKE_CURRENT_SOURCE_DIR}/test_callchain.py)
#!/usr/bin/env python
# Copyright (c) PLUMgrid, Inc.
# Licensed under the Apache License, Version 2.0 (the "License")
from bcc import BPF
import time
from unittest import main, TestCase
class TestCallchain(TestCase):
    """Smoke test for delivering perf call chains to a Python callback."""

    def test_callchain1(self):
        """Attach to finish_task_switch for one second and print a histogram
        of the kernel call chains reported through the callback."""
        # call chain (tuple of addresses) -> number of times it was seen
        hist = {}

        def cb(pid, callchain):
            hist[callchain] = hist.get(callchain, 0) + 1

        b = BPF(text="""
#include <linux/ptrace.h>
int kprobe__finish_task_switch(struct pt_regs *ctx) {
return 1;
}
""", cb=cb)

        # keep draining the ring buffers until the one second window is over
        start = time.time()
        while time.time() < start + 1:
            b.kprobe_poll()

        for k, v in hist.items():
            # resolve every address of the chain to a kernel symbol name
            syms = [b.ksym(addr) for addr in k]
            print("%-08d:" % v, syms)


if __name__ == "__main__":
    main()
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment