Commit b1e3b670 authored by Brenden Blanco's avatar Brenden Blanco

Merge pull request #247 from iovisor/bblanco_dev

Add BPF_HISTOGRAM type and print support
parents 4037b4df 6255f1f0
......@@ -36,6 +36,7 @@ struct _name##_table_t { \
int (*update) (_key_type *, _leaf_type *); \
int (*delete) (_key_type *); \
void (*call) (void *, int index); \
void (*increment) (_key_type); \
_leaf_type data[_max_entries]; \
}; \
__attribute__((section("maps/" _table_type))) \
......@@ -55,6 +56,19 @@ struct _name##_table_t _name
#define BPF_HASH(...) \
BPF_HASHX(__VA_ARGS__, BPF_HASH3, BPF_HASH2, BPF_HASH1)(__VA_ARGS__)
#define BPF_HIST1(_name) \
BPF_TABLE("histogram", int, u64, _name, 64)
#define BPF_HIST2(_name, _key_type) \
BPF_TABLE("histogram", _key_type, u64, _name, 64)
#define BPF_HIST3(_name, _key_type, _size) \
BPF_TABLE("histogram", _key_type, u64, _name, _size)
#define BPF_HISTX(_1, _2, _3, NAME, ...) NAME
// Define a histogram, some arguments optional
// BPF_HISTOGRAM(name, key_type=int, size=64)
#define BPF_HISTOGRAM(...) \
BPF_HISTX(__VA_ARGS__, BPF_HIST3, BPF_HIST2, BPF_HIST1)(__VA_ARGS__)
// packet parsing state machine helpers
#define cursor_advance(_cursor, _len) \
({ void *_tmp = _cursor; _cursor += _len; _tmp; })
......
......@@ -67,9 +67,11 @@ bool BMapDeclVisitor::VisitRecordDecl(RecordDecl *D) {
for (auto F : D->getDefinition()->fields()) {
result_ += "[";
if (F->getType()->isPointerType())
result_ += "\"unsigned long long\"";
result_ += "\"" + F->getName().str() + "\", \"unsigned long long\"";
else
TraverseDecl(F);
if (const ConstantArrayType *T = dyn_cast<ConstantArrayType>(F->getType()))
result_ += ", [" + T->getSize().toString(10, false) + "]";
if (F->isBitField())
result_ += ", " + to_string(F->getBitWidthValue(C));
result_ += "], ";
......@@ -158,6 +160,23 @@ bool ProbeVisitor::VisitBinaryOperator(BinaryOperator *E) {
}
return true;
}
bool ProbeVisitor::VisitUnaryOperator(UnaryOperator *E) {
if (E->getOpcode() != UO_Deref)
return true;
if (memb_visited_.find(E) != memb_visited_.end())
return true;
if (!ProbeChecker(E, ptregs_).needs_probe())
return true;
memb_visited_.insert(E);
Expr *sub = E->getSubExpr();
string rhs = rewriter_.getRewrittenText(SourceRange(sub->getLocStart(), sub->getLocEnd()));
string text;
text = "({ typeof(" + E->getType().getAsString() + ") _val; memset(&_val, 0, sizeof(_val));";
text += " bpf_probe_read(&_val, sizeof(_val), (u64)";
text += rhs + "); _val; })";
rewriter_.ReplaceText(SourceRange(E->getLocStart(), E->getLocEnd()), text);
return true;
}
bool ProbeVisitor::VisitMemberExpr(MemberExpr *E) {
if (memb_visited_.find(E) != memb_visited_.end()) return true;
......@@ -194,7 +213,7 @@ bool ProbeVisitor::VisitMemberExpr(MemberExpr *E) {
}
BTypeVisitor::BTypeVisitor(ASTContext &C, Rewriter &rewriter, vector<TableDesc> &tables)
: C(C), rewriter_(rewriter), out_(llvm::errs()), tables_(tables) {
: C(C), diag_(C.getDiagnostics()), rewriter_(rewriter), out_(llvm::errs()), tables_(tables) {
}
bool BTypeVisitor::VisitFunctionDecl(FunctionDecl *D) {
......@@ -276,7 +295,7 @@ bool BTypeVisitor::VisitCallExpr(CallExpr *Call) {
string map_update_policy = "BPF_ANY";
string txt;
if (memb_name == "lookup_or_init") {
string map_update_policy = "BPF_NOEXIST";
map_update_policy = "BPF_NOEXIST";
string name = Ref->getDecl()->getName();
string arg0 = rewriter_.getRewrittenText(SourceRange(Call->getArg(0)->getLocStart(),
Call->getArg(0)->getLocEnd()));
......@@ -291,6 +310,19 @@ bool BTypeVisitor::VisitCallExpr(CallExpr *Call) {
txt += " if (!leaf) return 0;";
txt += "}";
txt += "leaf;})";
} else if (memb_name == "increment") {
string name = Ref->getDecl()->getName();
string arg0 = rewriter_.getRewrittenText(SourceRange(Call->getArg(0)->getLocStart(),
Call->getArg(0)->getLocEnd()));
string lookup = "bpf_map_lookup_elem_(bpf_pseudo_fd(1, " + fd + ")";
string update = "bpf_map_update_elem_(bpf_pseudo_fd(1, " + fd + ")";
txt = "({ typeof(" + name + ".key) _key = " + arg0 + "; ";
if (table_it->type == BPF_MAP_TYPE_HASH) {
txt += "typeof(" + name + ".leaf) _zleaf; memset(&_zleaf, 0, sizeof(_zleaf)); ";
txt += update + ", &_key, &_zleaf, BPF_NOEXIST); ";
}
txt += "typeof(" + name + ".leaf) *_leaf = " + lookup + ", &_key); ";
txt += "if (_leaf) (*_leaf)++; })";
} else {
if (memb_name == "lookup") {
prefix = "bpf_map_lookup_elem";
......@@ -463,11 +495,21 @@ bool BTypeVisitor::VisitVarDecl(VarDecl *Decl) {
++i;
}
bpf_map_type map_type = BPF_MAP_TYPE_UNSPEC;
if (A->getName() == "maps/hash")
if (A->getName() == "maps/hash") {
map_type = BPF_MAP_TYPE_HASH;
else if (A->getName() == "maps/array")
} else if (A->getName() == "maps/array") {
map_type = BPF_MAP_TYPE_ARRAY;
} else if (A->getName() == "maps/histogram") {
if (table.key_desc == "\"int\"")
map_type = BPF_MAP_TYPE_ARRAY;
else if (A->getName() == "maps/prog") {
else
map_type = BPF_MAP_TYPE_HASH;
if (table.leaf_desc != "\"unsigned long long\"") {
unsigned diag_id = diag_.getCustomDiagID(DiagnosticsEngine::Error,
"histogram leaf type must be u64, got %0");
diag_.Report(Decl->getLocStart(), diag_id) << table.leaf_desc;
}
} else if (A->getName() == "maps/prog") {
struct utsname un;
if (uname(&un) == 0) {
int major = 0, minor = 0;
......@@ -485,8 +527,9 @@ bool BTypeVisitor::VisitVarDecl(VarDecl *Decl) {
table.type = map_type;
table.fd = bpf_create_map(map_type, table.key_size, table.leaf_size, table.max_entries);
if (table.fd < 0) {
C.getDiagnostics().Report(Decl->getLocStart(), diag::err_expected)
<< "valid bpf fd";
unsigned diag_id = C.getDiagnostics().getCustomDiagID(DiagnosticsEngine::Error,
"could not open bpf map: %0");
C.getDiagnostics().Report(Decl->getLocStart(), diag_id) << strerror(errno);
return false;
}
tables_.push_back(std::move(table));
......
......@@ -71,6 +71,7 @@ class BTypeVisitor : public clang::RecursiveASTVisitor<BTypeVisitor> {
private:
clang::ASTContext &C;
clang::DiagnosticsEngine &diag_;
clang::Rewriter &rewriter_; /// modifications to the source go into this class
llvm::raw_ostream &out_; /// for debugging
std::vector<TableDesc> &tables_; /// store the open FDs
......@@ -85,6 +86,7 @@ class ProbeVisitor : public clang::RecursiveASTVisitor<ProbeVisitor> {
bool VisitVarDecl(clang::VarDecl *Decl);
bool VisitCallExpr(clang::CallExpr *Call);
bool VisitBinaryOperator(clang::BinaryOperator *E);
bool VisitUnaryOperator(clang::UnaryOperator *E);
bool VisitMemberExpr(clang::MemberExpr *E);
void set_ptreg(clang::Decl *D) { ptregs_.insert(D); }
private:
......
......@@ -99,7 +99,7 @@ KALLSYMS = "/proc/kallsyms"
ksym_addrs = []
ksym_names = []
ksym_loaded = 0
stars_max = 38
stars_max = 40
@atexit.register
def cleanup_kprobes():
......@@ -238,38 +238,60 @@ class BPF(object):
text = text[:-1] + "+"
return text
def print_log2_hist(self, val_type="value"):
"""print_log2_hist(type=value)
def print_log2_hist(self, val_type="value", bucket_type="ptr"):
"""print_log2_hist(val_type="value", bucket_type="ptr")
Prints a table as a log2 histogram. The table must be stored as
log2. The type argument is optional, and is a column header.
log2. The val_type argument is optional, and is a column header.
If the histogram has a secondary key, multiple tables will print
and bucket_type can be used as a header description for each.
"""
if isinstance(self.Key(), ct.Structure):
tmp = {}
f1 = self.Key._fields_[0][0]
f2 = self.Key._fields_[1][0]
for k, v in self.items():
bucket = getattr(k, f1)
vals = tmp[bucket] = tmp.get(bucket, [0] * 65)
slot = getattr(k, f2)
vals[slot] = v.value
for bucket, vals in tmp.items():
print("\nBucket %s = %r" % (bucket_type, bucket))
self._print_log2_hist(vals, val_type, 0)
else:
vals = [0] * 65
for k, v in self.items():
vals[k.value] = v.value
self._print_log2_hist(vals, val_type, 0)
def _print_log2_hist(self, vals, val_type, val_max):
global stars_max
log2_dist_max = 64
idx_max = -1
val_max = 0
for i in range(1, log2_dist_max + 1):
try:
val = self[ct.c_int(i)].value
if (val > 0):
idx_max = i
if (val > val_max):
val_max = val
except:
break
for i, v in enumerate(vals):
if v > 0: idx_max = i
if v > val_max: val_max = v
if idx_max <= 32:
header = " %-19s : count distribution"
body = "%10d -> %-10d : %-8d |%-*s|"
stars = stars_max
else:
header = " %-29s : count distribution"
body = "%20d -> %-20d : %-8d |%-*s|"
stars = int(stars_max / 2)
if idx_max > 0:
print(" %-15s : count distribution" % val_type);
print(header % val_type);
for i in range(1, idx_max + 1):
low = (1 << i) >> 1
high = (1 << i) - 1
if (low == high):
low -= 1
try:
val = self[ct.c_int(i)].value
print("%8d -> %-8d : %-8d |%-*s|" % (low, high, val,
stars_max, self._stars(val, val_max, stars_max)))
except:
break
val = vals[i]
print(body % (low, high, val, stars,
self._stars(val, val_max, stars)))
def __iter__(self):
......@@ -407,7 +429,6 @@ class BPF(object):
u"_Bool": ct.c_bool,
u"char": ct.c_char,
u"wchar_t": ct.c_wchar,
u"char": ct.c_byte,
u"unsigned char": ct.c_ubyte,
u"short": ct.c_short,
u"unsigned short": ct.c_ushort,
......@@ -430,7 +451,12 @@ class BPF(object):
if len(t) == 2:
fields.append((t[0], BPF._decode_table_type(t[1])))
elif len(t) == 3:
if isinstance(t[2], list):
fields.append((t[0], BPF._decode_table_type(t[1]) * t[2][0]))
else:
fields.append((t[0], BPF._decode_table_type(t[1]), t[2]))
else:
raise Exception("Failed to decode type %s" % str(t))
cls = type(str(desc[0]), (ct.Structure,), dict(_fields_=fields))
return cls
......
......@@ -38,3 +38,5 @@ add_test(NAME py_test_brb2 WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
COMMAND ${TEST_WRAPPER} py_brb2_c sudo ${CMAKE_CURRENT_SOURCE_DIR}/test_brb2.py test_brb2.c)
add_test(NAME py_test_clang WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
COMMAND ${TEST_WRAPPER} py_clang sudo ${CMAKE_CURRENT_SOURCE_DIR}/test_clang.py)
add_test(NAME py_test_histogram WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
COMMAND ${TEST_WRAPPER} py_histogram sudo ${CMAKE_CURRENT_SOURCE_DIR}/test_histogram.py)
#!/usr/bin/env python
# Copyright (c) PLUMgrid, Inc.
# Licensed under the Apache License, Version 2.0 (the "License")
from bcc import BPF
from ctypes import c_int, c_ulonglong
import random
import time
from unittest import main, TestCase
class TestHistogram(TestCase):
def test_simple(self):
b = BPF(text="""
#include <uapi/linux/ptrace.h>
#include <linux/bpf.h>
BPF_HISTOGRAM(hist1);
BPF_HASH(stub);
int kprobe__htab_map_delete_elem(struct pt_regs *ctx, struct bpf_map *map, u64 *k) {
hist1.increment(bpf_log2l(*k));
return 0;
}
""")
for i in range(0, 32):
for j in range(0, random.randint(1, 10)):
try: del b["stub"][c_ulonglong(1 << i)]
except: pass
b["hist1"].print_log2_hist()
for i in range(32, 64):
for j in range(0, random.randint(1, 10)):
try: del b["stub"][c_ulonglong(1 << i)]
except: pass
b["hist1"].print_log2_hist()
def test_struct(self):
b = BPF(text="""
#include <uapi/linux/ptrace.h>
#include <linux/bpf.h>
typedef struct { void *map; u64 slot; } Key;
BPF_HISTOGRAM(hist1, Key, 1024);
BPF_HASH(stub1);
BPF_HASH(stub2);
int kprobe__htab_map_delete_elem(struct pt_regs *ctx, struct bpf_map *map, u64 *k) {
hist1.increment((Key){map, bpf_log2l(*k)});
return 0;
}
""")
for i in range(0, 64):
for j in range(0, random.randint(1, 10)):
try: del b["stub1"][c_ulonglong(1 << i)]
except: pass
try: del b["stub2"][c_ulonglong(1 << i)]
except: pass
b["hist1"].print_log2_hist()
def test_chars(self):
b = BPF(text="""
#include <uapi/linux/ptrace.h>
#include <linux/sched.h>
typedef struct { char name[TASK_COMM_LEN]; u64 slot; } Key;
BPF_HISTOGRAM(hist1, Key, 1024);
int kprobe__finish_task_switch(struct pt_regs *ctx, struct task_struct *prev) {
Key k = {.slot = bpf_log2l(prev->real_start_time)};
if (!bpf_get_current_comm(&k.name, sizeof(k.name)))
hist1.increment(k);
return 0;
}
""")
for i in range(0, 100): time.sleep(0.01)
b["hist1"].print_log2_hist()
if __name__ == "__main__":
main()
......@@ -45,7 +45,7 @@ bpf_text = """
#include <uapi/linux/ptrace.h>
#include <linux/blkdev.h>
BPF_TABLE(\"array\", int, u64, dist, 64);
BPF_HISTOGRAM(dist);
BPF_HASH(start, struct request *);
// time block I/O
......@@ -70,9 +70,7 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
FACTOR
// store as histogram
int index = bpf_log2l(delta);
u64 *leaf = dist.lookup(&index);
if (leaf) (*leaf)++;
dist.increment(bpf_log2l(delta));
start.delete(&req);
return 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment