Commit cc27edfd authored by Sasha Goldshtein's avatar Sasha Goldshtein

Fixed bug with labels, added support for tuples in hash

parent 7983d6b6
......@@ -49,7 +49,7 @@ include in the BPF program, e.g. 'linux/blkdev.h' or 'linux/time.h'.
.SH SPECIFIER SYNTAX
The general specifier syntax is as follows:
.B {p,r}:[library]:function(signature)[:type:expr[:filter]][#label]
.B {p,r}:[library]:function(signature)[:type[,type...]:expr[,expr...][:filter]][#label]
.TP
.B {p,r}
Probe type \- "p" for function entry, "r" for function return;
......@@ -74,13 +74,13 @@ on the other hand, is only required if you plan to collect parameter values
based on that signature. For example, if you only want to collect the first
parameter, you don't have to specify the rest of the parameters in the signature.
.TP
.B [type]
The type of the expression to capture.
.B [type[,type...]]
The type(s) of the expression(s) to capture.
This is the type of the keys in the histogram or raw event collection that are
collected by the probes.
.TP
.B [expr]
The expression to capture.
.B [expr[,expr...]]
The expression(s) to capture.
These are the values that are assigned to the histogram or raw event collection.
You may use the parameters directly, or valid C expressions that involve the
parameters, such as "size % 10".
......@@ -143,6 +143,10 @@ Print histograms of sleep() and nanosleep() parameter values:
Spy on writes to STDOUT performed by process 2780, up to a string size of 120 characters:
#
.B argdist.py -p 2780 -z 120 -C 'p:c:write(int fd, char* buf, size_t len):char*:buf:fd==1'
.TP
Group files being read from and the read sizes from __vfs_read:
#
.B argdist.py -I 'linux/fs.h' -C 'p::__vfs_read(struct file *file, void *buf, size_t count):char*,size_t:file->f_path.dentry->d_iname,count:file->f_path.dentry->d_iname[0]!=0'
.SH SOURCE
This is from bcc.
.IP
......
......@@ -25,8 +25,8 @@ int PROBENAME(struct pt_regs *ctx SIGNATURE)
{
PREFIX
PID_FILTER
KEY_EXPR
if (!(FILTER)) return 0;
KEY_EXPR
COLLECT
return 0;
}
......@@ -92,11 +92,13 @@ u64 __time = bpf_ktime_get_ns();
# when entering the function.
self.args_to_probe = set()
regex = r"\$entry\((\w+)\)"
for arg in re.finditer(regex, self.expr):
self.args_to_probe.add(arg.group(1))
for expr in self.exprs:
for arg in re.finditer(regex, expr):
self.args_to_probe.add(arg.group(1))
for arg in re.finditer(regex, self.filter):
self.args_to_probe.add(arg.group(1))
if "$latency" in self.expr or "$latency" in self.filter:
if any(map(lambda expr: "$latency" in expr, self.exprs)) or \
"$latency" in self.filter:
self.args_to_probe.add("__latency")
self.param_types["__latency"] = "u64" # nanoseconds
for pname in self.args_to_probe:
......@@ -139,7 +141,9 @@ u64 __time = bpf_ktime_get_ns();
else:
entry_expr = "$entry(%s)" % pname
val_expr = "(*%s)" % vname
self.expr = self.expr.replace(entry_expr, val_expr)
for i in range(0, len(self.exprs)):
self.exprs[i] = self.exprs[i].replace(
entry_expr, val_expr)
self.filter = self.filter.replace(entry_expr,
val_expr)
......@@ -171,7 +175,17 @@ u64 __time = bpf_ktime_get_ns();
"but got '%s'" % parts[0])
if re.match(r"\w+\(.*\)", parts[2]) is None:
self._bail(("function signature '%s' has an invalid " +
"format") % parts[2])
"format") % parts[2])
def _parse_expr_types(self, expr_types):
if len(expr_types) == 0:
self._bail("no expr types specified")
self.expr_types = expr_types.split(',')
def _parse_exprs(self, exprs):
if len(exprs) == 0:
self._bail("no exprs specified")
self.exprs = exprs.split(',')
def __init__(self, type, specifier, pid):
self.raw_spec = specifier
......@@ -195,26 +209,31 @@ u64 __time = bpf_ktime_get_ns();
# the retval in a ret probe, or simply the value "1" otherwise.
self.is_default_expr = len(parts) < 5
if not self.is_default_expr:
self.expr_type = parts[3]
self.expr = parts[4]
self._parse_expr_types(parts[3])
self._parse_exprs(parts[4])
if len(self.exprs) != len(self.expr_types):
self._bail("mismatched # of exprs and types")
if self.type == "hist" and len(self.expr_types) > 1:
self._bail("histograms can only have 1 expr")
else:
if not self.is_ret_probe and self.type == "hist":
raise ValueError("dist probes must have expr")
self.expr_type = \
"u64" if not self.is_ret_probe else "int"
self.expr = "1" if not self.is_ret_probe else "$retval"
self._bail("histograms must have expr")
self.expr_types = \
["u64" if not self.is_ret_probe else "int"]
self.exprs = \
["1" if not self.is_ret_probe else "$retval"]
self.filter = "" if len(parts) != 6 else parts[5]
self._substitute_exprs()
# Do we need to attach an entry probe so that we can collect an
# argument that is required for an exit (return) probe?
def check(expr):
keywords = ["$entry", "$latency"]
return any(map(lambda kw: kw in expr, keywords))
self.entry_probe_required = self.is_ret_probe and \
("$entry" in self.expr or "$entry" in self.filter or
"$latency" in self.expr or "$latency" in self.filter)
(any(map(check, self.exprs)) or check(self.filter))
self.pid = pid
# Generating unique names for probes means we can attach
# many times to the same function.
self.probe_func_name = "%s_probe%d" % \
(self.function, Specifier.next_probe_index)
self.probe_hash_name = "%s_hash%d" % \
......@@ -222,17 +241,71 @@ u64 __time = bpf_ktime_get_ns();
Specifier.next_probe_index += 1
def _substitute_exprs(self):
self.expr = self.expr.replace("$retval",
"(%s)ctx->ax" % self.expr_type)
self.filter = self.filter.replace("$retval",
"(%s)ctx->ax" % self.expr_type)
self.expr = self._substitute_aliases(self.expr)
self.filter = self._substitute_aliases(self.filter)
def repl(expr):
expr = self._substitute_aliases(expr)
return expr.replace("$retval", "ctx->ax")
for i in range(0, len(self.exprs)):
self.exprs[i] = repl(self.exprs[i])
self.filter = repl(self.filter)
def _is_string(self, expr_type):
return expr_type == "char*" or expr_type == "char *"
def _generate_hash_field(self, i):
if self._is_string(self.expr_types[i]):
return "struct __string_t v%d;\n" % i
else:
return "%s v%d;\n" % (self.expr_types[i], i)
def _is_string_probe(self):
return self.expr_type == "char*" or self.expr_type == "char *"
def _generate_field_assignment(self, i):
if self._is_string(self.expr_types[i]):
return "bpf_probe_read(" + \
"&__key.v%d.s, sizeof(__key.v%d.s), %s);\n" % \
(i, i, self.exprs[i])
else:
return "__key.v%d = %s;\n" % (i, self.exprs[i])
def _generate_hash_decl(self):
if self.type == "hist":
return "BPF_HISTOGRAM(%s, %s);" % \
(self.probe_hash_name, self.expr_types[0])
else:
text = "struct %s_key_t {\n" % self.probe_hash_name
for i in range(0, len(self.expr_types)):
text += self._generate_hash_field(i)
text += "};\n"
text += "BPF_HASH(%s, struct %s_key_t, u64);\n" % \
(self.probe_hash_name, self.probe_hash_name)
return text
def _generate_key_assignment(self):
if self.type == "hist":
return "%s __key = %s;\n" % \
(self.expr_types[0], self.exprs[0])
else:
text = "struct %s_key_t __key = {};\n" % \
self.probe_hash_name
for i in range(0, len(self.exprs)):
text += self._generate_field_assignment(i)
return text
def _generate_hash_update(self):
if self.type == "hist":
return "%s.increment(bpf_log2l(__key));" % \
self.probe_hash_name
else:
return "%s.increment(__key);" % self.probe_hash_name
def generate_text(self, string_size):
def _generate_pid_filter(self):
# Kernel probes need to explicitly filter pid, because the
# attach interface doesn't support pid filtering
if self.pid is not None and not self.is_user:
return "u32 pid = bpf_get_current_pid_tgid();\n" + \
"if (pid != %d) { return 0; }" % self.pid
else:
return ""
def generate_text(self):
# We don't like tools writing tools (Brendan Gregg), but this
# is an exception because we're letting the user fully
# customize the values we probe. As a rule of thumb though,
......@@ -246,6 +319,8 @@ u64 __time = bpf_ktime_get_ns();
if self.entry_probe_required:
program = self._generate_entry_probe()
prefix = self._generate_retprobe_prefix()
# Replace $entry(paramname) with a reference to the
# value we collected when entering the function:
self._replace_entry_exprs()
program += self.probe_text.replace("PROBENAME",
......@@ -254,39 +329,12 @@ u64 __time = bpf_ktime_get_ns();
or self.is_ret_probe \
else ", " + self.signature
program = program.replace("SIGNATURE", signature)
if self.pid is not None and not self.is_user:
# Kernel probes need to explicitly filter pid
program = program.replace("PID_FILTER",
"u32 pid = bpf_get_current_pid_tgid();\n" + \
"if (pid != %d) { return 0; }" % self.pid)
else:
program = program.replace("PID_FILTER", "")
if self._is_string_probe():
decl = """
struct %s_key_t { char key[%d]; };
BPF_HASH(%s, struct %s_key_t, u64);
""" \
% (self.function, string_size,
self.probe_hash_name, self.function)
collect = "%s.increment(__key);" % self.probe_hash_name
key_expr = """
struct %s_key_t __key = {0};
bpf_probe_read(&__key.key, sizeof(__key.key), %s);
""" \
% (self.function, self.expr)
elif self.type == "freq":
decl = "BPF_HASH(%s, %s, u64);" % \
(self.probe_hash_name, self.expr_type)
collect = "%s.increment(__key);" % self.probe_hash_name
key_expr = "%s __key = %s;" % \
(self.expr_type, self.expr)
elif self.type == "hist":
decl = "BPF_HISTOGRAM(%s, %s);" % \
(self.probe_hash_name, self.expr_type)
collect = "%s.increment(bpf_log2l(__key));" % \
self.probe_hash_name
key_expr = "%s __key = %s;" % \
(self.expr_type, self.expr)
program = program.replace("PID_FILTER",
self._generate_pid_filter())
decl = self._generate_hash_decl()
key_expr = self._generate_key_assignment()
collect = self._generate_hash_update()
program = program.replace("DATA_DECL", decl)
program = program.replace("KEY_EXPR", key_expr)
program = program.replace("FILTER",
......@@ -318,6 +366,40 @@ bpf_probe_read(&__key.key, sizeof(__key.key), %s);
if self.entry_probe_required:
self._attach_entry_probe()
def _v2s(self, v):
# Most fields can be converted with plain str(), but strings
# are wrapped in a __string_t which has an .s field
if "__string_t" in type(v).__name__:
return str(v.s)
return str(v)
def _display_expr(self, i):
# Replace ugly latency calculation with $latency
expr = self.exprs[i].replace(
"(bpf_ktime_get_ns() - *____latency_val)", "$latency")
# Replace alias values back with the alias name
for alias, subst in Specifier.aliases.items():
expr = expr.replace(subst, alias)
# Replace retval expression with $retval
expr = expr.replace("ctx->ax", "$retval")
# Replace ugly (*__param_val) expressions with param name
return re.sub(r"\(\*__(\w+)_val\)", r"\1", expr)
def _display_key(self, key):
if self.is_default_expr:
if not self.is_ret_probe:
return "total calls"
else:
return "retval = %s" % str(key.v0)
else:
# The key object has v0, ..., vk fields containing
# the values of the expressions from self.exprs
def str_i(i):
key_i = self._v2s(getattr(key, "v%d" % i))
return "%s = %s" % \
(self._display_expr(i), key_i)
return ", ".join(map(str_i, range(0, len(self.exprs))))
def display(self, top):
data = self.bpf.get_table(self.probe_hash_name)
if self.type == "freq":
......@@ -327,8 +409,6 @@ bpf_probe_read(&__key.key, sizeof(__key.key), %s);
if top is not None:
data = data[-top:]
for key, value in data:
key_val = key.key if self._is_string_probe() \
else str(key.value)
# Print some nice values if the user didn't
# specify an expression to probe
if self.is_default_expr:
......@@ -336,21 +416,19 @@ bpf_probe_read(&__key.key, sizeof(__key.key), %s);
key_str = "total calls"
else:
key_str = "retval = %s" % \
key_val
self._v2s(key.v0)
else:
key_str = "%s = %s" % \
(self.expr, key_val)
key_str = self._display_key(key)
print("\t%-10s %s" % \
(str(value.value), key_str))
elif self.type == "hist":
label = self.label or \
(self.expr if not self.is_default_expr \
else "retval")
label = self.label or (self._display_expr(0)
if not self.is_default_expr else "retval")
data.print_log2_hist(val_type=label)
examples = """
Probe specifier syntax:
{p,r}:[library]:function(signature)[:type:expr[:filter]][#label]
{p,r}:[library]:function(signature)[:type[,type...]:expr[,expr...][:filter]][#label]
Where:
p,r -- probe at function entry or at function exit
in exit probes: can use $retval, $entry(param), $latency
......@@ -358,8 +436,8 @@ Where:
(leave empty for kernel functions)
function -- the function name to trace
signature -- the function's parameters, as in the C header
type -- the type of the expression to collect
expr -- the expression to collect
type -- the type of the expression to collect (supports multiple)
expr -- the expression to collect (supports multiple)
filter -- the filter that is applied to collected values
label -- the label for this probe in the resulting output
......@@ -372,7 +450,7 @@ argdist.py -p 1005 -C 'p:c:malloc(size_t size):size_t:size:size==16'
Print a frequency count of how many times process 1005 called malloc
with an allocation size of 16 bytes
argdist.py -C 'r:c:gets():char*:$retval#snooped strings'
argdist.py -C 'r:c:gets():char*:(char*)$retval#snooped strings'
Snoop on all strings returned by gets()
argdist.py -H 'r::__kmalloc(size_t size):u64:$latency/$entry(size)#ns per byte'
......@@ -388,7 +466,7 @@ argdist.py -p 1005 -C 'p:c:write(int fd):int:fd' -T 5
the top 5 busiest fds
argdist.py -p 1005 -H 'r:c:read()'
Print a histogram of error codes returned by read() in process 1005
Print a histogram of results (sizes) returned by read() in process 1005
argdist.py -C 'r::__vfs_read():u32:$PID:$latency > 100000'
Print frequency of reads by process where the latency was >0.1ms
......@@ -451,11 +529,15 @@ if len(specifiers) == 0:
print("at least one specifier is required")
exit(1)
bpf_source = "#include <uapi/linux/ptrace.h>\n"
bpf_source = """
struct __string_t { char s[%d]; };
#include <uapi/linux/ptrace.h>
""" % args.string_size
for include in (args.include or []):
bpf_source += "#include <%s>\n" % include
for specifier in specifiers:
bpf_source += specifier.generate_text(args.string_size)
bpf_source += specifier.generate_text()
if args.verbose:
print(bpf_source)
......
......@@ -156,13 +156,13 @@ What about reads? You could trace gets() across the system and print the
strings input by the user (note how "r" is used instead of "p" to attach a
probe to the function's return):
# ./argdist.py -i 10 -n 1 -C 'r:c:gets():char*:$retval:$retval!=0'
# ./argdist.py -i 10 -n 1 -C 'r:c:gets():char*:(char*)$retval:$retval!=0'
[02:12:23]
r:c:gets():char*:$retval:$retval!=0
COUNT EVENT
1 (char*)ctx->ax = hi there
3 (char*)ctx->ax = sasha
8 (char*)ctx->ax = hello
1 (char*)$retval = hi there
3 (char*)$retval = sasha
8 (char*)$retval = hello
Similarly, we could get a histogram of the error codes returned by read():
......@@ -192,18 +192,16 @@ longer than 0.1ms (100,000ns):
[01:08:48]
r::__vfs_read():u32:$PID:$latency > 100000
COUNT EVENT
1 bpf_get_current_pid_tgid() = 10457
21 bpf_get_current_pid_tgid() = 2780
1 $PID = 10457
21 $PID = 2780
[01:08:49]
r::__vfs_read():u32:$PID:$latency > 100000
COUNT EVENT
1 bpf_get_current_pid_tgid() = 10457
21 bpf_get_current_pid_tgid() = 2780
1 $PID = 10457
21 $PID = 2780
^C
As you see, the $PID alias is expanded to the BPF function bpf_get_current_pid_tgid(),
which returns the current process' pid. It looks like process 2780 performed
21 slow reads.
It looks like process 2780 performed 21 slow reads.
Occasionally, entry parameter values are also interesting. For example, you
might be curious how long it takes malloc() to allocate memory -- nanoseconds
......@@ -231,6 +229,39 @@ and take 2-15 nanoseconds per byte. Other allocations are slower, and take
64-127 nanoseconds per byte. And some allocations are slower still, and take
multiple microseconds per byte.
You could also group results by more than one field. For example, __kmalloc
takes an additional flags parameter that describes how to allocate memory:
# ./argdist.py -I 'linux/slab.h' -C 'p::__kmalloc(size_t size, gfp_t flags):gfp_t,size_t:flags,size'
[03:42:29]
p::__kmalloc(size_t size, gfp_t flags):gfp_t,size_t:flags,size
COUNT EVENT
1 flags = 16, size = 152
2 flags = 131280, size = 8
7 flags = 131280, size = 16
[03:42:30]
p::__kmalloc(size_t size, gfp_t flags):gfp_t,size_t:flags,size
COUNT EVENT
1 flags = 16, size = 152
6 flags = 131280, size = 8
19 flags = 131280, size = 16
[03:42:31]
p::__kmalloc(size_t size, gfp_t flags):gfp_t,size_t:flags,size
COUNT EVENT
2 flags = 16, size = 152
10 flags = 131280, size = 8
31 flags = 131280, size = 16
[03:42:32]
p::__kmalloc(size_t size, gfp_t flags):gfp_t,size_t:flags,size
COUNT EVENT
2 flags = 16, size = 152
14 flags = 131280, size = 8
43 flags = 131280, size = 16
^C
The flags value must be expanded by hand, but it's still helpful to eliminate
certain kinds of allocations or visually group them together.
Here's a final example that finds how many write() system calls are performed
by each process on the system:
......@@ -238,15 +269,15 @@ by each process on the system:
[06:47:18]
write by process
COUNT EVENT
3 bpf_get_current_pid_tgid() = 8889
7 bpf_get_current_pid_tgid() = 7615
7 bpf_get_current_pid_tgid() = 2480
3 $PID = 8889
7 $PID = 7615
7 $PID = 2480
[06:47:19]
write by process
COUNT EVENT
9 bpf_get_current_pid_tgid() = 8889
23 bpf_get_current_pid_tgid() = 7615
23 bpf_get_current_pid_tgid() = 2480
9 $PID = 8889
23 $PID = 7615
23 $PID = 2480
USAGE message:
......@@ -280,7 +311,7 @@ optional arguments:
additional header files to include in the BPF program
Probe specifier syntax:
{p,r}:[library]:function(signature)[:type:expr[:filter]][#label]
{p,r}:[library]:function(signature)[:type[,type...]:expr[,expr...][:filter]][#label]
Where:
p,r -- probe at function entry or at function exit
in exit probes: can use $retval, $entry(param), $latency
......@@ -288,8 +319,8 @@ Where:
(leave empty for kernel functions)
function -- the function name to trace
signature -- the function's parameters, as in the C header
type -- the type of the expression to collect
expr -- the expression to collect
type -- the type of the expression to collect (supports multiple)
expr -- the expression to collect (supports multiple)
filter -- the filter that is applied to collected values
label -- the label for this probe in the resulting output
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment