Commit 7983d6b6 authored by Sasha Goldshtein

Improved error messages for invalid probe syntax, added -I switch to add...

Improved error messages for invalid probe syntax, added -I switch to add include files, fixed bug with labels -- now using # to indicate label
parent 0cc65a73
......@@ -2,14 +2,14 @@
.SH NAME
argdist \- Trace a function and display a histogram or frequency count of its parameter values. Uses Linux eBPF/bcc.
.SH SYNOPSIS
.B argdist [-h] [-p PID] [-z STRING_SIZE] [-i INTERVAL] [-n COUNT] [-v] [-T TOP] [-H specifier [specifier ...]] [-C specifier [specifier ...]]
.B argdist [-h] [-p PID] [-z STRING_SIZE] [-i INTERVAL] [-n COUNT] [-v] [-T TOP] [-H specifier [specifier ...]] [-C specifier [specifier ...]] [-I header [header ...]]
.SH DESCRIPTION
argdist attaches to function entry and exit points, collects specified parameter
values, and stores them in a histogram or a frequency collection that counts
the number of times a parameter value occurred. It can also filter parameter
values and instrument multiple entry points at once.
This currently only works on x86_64. Check for future versions.
Since this uses BPF, only the root user can use this tool.
.SH REQUIREMENTS
CONFIG_BPF and bcc.
.SH OPTIONS
......@@ -24,10 +24,10 @@ Trace only functions in the process PID.
When collecting string arguments (of type char*), collect up to STRING_SIZE
characters. Longer strings will be truncated.
.TP
-i INTERVAL
\-i INTERVAL
Print the collected data every INTERVAL seconds. The default is 1 second.
.TP
-n NUMBER
\-n COUNT
Print the collected data COUNT times and then exit.
.TP
\-v
......@@ -36,14 +36,20 @@ Display the generated BPF program, for debugging purposes.
\-T TOP
When collecting frequency counts, display only the top TOP entries.
.TP
-H SPECIFIER, -C SPECIFIER
\-H SPECIFIER, \-C SPECIFIER
One or more probe specifications that instruct argdist which functions to
probe, which parameters to collect, how to aggregate them, and whether to perform
any filtering. See SPECIFIER SYNTAX below.
.TP
\-I HEADER
One or more header files that should be included in the BPF program. This
enables the use of structure definitions, enumerations, and constants that
are available in these headers. You should provide the same path you would
include in the BPF program, e.g. 'linux/blkdev.h' or 'linux/time.h'.
.SH SPECIFIER SYNTAX
The general specifier syntax is as follows:
.B {p,r}:[library]:function(signature)[:type:expr[:filter]][;label]
.B {p,r}:[library]:function(signature)[:type:expr[:filter]][#label]
.TP
.B {p,r}
Probe type \- "p" for function entry, "r" for function return;
......@@ -132,7 +138,7 @@ Count fork() calls in libc across all processes, grouped by pid:
.TP
Print histograms of sleep() and nanosleep() parameter values:
#
.B argdist.py -H 'p:c:sleep(u32 seconds):u32:seconds' 'p:c:nanosleep(struct timespec { time_t tv_sec; long tv_nsec; } *req):long:req->tv_nsec'
.B argdist.py -I 'linux/time.h' -H 'p:c:sleep(u32 seconds):u32:seconds' 'p:c:nanosleep(struct timespec *req):long:req->tv_nsec'
.TP
Spy on writes to STDOUT performed by process 2780, up to a string size of 120 characters:
#
......
......@@ -7,6 +7,7 @@
# [-n COUNT] [-v] [-T TOP]
# [-C specifier [specifier ...]]
# [-H specifier [specifier ...]]
# [-I header [header ...]]
#
# Licensed under the Apache License, Version 2.0 (the "License")
# Copyright (C) 2016 Sasha Goldshtein.
......@@ -152,28 +153,42 @@ u64 __time = bpf_ktime_get_ns();
self.bpf.attach_kprobe(event=self.function,
fn_name=self.entry_probe_func)
def _bail(self, error):
    """Abort parsing of the current probe specifier.

    Raises ValueError whose message quotes the raw specifier text
    (self.raw_spec) followed by the supplied description of the problem.
    """
    message = "error parsing probe '%s': %s" % (self.raw_spec, error)
    raise ValueError(message)
def _validate_specifier(self):
    """Check the overall shape of self.raw_spec before it is parsed.

    Only the text before the first '#' is examined; what follows is the
    probe label. The specifier must have between 3 and 6 ':'-separated
    parts, start with probe type 'p' or 'r', and have a third part that
    looks like 'name(...)'. Every problem is reported through
    self._bail(), which raises ValueError.
    """
    # Everything after '#' is the probe label, ignore it
    spec = self.raw_spec.split('#')[0]
    parts = spec.strip().split(':')
    if len(parts) < 3:
        self._bail("at least the probe type, library, and " +
                   "function signature must be specified")
    if len(parts) > 6:
        self._bail("extraneous ':'-separated parts detected")
    if parts[0] not in ["r", "p"]:
        self._bail("probe type must be either 'p' or 'r', " +
                   "but got '%s'" % parts[0])
    # The constructor strips whitespace around the function name and the
    # signature, so tolerate it here as well. The pattern is anchored at
    # the end: the constructor drops the final character assuming it is
    # the closing ')', so trailing text after ')' must be rejected.
    function_part = parts[2].strip()
    if re.match(r"\w+\(.*\)$", function_part) is None:
        self._bail(("function signature '%s' has an invalid " +
                    "format") % parts[2])
def __init__(self, type, specifier, pid):
self.raw_spec = specifier
spec_and_label = specifier.split(';')
self._validate_specifier()
spec_and_label = specifier.split('#')
self.label = spec_and_label[1] \
if len(spec_and_label) == 2 else None
parts = spec_and_label[0].strip().split(':')
if len(parts) < 3 or len(parts) > 6:
raise ValueError("invalid specifier format")
self.type = type # hist or freq
self.is_ret_probe = parts[0] == "r"
if self.type != "hist" and self.type != "freq":
raise ValueError("unrecognized probe type")
if parts[0] not in ["r", "p"]:
raise ValueError("unrecognized probe type")
self.library = parts[1]
self.is_user = len(self.library) > 0
fparts = parts[2].split('(')
if len(fparts) != 2:
raise ValueError("invalid specifier format")
self.function = fparts[0]
self.signature = fparts[1][:-1]
self.function = fparts[0].strip()
self.signature = fparts[1].strip()[:-1]
self._parse_signature()
# If the user didn't specify an expression to probe, we probe
......@@ -335,7 +350,7 @@ bpf_probe_read(&__key.key, sizeof(__key.key), %s);
examples = """
Probe specifier syntax:
{p,r}:[library]:function(signature)[:type:expr[:filter]][;label]
{p,r}:[library]:function(signature)[:type:expr[:filter]][#label]
Where:
p,r -- probe at function entry or at function exit
in exit probes: can use $retval, $entry(param), $latency
......@@ -357,12 +372,16 @@ argdist.py -p 1005 -C 'p:c:malloc(size_t size):size_t:size:size==16'
Print a frequency count of how many times process 1005 called malloc
with an allocation size of 16 bytes
argdist.py -C 'r:c:gets():char*:$retval;snooped strings'
argdist.py -C 'r:c:gets():char*:$retval#snooped strings'
Snoop on all strings returned by gets()
argdist.py -H 'r::__kmalloc(size_t size):u64:$latency/$entry(size);ns per byte'
argdist.py -H 'r::__kmalloc(size_t size):u64:$latency/$entry(size)#ns per byte'
Print a histogram of nanoseconds per byte from kmalloc allocations
argdist.py -I 'linux/slab.h' \\
-C 'p::__kmalloc(size_t size, gfp_t flags):size_t:size:flags&GFP_ATOMIC'
Print frequency count of kmalloc allocation sizes that have GFP_ATOMIC
argdist.py -p 1005 -C 'p:c:write(int fd):int:fd' -T 5
Print frequency counts of how many times writes were issued to a
particular file descriptor number, in process 1005, but only show
......@@ -382,13 +401,13 @@ argdist.py -H \\
Print a histogram of buffer sizes passed to write() across all
processes, where the file descriptor was 1 (STDOUT)
argdist.py -C 'p:c:fork();fork calls'
argdist.py -C 'p:c:fork()#fork calls'
Count fork() calls in libc across all processes
Can also use funccount.py, which is easier and more flexible
argdist.py -H \\
argdist.py -I 'linux/time.h' -H \\
'p:c:sleep(u32 seconds):u32:seconds' \\
'p:c:nanosleep(struct timespec { time_t tv_sec; long tv_nsec; } *req):long:req->tv_nsec'
'p:c:nanosleep(struct timespec *req):long:req->tv_nsec'
Print histograms of sleep() and nanosleep() parameter values
argdist.py -p 2780 -z 120 \\
......@@ -414,9 +433,13 @@ parser.add_argument("-v", "--verbose", action="store_true",
parser.add_argument("-T", "--top", type=int,
help="number of top results to show (not applicable to histograms)")
parser.add_argument("-H", "--histogram", nargs="*", dest="histspecifier",
metavar="specifier",
help="probe specifier to capture histogram of (see examples below)")
parser.add_argument("-C", "--count", nargs="*", dest="countspecifier",
metavar="specifier",
help="probe specifier to capture count of (see examples below)")
parser.add_argument("-I", "--include", nargs="*", metavar="header",
help="additional header files to include in the BPF program")
args = parser.parse_args()
specifiers = []
......@@ -429,6 +452,8 @@ if len(specifiers) == 0:
exit(1)
bpf_source = "#include <uapi/linux/ptrace.h>\n"
for include in (args.include or []):
bpf_source += "#include <%s>\n" % include
for specifier in specifiers:
bpf_source += specifier.generate_text(args.string_size)
......
......@@ -253,8 +253,8 @@ USAGE message:
# argdist.py -h
usage: argdist.py [-h] [-p PID] [-z STRING_SIZE] [-i INTERVAL] [-n COUNT] [-v]
[-T TOP] [-H [HISTSPECIFIER [HISTSPECIFIER ...]]]
[-C [COUNTSPECIFIER [COUNTSPECIFIER ...]]]
[-T TOP] [-H [specifier [specifier ...]]]
[-C [specifier [specifier ...]]] [-I [header [header ...]]]
Trace a function and display a summary of its parameter values.
......@@ -270,15 +270,17 @@ optional arguments:
-v, --verbose print resulting BPF program code before executing
-T TOP, --top TOP number of top results to show (not applicable to
histograms)
-H [HISTSPECIFIER [HISTSPECIFIER ...]], --histogram [HISTSPECIFIER [HISTSPECIFIER ...]]
-H [specifier [specifier ...]], --histogram [specifier [specifier ...]]
probe specifier to capture histogram of (see examples
below)
-C [COUNTSPECIFIER [COUNTSPECIFIER ...]], --count [COUNTSPECIFIER [COUNTSPECIFIER ...]]
-C [specifier [specifier ...]], --count [specifier [specifier ...]]
probe specifier to capture count of (see examples
below)
-I [header [header ...]], --include [header [header ...]]
additional header files to include in the BPF program
Probe specifier syntax:
{p,r}:[library]:function(signature)[:type:expr[:filter]][;label]
{p,r}:[library]:function(signature)[:type:expr[:filter]][#label]
Where:
p,r -- probe at function entry or at function exit
in exit probes: can use $retval, $entry(param), $latency
......@@ -300,12 +302,16 @@ argdist.py -p 1005 -C 'p:c:malloc(size_t size):size_t:size:size==16'
Print a frequency count of how many times process 1005 called malloc
with an allocation size of 16 bytes
argdist.py -C 'r:c:gets():char*:$retval;snooped strings'
argdist.py -C 'r:c:gets():char*:$retval#snooped strings'
Snoop on all strings returned by gets()
argdist.py -H 'r::__kmalloc(size_t size):u64:$latency/$entry(size);ns per byte'
argdist.py -H 'r::__kmalloc(size_t size):u64:$latency/$entry(size)#ns per byte'
Print a histogram of nanoseconds per byte from kmalloc allocations
argdist.py -I 'linux/slab.h' \
-C 'p::__kmalloc(size_t size, gfp_t flags):size_t:size:flags&GFP_ATOMIC'
Print frequency count of kmalloc allocation sizes that have GFP_ATOMIC
argdist.py -p 1005 -C 'p:c:write(int fd):int:fd' -T 5
Print frequency counts of how many times writes were issued to a
particular file descriptor number, in process 1005, but only show
......@@ -325,17 +331,16 @@ argdist.py -H \
Print a histogram of buffer sizes passed to write() across all
processes, where the file descriptor was 1 (STDOUT)
argdist.py -C 'p:c:fork();fork calls'
argdist.py -C 'p:c:fork()#fork calls'
Count fork() calls in libc across all processes
Can also use funccount.py, which is easier and more flexible
argdist.py -H \
argdist.py -I 'linux/time.h' -H \
'p:c:sleep(u32 seconds):u32:seconds' \
'p:c:nanosleep(struct timespec { time_t tv_sec; long tv_nsec; } *req):long:req->tv_nsec'
'p:c:nanosleep(struct timespec *req):long:req->tv_nsec'
Print histograms of sleep() and nanosleep() parameter values
argdist.py -p 2780 -z 120 \
-C 'p:c:write(int fd, char* buf, size_t len):char*:buf:fd==1'
Spy on writes to STDOUT performed by process 2780, up to a string size
of 120 characters
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment