Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
B
bcc
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
bcc
Commits
4cd70632
Commit
4cd70632
authored
Feb 11, 2016
by
Sasha Goldshtein
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Implemented histogram functionality, including strings; added examples
parent
580788ff
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
349 additions
and
42 deletions
+349
-42
tools/gentrace.py
tools/gentrace.py
+130
-42
tools/gentrace_examples.txt
tools/gentrace_examples.txt
+219
-0
No files found.
tools/gentrace.py
View file @
4cd70632
...
...
@@ -3,116 +3,204 @@
# gentrace.py Trace a function and display a histogram or summary of its
# parameter values.
#
# USAGE: TODO
# USAGE: gentrace.py [-h] [-p PID] [-z STRING_SIZE]
# [-s SPECIFIER [SPECIFIER ...]]
# [interval] [count]
#
# Copyright (C) 2016 Sasha Goldshtein.
from
bcc
import
BPF
from
time
import
sleep
from
time
import
sleep
,
strftime
import
argparse
class
Specifier
(
object
):
text
=
"""
DATA_DECL
int PROBENAME(struct pt_regs *ctx
,
SIGNATURE)
int PROBENAME(struct pt_regs *ctx SIGNATURE)
{
EXPR_TYPE __key = EXPR;
KEY_EXPR
if (!(FILTER)) return 0;
COLLECT
return 0;
}
"""
def
__init__
(
self
,
specifier
,
library
):
# <raw|hist>:lib:function(signature)[:type:expr[:filter]]
def
__init__
(
self
,
specifier
,
pid
):
self
.
raw_spec
=
specifier
parts
=
specifier
.
strip
().
split
(
':'
)
if
len
(
parts
)
<
4
or
len
(
parts
)
>
5
:
if
len
(
parts
)
<
3
or
len
(
parts
)
>
6
:
raise
ValueError
(
"invalid specifier format"
)
self
.
type
=
parts
[
0
]
# hist or raw
if
self
.
type
!=
"hist"
and
self
.
type
!=
"raw"
:
raise
ValueError
(
"unrecognized probe type"
)
fparts
=
parts
[
1
].
split
(
'('
)
self
.
library
=
parts
[
1
]
fparts
=
parts
[
2
].
split
(
'('
)
if
len
(
fparts
)
!=
2
:
raise
ValueError
(
"invalid specifier format"
)
self
.
function
=
fparts
[
0
]
self
.
signature
=
fparts
[
1
][:
-
1
]
self
.
expr_type
=
parts
[
2
]
self
.
expr
=
parts
[
3
]
self
.
filter
=
None
if
len
(
parts
)
!=
5
else
parts
[
4
]
self
.
library
=
library
if
len
(
parts
)
>=
5
:
self
.
expr_type
=
parts
[
3
]
self
.
expr
=
parts
[
4
]
else
:
self
.
expr_type
=
"u64"
self
.
expr
=
"1"
self
.
filter
=
None
if
len
(
parts
)
!=
6
else
parts
[
5
]
self
.
pid
=
pid
self
.
probe_func_name
=
self
.
function
+
"_probe"
self
.
probe_hash_name
=
self
.
function
+
"_hash"
def
generate_text
(
self
):
# TODO Need special treatment of strings (char *) expressions
def
_is_string_probe
(
self
):
return
self
.
expr_type
==
"char*"
or
self
.
expr_type
==
"char *"
def
generate_text
(
self
,
string_size
):
program
=
self
.
text
.
replace
(
"PROBENAME"
,
self
.
probe_func_name
)
program
=
program
.
replace
(
"SIGNATURE"
,
self
.
signature
)
if
self
.
type
==
"raw"
:
signature
=
""
if
len
(
self
.
signature
)
==
0
\
else
","
+
self
.
signature
program
=
program
.
replace
(
"SIGNATURE"
,
signature
)
if
self
.
_is_string_probe
():
decl
=
"""
struct %s_key_t { char key[%d]; };
BPF_HASH(%s, struct %s_key_t, u64);
"""
\
%
(
self
.
function
,
string_size
,
self
.
probe_hash_name
,
self
.
function
)
collect
=
"%s.increment(__key);"
%
self
.
probe_hash_name
key_expr
=
"""
struct %s_key_t __key = {0};
bpf_probe_read(&__key.key, sizeof(__key.key), %s);
"""
\
%
(
self
.
function
,
self
.
expr
)
elif
self
.
type
==
"raw"
:
decl
=
"BPF_HASH(%s, %s, u64);"
%
\
(
self
.
probe_hash_name
,
self
.
expr_type
)
collect
=
"u64 zero = 0, *val; val = %s.lookup_or_init(&__key, &zero); (*val)++;"
%
self
.
probe_hash_name
collect
=
"%s.increment(__key);"
%
self
.
probe_hash_name
key_expr
=
"%s __key = %s;"
%
\
(
self
.
expr_type
,
self
.
expr
)
elif
self
.
type
==
"hist"
:
pass
# TODO
decl
=
"BPF_HISTOGRAM(%s, %s);"
%
\
(
self
.
probe_hash_name
,
self
.
expr_type
)
collect
=
"%s.increment(bpf_log2l(__key));"
%
\
self
.
probe_hash_name
key_expr
=
"%s __key = %s;"
%
\
(
self
.
expr_type
,
self
.
expr
)
program
=
program
.
replace
(
"DATA_DECL"
,
decl
)
program
=
program
.
replace
(
"
EXPR_TYPE"
,
self
.
expr_type
)
program
=
program
.
replace
(
"
EXPR"
,
self
.
expr
)
program
=
program
.
replace
(
"
KEY_EXPR"
,
key_expr
)
program
=
program
.
replace
(
"
FILTER"
,
self
.
filter
or
"1"
)
program
=
program
.
replace
(
"COLLECT"
,
collect
)
return
program
def
attach
(
self
,
bpf
):
self
.
bpf
=
bpf
if
self
.
library
is
not
None
:
if
len
(
self
.
library
)
>
0
:
bpf
.
attach_uprobe
(
name
=
self
.
library
,
sym
=
self
.
function
,
fn_name
=
self
.
probe_func_name
)
fn_name
=
self
.
probe_func_name
,
pid
=
self
.
pid
or
-
1
)
else
:
bpf
.
attach_kprobe
(
event
=
self
.
function
,
fn_name
=
self
.
probe_func_name
)
def
display
(
self
):
print
(
self
.
raw_spec
)
data
=
self
.
bpf
.
get_table
(
self
.
probe_hash_name
)
if
self
.
type
==
"raw"
:
for
key
,
value
in
sorted
(
data
.
items
(),
key
=
lambda
kv
:
kv
[
1
].
value
):
print
(
"%20s = %-20s count = %s"
%
(
self
.
expr
,
str
(
key
.
value
),
str
(
value
.
value
)))
print
(
"
\
t
%-10s %s"
%
(
"COUNT"
,
"EVENT"
))
for
key
,
value
in
sorted
(
data
.
items
(),
key
=
lambda
kv
:
kv
[
1
].
value
):
if
self
.
_is_string_probe
():
key_str
=
key
.
key
else
:
key_str
=
str
(
key
.
value
)
print
(
"
\
t
%-10s %s = %s"
%
(
str
(
value
.
value
),
self
.
expr
,
key_str
))
elif
self
.
type
==
"hist"
:
pass
# TODO
data
.
print_log2_hist
(
val_type
=
self
.
expr
)
examples
=
"""
Probe specifier syntax:
<raw|hist>:[library]:function(signature)[:type:expr[:filter]]
Where:
<raw|hist> -- collect raw data or a histogram of values
library -- the library that contains the function
(leave empty for kernel functions)
function -- the function name to trace
signature -- the function's parameters, as in the C header
type -- the type of the expression to collect
expr -- the expression to collect
filter -- a filter that is applied to collected values
EXAMPLES:
TODO
"""
# probe specifier syntax:
# <hist|raw>:<function name>(<signature>):<type>:<expr>[:filter]
# hist:malloc(size_t size):size_t:size:size>16
# hist:fwrite(FILE* f, size_t count):size_t:count
# raw:printf(char * format):char:format[0]
# hist:fwrite(FILE* file):int:file->fd:file->fd==2
gentrace.py -s "hist::__kmalloc(u64 size):u64:size"
Print a histogram of allocation sizes passed to kmalloc
gentrace.py -p 1005 -s "raw:c:malloc(size_t size):size_t:size:size==16"
Print a raw count of how many times process 1005 called malloc with
an allocation size of 16 bytes
gentrace.py -p 1005 -s "raw:c:write(int fd):int:fd"
Print raw counts of how many times writes were issued to a particular
file descriptor number, in process 1005
gentrace.py -s "hist:c:write(int fd, const void *buf, size_t count):size_t:count:fd==1"
Print a histogram of buffer sizes passed to write() across all
processes, where the file descriptor was 1 (STDOUT)
gentrace.py -s "raw:c:fork"
Count fork() calls in libc across all processes
Can also use funccount.py, which is easier and more flexible
gentrace.py -s
\
\
"hist:c:sleep(u32 seconds):u32:seconds"
\
\
"hist:c:nanosleep(struct timespec { time_t tv_sec; long tv_nsec; } *req):long:req->tv_nsec"
Print histograms of sleep() and nanosleep() parameter values
gentrace.py -p 2780 -s -z 120 "raw:c:write(int fd, char* buf, size_t len):char*:buf:fd==1"
Spy on writes to STDOUT performed by process 2780, up to a string size
of 120 characters
"""
parser
=
argparse
.
ArgumentParser
(
description
=
"Trace a function and display a summary of its parameter values."
,
formatter_class
=
argparse
.
RawDescriptionHelpFormatter
,
epilog
=
examples
)
parser
.
add_argument
(
"-
l"
,
"--library"
,
help
=
"
the library which contains the function to trace
"
)
parser
.
add_argument
(
"-
s"
,
"--specifier"
,
nargs
=
"+"
,
dest
=
"specifiers"
,
help
=
"
the probe specifier
"
)
parser
.
add_argument
(
"interval"
,
nargs
=
"?"
,
default
=
1
,
parser
.
add_argument
(
"-
p"
,
"--pid"
,
type
=
int
,
help
=
"
id of the process to trace (optional)
"
)
parser
.
add_argument
(
"-
z"
,
"--string-size"
,
default
=
80
,
type
=
int
,
help
=
"
maximum string size to read from char* arguments
"
)
parser
.
add_argument
(
"interval"
,
nargs
=
"?"
,
default
=
1
,
type
=
int
,
help
=
"output interval, in seconds"
)
parser
.
add_argument
(
"count"
,
nargs
=
"?"
,
parser
.
add_argument
(
"count"
,
nargs
=
"?"
,
type
=
int
,
help
=
"number of outputs"
)
parser
.
add_argument
(
"-s"
,
"--specifier"
,
nargs
=
"+"
,
dest
=
"specifiers"
,
help
=
"the probe specifiers (see examples below)"
)
args
=
parser
.
parse_args
()
specifiers
=
[]
for
specifier
in
args
.
specifiers
:
specifiers
.
append
(
Specifier
(
specifier
,
args
.
library
))
specifiers
.
append
(
Specifier
(
specifier
,
args
.
pid
))
bpf_source
=
"#include <uapi/linux/ptrace.h>
\
n
"
for
specifier
in
specifiers
:
bpf_source
+=
specifier
.
generate_text
()
bpf_source
+=
specifier
.
generate_text
(
args
.
string_size
)
bpf
=
BPF
(
text
=
bpf_source
)
for
specifier
in
specifiers
:
specifier
.
attach
(
bpf
)
count_so_far
=
0
while
True
:
print
(
"COLLECTED DATA:"
)
try
:
sleep
(
args
.
interval
)
except
KeyboardInterrupt
:
exit
()
print
(
"[%s]"
%
strftime
(
"%H:%M:%S"
))
for
specifier
in
specifiers
:
specifier
.
display
()
sleep
(
args
.
interval
)
count_so_far
+=
1
if
args
.
count
is
not
None
and
count_so_far
>=
args
.
count
:
exit
()
tools/gentrace_examples.txt
0 → 100644
View file @
4cd70632
Demonstrations of gentrace.
gentrace probes functions you specify and collects parameter values into a
histogram or a counting table. This can be used to understand the distribution
of values a certain parameter takes, filter and print interesting parameters
without attaching a debugger, and getting general execution statistics on
various functions.
For example, suppose you want to find what allocation sizes are common in
your application:
# ./gentrace.py -p 2420 -s "raw:c:malloc(size_t size):size_t:size"
[01:42:29]
raw:c:malloc(size_t size):size_t:size
COUNT EVENT
[01:42:30]
raw:c:malloc(size_t size):size_t:size
COUNT EVENT
[01:42:31]
raw:c:malloc(size_t size):size_t:size
COUNT EVENT
1 size = 16
[01:42:32]
raw:c:malloc(size_t size):size_t:size
COUNT EVENT
2 size = 16
[01:42:33]
raw:c:malloc(size_t size):size_t:size
COUNT EVENT
3 size = 16
[01:42:34]
raw:c:malloc(size_t size):size_t:size
COUNT EVENT
4 size = 16
^C
It seems that the application is allocating blocks of size 16. The COUNT
column contains the number of occurrences of a particular event, and the
EVENT column describes the event. In this case, the "size" parameter was
probed and its value was 16, repeatedly.
Now, suppose you wanted a histogram of buffer sizes passed to the write()
function across the system:
# ./gentrace.py -s "hist:c:write(int fd, void *buf, size_t len):size_t:len"
[01:45:22]
hist:c:write(int fd, void *buf, size_t len):size_t:len
len : count distribution
0 -> 1 : 0 | |
2 -> 3 : 2 |************* |
4 -> 7 : 0 | |
8 -> 15 : 2 |************* |
16 -> 31 : 0 | |
32 -> 63 : 6 |****************************************|
[01:45:23]
hist:c:write(int fd, void *buf, size_t len):size_t:len
len : count distribution
0 -> 1 : 0 | |
2 -> 3 : 11 |*************** |
4 -> 7 : 0 | |
8 -> 15 : 4 |***** |
16 -> 31 : 0 | |
32 -> 63 : 28 |****************************************|
64 -> 127 : 12 |***************** |
[01:45:24]
hist:c:write(int fd, void *buf, size_t len):size_t:len
len : count distribution
0 -> 1 : 0 | |
2 -> 3 : 21 |**************** |
4 -> 7 : 0 | |
8 -> 15 : 6 |**** |
16 -> 31 : 0 | |
32 -> 63 : 52 |****************************************|
64 -> 127 : 26 |******************** |
^C
It seems that most writes fall into three buckets: very small writes of 2-3
bytes, medium writes of 32-63 bytes, and larger writes of 64-127 bytes.
But these are writes across the board -- what if you wanted to focus on writes
to STDOUT?
# ./gentrace.py -s "hist:c:write(int fd, void *buf, size_t len):size_t:len:fd==1"
[01:47:17]
hist:c:write(int fd, void *buf, size_t len):size_t:len:fd==1
len : count distribution
0 -> 1 : 0 | |
2 -> 3 : 0 | |
4 -> 7 : 0 | |
8 -> 15 : 1 |****************************************|
16 -> 31 : 0 | |
32 -> 63 : 1 |****************************************|
[01:47:18]
hist:c:write(int fd, void *buf, size_t len):size_t:len:fd==1
len : count distribution
0 -> 1 : 0 | |
2 -> 3 : 0 | |
4 -> 7 : 0 | |
8 -> 15 : 2 |************* |
16 -> 31 : 0 | |
32 -> 63 : 3 |******************** |
64 -> 127 : 6 |****************************************|
[01:47:19]
hist:c:write(int fd, void *buf, size_t len):size_t:len:fd==1
len : count distribution
0 -> 1 : 0 | |
2 -> 3 : 0 | |
4 -> 7 : 0 | |
8 -> 15 : 3 |********* |
16 -> 31 : 0 | |
32 -> 63 : 5 |*************** |
64 -> 127 : 13 |****************************************|
^C
The "fd==1" part is a filter that is applied to every invocation of write().
Only if the filter condition is true, the value is recorded.
You can also use gentrace to trace kernel functions. For example, suppose you
wanted a histogram of kernel allocation (kmalloc) sizes across the system,
printed twice with 3 second intervals:
# ./gentrace.py 3 2 -s "hist::__kmalloc(size_t size):size_t:size"
[01:50:00]
hist::__kmalloc(size_t size):size_t:size
size : count distribution
0 -> 1 : 0 | |
2 -> 3 : 0 | |
4 -> 7 : 0 | |
8 -> 15 : 6 |****************************************|
[01:50:03]
hist::__kmalloc(size_t size):size_t:size
size : count distribution
0 -> 1 : 0 | |
2 -> 3 : 0 | |
4 -> 7 : 0 | |
8 -> 15 : 22 |****************************************|
16 -> 31 : 0 | |
32 -> 63 : 0 | |
64 -> 127 : 5 |********* |
128 -> 255 : 2 |*** |
Occasionally, numeric information isn't enough and you want to capture strings.
What are the strings printed by puts() across the system?
# ./gentrace.py 10 1 -s "raw:c:puts(char *str):char*:str"
[01:53:54]
raw:c:puts(char *str):char*:str
COUNT EVENT
2 str = Press ENTER to start.
It looks like the message "Press ENTER to start." was printed twice during the
10 seconds we were tracing.
USAGE message:
# ./gentrace.py -h
usage: gentrace.py [-h] [-p PID] [-z STRING_SIZE]
[-s SPECIFIERS [SPECIFIERS ...]]
[interval] [count]
Trace a function and display a summary of its parameter values.
positional arguments:
interval output interval, in seconds
count number of outputs
optional arguments:
-h, --help show this help message and exit
-p PID, --pid PID id of the process to trace (optional)
-z STRING_SIZE, --string-size STRING_SIZE
maximum string size to read from char* arguments
-s SPECIFIERS [SPECIFIERS ...], --specifier SPECIFIERS [SPECIFIERS ...]
the probe specifiers (see examples below)
Probe specifier syntax:
<raw|hist>:[library]:function(signature)[:type:expr[:filter]]
Where:
<raw|hist> -- collect raw data or a histogram of values
library -- the library that contains the function
(leave empty for kernel functions)
function -- the function name to trace
signature -- the function's parameters, as in the C header
type -- the type of the expression to collect
expr -- the expression to collect
filter -- a filter that is applied to collected values
EXAMPLES:
gentrace.py -s "hist::__kmalloc(u64 size):u64:size"
Print a histogram of allocation sizes passed to kmalloc
gentrace.py -p 1005 -s "raw:c:malloc(size_t size):size_t:size:size==16"
Print a raw count of how many times process 1005 called malloc with
an allocation size of 16 bytes
gentrace.py -p 1005 -s "raw:c:write(int fd):int:fd"
Print raw counts of how many times writes were issued to a particular
file descriptor number, in process 1005
gentrace.py -s "hist:c:write(int fd, const void *buf, size_t count):size_t:count:fd==1"
Print a histogram of buffer sizes passed to write() across all
processes, where the file descriptor was 1 (STDOUT)
gentrace.py -s "raw:c:fork"
Count fork() calls in libc across all processes
Can also use funccount.py, which is easier and more flexible
gentrace.py -s \
"hist:c:sleep(u32 seconds):u32:seconds" \
"hist:c:nanosleep(struct timespec { time_t tv_sec; long tv_nsec; } *req):long:req->tv_nsec"
Print histograms of sleep() and nanosleep() parameter values
gentrace.py -p 2780 -s -z 120 "raw:c:write(int fd, char* buf, size_t len):char*:buf:fd==1"
Spy on writes to STDOUT performed by process 2780, up to a string size
of 120 characters
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment