Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
B
bcc
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
bcc
Commits
b0098f2e
Commit
b0098f2e
authored
Jun 08, 2018
by
yonghong-song
Committed by
GitHub
Jun 08, 2018
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #1797 from dpayne/feat/add_stack_frames_to_funcslower
Feat/add stack frames to funcslower
parents
bd107186
bc18b54a
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
172 additions
and
18 deletions
+172
-18
man/man8/funcslower.8
man/man8/funcslower.8
+15
-2
tools/funcslower.py
tools/funcslower.py
+114
-15
tools/funcslower_example.txt
tools/funcslower_example.txt
+43
-1
No files found.
man/man8/funcslower.8
View file @
b0098f2e
.TH funcslower 8 "2017-03-30" "USER COMMANDS"
.SH NAME
f
ile
slower \- Trace slow kernel or user function calls.
f
unc
slower \- Trace slow kernel or user function calls.
.SH SYNOPSIS
.B f
ileslower [\-h] [\-p PID
] [-m MIN_MS] [-u MIN_US] [-a ARGUMENTS] [-T] [-t] [-v] function [function ...]
.B f
uncslower [\-hf] [\-p PID] [\-U | \-K
] [-m MIN_MS] [-u MIN_US] [-a ARGUMENTS] [-T] [-t] [-v] function [function ...]
.SH DESCRIPTION
This script traces a kernel or user function's entry and return points, and
prints a message when the function's latency exceeded the specified threshold.
...
...
@@ -36,6 +36,15 @@ Print a HH:MM:SS timestamp with each entry.
\-t
Print a seconds timestamp with each entry, at microsecond resolution.
.TP
\-f
Print output in folded stack format.
.TP
\-U
Show stacks from user space only (no kernel space stacks).
.TP
\-K
Show stacks from kernel space only (no user space stacks).
.TP
\-v
Print the resulting BPF program, for debugging purposes.
.TP
...
...
@@ -61,6 +70,10 @@ Trace both malloc() and free() slower than 10us, in pid 135 only:
Trace the write syscall and print its first 4 arguments:
#
.B funcslower -a 4 SyS_write
.TP
Trace opens from libc and print the user and kernel stack frames:
#
.B funcslower -UK c:open
.SH FIELDS
.TP
TIME
...
...
tools/funcslower.py
View file @
b0098f2e
...
...
@@ -28,12 +28,14 @@ import ctypes as ct
import
time
examples
=
"""examples:
./funcslower vfs_write # trace vfs_write calls slower than 1ms
./funcslower -m 10 vfs_write # same, but slower than 10ms
./funcslower -u 10 c:open # trace open calls slower than 10us
./funcslower -p 135 c:open # trace pid 135 only
./funcslower c:malloc c:free # trace both malloc and free slower than 1ms
./funcslower -a 2 c:open # show first two arguments to open
./funcslower vfs_write # trace vfs_write calls slower than 1ms
./funcslower -m 10 vfs_write # same, but slower than 10ms
./funcslower -u 10 c:open # trace open calls slower than 10us
./funcslower -p 135 c:open # trace pid 135 only
./funcslower c:malloc c:free # trace both malloc and free slower than 1ms
./funcslower -a 2 c:open # show first two arguments to open
./funcslower -UK -m 10 c:open # Show user and kernel stack frame of open calls slower than 10ms
./funcslower -f -UK c:open # Output in folded format for flame graphs
"""
parser
=
argparse
.
ArgumentParser
(
description
=
"Trace slow kernel or user function calls."
,
...
...
@@ -57,6 +59,12 @@ parser.add_argument(metavar="function", nargs="+", dest="functions",
help
=
"function(s) to trace"
)
parser
.
add_argument
(
"--ebpf"
,
action
=
"store_true"
,
help
=
argparse
.
SUPPRESS
)
parser
.
add_argument
(
"-f"
,
"--folded"
,
action
=
"store_true"
,
help
=
"output folded format, one line per stack (for flame graphs)"
)
parser
.
add_argument
(
"-U"
,
"--user-stack"
,
action
=
"store_true"
,
help
=
"output user stack trace"
)
parser
.
add_argument
(
"-K"
,
"--kernel-stack"
,
action
=
"store_true"
,
help
=
"output kernel stack trace"
)
args
=
parser
.
parse_args
()
# fractions are allowed, but rounded to an integer nanosecond
...
...
@@ -86,6 +94,13 @@ struct data_t {
u64 duration_ns;
u64 retval;
char comm[TASK_COMM_LEN];
#ifdef USER_STACKS
int user_stack_id;
#endif
#ifdef KERNEL_STACKS
int kernel_stack_id;
u64 kernel_ip;
#endif
#ifdef GRAB_ARGS
u64 args[6];
#endif
...
...
@@ -94,6 +109,10 @@ struct data_t {
BPF_HASH(entryinfo, u64, struct entry_t);
BPF_PERF_OUTPUT(events);
#if defined(USER_STACKS) || defined(KERNEL_STACKS)
BPF_STACK_TRACE(stacks, 2048);
#endif
static int trace_entry(struct pt_regs *ctx, int id)
{
u64 tgid_pid = bpf_get_current_pid_tgid();
...
...
@@ -142,6 +161,41 @@ int trace_return(struct pt_regs *ctx)
data.start_ns = entryp->start_ns;
data.duration_ns = delta_ns;
data.retval = PT_REGS_RC(ctx);
#ifdef USER_STACKS
data.user_stack_id = stacks.get_stackid(ctx, BPF_F_USER_STACK);
#endif
#ifdef KERNEL_STACKS
data.kernel_stack_id = stacks.get_stackid(ctx, 0);
if (data.kernel_stack_id >= 0) {
u64 ip = PT_REGS_IP(ctx);
u64 page_offset;
// if ip isn't sane, leave key ips as zero for later checking
#if defined(CONFIG_X86_64) && defined(__PAGE_OFFSET_BASE)
// x64, 4.16, ..., 4.11, etc., but some earlier kernel didn't have it
page_offset = __PAGE_OFFSET_BASE;
#elif defined(CONFIG_X86_64) && defined(__PAGE_OFFSET_BASE_L4)
// x64, 4.17, and later
#if defined(CONFIG_DYNAMIC_MEMORY_LAYOUT) && defined(CONFIG_X86_5LEVEL)
page_offset = __PAGE_OFFSET_BASE_L5;
#else
page_offset = __PAGE_OFFSET_BASE_L4;
#endif
#else
// earlier x86_64 kernels, e.g., 4.6, comes here
// arm64, s390, powerpc, x86_32
page_offset = PAGE_OFFSET;
#endif
if (ip > page_offset) {
data.kernel_ip = ip;
}
}
#endif
#ifdef GRAB_ARGS
bpf_probe_read(&data.args[0], sizeof(data.args), entryp->args);
#endif
...
...
@@ -155,6 +209,10 @@ int trace_return(struct pt_regs *ctx)
bpf_text
=
bpf_text
.
replace
(
'DURATION_NS'
,
str
(
duration_ns
))
if
args
.
arguments
:
bpf_text
=
"#define GRAB_ARGS
\
n
"
+
bpf_text
if
args
.
user_stack
:
bpf_text
=
"#define USER_STACKS
\
n
"
+
bpf_text
if
args
.
kernel_stack
:
bpf_text
=
"#define KERNEL_STACKS
\
n
"
+
bpf_text
if
args
.
tgid
:
bpf_text
=
bpf_text
.
replace
(
'TGID_FILTER'
,
'tgid != %d'
%
args
.
tgid
)
else
:
...
...
@@ -193,17 +251,22 @@ class Data(ct.Structure):
(
"duration_ns"
,
ct
.
c_ulonglong
),
(
"retval"
,
ct
.
c_ulonglong
),
(
"comm"
,
ct
.
c_char
*
TASK_COMM_LEN
)
]
+
([(
"args"
,
ct
.
c_ulonglong
*
6
)]
if
args
.
arguments
else
[])
]
+
([(
"args"
,
ct
.
c_ulonglong
*
6
)]
if
args
.
arguments
else
[])
+
\
([(
"user_stack_id"
,
ct
.
c_int
)]
if
args
.
user_stack
else
[])
+
\
([(
"kernel_stack_id"
,
ct
.
c_int
),(
"kernel_ip"
,
ct
.
c_ulonglong
)]
if
args
.
kernel_stack
else
[])
time_designator
=
"us"
if
args
.
min_us
else
"ms"
time_value
=
args
.
min_us
or
args
.
min_ms
or
1
time_multiplier
=
1000
if
args
.
min_us
else
1000000
time_col
=
args
.
time
or
args
.
timestamp
print
(
"Tracing function calls slower than %g %s... Ctrl+C to quit."
%
(
time_value
,
time_designator
))
print
(((
"%-10s "
%
"TIME"
if
time_col
else
""
)
+
"%-14s %-6s %7s %16s %s"
)
%
(
"COMM"
,
"PID"
,
"LAT(%s)"
%
time_designator
,
"RVAL"
,
"FUNC"
+
(
" ARGS"
if
args
.
arguments
else
""
)))
# Do not print header when folded
if
not
args
.
folded
:
print
(
"Tracing function calls slower than %g %s... Ctrl+C to quit."
%
(
time_value
,
time_designator
))
print
(((
"%-10s "
%
"TIME"
if
time_col
else
""
)
+
"%-14s %-6s %7s %16s %s"
)
%
(
"COMM"
,
"PID"
,
"LAT(%s)"
%
time_designator
,
"RVAL"
,
"FUNC"
+
(
" ARGS"
if
args
.
arguments
else
""
)))
earliest_ts
=
0
...
...
@@ -222,12 +285,48 @@ def args_str(event):
return
""
return
str
.
join
(
" "
,
[
"0x%x"
%
arg
for
arg
in
event
.
args
[:
args
.
arguments
]])
def
print_stack
(
event
):
user_stack
=
[]
stack_traces
=
b
.
get_table
(
"stacks"
)
if
args
.
user_stack
and
event
.
user_stack_id
>
0
:
user_stack
=
stack_traces
.
walk
(
event
.
user_stack_id
)
kernel_stack
=
[]
if
args
.
kernel_stack
and
event
.
kernel_stack_id
>
0
:
kernel_tmp
=
stack_traces
.
walk
(
event
.
kernel_stack_id
)
# fix kernel stack
for
addr
in
kernel_tmp
:
kernel_stack
.
append
(
addr
)
do_delimiter
=
user_stack
and
kernel_stack
if
args
.
folded
:
# print folded stack output
user_stack
=
list
(
user_stack
)
kernel_stack
=
list
(
kernel_stack
)
line
=
[
event
.
comm
.
decode
()]
+
\
[
b
.
sym
(
addr
,
event
.
tgid_pid
)
for
addr
in
reversed
(
user_stack
)]
+
\
(
do_delimiter
and
[
"-"
]
or
[])
+
\
[
b
.
ksym
(
addr
)
for
addr
in
reversed
(
kernel_stack
)]
print
(
"%s %d"
%
(
";"
.
join
(
line
),
1
))
else
:
# print default multi-line stack output.
for
addr
in
kernel_stack
:
print
(
" %s"
%
b
.
ksym
(
addr
))
for
addr
in
user_stack
:
print
(
" %s"
%
b
.
sym
(
addr
,
event
.
tgid_pid
))
def
print_event
(
cpu
,
data
,
size
):
event
=
ct
.
cast
(
data
,
ct
.
POINTER
(
Data
)).
contents
ts
=
float
(
event
.
duration_ns
)
/
time_multiplier
print
((
time_str
(
event
)
+
"%-14.14s %-6s %7.2f %16x %s %s"
)
%
(
event
.
comm
.
decode
(),
event
.
tgid_pid
>>
32
,
ts
,
event
.
retval
,
args
.
functions
[
event
.
id
],
args_str
(
event
)))
if
not
args
.
folded
:
print
((
time_str
(
event
)
+
"%-14.14s %-6s %7.2f %16x %s %s"
)
%
(
event
.
comm
.
decode
(),
event
.
tgid_pid
>>
32
,
ts
,
event
.
retval
,
args
.
functions
[
event
.
id
],
args_str
(
event
)))
if
args
.
user_stack
or
args
.
kernel_stack
:
print_stack
(
event
)
b
[
"events"
].
open_perf_buffer
(
print_event
,
page_cnt
=
64
)
while
True
:
...
...
tools/funcslower_example.txt
View file @
b0098f2e
...
...
@@ -75,9 +75,46 @@ This shows the first two arguments to __kmalloc -- the first one is the size
of the requested allocation. The return value is also shown (null return values
would indicate a failure).
# ./funcslower -U -m 30 '/usr/sbin/nginx:database_write'
Tracing function calls slower than 30 ms... Ctrl+C to quit.
COMM PID LAT(ms) RVAL FUNC
nginx 1617 30.15 9 /usr/sbin/nginx:database_write
DataBaseProvider::setData(std::string const&, record_s&)
UserDataProvider::saveRecordData(RecordData const&)
RequestProcessor::writeResponse(int)
RequestProcessor::processRequest()
RequestRouter::processRequest(RequestWrapper*, ResponseWrapper*)
ngx_http_core_content_phase
ngx_http_core_run_phases
ngx_http_process_request
ngx_process_events_and_timers
ngx_spawn_process
ngx_master_process_cycle
main
__libc_start_main
[unknown]
nginx 1629 30.14 9 /usr/sbin/nginx:database_write
DataBaseProvider::setData(std::string const&, record_s&)
UserDataProvider::saveRecordData(RecordData const&)
RequestProcessor::writeResponse(int)
RequestProcessor::processRequest()
RequestRouter::processRequest(RequestWrapper*, ResponseWrapper*)
ngx_http_core_content_phase
ngx_http_core_run_phases
ngx_http_process_request
ngx_process_events_and_timers
ngx_spawn_process
ngx_master_process_cycle
main
__libc_start_main
[unknown]
^C
Shows the user space stack trace of calls to the user space function call open taking longer than 30 ms.
USAGE message:
usage: funcslower.py [-h
] [-p PID
] [-m MIN_MS] [-u MIN_US] [-a ARGUMENTS] [-T]
usage: funcslower.py [-h
f] [-p PID] [-U | -K
] [-m MIN_MS] [-u MIN_US] [-a ARGUMENTS] [-T]
[-t] [-v]
function [function ...]
...
...
@@ -93,6 +130,11 @@ optional arguments:
minimum duration to trace (ms)
-u MIN_US, --min-us MIN_US
minimum duration to trace (us)
-U, --user-stack
show stacks from user space
-K, --kernel-stack
show stacks from kernel space
-f print output in folded stack format.
-a ARGUMENTS, --arguments ARGUMENTS
print this many entry arguments, as hex
-T, --time show HH:MM:SS timestamp
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment