Commit b0098f2e authored by yonghong-song's avatar yonghong-song Committed by GitHub

Merge pull request #1797 from dpayne/feat/add_stack_frames_to_funcslower

Feat/add stack frames to funcslower
parents bd107186 bc18b54a
.TH funcslower 8 "2017-03-30" "USER COMMANDS" .TH funcslower 8 "2017-03-30" "USER COMMANDS"
.SH NAME .SH NAME
fileslower \- Trace slow kernel or user function calls. funcslower \- Trace slow kernel or user function calls.
.SH SYNOPSIS .SH SYNOPSIS
.B fileslower [\-h] [\-p PID] [-m MIN_MS] [-u MIN_US] [-a ARGUMENTS] [-T] [-t] [-v] function [function ...] .B funcslower [\-hf] [\-p PID] [\-U | \-K] [-m MIN_MS] [-u MIN_US] [-a ARGUMENTS] [-T] [-t] [-v] function [function ...]
.SH DESCRIPTION .SH DESCRIPTION
This script traces a kernel or user function's entry and return points, and This script traces a kernel or user function's entry and return points, and
prints a message when the function's latency exceeded the specified threshold. prints a message when the function's latency exceeded the specified threshold.
...@@ -36,6 +36,15 @@ Print a HH:MM:SS timestamp with each entry. ...@@ -36,6 +36,15 @@ Print a HH:MM:SS timestamp with each entry.
\-t \-t
Print a seconds timestamp with each entry, at microsecond resolution. Print a seconds timestamp with each entry, at microsecond resolution.
.TP .TP
\-f
Print output in folded stack format.
.TP
\-U
Show stacks from user space only (no kernel space stacks).
.TP
\-K
Show stacks from kernel space only (no user space stacks).
.TP
\-v \-v
Print the resulting BPF program, for debugging purposes. Print the resulting BPF program, for debugging purposes.
.TP .TP
...@@ -61,6 +70,10 @@ Trace both malloc() and free() slower than 10us, in pid 135 only: ...@@ -61,6 +70,10 @@ Trace both malloc() and free() slower than 10us, in pid 135 only:
Trace the write syscall and print its first 4 arguments: Trace the write syscall and print its first 4 arguments:
# #
.B funcslower -a 4 SyS_write .B funcslower -a 4 SyS_write
.TP
Trace opens from libc and print the user and kernel stack frames:
#
.B funcslower -UK c:open
.SH FIELDS .SH FIELDS
.TP .TP
TIME TIME
......
...@@ -28,12 +28,14 @@ import ctypes as ct ...@@ -28,12 +28,14 @@ import ctypes as ct
import time import time
examples = """examples: examples = """examples:
./funcslower vfs_write # trace vfs_write calls slower than 1ms ./funcslower vfs_write # trace vfs_write calls slower than 1ms
./funcslower -m 10 vfs_write # same, but slower than 10ms ./funcslower -m 10 vfs_write # same, but slower than 10ms
./funcslower -u 10 c:open # trace open calls slower than 10us ./funcslower -u 10 c:open # trace open calls slower than 10us
./funcslower -p 135 c:open # trace pid 135 only ./funcslower -p 135 c:open # trace pid 135 only
./funcslower c:malloc c:free # trace both malloc and free slower than 1ms ./funcslower c:malloc c:free # trace both malloc and free slower than 1ms
./funcslower -a 2 c:open # show first two arguments to open ./funcslower -a 2 c:open # show first two arguments to open
./funcslower -UK -m 10 c:open # Show user and kernel stack frame of open calls slower than 10ms
./funcslower -f -UK c:open # Output in folded format for flame graphs
""" """
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description="Trace slow kernel or user function calls.", description="Trace slow kernel or user function calls.",
...@@ -57,6 +59,12 @@ parser.add_argument(metavar="function", nargs="+", dest="functions", ...@@ -57,6 +59,12 @@ parser.add_argument(metavar="function", nargs="+", dest="functions",
help="function(s) to trace") help="function(s) to trace")
parser.add_argument("--ebpf", action="store_true", parser.add_argument("--ebpf", action="store_true",
help=argparse.SUPPRESS) help=argparse.SUPPRESS)
parser.add_argument("-f", "--folded", action="store_true",
help="output folded format, one line per stack (for flame graphs)")
parser.add_argument("-U", "--user-stack",
action="store_true", help="output user stack trace")
parser.add_argument("-K", "--kernel-stack",
action="store_true", help="output kernel stack trace")
args = parser.parse_args() args = parser.parse_args()
# fractions are allowed, but rounded to an integer nanosecond # fractions are allowed, but rounded to an integer nanosecond
...@@ -86,6 +94,13 @@ struct data_t { ...@@ -86,6 +94,13 @@ struct data_t {
u64 duration_ns; u64 duration_ns;
u64 retval; u64 retval;
char comm[TASK_COMM_LEN]; char comm[TASK_COMM_LEN];
#ifdef USER_STACKS
int user_stack_id;
#endif
#ifdef KERNEL_STACKS
int kernel_stack_id;
u64 kernel_ip;
#endif
#ifdef GRAB_ARGS #ifdef GRAB_ARGS
u64 args[6]; u64 args[6];
#endif #endif
...@@ -94,6 +109,10 @@ struct data_t { ...@@ -94,6 +109,10 @@ struct data_t {
BPF_HASH(entryinfo, u64, struct entry_t); BPF_HASH(entryinfo, u64, struct entry_t);
BPF_PERF_OUTPUT(events); BPF_PERF_OUTPUT(events);
#if defined(USER_STACKS) || defined(KERNEL_STACKS)
BPF_STACK_TRACE(stacks, 2048);
#endif
static int trace_entry(struct pt_regs *ctx, int id) static int trace_entry(struct pt_regs *ctx, int id)
{ {
u64 tgid_pid = bpf_get_current_pid_tgid(); u64 tgid_pid = bpf_get_current_pid_tgid();
...@@ -142,6 +161,41 @@ int trace_return(struct pt_regs *ctx) ...@@ -142,6 +161,41 @@ int trace_return(struct pt_regs *ctx)
data.start_ns = entryp->start_ns; data.start_ns = entryp->start_ns;
data.duration_ns = delta_ns; data.duration_ns = delta_ns;
data.retval = PT_REGS_RC(ctx); data.retval = PT_REGS_RC(ctx);
#ifdef USER_STACKS
data.user_stack_id = stacks.get_stackid(ctx, BPF_F_USER_STACK);
#endif
#ifdef KERNEL_STACKS
data.kernel_stack_id = stacks.get_stackid(ctx, 0);
if (data.kernel_stack_id >= 0) {
u64 ip = PT_REGS_IP(ctx);
u64 page_offset;
// if ip isn't sane, leave key ips as zero for later checking
#if defined(CONFIG_X86_64) && defined(__PAGE_OFFSET_BASE)
// x64, 4.16, ..., 4.11, etc., but some earlier kernel didn't have it
page_offset = __PAGE_OFFSET_BASE;
#elif defined(CONFIG_X86_64) && defined(__PAGE_OFFSET_BASE_L4)
// x64, 4.17, and later
#if defined(CONFIG_DYNAMIC_MEMORY_LAYOUT) && defined(CONFIG_X86_5LEVEL)
page_offset = __PAGE_OFFSET_BASE_L5;
#else
page_offset = __PAGE_OFFSET_BASE_L4;
#endif
#else
// earlier x86_64 kernels, e.g., 4.6, comes here
// arm64, s390, powerpc, x86_32
page_offset = PAGE_OFFSET;
#endif
if (ip > page_offset) {
data.kernel_ip = ip;
}
}
#endif
#ifdef GRAB_ARGS #ifdef GRAB_ARGS
bpf_probe_read(&data.args[0], sizeof(data.args), entryp->args); bpf_probe_read(&data.args[0], sizeof(data.args), entryp->args);
#endif #endif
...@@ -155,6 +209,10 @@ int trace_return(struct pt_regs *ctx) ...@@ -155,6 +209,10 @@ int trace_return(struct pt_regs *ctx)
bpf_text = bpf_text.replace('DURATION_NS', str(duration_ns)) bpf_text = bpf_text.replace('DURATION_NS', str(duration_ns))
if args.arguments: if args.arguments:
bpf_text = "#define GRAB_ARGS\n" + bpf_text bpf_text = "#define GRAB_ARGS\n" + bpf_text
if args.user_stack:
bpf_text = "#define USER_STACKS\n" + bpf_text
if args.kernel_stack:
bpf_text = "#define KERNEL_STACKS\n" + bpf_text
if args.tgid: if args.tgid:
bpf_text = bpf_text.replace('TGID_FILTER', 'tgid != %d' % args.tgid) bpf_text = bpf_text.replace('TGID_FILTER', 'tgid != %d' % args.tgid)
else: else:
...@@ -193,17 +251,22 @@ class Data(ct.Structure): ...@@ -193,17 +251,22 @@ class Data(ct.Structure):
("duration_ns", ct.c_ulonglong), ("duration_ns", ct.c_ulonglong),
("retval", ct.c_ulonglong), ("retval", ct.c_ulonglong),
("comm", ct.c_char * TASK_COMM_LEN) ("comm", ct.c_char * TASK_COMM_LEN)
] + ([("args", ct.c_ulonglong * 6)] if args.arguments else []) ] + ([("args", ct.c_ulonglong * 6)] if args.arguments else []) + \
([("user_stack_id", ct.c_int)] if args.user_stack else []) + \
([("kernel_stack_id", ct.c_int),("kernel_ip", ct.c_ulonglong)] if args.kernel_stack else [])
time_designator = "us" if args.min_us else "ms" time_designator = "us" if args.min_us else "ms"
time_value = args.min_us or args.min_ms or 1 time_value = args.min_us or args.min_ms or 1
time_multiplier = 1000 if args.min_us else 1000000 time_multiplier = 1000 if args.min_us else 1000000
time_col = args.time or args.timestamp time_col = args.time or args.timestamp
print("Tracing function calls slower than %g %s... Ctrl+C to quit." %
(time_value, time_designator)) # Do not print header when folded
print((("%-10s " % "TIME" if time_col else "") + "%-14s %-6s %7s %16s %s") % if not args.folded:
("COMM", "PID", "LAT(%s)" % time_designator, "RVAL", print("Tracing function calls slower than %g %s... Ctrl+C to quit." %
"FUNC" + (" ARGS" if args.arguments else ""))) (time_value, time_designator))
print((("%-10s " % "TIME" if time_col else "") + "%-14s %-6s %7s %16s %s") %
("COMM", "PID", "LAT(%s)" % time_designator, "RVAL",
"FUNC" + (" ARGS" if args.arguments else "")))
earliest_ts = 0 earliest_ts = 0
...@@ -222,12 +285,48 @@ def args_str(event): ...@@ -222,12 +285,48 @@ def args_str(event):
return "" return ""
return str.join(" ", ["0x%x" % arg for arg in event.args[:args.arguments]]) return str.join(" ", ["0x%x" % arg for arg in event.args[:args.arguments]])
def print_stack(event):
user_stack = []
stack_traces = b.get_table("stacks")
if args.user_stack and event.user_stack_id > 0:
user_stack = stack_traces.walk(event.user_stack_id)
kernel_stack = []
if args.kernel_stack and event.kernel_stack_id > 0:
kernel_tmp = stack_traces.walk(event.kernel_stack_id)
# fix kernel stack
for addr in kernel_tmp:
kernel_stack.append(addr)
do_delimiter = user_stack and kernel_stack
if args.folded:
# print folded stack output
user_stack = list(user_stack)
kernel_stack = list(kernel_stack)
line = [event.comm.decode()] + \
[b.sym(addr, event.tgid_pid) for addr in reversed(user_stack)] + \
(do_delimiter and ["-"] or []) + \
[b.ksym(addr) for addr in reversed(kernel_stack)]
print("%s %d" % (";".join(line), 1))
else:
# print default multi-line stack output.
for addr in kernel_stack:
print(" %s" % b.ksym(addr))
for addr in user_stack:
print(" %s" % b.sym(addr, event.tgid_pid))
def print_event(cpu, data, size): def print_event(cpu, data, size):
event = ct.cast(data, ct.POINTER(Data)).contents event = ct.cast(data, ct.POINTER(Data)).contents
ts = float(event.duration_ns) / time_multiplier ts = float(event.duration_ns) / time_multiplier
print((time_str(event) + "%-14.14s %-6s %7.2f %16x %s %s") % if not args.folded:
(event.comm.decode(), event.tgid_pid >> 32, print((time_str(event) + "%-14.14s %-6s %7.2f %16x %s %s") %
ts, event.retval, args.functions[event.id], args_str(event))) (event.comm.decode(), event.tgid_pid >> 32,
ts, event.retval, args.functions[event.id], args_str(event)))
if args.user_stack or args.kernel_stack:
print_stack(event)
b["events"].open_perf_buffer(print_event, page_cnt=64) b["events"].open_perf_buffer(print_event, page_cnt=64)
while True: while True:
......
...@@ -75,9 +75,46 @@ This shows the first two arguments to __kmalloc -- the first one is the size ...@@ -75,9 +75,46 @@ This shows the first two arguments to __kmalloc -- the first one is the size
of the requested allocation. The return value is also shown (null return values of the requested allocation. The return value is also shown (null return values
would indicate a failure). would indicate a failure).
# ./funcslower -U -m 30 '/usr/sbin/nginx:database_write'
Tracing function calls slower than 30 ms... Ctrl+C to quit.
COMM PID LAT(ms) RVAL FUNC
nginx 1617 30.15 9 /usr/sbin/nginx:database_write
DataBaseProvider::setData(std::string const&, record_s&)
UserDataProvider::saveRecordData(RecordData const&)
RequestProcessor::writeResponse(int)
RequestProcessor::processRequest()
RequestRouter::processRequest(RequestWrapper*, ResponseWrapper*)
ngx_http_core_content_phase
ngx_http_core_run_phases
ngx_http_process_request
ngx_process_events_and_timers
ngx_spawn_process
ngx_master_process_cycle
main
__libc_start_main
[unknown]
nginx 1629 30.14 9 /usr/sbin/nginx:database_write
DataBaseProvider::setData(std::string const&, record_s&)
UserDataProvider::saveRecordData(RecordData const&)
RequestProcessor::writeResponse(int)
RequestProcessor::processRequest()
RequestRouter::processRequest(RequestWrapper*, ResponseWrapper*)
ngx_http_core_content_phase
ngx_http_core_run_phases
ngx_http_process_request
ngx_process_events_and_timers
ngx_spawn_process
ngx_master_process_cycle
main
__libc_start_main
[unknown]
^C
Shows the user space stack trace of calls to the user space function call open taking longer than 30 ms.
USAGE message: USAGE message:
usage: funcslower.py [-h] [-p PID] [-m MIN_MS] [-u MIN_US] [-a ARGUMENTS] [-T] usage: funcslower.py [-hf] [-p PID] [-U | -K] [-m MIN_MS] [-u MIN_US] [-a ARGUMENTS] [-T]
[-t] [-v] [-t] [-v]
function [function ...] function [function ...]
...@@ -93,6 +130,11 @@ optional arguments: ...@@ -93,6 +130,11 @@ optional arguments:
minimum duration to trace (ms) minimum duration to trace (ms)
-u MIN_US, --min-us MIN_US -u MIN_US, --min-us MIN_US
minimum duration to trace (us) minimum duration to trace (us)
-U, --user-stack
show stacks from user space
-K, --kernel-stack
show stacks from kernel space
-f print output in folded stack format.
-a ARGUMENTS, --arguments ARGUMENTS -a ARGUMENTS, --arguments ARGUMENTS
print this many entry arguments, as hex print this many entry arguments, as hex
-T, --time show HH:MM:SS timestamp -T, --time show HH:MM:SS timestamp
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment