Commit b0098f2e authored by yonghong-song's avatar yonghong-song Committed by GitHub

Merge pull request #1797 from dpayne/feat/add_stack_frames_to_funcslower

Feat/add stack frames to funcslower
parents bd107186 bc18b54a
.TH funcslower 8 "2017-03-30" "USER COMMANDS"
.SH NAME
fileslower \- Trace slow kernel or user function calls.
funcslower \- Trace slow kernel or user function calls.
.SH SYNOPSIS
.B fileslower [\-h] [\-p PID] [-m MIN_MS] [-u MIN_US] [-a ARGUMENTS] [-T] [-t] [-v] function [function ...]
.B funcslower [\-hf] [\-p PID] [\-U | \-K] [-m MIN_MS] [-u MIN_US] [-a ARGUMENTS] [-T] [-t] [-v] function [function ...]
.SH DESCRIPTION
This script traces a kernel or user function's entry and return points, and
prints a message when the function's latency exceeded the specified threshold.
......@@ -36,6 +36,15 @@ Print a HH:MM:SS timestamp with each entry.
\-t
Print a seconds timestamp with each entry, at microsecond resolution.
.TP
\-f
Print output in folded stack format.
.TP
\-U
Show stacks from user space only (no kernel space stacks).
.TP
\-K
Show stacks from kernel space only (no user space stacks).
.TP
\-v
Print the resulting BPF program, for debugging purposes.
.TP
......@@ -61,6 +70,10 @@ Trace both malloc() and free() slower than 10us, in pid 135 only:
Trace the write syscall and print its first 4 arguments:
#
.B funcslower -a 4 SyS_write
.TP
Trace opens from libc and print the user and kernel stack frames:
#
.B funcslower -UK c:open
.SH FIELDS
.TP
TIME
......
......@@ -28,12 +28,14 @@ import ctypes as ct
import time
examples = """examples:
./funcslower vfs_write # trace vfs_write calls slower than 1ms
./funcslower -m 10 vfs_write # same, but slower than 10ms
./funcslower -u 10 c:open # trace open calls slower than 10us
./funcslower -p 135 c:open # trace pid 135 only
./funcslower c:malloc c:free # trace both malloc and free slower than 1ms
./funcslower -a 2 c:open # show first two arguments to open
./funcslower vfs_write # trace vfs_write calls slower than 1ms
./funcslower -m 10 vfs_write # same, but slower than 10ms
./funcslower -u 10 c:open # trace open calls slower than 10us
./funcslower -p 135 c:open # trace pid 135 only
./funcslower c:malloc c:free # trace both malloc and free slower than 1ms
./funcslower -a 2 c:open # show first two arguments to open
./funcslower -UK -m 10 c:open # Show user and kernel stack frame of open calls slower than 10ms
./funcslower -f -UK c:open # Output in folded format for flame graphs
"""
parser = argparse.ArgumentParser(
description="Trace slow kernel or user function calls.",
......@@ -57,6 +59,12 @@ parser.add_argument(metavar="function", nargs="+", dest="functions",
help="function(s) to trace")
parser.add_argument("--ebpf", action="store_true",
help=argparse.SUPPRESS)
parser.add_argument("-f", "--folded", action="store_true",
help="output folded format, one line per stack (for flame graphs)")
parser.add_argument("-U", "--user-stack",
action="store_true", help="output user stack trace")
parser.add_argument("-K", "--kernel-stack",
action="store_true", help="output kernel stack trace")
args = parser.parse_args()
# fractions are allowed, but rounded to an integer nanosecond
......@@ -86,6 +94,13 @@ struct data_t {
u64 duration_ns;
u64 retval;
char comm[TASK_COMM_LEN];
#ifdef USER_STACKS
int user_stack_id;
#endif
#ifdef KERNEL_STACKS
int kernel_stack_id;
u64 kernel_ip;
#endif
#ifdef GRAB_ARGS
u64 args[6];
#endif
......@@ -94,6 +109,10 @@ struct data_t {
BPF_HASH(entryinfo, u64, struct entry_t);
BPF_PERF_OUTPUT(events);
#if defined(USER_STACKS) || defined(KERNEL_STACKS)
BPF_STACK_TRACE(stacks, 2048);
#endif
static int trace_entry(struct pt_regs *ctx, int id)
{
u64 tgid_pid = bpf_get_current_pid_tgid();
......@@ -142,6 +161,41 @@ int trace_return(struct pt_regs *ctx)
data.start_ns = entryp->start_ns;
data.duration_ns = delta_ns;
data.retval = PT_REGS_RC(ctx);
#ifdef USER_STACKS
data.user_stack_id = stacks.get_stackid(ctx, BPF_F_USER_STACK);
#endif
#ifdef KERNEL_STACKS
data.kernel_stack_id = stacks.get_stackid(ctx, 0);
if (data.kernel_stack_id >= 0) {
u64 ip = PT_REGS_IP(ctx);
u64 page_offset;
// if ip isn't sane, leave key ips as zero for later checking
#if defined(CONFIG_X86_64) && defined(__PAGE_OFFSET_BASE)
// x64, 4.16, ..., 4.11, etc., but some earlier kernel didn't have it
page_offset = __PAGE_OFFSET_BASE;
#elif defined(CONFIG_X86_64) && defined(__PAGE_OFFSET_BASE_L4)
// x64, 4.17, and later
#if defined(CONFIG_DYNAMIC_MEMORY_LAYOUT) && defined(CONFIG_X86_5LEVEL)
page_offset = __PAGE_OFFSET_BASE_L5;
#else
page_offset = __PAGE_OFFSET_BASE_L4;
#endif
#else
// earlier x86_64 kernels, e.g., 4.6, comes here
// arm64, s390, powerpc, x86_32
page_offset = PAGE_OFFSET;
#endif
if (ip > page_offset) {
data.kernel_ip = ip;
}
}
#endif
#ifdef GRAB_ARGS
bpf_probe_read(&data.args[0], sizeof(data.args), entryp->args);
#endif
......@@ -155,6 +209,10 @@ int trace_return(struct pt_regs *ctx)
bpf_text = bpf_text.replace('DURATION_NS', str(duration_ns))
if args.arguments:
bpf_text = "#define GRAB_ARGS\n" + bpf_text
if args.user_stack:
bpf_text = "#define USER_STACKS\n" + bpf_text
if args.kernel_stack:
bpf_text = "#define KERNEL_STACKS\n" + bpf_text
if args.tgid:
bpf_text = bpf_text.replace('TGID_FILTER', 'tgid != %d' % args.tgid)
else:
......@@ -193,17 +251,22 @@ class Data(ct.Structure):
("duration_ns", ct.c_ulonglong),
("retval", ct.c_ulonglong),
("comm", ct.c_char * TASK_COMM_LEN)
] + ([("args", ct.c_ulonglong * 6)] if args.arguments else [])
] + ([("args", ct.c_ulonglong * 6)] if args.arguments else []) + \
([("user_stack_id", ct.c_int)] if args.user_stack else []) + \
([("kernel_stack_id", ct.c_int),("kernel_ip", ct.c_ulonglong)] if args.kernel_stack else [])
time_designator = "us" if args.min_us else "ms"
time_value = args.min_us or args.min_ms or 1
time_multiplier = 1000 if args.min_us else 1000000
time_col = args.time or args.timestamp
print("Tracing function calls slower than %g %s... Ctrl+C to quit." %
(time_value, time_designator))
print((("%-10s " % "TIME" if time_col else "") + "%-14s %-6s %7s %16s %s") %
("COMM", "PID", "LAT(%s)" % time_designator, "RVAL",
"FUNC" + (" ARGS" if args.arguments else "")))
# Do not print header when folded
if not args.folded:
print("Tracing function calls slower than %g %s... Ctrl+C to quit." %
(time_value, time_designator))
print((("%-10s " % "TIME" if time_col else "") + "%-14s %-6s %7s %16s %s") %
("COMM", "PID", "LAT(%s)" % time_designator, "RVAL",
"FUNC" + (" ARGS" if args.arguments else "")))
earliest_ts = 0
......@@ -222,12 +285,48 @@ def args_str(event):
return ""
return str.join(" ", ["0x%x" % arg for arg in event.args[:args.arguments]])
def print_stack(event):
user_stack = []
stack_traces = b.get_table("stacks")
if args.user_stack and event.user_stack_id > 0:
user_stack = stack_traces.walk(event.user_stack_id)
kernel_stack = []
if args.kernel_stack and event.kernel_stack_id > 0:
kernel_tmp = stack_traces.walk(event.kernel_stack_id)
# fix kernel stack
for addr in kernel_tmp:
kernel_stack.append(addr)
do_delimiter = user_stack and kernel_stack
if args.folded:
# print folded stack output
user_stack = list(user_stack)
kernel_stack = list(kernel_stack)
line = [event.comm.decode()] + \
[b.sym(addr, event.tgid_pid) for addr in reversed(user_stack)] + \
(do_delimiter and ["-"] or []) + \
[b.ksym(addr) for addr in reversed(kernel_stack)]
print("%s %d" % (";".join(line), 1))
else:
# print default multi-line stack output.
for addr in kernel_stack:
print(" %s" % b.ksym(addr))
for addr in user_stack:
print(" %s" % b.sym(addr, event.tgid_pid))
def print_event(cpu, data, size):
event = ct.cast(data, ct.POINTER(Data)).contents
ts = float(event.duration_ns) / time_multiplier
print((time_str(event) + "%-14.14s %-6s %7.2f %16x %s %s") %
(event.comm.decode(), event.tgid_pid >> 32,
ts, event.retval, args.functions[event.id], args_str(event)))
if not args.folded:
print((time_str(event) + "%-14.14s %-6s %7.2f %16x %s %s") %
(event.comm.decode(), event.tgid_pid >> 32,
ts, event.retval, args.functions[event.id], args_str(event)))
if args.user_stack or args.kernel_stack:
print_stack(event)
b["events"].open_perf_buffer(print_event, page_cnt=64)
while True:
......
......@@ -75,9 +75,46 @@ This shows the first two arguments to __kmalloc -- the first one is the size
of the requested allocation. The return value is also shown (null return values
would indicate a failure).
# ./funcslower -U -m 30 '/usr/sbin/nginx:database_write'
Tracing function calls slower than 30 ms... Ctrl+C to quit.
COMM PID LAT(ms) RVAL FUNC
nginx 1617 30.15 9 /usr/sbin/nginx:database_write
DataBaseProvider::setData(std::string const&, record_s&)
UserDataProvider::saveRecordData(RecordData const&)
RequestProcessor::writeResponse(int)
RequestProcessor::processRequest()
RequestRouter::processRequest(RequestWrapper*, ResponseWrapper*)
ngx_http_core_content_phase
ngx_http_core_run_phases
ngx_http_process_request
ngx_process_events_and_timers
ngx_spawn_process
ngx_master_process_cycle
main
__libc_start_main
[unknown]
nginx 1629 30.14 9 /usr/sbin/nginx:database_write
DataBaseProvider::setData(std::string const&, record_s&)
UserDataProvider::saveRecordData(RecordData const&)
RequestProcessor::writeResponse(int)
RequestProcessor::processRequest()
RequestRouter::processRequest(RequestWrapper*, ResponseWrapper*)
ngx_http_core_content_phase
ngx_http_core_run_phases
ngx_http_process_request
ngx_process_events_and_timers
ngx_spawn_process
ngx_master_process_cycle
main
__libc_start_main
[unknown]
^C
Shows the user space stack trace of calls to the user space function call open taking longer than 30 ms.
USAGE message:
usage: funcslower.py [-h] [-p PID] [-m MIN_MS] [-u MIN_US] [-a ARGUMENTS] [-T]
usage: funcslower.py [-hf] [-p PID] [-U | -K] [-m MIN_MS] [-u MIN_US] [-a ARGUMENTS] [-T]
[-t] [-v]
function [function ...]
......@@ -93,6 +130,11 @@ optional arguments:
minimum duration to trace (ms)
-u MIN_US, --min-us MIN_US
minimum duration to trace (us)
-U, --user-stack
show stacks from user space
-K, --kernel-stack
show stacks from kernel space
-f print output in folded stack format.
-a ARGUMENTS, --arguments ARGUMENTS
print this many entry arguments, as hex
-T, --time show HH:MM:SS timestamp
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment