Commit e14f27fc authored by mcaleavya's avatar mcaleavya

migrated statsnoop to use bpf_perf_event_output

parent 1060d28f
...@@ -12,6 +12,10 @@ applications that are failing, especially on startup. ...@@ -12,6 +12,10 @@ applications that are failing, especially on startup.
This works by tracing various kernel sys_stat() functions using dynamic This works by tracing various kernel sys_stat() functions using dynamic
tracing, and will need updating to match any changes to these functions. tracing, and will need updating to match any changes to these functions.
This makes use of a Linux 4.5 feature (bpf_perf_event_output());
for kernels older than 4.5, see the version under tools/old,
which uses an older mechanism.
Since this uses BPF, only the root user can use this tool. Since this uses BPF, only the root user can use this tool.
.SH REQUIREMENTS .SH REQUIREMENTS
CONFIG_BPF and bcc. CONFIG_BPF and bcc.
......
#!/usr/bin/python
# @lint-avoid-python-3-compatibility-imports
#
# statsnoop Trace stat() syscalls.
# For Linux, uses BCC, eBPF. Embedded C.
#
# USAGE: statsnoop [-h] [-t] [-x] [-p PID]
#
# Copyright 2016 Netflix, Inc.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 08-Feb-2016 Brendan Gregg Created this.
from __future__ import print_function
from bcc import BPF
import argparse
# arguments
examples = """examples:
./statsnoop # trace all stat() syscalls
./statsnoop -t # include timestamps
./statsnoop -x # only show failed stats
./statsnoop -p 181 # only trace PID 181
"""
parser = argparse.ArgumentParser(
description="Trace stat() syscalls",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog=examples)
parser.add_argument("-t", "--timestamp", action="store_true",
help="include timestamp on output")
parser.add_argument("-x", "--failed", action="store_true",
help="only show failed stats")
parser.add_argument("-p", "--pid",
help="trace this PID only")
args = parser.parse_args()
debug = 0
# define BPF program
bpf_text = """
#include <uapi/linux/ptrace.h>
BPF_HASH(args_filename, u32, const char *);
int trace_entry(struct pt_regs *ctx, const char __user *filename)
{
u32 pid = bpf_get_current_pid_tgid();
FILTER
args_filename.update(&pid, &filename);
return 0;
};
int trace_return(struct pt_regs *ctx)
{
const char **filenamep;
int ret = ctx->ax;
u32 pid = bpf_get_current_pid_tgid();
filenamep = args_filename.lookup(&pid);
if (filenamep == 0) {
// missed entry
return 0;
}
bpf_trace_printk("%d %s\\n", ret, *filenamep);
args_filename.delete(&pid);
return 0;
}
"""
if args.pid:
bpf_text = bpf_text.replace('FILTER',
'if (pid != %s) { return 0; }' % args.pid)
else:
bpf_text = bpf_text.replace('FILTER', '')
if debug:
print(bpf_text)
# initialize BPF
b = BPF(text=bpf_text)
b.attach_kprobe(event="sys_stat", fn_name="trace_entry")
b.attach_kprobe(event="sys_statfs", fn_name="trace_entry")
b.attach_kprobe(event="sys_newstat", fn_name="trace_entry")
b.attach_kretprobe(event="sys_stat", fn_name="trace_return")
b.attach_kretprobe(event="sys_statfs", fn_name="trace_return")
b.attach_kretprobe(event="sys_newstat", fn_name="trace_return")
# header
if args.timestamp:
print("%-14s" % ("TIME(s)"), end="")
print("%-6s %-16s %4s %3s %s" % ("PID", "COMM", "FD", "ERR", "PATH"))
start_ts = 0
# format output
while 1:
(task, pid, cpu, flags, ts, msg) = b.trace_fields()
(ret_s, filename) = msg.split(" ", 1)
ret = int(ret_s)
if (args.failed and (ret >= 0)):
continue
# split return value into FD and errno columns
if ret >= 0:
fd_s = ret
err = 0
else:
fd_s = "-1"
err = - ret
# print columns
if args.timestamp:
if start_ts == 0:
start_ts = ts
print("%-14.9f" % (ts - start_ts), end="")
print("%-6d %-16s %4s %3s %s" % (pid, task, fd_s, err, filename))
...@@ -10,10 +10,12 @@ ...@@ -10,10 +10,12 @@
# Licensed under the Apache License, Version 2.0 (the "License") # Licensed under the Apache License, Version 2.0 (the "License")
# #
# 08-Feb-2016 Brendan Gregg Created this. # 08-Feb-2016 Brendan Gregg Created this.
# 17-Feb-2016 Allan McAleavy updated for BPF_PERF_OUTPUT
from __future__ import print_function from __future__ import print_function
from bcc import BPF from bcc import BPF
import argparse import argparse
import ctypes as ct
# arguments # arguments
examples = """examples: examples = """examples:
...@@ -38,32 +40,67 @@ debug = 0 ...@@ -38,32 +40,67 @@ debug = 0
# define BPF program # define BPF program
bpf_text = """ bpf_text = """
#include <uapi/linux/ptrace.h> #include <uapi/linux/ptrace.h>
#include <uapi/linux/limits.h>
#include <linux/sched.h>
struct val_t {
u32 pid;
u64 ts;
char comm[TASK_COMM_LEN];
const char *fname;
};
struct data_t {
u32 pid;
u64 ts;
u64 delta;
int ret;
char comm[TASK_COMM_LEN];
char fname[NAME_MAX];
};
BPF_HASH(args_filename, u32, const char *); BPF_HASH(args_filename, u32, const char *);
BPF_HASH(infotmp, u32,struct val_t);
BPF_PERF_OUTPUT(events);
int trace_entry(struct pt_regs *ctx, const char __user *filename) int trace_entry(struct pt_regs *ctx, const char __user *filename)
{ {
struct val_t val = {};
u32 pid = bpf_get_current_pid_tgid(); u32 pid = bpf_get_current_pid_tgid();
FILTER FILTER
args_filename.update(&pid, &filename); if (bpf_get_current_comm(&val.comm, sizeof(val.comm)) == 0) {
val.pid = bpf_get_current_pid_tgid();
val.ts = bpf_ktime_get_ns();
val.fname = filename;
infotmp.update(&pid, &val);
}
return 0; return 0;
}; };
int trace_return(struct pt_regs *ctx) int trace_return(struct pt_regs *ctx)
{ {
const char **filenamep;
int ret = ctx->ax;
u32 pid = bpf_get_current_pid_tgid(); u32 pid = bpf_get_current_pid_tgid();
struct val_t *valp;
struct data_t data = {};
u64 tsp = bpf_ktime_get_ns();
filenamep = args_filename.lookup(&pid); valp = infotmp.lookup(&pid);
if (filenamep == 0) { if (valp == 0) {
// missed entry // missed entry
return 0; return 0;
} }
bpf_probe_read(&data.comm,sizeof(data.comm), valp->comm);
bpf_trace_printk("%d %s\\n", ret, *filenamep); bpf_probe_read(&data.fname,sizeof(data.fname),(void *)valp->fname);
data.pid = valp->pid;
data.delta = tsp - valp->ts;
data.ts = tsp /1000;
data.ret = ctx->ax;
events.perf_submit(ctx,&data,sizeof(data));
infotmp.delete(&pid);
args_filename.delete(&pid); args_filename.delete(&pid);
return 0; return 0;
...@@ -86,33 +123,65 @@ b.attach_kretprobe(event="sys_stat", fn_name="trace_return") ...@@ -86,33 +123,65 @@ b.attach_kretprobe(event="sys_stat", fn_name="trace_return")
b.attach_kretprobe(event="sys_statfs", fn_name="trace_return") b.attach_kretprobe(event="sys_statfs", fn_name="trace_return")
b.attach_kretprobe(event="sys_newstat", fn_name="trace_return") b.attach_kretprobe(event="sys_newstat", fn_name="trace_return")
TASK_COMM_LEN = 16 # linux/sched.h
NAME_MAX = 255 # linux/limits.h
class Data(ct.Structure):
_fields_ = [
("pid", ct.c_ulonglong),
("ts", ct.c_ulonglong),
("delta", ct.c_ulonglong),
("ret", ct.c_int),
("comm", ct.c_char * TASK_COMM_LEN),
("fname", ct.c_char * NAME_MAX)
]
start_ts = 0
prev_ts = 0
delta = 0
# header # header
if args.timestamp: if args.timestamp:
print("%-14s" % ("TIME(s)"), end="") print("%-14s" % ("TIME(s)"), end="")
print("%-6s %-16s %4s %3s %s" % ("PID", "COMM", "FD", "ERR", "PATH")) print("%-6s %-16s %4s %3s %s" % ("PID", "COMM", "FD", "ERR", "PATH"))
start_ts = 0 # process event
def print_event(cpu, data, size):
# format output event = ct.cast(data, ct.POINTER(Data)).contents
while 1: global start_ts
(task, pid, cpu, flags, ts, msg) = b.trace_fields() global prev_ts
(ret_s, filename) = msg.split(" ", 1) global delta
global cont
ret = int(ret_s)
if (args.failed and (ret >= 0)):
continue
# split return value into FD and errno columns # split return value into FD and errno columns
if ret >= 0: if event.ret >= 0:
fd_s = ret fd_s = event.ret
err = 0 err = 0
else: else:
fd_s = "-1" fd_s = -1
err = - ret err = - event.ret
# print columns
if args.timestamp:
if start_ts == 0: if start_ts == 0:
start_ts = ts prev_ts = start_ts
print("%-14.9f" % (ts - start_ts), end="")
print("%-6d %-16s %4s %3s %s" % (pid, task, fd_s, err, filename)) if start_ts == 1:
delta = float(delta) + (event.ts - prev_ts)
if (args.failed and (event.ret >= 0)):
start_ts = 1
prev_ts = event.ts
return
if args.timestamp:
print("%-14.9f" % (delta / 1000000), end="")
print("%-6d %-16s %4d %3d %s" % (event.pid, event.comm,
fd_s, err, event.fname))
prev_ts = event.ts
start_ts = 1
# loop with callback to print_event
b["events"].open_perf_buffer(print_event)
while 1:
b.kprobe_poll()
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment