Commit 2757f0ef authored by Brendan Gregg's avatar Brendan Gregg

dcsnoop

parent 5bfadab8
......@@ -72,6 +72,7 @@ Tools:
- tools/[bitesize](tools/bitesize.py): Show per process I/O size histogram. [Examples](tools/bitesize_example.txt).
- tools/[cachestat](tools/cachestat.py): Trace page cache hit/miss ratio. [Examples](tools/cachestat_example.txt).
- tools/[execsnoop](tools/execsnoop.py): Trace new processes via exec() syscalls. [Examples](tools/execsnoop_example.txt).
- tools/[dcsnoop](tools/dcsnoop.py): Trace directory entry cache (dcache) lookups. [Examples](tools/dcsnoop_example.txt).
- tools/[dcstat](tools/dcstat.py): Directory entry cache (dcache) stats. [Examples](tools/dcstat_example.txt).
- tools/[filelife](tools/filelife.py): Trace the lifespan of short-lived files. [Examples](tools/filelife_example.txt).
- tools/[fileslower](tools/fileslower.py): Trace slow synchronous file reads and writes. [Examples](tools/fileslower_example.txt).
......
.TH dcsnoop 8 "2016-02-10" "USER COMMANDS"
.SH NAME
dcsnoop \- Trace directory entry cache (dcache) lookups. Uses Linux eBPF/bcc.
.SH SYNOPSIS
.B dcsnoop [\-h] [\-a]
.SH DESCRIPTION
By default, this traces every failed dcache lookup (cache miss), and shows the
process performing the lookup and the filename requested. A \-a option can be
used to show all lookups, not just failed ones.
The output of this tool can be verbose, and is intended for further
investigations of dcache performance beyond dcstat(8), which prints
per-second summaries.
This uses kernel dynamic tracing of the d_lookup() function, and will need
and will need updating to match any changes to this function.
Since this uses BPF, only the root user can use this tool.
.SH REQUIREMENTS
CONFIG_BPF and bcc.
.SH OPTIONS
.TP
\-h
Print usage message.
.TP
\-a
Trace references, not just failed lookups.
.SH EXAMPLES
.TP
Trace failed dcache lookups:
#
.B dcsnoop
.TP
Trace all dcache lookups:
#
.B dcsnoop \-a
.SH FIELDS
.TP
TIME(s)
Time of lookup, in seconds.
.TP
PID
Process ID.
.TP
COMM
Process name.
.TP
T
Type: R == reference (only visible with \-a), M == miss. A miss will print two
lines, one for the reference, and one for the miss.
.TP
FILE
The file name component that was being looked up. This contains trailing
pathname components (after '/'), which will be the subject of subsequent
lookups.
.SH OVERHEAD
File name lookups can be frequent (depending on the workload), and this tool
prints a line for each failed lookup, and with \-a, each reference as well. The
output may be verbose, and the incurred overhead, while optimized to some
extent, may still be from noticeable to significant. This is only really
intended for deeper investigations beyond dcstat(8), when absolutely necessary.
Measure and quantify the overhead in a test environment before use.
.SH SOURCE
This is from bcc.
.IP
https://github.com/iovisor/bcc
.PP
Also look in the bcc distribution for a companion _examples.txt file containing
example usage, output, and commentary for this tool.
.SH OS
Linux
.SH STABILITY
Unstable - in development.
.SH AUTHOR
Brendan Gregg
.SH SEE ALSO
dcstat(1)
#!/usr/bin/python
# @lint-avoid-python-3-compatibility-imports
#
# dcsnoop Trace directory entry cache (dcache) lookups.
# For Linux, uses BCC, eBPF. Embedded C.
#
# USAGE: dcsnoop [-h] [-a]
#
# By default, this traces every failed dcache lookup, and shows the process
# performing the lookup and the filename requested. A -a option can be used
# to show all lookups, not just failed ones.
#
# This uses kernel dynamic tracing of the d_lookup() function, and will need
# to be modified to match kernel changes.
#
# Also see dcstat(8), for per-second summaries.
#
# Copyright 2016 Netflix, Inc.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 09-Feb-2016 Brendan Gregg Created this.
from __future__ import print_function
from bcc import BPF
import argparse
import re
# arguments
examples = """examples:
./dcsnoop # trace failed dcache lookups
./dcsnoop -a # trace all dcache lookups
"""
parser = argparse.ArgumentParser(
description="Trace directory entry cache (dcache) lookups",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog=examples)
parser.add_argument("-a", "--all", action="store_true",
help="trace all lookups (default is fails only)")
args = parser.parse_args()
# define BPF program
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <linux/fs.h>
#include <linux/sched.h>
#define MAX_FILE_LEN 64
struct entry_t {
char name[MAX_FILE_LEN];
};
BPF_HASH(entrybypid, u32, struct entry_t);
/* from fs/namei.c: */
struct nameidata {
struct path path;
struct qstr last;
// [...]
};
int trace_fast(struct pt_regs *ctx, struct nameidata *nd, struct path *path)
{
bpf_trace_printk("R %s\\n", nd->last.name);
return 1;
}
int kprobe__d_lookup(struct pt_regs *ctx, const struct dentry *parent,
const struct qstr *name)
{
u32 pid = bpf_get_current_pid_tgid();
struct entry_t entry = {};
if (name->name) {
bpf_probe_read(&entry.name, sizeof(entry.name), (void *)name->name);
}
entrybypid.update(&pid, &entry);
return 0;
}
int kretprobe__d_lookup(struct pt_regs *ctx)
{
u32 pid = bpf_get_current_pid_tgid();
struct entry_t *ep;
ep = entrybypid.lookup(&pid);
if (ep == 0) {
return 0; // missed entry
}
if (ctx->ax == 0) {
bpf_trace_printk("M %s\\n", ep->name);
}
entrybypid.delete(&pid);
return 0;
}
"""
# initialize BPF
b = BPF(text=bpf_text)
if args.all:
b.attach_kprobe(event="lookup_fast", fn_name="trace_fast")
# header
print("%-11s %-6s %-16s %1s %s" % ("TIME(s)", "PID", "COMM", "T", "FILE"))
start_ts = 0
# format output
while 1:
(task, pid, cpu, flags, ts, msg) = b.trace_fields()
try:
(type, file) = msg.split(" ", 1)
except ValueError:
continue
if start_ts == 0:
start_ts = ts
print("%-11.6f %-6s %-16s %1s %s" % (ts - start_ts, pid, task, type, file))
Demonstrations of dcsnoop, the Linux eBPF/bcc version.
dcsnoop traces directory entry cache (dcache) lookups, and can be used for
further investigation beyond dcstat(8). The output is likely verbose, as
dcache lookups are likely frequent. By default, only failed lookups are shown.
For example:
# ./dcsnoop.py
TIME(s) PID COMM T FILE
0.002837 1643 snmpd M net/dev
0.002852 1643 snmpd M 1643
0.002856 1643 snmpd M net
0.002863 1643 snmpd M dev
0.002952 1643 snmpd M net/if_inet6
0.002964 1643 snmpd M if_inet6
0.003180 1643 snmpd M net/ipv4/neigh/eth0/retrans_time_ms
0.003192 1643 snmpd M ipv4/neigh/eth0/retrans_time_ms
0.003197 1643 snmpd M neigh/eth0/retrans_time_ms
0.003203 1643 snmpd M eth0/retrans_time_ms
0.003206 1643 snmpd M retrans_time_ms
0.003245 1643 snmpd M ipv6/neigh/eth0/retrans_time_ms
0.003249 1643 snmpd M neigh/eth0/retrans_time_ms
0.003252 1643 snmpd M eth0/retrans_time_ms
0.003255 1643 snmpd M retrans_time_ms
0.003287 1643 snmpd M conf/eth0/forwarding
0.003292 1643 snmpd M eth0/forwarding
0.003295 1643 snmpd M forwarding
0.003326 1643 snmpd M base_reachable_time_ms
[...]
I ran a drop caches at the same time as executing this tool. The output shows
the processes, the type of event ("T" column: M == miss, R == reference),
and the filename for the dcache lookup.
The way the dcache is currently implemented, each component of a path is
checked in turn. The first line, showing "net/dev" from snmp, will be a lookup
for "net" in a directory (that isn't shown here). If it finds "net", it will
then lookup "dev" inside net. You can see this sequence a little later,
starting at time 0.003180, where a pathname is being searched
directory by directory.
The -a option will show all lookups, although be warned, the output will be
very verbose. For example:
# ./dcsnoop
TIME(s) PID COMM T FILE
0.000000 20279 dcsnoop.py M p_lookup_fast
0.000010 20279 dcsnoop.py M enable
0.000013 20279 dcsnoop.py M id
0.000015 20279 dcsnoop.py M filter
0.000017 20279 dcsnoop.py M trigger
0.000019 20279 dcsnoop.py M format
0.006148 20279 dcsnoop.py R sys/kernel/debug/tracing/trace_pipe
0.006158 20279 dcsnoop.py R kernel/debug/tracing/trace_pipe
0.006161 20279 dcsnoop.py R debug/tracing/trace_pipe
0.006164 20279 dcsnoop.py R tracing/trace_pipe
0.006166 20279 dcsnoop.py R trace_pipe
0.015900 1643 snmpd R proc/sys/net/ipv6/conf/lo/forwarding
0.015901 1643 snmpd R sys/net/ipv6/conf/lo/forwarding
0.015901 1643 snmpd R net/ipv6/conf/lo/forwarding
0.015902 1643 snmpd R ipv6/conf/lo/forwarding
0.015903 1643 snmpd R conf/lo/forwarding
0.015904 1643 snmpd R lo/forwarding
0.015905 1643 snmpd M lo/forwarding
0.015908 1643 snmpd R forwarding
0.015909 1643 snmpd M forwarding
0.015937 1643 snmpd R proc/sys/net/ipv6/neigh/lo/base_reachable_time_ms
0.015937 1643 snmpd R sys/net/ipv6/neigh/lo/base_reachable_time_ms
0.015938 1643 snmpd R net/ipv6/neigh/lo/base_reachable_time_ms
0.015939 1643 snmpd R ipv6/neigh/lo/base_reachable_time_ms
0.015940 1643 snmpd R neigh/lo/base_reachable_time_ms
0.015941 1643 snmpd R lo/base_reachable_time_ms
0.015941 1643 snmpd R base_reachable_time_ms
0.015943 1643 snmpd M base_reachable_time_ms
0.043569 1876 supervise M 20281
0.043573 1886 supervise M 20280
0.043582 1886 supervise R supervise/status.new
[...]
USAGE message:
# ./dcsnoop.py -h
usage: dcsnoop.py [-h] [-a]
Trace directory entry cache (dcache) lookups
optional arguments:
-h, --help show this help message and exit
-a, --all trace all lookups (default is fails only)
examples:
./dcsnoop # trace failed dcache lookups
./dcsnoop -a # trace all dcache lookups
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment