Commit 48fbc3ea authored by Brendan Gregg's avatar Brendan Gregg

some bcc examples and tools

parent 5484badb
/*
* bitehist.c Block I/O size histogram.
* For Linux, uses BCC, eBPF. See .py file.
*
* Based on eBPF sample tracex2 by Alexi Starovoitov.
* Copyright (c) 2013-2015 PLUMgrid, http://plumgrid.com
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* 15-Aug-2015 Brendan Gregg Created this.
*/
#include <uapi/linux/ptrace.h>
#include "../../linux-4.2-rc5/include/linux/blkdev.h"
BPF_TABLE("array", int, u64, dist, 64);
static unsigned int log2(unsigned int v)
{
unsigned int r;
unsigned int shift;
r = (v > 0xFFFF) << 4; v >>= r;
shift = (v > 0xFF) << 3; v >>= shift; r |= shift;
shift = (v > 0xF) << 2; v >>= shift; r |= shift;
shift = (v > 0x3) << 1; v >>= shift; r |= shift;
r |= (v >> 1);
return r;
}
static unsigned int log2l(unsigned long v)
{
unsigned int hi = v >> 32;
if (hi)
return log2(hi) + 32 + 1;
else
return log2(v) + 1;
}
int do_request(struct pt_regs *ctx, struct request *req)
{
int index = log2l(req->__data_len / 1024);
u64 *leaf = dist.lookup(&index);
if (leaf) (*leaf)++;
return 0;
}
#!/usr/bin/python
#
# bitehist.py Block I/O size histogram.
# For Linux, uses BCC, eBPF. See .c file.
#
# Written as a basic example of using a histogram to show a distribution.
#
# USAGE: bitehist.py [interval [count]]
#
# The default interval is 5 seconds. A Ctrl-C will print the partially
# gathered histogram then exit.
#
# Copyright (c) 2015 Brendan Gregg.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 15-Aug-2015 Brendan Gregg Created this.
from bpf import BPF
from ctypes import c_ushort, c_int, c_ulonglong
from time import sleep
from sys import argv
def usage():
print("USAGE: %s [interval [count]]" % argv[0])
exit()
# arguments
interval = 5
count = -1
if len(argv) > 1:
try:
interval = int(argv[1])
if interval == 0:
raise
if len(argv) > 2:
count = int(argv[2])
except: # also catches -h, --help
usage()
# load BPF program
b = BPF(src_file = "bitehist.c")
BPF.attach_kprobe(b.load_func("do_request", BPF.KPROBE), "blk_start_request")
dist = b.get_table("dist", c_int, c_ulonglong)
dist_max = 64
# header
print("Tracing... Hit Ctrl-C to end.")
last = {}
for i in range(1, dist_max + 1):
last[i] = 0
# functions
stars_max = 38
def stars(val, val_max, width):
i = 0
text = ""
while (1):
if (i > (width * val / val_max) - 1) or (i > width - 1):
break
text += "*"
i += 1
if val > val_max:
text = text[:-1] + "+"
return text
def print_log2_hist(d, val_type):
idx_max = -1
val_max = 0
for i in range(1, dist_max + 1):
try:
val = dist[c_int(i)].value - last[i]
if (val > 0):
idx_max = i
if (val > val_max):
val_max = val
except:
break
if idx_max > 0:
print(" %-15s : count distribution" % val_type);
for i in range(1, idx_max + 1):
low = (1 << i) >> 1
high = (1 << i) - 1
if (low == high):
low -= 1
try:
val = dist[c_int(i)].value - last[i]
print("%8d -> %-8d : %-8d |%-*s|" % (low, high, val,
stars_max, stars(val, val_max, stars_max)))
last[i] = dist[c_int(i)].value
except:
break
# output
loop = 0
do_exit = 0
while (1):
if count > 0:
loop += 1
if loop > count:
exit()
try:
sleep(interval)
except KeyboardInterrupt:
pass; do_exit = 1
print
print_log2_hist(dist, "kbytes")
if do_exit:
exit()
Demonstrations of bitehist.py, the Linux eBPF/bcc version.
This prints a power-of-2 histogram to show the block I/O size distribution.
By default, a summary is printed every five seconds:
# ./bitehist.py
Tracing... Hit Ctrl-C to end.
kbytes : count distribution
0 -> 1 : 0 | |
2 -> 3 : 0 | |
4 -> 7 : 26 |************* |
8 -> 15 : 3 |* |
16 -> 31 : 5 |** |
32 -> 63 : 6 |*** |
64 -> 127 : 7 |*** |
128 -> 255 : 75 |**************************************|
kbytes : count distribution
0 -> 1 : 0 | |
2 -> 3 : 0 | |
4 -> 7 : 83 |**************************************|
8 -> 15 : 2 | |
16 -> 31 : 6 |** |
32 -> 63 : 6 |** |
64 -> 127 : 5 |** |
128 -> 255 : 21 |********* |
^C
kbytes : count distribution
0 -> 1 : 0 | |
2 -> 3 : 0 | |
4 -> 7 : 8 |**************************************|
The first output shows a bimodal distribution. The largest mode of 75 I/O were
between 128 and 255 Kbytes in size, and another mode of 26 I/O were between 4
and 7 Kbytes in size.
The next output summary shows the workload is doing more 4 - 7 Kbyte I/O.
The final output is partial, showing what was measured until Ctrl-C was hit.
For an output interval of one second, and three summaries:
# ./bitehist.py 1 3
Tracing... Hit Ctrl-C to end.
kbytes : count distribution
0 -> 1 : 0 | |
2 -> 3 : 0 | |
4 -> 7 : 4 |**************************************|
kbytes : count distribution
0 -> 1 : 0 | |
2 -> 3 : 0 | |
4 -> 7 : 5 |**************************************|
8 -> 15 : 0 | |
16 -> 31 : 0 | |
32 -> 63 : 1 |******* |
kbytes : count distribution
0 -> 1 : 0 | |
2 -> 3 : 0 | |
4 -> 7 : 4 |**************************************|
Full usage:
# ./bitehist.py -h
USAGE: ./bitehist.py [interval [count]]
/*
* disksnoop.c Trace block device I/O: basic version of iosnoop.
* For Linux, uses BCC, eBPF. See .py file.
*
* Copyright (c) 2015 Brendan Gregg.
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* 11-Aug-2015 Brendan Gregg Created this.
*/
#include <uapi/linux/ptrace.h>
#include "../../linux-4.2-rc5/include/linux/blkdev.h"
struct key_t {
struct request *req;
};
BPF_TABLE("hash", struct key_t, u64, start, 10240);
int do_request(struct pt_regs *ctx, struct request *req) {
struct key_t key = {};
u64 ts;
// stash start timestamp by request ptr
ts = bpf_ktime_get_ns();
key.req = req;
start.update(&key, &ts);
return 0;
}
int do_completion(struct pt_regs *ctx, struct request *req) {
struct key_t key = {};
u64 *tsp, delta;
key.req = req;
tsp = start.lookup(&key);
if (tsp != 0) {
delta = bpf_ktime_get_ns() - *tsp;
bpf_trace_printk("%d %x %d\n", req->__data_len,
req->cmd_flags, delta / 1000);
start.delete(&key);
}
return 0;
}
#!/usr/bin/python
#
# disksnoop.py Trace block device I/O: basic version of iosnoop.
# For Linux, uses BCC, eBPF. See .c file.
#
# Written as a basic example of tracing latency.
#
# Copyright (c) 2015 Brendan Gregg.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 11-Aug-2015 Brendan Gregg Created this.
from bpf import BPF
import sys
REQ_WRITE = 1 # from include/linux/blk_types.h
# load BPF program
b = BPF(src_file="disksnoop.c")
BPF.attach_kprobe(b.load_func("do_request", BPF.KPROBE), "blk_start_request")
BPF.attach_kprobe(b.load_func("do_completion", BPF.KPROBE), "blk_update_request")
# header
print "%-18s %-2s %-7s %8s" % ("TIME(s)", "T", "BYTES", "LAT(ms)")
# open trace pipe
try:
trace = open("/sys/kernel/debug/tracing/trace_pipe", "r")
except:
print >> sys.stderr, "ERROR: opening trace_pipe"
exit(1)
# format output
while 1:
try:
line = trace.readline().rstrip()
except KeyboardInterrupt:
pass; exit()
prolog, time_s, colon, bytes_s, flags_s, us_s = \
line.rsplit(" ", 5)
time_s = time_s[:-1] # strip trailing ":"
flags = int(flags_s, 16)
if flags & REQ_WRITE:
type_s = "W"
elif bytes_s == "0": # see blk_fill_rwbs() for logic
type_s = "M"
else:
type_s = "R"
ms = float(int(us_s, 10)) / 1000
print "%-18s %-2s %-7s %8.2f" % (time_s, type_s, bytes_s, ms)
Demonstrations of disksnoop.py, the Linux eBPF/bcc version.
This traces block I/O, a prints a line to summarize each I/O completed:
# ./disksnoop.py
TIME(s) T BYTES LAT(ms)
16458043.435457 W 4096 2.73
16458043.435981 W 4096 3.24
16458043.436012 W 4096 3.13
16458043.437326 W 4096 4.44
16458044.126545 R 4096 42.82
16458044.129872 R 4096 3.24
16458044.130705 R 4096 0.73
16458044.142813 R 4096 12.01
16458044.147302 R 4096 4.33
16458044.148117 R 4096 0.71
16458044.148950 R 4096 0.70
16458044.164332 R 4096 15.29
16458044.168003 R 4096 3.58
16458044.171676 R 4096 3.59
16458044.172453 R 4096 0.72
16458044.173213 R 4096 0.71
16458044.173989 R 4096 0.72
16458044.174739 R 4096 0.70
16458044.190334 R 4096 15.52
16458044.196608 R 4096 6.17
16458044.203091 R 4096 6.35
The output includes a basic timestamp (in seconds), the type of I/O (W == write,
R == read, M == metadata), the size of the I/O in bytes, and the latency (or
duration) of the I/O in milliseconds.
The latency is measured from I/O request to the device, to the device
completion. This excludes latency spent queued in the OS.
Most of the I/O in this example were 0.7 and 4 milliseconds in duration. There
was an outlier of 42.82 milliseconds, a read which followed many writes (the
high latency may have been caused by the writes still being serviced on the
storage device).
/*
* vfsreadlat.c VFS read latency distribution.
* For Linux, uses BCC, eBPF. See .py file.
*
* Based on eBPF sample tracex2 by Alexi Starovoitov.
* Copyright (c) 2013-2015 PLUMgrid, http://plumgrid.com
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* 15-Aug-2015 Brendan Gregg Created this.
*/
#include <uapi/linux/ptrace.h>
struct key_t {
u32 pid;
};
BPF_TABLE("hash", struct key_t, u64, start, 10240);
BPF_TABLE("array", int, u64, dist, 64);
static unsigned int log2(unsigned int v)
{
unsigned int r;
unsigned int shift;
r = (v > 0xFFFF) << 4; v >>= r;
shift = (v > 0xFF) << 3; v >>= shift; r |= shift;
shift = (v > 0xF) << 2; v >>= shift; r |= shift;
shift = (v > 0x3) << 1; v >>= shift; r |= shift;
r |= (v >> 1);
return r;
}
static unsigned int log2l(unsigned long v)
{
unsigned int hi = v >> 32;
if (hi)
return log2(hi) + 32 + 1;
else
return log2(v) + 1;
}
int do_entry(struct pt_regs *ctx)
{
struct key_t key = {};
u64 ts, *val, zero = 0;
key.pid = bpf_get_current_pid_tgid();
ts = bpf_ktime_get_ns();
start.update(&key, &ts);
return 0;
}
int do_return(struct pt_regs *ctx)
{
struct key_t key = {};
u64 *tsp, delta;
key.pid = bpf_get_current_pid_tgid();
tsp = start.lookup(&key);
if (tsp != 0) {
delta = bpf_ktime_get_ns() - *tsp;
int index = log2l(delta / 1000);
u64 *leaf = dist.lookup(&index);
if (leaf) (*leaf)++;
start.delete(&key);
}
return 0;
}
#!/usr/bin/python
#
# vfsreadlat.py VFS read latency distribution.
# For Linux, uses BCC, eBPF. See .c file.
#
# Written as a basic example of a function latency distribution histogram.
#
# USAGE: vfsreadlat.py [interval [count]]
#
# The default interval is 5 seconds. A Ctrl-C will print the partially
# gathered histogram then exit.
#
# Copyright (c) 2015 Brendan Gregg.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 15-Aug-2015 Brendan Gregg Created this.
from bpf import BPF
from ctypes import c_ushort, c_int, c_ulonglong
from time import sleep
from sys import argv
def usage():
print("USAGE: %s [interval [count]]" % argv[0])
exit()
# arguments
interval = 5
count = -1
if len(argv) > 1:
try:
interval = int(argv[1])
if interval == 0:
raise
if len(argv) > 2:
count = int(argv[2])
except: # also catches -h, --help
usage()
# load BPF program
b = BPF(src_file = "vfsreadlat.c")
BPF.attach_kprobe(b.load_func("do_entry", BPF.KPROBE), "vfs_read")
BPF.attach_kretprobe(b.load_func("do_return", BPF.KPROBE), "vfs_read")
dist = b.get_table("dist", c_int, c_ulonglong)
dist_max = 64
# header
print("Tracing... Hit Ctrl-C to end.")
last = {}
for i in range(1, dist_max + 1):
last[i] = 0
# functions
stars_max = 38
def stars(val, val_max, width):
i = 0
text = ""
while (1):
if (i > (width * val / val_max) - 1) or (i > width - 1):
break
text += "*"
i += 1
if val > val_max:
text = text[:-1] + "+"
return text
def print_log2_hist(d, val_type):
idx_max = -1
val_max = 0
for i in range(1, dist_max + 1):
try:
val = dist[c_int(i)].value - last[i]
if (val > 0):
idx_max = i
if (val > val_max):
val_max = val
except:
break
if idx_max > 0:
print(" %-15s : count distribution" % val_type);
for i in range(1, idx_max + 1):
low = (1 << i) >> 1
high = (1 << i) - 1
if (low == high):
low -= 1
try:
val = dist[c_int(i)].value - last[i]
print("%8d -> %-8d : %-8d |%-*s|" % (low, high, val,
stars_max, stars(val, val_max, stars_max)))
last[i] = dist[c_int(i)].value
except:
break
# output
loop = 0
do_exit = 0
while (1):
if count > 0:
loop += 1
if loop > count:
exit()
try:
sleep(interval)
except KeyboardInterrupt:
pass; do_exit = 1
print
print_log2_hist(dist, "usecs")
if do_exit:
exit()
Demonstrations of vfsreadlat.py, the Linux eBPF/bcc version.
This example traces the latency of vfs_read (time from call to return), printing
it as a histogram distribution. By default, output is every five seconds:
# ./vfsreadlat.py
Tracing... Hit Ctrl-C to end.
usecs : count distribution
0 -> 1 : 4457 |*************************************+|
2 -> 3 : 447 |*** |
4 -> 7 : 2059 |***************** |
8 -> 15 : 1179 |********** |
16 -> 31 : 63 | |
32 -> 63 : 0 | |
64 -> 127 : 2 | |
128 -> 255 : 0 | |
256 -> 511 : 3 | |
512 -> 1023 : 1 | |
1024 -> 2047 : 3 | |
2048 -> 4095 : 2 | |
4096 -> 8191 : 0 | |
8192 -> 16383 : 0 | |
16384 -> 32767 : 0 | |
32768 -> 65535 : 0 | |
65536 -> 131071 : 4 | |
131072 -> 262143 : 2 | |
262144 -> 524287 : 0 | |
524288 -> 1048575 : 4 | |
^C
usecs : count distribution
0 -> 1 : 241 |*************************************+|
2 -> 3 : 17 |** |
4 -> 7 : 2 | |
8 -> 15 : 4 | |
16 -> 31 : 2 | |
32 -> 63 : 0 | |
64 -> 127 : 1 | |
128 -> 255 : 0 | |
256 -> 511 : 1 | |
512 -> 1023 : 1 | |
1024 -> 2047 : 0 | |
2048 -> 4095 : 1 | |
4096 -> 8191 : 0 | |
8192 -> 16383 : 0 | |
16384 -> 32767 : 0 | |
32768 -> 65535 : 0 | |
65536 -> 131071 : 0 | |
131072 -> 262143 : 0 | |
262144 -> 524287 : 0 | |
524288 -> 1048575 : 1 | |
These examples show outliers in the 524 - 1048 milliseconds range. Since
vfs_read() will catch many types of events, this could be anything including
keystroke latency on ssh sessions. Further drilling with bcc will be necessary
to identify more details.
Full usage:
# ./vfsreadlat.py -h
USAGE: ./vfsreadlat.py [interval [count]]
.TH pidpersec 8 "2015-08-18" "USER COMMANDS"
.SH NAME
pidpersec \- Count new processes (via fork()). Uses Linux eBPF/bcc.
.SH SYNOPSIS
.B pidpersec
.SH DESCRIPTION
pidpersec shows how many new processes were created each second. There
can be performance issues caused by many short-lived processes, which may not
be visible in sampling tools like top(1). pidpersec provides one way to
investigate this behavior.
This works by tracing the kernel sched_fork() function using dynamic tracing,
and will need updating to match any changes to this function.
Since this uses BPF, only the root user can use this tool.
.SH REQUIREMENTS
CONFIG_BPF and bcc.
.SH EXAMPLES
.TP
Count new processes created each second:
#
.B pidpersec
.SH OVERHEAD
This traces the kernel fork function, and maintains an in-kernel count which is
read asynchronously from user-space. As the rate of this is generally expected to
be low (<< 1000/s), the overhead is also expected to be negligible.
.SH SOURCE
This is from bcc.
.IP
https://github.com/iovisor/bcc
.PP
Also look in the bcc distribution for a companion _examples.txt file containing
example usage, output, and commentary for this tool.
.SH OS
Linux
.SH STABILITY
Unstable - in development.
.SH AUTHOR
Brendan Gregg
.SH SEE ALSO
top(1)
.TH syncsnoop 8 "2015-08-18" "USER COMMANDS"
.SH NAME
syncsnoop \- Trace sync() syscall. Uses Linux eBPF/bcc.
.SH SYNOPSIS
.B syncsnoop
.SH DESCRIPTION
syncsnoop traces calls to sync(), which flushes file system buffers to
storage devices. These calls can cause performance perturbations, and it can
be useful to know if they are happening and how frequently.
This works by tracing the kernel sys_sync() function using dynamic tracing, and
will need updating to match any changes to this function.
This program is also a basic example of eBPF/bcc.
Since this uses BPF, only the root user can use this tool.
.SH REQUIREMENTS
CONFIG_BPF and bcc.
.SH EXAMPLES
.TP
Trace calls to sync():
#
.B syncsnoop
.SH FIELDS
.TP
TIME(s)
Time of the call, in seconds.
.TP
CALL
Call traced.
.SH OVERHEAD
This traces the kernel sync function and prints output for each event. As the
rate of this is generally expected to be low (<< 100/s), the overhead is also
expected to be negligible.
.SH SOURCE
This is from bcc.
.IP
https://github.com/iovisor/bcc
.PP
Also look in the bcc distribution for a companion _examples.txt file containing
example usage, output, and commentary for this tool.
.SH OS
Linux
.SH STABILITY
Unstable - in development.
.SH AUTHOR
Brendan Gregg
.SH SEE ALSO
iostat(1)
.TH vfscount 8 "2015-08-18" "USER COMMANDS"
.SH NAME
vfscount \- Count some common VFS calls. Uses Linux eBPF/bcc.
.SH SYNOPSIS
.B vfscount
.SH DESCRIPTION
This counts common VFS calls. This can be useful for general workload
characterization of these operations.
This works by tracing some kernel vfs functions using dynamic tracing, and will
need updating to match any changes to these functions. Edit the script to
customize and add functions to trace, which is easy to do.
Since this uses BPF, only the root user can use this tool.
.SH REQUIREMENTS
CONFIG_BPF and bcc.
.SH EXAMPLES
.TP
Count some VFS calls until Ctrl-C is hit:
#
.B vfscount
.SH FIELDS
.TP
ADDR
Address of the instruction pointer that was traced (only useful if the FUNC column is suspicious and you would like to double check the translation).
.TP
FUNC
Kernel function name
.TP
COUNT
Number of calls while tracing
.SH OVERHEAD
This traces various kernel vfs functions and maintains in-kernel counts, which
are asynchronously copied to user-space. While the rate of VFS operations can
be very high (>1M/sec), this is a relatively efficient way to trace these
events, and so the overhead is expected to be small for normal workloads.
Measure in a test environment.
.SH SOURCE
This is from bcc.
.IP
https://github.com/iovisor/bcc
.PP
Also look in the bcc distribution for a companion _examples.txt file containing
example usage, output, and commentary for this tool.
.SH OS
Linux
.SH STABILITY
Unstable - in development.
.SH AUTHOR
Brendan Gregg
.SH SEE ALSO
vfsstat(8)
.TH vfsstat 8 "2015-08-18" "USER COMMANDS"
.SH NAME
vfsstat \- Statistics for some common VFS calls. Uses Linux eBPF/bcc.
.SH SYNOPSIS
.B vfsstat
[interval [count]]
.SH DESCRIPTION
This traces some common VFS calls and prints per-second summaries. This can
be useful for general workload characterization, and looking for patterns
in operation usage over time.
This works by tracing some kernel vfs functions using dynamic tracing, and will
need updating to match any changes to these functions. Edit the script to
customize which functions are traced. Also see vfscount, which is more
easily customized to trace multiple functions.
Since this uses BPF, only the root user can use this tool.
.SH REQUIREMENTS
CONFIG_BPF and bcc.
.SH EXAMPLES
.TP
Print summaries each second:
#
.B vfsstat
.TP
Print output every five seconds, three times:
#
.B vfsstat 5 3
.SH FIELDS
.TP
READ/s
Number of vfs_read() calls as a per-second average.
.TP
WRITE/s
Number of vfs_write() calls as a per-second average.
.TP
CREATE/s
Number of vfs_create() calls as a per-second average.
.TP
OPEN/s
Number of vfs_open() calls as a per-second average.
.TP
FSYNC/s
Number of vfs_fsync() calls as a per-second average.
.SH OVERHEAD
This traces various kernel vfs functions and maintains in-kernel counts, which
are asynchronously copied to user-space. While the rate of VFS operations can
be very high (>1M/sec), this is a relatively efficient way to trace these
events, and so the overhead is expected to be small for normal workloads.
Measure in a test environment.
.SH SOURCE
This is from bcc.
.IP
https://github.com/iovisor/bcc
.PP
Also look in the bcc distribution for a companion _examples.txt file containing
example usage, output, and commentary for this tool.
.SH OS
Linux
.SH STABILITY
Unstable - in development.
.SH AUTHOR
Brendan Gregg
.SH SEE ALSO
vfscount(8)
#!/usr/bin/python
#
# pidpersec Count new processes (via fork).
# For Linux, uses BCC, eBPF. See .c file.
#
# USAGE: pidpersec
#
# Written as a basic example of counting an event.
#
# Copyright (c) 2015 Brendan Gregg.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 11-Aug-2015 Brendan Gregg Created this.
from bpf import BPF
from ctypes import c_ushort, c_int, c_ulonglong
from time import sleep, strftime
# load BPF program
b = BPF(src_file = "pidpersec.c")
BPF.attach_kprobe(b.load_func("do_count", BPF.KPROBE), "sched_fork")
stats = b.get_table("stats", c_int, c_ulonglong)
# stat indexes
S_COUNT = 1
# header
print "Tracing... Ctrl-C to end."
# output
last = 0
while (1):
try:
sleep(1)
except KeyboardInterrupt:
pass; exit()
print "%s: PIDs/sec: %d" % (strftime("%H:%M:%S"),
(stats[c_int(S_COUNT)].value - last))
last = stats[c_int(S_COUNT)].value
/*
* pidpersec.c Count new processes (via fork).
* For Linux, uses BCC, eBPF. See the Python front-end.
*
* USAGE: pidpersec.py
*
* Copyright (c) 2015 Brendan Gregg.
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* 11-Aug-2015 Brendan Gregg Created this.
*/
#include <uapi/linux/ptrace.h>
enum stat_types {
S_COUNT = 1,
S_MAXSTAT
};
BPF_TABLE("array", int, u64, stats, S_MAXSTAT + 1);
void stats_increment(int key) {
u64 *leaf = stats.lookup(&key);
if (leaf) (*leaf)++;
}
void do_count(struct pt_regs *ctx) { stats_increment(S_COUNT); }
Demonstrations of pidpersec, the Linux eBPF/bcc version.
This shows the number of new processes created per second, measured by tracing
the kernel fork() routine:
# ./pidpersec
Tracing... Ctrl-C to end.
18:33:06: PIDs/sec: 4
18:33:07: PIDs/sec: 5
18:33:08: PIDs/sec: 4
18:33:09: PIDs/sec: 4
18:33:10: PIDs/sec: 21
18:33:11: PIDs/sec: 5
18:33:12: PIDs/sec: 4
18:33:13: PIDs/sec: 4
Each second there are four new processes (this happens to be caused by a
launcher script that is retrying in a loop, and encountering errors).
At 18:33:10, I typed "man ls" in another server session, which caused an
increase in the number of new processes as the necessary commands were run.
#!/usr/bin/python
#
# syncsnoop Trace sync() syscall.
# For Linux, uses BCC, eBPF. Embedded C.
#
# Written as a basic example of BCC trace & reformat. See
# examples/hello_world.py for a BCC trace with default output example.
#
# Copyright (c) 2015 Brendan Gregg.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 13-Aug-2015 Brendan Gregg Created this.
from bpf import BPF
import sys
# load BPF program
b = BPF(text = """
int do_sync(void *ctx) {
bpf_trace_printk("sync()\\n");
return 0;
};
""")
BPF.attach_kprobe(b.load_func("do_sync", BPF.KPROBE), "sys_sync")
# header
print "%-18s %s" % ("TIME(s)", "CALL")
# open trace pipe
try:
trace = open("/sys/kernel/debug/tracing/trace_pipe", "r")
except:
print >> sys.stderr, "ERROR: opening trace_pipe"
exit(1)
# format output
while 1:
try:
line = trace.readline().rstrip()
except KeyboardInterrupt:
pass; exit()
prolog, time_s, colon, word = line.rsplit(" ", 3)
time_s = time_s[:-1] # strip trailing ":"
print "%-18s %s" % (time_s, word)
Demonstrations of syncsnoop, the Linux eBPF/bcc version.
This program traces calls to the kernel sync() routine, with basic timestamps:
# ./syncsnoop
TIME(s) CALL
16458148.611952 sync()
16458151.533709 sync()
^C
While tracing, the "sync" command was executed in another server session.
This can be useful to identify that sync() is being called, and its frequency.
#!/usr/bin/python
#
# vfscount Count some VFS calls.
# For Linux, uses BCC, eBPF. See .c file.
#
# Written as a basic example of counting functions.
#
# Copyright (c) 2015 Brendan Gregg.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 14-Aug-2015 Brendan Gregg Created this.
from bpf import BPF
from ctypes import c_ushort, c_int, c_ulonglong
from time import sleep, strftime
from sys import stderr
# kernel symbol translation
ksym_addrs = [] # addresses for binary search
ksym_names = [] # same index as ksym_addrs
def load_kallsyms():
symfile = "/proc/kallsyms"
try:
syms = open(symfile, "r")
except:
print >> stderr, "ERROR: reading " + symfile
exit()
line = syms.readline()
for line in iter(syms):
cols = line.split()
name = cols[2]
if name[:4] != "vfs_": # perf optimization
continue
addr = int(cols[0], 16)
ksym_addrs.append(addr)
ksym_names.append(name)
syms.close()
def ksym(addr):
start = -1
end = len(ksym_addrs)
while end != start + 1:
mid = (start + end) / 2
if addr < ksym_addrs[mid]:
end = mid
else:
start = mid
if start == -1:
return "[unknown]"
return ksym_names[start]
load_kallsyms()
# load BPF program
b = BPF(src_file = "vfscount.c")
fn = b.load_func("do_count", BPF.KPROBE)
BPF.attach_kprobe(fn, "vfs_read")
BPF.attach_kprobe(fn, "vfs_write")
BPF.attach_kprobe(fn, "vfs_fsync")
BPF.attach_kprobe(fn, "vfs_open")
BPF.attach_kprobe(fn, "vfs_create")
counts = b.get_table("counts")
# header
print "Tracing... Ctrl-C to end."
# output
try:
sleep(99999999)
except KeyboardInterrupt:
pass
print "\n%-16s %-12s %8s" % ("ADDR", "FUNC", "COUNT")
for k, v in sorted(counts.items(), key=lambda counts: counts[1].value):
print "%-16x %-12s %8d" % (k.ip, ksym(k.ip), v.value)
/*
* vfscount.c Count some VFS calls.
* For Linux, uses BCC, eBPF. See the Python front-end.
*
* USAGE: vfscount.py
*
* Copyright (c) 2015 Brendan Gregg.
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* 14-Aug-2015 Brendan Gregg Created this.
*/
#include <uapi/linux/ptrace.h>
struct key_t {
u64 ip;
};
BPF_TABLE("hash", struct key_t, u64, counts, 256);
int do_count(struct pt_regs *ctx) {
struct key_t key = {};
u64 zero = 0, *val;
key.ip = ctx->ip;
val = counts.lookup_or_init(&key, &zero);
(*val)++;
return 0;
}
Demonstrations of vfscount, the Linux eBPF/bcc version.
This counts VFS calls, by tracing various kernel calls beginning with "vfs_"
(edit the script to customize):
# ./vfscount
Tracing... Ctrl-C to end.
^C
ADDR FUNC COUNT
ffffffff811f2cc1 vfs_create 24
ffffffff811e71c1 vfs_write 203
ffffffff811e6061 vfs_open 765
ffffffff811e7091 vfs_read 1852
This can be useful for workload characterization, to see what types of
operations are in use.
#!/usr/bin/python
#
# vfsstat Count some VFS calls.
# For Linux, uses BCC, eBPF. See .c file.
#
# Written as a basic example of counting multiple events as a stat tool.
#
# USAGE: vfsstat [interval [count]]
#
# Copyright (c) 2015 Brendan Gregg.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 14-Aug-2015 Brendan Gregg Created this.
from __future__ import print_function
from bpf import BPF
from ctypes import c_ushort, c_int, c_ulonglong
from time import sleep, strftime
from sys import argv
def usage():
print("USAGE: %s [interval [count]]" % argv[0])
exit()
# arguments
interval = 1
count = -1
if len(argv) > 1:
try:
interval = int(argv[1])
if interval == 0:
raise
if len(argv) > 2:
count = int(argv[2])
except: # also catches -h, --help
usage()
# load BPF program
b = BPF(src_file = "vfsstat.c")
BPF.attach_kprobe(b.load_func("do_read", BPF.KPROBE), "vfs_read")
BPF.attach_kprobe(b.load_func("do_write", BPF.KPROBE), "vfs_write")
BPF.attach_kprobe(b.load_func("do_fsync", BPF.KPROBE), "vfs_fsync")
BPF.attach_kprobe(b.load_func("do_open", BPF.KPROBE), "vfs_open")
BPF.attach_kprobe(b.load_func("do_create", BPF.KPROBE), "vfs_create")
stats = b.get_table("stats", c_int, c_ulonglong)
# stat column labels and indexes
stat_types = {
"READ" : 1,
"WRITE" : 2,
"FSYNC" : 3,
"OPEN" : 4,
"CREATE" : 5
}
# header
print("%-8s " % "TIME", end="")
last = {}
for stype in stat_types.keys():
print(" %8s" % (stype + "/s"), end="")
idx = stat_types[stype]
last[idx] = 0
print("")
# output
i = 0
while (1):
if count > 0:
i += 1
if i > count:
exit()
try:
sleep(interval)
except KeyboardInterrupt:
pass; exit()
print("%-8s: " % strftime("%H:%M:%S"), end="")
# print each statistic as a column
for stype in stat_types.keys():
idx = stat_types[stype]
try:
delta = stats[c_int(idx)].value - last[idx]
print(" %8d" % (delta / interval), end="")
last[idx] = stats[c_int(idx)].value
except:
print(" %8d" % 0, end="")
print("")
/*
* vfsstat.c Count some VFS calls.
* For Linux, uses BCC, eBPF. See the Python front-end.
*
* USAGE: vfsstat.py [interval [count]]
*
* Copyright (c) 2015 Brendan Gregg.
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* 14-Aug-2015 Brendan Gregg Created this.
*/
#include <uapi/linux/ptrace.h>
enum stat_types {
S_READ = 1,
S_WRITE,
S_FSYNC,
S_OPEN,
S_CREATE,
S_MAXSTAT
};
BPF_TABLE("array", int, u64, stats, S_MAXSTAT + 1);
void stats_increment(int key) {
u64 *leaf = stats.lookup(&key);
if (leaf) (*leaf)++;
}
void do_read(struct pt_regs *ctx) { stats_increment(S_READ); }
void do_write(struct pt_regs *ctx) { stats_increment(S_WRITE); }
void do_fsync(struct pt_regs *ctx) { stats_increment(S_FSYNC); }
void do_open(struct pt_regs *ctx) { stats_increment(S_OPEN); }
void do_create(struct pt_regs *ctx) { stats_increment(S_CREATE); }
Demonstrations of vfsstat, the Linux eBPF/bcc version.
This traces some common VFS calls and prints per-second summaries. By default,
the output interval is one second:
# ./vfsstat
TIME READ/s WRITE/s CREATE/s OPEN/s FSYNC/s
18:35:32: 231 12 4 98 0
18:35:33: 274 13 4 106 0
18:35:34: 586 86 4 251 0
18:35:35: 241 15 4 99 0
18:35:36: 232 10 4 98 0
18:35:37: 244 10 4 107 0
18:35:38: 235 13 4 97 0
18:35:39: 6749 2633 4 1446 0
18:35:40: 277 31 4 115 0
18:35:41: 238 16 6 102 0
18:35:42: 284 50 8 114 0
^C
Here we are using an output interval of five seconds, and printing three output
lines:
# ./vfsstat 5 3
TIME READ/s WRITE/s CREATE/s OPEN/s FSYNC/s
18:35:55: 238 8 3 101 0
18:36:00: 962 233 4 247 0
18:36:05: 241 8 3 100 0
Full usage:
# ./vfsstat -h
USAGE: ./vfsstat [interval [count]]
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment