Commit 9fa15627 authored by Brendan Gregg's avatar Brendan Gregg

biolatency

parent 74016c3f
......@@ -64,6 +64,7 @@ Examples:
Tools:
- tools/[biolatency](tools/biolatency): Summarize block device I/O latency as a histogram. [Examples](tools/biolatency_example.txt).
- tools/[biosnoop](tools/biosnoop): Trace block device I/O with PID and latency. [Examples](tools/biosnoop_example.txt).
- tools/[funccount](tools/funccount): Count kernel function calls. [Examples](tools/funccount_example.txt).
- tools/[funclatency](tools/funclatency): Time kernel functions and show their latency distribution. [Examples](tools/funclatency_example.txt).
......
.TH biolatency 8 "2015-08-20" "USER COMMANDS"
.SH NAME
biolatency \- Summarize block device I/O latency as a histogram.
.SH SYNOPSIS
.B biolatency [\-h] [\-T] [\-Q] [\-m] [interval [count]]
.SH DESCRIPTION
biolatency traces block device I/O (disk I/O), and records the distribution
of I/O latency (time). This is printed as a histogram either on Ctrl-C, or
after a given interval in seconds.
The latency of the disk I/O is measured from the issue to the device to its
completion. A \-Q option can be used to include time queued in the kernel.
This tool uses in-kernel eBPF maps for storing timestamps and the histogram,
for efficiency.
This works by tracing various kernel blk_*() functions using dynamic tracing,
and will need updating to match any changes to these functions.
Since this uses BPF, only the root user can use this tool.
.SH REQUIREMENTS
CONFIG_BPF and bcc.
.SH OPTIONS
\-h
Print usage message.
.TP
\-T
Include timestamps on output.
.TP
\-m
Output histogram in milliseconds.
.TP
interval
Output interval, in seconds.
.TP
count
Number of outputs.
.SH EXAMPLES
.TP
Summarize block device I/O latency as a histogram:
#
.B biolatency
.TP
Print 1 second summaries, 10 times:
#
.B biolatency 1 10
.TP
Print 1 second summaries, using milliseconds as units for the histogram, and
include timestamps on output:
#
.B biolatency \-mT 1
.TP
Include OS queued time in I/O time:
#
.B biolatency \-Q
.SH FIELDS
.TP
usecs
Microsecond range
.TP
mecs
Millisecond range
.TP
count
How many I/O fell into this range
.TP
distribution
An ASCII bar chart to visualize the distribution (count column)
.SH OVERHEAD
This traces kernel functions and maintains in-kernel timestamps and a histgroam,
which are asynchronously copied to user-space. This method is very efficient,
and the overhead for most storage I/O rates (< 10k IOPS) should be negligible.
If you have a higher IOPS storage environment, test and quantify the overhead
before use.
.SH SOURCE
This is from bcc.
.IP
https://github.com/iovisor/bcc
.PP
Also look in the bcc distribution for a companion _examples.txt file containing
example usage, output, and commentary for this tool.
.SH OS
Linux
.SH STABILITY
Unstable - in development.
.SH AUTHOR
Brendan Gregg
.SH SEE ALSO
biosnoop(8)
#!/usr/bin/python
#
# biolatency Summarize block device I/O latency as a histogram.
# For Linux, uses BCC, eBPF.
#
# USAGE: biolatency [-h] [-T] [-Q] [-m] [interval] [count]
#
# Copyright (c) 2015 Brendan Gregg.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 20-Sep-2015 Brendan Gregg Created this.
from __future__ import print_function
from bcc import BPF
from time import sleep, strftime
import argparse
# arguments
examples = """examples:
./biolatency # summarize block I/O latency as a histogram
./biolatency 1 10 # print 1 second summaries, 10 times
./biolatency -mT 1 # 1s summaries, milliseconds, and timestamps
./biolatency -Q # include OS queued time in I/O time
"""
parser = argparse.ArgumentParser(
description="Summarize block device I/O latency as a histogram",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog=examples)
parser.add_argument("-T", "--timestamp", action="store_true",
help="include timestamp on output")
parser.add_argument("-Q", "--queued", action="store_true",
help="include OS queued time in I/O time")
parser.add_argument("-m", "--milliseconds", action="store_true",
help="millisecond histogram")
parser.add_argument("interval", nargs="?", default=99999999,
help="output interval, in seconds")
parser.add_argument("count", nargs="?", default=99999999,
help="number of outputs")
args = parser.parse_args()
countdown = int(args.count)
debug = 0
# load BPF program
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <linux/blkdev.h>
BPF_TABLE(\"array\", int, u64, dist, 64);
BPF_HASH(start, struct request *);
// time block I/O
int trace_req_start(struct pt_regs *ctx, struct request *req)
{
u64 ts = bpf_ktime_get_ns();
start.update(&req, &ts);
return 0;
}
// output
int trace_req_completion(struct pt_regs *ctx, struct request *req)
{
u64 *tsp, delta;
// fetch timestamp and calculate delta
tsp = start.lookup(&req);
if (tsp == 0) {
return 0; // missed issue
}
delta = bpf_ktime_get_ns() - *tsp;
FACTOR
// store as histogram
int index = bpf_log2l(delta);
u64 *leaf = dist.lookup(&index);
if (leaf) (*leaf)++;
start.delete(&req);
return 0;
}
"""
if args.milliseconds:
bpf_text = bpf_text.replace('FACTOR', 'delta /= 1000000;')
label = "msecs"
else:
bpf_text = bpf_text.replace('FACTOR', 'delta /= 1000;')
label = "usecs"
if debug:
print(bpf_text)
# load BPF program
b = BPF(text=bpf_text)
if args.queued:
b.attach_kprobe(event="blk_account_io_start", fn_name="trace_req_start")
else:
b.attach_kprobe(event="blk_start_request", fn_name="trace_req_start")
b.attach_kprobe(event="blk_mq_start_request", fn_name="trace_req_start")
b.attach_kprobe(event="blk_account_io_completion",
fn_name="trace_req_completion")
print("Tracing block device I/O... Hit Ctrl-C to end.")
# output
exiting = 0 if args.interval else 1
dist = b.get_table("dist")
while (1):
try:
sleep(int(args.interval))
except KeyboardInterrupt:
exiting=1
print()
if args.timestamp:
print("%-8s\n" % strftime("%H:%M:%S"), end="")
dist.print_log2_hist(label)
dist.clear()
countdown -= 1
if exiting or countdown == 0:
exit()
Demonstrations of biolatency, the Linux eBPF/bcc version.
biolatency traces block device I/O (disk I/O), and records the distribution
of I/O latency (time), printing this as a histogram when Ctrl-C is hit.
For example:
# ./biolatency
Tracing block device I/O... Hit Ctrl-C to end.
^C
usecs : count distribution
0 -> 1 : 0 | |
2 -> 3 : 0 | |
4 -> 7 : 0 | |
8 -> 15 : 0 | |
16 -> 31 : 0 | |
32 -> 63 : 0 | |
64 -> 127 : 1 | |
128 -> 255 : 12 |******** |
256 -> 511 : 15 |********** |
512 -> 1023 : 43 |******************************* |
1024 -> 2047 : 52 |**************************************|
2048 -> 4095 : 47 |********************************** |
4096 -> 8191 : 52 |**************************************|
8192 -> 16383 : 36 |************************** |
16384 -> 32767 : 15 |********** |
32768 -> 65535 : 2 |* |
65536 -> 131071 : 2 |* |
The latency of the disk I/O is measured from the issue to the device to its
completion. A -Q option can be used to include time queued in the kernel.
This example output shows a large mode of latency from about 128 microseconds
to about 32767 microseconds (33 milliseconds). The bulk of the I/O was
between 1 and 8 ms, which is the expected block device latency for
rotational storage devices.
The highest latency seen while tracing was between 65 and 131 milliseconds:
the last row printed, for which there were 2 I/O.
For efficiency, biolatency uses an in-kernel eBPF map to store timestamps
with requests, and another in-kernel map to store the histogram (the "count")
column, which is copied to user-space only when output is printed. These
methods lower the perormance overhead when tracing is performed.
In the following example, the -m option is used to print a histogram using
milliseconds as the units (which eliminates the first several rows), -T to
print timestamps with the output, and to print 1 second summaries 5 times:
# ./biolatency -mT 1 5
Tracing block device I/O... Hit Ctrl-C to end.
06:20:16
msecs : count distribution
0 -> 1 : 36 |**************************************|
2 -> 3 : 1 |* |
4 -> 7 : 3 |*** |
8 -> 15 : 17 |***************** |
16 -> 31 : 33 |********************************** |
32 -> 63 : 7 |******* |
64 -> 127 : 6 |****** |
06:20:17
msecs : count distribution
0 -> 1 : 96 |************************************ |
2 -> 3 : 25 |********* |
4 -> 7 : 29 |*********** |
8 -> 15 : 62 |*********************** |
16 -> 31 : 100 |**************************************|
32 -> 63 : 62 |*********************** |
64 -> 127 : 18 |****** |
06:20:18
msecs : count distribution
0 -> 1 : 68 |************************* |
2 -> 3 : 76 |**************************** |
4 -> 7 : 20 |******* |
8 -> 15 : 48 |***************** |
16 -> 31 : 103 |**************************************|
32 -> 63 : 49 |****************** |
64 -> 127 : 17 |****** |
06:20:19
msecs : count distribution
0 -> 1 : 522 |*************************************+|
2 -> 3 : 225 |**************** |
4 -> 7 : 38 |** |
8 -> 15 : 8 | |
16 -> 31 : 1 | |
06:20:20
msecs : count distribution
0 -> 1 : 436 |**************************************|
2 -> 3 : 106 |********* |
4 -> 7 : 34 |** |
8 -> 15 : 19 |* |
16 -> 31 : 1 | |
How the I/O latency distribution changes over time can be seen.
The -Q option begins measuring I/O latency from when the request was first
queued in the kernel, and includes queuing latency:
# ./biolatency -Q
Tracing block device I/O... Hit Ctrl-C to end.
^C
usecs : count distribution
0 -> 1 : 0 | |
2 -> 3 : 0 | |
4 -> 7 : 0 | |
8 -> 15 : 0 | |
16 -> 31 : 0 | |
32 -> 63 : 0 | |
64 -> 127 : 0 | |
128 -> 255 : 3 |* |
256 -> 511 : 37 |************** |
512 -> 1023 : 30 |*********** |
1024 -> 2047 : 18 |******* |
2048 -> 4095 : 22 |******** |
4096 -> 8191 : 14 |***** |
8192 -> 16383 : 48 |******************* |
16384 -> 32767 : 96 |**************************************|
32768 -> 65535 : 31 |************ |
65536 -> 131071 : 26 |********** |
131072 -> 262143 : 12 |**** |
This better reflects the latency suffered by the application (if it is
synchronous I/O), whereas the default mode without kernel queueing better
reflects the performance of the device.
Note that the storage device (and storage device controller) usually have
queues of their own, which are always included in the latency, with or
without -Q.
USAGE message:
# ./biolatency -h
usage: biolatency [-h] [-T] [-Q] [-m] [interval] [count]
Summarize block device I/O latency as a histogram
positional arguments:
interval output interval, in seconds
count number of outputs
optional arguments:
-h, --help show this help message and exit
-T, --timestamp include timestamp on output
-Q, --queued include OS queued time in I/O time
-m, --milliseconds millisecond histogram
examples:
./biolatency # summarize block I/O latency as a histogram
./biolatency 1 10 # print 1 second summaries, 10 times
./biolatency -mT 1 # 1s summaries, milliseconds, and timestamps
./biolatency -Q # include OS queued time in I/O time
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment