Commit 24956989 authored by Sasha Goldshtein's avatar Sasha Goldshtein

dbstat: Collect histograms of MySQL/PostgreSQL query latencies

This tool traces MySQL/PostgreSQL queries, and aggregates their
latencies into a histogram. The histogram is then printed when the
tool is stopped, or at user-specified intervals.
parent dd37d638
......@@ -71,4 +71,4 @@ Unstable - in development.
.SH AUTHOR
Sasha Goldshtein, Brendan Gregg
.SH SEE ALSO
biosnoop(8), mysqld_qslower(8)
biosnoop(8), mysqld_qslower(8), dbstat(8)
.TH dbstat 8 "2017-02-15" "USER COMMANDS"
.SH NAME
dbstat \- Collect histograms of MySQL/PostgreSQL query latencies.
.SH SYNOPSIS
. B dbstat [-v] [-p PID [PID ...]] [-m THRESHOLD] [-u] [-i INTERVAL] {mysql,postgres}
.SH DESCRIPTION
This traces queries served by a MySQL or PostgreSQL server, and collects a
histogram of query latencies. The histogram is printed at the end of collection,
or at specified intervals.
This uses User Statically-Defined Tracing (USDT) probes, a feature added to
MySQL and PostgreSQL for DTrace support, but which may not be enabled on a
given installation. See requirements.
Since this uses BPF, only the root user can use this tool.
.SH REQUIREMENTS
CONFIG_BPF, bcc, and MySQL server with USDT probe support (when configuring
the build: \-DENABLE_DTRACE=1) or PostgreSQL server with USDT probe support
(when configuring the build: \-\-enable-dtrace).
.SH OPTIONS
\-h
Print usage message.
.TP
\-p PID
Trace this PID. If no PID is specified, the tool will attempt to automatically
detect the MySQL or PostgreSQL processes running on the system.
.TP
\-m THRESHOLD
Minimum query latency (duration) to trace, in milliseconds.
Default is all queries.
.TP
\-u
Display query latencies in microseconds (default: milliseconds).
.TP
\-i INTERVAL
Print summaries (histograms) at this interval, specified in seconds.
.TP
{mysql,postgres}
The database engine to trace.
.SH EXAMPLES
.TP
Display histogram of MySQL query latencies:
#
.B dbstat mysql
.TP
Display histogram of PostgreSQL query latencies slower than 10ms in pid 408:
#
.B dbstat postgres -p 408 -m 10
.TP
Display histogram of PostgreSQL query latencies at 3-second intervals:
#
.B dbstat postgres -i 3
.SH OVERHEAD
This adds low-overhead instrumentation to queries, and only emits output
data from kernel to user-level if they query exceeds the threshold. If the
server query rate is less than 1,000/sec, the overhead is expected to be
negligible. If the query rate is higher, test to gauge overhead.
.SH SOURCE
This is from bcc.
.IP
https://github.com/iovisor/bcc
.PP
Also look in the bcc distribution for a companion _examples.txt file containing
example usage, output, and commentary for this tool.
.SH OS
Linux
.SH STABILITY
Unstable - in development.
.SH AUTHOR
Sasha Goldshtein
.SH SEE ALSO
dbslower(8)
#!/usr/bin/python
#
# dbstat Display a histogram of MySQL and PostgreSQL query latencies.
#
# USAGE: dbstat [-v] [-p PID [PID ...]] [-m THRESHOLD] [-u]
# [-i INTERVAL] {mysql,postgres}
#
# This tool uses USDT probes, which means it needs MySQL and PostgreSQL built
# with USDT (DTrace) support.
#
# Copyright 2017, Sasha Goldshtein
# Licensed under the Apache License, Version 2.0
#
# 15-Feb-2017 Sasha Goldshtein Created this.
from bcc import BPF, USDT
import argparse
import subprocess
from time import sleep, strftime
examples = """
dbstat postgres # display a histogram of PostgreSQL query latencies
dbstat mysql -v # display MySQL latencies and print the BPF program
dbstat mysql -u # display query latencies in microseconds (default: ms)
dbstat mysql -m 5 # trace only queries slower than 5ms
dbstat mysql -p 408 # trace queries in a specific process
"""
parser = argparse.ArgumentParser(
description="",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog=examples)
parser.add_argument("-v", "--verbose", action="store_true",
help="print the BPF program")
parser.add_argument("db", choices=["mysql", "postgres"],
help="the database engine to use")
parser.add_argument("-p", "--pid", type=int, nargs='*',
dest="pids", metavar="PID", help="the pid(s) to trace")
parser.add_argument("-m", "--threshold", type=int, default=0,
help="trace queries slower than this threshold (ms)")
parser.add_argument("-u", "--microseconds", action="store_true",
help="display query latencies in microseconds (default: milliseconds)")
parser.add_argument("-i", "--interval", type=int, default=99999999999,
help="print summary at this interval (seconds)")
args = parser.parse_args()
if not args.pids or len(args.pids) == 0:
if args.db == "mysql":
args.pids = map(int, subprocess.check_output(
"pidof mysqld".split()).split())
elif args.db == "postgres":
args.pids = map(int, subprocess.check_output(
"pidof postgres".split()).split())
program = """
#include <linux/ptrace.h>
BPF_HASH(temp, u64, u64);
BPF_HISTOGRAM(latency);
int probe_start(struct pt_regs *ctx) {
u64 timestamp = bpf_ktime_get_ns();
u64 pid = bpf_get_current_pid_tgid();
temp.update(&pid, &timestamp);
return 0;
}
int probe_end(struct pt_regs *ctx) {
u64 *timestampp;
u64 pid = bpf_get_current_pid_tgid();
timestampp = temp.lookup(&pid);
if (!timestampp)
return 0;
u64 delta = bpf_ktime_get_ns() - *timestampp;
FILTER
delta /= SCALE;
latency.increment(bpf_log2l(delta));
temp.delete(&pid);
return 0;
}
"""
program = program.replace("SCALE", str(1000 if args.microseconds else 1000000))
program = program.replace("FILTER", "" if args.threshold == 0 else \
"if (delta / 1000000 < %d) { return 0; }" % args.threshold)
usdts = map(lambda pid: USDT(pid=pid), args.pids)
for usdt in usdts:
usdt.enable_probe("query__start", "probe_start")
usdt.enable_probe("query__done", "probe_end")
bpf = BPF(text=program, usdt_contexts=usdts)
if args.verbose:
print('\n'.join(map(lambda u: u.get_text(), usdts)))
print(program)
print("Tracing database queries for pids %s slower than %d ms..." %
(', '.join(map(str, args.pids)), args.threshold))
latencies = bpf["latency"]
def print_hist():
print("[%s]" % strftime("%H:%M:%S"))
latencies.print_log2_hist("query latency (%s)" %
("us" if args.microseconds else "ms"))
print("")
latencies.clear()
while True:
try:
sleep(args.interval)
print_hist()
except KeyboardInterrupt:
print_hist()
break
Demonstrations of dbstat, the Linux eBPF/bcc version.
dbstat traces queries performed by a MySQL or PostgreSQL database process, and
displays a histogram of query latencies. For example:
# dbstat mysql
Tracing database queries for pids 25776 slower than 0 ms...
query latency (ms) : count distribution
0 -> 1 : 990 |****************************************|
2 -> 3 : 7 | |
4 -> 7 : 0 | |
8 -> 15 : 0 | |
16 -> 31 : 0 | |
32 -> 63 : 0 | |
64 -> 127 : 0 | |
128 -> 255 : 0 | |
256 -> 511 : 0 | |
512 -> 1023 : 0 | |
1024 -> 2047 : 2 | |
^C
It's immediately evident that the vast majority of queries finish very quickly,
in under 1ms, but there are some super-slow queries occasionally, in the 1-2
seconds bucket.
We can filter out the shorter queries with the -m switch:
# dbstat mysql -m 1000
Tracing database queries for pids 25776 slower than 1000 ms...
query latency (ms) : count distribution
0 -> 1 : 0 | |
2 -> 3 : 0 | |
4 -> 7 : 0 | |
8 -> 15 : 0 | |
16 -> 31 : 0 | |
32 -> 63 : 0 | |
64 -> 127 : 0 | |
128 -> 255 : 0 | |
256 -> 511 : 0 | |
512 -> 1023 : 0 | |
1024 -> 2047 : 8 |****************************************|
^C
By default, dbstat will try to detect mysqld and postgres processes, but if
necessary, you can specify the process ids with the -p switch. Here, the -i
switch is also used to request histograms at 3 second intervals:
# dbstat mysql -p $(pidof mysql) -i 3
Tracing database queries for pids 25776 slower than 0 ms...
[06:14:36]
query latency (ms) : count distribution
0 -> 1 : 758 |****************************************|
2 -> 3 : 1 | |
4 -> 7 : 0 | |
8 -> 15 : 0 | |
16 -> 31 : 0 | |
32 -> 63 : 0 | |
64 -> 127 : 0 | |
128 -> 255 : 0 | |
256 -> 511 : 0 | |
512 -> 1023 : 0 | |
1024 -> 2047 : 1 | |
[06:14:39]
query latency (ms) : count distribution
0 -> 1 : 436 |****************************************|
2 -> 3 : 2 | |
4 -> 7 : 0 | |
8 -> 15 : 0 | |
16 -> 31 : 0 | |
32 -> 63 : 0 | |
64 -> 127 : 0 | |
128 -> 255 : 0 | |
256 -> 511 : 0 | |
512 -> 1023 : 0 | |
1024 -> 2047 : 1 | |
[06:14:42]
query latency (ms) : count distribution
0 -> 1 : 399 |****************************************|
2 -> 3 : 0 | |
4 -> 7 : 0 | |
8 -> 15 : 0 | |
16 -> 31 : 0 | |
32 -> 63 : 0 | |
64 -> 127 : 0 | |
128 -> 255 : 0 | |
256 -> 511 : 0 | |
512 -> 1023 : 0 | |
1024 -> 2047 : 1 | |
^C
USAGE:
# dbstat -h
usage: dbstat.py [-h] [-v] [-p [PID [PID ...]]] [-m THRESHOLD] [-u]
[-i INTERVAL]
{mysql,postgres}
positional arguments:
{mysql,postgres} the database engine to use
optional arguments:
-h, --help show this help message and exit
-v, --verbose print the BPF program
-p [PID [PID ...]], --pid [PID [PID ...]]
the pid(s) to trace
-m THRESHOLD, --threshold THRESHOLD
trace queries slower than this threshold (ms)
-u, --microseconds display query latencies in microseconds (default:
milliseconds)
-i INTERVAL, --interval INTERVAL
print summary at this interval (seconds)
dbstat postgres # display a histogram of PostgreSQL query latencies
dbstat mysql -v # display MySQL latencies and print the BPF program
dbstat mysql -u # display query latencies in microseconds (default: ms)
dbstat mysql -m 5 # trace only queries slower than 5ms
dbstat mysql -p 408 # trace queries in a specific process
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment