Commit dd37d638 authored by Sasha Goldshtein's avatar Sasha Goldshtein

dbslower: MySQL/PostgreSQL query tracing tool

This tool traces MySQL/PostgreSQL queries, including an optional
minimum duration threshold. This is based on `mysqld_qslower` but
adapted to automatically detect MySQL/PostgreSQL processes, and a
couple of other code fixes.

I believe at this time, `mysqld_qslower` can be retired, as this
tool completely supersedes it.
parent 5f354e5a
.TH dbslower 8 "2017-02-15" "USER COMMANDS"
.SH NAME
dbslower \- Trace MySQL/PostgreSQL server queries slower than a threshold.
.SH SYNOPSIS
.B dbslower [-v] [-p PID [PID ...]] [-m THRESHOLD] {mysql,postgres}
.SH DESCRIPTION
This traces queries served by a MySQL or PostgreSQL server, and prints
those that exceed a latency (query time) threshold. By default a threshold of
1 ms is used.
This uses User Statically-Defined Tracing (USDT) probes, a feature added to
MySQL and PostgreSQL for DTrace support, but which may not be enabled on a
given installation. See requirements.
Since this uses BPF, only the root user can use this tool.
.SH REQUIREMENTS
CONFIG_BPF, bcc, and MySQL server with USDT probe support (when configuring
the build: \-DENABLE_DTRACE=1) or PostgreSQL server with USDT probe support
(when configuring the build: \-\-enable-dtrace).
.SH OPTIONS
\-h
Print usage message.
.TP
\-p PID
Trace this PID. If no PID is specified, the tool will attempt to automatically
detect the MySQL or PostgreSQL processes running on the system.
.TP
\-m THRESHOLD
Minimum query latency (duration) to trace, in milliseconds. Default is 1 ms.
.TP
{mysql,postgres}
The database engine to trace.
.SH EXAMPLES
.TP
Trace MySQL server queries slower than 1 ms:
#
.B dbslower mysql
.TP
Trace slower than 10 ms for PostgreSQL in process 408:
#
.B dbslower postgres -p 408 -m 10
.SH FIELDS
.TP
TIME(s)
Time of query start, in seconds.
.TP
PID
Process ID of the traced server.
.TP
MS
Milliseconds for the query, from start to end.
.TP
QUERY
Query string, truncated to 256 characters.
.SH OVERHEAD
This adds low-overhead instrumentation to queries, and only emits output
data from kernel to user-level if they query exceeds the threshold. If the
server query rate is less than 1,000/sec, the overhead is expected to be
negligible. If the query rate is higher, test to gauge overhead.
.SH SOURCE
This is from bcc.
.IP
https://github.com/iovisor/bcc
.PP
Also look in the bcc distribution for a companion _examples.txt file containing
example usage, output, and commentary for this tool.
.SH OS
Linux
.SH STABILITY
Unstable - in development.
.SH AUTHOR
Sasha Goldshtein, Brendan Gregg
.SH SEE ALSO
biosnoop(8), mysqld_qslower(8)
#!/usr/bin/python
#
# dbslower Trace MySQL and PostgreSQL queries slower than a threshold.
#
# USAGE: dbslower [-v] [-p PID [PID ...]] [-m THRESHOLD] {mysql,postgres}
#
# By default, a threshold of 1ms is used. Set the threshold to 0 to trace all
# queries (verbose). If no PID is provided, the script attempts to discover
# all MySQL or PostgreSQL database processes.
#
# This tool uses USDT probes, which means it needs MySQL and PostgreSQL built
# with USDT (DTrace) support.
#
# Strongly inspired by Brendan Gregg's work on the mysqld_qslower script.
#
# Copyright 2017, Sasha Goldshtein
# Licensed under the Apache License, Version 2.0
#
# 15-Feb-2017 Sasha Goldshtein Created this.
from bcc import BPF, USDT
import argparse
import ctypes as ct
import subprocess
examples = """examples:
dbslower postgres # trace PostgreSQL queries slower than 1ms
dbslower postgres -p 188 322 # trace specific PostgreSQL processes
dbslower mysql -p 480 -m 30 # trace MySQL queries slower than 30ms
dbslower mysql -p 480 -v # trace MySQL queries and print the BPF program
"""
parser = argparse.ArgumentParser(
description="",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog=examples)
parser.add_argument("-v", "--verbose", action="store_true",
help="print the BPF program")
parser.add_argument("db", choices=["mysql", "postgres"],
help="the database engine to use")
parser.add_argument("-p", "--pid", type=int, nargs='*',
dest="pids", metavar="PID", help="the pid(s) to trace")
parser.add_argument("-m", "--threshold", type=int, default=1,
help="trace queries slower than this threshold (ms)")
args = parser.parse_args()
if not args.pids or len(args.pids) == 0:
if args.db == "mysql":
args.pids = map(int, subprocess.check_output(
"pidof mysqld".split()).split())
elif args.db == "postgres":
args.pids = map(int, subprocess.check_output(
"pidof postgres".split()).split())
threshold_ns = args.threshold * 1000000
program = """
#include <linux/ptrace.h>
struct temp_t {
u64 timestamp;
char *query;
};
struct data_t {
u64 pid;
u64 timestamp;
u64 duration;
char query[256];
};
BPF_HASH(temp, u64, struct temp_t);
BPF_PERF_OUTPUT(events);
int probe_start(struct pt_regs *ctx) {
struct temp_t tmp = {};
tmp.timestamp = bpf_ktime_get_ns();
bpf_usdt_readarg(1, ctx, &tmp.query);
u64 pid = bpf_get_current_pid_tgid();
temp.update(&pid, &tmp);
return 0;
}
int probe_end(struct pt_regs *ctx) {
struct temp_t *tempp;
u64 pid = bpf_get_current_pid_tgid();
tempp = temp.lookup(&pid);
if (!tempp)
return 0;
u64 delta = bpf_ktime_get_ns() - tempp->timestamp;
if (delta >= """ + str(threshold_ns) + """) {
struct data_t data = {};
data.pid = pid >> 32; // only process id
data.timestamp = tempp->timestamp;
data.duration = delta;
bpf_probe_read(&data.query, sizeof(data.query), tempp->query);
events.perf_submit(ctx, &data, sizeof(data));
}
temp.delete(&pid);
return 0;
}
"""
usdts = map(lambda pid: USDT(pid=pid), args.pids)
for usdt in usdts:
usdt.enable_probe("query__start", "probe_start")
usdt.enable_probe("query__done", "probe_end")
bpf = BPF(text=program, usdt_contexts=usdts)
if args.verbose:
print('\n'.join(map(lambda u: u.get_text(), usdts)))
print(program)
class Data(ct.Structure):
_fields_ = [
("pid", ct.c_ulonglong),
("timestamp", ct.c_ulonglong),
("delta", ct.c_ulonglong),
("query", ct.c_char * 256)
]
start = BPF.monotonic_time()
def print_event(cpu, data, size):
event = ct.cast(data, ct.POINTER(Data)).contents
print("%-14.6f %-6d %8.3f %s" % (
float(event.timestamp - start) / 1000000000,
event.pid, float(event.delta) / 1000000, event.query))
print("Tracing database queries for pids %s slower than %d ms..." %
(', '.join(map(str, args.pids)), args.threshold))
print("%-14s %-6s %8s %s" % ("TIME(s)", "PID", "MS", "QUERY"))
bpf["events"].open_perf_buffer(print_event)
while True:
bpf.kprobe_poll()
Demonstrations of dbslower, the Linux eBPF/bcc version.
dbslower traces queries served by a MySQL or PostgreSQL server, and prints
those that exceed a latency (query time) threshold. By default a threshold of
1 ms is used. For example:
# dbslower mysql
Tracing database queries for pids 25776 slower than 1 ms...
TIME(s) PID MS QUERY
1.315800 25776 2000.999 call getproduct(97)
3.360380 25776 3.226 call getproduct(6)
^C
This traced two queries slower than 1ms, one of which is very slow: over 2
seconds. We can filter out the shorter ones and keep only the really slow ones:
# dbslower mysql -m 1000
Tracing database queries for pids 25776 slower than 1000 ms...
TIME(s) PID MS QUERY
1.421264 25776 2002.183 call getproduct(97)
3.572617 25776 2001.381 call getproduct(97)
5.661411 25776 2001.867 call getproduct(97)
7.748296 25776 2001.329 call getproduct(97)
^C
This looks like a pattern -- we keep making this slow query every 2 seconds
or so, and it takes approximately 2 seconds to run.
By default, dbslower will try to detect mysqld and postgres processes, but if
necessary, you can specify the process ids with the -p switch:
# dbslower mysql -p $(pidof mysql)
Tracing database queries for pids 25776 slower than 1 ms...
TIME(s) PID MS QUERY
2.002125 25776 3.340 call getproduct(7)
2.045006 25776 2001.558 call getproduct(97)
4.131863 25776 2002.275 call getproduct(97)
6.190513 25776 3.248 call getproduct(33)
^C
Specifying 0 as the threshold will print all the queries:
# dbslower mysql -m 0
Tracing database queries for pids 25776 slower than 0 ms...
TIME(s) PID MS QUERY
6.003720 25776 2.363 /* mysql-connector-java-5.1.40 ( Revision: 402933ef52cad9aa82624e80acbea46e3a701ce6 ) */SELECT @@session.auto_increment_increment AS auto_increment_increment, @@character_set_client AS character_set_client, @@character_set_connection AS character_set_conn
6.599219 25776 0.068 SET NAMES latin1
6.613944 25776 0.057 SET character_set_results = NULL
6.645228 25776 0.059 SET autocommit=1
6.653798 25776 0.059 SET sql_mode='NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION,STRICT_TRANS_TABLES'
6.682184 25776 2.526 select * from users where id = 0
6.767888 25776 0.288 select id from products where userid = 0
6.790642 25776 2.255 call getproduct(0)
6.809865 25776 0.218 call getproduct(1)
6.846878 25776 0.248 select * from users where id = 1
6.847623 25776 0.166 select id from products where userid = 1
6.867363 25776 0.244 call getproduct(2)
6.868162 25776 0.107 call getproduct(3)
6.874726 25776 0.208 select * from users where id = 2
6.881722 25776 0.260 select id from products where userid = 2
^C
Here we can see the MySQL connector initialization and connection establishment,
before the actual queries start coming in.
USAGE:
# dbslower -h
usage: dbslower.py [-h] [-v] [-p [PIDS [PIDS ...]]] [-m THRESHOLD]
{mysql,postgres}
positional arguments:
{mysql,postgres} the database engine to use
optional arguments:
-h, --help show this help message and exit
-v, --verbose print the BPF program
-p [PID [PID ...]], --pid [PID [PID ...]]
the pid(s) to trace
-m THRESHOLD, --threshold THRESHOLD
trace queries slower than this threshold (ms)
examples:
dbslower postgres # trace PostgreSQL queries slower than 1ms
dbslower postgres -p 188 322 # trace specific PostgreSQL processes
dbslower mysql -p 480 -m 30 # trace MySQL queries slower than 30ms
dbslower mysql -p 480 -v # trace MySQL queries and print the BPF program
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment