Commit a1ac2f90 authored by Jean-Tiare Le Bigot's avatar Jean-Tiare Le Bigot

add solisten tool. Currently TCP only.

parent 5ee0c7aa
......@@ -100,6 +100,7 @@ Examples:
- tools/[pidpersec](tools/pidpersec.py): Count new processes (via fork). [Examples](tools/pidpersec_example.txt).
- tools/[runqlat](tools/runqlat.py): Run queue (scheduler) latency as a histogram. [Examples](tools/runqlat_example.txt).
- tools/[softirqs](tools/softirqs.py): Measure soft IRQ (soft interrupt) event time. [Examples](tools/softirqs_example.txt).
- tools/[solisten](tools/solisten.py): Trace TCP socket listen. [Examples](tools/solisten_example.txt).
- tools/[stackcount](tools/stackcount.py): Count kernel function calls and their stack traces. [Examples](tools/stackcount_example.txt).
- tools/[stacksnoop](tools/stacksnoop.py): Trace a kernel function and print all kernel stack traces. [Examples](tools/stacksnoop_example.txt).
- tools/[statsnoop](tools/statsnoop.py): Trace stat() syscalls. [Examples](tools/statsnoop_example.txt).
......
#!/usr/bin/env python
#
# solisten Trace TCP listen events
# For Linux, uses BCC, eBPF. Embedded C.
#
# USAGE: solisten.py [-h] [-p PID] [--show-netns]
#
# This is provided as a basic example of TCP connection & socket tracing.
# It could be usefull in scenarios where load balancers needs to be updated
# dynamically as application is fully initialized.
#
# All IPv4 listen attempts are traced, even if they ultimately fail or the
# the listening program is not willing to accept().
#
# Copyright (c) 2016 Jean-Tiare Le Bigot.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 04-Mar-2016 Jean-Tiare Le Bigot Created this.
import os
import socket
import netaddr
import argparse
from bcc import BPF
import ctypes as ct
# Arguments
examples = """Examples:
./solisten.py # Stream socket listen
./solisten.py -p 1234 # Stream socket listen for specified PID only
./solisten.py --netns 4242 # Stream socket listen for specified network namespace ID only
./solisten.py --show-netns # Show network namespace ID. Probably usefull if you run containers
"""
parser = argparse.ArgumentParser(
description="Stream sockets listen",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog=examples)
parser.add_argument("--show-netns", action="store_true",
help="show network namespace")
parser.add_argument("-p", "--pid", default=0, type=int,
help="trace this PID only")
parser.add_argument("-n", "--netns", default=0, type=int,
help="trace this Network Namespace only")
# BPF Program
bpf_text = """
#include <net/sock.h>
#include <net/inet_sock.h>
#include <net/net_namespace.h>
#include <bcc/proto.h>
// Endian conversion. We can't use kernel version here as it uses inline
// assembly, neither libc version as we can't import it here. Adapted from both.
#if defined(__LITTLE_ENDIAN)
#define bcc_be32_to_cpu(x) ((u32)(__builtin_bswap32)((x)))
#define bcc_be64_to_cpu(x) ((u64)(__builtin_bswap64)((x)))
#elif defined(__BIG_ENDIAN)
#define bcc_be32_to_cpu(x) (x)
#define bcc_be64_to_cpu(x) (x)
#else
#error Host endianness not defined
#endif
// Common structure for UDP/TCP IPv4/IPv6
struct listen_evt_t {
u64 ts_us;
u64 pid_tgid;
u64 backlog;
u64 netns;
u64 proto; // familiy << 16 | type
u64 lport; // use only 16 bits
u64 laddr[2]; // IPv4: store in laddr[0]
char task[TASK_COMM_LEN];
};
BPF_PERF_OUTPUT(listen_evt);
// Send an event for each IPv4 listen with PID, bound address and port
int kprobe__inet_listen(struct pt_regs *ctx, struct socket *sock, int backlog)
{
// cast types. Intermediate cast not needed, kept for readability
struct sock *sk = sock->sk;
struct inet_sock *inet = inet_sk(sk);
// Built event for userland
struct listen_evt_t evt = {
.ts_us = bpf_ktime_get_ns() / 1000,
.backlog = backlog,
};
// Get process comm. Needs LLVM >= 3.7.1 see https://github.com/iovisor/bcc/issues/393
bpf_get_current_comm(evt.task, TASK_COMM_LEN);
// Get socket IP family
u16 family = sk->__sk_common.skc_family;
evt.proto = family << 16 | SOCK_STREAM;
// Get PID
evt.pid_tgid = bpf_get_current_pid_tgid();
##FILTER_PID##
// Get port
bpf_probe_read(&evt.lport, sizeof(u16), &(inet->inet_sport));
evt.lport = ntohs(evt.lport);
// Get network namespace id, if kernel supports it
#ifdef CONFIG_NET_NS
evt.netns = sk->__sk_common.skc_net.net->ns.inum;
#else
evt.netns = 0;
#endif
##FILTER_NETNS##
// Get IP
if (family == AF_INET) {
bpf_probe_read(evt.laddr, sizeof(u32), &(inet->inet_rcv_saddr));
evt.laddr[0] = bcc_be32_to_cpu(evt.laddr[0]);
} else if (family == AF_INET6) {
bpf_probe_read(evt.laddr, sizeof(evt.laddr), sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
evt.laddr[0] = bcc_be64_to_cpu(evt.laddr[0]);
evt.laddr[1] = bcc_be64_to_cpu(evt.laddr[1]);
}
// Send event to userland
listen_evt.perf_submit(ctx, &evt, sizeof(evt));
return 0;
};
"""
# event data
TASK_COMM_LEN = 16 # linux/sched.h
class ListenEvt(ct.Structure):
_fields_ = [
("ts_us", ct.c_ulonglong),
("pid_tgid", ct.c_ulonglong),
("backlog", ct.c_ulonglong),
("netns", ct.c_ulonglong),
("proto", ct.c_ulonglong),
("lport", ct.c_ulonglong),
("laddr", ct.c_ulonglong * 2),
("task", ct.c_char * TASK_COMM_LEN)
]
# TODO: properties to unpack protocol / ip / pid / tgid ...
# Format output
def event_printer(show_netns):
def print_event(cpu, data, size):
# Decode event
event = ct.cast(data, ct.POINTER(ListenEvt)).contents
pid = event.pid_tgid & 0xffffffff
proto_family = event.proto & 0xff
proto_type = event.proto >> 16 & 0xff
if proto_family == socket.SOCK_STREAM:
protocol = "TCP"
elif proto_family == socket.SOCK_DGRAM:
protocol = "UDP"
else:
protocol = "UNK"
address = ""
if proto_type == socket.AF_INET:
protocol += "v4"
address = netaddr.IPAddress(event.laddr[0])
elif proto_type == socket.AF_INET6:
address = netaddr.IPAddress(event.laddr[0]<<64 | event.laddr[1], version=6)
protocol += "v6"
# Display
if show_netns:
print("%-6d %-12.12s %-12s %-6s %-8s %-5s %-39s" % (
pid, event.task, event.netns, protocol, event.backlog,
event.lport, address,
))
else:
print("%-6d %-12.12s %-6s %-8s %-5s %-39s" % (
pid, event.task, protocol, event.backlog,
event.lport, address,
))
return print_event
if __name__ == "__main__":
# Parse arguments
args = parser.parse_args()
pid_filter = ""
netns_filter = ""
if args.pid:
pid_filter = "if (evt.pid_tgid != %d) return 0;" % args.pid
if args.netns:
netns_filter = "if (evt.netns != %d) return 0;" % args.netns
bpf_text = bpf_text.replace("##FILTER_PID##", pid_filter)
bpf_text = bpf_text.replace("##FILTER_NETNS##", netns_filter)
# Initialize BPF
b = BPF(text=bpf_text)
b["listen_evt"].open_perf_buffer(event_printer(args.show_netns))
# Print headers
if args.show_netns:
print("%-6s %-12s %-12s %-6s %-8s %-5s %-39s" % ("PID", "COMM", "NETNS", "PROTO", "BACKLOG", "PORT", "ADDR"))
else:
print("%-6s %-12s %-6s %-8s %-5s %-39s" % ("PID", "COMM", "PROTO", "BACKLOG", "PORT", "ADDR"))
# Read events
while 1:
b.kprobe_poll()
Demonstrations of solisten.py, the Linux eBPF/bcc version.
This tool traces the kernel function called when a program wants to listen
for TCP connections. It will not see UDP neither UNIX domain sockets.
It can be used to dynamically update a load balancer as a program is actually
ready to accept connexion, hence avoiding the "downtime" while it is initializing.
# ./solisten.py --show-netns
PID COMM NETNS PROTO BACKLOG ADDR PORT
3643 nc 4026531957 TCPv4 1 0.0.0.0 4242
3659 nc 4026531957 TCPv6 1 2001:f0d0:1002:51::4 4242
4221 redis-server 4026532165 TCPv6 128 :: 6379
4221 redis-server 4026532165 TCPv4 128 0.0.0.0 6379
6067 nginx 4026531957 TCPv4 128 0.0.0.0 80
6067 nginx 4026531957 TCPv6 128 :: 80
6069 nginx 4026531957 TCPv4 128 0.0.0.0 80
6069 nginx 4026531957 TCPv6 128 :: 80
6069 nginx 4026531957 TCPv4 128 0.0.0.0 80
6069 nginx 4026531957 TCPv6 128 :: 80
This output show the listen event from 3 programs. Netcat was started twice as
shown by the 2 different PIDs. The first time on the wilcard IPv4, the second
time on an IPv6. Netcat being a "one shot" program. It can accept a single
connection, hence the backlog of "1".
The next program is redis-server. As the netns column shows, it is in a
different network namespace than netcat and nginx. In this specific case
it was launched in a docker container. It listens both on IPv4 and IPv4
with up to 128 pending connections.
Determining the actual container is out if the scope of this tool. It could
be derived by scrapping /proc/<PID>/cgroup. Note that this is racy.
The overhead of this tool is negligeable as it traces listen() calls which are
invoked in the initialization path of a program. The operation part will remain
unaffected. In particular, accept() calls will not be affected. Neither
individual read() and write().
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment