Commit 11de2985 authored by Mark Drayton, committed by 4ast

IPv6 support for tcp* tools (#582)

* tcpretrans: support full IPv6 addresses, fix --lossprobe

* tcpaccept: support full IPv6 addresses, fix timestamps

* tcpconnect: support full IPv6 addresses, fix timestamps

* tcpconnlat: support full IPv6 addresses, fix timestamps
parent 83064b0e
...@@ -55,12 +55,10 @@ IP ...@@ -55,12 +55,10 @@ IP
IP address family (4 or 6) IP address family (4 or 6)
.TP .TP
RADDR RADDR
Remote IP address. IPv4 as a dotted quad, IPv6 shows "..." then the last 4 Remote IP address.
bytes (check for newer versions of this tool for the full address).
.TP .TP
LADDR LADDR
Local IP address. IPv4 as a dotted quad, IPv6 shows "..." then the last 4 Local IP address.
bytes (check for newer versions of this tool for the full address).
.TP .TP
LPORT LPORT
Local port Local port
......
...@@ -58,12 +58,10 @@ IP ...@@ -58,12 +58,10 @@ IP
IP address family (4 or 6). IP address family (4 or 6).
.TP .TP
SADDR SADDR
Source IP address. IPv4 as a dotted quad, IPv6 shows "..." then the last 4 Source IP address.
bytes (check for newer versions of this tool for the full address).
.TP .TP
DADDR DADDR
Destination IP address. IPv4 as a dotted quad, IPv6 shows "..." then the last 4 Destination IP address.
bytes (check for newer versions of this tool for the full address).
.TP .TP
DPORT DPORT
Destination port Destination port
......
...@@ -47,8 +47,7 @@ IP ...@@ -47,8 +47,7 @@ IP
IP address family (4 or 6). IP address family (4 or 6).
.TP .TP
LADDR LADDR
Local IP address. IPv4 as a dotted quad, IPv6 shows "..." then the last 4 Local IP address.
bytes (check for newer versions of this tool for the full address).
.TP .TP
LPORT LPORT
Local port. Local port.
...@@ -57,8 +56,7 @@ T> ...@@ -57,8 +56,7 @@ T>
Type of event: R> == retransmit, L> == tail loss probe. Type of event: R> == retransmit, L> == tail loss probe.
.TP .TP
RADDR RADDR
Remote IP address. IPv4 as a dotted quad, IPv6 shows "..." then the last 4 Remote IP address.
bytes (check for newer versions of this tool for the full address).
.TP .TP
RPORT RPORT
Remote port. Remote port.
......
...@@ -9,10 +9,6 @@ ...@@ -9,10 +9,6 @@
# This uses dynamic tracing of the kernel inet_csk_accept() socket function # This uses dynamic tracing of the kernel inet_csk_accept() socket function
# (from tcp_prot.accept), and will need to be modified to match kernel changes. # (from tcp_prot.accept), and will need to be modified to match kernel changes.
# #
# IPv4 addresses are printed as dotted quads. For IPv6 addresses, the last four
# bytes are printed after "..."; check for future versions with better IPv6
# support.
#
# Copyright (c) 2015 Brendan Gregg. # Copyright (c) 2015 Brendan Gregg.
# Licensed under the Apache License, Version 2.0 (the "License") # Licensed under the Apache License, Version 2.0 (the "License")
# #
...@@ -21,6 +17,8 @@ ...@@ -21,6 +17,8 @@
from __future__ import print_function from __future__ import print_function
from bcc import BPF from bcc import BPF
from socket import inet_ntop, AF_INET, AF_INET6
from struct import pack
import argparse import argparse
import ctypes as ct import ctypes as ct
...@@ -52,21 +50,20 @@ struct ipv4_data_t { ...@@ -52,21 +50,20 @@ struct ipv4_data_t {
// XXX: switch some to u32's when supported // XXX: switch some to u32's when supported
u64 ts_us; u64 ts_us;
u64 pid; u64 pid;
u64 ip;
u64 saddr; u64 saddr;
u64 daddr; u64 daddr;
u64 ip;
u64 lport; u64 lport;
char task[TASK_COMM_LEN]; char task[TASK_COMM_LEN];
}; };
BPF_PERF_OUTPUT(ipv4_events); BPF_PERF_OUTPUT(ipv4_events);
struct ipv6_data_t { struct ipv6_data_t {
// XXX: update to transfer full ipv6 addrs
u64 ts_us; u64 ts_us;
u64 pid; u64 pid;
unsigned __int128 saddr;
unsigned __int128 daddr;
u64 ip; u64 ip;
u64 saddr;
u64 daddr;
u64 lport; u64 lport;
char task[TASK_COMM_LEN]; char task[TASK_COMM_LEN];
}; };
...@@ -106,14 +103,10 @@ int kretprobe__inet_csk_accept(struct pt_regs *ctx) ...@@ -106,14 +103,10 @@ int kretprobe__inet_csk_accept(struct pt_regs *ctx)
} else if (family == AF_INET6) { } else if (family == AF_INET6) {
struct ipv6_data_t data6 = {.pid = pid, .ip = 6}; struct ipv6_data_t data6 = {.pid = pid, .ip = 6};
data6.ts_us = bpf_ktime_get_ns() / 1000; data6.ts_us = bpf_ktime_get_ns() / 1000;
// just grab the last 4 bytes for now bpf_probe_read(&data6.saddr, sizeof(data6.saddr),
u32 saddr = 0, daddr = 0; &newsk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
bpf_probe_read(&saddr, sizeof(saddr), bpf_probe_read(&data6.daddr, sizeof(data6.daddr),
&newsk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32[3]); &newsk->__sk_common.skc_v6_daddr.in6_u.u6_addr32);
bpf_probe_read(&daddr, sizeof(daddr),
&newsk->__sk_common.skc_v6_daddr.in6_u.u6_addr32[3]);
data6.saddr = bpf_ntohl(saddr);
data6.daddr = bpf_ntohl(daddr);
data6.lport = lport; data6.lport = lport;
bpf_get_current_comm(&data6.task, sizeof(data6.task)); bpf_get_current_comm(&data6.task, sizeof(data6.task));
ipv6_events.perf_submit(ctx, &data6, sizeof(data6)); ipv6_events.perf_submit(ctx, &data6, sizeof(data6));
...@@ -135,23 +128,25 @@ if debug: ...@@ -135,23 +128,25 @@ if debug:
# event data # event data
TASK_COMM_LEN = 16 # linux/sched.h TASK_COMM_LEN = 16 # linux/sched.h
class Data_ipv4(ct.Structure): class Data_ipv4(ct.Structure):
_fields_ = [ _fields_ = [
("ts_us", ct.c_ulonglong), ("ts_us", ct.c_ulonglong),
("pid", ct.c_ulonglong), ("pid", ct.c_ulonglong),
("ip", ct.c_ulonglong),
("saddr", ct.c_ulonglong), ("saddr", ct.c_ulonglong),
("daddr", ct.c_ulonglong), ("daddr", ct.c_ulonglong),
("ip", ct.c_ulonglong),
("lport", ct.c_ulonglong), ("lport", ct.c_ulonglong),
("task", ct.c_char * TASK_COMM_LEN) ("task", ct.c_char * TASK_COMM_LEN)
] ]
class Data_ipv6(ct.Structure): class Data_ipv6(ct.Structure):
_fields_ = [ _fields_ = [
("ts_us", ct.c_ulonglong), ("ts_us", ct.c_ulonglong),
("pid", ct.c_ulonglong), ("pid", ct.c_ulonglong),
("saddr", (ct.c_ulonglong * 2)),
("daddr", (ct.c_ulonglong * 2)),
("ip", ct.c_ulonglong), ("ip", ct.c_ulonglong),
("saddr", ct.c_ulonglong),
("daddr", ct.c_ulonglong),
("lport", ct.c_ulonglong), ("lport", ct.c_ulonglong),
("task", ct.c_char * TASK_COMM_LEN) ("task", ct.c_char * TASK_COMM_LEN)
] ]
...@@ -159,21 +154,25 @@ class Data_ipv6(ct.Structure): ...@@ -159,21 +154,25 @@ class Data_ipv6(ct.Structure):
# process event # process event
def print_ipv4_event(cpu, data, size): def print_ipv4_event(cpu, data, size):
event = ct.cast(data, ct.POINTER(Data_ipv4)).contents event = ct.cast(data, ct.POINTER(Data_ipv4)).contents
global start_ts
if args.timestamp: if args.timestamp:
if start_ts == 0: if start_ts == 0:
start_ts = event.ts_us start_ts = event.ts_us
print("%-9.3f" % ((event.ts_us - start_ts) / 100000), end="") print("%-9.3f" % ((float(event.ts_us) - start_ts) / 1000000), end="")
print("%-6d %-12.12s %-2d %-16s %-16s %-4d" % (event.pid, event.task, print("%-6d %-12.12s %-2d %-16s %-16s %-4d" % (event.pid, event.task,
event.ip, inet_ntoa(event.daddr), inet_ntoa(event.saddr), event.ip, inet_ntop(AF_INET, pack("I", event.daddr)),
event.lport)) inet_ntop(AF_INET, pack("I", event.saddr)), event.lport))
def print_ipv6_event(cpu, data, size): def print_ipv6_event(cpu, data, size):
event = ct.cast(data, ct.POINTER(Data_ipv6)).contents event = ct.cast(data, ct.POINTER(Data_ipv6)).contents
global start_ts
if args.timestamp: if args.timestamp:
if start_ts == 0: if start_ts == 0:
start_ts = event.ts_us start_ts = event.ts_us
print("%-9.3f" % ((event.ts_us - start_ts) / 100000), end="") print("%-9.3f" % ((float(event.ts_us) - start_ts) / 1000000), end="")
print("%-6d %-12.12s %-2d ...%-13x ...%-13x %-4d" % (event.pid, print("%-6d %-12.12s %-2d %-16s %-16s %-4d" % (event.pid, event.task,
event.task, event.ip, event.daddr, event.saddr, event.lport)) event.ip, inet_ntop(AF_INET6, event.daddr),
inet_ntop(AF_INET6, event.saddr), event.lport))
# initialize BPF # initialize BPF
b = BPF(text=bpf_text) b = BPF(text=bpf_text)
...@@ -186,15 +185,6 @@ print("%-6s %-12s %-2s %-16s %-16s %-4s" % ("PID", "COMM", "IP", "RADDR", ...@@ -186,15 +185,6 @@ print("%-6s %-12s %-2s %-16s %-16s %-4s" % ("PID", "COMM", "IP", "RADDR",
start_ts = 0 start_ts = 0
def inet_ntoa(addr):
dq = ''
for i in range(0, 4):
dq = dq + str(addr & 0xff)
if (i != 3):
dq = dq + '.'
addr = addr >> 8
return dq
# read events # read events
b["ipv4_events"].open_perf_buffer(print_ipv4_event) b["ipv4_events"].open_perf_buffer(print_ipv4_event)
b["ipv6_events"].open_perf_buffer(print_ipv6_event) b["ipv6_events"].open_perf_buffer(print_ipv6_event)
......
...@@ -9,14 +9,11 @@ addresses changed to protect the innocent): ...@@ -9,14 +9,11 @@ addresses changed to protect the innocent):
PID COMM IP RADDR LADDR LPORT PID COMM IP RADDR LADDR LPORT
907 sshd 4 192.168.56.1 192.168.56.102 22 907 sshd 4 192.168.56.1 192.168.56.102 22
907 sshd 4 127.0.0.1 127.0.0.1 22 907 sshd 4 127.0.0.1 127.0.0.1 22
5389 perl 6 ...fec0ae21 ...fec0ae21 7001 5389 perl 6 1234:ab12:2040:5020:2299:0:5:0 1234:ab12:2040:5020:2299:0:5:0 7001
This output shows three connections, two to PID 907, an "sshd" process listening This output shows three connections, two IPv4 connections to PID 907, an "sshd"
on port 22, and one to a "perl" process listening on port 7001. process listening on port 22, and one IPv6 connection to a "perl" process
listening on port 7001.
The sshd connections were IPv4, and the addresses are printed as dotted quads.
The perl connection was IPv6, and the last 4 bytes of each address is printed
(for now; check for updated versions).
The overhead of this tool should be negligible, since it is only tracing the The overhead of this tool should be negligible, since it is only tracing the
kernel function performing accept. It is not tracing every packet and then kernel function performing accept. It is not tracing every packet and then
...@@ -31,6 +28,7 @@ The -t option prints a timestamp column: ...@@ -31,6 +28,7 @@ The -t option prints a timestamp column:
# ./tcpaccept -t # ./tcpaccept -t
TIME(s) PID COMM IP RADDR LADDR LPORT TIME(s) PID COMM IP RADDR LADDR LPORT
0.000 907 sshd 4 127.0.0.1 127.0.0.1 22 0.000 907 sshd 4 127.0.0.1 127.0.0.1 22
0.010 5389 perl 6 1234:ab12:2040:5020:2299:0:5:0 1234:ab12:2040:5020:2299:0:5:0 7001
0.992 907 sshd 4 127.0.0.1 127.0.0.1 22 0.992 907 sshd 4 127.0.0.1 127.0.0.1 22
1.984 907 sshd 4 127.0.0.1 127.0.0.1 22 1.984 907 sshd 4 127.0.0.1 127.0.0.1 22
......
...@@ -11,10 +11,6 @@ ...@@ -11,10 +11,6 @@
# This uses dynamic tracing of kernel functions, and will need to be updated # This uses dynamic tracing of kernel functions, and will need to be updated
# to match kernel changes. # to match kernel changes.
# #
# IPv4 addresses are printed as dotted quads. For IPv6 addresses, the last four
# bytes are printed after "..."; check for future versions with better IPv6
# support.
#
# Copyright (c) 2015 Brendan Gregg. # Copyright (c) 2015 Brendan Gregg.
# Licensed under the Apache License, Version 2.0 (the "License") # Licensed under the Apache License, Version 2.0 (the "License")
# #
...@@ -24,8 +20,8 @@ ...@@ -24,8 +20,8 @@
from __future__ import print_function from __future__ import print_function
from bcc import BPF from bcc import BPF
import argparse import argparse
import re from socket import inet_ntop, AF_INET, AF_INET6
from struct import pack, unpack_from from struct import pack
import ctypes as ct import ctypes as ct
# arguments # arguments
...@@ -58,9 +54,9 @@ struct ipv4_data_t { ...@@ -58,9 +54,9 @@ struct ipv4_data_t {
// XXX: switch some to u32's when supported // XXX: switch some to u32's when supported
u64 ts_us; u64 ts_us;
u64 pid; u64 pid;
u64 ip;
u64 saddr; u64 saddr;
u64 daddr; u64 daddr;
u64 ip;
u64 dport; u64 dport;
char task[TASK_COMM_LEN]; char task[TASK_COMM_LEN];
}; };
...@@ -69,9 +65,9 @@ BPF_PERF_OUTPUT(ipv4_events); ...@@ -69,9 +65,9 @@ BPF_PERF_OUTPUT(ipv4_events);
struct ipv6_data_t { struct ipv6_data_t {
u64 ts_us; u64 ts_us;
u64 pid; u64 pid;
unsigned __int128 saddr;
unsigned __int128 daddr;
u64 ip; u64 ip;
u64 saddr[2];
u64 daddr[2];
u64 dport; u64 dport;
char task[TASK_COMM_LEN]; char task[TASK_COMM_LEN];
}; };
...@@ -125,15 +121,10 @@ static int trace_connect_return(struct pt_regs *ctx, short ipver) ...@@ -125,15 +121,10 @@ static int trace_connect_return(struct pt_regs *ctx, short ipver)
} else /* 6 */ { } else /* 6 */ {
struct ipv6_data_t data6 = {.pid = pid, .ip = ipver}; struct ipv6_data_t data6 = {.pid = pid, .ip = ipver};
data6.ts_us = bpf_ktime_get_ns() / 1000; data6.ts_us = bpf_ktime_get_ns() / 1000;
// just grab the last 4 bytes for now bpf_probe_read(&data6.saddr, sizeof(data6.saddr),
bpf_probe_read(&data6.saddr[0], sizeof(data6.saddr[0]), &skp->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
&skp->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32[0]); bpf_probe_read(&data6.daddr, sizeof(data6.daddr),
bpf_probe_read(&data6.saddr[1], sizeof(data6.saddr[1]), &skp->__sk_common.skc_v6_daddr.in6_u.u6_addr32);
&skp->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32[2]);
bpf_probe_read(&data6.daddr[0], sizeof(data6.daddr[0]),
&skp->__sk_common.skc_v6_daddr.in6_u.u6_addr32[0]);
bpf_probe_read(&data6.daddr[1], sizeof(data6.daddr[1]),
&skp->__sk_common.skc_v6_daddr.in6_u.u6_addr32[2]);
data6.dport = ntohs(dport); data6.dport = ntohs(dport);
bpf_get_current_comm(&data6.task, sizeof(data6.task)); bpf_get_current_comm(&data6.task, sizeof(data6.task));
ipv6_events.perf_submit(ctx, &data6, sizeof(data6)); ipv6_events.perf_submit(ctx, &data6, sizeof(data6));
...@@ -166,23 +157,25 @@ if debug: ...@@ -166,23 +157,25 @@ if debug:
# event data # event data
TASK_COMM_LEN = 16 # linux/sched.h TASK_COMM_LEN = 16 # linux/sched.h
class Data_ipv4(ct.Structure): class Data_ipv4(ct.Structure):
_fields_ = [ _fields_ = [
("ts_us", ct.c_ulonglong), ("ts_us", ct.c_ulonglong),
("pid", ct.c_ulonglong), ("pid", ct.c_ulonglong),
("ip", ct.c_ulonglong),
("saddr", ct.c_ulonglong), ("saddr", ct.c_ulonglong),
("daddr", ct.c_ulonglong), ("daddr", ct.c_ulonglong),
("ip", ct.c_ulonglong),
("dport", ct.c_ulonglong), ("dport", ct.c_ulonglong),
("task", ct.c_char * TASK_COMM_LEN) ("task", ct.c_char * TASK_COMM_LEN)
] ]
class Data_ipv6(ct.Structure): class Data_ipv6(ct.Structure):
_fields_ = [ _fields_ = [
("ts_us", ct.c_ulonglong), ("ts_us", ct.c_ulonglong),
("pid", ct.c_ulonglong), ("pid", ct.c_ulonglong),
("saddr", (ct.c_ulonglong * 2)),
("daddr", (ct.c_ulonglong * 2)),
("ip", ct.c_ulonglong), ("ip", ct.c_ulonglong),
("saddr", ct.c_ulonglong * 2),
("daddr", ct.c_ulonglong * 2),
("dport", ct.c_ulonglong), ("dport", ct.c_ulonglong),
("task", ct.c_char * TASK_COMM_LEN) ("task", ct.c_char * TASK_COMM_LEN)
] ]
...@@ -190,25 +183,25 @@ class Data_ipv6(ct.Structure): ...@@ -190,25 +183,25 @@ class Data_ipv6(ct.Structure):
# process event # process event
def print_ipv4_event(cpu, data, size): def print_ipv4_event(cpu, data, size):
event = ct.cast(data, ct.POINTER(Data_ipv4)).contents event = ct.cast(data, ct.POINTER(Data_ipv4)).contents
global start_ts
if args.timestamp: if args.timestamp:
if start_ts == 0: if start_ts == 0:
start_ts = event.ts_us start_ts = event.ts_us
print("%-9.3f" % ((event.ts_us - start_ts) / 100000), end="") print("%-9.3f" % ((float(event.ts_us) - start_ts) / 1000000), end="")
print("%-6d %-12.12s %-2d %-16s %-16s %-4d" % (event.pid, event.task, print("%-6d %-12.12s %-2d %-16s %-16s %-4d" % (event.pid, event.task,
event.ip, inet_ntoa(event.saddr), inet_ntoa(event.daddr), event.ip, inet_ntop(AF_INET, pack("I", event.saddr)),
event.dport)) inet_ntop(AF_INET, pack("I", event.daddr)), event.dport))
def print_ipv6_event(cpu, data, size): def print_ipv6_event(cpu, data, size):
event = ct.cast(data, ct.POINTER(Data_ipv6)).contents event = ct.cast(data, ct.POINTER(Data_ipv6)).contents
global start_ts
if args.timestamp: if args.timestamp:
if start_ts == 0: if start_ts == 0:
start_ts = event.ts_us start_ts = event.ts_us
print("%-9.3f" % ((event.ts_us - start_ts) / 100000), end="") print("%-9.3f" % ((float(event.ts_us) - start_ts) / 1000000), end="")
print("%-6d %-12.12s %-2d %-16s %-16s %-4d" % (event.pid, print("%-6d %-12.12s %-2d %-16s %-16s %-4d" % (event.pid,
event.task, event.ip, event.task, event.ip, inet_ntop(AF_INET6, event.saddr),
inet6_ntoa(event.saddr[1] << 64 | event.saddr[0]), inet_ntop(AF_INET6, event.daddr), event.dport))
inet6_ntoa(event.daddr[1] << 64 | event.daddr[0]),
event.dport))
# initialize BPF # initialize BPF
b = BPF(text=bpf_text) b = BPF(text=bpf_text)
...@@ -225,25 +218,6 @@ print("%-6s %-12s %-2s %-16s %-16s %-4s" % ("PID", "COMM", "IP", "SADDR", ...@@ -225,25 +218,6 @@ print("%-6s %-12s %-2s %-16s %-16s %-4s" % ("PID", "COMM", "IP", "SADDR",
start_ts = 0 start_ts = 0
def inet_ntoa(addr):
# u32 to dotted quad string
dq = ''
for i in range(0, 4):
dq = dq + str(addr & 0xff)
if (i != 3):
dq = dq + '.'
addr = addr >> 8
return dq
def inet6_ntoa(addr):
# see RFC4291 summary in RFC5952 section 2
s = ":".join(["%x" % x for x in unpack_from(">HHHHHHHH",
pack("QQ", addr & 0xffffffff, addr >> 64))])
# compress left-most zero run only (change to most for RFC5952):
s = re.sub(r'(^|:)0:(0:)+', r'::', s, 1)
return s
# read events # read events
b["ipv4_events"].open_perf_buffer(print_ipv4_event) b["ipv4_events"].open_perf_buffer(print_ipv4_event)
b["ipv6_events"].open_perf_buffer(print_ipv6_event) b["ipv6_events"].open_perf_buffer(print_ipv6_event)
......
...@@ -9,10 +9,6 @@ ...@@ -9,10 +9,6 @@
# This uses dynamic tracing of kernel functions, and will need to be updated # This uses dynamic tracing of kernel functions, and will need to be updated
# to match kernel changes. # to match kernel changes.
# #
# IPv4 addresses are printed as dotted quads. For IPv6 addresses, the last four
# bytes are printed after "..."; check for future versions with better IPv6
# support.
#
# Copyright 2016 Netflix, Inc. # Copyright 2016 Netflix, Inc.
# Licensed under the Apache License, Version 2.0 (the "License") # Licensed under the Apache License, Version 2.0 (the "License")
# #
...@@ -20,6 +16,8 @@ ...@@ -20,6 +16,8 @@
from __future__ import print_function from __future__ import print_function
from bcc import BPF from bcc import BPF
from socket import inet_ntop, AF_INET, AF_INET6
from struct import pack
import argparse import argparse
import ctypes as ct import ctypes as ct
...@@ -44,6 +42,7 @@ debug = 0 ...@@ -44,6 +42,7 @@ debug = 0
bpf_text = """ bpf_text = """
#include <uapi/linux/ptrace.h> #include <uapi/linux/ptrace.h>
#include <net/sock.h> #include <net/sock.h>
#include <net/tcp_states.h>
#include <bcc/proto.h> #include <bcc/proto.h>
struct info_t { struct info_t {
...@@ -58,9 +57,9 @@ struct ipv4_data_t { ...@@ -58,9 +57,9 @@ struct ipv4_data_t {
// XXX: switch some to u32's when supported // XXX: switch some to u32's when supported
u64 ts_us; u64 ts_us;
u64 pid; u64 pid;
u64 ip;
u64 saddr; u64 saddr;
u64 daddr; u64 daddr;
u64 ip;
u64 dport; u64 dport;
u64 delta_us; u64 delta_us;
char task[TASK_COMM_LEN]; char task[TASK_COMM_LEN];
...@@ -68,12 +67,11 @@ struct ipv4_data_t { ...@@ -68,12 +67,11 @@ struct ipv4_data_t {
BPF_PERF_OUTPUT(ipv4_events); BPF_PERF_OUTPUT(ipv4_events);
struct ipv6_data_t { struct ipv6_data_t {
// XXX: update to transfer full ipv6 addrs
u64 ts_us; u64 ts_us;
u64 pid; u64 pid;
unsigned __int128 saddr;
unsigned __int128 daddr;
u64 ip; u64 ip;
u64 saddr;
u64 daddr;
u64 dport; u64 dport;
u64 delta_us; u64 delta_us;
char task[TASK_COMM_LEN]; char task[TASK_COMM_LEN];
...@@ -132,14 +130,10 @@ int trace_tcp_rcv_state_process(struct pt_regs *ctx, struct sock *sk) ...@@ -132,14 +130,10 @@ int trace_tcp_rcv_state_process(struct pt_regs *ctx, struct sock *sk)
} else /* AF_INET6 */ { } else /* AF_INET6 */ {
struct ipv6_data_t data6 = {.pid = infop->pid, .ip = 6}; struct ipv6_data_t data6 = {.pid = infop->pid, .ip = 6};
data6.ts_us = now / 1000; data6.ts_us = now / 1000;
// just grab the last 4 bytes for now bpf_probe_read(&data6.saddr, sizeof(data6.saddr),
u32 saddr = 0, daddr = 0; &skp->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
bpf_probe_read(&saddr, sizeof(saddr), bpf_probe_read(&data6.daddr, sizeof(data6.daddr),
&skp->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32[3]); &skp->__sk_common.skc_v6_daddr.in6_u.u6_addr32);
bpf_probe_read(&daddr, sizeof(daddr),
&skp->__sk_common.skc_v6_daddr.in6_u.u6_addr32[3]);
data6.saddr = bpf_ntohl(saddr);
data6.daddr = bpf_ntohl(daddr);
data6.dport = ntohs(dport); data6.dport = ntohs(dport);
data6.delta_us = (now - ts) / 1000; data6.delta_us = (now - ts) / 1000;
__builtin_memcpy(&data6.task, infop->task, sizeof(data6.task)); __builtin_memcpy(&data6.task, infop->task, sizeof(data6.task));
...@@ -170,60 +164,56 @@ b.attach_kprobe(event="tcp_rcv_state_process", ...@@ -170,60 +164,56 @@ b.attach_kprobe(event="tcp_rcv_state_process",
# event data # event data
TASK_COMM_LEN = 16 # linux/sched.h TASK_COMM_LEN = 16 # linux/sched.h
class Data_ipv4(ct.Structure): class Data_ipv4(ct.Structure):
_fields_ = [ _fields_ = [
("ts_us", ct.c_ulonglong), ("ts_us", ct.c_ulonglong),
("pid", ct.c_ulonglong), ("pid", ct.c_ulonglong),
("ip", ct.c_ulonglong),
("saddr", ct.c_ulonglong), ("saddr", ct.c_ulonglong),
("daddr", ct.c_ulonglong), ("daddr", ct.c_ulonglong),
("ip", ct.c_ulonglong),
("dport", ct.c_ulonglong), ("dport", ct.c_ulonglong),
("delta_us", ct.c_ulonglong), ("delta_us", ct.c_ulonglong),
("task", ct.c_char * TASK_COMM_LEN) ("task", ct.c_char * TASK_COMM_LEN)
] ]
class Data_ipv6(ct.Structure): class Data_ipv6(ct.Structure):
_fields_ = [ _fields_ = [
("ts_us", ct.c_ulonglong), ("ts_us", ct.c_ulonglong),
("pid", ct.c_ulonglong), ("pid", ct.c_ulonglong),
("saddr", (ct.c_ulonglong * 2)),
("daddr", (ct.c_ulonglong * 2)),
("ip", ct.c_ulonglong), ("ip", ct.c_ulonglong),
("saddr", ct.c_ulonglong),
("daddr", ct.c_ulonglong),
("dport", ct.c_ulonglong), ("dport", ct.c_ulonglong),
("delta_us", ct.c_ulonglong), ("delta_us", ct.c_ulonglong),
("task", ct.c_char * TASK_COMM_LEN) ("task", ct.c_char * TASK_COMM_LEN)
] ]
# functions
def inet_ntoa(addr):
dq = ''
for i in range(0, 4):
dq = dq + str(addr & 0xff)
if (i != 3):
dq = dq + '.'
addr = addr >> 8
return dq
# process event # process event
start_ts = 0 start_ts = 0
def print_ipv4_event(cpu, data, size): def print_ipv4_event(cpu, data, size):
event = ct.cast(data, ct.POINTER(Data_ipv4)).contents event = ct.cast(data, ct.POINTER(Data_ipv4)).contents
global start_ts global start_ts
if args.timestamp: if args.timestamp:
if start_ts == 0: if start_ts == 0:
start_ts = event.ts_us start_ts = event.ts_us
print("%-9.3f" % ((event.ts_us - start_ts) / 100000), end="") print("%-9.3f" % ((float(event.ts_us) - start_ts) / 1000000), end="")
print("%-6d %-12.12s %-2d %-16s %-16s %-5d %.2f" % (event.pid, event.task, print("%-6d %-12.12s %-2d %-16s %-16s %-5d %.2f" % (event.pid, event.task,
event.ip, inet_ntoa(event.saddr), inet_ntoa(event.daddr), event.ip, inet_ntop(AF_INET, pack("I", event.saddr)),
event.dport, float(event.delta_us) / 1000)) inet_ntop(AF_INET, pack("I", event.daddr)), event.dport,
float(event.delta_us) / 1000))
def print_ipv6_event(cpu, data, size): def print_ipv6_event(cpu, data, size):
event = ct.cast(data, ct.POINTER(Data_ipv6)).contents event = ct.cast(data, ct.POINTER(Data_ipv6)).contents
global start_ts global start_ts
if args.timestamp: if args.timestamp:
if start_ts == 0: if start_ts == 0:
start_ts = event.ts_us start_ts = event.ts_us
print("%-9.3f" % ((event.ts_us - start_ts) / 100000), end="") print("%-9.3f" % ((float(event.ts_us) - start_ts) / 1000000), end="")
print("%-6d %-12.12s %-2d ...%-13x ...%-13x %-5d %.2f" % (event.pid, print("%-6d %-12.12s %-2d %-16s %-16s %-5d %.2f" % (event.pid, event.task,
event.task, event.ip, event.saddr, event.daddr, event.dport, event.ip, inet_ntop(AF_INET6, event.saddr),
inet_ntop(AF_INET6, event.daddr), event.dport,
float(event.delta_us) / 1000)) float(event.delta_us) / 1000))
# header # header
......
...@@ -15,6 +15,8 @@ PID COMM IP SADDR DADDR DPORT LAT(ms) ...@@ -15,6 +15,8 @@ PID COMM IP SADDR DADDR DPORT LAT(ms)
1690 wget 4 10.153.223.157 66.220.156.68 443 0.95 1690 wget 4 10.153.223.157 66.220.156.68 443 0.95
1690 wget 4 10.153.223.157 66.220.156.68 443 0.99 1690 wget 4 10.153.223.157 66.220.156.68 443 0.99
2852 curl 4 10.153.223.157 23.101.17.61 80 250.86 2852 curl 4 10.153.223.157 23.101.17.61 80 250.86
20337 python2.7 6 1234:ab12:2040:5020:2299:0:5:0 1234:ab12:20:9f1d:2299:dde9:0:f5 7001 62.20
21588 nc 6 ::1 ::1 80 0.05
[...] [...]
The first line shows a connection from the "wget" process to the IPv4 The first line shows a connection from the "wget" process to the IPv4
......
...@@ -9,10 +9,6 @@ ...@@ -9,10 +9,6 @@
# This uses dynamic tracing of kernel functions, and will need to be updated # This uses dynamic tracing of kernel functions, and will need to be updated
# to match kernel changes. # to match kernel changes.
# #
# IPv4 addresses are printed as dotted quads. For IPv6 addresses, the last four
# bytes are printed after "..."; check for future versions with better IPv6
# support.
#
# Copyright 2016 Netflix, Inc. # Copyright 2016 Netflix, Inc.
# Licensed under the Apache License, Version 2.0 (the "License") # Licensed under the Apache License, Version 2.0 (the "License")
# #
...@@ -22,6 +18,8 @@ from __future__ import print_function ...@@ -22,6 +18,8 @@ from __future__ import print_function
from bcc import BPF from bcc import BPF
import argparse import argparse
from time import strftime from time import strftime
from socket import inet_ntop, AF_INET, AF_INET6
from struct import pack
import ctypes as ct import ctypes as ct
# arguments # arguments
...@@ -61,11 +59,10 @@ struct ipv4_data_t { ...@@ -61,11 +59,10 @@ struct ipv4_data_t {
BPF_PERF_OUTPUT(ipv4_events); BPF_PERF_OUTPUT(ipv4_events);
struct ipv6_data_t { struct ipv6_data_t {
// XXX: update to transfer full ipv6 addrs
u64 pid; u64 pid;
u64 ip; u64 ip;
u64 saddr; unsigned __int128 saddr;
u64 daddr; unsigned __int128 daddr;
u64 lport; u64 lport;
u64 dport; u64 dport;
u64 state; u64 state;
...@@ -102,14 +99,10 @@ static int trace_event(struct pt_regs *ctx, struct sock *sk, int type) ...@@ -102,14 +99,10 @@ static int trace_event(struct pt_regs *ctx, struct sock *sk, int type)
} else if (family == AF_INET6) { } else if (family == AF_INET6) {
struct ipv6_data_t data6 = {.pid = pid, .ip = 6, .type = type}; struct ipv6_data_t data6 = {.pid = pid, .ip = 6, .type = type};
// just grab the last 4 bytes for now bpf_probe_read(&data6.saddr, sizeof(data6.saddr),
u32 saddr = 0, daddr = 0; &skp->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
bpf_probe_read(&saddr, sizeof(saddr), bpf_probe_read(&data6.daddr, sizeof(data6.daddr),
&skp->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32[3]); &skp->__sk_common.skc_v6_daddr.in6_u.u6_addr32);
bpf_probe_read(&daddr, sizeof(daddr),
&skp->__sk_common.skc_v6_daddr.in6_u.u6_addr32[3]);
data6.saddr = bpf_ntohl(saddr);
data6.daddr = bpf_ntohl(daddr);
data6.lport = lport; data6.lport = lport;
data6.dport = dport; data6.dport = dport;
data6.state = state; data6.state = state;
...@@ -134,7 +127,6 @@ int trace_tlp(struct pt_regs *ctx, struct sock *sk) ...@@ -134,7 +127,6 @@ int trace_tlp(struct pt_regs *ctx, struct sock *sk)
""" """
# event data # event data
TASK_COMM_LEN = 16 # linux/sched.h
class Data_ipv4(ct.Structure): class Data_ipv4(ct.Structure):
_fields_ = [ _fields_ = [
("pid", ct.c_ulonglong), ("pid", ct.c_ulonglong),
...@@ -146,12 +138,13 @@ class Data_ipv4(ct.Structure): ...@@ -146,12 +138,13 @@ class Data_ipv4(ct.Structure):
("state", ct.c_ulonglong), ("state", ct.c_ulonglong),
("type", ct.c_ulonglong) ("type", ct.c_ulonglong)
] ]
class Data_ipv6(ct.Structure): class Data_ipv6(ct.Structure):
_fields_ = [ _fields_ = [
("pid", ct.c_ulonglong), ("pid", ct.c_ulonglong),
("ip", ct.c_ulonglong), ("ip", ct.c_ulonglong),
("saddr", ct.c_ulonglong), ("saddr", (ct.c_ulonglong * 2)),
("daddr", ct.c_ulonglong), ("daddr", (ct.c_ulonglong * 2)),
("lport", ct.c_ulonglong), ("lport", ct.c_ulonglong),
("dport", ct.c_ulonglong), ("dport", ct.c_ulonglong),
("state", ct.c_ulonglong), ("state", ct.c_ulonglong),
...@@ -163,15 +156,6 @@ type = {} ...@@ -163,15 +156,6 @@ type = {}
type[1] = 'R' type[1] = 'R'
type[2] = 'L' type[2] = 'L'
def inet_ntoa(addr):
dq = ''
for i in range(0, 4):
dq = dq + str(addr & 0xff)
if (i != 3):
dq = dq + '.'
addr = addr >> 8
return dq
# from include/net/tcp_states.h: # from include/net/tcp_states.h:
tcpstate = {} tcpstate = {}
tcpstate[1] = 'ESTABLISHED' tcpstate[1] = 'ESTABLISHED'
...@@ -192,23 +176,25 @@ def print_ipv4_event(cpu, data, size): ...@@ -192,23 +176,25 @@ def print_ipv4_event(cpu, data, size):
event = ct.cast(data, ct.POINTER(Data_ipv4)).contents event = ct.cast(data, ct.POINTER(Data_ipv4)).contents
print("%-8s %-6d %-2d %-20s %1s> %-20s %s" % ( print("%-8s %-6d %-2d %-20s %1s> %-20s %s" % (
strftime("%H:%M:%S"), event.pid, event.ip, strftime("%H:%M:%S"), event.pid, event.ip,
"%s:%s" % (inet_ntoa(event.saddr), event.lport), "%s:%d" % (inet_ntop(AF_INET, pack('I', event.saddr)), event.lport),
type[event.type], type[event.type],
"%s:%s" % (inet_ntoa(event.daddr), event.dport), "%s:%s" % (inet_ntop(AF_INET, pack('I', event.daddr)), event.dport),
tcpstate[event.state])) tcpstate[event.state]))
def print_ipv6_event(cpu, data, size): def print_ipv6_event(cpu, data, size):
event = ct.cast(data, ct.POINTER(Data_ipv6)).contents event = ct.cast(data, ct.POINTER(Data_ipv6)).contents
print("%-8s %-6d %-2d %-20s %1s> %-20s %s" % ( print("%-8s %-6d %-2d %-20s %1s> %-20s %s" % (
strftime("%H:%M:%S"), event.pid, event.ip, strftime("%H:%M:%S"), event.pid, event.ip,
"...%x:%d" % (event.saddr, event.lport), "%s:%d" % (inet_ntop(AF_INET6, event.saddr), event.lport),
type[event.type], type[event.type],
"...%x:%d" % (event.daddr, event.dport), "%s:%d" % (inet_ntop(AF_INET6, event.daddr), event.dport),
tcpstate[event.state])) tcpstate[event.state]))
# initialize BPF # initialize BPF
b = BPF(text=bpf_text) b = BPF(text=bpf_text)
b.attach_kprobe(event="tcp_retransmit_skb", fn_name="trace_retransmit") b.attach_kprobe(event="tcp_retransmit_skb", fn_name="trace_retransmit")
b.attach_kprobe(event="tcp_send_loss_probe", fn_name="trace_tlp") if args.lossprobe:
b.attach_kprobe(event="tcp_send_loss_probe", fn_name="trace_tlp")
# header # header
print("%-8s %-6s %-2s %-20s %1s> %-20s %-4s" % ("TIME", "PID", "IP", print("%-8s %-6s %-2s %-20s %1s> %-20s %-4s" % ("TIME", "PID", "IP",
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment