tcplife: switch to the new sock:inet_sock_set_state tracepoint

fd93dc04 · Brendan Gregg · 913450f1 · fd93dc04 · fd93dc04
Commit fd93dc04 authored Mar 19, 2018 by Brendan Gregg
Hide whitespace changes
Inline Side-by-side

Showing with 17 additions and 16 deletions

man/man8/tcplife.8 man/man8/tcplife.8 +4 -4

tools/tcplife.py tools/tcplife.py +13 -12

No files found.
--- a/man/man8/tcplife.8
+++ b/man/man8/tcplife.8
@@ -10,10 +10,10 @@ duration, and throughput for the session. This is useful for workload
 characterisation and flow accounting: identifying what connections are
 happening, with the bytes transferred.

-This tool works using the tcp:tcp_set_state tracepoint if it exists, added
-to Linux 4.15, and switches to using kernel dynamic tracing for older kernels.
-Only TCP state changes are traced, so it is expected that the overhead of
-this tool is much lower than typical send/receive tracing.
+This tool works using the sock:inet_sock_set_state tracepoint if it exists,
+added to Linux 4.16, and switches to using kernel dynamic tracing for older
+kernels. Only TCP state changes are traced, so it is expected that the
+overhead of this tool is much lower than typical send/receive tracing.

 Since this uses BPF, only the root user can use this tool.
 .SH REQUIREMENTS

--- a/tools/tcplife.py
+++ b/tools/tcplife.py
@@ -6,8 +6,9 @@
 #
 # USAGE: tcplife [-h] [-C] [-S] [-p PID] [interval [count]]
 #
-# This uses the tcp:tcp_set_state tracepoint if it exists (added to
-# Linux 4.15), else it uses kernel dynamic tracing of tcp_set_state().
+# This uses the sock:inet_sock_set_state tracepoint if it exists (added to
+# Linux 4.16, and replacing the earlier tcp:tcp_set_state), else it uses
+# kernel dynamic tracing of tcp_set_state().
 #
 # While throughput counters are emitted, they are fetched in a low-overhead
 # manner: reading members of the tcp_info struct on TCP close. ie, we do not
@@ -110,9 +111,9 @@ BPF_HASH(whoami, struct sock *, struct id_t);

 #
 # XXX: The following is temporary code for older kernels, Linux 4.15 and
-# older. It uses kprobes to instrument tcp_set_state(). On Linux 4.15 and
-# later, the tcp:tcp_set_state tracepoint should be used instead, as is
-# done by the code that follows this. In the distant future (2021?), this
+# older. It uses kprobes to instrument tcp_set_state(). On Linux 4.16 and
+# later, the sock:inet_sock_set_state tracepoint should be used instead, as
+# is done by the code that follows this. In the distant future (2021?), this
 # kprobe code can be removed. This is why there is so much code
 # duplication: to make removal easier.
 #
@@ -235,10 +236,13 @@ int kprobe__tcp_set_state(struct pt_regs *ctx, struct sock *sk, int state)
 """

 bpf_text_tracepoint = """
-TRACEPOINT_PROBE(tcp, tcp_set_state)
+TRACEPOINT_PROBE(sock, inet_sock_set_state)
 {
+    if (args->protocol != IPPROTO_TCP)
+        return 0;
+
    u32 pid = bpf_get_current_pid_tgid() >> 32;
-    // sk is mostly used as a UUID, once for skc_family, and two tcp stats:
+    // sk is mostly used as a UUID, and for two tcp stats:
    struct sock *sk = (struct sock *)args->skaddr;

    // lport is either used in a filter here, or later
@@ -310,10 +314,7 @@ TRACEPOINT_PROBE(tcp, tcp_set_state)
    bpf_probe_read(&rx_b, sizeof(rx_b), &tp->bytes_received);
    bpf_probe_read(&tx_b, sizeof(tx_b), &tp->bytes_acked);

-    u16 family = 0;
-    bpf_probe_read(&family, sizeof(family), &sk->__sk_common.skc_family);
-
-    if (family == AF_INET) {
+    if (args->family == AF_INET) {
        struct ipv4_data_t data4 = {.span_us = delta_us,
            .rx_b = rx_b, .tx_b = tx_b};
        data4.ts_us = bpf_ktime_get_ns() / 1000;
@@ -354,7 +355,7 @@ TRACEPOINT_PROBE(tcp, tcp_set_state)
 }
 """

-if (BPF.tracepoint_exists("tcp", "tcp_set_state")):
+if (BPF.tracepoint_exists("sock", "inet_sock_set_state")):
    bpf_text += bpf_text_tracepoint
 else:
    bpf_text += bpf_text_kprobe