Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
B
bcc
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
bcc
Commits
82ef5a0d
Commit
82ef5a0d
authored
Dec 30, 2017
by
yonghong-song
Committed by
GitHub
Dec 30, 2017
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #1508 from brendangregg/master
tcplife: add tcp:tcp_set_state tracepoint support
parents
bbfd6ea2
efa6ee93
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
147 additions
and
6 deletions
+147
-6
man/man8/tcplife.8
man/man8/tcplife.8
+4
-4
src/python/bcc/__init__.py
src/python/bcc/__init__.py
+5
-0
tools/tcplife.py
tools/tcplife.py
+138
-2
No files found.
man/man8/tcplife.8
View file @
82ef5a0d
...
@@ -10,10 +10,10 @@ duration, and throughput for the session. This is useful for workload
...
@@ -10,10 +10,10 @@ duration, and throughput for the session. This is useful for workload
characterisation and flow accounting: identifying what connections are
characterisation and flow accounting: identifying what connections are
happening, with the bytes transferred.
happening, with the bytes transferred.
This tool works
by using kernel dynamic tracing, and will need to be updat
ed
This tool works
using the tcp:tcp_set_state tracepoint if it exists, add
ed
if the kernel implementation changes. Only TCP state changes are traced, so
to Linux 4.15, and switches to using kernel dynamic tracing for older kernels.
it is expected that the overhead of this tool is much lower than typical
Only TCP state changes are traced, so it is expected that the overhead of
send/receive tracing.
this tool is much lower than typical
send/receive tracing.
Since this uses BPF, only the root user can use this tool.
Since this uses BPF, only the root user can use this tool.
.SH REQUIREMENTS
.SH REQUIREMENTS
...
...
src/python/bcc/__init__.py
View file @
82ef5a0d
...
@@ -644,6 +644,11 @@ class BPF(object):
...
@@ -644,6 +644,11 @@ class BPF(object):
results
.
append
(
tp
)
results
.
append
(
tp
)
return
results
return
results
@staticmethod
def tracepoint_exists(category, event):
    """Return True if the tracepoint category:event has an events directory.

    The check is purely a filesystem test: it looks for the directory
    TRACEFS/events/<category>/<event> and reports whether it exists.
    """
    return os.path.isdir(os.path.join(TRACEFS, "events", category, event))
def
attach_tracepoint
(
self
,
tp
=
""
,
tp_re
=
""
,
fn_name
=
""
):
def
attach_tracepoint
(
self
,
tp
=
""
,
tp_re
=
""
,
fn_name
=
""
):
"""attach_tracepoint(tp="", tp_re="", fn_name="")
"""attach_tracepoint(tp="", tp_re="", fn_name="")
...
...
tools/tcplife.py
View file @
82ef5a0d
...
@@ -6,8 +6,8 @@
...
@@ -6,8 +6,8 @@
#
#
# USAGE: tcplife [-h] [-C] [-S] [-p PID] [interval [count]]
# USAGE: tcplife [-h] [-C] [-S] [-p PID] [interval [count]]
#
#
# This uses
dynamic tracing of kernel functions, and will need to be updated
# This uses
the tcp:tcp_set_state tracepoint if it exists (added to
#
to match kernel changes
.
#
Linux 4.15), else it uses kernel dynamic tracing of tcp_set_state()
.
#
#
# While throughput counters are emitted, they are fetched in a low-overhead
# While throughput counters are emitted, they are fetched in a low-overhead
# manner: reading members of the tcp_info struct on TCP close. ie, we do not
# manner: reading members of the tcp_info struct on TCP close. ie, we do not
...
@@ -19,6 +19,7 @@
...
@@ -19,6 +19,7 @@
# IDEA: Julia Evans
# IDEA: Julia Evans
#
#
# 18-Oct-2016 Brendan Gregg Created this.
# 18-Oct-2016 Brendan Gregg Created this.
# 29-Dec-2017 " " Added tracepoint support.
from
__future__
import
print_function
from
__future__
import
print_function
from
bcc
import
BPF
from
bcc
import
BPF
...
@@ -103,7 +104,17 @@ struct id_t {
...
@@ -103,7 +104,17 @@ struct id_t {
char task[TASK_COMM_LEN];
char task[TASK_COMM_LEN];
};
};
BPF_HASH(whoami, struct sock *, struct id_t);
BPF_HASH(whoami, struct sock *, struct id_t);
"""
#
# XXX: The following is temporary code for older kernels, Linux 4.14 and
# older. It uses kprobes to instrument tcp_set_state(). On Linux 4.15 and
# later, the tcp:tcp_set_state tracepoint should be used instead, as is
# done by the code that follows this. In the distant future (2021?), this
# kprobe code can be removed. This is why there is so much code
# duplication: to make removal easier.
#
bpf_text_kprobe
=
"""
int kprobe__tcp_set_state(struct pt_regs *ctx, struct sock *sk, int state)
int kprobe__tcp_set_state(struct pt_regs *ctx, struct sock *sk, int state)
{
{
u32 pid = bpf_get_current_pid_tgid() >> 32;
u32 pid = bpf_get_current_pid_tgid() >> 32;
...
@@ -221,6 +232,131 @@ int kprobe__tcp_set_state(struct pt_regs *ctx, struct sock *sk, int state)
...
@@ -221,6 +232,131 @@ int kprobe__tcp_set_state(struct pt_regs *ctx, struct sock *sk, int state)
}
}
"""
"""
bpf_text_tracepoint
=
"""
/*
 * Handler for the tcp:tcp_set_state tracepoint.
 *
 * Records a birth timestamp for early TCP states, caches the PID and comm
 * on SYN_SENT / LAST_ACK, and on TCP_CLOSE emits one event (IPv4 or IPv6)
 * carrying the session lifespan, byte counters, addresses, ports, PID and
 * task name. FILTER_LPORT, FILTER_DPORT and FILTER_PID are placeholders
 * that are textually substituted before this program is compiled.
 *
 * Always returns 0.
 */
TRACEPOINT_PROBE(tcp, tcp_set_state)
{
    u32 pid = bpf_get_current_pid_tgid() >> 32;

    // sk is mostly used as a UUID, once for skc_family, and two tcp stats:
    struct sock *sk = (struct sock *)args->skaddr;

    // lport is either used in a filter here, or later
    u16 lport = args->sport;
    FILTER_LPORT

    // dport is either used in a filter here, or later
    u16 dport = args->dport;
    FILTER_DPORT

    /*
     * This tool includes PID and comm context. It's best effort, and may
     * be wrong in some situations. It currently works like this:
     * - record timestamp on any state < TCP_FIN_WAIT1
     * - cache task context on:
     *       TCP_SYN_SENT: tracing from client
     *       TCP_LAST_ACK: client-closed from server
     * - do output on TCP_CLOSE:
     *       fetch task context if cached, or use current task
     */

    // capture birth time
    if (args->newstate < TCP_FIN_WAIT1) {
        /*
         * Matching just ESTABLISHED may be sufficient, provided no code-path
         * sets ESTABLISHED without a tcp_set_state() call. Until we know
         * that for sure, match all early states to increase chances a
         * timestamp is set.
         * Note that this needs to be set before the PID filter later on,
         * since the PID isn't reliable for these early stages, so we must
         * save all timestamps and do the PID filter later when we can.
         */
        u64 ts = bpf_ktime_get_ns();
        birth.update(&sk, &ts);
    }

    // record PID & comm on SYN_SENT or LAST_ACK
    if (args->newstate == TCP_SYN_SENT || args->newstate == TCP_LAST_ACK) {
        // now we can PID filter, both here and a little later on for CLOSE
        FILTER_PID
        struct id_t me = {.pid = pid};
        bpf_get_current_comm(&me.task, sizeof(me.task));
        whoami.update(&sk, &me);
    }

    // everything below only applies to the final state transition
    if (args->newstate != TCP_CLOSE)
        return 0;

    // calculate lifespan
    u64 *tsp, delta_us;
    tsp = birth.lookup(&sk);
    if (tsp == 0) {
        whoami.delete(&sk);     // may not exist
        return 0;               // missed create
    }
    delta_us = (bpf_ktime_get_ns() - *tsp) / 1000;
    birth.delete(&sk);

    // fetch possible cached data, and filter
    struct id_t *mep;
    mep = whoami.lookup(&sk);
    if (mep != 0)
        pid = mep->pid;
    FILTER_PID

    // get throughput stats. see tcp_get_info().
    u64 rx_b = 0, tx_b = 0;
    struct tcp_sock *tp = (struct tcp_sock *)sk;
    bpf_probe_read(&rx_b, sizeof(rx_b), &tp->bytes_received);
    bpf_probe_read(&tx_b, sizeof(tx_b), &tp->bytes_acked);

    u16 family = 0;
    bpf_probe_read(&family, sizeof(family), &sk->__sk_common.skc_family);

    if (family == AF_INET) {
        struct ipv4_data_t data4 = {.span_us = delta_us,
            .rx_b = rx_b, .tx_b = tx_b};
        data4.ts_us = bpf_ktime_get_ns() / 1000;
        bpf_probe_read(&data4.saddr, sizeof(u32), args->saddr);
        bpf_probe_read(&data4.daddr, sizeof(u32), args->daddr);
        // a workaround until data4 compiles with separate lport/dport
        data4.ports = dport + ((0ULL + lport) << 32);
        data4.pid = pid;

        if (mep == 0) {
            bpf_get_current_comm(&data4.task, sizeof(data4.task));
        } else {
            bpf_probe_read(&data4.task, sizeof(data4.task), (void *)mep->task);
        }
        ipv4_events.perf_submit(args, &data4, sizeof(data4));

    } else /* 6 */ {
        struct ipv6_data_t data6 = {.span_us = delta_us,
            .rx_b = rx_b, .tx_b = tx_b};
        data6.ts_us = bpf_ktime_get_ns() / 1000;
        bpf_probe_read(&data6.saddr, sizeof(data6.saddr), args->saddr_v6);
        // the destination must come from daddr_v6, not saddr_v6; reading
        // saddr_v6 twice would report the source address as the destination
        bpf_probe_read(&data6.daddr, sizeof(data6.daddr), args->daddr_v6);
        // a workaround until data6 compiles with separate lport/dport
        data6.ports = dport + ((0ULL + lport) << 32);
        data6.pid = pid;
        if (mep == 0) {
            bpf_get_current_comm(&data6.task, sizeof(data6.task));
        } else {
            bpf_probe_read(&data6.task, sizeof(data6.task), (void *)mep->task);
        }
        ipv6_events.perf_submit(args, &data6, sizeof(data6));
    }

    // drop the cached task context now that the session has been reported
    if (mep != 0)
        whoami.delete(&sk);

    return 0;
}
"""
# Append exactly one probe implementation: the tcp:tcp_set_state tracepoint
# program when that tracepoint exists, otherwise the kprobe program.
bpf_text += (bpf_text_tracepoint
             if BPF.tracepoint_exists("tcp", "tcp_set_state")
             else bpf_text_kprobe)
# code substitutions
# code substitutions
if
args
.
pid
:
if
args
.
pid
:
bpf_text
=
bpf_text
.
replace
(
'FILTER_PID'
,
bpf_text
=
bpf_text
.
replace
(
'FILTER_PID'
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment