Commit c7310282 authored by Kirill Smelkov

X pinglat: Getting access to own traceback is forbidden by BPF

Switch to estimating the end of net tx by an icmp_echo kretprobe (icmp_echo calls
icmp_reply, which in turn calls ... -> net_dev_start_xmit).
parent 3ea54b47
#!/usr/bin/env python #!/usr/bin/env python
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# pinglat - measure local and remote ICMP ECHO processing latency # pinglat - measure local and remote ICMP ECHO software processing latency
from bcc import BPF from bcc import BPF
from time import sleep from time import sleep
# name of interface we are tracing on # name of interface we are tracing on
ifname="eth0" ifname="eth0"
icmp_echo_ip = BPF.ksymname("icmp_echo") #icmp_echo_ip = BPF.ksymname("icmp_echo")
assert icmp_echo_ip != -1 #assert icmp_echo_ip != -1
prog = r""" prog = r"""
#include <linux/interrupt.h> #include <linux/interrupt.h>
...@@ -66,64 +66,77 @@ int kprobe__icmp_echo(struct pt_regs *ctx, struct sk_buff *skb) { ...@@ -66,64 +66,77 @@ int kprobe__icmp_echo(struct pt_regs *ctx, struct sk_buff *skb) {
} }
// remember t(reply) // remember t(reply)
TRACEPOINT_PROBE(net, net_dev_xmit) { //TRACEPOINT_PROBE(net, net_dev_xmit) {
const char *devname = (void *)args + (args->data_loc_name & 0xffff); // const char *devname = (void *)args + (args->data_loc_name & 0xffff);
""" int kretprobe__icmp_echo(struct pt_regs *ctx) {
# devname != ifname -> return
prog += " char c;\n"
for i, c in enumerate(ifname):
prog += " bpf_probe_read(&c, 1, &devname[%d]); if (c != '%s') return 0;\n" % (i, c)
prog += r"""
// make sure tx is called from under icmp_echo - if not - ignore
// 0 0 swapper/1 net_dev_xmit
// dev_hard_start_xmit+0x123 [kernel]
// dev_hard_start_xmit+0x123 [kernel]
// sch_direct_xmit+0xf1 [kernel]
// __dev_queue_xmit+0x45a [kernel]
// ip_finish_output2+0x2a8 [kernel]
// ip_output+0x72 [kernel]
// ip_output+0x72 [kernel]
// ip_send_skb+0x15 [kernel]
// icmp_reply.constprop.25+0x24f [kernel]
// icmp_echo.part.23+0x5e [kernel]
// skb_checksum+0x32 [kernel]
// csum_partial_ext+0x0 [kernel]
// csum_block_add_ext+0x0 [kernel]
// __skb_checksum_complete+0x1c [kernel]
// icmp_echo+0x27 [kernel] <--
// icmp_rcv+0x26f [kernel]
int stkid = traceback.get_stackid(args, 14 | BPF_F_REUSE_STACKID); // XXX recheck 14 skip, fragile
if (stkid < 0) {
return 0; // error
}
struct bpf_stacktrace *tb;
tb = traceback.lookup(&stkid);
if (!(ICMP_ECHO_IP <= tb->ip[0] && tb->ip[0] < ICMP_ECHO_IP + 0x40)) { // XXX fragile
return 0; // called not from under icmp_echo
}
bpf_trace_printk("net tx from under icmp_echo\n");
return 0;
int z=0; u64 z64=0; int z=0; u64 z64=0;
u64 ts = bpf_ktime_get_ns(); u64 ts = bpf_ktime_get_ns();
u64 dtint = *tlastint.lookup_or_init(&z, &z64); u64 dtint = ts - *tlastint.lookup_or_init(&z, &z64);
u64 dtecho = *tlastecho.lookup_or_init(&z, &z64); u64 dtecho = ts - *tlastecho.lookup_or_init(&z, &z64);
dist_dt_echo_tx.increment(bpf_log2l(dtecho / (u64)(1E3))); dist_dt_echo_tx.increment(bpf_log2l(dtecho / (u64)(1E3)));
dist_dt_int_tx .increment(bpf_log2l(dtint / (u64)(1E3))); dist_dt_int_tx .increment(bpf_log2l(dtint / (u64)(1E3)));
//bpf_trace_printk("net tx from under icmp_echo\n");
return 0; return 0;
} }
""" """
# # devname != ifname -> return
# prog += " char c;\n"
# for i, c in enumerate(ifname):
# prog += " bpf_probe_read(&c, 1, &devname[%d]); if (c != '%s') return 0;\n" % (i, c)
#
#
# prog += r"""
# // make sure tx is called from under icmp_echo - if not - ignore
# // 0 0 swapper/1 net_dev_xmit
# // dev_hard_start_xmit+0x123 [kernel]
# // dev_hard_start_xmit+0x123 [kernel]
# // sch_direct_xmit+0xf1 [kernel]
# // __dev_queue_xmit+0x45a [kernel]
# // ip_finish_output2+0x2a8 [kernel]
# // ip_output+0x72 [kernel]
# // ip_output+0x72 [kernel]
# // ip_send_skb+0x15 [kernel]
# // icmp_reply.constprop.25+0x24f [kernel]
# // icmp_echo.part.23+0x5e [kernel]
# // skb_checksum+0x32 [kernel]
# // csum_partial_ext+0x0 [kernel]
# // csum_block_add_ext+0x0 [kernel]
# // __skb_checksum_complete+0x1c [kernel]
# // icmp_echo+0x27 [kernel] <--
# // icmp_rcv+0x26f [kernel]
# int stkid = traceback.get_stackid(args, 14 | BPF_F_REUSE_STACKID); // XXX recheck 14 skip, fragile
# if (stkid < 0) {
# return 0; // error
# }
#
# struct bpf_stacktrace *tb;
# tb = traceback.lookup(&stkid);
# if (!(ICMP_ECHO_IP <= tb->ip[0] && tb->ip[0] < ICMP_ECHO_IP + 0x40)) { // XXX fragile
# return 0; // called not from under icmp_echo
# }
#
# bpf_trace_printk("net tx from under icmp_echo\n");
# return 0;
#
#
# int z=0; u64 z64=0;
# u64 ts = bpf_ktime_get_ns();
#
# u64 dtint = ts - *tlastint.lookup_or_init(&z, &z64);
# u64 dtecho = ts - *tlastecho.lookup_or_init(&z, &z64);
#
# dist_dt_echo_tx.increment(bpf_log2l(dtecho / (u64)(1E3)));
# dist_dt_int_tx .increment(bpf_log2l(dtint / (u64)(1E3)));
#
# return 0;
# }
# """
prog = prog.replace("IFNAME", ifname) prog = prog.replace("IFNAME", ifname)
prog = prog.replace("ICMP_ECHO_IP", "0x%x" % icmp_echo_ip) #prog = prog.replace("ICMP_ECHO_IP", "0x%x" % icmp_echo_ip)
print prog print prog
...@@ -132,6 +145,7 @@ b = BPF(text=prog) ...@@ -132,6 +145,7 @@ b = BPF(text=prog)
while 1: while 1:
sleep(3) sleep(3)
print '-'*40
b["dist_dt_int_echo"].print_log2_hist("int - icmp_echo (μs)") b["dist_dt_int_echo"].print_log2_hist("int - icmp_echo (μs)")
b["dist_dt_int_echo"].clear() b["dist_dt_int_echo"].clear()
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment