Commit 0f4b6d6d authored by Kirill Smelkov

X pinglat: A bit more robust - check for out-of-sync events and try to count not from the last interrupt

For the last point, though, the time now gets counted from the previous TX
time, which is wrong (nint = 2, so the latencies come out high; see the
sketch below the histograms):

----------------------------------------
     nint          : count     distribution
        0          : 0        |                                        |
        1          : 9        |                                        |
        2          : 23421    |****************************************|
        3          : 34       |                                        |
        4          : 7        |                                        |
     int - icmp_echo (μs) : count     distribution
         0 -> 1          : 0        |                                        |
         2 -> 3          : 0        |                                        |
         4 -> 7          : 0        |                                        |
         8 -> 15         : 0        |                                        |
        16 -> 31         : 12       |                                        |
        32 -> 63         : 26       |                                        |
        64 -> 127        : 23099    |****************************************|
       128 -> 255        : 317      |                                        |
       256 -> 511        : 5        |                                        |
       512 -> 1023       : 6        |                                        |
      1024 -> 2047       : 1        |                                        |
      2048 -> 4095       : 1        |                                        |
      4096 -> 8191       : 2        |                                        |
     icmp_echo - tx (μs) : count     distribution
         0 -> 1          : 23125    |****************************************|
         2 -> 3          : 266      |                                        |
         4 -> 7          : 66       |                                        |
         8 -> 15         : 3        |                                        |
        16 -> 31         : 9        |                                        |
     int - tx (μs)      : count     distribution
         0 -> 1          : 0        |                                        |
         2 -> 3          : 0        |                                        |
         4 -> 7          : 0        |                                        |
         8 -> 15         : 0        |                                        |
        16 -> 31         : 8        |                                        |
        32 -> 63         : 30       |                                        |
        64 -> 127        : 23084    |****************************************|
       128 -> 255        : 330      |                                        |
       256 -> 511        : 5        |                                        |
       512 -> 1023       : 6        |                                        |
      1024 -> 2047       : 1        |                                        |
      2048 -> 4095       : 1        |                                        |
      4096 -> 8191       : 2        |                                        |
----------------------------------------
parent c7310282
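A minimal sketch of where the nint = 2 pattern likely comes from, plus one possible direction that is not part of this commit: after kretprobe__icmp_echo consumes and clears the in-flight event, the next interrupt on the interface - presumably the one that follows the reply transmit, e.g. TX completion - re-seeds ev->tint, so the following echo request's RX interrupt only bumps nint and the latency gets measured from that stale, previous-TX-time interrupt. Following the script's own prog += r"""...""" idiom, one could additionally remember the time of the most recent interrupt (tint_last and prog_sketch are hypothetical names) so that a from-last-interrupt figure stays available next to the conservative from-first-interrupt one:

# not part of this commit: assumes struct event grows a
#     u64 tint_last;  // t of the most recent interrupt on iface
# field; the interrupt probe body would then look roughly like this
prog_sketch = r"""
    u64 ts = bpf_ktime_get_ns();
    int z=0; struct event zev = {};
    struct event *ev = event.lookup_or_init(&z, &zev);
    ev->tint_last = ts;      // always track the latest interrupt
    if (ev->tint != 0) {
        // event already open, e.g. seeded by a leftover TX-completion interrupt
        ev->nint++;
        return 0;
    }
    ev->tint = ts;           // conservative: count from the first interrupt
    ev->nint = 1;
    return 0;
"""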
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
-# pinglat - measure local and remote ICMP ECHO software processing latency
+# pinglat - estimate upper bound for local and remote ICMP ECHO software processing latency
 from bcc import BPF
 from time import sleep
@@ -14,15 +14,36 @@ prog = r"""
 #include <linux/interrupt.h>
 #include <linux/icmp.h>
-BPF_ARRAY(tlastint, u64, 1); // t of last interrupt on iface
-BPF_ARRAY(tlastecho, u64, 1); // t of last icmp_echo
+// event represents 1 INTR->icmp_echo->icmp_reply event
+struct event {
+    u64 tint;   // t of interrupt on iface
+    int nint;   // # of interrupt on iface before it got to icmp_echo
+                // ( potentially there can be several interrupts before we get to icmp_echo.
+                //   we take conservative approach and start counting time from the first )
+    u64 techo;  // t when it got to icmp_echo
+    // u64 techo_reply; // t when icmp_echo finished (it calls icmp_reply inside)
+};
+BPF_ARRAY(event, struct event, 1); // current in-flight event
+// BPF_ARRAY(tlastint, u64, 1); // t of last interrupt on iface
+// BPF_ARRAY(tlastecho, u64, 1); // t of last icmp_echo
 BPF_HISTOGRAM(dist_dt_int_echo); // dt int - icmp_echo
 BPF_HISTOGRAM(dist_dt_echo_tx); // dt icmp_echo - tx
 BPF_HISTOGRAM(dist_dt_int_tx); // dt int - tx
+BPF_HISTOGRAM(dist_nint);
-// for determining call stack in net_dev_xmit; 1024 - not 1 - because calls can be done concurrently
-BPF_STACK_TRACE(traceback, 1024);
+// // for determining call stack in net_dev_xmit; 1024 - not 1 - because calls can be done concurrently
+// BPF_STACK_TRACE(traceback, 1024);
+// # out-of-sync events
+// 1 - missed interrupt
+// 2 - second icmp_echo without processing for first completed
+// 3 - icmp_echo ret without interrupt
+// 4 - icmp_echo ret without icmp_echo
+BPF_HISTOGRAM(outofsync);
 // remember t(last-interrupt) on interface
 int kprobe__handle_irq_event_percpu(struct pt_regs *ctx, struct irq_desc *desc) {
@@ -35,10 +56,18 @@ for i, c in enumerate(ifname):
     prog += " bpf_probe_read(&c, 1, &irqname[%d]); if (c != '%s') return 0;\n" % (i, c)
 prog += r"""
-    int z=0; u64 z64=0;
     u64 ts = bpf_ktime_get_ns();
-    *tlastint.lookup_or_init(&z, &z64) = ts;
+    int z=0; struct event zev = {};
+    struct event *ev = event.lookup_or_init(&z, &zev);
+    if (ev->tint != 0) {
+        // interrupt came but icmp_echo not yet handled
+        ev->nint++;
+        return 0;
+    }
+    ev->tint = ts;
+    ev->nint = 1;
     //bpf_trace_printk("interrupt: %s\n", irqname);
     return 0;
@@ -47,19 +76,32 @@ prog += r"""
 // remember t(icmp_echo) - when we received ICMP ECHO
 int kprobe__icmp_echo(struct pt_regs *ctx, struct sk_buff *skb) {
-    int z=0; u64 z64=0;
     u64 ts = bpf_ktime_get_ns();
-    *tlastecho.lookup_or_init(&z, &z64) = ts;
+    int z=0; struct event zev = {};
+    struct event *ev = event.lookup_or_init(&z, &zev);
+    if (ev->tint == 0) {
+        // missed interrupt
+        outofsync.increment(1);
+        return 0;
+    }
+    if (ev->techo != 0) {
+        // second icmp_echo without previous processed
+        outofsync.increment(2);
+        return 0;
+    }
+    ev->techo = ts;
     // update t(int) - t(icmp_echo) along the way
-    struct icmphdr h;
-    bpf_probe_read(&h, sizeof(h), skb->head + skb->transport_header); // = icmp_hdr(skb)
+    //struct icmphdr h;
+    //bpf_probe_read(&h, sizeof(h), skb->head + skb->transport_header); // = icmp_hdr(skb)
-    u64 tint = *tlastint.lookup_or_init(&z, &z64);
+    //u64 tint = *tlastint.lookup_or_init(&z, &z64);
-    u64 dt = ts - tint;
-    dist_dt_int_echo.increment(bpf_log2l(dt / (u64)(1E3)));
+    //u64 dt = ts - tint;
+    //dist_dt_int_echo.increment(bpf_log2l(dt / (u64)(1E3)));
     //bpf_trace_printk("ping id: %d seq: %d dint: %dns\n", h.un.echo.id, h.un.echo.sequence, ts - tint);
     return 0;
@@ -69,17 +111,36 @@ int kprobe__icmp_echo(struct pt_regs *ctx, struct sk_buff *skb) {
 //TRACEPOINT_PROBE(net, net_dev_xmit) {
 // const char *devname = (void *)args + (args->data_loc_name & 0xffff);
 int kretprobe__icmp_echo(struct pt_regs *ctx) {
-    int z=0; u64 z64=0;
-    u64 ts = bpf_ktime_get_ns();
-    u64 dtint = ts - *tlastint.lookup_or_init(&z, &z64);
-    u64 dtecho = ts - *tlastecho.lookup_or_init(&z, &z64);
-    dist_dt_echo_tx.increment(bpf_log2l(dtecho / (u64)(1E3)));
-    dist_dt_int_tx .increment(bpf_log2l(dtint / (u64)(1E3)));
-    //bpf_trace_printk("net tx from under icmp_echo\n");
-    return 0;
+    u64 ts = bpf_ktime_get_ns();
+    int z=0; struct event zev = {};
+    struct event *ev = event.lookup_or_init(&z, &zev);
+    if (ev->tint == 0) {
+        // icmp_echo ret without interrupt
+        outofsync.increment(3);
+        return 0;
+    }
+    if (ev->techo == 0) {
+        // icmp_echo ret without icmp_echo
+        outofsync.increment(4);
+        return 0;
+    }
+    u64 dt_int_echo = ev->techo - ev->tint;
+    u64 dt_int_tx = ts - ev->tint;
+    u64 dt_echo_tx = ts - ev->techo;
+    int nint = ev->nint;
+    *ev = zev;
+    dist_dt_int_echo .increment(bpf_log2l(dt_int_echo / (u64)(1E3)));
+    dist_dt_echo_tx  .increment(bpf_log2l(dt_echo_tx / (u64)(1E3)));
+    dist_dt_int_tx   .increment(bpf_log2l(dt_int_tx / (u64)(1E3)));
+    dist_nint        .increment(nint);
+    //bpf_trace_printk("net tx from under icmp_echo\n");
+    return 0;
 }
 """
@@ -138,7 +199,7 @@ int kretprobe__icmp_echo(struct pt_regs *ctx) {
 prog = prog.replace("IFNAME", ifname)
 #prog = prog.replace("ICMP_ECHO_IP", "0x%x" % icmp_echo_ip)
-print prog
+#print prog
 b = BPF(text=prog)
 #b.trace_print()
@@ -146,6 +207,12 @@ b = BPF(text=prog)
 while 1:
     sleep(3)
     print '-'*40
+    b["outofsync"].print_linear_hist("outofsync")
+    b["outofsync"].clear()
+    b["dist_nint"].print_linear_hist("nint")
+    b["dist_nint"].clear()
     b["dist_dt_int_echo"].print_log2_hist("int - icmp_echo (μs)")
     b["dist_dt_int_echo"].clear()