Commit 141b6b2a authored by Cong Wang, committed by David S. Miller

net: add a generic tracepoint for TX queue timeout

Although the devlink health reporter does a nice job of reporting TX
timeouts and other NIC errors, it requires driver support, and
currently only mlx5 implements it.
Until other drivers catch up, it is useful to have a
generic tracepoint to monitor this kind of TX timeout. We have
been suffering from TX timeouts with different drivers, and we plan to
start monitoring them with rasdaemon, which just needs a new tracepoint.

Sample output:

  ksoftirqd/1-16    [001] ..s2   144.043173: net_dev_xmit_timeout: dev=ens3 driver=e1000 queue=0

Cc: Eran Ben Elisha <eranbe@mellanox.com>
Cc: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Reviewed-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent f3f050a4
...@@ -95,6 +95,29 @@ TRACE_EVENT(net_dev_xmit, ...@@ -95,6 +95,29 @@ TRACE_EVENT(net_dev_xmit,
__get_str(name), __entry->skbaddr, __entry->len, __entry->rc) __get_str(name), __entry->skbaddr, __entry->len, __entry->rc)
); );
/*
 * net_dev_xmit_timeout - tracepoint for a TX queue timeout
 *
 * @dev:         network device the event is reported for
 * @queue_index: index of the TX queue the event is reported for
 *
 * Records the device name, the string returned by netdev_drivername(dev)
 * and the queue index, and prints them as
 * "dev=<name> driver=<driver> queue=<index>".
 */
TRACE_EVENT(net_dev_xmit_timeout,
TP_PROTO(struct net_device *dev,
int queue_index),
TP_ARGS(dev, queue_index),
/* Fields stored in each trace record. */
TP_STRUCT__entry(
__string( name, dev->name )
__string( driver, netdev_drivername(dev))
__field( int, queue_index )
),
/* Copy both strings and the queue index into the record. */
TP_fast_assign(
__assign_str(name, dev->name);
__assign_str(driver, netdev_drivername(dev));
__entry->queue_index = queue_index;
),
/* Output format of the event. */
TP_printk("dev=%s driver=%s queue=%d",
__get_str(name), __get_str(driver), __entry->queue_index)
);
DECLARE_EVENT_CLASS(net_dev_template, DECLARE_EVENT_CLASS(net_dev_template,
TP_PROTO(struct sk_buff *skb), TP_PROTO(struct sk_buff *skb),
......
...@@ -32,6 +32,7 @@ ...@@ -32,6 +32,7 @@
#include <net/pkt_sched.h> #include <net/pkt_sched.h>
#include <net/dst.h> #include <net/dst.h>
#include <trace/events/qdisc.h> #include <trace/events/qdisc.h>
#include <trace/events/net.h>
#include <net/xfrm.h> #include <net/xfrm.h>
/* Qdisc to use by default */ /* Qdisc to use by default */
...@@ -441,6 +442,7 @@ static void dev_watchdog(struct timer_list *t) ...@@ -441,6 +442,7 @@ static void dev_watchdog(struct timer_list *t)
} }
if (some_queue_timedout) { if (some_queue_timedout) {
trace_net_dev_xmit_timeout(dev, i);
WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out\n", WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out\n",
dev->name, netdev_drivername(dev), i); dev->name, netdev_drivername(dev), i);
dev->netdev_ops->ndo_tx_timeout(dev); dev->netdev_ops->ndo_tx_timeout(dev);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment