Commit 6f2684bf authored by Peilin Ye's avatar Peilin Ye Committed by Martin KaFai Lau

veth: Use tstats per-CPU traffic counters

Currently veth devices use the lstats per-CPU traffic counters, which only
cover TX traffic. veth_get_stats64() actually populates RX stats of a veth
device from its peer's TX counters, based on the assumption that a veth
device can _only_ receive packets from its peer, which is no longer true:

For example, recent CNIs (like Cilium) can use the bpf_redirect_peer() BPF
helper to redirect traffic from NIC's tc ingress to veth's tc ingress (in
a different netns), skipping veth's peer device. Unfortunately, this kind
of traffic isn't currently accounted for in veth's RX stats.

In preparation for the fix, use tstats (instead of lstats) to maintain
both RX and TX counters for each veth device. We'll use RX counters for
bpf_redirect_peer() traffic, and keep using TX counters for the usual
"peer-to-peer" traffic. In veth_get_stats64(), calculate RX stats by
_adding_ RX count to peer's TX count, in order to cover both kinds of
traffic.

veth_stats_rx() might need a name change (perhaps to "veth_stats_xdp()")
for less confusion, but let's leave it to another patch to keep the fix
minimal.
Signed-off-by: default avatarPeilin Ye <peilin.ye@bytedance.com>
Co-developed-by: default avatarDaniel Borkmann <daniel@iogearbox.net>
Signed-off-by: default avatarDaniel Borkmann <daniel@iogearbox.net>
Reviewed-by: default avatarNikolay Aleksandrov <razor@blackwall.org>
Link: https://lore.kernel.org/r/20231114004220.6495-5-daniel@iogearbox.netSigned-off-by: default avatarMartin KaFai Lau <martin.lau@kernel.org>
parent ae165827
......@@ -373,7 +373,7 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
skb_tx_timestamp(skb);
if (likely(veth_forward_skb(rcv, skb, rq, use_napi) == NET_RX_SUCCESS)) {
if (!use_napi)
dev_lstats_add(dev, length);
dev_sw_netstats_tx_add(dev, 1, length);
else
__veth_xdp_flush(rq);
} else {
......@@ -387,14 +387,6 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
return ret;
}
static u64 veth_stats_tx(struct net_device *dev, u64 *packets, u64 *bytes)
{
struct veth_priv *priv = netdev_priv(dev);
dev_lstats_read(dev, packets, bytes);
return atomic64_read(&priv->dropped);
}
static void veth_stats_rx(struct veth_stats *result, struct net_device *dev)
{
struct veth_priv *priv = netdev_priv(dev);
......@@ -432,24 +424,24 @@ static void veth_get_stats64(struct net_device *dev,
struct veth_priv *priv = netdev_priv(dev);
struct net_device *peer;
struct veth_stats rx;
u64 packets, bytes;
tot->tx_dropped = veth_stats_tx(dev, &packets, &bytes);
tot->tx_bytes = bytes;
tot->tx_packets = packets;
tot->tx_dropped = atomic64_read(&priv->dropped);
dev_fetch_sw_netstats(tot, dev->tstats);
veth_stats_rx(&rx, dev);
tot->tx_dropped += rx.xdp_tx_err;
tot->rx_dropped = rx.rx_drops + rx.peer_tq_xdp_xmit_err;
tot->rx_bytes = rx.xdp_bytes;
tot->rx_packets = rx.xdp_packets;
tot->rx_bytes += rx.xdp_bytes;
tot->rx_packets += rx.xdp_packets;
rcu_read_lock();
peer = rcu_dereference(priv->peer);
if (peer) {
veth_stats_tx(peer, &packets, &bytes);
tot->rx_bytes += bytes;
tot->rx_packets += packets;
struct rtnl_link_stats64 tot_peer = {};
dev_fetch_sw_netstats(&tot_peer, peer->tstats);
tot->rx_bytes += tot_peer.tx_bytes;
tot->rx_packets += tot_peer.tx_packets;
veth_stats_rx(&rx, peer);
tot->tx_dropped += rx.peer_tq_xdp_xmit_err;
......@@ -1783,7 +1775,7 @@ static void veth_setup(struct net_device *dev)
NETIF_F_HW_VLAN_STAG_RX);
dev->needs_free_netdev = true;
dev->priv_destructor = veth_dev_free;
dev->pcpu_stat_type = NETDEV_PCPU_STAT_LSTATS;
dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
dev->max_mtu = ETH_MAX_MTU;
dev->hw_features = VETH_FEATURES;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment