net: enetc: add support for XDP_DROP and XDP_PASS

For the RX ring, enetc uses an allocation scheme based on pages split into two buffers, which is already very efficient in terms of preventing reallocations / maximizing reuse, so I see no reason why I would change that. +--------+--------+--------+--------+--------+--------+--------+ | | | | | | | | | half B | half B | half B | half B | half B | half B | half B | | | | | | | | | +--------+--------+--------+--------+--------+--------+--------+ | | | | | | | | | half A | half A | half A | half A | half A | half A | half A | RX ring | | | | | | | | +--------+--------+--------+--------+--------+--------+--------+ ^ ^ | | next_to_clean next_to_alloc next_to_use +--------+--------+--------+--------+--------+ | | | | | | | half B | half B | half B | half B | half B | | | | | | | +--------+--------+--------+--------+--------+--------+--------+ | | | | | | | | | half B | half B | half A | half A | half A | half A | half A | RX ring | | | | | | | | +--------+--------+--------+--------+--------+--------+--------+ | | | ^ ^ | half A | half A | | | | | | next_to_clean next_to_use +--------+--------+ ^ | next_to_alloc then when enetc_refill_rx_ring is called, whose purpose is to advance next_to_use, it sees that it can take buffers up to next_to_alloc, and it says "oh, hey, rx_swbd->page isn't NULL, I don't need to allocate one!". The only problem is that for default PAGE_SIZE values of 4096, buffer sizes are 2048 bytes. While this is enough for normal skb allocations at an MTU of 1500 bytes, for XDP it isn't, because the XDP headroom is 256 bytes, and including skb_shared_info and alignment, we end up being able to make use of only 1472 bytes, which is insufficient for the default MTU. To solve that problem, we implement scatter/gather processing in the driver, because we would really like to keep the existing allocation scheme. A packet of 1500 bytes is received in a buffer of 1472 bytes and another one of 28 bytes. Because the headroom required by XDP is different (and much larger) than the one required by the network stack, whenever a BPF program is added or deleted on the port, we drain the existing RX buffers and seed new ones with the required headroom. We also keep the required headroom in rx_ring->buffer_offset. The simplest way to implement XDP_PASS, where an skb must be created, is to create an xdp_buff based on the next_to_clean RX BDs, but not clear those BDs from the RX ring yet, just keep the original index at which the BDs for this frame started. Then, if the verdict is XDP_PASS, instead of converting the xdb_buff to an skb, we replay a call to enetc_build_skb (just as in the normal enetc_clean_rx_ring case), starting from the original BD index. We would also like to be minimally invasive to the regular RX data path, and not check whether there is a BPF program attached to the ring on every packet. So we create a separate RX ring processing function for XDP. Because we only install/remove the BPF program while the interface is down, we forgo the rcu_read_lock() in enetc_clean_rx_ring, since there shouldn't be any circumstance in which we are processing packets and there is a potentially freed BPF program attached to the RX ring. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Signed-off-by: David S. Miller <davem@davemloft.net>

net: enetc: add support for XDP_DROP and XDP_PASS
For the RX ring, enetc uses an allocation scheme based on pages split into two buffers, which is already very efficient in terms of preventing reallocations / maximizing reuse, so I see no reason why I would change that. +--------+--------+--------+--------+--------+--------+--------+ | | | | | | | | | half B | half B | half B | half B | half B | half B | half B | | | | | | | | | +--------+--------+--------+--------+--------+--------+--------+ | | | | | | | | | half A | half A | half A | half A | half A | half A | half A | RX ring | | | | | | | | +--------+--------+--------+--------+--------+--------+--------+ ^ ^ | | next_to_clean next_to_alloc next_to_use +--------+--------+--------+--------+--------+ | | | | | | | half B | half B | half B | half B | half B | | | | | | | +--------+--------+--------+--------+--------+--------+--------+ | | | | | | | | | half B | half B | half A | half A | half A | half A | half A | RX ring | | | | | | | | +--------+--------+--------+--------+--------+--------+--------+ | | | ^ ^ | half A | half A | | | | | | next_to_clean next_to_use +--------+--------+ ^ | next_to_alloc then when enetc_refill_rx_ring is called, whose purpose is to advance next_to_use, it sees that it can take buffers up to next_to_alloc, and it says "oh, hey, rx_swbd->page isn't NULL, I don't need to allocate one!". The only problem is that for default PAGE_SIZE values of 4096, buffer sizes are 2048 bytes. While this is enough for normal skb allocations at an MTU of 1500 bytes, for XDP it isn't, because the XDP headroom is 256 bytes, and including skb_shared_info and alignment, we end up being able to make use of only 1472 bytes, which is insufficient for the default MTU. To solve that problem, we implement scatter/gather processing in the driver, because we would really like to keep the existing allocation scheme. A packet of 1500 bytes is received in a buffer of 1472 bytes and another one of 28 bytes. Because the headroom required by XDP is different (and much larger) than the one required by the network stack, whenever a BPF program is added or deleted on the port, we drain the existing RX buffers and seed new ones with the required headroom. We also keep the required headroom in rx_ring->buffer_offset. The simplest way to implement XDP_PASS, where an skb must be created, is to create an xdp_buff based on the next_to_clean RX BDs, but not clear those BDs from the RX ring yet, just keep the original index at which the BDs for this frame started. Then, if the verdict is XDP_PASS, instead of converting the xdb_buff to an skb, we replay a call to enetc_build_skb (just as in the normal enetc_clean_rx_ring case), starting from the original BD index. We would also like to be minimally invasive to the regular RX data path, and not check whether there is a BPF program attached to the ring on every packet. So we create a separate RX ring processing function for XDP. Because we only install/remove the BPF program while the interface is down, we forgo the rcu_read_lock() in enetc_clean_rx_ring, since there shouldn't be any circumstance in which we are processing packets and there is a potentially freed BPF program attached to the RX ring. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Signed-off-by: David S. Miller <davem@davemloft.net>
d1b15102 · Vladimir Oltean · David S. Miller · 65d0cbb4 · d1b15102 · d1b15102
Commit d1b15102 authored Mar 31, 2021 by Vladimir Oltean Committed by David S. Miller Mar 31, 2021
4 changed files
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
--- a/drivers/net/ethernet/freescale/enetc/enetc.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc.h
@@ -33,6 +33,8 @@ struct enetc_tx_swbd {
 #define ENETC_RXB_PAD		NET_SKB_PAD /* add extra space if needed */
 #define ENETC_RXB_DMA_SIZE	\
 	(SKB_WITH_OVERHEAD(ENETC_RXB_TRUESIZE) - ENETC_RXB_PAD)
+#define ENETC_RXB_DMA_SIZE_XDP	\
+	(SKB_WITH_OVERHEAD(ENETC_RXB_TRUESIZE) - XDP_PACKET_HEADROOM)
 struct enetc_rx_swbd {
 	dma_addr_t dma;
@@ -44,6 +46,12 @@ struct enetc_ring_stats {
 	unsigned int packets;
 	unsigned int bytes;
 	unsigned int rx_alloc_errs;
+	unsigned int xdp_drops;
+};
+struct enetc_xdp_data {
+	struct xdp_rxq_info rxq;
+	struct bpf_prog *prog;
 };
 #define ENETC_RX_RING_DEFAULT_SIZE	512
@@ -72,6 +80,9 @@ struct enetc_bdr {
 	};
 	void __iomem *idr; /* Interrupt Detect Register pointer */
+	int buffer_offset;
+	struct enetc_xdp_data xdp;
 	struct enetc_ring_stats stats;
 	dma_addr_t bd_dma_base;
@@ -276,6 +287,8 @@ struct enetc_ndev_priv {
 	struct phylink *phylink;
 	int ic_mode;
 	u32 tx_ictt;
+	struct bpf_prog *xdp_prog;
 };
 /* Messaging */
@@ -315,6 +328,7 @@ int enetc_set_features(struct net_device *ndev,
 int enetc_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd);
 int enetc_setup_tc(struct net_device *ndev, enum tc_setup_type type,
 		   void *type_data);
+int enetc_setup_bpf(struct net_device *dev, struct netdev_bpf *xdp);
 /* ethtool */
 void enetc_set_ethtool_ops(struct net_device *ndev);

--- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
@@ -192,6 +192,7 @@ static const struct {
 static const char rx_ring_stats[][ETH_GSTRING_LEN] = {
 	"Rx ring %2d frames",
 	"Rx ring %2d alloc errors",
+	"Rx ring %2d XDP drops",
 };
 static const char tx_ring_stats[][ETH_GSTRING_LEN] = {
@@ -273,6 +274,7 @@ static void enetc_get_ethtool_stats(struct net_device *ndev,
 	for (i = 0; i < priv->num_rx_rings; i++) {
 		data[o++] = priv->rx_ring[i]->stats.packets;
 		data[o++] = priv->rx_ring[i]->stats.rx_alloc_errs;
+		data[o++] = priv->rx_ring[i]->stats.xdp_drops;
 	}
 	if (!enetc_si_is_pf(priv->si))

--- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
@@ -707,6 +707,7 @@ static const struct net_device_ops enetc_ndev_ops = {
 	.ndo_set_features	= enetc_pf_set_features,
 	.ndo_do_ioctl		= enetc_ioctl,
 	.ndo_setup_tc		= enetc_setup_tc,
+	.ndo_bpf		= enetc_setup_bpf,
 };
 static void enetc_pf_netdev_setup(struct enetc_si *si, struct net_device *ndev,