Commit d1b15102 authored by Vladimir Oltean, committed by David S. Miller

net: enetc: add support for XDP_DROP and XDP_PASS

For the RX ring, enetc uses an allocation scheme based on pages split
into two buffers, which is already very efficient at preventing
reallocations and maximizing reuse, so I see no reason to change that.

 +--------+--------+--------+--------+--------+--------+--------+
 |        |        |        |        |        |        |        |
 | half B | half B | half B | half B | half B | half B | half B |
 |        |        |        |        |        |        |        |
 +--------+--------+--------+--------+--------+--------+--------+
 |        |        |        |        |        |        |        |
 | half A | half A | half A | half A | half A | half A | half A | RX ring
 |        |        |        |        |        |        |        |
 +--------+--------+--------+--------+--------+--------+--------+
     ^                                                     ^
     |                                                     |
 next_to_clean                                       next_to_alloc
                                                      next_to_use

                   +--------+--------+--------+--------+--------+
                   |        |        |        |        |        |
                   | half B | half B | half B | half B | half B |
                   |        |        |        |        |        |
 +--------+--------+--------+--------+--------+--------+--------+
 |        |        |        |        |        |        |        |
 | half B | half B | half A | half A | half A | half A | half A | RX ring
 |        |        |        |        |        |        |        |
 +--------+--------+--------+--------+--------+--------+--------+
 |        |        |   ^                                   ^
 | half A | half A |   |                                   |
 |        |        | next_to_clean                   next_to_use
 +--------+--------+
              ^
              |
         next_to_alloc

Then, when enetc_refill_rx_ring is called, whose purpose is to advance
next_to_use, it sees that it can take buffers up to next_to_alloc, and
it says "oh, hey, rx_swbd->page isn't NULL, I don't need to allocate
one!".

The only problem is that for the default PAGE_SIZE of 4096, buffer
sizes are 2048 bytes. While this is enough for normal skb allocations at
an MTU of 1500 bytes, for XDP it isn't, because the XDP headroom is 256
bytes, and after accounting for skb_shared_info and alignment, we are
left with only 1472 usable bytes, which is insufficient for the default
MTU.
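
The arithmetic, assuming the usual 64-bit value of 320 bytes for
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)):

  ENETC_RXB_TRUESIZE (half a 4096-byte page)            2048
  - XDP_PACKET_HEADROOM                                -  256
  - SKB_DATA_ALIGN(sizeof(struct skb_shared_info))     -  320
  ----------------------------------------------------------
  ENETC_RXB_DMA_SIZE_XDP                                1472  (< 1500)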

To solve that problem, we implement scatter/gather processing in the
driver, because we would really like to keep the existing allocation
scheme. A packet of 1500 bytes is then received into one buffer of
1472 bytes and another of 28 bytes.
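
In skb terms, the idea looks roughly like this (a minimal sketch, not
the exact code; it assumes the BD status word carries a "final buffer"
bit, ENETC_RXBD_LSTATUS_F, and the locals are illustrative):

  /* first BD of the frame: becomes the linear part of the skb */
  skb = build_skb(page_address(page) + page_offset, ENETC_RXB_TRUESIZE);
  skb_reserve(skb, rx_ring->buffer_offset);
  skb_put(skb, min_t(u16, size, ENETC_RXB_DMA_SIZE_XDP));

  /* remaining BDs, until the "final" bit is seen: attached as page
   * fragments (the 28-byte tail, for a 1500-byte packet)
   */
  while (!(bd_status & ENETC_RXBD_LSTATUS_F)) {
          /* ... fetch the next BD, its page and length ... */
          skb_add_rx_frag(skb, nr_frags++, page, page_offset,
                          size, ENETC_RXB_TRUESIZE);
  }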

Because the headroom required by XDP is different (and much larger) than
the one required by the network stack, whenever a BPF program is added
or deleted on the port, we drain the existing RX buffers and seed new
ones with the required headroom. We also keep the required headroom in
rx_ring->buffer_offset.
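
Roughly, the program swap can be sketched as below (a simplification,
not the patch verbatim; the close/open cycle is what drains the old
buffers and seeds new ones with the new buffer_offset):

  static int enetc_setup_xdp_prog(struct net_device *dev, struct bpf_prog *prog,
                                  struct netlink_ext_ack *extack)
  {
          struct enetc_ndev_priv *priv = netdev_priv(dev);
          bool is_up = netif_running(dev);
          struct bpf_prog *old_prog;
          int i;

          if (is_up)
                  dev_close(dev);

          old_prog = xchg(&priv->xdp_prog, prog);
          if (old_prog)
                  bpf_prog_put(old_prog);

          for (i = 0; i < priv->num_rx_rings; i++) {
                  struct enetc_bdr *rx_ring = priv->rx_ring[i];

                  rx_ring->xdp.prog = prog;
                  rx_ring->buffer_offset = prog ? XDP_PACKET_HEADROOM :
                                                  ENETC_RXB_PAD;
          }

          if (is_up)
                  dev_open(dev, extack);

          return 0;
  }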

The simplest way to implement XDP_PASS, where an skb must be created, is
to create an xdp_buff based on the next_to_clean RX BDs, but not clear
those BDs from the RX ring yet, just keep the original index at which
the BDs for this frame started. Then, if the verdict is XDP_PASS,
instead of converting the xdp_buff to an skb, we replay a call to
enetc_build_skb (just as in the normal enetc_clean_rx_ring case),
starting from the original BD index.
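
The verdict handling then boils down to either recycling the frame's
buffers or rewinding the loop cursors (a sketch of the switch in the
XDP processing loop; orig_rxbd and orig_i are the values saved before
the xdp_buff was built, and enetc_xdp_drop is the buffer-recycling
helper):

  xdp_act = bpf_prog_run_xdp(prog, &xdp_buff);
  switch (xdp_act) {
  default:
          bpf_warn_invalid_xdp_action(xdp_act);
          fallthrough;
  case XDP_ABORTED:
          trace_xdp_exception(rx_ring->ndev, prog, xdp_act);
          fallthrough;
  case XDP_DROP:
          enetc_xdp_drop(rx_ring, orig_i, i);
          rx_ring->stats.xdp_drops++;
          break;
  case XDP_PASS:
          /* rewind to where this frame's BDs started and build the
           * skb exactly as the non-XDP path would have
           */
          rxbd = orig_rxbd;
          i = orig_i;
          skb = enetc_build_skb(rx_ring, bd_status, &rxbd, &i,
                                &cleaned_cnt, ENETC_RXB_DMA_SIZE_XDP);
          if (skb)
                  napi_gro_receive(napi, skb);
          break;
  }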

We would also like to be minimally invasive to the regular RX data path,
and not check whether there is a BPF program attached to the ring on
every packet. So we create a separate RX ring processing function for
XDP.
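
In the NAPI poll routine, that amounts to one check per ring per poll
rather than one per packet (sketch):

  struct bpf_prog *prog = rx_ring->xdp.prog;

  if (prog)
          work_done = enetc_clean_rx_ring_xdp(rx_ring, napi, budget, prog);
  else
          work_done = enetc_clean_rx_ring(rx_ring, napi, budget);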

Because we only install/remove the BPF program while the interface is
down, we forgo the rcu_read_lock() in enetc_clean_rx_ring, since there
shouldn't be any circumstance in which we are processing packets and
there is a potentially freed BPF program attached to the RX ring.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 65d0cbb4
--- a/drivers/net/ethernet/freescale/enetc/enetc.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc.h
@@ -33,6 +33,8 @@ struct enetc_tx_swbd {
 #define ENETC_RXB_PAD NET_SKB_PAD /* add extra space if needed */
 #define ENETC_RXB_DMA_SIZE \
 	(SKB_WITH_OVERHEAD(ENETC_RXB_TRUESIZE) - ENETC_RXB_PAD)
+#define ENETC_RXB_DMA_SIZE_XDP \
+	(SKB_WITH_OVERHEAD(ENETC_RXB_TRUESIZE) - XDP_PACKET_HEADROOM)
 
 struct enetc_rx_swbd {
 	dma_addr_t dma;
@@ -44,6 +46,12 @@ struct enetc_ring_stats {
 	unsigned int packets;
 	unsigned int bytes;
 	unsigned int rx_alloc_errs;
+	unsigned int xdp_drops;
+};
+
+struct enetc_xdp_data {
+	struct xdp_rxq_info rxq;
+	struct bpf_prog *prog;
 };
 
 #define ENETC_RX_RING_DEFAULT_SIZE 512
@@ -72,6 +80,9 @@ struct enetc_bdr {
 	};
 	void __iomem *idr; /* Interrupt Detect Register pointer */
 
+	int buffer_offset;
+	struct enetc_xdp_data xdp;
+
 	struct enetc_ring_stats stats;
 
 	dma_addr_t bd_dma_base;
@@ -276,6 +287,8 @@ struct enetc_ndev_priv {
 	struct phylink *phylink;
 	int ic_mode;
 	u32 tx_ictt;
+
+	struct bpf_prog *xdp_prog;
 };
 
 /* Messaging */
@@ -315,6 +328,7 @@ int enetc_set_features(struct net_device *ndev,
 int enetc_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd);
 int enetc_setup_tc(struct net_device *ndev, enum tc_setup_type type,
 		   void *type_data);
+int enetc_setup_bpf(struct net_device *dev, struct netdev_bpf *xdp);
 
 /* ethtool */
 void enetc_set_ethtool_ops(struct net_device *ndev);
--- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
@@ -192,6 +192,7 @@ static const struct {
 static const char rx_ring_stats[][ETH_GSTRING_LEN] = {
 	"Rx ring %2d frames",
 	"Rx ring %2d alloc errors",
+	"Rx ring %2d XDP drops",
 };
 
 static const char tx_ring_stats[][ETH_GSTRING_LEN] = {
@@ -273,6 +274,7 @@ static void enetc_get_ethtool_stats(struct net_device *ndev,
 	for (i = 0; i < priv->num_rx_rings; i++) {
 		data[o++] = priv->rx_ring[i]->stats.packets;
 		data[o++] = priv->rx_ring[i]->stats.rx_alloc_errs;
+		data[o++] = priv->rx_ring[i]->stats.xdp_drops;
 	}
 
 	if (!enetc_si_is_pf(priv->si))
--- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
@@ -707,6 +707,7 @@ static const struct net_device_ops enetc_ndev_ops = {
 	.ndo_set_features = enetc_pf_set_features,
 	.ndo_do_ioctl = enetc_ioctl,
 	.ndo_setup_tc = enetc_setup_tc,
+	.ndo_bpf = enetc_setup_bpf,
 };
 
 static void enetc_pf_netdev_setup(struct enetc_si *si, struct net_device *ndev,
...