Commit 3ac77ecd authored by Jakub Kicinski

Merge branch 'net-xdp-execute-xdp_do_flush-before-napi_complete_done'

Magnus Karlsson says:

====================
net: xdp: execute xdp_do_flush() before napi_complete_done()

Make sure that xdp_do_flush() is always executed before
napi_complete_done(). This is important for two reasons. First, a
redirect to an XSKMAP assumes that a call to xdp_do_redirect() from
napi context X on CPU Y will be followed by an xdp_do_flush() from the
same napi context and CPU. This is not guaranteed if
napi_complete_done() is executed before xdp_do_flush(), as that tells
the napi logic that it is fine to schedule napi context X on another
CPU. Details from a production system triggering this bug using the
veth driver can be found in [1].

The second reason is that the XDP_REDIRECT logic itself relies on
remaining inside a single NAPI instance through to the xdp_do_flush() call
for RCU protection of all in-kernel data structures. Details can be
found in [2].
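
As an illustration of the required ordering, here is a minimal sketch
of a NAPI poll callback laid out the way this series arranges the
drivers below. All names (foo_napi_poll, struct foo_priv, foo_clean_rx,
foo_enable_interrupts, the xdp_redirect flag) are generic placeholders,
not code from any of the patched drivers:

static int foo_napi_poll(struct napi_struct *napi, int budget)
{
	struct foo_priv *priv = container_of(napi, struct foo_priv, napi);
	int work_done;

	/* RX processing; an attached XDP program may call
	 * xdp_do_redirect() from here.
	 */
	work_done = foo_clean_rx(priv, budget);

	/* Flush pending redirects while still inside this NAPI context
	 * and on this CPU, before napi_complete_done() can allow the
	 * poll to be rescheduled elsewhere.
	 */
	if (priv->xdp_redirect)
		xdp_do_flush();

	if (work_done < budget && napi_complete_done(napi, work_done))
		foo_enable_interrupts(priv);

	return work_done;
}

The hunks below move each driver's xdp_do_flush() call into this
position relative to napi_complete_done().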

The drivers have only been compile-tested since I do not own any of
the HW below. So if you are a maintainer, it would be great if you
could take a quick look to make sure I did not mess something up.

Note that these were the drivers I found to violate the ordering by
running a simple script and then manually checking the ones that came
up as potential offenders. The script is by no means perfect, though,
so there might still be offenders out there, since it can produce
false negatives.

[1] https://lore.kernel.org/r/20221220185903.1105011-1-sbohrer@cloudflare.com
[2] https://lore.kernel.org/all/20210624160609.292325-1-toke@redhat.com/
====================

Link: https://lore.kernel.org/r/20230125074901.2737-1-magnus.karlsson@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 28b4387f a3191c4d
@@ -2410,6 +2410,9 @@ static int dpaa_eth_poll(struct napi_struct *napi, int budget)
 	cleaned = qman_p_poll_dqrr(np->p, budget);
+	if (np->xdp_act & XDP_REDIRECT)
+		xdp_do_flush();
 	if (cleaned < budget) {
 		napi_complete_done(napi, cleaned);
 		qman_p_irqsource_add(np->p, QM_PIRQ_DQRI);
@@ -2417,9 +2420,6 @@ static int dpaa_eth_poll(struct napi_struct *napi, int budget)
 		qman_p_irqsource_add(np->p, QM_PIRQ_DQRI);
 	}
-	if (np->xdp_act & XDP_REDIRECT)
-		xdp_do_flush();
 	return cleaned;
 }
@@ -1993,10 +1993,15 @@ static int dpaa2_eth_poll(struct napi_struct *napi, int budget)
 		if (rx_cleaned >= budget ||
 		    txconf_cleaned >= DPAA2_ETH_TXCONF_PER_NAPI) {
 			work_done = budget;
+			if (ch->xdp.res & XDP_REDIRECT)
+				xdp_do_flush();
 			goto out;
 		}
 	} while (store_cleaned);
+	if (ch->xdp.res & XDP_REDIRECT)
+		xdp_do_flush();
 	/* Update NET DIM with the values for this CDAN */
 	dpaa2_io_update_net_dim(ch->dpio, ch->stats.frames_per_cdan,
 				ch->stats.bytes_per_cdan);
@@ -2032,9 +2037,7 @@ static int dpaa2_eth_poll(struct napi_struct *napi, int budget)
 		txc_fq->dq_bytes = 0;
 	}
-	if (ch->xdp.res & XDP_REDIRECT)
-		xdp_do_flush_map();
-	else if (rx_cleaned && ch->xdp.res & XDP_TX)
+	if (rx_cleaned && ch->xdp.res & XDP_TX)
 		dpaa2_eth_xdp_tx_flush(priv, ch, &priv->fq[flowid]);
 	return work_done;
@@ -608,12 +608,12 @@ static int lan966x_fdma_napi_poll(struct napi_struct *napi, int weight)
 		lan966x_fdma_rx_reload(rx);
 	}
-	if (counter < weight && napi_complete_done(napi, counter))
-		lan_wr(0xff, lan966x, FDMA_INTR_DB_ENA);
 	if (redirect)
 		xdp_do_flush();
+	if (counter < weight && napi_complete_done(napi, counter))
+		lan_wr(0xff, lan966x, FDMA_INTR_DB_ENA);
 	return counter;
 }
@@ -1438,6 +1438,10 @@ int qede_poll(struct napi_struct *napi, int budget)
 	rx_work_done = (likely(fp->type & QEDE_FASTPATH_RX) &&
 			qede_has_rx_work(fp->rxq)) ?
 			qede_rx_int(fp, budget) : 0;
+	if (fp->xdp_xmit & QEDE_XDP_REDIRECT)
+		xdp_do_flush();
 	/* Handle case where we are called by netpoll with a budget of 0 */
 	if (rx_work_done < budget || !budget) {
 		if (!qede_poll_is_more_work(fp)) {
@@ -1457,9 +1461,6 @@ int qede_poll(struct napi_struct *napi, int budget)
 			qede_update_tx_producer(fp->xdp_tx);
 	}
-	if (fp->xdp_xmit & QEDE_XDP_REDIRECT)
-		xdp_do_flush_map();
 	return rx_work_done;
 }
@@ -1677,13 +1677,13 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
 	received = virtnet_receive(rq, budget, &xdp_xmit);
+	if (xdp_xmit & VIRTIO_XDP_REDIR)
+		xdp_do_flush();
 	/* Out of packets? */
 	if (received < budget)
 		virtqueue_napi_complete(napi, rq->vq, received);
-	if (xdp_xmit & VIRTIO_XDP_REDIR)
-		xdp_do_flush();
 	if (xdp_xmit & VIRTIO_XDP_TX) {
 		sq = virtnet_xdp_get_sq(vi);
 		if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) {