Commit ca8d0fa7 authored by Daniel Borkmann's avatar Daniel Borkmann

Merge branch 'bpf-fix-xsk-wakeup'

Maxim Mikityanskiy says:

====================
This series addresses the issue described in the commit message of the
first patch: lack of synchronization between XSK wakeup and destroying
the resources used by XSK wakeup. The idea is similar to napi_synchronize.
The series contains fixes for the drivers that implement XSK.

v2 incorporates changes suggested by Björn:

1. Call synchronize_rcu in Intel drivers only if the XDP program is
   being unloaded.
2. Don't forget rcu_read_lock when wakeup is called from xsk_poll.
3. Use xs->zc as the condition to call ndo_xsk_wakeup.
====================
Signed-off-by: default avatarDaniel Borkmann <daniel@iogearbox.net>
parents e4730423 c0fdccfd
......@@ -1152,7 +1152,7 @@ void i40e_set_fec_in_flags(u8 fec_cfg, u32 *flags);
static inline bool i40e_enabled_xdp_vsi(struct i40e_vsi *vsi)
{
return !!vsi->xdp_prog;
return !!READ_ONCE(vsi->xdp_prog);
}
int i40e_create_queue_channel(struct i40e_vsi *vsi, struct i40e_channel *ch);
......
......@@ -6823,8 +6823,8 @@ void i40e_down(struct i40e_vsi *vsi)
for (i = 0; i < vsi->num_queue_pairs; i++) {
i40e_clean_tx_ring(vsi->tx_rings[i]);
if (i40e_enabled_xdp_vsi(vsi)) {
/* Make sure that in-progress ndo_xdp_xmit
* calls are completed.
/* Make sure that in-progress ndo_xdp_xmit and
* ndo_xsk_wakeup calls are completed.
*/
synchronize_rcu();
i40e_clean_tx_ring(vsi->xdp_rings[i]);
......@@ -12546,8 +12546,12 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi,
old_prog = xchg(&vsi->xdp_prog, prog);
if (need_reset)
if (need_reset) {
if (!prog)
/* Wait until ndo_xsk_wakeup completes. */
synchronize_rcu();
i40e_reset_and_rebuild(pf, true, true);
}
for (i = 0; i < vsi->num_queue_pairs; i++)
WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog);
......
......@@ -787,8 +787,12 @@ int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
{
struct i40e_netdev_priv *np = netdev_priv(dev);
struct i40e_vsi *vsi = np->vsi;
struct i40e_pf *pf = vsi->back;
struct i40e_ring *ring;
if (test_bit(__I40E_CONFIG_BUSY, pf->state))
return -ENETDOWN;
if (test_bit(__I40E_VSI_DOWN, vsi->state))
return -ENETDOWN;
......
......@@ -10261,7 +10261,12 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog)
/* If transitioning XDP modes reconfigure rings */
if (need_reset) {
int err = ixgbe_setup_tc(dev, adapter->hw_tcs);
int err;
if (!prog)
/* Wait until ndo_xsk_wakeup completes. */
synchronize_rcu();
err = ixgbe_setup_tc(dev, adapter->hw_tcs);
if (err) {
rcu_assign_pointer(adapter->xdp_prog, old_prog);
......
......@@ -709,10 +709,14 @@ int ixgbe_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
if (qid >= adapter->num_xdp_queues)
return -ENXIO;
if (!adapter->xdp_ring[qid]->xsk_umem)
ring = adapter->xdp_ring[qid];
if (test_bit(__IXGBE_TX_DISABLED, &ring->state))
return -ENETDOWN;
if (!ring->xsk_umem)
return -ENXIO;
ring = adapter->xdp_ring[qid];
if (!napi_if_scheduled_mark_missed(&ring->q_vector->napi)) {
u64 eics = BIT_ULL(ring->q_vector->v_idx);
......
......@@ -760,7 +760,7 @@ enum {
MLX5E_STATE_OPENED,
MLX5E_STATE_DESTROYING,
MLX5E_STATE_XDP_TX_ENABLED,
MLX5E_STATE_XDP_OPEN,
MLX5E_STATE_XDP_ACTIVE,
};
struct mlx5e_rqt {
......
......@@ -75,12 +75,18 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
static inline void mlx5e_xdp_tx_enable(struct mlx5e_priv *priv)
{
set_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state);
if (priv->channels.params.xdp_prog)
set_bit(MLX5E_STATE_XDP_ACTIVE, &priv->state);
}
static inline void mlx5e_xdp_tx_disable(struct mlx5e_priv *priv)
{
if (priv->channels.params.xdp_prog)
clear_bit(MLX5E_STATE_XDP_ACTIVE, &priv->state);
clear_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state);
/* let other device's napi(s) see our new state */
/* Let other device's napi(s) and XSK wakeups see our new state. */
synchronize_rcu();
}
......@@ -89,19 +95,9 @@ static inline bool mlx5e_xdp_tx_is_enabled(struct mlx5e_priv *priv)
return test_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state);
}
static inline void mlx5e_xdp_set_open(struct mlx5e_priv *priv)
{
set_bit(MLX5E_STATE_XDP_OPEN, &priv->state);
}
static inline void mlx5e_xdp_set_closed(struct mlx5e_priv *priv)
{
clear_bit(MLX5E_STATE_XDP_OPEN, &priv->state);
}
static inline bool mlx5e_xdp_is_open(struct mlx5e_priv *priv)
static inline bool mlx5e_xdp_is_active(struct mlx5e_priv *priv)
{
return test_bit(MLX5E_STATE_XDP_OPEN, &priv->state);
return test_bit(MLX5E_STATE_XDP_ACTIVE, &priv->state);
}
static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_xdpsq *sq)
......
......@@ -144,6 +144,7 @@ void mlx5e_close_xsk(struct mlx5e_channel *c)
{
clear_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
napi_synchronize(&c->napi);
synchronize_rcu(); /* Sync with the XSK wakeup. */
mlx5e_close_rq(&c->xskrq);
mlx5e_close_cq(&c->xskrq.cq);
......
......@@ -14,7 +14,7 @@ int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
struct mlx5e_channel *c;
u16 ix;
if (unlikely(!mlx5e_xdp_is_open(priv)))
if (unlikely(!mlx5e_xdp_is_active(priv)))
return -ENETDOWN;
if (unlikely(!mlx5e_qid_get_ch_if_in_group(params, qid, MLX5E_RQ_GROUP_XSK, &ix)))
......
......@@ -3000,12 +3000,9 @@ void mlx5e_timestamp_init(struct mlx5e_priv *priv)
int mlx5e_open_locked(struct net_device *netdev)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
bool is_xdp = priv->channels.params.xdp_prog;
int err;
set_bit(MLX5E_STATE_OPENED, &priv->state);
if (is_xdp)
mlx5e_xdp_set_open(priv);
err = mlx5e_open_channels(priv, &priv->channels);
if (err)
......@@ -3020,8 +3017,6 @@ int mlx5e_open_locked(struct net_device *netdev)
return 0;
err_clear_state_opened_flag:
if (is_xdp)
mlx5e_xdp_set_closed(priv);
clear_bit(MLX5E_STATE_OPENED, &priv->state);
return err;
}
......@@ -3053,8 +3048,6 @@ int mlx5e_close_locked(struct net_device *netdev)
if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
return 0;
if (priv->channels.params.xdp_prog)
mlx5e_xdp_set_closed(priv);
clear_bit(MLX5E_STATE_OPENED, &priv->state);
netif_carrier_off(priv->netdev);
......@@ -4371,16 +4364,6 @@ static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog)
return 0;
}
static int mlx5e_xdp_update_state(struct mlx5e_priv *priv)
{
if (priv->channels.params.xdp_prog)
mlx5e_xdp_set_open(priv);
else
mlx5e_xdp_set_closed(priv);
return 0;
}
static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
......@@ -4415,7 +4398,7 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
mlx5e_set_rq_type(priv->mdev, &new_channels.params);
old_prog = priv->channels.params.xdp_prog;
err = mlx5e_safe_switch_channels(priv, &new_channels, mlx5e_xdp_update_state);
err = mlx5e_safe_switch_channels(priv, &new_channels, NULL);
if (err)
goto unlock;
} else {
......
......@@ -334,12 +334,21 @@ bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc)
}
EXPORT_SYMBOL(xsk_umem_consume_tx);
static int xsk_zc_xmit(struct xdp_sock *xs)
static int xsk_wakeup(struct xdp_sock *xs, u8 flags)
{
struct net_device *dev = xs->dev;
int err;
rcu_read_lock();
err = dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, flags);
rcu_read_unlock();
return err;
}
return dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id,
XDP_WAKEUP_TX);
static int xsk_zc_xmit(struct xdp_sock *xs)
{
return xsk_wakeup(xs, XDP_WAKEUP_TX);
}
static void xsk_destruct_skb(struct sk_buff *skb)
......@@ -453,19 +462,16 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock,
__poll_t mask = datagram_poll(file, sock, wait);
struct sock *sk = sock->sk;
struct xdp_sock *xs = xdp_sk(sk);
struct net_device *dev;
struct xdp_umem *umem;
if (unlikely(!xsk_is_bound(xs)))
return mask;
dev = xs->dev;
umem = xs->umem;
if (umem->need_wakeup) {
if (dev->netdev_ops->ndo_xsk_wakeup)
dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id,
umem->need_wakeup);
if (xs->zc)
xsk_wakeup(xs, umem->need_wakeup);
else
/* Poll needs to drive Tx also in copy mode */
__xsk_sendmsg(sk);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment