Commit 6198f446 authored by David S. Miller

Merge branch 'netpoll-make-sure-napi_list-is-safe-for-RCU-traversal'

Jakub Kicinski says:

====================
netpoll: make sure napi_list is safe for RCU traversal

This series is a follow-up to the fix in commit 96e97bc0 ("net:
disable netpoll on fresh napis"). To avoid any latent race conditions
convert dev->napi_list to a proper RCU list. We need minor restructuring
because it looks like netif_napi_del() used to be idempotent, and
it may be quite hard to track down everyone who depends on that.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 8b40f21b 5251ef82
...@@ -825,9 +825,9 @@ static inline void bnx2x_del_all_napi_cnic(struct bnx2x *bp) ...@@ -825,9 +825,9 @@ static inline void bnx2x_del_all_napi_cnic(struct bnx2x *bp)
int i; int i;
for_each_rx_queue_cnic(bp, i) { for_each_rx_queue_cnic(bp, i) {
napi_hash_del(&bnx2x_fp(bp, i, napi)); __netif_napi_del(&bnx2x_fp(bp, i, napi));
netif_napi_del(&bnx2x_fp(bp, i, napi));
} }
synchronize_net();
} }
static inline void bnx2x_del_all_napi(struct bnx2x *bp) static inline void bnx2x_del_all_napi(struct bnx2x *bp)
...@@ -835,9 +835,9 @@ static inline void bnx2x_del_all_napi(struct bnx2x *bp) ...@@ -835,9 +835,9 @@ static inline void bnx2x_del_all_napi(struct bnx2x *bp)
int i; int i;
for_each_eth_queue(bp, i) { for_each_eth_queue(bp, i) {
napi_hash_del(&bnx2x_fp(bp, i, napi)); __netif_napi_del(&bnx2x_fp(bp, i, napi));
netif_napi_del(&bnx2x_fp(bp, i, napi));
} }
synchronize_net();
} }
int bnx2x_set_int_mode(struct bnx2x *bp); int bnx2x_set_int_mode(struct bnx2x *bp);
......
...@@ -8634,10 +8634,9 @@ static void bnxt_del_napi(struct bnxt *bp) ...@@ -8634,10 +8634,9 @@ static void bnxt_del_napi(struct bnxt *bp)
for (i = 0; i < bp->cp_nr_rings; i++) { for (i = 0; i < bp->cp_nr_rings; i++) {
struct bnxt_napi *bnapi = bp->bnapi[i]; struct bnxt_napi *bnapi = bp->bnapi[i];
napi_hash_del(&bnapi->napi); __netif_napi_del(&bnapi->napi);
netif_napi_del(&bnapi->napi);
} }
/* We called napi_hash_del() before netif_napi_del(), we need /* We called __netif_napi_del(), we need
* to respect an RCU grace period before freeing napi structures. * to respect an RCU grace period before freeing napi structures.
*/ */
synchronize_net(); synchronize_net();
......
...@@ -2529,13 +2529,15 @@ static void enic_dev_deinit(struct enic *enic) ...@@ -2529,13 +2529,15 @@ static void enic_dev_deinit(struct enic *enic)
{ {
unsigned int i; unsigned int i;
for (i = 0; i < enic->rq_count; i++) { for (i = 0; i < enic->rq_count; i++)
napi_hash_del(&enic->napi[i]); __netif_napi_del(&enic->napi[i]);
netif_napi_del(&enic->napi[i]);
}
if (vnic_dev_get_intr_mode(enic->vdev) == VNIC_DEV_INTR_MODE_MSIX) if (vnic_dev_get_intr_mode(enic->vdev) == VNIC_DEV_INTR_MODE_MSIX)
for (i = 0; i < enic->wq_count; i++) for (i = 0; i < enic->wq_count; i++)
netif_napi_del(&enic->napi[enic_cq_wq(enic, i)]); __netif_napi_del(&enic->napi[enic_cq_wq(enic, i)]);
/* observe RCU grace period after __netif_napi_del() calls */
synchronize_net();
enic_free_vnic_resources(enic); enic_free_vnic_resources(enic);
enic_clear_intr_mode(enic); enic_clear_intr_mode(enic);
......
...@@ -1029,10 +1029,10 @@ static void ixgbe_free_q_vector(struct ixgbe_adapter *adapter, int v_idx) ...@@ -1029,10 +1029,10 @@ static void ixgbe_free_q_vector(struct ixgbe_adapter *adapter, int v_idx)
WRITE_ONCE(adapter->rx_ring[ring->queue_index], NULL); WRITE_ONCE(adapter->rx_ring[ring->queue_index], NULL);
adapter->q_vector[v_idx] = NULL; adapter->q_vector[v_idx] = NULL;
napi_hash_del(&q_vector->napi); __netif_napi_del(&q_vector->napi);
netif_napi_del(&q_vector->napi);
/* /*
* after a call to __netif_napi_del() napi may still be used and
* ixgbe_get_stats64() might access the rings on this vector, * ixgbe_get_stats64() might access the rings on this vector,
* we must wait a grace period before freeing it. * we must wait a grace period before freeing it.
*/ */
......
...@@ -3543,11 +3543,10 @@ static void myri10ge_free_slices(struct myri10ge_priv *mgp) ...@@ -3543,11 +3543,10 @@ static void myri10ge_free_slices(struct myri10ge_priv *mgp)
ss->fw_stats, ss->fw_stats_bus); ss->fw_stats, ss->fw_stats_bus);
ss->fw_stats = NULL; ss->fw_stats = NULL;
} }
napi_hash_del(&ss->napi); __netif_napi_del(&ss->napi);
netif_napi_del(&ss->napi);
} }
/* Wait till napi structs are no longer used, and then free ss. */ /* Wait till napi structs are no longer used, and then free ss. */
synchronize_rcu(); synchronize_net();
kfree(mgp->ss); kfree(mgp->ss);
mgp->ss = NULL; mgp->ss = NULL;
} }
......
...@@ -897,14 +897,13 @@ static void veth_napi_del(struct net_device *dev) ...@@ -897,14 +897,13 @@ static void veth_napi_del(struct net_device *dev)
struct veth_rq *rq = &priv->rq[i]; struct veth_rq *rq = &priv->rq[i];
napi_disable(&rq->xdp_napi); napi_disable(&rq->xdp_napi);
napi_hash_del(&rq->xdp_napi); __netif_napi_del(&rq->xdp_napi);
} }
synchronize_net(); synchronize_net();
for (i = 0; i < dev->real_num_rx_queues; i++) { for (i = 0; i < dev->real_num_rx_queues; i++) {
struct veth_rq *rq = &priv->rq[i]; struct veth_rq *rq = &priv->rq[i];
netif_napi_del(&rq->xdp_napi);
rq->rx_notify_masked = false; rq->rx_notify_masked = false;
ptr_ring_cleanup(&rq->xdp_ring, veth_ptr_free); ptr_ring_cleanup(&rq->xdp_ring, veth_ptr_free);
} }
......
...@@ -2604,12 +2604,11 @@ static void virtnet_free_queues(struct virtnet_info *vi) ...@@ -2604,12 +2604,11 @@ static void virtnet_free_queues(struct virtnet_info *vi)
int i; int i;
for (i = 0; i < vi->max_queue_pairs; i++) { for (i = 0; i < vi->max_queue_pairs; i++) {
napi_hash_del(&vi->rq[i].napi); __netif_napi_del(&vi->rq[i].napi);
netif_napi_del(&vi->rq[i].napi); __netif_napi_del(&vi->sq[i].napi);
netif_napi_del(&vi->sq[i].napi);
} }
/* We called napi_hash_del() before netif_napi_del(), /* We called __netif_napi_del(),
* we need to respect an RCU grace period before freeing vi->rq * we need to respect an RCU grace period before freeing vi->rq
*/ */
synchronize_net(); synchronize_net();
......
...@@ -70,6 +70,7 @@ struct udp_tunnel_nic; ...@@ -70,6 +70,7 @@ struct udp_tunnel_nic;
struct bpf_prog; struct bpf_prog;
struct xdp_buff; struct xdp_buff;
void synchronize_net(void);
void netdev_set_default_ethtool_ops(struct net_device *dev, void netdev_set_default_ethtool_ops(struct net_device *dev,
const struct ethtool_ops *ops); const struct ethtool_ops *ops);
...@@ -354,7 +355,7 @@ enum { ...@@ -354,7 +355,7 @@ enum {
NAPI_STATE_MISSED, /* reschedule a napi */ NAPI_STATE_MISSED, /* reschedule a napi */
NAPI_STATE_DISABLE, /* Disable pending */ NAPI_STATE_DISABLE, /* Disable pending */
NAPI_STATE_NPSVC, /* Netpoll - don't dequeue from poll_list */ NAPI_STATE_NPSVC, /* Netpoll - don't dequeue from poll_list */
NAPI_STATE_HASHED, /* In NAPI hash (busy polling possible) */ NAPI_STATE_LISTED, /* NAPI added to system lists */
NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */ NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */
NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */ NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */
}; };
...@@ -364,7 +365,7 @@ enum { ...@@ -364,7 +365,7 @@ enum {
NAPIF_STATE_MISSED = BIT(NAPI_STATE_MISSED), NAPIF_STATE_MISSED = BIT(NAPI_STATE_MISSED),
NAPIF_STATE_DISABLE = BIT(NAPI_STATE_DISABLE), NAPIF_STATE_DISABLE = BIT(NAPI_STATE_DISABLE),
NAPIF_STATE_NPSVC = BIT(NAPI_STATE_NPSVC), NAPIF_STATE_NPSVC = BIT(NAPI_STATE_NPSVC),
NAPIF_STATE_HASHED = BIT(NAPI_STATE_HASHED), NAPIF_STATE_LISTED = BIT(NAPI_STATE_LISTED),
NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL), NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL),
NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL), NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL),
}; };
...@@ -488,20 +489,6 @@ static inline bool napi_complete(struct napi_struct *n) ...@@ -488,20 +489,6 @@ static inline bool napi_complete(struct napi_struct *n)
return napi_complete_done(n, 0); return napi_complete_done(n, 0);
} }
/**
* napi_hash_del - remove a NAPI from global table
* @napi: NAPI context
*
* Warning: caller must observe RCU grace period
* before freeing memory containing @napi, if
* this function returns true.
* Note: core networking stack automatically calls it
* from netif_napi_del().
* Drivers might want to call this helper to combine all
* the needed RCU grace periods into a single one.
*/
bool napi_hash_del(struct napi_struct *napi);
/** /**
* napi_disable - prevent NAPI from scheduling * napi_disable - prevent NAPI from scheduling
* @n: NAPI context * @n: NAPI context
...@@ -2367,13 +2354,27 @@ static inline void netif_tx_napi_add(struct net_device *dev, ...@@ -2367,13 +2354,27 @@ static inline void netif_tx_napi_add(struct net_device *dev,
netif_napi_add(dev, napi, poll, weight); netif_napi_add(dev, napi, poll, weight);
} }
/**
* __netif_napi_del - remove a NAPI context
* @napi: NAPI context
*
* Warning: caller must observe RCU grace period before freeing memory
* containing @napi. Drivers might want to call this helper to combine
* all the needed RCU grace periods into a single one.
*/
void __netif_napi_del(struct napi_struct *napi);
/** /**
* netif_napi_del - remove a NAPI context * netif_napi_del - remove a NAPI context
* @napi: NAPI context * @napi: NAPI context
* *
* netif_napi_del() removes a NAPI context from the network device NAPI list * netif_napi_del() removes a NAPI context from the network device NAPI list
*/ */
void netif_napi_del(struct napi_struct *napi); static inline void netif_napi_del(struct napi_struct *napi)
{
__netif_napi_del(napi);
synchronize_net();
}
struct napi_gro_cb { struct napi_gro_cb {
/* Virtual address of skb_shinfo(skb)->frags[0].page + offset. */ /* Virtual address of skb_shinfo(skb)->frags[0].page + offset. */
...@@ -2797,7 +2798,6 @@ static inline void unregister_netdevice(struct net_device *dev) ...@@ -2797,7 +2798,6 @@ static inline void unregister_netdevice(struct net_device *dev)
int netdev_refcnt_read(const struct net_device *dev); int netdev_refcnt_read(const struct net_device *dev);
void free_netdev(struct net_device *dev); void free_netdev(struct net_device *dev);
void netdev_freemem(struct net_device *dev); void netdev_freemem(struct net_device *dev);
void synchronize_net(void);
int init_dummy_netdev(struct net_device *dev); int init_dummy_netdev(struct net_device *dev);
struct net_device *netdev_get_xmit_slave(struct net_device *dev, struct net_device *netdev_get_xmit_slave(struct net_device *dev,
......
...@@ -6533,8 +6533,7 @@ EXPORT_SYMBOL(napi_busy_loop); ...@@ -6533,8 +6533,7 @@ EXPORT_SYMBOL(napi_busy_loop);
static void napi_hash_add(struct napi_struct *napi) static void napi_hash_add(struct napi_struct *napi)
{ {
if (test_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state) || if (test_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state))
test_and_set_bit(NAPI_STATE_HASHED, &napi->state))
return; return;
spin_lock(&napi_hash_lock); spin_lock(&napi_hash_lock);
...@@ -6555,20 +6554,14 @@ static void napi_hash_add(struct napi_struct *napi) ...@@ -6555,20 +6554,14 @@ static void napi_hash_add(struct napi_struct *napi)
/* Warning : caller is responsible to make sure rcu grace period /* Warning : caller is responsible to make sure rcu grace period
* is respected before freeing memory containing @napi * is respected before freeing memory containing @napi
*/ */
bool napi_hash_del(struct napi_struct *napi) static void napi_hash_del(struct napi_struct *napi)
{ {
bool rcu_sync_needed = false;
spin_lock(&napi_hash_lock); spin_lock(&napi_hash_lock);
if (test_and_clear_bit(NAPI_STATE_HASHED, &napi->state)) { hlist_del_init_rcu(&napi->napi_hash_node);
rcu_sync_needed = true;
hlist_del_rcu(&napi->napi_hash_node);
}
spin_unlock(&napi_hash_lock); spin_unlock(&napi_hash_lock);
return rcu_sync_needed;
} }
EXPORT_SYMBOL_GPL(napi_hash_del);
static enum hrtimer_restart napi_watchdog(struct hrtimer *timer) static enum hrtimer_restart napi_watchdog(struct hrtimer *timer)
{ {
...@@ -6600,7 +6593,11 @@ static void init_gro_hash(struct napi_struct *napi) ...@@ -6600,7 +6593,11 @@ static void init_gro_hash(struct napi_struct *napi)
void netif_napi_add(struct net_device *dev, struct napi_struct *napi, void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
int (*poll)(struct napi_struct *, int), int weight) int (*poll)(struct napi_struct *, int), int weight)
{ {
if (WARN_ON(test_and_set_bit(NAPI_STATE_LISTED, &napi->state)))
return;
INIT_LIST_HEAD(&napi->poll_list); INIT_LIST_HEAD(&napi->poll_list);
INIT_HLIST_NODE(&napi->napi_hash_node);
hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
napi->timer.function = napi_watchdog; napi->timer.function = napi_watchdog;
init_gro_hash(napi); init_gro_hash(napi);
...@@ -6653,18 +6650,19 @@ static void flush_gro_hash(struct napi_struct *napi) ...@@ -6653,18 +6650,19 @@ static void flush_gro_hash(struct napi_struct *napi)
} }
/* Must be called in process context */ /* Must be called in process context */
void netif_napi_del(struct napi_struct *napi) void __netif_napi_del(struct napi_struct *napi)
{ {
might_sleep(); if (!test_and_clear_bit(NAPI_STATE_LISTED, &napi->state))
if (napi_hash_del(napi)) return;
synchronize_net();
list_del_init(&napi->dev_list); napi_hash_del(napi);
list_del_rcu(&napi->dev_list);
napi_free_frags(napi); napi_free_frags(napi);
flush_gro_hash(napi); flush_gro_hash(napi);
napi->gro_bitmask = 0; napi->gro_bitmask = 0;
} }
EXPORT_SYMBOL(netif_napi_del); EXPORT_SYMBOL(__netif_napi_del);
static int napi_poll(struct napi_struct *n, struct list_head *repoll) static int napi_poll(struct napi_struct *n, struct list_head *repoll)
{ {
......
...@@ -297,7 +297,7 @@ static int netpoll_owner_active(struct net_device *dev) ...@@ -297,7 +297,7 @@ static int netpoll_owner_active(struct net_device *dev)
{ {
struct napi_struct *napi; struct napi_struct *napi;
list_for_each_entry(napi, &dev->napi_list, dev_list) { list_for_each_entry_rcu(napi, &dev->napi_list, dev_list) {
if (napi->poll_owner == smp_processor_id()) if (napi->poll_owner == smp_processor_id())
return 1; return 1;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment