Commit 6198f446 authored by David S. Miller's avatar David S. Miller

Merge branch 'netpoll-make-sure-napi_list-is-safe-for-RCU-traversal'

Jakub Kicinski says:

====================
netpoll: make sure napi_list is safe for RCU traversal

This series is a follow-up to the fix in commit 96e97bc0 ("net:
disable netpoll on fresh napis"). To avoid any latent race conditions
convert dev->napi_list to a proper RCU list. We need minor restructuring
because it looks like netif_napi_del() used to be idempotent, and
it may be quite hard to track down everyone who depends on that.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 8b40f21b 5251ef82
......@@ -825,9 +825,9 @@ static inline void bnx2x_del_all_napi_cnic(struct bnx2x *bp)
int i;
for_each_rx_queue_cnic(bp, i) {
napi_hash_del(&bnx2x_fp(bp, i, napi));
netif_napi_del(&bnx2x_fp(bp, i, napi));
__netif_napi_del(&bnx2x_fp(bp, i, napi));
}
synchronize_net();
}
static inline void bnx2x_del_all_napi(struct bnx2x *bp)
......@@ -835,9 +835,9 @@ static inline void bnx2x_del_all_napi(struct bnx2x *bp)
int i;
for_each_eth_queue(bp, i) {
napi_hash_del(&bnx2x_fp(bp, i, napi));
netif_napi_del(&bnx2x_fp(bp, i, napi));
__netif_napi_del(&bnx2x_fp(bp, i, napi));
}
synchronize_net();
}
int bnx2x_set_int_mode(struct bnx2x *bp);
......
......@@ -8634,10 +8634,9 @@ static void bnxt_del_napi(struct bnxt *bp)
for (i = 0; i < bp->cp_nr_rings; i++) {
struct bnxt_napi *bnapi = bp->bnapi[i];
napi_hash_del(&bnapi->napi);
netif_napi_del(&bnapi->napi);
__netif_napi_del(&bnapi->napi);
}
/* We called napi_hash_del() before netif_napi_del(), we need
/* We called __netif_napi_del(), we need
* to respect an RCU grace period before freeing napi structures.
*/
synchronize_net();
......
......@@ -2529,13 +2529,15 @@ static void enic_dev_deinit(struct enic *enic)
{
unsigned int i;
for (i = 0; i < enic->rq_count; i++) {
napi_hash_del(&enic->napi[i]);
netif_napi_del(&enic->napi[i]);
}
for (i = 0; i < enic->rq_count; i++)
__netif_napi_del(&enic->napi[i]);
if (vnic_dev_get_intr_mode(enic->vdev) == VNIC_DEV_INTR_MODE_MSIX)
for (i = 0; i < enic->wq_count; i++)
netif_napi_del(&enic->napi[enic_cq_wq(enic, i)]);
__netif_napi_del(&enic->napi[enic_cq_wq(enic, i)]);
/* observe RCU grace period after __netif_napi_del() calls */
synchronize_net();
enic_free_vnic_resources(enic);
enic_clear_intr_mode(enic);
......
......@@ -1029,10 +1029,10 @@ static void ixgbe_free_q_vector(struct ixgbe_adapter *adapter, int v_idx)
WRITE_ONCE(adapter->rx_ring[ring->queue_index], NULL);
adapter->q_vector[v_idx] = NULL;
napi_hash_del(&q_vector->napi);
netif_napi_del(&q_vector->napi);
__netif_napi_del(&q_vector->napi);
/*
* after a call to __netif_napi_del() napi may still be used and
* ixgbe_get_stats64() might access the rings on this vector,
* we must wait a grace period before freeing it.
*/
......
......@@ -3543,11 +3543,10 @@ static void myri10ge_free_slices(struct myri10ge_priv *mgp)
ss->fw_stats, ss->fw_stats_bus);
ss->fw_stats = NULL;
}
napi_hash_del(&ss->napi);
netif_napi_del(&ss->napi);
__netif_napi_del(&ss->napi);
}
/* Wait till napi structs are no longer used, and then free ss. */
synchronize_rcu();
synchronize_net();
kfree(mgp->ss);
mgp->ss = NULL;
}
......
......@@ -897,14 +897,13 @@ static void veth_napi_del(struct net_device *dev)
struct veth_rq *rq = &priv->rq[i];
napi_disable(&rq->xdp_napi);
napi_hash_del(&rq->xdp_napi);
__netif_napi_del(&rq->xdp_napi);
}
synchronize_net();
for (i = 0; i < dev->real_num_rx_queues; i++) {
struct veth_rq *rq = &priv->rq[i];
netif_napi_del(&rq->xdp_napi);
rq->rx_notify_masked = false;
ptr_ring_cleanup(&rq->xdp_ring, veth_ptr_free);
}
......
......@@ -2604,12 +2604,11 @@ static void virtnet_free_queues(struct virtnet_info *vi)
int i;
for (i = 0; i < vi->max_queue_pairs; i++) {
napi_hash_del(&vi->rq[i].napi);
netif_napi_del(&vi->rq[i].napi);
netif_napi_del(&vi->sq[i].napi);
__netif_napi_del(&vi->rq[i].napi);
__netif_napi_del(&vi->sq[i].napi);
}
/* We called napi_hash_del() before netif_napi_del(),
/* We called __netif_napi_del(),
* we need to respect an RCU grace period before freeing vi->rq
*/
synchronize_net();
......
......@@ -70,6 +70,7 @@ struct udp_tunnel_nic;
struct bpf_prog;
struct xdp_buff;
void synchronize_net(void);
void netdev_set_default_ethtool_ops(struct net_device *dev,
const struct ethtool_ops *ops);
......@@ -354,7 +355,7 @@ enum {
NAPI_STATE_MISSED, /* reschedule a napi */
NAPI_STATE_DISABLE, /* Disable pending */
NAPI_STATE_NPSVC, /* Netpoll - don't dequeue from poll_list */
NAPI_STATE_HASHED, /* In NAPI hash (busy polling possible) */
NAPI_STATE_LISTED, /* NAPI added to system lists */
NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */
NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */
};
......@@ -364,7 +365,7 @@ enum {
NAPIF_STATE_MISSED = BIT(NAPI_STATE_MISSED),
NAPIF_STATE_DISABLE = BIT(NAPI_STATE_DISABLE),
NAPIF_STATE_NPSVC = BIT(NAPI_STATE_NPSVC),
NAPIF_STATE_HASHED = BIT(NAPI_STATE_HASHED),
NAPIF_STATE_LISTED = BIT(NAPI_STATE_LISTED),
NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL),
NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL),
};
......@@ -488,20 +489,6 @@ static inline bool napi_complete(struct napi_struct *n)
return napi_complete_done(n, 0);
}
/**
* napi_hash_del - remove a NAPI from global table
* @napi: NAPI context
*
* Warning: caller must observe RCU grace period
* before freeing memory containing @napi, if
* this function returns true.
* Note: core networking stack automatically calls it
* from netif_napi_del().
* Drivers might want to call this helper to combine all
* the needed RCU grace periods into a single one.
*/
bool napi_hash_del(struct napi_struct *napi);
/**
* napi_disable - prevent NAPI from scheduling
* @n: NAPI context
......@@ -2367,13 +2354,27 @@ static inline void netif_tx_napi_add(struct net_device *dev,
netif_napi_add(dev, napi, poll, weight);
}
/**
* __netif_napi_del - remove a NAPI context
* @napi: NAPI context
*
* Warning: caller must observe RCU grace period before freeing memory
* containing @napi. Drivers might want to call this helper to combine
* all the needed RCU grace periods into a single one.
*/
void __netif_napi_del(struct napi_struct *napi);
/**
* netif_napi_del - remove a NAPI context
* @napi: NAPI context
*
* netif_napi_del() removes a NAPI context from the network device NAPI list
*/
void netif_napi_del(struct napi_struct *napi);
static inline void netif_napi_del(struct napi_struct *napi)
{
__netif_napi_del(napi);
synchronize_net();
}
struct napi_gro_cb {
/* Virtual address of skb_shinfo(skb)->frags[0].page + offset. */
......@@ -2797,7 +2798,6 @@ static inline void unregister_netdevice(struct net_device *dev)
int netdev_refcnt_read(const struct net_device *dev);
void free_netdev(struct net_device *dev);
void netdev_freemem(struct net_device *dev);
void synchronize_net(void);
int init_dummy_netdev(struct net_device *dev);
struct net_device *netdev_get_xmit_slave(struct net_device *dev,
......
......@@ -6533,8 +6533,7 @@ EXPORT_SYMBOL(napi_busy_loop);
static void napi_hash_add(struct napi_struct *napi)
{
if (test_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state) ||
test_and_set_bit(NAPI_STATE_HASHED, &napi->state))
if (test_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state))
return;
spin_lock(&napi_hash_lock);
......@@ -6555,20 +6554,14 @@ static void napi_hash_add(struct napi_struct *napi)
/* Warning : caller is responsible to make sure rcu grace period
* is respected before freeing memory containing @napi
*/
bool napi_hash_del(struct napi_struct *napi)
static void napi_hash_del(struct napi_struct *napi)
{
bool rcu_sync_needed = false;
spin_lock(&napi_hash_lock);
if (test_and_clear_bit(NAPI_STATE_HASHED, &napi->state)) {
rcu_sync_needed = true;
hlist_del_rcu(&napi->napi_hash_node);
}
hlist_del_init_rcu(&napi->napi_hash_node);
spin_unlock(&napi_hash_lock);
return rcu_sync_needed;
}
EXPORT_SYMBOL_GPL(napi_hash_del);
static enum hrtimer_restart napi_watchdog(struct hrtimer *timer)
{
......@@ -6600,7 +6593,11 @@ static void init_gro_hash(struct napi_struct *napi)
void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
int (*poll)(struct napi_struct *, int), int weight)
{
if (WARN_ON(test_and_set_bit(NAPI_STATE_LISTED, &napi->state)))
return;
INIT_LIST_HEAD(&napi->poll_list);
INIT_HLIST_NODE(&napi->napi_hash_node);
hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
napi->timer.function = napi_watchdog;
init_gro_hash(napi);
......@@ -6653,18 +6650,19 @@ static void flush_gro_hash(struct napi_struct *napi)
}
/* Must be called in process context */
void netif_napi_del(struct napi_struct *napi)
void __netif_napi_del(struct napi_struct *napi)
{
might_sleep();
if (napi_hash_del(napi))
synchronize_net();
list_del_init(&napi->dev_list);
if (!test_and_clear_bit(NAPI_STATE_LISTED, &napi->state))
return;
napi_hash_del(napi);
list_del_rcu(&napi->dev_list);
napi_free_frags(napi);
flush_gro_hash(napi);
napi->gro_bitmask = 0;
}
EXPORT_SYMBOL(netif_napi_del);
EXPORT_SYMBOL(__netif_napi_del);
static int napi_poll(struct napi_struct *n, struct list_head *repoll)
{
......
......@@ -297,7 +297,7 @@ static int netpoll_owner_active(struct net_device *dev)
{
struct napi_struct *napi;
list_for_each_entry(napi, &dev->napi_list, dev_list) {
list_for_each_entry_rcu(napi, &dev->napi_list, dev_list) {
if (napi->poll_owner == smp_processor_id())
return 1;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment