Commit 038d49ba authored by David S. Miller

Merge branch 'Introduce-net_rwsem-to-protect-net_namespace_list'

Kirill Tkhai says:

====================
Introduce net_rwsem to protect net_namespace_list

The series introduces fine grained rw_semaphore, which will be used
instead of rtnl_lock() to protect net_namespace_list.

This improves scalability and allows to do non-exclusive sleepable
iteration for_each_net(), which is enough for most cases.

scripts/get_maintainer.pl gives enormous list of people, and I add
all to CC.

Note, that this patch is independent of "Close race between
{un, }register_netdevice_notifier and pernet_operations":
https://patchwork.ozlabs.org/project/netdev/list/?series=36495

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 906edee9 152f2531
...@@ -403,10 +403,12 @@ static void enum_all_gids_of_dev_cb(struct ib_device *ib_dev, ...@@ -403,10 +403,12 @@ static void enum_all_gids_of_dev_cb(struct ib_device *ib_dev,
* our feet * our feet
*/ */
rtnl_lock(); rtnl_lock();
down_read(&net_rwsem);
for_each_net(net) for_each_net(net)
for_each_netdev(net, ndev) for_each_netdev(net, ndev)
if (is_eth_port_of_netdev(ib_dev, port, rdma_ndev, ndev)) if (is_eth_port_of_netdev(ib_dev, port, rdma_ndev, ndev))
add_netdev_ips(ib_dev, port, rdma_ndev, ndev); add_netdev_ips(ib_dev, port, rdma_ndev, ndev);
up_read(&net_rwsem);
rtnl_unlock(); rtnl_unlock();
} }
......
...@@ -37,6 +37,7 @@ extern int rtnl_lock_killable(void); ...@@ -37,6 +37,7 @@ extern int rtnl_lock_killable(void);
extern wait_queue_head_t netdev_unregistering_wq; extern wait_queue_head_t netdev_unregistering_wq;
extern struct rw_semaphore pernet_ops_rwsem; extern struct rw_semaphore pernet_ops_rwsem;
extern struct rw_semaphore net_rwsem;
#ifdef CONFIG_PROVE_LOCKING #ifdef CONFIG_PROVE_LOCKING
extern bool lockdep_rtnl_is_held(void); extern bool lockdep_rtnl_is_held(void);
......
...@@ -291,6 +291,7 @@ static inline struct net *read_pnet(const possible_net_t *pnet) ...@@ -291,6 +291,7 @@ static inline struct net *read_pnet(const possible_net_t *pnet)
#endif #endif
} }
/* Protected by net_rwsem */
#define for_each_net(VAR) \ #define for_each_net(VAR) \
list_for_each_entry(VAR, &net_namespace_list, list) list_for_each_entry(VAR, &net_namespace_list, list)
......
...@@ -1629,6 +1629,7 @@ int register_netdevice_notifier(struct notifier_block *nb) ...@@ -1629,6 +1629,7 @@ int register_netdevice_notifier(struct notifier_block *nb)
goto unlock; goto unlock;
if (dev_boot_phase) if (dev_boot_phase)
goto unlock; goto unlock;
down_read(&net_rwsem);
for_each_net(net) { for_each_net(net) {
for_each_netdev(net, dev) { for_each_netdev(net, dev) {
err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev); err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev);
...@@ -1642,6 +1643,7 @@ int register_netdevice_notifier(struct notifier_block *nb) ...@@ -1642,6 +1643,7 @@ int register_netdevice_notifier(struct notifier_block *nb)
call_netdevice_notifier(nb, NETDEV_UP, dev); call_netdevice_notifier(nb, NETDEV_UP, dev);
} }
} }
up_read(&net_rwsem);
unlock: unlock:
rtnl_unlock(); rtnl_unlock();
...@@ -1664,6 +1666,7 @@ int register_netdevice_notifier(struct notifier_block *nb) ...@@ -1664,6 +1666,7 @@ int register_netdevice_notifier(struct notifier_block *nb)
} }
outroll: outroll:
up_read(&net_rwsem);
raw_notifier_chain_unregister(&netdev_chain, nb); raw_notifier_chain_unregister(&netdev_chain, nb);
goto unlock; goto unlock;
} }
...@@ -1694,6 +1697,7 @@ int unregister_netdevice_notifier(struct notifier_block *nb) ...@@ -1694,6 +1697,7 @@ int unregister_netdevice_notifier(struct notifier_block *nb)
if (err) if (err)
goto unlock; goto unlock;
down_read(&net_rwsem);
for_each_net(net) { for_each_net(net) {
for_each_netdev(net, dev) { for_each_netdev(net, dev) {
if (dev->flags & IFF_UP) { if (dev->flags & IFF_UP) {
...@@ -1704,6 +1708,7 @@ int unregister_netdevice_notifier(struct notifier_block *nb) ...@@ -1704,6 +1708,7 @@ int unregister_netdevice_notifier(struct notifier_block *nb)
call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev); call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
} }
} }
up_read(&net_rwsem);
unlock: unlock:
rtnl_unlock(); rtnl_unlock();
return err; return err;
......
...@@ -33,6 +33,7 @@ static unsigned int fib_seq_sum(void) ...@@ -33,6 +33,7 @@ static unsigned int fib_seq_sum(void)
struct net *net; struct net *net;
rtnl_lock(); rtnl_lock();
down_read(&net_rwsem);
for_each_net(net) { for_each_net(net) {
rcu_read_lock(); rcu_read_lock();
list_for_each_entry_rcu(ops, &net->fib_notifier_ops, list) { list_for_each_entry_rcu(ops, &net->fib_notifier_ops, list) {
...@@ -43,6 +44,7 @@ static unsigned int fib_seq_sum(void) ...@@ -43,6 +44,7 @@ static unsigned int fib_seq_sum(void)
} }
rcu_read_unlock(); rcu_read_unlock();
} }
up_read(&net_rwsem);
rtnl_unlock(); rtnl_unlock();
return fib_seq; return fib_seq;
......
...@@ -33,6 +33,10 @@ static struct list_head *first_device = &pernet_list; ...@@ -33,6 +33,10 @@ static struct list_head *first_device = &pernet_list;
LIST_HEAD(net_namespace_list); LIST_HEAD(net_namespace_list);
EXPORT_SYMBOL_GPL(net_namespace_list); EXPORT_SYMBOL_GPL(net_namespace_list);
/* Protects net_namespace_list. Nests inside rtnl_lock() */
DECLARE_RWSEM(net_rwsem);
EXPORT_SYMBOL_GPL(net_rwsem);
struct net init_net = { struct net init_net = {
.count = REFCOUNT_INIT(1), .count = REFCOUNT_INIT(1),
.dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head), .dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head),
...@@ -309,9 +313,9 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) ...@@ -309,9 +313,9 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
if (error < 0) if (error < 0)
goto out_undo; goto out_undo;
} }
rtnl_lock(); down_write(&net_rwsem);
list_add_tail_rcu(&net->list, &net_namespace_list); list_add_tail_rcu(&net->list, &net_namespace_list);
rtnl_unlock(); up_write(&net_rwsem);
out: out:
return error; return error;
...@@ -450,7 +454,7 @@ static void unhash_nsid(struct net *net, struct net *last) ...@@ -450,7 +454,7 @@ static void unhash_nsid(struct net *net, struct net *last)
* and this work is the only process, that may delete * and this work is the only process, that may delete
* a net from net_namespace_list. So, when the below * a net from net_namespace_list. So, when the below
* is executing, the list may only grow. Thus, we do not * is executing, the list may only grow. Thus, we do not
* use for_each_net_rcu() or rtnl_lock(). * use for_each_net_rcu() or net_rwsem.
*/ */
for_each_net(tmp) { for_each_net(tmp) {
int id; int id;
...@@ -485,7 +489,7 @@ static void cleanup_net(struct work_struct *work) ...@@ -485,7 +489,7 @@ static void cleanup_net(struct work_struct *work)
down_read(&pernet_ops_rwsem); down_read(&pernet_ops_rwsem);
/* Don't let anyone else find us. */ /* Don't let anyone else find us. */
rtnl_lock(); down_write(&net_rwsem);
llist_for_each_entry(net, net_kill_list, cleanup_list) llist_for_each_entry(net, net_kill_list, cleanup_list)
list_del_rcu(&net->list); list_del_rcu(&net->list);
/* Cache last net. After we unlock rtnl, no one new net /* Cache last net. After we unlock rtnl, no one new net
...@@ -499,7 +503,7 @@ static void cleanup_net(struct work_struct *work) ...@@ -499,7 +503,7 @@ static void cleanup_net(struct work_struct *work)
* useless anyway, as netns_ids are destroyed there. * useless anyway, as netns_ids are destroyed there.
*/ */
last = list_last_entry(&net_namespace_list, struct net, list); last = list_last_entry(&net_namespace_list, struct net, list);
rtnl_unlock(); up_write(&net_rwsem);
llist_for_each_entry(net, net_kill_list, cleanup_list) { llist_for_each_entry(net, net_kill_list, cleanup_list) {
unhash_nsid(net, last); unhash_nsid(net, last);
...@@ -900,6 +904,9 @@ static int __register_pernet_operations(struct list_head *list, ...@@ -900,6 +904,9 @@ static int __register_pernet_operations(struct list_head *list,
list_add_tail(&ops->list, list); list_add_tail(&ops->list, list);
if (ops->init || (ops->id && ops->size)) { if (ops->init || (ops->id && ops->size)) {
/* We held write locked pernet_ops_rwsem, and parallel
* setup_net() and cleanup_net() are not possible.
*/
for_each_net(net) { for_each_net(net) {
error = ops_init(ops, net); error = ops_init(ops, net);
if (error) if (error)
...@@ -923,6 +930,7 @@ static void __unregister_pernet_operations(struct pernet_operations *ops) ...@@ -923,6 +930,7 @@ static void __unregister_pernet_operations(struct pernet_operations *ops)
LIST_HEAD(net_exit_list); LIST_HEAD(net_exit_list);
list_del(&ops->list); list_del(&ops->list);
/* See comment in __register_pernet_operations() */
for_each_net(net) for_each_net(net)
list_add_tail(&net->exit_list, &net_exit_list); list_add_tail(&net->exit_list, &net_exit_list);
ops_exit_list(ops, &net_exit_list); ops_exit_list(ops, &net_exit_list);
......
...@@ -418,9 +418,11 @@ void __rtnl_link_unregister(struct rtnl_link_ops *ops) ...@@ -418,9 +418,11 @@ void __rtnl_link_unregister(struct rtnl_link_ops *ops)
{ {
struct net *net; struct net *net;
down_read(&net_rwsem);
for_each_net(net) { for_each_net(net) {
__rtnl_kill_links(net, ops); __rtnl_kill_links(net, ops);
} }
up_read(&net_rwsem);
list_del(&ops->list); list_del(&ops->list);
} }
EXPORT_SYMBOL_GPL(__rtnl_link_unregister); EXPORT_SYMBOL_GPL(__rtnl_link_unregister);
...@@ -438,6 +440,9 @@ static void rtnl_lock_unregistering_all(void) ...@@ -438,6 +440,9 @@ static void rtnl_lock_unregistering_all(void)
for (;;) { for (;;) {
unregistering = false; unregistering = false;
rtnl_lock(); rtnl_lock();
/* We held write locked pernet_ops_rwsem, and parallel
* setup_net() and cleanup_net() are not possible.
*/
for_each_net(net) { for_each_net(net) {
if (net->dev_unreg_count > 0) { if (net->dev_unreg_count > 0) {
unregistering = true; unregistering = true;
......
...@@ -1763,14 +1763,14 @@ nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), void *data) ...@@ -1763,14 +1763,14 @@ nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), void *data)
{ {
struct net *net; struct net *net;
rtnl_lock(); down_read(&net_rwsem);
for_each_net(net) { for_each_net(net) {
if (atomic_read(&net->ct.count) == 0) if (atomic_read(&net->ct.count) == 0)
continue; continue;
__nf_ct_unconfirmed_destroy(net); __nf_ct_unconfirmed_destroy(net);
nf_queue_nf_hook_drop(net); nf_queue_nf_hook_drop(net);
} }
rtnl_unlock(); up_read(&net_rwsem);
/* Need to wait for netns cleanup worker to finish, if its /* Need to wait for netns cleanup worker to finish, if its
* running -- it might have deleted a net namespace from * running -- it might have deleted a net namespace from
......
...@@ -2363,10 +2363,10 @@ static void __net_exit ovs_exit_net(struct net *dnet) ...@@ -2363,10 +2363,10 @@ static void __net_exit ovs_exit_net(struct net *dnet)
list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node) list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
__dp_destroy(dp); __dp_destroy(dp);
rtnl_lock(); down_read(&net_rwsem);
for_each_net(net) for_each_net(net)
list_vports_from_net(net, dnet, &head); list_vports_from_net(net, dnet, &head);
rtnl_unlock(); up_read(&net_rwsem);
/* Detach all vports from given namespace. */ /* Detach all vports from given namespace. */
list_for_each_entry_safe(vport, vport_next, &head, detach_list) { list_for_each_entry_safe(vport, vport_next, &head, detach_list) {
......
...@@ -347,13 +347,13 @@ void wireless_nlevent_flush(void) ...@@ -347,13 +347,13 @@ void wireless_nlevent_flush(void)
struct sk_buff *skb; struct sk_buff *skb;
struct net *net; struct net *net;
ASSERT_RTNL(); down_read(&net_rwsem);
for_each_net(net) { for_each_net(net) {
while ((skb = skb_dequeue(&net->wext_nlevents))) while ((skb = skb_dequeue(&net->wext_nlevents)))
rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL,
GFP_KERNEL); GFP_KERNEL);
} }
up_read(&net_rwsem);
} }
EXPORT_SYMBOL_GPL(wireless_nlevent_flush); EXPORT_SYMBOL_GPL(wireless_nlevent_flush);
...@@ -410,9 +410,7 @@ subsys_initcall(wireless_nlevent_init); ...@@ -410,9 +410,7 @@ subsys_initcall(wireless_nlevent_init);
/* Process events generated by the wireless layer or the driver. */ /* Process events generated by the wireless layer or the driver. */
static void wireless_nlevent_process(struct work_struct *work) static void wireless_nlevent_process(struct work_struct *work)
{ {
rtnl_lock();
wireless_nlevent_flush(); wireless_nlevent_flush();
rtnl_unlock();
} }
static DECLARE_WORK(wireless_nlevent_work, wireless_nlevent_process); static DECLARE_WORK(wireless_nlevent_work, wireless_nlevent_process);
......
...@@ -47,10 +47,10 @@ static inline void selinux_xfrm_notify_policyload(void) ...@@ -47,10 +47,10 @@ static inline void selinux_xfrm_notify_policyload(void)
{ {
struct net *net; struct net *net;
rtnl_lock(); down_read(&net_rwsem);
for_each_net(net) for_each_net(net)
rt_genid_bump_all(net); rt_genid_bump_all(net);
rtnl_unlock(); up_read(&net_rwsem);
} }
#else #else
static inline int selinux_xfrm_enabled(void) static inline int selinux_xfrm_enabled(void)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment