Merge branch 'net-speedup-netns-dismantles'

Eric Dumazet says: ==================== net: speedup netns dismantles From: Eric Dumazet <edumazet@google.com> In this series, I made network namespace deletions more scalable, by 4x on the little benchmark described in this cover letter. - Remove bottleneck on ipv6 addrconf, by replacing a global hash table to a per netns one. - Rework many (struct pernet_operations)->exit() handlers to exit_batch() ones. This removes many rtnl acquisitions, and gives to cleanup_net() kind of a priority over rtnl ownership. Tested on a host with 24 cpus (48 HT) Test script: for nr in {1..10} do (for i in {1..10000}; do unshare -n /bin/bash -c "ifconfig lo up"; done) & done wait for i in {1..10} do sleep 1 echo 3 >/proc/sys/vm/drop_caches grep net_namespace /proc/slabinfo done Before: We can see host struggles to clean the netns, even after there are no new creations. Memory cost is high, because each netns consumes a good amount of memory. time ./unshare10.sh net_namespace 82634 82634 3968 1 1 : tunables 24 12 8 : slabdata 82634 82634 0 net_namespace 82634 82634 3968 1 1 : tunables 24 12 8 : slabdata 82634 82634 0 net_namespace 82634 82634 3968 1 1 : tunables 24 12 8 : slabdata 82634 82634 0 net_namespace 82634 82634 3968 1 1 : tunables 24 12 8 : slabdata 82634 82634 0 net_namespace 82634 82634 3968 1 1 : tunables 24 12 8 : slabdata 82634 82634 0 net_namespace 82634 82634 3968 1 1 : tunables 24 12 8 : slabdata 82634 82634 0 net_namespace 82634 82634 3968 1 1 : tunables 24 12 8 : slabdata 82634 82634 0 net_namespace 82634 82634 3968 1 1 : tunables 24 12 8 : slabdata 82634 82634 0 net_namespace 82634 82634 3968 1 1 : tunables 24 12 8 : slabdata 82634 82634 0 net_namespace 37214 37792 3968 1 1 : tunables 24 12 8 : slabdata 37214 37792 192 real 6m57.766s user 3m37.277s sys 40m4.826s After: We can see the script completes much faster, the kernel thread doing the cleanup_net() keeps up just fine. Memory cost is not too big. time ./unshare10.sh net_namespace 9945 9945 4096 1 1 : tunables 24 12 8 : slabdata 9945 9945 0 net_namespace 4087 4665 4096 1 1 : tunables 24 12 8 : slabdata 4087 4665 192 net_namespace 4082 4607 4096 1 1 : tunables 24 12 8 : slabdata 4082 4607 192 net_namespace 234 761 4096 1 1 : tunables 24 12 8 : slabdata 234 761 192 net_namespace 224 751 4096 1 1 : tunables 24 12 8 : slabdata 224 751 192 net_namespace 218 745 4096 1 1 : tunables 24 12 8 : slabdata 218 745 192 net_namespace 193 667 4096 1 1 : tunables 24 12 8 : slabdata 193 667 172 net_namespace 167 609 4096 1 1 : tunables 24 12 8 : slabdata 167 609 152 net_namespace 167 609 4096 1 1 : tunables 24 12 8 : slabdata 167 609 152 net_namespace 157 609 4096 1 1 : tunables 24 12 8 : slabdata 157 609 152 real 1m43.876s user 3m39.728s sys 7m36.342s ==================== Link: https://lore.kernel.org/r/20220208045038.2635826-1-eric.dumazet@gmail.comSigned-off-by: Jakub Kicinski <kuba@kernel.org>

Merge branch 'net-speedup-netns-dismantles'
Eric Dumazet says: ==================== net: speedup netns dismantles From: Eric Dumazet <edumazet@google.com> In this series, I made network namespace deletions more scalable, by 4x on the little benchmark described in this cover letter. - Remove bottleneck on ipv6 addrconf, by replacing a global hash table to a per netns one. - Rework many (struct pernet_operations)->exit() handlers to exit_batch() ones. This removes many rtnl acquisitions, and gives to cleanup_net() kind of a priority over rtnl ownership. Tested on a host with 24 cpus (48 HT) Test script: for nr in {1..10} do (for i in {1..10000}; do unshare -n /bin/bash -c "ifconfig lo up"; done) & done wait for i in {1..10} do sleep 1 echo 3 >/proc/sys/vm/drop_caches grep net_namespace /proc/slabinfo done Before: We can see host struggles to clean the netns, even after there are no new creations. Memory cost is high, because each netns consumes a good amount of memory. time ./unshare10.sh net_namespace 82634 82634 3968 1 1 : tunables 24 12 8 : slabdata 82634 82634 0 net_namespace 82634 82634 3968 1 1 : tunables 24 12 8 : slabdata 82634 82634 0 net_namespace 82634 82634 3968 1 1 : tunables 24 12 8 : slabdata 82634 82634 0 net_namespace 82634 82634 3968 1 1 : tunables 24 12 8 : slabdata 82634 82634 0 net_namespace 82634 82634 3968 1 1 : tunables 24 12 8 : slabdata 82634 82634 0 net_namespace 82634 82634 3968 1 1 : tunables 24 12 8 : slabdata 82634 82634 0 net_namespace 82634 82634 3968 1 1 : tunables 24 12 8 : slabdata 82634 82634 0 net_namespace 82634 82634 3968 1 1 : tunables 24 12 8 : slabdata 82634 82634 0 net_namespace 82634 82634 3968 1 1 : tunables 24 12 8 : slabdata 82634 82634 0 net_namespace 37214 37792 3968 1 1 : tunables 24 12 8 : slabdata 37214 37792 192 real 6m57.766s user 3m37.277s sys 40m4.826s After: We can see the script completes much faster, the kernel thread doing the cleanup_net() keeps up just fine. Memory cost is not too big. time ./unshare10.sh net_namespace 9945 9945 4096 1 1 : tunables 24 12 8 : slabdata 9945 9945 0 net_namespace 4087 4665 4096 1 1 : tunables 24 12 8 : slabdata 4087 4665 192 net_namespace 4082 4607 4096 1 1 : tunables 24 12 8 : slabdata 4082 4607 192 net_namespace 234 761 4096 1 1 : tunables 24 12 8 : slabdata 234 761 192 net_namespace 224 751 4096 1 1 : tunables 24 12 8 : slabdata 224 751 192 net_namespace 218 745 4096 1 1 : tunables 24 12 8 : slabdata 218 745 192 net_namespace 193 667 4096 1 1 : tunables 24 12 8 : slabdata 193 667 172 net_namespace 167 609 4096 1 1 : tunables 24 12 8 : slabdata 167 609 152 net_namespace 167 609 4096 1 1 : tunables 24 12 8 : slabdata 167 609 152 net_namespace 157 609 4096 1 1 : tunables 24 12 8 : slabdata 157 609 152 real 1m43.876s user 3m39.728s sys 7m36.342s ==================== Link: https://lore.kernel.org/r/20220208045038.2635826-1-eric.dumazet@gmail.comSigned-off-by: Jakub Kicinski <kuba@kernel.org>
4caaf758 · Jakub Kicinski · b2309a71 · ee403248 · 4caaf758 · 4caaf758
Commit 4caaf758 authored Feb 08, 2022 by Jakub Kicinski
11 changed files
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -6048,27 +6048,38 @@ static int __net_init bond_net_init(struct net *net)
 	return 0;
 }

-static void __net_exit bond_net_exit(struct net *net)
+static void __net_exit bond_net_exit_batch(struct list_head *net_list)
 {
-	struct bond_net *bn = net_generic(net, bond_net_id);
-	struct bonding *bond, *tmp_bond;
+	struct bond_net *bn;
+	struct net *net;
 	LIST_HEAD(list);

-	bond_destroy_sysfs(bn);
+	list_for_each_entry(net, net_list, exit_list) {
+		bn = net_generic(net, bond_net_id);
+		bond_destroy_sysfs(bn);
+	}

 	/* Kill off any bonds created after unregistering bond rtnl ops */
 	rtnl_lock();
-	list_for_each_entry_safe(bond, tmp_bond, &bn->dev_list, bond_list)
-		unregister_netdevice_queue(bond->dev, &list);
+	list_for_each_entry(net, net_list, exit_list) {
+		struct bonding *bond, *tmp_bond;
+
+		bn = net_generic(net, bond_net_id);
+		list_for_each_entry_safe(bond, tmp_bond, &bn->dev_list, bond_list)
+			unregister_netdevice_queue(bond->dev, &list);
+	}
 	unregister_netdevice_many(&list);
 	rtnl_unlock();

-	bond_destroy_proc_dir(bn);
+	list_for_each_entry(net, net_list, exit_list) {
+		bn = net_generic(net, bond_net_id);
+		bond_destroy_proc_dir(bn);
+	}
 }

 static struct pernet_operations bond_net_ops = {
 	.init = bond_net_init,
-	.exit = bond_net_exit,
+	.exit_batch = bond_net_exit_batch,
 	.id   = &bond_net_id,
 	.size = sizeof(struct bond_net),
 };

--- a/drivers/net/bonding/bond_procfs.c
+++ b/drivers/net/bonding/bond_procfs.c
@@ -307,7 +307,6 @@ void __net_init bond_create_proc_dir(struct bond_net *bn)
 }

 /* Destroy the bonding directory under /proc/net, if empty.
- * Caller must hold rtnl_lock.
 */
 void __net_exit bond_destroy_proc_dir(struct bond_net *bn)
 {

--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -92,6 +92,11 @@ struct netns_ipv6 {
 	struct sock             *tcp_sk;
 	struct sock             *igmp_sk;
 	struct sock		*mc_autojoin_sk;
+
+	struct hlist_head	*inet6_addr_lst;
+	spinlock_t		addrconf_hash_lock;
+	struct delayed_work	addr_chk_work;
+
 #ifdef CONFIG_IPV6_MROUTE
 #ifndef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
 	struct mr_table		*mrt6;

--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -1239,16 +1239,19 @@ static int __net_init cangw_pernet_init(struct net *net)
 	return 0;
 }

-static void __net_exit cangw_pernet_exit(struct net *net)
+static void __net_exit cangw_pernet_exit_batch(struct list_head *net_list)
 {
+	struct net *net;
+
 	rtnl_lock();
-	cgw_remove_all_jobs(net);
+	list_for_each_entry(net, net_list, exit_list)
+		cgw_remove_all_jobs(net);
 	rtnl_unlock();
 }

 static struct pernet_operations cangw_pernet_ops = {
 	.init = cangw_pernet_init,
-	.exit = cangw_pernet_exit,
+	.exit_batch = cangw_pernet_exit_batch,
 };

 static __init int cgw_module_init(void)

--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -10850,14 +10850,14 @@ static struct pernet_operations __net_initdata netdev_net_ops = {
 	.exit = netdev_exit,
 };

-static void __net_exit default_device_exit(struct net *net)
+static void __net_exit default_device_exit_net(struct net *net)
 {
 	struct net_device *dev, *aux;
 	/*
 	 * Push all migratable network devices back to the
 	 * initial network namespace
 	 */
-	rtnl_lock();
+	ASSERT_RTNL();
 	for_each_netdev_safe(net, dev, aux) {
 		int err;
 		char fb_name[IFNAMSIZ];
@@ -10881,22 +10881,22 @@ static void __net_exit default_device_exit(struct net *net)
 			BUG();
 		}
 	}
-	rtnl_unlock();
 }

 static void __net_exit rtnl_lock_unregistering(struct list_head *net_list)
 {
-	/* Return with the rtnl_lock held when there are no network
+	/* Return (with the rtnl_lock held) when there are no network
 	 * devices unregistering in any network namespace in net_list.
 	 */
-	struct net *net;
-	bool unregistering;
 	DEFINE_WAIT_FUNC(wait, woken_wake_function);
+	bool unregistering;
+	struct net *net;

+	ASSERT_RTNL();
 	add_wait_queue(&netdev_unregistering_wq, &wait);
 	for (;;) {
 		unregistering = false;
-		rtnl_lock();
+
 		list_for_each_entry(net, net_list, exit_list) {
 			if (net->dev_unreg_count > 0) {
 				unregistering = true;
@@ -10908,6 +10908,7 @@ static void __net_exit rtnl_lock_unregistering(struct list_head *net_list)
 		__rtnl_unlock();

 		wait_woken(&wait, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
+		rtnl_lock();
 	}
 	remove_wait_queue(&netdev_unregistering_wq, &wait);
 }
@@ -10923,6 +10924,11 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list)
 	struct net *net;
 	LIST_HEAD(dev_kill_list);

+	rtnl_lock();
+	list_for_each_entry(net, net_list, exit_list) {
+		default_device_exit_net(net);
+		cond_resched();
+	}
 	/* To prevent network device cleanup code from dereferencing
 	 * loopback devices or network devices that have been freed
 	 * wait here for all pending unregistrations to complete,
@@ -10935,6 +10941,7 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list)
 	 * default_device_exit_batch.
 	 */
 	rtnl_lock_unregistering(net_list);
+
 	list_for_each_entry(net, net_list, exit_list) {
 		for_each_netdev_reverse(net, dev) {
 			if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink)
@@ -10948,7 +10955,6 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list)
 }

 static struct pernet_operations __net_initdata default_device_ops = {
-	.exit = default_device_exit,
 	.exit_batch = default_device_exit_batch,
 };


--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1556,7 +1556,7 @@ static void ip_fib_net_exit(struct net *net)
 {
 	int i;

-	rtnl_lock();
+	ASSERT_RTNL();
 #ifdef CONFIG_IP_MULTIPLE_TABLES
 	RCU_INIT_POINTER(net->ipv4.fib_main, NULL);
 	RCU_INIT_POINTER(net->ipv4.fib_default, NULL);
@@ -1581,7 +1581,7 @@ static void ip_fib_net_exit(struct net *net)
 #ifdef CONFIG_IP_MULTIPLE_TABLES
 	fib4_rules_exit(net);
 #endif
-	rtnl_unlock();
+
 	kfree(net->ipv4.fib_table_hash);
 	fib4_notifier_exit(net);
 }
@@ -1608,7 +1608,9 @@ static int __net_init fib_net_init(struct net *net)
 out_proc:
 	nl_fib_lookup_exit(net);
 out_nlfl:
+	rtnl_lock();
 	ip_fib_net_exit(net);
+	rtnl_unlock();
 	goto out;
 }

@@ -1616,12 +1618,23 @@ static void __net_exit fib_net_exit(struct net *net)
 {
 	fib_proc_exit(net);
 	nl_fib_lookup_exit(net);
-	ip_fib_net_exit(net);
+}
+
+static void __net_exit fib_net_exit_batch(struct list_head *net_list)
+{
+	struct net *net;
+
+	rtnl_lock();
+	list_for_each_entry(net, net_list, exit_list)
+		ip_fib_net_exit(net);
+
+	rtnl_unlock();
 }

 static struct pernet_operations fib_net_ops = {
 	.init = fib_net_init,
 	.exit = fib_net_exit,
+	.exit_batch = fib_net_exit_batch,
 };

 void __init ip_fib_init(void)

--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -266,13 +266,12 @@ static void __net_exit ipmr_rules_exit(struct net *net)
 {
 	struct mr_table *mrt, *next;

-	rtnl_lock();
+	ASSERT_RTNL();
 	list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) {
 		list_del(&mrt->list);
 		ipmr_free_table(mrt);
 	}
 	fib_rules_unregister(net->ipv4.mr_rules_ops);
-	rtnl_unlock();
 }

 static int ipmr_rules_dump(struct net *net, struct notifier_block *nb,
@@ -328,10 +327,9 @@ static int __net_init ipmr_rules_init(struct net *net)

 static void __net_exit ipmr_rules_exit(struct net *net)
 {
-	rtnl_lock();
+	ASSERT_RTNL();
 	ipmr_free_table(net->ipv4.mrt);
 	net->ipv4.mrt = NULL;
-	rtnl_unlock();
 }

 static int ipmr_rules_dump(struct net *net, struct notifier_block *nb,
@@ -3075,7 +3073,9 @@ static int __net_init ipmr_net_init(struct net *net)
 proc_cache_fail:
 	remove_proc_entry("ip_mr_vif", net->proc_net);
 proc_vif_fail:
+	rtnl_lock();
 	ipmr_rules_exit(net);
+	rtnl_unlock();
 #endif
 ipmr_rules_fail:
 	ipmr_notifier_exit(net);
@@ -3090,12 +3090,22 @@ static void __net_exit ipmr_net_exit(struct net *net)
 	remove_proc_entry("ip_mr_vif", net->proc_net);
 #endif
 	ipmr_notifier_exit(net);
-	ipmr_rules_exit(net);
+}
+
+static void __net_exit ipmr_net_exit_batch(struct list_head *net_list)
+{
+	struct net *net;
+
+	rtnl_lock();
+	list_for_each_entry(net, net_list, exit_list)
+		ipmr_rules_exit(net);
+	rtnl_unlock();
 }

 static struct pernet_operations ipmr_net_ops = {
 	.init = ipmr_net_init,
 	.exit = ipmr_net_exit,
+	.exit_batch = ipmr_net_exit_batch,
 };

 int __init ip_mr_init(void)

--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -3733,12 +3733,16 @@ void nexthop_res_grp_activity_update(struct net *net, u32 id, u16 num_buckets,
 }
 EXPORT_SYMBOL(nexthop_res_grp_activity_update);

-static void __net_exit nexthop_net_exit(struct net *net)
+static void __net_exit nexthop_net_exit_batch(struct list_head *net_list)
 {
+	struct net *net;
+
 	rtnl_lock();
-	flush_all_nexthops(net);
+	list_for_each_entry(net, net_list, exit_list) {
+		flush_all_nexthops(net);
+		kfree(net->nexthop.devhash);
+	}
 	rtnl_unlock();
-	kfree(net->nexthop.devhash);
 }

 static int __net_init nexthop_net_init(struct net *net)
@@ -3756,7 +3760,7 @@ static int __net_init nexthop_net_init(struct net *net)

 static struct pernet_operations nexthop_net_ops = {
 	.init = nexthop_net_init,
-	.exit = nexthop_net_exit,
+	.exit_batch = nexthop_net_exit_batch,
 };

 static int __init nexthop_init(void)

--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -493,16 +493,21 @@ static int __net_init fib6_rules_net_init(struct net *net)
 	goto out;
 }

-static void __net_exit fib6_rules_net_exit(struct net *net)
+static void __net_exit fib6_rules_net_exit_batch(struct list_head *net_list)
 {
+	struct net *net;
+
 	rtnl_lock();
-	fib_rules_unregister(net->ipv6.fib6_rules_ops);
+	list_for_each_entry(net, net_list, exit_list) {
+		fib_rules_unregister(net->ipv6.fib6_rules_ops);
+		cond_resched();
+	}
 	rtnl_unlock();
 }

 static struct pernet_operations fib6_rules_net_ops = {
 	.init = fib6_rules_net_init,
-	.exit = fib6_rules_net_exit,
+	.exit_batch = fib6_rules_net_exit_batch,
 };

 int __init fib6_rules_init(void)

--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -253,13 +253,12 @@ static void __net_exit ip6mr_rules_exit(struct net *net)
 {
 	struct mr_table *mrt, *next;

-	rtnl_lock();
+	ASSERT_RTNL();
 	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
 		list_del(&mrt->list);
 		ip6mr_free_table(mrt);
 	}
 	fib_rules_unregister(net->ipv6.mr6_rules_ops);
-	rtnl_unlock();
 }

 static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
@@ -316,10 +315,9 @@ static int __net_init ip6mr_rules_init(struct net *net)

 static void __net_exit ip6mr_rules_exit(struct net *net)
 {
-	rtnl_lock();
+	ASSERT_RTNL();
 	ip6mr_free_table(net->ipv6.mrt6);
 	net->ipv6.mrt6 = NULL;
-	rtnl_unlock();
 }

 static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
@@ -1323,7 +1321,9 @@ static int __net_init ip6mr_net_init(struct net *net)
 proc_cache_fail:
 	remove_proc_entry("ip6_mr_vif", net->proc_net);
 proc_vif_fail:
+	rtnl_lock();
 	ip6mr_rules_exit(net);
+	rtnl_unlock();
 #endif
 ip6mr_rules_fail:
 	ip6mr_notifier_exit(net);
@@ -1336,13 +1336,23 @@ static void __net_exit ip6mr_net_exit(struct net *net)
 	remove_proc_entry("ip6_mr_cache", net->proc_net);
 	remove_proc_entry("ip6_mr_vif", net->proc_net);
 #endif
-	ip6mr_rules_exit(net);
 	ip6mr_notifier_exit(net);
 }

+static void __net_exit ip6mr_net_exit_batch(struct list_head *net_list)
+{
+	struct net *net;
+
+	rtnl_lock();
+	list_for_each_entry(net, net_list, exit_list)
+		ip6mr_rules_exit(net);
+	rtnl_unlock();
+}
+
 static struct pernet_operations ip6mr_net_ops = {
 	.init = ip6mr_net_init,
 	.exit = ip6mr_net_exit,
+	.exit_batch = ip6mr_net_exit_batch,
 };

 int __init ip6_mr_init(void)