Commit b6e81138 authored by Kuniyuki Iwashima's avatar Kuniyuki Iwashima Committed by David S. Miller

af_unix: Define a per-netns hash table.

This commit adds a per netns hash table for AF_UNIX, which size is fixed
as UNIX_HASH_SIZE for now.

The first implementation defines a per-netns hash table as a single array
of lock and list:

	struct unix_hashbucket {
		spinlock_t		lock;
		struct hlist_head	head;
	};

	struct netns_unix {
		struct unix_hashbucket	*hash;
		...
	};

But, Eric pointed out memory cost that the structure has holes because of
sizeof(spinlock_t), which is 4 (or more if LOCKDEP is enabled). [0]  It
could be expensive on a host with thousands of netns and few AF_UNIX
sockets.  For this reason, a per-netns hash table uses two dense arrays.

	struct unix_table {
		spinlock_t		*locks;
		struct hlist_head	*buckets;
	};

	struct netns_unix {
		struct unix_table	table;
		...
	};

Note the length of the list has a significant impact rather than lock
contention, so having shared locks can be an option.  But, per-netns
locks and lists still perform better than the global locks and per-netns
lists. [1]

Also, this patch adds a change so that struct netns_unix disappears from
struct net if CONFIG_UNIX is disabled.

[0]: https://lore.kernel.org/netdev/CANn89iLVxO5aqx16azNU7p7Z-nz5NrnM5QTqOzueVxEnkVTxyg@mail.gmail.com/
[1]: https://lore.kernel.org/netdev/20220617175215.1769-1-kuniyu@amazon.com/Signed-off-by: default avatarKuniyuki Iwashima <kuniyu@amazon.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent f302d180
......@@ -120,7 +120,9 @@ struct net {
struct netns_core core;
struct netns_mib mib;
struct netns_packet packet;
#if IS_ENABLED(CONFIG_UNIX)
struct netns_unix unx;
#endif
struct netns_nexthop nexthop;
struct netns_ipv4 ipv4;
#if IS_ENABLED(CONFIG_IPV6)
......
......@@ -5,8 +5,14 @@
#ifndef __NETNS_UNIX_H__
#define __NETNS_UNIX_H__
struct unix_table {
spinlock_t *locks;
struct hlist_head *buckets;
};
struct ctl_table_header;
struct netns_unix {
struct unix_table table;
int sysctl_max_dgram_qlen;
struct ctl_table_header *ctl;
};
......
......@@ -3559,7 +3559,7 @@ static const struct net_proto_family unix_family_ops = {
static int __net_init unix_net_init(struct net *net)
{
int error = -ENOMEM;
int i;
net->unx.sysctl_max_dgram_qlen = 10;
if (unix_sysctl_register(net))
......@@ -3567,18 +3567,44 @@ static int __net_init unix_net_init(struct net *net)
#ifdef CONFIG_PROC_FS
if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
sizeof(struct seq_net_private))) {
unix_sysctl_unregister(net);
goto out;
sizeof(struct seq_net_private)))
goto err_sysctl;
#endif
net->unx.table.locks = kvmalloc_array(UNIX_HASH_SIZE,
sizeof(spinlock_t), GFP_KERNEL);
if (!net->unx.table.locks)
goto err_proc;
net->unx.table.buckets = kvmalloc_array(UNIX_HASH_SIZE,
sizeof(struct hlist_head),
GFP_KERNEL);
if (!net->unx.table.buckets)
goto free_locks;
for (i = 0; i < UNIX_HASH_SIZE; i++) {
spin_lock_init(&net->unx.table.locks[i]);
INIT_HLIST_HEAD(&net->unx.table.buckets[i]);
}
return 0;
free_locks:
kvfree(net->unx.table.locks);
err_proc:
#ifdef CONFIG_PROC_FS
remove_proc_entry("unix", net->proc_net);
err_sysctl:
#endif
error = 0;
unix_sysctl_unregister(net);
out:
return error;
return -ENOMEM;
}
static void __net_exit unix_net_exit(struct net *net)
{
kvfree(net->unx.table.buckets);
kvfree(net->unx.table.locks);
unix_sysctl_unregister(net);
remove_proc_entry("unix", net->proc_net);
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment