Commit 2f7ca90a authored by Kuniyuki Iwashima, committed by David S. Miller

af_unix: Remove unix_table_locks.

unix_table_locks protected the global hash table, unix_socket_table.
The previous commit removed that table, so let's clean up the now-unnecessary
locks.

Here is a test result on EC2 c5.9xlarge where 10 processes run concurrently
in different netns and bind 100,000 sockets for each.

  without this series : 1m 38s
  with this series    :    11s

It is ~10x faster because the global hash table is split into 10 netns in
this case.
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent cf2f225e
...@@ -21,7 +21,6 @@ struct sock *unix_peer_get(struct sock *sk); ...@@ -21,7 +21,6 @@ struct sock *unix_peer_get(struct sock *sk);
#define UNIX_HASH_BITS 8 #define UNIX_HASH_BITS 8
extern unsigned int unix_tot_inflight; extern unsigned int unix_tot_inflight;
extern spinlock_t unix_table_locks[UNIX_HASH_SIZE];
struct unix_address { struct unix_address {
refcount_t refcnt; refcount_t refcnt;
......
...@@ -118,13 +118,11 @@ ...@@ -118,13 +118,11 @@
#include "scm.h" #include "scm.h"
spinlock_t unix_table_locks[UNIX_HASH_SIZE];
EXPORT_SYMBOL_GPL(unix_table_locks);
static atomic_long_t unix_nr_socks; static atomic_long_t unix_nr_socks;
/* SMP locking strategy: /* SMP locking strategy:
* hash table is protected with spinlock unix_table_locks * hash table is protected with spinlock.
* each socket state is protected by separate spin lock. * each socket state is protected by separate spinlock.
*/ */
static unsigned int unix_unbound_hash(struct sock *sk) static unsigned int unix_unbound_hash(struct sock *sk)
...@@ -166,9 +164,6 @@ static void unix_table_double_lock(struct net *net, ...@@ -166,9 +164,6 @@ static void unix_table_double_lock(struct net *net,
if (hash1 > hash2) if (hash1 > hash2)
swap(hash1, hash2); swap(hash1, hash2);
spin_lock(&unix_table_locks[hash1]);
spin_lock_nested(&unix_table_locks[hash2], SINGLE_DEPTH_NESTING);
spin_lock(&net->unx.table.locks[hash1]); spin_lock(&net->unx.table.locks[hash1]);
spin_lock_nested(&net->unx.table.locks[hash2], SINGLE_DEPTH_NESTING); spin_lock_nested(&net->unx.table.locks[hash2], SINGLE_DEPTH_NESTING);
} }
...@@ -178,9 +173,6 @@ static void unix_table_double_unlock(struct net *net, ...@@ -178,9 +173,6 @@ static void unix_table_double_unlock(struct net *net,
{ {
spin_unlock(&net->unx.table.locks[hash1]); spin_unlock(&net->unx.table.locks[hash1]);
spin_unlock(&net->unx.table.locks[hash2]); spin_unlock(&net->unx.table.locks[hash2]);
spin_unlock(&unix_table_locks[hash1]);
spin_unlock(&unix_table_locks[hash2]);
} }
#ifdef CONFIG_SECURITY_NETWORK #ifdef CONFIG_SECURITY_NETWORK
...@@ -324,20 +316,16 @@ static void __unix_set_addr_hash(struct net *net, struct sock *sk, ...@@ -324,20 +316,16 @@ static void __unix_set_addr_hash(struct net *net, struct sock *sk,
static void unix_remove_socket(struct net *net, struct sock *sk) static void unix_remove_socket(struct net *net, struct sock *sk)
{ {
spin_lock(&unix_table_locks[sk->sk_hash]);
spin_lock(&net->unx.table.locks[sk->sk_hash]); spin_lock(&net->unx.table.locks[sk->sk_hash]);
__unix_remove_socket(sk); __unix_remove_socket(sk);
spin_unlock(&net->unx.table.locks[sk->sk_hash]); spin_unlock(&net->unx.table.locks[sk->sk_hash]);
spin_unlock(&unix_table_locks[sk->sk_hash]);
} }
static void unix_insert_unbound_socket(struct net *net, struct sock *sk) static void unix_insert_unbound_socket(struct net *net, struct sock *sk)
{ {
spin_lock(&unix_table_locks[sk->sk_hash]);
spin_lock(&net->unx.table.locks[sk->sk_hash]); spin_lock(&net->unx.table.locks[sk->sk_hash]);
__unix_insert_socket(net, sk); __unix_insert_socket(net, sk);
spin_unlock(&net->unx.table.locks[sk->sk_hash]); spin_unlock(&net->unx.table.locks[sk->sk_hash]);
spin_unlock(&unix_table_locks[sk->sk_hash]);
} }
static struct sock *__unix_find_socket_byname(struct net *net, static struct sock *__unix_find_socket_byname(struct net *net,
...@@ -362,13 +350,11 @@ static inline struct sock *unix_find_socket_byname(struct net *net, ...@@ -362,13 +350,11 @@ static inline struct sock *unix_find_socket_byname(struct net *net,
{ {
struct sock *s; struct sock *s;
spin_lock(&unix_table_locks[hash]);
spin_lock(&net->unx.table.locks[hash]); spin_lock(&net->unx.table.locks[hash]);
s = __unix_find_socket_byname(net, sunname, len, hash); s = __unix_find_socket_byname(net, sunname, len, hash);
if (s) if (s)
sock_hold(s); sock_hold(s);
spin_unlock(&net->unx.table.locks[hash]); spin_unlock(&net->unx.table.locks[hash]);
spin_unlock(&unix_table_locks[hash]);
return s; return s;
} }
...@@ -377,7 +363,6 @@ static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i) ...@@ -377,7 +363,6 @@ static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i)
unsigned int hash = unix_bsd_hash(i); unsigned int hash = unix_bsd_hash(i);
struct sock *s; struct sock *s;
spin_lock(&unix_table_locks[hash]);
spin_lock(&net->unx.table.locks[hash]); spin_lock(&net->unx.table.locks[hash]);
sk_for_each(s, &net->unx.table.buckets[hash]) { sk_for_each(s, &net->unx.table.buckets[hash]) {
struct dentry *dentry = unix_sk(s)->path.dentry; struct dentry *dentry = unix_sk(s)->path.dentry;
...@@ -385,12 +370,10 @@ static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i) ...@@ -385,12 +370,10 @@ static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i)
if (dentry && d_backing_inode(dentry) == i) { if (dentry && d_backing_inode(dentry) == i) {
sock_hold(s); sock_hold(s);
spin_unlock(&net->unx.table.locks[hash]); spin_unlock(&net->unx.table.locks[hash]);
spin_unlock(&unix_table_locks[hash]);
return s; return s;
} }
} }
spin_unlock(&net->unx.table.locks[hash]); spin_unlock(&net->unx.table.locks[hash]);
spin_unlock(&unix_table_locks[hash]);
return NULL; return NULL;
} }
...@@ -1551,9 +1534,9 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, ...@@ -1551,9 +1534,9 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
* *
* The contents of *(otheru->addr) and otheru->path * The contents of *(otheru->addr) and otheru->path
* are seen fully set up here, since we have found * are seen fully set up here, since we have found
* otheru in hash under unix_table_locks. Insertion * otheru in hash under its lock. Insertion into the
* into the hash chain we'd found it in had been done * hash chain we'd found it in had been done in an
* in an earlier critical area protected by unix_table_locks, * earlier critical area protected by the chain's lock,
* the same one where we'd set *(otheru->addr) contents, * the same one where we'd set *(otheru->addr) contents,
* as well as otheru->path and otheru->addr itself. * as well as otheru->path and otheru->addr itself.
* *
...@@ -3253,7 +3236,6 @@ static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos) ...@@ -3253,7 +3236,6 @@ static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos)
struct sock *sk; struct sock *sk;
while (bucket < UNIX_HASH_SIZE) { while (bucket < UNIX_HASH_SIZE) {
spin_lock(&unix_table_locks[bucket]);
spin_lock(&net->unx.table.locks[bucket]); spin_lock(&net->unx.table.locks[bucket]);
sk = unix_from_bucket(seq, pos); sk = unix_from_bucket(seq, pos);
...@@ -3261,7 +3243,6 @@ static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos) ...@@ -3261,7 +3243,6 @@ static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos)
return sk; return sk;
spin_unlock(&net->unx.table.locks[bucket]); spin_unlock(&net->unx.table.locks[bucket]);
spin_unlock(&unix_table_locks[bucket]);
*pos = set_bucket_offset(++bucket, 1); *pos = set_bucket_offset(++bucket, 1);
} }
...@@ -3280,7 +3261,6 @@ static struct sock *unix_get_next(struct seq_file *seq, struct sock *sk, ...@@ -3280,7 +3261,6 @@ static struct sock *unix_get_next(struct seq_file *seq, struct sock *sk,
spin_unlock(&seq_file_net(seq)->unx.table.locks[bucket]); spin_unlock(&seq_file_net(seq)->unx.table.locks[bucket]);
spin_unlock(&unix_table_locks[bucket]);
*pos = set_bucket_offset(++bucket, 1); *pos = set_bucket_offset(++bucket, 1);
...@@ -3309,10 +3289,8 @@ static void unix_seq_stop(struct seq_file *seq, void *v) ...@@ -3309,10 +3289,8 @@ static void unix_seq_stop(struct seq_file *seq, void *v)
{ {
struct sock *sk = v; struct sock *sk = v;
if (sk) { if (sk)
spin_unlock(&seq_file_net(seq)->unx.table.locks[sk->sk_hash]); spin_unlock(&seq_file_net(seq)->unx.table.locks[sk->sk_hash]);
spin_unlock(&unix_table_locks[sk->sk_hash]);
}
} }
static int unix_seq_show(struct seq_file *seq, void *v) static int unix_seq_show(struct seq_file *seq, void *v)
...@@ -3337,7 +3315,7 @@ static int unix_seq_show(struct seq_file *seq, void *v) ...@@ -3337,7 +3315,7 @@ static int unix_seq_show(struct seq_file *seq, void *v)
(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING), (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
sock_i_ino(s)); sock_i_ino(s));
if (u->addr) { // under unix_table_locks here if (u->addr) { // under a hash table lock here
int i, len; int i, len;
seq_putc(seq, ' '); seq_putc(seq, ' ');
...@@ -3416,7 +3394,6 @@ static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk) ...@@ -3416,7 +3394,6 @@ static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk)
} }
spin_unlock(&seq_file_net(seq)->unx.table.locks[start_sk->sk_hash]); spin_unlock(&seq_file_net(seq)->unx.table.locks[start_sk->sk_hash]);
spin_unlock(&unix_table_locks[start_sk->sk_hash]);
return expected; return expected;
} }
...@@ -3705,13 +3682,10 @@ static void __init bpf_iter_register(void) ...@@ -3705,13 +3682,10 @@ static void __init bpf_iter_register(void)
static int __init af_unix_init(void) static int __init af_unix_init(void)
{ {
int i, rc = -1; int rc = -1;
BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb)); BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
for (i = 0; i < UNIX_HASH_SIZE; i++)
spin_lock_init(&unix_table_locks[i]);
rc = proto_register(&unix_dgram_proto, 1); rc = proto_register(&unix_dgram_proto, 1);
if (rc != 0) { if (rc != 0) {
pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__); pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
static int sk_diag_dump_name(struct sock *sk, struct sk_buff *nlskb) static int sk_diag_dump_name(struct sock *sk, struct sk_buff *nlskb)
{ {
/* might or might not have unix_table_locks */ /* might or might not have a hash table lock */
struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr); struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
if (!addr) if (!addr)
...@@ -208,7 +208,6 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) ...@@ -208,7 +208,6 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
struct sock *sk; struct sock *sk;
num = 0; num = 0;
spin_lock(&unix_table_locks[slot]);
spin_lock(&net->unx.table.locks[slot]); spin_lock(&net->unx.table.locks[slot]);
sk_for_each(sk, &net->unx.table.buckets[slot]) { sk_for_each(sk, &net->unx.table.buckets[slot]) {
if (num < s_num) if (num < s_num)
...@@ -220,14 +219,12 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) ...@@ -220,14 +219,12 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
cb->nlh->nlmsg_seq, cb->nlh->nlmsg_seq,
NLM_F_MULTI) < 0) { NLM_F_MULTI) < 0) {
spin_unlock(&net->unx.table.locks[slot]); spin_unlock(&net->unx.table.locks[slot]);
spin_unlock(&unix_table_locks[slot]);
goto done; goto done;
} }
next: next:
num++; num++;
} }
spin_unlock(&net->unx.table.locks[slot]); spin_unlock(&net->unx.table.locks[slot]);
spin_unlock(&unix_table_locks[slot]);
} }
done: done:
cb->args[0] = slot; cb->args[0] = slot;
...@@ -242,18 +239,15 @@ static struct sock *unix_lookup_by_ino(struct net *net, unsigned int ino) ...@@ -242,18 +239,15 @@ static struct sock *unix_lookup_by_ino(struct net *net, unsigned int ino)
int i; int i;
for (i = 0; i < UNIX_HASH_SIZE; i++) { for (i = 0; i < UNIX_HASH_SIZE; i++) {
spin_lock(&unix_table_locks[i]);
spin_lock(&net->unx.table.locks[i]); spin_lock(&net->unx.table.locks[i]);
sk_for_each(sk, &net->unx.table.buckets[i]) { sk_for_each(sk, &net->unx.table.buckets[i]) {
if (ino == sock_i_ino(sk)) { if (ino == sock_i_ino(sk)) {
sock_hold(sk); sock_hold(sk);
spin_unlock(&net->unx.table.locks[i]); spin_unlock(&net->unx.table.locks[i]);
spin_unlock(&unix_table_locks[i]);
return sk; return sk;
} }
} }
spin_unlock(&net->unx.table.locks[i]); spin_unlock(&net->unx.table.locks[i]);
spin_unlock(&unix_table_locks[i]);
} }
return NULL; return NULL;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment