Commit 976673ee authored by Chris Mason, committed by Andy Grover

rds: switch to rwlock on bind_lock

The bind_lock is almost entirely read-only, but it gets
hammered during normal operations and is a major bottleneck.

This commit changes it to an rwlock, which reduces its share of
system time on a big NUMA machine from roughly 80% down to much
lower numbers.

A better fix would involve RCU, which is done in a later commit.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
parent ce47f52f
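
For readers unfamiliar with the pattern, the change below boils down to: declare the lock with DEFINE_RWLOCK() instead of DEFINE_SPINLOCK(), take it with read_lock_irqsave() on the hot lookup path so concurrent lookups no longer serialize against each other, and keep write_lock_irqsave() for the rare bind/unbind updates. A minimal, self-contained sketch of that pattern follows; the demo_* names and the list-based table are illustrative, not the RDS code (which uses an rbtree, as the diff shows):

#include <linux/list.h>
#include <linux/spinlock.h>	/* also provides the rwlock API */
#include <linux/types.h>

struct demo_entry {			/* illustrative entry, not an RDS struct */
	struct list_head node;
	u32 key;
};

static DEFINE_RWLOCK(demo_lock);	/* was: static DEFINE_SPINLOCK(demo_lock); */
static LIST_HEAD(demo_table);

/* Hot path: any number of CPUs may hold the read side at once. */
static struct demo_entry *demo_lookup(u32 key)
{
	struct demo_entry *e, *found = NULL;
	unsigned long flags;

	read_lock_irqsave(&demo_lock, flags);
	list_for_each_entry(e, &demo_table, node) {
		if (e->key == key) {
			found = e;
			break;
		}
	}
	read_unlock_irqrestore(&demo_lock, flags);
	return found;
}

/* Cold path: a writer still excludes all readers and other writers. */
static void demo_insert(struct demo_entry *e)
{
	unsigned long flags;

	write_lock_irqsave(&demo_lock, flags);
	list_add(&e->node, &demo_table);
	write_unlock_irqrestore(&demo_lock, flags);
}

The win only materializes when reads vastly outnumber writes, as the commit message says they do here: rwlocks cost more than plain spinlocks for writers and for uncontended acquisition, so a write-heavy lock would not benefit.
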
@@ -43,7 +43,7 @@
  * This is now called for every incoming frame so we arguably care much more
  * about it than we used to.
  */
-static DEFINE_SPINLOCK(rds_bind_lock);
+static DEFINE_RWLOCK(rds_bind_lock);
 static struct rb_root rds_bind_tree = RB_ROOT;
 
 static struct rds_sock *rds_bind_tree_walk(__be32 addr, __be16 port,
@@ -88,13 +88,13 @@ struct rds_sock *rds_find_bound(__be32 addr, __be16 port)
 	struct rds_sock *rs;
 	unsigned long flags;
 
-	spin_lock_irqsave(&rds_bind_lock, flags);
+	read_lock_irqsave(&rds_bind_lock, flags);
 	rs = rds_bind_tree_walk(addr, port, NULL);
 	if (rs && !sock_flag(rds_rs_to_sk(rs), SOCK_DEAD))
 		rds_sock_addref(rs);
 	else
 		rs = NULL;
-	spin_unlock_irqrestore(&rds_bind_lock, flags);
+	read_unlock_irqrestore(&rds_bind_lock, flags);
 
 	rdsdebug("returning rs %p for %pI4:%u\n", rs, &addr,
 		ntohs(port));
@@ -116,7 +116,7 @@ static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port)
 		last = rover - 1;
 	}
 
-	spin_lock_irqsave(&rds_bind_lock, flags);
+	write_lock_irqsave(&rds_bind_lock, flags);
 
 	do {
 		if (rover == 0)
@@ -137,7 +137,7 @@ static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port)
 			rs, &addr, (int)ntohs(*port));
 	}
 
-	spin_unlock_irqrestore(&rds_bind_lock, flags);
+	write_unlock_irqrestore(&rds_bind_lock, flags);
 
 	return ret;
 }
@@ -146,7 +146,7 @@ void rds_remove_bound(struct rds_sock *rs)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&rds_bind_lock, flags);
+	write_lock_irqsave(&rds_bind_lock, flags);
 
 	if (rs->rs_bound_addr) {
 		rdsdebug("rs %p unbinding from %pI4:%d\n",
@@ -158,7 +158,7 @@ void rds_remove_bound(struct rds_sock *rs)
 		rs->rs_bound_addr = 0;
 	}
 
-	spin_unlock_irqrestore(&rds_bind_lock, flags);
+	write_unlock_irqrestore(&rds_bind_lock, flags);
 }
 
 int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
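
The commit message above notes that RCU is the better long-term fix, done in a later commit. For reference only, here is a generic sketch of what an RCU-protected read side typically looks like; it is hedged as illustrative (demo_* names, list-based table) and is not the actual later RDS change. Readers take no lock at all, and writers serialize among themselves with an ordinary spinlock:

#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/refcount.h>
#include <linux/spinlock.h>
#include <linux/types.h>

struct demo_entry {			/* illustrative, not an RDS struct */
	struct list_head node;
	refcount_t refs;
	u32 key;
};

static LIST_HEAD(demo_table);
static DEFINE_SPINLOCK(demo_write_lock);	/* serializes writers only */

/* Readers take no lock; they only enter an RCU read-side section. */
static struct demo_entry *demo_lookup(u32 key)
{
	struct demo_entry *e;

	rcu_read_lock();
	list_for_each_entry_rcu(e, &demo_table, node) {
		if (e->key == key && refcount_inc_not_zero(&e->refs)) {
			/* Reference taken inside the RCU section, so the
			 * entry cannot be freed under us; safe to return. */
			rcu_read_unlock();
			return e;
		}
	}
	rcu_read_unlock();
	return NULL;
}

/* Writers still need mutual exclusion, but only against each other. */
static void demo_insert(struct demo_entry *e)
{
	refcount_set(&e->refs, 1);		/* table's own reference */
	spin_lock(&demo_write_lock);
	list_add_rcu(&e->node, &demo_table);
	spin_unlock(&demo_write_lock);
}

/* Removal would pair list_del_rcu() under demo_write_lock with a
 * grace-period-deferred free, e.g. kfree_rcu(), once the last
 * reference is dropped. */
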