Commit 710585d4 authored by Nicolas Dichtel, committed by Linus Torvalds

fs/proc: use a rb tree for the directory entries

When a lot of netdevices are created, one of the bottlenecks is the
creation of proc entries.  This series aims to accelerate this part.

The current implementation of the directories in /proc uses a singly
linked list.  This is slow when handling directories with large numbers of
entries (e.g. netdevice-related entries when lots of tunnels are opened).

This patch replaces this linked list with a red-black tree.

Here are some numbers:

dummy30000.batch contains 30 000 times 'link add type dummy'.

Before the patch:
  $ time ip -b dummy30000.batch
  real    2m31.950s
  user    0m0.440s
  sys     2m21.440s
  $ time rmmod dummy
  real    1m35.764s
  user    0m0.000s
  sys     1m24.088s

After the patch:
  $ time ip -b dummy30000.batch
  real    2m0.874s
  user    0m0.448s
  sys     1m49.720s
  $ time rmmod dummy
  real    1m13.988s
  user    0m0.000s
  sys     1m1.008s

The idea of improving this part was suggested by Thierry Herbelot.

[akpm@linux-foundation.org: initialise proc_root.subdir at compile time]
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Acked-by: David S. Miller <davem@davemloft.net>
Cc: Thierry Herbelot <thierry.herbelot@6wind.com>.
Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 9edad6ea
...@@ -31,9 +31,81 @@ static DEFINE_SPINLOCK(proc_subdir_lock); ...@@ -31,9 +31,81 @@ static DEFINE_SPINLOCK(proc_subdir_lock);
static int proc_match(unsigned int len, const char *name, struct proc_dir_entry *de) static int proc_match(unsigned int len, const char *name, struct proc_dir_entry *de)
{ {
if (de->namelen != len) if (len < de->namelen)
return 0; return -1;
return !memcmp(name, de->name, len); if (len > de->namelen)
return 1;
return memcmp(name, de->name, len);
}
/*
 * Return the entry that rb_first() yields for the "subdir" tree of @dir,
 * or NULL when that tree holds no node.
 */
static struct proc_dir_entry *pde_subdir_first(struct proc_dir_entry *dir)
{
	struct rb_node *first = rb_first(&dir->subdir);

	return first ? rb_entry(first, struct proc_dir_entry, subdir_node)
		     : NULL;
}
/*
 * Return the entry following @dir in the rb tree that @dir is linked into
 * through its subdir_node, or NULL when rb_next() finds no successor.
 *
 * Note: despite its name, @dir is the current entry being stepped from
 * (its subdir_node is what gets passed to rb_next()), not the directory
 * that owns the tree.
 */
static struct proc_dir_entry *pde_subdir_next(struct proc_dir_entry *dir)
{
	struct rb_node *succ = rb_next(&dir->subdir_node);

	if (!succ)
		return NULL;

	return rb_entry(succ, struct proc_dir_entry, subdir_node);
}
/*
 * Look up the child of @dir whose name matches the @len bytes at @name.
 *
 * Walks the "subdir" rb tree from its root, using the sign of proc_match()
 * to pick the left or right child at each step.  Returns the matching
 * entry, or NULL once the walk runs off the tree.
 */
static struct proc_dir_entry *pde_subdir_find(struct proc_dir_entry *dir,
					      const char *name,
					      unsigned int len)
{
	struct rb_node *cur = dir->subdir.rb_node;

	while (cur != NULL) {
		struct proc_dir_entry *entry =
			container_of(cur, struct proc_dir_entry, subdir_node);
		int cmp = proc_match(len, name, entry);

		if (cmp == 0)
			return entry;

		/* Negative: continue in the left subtree; positive: right. */
		cur = (cmp < 0) ? cur->rb_left : cur->rb_right;
	}

	return NULL;
}
/*
 * Link @de into the "subdir" rb tree of @dir.
 *
 * The insertion point is found by comparing @de's name against existing
 * entries with proc_match().  Returns true once @de has been linked and
 * the tree recoloured; returns false, without touching the tree, when an
 * existing entry compares equal to @de.
 */
static bool pde_subdir_insert(struct proc_dir_entry *dir,
struct proc_dir_entry *de)
{
struct rb_root *root = &dir->subdir;
/* 'new' tracks the child link to fill in; 'parent' the node that owns it */
struct rb_node **new = &root->rb_node, *parent = NULL;
/* Figure out where to put new node */
while (*new) {
struct proc_dir_entry *this =
container_of(*new, struct proc_dir_entry, subdir_node);
int result = proc_match(de->namelen, de->name, this);
parent = *new;
if (result < 0)
new = &(*new)->rb_left;
else if (result > 0)
new = &(*new)->rb_right;
else
/* An entry comparing equal is already in the tree: refuse the insert */
return false;
}
/* Add new node and rebalance tree. */
rb_link_node(&de->subdir_node, parent, new);
rb_insert_color(&de->subdir_node, root);
return true;
} }
static int proc_notify_change(struct dentry *dentry, struct iattr *iattr) static int proc_notify_change(struct dentry *dentry, struct iattr *iattr)
...@@ -92,10 +164,7 @@ static int __xlate_proc_name(const char *name, struct proc_dir_entry **ret, ...@@ -92,10 +164,7 @@ static int __xlate_proc_name(const char *name, struct proc_dir_entry **ret,
break; break;
len = next - cp; len = next - cp;
for (de = de->subdir; de ; de = de->next) { de = pde_subdir_find(de, cp, len);
if (proc_match(len, cp, de))
break;
}
if (!de) { if (!de) {
WARN(1, "name '%s'\n", name); WARN(1, "name '%s'\n", name);
return -ENOENT; return -ENOENT;
...@@ -183,19 +252,16 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir, ...@@ -183,19 +252,16 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir,
struct inode *inode; struct inode *inode;
spin_lock(&proc_subdir_lock); spin_lock(&proc_subdir_lock);
for (de = de->subdir; de ; de = de->next) { de = pde_subdir_find(de, dentry->d_name.name, dentry->d_name.len);
if (de->namelen != dentry->d_name.len) if (de) {
continue; pde_get(de);
if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { spin_unlock(&proc_subdir_lock);
pde_get(de); inode = proc_get_inode(dir->i_sb, de);
spin_unlock(&proc_subdir_lock); if (!inode)
inode = proc_get_inode(dir->i_sb, de); return ERR_PTR(-ENOMEM);
if (!inode) d_set_d_op(dentry, &simple_dentry_operations);
return ERR_PTR(-ENOMEM); d_add(dentry, inode);
d_set_d_op(dentry, &simple_dentry_operations); return NULL;
d_add(dentry, inode);
return NULL;
}
} }
spin_unlock(&proc_subdir_lock); spin_unlock(&proc_subdir_lock);
return ERR_PTR(-ENOENT); return ERR_PTR(-ENOENT);
...@@ -225,7 +291,7 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *file, ...@@ -225,7 +291,7 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *file,
return 0; return 0;
spin_lock(&proc_subdir_lock); spin_lock(&proc_subdir_lock);
de = de->subdir; de = pde_subdir_first(de);
i = ctx->pos - 2; i = ctx->pos - 2;
for (;;) { for (;;) {
if (!de) { if (!de) {
...@@ -234,7 +300,7 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *file, ...@@ -234,7 +300,7 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *file,
} }
if (!i) if (!i)
break; break;
de = de->next; de = pde_subdir_next(de);
i--; i--;
} }
...@@ -249,7 +315,7 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *file, ...@@ -249,7 +315,7 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *file,
} }
spin_lock(&proc_subdir_lock); spin_lock(&proc_subdir_lock);
ctx->pos++; ctx->pos++;
next = de->next; next = pde_subdir_next(de);
pde_put(de); pde_put(de);
de = next; de = next;
} while (de); } while (de);
...@@ -286,9 +352,8 @@ static const struct inode_operations proc_dir_inode_operations = { ...@@ -286,9 +352,8 @@ static const struct inode_operations proc_dir_inode_operations = {
static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp) static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp)
{ {
struct proc_dir_entry *tmp;
int ret; int ret;
ret = proc_alloc_inum(&dp->low_ino); ret = proc_alloc_inum(&dp->low_ino);
if (ret) if (ret)
return ret; return ret;
...@@ -308,17 +373,10 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp ...@@ -308,17 +373,10 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp
} }
spin_lock(&proc_subdir_lock); spin_lock(&proc_subdir_lock);
for (tmp = dir->subdir; tmp; tmp = tmp->next)
if (strcmp(tmp->name, dp->name) == 0) {
WARN(1, "proc_dir_entry '%s/%s' already registered\n",
dir->name, dp->name);
break;
}
dp->next = dir->subdir;
dp->parent = dir; dp->parent = dir;
dir->subdir = dp; if (pde_subdir_insert(dir, dp) == false)
WARN(1, "proc_dir_entry '%s/%s' already registered\n",
dir->name, dp->name);
spin_unlock(&proc_subdir_lock); spin_unlock(&proc_subdir_lock);
return 0; return 0;
...@@ -354,6 +412,7 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, ...@@ -354,6 +412,7 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
ent->namelen = qstr.len; ent->namelen = qstr.len;
ent->mode = mode; ent->mode = mode;
ent->nlink = nlink; ent->nlink = nlink;
ent->subdir = RB_ROOT;
atomic_set(&ent->count, 1); atomic_set(&ent->count, 1);
spin_lock_init(&ent->pde_unload_lock); spin_lock_init(&ent->pde_unload_lock);
INIT_LIST_HEAD(&ent->pde_openers); INIT_LIST_HEAD(&ent->pde_openers);
...@@ -485,7 +544,6 @@ void pde_put(struct proc_dir_entry *pde) ...@@ -485,7 +544,6 @@ void pde_put(struct proc_dir_entry *pde)
*/ */
void remove_proc_entry(const char *name, struct proc_dir_entry *parent) void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
{ {
struct proc_dir_entry **p;
struct proc_dir_entry *de = NULL; struct proc_dir_entry *de = NULL;
const char *fn = name; const char *fn = name;
unsigned int len; unsigned int len;
...@@ -497,14 +555,9 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent) ...@@ -497,14 +555,9 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
} }
len = strlen(fn); len = strlen(fn);
for (p = &parent->subdir; *p; p=&(*p)->next ) { de = pde_subdir_find(parent, fn, len);
if (proc_match(len, fn, *p)) { if (de)
de = *p; rb_erase(&de->subdir_node, &parent->subdir);
*p = de->next;
de->next = NULL;
break;
}
}
spin_unlock(&proc_subdir_lock); spin_unlock(&proc_subdir_lock);
if (!de) { if (!de) {
WARN(1, "name '%s'\n", name); WARN(1, "name '%s'\n", name);
...@@ -516,16 +569,15 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent) ...@@ -516,16 +569,15 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
if (S_ISDIR(de->mode)) if (S_ISDIR(de->mode))
parent->nlink--; parent->nlink--;
de->nlink = 0; de->nlink = 0;
WARN(de->subdir, "%s: removing non-empty directory " WARN(pde_subdir_first(de),
"'%s/%s', leaking at least '%s'\n", __func__, "%s: removing non-empty directory '%s/%s', leaking at least '%s'\n",
de->parent->name, de->name, de->subdir->name); __func__, de->parent->name, de->name, pde_subdir_first(de)->name);
pde_put(de); pde_put(de);
} }
EXPORT_SYMBOL(remove_proc_entry); EXPORT_SYMBOL(remove_proc_entry);
int remove_proc_subtree(const char *name, struct proc_dir_entry *parent) int remove_proc_subtree(const char *name, struct proc_dir_entry *parent)
{ {
struct proc_dir_entry **p;
struct proc_dir_entry *root = NULL, *de, *next; struct proc_dir_entry *root = NULL, *de, *next;
const char *fn = name; const char *fn = name;
unsigned int len; unsigned int len;
...@@ -537,24 +589,18 @@ int remove_proc_subtree(const char *name, struct proc_dir_entry *parent) ...@@ -537,24 +589,18 @@ int remove_proc_subtree(const char *name, struct proc_dir_entry *parent)
} }
len = strlen(fn); len = strlen(fn);
for (p = &parent->subdir; *p; p=&(*p)->next ) { root = pde_subdir_find(parent, fn, len);
if (proc_match(len, fn, *p)) {
root = *p;
*p = root->next;
root->next = NULL;
break;
}
}
if (!root) { if (!root) {
spin_unlock(&proc_subdir_lock); spin_unlock(&proc_subdir_lock);
return -ENOENT; return -ENOENT;
} }
rb_erase(&root->subdir_node, &parent->subdir);
de = root; de = root;
while (1) { while (1) {
next = de->subdir; next = pde_subdir_first(de);
if (next) { if (next) {
de->subdir = next->next; rb_erase(&next->subdir_node, &de->subdir);
next->next = NULL;
de = next; de = next;
continue; continue;
} }
......
...@@ -24,10 +24,9 @@ struct mempolicy; ...@@ -24,10 +24,9 @@ struct mempolicy;
* tree) of these proc_dir_entries, so that we can dynamically * tree) of these proc_dir_entries, so that we can dynamically
* add new files to /proc. * add new files to /proc.
* *
* The "next" pointer creates a linked list of one /proc directory, * parent/subdir are used for the directory structure (every /proc file has a
* while parent/subdir create the directory structure (every * parent, but "subdir" is empty for all non-directory entries).
* /proc file has a parent, but "subdir" is NULL for all * subdir_node is used to build the rb tree "subdir" of the parent.
* non-directory entries).
*/ */
struct proc_dir_entry { struct proc_dir_entry {
unsigned int low_ino; unsigned int low_ino;
...@@ -38,7 +37,9 @@ struct proc_dir_entry { ...@@ -38,7 +37,9 @@ struct proc_dir_entry {
loff_t size; loff_t size;
const struct inode_operations *proc_iops; const struct inode_operations *proc_iops;
const struct file_operations *proc_fops; const struct file_operations *proc_fops;
struct proc_dir_entry *next, *parent, *subdir; struct proc_dir_entry *parent;
struct rb_root subdir;
struct rb_node subdir_node;
void *data; void *data;
atomic_t count; /* use count */ atomic_t count; /* use count */
atomic_t in_use; /* number of callers into module in progress; */ atomic_t in_use; /* number of callers into module in progress; */
......
...@@ -192,6 +192,7 @@ static __net_init int proc_net_ns_init(struct net *net) ...@@ -192,6 +192,7 @@ static __net_init int proc_net_ns_init(struct net *net)
if (!netd) if (!netd)
goto out; goto out;
netd->subdir = RB_ROOT;
netd->data = net; netd->data = net;
netd->nlink = 2; netd->nlink = 2;
netd->namelen = 3; netd->namelen = 3;
......
...@@ -251,6 +251,7 @@ struct proc_dir_entry proc_root = { ...@@ -251,6 +251,7 @@ struct proc_dir_entry proc_root = {
.proc_iops = &proc_root_inode_operations, .proc_iops = &proc_root_inode_operations,
.proc_fops = &proc_root_operations, .proc_fops = &proc_root_operations,
.parent = &proc_root, .parent = &proc_root,
.subdir = RB_ROOT,
.name = "/proc", .name = "/proc",
}; };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment