Commit 70fbcdf4 authored by Ram Pai, committed by Linus Torvalds

[PATCH] umount_tree() locking change

umount is done under the protection of the namespace semaphore.  This
can lead to interesting deadlocks when the last reference to a mount is
released, if the filesystem code is in a sufficiently nasty state.

This collects all the to-be-released mounts and releases them after
releasing the namespace semaphore.  That both reduces the time the
namespace semaphore is held and makes things more robust.

Idea proposed by Al Viro.
Signed-off-by: Ram Pai <linuxram@us.ibm.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 5b83d2c5
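For context, the locking pattern the patch adopts can be illustrated outside the VFS.  Below is a minimal, self-contained sketch, assuming a hypothetical mount_like structure, tree_lock mutex, and teardown()/release_all() helpers (none of these exist in the kernel tree); it only demonstrates "unlink onto a private kill list under the lock, do the real release after dropping it":

/*
 * Sketch of the pattern used by the patch: while holding a lock, unlink
 * the doomed objects onto a caller-owned list; do the actual (possibly
 * blocking) release only after the lock has been dropped.
 *
 * All names here (mount_like, tree_lock, teardown, release_all) are
 * hypothetical and exist only to illustrate the idea.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct mount_like {
	struct mount_like *next;
	char name[32];
};

static pthread_mutex_t tree_lock = PTHREAD_MUTEX_INITIALIZER;
static struct mount_like *tree;		/* protected by tree_lock */

/* Phase 1: under the lock, move everything onto the caller's kill list. */
static void teardown(struct mount_like **kill)
{
	pthread_mutex_lock(&tree_lock);
	*kill = tree;
	tree = NULL;
	pthread_mutex_unlock(&tree_lock);
}

/* Phase 2: with no locks held, run the expensive cleanup. */
static void release_all(struct mount_like *kill)
{
	while (kill) {
		struct mount_like *m = kill;
		kill = m->next;
		printf("releasing %s\n", m->name);	/* may block in real code */
		free(m);
	}
}

int main(void)
{
	for (int i = 0; i < 3; i++) {
		struct mount_like *m = calloc(1, sizeof(*m));
		snprintf(m->name, sizeof(m->name), "mnt%d", i);
		m->next = tree;
		tree = m;
	}

	struct mount_like *kill = NULL;
	teardown(&kill);	/* cheap, lock held briefly */
	release_all(kill);	/* potentially slow, no lock held */
	return 0;
}

In the patch itself the two phases are umount_tree(), which runs with vfsmount_lock and the namespace semaphore held and only moves mounts onto the caller's list, and release_mounts(), which the callers invoke after dropping those locks and which performs the final dput()/mntput() calls.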
@@ -394,32 +394,45 @@ int may_umount(struct vfsmount *mnt)
 
 EXPORT_SYMBOL(may_umount);
 
-static void umount_tree(struct vfsmount *mnt)
+static void release_mounts(struct list_head *head)
+{
+	struct vfsmount *mnt;
+	while(!list_empty(head)) {
+		mnt = list_entry(head->next, struct vfsmount, mnt_hash);
+		list_del_init(&mnt->mnt_hash);
+		if (mnt->mnt_parent != mnt) {
+			struct dentry *dentry;
+			struct vfsmount *m;
+			spin_lock(&vfsmount_lock);
+			dentry = mnt->mnt_mountpoint;
+			m = mnt->mnt_parent;
+			mnt->mnt_mountpoint = mnt->mnt_root;
+			mnt->mnt_parent = mnt;
+			spin_unlock(&vfsmount_lock);
+			dput(dentry);
+			mntput(m);
+		}
+		mntput(mnt);
+	}
+}
+
+static void umount_tree(struct vfsmount *mnt, struct list_head *kill)
 {
 	struct vfsmount *p;
-	LIST_HEAD(kill);
 
 	for (p = mnt; p; p = next_mnt(p, mnt)) {
-		list_del(&p->mnt_list);
-		list_add(&p->mnt_list, &kill);
-		__touch_namespace(p->mnt_namespace);
-		p->mnt_namespace = NULL;
+		list_del(&p->mnt_hash);
+		list_add(&p->mnt_hash, kill);
 	}
 
-	while (!list_empty(&kill)) {
-		mnt = list_entry(kill.next, struct vfsmount, mnt_list);
-		list_del_init(&mnt->mnt_list);
-		list_del_init(&mnt->mnt_expire);
-		if (mnt->mnt_parent == mnt) {
-			spin_unlock(&vfsmount_lock);
-		} else {
-			struct nameidata old_nd;
-			detach_mnt(mnt, &old_nd);
-			spin_unlock(&vfsmount_lock);
-			path_release(&old_nd);
-		}
-		mntput(mnt);
-		spin_lock(&vfsmount_lock);
+	list_for_each_entry(p, kill, mnt_hash) {
+		list_del_init(&p->mnt_expire);
+		list_del_init(&p->mnt_list);
+		__touch_namespace(p->mnt_namespace);
+		p->mnt_namespace = NULL;
+		list_del_init(&p->mnt_child);
+		if (p->mnt_parent != p)
+			mnt->mnt_mountpoint->d_mounted--;
 	}
 }
 
@@ -427,6 +440,7 @@ static int do_umount(struct vfsmount *mnt, int flags)
 {
 	struct super_block *sb = mnt->mnt_sb;
 	int retval;
+	LIST_HEAD(umount_list);
 
 	retval = security_sb_umount(mnt, flags);
 	if (retval)
@@ -497,13 +511,14 @@ static int do_umount(struct vfsmount *mnt, int flags)
 	retval = -EBUSY;
 	if (atomic_read(&mnt->mnt_count) == 2 || flags & MNT_DETACH) {
 		if (!list_empty(&mnt->mnt_list))
-			umount_tree(mnt);
+			umount_tree(mnt, &umount_list);
 		retval = 0;
 	}
 	spin_unlock(&vfsmount_lock);
 	if (retval)
 		security_sb_umount_busy(mnt);
 	up_write(&current->namespace->sem);
+	release_mounts(&umount_list);
 	return retval;
 }
 
@@ -616,9 +631,11 @@ static struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry)
 	return res;
 Enomem:
 	if (res) {
+		LIST_HEAD(umount_list);
 		spin_lock(&vfsmount_lock);
-		umount_tree(res);
+		umount_tree(res, &umount_list);
 		spin_unlock(&vfsmount_lock);
+		release_mounts(&umount_list);
 	}
 	return NULL;
 }
@@ -698,9 +715,11 @@ static int do_loopback(struct nameidata *nd, char *old_name, int recurse)
 
 	err = graft_tree(mnt, nd);
 	if (err) {
+		LIST_HEAD(umount_list);
 		spin_lock(&vfsmount_lock);
-		umount_tree(mnt);
+		umount_tree(mnt, &umount_list);
 		spin_unlock(&vfsmount_lock);
+		release_mounts(&umount_list);
 	}
 out:
 
@@ -875,7 +894,8 @@ int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd,
 
 EXPORT_SYMBOL_GPL(do_add_mount);
 
-static void expire_mount(struct vfsmount *mnt, struct list_head *mounts)
+static void expire_mount(struct vfsmount *mnt, struct list_head *mounts,
+			 struct list_head *umounts)
 {
 	spin_lock(&vfsmount_lock);
 
@@ -893,16 +913,12 @@ static void expire_mount(struct vfsmount *mnt, struct list_head *mounts)
 	 * contributed by the vfsmount parent and the mntget above
 	 */
 	if (atomic_read(&mnt->mnt_count) == 2) {
-		struct nameidata old_nd;
-
 		/* delete from the namespace */
 		touch_namespace(mnt->mnt_namespace);
 		list_del_init(&mnt->mnt_list);
 		mnt->mnt_namespace = NULL;
-		detach_mnt(mnt, &old_nd);
+		umount_tree(mnt, umounts);
 		spin_unlock(&vfsmount_lock);
-		path_release(&old_nd);
-		mntput(mnt);
 	} else {
 		/*
 		 * Someone brought it back to life whilst we didn't have any
@@ -951,6 +967,7 @@ void mark_mounts_for_expiry(struct list_head *mounts)
 	 *   - dispose of the corpse
 	 */
 	while (!list_empty(&graveyard)) {
+		LIST_HEAD(umounts);
 		mnt = list_entry(graveyard.next, struct vfsmount, mnt_expire);
 		list_del_init(&mnt->mnt_expire);
 
@@ -963,12 +980,11 @@ void mark_mounts_for_expiry(struct list_head *mounts)
 
 		spin_unlock(&vfsmount_lock);
 		down_write(&namespace->sem);
-		expire_mount(mnt, mounts);
+		expire_mount(mnt, mounts, &umounts);
 		up_write(&namespace->sem);
-
+		release_mounts(&umounts);
 		mntput(mnt);
 		put_namespace(namespace);
-
 		spin_lock(&vfsmount_lock);
 	}
 
@@ -1508,12 +1524,14 @@ void __init mnt_init(unsigned long mempages)
 void __put_namespace(struct namespace *namespace)
 {
 	struct vfsmount *root = namespace->root;
+	LIST_HEAD(umount_list);
 	namespace->root = NULL;
 	spin_unlock(&vfsmount_lock);
 	down_write(&namespace->sem);
 	spin_lock(&vfsmount_lock);
-	umount_tree(root);
+	umount_tree(root, &umount_list);
 	spin_unlock(&vfsmount_lock);
 	up_write(&namespace->sem);
+	release_mounts(&umount_list);
 	kfree(namespace);
 }