mnt: Add a per mount namespace limit on the number of mounts

CAI Qian <caiqian@redhat.com> pointed out that the semantics of shared subtrees make it possible to create an exponentially increasing number of mounts in a mount namespace. mkdir /tmp/1 /tmp/2 mount --make-rshared / for i in $(seq 1 20) ; do mount --bind /tmp/1 /tmp/2 ; done Will create create 2^20 or 1048576 mounts, which is a practical problem as some people have managed to hit this by accident. As such CVE-2016-6213 was assigned. Ian Kent <raven@themaw.net> described the situation for autofs users as follows: > The number of mounts for direct mount maps is usually not very large because of > the way they are implemented, large direct mount maps can have performance > problems. There can be anywhere from a few (likely case a few hundred) to less > than 10000, plus mounts that have been triggered and not yet expired. > > Indirect mounts have one autofs mount at the root plus the number of mounts that > have been triggered and not yet expired. > > The number of autofs indirect map entries can range from a few to the common > case of several thousand and in rare cases up to between 30000 and 50000. I've > not heard of people with maps larger than 50000 entries. > > The larger the number of map entries the greater the possibility for a large > number of active mounts so it's not hard to expect cases of a 1000 or somewhat > more active mounts. So I am setting the default number of mounts allowed per mount namespace at 100,000. This is more than enough for any use case I know of, but small enough to quickly stop an exponential increase in mounts. Which should be perfect to catch misconfigurations and malfunctioning programs. For anyone who needs a higher limit this can be changed by writing to the new /proc/sys/fs/mount-max sysctl. Tested-by: CAI Qian <caiqian@redhat.com> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com> CVE-2016-6213 (backported from commit d2921684) [ luis: adjust context ] Signed-off-by: Luis Henriques <luis.henriques@canonical.com> Acked-by: Seth Forshee <seth.forshee@canonical.com> Acked-by: Tim Gardner <tim.gardner@canonical.com>

mnt: Add a per mount namespace limit on the number of mounts
CAI Qian <caiqian@redhat.com> pointed out that the semantics of shared subtrees make it possible to create an exponentially increasing number of mounts in a mount namespace. mkdir /tmp/1 /tmp/2 mount --make-rshared / for i in $(seq 1 20) ; do mount --bind /tmp/1 /tmp/2 ; done Will create create 2^20 or 1048576 mounts, which is a practical problem as some people have managed to hit this by accident. As such CVE-2016-6213 was assigned. Ian Kent <raven@themaw.net> described the situation for autofs users as follows: > The number of mounts for direct mount maps is usually not very large because of > the way they are implemented, large direct mount maps can have performance > problems. There can be anywhere from a few (likely case a few hundred) to less > than 10000, plus mounts that have been triggered and not yet expired. > > Indirect mounts have one autofs mount at the root plus the number of mounts that > have been triggered and not yet expired. > > The number of autofs indirect map entries can range from a few to the common > case of several thousand and in rare cases up to between 30000 and 50000. I've > not heard of people with maps larger than 50000 entries. > > The larger the number of map entries the greater the possibility for a large > number of active mounts so it's not hard to expect cases of a 1000 or somewhat > more active mounts. So I am setting the default number of mounts allowed per mount namespace at 100,000. This is more than enough for any use case I know of, but small enough to quickly stop an exponential increase in mounts. Which should be perfect to catch misconfigurations and malfunctioning programs. For anyone who needs a higher limit this can be changed by writing to the new /proc/sys/fs/mount-max sysctl. Tested-by: CAI Qian <caiqian@redhat.com> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com> CVE-2016-6213 (backported from commit d2921684) [ luis: adjust context ] Signed-off-by: Luis Henriques <luis.henriques@canonical.com> Acked-by: Seth Forshee <seth.forshee@canonical.com> Acked-by: Tim Gardner <tim.gardner@canonical.com>
fd4b5fa6 · Eric W. Biederman · Luis Henriques · d5d9494d · fd4b5fa6 · fd4b5fa6
Commit fd4b5fa6 authored Nov 16, 2016 by Eric W. Biederman Committed by Luis Henriques Dec 06, 2016
7 changed files
--- a/Documentation/sysctl/fs.txt
+++ b/Documentation/sysctl/fs.txt
@@ -265,6 +265,13 @@ aio-nr can grow to.

 ==============================================================

+mount-max:
+
+This denotes the maximum number of mounts that may exist
+in a mount namespace.
+
+==============================================================
+

 2. /proc/sys/fs/binfmt_misc
 ----------------------------------------------------------

--- a/fs/mount.h
+++ b/fs/mount.h
@@ -13,6 +13,8 @@ struct mnt_namespace {
 	u64			seq;	/* Sequence number to prevent loops */
 	wait_queue_head_t poll;
 	u64 event;
+	unsigned int		mounts; /* # of mounts in the namespace */
+	unsigned int		pending_mounts;
 };

 struct mnt_pcp {

--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -27,6 +27,9 @@
 #include "pnode.h"
 #include "internal.h"

+/* Maximum number of mounts in a mount namespace */
+unsigned int sysctl_mount_max __read_mostly = 100000;
+
 static unsigned int m_hash_mask __read_mostly;
 static unsigned int m_hash_shift __read_mostly;
 static unsigned int mp_hash_mask __read_mostly;
@@ -900,6 +903,9 @@ static void commit_tree(struct mount *mnt, struct mount *shadows)

 	list_splice(&head, n->list.prev);

+	n->mounts += n->pending_mounts;
+	n->pending_mounts = 0;
+
 	attach_shadowed(mnt, parent, shadows);
 	touch_mnt_namespace(n);
 }
@@ -1420,11 +1426,16 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
 		propagate_umount(&tmp_list);

 	while (!list_empty(&tmp_list)) {
+		struct mnt_namespace *ns;
 		bool disconnect;
 		p = list_first_entry(&tmp_list, struct mount, mnt_list);
 		list_del_init(&p->mnt_expire);
 		list_del_init(&p->mnt_list);
-		__touch_mnt_namespace(p->mnt_ns);
+		ns = p->mnt_ns;
+		if (ns) {
+			ns->mounts--;
+			__touch_mnt_namespace(ns);
+		}
 		p->mnt_ns = NULL;
 		if (how & UMOUNT_SYNC)
 			p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;
@@ -1834,6 +1845,28 @@ static int invent_group_ids(struct mount *mnt, bool recurse)
 	return 0;
 }

+int count_mounts(struct mnt_namespace *ns, struct mount *mnt)
+{
+	unsigned int max = READ_ONCE(sysctl_mount_max);
+	unsigned int mounts = 0, old, pending, sum;
+	struct mount *p;
+
+	for (p = mnt; p; p = next_mnt(p, mnt))
+		mounts++;
+
+	old = ns->mounts;
+	pending = ns->pending_mounts;
+	sum = old + pending;
+	if ((old > sum) ||
+	    (pending > sum) ||
+	    (max < sum) ||
+	    (mounts > (max - sum)))
+		return -ENOSPC;
+
+	ns->pending_mounts = pending + mounts;
+	return 0;
+}
+
 /*
 *  @source_mnt : mount tree to be attached
 *  @nd         : place the mount tree @source_mnt is attached
@@ -1903,10 +1936,18 @@ static int attach_recursive_mnt(struct mount *source_mnt,
 			struct path *parent_path)
 {
 	HLIST_HEAD(tree_list);
+	struct mnt_namespace *ns = dest_mnt->mnt_ns;
 	struct mount *child, *p;
 	struct hlist_node *n;
 	int err;

+	/* Is there space to add these mounts to the mount namespace? */
+	if (!parent_path) {
+		err = count_mounts(ns, source_mnt);
+		if (err)
+			goto out;
+	}
+
 	if (IS_MNT_SHARED(dest_mnt)) {
 		err = invent_group_ids(source_mnt, true);
 		if (err)
@@ -1943,11 +1984,13 @@ static int attach_recursive_mnt(struct mount *source_mnt,
 out_cleanup_ids:
 	while (!hlist_empty(&tree_list)) {
 		child = hlist_entry(tree_list.first, struct mount, mnt_hash);
+		child->mnt_parent->mnt_ns->pending_mounts = 0;
 		umount_tree(child, UMOUNT_SYNC);
 	}
 	unlock_mount_hash();
 	cleanup_group_ids(source_mnt, NULL);
 out:
+	ns->pending_mounts = 0;
 	return err;
 }

@@ -2774,6 +2817,8 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
 	init_waitqueue_head(&new_ns->poll);
 	new_ns->event = 0;
 	new_ns->user_ns = get_user_ns(user_ns);
+	new_ns->mounts = 0;
+	new_ns->pending_mounts = 0;
 	return new_ns;
 }

@@ -2823,6 +2868,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
 	q = new;
 	while (p) {
 		q->mnt_ns = new_ns;
+		new_ns->mounts++;
 		if (new_fs) {
 			if (&p->mnt == new_fs->root.mnt) {
 				new_fs->root.mnt = mntget(&q->mnt);
@@ -2861,6 +2907,7 @@ static struct mnt_namespace *create_mnt_ns(struct vfsmount *m)
 		struct mount *mnt = real_mount(m);
 		mnt->mnt_ns = new_ns;
 		new_ns->root = mnt;
+		new_ns->mounts++;
 		list_add(&mnt->mnt_list, &new_ns->list);
 	} else {
 		mntput(m);

--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -259,7 +259,7 @@ static int propagate_one(struct mount *m)
 		read_sequnlock_excl(&mount_lock);
 	}
 	hlist_add_head(&child->mnt_hash, list);
-	return 0;
+	return count_mounts(m->mnt_ns, child);
 }

 /*

--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -52,4 +52,5 @@ void mnt_set_mountpoint(struct mount *, struct mountpoint *,
 struct mount *copy_tree(struct mount *, struct dentry *, int);
 bool is_path_reachable(struct mount *, struct dentry *,
 			 const struct path *root);
+int count_mounts(struct mnt_namespace *ns, struct mount *mnt);
 #endif /* _LINUX_PNODE_H */
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -96,4 +96,6 @@ extern void mark_mounts_for_expiry(struct list_head *mounts);

 extern dev_t name_to_dev_t(const char *name);

+extern unsigned int sysctl_mount_max;
+
 #endif /* _LINUX_MOUNT_H */
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -66,6 +66,7 @@
 #include <linux/kexec.h>
 #include <linux/bpf.h>
 #include <linux/efi.h>
+#include <linux/mount.h>

 #include <asm/uaccess.h>
 #include <asm/processor.h>
@@ -1792,6 +1793,14 @@ static struct ctl_table fs_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_doulongvec_minmax,
 	},
+	{
+		.procname	= "mount-max",
+		.data		= &sysctl_mount_max,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &one,
+	},
 	{ }
 };