Commit 794fbce4 authored by Seth Forshee, committed by Tim Gardner

UBUNTU: SAUCE: cgroup: Use a new super block when mounting in a cgroup namespace

BugLink: http://bugs.launchpad.net/bugs/1566505

Currently a new mount of an existing hierarchy always reuses the
original super block, even when the new mount is in a cgroup
namespace. This sometimes conflicts with the user namespace mount
support, which requires a new mount of an existing super block to
be in the same user namespace as the original mount. When
mounting from non-init cgroup and user namespaces sget() will
fail.

To fix this we can pass a pointer to the cgroup ns to kernfs when
mounting, causing kernfs_test_super() to no longer match super
blocks from different cgroup namespaces.

However we do wish to continue sharing the cgroup_root between
mounts of the same hierarchy. The cgroup_root's lifetime is
governed by the reference count of its cgrp member, but this is
a percpu reference count and is not well suited to this new
situation. Instead a new reference count is added to the
cgroup_root structure to track the number of super blocks sharing
that root, and this refcnt is used to determine when to put the
cgroup reference.
Signed-off-by: Seth Forshee <seth.forshee@canonical.com>
Signed-off-by: Tim Gardner <tim.gardner@canonical.com>
parent 6d6a1681
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include <linux/percpu-refcount.h> #include <linux/percpu-refcount.h>
#include <linux/percpu-rwsem.h> #include <linux/percpu-rwsem.h>
#include <linux/workqueue.h> #include <linux/workqueue.h>
#include <linux/kref.h>
#ifdef CONFIG_CGROUPS #ifdef CONFIG_CGROUPS
...@@ -301,6 +302,9 @@ struct cgroup { ...@@ -301,6 +302,9 @@ struct cgroup {
struct cgroup_root { struct cgroup_root {
struct kernfs_root *kf_root; struct kernfs_root *kf_root;
/* Reference count for superblocks sharing this cgroup_root */
struct kref kref;
/* The bitmask of subsystems attached to this hierarchy */ /* The bitmask of subsystems attached to this hierarchy */
unsigned int subsys_mask; unsigned int subsys_mask;
......
...@@ -1940,6 +1940,7 @@ static void init_cgroup_root(struct cgroup_root *root, ...@@ -1940,6 +1940,7 @@ static void init_cgroup_root(struct cgroup_root *root,
{ {
struct cgroup *cgrp = &root->cgrp; struct cgroup *cgrp = &root->cgrp;
kref_init(&root->kref);
INIT_LIST_HEAD(&root->root_list); INIT_LIST_HEAD(&root->root_list);
atomic_set(&root->nr_cgrps, 1); atomic_set(&root->nr_cgrps, 1);
cgrp->root = root; cgrp->root = root;
...@@ -2044,11 +2045,28 @@ static int cgroup_setup_root(struct cgroup_root *root, unsigned long ss_mask) ...@@ -2044,11 +2045,28 @@ static int cgroup_setup_root(struct cgroup_root *root, unsigned long ss_mask)
return ret; return ret;
} }
/*
 * cgroup_release_root - kref release callback for a cgroup_root
 * @kref: the kref embedded in the cgroup_root being released
 *
 * Passed to kref_put() on root->kref by cgroup_mount() (when the mount
 * fails) and by cgroup_kill_sb(), so it runs once the last super block
 * reference on the root has been dropped.
 */
static void cgroup_release_root(struct kref *kref)
{
/* Recover the enclosing cgroup_root from its embedded kref. */
struct cgroup_root *root = container_of(kref, struct cgroup_root, kref);
/*
 * If @root doesn't have any mounts or children, start killing it.
 * This prevents new mounts by disabling percpu_ref_tryget_live().
 * cgroup_mount() may wait for @root's release.
 *
 * And don't kill the default root.
 *
 * NOTE(review): the reuse path of cgroup_mount() shown in this change
 * gates on kref_get_unless_zero(&root->kref) rather than on
 * percpu_ref_tryget_live(); confirm the sentence above still applies.
 *
 * NOTE(review): when the root is kept (children exist, or it is the
 * default root) only the cgroup reference is put and root->kref is
 * left at zero. cgroup_mount() sleeps and restarts the syscall when
 * kref_get_unless_zero() fails -- verify that a later mount of such a
 * root can still make progress.
 */
if (!list_empty(&root->cgrp.self.children) ||
root == &cgrp_dfl_root)
cgroup_put(&root->cgrp);
else
percpu_ref_kill(&root->cgrp.self.refcnt);
}
static struct dentry *cgroup_mount(struct file_system_type *fs_type, static struct dentry *cgroup_mount(struct file_system_type *fs_type,
int flags, const char *unused_dev_name, int flags, const char *unused_dev_name,
void *data) void *data)
{ {
struct super_block *pinned_sb = NULL;
struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
struct cgroup_subsys *ss; struct cgroup_subsys *ss;
struct cgroup_root *root; struct cgroup_root *root;
...@@ -2144,22 +2162,12 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, ...@@ -2144,22 +2162,12 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
/* /*
* We want to reuse @root whose lifetime is governed by its * We want to reuse @root whose lifetime is governed by its
* ->cgrp. Let's check whether @root is alive and keep it * refcnt. If the refcnt is already zero then it's too late;
* that way. As cgroup_kill_sb() can happen anytime, we * sleep a bit and retry. Otherwise we get a reference and
* want to block it by pinning the sb so that @root doesn't * can reuse the root.
* get killed before mount is complete.
*
* With the sb pinned, tryget_live can reliably indicate
* whether @root can be reused. If it's being killed,
* drain it. We can use wait_queue for the wait but this
* path is super cold. Let's just sleep a bit and retry.
*/ */
pinned_sb = kernfs_pin_sb(root->kf_root, NULL); if (!kref_get_unless_zero(&root->kref)) {
if (IS_ERR(pinned_sb) ||
!percpu_ref_tryget_live(&root->cgrp.self.refcnt)) {
mutex_unlock(&cgroup_mutex); mutex_unlock(&cgroup_mutex);
if (!IS_ERR_OR_NULL(pinned_sb))
deactivate_super(pinned_sb);
msleep(10); msleep(10);
ret = restart_syscall(); ret = restart_syscall();
goto out_free; goto out_free;
...@@ -2212,8 +2220,8 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, ...@@ -2212,8 +2220,8 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
return ERR_PTR(ret); return ERR_PTR(ret);
} }
dentry = kernfs_mount(fs_type, flags, root->kf_root, dentry = kernfs_mount_ns(fs_type, flags, root->kf_root,
CGROUP_SUPER_MAGIC, &new_sb); CGROUP_SUPER_MAGIC, &new_sb, ns);
/* /*
* In non-init cgroup namespace, instead of root cgroup's * In non-init cgroup namespace, instead of root cgroup's
...@@ -2237,17 +2245,12 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, ...@@ -2237,17 +2245,12 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
dentry = nsdentry; dentry = nsdentry;
} }
if (IS_ERR(dentry) || !new_sb)
cgroup_put(&root->cgrp);
/* /*
* If @pinned_sb, we're reusing an existing root and holding an * On failure put the cgroup_root. If this is the last reference
* extra ref on its sb. Mount is complete. Put the extra ref. * cgroup_release_root will put the cgroup.
*/ */
if (pinned_sb) { if (IS_ERR(dentry))
WARN_ON(new_sb); kref_put(&root->kref, cgroup_release_root);
deactivate_super(pinned_sb);
}
put_cgroup_ns(ns); put_cgroup_ns(ns);
return dentry; return dentry;
...@@ -2258,19 +2261,7 @@ static void cgroup_kill_sb(struct super_block *sb) ...@@ -2258,19 +2261,7 @@ static void cgroup_kill_sb(struct super_block *sb)
struct kernfs_root *kf_root = kernfs_root_from_sb(sb); struct kernfs_root *kf_root = kernfs_root_from_sb(sb);
struct cgroup_root *root = cgroup_root_from_kf(kf_root); struct cgroup_root *root = cgroup_root_from_kf(kf_root);
/* kref_put(&root->kref, cgroup_release_root);
* If @root doesn't have any mounts or children, start killing it.
* This prevents new mounts by disabling percpu_ref_tryget_live().
* cgroup_mount() may wait for @root's release.
*
* And don't kill the default root.
*/
if (!list_empty(&root->cgrp.self.children) ||
root == &cgrp_dfl_root)
cgroup_put(&root->cgrp);
else
percpu_ref_kill(&root->cgrp.self.refcnt);
kernfs_kill_sb(sb); kernfs_kill_sb(sb);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment