Commit 794fbce4 authored by Seth Forshee's avatar Seth Forshee Committed by Tim Gardner

UBUNTU: SAUCE: cgroup: Use a new super block when mounting in a cgroup namespace

BugLink: http://bugs.launchpad.net/bugs/1566505

Currently a new mount of an existing hierarchy always reuses the
original super block, even when the new mount is in a cgroup
namespace. This sometimes conflicts with the user namespace mount
support, which requires a new mount of an existing super block to
be in the same user namespace as the original mount. When
mounting from non-init cgroup and user namespaces sget() will
fail.

To fix this we can pass a pointer to the cgroup ns to kernfs when
mounting, causing kernfs_test_super() to no longer match super
blocks from different cgroup namespaces.

However we do wish to continue sharing the cgroup_root between
mounts of the same heirarchy. The cgroup_root's lifetime is
governed by the reference count of its cgrp member, but this is
a percpu reference count and is not well suited to this new
situation. Instead a new reference count is added to the
cgroup_root structure to track the number of super blocks sharing
that root, and this refcnt is used to determine when to put the
cgroup reference.
Signed-off-by: default avatarSeth Forshee <seth.forshee@canonical.com>
Signed-off-by: default avatarTim Gardner <tim.gardner@canonical.com>
parent 6d6a1681
......@@ -16,6 +16,7 @@
#include <linux/percpu-refcount.h>
#include <linux/percpu-rwsem.h>
#include <linux/workqueue.h>
#include <linux/kref.h>
#ifdef CONFIG_CGROUPS
......@@ -301,6 +302,9 @@ struct cgroup {
struct cgroup_root {
struct kernfs_root *kf_root;
/* Reference count for superblocks sharing this cgroup_root */
struct kref kref;
/* The bitmask of subsystems attached to this hierarchy */
unsigned int subsys_mask;
......
......@@ -1940,6 +1940,7 @@ static void init_cgroup_root(struct cgroup_root *root,
{
struct cgroup *cgrp = &root->cgrp;
kref_init(&root->kref);
INIT_LIST_HEAD(&root->root_list);
atomic_set(&root->nr_cgrps, 1);
cgrp->root = root;
......@@ -2044,11 +2045,28 @@ static int cgroup_setup_root(struct cgroup_root *root, unsigned long ss_mask)
return ret;
}
static void cgroup_release_root(struct kref *kref)
{
struct cgroup_root *root = container_of(kref, struct cgroup_root, kref);
/*
* If @root doesn't have any mounts or children, start killing it.
* This prevents new mounts by disabling percpu_ref_tryget_live().
* cgroup_mount() may wait for @root's release.
*
* And don't kill the default root.
*/
if (!list_empty(&root->cgrp.self.children) ||
root == &cgrp_dfl_root)
cgroup_put(&root->cgrp);
else
percpu_ref_kill(&root->cgrp.self.refcnt);
}
static struct dentry *cgroup_mount(struct file_system_type *fs_type,
int flags, const char *unused_dev_name,
void *data)
{
struct super_block *pinned_sb = NULL;
struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
struct cgroup_subsys *ss;
struct cgroup_root *root;
......@@ -2144,22 +2162,12 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
/*
* We want to reuse @root whose lifetime is governed by its
* ->cgrp. Let's check whether @root is alive and keep it
* that way. As cgroup_kill_sb() can happen anytime, we
* want to block it by pinning the sb so that @root doesn't
* get killed before mount is complete.
*
* With the sb pinned, tryget_live can reliably indicate
* whether @root can be reused. If it's being killed,
* drain it. We can use wait_queue for the wait but this
* path is super cold. Let's just sleep a bit and retry.
* refcnt. If the refcnt is already zero then it's too late;
* sleep a bit and retry. Otherwise we get a reference and
* can reuse the root.
*/
pinned_sb = kernfs_pin_sb(root->kf_root, NULL);
if (IS_ERR(pinned_sb) ||
!percpu_ref_tryget_live(&root->cgrp.self.refcnt)) {
if (!kref_get_unless_zero(&root->kref)) {
mutex_unlock(&cgroup_mutex);
if (!IS_ERR_OR_NULL(pinned_sb))
deactivate_super(pinned_sb);
msleep(10);
ret = restart_syscall();
goto out_free;
......@@ -2212,8 +2220,8 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
return ERR_PTR(ret);
}
dentry = kernfs_mount(fs_type, flags, root->kf_root,
CGROUP_SUPER_MAGIC, &new_sb);
dentry = kernfs_mount_ns(fs_type, flags, root->kf_root,
CGROUP_SUPER_MAGIC, &new_sb, ns);
/*
* In non-init cgroup namespace, instead of root cgroup's
......@@ -2237,17 +2245,12 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
dentry = nsdentry;
}
if (IS_ERR(dentry) || !new_sb)
cgroup_put(&root->cgrp);
/*
* If @pinned_sb, we're reusing an existing root and holding an
* extra ref on its sb. Mount is complete. Put the extra ref.
* On failure put the cgroup_root. If this is the last reference
* cgroup_release_root will put the cgroup.
*/
if (pinned_sb) {
WARN_ON(new_sb);
deactivate_super(pinned_sb);
}
if (IS_ERR(dentry))
kref_put(&root->kref, cgroup_release_root);
put_cgroup_ns(ns);
return dentry;
......@@ -2258,19 +2261,7 @@ static void cgroup_kill_sb(struct super_block *sb)
struct kernfs_root *kf_root = kernfs_root_from_sb(sb);
struct cgroup_root *root = cgroup_root_from_kf(kf_root);
/*
* If @root doesn't have any mounts or children, start killing it.
* This prevents new mounts by disabling percpu_ref_tryget_live().
* cgroup_mount() may wait for @root's release.
*
* And don't kill the default root.
*/
if (!list_empty(&root->cgrp.self.children) ||
root == &cgrp_dfl_root)
cgroup_put(&root->cgrp);
else
percpu_ref_kill(&root->cgrp.self.refcnt);
kref_put(&root->kref, cgroup_release_root);
kernfs_kill_sb(sb);
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment