Commit 1d5be6b2 authored by Tejun Heo

cgroup: move module ref handling into rebind_subsystems()

Module ref handling in cgroup is rather weird.
parse_cgroupfs_options() grabs all the modules for the specified
subsystems.  A module ref is kept if the specified subsystem is newly
bound to the hierarchy.  If not, or the operation fails, the refs are
dropped.  This scatters module ref handling across multiple functions
making it difficult to track.  It also makes the function nasty to use
for dynamic subsystem binding which is necessary for the planned
unified hierarchy.

There's nothing which requires the subsystem modules to be pinned
between parse_cgroupfs_options() and rebind_subsystems() in both mount
and remount paths.  parse_cgroupfs_options() can just parse and
rebind_subsystems() can handle pinning the subsystems that it wants to
bind, which is a natural part of its task - binding - anyway.

Move module ref handling into rebind_subsystems() which makes the code
a lot simpler - a module ref is taken iff the subsystem is going to be
bound, and put iff it is unbound or binding fails.

v2: Li pointed out that if a controller module is unloaded between
    parsing and binding, rebind_subsystems() won't notice the missing
    controller as it only iterates through existing controllers.  Fix
    it by updating rebind_subsystems() to compare @added_mask to
    @pinned and fail with -ENOENT if they don't match.
Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
parent f172e67c
...@@ -1003,6 +1003,7 @@ static int rebind_subsystems(struct cgroupfs_root *root, ...@@ -1003,6 +1003,7 @@ static int rebind_subsystems(struct cgroupfs_root *root,
{ {
struct cgroup *cgrp = &root->top_cgroup; struct cgroup *cgrp = &root->top_cgroup;
struct cgroup_subsys *ss; struct cgroup_subsys *ss;
unsigned long pinned = 0;
int i, ret; int i, ret;
BUG_ON(!mutex_is_locked(&cgroup_mutex)); BUG_ON(!mutex_is_locked(&cgroup_mutex));
...@@ -1010,20 +1011,32 @@ static int rebind_subsystems(struct cgroupfs_root *root, ...@@ -1010,20 +1011,32 @@ static int rebind_subsystems(struct cgroupfs_root *root,
/* Check that any added subsystems are currently free */ /* Check that any added subsystems are currently free */
for_each_subsys(ss, i) { for_each_subsys(ss, i) {
unsigned long bit = 1UL << i; if (!(added_mask & (1 << i)))
if (!(bit & added_mask))
continue; continue;
/* is the subsystem mounted elsewhere? */
if (ss->root != &cgroup_dummy_root) { if (ss->root != &cgroup_dummy_root) {
/* Subsystem isn't free */ ret = -EBUSY;
return -EBUSY; goto out_put;
}
/* pin the module */
if (!try_module_get(ss->module)) {
ret = -ENOENT;
goto out_put;
} }
pinned |= 1 << i;
}
/* subsys could be missing if unloaded between parsing and here */
if (added_mask != pinned) {
ret = -ENOENT;
goto out_put;
} }
ret = cgroup_populate_dir(cgrp, added_mask); ret = cgroup_populate_dir(cgrp, added_mask);
if (ret) if (ret)
return ret; goto out_put;
/* /*
* Nothing can fail from this point on. Remove files for the * Nothing can fail from this point on. Remove files for the
...@@ -1067,11 +1080,6 @@ static int rebind_subsystems(struct cgroupfs_root *root, ...@@ -1067,11 +1080,6 @@ static int rebind_subsystems(struct cgroupfs_root *root,
} else if (bit & root->subsys_mask) { } else if (bit & root->subsys_mask) {
/* Subsystem state should already exist */ /* Subsystem state should already exist */
BUG_ON(!cgrp->subsys[i]); BUG_ON(!cgrp->subsys[i]);
/*
* a refcount was taken, but we already had one, so
* drop the extra reference.
*/
module_put(ss->module);
#ifdef CONFIG_MODULE_UNLOAD #ifdef CONFIG_MODULE_UNLOAD
BUG_ON(ss->module && !module_refcount(ss->module)); BUG_ON(ss->module && !module_refcount(ss->module));
#endif #endif
...@@ -1088,6 +1096,12 @@ static int rebind_subsystems(struct cgroupfs_root *root, ...@@ -1088,6 +1096,12 @@ static int rebind_subsystems(struct cgroupfs_root *root,
root->flags |= CGRP_ROOT_SUBSYS_BOUND; root->flags |= CGRP_ROOT_SUBSYS_BOUND;
return 0; return 0;
out_put:
for_each_subsys(ss, i)
if (pinned & (1 << i))
module_put(ss->module);
return ret;
} }
static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry) static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry)
...@@ -1138,7 +1152,6 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) ...@@ -1138,7 +1152,6 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
char *token, *o = data; char *token, *o = data;
bool all_ss = false, one_ss = false; bool all_ss = false, one_ss = false;
unsigned long mask = (unsigned long)-1; unsigned long mask = (unsigned long)-1;
bool module_pin_failed = false;
struct cgroup_subsys *ss; struct cgroup_subsys *ss;
int i; int i;
...@@ -1281,52 +1294,9 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) ...@@ -1281,52 +1294,9 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
if (!opts->subsys_mask && !opts->name) if (!opts->subsys_mask && !opts->name)
return -EINVAL; return -EINVAL;
/*
* Grab references on all the modules we'll need, so the subsystems
* don't dance around before rebind_subsystems attaches them. This may
* take duplicate reference counts on a subsystem that's already used,
* but rebind_subsystems handles this case.
*/
for_each_subsys(ss, i) {
if (!(opts->subsys_mask & (1UL << i)))
continue;
if (!try_module_get(cgroup_subsys[i]->module)) {
module_pin_failed = true;
break;
}
}
if (module_pin_failed) {
/*
* oops, one of the modules was going away. this means that we
* raced with a module_delete call, and to the user this is
* essentially a "subsystem doesn't exist" case.
*/
for (i--; i >= 0; i--) {
/* drop refcounts only on the ones we took */
unsigned long bit = 1UL << i;
if (!(bit & opts->subsys_mask))
continue;
module_put(cgroup_subsys[i]->module);
}
return -ENOENT;
}
return 0; return 0;
} }
static void drop_parsed_module_refcounts(unsigned long subsys_mask)
{
struct cgroup_subsys *ss;
int i;
mutex_lock(&cgroup_mutex);
for_each_subsys(ss, i)
if (subsys_mask & (1UL << i))
module_put(cgroup_subsys[i]->module);
mutex_unlock(&cgroup_mutex);
}
static int cgroup_remount(struct super_block *sb, int *flags, char *data) static int cgroup_remount(struct super_block *sb, int *flags, char *data)
{ {
int ret = 0; int ret = 0;
...@@ -1384,8 +1354,6 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) ...@@ -1384,8 +1354,6 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
mutex_unlock(&cgroup_root_mutex); mutex_unlock(&cgroup_root_mutex);
mutex_unlock(&cgroup_mutex); mutex_unlock(&cgroup_mutex);
mutex_unlock(&cgrp->dentry->d_inode->i_mutex); mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
if (ret)
drop_parsed_module_refcounts(opts.subsys_mask);
return ret; return ret;
} }
...@@ -1591,7 +1559,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, ...@@ -1591,7 +1559,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
new_root = cgroup_root_from_opts(&opts); new_root = cgroup_root_from_opts(&opts);
if (IS_ERR(new_root)) { if (IS_ERR(new_root)) {
ret = PTR_ERR(new_root); ret = PTR_ERR(new_root);
goto drop_modules; goto out_err;
} }
opts.new_root = new_root; opts.new_root = new_root;
...@@ -1600,7 +1568,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, ...@@ -1600,7 +1568,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
if (IS_ERR(sb)) { if (IS_ERR(sb)) {
ret = PTR_ERR(sb); ret = PTR_ERR(sb);
cgroup_free_root(opts.new_root); cgroup_free_root(opts.new_root);
goto drop_modules; goto out_err;
} }
root = sb->s_fs_info; root = sb->s_fs_info;
...@@ -1708,9 +1676,6 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, ...@@ -1708,9 +1676,6 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
pr_warning("cgroup: new mount options do not match the existing superblock, will be ignored\n"); pr_warning("cgroup: new mount options do not match the existing superblock, will be ignored\n");
} }
} }
/* no subsys rebinding, so refcounts don't change */
drop_parsed_module_refcounts(opts.subsys_mask);
} }
kfree(opts.release_agent); kfree(opts.release_agent);
...@@ -1728,8 +1693,6 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, ...@@ -1728,8 +1693,6 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
mutex_unlock(&inode->i_mutex); mutex_unlock(&inode->i_mutex);
drop_new_super: drop_new_super:
deactivate_locked_super(sb); deactivate_locked_super(sb);
drop_modules:
drop_parsed_module_refcounts(opts.subsys_mask);
out_err: out_err:
kfree(opts.release_agent); kfree(opts.release_agent);
kfree(opts.name); kfree(opts.name);
...@@ -4837,7 +4800,7 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss) ...@@ -4837,7 +4800,7 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss)
/* /*
* we shouldn't be called if the subsystem is in use, and the use of * we shouldn't be called if the subsystem is in use, and the use of
* try_module_get in parse_cgroupfs_options should ensure that it * try_module_get() in rebind_subsystems() should ensure that it
* doesn't start being used while we're killing it off. * doesn't start being used while we're killing it off.
*/ */
BUG_ON(ss->root != &cgroup_dummy_root); BUG_ON(ss->root != &cgroup_dummy_root);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment