Commit 09a503ea authored by Tejun Heo

cgroup: decouple cgroup_subsys_state destruction from cgroup destruction

Currently, css (cgroup_subsys_state) lifetime is tied to that of the
associated cgroup.  css's are created when the associated cgroup is
created and destroyed when it gets destroyed.  Also, individual css's
aren't RCU protected but the whole cgroup is.  With the planned
unified hierarchy, css's will need to be dynamically created and
destroyed within the lifetime of a cgroup.

To enable such usages, this patch decouples css destruction from
cgroup destruction - offline_css() invocation and the final css_put()
are moved from cgroup_destroy_css_killed() to css_killed_work_fn().
Now each css is individually offlined and put as its reference count
is killed instead of waiting for all css's attached to the cgroup to
finish refcnt killing and then proceeding to offlining and putting
them together.

While this changes the order of destruction operations, the changes
shouldn't be noticeable to cgroup subsystems or userland.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
parent f20104de
...@@ -82,7 +82,7 @@ struct cgroup_subsys_state { ...@@ -82,7 +82,7 @@ struct cgroup_subsys_state {
/* ID for this css, if possible */ /* ID for this css, if possible */
struct css_id __rcu *id; struct css_id __rcu *id;
/* Used to put @cgroup->dentry on the last css_put() */ /* percpu_ref killing and putting dentry on the last css_put() */
struct work_struct destroy_work; struct work_struct destroy_work;
}; };
......
...@@ -4355,6 +4355,7 @@ static void offline_css(struct cgroup_subsys_state *css) ...@@ -4355,6 +4355,7 @@ static void offline_css(struct cgroup_subsys_state *css)
ss->css_offline(css); ss->css_offline(css);
css->flags &= ~CSS_ONLINE; css->flags &= ~CSS_ONLINE;
css->cgroup->nr_css--;
} }
/* /*
...@@ -4558,15 +4559,30 @@ static void css_killed_work_fn(struct work_struct *work) ...@@ -4558,15 +4559,30 @@ static void css_killed_work_fn(struct work_struct *work)
mutex_lock(&cgroup_mutex); mutex_lock(&cgroup_mutex);
/*
* css_tryget() is guaranteed to fail now. Tell subsystems to
* initate destruction.
*/
offline_css(css);
/* /*
* If @cgrp is marked dead, it's waiting for refs of all css's to * If @cgrp is marked dead, it's waiting for refs of all css's to
* be disabled before proceeding to the second phase of cgroup * be disabled before proceeding to the second phase of cgroup
* destruction. If we are the last one, kick it off. * destruction. If we are the last one, kick it off.
*/ */
if (!--cgrp->nr_css && cgroup_is_dead(cgrp)) if (!cgrp->nr_css && cgroup_is_dead(cgrp))
cgroup_destroy_css_killed(cgrp); cgroup_destroy_css_killed(cgrp);
mutex_unlock(&cgroup_mutex); mutex_unlock(&cgroup_mutex);
/*
* Put the css refs from kill_css(). Each css holds an extra
* reference to the cgroup's dentry and cgroup removal proceeds
* regardless of css refs. On the last put of each css, whenever
* that may be, the extra dentry ref is put so that dentry
* destruction happens only after all css's are released.
*/
css_put(css);
} }
/* css kill confirmation processing requires process context, bounce */ /* css kill confirmation processing requires process context, bounce */
...@@ -4633,11 +4649,10 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) ...@@ -4633,11 +4649,10 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
* as killed on all CPUs on return. * as killed on all CPUs on return.
* *
* Use percpu_ref_kill_and_confirm() to get notifications as each * Use percpu_ref_kill_and_confirm() to get notifications as each
* css is confirmed to be seen as killed on all CPUs. The * css is confirmed to be seen as killed on all CPUs.
* notification callback keeps track of the number of css's to be * cgroup_destroy_css_killed() will be invoked to perform the rest
* killed and invokes cgroup_destroy_css_killed() to perform the * of destruction once the percpu refs of all css's are confirmed
* rest of destruction once the percpu refs of all css's are * to be killed.
* confirmed to be killed.
*/ */
for_each_root_subsys(cgrp->root, ss) { for_each_root_subsys(cgrp->root, ss) {
struct cgroup_subsys_state *css = cgroup_css(cgrp, ss->subsys_id); struct cgroup_subsys_state *css = cgroup_css(cgrp, ss->subsys_id);
...@@ -4704,36 +4719,17 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) ...@@ -4704,36 +4719,17 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
* @work: cgroup->destroy_free_work * @work: cgroup->destroy_free_work
* *
* This function is invoked from a work item for a cgroup which is being * This function is invoked from a work item for a cgroup which is being
* destroyed after the percpu refcnts of all css's are guaranteed to be * destroyed after all css's are offlined and performs the rest of
* seen as killed on all CPUs, and performs the rest of destruction. This * destruction. This is the second step of destruction described in the
* is the second step of destruction described in the comment above * comment above cgroup_destroy_locked().
* cgroup_destroy_locked().
*/ */
static void cgroup_destroy_css_killed(struct cgroup *cgrp) static void cgroup_destroy_css_killed(struct cgroup *cgrp)
{ {
struct cgroup *parent = cgrp->parent; struct cgroup *parent = cgrp->parent;
struct dentry *d = cgrp->dentry; struct dentry *d = cgrp->dentry;
struct cgroup_subsys *ss;
lockdep_assert_held(&cgroup_mutex); lockdep_assert_held(&cgroup_mutex);
/*
* css_tryget() is guaranteed to fail now. Tell subsystems to
* initate destruction.
*/
for_each_root_subsys(cgrp->root, ss)
offline_css(cgroup_css(cgrp, ss->subsys_id));
/*
* Put the css refs from cgroup_destroy_locked(). Each css holds
* an extra reference to the cgroup's dentry and cgroup removal
* proceeds regardless of css refs. On the last put of each css,
* whenever that may be, the extra dentry ref is put so that dentry
* destruction happens only after all css's are released.
*/
for_each_root_subsys(cgrp->root, ss)
css_put(cgroup_css(cgrp, ss->subsys_id));
/* delete this cgroup from parent->children */ /* delete this cgroup from parent->children */
list_del_rcu(&cgrp->sibling); list_del_rcu(&cgrp->sibling);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment