Commit 5406812e authored by Linus Torvalds

Merge branch 'for-4.4-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup

Pull cgroup fixes from Tejun Heo:
 "More change than I'd have liked at this stage.  The pids controller
  and the changes made to cgroup core to support it introduced and
  revealed several important issues.

   - Assigning membership to a newly created task and migrating it can
     race leading to incorrect accounting.  Oleg fixed it by widening
     threadgroup synchronization.  It looks like we'll be able to merge
     it with a different percpu rwsem which is used in fork path making
     things simpler and cheaper.

   - The recent change to extend cgroup membership to zombies (so that
     pid accounting can extend till the pid is actually released) missed
     pinning the underlying data structures leading to use-after-free.
     Fixed.

   - v2 hierarchy was calling subsystem callbacks with the wrong target
     cgroup_subsys_state based on the incorrect assumption that they
     share the same target.  pids is the first controller affected by
     this.  Subsys callbacks updated so that they can deal with
     multi-target migrations"

* 'for-4.4-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cgroup_pids: don't account for the root cgroup
  cgroup: fix handling of multi-destination migration from subtree_control enabling
  cgroup_freezer: simplify propagation of CGROUP_FROZEN clearing in freezer_attach()
  cgroup: pids: kill pids_fork(), simplify pids_can_fork() and pids_cancel_fork()
  cgroup: pids: fix race between cgroup_post_fork() and cgroup_migrate()
  cgroup: make css_set pin its css's to avoid use-afer-free
  cgroup: fix cftype->file_offset handling
parents 633bb738 0b98f0c0
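
The common thread in the diff below is a signature change: the attach-family methods in cgroup_subsys lose their cgroup_subsys_state argument, and each task's destination css is produced by the taskset iterator instead, since on the v2 hierarchy a single migration can target different csses per task. A minimal sketch of the converted callback shape (the controller and its helpers ex_css() and ex_can_take() are hypothetical; the signature and iterator come from this series):

	static int example_can_attach(struct cgroup_taskset *tset)
	{
		struct task_struct *task;
		struct cgroup_subsys_state *dst_css;

		/* dst_css may differ from task to task on the v2 hierarchy */
		cgroup_taskset_for_each(task, dst_css, tset) {
			if (!ex_can_take(ex_css(dst_css), task))
				return -EINVAL;
		}
		return 0;
	}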
@@ -1127,15 +1127,15 @@ void blkcg_exit_queue(struct request_queue *q)
  * of the main cic data structures. For now we allow a task to change
  * its cgroup only if it's the only owner of its ioc.
  */
-static int blkcg_can_attach(struct cgroup_subsys_state *css,
-			    struct cgroup_taskset *tset)
+static int blkcg_can_attach(struct cgroup_taskset *tset)
 {
 	struct task_struct *task;
+	struct cgroup_subsys_state *dst_css;
 	struct io_context *ioc;
 	int ret = 0;
 
 	/* task_lock() is needed to avoid races with exit_io_context() */
-	cgroup_taskset_for_each(task, tset) {
+	cgroup_taskset_for_each(task, dst_css, tset) {
 		task_lock(task);
 		ioc = task->io_context;
 		if (ioc && atomic_read(&ioc->nr_tasks) > 1)
...
@@ -90,7 +90,6 @@ enum {
  */
 struct cgroup_file {
 	/* do not access any fields from outside cgroup core */
-	struct list_head node;			/* anchored at css->files */
 	struct kernfs_node *kn;
 };
 
@@ -134,9 +133,6 @@ struct cgroup_subsys_state {
 	 */
 	u64 serial_nr;
 
-	/* all cgroup_files associated with this css */
-	struct list_head files;
-
 	/* percpu_ref killing and RCU release */
 	struct rcu_head rcu_head;
 	struct work_struct destroy_work;
@@ -426,12 +422,9 @@ struct cgroup_subsys {
 	void (*css_reset)(struct cgroup_subsys_state *css);
 	void (*css_e_css_changed)(struct cgroup_subsys_state *css);
 
-	int (*can_attach)(struct cgroup_subsys_state *css,
-			  struct cgroup_taskset *tset);
-	void (*cancel_attach)(struct cgroup_subsys_state *css,
-			      struct cgroup_taskset *tset);
-	void (*attach)(struct cgroup_subsys_state *css,
-		       struct cgroup_taskset *tset);
+	int (*can_attach)(struct cgroup_taskset *tset);
+	void (*cancel_attach)(struct cgroup_taskset *tset);
+	void (*attach)(struct cgroup_taskset *tset);
 	int (*can_fork)(struct task_struct *task, void **priv_p);
 	void (*cancel_fork)(struct task_struct *task, void *priv);
 	void (*fork)(struct task_struct *task, void *priv);
...
@@ -88,6 +88,7 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from);
 int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
 int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
 int cgroup_rm_cftypes(struct cftype *cfts);
+void cgroup_file_notify(struct cgroup_file *cfile);
 
 char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen);
 int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry);
@@ -119,8 +120,10 @@ struct cgroup_subsys_state *css_rightmost_descendant(struct cgroup_subsys_state
 struct cgroup_subsys_state *css_next_descendant_post(struct cgroup_subsys_state *pos,
 						     struct cgroup_subsys_state *css);
 
-struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset);
-struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset);
+struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset,
+					 struct cgroup_subsys_state **dst_cssp);
+struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset,
+					struct cgroup_subsys_state **dst_cssp);
 
 void css_task_iter_start(struct cgroup_subsys_state *css,
 			 struct css_task_iter *it);
@@ -235,30 +238,39 @@ void css_task_iter_end(struct css_task_iter *it);
 /**
  * cgroup_taskset_for_each - iterate cgroup_taskset
  * @task: the loop cursor
+ * @dst_css: the destination css
  * @tset: taskset to iterate
  *
  * @tset may contain multiple tasks and they may belong to multiple
- * processes. When there are multiple tasks in @tset, if a task of a
- * process is in @tset, all tasks of the process are in @tset. Also, all
- * are guaranteed to share the same source and destination csses.
+ * processes.
+ *
+ * On the v2 hierarchy, there may be tasks from multiple processes and they
+ * may not share the source or destination csses.
+ *
+ * On traditional hierarchies, when there are multiple tasks in @tset, if a
+ * task of a process is in @tset, all tasks of the process are in @tset.
+ * Also, all are guaranteed to share the same source and destination csses.
 *
 * Iteration is not in any specific order.
 */
-#define cgroup_taskset_for_each(task, tset)			\
-	for ((task) = cgroup_taskset_first((tset)); (task);	\
-	     (task) = cgroup_taskset_next((tset)))
+#define cgroup_taskset_for_each(task, dst_css, tset)			\
+	for ((task) = cgroup_taskset_first((tset), &(dst_css));	\
+	     (task);							\
+	     (task) = cgroup_taskset_next((tset), &(dst_css)))
 
 /**
  * cgroup_taskset_for_each_leader - iterate group leaders in a cgroup_taskset
  * @leader: the loop cursor
+ * @dst_css: the destination css
  * @tset: takset to iterate
 *
 * Iterate threadgroup leaders of @tset.  For single-task migrations, @tset
 * may not contain any.
 */
-#define cgroup_taskset_for_each_leader(leader, tset)		\
-	for ((leader) = cgroup_taskset_first((tset)); (leader);	\
-	     (leader) = cgroup_taskset_next((tset)))		\
+#define cgroup_taskset_for_each_leader(leader, dst_css, tset)		\
+	for ((leader) = cgroup_taskset_first((tset), &(dst_css));	\
+	     (leader);							\
+	     (leader) = cgroup_taskset_next((tset), &(dst_css)))	\
 		if ((leader) != (leader)->group_leader)		\
 			;					\
 		else
@@ -516,19 +528,6 @@ static inline void pr_cont_cgroup_path(struct cgroup *cgrp)
 	pr_cont_kernfs_path(cgrp->kn);
 }
 
-/**
- * cgroup_file_notify - generate a file modified event for a cgroup_file
- * @cfile: target cgroup_file
- *
- * @cfile must have been obtained by setting cftype->file_offset.
- */
-static inline void cgroup_file_notify(struct cgroup_file *cfile)
-{
-	/* might not have been created due to one of the CFTYPE selector flags */
-	if (cfile->kn)
-		kernfs_notify(cfile->kn);
-}
-
 #else /* !CONFIG_CGROUPS */
 
 struct cgroup_subsys_state;
...
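
Controllers that only need one representative css, which is valid on traditional hierarchies where all tasks in @tset share the same destination, can still obtain it from the first task; cpuset, memcg, and netclassid below use this pattern. A hedged sketch:

	static void example_attach(struct cgroup_taskset *tset)
	{
		struct cgroup_subsys_state *css;

		/* initializes iteration and yields the first task's dst css */
		cgroup_taskset_first(tset, &css);
		/* ... operate on css ... */
	}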
@@ -97,6 +97,12 @@ static DEFINE_SPINLOCK(css_set_lock);
  */
 static DEFINE_SPINLOCK(cgroup_idr_lock);
 
+/*
+ * Protects cgroup_file->kn for !self csses.  It synchronizes notifications
+ * against file removal/re-creation across css hiding.
+ */
+static DEFINE_SPINLOCK(cgroup_file_kn_lock);
+
 /*
  * Protects cgroup_subsys->release_agent_path.  Modifying it also requires
  * cgroup_mutex.  Reading requires either cgroup_mutex or this spinlock.
@@ -754,9 +760,11 @@ static void put_css_set_locked(struct css_set *cset)
 	if (!atomic_dec_and_test(&cset->refcount))
 		return;
 
-	/* This css_set is dead. unlink it and release cgroup refcounts */
-	for_each_subsys(ss, ssid)
+	/* This css_set is dead. unlink it and release cgroup and css refs */
+	for_each_subsys(ss, ssid) {
 		list_del(&cset->e_cset_node[ssid]);
+		css_put(cset->subsys[ssid]);
+	}
 	hash_del(&cset->hlist);
 	css_set_count--;
 
@@ -1056,9 +1064,13 @@ static struct css_set *find_css_set(struct css_set *old_cset,
 	key = css_set_hash(cset->subsys);
 	hash_add(css_set_table, &cset->hlist, key);
 
-	for_each_subsys(ss, ssid)
+	for_each_subsys(ss, ssid) {
+		struct cgroup_subsys_state *css = cset->subsys[ssid];
+
 		list_add_tail(&cset->e_cset_node[ssid],
-			      &cset->subsys[ssid]->cgroup->e_csets[ssid]);
+			      &css->cgroup->e_csets[ssid]);
+		css_get(css);
+	}
 
 	spin_unlock_bh(&css_set_lock);
 
@@ -1393,6 +1405,16 @@ static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
 	char name[CGROUP_FILE_NAME_MAX];
 
 	lockdep_assert_held(&cgroup_mutex);
+
+	if (cft->file_offset) {
+		struct cgroup_subsys_state *css = cgroup_css(cgrp, cft->ss);
+		struct cgroup_file *cfile = (void *)css + cft->file_offset;
+
+		spin_lock_irq(&cgroup_file_kn_lock);
+		cfile->kn = NULL;
+		spin_unlock_irq(&cgroup_file_kn_lock);
+	}
+
 	kernfs_remove_by_name(cgrp->kn, cgroup_file_name(cgrp, cft, name));
 }
 
@@ -1856,7 +1878,6 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
 	INIT_LIST_HEAD(&cgrp->self.sibling);
 	INIT_LIST_HEAD(&cgrp->self.children);
-	INIT_LIST_HEAD(&cgrp->self.files);
 	INIT_LIST_HEAD(&cgrp->cset_links);
 	INIT_LIST_HEAD(&cgrp->pidlists);
 	mutex_init(&cgrp->pidlist_mutex);
@@ -2216,6 +2237,9 @@ struct cgroup_taskset {
 	struct list_head	src_csets;
 	struct list_head	dst_csets;
 
+	/* the subsys currently being processed */
+	int			ssid;
+
 	/*
 	 * Fields for cgroup_taskset_*() iteration.
 	 *
@@ -2278,25 +2302,29 @@ static void cgroup_taskset_add(struct task_struct *task,
 /**
  * cgroup_taskset_first - reset taskset and return the first task
  * @tset: taskset of interest
+ * @dst_cssp: output variable for the destination css
  *
  * @tset iteration is initialized and the first task is returned.
  */
-struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset)
+struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset,
+					 struct cgroup_subsys_state **dst_cssp)
 {
 	tset->cur_cset = list_first_entry(tset->csets, struct css_set, mg_node);
 	tset->cur_task = NULL;
 
-	return cgroup_taskset_next(tset);
+	return cgroup_taskset_next(tset, dst_cssp);
 }
 
 /**
  * cgroup_taskset_next - iterate to the next task in taskset
  * @tset: taskset of interest
+ * @dst_cssp: output variable for the destination css
  *
 * Return the next task in @tset.  Iteration must have been initialized
 * with cgroup_taskset_first().
 */
-struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset)
+struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset,
+					struct cgroup_subsys_state **dst_cssp)
 {
 	struct css_set *cset = tset->cur_cset;
 	struct task_struct *task = tset->cur_task;
@@ -2311,6 +2339,18 @@ struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset)
 		if (&task->cg_list != &cset->mg_tasks) {
 			tset->cur_cset = cset;
 			tset->cur_task = task;
+
+			/*
+			 * This function may be called both before and
+			 * after cgroup_taskset_migrate().  The two cases
+			 * can be distinguished by looking at whether @cset
+			 * has its ->mg_dst_cset set.
+			 */
+			if (cset->mg_dst_cset)
+				*dst_cssp = cset->mg_dst_cset->subsys[tset->ssid];
+			else
+				*dst_cssp = cset->subsys[tset->ssid];
+
 			return task;
 		}
 
@@ -2346,7 +2386,8 @@ static int cgroup_taskset_migrate(struct cgroup_taskset *tset,
 	/* check that we can legitimately attach to the cgroup */
 	for_each_e_css(css, i, dst_cgrp) {
 		if (css->ss->can_attach) {
-			ret = css->ss->can_attach(css, tset);
+			tset->ssid = i;
+			ret = css->ss->can_attach(tset);
 			if (ret) {
 				failed_css = css;
 				goto out_cancel_attach;
@@ -2379,9 +2420,12 @@ static int cgroup_taskset_migrate(struct cgroup_taskset *tset,
 	 */
 	tset->csets = &tset->dst_csets;
 
-	for_each_e_css(css, i, dst_cgrp)
-		if (css->ss->attach)
-			css->ss->attach(css, tset);
+	for_each_e_css(css, i, dst_cgrp) {
+		if (css->ss->attach) {
+			tset->ssid = i;
+			css->ss->attach(tset);
+		}
+	}
 
 	ret = 0;
 	goto out_release_tset;
@@ -2390,8 +2434,10 @@ static int cgroup_taskset_migrate(struct cgroup_taskset *tset,
 	for_each_e_css(css, i, dst_cgrp) {
 		if (css == failed_css)
 			break;
-		if (css->ss->cancel_attach)
-			css->ss->cancel_attach(css, tset);
+		if (css->ss->cancel_attach) {
+			tset->ssid = i;
+			css->ss->cancel_attach(tset);
+		}
 	}
 out_release_tset:
 	spin_lock_bh(&css_set_lock);
@@ -3313,9 +3359,9 @@ static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp,
 	if (cft->file_offset) {
 		struct cgroup_file *cfile = (void *)css + cft->file_offset;
 
-		kernfs_get(kn);
+		spin_lock_irq(&cgroup_file_kn_lock);
 		cfile->kn = kn;
-		list_add(&cfile->node, &css->files);
+		spin_unlock_irq(&cgroup_file_kn_lock);
 	}
 
 	return 0;
@@ -3552,6 +3598,22 @@ int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
 	return cgroup_add_cftypes(ss, cfts);
 }
 
+/**
+ * cgroup_file_notify - generate a file modified event for a cgroup_file
+ * @cfile: target cgroup_file
+ *
+ * @cfile must have been obtained by setting cftype->file_offset.
+ */
+void cgroup_file_notify(struct cgroup_file *cfile)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&cgroup_file_kn_lock, flags);
+	if (cfile->kn)
+		kernfs_notify(cfile->kn);
+	spin_unlock_irqrestore(&cgroup_file_kn_lock, flags);
+}
+
 /**
  * cgroup_task_count - count the number of tasks in a cgroup.
  * @cgrp: the cgroup in question
@@ -4613,13 +4675,9 @@ static void css_free_work_fn(struct work_struct *work)
 		container_of(work, struct cgroup_subsys_state, destroy_work);
 	struct cgroup_subsys *ss = css->ss;
 	struct cgroup *cgrp = css->cgroup;
-	struct cgroup_file *cfile;
 
 	percpu_ref_exit(&css->refcnt);
 
-	list_for_each_entry(cfile, &css->files, node)
-		kernfs_put(cfile->kn);
-
 	if (ss) {
 		/* css free path */
 		int id = css->id;
@@ -4724,7 +4782,6 @@ static void init_and_link_css(struct cgroup_subsys_state *css,
 	css->ss = ss;
 	INIT_LIST_HEAD(&css->sibling);
 	INIT_LIST_HEAD(&css->children);
-	INIT_LIST_HEAD(&css->files);
 	css->serial_nr = css_serial_nr_next++;
 
 	if (cgroup_parent(cgrp)) {
...
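
For context, the newly exported cgroup_file_notify() pairs with cftype->file_offset roughly as follows. struct example_css, its "events" file, and example_state_changed() are hypothetical; struct cgroup_file, file_offset, and cgroup_file_notify() come from this diff:

	struct example_css {
		struct cgroup_subsys_state css;
		struct cgroup_file events_file;	/* ->kn managed by cgroup core */
	};

	static struct cftype example_files[] = {
		{
			.name		= "events",
			.file_offset	= offsetof(struct example_css, events_file),
		},
		{ }	/* terminate */
	};

	/* wake up poll/inotify watchers on a state change */
	static void example_state_changed(struct example_css *excss)
	{
		cgroup_file_notify(&excss->events_file);
	}

Because cgroup core now clears cfile->kn under cgroup_file_kn_lock when the file is removed, such a notification cannot race with file removal or re-creation.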
@@ -155,12 +155,10 @@ static void freezer_css_free(struct cgroup_subsys_state *css)
  * @freezer->lock.  freezer_attach() makes the new tasks conform to the
  * current state and all following state changes can see the new tasks.
  */
-static void freezer_attach(struct cgroup_subsys_state *new_css,
-			   struct cgroup_taskset *tset)
+static void freezer_attach(struct cgroup_taskset *tset)
 {
-	struct freezer *freezer = css_freezer(new_css);
 	struct task_struct *task;
-	bool clear_frozen = false;
+	struct cgroup_subsys_state *new_css;
 
 	mutex_lock(&freezer_mutex);
 
@@ -174,20 +172,19 @@ static void freezer_attach(struct cgroup_subsys_state *new_css,
 	 * current state before executing the following - !frozen tasks may
 	 * be visible in a FROZEN cgroup and frozen tasks in a THAWED one.
 	 */
-	cgroup_taskset_for_each(task, tset) {
+	cgroup_taskset_for_each(task, new_css, tset) {
+		struct freezer *freezer = css_freezer(new_css);
+
 		if (!(freezer->state & CGROUP_FREEZING)) {
 			__thaw_task(task);
 		} else {
 			freeze_task(task);
-			freezer->state &= ~CGROUP_FROZEN;
-			clear_frozen = true;
+
+			/* clear FROZEN and propagate upwards */
+			while (freezer && (freezer->state & CGROUP_FROZEN)) {
+				freezer->state &= ~CGROUP_FROZEN;
+				freezer = parent_freezer(freezer);
+			}
 		}
 	}
 
-	/* propagate FROZEN clearing upwards */
-	while (clear_frozen && (freezer = parent_freezer(freezer))) {
-		freezer->state &= ~CGROUP_FROZEN;
-		clear_frozen = freezer->state & CGROUP_FREEZING;
-	}
-
 	mutex_unlock(&freezer_mutex);
...
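
The attach path above now clears CGROUP_FROZEN inline while walking up the hierarchy instead of deferring the walk through a clear_frozen flag. A self-contained userspace model of the new loop (the toy struct freezer and flag values are stand-ins; the walk mirrors the one added above):

	#include <stdio.h>

	#define CGROUP_FREEZING	0x2
	#define CGROUP_FROZEN	0x4

	struct freezer {
		struct freezer *parent;
		unsigned int state;
	};

	/* a not-yet-frozen task arrived: clear FROZEN here and upwards */
	static void clear_frozen_upwards(struct freezer *freezer)
	{
		while (freezer && (freezer->state & CGROUP_FROZEN)) {
			freezer->state &= ~CGROUP_FROZEN;
			freezer = freezer->parent;
		}
	}

	int main(void)
	{
		struct freezer root = { NULL, CGROUP_FREEZING | CGROUP_FROZEN };
		struct freezer child = { &root, CGROUP_FREEZING | CGROUP_FROZEN };

		clear_frozen_upwards(&child);
		/* prints 0 0: FROZEN was cleared all the way up */
		printf("%d %d\n", !!(child.state & CGROUP_FROZEN),
		       !!(root.state & CGROUP_FROZEN));
		return 0;
	}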
@@ -106,7 +106,7 @@ static void pids_uncharge(struct pids_cgroup *pids, int num)
 {
 	struct pids_cgroup *p;
 
-	for (p = pids; p; p = parent_pids(p))
+	for (p = pids; parent_pids(p); p = parent_pids(p))
 		pids_cancel(p, num);
 }
 
@@ -123,7 +123,7 @@ static void pids_charge(struct pids_cgroup *pids, int num)
 {
 	struct pids_cgroup *p;
 
-	for (p = pids; p; p = parent_pids(p))
+	for (p = pids; parent_pids(p); p = parent_pids(p))
 		atomic64_add(num, &p->counter);
 }
 
@@ -140,7 +140,7 @@ static int pids_try_charge(struct pids_cgroup *pids, int num)
 {
 	struct pids_cgroup *p, *q;
 
-	for (p = pids; p; p = parent_pids(p)) {
+	for (p = pids; parent_pids(p); p = parent_pids(p)) {
 		int64_t new = atomic64_add_return(num, &p->counter);
 
 		/*
@@ -162,13 +162,13 @@ static int pids_try_charge(struct pids_cgroup *pids, int num)
 	return -EAGAIN;
 }
 
-static int pids_can_attach(struct cgroup_subsys_state *css,
-			   struct cgroup_taskset *tset)
+static int pids_can_attach(struct cgroup_taskset *tset)
 {
-	struct pids_cgroup *pids = css_pids(css);
 	struct task_struct *task;
+	struct cgroup_subsys_state *dst_css;
 
-	cgroup_taskset_for_each(task, tset) {
+	cgroup_taskset_for_each(task, dst_css, tset) {
+		struct pids_cgroup *pids = css_pids(dst_css);
 		struct cgroup_subsys_state *old_css;
 		struct pids_cgroup *old_pids;
 
@@ -187,13 +187,13 @@ static int pids_can_attach(struct cgroup_subsys_state *css,
 	return 0;
 }
 
-static void pids_cancel_attach(struct cgroup_subsys_state *css,
-			       struct cgroup_taskset *tset)
+static void pids_cancel_attach(struct cgroup_taskset *tset)
 {
-	struct pids_cgroup *pids = css_pids(css);
 	struct task_struct *task;
+	struct cgroup_subsys_state *dst_css;
 
-	cgroup_taskset_for_each(task, tset) {
+	cgroup_taskset_for_each(task, dst_css, tset) {
+		struct pids_cgroup *pids = css_pids(dst_css);
 		struct cgroup_subsys_state *old_css;
 		struct pids_cgroup *old_pids;
 
@@ -205,65 +205,28 @@ static void pids_cancel_attach(struct cgroup_subsys_state *css,
 	}
 }
 
+/*
+ * task_css_check(true) in pids_can_fork() and pids_cancel_fork() relies
+ * on threadgroup_change_begin() held by the copy_process().
+ */
 static int pids_can_fork(struct task_struct *task, void **priv_p)
 {
 	struct cgroup_subsys_state *css;
 	struct pids_cgroup *pids;
-	int err;
 
-	/*
-	 * Use the "current" task_css for the pids subsystem as the tentative
-	 * css. It is possible we will charge the wrong hierarchy, in which
-	 * case we will forcefully revert/reapply the charge on the right
-	 * hierarchy after it is committed to the task proper.
-	 */
-	css = task_get_css(current, pids_cgrp_id);
+	css = task_css_check(current, pids_cgrp_id, true);
 	pids = css_pids(css);
-
-	err = pids_try_charge(pids, 1);
-	if (err)
-		goto err_css_put;
-
-	*priv_p = css;
-
-	return 0;
-
-err_css_put:
-	css_put(css);
-	return err;
+	return pids_try_charge(pids, 1);
 }
 
 static void pids_cancel_fork(struct task_struct *task, void *priv)
 {
-	struct cgroup_subsys_state *css = priv;
-	struct pids_cgroup *pids = css_pids(css);
-
-	pids_uncharge(pids, 1);
-	css_put(css);
-}
-
-static void pids_fork(struct task_struct *task, void *priv)
-{
 	struct cgroup_subsys_state *css;
-	struct cgroup_subsys_state *old_css = priv;
 	struct pids_cgroup *pids;
-	struct pids_cgroup *old_pids = css_pids(old_css);
 
-	css = task_get_css(task, pids_cgrp_id);
+	css = task_css_check(current, pids_cgrp_id, true);
 	pids = css_pids(css);
-
-	/*
-	 * If the association has changed, we have to revert and reapply the
-	 * charge/uncharge on the wrong hierarchy to the current one. Since
-	 * the association can only change due to an organisation event, its
-	 * okay for us to ignore the limit in this case.
-	 */
-	if (pids != old_pids) {
-		pids_uncharge(old_pids, 1);
-		pids_charge(pids, 1);
-	}
-
-	css_put(css);
-	css_put(old_css);
+	pids_uncharge(pids, 1);
 }
 
 static void pids_free(struct task_struct *task)
@@ -335,6 +298,7 @@ static struct cftype pids_files[] = {
 	{
 		.name = "current",
 		.read_s64 = pids_current_read,
+		.flags = CFTYPE_NOT_ON_ROOT,
 	},
 	{ }	/* terminate */
 };
@@ -346,7 +310,6 @@ struct cgroup_subsys pids_cgrp_subsys = {
 	.cancel_attach	= pids_cancel_attach,
 	.can_fork	= pids_can_fork,
 	.cancel_fork	= pids_cancel_fork,
-	.fork		= pids_fork,
 	.free		= pids_free,
 	.legacy_cftypes	= pids_files,
 	.dfl_cftypes	= pids_files,
...
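
The loop-condition change from `p` to `parent_pids(p)` in the three walks above stops every charge/uncharge one level short of the top, so the root cgroup's counter is never touched; this is what "don't account for the root cgroup" in the shortlog refers to. A toy userspace model of the new walk (struct pids_node is a stand-in for struct pids_cgroup):

	#include <stdio.h>

	/* parent of the root node is NULL */
	struct pids_node {
		struct pids_node *parent;
		long counter;
	};

	static void charge(struct pids_node *pids, int num)
	{
		struct pids_node *p;

		/* post-fix loop: stop before the parentless root */
		for (p = pids; p->parent; p = p->parent)
			p->counter += num;
	}

	int main(void)
	{
		struct pids_node root = { NULL, 0 };
		struct pids_node parent = { &root, 0 };
		struct pids_node child = { &parent, 0 };

		charge(&child, 1);
		/* prints: root=0 parent=1 child=1 */
		printf("root=%ld parent=%ld child=%ld\n",
		       root.counter, parent.counter, child.counter);
		return 0;
	}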
@@ -1429,15 +1429,16 @@ static int fmeter_getrate(struct fmeter *fmp)
 static struct cpuset *cpuset_attach_old_cs;
 
 /* Called by cgroups to determine if a cpuset is usable; cpuset_mutex held */
-static int cpuset_can_attach(struct cgroup_subsys_state *css,
-			     struct cgroup_taskset *tset)
+static int cpuset_can_attach(struct cgroup_taskset *tset)
 {
-	struct cpuset *cs = css_cs(css);
+	struct cgroup_subsys_state *css;
+	struct cpuset *cs;
 	struct task_struct *task;
 	int ret;
 
 	/* used later by cpuset_attach() */
-	cpuset_attach_old_cs = task_cs(cgroup_taskset_first(tset));
+	cpuset_attach_old_cs = task_cs(cgroup_taskset_first(tset, &css));
+	cs = css_cs(css);
 
 	mutex_lock(&cpuset_mutex);
 
@@ -1447,7 +1448,7 @@ static int cpuset_can_attach(struct cgroup_subsys_state *css,
 	    (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)))
 		goto out_unlock;
 
-	cgroup_taskset_for_each(task, tset) {
+	cgroup_taskset_for_each(task, css, tset) {
 		ret = task_can_attach(task, cs->cpus_allowed);
 		if (ret)
 			goto out_unlock;
@@ -1467,9 +1468,14 @@ static int cpuset_can_attach(struct cgroup_subsys_state *css,
 	return ret;
 }
 
-static void cpuset_cancel_attach(struct cgroup_subsys_state *css,
-				 struct cgroup_taskset *tset)
+static void cpuset_cancel_attach(struct cgroup_taskset *tset)
 {
+	struct cgroup_subsys_state *css;
+	struct cpuset *cs;
+
+	cgroup_taskset_first(tset, &css);
+	cs = css_cs(css);
+
 	mutex_lock(&cpuset_mutex);
 	css_cs(css)->attach_in_progress--;
 	mutex_unlock(&cpuset_mutex);
@@ -1482,16 +1488,19 @@ static void cpuset_cancel_attach(struct cgroup_subsys_state *css,
  */
 static cpumask_var_t cpus_attach;
 
-static void cpuset_attach(struct cgroup_subsys_state *css,
-			  struct cgroup_taskset *tset)
+static void cpuset_attach(struct cgroup_taskset *tset)
 {
 	/* static buf protected by cpuset_mutex */
 	static nodemask_t cpuset_attach_nodemask_to;
 	struct task_struct *task;
 	struct task_struct *leader;
-	struct cpuset *cs = css_cs(css);
+	struct cgroup_subsys_state *css;
+	struct cpuset *cs;
 	struct cpuset *oldcs = cpuset_attach_old_cs;
 
+	cgroup_taskset_first(tset, &css);
+	cs = css_cs(css);
+
 	mutex_lock(&cpuset_mutex);
 
 	/* prepare for attach */
@@ -1502,7 +1511,7 @@ static void cpuset_attach(struct cgroup_subsys_state *css,
 	guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
 
-	cgroup_taskset_for_each(task, tset) {
+	cgroup_taskset_for_each(task, css, tset) {
 		/*
 		 * can_attach beforehand should guarantee that this doesn't
 		 * fail.  TODO: have a better way to handle failure here
@@ -1518,7 +1527,7 @@ static void cpuset_attach(struct cgroup_subsys_state *css,
 	 * sleep and should be moved outside migration path proper.
 	 */
 	cpuset_attach_nodemask_to = cs->effective_mems;
-	cgroup_taskset_for_each_leader(leader, tset) {
+	cgroup_taskset_for_each_leader(leader, css, tset) {
 		struct mm_struct *mm = get_task_mm(leader);
 
 		if (mm) {
...
@@ -9494,12 +9494,12 @@ static int __perf_cgroup_move(void *info)
 	return 0;
 }
 
-static void perf_cgroup_attach(struct cgroup_subsys_state *css,
-			       struct cgroup_taskset *tset)
+static void perf_cgroup_attach(struct cgroup_taskset *tset)
 {
 	struct task_struct *task;
+	struct cgroup_subsys_state *css;
 
-	cgroup_taskset_for_each(task, tset)
+	cgroup_taskset_for_each(task, css, tset)
 		task_function_call(task, __perf_cgroup_move, task);
 }
 
...
@@ -1368,7 +1368,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	p->real_start_time = ktime_get_boot_ns();
 	p->io_context = NULL;
 	p->audit_context = NULL;
-	if (clone_flags & CLONE_THREAD)
-		threadgroup_change_begin(current);
+	threadgroup_change_begin(current);
 	cgroup_fork(p);
 #ifdef CONFIG_NUMA
@@ -1610,7 +1609,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	proc_fork_connector(p);
 	cgroup_post_fork(p, cgrp_ss_priv);
-	if (clone_flags & CLONE_THREAD)
-		threadgroup_change_end(current);
+	threadgroup_change_end(current);
 	perf_event_fork(p);
@@ -1652,7 +1650,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	mpol_put(p->mempolicy);
 bad_fork_cleanup_threadgroup_lock:
 #endif
-	if (clone_flags & CLONE_THREAD)
-		threadgroup_change_end(current);
+	threadgroup_change_end(current);
 	delayacct_tsk_free(p);
 bad_fork_cleanup_count:
...
@@ -8241,12 +8241,12 @@ static void cpu_cgroup_fork(struct task_struct *task, void *private)
 	sched_move_task(task);
 }
 
-static int cpu_cgroup_can_attach(struct cgroup_subsys_state *css,
-				 struct cgroup_taskset *tset)
+static int cpu_cgroup_can_attach(struct cgroup_taskset *tset)
 {
 	struct task_struct *task;
+	struct cgroup_subsys_state *css;
 
-	cgroup_taskset_for_each(task, tset) {
+	cgroup_taskset_for_each(task, css, tset) {
 #ifdef CONFIG_RT_GROUP_SCHED
 		if (!sched_rt_can_attach(css_tg(css), task))
 			return -EINVAL;
@@ -8259,12 +8259,12 @@ static int cpu_cgroup_can_attach(struct cgroup_subsys_state *css,
 	return 0;
 }
 
-static void cpu_cgroup_attach(struct cgroup_subsys_state *css,
-			      struct cgroup_taskset *tset)
+static void cpu_cgroup_attach(struct cgroup_taskset *tset)
 {
 	struct task_struct *task;
+	struct cgroup_subsys_state *css;
 
-	cgroup_taskset_for_each(task, tset)
+	cgroup_taskset_for_each(task, css, tset)
 		sched_move_task(task);
 }
 
...
@@ -4779,23 +4779,18 @@ static void mem_cgroup_clear_mc(void)
 	spin_unlock(&mc.lock);
 }
 
-static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
-				 struct cgroup_taskset *tset)
+static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
 {
-	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+	struct cgroup_subsys_state *css;
+	struct mem_cgroup *memcg;
 	struct mem_cgroup *from;
 	struct task_struct *leader, *p;
 	struct mm_struct *mm;
 	unsigned long move_flags;
 	int ret = 0;
 
-	/*
-	 * We are now commited to this value whatever it is. Changes in this
-	 * tunable will only affect upcoming migrations, not the current one.
-	 * So we need to save it, and keep it going.
-	 */
-	move_flags = READ_ONCE(memcg->move_charge_at_immigrate);
-	if (!move_flags)
+	/* charge immigration isn't supported on the default hierarchy */
+	if (cgroup_subsys_on_dfl(memory_cgrp_subsys))
 		return 0;
 
 	/*
@@ -4805,13 +4800,23 @@ static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
 	 * multiple.
 	 */
 	p = NULL;
-	cgroup_taskset_for_each_leader(leader, tset) {
+	cgroup_taskset_for_each_leader(leader, css, tset) {
 		WARN_ON_ONCE(p);
 		p = leader;
+		memcg = mem_cgroup_from_css(css);
 	}
 	if (!p)
 		return 0;
 
+	/*
+	 * We are now commited to this value whatever it is. Changes in this
+	 * tunable will only affect upcoming migrations, not the current one.
+	 * So we need to save it, and keep it going.
+	 */
+	move_flags = READ_ONCE(memcg->move_charge_at_immigrate);
+	if (!move_flags)
+		return 0;
+
 	from = mem_cgroup_from_task(p);
 
 	VM_BUG_ON(from == memcg);
@@ -4842,8 +4847,7 @@ static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
 	return ret;
 }
 
-static void mem_cgroup_cancel_attach(struct cgroup_subsys_state *css,
-				     struct cgroup_taskset *tset)
+static void mem_cgroup_cancel_attach(struct cgroup_taskset *tset)
 {
 	if (mc.to)
 		mem_cgroup_clear_mc();
@@ -4985,10 +4989,10 @@ static void mem_cgroup_move_charge(struct mm_struct *mm)
 	atomic_dec(&mc.from->moving_account);
 }
 
-static void mem_cgroup_move_task(struct cgroup_subsys_state *css,
-				 struct cgroup_taskset *tset)
+static void mem_cgroup_move_task(struct cgroup_taskset *tset)
 {
-	struct task_struct *p = cgroup_taskset_first(tset);
+	struct cgroup_subsys_state *css;
+	struct task_struct *p = cgroup_taskset_first(tset, &css);
 	struct mm_struct *mm = get_task_mm(p);
 
 	if (mm) {
@@ -5000,17 +5004,14 @@ static void mem_cgroup_move_task(struct cgroup_subsys_state *css,
 		mem_cgroup_clear_mc();
 }
 #else	/* !CONFIG_MMU */
-static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
-				 struct cgroup_taskset *tset)
+static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
 {
 	return 0;
 }
-static void mem_cgroup_cancel_attach(struct cgroup_subsys_state *css,
-				     struct cgroup_taskset *tset)
+static void mem_cgroup_cancel_attach(struct cgroup_taskset *tset)
 {
 }
-static void mem_cgroup_move_task(struct cgroup_subsys_state *css,
-				 struct cgroup_taskset *tset)
+static void mem_cgroup_move_task(struct cgroup_taskset *tset)
 {
 }
 #endif
...
@@ -81,9 +81,11 @@ static void update_classid(struct cgroup_subsys_state *css, void *v)
 	css_task_iter_end(&it);
 }
 
-static void cgrp_attach(struct cgroup_subsys_state *css,
-			struct cgroup_taskset *tset)
+static void cgrp_attach(struct cgroup_taskset *tset)
 {
+	struct cgroup_subsys_state *css;
+
+	cgroup_taskset_first(tset, &css);
 	update_classid(css,
 		       (void *)(unsigned long)css_cls_state(css)->classid);
 }
...
@@ -218,13 +218,14 @@ static int update_netprio(const void *v, struct file *file, unsigned n)
 	return 0;
 }
 
-static void net_prio_attach(struct cgroup_subsys_state *css,
-			    struct cgroup_taskset *tset)
+static void net_prio_attach(struct cgroup_taskset *tset)
 {
 	struct task_struct *p;
-	void *v = (void *)(unsigned long)css->cgroup->id;
+	struct cgroup_subsys_state *css;
+
+	cgroup_taskset_for_each(p, css, tset) {
+		void *v = (void *)(unsigned long)css->cgroup->id;
 
-	cgroup_taskset_for_each(p, tset) {
 		task_lock(p);
 		iterate_fd(p->files, 0, update_netprio, v);
 		task_unlock(p);
...