Commit 52526076, authored by Tejun Heo, committed by Greg Kroah-Hartman

memcg: relocate charge moving from ->attach to ->post_attach

commit 264a0ae1 upstream.

Hello,

So, this ended up a lot simpler than I originally expected.  I tested
it lightly and it seems to work fine.  Petr, can you please test these
two patches w/o the lru drain drop patch and see whether the problem
is gone?

Thanks.
------ 8< ------
If charge moving is used, memcg performs relabeling of the affected
pages from its ->attach callback, which is called under
cgroup_threadgroup_rwsem and thus can't create new kthreads.  This is
fragile as various operations may depend on workqueues making forward
progress which relies on the ability to create new kthreads.

There's no reason to perform charge moving from ->attach which is deep
in the task migration path.  Move it to ->post_attach which is called
after the actual migration is finished and cgroup_threadgroup_rwsem is
dropped.

* move_charge_struct->mm is added and ->can_attach is now responsible
  for pinning and recording the target mm.  mem_cgroup_clear_mc() is
  updated accordingly.  This also simplifies mem_cgroup_move_task().

* mem_cgroup_move_task() is now called from ->post_attach instead of
  ->attach.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@kernel.org>
Debugged-and-tested-by: Petr Mladek <pmladek@suse.com>
Reported-by: Cyril Hrubis <chrubis@suse.cz>
Reported-by: Johannes Weiner <hannes@cmpxchg.org>
Fixes: 1ed13287 ("sched, cgroup: replace signal_struct->group_rwsem with a global percpu_rwsem")
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
parent d5209747
......@@ -196,6 +196,7 @@ static void mem_cgroup_oom_notify(struct mem_cgroup *memcg);
/* "mc" and its members are protected by cgroup_mutex */
static struct move_charge_struct {
spinlock_t lock; /* for from, to */
struct mm_struct *mm;
struct mem_cgroup *from;
struct mem_cgroup *to;
unsigned long flags;
......@@ -4800,6 +4801,8 @@ static void __mem_cgroup_clear_mc(void)
static void mem_cgroup_clear_mc(void)
{
struct mm_struct *mm = mc.mm;
/*
* we must clear moving_task before waking up waiters at the end of
* task migration.
......@@ -4809,7 +4812,10 @@ static void mem_cgroup_clear_mc(void)
spin_lock(&mc.lock);
mc.from = NULL;
mc.to = NULL;
mc.mm = NULL;
spin_unlock(&mc.lock);
mmput(mm);
}
static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
......@@ -4866,6 +4872,7 @@ static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
VM_BUG_ON(mc.moved_swap);
spin_lock(&mc.lock);
mc.mm = mm;
mc.from = from;
mc.to = memcg;
mc.flags = move_flags;
......@@ -4875,8 +4882,9 @@ static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
ret = mem_cgroup_precharge_mc(mm);
if (ret)
mem_cgroup_clear_mc();
}
} else {
mmput(mm);
}
return ret;
}
......@@ -4985,11 +4993,11 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
return ret;
}
static void mem_cgroup_move_charge(struct mm_struct *mm)
static void mem_cgroup_move_charge(void)
{
struct mm_walk mem_cgroup_move_charge_walk = {
.pmd_entry = mem_cgroup_move_charge_pte_range,
.mm = mm,
.mm = mc.mm,
};
lru_add_drain_all();
......@@ -5001,7 +5009,7 @@ static void mem_cgroup_move_charge(struct mm_struct *mm)
atomic_inc(&mc.from->moving_account);
synchronize_rcu();
retry:
if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
if (unlikely(!down_read_trylock(&mc.mm->mmap_sem))) {
/*
* Someone who are holding the mmap_sem might be waiting in
* waitq. So we cancel all extra charges, wake up all waiters,
......@@ -5018,23 +5026,16 @@ static void mem_cgroup_move_charge(struct mm_struct *mm)
* additional charge, the page walk just aborts.
*/
walk_page_range(0, ~0UL, &mem_cgroup_move_charge_walk);
up_read(&mm->mmap_sem);
up_read(&mc.mm->mmap_sem);
atomic_dec(&mc.from->moving_account);
}
static void mem_cgroup_move_task(struct cgroup_taskset *tset)
static void mem_cgroup_move_task(void)
{
struct cgroup_subsys_state *css;
struct task_struct *p = cgroup_taskset_first(tset, &css);
struct mm_struct *mm = get_task_mm(p);
if (mm) {
if (mc.to)
mem_cgroup_move_charge(mm);
mmput(mm);
}
if (mc.to)
if (mc.to) {
mem_cgroup_move_charge();
mem_cgroup_clear_mc();
}
}
#else /* !CONFIG_MMU */
static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
......@@ -5044,7 +5045,7 @@ static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
static void mem_cgroup_cancel_attach(struct cgroup_taskset *tset)
{
}
static void mem_cgroup_move_task(struct cgroup_taskset *tset)
static void mem_cgroup_move_task(void)
{
}
#endif
......@@ -5258,7 +5259,7 @@ struct cgroup_subsys memory_cgrp_subsys = {
.css_reset = mem_cgroup_css_reset,
.can_attach = mem_cgroup_can_attach,
.cancel_attach = mem_cgroup_cancel_attach,
.attach = mem_cgroup_move_task,
.post_attach = mem_cgroup_move_task,
.bind = mem_cgroup_bind,
.dfl_cftypes = memory_files,
.legacy_cftypes = mem_cgroup_legacy_files,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment