Commit 33ad801d authored by Li Zefan's avatar Li Zefan Committed by Tejun Heo

cpuset: record old_mems_allowed in struct cpuset

When we update a cpuset's mems_allowed and thus update tasks'
mems_allowed, it's required to pass the old mems_allowed and new
mems_allowed to cpuset_migrate_mm().

Currently we save old mems_allowed in a temp local variable before
changing cpuset->mems_allowed. This patch changes it by saving
old mems_allowed in cpuset->old_mems_allowed.

This currently won't change any behavior, but it will later allow
us to keep tasks in empty cpusets.

v3: restored "cpuset_attach_nodemask_to = cs->mems_allowed"
Signed-off-by: default avatarLi Zefan <lizefan@huawei.com>
Signed-off-by: default avatarTejun Heo <tj@kernel.org>
parent 388afd85
...@@ -88,6 +88,18 @@ struct cpuset { ...@@ -88,6 +88,18 @@ struct cpuset {
cpumask_var_t cpus_allowed; /* CPUs allowed to tasks in cpuset */ cpumask_var_t cpus_allowed; /* CPUs allowed to tasks in cpuset */
nodemask_t mems_allowed; /* Memory Nodes allowed to tasks */ nodemask_t mems_allowed; /* Memory Nodes allowed to tasks */
/*
* This is old Memory Nodes tasks took on.
*
* - top_cpuset.old_mems_allowed is initialized to mems_allowed.
* - A new cpuset's old_mems_allowed is initialized when some
* task is moved into it.
* - old_mems_allowed is used in cpuset_migrate_mm() when we change
* cpuset.mems_allowed and have tasks' nodemask updated, and
* then old_mems_allowed is updated to mems_allowed.
*/
nodemask_t old_mems_allowed;
struct fmeter fmeter; /* memory_pressure filter */ struct fmeter fmeter; /* memory_pressure filter */
/* /*
...@@ -972,16 +984,12 @@ static void cpuset_change_task_nodemask(struct task_struct *tsk, ...@@ -972,16 +984,12 @@ static void cpuset_change_task_nodemask(struct task_struct *tsk,
static void cpuset_change_nodemask(struct task_struct *p, static void cpuset_change_nodemask(struct task_struct *p,
struct cgroup_scanner *scan) struct cgroup_scanner *scan)
{ {
struct cpuset *cs = cgroup_cs(scan->cg);
struct mm_struct *mm; struct mm_struct *mm;
struct cpuset *cs;
int migrate; int migrate;
const nodemask_t *oldmem = scan->data; nodemask_t *newmems = scan->data;
static nodemask_t newmems; /* protected by cpuset_mutex */
cs = cgroup_cs(scan->cg);
guarantee_online_mems(cs, &newmems);
cpuset_change_task_nodemask(p, &newmems); cpuset_change_task_nodemask(p, newmems);
mm = get_task_mm(p); mm = get_task_mm(p);
if (!mm) if (!mm)
...@@ -991,7 +999,7 @@ static void cpuset_change_nodemask(struct task_struct *p, ...@@ -991,7 +999,7 @@ static void cpuset_change_nodemask(struct task_struct *p,
mpol_rebind_mm(mm, &cs->mems_allowed); mpol_rebind_mm(mm, &cs->mems_allowed);
if (migrate) if (migrate)
cpuset_migrate_mm(mm, oldmem, &cs->mems_allowed); cpuset_migrate_mm(mm, &cs->old_mems_allowed, newmems);
mmput(mm); mmput(mm);
} }
...@@ -1000,25 +1008,26 @@ static void *cpuset_being_rebound; ...@@ -1000,25 +1008,26 @@ static void *cpuset_being_rebound;
/** /**
* update_tasks_nodemask - Update the nodemasks of tasks in the cpuset. * update_tasks_nodemask - Update the nodemasks of tasks in the cpuset.
* @cs: the cpuset in which each task's mems_allowed mask needs to be changed * @cs: the cpuset in which each task's mems_allowed mask needs to be changed
* @oldmem: old mems_allowed of cpuset cs
* @heap: if NULL, defer allocating heap memory to cgroup_scan_tasks() * @heap: if NULL, defer allocating heap memory to cgroup_scan_tasks()
* *
* Called with cpuset_mutex held * Called with cpuset_mutex held
* No return value. It's guaranteed that cgroup_scan_tasks() always returns 0 * No return value. It's guaranteed that cgroup_scan_tasks() always returns 0
* if @heap != NULL. * if @heap != NULL.
*/ */
static void update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem, static void update_tasks_nodemask(struct cpuset *cs, struct ptr_heap *heap)
struct ptr_heap *heap)
{ {
static nodemask_t newmems; /* protected by cpuset_mutex */
struct cgroup_scanner scan; struct cgroup_scanner scan;
cpuset_being_rebound = cs; /* causes mpol_dup() rebind */ cpuset_being_rebound = cs; /* causes mpol_dup() rebind */
guarantee_online_mems(cs, &newmems);
scan.cg = cs->css.cgroup; scan.cg = cs->css.cgroup;
scan.test_task = NULL; scan.test_task = NULL;
scan.process_task = cpuset_change_nodemask; scan.process_task = cpuset_change_nodemask;
scan.heap = heap; scan.heap = heap;
scan.data = (nodemask_t *)oldmem; scan.data = &newmems;
/* /*
* The mpol_rebind_mm() call takes mmap_sem, which we couldn't * The mpol_rebind_mm() call takes mmap_sem, which we couldn't
...@@ -1032,6 +1041,12 @@ static void update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem, ...@@ -1032,6 +1041,12 @@ static void update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem,
*/ */
cgroup_scan_tasks(&scan); cgroup_scan_tasks(&scan);
/*
* All the tasks' nodemasks have been updated, update
* cs->old_mems_allowed.
*/
cs->old_mems_allowed = newmems;
/* We're done rebinding vmas to this cpuset's new mems_allowed. */ /* We're done rebinding vmas to this cpuset's new mems_allowed. */
cpuset_being_rebound = NULL; cpuset_being_rebound = NULL;
} }
...@@ -1052,13 +1067,9 @@ static void update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem, ...@@ -1052,13 +1067,9 @@ static void update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem,
static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
const char *buf) const char *buf)
{ {
NODEMASK_ALLOC(nodemask_t, oldmem, GFP_KERNEL);
int retval; int retval;
struct ptr_heap heap; struct ptr_heap heap;
if (!oldmem)
return -ENOMEM;
/* /*
* top_cpuset.mems_allowed tracks node_stats[N_MEMORY]; * top_cpuset.mems_allowed tracks node_stats[N_MEMORY];
* it's read-only * it's read-only
...@@ -1087,8 +1098,8 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, ...@@ -1087,8 +1098,8 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
goto done; goto done;
} }
} }
*oldmem = cs->mems_allowed;
if (nodes_equal(*oldmem, trialcs->mems_allowed)) { if (nodes_equal(cs->mems_allowed, trialcs->mems_allowed)) {
retval = 0; /* Too easy - nothing to do */ retval = 0; /* Too easy - nothing to do */
goto done; goto done;
} }
...@@ -1104,11 +1115,10 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, ...@@ -1104,11 +1115,10 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
cs->mems_allowed = trialcs->mems_allowed; cs->mems_allowed = trialcs->mems_allowed;
mutex_unlock(&callback_mutex); mutex_unlock(&callback_mutex);
update_tasks_nodemask(cs, oldmem, &heap); update_tasks_nodemask(cs, &heap);
heap_free(&heap); heap_free(&heap);
done: done:
NODEMASK_FREE(oldmem);
return retval; return retval;
} }
...@@ -1431,6 +1441,8 @@ static void cpuset_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) ...@@ -1431,6 +1441,8 @@ static void cpuset_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
mmput(mm); mmput(mm);
} }
cs->old_mems_allowed = cpuset_attach_nodemask_to;
cs->attach_in_progress--; cs->attach_in_progress--;
if (!cs->attach_in_progress) if (!cs->attach_in_progress)
wake_up(&cpuset_attach_wq); wake_up(&cpuset_attach_wq);
...@@ -1985,7 +1997,7 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs) ...@@ -1985,7 +1997,7 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
static void cpuset_hotplug_update_tasks(struct cpuset *cs) static void cpuset_hotplug_update_tasks(struct cpuset *cs)
{ {
static cpumask_t off_cpus; static cpumask_t off_cpus;
static nodemask_t off_mems, tmp_mems; static nodemask_t off_mems;
bool is_empty; bool is_empty;
retry: retry:
...@@ -2015,11 +2027,10 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs) ...@@ -2015,11 +2027,10 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs)
/* remove offline mems from @cs */ /* remove offline mems from @cs */
if (!nodes_empty(off_mems)) { if (!nodes_empty(off_mems)) {
tmp_mems = cs->mems_allowed;
mutex_lock(&callback_mutex); mutex_lock(&callback_mutex);
nodes_andnot(cs->mems_allowed, cs->mems_allowed, off_mems); nodes_andnot(cs->mems_allowed, cs->mems_allowed, off_mems);
mutex_unlock(&callback_mutex); mutex_unlock(&callback_mutex);
update_tasks_nodemask(cs, &tmp_mems, NULL); update_tasks_nodemask(cs, NULL);
} }
is_empty = cpumask_empty(cs->cpus_allowed) || is_empty = cpumask_empty(cs->cpus_allowed) ||
...@@ -2083,11 +2094,10 @@ static void cpuset_hotplug_workfn(struct work_struct *work) ...@@ -2083,11 +2094,10 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
/* synchronize mems_allowed to N_MEMORY */ /* synchronize mems_allowed to N_MEMORY */
if (mems_updated) { if (mems_updated) {
tmp_mems = top_cpuset.mems_allowed;
mutex_lock(&callback_mutex); mutex_lock(&callback_mutex);
top_cpuset.mems_allowed = new_mems; top_cpuset.mems_allowed = new_mems;
mutex_unlock(&callback_mutex); mutex_unlock(&callback_mutex);
update_tasks_nodemask(&top_cpuset, &tmp_mems, NULL); update_tasks_nodemask(&top_cpuset, NULL);
} }
mutex_unlock(&cpuset_mutex); mutex_unlock(&cpuset_mutex);
...@@ -2158,6 +2168,7 @@ void __init cpuset_init_smp(void) ...@@ -2158,6 +2168,7 @@ void __init cpuset_init_smp(void)
{ {
cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask); cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
top_cpuset.mems_allowed = node_states[N_MEMORY]; top_cpuset.mems_allowed = node_states[N_MEMORY];
top_cpuset.old_mems_allowed = top_cpuset.mems_allowed;
register_hotmemory_notifier(&cpuset_track_online_nodes_nb); register_hotmemory_notifier(&cpuset_track_online_nodes_nb);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment