Commit 46a87b38 authored by Paul Turner's avatar Paul Turner Committed by Peter Zijlstra

sched/core: Distribute tasks within affinity masks

Currently, when updating the affinity of tasks via either cpusets.cpus,
or, sched_setaffinity(); tasks not currently running within the newly
specified mask will be arbitrarily assigned to the first CPU within the
mask.

This (particularly in the case that we are restricting masks) can
result in many tasks being assigned to the first CPUs of their new
masks.

This:
 1) Can induce scheduling delays while the load-balancer has a chance to
    spread them between their new CPUs.
 2) Can antogonize a poor load-balancer behavior where it has a
    difficult time recognizing that a cross-socket imbalance has been
    forced by an affinity mask.

This change adds a new cpumask interface to allow iterated calls to
distribute within the intersection of the provided masks.

The cases that this mainly affects are:
 - modifying cpuset.cpus
 - when tasks join a cpuset
 - when modifying a task's affinity via sched_setaffinity(2)
Signed-off-by: default avatarPaul Turner <pjt@google.com>
Signed-off-by: default avatarJosh Don <joshdon@google.com>
Signed-off-by: default avatarPeter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: default avatarQais Yousef <qais.yousef@arm.com>
Tested-by: default avatarQais Yousef <qais.yousef@arm.com>
Link: https://lkml.kernel.org/r/20200311010113.136465-1-joshdon@google.com
parent fe61468b
......@@ -194,6 +194,11 @@ static inline unsigned int cpumask_local_spread(unsigned int i, int node)
return 0;
}
static inline int cpumask_any_and_distribute(const struct cpumask *src1p,
const struct cpumask *src2p) {
return cpumask_next_and(-1, src1p, src2p);
}
#define for_each_cpu(cpu, mask) \
for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
#define for_each_cpu_not(cpu, mask) \
......@@ -245,6 +250,8 @@ static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp)
int cpumask_next_and(int n, const struct cpumask *, const struct cpumask *);
int cpumask_any_but(const struct cpumask *mask, unsigned int cpu);
unsigned int cpumask_local_spread(unsigned int i, int node);
int cpumask_any_and_distribute(const struct cpumask *src1p,
const struct cpumask *src2p);
/**
* for_each_cpu - iterate over every cpu in a mask
......
......@@ -1650,7 +1650,12 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
if (cpumask_equal(p->cpus_ptr, new_mask))
goto out;
dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask);
/*
* Picking a ~random cpu helps in cases where we are changing affinity
* for groups of tasks (ie. cpuset), so that load balancing is not
* immediately required to distribute the tasks within their new mask.
*/
dest_cpu = cpumask_any_and_distribute(cpu_valid_mask, new_mask);
if (dest_cpu >= nr_cpu_ids) {
ret = -EINVAL;
goto out;
......
......@@ -232,3 +232,32 @@ unsigned int cpumask_local_spread(unsigned int i, int node)
BUG();
}
EXPORT_SYMBOL(cpumask_local_spread);
static DEFINE_PER_CPU(int, distribute_cpu_mask_prev);
/**
* Returns an arbitrary cpu within srcp1 & srcp2.
*
* Iterated calls using the same srcp1 and srcp2 will be distributed within
* their intersection.
*
* Returns >= nr_cpu_ids if the intersection is empty.
*/
int cpumask_any_and_distribute(const struct cpumask *src1p,
const struct cpumask *src2p)
{
int next, prev;
/* NOTE: our first selection will skip 0. */
prev = __this_cpu_read(distribute_cpu_mask_prev);
next = cpumask_next_and(prev, src1p, src2p);
if (next >= nr_cpu_ids)
next = cpumask_first_and(src1p, src2p);
if (next < nr_cpu_ids)
__this_cpu_write(distribute_cpu_mask_prev, next);
return next;
}
EXPORT_SYMBOL(cpumask_any_and_distribute);
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment