Commit ffda12a1 authored by Peter Zijlstra, committed by Ingo Molnar

sched: optimize group load balancer

I noticed that tg_shares_up() unconditionally takes rq-locks for all cpus
in the sched_domain. This hurts.

We need the rq-locks whenever we change the weight of the per-cpu group sched
entities. To alleviate this a little, only change the weight when the new
weight is at least shares_thresh away from the old value.

This avoids the rq-lock for the top level entries, since those will never
be re-weighted, and fuzzes the lower level entries a little to gain performance
in semi-stable situations.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent b0aa51b9
...@@ -1621,6 +1621,7 @@ extern unsigned int sysctl_sched_features; ...@@ -1621,6 +1621,7 @@ extern unsigned int sysctl_sched_features;
extern unsigned int sysctl_sched_migration_cost; extern unsigned int sysctl_sched_migration_cost;
extern unsigned int sysctl_sched_nr_migrate; extern unsigned int sysctl_sched_nr_migrate;
extern unsigned int sysctl_sched_shares_ratelimit; extern unsigned int sysctl_sched_shares_ratelimit;
extern unsigned int sysctl_sched_shares_thresh;
int sched_nr_latency_handler(struct ctl_table *table, int write, int sched_nr_latency_handler(struct ctl_table *table, int write,
struct file *file, void __user *buffer, size_t *length, struct file *file, void __user *buffer, size_t *length,
......
...@@ -817,6 +817,13 @@ const_debug unsigned int sysctl_sched_nr_migrate = 32; ...@@ -817,6 +817,13 @@ const_debug unsigned int sysctl_sched_nr_migrate = 32;
*/ */
unsigned int sysctl_sched_shares_ratelimit = 250000; unsigned int sysctl_sched_shares_ratelimit = 250000;
/*
* Inject some fuzzyness into changing the per-cpu group shares
* this avoids remote rq-locks at the expense of fairness.
* default: 4
*/
unsigned int sysctl_sched_shares_thresh = 4;
/* /*
* period over which we measure -rt task cpu usage in us. * period over which we measure -rt task cpu usage in us.
* default: 1s * default: 1s
...@@ -1453,7 +1460,7 @@ static void __set_se_shares(struct sched_entity *se, unsigned long shares); ...@@ -1453,7 +1460,7 @@ static void __set_se_shares(struct sched_entity *se, unsigned long shares);
* Calculate and set the cpu's group shares. * Calculate and set the cpu's group shares.
*/ */
static void static void
__update_group_shares_cpu(struct task_group *tg, int cpu, update_group_shares_cpu(struct task_group *tg, int cpu,
unsigned long sd_shares, unsigned long sd_rq_weight) unsigned long sd_shares, unsigned long sd_rq_weight)
{ {
int boost = 0; int boost = 0;
...@@ -1485,19 +1492,23 @@ __update_group_shares_cpu(struct task_group *tg, int cpu, ...@@ -1485,19 +1492,23 @@ __update_group_shares_cpu(struct task_group *tg, int cpu,
* *
*/ */
shares = (sd_shares * rq_weight) / (sd_rq_weight + 1); shares = (sd_shares * rq_weight) / (sd_rq_weight + 1);
shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES);
if (abs(shares - tg->se[cpu]->load.weight) >
sysctl_sched_shares_thresh) {
struct rq *rq = cpu_rq(cpu);
unsigned long flags;
spin_lock_irqsave(&rq->lock, flags);
/* /*
* record the actual number of shares, not the boosted amount. * record the actual number of shares, not the boosted amount.
*/ */
tg->cfs_rq[cpu]->shares = boost ? 0 : shares; tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
tg->cfs_rq[cpu]->rq_weight = rq_weight; tg->cfs_rq[cpu]->rq_weight = rq_weight;
if (shares < MIN_SHARES)
shares = MIN_SHARES;
else if (shares > MAX_SHARES)
shares = MAX_SHARES;
__set_se_shares(tg->se[cpu], shares); __set_se_shares(tg->se[cpu], shares);
spin_unlock_irqrestore(&rq->lock, flags);
}
} }
/* /*
...@@ -1526,14 +1537,8 @@ static int tg_shares_up(struct task_group *tg, void *data) ...@@ -1526,14 +1537,8 @@ static int tg_shares_up(struct task_group *tg, void *data)
if (!rq_weight) if (!rq_weight)
rq_weight = cpus_weight(sd->span) * NICE_0_LOAD; rq_weight = cpus_weight(sd->span) * NICE_0_LOAD;
for_each_cpu_mask(i, sd->span) { for_each_cpu_mask(i, sd->span)
struct rq *rq = cpu_rq(i); update_group_shares_cpu(tg, i, shares, rq_weight);
unsigned long flags;
spin_lock_irqsave(&rq->lock, flags);
__update_group_shares_cpu(tg, i, shares, rq_weight);
spin_unlock_irqrestore(&rq->lock, flags);
}
return 0; return 0;
} }
......
...@@ -274,6 +274,16 @@ static struct ctl_table kern_table[] = { ...@@ -274,6 +274,16 @@ static struct ctl_table kern_table[] = {
.mode = 0644, .mode = 0644,
.proc_handler = &proc_dointvec, .proc_handler = &proc_dointvec,
}, },
{
.ctl_name = CTL_UNNUMBERED,
.procname = "sched_shares_thresh",
.data = &sysctl_sched_shares_thresh,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec_minmax,
.strategy = &sysctl_intvec,
.extra1 = &zero,
},
{ {
.ctl_name = CTL_UNNUMBERED, .ctl_name = CTL_UNNUMBERED,
.procname = "sched_child_runs_first", .procname = "sched_child_runs_first",
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment