Commit e017b4db authored by Linus Torvalds

Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler fixes from Ingo Molnar:
 "This includes a fix for the add_wait_queue() queue ordering brown
  paperbag bug, plus PELT accounting fixes for cgroups scheduling
  artifacts"

* 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/fair: Update and fix the runnable propagation rule
  sched/wait: Fix add_wait_queue() behavioral change
parents 1c764725 a4c3c049
kernel/sched/fair.c

@@ -3413,9 +3413,9 @@ void set_task_rq_fair(struct sched_entity *se,
  * _IFF_ we look at the pure running and runnable sums. Because they
  * represent the very same entity, just at different points in the hierarchy.
  *
- *
- * Per the above update_tg_cfs_util() is trivial (and still 'wrong') and
- * simply copies the running sum over.
+ * Per the above update_tg_cfs_util() is trivial and simply copies the running
+ * sum over (but still wrong, because the group entity and group rq do not have
+ * their PELT windows aligned).
  *
  * However, update_tg_cfs_runnable() is more complex. So we have:
  *
@@ -3424,11 +3424,11 @@ void set_task_rq_fair(struct sched_entity *se,
  * And since, like util, the runnable part should be directly transferable,
  * the following would _appear_ to be the straight forward approach:
  *
- *   grq->avg.load_avg = grq->load.weight * grq->avg.running_avg	(3)
+ *   grq->avg.load_avg = grq->load.weight * grq->avg.runnable_avg	(3)
  *
  * And per (1) we have:
  *
- *   ge->avg.running_avg == grq->avg.running_avg
+ *   ge->avg.runnable_avg == grq->avg.runnable_avg
  *
  * Which gives:
  *
@@ -3447,27 +3447,28 @@ void set_task_rq_fair(struct sched_entity *se,
  * to (shortly) return to us. This only works by keeping the weights as
  * integral part of the sum. We therefore cannot decompose as per (3).
  *
- * OK, so what then?
+ * Another reason this doesn't work is that runnable isn't a 0-sum entity.
+ * Imagine a rq with 2 tasks that each are runnable 2/3 of the time. Then the
+ * rq itself is runnable anywhere between 2/3 and 1 depending on how the
+ * runnable section of these tasks overlap (or not). If they were to perfectly
+ * align the rq as a whole would be runnable 2/3 of the time. If however we
+ * always have at least 1 runnable task, the rq as a whole is always runnable.
  *
+ * So we'll have to approximate.. :/
  *
- * Another way to look at things is:
+ * Given the constraint:
  *
- *   grq->avg.load_avg = \Sum se->avg.load_avg
+ *   ge->avg.running_sum <= ge->avg.runnable_sum <= LOAD_AVG_MAX
  *
- * Therefore, per (2):
+ * We can construct a rule that adds runnable to a rq by assuming minimal
+ * overlap.
  *
- *   grq->avg.load_avg = \Sum se->load.weight * se->avg.runnable_avg
+ * On removal, we'll assume each task is equally runnable; which yields:
  *
- * And the very thing we're propagating is a change in that sum (someone
- * joined/left). So we can easily know the runnable change, which would be, per
- * (2) the already tracked se->load_avg divided by the corresponding
- * se->weight.
+ *   grq->avg.runnable_sum = grq->avg.load_sum / grq->load.weight
  *
- * Basically (4) but in differential form:
- *
- *   d(runnable_avg) += se->avg.load_avg / se->load.weight
- *								(5)
- *   ge->avg.load_avg += ge->load.weight * d(runnable_avg)
+ * XXX: only do this for the part of runnable > running ?
+ *
  */
 static inline void
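As an illustration outside this diff, the standalone toy below (not kernel code; the constant and the numbers are made up, and the helpers propagate_add()/propagate_remove() are invented for the example) applies the two halves of the approximation described in the new comment: on addition, assume minimal overlap and clip at the maximum; on removal, assume each task was equally runnable and divide the weighted load_sum back out by the weight.

/* Standalone toy, not kernel code: numbers and helper names are invented. */
#include <stdio.h>

#define TOY_LOAD_AVG_MAX 47742	/* same order of magnitude as the kernel's LOAD_AVG_MAX */

/* Addition: assume minimal overlap, so runnable simply accumulates,
 * clipped at a fully-busy window. */
static long propagate_add(long se_load_sum, long added_runnable_sum)
{
	long sum = se_load_sum + added_runnable_sum;

	return sum > TOY_LOAD_AVG_MAX ? TOY_LOAD_AVG_MAX : sum;
}

/* Removal: assume each task was equally runnable, i.e.
 * runnable_sum ~= load_sum / weight, and never inflate the
 * group entity's own runnable. */
static long propagate_remove(long se_load_sum, long grq_load_sum, long grq_weight)
{
	long estimate = grq_weight ? grq_load_sum / grq_weight : 0;

	return estimate < se_load_sum ? estimate : se_load_sum;
}

int main(void)
{
	long two_thirds = 2 * TOY_LOAD_AVG_MAX / 3;	/* a task runnable ~2/3 of the time */

	/* Two such tasks added with "minimal overlap": the group looks fully runnable. */
	printf("add:    %ld\n", propagate_add(two_thirds, two_thirds));

	/* Removing one: divide the weighted load_sum back out by the weight. */
	printf("remove: %ld\n", propagate_remove(two_thirds, 2048 * two_thirds, 2048));
	return 0;
}

With two tasks each runnable 2/3 of the time, the addition rule reports a fully runnable group, matching the "at least one runnable task" case described in the comment.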
@@ -3479,6 +3480,14 @@ update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
 	if (!delta)
 		return;
 
+	/*
+	 * The relation between sum and avg is:
+	 *
+	 *   LOAD_AVG_MAX - 1024 + sa->period_contrib
+	 *
+	 * however, the PELT windows are not aligned between grq and gse.
+	 */
+
 	/* Set new sched_entity's utilization */
 	se->avg.util_avg = gcfs_rq->avg.util_avg;
 	se->avg.util_sum = se->avg.util_avg * LOAD_AVG_MAX;
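The added comment is terse; as a rough worked example (again outside the diff, with invented TOY_ constants and made-up values), the exact divisor relating a PELT avg to its sum depends on how far the current 1024us window has progressed, while the code above uses the LOAD_AVG_MAX upper bound, which is one reason the copied sums stay approximate when the grq and gse windows are not aligned.

/* Standalone toy, not kernel code: TOY_* constants and values are made up. */
#include <stdio.h>

#define TOY_LOAD_AVG_MAX	47742
#define TOY_PELT_WINDOW		1024	/* one PELT period, in microseconds */

int main(void)
{
	unsigned long util_avg = 300;		/* some running average */
	unsigned long period_contrib = 512;	/* half-way into the current window */

	/* What the code above uses: the upper bound. */
	unsigned long approx_sum = util_avg * TOY_LOAD_AVG_MAX;

	/* The exact relation quoted in the added comment. */
	unsigned long exact_sum = util_avg * (TOY_LOAD_AVG_MAX - TOY_PELT_WINDOW + period_contrib);

	printf("approx sum: %lu\nexact  sum: %lu\n", approx_sum, exact_sum);
	return 0;
}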
@@ -3491,33 +3500,68 @@ update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
 static inline void
 update_tg_cfs_runnable(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
 {
-	long runnable_sum = gcfs_rq->prop_runnable_sum;
-	long runnable_load_avg, load_avg;
-	s64 runnable_load_sum, load_sum;
+	long delta_avg, running_sum, runnable_sum = gcfs_rq->prop_runnable_sum;
+	unsigned long runnable_load_avg, load_avg;
+	u64 runnable_load_sum, load_sum = 0;
+	s64 delta_sum;
 
 	if (!runnable_sum)
 		return;
 
 	gcfs_rq->prop_runnable_sum = 0;
 
+	if (runnable_sum >= 0) {
+		/*
+		 * Add runnable; clip at LOAD_AVG_MAX. Reflects that until
+		 * the CPU is saturated running == runnable.
+		 */
+		runnable_sum += se->avg.load_sum;
+		runnable_sum = min(runnable_sum, (long)LOAD_AVG_MAX);
+	} else {
+		/*
+		 * Estimate the new unweighted runnable_sum of the gcfs_rq by
+		 * assuming all tasks are equally runnable.
+		 */
+		if (scale_load_down(gcfs_rq->load.weight)) {
+			load_sum = div_s64(gcfs_rq->avg.load_sum,
+				scale_load_down(gcfs_rq->load.weight));
+		}
+
+		/* But make sure to not inflate se's runnable */
+		runnable_sum = min(se->avg.load_sum, load_sum);
+	}
+
+	/*
+	 * runnable_sum can't be lower than running_sum.
+	 * As running_sum is scaled with CPU capacity whereas runnable_sum
+	 * is not, we rescale running_sum first.
+	 */
+	running_sum = se->avg.util_sum /
+		arch_scale_cpu_capacity(NULL, cpu_of(rq_of(cfs_rq)));
+	runnable_sum = max(runnable_sum, running_sum);
+
 	load_sum = (s64)se_weight(se) * runnable_sum;
 	load_avg = div_s64(load_sum, LOAD_AVG_MAX);
 
-	add_positive(&se->avg.load_sum, runnable_sum);
-	add_positive(&se->avg.load_avg, load_avg);
+	delta_sum = load_sum - (s64)se_weight(se) * se->avg.load_sum;
+	delta_avg = load_avg - se->avg.load_avg;
 
-	add_positive(&cfs_rq->avg.load_avg, load_avg);
-	add_positive(&cfs_rq->avg.load_sum, load_sum);
+	se->avg.load_sum = runnable_sum;
+	se->avg.load_avg = load_avg;
+	add_positive(&cfs_rq->avg.load_avg, delta_avg);
+	add_positive(&cfs_rq->avg.load_sum, delta_sum);
 
 	runnable_load_sum = (s64)se_runnable(se) * runnable_sum;
 	runnable_load_avg = div_s64(runnable_load_sum, LOAD_AVG_MAX);
+	delta_sum = runnable_load_sum - se_weight(se) * se->avg.runnable_load_sum;
+	delta_avg = runnable_load_avg - se->avg.runnable_load_avg;
 
-	add_positive(&se->avg.runnable_load_sum, runnable_sum);
-	add_positive(&se->avg.runnable_load_avg, runnable_load_avg);
+	se->avg.runnable_load_sum = runnable_sum;
+	se->avg.runnable_load_avg = runnable_load_avg;
 
 	if (se->on_rq) {
-		add_positive(&cfs_rq->avg.runnable_load_avg, runnable_load_avg);
-		add_positive(&cfs_rq->avg.runnable_load_sum, runnable_load_sum);
+		add_positive(&cfs_rq->avg.runnable_load_avg, delta_avg);
+		add_positive(&cfs_rq->avg.runnable_load_sum, delta_sum);
 	}
 }
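One structural change in this hunk is that the group entity's own sums are now overwritten with the freshly estimated values, while the parent cfs_rq aggregates receive only the difference. A minimal sketch of that pattern (not kernel code; the types and fields are invented) is:

/* Standalone toy, not kernel code: types and fields are invented. */
#include <stdio.h>

struct toy_entity { long load_avg; };
struct toy_rq     { long load_avg; };	/* sum over all of its entities */

static void toy_update(struct toy_rq *rq, struct toy_entity *se, long new_load_avg)
{
	long delta = new_load_avg - se->load_avg;	/* what actually changed */

	se->load_avg = new_load_avg;	/* child: overwrite with the new estimate */
	rq->load_avg += delta;		/* parent: fold in only the difference */
	if (rq->load_avg < 0)		/* crude stand-in for add_positive()'s clamp */
		rq->load_avg = 0;
}

int main(void)
{
	struct toy_entity se = { .load_avg = 100 };
	struct toy_rq rq = { .load_avg = 250 };	/* includes se plus other entities */

	toy_update(&rq, &se, 60);	/* se's estimate shrank by 40 */
	printf("se=%ld rq=%ld\n", se.load_avg, rq.load_avg);	/* se=60 rq=210 */
	return 0;
}

Because runnable_sum is now an estimate of the entity's new total rather than a raw delta, only the difference from the entity's previous contribution may be folded into the parent.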
kernel/sched/wait.c

@@ -27,7 +27,7 @@ void add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
 
 	wq_entry->flags &= ~WQ_FLAG_EXCLUSIVE;
 	spin_lock_irqsave(&wq_head->lock, flags);
-	__add_wait_queue_entry_tail(wq_head, wq_entry);
+	__add_wait_queue(wq_head, wq_entry);
 	spin_unlock_irqrestore(&wq_head->lock, flags);
 }
 EXPORT_SYMBOL(add_wait_queue);
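As a closing illustration of the wait-queue fix (a simplified model, not kernel code: it ignores each entry's wakeup callback and its return value), the toy below shows why add_wait_queue() must insert non-exclusive waiters at the head: the wakeup scan wakes every non-exclusive entry it passes but stops after waking nr_exclusive exclusive ones, so a non-exclusive waiter queued behind an exclusive one can be missed by a single wake_up().

/* Standalone toy, not kernel code: it models only the list ordering,
 * not the real wait_queue_entry callbacks. */
#include <stdio.h>
#include <stdbool.h>

struct toy_waiter {
	const char *name;
	bool exclusive;
};

/* Simplified wakeup scan: wake everything, but stop after waking
 * nr_exclusive exclusive waiters. */
static void toy_wake_up(const struct toy_waiter *list, int n, int nr_exclusive)
{
	for (int i = 0; i < n; i++) {
		printf("  waking %s\n", list[i].name);
		if (list[i].exclusive && --nr_exclusive == 0)
			break;
	}
}

int main(void)
{
	/* After the fix: non-exclusive waiters sit in front of exclusive ones. */
	const struct toy_waiter fixed[] = {
		{ "poll() waiter (non-exclusive)", false },
		{ "accept() waiter (exclusive)",   true  },
	};
	/* Before the fix: add_wait_queue() appended at the tail, so the
	 * non-exclusive waiter hides behind the exclusive one and a single
	 * wake_up() never reaches it. */
	const struct toy_waiter broken[] = {
		{ "accept() waiter (exclusive)",   true  },
		{ "poll() waiter (non-exclusive)", false },
	};

	puts("fixed ordering:");
	toy_wake_up(fixed, 2, 1);
	puts("broken ordering:");
	toy_wake_up(broken, 2, 1);
	return 0;
}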