Commit 03582f33 authored by Linus Torvalds

Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler fix from Ingo Molnar:
 "Fix an exec() related scalability/performance regression, which was
  caused by incorrectly calculating load and migrating tasks on exec()
  when they shouldn't be"

* 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/fair: Fix cpu_util_wake() for 'execl' type workloads
parents b53e27f6 c469933e
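
Context for the diff below: during exec() the task is still current on its CPU, so its estimated utilization (util_est) is still accounted in that CPU's cfs_rq. The old cpu_util_wake() only discounted the task's util_avg, so the exec()ing CPU could still look busy, capacity_spare_wake() looked small, and find_idlest_group() could needlessly migrate the task. The renamed cpu_util_without() also discounts the task's util_est contribution when the task is queued on, or currently running on, that CPU. The following is a minimal, self-contained userspace C sketch of that discounting idea; the function name util_without_sketch and all numbers are made up for illustration, and this is not the kernel implementation (that is the kernel/sched/fair.c diff below):

#include <stdio.h>

/*
 * Simplified stand-in for the cpu_util_without() idea: discount the task's
 * own contribution from the CPU's util_avg, discount it from util_est only
 * when the task still contributes there, then take the max of the two, as
 * the patched kernel code does.
 */
static unsigned int util_without_sketch(unsigned int cpu_util_avg,
					unsigned int cpu_util_est,
					unsigned int task_util,
					int task_contributes)
{
	unsigned int util = cpu_util_avg;
	unsigned int estimated = cpu_util_est;

	/* Discount the task's share from the running average, clamped at 0 */
	util -= (task_util < util) ? task_util : util;

	/*
	 * Only discount util_est when the task is queued or running on this
	 * CPU, mirroring the "task_on_rq_queued(p) || current == p" check.
	 */
	if (task_contributes)
		estimated -= (task_util < estimated) ? task_util : estimated;

	return (util > estimated) ? util : estimated;
}

int main(void)
{
	/*
	 * exec()ing task (~300) is the only activity on its CPU: without the
	 * util_est discount this would report ~300, with it it reports 0.
	 */
	printf("%u\n", util_without_sketch(300, 300, 300, 1));
	/* another runnable task (~200) shares the CPU: reports ~200 */
	printf("%u\n", util_without_sketch(500, 500, 300, 1));
	return 0;
}
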
kernel/sched/fair.c

@@ -5674,11 +5674,11 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p,
 	return target;
 }
 
-static unsigned long cpu_util_wake(int cpu, struct task_struct *p);
+static unsigned long cpu_util_without(int cpu, struct task_struct *p);
 
-static unsigned long capacity_spare_wake(int cpu, struct task_struct *p)
+static unsigned long capacity_spare_without(int cpu, struct task_struct *p)
 {
-	return max_t(long, capacity_of(cpu) - cpu_util_wake(cpu, p), 0);
+	return max_t(long, capacity_of(cpu) - cpu_util_without(cpu, p), 0);
 }
 
 /*
@@ -5738,7 +5738,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
 
 			avg_load += cfs_rq_load_avg(&cpu_rq(i)->cfs);
 
-			spare_cap = capacity_spare_wake(i, p);
+			spare_cap = capacity_spare_without(i, p);
 
 			if (spare_cap > max_spare_cap)
 				max_spare_cap = spare_cap;
@@ -5889,8 +5889,8 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p
 		return prev_cpu;
 
 	/*
-	 * We need task's util for capacity_spare_wake, sync it up to prev_cpu's
-	 * last_update_time.
+	 * We need task's util for capacity_spare_without, sync it up to
+	 * prev_cpu's last_update_time.
 	 */
 	if (!(sd_flag & SD_BALANCE_FORK))
 		sync_entity_load_avg(&p->se);
@@ -6216,10 +6216,19 @@ static inline unsigned long cpu_util(int cpu)
 }
 
 /*
- * cpu_util_wake: Compute CPU utilization with any contributions from
- * the waking task p removed.
+ * cpu_util_without: compute cpu utilization without any contributions from *p
+ * @cpu: the CPU which utilization is requested
+ * @p: the task which utilization should be discounted
+ *
+ * The utilization of a CPU is defined by the utilization of tasks currently
+ * enqueued on that CPU as well as tasks which are currently sleeping after an
+ * execution on that CPU.
+ *
+ * This method returns the utilization of the specified CPU by discounting the
+ * utilization of the specified task, whenever the task is currently
+ * contributing to the CPU utilization.
  */
-static unsigned long cpu_util_wake(int cpu, struct task_struct *p)
+static unsigned long cpu_util_without(int cpu, struct task_struct *p)
 {
 	struct cfs_rq *cfs_rq;
 	unsigned int util;
@@ -6231,7 +6240,7 @@ static unsigned long cpu_util_wake(int cpu, struct task_struct *p)
 	cfs_rq = &cpu_rq(cpu)->cfs;
 	util = READ_ONCE(cfs_rq->avg.util_avg);
 
-	/* Discount task's blocked util from CPU's util */
+	/* Discount task's util from CPU's util */
 	util -= min_t(unsigned int, util, task_util(p));
 
 	/*
@@ -6240,14 +6249,14 @@ static unsigned long cpu_util_wake(int cpu, struct task_struct *p)
 	 * a) if *p is the only task sleeping on this CPU, then:
 	 *      cpu_util (== task_util) > util_est (== 0)
	 *    and thus we return:
-	 *      cpu_util_wake = (cpu_util - task_util) = 0
+	 *      cpu_util_without = (cpu_util - task_util) = 0
 	 *
 	 * b) if other tasks are SLEEPING on this CPU, which is now exiting
 	 *    IDLE, then:
 	 *      cpu_util >= task_util
 	 *      cpu_util > util_est (== 0)
 	 *    and thus we discount *p's blocked utilization to return:
-	 *      cpu_util_wake = (cpu_util - task_util) >= 0
+	 *      cpu_util_without = (cpu_util - task_util) >= 0
 	 *
 	 * c) if other tasks are RUNNABLE on that CPU and
 	 *      util_est > cpu_util
@@ -6260,8 +6269,33 @@ static unsigned long cpu_util_wake(int cpu, struct task_struct *p)
 	 * covered by the following code when estimated utilization is
 	 * enabled.
 	 */
-	if (sched_feat(UTIL_EST))
-		util = max(util, READ_ONCE(cfs_rq->avg.util_est.enqueued));
+	if (sched_feat(UTIL_EST)) {
+		unsigned int estimated =
+			READ_ONCE(cfs_rq->avg.util_est.enqueued);
+
+		/*
+		 * Despite the following checks we still have a small window
+		 * for a possible race, when an execl's select_task_rq_fair()
+		 * races with LB's detach_task():
+		 *
+		 *   detach_task()
+		 *     p->on_rq = TASK_ON_RQ_MIGRATING;
+		 *     ---------------------------------- A
+		 *     deactivate_task() \
+		 *       dequeue_task()   + RaceTime
+		 *         util_est_dequeue() /
+		 *     ---------------------------------- B
+		 *
+		 * The additional check on "current == p" it's required to
+		 * properly fix the execl regression and it helps in further
+		 * reducing the chances for the above race.
+		 */
+		if (unlikely(task_on_rq_queued(p) || current == p)) {
+			estimated -= min_t(unsigned int, estimated,
+					   (_task_util_est(p) | UTIL_AVG_UNCHANGED));
+		}
+		util = max(util, estimated);
+	}
 
 	/*
 	 * Utilization (estimated) can exceed the CPU capacity, thus let's
...
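
The case analysis in the comment above can be made concrete with illustrative numbers (hypothetical values in the usual 0..1024 capacity scale). If p, with task_util(p) = 300, is the only task that recently ran on the CPU and is now sleeping there (case a), util_avg is about 300 and util_est.enqueued is 0, so the function returns max(300 - 300, 0) = 0. If other sleeping tasks left roughly 200 of blocked utilization behind (case b), util_avg is about 500 and util_est.enqueued is still 0, so it returns max(500 - 300, 0) = 200. In the execl scenario the new check targets, p is still current on the CPU, so its ~300 of util_est.enqueued is now discounted as well and the CPU reports ~0 instead of ~300, which is what keeps capacity_spare_without() high for the task's current CPU and avoids the spurious migration on exec().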