Commit 590680d1 authored by Linus Torvalds

Merge tag 'sched-urgent-2020-04-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler fixes/updates from Thomas Gleixner:

 - Deduplicate the average computations in the scheduler core and the
   fair class code.

 - Fix a race between runtime distribution and assignment which can
   cause the quota to be exceeded by up to 70%.

 - Prevent negative results in the imbalance calculation

 - Remove a stale warning in the workqueue code which can be triggered
   since the call site was moved out of preempt disabled code. It's a
   false positive.

 - Deduplicate the print macros for procfs

 - Add the uclamp values to the SCHED_DEBUG procfs output for completeness

* tag 'sched-urgent-2020-04-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/debug: Add task uclamp values to SCHED_DEBUG procfs
  sched/debug: Factor out printing formats into common macros
  sched/debug: Remove redundant macro define
  sched/core: Remove unused rq::last_load_update_tick
  workqueue: Remove the warning in wq_worker_sleeping()
  sched/fair: Fix negative imbalance in imbalance calculation
  sched/fair: Fix race between runtime distribution and assignment
  sched/fair: Align rq->avg_idle and rq->avg_scan_cost
parents 20e2aa81 96e74ebf
...@@ -2119,12 +2119,6 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags) ...@@ -2119,12 +2119,6 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
return cpu; return cpu;
} }
/*
 * Fold @sample into the running average stored at @avg, moving the
 * average roughly one eighth of the way toward the new sample.
 */
static void update_avg(u64 *avg, u64 sample)
{
/* Signed difference, so a sample below the average pulls it down. */
s64 diff = sample - *avg;
/*
 * NOTE(review): >> on a negative s64 is implementation-defined in C;
 * presumably an arithmetic shift on supported compilers - confirm.
 */
*avg += diff >> 3;
}
void sched_set_stop_task(int cpu, struct task_struct *stop) void sched_set_stop_task(int cpu, struct task_struct *stop)
{ {
struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 }; struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
...@@ -4126,7 +4120,8 @@ static inline void sched_submit_work(struct task_struct *tsk) ...@@ -4126,7 +4120,8 @@ static inline void sched_submit_work(struct task_struct *tsk)
* it wants to wake up a task to maintain concurrency. * it wants to wake up a task to maintain concurrency.
* As this function is called inside the schedule() context, * As this function is called inside the schedule() context,
* we disable preemption to avoid it calling schedule() again * we disable preemption to avoid it calling schedule() again
* in the possible wakeup of a kworker. * in the possible wakeup of a kworker and because wq_worker_sleeping()
* requires it.
*/ */
if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) { if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) {
preempt_disable(); preempt_disable();
...@@ -6699,7 +6694,6 @@ void __init sched_init(void) ...@@ -6699,7 +6694,6 @@ void __init sched_init(void)
rq_attach_root(rq, &def_root_domain); rq_attach_root(rq, &def_root_domain);
#ifdef CONFIG_NO_HZ_COMMON #ifdef CONFIG_NO_HZ_COMMON
rq->last_load_update_tick = jiffies;
rq->last_blocked_load_update_tick = jiffies; rq->last_blocked_load_update_tick = jiffies;
atomic_set(&rq->nohz_flags, 0); atomic_set(&rq->nohz_flags, 0);
#endif #endif
......
...@@ -816,10 +816,12 @@ static int __init init_sched_debug_procfs(void) ...@@ -816,10 +816,12 @@ static int __init init_sched_debug_procfs(void)
__initcall(init_sched_debug_procfs); __initcall(init_sched_debug_procfs);
#define __P(F) SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)F) #define __PS(S, F) SEQ_printf(m, "%-45s:%21Ld\n", S, (long long)(F))
#define P(F) SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)p->F) #define __P(F) __PS(#F, F)
#define __PN(F) SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)F)) #define P(F) __PS(#F, p->F)
#define PN(F) SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F)) #define __PSN(S, F) SEQ_printf(m, "%-45s:%14Ld.%06ld\n", S, SPLIT_NS((long long)(F)))
#define __PN(F) __PSN(#F, F)
#define PN(F) __PSN(#F, p->F)
#ifdef CONFIG_NUMA_BALANCING #ifdef CONFIG_NUMA_BALANCING
...@@ -868,18 +870,9 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, ...@@ -868,18 +870,9 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
SEQ_printf(m, SEQ_printf(m,
"---------------------------------------------------------" "---------------------------------------------------------"
"----------\n"); "----------\n");
#define __P(F) \
SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)F) #define P_SCHEDSTAT(F) __PS(#F, schedstat_val(p->F))
#define P(F) \ #define PN_SCHEDSTAT(F) __PSN(#F, schedstat_val(p->F))
SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)p->F)
#define P_SCHEDSTAT(F) \
SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)schedstat_val(p->F))
#define __PN(F) \
SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)F))
#define PN(F) \
SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F))
#define PN_SCHEDSTAT(F) \
SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)schedstat_val(p->F)))
PN(se.exec_start); PN(se.exec_start);
PN(se.vruntime); PN(se.vruntime);
...@@ -939,10 +932,8 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, ...@@ -939,10 +932,8 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
} }
__P(nr_switches); __P(nr_switches);
SEQ_printf(m, "%-45s:%21Ld\n", __PS("nr_voluntary_switches", p->nvcsw);
"nr_voluntary_switches", (long long)p->nvcsw); __PS("nr_involuntary_switches", p->nivcsw);
SEQ_printf(m, "%-45s:%21Ld\n",
"nr_involuntary_switches", (long long)p->nivcsw);
P(se.load.weight); P(se.load.weight);
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
...@@ -955,6 +946,12 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, ...@@ -955,6 +946,12 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
P(se.avg.last_update_time); P(se.avg.last_update_time);
P(se.avg.util_est.ewma); P(se.avg.util_est.ewma);
P(se.avg.util_est.enqueued); P(se.avg.util_est.enqueued);
#endif
#ifdef CONFIG_UCLAMP_TASK
__PS("uclamp.min", p->uclamp[UCLAMP_MIN].value);
__PS("uclamp.max", p->uclamp[UCLAMP_MAX].value);
__PS("effective uclamp.min", uclamp_eff_value(p, UCLAMP_MIN));
__PS("effective uclamp.max", uclamp_eff_value(p, UCLAMP_MAX));
#endif #endif
P(policy); P(policy);
P(prio); P(prio);
...@@ -963,11 +960,7 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, ...@@ -963,11 +960,7 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
P(dl.deadline); P(dl.deadline);
} }
#undef PN_SCHEDSTAT #undef PN_SCHEDSTAT
#undef PN
#undef __PN
#undef P_SCHEDSTAT #undef P_SCHEDSTAT
#undef P
#undef __P
{ {
unsigned int this_cpu = raw_smp_processor_id(); unsigned int this_cpu = raw_smp_processor_id();
...@@ -975,8 +968,7 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, ...@@ -975,8 +968,7 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
t0 = cpu_clock(this_cpu); t0 = cpu_clock(this_cpu);
t1 = cpu_clock(this_cpu); t1 = cpu_clock(this_cpu);
SEQ_printf(m, "%-45s:%21Ld\n", __PS("clock-delta", t1-t0);
"clock-delta", (long long)(t1-t0));
} }
sched_show_numa(p, m); sched_show_numa(p, m);
......
...@@ -4836,11 +4836,10 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq) ...@@ -4836,11 +4836,10 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
resched_curr(rq); resched_curr(rq);
} }
static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b, u64 remaining) static void distribute_cfs_runtime(struct cfs_bandwidth *cfs_b)
{ {
struct cfs_rq *cfs_rq; struct cfs_rq *cfs_rq;
u64 runtime; u64 runtime, remaining = 1;
u64 starting_runtime = remaining;
rcu_read_lock(); rcu_read_lock();
list_for_each_entry_rcu(cfs_rq, &cfs_b->throttled_cfs_rq, list_for_each_entry_rcu(cfs_rq, &cfs_b->throttled_cfs_rq,
...@@ -4855,10 +4854,13 @@ static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b, u64 remaining) ...@@ -4855,10 +4854,13 @@ static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b, u64 remaining)
/* By the above check, this should never be true */ /* By the above check, this should never be true */
SCHED_WARN_ON(cfs_rq->runtime_remaining > 0); SCHED_WARN_ON(cfs_rq->runtime_remaining > 0);
raw_spin_lock(&cfs_b->lock);
runtime = -cfs_rq->runtime_remaining + 1; runtime = -cfs_rq->runtime_remaining + 1;
if (runtime > remaining) if (runtime > cfs_b->runtime)
runtime = remaining; runtime = cfs_b->runtime;
remaining -= runtime; cfs_b->runtime -= runtime;
remaining = cfs_b->runtime;
raw_spin_unlock(&cfs_b->lock);
cfs_rq->runtime_remaining += runtime; cfs_rq->runtime_remaining += runtime;
...@@ -4873,8 +4875,6 @@ static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b, u64 remaining) ...@@ -4873,8 +4875,6 @@ static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b, u64 remaining)
break; break;
} }
rcu_read_unlock(); rcu_read_unlock();
return starting_runtime - remaining;
} }
/* /*
...@@ -4885,7 +4885,6 @@ static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b, u64 remaining) ...@@ -4885,7 +4885,6 @@ static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b, u64 remaining)
*/ */
static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun, unsigned long flags) static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun, unsigned long flags)
{ {
u64 runtime;
int throttled; int throttled;
/* no need to continue the timer with no bandwidth constraint */ /* no need to continue the timer with no bandwidth constraint */
...@@ -4914,24 +4913,17 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun, u ...@@ -4914,24 +4913,17 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun, u
cfs_b->nr_throttled += overrun; cfs_b->nr_throttled += overrun;
/* /*
* This check is repeated as we are holding onto the new bandwidth while * This check is repeated as we release cfs_b->lock while we unthrottle.
* we unthrottle. This can potentially race with an unthrottled group
* trying to acquire new bandwidth from the global pool. This can result
* in us over-using our runtime if it is all used during this loop, but
* only by limited amounts in that extreme case.
*/ */
while (throttled && cfs_b->runtime > 0 && !cfs_b->distribute_running) { while (throttled && cfs_b->runtime > 0 && !cfs_b->distribute_running) {
runtime = cfs_b->runtime;
cfs_b->distribute_running = 1; cfs_b->distribute_running = 1;
raw_spin_unlock_irqrestore(&cfs_b->lock, flags); raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
/* we can't nest cfs_b->lock while distributing bandwidth */ /* we can't nest cfs_b->lock while distributing bandwidth */
runtime = distribute_cfs_runtime(cfs_b, runtime); distribute_cfs_runtime(cfs_b);
raw_spin_lock_irqsave(&cfs_b->lock, flags); raw_spin_lock_irqsave(&cfs_b->lock, flags);
cfs_b->distribute_running = 0; cfs_b->distribute_running = 0;
throttled = !list_empty(&cfs_b->throttled_cfs_rq); throttled = !list_empty(&cfs_b->throttled_cfs_rq);
lsub_positive(&cfs_b->runtime, runtime);
} }
/* /*
...@@ -5065,10 +5057,9 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b) ...@@ -5065,10 +5057,9 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
if (!runtime) if (!runtime)
return; return;
runtime = distribute_cfs_runtime(cfs_b, runtime); distribute_cfs_runtime(cfs_b);
raw_spin_lock_irqsave(&cfs_b->lock, flags); raw_spin_lock_irqsave(&cfs_b->lock, flags);
lsub_positive(&cfs_b->runtime, runtime);
cfs_b->distribute_running = 0; cfs_b->distribute_running = 0;
raw_spin_unlock_irqrestore(&cfs_b->lock, flags); raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
} }
...@@ -6080,8 +6071,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t ...@@ -6080,8 +6071,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask); struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
struct sched_domain *this_sd; struct sched_domain *this_sd;
u64 avg_cost, avg_idle; u64 avg_cost, avg_idle;
u64 time, cost; u64 time;
s64 delta;
int this = smp_processor_id(); int this = smp_processor_id();
int cpu, nr = INT_MAX; int cpu, nr = INT_MAX;
...@@ -6119,9 +6109,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t ...@@ -6119,9 +6109,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
} }
time = cpu_clock(this) - time; time = cpu_clock(this) - time;
cost = this_sd->avg_scan_cost; update_avg(&this_sd->avg_scan_cost, time);
delta = (s64)(time - cost) / 8;
this_sd->avg_scan_cost += delta;
return cpu; return cpu;
} }
...@@ -9048,6 +9036,14 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s ...@@ -9048,6 +9036,14 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
sds->avg_load = (sds->total_load * SCHED_CAPACITY_SCALE) / sds->avg_load = (sds->total_load * SCHED_CAPACITY_SCALE) /
sds->total_capacity; sds->total_capacity;
/*
* If the local group is more loaded than the selected
* busiest group don't try to pull any tasks.
*/
if (local->avg_load >= busiest->avg_load) {
env->imbalance = 0;
return;
}
} }
/* /*
......
...@@ -195,6 +195,12 @@ static inline int task_has_dl_policy(struct task_struct *p) ...@@ -195,6 +195,12 @@ static inline int task_has_dl_policy(struct task_struct *p)
#define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT) #define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT)
/*
 * Fold @sample into the running average stored at @avg: the average
 * moves one eighth of the way toward the new sample.
 */
static inline void update_avg(u64 *avg, u64 sample)
{
/* Signed difference, so a sample below the average pulls it down. */
s64 diff = sample - *avg;
/* Signed division truncates toward zero when diff is negative. */
*avg += diff / 8;
}
/* /*
* !! For sched_setattr_nocheck() (kernel) only !! * !! For sched_setattr_nocheck() (kernel) only !!
* *
...@@ -882,7 +888,6 @@ struct rq { ...@@ -882,7 +888,6 @@ struct rq {
#endif #endif
#ifdef CONFIG_NO_HZ_COMMON #ifdef CONFIG_NO_HZ_COMMON
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
unsigned long last_load_update_tick;
unsigned long last_blocked_load_update_tick; unsigned long last_blocked_load_update_tick;
unsigned int has_blocked_load; unsigned int has_blocked_load;
#endif /* CONFIG_SMP */ #endif /* CONFIG_SMP */
......
...@@ -858,7 +858,8 @@ void wq_worker_running(struct task_struct *task) ...@@ -858,7 +858,8 @@ void wq_worker_running(struct task_struct *task)
* @task: task going to sleep * @task: task going to sleep
* *
* This function is called from schedule() when a busy worker is * This function is called from schedule() when a busy worker is
* going to sleep. * going to sleep. Preemption needs to be disabled to protect ->sleeping
* assignment.
*/ */
void wq_worker_sleeping(struct task_struct *task) void wq_worker_sleeping(struct task_struct *task)
{ {
...@@ -875,7 +876,8 @@ void wq_worker_sleeping(struct task_struct *task) ...@@ -875,7 +876,8 @@ void wq_worker_sleeping(struct task_struct *task)
pool = worker->pool; pool = worker->pool;
if (WARN_ON_ONCE(worker->sleeping)) /* Return if preempted before wq_worker_running() was reached */
if (worker->sleeping)
return; return;
worker->sleeping = 1; worker->sleeping = 1;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment