Commit 57801c1b authored by Linus Torvalds

Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler fixes from Thomas Gleixner:
 "A couple of scheduler fixes:

   - force watchdog reset while processing sysrq-w

   - fix a deadlock when enabling trace events in the scheduler

   - fixes to the throttled next buddy logic

   - fixes for the average accounting (missing serialization and
     underflow handling)

   - allow kernel threads to fall back to online but not active cpus"

* 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/core: Allow kthreads to fall back to online && !active cpus
  sched/fair: Do not announce throttled next buddy in dequeue_task_fair()
  sched/fair: Initialize throttle_count for new task-groups lazily
  sched/fair: Fix cfs_rq avg tracking underflow
  kernel/sysrq, watchdog, sched/core: Reset watchdog on all CPUs while processing sysrq-w
  sched/debug: Fix deadlock when enabling sched events
  sched/fair: Fix post_init_entity_util_avg() serialization
parents e3b22bc3 feb245e3
@@ -1536,7 +1536,9 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
 	for (;;) {
 		/* Any allowed, online CPU? */
 		for_each_cpu(dest_cpu, tsk_cpus_allowed(p)) {
-			if (!cpu_active(dest_cpu))
+			if (!(p->flags & PF_KTHREAD) && !cpu_active(dest_cpu))
+				continue;
+			if (!cpu_online(dest_cpu))
 				continue;
 			goto out;
 		}
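
[ Annotation, not part of the patch: the new test lets kernel threads (PF_KTHREAD) fall back to a CPU that is online but not yet active, while user tasks still require an active CPU. Below is a minimal userspace sketch of that predicate; the bitmask "cpumasks", the helpers pick_fallback_cpu()/cpu_online_bit()/cpu_active_bit() and the example masks are invented for illustration, only the flag test mirrors the patched loop. ]

#include <stdbool.h>
#include <stdio.h>

#define PF_KTHREAD 0x00200000			/* kernel flag value, used here as an opaque bit */

struct task {
	unsigned int flags;
	unsigned long cpus_allowed;		/* bit i set => CPU i allowed */
};

/* Illustrative stand-ins for the kernel's cpu_online()/cpu_active() tests. */
static unsigned long online_mask = 0x0f;	/* CPUs 0-3 are online */
static unsigned long active_mask = 0x0e;	/* CPU 0 is online but not active */

static bool cpu_online_bit(int cpu) { return online_mask & (1UL << cpu); }
static bool cpu_active_bit(int cpu) { return active_mask & (1UL << cpu); }

/* Mirrors the patched loop body of select_fallback_rq() above. */
static int pick_fallback_cpu(const struct task *p)
{
	for (int cpu = 0; cpu < 4; cpu++) {
		if (!(p->cpus_allowed & (1UL << cpu)))
			continue;
		if (!(p->flags & PF_KTHREAD) && !cpu_active_bit(cpu))
			continue;		/* user tasks still need an active CPU */
		if (!cpu_online_bit(cpu))
			continue;		/* everyone needs an online CPU */
		return cpu;
	}
	return -1;				/* no usable fallback among the allowed CPUs */
}

int main(void)
{
	struct task kthread = { .flags = PF_KTHREAD, .cpus_allowed = 0x01 };
	struct task utask   = { .flags = 0,          .cpus_allowed = 0x01 };

	printf("kthread bound to CPU0 -> %d\n", pick_fallback_cpu(&kthread));	/* 0  */
	printf("user task bound to CPU0 -> %d\n", pick_fallback_cpu(&utask));	/* -1 */
	return 0;
}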
@@ -2535,10 +2537,9 @@ void wake_up_new_task(struct task_struct *p)
 	 */
 	set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0));
 #endif
-	/* Post initialize new task's util average when its cfs_rq is set */
+	rq = __task_rq_lock(p, &rf);
 	post_init_entity_util_avg(&p->se);

-	rq = __task_rq_lock(p, &rf);
 	activate_task(rq, p, 0);
 	p->on_rq = TASK_ON_RQ_QUEUED;
 	trace_sched_wakeup_new(p);
@@ -5148,14 +5149,16 @@ void show_state_filter(unsigned long state_filter)
 		/*
 		 * reset the NMI-timeout, listing all files on a slow
 		 * console might take a lot of time:
+		 * Also, reset softlockup watchdogs on all CPUs, because
+		 * another CPU might be blocked waiting for us to process
+		 * an IPI.
 		 */
 		touch_nmi_watchdog();
+		touch_all_softlockup_watchdogs();
 		if (!state_filter || (p->state & state_filter))
 			sched_show_task(p);
 	}

-	touch_all_softlockup_watchdogs();
-
 #ifdef CONFIG_SCHED_DEBUG
 	if (!state_filter)
 		sysrq_sched_debug_show();
...
@@ -2904,6 +2904,23 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq)
 	}
 }

+/*
+ * Unsigned subtract and clamp on underflow.
+ *
+ * Explicitly do a load-store to ensure the intermediate value never hits
+ * memory. This allows lockless observations without ever seeing the negative
+ * values.
+ */
+#define sub_positive(_ptr, _val) do {				\
+	typeof(_ptr) ptr = (_ptr);				\
+	typeof(*ptr) val = (_val);				\
+	typeof(*ptr) res, var = READ_ONCE(*ptr);		\
+	res = var - val;					\
+	if (res > var)						\
+		res = 0;					\
+	WRITE_ONCE(*ptr, res);					\
+} while (0)
+
 /* Group cfs_rq's load_avg is used for task_h_load and update_cfs_share */
 static inline int
 update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq)
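
[ Annotation, not part of the patch: sub_positive() computes the difference in a local variable, clamps an underflowed (wrapped) result to zero and only then stores it back, so a lockless reader of the average fields never observes a huge intermediate value. The sketch below compiles the same macro in userspace with simplified READ_ONCE()/WRITE_ONCE() stand-ins; the real kernel definitions additionally constrain the compiler. ]

#include <stdio.h>

/* Simplified stand-ins so the macro builds outside the kernel. */
#define READ_ONCE(x)		(*(volatile typeof(x) *)&(x))
#define WRITE_ONCE(x, val)	(*(volatile typeof(x) *)&(x) = (val))

/* Same definition as in the hunk above. */
#define sub_positive(_ptr, _val) do {				\
	typeof(_ptr) ptr = (_ptr);				\
	typeof(*ptr) val = (_val);				\
	typeof(*ptr) res, var = READ_ONCE(*ptr);		\
	res = var - val;					\
	if (res > var)						\
		res = 0;					\
	WRITE_ONCE(*ptr, res);					\
} while (0)

int main(void)
{
	unsigned long load_avg = 100;

	sub_positive(&load_avg, 30);	/* normal case: 100 - 30 */
	printf("%lu\n", load_avg);	/* 70 */

	sub_positive(&load_avg, 150);	/* 70 - 150 would wrap on an unsigned type, */
	printf("%lu\n", load_avg);	/* so the result is clamped to 0 */
	return 0;
}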
@@ -2913,15 +2930,15 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq)
 	if (atomic_long_read(&cfs_rq->removed_load_avg)) {
 		s64 r = atomic_long_xchg(&cfs_rq->removed_load_avg, 0);
-		sa->load_avg = max_t(long, sa->load_avg - r, 0);
-		sa->load_sum = max_t(s64, sa->load_sum - r * LOAD_AVG_MAX, 0);
+		sub_positive(&sa->load_avg, r);
+		sub_positive(&sa->load_sum, r * LOAD_AVG_MAX);
 		removed_load = 1;
 	}

 	if (atomic_long_read(&cfs_rq->removed_util_avg)) {
 		long r = atomic_long_xchg(&cfs_rq->removed_util_avg, 0);
-		sa->util_avg = max_t(long, sa->util_avg - r, 0);
-		sa->util_sum = max_t(s32, sa->util_sum - r * LOAD_AVG_MAX, 0);
+		sub_positive(&sa->util_avg, r);
+		sub_positive(&sa->util_sum, r * LOAD_AVG_MAX);
 		removed_util = 1;
 	}
@@ -2994,10 +3011,10 @@ static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
 			  &se->avg, se->on_rq * scale_load_down(se->load.weight),
 			  cfs_rq->curr == se, NULL);

-	cfs_rq->avg.load_avg = max_t(long, cfs_rq->avg.load_avg - se->avg.load_avg, 0);
-	cfs_rq->avg.load_sum = max_t(s64, cfs_rq->avg.load_sum - se->avg.load_sum, 0);
-	cfs_rq->avg.util_avg = max_t(long, cfs_rq->avg.util_avg - se->avg.util_avg, 0);
-	cfs_rq->avg.util_sum = max_t(s32, cfs_rq->avg.util_sum - se->avg.util_sum, 0);
+	sub_positive(&cfs_rq->avg.load_avg, se->avg.load_avg);
+	sub_positive(&cfs_rq->avg.load_sum, se->avg.load_sum);
+	sub_positive(&cfs_rq->avg.util_avg, se->avg.util_avg);
+	sub_positive(&cfs_rq->avg.util_sum, se->avg.util_sum);

 	cfs_rq_util_change(cfs_rq);
 }
@@ -3246,7 +3263,7 @@ static inline void check_schedstat_required(void)
 			trace_sched_stat_iowait_enabled()  ||
 			trace_sched_stat_blocked_enabled() ||
 			trace_sched_stat_runtime_enabled())  {
-		pr_warn_once("Scheduler tracepoints stat_sleep, stat_iowait, "
+		printk_deferred_once("Scheduler tracepoints stat_sleep, stat_iowait, "
 			     "stat_blocked and stat_runtime require the "
 			     "kernel parameter schedstats=enabled or "
 			     "kernel.sched_schedstats=1\n");
@@ -4185,6 +4202,26 @@ static void check_enqueue_throttle(struct cfs_rq *cfs_rq)
 	if (!cfs_bandwidth_used())
 		return;

+	/* Synchronize hierarchical throttle counter: */
+	if (unlikely(!cfs_rq->throttle_uptodate)) {
+		struct rq *rq = rq_of(cfs_rq);
+		struct cfs_rq *pcfs_rq;
+		struct task_group *tg;
+
+		cfs_rq->throttle_uptodate = 1;
+
+		/* Get closest up-to-date node, because leaves go first: */
+		for (tg = cfs_rq->tg->parent; tg; tg = tg->parent) {
+			pcfs_rq = tg->cfs_rq[cpu_of(rq)];
+			if (pcfs_rq->throttle_uptodate)
+				break;
+		}
+		if (tg) {
+			cfs_rq->throttle_count = pcfs_rq->throttle_count;
+			cfs_rq->throttled_clock_task = rq_clock_task(rq);
+		}
+	}
+
 	/* an active group must be handled by the update_curr()->put() path */
 	if (!cfs_rq->runtime_enabled || cfs_rq->curr)
 		return;
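
[ Annotation, not part of the patch: a newly allocated cfs_rq is zero-filled, so its throttle_count starts at 0 even when its ancestors are already throttled; the block above synchronizes it lazily on first use by walking up the task-group tree to the closest ancestor whose counter is up to date and copying that value. A toy userspace model of just that walk follows; struct group and its fields are invented stand-ins for the per-CPU tg->cfs_rq[] hierarchy. ]

#include <stddef.h>
#include <stdio.h>

/* Toy stand-in for one CPU's cfs_rq inside a task-group hierarchy. */
struct group {
	struct group *parent;
	int throttle_count;	/* number of throttled ancestors (incl. self) */
	int uptodate;		/* has throttle_count been synchronized yet?  */
};

/* Mirrors the lazy sync added to check_enqueue_throttle(): inherit the
 * counter from the closest up-to-date ancestor. */
static void sync_throttle_count(struct group *g)
{
	struct group *anc;

	if (g->uptodate)
		return;
	g->uptodate = 1;

	for (anc = g->parent; anc; anc = anc->parent)
		if (anc->uptodate)
			break;
	if (anc)
		g->throttle_count = anc->throttle_count;
}

int main(void)
{
	/* root -> throttled parent -> leaf created while the parent is throttled */
	struct group root   = { .parent = NULL,    .throttle_count = 0, .uptodate = 1 };
	struct group parent = { .parent = &root,   .throttle_count = 1, .uptodate = 1 };
	struct group leaf   = { .parent = &parent, .throttle_count = 0, .uptodate = 0 };

	sync_throttle_count(&leaf);
	printf("leaf throttle_count = %d\n", leaf.throttle_count);	/* 1, inherited */
	return 0;
}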
@@ -4500,15 +4537,14 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)

 		/* Don't dequeue parent if it has other entities besides us */
 		if (cfs_rq->load.weight) {
+			/* Avoid re-evaluating load for this entity: */
+			se = parent_entity(se);
 			/*
 			 * Bias pick_next to pick a task from this cfs_rq, as
 			 * p is sleeping when it is within its sched_slice.
 			 */
-			if (task_sleep && parent_entity(se))
-				set_next_buddy(parent_entity(se));
-
-			/* avoid re-evaluating load for this entity */
-			se = parent_entity(se);
+			if (task_sleep && se && !throttled_hierarchy(cfs_rq))
+				set_next_buddy(se);
 			break;
 		}
 		flags |= DEQUEUE_SLEEP;
@@ -8496,8 +8532,9 @@ void free_fair_sched_group(struct task_group *tg)

 int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 {
-	struct cfs_rq *cfs_rq;
 	struct sched_entity *se;
+	struct cfs_rq *cfs_rq;
+	struct rq *rq;
 	int i;

 	tg->cfs_rq = kzalloc(sizeof(cfs_rq) * nr_cpu_ids, GFP_KERNEL);
@@ -8512,6 +8549,8 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 	init_cfs_bandwidth(tg_cfs_bandwidth(tg));

 	for_each_possible_cpu(i) {
+		rq = cpu_rq(i);
+
 		cfs_rq = kzalloc_node(sizeof(struct cfs_rq),
 				      GFP_KERNEL, cpu_to_node(i));
 		if (!cfs_rq)
@@ -8525,7 +8564,10 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 		init_cfs_rq(cfs_rq);
 		init_tg_cfs_entry(tg, cfs_rq, se, i, parent->se[i]);
 		init_entity_runnable_average(se);
+
+		raw_spin_lock_irq(&rq->lock);
 		post_init_entity_util_avg(se);
+		raw_spin_unlock_irq(&rq->lock);
 	}

 	return 1;
...
@@ -437,7 +437,7 @@ struct cfs_rq {
 	u64 throttled_clock, throttled_clock_task;
 	u64 throttled_clock_task_time;
-	int throttled, throttle_count;
+	int throttled, throttle_count, throttle_uptodate;
 	struct list_head throttled_list;
 #endif /* CONFIG_CFS_BANDWIDTH */
 #endif /* CONFIG_FAIR_GROUP_SCHED */
...