Commit ceeadb83 authored by Yafang Shao, committed by Peter Zijlstra

sched: Make struct sched_statistics independent of fair sched class

If we want to use the schedstats facility to trace other sched classes, we
should make it independent of the fair sched class. struct sched_statistics
holds the scheduler statistics of a task_struct or a task_group, so we can
move it into struct task_struct and struct task_group to achieve that goal.

After this patch, schedstats are organized as follows:

    struct task_struct {
        ...
        struct sched_entity     se;
        struct sched_rt_entity  rt;
        struct sched_dl_entity  dl;
        ...
        struct sched_statistics stats;
        ...
    };

Regarding the task group, schedstats is only supported for fair group sched,
so a new struct sched_entity_stats is introduced, as suggested by Peter:

    struct sched_entity_stats {
        struct sched_entity     se;
        struct sched_statistics stats;
    } __no_randomize_layout;

Then, given the se of a task_group, we can easily get at the stats (see the accessor below).
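
For reference, the helper introduced further down in this patch recovers the
stats from any sched_entity: a group se is embedded first in
sched_entity_stats, so container_of() finds its stats, while a task se reaches
the stats through its task_struct:

    static inline struct sched_statistics *
    __schedstats_from_se(struct sched_entity *se)
    {
    #ifdef CONFIG_FAIR_GROUP_SCHED
            /* Group se: embedded at the start of struct sched_entity_stats. */
            if (!entity_is_task(se))
                    return &container_of(se, struct sched_entity_stats, se)->stats;
    #endif
            /* Task se: the stats now live directly in task_struct. */
            return &task_of(se)->stats;
    }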

The sched_statistics members may be modified frequently when schedstats is
enabled. To avoid disturbing unrelated data that may share a cacheline with
them, struct sched_statistics is defined as cacheline aligned.
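
Concretely, only the closing brace of the struct definition changes, as the
first sched.h hunk below shows:

    struct sched_statistics {
        ...
    } ____cacheline_aligned;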

As this patch changes core structs of the scheduler, I verified its
performance impact on the scheduler with 'perf bench sched pipe', as
suggested by Mel. Below are the results, in which all values are in
usecs/op.
                                  Before               After
      kernel.sched_schedstats=0  5.2~5.4               5.2~5.4
      kernel.sched_schedstats=1  5.3~5.5               5.3~5.5
[These numbers differ slightly from the earlier version, because my old test
 machine broke down and I had to use a different test machine.]

There is almost no impact on scheduler performance.

No functional change.

[lkp@intel.com: reported a build failure in an earlier version]
Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Mel Gorman <mgorman@suse.de>
Link: https://lore.kernel.org/r/20210905143547.4668-3-laoar.shao@gmail.com
parent a2dcb276
@@ -522,7 +522,7 @@ struct sched_statistics {
 	u64			nr_wakeups_passive;
 	u64			nr_wakeups_idle;
 #endif
-};
+} ____cacheline_aligned;
 
 struct sched_entity {
 	/* For load-balancing: */
@@ -538,8 +538,6 @@ struct sched_entity {
 
 	u64			nr_migrations;
 
-	struct sched_statistics	statistics;
-
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	int			depth;
 	struct sched_entity	*parent;
@@ -803,6 +801,8 @@ struct task_struct {
 	struct uclamp_se	uclamp[UCLAMP_CNT];
 #endif
 
+	struct sched_statistics	stats;
+
 #ifdef CONFIG_PREEMPT_NOTIFIERS
 	/* List of struct preempt_notifier: */
 	struct hlist_head	preempt_notifiers;
...
@@ -3489,11 +3489,11 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
 #ifdef CONFIG_SMP
 	if (cpu == rq->cpu) {
 		__schedstat_inc(rq->ttwu_local);
-		__schedstat_inc(p->se.statistics.nr_wakeups_local);
+		__schedstat_inc(p->stats.nr_wakeups_local);
 	} else {
 		struct sched_domain *sd;
 
-		__schedstat_inc(p->se.statistics.nr_wakeups_remote);
+		__schedstat_inc(p->stats.nr_wakeups_remote);
 		rcu_read_lock();
 		for_each_domain(rq->cpu, sd) {
 			if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
@@ -3505,14 +3505,14 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
 	}
 
 	if (wake_flags & WF_MIGRATED)
-		__schedstat_inc(p->se.statistics.nr_wakeups_migrate);
+		__schedstat_inc(p->stats.nr_wakeups_migrate);
 #endif /* CONFIG_SMP */
 
 	__schedstat_inc(rq->ttwu_count);
-	__schedstat_inc(p->se.statistics.nr_wakeups);
+	__schedstat_inc(p->stats.nr_wakeups);
 
 	if (wake_flags & WF_SYNC)
-		__schedstat_inc(p->se.statistics.nr_wakeups_sync);
+		__schedstat_inc(p->stats.nr_wakeups_sync);
 }
 
 /*
@@ -4196,7 +4196,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 
 #ifdef CONFIG_SCHEDSTATS
 	/* Even if schedstat is disabled, there should not be garbage */
-	memset(&p->se.statistics, 0, sizeof(p->se.statistics));
+	memset(&p->stats, 0, sizeof(p->stats));
 #endif
 
 	RB_CLEAR_NODE(&p->dl.rb_node);
@@ -9553,9 +9553,9 @@ void normalize_rt_tasks(void)
 			continue;
 
 		p->se.exec_start = 0;
-		schedstat_set(p->se.statistics.wait_start,  0);
-		schedstat_set(p->se.statistics.sleep_start, 0);
-		schedstat_set(p->se.statistics.block_start, 0);
+		schedstat_set(p->stats.wait_start,  0);
+		schedstat_set(p->stats.sleep_start, 0);
+		schedstat_set(p->stats.block_start, 0);
 
 		if (!dl_task(p) && !rt_task(p)) {
 			/*
@@ -10397,11 +10397,14 @@ static int cpu_cfs_stat_show(struct seq_file *sf, void *v)
 	seq_printf(sf, "throttled_time %llu\n", cfs_b->throttled_time);
 
 	if (schedstat_enabled() && tg != &root_task_group) {
+		struct sched_statistics *stats;
 		u64 ws = 0;
 		int i;
 
-		for_each_possible_cpu(i)
-			ws += schedstat_val(tg->se[i]->statistics.wait_sum);
+		for_each_possible_cpu(i) {
+			stats = __schedstats_from_se(tg->se[i]);
+			ws += schedstat_val(stats->wait_sum);
+		}
 
 		seq_printf(sf, "wait_sum %llu\n", ws);
 	}
...
@@ -1265,8 +1265,8 @@ static void update_curr_dl(struct rq *rq)
 		return;
 	}
 
-	schedstat_set(curr->se.statistics.exec_max,
-		      max(curr->se.statistics.exec_max, delta_exec));
+	schedstat_set(curr->stats.exec_max,
+		      max(curr->stats.exec_max, delta_exec));
 
 	curr->se.sum_exec_runtime += delta_exec;
 	account_group_exec_runtime(curr, delta_exec);
...
@@ -449,9 +449,11 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group *tg)
 	struct sched_entity *se = tg->se[cpu];
 
 #define P(F)		SEQ_printf(m, "  .%-30s: %lld\n",	#F, (long long)F)
-#define P_SCHEDSTAT(F)	SEQ_printf(m, "  .%-30s: %lld\n",	#F, (long long)schedstat_val(F))
+#define P_SCHEDSTAT(F)	SEQ_printf(m, "  .%-30s: %lld\n", \
+		#F, (long long)schedstat_val(stats->F))
 #define PN(F)		SEQ_printf(m, "  .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)F))
-#define PN_SCHEDSTAT(F)	SEQ_printf(m, "  .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)schedstat_val(F)))
+#define PN_SCHEDSTAT(F)	SEQ_printf(m, "  .%-30s: %lld.%06ld\n", \
+		#F, SPLIT_NS((long long)schedstat_val(stats->F)))
 
 	if (!se)
 		return;
@@ -461,16 +463,18 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group *tg)
 	PN(se->sum_exec_runtime);
 
 	if (schedstat_enabled()) {
-		PN_SCHEDSTAT(se->statistics.wait_start);
-		PN_SCHEDSTAT(se->statistics.sleep_start);
-		PN_SCHEDSTAT(se->statistics.block_start);
-		PN_SCHEDSTAT(se->statistics.sleep_max);
-		PN_SCHEDSTAT(se->statistics.block_max);
-		PN_SCHEDSTAT(se->statistics.exec_max);
-		PN_SCHEDSTAT(se->statistics.slice_max);
-		PN_SCHEDSTAT(se->statistics.wait_max);
-		PN_SCHEDSTAT(se->statistics.wait_sum);
-		P_SCHEDSTAT(se->statistics.wait_count);
+		struct sched_statistics *stats = __schedstats_from_se(se);
+
+		PN_SCHEDSTAT(wait_start);
+		PN_SCHEDSTAT(sleep_start);
+		PN_SCHEDSTAT(block_start);
+		PN_SCHEDSTAT(sleep_max);
+		PN_SCHEDSTAT(block_max);
+		PN_SCHEDSTAT(exec_max);
+		PN_SCHEDSTAT(slice_max);
+		PN_SCHEDSTAT(wait_max);
+		PN_SCHEDSTAT(wait_sum);
+		P_SCHEDSTAT(wait_count);
 	}
 
 	P(se->load.weight);
@@ -537,9 +541,9 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
 		p->prio);
 
 	SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
-		SPLIT_NS(schedstat_val_or_zero(p->se.statistics.wait_sum)),
+		SPLIT_NS(schedstat_val_or_zero(p->stats.wait_sum)),
 		SPLIT_NS(p->se.sum_exec_runtime),
-		SPLIT_NS(schedstat_val_or_zero(p->se.statistics.sum_sleep_runtime)));
+		SPLIT_NS(schedstat_val_or_zero(p->stats.sum_sleep_runtime)));
 
 #ifdef CONFIG_NUMA_BALANCING
 	SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p));
@@ -958,8 +962,8 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
 		"---------------------------------------------------------"
 		"----------\n");
 
-#define P_SCHEDSTAT(F)  __PS(#F, schedstat_val(p->F))
-#define PN_SCHEDSTAT(F) __PSN(#F, schedstat_val(p->F))
+#define P_SCHEDSTAT(F)  __PS(#F, schedstat_val(p->stats.F))
+#define PN_SCHEDSTAT(F) __PSN(#F, schedstat_val(p->stats.F))
 
 	PN(se.exec_start);
 	PN(se.vruntime);
@@ -972,33 +976,33 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
 	if (schedstat_enabled()) {
 		u64 avg_atom, avg_per_cpu;
 
-		PN_SCHEDSTAT(se.statistics.sum_sleep_runtime);
-		PN_SCHEDSTAT(se.statistics.wait_start);
-		PN_SCHEDSTAT(se.statistics.sleep_start);
-		PN_SCHEDSTAT(se.statistics.block_start);
-		PN_SCHEDSTAT(se.statistics.sleep_max);
-		PN_SCHEDSTAT(se.statistics.block_max);
-		PN_SCHEDSTAT(se.statistics.exec_max);
-		PN_SCHEDSTAT(se.statistics.slice_max);
-		PN_SCHEDSTAT(se.statistics.wait_max);
-		PN_SCHEDSTAT(se.statistics.wait_sum);
-		P_SCHEDSTAT(se.statistics.wait_count);
-		PN_SCHEDSTAT(se.statistics.iowait_sum);
-		P_SCHEDSTAT(se.statistics.iowait_count);
-		P_SCHEDSTAT(se.statistics.nr_migrations_cold);
-		P_SCHEDSTAT(se.statistics.nr_failed_migrations_affine);
-		P_SCHEDSTAT(se.statistics.nr_failed_migrations_running);
-		P_SCHEDSTAT(se.statistics.nr_failed_migrations_hot);
-		P_SCHEDSTAT(se.statistics.nr_forced_migrations);
-		P_SCHEDSTAT(se.statistics.nr_wakeups);
-		P_SCHEDSTAT(se.statistics.nr_wakeups_sync);
-		P_SCHEDSTAT(se.statistics.nr_wakeups_migrate);
-		P_SCHEDSTAT(se.statistics.nr_wakeups_local);
-		P_SCHEDSTAT(se.statistics.nr_wakeups_remote);
-		P_SCHEDSTAT(se.statistics.nr_wakeups_affine);
-		P_SCHEDSTAT(se.statistics.nr_wakeups_affine_attempts);
-		P_SCHEDSTAT(se.statistics.nr_wakeups_passive);
-		P_SCHEDSTAT(se.statistics.nr_wakeups_idle);
+		PN_SCHEDSTAT(sum_sleep_runtime);
+		PN_SCHEDSTAT(wait_start);
+		PN_SCHEDSTAT(sleep_start);
+		PN_SCHEDSTAT(block_start);
+		PN_SCHEDSTAT(sleep_max);
+		PN_SCHEDSTAT(block_max);
+		PN_SCHEDSTAT(exec_max);
+		PN_SCHEDSTAT(slice_max);
+		PN_SCHEDSTAT(wait_max);
+		PN_SCHEDSTAT(wait_sum);
+		P_SCHEDSTAT(wait_count);
+		PN_SCHEDSTAT(iowait_sum);
+		P_SCHEDSTAT(iowait_count);
+		P_SCHEDSTAT(nr_migrations_cold);
+		P_SCHEDSTAT(nr_failed_migrations_affine);
+		P_SCHEDSTAT(nr_failed_migrations_running);
+		P_SCHEDSTAT(nr_failed_migrations_hot);
+		P_SCHEDSTAT(nr_forced_migrations);
+		P_SCHEDSTAT(nr_wakeups);
+		P_SCHEDSTAT(nr_wakeups_sync);
+		P_SCHEDSTAT(nr_wakeups_migrate);
+		P_SCHEDSTAT(nr_wakeups_local);
+		P_SCHEDSTAT(nr_wakeups_remote);
+		P_SCHEDSTAT(nr_wakeups_affine);
+		P_SCHEDSTAT(nr_wakeups_affine_attempts);
+		P_SCHEDSTAT(nr_wakeups_passive);
+		P_SCHEDSTAT(nr_wakeups_idle);
 
 		avg_atom = p->se.sum_exec_runtime;
 		if (nr_switches)
@@ -1064,7 +1068,7 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
 void proc_sched_set_task(struct task_struct *p)
 {
 #ifdef CONFIG_SCHEDSTATS
-	memset(&p->se.statistics, 0, sizeof(p->se.statistics));
+	memset(&p->stats, 0, sizeof(p->stats));
 #endif
 }
...
@@ -856,8 +856,13 @@ static void update_curr(struct cfs_rq *cfs_rq)
 
 	curr->exec_start = now;
 
-	schedstat_set(curr->statistics.exec_max,
-		      max(delta_exec, curr->statistics.exec_max));
+	if (schedstat_enabled()) {
+		struct sched_statistics *stats;
+
+		stats = __schedstats_from_se(curr);
+		__schedstat_set(stats->exec_max,
+				max(delta_exec, stats->exec_max));
+	}
 
 	curr->sum_exec_runtime += delta_exec;
 	schedstat_add(cfs_rq->exec_clock, delta_exec);
@@ -885,39 +890,45 @@ static inline void
 update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	u64 wait_start, prev_wait_start;
+	struct sched_statistics *stats;
 
 	if (!schedstat_enabled())
 		return;
 
+	stats = __schedstats_from_se(se);
+
 	wait_start = rq_clock(rq_of(cfs_rq));
-	prev_wait_start = schedstat_val(se->statistics.wait_start);
+	prev_wait_start = schedstat_val(stats->wait_start);
 
 	if (entity_is_task(se) && task_on_rq_migrating(task_of(se)) &&
 	    likely(wait_start > prev_wait_start))
 		wait_start -= prev_wait_start;
 
-	__schedstat_set(se->statistics.wait_start, wait_start);
+	__schedstat_set(stats->wait_start, wait_start);
 }
 
 static inline void
 update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	struct task_struct *p;
+	struct sched_statistics *stats;
+	struct task_struct *p = NULL;
 	u64 delta;
 
 	if (!schedstat_enabled())
 		return;
 
+	stats = __schedstats_from_se(se);
+
 	/*
 	 * When the sched_schedstat changes from 0 to 1, some sched se
 	 * maybe already in the runqueue, the se->statistics.wait_start
 	 * will be 0.So it will let the delta wrong. We need to avoid this
 	 * scenario.
 	 */
-	if (unlikely(!schedstat_val(se->statistics.wait_start)))
+	if (unlikely(!schedstat_val(stats->wait_start)))
 		return;
 
-	delta = rq_clock(rq_of(cfs_rq)) - schedstat_val(se->statistics.wait_start);
+	delta = rq_clock(rq_of(cfs_rq)) - schedstat_val(stats->wait_start);
 
 	if (entity_is_task(se)) {
 		p = task_of(se);
@@ -927,30 +938,33 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
 			 * time stamp can be adjusted to accumulate wait time
 			 * prior to migration.
 			 */
-			__schedstat_set(se->statistics.wait_start, delta);
+			__schedstat_set(stats->wait_start, delta);
 			return;
 		}
 		trace_sched_stat_wait(p, delta);
 	}
 
-	__schedstat_set(se->statistics.wait_max,
-			max(schedstat_val(se->statistics.wait_max), delta));
-	__schedstat_inc(se->statistics.wait_count);
-	__schedstat_add(se->statistics.wait_sum, delta);
-	__schedstat_set(se->statistics.wait_start, 0);
+	__schedstat_set(stats->wait_max,
+			max(schedstat_val(stats->wait_max), delta));
+	__schedstat_inc(stats->wait_count);
+	__schedstat_add(stats->wait_sum, delta);
+	__schedstat_set(stats->wait_start, 0);
 }
 
 static inline void
 update_stats_enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
+	struct sched_statistics *stats;
 	struct task_struct *tsk = NULL;
 	u64 sleep_start, block_start;
 
 	if (!schedstat_enabled())
 		return;
 
-	sleep_start = schedstat_val(se->statistics.sleep_start);
-	block_start = schedstat_val(se->statistics.block_start);
+	stats = __schedstats_from_se(se);
+
+	sleep_start = schedstat_val(stats->sleep_start);
+	block_start = schedstat_val(stats->block_start);
 
 	if (entity_is_task(se))
 		tsk = task_of(se);
@@ -961,11 +975,11 @@ update_stats_enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 		if ((s64)delta < 0)
 			delta = 0;
 
-		if (unlikely(delta > schedstat_val(se->statistics.sleep_max)))
-			__schedstat_set(se->statistics.sleep_max, delta);
+		if (unlikely(delta > schedstat_val(stats->sleep_max)))
+			__schedstat_set(stats->sleep_max, delta);
 
-		__schedstat_set(se->statistics.sleep_start, 0);
-		__schedstat_add(se->statistics.sum_sleep_runtime, delta);
+		__schedstat_set(stats->sleep_start, 0);
+		__schedstat_add(stats->sum_sleep_runtime, delta);
 
 		if (tsk) {
 			account_scheduler_latency(tsk, delta >> 10, 1);
@@ -978,16 +992,16 @@ update_stats_enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 		if ((s64)delta < 0)
 			delta = 0;
 
-		if (unlikely(delta > schedstat_val(se->statistics.block_max)))
-			__schedstat_set(se->statistics.block_max, delta);
+		if (unlikely(delta > schedstat_val(stats->block_max)))
+			__schedstat_set(stats->block_max, delta);
 
-		__schedstat_set(se->statistics.block_start, 0);
-		__schedstat_add(se->statistics.sum_sleep_runtime, delta);
+		__schedstat_set(stats->block_start, 0);
+		__schedstat_add(stats->sum_sleep_runtime, delta);
 
 		if (tsk) {
 			if (tsk->in_iowait) {
-				__schedstat_add(se->statistics.iowait_sum, delta);
-				__schedstat_inc(se->statistics.iowait_count);
+				__schedstat_add(stats->iowait_sum, delta);
+				__schedstat_inc(stats->iowait_count);
 				trace_sched_stat_iowait(tsk, delta);
 			}
@@ -1049,10 +1063,10 @@ update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 		/* XXX racy against TTWU */
 		state = READ_ONCE(tsk->__state);
 		if (state & TASK_INTERRUPTIBLE)
-			__schedstat_set(se->statistics.sleep_start,
+			__schedstat_set(tsk->stats.sleep_start,
 					rq_clock(rq_of(cfs_rq)));
 		if (state & TASK_UNINTERRUPTIBLE)
-			__schedstat_set(se->statistics.block_start,
+			__schedstat_set(tsk->stats.block_start,
 					rq_clock(rq_of(cfs_rq)));
 	}
 }
@@ -4530,8 +4544,11 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	 */
 	if (schedstat_enabled() &&
 	    rq_of(cfs_rq)->cfs.load.weight >= 2*se->load.weight) {
-		__schedstat_set(se->statistics.slice_max,
-				max((u64)se->statistics.slice_max,
+		struct sched_statistics *stats;
+
+		stats = __schedstats_from_se(se);
+		__schedstat_set(stats->slice_max,
+				max((u64)stats->slice_max,
 				    se->sum_exec_runtime - se->prev_sum_exec_runtime));
 	}
@@ -6046,12 +6063,12 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p,
 	if (sched_feat(WA_WEIGHT) && target == nr_cpumask_bits)
 		target = wake_affine_weight(sd, p, this_cpu, prev_cpu, sync);
 
-	schedstat_inc(p->se.statistics.nr_wakeups_affine_attempts);
+	schedstat_inc(p->stats.nr_wakeups_affine_attempts);
 	if (target == nr_cpumask_bits)
 		return prev_cpu;
 
 	schedstat_inc(sd->ttwu_move_affine);
-	schedstat_inc(p->se.statistics.nr_wakeups_affine);
+	schedstat_inc(p->stats.nr_wakeups_affine);
 	return target;
 }
@@ -7855,7 +7872,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
 	if (!cpumask_test_cpu(env->dst_cpu, p->cpus_ptr)) {
 		int cpu;
 
-		schedstat_inc(p->se.statistics.nr_failed_migrations_affine);
+		schedstat_inc(p->stats.nr_failed_migrations_affine);
 
 		env->flags |= LBF_SOME_PINNED;
@@ -7889,7 +7906,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
 	env->flags &= ~LBF_ALL_PINNED;
 
 	if (task_running(env->src_rq, p)) {
-		schedstat_inc(p->se.statistics.nr_failed_migrations_running);
+		schedstat_inc(p->stats.nr_failed_migrations_running);
 		return 0;
 	}
@@ -7911,12 +7928,12 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
 	    env->sd->nr_balance_failed > env->sd->cache_nice_tries) {
 		if (tsk_cache_hot == 1) {
 			schedstat_inc(env->sd->lb_hot_gained[env->idle]);
-			schedstat_inc(p->se.statistics.nr_forced_migrations);
+			schedstat_inc(p->stats.nr_forced_migrations);
 		}
 		return 1;
 	}
 
-	schedstat_inc(p->se.statistics.nr_failed_migrations_hot);
+	schedstat_inc(p->stats.nr_failed_migrations_hot);
 	return 0;
 }
@@ -11457,7 +11474,7 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 		if (!cfs_rq)
 			goto err;
 
-		se = kzalloc_node(sizeof(struct sched_entity),
+		se = kzalloc_node(sizeof(struct sched_entity_stats),
 				  GFP_KERNEL, cpu_to_node(i));
 		if (!se)
 			goto err_free_rq;
...
@@ -1009,8 +1009,8 @@ static void update_curr_rt(struct rq *rq)
 	if (unlikely((s64)delta_exec <= 0))
 		return;
 
-	schedstat_set(curr->se.statistics.exec_max,
-		      max(curr->se.statistics.exec_max, delta_exec));
+	schedstat_set(curr->stats.exec_max,
+		      max(curr->stats.exec_max, delta_exec));
 
 	curr->se.sum_exec_runtime += delta_exec;
 	account_group_exec_runtime(curr, delta_exec);
...
@@ -41,6 +41,7 @@ rq_sched_info_dequeue(struct rq *rq, unsigned long long delta)
 #define   schedstat_val_or_zero(var)	((schedstat_enabled()) ? (var) : 0)
 
 #else /* !CONFIG_SCHEDSTATS: */
+
 static inline void rq_sched_info_arrive  (struct rq *rq, unsigned long long delta) { }
 static inline void rq_sched_info_dequeue(struct rq *rq, unsigned long long delta) { }
 static inline void rq_sched_info_depart  (struct rq *rq, unsigned long long delta) { }
@@ -53,8 +54,26 @@ static inline void rq_sched_info_depart  (struct rq *rq, unsigned long long delta) { }
 # define   schedstat_set(var, val)	do { } while (0)
 # define   schedstat_val(var)		0
 # define   schedstat_val_or_zero(var)	0
 #endif /* CONFIG_SCHEDSTATS */
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
+struct sched_entity_stats {
+	struct sched_entity     se;
+	struct sched_statistics stats;
+} __no_randomize_layout;
+#endif
+
+static inline struct sched_statistics *
+__schedstats_from_se(struct sched_entity *se)
+{
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	if (!entity_is_task(se))
+		return &container_of(se, struct sched_entity_stats, se)->stats;
+#endif
+	return &task_of(se)->stats;
+}
+
 #ifdef CONFIG_PSI
 /*
  * PSI tracks state that persists across sleeps, such as iowaits and
...
@@ -78,8 +78,8 @@ static void put_prev_task_stop(struct rq *rq, struct task_struct *prev)
 	if (unlikely((s64)delta_exec < 0))
 		delta_exec = 0;
 
-	schedstat_set(curr->se.statistics.exec_max,
-		      max(curr->se.statistics.exec_max, delta_exec));
+	schedstat_set(curr->stats.exec_max,
+		      max(curr->stats.exec_max, delta_exec));
 
 	curr->se.sum_exec_runtime += delta_exec;
 	account_group_exec_runtime(curr, delta_exec);
...