Commit 51bf903b authored by Chengming Zhou's avatar Chengming Zhou Committed by Peter Zijlstra

sched/fair: Optimize and simplify rq leaf_cfs_rq_list

We notice the rq leaf_cfs_rq_list has two problems when do bugfix
backports and some test profiling.

1. cfs_rqs under throttled subtree could be added to the list, and
   make their fully decayed ancestors on the list, even though not needed.

2. #1 also make the leaf_cfs_rq_list management complex and error prone,
   this is the list of related bugfix so far:

   commit 31bc6aea ("sched/fair: Optimize update_blocked_averages()")
   commit fe61468b ("sched/fair: Fix enqueue_task_fair warning")
   commit b34cb07d ("sched/fair: Fix enqueue_task_fair() warning some more")
   commit 39f23ce0 ("sched/fair: Fix unthrottle_cfs_rq() for leaf_cfs_rq list")
   commit 0258bdfa ("sched/fair: Fix unfairness caused by missing load decay")
   commit a7b359fc ("sched/fair: Correctly insert cfs_rq's to list on unthrottle")
   commit fdaba61e ("sched/fair: Ensure that the CFS parent is added after unthrottling")
   commit 2630cde2 ("sched/fair: Add ancestors of unthrottled undecayed cfs_rq")

commit 31bc6aea ("sched/fair: Optimize update_blocked_averages()")
delete every cfs_rq under throttled subtree from rq->leaf_cfs_rq_list,
and delete the throttled_hierarchy() test in update_blocked_averages(),
which optimized update_blocked_averages().

But those later bugfix add cfs_rqs under throttled subtree back to
rq->leaf_cfs_rq_list again, with their fully decayed ancestors, for
the integrity of rq->leaf_cfs_rq_list.

This patch takes another method, skip all cfs_rqs under throttled
hierarchy when list_add_leaf_cfs_rq(), to completely make cfs_rqs
under throttled subtree off the leaf_cfs_rq_list.

So we don't need to consider throttled related things in
enqueue_entity(), unthrottle_cfs_rq() and enqueue_task_fair(),
which simplify the code a lot. Also optimize update_blocked_averages()
since cfs_rqs under throttled hierarchy and their ancestors
won't be on the leaf_cfs_rq_list.
Signed-off-by: default avatarChengming Zhou <zhouchengming@bytedance.com>
Signed-off-by: default avatarPeter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: default avatarVincent Guittot <vincent.guittot@linaro.org>
Link: https://lore.kernel.org/r/20220601021848.76943-1-zhouchengming@bytedance.com
parent f5b2eeb4
...@@ -3179,6 +3179,8 @@ void reweight_task(struct task_struct *p, int prio) ...@@ -3179,6 +3179,8 @@ void reweight_task(struct task_struct *p, int prio)
load->inv_weight = sched_prio_to_wmult[prio]; load->inv_weight = sched_prio_to_wmult[prio];
} }
static inline int throttled_hierarchy(struct cfs_rq *cfs_rq);
#ifdef CONFIG_FAIR_GROUP_SCHED #ifdef CONFIG_FAIR_GROUP_SCHED
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
/* /*
...@@ -3289,8 +3291,6 @@ static long calc_group_shares(struct cfs_rq *cfs_rq) ...@@ -3289,8 +3291,6 @@ static long calc_group_shares(struct cfs_rq *cfs_rq)
} }
#endif /* CONFIG_SMP */ #endif /* CONFIG_SMP */
static inline int throttled_hierarchy(struct cfs_rq *cfs_rq);
/* /*
* Recomputes the group entity based on the current state of its group * Recomputes the group entity based on the current state of its group
* runqueue. * runqueue.
...@@ -4403,16 +4403,11 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) ...@@ -4403,16 +4403,11 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
__enqueue_entity(cfs_rq, se); __enqueue_entity(cfs_rq, se);
se->on_rq = 1; se->on_rq = 1;
/* if (cfs_rq->nr_running == 1) {
* When bandwidth control is enabled, cfs might have been removed
* because of a parent been throttled but cfs->nr_running > 1. Try to
* add it unconditionally.
*/
if (cfs_rq->nr_running == 1 || cfs_bandwidth_used())
list_add_leaf_cfs_rq(cfs_rq);
if (cfs_rq->nr_running == 1)
check_enqueue_throttle(cfs_rq); check_enqueue_throttle(cfs_rq);
if (!throttled_hierarchy(cfs_rq))
list_add_leaf_cfs_rq(cfs_rq);
}
} }
static void __clear_buddies_last(struct sched_entity *se) static void __clear_buddies_last(struct sched_entity *se)
...@@ -5027,11 +5022,18 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq) ...@@ -5027,11 +5022,18 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
/* update hierarchical throttle state */ /* update hierarchical throttle state */
walk_tg_tree_from(cfs_rq->tg, tg_nop, tg_unthrottle_up, (void *)rq); walk_tg_tree_from(cfs_rq->tg, tg_nop, tg_unthrottle_up, (void *)rq);
/* Nothing to run but something to decay (on_list)? Complete the branch */
if (!cfs_rq->load.weight) { if (!cfs_rq->load.weight) {
if (cfs_rq->on_list) if (!cfs_rq->on_list)
goto unthrottle_throttle;
return; return;
/*
* Nothing to run but something to decay (on_list)?
* Complete the branch.
*/
for_each_sched_entity(se) {
if (list_add_leaf_cfs_rq(cfs_rq_of(se)))
break;
}
goto unthrottle_throttle;
} }
task_delta = cfs_rq->h_nr_running; task_delta = cfs_rq->h_nr_running;
...@@ -5069,31 +5071,12 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq) ...@@ -5069,31 +5071,12 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
/* end evaluation on encountering a throttled cfs_rq */ /* end evaluation on encountering a throttled cfs_rq */
if (cfs_rq_throttled(qcfs_rq)) if (cfs_rq_throttled(qcfs_rq))
goto unthrottle_throttle; goto unthrottle_throttle;
/*
* One parent has been throttled and cfs_rq removed from the
* list. Add it back to not break the leaf list.
*/
if (throttled_hierarchy(qcfs_rq))
list_add_leaf_cfs_rq(qcfs_rq);
} }
/* At this point se is NULL and we are at root level*/ /* At this point se is NULL and we are at root level*/
add_nr_running(rq, task_delta); add_nr_running(rq, task_delta);
unthrottle_throttle: unthrottle_throttle:
/*
* The cfs_rq_throttled() breaks in the above iteration can result in
* incomplete leaf list maintenance, resulting in triggering the
* assertion below.
*/
for_each_sched_entity(se) {
struct cfs_rq *qcfs_rq = cfs_rq_of(se);
if (list_add_leaf_cfs_rq(qcfs_rq))
break;
}
assert_list_leaf_cfs_rq(rq); assert_list_leaf_cfs_rq(rq);
/* Determine whether we need to wake up potentially idle CPU: */ /* Determine whether we need to wake up potentially idle CPU: */
...@@ -5748,13 +5731,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) ...@@ -5748,13 +5731,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
/* end evaluation on encountering a throttled cfs_rq */ /* end evaluation on encountering a throttled cfs_rq */
if (cfs_rq_throttled(cfs_rq)) if (cfs_rq_throttled(cfs_rq))
goto enqueue_throttle; goto enqueue_throttle;
/*
* One parent has been throttled and cfs_rq removed from the
* list. Add it back to not break the leaf list.
*/
if (throttled_hierarchy(cfs_rq))
list_add_leaf_cfs_rq(cfs_rq);
} }
/* At this point se is NULL and we are at root level*/ /* At this point se is NULL and we are at root level*/
...@@ -5778,21 +5754,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) ...@@ -5778,21 +5754,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
update_overutilized_status(rq); update_overutilized_status(rq);
enqueue_throttle: enqueue_throttle:
if (cfs_bandwidth_used()) {
/*
* When bandwidth control is enabled; the cfs_rq_throttled()
* breaks in the above iteration can result in incomplete
* leaf list maintenance, resulting in triggering the assertion
* below.
*/
for_each_sched_entity(se) {
cfs_rq = cfs_rq_of(se);
if (list_add_leaf_cfs_rq(cfs_rq))
break;
}
}
assert_list_leaf_cfs_rq(rq); assert_list_leaf_cfs_rq(rq);
hrtick_update(rq); hrtick_update(rq);
...@@ -11316,9 +11277,13 @@ static inline bool vruntime_normalized(struct task_struct *p) ...@@ -11316,9 +11277,13 @@ static inline bool vruntime_normalized(struct task_struct *p)
*/ */
static void propagate_entity_cfs_rq(struct sched_entity *se) static void propagate_entity_cfs_rq(struct sched_entity *se)
{ {
struct cfs_rq *cfs_rq; struct cfs_rq *cfs_rq = cfs_rq_of(se);
list_add_leaf_cfs_rq(cfs_rq_of(se)); if (cfs_rq_throttled(cfs_rq))
return;
if (!throttled_hierarchy(cfs_rq))
list_add_leaf_cfs_rq(cfs_rq);
/* Start to propagate at parent */ /* Start to propagate at parent */
se = se->parent; se = se->parent;
...@@ -11326,14 +11291,13 @@ static void propagate_entity_cfs_rq(struct sched_entity *se) ...@@ -11326,14 +11291,13 @@ static void propagate_entity_cfs_rq(struct sched_entity *se)
for_each_sched_entity(se) { for_each_sched_entity(se) {
cfs_rq = cfs_rq_of(se); cfs_rq = cfs_rq_of(se);
if (!cfs_rq_throttled(cfs_rq)){
update_load_avg(cfs_rq, se, UPDATE_TG); update_load_avg(cfs_rq, se, UPDATE_TG);
list_add_leaf_cfs_rq(cfs_rq);
continue;
}
if (list_add_leaf_cfs_rq(cfs_rq)) if (cfs_rq_throttled(cfs_rq))
break; break;
if (!throttled_hierarchy(cfs_rq))
list_add_leaf_cfs_rq(cfs_rq);
} }
} }
#else #else
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment