Commit 0f655038 authored by Vincent Guittot, committed by Khalid Elmously

sched/fair: Fix O(nr_cgroups) in the load balancing path

CVE-2018-20784

This re-applies the commit reverted here:

  commit c40f7d74 ("sched/fair: Fix infinite loop in update_blocked_averages() by reverting a9e7f654")

I.e. now that cfs_rq can be safely removed/added in the list, we can re-apply:

 commit a9e7f654 ("sched/fair: Fix O(nr_cgroups) in load balance path")
Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: sargun@sargun.me
Cc: tj@kernel.org
Cc: xiexiuqi@huawei.com
Cc: xiezhipeng1@huawei.com
Link: https://lkml.kernel.org/r/1549469662-13614-3-git-send-email-vincent.guittot@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(backported from commit 039ae8bc)
[ Connor Kuehl: In 'cfs_rq_is_decayed' the data member
  'runnable_load_sum' belongs to struct cfs_rq and not sched_avg, so
  update that. Some instances of 'for_each_leaf_cfs_rq' required manual
  updating to the new 'for_each_leaf_cfs_rq_safe' and the last hunk for
  'update_blocked_averages' required manual placement. ]
Signed-off-by: Connor Kuehl <connor.kuehl@canonical.com>
Acked-by: Sultan Alsawaf <sultan.alsawaf@canonical.com>
Acked-by: Stefan Bader <stefan.bader@canonical.com>
Signed-off-by: Khalid Elmously <khalid.elmously@canonical.com>
parent bb0c07fb
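Before the diff: a brief illustration of the pattern this change reintroduces. The sketch below is standalone userspace C, not kernel code, and its names (struct node, 'load') are invented for the example; it only mimics what for_each_leaf_cfs_rq_safe() / list_for_each_entry_safe() make possible, namely unlinking the element currently being visited (as update_blocked_averages() now does for fully decayed cfs_rqs) without corrupting the walk.

/* Minimal userspace sketch (not kernel code) of the "safe" iteration
 * pattern: remember the next node before visiting the current one, so
 * the current node may be unlinked and freed mid-walk.
 */
#include <stdio.h>
#include <stdlib.h>

struct node {
	int load;           /* stand-in for a cfs_rq's blocked load */
	struct node *next;
};

int main(void)
{
	/* Build a short list: 3 -> 0 -> 5 -> 0 */
	int vals[] = { 3, 0, 5, 0 };
	struct node *head = NULL, **tail = &head;
	for (unsigned i = 0; i < sizeof(vals) / sizeof(vals[0]); i++) {
		struct node *n = malloc(sizeof(*n));
		n->load = vals[i];
		n->next = NULL;
		*tail = n;
		tail = &n->next;
	}

	/* "Safe" walk: fetch 'pos' (the next node) before touching 'cur',
	 * so removing 'cur' cannot break the iteration. */
	struct node **link = &head;
	for (struct node *cur = head, *pos; cur; cur = pos) {
		pos = cur->next;
		if (cur->load == 0) {   /* analogous to cfs_rq_is_decayed() */
			*link = pos;    /* unlink the "decayed" entry */
			free(cur);
			continue;
		}
		printf("keep node with load %d\n", cur->load);
		link = &cur->next;
	}

	/* Entries remaining after the walk: 3 and 5 */
	for (struct node *cur = head; cur; cur = cur->next)
		printf("still on list: %d\n", cur->load);

	while (head) {
		struct node *n = head;
		head = head->next;
		free(n);
	}
	return 0;
}

A plain entry walk would advance through the just-freed node; caching the next pointer up front is what makes removal during the walk safe.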
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -376,9 +376,10 @@ static inline void assert_list_leaf_cfs_rq(struct rq *rq)
 	WARN_ON_ONCE((rq->tmp_alone_branch != &rq->leaf_cfs_rq_list));
 }
 
-/* Iterate through all cfs_rq's on a runqueue in bottom-up order */
-#define for_each_leaf_cfs_rq(rq, cfs_rq) \
-	list_for_each_entry_rcu(cfs_rq, &rq->leaf_cfs_rq_list, leaf_cfs_rq_list)
+/* Iterate thr' all leaf cfs_rq's on a runqueue */
+#define for_each_leaf_cfs_rq_safe(rq, cfs_rq, pos)			\
+	list_for_each_entry_safe(cfs_rq, pos, &rq->leaf_cfs_rq_list,	\
+				 leaf_cfs_rq_list)
 
 /* Do the two (enqueued) entities belong to the same group ? */
 static inline struct cfs_rq *
@@ -476,8 +477,8 @@ static inline void assert_list_leaf_cfs_rq(struct rq *rq)
 {
 }
 
-#define for_each_leaf_cfs_rq(rq, cfs_rq)	\
-		for (cfs_rq = &rq->cfs; cfs_rq; cfs_rq = NULL)
+#define for_each_leaf_cfs_rq_safe(rq, cfs_rq, pos)	\
+		for (cfs_rq = &rq->cfs, pos = NULL; cfs_rq; cfs_rq = pos)
 
 static inline struct sched_entity *parent_entity(struct sched_entity *se)
 {
@@ -4203,9 +4204,9 @@ static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
 
 static void __maybe_unused update_runtime_enabled(struct rq *rq)
 {
-	struct cfs_rq *cfs_rq;
+	struct cfs_rq *cfs_rq, *pos;
 
-	for_each_leaf_cfs_rq(rq, cfs_rq) {
+	for_each_leaf_cfs_rq_safe(rq, cfs_rq, pos) {
 		struct cfs_bandwidth *cfs_b = &cfs_rq->tg->cfs_bandwidth;
 
 		raw_spin_lock(&cfs_b->lock);
@@ -4218,7 +4219,7 @@ static void __maybe_unused unthrottle_offline_cfs_rqs(struct rq *rq)
 {
 	struct cfs_rq *cfs_rq, *pos;
 
-	for_each_leaf_cfs_rq(rq, cfs_rq) {
+	for_each_leaf_cfs_rq_safe(rq, cfs_rq, pos) {
 		if (!cfs_rq->runtime_enabled)
 			continue;
 
@@ -6132,10 +6133,27 @@ static void attach_tasks(struct lb_env *env)
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 
+static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
+{
+	if (cfs_rq->load.weight)
+		return false;
+
+	if (cfs_rq->avg.load_sum)
+		return false;
+
+	if (cfs_rq->avg.util_sum)
+		return false;
+
+	if (cfs_rq->runnable_load_sum)
+		return false;
+
+	return true;
+}
+
 static void update_blocked_averages(int cpu)
 {
 	struct rq *rq = cpu_rq(cpu);
-	struct cfs_rq *cfs_rq;
+	struct cfs_rq *cfs_rq, *pos;
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&rq->lock, flags);
@@ -6145,9 +6163,15 @@ static void update_blocked_averages(int cpu)
 	/*
 	 * Iterates the task_group tree in a bottom up fashion, see
 	 * list_add_leaf_cfs_rq() for details.
 	 */
-	for_each_leaf_cfs_rq(rq, cfs_rq) {
+	for_each_leaf_cfs_rq_safe(rq, cfs_rq, pos) {
 		if (update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq))
 			update_tg_load_avg(cfs_rq, 0);
+		/*
+		 * There can be a lot of idle CPU cgroups. Don't let fully
+		 * decayed cfs_rqs linger on the list.
+		 */
+		if (cfs_rq_is_decayed(cfs_rq))
+			list_del_leaf_cfs_rq(cfs_rq);
 	}
 	raw_spin_unlock_irqrestore(&rq->lock, flags);
@@ -8555,10 +8579,10 @@ const struct sched_class fair_sched_class = {
 #ifdef CONFIG_SCHED_DEBUG
 void print_cfs_stats(struct seq_file *m, int cpu)
 {
-	struct cfs_rq *cfs_rq;
+	struct cfs_rq *cfs_rq, *pos;
 
 	rcu_read_lock();
-	for_each_leaf_cfs_rq(cpu_rq(cpu), cfs_rq)
+	for_each_leaf_cfs_rq_safe(cpu_rq(cpu), cfs_rq, pos)
 		print_cfs_rq(m, cpu, cfs_rq);
 	rcu_read_unlock();
 }