Commit 05296e75 authored by Peter Zijlstra's avatar Peter Zijlstra Committed by Ingo Molnar

sched/fair: Fix corner case in __accumulate_sum()

Paul noticed that in the (periods >= LOAD_AVG_MAX_N) case in
__accumulate_sum(), the returned contribution value (LOAD_AVG_MAX) is
incorrect.

This is because at this point, the decay_load() on the old state --
the first step in accumulate_sum() -- will not have resulted in 0, and
will therefore result in a sum larger than the maximum value of our
series. Obviously broken.

Note that:

	decay_load(LOAD_AVG_MAX, LOAD_AVG_MAX_N) =

                1   (345 / 32)
	47742 * - ^            = ~27
                2

Not to mention that any further contribution from the d3 segment (our
new period) would also push it over the maximum.

Solve this by noting that we can write our c2 term:

		    p
	c2 = 1024 \Sum y^n
		   n=1

In terms of our maximum value:

		    inf		      inf	  p
	max = 1024 \Sum y^n = 1024 ( \Sum y^n + \Sum y^n + y^0 )
		    n=0		      n=p+1	 n=1

Further note that:

           inf              inf            inf
        ( \Sum y^n ) y^p = \Sum y^(n+p) = \Sum y^n
           n=0              n=0            n=p

Combined that gives us:

		    p
	c2 = 1024 \Sum y^n
		   n=1

		     inf        inf
	   = 1024 ( \Sum y^n - \Sum y^n - y^0 )
		     n=0        n=p+1

	   = max - (max y^(p+1)) - 1024

Further simplify things by dealing with p=0 early on.
Reported-by: default avatarPaul Turner <pjt@google.com>
Signed-off-by: default avatarPeter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yuyang Du <yuyang.du@intel.com>
Cc: linux-kernel@vger.kernel.org
Fixes: a481db34 ("sched/fair: Optimize ___update_sched_avg()")
Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
parent 717a94b5
...@@ -727,7 +727,6 @@ static unsigned long task_h_load(struct task_struct *p); ...@@ -727,7 +727,6 @@ static unsigned long task_h_load(struct task_struct *p);
*/ */
#define LOAD_AVG_PERIOD 32 #define LOAD_AVG_PERIOD 32
#define LOAD_AVG_MAX 47742 /* maximum possible load avg */ #define LOAD_AVG_MAX 47742 /* maximum possible load avg */
#define LOAD_AVG_MAX_N 345 /* number of full periods to produce LOAD_AVG_MAX */
/* Give new sched_entity start runnable values to heavy its load in infant time */ /* Give new sched_entity start runnable values to heavy its load in infant time */
void init_entity_runnable_average(struct sched_entity *se) void init_entity_runnable_average(struct sched_entity *se)
...@@ -2743,26 +2742,6 @@ static const u32 runnable_avg_yN_inv[] = { ...@@ -2743,26 +2742,6 @@ static const u32 runnable_avg_yN_inv[] = {
0x85aac367, 0x82cd8698, 0x85aac367, 0x82cd8698,
}; };
/*
* Precomputed \Sum y^k { 1<=k<=n }. These are floor(true_value) to prevent
* over-estimates when re-combining.
*/
static const u32 runnable_avg_yN_sum[] = {
0, 1002, 1982, 2941, 3880, 4798, 5697, 6576, 7437, 8279, 9103,
9909,10698,11470,12226,12966,13690,14398,15091,15769,16433,17082,
17718,18340,18949,19545,20128,20698,21256,21802,22336,22859,23371,
};
/*
* Precomputed \Sum y^k { 1<=k<=n, where n%32=0). Values are rolled down to
* lower integers. See Documentation/scheduler/sched-avg.txt how these
* were generated:
*/
static const u32 __accumulated_sum_N32[] = {
0, 23371, 35056, 40899, 43820, 45281,
46011, 46376, 46559, 46650, 46696, 46719,
};
/* /*
* Approximate: * Approximate:
* val * y^n, where y^32 ~= 0.5 (~1 scheduling period) * val * y^n, where y^32 ~= 0.5 (~1 scheduling period)
...@@ -2771,9 +2750,7 @@ static u64 decay_load(u64 val, u64 n) ...@@ -2771,9 +2750,7 @@ static u64 decay_load(u64 val, u64 n)
{ {
unsigned int local_n; unsigned int local_n;
if (!n) if (unlikely(n > LOAD_AVG_PERIOD * 63))
return val;
else if (unlikely(n > LOAD_AVG_PERIOD * 63))
return 0; return 0;
/* after bounds checking we can collapse to 32-bit */ /* after bounds checking we can collapse to 32-bit */
...@@ -2795,40 +2772,25 @@ static u64 decay_load(u64 val, u64 n) ...@@ -2795,40 +2772,25 @@ static u64 decay_load(u64 val, u64 n)
return val; return val;
} }
static u32 __accumulate_sum(u64 periods, u32 period_contrib, u32 remainder) static u32 __accumulate_pelt_segments(u64 periods, u32 d1, u32 d3)
{ {
u32 c1, c2, c3 = remainder; /* y^0 == 1 */ u32 c1, c2, c3 = d3; /* y^0 == 1 */
if (!periods)
return remainder - period_contrib;
if (unlikely(periods >= LOAD_AVG_MAX_N))
return LOAD_AVG_MAX;
/* /*
* c1 = d1 y^(p+1) * c1 = d1 y^(p+1)
*/ */
c1 = decay_load((u64)(1024 - period_contrib), periods); c1 = decay_load((u64)d1, periods);
periods -= 1;
/* /*
* For updates fully spanning n periods, the contribution to runnable * p
* average will be:
*
* c2 = 1024 \Sum y^n * c2 = 1024 \Sum y^n
* n=1
* *
* We can compute this reasonably efficiently by combining: * inf inf
* * = 1024 ( \Sum y^n - \Sum y^n - y^0 )
* y^PERIOD = 1/2 with precomputed 1024 \Sum y^n {for: n < PERIOD} * n=0 n=p+1
*/ */
if (likely(periods <= LOAD_AVG_PERIOD)) { c2 = LOAD_AVG_MAX - decay_load(LOAD_AVG_MAX, periods) - 1024;
c2 = runnable_avg_yN_sum[periods];
} else {
c2 = __accumulated_sum_N32[periods/LOAD_AVG_PERIOD];
periods %= LOAD_AVG_PERIOD;
c2 = decay_load(c2, periods);
c2 += runnable_avg_yN_sum[periods];
}
return c1 + c2 + c3; return c1 + c2 + c3;
} }
...@@ -2861,8 +2823,8 @@ accumulate_sum(u64 delta, int cpu, struct sched_avg *sa, ...@@ -2861,8 +2823,8 @@ accumulate_sum(u64 delta, int cpu, struct sched_avg *sa,
unsigned long weight, int running, struct cfs_rq *cfs_rq) unsigned long weight, int running, struct cfs_rq *cfs_rq)
{ {
unsigned long scale_freq, scale_cpu; unsigned long scale_freq, scale_cpu;
u32 contrib = (u32)delta; /* p == 0 -> delta < 1024 */
u64 periods; u64 periods;
u32 contrib;
scale_freq = arch_scale_freq_capacity(NULL, cpu); scale_freq = arch_scale_freq_capacity(NULL, cpu);
scale_cpu = arch_scale_cpu_capacity(NULL, cpu); scale_cpu = arch_scale_cpu_capacity(NULL, cpu);
...@@ -2880,13 +2842,14 @@ accumulate_sum(u64 delta, int cpu, struct sched_avg *sa, ...@@ -2880,13 +2842,14 @@ accumulate_sum(u64 delta, int cpu, struct sched_avg *sa,
decay_load(cfs_rq->runnable_load_sum, periods); decay_load(cfs_rq->runnable_load_sum, periods);
} }
sa->util_sum = decay_load((u64)(sa->util_sum), periods); sa->util_sum = decay_load((u64)(sa->util_sum), periods);
}
/* /*
* Step 2 * Step 2
*/ */
delta %= 1024; delta %= 1024;
contrib = __accumulate_sum(periods, sa->period_contrib, delta); contrib = __accumulate_pelt_segments(periods,
1024 - sa->period_contrib, delta);
}
sa->period_contrib = delta; sa->period_contrib = delta;
contrib = cap_scale(contrib, scale_freq); contrib = cap_scale(contrib, scale_freq);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment