Commit cd21debe authored by Linus Torvalds

Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler fixes from Thomas Gleixner:
 "From the scheduler departement:

   - a bunch of sched deadline related fixes which deal with various
     buglets and corner cases.

   - two fixes for the loadavg spikes which are caused by the delayed
     NOHZ accounting"

* 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/deadline: Use deadline instead of period when calculating overflow
  sched/deadline: Throttle a constrained deadline task activated after the deadline
  sched/deadline: Make sure the replenishment timer fires in the next period
  sched/loadavg: Use {READ,WRITE}_ONCE() for sample window
  sched/loadavg: Avoid loadavg spikes caused by delayed NO_HZ accounting
  sched/deadline: Add missing update_rq_clock() in dl_task_timer()
parents b5f13082 2317d5f1
@@ -445,13 +445,13 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se,
  *
  * This function returns true if:
  *
- *   runtime / (deadline - t) > dl_runtime / dl_period ,
+ *   runtime / (deadline - t) > dl_runtime / dl_deadline ,
  *
  * IOW we can't recycle current parameters.
  *
- * Notice that the bandwidth check is done against the period. For
+ * Notice that the bandwidth check is done against the deadline. For
  * task with deadline equal to period this is the same of using
- * dl_deadline instead of dl_period in the equation above.
+ * dl_period instead of dl_deadline in the equation above.
  */
 static bool dl_entity_overflow(struct sched_dl_entity *dl_se,
                                struct sched_dl_entity *pi_se, u64 t)
@@ -476,7 +476,7 @@ static bool dl_entity_overflow(struct sched_dl_entity *dl_se,
          * of anything below microseconds resolution is actually fiction
          * (but still we want to give the user that illusion >;).
          */
-        left = (pi_se->dl_period >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);
+        left = (pi_se->dl_deadline >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);
         right = ((dl_se->deadline - t) >> DL_SCALE) *
                 (pi_se->dl_runtime >> DL_SCALE);
 
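
Note (illustrative aside, not part of the patch): the test above is the cross-multiplied form of runtime / (deadline - t) > dl_runtime / dl_deadline, so no division is needed at wakeup time. The standalone program below replays that arithmetic with made-up numbers for a constrained task; the struct, the values and the omission of the DL_SCALE shift are simplifications for the example only, not kernel code.

/* Standalone illustration of the dl_entity_overflow() test after this
 * change.  Types, values, and the missing DL_SCALE shift are
 * simplifications for the example only. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct toy_dl {
        uint64_t runtime;       /* remaining runtime at wakeup, ns */
        uint64_t deadline;      /* current absolute deadline, ns */
        uint64_t dl_runtime;    /* reserved runtime per instance, ns */
        uint64_t dl_deadline;   /* relative deadline, ns */
        uint64_t dl_period;     /* period, ns */
};

/* true == "can't recycle current parameters", i.e. replenish */
static bool overflow(const struct toy_dl *se, uint64_t t, uint64_t rel)
{
        /* runtime / (deadline - t) > dl_runtime / rel, cross-multiplied */
        return rel * se->runtime > (se->deadline - t) * se->dl_runtime;
}

int main(void)
{
        /* constrained task: 2ms runtime, 20ms relative deadline, 2s period */
        struct toy_dl se = {
                .runtime     =    1500000ULL,   /* 1.5ms left */
                .deadline    = 1018000000ULL,   /* 18ms away from t */
                .dl_runtime  =    2000000ULL,
                .dl_deadline =   20000000ULL,
                .dl_period   = 2000000000ULL,
        };
        uint64_t t = 1000000000ULL;             /* wakeup instant, ns */

        /* deadline-based check (density): 1.5/18 <= 2/20, keep parameters */
        printf("vs dl_deadline: %d\n", overflow(&se, t, se.dl_deadline));
        /* period-based check (utilization): 1.5/18 > 2/2000, would replenish */
        printf("vs dl_period:   %d\n", overflow(&se, t, se.dl_period));
        return 0;
}

For these numbers the deadline-based check keeps the current parameters while the period-based check would force a replenishment, which illustrates why the two forms can disagree for constrained tasks.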
@@ -505,10 +505,15 @@ static void update_dl_entity(struct sched_dl_entity *dl_se,
         }
 }
 
+static inline u64 dl_next_period(struct sched_dl_entity *dl_se)
+{
+        return dl_se->deadline - dl_se->dl_deadline + dl_se->dl_period;
+}
+
 /*
  * If the entity depleted all its runtime, and if we want it to sleep
  * while waiting for some new execution time to become available, we
- * set the bandwidth enforcement timer to the replenishment instant
+ * set the bandwidth replenishment timer to the replenishment instant
  * and try to activate it.
  *
  * Notice that it is important for the caller to know if the timer
@@ -530,7 +535,7 @@ static int start_dl_timer(struct task_struct *p)
          * that it is actually coming from rq->clock and not from
          * hrtimer's time base reading.
          */
-        act = ns_to_ktime(dl_se->deadline);
+        act = ns_to_ktime(dl_next_period(dl_se));
         now = hrtimer_cb_get_time(timer);
         delta = ktime_to_ns(now) - rq_clock(rq);
         act = ktime_add_ns(act, delta);
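
Note (illustrative aside, not part of the patch): start_dl_timer() wants the timer to fire at an instant expressed in rq_clock() time, while the hrtimer runs on its own base, so the lines above shift the expiry by the current offset between the two clocks. A minimal standalone sketch of that adjustment, with made-up values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        int64_t next_period = 600000000;   /* wanted expiry, rq_clock domain (ns) */
        int64_t rq_clock    = 530000000;   /* current rq_clock value (ns) */
        int64_t hrtimer_now = 912345678;   /* current hrtimer base time (ns) */

        /* same instant expressed in the hrtimer's clock domain */
        int64_t expires = next_period + (hrtimer_now - rq_clock);

        printf("arm hrtimer at %lld ns (%lld ns from now)\n",
               (long long)expires, (long long)(expires - hrtimer_now));
        return 0;
}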
@@ -638,6 +643,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
                 lockdep_unpin_lock(&rq->lock, rf.cookie);
                 rq = dl_task_offline_migration(rq, p);
                 rf.cookie = lockdep_pin_lock(&rq->lock);
+                update_rq_clock(rq);
 
                 /*
                  * Now that the task has been migrated to the new RQ and we
@@ -689,6 +695,37 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se)
         timer->function = dl_task_timer;
 }
 
+/*
+ * During the activation, CBS checks if it can reuse the current task's
+ * runtime and period. If the deadline of the task is in the past, CBS
+ * cannot use the runtime, and so it replenishes the task. This rule
+ * works fine for implicit deadline tasks (deadline == period), and the
+ * CBS was designed for implicit deadline tasks. However, a task with
+ * constrained deadline (deadine < period) might be awakened after the
+ * deadline, but before the next period. In this case, replenishing the
+ * task would allow it to run for runtime / deadline. As in this case
+ * deadline < period, CBS enables a task to run for more than the
+ * runtime / period. In a very loaded system, this can cause a domino
+ * effect, making other tasks miss their deadlines.
+ *
+ * To avoid this problem, in the activation of a constrained deadline
+ * task after the deadline but before the next period, throttle the
+ * task and set the replenishing timer to the begin of the next period,
+ * unless it is boosted.
+ */
+static inline void dl_check_constrained_dl(struct sched_dl_entity *dl_se)
+{
+        struct task_struct *p = dl_task_of(dl_se);
+        struct rq *rq = rq_of_dl_rq(dl_rq_of_se(dl_se));
+
+        if (dl_time_before(dl_se->deadline, rq_clock(rq)) &&
+            dl_time_before(rq_clock(rq), dl_next_period(dl_se))) {
+                if (unlikely(dl_se->dl_boosted || !start_dl_timer(p)))
+                        return;
+                dl_se->dl_throttled = 1;
+        }
+}
+
 static
 int dl_runtime_exceeded(struct sched_dl_entity *dl_se)
 {
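
Note (illustrative aside, not part of the patch): for a throttled constrained task the replenishment instant used above is dl_next_period() = deadline - dl_deadline + dl_period, i.e. the start of the next period of the instance the current deadline belongs to. The standalone sketch below walks the window check with made-up numbers; names and values are illustrative only, and the plain comparisons ignore the clock wraparound that dl_time_before() handles.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t dl_deadline = 10000000ULL;     /*  10ms relative deadline */
        uint64_t dl_period   = 100000000ULL;    /* 100ms period */
        uint64_t deadline    = 510000000ULL;    /* absolute deadline of the
                                                 * instance started at 500ms */
        /* start of the next period = deadline - dl_deadline + dl_period */
        uint64_t next_period = deadline - dl_deadline + dl_period;
        uint64_t wakeup      = 530000000ULL;    /* woken at 530ms */

        if (deadline < wakeup && wakeup < next_period)
                printf("throttle until %llu ns (next period)\n",
                       (unsigned long long)next_period);
        else
                printf("no throttling needed\n");
        return 0;
}

A task woken inside that window is throttled until the next period, so it cannot keep consuming more than its reserved runtime per period by repeatedly waking just after its deadline, which is the over-consumption the comment above describes.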
@@ -922,6 +959,11 @@ static void dequeue_dl_entity(struct sched_dl_entity *dl_se)
         __dequeue_dl_entity(dl_se);
 }
 
+static inline bool dl_is_constrained(struct sched_dl_entity *dl_se)
+{
+        return dl_se->dl_deadline < dl_se->dl_period;
+}
+
 static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
 {
         struct task_struct *pi_task = rt_mutex_get_top_task(p);
@@ -947,6 +989,15 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
                 return;
         }
 
+        /*
+         * Check if a constrained deadline task was activated
+         * after the deadline but before the next period.
+         * If that is the case, the task will be throttled and
+         * the replenishment timer will be set to the next period.
+         */
+        if (!p->dl.dl_throttled && dl_is_constrained(&p->dl))
+                dl_check_constrained_dl(&p->dl);
+
         /*
          * If p is throttled, we do nothing. In fact, if it exhausted
          * its budget it needs a replenishment and, since it now is on
@@ -169,7 +169,7 @@ static inline int calc_load_write_idx(void)
          * If the folding window started, make sure we start writing in the
          * next idle-delta.
          */
-        if (!time_before(jiffies, calc_load_update))
+        if (!time_before(jiffies, READ_ONCE(calc_load_update)))
                 idx++;
 
         return idx & 1;
@@ -202,8 +202,9 @@ void calc_load_exit_idle(void)
         struct rq *this_rq = this_rq();
 
         /*
-         * If we're still before the sample window, we're done.
+         * If we're still before the pending sample window, we're done.
          */
+        this_rq->calc_load_update = READ_ONCE(calc_load_update);
         if (time_before(jiffies, this_rq->calc_load_update))
                 return;
 
@@ -212,7 +213,6 @@ void calc_load_exit_idle(void)
          * accounted through the nohz accounting, so skip the entire deal and
          * sync up for the next window.
          */
-        this_rq->calc_load_update = calc_load_update;
         if (time_before(jiffies, this_rq->calc_load_update + 10))
                 this_rq->calc_load_update += LOAD_FREQ;
 }
@@ -308,13 +308,15 @@ calc_load_n(unsigned long load, unsigned long exp,
  */
 static void calc_global_nohz(void)
 {
+        unsigned long sample_window;
         long delta, active, n;
 
-        if (!time_before(jiffies, calc_load_update + 10)) {
+        sample_window = READ_ONCE(calc_load_update);
+        if (!time_before(jiffies, sample_window + 10)) {
                 /*
                  * Catch-up, fold however many we are behind still
                  */
-                delta = jiffies - calc_load_update - 10;
+                delta = jiffies - sample_window - 10;
                 n = 1 + (delta / LOAD_FREQ);
 
                 active = atomic_long_read(&calc_load_tasks);
@@ -324,7 +326,7 @@ static void calc_global_nohz(void)
                 avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n);
                 avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n);
 
-                calc_load_update += n * LOAD_FREQ;
+                WRITE_ONCE(calc_load_update, sample_window + n * LOAD_FREQ);
         }
 
         /*
@@ -352,9 +354,11 @@ static inline void calc_global_nohz(void) { }
  */
 void calc_global_load(unsigned long ticks)
 {
+        unsigned long sample_window;
         long active, delta;
 
-        if (time_before(jiffies, calc_load_update + 10))
+        sample_window = READ_ONCE(calc_load_update);
+        if (time_before(jiffies, sample_window + 10))
                 return;
 
         /*
@@ -371,7 +375,7 @@ void calc_global_load(unsigned long ticks)
         avenrun[1] = calc_load(avenrun[1], EXP_5, active);
         avenrun[2] = calc_load(avenrun[2], EXP_15, active);
 
-        calc_load_update += LOAD_FREQ;
+        WRITE_ONCE(calc_load_update, sample_window + LOAD_FREQ);
 
         /*
          * In case we idled for multiple LOAD_FREQ intervals, catch up in bulk.
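
Note (illustrative aside, not part of the patch): the loadavg hunks all follow the same pattern: take one snapshot of the shared calc_load_update window with READ_ONCE(), use that snapshot both for the "is the window due?" test and for computing the next window, and publish the new value with WRITE_ONCE(), so a concurrent update cannot make the two uses disagree and the compiler cannot re-read or tear the access. The userspace sketch below mimics that shape with C11 relaxed atomics standing in for READ_ONCE()/WRITE_ONCE(); the names, the HZ/LOAD_FREQ values and the plain '<' comparison (no jiffies wraparound handling) are simplifications.

#include <stdatomic.h>
#include <stdio.h>

#define HZ        100
#define LOAD_FREQ (5 * HZ)                      /* simplified window length */

static _Atomic unsigned long calc_load_update;  /* shared sample window */

static void sample_once(unsigned long jiffies)
{
        unsigned long sample_window;

        /* one snapshot; a concurrent writer cannot change it under us */
        sample_window = atomic_load_explicit(&calc_load_update,
                                             memory_order_relaxed);
        if (jiffies < sample_window + 10)
                return;                         /* window not due yet */

        printf("folding sample at jiffies=%lu (window=%lu)\n",
               jiffies, sample_window);

        /* advance the window relative to the same snapshot */
        atomic_store_explicit(&calc_load_update, sample_window + LOAD_FREQ,
                              memory_order_relaxed);
}

int main(void)
{
        atomic_store(&calc_load_update, 1000);
        sample_once(1005);      /* too early, skipped */
        sample_once(1012);      /* due: folds, window -> 1500 */
        sample_once(1511);      /* due again: folds, window -> 2000 */
        return 0;
}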