Commit 9edfbfed authored by Peter Zijlstra, committed by Ingo Molnar

sched/core: Rework rq->clock update skips

The original purpose of rq::skip_clock_update was to avoid 'costly' clock
updates for back-to-back wakeup-preempt pairs. The big problem with it
has always been that the rq-wide flag is unaware of the context in which
the skip was requested and therefore causes indiscriminate clock skips.

Rework the entire thing and create a sense of context by allowing only
schedule() to skip clock updates. (XXX can we measure the cost of the
added store?)

By ensuring that only schedule() can ever skip an update, we guarantee
we are never more than one tick behind on the update.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: umgwanakikbuti@gmail.com
Link: http://lkml.kernel.org/r/20150105103554.432381549@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent cebde6d6
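
A minimal standalone sketch of the clock_skip_update lifecycle described above, assuming a stripped-down struct rq: RQCF_REQ_SKIP, RQCF_ACT_SKIP, rq_clock_skip_update() and the clock_skip_update field mirror the diff below, while schedule_model() and the main() driver are hypothetical stand-ins for __schedule() and its callers.

/*
 * Toy userspace model (not kernel code) of the scheme this commit
 * introduces: callers may only *request* a skip (RQCF_REQ_SKIP);
 * the scheduler promotes that request to an *active* skip
 * (RQCF_ACT_SKIP), which update_rq_clock() then honours once.
 */
#include <stdbool.h>
#include <stdio.h>

#define RQCF_REQ_SKIP	0x01
#define RQCF_ACT_SKIP	0x02

struct rq {
	unsigned int clock_skip_update;
	unsigned long long clock;
};

static void rq_clock_skip_update(struct rq *rq, bool skip)
{
	if (skip)
		rq->clock_skip_update |= RQCF_REQ_SKIP;
	else
		rq->clock_skip_update &= ~RQCF_REQ_SKIP;
}

static void update_rq_clock(struct rq *rq, unsigned long long now)
{
	if (rq->clock_skip_update & RQCF_ACT_SKIP)
		return;			/* only an ACT(ive) skip is honoured */
	rq->clock = now;
}

/* Hypothetical stand-in for the relevant steps of __schedule(). */
static void schedule_model(struct rq *rq, unsigned long long now)
{
	rq->clock_skip_update <<= 1;	/* promote REQ to ACT */
	update_rq_clock(rq, now);	/* skipped iff a request was pending */
	rq->clock_skip_update = 0;	/* at most one update is ever skipped */
}

int main(void)
{
	struct rq rq = { 0, 0 };

	update_rq_clock(&rq, 100);	/* normal update: clock = 100 */
	rq_clock_skip_update(&rq, true);/* e.g. a wakeup-preempt requests a skip */
	schedule_model(&rq, 200);	/* update skipped: clock stays 100 */
	schedule_model(&rq, 300);	/* no pending request: clock = 300 */

	printf("clock = %llu\n", rq.clock);
	return 0;
}

Because only the scheduler path ever shifts REQ into ACT and then clears the word, at most one clock update is skipped per pass through schedule(), which is where the "never more than one tick behind" guarantee in the message comes from.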
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -119,7 +119,9 @@ void update_rq_clock(struct rq *rq)
 {
 	s64 delta;
 
-	if (rq->skip_clock_update > 0)
+	lockdep_assert_held(&rq->lock);
+
+	if (rq->clock_skip_update & RQCF_ACT_SKIP)
 		return;
 
 	delta = sched_clock_cpu(cpu_of(rq)) - rq->clock;
@@ -1046,7 +1048,7 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
 	 * this case, we can save a useless back to back clock update.
 	 */
 	if (task_on_rq_queued(rq->curr) && test_tsk_need_resched(rq->curr))
-		rq->skip_clock_update = 1;
+		rq_clock_skip_update(rq, true);
 }
 
 #ifdef CONFIG_SMP
@@ -2779,6 +2781,8 @@ static void __sched __schedule(void)
 	smp_mb__before_spinlock();
 	raw_spin_lock_irq(&rq->lock);
 
+	rq->clock_skip_update <<= 1; /* promote REQ to ACT */
+
 	switch_count = &prev->nivcsw;
 	if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
 		if (unlikely(signal_pending_state(prev->state, prev))) {
@@ -2803,13 +2807,13 @@ static void __sched __schedule(void)
 		switch_count = &prev->nvcsw;
 	}
 
-	if (task_on_rq_queued(prev) || rq->skip_clock_update < 0)
+	if (task_on_rq_queued(prev))
 		update_rq_clock(rq);
 
 	next = pick_next_task(rq, prev);
 	clear_tsk_need_resched(prev);
 	clear_preempt_need_resched();
-	rq->skip_clock_update = 0;
+	rq->clock_skip_update = 0;
 
 	if (likely(prev != next)) {
 		rq->nr_switches++;
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5156,7 +5156,7 @@ static void yield_task_fair(struct rq *rq)
 		 * so we don't do microscopic update in schedule()
 		 * and double the fastpath cost.
 		 */
-		rq->skip_clock_update = 1;
+		rq_clock_skip_update(rq, true);
 	}
 
 	set_skip_buddy(se);
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -831,11 +831,14 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
 				enqueue = 1;
 
 			/*
-			 * Force a clock update if the CPU was idle,
-			 * lest wakeup -> unthrottle time accumulate.
+			 * When we're idle and a woken (rt) task is
+			 * throttled check_preempt_curr() will set
+			 * skip_update and the time between the wakeup
+			 * and this unthrottle will get accounted as
+			 * 'runtime'.
 			 */
 			if (rt_rq->rt_nr_running && rq->curr == rq->idle)
-				rq->skip_clock_update = -1;
+				rq_clock_skip_update(rq, false);
 		}
 		if (rt_rq->rt_time || rt_rq->rt_nr_running)
 			idle = 0;
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -558,8 +558,6 @@ struct rq {
 #ifdef CONFIG_NO_HZ_FULL
 	unsigned long last_sched_tick;
 #endif
-	int skip_clock_update;
-
 	/* capture load from *all* tasks on this cpu: */
 	struct load_weight load;
 	unsigned long nr_load_updates;
@@ -588,6 +586,7 @@ struct rq {
 	unsigned long next_balance;
 	struct mm_struct *prev_mm;
 
+	unsigned int clock_skip_update;
 	u64 clock;
 	u64 clock_task;
 
@@ -704,6 +703,18 @@ static inline u64 rq_clock_task(struct rq *rq)
 	return rq->clock_task;
 }
 
+#define RQCF_REQ_SKIP	0x01
+#define RQCF_ACT_SKIP	0x02
+
+static inline void rq_clock_skip_update(struct rq *rq, bool skip)
+{
+	lockdep_assert_held(&rq->lock);
+	if (skip)
+		rq->clock_skip_update |= RQCF_REQ_SKIP;
+	else
+		rq->clock_skip_update &= ~RQCF_REQ_SKIP;
+}
+
 #ifdef CONFIG_NUMA
 enum numa_topology_type {
 	NUMA_DIRECT,