Commit 98ec21a0 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'sched-hrtimers-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler updates from Thomas Gleixner:
 "This series of scheduler updates depends on sched/core and timers/core
  branches, which are already in your tree:

   - Scheduler balancing overhaul to plug a hard to trigger race which
     causes an oops in the balancer (Peter Zijlstra)

   - Lockdep updates which are related to the balancing updates (Peter
     Zijlstra)"

* 'sched-hrtimers-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched,lockdep: Employ lock pinning
  lockdep: Implement lock pinning
  lockdep: Simplify lock_release()
  sched: Streamline the task migration locking a little
  sched: Move code around
  sched,dl: Fix sched class hopping CBS hole
  sched, dl: Convert switched_{from, to}_dl() / prio_changed_dl() to balance callbacks
  sched,dl: Remove return value from pull_dl_task()
  sched, rt: Convert switched_{from, to}_rt() / prio_changed_rt() to balance callbacks
  sched,rt: Remove return value from pull_rt_task()
  sched: Allow balance callbacks for check_class_changed()
  sched: Use replace normalize_task() with __sched_setscheduler()
  sched: Replace post_schedule with a balance callback list
parents a2629483 cbce1a68
......@@ -255,6 +255,7 @@ struct held_lock {
unsigned int check:1; /* see lock_acquire() comment */
unsigned int hardirqs_off:1;
unsigned int references:12; /* 32 bits */
unsigned int pin_count;
};
/*
......@@ -354,6 +355,9 @@ extern void lockdep_set_current_reclaim_state(gfp_t gfp_mask);
extern void lockdep_clear_current_reclaim_state(void);
extern void lockdep_trace_alloc(gfp_t mask);
extern void lock_pin_lock(struct lockdep_map *lock);
extern void lock_unpin_lock(struct lockdep_map *lock);
# define INIT_LOCKDEP .lockdep_recursion = 0, .lockdep_reclaim_gfp = 0,
#define lockdep_depth(tsk) (debug_locks ? (tsk)->lockdep_depth : 0)
......@@ -368,6 +372,9 @@ extern void lockdep_trace_alloc(gfp_t mask);
#define lockdep_recursing(tsk) ((tsk)->lockdep_recursion)
#define lockdep_pin_lock(l) lock_pin_lock(&(l)->dep_map)
#define lockdep_unpin_lock(l) lock_unpin_lock(&(l)->dep_map)
#else /* !CONFIG_LOCKDEP */
static inline void lockdep_off(void)
......@@ -420,6 +427,9 @@ struct lock_class_key { };
#define lockdep_recursing(tsk) (0)
#define lockdep_pin_lock(l) do { (void)(l); } while (0)
#define lockdep_unpin_lock(l) do { (void)(l); } while (0)
#endif /* !LOCKDEP */
#ifdef CONFIG_LOCK_STAT
......
......@@ -3157,6 +3157,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
hlock->waittime_stamp = 0;
hlock->holdtime_stamp = lockstat_clock();
#endif
hlock->pin_count = 0;
if (check && !mark_irqflags(curr, hlock))
return 0;
......@@ -3260,26 +3261,6 @@ print_unlock_imbalance_bug(struct task_struct *curr, struct lockdep_map *lock,
return 0;
}
/*
* Common debugging checks for both nested and non-nested unlock:
*/
static int check_unlock(struct task_struct *curr, struct lockdep_map *lock,
unsigned long ip)
{
if (unlikely(!debug_locks))
return 0;
/*
* Lockdep should run with IRQs disabled, recursion, head-ache, etc..
*/
if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
return 0;
if (curr->lockdep_depth <= 0)
return print_unlock_imbalance_bug(curr, lock, ip);
return 1;
}
static int match_held_lock(struct held_lock *hlock, struct lockdep_map *lock)
{
if (hlock->instance == lock)
......@@ -3376,31 +3357,35 @@ __lock_set_class(struct lockdep_map *lock, const char *name,
}
/*
* Remove the lock to the list of currently held locks in a
* potentially non-nested (out of order) manner. This is a
* relatively rare operation, as all the unlock APIs default
* to nested mode (which uses lock_release()):
* Remove the lock to the list of currently held locks - this gets
* called on mutex_unlock()/spin_unlock*() (or on a failed
* mutex_lock_interruptible()).
*
* @nested is an hysterical artifact, needs a tree wide cleanup.
*/
static int
lock_release_non_nested(struct task_struct *curr,
struct lockdep_map *lock, unsigned long ip)
__lock_release(struct lockdep_map *lock, int nested, unsigned long ip)
{
struct task_struct *curr = current;
struct held_lock *hlock, *prev_hlock;
unsigned int depth;
int i;
/*
* Check whether the lock exists in the current stack
* of held locks:
*/
if (unlikely(!debug_locks))
return 0;
depth = curr->lockdep_depth;
/*
* So we're all set to release this lock.. wait what lock? We don't
* own any locks, you've been drinking again?
*/
if (DEBUG_LOCKS_WARN_ON(!depth))
return 0;
if (DEBUG_LOCKS_WARN_ON(depth <= 0))
return print_unlock_imbalance_bug(curr, lock, ip);
/*
* Check whether the lock exists in the current stack
* of held locks:
*/
prev_hlock = NULL;
for (i = depth-1; i >= 0; i--) {
hlock = curr->held_locks + i;
......@@ -3419,6 +3404,8 @@ lock_release_non_nested(struct task_struct *curr,
if (hlock->instance == lock)
lock_release_holdtime(hlock);
WARN(hlock->pin_count, "releasing a pinned lock\n");
if (hlock->references) {
hlock->references--;
if (hlock->references) {
......@@ -3456,91 +3443,66 @@ lock_release_non_nested(struct task_struct *curr,
*/
if (DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth - 1))
return 0;
return 1;
}
/*
* Remove the lock to the list of currently held locks - this gets
* called on mutex_unlock()/spin_unlock*() (or on a failed
* mutex_lock_interruptible()). This is done for unlocks that nest
* perfectly. (i.e. the current top of the lock-stack is unlocked)
*/
static int lock_release_nested(struct task_struct *curr,
struct lockdep_map *lock, unsigned long ip)
static int __lock_is_held(struct lockdep_map *lock)
{
struct held_lock *hlock;
unsigned int depth;
struct task_struct *curr = current;
int i;
/*
* Pop off the top of the lock stack:
*/
depth = curr->lockdep_depth - 1;
hlock = curr->held_locks + depth;
for (i = 0; i < curr->lockdep_depth; i++) {
struct held_lock *hlock = curr->held_locks + i;
/*
* Is the unlock non-nested:
*/
if (hlock->instance != lock || hlock->references)
return lock_release_non_nested(curr, lock, ip);
curr->lockdep_depth--;
if (match_held_lock(hlock, lock))
return 1;
}
/*
* No more locks, but somehow we've got hash left over, who left it?
*/
if (DEBUG_LOCKS_WARN_ON(!depth && (hlock->prev_chain_key != 0)))
return 0;
curr->curr_chain_key = hlock->prev_chain_key;
lock_release_holdtime(hlock);
#ifdef CONFIG_DEBUG_LOCKDEP
hlock->prev_chain_key = 0;
hlock->class_idx = 0;
hlock->acquire_ip = 0;
hlock->irq_context = 0;
#endif
return 1;
}
/*
* Remove the lock to the list of currently held locks - this gets
* called on mutex_unlock()/spin_unlock*() (or on a failed
* mutex_lock_interruptible()). This is done for unlocks that nest
* perfectly. (i.e. the current top of the lock-stack is unlocked)
*/
static void
__lock_release(struct lockdep_map *lock, int nested, unsigned long ip)
static void __lock_pin_lock(struct lockdep_map *lock)
{
struct task_struct *curr = current;
int i;
if (!check_unlock(curr, lock, ip))
if (unlikely(!debug_locks))
return;
if (nested) {
if (!lock_release_nested(curr, lock, ip))
return;
} else {
if (!lock_release_non_nested(curr, lock, ip))
for (i = 0; i < curr->lockdep_depth; i++) {
struct held_lock *hlock = curr->held_locks + i;
if (match_held_lock(hlock, lock)) {
hlock->pin_count++;
return;
}
}
check_chain_key(curr);
WARN(1, "pinning an unheld lock\n");
}
static int __lock_is_held(struct lockdep_map *lock)
static void __lock_unpin_lock(struct lockdep_map *lock)
{
struct task_struct *curr = current;
int i;
if (unlikely(!debug_locks))
return;
for (i = 0; i < curr->lockdep_depth; i++) {
struct held_lock *hlock = curr->held_locks + i;
if (match_held_lock(hlock, lock))
return 1;
if (match_held_lock(hlock, lock)) {
if (WARN(!hlock->pin_count, "unpinning an unpinned lock\n"))
return;
hlock->pin_count--;
return;
}
}
return 0;
WARN(1, "unpinning an unheld lock\n");
}
/*
......@@ -3639,7 +3601,8 @@ void lock_release(struct lockdep_map *lock, int nested,
check_flags(flags);
current->lockdep_recursion = 1;
trace_lock_release(lock, ip);
__lock_release(lock, nested, ip);
if (__lock_release(lock, nested, ip))
check_chain_key(current);
current->lockdep_recursion = 0;
raw_local_irq_restore(flags);
}
......@@ -3665,6 +3628,40 @@ int lock_is_held(struct lockdep_map *lock)
}
EXPORT_SYMBOL_GPL(lock_is_held);
void lock_pin_lock(struct lockdep_map *lock)
{
unsigned long flags;
if (unlikely(current->lockdep_recursion))
return;
raw_local_irq_save(flags);
check_flags(flags);
current->lockdep_recursion = 1;
__lock_pin_lock(lock);
current->lockdep_recursion = 0;
raw_local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(lock_pin_lock);
void lock_unpin_lock(struct lockdep_map *lock)
{
unsigned long flags;
if (unlikely(current->lockdep_recursion))
return;
raw_local_irq_save(flags);
check_flags(flags);
current->lockdep_recursion = 1;
__lock_unpin_lock(lock);
current->lockdep_recursion = 0;
raw_local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(lock_unpin_lock);
void lockdep_set_current_reclaim_state(gfp_t gfp_mask)
{
current->lockdep_reclaim_gfp = gfp_mask;
......
......@@ -1000,7 +1000,11 @@ inline int task_curr(const struct task_struct *p)
}
/*
* Can drop rq->lock because from sched_class::switched_from() methods drop it.
* switched_from, switched_to and prio_changed must _NOT_ drop rq->lock,
* use the balance_callback list if you want balancing.
*
* this means any call to check_class_changed() must be followed by a call to
* balance_callback().
*/
static inline void check_class_changed(struct rq *rq, struct task_struct *p,
const struct sched_class *prev_class,
......@@ -1009,7 +1013,7 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p,
if (prev_class != p->sched_class) {
if (prev_class->switched_from)
prev_class->switched_from(rq, p);
/* Possble rq->lock 'hole'. */
p->sched_class->switched_to(rq, p);
} else if (oldprio != p->prio || dl_task(p))
p->sched_class->prio_changed(rq, p, oldprio);
......@@ -1041,6 +1045,177 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
}
#ifdef CONFIG_SMP
/*
* This is how migration works:
*
* 1) we invoke migration_cpu_stop() on the target CPU using
* stop_one_cpu().
* 2) stopper starts to run (implicitly forcing the migrated thread
* off the CPU)
* 3) it checks whether the migrated task is still in the wrong runqueue.
* 4) if it's in the wrong runqueue then the migration thread removes
* it and puts it into the right queue.
* 5) stopper completes and stop_one_cpu() returns and the migration
* is done.
*/
/*
* move_queued_task - move a queued task to new rq.
*
* Returns (locked) new rq. Old rq's lock is released.
*/
static struct rq *move_queued_task(struct rq *rq, struct task_struct *p, int new_cpu)
{
lockdep_assert_held(&rq->lock);
dequeue_task(rq, p, 0);
p->on_rq = TASK_ON_RQ_MIGRATING;
set_task_cpu(p, new_cpu);
raw_spin_unlock(&rq->lock);
rq = cpu_rq(new_cpu);
raw_spin_lock(&rq->lock);
BUG_ON(task_cpu(p) != new_cpu);
p->on_rq = TASK_ON_RQ_QUEUED;
enqueue_task(rq, p, 0);
check_preempt_curr(rq, p, 0);
return rq;
}
struct migration_arg {
struct task_struct *task;
int dest_cpu;
};
/*
* Move (not current) task off this cpu, onto dest cpu. We're doing
* this because either it can't run here any more (set_cpus_allowed()
* away from this CPU, or CPU going down), or because we're
* attempting to rebalance this task on exec (sched_exec).
*
* So we race with normal scheduler movements, but that's OK, as long
* as the task is no longer on this CPU.
*/
static struct rq *__migrate_task(struct rq *rq, struct task_struct *p, int dest_cpu)
{
if (unlikely(!cpu_active(dest_cpu)))
return rq;
/* Affinity changed (again). */
if (!cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p)))
return rq;
rq = move_queued_task(rq, p, dest_cpu);
return rq;
}
/*
* migration_cpu_stop - this will be executed by a highprio stopper thread
* and performs thread migration by bumping thread off CPU then
* 'pushing' onto another runqueue.
*/
static int migration_cpu_stop(void *data)
{
struct migration_arg *arg = data;
struct task_struct *p = arg->task;
struct rq *rq = this_rq();
/*
* The original target cpu might have gone down and we might
* be on another cpu but it doesn't matter.
*/
local_irq_disable();
/*
* We need to explicitly wake pending tasks before running
* __migrate_task() such that we will not miss enforcing cpus_allowed
* during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test.
*/
sched_ttwu_pending();
raw_spin_lock(&p->pi_lock);
raw_spin_lock(&rq->lock);
/*
* If task_rq(p) != rq, it cannot be migrated here, because we're
* holding rq->lock, if p->on_rq == 0 it cannot get enqueued because
* we're holding p->pi_lock.
*/
if (task_rq(p) == rq && task_on_rq_queued(p))
rq = __migrate_task(rq, p, arg->dest_cpu);
raw_spin_unlock(&rq->lock);
raw_spin_unlock(&p->pi_lock);
local_irq_enable();
return 0;
}
void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
{
if (p->sched_class->set_cpus_allowed)
p->sched_class->set_cpus_allowed(p, new_mask);
cpumask_copy(&p->cpus_allowed, new_mask);
p->nr_cpus_allowed = cpumask_weight(new_mask);
}
/*
* Change a given task's CPU affinity. Migrate the thread to a
* proper CPU and schedule it away if the CPU it's executing on
* is removed from the allowed bitmask.
*
* NOTE: the caller must have a valid reference to the task, the
* task must not exit() & deallocate itself prematurely. The
* call is not atomic; no spinlocks may be held.
*/
int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
{
unsigned long flags;
struct rq *rq;
unsigned int dest_cpu;
int ret = 0;
rq = task_rq_lock(p, &flags);
if (cpumask_equal(&p->cpus_allowed, new_mask))
goto out;
if (!cpumask_intersects(new_mask, cpu_active_mask)) {
ret = -EINVAL;
goto out;
}
do_set_cpus_allowed(p, new_mask);
/* Can the task run on the task's current CPU? If so, we're done */
if (cpumask_test_cpu(task_cpu(p), new_mask))
goto out;
dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
if (task_running(rq, p) || p->state == TASK_WAKING) {
struct migration_arg arg = { p, dest_cpu };
/* Need help from migration thread: drop lock and wait. */
task_rq_unlock(rq, p, &flags);
stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
tlb_migrate_finish(p->mm);
return 0;
} else if (task_on_rq_queued(p)) {
/*
* OK, since we're going to drop the lock immediately
* afterwards anyway.
*/
lockdep_unpin_lock(&rq->lock);
rq = move_queued_task(rq, p, dest_cpu);
lockdep_pin_lock(&rq->lock);
}
out:
task_rq_unlock(rq, p, &flags);
return ret;
}
EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
{
#ifdef CONFIG_SCHED_DEBUG
......@@ -1181,13 +1356,6 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p)
return ret;
}
struct migration_arg {
struct task_struct *task;
int dest_cpu;
};
static int migration_cpu_stop(void *data);
/*
* wait_task_inactive - wait for a thread to unschedule.
*
......@@ -1320,9 +1488,7 @@ void kick_process(struct task_struct *p)
preempt_enable();
}
EXPORT_SYMBOL_GPL(kick_process);
#endif /* CONFIG_SMP */
#ifdef CONFIG_SMP
/*
* ->cpus_allowed is protected by both rq->lock and p->pi_lock
*/
......@@ -1402,6 +1568,8 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
static inline
int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
{
lockdep_assert_held(&p->pi_lock);
if (p->nr_cpus_allowed > 1)
cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags);
......@@ -1427,7 +1595,7 @@ static void update_avg(u64 *avg, u64 sample)
s64 diff = sample - *avg;
*avg += diff >> 3;
}
#endif
#endif /* CONFIG_SMP */
static void
ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
......@@ -1490,8 +1658,15 @@ ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
p->state = TASK_RUNNING;
#ifdef CONFIG_SMP
if (p->sched_class->task_woken)
if (p->sched_class->task_woken) {
/*
* Our task @p is fully woken up and running; so its safe to
* drop the rq->lock, hereafter rq is only used for statistics.
*/
lockdep_unpin_lock(&rq->lock);
p->sched_class->task_woken(rq, p);
lockdep_pin_lock(&rq->lock);
}
if (rq->idle_stamp) {
u64 delta = rq_clock(rq) - rq->idle_stamp;
......@@ -1510,6 +1685,8 @@ ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
static void
ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags)
{
lockdep_assert_held(&rq->lock);
#ifdef CONFIG_SMP
if (p->sched_contributes_to_load)
rq->nr_uninterruptible--;
......@@ -1554,6 +1731,7 @@ void sched_ttwu_pending(void)
return;
raw_spin_lock_irqsave(&rq->lock, flags);
lockdep_pin_lock(&rq->lock);
while (llist) {
p = llist_entry(llist, struct task_struct, wake_entry);
......@@ -1561,6 +1739,7 @@ void sched_ttwu_pending(void)
ttwu_do_activate(rq, p, 0);
}
lockdep_unpin_lock(&rq->lock);
raw_spin_unlock_irqrestore(&rq->lock, flags);
}
......@@ -1657,7 +1836,9 @@ static void ttwu_queue(struct task_struct *p, int cpu)
#endif
raw_spin_lock(&rq->lock);
lockdep_pin_lock(&rq->lock);
ttwu_do_activate(rq, p, 0);
lockdep_unpin_lock(&rq->lock);
raw_spin_unlock(&rq->lock);
}
......@@ -1752,9 +1933,17 @@ static void try_to_wake_up_local(struct task_struct *p)
lockdep_assert_held(&rq->lock);
if (!raw_spin_trylock(&p->pi_lock)) {
/*
* This is OK, because current is on_cpu, which avoids it being
* picked for load-balance and preemption/IRQs are still
* disabled avoiding further scheduler activity on it and we've
* not yet picked a replacement task.
*/
lockdep_unpin_lock(&rq->lock);
raw_spin_unlock(&rq->lock);
raw_spin_lock(&p->pi_lock);
raw_spin_lock(&rq->lock);
lockdep_pin_lock(&rq->lock);
}
if (!(p->state & TASK_NORMAL))
......@@ -2294,23 +2483,35 @@ static struct rq *finish_task_switch(struct task_struct *prev)
#ifdef CONFIG_SMP
/* rq->lock is NOT held, but preemption is disabled */
static inline void post_schedule(struct rq *rq)
static void __balance_callback(struct rq *rq)
{
if (rq->post_schedule) {
struct callback_head *head, *next;
void (*func)(struct rq *rq);
unsigned long flags;
raw_spin_lock_irqsave(&rq->lock, flags);
if (rq->curr->sched_class->post_schedule)
rq->curr->sched_class->post_schedule(rq);
raw_spin_unlock_irqrestore(&rq->lock, flags);
head = rq->balance_callback;
rq->balance_callback = NULL;
while (head) {
func = (void (*)(struct rq *))head->func;
next = head->next;
head->next = NULL;
head = next;
rq->post_schedule = 0;
func(rq);
}
raw_spin_unlock_irqrestore(&rq->lock, flags);
}
static inline void balance_callback(struct rq *rq)
{
if (unlikely(rq->balance_callback))
__balance_callback(rq);
}
#else
static inline void post_schedule(struct rq *rq)
static inline void balance_callback(struct rq *rq)
{
}
......@@ -2328,7 +2529,7 @@ asmlinkage __visible void schedule_tail(struct task_struct *prev)
/* finish_task_switch() drops rq->lock and enables preemtion */
preempt_disable();
rq = finish_task_switch(prev);
post_schedule(rq);
balance_callback(rq);
preempt_enable();
if (current->set_child_tid)
......@@ -2372,6 +2573,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
* of the scheduler it's an obvious special-case), so we
* do an early lockdep release here:
*/
lockdep_unpin_lock(&rq->lock);
spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
/* Here we just switch the register state and the stack. */
......@@ -2794,6 +2996,7 @@ static void __sched __schedule(void)
*/
smp_mb__before_spinlock();
raw_spin_lock_irq(&rq->lock);
lockdep_pin_lock(&rq->lock);
rq->clock_skip_update <<= 1; /* promote REQ to ACT */
......@@ -2836,10 +3039,12 @@ static void __sched __schedule(void)
rq = context_switch(rq, prev, next); /* unlocks the rq */
cpu = cpu_of(rq);
} else
} else {
lockdep_unpin_lock(&rq->lock);
raw_spin_unlock_irq(&rq->lock);
}
post_schedule(rq);
balance_callback(rq);
}
static inline void sched_submit_work(struct task_struct *tsk)
......@@ -3103,7 +3308,11 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
check_class_changed(rq, p, prev_class, oldprio);
out_unlock:
preempt_disable(); /* avoid rq from going away on us */
__task_rq_unlock(rq);
balance_callback(rq);
preempt_enable();
}
#endif
......@@ -3441,7 +3650,7 @@ static bool dl_param_changed(struct task_struct *p,
static int __sched_setscheduler(struct task_struct *p,
const struct sched_attr *attr,
bool user)
bool user, bool pi)
{
int newprio = dl_policy(attr->sched_policy) ? MAX_DL_PRIO - 1 :
MAX_RT_PRIO - 1 - attr->sched_priority;
......@@ -3627,6 +3836,7 @@ static int __sched_setscheduler(struct task_struct *p,
p->sched_reset_on_fork = reset_on_fork;
oldprio = p->prio;
if (pi) {
/*
* Take priority boosted tasks into account. If the new
* effective priority is unchanged, we just store the new
......@@ -3640,6 +3850,7 @@ static int __sched_setscheduler(struct task_struct *p,
task_rq_unlock(rq, p, &flags);
return 0;
}
}
queued = task_on_rq_queued(p);
running = task_current(rq, p);
......@@ -3649,7 +3860,7 @@ static int __sched_setscheduler(struct task_struct *p,
put_prev_task(rq, p);
prev_class = p->sched_class;
__setscheduler(rq, p, attr, true);
__setscheduler(rq, p, attr, pi);
if (running)
p->sched_class->set_curr_task(rq);
......@@ -3662,10 +3873,18 @@ static int __sched_setscheduler(struct task_struct *p,
}
check_class_changed(rq, p, prev_class, oldprio);
preempt_disable(); /* avoid rq from going away on us */
task_rq_unlock(rq, p, &flags);
if (pi)
rt_mutex_adjust_pi(p);
/*
* Run balance callbacks after we've adjusted the PI chain.
*/
balance_callback(rq);
preempt_enable();
return 0;
}
......@@ -3685,7 +3904,7 @@ static int _sched_setscheduler(struct task_struct *p, int policy,
attr.sched_policy = policy;
}
return __sched_setscheduler(p, &attr, check);
return __sched_setscheduler(p, &attr, check, true);
}
/**
* sched_setscheduler - change the scheduling policy and/or RT priority of a thread.
......@@ -3706,7 +3925,7 @@ EXPORT_SYMBOL_GPL(sched_setscheduler);
int sched_setattr(struct task_struct *p, const struct sched_attr *attr)
{
return __sched_setscheduler(p, attr, true);
return __sched_setscheduler(p, attr, true, true);
}
EXPORT_SYMBOL_GPL(sched_setattr);
......@@ -4754,149 +4973,6 @@ int task_can_attach(struct task_struct *p,
}
#ifdef CONFIG_SMP
/*
* move_queued_task - move a queued task to new rq.
*
* Returns (locked) new rq. Old rq's lock is released.
*/
static struct rq *move_queued_task(struct task_struct *p, int new_cpu)
{
struct rq *rq = task_rq(p);
lockdep_assert_held(&rq->lock);
dequeue_task(rq, p, 0);
p->on_rq = TASK_ON_RQ_MIGRATING;
set_task_cpu(p, new_cpu);
raw_spin_unlock(&rq->lock);
rq = cpu_rq(new_cpu);
raw_spin_lock(&rq->lock);
BUG_ON(task_cpu(p) != new_cpu);
p->on_rq = TASK_ON_RQ_QUEUED;
enqueue_task(rq, p, 0);
check_preempt_curr(rq, p, 0);
return rq;
}
void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
{
if (p->sched_class->set_cpus_allowed)
p->sched_class->set_cpus_allowed(p, new_mask);
cpumask_copy(&p->cpus_allowed, new_mask);
p->nr_cpus_allowed = cpumask_weight(new_mask);
}
/*
* This is how migration works:
*
* 1) we invoke migration_cpu_stop() on the target CPU using
* stop_one_cpu().
* 2) stopper starts to run (implicitly forcing the migrated thread
* off the CPU)
* 3) it checks whether the migrated task is still in the wrong runqueue.
* 4) if it's in the wrong runqueue then the migration thread removes
* it and puts it into the right queue.
* 5) stopper completes and stop_one_cpu() returns and the migration
* is done.
*/
/*
* Change a given task's CPU affinity. Migrate the thread to a
* proper CPU and schedule it away if the CPU it's executing on
* is removed from the allowed bitmask.
*
* NOTE: the caller must have a valid reference to the task, the
* task must not exit() & deallocate itself prematurely. The
* call is not atomic; no spinlocks may be held.
*/
int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
{
unsigned long flags;
struct rq *rq;
unsigned int dest_cpu;
int ret = 0;
rq = task_rq_lock(p, &flags);
if (cpumask_equal(&p->cpus_allowed, new_mask))
goto out;
if (!cpumask_intersects(new_mask, cpu_active_mask)) {
ret = -EINVAL;
goto out;
}
do_set_cpus_allowed(p, new_mask);
/* Can the task run on the task's current CPU? If so, we're done */
if (cpumask_test_cpu(task_cpu(p), new_mask))
goto out;
dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
if (task_running(rq, p) || p->state == TASK_WAKING) {
struct migration_arg arg = { p, dest_cpu };
/* Need help from migration thread: drop lock and wait. */
task_rq_unlock(rq, p, &flags);
stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
tlb_migrate_finish(p->mm);
return 0;
} else if (task_on_rq_queued(p))
rq = move_queued_task(p, dest_cpu);
out:
task_rq_unlock(rq, p, &flags);
return ret;
}
EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
/*
* Move (not current) task off this cpu, onto dest cpu. We're doing
* this because either it can't run here any more (set_cpus_allowed()
* away from this CPU, or CPU going down), or because we're
* attempting to rebalance this task on exec (sched_exec).
*
* So we race with normal scheduler movements, but that's OK, as long
* as the task is no longer on this CPU.
*
* Returns non-zero if task was successfully migrated.
*/
static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
{
struct rq *rq;
int ret = 0;
if (unlikely(!cpu_active(dest_cpu)))
return ret;
rq = cpu_rq(src_cpu);
raw_spin_lock(&p->pi_lock);
raw_spin_lock(&rq->lock);
/* Already moved. */
if (task_cpu(p) != src_cpu)
goto done;
/* Affinity changed (again). */
if (!cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p)))
goto fail;
/*
* If we're not on a rq, the next wake-up will ensure we're
* placed properly.
*/
if (task_on_rq_queued(p))
rq = move_queued_task(p, dest_cpu);
done:
ret = 1;
fail:
raw_spin_unlock(&rq->lock);
raw_spin_unlock(&p->pi_lock);
return ret;
}
#ifdef CONFIG_NUMA_BALANCING
/* Migrate current task p to target_cpu */
......@@ -4944,35 +5020,9 @@ void sched_setnuma(struct task_struct *p, int nid)
enqueue_task(rq, p, 0);
task_rq_unlock(rq, p, &flags);
}
#endif
/*
* migration_cpu_stop - this will be executed by a highprio stopper thread
* and performs thread migration by bumping thread off CPU then
* 'pushing' onto another runqueue.
*/
static int migration_cpu_stop(void *data)
{
struct migration_arg *arg = data;
/*
* The original target cpu might have gone down and we might
* be on another cpu but it doesn't matter.
*/
local_irq_disable();
/*
* We need to explicitly wake pending tasks before running
* __migrate_task() such that we will not miss enforcing cpus_allowed
* during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test.
*/
sched_ttwu_pending();
__migrate_task(arg->task, raw_smp_processor_id(), arg->dest_cpu);
local_irq_enable();
return 0;
}
#endif /* CONFIG_NUMA_BALANCING */
#ifdef CONFIG_HOTPLUG_CPU
/*
* Ensures that the idle task is using init_mm right before its cpu goes
* offline.
......@@ -5028,9 +5078,9 @@ static struct task_struct fake_task = {
* there's no concurrency possible, we hold the required locks anyway
* because of lock validation efforts.
*/
static void migrate_tasks(unsigned int dead_cpu)
static void migrate_tasks(struct rq *dead_rq)
{
struct rq *rq = cpu_rq(dead_cpu);
struct rq *rq = dead_rq;
struct task_struct *next, *stop = rq->stop;
int dest_cpu;
......@@ -5052,7 +5102,7 @@ static void migrate_tasks(unsigned int dead_cpu)
*/
update_rq_clock(rq);
for ( ; ; ) {
for (;;) {
/*
* There's this thread running, bail when that's the only
* remaining thread.
......@@ -5060,22 +5110,29 @@ static void migrate_tasks(unsigned int dead_cpu)
if (rq->nr_running == 1)
break;
/*
* Ensure rq->lock covers the entire task selection
* until the migration.
*/
lockdep_pin_lock(&rq->lock);
next = pick_next_task(rq, &fake_task);
BUG_ON(!next);
next->sched_class->put_prev_task(rq, next);
/* Find suitable destination for @next, with force if needed. */
dest_cpu = select_fallback_rq(dead_cpu, next);
raw_spin_unlock(&rq->lock);
__migrate_task(next, dead_cpu, dest_cpu);
dest_cpu = select_fallback_rq(dead_rq->cpu, next);
lockdep_unpin_lock(&rq->lock);
rq = __migrate_task(rq, next, dest_cpu);
if (rq != dead_rq) {
raw_spin_unlock(&rq->lock);
rq = dead_rq;
raw_spin_lock(&rq->lock);
}
}
rq->stop = stop;
}
#endif /* CONFIG_HOTPLUG_CPU */
#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
......@@ -5254,7 +5311,7 @@ static void register_sched_domain_sysctl(void)
static void unregister_sched_domain_sysctl(void)
{
}
#endif
#endif /* CONFIG_SCHED_DEBUG && CONFIG_SYSCTL */
static void set_rq_online(struct rq *rq)
{
......@@ -5323,7 +5380,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
set_rq_offline(rq);
}
migrate_tasks(cpu);
migrate_tasks(rq);
BUG_ON(rq->nr_running != 1); /* the migration thread */
raw_spin_unlock_irqrestore(&rq->lock, flags);
break;
......@@ -5401,9 +5458,6 @@ static int __init migration_init(void)
return 0;
}
early_initcall(migration_init);
#endif
#ifdef CONFIG_SMP
static cpumask_var_t sched_domains_tmpmask; /* sched_domains_mutex */
......@@ -7235,7 +7289,7 @@ void __init sched_init(void)
rq->sd = NULL;
rq->rd = NULL;
rq->cpu_capacity = rq->cpu_capacity_orig = SCHED_CAPACITY_SCALE;
rq->post_schedule = 0;
rq->balance_callback = NULL;
rq->active_balance = 0;
rq->next_balance = jiffies;
rq->push_cpu = 0;
......@@ -7365,32 +7419,12 @@ EXPORT_SYMBOL(___might_sleep);
#endif
#ifdef CONFIG_MAGIC_SYSRQ
static void normalize_task(struct rq *rq, struct task_struct *p)
void normalize_rt_tasks(void)
{
const struct sched_class *prev_class = p->sched_class;
struct task_struct *g, *p;
struct sched_attr attr = {
.sched_policy = SCHED_NORMAL,
};
int old_prio = p->prio;
int queued;
queued = task_on_rq_queued(p);
if (queued)
dequeue_task(rq, p, 0);
__setscheduler(rq, p, &attr, false);
if (queued) {
enqueue_task(rq, p, 0);
resched_curr(rq);
}
check_class_changed(rq, p, prev_class, old_prio);
}
void normalize_rt_tasks(void)
{
struct task_struct *g, *p;
unsigned long flags;
struct rq *rq;
read_lock(&tasklist_lock);
for_each_process_thread(g, p) {
......@@ -7417,9 +7451,7 @@ void normalize_rt_tasks(void)
continue;
}
rq = task_rq_lock(p, &flags);
normalize_task(rq, p);
task_rq_unlock(rq, p, &flags);
__sched_setscheduler(p, &attr, false, false);
}
read_unlock(&tasklist_lock);
}
......
......@@ -213,14 +213,28 @@ static inline bool need_pull_dl_task(struct rq *rq, struct task_struct *prev)
return dl_task(prev);
}
static inline void set_post_schedule(struct rq *rq)
static DEFINE_PER_CPU(struct callback_head, dl_push_head);
static DEFINE_PER_CPU(struct callback_head, dl_pull_head);
static void push_dl_tasks(struct rq *);
static void pull_dl_task(struct rq *);
static inline void queue_push_tasks(struct rq *rq)
{
rq->post_schedule = has_pushable_dl_tasks(rq);
if (!has_pushable_dl_tasks(rq))
return;
queue_balance_callback(rq, &per_cpu(dl_push_head, rq->cpu), push_dl_tasks);
}
static inline void queue_pull_task(struct rq *rq)
{
queue_balance_callback(rq, &per_cpu(dl_pull_head, rq->cpu), pull_dl_task);
}
static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq);
static void dl_task_offline_migration(struct rq *rq, struct task_struct *p)
static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p)
{
struct rq *later_rq = NULL;
bool fallback = false;
......@@ -254,14 +268,19 @@ static void dl_task_offline_migration(struct rq *rq, struct task_struct *p)
double_lock_balance(rq, later_rq);
}
/*
* By now the task is replenished and enqueued; migrate it.
*/
deactivate_task(rq, p, 0);
set_task_cpu(p, later_rq->cpu);
activate_task(later_rq, p, ENQUEUE_REPLENISH);
activate_task(later_rq, p, 0);
if (!fallback)
resched_curr(later_rq);
double_unlock_balance(rq, later_rq);
double_unlock_balance(later_rq, rq);
return later_rq;
}
#else
......@@ -291,12 +310,15 @@ static inline bool need_pull_dl_task(struct rq *rq, struct task_struct *prev)
return false;
}
static inline int pull_dl_task(struct rq *rq)
static inline void pull_dl_task(struct rq *rq)
{
}
static inline void queue_push_tasks(struct rq *rq)
{
return 0;
}
static inline void set_post_schedule(struct rq *rq)
static inline void queue_pull_task(struct rq *rq)
{
}
#endif /* CONFIG_SMP */
......@@ -498,22 +520,23 @@ static void update_dl_entity(struct sched_dl_entity *dl_se,
* actually started or not (i.e., the replenishment instant is in
* the future or in the past).
*/
static int start_dl_timer(struct sched_dl_entity *dl_se, bool boosted)
static int start_dl_timer(struct task_struct *p)
{
struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
struct rq *rq = rq_of_dl_rq(dl_rq);
struct sched_dl_entity *dl_se = &p->dl;
struct hrtimer *timer = &dl_se->dl_timer;
struct rq *rq = task_rq(p);
ktime_t now, act;
s64 delta;
if (boosted)
return 0;
lockdep_assert_held(&rq->lock);
/*
* We want the timer to fire at the deadline, but considering
* that it is actually coming from rq->clock and not from
* hrtimer's time base reading.
*/
act = ns_to_ktime(dl_se->deadline);
now = hrtimer_cb_get_time(&dl_se->dl_timer);
now = hrtimer_cb_get_time(timer);
delta = ktime_to_ns(now) - rq_clock(rq);
act = ktime_add_ns(act, delta);
......@@ -525,7 +548,19 @@ static int start_dl_timer(struct sched_dl_entity *dl_se, bool boosted)
if (ktime_us_delta(act, now) < 0)
return 0;
hrtimer_start(&dl_se->dl_timer, act, HRTIMER_MODE_ABS);
/*
* !enqueued will guarantee another callback; even if one is already in
* progress. This ensures a balanced {get,put}_task_struct().
*
* The race against __run_timer() clearing the enqueued state is
* harmless because we're holding task_rq()->lock, therefore the timer
* expiring after we've done the check will wait on its task_rq_lock()
* and observe our state.
*/
if (!hrtimer_is_queued(timer)) {
get_task_struct(p);
hrtimer_start(timer, act, HRTIMER_MODE_ABS);
}
return 1;
}
......@@ -555,35 +590,40 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
rq = task_rq_lock(p, &flags);
/*
* We need to take care of several possible races here:
*
* - the task might have changed its scheduling policy
* to something different than SCHED_DEADLINE
* - the task might have changed its reservation parameters
* (through sched_setattr())
* - the task might have been boosted by someone else and
* might be in the boosting/deboosting path
* The task might have changed its scheduling policy to something
* different than SCHED_DEADLINE (through switched_fromd_dl()).
*/
if (!dl_task(p)) {
__dl_clear_params(p);
goto unlock;
}
/*
* This is possible if switched_from_dl() raced against a running
* callback that took the above !dl_task() path and we've since then
* switched back into SCHED_DEADLINE.
*
* In all this cases we bail out, as the task is already
* in the runqueue or is going to be enqueued back anyway.
* There's nothing to do except drop our task reference.
*/
if (!dl_task(p) || dl_se->dl_new ||
dl_se->dl_boosted || !dl_se->dl_throttled)
if (dl_se->dl_new)
goto unlock;
sched_clock_tick();
update_rq_clock(rq);
/*
* The task might have been boosted by someone else and might be in the
* boosting/deboosting path, its not throttled.
*/
if (dl_se->dl_boosted)
goto unlock;
#ifdef CONFIG_SMP
/*
* If we find that the rq the task was on is no longer
* available, we need to select a new rq.
* Spurious timer due to start_dl_timer() race; or we already received
* a replenishment from rt_mutex_setprio().
*/
if (unlikely(!rq->online)) {
dl_task_offline_migration(rq, p);
if (!dl_se->dl_throttled)
goto unlock;
}
#endif
sched_clock_tick();
update_rq_clock(rq);
/*
* If the throttle happened during sched-out; like:
......@@ -609,17 +649,38 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
check_preempt_curr_dl(rq, p, 0);
else
resched_curr(rq);
#ifdef CONFIG_SMP
/*
* Queueing this task back might have overloaded rq,
* check if we need to kick someone away.
* Perform balancing operations here; after the replenishments. We
* cannot drop rq->lock before this, otherwise the assertion in
* start_dl_timer() about not missing updates is not true.
*
* If we find that the rq the task was on is no longer available, we
* need to select a new rq.
*
* XXX figure out if select_task_rq_dl() deals with offline cpus.
*/
if (unlikely(!rq->online))
rq = dl_task_offline_migration(rq, p);
/*
* Queueing this task back might have overloaded rq, check if we need
* to kick someone away.
*/
if (has_pushable_dl_tasks(rq))
push_dl_task(rq);
#endif
unlock:
task_rq_unlock(rq, p, &flags);
/*
* This can free the task_struct, including this hrtimer, do not touch
* anything related to that after this.
*/
put_task_struct(p);
return HRTIMER_NORESTART;
}
......@@ -679,7 +740,7 @@ static void update_curr_dl(struct rq *rq)
if (dl_runtime_exceeded(dl_se)) {
dl_se->dl_throttled = 1;
__dequeue_task_dl(rq, curr, 0);
if (unlikely(!start_dl_timer(dl_se, curr->dl.dl_boosted)))
if (unlikely(dl_se->dl_boosted || !start_dl_timer(curr)))
enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH);
if (!is_leftmost(curr, &rq->dl))
......@@ -1036,8 +1097,6 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
resched_curr(rq);
}
static int pull_dl_task(struct rq *this_rq);
#endif /* CONFIG_SMP */
/*
......@@ -1094,7 +1153,15 @@ struct task_struct *pick_next_task_dl(struct rq *rq, struct task_struct *prev)
dl_rq = &rq->dl;
if (need_pull_dl_task(rq, prev)) {
/*
* This is OK, because current is on_cpu, which avoids it being
* picked for load-balance and preemption/IRQs are still
* disabled avoiding further scheduler activity on it and we're
* being very careful to re-start the picking loop.
*/
lockdep_unpin_lock(&rq->lock);
pull_dl_task(rq);
lockdep_pin_lock(&rq->lock);
/*
* pull_rt_task() can drop (and re-acquire) rq->lock; this
* means a stop task can slip in, in which case we need to
......@@ -1128,7 +1195,7 @@ struct task_struct *pick_next_task_dl(struct rq *rq, struct task_struct *prev)
if (hrtick_enabled(rq))
start_hrtick_dl(rq, p);
set_post_schedule(rq);
queue_push_tasks(rq);
return p;
}
......@@ -1165,7 +1232,6 @@ static void task_fork_dl(struct task_struct *p)
static void task_dead_dl(struct task_struct *p)
{
struct hrtimer *timer = &p->dl.dl_timer;
struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
/*
......@@ -1175,8 +1241,6 @@ static void task_dead_dl(struct task_struct *p)
/* XXX we should retain the bw until 0-lag */
dl_b->total_bw -= p->dl.dl_bw;
raw_spin_unlock_irq(&dl_b->lock);
hrtimer_cancel(timer);
}
static void set_curr_task_dl(struct rq *rq)
......@@ -1504,15 +1568,16 @@ static void push_dl_tasks(struct rq *rq)
;
}
static int pull_dl_task(struct rq *this_rq)
static void pull_dl_task(struct rq *this_rq)
{
int this_cpu = this_rq->cpu, ret = 0, cpu;
int this_cpu = this_rq->cpu, cpu;
struct task_struct *p;
bool resched = false;
struct rq *src_rq;
u64 dmin = LONG_MAX;
if (likely(!dl_overloaded(this_rq)))
return 0;
return;
/*
* Match the barrier from dl_set_overloaded; this guarantees that if we
......@@ -1567,7 +1632,7 @@ static int pull_dl_task(struct rq *this_rq)
src_rq->curr->dl.deadline))
goto skip;
ret = 1;
resched = true;
deactivate_task(src_rq, p, 0);
set_task_cpu(p, this_cpu);
......@@ -1580,12 +1645,8 @@ static int pull_dl_task(struct rq *this_rq)
double_unlock_balance(this_rq, src_rq);
}
return ret;
}
static void post_schedule_dl(struct rq *rq)
{
push_dl_tasks(rq);
if (resched)
resched_curr(this_rq);
}
/*
......@@ -1701,36 +1762,15 @@ void __init init_sched_dl_class(void)
#endif /* CONFIG_SMP */
/*
* Ensure p's dl_timer is cancelled. May drop rq->lock for a while.
*/
static void cancel_dl_timer(struct rq *rq, struct task_struct *p)
static void switched_from_dl(struct rq *rq, struct task_struct *p)
{
struct hrtimer *dl_timer = &p->dl.dl_timer;
/* Nobody will change task's class if pi_lock is held */
lockdep_assert_held(&p->pi_lock);
if (hrtimer_active(dl_timer)) {
int ret = hrtimer_try_to_cancel(dl_timer);
if (unlikely(ret == -1)) {
/*
* Note, p may migrate OR new deadline tasks
* may appear in rq when we are unlocking it.
* A caller of us must be fine with that.
* Start the deadline timer; if we switch back to dl before this we'll
* continue consuming our current CBS slice. If we stay outside of
* SCHED_DEADLINE until the deadline passes, the timer will reset the
* task.
*/
raw_spin_unlock(&rq->lock);
hrtimer_cancel(dl_timer);
raw_spin_lock(&rq->lock);
}
}
}
static void switched_from_dl(struct rq *rq, struct task_struct *p)
{
/* XXX we should retain the bw until 0-lag */
cancel_dl_timer(rq, p);
if (!start_dl_timer(p))
__dl_clear_params(p);
/*
......@@ -1741,8 +1781,7 @@ static void switched_from_dl(struct rq *rq, struct task_struct *p)
if (!task_on_rq_queued(p) || rq->dl.dl_nr_running)
return;
if (pull_dl_task(rq))
resched_curr(rq);
queue_pull_task(rq);
}
/*
......@@ -1751,21 +1790,16 @@ static void switched_from_dl(struct rq *rq, struct task_struct *p)
*/
static void switched_to_dl(struct rq *rq, struct task_struct *p)
{
int check_resched = 1;
if (task_on_rq_queued(p) && rq->curr != p) {
#ifdef CONFIG_SMP
if (p->nr_cpus_allowed > 1 && rq->dl.overloaded &&
push_dl_task(rq) && rq != task_rq(p))
/* Only reschedule if pushing failed */
check_resched = 0;
#endif /* CONFIG_SMP */
if (check_resched) {
if (p->nr_cpus_allowed > 1 && rq->dl.overloaded)
queue_push_tasks(rq);
#else
if (dl_task(rq->curr))
check_preempt_curr_dl(rq, p, 0);
else
resched_curr(rq);
}
#endif
}
}
......@@ -1785,15 +1819,14 @@ static void prio_changed_dl(struct rq *rq, struct task_struct *p,
* or lowering its prio, so...
*/
if (!rq->dl.overloaded)
pull_dl_task(rq);
queue_pull_task(rq);
/*
* If we now have a earlier deadline task than p,
* then reschedule, provided p is still on this
* runqueue.
*/
if (dl_time_before(rq->dl.earliest_dl.curr, p->dl.deadline) &&
rq->curr == p)
if (dl_time_before(rq->dl.earliest_dl.curr, p->dl.deadline))
resched_curr(rq);
#else
/*
......@@ -1823,7 +1856,6 @@ const struct sched_class dl_sched_class = {
.set_cpus_allowed = set_cpus_allowed_dl,
.rq_online = rq_online_dl,
.rq_offline = rq_offline_dl,
.post_schedule = post_schedule_dl,
.task_woken = task_woken_dl,
#endif
......
......@@ -5392,7 +5392,15 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev)
return p;
idle:
/*
* This is OK, because current is on_cpu, which avoids it being picked
* for load-balance and preemption/IRQs are still disabled avoiding
* further scheduler activity on it and we're being very careful to
* re-start the picking loop.
*/
lockdep_unpin_lock(&rq->lock);
new_tasks = idle_balance(rq);
lockdep_pin_lock(&rq->lock);
/*
* Because idle_balance() releases (and re-acquires) rq->lock, it is
* possible for any higher priority task to appear. In that case we
......@@ -7426,9 +7434,6 @@ static int idle_balance(struct rq *this_rq)
goto out;
}
/*
* Drop the rq->lock, but keep IRQ/preempt disabled.
*/
raw_spin_unlock(&this_rq->lock);
update_blocked_averages(this_cpu);
......
......@@ -260,7 +260,7 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
#ifdef CONFIG_SMP
static int pull_rt_task(struct rq *this_rq);
static void pull_rt_task(struct rq *this_rq);
static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
{
......@@ -354,13 +354,23 @@ static inline int has_pushable_tasks(struct rq *rq)
return !plist_head_empty(&rq->rt.pushable_tasks);
}
static inline void set_post_schedule(struct rq *rq)
static DEFINE_PER_CPU(struct callback_head, rt_push_head);
static DEFINE_PER_CPU(struct callback_head, rt_pull_head);
static void push_rt_tasks(struct rq *);
static void pull_rt_task(struct rq *);
static inline void queue_push_tasks(struct rq *rq)
{
/*
* We detect this state here so that we can avoid taking the RQ
* lock again later if there is no need to push
*/
rq->post_schedule = has_pushable_tasks(rq);
if (!has_pushable_tasks(rq))
return;
queue_balance_callback(rq, &per_cpu(rt_push_head, rq->cpu), push_rt_tasks);
}
static inline void queue_pull_task(struct rq *rq)
{
queue_balance_callback(rq, &per_cpu(rt_pull_head, rq->cpu), pull_rt_task);
}
static void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
......@@ -412,12 +422,11 @@ static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
return false;
}
static inline int pull_rt_task(struct rq *this_rq)
static inline void pull_rt_task(struct rq *this_rq)
{
return 0;
}
static inline void set_post_schedule(struct rq *rq)
static inline void queue_push_tasks(struct rq *rq)
{
}
#endif /* CONFIG_SMP */
......@@ -1469,7 +1478,15 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev)
struct rt_rq *rt_rq = &rq->rt;
if (need_pull_rt_task(rq, prev)) {
/*
* This is OK, because current is on_cpu, which avoids it being
* picked for load-balance and preemption/IRQs are still
* disabled avoiding further scheduler activity on it and we're
* being very careful to re-start the picking loop.
*/
lockdep_unpin_lock(&rq->lock);
pull_rt_task(rq);
lockdep_pin_lock(&rq->lock);
/*
* pull_rt_task() can drop (and re-acquire) rq->lock; this
* means a dl or stop task can slip in, in which case we need
......@@ -1497,7 +1514,7 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev)
/* The running task is never eligible for pushing */
dequeue_pushable_task(rq, p);
set_post_schedule(rq);
queue_push_tasks(rq);
return p;
}
......@@ -1952,14 +1969,15 @@ static void push_irq_work_func(struct irq_work *work)
}
#endif /* HAVE_RT_PUSH_IPI */
static int pull_rt_task(struct rq *this_rq)
static void pull_rt_task(struct rq *this_rq)
{
int this_cpu = this_rq->cpu, ret = 0, cpu;
int this_cpu = this_rq->cpu, cpu;
bool resched = false;
struct task_struct *p;
struct rq *src_rq;
if (likely(!rt_overloaded(this_rq)))
return 0;
return;
/*
* Match the barrier from rt_set_overloaded; this guarantees that if we
......@@ -1970,7 +1988,7 @@ static int pull_rt_task(struct rq *this_rq)
#ifdef HAVE_RT_PUSH_IPI
if (sched_feat(RT_PUSH_IPI)) {
tell_cpu_to_push(this_rq);
return 0;
return;
}
#endif
......@@ -2023,7 +2041,7 @@ static int pull_rt_task(struct rq *this_rq)
if (p->prio < src_rq->curr->prio)
goto skip;
ret = 1;
resched = true;
deactivate_task(src_rq, p, 0);
set_task_cpu(p, this_cpu);
......@@ -2039,12 +2057,8 @@ static int pull_rt_task(struct rq *this_rq)
double_unlock_balance(this_rq, src_rq);
}
return ret;
}
static void post_schedule_rt(struct rq *rq)
{
push_rt_tasks(rq);
if (resched)
resched_curr(this_rq);
}
/*
......@@ -2140,8 +2154,7 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p)
if (!task_on_rq_queued(p) || rq->rt.rt_nr_running)
return;
if (pull_rt_task(rq))
resched_curr(rq);
queue_pull_task(rq);
}
void __init init_sched_rt_class(void)
......@@ -2162,8 +2175,6 @@ void __init init_sched_rt_class(void)
*/
static void switched_to_rt(struct rq *rq, struct task_struct *p)
{
int check_resched = 1;
/*
* If we are already running, then there's nothing
* that needs to be done. But if we are not running
......@@ -2173,13 +2184,12 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p)
*/
if (task_on_rq_queued(p) && rq->curr != p) {
#ifdef CONFIG_SMP
if (p->nr_cpus_allowed > 1 && rq->rt.overloaded &&
/* Don't resched if we changed runqueues */
push_rt_task(rq) && rq != task_rq(p))
check_resched = 0;
#endif /* CONFIG_SMP */
if (check_resched && p->prio < rq->curr->prio)
if (p->nr_cpus_allowed > 1 && rq->rt.overloaded)
queue_push_tasks(rq);
#else
if (p->prio < rq->curr->prio)
resched_curr(rq);
#endif /* CONFIG_SMP */
}
}
......@@ -2200,14 +2210,13 @@ prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio)
* may need to pull tasks to this runqueue.
*/
if (oldprio < p->prio)
pull_rt_task(rq);
queue_pull_task(rq);
/*
* If there's a higher priority task waiting to run
* then reschedule. Note, the above pull_rt_task
* can release the rq lock and p could migrate.
* Only reschedule if p is still on the same runqueue.
* then reschedule.
*/
if (p->prio > rq->rt.highest_prio.curr && rq->curr == p)
if (p->prio > rq->rt.highest_prio.curr)
resched_curr(rq);
#else
/* For UP simply resched on drop of prio */
......@@ -2318,7 +2327,6 @@ const struct sched_class rt_sched_class = {
.set_cpus_allowed = set_cpus_allowed_rt,
.rq_online = rq_online_rt,
.rq_offline = rq_offline_rt,
.post_schedule = post_schedule_rt,
.task_woken = task_woken_rt,
.switched_from = switched_from_rt,
#endif
......
......@@ -624,9 +624,10 @@ struct rq {
unsigned long cpu_capacity;
unsigned long cpu_capacity_orig;
struct callback_head *balance_callback;
unsigned char idle_balance;
/* For active balancing */
int post_schedule;
int active_balance;
int push_cpu;
struct cpu_stop_work active_balance_work;
......@@ -767,6 +768,21 @@ extern int migrate_swap(struct task_struct *, struct task_struct *);
#ifdef CONFIG_SMP
static inline void
queue_balance_callback(struct rq *rq,
struct callback_head *head,
void (*func)(struct rq *rq))
{
lockdep_assert_held(&rq->lock);
if (unlikely(head->next))
return;
head->func = (void (*)(struct callback_head *))func;
head->next = rq->balance_callback;
rq->balance_callback = head;
}
extern void sched_ttwu_pending(void);
#define rcu_dereference_check_sched_domain(p) \
......@@ -1192,7 +1208,6 @@ struct sched_class {
int (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags);
void (*migrate_task_rq)(struct task_struct *p, int next_cpu);
void (*post_schedule) (struct rq *this_rq);
void (*task_waking) (struct task_struct *task);
void (*task_woken) (struct rq *this_rq, struct task_struct *task);
......@@ -1423,8 +1438,10 @@ static inline struct rq *__task_rq_lock(struct task_struct *p)
for (;;) {
rq = task_rq(p);
raw_spin_lock(&rq->lock);
if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) {
lockdep_pin_lock(&rq->lock);
return rq;
}
raw_spin_unlock(&rq->lock);
while (unlikely(task_on_rq_migrating(p)))
......@@ -1461,8 +1478,10 @@ static inline struct rq *task_rq_lock(struct task_struct *p, unsigned long *flag
* If we observe the new cpu in task_rq_lock, the acquire will
* pair with the WMB to ensure we must then also see migrating.
*/
if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) {
lockdep_pin_lock(&rq->lock);
return rq;
}
raw_spin_unlock(&rq->lock);
raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
......@@ -1474,6 +1493,7 @@ static inline struct rq *task_rq_lock(struct task_struct *p, unsigned long *flag
static inline void __task_rq_unlock(struct rq *rq)
__releases(rq->lock)
{
lockdep_unpin_lock(&rq->lock);
raw_spin_unlock(&rq->lock);
}
......@@ -1482,6 +1502,7 @@ task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags)
__releases(rq->lock)
__releases(p->pi_lock)
{
lockdep_unpin_lock(&rq->lock);
raw_spin_unlock(&rq->lock);
raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment