Commit 98ec21a0 authored by Linus Torvalds

Merge branch 'sched-hrtimers-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler updates from Thomas Gleixner:
 "This series of scheduler updates depends on sched/core and timers/core
  branches, which are already in your tree:

   - Scheduler balancing overhaul to plug a hard to trigger race which
     causes an oops in the balancer (Peter Zijlstra)

   - Lockdep updates which are related to the balancing updates (Peter
     Zijlstra)"

* 'sched-hrtimers-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched,lockdep: Employ lock pinning
  lockdep: Implement lock pinning
  lockdep: Simplify lock_release()
  sched: Streamline the task migration locking a little
  sched: Move code around
  sched,dl: Fix sched class hopping CBS hole
  sched, dl: Convert switched_{from, to}_dl() / prio_changed_dl() to balance callbacks
  sched,dl: Remove return value from pull_dl_task()
  sched, rt: Convert switched_{from, to}_rt() / prio_changed_rt() to balance callbacks
  sched,rt: Remove return value from pull_rt_task()
  sched: Allow balance callbacks for check_class_changed()
  sched: Use replace normalize_task() with __sched_setscheduler()
  sched: Replace post_schedule with a balance callback list
parents a2629483 cbce1a68
......@@ -255,6 +255,7 @@ struct held_lock {
unsigned int check:1; /* see lock_acquire() comment */
unsigned int hardirqs_off:1;
unsigned int references:12; /* 32 bits */
unsigned int pin_count;
};
/*
......@@ -354,6 +355,9 @@ extern void lockdep_set_current_reclaim_state(gfp_t gfp_mask);
extern void lockdep_clear_current_reclaim_state(void);
extern void lockdep_trace_alloc(gfp_t mask);
extern void lock_pin_lock(struct lockdep_map *lock);
extern void lock_unpin_lock(struct lockdep_map *lock);
# define INIT_LOCKDEP .lockdep_recursion = 0, .lockdep_reclaim_gfp = 0,
#define lockdep_depth(tsk) (debug_locks ? (tsk)->lockdep_depth : 0)
......@@ -368,6 +372,9 @@ extern void lockdep_trace_alloc(gfp_t mask);
#define lockdep_recursing(tsk) ((tsk)->lockdep_recursion)
#define lockdep_pin_lock(l) lock_pin_lock(&(l)->dep_map)
#define lockdep_unpin_lock(l) lock_unpin_lock(&(l)->dep_map)
#else /* !CONFIG_LOCKDEP */
static inline void lockdep_off(void)
......@@ -420,6 +427,9 @@ struct lock_class_key { };
#define lockdep_recursing(tsk) (0)
#define lockdep_pin_lock(l) do { (void)(l); } while (0)
#define lockdep_unpin_lock(l) do { (void)(l); } while (0)
#endif /* !LOCKDEP */
#ifdef CONFIG_LOCK_STAT
......
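Note on the header hunks above: each held_lock gains a pin_count and the lockdep_pin_lock()/lockdep_unpin_lock() wrappers become available (no-ops without CONFIG_LOCKDEP). A rough sketch of the intended usage, mirroring what the scheduler hunks further down do around idle_balance() and pull_rt_task(); pinned_critical_section() and do_balance_work() are hypothetical names, and pinning changes no locking behaviour, it only arms a lockdep check:

/*
 * Illustrative only: pinned_critical_section() and do_balance_work()
 * are hypothetical.  Pinning does not change how the lock behaves; it
 * only makes lockdep report an unexpected unlock inside the pinned
 * region ("releasing a pinned lock").
 */
static void do_balance_work(struct rq *rq);	/* may drop and re-take rq->lock */

static void pinned_critical_section(struct rq *rq)
{
	raw_spin_lock(&rq->lock);
	lockdep_pin_lock(&rq->lock);	/* unlocking now would trigger a WARN */

	/* ... code that relies on rq->lock staying held throughout ... */

	lockdep_unpin_lock(&rq->lock);	/* this helper is allowed to drop it */
	do_balance_work(rq);
	lockdep_pin_lock(&rq->lock);	/* back under the "must stay held" rule */

	lockdep_unpin_lock(&rq->lock);
	raw_spin_unlock(&rq->lock);
}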
......@@ -3157,6 +3157,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
hlock->waittime_stamp = 0;
hlock->holdtime_stamp = lockstat_clock();
#endif
hlock->pin_count = 0;
if (check && !mark_irqflags(curr, hlock))
return 0;
......@@ -3260,26 +3261,6 @@ print_unlock_imbalance_bug(struct task_struct *curr, struct lockdep_map *lock,
return 0;
}
/*
* Common debugging checks for both nested and non-nested unlock:
*/
static int check_unlock(struct task_struct *curr, struct lockdep_map *lock,
unsigned long ip)
{
if (unlikely(!debug_locks))
return 0;
/*
* Lockdep should run with IRQs disabled, recursion, head-ache, etc..
*/
if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
return 0;
if (curr->lockdep_depth <= 0)
return print_unlock_imbalance_bug(curr, lock, ip);
return 1;
}
static int match_held_lock(struct held_lock *hlock, struct lockdep_map *lock)
{
if (hlock->instance == lock)
......@@ -3376,31 +3357,35 @@ __lock_set_class(struct lockdep_map *lock, const char *name,
}
/*
* Remove the lock to the list of currently held locks in a
* potentially non-nested (out of order) manner. This is a
* relatively rare operation, as all the unlock APIs default
* to nested mode (which uses lock_release()):
* Remove the lock to the list of currently held locks - this gets
* called on mutex_unlock()/spin_unlock*() (or on a failed
* mutex_lock_interruptible()).
*
* @nested is an hysterical artifact, needs a tree wide cleanup.
*/
static int
lock_release_non_nested(struct task_struct *curr,
struct lockdep_map *lock, unsigned long ip)
__lock_release(struct lockdep_map *lock, int nested, unsigned long ip)
{
struct task_struct *curr = current;
struct held_lock *hlock, *prev_hlock;
unsigned int depth;
int i;
/*
* Check whether the lock exists in the current stack
* of held locks:
*/
if (unlikely(!debug_locks))
return 0;
depth = curr->lockdep_depth;
/*
* So we're all set to release this lock.. wait what lock? We don't
* own any locks, you've been drinking again?
*/
if (DEBUG_LOCKS_WARN_ON(!depth))
return 0;
if (DEBUG_LOCKS_WARN_ON(depth <= 0))
return print_unlock_imbalance_bug(curr, lock, ip);
/*
* Check whether the lock exists in the current stack
* of held locks:
*/
prev_hlock = NULL;
for (i = depth-1; i >= 0; i--) {
hlock = curr->held_locks + i;
......@@ -3419,6 +3404,8 @@ lock_release_non_nested(struct task_struct *curr,
if (hlock->instance == lock)
lock_release_holdtime(hlock);
WARN(hlock->pin_count, "releasing a pinned lock\n");
if (hlock->references) {
hlock->references--;
if (hlock->references) {
......@@ -3456,91 +3443,66 @@ lock_release_non_nested(struct task_struct *curr,
*/
if (DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth - 1))
return 0;
return 1;
}
/*
* Remove the lock to the list of currently held locks - this gets
* called on mutex_unlock()/spin_unlock*() (or on a failed
* mutex_lock_interruptible()). This is done for unlocks that nest
* perfectly. (i.e. the current top of the lock-stack is unlocked)
*/
static int lock_release_nested(struct task_struct *curr,
struct lockdep_map *lock, unsigned long ip)
static int __lock_is_held(struct lockdep_map *lock)
{
struct held_lock *hlock;
unsigned int depth;
struct task_struct *curr = current;
int i;
/*
* Pop off the top of the lock stack:
*/
depth = curr->lockdep_depth - 1;
hlock = curr->held_locks + depth;
for (i = 0; i < curr->lockdep_depth; i++) {
struct held_lock *hlock = curr->held_locks + i;
/*
* Is the unlock non-nested:
*/
if (hlock->instance != lock || hlock->references)
return lock_release_non_nested(curr, lock, ip);
curr->lockdep_depth--;
if (match_held_lock(hlock, lock))
return 1;
}
/*
* No more locks, but somehow we've got hash left over, who left it?
*/
if (DEBUG_LOCKS_WARN_ON(!depth && (hlock->prev_chain_key != 0)))
return 0;
curr->curr_chain_key = hlock->prev_chain_key;
lock_release_holdtime(hlock);
#ifdef CONFIG_DEBUG_LOCKDEP
hlock->prev_chain_key = 0;
hlock->class_idx = 0;
hlock->acquire_ip = 0;
hlock->irq_context = 0;
#endif
return 1;
}
/*
* Remove the lock to the list of currently held locks - this gets
* called on mutex_unlock()/spin_unlock*() (or on a failed
* mutex_lock_interruptible()). This is done for unlocks that nest
* perfectly. (i.e. the current top of the lock-stack is unlocked)
*/
static void
__lock_release(struct lockdep_map *lock, int nested, unsigned long ip)
static void __lock_pin_lock(struct lockdep_map *lock)
{
struct task_struct *curr = current;
int i;
if (!check_unlock(curr, lock, ip))
if (unlikely(!debug_locks))
return;
if (nested) {
if (!lock_release_nested(curr, lock, ip))
return;
} else {
if (!lock_release_non_nested(curr, lock, ip))
for (i = 0; i < curr->lockdep_depth; i++) {
struct held_lock *hlock = curr->held_locks + i;
if (match_held_lock(hlock, lock)) {
hlock->pin_count++;
return;
}
}
check_chain_key(curr);
WARN(1, "pinning an unheld lock\n");
}
static int __lock_is_held(struct lockdep_map *lock)
static void __lock_unpin_lock(struct lockdep_map *lock)
{
struct task_struct *curr = current;
int i;
if (unlikely(!debug_locks))
return;
for (i = 0; i < curr->lockdep_depth; i++) {
struct held_lock *hlock = curr->held_locks + i;
if (match_held_lock(hlock, lock))
return 1;
if (match_held_lock(hlock, lock)) {
if (WARN(!hlock->pin_count, "unpinning an unpinned lock\n"))
return;
hlock->pin_count--;
return;
}
}
return 0;
WARN(1, "unpinning an unheld lock\n");
}
/*
......@@ -3639,7 +3601,8 @@ void lock_release(struct lockdep_map *lock, int nested,
check_flags(flags);
current->lockdep_recursion = 1;
trace_lock_release(lock, ip);
__lock_release(lock, nested, ip);
if (__lock_release(lock, nested, ip))
check_chain_key(current);
current->lockdep_recursion = 0;
raw_local_irq_restore(flags);
}
......@@ -3665,6 +3628,40 @@ int lock_is_held(struct lockdep_map *lock)
}
EXPORT_SYMBOL_GPL(lock_is_held);
void lock_pin_lock(struct lockdep_map *lock)
{
unsigned long flags;
if (unlikely(current->lockdep_recursion))
return;
raw_local_irq_save(flags);
check_flags(flags);
current->lockdep_recursion = 1;
__lock_pin_lock(lock);
current->lockdep_recursion = 0;
raw_local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(lock_pin_lock);
void lock_unpin_lock(struct lockdep_map *lock)
{
unsigned long flags;
if (unlikely(current->lockdep_recursion))
return;
raw_local_irq_save(flags);
check_flags(flags);
current->lockdep_recursion = 1;
__lock_unpin_lock(lock);
current->lockdep_recursion = 0;
raw_local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(lock_unpin_lock);
void lockdep_set_current_reclaim_state(gfp_t gfp_mask)
{
current->lockdep_reclaim_gfp = gfp_mask;
......
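Putting the lockdep pieces above together, the failure mode the new checks catch looks roughly like the following; this is a minimal illustration, not code from this commit, and my_lock, buggy_helper() and pinned_section() are made-up names:

static DEFINE_RAW_SPINLOCK(my_lock);	/* hypothetical lock */

static void buggy_helper(void)
{
	/* Drops a lock its caller still believes is held and pinned. */
	raw_spin_unlock(&my_lock);	/* __lock_release(): "releasing a pinned lock" */
	raw_spin_lock(&my_lock);
}

static void pinned_section(void)
{
	raw_spin_lock(&my_lock);
	lockdep_pin_lock(&my_lock);	/* pin_count++ on the held_lock entry */

	buggy_helper();			/* the unlock inside it now WARNs */

	lockdep_unpin_lock(&my_lock);	/* also WARNs: the fresh acquisition
					 * taken by buggy_helper() is unpinned */
	raw_spin_unlock(&my_lock);
}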
This diff is collapsed.
This diff is collapsed.
......@@ -5392,7 +5392,15 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev)
return p;
idle:
/*
* This is OK, because current is on_cpu, which avoids it being picked
* for load-balance and preemption/IRQs are still disabled avoiding
* further scheduler activity on it and we're being very careful to
* re-start the picking loop.
*/
lockdep_unpin_lock(&rq->lock);
new_tasks = idle_balance(rq);
lockdep_pin_lock(&rq->lock);
/*
* Because idle_balance() releases (and re-acquires) rq->lock, it is
* possible for any higher priority task to appear. In that case we
......@@ -7426,9 +7434,6 @@ static int idle_balance(struct rq *this_rq)
goto out;
}
/*
* Drop the rq->lock, but keep IRQ/preempt disabled.
*/
raw_spin_unlock(&this_rq->lock);
update_blocked_averages(this_cpu);
......
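The comment added in pick_next_task_fair() gives the caller-side argument for temporarily unpinning; the matching callee-side contract for idle_balance(), which the removed "Drop the rq->lock" comment used to hint at, is roughly the following, sketched with a hypothetical balance_callee():

/*
 * Sketch only (balance_callee() is hypothetical): any helper that, like
 * idle_balance(), drops and re-takes this_rq->lock must be called with
 * the lock unpinned, otherwise the raw_spin_unlock() below trips the
 * "releasing a pinned lock" warning.  IRQs and preemption stay disabled
 * across the window, so the current task cannot be scheduled elsewhere.
 */
static int balance_callee(struct rq *this_rq)
{
	int pulled = 0;

	lockdep_assert_held(&this_rq->lock);

	raw_spin_unlock(&this_rq->lock);
	/* ... inspect other runqueues, possibly pulling work over ... */
	raw_spin_lock(&this_rq->lock);

	return pulled;
}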
......@@ -260,7 +260,7 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
#ifdef CONFIG_SMP
static int pull_rt_task(struct rq *this_rq);
static void pull_rt_task(struct rq *this_rq);
static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
{
......@@ -354,13 +354,23 @@ static inline int has_pushable_tasks(struct rq *rq)
return !plist_head_empty(&rq->rt.pushable_tasks);
}
static inline void set_post_schedule(struct rq *rq)
static DEFINE_PER_CPU(struct callback_head, rt_push_head);
static DEFINE_PER_CPU(struct callback_head, rt_pull_head);
static void push_rt_tasks(struct rq *);
static void pull_rt_task(struct rq *);
static inline void queue_push_tasks(struct rq *rq)
{
/*
* We detect this state here so that we can avoid taking the RQ
* lock again later if there is no need to push
*/
rq->post_schedule = has_pushable_tasks(rq);
if (!has_pushable_tasks(rq))
return;
queue_balance_callback(rq, &per_cpu(rt_push_head, rq->cpu), push_rt_tasks);
}
static inline void queue_pull_task(struct rq *rq)
{
queue_balance_callback(rq, &per_cpu(rt_pull_head, rq->cpu), pull_rt_task);
}
static void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
......@@ -412,12 +422,11 @@ static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
return false;
}
static inline int pull_rt_task(struct rq *this_rq)
static inline void pull_rt_task(struct rq *this_rq)
{
return 0;
}
static inline void set_post_schedule(struct rq *rq)
static inline void queue_push_tasks(struct rq *rq)
{
}
#endif /* CONFIG_SMP */
......@@ -1469,7 +1478,15 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev)
struct rt_rq *rt_rq = &rq->rt;
if (need_pull_rt_task(rq, prev)) {
/*
* This is OK, because current is on_cpu, which avoids it being
* picked for load-balance and preemption/IRQs are still
* disabled avoiding further scheduler activity on it and we're
* being very careful to re-start the picking loop.
*/
lockdep_unpin_lock(&rq->lock);
pull_rt_task(rq);
lockdep_pin_lock(&rq->lock);
/*
* pull_rt_task() can drop (and re-acquire) rq->lock; this
* means a dl or stop task can slip in, in which case we need
......@@ -1497,7 +1514,7 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev)
/* The running task is never eligible for pushing */
dequeue_pushable_task(rq, p);
set_post_schedule(rq);
queue_push_tasks(rq);
return p;
}
......@@ -1952,14 +1969,15 @@ static void push_irq_work_func(struct irq_work *work)
}
#endif /* HAVE_RT_PUSH_IPI */
static int pull_rt_task(struct rq *this_rq)
static void pull_rt_task(struct rq *this_rq)
{
int this_cpu = this_rq->cpu, ret = 0, cpu;
int this_cpu = this_rq->cpu, cpu;
bool resched = false;
struct task_struct *p;
struct rq *src_rq;
if (likely(!rt_overloaded(this_rq)))
return 0;
return;
/*
* Match the barrier from rt_set_overloaded; this guarantees that if we
......@@ -1970,7 +1988,7 @@ static int pull_rt_task(struct rq *this_rq)
#ifdef HAVE_RT_PUSH_IPI
if (sched_feat(RT_PUSH_IPI)) {
tell_cpu_to_push(this_rq);
return 0;
return;
}
#endif
......@@ -2023,7 +2041,7 @@ static int pull_rt_task(struct rq *this_rq)
if (p->prio < src_rq->curr->prio)
goto skip;
ret = 1;
resched = true;
deactivate_task(src_rq, p, 0);
set_task_cpu(p, this_cpu);
......@@ -2039,12 +2057,8 @@ static int pull_rt_task(struct rq *this_rq)
double_unlock_balance(this_rq, src_rq);
}
return ret;
}
static void post_schedule_rt(struct rq *rq)
{
push_rt_tasks(rq);
if (resched)
resched_curr(this_rq);
}
/*
......@@ -2140,8 +2154,7 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p)
if (!task_on_rq_queued(p) || rq->rt.rt_nr_running)
return;
if (pull_rt_task(rq))
resched_curr(rq);
queue_pull_task(rq);
}
void __init init_sched_rt_class(void)
......@@ -2162,8 +2175,6 @@ void __init init_sched_rt_class(void)
*/
static void switched_to_rt(struct rq *rq, struct task_struct *p)
{
int check_resched = 1;
/*
* If we are already running, then there's nothing
* that needs to be done. But if we are not running
......@@ -2173,13 +2184,12 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p)
*/
if (task_on_rq_queued(p) && rq->curr != p) {
#ifdef CONFIG_SMP
if (p->nr_cpus_allowed > 1 && rq->rt.overloaded &&
/* Don't resched if we changed runqueues */
push_rt_task(rq) && rq != task_rq(p))
check_resched = 0;
#endif /* CONFIG_SMP */
if (check_resched && p->prio < rq->curr->prio)
if (p->nr_cpus_allowed > 1 && rq->rt.overloaded)
queue_push_tasks(rq);
#else
if (p->prio < rq->curr->prio)
resched_curr(rq);
#endif /* CONFIG_SMP */
}
}
......@@ -2200,14 +2210,13 @@ prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio)
* may need to pull tasks to this runqueue.
*/
if (oldprio < p->prio)
pull_rt_task(rq);
queue_pull_task(rq);
/*
* If there's a higher priority task waiting to run
* then reschedule. Note, the above pull_rt_task
* can release the rq lock and p could migrate.
* Only reschedule if p is still on the same runqueue.
* then reschedule.
*/
if (p->prio > rq->rt.highest_prio.curr && rq->curr == p)
if (p->prio > rq->rt.highest_prio.curr)
resched_curr(rq);
#else
/* For UP simply resched on drop of prio */
......@@ -2318,7 +2327,6 @@ const struct sched_class rt_sched_class = {
.set_cpus_allowed = set_cpus_allowed_rt,
.rq_online = rq_online_rt,
.rq_offline = rq_offline_rt,
.post_schedule = post_schedule_rt,
.task_woken = task_woken_rt,
.switched_from = switched_from_rt,
#endif
......
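The rt hunks above replace the direct push/pull calls and the post_schedule_rt() hook with deferred balance callbacks: while rq->lock is held and pinned, the class only queues a per-CPU callback_head, and the actual pushing or pulling runs later, once the lock may legitimately be dropped. Per the shortlog, the deadline class gets the same conversion; its diff is not expanded on this page. A condensed sketch of how a class wires this up, with hypothetical "foo" names standing in for rt/dl:

static DEFINE_PER_CPU(struct callback_head, foo_push_head);
static DEFINE_PER_CPU(struct callback_head, foo_pull_head);

static void push_foo_tasks(struct rq *rq);	/* hypothetical, cf. push_rt_tasks() */
static void pull_foo_task(struct rq *rq);	/* hypothetical, cf. pull_rt_task() */

static inline void queue_push_foo_tasks(struct rq *rq)
{
	/* Called with rq->lock held (and pinned); the push itself runs later. */
	queue_balance_callback(rq, &per_cpu(foo_push_head, rq->cpu),
			       push_foo_tasks);
}

static inline void queue_pull_foo_task(struct rq *rq)
{
	queue_balance_callback(rq, &per_cpu(foo_pull_head, rq->cpu),
			       pull_foo_task);
}

static void switched_from_foo(struct rq *rq, struct task_struct *p)
{
	/* Don't pull here with rq->lock pinned; defer it instead. */
	queue_pull_foo_task(rq);
}

Because queue_balance_callback() returns early when head->next is already set, queueing the same callback more than once before it runs is harmless.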
......@@ -624,9 +624,10 @@ struct rq {
unsigned long cpu_capacity;
unsigned long cpu_capacity_orig;
struct callback_head *balance_callback;
unsigned char idle_balance;
/* For active balancing */
int post_schedule;
int active_balance;
int push_cpu;
struct cpu_stop_work active_balance_work;
......@@ -767,6 +768,21 @@ extern int migrate_swap(struct task_struct *, struct task_struct *);
#ifdef CONFIG_SMP
static inline void
queue_balance_callback(struct rq *rq,
struct callback_head *head,
void (*func)(struct rq *rq))
{
lockdep_assert_held(&rq->lock);
if (unlikely(head->next))
return;
head->func = (void (*)(struct callback_head *))func;
head->next = rq->balance_callback;
rq->balance_callback = head;
}
extern void sched_ttwu_pending(void);
#define rcu_dereference_check_sched_domain(p) \
......@@ -1192,7 +1208,6 @@ struct sched_class {
int (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags);
void (*migrate_task_rq)(struct task_struct *p, int next_cpu);
void (*post_schedule) (struct rq *this_rq);
void (*task_waking) (struct task_struct *task);
void (*task_woken) (struct rq *this_rq, struct task_struct *task);
......@@ -1423,8 +1438,10 @@ static inline struct rq *__task_rq_lock(struct task_struct *p)
for (;;) {
rq = task_rq(p);
raw_spin_lock(&rq->lock);
if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) {
lockdep_pin_lock(&rq->lock);
return rq;
}
raw_spin_unlock(&rq->lock);
while (unlikely(task_on_rq_migrating(p)))
......@@ -1461,8 +1478,10 @@ static inline struct rq *task_rq_lock(struct task_struct *p, unsigned long *flag
* If we observe the new cpu in task_rq_lock, the acquire will
* pair with the WMB to ensure we must then also see migrating.
*/
if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) {
lockdep_pin_lock(&rq->lock);
return rq;
}
raw_spin_unlock(&rq->lock);
raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
......@@ -1474,6 +1493,7 @@ static inline struct rq *task_rq_lock(struct task_struct *p, unsigned long *flag
static inline void __task_rq_unlock(struct rq *rq)
__releases(rq->lock)
{
lockdep_unpin_lock(&rq->lock);
raw_spin_unlock(&rq->lock);
}
......@@ -1482,6 +1502,7 @@ task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags)
__releases(rq->lock)
__releases(p->pi_lock)
{
lockdep_unpin_lock(&rq->lock);
raw_spin_unlock(&rq->lock);
raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
}
......
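queue_balance_callback() above only links a node onto rq->balance_callback; the code that consumes the list is in one of the collapsed diffs, so the drain side is only sketched here from how the nodes are linked (an assumed shape, not copied from this page): detach the list under rq->lock, clear each node's next so it can be re-queued, and call the function, which may itself drop and re-take the lock.

/* Hedged sketch of the consumer side; not taken verbatim from this page. */
static void balance_callbacks_sketch(struct rq *rq)
{
	struct callback_head *head, *next;
	void (*func)(struct rq *rq);
	unsigned long flags;

	raw_spin_lock_irqsave(&rq->lock, flags);
	head = rq->balance_callback;
	rq->balance_callback = NULL;
	while (head) {
		func = (void (*)(struct rq *))head->func;
		next = head->next;
		head->next = NULL;	/* allow the node to be queued again */
		head = next;

		func(rq);		/* may drop and re-acquire rq->lock */
	}
	raw_spin_unlock_irqrestore(&rq->lock, flags);
}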