Commit abc158c8 authored by Peter Zijlstra

sched: Prepare generic code for delayed dequeue

While most of the delayed dequeue code can be done inside the
sched_class itself, there is one location where we do not have an
appropriate hook, namely ttwu_runnable().

Add an ENQUEUE_DELAYED call to the on_rq path to deal with waking
delayed dequeue tasks.
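
For reference, a condensed sketch of the resulting on_rq wakeup path; the parts of the function outside the hunk below are paraphrased rather than quoted from the tree:

/*
 * Sketch of ttwu_runnable() after this change: if the task was left on the
 * runqueue by a delayed dequeue, waking it simply re-enqueues it with
 * ENQUEUE_DELAYED so the sched_class can undo the delayed state.
 */
static int ttwu_runnable(struct task_struct *p, int wake_flags)
{
	struct rq_flags rf;
	struct rq *rq;
	int ret = 0;

	rq = __task_rq_lock(p, &rf);
	if (task_on_rq_queued(p)) {
		update_rq_clock(rq);
		if (p->se.sched_delayed)
			/* Wake a delayed-dequeue task by re-enqueueing it. */
			enqueue_task(rq, p, ENQUEUE_NOCLOCK | ENQUEUE_DELAYED);
		if (!task_on_cpu(rq, p)) {
			/* Preempted but still queued: maybe preempt current. */
			wakeup_preempt(rq, p, wake_flags);
		}
		ttwu_do_wakeup(p);
		ret = 1;
	}
	__task_rq_unlock(rq, &rf);

	return ret;
}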
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Valentin Schneider <vschneid@redhat.com>
Tested-by: Valentin Schneider <vschneid@redhat.com>
Link: https://lkml.kernel.org/r/20240727105029.200000445@infradead.org
parent e8901061
@@ -544,6 +544,7 @@ struct sched_entity {
 	struct list_head		group_node;
 	unsigned int			on_rq;
+	unsigned int			sched_delayed;

 	u64				exec_start;
 	u64				sum_exec_runtime;
...
@@ -2036,6 +2036,8 @@ void activate_task(struct rq *rq, struct task_struct *p, int flags)
 void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
 {
+	SCHED_WARN_ON(flags & DEQUEUE_SLEEP);
+
 	WRITE_ONCE(p->on_rq, TASK_ON_RQ_MIGRATING);
 	ASSERT_EXCLUSIVE_WRITER(p->on_rq);
@@ -3689,12 +3691,14 @@ static int ttwu_runnable(struct task_struct *p, int wake_flags)
 	rq = __task_rq_lock(p, &rf);
 	if (task_on_rq_queued(p)) {
+		update_rq_clock(rq);
+		if (p->se.sched_delayed)
+			enqueue_task(rq, p, ENQUEUE_NOCLOCK | ENQUEUE_DELAYED);
 		if (!task_on_cpu(rq, p)) {
 			/*
 			 * When on_rq && !on_cpu the task is preempted, see if
 			 * it should preempt the task that is current now.
 			 */
-			update_rq_clock(rq);
 			wakeup_preempt(rq, p, wake_flags);
 		}
 		ttwu_do_wakeup(p);
@@ -4074,11 +4078,16 @@ int try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 	 * case the whole 'p->on_rq && ttwu_runnable()' case below
 	 * without taking any locks.
 	 *
+	 * Specifically, given current runs ttwu() we must be before
+	 * schedule()'s block_task(), as such this must not observe
+	 * sched_delayed.
+	 *
 	 * In particular:
 	 *  - we rely on Program-Order guarantees for all the ordering,
 	 *  - we're serialized against set_special_state() by virtue of
 	 *    it disabling IRQs (this allows not taking ->pi_lock).
 	 */
+	SCHED_WARN_ON(p->se.sched_delayed);
 	if (!ttwu_state_match(p, state, &success))
 		goto out;
@@ -4370,6 +4379,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 	p->se.slice			= sysctl_sched_base_slice;
 	INIT_LIST_HEAD(&p->se.group_node);

+	/* A delayed task cannot be in clone(). */
+	SCHED_WARN_ON(p->se.sched_delayed);
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	p->se.cfs_rq			= NULL;
 #endif
...
@@ -2253,6 +2253,7 @@ extern const u32 sched_prio_to_wmult[40];
 #define DEQUEUE_MOVE		0x04 /* Matches ENQUEUE_MOVE */
 #define DEQUEUE_NOCLOCK		0x08 /* Matches ENQUEUE_NOCLOCK */
 #define DEQUEUE_MIGRATING	0x100 /* Matches ENQUEUE_MIGRATING */
+#define DEQUEUE_DELAYED		0x200 /* Matches ENQUEUE_DELAYED */

 #define ENQUEUE_WAKEUP		0x01
 #define ENQUEUE_RESTORE		0x02
...
@@ -2268,6 +2269,7 @@ extern const u32 sched_prio_to_wmult[40];
 #endif

 #define ENQUEUE_INITIAL		0x80
 #define ENQUEUE_MIGRATING	0x100
+#define ENQUEUE_DELAYED		0x200

 #define RETRY_TASK		((void *)-1UL)
...
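
As the "Matches ENQUEUE_*" comments note, the DEQUEUE_* and ENQUEUE_* values are kept in lock-step, so the new pair shares the 0x200 bit and the same flags word can be carried from a dequeue into the paired enqueue. A purely illustrative compile-time check of that pairing (not part of this patch) could look like:

/* Illustrative only, not in the patch: the paired flag values must line up. */
static_assert(DEQUEUE_DELAYED   == ENQUEUE_DELAYED);
static_assert(DEQUEUE_MIGRATING == ENQUEUE_MIGRATING);
static_assert(DEQUEUE_NOCLOCK   == ENQUEUE_NOCLOCK);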