Commit 557a6bfc authored by Peter Zijlstra

sched/fair: Add trivial fair server

Use deadline servers to service fair tasks.

This patch adds a fair_server deadline entity which acts as a container
for fair entities and can be used to fix starvation when higher priority
(wrt fair) tasks are monopolizing CPU(s).
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Daniel Bristot de Oliveira <bristot@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Juri Lelli <juri.lelli@redhat.com>
Link: https://lore.kernel.org/r/b6b0bcefaf25391bcf5b6ecdb9f1218de402d42e.1716811044.git.bristot@kernel.org
parent a741b824
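
The mechanism can be pictured with a small stand-alone sketch (plain C, not kernel code; all sim_* names are invented here, and letting the server run first whenever it has budget is a simplification of the real EDF-based deadline scheduling): a per-CPU "server" entity holds a budget of runtime per period and, whenever it is selected, does no work of its own but asks the fair class for a task, so fair tasks get at least runtime/period of the CPU even while real-time work would otherwise monopolize it.

#include <stdbool.h>
#include <stdio.h>

/*
 * Stand-alone illustration of the "fair server" idea; NOT kernel code.
 * sim_* names are invented for this sketch.
 */
struct sim_server {
	long runtime;			/* budget per period, in ticks */
	long period;			/* replenishment period, in ticks */
	long budget;			/* budget left in the current period */
	bool (*has_tasks)(void);	/* role of dl_server_has_tasks_f */
	const char *(*pick)(void);	/* role of dl_server_pick_f */
};

/* Pretend the fair runqueue always has pending work. */
static bool fair_queue_has_tasks(void) { return true; }
static const char *fair_queue_pick(void) { return "fair task"; }

/* One tick: RT work normally wins, but the server gets in while it has budget. */
static const char *sim_tick(struct sim_server *s, long now, bool rt_pending)
{
	if (now % s->period == 0)
		s->budget = s->runtime;		/* replenish each period */

	if (s->budget > 0 && s->has_tasks()) {
		s->budget--;			/* account the served time */
		return s->pick();		/* run fair work inside the server */
	}
	return rt_pending ? "rt task" : (s->has_tasks() ? s->pick() : "idle");
}

int main(void)
{
	struct sim_server srv = {
		.runtime = 2, .period = 10,	/* hypothetical 20% reservation */
		.has_tasks = fair_queue_has_tasks,
		.pick = fair_queue_pick,
	};

	/* RT work is pending the whole time, yet fair tasks still get 2 of every 10 ticks. */
	for (long t = 0; t < 20; t++)
		printf("tick %2ld: %s\n", t, sim_tick(&srv, t, true));
	return 0;
}
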
@@ -8336,6 +8336,7 @@ void __init sched_init(void)
 #endif /* CONFIG_SMP */
 		hrtick_rq_init(rq);
 		atomic_set(&rq->nr_iowait, 0);
+		fair_server_init(rq);
 
 #ifdef CONFIG_SCHED_CORE
 		rq->core = rq;
...
@@ -1381,6 +1381,13 @@ static void update_curr_dl_se(struct rq *rq, struct sched_dl_entity *dl_se, s64 delta_exec)
 		resched_curr(rq);
 	}
 
+	/*
+	 * The fair server (sole dl_server) does not account for real-time
+	 * workload because it is running fair work.
+	 */
+	if (dl_se == &rq->fair_server)
+		return;
+
 	/*
 	 * Because -- for now -- we share the rt bandwidth, we need to
 	 * account our runtime there too, otherwise actual rt tasks
@@ -1414,15 +1421,31 @@ void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec)
 
 void dl_server_start(struct sched_dl_entity *dl_se)
 {
+	struct rq *rq = dl_se->rq;
+
 	if (!dl_server(dl_se)) {
+		/* Disabled */
+		dl_se->dl_runtime = 0;
+		dl_se->dl_deadline = 1000 * NSEC_PER_MSEC;
+		dl_se->dl_period = 1000 * NSEC_PER_MSEC;
+
 		dl_se->dl_server = 1;
 		setup_new_dl_entity(dl_se);
 	}
+
+	if (!dl_se->dl_runtime)
+		return;
+
 	enqueue_dl_entity(dl_se, ENQUEUE_WAKEUP);
+	if (!dl_task(dl_se->rq->curr) || dl_entity_preempt(dl_se, &rq->curr->dl))
+		resched_curr(dl_se->rq);
 }
 
 void dl_server_stop(struct sched_dl_entity *dl_se)
 {
+	if (!dl_se->dl_runtime)
+		return;
+
 	dequeue_dl_entity(dl_se, DEQUEUE_SLEEP);
 }
 
...
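
As initialized above the server is effectively off: dl_runtime is 0, so both dl_server_start() and dl_server_stop() bail out early, and the 1000 ms deadline/period are only placeholders until a non-zero runtime is configured (this patch never sets one). A stand-alone sketch of the reservation such parameters would encode, using an assumed 50 ms budget that is not part of this patch:

#include <stdio.h>

/* Illustration only: dl_runtime stays 0 in this patch, i.e. no reservation.
 * The 50 ms budget below is an assumed example value. */
int main(void)
{
	const long long NSEC_PER_MSEC = 1000000LL;
	long long dl_runtime = 50 * NSEC_PER_MSEC;	/* assumed budget per period */
	long long dl_period  = 1000 * NSEC_PER_MSEC;	/* default period from the patch */

	if (!dl_runtime) {
		printf("fair server disabled (runtime == 0); start/stop are no-ops\n");
		return 0;
	}

	printf("fair server would reserve %.1f%% of each CPU for fair tasks\n",
	       100.0 * (double)dl_runtime / (double)dl_period);
	return 0;
}
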
@@ -5765,6 +5765,7 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
 	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
 	struct sched_entity *se;
 	long task_delta, idle_task_delta, dequeue = 1;
+	long rq_h_nr_running = rq->cfs.h_nr_running;
 
 	raw_spin_lock(&cfs_b->lock);
 	/* This will start the period timer if necessary */
@@ -5837,6 +5838,9 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
 	sub_nr_running(rq, task_delta);
 
 done:
+	/* Stop the fair server if throttling resulted in no runnable tasks */
+	if (rq_h_nr_running && !rq->cfs.h_nr_running)
+		dl_server_stop(&rq->fair_server);
 	/*
 	 * Note: distribution will already see us throttled via the
 	 * throttled-list.  rq->lock protects completion.
@@ -5854,6 +5858,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
 	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
 	struct sched_entity *se;
 	long task_delta, idle_task_delta;
+	long rq_h_nr_running = rq->cfs.h_nr_running;
 
 	se = cfs_rq->tg->se[cpu_of(rq)];
@@ -5929,6 +5934,10 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
 unthrottle_throttle:
 	assert_list_leaf_cfs_rq(rq);
 
+	/* Start the fair server if un-throttling resulted in new runnable tasks */
+	if (!rq_h_nr_running && rq->cfs.h_nr_running)
+		dl_server_start(&rq->fair_server);
+
 	/* Determine whether we need to wake up potentially idle CPU: */
 	if (rq->curr == rq->idle && rq->cfs.nr_running)
 		resched_curr(rq);
@@ -6759,6 +6768,9 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	 */
 	util_est_enqueue(&rq->cfs, p);
 
+	if (!throttled_hierarchy(task_cfs_rq(p)) && !rq->cfs.h_nr_running)
+		dl_server_start(&rq->fair_server);
+
 	/*
 	 * If in_iowait is set, the code below may not trigger any cpufreq
 	 * utilization updates, so do it here explicitly with the IOWAIT flag
@@ -6903,6 +6915,9 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 		rq->next_balance = jiffies;
 
 dequeue_throttle:
+	if (!throttled_hierarchy(task_cfs_rq(p)) && !rq->cfs.h_nr_running)
+		dl_server_stop(&rq->fair_server);
+
 	util_est_update(&rq->cfs, p, task_sleep);
 	hrtick_update(rq);
 }
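
Taken together, the four hooks above (throttle/unthrottle and enqueue/dequeue) maintain one invariant: the fair server is enqueued exactly while the runqueue has runnable fair tasks. A condensed, purely illustrative kernel-style helper capturing that invariant follows; no such function exists in the patch, the real logic is spread across the four call sites and the task enqueue/dequeue sites additionally guard with throttled_hierarchy():

/* Illustrative only, not part of this patch. */
static void fair_server_update(struct rq *rq, unsigned int prev_h_nr_running)
{
	/* first runnable fair task appeared: start serving fair work */
	if (!prev_h_nr_running && rq->cfs.h_nr_running)
		dl_server_start(&rq->fair_server);

	/* last runnable fair task left (or got throttled): stop the server */
	if (prev_h_nr_running && !rq->cfs.h_nr_running)
		dl_server_stop(&rq->fair_server);
}
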
@@ -8602,6 +8617,25 @@ static struct task_struct *__pick_next_task_fair(struct rq *rq)
 	return pick_next_task_fair(rq, NULL, NULL);
 }
 
+static bool fair_server_has_tasks(struct sched_dl_entity *dl_se)
+{
+	return !!dl_se->rq->cfs.nr_running;
+}
+
+static struct task_struct *fair_server_pick(struct sched_dl_entity *dl_se)
+{
+	return pick_next_task_fair(dl_se->rq, NULL, NULL);
+}
+
+void fair_server_init(struct rq *rq)
+{
+	struct sched_dl_entity *dl_se = &rq->fair_server;
+
+	init_dl_entity(dl_se);
+
+	dl_server_init(dl_se, rq, fair_server_has_tasks, fair_server_pick);
+}
+
 /*
  * Account for a descheduled task:
  */
...
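
fair_server_has_tasks() and fair_server_pick() above are the two callbacks wired up by dl_server_init(); the dl_server infrastructure introduced earlier in the series invokes them when the deadline class selects the fair server's entity, then charges the time consumed back to the server via dl_server_update(). A rough sketch of that consumer side, simplified for orientation only (the field and function names belong to the pre-existing deadline-server code, not to this patch, and may differ in detail):

/* Simplified picture of the consumer side; not code added by this patch. */
static struct task_struct *server_pick_task(struct sched_dl_entity *dl_se)
{
	/* nothing runnable in CFS: let the server go back to sleep */
	if (!dl_se->server_has_tasks(dl_se))	/* -> fair_server_has_tasks() */
		return NULL;

	/* delegate the actual pick to the fair class */
	return dl_se->server_pick(dl_se);	/* -> fair_server_pick() */
}
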
@@ -363,6 +363,8 @@ extern void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
 		    dl_server_has_tasks_f has_tasks,
 		    dl_server_pick_f pick);
 
+extern void fair_server_init(struct rq *rq);
+
 #ifdef CONFIG_CGROUP_SCHED
 extern struct list_head task_groups;
 
@@ -1039,6 +1041,8 @@ struct rq {
 	struct rt_rq		rt;
 	struct dl_rq		dl;
 
+	struct sched_dl_entity	fair_server;
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	/* list of leaf cfs_rq on this CPU: */
 	struct list_head	leaf_cfs_rq_list;
...