Commit 245c8e81 authored by Linus Torvalds

Merge tag 'sched-urgent-2024-04-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler fixes from Ingo Molnar:

 - Fix EEVDF corner cases

 - Fix two nohz_full= related bugs that can cause boot crashes
   and warnings

* tag 'sched-urgent-2024-04-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/isolation: Fix boot crash when maxcpus < first housekeeping CPU
  sched/isolation: Prevent boot crash when the boot CPU is nohz_full
  sched/eevdf: Prevent vlag from going out of bounds in reweight_eevdf()
  sched/eevdf: Fix miscalculation in reweight_entity() when se is not curr
  sched/eevdf: Always update V if se->on_rq when reweighting
parents aec147c1 257bf89d
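Background for the three EEVDF fixes in the list above: fair.c stores per-entity lag as se->vlag = V - v_i, while the entity's true weighted lag is lag_i = w_i * (V - v_i). If w_i changes while the entity is dequeued, se->vlag must be rescaled by old_weight/new_weight or the weighted lag silently drifts. A minimal userspace sketch of that invariant (illustrative values, not kernel code; div_s64() is modeled with plain 64-bit division):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            /* se->vlag stores (V - v_i); the weighted lag is w_i * (V - v_i). */
            int64_t vlag = 2000;           /* V - v_i, in vruntime units */
            int64_t old_weight = 1024;     /* weight before reniceing */
            int64_t new_weight = 2048;     /* weight after reniceing */

            int64_t lag_before = old_weight * vlag;

            /* the rescale reweight_entity() applies to a dequeued entity */
            vlag = vlag * old_weight / new_weight;

            int64_t lag_after = new_weight * vlag;

            /* both print 2048000: the weighted lag is preserved */
            printf("lag before = %lld, lag after = %lld\n",
                   (long long)lag_before, (long long)lag_after);
            return 0;
    }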
Documentation/timers/no_hz.rst
@@ -129,11 +129,8 @@ adaptive-tick CPUs: At least one non-adaptive-tick CPU must remain
 online to handle timekeeping tasks in order to ensure that system
 calls like gettimeofday() returns accurate values on adaptive-tick CPUs.
 (This is not an issue for CONFIG_NO_HZ_IDLE=y because there are no running
-user processes to observe slight drifts in clock rate.) Therefore, the
-boot CPU is prohibited from entering adaptive-ticks mode. Specifying a
-"nohz_full=" mask that includes the boot CPU will result in a boot-time
-error message, and the boot CPU will be removed from the mask. Note that
-this means that your system must have at least two CPUs in order for
+user processes to observe slight drifts in clock rate.) Note that this
+means that your system must have at least two CPUs in order for
 CONFIG_NO_HZ_FULL=y to do anything for you.
 
 Finally, adaptive-ticks CPUs must have their RCU callbacks offloaded.
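With the isolation fix below, the boot CPU may itself appear in the "nohz_full=" mask, which is why the promise of a boot-time error and automatic mask fixup was dropped from the text above; the two-CPU requirement stands, since one CPU must still handle timekeeping. As an illustrative example, on an 8-CPU machine a command line such as

    nohz_full=1-7

leaves CPU 0 available as the housekeeping (timekeeping) CPU.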
kernel/sched/fair.c
@@ -696,15 +696,21 @@ u64 avg_vruntime(struct cfs_rq *cfs_rq)
  *
  * XXX could add max_slice to the augmented data to track this.
  */
-static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se)
+static s64 entity_lag(u64 avruntime, struct sched_entity *se)
 {
-        s64 lag, limit;
+        s64 vlag, limit;
+
+        vlag = avruntime - se->vruntime;
+        limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se);
 
+        return clamp(vlag, -limit, limit);
+}
+
+static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
         SCHED_WARN_ON(!se->on_rq);
-        lag = avg_vruntime(cfs_rq) - se->vruntime;
 
-        limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se);
-        se->vlag = clamp(lag, -limit, limit);
+        se->vlag = entity_lag(avg_vruntime(cfs_rq), se);
 }
 
 /*
@@ -3676,11 +3682,10 @@ static inline void
 dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { }
 #endif
 
-static void reweight_eevdf(struct cfs_rq *cfs_rq, struct sched_entity *se,
+static void reweight_eevdf(struct sched_entity *se, u64 avruntime,
                            unsigned long weight)
 {
         unsigned long old_weight = se->load.weight;
-        u64 avruntime = avg_vruntime(cfs_rq);
         s64 vlag, vslice;
 
         /*
@@ -3761,7 +3766,7 @@ static void reweight_eevdf(struct cfs_rq *cfs_rq, struct sched_entity *se,
          *         = V  - vl'
          */
         if (avruntime != se->vruntime) {
-                vlag = (s64)(avruntime - se->vruntime);
+                vlag = entity_lag(avruntime, se);
                 vlag = div_s64(vlag * old_weight, weight);
                 se->vruntime = avruntime - vlag;
         }
@@ -3787,25 +3792,26 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
                             unsigned long weight)
 {
         bool curr = cfs_rq->curr == se;
+        u64 avruntime;
 
         if (se->on_rq) {
                 /* commit outstanding execution time */
-                if (curr)
-                        update_curr(cfs_rq);
-                else
+                update_curr(cfs_rq);
+                avruntime = avg_vruntime(cfs_rq);
+                if (!curr)
                         __dequeue_entity(cfs_rq, se);
                 update_load_sub(&cfs_rq->load, se->load.weight);
         }
         dequeue_load_avg(cfs_rq, se);
 
-        if (!se->on_rq) {
+        if (se->on_rq) {
+                reweight_eevdf(se, avruntime, weight);
+        } else {
                 /*
                  * Because we keep se->vlag = V - v_i, while: lag_i = w_i*(V - v_i),
                  * we need to scale se->vlag when w_i changes.
                  */
                 se->vlag = div_s64(se->vlag * se->load.weight, weight);
-        } else {
-                reweight_eevdf(cfs_rq, se, weight);
         }
 
         update_load_set(&se->load, weight);
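Two details in the fair.c hunks above deserve a note. First, update_curr() advances V and __dequeue_entity() removes se's weight from the average backing it, so reweight_entity() now samples avg_vruntime() exactly once, after committing outstanding execution time but before dequeueing, and hands that snapshot to reweight_eevdf(). Second, because reweight_eevdf() now computes its lag through entity_lag(), the value is clamped to the same +/- limit that update_entity_lag() uses before being scaled by old_weight/weight. A small userspace sketch of that clamp (illustrative numbers; clamp() written out by hand, not the kernel macro):

    #include <stdio.h>
    #include <stdint.h>

    /* bound v to [lo, hi], as the kernel's clamp() does */
    static int64_t clamp_s64(int64_t v, int64_t lo, int64_t hi)
    {
            return v < lo ? lo : (v > hi ? hi : v);
    }

    int main(void)
    {
            int64_t avruntime = 1000000;
            int64_t vruntime  = -50000000;       /* pathologically stale */
            int64_t limit     = 3000000;         /* ~ 2*slice, weight-scaled */

            int64_t raw  = avruntime - vruntime; /* 51000000: out of bounds */
            int64_t vlag = clamp_s64(raw, -limit, limit);

            /* only the clamped value is scaled by old_weight/weight */
            printf("raw lag %lld clamped to %lld\n",
                   (long long)raw, (long long)vlag);
            return 0;
    }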
......@@ -46,7 +46,16 @@ int housekeeping_any_cpu(enum hk_type type)
if (cpu < nr_cpu_ids)
return cpu;
return cpumask_any_and(housekeeping.cpumasks[type], cpu_online_mask);
cpu = cpumask_any_and(housekeeping.cpumasks[type], cpu_online_mask);
if (likely(cpu < nr_cpu_ids))
return cpu;
/*
* Unless we have another problem this can only happen
* at boot time before start_secondary() brings the 1st
* housekeeping CPU up.
*/
WARN_ON_ONCE(system_state == SYSTEM_RUNNING ||
type != HK_TYPE_TIMER);
}
}
return smp_processor_id();
@@ -109,6 +118,7 @@ static void __init housekeeping_setup_type(enum hk_type type,
 static int __init housekeeping_setup(char *str, unsigned long flags)
 {
         cpumask_var_t non_housekeeping_mask, housekeeping_staging;
+        unsigned int first_cpu;
         int err = 0;
 
         if ((flags & HK_FLAG_TICK) && !(housekeeping.flags & HK_FLAG_TICK)) {
@@ -129,7 +139,8 @@ static int __init housekeeping_setup(char *str, unsigned long flags)
         cpumask_andnot(housekeeping_staging,
                        cpu_possible_mask, non_housekeeping_mask);
 
-        if (!cpumask_intersects(cpu_present_mask, housekeeping_staging)) {
+        first_cpu = cpumask_first_and(cpu_present_mask, housekeeping_staging);
+        if (first_cpu >= nr_cpu_ids || first_cpu >= setup_max_cpus) {
                 __cpumask_set_cpu(smp_processor_id(), housekeeping_staging);
                 __cpumask_clear_cpu(smp_processor_id(), non_housekeeping_mask);
                 if (!housekeeping.flags) {
@@ -138,6 +149,9 @@
                 }
         }
 
+        if (cpumask_empty(non_housekeeping_mask))
+                goto free_housekeeping_staging;
+
         if (!housekeeping.flags) {
                 /* First setup call ("nohz_full=" or "isolcpus=") */
                 enum hk_type type;
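For the last two isolation.c hunks: cpumask_intersects(cpu_present_mask, ...) only proved that some present CPU could serve as a housekeeping CPU. If that CPU's ID is at or above setup_max_cpus (the "maxcpus=" boot limit), it is never brought online and the system boots with no housekeeping CPU at all. The new test therefore checks the first present housekeeping CPU against both nr_cpu_ids and setup_max_cpus, and otherwise forces the boot CPU into the housekeeping set. A userspace model of that bound check (illustrative values; the kernel uses cpumask_first_and() on real cpumasks):

    #include <stdio.h>

    int main(void)
    {
            unsigned int nr_cpu_ids = 8;
            unsigned int setup_max_cpus = 2;  /* e.g. booted with maxcpus=2 */
            unsigned int first_cpu = 4;       /* first CPU outside nohz_full= */

            if (first_cpu >= nr_cpu_ids || first_cpu >= setup_max_cpus)
                    printf("CPU %u never boots under maxcpus=%u: "
                           "fall back to the boot CPU for housekeeping\n",
                           first_cpu, setup_max_cpus);
            else
                    printf("CPU %u is a valid housekeeping CPU\n", first_cpu);
            return 0;
    }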