Commit 4eb86765 authored by Ingo Molnar

Merge branch 'smp/hotplug' into sched/core, to resolve conflicts

Conflicts:
	kernel/sched/core.c
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parents eb60b3e5 e5ef27d0
arch/powerpc/kernel/smp.c
@@ -565,7 +565,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
 		smp_ops->give_timebase();
 
 	/* Wait until cpu puts itself in the online & active maps */
-	while (!cpu_online(cpu) || !cpu_active(cpu))
+	while (!cpu_online(cpu))
 		cpu_relax();
 
 	return 0;
...
arch/s390/kernel/smp.c
@@ -832,7 +832,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
 	pcpu_attach_task(pcpu, tidle);
 	pcpu_start_fn(pcpu, smp_start_secondary, NULL);
 	/* Wait until cpu puts itself in the online & active maps */
-	while (!cpu_online(cpu) || !cpu_active(cpu))
+	while (!cpu_online(cpu))
 		cpu_relax();
 	return 0;
 }
...
include/linux/cpu.h
@@ -59,25 +59,7 @@ struct notifier_block;
  * CPU notifier priorities.
  */
 enum {
-	/*
-	 * SCHED_ACTIVE marks a cpu which is coming up active during
-	 * CPU_ONLINE and CPU_DOWN_FAILED and must be the first
-	 * notifier.  CPUSET_ACTIVE adjusts cpuset according to
-	 * cpu_active mask right after SCHED_ACTIVE.  During
-	 * CPU_DOWN_PREPARE, SCHED_INACTIVE and CPUSET_INACTIVE are
-	 * ordered in the similar way.
-	 *
-	 * This ordering guarantees consistent cpu_active mask and
-	 * migration behavior to all cpu notifiers.
-	 */
-	CPU_PRI_SCHED_ACTIVE	= INT_MAX,
-	CPU_PRI_CPUSET_ACTIVE	= INT_MAX - 1,
-	CPU_PRI_SCHED_INACTIVE	= INT_MIN + 1,
-	CPU_PRI_CPUSET_INACTIVE	= INT_MIN,
-
-	/* migration should happen before other stuff but after perf */
 	CPU_PRI_PERF		= 20,
-	CPU_PRI_MIGRATION	= 10,
 
 	/* bring up workqueues before normal notifiers and down after */
 	CPU_PRI_WORKQUEUE_UP	= 5,
...
include/linux/cpuhotplug.h
@@ -8,6 +8,7 @@ enum cpuhp_state {
 	CPUHP_BRINGUP_CPU,
 	CPUHP_AP_IDLE_DEAD,
 	CPUHP_AP_OFFLINE,
+	CPUHP_AP_SCHED_STARTING,
 	CPUHP_AP_NOTIFY_STARTING,
 	CPUHP_AP_ONLINE,
 	CPUHP_TEARDOWN_CPU,
@@ -16,6 +17,7 @@ enum cpuhp_state {
 	CPUHP_AP_NOTIFY_ONLINE,
 	CPUHP_AP_ONLINE_DYN,
 	CPUHP_AP_ONLINE_DYN_END		= CPUHP_AP_ONLINE_DYN + 30,
+	CPUHP_AP_ACTIVE,
 	CPUHP_ONLINE,
 };
...
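The two states added above are fixed slots reserved for the scheduler itself; other subsystems are expected to land in the dynamic window (CPUHP_AP_ONLINE_DYN .. CPUHP_AP_ONLINE_DYN_END) instead. A minimal sketch of such a registration against the cpuhp_setup_state() API this state machine provides (the foo_* names are hypothetical, not part of this commit):

#include <linux/cpu.h>
#include <linux/cpuhotplug.h>

/* Hypothetical callbacks in the "int (*cb)(unsigned int cpu)" form
 * this series standardizes on; a non-zero return aborts the hotplug
 * operation. */
static int foo_cpu_online(unsigned int cpu)
{
	return 0;
}

static int foo_cpu_offline(unsigned int cpu)
{
	return 0;
}

static int __init foo_init(void)
{
	int ret;

	/* Grabs a free slot in the dynamic AP range and immediately
	 * invokes foo_cpu_online() on every CPU that is already up.
	 * For CPUHP_AP_ONLINE_DYN the allocated state number (> 0) is
	 * returned on success. */
	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "foo:online",
				foo_cpu_online, foo_cpu_offline);
	return ret < 0 ? ret : 0;
}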
include/linux/cpumask.h
@@ -743,12 +743,10 @@ set_cpu_present(unsigned int cpu, bool present)
 static inline void
 set_cpu_online(unsigned int cpu, bool online)
 {
-	if (online) {
+	if (online)
 		cpumask_set_cpu(cpu, &__cpu_online_mask);
-		cpumask_set_cpu(cpu, &__cpu_active_mask);
-	} else {
+	else
 		cpumask_clear_cpu(cpu, &__cpu_online_mask);
-	}
 }
 
 static inline void
...
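With the active-mask update removed from set_cpu_online(), cpu_active is no longer implied by cpu_online during bring-up; it is now driven by the scheduler through the sched_cpu_activate()/sched_cpu_deactivate() hooks added further down. For reference, the sibling helper those hooks call is the existing <linux/cpumask.h> accessor, which I believe reads as below (shown for context, unchanged by this merge):

static inline void
set_cpu_active(unsigned int cpu, bool active)
{
	if (active)
		cpumask_set_cpu(cpu, &__cpu_active_mask);
	else
		cpumask_clear_cpu(cpu, &__cpu_active_mask);
}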
include/linux/sched.h
@@ -374,6 +374,15 @@ extern void cpu_init (void);
 extern void trap_init(void);
 extern void update_process_times(int user);
 extern void scheduler_tick(void);
+extern int sched_cpu_starting(unsigned int cpu);
+extern int sched_cpu_activate(unsigned int cpu);
+extern int sched_cpu_deactivate(unsigned int cpu);
+
+#ifdef CONFIG_HOTPLUG_CPU
+extern int sched_cpu_dying(unsigned int cpu);
+#else
+# define sched_cpu_dying	NULL
+#endif
 
 extern void sched_show_task(struct task_struct *p);
...
kernel/cpu.c
@@ -703,21 +703,6 @@ static int takedown_cpu(unsigned int cpu)
 	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
 	int err;
 
-	/*
-	 * By now we've cleared cpu_active_mask, wait for all preempt-disabled
-	 * and RCU users of this state to go away such that all new such users
-	 * will observe it.
-	 *
-	 * For CONFIG_PREEMPT we have preemptible RCU and its sync_rcu() might
-	 * not imply sync_sched(), so wait for both.
-	 *
-	 * Do sync before park smpboot threads to take care the rcu boost case.
-	 */
-	if (IS_ENABLED(CONFIG_PREEMPT))
-		synchronize_rcu_mult(call_rcu, call_rcu_sched);
-	else
-		synchronize_rcu();
-
 	/* Park the smpboot threads */
 	kthread_park(per_cpu_ptr(&cpuhp_state, cpu)->thread);
 	smpboot_park_threads(cpu);
@@ -923,8 +908,6 @@ void cpuhp_online_idle(enum cpuhp_state state)
 	st->state = CPUHP_AP_ONLINE_IDLE;
 
-	/* The cpu is marked online, set it active now */
-	set_cpu_active(cpu, true);
 	/* Unpark the stopper thread and the hotplug thread of this cpu */
 	stop_machine_unpark(cpu);
 	kthread_unpark(st->thread);
@@ -1236,6 +1219,12 @@ static struct cpuhp_step cpuhp_ap_states[] = {
 		.name			= "ap:offline",
 		.cant_stop		= true,
 	},
+	/* First state is scheduler control. Interrupts are disabled */
+	[CPUHP_AP_SCHED_STARTING] = {
+		.name			= "sched:starting",
+		.startup		= sched_cpu_starting,
+		.teardown		= sched_cpu_dying,
+	},
 	/*
 	 * Low level startup/teardown notifiers. Run with interrupts
 	 * disabled. Will be removed once the notifiers are converted to
@@ -1274,6 +1263,15 @@ static struct cpuhp_step cpuhp_ap_states[] = {
 	 * The dynamically registered state space is here
 	 */
+#ifdef CONFIG_SMP
+	/* Last state is scheduler control setting the cpu active */
+	[CPUHP_AP_ACTIVE] = {
+		.name			= "sched:active",
+		.startup		= sched_cpu_activate,
+		.teardown		= sched_cpu_deactivate,
+	},
+#endif
+
 	/* CPU is fully up and running. */
 	[CPUHP_ONLINE] = {
 		.name			= "online",
...
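The placement of these two entries in cpuhp_ap_states[] mirrors their positions in enum cpuhp_state: sched:starting runs on the incoming CPU with interrupts disabled before the STARTING notifiers, while sched:active is the last step before CPUHP_ONLINE, so on unplug its teardown (sched_cpu_deactivate) runs first and sched_cpu_dying runs late, after the AP states have been torn down. A build-time sanity sketch of the ordering this relies on (illustrative, not part of the commit):

#include <linux/bug.h>
#include <linux/cpuhotplug.h>

static inline void cpuhp_sched_order_check(void)
{
	/* sched_cpu_starting() precedes the STARTING notifiers... */
	BUILD_BUG_ON(!(CPUHP_AP_SCHED_STARTING < CPUHP_AP_NOTIFY_STARTING));
	/* ...and sched_cpu_activate() is the final step before ONLINE. */
	BUILD_BUG_ON(!(CPUHP_AP_ACTIVE > CPUHP_AP_ONLINE_DYN_END));
	BUILD_BUG_ON(!(CPUHP_AP_ACTIVE < CPUHP_ONLINE));
}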
kernel/sched/core.c
@@ -314,29 +314,6 @@ void hrtick_start(struct rq *rq, u64 delay)
 	}
 }
 
-static int
-hotplug_hrtick(struct notifier_block *nfb, unsigned long action, void *hcpu)
-{
-	int cpu = (int)(long)hcpu;
-
-	switch (action) {
-	case CPU_UP_CANCELED:
-	case CPU_UP_CANCELED_FROZEN:
-	case CPU_DOWN_PREPARE:
-	case CPU_DOWN_PREPARE_FROZEN:
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
-		hrtick_clear(cpu_rq(cpu));
-		return NOTIFY_OK;
-	}
-
-	return NOTIFY_DONE;
-}
-
-static __init void init_hrtick(void)
-{
-	hotcpu_notifier(hotplug_hrtick, 0);
-}
 #else
 /*
  * Called to set the hrtick timer state.
@@ -353,10 +330,6 @@ void hrtick_start(struct rq *rq, u64 delay)
 	hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay),
 		      HRTIMER_MODE_REL_PINNED);
 }
-
-static inline void init_hrtick(void)
-{
-}
 #endif /* CONFIG_SMP */
 
 static void init_rq_hrtick(struct rq *rq)
@@ -380,10 +353,6 @@ static inline void hrtick_clear(struct rq *rq)
 static inline void init_rq_hrtick(struct rq *rq)
 {
 }
-
-static inline void init_hrtick(void)
-{
-}
 #endif	/* CONFIG_SCHED_HRTICK */
 
 /*
@@ -1150,6 +1119,7 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
 static int __set_cpus_allowed_ptr(struct task_struct *p,
 				  const struct cpumask *new_mask, bool check)
 {
+	const struct cpumask *cpu_valid_mask = cpu_active_mask;
 	unsigned int dest_cpu;
 	struct rq_flags rf;
 	struct rq *rq;
@@ -1157,6 +1127,13 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
 
 	rq = task_rq_lock(p, &rf);
 
+	if (p->flags & PF_KTHREAD) {
+		/*
+		 * Kernel threads are allowed on online && !active CPUs
+		 */
+		cpu_valid_mask = cpu_online_mask;
+	}
+
 	/*
 	 * Must re-check here, to close a race against __kthread_bind(),
 	 * sched_setaffinity() is not guaranteed to observe the flag.
@@ -1169,18 +1146,28 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
 	if (cpumask_equal(&p->cpus_allowed, new_mask))
 		goto out;
 
-	if (!cpumask_intersects(new_mask, cpu_active_mask)) {
+	if (!cpumask_intersects(new_mask, cpu_valid_mask)) {
 		ret = -EINVAL;
 		goto out;
 	}
 
 	do_set_cpus_allowed(p, new_mask);
 
+	if (p->flags & PF_KTHREAD) {
+		/*
+		 * For kernel threads that do indeed end up on online &&
+		 * !active we want to ensure they are strict per-cpu threads.
+		 */
+		WARN_ON(cpumask_intersects(new_mask, cpu_online_mask) &&
+			!cpumask_intersects(new_mask, cpu_active_mask) &&
+			p->nr_cpus_allowed != 1);
+	}
+
 	/* Can the task run on the task's current CPU? If so, we're done */
 	if (cpumask_test_cpu(task_cpu(p), new_mask))
 		goto out;
 
-	dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
+	dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask);
 	if (task_running(rq, p) || p->state == TASK_WAKING) {
 		struct migration_arg arg = { p, dest_cpu };
 		/* Need help from migration thread: drop lock and wait. */
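The PF_KTHREAD special case above is what lets strictly per-cpu kernel threads be affined to a CPU that is online but not yet active during bring-up. A minimal sketch of the kind of call this legalizes, assuming a hypothetical per-cpu worker thread t (foo_* is not part of this commit):

#include <linux/cpumask.h>
#include <linux/sched.h>

/* Hypothetical helper: affine an existing kernel thread to @cpu while
 * it may still be online && !active. For a PF_KTHREAD task the mask is
 * now validated against cpu_online_mask instead of cpu_active_mask, so
 * this no longer fails with -EINVAL at that stage; the WARN_ON above
 * fires only if the resulting mask is not strictly per-cpu. */
static int foo_bind_worker(struct task_struct *t, unsigned int cpu)
{
	return set_cpus_allowed_ptr(t, cpumask_of(cpu));
}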
@@ -1499,6 +1486,25 @@ EXPORT_SYMBOL_GPL(kick_process);
 
 /*
  * ->cpus_allowed is protected by both rq->lock and p->pi_lock
+ *
+ * A few notes on cpu_active vs cpu_online:
+ *
+ *  - cpu_active must be a subset of cpu_online
+ *
+ *  - on cpu-up we allow per-cpu kthreads on the online && !active cpu,
+ *    see __set_cpus_allowed_ptr(). At this point the newly online
+ *    cpu isn't yet part of the sched domains, and balancing will not
+ *    see it.
+ *
+ *  - on cpu-down we clear cpu_active() to mask the sched domains and
+ *    avoid the load balancer to place new tasks on the to be removed
+ *    cpu. Existing tasks will remain running there and will be taken
+ *    off.
+ *
+ * This means that fallback selection must not select !active CPUs.
+ * And can assume that any active CPU must be online. Conversely
+ * select_task_rq() below may allow selection of !active CPUs in order
+ * to satisfy the above rules.
  */
 static int select_fallback_rq(int cpu, struct task_struct *p)
 {
@@ -1517,8 +1523,6 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
 		/* Look for allowed, online CPU in same node. */
 		for_each_cpu(dest_cpu, nodemask) {
-			if (!cpu_online(dest_cpu))
-				continue;
 			if (!cpu_active(dest_cpu))
 				continue;
 			if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p)))
@@ -1529,8 +1533,6 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
 	for (;;) {
 		/* Any allowed, online CPU? */
 		for_each_cpu(dest_cpu, tsk_cpus_allowed(p)) {
-			if (!cpu_online(dest_cpu))
-				continue;
 			if (!cpu_active(dest_cpu))
 				continue;
 			goto out;
@@ -1582,6 +1584,8 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
 
 	if (p->nr_cpus_allowed > 1)
 		cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags);
+	else
+		cpu = cpumask_any(tsk_cpus_allowed(p));
 
 	/*
 	 * In order not to call set_task_cpu() on a blocking task we need
@@ -5288,6 +5292,8 @@ int task_can_attach(struct task_struct *p,
 
 #ifdef CONFIG_SMP
 
+static bool sched_smp_initialized __read_mostly;
+
 #ifdef CONFIG_NUMA_BALANCING
 /* Migrate current task p to target_cpu */
 int migrate_task_to(struct task_struct *p, int target_cpu)
@@ -5503,127 +5509,13 @@ static void set_rq_offline(struct rq *rq)
 	}
 }
 
-/*
- * migration_call - callback that gets triggered when a CPU is added.
- * Here we can start up the necessary migration thread for the new CPU.
- */
-static int
-migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
+static void set_cpu_rq_start_time(unsigned int cpu)
 {
-	int cpu = (long)hcpu;
-	unsigned long flags;
 	struct rq *rq = cpu_rq(cpu);
 
-	switch (action & ~CPU_TASKS_FROZEN) {
-
-	case CPU_UP_PREPARE:
-		rq->calc_load_update = calc_load_update;
-		account_reset_rq(rq);
-		break;
-
-	case CPU_ONLINE:
-		/* Update our root-domain */
-		raw_spin_lock_irqsave(&rq->lock, flags);
-		if (rq->rd) {
-			BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
-
-			set_rq_online(rq);
-		}
-		raw_spin_unlock_irqrestore(&rq->lock, flags);
-		break;
-
-#ifdef CONFIG_HOTPLUG_CPU
-	case CPU_DYING:
-		sched_ttwu_pending();
-		/* Update our root-domain */
-		raw_spin_lock_irqsave(&rq->lock, flags);
-		if (rq->rd) {
-			BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
-			set_rq_offline(rq);
-		}
-		migrate_tasks(rq);
-		BUG_ON(rq->nr_running != 1); /* the migration thread */
-		raw_spin_unlock_irqrestore(&rq->lock, flags);
-		break;
-
-	case CPU_DEAD:
-		calc_load_migrate(rq);
-		break;
-#endif
-	}
-
-	update_max_interval();
-
-	return NOTIFY_OK;
-}
-
-/*
- * Register at high priority so that task migration (migrate_all_tasks)
- * happens before everything else.  This has to be lower priority than
- * the notifier in the perf_event subsystem, though.
- */
-static struct notifier_block migration_notifier = {
-	.notifier_call = migration_call,
-	.priority = CPU_PRI_MIGRATION,
-};
-
-static void set_cpu_rq_start_time(void)
-{
-	int cpu = smp_processor_id();
-	struct rq *rq = cpu_rq(cpu);
-
 	rq->age_stamp = sched_clock_cpu(cpu);
 }
 
-static int sched_cpu_active(struct notifier_block *nfb,
-				      unsigned long action, void *hcpu)
-{
-	int cpu = (long)hcpu;
-
-	switch (action & ~CPU_TASKS_FROZEN) {
-	case CPU_STARTING:
-		set_cpu_rq_start_time();
-		return NOTIFY_OK;
-
-	case CPU_DOWN_FAILED:
-		set_cpu_active(cpu, true);
-		return NOTIFY_OK;
-
-	default:
-		return NOTIFY_DONE;
-	}
-}
-
-static int sched_cpu_inactive(struct notifier_block *nfb,
-					unsigned long action, void *hcpu)
-{
-	switch (action & ~CPU_TASKS_FROZEN) {
-	case CPU_DOWN_PREPARE:
-		set_cpu_active((long)hcpu, false);
-		return NOTIFY_OK;
-	default:
-		return NOTIFY_DONE;
-	}
-}
-
-static int __init migration_init(void)
-{
-	void *cpu = (void *)(long)smp_processor_id();
-	int err;
-
-	/* Initialize migration for the boot CPU */
-	err = migration_call(&migration_notifier, CPU_UP_PREPARE, cpu);
-	BUG_ON(err == NOTIFY_BAD);
-	migration_call(&migration_notifier, CPU_ONLINE, cpu);
-	register_cpu_notifier(&migration_notifier);
-
-	/* Register cpu active notifiers */
-	cpu_notifier(sched_cpu_active, CPU_PRI_SCHED_ACTIVE);
-	cpu_notifier(sched_cpu_inactive, CPU_PRI_SCHED_INACTIVE);
-
-	return 0;
-}
-early_initcall(migration_init);
-
 static cpumask_var_t sched_domains_tmpmask; /* sched_domains_mutex */
 
 #ifdef CONFIG_SCHED_DEBUG
@@ -6771,10 +6663,10 @@ static void sched_init_numa(void)
 	init_numa_topology_type();
 }
 
-static void sched_domains_numa_masks_set(int cpu)
+static void sched_domains_numa_masks_set(unsigned int cpu)
 {
-	int i, j;
 	int node = cpu_to_node(cpu);
+	int i, j;
 
 	for (i = 0; i < sched_domains_numa_levels; i++) {
 		for (j = 0; j < nr_node_ids; j++) {
@@ -6784,51 +6676,20 @@ static void sched_domains_numa_masks_set(int cpu)
 		}
 	}
 }
 
-static void sched_domains_numa_masks_clear(int cpu)
+static void sched_domains_numa_masks_clear(unsigned int cpu)
 {
 	int i, j;
 
 	for (i = 0; i < sched_domains_numa_levels; i++) {
 		for (j = 0; j < nr_node_ids; j++)
 			cpumask_clear_cpu(cpu, sched_domains_numa_masks[i][j]);
 	}
 }
 
-/*
- * Update sched_domains_numa_masks[level][node] array when new cpus
- * are onlined.
- */
-static int sched_domains_numa_masks_update(struct notifier_block *nfb,
-					   unsigned long action,
-					   void *hcpu)
-{
-	int cpu = (long)hcpu;
-
-	switch (action & ~CPU_TASKS_FROZEN) {
-	case CPU_ONLINE:
-		sched_domains_numa_masks_set(cpu);
-		break;
-
-	case CPU_DEAD:
-		sched_domains_numa_masks_clear(cpu);
-		break;
-
-	default:
-		return NOTIFY_DONE;
-	}
-
-	return NOTIFY_OK;
-}
 #else
-static inline void sched_init_numa(void)
-{
-}
-static int sched_domains_numa_masks_update(struct notifier_block *nfb,
-					   unsigned long action,
-					   void *hcpu)
-{
-	return 0;
-}
+static inline void sched_init_numa(void) { }
+static void sched_domains_numa_masks_set(unsigned int cpu) { }
+static void sched_domains_numa_masks_clear(unsigned int cpu) { }
 #endif /* CONFIG_NUMA */
 
 static int __sdt_alloc(const struct cpumask *cpu_map)
@@ -7218,13 +7079,9 @@ static int num_cpus_frozen;	/* used to mark begin/end of suspend/resume */
  * If we come here as part of a suspend/resume, don't touch cpusets because we
  * want to restore it back to its original state upon resume anyway.
  */
-static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action,
-			     void *hcpu)
+static void cpuset_cpu_active(void)
 {
-	switch (action) {
-	case CPU_ONLINE_FROZEN:
-	case CPU_DOWN_FAILED_FROZEN:
-
+	if (cpuhp_tasks_frozen) {
 		/*
 		 * num_cpus_frozen tracks how many CPUs are involved in suspend
 		 * resume sequence. As long as this is not the last online
@@ -7234,35 +7091,25 @@ static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action,
 		num_cpus_frozen--;
 		if (likely(num_cpus_frozen)) {
 			partition_sched_domains(1, NULL, NULL);
-			break;
+			return;
 		}
-
 		/*
 		 * This is the last CPU online operation. So fall through and
 		 * restore the original sched domains by considering the
 		 * cpuset configurations.
 		 */
-
-	case CPU_ONLINE:
-		cpuset_update_active_cpus(true);
-		break;
-
-	default:
-		return NOTIFY_DONE;
 	}
-	return NOTIFY_OK;
+	cpuset_update_active_cpus(true);
 }
 
-static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action,
-			       void *hcpu)
+static int cpuset_cpu_inactive(unsigned int cpu)
 {
 	unsigned long flags;
-	long cpu = (long)hcpu;
 	struct dl_bw *dl_b;
 	bool overflow;
 	int cpus;
 
-	switch (action) {
-	case CPU_DOWN_PREPARE:
+	if (!cpuhp_tasks_frozen) {
 		rcu_read_lock_sched();
 		dl_b = dl_bw_of(cpu);
@@ -7274,19 +7121,120 @@ static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action,
 		rcu_read_unlock_sched();
 
 		if (overflow)
-			return notifier_from_errno(-EBUSY);
+			return -EBUSY;
 		cpuset_update_active_cpus(false);
-		break;
-	case CPU_DOWN_PREPARE_FROZEN:
+	} else {
 		num_cpus_frozen++;
 		partition_sched_domains(1, NULL, NULL);
-		break;
-	default:
-		return NOTIFY_DONE;
 	}
-	return NOTIFY_OK;
+	return 0;
 }
 
+int sched_cpu_activate(unsigned int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+	unsigned long flags;
+
+	set_cpu_active(cpu, true);
+
+	if (sched_smp_initialized) {
+		sched_domains_numa_masks_set(cpu);
+		cpuset_cpu_active();
+	}
+
+	/*
+	 * Put the rq online, if not already. This happens:
+	 *
+	 * 1) In the early boot process, because we build the real domains
+	 *    after all cpus have been brought up.
+	 *
+	 * 2) At runtime, if cpuset_cpu_active() fails to rebuild the
+	 *    domains.
+	 */
+	raw_spin_lock_irqsave(&rq->lock, flags);
+	if (rq->rd) {
+		BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
+		set_rq_online(rq);
+	}
+	raw_spin_unlock_irqrestore(&rq->lock, flags);
+
+	update_max_interval();
+
+	return 0;
+}
+
+int sched_cpu_deactivate(unsigned int cpu)
+{
+	int ret;
+
+	set_cpu_active(cpu, false);
+	/*
+	 * We've cleared cpu_active_mask, wait for all preempt-disabled and RCU
+	 * users of this state to go away such that all new such users will
+	 * observe it.
+	 *
+	 * For CONFIG_PREEMPT we have preemptible RCU and its sync_rcu() might
+	 * not imply sync_sched(), so wait for both.
+	 *
+	 * Do sync before park smpboot threads to take care the rcu boost case.
+	 */
+	if (IS_ENABLED(CONFIG_PREEMPT))
+		synchronize_rcu_mult(call_rcu, call_rcu_sched);
+	else
+		synchronize_rcu();
+
+	if (!sched_smp_initialized)
+		return 0;
+
+	ret = cpuset_cpu_inactive(cpu);
+	if (ret) {
+		set_cpu_active(cpu, true);
+		return ret;
+	}
+	sched_domains_numa_masks_clear(cpu);
+	return 0;
+}
+
+static void sched_rq_cpu_starting(unsigned int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+
+	rq->calc_load_update = calc_load_update;
+	account_reset_rq(rq);
+	update_max_interval();
+}
+
+int sched_cpu_starting(unsigned int cpu)
+{
+	set_cpu_rq_start_time(cpu);
+	sched_rq_cpu_starting(cpu);
+	return 0;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+int sched_cpu_dying(unsigned int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+	unsigned long flags;
+
+	/* Handle pending wakeups and then migrate everything off */
+	sched_ttwu_pending();
+	raw_spin_lock_irqsave(&rq->lock, flags);
+	if (rq->rd) {
+		BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
+		set_rq_offline(rq);
+	}
+	migrate_tasks(rq);
+	BUG_ON(rq->nr_running != 1);
+	raw_spin_unlock_irqrestore(&rq->lock, flags);
+	calc_load_migrate(rq);
+	update_max_interval();
+	nohz_balance_exit_idle(cpu);
+	hrtick_clear(rq);
+	return 0;
+}
+#endif
+
 void __init sched_init_smp(void)
 {
 	cpumask_var_t non_isolated_cpus;
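Note the error path in sched_cpu_deactivate() above: when cpuset_cpu_inactive() reports deadline-bandwidth overflow, the function restores the active bit itself before returning -EBUSY, so the caller needs no scheduler-side rollback. A sketch of that contract from the caller's perspective (illustrative only; the real caller is the cpuhp core executing the CPUHP_AP_ACTIVE teardown):

static int try_deactivate(unsigned int cpu)
{
	int ret = sched_cpu_deactivate(cpu);

	if (ret) {
		/* e.g. -EBUSY: cpu_active(cpu) has already been restored,
		 * so the unplug can simply be aborted here. */
		return ret;
	}
	/* proceed with the remaining teardown states */
	return 0;
}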
@@ -7308,12 +7256,6 @@ void __init sched_init_smp(void)
 	cpumask_set_cpu(smp_processor_id(), non_isolated_cpus);
 	mutex_unlock(&sched_domains_mutex);
 
-	hotcpu_notifier(sched_domains_numa_masks_update, CPU_PRI_SCHED_ACTIVE);
-	hotcpu_notifier(cpuset_cpu_active, CPU_PRI_CPUSET_ACTIVE);
-	hotcpu_notifier(cpuset_cpu_inactive, CPU_PRI_CPUSET_INACTIVE);
-
-	init_hrtick();
-
 	/* Move init over to a non-isolated CPU */
 	if (set_cpus_allowed_ptr(current, non_isolated_cpus) < 0)
 		BUG();
@@ -7322,7 +7264,16 @@ void __init sched_init_smp(void)
 	init_sched_rt_class();
 	init_sched_dl_class();
+
+	sched_smp_initialized = true;
 }
+
+static int __init migration_init(void)
+{
+	sched_rq_cpu_starting(smp_processor_id());
+	return 0;
+}
+early_initcall(migration_init);
+
 #else
 void __init sched_init_smp(void)
 {
@@ -7519,7 +7470,7 @@ void __init sched_init(void)
 	if (cpu_isolated_map == NULL)
 		zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
 	idle_thread_set_boot_cpu();
-	set_cpu_rq_start_time();
+	set_cpu_rq_start_time(smp_processor_id());
 #endif
 	init_sched_fair_class();
...
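A pattern that recurs throughout the kernel/sched/core.c conversion above: where a notifier used to distinguish CPU_* from CPU_*_FROZEN actions, the new state callbacks consult the global cpuhp_tasks_frozen flag instead (see cpuset_cpu_active()/cpuset_cpu_inactive()). A minimal sketch of that conversion for a hypothetical subsystem (bar_* is not part of this commit; cpuhp_tasks_frozen is assumed to be declared in <linux/cpu.h>):

#include <linux/cpu.h>
#include <linux/cpuhotplug.h>

/* Hypothetical state callback: instead of matching CPU_ONLINE vs
 * CPU_ONLINE_FROZEN notifier actions, it reads cpuhp_tasks_frozen,
 * which the cpuhp core sets for suspend/resume initiated transitions. */
static int bar_cpu_online(unsigned int cpu)
{
	if (cpuhp_tasks_frozen) {
		/* suspend/resume path: keep changes minimal, the
		 * original configuration is restored on resume */
		return 0;
	}
	/* regular hotplug path */
	return 0;
}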
kernel/sched/fair.c
@@ -7814,7 +7814,7 @@ static void nohz_balancer_kick(void)
 	return;
 }
 
-static inline void nohz_balance_exit_idle(int cpu)
+void nohz_balance_exit_idle(unsigned int cpu)
 {
 	if (unlikely(test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))) {
 		/*
@@ -7887,18 +7887,6 @@ void nohz_balance_enter_idle(int cpu)
 	atomic_inc(&nohz.nr_cpus);
 	set_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu));
 }
-
-static int sched_ilb_notifier(struct notifier_block *nfb,
-			      unsigned long action, void *hcpu)
-{
-	switch (action & ~CPU_TASKS_FROZEN) {
-	case CPU_DYING:
-		nohz_balance_exit_idle(smp_processor_id());
-		return NOTIFY_OK;
-	default:
-		return NOTIFY_DONE;
-	}
-}
 #endif
 
 static DEFINE_SPINLOCK(balancing);
@@ -8704,7 +8692,6 @@ __init void init_sched_fair_class(void)
 #ifdef CONFIG_NO_HZ_COMMON
 	nohz.next_balance = jiffies;
 	zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT);
-	cpu_notifier(sched_ilb_notifier, 0);
 #endif
 #endif /* SMP */
...
kernel/sched/sched.h
@@ -1700,6 +1700,10 @@ enum rq_nohz_flag_bits {
 };
 
 #define nohz_flags(cpu)	(&cpu_rq(cpu)->nohz_flags)
+
+extern void nohz_balance_exit_idle(unsigned int cpu);
+#else
+static inline void nohz_balance_exit_idle(unsigned int cpu) { }
 #endif
 
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
...