Kirill Smelkov / linux / Commits

Commit 12fa97c6, authored Nov 10, 2020 by Peter Zijlstra

    Merge branch 'sched/migrate-disable'

Parents: b6d37a76, c777d847

19 changed files with 1038 additions and 227 deletions (+1038, -227)
 fs/proc/array.c                +2    -2
 include/linux/cpuhotplug.h     +1    -0
 include/linux/cpumask.h        +6    -0
 include/linux/preempt.h        +69   -0
 include/linux/sched.h          +5    -0
 include/linux/sched/hotplug.h  +2    -0
 include/linux/stop_machine.h   +5    -0
 kernel/cpu.c                   +8    -1
 kernel/sched/core.c            +739  -181
 kernel/sched/cpudeadline.c     +2    -2
 kernel/sched/cpupri.c          +2    -2
 kernel/sched/deadline.c        +31   -15
 kernel/sched/rt.c              +57   -18
 kernel/sched/sched.h           +56   -3
 kernel/stop_machine.c          +24   -3
 kernel/workqueue.c             +4    -0
 lib/cpumask.c                  +18   -0
 lib/dump_stack.c               +2    -0
 lib/smp_processor_id.c         +5    -0
fs/proc/array.c

@@ -382,9 +382,9 @@ static inline void task_context_switch_counts(struct seq_file *m,
 static void task_cpus_allowed(struct seq_file *m, struct task_struct *task)
 {
 	seq_printf(m, "Cpus_allowed:\t%*pb\n",
-		   cpumask_pr_args(task->cpus_ptr));
+		   cpumask_pr_args(&task->cpus_mask));
 	seq_printf(m, "Cpus_allowed_list:\t%*pbl\n",
-		   cpumask_pr_args(task->cpus_ptr));
+		   cpumask_pr_args(&task->cpus_mask));
 }
 
 static inline void task_core_dumping(struct seq_file *m, struct mm_struct *mm)
include/linux/cpuhotplug.h

@@ -152,6 +152,7 @@ enum cpuhp_state {
 	CPUHP_AP_ONLINE,
 	CPUHP_TEARDOWN_CPU,
 	CPUHP_AP_ONLINE_IDLE,
+	CPUHP_AP_SCHED_WAIT_EMPTY,
 	CPUHP_AP_SMPBOOT_THREADS,
 	CPUHP_AP_X86_VDSO_VMA_ONLINE,
 	CPUHP_AP_IRQ_AFFINITY_ONLINE,
include/linux/cpumask.h

@@ -199,6 +199,11 @@ static inline int cpumask_any_and_distribute(const struct cpumask *src1p,
 	return cpumask_next_and(-1, src1p, src2p);
 }
 
+static inline int cpumask_any_distribute(const struct cpumask *srcp)
+{
+	return cpumask_first(srcp);
+}
+
 #define for_each_cpu(cpu, mask)			\
 	for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
 #define for_each_cpu_not(cpu, mask)		\
@@ -252,6 +257,7 @@ int cpumask_any_but(const struct cpumask *mask, unsigned int cpu);
 unsigned int cpumask_local_spread(unsigned int i, int node);
 int cpumask_any_and_distribute(const struct cpumask *src1p,
 			       const struct cpumask *src2p);
+int cpumask_any_distribute(const struct cpumask *srcp);
 
 /**
  * for_each_cpu - iterate over every cpu in a mask
include/linux/preempt.h

@@ -322,6 +322,73 @@ static inline void preempt_notifier_init(struct preempt_notifier *notifier,
 
 #endif
 
+#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT)
+
+/*
+ * Migrate-Disable and why it is undesired.
+ *
+ * When a preempted task becomes elegible to run under the ideal model (IOW it
+ * becomes one of the M highest priority tasks), it might still have to wait
+ * for the preemptee's migrate_disable() section to complete. Thereby suffering
+ * a reduction in bandwidth in the exact duration of the migrate_disable()
+ * section.
+ *
+ * Per this argument, the change from preempt_disable() to migrate_disable()
+ * gets us:
+ *
+ * - a higher priority tasks gains reduced wake-up latency; with preempt_disable()
+ *   it would have had to wait for the lower priority task.
+ *
+ * - a lower priority tasks; which under preempt_disable() could've instantly
+ *   migrated away when another CPU becomes available, is now constrained
+ *   by the ability to push the higher priority task away, which might itself be
+ *   in a migrate_disable() section, reducing it's available bandwidth.
+ *
+ * IOW it trades latency / moves the interference term, but it stays in the
+ * system, and as long as it remains unbounded, the system is not fully
+ * deterministic.
+ *
+ *
+ * The reason we have it anyway.
+ *
+ * PREEMPT_RT breaks a number of assumptions traditionally held. By forcing a
+ * number of primitives into becoming preemptible, they would also allow
+ * migration. This turns out to break a bunch of per-cpu usage. To this end,
+ * all these primitives employ migirate_disable() to restore this implicit
+ * assumption.
+ *
+ * This is a 'temporary' work-around at best. The correct solution is getting
+ * rid of the above assumptions and reworking the code to employ explicit
+ * per-cpu locking or short preempt-disable regions.
+ *
+ * The end goal must be to get rid of migrate_disable(), alternatively we need
+ * a schedulability theory that does not depend on abritrary migration.
+ *
+ *
+ * Notes on the implementation.
+ *
+ * The implementation is particularly tricky since existing code patterns
+ * dictate neither migrate_disable() nor migrate_enable() is allowed to block.
+ * This means that it cannot use cpus_read_lock() to serialize against hotplug,
+ * nor can it easily migrate itself into a pending affinity mask change on
+ * migrate_enable().
+ *
+ *
+ * Note: even non-work-conserving schedulers like semi-partitioned depends on
+ *       migration, so migrate_disable() is not only a problem for
+ *       work-conserving schedulers.
+ *
+ */
+extern void migrate_disable(void);
+extern void migrate_enable(void);
+
+#elif defined(CONFIG_PREEMPT_RT)
+
+static inline void migrate_disable(void) { }
+static inline void migrate_enable(void) { }
+
+#else /* !CONFIG_PREEMPT_RT */
+
 /**
  * migrate_disable - Prevent migration of the current task
  *
@@ -352,4 +419,6 @@ static __always_inline void migrate_enable(void)
 	preempt_enable();
 }
 
+#endif /* CONFIG_SMP && CONFIG_PREEMPT_RT */
+
 #endif /* __LINUX_PREEMPT_H */
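
The comment block added above also implies the intended calling pattern: code that must stay CPU-local but may sleep (and therefore cannot sit under preempt_disable() on PREEMPT_RT) brackets its per-CPU access with migrate_disable()/migrate_enable(). A minimal sketch of that pattern; the per-CPU counter and the mutex are illustrative, not part of this commit:

/* Sketch only; frob_count and frob_lock are hypothetical symbols. */
static DEFINE_PER_CPU(u64, frob_count);
static DEFINE_MUTEX(frob_lock);

static void frob_this_cpu(void)
{
	migrate_disable();		/* stay on this CPU, but remain preemptible */
	mutex_lock(&frob_lock);		/* may sleep; preempt_disable() would not allow this */
	this_cpu_inc(frob_count);	/* per-CPU access stays CPU-local despite preemption */
	mutex_unlock(&frob_lock);
	migrate_enable();
}

As the comment warns, this only trades the interference term: a higher-priority task that wants this CPU may still have to push the frobbing task away, or wait for it.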
include/linux/sched.h

@@ -714,6 +714,11 @@ struct task_struct {
 	int				nr_cpus_allowed;
 	const cpumask_t			*cpus_ptr;
 	cpumask_t			cpus_mask;
+	void				*migration_pending;
+#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT)
+	unsigned short			migration_disabled;
+#endif
+	unsigned short			migration_flags;
 
 #ifdef CONFIG_PREEMPT_RCU
 	int				rcu_read_lock_nesting;
include/linux/sched/hotplug.h

@@ -11,8 +11,10 @@ extern int sched_cpu_activate(unsigned int cpu);
 extern int sched_cpu_deactivate(unsigned int cpu);
 
 #ifdef CONFIG_HOTPLUG_CPU
+extern int sched_cpu_wait_empty(unsigned int cpu);
 extern int sched_cpu_dying(unsigned int cpu);
 #else
+# define sched_cpu_wait_empty	NULL
 # define sched_cpu_dying	NULL
 #endif
include/linux/stop_machine.h

@@ -24,6 +24,7 @@ typedef int (*cpu_stop_fn_t)(void *arg);
 struct cpu_stop_work {
 	struct list_head	list;		/* cpu_stopper->works */
 	cpu_stop_fn_t		fn;
+	unsigned long		caller;
 	void			*arg;
 	struct cpu_stop_done	*done;
 };
@@ -36,6 +37,8 @@ void stop_machine_park(int cpu);
 void stop_machine_unpark(int cpu);
 void stop_machine_yield(const struct cpumask *cpumask);
 
+extern void print_stop_info(const char *log_lvl, struct task_struct *task);
+
 #else	/* CONFIG_SMP */
 
 #include <linux/workqueue.h>
@@ -80,6 +83,8 @@ static inline bool stop_one_cpu_nowait(unsigned int cpu,
 	return false;
 }
 
+static inline void print_stop_info(const char *log_lvl, struct task_struct *task) { }
+
 #endif	/* CONFIG_SMP */
 
 /*
kernel/cpu.c

@@ -1602,7 +1602,7 @@ static struct cpuhp_step cpuhp_hp_states[] = {
 		.name			= "ap:online",
 	},
 	/*
-	 * Handled on controll processor until the plugged processor manages
+	 * Handled on control processor until the plugged processor manages
 	 * this itself.
 	 */
 	[CPUHP_TEARDOWN_CPU] = {
@@ -1611,6 +1611,13 @@ static struct cpuhp_step cpuhp_hp_states[] = {
 		.teardown.single	= takedown_cpu,
 		.cant_stop		= true,
 	},
+
+	[CPUHP_AP_SCHED_WAIT_EMPTY] = {
+		.name			= "sched:waitempty",
+		.startup.single		= NULL,
+		.teardown.single	= sched_cpu_wait_empty,
+	},
+
 	/* Handle smpboot threads park/unpark */
 	[CPUHP_AP_SMPBOOT_THREADS] = {
 		.name			= "smpboot/threads:online",
kernel/sched/core.c

@@ -1696,6 +1696,80 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
 
 #ifdef CONFIG_SMP
 
+#ifdef CONFIG_PREEMPT_RT
+
+static void
+__do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask, u32 flags);
+
+static int __set_cpus_allowed_ptr(struct task_struct *p,
+				  const struct cpumask *new_mask,
+				  u32 flags);
+
+static void migrate_disable_switch(struct rq *rq, struct task_struct *p)
+{
+	if (likely(!p->migration_disabled))
+		return;
+
+	if (p->cpus_ptr != &p->cpus_mask)
+		return;
+
+	/*
+	 * Violates locking rules! see comment in __do_set_cpus_allowed().
+	 */
+	__do_set_cpus_allowed(p, cpumask_of(rq->cpu), SCA_MIGRATE_DISABLE);
+}
+
+void migrate_disable(void)
+{
+	struct task_struct *p = current;
+
+	if (p->migration_disabled) {
+		p->migration_disabled++;
+		return;
+	}
+
+	preempt_disable();
+	this_rq()->nr_pinned++;
+	p->migration_disabled = 1;
+	preempt_enable();
+}
+EXPORT_SYMBOL_GPL(migrate_disable);
+
+void migrate_enable(void)
+{
+	struct task_struct *p = current;
+
+	if (p->migration_disabled > 1) {
+		p->migration_disabled--;
+		return;
+	}
+
+	/*
+	 * Ensure stop_task runs either before or after this, and that
+	 * __set_cpus_allowed_ptr(SCA_MIGRATE_ENABLE) doesn't schedule().
+	 */
+	preempt_disable();
+	if (p->cpus_ptr != &p->cpus_mask)
+		__set_cpus_allowed_ptr(p, &p->cpus_mask, SCA_MIGRATE_ENABLE);
+	/*
+	 * Mustn't clear migration_disabled() until cpus_ptr points back at the
+	 * regular cpus_mask, otherwise things that race (eg.
+	 * select_fallback_rq) get confused.
+	 */
+	barrier();
+	p->migration_disabled = 0;
+	this_rq()->nr_pinned--;
+	preempt_enable();
+}
+EXPORT_SYMBOL_GPL(migrate_enable);
+
+static inline bool rq_has_pinned_tasks(struct rq *rq)
+{
+	return rq->nr_pinned;
+}
+
+#endif
+
 /*
  * Per-CPU kthreads are allowed to run on !active && online CPUs, see
  * __set_cpus_allowed_ptr() and select_fallback_rq().
@@ -1705,7 +1779,7 @@ static inline bool is_cpu_allowed(struct task_struct *p, int cpu)
 	if (!cpumask_test_cpu(cpu, p->cpus_ptr))
 		return false;
 
-	if (is_per_cpu_kthread(p))
+	if (is_per_cpu_kthread(p) || is_migration_disabled(p))
 		return cpu_online(cpu);
 
 	return cpu_active(cpu);
@@ -1750,8 +1824,16 @@ static struct rq *move_queued_task(struct rq *rq, struct rq_flags *rf,
 }
 
 struct migration_arg {
-	struct task_struct *task;
-	int dest_cpu;
+	struct task_struct		*task;
+	int				dest_cpu;
+	struct set_affinity_pending	*pending;
+};
+
+struct set_affinity_pending {
+	refcount_t		refs;
+	struct completion	done;
+	struct cpu_stop_work	stop_work;
+	struct migration_arg	arg;
 };
 
 /*
@@ -1783,16 +1865,19 @@ static struct rq *__migrate_task(struct rq *rq, struct rq_flags *rf,
  */
 static int migration_cpu_stop(void *data)
 {
+	struct set_affinity_pending *pending;
 	struct migration_arg *arg = data;
 	struct task_struct *p = arg->task;
+	int dest_cpu = arg->dest_cpu;
 	struct rq *rq = this_rq();
+	bool complete = false;
 	struct rq_flags rf;
 
 	/*
 	 * The original target CPU might have gone down and we might
 	 * be on another CPU but it doesn't matter.
 	 */
-	local_irq_disable();
+	local_irq_save(rf.flags);
 	/*
 	 * We need to explicitly wake pending tasks before running
 	 * __migrate_task() such that we will not miss enforcing cpus_ptr
@@ -1802,21 +1887,126 @@ static int migration_cpu_stop(void *data)
 	raw_spin_lock(&p->pi_lock);
 	rq_lock(rq, &rf);
 
+	pending = p->migration_pending;
 	/*
 	 * If task_rq(p) != rq, it cannot be migrated here, because we're
 	 * holding rq->lock, if p->on_rq == 0 it cannot get enqueued because
 	 * we're holding p->pi_lock.
 	 */
 	if (task_rq(p) == rq) {
+		if (is_migration_disabled(p))
+			goto out;
+
+		if (pending) {
+			p->migration_pending = NULL;
+			complete = true;
+		}
+
+		/* migrate_enable() --  we must not race against SCA */
+		if (dest_cpu < 0) {
+			/*
+			 * When this was migrate_enable() but we no longer
+			 * have a @pending, a concurrent SCA 'fixed' things
+			 * and we should be valid again. Nothing to do.
+			 */
+			if (!pending) {
+				WARN_ON_ONCE(!is_cpu_allowed(p, cpu_of(rq)));
+				goto out;
+			}
+
+			dest_cpu = cpumask_any_distribute(&p->cpus_mask);
+		}
+
 		if (task_on_rq_queued(p))
-			rq = __migrate_task(rq, &rf, p, arg->dest_cpu);
+			rq = __migrate_task(rq, &rf, p, dest_cpu);
 		else
-			p->wake_cpu = arg->dest_cpu;
-	}
-	rq_unlock(rq, &rf);
-	raw_spin_unlock(&p->pi_lock);
+			p->wake_cpu = dest_cpu;
+
+	} else if (dest_cpu < 0) {
+		/*
+		 * This happens when we get migrated between migrate_enable()'s
+		 * preempt_enable() and scheduling the stopper task. At that
+		 * point we're a regular task again and not current anymore.
+		 *
+		 * A !PREEMPT kernel has a giant hole here, which makes it far
+		 * more likely.
+		 */
+
+		/*
+		 * When this was migrate_enable() but we no longer have an
+		 * @pending, a concurrent SCA 'fixed' things and we should be
+		 * valid again. Nothing to do.
+		 */
+		if (!pending) {
+			WARN_ON_ONCE(!is_cpu_allowed(p, cpu_of(rq)));
+			goto out;
+		}
+
+		/*
+		 * When migrate_enable() hits a rq mis-match we can't reliably
+		 * determine is_migration_disabled() and so have to chase after
+		 * it.
+		 */
+		task_rq_unlock(rq, p, &rf);
+		stop_one_cpu_nowait(task_cpu(p), migration_cpu_stop,
+				    &pending->arg, &pending->stop_work);
+		return 0;
+	}
+
+out:
+	task_rq_unlock(rq, p, &rf);
+
+	if (complete)
+		complete_all(&pending->done);
+
+	/* For pending->{arg,stop_work} */
+	pending = arg->pending;
+	if (pending && refcount_dec_and_test(&pending->refs))
+		wake_up_var(&pending->refs);
 
 	local_irq_enable();
 	return 0;
 }
+
+int push_cpu_stop(void *arg)
+{
+	struct rq *lowest_rq = NULL, *rq = this_rq();
+	struct task_struct *p = arg;
+
+	raw_spin_lock_irq(&p->pi_lock);
+	raw_spin_lock(&rq->lock);
+
+	if (task_rq(p) != rq)
+		goto out_unlock;
+
+	if (is_migration_disabled(p)) {
+		p->migration_flags |= MDF_PUSH;
+		goto out_unlock;
+	}
+
+	p->migration_flags &= ~MDF_PUSH;
+
+	if (p->sched_class->find_lock_rq)
+		lowest_rq = p->sched_class->find_lock_rq(p, rq);
+
+	if (!lowest_rq)
+		goto out_unlock;
+
+	// XXX validate p is still the highest prio task
+	if (task_rq(p) == rq) {
+		deactivate_task(rq, p, 0);
+		set_task_cpu(p, lowest_rq->cpu);
+		activate_task(lowest_rq, p, 0);
+		resched_curr(lowest_rq);
+	}
+
+	double_unlock_balance(rq, lowest_rq);
+
+out_unlock:
+	rq->push_busy = false;
+	raw_spin_unlock(&rq->lock);
+	raw_spin_unlock_irq(&p->pi_lock);
+
+	put_task_struct(p);
+	return 0;
+}
@@ -1824,18 +2014,39 @@ static int migration_cpu_stop(void *data)
  * sched_class::set_cpus_allowed must do the below, but is not required to
  * actually call this function.
  */
-void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask)
+void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask, u32 flags)
 {
+	if (flags & (SCA_MIGRATE_ENABLE | SCA_MIGRATE_DISABLE)) {
+		p->cpus_ptr = new_mask;
+		return;
+	}
+
 	cpumask_copy(&p->cpus_mask, new_mask);
 	p->nr_cpus_allowed = cpumask_weight(new_mask);
 }
 
-void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
+static void
+__do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask, u32 flags)
 {
 	struct rq *rq = task_rq(p);
 	bool queued, running;
 
-	lockdep_assert_held(&p->pi_lock);
+	/*
+	 * This here violates the locking rules for affinity, since we're only
+	 * supposed to change these variables while holding both rq->lock and
+	 * p->pi_lock.
+	 *
+	 * HOWEVER, it magically works, because ttwu() is the only code that
+	 * accesses these variables under p->pi_lock and only does so after
+	 * smp_cond_load_acquire(&p->on_cpu, !VAL), and we're in __schedule()
+	 * before finish_task().
+	 *
+	 * XXX do further audits, this smells like something putrid.
+	 */
+	if (flags & SCA_MIGRATE_DISABLE)
+		SCHED_WARN_ON(!p->on_cpu);
+	else
+		lockdep_assert_held(&p->pi_lock);
 
 	queued = task_on_rq_queued(p);
 	running = task_current(rq, p);
@@ -1851,7 +2062,7 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
 	if (running)
 		put_prev_task(rq, p);
 
-	p->sched_class->set_cpus_allowed(p, new_mask);
+	p->sched_class->set_cpus_allowed(p, new_mask, flags);
 
 	if (queued)
 		enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK);
@@ -1859,6 +2070,208 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
 		set_next_task(rq, p);
 }
 
+void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
+{
+	__do_set_cpus_allowed(p, new_mask, 0);
+}
+
+/*
+ * This function is wildly self concurrent; here be dragons.
+ *
+ *
+ * When given a valid mask, __set_cpus_allowed_ptr() must block until the
+ * designated task is enqueued on an allowed CPU. If that task is currently
+ * running, we have to kick it out using the CPU stopper.
+ *
+ * Migrate-Disable comes along and tramples all over our nice sandcastle.
+ * Consider:
+ *
+ *     Initial conditions: P0->cpus_mask = [0, 1]
+ *
+ *     P0@CPU0                  P1
+ *
+ *     migrate_disable();
+ *     <preempted>
+ *                              set_cpus_allowed_ptr(P0, [1]);
+ *
+ * P1 *cannot* return from this set_cpus_allowed_ptr() call until P0 executes
+ * its outermost migrate_enable() (i.e. it exits its Migrate-Disable region).
+ * This means we need the following scheme:
+ *
+ *     P0@CPU0                  P1
+ *
+ *     migrate_disable();
+ *     <preempted>
+ *                              set_cpus_allowed_ptr(P0, [1]);
+ *                                <blocks>
+ *     <resumes>
+ *     migrate_enable();
+ *       __set_cpus_allowed_ptr();
+ *         <wakes local stopper>
+ *                         `--> <woken on migration completion>
+ *
+ * Now the fun stuff: there may be several P1-like tasks, i.e. multiple
+ * concurrent set_cpus_allowed_ptr(P0, [*]) calls. CPU affinity changes of any
+ * task p are serialized by p->pi_lock, which we can leverage: the one that
+ * should come into effect at the end of the Migrate-Disable region is the last
+ * one. This means we only need to track a single cpumask (i.e. p->cpus_mask),
+ * but we still need to properly signal those waiting tasks at the appropriate
+ * moment.
+ *
+ * This is implemented using struct set_affinity_pending. The first
+ * __set_cpus_allowed_ptr() caller within a given Migrate-Disable region will
+ * setup an instance of that struct and install it on the targeted task_struct.
+ * Any and all further callers will reuse that instance. Those then wait for
+ * a completion signaled at the tail of the CPU stopper callback (1), triggered
+ * on the end of the Migrate-Disable region (i.e. outermost migrate_enable()).
+ *
+ *
+ * (1) In the cases covered above. There is one more where the completion is
+ * signaled within affine_move_task() itself: when a subsequent affinity request
+ * cancels the need for an active migration. Consider:
+ *
+ *     Initial conditions: P0->cpus_mask = [0, 1]
+ *
+ *     P0@CPU0            P1                             P2
+ *
+ *     migrate_disable();
+ *     <preempted>
+ *                        set_cpus_allowed_ptr(P0, [1]);
+ *                          <blocks>
+ *                                                       set_cpus_allowed_ptr(P0, [0, 1]);
+ *                                                         <signal completion>
+ *                          <awakes>
+ *
+ * Note that the above is safe vs a concurrent migrate_enable(), as any
+ * pending affinity completion is preceded by an uninstallation of
+ * p->migration_pending done with p->pi_lock held.
+ */
+static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flags *rf,
+			    int dest_cpu, unsigned int flags)
+{
+	struct set_affinity_pending my_pending = { }, *pending = NULL;
+	struct migration_arg arg = {
+		.task = p,
+		.dest_cpu = dest_cpu,
+	};
+	bool complete = false;
+
+	/* Can the task run on the task's current CPU? If so, we're done */
+	if (cpumask_test_cpu(task_cpu(p), &p->cpus_mask)) {
+		struct task_struct *push_task = NULL;
+
+		if ((flags & SCA_MIGRATE_ENABLE) &&
+		    (p->migration_flags & MDF_PUSH) && !rq->push_busy) {
+			rq->push_busy = true;
+			push_task = get_task_struct(p);
+		}
+
+		pending = p->migration_pending;
+		if (pending) {
+			refcount_inc(&pending->refs);
+			p->migration_pending = NULL;
+			complete = true;
+		}
+		task_rq_unlock(rq, p, rf);
+
+		if (push_task) {
+			stop_one_cpu_nowait(rq->cpu, push_cpu_stop,
+					    p, &rq->push_work);
+		}
+
+		if (complete)
+			goto do_complete;
+
+		return 0;
+	}
+
+	if (!(flags & SCA_MIGRATE_ENABLE)) {
+		/* serialized by p->pi_lock */
+		if (!p->migration_pending) {
+			/* Install the request */
+			refcount_set(&my_pending.refs, 1);
+			init_completion(&my_pending.done);
+			p->migration_pending = &my_pending;
+		} else {
+			pending = p->migration_pending;
+			refcount_inc(&pending->refs);
+		}
+	}
+	pending = p->migration_pending;
+	/*
+	 * - !MIGRATE_ENABLE:
+	 *   we'll have installed a pending if there wasn't one already.
+	 *
+	 * - MIGRATE_ENABLE:
+	 *   we're here because the current CPU isn't matching anymore,
+	 *   the only way that can happen is because of a concurrent
+	 *   set_cpus_allowed_ptr() call, which should then still be
+	 *   pending completion.
+	 *
+	 * Either way, we really should have a @pending here.
+	 */
+	if (WARN_ON_ONCE(!pending)) {
+		task_rq_unlock(rq, p, rf);
+		return -EINVAL;
+	}
+
+	if (flags & SCA_MIGRATE_ENABLE) {
+
+		refcount_inc(&pending->refs); /* pending->{arg,stop_work} */
+		p->migration_flags &= ~MDF_PUSH;
+		task_rq_unlock(rq, p, rf);
+
+		pending->arg = (struct migration_arg) {
+			.task = p,
+			.dest_cpu = -1,
+			.pending = pending,
+		};
+
+		stop_one_cpu_nowait(cpu_of(rq), migration_cpu_stop,
+				    &pending->arg, &pending->stop_work);
+
+		return 0;
+	}
+
+	if (task_running(rq, p) || p->state == TASK_WAKING) {
+		/*
+		 * Lessen races (and headaches) by delegating
+		 * is_migration_disabled(p) checks to the stopper, which will
+		 * run on the same CPU as said p.
+		 */
+		task_rq_unlock(rq, p, rf);
+		stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
+
+	} else {
+
+		if (!is_migration_disabled(p)) {
+			if (task_on_rq_queued(p))
+				rq = move_queued_task(rq, rf, p, dest_cpu);
+
+			p->migration_pending = NULL;
+			complete = true;
+		}
+		task_rq_unlock(rq, p, rf);
+
+do_complete:
+		if (complete)
+			complete_all(&pending->done);
+	}
+
+	wait_for_completion(&pending->done);
+
+	if (refcount_dec_and_test(&pending->refs))
+		wake_up_var(&pending->refs);
+
+	/*
+	 * Block the original owner of &pending until all subsequent callers
+	 * have seen the completion and decremented the refcount
+	 */
+	wait_var_event(&my_pending.refs, !refcount_read(&my_pending.refs));
+
+	return 0;
+}
+
 /*
  * Change a given task's CPU affinity. Migrate the thread to a
  * proper CPU and schedule it away if the CPU it's executing on
@@ -1869,7 +2282,8 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
  * call is not atomic; no spinlocks may be held.
  */
 static int __set_cpus_allowed_ptr(struct task_struct *p,
-				  const struct cpumask *new_mask, bool check)
+				  const struct cpumask *new_mask,
+				  u32 flags)
 {
 	const struct cpumask *cpu_valid_mask = cpu_active_mask;
 	unsigned int dest_cpu;
@@ -1880,9 +2294,14 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
 	rq = task_rq_lock(p, &rf);
 	update_rq_clock(rq);
 
-	if (p->flags & PF_KTHREAD) {
+	if (p->flags & PF_KTHREAD || is_migration_disabled(p)) {
 		/*
-		 * Kernel threads are allowed on online && !active CPUs
+		 * Kernel threads are allowed on online && !active CPUs.
+		 *
+		 * Specifically, migration_disabled() tasks must not fail the
+		 * cpumask_any_and_distribute() pick below, esp. so on
+		 * SCA_MIGRATE_ENABLE, otherwise we'll not call
+		 * set_cpus_allowed_common() and actually reset p->cpus_ptr.
 		 */
 		cpu_valid_mask = cpu_online_mask;
 	}
@@ -1891,13 +2310,22 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
 	 * Must re-check here, to close a race against __kthread_bind(),
 	 * sched_setaffinity() is not guaranteed to observe the flag.
 	 */
-	if (check && (p->flags & PF_NO_SETAFFINITY)) {
+	if ((flags & SCA_CHECK) && (p->flags & PF_NO_SETAFFINITY)) {
 		ret = -EINVAL;
 		goto out;
 	}
 
-	if (cpumask_equal(&p->cpus_mask, new_mask))
-		goto out;
+	if (!(flags & SCA_MIGRATE_ENABLE)) {
+		if (cpumask_equal(&p->cpus_mask, new_mask))
+			goto out;
+
+		if (WARN_ON_ONCE(p == current &&
+				 is_migration_disabled(p) &&
+				 !cpumask_test_cpu(task_cpu(p), new_mask))) {
+			ret = -EBUSY;
+			goto out;
+		}
+	}
 
 	/*
 	 * Picking a ~random cpu helps in cases where we are changing affinity
@@ -1910,7 +2338,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
 		goto out;
 	}
 
-	do_set_cpus_allowed(p, new_mask);
+	__do_set_cpus_allowed(p, new_mask, flags);
 
 	if (p->flags & PF_KTHREAD) {
 		/*
@@ -1922,23 +2350,8 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
 			p->nr_cpus_allowed != 1);
 	}
 
-	/* Can the task run on the task's current CPU? If so, we're done */
-	if (cpumask_test_cpu(task_cpu(p), new_mask))
-		goto out;
+	return affine_move_task(rq, p, &rf, dest_cpu, flags);
 
-	if (task_running(rq, p) || p->state == TASK_WAKING) {
-		struct migration_arg arg = { p, dest_cpu };
-		/* Need help from migration thread: drop lock and wait. */
-		task_rq_unlock(rq, p, &rf);
-		stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
-		return 0;
-	} else if (task_on_rq_queued(p)) {
-		/*
-		 * OK, since we're going to drop the lock immediately
-		 * afterwards anyway.
-		 */
-		rq = move_queued_task(rq, &rf, p, dest_cpu);
-	}
 out:
 	task_rq_unlock(rq, p, &rf);
@@ -1947,7 +2360,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
 
 int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
 {
-	return __set_cpus_allowed_ptr(p, new_mask, false);
+	return __set_cpus_allowed_ptr(p, new_mask, 0);
 }
 EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
@@ -1988,6 +2401,8 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 	 * Clearly, migrating tasks to offline CPUs is a fairly daft thing.
 	 */
 	WARN_ON_ONCE(!cpu_online(new_cpu));
+
+	WARN_ON_ONCE(is_migration_disabled(p));
 #endif
 
 	trace_sched_migrate_task(p, new_cpu);
@@ -2318,6 +2733,12 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
 		}
 		fallthrough;
 	case possible:
+		/*
+		 * XXX When called from select_task_rq() we only
+		 * hold p->pi_lock and again violate locking order.
+		 *
+		 * More yuck to audit.
+		 */
 		do_set_cpus_allowed(p, cpu_possible_mask);
 		state = fail;
 		break;
@@ -2352,7 +2773,7 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
 {
 	lockdep_assert_held(&p->pi_lock);
 
-	if (p->nr_cpus_allowed > 1)
+	if (p->nr_cpus_allowed > 1 && !is_migration_disabled(p))
 		cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags);
 	else
 		cpu = cpumask_any(p->cpus_ptr);
@@ -2375,6 +2796,7 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
 void sched_set_stop_task(int cpu, struct task_struct *stop)
 {
+	static struct lock_class_key stop_pi_lock;
 	struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
 	struct task_struct *old_stop = cpu_rq(cpu)->stop;
@@ -2390,6 +2812,20 @@ void sched_set_stop_task(int cpu, struct task_struct *stop)
 		sched_setscheduler_nocheck(stop, SCHED_FIFO, &param);
 
 		stop->sched_class = &stop_sched_class;
+
+		/*
+		 * The PI code calls rt_mutex_setprio() with ->pi_lock held to
+		 * adjust the effective priority of a task. As a result,
+		 * rt_mutex_setprio() can trigger (RT) balancing operations,
+		 * which can then trigger wakeups of the stop thread to push
+		 * around the current task.
+		 *
+		 * The stop task itself will never be part of the PI-chain, it
+		 * never blocks, therefore that ->pi_lock recursion is safe.
+		 * Tell lockdep about this by placing the stop->pi_lock in its
+		 * own class.
+		 */
+		lockdep_set_class(&stop->pi_lock, &stop_pi_lock);
 	}
 
 	cpu_rq(cpu)->stop = stop;
@@ -2406,13 +2842,25 @@ void sched_set_stop_task(int cpu, struct task_struct *stop)
 #else
 
 static inline int __set_cpus_allowed_ptr(struct task_struct *p,
-					 const struct cpumask *new_mask, bool check)
+					 const struct cpumask *new_mask,
+					 u32 flags)
 {
 	return set_cpus_allowed_ptr(p, new_mask);
 }
 
 #endif /* CONFIG_SMP */
 
+#if !defined(CONFIG_SMP) || !defined(CONFIG_PREEMPT_RT)
+
+static inline void migrate_disable_switch(struct rq *rq, struct task_struct *p) { }
+
+static inline bool rq_has_pinned_tasks(struct rq *rq)
+{
+	return false;
+}
+
+#endif
+
 static void
 ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
 {
@@ -3098,6 +3546,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 	init_numa_balancing(clone_flags, p);
 #ifdef CONFIG_SMP
 	p->wake_entry.u_flags = CSD_TYPE_TTWU;
+	p->migration_pending = NULL;
 #endif
 }
@@ -3485,6 +3934,90 @@ static inline void finish_task(struct task_struct *prev)
 #endif
 }
 
+#ifdef CONFIG_SMP
+
+static void do_balance_callbacks(struct rq *rq, struct callback_head *head)
+{
+	void (*func)(struct rq *rq);
+	struct callback_head *next;
+
+	lockdep_assert_held(&rq->lock);
+
+	while (head) {
+		func = (void (*)(struct rq *))head->func;
+		next = head->next;
+		head->next = NULL;
+		head = next;
+
+		func(rq);
+	}
+}
+
+static inline struct callback_head *splice_balance_callbacks(struct rq *rq)
+{
+	struct callback_head *head = rq->balance_callback;
+
+	lockdep_assert_held(&rq->lock);
+	if (head) {
+		rq->balance_callback = NULL;
+		rq->balance_flags &= ~BALANCE_WORK;
+	}
+
+	return head;
+}
+
+static void __balance_callbacks(struct rq *rq)
+{
+	do_balance_callbacks(rq, splice_balance_callbacks(rq));
+}
+
+static inline void balance_callbacks(struct rq *rq, struct callback_head *head)
+{
+	unsigned long flags;
+
+	if (unlikely(head)) {
+		raw_spin_lock_irqsave(&rq->lock, flags);
+		do_balance_callbacks(rq, head);
+		raw_spin_unlock_irqrestore(&rq->lock, flags);
+	}
+}
+
+static void balance_push(struct rq *rq);
+
+static inline void balance_switch(struct rq *rq)
+{
+	if (likely(!rq->balance_flags))
+		return;
+
+	if (rq->balance_flags & BALANCE_PUSH) {
+		balance_push(rq);
+		return;
+	}
+
+	__balance_callbacks(rq);
+}
+
+#else
+
+static inline void __balance_callbacks(struct rq *rq)
+{
+}
+
+static inline struct callback_head *splice_balance_callbacks(struct rq *rq)
+{
+	return NULL;
+}
+
+static inline void balance_callbacks(struct rq *rq, struct callback_head *head)
+{
+}
+
+static inline void balance_switch(struct rq *rq)
+{
+}
+
+#endif
+
 static inline void
 prepare_lock_switch(struct rq *rq, struct task_struct *next, struct rq_flags *rf)
 {
@@ -3510,6 +4043,7 @@ static inline void finish_lock_switch(struct rq *rq)
 	 * prev into current:
 	 */
 	spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);
+	balance_switch(rq);
 	raw_spin_unlock_irq(&rq->lock);
 }
@@ -3651,43 +4185,6 @@ static struct rq *finish_task_switch(struct task_struct *prev)
 	return rq;
 }
 
-#ifdef CONFIG_SMP
-
-/* rq->lock is NOT held, but preemption is disabled */
-static void __balance_callback(struct rq *rq)
-{
-	struct callback_head *head, *next;
-	void (*func)(struct rq *rq);
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&rq->lock, flags);
-	head = rq->balance_callback;
-	rq->balance_callback = NULL;
-	while (head) {
-		func = (void (*)(struct rq *))head->func;
-		next = head->next;
-		head->next = NULL;
-		head = next;
-
-		func(rq);
-	}
-	raw_spin_unlock_irqrestore(&rq->lock, flags);
-}
-
-static inline void balance_callback(struct rq *rq)
-{
-	if (unlikely(rq->balance_callback))
-		__balance_callback(rq);
-}
-
-#else
-
-static inline void balance_callback(struct rq *rq)
-{
-}
-
-#endif
-
 /**
  * schedule_tail - first thing a freshly forked thread must call.
  * @prev: the thread we just switched away from.
@@ -3707,7 +4204,6 @@ asmlinkage __visible void schedule_tail(struct task_struct *prev)
 	 */
 
 	rq = finish_task_switch(prev);
-	balance_callback(rq);
 	preempt_enable();
 
 	if (current->set_child_tid)
@@ -4515,6 +5011,7 @@ static void __sched notrace __schedule(bool preempt)
 	 */
 	++*switch_count;
 
+	migrate_disable_switch(rq, prev);
 	psi_sched_switch(prev, next, !task_on_rq_queued(prev));
 
 	trace_sched_switch(preempt, prev, next);
@@ -4523,10 +5020,11 @@ static void __sched notrace __schedule(bool preempt)
 		rq = context_switch(rq, prev, next, &rf);
 	} else {
 		rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP);
-		rq_unlock_irq(rq, &rf);
-	}
 
-	balance_callback(rq);
+		rq_unpin_lock(rq, &rf);
+		__balance_callbacks(rq);
+		raw_spin_unlock_irq(&rq->lock);
+	}
 }
 
 void __noreturn do_task_dead(void)
@@ -4937,9 +5435,11 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
 out_unlock:
 	/* Avoid rq from going away on us: */
 	preempt_disable();
-	__task_rq_unlock(rq, &rf);
 
-	balance_callback(rq);
+	rq_unpin_lock(rq, &rf);
+	__balance_callbacks(rq);
+	raw_spin_unlock(&rq->lock);
+
 	preempt_enable();
 }
 #else
@@ -5213,6 +5713,7 @@ static int __sched_setscheduler(struct task_struct *p,
 	int retval, oldprio, oldpolicy = -1, queued, running;
 	int new_effective_prio, policy = attr->sched_policy;
 	const struct sched_class *prev_class;
+	struct callback_head *head;
 	struct rq_flags rf;
 	int reset_on_fork;
 	int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
@@ -5451,6 +5952,7 @@ static int __sched_setscheduler(struct task_struct *p,
 
 	/* Avoid rq from going away on us: */
 	preempt_disable();
+	head = splice_balance_callbacks(rq);
 	task_rq_unlock(rq, p, &rf);
 
 	if (pi) {
@@ -5459,7 +5961,7 @@ static int __sched_setscheduler(struct task_struct *p,
 	}
 
 	/* Run balance callbacks after we've adjusted the PI chain: */
-	balance_callback(rq);
+	balance_callbacks(rq, head);
 	preempt_enable();
 
 	return 0;
@@ -5954,7 +6456,7 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
 	}
 #endif
 again:
-	retval = __set_cpus_allowed_ptr(p, new_mask, true);
+	retval = __set_cpus_allowed_ptr(p, new_mask, SCA_CHECK);
 
 	if (!retval) {
 		cpuset_cpus_allowed(p, cpus_allowed);
@@ -6443,6 +6945,7 @@ void sched_show_task(struct task_struct *p)
 		(unsigned long)task_thread_info(p)->flags);
 
 	print_worker_info(KERN_INFO, p);
+	print_stop_info(KERN_INFO, p);
 	show_stack(p, NULL, KERN_INFO);
 	put_task_stack(p);
 }
@@ -6533,7 +7036,7 @@ void init_idle(struct task_struct *idle, int cpu)
 	 *
 	 * And since this is boot we can forgo the serialization.
 	 */
-	set_cpus_allowed_common(idle, cpumask_of(cpu));
+	set_cpus_allowed_common(idle, cpumask_of(cpu), 0);
 #endif
 	/*
 	 * We're having a chicken and egg problem, even though we are
@@ -6684,119 +7187,126 @@ void idle_task_exit(void)
 	/* finish_cpu(), as ran on the BP, will clean up the active_mm state */
 }
 
-/*
- * Since this CPU is going 'away' for a while, fold any nr_active delta
- * we might have. Assumes we're called after migrate_tasks() so that the
- * nr_active count is stable. We need to take the teardown thread which
- * is calling this into account, so we hand in adjust = 1 to the load
- * calculation.
- *
- * Also see the comment "Global load-average calculations".
- */
-static void calc_load_migrate(struct rq *rq)
-{
-	long delta = calc_load_fold_active(rq, 1);
-	if (delta)
-		atomic_long_add(delta, &calc_load_tasks);
-}
-
-static struct task_struct *__pick_migrate_task(struct rq *rq)
-{
-	const struct sched_class *class;
-	struct task_struct *next;
-
-	for_each_class(class) {
-		next = class->pick_next_task(rq);
-		if (next) {
-			next->sched_class->put_prev_task(rq, next);
-			return next;
-		}
-	}
-
-	/* The idle class should always have a runnable task */
-	BUG();
-}
-
-/*
- * Migrate all tasks from the rq, sleeping tasks will be migrated by
- * try_to_wake_up()->select_task_rq().
- *
- * Called with rq->lock held even though we'er in stop_machine() and
- * there's no concurrency possible, we hold the required locks anyway
- * because of lock validation efforts.
- */
-static void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf)
-{
-	struct rq *rq = dead_rq;
-	struct task_struct *next, *stop = rq->stop;
-	struct rq_flags orf = *rf;
-	int dest_cpu;
-
-	/*
-	 * Fudge the rq selection such that the below task selection loop
-	 * doesn't get stuck on the currently eligible stop task.
-	 *
-	 * We're currently inside stop_machine() and the rq is either stuck
-	 * in the stop_machine_cpu_stop() loop, or we're executing this code,
-	 * either way we should never end up calling schedule() until we're
-	 * done here.
-	 */
-	rq->stop = NULL;
-
-	/*
-	 * put_prev_task() and pick_next_task() sched
-	 * class method both need to have an up-to-date
-	 * value of rq->clock[_task]
-	 */
-	update_rq_clock(rq);
-
-	for (;;) {
-		/*
-		 * There's this thread running, bail when that's the only
-		 * remaining thread:
-		 */
-		if (rq->nr_running == 1)
-			break;
-
-		next = __pick_migrate_task(rq);
-
-		/*
-		 * Rules for changing task_struct::cpus_mask are holding
-		 * both pi_lock and rq->lock, such that holding either
-		 * stabilizes the mask.
-		 *
-		 * Drop rq->lock is not quite as disastrous as it usually is
-		 * because !cpu_active at this point, which means load-balance
-		 * will not interfere. Also, stop-machine.
-		 */
-		rq_unlock(rq, rf);
-		raw_spin_lock(&next->pi_lock);
-		rq_relock(rq, rf);
-
-		/*
-		 * Since we're inside stop-machine, _nothing_ should have
-		 * changed the task, WARN if weird stuff happened, because in
-		 * that case the above rq->lock drop is a fail too.
-		 */
-		if (WARN_ON(task_rq(next) != rq || !task_on_rq_queued(next))) {
-			raw_spin_unlock(&next->pi_lock);
-			continue;
-		}
-
-		/* Find suitable destination for @next, with force if needed. */
-		dest_cpu = select_fallback_rq(dead_rq->cpu, next);
-		rq = __migrate_task(rq, rf, next, dest_cpu);
-		if (rq != dead_rq) {
-			rq_unlock(rq, rf);
-			rq = dead_rq;
-			*rf = orf;
-			rq_relock(rq, rf);
-		}
-		raw_spin_unlock(&next->pi_lock);
-	}
-
-	rq->stop = stop;
-}
+static int __balance_push_cpu_stop(void *arg)
+{
+	struct task_struct *p = arg;
+	struct rq *rq = this_rq();
+	struct rq_flags rf;
+	int cpu;
+
+	raw_spin_lock_irq(&p->pi_lock);
+	rq_lock(rq, &rf);
+
+	update_rq_clock(rq);
+
+	if (task_rq(p) == rq && task_on_rq_queued(p)) {
+		cpu = select_fallback_rq(rq->cpu, p);
+		rq = __migrate_task(rq, &rf, p, cpu);
+	}
+
+	rq_unlock(rq, &rf);
+	raw_spin_unlock_irq(&p->pi_lock);
+
+	put_task_struct(p);
+
+	return 0;
+}
+
+static DEFINE_PER_CPU(struct cpu_stop_work, push_work);
+
+/*
+ * Ensure we only run per-cpu kthreads once the CPU goes !active.
+ */
+static void balance_push(struct rq *rq)
+{
+	struct task_struct *push_task = rq->curr;
+
+	lockdep_assert_held(&rq->lock);
+	SCHED_WARN_ON(rq->cpu != smp_processor_id());
+
+	/*
+	 * Both the cpu-hotplug and stop task are in this case and are
+	 * required to complete the hotplug process.
+	 */
+	if (is_per_cpu_kthread(push_task) || is_migration_disabled(push_task)) {
+		/*
+		 * If this is the idle task on the outgoing CPU try to wake
+		 * up the hotplug control thread which might wait for the
+		 * last task to vanish. The rcuwait_active() check is
+		 * accurate here because the waiter is pinned on this CPU
+		 * and can't obviously be running in parallel.
+		 *
+		 * On RT kernels this also has to check whether there are
+		 * pinned and scheduled out tasks on the runqueue. They
+		 * need to leave the migrate disabled section first.
+		 */
+		if (!rq->nr_running && !rq_has_pinned_tasks(rq) &&
+		    rcuwait_active(&rq->hotplug_wait)) {
+			raw_spin_unlock(&rq->lock);
+			rcuwait_wake_up(&rq->hotplug_wait);
+			raw_spin_lock(&rq->lock);
+		}
+		return;
+	}
+
+	get_task_struct(push_task);
+	/*
+	 * Temporarily drop rq->lock such that we can wake-up the stop task.
+	 * Both preemption and IRQs are still disabled.
+	 */
+	raw_spin_unlock(&rq->lock);
+	stop_one_cpu_nowait(rq->cpu, __balance_push_cpu_stop, push_task,
+			    this_cpu_ptr(&push_work));
+	/*
+	 * At this point need_resched() is true and we'll take the loop in
+	 * schedule(). The next pick is obviously going to be the stop task
+	 * which is_per_cpu_kthread() and will push this task away.
+	 */
+	raw_spin_lock(&rq->lock);
+}
+
+static void balance_push_set(int cpu, bool on)
+{
+	struct rq *rq = cpu_rq(cpu);
+	struct rq_flags rf;
+
+	rq_lock_irqsave(rq, &rf);
+	if (on)
+		rq->balance_flags |= BALANCE_PUSH;
+	else
+		rq->balance_flags &= ~BALANCE_PUSH;
+	rq_unlock_irqrestore(rq, &rf);
+}
+
+/*
+ * Invoked from a CPUs hotplug control thread after the CPU has been marked
+ * inactive. All tasks which are not per CPU kernel threads are either
+ * pushed off this CPU now via balance_push() or placed on a different CPU
+ * during wakeup. Wait until the CPU is quiescent.
+ */
+static void balance_hotplug_wait(void)
+{
+	struct rq *rq = this_rq();
+
+	rcuwait_wait_event(&rq->hotplug_wait,
+			   rq->nr_running == 1 && !rq_has_pinned_tasks(rq),
+			   TASK_UNINTERRUPTIBLE);
+}
+
+#else
+
+static inline void balance_push(struct rq *rq)
+{
+}
+
+static inline void balance_push_set(int cpu, bool on)
+{
+}
+
+static inline void balance_hotplug_wait(void)
+{
+}
 
 #endif /* CONFIG_HOTPLUG_CPU */
 
 void set_rq_online(struct rq *rq)
@@ -6882,6 +7392,8 @@ int sched_cpu_activate(unsigned int cpu)
 	struct rq *rq = cpu_rq(cpu);
 	struct rq_flags rf;
 
+	balance_push_set(cpu, false);
+
 #ifdef CONFIG_SCHED_SMT
 	/*
 	 * When going up, increment the number of cores with SMT present.
@@ -6917,6 +7429,8 @@ int sched_cpu_activate(unsigned int cpu)
 int sched_cpu_deactivate(unsigned int cpu)
 {
+	struct rq *rq = cpu_rq(cpu);
+	struct rq_flags rf;
 	int ret;
 
 	set_cpu_active(cpu, false);
@@ -6929,6 +7443,16 @@ int sched_cpu_deactivate(unsigned int cpu)
 	 */
 	synchronize_rcu();
 
+	balance_push_set(cpu, true);
+
+	rq_lock_irqsave(rq, &rf);
+	if (rq->rd) {
+		update_rq_clock(rq);
+		BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
+		set_rq_offline(rq);
+	}
+	rq_unlock_irqrestore(rq, &rf);
+
 #ifdef CONFIG_SCHED_SMT
 	/*
 	 * When going down, decrement the number of cores with SMT present.
@@ -6942,6 +7466,7 @@ int sched_cpu_deactivate(unsigned int cpu)
 	ret = cpuset_cpu_inactive(cpu);
 	if (ret) {
+		balance_push_set(cpu, false);
 		set_cpu_active(cpu, true);
 		return ret;
 	}
@@ -6965,6 +7490,41 @@ int sched_cpu_starting(unsigned int cpu)
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
+
+/*
+ * Invoked immediately before the stopper thread is invoked to bring the
+ * CPU down completely. At this point all per CPU kthreads except the
+ * hotplug thread (current) and the stopper thread (inactive) have been
+ * either parked or have been unbound from the outgoing CPU. Ensure that
+ * any of those which might be on the way out are gone.
+ *
+ * If after this point a bound task is being woken on this CPU then the
+ * responsible hotplug callback has failed to do it's job.
+ * sched_cpu_dying() will catch it with the appropriate fireworks.
+ */
+int sched_cpu_wait_empty(unsigned int cpu)
+{
+	balance_hotplug_wait();
+	return 0;
+}
+
+/*
+ * Since this CPU is going 'away' for a while, fold any nr_active delta we
+ * might have. Called from the CPU stopper task after ensuring that the
+ * stopper is the last running task on the CPU, so nr_active count is
+ * stable. We need to take the teardown thread which is calling this into
+ * account, so we hand in adjust = 1 to the load calculation.
+ *
+ * Also see the comment "Global load-average calculations".
+ */
+static void calc_load_migrate(struct rq *rq)
+{
+	long delta = calc_load_fold_active(rq, 1);
+
+	if (delta)
+		atomic_long_add(delta, &calc_load_tasks);
+}
+
 int sched_cpu_dying(unsigned int cpu)
 {
 	struct rq *rq = cpu_rq(cpu);
@@ -6974,12 +7534,7 @@ int sched_cpu_dying(unsigned int cpu)
 	sched_tick_stop(cpu);
 
 	rq_lock_irqsave(rq, &rf);
-	if (rq->rd) {
-		BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
-		set_rq_offline(rq);
-	}
-	migrate_tasks(rq, &rf);
-	BUG_ON(rq->nr_running != 1);
+	BUG_ON(rq->nr_running != 1 || rq_has_pinned_tasks(rq));
 	rq_unlock_irqrestore(rq, &rf);
 
 	calc_load_migrate(rq);
@@ -7186,6 +7741,9 @@ void __init sched_init(void)
 
 		rq_csd_init(rq, &rq->nohz_csd, nohz_csd_func);
 #endif
+#ifdef CONFIG_HOTPLUG_CPU
+		rcuwait_init(&rq->hotplug_wait);
+#endif
 #endif /* CONFIG_SMP */
 		hrtick_rq_init(rq);
 		atomic_set(&rq->nr_iowait, 0);
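Stripped of the scheduler details, the affine_move_task() scheme documented in the hunk above is a small ownership pattern: the first waiter donates a stack-allocated descriptor, later waiters only take references, whoever finishes the migration completes it, and the owner may not return until every reference is dropped. A hypothetical, simplified sketch of just that lifetime rule follows; pending_req, get_or_install() and wait_for_move() are illustrative names, while the real code keeps the descriptor in p->migration_pending under p->pi_lock:

/* Sketch only; mirrors the refcount/completion dance described above. */
struct pending_req {
	refcount_t		refs;
	struct completion	done;
};

static struct pending_req *shared;	/* stands in for p->migration_pending */

/* Caller must hold whatever lock serializes installers (p->pi_lock there). */
static struct pending_req *get_or_install(struct pending_req *mine)
{
	if (!shared) {
		refcount_set(&mine->refs, 1);	/* owner's reference */
		init_completion(&mine->done);
		shared = mine;
		return mine;
	}
	refcount_inc(&shared->refs);		/* additional waiter */
	return shared;
}

/*
 * The completer clears 'shared' and calls complete_all(&req->done) under
 * the same lock, just as migration_cpu_stop()/affine_move_task() do.
 */
static void wait_for_move(struct pending_req *mine)
{
	struct pending_req *req = get_or_install(mine);

	wait_for_completion(&req->done);	/* signalled once the move is done */
	if (refcount_dec_and_test(&req->refs))
		wake_up_var(&req->refs);
	if (req == mine)			/* owner must outlive all other waiters */
		wait_var_event(&mine->refs, !refcount_read(&mine->refs));
}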
kernel/sched/cpudeadline.c

@@ -120,7 +120,7 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
 	const struct sched_dl_entity *dl_se = &p->dl;
 
 	if (later_mask &&
-	    cpumask_and(later_mask, cp->free_cpus, p->cpus_ptr)) {
+	    cpumask_and(later_mask, cp->free_cpus, &p->cpus_mask)) {
 		unsigned long cap, max_cap = 0;
 		int cpu, max_cpu = -1;
@@ -151,7 +151,7 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
 
 		WARN_ON(best_cpu != -1 && !cpu_present(best_cpu));
 
-		if (cpumask_test_cpu(best_cpu, p->cpus_ptr) &&
+		if (cpumask_test_cpu(best_cpu, &p->cpus_mask) &&
 		    dl_time_before(dl_se->deadline, cp->elements[0].dl)) {
 			if (later_mask)
 				cpumask_set_cpu(best_cpu, later_mask);
kernel/sched/cpupri.c

@@ -97,11 +97,11 @@ static inline int __cpupri_find(struct cpupri *cp, struct task_struct *p,
 	if (skip)
 		return 0;
 
-	if (cpumask_any_and(p->cpus_ptr, vec->mask) >= nr_cpu_ids)
+	if (cpumask_any_and(&p->cpus_mask, vec->mask) >= nr_cpu_ids)
 		return 0;
 
 	if (lowest_mask) {
-		cpumask_and(lowest_mask, p->cpus_ptr, vec->mask);
+		cpumask_and(lowest_mask, &p->cpus_mask, vec->mask);
 
 		/*
 		 * We have to ensure that we have at least one bit
kernel/sched/deadline.c

@@ -559,7 +559,7 @@ static int push_dl_task(struct rq *rq);
 
 static inline bool need_pull_dl_task(struct rq *rq, struct task_struct *prev)
 {
-	return dl_task(prev);
+	return rq->online && dl_task(prev);
 }
 
 static DEFINE_PER_CPU(struct callback_head, dl_push_head);
@@ -1931,7 +1931,7 @@ static void task_fork_dl(struct task_struct *p)
 static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu)
 {
 	if (!task_running(rq, p) &&
-	    cpumask_test_cpu(cpu, p->cpus_ptr))
+	    cpumask_test_cpu(cpu, &p->cpus_mask))
 		return 1;
 	return 0;
 }
@@ -2021,8 +2021,8 @@ static int find_later_rq(struct task_struct *task)
 				return this_cpu;
 			}
 
-			best_cpu = cpumask_first_and(later_mask,
-						     sched_domain_span(sd));
+			best_cpu = cpumask_any_and_distribute(later_mask,
+							      sched_domain_span(sd));
 			/*
 			 * Last chance: if a CPU being in both later_mask
 			 * and current sd span is valid, that becomes our
@@ -2044,7 +2044,7 @@ static int find_later_rq(struct task_struct *task)
 	if (this_cpu != -1)
 		return this_cpu;
 
-	cpu = cpumask_any(later_mask);
+	cpu = cpumask_any_distribute(later_mask);
 	if (cpu < nr_cpu_ids)
 		return cpu;
@@ -2081,7 +2081,7 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
 		/* Retry if something changed. */
 		if (double_lock_balance(rq, later_rq)) {
 			if (unlikely(task_rq(task) != rq ||
-				     !cpumask_test_cpu(later_rq->cpu, task->cpus_ptr) ||
+				     !cpumask_test_cpu(later_rq->cpu, &task->cpus_mask) ||
 				     task_running(rq, task) ||
 				     !dl_task(task) ||
 				     !task_on_rq_queued(task))) {
@@ -2148,6 +2148,9 @@ static int push_dl_task(struct rq *rq)
 		return 0;
 
 retry:
+	if (is_migration_disabled(next_task))
+		return 0;
+
 	if (WARN_ON(next_task == rq->curr))
 		return 0;
@@ -2225,7 +2228,7 @@ static void push_dl_tasks(struct rq *rq)
 static void pull_dl_task(struct rq *this_rq)
 {
 	int this_cpu = this_rq->cpu, cpu;
-	struct task_struct *p;
+	struct task_struct *p, *push_task;
 	bool resched = false;
 	struct rq *src_rq;
 	u64 dmin = LONG_MAX;
@@ -2255,6 +2258,7 @@ static void pull_dl_task(struct rq *this_rq)
 			continue;
 
 		/* Might drop this_rq->lock */
+		push_task = NULL;
 		double_lock_balance(this_rq, src_rq);
 
 		/*
@@ -2286,17 +2290,27 @@ static void pull_dl_task(struct rq *this_rq)
 					   src_rq->curr->dl.deadline))
 				goto skip;
 
-			resched = true;
-
-			deactivate_task(src_rq, p, 0);
-			set_task_cpu(p, this_cpu);
-			activate_task(this_rq, p, 0);
-			dmin = p->dl.deadline;
+			if (is_migration_disabled(p)) {
+				push_task = get_push_task(src_rq);
+			} else {
+				deactivate_task(src_rq, p, 0);
+				set_task_cpu(p, this_cpu);
+				activate_task(this_rq, p, 0);
+				dmin = p->dl.deadline;
+				resched = true;
+			}
 
 			/* Is there any other task even earlier? */
 		}
 skip:
 		double_unlock_balance(this_rq, src_rq);
+
+		if (push_task) {
+			raw_spin_unlock(&this_rq->lock);
+			stop_one_cpu_nowait(src_rq->cpu, push_cpu_stop,
+					    push_task, &src_rq->push_work);
+			raw_spin_lock(&this_rq->lock);
+		}
 	}
 
 	if (resched)
@@ -2320,7 +2334,8 @@ static void task_woken_dl(struct rq *rq, struct task_struct *p)
 }
 
 static void set_cpus_allowed_dl(struct task_struct *p,
-				const struct cpumask *new_mask)
+				const struct cpumask *new_mask,
+				u32 flags)
 {
 	struct root_domain *src_rd;
 	struct rq *rq;
@@ -2349,7 +2364,7 @@ static void set_cpus_allowed_dl(struct task_struct *p,
 		raw_spin_unlock(&src_dl_b->lock);
 	}
 
-	set_cpus_allowed_common(p, new_mask);
+	set_cpus_allowed_common(p, new_mask, flags);
 }
 
 /* Assumes rq->lock is held */
@@ -2542,6 +2557,7 @@ DEFINE_SCHED_CLASS(dl) = {
 	.rq_online              = rq_online_dl,
 	.rq_offline             = rq_offline_dl,
 	.task_woken		= task_woken_dl,
+	.find_lock_rq		= find_lock_later_rq,
 #endif
 
 	.task_tick		= task_tick_dl,
kernel/sched/rt.c

@@ -265,7 +265,7 @@ static void pull_rt_task(struct rq *this_rq);
 static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
 {
 	/* Try to pull RT tasks here if we lower this rq's prio */
-	return rq->rt.highest_prio.curr > prev->prio;
+	return rq->online && rq->rt.highest_prio.curr > prev->prio;
 }
 
 static inline int rt_overloaded(struct rq *rq)
@@ -1660,7 +1660,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
 static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
 {
 	if (!task_running(rq, p) &&
-	    cpumask_test_cpu(cpu, p->cpus_ptr))
+	    cpumask_test_cpu(cpu, &p->cpus_mask))
 		return 1;
 
 	return 0;
@@ -1754,8 +1754,8 @@ static int find_lowest_rq(struct task_struct *task)
 				return this_cpu;
 			}
 
-			best_cpu = cpumask_first_and(lowest_mask,
-						     sched_domain_span(sd));
+			best_cpu = cpumask_any_and_distribute(lowest_mask,
+							      sched_domain_span(sd));
 			if (best_cpu < nr_cpu_ids) {
 				rcu_read_unlock();
 				return best_cpu;
@@ -1772,7 +1772,7 @@ static int find_lowest_rq(struct task_struct *task)
 	if (this_cpu != -1)
 		return this_cpu;
 
-	cpu = cpumask_any(lowest_mask);
+	cpu = cpumask_any_distribute(lowest_mask);
 	if (cpu < nr_cpu_ids)
 		return cpu;
@@ -1813,7 +1813,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
 			 * Also make sure that it wasn't scheduled on its rq.
 			 */
 			if (unlikely(task_rq(task) != rq ||
-				     !cpumask_test_cpu(lowest_rq->cpu, task->cpus_ptr) ||
+				     !cpumask_test_cpu(lowest_rq->cpu, &task->cpus_mask) ||
 				     task_running(rq, task) ||
 				     !rt_task(task) ||
 				     !task_on_rq_queued(task))) {
@@ -1861,7 +1861,7 @@ static struct task_struct *pick_next_pushable_task(struct rq *rq)
  * running task can migrate over to a CPU that is running a task
  * of lesser priority.
  */
-static int push_rt_task(struct rq *rq)
+static int push_rt_task(struct rq *rq, bool pull)
 {
 	struct task_struct *next_task;
 	struct rq *lowest_rq;
@@ -1875,6 +1875,34 @@ static int push_rt_task(struct rq *rq)
 		return 0;
 
 retry:
+	if (is_migration_disabled(next_task)) {
+		struct task_struct *push_task = NULL;
+		int cpu;
+
+		if (!pull || rq->push_busy)
+			return 0;
+
+		cpu = find_lowest_rq(rq->curr);
+		if (cpu == -1 || cpu == rq->cpu)
+			return 0;
+
+		/*
+		 * Given we found a CPU with lower priority than @next_task,
+		 * therefore it should be running. However we cannot migrate it
+		 * to this other CPU, instead attempt to push the current
+		 * running task on this CPU away.
+		 */
+		push_task = get_push_task(rq);
+		if (push_task) {
+			raw_spin_unlock(&rq->lock);
+			stop_one_cpu_nowait(rq->cpu, push_cpu_stop,
+					    push_task, &rq->push_work);
+			raw_spin_lock(&rq->lock);
+		}
+
+		return 0;
+	}
+
 	if (WARN_ON(next_task == rq->curr))
 		return 0;
@@ -1929,12 +1957,10 @@ static int push_rt_task(struct rq *rq)
 	deactivate_task(rq, next_task, 0);
 	set_task_cpu(next_task, lowest_rq->cpu);
 	activate_task(lowest_rq, next_task, 0);
-	ret = 1;
-
 	resched_curr(lowest_rq);
+	ret = 1;
 
 	double_unlock_balance(rq, lowest_rq);
-
 out:
 	put_task_struct(next_task);
@@ -1944,7 +1970,7 @@ static int push_rt_task(struct rq *rq)
 static void push_rt_tasks(struct rq *rq)
 {
 	/* push_rt_task will return true if it moved an RT */
-	while (push_rt_task(rq))
+	while (push_rt_task(rq, false))
 		;
 }
@@ -2097,7 +2123,8 @@ void rto_push_irq_work_func(struct irq_work *work)
 	 */
 	if (has_pushable_tasks(rq)) {
 		raw_spin_lock(&rq->lock);
-		push_rt_tasks(rq);
+		while (push_rt_task(rq, true))
+			;
 		raw_spin_unlock(&rq->lock);
 	}
@@ -2122,7 +2149,7 @@ static void pull_rt_task(struct rq *this_rq)
 {
 	int this_cpu = this_rq->cpu, cpu;
 	bool resched = false;
-	struct task_struct *p;
+	struct task_struct *p, *push_task;
 	struct rq *src_rq;
 	int rt_overload_count = rt_overloaded(this_rq);
@@ -2169,6 +2196,7 @@ static void pull_rt_task(struct rq *this_rq)
 		 * double_lock_balance, and another CPU could
 		 * alter this_rq
 		 */
+		push_task = NULL;
 		double_lock_balance(this_rq, src_rq);
 
 		/*
@@ -2196,11 +2224,14 @@ static void pull_rt_task(struct rq *this_rq)
 			if (p->prio < src_rq->curr->prio)
 				goto skip;
 
-			resched = true;
-
-			deactivate_task(src_rq, p, 0);
-			set_task_cpu(p, this_cpu);
-			activate_task(this_rq, p, 0);
+			if (is_migration_disabled(p)) {
+				push_task = get_push_task(src_rq);
+			} else {
+				deactivate_task(src_rq, p, 0);
+				set_task_cpu(p, this_cpu);
+				activate_task(this_rq, p, 0);
+				resched = true;
+			}
 			/*
 			 * We continue with the search, just in
 			 * case there's an even higher prio task
@@ -2210,6 +2241,13 @@ static void pull_rt_task(struct rq *this_rq)
 		}
 skip:
 		double_unlock_balance(this_rq, src_rq);
+
+		if (push_task) {
+			raw_spin_unlock(&this_rq->lock);
+			stop_one_cpu_nowait(src_rq->cpu, push_cpu_stop,
+					    push_task, &src_rq->push_work);
+			raw_spin_lock(&this_rq->lock);
+		}
 	}
 
 	if (resched)
@@ -2451,6 +2489,7 @@ DEFINE_SCHED_CLASS(rt) = {
 	.rq_offline             = rq_offline_rt,
 	.task_woken		= task_woken_rt,
 	.switched_from		= switched_from_rt,
+	.find_lock_rq		= find_lock_lowest_rq,
 #endif
 
 	.task_tick		= task_tick_rt,
kernel/sched/sched.h

@@ -975,6 +975,7 @@ struct rq {
 	unsigned long		cpu_capacity_orig;
 
 	struct callback_head	*balance_callback;
+	unsigned char		balance_flags;
 
 	unsigned char		nohz_idle_balance;
 	unsigned char		idle_balance;
@@ -1005,6 +1006,10 @@ struct rq {
 
 	/* This is used to determine avg_idle's max value */
 	u64			max_idle_balance_cost;
+
+#ifdef CONFIG_HOTPLUG_CPU
+	struct rcuwait		hotplug_wait;
+#endif
 #endif /* CONFIG_SMP */
 
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
@@ -1050,6 +1055,12 @@ struct rq {
 	/* Must be inspected within a rcu lock section */
 	struct cpuidle_state	*idle_state;
 #endif
+
+#if defined(CONFIG_PREEMPT_RT) && defined(CONFIG_SMP)
+	unsigned int		nr_pinned;
+#endif
+	unsigned int		push_busy;
+	struct cpu_stop_work	push_work;
 };
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -1077,6 +1088,16 @@ static inline int cpu_of(struct rq *rq)
 #endif
 }
 
+#define MDF_PUSH	0x01
+
+static inline bool is_migration_disabled(struct task_struct *p)
+{
+#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT)
+	return p->migration_disabled;
+#else
+	return false;
+#endif
+}
 
 #ifdef CONFIG_SCHED_SMT
 extern void __update_idle_core(struct rq *rq);
@@ -1223,6 +1244,9 @@ static inline void rq_pin_lock(struct rq *rq, struct rq_flags *rf)
 	rq->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
 	rf->clock_update_flags = 0;
 #endif
+#ifdef CONFIG_SMP
+	SCHED_WARN_ON(rq->balance_callback);
+#endif
 }
 
 static inline void rq_unpin_lock(struct rq *rq, struct rq_flags *rf)
@@ -1384,6 +1408,9 @@ init_numa_balancing(unsigned long clone_flags, struct task_struct *p)
 
 #ifdef CONFIG_SMP
 
+#define BALANCE_WORK	0x01
+#define BALANCE_PUSH	0x02
+
 static inline void
 queue_balance_callback(struct rq *rq,
 		       struct callback_head *head,
@@ -1391,12 +1418,13 @@ queue_balance_callback(struct rq *rq,
 {
 	lockdep_assert_held(&rq->lock);
 
-	if (unlikely(head->next))
+	if (unlikely(head->next || (rq->balance_flags & BALANCE_PUSH)))
 		return;
 
 	head->func = (void (*)(struct callback_head *))func;
 	head->next = rq->balance_callback;
 	rq->balance_callback = head;
+	rq->balance_flags |= BALANCE_WORK;
 }
 
 #define rcu_dereference_check_sched_domain(p) \
@@ -1804,10 +1832,13 @@ struct sched_class {
 	void (*task_woken)(struct rq *this_rq, struct task_struct *task);
 
 	void (*set_cpus_allowed)(struct task_struct *p,
-				 const struct cpumask *newmask);
+				 const struct cpumask *newmask,
+				 u32 flags);
 
 	void (*rq_online)(struct rq *rq);
 	void (*rq_offline)(struct rq *rq);
+
+	struct rq *(*find_lock_rq)(struct task_struct *p, struct rq *rq);
 #endif
 
 	void (*task_tick)(struct rq *rq, struct task_struct *p, int queued);
@@ -1905,13 +1936,35 @@ static inline bool sched_fair_runnable(struct rq *rq)
 extern struct task_struct *pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf);
 extern struct task_struct *pick_next_task_idle(struct rq *rq);
 
+#define SCA_CHECK		0x01
+#define SCA_MIGRATE_DISABLE	0x02
+#define SCA_MIGRATE_ENABLE	0x04
+
 #ifdef CONFIG_SMP
 
 extern void update_group_capacity(struct sched_domain *sd, int cpu);
 
 extern void trigger_load_balance(struct rq *rq);
 
-extern void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask);
+extern void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask, u32 flags);
+
+static inline struct task_struct *get_push_task(struct rq *rq)
+{
+	struct task_struct *p = rq->curr;
+
+	lockdep_assert_held(&rq->lock);
+
+	if (rq->push_busy)
+		return NULL;
+
+	if (p->nr_cpus_allowed == 1)
+		return NULL;
+
+	rq->push_busy = true;
+	return get_task_struct(p);
+}
+
+extern int push_cpu_stop(void *arg);
+
 #endif
kernel/stop_machine.c

@@ -42,11 +42,27 @@ struct cpu_stopper {
 	struct list_head	works;		/* list of pending works */
 
 	struct cpu_stop_work	stop_work;	/* for stop_cpus */
+	unsigned long		caller;
+	cpu_stop_fn_t		fn;
 };
 
 static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper);
 static bool stop_machine_initialized = false;
 
+void print_stop_info(const char *log_lvl, struct task_struct *task)
+{
+	/*
+	 * If @task is a stopper task, it cannot migrate and task_cpu() is
+	 * stable.
+	 */
+	struct cpu_stopper *stopper = per_cpu_ptr(&cpu_stopper, task_cpu(task));
+
+	if (task != stopper->thread)
+		return;
+
+	printk("%sStopper: %pS <- %pS\n", log_lvl, stopper->fn, (void *)stopper->caller);
+}
+
 /* static data for stop_cpus */
 static DEFINE_MUTEX(stop_cpus_mutex);
 static bool stop_cpus_in_progress;
@@ -123,7 +139,7 @@ static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
 int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg)
 {
 	struct cpu_stop_done done;
-	struct cpu_stop_work work = { .fn = fn, .arg = arg, .done = &done };
+	struct cpu_stop_work work = { .fn = fn, .arg = arg, .done = &done, .caller = _RET_IP_ };
 
 	cpu_stop_init_done(&done, 1);
 	if (!cpu_stop_queue_work(cpu, &work))
@@ -331,7 +347,8 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *
 	work1 = work2 = (struct cpu_stop_work){
 		.fn = multi_cpu_stop,
 		.arg = &msdata,
-		.done = &done
+		.done = &done,
+		.caller = _RET_IP_,
 	};
 
 	cpu_stop_init_done(&done, 2);
@@ -367,7 +384,7 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *
 bool stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
 			struct cpu_stop_work *work_buf)
 {
-	*work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, };
+	*work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, .caller = _RET_IP_, };
 	return cpu_stop_queue_work(cpu, work_buf);
 }
@@ -487,6 +504,8 @@ static void cpu_stopper_thread(unsigned int cpu)
 		int ret;
 
 		/* cpu stop callbacks must not sleep, make in_atomic() == T */
+		stopper->caller = work->caller;
+		stopper->fn = fn;
 		preempt_count_inc();
 		ret = fn(arg);
 		if (done) {
@@ -495,6 +514,8 @@ static void cpu_stopper_thread(unsigned int cpu)
 				cpu_stop_signal_done(done);
 		}
 		preempt_count_dec();
+		stopper->fn = NULL;
+		stopper->caller = 0;
 		WARN_ONCE(preempt_count(),
 			  "cpu_stop: %ps(%p) leaked preempt count\n", fn, arg);
 		goto repeat;
kernel/workqueue.c

@@ -4908,6 +4908,10 @@ static void unbind_workers(int cpu)
 		pool->flags |= POOL_DISASSOCIATED;
 
 		raw_spin_unlock_irq(&pool->lock);
+
+		for_each_pool_worker(worker, pool)
+			WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, cpu_active_mask) < 0);
+
 		mutex_unlock(&wq_pool_attach_mutex);
 
 		/*
lib/cpumask.c

@@ -267,3 +267,21 @@ int cpumask_any_and_distribute(const struct cpumask *src1p,
 	return next;
 }
 EXPORT_SYMBOL(cpumask_any_and_distribute);
+
+int cpumask_any_distribute(const struct cpumask *srcp)
+{
+	int next, prev;
+
+	/* NOTE: our first selection will skip 0. */
+	prev = __this_cpu_read(distribute_cpu_mask_prev);
+
+	next = cpumask_next(prev, srcp);
+	if (next >= nr_cpu_ids)
+		next = cpumask_first(srcp);
+
+	if (next < nr_cpu_ids)
+		__this_cpu_write(distribute_cpu_mask_prev, next);
+
+	return next;
+}
+EXPORT_SYMBOL(cpumask_any_distribute);
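
Unlike cpumask_any(), which always reports the first set bit, the new helper rotates its pick through the mask using a per-CPU cursor, so repeated fallback picks (as in the rt/dl push paths above) get spread across the allowed CPUs. A small illustration with a hypothetical caller and a mask with CPUs 1, 2 and 5 set:

/* Illustration only; not part of this commit. */
static void cpumask_distribute_demo(void)
{
	cpumask_var_t mask;

	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
		return;

	cpumask_set_cpu(1, mask);
	cpumask_set_cpu(2, mask);
	cpumask_set_cpu(5, mask);

	/* Always 1: cpumask_any() is simply the first set bit. */
	pr_info("any: %d\n", cpumask_any(mask));

	/*
	 * Successive calls rotate through the set bits (the starting point
	 * depends on the calling CPU's cursor and on other users of it).
	 */
	pr_info("pick: %d\n", cpumask_any_distribute(mask));
	pr_info("pick: %d\n", cpumask_any_distribute(mask));
	pr_info("pick: %d\n", cpumask_any_distribute(mask));

	free_cpumask_var(mask);
}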
lib/dump_stack.c

@@ -12,6 +12,7 @@
 #include <linux/atomic.h>
 #include <linux/kexec.h>
 #include <linux/utsname.h>
+#include <linux/stop_machine.h>
 
 static char dump_stack_arch_desc_str[128];
@@ -57,6 +58,7 @@ void dump_stack_print_info(const char *log_lvl)
 		       log_lvl, dump_stack_arch_desc_str);
 
 	print_worker_info(log_lvl, current);
+	print_stop_info(log_lvl, current);
 }
 
 /**
lib/smp_processor_id.c

@@ -26,6 +26,11 @@ unsigned int check_preemption_disabled(const char *what1, const char *what2)
 	if (current->nr_cpus_allowed == 1)
 		goto out;
 
+#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT)
+	if (current->migration_disabled)
+		goto out;
+#endif
+
 	/*
 	 * It is valid to assume CPU-locality during early bootup:
 	 */