Commit 77a05940 authored by Linus Torvalds

Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler updates from Ingo Molnar:
 "The biggest changes in this cycle were:

   - Make kcpustat vtime aware (Frederic Weisbecker)

   - Rework the CFS load_balance() logic (Vincent Guittot)

   - Misc cleanups, smaller enhancements, fixes.

  The load-balancing rework is the most intrusive change: it replaces
  the old heuristics, which have become less meaningful after the
  introduction of the PELT metrics, with a ground-up load-balancing
  algorithm.

  As such it's not really an iterative series, but replaces the old
  load-balancing logic with the new one. We hope there are no
  performance regressions left - but statistically it's highly probable
  that there *is* going to be some workload that is hurt by these
  changes. If so, we'd prefer to take a look at that workload and
  fix its scheduling, instead of reverting the changes"
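
For background, the "PELT metrics" the message refers to are the per-entity
load-tracking sums: runnable/running time is accrued per 1024 us period and
decayed geometrically, with the decay factor y chosen so that y^32 = 1/2 (a
period's contribution halves after roughly 32 ms), as documented for
kernel/sched/pelt.c. A toy userspace model of that accrual, purely
illustrative and not kernel code:

	/* pelt_toy.c - illustrative model of a PELT-style decaying sum */
	#include <math.h>
	#include <stdio.h>

	int main(void)
	{
		/* y chosen so that y^32 == 0.5 */
		const double y = pow(0.5, 1.0 / 32.0);
		/* 1 = the entity was running during that 1024 us period */
		const int running[] = { 1, 1, 0, 1, 0, 0, 1, 1 };
		double sum = 0.0;
		unsigned int i;

		for (i = 0; i < sizeof(running) / sizeof(running[0]); i++)
			sum = sum * y + running[i] * 1024.0;	/* decay, then accrue */

		printf("PELT-style running sum: %.1f\n", sum);
		return 0;
	}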

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (46 commits)
  rackmeter: Use vtime aware kcpustat accessor
  leds: Use all-in-one vtime aware kcpustat accessor
  cpufreq: Use vtime aware kcpustat accessors for user time
  procfs: Use all-in-one vtime aware kcpustat accessor
  sched/vtime: Bring up complete kcpustat accessor
  sched/cputime: Support other fields on kcpustat_field()
  sched/cpufreq: Move the cfs_rq_util_change() call to cpufreq_update_util()
  sched/fair: Add comments for group_type and balancing at SD_NUMA level
  sched/fair: Fix rework of find_idlest_group()
  sched/uclamp: Fix overzealous type replacement
  sched/Kconfig: Fix spelling mistake in user-visible help text
  sched/core: Further clarify sched_class::set_next_task()
  sched/fair: Use mul_u32_u32()
  sched/core: Simplify sched_class::pick_next_task()
  sched/core: Optimize pick_next_task()
  sched/core: Make pick_next_task_idle() more consistent
  sched/fair: Better document newidle_balance()
  leds: Use vtime aware kcpustat accessor to fetch CPUTIME_SYSTEM
  cpufreq: Use vtime aware kcpustat accessor to fetch CPUTIME_SYSTEM
  procfs: Use vtime aware kcpustat accessor to fetch CPUTIME_SYSTEM
  ...
parents 3f59dbca de881a34
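
The recurring pattern in the kcpustat hunks below is the switch from reading
kcpustat_cpu(cpu).cpustat[...] directly to the new vtime-aware accessors, so
that CPUs running tickless (NO_HZ_FULL) report up-to-date cputime. A minimal
sketch of the all-in-one accessor as a hypothetical in-kernel reader would use
it (not part of the diff):

	#include <linux/kernel_stat.h>

	/* Hypothetical helper: vtime-aware user+system time of one CPU, in ns. */
	static u64 example_busy_ns(int cpu)
	{
		struct kernel_cpustat kcpustat;

		/* Snapshot the whole per-CPU array; with VIRT_CPU_ACCOUNTING_GEN
		 * this also folds in cputime still pending in the vtime state. */
		kcpustat_cpu_fetch(&kcpustat, cpu);

		return kcpustat.cpustat[CPUTIME_USER] +
		       kcpustat.cpustat[CPUTIME_SYSTEM];
	}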
......@@ -132,7 +132,7 @@ static __u64 vtime_delta(struct task_struct *tsk)
return delta_stime;
}
void vtime_account_system(struct task_struct *tsk)
void vtime_account_kernel(struct task_struct *tsk)
{
struct thread_info *ti = task_thread_info(tsk);
__u64 stime = vtime_delta(tsk);
......@@ -146,7 +146,7 @@ void vtime_account_system(struct task_struct *tsk)
else
ti->stime += stime;
}
EXPORT_SYMBOL_GPL(vtime_account_system);
EXPORT_SYMBOL_GPL(vtime_account_kernel);
void vtime_account_idle(struct task_struct *tsk)
{
......
......@@ -338,7 +338,7 @@ static unsigned long vtime_delta(struct task_struct *tsk,
return stime;
}
void vtime_account_system(struct task_struct *tsk)
void vtime_account_kernel(struct task_struct *tsk)
{
unsigned long stime, stime_scaled, steal_time;
struct cpu_accounting_data *acct = get_accounting(tsk);
......@@ -366,7 +366,7 @@ void vtime_account_system(struct task_struct *tsk)
#endif
}
}
EXPORT_SYMBOL_GPL(vtime_account_system);
EXPORT_SYMBOL_GPL(vtime_account_kernel);
void vtime_account_idle(struct task_struct *tsk)
{
......@@ -395,7 +395,7 @@ static void vtime_flush_scaled(struct task_struct *tsk,
/*
* Account the whole cputime accumulated in the paca
* Must be called with interrupts disabled.
* Assumes that vtime_account_system/idle() has been called
* Assumes that vtime_account_kernel/idle() has been called
* recently (i.e. since the last entry from usermode) so that
* get_paca()->user_time_scaled is up to date.
*/
......
......@@ -247,9 +247,9 @@ void vtime_account_irq_enter(struct task_struct *tsk)
}
EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
void vtime_account_system(struct task_struct *tsk)
void vtime_account_kernel(struct task_struct *tsk)
__attribute__((alias("vtime_account_irq_enter")));
EXPORT_SYMBOL_GPL(vtime_account_system);
EXPORT_SYMBOL_GPL(vtime_account_kernel);
/*
* Sorted add to a list. List is linear searched until first bigger
......
......@@ -354,7 +354,7 @@ For 32-bit we have the following conventions - kernel is built with
.macro CALL_enter_from_user_mode
#ifdef CONFIG_CONTEXT_TRACKING
#ifdef CONFIG_JUMP_LABEL
STATIC_JUMP_IF_FALSE .Lafter_call_\@, context_tracking_enabled, def=0
STATIC_JUMP_IF_FALSE .Lafter_call_\@, context_tracking_key, def=0
#endif
call enter_from_user_mode
.Lafter_call_\@:
......
......@@ -113,18 +113,21 @@ EXPORT_SYMBOL_GPL(get_governor_parent_kobj);
static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall)
{
u64 idle_time;
struct kernel_cpustat kcpustat;
u64 cur_wall_time;
u64 idle_time;
u64 busy_time;
cur_wall_time = jiffies64_to_nsecs(get_jiffies_64());
busy_time = kcpustat_cpu(cpu).cpustat[CPUTIME_USER];
busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SYSTEM];
busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_IRQ];
busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SOFTIRQ];
busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL];
busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE];
kcpustat_cpu_fetch(&kcpustat, cpu);
busy_time = kcpustat.cpustat[CPUTIME_USER];
busy_time += kcpustat.cpustat[CPUTIME_SYSTEM];
busy_time += kcpustat.cpustat[CPUTIME_IRQ];
busy_time += kcpustat.cpustat[CPUTIME_SOFTIRQ];
busy_time += kcpustat.cpustat[CPUTIME_STEAL];
busy_time += kcpustat.cpustat[CPUTIME_NICE];
idle_time = cur_wall_time - busy_time;
if (wall)
......
......@@ -105,7 +105,7 @@ void gov_update_cpu_data(struct dbs_data *dbs_data)
j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, &j_cdbs->prev_update_time,
dbs_data->io_is_busy);
if (dbs_data->ignore_nice_load)
j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
j_cdbs->prev_cpu_nice = kcpustat_field(&kcpustat_cpu(j), CPUTIME_NICE, j);
}
}
}
......@@ -149,7 +149,7 @@ unsigned int dbs_update(struct cpufreq_policy *policy)
j_cdbs->prev_cpu_idle = cur_idle_time;
if (ignore_nice) {
u64 cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
u64 cur_nice = kcpustat_field(&kcpustat_cpu(j), CPUTIME_NICE, j);
idle_time += div_u64(cur_nice - j_cdbs->prev_cpu_nice, NSEC_PER_USEC);
j_cdbs->prev_cpu_nice = cur_nice;
......@@ -530,7 +530,7 @@ int cpufreq_dbs_governor_start(struct cpufreq_policy *policy)
j_cdbs->prev_load = 0;
if (ignore_nice)
j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
j_cdbs->prev_cpu_nice = kcpustat_field(&kcpustat_cpu(j), CPUTIME_NICE, j);
}
gov->start(policy);
......
......@@ -57,11 +57,15 @@ static void led_activity_function(struct timer_list *t)
curr_used = 0;
for_each_possible_cpu(i) {
curr_used += kcpustat_cpu(i).cpustat[CPUTIME_USER]
+ kcpustat_cpu(i).cpustat[CPUTIME_NICE]
+ kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM]
+ kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ]
+ kcpustat_cpu(i).cpustat[CPUTIME_IRQ];
struct kernel_cpustat kcpustat;
kcpustat_cpu_fetch(&kcpustat, i);
curr_used += kcpustat.cpustat[CPUTIME_USER]
+ kcpustat.cpustat[CPUTIME_NICE]
+ kcpustat.cpustat[CPUTIME_SYSTEM]
+ kcpustat.cpustat[CPUTIME_SOFTIRQ]
+ kcpustat.cpustat[CPUTIME_IRQ];
cpus++;
}
......
......@@ -81,13 +81,14 @@ static int rackmeter_ignore_nice;
*/
static inline u64 get_cpu_idle_time(unsigned int cpu)
{
struct kernel_cpustat *kcpustat = &kcpustat_cpu(cpu);
u64 retval;
retval = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE] +
kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT];
retval = kcpustat->cpustat[CPUTIME_IDLE] +
kcpustat->cpustat[CPUTIME_IOWAIT];
if (rackmeter_ignore_nice)
retval += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE];
retval += kcpustat_field(kcpustat, CPUTIME_NICE, cpu);
return retval;
}
......
......@@ -120,18 +120,21 @@ static int show_stat(struct seq_file *p, void *v)
getboottime64(&boottime);
for_each_possible_cpu(i) {
struct kernel_cpustat *kcs = &kcpustat_cpu(i);
user += kcs->cpustat[CPUTIME_USER];
nice += kcs->cpustat[CPUTIME_NICE];
system += kcs->cpustat[CPUTIME_SYSTEM];
idle += get_idle_time(kcs, i);
iowait += get_iowait_time(kcs, i);
irq += kcs->cpustat[CPUTIME_IRQ];
softirq += kcs->cpustat[CPUTIME_SOFTIRQ];
steal += kcs->cpustat[CPUTIME_STEAL];
guest += kcs->cpustat[CPUTIME_GUEST];
guest_nice += kcs->cpustat[CPUTIME_GUEST_NICE];
struct kernel_cpustat kcpustat;
u64 *cpustat = kcpustat.cpustat;
kcpustat_cpu_fetch(&kcpustat, i);
user += cpustat[CPUTIME_USER];
nice += cpustat[CPUTIME_NICE];
system += cpustat[CPUTIME_SYSTEM];
idle += get_idle_time(&kcpustat, i);
iowait += get_iowait_time(&kcpustat, i);
irq += cpustat[CPUTIME_IRQ];
softirq += cpustat[CPUTIME_SOFTIRQ];
steal += cpustat[CPUTIME_STEAL];
guest += cpustat[CPUTIME_GUEST];
guest_nice += cpustat[CPUTIME_GUEST_NICE];
sum += kstat_cpu_irqs_sum(i);
sum += arch_irq_stat_cpu(i);
......@@ -157,19 +160,22 @@ static int show_stat(struct seq_file *p, void *v)
seq_putc(p, '\n');
for_each_online_cpu(i) {
struct kernel_cpustat *kcs = &kcpustat_cpu(i);
struct kernel_cpustat kcpustat;
u64 *cpustat = kcpustat.cpustat;
kcpustat_cpu_fetch(&kcpustat, i);
/* Copy values here to work around gcc-2.95.3, gcc-2.96 */
user = kcs->cpustat[CPUTIME_USER];
nice = kcs->cpustat[CPUTIME_NICE];
system = kcs->cpustat[CPUTIME_SYSTEM];
idle = get_idle_time(kcs, i);
iowait = get_iowait_time(kcs, i);
irq = kcs->cpustat[CPUTIME_IRQ];
softirq = kcs->cpustat[CPUTIME_SOFTIRQ];
steal = kcs->cpustat[CPUTIME_STEAL];
guest = kcs->cpustat[CPUTIME_GUEST];
guest_nice = kcs->cpustat[CPUTIME_GUEST_NICE];
user = cpustat[CPUTIME_USER];
nice = cpustat[CPUTIME_NICE];
system = cpustat[CPUTIME_SYSTEM];
idle = get_idle_time(&kcpustat, i);
iowait = get_iowait_time(&kcpustat, i);
irq = cpustat[CPUTIME_IRQ];
softirq = cpustat[CPUTIME_SOFTIRQ];
steal = cpustat[CPUTIME_STEAL];
guest = cpustat[CPUTIME_GUEST];
guest_nice = cpustat[CPUTIME_GUEST_NICE];
seq_printf(p, "cpu%d", i);
seq_put_decimal_ull(p, " ", nsec_to_clock_t(user));
seq_put_decimal_ull(p, " ", nsec_to_clock_t(nice));
......
......@@ -22,26 +22,26 @@ extern void context_tracking_user_exit(void);
static inline void user_enter(void)
{
if (context_tracking_is_enabled())
if (context_tracking_enabled())
context_tracking_enter(CONTEXT_USER);
}
static inline void user_exit(void)
{
if (context_tracking_is_enabled())
if (context_tracking_enabled())
context_tracking_exit(CONTEXT_USER);
}
/* Called with interrupts disabled. */
static inline void user_enter_irqoff(void)
{
if (context_tracking_is_enabled())
if (context_tracking_enabled())
__context_tracking_enter(CONTEXT_USER);
}
static inline void user_exit_irqoff(void)
{
if (context_tracking_is_enabled())
if (context_tracking_enabled())
__context_tracking_exit(CONTEXT_USER);
}
......@@ -49,7 +49,7 @@ static inline enum ctx_state exception_enter(void)
{
enum ctx_state prev_ctx;
if (!context_tracking_is_enabled())
if (!context_tracking_enabled())
return 0;
prev_ctx = this_cpu_read(context_tracking.state);
......@@ -61,7 +61,7 @@ static inline enum ctx_state exception_enter(void)
static inline void exception_exit(enum ctx_state prev_ctx)
{
if (context_tracking_is_enabled()) {
if (context_tracking_enabled()) {
if (prev_ctx != CONTEXT_KERNEL)
context_tracking_enter(prev_ctx);
}
......@@ -77,7 +77,7 @@ static inline void exception_exit(enum ctx_state prev_ctx)
*/
static inline enum ctx_state ct_state(void)
{
return context_tracking_is_enabled() ?
return context_tracking_enabled() ?
this_cpu_read(context_tracking.state) : CONTEXT_DISABLED;
}
#else
......@@ -90,7 +90,7 @@ static inline void exception_exit(enum ctx_state prev_ctx) { }
static inline enum ctx_state ct_state(void) { return CONTEXT_DISABLED; }
#endif /* !CONFIG_CONTEXT_TRACKING */
#define CT_WARN_ON(cond) WARN_ON(context_tracking_is_enabled() && (cond))
#define CT_WARN_ON(cond) WARN_ON(context_tracking_enabled() && (cond))
#ifdef CONFIG_CONTEXT_TRACKING_FORCE
extern void context_tracking_init(void);
......@@ -103,12 +103,12 @@ static inline void context_tracking_init(void) { }
/* must be called with irqs disabled */
static inline void guest_enter_irqoff(void)
{
if (vtime_accounting_cpu_enabled())
if (vtime_accounting_enabled_this_cpu())
vtime_guest_enter(current);
else
current->flags |= PF_VCPU;
if (context_tracking_is_enabled())
if (context_tracking_enabled())
__context_tracking_enter(CONTEXT_GUEST);
/* KVM does not hold any references to rcu protected data when it
......@@ -118,16 +118,16 @@ static inline void guest_enter_irqoff(void)
* one time slice). Lets treat guest mode as quiescent state, just like
* we do with user-mode execution.
*/
if (!context_tracking_cpu_is_enabled())
if (!context_tracking_enabled_this_cpu())
rcu_virt_note_context_switch(smp_processor_id());
}
static inline void guest_exit_irqoff(void)
{
if (context_tracking_is_enabled())
if (context_tracking_enabled())
__context_tracking_exit(CONTEXT_GUEST);
if (vtime_accounting_cpu_enabled())
if (vtime_accounting_enabled_this_cpu())
vtime_guest_exit(current);
else
current->flags &= ~PF_VCPU;
......@@ -141,7 +141,7 @@ static inline void guest_enter_irqoff(void)
* to assume that it's the stime pending cputime
* to flush.
*/
vtime_account_system(current);
vtime_account_kernel(current);
current->flags |= PF_VCPU;
rcu_virt_note_context_switch(smp_processor_id());
}
......@@ -149,7 +149,7 @@ static inline void guest_enter_irqoff(void)
static inline void guest_exit_irqoff(void)
{
/* Flush the guest cputime we spent on the guest */
vtime_account_system(current);
vtime_account_kernel(current);
current->flags &= ~PF_VCPU;
}
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
......
......@@ -23,17 +23,22 @@ struct context_tracking {
};
#ifdef CONFIG_CONTEXT_TRACKING
extern struct static_key_false context_tracking_enabled;
extern struct static_key_false context_tracking_key;
DECLARE_PER_CPU(struct context_tracking, context_tracking);
static inline bool context_tracking_is_enabled(void)
static inline bool context_tracking_enabled(void)
{
return static_branch_unlikely(&context_tracking_enabled);
return static_branch_unlikely(&context_tracking_key);
}
static inline bool context_tracking_cpu_is_enabled(void)
static inline bool context_tracking_enabled_cpu(int cpu)
{
return __this_cpu_read(context_tracking.active);
return context_tracking_enabled() && per_cpu(context_tracking.active, cpu);
}
static inline bool context_tracking_enabled_this_cpu(void)
{
return context_tracking_enabled() && __this_cpu_read(context_tracking.active);
}
static inline bool context_tracking_in_user(void)
......@@ -42,9 +47,9 @@ static inline bool context_tracking_in_user(void)
}
#else
static inline bool context_tracking_in_user(void) { return false; }
static inline bool context_tracking_active(void) { return false; }
static inline bool context_tracking_is_enabled(void) { return false; }
static inline bool context_tracking_cpu_is_enabled(void) { return false; }
static inline bool context_tracking_enabled(void) { return false; }
static inline bool context_tracking_enabled_cpu(int cpu) { return false; }
static inline bool context_tracking_enabled_this_cpu(void) { return false; }
#endif /* CONFIG_CONTEXT_TRACKING */
#endif
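
The renames above split the old context_tracking_is_enabled() /
context_tracking_cpu_is_enabled() pair into three predicates: enabled
anywhere, enabled on a given CPU, and enabled on the local CPU. A hedged
illustration of how a caller might choose between them (hypothetical code,
not from the series):

	#include <linux/context_tracking_state.h>
	#include <linux/printk.h>

	/* Hypothetical debug helper. */
	static void example_report_tracking(int cpu)
	{
		/* Static-key fast path: is context tracking enabled on any CPU? */
		if (!context_tracking_enabled())
			return;

		/* Per-CPU checks; both already include the global static-key test. */
		if (context_tracking_enabled_cpu(cpu))
			pr_info("CPU%d tracks user/kernel transitions\n", cpu);

		if (context_tracking_enabled_this_cpu())
			pr_info("so does the local CPU\n");
	}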
......@@ -78,6 +78,24 @@ static inline unsigned int kstat_cpu_irqs_sum(unsigned int cpu)
return kstat_cpu(cpu).irqs_sum;
}
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
extern u64 kcpustat_field(struct kernel_cpustat *kcpustat,
enum cpu_usage_stat usage, int cpu);
extern void kcpustat_cpu_fetch(struct kernel_cpustat *dst, int cpu);
#else
static inline u64 kcpustat_field(struct kernel_cpustat *kcpustat,
enum cpu_usage_stat usage, int cpu)
{
return kcpustat->cpustat[usage];
}
static inline void kcpustat_cpu_fetch(struct kernel_cpustat *dst, int cpu)
{
*dst = kcpustat_cpu(cpu);
}
#endif
extern void account_user_time(struct task_struct *, u64);
extern void account_guest_time(struct task_struct *, u64);
extern void account_system_time(struct task_struct *, int, u64);
......
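
kcpustat_field() is the single-field counterpart used by the cpufreq governor
and rackmeter hunks above; on !CONFIG_VIRT_CPU_ACCOUNTING_GEN kernels it
degrades to a plain array read, as the stubs show. A hypothetical caller,
for illustration only:

	#include <linux/kernel_stat.h>

	/* Hypothetical helper: nice time of one CPU, vtime-aware where available. */
	static u64 example_nice_ns(int cpu)
	{
		return kcpustat_field(&kcpustat_cpu(cpu), CPUTIME_NICE, cpu);
	}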
......@@ -250,16 +250,21 @@ struct prev_cputime {
enum vtime_state {
/* Task is sleeping or running in a CPU with VTIME inactive: */
VTIME_INACTIVE = 0,
/* Task runs in userspace in a CPU with VTIME active: */
VTIME_USER,
/* Task is idle */
VTIME_IDLE,
/* Task runs in kernelspace in a CPU with VTIME active: */
VTIME_SYS,
/* Task runs in userspace in a CPU with VTIME active: */
VTIME_USER,
/* Task runs as guests in a CPU with VTIME active: */
VTIME_GUEST,
};
struct vtime {
seqcount_t seqcount;
unsigned long long starttime;
enum vtime_state state;
unsigned int cpu;
u64 utime;
u64 stime;
u64 gtime;
......
......@@ -174,7 +174,7 @@ extern cpumask_var_t tick_nohz_full_mask;
static inline bool tick_nohz_full_enabled(void)
{
if (!context_tracking_is_enabled())
if (!context_tracking_enabled())
return false;
return tick_nohz_full_running;
......
......@@ -11,11 +11,15 @@
struct task_struct;
/*
* vtime_accounting_cpu_enabled() definitions/declarations
* vtime_accounting_enabled_this_cpu() definitions/declarations
*/
#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE)
static inline bool vtime_accounting_cpu_enabled(void) { return true; }
static inline bool vtime_accounting_enabled_this_cpu(void) { return true; }
extern void vtime_task_switch(struct task_struct *prev);
#elif defined(CONFIG_VIRT_CPU_ACCOUNTING_GEN)
/*
* Checks if vtime is enabled on some CPU. Cputime readers want to be careful
* in that case and compute the tickless cputime.
......@@ -24,46 +28,43 @@ static inline bool vtime_accounting_cpu_enabled(void) { return true; }
*/
static inline bool vtime_accounting_enabled(void)
{
return context_tracking_is_enabled();
return context_tracking_enabled();
}
static inline bool vtime_accounting_cpu_enabled(void)
static inline bool vtime_accounting_enabled_cpu(int cpu)
{
if (vtime_accounting_enabled()) {
if (context_tracking_cpu_is_enabled())
return true;
}
return false;
return context_tracking_enabled_cpu(cpu);
}
#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
static inline bool vtime_accounting_cpu_enabled(void) { return false; }
#endif
static inline bool vtime_accounting_enabled_this_cpu(void)
{
return context_tracking_enabled_this_cpu();
}
/*
* Common vtime APIs
*/
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
extern void vtime_task_switch_generic(struct task_struct *prev);
#ifdef __ARCH_HAS_VTIME_TASK_SWITCH
extern void vtime_task_switch(struct task_struct *prev);
#else
extern void vtime_common_task_switch(struct task_struct *prev);
static inline void vtime_task_switch(struct task_struct *prev)
{
if (vtime_accounting_cpu_enabled())
vtime_common_task_switch(prev);
if (vtime_accounting_enabled_this_cpu())
vtime_task_switch_generic(prev);
}
#endif /* __ARCH_HAS_VTIME_TASK_SWITCH */
extern void vtime_account_system(struct task_struct *tsk);
extern void vtime_account_idle(struct task_struct *tsk);
#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
static inline bool vtime_accounting_enabled_cpu(int cpu) {return false; }
static inline bool vtime_accounting_enabled_this_cpu(void) { return false; }
static inline void vtime_task_switch(struct task_struct *prev) { }
static inline void vtime_account_system(struct task_struct *tsk) { }
#endif
/*
* Common vtime APIs
*/
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
extern void vtime_account_kernel(struct task_struct *tsk);
extern void vtime_account_idle(struct task_struct *tsk);
#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
static inline void vtime_account_kernel(struct task_struct *tsk) { }
#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
......@@ -86,7 +87,7 @@ extern void vtime_account_irq_enter(struct task_struct *tsk);
static inline void vtime_account_irq_exit(struct task_struct *tsk)
{
/* On hard|softirq exit we always account to hard|softirq cputime */
vtime_account_system(tsk);
vtime_account_kernel(tsk);
}
extern void vtime_flush(struct task_struct *tsk);
#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
......
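
vtime_account_system() becomes vtime_account_kernel() throughout (ia64,
powerpc and the generic code above), and the enablement checks gain explicit
per-CPU variants. A hedged sketch of a caller on a kernel-side accounting
path, hypothetical and assuming the usual vtime calling context (IRQs
disabled), modeled on the guest_enter_irqoff() hunk:

	#include <linux/vtime.h>
	#include <linux/sched.h>

	/* Hypothetical hook: flush pending kernel cputime for @tsk. */
	static void example_flush_kernel_cputime(struct task_struct *tsk)
	{
		/* No-op unless this CPU does tickless (vtime) cputime accounting. */
		if (!vtime_accounting_enabled_this_cpu())
			return;

		vtime_account_kernel(tsk);	/* formerly vtime_account_system() */
	}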
......@@ -65,7 +65,7 @@ config PREEMPT_RT
preemptible priority-inheritance aware variants, enforcing
interrupt threading and introducing mechanisms to break up long
non-preemptible sections. This makes the kernel, except for very
low level and critical code pathes (entry code, scheduler, low
low level and critical code paths (entry code, scheduler, low
level interrupt handling) fully preemptible and brings most
execution contexts under scheduler control.
......
......@@ -25,8 +25,8 @@
#define CREATE_TRACE_POINTS
#include <trace/events/context_tracking.h>
DEFINE_STATIC_KEY_FALSE(context_tracking_enabled);
EXPORT_SYMBOL_GPL(context_tracking_enabled);
DEFINE_STATIC_KEY_FALSE(context_tracking_key);
EXPORT_SYMBOL_GPL(context_tracking_key);
DEFINE_PER_CPU(struct context_tracking, context_tracking);
EXPORT_SYMBOL_GPL(context_tracking);
......@@ -192,7 +192,7 @@ void __init context_tracking_cpu_set(int cpu)
if (!per_cpu(context_tracking.active, cpu)) {
per_cpu(context_tracking.active, cpu) = true;
static_branch_inc(&context_tracking_enabled);
static_branch_inc(&context_tracking_key);
}
if (initialized)
......
......@@ -811,7 +811,7 @@ static inline unsigned int uclamp_bucket_base_value(unsigned int clamp_value)
return UCLAMP_BUCKET_DELTA * uclamp_bucket_id(clamp_value);
}
static inline enum uclamp_id uclamp_none(enum uclamp_id clamp_id)
static inline unsigned int uclamp_none(enum uclamp_id clamp_id)
{
if (clamp_id == UCLAMP_MIN)
return 0;
......@@ -854,7 +854,7 @@ static inline void uclamp_idle_reset(struct rq *rq, enum uclamp_id clamp_id,
}
static inline
enum uclamp_id uclamp_rq_max_value(struct rq *rq, enum uclamp_id clamp_id,
unsigned int uclamp_rq_max_value(struct rq *rq, enum uclamp_id clamp_id,
unsigned int clamp_value)
{
struct uclamp_bucket *bucket = rq->uclamp[clamp_id].bucket;
......@@ -919,7 +919,7 @@ uclamp_eff_get(struct task_struct *p, enum uclamp_id clamp_id)
return uc_req;
}
enum uclamp_id uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id)
unsigned int uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id)
{
struct uclamp_se uc_eff;
......@@ -3918,13 +3918,15 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
prev->sched_class == &fair_sched_class) &&
rq->nr_running == rq->cfs.h_nr_running)) {
p = fair_sched_class.pick_next_task(rq, prev, rf);
p = pick_next_task_fair(rq, prev, rf);
if (unlikely(p == RETRY_TASK))
goto restart;
/* Assumes fair_sched_class->next == idle_sched_class */
if (unlikely(!p))
p = idle_sched_class.pick_next_task(rq, prev, rf);
if (!p) {
put_prev_task(rq, prev);
p = pick_next_task_idle(rq);
}
return p;
}
......@@ -3948,7 +3950,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
put_prev_task(rq, prev);
for_each_class(class) {
p = class->pick_next_task(rq, NULL, NULL);
p = class->pick_next_task(rq);
if (p)
return p;
}
......@@ -6217,7 +6219,7 @@ static struct task_struct *__pick_migrate_task(struct rq *rq)
struct task_struct *next;
for_each_class(class) {
next = class->pick_next_task(rq, NULL, NULL);
next = class->pick_next_task(rq);
if (next) {
next->sched_class->put_prev_task(rq, next);
return next;
......
......@@ -1743,13 +1743,16 @@ static void start_hrtick_dl(struct rq *rq, struct task_struct *p)
}
#endif
static void set_next_task_dl(struct rq *rq, struct task_struct *p)
static void set_next_task_dl(struct rq *rq, struct task_struct *p, bool first)
{
p->se.exec_start = rq_clock_task(rq);
/* You can't push away the running task */
dequeue_pushable_dl_task(rq, p);
if (!first)
return;
if (hrtick_enabled(rq))
start_hrtick_dl(rq, p);
......@@ -1770,22 +1773,19 @@ static struct sched_dl_entity *pick_next_dl_entity(struct rq *rq,
return rb_entry(left, struct sched_dl_entity, rb_node);
}
static struct task_struct *
pick_next_task_dl(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
static struct task_struct *pick_next_task_dl(struct rq *rq)
{
struct sched_dl_entity *dl_se;
struct dl_rq *dl_rq = &rq->dl;
struct task_struct *p;
WARN_ON_ONCE(prev || rf);
if (!sched_dl_runnable(rq))
return NULL;
dl_se = pick_next_dl_entity(rq, dl_rq);
BUG_ON(!dl_se);
p = dl_task_of(dl_se);
set_next_task_dl(rq, p);
set_next_task_dl(rq, p, true);
return p;
}
......
......@@ -89,3 +89,4 @@ SCHED_FEAT(WA_BIAS, true)
* UtilEstimation. Use estimated CPU utilization.
*/
SCHED_FEAT(UTIL_EST, true)
SCHED_FEAT(UTIL_EST_FASTUP, true)
......@@ -385,21 +385,17 @@ static void put_prev_task_idle(struct rq *rq, struct task_struct *prev)
{
}
static void set_next_task_idle(struct rq *rq, struct task_struct *next)
static void set_next_task_idle(struct rq *rq, struct task_struct *next, bool first)
{
update_idle_core(rq);
schedstat_inc(rq->sched_goidle);
}
static struct task_struct *
pick_next_task_idle(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
struct task_struct *pick_next_task_idle(struct rq *rq)
{
struct task_struct *next = rq->idle;
if (prev)
put_prev_task(rq, prev);
set_next_task_idle(rq, next);
set_next_task_idle(rq, next, true);
return next;
}
......
......@@ -1515,13 +1515,16 @@ static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flag
#endif
}
static inline void set_next_task_rt(struct rq *rq, struct task_struct *p)
static inline void set_next_task_rt(struct rq *rq, struct task_struct *p, bool first)
{
p->se.exec_start = rq_clock_task(rq);
/* The running task is never eligible for pushing */
dequeue_pushable_task(rq, p);
if (!first)
return;
/*
* If prev task was rt, put_prev_task() has already updated the
* utilization. We only care of the case where we start to schedule a
......@@ -1564,18 +1567,15 @@ static struct task_struct *_pick_next_task_rt(struct rq *rq)
return rt_task_of(rt_se);
}
static struct task_struct *
pick_next_task_rt(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
static struct task_struct *pick_next_task_rt(struct rq *rq)
{
struct task_struct *p;
WARN_ON_ONCE(prev || rf);
if (!sched_rt_runnable(rq))
return NULL;
p = _pick_next_task_rt(rq);
set_next_task_rt(rq, p);
set_next_task_rt(rq, p, true);
return p;
}
......
......@@ -1713,22 +1713,10 @@ struct sched_class {
void (*check_preempt_curr)(struct rq *rq, struct task_struct *p, int flags);
/*
* Both @prev and @rf are optional and may be NULL, in which case the
* caller must already have invoked put_prev_task(rq, prev, rf).
*
* Otherwise it is the responsibility of the pick_next_task() to call
* put_prev_task() on the @prev task or something equivalent, IFF it
* returns a next task.
*
* In that case (@rf != NULL) it may return RETRY_TASK when it finds a
* higher prio class has runnable tasks.
*/
struct task_struct * (*pick_next_task)(struct rq *rq,
struct task_struct *prev,
struct rq_flags *rf);
struct task_struct *(*pick_next_task)(struct rq *rq);
void (*put_prev_task)(struct rq *rq, struct task_struct *p);
void (*set_next_task)(struct rq *rq, struct task_struct *p);
void (*set_next_task)(struct rq *rq, struct task_struct *p, bool first);
#ifdef CONFIG_SMP
int (*balance)(struct rq *rq, struct task_struct *prev, struct rq_flags *rf);
......@@ -1780,7 +1768,7 @@ static inline void put_prev_task(struct rq *rq, struct task_struct *prev)
static inline void set_next_task(struct rq *rq, struct task_struct *next)
{
WARN_ON_ONCE(rq->curr != next);
next->sched_class->set_next_task(rq, next);
next->sched_class->set_next_task(rq, next, false);
}
#ifdef CONFIG_SMP
......@@ -1821,6 +1809,9 @@ static inline bool sched_fair_runnable(struct rq *rq)
return rq->cfs.nr_running > 0;
}
extern struct task_struct *pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf);
extern struct task_struct *pick_next_task_idle(struct rq *rq);
#ifdef CONFIG_SMP
extern void update_group_capacity(struct sched_domain *sd, int cpu);
......@@ -2309,7 +2300,7 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
#endif /* CONFIG_CPU_FREQ */
#ifdef CONFIG_UCLAMP_TASK
enum uclamp_id uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id);
unsigned int uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id);
static __always_inline
unsigned int uclamp_util_with(struct rq *rq, unsigned int util,
......
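
For reference, the shape of a class implementation after this change:
pick_next_task() takes only the runqueue (the core now calls put_prev_task()
itself on the slow path), and set_next_task() receives first=true only when
the task was genuinely just picked, so classes can skip pick-time-only work
otherwise. A skeletal, hypothetical class modeled on the rt/dl hunks above,
not actual kernel code:

	static void set_next_task_example(struct rq *rq, struct task_struct *p, bool first)
	{
		p->se.exec_start = rq_clock_task(rq);

		if (!first)
			return;

		/* Work that only makes sense right after pick_next_task(),
		 * e.g. (re)starting the hrtick timer, goes here. */
	}

	static struct task_struct *pick_next_task_example(struct rq *rq)
	{
		/* example_pick_highest_prio() is a hypothetical per-class picker. */
		struct task_struct *p = example_pick_highest_prio(rq);

		if (!p)
			return NULL;	/* let the core fall through to the next class */

		set_next_task_example(rq, p, true);
		return p;
	}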
......@@ -29,20 +29,17 @@ check_preempt_curr_stop(struct rq *rq, struct task_struct *p, int flags)
/* we're never preempted */
}
static void set_next_task_stop(struct rq *rq, struct task_struct *stop)
static void set_next_task_stop(struct rq *rq, struct task_struct *stop, bool first)
{
stop->se.exec_start = rq_clock_task(rq);
}
static struct task_struct *
pick_next_task_stop(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
static struct task_struct *pick_next_task_stop(struct rq *rq)
{
WARN_ON_ONCE(prev || rf);
if (!sched_stop_runnable(rq))
return NULL;
set_next_task_stop(rq, rq->stop);
set_next_task_stop(rq, rq->stop, true);
return rq->stop;
}
......
......@@ -1201,16 +1201,13 @@ static void set_domain_attribute(struct sched_domain *sd,
if (!attr || attr->relax_domain_level < 0) {
if (default_relax_domain_level < 0)
return;
else
request = default_relax_domain_level;
} else
request = attr->relax_domain_level;
if (request < sd->level) {
if (sd->level > request) {
/* Turn off idle balance on this domain: */
sd->flags &= ~(SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE);
} else {
/* Turn on idle balance on this domain: */
sd->flags |= (SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE);
}
}
......
......@@ -1119,7 +1119,7 @@ static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
unsigned long ticks;
if (vtime_accounting_cpu_enabled())
if (vtime_accounting_enabled_this_cpu())
return;
/*
* We stopped the tick in idle. Update process times would miss the
......