Commit 825a3b26 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler updates from Ingo Molnar:

 - massive CPU hotplug rework (Thomas Gleixner)

 - improve migration fairness (Peter Zijlstra)

 - CPU load calculation updates/cleanups (Yuyang Du)

 - cpufreq updates (Steve Muckle)

 - nohz optimizations (Frederic Weisbecker)

 - switch_mm() micro-optimization on x86 (Andy Lutomirski)

 - ... lots of other enhancements, fixes and cleanups.

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (66 commits)
  ARM: Hide finish_arch_post_lock_switch() from modules
  sched/core: Provide a tsk_nr_cpus_allowed() helper
  sched/core: Use tsk_cpus_allowed() instead of accessing ->cpus_allowed
  sched/loadavg: Fix loadavg artifacts on fully idle and on fully loaded systems
  sched/fair: Correct unit of load_above_capacity
  sched/fair: Clean up scale confusion
  sched/nohz: Fix affine unpinned timers mess
  sched/fair: Fix fairness issue on migration
  sched/core: Kill sched_class::task_waking to clean up the migration logic
  sched/fair: Prepare to fix fairness problems on migration
  sched/fair: Move record_wakee()
  sched/core: Fix comment typo in wake_q_add()
  sched/core: Remove unused variable
  sched: Make hrtick_notifier an explicit call
  sched/fair: Make ilb_notifier an explicit call
  sched/hotplug: Make activate() the last hotplug step
  sched/hotplug: Move migration CPU_DYING to sched_cpu_dying()
  sched/migration: Move CPU_ONLINE into scheduler state
  sched/migration: Move calc_load_migrate() into CPU_DYING
  sched/migration: Move prepare transition to SCHED_STARTING state
  ...
parents cf6ed9a6 ef0491ea
...@@ -1562,12 +1562,12 @@ Doing the same with chrt -r 5 and function-trace set. ...@@ -1562,12 +1562,12 @@ Doing the same with chrt -r 5 and function-trace set.
<idle>-0 3dN.1 12us : menu_hrtimer_cancel <-tick_nohz_idle_exit <idle>-0 3dN.1 12us : menu_hrtimer_cancel <-tick_nohz_idle_exit
<idle>-0 3dN.1 12us : ktime_get <-tick_nohz_idle_exit <idle>-0 3dN.1 12us : ktime_get <-tick_nohz_idle_exit
<idle>-0 3dN.1 12us : tick_do_update_jiffies64 <-tick_nohz_idle_exit <idle>-0 3dN.1 12us : tick_do_update_jiffies64 <-tick_nohz_idle_exit
<idle>-0 3dN.1 13us : update_cpu_load_nohz <-tick_nohz_idle_exit <idle>-0 3dN.1 13us : cpu_load_update_nohz <-tick_nohz_idle_exit
<idle>-0 3dN.1 13us : _raw_spin_lock <-update_cpu_load_nohz <idle>-0 3dN.1 13us : _raw_spin_lock <-cpu_load_update_nohz
<idle>-0 3dN.1 13us : add_preempt_count <-_raw_spin_lock <idle>-0 3dN.1 13us : add_preempt_count <-_raw_spin_lock
<idle>-0 3dN.2 13us : __update_cpu_load <-update_cpu_load_nohz <idle>-0 3dN.2 13us : __cpu_load_update <-cpu_load_update_nohz
<idle>-0 3dN.2 14us : sched_avg_update <-__update_cpu_load <idle>-0 3dN.2 14us : sched_avg_update <-__cpu_load_update
<idle>-0 3dN.2 14us : _raw_spin_unlock <-update_cpu_load_nohz <idle>-0 3dN.2 14us : _raw_spin_unlock <-cpu_load_update_nohz
<idle>-0 3dN.2 14us : sub_preempt_count <-_raw_spin_unlock <idle>-0 3dN.2 14us : sub_preempt_count <-_raw_spin_unlock
<idle>-0 3dN.1 15us : calc_load_exit_idle <-tick_nohz_idle_exit <idle>-0 3dN.1 15us : calc_load_exit_idle <-tick_nohz_idle_exit
<idle>-0 3dN.1 15us : touch_softlockup_watchdog <-tick_nohz_idle_exit <idle>-0 3dN.1 15us : touch_softlockup_watchdog <-tick_nohz_idle_exit
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#include <linux/compiler.h> #include <linux/compiler.h>
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/preempt.h>
#include <asm/cacheflush.h> #include <asm/cacheflush.h>
#include <asm/cachetype.h> #include <asm/cachetype.h>
#include <asm/proc-fns.h> #include <asm/proc-fns.h>
...@@ -66,6 +67,7 @@ static inline void check_and_switch_context(struct mm_struct *mm, ...@@ -66,6 +67,7 @@ static inline void check_and_switch_context(struct mm_struct *mm,
cpu_switch_mm(mm->pgd, mm); cpu_switch_mm(mm->pgd, mm);
} }
#ifndef MODULE
#define finish_arch_post_lock_switch \ #define finish_arch_post_lock_switch \
finish_arch_post_lock_switch finish_arch_post_lock_switch
static inline void finish_arch_post_lock_switch(void) static inline void finish_arch_post_lock_switch(void)
...@@ -87,6 +89,7 @@ static inline void finish_arch_post_lock_switch(void) ...@@ -87,6 +89,7 @@ static inline void finish_arch_post_lock_switch(void)
preempt_enable_no_resched(); preempt_enable_no_resched();
} }
} }
#endif /* !MODULE */
#endif /* CONFIG_MMU */ #endif /* CONFIG_MMU */
......
...@@ -565,7 +565,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle) ...@@ -565,7 +565,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
smp_ops->give_timebase(); smp_ops->give_timebase();
/* Wait until cpu puts itself in the online & active maps */ /* Wait until cpu puts itself in the online & active maps */
while (!cpu_online(cpu) || !cpu_active(cpu)) while (!cpu_online(cpu))
cpu_relax(); cpu_relax();
return 0; return 0;
......
...@@ -832,7 +832,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle) ...@@ -832,7 +832,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
pcpu_attach_task(pcpu, tidle); pcpu_attach_task(pcpu, tidle);
pcpu_start_fn(pcpu, smp_start_secondary, NULL); pcpu_start_fn(pcpu, smp_start_secondary, NULL);
/* Wait until cpu puts itself in the online & active maps */ /* Wait until cpu puts itself in the online & active maps */
while (!cpu_online(cpu) || !cpu_active(cpu)) while (!cpu_online(cpu))
cpu_relax(); cpu_relax();
return 0; return 0;
} }
......
...@@ -2183,7 +2183,7 @@ void arch_perf_update_userpage(struct perf_event *event, ...@@ -2183,7 +2183,7 @@ void arch_perf_update_userpage(struct perf_event *event,
* cap_user_time_zero doesn't make sense when we're using a different * cap_user_time_zero doesn't make sense when we're using a different
* time base for the records. * time base for the records.
*/ */
if (event->clock == &local_clock) { if (!event->attr.use_clockid) {
userpg->cap_user_time_zero = 1; userpg->cap_user_time_zero = 1;
userpg->time_zero = data->cyc2ns_offset; userpg->time_zero = data->cyc2ns_offset;
} }
......
...@@ -115,103 +115,12 @@ static inline void destroy_context(struct mm_struct *mm) ...@@ -115,103 +115,12 @@ static inline void destroy_context(struct mm_struct *mm)
destroy_context_ldt(mm); destroy_context_ldt(mm);
} }
static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, extern void switch_mm(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk) struct task_struct *tsk);
{
unsigned cpu = smp_processor_id();
if (likely(prev != next)) {
#ifdef CONFIG_SMP
this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
this_cpu_write(cpu_tlbstate.active_mm, next);
#endif
cpumask_set_cpu(cpu, mm_cpumask(next));
/*
* Re-load page tables.
*
* This logic has an ordering constraint:
*
* CPU 0: Write to a PTE for 'next'
* CPU 0: load bit 1 in mm_cpumask. if nonzero, send IPI.
* CPU 1: set bit 1 in next's mm_cpumask
* CPU 1: load from the PTE that CPU 0 writes (implicit)
*
* We need to prevent an outcome in which CPU 1 observes
* the new PTE value and CPU 0 observes bit 1 clear in
* mm_cpumask. (If that occurs, then the IPI will never
* be sent, and CPU 0's TLB will contain a stale entry.)
*
* The bad outcome can occur if either CPU's load is
* reordered before that CPU's store, so both CPUs must
* execute full barriers to prevent this from happening.
*
* Thus, switch_mm needs a full barrier between the
* store to mm_cpumask and any operation that could load
* from next->pgd. TLB fills are special and can happen
* due to instruction fetches or for no reason at all,
* and neither LOCK nor MFENCE orders them.
* Fortunately, load_cr3() is serializing and gives the
* ordering guarantee we need.
*
*/
load_cr3(next->pgd);
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
/* Stop flush ipis for the previous mm */
cpumask_clear_cpu(cpu, mm_cpumask(prev));
/* Load per-mm CR4 state */
load_mm_cr4(next);
#ifdef CONFIG_MODIFY_LDT_SYSCALL extern void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
/* struct task_struct *tsk);
* Load the LDT, if the LDT is different. #define switch_mm_irqs_off switch_mm_irqs_off
*
* It's possible that prev->context.ldt doesn't match
* the LDT register. This can happen if leave_mm(prev)
* was called and then modify_ldt changed
* prev->context.ldt but suppressed an IPI to this CPU.
* In this case, prev->context.ldt != NULL, because we
* never set context.ldt to NULL while the mm still
* exists. That means that next->context.ldt !=
* prev->context.ldt, because mms never share an LDT.
*/
if (unlikely(prev->context.ldt != next->context.ldt))
load_mm_ldt(next);
#endif
}
#ifdef CONFIG_SMP
else {
this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
BUG_ON(this_cpu_read(cpu_tlbstate.active_mm) != next);
if (!cpumask_test_cpu(cpu, mm_cpumask(next))) {
/*
* On established mms, the mm_cpumask is only changed
* from irq context, from ptep_clear_flush() while in
* lazy tlb mode, and here. Irqs are blocked during
* schedule, protecting us from simultaneous changes.
*/
cpumask_set_cpu(cpu, mm_cpumask(next));
/*
* We were in lazy tlb mode and leave_mm disabled
* tlb flush IPI delivery. We must reload CR3
* to make sure to use no freed page tables.
*
* As above, load_cr3() is serializing and orders TLB
* fills with respect to the mm_cpumask write.
*/
load_cr3(next->pgd);
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
load_mm_cr4(next);
load_mm_ldt(next);
}
}
#endif
}
#define activate_mm(prev, next) \ #define activate_mm(prev, next) \
do { \ do { \
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
KCOV_INSTRUMENT_tlb.o := n KCOV_INSTRUMENT_tlb.o := n
obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
pat.o pgtable.o physaddr.o gup.o setup_nx.o pat.o pgtable.o physaddr.o gup.o setup_nx.o tlb.o
# Make sure __phys_addr has no stackprotector # Make sure __phys_addr has no stackprotector
nostackp := $(call cc-option, -fno-stack-protector) nostackp := $(call cc-option, -fno-stack-protector)
...@@ -12,7 +12,6 @@ CFLAGS_setup_nx.o := $(nostackp) ...@@ -12,7 +12,6 @@ CFLAGS_setup_nx.o := $(nostackp)
CFLAGS_fault.o := -I$(src)/../include/asm/trace CFLAGS_fault.o := -I$(src)/../include/asm/trace
obj-$(CONFIG_X86_PAT) += pat_rbtree.o obj-$(CONFIG_X86_PAT) += pat_rbtree.o
obj-$(CONFIG_SMP) += tlb.o
obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o
......
...@@ -28,6 +28,8 @@ ...@@ -28,6 +28,8 @@
* Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi * Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
*/ */
#ifdef CONFIG_SMP
struct flush_tlb_info { struct flush_tlb_info {
struct mm_struct *flush_mm; struct mm_struct *flush_mm;
unsigned long flush_start; unsigned long flush_start;
...@@ -57,6 +59,118 @@ void leave_mm(int cpu) ...@@ -57,6 +59,118 @@ void leave_mm(int cpu)
} }
EXPORT_SYMBOL_GPL(leave_mm); EXPORT_SYMBOL_GPL(leave_mm);
#endif /* CONFIG_SMP */
void switch_mm(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
{
unsigned long flags;
local_irq_save(flags);
switch_mm_irqs_off(prev, next, tsk);
local_irq_restore(flags);
}
void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
{
unsigned cpu = smp_processor_id();
if (likely(prev != next)) {
#ifdef CONFIG_SMP
this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
this_cpu_write(cpu_tlbstate.active_mm, next);
#endif
cpumask_set_cpu(cpu, mm_cpumask(next));
/*
* Re-load page tables.
*
* This logic has an ordering constraint:
*
* CPU 0: Write to a PTE for 'next'
* CPU 0: load bit 1 in mm_cpumask. if nonzero, send IPI.
* CPU 1: set bit 1 in next's mm_cpumask
* CPU 1: load from the PTE that CPU 0 writes (implicit)
*
* We need to prevent an outcome in which CPU 1 observes
* the new PTE value and CPU 0 observes bit 1 clear in
* mm_cpumask. (If that occurs, then the IPI will never
* be sent, and CPU 0's TLB will contain a stale entry.)
*
* The bad outcome can occur if either CPU's load is
* reordered before that CPU's store, so both CPUs must
* execute full barriers to prevent this from happening.
*
* Thus, switch_mm needs a full barrier between the
* store to mm_cpumask and any operation that could load
* from next->pgd. TLB fills are special and can happen
* due to instruction fetches or for no reason at all,
* and neither LOCK nor MFENCE orders them.
* Fortunately, load_cr3() is serializing and gives the
* ordering guarantee we need.
*
*/
load_cr3(next->pgd);
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
/* Stop flush ipis for the previous mm */
cpumask_clear_cpu(cpu, mm_cpumask(prev));
/* Load per-mm CR4 state */
load_mm_cr4(next);
#ifdef CONFIG_MODIFY_LDT_SYSCALL
/*
* Load the LDT, if the LDT is different.
*
* It's possible that prev->context.ldt doesn't match
* the LDT register. This can happen if leave_mm(prev)
* was called and then modify_ldt changed
* prev->context.ldt but suppressed an IPI to this CPU.
* In this case, prev->context.ldt != NULL, because we
* never set context.ldt to NULL while the mm still
* exists. That means that next->context.ldt !=
* prev->context.ldt, because mms never share an LDT.
*/
if (unlikely(prev->context.ldt != next->context.ldt))
load_mm_ldt(next);
#endif
}
#ifdef CONFIG_SMP
else {
this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
BUG_ON(this_cpu_read(cpu_tlbstate.active_mm) != next);
if (!cpumask_test_cpu(cpu, mm_cpumask(next))) {
/*
* On established mms, the mm_cpumask is only changed
* from irq context, from ptep_clear_flush() while in
* lazy tlb mode, and here. Irqs are blocked during
* schedule, protecting us from simultaneous changes.
*/
cpumask_set_cpu(cpu, mm_cpumask(next));
/*
* We were in lazy tlb mode and leave_mm disabled
* tlb flush IPI delivery. We must reload CR3
* to make sure to use no freed page tables.
*
* As above, load_cr3() is serializing and orders TLB
* fills with respect to the mm_cpumask write.
*/
load_cr3(next->pgd);
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
load_mm_cr4(next);
load_mm_ldt(next);
}
}
#endif
}
#ifdef CONFIG_SMP
/* /*
* The flush IPI assumes that a thread switch happens in this order: * The flush IPI assumes that a thread switch happens in this order:
* [cpu0: the cpu that switches] * [cpu0: the cpu that switches]
...@@ -353,3 +467,5 @@ static int __init create_tlb_single_page_flush_ceiling(void) ...@@ -353,3 +467,5 @@ static int __init create_tlb_single_page_flush_ceiling(void)
return 0; return 0;
} }
late_initcall(create_tlb_single_page_flush_ceiling); late_initcall(create_tlb_single_page_flush_ceiling);
#endif /* CONFIG_SMP */
...@@ -59,25 +59,7 @@ struct notifier_block; ...@@ -59,25 +59,7 @@ struct notifier_block;
* CPU notifier priorities. * CPU notifier priorities.
*/ */
enum { enum {
/*
* SCHED_ACTIVE marks a cpu which is coming up active during
* CPU_ONLINE and CPU_DOWN_FAILED and must be the first
* notifier. CPUSET_ACTIVE adjusts cpuset according to
* cpu_active mask right after SCHED_ACTIVE. During
* CPU_DOWN_PREPARE, SCHED_INACTIVE and CPUSET_INACTIVE are
* ordered in the similar way.
*
* This ordering guarantees consistent cpu_active mask and
* migration behavior to all cpu notifiers.
*/
CPU_PRI_SCHED_ACTIVE = INT_MAX,
CPU_PRI_CPUSET_ACTIVE = INT_MAX - 1,
CPU_PRI_SCHED_INACTIVE = INT_MIN + 1,
CPU_PRI_CPUSET_INACTIVE = INT_MIN,
/* migration should happen before other stuff but after perf */
CPU_PRI_PERF = 20, CPU_PRI_PERF = 20,
CPU_PRI_MIGRATION = 10,
/* bring up workqueues before normal notifiers and down after */ /* bring up workqueues before normal notifiers and down after */
CPU_PRI_WORKQUEUE_UP = 5, CPU_PRI_WORKQUEUE_UP = 5,
......
...@@ -8,6 +8,7 @@ enum cpuhp_state { ...@@ -8,6 +8,7 @@ enum cpuhp_state {
CPUHP_BRINGUP_CPU, CPUHP_BRINGUP_CPU,
CPUHP_AP_IDLE_DEAD, CPUHP_AP_IDLE_DEAD,
CPUHP_AP_OFFLINE, CPUHP_AP_OFFLINE,
CPUHP_AP_SCHED_STARTING,
CPUHP_AP_NOTIFY_STARTING, CPUHP_AP_NOTIFY_STARTING,
CPUHP_AP_ONLINE, CPUHP_AP_ONLINE,
CPUHP_TEARDOWN_CPU, CPUHP_TEARDOWN_CPU,
...@@ -16,6 +17,7 @@ enum cpuhp_state { ...@@ -16,6 +17,7 @@ enum cpuhp_state {
CPUHP_AP_NOTIFY_ONLINE, CPUHP_AP_NOTIFY_ONLINE,
CPUHP_AP_ONLINE_DYN, CPUHP_AP_ONLINE_DYN,
CPUHP_AP_ONLINE_DYN_END = CPUHP_AP_ONLINE_DYN + 30, CPUHP_AP_ONLINE_DYN_END = CPUHP_AP_ONLINE_DYN + 30,
CPUHP_AP_ACTIVE,
CPUHP_ONLINE, CPUHP_ONLINE,
}; };
......
...@@ -743,12 +743,10 @@ set_cpu_present(unsigned int cpu, bool present) ...@@ -743,12 +743,10 @@ set_cpu_present(unsigned int cpu, bool present)
static inline void static inline void
set_cpu_online(unsigned int cpu, bool online) set_cpu_online(unsigned int cpu, bool online)
{ {
if (online) { if (online)
cpumask_set_cpu(cpu, &__cpu_online_mask); cpumask_set_cpu(cpu, &__cpu_online_mask);
cpumask_set_cpu(cpu, &__cpu_active_mask); else
} else {
cpumask_clear_cpu(cpu, &__cpu_online_mask); cpumask_clear_cpu(cpu, &__cpu_online_mask);
}
} }
static inline void static inline void
......
...@@ -356,8 +356,13 @@ extern void lockdep_set_current_reclaim_state(gfp_t gfp_mask); ...@@ -356,8 +356,13 @@ extern void lockdep_set_current_reclaim_state(gfp_t gfp_mask);
extern void lockdep_clear_current_reclaim_state(void); extern void lockdep_clear_current_reclaim_state(void);
extern void lockdep_trace_alloc(gfp_t mask); extern void lockdep_trace_alloc(gfp_t mask);
extern void lock_pin_lock(struct lockdep_map *lock); struct pin_cookie { unsigned int val; };
extern void lock_unpin_lock(struct lockdep_map *lock);
#define NIL_COOKIE (struct pin_cookie){ .val = 0U, }
extern struct pin_cookie lock_pin_lock(struct lockdep_map *lock);
extern void lock_repin_lock(struct lockdep_map *lock, struct pin_cookie);
extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie);
# define INIT_LOCKDEP .lockdep_recursion = 0, .lockdep_reclaim_gfp = 0, # define INIT_LOCKDEP .lockdep_recursion = 0, .lockdep_reclaim_gfp = 0,
...@@ -374,7 +379,8 @@ extern void lock_unpin_lock(struct lockdep_map *lock); ...@@ -374,7 +379,8 @@ extern void lock_unpin_lock(struct lockdep_map *lock);
#define lockdep_recursing(tsk) ((tsk)->lockdep_recursion) #define lockdep_recursing(tsk) ((tsk)->lockdep_recursion)
#define lockdep_pin_lock(l) lock_pin_lock(&(l)->dep_map) #define lockdep_pin_lock(l) lock_pin_lock(&(l)->dep_map)
#define lockdep_unpin_lock(l) lock_unpin_lock(&(l)->dep_map) #define lockdep_repin_lock(l,c) lock_repin_lock(&(l)->dep_map, (c))
#define lockdep_unpin_lock(l,c) lock_unpin_lock(&(l)->dep_map, (c))
#else /* !CONFIG_LOCKDEP */ #else /* !CONFIG_LOCKDEP */
...@@ -427,8 +433,13 @@ struct lock_class_key { }; ...@@ -427,8 +433,13 @@ struct lock_class_key { };
#define lockdep_recursing(tsk) (0) #define lockdep_recursing(tsk) (0)
#define lockdep_pin_lock(l) do { (void)(l); } while (0) struct pin_cookie { };
#define lockdep_unpin_lock(l) do { (void)(l); } while (0)
#define NIL_COOKIE (struct pin_cookie){ }
#define lockdep_pin_lock(l) ({ struct pin_cookie cookie; cookie; })
#define lockdep_repin_lock(l, c) do { (void)(l); (void)(c); } while (0)
#define lockdep_unpin_lock(l, c) do { (void)(l); (void)(c); } while (0)
#endif /* !LOCKDEP */ #endif /* !LOCKDEP */
......
#ifndef _LINUX_MMU_CONTEXT_H #ifndef _LINUX_MMU_CONTEXT_H
#define _LINUX_MMU_CONTEXT_H #define _LINUX_MMU_CONTEXT_H
#include <asm/mmu_context.h>
struct mm_struct; struct mm_struct;
void use_mm(struct mm_struct *mm); void use_mm(struct mm_struct *mm);
void unuse_mm(struct mm_struct *mm); void unuse_mm(struct mm_struct *mm);
/* Architectures that care about IRQ state in switch_mm can override this. */
#ifndef switch_mm_irqs_off
# define switch_mm_irqs_off switch_mm
#endif
#endif #endif
...@@ -177,9 +177,11 @@ extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load); ...@@ -177,9 +177,11 @@ extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load);
extern void calc_global_load(unsigned long ticks); extern void calc_global_load(unsigned long ticks);
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
extern void update_cpu_load_nohz(int active); extern void cpu_load_update_nohz_start(void);
extern void cpu_load_update_nohz_stop(void);
#else #else
static inline void update_cpu_load_nohz(int active) { } static inline void cpu_load_update_nohz_start(void) { }
static inline void cpu_load_update_nohz_stop(void) { }
#endif #endif
extern void dump_cpu_task(int cpu); extern void dump_cpu_task(int cpu);
...@@ -371,6 +373,15 @@ extern void cpu_init (void); ...@@ -371,6 +373,15 @@ extern void cpu_init (void);
extern void trap_init(void); extern void trap_init(void);
extern void update_process_times(int user); extern void update_process_times(int user);
extern void scheduler_tick(void); extern void scheduler_tick(void);
extern int sched_cpu_starting(unsigned int cpu);
extern int sched_cpu_activate(unsigned int cpu);
extern int sched_cpu_deactivate(unsigned int cpu);
#ifdef CONFIG_HOTPLUG_CPU
extern int sched_cpu_dying(unsigned int cpu);
#else
# define sched_cpu_dying NULL
#endif
extern void sched_show_task(struct task_struct *p); extern void sched_show_task(struct task_struct *p);
...@@ -933,10 +944,20 @@ enum cpu_idle_type { ...@@ -933,10 +944,20 @@ enum cpu_idle_type {
CPU_MAX_IDLE_TYPES CPU_MAX_IDLE_TYPES
}; };
/*
* Integer metrics need fixed point arithmetic, e.g., sched/fair
* has a few: load, load_avg, util_avg, freq, and capacity.
*
* We define a basic fixed point arithmetic range, and then formalize
* all these metrics based on that basic range.
*/
# define SCHED_FIXEDPOINT_SHIFT 10
# define SCHED_FIXEDPOINT_SCALE (1L << SCHED_FIXEDPOINT_SHIFT)
/* /*
* Increase resolution of cpu_capacity calculations * Increase resolution of cpu_capacity calculations
*/ */
#define SCHED_CAPACITY_SHIFT 10 #define SCHED_CAPACITY_SHIFT SCHED_FIXEDPOINT_SHIFT
#define SCHED_CAPACITY_SCALE (1L << SCHED_CAPACITY_SHIFT) #define SCHED_CAPACITY_SCALE (1L << SCHED_CAPACITY_SHIFT)
/* /*
...@@ -1198,18 +1219,56 @@ struct load_weight { ...@@ -1198,18 +1219,56 @@ struct load_weight {
}; };
/* /*
* The load_avg/util_avg accumulates an infinite geometric series. * The load_avg/util_avg accumulates an infinite geometric series
* 1) load_avg factors frequency scaling into the amount of time that a * (see __update_load_avg() in kernel/sched/fair.c).
* sched_entity is runnable on a rq into its weight. For cfs_rq, it is the *
* aggregated such weights of all runnable and blocked sched_entities. * [load_avg definition]
* 2) util_avg factors frequency and cpu scaling into the amount of time *
* that a sched_entity is running on a CPU, in the range [0..SCHED_LOAD_SCALE]. * load_avg = runnable% * scale_load_down(load)
* For cfs_rq, it is the aggregated such times of all runnable and *
* where runnable% is the time ratio that a sched_entity is runnable.
* For cfs_rq, it is the aggregated load_avg of all runnable and
* blocked sched_entities. * blocked sched_entities.
* The 64 bit load_sum can: *
* 1) for cfs_rq, afford 4353082796 (=2^64/47742/88761) entities with * load_avg may also take frequency scaling into account:
* the highest weight (=88761) always runnable, we should not overflow *
* 2) for entity, support any load.weight always runnable * load_avg = runnable% * scale_load_down(load) * freq%
*
* where freq% is the CPU frequency normalized to the highest frequency.
*
* [util_avg definition]
*
* util_avg = running% * SCHED_CAPACITY_SCALE
*
* where running% is the time ratio that a sched_entity is running on
* a CPU. For cfs_rq, it is the aggregated util_avg of all runnable
* and blocked sched_entities.
*
* util_avg may also factor frequency scaling and CPU capacity scaling:
*
* util_avg = running% * SCHED_CAPACITY_SCALE * freq% * capacity%
*
* where freq% is the same as above, and capacity% is the CPU capacity
* normalized to the greatest capacity (due to uarch differences, etc).
*
* N.B., the above ratios (runnable%, running%, freq%, and capacity%)
* themselves are in the range of [0, 1]. To do fixed point arithmetics,
* we therefore scale them to as large a range as necessary. This is for
* example reflected by util_avg's SCHED_CAPACITY_SCALE.
*
* [Overflow issue]
*
* The 64-bit load_sum can have 4353082796 (=2^64/47742/88761) entities
* with the highest load (=88761), always runnable on a single cfs_rq,
* and should not overflow as the number already hits PID_MAX_LIMIT.
*
* For all other cases (including 32-bit kernels), struct load_weight's
* weight will overflow first before we do, because:
*
* Max(load_avg) <= Max(load.weight)
*
* Then it is the load_weight's responsibility to consider overflow
* issues.
*/ */
struct sched_avg { struct sched_avg {
u64 last_update_time, load_sum; u64 last_update_time, load_sum;
...@@ -1871,6 +1930,11 @@ extern int arch_task_struct_size __read_mostly; ...@@ -1871,6 +1930,11 @@ extern int arch_task_struct_size __read_mostly;
/* Future-safe accessor for struct task_struct's cpus_allowed. */ /* Future-safe accessor for struct task_struct's cpus_allowed. */
#define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed) #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
static inline int tsk_nr_cpus_allowed(struct task_struct *p)
{
return p->nr_cpus_allowed;
}
#define TNF_MIGRATED 0x01 #define TNF_MIGRATED 0x01
#define TNF_NO_GROUP 0x02 #define TNF_NO_GROUP 0x02
#define TNF_SHARED 0x04 #define TNF_SHARED 0x04
...@@ -2303,8 +2367,6 @@ extern unsigned long long notrace sched_clock(void); ...@@ -2303,8 +2367,6 @@ extern unsigned long long notrace sched_clock(void);
/* /*
* See the comment in kernel/sched/clock.c * See the comment in kernel/sched/clock.c
*/ */
extern u64 cpu_clock(int cpu);
extern u64 local_clock(void);
extern u64 running_clock(void); extern u64 running_clock(void);
extern u64 sched_clock_cpu(int cpu); extern u64 sched_clock_cpu(int cpu);
...@@ -2323,6 +2385,16 @@ static inline void sched_clock_idle_sleep_event(void) ...@@ -2323,6 +2385,16 @@ static inline void sched_clock_idle_sleep_event(void)
static inline void sched_clock_idle_wakeup_event(u64 delta_ns) static inline void sched_clock_idle_wakeup_event(u64 delta_ns)
{ {
} }
static inline u64 cpu_clock(int cpu)
{
return sched_clock();
}
static inline u64 local_clock(void)
{
return sched_clock();
}
#else #else
/* /*
* Architectures can set this to 1 if they have specified * Architectures can set this to 1 if they have specified
...@@ -2337,6 +2409,26 @@ extern void clear_sched_clock_stable(void); ...@@ -2337,6 +2409,26 @@ extern void clear_sched_clock_stable(void);
extern void sched_clock_tick(void); extern void sched_clock_tick(void);
extern void sched_clock_idle_sleep_event(void); extern void sched_clock_idle_sleep_event(void);
extern void sched_clock_idle_wakeup_event(u64 delta_ns); extern void sched_clock_idle_wakeup_event(u64 delta_ns);
/*
* As outlined in clock.c, provides a fast, high resolution, nanosecond
* time source that is monotonic per cpu argument and has bounded drift
* between cpus.
*
* ######################### BIG FAT WARNING ##########################
* # when comparing cpu_clock(i) to cpu_clock(j) for i != j, time can #
* # go backwards !! #
* ####################################################################
*/
static inline u64 cpu_clock(int cpu)
{
return sched_clock_cpu(cpu);
}
static inline u64 local_clock(void)
{
return sched_clock_cpu(raw_smp_processor_id());
}
#endif #endif
#ifdef CONFIG_IRQ_TIME_ACCOUNTING #ifdef CONFIG_IRQ_TIME_ACCOUNTING
......
...@@ -703,21 +703,6 @@ static int takedown_cpu(unsigned int cpu) ...@@ -703,21 +703,6 @@ static int takedown_cpu(unsigned int cpu)
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
int err; int err;
/*
* By now we've cleared cpu_active_mask, wait for all preempt-disabled
* and RCU users of this state to go away such that all new such users
* will observe it.
*
* For CONFIG_PREEMPT we have preemptible RCU and its sync_rcu() might
* not imply sync_sched(), so wait for both.
*
* Do sync before park smpboot threads to take care the rcu boost case.
*/
if (IS_ENABLED(CONFIG_PREEMPT))
synchronize_rcu_mult(call_rcu, call_rcu_sched);
else
synchronize_rcu();
/* Park the smpboot threads */ /* Park the smpboot threads */
kthread_park(per_cpu_ptr(&cpuhp_state, cpu)->thread); kthread_park(per_cpu_ptr(&cpuhp_state, cpu)->thread);
smpboot_park_threads(cpu); smpboot_park_threads(cpu);
...@@ -923,8 +908,6 @@ void cpuhp_online_idle(enum cpuhp_state state) ...@@ -923,8 +908,6 @@ void cpuhp_online_idle(enum cpuhp_state state)
st->state = CPUHP_AP_ONLINE_IDLE; st->state = CPUHP_AP_ONLINE_IDLE;
/* The cpu is marked online, set it active now */
set_cpu_active(cpu, true);
/* Unpark the stopper thread and the hotplug thread of this cpu */ /* Unpark the stopper thread and the hotplug thread of this cpu */
stop_machine_unpark(cpu); stop_machine_unpark(cpu);
kthread_unpark(st->thread); kthread_unpark(st->thread);
...@@ -1236,6 +1219,12 @@ static struct cpuhp_step cpuhp_ap_states[] = { ...@@ -1236,6 +1219,12 @@ static struct cpuhp_step cpuhp_ap_states[] = {
.name = "ap:offline", .name = "ap:offline",
.cant_stop = true, .cant_stop = true,
}, },
/* First state is scheduler control. Interrupts are disabled */
[CPUHP_AP_SCHED_STARTING] = {
.name = "sched:starting",
.startup = sched_cpu_starting,
.teardown = sched_cpu_dying,
},
/* /*
* Low level startup/teardown notifiers. Run with interrupts * Low level startup/teardown notifiers. Run with interrupts
* disabled. Will be removed once the notifiers are converted to * disabled. Will be removed once the notifiers are converted to
...@@ -1274,6 +1263,15 @@ static struct cpuhp_step cpuhp_ap_states[] = { ...@@ -1274,6 +1263,15 @@ static struct cpuhp_step cpuhp_ap_states[] = {
* The dynamically registered state space is here * The dynamically registered state space is here
*/ */
#ifdef CONFIG_SMP
/* Last state is scheduler control setting the cpu active */
[CPUHP_AP_ACTIVE] = {
.name = "sched:active",
.startup = sched_cpu_activate,
.teardown = sched_cpu_deactivate,
},
#endif
/* CPU is fully up and running. */ /* CPU is fully up and running. */
[CPUHP_ONLINE] = { [CPUHP_ONLINE] = {
.name = "online", .name = "online",
......
...@@ -45,6 +45,7 @@ ...@@ -45,6 +45,7 @@
#include <linux/bitops.h> #include <linux/bitops.h>
#include <linux/gfp.h> #include <linux/gfp.h>
#include <linux/kmemcheck.h> #include <linux/kmemcheck.h>
#include <linux/random.h>
#include <asm/sections.h> #include <asm/sections.h>
...@@ -3585,7 +3586,35 @@ static int __lock_is_held(struct lockdep_map *lock) ...@@ -3585,7 +3586,35 @@ static int __lock_is_held(struct lockdep_map *lock)
return 0; return 0;
} }
static void __lock_pin_lock(struct lockdep_map *lock) static struct pin_cookie __lock_pin_lock(struct lockdep_map *lock)
{
struct pin_cookie cookie = NIL_COOKIE;
struct task_struct *curr = current;
int i;
if (unlikely(!debug_locks))
return cookie;
for (i = 0; i < curr->lockdep_depth; i++) {
struct held_lock *hlock = curr->held_locks + i;
if (match_held_lock(hlock, lock)) {
/*
* Grab 16bits of randomness; this is sufficient to not
* be guessable and still allows some pin nesting in
* our u32 pin_count.
*/
cookie.val = 1 + (prandom_u32() >> 16);
hlock->pin_count += cookie.val;
return cookie;
}
}
WARN(1, "pinning an unheld lock\n");
return cookie;
}
static void __lock_repin_lock(struct lockdep_map *lock, struct pin_cookie cookie)
{ {
struct task_struct *curr = current; struct task_struct *curr = current;
int i; int i;
...@@ -3597,7 +3626,7 @@ static void __lock_pin_lock(struct lockdep_map *lock) ...@@ -3597,7 +3626,7 @@ static void __lock_pin_lock(struct lockdep_map *lock)
struct held_lock *hlock = curr->held_locks + i; struct held_lock *hlock = curr->held_locks + i;
if (match_held_lock(hlock, lock)) { if (match_held_lock(hlock, lock)) {
hlock->pin_count++; hlock->pin_count += cookie.val;
return; return;
} }
} }
...@@ -3605,7 +3634,7 @@ static void __lock_pin_lock(struct lockdep_map *lock) ...@@ -3605,7 +3634,7 @@ static void __lock_pin_lock(struct lockdep_map *lock)
WARN(1, "pinning an unheld lock\n"); WARN(1, "pinning an unheld lock\n");
} }
static void __lock_unpin_lock(struct lockdep_map *lock) static void __lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie cookie)
{ {
struct task_struct *curr = current; struct task_struct *curr = current;
int i; int i;
...@@ -3620,7 +3649,11 @@ static void __lock_unpin_lock(struct lockdep_map *lock) ...@@ -3620,7 +3649,11 @@ static void __lock_unpin_lock(struct lockdep_map *lock)
if (WARN(!hlock->pin_count, "unpinning an unpinned lock\n")) if (WARN(!hlock->pin_count, "unpinning an unpinned lock\n"))
return; return;
hlock->pin_count--; hlock->pin_count -= cookie.val;
if (WARN((int)hlock->pin_count < 0, "pin count corrupted\n"))
hlock->pin_count = 0;
return; return;
} }
} }
...@@ -3751,24 +3784,44 @@ int lock_is_held(struct lockdep_map *lock) ...@@ -3751,24 +3784,44 @@ int lock_is_held(struct lockdep_map *lock)
} }
EXPORT_SYMBOL_GPL(lock_is_held); EXPORT_SYMBOL_GPL(lock_is_held);
void lock_pin_lock(struct lockdep_map *lock) struct pin_cookie lock_pin_lock(struct lockdep_map *lock)
{ {
struct pin_cookie cookie = NIL_COOKIE;
unsigned long flags; unsigned long flags;
if (unlikely(current->lockdep_recursion)) if (unlikely(current->lockdep_recursion))
return; return cookie;
raw_local_irq_save(flags); raw_local_irq_save(flags);
check_flags(flags); check_flags(flags);
current->lockdep_recursion = 1; current->lockdep_recursion = 1;
__lock_pin_lock(lock); cookie = __lock_pin_lock(lock);
current->lockdep_recursion = 0; current->lockdep_recursion = 0;
raw_local_irq_restore(flags); raw_local_irq_restore(flags);
return cookie;
} }
EXPORT_SYMBOL_GPL(lock_pin_lock); EXPORT_SYMBOL_GPL(lock_pin_lock);
void lock_unpin_lock(struct lockdep_map *lock) void lock_repin_lock(struct lockdep_map *lock, struct pin_cookie cookie)
{
unsigned long flags;
if (unlikely(current->lockdep_recursion))
return;
raw_local_irq_save(flags);
check_flags(flags);
current->lockdep_recursion = 1;
__lock_repin_lock(lock, cookie);
current->lockdep_recursion = 0;
raw_local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(lock_repin_lock);
void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie cookie)
{ {
unsigned long flags; unsigned long flags;
...@@ -3779,7 +3832,7 @@ void lock_unpin_lock(struct lockdep_map *lock) ...@@ -3779,7 +3832,7 @@ void lock_unpin_lock(struct lockdep_map *lock)
check_flags(flags); check_flags(flags);
current->lockdep_recursion = 1; current->lockdep_recursion = 1;
__lock_unpin_lock(lock); __lock_unpin_lock(lock, cookie);
current->lockdep_recursion = 0; current->lockdep_recursion = 0;
raw_local_irq_restore(flags); raw_local_irq_restore(flags);
} }
......
...@@ -318,6 +318,7 @@ u64 sched_clock_cpu(int cpu) ...@@ -318,6 +318,7 @@ u64 sched_clock_cpu(int cpu)
return clock; return clock;
} }
EXPORT_SYMBOL_GPL(sched_clock_cpu);
void sched_clock_tick(void) void sched_clock_tick(void)
{ {
...@@ -363,39 +364,6 @@ void sched_clock_idle_wakeup_event(u64 delta_ns) ...@@ -363,39 +364,6 @@ void sched_clock_idle_wakeup_event(u64 delta_ns)
} }
EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event); EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
/*
* As outlined at the top, provides a fast, high resolution, nanosecond
* time source that is monotonic per cpu argument and has bounded drift
* between cpus.
*
* ######################### BIG FAT WARNING ##########################
* # when comparing cpu_clock(i) to cpu_clock(j) for i != j, time can #
* # go backwards !! #
* ####################################################################
*/
u64 cpu_clock(int cpu)
{
if (!sched_clock_stable())
return sched_clock_cpu(cpu);
return sched_clock();
}
/*
* Similar to cpu_clock() for the current cpu. Time will only be observed
* to be monotonic if care is taken to only compare timestampt taken on the
* same CPU.
*
* See cpu_clock().
*/
u64 local_clock(void)
{
if (!sched_clock_stable())
return sched_clock_cpu(raw_smp_processor_id());
return sched_clock();
}
#else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */ #else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
void sched_clock_init(void) void sched_clock_init(void)
...@@ -410,22 +378,8 @@ u64 sched_clock_cpu(int cpu) ...@@ -410,22 +378,8 @@ u64 sched_clock_cpu(int cpu)
return sched_clock(); return sched_clock();
} }
u64 cpu_clock(int cpu)
{
return sched_clock();
}
u64 local_clock(void)
{
return sched_clock();
}
#endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */ #endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
EXPORT_SYMBOL_GPL(cpu_clock);
EXPORT_SYMBOL_GPL(local_clock);
/* /*
* Running clock - returns the time that has elapsed while a guest has been * Running clock - returns the time that has elapsed while a guest has been
* running. * running.
......
This diff is collapsed.
...@@ -25,11 +25,22 @@ enum cpuacct_stat_index { ...@@ -25,11 +25,22 @@ enum cpuacct_stat_index {
CPUACCT_STAT_NSTATS, CPUACCT_STAT_NSTATS,
}; };
enum cpuacct_usage_index {
CPUACCT_USAGE_USER, /* ... user mode */
CPUACCT_USAGE_SYSTEM, /* ... kernel mode */
CPUACCT_USAGE_NRUSAGE,
};
struct cpuacct_usage {
u64 usages[CPUACCT_USAGE_NRUSAGE];
};
/* track cpu usage of a group of tasks and its child groups */ /* track cpu usage of a group of tasks and its child groups */
struct cpuacct { struct cpuacct {
struct cgroup_subsys_state css; struct cgroup_subsys_state css;
/* cpuusage holds pointer to a u64-type object on every cpu */ /* cpuusage holds pointer to a u64-type object on every cpu */
u64 __percpu *cpuusage; struct cpuacct_usage __percpu *cpuusage;
struct kernel_cpustat __percpu *cpustat; struct kernel_cpustat __percpu *cpustat;
}; };
...@@ -49,7 +60,7 @@ static inline struct cpuacct *parent_ca(struct cpuacct *ca) ...@@ -49,7 +60,7 @@ static inline struct cpuacct *parent_ca(struct cpuacct *ca)
return css_ca(ca->css.parent); return css_ca(ca->css.parent);
} }
static DEFINE_PER_CPU(u64, root_cpuacct_cpuusage); static DEFINE_PER_CPU(struct cpuacct_usage, root_cpuacct_cpuusage);
static struct cpuacct root_cpuacct = { static struct cpuacct root_cpuacct = {
.cpustat = &kernel_cpustat, .cpustat = &kernel_cpustat,
.cpuusage = &root_cpuacct_cpuusage, .cpuusage = &root_cpuacct_cpuusage,
...@@ -68,7 +79,7 @@ cpuacct_css_alloc(struct cgroup_subsys_state *parent_css) ...@@ -68,7 +79,7 @@ cpuacct_css_alloc(struct cgroup_subsys_state *parent_css)
if (!ca) if (!ca)
goto out; goto out;
ca->cpuusage = alloc_percpu(u64); ca->cpuusage = alloc_percpu(struct cpuacct_usage);
if (!ca->cpuusage) if (!ca->cpuusage)
goto out_free_ca; goto out_free_ca;
...@@ -96,20 +107,37 @@ static void cpuacct_css_free(struct cgroup_subsys_state *css) ...@@ -96,20 +107,37 @@ static void cpuacct_css_free(struct cgroup_subsys_state *css)
kfree(ca); kfree(ca);
} }
static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu) static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu,
enum cpuacct_usage_index index)
{ {
u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
u64 data; u64 data;
/*
* We allow index == CPUACCT_USAGE_NRUSAGE here to read
* the sum of suages.
*/
BUG_ON(index > CPUACCT_USAGE_NRUSAGE);
#ifndef CONFIG_64BIT #ifndef CONFIG_64BIT
/* /*
* Take rq->lock to make 64-bit read safe on 32-bit platforms. * Take rq->lock to make 64-bit read safe on 32-bit platforms.
*/ */
raw_spin_lock_irq(&cpu_rq(cpu)->lock); raw_spin_lock_irq(&cpu_rq(cpu)->lock);
data = *cpuusage; #endif
if (index == CPUACCT_USAGE_NRUSAGE) {
int i = 0;
data = 0;
for (i = 0; i < CPUACCT_USAGE_NRUSAGE; i++)
data += cpuusage->usages[i];
} else {
data = cpuusage->usages[index];
}
#ifndef CONFIG_64BIT
raw_spin_unlock_irq(&cpu_rq(cpu)->lock); raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
#else
data = *cpuusage;
#endif #endif
return data; return data;
...@@ -117,69 +145,103 @@ static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu) ...@@ -117,69 +145,103 @@ static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu)
static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val) static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
{ {
u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
int i;
#ifndef CONFIG_64BIT #ifndef CONFIG_64BIT
/* /*
* Take rq->lock to make 64-bit write safe on 32-bit platforms. * Take rq->lock to make 64-bit write safe on 32-bit platforms.
*/ */
raw_spin_lock_irq(&cpu_rq(cpu)->lock); raw_spin_lock_irq(&cpu_rq(cpu)->lock);
*cpuusage = val; #endif
for (i = 0; i < CPUACCT_USAGE_NRUSAGE; i++)
cpuusage->usages[i] = val;
#ifndef CONFIG_64BIT
raw_spin_unlock_irq(&cpu_rq(cpu)->lock); raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
#else
*cpuusage = val;
#endif #endif
} }
/* return total cpu usage (in nanoseconds) of a group */ /* return total cpu usage (in nanoseconds) of a group */
static u64 cpuusage_read(struct cgroup_subsys_state *css, struct cftype *cft) static u64 __cpuusage_read(struct cgroup_subsys_state *css,
enum cpuacct_usage_index index)
{ {
struct cpuacct *ca = css_ca(css); struct cpuacct *ca = css_ca(css);
u64 totalcpuusage = 0; u64 totalcpuusage = 0;
int i; int i;
for_each_present_cpu(i) for_each_possible_cpu(i)
totalcpuusage += cpuacct_cpuusage_read(ca, i); totalcpuusage += cpuacct_cpuusage_read(ca, i, index);
return totalcpuusage; return totalcpuusage;
} }
static u64 cpuusage_user_read(struct cgroup_subsys_state *css,
struct cftype *cft)
{
return __cpuusage_read(css, CPUACCT_USAGE_USER);
}
static u64 cpuusage_sys_read(struct cgroup_subsys_state *css,
struct cftype *cft)
{
return __cpuusage_read(css, CPUACCT_USAGE_SYSTEM);
}
static u64 cpuusage_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
return __cpuusage_read(css, CPUACCT_USAGE_NRUSAGE);
}
static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft, static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft,
u64 val) u64 val)
{ {
struct cpuacct *ca = css_ca(css); struct cpuacct *ca = css_ca(css);
int err = 0; int cpu;
int i;
/* /*
* Only allow '0' here to do a reset. * Only allow '0' here to do a reset.
*/ */
if (val) { if (val)
err = -EINVAL; return -EINVAL;
goto out;
}
for_each_present_cpu(i) for_each_possible_cpu(cpu)
cpuacct_cpuusage_write(ca, i, 0); cpuacct_cpuusage_write(ca, cpu, 0);
out: return 0;
return err;
} }
static int cpuacct_percpu_seq_show(struct seq_file *m, void *V) static int __cpuacct_percpu_seq_show(struct seq_file *m,
enum cpuacct_usage_index index)
{ {
struct cpuacct *ca = css_ca(seq_css(m)); struct cpuacct *ca = css_ca(seq_css(m));
u64 percpu; u64 percpu;
int i; int i;
for_each_present_cpu(i) { for_each_possible_cpu(i) {
percpu = cpuacct_cpuusage_read(ca, i); percpu = cpuacct_cpuusage_read(ca, i, index);
seq_printf(m, "%llu ", (unsigned long long) percpu); seq_printf(m, "%llu ", (unsigned long long) percpu);
} }
seq_printf(m, "\n"); seq_printf(m, "\n");
return 0; return 0;
} }
static int cpuacct_percpu_user_seq_show(struct seq_file *m, void *V)
{
return __cpuacct_percpu_seq_show(m, CPUACCT_USAGE_USER);
}
static int cpuacct_percpu_sys_seq_show(struct seq_file *m, void *V)
{
return __cpuacct_percpu_seq_show(m, CPUACCT_USAGE_SYSTEM);
}
static int cpuacct_percpu_seq_show(struct seq_file *m, void *V)
{
return __cpuacct_percpu_seq_show(m, CPUACCT_USAGE_NRUSAGE);
}
static const char * const cpuacct_stat_desc[] = { static const char * const cpuacct_stat_desc[] = {
[CPUACCT_STAT_USER] = "user", [CPUACCT_STAT_USER] = "user",
[CPUACCT_STAT_SYSTEM] = "system", [CPUACCT_STAT_SYSTEM] = "system",
...@@ -191,7 +253,7 @@ static int cpuacct_stats_show(struct seq_file *sf, void *v) ...@@ -191,7 +253,7 @@ static int cpuacct_stats_show(struct seq_file *sf, void *v)
int cpu; int cpu;
s64 val = 0; s64 val = 0;
for_each_online_cpu(cpu) { for_each_possible_cpu(cpu) {
struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu); struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu);
val += kcpustat->cpustat[CPUTIME_USER]; val += kcpustat->cpustat[CPUTIME_USER];
val += kcpustat->cpustat[CPUTIME_NICE]; val += kcpustat->cpustat[CPUTIME_NICE];
...@@ -200,7 +262,7 @@ static int cpuacct_stats_show(struct seq_file *sf, void *v) ...@@ -200,7 +262,7 @@ static int cpuacct_stats_show(struct seq_file *sf, void *v)
seq_printf(sf, "%s %lld\n", cpuacct_stat_desc[CPUACCT_STAT_USER], val); seq_printf(sf, "%s %lld\n", cpuacct_stat_desc[CPUACCT_STAT_USER], val);
val = 0; val = 0;
for_each_online_cpu(cpu) { for_each_possible_cpu(cpu) {
struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu); struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu);
val += kcpustat->cpustat[CPUTIME_SYSTEM]; val += kcpustat->cpustat[CPUTIME_SYSTEM];
val += kcpustat->cpustat[CPUTIME_IRQ]; val += kcpustat->cpustat[CPUTIME_IRQ];
...@@ -219,10 +281,26 @@ static struct cftype files[] = { ...@@ -219,10 +281,26 @@ static struct cftype files[] = {
.read_u64 = cpuusage_read, .read_u64 = cpuusage_read,
.write_u64 = cpuusage_write, .write_u64 = cpuusage_write,
}, },
{
.name = "usage_user",
.read_u64 = cpuusage_user_read,
},
{
.name = "usage_sys",
.read_u64 = cpuusage_sys_read,
},
{ {
.name = "usage_percpu", .name = "usage_percpu",
.seq_show = cpuacct_percpu_seq_show, .seq_show = cpuacct_percpu_seq_show,
}, },
{
.name = "usage_percpu_user",
.seq_show = cpuacct_percpu_user_seq_show,
},
{
.name = "usage_percpu_sys",
.seq_show = cpuacct_percpu_sys_seq_show,
},
{ {
.name = "stat", .name = "stat",
.seq_show = cpuacct_stats_show, .seq_show = cpuacct_stats_show,
...@@ -238,10 +316,17 @@ static struct cftype files[] = { ...@@ -238,10 +316,17 @@ static struct cftype files[] = {
void cpuacct_charge(struct task_struct *tsk, u64 cputime) void cpuacct_charge(struct task_struct *tsk, u64 cputime)
{ {
struct cpuacct *ca; struct cpuacct *ca;
int index = CPUACCT_USAGE_SYSTEM;
struct pt_regs *regs = task_pt_regs(tsk);
if (regs && user_mode(regs))
index = CPUACCT_USAGE_USER;
rcu_read_lock(); rcu_read_lock();
for (ca = task_ca(tsk); ca; ca = parent_ca(ca)) for (ca = task_ca(tsk); ca; ca = parent_ca(ca))
*this_cpu_ptr(ca->cpuusage) += cputime; this_cpu_ptr(ca->cpuusage)->usages[index] += cputime;
rcu_read_unlock(); rcu_read_unlock();
} }
......
...@@ -103,10 +103,10 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p, ...@@ -103,10 +103,10 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
const struct sched_dl_entity *dl_se = &p->dl; const struct sched_dl_entity *dl_se = &p->dl;
if (later_mask && if (later_mask &&
cpumask_and(later_mask, cp->free_cpus, &p->cpus_allowed)) { cpumask_and(later_mask, cp->free_cpus, tsk_cpus_allowed(p))) {
best_cpu = cpumask_any(later_mask); best_cpu = cpumask_any(later_mask);
goto out; goto out;
} else if (cpumask_test_cpu(cpudl_maximum(cp), &p->cpus_allowed) && } else if (cpumask_test_cpu(cpudl_maximum(cp), tsk_cpus_allowed(p)) &&
dl_time_before(dl_se->deadline, cp->elements[0].dl)) { dl_time_before(dl_se->deadline, cp->elements[0].dl)) {
best_cpu = cpudl_maximum(cp); best_cpu = cpudl_maximum(cp);
if (later_mask) if (later_mask)
......
...@@ -103,11 +103,11 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p, ...@@ -103,11 +103,11 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
if (skip) if (skip)
continue; continue;
if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids) if (cpumask_any_and(tsk_cpus_allowed(p), vec->mask) >= nr_cpu_ids)
continue; continue;
if (lowest_mask) { if (lowest_mask) {
cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask); cpumask_and(lowest_mask, tsk_cpus_allowed(p), vec->mask);
/* /*
* We have to ensure that we have at least one bit * We have to ensure that we have at least one bit
......
...@@ -134,7 +134,7 @@ static void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) ...@@ -134,7 +134,7 @@ static void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
{ {
struct task_struct *p = dl_task_of(dl_se); struct task_struct *p = dl_task_of(dl_se);
if (p->nr_cpus_allowed > 1) if (tsk_nr_cpus_allowed(p) > 1)
dl_rq->dl_nr_migratory++; dl_rq->dl_nr_migratory++;
update_dl_migration(dl_rq); update_dl_migration(dl_rq);
...@@ -144,7 +144,7 @@ static void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) ...@@ -144,7 +144,7 @@ static void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
{ {
struct task_struct *p = dl_task_of(dl_se); struct task_struct *p = dl_task_of(dl_se);
if (p->nr_cpus_allowed > 1) if (tsk_nr_cpus_allowed(p) > 1)
dl_rq->dl_nr_migratory--; dl_rq->dl_nr_migratory--;
update_dl_migration(dl_rq); update_dl_migration(dl_rq);
...@@ -591,10 +591,10 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer) ...@@ -591,10 +591,10 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
struct sched_dl_entity, struct sched_dl_entity,
dl_timer); dl_timer);
struct task_struct *p = dl_task_of(dl_se); struct task_struct *p = dl_task_of(dl_se);
unsigned long flags; struct rq_flags rf;
struct rq *rq; struct rq *rq;
rq = task_rq_lock(p, &flags); rq = task_rq_lock(p, &rf);
/* /*
* The task might have changed its scheduling policy to something * The task might have changed its scheduling policy to something
...@@ -670,14 +670,14 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer) ...@@ -670,14 +670,14 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
* Nothing relies on rq->lock after this, so its safe to drop * Nothing relies on rq->lock after this, so its safe to drop
* rq->lock. * rq->lock.
*/ */
lockdep_unpin_lock(&rq->lock); lockdep_unpin_lock(&rq->lock, rf.cookie);
push_dl_task(rq); push_dl_task(rq);
lockdep_pin_lock(&rq->lock); lockdep_repin_lock(&rq->lock, rf.cookie);
} }
#endif #endif
unlock: unlock:
task_rq_unlock(rq, p, &flags); task_rq_unlock(rq, p, &rf);
/* /*
* This can free the task_struct, including this hrtimer, do not touch * This can free the task_struct, including this hrtimer, do not touch
...@@ -717,10 +717,6 @@ static void update_curr_dl(struct rq *rq) ...@@ -717,10 +717,6 @@ static void update_curr_dl(struct rq *rq)
if (!dl_task(curr) || !on_dl_rq(dl_se)) if (!dl_task(curr) || !on_dl_rq(dl_se))
return; return;
/* Kick cpufreq (see the comment in linux/cpufreq.h). */
if (cpu_of(rq) == smp_processor_id())
cpufreq_trigger_update(rq_clock(rq));
/* /*
* Consumed budget is computed considering the time as * Consumed budget is computed considering the time as
* observed by schedulable tasks (excluding time spent * observed by schedulable tasks (excluding time spent
...@@ -736,6 +732,10 @@ static void update_curr_dl(struct rq *rq) ...@@ -736,6 +732,10 @@ static void update_curr_dl(struct rq *rq)
return; return;
} }
/* kick cpufreq (see the comment in linux/cpufreq.h). */
if (cpu_of(rq) == smp_processor_id())
cpufreq_trigger_update(rq_clock(rq));
schedstat_set(curr->se.statistics.exec_max, schedstat_set(curr->se.statistics.exec_max,
max(curr->se.statistics.exec_max, delta_exec)); max(curr->se.statistics.exec_max, delta_exec));
...@@ -966,7 +966,7 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags) ...@@ -966,7 +966,7 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
enqueue_dl_entity(&p->dl, pi_se, flags); enqueue_dl_entity(&p->dl, pi_se, flags);
if (!task_current(rq, p) && p->nr_cpus_allowed > 1) if (!task_current(rq, p) && tsk_nr_cpus_allowed(p) > 1)
enqueue_pushable_dl_task(rq, p); enqueue_pushable_dl_task(rq, p);
} }
...@@ -1040,9 +1040,9 @@ select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags) ...@@ -1040,9 +1040,9 @@ select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags)
* try to make it stay here, it might be important. * try to make it stay here, it might be important.
*/ */
if (unlikely(dl_task(curr)) && if (unlikely(dl_task(curr)) &&
(curr->nr_cpus_allowed < 2 || (tsk_nr_cpus_allowed(curr) < 2 ||
!dl_entity_preempt(&p->dl, &curr->dl)) && !dl_entity_preempt(&p->dl, &curr->dl)) &&
(p->nr_cpus_allowed > 1)) { (tsk_nr_cpus_allowed(p) > 1)) {
int target = find_later_rq(p); int target = find_later_rq(p);
if (target != -1 && if (target != -1 &&
...@@ -1063,7 +1063,7 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p) ...@@ -1063,7 +1063,7 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
* Current can't be migrated, useless to reschedule, * Current can't be migrated, useless to reschedule,
* let's hope p can move out. * let's hope p can move out.
*/ */
if (rq->curr->nr_cpus_allowed == 1 || if (tsk_nr_cpus_allowed(rq->curr) == 1 ||
cpudl_find(&rq->rd->cpudl, rq->curr, NULL) == -1) cpudl_find(&rq->rd->cpudl, rq->curr, NULL) == -1)
return; return;
...@@ -1071,7 +1071,7 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p) ...@@ -1071,7 +1071,7 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
* p is migratable, so let's not schedule it and * p is migratable, so let's not schedule it and
* see if it is pushed or pulled somewhere else. * see if it is pushed or pulled somewhere else.
*/ */
if (p->nr_cpus_allowed != 1 && if (tsk_nr_cpus_allowed(p) != 1 &&
cpudl_find(&rq->rd->cpudl, p, NULL) != -1) cpudl_find(&rq->rd->cpudl, p, NULL) != -1)
return; return;
...@@ -1125,7 +1125,8 @@ static struct sched_dl_entity *pick_next_dl_entity(struct rq *rq, ...@@ -1125,7 +1125,8 @@ static struct sched_dl_entity *pick_next_dl_entity(struct rq *rq,
return rb_entry(left, struct sched_dl_entity, rb_node); return rb_entry(left, struct sched_dl_entity, rb_node);
} }
struct task_struct *pick_next_task_dl(struct rq *rq, struct task_struct *prev) struct task_struct *
pick_next_task_dl(struct rq *rq, struct task_struct *prev, struct pin_cookie cookie)
{ {
struct sched_dl_entity *dl_se; struct sched_dl_entity *dl_se;
struct task_struct *p; struct task_struct *p;
...@@ -1140,9 +1141,9 @@ struct task_struct *pick_next_task_dl(struct rq *rq, struct task_struct *prev) ...@@ -1140,9 +1141,9 @@ struct task_struct *pick_next_task_dl(struct rq *rq, struct task_struct *prev)
* disabled avoiding further scheduler activity on it and we're * disabled avoiding further scheduler activity on it and we're
* being very careful to re-start the picking loop. * being very careful to re-start the picking loop.
*/ */
lockdep_unpin_lock(&rq->lock); lockdep_unpin_lock(&rq->lock, cookie);
pull_dl_task(rq); pull_dl_task(rq);
lockdep_pin_lock(&rq->lock); lockdep_repin_lock(&rq->lock, cookie);
/* /*
* pull_rt_task() can drop (and re-acquire) rq->lock; this * pull_rt_task() can drop (and re-acquire) rq->lock; this
* means a stop task can slip in, in which case we need to * means a stop task can slip in, in which case we need to
...@@ -1185,7 +1186,7 @@ static void put_prev_task_dl(struct rq *rq, struct task_struct *p) ...@@ -1185,7 +1186,7 @@ static void put_prev_task_dl(struct rq *rq, struct task_struct *p)
{ {
update_curr_dl(rq); update_curr_dl(rq);
if (on_dl_rq(&p->dl) && p->nr_cpus_allowed > 1) if (on_dl_rq(&p->dl) && tsk_nr_cpus_allowed(p) > 1)
enqueue_pushable_dl_task(rq, p); enqueue_pushable_dl_task(rq, p);
} }
...@@ -1286,7 +1287,7 @@ static int find_later_rq(struct task_struct *task) ...@@ -1286,7 +1287,7 @@ static int find_later_rq(struct task_struct *task)
if (unlikely(!later_mask)) if (unlikely(!later_mask))
return -1; return -1;
if (task->nr_cpus_allowed == 1) if (tsk_nr_cpus_allowed(task) == 1)
return -1; return -1;
/* /*
...@@ -1392,7 +1393,7 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq) ...@@ -1392,7 +1393,7 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
if (double_lock_balance(rq, later_rq)) { if (double_lock_balance(rq, later_rq)) {
if (unlikely(task_rq(task) != rq || if (unlikely(task_rq(task) != rq ||
!cpumask_test_cpu(later_rq->cpu, !cpumask_test_cpu(later_rq->cpu,
&task->cpus_allowed) || tsk_cpus_allowed(task)) ||
task_running(rq, task) || task_running(rq, task) ||
!dl_task(task) || !dl_task(task) ||
!task_on_rq_queued(task))) { !task_on_rq_queued(task))) {
...@@ -1432,7 +1433,7 @@ static struct task_struct *pick_next_pushable_dl_task(struct rq *rq) ...@@ -1432,7 +1433,7 @@ static struct task_struct *pick_next_pushable_dl_task(struct rq *rq)
BUG_ON(rq->cpu != task_cpu(p)); BUG_ON(rq->cpu != task_cpu(p));
BUG_ON(task_current(rq, p)); BUG_ON(task_current(rq, p));
BUG_ON(p->nr_cpus_allowed <= 1); BUG_ON(tsk_nr_cpus_allowed(p) <= 1);
BUG_ON(!task_on_rq_queued(p)); BUG_ON(!task_on_rq_queued(p));
BUG_ON(!dl_task(p)); BUG_ON(!dl_task(p));
...@@ -1471,7 +1472,7 @@ static int push_dl_task(struct rq *rq) ...@@ -1471,7 +1472,7 @@ static int push_dl_task(struct rq *rq)
*/ */
if (dl_task(rq->curr) && if (dl_task(rq->curr) &&
dl_time_before(next_task->dl.deadline, rq->curr->dl.deadline) && dl_time_before(next_task->dl.deadline, rq->curr->dl.deadline) &&
rq->curr->nr_cpus_allowed > 1) { tsk_nr_cpus_allowed(rq->curr) > 1) {
resched_curr(rq); resched_curr(rq);
return 0; return 0;
} }
...@@ -1618,9 +1619,9 @@ static void task_woken_dl(struct rq *rq, struct task_struct *p) ...@@ -1618,9 +1619,9 @@ static void task_woken_dl(struct rq *rq, struct task_struct *p)
{ {
if (!task_running(rq, p) && if (!task_running(rq, p) &&
!test_tsk_need_resched(rq->curr) && !test_tsk_need_resched(rq->curr) &&
p->nr_cpus_allowed > 1 && tsk_nr_cpus_allowed(p) > 1 &&
dl_task(rq->curr) && dl_task(rq->curr) &&
(rq->curr->nr_cpus_allowed < 2 || (tsk_nr_cpus_allowed(rq->curr) < 2 ||
!dl_entity_preempt(&p->dl, &rq->curr->dl))) { !dl_entity_preempt(&p->dl, &rq->curr->dl))) {
push_dl_tasks(rq); push_dl_tasks(rq);
} }
...@@ -1724,7 +1725,7 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p) ...@@ -1724,7 +1725,7 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p)
if (task_on_rq_queued(p) && rq->curr != p) { if (task_on_rq_queued(p) && rq->curr != p) {
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
if (p->nr_cpus_allowed > 1 && rq->dl.overloaded) if (tsk_nr_cpus_allowed(p) > 1 && rq->dl.overloaded)
queue_push_tasks(rq); queue_push_tasks(rq);
#else #else
if (dl_task(rq->curr)) if (dl_task(rq->curr))
......
...@@ -626,15 +626,16 @@ do { \ ...@@ -626,15 +626,16 @@ do { \
#undef P #undef P
#undef PN #undef PN
#ifdef CONFIG_SCHEDSTATS
#define P(n) SEQ_printf(m, " .%-30s: %d\n", #n, rq->n);
#define P64(n) SEQ_printf(m, " .%-30s: %Ld\n", #n, rq->n);
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
#define P64(n) SEQ_printf(m, " .%-30s: %Ld\n", #n, rq->n);
P64(avg_idle); P64(avg_idle);
P64(max_idle_balance_cost); P64(max_idle_balance_cost);
#undef P64
#endif #endif
#ifdef CONFIG_SCHEDSTATS
#define P(n) SEQ_printf(m, " .%-30s: %d\n", #n, rq->n);
if (schedstat_enabled()) { if (schedstat_enabled()) {
P(yld_count); P(yld_count);
P(sched_count); P(sched_count);
...@@ -644,7 +645,6 @@ do { \ ...@@ -644,7 +645,6 @@ do { \
} }
#undef P #undef P
#undef P64
#endif #endif
spin_lock_irqsave(&sched_debug_lock, flags); spin_lock_irqsave(&sched_debug_lock, flags);
print_cfs_stats(m, cpu); print_cfs_stats(m, cpu);
......
This diff is collapsed.
...@@ -24,7 +24,7 @@ static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int fl ...@@ -24,7 +24,7 @@ static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int fl
} }
static struct task_struct * static struct task_struct *
pick_next_task_idle(struct rq *rq, struct task_struct *prev) pick_next_task_idle(struct rq *rq, struct task_struct *prev, struct pin_cookie cookie)
{ {
put_prev_task(rq, prev); put_prev_task(rq, prev);
......
...@@ -99,10 +99,13 @@ long calc_load_fold_active(struct rq *this_rq) ...@@ -99,10 +99,13 @@ long calc_load_fold_active(struct rq *this_rq)
static unsigned long static unsigned long
calc_load(unsigned long load, unsigned long exp, unsigned long active) calc_load(unsigned long load, unsigned long exp, unsigned long active)
{ {
load *= exp; unsigned long newload;
load += active * (FIXED_1 - exp);
load += 1UL << (FSHIFT - 1); newload = load * exp + active * (FIXED_1 - exp);
return load >> FSHIFT; if (active >= load)
newload += FIXED_1-1;
return newload / FIXED_1;
} }
#ifdef CONFIG_NO_HZ_COMMON #ifdef CONFIG_NO_HZ_COMMON
......
...@@ -334,7 +334,7 @@ static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) ...@@ -334,7 +334,7 @@ static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
rt_rq = &rq_of_rt_rq(rt_rq)->rt; rt_rq = &rq_of_rt_rq(rt_rq)->rt;
rt_rq->rt_nr_total++; rt_rq->rt_nr_total++;
if (p->nr_cpus_allowed > 1) if (tsk_nr_cpus_allowed(p) > 1)
rt_rq->rt_nr_migratory++; rt_rq->rt_nr_migratory++;
update_rt_migration(rt_rq); update_rt_migration(rt_rq);
...@@ -351,7 +351,7 @@ static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) ...@@ -351,7 +351,7 @@ static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
rt_rq = &rq_of_rt_rq(rt_rq)->rt; rt_rq = &rq_of_rt_rq(rt_rq)->rt;
rt_rq->rt_nr_total--; rt_rq->rt_nr_total--;
if (p->nr_cpus_allowed > 1) if (tsk_nr_cpus_allowed(p) > 1)
rt_rq->rt_nr_migratory--; rt_rq->rt_nr_migratory--;
update_rt_migration(rt_rq); update_rt_migration(rt_rq);
...@@ -953,14 +953,14 @@ static void update_curr_rt(struct rq *rq) ...@@ -953,14 +953,14 @@ static void update_curr_rt(struct rq *rq)
if (curr->sched_class != &rt_sched_class) if (curr->sched_class != &rt_sched_class)
return; return;
/* Kick cpufreq (see the comment in linux/cpufreq.h). */
if (cpu_of(rq) == smp_processor_id())
cpufreq_trigger_update(rq_clock(rq));
delta_exec = rq_clock_task(rq) - curr->se.exec_start; delta_exec = rq_clock_task(rq) - curr->se.exec_start;
if (unlikely((s64)delta_exec <= 0)) if (unlikely((s64)delta_exec <= 0))
return; return;
/* Kick cpufreq (see the comment in linux/cpufreq.h). */
if (cpu_of(rq) == smp_processor_id())
cpufreq_trigger_update(rq_clock(rq));
schedstat_set(curr->se.statistics.exec_max, schedstat_set(curr->se.statistics.exec_max,
max(curr->se.statistics.exec_max, delta_exec)); max(curr->se.statistics.exec_max, delta_exec));
...@@ -1324,7 +1324,7 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags) ...@@ -1324,7 +1324,7 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
enqueue_rt_entity(rt_se, flags); enqueue_rt_entity(rt_se, flags);
if (!task_current(rq, p) && p->nr_cpus_allowed > 1) if (!task_current(rq, p) && tsk_nr_cpus_allowed(p) > 1)
enqueue_pushable_task(rq, p); enqueue_pushable_task(rq, p);
} }
...@@ -1413,7 +1413,7 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags) ...@@ -1413,7 +1413,7 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
* will have to sort it out. * will have to sort it out.
*/ */
if (curr && unlikely(rt_task(curr)) && if (curr && unlikely(rt_task(curr)) &&
(curr->nr_cpus_allowed < 2 || (tsk_nr_cpus_allowed(curr) < 2 ||
curr->prio <= p->prio)) { curr->prio <= p->prio)) {
int target = find_lowest_rq(p); int target = find_lowest_rq(p);
...@@ -1437,7 +1437,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) ...@@ -1437,7 +1437,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
* Current can't be migrated, useless to reschedule, * Current can't be migrated, useless to reschedule,
* let's hope p can move out. * let's hope p can move out.
*/ */
if (rq->curr->nr_cpus_allowed == 1 || if (tsk_nr_cpus_allowed(rq->curr) == 1 ||
!cpupri_find(&rq->rd->cpupri, rq->curr, NULL)) !cpupri_find(&rq->rd->cpupri, rq->curr, NULL))
return; return;
...@@ -1445,7 +1445,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) ...@@ -1445,7 +1445,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
* p is migratable, so let's not schedule it and * p is migratable, so let's not schedule it and
* see if it is pushed or pulled somewhere else. * see if it is pushed or pulled somewhere else.
*/ */
if (p->nr_cpus_allowed != 1 if (tsk_nr_cpus_allowed(p) != 1
&& cpupri_find(&rq->rd->cpupri, p, NULL)) && cpupri_find(&rq->rd->cpupri, p, NULL))
return; return;
...@@ -1524,7 +1524,7 @@ static struct task_struct *_pick_next_task_rt(struct rq *rq) ...@@ -1524,7 +1524,7 @@ static struct task_struct *_pick_next_task_rt(struct rq *rq)
} }
static struct task_struct * static struct task_struct *
pick_next_task_rt(struct rq *rq, struct task_struct *prev) pick_next_task_rt(struct rq *rq, struct task_struct *prev, struct pin_cookie cookie)
{ {
struct task_struct *p; struct task_struct *p;
struct rt_rq *rt_rq = &rq->rt; struct rt_rq *rt_rq = &rq->rt;
...@@ -1536,9 +1536,9 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev) ...@@ -1536,9 +1536,9 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev)
* disabled avoiding further scheduler activity on it and we're * disabled avoiding further scheduler activity on it and we're
* being very careful to re-start the picking loop. * being very careful to re-start the picking loop.
*/ */
lockdep_unpin_lock(&rq->lock); lockdep_unpin_lock(&rq->lock, cookie);
pull_rt_task(rq); pull_rt_task(rq);
lockdep_pin_lock(&rq->lock); lockdep_repin_lock(&rq->lock, cookie);
/* /*
* pull_rt_task() can drop (and re-acquire) rq->lock; this * pull_rt_task() can drop (and re-acquire) rq->lock; this
* means a dl or stop task can slip in, in which case we need * means a dl or stop task can slip in, in which case we need
...@@ -1579,7 +1579,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p) ...@@ -1579,7 +1579,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
* The previous task needs to be made eligible for pushing * The previous task needs to be made eligible for pushing
* if it is still active * if it is still active
*/ */
if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1) if (on_rt_rq(&p->rt) && tsk_nr_cpus_allowed(p) > 1)
enqueue_pushable_task(rq, p); enqueue_pushable_task(rq, p);
} }
...@@ -1629,7 +1629,7 @@ static int find_lowest_rq(struct task_struct *task) ...@@ -1629,7 +1629,7 @@ static int find_lowest_rq(struct task_struct *task)
if (unlikely(!lowest_mask)) if (unlikely(!lowest_mask))
return -1; return -1;
if (task->nr_cpus_allowed == 1) if (tsk_nr_cpus_allowed(task) == 1)
return -1; /* No other targets possible */ return -1; /* No other targets possible */
if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask)) if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))
...@@ -1762,7 +1762,7 @@ static struct task_struct *pick_next_pushable_task(struct rq *rq) ...@@ -1762,7 +1762,7 @@ static struct task_struct *pick_next_pushable_task(struct rq *rq)
BUG_ON(rq->cpu != task_cpu(p)); BUG_ON(rq->cpu != task_cpu(p));
BUG_ON(task_current(rq, p)); BUG_ON(task_current(rq, p));
BUG_ON(p->nr_cpus_allowed <= 1); BUG_ON(tsk_nr_cpus_allowed(p) <= 1);
BUG_ON(!task_on_rq_queued(p)); BUG_ON(!task_on_rq_queued(p));
BUG_ON(!rt_task(p)); BUG_ON(!rt_task(p));
...@@ -2122,9 +2122,9 @@ static void task_woken_rt(struct rq *rq, struct task_struct *p) ...@@ -2122,9 +2122,9 @@ static void task_woken_rt(struct rq *rq, struct task_struct *p)
{ {
if (!task_running(rq, p) && if (!task_running(rq, p) &&
!test_tsk_need_resched(rq->curr) && !test_tsk_need_resched(rq->curr) &&
p->nr_cpus_allowed > 1 && tsk_nr_cpus_allowed(p) > 1 &&
(dl_task(rq->curr) || rt_task(rq->curr)) && (dl_task(rq->curr) || rt_task(rq->curr)) &&
(rq->curr->nr_cpus_allowed < 2 || (tsk_nr_cpus_allowed(rq->curr) < 2 ||
rq->curr->prio <= p->prio)) rq->curr->prio <= p->prio))
push_rt_tasks(rq); push_rt_tasks(rq);
} }
...@@ -2197,7 +2197,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p) ...@@ -2197,7 +2197,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p)
*/ */
if (task_on_rq_queued(p) && rq->curr != p) { if (task_on_rq_queued(p) && rq->curr != p) {
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
if (p->nr_cpus_allowed > 1 && rq->rt.overloaded) if (tsk_nr_cpus_allowed(p) > 1 && rq->rt.overloaded)
queue_push_tasks(rq); queue_push_tasks(rq);
#else #else
if (p->prio < rq->curr->prio) if (p->prio < rq->curr->prio)
......
...@@ -31,9 +31,9 @@ extern void calc_global_load_tick(struct rq *this_rq); ...@@ -31,9 +31,9 @@ extern void calc_global_load_tick(struct rq *this_rq);
extern long calc_load_fold_active(struct rq *this_rq); extern long calc_load_fold_active(struct rq *this_rq);
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
extern void update_cpu_load_active(struct rq *this_rq); extern void cpu_load_update_active(struct rq *this_rq);
#else #else
static inline void update_cpu_load_active(struct rq *this_rq) { } static inline void cpu_load_update_active(struct rq *this_rq) { }
#endif #endif
/* /*
...@@ -49,25 +49,32 @@ static inline void update_cpu_load_active(struct rq *this_rq) { } ...@@ -49,25 +49,32 @@ static inline void update_cpu_load_active(struct rq *this_rq) { }
* and does not change the user-interface for setting shares/weights. * and does not change the user-interface for setting shares/weights.
* *
* We increase resolution only if we have enough bits to allow this increased * We increase resolution only if we have enough bits to allow this increased
* resolution (i.e. BITS_PER_LONG > 32). The costs for increasing resolution * resolution (i.e. 64bit). The costs for increasing resolution when 32bit are
* when BITS_PER_LONG <= 32 are pretty high and the returns do not justify the * pretty high and the returns do not justify the increased costs.
* increased costs. *
* Really only required when CONFIG_FAIR_GROUP_SCHED is also set, but to
* increase coverage and consistency always enable it on 64bit platforms.
*/ */
#if 0 /* BITS_PER_LONG > 32 -- currently broken: it increases power usage under light load */ #ifdef CONFIG_64BIT
# define SCHED_LOAD_RESOLUTION 10 # define NICE_0_LOAD_SHIFT (SCHED_FIXEDPOINT_SHIFT + SCHED_FIXEDPOINT_SHIFT)
# define scale_load(w) ((w) << SCHED_LOAD_RESOLUTION) # define scale_load(w) ((w) << SCHED_FIXEDPOINT_SHIFT)
# define scale_load_down(w) ((w) >> SCHED_LOAD_RESOLUTION) # define scale_load_down(w) ((w) >> SCHED_FIXEDPOINT_SHIFT)
#else #else
# define SCHED_LOAD_RESOLUTION 0 # define NICE_0_LOAD_SHIFT (SCHED_FIXEDPOINT_SHIFT)
# define scale_load(w) (w) # define scale_load(w) (w)
# define scale_load_down(w) (w) # define scale_load_down(w) (w)
#endif #endif
#define SCHED_LOAD_SHIFT (10 + SCHED_LOAD_RESOLUTION) /*
#define SCHED_LOAD_SCALE (1L << SCHED_LOAD_SHIFT) * Task weight (visible to users) and its load (invisible to users) have
* independent resolution, but they should be well calibrated. We use
#define NICE_0_LOAD SCHED_LOAD_SCALE * scale_load() and scale_load_down(w) to convert between them. The
#define NICE_0_SHIFT SCHED_LOAD_SHIFT * following must be true:
*
* scale_load(sched_prio_to_weight[USER_PRIO(NICE_TO_PRIO(0))]) == NICE_0_LOAD
*
*/
#define NICE_0_LOAD (1L << NICE_0_LOAD_SHIFT)
/* /*
* Single value that decides SCHED_DEADLINE internal math precision. * Single value that decides SCHED_DEADLINE internal math precision.
...@@ -585,11 +592,13 @@ struct rq { ...@@ -585,11 +592,13 @@ struct rq {
#endif #endif
#define CPU_LOAD_IDX_MAX 5 #define CPU_LOAD_IDX_MAX 5
unsigned long cpu_load[CPU_LOAD_IDX_MAX]; unsigned long cpu_load[CPU_LOAD_IDX_MAX];
unsigned long last_load_update_tick;
#ifdef CONFIG_NO_HZ_COMMON #ifdef CONFIG_NO_HZ_COMMON
#ifdef CONFIG_SMP
unsigned long last_load_update_tick;
#endif /* CONFIG_SMP */
u64 nohz_stamp; u64 nohz_stamp;
unsigned long nohz_flags; unsigned long nohz_flags;
#endif #endif /* CONFIG_NO_HZ_COMMON */
#ifdef CONFIG_NO_HZ_FULL #ifdef CONFIG_NO_HZ_FULL
unsigned long last_sched_tick; unsigned long last_sched_tick;
#endif #endif
...@@ -854,7 +863,7 @@ DECLARE_PER_CPU(struct sched_domain *, sd_asym); ...@@ -854,7 +863,7 @@ DECLARE_PER_CPU(struct sched_domain *, sd_asym);
struct sched_group_capacity { struct sched_group_capacity {
atomic_t ref; atomic_t ref;
/* /*
* CPU capacity of this group, SCHED_LOAD_SCALE being max capacity * CPU capacity of this group, SCHED_CAPACITY_SCALE being max capacity
* for a single CPU. * for a single CPU.
*/ */
unsigned int capacity; unsigned int capacity;
...@@ -1159,7 +1168,7 @@ extern const u32 sched_prio_to_wmult[40]; ...@@ -1159,7 +1168,7 @@ extern const u32 sched_prio_to_wmult[40];
* *
* ENQUEUE_HEAD - place at front of runqueue (tail if not specified) * ENQUEUE_HEAD - place at front of runqueue (tail if not specified)
* ENQUEUE_REPLENISH - CBS (replenish runtime and postpone deadline) * ENQUEUE_REPLENISH - CBS (replenish runtime and postpone deadline)
* ENQUEUE_WAKING - sched_class::task_waking was called * ENQUEUE_MIGRATED - the task was migrated during wakeup
* *
*/ */
...@@ -1174,9 +1183,9 @@ extern const u32 sched_prio_to_wmult[40]; ...@@ -1174,9 +1183,9 @@ extern const u32 sched_prio_to_wmult[40];
#define ENQUEUE_HEAD 0x08 #define ENQUEUE_HEAD 0x08
#define ENQUEUE_REPLENISH 0x10 #define ENQUEUE_REPLENISH 0x10
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
#define ENQUEUE_WAKING 0x20 #define ENQUEUE_MIGRATED 0x20
#else #else
#define ENQUEUE_WAKING 0x00 #define ENQUEUE_MIGRATED 0x00
#endif #endif
#define RETRY_TASK ((void *)-1UL) #define RETRY_TASK ((void *)-1UL)
...@@ -1200,14 +1209,14 @@ struct sched_class { ...@@ -1200,14 +1209,14 @@ struct sched_class {
* tasks. * tasks.
*/ */
struct task_struct * (*pick_next_task) (struct rq *rq, struct task_struct * (*pick_next_task) (struct rq *rq,
struct task_struct *prev); struct task_struct *prev,
struct pin_cookie cookie);
void (*put_prev_task) (struct rq *rq, struct task_struct *p); void (*put_prev_task) (struct rq *rq, struct task_struct *p);
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
int (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags); int (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags);
void (*migrate_task_rq)(struct task_struct *p); void (*migrate_task_rq)(struct task_struct *p);
void (*task_waking) (struct task_struct *task);
void (*task_woken) (struct rq *this_rq, struct task_struct *task); void (*task_woken) (struct rq *this_rq, struct task_struct *task);
void (*set_cpus_allowed)(struct task_struct *p, void (*set_cpus_allowed)(struct task_struct *p,
...@@ -1313,6 +1322,7 @@ extern void init_dl_task_timer(struct sched_dl_entity *dl_se); ...@@ -1313,6 +1322,7 @@ extern void init_dl_task_timer(struct sched_dl_entity *dl_se);
unsigned long to_ratio(u64 period, u64 runtime); unsigned long to_ratio(u64 period, u64 runtime);
extern void init_entity_runnable_average(struct sched_entity *se); extern void init_entity_runnable_average(struct sched_entity *se);
extern void post_init_entity_util_avg(struct sched_entity *se);
#ifdef CONFIG_NO_HZ_FULL #ifdef CONFIG_NO_HZ_FULL
extern bool sched_can_stop_tick(struct rq *rq); extern bool sched_can_stop_tick(struct rq *rq);
...@@ -1448,86 +1458,32 @@ static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta) { } ...@@ -1448,86 +1458,32 @@ static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta) { }
static inline void sched_avg_update(struct rq *rq) { } static inline void sched_avg_update(struct rq *rq) { }
#endif #endif
/* struct rq_flags {
* __task_rq_lock - lock the rq @p resides on. unsigned long flags;
*/ struct pin_cookie cookie;
static inline struct rq *__task_rq_lock(struct task_struct *p) };
__acquires(rq->lock)
{
struct rq *rq;
lockdep_assert_held(&p->pi_lock);
for (;;) {
rq = task_rq(p);
raw_spin_lock(&rq->lock);
if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) {
lockdep_pin_lock(&rq->lock);
return rq;
}
raw_spin_unlock(&rq->lock);
while (unlikely(task_on_rq_migrating(p)))
cpu_relax();
}
}
/* struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf)
* task_rq_lock - lock p->pi_lock and lock the rq @p resides on. __acquires(rq->lock);
*/ struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf)
static inline struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
__acquires(p->pi_lock) __acquires(p->pi_lock)
__acquires(rq->lock) __acquires(rq->lock);
{
struct rq *rq;
for (;;) {
raw_spin_lock_irqsave(&p->pi_lock, *flags);
rq = task_rq(p);
raw_spin_lock(&rq->lock);
/*
* move_queued_task() task_rq_lock()
*
* ACQUIRE (rq->lock)
* [S] ->on_rq = MIGRATING [L] rq = task_rq()
* WMB (__set_task_cpu()) ACQUIRE (rq->lock);
* [S] ->cpu = new_cpu [L] task_rq()
* [L] ->on_rq
* RELEASE (rq->lock)
*
* If we observe the old cpu in task_rq_lock, the acquire of
* the old rq->lock will fully serialize against the stores.
*
* If we observe the new cpu in task_rq_lock, the acquire will
* pair with the WMB to ensure we must then also see migrating.
*/
if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) {
lockdep_pin_lock(&rq->lock);
return rq;
}
raw_spin_unlock(&rq->lock);
raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
while (unlikely(task_on_rq_migrating(p))) static inline void __task_rq_unlock(struct rq *rq, struct rq_flags *rf)
cpu_relax();
}
}
static inline void __task_rq_unlock(struct rq *rq)
__releases(rq->lock) __releases(rq->lock)
{ {
lockdep_unpin_lock(&rq->lock); lockdep_unpin_lock(&rq->lock, rf->cookie);
raw_spin_unlock(&rq->lock); raw_spin_unlock(&rq->lock);
} }
static inline void static inline void
task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags) task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
__releases(rq->lock) __releases(rq->lock)
__releases(p->pi_lock) __releases(p->pi_lock)
{ {
lockdep_unpin_lock(&rq->lock); lockdep_unpin_lock(&rq->lock, rf->cookie);
raw_spin_unlock(&rq->lock); raw_spin_unlock(&rq->lock);
raw_spin_unlock_irqrestore(&p->pi_lock, *flags); raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags);
} }
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
...@@ -1743,6 +1699,10 @@ enum rq_nohz_flag_bits { ...@@ -1743,6 +1699,10 @@ enum rq_nohz_flag_bits {
}; };
#define nohz_flags(cpu) (&cpu_rq(cpu)->nohz_flags) #define nohz_flags(cpu) (&cpu_rq(cpu)->nohz_flags)
extern void nohz_balance_exit_idle(unsigned int cpu);
#else
static inline void nohz_balance_exit_idle(unsigned int cpu) { }
#endif #endif
#ifdef CONFIG_IRQ_TIME_ACCOUNTING #ifdef CONFIG_IRQ_TIME_ACCOUNTING
......
...@@ -24,7 +24,7 @@ check_preempt_curr_stop(struct rq *rq, struct task_struct *p, int flags) ...@@ -24,7 +24,7 @@ check_preempt_curr_stop(struct rq *rq, struct task_struct *p, int flags)
} }
static struct task_struct * static struct task_struct *
pick_next_task_stop(struct rq *rq, struct task_struct *prev) pick_next_task_stop(struct rq *rq, struct task_struct *prev, struct pin_cookie cookie)
{ {
struct task_struct *stop = rq->stop; struct task_struct *stop = rq->stop;
......
...@@ -776,6 +776,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, ...@@ -776,6 +776,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
if (!ts->tick_stopped) { if (!ts->tick_stopped) {
nohz_balance_enter_idle(cpu); nohz_balance_enter_idle(cpu);
calc_load_enter_idle(); calc_load_enter_idle();
cpu_load_update_nohz_start();
ts->last_tick = hrtimer_get_expires(&ts->sched_timer); ts->last_tick = hrtimer_get_expires(&ts->sched_timer);
ts->tick_stopped = 1; ts->tick_stopped = 1;
...@@ -802,11 +803,11 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, ...@@ -802,11 +803,11 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
return tick; return tick;
} }
static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now, int active) static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
{ {
/* Update jiffies first */ /* Update jiffies first */
tick_do_update_jiffies64(now); tick_do_update_jiffies64(now);
update_cpu_load_nohz(active); cpu_load_update_nohz_stop();
calc_load_exit_idle(); calc_load_exit_idle();
touch_softlockup_watchdog_sched(); touch_softlockup_watchdog_sched();
...@@ -833,7 +834,7 @@ static void tick_nohz_full_update_tick(struct tick_sched *ts) ...@@ -833,7 +834,7 @@ static void tick_nohz_full_update_tick(struct tick_sched *ts)
if (can_stop_full_tick(ts)) if (can_stop_full_tick(ts))
tick_nohz_stop_sched_tick(ts, ktime_get(), cpu); tick_nohz_stop_sched_tick(ts, ktime_get(), cpu);
else if (ts->tick_stopped) else if (ts->tick_stopped)
tick_nohz_restart_sched_tick(ts, ktime_get(), 1); tick_nohz_restart_sched_tick(ts, ktime_get());
#endif #endif
} }
...@@ -1024,7 +1025,7 @@ void tick_nohz_idle_exit(void) ...@@ -1024,7 +1025,7 @@ void tick_nohz_idle_exit(void)
tick_nohz_stop_idle(ts, now); tick_nohz_stop_idle(ts, now);
if (ts->tick_stopped) { if (ts->tick_stopped) {
tick_nohz_restart_sched_tick(ts, now, 0); tick_nohz_restart_sched_tick(ts, now);
tick_nohz_account_idle_ticks(ts); tick_nohz_account_idle_ticks(ts);
} }
......
...@@ -4,9 +4,9 @@ ...@@ -4,9 +4,9 @@
*/ */
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/sched.h>
#include <linux/mmu_context.h> #include <linux/mmu_context.h>
#include <linux/export.h> #include <linux/export.h>
#include <linux/sched.h>
#include <asm/mmu_context.h> #include <asm/mmu_context.h>
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment