Commit 62214039 authored by Rafael J. Wysocki

Merge branch 'pm-cpuidle'

* pm-cpuidle:
  cpuidle: Pass exit latency limit to cpuidle_use_deepest_state()
  cpuidle: Allow idle injection to apply exit latency limit
  cpuidle: Introduce cpuidle_driver_state_disabled() for driver quirks
  cpuidle: teo: Avoid code duplication in conditionals
  cpuidle: teo: Avoid using "early hits" incorrectly
  cpuidle: teo: Exclude cpuidle overhead from computations
  cpuidle: Use nanoseconds as the unit of time
  cpuidle: Consolidate disabled state checks
  ACPI: processor_idle: Skip dummy wait if kernel is in guest
  cpuidle: Do not unset the driver if it is there already
  cpuidle: teo: Fix "early hits" handling for disabled idle states
  cpuidle: teo: Consider hits and misses metrics of disabled states
  cpuidle: teo: Rename local variable in teo_select()
  cpuidle: teo: Ignore disabled idle states that are too deep
parents 05ff1ba4 5aa9ba63
@@ -62,13 +62,13 @@ static struct cpuidle_driver imx6q_cpuidle_driver = {
  */
 void imx6q_cpuidle_fec_irqs_used(void)
 {
-	imx6q_cpuidle_driver.states[1].disabled = true;
+	cpuidle_driver_state_disabled(&imx6q_cpuidle_driver, 1, true);
 }
 EXPORT_SYMBOL_GPL(imx6q_cpuidle_fec_irqs_used);
 
 void imx6q_cpuidle_fec_irqs_unused(void)
 {
-	imx6q_cpuidle_driver.states[1].disabled = false;
+	cpuidle_driver_state_disabled(&imx6q_cpuidle_driver, 1, false);
 }
 EXPORT_SYMBOL_GPL(imx6q_cpuidle_fec_irqs_unused);
...
@@ -203,7 +203,7 @@ void tegra20_cpuidle_pcie_irqs_in_use(void)
 {
 	pr_info_once(
 		"Disabling cpuidle LP2 state, since PCIe IRQs are in use\n");
-	tegra_idle_driver.states[1].disabled = true;
+	cpuidle_driver_state_disabled(&tegra_idle_driver, 1, true);
 }
 
 int __init tegra20_cpuidle_init(void)
...
@@ -642,6 +642,19 @@ static int acpi_idle_bm_check(void)
 	return bm_status;
 }
 
+static void wait_for_freeze(void)
+{
+#ifdef CONFIG_X86
+	/* No delay is needed if we are in guest */
+	if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
+		return;
+#endif
+	/* Dummy wait op - must do something useless after P_LVL2 read
+	   because chipsets cannot guarantee that STPCLK# signal
+	   gets asserted in time to freeze execution properly. */
+	inl(acpi_gbl_FADT.xpm_timer_block.address);
+}
+
 /**
  * acpi_idle_do_entry - enter idle state using the appropriate method
  * @cx: cstate data
@@ -658,10 +671,7 @@ static void __cpuidle acpi_idle_do_entry(struct acpi_processor_cx *cx)
 	} else {
 		/* IO port based C-state */
 		inb(cx->address);
-		/* Dummy wait op - must do something useless after P_LVL2 read
-		   because chipsets cannot guarantee that STPCLK# signal
-		   gets asserted in time to freeze execution properly. */
-		inl(acpi_gbl_FADT.xpm_timer_block.address);
+		wait_for_freeze();
 	}
 }
 
@@ -682,8 +692,7 @@ static int acpi_idle_play_dead(struct cpuidle_device *dev, int index)
 			safe_halt();
 		else if (cx->entry_method == ACPI_CSTATE_SYSTEMIO) {
 			inb(cx->address);
-			/* See comment in acpi_idle_do_entry() */
-			inl(acpi_gbl_FADT.xpm_timer_block.address);
+			wait_for_freeze();
 		} else
 			return -ENODEV;
 	}
...
@@ -56,13 +56,10 @@ static u64 get_snooze_timeout(struct cpuidle_device *dev,
 		return default_snooze_timeout;
 
 	for (i = index + 1; i < drv->state_count; i++) {
-		struct cpuidle_state *s = &drv->states[i];
-		struct cpuidle_state_usage *su = &dev->states_usage[i];
-
-		if (s->disabled || su->disable)
+		if (dev->states_usage[i].disable)
 			continue;
 
-		return s->target_residency * tb_ticks_per_usec;
+		return drv->states[i].target_residency * tb_ticks_per_usec;
 	}
 
 	return default_snooze_timeout;
...
@@ -75,44 +75,45 @@ int cpuidle_play_dead(void)
 static int find_deepest_state(struct cpuidle_driver *drv,
 			      struct cpuidle_device *dev,
-			      unsigned int max_latency,
+			      u64 max_latency_ns,
 			      unsigned int forbidden_flags,
 			      bool s2idle)
 {
-	unsigned int latency_req = 0;
+	u64 latency_req = 0;
 	int i, ret = 0;
 
 	for (i = 1; i < drv->state_count; i++) {
 		struct cpuidle_state *s = &drv->states[i];
-		struct cpuidle_state_usage *su = &dev->states_usage[i];
 
-		if (s->disabled || su->disable || s->exit_latency <= latency_req
-		    || s->exit_latency > max_latency
-		    || (s->flags & forbidden_flags)
-		    || (s2idle && !s->enter_s2idle))
+		if (dev->states_usage[i].disable ||
+		    s->exit_latency_ns <= latency_req ||
+		    s->exit_latency_ns > max_latency_ns ||
+		    (s->flags & forbidden_flags) ||
+		    (s2idle && !s->enter_s2idle))
 			continue;
 
-		latency_req = s->exit_latency;
+		latency_req = s->exit_latency_ns;
 		ret = i;
 	}
 	return ret;
 }
 
 /**
- * cpuidle_use_deepest_state - Set/clear governor override flag.
- * @enable: New value of the flag.
+ * cpuidle_use_deepest_state - Set/unset governor override mode.
+ * @latency_limit_ns: Idle state exit latency limit (or no override if 0).
  *
- * Set/unset the current CPU to use the deepest idle state (override governors
- * going forward if set).
+ * If @latency_limit_ns is nonzero, set the current CPU to use the deepest idle
+ * state with exit latency within @latency_limit_ns (override governors going
+ * forward), or do not override governors if it is zero.
  */
-void cpuidle_use_deepest_state(bool enable)
+void cpuidle_use_deepest_state(u64 latency_limit_ns)
 {
 	struct cpuidle_device *dev;
 
 	preempt_disable();
 	dev = cpuidle_get_device();
 	if (dev)
-		dev->use_deepest_state = enable;
+		dev->forced_idle_latency_limit_ns = latency_limit_ns;
 	preempt_enable();
 }
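
A minimal sketch of how a caller might use the reworked override (illustrative only; forced_idle_example() and the 100 us limit are made up for this note, and the play_idle_precise() path added later in this merge is the real consumer):

static void forced_idle_example(void)
{
	/* Override governors, but only allow exit latencies up to 100 us. */
	cpuidle_use_deepest_state(100 * NSEC_PER_USEC);

	/* ... the idle loop now picks the deepest state within the limit ... */

	/* Drop the override so governors are back in charge. */
	cpuidle_use_deepest_state(0);
}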
@@ -122,9 +123,10 @@ void cpuidle_use_deepest_state(bool enable)
  * @dev: cpuidle device for the given CPU.
  */
 int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
-			       struct cpuidle_device *dev)
+			       struct cpuidle_device *dev,
+			       u64 latency_limit_ns)
 {
-	return find_deepest_state(drv, dev, UINT_MAX, 0, false);
+	return find_deepest_state(drv, dev, latency_limit_ns, 0, false);
 }
 
 #ifdef CONFIG_SUSPEND
@@ -180,7 +182,7 @@ int cpuidle_enter_s2idle(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 	 * that interrupts won't be enabled when it exits and allows the tick to
 	 * be frozen safely.
 	 */
-	index = find_deepest_state(drv, dev, UINT_MAX, 0, true);
+	index = find_deepest_state(drv, dev, U64_MAX, 0, true);
 	if (index > 0)
 		enter_s2idle_proper(drv, dev, index);
 
@@ -209,7 +211,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
 	 * CPU as a broadcast timer, this call may fail if it is not available.
 	 */
 	if (broadcast && tick_broadcast_enter()) {
-		index = find_deepest_state(drv, dev, target_state->exit_latency,
+		index = find_deepest_state(drv, dev, target_state->exit_latency_ns,
 					   CPUIDLE_FLAG_TIMER_STOP, false);
 		if (index < 0) {
 			default_idle_call();
@@ -247,7 +249,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
 	local_irq_enable();
 
 	if (entered_state >= 0) {
-		s64 diff, delay = drv->states[entered_state].exit_latency;
+		s64 diff, delay = drv->states[entered_state].exit_latency_ns;
 		int i;
 
 		/*
@@ -255,18 +257,15 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
 		 * This can be moved to within driver enter routine,
 		 * but that results in multiple copies of same code.
 		 */
-		diff = ktime_us_delta(time_end, time_start);
-		if (diff > INT_MAX)
-			diff = INT_MAX;
+		diff = ktime_sub(time_end, time_start);
 
-		dev->last_residency = (int)diff;
-		dev->states_usage[entered_state].time += dev->last_residency;
+		dev->last_residency_ns = diff;
+		dev->states_usage[entered_state].time_ns += diff;
 		dev->states_usage[entered_state].usage++;
 
-		if (diff < drv->states[entered_state].target_residency) {
+		if (diff < drv->states[entered_state].target_residency_ns) {
 			for (i = entered_state - 1; i >= 0; i--) {
-				if (drv->states[i].disabled ||
-				    dev->states_usage[i].disable)
+				if (dev->states_usage[i].disable)
 					continue;
 
 				/* Shallower states are enabled, so update. */
@@ -275,22 +274,21 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
 			}
 		} else if (diff > delay) {
 			for (i = entered_state + 1; i < drv->state_count; i++) {
-				if (drv->states[i].disabled ||
-				    dev->states_usage[i].disable)
+				if (dev->states_usage[i].disable)
 					continue;
 
 				/*
 				 * Update if a deeper state would have been a
 				 * better match for the observed idle duration.
 				 */
-				if (diff - delay >= drv->states[i].target_residency)
+				if (diff - delay >= drv->states[i].target_residency_ns)
 					dev->states_usage[entered_state].below++;
 
 				break;
 			}
 		}
 	} else {
-		dev->last_residency = 0;
+		dev->last_residency_ns = 0;
 	}
 
 	return entered_state;
@@ -380,10 +378,10 @@ u64 cpuidle_poll_time(struct cpuidle_driver *drv,
 	limit_ns = TICK_NSEC;
 	for (i = 1; i < drv->state_count; i++) {
-		if (drv->states[i].disabled || dev->states_usage[i].disable)
+		if (dev->states_usage[i].disable)
 			continue;
 
-		limit_ns = (u64)drv->states[i].target_residency * NSEC_PER_USEC;
+		limit_ns = (u64)drv->states[i].target_residency_ns;
 	}
 
 	dev->poll_limit_ns = limit_ns;
@@ -554,7 +552,7 @@ static void __cpuidle_unregister_device(struct cpuidle_device *dev)
 static void __cpuidle_device_init(struct cpuidle_device *dev)
 {
 	memset(dev->states_usage, 0, sizeof(dev->states_usage));
-	dev->last_residency = 0;
+	dev->last_residency_ns = 0;
 	dev->next_hrtimer = 0;
 }
 
@@ -567,12 +565,16 @@ static void __cpuidle_device_init(struct cpuidle_device *dev)
  */
 static int __cpuidle_register_device(struct cpuidle_device *dev)
 {
-	int ret;
 	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
+	int i, ret;
 
 	if (!try_module_get(drv->owner))
 		return -EINVAL;
 
+	for (i = 0; i < drv->state_count; i++)
+		if (drv->states[i].disabled)
+			dev->states_usage[i].disable |= CPUIDLE_STATE_DISABLED_BY_DRIVER;
+
 	per_cpu(cpuidle_devices, dev->cpu) = dev;
 	list_add(&dev->device_list, &cpuidle_detected_devices);
...
@@ -62,24 +62,23 @@ static inline void __cpuidle_unset_driver(struct cpuidle_driver *drv)
  * __cpuidle_set_driver - set per CPU driver variables for the given driver.
  * @drv: a valid pointer to a struct cpuidle_driver
  *
- * For each CPU in the driver's cpumask, unset the registered driver per CPU
- * to @drv.
- *
- * Returns 0 on success, -EBUSY if the CPUs have driver(s) already.
+ * Returns 0 on success, -EBUSY if any CPU in the cpumask has a driver
+ * different from @drv already.
  */
 static inline int __cpuidle_set_driver(struct cpuidle_driver *drv)
 {
 	int cpu;
 
 	for_each_cpu(cpu, drv->cpumask) {
+		struct cpuidle_driver *old_drv;
 
-		if (__cpuidle_get_cpu_driver(cpu)) {
-			__cpuidle_unset_driver(drv);
+		old_drv = __cpuidle_get_cpu_driver(cpu);
+		if (old_drv && old_drv != drv)
 			return -EBUSY;
-		}
 	}
 
+	for_each_cpu(cpu, drv->cpumask)
 		per_cpu(cpuidle_drivers, cpu) = drv;
-	}
 
 	return 0;
 }
@@ -166,16 +165,27 @@ static void __cpuidle_driver_init(struct cpuidle_driver *drv)
 	if (!drv->cpumask)
 		drv->cpumask = (struct cpumask *)cpu_possible_mask;
 
-	/*
-	 * Look for the timer stop flag in the different states, so that we know
-	 * if the broadcast timer has to be set up. The loop is in the reverse
-	 * order, because usually one of the deeper states have this flag set.
-	 */
-	for (i = drv->state_count - 1; i >= 0 ; i--) {
-		if (drv->states[i].flags & CPUIDLE_FLAG_TIMER_STOP) {
+	for (i = 0; i < drv->state_count; i++) {
+		struct cpuidle_state *s = &drv->states[i];
+
+		/*
+		 * Look for the timer stop flag in the different states and if
+		 * it is found, indicate that the broadcast timer has to be set
+		 * up.
+		 */
+		if (s->flags & CPUIDLE_FLAG_TIMER_STOP)
 			drv->bctimer = 1;
-			break;
-		}
+
+		/*
+		 * The core will use the target residency and exit latency
+		 * values in nanoseconds, but allow drivers to provide them in
+		 * microseconds too.
+		 */
+		if (s->target_residency > 0)
+			s->target_residency_ns = s->target_residency * NSEC_PER_USEC;
+
+		if (s->exit_latency > 0)
+			s->exit_latency_ns = s->exit_latency * NSEC_PER_USEC;
 	}
 }
@@ -379,3 +389,31 @@ void cpuidle_driver_unref(void)
 
 	spin_unlock(&cpuidle_driver_lock);
 }
+
+/**
+ * cpuidle_driver_state_disabled - Disable or enable an idle state
+ * @drv: cpuidle driver owning the state
+ * @idx: State index
+ * @disable: Whether or not to disable the state
+ */
+void cpuidle_driver_state_disabled(struct cpuidle_driver *drv, int idx,
+				   bool disable)
+{
+	unsigned int cpu;
+
+	mutex_lock(&cpuidle_lock);
+
+	for_each_cpu(cpu, drv->cpumask) {
+		struct cpuidle_device *dev = per_cpu(cpuidle_devices, cpu);
+
+		if (!dev)
+			continue;
+
+		if (disable)
+			dev->states_usage[idx].disable |= CPUIDLE_STATE_DISABLED_BY_DRIVER;
+		else
+			dev->states_usage[idx].disable &= ~CPUIDLE_STATE_DISABLED_BY_DRIVER;
+	}
+
+	mutex_unlock(&cpuidle_lock);
+}
@@ -107,11 +107,14 @@ int cpuidle_register_governor(struct cpuidle_governor *gov)
  * cpuidle_governor_latency_req - Compute a latency constraint for CPU
  * @cpu: Target CPU
  */
-int cpuidle_governor_latency_req(unsigned int cpu)
+s64 cpuidle_governor_latency_req(unsigned int cpu)
 {
 	int global_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);
 	struct device *device = get_cpu_device(cpu);
 	int device_req = dev_pm_qos_raw_resume_latency(device);
 
-	return device_req < global_req ? device_req : global_req;
+	if (device_req > global_req)
+		device_req = global_req;
+
+	return (s64)device_req * NSEC_PER_USEC;
 }
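
A worked example of the reworked constraint (illustrative, not part of the commit): with a global PM_QOS_CPU_DMA_LATENCY request of 100 us and a per-device resume latency request of 50 us, the tighter limit wins and the result is now reported in nanoseconds:

	/* device_req = 50, global_req = 100: device_req is not clamped */
	/* cpuidle_governor_latency_req(cpu) == 50 * NSEC_PER_USEC == 50000 ns */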
@@ -49,7 +49,7 @@ static int haltpoll_select(struct cpuidle_driver *drv,
 			   struct cpuidle_device *dev,
 			   bool *stop_tick)
 {
-	int latency_req = cpuidle_governor_latency_req(dev->cpu);
+	s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
 
 	if (!drv->state_count || latency_req == 0) {
 		*stop_tick = false;
@@ -75,10 +75,9 @@ static int haltpoll_select(struct cpuidle_driver *drv,
 	return 0;
 }
 
-static void adjust_poll_limit(struct cpuidle_device *dev, unsigned int block_us)
+static void adjust_poll_limit(struct cpuidle_device *dev, u64 block_ns)
 {
 	unsigned int val;
-	u64 block_ns = block_us*NSEC_PER_USEC;
 
 	/* Grow cpu_halt_poll_us if
 	 * cpu_halt_poll_us < block_ns < guest_halt_poll_us
@@ -115,7 +114,7 @@ static void haltpoll_reflect(struct cpuidle_device *dev, int index)
 	dev->last_state_idx = index;
 
 	if (index != 0)
-		adjust_poll_limit(dev, dev->last_residency);
+		adjust_poll_limit(dev, dev->last_residency_ns);
 }
 
 /**
...
@@ -27,8 +27,8 @@ struct ladder_device_state {
 	struct {
 		u32 promotion_count;
 		u32 demotion_count;
-		u32 promotion_time;
-		u32 demotion_time;
+		u64 promotion_time_ns;
+		u64 demotion_time_ns;
 	} threshold;
 	struct {
 		int promotion_count;
@@ -68,9 +68,10 @@ static int ladder_select_state(struct cpuidle_driver *drv,
 {
 	struct ladder_device *ldev = this_cpu_ptr(&ladder_devices);
 	struct ladder_device_state *last_state;
-	int last_residency, last_idx = dev->last_state_idx;
+	int last_idx = dev->last_state_idx;
 	int first_idx = drv->states[0].flags & CPUIDLE_FLAG_POLLING ? 1 : 0;
-	int latency_req = cpuidle_governor_latency_req(dev->cpu);
+	s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
+	s64 last_residency;
 
 	/* Special case when user has set very strict latency requirement */
 	if (unlikely(latency_req == 0)) {
@@ -80,14 +81,13 @@ static int ladder_select_state(struct cpuidle_driver *drv,
 
 	last_state = &ldev->states[last_idx];
 
-	last_residency = dev->last_residency - drv->states[last_idx].exit_latency;
+	last_residency = dev->last_residency_ns - drv->states[last_idx].exit_latency_ns;
 
 	/* consider promotion */
 	if (last_idx < drv->state_count - 1 &&
-	    !drv->states[last_idx + 1].disabled &&
 	    !dev->states_usage[last_idx + 1].disable &&
-	    last_residency > last_state->threshold.promotion_time &&
-	    drv->states[last_idx + 1].exit_latency <= latency_req) {
+	    last_residency > last_state->threshold.promotion_time_ns &&
+	    drv->states[last_idx + 1].exit_latency_ns <= latency_req) {
 		last_state->stats.promotion_count++;
 		last_state->stats.demotion_count = 0;
 		if (last_state->stats.promotion_count >= last_state->threshold.promotion_count) {
@@ -98,13 +98,12 @@ static int ladder_select_state(struct cpuidle_driver *drv,
 
 	/* consider demotion */
 	if (last_idx > first_idx &&
-	    (drv->states[last_idx].disabled ||
-	     dev->states_usage[last_idx].disable ||
-	     drv->states[last_idx].exit_latency > latency_req)) {
+	    (dev->states_usage[last_idx].disable ||
+	     drv->states[last_idx].exit_latency_ns > latency_req)) {
 		int i;
 
 		for (i = last_idx - 1; i > first_idx; i--) {
-			if (drv->states[i].exit_latency <= latency_req)
+			if (drv->states[i].exit_latency_ns <= latency_req)
 				break;
 		}
 		ladder_do_selection(dev, ldev, last_idx, i);
@@ -112,7 +111,7 @@ static int ladder_select_state(struct cpuidle_driver *drv,
 	}
 
 	if (last_idx > first_idx &&
-	    last_residency < last_state->threshold.demotion_time) {
+	    last_residency < last_state->threshold.demotion_time_ns) {
 		last_state->stats.demotion_count++;
 		last_state->stats.promotion_count = 0;
 		if (last_state->stats.demotion_count >= last_state->threshold.demotion_count) {
@@ -152,9 +151,9 @@ static int ladder_enable_device(struct cpuidle_driver *drv,
 		lstate->threshold.demotion_count = DEMOTION_COUNT;
 
 		if (i < drv->state_count - 1)
-			lstate->threshold.promotion_time = state->exit_latency;
+			lstate->threshold.promotion_time_ns = state->exit_latency_ns;
 		if (i > first_idx)
-			lstate->threshold.demotion_time = state->exit_latency;
+			lstate->threshold.demotion_time_ns = state->exit_latency_ns;
 	}
 
 	return 0;
...
@@ -19,22 +19,12 @@
 #include <linux/sched/stat.h>
 #include <linux/math64.h>
 
-/*
- * Please note when changing the tuning values:
- * If (MAX_INTERESTING-1) * RESOLUTION > UINT_MAX, the result of
- * a scaling operation multiplication may overflow on 32 bit platforms.
- * In that case, #define RESOLUTION as ULL to get 64 bit result:
- * #define RESOLUTION 1024ULL
- *
- * The default values do not overflow.
- */
 #define BUCKETS 12
 #define INTERVAL_SHIFT 3
 #define INTERVALS (1UL << INTERVAL_SHIFT)
 #define RESOLUTION 1024
 #define DECAY 8
-#define MAX_INTERESTING 50000
+#define MAX_INTERESTING (50000 * NSEC_PER_USEC)
 
 /*
  * Concepts and ideas behind the menu governor
@@ -120,14 +110,14 @@ struct menu_device {
 	int		needs_update;
 	int		tick_wakeup;
 
-	unsigned int	next_timer_us;
+	u64		next_timer_ns;
 	unsigned int	bucket;
 	unsigned int	correction_factor[BUCKETS];
 	unsigned int	intervals[INTERVALS];
 	int		interval_ptr;
 };
 
-static inline int which_bucket(unsigned int duration, unsigned long nr_iowaiters)
+static inline int which_bucket(u64 duration_ns, unsigned long nr_iowaiters)
 {
 	int bucket = 0;
 
@@ -140,15 +130,15 @@ static inline int which_bucket(unsigned int duration, unsigned long nr_iowaiters
 	if (nr_iowaiters)
 		bucket = BUCKETS/2;
 
-	if (duration < 10)
+	if (duration_ns < 10ULL * NSEC_PER_USEC)
 		return bucket;
-	if (duration < 100)
+	if (duration_ns < 100ULL * NSEC_PER_USEC)
 		return bucket + 1;
-	if (duration < 1000)
+	if (duration_ns < 1000ULL * NSEC_PER_USEC)
 		return bucket + 2;
-	if (duration < 10000)
+	if (duration_ns < 10000ULL * NSEC_PER_USEC)
 		return bucket + 3;
-	if (duration < 100000)
+	if (duration_ns < 100000ULL * NSEC_PER_USEC)
 		return bucket + 4;
 	return bucket + 5;
 }
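
A worked example of the converted thresholds (illustrative, not part of the commit): the comparisons form a decade ladder over the predicted sleep length, now expressed in nanoseconds.

	/* A predicted 250 us sleep (250000 ns) is below 1000 * NSEC_PER_USEC, */
	/* so which_bucket() returns bucket + 2; with I/O waiters pending the  */
	/* base is BUCKETS/2 = 6, giving bucket 8 of the 12 available.         */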
@@ -276,13 +266,13 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 		       bool *stop_tick)
 {
 	struct menu_device *data = this_cpu_ptr(&menu_devices);
-	int latency_req = cpuidle_governor_latency_req(dev->cpu);
-	int i;
-	int idx;
-	unsigned int interactivity_req;
+	s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
 	unsigned int predicted_us;
+	u64 predicted_ns;
+	u64 interactivity_req;
 	unsigned long nr_iowaiters;
 	ktime_t delta_next;
+	int i, idx;
 
 	if (data->needs_update) {
 		menu_update(drv, dev);
@@ -290,15 +280,15 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	}
 
 	/* determine the expected residency time, round up */
-	data->next_timer_us = ktime_to_us(tick_nohz_get_sleep_length(&delta_next));
+	data->next_timer_ns = tick_nohz_get_sleep_length(&delta_next);
 
 	nr_iowaiters = nr_iowait_cpu(dev->cpu);
-	data->bucket = which_bucket(data->next_timer_us, nr_iowaiters);
+	data->bucket = which_bucket(data->next_timer_ns, nr_iowaiters);
 
 	if (unlikely(drv->state_count <= 1 || latency_req == 0) ||
-	    ((data->next_timer_us < drv->states[1].target_residency ||
-	      latency_req < drv->states[1].exit_latency) &&
-	     !drv->states[0].disabled && !dev->states_usage[0].disable)) {
+	    ((data->next_timer_ns < drv->states[1].target_residency_ns ||
+	      latency_req < drv->states[1].exit_latency_ns) &&
+	     !dev->states_usage[0].disable)) {
 		/*
 		 * In this case state[0] will be used no matter what, so return
 		 * it right away and keep the tick running if state[0] is a
@@ -308,18 +298,15 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 		return 0;
 	}
 
-	/*
-	 * Force the result of multiplication to be 64 bits even if both
-	 * operands are 32 bits.
-	 * Make sure to round up for half microseconds.
-	 */
-	predicted_us = DIV_ROUND_CLOSEST_ULL((uint64_t)data->next_timer_us *
-					     data->correction_factor[data->bucket],
-					     RESOLUTION * DECAY);
-	/*
-	 * Use the lowest expected idle interval to pick the idle state.
-	 */
-	predicted_us = min(predicted_us, get_typical_interval(data, predicted_us));
+	/* Round up the result for half microseconds. */
+	predicted_us = div_u64(data->next_timer_ns *
+			       data->correction_factor[data->bucket] +
+			       (RESOLUTION * DECAY * NSEC_PER_USEC) / 2,
+			       RESOLUTION * DECAY * NSEC_PER_USEC);
+	/* Use the lowest expected idle interval to pick the idle state. */
+	predicted_ns = (u64)min(predicted_us,
+				get_typical_interval(data, predicted_us)) *
+				NSEC_PER_USEC;
 
 	if (tick_nohz_tick_stopped()) {
 		/*
@@ -330,14 +317,15 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 		 * the known time till the closest timer event for the idle
 		 * state selection.
 		 */
-		if (predicted_us < TICK_USEC)
-			predicted_us = ktime_to_us(delta_next);
+		if (predicted_ns < TICK_NSEC)
+			predicted_ns = delta_next;
 	} else {
 		/*
 		 * Use the performance multiplier and the user-configurable
 		 * latency_req to determine the maximum exit latency.
 		 */
-		interactivity_req = predicted_us / performance_multiplier(nr_iowaiters);
+		interactivity_req = div64_u64(predicted_ns,
+					      performance_multiplier(nr_iowaiters));
 		if (latency_req > interactivity_req)
 			latency_req = interactivity_req;
 	}
@@ -349,27 +337,26 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	idx = -1;
 	for (i = 0; i < drv->state_count; i++) {
 		struct cpuidle_state *s = &drv->states[i];
-		struct cpuidle_state_usage *su = &dev->states_usage[i];
 
-		if (s->disabled || su->disable)
+		if (dev->states_usage[i].disable)
 			continue;
 
 		if (idx == -1)
 			idx = i; /* first enabled state */
 
-		if (s->target_residency > predicted_us) {
+		if (s->target_residency_ns > predicted_ns) {
 			/*
 			 * Use a physical idle state, not busy polling, unless
 			 * a timer is going to trigger soon enough.
 			 */
 			if ((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) &&
-			    s->exit_latency <= latency_req &&
-			    s->target_residency <= data->next_timer_us) {
-				predicted_us = s->target_residency;
+			    s->exit_latency_ns <= latency_req &&
+			    s->target_residency_ns <= data->next_timer_ns) {
+				predicted_ns = s->target_residency_ns;
 				idx = i;
 				break;
 			}
-			if (predicted_us < TICK_USEC)
+			if (predicted_ns < TICK_NSEC)
 				break;
 
 			if (!tick_nohz_tick_stopped()) {
@@ -379,7 +366,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 				 * tick in that case and let the governor run
 				 * again in the next iteration of the loop.
 				 */
-				predicted_us = drv->states[idx].target_residency;
+				predicted_ns = drv->states[idx].target_residency_ns;
 				break;
 			}
 
@@ -389,13 +376,13 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 			 * closest timer event, select this one to avoid getting
 			 * stuck in the shallow one for too long.
 			 */
-			if (drv->states[idx].target_residency < TICK_USEC &&
-			    s->target_residency <= ktime_to_us(delta_next))
+			if (drv->states[idx].target_residency_ns < TICK_NSEC &&
+			    s->target_residency_ns <= delta_next)
 				idx = i;
 
 			return idx;
 		}
-		if (s->exit_latency > latency_req)
+		if (s->exit_latency_ns > latency_req)
 			break;
 
 		idx = i;
@@ -409,12 +396,10 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	 * expected idle duration is shorter than the tick period length.
 	 */
 	if (((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) ||
-	     predicted_us < TICK_USEC) && !tick_nohz_tick_stopped()) {
-		unsigned int delta_next_us = ktime_to_us(delta_next);
-
+	     predicted_ns < TICK_NSEC) && !tick_nohz_tick_stopped()) {
 		*stop_tick = false;
 
-		if (idx > 0 && drv->states[idx].target_residency > delta_next_us) {
+		if (idx > 0 && drv->states[idx].target_residency_ns > delta_next) {
 			/*
 			 * The tick is not going to be stopped and the target
 			 * residency of the state to be returned is not within
@@ -422,12 +407,11 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 			 * tick, so try to correct that.
 			 */
 			for (i = idx - 1; i >= 0; i--) {
-				if (drv->states[i].disabled ||
-				    dev->states_usage[i].disable)
+				if (dev->states_usage[i].disable)
 					continue;
 
 				idx = i;
-				if (drv->states[i].target_residency <= delta_next_us)
+				if (drv->states[i].target_residency_ns <= delta_next)
 					break;
 			}
 		}
@@ -463,7 +447,7 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 	struct menu_device *data = this_cpu_ptr(&menu_devices);
 	int last_idx = dev->last_state_idx;
 	struct cpuidle_state *target = &drv->states[last_idx];
-	unsigned int measured_us;
+	u64 measured_ns;
 	unsigned int new_factor;
 
 	/*
@@ -481,7 +465,7 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 	 * assume the state was never reached and the exit latency is 0.
 	 */
 
-	if (data->tick_wakeup && data->next_timer_us > TICK_USEC) {
+	if (data->tick_wakeup && data->next_timer_ns > TICK_NSEC) {
 		/*
 		 * The nohz code said that there wouldn't be any events within
 		 * the tick boundary (if the tick was stopped), but the idle
@@ -491,7 +475,7 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 		 * have been idle long (but not forever) to help the idle
 		 * duration predictor do a better job next time.
 		 */
-		measured_us = 9 * MAX_INTERESTING / 10;
+		measured_ns = 9 * MAX_INTERESTING / 10;
 	} else if ((drv->states[last_idx].flags & CPUIDLE_FLAG_POLLING) &&
 		   dev->poll_time_limit) {
 		/*
@@ -501,28 +485,29 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 		 * the CPU might have been woken up from idle by the next timer.
 		 * Assume that to be the case.
 		 */
-		measured_us = data->next_timer_us;
+		measured_ns = data->next_timer_ns;
 	} else {
 		/* measured value */
-		measured_us = dev->last_residency;
+		measured_ns = dev->last_residency_ns;
 
 		/* Deduct exit latency */
-		if (measured_us > 2 * target->exit_latency)
-			measured_us -= target->exit_latency;
+		if (measured_ns > 2 * target->exit_latency_ns)
+			measured_ns -= target->exit_latency_ns;
 		else
-			measured_us /= 2;
+			measured_ns /= 2;
 	}
 
 	/* Make sure our coefficients do not exceed unity */
-	if (measured_us > data->next_timer_us)
-		measured_us = data->next_timer_us;
+	if (measured_ns > data->next_timer_ns)
+		measured_ns = data->next_timer_ns;
 
 	/* Update our correction ratio */
 	new_factor = data->correction_factor[data->bucket];
 	new_factor -= new_factor / DECAY;
 
-	if (data->next_timer_us > 0 && measured_us < MAX_INTERESTING)
-		new_factor += RESOLUTION * measured_us / data->next_timer_us;
+	if (data->next_timer_ns > 0 && measured_ns < MAX_INTERESTING)
+		new_factor += div64_u64(RESOLUTION * measured_ns,
+					data->next_timer_ns);
 	else
 		/*
 		 * we were idle so long that we count it as a perfect
@@ -542,7 +527,7 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 	data->correction_factor[data->bucket] = new_factor;
 
 	/* update the repeating-pattern data */
-	data->intervals[data->interval_ptr++] = measured_us;
+	data->intervals[data->interval_ptr++] = ktime_to_us(measured_ns);
 	if (data->interval_ptr >= INTERVALS)
 		data->interval_ptr = 0;
 }
...
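
Illustrative arithmetic for the rescaled prediction (not part of the commit): correction factors are fixed point with RESOLUTION * DECAY = 8192 standing for unity, so the divisor becomes RESOLUTION * DECAY * NSEC_PER_USEC = 8192000 once the timer distance is kept in nanoseconds.

	/* next_timer_ns = 1 ms, factor = 4096 (sleeps typically end halfway): */
	/* predicted_us = (1000000ULL * 4096 + 8192000 / 2) / 8192000 == 500   */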
@@ -104,7 +104,7 @@ struct teo_cpu {
 	u64 sleep_length_ns;
 	struct teo_idle_state states[CPUIDLE_STATE_MAX];
 	int interval_idx;
-	unsigned int intervals[INTERVALS];
+	u64 intervals[INTERVALS];
 };
 
 static DEFINE_PER_CPU(struct teo_cpu, teo_cpus);
@@ -117,9 +117,8 @@ static DEFINE_PER_CPU(struct teo_cpu, teo_cpus);
 static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 {
 	struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
-	unsigned int sleep_length_us = ktime_to_us(cpu_data->sleep_length_ns);
 	int i, idx_hit = -1, idx_timer = -1;
-	unsigned int measured_us;
+	u64 measured_ns;
 
 	if (cpu_data->time_span_ns >= cpu_data->sleep_length_ns) {
 		/*
@@ -127,23 +126,28 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 		 * enough to the closest timer event expected at the idle state
 		 * selection time to be discarded.
 		 */
-		measured_us = UINT_MAX;
+		measured_ns = U64_MAX;
 	} else {
-		unsigned int lat;
+		u64 lat_ns = drv->states[dev->last_state_idx].exit_latency_ns;
 
-		lat = drv->states[dev->last_state_idx].exit_latency;
-
-		measured_us = ktime_to_us(cpu_data->time_span_ns);
+		/*
+		 * The computations below are to determine whether or not the
+		 * (saved) time till the next timer event and the measured idle
+		 * duration fall into the same "bin", so use last_residency_ns
+		 * for that instead of time_span_ns which includes the cpuidle
+		 * overhead.
+		 */
+		measured_ns = dev->last_residency_ns;
 		/*
 		 * The delay between the wakeup and the first instruction
 		 * executed by the CPU is not likely to be worst-case every
 		 * time, so take 1/2 of the exit latency as a very rough
 		 * approximation of the average of it.
 		 */
-		if (measured_us >= lat)
-			measured_us -= lat / 2;
+		if (measured_ns >= lat_ns)
+			measured_ns -= lat_ns / 2;
 		else
-			measured_us /= 2;
+			measured_ns /= 2;
 	}
 
 	/*
@@ -155,9 +159,9 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 
 		cpu_data->states[i].early_hits -= early_hits >> DECAY_SHIFT;
 
-		if (drv->states[i].target_residency <= sleep_length_us) {
+		if (drv->states[i].target_residency_ns <= cpu_data->sleep_length_ns) {
 			idx_timer = i;
-			if (drv->states[i].target_residency <= measured_us)
+			if (drv->states[i].target_residency_ns <= measured_ns)
 				idx_hit = i;
 		}
 	}
@@ -193,30 +197,35 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 	 * Save idle duration values corresponding to non-timer wakeups for
 	 * pattern detection.
 	 */
-	cpu_data->intervals[cpu_data->interval_idx++] = measured_us;
+	cpu_data->intervals[cpu_data->interval_idx++] = measured_ns;
 	if (cpu_data->interval_idx > INTERVALS)
 		cpu_data->interval_idx = 0;
 }
 
+static bool teo_time_ok(u64 interval_ns)
+{
+	return !tick_nohz_tick_stopped() || interval_ns >= TICK_NSEC;
+}
+
 /**
  * teo_find_shallower_state - Find shallower idle state matching given duration.
  * @drv: cpuidle driver containing state data.
  * @dev: Target CPU.
  * @state_idx: Index of the capping idle state.
- * @duration_us: Idle duration value to match.
+ * @duration_ns: Idle duration value to match.
  */
 static int teo_find_shallower_state(struct cpuidle_driver *drv,
 				    struct cpuidle_device *dev, int state_idx,
-				    unsigned int duration_us)
+				    u64 duration_ns)
 {
 	int i;
 
 	for (i = state_idx - 1; i >= 0; i--) {
-		if (drv->states[i].disabled || dev->states_usage[i].disable)
+		if (dev->states_usage[i].disable)
 			continue;
 
 		state_idx = i;
-		if (drv->states[i].target_residency <= duration_us)
+		if (drv->states[i].target_residency_ns <= duration_ns)
 			break;
 	}
 	return state_idx;
@@ -232,9 +241,10 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 		      bool *stop_tick)
 {
 	struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
-	int latency_req = cpuidle_governor_latency_req(dev->cpu);
-	unsigned int duration_us, count;
-	int max_early_idx, constraint_idx, idx, i;
+	s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
+	u64 duration_ns;
+	unsigned int hits, misses, early_hits;
+	int max_early_idx, prev_max_early_idx, constraint_idx, idx, i;
 	ktime_t delta_tick;
 
 	if (dev->last_state_idx >= 0) {
@@ -244,50 +254,92 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 
 	cpu_data->time_span_ns = local_clock();
 
-	cpu_data->sleep_length_ns = tick_nohz_get_sleep_length(&delta_tick);
-	duration_us = ktime_to_us(cpu_data->sleep_length_ns);
+	duration_ns = tick_nohz_get_sleep_length(&delta_tick);
+	cpu_data->sleep_length_ns = duration_ns;
 
-	count = 0;
+	hits = 0;
+	misses = 0;
+	early_hits = 0;
 	max_early_idx = -1;
+	prev_max_early_idx = -1;
 	constraint_idx = drv->state_count;
 	idx = -1;
 
 	for (i = 0; i < drv->state_count; i++) {
 		struct cpuidle_state *s = &drv->states[i];
-		struct cpuidle_state_usage *su = &dev->states_usage[i];
 
-		if (s->disabled || su->disable) {
+		if (dev->states_usage[i].disable) {
+			/*
+			 * Ignore disabled states with target residencies beyond
+			 * the anticipated idle duration.
+			 */
+			if (s->target_residency_ns > duration_ns)
+				continue;
+
+			/*
+			 * This state is disabled, so the range of idle duration
+			 * values corresponding to it is covered by the current
+			 * candidate state, but still the "hits" and "misses"
+			 * metrics of the disabled state need to be used to
+			 * decide whether or not the state covering the range in
+			 * question is good enough.
+			 */
+			hits = cpu_data->states[i].hits;
+			misses = cpu_data->states[i].misses;
+
+			if (early_hits >= cpu_data->states[i].early_hits ||
+			    idx < 0)
+				continue;
+
 			/*
-			 * If the "early hits" metric of a disabled state is
-			 * greater than the current maximum, it should be taken
-			 * into account, because it would be a mistake to select
-			 * a deeper state with lower "early hits" metric. The
-			 * index cannot be changed to point to it, however, so
-			 * just increase the max count alone and let the index
-			 * still point to a shallower idle state.
+			 * If the current candidate state has been the one with
+			 * the maximum "early hits" metric so far, the "early
+			 * hits" metric of the disabled state replaces the
+			 * current "early hits" count to avoid selecting a
+			 * deeper state with lower "early hits" metric.
 			 */
-			if (max_early_idx >= 0 &&
-			    count < cpu_data->states[i].early_hits)
-				count = cpu_data->states[i].early_hits;
+			if (max_early_idx == idx) {
+				early_hits = cpu_data->states[i].early_hits;
+				continue;
+			}
+
+			/*
+			 * The current candidate state is closer to the disabled
+			 * one than the current maximum "early hits" state, so
+			 * replace the latter with it, but in case the maximum
+			 * "early hits" state index has not been set so far,
+			 * check if the current candidate state is not too
+			 * shallow for that role.
+			 */
+			if (teo_time_ok(drv->states[idx].target_residency_ns)) {
+				prev_max_early_idx = max_early_idx;
+				early_hits = cpu_data->states[i].early_hits;
+				max_early_idx = idx;
+			}
 
 			continue;
 		}
 
-		if (idx < 0)
-			idx = i; /* first enabled state */
+		if (idx < 0) {
+			idx = i; /* first enabled state */
+			hits = cpu_data->states[i].hits;
+			misses = cpu_data->states[i].misses;
+		}
 
-		if (s->target_residency > duration_us)
+		if (s->target_residency_ns > duration_ns)
 			break;
 
-		if (s->exit_latency > latency_req && constraint_idx > i)
+		if (s->exit_latency_ns > latency_req && constraint_idx > i)
 			constraint_idx = i;
 
 		idx = i;
+		hits = cpu_data->states[i].hits;
+		misses = cpu_data->states[i].misses;
 
-		if (count < cpu_data->states[i].early_hits &&
-		    !(tick_nohz_tick_stopped() &&
-		      drv->states[i].target_residency < TICK_USEC)) {
-			count = cpu_data->states[i].early_hits;
+		if (early_hits < cpu_data->states[i].early_hits &&
+		    teo_time_ok(drv->states[i].target_residency_ns)) {
+			prev_max_early_idx = max_early_idx;
+			early_hits = cpu_data->states[i].early_hits;
 			max_early_idx = i;
 		}
 	}
@@ -300,10 +352,19 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	 * "early hits" metric, but if that cannot be determined, just use the
 	 * state selected so far.
 	 */
-	if (cpu_data->states[idx].hits <= cpu_data->states[idx].misses &&
-	    max_early_idx >= 0) {
-		idx = max_early_idx;
-		duration_us = drv->states[idx].target_residency;
+	if (hits <= misses) {
+		/*
+		 * The current candidate state is not suitable, so take the one
+		 * whose "early hits" metric is the maximum for the range of
+		 * shallower states.
+		 */
+		if (idx == max_early_idx)
+			max_early_idx = prev_max_early_idx;
+
+		if (max_early_idx >= 0) {
+			idx = max_early_idx;
+			duration_ns = drv->states[idx].target_residency_ns;
+		}
 	}
 
 	/*
@@ -316,18 +377,17 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	if (idx < 0) {
 		idx = 0; /* No states enabled. Must use 0. */
 	} else if (idx > 0) {
+		unsigned int count = 0;
 		u64 sum = 0;
 
-		count = 0;
-
 		/*
 		 * Count and sum the most recent idle duration values less than
 		 * the current expected idle duration value.
 		 */
 		for (i = 0; i < INTERVALS; i++) {
-			unsigned int val = cpu_data->intervals[i];
+			u64 val = cpu_data->intervals[i];
 
-			if (val >= duration_us)
+			if (val >= duration_ns)
 				continue;
 
 			count++;
@@ -339,17 +399,17 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 		 * values are in the interesting range.
 		 */
 		if (count > INTERVALS / 2) {
-			unsigned int avg_us = div64_u64(sum, count);
+			u64 avg_ns = div64_u64(sum, count);
 
 			/*
 			 * Avoid spending too much time in an idle state that
 			 * would be too shallow.
 			 */
-			if (!(tick_nohz_tick_stopped() && avg_us < TICK_USEC)) {
-				duration_us = avg_us;
-				if (drv->states[idx].target_residency > avg_us)
+			if (teo_time_ok(avg_ns)) {
+				duration_ns = avg_ns;
+				if (drv->states[idx].target_residency_ns > avg_ns)
 					idx = teo_find_shallower_state(drv, dev,
-								       idx, avg_us);
+								       idx, avg_ns);
 			}
 		}
 	}
@@ -359,9 +419,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	 * expected idle duration is shorter than the tick period length.
 	 */
 	if (((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) ||
-	     duration_us < TICK_USEC) && !tick_nohz_tick_stopped()) {
-		unsigned int delta_tick_us = ktime_to_us(delta_tick);
-
+	     duration_ns < TICK_NSEC) && !tick_nohz_tick_stopped()) {
 		*stop_tick = false;
 
 		/*
@@ -370,8 +428,8 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 		 * till the closest timer including the tick, try to correct
 		 * that.
 		 */
-		if (idx > 0 && drv->states[idx].target_residency > delta_tick_us)
-			idx = teo_find_shallower_state(drv, dev, idx, delta_tick_us);
+		if (idx > 0 && drv->states[idx].target_residency_ns > delta_tick)
+			idx = teo_find_shallower_state(drv, dev, idx, delta_tick);
 	}
 
 	return idx;
@@ -415,7 +473,7 @@ static int teo_enable_device(struct cpuidle_driver *drv,
 	memset(cpu_data, 0, sizeof(*cpu_data));
 
 	for (i = 0; i < INTERVALS; i++)
-		cpu_data->intervals[i] = UINT_MAX;
+		cpu_data->intervals[i] = U64_MAX;
 
 	return 0;
 }
...
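
An illustrative note on the metrics used above (not part of the commit; it assumes teo's DECAY_SHIFT of 3 from the unchanged part of the file): hits, misses and early_hits decay geometrically on every update before the matching state is credited, so each pass keeps 7/8 of the previous value and stale behaviour fades out after a few dozen wakeups.

	/* e.g. for one state's early_hits counter in teo_update(): */
	early_hits = cpu_data->states[i].early_hits;
	cpu_data->states[i].early_hits -= early_hits >> DECAY_SHIFT; /* keep 7/8 */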
@@ -49,6 +49,8 @@ void cpuidle_poll_state_init(struct cpuidle_driver *drv)
 	snprintf(state->desc, CPUIDLE_DESC_LEN, "CPUIDLE CORE POLL IDLE");
 	state->exit_latency = 0;
 	state->target_residency = 0;
+	state->exit_latency_ns = 0;
+	state->target_residency_ns = 0;
 	state->power_usage = -1;
 	state->enter = poll_idle;
 	state->disabled = false;
...
...@@ -255,25 +255,6 @@ static ssize_t show_state_##_name(struct cpuidle_state *state, \ ...@@ -255,25 +255,6 @@ static ssize_t show_state_##_name(struct cpuidle_state *state, \
return sprintf(buf, "%u\n", state->_name);\ return sprintf(buf, "%u\n", state->_name);\
} }
#define define_store_state_ull_function(_name) \
static ssize_t store_state_##_name(struct cpuidle_state *state, \
struct cpuidle_state_usage *state_usage, \
const char *buf, size_t size) \
{ \
unsigned long long value; \
int err; \
if (!capable(CAP_SYS_ADMIN)) \
return -EPERM; \
err = kstrtoull(buf, 0, &value); \
if (err) \
return err; \
if (value) \
state_usage->_name = 1; \
else \
state_usage->_name = 0; \
return size; \
}
#define define_show_state_ull_function(_name) \ #define define_show_state_ull_function(_name) \
static ssize_t show_state_##_name(struct cpuidle_state *state, \ static ssize_t show_state_##_name(struct cpuidle_state *state, \
struct cpuidle_state_usage *state_usage, \ struct cpuidle_state_usage *state_usage, \
...@@ -292,18 +273,60 @@ static ssize_t show_state_##_name(struct cpuidle_state *state, \ ...@@ -292,18 +273,60 @@ static ssize_t show_state_##_name(struct cpuidle_state *state, \
return sprintf(buf, "%s\n", state->_name);\ return sprintf(buf, "%s\n", state->_name);\
} }
define_show_state_function(exit_latency) #define define_show_state_time_function(_name) \
define_show_state_function(target_residency) static ssize_t show_state_##_name(struct cpuidle_state *state, \
struct cpuidle_state_usage *state_usage, \
char *buf) \
{ \
return sprintf(buf, "%llu\n", ktime_to_us(state->_name##_ns)); \
}
define_show_state_time_function(exit_latency)
define_show_state_time_function(target_residency)
define_show_state_function(power_usage) define_show_state_function(power_usage)
define_show_state_ull_function(usage) define_show_state_ull_function(usage)
define_show_state_ull_function(time)
define_show_state_str_function(name) define_show_state_str_function(name)
define_show_state_str_function(desc) define_show_state_str_function(desc)
define_show_state_ull_function(disable)
define_store_state_ull_function(disable)
define_show_state_ull_function(above) define_show_state_ull_function(above)
define_show_state_ull_function(below) define_show_state_ull_function(below)
static ssize_t show_state_time(struct cpuidle_state *state,
struct cpuidle_state_usage *state_usage,
char *buf)
{
return sprintf(buf, "%llu\n", ktime_to_us(state_usage->time_ns));
}
static ssize_t show_state_disable(struct cpuidle_state *state,
struct cpuidle_state_usage *state_usage,
char *buf)
{
return sprintf(buf, "%llu\n",
state_usage->disable & CPUIDLE_STATE_DISABLED_BY_USER);
}
static ssize_t store_state_disable(struct cpuidle_state *state,
struct cpuidle_state_usage *state_usage,
const char *buf, size_t size)
{
unsigned int value;
int err;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
err = kstrtouint(buf, 0, &value);
if (err)
return err;
if (value)
state_usage->disable |= CPUIDLE_STATE_DISABLED_BY_USER;
else
state_usage->disable &= ~CPUIDLE_STATE_DISABLED_BY_USER;
return size;
}
define_one_state_ro(name, show_state_name); define_one_state_ro(name, show_state_name);
define_one_state_ro(desc, show_state_desc); define_one_state_ro(desc, show_state_desc);
define_one_state_ro(latency, show_state_exit_latency); define_one_state_ro(latency, show_state_exit_latency);
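The open-coded store_state_disable() above replaces the generic ull store macro so that writes from sysfs only toggle CPUIDLE_STATE_DISABLED_BY_USER, leaving any driver-imposed disable intact. From userspace the interface is unchanged; a small sketch (the sysfs path is illustrative):

#include <stdio.h>

int main(void)
{
	const char *path =
		"/sys/devices/system/cpu/cpu0/cpuidle/state2/disable";
	FILE *f = fopen(path, "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	/* Any nonzero value sets only the BY_USER bit; writing 0 clears
	 * it, but cannot re-enable a state disabled by its driver. */
	fputs("1\n", f);
	fclose(f);
	return 0;
}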
......
...@@ -184,7 +184,12 @@ void arch_cpu_idle_dead(void); ...@@ -184,7 +184,12 @@ void arch_cpu_idle_dead(void);
int cpu_report_state(int cpu); int cpu_report_state(int cpu);
int cpu_check_up_prepare(int cpu); int cpu_check_up_prepare(int cpu);
void cpu_set_state_online(int cpu); void cpu_set_state_online(int cpu);
void play_idle(unsigned long duration_us); void play_idle_precise(u64 duration_ns, u64 latency_ns);
static inline void play_idle(unsigned long duration_us)
{
play_idle_precise(duration_us * NSEC_PER_USEC, U64_MAX);
}
#ifdef CONFIG_HOTPLUG_CPU #ifdef CONFIG_HOTPLUG_CPU
bool cpu_wait_death(unsigned int cpu, int seconds); bool cpu_wait_death(unsigned int cpu, int seconds);
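The new inline wrapper keeps the old microsecond interface on top of play_idle_precise(): the duration is scaled to nanoseconds and U64_MAX stands for "no exit latency constraint". Hedged caller sketch (values are examples):

#include <linux/cpu.h>
#include <linux/ktime.h>

static void demo_callers(void)
{
	/* Legacy spelling: 1000 us of idle, latency limit U64_MAX. */
	play_idle(1000);

	/* Precise spelling: same duration, but the wakeup cost of the
	 * chosen idle state is capped at 50 us. */
	play_idle_precise(1000 * NSEC_PER_USEC, 50 * NSEC_PER_USEC);
}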
......
...@@ -29,10 +29,13 @@ struct cpuidle_driver; ...@@ -29,10 +29,13 @@ struct cpuidle_driver;
* CPUIDLE DEVICE INTERFACE * * CPUIDLE DEVICE INTERFACE *
****************************/ ****************************/
#define CPUIDLE_STATE_DISABLED_BY_USER BIT(0)
#define CPUIDLE_STATE_DISABLED_BY_DRIVER BIT(1)
struct cpuidle_state_usage { struct cpuidle_state_usage {
unsigned long long disable; unsigned long long disable;
unsigned long long usage; unsigned long long usage;
unsigned long long time; /* in US */ u64 time_ns;
unsigned long long above; /* Number of times it's been too deep */ unsigned long long above; /* Number of times it's been too deep */
unsigned long long below; /* Number of times it's been too shallow */ unsigned long long below; /* Number of times it's been too shallow */
#ifdef CONFIG_SUSPEND #ifdef CONFIG_SUSPEND
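The disable field is now a bitmask rather than a boolean: the user and driver bits are independent, and a state is usable only while both are clear. Same-pattern sketch (the helper name is ours):

#include <linux/cpuidle.h>

/* Clearing one source of "disabled" never clears the other, which is
 * the point of splitting the bits. */
static bool demo_state_enabled(struct cpuidle_state_usage *su)
{
	return !(su->disable & (CPUIDLE_STATE_DISABLED_BY_USER |
				CPUIDLE_STATE_DISABLED_BY_DRIVER));
}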
...@@ -45,6 +48,8 @@ struct cpuidle_state { ...@@ -45,6 +48,8 @@ struct cpuidle_state {
char name[CPUIDLE_NAME_LEN]; char name[CPUIDLE_NAME_LEN];
char desc[CPUIDLE_DESC_LEN]; char desc[CPUIDLE_DESC_LEN];
u64 exit_latency_ns;
u64 target_residency_ns;
unsigned int flags; unsigned int flags;
unsigned int exit_latency; /* in US */ unsigned int exit_latency; /* in US */
int power_usage; /* in mW */ int power_usage; /* in mW */
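struct cpuidle_state gains nanosecond twins of exit_latency and target_residency while the microsecond fields stay, so existing driver tables keep working. The series has the core derive the ns values from the us ones during driver setup; a hedged sketch of that conversion (the helper name is ours, the in-tree logic lives in the driver init path):

#include <linux/cpuidle.h>
#include <linux/ktime.h>

static void demo_fill_ns(struct cpuidle_state *s)
{
	/* Only derive the ns fields when the driver did not set them. */
	if (!s->exit_latency_ns)
		s->exit_latency_ns = s->exit_latency * NSEC_PER_USEC;
	if (!s->target_residency_ns)
		s->target_residency_ns =
			s->target_residency * NSEC_PER_USEC;
}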
...@@ -80,14 +85,14 @@ struct cpuidle_driver_kobj; ...@@ -80,14 +85,14 @@ struct cpuidle_driver_kobj;
struct cpuidle_device { struct cpuidle_device {
unsigned int registered:1; unsigned int registered:1;
unsigned int enabled:1; unsigned int enabled:1;
unsigned int use_deepest_state:1;
unsigned int poll_time_limit:1; unsigned int poll_time_limit:1;
unsigned int cpu; unsigned int cpu;
ktime_t next_hrtimer; ktime_t next_hrtimer;
int last_state_idx; int last_state_idx;
int last_residency; u64 last_residency_ns;
u64 poll_limit_ns; u64 poll_limit_ns;
u64 forced_idle_latency_limit_ns;
struct cpuidle_state_usage states_usage[CPUIDLE_STATE_MAX]; struct cpuidle_state_usage states_usage[CPUIDLE_STATE_MAX];
struct cpuidle_state_kobj *kobjs[CPUIDLE_STATE_MAX]; struct cpuidle_state_kobj *kobjs[CPUIDLE_STATE_MAX];
struct cpuidle_driver_kobj *kobj_driver; struct cpuidle_driver_kobj *kobj_driver;
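Two device fields change meaning here: last_residency becomes last_residency_ns, and the use_deepest_state bit gives way to forced_idle_latency_limit_ns, which serves as both the flag and the value. Minimal sketch of that convention:

#include <linux/types.h>

/* Forced idle is active exactly when the limit is nonzero; U64_MAX
 * plays the old "true" role of "deepest state, any latency". */
static bool demo_forced_idle_active(u64 limit_ns)
{
	return limit_ns != 0;
}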
...@@ -144,6 +149,8 @@ extern int cpuidle_register_driver(struct cpuidle_driver *drv); ...@@ -144,6 +149,8 @@ extern int cpuidle_register_driver(struct cpuidle_driver *drv);
extern struct cpuidle_driver *cpuidle_get_driver(void); extern struct cpuidle_driver *cpuidle_get_driver(void);
extern struct cpuidle_driver *cpuidle_driver_ref(void); extern struct cpuidle_driver *cpuidle_driver_ref(void);
extern void cpuidle_driver_unref(void); extern void cpuidle_driver_unref(void);
extern void cpuidle_driver_state_disabled(struct cpuidle_driver *drv, int idx,
bool disable);
extern void cpuidle_unregister_driver(struct cpuidle_driver *drv); extern void cpuidle_unregister_driver(struct cpuidle_driver *drv);
extern int cpuidle_register_device(struct cpuidle_device *dev); extern int cpuidle_register_device(struct cpuidle_device *dev);
extern void cpuidle_unregister_device(struct cpuidle_device *dev); extern void cpuidle_unregister_device(struct cpuidle_device *dev);
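cpuidle_driver_state_disabled() gives drivers a sanctioned way to flip the BY_DRIVER bit on one of their states across all registered devices, instead of poking states[].disabled directly. A generic quirk sketch (driver and trigger are illustrative; the driver is assumed registered with at least two states):

#include <linux/cpuidle.h>

static struct cpuidle_driver demo_driver;	/* states[] omitted */

/* Disable state 1 while some hardware quirk is active, re-enable it
 * once the quirk condition goes away. */
static void demo_quirk(bool quirk_active)
{
	cpuidle_driver_state_disabled(&demo_driver, 1, quirk_active);
}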
...@@ -181,6 +188,8 @@ static inline int cpuidle_register_driver(struct cpuidle_driver *drv) ...@@ -181,6 +188,8 @@ static inline int cpuidle_register_driver(struct cpuidle_driver *drv)
static inline struct cpuidle_driver *cpuidle_get_driver(void) {return NULL; } static inline struct cpuidle_driver *cpuidle_get_driver(void) {return NULL; }
static inline struct cpuidle_driver *cpuidle_driver_ref(void) {return NULL; } static inline struct cpuidle_driver *cpuidle_driver_ref(void) {return NULL; }
static inline void cpuidle_driver_unref(void) {} static inline void cpuidle_driver_unref(void) {}
static inline void cpuidle_driver_state_disabled(struct cpuidle_driver *drv,
int idx, bool disable) { }
static inline void cpuidle_unregister_driver(struct cpuidle_driver *drv) { } static inline void cpuidle_unregister_driver(struct cpuidle_driver *drv) { }
static inline int cpuidle_register_device(struct cpuidle_device *dev) static inline int cpuidle_register_device(struct cpuidle_device *dev)
{return -ENODEV; } {return -ENODEV; }
...@@ -204,18 +213,20 @@ static inline struct cpuidle_device *cpuidle_get_device(void) {return NULL; } ...@@ -204,18 +213,20 @@ static inline struct cpuidle_device *cpuidle_get_device(void) {return NULL; }
#ifdef CONFIG_CPU_IDLE #ifdef CONFIG_CPU_IDLE
extern int cpuidle_find_deepest_state(struct cpuidle_driver *drv, extern int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
struct cpuidle_device *dev); struct cpuidle_device *dev,
u64 latency_limit_ns);
extern int cpuidle_enter_s2idle(struct cpuidle_driver *drv, extern int cpuidle_enter_s2idle(struct cpuidle_driver *drv,
struct cpuidle_device *dev); struct cpuidle_device *dev);
extern void cpuidle_use_deepest_state(bool enable); extern void cpuidle_use_deepest_state(u64 latency_limit_ns);
#else #else
static inline int cpuidle_find_deepest_state(struct cpuidle_driver *drv, static inline int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
struct cpuidle_device *dev) struct cpuidle_device *dev,
u64 latency_limit_ns)
{return -ENODEV; } {return -ENODEV; }
static inline int cpuidle_enter_s2idle(struct cpuidle_driver *drv, static inline int cpuidle_enter_s2idle(struct cpuidle_driver *drv,
struct cpuidle_device *dev) struct cpuidle_device *dev)
{return -ENODEV; } {return -ENODEV; }
static inline void cpuidle_use_deepest_state(bool enable) static inline void cpuidle_use_deepest_state(u64 latency_limit_ns)
{ {
} }
#endif #endif
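cpuidle_use_deepest_state() trades its bool for an exit latency limit in nanoseconds: 0 returns the CPU to normal governor control, U64_MAX reproduces the old "deepest state regardless of latency" behaviour, and anything in between bounds the wakeup cost. Hedged usage sketch:

#include <linux/cpuidle.h>

static void demo_injection_window(u64 limit_ns)
{
	cpuidle_use_deepest_state(limit_ns);	/* nonzero: forced idle on */
	/* ... idle injection period runs here ... */
	cpuidle_use_deepest_state(0);		/* back to the governor */
}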
...@@ -260,7 +271,7 @@ struct cpuidle_governor { ...@@ -260,7 +271,7 @@ struct cpuidle_governor {
#ifdef CONFIG_CPU_IDLE #ifdef CONFIG_CPU_IDLE
extern int cpuidle_register_governor(struct cpuidle_governor *gov); extern int cpuidle_register_governor(struct cpuidle_governor *gov);
extern int cpuidle_governor_latency_req(unsigned int cpu); extern s64 cpuidle_governor_latency_req(unsigned int cpu);
#else #else
static inline int cpuidle_register_governor(struct cpuidle_governor *gov) static inline int cpuidle_register_governor(struct cpuidle_governor *gov)
{return 0;} {return 0;}
......
...@@ -104,7 +104,7 @@ static int call_cpuidle(struct cpuidle_driver *drv, struct cpuidle_device *dev, ...@@ -104,7 +104,7 @@ static int call_cpuidle(struct cpuidle_driver *drv, struct cpuidle_device *dev,
* update no idle residency and return. * update no idle residency and return.
*/ */
if (current_clr_polling_and_test()) { if (current_clr_polling_and_test()) {
dev->last_residency = 0; dev->last_residency_ns = 0;
local_irq_enable(); local_irq_enable();
return -EBUSY; return -EBUSY;
} }
...@@ -165,7 +165,9 @@ static void cpuidle_idle_call(void) ...@@ -165,7 +165,9 @@ static void cpuidle_idle_call(void)
* until a proper wakeup interrupt happens. * until a proper wakeup interrupt happens.
*/ */
if (idle_should_enter_s2idle() || dev->use_deepest_state) { if (idle_should_enter_s2idle() || dev->forced_idle_latency_limit_ns) {
u64 max_latency_ns;
if (idle_should_enter_s2idle()) { if (idle_should_enter_s2idle()) {
rcu_idle_enter(); rcu_idle_enter();
...@@ -176,12 +178,16 @@ static void cpuidle_idle_call(void) ...@@ -176,12 +178,16 @@ static void cpuidle_idle_call(void)
} }
rcu_idle_exit(); rcu_idle_exit();
max_latency_ns = U64_MAX;
} else {
max_latency_ns = dev->forced_idle_latency_limit_ns;
} }
tick_nohz_idle_stop_tick(); tick_nohz_idle_stop_tick();
rcu_idle_enter(); rcu_idle_enter();
next_state = cpuidle_find_deepest_state(drv, dev); next_state = cpuidle_find_deepest_state(drv, dev, max_latency_ns);
call_cpuidle(drv, dev, next_state); call_cpuidle(drv, dev, next_state);
} else { } else {
bool stop_tick = true; bool stop_tick = true;
...@@ -311,7 +317,7 @@ static enum hrtimer_restart idle_inject_timer_fn(struct hrtimer *timer) ...@@ -311,7 +317,7 @@ static enum hrtimer_restart idle_inject_timer_fn(struct hrtimer *timer)
return HRTIMER_NORESTART; return HRTIMER_NORESTART;
} }
void play_idle(unsigned long duration_us) void play_idle_precise(u64 duration_ns, u64 latency_ns)
{ {
struct idle_timer it; struct idle_timer it;
...@@ -323,29 +329,29 @@ void play_idle(unsigned long duration_us) ...@@ -323,29 +329,29 @@ void play_idle(unsigned long duration_us)
WARN_ON_ONCE(current->nr_cpus_allowed != 1); WARN_ON_ONCE(current->nr_cpus_allowed != 1);
WARN_ON_ONCE(!(current->flags & PF_KTHREAD)); WARN_ON_ONCE(!(current->flags & PF_KTHREAD));
WARN_ON_ONCE(!(current->flags & PF_NO_SETAFFINITY)); WARN_ON_ONCE(!(current->flags & PF_NO_SETAFFINITY));
WARN_ON_ONCE(!duration_us); WARN_ON_ONCE(!duration_ns);
rcu_sleep_check(); rcu_sleep_check();
preempt_disable(); preempt_disable();
current->flags |= PF_IDLE; current->flags |= PF_IDLE;
cpuidle_use_deepest_state(true); cpuidle_use_deepest_state(latency_ns);
it.done = 0; it.done = 0;
hrtimer_init_on_stack(&it.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); hrtimer_init_on_stack(&it.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
it.timer.function = idle_inject_timer_fn; it.timer.function = idle_inject_timer_fn;
hrtimer_start(&it.timer, ns_to_ktime(duration_us * NSEC_PER_USEC), hrtimer_start(&it.timer, ns_to_ktime(duration_ns),
HRTIMER_MODE_REL_PINNED); HRTIMER_MODE_REL_PINNED);
while (!READ_ONCE(it.done)) while (!READ_ONCE(it.done))
do_idle(); do_idle();
cpuidle_use_deepest_state(false); cpuidle_use_deepest_state(0);
current->flags &= ~PF_IDLE; current->flags &= ~PF_IDLE;
preempt_fold_need_resched(); preempt_fold_need_resched();
preempt_enable(); preempt_enable();
} }
EXPORT_SYMBOL_GPL(play_idle); EXPORT_SYMBOL_GPL(play_idle_precise);
void cpu_startup_entry(enum cpuhp_state state) void cpu_startup_entry(enum cpuhp_state state)
{ {
......
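Per the WARN_ON_ONCE() checks in play_idle_precise(), callers must be kthreads pinned to exactly one CPU and must pass a nonzero duration, so the usual shape is a per-CPU injection thread. A hedged sketch of such a caller (names are illustrative; real users interleave a run period between injections):

#include <linux/cpu.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/ktime.h>

static int demo_inject_fn(void *arg)
{
	while (!kthread_should_stop()) {
		/* 2 ms of forced idle with no latency bound. */
		play_idle_precise(2 * NSEC_PER_MSEC, U64_MAX);
		/* A real injector would let the CPU run here. */
	}
	return 0;
}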