Commit 62214039 authored by Rafael J. Wysocki's avatar Rafael J. Wysocki

Merge branch 'pm-cpuidle'

* pm-cpuidle:
  cpuidle: Pass exit latency limit to cpuidle_use_deepest_state()
  cpuidle: Allow idle injection to apply exit latency limit
  cpuidle: Introduce cpuidle_driver_state_disabled() for driver quirks
  cpuidle: teo: Avoid code duplication in conditionals
  cpuidle: teo: Avoid using "early hits" incorrectly
  cpuidle: teo: Exclude cpuidle overhead from computations
  cpuidle: Use nanoseconds as the unit of time
  cpuidle: Consolidate disabled state checks
  ACPI: processor_idle: Skip dummy wait if kernel is in guest
  cpuidle: Do not unset the driver if it is there already
  cpuidle: teo: Fix "early hits" handling for disabled idle states
  cpuidle: teo: Consider hits and misses metrics of disabled states
  cpuidle: teo: Rename local variable in teo_select()
  cpuidle: teo: Ignore disabled idle states that are too deep
parents 05ff1ba4 5aa9ba63
......@@ -62,13 +62,13 @@ static struct cpuidle_driver imx6q_cpuidle_driver = {
*/
void imx6q_cpuidle_fec_irqs_used(void)
{
imx6q_cpuidle_driver.states[1].disabled = true;
cpuidle_driver_state_disabled(&imx6q_cpuidle_driver, 1, true);
}
EXPORT_SYMBOL_GPL(imx6q_cpuidle_fec_irqs_used);
void imx6q_cpuidle_fec_irqs_unused(void)
{
imx6q_cpuidle_driver.states[1].disabled = false;
cpuidle_driver_state_disabled(&imx6q_cpuidle_driver, 1, false);
}
EXPORT_SYMBOL_GPL(imx6q_cpuidle_fec_irqs_unused);
......
......@@ -203,7 +203,7 @@ void tegra20_cpuidle_pcie_irqs_in_use(void)
{
pr_info_once(
"Disabling cpuidle LP2 state, since PCIe IRQs are in use\n");
tegra_idle_driver.states[1].disabled = true;
cpuidle_driver_state_disabled(&tegra_idle_driver, 1, true);
}
int __init tegra20_cpuidle_init(void)
......
......@@ -642,6 +642,19 @@ static int acpi_idle_bm_check(void)
return bm_status;
}
static void wait_for_freeze(void)
{
#ifdef CONFIG_X86
/* No delay is needed if we are in guest */
if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
return;
#endif
/* Dummy wait op - must do something useless after P_LVL2 read
because chipsets cannot guarantee that STPCLK# signal
gets asserted in time to freeze execution properly. */
inl(acpi_gbl_FADT.xpm_timer_block.address);
}
/**
* acpi_idle_do_entry - enter idle state using the appropriate method
* @cx: cstate data
......@@ -658,10 +671,7 @@ static void __cpuidle acpi_idle_do_entry(struct acpi_processor_cx *cx)
} else {
/* IO port based C-state */
inb(cx->address);
/* Dummy wait op - must do something useless after P_LVL2 read
because chipsets cannot guarantee that STPCLK# signal
gets asserted in time to freeze execution properly. */
inl(acpi_gbl_FADT.xpm_timer_block.address);
wait_for_freeze();
}
}
......@@ -682,8 +692,7 @@ static int acpi_idle_play_dead(struct cpuidle_device *dev, int index)
safe_halt();
else if (cx->entry_method == ACPI_CSTATE_SYSTEMIO) {
inb(cx->address);
/* See comment in acpi_idle_do_entry() */
inl(acpi_gbl_FADT.xpm_timer_block.address);
wait_for_freeze();
} else
return -ENODEV;
}
......
......@@ -56,13 +56,10 @@ static u64 get_snooze_timeout(struct cpuidle_device *dev,
return default_snooze_timeout;
for (i = index + 1; i < drv->state_count; i++) {
struct cpuidle_state *s = &drv->states[i];
struct cpuidle_state_usage *su = &dev->states_usage[i];
if (s->disabled || su->disable)
if (dev->states_usage[i].disable)
continue;
return s->target_residency * tb_ticks_per_usec;
return drv->states[i].target_residency * tb_ticks_per_usec;
}
return default_snooze_timeout;
......
......@@ -75,44 +75,45 @@ int cpuidle_play_dead(void)
static int find_deepest_state(struct cpuidle_driver *drv,
struct cpuidle_device *dev,
unsigned int max_latency,
u64 max_latency_ns,
unsigned int forbidden_flags,
bool s2idle)
{
unsigned int latency_req = 0;
u64 latency_req = 0;
int i, ret = 0;
for (i = 1; i < drv->state_count; i++) {
struct cpuidle_state *s = &drv->states[i];
struct cpuidle_state_usage *su = &dev->states_usage[i];
if (s->disabled || su->disable || s->exit_latency <= latency_req
|| s->exit_latency > max_latency
|| (s->flags & forbidden_flags)
|| (s2idle && !s->enter_s2idle))
if (dev->states_usage[i].disable ||
s->exit_latency_ns <= latency_req ||
s->exit_latency_ns > max_latency_ns ||
(s->flags & forbidden_flags) ||
(s2idle && !s->enter_s2idle))
continue;
latency_req = s->exit_latency;
latency_req = s->exit_latency_ns;
ret = i;
}
return ret;
}
/**
* cpuidle_use_deepest_state - Set/clear governor override flag.
* @enable: New value of the flag.
* cpuidle_use_deepest_state - Set/unset governor override mode.
* @latency_limit_ns: Idle state exit latency limit (or no override if 0).
*
* Set/unset the current CPU to use the deepest idle state (override governors
* going forward if set).
* If @latency_limit_ns is nonzero, set the current CPU to use the deepest idle
* state with exit latency within @latency_limit_ns (override governors going
* forward), or do not override governors if it is zero.
*/
void cpuidle_use_deepest_state(bool enable)
void cpuidle_use_deepest_state(u64 latency_limit_ns)
{
struct cpuidle_device *dev;
preempt_disable();
dev = cpuidle_get_device();
if (dev)
dev->use_deepest_state = enable;
dev->forced_idle_latency_limit_ns = latency_limit_ns;
preempt_enable();
}
......@@ -122,9 +123,10 @@ void cpuidle_use_deepest_state(bool enable)
* @dev: cpuidle device for the given CPU.
*/
int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
struct cpuidle_device *dev)
struct cpuidle_device *dev,
u64 latency_limit_ns)
{
return find_deepest_state(drv, dev, UINT_MAX, 0, false);
return find_deepest_state(drv, dev, latency_limit_ns, 0, false);
}
#ifdef CONFIG_SUSPEND
......@@ -180,7 +182,7 @@ int cpuidle_enter_s2idle(struct cpuidle_driver *drv, struct cpuidle_device *dev)
* that interrupts won't be enabled when it exits and allows the tick to
* be frozen safely.
*/
index = find_deepest_state(drv, dev, UINT_MAX, 0, true);
index = find_deepest_state(drv, dev, U64_MAX, 0, true);
if (index > 0)
enter_s2idle_proper(drv, dev, index);
......@@ -209,7 +211,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
* CPU as a broadcast timer, this call may fail if it is not available.
*/
if (broadcast && tick_broadcast_enter()) {
index = find_deepest_state(drv, dev, target_state->exit_latency,
index = find_deepest_state(drv, dev, target_state->exit_latency_ns,
CPUIDLE_FLAG_TIMER_STOP, false);
if (index < 0) {
default_idle_call();
......@@ -247,7 +249,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
local_irq_enable();
if (entered_state >= 0) {
s64 diff, delay = drv->states[entered_state].exit_latency;
s64 diff, delay = drv->states[entered_state].exit_latency_ns;
int i;
/*
......@@ -255,18 +257,15 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
* This can be moved to within driver enter routine,
* but that results in multiple copies of same code.
*/
diff = ktime_us_delta(time_end, time_start);
if (diff > INT_MAX)
diff = INT_MAX;
diff = ktime_sub(time_end, time_start);
dev->last_residency = (int)diff;
dev->states_usage[entered_state].time += dev->last_residency;
dev->last_residency_ns = diff;
dev->states_usage[entered_state].time_ns += diff;
dev->states_usage[entered_state].usage++;
if (diff < drv->states[entered_state].target_residency) {
if (diff < drv->states[entered_state].target_residency_ns) {
for (i = entered_state - 1; i >= 0; i--) {
if (drv->states[i].disabled ||
dev->states_usage[i].disable)
if (dev->states_usage[i].disable)
continue;
/* Shallower states are enabled, so update. */
......@@ -275,22 +274,21 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
}
} else if (diff > delay) {
for (i = entered_state + 1; i < drv->state_count; i++) {
if (drv->states[i].disabled ||
dev->states_usage[i].disable)
if (dev->states_usage[i].disable)
continue;
/*
* Update if a deeper state would have been a
* better match for the observed idle duration.
*/
if (diff - delay >= drv->states[i].target_residency)
if (diff - delay >= drv->states[i].target_residency_ns)
dev->states_usage[entered_state].below++;
break;
}
}
} else {
dev->last_residency = 0;
dev->last_residency_ns = 0;
}
return entered_state;
......@@ -380,10 +378,10 @@ u64 cpuidle_poll_time(struct cpuidle_driver *drv,
limit_ns = TICK_NSEC;
for (i = 1; i < drv->state_count; i++) {
if (drv->states[i].disabled || dev->states_usage[i].disable)
if (dev->states_usage[i].disable)
continue;
limit_ns = (u64)drv->states[i].target_residency * NSEC_PER_USEC;
limit_ns = (u64)drv->states[i].target_residency_ns;
}
dev->poll_limit_ns = limit_ns;
......@@ -554,7 +552,7 @@ static void __cpuidle_unregister_device(struct cpuidle_device *dev)
static void __cpuidle_device_init(struct cpuidle_device *dev)
{
memset(dev->states_usage, 0, sizeof(dev->states_usage));
dev->last_residency = 0;
dev->last_residency_ns = 0;
dev->next_hrtimer = 0;
}
......@@ -567,12 +565,16 @@ static void __cpuidle_device_init(struct cpuidle_device *dev)
*/
static int __cpuidle_register_device(struct cpuidle_device *dev)
{
int ret;
struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
int i, ret;
if (!try_module_get(drv->owner))
return -EINVAL;
for (i = 0; i < drv->state_count; i++)
if (drv->states[i].disabled)
dev->states_usage[i].disable |= CPUIDLE_STATE_DISABLED_BY_DRIVER;
per_cpu(cpuidle_devices, dev->cpu) = dev;
list_add(&dev->device_list, &cpuidle_detected_devices);
......
......@@ -62,24 +62,23 @@ static inline void __cpuidle_unset_driver(struct cpuidle_driver *drv)
* __cpuidle_set_driver - set per CPU driver variables for the given driver.
* @drv: a valid pointer to a struct cpuidle_driver
*
* For each CPU in the driver's cpumask, unset the registered driver per CPU
* to @drv.
*
* Returns 0 on success, -EBUSY if the CPUs have driver(s) already.
* Returns 0 on success, -EBUSY if any CPU in the cpumask have a driver
* different from drv already.
*/
static inline int __cpuidle_set_driver(struct cpuidle_driver *drv)
{
int cpu;
for_each_cpu(cpu, drv->cpumask) {
struct cpuidle_driver *old_drv;
if (__cpuidle_get_cpu_driver(cpu)) {
__cpuidle_unset_driver(drv);
old_drv = __cpuidle_get_cpu_driver(cpu);
if (old_drv && old_drv != drv)
return -EBUSY;
}
for_each_cpu(cpu, drv->cpumask)
per_cpu(cpuidle_drivers, cpu) = drv;
}
return 0;
}
......@@ -166,16 +165,27 @@ static void __cpuidle_driver_init(struct cpuidle_driver *drv)
if (!drv->cpumask)
drv->cpumask = (struct cpumask *)cpu_possible_mask;
for (i = 0; i < drv->state_count; i++) {
struct cpuidle_state *s = &drv->states[i];
/*
* Look for the timer stop flag in the different states, so that we know
* if the broadcast timer has to be set up. The loop is in the reverse
* order, because usually one of the deeper states have this flag set.
* Look for the timer stop flag in the different states and if
* it is found, indicate that the broadcast timer has to be set
* up.
*/
for (i = drv->state_count - 1; i >= 0 ; i--) {
if (drv->states[i].flags & CPUIDLE_FLAG_TIMER_STOP) {
if (s->flags & CPUIDLE_FLAG_TIMER_STOP)
drv->bctimer = 1;
break;
}
/*
* The core will use the target residency and exit latency
* values in nanoseconds, but allow drivers to provide them in
* microseconds too.
*/
if (s->target_residency > 0)
s->target_residency_ns = s->target_residency * NSEC_PER_USEC;
if (s->exit_latency > 0)
s->exit_latency_ns = s->exit_latency * NSEC_PER_USEC;
}
}
......@@ -379,3 +389,31 @@ void cpuidle_driver_unref(void)
spin_unlock(&cpuidle_driver_lock);
}
/**
* cpuidle_driver_state_disabled - Disable or enable an idle state
* @drv: cpuidle driver owning the state
* @idx: State index
* @disable: Whether or not to disable the state
*/
void cpuidle_driver_state_disabled(struct cpuidle_driver *drv, int idx,
bool disable)
{
unsigned int cpu;
mutex_lock(&cpuidle_lock);
for_each_cpu(cpu, drv->cpumask) {
struct cpuidle_device *dev = per_cpu(cpuidle_devices, cpu);
if (!dev)
continue;
if (disable)
dev->states_usage[idx].disable |= CPUIDLE_STATE_DISABLED_BY_DRIVER;
else
dev->states_usage[idx].disable &= ~CPUIDLE_STATE_DISABLED_BY_DRIVER;
}
mutex_unlock(&cpuidle_lock);
}
......@@ -107,11 +107,14 @@ int cpuidle_register_governor(struct cpuidle_governor *gov)
* cpuidle_governor_latency_req - Compute a latency constraint for CPU
* @cpu: Target CPU
*/
int cpuidle_governor_latency_req(unsigned int cpu)
s64 cpuidle_governor_latency_req(unsigned int cpu)
{
int global_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);
struct device *device = get_cpu_device(cpu);
int device_req = dev_pm_qos_raw_resume_latency(device);
return device_req < global_req ? device_req : global_req;
if (device_req > global_req)
device_req = global_req;
return (s64)device_req * NSEC_PER_USEC;
}
......@@ -49,7 +49,7 @@ static int haltpoll_select(struct cpuidle_driver *drv,
struct cpuidle_device *dev,
bool *stop_tick)
{
int latency_req = cpuidle_governor_latency_req(dev->cpu);
s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
if (!drv->state_count || latency_req == 0) {
*stop_tick = false;
......@@ -75,10 +75,9 @@ static int haltpoll_select(struct cpuidle_driver *drv,
return 0;
}
static void adjust_poll_limit(struct cpuidle_device *dev, unsigned int block_us)
static void adjust_poll_limit(struct cpuidle_device *dev, u64 block_ns)
{
unsigned int val;
u64 block_ns = block_us*NSEC_PER_USEC;
/* Grow cpu_halt_poll_us if
* cpu_halt_poll_us < block_ns < guest_halt_poll_us
......@@ -115,7 +114,7 @@ static void haltpoll_reflect(struct cpuidle_device *dev, int index)
dev->last_state_idx = index;
if (index != 0)
adjust_poll_limit(dev, dev->last_residency);
adjust_poll_limit(dev, dev->last_residency_ns);
}
/**
......
......@@ -27,8 +27,8 @@ struct ladder_device_state {
struct {
u32 promotion_count;
u32 demotion_count;
u32 promotion_time;
u32 demotion_time;
u64 promotion_time_ns;
u64 demotion_time_ns;
} threshold;
struct {
int promotion_count;
......@@ -68,9 +68,10 @@ static int ladder_select_state(struct cpuidle_driver *drv,
{
struct ladder_device *ldev = this_cpu_ptr(&ladder_devices);
struct ladder_device_state *last_state;
int last_residency, last_idx = dev->last_state_idx;
int last_idx = dev->last_state_idx;
int first_idx = drv->states[0].flags & CPUIDLE_FLAG_POLLING ? 1 : 0;
int latency_req = cpuidle_governor_latency_req(dev->cpu);
s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
s64 last_residency;
/* Special case when user has set very strict latency requirement */
if (unlikely(latency_req == 0)) {
......@@ -80,14 +81,13 @@ static int ladder_select_state(struct cpuidle_driver *drv,
last_state = &ldev->states[last_idx];
last_residency = dev->last_residency - drv->states[last_idx].exit_latency;
last_residency = dev->last_residency_ns - drv->states[last_idx].exit_latency_ns;
/* consider promotion */
if (last_idx < drv->state_count - 1 &&
!drv->states[last_idx + 1].disabled &&
!dev->states_usage[last_idx + 1].disable &&
last_residency > last_state->threshold.promotion_time &&
drv->states[last_idx + 1].exit_latency <= latency_req) {
last_residency > last_state->threshold.promotion_time_ns &&
drv->states[last_idx + 1].exit_latency_ns <= latency_req) {
last_state->stats.promotion_count++;
last_state->stats.demotion_count = 0;
if (last_state->stats.promotion_count >= last_state->threshold.promotion_count) {
......@@ -98,13 +98,12 @@ static int ladder_select_state(struct cpuidle_driver *drv,
/* consider demotion */
if (last_idx > first_idx &&
(drv->states[last_idx].disabled ||
dev->states_usage[last_idx].disable ||
drv->states[last_idx].exit_latency > latency_req)) {
(dev->states_usage[last_idx].disable ||
drv->states[last_idx].exit_latency_ns > latency_req)) {
int i;
for (i = last_idx - 1; i > first_idx; i--) {
if (drv->states[i].exit_latency <= latency_req)
if (drv->states[i].exit_latency_ns <= latency_req)
break;
}
ladder_do_selection(dev, ldev, last_idx, i);
......@@ -112,7 +111,7 @@ static int ladder_select_state(struct cpuidle_driver *drv,
}
if (last_idx > first_idx &&
last_residency < last_state->threshold.demotion_time) {
last_residency < last_state->threshold.demotion_time_ns) {
last_state->stats.demotion_count++;
last_state->stats.promotion_count = 0;
if (last_state->stats.demotion_count >= last_state->threshold.demotion_count) {
......@@ -152,9 +151,9 @@ static int ladder_enable_device(struct cpuidle_driver *drv,
lstate->threshold.demotion_count = DEMOTION_COUNT;
if (i < drv->state_count - 1)
lstate->threshold.promotion_time = state->exit_latency;
lstate->threshold.promotion_time_ns = state->exit_latency_ns;
if (i > first_idx)
lstate->threshold.demotion_time = state->exit_latency;
lstate->threshold.demotion_time_ns = state->exit_latency_ns;
}
return 0;
......
......@@ -19,22 +19,12 @@
#include <linux/sched/stat.h>
#include <linux/math64.h>
/*
* Please note when changing the tuning values:
* If (MAX_INTERESTING-1) * RESOLUTION > UINT_MAX, the result of
* a scaling operation multiplication may overflow on 32 bit platforms.
* In that case, #define RESOLUTION as ULL to get 64 bit result:
* #define RESOLUTION 1024ULL
*
* The default values do not overflow.
*/
#define BUCKETS 12
#define INTERVAL_SHIFT 3
#define INTERVALS (1UL << INTERVAL_SHIFT)
#define RESOLUTION 1024
#define DECAY 8
#define MAX_INTERESTING 50000
#define MAX_INTERESTING (50000 * NSEC_PER_USEC)
/*
* Concepts and ideas behind the menu governor
......@@ -120,14 +110,14 @@ struct menu_device {
int needs_update;
int tick_wakeup;
unsigned int next_timer_us;
u64 next_timer_ns;
unsigned int bucket;
unsigned int correction_factor[BUCKETS];
unsigned int intervals[INTERVALS];
int interval_ptr;
};
static inline int which_bucket(unsigned int duration, unsigned long nr_iowaiters)
static inline int which_bucket(u64 duration_ns, unsigned long nr_iowaiters)
{
int bucket = 0;
......@@ -140,15 +130,15 @@ static inline int which_bucket(unsigned int duration, unsigned long nr_iowaiters
if (nr_iowaiters)
bucket = BUCKETS/2;
if (duration < 10)
if (duration_ns < 10ULL * NSEC_PER_USEC)
return bucket;
if (duration < 100)
if (duration_ns < 100ULL * NSEC_PER_USEC)
return bucket + 1;
if (duration < 1000)
if (duration_ns < 1000ULL * NSEC_PER_USEC)
return bucket + 2;
if (duration < 10000)
if (duration_ns < 10000ULL * NSEC_PER_USEC)
return bucket + 3;
if (duration < 100000)
if (duration_ns < 100000ULL * NSEC_PER_USEC)
return bucket + 4;
return bucket + 5;
}
......@@ -276,13 +266,13 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
bool *stop_tick)
{
struct menu_device *data = this_cpu_ptr(&menu_devices);
int latency_req = cpuidle_governor_latency_req(dev->cpu);
int i;
int idx;
unsigned int interactivity_req;
s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
unsigned int predicted_us;
u64 predicted_ns;
u64 interactivity_req;
unsigned long nr_iowaiters;
ktime_t delta_next;
int i, idx;
if (data->needs_update) {
menu_update(drv, dev);
......@@ -290,15 +280,15 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
}
/* determine the expected residency time, round up */
data->next_timer_us = ktime_to_us(tick_nohz_get_sleep_length(&delta_next));
data->next_timer_ns = tick_nohz_get_sleep_length(&delta_next);
nr_iowaiters = nr_iowait_cpu(dev->cpu);
data->bucket = which_bucket(data->next_timer_us, nr_iowaiters);
data->bucket = which_bucket(data->next_timer_ns, nr_iowaiters);
if (unlikely(drv->state_count <= 1 || latency_req == 0) ||
((data->next_timer_us < drv->states[1].target_residency ||
latency_req < drv->states[1].exit_latency) &&
!drv->states[0].disabled && !dev->states_usage[0].disable)) {
((data->next_timer_ns < drv->states[1].target_residency_ns ||
latency_req < drv->states[1].exit_latency_ns) &&
!dev->states_usage[0].disable)) {
/*
* In this case state[0] will be used no matter what, so return
* it right away and keep the tick running if state[0] is a
......@@ -308,18 +298,15 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
return 0;
}
/*
* Force the result of multiplication to be 64 bits even if both
* operands are 32 bits.
* Make sure to round up for half microseconds.
*/
predicted_us = DIV_ROUND_CLOSEST_ULL((uint64_t)data->next_timer_us *
data->correction_factor[data->bucket],
RESOLUTION * DECAY);
/*
* Use the lowest expected idle interval to pick the idle state.
*/
predicted_us = min(predicted_us, get_typical_interval(data, predicted_us));
/* Round up the result for half microseconds. */
predicted_us = div_u64(data->next_timer_ns *
data->correction_factor[data->bucket] +
(RESOLUTION * DECAY * NSEC_PER_USEC) / 2,
RESOLUTION * DECAY * NSEC_PER_USEC);
/* Use the lowest expected idle interval to pick the idle state. */
predicted_ns = (u64)min(predicted_us,
get_typical_interval(data, predicted_us)) *
NSEC_PER_USEC;
if (tick_nohz_tick_stopped()) {
/*
......@@ -330,14 +317,15 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
* the known time till the closest timer event for the idle
* state selection.
*/
if (predicted_us < TICK_USEC)
predicted_us = ktime_to_us(delta_next);
if (predicted_ns < TICK_NSEC)
predicted_ns = delta_next;
} else {
/*
* Use the performance multiplier and the user-configurable
* latency_req to determine the maximum exit latency.
*/
interactivity_req = predicted_us / performance_multiplier(nr_iowaiters);
interactivity_req = div64_u64(predicted_ns,
performance_multiplier(nr_iowaiters));
if (latency_req > interactivity_req)
latency_req = interactivity_req;
}
......@@ -349,27 +337,26 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
idx = -1;
for (i = 0; i < drv->state_count; i++) {
struct cpuidle_state *s = &drv->states[i];
struct cpuidle_state_usage *su = &dev->states_usage[i];
if (s->disabled || su->disable)
if (dev->states_usage[i].disable)
continue;
if (idx == -1)
idx = i; /* first enabled state */
if (s->target_residency > predicted_us) {
if (s->target_residency_ns > predicted_ns) {
/*
* Use a physical idle state, not busy polling, unless
* a timer is going to trigger soon enough.
*/
if ((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) &&
s->exit_latency <= latency_req &&
s->target_residency <= data->next_timer_us) {
predicted_us = s->target_residency;
s->exit_latency_ns <= latency_req &&
s->target_residency_ns <= data->next_timer_ns) {
predicted_ns = s->target_residency_ns;
idx = i;
break;
}
if (predicted_us < TICK_USEC)
if (predicted_ns < TICK_NSEC)
break;
if (!tick_nohz_tick_stopped()) {
......@@ -379,7 +366,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
* tick in that case and let the governor run
* again in the next iteration of the loop.
*/
predicted_us = drv->states[idx].target_residency;
predicted_ns = drv->states[idx].target_residency_ns;
break;
}
......@@ -389,13 +376,13 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
* closest timer event, select this one to avoid getting
* stuck in the shallow one for too long.
*/
if (drv->states[idx].target_residency < TICK_USEC &&
s->target_residency <= ktime_to_us(delta_next))
if (drv->states[idx].target_residency_ns < TICK_NSEC &&
s->target_residency_ns <= delta_next)
idx = i;
return idx;
}
if (s->exit_latency > latency_req)
if (s->exit_latency_ns > latency_req)
break;
idx = i;
......@@ -409,12 +396,10 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
* expected idle duration is shorter than the tick period length.
*/
if (((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) ||
predicted_us < TICK_USEC) && !tick_nohz_tick_stopped()) {
unsigned int delta_next_us = ktime_to_us(delta_next);
predicted_ns < TICK_NSEC) && !tick_nohz_tick_stopped()) {
*stop_tick = false;
if (idx > 0 && drv->states[idx].target_residency > delta_next_us) {
if (idx > 0 && drv->states[idx].target_residency_ns > delta_next) {
/*
* The tick is not going to be stopped and the target
* residency of the state to be returned is not within
......@@ -422,12 +407,11 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
* tick, so try to correct that.
*/
for (i = idx - 1; i >= 0; i--) {
if (drv->states[i].disabled ||
dev->states_usage[i].disable)
if (dev->states_usage[i].disable)
continue;
idx = i;
if (drv->states[i].target_residency <= delta_next_us)
if (drv->states[i].target_residency_ns <= delta_next)
break;
}
}
......@@ -463,7 +447,7 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
struct menu_device *data = this_cpu_ptr(&menu_devices);
int last_idx = dev->last_state_idx;
struct cpuidle_state *target = &drv->states[last_idx];
unsigned int measured_us;
u64 measured_ns;
unsigned int new_factor;
/*
......@@ -481,7 +465,7 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
* assume the state was never reached and the exit latency is 0.
*/
if (data->tick_wakeup && data->next_timer_us > TICK_USEC) {
if (data->tick_wakeup && data->next_timer_ns > TICK_NSEC) {
/*
* The nohz code said that there wouldn't be any events within
* the tick boundary (if the tick was stopped), but the idle
......@@ -491,7 +475,7 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
* have been idle long (but not forever) to help the idle
* duration predictor do a better job next time.
*/
measured_us = 9 * MAX_INTERESTING / 10;
measured_ns = 9 * MAX_INTERESTING / 10;
} else if ((drv->states[last_idx].flags & CPUIDLE_FLAG_POLLING) &&
dev->poll_time_limit) {
/*
......@@ -501,28 +485,29 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
* the CPU might have been woken up from idle by the next timer.
* Assume that to be the case.
*/
measured_us = data->next_timer_us;
measured_ns = data->next_timer_ns;
} else {
/* measured value */
measured_us = dev->last_residency;
measured_ns = dev->last_residency_ns;
/* Deduct exit latency */
if (measured_us > 2 * target->exit_latency)
measured_us -= target->exit_latency;
if (measured_ns > 2 * target->exit_latency_ns)
measured_ns -= target->exit_latency_ns;
else
measured_us /= 2;
measured_ns /= 2;
}
/* Make sure our coefficients do not exceed unity */
if (measured_us > data->next_timer_us)
measured_us = data->next_timer_us;
if (measured_ns > data->next_timer_ns)
measured_ns = data->next_timer_ns;
/* Update our correction ratio */
new_factor = data->correction_factor[data->bucket];
new_factor -= new_factor / DECAY;
if (data->next_timer_us > 0 && measured_us < MAX_INTERESTING)
new_factor += RESOLUTION * measured_us / data->next_timer_us;
if (data->next_timer_ns > 0 && measured_ns < MAX_INTERESTING)
new_factor += div64_u64(RESOLUTION * measured_ns,
data->next_timer_ns);
else
/*
* we were idle so long that we count it as a perfect
......@@ -542,7 +527,7 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
data->correction_factor[data->bucket] = new_factor;
/* update the repeating-pattern data */
data->intervals[data->interval_ptr++] = measured_us;
data->intervals[data->interval_ptr++] = ktime_to_us(measured_ns);
if (data->interval_ptr >= INTERVALS)
data->interval_ptr = 0;
}
......
......@@ -104,7 +104,7 @@ struct teo_cpu {
u64 sleep_length_ns;
struct teo_idle_state states[CPUIDLE_STATE_MAX];
int interval_idx;
unsigned int intervals[INTERVALS];
u64 intervals[INTERVALS];
};
static DEFINE_PER_CPU(struct teo_cpu, teo_cpus);
......@@ -117,9 +117,8 @@ static DEFINE_PER_CPU(struct teo_cpu, teo_cpus);
static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
{
struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
unsigned int sleep_length_us = ktime_to_us(cpu_data->sleep_length_ns);
int i, idx_hit = -1, idx_timer = -1;
unsigned int measured_us;
u64 measured_ns;
if (cpu_data->time_span_ns >= cpu_data->sleep_length_ns) {
/*
......@@ -127,23 +126,28 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
* enough to the closest timer event expected at the idle state
* selection time to be discarded.
*/
measured_us = UINT_MAX;
measured_ns = U64_MAX;
} else {
unsigned int lat;
u64 lat_ns = drv->states[dev->last_state_idx].exit_latency_ns;
lat = drv->states[dev->last_state_idx].exit_latency;
measured_us = ktime_to_us(cpu_data->time_span_ns);
/*
* The computations below are to determine whether or not the
* (saved) time till the next timer event and the measured idle
* duration fall into the same "bin", so use last_residency_ns
* for that instead of time_span_ns which includes the cpuidle
* overhead.
*/
measured_ns = dev->last_residency_ns;
/*
* The delay between the wakeup and the first instruction
* executed by the CPU is not likely to be worst-case every
* time, so take 1/2 of the exit latency as a very rough
* approximation of the average of it.
*/
if (measured_us >= lat)
measured_us -= lat / 2;
if (measured_ns >= lat_ns)
measured_ns -= lat_ns / 2;
else
measured_us /= 2;
measured_ns /= 2;
}
/*
......@@ -155,9 +159,9 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
cpu_data->states[i].early_hits -= early_hits >> DECAY_SHIFT;
if (drv->states[i].target_residency <= sleep_length_us) {
if (drv->states[i].target_residency_ns <= cpu_data->sleep_length_ns) {
idx_timer = i;
if (drv->states[i].target_residency <= measured_us)
if (drv->states[i].target_residency_ns <= measured_ns)
idx_hit = i;
}
}
......@@ -193,30 +197,35 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
* Save idle duration values corresponding to non-timer wakeups for
* pattern detection.
*/
cpu_data->intervals[cpu_data->interval_idx++] = measured_us;
cpu_data->intervals[cpu_data->interval_idx++] = measured_ns;
if (cpu_data->interval_idx > INTERVALS)
cpu_data->interval_idx = 0;
}
static bool teo_time_ok(u64 interval_ns)
{
return !tick_nohz_tick_stopped() || interval_ns >= TICK_NSEC;
}
/**
* teo_find_shallower_state - Find shallower idle state matching given duration.
* @drv: cpuidle driver containing state data.
* @dev: Target CPU.
* @state_idx: Index of the capping idle state.
* @duration_us: Idle duration value to match.
* @duration_ns: Idle duration value to match.
*/
static int teo_find_shallower_state(struct cpuidle_driver *drv,
struct cpuidle_device *dev, int state_idx,
unsigned int duration_us)
u64 duration_ns)
{
int i;
for (i = state_idx - 1; i >= 0; i--) {
if (drv->states[i].disabled || dev->states_usage[i].disable)
if (dev->states_usage[i].disable)
continue;
state_idx = i;
if (drv->states[i].target_residency <= duration_us)
if (drv->states[i].target_residency_ns <= duration_ns)
break;
}
return state_idx;
......@@ -232,9 +241,10 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
bool *stop_tick)
{
struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
int latency_req = cpuidle_governor_latency_req(dev->cpu);
unsigned int duration_us, count;
int max_early_idx, constraint_idx, idx, i;
s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
u64 duration_ns;
unsigned int hits, misses, early_hits;
int max_early_idx, prev_max_early_idx, constraint_idx, idx, i;
ktime_t delta_tick;
if (dev->last_state_idx >= 0) {
......@@ -244,50 +254,92 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
cpu_data->time_span_ns = local_clock();
cpu_data->sleep_length_ns = tick_nohz_get_sleep_length(&delta_tick);
duration_us = ktime_to_us(cpu_data->sleep_length_ns);
duration_ns = tick_nohz_get_sleep_length(&delta_tick);
cpu_data->sleep_length_ns = duration_ns;
count = 0;
hits = 0;
misses = 0;
early_hits = 0;
max_early_idx = -1;
prev_max_early_idx = -1;
constraint_idx = drv->state_count;
idx = -1;
for (i = 0; i < drv->state_count; i++) {
struct cpuidle_state *s = &drv->states[i];
struct cpuidle_state_usage *su = &dev->states_usage[i];
if (s->disabled || su->disable) {
if (dev->states_usage[i].disable) {
/*
* If the "early hits" metric of a disabled state is
* greater than the current maximum, it should be taken
* into account, because it would be a mistake to select
* a deeper state with lower "early hits" metric. The
* index cannot be changed to point to it, however, so
* just increase the max count alone and let the index
* still point to a shallower idle state.
* Ignore disabled states with target residencies beyond
* the anticipated idle duration.
*/
if (max_early_idx >= 0 &&
count < cpu_data->states[i].early_hits)
count = cpu_data->states[i].early_hits;
if (s->target_residency_ns > duration_ns)
continue;
/*
* This state is disabled, so the range of idle duration
* values corresponding to it is covered by the current
* candidate state, but still the "hits" and "misses"
* metrics of the disabled state need to be used to
* decide whether or not the state covering the range in
* question is good enough.
*/
hits = cpu_data->states[i].hits;
misses = cpu_data->states[i].misses;
if (early_hits >= cpu_data->states[i].early_hits ||
idx < 0)
continue;
/*
* If the current candidate state has been the one with
* the maximum "early hits" metric so far, the "early
* hits" metric of the disabled state replaces the
* current "early hits" count to avoid selecting a
* deeper state with lower "early hits" metric.
*/
if (max_early_idx == idx) {
early_hits = cpu_data->states[i].early_hits;
continue;
}
if (idx < 0)
/*
* The current candidate state is closer to the disabled
* one than the current maximum "early hits" state, so
* replace the latter with it, but in case the maximum
* "early hits" state index has not been set so far,
* check if the current candidate state is not too
* shallow for that role.
*/
if (teo_time_ok(drv->states[idx].target_residency_ns)) {
prev_max_early_idx = max_early_idx;
early_hits = cpu_data->states[i].early_hits;
max_early_idx = idx;
}
continue;
}
if (idx < 0) {
idx = i; /* first enabled state */
hits = cpu_data->states[i].hits;
misses = cpu_data->states[i].misses;
}
if (s->target_residency > duration_us)
if (s->target_residency_ns > duration_ns)
break;
if (s->exit_latency > latency_req && constraint_idx > i)
if (s->exit_latency_ns > latency_req && constraint_idx > i)
constraint_idx = i;
idx = i;
hits = cpu_data->states[i].hits;
misses = cpu_data->states[i].misses;
if (count < cpu_data->states[i].early_hits &&
!(tick_nohz_tick_stopped() &&
drv->states[i].target_residency < TICK_USEC)) {
count = cpu_data->states[i].early_hits;
if (early_hits < cpu_data->states[i].early_hits &&
teo_time_ok(drv->states[i].target_residency_ns)) {
prev_max_early_idx = max_early_idx;
early_hits = cpu_data->states[i].early_hits;
max_early_idx = i;
}
}
......@@ -300,10 +352,19 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
* "early hits" metric, but if that cannot be determined, just use the
* state selected so far.
*/
if (cpu_data->states[idx].hits <= cpu_data->states[idx].misses &&
max_early_idx >= 0) {
if (hits <= misses) {
/*
* The current candidate state is not suitable, so take the one
* whose "early hits" metric is the maximum for the range of
* shallower states.
*/
if (idx == max_early_idx)
max_early_idx = prev_max_early_idx;
if (max_early_idx >= 0) {
idx = max_early_idx;
duration_us = drv->states[idx].target_residency;
duration_ns = drv->states[idx].target_residency_ns;
}
}
/*
......@@ -316,18 +377,17 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
if (idx < 0) {
idx = 0; /* No states enabled. Must use 0. */
} else if (idx > 0) {
unsigned int count = 0;
u64 sum = 0;
count = 0;
/*
* Count and sum the most recent idle duration values less than
* the current expected idle duration value.
*/
for (i = 0; i < INTERVALS; i++) {
unsigned int val = cpu_data->intervals[i];
u64 val = cpu_data->intervals[i];
if (val >= duration_us)
if (val >= duration_ns)
continue;
count++;
......@@ -339,17 +399,17 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
* values are in the interesting range.
*/
if (count > INTERVALS / 2) {
unsigned int avg_us = div64_u64(sum, count);
u64 avg_ns = div64_u64(sum, count);
/*
* Avoid spending too much time in an idle state that
* would be too shallow.
*/
if (!(tick_nohz_tick_stopped() && avg_us < TICK_USEC)) {
duration_us = avg_us;
if (drv->states[idx].target_residency > avg_us)
if (teo_time_ok(avg_ns)) {
duration_ns = avg_ns;
if (drv->states[idx].target_residency_ns > avg_ns)
idx = teo_find_shallower_state(drv, dev,
idx, avg_us);
idx, avg_ns);
}
}
}
......@@ -359,9 +419,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
* expected idle duration is shorter than the tick period length.
*/
if (((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) ||
duration_us < TICK_USEC) && !tick_nohz_tick_stopped()) {
unsigned int delta_tick_us = ktime_to_us(delta_tick);
duration_ns < TICK_NSEC) && !tick_nohz_tick_stopped()) {
*stop_tick = false;
/*
......@@ -370,8 +428,8 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
* till the closest timer including the tick, try to correct
* that.
*/
if (idx > 0 && drv->states[idx].target_residency > delta_tick_us)
idx = teo_find_shallower_state(drv, dev, idx, delta_tick_us);
if (idx > 0 && drv->states[idx].target_residency_ns > delta_tick)
idx = teo_find_shallower_state(drv, dev, idx, delta_tick);
}
return idx;
......@@ -415,7 +473,7 @@ static int teo_enable_device(struct cpuidle_driver *drv,
memset(cpu_data, 0, sizeof(*cpu_data));
for (i = 0; i < INTERVALS; i++)
cpu_data->intervals[i] = UINT_MAX;
cpu_data->intervals[i] = U64_MAX;
return 0;
}
......
......@@ -49,6 +49,8 @@ void cpuidle_poll_state_init(struct cpuidle_driver *drv)
snprintf(state->desc, CPUIDLE_DESC_LEN, "CPUIDLE CORE POLL IDLE");
state->exit_latency = 0;
state->target_residency = 0;
state->exit_latency_ns = 0;
state->target_residency_ns = 0;
state->power_usage = -1;
state->enter = poll_idle;
state->disabled = false;
......
......@@ -255,25 +255,6 @@ static ssize_t show_state_##_name(struct cpuidle_state *state, \
return sprintf(buf, "%u\n", state->_name);\
}
#define define_store_state_ull_function(_name) \
static ssize_t store_state_##_name(struct cpuidle_state *state, \
struct cpuidle_state_usage *state_usage, \
const char *buf, size_t size) \
{ \
unsigned long long value; \
int err; \
if (!capable(CAP_SYS_ADMIN)) \
return -EPERM; \
err = kstrtoull(buf, 0, &value); \
if (err) \
return err; \
if (value) \
state_usage->_name = 1; \
else \
state_usage->_name = 0; \
return size; \
}
#define define_show_state_ull_function(_name) \
static ssize_t show_state_##_name(struct cpuidle_state *state, \
struct cpuidle_state_usage *state_usage, \
......@@ -292,18 +273,60 @@ static ssize_t show_state_##_name(struct cpuidle_state *state, \
return sprintf(buf, "%s\n", state->_name);\
}
define_show_state_function(exit_latency)
define_show_state_function(target_residency)
#define define_show_state_time_function(_name) \
static ssize_t show_state_##_name(struct cpuidle_state *state, \
struct cpuidle_state_usage *state_usage, \
char *buf) \
{ \
return sprintf(buf, "%llu\n", ktime_to_us(state->_name##_ns)); \
}
define_show_state_time_function(exit_latency)
define_show_state_time_function(target_residency)
define_show_state_function(power_usage)
define_show_state_ull_function(usage)
define_show_state_ull_function(time)
define_show_state_str_function(name)
define_show_state_str_function(desc)
define_show_state_ull_function(disable)
define_store_state_ull_function(disable)
define_show_state_ull_function(above)
define_show_state_ull_function(below)
static ssize_t show_state_time(struct cpuidle_state *state,
struct cpuidle_state_usage *state_usage,
char *buf)
{
return sprintf(buf, "%llu\n", ktime_to_us(state_usage->time_ns));
}
static ssize_t show_state_disable(struct cpuidle_state *state,
struct cpuidle_state_usage *state_usage,
char *buf)
{
return sprintf(buf, "%llu\n",
state_usage->disable & CPUIDLE_STATE_DISABLED_BY_USER);
}
static ssize_t store_state_disable(struct cpuidle_state *state,
struct cpuidle_state_usage *state_usage,
const char *buf, size_t size)
{
unsigned int value;
int err;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
err = kstrtouint(buf, 0, &value);
if (err)
return err;
if (value)
state_usage->disable |= CPUIDLE_STATE_DISABLED_BY_USER;
else
state_usage->disable &= ~CPUIDLE_STATE_DISABLED_BY_USER;
return size;
}
define_one_state_ro(name, show_state_name);
define_one_state_ro(desc, show_state_desc);
define_one_state_ro(latency, show_state_exit_latency);
......
......@@ -184,7 +184,12 @@ void arch_cpu_idle_dead(void);
int cpu_report_state(int cpu);
int cpu_check_up_prepare(int cpu);
void cpu_set_state_online(int cpu);
void play_idle(unsigned long duration_us);
void play_idle_precise(u64 duration_ns, u64 latency_ns);
static inline void play_idle(unsigned long duration_us)
{
play_idle_precise(duration_us * NSEC_PER_USEC, U64_MAX);
}
#ifdef CONFIG_HOTPLUG_CPU
bool cpu_wait_death(unsigned int cpu, int seconds);
......
......@@ -29,10 +29,13 @@ struct cpuidle_driver;
* CPUIDLE DEVICE INTERFACE *
****************************/
#define CPUIDLE_STATE_DISABLED_BY_USER BIT(0)
#define CPUIDLE_STATE_DISABLED_BY_DRIVER BIT(1)
struct cpuidle_state_usage {
unsigned long long disable;
unsigned long long usage;
unsigned long long time; /* in US */
u64 time_ns;
unsigned long long above; /* Number of times it's been too deep */
unsigned long long below; /* Number of times it's been too shallow */
#ifdef CONFIG_SUSPEND
......@@ -45,6 +48,8 @@ struct cpuidle_state {
char name[CPUIDLE_NAME_LEN];
char desc[CPUIDLE_DESC_LEN];
u64 exit_latency_ns;
u64 target_residency_ns;
unsigned int flags;
unsigned int exit_latency; /* in US */
int power_usage; /* in mW */
......@@ -80,14 +85,14 @@ struct cpuidle_driver_kobj;
struct cpuidle_device {
unsigned int registered:1;
unsigned int enabled:1;
unsigned int use_deepest_state:1;
unsigned int poll_time_limit:1;
unsigned int cpu;
ktime_t next_hrtimer;
int last_state_idx;
int last_residency;
u64 last_residency_ns;
u64 poll_limit_ns;
u64 forced_idle_latency_limit_ns;
struct cpuidle_state_usage states_usage[CPUIDLE_STATE_MAX];
struct cpuidle_state_kobj *kobjs[CPUIDLE_STATE_MAX];
struct cpuidle_driver_kobj *kobj_driver;
......@@ -144,6 +149,8 @@ extern int cpuidle_register_driver(struct cpuidle_driver *drv);
extern struct cpuidle_driver *cpuidle_get_driver(void);
extern struct cpuidle_driver *cpuidle_driver_ref(void);
extern void cpuidle_driver_unref(void);
extern void cpuidle_driver_state_disabled(struct cpuidle_driver *drv, int idx,
bool disable);
extern void cpuidle_unregister_driver(struct cpuidle_driver *drv);
extern int cpuidle_register_device(struct cpuidle_device *dev);
extern void cpuidle_unregister_device(struct cpuidle_device *dev);
......@@ -181,6 +188,8 @@ static inline int cpuidle_register_driver(struct cpuidle_driver *drv)
static inline struct cpuidle_driver *cpuidle_get_driver(void) {return NULL; }
static inline struct cpuidle_driver *cpuidle_driver_ref(void) {return NULL; }
static inline void cpuidle_driver_unref(void) {}
static inline void cpuidle_driver_state_disabled(struct cpuidle_driver *drv,
int idx, bool disable) { }
static inline void cpuidle_unregister_driver(struct cpuidle_driver *drv) { }
static inline int cpuidle_register_device(struct cpuidle_device *dev)
{return -ENODEV; }
......@@ -204,18 +213,20 @@ static inline struct cpuidle_device *cpuidle_get_device(void) {return NULL; }
#ifdef CONFIG_CPU_IDLE
extern int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
struct cpuidle_device *dev);
struct cpuidle_device *dev,
u64 latency_limit_ns);
extern int cpuidle_enter_s2idle(struct cpuidle_driver *drv,
struct cpuidle_device *dev);
extern void cpuidle_use_deepest_state(bool enable);
extern void cpuidle_use_deepest_state(u64 latency_limit_ns);
#else
static inline int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
struct cpuidle_device *dev)
struct cpuidle_device *dev,
u64 latency_limit_ns)
{return -ENODEV; }
static inline int cpuidle_enter_s2idle(struct cpuidle_driver *drv,
struct cpuidle_device *dev)
{return -ENODEV; }
static inline void cpuidle_use_deepest_state(bool enable)
static inline void cpuidle_use_deepest_state(u64 latency_limit_ns)
{
}
#endif
......@@ -260,7 +271,7 @@ struct cpuidle_governor {
#ifdef CONFIG_CPU_IDLE
extern int cpuidle_register_governor(struct cpuidle_governor *gov);
extern int cpuidle_governor_latency_req(unsigned int cpu);
extern s64 cpuidle_governor_latency_req(unsigned int cpu);
#else
static inline int cpuidle_register_governor(struct cpuidle_governor *gov)
{return 0;}
......
......@@ -104,7 +104,7 @@ static int call_cpuidle(struct cpuidle_driver *drv, struct cpuidle_device *dev,
* update no idle residency and return.
*/
if (current_clr_polling_and_test()) {
dev->last_residency = 0;
dev->last_residency_ns = 0;
local_irq_enable();
return -EBUSY;
}
......@@ -165,7 +165,9 @@ static void cpuidle_idle_call(void)
* until a proper wakeup interrupt happens.
*/
if (idle_should_enter_s2idle() || dev->use_deepest_state) {
if (idle_should_enter_s2idle() || dev->forced_idle_latency_limit_ns) {
u64 max_latency_ns;
if (idle_should_enter_s2idle()) {
rcu_idle_enter();
......@@ -176,12 +178,16 @@ static void cpuidle_idle_call(void)
}
rcu_idle_exit();
max_latency_ns = U64_MAX;
} else {
max_latency_ns = dev->forced_idle_latency_limit_ns;
}
tick_nohz_idle_stop_tick();
rcu_idle_enter();
next_state = cpuidle_find_deepest_state(drv, dev);
next_state = cpuidle_find_deepest_state(drv, dev, max_latency_ns);
call_cpuidle(drv, dev, next_state);
} else {
bool stop_tick = true;
......@@ -311,7 +317,7 @@ static enum hrtimer_restart idle_inject_timer_fn(struct hrtimer *timer)
return HRTIMER_NORESTART;
}
void play_idle(unsigned long duration_us)
void play_idle_precise(u64 duration_ns, u64 latency_ns)
{
struct idle_timer it;
......@@ -323,29 +329,29 @@ void play_idle(unsigned long duration_us)
WARN_ON_ONCE(current->nr_cpus_allowed != 1);
WARN_ON_ONCE(!(current->flags & PF_KTHREAD));
WARN_ON_ONCE(!(current->flags & PF_NO_SETAFFINITY));
WARN_ON_ONCE(!duration_us);
WARN_ON_ONCE(!duration_ns);
rcu_sleep_check();
preempt_disable();
current->flags |= PF_IDLE;
cpuidle_use_deepest_state(true);
cpuidle_use_deepest_state(latency_ns);
it.done = 0;
hrtimer_init_on_stack(&it.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
it.timer.function = idle_inject_timer_fn;
hrtimer_start(&it.timer, ns_to_ktime(duration_us * NSEC_PER_USEC),
hrtimer_start(&it.timer, ns_to_ktime(duration_ns),
HRTIMER_MODE_REL_PINNED);
while (!READ_ONCE(it.done))
do_idle();
cpuidle_use_deepest_state(false);
cpuidle_use_deepest_state(0);
current->flags &= ~PF_IDLE;
preempt_fold_need_resched();
preempt_enable();
}
EXPORT_SYMBOL_GPL(play_idle);
EXPORT_SYMBOL_GPL(play_idle_precise);
void cpu_startup_entry(enum cpuhp_state state)
{
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment