Commit 4dcb78ee authored by Rafael J. Wysocki

intel_idle: Introduce 'states_off' module parameter

In certain system configurations it may not be desirable to use some
C-states assumed to be available by intel_idle and the driver needs
to be prevented from using them even before the cpuidle sysfs
interface becomes accessible to user space.  Currently, the only way
to achieve that is by setting the 'max_cstate' module parameter to a
value lower than the index of the shallowest of the C-states in
question, but that may be overly intrusive, because it effectively
makes all of the idle states deeper than the 'max_cstate' one go
away (and the C-state to avoid may be in the middle of the range
normally regarded as available).

To allow that limitation to be overcome, introduce a new module
parameter called 'states_off' to represent a list of idle states to
be disabled by default in the form of a bitmask and update the
documentation to cover it.
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
parent 3a5be9b8
...@@ -168,7 +168,7 @@ and ``idle=nomwait``. If any of them is present in the kernel command line, the ...@@ -168,7 +168,7 @@ and ``idle=nomwait``. If any of them is present in the kernel command line, the
``MWAIT`` instruction is not allowed to be used, so the initialization of ``MWAIT`` instruction is not allowed to be used, so the initialization of
``intel_idle`` will fail. ``intel_idle`` will fail.
Apart from that there are three module parameters recognized by ``intel_idle`` Apart from that there are four module parameters recognized by ``intel_idle``
itself that can be set via the kernel command line (they cannot be updated via itself that can be set via the kernel command line (they cannot be updated via
sysfs, so that is the only way to change their values). sysfs, so that is the only way to change their values).
...@@ -195,6 +195,23 @@ driver ignore the system's ACPI tables entirely or use them for all of the ...@@ -195,6 +195,23 @@ driver ignore the system's ACPI tables entirely or use them for all of the
recognized processor models, respectively (they both are unset by default and recognized processor models, respectively (they both are unset by default and
``use_acpi`` has no effect if ``no_acpi`` is set). ``use_acpi`` has no effect if ``no_acpi`` is set).
The value of the ``states_off`` module parameter (0 by default) represents a
list of idle states to be disabled by default in the form of a bitmask.
Namely, the positions of the bits that are set in the ``states_off`` value are
the indices of idle states to be disabled by default (as reflected by the names
of the corresponding idle state directories in ``sysfs``, :file:`state0`,
:file:`state1` ... :file:`state<i>` ..., where ``<i>`` is the index of the given
idle state; see :ref:`idle-states-representation` in :doc:`cpuidle`).
For example, if ``states_off`` is equal to 3, the driver will disable idle
states 0 and 1 by default, and if it is equal to 8, idle state 3 will be
disabled by default and so on (bit positions beyond the maximum idle state index
are ignored).
The idle states disabled this way can be enabled (on a per-CPU basis) from user
space via ``sysfs``.
.. _intel-idle-core-and-package-idle-states: .. _intel-idle-core-and-package-idle-states:
......
...@@ -63,6 +63,7 @@ static struct cpuidle_driver intel_idle_driver = { ...@@ -63,6 +63,7 @@ static struct cpuidle_driver intel_idle_driver = {
}; };
/* intel_idle.max_cstate=0 disables driver */ /* intel_idle.max_cstate=0 disables driver */
static int max_cstate = CPUIDLE_STATE_MAX - 1; static int max_cstate = CPUIDLE_STATE_MAX - 1;
static unsigned int disabled_states_mask;
static unsigned int mwait_substates; static unsigned int mwait_substates;
...@@ -1234,6 +1235,9 @@ static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) ...@@ -1234,6 +1235,9 @@ static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv)
if (cx->type > ACPI_STATE_C2) if (cx->type > ACPI_STATE_C2)
state->flags |= CPUIDLE_FLAG_TLB_FLUSHED; state->flags |= CPUIDLE_FLAG_TLB_FLUSHED;
if (disabled_states_mask & BIT(cstate))
state->flags |= CPUIDLE_FLAG_OFF;
state->enter = intel_idle; state->enter = intel_idle;
state->enter_s2idle = intel_idle_s2idle; state->enter_s2idle = intel_idle_s2idle;
} }
...@@ -1466,9 +1470,10 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) ...@@ -1466,9 +1470,10 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
/* Structure copy. */ /* Structure copy. */
drv->states[drv->state_count] = cpuidle_state_table[cstate]; drv->states[drv->state_count] = cpuidle_state_table[cstate];
if ((icpu->use_acpi || force_use_acpi) && if ((disabled_states_mask & BIT(drv->state_count)) ||
intel_idle_off_by_default(mwait_hint) && ((icpu->use_acpi || force_use_acpi) &&
!(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_ALWAYS_ENABLE)) intel_idle_off_by_default(mwait_hint) &&
!(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_ALWAYS_ENABLE)))
drv->states[drv->state_count].flags |= CPUIDLE_FLAG_OFF; drv->states[drv->state_count].flags |= CPUIDLE_FLAG_OFF;
drv->state_count++; drv->state_count++;
...@@ -1487,6 +1492,10 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) ...@@ -1487,6 +1492,10 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
static void __init intel_idle_cpuidle_driver_init(struct cpuidle_driver *drv) static void __init intel_idle_cpuidle_driver_init(struct cpuidle_driver *drv)
{ {
cpuidle_poll_state_init(drv); cpuidle_poll_state_init(drv);
if (disabled_states_mask & BIT(0))
drv->states[0].flags |= CPUIDLE_FLAG_OFF;
drv->state_count = 1; drv->state_count = 1;
if (icpu) if (icpu)
...@@ -1667,3 +1676,11 @@ device_initcall(intel_idle_init); ...@@ -1667,3 +1676,11 @@ device_initcall(intel_idle_init);
* is the easiest way (currently) to continue doing that. * is the easiest way (currently) to continue doing that.
*/ */
module_param(max_cstate, int, 0444); module_param(max_cstate, int, 0444);
/*
 * The positions of the bits that are set in this number are the indices of the
 * idle states to be disabled by default (as reflected by the names of the
 * corresponding idle state directories in sysfs, "state0", "state1" ...
 * "state<i>" ..., where <i> is the index of the given state).
 *
 * For example, states_off=3 disables idle states 0 and 1 by default and
 * states_off=8 disables idle state 3.  Bit positions beyond the maximum idle
 * state index are ignored.
 *
 * This only sets the initial (default) status: states disabled this way can
 * still be enabled on a per-CPU basis from user space via sysfs.  The
 * parameter itself is exposed read-only (0444), so it has to be set on the
 * kernel command line.
 */
module_param_named(states_off, disabled_states_mask, uint, 0444);
MODULE_PARM_DESC(states_off, "Mask of disabled idle states");
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment