Commit 33068b61 authored by Rafael J. Wysocki's avatar Rafael J. Wysocki

Merge branches 'pm-cpufreq' and 'pm-cpuidle'

* pm-cpufreq:
  cpufreq: governor: Always schedule work on the CPU running update
  cpufreq: Always update current frequency before startig governor
  cpufreq: Introduce cpufreq_update_current_freq()
  cpufreq: Introduce cpufreq_start_governor()
  cpufreq: powernv: Add sysfs attributes to show throttle stats
  cpufreq: acpi-cpufreq: make Intel/AMD MSR access, io port access static
  cpufreq: powernv: Define per_cpu chip pointer to optimize hot-path
  cpufreq: acpi-cpufreq: Clean up hot plug notifier callback
  intel_pstate: Do not call wrmsrl_on_cpu() with disabled interrupts
  cpufreq: Make cpufreq_quick_get() safe to call

* pm-cpuidle:
  intel_idle: Support for Intel Xeon Phi Processor x200 Product Family
  intel_idle: prevent SKL-H boot failure when C8+C9+C10 enabled
  cpuidle: menu: Fall back to polling if next timer event is near
  cpuidle: menu: use high confidence factors only when considering polling
...@@ -271,3 +271,72 @@ Description: Parameters for the CPU cache attributes ...@@ -271,3 +271,72 @@ Description: Parameters for the CPU cache attributes
- WriteBack: data is written only to the cache line and - WriteBack: data is written only to the cache line and
the modified cache line is written to main the modified cache line is written to main
memory only when it is replaced memory only when it is replaced
What: /sys/devices/system/cpu/cpuX/cpufreq/throttle_stats
/sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/turbo_stat
/sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/sub_turbo_stat
/sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/unthrottle
/sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/powercap
/sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/overtemp
/sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/supply_fault
/sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/overcurrent
/sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/occ_reset
Date: March 2016
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org>
Description: POWERNV CPUFreq driver's frequency throttle stats directory and
attributes
'cpuX/cpufreq/throttle_stats' directory contains the CPU frequency
throttle stat attributes for the chip. The throttle stats of a cpu
is common across all the cpus belonging to a chip. Below are the
throttle attributes exported in the 'throttle_stats' directory:
- turbo_stat : This file gives the total number of times the max
frequency is throttled to lower frequency in turbo (at and above
nominal frequency) range of frequencies.
- sub_turbo_stat : This file gives the total number of times the
max frequency is throttled to lower frequency in sub-turbo(below
nominal frequency) range of frequencies.
- unthrottle : This file gives the total number of times the max
frequency is unthrottled after being throttled.
- powercap : This file gives the total number of times the max
frequency is throttled due to 'Power Capping'.
- overtemp : This file gives the total number of times the max
frequency is throttled due to 'CPU Over Temperature'.
- supply_fault : This file gives the total number of times the
max frequency is throttled due to 'Power Supply Failure'.
- overcurrent : This file gives the total number of times the
max frequency is throttled due to 'Overcurrent'.
- occ_reset : This file gives the total number of times the max
frequency is throttled due to 'OCC Reset'.
The sysfs attributes representing different throttle reasons like
powercap, overtemp, supply_fault, overcurrent and occ_reset map to
the reasons provided by OCC firmware for throttling the frequency.
What: /sys/devices/system/cpu/cpufreq/policyX/throttle_stats
/sys/devices/system/cpu/cpufreq/policyX/throttle_stats/turbo_stat
/sys/devices/system/cpu/cpufreq/policyX/throttle_stats/sub_turbo_stat
/sys/devices/system/cpu/cpufreq/policyX/throttle_stats/unthrottle
/sys/devices/system/cpu/cpufreq/policyX/throttle_stats/powercap
/sys/devices/system/cpu/cpufreq/policyX/throttle_stats/overtemp
/sys/devices/system/cpu/cpufreq/policyX/throttle_stats/supply_fault
/sys/devices/system/cpu/cpufreq/policyX/throttle_stats/overcurrent
/sys/devices/system/cpu/cpufreq/policyX/throttle_stats/occ_reset
Date: March 2016
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org>
Description: POWERNV CPUFreq driver's frequency throttle stats directory and
attributes
'policyX/throttle_stats' directory and all the attributes are same as
the /sys/devices/system/cpu/cpuX/cpufreq/throttle_stats directory and
attributes which give the frequency throttle information of the chip.
...@@ -245,7 +245,7 @@ static unsigned extract_freq(u32 val, struct acpi_cpufreq_data *data) ...@@ -245,7 +245,7 @@ static unsigned extract_freq(u32 val, struct acpi_cpufreq_data *data)
} }
} }
u32 cpu_freq_read_intel(struct acpi_pct_register *not_used) static u32 cpu_freq_read_intel(struct acpi_pct_register *not_used)
{ {
u32 val, dummy; u32 val, dummy;
...@@ -253,7 +253,7 @@ u32 cpu_freq_read_intel(struct acpi_pct_register *not_used) ...@@ -253,7 +253,7 @@ u32 cpu_freq_read_intel(struct acpi_pct_register *not_used)
return val; return val;
} }
void cpu_freq_write_intel(struct acpi_pct_register *not_used, u32 val) static void cpu_freq_write_intel(struct acpi_pct_register *not_used, u32 val)
{ {
u32 lo, hi; u32 lo, hi;
...@@ -262,7 +262,7 @@ void cpu_freq_write_intel(struct acpi_pct_register *not_used, u32 val) ...@@ -262,7 +262,7 @@ void cpu_freq_write_intel(struct acpi_pct_register *not_used, u32 val)
wrmsr(MSR_IA32_PERF_CTL, lo, hi); wrmsr(MSR_IA32_PERF_CTL, lo, hi);
} }
u32 cpu_freq_read_amd(struct acpi_pct_register *not_used) static u32 cpu_freq_read_amd(struct acpi_pct_register *not_used)
{ {
u32 val, dummy; u32 val, dummy;
...@@ -270,12 +270,12 @@ u32 cpu_freq_read_amd(struct acpi_pct_register *not_used) ...@@ -270,12 +270,12 @@ u32 cpu_freq_read_amd(struct acpi_pct_register *not_used)
return val; return val;
} }
void cpu_freq_write_amd(struct acpi_pct_register *not_used, u32 val) static void cpu_freq_write_amd(struct acpi_pct_register *not_used, u32 val)
{ {
wrmsr(MSR_AMD_PERF_CTL, val, 0); wrmsr(MSR_AMD_PERF_CTL, val, 0);
} }
u32 cpu_freq_read_io(struct acpi_pct_register *reg) static u32 cpu_freq_read_io(struct acpi_pct_register *reg)
{ {
u32 val; u32 val;
...@@ -283,7 +283,7 @@ u32 cpu_freq_read_io(struct acpi_pct_register *reg) ...@@ -283,7 +283,7 @@ u32 cpu_freq_read_io(struct acpi_pct_register *reg)
return val; return val;
} }
void cpu_freq_write_io(struct acpi_pct_register *reg, u32 val) static void cpu_freq_write_io(struct acpi_pct_register *reg, u32 val)
{ {
acpi_os_write_port(reg->address, val, reg->bit_width); acpi_os_write_port(reg->address, val, reg->bit_width);
} }
...@@ -514,8 +514,10 @@ static int boost_notify(struct notifier_block *nb, unsigned long action, ...@@ -514,8 +514,10 @@ static int boost_notify(struct notifier_block *nb, unsigned long action,
*/ */
switch (action) { switch (action) {
case CPU_UP_PREPARE: case CPU_DOWN_FAILED:
case CPU_UP_PREPARE_FROZEN: case CPU_DOWN_FAILED_FROZEN:
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
boost_set_msrs(acpi_cpufreq_driver.boost_enabled, cpumask); boost_set_msrs(acpi_cpufreq_driver.boost_enabled, cpumask);
break; break;
......
...@@ -76,6 +76,7 @@ static inline bool has_target(void) ...@@ -76,6 +76,7 @@ static inline bool has_target(void)
/* internal prototypes */ /* internal prototypes */
static int cpufreq_governor(struct cpufreq_policy *policy, unsigned int event); static int cpufreq_governor(struct cpufreq_policy *policy, unsigned int event);
static unsigned int __cpufreq_get(struct cpufreq_policy *policy); static unsigned int __cpufreq_get(struct cpufreq_policy *policy);
static int cpufreq_start_governor(struct cpufreq_policy *policy);
/** /**
* Two notifier lists: the "policy" list is involved in the * Two notifier lists: the "policy" list is involved in the
...@@ -964,10 +965,7 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, unsigned int cp ...@@ -964,10 +965,7 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, unsigned int cp
cpumask_set_cpu(cpu, policy->cpus); cpumask_set_cpu(cpu, policy->cpus);
if (has_target()) { if (has_target()) {
ret = cpufreq_governor(policy, CPUFREQ_GOV_START); ret = cpufreq_start_governor(policy);
if (!ret)
ret = cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
if (ret) if (ret)
pr_err("%s: Failed to start governor\n", __func__); pr_err("%s: Failed to start governor\n", __func__);
} }
...@@ -1308,10 +1306,7 @@ static void cpufreq_offline(unsigned int cpu) ...@@ -1308,10 +1306,7 @@ static void cpufreq_offline(unsigned int cpu)
/* Start governor again for active policy */ /* Start governor again for active policy */
if (!policy_is_inactive(policy)) { if (!policy_is_inactive(policy)) {
if (has_target()) { if (has_target()) {
ret = cpufreq_governor(policy, CPUFREQ_GOV_START); ret = cpufreq_start_governor(policy);
if (!ret)
ret = cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
if (ret) if (ret)
pr_err("%s: Failed to start governor\n", __func__); pr_err("%s: Failed to start governor\n", __func__);
} }
...@@ -1401,9 +1396,17 @@ unsigned int cpufreq_quick_get(unsigned int cpu) ...@@ -1401,9 +1396,17 @@ unsigned int cpufreq_quick_get(unsigned int cpu)
{ {
struct cpufreq_policy *policy; struct cpufreq_policy *policy;
unsigned int ret_freq = 0; unsigned int ret_freq = 0;
unsigned long flags;
if (cpufreq_driver && cpufreq_driver->setpolicy && cpufreq_driver->get) read_lock_irqsave(&cpufreq_driver_lock, flags);
return cpufreq_driver->get(cpu);
if (cpufreq_driver && cpufreq_driver->setpolicy && cpufreq_driver->get) {
ret_freq = cpufreq_driver->get(cpu);
read_unlock_irqrestore(&cpufreq_driver_lock, flags);
return ret_freq;
}
read_unlock_irqrestore(&cpufreq_driver_lock, flags);
policy = cpufreq_cpu_get(cpu); policy = cpufreq_cpu_get(cpu);
if (policy) { if (policy) {
...@@ -1484,6 +1487,24 @@ unsigned int cpufreq_get(unsigned int cpu) ...@@ -1484,6 +1487,24 @@ unsigned int cpufreq_get(unsigned int cpu)
} }
EXPORT_SYMBOL(cpufreq_get); EXPORT_SYMBOL(cpufreq_get);
static unsigned int cpufreq_update_current_freq(struct cpufreq_policy *policy)
{
unsigned int new_freq;
new_freq = cpufreq_driver->get(policy->cpu);
if (!new_freq)
return 0;
if (!policy->cur) {
pr_debug("cpufreq: Driver did not initialize current freq\n");
policy->cur = new_freq;
} else if (policy->cur != new_freq && has_target()) {
cpufreq_out_of_sync(policy, new_freq);
}
return new_freq;
}
static struct subsys_interface cpufreq_interface = { static struct subsys_interface cpufreq_interface = {
.name = "cpufreq", .name = "cpufreq",
.subsys = &cpu_subsys, .subsys = &cpu_subsys,
...@@ -1583,9 +1604,7 @@ void cpufreq_resume(void) ...@@ -1583,9 +1604,7 @@ void cpufreq_resume(void)
policy); policy);
} else { } else {
down_write(&policy->rwsem); down_write(&policy->rwsem);
ret = cpufreq_governor(policy, CPUFREQ_GOV_START); ret = cpufreq_start_governor(policy);
if (!ret)
cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
up_write(&policy->rwsem); up_write(&policy->rwsem);
if (ret) if (ret)
...@@ -1593,17 +1612,6 @@ void cpufreq_resume(void) ...@@ -1593,17 +1612,6 @@ void cpufreq_resume(void)
__func__, policy); __func__, policy);
} }
} }
/*
* schedule call cpufreq_update_policy() for first-online CPU, as that
* wouldn't be hotplugged-out on suspend. It will verify that the
* current freq is in sync with what we believe it to be.
*/
policy = cpufreq_cpu_get_raw(cpumask_first(cpu_online_mask));
if (WARN_ON(!policy))
return;
schedule_work(&policy->update);
} }
/** /**
...@@ -1927,6 +1935,17 @@ static int cpufreq_governor(struct cpufreq_policy *policy, unsigned int event) ...@@ -1927,6 +1935,17 @@ static int cpufreq_governor(struct cpufreq_policy *policy, unsigned int event)
return ret; return ret;
} }
static int cpufreq_start_governor(struct cpufreq_policy *policy)
{
int ret;
if (cpufreq_driver->get && !cpufreq_driver->setpolicy)
cpufreq_update_current_freq(policy);
ret = cpufreq_governor(policy, CPUFREQ_GOV_START);
return ret ? ret : cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
}
int cpufreq_register_governor(struct cpufreq_governor *governor) int cpufreq_register_governor(struct cpufreq_governor *governor)
{ {
int err; int err;
...@@ -2063,8 +2082,10 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, ...@@ -2063,8 +2082,10 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
return cpufreq_driver->setpolicy(new_policy); return cpufreq_driver->setpolicy(new_policy);
} }
if (new_policy->governor == policy->governor) if (new_policy->governor == policy->governor) {
goto out; pr_debug("cpufreq: governor limits update\n");
return cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
}
pr_debug("governor switch\n"); pr_debug("governor switch\n");
...@@ -2092,10 +2113,11 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, ...@@ -2092,10 +2113,11 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
policy->governor = new_policy->governor; policy->governor = new_policy->governor;
ret = cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT); ret = cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT);
if (!ret) { if (!ret) {
ret = cpufreq_governor(policy, CPUFREQ_GOV_START); ret = cpufreq_start_governor(policy);
if (!ret) if (!ret) {
goto out; pr_debug("cpufreq: governor change\n");
return 0;
}
cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT);
} }
...@@ -2106,14 +2128,10 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, ...@@ -2106,14 +2128,10 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
if (cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT)) if (cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT))
policy->governor = NULL; policy->governor = NULL;
else else
cpufreq_governor(policy, CPUFREQ_GOV_START); cpufreq_start_governor(policy);
} }
return ret; return ret;
out:
pr_debug("governor: change or update limits\n");
return cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
} }
/** /**
...@@ -2144,19 +2162,11 @@ int cpufreq_update_policy(unsigned int cpu) ...@@ -2144,19 +2162,11 @@ int cpufreq_update_policy(unsigned int cpu)
* -> ask driver for current freq and notify governors about a change * -> ask driver for current freq and notify governors about a change
*/ */
if (cpufreq_driver->get && !cpufreq_driver->setpolicy) { if (cpufreq_driver->get && !cpufreq_driver->setpolicy) {
new_policy.cur = cpufreq_driver->get(cpu); new_policy.cur = cpufreq_update_current_freq(policy);
if (WARN_ON(!new_policy.cur)) { if (WARN_ON(!new_policy.cur)) {
ret = -EIO; ret = -EIO;
goto unlock; goto unlock;
} }
if (!policy->cur) {
pr_debug("Driver did not initialize current freq\n");
policy->cur = new_policy.cur;
} else {
if (policy->cur != new_policy.cur && has_target())
cpufreq_out_of_sync(policy, new_policy.cur);
}
} }
ret = cpufreq_set_policy(policy, &new_policy); ret = cpufreq_set_policy(policy, &new_policy);
......
...@@ -329,7 +329,7 @@ static void dbs_irq_work(struct irq_work *irq_work) ...@@ -329,7 +329,7 @@ static void dbs_irq_work(struct irq_work *irq_work)
struct policy_dbs_info *policy_dbs; struct policy_dbs_info *policy_dbs;
policy_dbs = container_of(irq_work, struct policy_dbs_info, irq_work); policy_dbs = container_of(irq_work, struct policy_dbs_info, irq_work);
schedule_work(&policy_dbs->work); schedule_work_on(smp_processor_id(), &policy_dbs->work);
} }
static void dbs_update_util_handler(struct update_util_data *data, u64 time, static void dbs_update_util_handler(struct update_util_data *data, u64 time,
......
...@@ -134,7 +134,7 @@ struct pstate_funcs { ...@@ -134,7 +134,7 @@ struct pstate_funcs {
int (*get_min)(void); int (*get_min)(void);
int (*get_turbo)(void); int (*get_turbo)(void);
int (*get_scaling)(void); int (*get_scaling)(void);
void (*set)(struct cpudata*, int pstate); u64 (*get_val)(struct cpudata*, int pstate);
void (*get_vid)(struct cpudata *); void (*get_vid)(struct cpudata *);
int32_t (*get_target_pstate)(struct cpudata *); int32_t (*get_target_pstate)(struct cpudata *);
}; };
...@@ -565,7 +565,7 @@ static int atom_get_turbo_pstate(void) ...@@ -565,7 +565,7 @@ static int atom_get_turbo_pstate(void)
return value & 0x7F; return value & 0x7F;
} }
static void atom_set_pstate(struct cpudata *cpudata, int pstate) static u64 atom_get_val(struct cpudata *cpudata, int pstate)
{ {
u64 val; u64 val;
int32_t vid_fp; int32_t vid_fp;
...@@ -585,9 +585,7 @@ static void atom_set_pstate(struct cpudata *cpudata, int pstate) ...@@ -585,9 +585,7 @@ static void atom_set_pstate(struct cpudata *cpudata, int pstate)
if (pstate > cpudata->pstate.max_pstate) if (pstate > cpudata->pstate.max_pstate)
vid = cpudata->vid.turbo; vid = cpudata->vid.turbo;
val |= vid; return val | vid;
wrmsrl_on_cpu(cpudata->cpu, MSR_IA32_PERF_CTL, val);
} }
static int silvermont_get_scaling(void) static int silvermont_get_scaling(void)
...@@ -711,7 +709,7 @@ static inline int core_get_scaling(void) ...@@ -711,7 +709,7 @@ static inline int core_get_scaling(void)
return 100000; return 100000;
} }
static void core_set_pstate(struct cpudata *cpudata, int pstate) static u64 core_get_val(struct cpudata *cpudata, int pstate)
{ {
u64 val; u64 val;
...@@ -719,7 +717,7 @@ static void core_set_pstate(struct cpudata *cpudata, int pstate) ...@@ -719,7 +717,7 @@ static void core_set_pstate(struct cpudata *cpudata, int pstate)
if (limits->no_turbo && !limits->turbo_disabled) if (limits->no_turbo && !limits->turbo_disabled)
val |= (u64)1 << 32; val |= (u64)1 << 32;
wrmsrl(MSR_IA32_PERF_CTL, val); return val;
} }
static int knl_get_turbo_pstate(void) static int knl_get_turbo_pstate(void)
...@@ -750,7 +748,7 @@ static struct cpu_defaults core_params = { ...@@ -750,7 +748,7 @@ static struct cpu_defaults core_params = {
.get_min = core_get_min_pstate, .get_min = core_get_min_pstate,
.get_turbo = core_get_turbo_pstate, .get_turbo = core_get_turbo_pstate,
.get_scaling = core_get_scaling, .get_scaling = core_get_scaling,
.set = core_set_pstate, .get_val = core_get_val,
.get_target_pstate = get_target_pstate_use_performance, .get_target_pstate = get_target_pstate_use_performance,
}, },
}; };
...@@ -769,7 +767,7 @@ static struct cpu_defaults silvermont_params = { ...@@ -769,7 +767,7 @@ static struct cpu_defaults silvermont_params = {
.get_max_physical = atom_get_max_pstate, .get_max_physical = atom_get_max_pstate,
.get_min = atom_get_min_pstate, .get_min = atom_get_min_pstate,
.get_turbo = atom_get_turbo_pstate, .get_turbo = atom_get_turbo_pstate,
.set = atom_set_pstate, .get_val = atom_get_val,
.get_scaling = silvermont_get_scaling, .get_scaling = silvermont_get_scaling,
.get_vid = atom_get_vid, .get_vid = atom_get_vid,
.get_target_pstate = get_target_pstate_use_cpu_load, .get_target_pstate = get_target_pstate_use_cpu_load,
...@@ -790,7 +788,7 @@ static struct cpu_defaults airmont_params = { ...@@ -790,7 +788,7 @@ static struct cpu_defaults airmont_params = {
.get_max_physical = atom_get_max_pstate, .get_max_physical = atom_get_max_pstate,
.get_min = atom_get_min_pstate, .get_min = atom_get_min_pstate,
.get_turbo = atom_get_turbo_pstate, .get_turbo = atom_get_turbo_pstate,
.set = atom_set_pstate, .get_val = atom_get_val,
.get_scaling = airmont_get_scaling, .get_scaling = airmont_get_scaling,
.get_vid = atom_get_vid, .get_vid = atom_get_vid,
.get_target_pstate = get_target_pstate_use_cpu_load, .get_target_pstate = get_target_pstate_use_cpu_load,
...@@ -812,7 +810,7 @@ static struct cpu_defaults knl_params = { ...@@ -812,7 +810,7 @@ static struct cpu_defaults knl_params = {
.get_min = core_get_min_pstate, .get_min = core_get_min_pstate,
.get_turbo = knl_get_turbo_pstate, .get_turbo = knl_get_turbo_pstate,
.get_scaling = core_get_scaling, .get_scaling = core_get_scaling,
.set = core_set_pstate, .get_val = core_get_val,
.get_target_pstate = get_target_pstate_use_performance, .get_target_pstate = get_target_pstate_use_performance,
}, },
}; };
...@@ -839,25 +837,24 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max) ...@@ -839,25 +837,24 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
*min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf); *min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf);
} }
static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate, bool force) static inline void intel_pstate_record_pstate(struct cpudata *cpu, int pstate)
{ {
int max_perf, min_perf;
if (force) {
update_turbo_state();
intel_pstate_get_min_max(cpu, &min_perf, &max_perf);
pstate = clamp_t(int, pstate, min_perf, max_perf);
if (pstate == cpu->pstate.current_pstate)
return;
}
trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu); trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);
cpu->pstate.current_pstate = pstate; cpu->pstate.current_pstate = pstate;
}
pstate_funcs.set(cpu, pstate); static void intel_pstate_set_min_pstate(struct cpudata *cpu)
{
int pstate = cpu->pstate.min_pstate;
intel_pstate_record_pstate(cpu, pstate);
/*
* Generally, there is no guarantee that this code will always run on
* the CPU being updated, so force the register update to run on the
* right CPU.
*/
wrmsrl_on_cpu(cpu->cpu, MSR_IA32_PERF_CTL,
pstate_funcs.get_val(cpu, pstate));
} }
static void intel_pstate_get_cpu_pstates(struct cpudata *cpu) static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
...@@ -870,7 +867,8 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu) ...@@ -870,7 +867,8 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
if (pstate_funcs.get_vid) if (pstate_funcs.get_vid)
pstate_funcs.get_vid(cpu); pstate_funcs.get_vid(cpu);
intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate, false);
intel_pstate_set_min_pstate(cpu);
} }
static inline void intel_pstate_calc_busy(struct cpudata *cpu) static inline void intel_pstate_calc_busy(struct cpudata *cpu)
...@@ -997,6 +995,21 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu) ...@@ -997,6 +995,21 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
return cpu->pstate.current_pstate - pid_calc(&cpu->pid, core_busy); return cpu->pstate.current_pstate - pid_calc(&cpu->pid, core_busy);
} }
static inline void intel_pstate_update_pstate(struct cpudata *cpu, int pstate)
{
int max_perf, min_perf;
update_turbo_state();
intel_pstate_get_min_max(cpu, &min_perf, &max_perf);
pstate = clamp_t(int, pstate, min_perf, max_perf);
if (pstate == cpu->pstate.current_pstate)
return;
intel_pstate_record_pstate(cpu, pstate);
wrmsrl(MSR_IA32_PERF_CTL, pstate_funcs.get_val(cpu, pstate));
}
static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
{ {
int from, target_pstate; int from, target_pstate;
...@@ -1006,7 +1019,7 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) ...@@ -1006,7 +1019,7 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
target_pstate = pstate_funcs.get_target_pstate(cpu); target_pstate = pstate_funcs.get_target_pstate(cpu);
intel_pstate_set_pstate(cpu, target_pstate, true); intel_pstate_update_pstate(cpu, target_pstate);
sample = &cpu->sample; sample = &cpu->sample;
trace_pstate_sample(fp_toint(sample->core_pct_busy), trace_pstate_sample(fp_toint(sample->core_pct_busy),
...@@ -1180,7 +1193,7 @@ static void intel_pstate_stop_cpu(struct cpufreq_policy *policy) ...@@ -1180,7 +1193,7 @@ static void intel_pstate_stop_cpu(struct cpufreq_policy *policy)
if (hwp_active) if (hwp_active)
return; return;
intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate, false); intel_pstate_set_min_pstate(cpu);
} }
static int intel_pstate_cpu_init(struct cpufreq_policy *policy) static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
...@@ -1255,7 +1268,7 @@ static void copy_cpu_funcs(struct pstate_funcs *funcs) ...@@ -1255,7 +1268,7 @@ static void copy_cpu_funcs(struct pstate_funcs *funcs)
pstate_funcs.get_min = funcs->get_min; pstate_funcs.get_min = funcs->get_min;
pstate_funcs.get_turbo = funcs->get_turbo; pstate_funcs.get_turbo = funcs->get_turbo;
pstate_funcs.get_scaling = funcs->get_scaling; pstate_funcs.get_scaling = funcs->get_scaling;
pstate_funcs.set = funcs->set; pstate_funcs.get_val = funcs->get_val;
pstate_funcs.get_vid = funcs->get_vid; pstate_funcs.get_vid = funcs->get_vid;
pstate_funcs.get_target_pstate = funcs->get_target_pstate; pstate_funcs.get_target_pstate = funcs->get_target_pstate;
......
...@@ -44,7 +44,6 @@ ...@@ -44,7 +44,6 @@
static struct cpufreq_frequency_table powernv_freqs[POWERNV_MAX_PSTATES+1]; static struct cpufreq_frequency_table powernv_freqs[POWERNV_MAX_PSTATES+1];
static bool rebooting, throttled, occ_reset; static bool rebooting, throttled, occ_reset;
static unsigned int *core_to_chip_map;
static const char * const throttle_reason[] = { static const char * const throttle_reason[] = {
"No throttling", "No throttling",
...@@ -55,6 +54,16 @@ static const char * const throttle_reason[] = { ...@@ -55,6 +54,16 @@ static const char * const throttle_reason[] = {
"OCC Reset" "OCC Reset"
}; };
enum throttle_reason_type {
NO_THROTTLE = 0,
POWERCAP,
CPU_OVERTEMP,
POWER_SUPPLY_FAILURE,
OVERCURRENT,
OCC_RESET_THROTTLE,
OCC_MAX_REASON
};
static struct chip { static struct chip {
unsigned int id; unsigned int id;
bool throttled; bool throttled;
...@@ -62,9 +71,13 @@ static struct chip { ...@@ -62,9 +71,13 @@ static struct chip {
u8 throttle_reason; u8 throttle_reason;
cpumask_t mask; cpumask_t mask;
struct work_struct throttle; struct work_struct throttle;
int throttle_turbo;
int throttle_sub_turbo;
int reason[OCC_MAX_REASON];
} *chips; } *chips;
static int nr_chips; static int nr_chips;
static DEFINE_PER_CPU(struct chip *, chip_info);
/* /*
* Note: The set of pstates consists of contiguous integers, the * Note: The set of pstates consists of contiguous integers, the
...@@ -196,6 +209,42 @@ static struct freq_attr *powernv_cpu_freq_attr[] = { ...@@ -196,6 +209,42 @@ static struct freq_attr *powernv_cpu_freq_attr[] = {
NULL, NULL,
}; };
#define throttle_attr(name, member) \
static ssize_t name##_show(struct cpufreq_policy *policy, char *buf) \
{ \
struct chip *chip = per_cpu(chip_info, policy->cpu); \
\
return sprintf(buf, "%u\n", chip->member); \
} \
\
static struct freq_attr throttle_attr_##name = __ATTR_RO(name) \
throttle_attr(unthrottle, reason[NO_THROTTLE]);
throttle_attr(powercap, reason[POWERCAP]);
throttle_attr(overtemp, reason[CPU_OVERTEMP]);
throttle_attr(supply_fault, reason[POWER_SUPPLY_FAILURE]);
throttle_attr(overcurrent, reason[OVERCURRENT]);
throttle_attr(occ_reset, reason[OCC_RESET_THROTTLE]);
throttle_attr(turbo_stat, throttle_turbo);
throttle_attr(sub_turbo_stat, throttle_sub_turbo);
static struct attribute *throttle_attrs[] = {
&throttle_attr_unthrottle.attr,
&throttle_attr_powercap.attr,
&throttle_attr_overtemp.attr,
&throttle_attr_supply_fault.attr,
&throttle_attr_overcurrent.attr,
&throttle_attr_occ_reset.attr,
&throttle_attr_turbo_stat.attr,
&throttle_attr_sub_turbo_stat.attr,
NULL,
};
static const struct attribute_group throttle_attr_grp = {
.name = "throttle_stats",
.attrs = throttle_attrs,
};
/* Helper routines */ /* Helper routines */
/* Access helpers to power mgt SPR */ /* Access helpers to power mgt SPR */
...@@ -324,34 +373,35 @@ static inline unsigned int get_nominal_index(void) ...@@ -324,34 +373,35 @@ static inline unsigned int get_nominal_index(void)
static void powernv_cpufreq_throttle_check(void *data) static void powernv_cpufreq_throttle_check(void *data)
{ {
struct chip *chip;
unsigned int cpu = smp_processor_id(); unsigned int cpu = smp_processor_id();
unsigned int chip_id = core_to_chip_map[cpu_core_index_of_thread(cpu)];
unsigned long pmsr; unsigned long pmsr;
int pmsr_pmax, i; int pmsr_pmax;
pmsr = get_pmspr(SPRN_PMSR); pmsr = get_pmspr(SPRN_PMSR);
chip = this_cpu_read(chip_info);
for (i = 0; i < nr_chips; i++)
if (chips[i].id == chip_id)
break;
/* Check for Pmax Capping */ /* Check for Pmax Capping */
pmsr_pmax = (s8)PMSR_MAX(pmsr); pmsr_pmax = (s8)PMSR_MAX(pmsr);
if (pmsr_pmax != powernv_pstate_info.max) { if (pmsr_pmax != powernv_pstate_info.max) {
if (chips[i].throttled) if (chip->throttled)
goto next; goto next;
chips[i].throttled = true; chip->throttled = true;
if (pmsr_pmax < powernv_pstate_info.nominal) if (pmsr_pmax < powernv_pstate_info.nominal) {
pr_warn_once("CPU %d on Chip %u has Pmax reduced below nominal frequency (%d < %d)\n", pr_warn_once("CPU %d on Chip %u has Pmax reduced below nominal frequency (%d < %d)\n",
cpu, chips[i].id, pmsr_pmax, cpu, chip->id, pmsr_pmax,
powernv_pstate_info.nominal); powernv_pstate_info.nominal);
trace_powernv_throttle(chips[i].id, chip->throttle_sub_turbo++;
throttle_reason[chips[i].throttle_reason], } else {
chip->throttle_turbo++;
}
trace_powernv_throttle(chip->id,
throttle_reason[chip->throttle_reason],
pmsr_pmax); pmsr_pmax);
} else if (chips[i].throttled) { } else if (chip->throttled) {
chips[i].throttled = false; chip->throttled = false;
trace_powernv_throttle(chips[i].id, trace_powernv_throttle(chip->id,
throttle_reason[chips[i].throttle_reason], throttle_reason[chip->throttle_reason],
pmsr_pmax); pmsr_pmax);
} }
...@@ -411,6 +461,21 @@ static int powernv_cpufreq_cpu_init(struct cpufreq_policy *policy) ...@@ -411,6 +461,21 @@ static int powernv_cpufreq_cpu_init(struct cpufreq_policy *policy)
for (i = 0; i < threads_per_core; i++) for (i = 0; i < threads_per_core; i++)
cpumask_set_cpu(base + i, policy->cpus); cpumask_set_cpu(base + i, policy->cpus);
if (!policy->driver_data) {
int ret;
ret = sysfs_create_group(&policy->kobj, &throttle_attr_grp);
if (ret) {
pr_info("Failed to create throttle stats directory for cpu %d\n",
policy->cpu);
return ret;
}
/*
* policy->driver_data is used as a flag for one-time
* creation of throttle sysfs files.
*/
policy->driver_data = policy;
}
return cpufreq_table_validate_and_show(policy, powernv_freqs); return cpufreq_table_validate_and_show(policy, powernv_freqs);
} }
...@@ -517,8 +582,10 @@ static int powernv_cpufreq_occ_msg(struct notifier_block *nb, ...@@ -517,8 +582,10 @@ static int powernv_cpufreq_occ_msg(struct notifier_block *nb,
break; break;
if (omsg.throttle_status >= 0 && if (omsg.throttle_status >= 0 &&
omsg.throttle_status <= OCC_MAX_THROTTLE_STATUS) omsg.throttle_status <= OCC_MAX_THROTTLE_STATUS) {
chips[i].throttle_reason = omsg.throttle_status; chips[i].throttle_reason = omsg.throttle_status;
chips[i].reason[omsg.throttle_status]++;
}
if (!omsg.throttle_status) if (!omsg.throttle_status)
chips[i].restore = true; chips[i].restore = true;
...@@ -558,47 +625,34 @@ static int init_chip_info(void) ...@@ -558,47 +625,34 @@ static int init_chip_info(void)
unsigned int chip[256]; unsigned int chip[256];
unsigned int cpu, i; unsigned int cpu, i;
unsigned int prev_chip_id = UINT_MAX; unsigned int prev_chip_id = UINT_MAX;
cpumask_t cpu_mask;
int ret = -ENOMEM;
core_to_chip_map = kcalloc(cpu_nr_cores(), sizeof(unsigned int),
GFP_KERNEL);
if (!core_to_chip_map)
goto out;
cpumask_copy(&cpu_mask, cpu_possible_mask); for_each_possible_cpu(cpu) {
for_each_cpu(cpu, &cpu_mask) {
unsigned int id = cpu_to_chip_id(cpu); unsigned int id = cpu_to_chip_id(cpu);
if (prev_chip_id != id) { if (prev_chip_id != id) {
prev_chip_id = id; prev_chip_id = id;
chip[nr_chips++] = id; chip[nr_chips++] = id;
} }
core_to_chip_map[cpu_core_index_of_thread(cpu)] = id;
cpumask_andnot(&cpu_mask, &cpu_mask, cpu_sibling_mask(cpu));
} }
chips = kcalloc(nr_chips, sizeof(struct chip), GFP_KERNEL); chips = kcalloc(nr_chips, sizeof(struct chip), GFP_KERNEL);
if (!chips) if (!chips)
goto free_chip_map; return -ENOMEM;
for (i = 0; i < nr_chips; i++) { for (i = 0; i < nr_chips; i++) {
chips[i].id = chip[i]; chips[i].id = chip[i];
cpumask_copy(&chips[i].mask, cpumask_of_node(chip[i])); cpumask_copy(&chips[i].mask, cpumask_of_node(chip[i]));
INIT_WORK(&chips[i].throttle, powernv_cpufreq_work_fn); INIT_WORK(&chips[i].throttle, powernv_cpufreq_work_fn);
for_each_cpu(cpu, &chips[i].mask)
per_cpu(chip_info, cpu) = &chips[i];
} }
return 0; return 0;
free_chip_map:
kfree(core_to_chip_map);
out:
return ret;
} }
static inline void clean_chip_info(void) static inline void clean_chip_info(void)
{ {
kfree(chips); kfree(chips);
kfree(core_to_chip_map);
} }
static inline void unregister_all_notifiers(void) static inline void unregister_all_notifiers(void)
......
...@@ -196,7 +196,7 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev); ...@@ -196,7 +196,7 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev);
* of points is below a threshold. If it is... then use the * of points is below a threshold. If it is... then use the
* average of these 8 points as the estimated value. * average of these 8 points as the estimated value.
*/ */
static void get_typical_interval(struct menu_device *data) static unsigned int get_typical_interval(struct menu_device *data)
{ {
int i, divisor; int i, divisor;
unsigned int max, thresh, avg; unsigned int max, thresh, avg;
...@@ -253,9 +253,7 @@ static void get_typical_interval(struct menu_device *data) ...@@ -253,9 +253,7 @@ static void get_typical_interval(struct menu_device *data)
if (likely(variance <= U64_MAX/36)) { if (likely(variance <= U64_MAX/36)) {
if ((((u64)avg*avg > variance*36) && (divisor * 4 >= INTERVALS * 3)) if ((((u64)avg*avg > variance*36) && (divisor * 4 >= INTERVALS * 3))
|| variance <= 400) { || variance <= 400) {
if (data->next_timer_us > avg) return avg;
data->predicted_us = avg;
return;
} }
} }
...@@ -269,7 +267,7 @@ static void get_typical_interval(struct menu_device *data) ...@@ -269,7 +267,7 @@ static void get_typical_interval(struct menu_device *data)
* with sporadic activity with a bunch of short pauses. * with sporadic activity with a bunch of short pauses.
*/ */
if ((divisor * 4) <= INTERVALS * 3) if ((divisor * 4) <= INTERVALS * 3)
return; return UINT_MAX;
thresh = max - 1; thresh = max - 1;
goto again; goto again;
...@@ -286,6 +284,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) ...@@ -286,6 +284,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY); int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);
int i; int i;
unsigned int interactivity_req; unsigned int interactivity_req;
unsigned int expected_interval;
unsigned long nr_iowaiters, cpu_load; unsigned long nr_iowaiters, cpu_load;
if (data->needs_update) { if (data->needs_update) {
...@@ -312,31 +311,42 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) ...@@ -312,31 +311,42 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
data->correction_factor[data->bucket], data->correction_factor[data->bucket],
RESOLUTION * DECAY); RESOLUTION * DECAY);
get_typical_interval(data); expected_interval = get_typical_interval(data);
expected_interval = min(expected_interval, data->next_timer_us);
/*
* Performance multiplier defines a minimum predicted idle
* duration / latency ratio. Adjust the latency limit if
* necessary.
*/
interactivity_req = data->predicted_us / performance_multiplier(nr_iowaiters, cpu_load);
if (latency_req > interactivity_req)
latency_req = interactivity_req;
if (CPUIDLE_DRIVER_STATE_START > 0) { if (CPUIDLE_DRIVER_STATE_START > 0) {
data->last_state_idx = CPUIDLE_DRIVER_STATE_START - 1; struct cpuidle_state *s = &drv->states[CPUIDLE_DRIVER_STATE_START];
unsigned int polling_threshold;
/* /*
* We want to default to C1 (hlt), not to busy polling * We want to default to C1 (hlt), not to busy polling
* unless the timer is happening really really soon. * unless the timer is happening really really soon, or
* C1's exit latency exceeds the user configured limit.
*/ */
if (interactivity_req > 20 && polling_threshold = max_t(unsigned int, 20, s->target_residency);
!drv->states[CPUIDLE_DRIVER_STATE_START].disabled && if (data->next_timer_us > polling_threshold &&
dev->states_usage[CPUIDLE_DRIVER_STATE_START].disable == 0) latency_req > s->exit_latency && !s->disabled &&
!dev->states_usage[CPUIDLE_DRIVER_STATE_START].disable)
data->last_state_idx = CPUIDLE_DRIVER_STATE_START; data->last_state_idx = CPUIDLE_DRIVER_STATE_START;
else
data->last_state_idx = CPUIDLE_DRIVER_STATE_START - 1;
} else { } else {
data->last_state_idx = CPUIDLE_DRIVER_STATE_START; data->last_state_idx = CPUIDLE_DRIVER_STATE_START;
} }
/*
* Use the lowest expected idle interval to pick the idle state.
*/
data->predicted_us = min(data->predicted_us, expected_interval);
/*
* Use the performance multiplier and the user-configurable
* latency_req to determine the maximum exit latency.
*/
interactivity_req = data->predicted_us / performance_multiplier(nr_iowaiters, cpu_load);
if (latency_req > interactivity_req)
latency_req = interactivity_req;
/* /*
* Find the idle state with the lowest power while satisfying * Find the idle state with the lowest power while satisfying
* our constraints. * our constraints.
......
...@@ -65,7 +65,7 @@ ...@@ -65,7 +65,7 @@
#include <asm/mwait.h> #include <asm/mwait.h>
#include <asm/msr.h> #include <asm/msr.h>
#define INTEL_IDLE_VERSION "0.4" #define INTEL_IDLE_VERSION "0.4.1"
#define PREFIX "intel_idle: " #define PREFIX "intel_idle: "
static struct cpuidle_driver intel_idle_driver = { static struct cpuidle_driver intel_idle_driver = {
...@@ -716,6 +716,26 @@ static struct cpuidle_state avn_cstates[] = { ...@@ -716,6 +716,26 @@ static struct cpuidle_state avn_cstates[] = {
{ {
.enter = NULL } .enter = NULL }
}; };
static struct cpuidle_state knl_cstates[] = {
{
.name = "C1-KNL",
.desc = "MWAIT 0x00",
.flags = MWAIT2flg(0x00),
.exit_latency = 1,
.target_residency = 2,
.enter = &intel_idle,
.enter_freeze = intel_idle_freeze },
{
.name = "C6-KNL",
.desc = "MWAIT 0x10",
.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 120,
.target_residency = 500,
.enter = &intel_idle,
.enter_freeze = intel_idle_freeze },
{
.enter = NULL }
};
/** /**
* intel_idle * intel_idle
...@@ -890,6 +910,10 @@ static const struct idle_cpu idle_cpu_avn = { ...@@ -890,6 +910,10 @@ static const struct idle_cpu idle_cpu_avn = {
.disable_promotion_to_c1e = true, .disable_promotion_to_c1e = true,
}; };
static const struct idle_cpu idle_cpu_knl = {
.state_table = knl_cstates,
};
#define ICPU(model, cpu) \ #define ICPU(model, cpu) \
{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_MWAIT, (unsigned long)&cpu } { X86_VENDOR_INTEL, 6, model, X86_FEATURE_MWAIT, (unsigned long)&cpu }
...@@ -921,6 +945,7 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = { ...@@ -921,6 +945,7 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = {
ICPU(0x56, idle_cpu_bdw), ICPU(0x56, idle_cpu_bdw),
ICPU(0x4e, idle_cpu_skl), ICPU(0x4e, idle_cpu_skl),
ICPU(0x5e, idle_cpu_skl), ICPU(0x5e, idle_cpu_skl),
ICPU(0x57, idle_cpu_knl),
{} {}
}; };
MODULE_DEVICE_TABLE(x86cpu, intel_idle_ids); MODULE_DEVICE_TABLE(x86cpu, intel_idle_ids);
...@@ -994,36 +1019,92 @@ static void intel_idle_cpuidle_devices_uninit(void) ...@@ -994,36 +1019,92 @@ static void intel_idle_cpuidle_devices_uninit(void)
} }
/* /*
* intel_idle_state_table_update() * ivt_idle_state_table_update(void)
* *
* Update the default state_table for this CPU-id * Tune IVT multi-socket targets
*
* Currently used to access tuned IVT multi-socket targets
* Assumption: num_sockets == (max_package_num + 1) * Assumption: num_sockets == (max_package_num + 1)
*/ */
void intel_idle_state_table_update(void) static void ivt_idle_state_table_update(void)
{ {
/* IVT uses a different table for 1-2, 3-4, and > 4 sockets */ /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */
if (boot_cpu_data.x86_model == 0x3e) { /* IVT */ int cpu, package_num, num_sockets = 1;
int cpu, package_num, num_sockets = 1;
for_each_online_cpu(cpu) {
for_each_online_cpu(cpu) { package_num = topology_physical_package_id(cpu);
package_num = topology_physical_package_id(cpu); if (package_num + 1 > num_sockets) {
if (package_num + 1 > num_sockets) { num_sockets = package_num + 1;
num_sockets = package_num + 1;
if (num_sockets > 4) {
if (num_sockets > 4) { cpuidle_state_table = ivt_cstates_8s;
cpuidle_state_table = ivt_cstates_8s; return;
return;
}
} }
} }
}
if (num_sockets > 2)
cpuidle_state_table = ivt_cstates_4s;
if (num_sockets > 2) /* else, 1 and 2 socket systems use default ivt_cstates */
cpuidle_state_table = ivt_cstates_4s; }
/* else, 1 and 2 socket systems use default ivt_cstates */ /*
* sklh_idle_state_table_update(void)
*
* On SKL-H (model 0x5e) disable C8 and C9 if:
* C10 is enabled and SGX disabled
*/
static void sklh_idle_state_table_update(void)
{
unsigned long long msr;
unsigned int eax, ebx, ecx, edx;
/* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */
if (max_cstate <= 7)
return;
/* if PC10 not present in CPUID.MWAIT.EDX */
if ((mwait_substates & (0xF << 28)) == 0)
return;
rdmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr);
/* PC10 is not enabled in PKG C-state limit */
if ((msr & 0xF) != 8)
return;
ecx = 0;
cpuid(7, &eax, &ebx, &ecx, &edx);
/* if SGX is present */
if (ebx & (1 << 2)) {
rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);
/* if SGX is enabled */
if (msr & (1 << 18))
return;
}
skl_cstates[5].disabled = 1; /* C8-SKL */
skl_cstates[6].disabled = 1; /* C9-SKL */
}
/*
* intel_idle_state_table_update()
*
* Update the default state_table for this CPU-id
*/
static void intel_idle_state_table_update(void)
{
switch (boot_cpu_data.x86_model) {
case 0x3e: /* IVT */
ivt_idle_state_table_update();
break;
case 0x5e: /* SKL-H */
sklh_idle_state_table_update();
break;
} }
return;
} }
/* /*
...@@ -1063,6 +1144,14 @@ static int __init intel_idle_cpuidle_driver_init(void) ...@@ -1063,6 +1144,14 @@ static int __init intel_idle_cpuidle_driver_init(void)
if (num_substates == 0) if (num_substates == 0)
continue; continue;
/* if state marked as disabled, skip it */
if (cpuidle_state_table[cstate].disabled != 0) {
pr_debug(PREFIX "state %s is disabled",
cpuidle_state_table[cstate].name);
continue;
}
if (((mwait_cstate + 1) > 2) && if (((mwait_cstate + 1) > 2) &&
!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) !boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
mark_tsc_unstable("TSC halts in idle" mark_tsc_unstable("TSC halts in idle"
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment