Commit 5cbba605 authored by Linus Torvalds

Merge tag 'pm-5.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm

Pull power management updates from Rafael Wysocki:
 "These address some PCI device power management issues, add new
  hardware support to the RAPL power capping driver, add HWP guaranteed
  performance change notification support to the intel_pstate driver,
  replace deprecated CPU-hotplug functions in a few places, update CPU
  PM notifiers to use raw spinlocks, update the PM domains framework
  (new DT property support, Kconfig fix), do a couple of cleanups in
  code related to system sleep, and improve the energy model and the
  schedutil cpufreq governor.

  Specifics:

   - Address 3 PCI device power management issues (Rafael Wysocki).

   - Add Power Limit4 support for Alder Lake to the Intel RAPL power
     capping driver (Sumeet Pawnikar).

   - Add HWP guaranteed performance change notification support to the
     intel_pstate driver (Srinivas Pandruvada).

   - Replace deprecated CPU-hotplug functions in code related to power
     management (Sebastian Andrzej Siewior).

   - Update CPU PM notifiers to use raw spinlocks (Valentin Schneider).

   - Add support for 'required-opps' DT property to the generic power
     domains (genpd) framework and use this property for I2C on ARM64
     sc7180 (Rajendra Nayak).

   - Fix Kconfig issue related to genpd (Geert Uytterhoeven).

   - Increase energy calculation precision in the Energy Model (Lukasz
     Luba).

   - Fix kobject deletion in the exit code of the schedutil cpufreq
     governor (Kevin Hao).

   - Unmark some functions as kernel-doc in the PM core to avoid
     false-positive documentation build warnings (Randy Dunlap).

   - Check RTC features instead of ops in suspend_test (Alexandre
     Belloni)"

* tag 'pm-5.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm:
  PM: domains: Fix domain attach for CONFIG_PM_OPP=n
  powercap: Add Power Limit4 support for Alder Lake SoC
  cpufreq: intel_pstate: Process HWP Guaranteed change notification
  thermal: intel: Allow processing of HWP interrupt
  notifier: Remove atomic_notifier_call_chain_robust()
  PM: cpu: Make notifier chain use a raw_spinlock_t
  PM: sleep: unmark 'state' functions as kernel-doc
  arm64: dts: sc7180: Add required-opps for i2c
  PM: domains: Add support for 'required-opps' to set default perf state
  opp: Don't print an error if required-opps is missing
  cpufreq: schedutil: Use kobject release() method to free sugov_tunables
  PM: EM: Increase energy calculation precision
  PM: sleep: check RTC features instead of ops in suspend_test
  PM: sleep: s2idle: Replace deprecated CPU-hotplug functions
  cpufreq: Replace deprecated CPU-hotplug functions
  powercap: intel_rapl: Replace deprecated CPU-hotplug functions
  PCI: PM: Enable PME if it can be signaled from D3cold
  PCI: PM: Avoid forcing PCI_D0 for wakeup reasons inconsistently
  PCI: Use pci_update_current_state() in pci_enable_device_flags()
parents 9b2eacd8 fe583359
......@@ -786,6 +786,8 @@ i2c0: i2c@880000 {
<&aggre1_noc MASTER_QUP_0 0 &mc_virt SLAVE_EBI1 0>;
interconnect-names = "qup-core", "qup-config",
"qup-memory";
power-domains = <&rpmhpd SC7180_CX>;
required-opps = <&rpmhpd_opp_low_svs>;
status = "disabled";
};
......@@ -838,6 +840,8 @@ i2c1: i2c@884000 {
<&aggre1_noc MASTER_QUP_0 0 &mc_virt SLAVE_EBI1 0>;
interconnect-names = "qup-core", "qup-config",
"qup-memory";
power-domains = <&rpmhpd SC7180_CX>;
required-opps = <&rpmhpd_opp_low_svs>;
status = "disabled";
};
......@@ -890,6 +894,8 @@ i2c2: i2c@888000 {
<&aggre1_noc MASTER_QUP_0 0 &mc_virt SLAVE_EBI1 0>;
interconnect-names = "qup-core", "qup-config",
"qup-memory";
power-domains = <&rpmhpd SC7180_CX>;
required-opps = <&rpmhpd_opp_low_svs>;
status = "disabled";
};
......@@ -924,6 +930,8 @@ i2c3: i2c@88c000 {
<&aggre1_noc MASTER_QUP_0 0 &mc_virt SLAVE_EBI1 0>;
interconnect-names = "qup-core", "qup-config",
"qup-memory";
power-domains = <&rpmhpd SC7180_CX>;
required-opps = <&rpmhpd_opp_low_svs>;
status = "disabled";
};
......@@ -976,6 +984,8 @@ i2c4: i2c@890000 {
<&aggre1_noc MASTER_QUP_0 0 &mc_virt SLAVE_EBI1 0>;
interconnect-names = "qup-core", "qup-config",
"qup-memory";
power-domains = <&rpmhpd SC7180_CX>;
required-opps = <&rpmhpd_opp_low_svs>;
status = "disabled";
};
......@@ -1010,6 +1020,8 @@ i2c5: i2c@894000 {
<&aggre1_noc MASTER_QUP_0 0 &mc_virt SLAVE_EBI1 0>;
interconnect-names = "qup-core", "qup-config",
"qup-memory";
power-domains = <&rpmhpd SC7180_CX>;
required-opps = <&rpmhpd_opp_low_svs>;
status = "disabled";
};
......@@ -1075,6 +1087,8 @@ i2c6: i2c@a80000 {
<&aggre2_noc MASTER_QUP_1 0 &mc_virt SLAVE_EBI1 0>;
interconnect-names = "qup-core", "qup-config",
"qup-memory";
power-domains = <&rpmhpd SC7180_CX>;
required-opps = <&rpmhpd_opp_low_svs>;
status = "disabled";
};
......@@ -1127,6 +1141,8 @@ i2c7: i2c@a84000 {
<&aggre2_noc MASTER_QUP_1 0 &mc_virt SLAVE_EBI1 0>;
interconnect-names = "qup-core", "qup-config",
"qup-memory";
power-domains = <&rpmhpd SC7180_CX>;
required-opps = <&rpmhpd_opp_low_svs>;
status = "disabled";
};
......@@ -1161,6 +1177,8 @@ i2c8: i2c@a88000 {
<&aggre2_noc MASTER_QUP_1 0 &mc_virt SLAVE_EBI1 0>;
interconnect-names = "qup-core", "qup-config",
"qup-memory";
power-domains = <&rpmhpd SC7180_CX>;
required-opps = <&rpmhpd_opp_low_svs>;
status = "disabled";
};
......@@ -1213,6 +1231,8 @@ i2c9: i2c@a8c000 {
<&aggre2_noc MASTER_QUP_1 0 &mc_virt SLAVE_EBI1 0>;
interconnect-names = "qup-core", "qup-config",
"qup-memory";
power-domains = <&rpmhpd SC7180_CX>;
required-opps = <&rpmhpd_opp_low_svs>;
status = "disabled";
};
......@@ -1247,6 +1267,8 @@ i2c10: i2c@a90000 {
<&aggre2_noc MASTER_QUP_1 0 &mc_virt SLAVE_EBI1 0>;
interconnect-names = "qup-core", "qup-config",
"qup-memory";
power-domains = <&rpmhpd SC7180_CX>;
required-opps = <&rpmhpd_opp_low_svs>;
status = "disabled";
};
......@@ -1299,6 +1321,8 @@ i2c11: i2c@a94000 {
<&aggre2_noc MASTER_QUP_1 0 &mc_virt SLAVE_EBI1 0>;
interconnect-names = "qup-core", "qup-config",
"qup-memory";
power-domains = <&rpmhpd SC7180_CX>;
required-opps = <&rpmhpd_opp_low_svs>;
status = "disabled";
};
......
......@@ -2604,6 +2604,12 @@ static void genpd_dev_pm_detach(struct device *dev, bool power_off)
dev_dbg(dev, "removing from PM domain %s\n", pd->name);
/* Drop the default performance state */
if (dev_gpd_data(dev)->default_pstate) {
dev_pm_genpd_set_performance_state(dev, 0);
dev_gpd_data(dev)->default_pstate = 0;
}
for (i = 1; i < GENPD_RETRY_MAX_MS; i <<= 1) {
ret = genpd_remove_device(pd, dev);
if (ret != -EAGAIN)
......@@ -2643,6 +2649,7 @@ static int __genpd_dev_pm_attach(struct device *dev, struct device *base_dev,
{
struct of_phandle_args pd_args;
struct generic_pm_domain *pd;
int pstate;
int ret;
ret = of_parse_phandle_with_args(dev->of_node, "power-domains",
......@@ -2681,10 +2688,29 @@ static int __genpd_dev_pm_attach(struct device *dev, struct device *base_dev,
genpd_unlock(pd);
}
if (ret)
if (ret) {
genpd_remove_device(pd, dev);
return -EPROBE_DEFER;
}
return ret ? -EPROBE_DEFER : 1;
/* Set the default performance state */
pstate = of_get_required_opp_performance_state(dev->of_node, index);
if (pstate < 0 && pstate != -ENODEV && pstate != -EOPNOTSUPP) {
ret = pstate;
goto err;
} else if (pstate > 0) {
ret = dev_pm_genpd_set_performance_state(dev, pstate);
if (ret)
goto err;
dev_gpd_data(dev)->default_pstate = pstate;
}
return 1;
err:
dev_err(dev, "failed to set required performance state for power-domain %s: %d\n",
pd->name, ret);
genpd_remove_device(pd, dev);
return ret;
}
/**
......
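For context, what the genpd change automates at attach time is the step a consumer driver would otherwise have to do by hand in its probe path: resolve the performance state behind its 'required-opps' phandle and apply it to the power domain it was attached to. A minimal sketch of that manual equivalent, under the assumption that the device has already been attached to a single genpd (function name invented, error handling trimmed):

#include <linux/device.h>
#include <linux/pm_domain.h>
#include <linux/pm_opp.h>

static int example_apply_default_pstate(struct device *dev)
{
	int pstate;

	/* index 0: the first (or only) power domain of the device */
	pstate = of_get_required_opp_performance_state(dev->of_node, 0);
	if (pstate < 0)
		/* a missing property is not an error, mirroring the genpd code */
		return (pstate == -ENODEV) ? 0 : pstate;

	return dev_pm_genpd_set_performance_state(dev, pstate);
}

With the series applied, genpd performs this step itself in __genpd_dev_pm_attach() and undoes it in genpd_dev_pm_detach(), so drivers only need the DT property.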
......@@ -163,9 +163,9 @@ static ssize_t store_cpb(struct cpufreq_policy *policy, const char *buf,
if (ret || val > 1)
return -EINVAL;
get_online_cpus();
cpus_read_lock();
set_boost(policy, val);
put_online_cpus();
cpus_read_unlock();
return count;
}
......
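The CPU-hotplug API change that repeats throughout the rest of this merge is purely a rename: get_online_cpus()/put_online_cpus() were deprecated aliases for taking the CPU hotplug lock for reading, and callers now use cpus_read_lock()/cpus_read_unlock() directly. A minimal sketch of the pattern in a hypothetical helper (the function and its callback are invented for illustration):

#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/smp.h>

/* Run fn on every online CPU while holding the hotplug read lock, so no
 * CPU can be offlined while we are iterating. */
static void example_on_each_online_cpu(void (*fn)(void *info), void *info)
{
	unsigned int cpu;

	cpus_read_lock();		/* was: get_online_cpus() */
	for_each_online_cpu(cpu)
		smp_call_function_single(cpu, fn, info, 1);
	cpus_read_unlock();		/* was: put_online_cpus() */
}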
......@@ -2654,18 +2654,18 @@ int cpufreq_boost_trigger_state(int state)
cpufreq_driver->boost_enabled = state;
write_unlock_irqrestore(&cpufreq_driver_lock, flags);
get_online_cpus();
cpus_read_lock();
for_each_active_policy(policy) {
ret = cpufreq_driver->set_boost(policy, state);
if (ret)
goto err_reset_state;
}
put_online_cpus();
cpus_read_unlock();
return 0;
err_reset_state:
put_online_cpus();
cpus_read_unlock();
write_lock_irqsave(&cpufreq_driver_lock, flags);
cpufreq_driver->boost_enabled = !state;
......
......@@ -418,7 +418,7 @@ static void od_set_powersave_bias(unsigned int powersave_bias)
default_powersave_bias = powersave_bias;
cpumask_clear(&done);
get_online_cpus();
cpus_read_lock();
for_each_online_cpu(cpu) {
struct cpufreq_policy *policy;
struct policy_dbs_info *policy_dbs;
......@@ -442,7 +442,7 @@ static void od_set_powersave_bias(unsigned int powersave_bias)
od_tuners = dbs_data->tuners;
od_tuners->powersave_bias = default_powersave_bias;
}
put_online_cpus();
cpus_read_unlock();
}
void od_register_powersave_bias_handler(unsigned int (*f)
......
......@@ -32,6 +32,7 @@
#include <asm/cpu_device_id.h>
#include <asm/cpufeature.h>
#include <asm/intel-family.h>
#include "../drivers/thermal/intel/thermal_interrupt.h"
#define INTEL_PSTATE_SAMPLING_INTERVAL (10 * NSEC_PER_MSEC)
......@@ -219,6 +220,7 @@ struct global_params {
* @sched_flags: Store scheduler flags for possible cross CPU update
* @hwp_boost_min: Last HWP boosted min performance
* @suspended: Whether or not the driver has been suspended.
* @hwp_notify_work: workqueue for HWP notifications.
*
* This structure stores per CPU instance data for all CPUs.
*/
......@@ -257,6 +259,7 @@ struct cpudata {
unsigned int sched_flags;
u32 hwp_boost_min;
bool suspended;
struct delayed_work hwp_notify_work;
};
static struct cpudata **all_cpu_data;
......@@ -1625,6 +1628,40 @@ static void intel_pstate_sysfs_hide_hwp_dynamic_boost(void)
/************************** sysfs end ************************/
static void intel_pstate_notify_work(struct work_struct *work)
{
mutex_lock(&intel_pstate_driver_lock);
cpufreq_update_policy(smp_processor_id());
wrmsrl(MSR_HWP_STATUS, 0);
mutex_unlock(&intel_pstate_driver_lock);
}
void notify_hwp_interrupt(void)
{
unsigned int this_cpu = smp_processor_id();
struct cpudata *cpudata;
u64 value;
if (!hwp_active || !boot_cpu_has(X86_FEATURE_HWP_NOTIFY))
return;
rdmsrl(MSR_HWP_STATUS, value);
if (!(value & 0x01))
return;
cpudata = all_cpu_data[this_cpu];
schedule_delayed_work_on(this_cpu, &cpudata->hwp_notify_work, msecs_to_jiffies(10));
}
static void intel_pstate_enable_hwp_interrupt(struct cpudata *cpudata)
{
/* Enable HWP notification interrupt for guaranteed performance change */
if (boot_cpu_has(X86_FEATURE_HWP_NOTIFY)) {
INIT_DELAYED_WORK(&cpudata->hwp_notify_work, intel_pstate_notify_work);
wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x01);
}
}
static void intel_pstate_hwp_enable(struct cpudata *cpudata)
{
/* First disable HWP notification interrupt as we don't process them */
......@@ -1634,6 +1671,8 @@ static void intel_pstate_hwp_enable(struct cpudata *cpudata)
wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1);
if (cpudata->epp_default == -EINVAL)
cpudata->epp_default = intel_pstate_get_epp(cpudata, 0);
intel_pstate_enable_hwp_interrupt(cpudata);
}
static int atom_get_min_pstate(void)
......@@ -2969,7 +3008,7 @@ static void intel_pstate_driver_cleanup(void)
{
unsigned int cpu;
get_online_cpus();
cpus_read_lock();
for_each_online_cpu(cpu) {
if (all_cpu_data[cpu]) {
if (intel_pstate_driver == &intel_pstate)
......@@ -2979,7 +3018,7 @@ static void intel_pstate_driver_cleanup(void)
all_cpu_data[cpu] = NULL;
}
}
put_online_cpus();
cpus_read_unlock();
intel_pstate_driver = NULL;
}
......
......@@ -1180,7 +1180,7 @@ static int powernowk8_init(void)
if (!x86_match_cpu(powernow_k8_ids))
return -ENODEV;
get_online_cpus();
cpus_read_lock();
for_each_online_cpu(i) {
smp_call_function_single(i, check_supported_cpu, &ret, 1);
if (!ret)
......@@ -1188,10 +1188,10 @@ static int powernowk8_init(void)
}
if (supported_cpus != num_online_cpus()) {
put_online_cpus();
cpus_read_unlock();
return -ENODEV;
}
put_online_cpus();
cpus_read_unlock();
ret = cpufreq_register_driver(&cpufreq_amd64_driver);
if (ret)
......
......@@ -918,7 +918,7 @@ static void powernv_cpufreq_work_fn(struct work_struct *work)
unsigned int cpu;
cpumask_t mask;
get_online_cpus();
cpus_read_lock();
cpumask_and(&mask, &chip->mask, cpu_online_mask);
smp_call_function_any(&mask,
powernv_cpufreq_throttle_check, NULL, 0);
......@@ -939,7 +939,7 @@ static void powernv_cpufreq_work_fn(struct work_struct *work)
cpufreq_cpu_put(policy);
}
out:
put_online_cpus();
cpus_read_unlock();
}
static int powernv_cpufreq_occ_msg(struct notifier_block *nb,
......
......@@ -95,15 +95,7 @@ static struct dev_pm_opp *_find_opp_of_np(struct opp_table *opp_table,
static struct device_node *of_parse_required_opp(struct device_node *np,
int index)
{
struct device_node *required_np;
required_np = of_parse_phandle(np, "required-opps", index);
if (unlikely(!required_np)) {
pr_err("%s: Unable to parse required-opps: %pOF, index: %d\n",
__func__, np, index);
}
return required_np;
return of_parse_phandle(np, "required-opps", index);
}
/* The caller must call dev_pm_opp_put_opp_table() after the table is used */
......@@ -1328,7 +1320,7 @@ int of_get_required_opp_performance_state(struct device_node *np, int index)
required_np = of_parse_required_opp(np, index);
if (!required_np)
return -EINVAL;
return -ENODEV;
opp_table = _find_table_of_opp_np(required_np);
if (IS_ERR(opp_table)) {
......
......@@ -1906,11 +1906,7 @@ static int pci_enable_device_flags(struct pci_dev *dev, unsigned long flags)
* so that things like MSI message writing will behave as expected
* (e.g. if the device really is in D0 at enable time).
*/
if (dev->pm_cap) {
u16 pmcsr;
pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr);
dev->current_state = (pmcsr & PCI_PM_CTRL_STATE_MASK);
}
pci_update_current_state(dev, dev->current_state);
if (atomic_inc_return(&dev->enable_cnt) > 1)
return 0; /* already enabled */
......@@ -2495,7 +2491,14 @@ static int __pci_enable_wake(struct pci_dev *dev, pci_power_t state, bool enable
if (enable) {
int error;
if (pci_pme_capable(dev, state))
/*
* Enable PME signaling if the device can signal PME from
* D3cold regardless of whether or not it can signal PME from
* the current target state, because that will allow it to
* signal PME when the hierarchy above it goes into D3cold and
* the device itself ends up in D3cold as a result of that.
*/
if (pci_pme_capable(dev, state) || pci_pme_capable(dev, PCI_D3cold))
pci_pme_active(dev, true);
else
ret = 1;
......@@ -2599,16 +2602,20 @@ static pci_power_t pci_target_state(struct pci_dev *dev, bool wakeup)
if (dev->current_state == PCI_D3cold)
target_state = PCI_D3cold;
if (wakeup) {
if (wakeup && dev->pme_support) {
pci_power_t state = target_state;
/*
* Find the deepest state from which the device can generate
* PME#.
*/
if (dev->pme_support) {
while (target_state
&& !(dev->pme_support & (1 << target_state)))
target_state--;
}
while (state && !(dev->pme_support & (1 << state)))
state--;
if (state)
return state;
else if (dev->pme_support & 1)
return PCI_D0;
}
return target_state;
......
......@@ -158,16 +158,16 @@ static int get_energy_counter(struct powercap_zone *power_zone,
/* prevent CPU hotplug, make sure the RAPL domain does not go
* away while reading the counter.
*/
get_online_cpus();
cpus_read_lock();
rd = power_zone_to_rapl_domain(power_zone);
if (!rapl_read_data_raw(rd, ENERGY_COUNTER, true, &energy_now)) {
*energy_raw = energy_now;
put_online_cpus();
cpus_read_unlock();
return 0;
}
put_online_cpus();
cpus_read_unlock();
return -EIO;
}
......@@ -216,11 +216,11 @@ static int set_domain_enable(struct powercap_zone *power_zone, bool mode)
if (rd->state & DOMAIN_STATE_BIOS_LOCKED)
return -EACCES;
get_online_cpus();
cpus_read_lock();
rapl_write_data_raw(rd, PL1_ENABLE, mode);
if (rapl_defaults->set_floor_freq)
rapl_defaults->set_floor_freq(rd, mode);
put_online_cpus();
cpus_read_unlock();
return 0;
}
......@@ -234,13 +234,13 @@ static int get_domain_enable(struct powercap_zone *power_zone, bool *mode)
*mode = false;
return 0;
}
get_online_cpus();
cpus_read_lock();
if (rapl_read_data_raw(rd, PL1_ENABLE, true, &val)) {
put_online_cpus();
cpus_read_unlock();
return -EIO;
}
*mode = val;
put_online_cpus();
cpus_read_unlock();
return 0;
}
......@@ -317,7 +317,7 @@ static int set_power_limit(struct powercap_zone *power_zone, int cid,
int ret = 0;
int id;
get_online_cpus();
cpus_read_lock();
rd = power_zone_to_rapl_domain(power_zone);
id = contraint_to_pl(rd, cid);
if (id < 0) {
......@@ -350,7 +350,7 @@ static int set_power_limit(struct powercap_zone *power_zone, int cid,
if (!ret)
package_power_limit_irq_save(rp);
set_exit:
put_online_cpus();
cpus_read_unlock();
return ret;
}
......@@ -363,7 +363,7 @@ static int get_current_power_limit(struct powercap_zone *power_zone, int cid,
int ret = 0;
int id;
get_online_cpus();
cpus_read_lock();
rd = power_zone_to_rapl_domain(power_zone);
id = contraint_to_pl(rd, cid);
if (id < 0) {
......@@ -382,7 +382,7 @@ static int get_current_power_limit(struct powercap_zone *power_zone, int cid,
prim = POWER_LIMIT4;
break;
default:
put_online_cpus();
cpus_read_unlock();
return -EINVAL;
}
if (rapl_read_data_raw(rd, prim, true, &val))
......@@ -391,7 +391,7 @@ static int get_current_power_limit(struct powercap_zone *power_zone, int cid,
*data = val;
get_exit:
put_online_cpus();
cpus_read_unlock();
return ret;
}
......@@ -403,7 +403,7 @@ static int set_time_window(struct powercap_zone *power_zone, int cid,
int ret = 0;
int id;
get_online_cpus();
cpus_read_lock();
rd = power_zone_to_rapl_domain(power_zone);
id = contraint_to_pl(rd, cid);
if (id < 0) {
......@@ -423,7 +423,7 @@ static int set_time_window(struct powercap_zone *power_zone, int cid,
}
set_time_exit:
put_online_cpus();
cpus_read_unlock();
return ret;
}
......@@ -435,7 +435,7 @@ static int get_time_window(struct powercap_zone *power_zone, int cid,
int ret = 0;
int id;
get_online_cpus();
cpus_read_lock();
rd = power_zone_to_rapl_domain(power_zone);
id = contraint_to_pl(rd, cid);
if (id < 0) {
......@@ -458,14 +458,14 @@ static int get_time_window(struct powercap_zone *power_zone, int cid,
val = 0;
break;
default:
put_online_cpus();
cpus_read_unlock();
return -EINVAL;
}
if (!ret)
*data = val;
get_time_exit:
put_online_cpus();
cpus_read_unlock();
return ret;
}
......@@ -491,7 +491,7 @@ static int get_max_power(struct powercap_zone *power_zone, int id, u64 *data)
int prim;
int ret = 0;
get_online_cpus();
cpus_read_lock();
rd = power_zone_to_rapl_domain(power_zone);
switch (rd->rpl[id].prim_id) {
case PL1_ENABLE:
......@@ -504,7 +504,7 @@ static int get_max_power(struct powercap_zone *power_zone, int id, u64 *data)
prim = MAX_POWER;
break;
default:
put_online_cpus();
cpus_read_unlock();
return -EINVAL;
}
if (rapl_read_data_raw(rd, prim, true, &val))
......@@ -516,7 +516,7 @@ static int get_max_power(struct powercap_zone *power_zone, int id, u64 *data)
if (rd->rpl[id].prim_id == PL4_ENABLE)
*data = *data * 2;
put_online_cpus();
cpus_read_unlock();
return ret;
}
......@@ -1358,7 +1358,7 @@ static void power_limit_state_save(void)
struct rapl_domain *rd;
int nr_pl, ret, i;
get_online_cpus();
cpus_read_lock();
list_for_each_entry(rp, &rapl_packages, plist) {
if (!rp->power_zone)
continue;
......@@ -1390,7 +1390,7 @@ static void power_limit_state_save(void)
}
}
}
put_online_cpus();
cpus_read_unlock();
}
static void power_limit_state_restore(void)
......@@ -1399,7 +1399,7 @@ static void power_limit_state_restore(void)
struct rapl_domain *rd;
int nr_pl, i;
get_online_cpus();
cpus_read_lock();
list_for_each_entry(rp, &rapl_packages, plist) {
if (!rp->power_zone)
continue;
......@@ -1425,7 +1425,7 @@ static void power_limit_state_restore(void)
}
}
}
put_online_cpus();
cpus_read_unlock();
}
static int rapl_pm_callback(struct notifier_block *nb,
......
......@@ -138,6 +138,8 @@ static int rapl_msr_write_raw(int cpu, struct reg_action *ra)
/* List of verified CPUs. */
static const struct x86_cpu_id pl4_support_ids[] = {
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_TIGERLAKE_L, X86_FEATURE_ANY },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ALDERLAKE, X86_FEATURE_ANY },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ALDERLAKE_L, X86_FEATURE_ANY },
{}
};
......
......@@ -569,13 +569,18 @@ static void notify_thresholds(__u64 msr_val)
platform_thermal_notify(msr_val);
}
void __weak notify_hwp_interrupt(void)
{
wrmsrl_safe(MSR_HWP_STATUS, 0);
}
/* Thermal transition interrupt handler */
void intel_thermal_interrupt(void)
{
__u64 msr_val;
if (static_cpu_has(X86_FEATURE_HWP))
wrmsrl_safe(MSR_HWP_STATUS, 0);
notify_hwp_interrupt();
rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
......
......@@ -12,4 +12,7 @@ extern int (*platform_thermal_notify)(__u64 msr_val);
* callback has rate control */
extern bool (*platform_thermal_package_rate_control)(void);
/* Handle HWP interrupt */
extern void notify_hwp_interrupt(void);
#endif /* _INTEL_THERMAL_INTERRUPT_H */
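The glue between the thermal interrupt handler and intel_pstate is a weak default: therm_throt.c provides a __weak notify_hwp_interrupt() that merely clears MSR_HWP_STATUS, and the strong definition in intel_pstate.c (shown earlier) replaces it at link time whenever intel_pstate is built in. A stripped-down sketch of that weak/strong override idiom, with invented names:

/* hooks.c: weak fallback (the kernel spells the attribute __weak) */
void __attribute__((weak)) example_hwp_hook(void)
{
	/* default: nothing to do */
}

/* irq.c: the call site never needs to know whether an override exists */
void example_thermal_irq(void)
{
	example_hwp_hook();
}

/* driver.c: a strong definition anywhere in the final image wins at link time */
void example_hwp_hook(void)
{
	/* e.g. schedule deferred work to re-evaluate the cpufreq policy */
}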
......@@ -53,6 +53,22 @@ struct em_perf_domain {
#ifdef CONFIG_ENERGY_MODEL
#define EM_MAX_POWER 0xFFFF
/*
* Increase resolution of energy estimation calculations for 64-bit
* architectures. The extra resolution improves decision made by EAS for the
* task placement when two Performance Domains might provide similar energy
* estimation values (w/o better resolution the values could be equal).
*
* We increase resolution only if we have enough bits to allow this increased
* resolution (i.e. 64-bit). The costs for increasing resolution when 32-bit
* are pretty high and the returns do not justify the increased costs.
*/
#ifdef CONFIG_64BIT
#define em_scale_power(p) ((p) * 1000)
#else
#define em_scale_power(p) (p)
#endif
struct em_data_callback {
/**
* active_power() - Provide power at the next performance state of
......
......@@ -168,8 +168,6 @@ extern int raw_notifier_call_chain(struct raw_notifier_head *nh,
extern int srcu_notifier_call_chain(struct srcu_notifier_head *nh,
unsigned long val, void *v);
extern int atomic_notifier_call_chain_robust(struct atomic_notifier_head *nh,
unsigned long val_up, unsigned long val_down, void *v);
extern int blocking_notifier_call_chain_robust(struct blocking_notifier_head *nh,
unsigned long val_up, unsigned long val_down, void *v);
extern int raw_notifier_call_chain_robust(struct raw_notifier_head *nh,
......
......@@ -198,6 +198,7 @@ struct generic_pm_domain_data {
struct notifier_block *power_nb;
int cpu;
unsigned int performance_state;
unsigned int default_pstate;
unsigned int rpm_pstate;
ktime_t next_wakeup;
void *data;
......
......@@ -13,19 +13,32 @@
#include <linux/spinlock.h>
#include <linux/syscore_ops.h>
static ATOMIC_NOTIFIER_HEAD(cpu_pm_notifier_chain);
/*
* atomic_notifiers use a spinlock_t, which can block under PREEMPT_RT.
* Notifications for cpu_pm will be issued by the idle task itself, which can
* never block, IOW it requires using a raw_spinlock_t.
*/
static struct {
struct raw_notifier_head chain;
raw_spinlock_t lock;
} cpu_pm_notifier = {
.chain = RAW_NOTIFIER_INIT(cpu_pm_notifier.chain),
.lock = __RAW_SPIN_LOCK_UNLOCKED(cpu_pm_notifier.lock),
};
static int cpu_pm_notify(enum cpu_pm_event event)
{
int ret;
/*
* atomic_notifier_call_chain has a RCU read critical section, which
* could be disfunctional in cpu idle. Copy RCU_NONIDLE code to let
* RCU know this.
* This introduces a RCU read critical section, which could be
* disfunctional in cpu idle. Copy RCU_NONIDLE code to let RCU know
* this.
*/
rcu_irq_enter_irqson();
ret = atomic_notifier_call_chain(&cpu_pm_notifier_chain, event, NULL);
rcu_read_lock();
ret = raw_notifier_call_chain(&cpu_pm_notifier.chain, event, NULL);
rcu_read_unlock();
rcu_irq_exit_irqson();
return notifier_to_errno(ret);
......@@ -33,10 +46,13 @@ static int cpu_pm_notify(enum cpu_pm_event event)
static int cpu_pm_notify_robust(enum cpu_pm_event event_up, enum cpu_pm_event event_down)
{
unsigned long flags;
int ret;
rcu_irq_enter_irqson();
ret = atomic_notifier_call_chain_robust(&cpu_pm_notifier_chain, event_up, event_down, NULL);
raw_spin_lock_irqsave(&cpu_pm_notifier.lock, flags);
ret = raw_notifier_call_chain_robust(&cpu_pm_notifier.chain, event_up, event_down, NULL);
raw_spin_unlock_irqrestore(&cpu_pm_notifier.lock, flags);
rcu_irq_exit_irqson();
return notifier_to_errno(ret);
......@@ -49,12 +65,17 @@ static int cpu_pm_notify_robust(enum cpu_pm_event event_up, enum cpu_pm_event ev
* Add a driver to a list of drivers that are notified about
* CPU and CPU cluster low power entry and exit.
*
* This function may sleep, and has the same return conditions as
* raw_notifier_chain_register.
* This function has the same return conditions as raw_notifier_chain_register.
*/
int cpu_pm_register_notifier(struct notifier_block *nb)
{
return atomic_notifier_chain_register(&cpu_pm_notifier_chain, nb);
unsigned long flags;
int ret;
raw_spin_lock_irqsave(&cpu_pm_notifier.lock, flags);
ret = raw_notifier_chain_register(&cpu_pm_notifier.chain, nb);
raw_spin_unlock_irqrestore(&cpu_pm_notifier.lock, flags);
return ret;
}
EXPORT_SYMBOL_GPL(cpu_pm_register_notifier);
......@@ -64,12 +85,17 @@ EXPORT_SYMBOL_GPL(cpu_pm_register_notifier);
*
* Remove a driver from the CPU PM notifier list.
*
* This function may sleep, and has the same return conditions as
* raw_notifier_chain_unregister.
* This function has the same return conditions as raw_notifier_chain_unregister.
*/
int cpu_pm_unregister_notifier(struct notifier_block *nb)
{
return atomic_notifier_chain_unregister(&cpu_pm_notifier_chain, nb);
unsigned long flags;
int ret;
raw_spin_lock_irqsave(&cpu_pm_notifier.lock, flags);
ret = raw_notifier_chain_unregister(&cpu_pm_notifier.chain, nb);
raw_spin_unlock_irqrestore(&cpu_pm_notifier.lock, flags);
return ret;
}
EXPORT_SYMBOL_GPL(cpu_pm_unregister_notifier);
......
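Nothing changes for users of the CPU PM notifier API; registration still takes a struct notifier_block, but the chain behind it is now a raw notifier guarded by a raw_spinlock_t, so the notify path stays atomic even on PREEMPT_RT. A hedged sketch of a client (driver and callback are hypothetical), keeping in mind that the callback runs from the idle path and must not sleep:

#include <linux/cpu_pm.h>
#include <linux/init.h>
#include <linux/notifier.h>

/* Hypothetical callback: invoked with interrupts disabled, may not sleep. */
static int example_cpu_pm_cb(struct notifier_block *nb,
			     unsigned long event, void *unused)
{
	switch (event) {
	case CPU_PM_ENTER:
		/* save per-CPU hardware context before low-power entry */
		break;
	case CPU_PM_ENTER_FAILED:
	case CPU_PM_EXIT:
		/* restore context after (attempted) low-power exit */
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block example_cpu_pm_nb = {
	.notifier_call = example_cpu_pm_cb,
};

static int __init example_init(void)
{
	return cpu_pm_register_notifier(&example_cpu_pm_nb);
}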
......@@ -172,25 +172,6 @@ int atomic_notifier_chain_unregister(struct atomic_notifier_head *nh,
}
EXPORT_SYMBOL_GPL(atomic_notifier_chain_unregister);
int atomic_notifier_call_chain_robust(struct atomic_notifier_head *nh,
unsigned long val_up, unsigned long val_down, void *v)
{
unsigned long flags;
int ret;
/*
* Musn't use RCU; because then the notifier list can
* change between the up and down traversal.
*/
spin_lock_irqsave(&nh->lock, flags);
ret = notifier_call_chain_robust(&nh->head, val_up, val_down, v);
spin_unlock_irqrestore(&nh->lock, flags);
return ret;
}
EXPORT_SYMBOL_GPL(atomic_notifier_call_chain_robust);
NOKPROBE_SYMBOL(atomic_notifier_call_chain_robust);
/**
* atomic_notifier_call_chain - Call functions in an atomic notifier chain
* @nh: Pointer to head of the atomic notifier chain
......
......@@ -170,7 +170,9 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
/* Compute the cost of each performance state. */
fmax = (u64) table[nr_states - 1].frequency;
for (i = 0; i < nr_states; i++) {
table[i].cost = div64_u64(fmax * table[i].power,
unsigned long power_res = em_scale_power(table[i].power);
table[i].cost = div64_u64(fmax * power_res,
table[i].frequency);
}
......
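The gain from em_scale_power() is easiest to see with numbers: cost is computed as fmax * power / frequency in integer arithmetic, so two performance states with very similar power-to-frequency ratios can end up with identical costs and become indistinguishable to EAS. Multiplying power by 1000 on 64-bit keeps three more digits of that ratio. A userspace-style illustration with invented values (kHz and mW):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t fmax = 2000000;			/* kHz */
	uint64_t freq[2]  = { 1000000, 1005000 };	/* kHz */
	uint64_t power[2] = { 100, 101 };		/* mW  */

	for (int i = 0; i < 2; i++) {
		uint64_t cost_old = fmax * power[i] / freq[i];
		uint64_t cost_new = fmax * (power[i] * 1000) / freq[i];

		/* old: both states cost 200; new: 200000 vs 200995 */
		printf("state %d: old=%llu new=%llu\n", i,
		       (unsigned long long)cost_old,
		       (unsigned long long)cost_new);
	}
	return 0;
}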
......@@ -577,7 +577,7 @@ static inline void pm_print_times_init(void) {}
struct kobject *power_kobj;
/**
/*
* state - control system sleep states.
*
* show() returns available sleep state labels, which may be "mem", "standby",
......
......@@ -96,7 +96,7 @@ static void s2idle_enter(void)
s2idle_state = S2IDLE_STATE_ENTER;
raw_spin_unlock_irq(&s2idle_lock);
get_online_cpus();
cpus_read_lock();
cpuidle_resume();
/* Push all the CPUs into the idle loop. */
......@@ -106,7 +106,7 @@ static void s2idle_enter(void)
s2idle_state == S2IDLE_STATE_WAKE);
cpuidle_pause();
put_online_cpus();
cpus_read_unlock();
raw_spin_lock_irq(&s2idle_lock);
......
......@@ -129,7 +129,7 @@ static int __init has_wakealarm(struct device *dev, const void *data)
{
struct rtc_device *candidate = to_rtc_device(dev);
if (!candidate->ops->set_alarm)
if (!test_bit(RTC_FEATURE_ALARM, candidate->features))
return 0;
if (!device_may_wakeup(candidate->dev.parent))
return 0;
......
......@@ -537,9 +537,17 @@ static struct attribute *sugov_attrs[] = {
};
ATTRIBUTE_GROUPS(sugov);
static void sugov_tunables_free(struct kobject *kobj)
{
struct gov_attr_set *attr_set = container_of(kobj, struct gov_attr_set, kobj);
kfree(to_sugov_tunables(attr_set));
}
static struct kobj_type sugov_tunables_ktype = {
.default_groups = sugov_groups,
.sysfs_ops = &governor_sysfs_ops,
.release = &sugov_tunables_free,
};
/********************** cpufreq governor interface *********************/
......@@ -639,12 +647,10 @@ static struct sugov_tunables *sugov_tunables_alloc(struct sugov_policy *sg_polic
return tunables;
}
static void sugov_tunables_free(struct sugov_tunables *tunables)
static void sugov_clear_global_tunables(void)
{
if (!have_governor_per_policy())
global_tunables = NULL;
kfree(tunables);
}
static int sugov_init(struct cpufreq_policy *policy)
......@@ -707,7 +713,7 @@ static int sugov_init(struct cpufreq_policy *policy)
fail:
kobject_put(&tunables->attr_set.kobj);
policy->governor_data = NULL;
sugov_tunables_free(tunables);
sugov_clear_global_tunables();
stop_kthread:
sugov_kthread_stop(sg_policy);
......@@ -734,7 +740,7 @@ static void sugov_exit(struct cpufreq_policy *policy)
count = gov_attr_set_put(&tunables->attr_set, &sg_policy->tunables_hook);
policy->governor_data = NULL;
if (!count)
sugov_tunables_free(tunables);
sugov_clear_global_tunables();
mutex_unlock(&global_tunables_lock);
......
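The schedutil fix follows the general kobject rule: the memory backing a kobject may only be released from the ktype's release() callback, which the kobject core invokes once the last reference is gone; freeing it directly on the exit path (as the old sugov_tunables_free() did) can race with a still-open sysfs file holding a reference. A minimal sketch of the pattern with a hypothetical structure:

#include <linux/kobject.h>
#include <linux/slab.h>

struct example_tunables {
	struct kobject kobj;
	unsigned int rate_limit_us;
};

/* The only place where the backing memory is allowed to be freed. */
static void example_tunables_release(struct kobject *kobj)
{
	kfree(container_of(kobj, struct example_tunables, kobj));
}

static struct kobj_type example_tunables_ktype = {
	.release = example_tunables_release,
};

/* Teardown drops the reference; release() then does the kfree(). */
static void example_teardown(struct example_tunables *t)
{
	kobject_put(&t->kobj);
}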