Commit 404ea9f1 authored by Rafael J. Wysocki

Merge powerclamp driver updates (that depend on cpuidle material) for v4.10.

parents 0e7414b7 feb6cd6a
...@@ -43,7 +43,6 @@ ...@@ -43,7 +43,6 @@
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/delay.h> #include <linux/delay.h>
#include <linux/kthread.h> #include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/cpu.h> #include <linux/cpu.h>
#include <linux/thermal.h> #include <linux/thermal.h>
#include <linux/slab.h> #include <linux/slab.h>
@@ -86,11 +85,26 @@ static unsigned int control_cpu; /* The cpu assigned to collect stat and update
					 */
 static bool clamping;
 
+static const struct sched_param sparam = {
+	.sched_priority = MAX_USER_RT_PRIO / 2,
+};
+struct powerclamp_worker_data {
+	struct kthread_worker *worker;
+	struct kthread_work balancing_work;
+	struct kthread_delayed_work idle_injection_work;
+	unsigned int cpu;
+	unsigned int count;
+	unsigned int guard;
+	unsigned int window_size_now;
+	unsigned int target_ratio;
+	unsigned int duration_jiffies;
+	bool clamping;
+};
+
-static struct task_struct * __percpu *powerclamp_thread;
+static struct powerclamp_worker_data * __percpu worker_data;
 static struct thermal_cooling_device *cooling_dev;
 static unsigned long *cpu_clamping_mask; /* bit map for tracking per cpu
-					  * clamping thread
+					  * clamping kthread worker
					  */
 static unsigned int duration;
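The per-CPU worker_data pointer introduced above is the standard percpu-allocation pattern. A minimal sketch of that pattern outside this patch (the demo_* names are hypothetical):

#include <linux/cpu.h>
#include <linux/percpu.h>

struct demo_data {
	unsigned int cpu;	/* mirrors powerclamp_worker_data::cpu */
	unsigned int count;
};

static struct demo_data * __percpu demo;

static int demo_alloc(void)
{
	int cpu;

	/* one zeroed instance per possible CPU */
	demo = alloc_percpu(struct demo_data);
	if (!demo)
		return -ENOMEM;

	/* address one CPU's copy via per_cpu_ptr() */
	for_each_online_cpu(cpu)
		per_cpu_ptr(demo, cpu)->cpu = cpu;
	return 0;
}

static void demo_free(void)
{
	free_percpu(demo);
}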
@@ -262,11 +276,6 @@ static u64 pkg_state_counter(void)
 	return count;
 }
 
-static void noop_timer(unsigned long foo)
-{
-	/* empty... just the fact that we get the interrupt wakes us up */
-}
-
 static unsigned int get_compensation(int ratio)
 {
 	unsigned int comp = 0;
@@ -368,103 +377,79 @@ static bool powerclamp_adjust_controls(unsigned int target_ratio,
 	return set_target_ratio + guard <= current_ratio;
 }
 
-static int clamp_thread(void *arg)
+static void clamp_balancing_func(struct kthread_work *work)
 {
-	int cpunr = (unsigned long)arg;
-	DEFINE_TIMER(wakeup_timer, noop_timer, 0, 0);
-	static const struct sched_param param = {
-		.sched_priority = MAX_USER_RT_PRIO/2,
-	};
-	unsigned int count = 0;
-	unsigned int target_ratio;
+	struct powerclamp_worker_data *w_data;
+	int sleeptime;
+	unsigned long target_jiffies;
+	unsigned int compensated_ratio;
+	int interval; /* jiffies to sleep for each attempt */
 
-	set_bit(cpunr, cpu_clamping_mask);
-	set_freezable();
-	init_timer_on_stack(&wakeup_timer);
-	sched_setscheduler(current, SCHED_FIFO, &param);
-
-	while (true == clamping && !kthread_should_stop() &&
-		cpu_online(cpunr)) {
-		int sleeptime;
-		unsigned long target_jiffies;
-		unsigned int guard;
-		unsigned int compensated_ratio;
-		int interval; /* jiffies to sleep for each attempt */
-		unsigned int duration_jiffies = msecs_to_jiffies(duration);
-		unsigned int window_size_now;
-
-		try_to_freeze();
-		/*
-		 * make sure user selected ratio does not take effect until
-		 * the next round. adjust target_ratio if user has changed
-		 * target such that we can converge quickly.
-		 */
-		target_ratio = set_target_ratio;
-		guard = 1 + target_ratio/20;
-		window_size_now = window_size;
-		count++;
-
-		/*
-		 * systems may have different ability to enter package level
-		 * c-states, thus we need to compensate the injected idle ratio
-		 * to achieve the actual target reported by the HW.
-		 */
-		compensated_ratio = target_ratio +
-			get_compensation(target_ratio);
-		if (compensated_ratio <= 0)
-			compensated_ratio = 1;
-		interval = duration_jiffies * 100 / compensated_ratio;
-
-		/* align idle time */
-		target_jiffies = roundup(jiffies, interval);
-		sleeptime = target_jiffies - jiffies;
-		if (sleeptime <= 0)
-			sleeptime = 1;
-		schedule_timeout_interruptible(sleeptime);
-		/*
-		 * only elected controlling cpu can collect stats and update
-		 * control parameters.
-		 */
-		if (cpunr == control_cpu && !(count%window_size_now)) {
-			should_skip =
-				powerclamp_adjust_controls(target_ratio,
-							guard, window_size_now);
-			smp_mb();
-		}
+	w_data = container_of(work, struct powerclamp_worker_data,
+			      balancing_work);
 
-		if (should_skip)
-			continue;
-
-		target_jiffies = jiffies + duration_jiffies;
-		mod_timer(&wakeup_timer, target_jiffies);
-		if (unlikely(local_softirq_pending()))
-			continue;
-		/*
-		 * stop tick sched during idle time, interrupts are still
-		 * allowed. thus jiffies are updated properly.
-		 */
-		preempt_disable();
-		/* mwait until target jiffies is reached */
-		while (time_before(jiffies, target_jiffies)) {
-			unsigned long ecx = 1;
-			unsigned long eax = target_mwait;
-
-			/*
-			 * REVISIT: may call enter_idle() to notify drivers who
-			 * can save power during cpu idle. same for exit_idle()
-			 */
-			local_touch_nmi();
-			stop_critical_timings();
-			mwait_idle_with_hints(eax, ecx);
-			start_critical_timings();
-			atomic_inc(&idle_wakeup_counter);
-		}
-		preempt_enable();
+	/*
+	 * make sure user selected ratio does not take effect until
+	 * the next round. adjust target_ratio if user has changed
+	 * target such that we can converge quickly.
+	 */
+	w_data->target_ratio = READ_ONCE(set_target_ratio);
+	w_data->guard = 1 + w_data->target_ratio / 20;
+	w_data->window_size_now = window_size;
+	w_data->duration_jiffies = msecs_to_jiffies(duration);
+	w_data->count++;
+
+	/*
+	 * systems may have different ability to enter package level
+	 * c-states, thus we need to compensate the injected idle ratio
+	 * to achieve the actual target reported by the HW.
+	 */
+	compensated_ratio = w_data->target_ratio +
+		get_compensation(w_data->target_ratio);
+	if (compensated_ratio <= 0)
+		compensated_ratio = 1;
+	interval = w_data->duration_jiffies * 100 / compensated_ratio;
+
+	/* align idle time */
+	target_jiffies = roundup(jiffies, interval);
+	sleeptime = target_jiffies - jiffies;
+	if (sleeptime <= 0)
+		sleeptime = 1;
+
+	if (clamping && w_data->clamping && cpu_online(w_data->cpu))
+		kthread_queue_delayed_work(w_data->worker,
+					   &w_data->idle_injection_work,
+					   sleeptime);
+}
+
+static void clamp_idle_injection_func(struct kthread_work *work)
+{
+	struct powerclamp_worker_data *w_data;
+
+	w_data = container_of(work, struct powerclamp_worker_data,
+			      idle_injection_work.work);
+
+	/*
+	 * only elected controlling cpu can collect stats and update
+	 * control parameters.
+	 */
+	if (w_data->cpu == control_cpu &&
+	    !(w_data->count % w_data->window_size_now)) {
+		should_skip =
+			powerclamp_adjust_controls(w_data->target_ratio,
+						   w_data->guard,
+						   w_data->window_size_now);
+		smp_mb();
 	}
-	del_timer_sync(&wakeup_timer);
-	clear_bit(cpunr, cpu_clamping_mask);
-	return 0;
+
+	if (should_skip)
+		goto balance;
+
+	play_idle(jiffies_to_msecs(w_data->duration_jiffies));
+
+balance:
+	if (clamping && w_data->clamping && cpu_online(w_data->cpu))
+		kthread_queue_work(w_data->worker, &w_data->balancing_work);
 }
 
 /*
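clamp_idle_injection_func() leans on play_idle(), part of the cpuidle material this merge depends on. A minimal sketch of its use (hypothetical demo_* name; assumes the v4.10 signature taking a duration in milliseconds, callable only from a task context that may sleep):

#include <linux/cpu.h>
#include <linux/kthread.h>

static void demo_inject_idle(struct kthread_work *work)
{
	/*
	 * Force this CPU through the idle loop for ~10 ms: the calling
	 * task is flagged PF_IDLE, so the scheduler tick can be stopped
	 * while the injected idle period runs.
	 */
	play_idle(10);
}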
@@ -508,10 +493,60 @@ static void poll_pkg_cstate(struct work_struct *dummy)
 		schedule_delayed_work(&poll_pkg_cstate_work, HZ);
 }
 
+static void start_power_clamp_worker(unsigned long cpu)
+{
+	struct powerclamp_worker_data *w_data = per_cpu_ptr(worker_data, cpu);
+	struct kthread_worker *worker;
+
+	worker = kthread_create_worker_on_cpu(cpu, 0, "kidle_inject/%ld", cpu);
+	if (IS_ERR(worker))
+		return;
+
+	w_data->worker = worker;
+	w_data->count = 0;
+	w_data->cpu = cpu;
+	w_data->clamping = true;
+	set_bit(cpu, cpu_clamping_mask);
+	sched_setscheduler(worker->task, SCHED_FIFO, &sparam);
+	kthread_init_work(&w_data->balancing_work, clamp_balancing_func);
+	kthread_init_delayed_work(&w_data->idle_injection_work,
+				  clamp_idle_injection_func);
+	kthread_queue_work(w_data->worker, &w_data->balancing_work);
+}
+
+static void stop_power_clamp_worker(unsigned long cpu)
+{
+	struct powerclamp_worker_data *w_data = per_cpu_ptr(worker_data, cpu);
+
+	if (!w_data->worker)
+		return;
+
+	w_data->clamping = false;
+	/*
+	 * Make sure that all works that get queued after this point see
+	 * the clamping disabled. The counterpart is not needed because
+	 * there is an implicit memory barrier when the queued work
+	 * is processed.
+	 */
+	smp_wmb();
+	kthread_cancel_work_sync(&w_data->balancing_work);
+	kthread_cancel_delayed_work_sync(&w_data->idle_injection_work);
+	/*
+	 * The balancing work still might be queued here because
+	 * the handling of the "clamping" variable, cancel, and queue
+	 * operations are not synchronized via a lock. But it is not
+	 * a big deal. The balancing work is fast and destroy kthread
+	 * will wait for it.
+	 */
+	clear_bit(w_data->cpu, cpu_clamping_mask);
+	kthread_destroy_worker(w_data->worker);
+	w_data->worker = NULL;
+}
+
 static int start_power_clamp(void)
 {
 	unsigned long cpu;
-	struct task_struct *thread;
 
 	set_target_ratio = clamp(set_target_ratio, 0U, MAX_TARGET_RATIO - 1);
 	/* prevent cpu hotplug */
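start_power_clamp_worker()/stop_power_clamp_worker() follow the kthread worker API lifecycle added in v4.9. A condensed sketch of that lifecycle, under hypothetical demo_* names:

#include <linux/kthread.h>

static void demo_func(struct kthread_work *work)
{
	/* runs in the worker's dedicated kthread */
}

static int demo_lifecycle(unsigned int cpu)
{
	struct kthread_worker *worker;
	struct kthread_work work;

	/* create a worker whose kthread is bound to @cpu */
	worker = kthread_create_worker_on_cpu(cpu, 0, "demo/%u", cpu);
	if (IS_ERR(worker))
		return PTR_ERR(worker);

	kthread_init_work(&work, demo_func);
	kthread_queue_work(worker, &work);	/* returns false if already queued */
	kthread_flush_work(&work);		/* wait until it has run */
	kthread_destroy_worker(worker);		/* flush remaining work, stop the kthread */
	return 0;
}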
@@ -525,22 +560,9 @@ static int start_power_clamp(void)
 	clamping = true;
 	schedule_delayed_work(&poll_pkg_cstate_work, 0);
 
-	/* start one thread per online cpu */
+	/* start one kthread worker per online cpu */
 	for_each_online_cpu(cpu) {
-		struct task_struct **p =
-			per_cpu_ptr(powerclamp_thread, cpu);
-
-		thread = kthread_create_on_node(clamp_thread,
-						(void *) cpu,
-						cpu_to_node(cpu),
-						"kidle_inject/%ld", cpu);
-		/* bind to cpu here */
-		if (likely(!IS_ERR(thread))) {
-			kthread_bind(thread, cpu);
-			wake_up_process(thread);
-			*p = thread;
-		}
-
+		start_power_clamp_worker(cpu);
 	}
 	put_online_cpus();
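The loop above runs with CPU hotplug blocked: the "prevent cpu hotplug" comment earlier marks the get_online_cpus() call, and put_online_cpus() is visible here. A sketch of the pattern (demo_* names hypothetical):

#include <linux/cpu.h>

static void demo_start_one(unsigned long cpu)
{
	/* hypothetical per-CPU setup, e.g. starting a worker */
}

static void demo_start_all(void)
{
	unsigned long cpu;

	get_online_cpus();		/* CPUs cannot come or go in here */
	for_each_online_cpu(cpu)
		demo_start_one(cpu);
	put_online_cpus();		/* re-enable CPU hotplug */
}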
@@ -550,71 +572,49 @@ static int start_power_clamp(void)
 static void end_power_clamp(void)
 {
 	int i;
-	struct task_struct *thread;
 
-	clamping = false;
 	/*
-	 * make clamping visible to other cpus and give per cpu clamping threads
-	 * sometime to exit, or gets killed later.
+	 * Block requeuing in all the kthread workers. They will flush and
+	 * stop faster.
 	 */
-	smp_mb();
-	msleep(20);
+	clamping = false;
+
 	if (bitmap_weight(cpu_clamping_mask, num_possible_cpus())) {
 		for_each_set_bit(i, cpu_clamping_mask, num_possible_cpus()) {
-			pr_debug("clamping thread for cpu %d alive, kill\n", i);
-			thread = *per_cpu_ptr(powerclamp_thread, i);
-			kthread_stop(thread);
+			pr_debug("clamping worker for cpu %d alive, destroy\n",
+				 i);
+			stop_power_clamp_worker(i);
 		}
 	}
 }
 
-static int powerclamp_cpu_callback(struct notifier_block *nfb,
-				unsigned long action, void *hcpu)
+static int powerclamp_cpu_online(unsigned int cpu)
 {
-	unsigned long cpu = (unsigned long)hcpu;
-	struct task_struct *thread;
-	struct task_struct **percpu_thread =
-		per_cpu_ptr(powerclamp_thread, cpu);
-
-	if (false == clamping)
-		goto exit_ok;
-
-	switch (action) {
-	case CPU_ONLINE:
-		thread = kthread_create_on_node(clamp_thread,
-						(void *) cpu,
-						cpu_to_node(cpu),
-						"kidle_inject/%lu", cpu);
-		if (likely(!IS_ERR(thread))) {
-			kthread_bind(thread, cpu);
-			wake_up_process(thread);
-			*percpu_thread = thread;
-		}
-		/* prefer BSP as controlling CPU */
-		if (cpu == 0) {
-			control_cpu = 0;
-			smp_mb();
-		}
-		break;
-	case CPU_DEAD:
-		if (test_bit(cpu, cpu_clamping_mask)) {
-			pr_err("cpu %lu dead but powerclamping thread is not\n",
-				cpu);
-			kthread_stop(*percpu_thread);
-		}
-		if (cpu == control_cpu) {
-			control_cpu = smp_processor_id();
-			smp_mb();
-		}
+	if (clamping == false)
+		return 0;
+
+	start_power_clamp_worker(cpu);
+	/* prefer BSP as controlling CPU */
+	if (cpu == 0) {
+		control_cpu = 0;
+		smp_mb();
 	}
+	return 0;
+}
 
-exit_ok:
-	return NOTIFY_OK;
+static int powerclamp_cpu_predown(unsigned int cpu)
+{
+	if (clamping == false)
+		return 0;
+
+	stop_power_clamp_worker(cpu);
+	if (cpu != control_cpu)
+		return 0;
+
+	control_cpu = cpumask_first(cpu_online_mask);
+	if (control_cpu == cpu)
+		control_cpu = cpumask_next(cpu, cpu_online_mask);
+	smp_mb();
+	return 0;
 }
 
-static struct notifier_block powerclamp_cpu_notifier = {
-	.notifier_call = powerclamp_cpu_callback,
-};
-
 static int powerclamp_get_max_state(struct thermal_cooling_device *cdev,
 				    unsigned long *state)
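The hotplug notifier is replaced by CPU hotplug state machine callbacks. With CPUHP_AP_ONLINE_DYN the core allocates a state number and cpuhp_setup_state_nocalls() returns it, so it can be removed later; the _nocalls variant skips invoking the callbacks for CPUs that are already online. A sketch under hypothetical demo_* names:

#include <linux/cpuhotplug.h>

static enum cpuhp_state demo_state;

static int demo_online(unsigned int cpu)
{
	return 0;	/* runs as @cpu comes online */
}

static int demo_predown(unsigned int cpu)
{
	return 0;	/* runs before @cpu goes offline */
}

static int demo_register(void)
{
	int ret;

	ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "demo:online",
					demo_online, demo_predown);
	if (ret < 0)
		return ret;
	demo_state = ret;	/* remember the dynamically allocated state id */
	return 0;
}

static void demo_unregister(void)
{
	cpuhp_remove_state_nocalls(demo_state);
}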
@@ -742,6 +742,8 @@ static inline void powerclamp_create_debug_files(void)
 	debugfs_remove_recursive(debug_dir);
 }
 
+static enum cpuhp_state hp_state;
+
 static int __init powerclamp_init(void)
 {
 	int retval;
@@ -759,10 +761,17 @@ static int __init powerclamp_init(void)
 
 	/* set default limit, maybe adjusted during runtime based on feedback */
 	window_size = 2;
-	register_hotcpu_notifier(&powerclamp_cpu_notifier);
+	retval = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
+					   "thermal/intel_powerclamp:online",
+					   powerclamp_cpu_online,
+					   powerclamp_cpu_predown);
+	if (retval < 0)
+		goto exit_free;
+
+	hp_state = retval;
 
-	powerclamp_thread = alloc_percpu(struct task_struct *);
-	if (!powerclamp_thread) {
+	worker_data = alloc_percpu(struct powerclamp_worker_data);
+	if (!worker_data) {
 		retval = -ENOMEM;
 		goto exit_unregister;
 	}
@@ -782,9 +791,9 @@ static int __init powerclamp_init(void)
 	return 0;
 
 exit_free_thread:
-	free_percpu(powerclamp_thread);
+	free_percpu(worker_data);
 exit_unregister:
-	unregister_hotcpu_notifier(&powerclamp_cpu_notifier);
+	cpuhp_remove_state_nocalls(hp_state);
 exit_free:
 	kfree(cpu_clamping_mask);
 	return retval;
@@ -793,9 +802,9 @@ module_init(powerclamp_init);
 static void __exit powerclamp_exit(void)
 {
-	unregister_hotcpu_notifier(&powerclamp_cpu_notifier);
 	end_power_clamp();
-	free_percpu(powerclamp_thread);
+	cpuhp_remove_state_nocalls(hp_state);
+	free_percpu(worker_data);
 	thermal_cooling_device_unregister(cooling_dev);
 	kfree(cpu_clamping_mask);
...