Commit 25e2d8c1 authored by Frederic Weisbecker, committed by Ingo Molnar

sched/cputime: Fix ksoftirqd cputime accounting regression

irq_time_read() returns the irqtime minus the ksoftirqd time. This
is necessary because irq_time_read() is used to subtract the IRQ time
from the sum_exec_runtime of a task. If we were to include the softirq
time of ksoftirqd, this task would subtract its own CPU time every time
it updates ksoftirqd->sum_exec_runtime which would therefore never
progress.

But this behaviour got broken by:

  a499a5a1 ("sched/cputime: Increment kcpustat directly on irqtime account")

... which now includes ksoftirqd softirq time in the time returned by
irq_time_read().

This has resulted in wrong ksoftirqd cputime reported to userspace
through /proc/stat and thus "top" not showing ksoftirqd when it should
after intense networking load.

ksoftirqd->stime happens to be correct but it gets scaled down by
sum_exec_runtime through task_cputime_adjusted().

To fix this, just account the strict IRQ time in a separate counter and
use it to report the IRQ time.
Reported-and-tested-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Reviewed-by: Rik van Riel <riel@redhat.com>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stanislaw Gruszka <sgruszka@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Wanpeng Li <wanpeng.li@hotmail.com>
Link: http://lkml.kernel.org/r/1493129448-5356-1-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent ea839b41
...@@ -34,6 +34,18 @@ void disable_sched_clock_irqtime(void) ...@@ -34,6 +34,18 @@ void disable_sched_clock_irqtime(void)
sched_clock_irqtime = 0; sched_clock_irqtime = 0;
} }
/*
 * Add @delta to every counter that tracks strict IRQ time:
 *   - the kcpustat entry selected by @idx on the current CPU,
 *   - @irqtime->total,
 *   - @irqtime->tick_delta.
 *
 * All three additions happen inside a single u64_stats update section
 * on @irqtime->sync.
 */
static void irqtime_account_delta(struct irqtime *irqtime, u64 delta,
				  enum cpu_usage_stat idx)
{
	u64 *stats = kcpustat_this_cpu->cpustat;

	u64_stats_update_begin(&irqtime->sync);
	irqtime->tick_delta += delta;
	irqtime->total += delta;
	stats[idx] += delta;
	u64_stats_update_end(&irqtime->sync);
}
/* /*
* Called before incrementing preempt_count on {soft,}irq_enter * Called before incrementing preempt_count on {soft,}irq_enter
* and before decrementing preempt_count on {soft,}irq_exit. * and before decrementing preempt_count on {soft,}irq_exit.
...@@ -41,7 +53,6 @@ void disable_sched_clock_irqtime(void) ...@@ -41,7 +53,6 @@ void disable_sched_clock_irqtime(void)
void irqtime_account_irq(struct task_struct *curr) void irqtime_account_irq(struct task_struct *curr)
{ {
struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime); struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime);
u64 *cpustat = kcpustat_this_cpu->cpustat;
s64 delta; s64 delta;
int cpu; int cpu;
...@@ -52,22 +63,16 @@ void irqtime_account_irq(struct task_struct *curr) ...@@ -52,22 +63,16 @@ void irqtime_account_irq(struct task_struct *curr)
delta = sched_clock_cpu(cpu) - irqtime->irq_start_time; delta = sched_clock_cpu(cpu) - irqtime->irq_start_time;
irqtime->irq_start_time += delta; irqtime->irq_start_time += delta;
u64_stats_update_begin(&irqtime->sync);
/* /*
* We do not account for softirq time from ksoftirqd here. * We do not account for softirq time from ksoftirqd here.
* We want to continue accounting softirq time to ksoftirqd thread * We want to continue accounting softirq time to ksoftirqd thread
* in that case, so as not to confuse scheduler with a special task * in that case, so as not to confuse scheduler with a special task
* that do not consume any time, but still wants to run. * that do not consume any time, but still wants to run.
*/ */
if (hardirq_count()) { if (hardirq_count())
cpustat[CPUTIME_IRQ] += delta; irqtime_account_delta(irqtime, delta, CPUTIME_IRQ);
irqtime->tick_delta += delta; else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
} else if (in_serving_softirq() && curr != this_cpu_ksoftirqd()) { irqtime_account_delta(irqtime, delta, CPUTIME_SOFTIRQ);
cpustat[CPUTIME_SOFTIRQ] += delta;
irqtime->tick_delta += delta;
}
u64_stats_update_end(&irqtime->sync);
} }
EXPORT_SYMBOL_GPL(irqtime_account_irq); EXPORT_SYMBOL_GPL(irqtime_account_irq);
......
...@@ -1869,6 +1869,7 @@ static inline void nohz_balance_exit_idle(unsigned int cpu) { } ...@@ -1869,6 +1869,7 @@ static inline void nohz_balance_exit_idle(unsigned int cpu) { }
#ifdef CONFIG_IRQ_TIME_ACCOUNTING #ifdef CONFIG_IRQ_TIME_ACCOUNTING
struct irqtime { struct irqtime {
u64 total;
u64 tick_delta; u64 tick_delta;
u64 irq_start_time; u64 irq_start_time;
struct u64_stats_sync sync; struct u64_stats_sync sync;
...@@ -1876,16 +1877,20 @@ struct irqtime { ...@@ -1876,16 +1877,20 @@ struct irqtime {
DECLARE_PER_CPU(struct irqtime, cpu_irqtime); DECLARE_PER_CPU(struct irqtime, cpu_irqtime);
/*
* Returns the irqtime minus the softirq time computed by ksoftirqd.
* Otherwise ksoftirqd's sum_exec_runtime is substracted its own runtime
* and never move forward.
*/
static inline u64 irq_time_read(int cpu) static inline u64 irq_time_read(int cpu)
{ {
struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu); struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu);
u64 *cpustat = kcpustat_cpu(cpu).cpustat;
unsigned int seq; unsigned int seq;
u64 total; u64 total;
do { do {
seq = __u64_stats_fetch_begin(&irqtime->sync); seq = __u64_stats_fetch_begin(&irqtime->sync);
total = cpustat[CPUTIME_SOFTIRQ] + cpustat[CPUTIME_IRQ]; total = irqtime->total;
} while (__u64_stats_fetch_retry(&irqtime->sync, seq)); } while (__u64_stats_fetch_retry(&irqtime->sync, seq));
return total; return total;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment