Commit 3d7efa4e authored by Heiko Carstens's avatar Heiko Carstens Committed by Vasily Gorbik

s390/idle: fix cpu idle time calculation

The idle time reported in /proc/stat sometimes incorrectly contains
huge values on s390. This is caused by a bug in arch_cpu_idle_time().

The kernel tries to figure out when a different cpu entered idle by
accessing its per-cpu data structure. There is an ordering problem: if
the remote cpu has an idle_enter value which is not zero, and an
idle_exit value which is zero, it is assumed it is idle since
"now". The "now" timestamp however is taken before the idle_enter
value is read.

Which in turn means that "now" can be smaller than idle_enter of the
remote cpu. Unconditionally subtracting idle_enter from "now" can thus
lead to a negative value (aka large unsigned value).

Fix this by moving the get_tod_clock() invocation out of the
loop. While at it also make the code a bit more readable.

A similar bug also exists for show_idle_time(). Fix this is as well.

Cc: <stable@vger.kernel.org>
Signed-off-by: default avatarHeiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: default avatarVasily Gorbik <gor@linux.ibm.com>
parent a1d863ac
...@@ -69,18 +69,26 @@ DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL); ...@@ -69,18 +69,26 @@ DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL);
static ssize_t show_idle_time(struct device *dev, static ssize_t show_idle_time(struct device *dev,
struct device_attribute *attr, char *buf) struct device_attribute *attr, char *buf)
{ {
unsigned long long now, idle_time, idle_enter, idle_exit, in_idle;
struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id); struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id);
unsigned long long now, idle_time, idle_enter, idle_exit;
unsigned int seq; unsigned int seq;
do { do {
now = get_tod_clock();
seq = read_seqcount_begin(&idle->seqcount); seq = read_seqcount_begin(&idle->seqcount);
idle_time = READ_ONCE(idle->idle_time); idle_time = READ_ONCE(idle->idle_time);
idle_enter = READ_ONCE(idle->clock_idle_enter); idle_enter = READ_ONCE(idle->clock_idle_enter);
idle_exit = READ_ONCE(idle->clock_idle_exit); idle_exit = READ_ONCE(idle->clock_idle_exit);
} while (read_seqcount_retry(&idle->seqcount, seq)); } while (read_seqcount_retry(&idle->seqcount, seq));
idle_time += idle_enter ? ((idle_exit ? : now) - idle_enter) : 0; in_idle = 0;
now = get_tod_clock();
if (idle_enter) {
if (idle_exit) {
in_idle = idle_exit - idle_enter;
} else if (now > idle_enter) {
in_idle = now - idle_enter;
}
}
idle_time += in_idle;
return sprintf(buf, "%llu\n", idle_time >> 12); return sprintf(buf, "%llu\n", idle_time >> 12);
} }
DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL); DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL);
...@@ -88,17 +96,24 @@ DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL); ...@@ -88,17 +96,24 @@ DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL);
u64 arch_cpu_idle_time(int cpu) u64 arch_cpu_idle_time(int cpu)
{ {
struct s390_idle_data *idle = &per_cpu(s390_idle, cpu); struct s390_idle_data *idle = &per_cpu(s390_idle, cpu);
unsigned long long now, idle_enter, idle_exit; unsigned long long now, idle_enter, idle_exit, in_idle;
unsigned int seq; unsigned int seq;
do { do {
now = get_tod_clock();
seq = read_seqcount_begin(&idle->seqcount); seq = read_seqcount_begin(&idle->seqcount);
idle_enter = READ_ONCE(idle->clock_idle_enter); idle_enter = READ_ONCE(idle->clock_idle_enter);
idle_exit = READ_ONCE(idle->clock_idle_exit); idle_exit = READ_ONCE(idle->clock_idle_exit);
} while (read_seqcount_retry(&idle->seqcount, seq)); } while (read_seqcount_retry(&idle->seqcount, seq));
in_idle = 0;
return cputime_to_nsecs(idle_enter ? ((idle_exit ?: now) - idle_enter) : 0); now = get_tod_clock();
if (idle_enter) {
if (idle_exit) {
in_idle = idle_exit - idle_enter;
} else if (now > idle_enter) {
in_idle = now - idle_enter;
}
}
return cputime_to_nsecs(in_idle);
} }
void arch_cpu_idle_enter(void) void arch_cpu_idle_enter(void)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment