Commit 3d7772ea authored by Len Brown

tools/power turbostat: harden against cpu hotplug

turbostat tends to get confused when CPUs are added and removed
while it is running.

There are races, such as checking the current cpu, and then
reading a sysfs file that depends on that cpu number.

Close the two issues that seem to come up the most.
First, there is an infinite reset loop detector --
change that to allow more resets before giving up.
Secondly, one of those file reads didn't really need
to exit the program on failure...
Signed-off-by: Len Brown <len.brown@intel.com>
parent 6ff7cb37
...@@ -1894,7 +1894,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) ...@@ -1894,7 +1894,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
int i; int i;
if (cpu_migrate(cpu)) { if (cpu_migrate(cpu)) {
fprintf(outf, "Could not migrate to CPU %d\n", cpu); fprintf(outf, "get_counters: Could not migrate to CPU %d\n", cpu);
return -1; return -1;
} }
...@@ -2764,7 +2764,12 @@ int get_thread_siblings(struct cpu_topology *thiscpu) ...@@ -2764,7 +2764,12 @@ int get_thread_siblings(struct cpu_topology *thiscpu)
sprintf(path, sprintf(path,
"/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu); "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu);
filep = fopen_or_die(path, "r"); filep = fopen(path, "r");
if (!filep) {
warnx("%s: open failed", path);
return -1;
}
do { do {
offset -= BITMASK_SIZE; offset -= BITMASK_SIZE;
if (fscanf(filep, "%lx%c", &map, &character) != 2) if (fscanf(filep, "%lx%c", &map, &character) != 2)
...@@ -2877,7 +2882,7 @@ void re_initialize(void) ...@@ -2877,7 +2882,7 @@ void re_initialize(void)
{ {
free_all_buffers(); free_all_buffers();
setup_all_buffers(); setup_all_buffers();
printf("turbostat: re-initialized with num_cpus %d\n", topo.num_cpus); fprintf(outf, "turbostat: re-initialized with num_cpus %d\n", topo.num_cpus);
} }
void set_max_cpu_num(void) void set_max_cpu_num(void)
...@@ -3331,7 +3336,7 @@ void turbostat_loop() ...@@ -3331,7 +3336,7 @@ void turbostat_loop()
if (retval < -1) { if (retval < -1) {
exit(retval); exit(retval);
} else if (retval == -1) { } else if (retval == -1) {
if (restarted > 1) { if (restarted > 10) {
exit(retval); exit(retval);
} }
re_initialize(); re_initialize();
...@@ -3926,7 +3931,7 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p) ...@@ -3926,7 +3931,7 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
return 0; return 0;
if (cpu_migrate(cpu)) { if (cpu_migrate(cpu)) {
fprintf(outf, "Could not migrate to CPU %d\n", cpu); fprintf(outf, "print_epb: Could not migrate to CPU %d\n", cpu);
return -1; return -1;
} }
...@@ -3970,7 +3975,7 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p) ...@@ -3970,7 +3975,7 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
return 0; return 0;
if (cpu_migrate(cpu)) { if (cpu_migrate(cpu)) {
fprintf(outf, "Could not migrate to CPU %d\n", cpu); fprintf(outf, "print_hwp: Could not migrate to CPU %d\n", cpu);
return -1; return -1;
} }
...@@ -4058,7 +4063,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data ...@@ -4058,7 +4063,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
return 0; return 0;
if (cpu_migrate(cpu)) { if (cpu_migrate(cpu)) {
fprintf(outf, "Could not migrate to CPU %d\n", cpu); fprintf(outf, "print_perf_limit: Could not migrate to CPU %d\n", cpu);
return -1; return -1;
} }
...@@ -4439,7 +4444,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p ...@@ -4439,7 +4444,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
return 0; return 0;
if (cpu_migrate(cpu)) { if (cpu_migrate(cpu)) {
fprintf(outf, "Could not migrate to CPU %d\n", cpu); fprintf(outf, "print_thermal: Could not migrate to CPU %d\n", cpu);
return -1; return -1;
} }
...@@ -4511,7 +4516,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) ...@@ -4511,7 +4516,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
cpu = t->cpu_id; cpu = t->cpu_id;
if (cpu_migrate(cpu)) { if (cpu_migrate(cpu)) {
fprintf(outf, "Could not migrate to CPU %d\n", cpu); fprintf(outf, "print_rapl: Could not migrate to CPU %d\n", cpu);
return -1; return -1;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment