Commit 10cdb82a authored by Andrii Nakryiko, committed by Masami Hiramatsu (Google)

uprobes: turn trace_uprobe's nhit counter to be per-CPU one

trace_uprobe->nhit counter is not incremented atomically, so its value
is questionable when a uprobe is hit on multiple CPUs simultaneously.

Also, doing this shared counter increment across many CPUs causes heavy
cache line bouncing, limiting uprobe/uretprobe performance scaling with
number of CPUs.

Solve both problems by making this a per-CPU counter.

Link: https://lore.kernel.org/all/20240813203409.3985398-1-andrii@kernel.org/
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
parent da3ea350
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include <linux/string.h> #include <linux/string.h>
#include <linux/rculist.h> #include <linux/rculist.h>
#include <linux/filter.h> #include <linux/filter.h>
#include <linux/percpu.h>
#include "trace_dynevent.h" #include "trace_dynevent.h"
#include "trace_probe.h" #include "trace_probe.h"
...@@ -62,7 +63,7 @@ struct trace_uprobe { ...@@ -62,7 +63,7 @@ struct trace_uprobe {
char *filename; char *filename;
unsigned long offset; unsigned long offset;
unsigned long ref_ctr_offset; unsigned long ref_ctr_offset;
unsigned long nhit; unsigned long __percpu *nhits;
struct trace_probe tp; struct trace_probe tp;
}; };
...@@ -337,6 +338,12 @@ alloc_trace_uprobe(const char *group, const char *event, int nargs, bool is_ret) ...@@ -337,6 +338,12 @@ alloc_trace_uprobe(const char *group, const char *event, int nargs, bool is_ret)
if (!tu) if (!tu)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
tu->nhits = alloc_percpu(unsigned long);
if (!tu->nhits) {
ret = -ENOMEM;
goto error;
}
ret = trace_probe_init(&tu->tp, event, group, true, nargs); ret = trace_probe_init(&tu->tp, event, group, true, nargs);
if (ret < 0) if (ret < 0)
goto error; goto error;
...@@ -349,6 +356,7 @@ alloc_trace_uprobe(const char *group, const char *event, int nargs, bool is_ret) ...@@ -349,6 +356,7 @@ alloc_trace_uprobe(const char *group, const char *event, int nargs, bool is_ret)
return tu; return tu;
error: error:
free_percpu(tu->nhits);
kfree(tu); kfree(tu);
return ERR_PTR(ret); return ERR_PTR(ret);
...@@ -362,6 +370,7 @@ static void free_trace_uprobe(struct trace_uprobe *tu) ...@@ -362,6 +370,7 @@ static void free_trace_uprobe(struct trace_uprobe *tu)
path_put(&tu->path); path_put(&tu->path);
trace_probe_cleanup(&tu->tp); trace_probe_cleanup(&tu->tp);
kfree(tu->filename); kfree(tu->filename);
free_percpu(tu->nhits);
kfree(tu); kfree(tu);
} }
...@@ -815,13 +824,21 @@ static int probes_profile_seq_show(struct seq_file *m, void *v) ...@@ -815,13 +824,21 @@ static int probes_profile_seq_show(struct seq_file *m, void *v)
{ {
struct dyn_event *ev = v; struct dyn_event *ev = v;
struct trace_uprobe *tu; struct trace_uprobe *tu;
unsigned long nhits;
int cpu;
if (!is_trace_uprobe(ev)) if (!is_trace_uprobe(ev))
return 0; return 0;
tu = to_trace_uprobe(ev); tu = to_trace_uprobe(ev);
nhits = 0;
for_each_possible_cpu(cpu) {
nhits += per_cpu(*tu->nhits, cpu);
}
seq_printf(m, " %s %-44s %15lu\n", tu->filename, seq_printf(m, " %s %-44s %15lu\n", tu->filename,
trace_probe_name(&tu->tp), tu->nhit); trace_probe_name(&tu->tp), nhits);
return 0; return 0;
} }
...@@ -1512,7 +1529,8 @@ static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs) ...@@ -1512,7 +1529,8 @@ static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs)
int ret = 0; int ret = 0;
tu = container_of(con, struct trace_uprobe, consumer); tu = container_of(con, struct trace_uprobe, consumer);
tu->nhit++;
this_cpu_inc(*tu->nhits);
udd.tu = tu; udd.tu = tu;
udd.bp_addr = instruction_pointer(regs); udd.bp_addr = instruction_pointer(regs);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment