Commit 6546b19f authored by Namhyung Kim, committed by Arnaldo Carvalho de Melo

perf/core: Add PERF_SAMPLE_CGROUP feature

The PERF_SAMPLE_CGROUP bit is to save (perf_event) cgroup information in
the sample.  It adds a 64-bit id that identifies the current cgroup; the id
is the file handle in the cgroup file system.  Userspace should use this
information together with the PERF_RECORD_CGROUP event to match which
cgroup it belongs to.

I put it before PERF_SAMPLE_AUX for simplicity since it just needs a
64-bit word.  But if we want bigger samples, I can work on that
direction too.

Committer testing:

  $ pahole perf_sample_data | grep -w cgroup -B5 -A5
  	/* --- cacheline 4 boundary (256 bytes) was 56 bytes ago --- */
  	struct perf_regs           regs_intr;            /*   312    16 */
  	/* --- cacheline 5 boundary (320 bytes) was 8 bytes ago --- */
  	u64                        stack_user_size;      /*   328     8 */
  	u64                        phys_addr;            /*   336     8 */
  	u64                        cgroup;               /*   344     8 */

  	/* size: 384, cachelines: 6, members: 22 */
  	/* padding: 32 */
  };
  $
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Tejun Heo <tj@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Zefan Li <lizefan@huawei.com>
Link: http://lore.kernel.org/lkml/20200325124536.2800725-3-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
parent 96aaab68
...@@ -1020,6 +1020,7 @@ struct perf_sample_data { ...@@ -1020,6 +1020,7 @@ struct perf_sample_data {
u64 stack_user_size; u64 stack_user_size;
u64 phys_addr; u64 phys_addr;
u64 cgroup;
} ____cacheline_aligned; } ____cacheline_aligned;
/* default value for data source */ /* default value for data source */
......
...@@ -142,8 +142,9 @@ enum perf_event_sample_format { ...@@ -142,8 +142,9 @@ enum perf_event_sample_format {
PERF_SAMPLE_REGS_INTR = 1U << 18, PERF_SAMPLE_REGS_INTR = 1U << 18,
PERF_SAMPLE_PHYS_ADDR = 1U << 19, PERF_SAMPLE_PHYS_ADDR = 1U << 19,
PERF_SAMPLE_AUX = 1U << 20, PERF_SAMPLE_AUX = 1U << 20,
PERF_SAMPLE_CGROUP = 1U << 21,
PERF_SAMPLE_MAX = 1U << 21, /* non-ABI */ PERF_SAMPLE_MAX = 1U << 22, /* non-ABI */
__PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, /* non-ABI; internal use */ __PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, /* non-ABI; internal use */
}; };
......
...@@ -1027,7 +1027,8 @@ config CGROUP_PERF ...@@ -1027,7 +1027,8 @@ config CGROUP_PERF
help help
This option extends the perf per-cpu mode to restrict monitoring This option extends the perf per-cpu mode to restrict monitoring
to threads which belong to the cgroup specified and run on the to threads which belong to the cgroup specified and run on the
designated cpu. designated cpu. Or this can be used to have cgroup ID in samples
so that it can monitor performance events among cgroups.
Say N if unsure. Say N if unsure.
......
...@@ -1862,6 +1862,9 @@ static void __perf_event_header_size(struct perf_event *event, u64 sample_type) ...@@ -1862,6 +1862,9 @@ static void __perf_event_header_size(struct perf_event *event, u64 sample_type)
if (sample_type & PERF_SAMPLE_PHYS_ADDR) if (sample_type & PERF_SAMPLE_PHYS_ADDR)
size += sizeof(data->phys_addr); size += sizeof(data->phys_addr);
if (sample_type & PERF_SAMPLE_CGROUP)
size += sizeof(data->cgroup);
event->header_size = size; event->header_size = size;
} }
...@@ -6867,6 +6870,9 @@ void perf_output_sample(struct perf_output_handle *handle, ...@@ -6867,6 +6870,9 @@ void perf_output_sample(struct perf_output_handle *handle,
if (sample_type & PERF_SAMPLE_PHYS_ADDR) if (sample_type & PERF_SAMPLE_PHYS_ADDR)
perf_output_put(handle, data->phys_addr); perf_output_put(handle, data->phys_addr);
if (sample_type & PERF_SAMPLE_CGROUP)
perf_output_put(handle, data->cgroup);
if (sample_type & PERF_SAMPLE_AUX) { if (sample_type & PERF_SAMPLE_AUX) {
perf_output_put(handle, data->aux_size); perf_output_put(handle, data->aux_size);
...@@ -7066,6 +7072,16 @@ void perf_prepare_sample(struct perf_event_header *header, ...@@ -7066,6 +7072,16 @@ void perf_prepare_sample(struct perf_event_header *header,
if (sample_type & PERF_SAMPLE_PHYS_ADDR) if (sample_type & PERF_SAMPLE_PHYS_ADDR)
data->phys_addr = perf_virt_to_phys(data->addr); data->phys_addr = perf_virt_to_phys(data->addr);
#ifdef CONFIG_CGROUP_PERF
if (sample_type & PERF_SAMPLE_CGROUP) {
struct cgroup *cgrp;
/* protected by RCU */
cgrp = task_css_check(current, perf_event_cgrp_id, 1)->cgroup;
data->cgroup = cgroup_id(cgrp);
}
#endif
if (sample_type & PERF_SAMPLE_AUX) { if (sample_type & PERF_SAMPLE_AUX) {
u64 size; u64 size;
...@@ -11264,6 +11280,12 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, ...@@ -11264,6 +11280,12 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
if (attr->sample_type & PERF_SAMPLE_REGS_INTR) if (attr->sample_type & PERF_SAMPLE_REGS_INTR)
ret = perf_reg_validate(attr->sample_regs_intr); ret = perf_reg_validate(attr->sample_regs_intr);
#ifndef CONFIG_CGROUP_PERF
if (attr->sample_type & PERF_SAMPLE_CGROUP)
return -EINVAL;
#endif
out: out:
return ret; return ret;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment