Commit 96aaab68 authored by Namhyung Kim's avatar Namhyung Kim Committed by Arnaldo Carvalho de Melo

perf/core: Add PERF_RECORD_CGROUP event

To support cgroup tracking, add CGROUP event to save a link between
cgroup path and id number.  This is needed since cgroups can go away
when userspace tries to read the cgroup info (from the id) later.

The attr.cgroup bit was also added to enable cgroup tracking from
userspace.

This event will be generated when a new cgroup becomes active.
Userspace might need to synthesize those events for existing cgroups.

Committer testing:

From the resulting kernel, using /sys/kernel/btf/vmlinux:

  $ pahole perf_event_attr | grep -w cgroup -B5 -A1
  	__u64                      write_backward:1;     /*    40:27  8 */
  	__u64                      namespaces:1;         /*    40:28  8 */
  	__u64                      ksymbol:1;            /*    40:29  8 */
  	__u64                      bpf_event:1;          /*    40:30  8 */
  	__u64                      aux_output:1;         /*    40:31  8 */
  	__u64                      cgroup:1;             /*    40:32  8 */
  	__u64                      __reserved_1:31;      /*    40:33  8 */
  $
Reported-by: default avatarkbuild test robot <lkp@intel.com>
Signed-off-by: default avatarNamhyung Kim <namhyung@kernel.org>
Tested-by: default avatarArnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: default avatarPeter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: default avatarTejun Heo <tj@kernel.org>
[staticize perf_event_cgroup function]
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Zefan Li <lizefan@huawei.com>
Link: http://lore.kernel.org/lkml/20200325124536.2800725-2-namhyung@kernel.orgSigned-off-by: default avatarArnaldo Carvalho de Melo <acme@redhat.com>
parent 26567ed7
...@@ -381,7 +381,8 @@ struct perf_event_attr { ...@@ -381,7 +381,8 @@ struct perf_event_attr {
ksymbol : 1, /* include ksymbol events */ ksymbol : 1, /* include ksymbol events */
bpf_event : 1, /* include bpf events */ bpf_event : 1, /* include bpf events */
aux_output : 1, /* generate AUX records instead of events */ aux_output : 1, /* generate AUX records instead of events */
__reserved_1 : 32; cgroup : 1, /* include cgroup events */
__reserved_1 : 31;
union { union {
__u32 wakeup_events; /* wakeup every n events */ __u32 wakeup_events; /* wakeup every n events */
...@@ -1012,6 +1013,16 @@ enum perf_event_type { ...@@ -1012,6 +1013,16 @@ enum perf_event_type {
*/ */
PERF_RECORD_BPF_EVENT = 18, PERF_RECORD_BPF_EVENT = 18,
/*
* struct {
* struct perf_event_header header;
* u64 id;
* char path[];
* struct sample_id sample_id;
* };
*/
PERF_RECORD_CGROUP = 19,
PERF_RECORD_MAX, /* non-ABI */ PERF_RECORD_MAX, /* non-ABI */
}; };
......
...@@ -387,6 +387,7 @@ static atomic_t nr_freq_events __read_mostly; ...@@ -387,6 +387,7 @@ static atomic_t nr_freq_events __read_mostly;
static atomic_t nr_switch_events __read_mostly; static atomic_t nr_switch_events __read_mostly;
static atomic_t nr_ksymbol_events __read_mostly; static atomic_t nr_ksymbol_events __read_mostly;
static atomic_t nr_bpf_events __read_mostly; static atomic_t nr_bpf_events __read_mostly;
static atomic_t nr_cgroup_events __read_mostly;
static LIST_HEAD(pmus); static LIST_HEAD(pmus);
static DEFINE_MUTEX(pmus_lock); static DEFINE_MUTEX(pmus_lock);
...@@ -4608,6 +4609,8 @@ static void unaccount_event(struct perf_event *event) ...@@ -4608,6 +4609,8 @@ static void unaccount_event(struct perf_event *event)
atomic_dec(&nr_comm_events); atomic_dec(&nr_comm_events);
if (event->attr.namespaces) if (event->attr.namespaces)
atomic_dec(&nr_namespaces_events); atomic_dec(&nr_namespaces_events);
if (event->attr.cgroup)
atomic_dec(&nr_cgroup_events);
if (event->attr.task) if (event->attr.task)
atomic_dec(&nr_task_events); atomic_dec(&nr_task_events);
if (event->attr.freq) if (event->attr.freq)
...@@ -7735,6 +7738,105 @@ void perf_event_namespaces(struct task_struct *task) ...@@ -7735,6 +7738,105 @@ void perf_event_namespaces(struct task_struct *task)
NULL); NULL);
} }
/*
* cgroup tracking
*/
#ifdef CONFIG_CGROUP_PERF
struct perf_cgroup_event {
char *path;
int path_size;
struct {
struct perf_event_header header;
u64 id;
char path[];
} event_id;
};
static int perf_event_cgroup_match(struct perf_event *event)
{
return event->attr.cgroup;
}
static void perf_event_cgroup_output(struct perf_event *event, void *data)
{
struct perf_cgroup_event *cgroup_event = data;
struct perf_output_handle handle;
struct perf_sample_data sample;
u16 header_size = cgroup_event->event_id.header.size;
int ret;
if (!perf_event_cgroup_match(event))
return;
perf_event_header__init_id(&cgroup_event->event_id.header,
&sample, event);
ret = perf_output_begin(&handle, event,
cgroup_event->event_id.header.size);
if (ret)
goto out;
perf_output_put(&handle, cgroup_event->event_id);
__output_copy(&handle, cgroup_event->path, cgroup_event->path_size);
perf_event__output_id_sample(event, &handle, &sample);
perf_output_end(&handle);
out:
cgroup_event->event_id.header.size = header_size;
}
static void perf_event_cgroup(struct cgroup *cgrp)
{
struct perf_cgroup_event cgroup_event;
char path_enomem[16] = "//enomem";
char *pathname;
size_t size;
if (!atomic_read(&nr_cgroup_events))
return;
cgroup_event = (struct perf_cgroup_event){
.event_id = {
.header = {
.type = PERF_RECORD_CGROUP,
.misc = 0,
.size = sizeof(cgroup_event.event_id),
},
.id = cgroup_id(cgrp),
},
};
pathname = kmalloc(PATH_MAX, GFP_KERNEL);
if (pathname == NULL) {
cgroup_event.path = path_enomem;
} else {
/* just to be sure to have enough space for alignment */
cgroup_path(cgrp, pathname, PATH_MAX - sizeof(u64));
cgroup_event.path = pathname;
}
/*
* Since our buffer works in 8 byte units we need to align our string
* size to a multiple of 8. However, we must guarantee the tail end is
* zero'd out to avoid leaking random bits to userspace.
*/
size = strlen(cgroup_event.path) + 1;
while (!IS_ALIGNED(size, sizeof(u64)))
cgroup_event.path[size++] = '\0';
cgroup_event.event_id.header.size += size;
cgroup_event.path_size = size;
perf_iterate_sb(perf_event_cgroup_output,
&cgroup_event,
NULL);
kfree(pathname);
}
#endif
/* /*
* mmap tracking * mmap tracking
*/ */
...@@ -10781,6 +10883,8 @@ static void account_event(struct perf_event *event) ...@@ -10781,6 +10883,8 @@ static void account_event(struct perf_event *event)
atomic_inc(&nr_comm_events); atomic_inc(&nr_comm_events);
if (event->attr.namespaces) if (event->attr.namespaces)
atomic_inc(&nr_namespaces_events); atomic_inc(&nr_namespaces_events);
if (event->attr.cgroup)
atomic_inc(&nr_cgroup_events);
if (event->attr.task) if (event->attr.task)
atomic_inc(&nr_task_events); atomic_inc(&nr_task_events);
if (event->attr.freq) if (event->attr.freq)
...@@ -12757,6 +12861,12 @@ static void perf_cgroup_css_free(struct cgroup_subsys_state *css) ...@@ -12757,6 +12861,12 @@ static void perf_cgroup_css_free(struct cgroup_subsys_state *css)
kfree(jc); kfree(jc);
} }
static int perf_cgroup_css_online(struct cgroup_subsys_state *css)
{
perf_event_cgroup(css->cgroup);
return 0;
}
static int __perf_cgroup_move(void *info) static int __perf_cgroup_move(void *info)
{ {
struct task_struct *task = info; struct task_struct *task = info;
...@@ -12778,6 +12888,7 @@ static void perf_cgroup_attach(struct cgroup_taskset *tset) ...@@ -12778,6 +12888,7 @@ static void perf_cgroup_attach(struct cgroup_taskset *tset)
struct cgroup_subsys perf_event_cgrp_subsys = { struct cgroup_subsys perf_event_cgrp_subsys = {
.css_alloc = perf_cgroup_css_alloc, .css_alloc = perf_cgroup_css_alloc,
.css_free = perf_cgroup_css_free, .css_free = perf_cgroup_css_free,
.css_online = perf_cgroup_css_online,
.attach = perf_cgroup_attach, .attach = perf_cgroup_attach,
/* /*
* Implicitly enable on dfl hierarchy so that perf events can * Implicitly enable on dfl hierarchy so that perf events can
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment