Commit a23967c1 authored by Alexei Starovoitov

Merge branch 'bpf-tracing-multiprog-tp-query'

Yonghong Song says:

====================
Commit e87c6bc3 ("bpf: permit multiple bpf attachments
for a single perf event") added support to attach multiple
bpf programs to a single perf event. Given a perf event
(kprobe, uprobe, or kernel tracepoint), the perf ioctl interface
is used to query bpf programs attached to the same trace event.

There already exists a BPF_PROG_QUERY command for introspection,
currently used by cgroup+bpf. We did have an implementation for
querying tracepoint+bpf through the same interface. However, it
looks cleaner to use an ioctl()-style API here, since attaching a
bpf prog to a tracepoint/kprobe/uprobe is also done via ioctl.

Patch #1 had the core implementation and patch #2 added
a test case in tools bpf selftests suite.

Changelogs:
v3 -> v4:
  - Fix a compilation error with newer gcc like 6.3.1 while
    old gcc 4.8.5 is okay. I was using &uquery->ids to represent
    the address to the ids array to make it explicit that the
    address is passed, and this syntax is rightly rejected
    by gcc 6.3.1.
v2 -> v3:
  - Change uapi structure perf_event_query_bpf to be
    clearer based on Peter's suggestion, and adjust
    other code accordingly.
v1 -> v2:
  - Rebase on top of net-next.
  - Use existing bpf_prog_array_length function instead of
    implementing the same functionality in function
    bpf_prog_array_copy_info.
====================
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents 63060c39 d279f1f8
...@@ -254,6 +254,7 @@ typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src, ...@@ -254,6 +254,7 @@ typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src,
u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy); void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy);
/*
 * Backs the PERF_EVENT_IOC_QUERY_BPF ioctl: @info is a user pointer to a
 * struct perf_event_query_bpf. Returns 0 on success or a negative errno.
 */
int bpf_event_query_prog_array(struct perf_event *event, void __user *info);
int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
union bpf_attr __user *uattr); union bpf_attr __user *uattr);
...@@ -285,6 +286,9 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs, ...@@ -285,6 +286,9 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs, void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs,
struct bpf_prog *old_prog); struct bpf_prog *old_prog);
/*
 * Store the number of programs in @array to the user location @prog_cnt and,
 * when @request_cnt is non-zero and the array is not empty, copy program ids
 * to the user buffer @prog_ids. @array may be NULL (reported as 0 programs).
 * Returns 0 on success or a negative errno.
 */
int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array,
__u32 __user *prog_ids, u32 request_cnt,
__u32 __user *prog_cnt);
int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array, int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
struct bpf_prog *exclude_prog, struct bpf_prog *exclude_prog,
struct bpf_prog *include_prog, struct bpf_prog *include_prog,
......
...@@ -418,6 +418,27 @@ struct perf_event_attr { ...@@ -418,6 +418,27 @@ struct perf_event_attr {
__u16 __reserved_2; /* align to __u64 */ __u16 __reserved_2; /* align to __u64 */
}; };
/*
 * Structure used by below PERF_EVENT_IOC_QUERY_BPF command
 * to query bpf programs attached to the same perf tracepoint
 * as the given perf event.
 *
 * The caller allocates sizeof(struct perf_event_query_bpf) plus
 * ids_len * sizeof(__u32) bytes and passes a pointer to it.
 */
struct perf_event_query_bpf {
	/*
	 * The below ids array length, in entries; set by the caller.
	 * A value of 0 requests the program count only.
	 */
	__u32	ids_len;
	/*
	 * Set by the kernel to indicate the number of
	 * available programs
	 */
	__u32	prog_cnt;
	/*
	 * User provided buffer to store program ids.
	 * Declared as a C99 flexible array member rather than a GNU
	 * zero-length array; the structure layout and size are identical.
	 */
	__u32	ids[];
};
#define perf_flags(attr) (*(&(attr)->read_format + 1)) #define perf_flags(attr) (*(&(attr)->read_format + 1))
/* /*
...@@ -433,6 +454,7 @@ struct perf_event_attr { ...@@ -433,6 +454,7 @@ struct perf_event_attr {
#define PERF_EVENT_IOC_ID _IOR('$', 7, __u64 *) #define PERF_EVENT_IOC_ID _IOR('$', 7, __u64 *)
#define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, __u32) #define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, __u32)
#define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW('$', 9, __u32) #define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW('$', 9, __u32)
/* Query the bpf programs attached to the event's tracepoint; the argument points to a struct perf_event_query_bpf. */
#define PERF_EVENT_IOC_QUERY_BPF _IOWR('$', 10, struct perf_event_query_bpf *)
enum perf_event_ioc_flags { enum perf_event_ioc_flags {
PERF_IOC_FLAG_GROUP = 1U << 0, PERF_IOC_FLAG_GROUP = 1U << 0,
......
...@@ -1462,6 +1462,8 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs, ...@@ -1462,6 +1462,8 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
rcu_read_lock(); rcu_read_lock();
prog = rcu_dereference(progs)->progs; prog = rcu_dereference(progs)->progs;
for (; *prog; prog++) { for (; *prog; prog++) {
if (*prog == &dummy_bpf_prog.prog)
continue;
id = (*prog)->aux->id; id = (*prog)->aux->id;
if (copy_to_user(prog_ids + i, &id, sizeof(id))) { if (copy_to_user(prog_ids + i, &id, sizeof(id))) {
rcu_read_unlock(); rcu_read_unlock();
...@@ -1545,6 +1547,25 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array, ...@@ -1545,6 +1547,25 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
return 0; return 0;
} }
/*
 * Report how many programs @array holds and, on request, their ids.
 *
 * @array:       program array to inspect; NULL is treated as empty.
 * @prog_ids:    user buffer receiving the program ids.
 * @request_cnt: number of ids the caller has room for; 0 means the caller
 *               wants the count only.
 * @prog_cnt:    user location receiving the number of programs.
 *
 * Returns -EFAULT if the count cannot be written to @prog_cnt, 0 if only the
 * count was requested or the array is empty, and otherwise the result of
 * bpf_prog_array_copy_to_user().
 */
int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array,
			     __u32 __user *prog_ids, u32 request_cnt,
			     __u32 __user *prog_cnt)
{
	u32 nr_progs = array ? bpf_prog_array_length(array) : 0;

	/* The count is always reported, even when no ids are copied. */
	if (copy_to_user(prog_cnt, &nr_progs, sizeof(nr_progs)))
		return -EFAULT;

	/* Copy ids only if the caller asked for them and there are any. */
	if (request_cnt && nr_progs)
		return bpf_prog_array_copy_to_user(array, prog_ids,
						   request_cnt);

	return 0;
}
static void bpf_prog_free_deferred(struct work_struct *work) static void bpf_prog_free_deferred(struct work_struct *work)
{ {
struct bpf_prog_aux *aux; struct bpf_prog_aux *aux;
......
...@@ -4723,6 +4723,9 @@ static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned lon ...@@ -4723,6 +4723,9 @@ static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned lon
rcu_read_unlock(); rcu_read_unlock();
return 0; return 0;
} }
case PERF_EVENT_IOC_QUERY_BPF:
return bpf_event_query_prog_array(event, (void __user *)arg);
default: default:
return -ENOTTY; return -ENOTTY;
} }
......
...@@ -820,3 +820,26 @@ void perf_event_detach_bpf_prog(struct perf_event *event) ...@@ -820,3 +820,26 @@ void perf_event_detach_bpf_prog(struct perf_event *event)
unlock: unlock:
mutex_unlock(&bpf_event_mutex); mutex_unlock(&bpf_event_mutex);
} }
/*
 * Handle PERF_EVENT_IOC_QUERY_BPF for @event.
 *
 * @event: perf event whose tracepoint program array is queried.
 * @info:  user pointer to a struct perf_event_query_bpf; ids_len is read
 *         from it, prog_cnt and ids[] are written back.
 *
 * Returns -EPERM without CAP_SYS_ADMIN, -EINVAL if @event is not of type
 * PERF_TYPE_TRACEPOINT, -EFAULT if the query header cannot be read from
 * user space, and otherwise the result of bpf_prog_array_copy_info().
 */
int bpf_event_query_prog_array(struct perf_event *event, void __user *info)
{
	struct perf_event_query_bpf __user *user_query = info;
	struct perf_event_query_bpf kern_query = {};
	__u32 __user *user_ids, *user_cnt;
	int err;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (event->attr.type != PERF_TYPE_TRACEPOINT)
		return -EINVAL;

	/* Only the fixed-size header is read; ids[] is output only. */
	if (copy_from_user(&kern_query, user_query, sizeof(kern_query)))
		return -EFAULT;

	/* Address computations only; the user memory is not touched here. */
	user_ids = user_query->ids;
	user_cnt = &user_query->prog_cnt;

	/* The program array is read while holding bpf_event_mutex. */
	mutex_lock(&bpf_event_mutex);
	err = bpf_prog_array_copy_info(event->tp_event->prog_array,
				       user_ids, kern_query.ids_len,
				       user_cnt);
	mutex_unlock(&bpf_event_mutex);

	return err;
}
...@@ -418,6 +418,27 @@ struct perf_event_attr { ...@@ -418,6 +418,27 @@ struct perf_event_attr {
__u16 __reserved_2; /* align to __u64 */ __u16 __reserved_2; /* align to __u64 */
}; };
/*
 * Structure used by below PERF_EVENT_IOC_QUERY_BPF command
 * to query bpf programs attached to the same perf tracepoint
 * as the given perf event.
 *
 * The caller allocates sizeof(struct perf_event_query_bpf) plus
 * ids_len * sizeof(__u32) bytes and passes a pointer to it.
 */
struct perf_event_query_bpf {
	/*
	 * The below ids array length, in entries; set by the caller.
	 * A value of 0 requests the program count only.
	 */
	__u32	ids_len;
	/*
	 * Set by the kernel to indicate the number of
	 * available programs
	 */
	__u32	prog_cnt;
	/*
	 * User provided buffer to store program ids.
	 * Declared as a C99 flexible array member rather than a GNU
	 * zero-length array; the structure layout and size are identical.
	 */
	__u32	ids[];
};
#define perf_flags(attr) (*(&(attr)->read_format + 1)) #define perf_flags(attr) (*(&(attr)->read_format + 1))
/* /*
...@@ -433,6 +454,7 @@ struct perf_event_attr { ...@@ -433,6 +454,7 @@ struct perf_event_attr {
#define PERF_EVENT_IOC_ID _IOR('$', 7, __u64 *) #define PERF_EVENT_IOC_ID _IOR('$', 7, __u64 *)
#define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, __u32) #define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, __u32)
#define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW('$', 9, __u32) #define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW('$', 9, __u32)
/* Query the bpf programs attached to the event's tracepoint; the argument points to a struct perf_event_query_bpf. */
#define PERF_EVENT_IOC_QUERY_BPF _IOWR('$', 10, struct perf_event_query_bpf *)
enum perf_event_ioc_flags { enum perf_event_ioc_flags {
PERF_IOC_FLAG_GROUP = 1U << 0, PERF_IOC_FLAG_GROUP = 1U << 0,
......
...@@ -17,7 +17,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test ...@@ -17,7 +17,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \ test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \
sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o
TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh \ TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh \
test_offload.py test_offload.py
......
...@@ -21,8 +21,10 @@ typedef __u16 __sum16; ...@@ -21,8 +21,10 @@ typedef __u16 __sum16;
#include <linux/ipv6.h> #include <linux/ipv6.h>
#include <linux/tcp.h> #include <linux/tcp.h>
#include <linux/filter.h> #include <linux/filter.h>
#include <linux/perf_event.h>
#include <linux/unistd.h> #include <linux/unistd.h>
#include <sys/ioctl.h>
#include <sys/wait.h> #include <sys/wait.h>
#include <sys/resource.h> #include <sys/resource.h>
#include <sys/types.h> #include <sys/types.h>
...@@ -617,6 +619,136 @@ static void test_obj_name(void) ...@@ -617,6 +619,136 @@ static void test_obj_name(void)
} }
} }
/*
 * Attach num_progs instances of the program in ./test_tracepoint.o to the
 * sched:sched_switch tracepoint, one perf event per instance, and check
 * after every attachment that PERF_EVENT_IOC_QUERY_BPF reports the expected
 * program count and program ids. A few negative cases (NULL program array,
 * count-only query, bad user pointer, too-small id buffer) are exercised
 * along the way.
 */
static void test_tp_attach_query(void)
{
	const int num_progs = 3;
	int i, j, bytes, efd, err, prog_fd[num_progs], pmu_fd[num_progs];
	__u32 duration = 0, info_len, saved_prog_ids[num_progs];
	const char *file = "./test_tracepoint.o";
	struct perf_event_query_bpf *query;
	struct perf_event_attr attr = {};
	struct bpf_object *obj[num_progs];
	struct bpf_prog_info prog_info;
	char buf[256];

	/*
	 * The cleanup1 label closes obj[i] even when bpf_prog_load() failed
	 * for that slot, in which case obj[i] may never have been written.
	 * Start from NULL so the close never sees an indeterminate pointer.
	 */
	for (i = 0; i < num_progs; i++)
		obj[i] = NULL;

	snprintf(buf, sizeof(buf),
		 "/sys/kernel/debug/tracing/events/sched/sched_switch/id");
	efd = open(buf, O_RDONLY, 0);
	if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
		return;
	bytes = read(efd, buf, sizeof(buf));
	close(efd);
	if (CHECK(bytes <= 0 || bytes >= sizeof(buf),
		  "read", "bytes %d errno %d\n", bytes, errno))
		return;

	/* read() does not NUL-terminate; strtol() needs a proper string. */
	buf[bytes] = '\0';

	attr.config = strtol(buf, NULL, 0);
	attr.type = PERF_TYPE_TRACEPOINT;
	attr.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN;
	attr.sample_period = 1;
	attr.wakeup_events = 1;

	/* Header plus room for one id per program. */
	query = malloc(sizeof(*query) + sizeof(__u32) * num_progs);
	if (CHECK(!query, "malloc", "errno %d\n", errno))
		return;

	for (i = 0; i < num_progs; i++) {
		err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj[i],
				    &prog_fd[i]);
		if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
			goto cleanup1;

		bzero(&prog_info, sizeof(prog_info));
		prog_info.jited_prog_len = 0;
		prog_info.xlated_prog_len = 0;
		prog_info.nr_map_ids = 0;
		info_len = sizeof(prog_info);
		err = bpf_obj_get_info_by_fd(prog_fd[i], &prog_info, &info_len);
		if (CHECK(err, "bpf_obj_get_info_by_fd", "err %d errno %d\n",
			  err, errno))
			goto cleanup1;
		saved_prog_ids[i] = prog_info.id;

		pmu_fd[i] = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
				    0 /* cpu 0 */, -1 /* group id */,
				    0 /* flags */);
		if (CHECK(pmu_fd[i] < 0, "perf_event_open", "err %d errno %d\n",
			  pmu_fd[i], errno))
			goto cleanup2;
		err = ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE, 0);
		if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n",
			  err, errno))
			goto cleanup3;

		if (i == 0) {
			/* check NULL prog array query */
			query->ids_len = num_progs;
			err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF, query);
			if (CHECK(err || query->prog_cnt != 0,
				  "perf_event_ioc_query_bpf",
				  "err %d errno %d query->prog_cnt %u\n",
				  err, errno, query->prog_cnt))
				goto cleanup3;
		}

		err = ioctl(pmu_fd[i], PERF_EVENT_IOC_SET_BPF, prog_fd[i]);
		if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n",
			  err, errno))
			goto cleanup3;

		if (i == 1) {
			/* try to get # of programs only */
			query->ids_len = 0;
			err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF, query);
			if (CHECK(err || query->prog_cnt != 2,
				  "perf_event_ioc_query_bpf",
				  "err %d errno %d query->prog_cnt %u\n",
				  err, errno, query->prog_cnt))
				goto cleanup3;

			/* try a few negative tests */
			/* invalid query pointer */
			err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF,
				    (struct perf_event_query_bpf *)0x1);
			if (CHECK(!err || errno != EFAULT,
				  "perf_event_ioc_query_bpf",
				  "err %d errno %d\n", err, errno))
				goto cleanup3;

			/* not enough space */
			query->ids_len = 1;
			err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF, query);
			if (CHECK(!err || errno != ENOSPC || query->prog_cnt != 2,
				  "perf_event_ioc_query_bpf",
				  "err %d errno %d query->prog_cnt %u\n",
				  err, errno, query->prog_cnt))
				goto cleanup3;
		}

		/* full query: i + 1 programs are attached at this point */
		query->ids_len = num_progs;
		err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF, query);
		if (CHECK(err || query->prog_cnt != (i + 1),
			  "perf_event_ioc_query_bpf",
			  "err %d errno %d query->prog_cnt %u\n",
			  err, errno, query->prog_cnt))
			goto cleanup3;
		for (j = 0; j < i + 1; j++)
			if (CHECK(saved_prog_ids[j] != query->ids[j],
				  "perf_event_ioc_query_bpf",
				  "#%d saved_prog_id %x query prog_id %x\n",
				  j, saved_prog_ids[j], query->ids[j]))
				goto cleanup3;
	}

	/*
	 * Tear down in reverse order. The error paths above jump into the
	 * loop body at the label matching how far iteration i got, so the
	 * failing slot is undone partially and all earlier slots fully.
	 */
	i = num_progs - 1;
	for (; i >= 0; i--) {
 cleanup3:
		ioctl(pmu_fd[i], PERF_EVENT_IOC_DISABLE);
 cleanup2:
		close(pmu_fd[i]);
 cleanup1:
		bpf_object__close(obj[i]);
	}
	free(query);
}
int main(void) int main(void)
{ {
struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY }; struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
...@@ -630,6 +762,7 @@ int main(void) ...@@ -630,6 +762,7 @@ int main(void)
test_bpf_obj_id(); test_bpf_obj_id();
test_pkt_md_access(); test_pkt_md_access();
test_obj_name(); test_obj_name();
test_tp_attach_query();
printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt); printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt);
return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS; return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
......
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2017 Facebook
#include <linux/bpf.h>
#include "bpf_helpers.h"
/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
/* Argument layout of the sched_switch tracepoint program defined in this file. */
struct sched_switch_args {
/* presumably covers the common tracepoint fields that precede prev_comm -- confirm against the format file */
unsigned long long pad;
char prev_comm[16];
int prev_pid;
int prev_prio;
long long prev_state;
char next_comm[16];
int next_pid;
int next_prio;
};
/*
 * Minimal tracepoint program placed in the "tracepoint/sched/sched_switch"
 * section. It ignores @ctx and always returns 0; presumably it exists only
 * so the selftest has a program to attach and query -- confirm against the
 * test that loads this object.
 */
SEC("tracepoint/sched/sched_switch")
int oncpu(struct sched_switch_args *ctx)
{
return 0;
}
char _license[] SEC("license") = "GPL";
__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment