diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5e694a308081aa157f19a54b4552dbef695db28c..a834f4b761bc5c182ad4fddfce84919f7185a939 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1612,6 +1612,7 @@ struct bpf_link_ops { struct bpf_link_info *info); int (*update_map)(struct bpf_link *link, struct bpf_map *new_map, struct bpf_map *old_map); + __poll_t (*poll)(struct file *file, struct poll_table_struct *pts); }; struct bpf_tramp_link { @@ -1730,9 +1731,9 @@ struct bpf_struct_ops { int (*init_member)(const struct btf_type *t, const struct btf_member *member, void *kdata, const void *udata); - int (*reg)(void *kdata); - void (*unreg)(void *kdata); - int (*update)(void *kdata, void *old_kdata); + int (*reg)(void *kdata, struct bpf_link *link); + void (*unreg)(void *kdata, struct bpf_link *link); + int (*update)(void *kdata, void *old_kdata, struct bpf_link *link); int (*validate)(void *kdata); void *cfi_stubs; struct module *owner; @@ -2333,6 +2334,7 @@ int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer); int bpf_link_settle(struct bpf_link_primer *primer); void bpf_link_cleanup(struct bpf_link_primer *primer); void bpf_link_inc(struct bpf_link *link); +struct bpf_link *bpf_link_inc_not_zero(struct bpf_link *link); void bpf_link_put(struct bpf_link *link); int bpf_link_new_fd(struct bpf_link *link); struct bpf_link *bpf_link_get_from_fd(u32 ufd); @@ -2704,6 +2706,11 @@ static inline void bpf_link_inc(struct bpf_link *link) { } +static inline struct bpf_link *bpf_link_inc_not_zero(struct bpf_link *link) +{ + return NULL; +} + static inline void bpf_link_put(struct bpf_link *link) { } diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index 86c7884abaf87ab1cc0d4b83b564bba584772205..a2cf31b14be46de4128e7dbe37b333f413eb7b8d 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -12,6 +12,7 @@ #include <linux/mutex.h> #include <linux/btf_ids.h> #include <linux/rcupdate_wait.h> +#include <linux/poll.h> struct bpf_struct_ops_value { struct bpf_struct_ops_common_value common; @@ -56,6 +57,7 @@ struct bpf_struct_ops_map { struct bpf_struct_ops_link { struct bpf_link link; struct bpf_map __rcu *map; + wait_queue_head_t wait_hup; }; static DEFINE_MUTEX(update_mutex); @@ -757,7 +759,7 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, goto unlock; } - err = st_ops->reg(kdata); + err = st_ops->reg(kdata, NULL); if (likely(!err)) { /* This refcnt increment on the map here after * 'st_ops->reg()' is secure since the state of the @@ -805,7 +807,7 @@ static long bpf_struct_ops_map_delete_elem(struct bpf_map *map, void *key) BPF_STRUCT_OPS_STATE_TOBEFREE); switch (prev_state) { case BPF_STRUCT_OPS_STATE_INUSE: - st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data); + st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data, NULL); bpf_map_put(map); return 0; case BPF_STRUCT_OPS_STATE_TOBEFREE: @@ -1057,10 +1059,7 @@ static void bpf_struct_ops_map_link_dealloc(struct bpf_link *link) st_map = (struct bpf_struct_ops_map *) rcu_dereference_protected(st_link->map, true); if (st_map) { - /* st_link->map can be NULL if - * bpf_struct_ops_link_create() fails to register. - */ - st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data); + st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data, link); bpf_map_put(&st_map->map); } kfree(st_link); @@ -1075,7 +1074,8 @@ static void bpf_struct_ops_map_link_show_fdinfo(const struct bpf_link *link, st_link = container_of(link, struct bpf_struct_ops_link, link); rcu_read_lock(); map = rcu_dereference(st_link->map); - seq_printf(seq, "map_id:\t%d\n", map->id); + if (map) + seq_printf(seq, "map_id:\t%d\n", map->id); rcu_read_unlock(); } @@ -1088,7 +1088,8 @@ static int bpf_struct_ops_map_link_fill_link_info(const struct bpf_link *link, st_link = container_of(link, struct bpf_struct_ops_link, link); rcu_read_lock(); map = rcu_dereference(st_link->map); - info->struct_ops.map_id = map->id; + if (map) + info->struct_ops.map_id = map->id; rcu_read_unlock(); return 0; } @@ -1113,6 +1114,10 @@ static int bpf_struct_ops_map_link_update(struct bpf_link *link, struct bpf_map mutex_lock(&update_mutex); old_map = rcu_dereference_protected(st_link->map, lockdep_is_held(&update_mutex)); + if (!old_map) { + err = -ENOLINK; + goto err_out; + } if (expected_old_map && old_map != expected_old_map) { err = -EPERM; goto err_out; @@ -1125,7 +1130,7 @@ static int bpf_struct_ops_map_link_update(struct bpf_link *link, struct bpf_map goto err_out; } - err = st_map->st_ops_desc->st_ops->update(st_map->kvalue.data, old_st_map->kvalue.data); + err = st_map->st_ops_desc->st_ops->update(st_map->kvalue.data, old_st_map->kvalue.data, link); if (err) goto err_out; @@ -1139,11 +1144,53 @@ static int bpf_struct_ops_map_link_update(struct bpf_link *link, struct bpf_map return err; } +static int bpf_struct_ops_map_link_detach(struct bpf_link *link) +{ + struct bpf_struct_ops_link *st_link = container_of(link, struct bpf_struct_ops_link, link); + struct bpf_struct_ops_map *st_map; + struct bpf_map *map; + + mutex_lock(&update_mutex); + + map = rcu_dereference_protected(st_link->map, lockdep_is_held(&update_mutex)); + if (!map) { + mutex_unlock(&update_mutex); + return 0; + } + st_map = container_of(map, struct bpf_struct_ops_map, map); + + st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data, link); + + RCU_INIT_POINTER(st_link->map, NULL); + /* Pair with bpf_map_get() in bpf_struct_ops_link_create() or + * bpf_map_inc() in bpf_struct_ops_map_link_update(). + */ + bpf_map_put(&st_map->map); + + mutex_unlock(&update_mutex); + + wake_up_interruptible_poll(&st_link->wait_hup, EPOLLHUP); + + return 0; +} + +static __poll_t bpf_struct_ops_map_link_poll(struct file *file, + struct poll_table_struct *pts) +{ + struct bpf_struct_ops_link *st_link = file->private_data; + + poll_wait(file, &st_link->wait_hup, pts); + + return rcu_access_pointer(st_link->map) ? 0 : EPOLLHUP; +} + static const struct bpf_link_ops bpf_struct_ops_map_lops = { .dealloc = bpf_struct_ops_map_link_dealloc, + .detach = bpf_struct_ops_map_link_detach, .show_fdinfo = bpf_struct_ops_map_link_show_fdinfo, .fill_link_info = bpf_struct_ops_map_link_fill_link_info, .update_map = bpf_struct_ops_map_link_update, + .poll = bpf_struct_ops_map_link_poll, }; int bpf_struct_ops_link_create(union bpf_attr *attr) @@ -1176,13 +1223,21 @@ int bpf_struct_ops_link_create(union bpf_attr *attr) if (err) goto err_out; - err = st_map->st_ops_desc->st_ops->reg(st_map->kvalue.data); + init_waitqueue_head(&link->wait_hup); + + /* Hold the update_mutex such that the subsystem cannot + * do link->ops->detach() before the link is fully initialized. + */ + mutex_lock(&update_mutex); + err = st_map->st_ops_desc->st_ops->reg(st_map->kvalue.data, &link->link); if (err) { + mutex_unlock(&update_mutex); bpf_link_cleanup(&link_primer); link = NULL; goto err_out; } RCU_INIT_POINTER(link->map, map); + mutex_unlock(&update_mutex); return bpf_link_settle(&link_primer); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 2222c3ff88e7fd639390112de66581b934584457..5070fa20d05c58129a24ef9b4c80f7b1773e007f 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3150,6 +3150,13 @@ static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp) } #endif +static __poll_t bpf_link_poll(struct file *file, struct poll_table_struct *pts) +{ + struct bpf_link *link = file->private_data; + + return link->ops->poll(file, pts); +} + static const struct file_operations bpf_link_fops = { #ifdef CONFIG_PROC_FS .show_fdinfo = bpf_link_show_fdinfo, @@ -3159,6 +3166,16 @@ static const struct file_operations bpf_link_fops = { .write = bpf_dummy_write, }; +static const struct file_operations bpf_link_fops_poll = { +#ifdef CONFIG_PROC_FS + .show_fdinfo = bpf_link_show_fdinfo, +#endif + .release = bpf_link_release, + .read = bpf_dummy_read, + .write = bpf_dummy_write, + .poll = bpf_link_poll, +}; + static int bpf_link_alloc_id(struct bpf_link *link) { int id; @@ -3201,7 +3218,9 @@ int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer) return id; } - file = anon_inode_getfile("bpf_link", &bpf_link_fops, link, O_CLOEXEC); + file = anon_inode_getfile("bpf_link", + link->ops->poll ? &bpf_link_fops_poll : &bpf_link_fops, + link, O_CLOEXEC); if (IS_ERR(file)) { bpf_link_free_id(id); put_unused_fd(fd); @@ -3229,7 +3248,9 @@ int bpf_link_settle(struct bpf_link_primer *primer) int bpf_link_new_fd(struct bpf_link *link) { - return anon_inode_getfd("bpf-link", &bpf_link_fops, link, O_CLOEXEC); + return anon_inode_getfd("bpf-link", + link->ops->poll ? &bpf_link_fops_poll : &bpf_link_fops, + link, O_CLOEXEC); } struct bpf_link *bpf_link_get_from_fd(u32 ufd) @@ -3239,7 +3260,7 @@ struct bpf_link *bpf_link_get_from_fd(u32 ufd) if (!f.file) return ERR_PTR(-EBADF); - if (f.file->f_op != &bpf_link_fops) { + if (f.file->f_op != &bpf_link_fops && f.file->f_op != &bpf_link_fops_poll) { fdput(f); return ERR_PTR(-EINVAL); } @@ -4971,7 +4992,7 @@ static int bpf_obj_get_info_by_fd(const union bpf_attr *attr, uattr); else if (f.file->f_op == &btf_fops) err = bpf_btf_get_info_by_fd(f.file, f.file->private_data, attr, uattr); - else if (f.file->f_op == &bpf_link_fops) + else if (f.file->f_op == &bpf_link_fops || f.file->f_op == &bpf_link_fops_poll) err = bpf_link_get_info_by_fd(f.file, f.file->private_data, attr, uattr); else @@ -5106,7 +5127,7 @@ static int bpf_task_fd_query(const union bpf_attr *attr, if (!file) return -EBADF; - if (file->f_op == &bpf_link_fops) { + if (file->f_op == &bpf_link_fops || file->f_op == &bpf_link_fops_poll) { struct bpf_link *link = file->private_data; if (link->ops == &bpf_raw_tp_link_lops) { @@ -5416,10 +5437,11 @@ static int link_detach(union bpf_attr *attr) return ret; } -static struct bpf_link *bpf_link_inc_not_zero(struct bpf_link *link) +struct bpf_link *bpf_link_inc_not_zero(struct bpf_link *link) { return atomic64_fetch_add_unless(&link->refcnt, 1, 0) ? link : ERR_PTR(-ENOENT); } +EXPORT_SYMBOL(bpf_link_inc_not_zero); struct bpf_link *bpf_link_by_id(u32 id) { diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c index 891cdf61c65ae2f850ed0218f2d9b8c110ee7bd1..3ea52b05adfbd8a22aa126b17e2751e938d1dccc 100644 --- a/net/bpf/bpf_dummy_struct_ops.c +++ b/net/bpf/bpf_dummy_struct_ops.c @@ -272,12 +272,12 @@ static int bpf_dummy_init_member(const struct btf_type *t, return -EOPNOTSUPP; } -static int bpf_dummy_reg(void *kdata) +static int bpf_dummy_reg(void *kdata, struct bpf_link *link) { return -EOPNOTSUPP; } -static void bpf_dummy_unreg(void *kdata) +static void bpf_dummy_unreg(void *kdata, struct bpf_link *link) { } diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index 18227757ec0cebea5847bb636733445580a1385d..3f88d0961e5b2f40994b9d2cfaedb87f5103fab3 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -260,17 +260,17 @@ static int bpf_tcp_ca_check_member(const struct btf_type *t, return 0; } -static int bpf_tcp_ca_reg(void *kdata) +static int bpf_tcp_ca_reg(void *kdata, struct bpf_link *link) { return tcp_register_congestion_control(kdata); } -static void bpf_tcp_ca_unreg(void *kdata) +static void bpf_tcp_ca_unreg(void *kdata, struct bpf_link *link) { tcp_unregister_congestion_control(kdata); } -static int bpf_tcp_ca_update(void *kdata, void *old_kdata) +static int bpf_tcp_ca_update(void *kdata, void *old_kdata, struct bpf_link *link) { return tcp_update_congestion_control(kdata, old_kdata); } diff --git a/tools/bpf/bpftool/skeleton/pid_iter.bpf.c b/tools/bpf/bpftool/skeleton/pid_iter.bpf.c index 7bdbcac3cf628523d664f57bcb20f73a6dccbb39..948dde25034efefc2582509f701903d18fd1e5e4 100644 --- a/tools/bpf/bpftool/skeleton/pid_iter.bpf.c +++ b/tools/bpf/bpftool/skeleton/pid_iter.bpf.c @@ -29,6 +29,7 @@ enum bpf_link_type___local { }; extern const void bpf_link_fops __ksym; +extern const void bpf_link_fops_poll __ksym __weak; extern const void bpf_map_fops __ksym; extern const void bpf_prog_fops __ksym; extern const void btf_fops __ksym; @@ -84,7 +85,11 @@ int iter(struct bpf_iter__task_file *ctx) fops = &btf_fops; break; case BPF_OBJ_LINK: - fops = &bpf_link_fops; + if (&bpf_link_fops_poll && + file->f_op == &bpf_link_fops_poll) + fops = &bpf_link_fops_poll; + else + fops = &bpf_link_fops; break; default: return 0; diff --git a/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c b/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c index b1dd889d5d7d08ced54fa89e6835235ddd7dc3f3..948eb3962732dee2d17a68732ec899d4265ca4fb 100644 --- a/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c +++ b/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c @@ -22,12 +22,12 @@ static int dummy_init_member(const struct btf_type *t, return 0; } -static int dummy_reg(void *kdata) +static int dummy_reg(void *kdata, struct bpf_link *link) { return 0; } -static void dummy_unreg(void *kdata) +static void dummy_unreg(void *kdata, struct bpf_link *link) { } diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index 2a18bd320e92209de70681a8a8095f50438444e1..0a09732cde4b765369fa90f9ba16690ad150d7d7 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -820,7 +820,7 @@ static const struct bpf_verifier_ops bpf_testmod_verifier_ops = { .is_valid_access = bpf_testmod_ops_is_valid_access, }; -static int bpf_dummy_reg(void *kdata) +static int bpf_dummy_reg(void *kdata, struct bpf_link *link) { struct bpf_testmod_ops *ops = kdata; @@ -835,7 +835,7 @@ static int bpf_dummy_reg(void *kdata) return 0; } -static void bpf_dummy_unreg(void *kdata) +static void bpf_dummy_unreg(void *kdata, struct bpf_link *link) { } @@ -871,7 +871,7 @@ struct bpf_struct_ops bpf_bpf_testmod_ops = { .owner = THIS_MODULE, }; -static int bpf_dummy_reg2(void *kdata) +static int bpf_dummy_reg2(void *kdata, struct bpf_link *link) { struct bpf_testmod_ops2 *ops = kdata; diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c index 29e183a80f49004d35e69bb39df67b5225a6be6e..bbcf12696a6bc8002b024f45bd181f536d53a3c4 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c +++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c @@ -3,9 +3,12 @@ #include <test_progs.h> #include <time.h> +#include <sys/epoll.h> + #include "struct_ops_module.skel.h" #include "struct_ops_nulled_out_cb.skel.h" #include "struct_ops_forgotten_cb.skel.h" +#include "struct_ops_detach.skel.h" static void check_map_info(struct bpf_map_info *info) { @@ -242,6 +245,58 @@ static void test_struct_ops_forgotten_cb(void) struct_ops_forgotten_cb__destroy(skel); } +/* Detach a link from a user space program */ +static void test_detach_link(void) +{ + struct epoll_event ev, events[2]; + struct struct_ops_detach *skel; + struct bpf_link *link = NULL; + int fd, epollfd = -1, nfds; + int err; + + skel = struct_ops_detach__open_and_load(); + if (!ASSERT_OK_PTR(skel, "struct_ops_detach__open_and_load")) + return; + + link = bpf_map__attach_struct_ops(skel->maps.testmod_do_detach); + if (!ASSERT_OK_PTR(link, "attach_struct_ops")) + goto cleanup; + + fd = bpf_link__fd(link); + if (!ASSERT_GE(fd, 0, "link_fd")) + goto cleanup; + + epollfd = epoll_create1(0); + if (!ASSERT_GE(epollfd, 0, "epoll_create1")) + goto cleanup; + + ev.events = EPOLLHUP; + ev.data.fd = fd; + err = epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &ev); + if (!ASSERT_OK(err, "epoll_ctl")) + goto cleanup; + + err = bpf_link__detach(link); + if (!ASSERT_OK(err, "detach_link")) + goto cleanup; + + /* Wait for EPOLLHUP */ + nfds = epoll_wait(epollfd, events, 2, 500); + if (!ASSERT_EQ(nfds, 1, "epoll_wait")) + goto cleanup; + + if (!ASSERT_EQ(events[0].data.fd, fd, "epoll_wait_fd")) + goto cleanup; + if (!ASSERT_TRUE(events[0].events & EPOLLHUP, "events[0].events")) + goto cleanup; + +cleanup: + if (epollfd >= 0) + close(epollfd); + bpf_link__destroy(link); + struct_ops_detach__destroy(skel); +} + void serial_test_struct_ops_module(void) { if (test__start_subtest("struct_ops_load")) @@ -254,5 +309,7 @@ void serial_test_struct_ops_module(void) test_struct_ops_nulled_out_cb(); if (test__start_subtest("struct_ops_forgotten_cb")) test_struct_ops_forgotten_cb(); + if (test__start_subtest("test_detach_link")) + test_detach_link(); } diff --git a/tools/testing/selftests/bpf/progs/struct_ops_detach.c b/tools/testing/selftests/bpf/progs/struct_ops_detach.c new file mode 100644 index 0000000000000000000000000000000000000000..56b787a8987610ffe776238801937890244f6184 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/struct_ops_detach.c @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include "../bpf_testmod/bpf_testmod.h" + +char _license[] SEC("license") = "GPL"; + +SEC(".struct_ops.link") +struct bpf_testmod_ops testmod_do_detach;