Commit a7d24d95 authored by Alexei Starovoitov

Merge branch 'bpf: enable task local storage for tracing'

Song Liu says:

====================

This set enables task local storage for non-BPF_LSM programs.

It is common for tracing BPF programs to access per-task data. Currently,
such data is stored in hash tables keyed by pid. In bcc/libbpftools [1],
9 out of 23 tools use such hash tables. However, a hash table is not ideal
for many use cases. Task local storage provides better usability and
performance for BPF programs; a short usage sketch follows the commit
metadata below. Please refer to patch 6/6 for a performance comparison of
task local storage vs. hash tables.

Changes v5 => v6:
1. Add inc/dec bpf_task_storage_busy in bpf_local_storage_map_free().

Changes v4 => v5:
1. Fix build w/o CONFIG_NET. (kernel test robot)
2. Remove unnecessary check for !task_storage_ptr(). (Martin)
3. Small changes in commit logs.

Changes v3 => v4:
1. Prevent deadlocks from recursive calls of bpf_task_storage_[get|delete].
   (Patch 2/6 detects the potential deadlock and fails over; patch 4/6 adds a selftest.)

Changes v2 => v3:
1. Make the selftest more robust. (Andrii)
2. Small changes with runqslower. (Andrii)
3. Shorten the CC list to make it easier for vger.

Changes v1 => v2:
1. Do not allocate task local storage when the task is being freed.
2. Revise the selftest and add a new test for a task being freed.
3. Minor changes in runqslower.
====================
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents 9c8f21e6 ced47e30
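
Before the diff, a minimal sketch of what the conversion looks like from the BPF program side, modeled on the runqslower change further down. The map name, value type, and tracepoint here are illustrative only, not part of the patches:

/* Sketch only: a task local storage map replacing a pid-keyed hash map.
 * Modeled on the runqslower conversion in this series; names are illustrative.
 */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char _license[] SEC("license") = "GPL";

struct {
	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);	/* local storage maps use BPF_F_NO_PREALLOC */
	__type(key, int);
	__type(value, u64);
} start SEC(".maps");

SEC("tp_btf/sched_wakeup")
int BPF_PROG(on_wakeup, struct task_struct *p)
{
	u64 *tsp;

	/* Look up (or create) this task's slot; no pid-keyed hash table needed. */
	tsp = bpf_task_storage_get(&start, p, 0,
				   BPF_LOCAL_STORAGE_GET_F_CREATE);
	if (tsp)
		*tsp = bpf_ktime_get_ns();
	return 0;
}

Unlike a pid-keyed hash map, the value lives with the task_struct itself and is cleaned up via bpf_task_storage_free() when the task struct is freed (the __put_task_struct() hunk below).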
@@ -1499,6 +1499,7 @@ struct bpf_prog *bpf_prog_by_id(u32 id);
 struct bpf_link *bpf_link_by_id(u32 id);
 const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id);
+void bpf_task_storage_free(struct task_struct *task);
 #else /* !CONFIG_BPF_SYSCALL */
 static inline struct bpf_prog *bpf_prog_get(u32 ufd)
 {
@@ -1684,6 +1685,10 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 {
 	return NULL;
 }
+static inline void bpf_task_storage_free(struct task_struct *task)
+{
+}
 #endif /* CONFIG_BPF_SYSCALL */
 void __bpf_free_used_btfs(struct bpf_prog_aux *aux,
@@ -1886,6 +1891,8 @@ extern const struct bpf_func_proto bpf_this_cpu_ptr_proto;
 extern const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto;
 extern const struct bpf_func_proto bpf_sock_from_file_proto;
 extern const struct bpf_func_proto bpf_get_socket_ptr_cookie_proto;
+extern const struct bpf_func_proto bpf_task_storage_get_proto;
+extern const struct bpf_func_proto bpf_task_storage_delete_proto;
 const struct bpf_func_proto *bpf_tracing_func_proto(
 	enum bpf_func_id func_id, const struct bpf_prog *prog);
...
@@ -126,7 +126,8 @@ bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
 			 struct bpf_local_storage_map *smap,
 			 bool cacheit_lockit);
-void bpf_local_storage_map_free(struct bpf_local_storage_map *smap);
+void bpf_local_storage_map_free(struct bpf_local_storage_map *smap,
+				int __percpu *busy_counter);
 int bpf_local_storage_map_check_btf(const struct bpf_map *map,
 				    const struct btf *btf,
...
@@ -38,21 +38,9 @@ static inline struct bpf_storage_blob *bpf_inode(
 	return inode->i_security + bpf_lsm_blob_sizes.lbs_inode;
 }
-static inline struct bpf_storage_blob *bpf_task(
-	const struct task_struct *task)
-{
-	if (unlikely(!task->security))
-		return NULL;
-	return task->security + bpf_lsm_blob_sizes.lbs_task;
-}
 extern const struct bpf_func_proto bpf_inode_storage_get_proto;
 extern const struct bpf_func_proto bpf_inode_storage_delete_proto;
-extern const struct bpf_func_proto bpf_task_storage_get_proto;
-extern const struct bpf_func_proto bpf_task_storage_delete_proto;
 void bpf_inode_storage_free(struct inode *inode);
-void bpf_task_storage_free(struct task_struct *task);
 #else /* !CONFIG_BPF_LSM */
@@ -73,20 +61,10 @@ static inline struct bpf_storage_blob *bpf_inode(
 	return NULL;
 }
-static inline struct bpf_storage_blob *bpf_task(
-	const struct task_struct *task)
-{
-	return NULL;
-}
 static inline void bpf_inode_storage_free(struct inode *inode)
 {
 }
-static inline void bpf_task_storage_free(struct task_struct *task)
-{
-}
 #endif /* CONFIG_BPF_LSM */
 #endif /* _LINUX_BPF_LSM_H */
...
@@ -109,8 +109,8 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops)
 #endif
 #ifdef CONFIG_BPF_LSM
 BPF_MAP_TYPE(BPF_MAP_TYPE_INODE_STORAGE, inode_storage_map_ops)
-BPF_MAP_TYPE(BPF_MAP_TYPE_TASK_STORAGE, task_storage_map_ops)
 #endif
+BPF_MAP_TYPE(BPF_MAP_TYPE_TASK_STORAGE, task_storage_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops)
 #if defined(CONFIG_XDP_SOCKETS)
 BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
...
@@ -42,6 +42,7 @@ struct audit_context;
 struct backing_dev_info;
 struct bio_list;
 struct blk_plug;
+struct bpf_local_storage;
 struct capture_control;
 struct cfs_rq;
 struct fs_struct;
@@ -1348,6 +1349,10 @@ struct task_struct {
 	/* Used by LSM modules for access restriction: */
 	void *security;
 #endif
+#ifdef CONFIG_BPF_SYSCALL
+	/* Used by BPF task local storage */
+	struct bpf_local_storage __rcu *bpf_storage;
+#endif
 #ifdef CONFIG_GCC_PLUGIN_STACKLEAK
 	unsigned long lowest_stack;
...
@@ -9,8 +9,8 @@ CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy)
 obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o
 obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
 obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o
+obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o bpf_task_storage.o
 obj-${CONFIG_BPF_LSM} += bpf_inode_storage.o
-obj-${CONFIG_BPF_LSM} += bpf_task_storage.o
 obj-$(CONFIG_BPF_SYSCALL) += disasm.o
 obj-$(CONFIG_BPF_JIT) += trampoline.o
 obj-$(CONFIG_BPF_SYSCALL) += btf.o
@@ -18,7 +18,6 @@ obj-$(CONFIG_BPF_JIT) += dispatcher.o
 ifeq ($(CONFIG_NET),y)
 obj-$(CONFIG_BPF_SYSCALL) += devmap.o
 obj-$(CONFIG_BPF_SYSCALL) += cpumap.o
-obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o
 obj-$(CONFIG_BPF_SYSCALL) += offload.o
 obj-$(CONFIG_BPF_SYSCALL) += net_namespace.o
 endif
...
@@ -237,7 +237,7 @@ static void inode_storage_map_free(struct bpf_map *map)
 	smap = (struct bpf_local_storage_map *)map;
 	bpf_local_storage_cache_idx_free(&inode_cache, smap->cache_idx);
-	bpf_local_storage_map_free(smap);
+	bpf_local_storage_map_free(smap, NULL);
 }
 static int inode_storage_map_btf_id;
...
@@ -140,17 +140,18 @@ static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem)
 {
 	struct bpf_local_storage *local_storage;
 	bool free_local_storage = false;
+	unsigned long flags;
 	if (unlikely(!selem_linked_to_storage(selem)))
 		/* selem has already been unlinked from sk */
 		return;
 	local_storage = rcu_dereference(selem->local_storage);
-	raw_spin_lock_bh(&local_storage->lock);
+	raw_spin_lock_irqsave(&local_storage->lock, flags);
 	if (likely(selem_linked_to_storage(selem)))
 		free_local_storage = bpf_selem_unlink_storage_nolock(
 			local_storage, selem, true);
-	raw_spin_unlock_bh(&local_storage->lock);
+	raw_spin_unlock_irqrestore(&local_storage->lock, flags);
 	if (free_local_storage)
 		kfree_rcu(local_storage, rcu);
@@ -167,6 +168,7 @@ void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
 {
 	struct bpf_local_storage_map *smap;
 	struct bpf_local_storage_map_bucket *b;
+	unsigned long flags;
 	if (unlikely(!selem_linked_to_map(selem)))
 		/* selem has already be unlinked from smap */
@@ -174,21 +176,22 @@ void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
 	smap = rcu_dereference(SDATA(selem)->smap);
 	b = select_bucket(smap, selem);
-	raw_spin_lock_bh(&b->lock);
+	raw_spin_lock_irqsave(&b->lock, flags);
 	if (likely(selem_linked_to_map(selem)))
 		hlist_del_init_rcu(&selem->map_node);
-	raw_spin_unlock_bh(&b->lock);
+	raw_spin_unlock_irqrestore(&b->lock, flags);
 }
 void bpf_selem_link_map(struct bpf_local_storage_map *smap,
 			struct bpf_local_storage_elem *selem)
 {
 	struct bpf_local_storage_map_bucket *b = select_bucket(smap, selem);
+	unsigned long flags;
-	raw_spin_lock_bh(&b->lock);
+	raw_spin_lock_irqsave(&b->lock, flags);
 	RCU_INIT_POINTER(SDATA(selem)->smap, smap);
 	hlist_add_head_rcu(&selem->map_node, &b->list);
-	raw_spin_unlock_bh(&b->lock);
+	raw_spin_unlock_irqrestore(&b->lock, flags);
 }
 void bpf_selem_unlink(struct bpf_local_storage_elem *selem)
@@ -224,16 +227,18 @@ bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
 	sdata = SDATA(selem);
 	if (cacheit_lockit) {
+		unsigned long flags;
 		/* spinlock is needed to avoid racing with the
 		 * parallel delete. Otherwise, publishing an already
 		 * deleted sdata to the cache will become a use-after-free
 		 * problem in the next bpf_local_storage_lookup().
 		 */
-		raw_spin_lock_bh(&local_storage->lock);
+		raw_spin_lock_irqsave(&local_storage->lock, flags);
 		if (selem_linked_to_storage(selem))
 			rcu_assign_pointer(local_storage->cache[smap->cache_idx],
 					   sdata);
-		raw_spin_unlock_bh(&local_storage->lock);
+		raw_spin_unlock_irqrestore(&local_storage->lock, flags);
 	}
 	return sdata;
@@ -327,6 +332,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
 	struct bpf_local_storage_data *old_sdata = NULL;
 	struct bpf_local_storage_elem *selem;
 	struct bpf_local_storage *local_storage;
+	unsigned long flags;
 	int err;
 	/* BPF_EXIST and BPF_NOEXIST cannot be both set */
@@ -374,7 +380,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
 		}
 	}
-	raw_spin_lock_bh(&local_storage->lock);
+	raw_spin_lock_irqsave(&local_storage->lock, flags);
 	/* Recheck local_storage->list under local_storage->lock */
 	if (unlikely(hlist_empty(&local_storage->list))) {
@@ -428,11 +434,11 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
 	}
 unlock:
-	raw_spin_unlock_bh(&local_storage->lock);
+	raw_spin_unlock_irqrestore(&local_storage->lock, flags);
 	return SDATA(selem);
 unlock_err:
-	raw_spin_unlock_bh(&local_storage->lock);
+	raw_spin_unlock_irqrestore(&local_storage->lock, flags);
 	return ERR_PTR(err);
 }
@@ -468,7 +474,8 @@ void bpf_local_storage_cache_idx_free(struct bpf_local_storage_cache *cache,
 	spin_unlock(&cache->idx_lock);
 }
-void bpf_local_storage_map_free(struct bpf_local_storage_map *smap)
+void bpf_local_storage_map_free(struct bpf_local_storage_map *smap,
+				int __percpu *busy_counter)
 {
 	struct bpf_local_storage_elem *selem;
 	struct bpf_local_storage_map_bucket *b;
@@ -497,7 +504,15 @@ void bpf_local_storage_map_free(struct bpf_local_storage_map *smap)
 		while ((selem = hlist_entry_safe(
 				rcu_dereference_raw(hlist_first_rcu(&b->list)),
 				struct bpf_local_storage_elem, map_node))) {
+			if (busy_counter) {
+				migrate_disable();
+				__this_cpu_inc(*busy_counter);
+			}
 			bpf_selem_unlink(selem);
+			if (busy_counter) {
+				__this_cpu_dec(*busy_counter);
+				migrate_enable();
+			}
 			cond_resched_rcu();
 		}
 		rcu_read_unlock();
...
@@ -115,10 +115,6 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_spin_lock_proto;
 	case BPF_FUNC_spin_unlock:
 		return &bpf_spin_unlock_proto;
-	case BPF_FUNC_task_storage_get:
-		return &bpf_task_storage_get_proto;
-	case BPF_FUNC_task_storage_delete:
-		return &bpf_task_storage_delete_proto;
 	case BPF_FUNC_bprm_opts_set:
 		return &bpf_bprm_opts_set_proto;
 	case BPF_FUNC_ima_inode_hash:
...
@@ -15,21 +15,41 @@
 #include <linux/bpf_local_storage.h>
 #include <linux/filter.h>
 #include <uapi/linux/btf.h>
-#include <linux/bpf_lsm.h>
 #include <linux/btf_ids.h>
 #include <linux/fdtable.h>
 DEFINE_BPF_STORAGE_CACHE(task_cache);
+DEFINE_PER_CPU(int, bpf_task_storage_busy);
+
+static void bpf_task_storage_lock(void)
+{
+	migrate_disable();
+	__this_cpu_inc(bpf_task_storage_busy);
+}
+
+static void bpf_task_storage_unlock(void)
+{
+	__this_cpu_dec(bpf_task_storage_busy);
+	migrate_enable();
+}
+
+static bool bpf_task_storage_trylock(void)
+{
+	migrate_disable();
+	if (unlikely(__this_cpu_inc_return(bpf_task_storage_busy) != 1)) {
+		__this_cpu_dec(bpf_task_storage_busy);
+		migrate_enable();
+		return false;
+	}
+	return true;
+}
+
 static struct bpf_local_storage __rcu **task_storage_ptr(void *owner)
 {
 	struct task_struct *task = owner;
-	struct bpf_storage_blob *bsb;
-	bsb = bpf_task(task);
-	if (!bsb)
-		return NULL;
-	return &bsb->storage;
+	return &task->bpf_storage;
 }
@@ -38,13 +58,8 @@ task_storage_lookup(struct task_struct *task, struct bpf_map *map,
 {
 	struct bpf_local_storage *task_storage;
 	struct bpf_local_storage_map *smap;
-	struct bpf_storage_blob *bsb;
-	bsb = bpf_task(task);
-	if (!bsb)
-		return NULL;
-	task_storage = rcu_dereference(bsb->storage);
+	task_storage = rcu_dereference(task->bpf_storage);
 	if (!task_storage)
 		return NULL;
@@ -57,16 +72,12 @@ void bpf_task_storage_free(struct task_struct *task)
 	struct bpf_local_storage_elem *selem;
 	struct bpf_local_storage *local_storage;
 	bool free_task_storage = false;
-	struct bpf_storage_blob *bsb;
 	struct hlist_node *n;
+	unsigned long flags;
-	bsb = bpf_task(task);
-	if (!bsb)
-		return;
 	rcu_read_lock();
-	local_storage = rcu_dereference(bsb->storage);
+	local_storage = rcu_dereference(task->bpf_storage);
 	if (!local_storage) {
 		rcu_read_unlock();
 		return;
@@ -81,7 +92,8 @@ void bpf_task_storage_free(struct task_struct *task)
 	 * when unlinking elem from the local_storage->list and
 	 * the map's bucket->list.
 	 */
-	raw_spin_lock_bh(&local_storage->lock);
+	bpf_task_storage_lock();
+	raw_spin_lock_irqsave(&local_storage->lock, flags);
 	hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) {
 		/* Always unlink from map before unlinking from
 		 * local_storage.
@@ -90,7 +102,8 @@ void bpf_task_storage_free(struct task_struct *task)
 		free_task_storage = bpf_selem_unlink_storage_nolock(
 			local_storage, selem, false);
 	}
-	raw_spin_unlock_bh(&local_storage->lock);
+	raw_spin_unlock_irqrestore(&local_storage->lock, flags);
+	bpf_task_storage_unlock();
 	rcu_read_unlock();
 	/* free_task_storage should always be true as long as
@@ -123,7 +136,9 @@ static void *bpf_pid_task_storage_lookup_elem(struct bpf_map *map, void *key)
 		goto out;
 	}
+	bpf_task_storage_lock();
 	sdata = task_storage_lookup(task, map, true);
+	bpf_task_storage_unlock();
 	put_pid(pid);
 	return sdata ? sdata->data : NULL;
 out:
@@ -150,13 +165,15 @@ static int bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key,
 	 */
 	WARN_ON_ONCE(!rcu_read_lock_held());
 	task = pid_task(pid, PIDTYPE_PID);
-	if (!task || !task_storage_ptr(task)) {
+	if (!task) {
 		err = -ENOENT;
 		goto out;
 	}
+	bpf_task_storage_lock();
 	sdata = bpf_local_storage_update(
 		task, (struct bpf_local_storage_map *)map, value, map_flags);
+	bpf_task_storage_unlock();
 	err = PTR_ERR_OR_ZERO(sdata);
 out:
@@ -199,7 +216,9 @@ static int bpf_pid_task_storage_delete_elem(struct bpf_map *map, void *key)
 		goto out;
 	}
+	bpf_task_storage_lock();
 	err = task_storage_delete(task, map);
+	bpf_task_storage_unlock();
 out:
 	put_pid(pid);
 	return err;
@@ -213,44 +232,47 @@ BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
 	if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE))
 		return (unsigned long)NULL;
-	/* explicitly check that the task_storage_ptr is not
-	 * NULL as task_storage_lookup returns NULL in this case and
-	 * bpf_local_storage_update expects the owner to have a
-	 * valid storage pointer.
-	 */
-	if (!task || !task_storage_ptr(task))
+	if (!task)
+		return (unsigned long)NULL;
+
+	if (!bpf_task_storage_trylock())
 		return (unsigned long)NULL;
 	sdata = task_storage_lookup(task, map, true);
 	if (sdata)
-		return (unsigned long)sdata->data;
+		goto unlock;
-	/* This helper must only be called from places where the lifetime of the task
-	 * is guaranteed. Either by being refcounted or by being protected
-	 * by an RCU read-side critical section.
-	 */
-	if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) {
+	/* only allocate new storage, when the task is refcounted */
+	if (refcount_read(&task->usage) &&
+	    (flags & BPF_LOCAL_STORAGE_GET_F_CREATE))
 		sdata = bpf_local_storage_update(
 			task, (struct bpf_local_storage_map *)map, value,
 			BPF_NOEXIST);
-		return IS_ERR(sdata) ? (unsigned long)NULL :
-			(unsigned long)sdata->data;
-	}
-	return (unsigned long)NULL;
+unlock:
+	bpf_task_storage_unlock();
+	return IS_ERR_OR_NULL(sdata) ? (unsigned long)NULL :
		(unsigned long)sdata->data;
 }
 BPF_CALL_2(bpf_task_storage_delete, struct bpf_map *, map, struct task_struct *,
 	   task)
 {
+	int ret;
+
 	if (!task)
 		return -EINVAL;
+	if (!bpf_task_storage_trylock())
+		return -EBUSY;
+
 	/* This helper must only be called from places where the lifetime of the task
 	 * is guaranteed. Either by being refcounted or by being protected
 	 * by an RCU read-side critical section.
 	 */
-	return task_storage_delete(task, map);
+	ret = task_storage_delete(task, map);
+	bpf_task_storage_unlock();
+	return ret;
 }
 static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key)
@@ -276,7 +298,7 @@ static void task_storage_map_free(struct bpf_map *map)
 	smap = (struct bpf_local_storage_map *)map;
 	bpf_local_storage_cache_idx_free(&task_cache, smap->cache_idx);
-	bpf_local_storage_map_free(smap);
+	bpf_local_storage_map_free(smap, &bpf_task_storage_busy);
 }
 static int task_storage_map_btf_id;
...
@@ -96,6 +96,7 @@
 #include <linux/kasan.h>
 #include <linux/scs.h>
 #include <linux/io_uring.h>
+#include <linux/bpf.h>
 #include <asm/pgalloc.h>
 #include <linux/uaccess.h>
@@ -734,6 +735,7 @@ void __put_task_struct(struct task_struct *tsk)
 	cgroup_free(tsk);
 	task_numa_free(tsk, true);
 	security_task_free(tsk);
+	bpf_task_storage_free(tsk);
 	exit_creds(tsk);
 	delayacct_tsk_free(tsk);
 	put_signal_struct(tsk->signal);
@@ -2062,6 +2064,9 @@ static __latent_entropy struct task_struct *copy_process(
 	p->sequential_io	= 0;
 	p->sequential_io_avg	= 0;
 #endif
+#ifdef CONFIG_BPF_SYSCALL
+	RCU_INIT_POINTER(p->bpf_storage, NULL);
+#endif
 	/* Perform scheduler related setup. Assign this task to a CPU. */
 	retval = sched_fork(clone_flags, p);
...
@@ -1367,6 +1367,10 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_per_cpu_ptr_proto;
 	case BPF_FUNC_this_cpu_ptr:
 		return &bpf_this_cpu_ptr_proto;
+	case BPF_FUNC_task_storage_get:
+		return &bpf_task_storage_get_proto;
+	case BPF_FUNC_task_storage_delete:
+		return &bpf_task_storage_delete_proto;
 	default:
 		return NULL;
 	}
...
@@ -89,7 +89,7 @@ static void bpf_sk_storage_map_free(struct bpf_map *map)
 	smap = (struct bpf_local_storage_map *)map;
 	bpf_local_storage_cache_idx_free(&sk_cache, smap->cache_idx);
-	bpf_local_storage_map_free(smap);
+	bpf_local_storage_map_free(smap, NULL);
 }
 static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
...
@@ -16,7 +16,10 @@ CFLAGS := -g -Wall
 # Try to detect best kernel BTF source
 KERNEL_REL := $(shell uname -r)
-VMLINUX_BTF_PATHS := /sys/kernel/btf/vmlinux /boot/vmlinux-$(KERNEL_REL)
+VMLINUX_BTF_PATHS := $(if $(O),$(O)/vmlinux)			\
+	$(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux)		\
+	../../../vmlinux /sys/kernel/btf/vmlinux		\
+	/boot/vmlinux-$(KERNEL_REL)
 VMLINUX_BTF_PATH := $(or $(VMLINUX_BTF),$(firstword \
 			$(wildcard $(VMLINUX_BTF_PATHS))))
...
@@ -11,9 +11,9 @@ const volatile __u64 min_us = 0;
 const volatile pid_t targ_pid = 0;
 struct {
-	__uint(type, BPF_MAP_TYPE_HASH);
-	__uint(max_entries, 10240);
-	__type(key, u32);
+	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
 	__type(value, u64);
 } start SEC(".maps");
@@ -25,15 +25,20 @@ struct {
 /* record enqueue timestamp */
 __always_inline
-static int trace_enqueue(u32 tgid, u32 pid)
+static int trace_enqueue(struct task_struct *t)
 {
-	u64 ts;
+	u32 pid = t->pid;
+	u64 *ptr;
 	if (!pid || (targ_pid && targ_pid != pid))
 		return 0;
-	ts = bpf_ktime_get_ns();
-	bpf_map_update_elem(&start, &pid, &ts, 0);
+	ptr = bpf_task_storage_get(&start, t, 0,
+				   BPF_LOCAL_STORAGE_GET_F_CREATE);
+	if (!ptr)
+		return 0;
+
+	*ptr = bpf_ktime_get_ns();
 	return 0;
 }
@@ -43,7 +48,7 @@ int handle__sched_wakeup(u64 *ctx)
 	/* TP_PROTO(struct task_struct *p) */
 	struct task_struct *p = (void *)ctx[0];
-	return trace_enqueue(p->tgid, p->pid);
+	return trace_enqueue(p);
 }
 SEC("tp_btf/sched_wakeup_new")
@@ -52,7 +57,7 @@ int handle__sched_wakeup_new(u64 *ctx)
 	/* TP_PROTO(struct task_struct *p) */
 	struct task_struct *p = (void *)ctx[0];
-	return trace_enqueue(p->tgid, p->pid);
+	return trace_enqueue(p);
 }
 SEC("tp_btf/sched_switch")
@@ -70,12 +75,16 @@ int handle__sched_switch(u64 *ctx)
 	/* ivcsw: treat like an enqueue event and store timestamp */
 	if (prev->state == TASK_RUNNING)
-		trace_enqueue(prev->tgid, prev->pid);
+		trace_enqueue(prev);
 	pid = next->pid;
+	/* For pid mismatch, save a bpf_task_storage_get */
+	if (!pid || (targ_pid && targ_pid != pid))
+		return 0;
+
 	/* fetch timestamp and calculate delta */
-	tsp = bpf_map_lookup_elem(&start, &pid);
+	tsp = bpf_task_storage_get(&start, next, 0, 0);
 	if (!tsp)
 		return 0;   /* missed enqueue */
@@ -91,7 +100,7 @@ int handle__sched_switch(u64 *ctx)
 	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU,
 			      &event, sizeof(event));
-	bpf_map_delete_elem(&start, &pid);
+	bpf_task_storage_delete(&start, next);
 	return 0;
 }
...
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021 Facebook */
#define _GNU_SOURCE /* See feature_test_macros(7) */
#include <unistd.h>
#include <sys/syscall.h> /* For SYS_xxx definitions */
#include <sys/types.h>
#include <test_progs.h>
#include "task_local_storage.skel.h"
#include "task_local_storage_exit_creds.skel.h"
#include "task_ls_recursion.skel.h"
static void test_sys_enter_exit(void)
{
struct task_local_storage *skel;
int err;
skel = task_local_storage__open_and_load();
if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
return;
skel->bss->target_pid = syscall(SYS_gettid);
err = task_local_storage__attach(skel);
if (!ASSERT_OK(err, "skel_attach"))
goto out;
syscall(SYS_gettid);
syscall(SYS_gettid);
/* 3x syscalls: 1x attach and 2x gettid */
ASSERT_EQ(skel->bss->enter_cnt, 3, "enter_cnt");
ASSERT_EQ(skel->bss->exit_cnt, 3, "exit_cnt");
ASSERT_EQ(skel->bss->mismatch_cnt, 0, "mismatch_cnt");
out:
task_local_storage__destroy(skel);
}
static void test_exit_creds(void)
{
struct task_local_storage_exit_creds *skel;
int err;
skel = task_local_storage_exit_creds__open_and_load();
if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
return;
err = task_local_storage_exit_creds__attach(skel);
if (!ASSERT_OK(err, "skel_attach"))
goto out;
/* trigger at least one exit_creds() */
if (CHECK_FAIL(system("ls > /dev/null")))
goto out;
/* sync rcu to make sure exit_creds() is called for "ls" */
kern_sync_rcu();
ASSERT_EQ(skel->bss->valid_ptr_count, 0, "valid_ptr_count");
ASSERT_NEQ(skel->bss->null_ptr_count, 0, "null_ptr_count");
out:
task_local_storage_exit_creds__destroy(skel);
}
static void test_recursion(void)
{
struct task_ls_recursion *skel;
int err;
skel = task_ls_recursion__open_and_load();
if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
return;
err = task_ls_recursion__attach(skel);
if (!ASSERT_OK(err, "skel_attach"))
goto out;
/* trigger sys_enter, make sure it does not cause deadlock */
syscall(SYS_gettid);
out:
task_ls_recursion__destroy(skel);
}
void test_task_local_storage(void)
{
if (test__start_subtest("sys_enter_exit"))
test_sys_enter_exit();
if (test__start_subtest("exit_creds"))
test_exit_creds();
if (test__start_subtest("recursion"))
test_recursion();
}
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021 Facebook */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
struct {
__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
__uint(map_flags, BPF_F_NO_PREALLOC);
__type(key, int);
__type(value, long);
} enter_id SEC(".maps");
#define MAGIC_VALUE 0xabcd1234
pid_t target_pid = 0;
int mismatch_cnt = 0;
int enter_cnt = 0;
int exit_cnt = 0;
SEC("tp_btf/sys_enter")
int BPF_PROG(on_enter, struct pt_regs *regs, long id)
{
struct task_struct *task;
long *ptr;
task = bpf_get_current_task_btf();
if (task->pid != target_pid)
return 0;
ptr = bpf_task_storage_get(&enter_id, task, 0,
BPF_LOCAL_STORAGE_GET_F_CREATE);
if (!ptr)
return 0;
__sync_fetch_and_add(&enter_cnt, 1);
*ptr = MAGIC_VALUE + enter_cnt;
return 0;
}
SEC("tp_btf/sys_exit")
int BPF_PROG(on_exit, struct pt_regs *regs, long id)
{
struct task_struct *task;
long *ptr;
task = bpf_get_current_task_btf();
if (task->pid != target_pid)
return 0;
ptr = bpf_task_storage_get(&enter_id, task, 0,
BPF_LOCAL_STORAGE_GET_F_CREATE);
if (!ptr)
return 0;
__sync_fetch_and_add(&exit_cnt, 1);
if (*ptr != MAGIC_VALUE + exit_cnt)
__sync_fetch_and_add(&mismatch_cnt, 1);
return 0;
}
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021 Facebook */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
struct {
__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
__uint(map_flags, BPF_F_NO_PREALLOC);
__type(key, int);
__type(value, __u64);
} task_storage SEC(".maps");
int valid_ptr_count = 0;
int null_ptr_count = 0;
SEC("fentry/exit_creds")
int BPF_PROG(trace_exit_creds, struct task_struct *task)
{
__u64 *ptr;
ptr = bpf_task_storage_get(&task_storage, task, 0,
BPF_LOCAL_STORAGE_GET_F_CREATE);
if (ptr)
__sync_fetch_and_add(&valid_ptr_count, 1);
else
__sync_fetch_and_add(&null_ptr_count, 1);
return 0;
}
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021 Facebook */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
struct {
__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
__uint(map_flags, BPF_F_NO_PREALLOC);
__type(key, int);
__type(value, long);
} map_a SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
__uint(map_flags, BPF_F_NO_PREALLOC);
__type(key, int);
__type(value, long);
} map_b SEC(".maps");
SEC("fentry/bpf_local_storage_lookup")
int BPF_PROG(on_lookup)
{
struct task_struct *task = bpf_get_current_task_btf();
bpf_task_storage_delete(&map_a, task);
bpf_task_storage_delete(&map_b, task);
return 0;
}
SEC("fentry/bpf_local_storage_update")
int BPF_PROG(on_update)
{
struct task_struct *task = bpf_get_current_task_btf();
long *ptr;
ptr = bpf_task_storage_get(&map_a, task, 0,
BPF_LOCAL_STORAGE_GET_F_CREATE);
if (ptr)
*ptr += 1;
ptr = bpf_task_storage_get(&map_b, task, 0,
BPF_LOCAL_STORAGE_GET_F_CREATE);
if (ptr)
*ptr += 1;
return 0;
}
SEC("tp_btf/sys_enter")
int BPF_PROG(on_enter, struct pt_regs *regs, long id)
{
struct task_struct *task;
long *ptr;
task = bpf_get_current_task_btf();
ptr = bpf_task_storage_get(&map_a, task, 0,
BPF_LOCAL_STORAGE_GET_F_CREATE);
if (ptr)
*ptr = 200;
ptr = bpf_task_storage_get(&map_b, task, 0,
BPF_LOCAL_STORAGE_GET_F_CREATE);
if (ptr)
*ptr = 100;
return 0;
}