Commit 3932f227 authored by Jakub Kicinski

Merge tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

Martin KaFai Lau says:

====================
pull-request: bpf 2023-08-03

We've added 5 non-merge commits during the last 7 day(s) which contain
a total of 3 files changed, 37 insertions(+), 20 deletions(-).

The main changes are:

1) Disable preemption in the perf_event_output helpers,
   from Jiri Olsa

2) Add length check for SK_DIAG_BPF_STORAGE_REQ_MAP_FD parsing,
   from Lin Ma

3) Multiple warning splat fixes in cpumap from Hou Tao

* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
  bpf, cpumap: Handle skb as well when clean up ptr_ring
  bpf, cpumap: Make sure kthread is running before map update returns
  bpf: Add length check for SK_DIAG_BPF_STORAGE_REQ_MAP_FD parsing
  bpf: Disable preemption in bpf_event_output
  bpf: Disable preemption in bpf_perf_event_output
====================

Link: https://lore.kernel.org/r/20230803181429.994607-1-martin.lau@linux.dev
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 0d48a84b 4c9fbff5

--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -28,6 +28,7 @@
 #include <linux/sched.h>
 #include <linux/workqueue.h>
 #include <linux/kthread.h>
+#include <linux/completion.h>
 #include <trace/events/xdp.h>
 #include <linux/btf_ids.h>
@@ -73,6 +74,7 @@ struct bpf_cpu_map_entry {
         struct rcu_head rcu;
 
         struct work_struct kthread_stop_wq;
+        struct completion kthread_running;
 };
 
 struct bpf_cpu_map {
@@ -129,11 +131,17 @@ static void __cpu_map_ring_cleanup(struct ptr_ring *ring)
          * invoked cpu_map_kthread_stop(). Catch any broken behaviour
          * gracefully and warn once.
          */
-        struct xdp_frame *xdpf;
+        void *ptr;
 
-        while ((xdpf = ptr_ring_consume(ring)))
-                if (WARN_ON_ONCE(xdpf))
-                        xdp_return_frame(xdpf);
+        while ((ptr = ptr_ring_consume(ring))) {
+                WARN_ON_ONCE(1);
+                if (unlikely(__ptr_test_bit(0, &ptr))) {
+                        __ptr_clear_bit(0, &ptr);
+                        kfree_skb(ptr);
+                        continue;
+                }
+                xdp_return_frame(ptr);
+        }
 }
 
 static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
@@ -153,7 +161,6 @@ static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
 static void cpu_map_kthread_stop(struct work_struct *work)
 {
         struct bpf_cpu_map_entry *rcpu;
-        int err;
 
         rcpu = container_of(work, struct bpf_cpu_map_entry, kthread_stop_wq);
 
@@ -163,14 +170,7 @@ static void cpu_map_kthread_stop(struct work_struct *work)
         rcu_barrier();
 
         /* kthread_stop will wake_up_process and wait for it to complete */
-        err = kthread_stop(rcpu->kthread);
-        if (err) {
-                /* kthread_stop may be called before cpu_map_kthread_run
-                 * is executed, so we need to release the memory related
-                 * to rcpu.
-                 */
-                put_cpu_map_entry(rcpu);
-        }
+        kthread_stop(rcpu->kthread);
 }
 
 static void cpu_map_bpf_prog_run_skb(struct bpf_cpu_map_entry *rcpu,
@@ -298,11 +298,11 @@ static int cpu_map_bpf_prog_run(struct bpf_cpu_map_entry *rcpu, void **frames,
         return nframes;
 }
 
-
 static int cpu_map_kthread_run(void *data)
 {
         struct bpf_cpu_map_entry *rcpu = data;
 
+        complete(&rcpu->kthread_running);
         set_current_state(TASK_INTERRUPTIBLE);
 
         /* When kthread gives stop order, then rcpu have been disconnected
@@ -467,6 +467,7 @@ __cpu_map_entry_alloc(struct bpf_map *map, struct bpf_cpumap_val *value,
                 goto free_ptr_ring;
 
         /* Setup kthread */
+        init_completion(&rcpu->kthread_running);
         rcpu->kthread = kthread_create_on_node(cpu_map_kthread_run, rcpu, numa,
                                                "cpumap/%d/map:%d", cpu,
                                                map->id);
@@ -480,6 +481,12 @@ __cpu_map_entry_alloc(struct bpf_map *map, struct bpf_cpumap_val *value,
         kthread_bind(rcpu->kthread, cpu);
         wake_up_process(rcpu->kthread);
 
+        /* Make sure kthread has been running, so kthread_stop() will not
+         * stop the kthread prematurely and all pending frames or skbs
+         * will be handled by the kthread before kthread_stop() returns.
+         */
+        wait_for_completion(&rcpu->kthread_running);
+
         return rcpu;
 
 free_prog:
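
For illustration only (not part of the commit): the "kthread is running" fix makes __cpu_map_entry_alloc() block on a completion until the new kthread has actually started, so a later kthread_stop() can never fire against a thread that never ran and leave queued frames or skbs behind. Below is a minimal userspace sketch of that handshake under stated assumptions: pthreads and a condition variable stand in for the kernel's kthread/completion API, and all names are illustrative.

/* Userspace analogue of the cpumap startup handshake: the creator waits
 * until the worker has signalled "running" before the entry may be torn
 * down, so a stop request cannot race with a thread that never started.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct entry {
        pthread_t       worker;
        pthread_mutex_t lock;
        pthread_cond_t  running_cond;
        bool            running;        /* analogue of kthread_running */
};

static void *worker_fn(void *arg)
{
        struct entry *e = arg;

        /* analogue of complete(&rcpu->kthread_running) */
        pthread_mutex_lock(&e->lock);
        e->running = true;
        pthread_cond_signal(&e->running_cond);
        pthread_mutex_unlock(&e->lock);

        /* ... main processing loop would run here ... */
        return NULL;
}

int main(void)
{
        struct entry e = {
                .lock = PTHREAD_MUTEX_INITIALIZER,
                .running_cond = PTHREAD_COND_INITIALIZER,
                .running = false,
        };

        pthread_create(&e.worker, NULL, worker_fn, &e);

        /* analogue of wait_for_completion(&rcpu->kthread_running):
         * do not report the entry as usable (and thus stoppable)
         * until the worker is definitely running.
         */
        pthread_mutex_lock(&e.lock);
        while (!e.running)
                pthread_cond_wait(&e.running_cond, &e.lock);
        pthread_mutex_unlock(&e.lock);

        pthread_join(e.worker, NULL);   /* loose analogue of kthread_stop() */
        printf("worker started before teardown was allowed\n");
        return 0;
}

The other cpumap fix is visible directly in __cpu_map_ring_cleanup() above: ring entries whose low pointer bit is set are skbs and are now freed with kfree_skb() instead of being passed to xdp_return_frame().
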

--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -661,8 +661,7 @@ static DEFINE_PER_CPU(int, bpf_trace_nest_level);
 BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
            u64, flags, void *, data, u64, size)
 {
-        struct bpf_trace_sample_data *sds = this_cpu_ptr(&bpf_trace_sds);
-        int nest_level = this_cpu_inc_return(bpf_trace_nest_level);
+        struct bpf_trace_sample_data *sds;
         struct perf_raw_record raw = {
                 .frag = {
                         .size = size,
@@ -670,7 +669,11 @@ BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
                 },
         };
         struct perf_sample_data *sd;
-        int err;
+        int nest_level, err;
+
+        preempt_disable();
+        sds = this_cpu_ptr(&bpf_trace_sds);
+        nest_level = this_cpu_inc_return(bpf_trace_nest_level);
 
         if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(sds->sds))) {
                 err = -EBUSY;
@@ -688,9 +691,9 @@ BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
         perf_sample_save_raw_data(sd, &raw);
 
         err = __bpf_perf_event_output(regs, map, flags, sd);
-
 out:
         this_cpu_dec(bpf_trace_nest_level);
+        preempt_enable();
         return err;
 }
 
@@ -715,7 +718,6 @@ static DEFINE_PER_CPU(struct bpf_trace_sample_data, bpf_misc_sds);
 u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
                      void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
 {
-        int nest_level = this_cpu_inc_return(bpf_event_output_nest_level);
         struct perf_raw_frag frag = {
                 .copy = ctx_copy,
                 .size = ctx_size,
@@ -732,8 +734,12 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
         };
         struct perf_sample_data *sd;
         struct pt_regs *regs;
+        int nest_level;
         u64 ret;
 
+        preempt_disable();
+        nest_level = this_cpu_inc_return(bpf_event_output_nest_level);
+
         if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(bpf_misc_sds.sds))) {
                 ret = -EBUSY;
                 goto out;
@@ -748,6 +754,7 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
         ret = __bpf_perf_event_output(regs, map, flags, sd);
 out:
         this_cpu_dec(bpf_event_output_nest_level);
+        preempt_enable();
         return ret;
 }
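
For illustration only (not part of the commit): both helpers guard a per-CPU sample buffer with a per-CPU nesting counter, and the fix wraps the increment, the buffer use, and the decrement in preempt_disable()/preempt_enable() so they all happen on one CPU's data. Below is a minimal userspace sketch of the same recursion-guard shape under stated assumptions: thread-local storage stands in for per-CPU data, and all names are illustrative.

/* Userspace sketch of the nesting-level guard used by the helpers above.
 * In the kernel the counter and buffers are per-CPU, which is why the
 * whole increment/use/decrement section must run with preemption off.
 */
#include <stdio.h>

#define MAX_NEST 3

static _Thread_local int nest_level;
static _Thread_local char bufs[MAX_NEST][64];   /* stand-in for the sample bufs */

static int emit(const char *msg)
{
        int err = 0;

        /* analogue of this_cpu_inc_return(...nest_level) */
        if (++nest_level > MAX_NEST) {
                err = -1;               /* analogue of -EBUSY */
                goto out;
        }

        snprintf(bufs[nest_level - 1], sizeof(bufs[0]), "event: %s", msg);
        puts(bufs[nest_level - 1]);
out:
        nest_level--;                   /* analogue of this_cpu_dec(...) */
        return err;
}

int main(void)
{
        return emit("hello") ? 1 : 0;
}
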

--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -496,9 +496,12 @@ bpf_sk_storage_diag_alloc(const struct nlattr *nla_stgs)
                 return ERR_PTR(-EPERM);
 
         nla_for_each_nested(nla, nla_stgs, rem) {
-                if (nla_type(nla) == SK_DIAG_BPF_STORAGE_REQ_MAP_FD)
+                if (nla_type(nla) == SK_DIAG_BPF_STORAGE_REQ_MAP_FD) {
+                        if (nla_len(nla) != sizeof(u32))
+                                return ERR_PTR(-EINVAL);
                         nr_maps++;
+                }
         }
 
         diag = kzalloc(struct_size(diag, maps, nr_maps), GFP_KERNEL);
         if (!diag)
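
For illustration only (not part of the commit): the fix rejects SK_DIAG_BPF_STORAGE_REQ_MAP_FD attributes whose payload is not exactly sizeof(u32) before that payload is later read as a map FD. Below is a minimal userspace sketch of the same "check the length before reading the value" pattern on a TLV-style attribute; the struct and helper are illustrative stand-ins, not the kernel's netlink API.

/* Validate a TLV attribute's payload length before treating it as a u32. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct tlv {
        uint16_t type;
        uint16_t len;           /* payload length in bytes */
        const void *data;
};

#define REQ_MAP_FD 1            /* illustrative attribute type */

static int parse_map_fd(const struct tlv *attr, uint32_t *fd)
{
        if (attr->type != REQ_MAP_FD)
                return -1;
        if (attr->len != sizeof(uint32_t))      /* the added length check */
                return -1;                      /* analogue of -EINVAL */
        memcpy(fd, attr->data, sizeof(uint32_t));
        return 0;
}

int main(void)
{
        uint32_t payload = 42, fd;
        struct tlv ok  = { REQ_MAP_FD, sizeof(payload), &payload };
        struct tlv bad = { REQ_MAP_FD, 2, &payload };  /* truncated payload */

        printf("ok:  %d\n", parse_map_fd(&ok, &fd));   /* 0, fd == 42 */
        printf("bad: %d\n", parse_map_fd(&bad, &fd));  /* -1, rejected */
        return 0;
}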