Commit e980a076 authored by David S. Miller

Merge branch 'bpf-event-output-helper-improvements'

Daniel Borkmann says:

====================
BPF event output helper improvements

This set adds improvements to the BPF event output helper to
support non-linear data sampling, here specifically, for skb
context. For details please see individual patches. The set
is based against net-next tree.

v1 -> v2:
  - Integrated and adapted Peter's diff into patch 1, updated
    the remaining ones accordingly. Thanks Peter!
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 7acef604 555c8a86
...@@ -979,12 +979,15 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr) ...@@ -979,12 +979,15 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr)
struct pt_regs regs; struct pt_regs regs;
struct perf_sf_sde_regs *sde_regs; struct perf_sf_sde_regs *sde_regs;
struct perf_sample_data data; struct perf_sample_data data;
struct perf_raw_record raw; struct perf_raw_record raw = {
.frag = {
.size = sfr->size,
.data = sfr,
},
};
/* Setup perf sample */ /* Setup perf sample */
perf_sample_data_init(&data, 0, event->hw.last_period); perf_sample_data_init(&data, 0, event->hw.last_period);
raw.size = sfr->size;
raw.data = sfr;
data.raw = &raw; data.raw = &raw;
/* Setup pt_regs to look like an CPU-measurement external interrupt /* Setup pt_regs to look like an CPU-measurement external interrupt
......
...@@ -655,8 +655,12 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs) ...@@ -655,8 +655,12 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
} }
if (event->attr.sample_type & PERF_SAMPLE_RAW) { if (event->attr.sample_type & PERF_SAMPLE_RAW) {
raw.size = sizeof(u32) + ibs_data.size; raw = (struct perf_raw_record){
raw.data = ibs_data.data; .frag = {
.size = sizeof(u32) + ibs_data.size,
.data = ibs_data.data,
},
};
data.raw = &raw; data.raw = &raw;
} }
......
...@@ -209,7 +209,12 @@ u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); ...@@ -209,7 +209,12 @@ u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp); bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp);
const struct bpf_func_proto *bpf_get_trace_printk_proto(void); const struct bpf_func_proto *bpf_get_trace_printk_proto(void);
const struct bpf_func_proto *bpf_get_event_output_proto(void);
typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src,
unsigned long len);
u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy);
#ifdef CONFIG_BPF_SYSCALL #ifdef CONFIG_BPF_SYSCALL
DECLARE_PER_CPU(int, bpf_prog_active); DECLARE_PER_CPU(int, bpf_prog_active);
......
...@@ -69,9 +69,22 @@ struct perf_callchain_entry_ctx { ...@@ -69,9 +69,22 @@ struct perf_callchain_entry_ctx {
bool contexts_maxed; bool contexts_maxed;
}; };
/*
 * Copy callback invoked at sample-output time so that non-linear data
 * (e.g. skb payload) can be pulled in lazily instead of being memcpy'd
 * up front. Returns 0 on success, non-zero on failure (see bpf_skb_copy).
 */
typedef unsigned long (*perf_copy_f)(void *dst, const void *src,
unsigned long len);
/*
 * One fragment of a raw sample. Fragments form a singly linked chain;
 * the last fragment reuses the 'next' slot to hold the u64-alignment
 * pad byte count (see perf_raw_frag_last() / perf_prepare_sample()).
 */
struct perf_raw_frag {
union {
struct perf_raw_frag *next; /* following fragment, if any */
unsigned long pad; /* trailing pad bytes (last fragment only) */
};
perf_copy_f copy; /* optional copier; NULL means plain copy */
void *data; /* fragment payload */
u32 size; /* payload length in bytes */
} __packed;
struct perf_raw_record { struct perf_raw_record {
struct perf_raw_frag frag;
u32 size; u32 size;
void *data;
}; };
/* /*
...@@ -1283,6 +1296,11 @@ extern void perf_restore_debug_store(void); ...@@ -1283,6 +1296,11 @@ extern void perf_restore_debug_store(void);
static inline void perf_restore_debug_store(void) { } static inline void perf_restore_debug_store(void) { }
#endif #endif
/*
 * True when 'frag' terminates the fragment chain: the final fragment
 * stores its pad count (0..7) in the union, and the test relies on real
 * 'next' pointers never being numerically smaller than sizeof(u64) —
 * presumably guaranteed by kernel address layout; NOTE(review): confirm.
 */
static __always_inline bool perf_raw_frag_last(const struct perf_raw_frag *frag)
{
return frag->pad < sizeof(u64);
}
#define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x)) #define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x))
/* /*
......
...@@ -401,6 +401,8 @@ enum bpf_func_id { ...@@ -401,6 +401,8 @@ enum bpf_func_id {
/* BPF_FUNC_perf_event_output and BPF_FUNC_perf_event_read flags. */ /* BPF_FUNC_perf_event_output and BPF_FUNC_perf_event_read flags. */
#define BPF_F_INDEX_MASK 0xffffffffULL #define BPF_F_INDEX_MASK 0xffffffffULL
#define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK #define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK
/* BPF_FUNC_perf_event_output for sk_buff input context. */
#define BPF_F_CTXLEN_MASK (0xfffffULL << 32)
/* user accessible mirror of in-kernel sk_buff. /* user accessible mirror of in-kernel sk_buff.
* new fields can only be added to the end of this structure * new fields can only be added to the end of this structure
......
...@@ -1054,9 +1054,11 @@ const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void) ...@@ -1054,9 +1054,11 @@ const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
return NULL; return NULL;
} }
const struct bpf_func_proto * __weak bpf_get_event_output_proto(void) u64 __weak
bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
{ {
return NULL; return -ENOTSUPP;
} }
/* Always built-in helper functions. */ /* Always built-in helper functions. */
......
...@@ -5553,16 +5553,26 @@ void perf_output_sample(struct perf_output_handle *handle, ...@@ -5553,16 +5553,26 @@ void perf_output_sample(struct perf_output_handle *handle,
} }
if (sample_type & PERF_SAMPLE_RAW) { if (sample_type & PERF_SAMPLE_RAW) {
if (data->raw) { struct perf_raw_record *raw = data->raw;
u32 raw_size = data->raw->size;
u32 real_size = round_up(raw_size + sizeof(u32), if (raw) {
sizeof(u64)) - sizeof(u32); struct perf_raw_frag *frag = &raw->frag;
u64 zero = 0;
perf_output_put(handle, raw->size);
perf_output_put(handle, real_size); do {
__output_copy(handle, data->raw->data, raw_size); if (frag->copy) {
if (real_size - raw_size) __output_custom(handle, frag->copy,
__output_copy(handle, &zero, real_size - raw_size); frag->data, frag->size);
} else {
__output_copy(handle, frag->data,
frag->size);
}
if (perf_raw_frag_last(frag))
break;
frag = frag->next;
} while (1);
if (frag->pad)
__output_skip(handle, NULL, frag->pad);
} else { } else {
struct { struct {
u32 size; u32 size;
...@@ -5687,14 +5697,28 @@ void perf_prepare_sample(struct perf_event_header *header, ...@@ -5687,14 +5697,28 @@ void perf_prepare_sample(struct perf_event_header *header,
} }
if (sample_type & PERF_SAMPLE_RAW) { if (sample_type & PERF_SAMPLE_RAW) {
int size = sizeof(u32); struct perf_raw_record *raw = data->raw;
int size;
if (data->raw) if (raw) {
size += data->raw->size; struct perf_raw_frag *frag = &raw->frag;
else u32 sum = 0;
size += sizeof(u32);
header->size += round_up(size, sizeof(u64)); do {
sum += frag->size;
if (perf_raw_frag_last(frag))
break;
frag = frag->next;
} while (1);
size = round_up(sum + sizeof(u32), sizeof(u64));
raw->size = size - sizeof(u32);
frag->pad = raw->size - sum;
} else {
size = sizeof(u64);
}
header->size += size;
} }
if (sample_type & PERF_SAMPLE_BRANCH_STACK) { if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
...@@ -7331,7 +7355,7 @@ static struct pmu perf_swevent = { ...@@ -7331,7 +7355,7 @@ static struct pmu perf_swevent = {
static int perf_tp_filter_match(struct perf_event *event, static int perf_tp_filter_match(struct perf_event *event,
struct perf_sample_data *data) struct perf_sample_data *data)
{ {
void *record = data->raw->data; void *record = data->raw->frag.data;
/* only top level events have filters set */ /* only top level events have filters set */
if (event->parent) if (event->parent)
...@@ -7387,8 +7411,10 @@ void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size, ...@@ -7387,8 +7411,10 @@ void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size,
struct perf_event *event; struct perf_event *event;
struct perf_raw_record raw = { struct perf_raw_record raw = {
.frag = {
.size = entry_size, .size = entry_size,
.data = record, .data = record,
},
}; };
perf_sample_data_init(&data, 0, 0); perf_sample_data_init(&data, 0, 0);
......
...@@ -123,10 +123,7 @@ static inline unsigned long perf_aux_size(struct ring_buffer *rb) ...@@ -123,10 +123,7 @@ static inline unsigned long perf_aux_size(struct ring_buffer *rb)
return rb->aux_nr_pages << PAGE_SHIFT; return rb->aux_nr_pages << PAGE_SHIFT;
} }
#define DEFINE_OUTPUT_COPY(func_name, memcpy_func) \ #define __DEFINE_OUTPUT_COPY_BODY(memcpy_func) \
static inline unsigned long \
func_name(struct perf_output_handle *handle, \
const void *buf, unsigned long len) \
{ \ { \
unsigned long size, written; \ unsigned long size, written; \
\ \
...@@ -152,6 +149,17 @@ func_name(struct perf_output_handle *handle, \ ...@@ -152,6 +149,17 @@ func_name(struct perf_output_handle *handle, \
return len; \ return len; \
} }
/* Instantiate a named inline copier around the shared copy-loop body. */
#define DEFINE_OUTPUT_COPY(func_name, memcpy_func) \
static inline unsigned long \
func_name(struct perf_output_handle *handle, \
const void *buf, unsigned long len) \
__DEFINE_OUTPUT_COPY_BODY(memcpy_func)
/*
 * Same copy loop, but the copy function is supplied at run time — used
 * by perf_output_sample() for fragments that carry a perf_copy_f.
 */
static inline unsigned long
__output_custom(struct perf_output_handle *handle, perf_copy_f copy_func,
const void *buf, unsigned long len)
__DEFINE_OUTPUT_COPY_BODY(copy_func)
static inline unsigned long static inline unsigned long
memcpy_common(void *dst, const void *src, unsigned long n) memcpy_common(void *dst, const void *src, unsigned long n)
{ {
......
...@@ -233,24 +233,17 @@ static const struct bpf_func_proto bpf_perf_event_read_proto = { ...@@ -233,24 +233,17 @@ static const struct bpf_func_proto bpf_perf_event_read_proto = {
.arg2_type = ARG_ANYTHING, .arg2_type = ARG_ANYTHING,
}; };
static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size) static __always_inline u64
__bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
u64 flags, struct perf_raw_record *raw)
{ {
struct pt_regs *regs = (struct pt_regs *) (long) r1;
struct bpf_map *map = (struct bpf_map *) (long) r2;
struct bpf_array *array = container_of(map, struct bpf_array, map); struct bpf_array *array = container_of(map, struct bpf_array, map);
unsigned int cpu = smp_processor_id(); unsigned int cpu = smp_processor_id();
u64 index = flags & BPF_F_INDEX_MASK; u64 index = flags & BPF_F_INDEX_MASK;
void *data = (void *) (long) r4;
struct perf_sample_data sample_data; struct perf_sample_data sample_data;
struct bpf_event_entry *ee; struct bpf_event_entry *ee;
struct perf_event *event; struct perf_event *event;
struct perf_raw_record raw = {
.size = size,
.data = data,
};
if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
return -EINVAL;
if (index == BPF_F_CURRENT_CPU) if (index == BPF_F_CURRENT_CPU)
index = cpu; index = cpu;
if (unlikely(index >= array->map.max_entries)) if (unlikely(index >= array->map.max_entries))
...@@ -269,11 +262,29 @@ static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size) ...@@ -269,11 +262,29 @@ static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size)
return -EOPNOTSUPP; return -EOPNOTSUPP;
perf_sample_data_init(&sample_data, 0, 0); perf_sample_data_init(&sample_data, 0, 0);
sample_data.raw = &raw; sample_data.raw = raw;
perf_event_output(event, &sample_data, regs); perf_event_output(event, &sample_data, regs);
return 0; return 0;
} }
/*
 * BPF helper: sample 'size' bytes at 'data' into the perf event picked
 * out of 'map' by the index encoded in 'flags'. Builds a single-fragment
 * raw record and defers to __bpf_perf_event_output(); rejects any flag
 * bits outside BPF_F_INDEX_MASK with -EINVAL.
 */
static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size)
{
struct pt_regs *regs = (struct pt_regs *)(long) r1; /* r1: ctx regs */
struct bpf_map *map = (struct bpf_map *)(long) r2; /* r2: perf event array */
void *data = (void *)(long) r4; /* r4: sample buffer on BPF stack */
struct perf_raw_record raw = {
.frag = {
.size = size,
.data = data,
},
};
if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
return -EINVAL;
return __bpf_perf_event_output(regs, map, flags, &raw);
}
static const struct bpf_func_proto bpf_perf_event_output_proto = { static const struct bpf_func_proto bpf_perf_event_output_proto = {
.func = bpf_perf_event_output, .func = bpf_perf_event_output,
.gpl_only = true, .gpl_only = true,
...@@ -287,29 +298,26 @@ static const struct bpf_func_proto bpf_perf_event_output_proto = { ...@@ -287,29 +298,26 @@ static const struct bpf_func_proto bpf_perf_event_output_proto = {
static DEFINE_PER_CPU(struct pt_regs, bpf_pt_regs); static DEFINE_PER_CPU(struct pt_regs, bpf_pt_regs);
static u64 bpf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size) u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
{ {
struct pt_regs *regs = this_cpu_ptr(&bpf_pt_regs); struct pt_regs *regs = this_cpu_ptr(&bpf_pt_regs);
struct perf_raw_frag frag = {
.copy = ctx_copy,
.size = ctx_size,
.data = ctx,
};
struct perf_raw_record raw = {
.frag = {
.next = ctx_size ? &frag : NULL,
.size = meta_size,
.data = meta,
},
};
perf_fetch_caller_regs(regs); perf_fetch_caller_regs(regs);
return bpf_perf_event_output((long)regs, r2, flags, r4, size); return __bpf_perf_event_output(regs, map, flags, &raw);
}
static const struct bpf_func_proto bpf_event_output_proto = {
.func = bpf_event_output,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_CONST_MAP_PTR,
.arg3_type = ARG_ANYTHING,
.arg4_type = ARG_PTR_TO_STACK,
.arg5_type = ARG_CONST_STACK_SIZE,
};
const struct bpf_func_proto *bpf_get_event_output_proto(void)
{
return &bpf_event_output_proto;
} }
static u64 bpf_get_current_task(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) static u64 bpf_get_current_task(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
......
...@@ -2025,6 +2025,47 @@ bool bpf_helper_changes_skb_data(void *func) ...@@ -2025,6 +2025,47 @@ bool bpf_helper_changes_skb_data(void *func)
return false; return false;
} }
/*
 * perf_copy_f callback: copy the first 'len' bytes of the skb into
 * dst_buff at output time. skb_header_pointer() either copies into
 * dst_buff itself or returns a direct pointer into linear data, in
 * which case we memcpy explicitly. Returns 0 on success, 'len'
 * (non-zero) if the requested range is unavailable.
 */
static unsigned long bpf_skb_copy(void *dst_buff, const void *skb,
unsigned long len)
{
void *ptr = skb_header_pointer(skb, 0, len, dst_buff);
if (unlikely(!ptr))
return len;
if (ptr != dst_buff)
memcpy(dst_buff, ptr, len);
return 0;
}
/*
 * BPF helper: perf_event_output for skb context. Emits 'meta_size'
 * bytes of meta data plus up to 'skb_size' bytes of skb payload — the
 * payload length is carried in the upper BPF_F_CTXLEN_MASK bits of
 * 'flags' and is copied lazily via bpf_skb_copy at sample-output time.
 */
static u64 bpf_skb_event_output(u64 r1, u64 r2, u64 flags, u64 r4,
u64 meta_size)
{
struct sk_buff *skb = (struct sk_buff *)(long) r1; /* r1: ctx (skb) */
struct bpf_map *map = (struct bpf_map *)(long) r2; /* r2: perf event array */
u64 skb_size = (flags & BPF_F_CTXLEN_MASK) >> 32; /* skb bytes to sample */
void *meta = (void *)(long) r4; /* r4: meta buffer on BPF stack */
if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
return -EINVAL;
if (unlikely(skb_size > skb->len))
return -EFAULT; /* cannot sample beyond the packet */
return bpf_event_output(map, flags, meta, meta_size, skb, skb_size,
bpf_skb_copy);
}
/* Verifier signature for bpf_skb_event_output (GPL-only helper). */
static const struct bpf_func_proto bpf_skb_event_output_proto = {
.func = bpf_skb_event_output,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX, /* skb */
.arg2_type = ARG_CONST_MAP_PTR, /* perf event array map */
.arg3_type = ARG_ANYTHING, /* flags: index | ctx length */
.arg4_type = ARG_PTR_TO_STACK, /* meta data buffer */
.arg5_type = ARG_CONST_STACK_SIZE, /* meta_size */
};
static unsigned short bpf_tunnel_key_af(u64 flags) static unsigned short bpf_tunnel_key_af(u64 flags)
{ {
return flags & BPF_F_TUNINFO_IPV6 ? AF_INET6 : AF_INET; return flags & BPF_F_TUNINFO_IPV6 ? AF_INET6 : AF_INET;
...@@ -2357,7 +2398,7 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) ...@@ -2357,7 +2398,7 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
case BPF_FUNC_get_hash_recalc: case BPF_FUNC_get_hash_recalc:
return &bpf_get_hash_recalc_proto; return &bpf_get_hash_recalc_proto;
case BPF_FUNC_perf_event_output: case BPF_FUNC_perf_event_output:
return bpf_get_event_output_proto(); return &bpf_skb_event_output_proto;
case BPF_FUNC_get_smp_processor_id: case BPF_FUNC_get_smp_processor_id:
return &bpf_get_smp_processor_id_proto; return &bpf_get_smp_processor_id_proto;
#ifdef CONFIG_SOCK_CGROUP_DATA #ifdef CONFIG_SOCK_CGROUP_DATA
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment