Commit 9df1c28b authored by Matt Mullins's avatar Matt Mullins Committed by Alexei Starovoitov

bpf: add writable context for raw tracepoints

This is an opt-in interface that allows a tracepoint to provide a safe
buffer that can be written from a BPF_PROG_TYPE_RAW_TRACEPOINT program.
The size of the buffer must be a compile-time constant, and is checked
before allowing a BPF program to attach to a tracepoint that uses this
feature.

The pointer to this buffer will be the first argument of tracepoints
that opt in; the pointer is valid and can be bpf_probe_read() by both
BPF_PROG_TYPE_RAW_TRACEPOINT and BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE
programs that attach to such a tracepoint, but the buffer to which it
points may only be written by the latter.
Signed-off-by: default avatarMatt Mullins <mmullins@fb.com>
Acked-by: default avatarYonghong Song <yhs@fb.com>
Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parent 34b8ab09
...@@ -272,6 +272,7 @@ enum bpf_reg_type { ...@@ -272,6 +272,7 @@ enum bpf_reg_type {
PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */ PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */
PTR_TO_TCP_SOCK, /* reg points to struct tcp_sock */ PTR_TO_TCP_SOCK, /* reg points to struct tcp_sock */
PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */ PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */
PTR_TO_TP_BUFFER, /* reg points to a writable raw tp's buffer */
}; };
/* The information passed from prog-specific *_is_valid_access /* The information passed from prog-specific *_is_valid_access
...@@ -361,6 +362,7 @@ struct bpf_prog_aux { ...@@ -361,6 +362,7 @@ struct bpf_prog_aux {
u32 used_map_cnt; u32 used_map_cnt;
u32 max_ctx_offset; u32 max_ctx_offset;
u32 max_pkt_offset; u32 max_pkt_offset;
u32 max_tp_access;
u32 stack_depth; u32 stack_depth;
u32 id; u32 id;
u32 func_cnt; /* used by non-func prog as the number of func progs */ u32 func_cnt; /* used by non-func prog as the number of func progs */
......
...@@ -25,6 +25,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe) ...@@ -25,6 +25,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe)
BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint) BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint)
BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event) BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event)
BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint) BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint)
BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, raw_tracepoint_writable)
#endif #endif
#ifdef CONFIG_CGROUP_BPF #ifdef CONFIG_CGROUP_BPF
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev) BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev)
......
...@@ -45,6 +45,7 @@ struct bpf_raw_event_map { ...@@ -45,6 +45,7 @@ struct bpf_raw_event_map {
struct tracepoint *tp; struct tracepoint *tp;
void *bpf_func; void *bpf_func;
u32 num_args; u32 num_args;
u32 writable_size;
} __aligned(32); } __aligned(32);
#endif #endif
...@@ -69,8 +69,7 @@ __bpf_trace_##call(void *__data, proto) \ ...@@ -69,8 +69,7 @@ __bpf_trace_##call(void *__data, proto) \
* to make sure that if the tracepoint handling changes, the * to make sure that if the tracepoint handling changes, the
* bpf probe will fail to compile unless it too is updated. * bpf probe will fail to compile unless it too is updated.
*/ */
#undef DEFINE_EVENT #define __DEFINE_EVENT(template, call, proto, args, size) \
#define DEFINE_EVENT(template, call, proto, args) \
static inline void bpf_test_probe_##call(void) \ static inline void bpf_test_probe_##call(void) \
{ \ { \
check_trace_callback_type_##call(__bpf_trace_##template); \ check_trace_callback_type_##call(__bpf_trace_##template); \
...@@ -81,12 +80,36 @@ __bpf_trace_tp_map_##call = { \ ...@@ -81,12 +80,36 @@ __bpf_trace_tp_map_##call = { \
.tp = &__tracepoint_##call, \ .tp = &__tracepoint_##call, \
.bpf_func = (void *)__bpf_trace_##template, \ .bpf_func = (void *)__bpf_trace_##template, \
.num_args = COUNT_ARGS(args), \ .num_args = COUNT_ARGS(args), \
.writable_size = size, \
}; };
#define FIRST(x, ...) x
#undef DEFINE_EVENT_WRITABLE
#define DEFINE_EVENT_WRITABLE(template, call, proto, args, size) \
static inline void bpf_test_buffer_##call(void) \
{ \
/* BUILD_BUG_ON() is ignored if the code is completely eliminated, but \
* BUILD_BUG_ON_ZERO() uses a different mechanism that is not \
* dead-code-eliminated. \
*/ \
FIRST(proto); \
(void)BUILD_BUG_ON_ZERO(size != sizeof(*FIRST(args))); \
} \
__DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args), size)
#undef DEFINE_EVENT
#define DEFINE_EVENT(template, call, proto, args) \
__DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args), 0)
#undef DEFINE_EVENT_PRINT #undef DEFINE_EVENT_PRINT
#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ #define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
#undef DEFINE_EVENT_WRITABLE
#undef __DEFINE_EVENT
#undef FIRST
#endif /* CONFIG_BPF_EVENTS */ #endif /* CONFIG_BPF_EVENTS */
...@@ -168,6 +168,7 @@ enum bpf_prog_type { ...@@ -168,6 +168,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_SK_REUSEPORT, BPF_PROG_TYPE_SK_REUSEPORT,
BPF_PROG_TYPE_FLOW_DISSECTOR, BPF_PROG_TYPE_FLOW_DISSECTOR,
BPF_PROG_TYPE_CGROUP_SYSCTL, BPF_PROG_TYPE_CGROUP_SYSCTL,
BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
}; };
enum bpf_attach_type { enum bpf_attach_type {
......
...@@ -1789,12 +1789,16 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr) ...@@ -1789,12 +1789,16 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
} }
raw_tp->btp = btp; raw_tp->btp = btp;
prog = bpf_prog_get_type(attr->raw_tracepoint.prog_fd, prog = bpf_prog_get(attr->raw_tracepoint.prog_fd);
BPF_PROG_TYPE_RAW_TRACEPOINT);
if (IS_ERR(prog)) { if (IS_ERR(prog)) {
err = PTR_ERR(prog); err = PTR_ERR(prog);
goto out_free_tp; goto out_free_tp;
} }
if (prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT &&
prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE) {
err = -EINVAL;
goto out_put_prog;
}
err = bpf_probe_register(raw_tp->btp, prog); err = bpf_probe_register(raw_tp->btp, prog);
if (err) if (err)
......
...@@ -405,6 +405,7 @@ static const char * const reg_type_str[] = { ...@@ -405,6 +405,7 @@ static const char * const reg_type_str[] = {
[PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null", [PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null",
[PTR_TO_TCP_SOCK] = "tcp_sock", [PTR_TO_TCP_SOCK] = "tcp_sock",
[PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null", [PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null",
[PTR_TO_TP_BUFFER] = "tp_buffer",
}; };
static char slot_type_char[] = { static char slot_type_char[] = {
...@@ -1993,6 +1994,32 @@ static int check_ctx_reg(struct bpf_verifier_env *env, ...@@ -1993,6 +1994,32 @@ static int check_ctx_reg(struct bpf_verifier_env *env,
return 0; return 0;
} }
static int check_tp_buffer_access(struct bpf_verifier_env *env,
const struct bpf_reg_state *reg,
int regno, int off, int size)
{
if (off < 0) {
verbose(env,
"R%d invalid tracepoint buffer access: off=%d, size=%d",
regno, off, size);
return -EACCES;
}
if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
char tn_buf[48];
tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
verbose(env,
"R%d invalid variable buffer offset: off=%d, var_off=%s",
regno, off, tn_buf);
return -EACCES;
}
if (off + size > env->prog->aux->max_tp_access)
env->prog->aux->max_tp_access = off + size;
return 0;
}
/* truncate register to smaller size (in bytes) /* truncate register to smaller size (in bytes)
* must be called with size < BPF_REG_SIZE * must be called with size < BPF_REG_SIZE
*/ */
...@@ -2137,6 +2164,10 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn ...@@ -2137,6 +2164,10 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
err = check_sock_access(env, insn_idx, regno, off, size, t); err = check_sock_access(env, insn_idx, regno, off, size, t);
if (!err && value_regno >= 0) if (!err && value_regno >= 0)
mark_reg_unknown(env, regs, value_regno); mark_reg_unknown(env, regs, value_regno);
} else if (reg->type == PTR_TO_TP_BUFFER) {
err = check_tp_buffer_access(env, reg, regno, off, size);
if (!err && t == BPF_READ && value_regno >= 0)
mark_reg_unknown(env, regs, value_regno);
} else { } else {
verbose(env, "R%d invalid mem access '%s'\n", regno, verbose(env, "R%d invalid mem access '%s'\n", regno,
reg_type_str[reg->type]); reg_type_str[reg->type]);
......
...@@ -915,6 +915,27 @@ const struct bpf_verifier_ops raw_tracepoint_verifier_ops = { ...@@ -915,6 +915,27 @@ const struct bpf_verifier_ops raw_tracepoint_verifier_ops = {
const struct bpf_prog_ops raw_tracepoint_prog_ops = { const struct bpf_prog_ops raw_tracepoint_prog_ops = {
}; };
static bool raw_tp_writable_prog_is_valid_access(int off, int size,
enum bpf_access_type type,
const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
if (off == 0) {
if (size != sizeof(u64) || type != BPF_READ)
return false;
info->reg_type = PTR_TO_TP_BUFFER;
}
return raw_tp_prog_is_valid_access(off, size, type, prog, info);
}
const struct bpf_verifier_ops raw_tracepoint_writable_verifier_ops = {
.get_func_proto = raw_tp_prog_func_proto,
.is_valid_access = raw_tp_writable_prog_is_valid_access,
};
const struct bpf_prog_ops raw_tracepoint_writable_prog_ops = {
};
static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type, static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
const struct bpf_prog *prog, const struct bpf_prog *prog,
struct bpf_insn_access_aux *info) struct bpf_insn_access_aux *info)
...@@ -1204,6 +1225,9 @@ static int __bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog * ...@@ -1204,6 +1225,9 @@ static int __bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *
if (prog->aux->max_ctx_offset > btp->num_args * sizeof(u64)) if (prog->aux->max_ctx_offset > btp->num_args * sizeof(u64))
return -EINVAL; return -EINVAL;
if (prog->aux->max_tp_access > btp->writable_size)
return -EINVAL;
return tracepoint_probe_register(tp, (void *)btp->bpf_func, prog); return tracepoint_probe_register(tp, (void *)btp->bpf_func, prog);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment