Commit f18b03fa authored by Kumar Kartikeya Dwivedi, committed by Alexei Starovoitov

bpf: Implement BPF exceptions

This patch implements BPF exceptions and introduces a bpf_throw kfunc
that allows programs to throw exceptions at runtime. A bpf_throw
invocation is treated as an immediate termination of the program: it
returns to its caller within the kernel, unwinding all stack frames.

This allows a program to simplify its implementation by testing for
runtime conditions the verifier has no visibility into and asserting
that they are true; if they are not, the program can simply throw an
exception from the other branch.
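
For instance, a program might assert a runtime bound that the verifier
cannot prove on its own. A minimal sketch (the section name, length
bound and cookie value are illustrative, not part of this patch):

    #include <vmlinux.h>
    #include <bpf/bpf_helpers.h>
    #include "bpf_experimental.h"

    SEC("tc")
    int assert_mtu(struct __sk_buff *skb)
    {
            /* The verifier cannot know skb->len at verification time;
             * test the condition at runtime and throw if the assertion
             * fails. The cookie (2) becomes the program's return value
             * on the throwing path.
             */
            if (skb->len > 1500)
                    bpf_throw(2);
            return 0;
    }

    char _license[] SEC("license") = "GPL";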

BPF exceptions are explicitly *NOT* an unlikely slowpath error handling
primitive, and this objective has guided the design choices of their
implementation within the kernel (with the bulk of the cost of
unwinding the stack offloaded to the bpf_throw kfunc).

The implementation of this mechanism requires the add_hidden_subprog
mechanism introduced in the previous patch, which generates a couple of
instructions to move R1 to R0 and exit. The JIT then rewrites the
prologue of this subprog to take the stack pointer and frame pointer as
inputs and reset the stack frame, popping all callee-saved registers
saved by the main subprog. The bpf_throw function then walks the stack
at runtime and invokes this exception subprog with the stack and frame
pointers as parameters.
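
The default exception callback is tiny; a sketch of the instructions
the verifier adds for it (the exact emission site lives in the
collapsed verifier.c diff below):

    /* Hidden subprog: return the cookie, i.e. r0 = r1, then exit. The
     * JIT rewrites its prologue so that, when invoked through
     * bpf_exception_cb(cookie, sp, bp), it first unwinds onto the main
     * program's frame.
     */
    struct bpf_insn insns[] = {
            BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),    /* r0 = r1 (cookie) */
            BPF_EXIT_INSN(),                        /* return r0 */
    };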

Reviewers should note that, currently, the main program is made to save
all callee-saved registers on x86_64 during entry into the program.
This is because we must perform the equivalent of a lightweight context
switch when unwinding the stack, and therefore need the callee-saved
registers of the BPF program's caller in order to return to it in a
sane state.

Note that we have to additionally handle r12, even though it is not used
by the program, because when throwing the exception the program makes an
entry into the kernel which could clobber r12 after saving it on the
stack. To be able to preserve the value we received on program entry, we
push r12 and restore it from the generated subprogram when unwinding the
stack.
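
Putting the pieces together, the unwind amounts to the following hedged
C rendering (a hypothetical helper for illustration only; the real
sequence is split between bpf_throw() in helpers.c and the
JIT-rewritten prologue in the diff below):

    /* Hypothetical sketch of the unwind path; not actual kernel code. */
    static unsigned int throw_and_unwind(struct bpf_prog_aux *aux,
                                         u64 cookie, u64 sp, u64 bp)
    {
            /* The hidden subprog, reached through aux->bpf_exception_cb,
             * performs a lightweight context switch:
             *   1. adopt the main prog's frame:  rsp = sp, rbp = bp
             *   2. pop all callee-saved regs (and r12) pushed at entry
             *   3. reset the frame:              rsp = rbp
             *   4. return the cookie (r0 = r1) to the kernel caller
             */
            return aux->bpf_exception_cb(cookie, sp, bp);
    }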

For now, a bpf_throw invocation fails verification when lingering
resources or locks exist on that path of the program. In a future
followup, bpf_throw will be extended to perform frame-by-frame
unwinding that releases each stack frame's lingering resources,
removing this limitation.
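
For example, the following sketch (assuming the same includes as the
earlier example; PID and cookie are arbitrary) would be rejected today,
because an acquired task reference is still held on the throwing path:

    SEC("tc")
    int throw_with_ref(struct __sk_buff *skb)
    {
            struct task_struct *t = bpf_task_from_pid(1);

            if (!t)
                    return 0;
            if (skb->len > 1500)
                    bpf_throw(0);   /* rejected: reference 't' would leak */
            bpf_task_release(t);
            return 1;
    }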
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20230912233214.1518551-5-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parent 335d1c5b
arch/x86/net/bpf_jit_comp.c
@@ -18,6 +18,8 @@
 #include <asm/text-patching.h>
 #include <asm/unwind.h>
 
+static bool all_callee_regs_used[4] = {true, true, true, true};
+
 static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
 {
 	if (len == 1)
@@ -256,6 +258,14 @@ struct jit_context {
 /* Number of bytes that will be skipped on tailcall */
 #define X86_TAIL_CALL_OFFSET	(11 + ENDBR_INSN_SIZE)
 
+static void push_r12(u8 **pprog)
+{
+	u8 *prog = *pprog;
+
+	EMIT2(0x41, 0x54);   /* push r12 */
+	*pprog = prog;
+}
+
 static void push_callee_regs(u8 **pprog, bool *callee_regs_used)
 {
 	u8 *prog = *pprog;
@@ -271,6 +281,14 @@ static void push_callee_regs(u8 **pprog, bool *callee_regs_used)
 	*pprog = prog;
 }
 
+static void pop_r12(u8 **pprog)
+{
+	u8 *prog = *pprog;
+
+	EMIT2(0x41, 0x5C);   /* pop r12 */
+	*pprog = prog;
+}
+
 static void pop_callee_regs(u8 **pprog, bool *callee_regs_used)
 {
 	u8 *prog = *pprog;
@@ -292,7 +310,8 @@ static void pop_callee_regs(u8 **pprog, bool *callee_regs_used)
  * while jumping to another program
  */
 static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf,
-			  bool tail_call_reachable, bool is_subprog)
+			  bool tail_call_reachable, bool is_subprog,
+			  bool is_exception_cb)
 {
 	u8 *prog = *pprog;
@@ -312,8 +331,22 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf,
 		/* Keep the same instruction layout. */
 		EMIT2(0x66, 0x90); /* nop2 */
 	}
-	EMIT1(0x55);             /* push rbp */
-	EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
+	/* Exception callback receives FP as third parameter */
+	if (is_exception_cb) {
+		EMIT3(0x48, 0x89, 0xF4); /* mov rsp, rsi */
+		EMIT3(0x48, 0x89, 0xD5); /* mov rbp, rdx */
+		/* The main frame must have exception_boundary as true, so we
+		 * first restore those callee-saved regs from stack, before
+		 * reusing the stack frame.
+		 */
+		pop_callee_regs(&prog, all_callee_regs_used);
+		pop_r12(&prog);
+		/* Reset the stack frame. */
+		EMIT3(0x48, 0x89, 0xEC); /* mov rsp, rbp */
+	} else {
+		EMIT1(0x55);             /* push rbp */
+		EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
+	}
 
 	/* X86_TAIL_CALL_OFFSET is here */
 	EMIT_ENDBR();
@@ -472,7 +505,8 @@ static void emit_return(u8 **pprog, u8 *ip)
  *   goto *(prog->bpf_func + prologue_size);
  * out:
  */
-static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
+static void emit_bpf_tail_call_indirect(struct bpf_prog *bpf_prog,
+					u8 **pprog, bool *callee_regs_used,
 					u32 stack_depth, u8 *ip,
 					struct jit_context *ctx)
 {
@@ -522,7 +556,12 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
 	offset = ctx->tail_call_indirect_label - (prog + 2 - start);
 	EMIT2(X86_JE, offset);                    /* je out */
 
-	pop_callee_regs(&prog, callee_regs_used);
+	if (bpf_prog->aux->exception_boundary) {
+		pop_callee_regs(&prog, all_callee_regs_used);
+		pop_r12(&prog);
+	} else {
+		pop_callee_regs(&prog, callee_regs_used);
+	}
 	EMIT1(0x58);                              /* pop rax */
 	if (stack_depth)
@@ -546,7 +585,8 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
 	*pprog = prog;
 }
 
-static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke,
+static void emit_bpf_tail_call_direct(struct bpf_prog *bpf_prog,
+				      struct bpf_jit_poke_descriptor *poke,
 				      u8 **pprog, u8 *ip,
 				      bool *callee_regs_used, u32 stack_depth,
 				      struct jit_context *ctx)
@@ -575,7 +615,13 @@ static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke,
 	emit_jump(&prog, (u8 *)poke->tailcall_target + X86_PATCH_SIZE,
 		  poke->tailcall_bypass);
 
-	pop_callee_regs(&prog, callee_regs_used);
+	if (bpf_prog->aux->exception_boundary) {
+		pop_callee_regs(&prog, all_callee_regs_used);
+		pop_r12(&prog);
+	} else {
+		pop_callee_regs(&prog, callee_regs_used);
+	}
 	EMIT1(0x58);                                  /* pop rax */
 	if (stack_depth)
 		EMIT3_off32(0x48, 0x81, 0xC4, round_up(stack_depth, 8));
@@ -1050,8 +1096,20 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
 	emit_prologue(&prog, bpf_prog->aux->stack_depth,
 		      bpf_prog_was_classic(bpf_prog), tail_call_reachable,
-		      bpf_is_subprog(bpf_prog));
-	push_callee_regs(&prog, callee_regs_used);
+		      bpf_is_subprog(bpf_prog), bpf_prog->aux->exception_cb);
+	/* Exception callback will clobber callee regs for its own use, and
+	 * restore the original callee regs from main prog's stack frame.
+	 */
+	if (bpf_prog->aux->exception_boundary) {
+		/* We also need to save r12, which is not mapped to any BPF
+		 * register, as we throw after entry into the kernel, which may
+		 * overwrite r12.
+		 */
+		push_r12(&prog);
+		push_callee_regs(&prog, all_callee_regs_used);
+	} else {
+		push_callee_regs(&prog, callee_regs_used);
+	}
 
 	ilen = prog - temp;
 	if (rw_image)
@@ -1648,13 +1706,15 @@ st:	if (is_imm8(insn->off))
 		case BPF_JMP | BPF_TAIL_CALL:
 			if (imm32)
-				emit_bpf_tail_call_direct(&bpf_prog->aux->poke_tab[imm32 - 1],
+				emit_bpf_tail_call_direct(bpf_prog,
+							  &bpf_prog->aux->poke_tab[imm32 - 1],
 							  &prog, image + addrs[i - 1],
 							  callee_regs_used,
 							  bpf_prog->aux->stack_depth,
 							  ctx);
 			else
-				emit_bpf_tail_call_indirect(&prog,
+				emit_bpf_tail_call_indirect(bpf_prog,
+							    &prog,
 							    callee_regs_used,
 							    bpf_prog->aux->stack_depth,
 							    image + addrs[i - 1],
@@ -1907,7 +1967,12 @@ st:	if (is_imm8(insn->off))
 			seen_exit = true;
 			/* Update cleanup_addr */
 			ctx->cleanup_addr = proglen;
-			pop_callee_regs(&prog, callee_regs_used);
+			if (bpf_prog->aux->exception_boundary) {
+				pop_callee_regs(&prog, all_callee_regs_used);
+				pop_r12(&prog);
+			} else {
+				pop_callee_regs(&prog, callee_regs_used);
+			}
 			EMIT1(0xC9);         /* leave */
 			emit_return(&prog, image + addrs[i - 1] + (prog - temp));
 			break;
include/linux/bpf.h
@@ -1410,6 +1410,8 @@ struct bpf_prog_aux {
 	bool sleepable;
 	bool tail_call_reachable;
 	bool xdp_has_frags;
+	bool exception_cb;
+	bool exception_boundary;
 	/* BTF_KIND_FUNC_PROTO for valid attach_btf_id */
 	const struct btf_type *attach_func_proto;
 	/* function name for valid attach_btf_id */
@@ -1432,6 +1434,7 @@ struct bpf_prog_aux {
 	int cgroup_atype; /* enum cgroup_bpf_attach_type */
 	struct bpf_map *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE];
 	char name[BPF_OBJ_NAME_LEN];
+	unsigned int (*bpf_exception_cb)(u64 cookie, u64 sp, u64 bp);
 #ifdef CONFIG_SECURITY
 	void *security;
 #endif
include/linux/bpf_verifier.h
@@ -541,7 +541,9 @@ struct bpf_subprog_info {
 	bool has_tail_call;
 	bool tail_call_reachable;
 	bool has_ld_abs;
+	bool is_cb;
 	bool is_async_cb;
+	bool is_exception_cb;
 };
 
 struct bpf_verifier_env;
@@ -589,6 +591,7 @@ struct bpf_verifier_env {
 	u32 used_btf_cnt;		/* number of used BTF objects */
 	u32 id_gen;			/* used to generate unique reg IDs */
 	u32 hidden_subprog_cnt;		/* number of hidden subprogs */
+	int exception_callback_subprog;
 	bool explore_alu_limits;
 	bool allow_ptr_leaks;
 	bool allow_uninit_stack;
@@ -596,6 +599,7 @@ struct bpf_verifier_env {
 	bool bypass_spec_v1;
 	bool bypass_spec_v4;
 	bool seen_direct_write;
+	bool seen_exception;
 	struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */
 	const struct bpf_line_info *prev_linfo;
 	struct bpf_verifier_log log;
include/linux/filter.h
@@ -1171,6 +1171,7 @@ const char *__bpf_address_lookup(unsigned long addr, unsigned long *size,
 bool is_bpf_text_address(unsigned long addr);
 int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
 		    char *sym);
+struct bpf_prog *bpf_prog_ksym_find(unsigned long addr);
 
 static inline const char *
 bpf_address_lookup(unsigned long addr, unsigned long *size,
@@ -1238,6 +1239,11 @@ static inline int bpf_get_kallsym(unsigned int symnum, unsigned long *value,
 	return -ERANGE;
 }
 
+static inline struct bpf_prog *bpf_prog_ksym_find(unsigned long addr)
+{
+	return NULL;
+}
+
 static inline const char *
 bpf_address_lookup(unsigned long addr, unsigned long *size,
 		   unsigned long *off, char **modname, char *sym)
kernel/bpf/core.c
@@ -733,7 +733,7 @@ bool is_bpf_text_address(unsigned long addr)
 	return ret;
 }
 
-static struct bpf_prog *bpf_prog_ksym_find(unsigned long addr)
+struct bpf_prog *bpf_prog_ksym_find(unsigned long addr)
 {
 	struct bpf_ksym *ksym = bpf_ksym_find(addr);
kernel/bpf/helpers.c
@@ -2449,6 +2449,43 @@ __bpf_kfunc void bpf_rcu_read_unlock(void)
 	rcu_read_unlock();
 }
 
+struct bpf_throw_ctx {
+	struct bpf_prog_aux *aux;
+	u64 sp;
+	u64 bp;
+	int cnt;
+};
+
+static bool bpf_stack_walker(void *cookie, u64 ip, u64 sp, u64 bp)
+{
+	struct bpf_throw_ctx *ctx = cookie;
+	struct bpf_prog *prog;
+
+	if (!is_bpf_text_address(ip))
+		return !ctx->cnt;
+	prog = bpf_prog_ksym_find(ip);
+	ctx->cnt++;
+	if (bpf_is_subprog(prog))
+		return true;
+	ctx->aux = prog->aux;
+	ctx->sp = sp;
+	ctx->bp = bp;
+	return false;
+}
+
+__bpf_kfunc void bpf_throw(u64 cookie)
+{
+	struct bpf_throw_ctx ctx = {};
+
+	arch_bpf_stack_walk(bpf_stack_walker, &ctx);
+	WARN_ON_ONCE(!ctx.aux);
+	if (ctx.aux)
+		WARN_ON_ONCE(!ctx.aux->exception_boundary);
+	WARN_ON_ONCE(!ctx.bp);
+	WARN_ON_ONCE(!ctx.cnt);
+	ctx.aux->bpf_exception_cb(cookie, ctx.sp, ctx.bp);
+}
+
 __diag_pop();
 
 BTF_SET8_START(generic_btf_ids)
@@ -2478,6 +2515,7 @@ BTF_ID_FLAGS(func, bpf_cgroup_from_id, KF_ACQUIRE | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_task_under_cgroup, KF_RCU)
 #endif
 BTF_ID_FLAGS(func, bpf_task_from_pid, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_throw)
 BTF_SET8_END(generic_btf_ids)
 
 static const struct btf_kfunc_id_set generic_kfunc_set = {
kernel/bpf/verifier.c
This diff is collapsed.
tools/testing/selftests/bpf/bpf_experimental.h
@@ -162,4 +162,20 @@ extern void bpf_percpu_obj_drop_impl(void *kptr, void *meta) __ksym;
 /* Convenience macro to wrap over bpf_obj_drop_impl */
 #define bpf_percpu_obj_drop(kptr) bpf_percpu_obj_drop_impl(kptr, NULL)
 
+/* Description
+ *	Throw a BPF exception from the program, immediately terminating its
+ *	execution and unwinding the stack. The supplied 'cookie' parameter
+ *	will be the return value of the program when an exception is thrown.
+ *
+ *	Note that throwing an exception with lingering resources (locks,
+ *	references, etc.) will lead to a verification error.
+ *
+ *	Note that callbacks *cannot* call this helper.
+ * Returns
+ *	Never.
+ * Throws
+ *	An exception with the specified 'cookie' value.
+ */
+extern void bpf_throw(u64 cookie) __ksym;
+
 #endif