Commit 2146f7fe authored by Alexei Starovoitov

Merge branch 'allocate-bpf-trampoline-on-bpf_prog_pack'

Song Liu says:

====================
Allocate bpf trampoline on bpf_prog_pack

This set enables allocating bpf trampolines from bpf_prog_pack on x86. The
majority of the work, however, is refactoring of the trampoline code. The
refactoring is needed because we have to handle 4 archs and 2 users
(trampoline and struct_ops).

Patches 1/7 through 6/7 refactor the trampoline code and add a few helpers.
Patch 7/7 finally lets the bpf trampoline on x86 use bpf_prog_pack.
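
Roughly, the generic trampoline code is expected to drive the new helpers
like this (a simplified sketch of the kernel/bpf/trampoline.c hunk further
down in this diff; modmem charging and most error handling trimmed):

    /* 1. ask the arch how big the trampoline will be */
    size = arch_bpf_trampoline_size(&tr->func.model, tr->flags,
                                    tlinks, tr->func.addr);
    if (size < 0 || size > PAGE_SIZE)
        return size < 0 ? size : -E2BIG;

    /* 2. allocate exactly that much (arch_alloc_bpf_trampoline() under
     *    the hood, i.e. bpf_prog_pack on x86)
     */
    im = bpf_tramp_image_alloc(tr->key, size);

    /* 3. generate the code into the image */
    err = arch_prepare_bpf_trampoline(im, im->image, im->image + size,
                                      &tr->func.model, tr->flags,
                                      tlinks, tr->func.addr);
    if (err < 0)
        goto out_free;              /* arch_free_bpf_trampoline() */

    /* 4. flip the image read-only and executable */
    arch_protect_bpf_trampoline(im->image, im->size);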

Changes in v7:
1. Use kvmalloc for rw_image in x86/arch_prepare_bpf_trampoline. (Alexei)
2. Add a comment to explain why we cannot use kvmalloc in
   x86/arch_bpf_trampoline_size (see the sketch after this list). (Alexei)
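
Both v7 items refer to the x86 changes in 7/7. A minimal sketch of that flow
(condensed from the arch/x86/net/bpf_jit_comp.c hunks below; return values
and error handling trimmed):

    /* arch_prepare_bpf_trampoline(): emit into a writable scratch buffer,
     * then copy the result into the read-only bpf_prog_pack image. The
     * scratch buffer can live anywhere, so plain kvmalloc() works.
     */
    rw_image = kvmalloc(size, GFP_KERNEL);
    ret = __arch_prepare_bpf_trampoline(im, rw_image, rw_image + size, image,
                                        m, flags, tlinks, func_addr);
    if (ret >= 0)
        bpf_arch_text_copy(image, rw_image, size);
    kvfree(rw_image);

    /* arch_bpf_trampoline_size(): the dry run still emits real call
     * instructions against the buffer address, and (per the comment in the
     * hunk below) that buffer must be in the module memory range, so it is
     * taken from bpf_jit_alloc_exec() instead of kvmalloc().
     */
    image = bpf_jit_alloc_exec(PAGE_SIZE);
    ret = __arch_prepare_bpf_trampoline(&im, image, image + PAGE_SIZE, image,
                                        m, flags, tlinks, func_addr);
    bpf_jit_free_exec(image);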

Changes in v6:
1. Rebase.
2. Add Acked-by and Tested-by from Jiri Olsa and Björn Töpel.

Changes in v5:
1. Adjust size of trampoline ksym. (Jiri)
2. Use "unsigned int size" arg in image management helpers.(Daniel)

Changes in v4:
1. Dropped 1/8 in v3, which is already merged in bpf-next.
2. Add Reviewed-by from Björn Töpel.

Changes in v3:
1. Fix bug in s390. (Thanks to Ilya Leoshkevich).
2. Fix build error in riscv. (kernel test robot).

Changes in v2:
1. Add missing changes in net/bpf/bpf_dummy_struct_ops.c.
2. Reduce one dry run in arch_prepare_bpf_trampoline. (Xu Kuohai)
3. Other small fixes.
====================

Link: https://lore.kernel.org/r/20231206224054.492250-1-song@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents 7065eefb 3ba026fc
......@@ -1828,7 +1828,7 @@ static void restore_args(struct jit_ctx *ctx, int args_off, int nregs)
*
*/
static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
struct bpf_tramp_links *tlinks, void *orig_call,
struct bpf_tramp_links *tlinks, void *func_addr,
int nregs, u32 flags)
{
int i;
......@@ -1926,7 +1926,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
if (flags & BPF_TRAMP_F_IP_ARG) {
/* save ip address of the traced function */
emit_addr_mov_i64(A64_R(10), (const u64)orig_call, ctx);
emit_addr_mov_i64(A64_R(10), (const u64)func_addr, ctx);
emit(A64_STR64I(A64_R(10), A64_SP, ip_off), ctx);
}
......@@ -2026,18 +2026,10 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
return ctx->idx;
}
int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
void *image_end, const struct btf_func_model *m,
u32 flags, struct bpf_tramp_links *tlinks,
void *orig_call)
static int btf_func_model_nregs(const struct btf_func_model *m)
{
int i, ret;
int nregs = m->nr_args;
int max_insns = ((long)image_end - (long)image) / AARCH64_INSN_SIZE;
struct jit_ctx ctx = {
.image = NULL,
.idx = 0,
};
int i;
/* extra registers needed for struct argument */
for (i = 0; i < MAX_BPF_FUNC_ARGS; i++) {
......@@ -2046,22 +2038,49 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
nregs += (m->arg_size[i] + 7) / 8 - 1;
}
return nregs;
}
int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
struct bpf_tramp_links *tlinks, void *func_addr)
{
struct jit_ctx ctx = {
.image = NULL,
.idx = 0,
};
struct bpf_tramp_image im;
int nregs, ret;
nregs = btf_func_model_nregs(m);
/* the first 8 registers are used for arguments */
if (nregs > 8)
return -ENOTSUPP;
ret = prepare_trampoline(&ctx, im, tlinks, orig_call, nregs, flags);
ret = prepare_trampoline(&ctx, &im, tlinks, func_addr, nregs, flags);
if (ret < 0)
return ret;
if (ret > max_insns)
return -EFBIG;
return ret < 0 ? ret : ret * AARCH64_INSN_SIZE;
}
ctx.image = image;
ctx.idx = 0;
int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
void *image_end, const struct btf_func_model *m,
u32 flags, struct bpf_tramp_links *tlinks,
void *func_addr)
{
int ret, nregs;
struct jit_ctx ctx = {
.image = image,
.idx = 0,
};
nregs = btf_func_model_nregs(m);
/* the first 8 registers are used for arguments */
if (nregs > 8)
return -ENOTSUPP;
jit_fill_hole(image, (unsigned int)(image_end - image));
ret = prepare_trampoline(&ctx, im, tlinks, orig_call, nregs, flags);
ret = prepare_trampoline(&ctx, im, tlinks, func_addr, nregs, flags);
if (ret > 0 && validate_code(&ctx) < 0)
ret = -EINVAL;
......
......@@ -1029,23 +1029,28 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
return ret;
}
int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
void *image_end, const struct btf_func_model *m,
u32 flags, struct bpf_tramp_links *tlinks,
void *func_addr)
int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
struct bpf_tramp_links *tlinks, void *func_addr)
{
int ret;
struct bpf_tramp_image im;
struct rv_jit_context ctx;
int ret;
ctx.ninsns = 0;
ctx.insns = NULL;
ctx.ro_insns = NULL;
ret = __arch_prepare_bpf_trampoline(im, m, tlinks, func_addr, flags, &ctx);
if (ret < 0)
return ret;
ret = __arch_prepare_bpf_trampoline(&im, m, tlinks, func_addr, flags, &ctx);
if (ninsns_rvoff(ret) > (long)image_end - (long)image)
return -EFBIG;
return ret < 0 ? ret : ninsns_rvoff(ctx.ninsns);
}
int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
void *image_end, const struct btf_func_model *m,
u32 flags, struct bpf_tramp_links *tlinks,
void *func_addr)
{
int ret;
struct rv_jit_context ctx;
ctx.ninsns = 0;
/*
......
......@@ -2637,6 +2637,21 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
return 0;
}
int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
struct bpf_tramp_links *tlinks, void *orig_call)
{
struct bpf_tramp_image im;
struct bpf_tramp_jit tjit;
int ret;
memset(&tjit, 0, sizeof(tjit));
ret = __arch_prepare_bpf_trampoline(&im, &tjit, m, flags,
tlinks, orig_call);
return ret < 0 ? ret : tjit.common.prg;
}
int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
void *image_end, const struct btf_func_model *m,
u32 flags, struct bpf_tramp_links *tlinks,
......@@ -2644,30 +2659,27 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
{
struct bpf_tramp_jit tjit;
int ret;
int i;
for (i = 0; i < 2; i++) {
if (i == 0) {
/* Compute offsets, check whether the code fits. */
memset(&tjit, 0, sizeof(tjit));
} else {
/* Generate the code. */
tjit.common.prg = 0;
tjit.common.prg_buf = image;
}
ret = __arch_prepare_bpf_trampoline(im, &tjit, m, flags,
tlinks, func_addr);
if (ret < 0)
return ret;
if (tjit.common.prg > (char *)image_end - (char *)image)
/*
* Use the same error code as for exceeding
* BPF_MAX_TRAMP_LINKS.
*/
return -E2BIG;
}
/* Compute offsets, check whether the code fits. */
memset(&tjit, 0, sizeof(tjit));
ret = __arch_prepare_bpf_trampoline(im, &tjit, m, flags,
tlinks, func_addr);
if (ret < 0)
return ret;
if (tjit.common.prg > (char *)image_end - (char *)image)
/*
* Use the same error code as for exceeding
* BPF_MAX_TRAMP_LINKS.
*/
return -E2BIG;
tjit.common.prg = 0;
tjit.common.prg_buf = image;
ret = __arch_prepare_bpf_trampoline(im, &tjit, m, flags,
tlinks, func_addr);
return tjit.common.prg;
return ret < 0 ? ret : tjit.common.prg;
}
bool bpf_jit_supports_subprog_tailcalls(void)
......
......@@ -2198,7 +2198,8 @@ static void restore_regs(const struct btf_func_model *m, u8 **prog,
static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
struct bpf_tramp_link *l, int stack_size,
int run_ctx_off, bool save_ret)
int run_ctx_off, bool save_ret,
void *image, void *rw_image)
{
u8 *prog = *pprog;
u8 *jmp_insn;
......@@ -2226,7 +2227,7 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
else
EMIT4(0x48, 0x8D, 0x75, -run_ctx_off);
if (emit_rsb_call(&prog, bpf_trampoline_enter(p), prog))
if (emit_rsb_call(&prog, bpf_trampoline_enter(p), image + (prog - (u8 *)rw_image)))
return -EINVAL;
/* remember prog start time returned by __bpf_prog_enter */
emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0);
......@@ -2250,7 +2251,7 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
(long) p->insnsi >> 32,
(u32) (long) p->insnsi);
/* call JITed bpf program or interpreter */
if (emit_rsb_call(&prog, p->bpf_func, prog))
if (emit_rsb_call(&prog, p->bpf_func, image + (prog - (u8 *)rw_image)))
return -EINVAL;
/*
......@@ -2277,7 +2278,7 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
EMIT3_off32(0x48, 0x8D, 0x95, -run_ctx_off);
else
EMIT4(0x48, 0x8D, 0x55, -run_ctx_off);
if (emit_rsb_call(&prog, bpf_trampoline_exit(p), prog))
if (emit_rsb_call(&prog, bpf_trampoline_exit(p), image + (prog - (u8 *)rw_image)))
return -EINVAL;
*pprog = prog;
......@@ -2312,14 +2313,15 @@ static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond)
static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
struct bpf_tramp_links *tl, int stack_size,
int run_ctx_off, bool save_ret)
int run_ctx_off, bool save_ret,
void *image, void *rw_image)
{
int i;
u8 *prog = *pprog;
for (i = 0; i < tl->nr_links; i++) {
if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size,
run_ctx_off, save_ret))
run_ctx_off, save_ret, image, rw_image))
return -EINVAL;
}
*pprog = prog;
......@@ -2328,7 +2330,8 @@ static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
struct bpf_tramp_links *tl, int stack_size,
int run_ctx_off, u8 **branches)
int run_ctx_off, u8 **branches,
void *image, void *rw_image)
{
u8 *prog = *pprog;
int i;
......@@ -2339,7 +2342,8 @@ static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
emit_mov_imm32(&prog, false, BPF_REG_0, 0);
emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
for (i = 0; i < tl->nr_links; i++) {
if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size, run_ctx_off, true))
if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size, run_ctx_off, true,
image, rw_image))
return -EINVAL;
/* mod_ret prog stored return value into [rbp - 8]. Emit:
......@@ -2422,10 +2426,11 @@ static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
* add rsp, 8 // skip eth_type_trans's frame
* ret // return to its caller
*/
int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end,
const struct btf_func_model *m, u32 flags,
struct bpf_tramp_links *tlinks,
void *func_addr)
static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_image,
void *rw_image_end, void *image,
const struct btf_func_model *m, u32 flags,
struct bpf_tramp_links *tlinks,
void *func_addr)
{
int i, ret, nr_regs = m->nr_args, stack_size = 0;
int regs_off, nregs_off, ip_off, run_ctx_off, arg_stack_off, rbx_off;
......@@ -2521,7 +2526,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
orig_call += X86_PATCH_SIZE;
}
prog = image;
prog = rw_image;
EMIT_ENDBR();
/*
......@@ -2563,7 +2568,8 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
if (flags & BPF_TRAMP_F_CALL_ORIG) {
/* arg1: mov rdi, im */
emit_mov_imm64(&prog, BPF_REG_1, (long) im >> 32, (u32) (long) im);
if (emit_rsb_call(&prog, __bpf_tramp_enter, prog)) {
if (emit_rsb_call(&prog, __bpf_tramp_enter,
image + (prog - (u8 *)rw_image))) {
ret = -EINVAL;
goto cleanup;
}
......@@ -2571,7 +2577,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
if (fentry->nr_links)
if (invoke_bpf(m, &prog, fentry, regs_off, run_ctx_off,
flags & BPF_TRAMP_F_RET_FENTRY_RET))
flags & BPF_TRAMP_F_RET_FENTRY_RET, image, rw_image))
return -EINVAL;
if (fmod_ret->nr_links) {
......@@ -2581,7 +2587,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
return -ENOMEM;
if (invoke_bpf_mod_ret(m, &prog, fmod_ret, regs_off,
run_ctx_off, branches)) {
run_ctx_off, branches, image, rw_image)) {
ret = -EINVAL;
goto cleanup;
}
......@@ -2602,14 +2608,14 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
EMIT2(0xff, 0xd3); /* call *rbx */
} else {
/* call original function */
if (emit_rsb_call(&prog, orig_call, prog)) {
if (emit_rsb_call(&prog, orig_call, image + (prog - (u8 *)rw_image))) {
ret = -EINVAL;
goto cleanup;
}
}
/* remember return value in a stack for bpf prog to access */
emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
im->ip_after_call = prog;
im->ip_after_call = image + (prog - (u8 *)rw_image);
memcpy(prog, x86_nops[5], X86_PATCH_SIZE);
prog += X86_PATCH_SIZE;
}
......@@ -2625,12 +2631,13 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
* aligned address of do_fexit.
*/
for (i = 0; i < fmod_ret->nr_links; i++)
emit_cond_near_jump(&branches[i], prog, branches[i],
X86_JNE);
emit_cond_near_jump(&branches[i], image + (prog - (u8 *)rw_image),
image + (branches[i] - (u8 *)rw_image), X86_JNE);
}
if (fexit->nr_links)
if (invoke_bpf(m, &prog, fexit, regs_off, run_ctx_off, false)) {
if (invoke_bpf(m, &prog, fexit, regs_off, run_ctx_off,
false, image, rw_image)) {
ret = -EINVAL;
goto cleanup;
}
......@@ -2643,10 +2650,10 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
* restored to R0.
*/
if (flags & BPF_TRAMP_F_CALL_ORIG) {
im->ip_epilogue = prog;
im->ip_epilogue = image + (prog - (u8 *)rw_image);
/* arg1: mov rdi, im */
emit_mov_imm64(&prog, BPF_REG_1, (long) im >> 32, (u32) (long) im);
if (emit_rsb_call(&prog, __bpf_tramp_exit, prog)) {
if (emit_rsb_call(&prog, __bpf_tramp_exit, image + (prog - (u8 *)rw_image))) {
ret = -EINVAL;
goto cleanup;
}
......@@ -2665,19 +2672,90 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
if (flags & BPF_TRAMP_F_SKIP_FRAME)
/* skip our return address and return to parent */
EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */
emit_return(&prog, prog);
emit_return(&prog, image + (prog - (u8 *)rw_image));
/* Make sure the trampoline generation logic doesn't overflow */
if (WARN_ON_ONCE(prog > (u8 *)image_end - BPF_INSN_SAFETY)) {
if (WARN_ON_ONCE(prog > (u8 *)rw_image_end - BPF_INSN_SAFETY)) {
ret = -EFAULT;
goto cleanup;
}
ret = prog - (u8 *)image;
ret = prog - (u8 *)rw_image + BPF_INSN_SAFETY;
cleanup:
kfree(branches);
return ret;
}
void *arch_alloc_bpf_trampoline(unsigned int size)
{
return bpf_prog_pack_alloc(size, jit_fill_hole);
}
void arch_free_bpf_trampoline(void *image, unsigned int size)
{
bpf_prog_pack_free(image, size);
}
void arch_protect_bpf_trampoline(void *image, unsigned int size)
{
}
void arch_unprotect_bpf_trampoline(void *image, unsigned int size)
{
}
int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end,
const struct btf_func_model *m, u32 flags,
struct bpf_tramp_links *tlinks,
void *func_addr)
{
void *rw_image, *tmp;
int ret;
u32 size = image_end - image;
/* rw_image doesn't need to be in module memory range, so we can
* use kvmalloc.
*/
rw_image = kvmalloc(size, GFP_KERNEL);
if (!rw_image)
return -ENOMEM;
ret = __arch_prepare_bpf_trampoline(im, rw_image, rw_image + size, image, m,
flags, tlinks, func_addr);
if (ret < 0)
goto out;
tmp = bpf_arch_text_copy(image, rw_image, size);
if (IS_ERR(tmp))
ret = PTR_ERR(tmp);
out:
kvfree(rw_image);
return ret;
}
int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
struct bpf_tramp_links *tlinks, void *func_addr)
{
struct bpf_tramp_image im;
void *image;
int ret;
/* Allocate a temporary buffer for __arch_prepare_bpf_trampoline().
* This will NOT cause fragmentation in direct map, as we do not
* call set_memory_*() on this buffer.
*
* We cannot use kvmalloc here, because we need image to be in
* module memory range.
*/
image = bpf_jit_alloc_exec(PAGE_SIZE);
if (!image)
return -ENOMEM;
ret = __arch_prepare_bpf_trampoline(&im, image, image + PAGE_SIZE, image,
m, flags, tlinks, func_addr);
bpf_jit_free_exec(image);
return ret;
}
static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs, u8 *image, u8 *buf)
{
u8 *jg_reloc, *prog = *pprog;
......
......@@ -1098,10 +1098,17 @@ struct bpf_tramp_run_ctx;
* fexit = a set of program to run after original function
*/
struct bpf_tramp_image;
int arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *image_end,
int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end,
const struct btf_func_model *m, u32 flags,
struct bpf_tramp_links *tlinks,
void *orig_call);
void *func_addr);
void *arch_alloc_bpf_trampoline(unsigned int size);
void arch_free_bpf_trampoline(void *image, unsigned int size);
void arch_protect_bpf_trampoline(void *image, unsigned int size);
void arch_unprotect_bpf_trampoline(void *image, unsigned int size);
int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
struct bpf_tramp_links *tlinks, void *func_addr);
u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog,
struct bpf_tramp_run_ctx *run_ctx);
void notrace __bpf_prog_exit_sleepable_recur(struct bpf_prog *prog, u64 start,
......@@ -1134,6 +1141,7 @@ enum bpf_tramp_prog_type {
struct bpf_tramp_image {
void *image;
int size;
struct bpf_ksym ksym;
struct percpu_ref pcref;
void *ip_after_call;
......@@ -1318,7 +1326,7 @@ int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_func
void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
struct bpf_prog *to);
/* Called only from JIT-enabled code, so there's no need for stubs. */
void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym);
void bpf_image_ksym_add(void *data, unsigned int size, struct bpf_ksym *ksym);
void bpf_image_ksym_del(struct bpf_ksym *ksym);
void bpf_ksym_add(struct bpf_ksym *ksym);
void bpf_ksym_del(struct bpf_ksym *ksym);
......
......@@ -1067,7 +1067,7 @@ struct bpf_binary_header *
bpf_jit_binary_pack_hdr(const struct bpf_prog *fp);
void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns);
void bpf_prog_pack_free(struct bpf_binary_header *hdr);
void bpf_prog_pack_free(void *ptr, u32 size);
static inline bool bpf_prog_kallsyms_verify_off(const struct bpf_prog *fp)
{
......
......@@ -355,6 +355,7 @@ int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
void *image, void *image_end)
{
u32 flags;
int size;
tlinks[BPF_TRAMP_FENTRY].links[0] = link;
tlinks[BPF_TRAMP_FENTRY].nr_links = 1;
......@@ -362,6 +363,12 @@ int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
* and it must be used alone.
*/
flags = model->ret_size > 0 ? BPF_TRAMP_F_RET_FENTRY_RET : 0;
size = arch_bpf_trampoline_size(model, flags, tlinks, NULL);
if (size < 0)
return size;
if (size > (unsigned long)image_end - (unsigned long)image)
return -E2BIG;
return arch_prepare_bpf_trampoline(NULL, image, image_end,
model, flags, tlinks, NULL);
}
......@@ -515,7 +522,7 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
if (err)
goto reset_unlock;
}
set_memory_rox((long)st_map->image, 1);
arch_protect_bpf_trampoline(st_map->image, PAGE_SIZE);
/* Let bpf_link handle registration & unregistration.
*
* Pair with smp_load_acquire() during lookup_elem().
......@@ -524,7 +531,7 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
goto unlock;
}
set_memory_rox((long)st_map->image, 1);
arch_protect_bpf_trampoline(st_map->image, PAGE_SIZE);
err = st_ops->reg(kdata);
if (likely(!err)) {
/* This refcnt increment on the map here after
......@@ -547,8 +554,7 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
* there was a race in registering the struct_ops (under the same name) to
* a sub-system through different struct_ops's maps.
*/
set_memory_nx((long)st_map->image, 1);
set_memory_rw((long)st_map->image, 1);
arch_unprotect_bpf_trampoline(st_map->image, PAGE_SIZE);
reset_unlock:
bpf_struct_ops_map_put_progs(st_map);
......@@ -616,7 +622,7 @@ static void __bpf_struct_ops_map_free(struct bpf_map *map)
bpf_struct_ops_map_put_progs(st_map);
bpf_map_area_free(st_map->links);
if (st_map->image) {
bpf_jit_free_exec(st_map->image);
arch_free_bpf_trampoline(st_map->image, PAGE_SIZE);
bpf_jit_uncharge_modmem(PAGE_SIZE);
}
bpf_map_area_free(st_map->uvalue);
......@@ -691,7 +697,7 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
return ERR_PTR(ret);
}
st_map->image = bpf_jit_alloc_exec(PAGE_SIZE);
st_map->image = arch_alloc_bpf_trampoline(PAGE_SIZE);
if (!st_map->image) {
/* __bpf_struct_ops_map_free() uses st_map->image as flag
* for "charged or not". In this case, we need to unchange
......@@ -711,7 +717,6 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
}
mutex_init(&st_map->lock);
set_vm_flush_reset_perms(st_map->image);
bpf_map_init_from_attr(map, attr);
return map;
......
......@@ -928,20 +928,20 @@ void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns)
return ptr;
}
void bpf_prog_pack_free(struct bpf_binary_header *hdr)
void bpf_prog_pack_free(void *ptr, u32 size)
{
struct bpf_prog_pack *pack = NULL, *tmp;
unsigned int nbits;
unsigned long pos;
mutex_lock(&pack_mutex);
if (hdr->size > BPF_PROG_PACK_SIZE) {
bpf_jit_free_exec(hdr);
if (size > BPF_PROG_PACK_SIZE) {
bpf_jit_free_exec(ptr);
goto out;
}
list_for_each_entry(tmp, &pack_list, list) {
if ((void *)hdr >= tmp->ptr && (tmp->ptr + BPF_PROG_PACK_SIZE) > (void *)hdr) {
if (ptr >= tmp->ptr && (tmp->ptr + BPF_PROG_PACK_SIZE) > ptr) {
pack = tmp;
break;
}
......@@ -950,10 +950,10 @@ void bpf_prog_pack_free(struct bpf_binary_header *hdr)
if (WARN_ONCE(!pack, "bpf_prog_pack bug\n"))
goto out;
nbits = BPF_PROG_SIZE_TO_NBITS(hdr->size);
pos = ((unsigned long)hdr - (unsigned long)pack->ptr) >> BPF_PROG_CHUNK_SHIFT;
nbits = BPF_PROG_SIZE_TO_NBITS(size);
pos = ((unsigned long)ptr - (unsigned long)pack->ptr) >> BPF_PROG_CHUNK_SHIFT;
WARN_ONCE(bpf_arch_text_invalidate(hdr, hdr->size),
WARN_ONCE(bpf_arch_text_invalidate(ptr, size),
"bpf_prog_pack bug: missing bpf_arch_text_invalidate?\n");
bitmap_clear(pack->bitmap, pos, nbits);
......@@ -1100,8 +1100,7 @@ bpf_jit_binary_pack_alloc(unsigned int proglen, u8 **image_ptr,
*rw_header = kvmalloc(size, GFP_KERNEL);
if (!*rw_header) {
bpf_arch_text_copy(&ro_header->size, &size, sizeof(size));
bpf_prog_pack_free(ro_header);
bpf_prog_pack_free(ro_header, size);
bpf_jit_uncharge_modmem(size);
return NULL;
}
......@@ -1132,7 +1131,7 @@ int bpf_jit_binary_pack_finalize(struct bpf_prog *prog,
kvfree(rw_header);
if (IS_ERR(ptr)) {
bpf_prog_pack_free(ro_header);
bpf_prog_pack_free(ro_header, ro_header->size);
return PTR_ERR(ptr);
}
return 0;
......@@ -1153,7 +1152,7 @@ void bpf_jit_binary_pack_free(struct bpf_binary_header *ro_header,
{
u32 size = ro_header->size;
bpf_prog_pack_free(ro_header);
bpf_prog_pack_free(ro_header, size);
kvfree(rw_header);
bpf_jit_uncharge_modmem(size);
}
......
......@@ -150,14 +150,11 @@ void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
goto out;
d->rw_image = bpf_jit_alloc_exec(PAGE_SIZE);
if (!d->rw_image) {
u32 size = PAGE_SIZE;
bpf_arch_text_copy(d->image, &size, sizeof(size));
bpf_prog_pack_free((struct bpf_binary_header *)d->image);
bpf_prog_pack_free(d->image, PAGE_SIZE);
d->image = NULL;
goto out;
}
bpf_image_ksym_add(d->image, &d->ksym);
bpf_image_ksym_add(d->image, PAGE_SIZE, &d->ksym);
}
prev_num_progs = d->num_progs;
......
......@@ -115,10 +115,10 @@ bool bpf_prog_has_trampoline(const struct bpf_prog *prog)
(ptype == BPF_PROG_TYPE_LSM && eatype == BPF_LSM_MAC);
}
void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym)
void bpf_image_ksym_add(void *data, unsigned int size, struct bpf_ksym *ksym)
{
ksym->start = (unsigned long) data;
ksym->end = ksym->start + PAGE_SIZE;
ksym->end = ksym->start + size;
bpf_ksym_add(ksym);
perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, ksym->start,
PAGE_SIZE, false, ksym->name);
......@@ -254,8 +254,8 @@ bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total, bool *ip_a
static void bpf_tramp_image_free(struct bpf_tramp_image *im)
{
bpf_image_ksym_del(&im->ksym);
bpf_jit_free_exec(im->image);
bpf_jit_uncharge_modmem(PAGE_SIZE);
arch_free_bpf_trampoline(im->image, im->size);
bpf_jit_uncharge_modmem(im->size);
percpu_ref_exit(&im->pcref);
kfree_rcu(im, rcu);
}
......@@ -349,7 +349,7 @@ static void bpf_tramp_image_put(struct bpf_tramp_image *im)
call_rcu_tasks_trace(&im->rcu, __bpf_tramp_image_put_rcu_tasks);
}
static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key)
static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key, int size)
{
struct bpf_tramp_image *im;
struct bpf_ksym *ksym;
......@@ -360,15 +360,15 @@ static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key)
if (!im)
goto out;
err = bpf_jit_charge_modmem(PAGE_SIZE);
err = bpf_jit_charge_modmem(size);
if (err)
goto out_free_im;
im->size = size;
err = -ENOMEM;
im->image = image = bpf_jit_alloc_exec(PAGE_SIZE);
im->image = image = arch_alloc_bpf_trampoline(size);
if (!image)
goto out_uncharge;
set_vm_flush_reset_perms(image);
err = percpu_ref_init(&im->pcref, __bpf_tramp_image_release, 0, GFP_KERNEL);
if (err)
......@@ -377,13 +377,13 @@ static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key)
ksym = &im->ksym;
INIT_LIST_HEAD_RCU(&ksym->lnode);
snprintf(ksym->name, KSYM_NAME_LEN, "bpf_trampoline_%llu", key);
bpf_image_ksym_add(image, ksym);
bpf_image_ksym_add(image, size, ksym);
return im;
out_free_image:
bpf_jit_free_exec(im->image);
arch_free_bpf_trampoline(im->image, im->size);
out_uncharge:
bpf_jit_uncharge_modmem(PAGE_SIZE);
bpf_jit_uncharge_modmem(size);
out_free_im:
kfree(im);
out:
......@@ -396,7 +396,7 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut
struct bpf_tramp_links *tlinks;
u32 orig_flags = tr->flags;
bool ip_arg = false;
int err, total;
int err, total, size;
tlinks = bpf_trampoline_get_progs(tr, &total, &ip_arg);
if (IS_ERR(tlinks))
......@@ -409,12 +409,6 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut
goto out;
}
im = bpf_tramp_image_alloc(tr->key);
if (IS_ERR(im)) {
err = PTR_ERR(im);
goto out;
}
/* clear all bits except SHARE_IPMODIFY and TAIL_CALL_CTX */
tr->flags &= (BPF_TRAMP_F_SHARE_IPMODIFY | BPF_TRAMP_F_TAIL_CALL_CTX);
......@@ -438,13 +432,31 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut
tr->flags |= BPF_TRAMP_F_ORIG_STACK;
#endif
err = arch_prepare_bpf_trampoline(im, im->image, im->image + PAGE_SIZE,
size = arch_bpf_trampoline_size(&tr->func.model, tr->flags,
tlinks, tr->func.addr);
if (size < 0) {
err = size;
goto out;
}
if (size > PAGE_SIZE) {
err = -E2BIG;
goto out;
}
im = bpf_tramp_image_alloc(tr->key, size);
if (IS_ERR(im)) {
err = PTR_ERR(im);
goto out;
}
err = arch_prepare_bpf_trampoline(im, im->image, im->image + size,
&tr->func.model, tr->flags, tlinks,
tr->func.addr);
if (err < 0)
goto out_free;
set_memory_rox((long)im->image, 1);
arch_protect_bpf_trampoline(im->image, im->size);
WARN_ON(tr->cur_image && total == 0);
if (tr->cur_image)
......@@ -464,9 +476,8 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut
tr->fops->func = NULL;
tr->fops->trampoline = 0;
/* reset im->image memory attr for arch_prepare_bpf_trampoline */
set_memory_nx((long)im->image, 1);
set_memory_rw((long)im->image, 1);
/* free im memory and reallocate later */
bpf_tramp_image_free(im);
goto again;
}
#endif
......@@ -1032,10 +1043,50 @@ bpf_trampoline_exit_t bpf_trampoline_exit(const struct bpf_prog *prog)
}
int __weak
arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *image_end,
arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end,
const struct btf_func_model *m, u32 flags,
struct bpf_tramp_links *tlinks,
void *orig_call)
void *func_addr)
{
return -ENOTSUPP;
}
void * __weak arch_alloc_bpf_trampoline(unsigned int size)
{
void *image;
if (WARN_ON_ONCE(size > PAGE_SIZE))
return NULL;
image = bpf_jit_alloc_exec(PAGE_SIZE);
if (image)
set_vm_flush_reset_perms(image);
return image;
}
void __weak arch_free_bpf_trampoline(void *image, unsigned int size)
{
WARN_ON_ONCE(size > PAGE_SIZE);
/* bpf_jit_free_exec doesn't need "size", but
* bpf_prog_pack_free() needs it.
*/
bpf_jit_free_exec(image);
}
void __weak arch_protect_bpf_trampoline(void *image, unsigned int size)
{
WARN_ON_ONCE(size > PAGE_SIZE);
set_memory_rox((long)image, 1);
}
void __weak arch_unprotect_bpf_trampoline(void *image, unsigned int size)
{
WARN_ON_ONCE(size > PAGE_SIZE);
set_memory_nx((long)image, 1);
set_memory_rw((long)image, 1);
}
int __weak arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
struct bpf_tramp_links *tlinks, void *func_addr)
{
return -ENOTSUPP;
}
......
......@@ -101,12 +101,11 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
goto out;
}
image = bpf_jit_alloc_exec(PAGE_SIZE);
image = arch_alloc_bpf_trampoline(PAGE_SIZE);
if (!image) {
err = -ENOMEM;
goto out;
}
set_vm_flush_reset_perms(image);
link = kzalloc(sizeof(*link), GFP_USER);
if (!link) {
......@@ -124,7 +123,7 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
if (err < 0)
goto out;
set_memory_rox((long)image, 1);
arch_protect_bpf_trampoline(image, PAGE_SIZE);
prog_ret = dummy_ops_call_op(image, args);
err = dummy_ops_copy_args(args);
......@@ -134,7 +133,7 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
err = -EFAULT;
out:
kfree(args);
bpf_jit_free_exec(image);
arch_free_bpf_trampoline(image, PAGE_SIZE);
if (link)
bpf_link_put(&link->link);
kfree(tlinks);
......