Commit d352eca2 authored by Alexei Starovoitov

Merge branch 'support-bpf_fastcall-patterns-for-calls-to-kfuncs'

Eduard Zingerman says:

====================
support bpf_fastcall patterns for calls to kfuncs

As an extension of [1], allow bpf_fastcall patterns for kfuncs:
- pattern rules are the same as for helpers;
- spill/fill removal is allowed only for kfuncs listed in
  is_fastcall_kfunc_call() (under the assumption that such kfuncs will
  always be members of special_kfunc_list).

Allow bpf_fastcall rewrite for bpf_cast_to_kern_ctx() and
bpf_rdonly_cast() in order to conjure selftests for this feature.

After this patch-set verifier would rewrite the program below:

  r2 = 1
  *(u64 *)(r10 - 32) = r2
  call %[bpf_cast_to_kern_ctx]
  r2 = *(u64 *)(r10 - 32)
  r0 = r2
  exit

As follows:

  r2 = 1   /* spill/fill at r10[-32] is removed */
  r0 = r1  /* replacement for bpf_cast_to_kern_ctx() */
  r0 = r2
  exit

Also, the attribute used by the LLVM implementation of the feature has
been changed from no_caller_saved_registers to bpf_fastcall (see [2]).
This patch-set replaces references to nocsr with references to
bpf_fastcall to keep the LLVM and kernel parts in sync.

[1] no_caller_saved_registers attribute for helper calls
    https://lore.kernel.org/bpf/20240722233844.1406874-1-eddyz87@gmail.com/
[2] [BPF] introduce __attribute__((bpf_fastcall))
    https://github.com/llvm/llvm-project/pull/105417

Changes v2->v3:
- added a patch fixing arch_mask handling in test_loader,
  otherwise newly added tests for the feature were skipped
  (a fix for regression introduced by a recent commit);
- fixed warning regarding unused 'params' variable;
- applied stylistic fixes suggested by Yonghong;
- added acks from Yonghong;

Changes v1->v2:
- added two patches replacing all mentions of nocsr by bpf_fastcall
  (suggested by Andrii);
- removed KF_NOCSR flag (suggested by Yonghong).

v1: https://lore.kernel.org/bpf/20240812234356.2089263-1-eddyz87@gmail.com/
v2: https://lore.kernel.org/bpf/20240817015140.1039351-1-eddyz87@gmail.com/
====================

Link: https://lore.kernel.org/r/20240822084112.3257995-1-eddyz87@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents 6d641ca5 8c2e043d
......@@ -808,12 +808,12 @@ struct bpf_func_proto {
bool gpl_only;
bool pkt_access;
bool might_sleep;
/* set to true if helper follows contract for gcc/llvm
* attribute no_caller_saved_registers:
/* set to true if helper follows contract for llvm
* attribute bpf_fastcall:
* - void functions do not scratch r0
* - functions taking N arguments scratch only registers r1-rN
*/
bool allow_nocsr;
bool allow_fastcall;
enum bpf_return_type ret_type;
union {
struct {
......
......@@ -577,13 +577,13 @@ struct bpf_insn_aux_data {
bool call_with_percpu_alloc_ptr; /* {this,per}_cpu_ptr() with prog percpu alloc */
u8 alu_state; /* used in combination with alu_limit */
/* true if STX or LDX instruction is a part of a spill/fill
* pattern for a no_caller_saved_registers call.
* pattern for a bpf_fastcall call.
*/
u8 nocsr_pattern:1;
u8 fastcall_pattern:1;
/* for CALL instructions, a number of spill/fill pairs in the
* no_caller_saved_registers pattern.
* bpf_fastcall pattern.
*/
u8 nocsr_spills_num:3;
u8 fastcall_spills_num:3;
/* below fields are initialized once */
unsigned int orig_idx; /* original instruction index */
......@@ -653,10 +653,10 @@ struct bpf_subprog_info {
u32 linfo_idx; /* The idx to the main_prog->aux->linfo */
u16 stack_depth; /* max. stack depth used by this function */
u16 stack_extra;
/* offsets in range [stack_depth .. nocsr_stack_off)
* are used for no_caller_saved_registers spills and fills.
/* offsets in range [stack_depth .. fastcall_stack_off)
* are used for bpf_fastcall spills and fills.
*/
s16 nocsr_stack_off;
s16 fastcall_stack_off;
bool has_tail_call: 1;
bool tail_call_reachable: 1;
bool has_ld_abs: 1;
......@@ -664,8 +664,8 @@ struct bpf_subprog_info {
bool is_async_cb: 1;
bool is_exception_cb: 1;
bool args_cached: 1;
/* true if nocsr stack region is used by functions that can't be inlined */
bool keep_nocsr_stack: 1;
/* true if bpf_fastcall stack region is used by functions that can't be inlined */
bool keep_fastcall_stack: 1;
u8 arg_cnt;
struct bpf_subprog_arg_info args[MAX_BPF_FUNC_REG_ARGS];
......
......@@ -158,7 +158,7 @@ const struct bpf_func_proto bpf_get_smp_processor_id_proto = {
.func = bpf_get_smp_processor_id,
.gpl_only = false,
.ret_type = RET_INTEGER,
.allow_nocsr = true,
.allow_fastcall = true,
};
BPF_CALL_0(bpf_get_numa_node_id)
......
This diff is collapsed.
......@@ -53,7 +53,7 @@
#include "verifier_movsx.skel.h"
#include "verifier_netfilter_ctx.skel.h"
#include "verifier_netfilter_retcode.skel.h"
#include "verifier_nocsr.skel.h"
#include "verifier_bpf_fastcall.skel.h"
#include "verifier_or_jmp32_k.skel.h"
#include "verifier_precision.skel.h"
#include "verifier_prevent_map_lookup.skel.h"
......@@ -177,7 +177,7 @@ void test_verifier_meta_access(void) { RUN(verifier_meta_access); }
void test_verifier_movsx(void) { RUN(verifier_movsx); }
void test_verifier_netfilter_ctx(void) { RUN(verifier_netfilter_ctx); }
void test_verifier_netfilter_retcode(void) { RUN(verifier_netfilter_retcode); }
void test_verifier_nocsr(void) { RUN(verifier_nocsr); }
void test_verifier_bpf_fastcall(void) { RUN(verifier_bpf_fastcall); }
void test_verifier_or_jmp32_k(void) { RUN(verifier_or_jmp32_k); }
void test_verifier_precision(void) { RUN(verifier_precision); }
void test_verifier_prevent_map_lookup(void) { RUN(verifier_prevent_map_lookup); }
......
......@@ -2,8 +2,11 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
#include "../../../include/linux/filter.h"
#include "bpf_misc.h"
#include <stdbool.h>
#include "bpf_kfuncs.h"
SEC("raw_tp")
__arch_x86_64
......@@ -39,7 +42,7 @@ __naked void simple(void)
: __clobber_all);
}
/* The logic for detecting and verifying nocsr pattern is the same for
/* The logic for detecting and verifying bpf_fastcall pattern is the same for
* any arch, however x86 differs from arm64 or riscv64 in a way
* bpf_get_smp_processor_id is rewritten:
* - on x86 it is done by verifier
......@@ -52,7 +55,7 @@ __naked void simple(void)
*
* It is really desirable to check instruction indexes in the xlated
* patterns, so add this canary test to check that function rewrite by
* jit is correctly processed by nocsr logic, keep the rest of the
* jit is correctly processed by bpf_fastcall logic, keep the rest of the
* tests as x86.
*/
SEC("raw_tp")
......@@ -463,7 +466,7 @@ __naked static void bad_write_in_subprog_aux(void)
{
asm volatile (
"r0 = 1;"
"*(u64 *)(r1 - 0) = r0;" /* invalidates nocsr contract for caller: */
"*(u64 *)(r1 - 0) = r0;" /* invalidates bpf_fastcall contract for caller: */
"exit;" /* caller stack at -8 used outside of the pattern */
::: __clobber_all);
}
......@@ -480,7 +483,7 @@ __naked void bad_helper_write(void)
{
asm volatile (
"r1 = 1;"
/* nocsr pattern with stack offset -8 */
/* bpf_fastcall pattern with stack offset -8 */
"*(u64 *)(r10 - 8) = r1;"
"call %[bpf_get_smp_processor_id];"
"r1 = *(u64 *)(r10 - 8);"
......@@ -488,7 +491,7 @@ __naked void bad_helper_write(void)
"r1 += -8;"
"r2 = 1;"
"r3 = 42;"
/* read dst is fp[-8], thus nocsr rewrite not applied */
/* read dst is fp[-8], thus bpf_fastcall rewrite not applied */
"call %[bpf_probe_read_kernel];"
"exit;"
:
......@@ -598,7 +601,7 @@ __arch_x86_64
__log_level(4) __msg("stack depth 8")
__xlated("2: r0 = &(void __percpu *)(r0)")
__success
__naked void helper_call_does_not_prevent_nocsr(void)
__naked void helper_call_does_not_prevent_bpf_fastcall(void)
{
asm volatile (
"r1 = 1;"
......@@ -689,7 +692,7 @@ __naked int bpf_loop_interaction1(void)
{
asm volatile (
"r1 = 1;"
/* nocsr stack region at -16, but could be removed */
/* bpf_fastcall stack region at -16, but could be removed */
"*(u64 *)(r10 - 16) = r1;"
"call %[bpf_get_smp_processor_id];"
"r1 = *(u64 *)(r10 - 16);"
......@@ -729,7 +732,7 @@ __naked int bpf_loop_interaction2(void)
{
asm volatile (
"r1 = 42;"
/* nocsr stack region at -16, cannot be removed */
/* bpf_fastcall stack region at -16, cannot be removed */
"*(u64 *)(r10 - 16) = r1;"
"call %[bpf_get_smp_processor_id];"
"r1 = *(u64 *)(r10 - 16);"
......@@ -759,8 +762,8 @@ __msg("stack depth 512+0")
__xlated("r0 = &(void __percpu *)(r0)")
__success
/* cumulative_stack_depth() stack usage is MAX_BPF_STACK,
* called subprogram uses an additional slot for nocsr spill/fill,
* since nocsr spill/fill could be removed the program still fits
* called subprogram uses an additional slot for bpf_fastcall spill/fill,
* since bpf_fastcall spill/fill could be removed the program still fits
* in MAX_BPF_STACK and should be accepted.
*/
__naked int cumulative_stack_depth(void)
......@@ -798,7 +801,7 @@ __xlated("3: r0 = &(void __percpu *)(r0)")
__xlated("4: r0 = *(u32 *)(r0 +0)")
__xlated("5: exit")
__success
__naked int nocsr_max_stack_ok(void)
__naked int bpf_fastcall_max_stack_ok(void)
{
asm volatile(
"r1 = 42;"
......@@ -820,7 +823,7 @@ __arch_x86_64
__log_level(4)
__msg("stack depth 520")
__failure
__naked int nocsr_max_stack_fail(void)
__naked int bpf_fastcall_max_stack_fail(void)
{
asm volatile(
"r1 = 42;"
......@@ -828,7 +831,7 @@ __naked int nocsr_max_stack_fail(void)
"*(u64 *)(r10 - %[max_bpf_stack_8]) = r1;"
"call %[bpf_get_smp_processor_id];"
"r1 = *(u64 *)(r10 - %[max_bpf_stack_8]);"
/* call to prandom blocks nocsr rewrite */
/* call to prandom blocks bpf_fastcall rewrite */
"*(u64 *)(r10 - %[max_bpf_stack_8]) = r1;"
"call %[bpf_get_prandom_u32];"
"r1 = *(u64 *)(r10 - %[max_bpf_stack_8]);"
......@@ -842,4 +845,56 @@ __naked int nocsr_max_stack_fail(void)
);
}
/* Check the bpf_fastcall pattern around a call to the
 * bpf_cast_to_kern_ctx() kfunc: r2 is spilled to r10[-32] before the
 * call and filled back right after it. The expected xlated listing
 * below contains neither the spill nor the fill, and the call itself
 * shows up as a plain register move.
 */
SEC("cgroup/getsockname_unix")
__xlated("0: r2 = 1")
/* bpf_cast_to_kern_ctx is replaced by a single assignment */
__xlated("1: r0 = r1")
__xlated("2: r0 = r2")
__xlated("3: exit")
__success
__naked void kfunc_bpf_cast_to_kern_ctx(void)
{
asm volatile (
"r2 = 1;"
/* spill of r2, expected to be absent from the xlated program */
"*(u64 *)(r10 - 32) = r2;"
"call %[bpf_cast_to_kern_ctx];"
/* matching fill of r2, expected to be absent as well */
"r2 = *(u64 *)(r10 - 32);"
"r0 = r2;"
"exit;"
:
: __imm(bpf_cast_to_kern_ctx)
: __clobber_all);
}
/* Check the bpf_fastcall pattern around a call to the bpf_rdonly_cast()
 * kfunc: r3 is spilled to r10[-32] before the call and filled back
 * right after it. The expected xlated listing contains neither the
 * spill nor the fill, and the call shows up as a plain register move.
 *
 * Only instructions 3..5 are matched.
 * NOTE(review): instructions 0..2 are presumably the compiler-generated
 * setup of the btf_id operand plus "r2 = %[btf_id]" -- confirm against
 * the actual xlated dump.
 * NOTE(review): unlike the test above, this function is not __naked and
 * the asm body has no "exit"; presumably the compiler-emitted epilogue
 * supplies it -- confirm.
 */
SEC("raw_tp")
__xlated("3: r3 = 1")
/* bpf_rdonly_cast is replaced by a single assignment */
__xlated("4: r0 = r1")
__xlated("5: r0 = r3")
void kfunc_bpf_rdonly_cast(void)
{
asm volatile (
/* second kfunc argument: a BTF type id passed in via a register operand */
"r2 = %[btf_id];"
"r3 = 1;"
/* spill of r3, expected to be absent from the xlated program */
"*(u64 *)(r10 - 32) = r3;"
"call %[bpf_rdonly_cast];"
/* matching fill of r3, expected to be absent as well */
"r3 = *(u64 *)(r10 - 32);"
"r0 = r3;"
:
: __imm(bpf_rdonly_cast),
[btf_id]"r"(bpf_core_type_id_kernel(union bpf_attr))
: __clobber_common);
}
/* BTF FUNC records are not generated for kfuncs that are referenced
 * only from inline assembly, yet libbpf needs these records to link
 * the program. As a workaround, call each kfunc used by the tests in
 * this file directly from C so that the records are generated.
 * The zero arguments are placeholders.
 */
void kfunc_root(void)
{
bpf_cast_to_kern_ctx(0);
bpf_rdonly_cast(0, 0);
}
char _license[] SEC("license") = "GPL";
......@@ -543,7 +543,7 @@ static int parse_test_spec(struct test_loader *tester,
}
}
spec->arch_mask = arch_mask;
spec->arch_mask = arch_mask ?: -1;
if (spec->mode_mask == 0)
spec->mode_mask = PRIV;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment