Commit 7d8d5355 authored by Alexei Starovoitov

Merge branch 'libbpf: support non-mmap()'able data sections'

Andrii Nakryiko says:

====================

Make libbpf more conservative in using BPF_F_MMAPABLE flag with internal BPF
array maps that are backing global data sections. See patch #2 for full
description and justification.
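
The user-visible effect, exercised by the selftest later in this series, is
that mmap()'ing the fd of an internal map whose data section exposes no
global (non-hidden) variables now fails. A minimal userspace sketch, assuming
a skeleton with a .data.non_mmapable section:

    int fd = bpf_map__fd(skel->maps.data_non_mmapable);
    void *m = mmap(NULL, getpagesize(), PROT_READ, MAP_SHARED, fd, 0);
    /* m == MAP_FAILED here, because the map was created without
     * BPF_F_MMAPABLE; such data can still be accessed from userspace,
     * e.g. via bpf_map_lookup_elem()/bpf_map_update_elem()
     */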

Changes in this patch set support having bpf_spinlock, kptr, rb_tree nodes and
other "special" variables as global variables. Combining this with libbpf's
existing support for multiple custom .data.* sections allows BPF programs to
utilize multiple spinlock/rbtree_node/kptr variables in a pretty natural way
by just putting all such variables into separate data sections (and thus ARRAY
maps).
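
For illustration, a hypothetical BPF-side sketch of that pattern (not code
from this series): one spin lock per custom .data.* section, so each backing
ARRAY map carries exactly one bpf_spin_lock in its value:

    #include <vmlinux.h>
    #include <bpf/bpf_helpers.h>

    /* each section below becomes its own non-mmap()'able ARRAY map */
    struct bpf_spin_lock lock_a SEC(".data.lock_a");
    int counter_a SEC(".data.lock_a");

    struct bpf_spin_lock lock_b SEC(".data.lock_b");
    int counter_b SEC(".data.lock_b");

    SEC("raw_tp/sys_enter")
    int handler(const void *ctx)
    {
            bpf_spin_lock(&lock_a);
            counter_a++;
            bpf_spin_unlock(&lock_a);
            return 0;
    }

    char _license[] SEC("license") = "GPL";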

v1->v2:
  - addressed Stanislav's feedback, added acks.
====================
Acked-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents 81bfcc3f 2f968e9f
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1461,15 +1461,12 @@ static int find_elf_sec_sz(const struct bpf_object *obj, const char *name, __u32
 	return -ENOENT;
 }
 
-static int find_elf_var_offset(const struct bpf_object *obj, const char *name, __u32 *off)
+static Elf64_Sym *find_elf_var_sym(const struct bpf_object *obj, const char *name)
 {
 	Elf_Data *symbols = obj->efile.symbols;
 	const char *sname;
 	size_t si;
 
-	if (!name || !off)
-		return -EINVAL;
-
 	for (si = 0; si < symbols->d_size / sizeof(Elf64_Sym); si++) {
 		Elf64_Sym *sym = elf_sym_by_idx(obj, si);
 
@@ -1483,15 +1480,13 @@ static int find_elf_var_offset(const struct bpf_object *obj, const char *name, __u32 *off)
 		sname = elf_sym_str(obj, sym->st_name);
 		if (!sname) {
 			pr_warn("failed to get sym name string for var %s\n", name);
-			return -EIO;
+			return ERR_PTR(-EIO);
 		}
-		if (strcmp(name, sname) == 0) {
-			*off = sym->st_value;
-			return 0;
-		}
+
+		if (strcmp(name, sname) == 0)
+			return sym;
 	}
 
-	return -ENOENT;
+	return ERR_PTR(-ENOENT);
 }
 
 static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
@@ -1582,7 +1577,38 @@ static char *internal_map_name(struct bpf_object *obj, const char *real_name)
 }
 
 static int
-bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map);
+map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map);
+
+/* Internal BPF map is mmap()'able only if at least one of corresponding
+ * DATASEC's VARs are to be exposed through BPF skeleton. I.e., it's a GLOBAL
+ * variable and it's not marked as __hidden (which turns it into, effectively,
+ * a STATIC variable).
+ */
+static bool map_is_mmapable(struct bpf_object *obj, struct bpf_map *map)
+{
+	const struct btf_type *t, *vt;
+	struct btf_var_secinfo *vsi;
+	int i, n;
+
+	if (!map->btf_value_type_id)
+		return false;
+
+	t = btf__type_by_id(obj->btf, map->btf_value_type_id);
+	if (!btf_is_datasec(t))
+		return false;
+
+	vsi = btf_var_secinfos(t);
+	for (i = 0, n = btf_vlen(t); i < n; i++, vsi++) {
+		vt = btf__type_by_id(obj->btf, vsi->type);
+		if (!btf_is_var(vt))
+			continue;
+
+		if (btf_var(vt)->linkage != BTF_VAR_STATIC)
+			return true;
+	}
+
+	return false;
+}
 
 static int
 bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
@@ -1614,6 +1640,11 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
 	def->max_entries = 1;
 	def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG
 			 ? BPF_F_RDONLY_PROG : 0;
-	def->map_flags |= BPF_F_MMAPABLE;
+
+	/* failures are fine because of maps like .rodata.str1.1 */
+	(void) map_fill_btf_type_info(obj, map);
+
+	if (map_is_mmapable(obj, map))
+		def->map_flags |= BPF_F_MMAPABLE;
 
 	pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n",
@@ -1631,9 +1662,6 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
 		return err;
 	}
 
-	/* failures are fine because of maps like .rodata.str1.1 */
-	(void) bpf_map_find_btf_info(obj, map);
-
 	if (data)
 		memcpy(map->mmaped, data, data_sz);
 
@@ -2545,7 +2573,7 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
 		fill_map_from_def(map->inner_map, &inner_def);
 	}
 
-	err = bpf_map_find_btf_info(obj, map);
+	err = map_fill_btf_type_info(obj, map);
 	if (err)
 		return err;
 
@@ -2850,57 +2878,89 @@ static int compare_vsi_off(const void *_a, const void *_b)
 static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf,
 			     struct btf_type *t)
 {
-	__u32 size = 0, off = 0, i, vars = btf_vlen(t);
-	const char *name = btf__name_by_offset(btf, t->name_off);
-	const struct btf_type *t_var;
+	__u32 size = 0, i, vars = btf_vlen(t);
+	const char *sec_name = btf__name_by_offset(btf, t->name_off);
 	struct btf_var_secinfo *vsi;
-	const struct btf_var *var;
-	int ret;
+	bool fixup_offsets = false;
+	int err;
 
-	if (!name) {
+	if (!sec_name) {
 		pr_debug("No name found in string section for DATASEC kind.\n");
 		return -ENOENT;
 	}
 
-	/* .extern datasec size and var offsets were set correctly during
-	 * extern collection step, so just skip straight to sorting variables
+	/* Extern-backing datasecs (.ksyms, .kconfig) have their size and
+	 * variable offsets set at the previous step. Further, not every
+	 * extern BTF VAR has corresponding ELF symbol preserved, so we skip
+	 * all fixups altogether for such sections and go straight to sorting
+	 * VARs within their DATASEC.
 	 */
-	if (t->size)
+	if (strcmp(sec_name, KCONFIG_SEC) == 0 || strcmp(sec_name, KSYMS_SEC) == 0)
 		goto sort_vars;
 
-	ret = find_elf_sec_sz(obj, name, &size);
-	if (ret || !size) {
-		pr_debug("Invalid size for section %s: %u bytes\n", name, size);
-		return -ENOENT;
-	}
+	/* Clang leaves DATASEC size and VAR offsets as zeroes, so we need to
+	 * fix this up. But BPF static linker already fixes this up and fills
+	 * all the sizes and offsets during static linking. So this step has
+	 * to be optional. But the STV_HIDDEN handling is non-optional for any
+	 * non-extern DATASEC, so the variable fixup loop below handles both
+	 * functions at the same time, paying the cost of BTF VAR <-> ELF
+	 * symbol matching just once.
+	 */
+	if (t->size == 0) {
+		err = find_elf_sec_sz(obj, sec_name, &size);
+		if (err || !size) {
+			pr_debug("sec '%s': failed to determine size from ELF: size %u, err %d\n",
+				 sec_name, size, err);
+			return -ENOENT;
+		}
 
-	t->size = size;
+		t->size = size;
+		fixup_offsets = true;
+	}
 
 	for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) {
+		const struct btf_type *t_var;
+		struct btf_var *var;
+		const char *var_name;
+		Elf64_Sym *sym;
+
 		t_var = btf__type_by_id(btf, vsi->type);
 		if (!t_var || !btf_is_var(t_var)) {
-			pr_debug("Non-VAR type seen in section %s\n", name);
+			pr_debug("sec '%s': unexpected non-VAR type found\n", sec_name);
 			return -EINVAL;
 		}
 
 		var = btf_var(t_var);
-		if (var->linkage == BTF_VAR_STATIC)
+		if (var->linkage == BTF_VAR_STATIC || var->linkage == BTF_VAR_GLOBAL_EXTERN)
 			continue;
 
-		name = btf__name_by_offset(btf, t_var->name_off);
-		if (!name) {
-			pr_debug("No name found in string section for VAR kind\n");
+		var_name = btf__name_by_offset(btf, t_var->name_off);
+		if (!var_name) {
+			pr_debug("sec '%s': failed to find name of DATASEC's member #%d\n",
+				 sec_name, i);
 			return -ENOENT;
 		}
 
-		ret = find_elf_var_offset(obj, name, &off);
-		if (ret) {
-			pr_debug("No offset found in symbol table for VAR %s\n",
-				 name);
+		sym = find_elf_var_sym(obj, var_name);
+		if (IS_ERR(sym)) {
+			pr_debug("sec '%s': failed to find ELF symbol for VAR '%s'\n",
+				 sec_name, var_name);
 			return -ENOENT;
 		}
 
-		vsi->offset = off;
+		if (fixup_offsets)
+			vsi->offset = sym->st_value;
+
+		/* if variable is a global/weak symbol, but has restricted
+		 * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF VAR
+		 * as static. This follows similar logic for functions (BPF
+		 * subprogs) and influences libbpf's further decisions about
+		 * whether to make global data BPF array maps as
+		 * BPF_F_MMAPABLE.
+		 */
+		if (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN
+		    || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL)
+			var->linkage = BTF_VAR_STATIC;
 	}
 
 sort_vars:
@@ -2908,13 +2968,16 @@ static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf,
 	return 0;
 }
 
-static int btf_finalize_data(struct bpf_object *obj, struct btf *btf)
+static int bpf_object_fixup_btf(struct bpf_object *obj)
 {
-	int err = 0;
-	__u32 i, n = btf__type_cnt(btf);
+	int i, n, err = 0;
 
+	if (!obj->btf)
+		return 0;
+
+	n = btf__type_cnt(obj->btf);
 	for (i = 1; i < n; i++) {
-		struct btf_type *t = btf_type_by_id(btf, i);
+		struct btf_type *t = btf_type_by_id(obj->btf, i);
 
 		/* Loader needs to fix up some of the things compiler
 		 * couldn't get its hands on while emitting BTF. This
@@ -2922,27 +2985,11 @@ static int btf_finalize_data(struct bpf_object *obj, struct btf *btf)
 		 * the info from the ELF itself for this purpose.
 		 */
 		if (btf_is_datasec(t)) {
-			err = btf_fixup_datasec(obj, btf, t);
+			err = btf_fixup_datasec(obj, obj->btf, t);
 			if (err)
-				break;
-		}
-	}
-
-	return libbpf_err(err);
-}
-
-static int bpf_object__finalize_btf(struct bpf_object *obj)
-{
-	int err;
-
-	if (!obj->btf)
-		return 0;
-
-	err = btf_finalize_data(obj, obj->btf);
-	if (err) {
-		pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err);
-		return err;
+				return err;
 		}
 	}
 
 	return 0;
 }
@@ -4235,7 +4282,7 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Dat
 	return 0;
 }
 
-static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map)
+static int map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map)
 {
 	int id;
 
@@ -7233,7 +7280,7 @@ static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf,
 	err = err ? : bpf_object__check_endianness(obj);
 	err = err ? : bpf_object__elf_collect(obj);
 	err = err ? : bpf_object__collect_externs(obj);
-	err = err ? : bpf_object__finalize_btf(obj);
+	err = err ? : bpf_object_fixup_btf(obj);
 	err = err ? : bpf_object__init_maps(obj, opts);
 	err = err ? : bpf_object_init_progs(obj, opts);
 	err = err ? : bpf_object__collect_relos(obj);
diff --git a/tools/testing/selftests/bpf/prog_tests/skeleton.c b/tools/testing/selftests/bpf/prog_tests/skeleton.c
--- a/tools/testing/selftests/bpf/prog_tests/skeleton.c
+++ b/tools/testing/selftests/bpf/prog_tests/skeleton.c
@@ -2,6 +2,7 @@
 /* Copyright (c) 2019 Facebook */
 #include <test_progs.h>
+#include <sys/mman.h>
 
 struct s {
 	int a;
@@ -22,7 +23,8 @@ void test_skeleton(void)
 	struct test_skeleton__kconfig *kcfg;
 	const void *elf_bytes;
 	size_t elf_bytes_sz = 0;
-	int i;
+	void *m;
+	int i, fd;
 
 	skel = test_skeleton__open();
 	if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
@@ -124,6 +126,13 @@ void test_skeleton(void)
 	ASSERT_EQ(bss->huge_arr[ARRAY_SIZE(bss->huge_arr) - 1], 123, "huge_arr");
 
+	fd = bpf_map__fd(skel->maps.data_non_mmapable);
+	m = mmap(NULL, getpagesize(), PROT_READ, MAP_SHARED, fd, 0);
+	if (!ASSERT_EQ(m, MAP_FAILED, "unexpected_mmap_success"))
+		munmap(m, getpagesize());
+
+	ASSERT_EQ(bpf_map__map_flags(skel->maps.data_non_mmapable), 0, "non_mmap_flags");
+
 	elf_bytes = test_skeleton__elf_bytes(&elf_bytes_sz);
 	ASSERT_OK_PTR(elf_bytes, "elf_bytes");
 	ASSERT_GE(elf_bytes_sz, 0, "elf_bytes_sz");
diff --git a/tools/testing/selftests/bpf/progs/test_skeleton.c b/tools/testing/selftests/bpf/progs/test_skeleton.c
--- a/tools/testing/selftests/bpf/progs/test_skeleton.c
+++ b/tools/testing/selftests/bpf/progs/test_skeleton.c
@@ -53,6 +53,20 @@ int out_mostly_var;
 
 char huge_arr[16 * 1024 * 1024];
 
+/* non-mmapable custom .data section */
+struct my_value { int x, y, z; };
+
+__hidden int zero_key SEC(".data.non_mmapable");
+static struct my_value zero_value SEC(".data.non_mmapable");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__type(key, int);
+	__type(value, struct my_value);
+	__uint(max_entries, 1);
+} my_map SEC(".maps");
+
 SEC("raw_tp/sys_enter")
 int handler(const void *ctx)
 {
@@ -75,6 +89,9 @@ int handler(const void *ctx)
 
 	huge_arr[sizeof(huge_arr) - 1] = 123;
 
+	/* make sure zero_key and zero_value are not optimized out */
+	bpf_map_update_elem(&my_map, &zero_key, &zero_value, BPF_ANY);
+
 	return 0;
 }