Commit 92f61973 authored by Alexei Starovoitov

Merge branch 'BPF array map fixes and improvements'

Andrii Nakryiko says:

====================

Fix a 32-bit overflow in value pointer calculations in the BPF array map, then
lift the now-obsolete limit on array map value size. Add a selftest making sure
this works as intended.

v1->v2:
  - fix broken patch #1 (no mask_index use in the helper, as stated in the
    commit message; add missing semicolon).
====================
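The overflow class fixed here is plain C integer arithmetic: array->elem_size and the element index are both u32, so their product is computed in 32 bits and can wrap before it is added to the 64-bit value pointer. A minimal user-space sketch of the same arithmetic (illustrative values only, not kernel code):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t elem_size = 16 * 1024 * 1024;	/* 16MB per element */
	uint32_t index = 300;			/* 300 * 16MB > 4GB */

	/* the u32 * u32 product wraps at 2^32 before being widened */
	uint64_t wrapped = (uint64_t)(elem_size * index);
	/* widening one operand first keeps the full 64-bit product */
	uint64_t correct = (uint64_t)elem_size * index;

	printf("wrapped: %llu, correct: %llu\n",
	       (unsigned long long)wrapped, (unsigned long long)correct);
	return 0;
}

Casting one operand to u64 before the multiply, as the patches below do, is what preserves the full product.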
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents e5e23424 24316461
@@ -70,10 +70,8 @@ int array_map_alloc_check(union bpf_attr *attr)
 	    attr->map_flags & BPF_F_PRESERVE_ELEMS)
 		return -EINVAL;
 
-	if (attr->value_size > KMALLOC_MAX_SIZE)
-		/* if value_size is bigger, the user space won't be able to
-		 * access the elements.
-		 */
+	/* avoid overflow on round_up(map->value_size) */
+	if (attr->value_size > INT_MAX)
 		return -E2BIG;
 
 	return 0;
@@ -156,6 +154,11 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 	return &array->map;
 }
 
+static void *array_map_elem_ptr(struct bpf_array* array, u32 index)
+{
+	return array->value + (u64)array->elem_size * index;
+}
+
 /* Called from syscall or from eBPF program */
 static void *array_map_lookup_elem(struct bpf_map *map, void *key)
 {
@@ -165,7 +168,7 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key)
 	if (unlikely(index >= array->map.max_entries))
 		return NULL;
 
-	return array->value + array->elem_size * (index & array->index_mask);
+	return array->value + (u64)array->elem_size * (index & array->index_mask);
 }
 
 static int array_map_direct_value_addr(const struct bpf_map *map, u64 *imm,
@@ -203,7 +206,7 @@ static int array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
 {
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
 	struct bpf_insn *insn = insn_buf;
-	u32 elem_size = round_up(map->value_size, 8);
+	u32 elem_size = array->elem_size;
 	const int ret = BPF_REG_0;
 	const int map_ptr = BPF_REG_1;
 	const int index = BPF_REG_2;
@@ -272,7 +275,7 @@ int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
 	 * access 'value_size' of them, so copying rounded areas
 	 * will not leak any kernel data
 	 */
-	size = round_up(map->value_size, 8);
+	size = array->elem_size;
 	rcu_read_lock();
 	pptr = array->pptrs[index & array->index_mask];
 	for_each_possible_cpu(cpu) {
@@ -339,7 +342,7 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
 			   value, map->value_size);
 	} else {
 		val = array->value +
-			array->elem_size * (index & array->index_mask);
+			(u64)array->elem_size * (index & array->index_mask);
 		if (map_flags & BPF_F_LOCK)
 			copy_map_value_locked(map, val, value, false);
 		else
@@ -376,7 +379,7 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
 	 * returned or zeros which were zero-filled by percpu_alloc,
 	 * so no kernel data leaks possible
	 */
-	size = round_up(map->value_size, 8);
+	size = array->elem_size;
 	rcu_read_lock();
 	pptr = array->pptrs[index & array->index_mask];
 	for_each_possible_cpu(cpu) {
@@ -408,8 +411,7 @@ static void array_map_free_timers(struct bpf_map *map)
 		return;
 
 	for (i = 0; i < array->map.max_entries; i++)
-		bpf_timer_cancel_and_free(array->value + array->elem_size * i +
-					  map->timer_off);
+		bpf_timer_cancel_and_free(array_map_elem_ptr(array, i) + map->timer_off);
 }
 
 /* Called when map->refcnt goes to zero, either from workqueue or from syscall */
@@ -420,7 +422,7 @@ static void array_map_free(struct bpf_map *map)
 	if (map_value_has_kptrs(map)) {
 		for (i = 0; i < array->map.max_entries; i++)
-			bpf_map_free_kptrs(map, array->value + array->elem_size * i);
+			bpf_map_free_kptrs(map, array_map_elem_ptr(array, i));
 		bpf_map_free_kptr_off_tab(map);
 	}
@@ -556,7 +558,7 @@ static void *bpf_array_map_seq_start(struct seq_file *seq, loff_t *pos)
 	index = info->index & array->index_mask;
 	if (info->percpu_value_buf)
 		return array->pptrs[index];
-	return array->value + array->elem_size * index;
+	return array_map_elem_ptr(array, index);
 }
 
 static void *bpf_array_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
@@ -575,7 +577,7 @@ static void *bpf_array_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	index = info->index & array->index_mask;
 	if (info->percpu_value_buf)
 		return array->pptrs[index];
-	return array->value + array->elem_size * index;
+	return array_map_elem_ptr(array, index);
 }
 
 static int __bpf_array_map_seq_show(struct seq_file *seq, void *v)
@@ -583,6 +585,7 @@ static int __bpf_array_map_seq_show(struct seq_file *seq, void *v)
 	struct bpf_iter_seq_array_map_info *info = seq->private;
 	struct bpf_iter__bpf_map_elem ctx = {};
 	struct bpf_map *map = info->map;
+	struct bpf_array *array = container_of(map, struct bpf_array, map);
 	struct bpf_iter_meta meta;
 	struct bpf_prog *prog;
 	int off = 0, cpu = 0;
@@ -603,7 +606,7 @@ static int __bpf_array_map_seq_show(struct seq_file *seq, void *v)
 		ctx.value = v;
 	} else {
 		pptr = v;
-		size = round_up(map->value_size, 8);
+		size = array->elem_size;
 		for_each_possible_cpu(cpu) {
 			bpf_long_memcpy(info->percpu_value_buf + off,
 					per_cpu_ptr(pptr, cpu),
@@ -633,11 +636,12 @@ static int bpf_iter_init_array_map(void *priv_data,
 {
 	struct bpf_iter_seq_array_map_info *seq_info = priv_data;
 	struct bpf_map *map = aux->map;
+	struct bpf_array *array = container_of(map, struct bpf_array, map);
 	void *value_buf;
 	u32 buf_size;
 
 	if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
-		buf_size = round_up(map->value_size, 8) * num_possible_cpus();
+		buf_size = array->elem_size * num_possible_cpus();
 		value_buf = kmalloc(buf_size, GFP_USER | __GFP_NOWARN);
 		if (!value_buf)
 			return -ENOMEM;
@@ -690,7 +694,7 @@ static int bpf_for_each_array_elem(struct bpf_map *map, bpf_callback_t callback_
 		if (is_percpu)
 			val = this_cpu_ptr(array->pptrs[i]);
 		else
-			val = array->value + array->elem_size * i;
+			val = array_map_elem_ptr(array, i);
 		num_elems++;
 		key = i;
 		ret = callback_fn((u64)(long)map, (u64)(long)&key,
...
@@ -122,6 +122,8 @@ void test_skeleton(void)
 	ASSERT_EQ(skel->bss->out_mostly_var, 123, "out_mostly_var");
 
+	ASSERT_EQ(bss->huge_arr[ARRAY_SIZE(bss->huge_arr) - 1], 123, "huge_arr");
+
 	elf_bytes = test_skeleton__elf_bytes(&elf_bytes_sz);
 	ASSERT_OK_PTR(elf_bytes, "elf_bytes");
 	ASSERT_GE(elf_bytes_sz, 0, "elf_bytes_sz");
...
@@ -51,6 +51,8 @@ int out_dynarr[4] SEC(".data.dyn") = { 1, 2, 3, 4 };
 int read_mostly_var __read_mostly;
 int out_mostly_var;
 
+char huge_arr[16 * 1024 * 1024];
+
 SEC("raw_tp/sys_enter")
 int handler(const void *ctx)
 {
@@ -71,6 +73,8 @@ int handler(const void *ctx)
 	out_mostly_var = read_mostly_var;
 
+	huge_arr[sizeof(huge_arr) - 1] = 123;
+
 	return 0;
 }
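With the old KMALLOC_MAX_SIZE cap replaced by an INT_MAX check, plain array maps can now hold values as large as the 16MB huge_arr exercised by the selftest above. The following is a minimal user-space sketch (not part of this series) that creates such a map via libbpf's bpf_map_create(); the map name and sizes are illustrative, and it assumes a kernel with these patches applied:

#include <stdio.h>
#include <unistd.h>
#include <bpf/bpf.h>

int main(void)
{
	/* 16MB values were rejected by array_map_alloc_check() before
	 * this series; value_size is now only bounded by the INT_MAX
	 * check added above.
	 */
	int fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "big_val",
				sizeof(__u32), 16 * 1024 * 1024, 4, NULL);
	if (fd < 0) {
		fprintf(stderr, "bpf_map_create failed: %d\n", fd);
		return 1;
	}
	printf("created array map with 16MB values, fd=%d\n", fd);
	close(fd);
	return 0;
}

Running this requires CAP_BPF (or root), as with any map-creating BPF syscall user.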