Commit f4cceb78 authored by Alexei Starovoitov's avatar Alexei Starovoitov

Merge branch 'Reduce kmalloc / kfree churn in the verifier'

Lorenz Bauer says:

====================

github.com/cilium/ebpf runs integration tests with libbpf in a vm on CI.
I recently did some work to increase the code coverage from that, and
started experiencing OOM-kills in the VM. That led me down a rabbit
hole looking at verifier memory allocation patterns. I didn't figure out
what triggered the OOM-kills but refactored some often called memory
allocation code.

The key insight is that often times we don't need to do a full kfree /
kmalloc, but can instead just reallocate. The first patch adds two helpers
which do just that for the use cases in the verifier, which are sufficiently
different that they can't use stock krealloc_array and friends.

The series makes bpf_verif_scale about 10% faster in my VM set up, which
is especially noticeable when running with KASAN enabled.
====================
Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parents b7415964 c9e73e3d
......@@ -215,6 +215,13 @@ struct bpf_idx_pair {
u32 idx;
};
struct bpf_id_pair {
u32 old;
u32 cur;
};
/* Maximum number of register states that can exist at once */
#define BPF_ID_MAP_SIZE (MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE)
#define MAX_CALL_FRAMES 8
struct bpf_verifier_state {
/* call stack tracking */
......@@ -418,6 +425,7 @@ struct bpf_verifier_env {
const struct bpf_line_info *prev_linfo;
struct bpf_verifier_log log;
struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS + 1];
struct bpf_id_pair idmap_scratch[BPF_ID_MAP_SIZE];
struct {
int *insn_state;
int *insn_stack;
......
......@@ -737,81 +737,104 @@ static void print_verifier_state(struct bpf_verifier_env *env,
verbose(env, "\n");
}
#define COPY_STATE_FN(NAME, COUNT, FIELD, SIZE) \
static int copy_##NAME##_state(struct bpf_func_state *dst, \
const struct bpf_func_state *src) \
{ \
if (!src->FIELD) \
return 0; \
if (WARN_ON_ONCE(dst->COUNT < src->COUNT)) { \
/* internal bug, make state invalid to reject the program */ \
memset(dst, 0, sizeof(*dst)); \
return -EFAULT; \
} \
memcpy(dst->FIELD, src->FIELD, \
sizeof(*src->FIELD) * (src->COUNT / SIZE)); \
return 0; \
}
/* copy_reference_state() */
COPY_STATE_FN(reference, acquired_refs, refs, 1)
/* copy_stack_state() */
COPY_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
#undef COPY_STATE_FN
#define REALLOC_STATE_FN(NAME, COUNT, FIELD, SIZE) \
static int realloc_##NAME##_state(struct bpf_func_state *state, int size, \
bool copy_old) \
{ \
u32 old_size = state->COUNT; \
struct bpf_##NAME##_state *new_##FIELD; \
int slot = size / SIZE; \
\
if (size <= old_size || !size) { \
if (copy_old) \
return 0; \
state->COUNT = slot * SIZE; \
if (!size && old_size) { \
kfree(state->FIELD); \
state->FIELD = NULL; \
} \
return 0; \
} \
new_##FIELD = kmalloc_array(slot, sizeof(struct bpf_##NAME##_state), \
GFP_KERNEL); \
if (!new_##FIELD) \
return -ENOMEM; \
if (copy_old) { \
if (state->FIELD) \
memcpy(new_##FIELD, state->FIELD, \
sizeof(*new_##FIELD) * (old_size / SIZE)); \
memset(new_##FIELD + old_size / SIZE, 0, \
sizeof(*new_##FIELD) * (size - old_size) / SIZE); \
} \
state->COUNT = slot * SIZE; \
kfree(state->FIELD); \
state->FIELD = new_##FIELD; \
return 0; \
}
/* realloc_reference_state() */
REALLOC_STATE_FN(reference, acquired_refs, refs, 1)
/* realloc_stack_state() */
REALLOC_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
#undef REALLOC_STATE_FN
/* do_check() starts with zero-sized stack in struct bpf_verifier_state to
* make it consume minimal amount of memory. check_stack_write() access from
* the program calls into realloc_func_state() to grow the stack size.
* Note there is a non-zero 'parent' pointer inside bpf_verifier_state
* which realloc_stack_state() copies over. It points to previous
* bpf_verifier_state which is never reallocated.
*/
static int realloc_func_state(struct bpf_func_state *state, int stack_size,
int refs_size, bool copy_old)
{
int err = realloc_reference_state(state, refs_size, copy_old);
if (err)
return err;
return realloc_stack_state(state, stack_size, copy_old);
/* copy array src of length n * size bytes to dst. dst is reallocated if it's too
* small to hold src. This is different from krealloc since we don't want to preserve
* the contents of dst.
*
* Leaves dst untouched if src is NULL or length is zero. Returns NULL if memory could
* not be allocated.
*/
static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t flags)
{
size_t bytes;
if (ZERO_OR_NULL_PTR(src))
goto out;
if (unlikely(check_mul_overflow(n, size, &bytes)))
return NULL;
if (ksize(dst) < bytes) {
kfree(dst);
dst = kmalloc_track_caller(bytes, flags);
if (!dst)
return NULL;
}
memcpy(dst, src, bytes);
out:
return dst ? dst : ZERO_SIZE_PTR;
}
/* resize an array from old_n items to new_n items. the array is reallocated if it's too
* small to hold new_n items. new items are zeroed out if the array grows.
*
* Contrary to krealloc_array, does not free arr if new_n is zero.
*/
static void *realloc_array(void *arr, size_t old_n, size_t new_n, size_t size)
{
if (!new_n || old_n == new_n)
goto out;
arr = krealloc_array(arr, new_n, size, GFP_KERNEL);
if (!arr)
return NULL;
if (new_n > old_n)
memset(arr + old_n * size, 0, (new_n - old_n) * size);
out:
return arr ? arr : ZERO_SIZE_PTR;
}
static int copy_reference_state(struct bpf_func_state *dst, const struct bpf_func_state *src)
{
dst->refs = copy_array(dst->refs, src->refs, src->acquired_refs,
sizeof(struct bpf_reference_state), GFP_KERNEL);
if (!dst->refs)
return -ENOMEM;
dst->acquired_refs = src->acquired_refs;
return 0;
}
static int copy_stack_state(struct bpf_func_state *dst, const struct bpf_func_state *src)
{
size_t n = src->allocated_stack / BPF_REG_SIZE;
dst->stack = copy_array(dst->stack, src->stack, n, sizeof(struct bpf_stack_state),
GFP_KERNEL);
if (!dst->stack)
return -ENOMEM;
dst->allocated_stack = src->allocated_stack;
return 0;
}
static int resize_reference_state(struct bpf_func_state *state, size_t n)
{
state->refs = realloc_array(state->refs, state->acquired_refs, n,
sizeof(struct bpf_reference_state));
if (!state->refs)
return -ENOMEM;
state->acquired_refs = n;
return 0;
}
static int grow_stack_state(struct bpf_func_state *state, int size)
{
size_t old_n = state->allocated_stack / BPF_REG_SIZE, n = size / BPF_REG_SIZE;
if (old_n >= n)
return 0;
state->stack = realloc_array(state->stack, old_n, n, sizeof(struct bpf_stack_state));
if (!state->stack)
return -ENOMEM;
state->allocated_stack = size;
return 0;
}
/* Acquire a pointer id from the env and update the state->refs to include
......@@ -825,7 +848,7 @@ static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
int new_ofs = state->acquired_refs;
int id, err;
err = realloc_reference_state(state, state->acquired_refs + 1, true);
err = resize_reference_state(state, state->acquired_refs + 1);
if (err)
return err;
id = ++env->id_gen;
......@@ -854,18 +877,6 @@ static int release_reference_state(struct bpf_func_state *state, int ptr_id)
return -EINVAL;
}
static int transfer_reference_state(struct bpf_func_state *dst,
struct bpf_func_state *src)
{
int err = realloc_reference_state(dst, src->acquired_refs, false);
if (err)
return err;
err = copy_reference_state(dst, src);
if (err)
return err;
return 0;
}
static void free_func_state(struct bpf_func_state *state)
{
if (!state)
......@@ -904,10 +915,6 @@ static int copy_func_state(struct bpf_func_state *dst,
{
int err;
err = realloc_func_state(dst, src->allocated_stack, src->acquired_refs,
false);
if (err)
return err;
memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs));
err = copy_reference_state(dst, src);
if (err)
......@@ -919,16 +926,13 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
const struct bpf_verifier_state *src)
{
struct bpf_func_state *dst;
u32 jmp_sz = sizeof(struct bpf_idx_pair) * src->jmp_history_cnt;
int i, err;
if (dst_state->jmp_history_cnt < src->jmp_history_cnt) {
kfree(dst_state->jmp_history);
dst_state->jmp_history = kmalloc(jmp_sz, GFP_USER);
dst_state->jmp_history = copy_array(dst_state->jmp_history, src->jmp_history,
src->jmp_history_cnt, sizeof(struct bpf_idx_pair),
GFP_USER);
if (!dst_state->jmp_history)
return -ENOMEM;
}
memcpy(dst_state->jmp_history, src->jmp_history, jmp_sz);
dst_state->jmp_history_cnt = src->jmp_history_cnt;
/* if dst has more stack frames then src frame, free them */
......@@ -2590,8 +2594,7 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
u32 dst_reg = env->prog->insnsi[insn_idx].dst_reg;
struct bpf_reg_state *reg = NULL;
err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE),
state->acquired_refs, true);
err = grow_stack_state(state, round_up(slot + 1, BPF_REG_SIZE));
if (err)
return err;
/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
......@@ -2753,8 +2756,7 @@ static int check_stack_write_var_off(struct bpf_verifier_env *env,
if (value_reg && register_is_null(value_reg))
writing_zero = true;
err = realloc_func_state(state, round_up(-min_off, BPF_REG_SIZE),
state->acquired_refs, true);
err = grow_stack_state(state, round_up(-min_off, BPF_REG_SIZE));
if (err)
return err;
......@@ -5629,7 +5631,7 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn
subprog /* subprog number within this prog */);
/* Transfer references to the callee */
err = transfer_reference_state(callee, caller);
err = copy_reference_state(callee, caller);
if (err)
return err;
......@@ -5780,7 +5782,7 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
}
/* Transfer references to the caller */
err = transfer_reference_state(caller, callee);
err = copy_reference_state(caller, callee);
if (err)
return err;
......@@ -9746,13 +9748,6 @@ static bool range_within(struct bpf_reg_state *old,
old->s32_max_value >= cur->s32_max_value;
}
/* Maximum number of register states that can exist at once */
#define ID_MAP_SIZE (MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE)
struct idpair {
u32 old;
u32 cur;
};
/* If in the old state two registers had the same id, then they need to have
* the same id in the new state as well. But that id could be different from
* the old state, so we need to track the mapping from old to new ids.
......@@ -9763,11 +9758,11 @@ struct idpair {
* So we look through our idmap to see if this old id has been seen before. If
* so, we require the new id to match; otherwise, we add the id pair to the map.
*/
static bool check_ids(u32 old_id, u32 cur_id, struct idpair *idmap)
static bool check_ids(u32 old_id, u32 cur_id, struct bpf_id_pair *idmap)
{
unsigned int i;
for (i = 0; i < ID_MAP_SIZE; i++) {
for (i = 0; i < BPF_ID_MAP_SIZE; i++) {
if (!idmap[i].old) {
/* Reached an empty slot; haven't seen this id before */
idmap[i].old = old_id;
......@@ -9880,7 +9875,7 @@ static void clean_live_states(struct bpf_verifier_env *env, int insn,
/* Returns true if (rold safe implies rcur safe) */
static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
struct idpair *idmap)
struct bpf_id_pair *idmap)
{
bool equal;
......@@ -9998,7 +9993,7 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
static bool stacksafe(struct bpf_func_state *old,
struct bpf_func_state *cur,
struct idpair *idmap)
struct bpf_id_pair *idmap)
{
int i, spi;
......@@ -10095,32 +10090,23 @@ static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur)
* whereas register type in current state is meaningful, it means that
* the current state will reach 'bpf_exit' instruction safely
*/
static bool func_states_equal(struct bpf_func_state *old,
static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old,
struct bpf_func_state *cur)
{
struct idpair *idmap;
bool ret = false;
int i;
idmap = kcalloc(ID_MAP_SIZE, sizeof(struct idpair), GFP_KERNEL);
/* If we failed to allocate the idmap, just say it's not safe */
if (!idmap)
memset(env->idmap_scratch, 0, sizeof(env->idmap_scratch));
for (i = 0; i < MAX_BPF_REG; i++)
if (!regsafe(&old->regs[i], &cur->regs[i], env->idmap_scratch))
return false;
for (i = 0; i < MAX_BPF_REG; i++) {
if (!regsafe(&old->regs[i], &cur->regs[i], idmap))
goto out_free;
}
if (!stacksafe(old, cur, idmap))
goto out_free;
if (!stacksafe(old, cur, env->idmap_scratch))
return false;
if (!refsafe(old, cur))
goto out_free;
ret = true;
out_free:
kfree(idmap);
return ret;
return false;
return true;
}
static bool states_equal(struct bpf_verifier_env *env,
......@@ -10147,7 +10133,7 @@ static bool states_equal(struct bpf_verifier_env *env,
for (i = 0; i <= old->curframe; i++) {
if (old->frame[i]->callsite != cur->frame[i]->callsite)
return false;
if (!func_states_equal(old->frame[i], cur->frame[i]))
if (!func_states_equal(env, old->frame[i], cur->frame[i]))
return false;
}
return true;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment