Commit a9bfac14 authored by Alexei Starovoitov, committed by Greg Kroah-Hartman

bpf: prevent out-of-bounds speculation

commit b2157399 upstream.

Under speculation, CPUs may mis-predict branches in bounds checks. Thus,
memory accesses under a bounds check may be speculated even if the
bounds check fails, providing a primitive for building a side channel.

To avoid leaking kernel data, round up the size of array-based maps to a
power of 2 and mask the index after the bounds check, so that a speculated
load with an out-of-bounds index will load either a valid value from the
array or zero from the padded area.

Unconditionally mask the index for all array types, even when max_entries
is not rounded up to a power of 2 (as is the case for the root user).
When a map is created by an unprivileged user, generate a sequence of bpf
insns that includes an AND operation to make sure that the JITed code
includes the same 'index & index_mask' operation.

If prog_array map is created by unpriv user replace
  bpf_tail_call(ctx, map, index);
with
  if (index < max_entries) {
    index &= map->index_mask;
    bpf_tail_call(ctx, map, index);
  }
(along with roundup to power 2) to prevent out-of-bounds speculation.
There is a secondary, redundant 'if (index >= max_entries)' check in the
interpreter and in all JITs, but these can be optimized later if necessary.

Other array-like maps (cpumap, devmap, sockmap, perf_event_array, cgroup_array)
cannot be used by unpriv, so no changes there.

This fixes the bpf side of "Variant 1: bounds check bypass (CVE-2017-5753)"
on all architectures, with and without JIT.

v2->v3:
Daniel noticed that an attack can potentially be crafted via syscall commands
without loading a program, so add masking to those paths as well.
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Jiri Slaby <jslaby@suse.cz>
[ Backported to 4.9 - gregkh ]
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
parent f55093dc
...@@ -43,6 +43,7 @@ struct bpf_map { ...@@ -43,6 +43,7 @@ struct bpf_map {
u32 max_entries; u32 max_entries;
u32 map_flags; u32 map_flags;
u32 pages; u32 pages;
bool unpriv_array;
struct user_struct *user; struct user_struct *user;
const struct bpf_map_ops *ops; const struct bpf_map_ops *ops;
struct work_struct work; struct work_struct work;
...@@ -189,6 +190,7 @@ struct bpf_prog_aux { ...@@ -189,6 +190,7 @@ struct bpf_prog_aux {
struct bpf_array { struct bpf_array {
struct bpf_map map; struct bpf_map map;
u32 elem_size; u32 elem_size;
u32 index_mask;
/* 'ownership' of prog_array is claimed by the first program that /* 'ownership' of prog_array is claimed by the first program that
* is going to use this map or by the first program which FD is stored * is going to use this map or by the first program which FD is stored
* in the map to make sure that all callers and callees have the same * in the map to make sure that all callers and callees have the same
......
...@@ -67,7 +67,10 @@ struct bpf_verifier_state_list { ...@@ -67,7 +67,10 @@ struct bpf_verifier_state_list {
}; };
struct bpf_insn_aux_data { struct bpf_insn_aux_data {
enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ union {
enum bpf_reg_type ptr_type; /* pointer type for load/store insns */
struct bpf_map *map_ptr; /* pointer for call insn into lookup_elem */
};
bool seen; /* this insn was processed by the verifier */ bool seen; /* this insn was processed by the verifier */
}; };
......
...@@ -46,9 +46,10 @@ static int bpf_array_alloc_percpu(struct bpf_array *array) ...@@ -46,9 +46,10 @@ static int bpf_array_alloc_percpu(struct bpf_array *array)
static struct bpf_map *array_map_alloc(union bpf_attr *attr) static struct bpf_map *array_map_alloc(union bpf_attr *attr)
{ {
bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY; bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
u32 elem_size, index_mask, max_entries;
bool unpriv = !capable(CAP_SYS_ADMIN);
struct bpf_array *array; struct bpf_array *array;
u64 array_size; u64 array_size;
u32 elem_size;
/* check sanity of attributes */ /* check sanity of attributes */
if (attr->max_entries == 0 || attr->key_size != 4 || if (attr->max_entries == 0 || attr->key_size != 4 ||
...@@ -63,11 +64,20 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) ...@@ -63,11 +64,20 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
elem_size = round_up(attr->value_size, 8); elem_size = round_up(attr->value_size, 8);
max_entries = attr->max_entries;
index_mask = roundup_pow_of_two(max_entries) - 1;
if (unpriv)
/* round up array size to nearest power of 2,
* since cpu will speculate within index_mask limits
*/
max_entries = index_mask + 1;
array_size = sizeof(*array); array_size = sizeof(*array);
if (percpu) if (percpu)
array_size += (u64) attr->max_entries * sizeof(void *); array_size += (u64) max_entries * sizeof(void *);
else else
array_size += (u64) attr->max_entries * elem_size; array_size += (u64) max_entries * elem_size;
/* make sure there is no u32 overflow later in round_up() */ /* make sure there is no u32 overflow later in round_up() */
if (array_size >= U32_MAX - PAGE_SIZE) if (array_size >= U32_MAX - PAGE_SIZE)
...@@ -77,6 +87,8 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) ...@@ -77,6 +87,8 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
array = bpf_map_area_alloc(array_size); array = bpf_map_area_alloc(array_size);
if (!array) if (!array)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
array->index_mask = index_mask;
array->map.unpriv_array = unpriv;
/* copy mandatory map attributes */ /* copy mandatory map attributes */
array->map.map_type = attr->map_type; array->map.map_type = attr->map_type;
...@@ -110,7 +122,7 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key) ...@@ -110,7 +122,7 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key)
if (unlikely(index >= array->map.max_entries)) if (unlikely(index >= array->map.max_entries))
return NULL; return NULL;
return array->value + array->elem_size * index; return array->value + array->elem_size * (index & array->index_mask);
} }
/* Called from eBPF program */ /* Called from eBPF program */
...@@ -122,7 +134,7 @@ static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key) ...@@ -122,7 +134,7 @@ static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
if (unlikely(index >= array->map.max_entries)) if (unlikely(index >= array->map.max_entries))
return NULL; return NULL;
return this_cpu_ptr(array->pptrs[index]); return this_cpu_ptr(array->pptrs[index & array->index_mask]);
} }
int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value) int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
...@@ -142,7 +154,7 @@ int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value) ...@@ -142,7 +154,7 @@ int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
*/ */
size = round_up(map->value_size, 8); size = round_up(map->value_size, 8);
rcu_read_lock(); rcu_read_lock();
pptr = array->pptrs[index]; pptr = array->pptrs[index & array->index_mask];
for_each_possible_cpu(cpu) { for_each_possible_cpu(cpu) {
bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size); bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size);
off += size; off += size;
...@@ -190,10 +202,11 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value, ...@@ -190,10 +202,11 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
return -EEXIST; return -EEXIST;
if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
memcpy(this_cpu_ptr(array->pptrs[index]), memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]),
value, map->value_size); value, map->value_size);
else else
memcpy(array->value + array->elem_size * index, memcpy(array->value +
array->elem_size * (index & array->index_mask),
value, map->value_size); value, map->value_size);
return 0; return 0;
} }
...@@ -227,7 +240,7 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value, ...@@ -227,7 +240,7 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
*/ */
size = round_up(map->value_size, 8); size = round_up(map->value_size, 8);
rcu_read_lock(); rcu_read_lock();
pptr = array->pptrs[index]; pptr = array->pptrs[index & array->index_mask];
for_each_possible_cpu(cpu) { for_each_possible_cpu(cpu) {
bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size); bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size);
off += size; off += size;
......
...@@ -1187,7 +1187,7 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env) ...@@ -1187,7 +1187,7 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
} }
} }
static int check_call(struct bpf_verifier_env *env, int func_id) static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
{ {
struct bpf_verifier_state *state = &env->cur_state; struct bpf_verifier_state *state = &env->cur_state;
const struct bpf_func_proto *fn = NULL; const struct bpf_func_proto *fn = NULL;
...@@ -1238,6 +1238,13 @@ static int check_call(struct bpf_verifier_env *env, int func_id) ...@@ -1238,6 +1238,13 @@ static int check_call(struct bpf_verifier_env *env, int func_id)
err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta); err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta);
if (err) if (err)
return err; return err;
if (func_id == BPF_FUNC_tail_call) {
if (meta.map_ptr == NULL) {
verbose("verifier bug\n");
return -EINVAL;
}
env->insn_aux_data[insn_idx].map_ptr = meta.map_ptr;
}
err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta); err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta);
if (err) if (err)
return err; return err;
...@@ -3019,7 +3026,7 @@ static int do_check(struct bpf_verifier_env *env) ...@@ -3019,7 +3026,7 @@ static int do_check(struct bpf_verifier_env *env)
return -EINVAL; return -EINVAL;
} }
err = check_call(env, insn->imm); err = check_call(env, insn->imm, insn_idx);
if (err) if (err)
return err; return err;
...@@ -3372,7 +3379,11 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) ...@@ -3372,7 +3379,11 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
struct bpf_insn *insn = prog->insnsi; struct bpf_insn *insn = prog->insnsi;
const struct bpf_func_proto *fn; const struct bpf_func_proto *fn;
const int insn_cnt = prog->len; const int insn_cnt = prog->len;
int i; struct bpf_insn insn_buf[16];
struct bpf_prog *new_prog;
struct bpf_map *map_ptr;
int i, cnt, delta = 0;
for (i = 0; i < insn_cnt; i++, insn++) { for (i = 0; i < insn_cnt; i++, insn++) {
if (insn->code != (BPF_JMP | BPF_CALL)) if (insn->code != (BPF_JMP | BPF_CALL))
...@@ -3390,6 +3401,31 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) ...@@ -3390,6 +3401,31 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
*/ */
insn->imm = 0; insn->imm = 0;
insn->code |= BPF_X; insn->code |= BPF_X;
/* instead of changing every JIT dealing with tail_call
* emit two extra insns:
* if (index >= max_entries) goto out;
* index &= array->index_mask;
* to avoid out-of-bounds cpu speculation
*/
map_ptr = env->insn_aux_data[i + delta].map_ptr;
if (!map_ptr->unpriv_array)
continue;
insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
map_ptr->max_entries, 2);
insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
container_of(map_ptr,
struct bpf_array,
map)->index_mask);
insn_buf[2] = *insn;
cnt = 3;
new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
if (!new_prog)
return -ENOMEM;
delta += cnt - 1;
env->prog = prog = new_prog;
insn = new_prog->insnsi + i + delta;
continue; continue;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment