Commit 2696e114 authored by David S. Miller

Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

Daniel Borkmann says:

====================
pull-request: bpf 2020-02-07

The following pull-request contains BPF updates for your *net* tree.

We've added 15 non-merge commits during the last 10 day(s) which contain
a total of 12 files changed, 114 insertions(+), 31 deletions(-).

The main changes are:

1) Various BPF sockmap fixes related to RCU handling in the map's
   teardown code, from Jakub Sitnicki.

2) Fix a state explosion in the sparse checker triggered by the nested
   macros used to calculate the BPF sk_storage map's bucket_log on
   allocation, from Martin KaFai Lau.

3) Fix a potential BPF sockmap update race by rechecking the socket's
   established state under the socket lock, from Lorenz Bauer.

4) Fix a crash in bpftool on missing xlated instructions when the
   kptr_restrict sysctl is set, from Toke Høiland-Jørgensen.

5) Fix i40e's XSK wakeup code to return -EAGAIN instead of -ENETDOWN when
   the device is busy, and various misc fixes in the xdpsock BPF sample
   code, from Maciej Fijalkowski.

6) Fix how BTF modifiers are skipped in the verifier while walking
   pointers, so that valid programs are not rejected, from Alexei
   Starovoitov.

7) Fix the runqslower BPF tool's Makefile to i) rebuild on libbpf source
   changes and ii) avoid undefined-reference linker errors with older gcc
   versions caused by the order of the passed linker arguments, from
   Yulia Kartseva and Song Liu.

8) Fix a trampoline_count BPF kselftest warning about missing braces
   around an initializer, from Andrii Nakryiko.

9) Remove the redundant "HAVE" prefix from the large INSN limit kernel
   probe in bpftool, from Michal Rostecki.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents dfa7f709 88d6f130
drivers/net/ethernet/intel/i40e/i40e_xsk.c:

@@ -791,7 +791,7 @@ int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
 	struct i40e_ring *ring;
 
 	if (test_bit(__I40E_CONFIG_BUSY, pf->state))
-		return -ENETDOWN;
+		return -EAGAIN;
 
 	if (test_bit(__I40E_VSI_DOWN, vsi->state))
 		return -ENETDOWN;
include/linux/bpf.h:

@@ -728,7 +728,7 @@ struct bpf_struct_ops {
 #if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL)
 #define BPF_MODULE_OWNER ((void *)((0xeB9FUL << 2) + POISON_POINTER_DELTA))
 const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id);
-void bpf_struct_ops_init(struct btf *btf);
+void bpf_struct_ops_init(struct btf *btf, struct bpf_verifier_log *log);
 bool bpf_struct_ops_get(const void *kdata);
 void bpf_struct_ops_put(const void *kdata);
 int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key,

@@ -752,7 +752,10 @@ static inline const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id)
 {
 	return NULL;
 }
-static inline void bpf_struct_ops_init(struct btf *btf) { }
+static inline void bpf_struct_ops_init(struct btf *btf,
+				       struct bpf_verifier_log *log)
+{
+}
 static inline bool bpf_try_module_get(const void *data, struct module *owner)
 {
 	return try_module_get(owner);
kernel/bpf/bpf_struct_ops.c:

@@ -96,12 +96,11 @@ const struct bpf_prog_ops bpf_struct_ops_prog_ops = {
 static const struct btf_type *module_type;
 
-void bpf_struct_ops_init(struct btf *btf)
+void bpf_struct_ops_init(struct btf *btf, struct bpf_verifier_log *log)
 {
 	s32 type_id, value_id, module_id;
 	const struct btf_member *member;
 	struct bpf_struct_ops *st_ops;
-	struct bpf_verifier_log log = {};
 	const struct btf_type *t;
 	char value_name[128];
 	const char *mname;

@@ -172,7 +171,7 @@ void bpf_struct_ops_init(struct btf *btf)
 							   member->type,
 							   NULL);
 			if (func_proto &&
-			    btf_distill_func_proto(&log, btf,
+			    btf_distill_func_proto(log, btf,
 						   func_proto, mname,
 						   &st_ops->func_models[j])) {
 				pr_warn("Error in parsing func ptr %s in struct %s\n",
kernel/bpf/btf.c:

@@ -3643,7 +3643,7 @@ struct btf *btf_parse_vmlinux(void)
 		goto errout;
 	}
 
-	bpf_struct_ops_init(btf);
+	bpf_struct_ops_init(btf, log);
 
 	btf_verifier_env_free(env);
 	refcount_set(&btf->refcnt, 1);

@@ -3931,6 +3931,7 @@ int btf_struct_access(struct bpf_verifier_log *log,
 
 		if (btf_type_is_ptr(mtype)) {
 			const struct btf_type *stype;
+			u32 id;
 
 			if (msize != size || off != moff) {
 				bpf_log(log,

@@ -3939,12 +3940,9 @@ int btf_struct_access(struct bpf_verifier_log *log,
 				return -EACCES;
 			}
 
-			stype = btf_type_by_id(btf_vmlinux, mtype->type);
-			/* skip modifiers */
-			while (btf_type_is_modifier(stype))
-				stype = btf_type_by_id(btf_vmlinux, stype->type);
+			stype = btf_type_skip_modifiers(btf_vmlinux, mtype->type, &id);
 			if (btf_type_is_struct(stype)) {
-				*next_btf_id = mtype->type;
+				*next_btf_id = id;
 				return PTR_TO_BTF_ID;
 			}
 		}
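
Note: the second btf.c hunk is more than a cleanup. The old loop resolved the
pointee type through BTF modifiers (const, volatile, typedef) for the is-struct
check, but then stored the unresolved mtype->type into *next_btf_id, so a member
typed e.g. `const struct sock *` could hand the verifier a modifier id and get a
valid program rejected. The standalone model below shows what the
btf_type_skip_modifiers() helper does; the types here are illustrative
stand-ins, not the kernel's BTF API.

/* build with: cc btf_skip_demo.c -o btf_skip_demo */
#include <stdio.h>

enum kind { KIND_STRUCT, KIND_CONST, KIND_VOLATILE, KIND_TYPEDEF };

struct type {
	enum kind kind;
	unsigned int next;	/* for modifiers: id of the wrapped type */
};

static int is_modifier(const struct type *t)
{
	return t->kind == KIND_CONST || t->kind == KIND_VOLATILE ||
	       t->kind == KIND_TYPEDEF;
}

/* analogous to btf_type_skip_modifiers(): resolve the modifier chain and
 * also report the id of the type it lands on */
static const struct type *skip_modifiers(const struct type *types,
					 unsigned int id,
					 unsigned int *res_id)
{
	while (is_modifier(&types[id]))
		id = types[id].next;
	*res_id = id;
	return &types[id];
}

int main(void)
{
	/* id 0: struct; id 1: typedef -> id 2; id 2: const -> id 0 */
	const struct type types[] = {
		{ KIND_STRUCT,  0 },
		{ KIND_TYPEDEF, 2 },
		{ KIND_CONST,   0 },
	};
	unsigned int id;

	skip_modifiers(types, 1, &id);
	printf("resolved id: %u\n", id);	/* prints 0: the struct itself */
	return 0;
}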
net/core/bpf_sk_storage.c:

@@ -643,9 +643,10 @@ static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
 		return ERR_PTR(-ENOMEM);
 
 	bpf_map_init_from_attr(&smap->map, attr);
 
+	nbuckets = roundup_pow_of_two(num_possible_cpus());
 	/* Use at least 2 buckets, select_bucket() is undefined behavior with 1 bucket */
-	smap->bucket_log = max_t(u32, 1, ilog2(roundup_pow_of_two(num_possible_cpus())));
-	nbuckets = 1U << smap->bucket_log;
+	nbuckets = max_t(u32, 2, nbuckets);
+	smap->bucket_log = ilog2(nbuckets);
 	cost = sizeof(*smap->buckets) * nbuckets + sizeof(*smap);
 	ret = bpf_map_charge_init(&smap->map.memory, cost);
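
Note: before and after compute the same values; the rewrite only untangles the
nested max_t/ilog2/roundup_pow_of_two expression whose macro expansion blew up
the checker. A small standalone program, with plain C stand-ins for the kernel
helpers, walks through the values the fixed code produces:

#include <stdio.h>

/* plain C stand-in for the kernel's roundup_pow_of_two() */
static unsigned int roundup_pow_of_two(unsigned int n)
{
	unsigned int r = 1;

	while (r < n)
		r <<= 1;
	return r;
}

/* plain C stand-in for the kernel's ilog2() */
static unsigned int ilog2(unsigned int n)
{
	unsigned int l = 0;

	while (n >>= 1)
		l++;
	return l;
}

int main(void)
{
	for (unsigned int cpus = 1; cpus <= 8; cpus++) {
		unsigned int nbuckets = roundup_pow_of_two(cpus);

		/* at least 2 buckets: select_bucket() is undefined with 1 */
		if (nbuckets < 2)
			nbuckets = 2;
		printf("cpus=%u nbuckets=%u bucket_log=%u\n",
		       cpus, nbuckets, ilog2(nbuckets));
	}
	return 0;
}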
net/core/sock_map.c:

@@ -234,7 +234,6 @@ static void sock_map_free(struct bpf_map *map)
 	int i;
 
 	synchronize_rcu();
-	rcu_read_lock();
 	raw_spin_lock_bh(&stab->lock);
 	for (i = 0; i < stab->map.max_entries; i++) {
 		struct sock **psk = &stab->sks[i];

@@ -243,13 +242,15 @@ static void sock_map_free(struct bpf_map *map)
 		sk = xchg(psk, NULL);
 		if (sk) {
 			lock_sock(sk);
+			rcu_read_lock();
 			sock_map_unref(sk, psk);
+			rcu_read_unlock();
 			release_sock(sk);
 		}
 	}
 	raw_spin_unlock_bh(&stab->lock);
-	rcu_read_unlock();
 
+	/* wait for psock readers accessing its map link */
 	synchronize_rcu();
 
 	bpf_map_area_free(stab->sks);

@@ -416,13 +417,15 @@ static int sock_map_update_elem(struct bpf_map *map, void *key,
 		ret = -EINVAL;
 		goto out;
 	}
-	if (!sock_map_sk_is_suitable(sk) ||
-	    sk->sk_state != TCP_ESTABLISHED) {
+	if (!sock_map_sk_is_suitable(sk)) {
 		ret = -EOPNOTSUPP;
 		goto out;
 	}
 
 	sock_map_sk_acquire(sk);
-	ret = sock_map_update_common(map, idx, sk, flags);
+	if (sk->sk_state != TCP_ESTABLISHED)
+		ret = -EOPNOTSUPP;
+	else
+		ret = sock_map_update_common(map, idx, sk, flags);
 	sock_map_sk_release(sk);
 out:

@@ -739,13 +742,15 @@ static int sock_hash_update_elem(struct bpf_map *map, void *key,
 		ret = -EINVAL;
 		goto out;
 	}
-	if (!sock_map_sk_is_suitable(sk) ||
-	    sk->sk_state != TCP_ESTABLISHED) {
+	if (!sock_map_sk_is_suitable(sk)) {
 		ret = -EOPNOTSUPP;
 		goto out;
 	}
 
 	sock_map_sk_acquire(sk);
-	ret = sock_hash_update_common(map, key, sk, flags);
+	if (sk->sk_state != TCP_ESTABLISHED)
+		ret = -EOPNOTSUPP;
+	else
+		ret = sock_hash_update_common(map, key, sk, flags);
 	sock_map_sk_release(sk);
 out:

@@ -859,19 +864,22 @@ static void sock_hash_free(struct bpf_map *map)
 	int i;
 
 	synchronize_rcu();
-	rcu_read_lock();
 	for (i = 0; i < htab->buckets_num; i++) {
 		bucket = sock_hash_select_bucket(htab, i);
 		raw_spin_lock_bh(&bucket->lock);
 		hlist_for_each_entry_safe(elem, node, &bucket->head, node) {
 			hlist_del_rcu(&elem->node);
 			lock_sock(elem->sk);
+			rcu_read_lock();
 			sock_map_unref(elem->sk, elem);
+			rcu_read_unlock();
 			release_sock(elem->sk);
 		}
 		raw_spin_unlock_bh(&bucket->lock);
 	}
 
+	/* wait for psock readers accessing its map link */
+	synchronize_rcu();
+
 	bpf_map_area_free(htab->buckets);
 	kfree(htab);
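
Note: the teardown hunks stop the code from sleeping inside an RCU read-side
section: lock_sock() may sleep, so the rcu_read_lock()/rcu_read_unlock() pair is
narrowed to just the sock_map_unref() call. The update hunks close a
time-of-check/time-of-use race by retesting TCP_ESTABLISHED after lock_sock() is
held, where the state can no longer change. Below is a user-space analogue of
that second pattern, with a pthread mutex standing in for lock_sock() and all
names illustrative:

/* build with: cc -pthread recheck_demo.c -o recheck_demo */
#include <errno.h>
#include <pthread.h>
#include <stdio.h>

#define ESTABLISHED 1

struct fake_sock {
	pthread_mutex_t lock;	/* stands in for lock_sock() */
	int state;		/* stands in for sk->sk_state */
};

static int map_update(struct fake_sock *sk)
{
	int ret;

	pthread_mutex_lock(&sk->lock);
	if (sk->state != ESTABLISHED)
		ret = -EOPNOTSUPP;	/* recheck under the lock */
	else
		ret = 0;	/* ... safe to insert into the map ... */
	pthread_mutex_unlock(&sk->lock);
	return ret;
}

int main(void)
{
	struct fake_sock sk = { PTHREAD_MUTEX_INITIALIZER, ESTABLISHED };

	printf("update: %d\n", map_update(&sk));
	return 0;
}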
samples/bpf/xdpsock_user.c:

@@ -83,7 +83,6 @@ static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP;
 static u32 opt_umem_flags;
 static int opt_unaligned_chunks;
 static int opt_mmap_flags;
-static u32 opt_xdp_bind_flags;
 static int opt_xsk_frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
 static int opt_timeout = 1000;
 static bool opt_need_wakeup = true;

@@ -789,7 +788,8 @@ static void kick_tx(struct xsk_socket_info *xsk)
 	int ret;
 
 	ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
-	if (ret >= 0 || errno == ENOBUFS || errno == EAGAIN || errno == EBUSY)
+	if (ret >= 0 || errno == ENOBUFS || errno == EAGAIN ||
+	    errno == EBUSY || errno == ENETDOWN)
 		return;
 	exit_with_error(errno);
 }
tools/bpf/bpftool/feature.c:

@@ -580,7 +580,7 @@ probe_large_insn_limit(const char *define_prefix, __u32 ifindex)
 	res = bpf_probe_large_insn_limit(ifindex);
 	print_bool_feature("have_large_insn_limit",
 			   "Large program size limit",
-			   "HAVE_LARGE_INSN_LIMIT",
+			   "LARGE_INSN_LIMIT",
 			   res, define_prefix);
 }
tools/bpf/bpftool/prog.c:

@@ -536,7 +536,7 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode,
 		buf = (unsigned char *)(info->jited_prog_insns);
 		member_len = info->jited_prog_len;
 	} else { /* DUMP_XLATED */
-		if (info->xlated_prog_len == 0) {
+		if (info->xlated_prog_len == 0 || !info->xlated_prog_insns) {
 			p_err("error retrieving insn dump: kernel.kptr_restrict set?");
 			return -1;
 		}
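
Note: bpftool fetches the dump with two bpf_obj_get_info_by_fd() calls, one to
size the buffers and one to fill them. With kernel.kptr_restrict set, the kernel
leaves xlated_prog_len nonzero but clears the xlated_prog_insns pointer in the
returned info, which the old length-only check missed before dereferencing the
buffer. A sketch of that pattern, assuming libbpf headers and the fd of an
already-loaded program:

#include <bpf/bpf.h>
#include <linux/bpf.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int fetch_xlated(int prog_fd)
{
	struct bpf_prog_info info = {};
	__u32 len = sizeof(info);
	__u32 insn_len;
	void *buf;

	/* first call: learn the size of the xlated image */
	if (bpf_obj_get_info_by_fd(prog_fd, &info, &len))
		return -1;
	insn_len = info.xlated_prog_len;

	buf = calloc(1, insn_len);
	if (!buf)
		return -1;

	/* second call: point the kernel at our buffer */
	memset(&info, 0, sizeof(info));
	info.xlated_prog_len = insn_len;
	info.xlated_prog_insns = (__u64)(unsigned long)buf;
	len = sizeof(info);
	if (bpf_obj_get_info_by_fd(prog_fd, &info, &len)) {
		free(buf);
		return -1;
	}

	/* the check the fix adds: length *and* pointer must both be set */
	if (info.xlated_prog_len == 0 || !info.xlated_prog_insns) {
		fprintf(stderr,
			"error retrieving insn dump: kernel.kptr_restrict set?\n");
		free(buf);
		return -1;
	}
	/* ... disassemble info.xlated_prog_len bytes from buf ... */
	free(buf);
	return 0;
}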
tools/bpf/runqslower/Makefile:

@@ -41,7 +41,7 @@ clean:
 
 $(OUTPUT)/runqslower: $(OUTPUT)/runqslower.o $(BPFOBJ)
 	$(call msg,BINARY,$@)
-	$(Q)$(CC) $(CFLAGS) -lelf -lz $^ -o $@
+	$(Q)$(CC) $(CFLAGS) $^ -lelf -lz -o $@
 
 $(OUTPUT)/runqslower.o: runqslower.h $(OUTPUT)/runqslower.skel.h \
 			$(OUTPUT)/runqslower.bpf.o

@@ -75,7 +75,7 @@ $(OUTPUT)/vmlinux.h: $(VMLINUX_BTF_PATH) | $(OUTPUT) $(BPFTOOL)
 	fi
 	$(Q)$(BPFTOOL) btf dump file $(VMLINUX_BTF_PATH) format c > $@
 
-$(BPFOBJ): | $(OUTPUT)
+$(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(OUTPUT)
 	$(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) \
 		    OUTPUT=$(abspath $(dir $@))/ $(abspath $@)
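
Note: the link-order half of this fix matters because linkers resolve symbols in
a single left-to-right pass, so a library named before the objects that
reference it can be dropped and surface later as "undefined reference" errors on
some toolchains. A minimal demonstration, assuming libelf development headers
are installed (the file name and build lines are illustrative):

/* link_order_demo.c
 *
 *   cc link_order_demo.c -lelf        # OK: -lelf follows the object
 *   cc -lelf link_order_demo.c        # may fail on some toolchains:
 *                                     #   "undefined reference to `elf_version'"
 */
#include <libelf.h>
#include <stdio.h>

int main(void)
{
	/* any libelf call forces a symbol the linker must resolve */
	printf("libelf usable: %d\n", elf_version(EV_CURRENT) != EV_NONE);
	return 0;
}
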
tools/testing/selftests/bpf/prog_tests/sockmap_basic.c (new file):

// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2020 Cloudflare
#include "test_progs.h"

static int connected_socket_v4(void)
{
	struct sockaddr_in addr = {
		.sin_family = AF_INET,
		.sin_port = htons(80),
		.sin_addr = { inet_addr("127.0.0.1") },
	};
	socklen_t len = sizeof(addr);
	int s, repair, err;

	s = socket(AF_INET, SOCK_STREAM, 0);
	if (CHECK_FAIL(s == -1))
		goto error;
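
	/* TCP_REPAIR lets connect() move the socket to ESTABLISHED without a
	 * live peer, which is all the test needs: sockmap only accepts
	 * established TCP sockets.
	 */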
	repair = TCP_REPAIR_ON;
	err = setsockopt(s, SOL_TCP, TCP_REPAIR, &repair, sizeof(repair));
	if (CHECK_FAIL(err))
		goto error;

	err = connect(s, (struct sockaddr *)&addr, len);
	if (CHECK_FAIL(err))
		goto error;

	repair = TCP_REPAIR_OFF_NO_WP;
	err = setsockopt(s, SOL_TCP, TCP_REPAIR, &repair, sizeof(repair));
	if (CHECK_FAIL(err))
		goto error;

	return s;

error:
	perror(__func__);
	close(s);
	return -1;
}

/* Create a map, populate it with one socket, and free the map. */
static void test_sockmap_create_update_free(enum bpf_map_type map_type)
{
	const int zero = 0;
	int s, map, err;

	s = connected_socket_v4();
	if (CHECK_FAIL(s == -1))
		return;

	map = bpf_create_map(map_type, sizeof(int), sizeof(int), 1, 0);
	if (CHECK_FAIL(map == -1)) {
		perror("bpf_create_map");
		goto out;
	}

	err = bpf_map_update_elem(map, &zero, &s, BPF_NOEXIST);
	if (CHECK_FAIL(err)) {
		perror("bpf_map_update");
		goto out;
	}

out:
	close(map);
	close(s);
}

void test_sockmap_basic(void)
{
	if (test__start_subtest("sockmap create_update_free"))
		test_sockmap_create_update_free(BPF_MAP_TYPE_SOCKMAP);
	if (test__start_subtest("sockhash create_update_free"))
		test_sockmap_create_update_free(BPF_MAP_TYPE_SOCKHASH);
}
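
(Like the other prog_tests, these subtests should be runnable from
tools/testing/selftests/bpf via the test_progs binary, e.g.
./test_progs -t sockmap_basic.)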
tools/testing/selftests/bpf/prog_tests/trampoline_count.c:

@@ -46,7 +46,7 @@ void test_trampoline_count(void)
 	const char *fentry_name = "fentry/__set_task_comm";
 	const char *fexit_name = "fexit/__set_task_comm";
 	const char *object = "test_trampoline_count.o";
-	struct inst inst[MAX_TRAMP_PROGS] = { 0 };
+	struct inst inst[MAX_TRAMP_PROGS] = {};
 	int err, i = 0, duration = 0;
 	struct bpf_object *obj;
 	struct bpf_link *link;
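
Note: `{ 0 }` initializes only the first member of the first array element,
which some GCC and Clang versions flag with -Wmissing-braces when the element
type is itself a struct; the empty initializer list avoids the warning. A
minimal reproduction with an illustrative struct (not the selftest's real one):

/* build with: cc -Wall -Wmissing-braces braces_demo.c */
struct inst {
	void *obj;
	int link_fd;
};

static struct inst warns[4] = { 0 };	/* may warn: missing braces */
static struct inst quiet[4] = {};	/* GNU extension (C23): no warning */

int main(void)
{
	return warns[0].link_fd + quiet[0].link_fd;
}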