Commit 8223967f authored by Alexei Starovoitov

Merge branch 'fix-lpm-map'

Yonghong Song says:

====================
A kernel page fault which happens in lpm map trie_get_next_key is reported
by syzbot and Eric. The issue was introduced by commit b471f2f1
("bpf: implement MAP_GET_NEXT_KEY command for LPM_TRIE map").
Patch #1 fixed the issue in the kernel and patch #2 adds a multithreaded
test case in tools/testing/selftests/bpf/test_lpm_map.
====================
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents 1651e39e af32efee
...@@ -593,11 +593,10 @@ static void trie_free(struct bpf_map *map) ...@@ -593,11 +593,10 @@ static void trie_free(struct bpf_map *map)
static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key) static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key)
{ {
struct lpm_trie_node *node, *next_node = NULL, *parent, *search_root;
struct lpm_trie *trie = container_of(map, struct lpm_trie, map); struct lpm_trie *trie = container_of(map, struct lpm_trie, map);
struct bpf_lpm_trie_key *key = _key, *next_key = _next_key; struct bpf_lpm_trie_key *key = _key, *next_key = _next_key;
struct lpm_trie_node *node, *next_node = NULL, *parent;
struct lpm_trie_node **node_stack = NULL; struct lpm_trie_node **node_stack = NULL;
struct lpm_trie_node __rcu **root;
int err = 0, stack_ptr = -1; int err = 0, stack_ptr = -1;
unsigned int next_bit; unsigned int next_bit;
size_t matchlen; size_t matchlen;
...@@ -614,14 +613,13 @@ static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key) ...@@ -614,14 +613,13 @@ static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key)
*/ */
/* Empty trie */ /* Empty trie */
if (!rcu_dereference(trie->root)) search_root = rcu_dereference(trie->root);
if (!search_root)
return -ENOENT; return -ENOENT;
/* For invalid key, find the leftmost node in the trie */ /* For invalid key, find the leftmost node in the trie */
if (!key || key->prefixlen > trie->max_prefixlen) { if (!key || key->prefixlen > trie->max_prefixlen)
root = &trie->root;
goto find_leftmost; goto find_leftmost;
}
node_stack = kmalloc(trie->max_prefixlen * sizeof(struct lpm_trie_node *), node_stack = kmalloc(trie->max_prefixlen * sizeof(struct lpm_trie_node *),
GFP_ATOMIC | __GFP_NOWARN); GFP_ATOMIC | __GFP_NOWARN);
...@@ -629,7 +627,7 @@ static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key) ...@@ -629,7 +627,7 @@ static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key)
return -ENOMEM; return -ENOMEM;
/* Try to find the exact node for the given key */ /* Try to find the exact node for the given key */
for (node = rcu_dereference(trie->root); node;) { for (node = search_root; node;) {
node_stack[++stack_ptr] = node; node_stack[++stack_ptr] = node;
matchlen = longest_prefix_match(trie, node, key); matchlen = longest_prefix_match(trie, node, key);
if (node->prefixlen != matchlen || if (node->prefixlen != matchlen ||
...@@ -640,10 +638,8 @@ static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key) ...@@ -640,10 +638,8 @@ static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key)
node = rcu_dereference(node->child[next_bit]); node = rcu_dereference(node->child[next_bit]);
} }
if (!node || node->prefixlen != key->prefixlen || if (!node || node->prefixlen != key->prefixlen ||
(node->flags & LPM_TREE_NODE_FLAG_IM)) { (node->flags & LPM_TREE_NODE_FLAG_IM))
root = &trie->root;
goto find_leftmost; goto find_leftmost;
}
/* The node with the exactly-matching key has been found, /* The node with the exactly-matching key has been found,
* find the first node in postorder after the matched node. * find the first node in postorder after the matched node.
...@@ -651,10 +647,10 @@ static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key) ...@@ -651,10 +647,10 @@ static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key)
node = node_stack[stack_ptr]; node = node_stack[stack_ptr];
while (stack_ptr > 0) { while (stack_ptr > 0) {
parent = node_stack[stack_ptr - 1]; parent = node_stack[stack_ptr - 1];
if (rcu_dereference(parent->child[0]) == node && if (rcu_dereference(parent->child[0]) == node) {
rcu_dereference(parent->child[1])) { search_root = rcu_dereference(parent->child[1]);
root = &parent->child[1]; if (search_root)
goto find_leftmost; goto find_leftmost;
} }
if (!(parent->flags & LPM_TREE_NODE_FLAG_IM)) { if (!(parent->flags & LPM_TREE_NODE_FLAG_IM)) {
next_node = parent; next_node = parent;
...@@ -673,7 +669,7 @@ static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key) ...@@ -673,7 +669,7 @@ static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key)
/* Find the leftmost non-intermediate node, all intermediate nodes /* Find the leftmost non-intermediate node, all intermediate nodes
* have exact two children, so this function will never return NULL. * have exact two children, so this function will never return NULL.
*/ */
for (node = rcu_dereference(*root); node;) { for (node = search_root; node;) {
if (!(node->flags & LPM_TREE_NODE_FLAG_IM)) if (!(node->flags & LPM_TREE_NODE_FLAG_IM))
next_node = node; next_node = node;
node = rcu_dereference(node->child[0]); node = rcu_dereference(node->child[0]);
......
...@@ -11,7 +11,7 @@ ifneq ($(wildcard $(GENHDR)),) ...@@ -11,7 +11,7 @@ ifneq ($(wildcard $(GENHDR)),)
endif endif
CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include
LDLIBS += -lcap -lelf -lrt LDLIBS += -lcap -lelf -lrt -lpthread
TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
test_align test_verifier_log test_dev_cgroup test_tcpbpf_user test_align test_verifier_log test_dev_cgroup test_tcpbpf_user
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include <errno.h> #include <errno.h>
#include <inttypes.h> #include <inttypes.h>
#include <linux/bpf.h> #include <linux/bpf.h>
#include <pthread.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
...@@ -641,6 +642,98 @@ static void test_lpm_get_next_key(void) ...@@ -641,6 +642,98 @@ static void test_lpm_get_next_key(void)
close(map_fd); close(map_fd);
} }
/* Number of distinct trie keys each worker thread cycles through. */
#define MAX_TEST_KEYS 4
/* Per-thread parameters for the multi-threaded LPM trie test. */
struct lpm_mt_test_info {
int cmd; /* 0: update, 1: delete, 2: lookup, 3: get_next_key */
/* Number of passes the worker makes over the whole key[] array. */
int iter;
/* File descriptor of the map handed to the bpf_map_*() calls. */
int map_fd;
/* Keys to operate on: a prefix length plus a 32-bit address that is
 * filled in by inet_pton(AF_INET, ...).
 */
struct {
__u32 prefixlen;
__u32 data;
} key[MAX_TEST_KEYS];
};
/* Worker thread body for the multi-threaded LPM trie test.
 *
 * @arg points to a struct lpm_mt_test_info. The thread makes info->iter
 * passes over info->key[], applying the single map operation selected by
 * info->cmd (0: update, 1: delete, 2: lookup, anything else: get_next_key)
 * to every key. Delete and lookup are allowed to fail with ENOENT, and
 * get_next_key with ENOENT or ENOMEM; any other failure trips an assert.
 *
 * Exits the thread with @arg as its result so that the joining thread can
 * verify the loop ran to completion.
 */
static void *lpm_test_command(void *arg)
{
	struct lpm_mt_test_info *info = arg;
	struct bpf_lpm_trie_key *key_p, *next_key_p;
	int i, j, ret, iter, key_size;

	key_size = sizeof(struct bpf_lpm_trie_key) + sizeof(__u32);
	key_p = alloca(key_size);
	/* Allocate the get_next_key output buffer once, up front: alloca()
	 * memory is only released when the function returns, so allocating
	 * it inside the loop would grow the stack on every iteration.
	 */
	next_key_p = alloca(key_size);

	for (iter = 0; iter < info->iter; iter++) {
		for (i = 0; i < MAX_TEST_KEYS; i++) {
			/* first half of iterations in forward order,
			 * and second half in backward order.
			 */
			j = (iter < (info->iter / 2)) ? i : MAX_TEST_KEYS - i - 1;
			key_p->prefixlen = info->key[j].prefixlen;
			memcpy(key_p->data, &info->key[j].data, sizeof(__u32));

			if (info->cmd == 0) {
				__u32 value = j;

				/* update must succeed; keep the call outside
				 * assert() so it still runs under NDEBUG.
				 */
				ret = bpf_map_update_elem(info->map_fd, key_p, &value, 0);
				assert(ret == 0);
			} else if (info->cmd == 1) {
				ret = bpf_map_delete_elem(info->map_fd, key_p);
				assert(ret == 0 || errno == ENOENT);
			} else if (info->cmd == 2) {
				__u32 value;

				ret = bpf_map_lookup_elem(info->map_fd, key_p, &value);
				assert(ret == 0 || errno == ENOENT);
			} else {
				ret = bpf_map_get_next_key(info->map_fd, key_p, next_key_p);
				assert(ret == 0 || errno == ENOENT || errno == ENOMEM);
			}
		}
	}

	/* Pass successful exit info back to the main thread */
	pthread_exit((void *)info);
}
static void setup_lpm_mt_test_info(struct lpm_mt_test_info *info, int map_fd)
{
info->iter = 2000;
info->map_fd = map_fd;
info->key[0].prefixlen = 16;
inet_pton(AF_INET, "192.168.0.0", &info->key[0].data);
info->key[1].prefixlen = 24;
inet_pton(AF_INET, "192.168.0.0", &info->key[1].data);
info->key[2].prefixlen = 24;
inet_pton(AF_INET, "192.168.128.0", &info->key[2].data);
info->key[3].prefixlen = 24;
inet_pton(AF_INET, "192.168.1.0", &info->key[3].data);
}
/* Multi-threaded test of the LPM trie map.
 *
 * Creates one trie and runs four threads against it concurrently, one per
 * command (update, delete, lookup, get_next_key). Each thread gets its own
 * copy of the same key set and must exit with a pointer to that copy, which
 * lpm_test_command() only returns after finishing all of its iterations.
 */
static void test_lpm_multi_thread(void)
{
	enum { NUM_THREADS = 4 };	/* one thread per cmd value 0..3 */
	struct lpm_mt_test_info info[NUM_THREADS];
	pthread_t thread_id[NUM_THREADS];
	size_t key_size, value_size;
	int i, map_fd, err;
	void *ret;

	/* create a trie */
	value_size = sizeof(__u32);
	/* key payload is one 32-bit IPv4 address, as in lpm_test_command() */
	key_size = sizeof(struct bpf_lpm_trie_key) + sizeof(__u32);
	map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, key_size, value_size,
				100, BPF_F_NO_PREALLOC);
	/* Fail here rather than inside a worker thread on a bad fd. */
	assert(map_fd >= 0);

	/* create 4 threads to test update, delete, lookup and get_next_key */
	setup_lpm_mt_test_info(&info[0], map_fd);
	for (i = 0; i < NUM_THREADS; i++) {
		if (i != 0)
			memcpy(&info[i], &info[0], sizeof(info[i]));
		info[i].cmd = i;
		/* Keep calls with side effects outside assert() so they
		 * still run when NDEBUG is defined.
		 */
		err = pthread_create(&thread_id[i], NULL, &lpm_test_command, &info[i]);
		assert(err == 0);
	}

	for (i = 0; i < NUM_THREADS; i++) {
		err = pthread_join(thread_id[i], &ret);
		assert(err == 0 && ret == (void *)&info[i]);
	}

	close(map_fd);
}
int main(void) int main(void)
{ {
struct rlimit limit = { RLIM_INFINITY, RLIM_INFINITY }; struct rlimit limit = { RLIM_INFINITY, RLIM_INFINITY };
...@@ -667,6 +760,8 @@ int main(void) ...@@ -667,6 +760,8 @@ int main(void)
test_lpm_get_next_key(); test_lpm_get_next_key();
test_lpm_multi_thread();
printf("test_lpm: OK\n"); printf("test_lpm: OK\n");
return 0; return 0;
} }
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment