Commit 8ac2c867 authored by David S. Miller

Merge branch 'bpf-per-cpu-maps'

Alexei Starovoitov says:

====================
bpf: introduce per-cpu maps

We've started to use bpf to trace every packet, and the atomic add
instruction (even when JITed) started to show up in perf profiles.
The solution is to use per-cpu counters.
For PERCPU_(HASH|ARRAY) maps the existing bpf_map_lookup() helper
returns the per-cpu area for the current cpu, which bpf programs can
use to store and increment counters. The BPF_MAP_LOOKUP_ELEM syscall
command returns the areas from all cpus, and the user process
aggregates the counters. The usage example is in patch 6. The API
turned out to be very easy to use from bpf programs and from user space.
Long term we have been discussing adding a 'bounded loop' instruction,
so bpf programs can do the aggregation within the program, which may
help some use cases. Right now user-space aggregation of per-cpu
counters fits best.

This patch set is a new approach to per-cpu hash and array maps.
I've reused the map tests written by Martin and Ming, but the
implementation and API are new. Old discussion is here:
http://thread.gmane.org/gmane.linux.kernel/2123800/focus=2126435
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents ba905f5e 3059303f
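
For orientation before the diffs, here is a minimal usage sketch written in the style of the samples/bpf files touched by this set. It is illustrative only: the map, section and function names below are made up for the example and are not part of the patches.

/* --- BPF program side (built like the samples/bpf *_kern.c files) --- */
#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"

struct bpf_map_def SEC("maps") pkt_cnt = {
	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
	.key_size = sizeof(u32),
	.value_size = sizeof(u64),
	.max_entries = 1,
};

SEC("socket1")
int count_pkts(struct __sk_buff *skb)
{
	u32 key = 0;
	u64 *val;

	/* the lookup helper returns this cpu's copy of the value, so a
	 * plain increment is race-free; no __sync_fetch_and_add() needed
	 */
	val = bpf_map_lookup_elem(&pkt_cnt, &key);
	if (val)
		(*val)++;
	return 0;
}

/* --- user space side (samples/bpf libbpf.h wrappers) --- */
#include <unistd.h>
#include "libbpf.h"

static unsigned long long read_total(int map_fd)
{
	unsigned int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
	unsigned long long values[nr_cpus], total = 0;
	unsigned int i;
	__u32 key = 0;

	/* BPF_MAP_LOOKUP_ELEM on a per-cpu map fills one slot per possible
	 * cpu; the user process aggregates them
	 */
	if (bpf_lookup_elem(map_fd, &key, values) == 0)
		for (i = 0; i < nr_cpus; i++)
			total += values[i];
	return total;
}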
@@ -151,6 +151,7 @@ struct bpf_array {
 	union {
 		char value[0] __aligned(8);
 		void *ptrs[0] __aligned(8);
+		void __percpu *pptrs[0] __aligned(8);
 	};
 };
 #define MAX_TAIL_CALL_CNT 32
@@ -182,6 +183,29 @@ int bpf_prog_new_fd(struct bpf_prog *prog);
 int bpf_obj_pin_user(u32 ufd, const char __user *pathname);
 int bpf_obj_get_user(const char __user *pathname);
 
+int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
+int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
+int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
+			   u64 flags);
+int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
+			    u64 flags);
+
+/* memcpy that is used with 8-byte aligned pointers, power-of-8 size and
+ * forced to use 'long' read/writes to try to atomically copy long counters.
+ * Best-effort only. No barriers here, since it _will_ race with concurrent
+ * updates from BPF programs. Called from bpf syscall and mostly used with
+ * size 8 or 16 bytes, so ask compiler to inline it.
+ */
+static inline void bpf_long_memcpy(void *dst, const void *src, u32 size)
+{
+	const long *lsrc = src;
+	long *ldst = dst;
+
+	size /= sizeof(long);
+	while (size--)
+		*ldst++ = *lsrc++;
+}
+
 /* verify correctness of eBPF program */
 int bpf_check(struct bpf_prog **fp, union bpf_attr *attr);
 #else
...
@@ -81,6 +81,8 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_ARRAY,
 	BPF_MAP_TYPE_PROG_ARRAY,
 	BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+	BPF_MAP_TYPE_PERCPU_HASH,
+	BPF_MAP_TYPE_PERCPU_ARRAY,
 };
 
 enum bpf_prog_type {
...
@@ -17,11 +17,39 @@
 #include <linux/filter.h>
 #include <linux/perf_event.h>
 
+static void bpf_array_free_percpu(struct bpf_array *array)
+{
+	int i;
+
+	for (i = 0; i < array->map.max_entries; i++)
+		free_percpu(array->pptrs[i]);
+}
+
+static int bpf_array_alloc_percpu(struct bpf_array *array)
+{
+	void __percpu *ptr;
+	int i;
+
+	for (i = 0; i < array->map.max_entries; i++) {
+		ptr = __alloc_percpu_gfp(array->elem_size, 8,
+					 GFP_USER | __GFP_NOWARN);
+		if (!ptr) {
+			bpf_array_free_percpu(array);
+			return -ENOMEM;
+		}
+		array->pptrs[i] = ptr;
+	}
+
+	return 0;
+}
+
 /* Called from syscall */
 static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 {
+	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
 	struct bpf_array *array;
-	u32 elem_size, array_size;
+	u64 array_size;
+	u32 elem_size;
 
 	/* check sanity of attributes */
 	if (attr->max_entries == 0 || attr->key_size != 4 ||
@@ -36,12 +64,16 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 	elem_size = round_up(attr->value_size, 8);
 
-	/* check round_up into zero and u32 overflow */
-	if (elem_size == 0 ||
-	    attr->max_entries > (U32_MAX - PAGE_SIZE - sizeof(*array)) / elem_size)
+	array_size = sizeof(*array);
+	if (percpu)
+		array_size += (u64) attr->max_entries * sizeof(void *);
+	else
+		array_size += (u64) attr->max_entries * elem_size;
+
+	/* make sure there is no u32 overflow later in round_up() */
+	if (array_size >= U32_MAX - PAGE_SIZE)
 		return ERR_PTR(-ENOMEM);
 
-	array_size = sizeof(*array) + attr->max_entries * elem_size;
-
 	/* allocate all map elements and zero-initialize them */
 	array = kzalloc(array_size, GFP_USER | __GFP_NOWARN);
@@ -52,12 +84,25 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 	}
 
 	/* copy mandatory map attributes */
+	array->map.map_type = attr->map_type;
 	array->map.key_size = attr->key_size;
 	array->map.value_size = attr->value_size;
 	array->map.max_entries = attr->max_entries;
-	array->map.pages = round_up(array_size, PAGE_SIZE) >> PAGE_SHIFT;
 	array->elem_size = elem_size;
 
+	if (!percpu)
+		goto out;
+
+	array_size += (u64) attr->max_entries * elem_size * num_possible_cpus();
+
+	if (array_size >= U32_MAX - PAGE_SIZE ||
+	    elem_size > PCPU_MIN_UNIT_SIZE || bpf_array_alloc_percpu(array)) {
+		kvfree(array);
+		return ERR_PTR(-ENOMEM);
+	}
+out:
+	array->map.pages = round_up(array_size, PAGE_SIZE) >> PAGE_SHIFT;
+
 	return &array->map;
 }
@@ -67,12 +112,50 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key)
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
 	u32 index = *(u32 *)key;
 
-	if (index >= array->map.max_entries)
+	if (unlikely(index >= array->map.max_entries))
 		return NULL;
 
 	return array->value + array->elem_size * index;
 }
 
+/* Called from eBPF program */
+static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
+{
+	struct bpf_array *array = container_of(map, struct bpf_array, map);
+	u32 index = *(u32 *)key;
+
+	if (unlikely(index >= array->map.max_entries))
+		return NULL;
+
+	return this_cpu_ptr(array->pptrs[index]);
+}
+
+int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
+{
+	struct bpf_array *array = container_of(map, struct bpf_array, map);
+	u32 index = *(u32 *)key;
+	void __percpu *pptr;
+	int cpu, off = 0;
+	u32 size;
+
+	if (unlikely(index >= array->map.max_entries))
+		return -ENOENT;
+
+	/* per_cpu areas are zero-filled and bpf programs can only
+	 * access 'value_size' of them, so copying rounded areas
+	 * will not leak any kernel data
+	 */
+	size = round_up(map->value_size, 8);
+	rcu_read_lock();
+	pptr = array->pptrs[index];
+	for_each_possible_cpu(cpu) {
+		bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size);
+		off += size;
+	}
+	rcu_read_unlock();
+	return 0;
+}
+
 /* Called from syscall */
 static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
 {
@@ -99,19 +182,62 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
 	u32 index = *(u32 *)key;
 
-	if (map_flags > BPF_EXIST)
+	if (unlikely(map_flags > BPF_EXIST))
 		/* unknown flags */
 		return -EINVAL;
 
-	if (index >= array->map.max_entries)
+	if (unlikely(index >= array->map.max_entries))
 		/* all elements were pre-allocated, cannot insert a new one */
 		return -E2BIG;
 
-	if (map_flags == BPF_NOEXIST)
+	if (unlikely(map_flags == BPF_NOEXIST))
 		/* all elements already exist */
 		return -EEXIST;
 
-	memcpy(array->value + array->elem_size * index, value, map->value_size);
+	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
+		memcpy(this_cpu_ptr(array->pptrs[index]),
+		       value, map->value_size);
+	else
+		memcpy(array->value + array->elem_size * index,
+		       value, map->value_size);
+	return 0;
+}
+
+int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
+			    u64 map_flags)
+{
+	struct bpf_array *array = container_of(map, struct bpf_array, map);
+	u32 index = *(u32 *)key;
+	void __percpu *pptr;
+	int cpu, off = 0;
+	u32 size;
+
+	if (unlikely(map_flags > BPF_EXIST))
+		/* unknown flags */
+		return -EINVAL;
+
+	if (unlikely(index >= array->map.max_entries))
+		/* all elements were pre-allocated, cannot insert a new one */
+		return -E2BIG;
+
+	if (unlikely(map_flags == BPF_NOEXIST))
+		/* all elements already exist */
+		return -EEXIST;
+
+	/* the user space will provide round_up(value_size, 8) bytes that
+	 * will be copied into per-cpu area. bpf programs can only access
+	 * value_size of it. During lookup the same extra bytes will be
+	 * returned or zeros which were zero-filled by percpu_alloc,
+	 * so no kernel data leaks possible
+	 */
+	size = round_up(map->value_size, 8);
+	rcu_read_lock();
+	pptr = array->pptrs[index];
+	for_each_possible_cpu(cpu) {
+		bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size);
+		off += size;
+	}
+	rcu_read_unlock();
 	return 0;
 }
@@ -133,6 +259,9 @@ static void array_map_free(struct bpf_map *map)
 	 */
 	synchronize_rcu();
 
+	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
+		bpf_array_free_percpu(array);
+
 	kvfree(array);
 }
@@ -150,9 +279,24 @@ static struct bpf_map_type_list array_type __read_mostly = {
 	.type = BPF_MAP_TYPE_ARRAY,
 };
 
+static const struct bpf_map_ops percpu_array_ops = {
+	.map_alloc = array_map_alloc,
+	.map_free = array_map_free,
+	.map_get_next_key = array_map_get_next_key,
+	.map_lookup_elem = percpu_array_map_lookup_elem,
+	.map_update_elem = array_map_update_elem,
+	.map_delete_elem = array_map_delete_elem,
+};
+
+static struct bpf_map_type_list percpu_array_type __read_mostly = {
+	.ops = &percpu_array_ops,
+	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
+};
+
 static int __init register_array_map(void)
 {
 	bpf_register_map_type(&array_type);
+	bpf_register_map_type(&percpu_array_type);
 	return 0;
 }
 late_initcall(register_array_map);
...
[one diff collapsed in the original view; not shown]
@@ -239,6 +239,7 @@ static int map_lookup_elem(union bpf_attr *attr)
 	int ufd = attr->map_fd;
 	struct bpf_map *map;
 	void *key, *value, *ptr;
+	u32 value_size;
 	struct fd f;
 	int err;
@@ -259,23 +260,35 @@ static int map_lookup_elem(union bpf_attr *attr)
 	if (copy_from_user(key, ukey, map->key_size) != 0)
 		goto free_key;
 
+	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
+		value_size = round_up(map->value_size, 8) * num_possible_cpus();
+	else
+		value_size = map->value_size;
+
 	err = -ENOMEM;
-	value = kmalloc(map->value_size, GFP_USER | __GFP_NOWARN);
+	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
 	if (!value)
 		goto free_key;
 
-	rcu_read_lock();
-	ptr = map->ops->map_lookup_elem(map, key);
-	if (ptr)
-		memcpy(value, ptr, map->value_size);
-	rcu_read_unlock();
+	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH) {
+		err = bpf_percpu_hash_copy(map, key, value);
+	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
+		err = bpf_percpu_array_copy(map, key, value);
+	} else {
+		rcu_read_lock();
+		ptr = map->ops->map_lookup_elem(map, key);
+		if (ptr)
+			memcpy(value, ptr, value_size);
+		rcu_read_unlock();
+		err = ptr ? 0 : -ENOENT;
+	}
 
-	err = -ENOENT;
-	if (!ptr)
+	if (err)
 		goto free_value;
 
 	err = -EFAULT;
-	if (copy_to_user(uvalue, value, map->value_size) != 0)
+	if (copy_to_user(uvalue, value, value_size) != 0)
 		goto free_value;
 
 	err = 0;
@@ -298,6 +311,7 @@ static int map_update_elem(union bpf_attr *attr)
 	int ufd = attr->map_fd;
 	struct bpf_map *map;
 	void *key, *value;
+	u32 value_size;
 	struct fd f;
 	int err;
@@ -318,21 +332,30 @@ static int map_update_elem(union bpf_attr *attr)
 	if (copy_from_user(key, ukey, map->key_size) != 0)
 		goto free_key;
 
+	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
+		value_size = round_up(map->value_size, 8) * num_possible_cpus();
+	else
+		value_size = map->value_size;
+
 	err = -ENOMEM;
-	value = kmalloc(map->value_size, GFP_USER | __GFP_NOWARN);
+	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
 	if (!value)
 		goto free_key;
 
 	err = -EFAULT;
-	if (copy_from_user(value, uvalue, map->value_size) != 0)
+	if (copy_from_user(value, uvalue, value_size) != 0)
 		goto free_value;
 
-	/* eBPF program that use maps are running under rcu_read_lock(),
-	 * therefore all map accessors rely on this fact, so do the same here
-	 */
-	rcu_read_lock();
-	err = map->ops->map_update_elem(map, key, value, attr->flags);
-	rcu_read_unlock();
+	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH) {
+		err = bpf_percpu_hash_update(map, key, value, attr->flags);
+	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
+		err = bpf_percpu_array_update(map, key, value, attr->flags);
+	} else {
+		rcu_read_lock();
+		err = map->ops->map_update_elem(map, key, value, attr->flags);
+		rcu_read_unlock();
+	}
 
 free_value:
 	kfree(value);
...
@@ -89,6 +89,100 @@ static void test_hashmap_sanity(int i, void *data)
 	close(map_fd);
 }
 
+/* sanity tests for percpu map API */
+static void test_percpu_hashmap_sanity(int task, void *data)
+{
+	long long key, next_key;
+	int expected_key_mask = 0;
+	unsigned int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+	long long value[nr_cpus];
+	int map_fd, i;
+
+	map_fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_HASH, sizeof(key),
+				sizeof(value[0]), 2);
+	if (map_fd < 0) {
+		printf("failed to create hashmap '%s'\n", strerror(errno));
+		exit(1);
+	}
+
+	for (i = 0; i < nr_cpus; i++)
+		value[i] = i + 100;
+	key = 1;
+	/* insert key=1 element */
+	assert(!(expected_key_mask & key));
+	assert(bpf_update_elem(map_fd, &key, value, BPF_ANY) == 0);
+	expected_key_mask |= key;
+
+	/* BPF_NOEXIST means: add new element if it doesn't exist */
+	assert(bpf_update_elem(map_fd, &key, value, BPF_NOEXIST) == -1 &&
+	       /* key=1 already exists */
+	       errno == EEXIST);
+
+	/* -1 is an invalid flag */
+	assert(bpf_update_elem(map_fd, &key, value, -1) == -1 &&
+	       errno == EINVAL);
+
+	/* check that key=1 can be found. value could be 0 if the lookup
+	 * was run from a different cpu.
+	 */
+	value[0] = 1;
+	assert(bpf_lookup_elem(map_fd, &key, value) == 0 && value[0] == 100);
+
+	key = 2;
+	/* check that key=2 is not found */
+	assert(bpf_lookup_elem(map_fd, &key, value) == -1 && errno == ENOENT);
+
+	/* BPF_EXIST means: update existing element */
+	assert(bpf_update_elem(map_fd, &key, value, BPF_EXIST) == -1 &&
+	       /* key=2 is not there */
+	       errno == ENOENT);
+
+	/* insert key=2 element */
+	assert(!(expected_key_mask & key));
+	assert(bpf_update_elem(map_fd, &key, value, BPF_NOEXIST) == 0);
+	expected_key_mask |= key;
+
+	/* key=1 and key=2 were inserted, check that key=0 cannot be inserted
+	 * due to max_entries limit
+	 */
+	key = 0;
+	assert(bpf_update_elem(map_fd, &key, value, BPF_NOEXIST) == -1 &&
+	       errno == E2BIG);
+
+	/* check that key = 0 doesn't exist */
+	assert(bpf_delete_elem(map_fd, &key) == -1 && errno == ENOENT);
+
+	/* iterate over two elements */
+	while (!bpf_get_next_key(map_fd, &key, &next_key)) {
+		assert((expected_key_mask & next_key) == next_key);
+		expected_key_mask &= ~next_key;
+
+		assert(bpf_lookup_elem(map_fd, &next_key, value) == 0);
+		for (i = 0; i < nr_cpus; i++)
+			assert(value[i] == i + 100);
+
+		key = next_key;
+	}
+	assert(errno == ENOENT);
+
+	/* Update with BPF_EXIST */
+	key = 1;
+	assert(bpf_update_elem(map_fd, &key, value, BPF_EXIST) == 0);
+
+	/* delete both elements */
+	key = 1;
+	assert(bpf_delete_elem(map_fd, &key) == 0);
+	key = 2;
+	assert(bpf_delete_elem(map_fd, &key) == 0);
+	assert(bpf_delete_elem(map_fd, &key) == -1 && errno == ENOENT);
+
+	key = 0;
+	/* check that map is empty */
+	assert(bpf_get_next_key(map_fd, &key, &next_key) == -1 &&
+	       errno == ENOENT);
+	close(map_fd);
+}
+
 static void test_arraymap_sanity(int i, void *data)
 {
 	int key, next_key, map_fd;
@@ -142,6 +236,94 @@ static void test_arraymap_sanity(int i, void *data)
 	close(map_fd);
 }
 
+static void test_percpu_arraymap_many_keys(void)
+{
+	unsigned nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+	unsigned nr_keys = 20000;
+	long values[nr_cpus];
+	int key, map_fd, i;
+
+	map_fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key),
+				sizeof(values[0]), nr_keys);
+	if (map_fd < 0) {
+		printf("failed to create per-cpu arraymap '%s'\n",
+		       strerror(errno));
+		exit(1);
+	}
+
+	for (i = 0; i < nr_cpus; i++)
+		values[i] = i + 10;
+
+	for (key = 0; key < nr_keys; key++)
+		assert(bpf_update_elem(map_fd, &key, values, BPF_ANY) == 0);
+
+	for (key = 0; key < nr_keys; key++) {
+		for (i = 0; i < nr_cpus; i++)
+			values[i] = 0;
+		assert(bpf_lookup_elem(map_fd, &key, values) == 0);
+		for (i = 0; i < nr_cpus; i++)
+			assert(values[i] == i + 10);
+	}
+
+	close(map_fd);
+}
+
+static void test_percpu_arraymap_sanity(int i, void *data)
+{
+	unsigned nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+	long values[nr_cpus];
+	int key, next_key, map_fd;
+
+	map_fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key),
+				sizeof(values[0]), 2);
+	if (map_fd < 0) {
+		printf("failed to create arraymap '%s'\n", strerror(errno));
+		exit(1);
+	}
+
+	for (i = 0; i < nr_cpus; i++)
+		values[i] = i + 100;
+
+	key = 1;
+	/* insert key=1 element */
+	assert(bpf_update_elem(map_fd, &key, values, BPF_ANY) == 0);
+
+	values[0] = 0;
+	assert(bpf_update_elem(map_fd, &key, values, BPF_NOEXIST) == -1 &&
+	       errno == EEXIST);
+
+	/* check that key=1 can be found */
+	assert(bpf_lookup_elem(map_fd, &key, values) == 0 && values[0] == 100);
+
+	key = 0;
+	/* check that key=0 is also found and zero initialized */
+	assert(bpf_lookup_elem(map_fd, &key, values) == 0 &&
+	       values[0] == 0 && values[nr_cpus - 1] == 0);
+
+	/* check that key=2 cannot be inserted due to max_entries limit */
+	key = 2;
+	assert(bpf_update_elem(map_fd, &key, values, BPF_EXIST) == -1 &&
+	       errno == E2BIG);
+
+	/* check that key = 2 doesn't exist */
+	assert(bpf_lookup_elem(map_fd, &key, values) == -1 && errno == ENOENT);
+
+	/* iterate over two elements */
+	assert(bpf_get_next_key(map_fd, &key, &next_key) == 0 &&
+	       next_key == 0);
+	assert(bpf_get_next_key(map_fd, &next_key, &next_key) == 0 &&
+	       next_key == 1);
+	assert(bpf_get_next_key(map_fd, &next_key, &next_key) == -1 &&
+	       errno == ENOENT);
+
+	/* delete shouldn't succeed */
+	key = 1;
+	assert(bpf_delete_elem(map_fd, &key) == -1 && errno == EINVAL);
+
+	close(map_fd);
+}
+
 #define MAP_SIZE (32 * 1024)
 
 static void test_map_large(void)
 {
@@ -209,7 +391,9 @@ static void run_parallel(int tasks, void (*fn)(int i, void *data), void *data)
 static void test_map_stress(void)
 {
 	run_parallel(100, test_hashmap_sanity, NULL);
+	run_parallel(100, test_percpu_hashmap_sanity, NULL);
 	run_parallel(100, test_arraymap_sanity, NULL);
+	run_parallel(100, test_percpu_arraymap_sanity, NULL);
 }
 
 #define TASKS 1024
@@ -282,7 +466,11 @@ static void test_map_parallel(void)
 int main(void)
 {
 	test_hashmap_sanity(0, NULL);
+	test_percpu_hashmap_sanity(0, NULL);
 	test_arraymap_sanity(0, NULL);
+	test_percpu_arraymap_sanity(0, NULL);
+	test_percpu_arraymap_many_keys();
 	test_map_large();
 	test_map_parallel();
 	test_map_stress();
...
@@ -70,7 +70,7 @@ struct hist_key {
 };
 
 struct bpf_map_def SEC("maps") my_hist_map = {
-	.type = BPF_MAP_TYPE_HASH,
+	.type = BPF_MAP_TYPE_PERCPU_HASH,
 	.key_size = sizeof(struct hist_key),
 	.value_size = sizeof(long),
 	.max_entries = 1024,
...
@@ -37,6 +37,8 @@ struct hist_key {
 static void print_hist_for_pid(int fd, void *task)
 {
 	struct hist_key key = {}, next_key;
+	unsigned int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+	long values[nr_cpus];
 	char starstr[MAX_STARS];
 	long value;
 	long data[MAX_INDEX] = {};
@@ -49,7 +51,10 @@ static void print_hist_for_pid(int fd, void *task)
 			key = next_key;
 			continue;
 		}
-		bpf_lookup_elem(fd, &next_key, &value);
+		bpf_lookup_elem(fd, &next_key, values);
+		value = 0;
+		for (i = 0; i < nr_cpus; i++)
+			value += values[i];
 		ind = next_key.index;
 		data[ind] = value;
 		if (value && ind > max_ind)
...
@@ -20,7 +20,7 @@ struct bpf_map_def SEC("maps") my_map = {
 /* kprobe is NOT a stable ABI. If kernel internals change this bpf+kprobe
  * example will no longer be meaningful
  */
-SEC("kprobe/blk_mq_start_request")
+SEC("kprobe/blk_start_request")
 int bpf_prog1(struct pt_regs *ctx)
 {
 	long rq = PT_REGS_PARM1(ctx);
@@ -42,13 +42,13 @@ static unsigned int log2l(unsigned long long n)
 #define SLOTS 100
 
 struct bpf_map_def SEC("maps") lat_map = {
-	.type = BPF_MAP_TYPE_ARRAY,
+	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
 	.key_size = sizeof(u32),
 	.value_size = sizeof(u64),
 	.max_entries = SLOTS,
 };
 
-SEC("kprobe/blk_update_request")
+SEC("kprobe/blk_account_io_completion")
 int bpf_prog2(struct pt_regs *ctx)
 {
 	long rq = PT_REGS_PARM1(ctx);
@@ -81,7 +81,7 @@ int bpf_prog2(struct pt_regs *ctx)
 	value = bpf_map_lookup_elem(&lat_map, &index);
 	if (value)
-		__sync_fetch_and_add((long *)value, 1);
+		*value += 1;
 
 	return 0;
 }
...
@@ -20,11 +20,13 @@
 static void clear_stats(int fd)
 {
+	unsigned int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+	__u64 values[nr_cpus];
 	__u32 key;
-	__u64 value = 0;
 
+	memset(values, 0, sizeof(values));
 	for (key = 0; key < SLOTS; key++)
-		bpf_update_elem(fd, &key, &value, BPF_ANY);
+		bpf_update_elem(fd, &key, values, BPF_ANY);
 }
 
 const char *color[] = {
@@ -75,15 +77,20 @@ static void print_banner(void)
 static void print_hist(int fd)
 {
-	__u32 key;
-	__u64 value;
-	__u64 cnt[SLOTS];
-	__u64 max_cnt = 0;
+	unsigned int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
 	__u64 total_events = 0;
+	long values[nr_cpus];
+	__u64 max_cnt = 0;
+	__u64 cnt[SLOTS];
+	__u64 value;
+	__u32 key;
+	int i;
 
 	for (key = 0; key < SLOTS; key++) {
+		bpf_lookup_elem(fd, &key, values);
 		value = 0;
-		bpf_lookup_elem(fd, &key, &value);
+		for (i = 0; i < nr_cpus; i++)
+			value += values[i];
 		cnt[key] = value;
 		total_events += value;
 		if (value > max_cnt)
...