linux/tools/testing/selftests/bpf/test_maps.c

628 lines
16 KiB
C
Raw Normal View History

/*
* Testsuite for eBPF maps
*
* Copyright (c) 2014 PLUMgrid, http://plumgrid.com
* Copyright (c) 2016 Facebook
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
#include <stdio.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <assert.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <sys/resource.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>
bpf: fix multiple issues in selftest suite and samples 1) The test_lru_map and test_lru_dist fails building on my machine since the sys/resource.h header is not included. 2) test_verifier fails in one test case where we try to call an invalid function, since the verifier log output changed wrt printing function names. 3) Current selftest suite code relies on sysconf(_SC_NPROCESSORS_CONF) for retrieving the number of possible CPUs. This is broken at least in our scenario and really just doesn't work. glibc tries a number of things for retrieving _SC_NPROCESSORS_CONF. First it tries equivalent of /sys/devices/system/cpu/cpu[0-9]* | wc -l, if that fails, depending on the config, it either tries to count CPUs in /proc/cpuinfo, or returns the _SC_NPROCESSORS_ONLN value instead. If /proc/cpuinfo has some issue, it returns just 1 worst case. This oddity is nothing new [1], but semantics/behaviour seems to be settled. _SC_NPROCESSORS_ONLN will parse /sys/devices/system/cpu/online, if that fails it looks into /proc/stat for cpuX entries, and if also that fails for some reason, /proc/cpuinfo is consulted (and returning 1 if unlikely all breaks down). While that might match num_possible_cpus() from the kernel in some cases, it's really not guaranteed with CPU hotplugging, and can result in a buffer overflow since the array in user space could have too few number of slots, and on perpcu map lookup, the kernel will write beyond that memory of the value buffer. William Tu reported such mismatches: [...] The fact that sysconf(_SC_NPROCESSORS_CONF) != num_possible_cpu() happens when CPU hotadd is enabled. For example, in Fusion when setting vcpu.hotadd = "TRUE" or in KVM, setting ./qemu-system-x86_64 -smp 2, maxcpus=4 ... the num_possible_cpu() will be 4 and sysconf() will be 2 [2]. [...] Documentation/cputopology.txt says /sys/devices/system/cpu/possible outputs cpu_possible_mask. 
That is the same as in num_possible_cpus(), so first step would be to fix the _SC_NPROCESSORS_CONF calls with our own implementation. Later, we could add support to bpf(2) for passing a mask via CPU_SET(3), for example, to just select a subset of CPUs. BPF samples code needs this fix as well (at least so that people stop copying this). Thus, define bpf_num_possible_cpus() once in selftests and import it from there for the sample code to avoid duplicating it. The remaining sysconf(_SC_NPROCESSORS_CONF) in samples are unrelated. After all three issues are fixed, the test suite runs fine again: # make run_tests | grep self selftests: test_verifier [PASS] selftests: test_maps [PASS] selftests: test_lru_map [PASS] selftests: test_kmod.sh [PASS] [1] https://www.sourceware.org/ml/libc-alpha/2011-06/msg00079.html [2] https://www.mail-archive.com/netdev@vger.kernel.org/msg121183.html Fixes: 3059303f59cf ("samples/bpf: update tracex[23] examples to use per-cpu maps") Fixes: 86af8b4191d2 ("Add sample for adding simple drop program to link") Fixes: df570f577231 ("samples/bpf: unit test for BPF_MAP_TYPE_PERCPU_ARRAY") Fixes: e15596717948 ("samples/bpf: unit test for BPF_MAP_TYPE_PERCPU_HASH") Fixes: ebb676daa1a3 ("bpf: Print function name in addition to function id") Fixes: 5db58faf989f ("bpf: Add tests for the LRU bpf_htab") Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Cc: William Tu <u9012063@gmail.com> Acked-by: Alexei Starovoitov <ast@kernel.org> Signed-off-by: David S. Miller <davem@davemloft.net>
2016-11-26 08:28:09 +08:00
#include "bpf_util.h"
/* Map-creation flags passed to bpf_create_map() by the hash-map tests.
 * main() runs the suite once with 0 and once with BPF_F_NO_PREALLOC.
 */
static int map_flags;
/* Walk a two-entry BPF_MAP_TYPE_HASH map through its basic operations:
 * update with each flag, lookup, the max_entries limit, key iteration and
 * deletion.
 *
 * @task: unused; present so the function can be handed to run_parallel().
 * @data: unused.
 *
 * Exits with status 1 if the map cannot be created; any other failure
 * trips an assert.
 */
static void test_hashmap(int task, void *data)
{
	long long k, val, head, succ;
	int map_fd;

	map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(k), sizeof(val),
				2, map_flags);
	if (map_fd < 0) {
		printf("Failed to create hashmap '%s'!\n", strerror(errno));
		exit(1);
	}

	/* A brand-new key=1 is accepted with BPF_ANY. */
	k = 1;
	val = 1234;
	assert(bpf_map_update_elem(map_fd, &k, &val, BPF_ANY) == 0);

	/* BPF_NOEXIST only creates entries, and key=1 is already present. */
	val = 0;
	assert(bpf_map_update_elem(map_fd, &k, &val, BPF_NOEXIST) == -1 &&
	       errno == EEXIST);

	/* -1 is not a valid flag word. */
	assert(bpf_map_update_elem(map_fd, &k, &val, -1) == -1 &&
	       errno == EINVAL);

	/* The two rejected updates must not have touched the stored value. */
	assert(bpf_map_lookup_elem(map_fd, &k, &val) == 0 && val == 1234);

	/* key=2 has not been added yet, so lookup fails ... */
	k = 2;
	assert(bpf_map_lookup_elem(map_fd, &k, &val) == -1 && errno == ENOENT);

	/* ... and BPF_EXIST, which only replaces entries, fails as well ... */
	assert(bpf_map_update_elem(map_fd, &k, &val, BPF_EXIST) == -1 &&
	       errno == ENOENT);

	/* ... while BPF_NOEXIST creates it. */
	assert(bpf_map_update_elem(map_fd, &k, &val, BPF_NOEXIST) == 0);

	/* Both slots are taken now (key=1, key=2); a third key must be
	 * refused because of max_entries.
	 */
	k = 0;
	assert(bpf_map_update_elem(map_fd, &k, &val, BPF_NOEXIST) == -1 &&
	       errno == E2BIG);

	/* Replacing entries that already exist still works on a full map. */
	k = 1;
	assert(bpf_map_update_elem(map_fd, &k, &val, BPF_EXIST) == 0);
	k = 2;
	assert(bpf_map_update_elem(map_fd, &k, &val, BPF_ANY) == 0);

	/* The replacements above must not have freed up room for key=3. */
	k = 3;
	assert(bpf_map_update_elem(map_fd, &k, &val, BPF_NOEXIST) == -1 &&
	       errno == E2BIG);

	/* key=0 was never stored, so there is nothing to delete. */
	k = 0;
	assert(bpf_map_delete_elem(map_fd, &k) == -1 && errno == ENOENT);

	/* Iteration: a NULL key yields the first key, and so does a key
	 * that is not in the map (k is still 0 here).
	 */
	assert(bpf_map_get_next_key(map_fd, NULL, &head) == 0 &&
	       (head == 1 || head == 2));
	assert(bpf_map_get_next_key(map_fd, &k, &succ) == 0 &&
	       (succ == head));

	/* The second step returns the other key, the third runs off the end. */
	assert(bpf_map_get_next_key(map_fd, &succ, &succ) == 0 &&
	       (succ == 1 || succ == 2) &&
	       (succ != head));
	assert(bpf_map_get_next_key(map_fd, &succ, &succ) == -1 &&
	       errno == ENOENT);

	/* Remove both entries; deleting key=2 a second time must fail. */
	k = 1;
	assert(bpf_map_delete_elem(map_fd, &k) == 0);
	k = 2;
	assert(bpf_map_delete_elem(map_fd, &k) == 0);
	assert(bpf_map_delete_elem(map_fd, &k) == -1 && errno == ENOENT);

	/* With nothing left, iteration reports ENOENT from either start. */
	k = 0;
	assert(bpf_map_get_next_key(map_fd, NULL, &succ) == -1 &&
	       errno == ENOENT);
	assert(bpf_map_get_next_key(map_fd, &k, &succ) == -1 &&
	       errno == ENOENT);

	close(map_fd);
}
bpf: fix hashmap extra_elems logic In both kmalloc and prealloc mode the bpf_map_update_elem() is using per-cpu extra_elems to do atomic update when the map is full. There are two issues with it. The logic can be misused, since it allows max_entries+num_cpus elements to be present in the map. And alloc_extra_elems() at map creation time can fail percpu alloc for large map values with a warn: WARNING: CPU: 3 PID: 2752 at ../mm/percpu.c:892 pcpu_alloc+0x119/0xa60 illegal size (32824) or align (8) for percpu allocation The fixes for both of these issues are different for kmalloc and prealloc modes. For prealloc mode allocate extra num_possible_cpus elements and store their pointers into extra_elems array instead of actual elements. Hence we can use these hidden(spare) elements not only when the map is full but during bpf_map_update_elem() that replaces existing element too. That also improves performance, since pcpu_freelist_pop/push is avoided. Unfortunately this approach cannot be used for kmalloc mode which needs to kfree elements after rcu grace period. Therefore switch it back to normal kmalloc even when full and old element exists like it was prior to commit 6c9059817432 ("bpf: pre-allocate hash map elements"). Add tests to check for over max_entries and large map values. Reported-by: Dave Jones <davej@codemonkey.org.uk> Fixes: 6c9059817432 ("bpf: pre-allocate hash map elements") Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: Daniel Borkmann <daniel@iogearbox.net> Acked-by: Martin KaFai Lau <kafai@fb.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2017-03-22 10:05:04 +08:00
/* Create and immediately close a two-entry hash map for every combination
 * of power-of-two key size (1..512 bytes) and value size (1..2^18 bytes).
 *
 * @task: unused; present so the function can be handed to run_parallel().
 * @data: unused.
 *
 * Exits with status 1 on the first combination that cannot be created.
 */
static void test_hashmap_sizes(int task, void *data)
{
	int key_sz, val_sz, map_fd;

	for (key_sz = 1; key_sz <= 512; key_sz *= 2) {
		for (val_sz = 1; val_sz <= 1 << 18; val_sz *= 2) {
			map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, key_sz,
						val_sz, 2, map_flags);
			if (map_fd < 0) {
				printf("Failed to create hashmap key=%d value=%d '%s'\n",
				       key_sz, val_sz, strerror(errno));
				exit(1);
			}
			close(map_fd);

			/* Short pause so the kernel can tear the map down
			 * before the next one is created.
			 */
			usleep(10);
		}
	}
}
static void test_hashmap_percpu(int task, void *data)
{
bpf: fix multiple issues in selftest suite and samples 1) The test_lru_map and test_lru_dist fails building on my machine since the sys/resource.h header is not included. 2) test_verifier fails in one test case where we try to call an invalid function, since the verifier log output changed wrt printing function names. 3) Current selftest suite code relies on sysconf(_SC_NPROCESSORS_CONF) for retrieving the number of possible CPUs. This is broken at least in our scenario and really just doesn't work. glibc tries a number of things for retrieving _SC_NPROCESSORS_CONF. First it tries equivalent of /sys/devices/system/cpu/cpu[0-9]* | wc -l, if that fails, depending on the config, it either tries to count CPUs in /proc/cpuinfo, or returns the _SC_NPROCESSORS_ONLN value instead. If /proc/cpuinfo has some issue, it returns just 1 worst case. This oddity is nothing new [1], but semantics/behaviour seems to be settled. _SC_NPROCESSORS_ONLN will parse /sys/devices/system/cpu/online, if that fails it looks into /proc/stat for cpuX entries, and if also that fails for some reason, /proc/cpuinfo is consulted (and returning 1 if unlikely all breaks down). While that might match num_possible_cpus() from the kernel in some cases, it's really not guaranteed with CPU hotplugging, and can result in a buffer overflow since the array in user space could have too few number of slots, and on perpcu map lookup, the kernel will write beyond that memory of the value buffer. William Tu reported such mismatches: [...] The fact that sysconf(_SC_NPROCESSORS_CONF) != num_possible_cpu() happens when CPU hotadd is enabled. For example, in Fusion when setting vcpu.hotadd = "TRUE" or in KVM, setting ./qemu-system-x86_64 -smp 2, maxcpus=4 ... the num_possible_cpu() will be 4 and sysconf() will be 2 [2]. [...] Documentation/cputopology.txt says /sys/devices/system/cpu/possible outputs cpu_possible_mask. 
That is the same as in num_possible_cpus(), so first step would be to fix the _SC_NPROCESSORS_CONF calls with our own implementation. Later, we could add support to bpf(2) for passing a mask via CPU_SET(3), for example, to just select a subset of CPUs. BPF samples code needs this fix as well (at least so that people stop copying this). Thus, define bpf_num_possible_cpus() once in selftests and import it from there for the sample code to avoid duplicating it. The remaining sysconf(_SC_NPROCESSORS_CONF) in samples are unrelated. After all three issues are fixed, the test suite runs fine again: # make run_tests | grep self selftests: test_verifier [PASS] selftests: test_maps [PASS] selftests: test_lru_map [PASS] selftests: test_kmod.sh [PASS] [1] https://www.sourceware.org/ml/libc-alpha/2011-06/msg00079.html [2] https://www.mail-archive.com/netdev@vger.kernel.org/msg121183.html Fixes: 3059303f59cf ("samples/bpf: update tracex[23] examples to use per-cpu maps") Fixes: 86af8b4191d2 ("Add sample for adding simple drop program to link") Fixes: df570f577231 ("samples/bpf: unit test for BPF_MAP_TYPE_PERCPU_ARRAY") Fixes: e15596717948 ("samples/bpf: unit test for BPF_MAP_TYPE_PERCPU_HASH") Fixes: ebb676daa1a3 ("bpf: Print function name in addition to function id") Fixes: 5db58faf989f ("bpf: Add tests for the LRU bpf_htab") Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Cc: William Tu <u9012063@gmail.com> Acked-by: Alexei Starovoitov <ast@kernel.org> Signed-off-by: David S. Miller <davem@davemloft.net>
2016-11-26 08:28:09 +08:00
unsigned int nr_cpus = bpf_num_possible_cpus();
BPF_DECLARE_PERCPU(long, value);
long long key, next_key, first_key;
int expected_key_mask = 0;
int fd, i;
fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_HASH, sizeof(key),
sizeof(bpf_percpu(value, 0)), 2, map_flags);
if (fd < 0) {
printf("Failed to create hashmap '%s'!\n", strerror(errno));
exit(1);
}
for (i = 0; i < nr_cpus; i++)
bpf_percpu(value, i) = i + 100;
key = 1;
/* Insert key=1 element. */
assert(!(expected_key_mask & key));
assert(bpf_map_update_elem(fd, &key, value, BPF_ANY) == 0);
expected_key_mask |= key;
/* BPF_NOEXIST means add new element if it doesn't exist. */
assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) == -1 &&
/* key=1 already exists. */
errno == EEXIST);
/* -1 is an invalid flag. */
assert(bpf_map_update_elem(fd, &key, value, -1) == -1 &&
errno == EINVAL);
/* Check that key=1 can be found. Value could be 0 if the lookup
* was run from a different CPU.
*/
bpf_percpu(value, 0) = 1;
assert(bpf_map_lookup_elem(fd, &key, value) == 0 &&
bpf_percpu(value, 0) == 100);
key = 2;
/* Check that key=2 is not found. */
assert(bpf_map_lookup_elem(fd, &key, value) == -1 && errno == ENOENT);
/* BPF_EXIST means update existing element. */
assert(bpf_map_update_elem(fd, &key, value, BPF_EXIST) == -1 &&
/* key=2 is not there. */
errno == ENOENT);
/* Insert key=2 element. */
assert(!(expected_key_mask & key));
assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) == 0);
expected_key_mask |= key;
/* key=1 and key=2 were inserted, check that key=0 cannot be
* inserted due to max_entries limit.
*/
key = 0;
assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) == -1 &&
errno == E2BIG);
/* Check that key = 0 doesn't exist. */
assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
/* Iterate over two elements. */
assert(bpf_map_get_next_key(fd, NULL, &first_key) == 0 &&
((expected_key_mask & first_key) == first_key));
while (!bpf_map_get_next_key(fd, &key, &next_key)) {
if (first_key) {
assert(next_key == first_key);
first_key = 0;
}
assert((expected_key_mask & next_key) == next_key);
expected_key_mask &= ~next_key;
assert(bpf_map_lookup_elem(fd, &next_key, value) == 0);
for (i = 0; i < nr_cpus; i++)
assert(bpf_percpu(value, i) == i + 100);
key = next_key;
}
assert(errno == ENOENT);
/* Update with BPF_EXIST. */
key = 1;
assert(bpf_map_update_elem(fd, &key, value, BPF_EXIST) == 0);
/* Delete both elements. */
key = 1;
assert(bpf_map_delete_elem(fd, &key) == 0);
key = 2;
assert(bpf_map_delete_elem(fd, &key) == 0);
assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
key = 0;
/* Check that map is empty. */
assert(bpf_map_get_next_key(fd, NULL, &next_key) == -1 &&
errno == ENOENT);
assert(bpf_map_get_next_key(fd, &key, &next_key) == -1 &&
errno == ENOENT);
close(fd);
}
/* Fill a 100000-entry hash map with key == value, then walk it three
 * times with bpf_map_get_next_key(): once read-only, once incrementing
 * every value while walking, and once to confirm every value is now
 * key + 1. Each walk must visit exactly max_entries keys.
 *
 * @task: unused; present so the function can be handed to run_parallel().
 * @data: unused.
 *
 * Exits with status 1 if the map cannot be created.
 */
static void test_hashmap_walk(int task, void *data)
{
	int map_fd, count, nr_entries = 100000;
	long long k, val, succ;
	bool more = true;

	map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(k), sizeof(val),
				nr_entries, map_flags);
	if (map_fd < 0) {
		printf("Failed to create hashmap '%s'!\n", strerror(errno));
		exit(1);
	}

	/* Populate: every key maps to itself. */
	for (count = 0; count < nr_entries; count++) {
		k = count;
		val = k;
		assert(bpf_map_update_elem(map_fd, &k, &val, BPF_NOEXIST) == 0);
	}

	/* Walk 1: start from NULL, then chain from the previous key; every
	 * key handed back must be retrievable.
	 */
	count = 0;
	while (bpf_map_get_next_key(map_fd, count ? &k : NULL, &succ) == 0) {
		k = succ;
		assert(bpf_map_lookup_elem(map_fd, &k, &val) == 0);
		count++;
	}
	assert(count == nr_entries);

	/* Walk 2: fetch the successor before touching the current entry,
	 * then bump the current value in place.
	 */
	assert(bpf_map_get_next_key(map_fd, NULL, &k) == 0);
	count = 0;
	do {
		more = bpf_map_get_next_key(map_fd, &k, &succ) == 0;
		assert(bpf_map_lookup_elem(map_fd, &k, &val) == 0);
		val++;
		assert(bpf_map_update_elem(map_fd, &k, &val, BPF_EXIST) == 0);
		k = succ;
		count++;
	} while (more);
	assert(count == nr_entries);

	/* Walk 3: every value must now be exactly one above its key. */
	count = 0;
	while (bpf_map_get_next_key(map_fd, count ? &k : NULL, &succ) == 0) {
		k = succ;
		assert(bpf_map_lookup_elem(map_fd, &k, &val) == 0);
		assert(val - 1 == k);
		count++;
	}
	assert(count == nr_entries);

	close(map_fd);
}
/* Walk a two-slot BPF_MAP_TYPE_ARRAY map through update, lookup,
 * out-of-range access, key iteration and (unsupported) deletion.
 * The map is always created with flags 0, not map_flags.
 *
 * @task: unused; present so the function can be handed to run_parallel().
 * @data: unused.
 *
 * Exits with status 1 if the map cannot be created; any other failure
 * trips an assert.
 */
static void test_arraymap(int task, void *data)
{
	int idx, succ, map_fd;
	long long val;

	map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(idx), sizeof(val),
				2, 0);
	if (map_fd < 0) {
		printf("Failed to create arraymap '%s'!\n", strerror(errno));
		exit(1);
	}

	/* Write slot 1. */
	idx = 1;
	val = 1234;
	assert(bpf_map_update_elem(map_fd, &idx, &val, BPF_ANY) == 0);

	/* BPF_NOEXIST on slot 1 is rejected with EEXIST. */
	val = 0;
	assert(bpf_map_update_elem(map_fd, &idx, &val, BPF_NOEXIST) == -1 &&
	       errno == EEXIST);

	/* Slot 1 still holds the value written first. */
	assert(bpf_map_lookup_elem(map_fd, &idx, &val) == 0 && val == 1234);

	/* Slot 0 was never written, yet it is found and reads back as zero. */
	idx = 0;
	assert(bpf_map_lookup_elem(map_fd, &idx, &val) == 0 && val == 0);

	/* Index 2 lies beyond max_entries: updating it fails with E2BIG ... */
	idx = 2;
	assert(bpf_map_update_elem(map_fd, &idx, &val, BPF_EXIST) == -1 &&
	       errno == E2BIG);

	/* ... and looking it up fails with ENOENT. */
	assert(bpf_map_lookup_elem(map_fd, &idx, &val) == -1 && errno == ENOENT);

	/* Iteration: both NULL and the out-of-range index 2 start at 0,
	 * then comes 1, then the end.
	 */
	assert(bpf_map_get_next_key(map_fd, NULL, &succ) == 0 &&
	       succ == 0);
	assert(bpf_map_get_next_key(map_fd, &idx, &succ) == 0 &&
	       succ == 0);
	assert(bpf_map_get_next_key(map_fd, &succ, &succ) == 0 &&
	       succ == 1);
	assert(bpf_map_get_next_key(map_fd, &succ, &succ) == -1 &&
	       errno == ENOENT);

	/* Deleting from an array map is rejected with EINVAL. */
	idx = 1;
	assert(bpf_map_delete_elem(map_fd, &idx) == -1 && errno == EINVAL);

	close(map_fd);
}
static void test_arraymap_percpu(int task, void *data)
{
bpf: fix multiple issues in selftest suite and samples 1) The test_lru_map and test_lru_dist fails building on my machine since the sys/resource.h header is not included. 2) test_verifier fails in one test case where we try to call an invalid function, since the verifier log output changed wrt printing function names. 3) Current selftest suite code relies on sysconf(_SC_NPROCESSORS_CONF) for retrieving the number of possible CPUs. This is broken at least in our scenario and really just doesn't work. glibc tries a number of things for retrieving _SC_NPROCESSORS_CONF. First it tries equivalent of /sys/devices/system/cpu/cpu[0-9]* | wc -l, if that fails, depending on the config, it either tries to count CPUs in /proc/cpuinfo, or returns the _SC_NPROCESSORS_ONLN value instead. If /proc/cpuinfo has some issue, it returns just 1 worst case. This oddity is nothing new [1], but semantics/behaviour seems to be settled. _SC_NPROCESSORS_ONLN will parse /sys/devices/system/cpu/online, if that fails it looks into /proc/stat for cpuX entries, and if also that fails for some reason, /proc/cpuinfo is consulted (and returning 1 if unlikely all breaks down). While that might match num_possible_cpus() from the kernel in some cases, it's really not guaranteed with CPU hotplugging, and can result in a buffer overflow since the array in user space could have too few number of slots, and on perpcu map lookup, the kernel will write beyond that memory of the value buffer. William Tu reported such mismatches: [...] The fact that sysconf(_SC_NPROCESSORS_CONF) != num_possible_cpu() happens when CPU hotadd is enabled. For example, in Fusion when setting vcpu.hotadd = "TRUE" or in KVM, setting ./qemu-system-x86_64 -smp 2, maxcpus=4 ... the num_possible_cpu() will be 4 and sysconf() will be 2 [2]. [...] Documentation/cputopology.txt says /sys/devices/system/cpu/possible outputs cpu_possible_mask. 
That is the same as in num_possible_cpus(), so first step would be to fix the _SC_NPROCESSORS_CONF calls with our own implementation. Later, we could add support to bpf(2) for passing a mask via CPU_SET(3), for example, to just select a subset of CPUs. BPF samples code needs this fix as well (at least so that people stop copying this). Thus, define bpf_num_possible_cpus() once in selftests and import it from there for the sample code to avoid duplicating it. The remaining sysconf(_SC_NPROCESSORS_CONF) in samples are unrelated. After all three issues are fixed, the test suite runs fine again: # make run_tests | grep self selftests: test_verifier [PASS] selftests: test_maps [PASS] selftests: test_lru_map [PASS] selftests: test_kmod.sh [PASS] [1] https://www.sourceware.org/ml/libc-alpha/2011-06/msg00079.html [2] https://www.mail-archive.com/netdev@vger.kernel.org/msg121183.html Fixes: 3059303f59cf ("samples/bpf: update tracex[23] examples to use per-cpu maps") Fixes: 86af8b4191d2 ("Add sample for adding simple drop program to link") Fixes: df570f577231 ("samples/bpf: unit test for BPF_MAP_TYPE_PERCPU_ARRAY") Fixes: e15596717948 ("samples/bpf: unit test for BPF_MAP_TYPE_PERCPU_HASH") Fixes: ebb676daa1a3 ("bpf: Print function name in addition to function id") Fixes: 5db58faf989f ("bpf: Add tests for the LRU bpf_htab") Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Cc: William Tu <u9012063@gmail.com> Acked-by: Alexei Starovoitov <ast@kernel.org> Signed-off-by: David S. Miller <davem@davemloft.net>
2016-11-26 08:28:09 +08:00
unsigned int nr_cpus = bpf_num_possible_cpus();
BPF_DECLARE_PERCPU(long, values);
int key, next_key, fd, i;
fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key),
sizeof(bpf_percpu(values, 0)), 2, 0);
if (fd < 0) {
printf("Failed to create arraymap '%s'!\n", strerror(errno));
exit(1);
}
for (i = 0; i < nr_cpus; i++)
bpf_percpu(values, i) = i + 100;
key = 1;
/* Insert key=1 element. */
assert(bpf_map_update_elem(fd, &key, values, BPF_ANY) == 0);
bpf_percpu(values, 0) = 0;
assert(bpf_map_update_elem(fd, &key, values, BPF_NOEXIST) == -1 &&
errno == EEXIST);
/* Check that key=1 can be found. */
assert(bpf_map_lookup_elem(fd, &key, values) == 0 &&
bpf_percpu(values, 0) == 100);
key = 0;
/* Check that key=0 is also found and zero initialized. */
assert(bpf_map_lookup_elem(fd, &key, values) == 0 &&
bpf_percpu(values, 0) == 0 &&
bpf_percpu(values, nr_cpus - 1) == 0);
/* Check that key=2 cannot be inserted due to max_entries limit. */
key = 2;
assert(bpf_map_update_elem(fd, &key, values, BPF_EXIST) == -1 &&
errno == E2BIG);
/* Check that key = 2 doesn't exist. */
assert(bpf_map_lookup_elem(fd, &key, values) == -1 && errno == ENOENT);
/* Iterate over two elements. */
assert(bpf_map_get_next_key(fd, NULL, &next_key) == 0 &&
next_key == 0);
assert(bpf_map_get_next_key(fd, &key, &next_key) == 0 &&
next_key == 0);
assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 &&
next_key == 1);
assert(bpf_map_get_next_key(fd, &next_key, &next_key) == -1 &&
errno == ENOENT);
/* Delete shouldn't succeed. */
key = 1;
assert(bpf_map_delete_elem(fd, &key) == -1 && errno == EINVAL);
close(fd);
}
static void test_arraymap_percpu_many_keys(void)
{
bpf: fix multiple issues in selftest suite and samples 1) The test_lru_map and test_lru_dist fails building on my machine since the sys/resource.h header is not included. 2) test_verifier fails in one test case where we try to call an invalid function, since the verifier log output changed wrt printing function names. 3) Current selftest suite code relies on sysconf(_SC_NPROCESSORS_CONF) for retrieving the number of possible CPUs. This is broken at least in our scenario and really just doesn't work. glibc tries a number of things for retrieving _SC_NPROCESSORS_CONF. First it tries equivalent of /sys/devices/system/cpu/cpu[0-9]* | wc -l, if that fails, depending on the config, it either tries to count CPUs in /proc/cpuinfo, or returns the _SC_NPROCESSORS_ONLN value instead. If /proc/cpuinfo has some issue, it returns just 1 worst case. This oddity is nothing new [1], but semantics/behaviour seems to be settled. _SC_NPROCESSORS_ONLN will parse /sys/devices/system/cpu/online, if that fails it looks into /proc/stat for cpuX entries, and if also that fails for some reason, /proc/cpuinfo is consulted (and returning 1 if unlikely all breaks down). While that might match num_possible_cpus() from the kernel in some cases, it's really not guaranteed with CPU hotplugging, and can result in a buffer overflow since the array in user space could have too few number of slots, and on perpcu map lookup, the kernel will write beyond that memory of the value buffer. William Tu reported such mismatches: [...] The fact that sysconf(_SC_NPROCESSORS_CONF) != num_possible_cpu() happens when CPU hotadd is enabled. For example, in Fusion when setting vcpu.hotadd = "TRUE" or in KVM, setting ./qemu-system-x86_64 -smp 2, maxcpus=4 ... the num_possible_cpu() will be 4 and sysconf() will be 2 [2]. [...] Documentation/cputopology.txt says /sys/devices/system/cpu/possible outputs cpu_possible_mask. 
That is the same as in num_possible_cpus(), so first step would be to fix the _SC_NPROCESSORS_CONF calls with our own implementation. Later, we could add support to bpf(2) for passing a mask via CPU_SET(3), for example, to just select a subset of CPUs. BPF samples code needs this fix as well (at least so that people stop copying this). Thus, define bpf_num_possible_cpus() once in selftests and import it from there for the sample code to avoid duplicating it. The remaining sysconf(_SC_NPROCESSORS_CONF) in samples are unrelated. After all three issues are fixed, the test suite runs fine again: # make run_tests | grep self selftests: test_verifier [PASS] selftests: test_maps [PASS] selftests: test_lru_map [PASS] selftests: test_kmod.sh [PASS] [1] https://www.sourceware.org/ml/libc-alpha/2011-06/msg00079.html [2] https://www.mail-archive.com/netdev@vger.kernel.org/msg121183.html Fixes: 3059303f59cf ("samples/bpf: update tracex[23] examples to use per-cpu maps") Fixes: 86af8b4191d2 ("Add sample for adding simple drop program to link") Fixes: df570f577231 ("samples/bpf: unit test for BPF_MAP_TYPE_PERCPU_ARRAY") Fixes: e15596717948 ("samples/bpf: unit test for BPF_MAP_TYPE_PERCPU_HASH") Fixes: ebb676daa1a3 ("bpf: Print function name in addition to function id") Fixes: 5db58faf989f ("bpf: Add tests for the LRU bpf_htab") Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Cc: William Tu <u9012063@gmail.com> Acked-by: Alexei Starovoitov <ast@kernel.org> Signed-off-by: David S. Miller <davem@davemloft.net>
2016-11-26 08:28:09 +08:00
unsigned int nr_cpus = bpf_num_possible_cpus();
BPF_DECLARE_PERCPU(long, values);
bpf: fix hashmap extra_elems logic In both kmalloc and prealloc mode the bpf_map_update_elem() is using per-cpu extra_elems to do atomic update when the map is full. There are two issues with it. The logic can be misused, since it allows max_entries+num_cpus elements to be present in the map. And alloc_extra_elems() at map creation time can fail percpu alloc for large map values with a warn: WARNING: CPU: 3 PID: 2752 at ../mm/percpu.c:892 pcpu_alloc+0x119/0xa60 illegal size (32824) or align (8) for percpu allocation The fixes for both of these issues are different for kmalloc and prealloc modes. For prealloc mode allocate extra num_possible_cpus elements and store their pointers into extra_elems array instead of actual elements. Hence we can use these hidden(spare) elements not only when the map is full but during bpf_map_update_elem() that replaces existing element too. That also improves performance, since pcpu_freelist_pop/push is avoided. Unfortunately this approach cannot be used for kmalloc mode which needs to kfree elements after rcu grace period. Therefore switch it back to normal kmalloc even when full and old element exists like it was prior to commit 6c9059817432 ("bpf: pre-allocate hash map elements"). Add tests to check for over max_entries and large map values. Reported-by: Dave Jones <davej@codemonkey.org.uk> Fixes: 6c9059817432 ("bpf: pre-allocate hash map elements") Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: Daniel Borkmann <daniel@iogearbox.net> Acked-by: Martin KaFai Lau <kafai@fb.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2017-03-22 10:05:04 +08:00
/* nr_keys is not too large otherwise the test stresses percpu
* allocator more than anything else
*/
unsigned int nr_keys = 2000;
int key, fd, i;
fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key),
sizeof(bpf_percpu(values, 0)), nr_keys, 0);
if (fd < 0) {
printf("Failed to create per-cpu arraymap '%s'!\n",
strerror(errno));
exit(1);
}
for (i = 0; i < nr_cpus; i++)
bpf_percpu(values, i) = i + 10;
for (key = 0; key < nr_keys; key++)
assert(bpf_map_update_elem(fd, &key, values, BPF_ANY) == 0);
for (key = 0; key < nr_keys; key++) {
for (i = 0; i < nr_cpus; i++)
bpf_percpu(values, i) = 0;
assert(bpf_map_lookup_elem(fd, &key, values) == 0);
for (i = 0; i < nr_cpus; i++)
assert(bpf_percpu(values, i) == i + 10);
}
close(fd);
}
/* Number of entries in the maps created by test_map_large() and
 * test_map_parallel(); also the upper bound of the key range in do_work().
 */
#define MAP_SIZE (32 * 1024)
/* Fill a MAP_SIZE-entry hash map keyed by a 128-byte structure, confirm
 * that one more entry is refused, that iteration yields exactly MAP_SIZE
 * keys, and that lookup tells apart keys differing in a single field.
 *
 * Exits with status 1 if the map cannot be created.
 */
static void test_map_large(void)
{
	struct bigkey {
		int a;
		char b[116];
		long long c;
	} key;
	int map_fd, n, val;

	map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(val),
				MAP_SIZE, map_flags);
	if (map_fd < 0) {
		printf("Failed to create large map '%s'!\n", strerror(errno));
		exit(1);
	}

	/* Entry n: key is all zeroes except .c == n, value is n. */
	for (n = 0; n < MAP_SIZE; n++) {
		key = (struct bigkey) { .a = 0, .b = { 0 }, .c = n };
		val = n;
		assert(bpf_map_update_elem(map_fd, &key, &val, BPF_NOEXIST) == 0);
	}

	/* The map is full, so a key not yet present must be refused. */
	key.c = -1;
	assert(bpf_map_update_elem(map_fd, &key, &val, BPF_NOEXIST) == -1 &&
	       errno == E2BIG);

	/* A NULL start key works; then walk from a key that is not stored
	 * and count exactly MAP_SIZE steps before running off the end.
	 */
	assert(bpf_map_get_next_key(map_fd, NULL, &key) == 0);
	key.c = -1;
	for (n = 0; n < MAP_SIZE; n++)
		assert(bpf_map_get_next_key(map_fd, &key, &key) == 0);
	assert(bpf_map_get_next_key(map_fd, &key, &key) == -1 && errno == ENOENT);

	/* .c == 0 was stored with value 0 ... */
	key.c = 0;
	assert(bpf_map_lookup_elem(map_fd, &key, &val) == 0 && val == 0);

	/* ... but changing only .a gives a key that was never stored. */
	key.a = 1;
	assert(bpf_map_lookup_elem(map_fd, &key, &val) == -1 && errno == ENOENT);

	close(map_fd);
}
/* Fork @tasks children, run @fn in each of them and wait for all of them.
 *
 * @tasks: number of child processes to create.
 * @fn:    worker; called in child number i as fn(i, data), after which the
 *         child exits with status 0.
 * @data:  opaque pointer passed through to @fn unchanged.
 *
 * Exits with status 1 if a fork() fails. Asserts that every child is
 * reaped and that its wait status is 0.
 */
static void run_parallel(int tasks, void (*fn)(int task, void *data),
			 void *data)
{
	pid_t children[tasks];
	int idx, wstatus;

	for (idx = 0; idx < tasks; idx++) {
		switch (children[idx] = fork()) {
		case 0:
			/* Child: do the work and never return to the caller. */
			fn(idx, data);
			exit(0);
		case -1:
			printf("Couldn't spawn #%d process!\n", idx);
			exit(1);
		default:
			/* Parent: keep spawning. */
			break;
		}
	}

	for (idx = 0; idx < tasks; idx++) {
		assert(waitpid(children[idx], &wstatus, 0) == children[idx]);
		assert(wstatus == 0);
	}
}
static void test_map_stress(void)
{
run_parallel(100, test_hashmap, NULL);
run_parallel(100, test_hashmap_percpu, NULL);
bpf: fix hashmap extra_elems logic In both kmalloc and prealloc mode the bpf_map_update_elem() is using per-cpu extra_elems to do atomic update when the map is full. There are two issues with it. The logic can be misused, since it allows max_entries+num_cpus elements to be present in the map. And alloc_extra_elems() at map creation time can fail percpu alloc for large map values with a warn: WARNING: CPU: 3 PID: 2752 at ../mm/percpu.c:892 pcpu_alloc+0x119/0xa60 illegal size (32824) or align (8) for percpu allocation The fixes for both of these issues are different for kmalloc and prealloc modes. For prealloc mode allocate extra num_possible_cpus elements and store their pointers into extra_elems array instead of actual elements. Hence we can use these hidden(spare) elements not only when the map is full but during bpf_map_update_elem() that replaces existing element too. That also improves performance, since pcpu_freelist_pop/push is avoided. Unfortunately this approach cannot be used for kmalloc mode which needs to kfree elements after rcu grace period. Therefore switch it back to normal kmalloc even when full and old element exists like it was prior to commit 6c9059817432 ("bpf: pre-allocate hash map elements"). Add tests to check for over max_entries and large map values. Reported-by: Dave Jones <davej@codemonkey.org.uk> Fixes: 6c9059817432 ("bpf: pre-allocate hash map elements") Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: Daniel Borkmann <daniel@iogearbox.net> Acked-by: Martin KaFai Lau <kafai@fb.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2017-03-22 10:05:04 +08:00
run_parallel(100, test_hashmap_sizes, NULL);
run_parallel(100, test_hashmap_walk, NULL);
run_parallel(100, test_arraymap, NULL);
run_parallel(100, test_arraymap_percpu, NULL);
}
/* Number of child processes test_map_parallel() spawns; do_work() also
 * uses it as the stride between the keys one child handles.
 */
#define TASKS 1024
/* Mode selectors passed to do_work() in data[1]. */
#define DO_UPDATE 1
#define DO_DELETE 0
/* Worker for test_map_parallel(): handle the keys fn, fn + TASKS,
 * fn + 2 * TASKS, ... below MAP_SIZE.
 *
 * @fn:   index of this worker; also the first key it handles.
 * @data: points to two ints: [0] is the map fd, [1] is non-zero to insert
 *        and then overwrite each key (value == key), zero to delete it.
 *
 * Every map operation is asserted to succeed.
 */
static void do_work(int fn, void *data)
{
	const int *args = data;
	int map_fd = args[0];
	int updating = args[1];
	int k, v;

	for (k = fn; k < MAP_SIZE; k += TASKS) {
		v = k;

		if (!updating) {
			assert(bpf_map_delete_elem(map_fd, &k) == 0);
			continue;
		}

		/* Create the entry, then replace it in place. */
		assert(bpf_map_update_elem(map_fd, &k, &v,
					   BPF_NOEXIST) == 0);
		assert(bpf_map_update_elem(map_fd, &k, &v,
					   BPF_EXIST) == 0);
	}
}
/* Populate and then empty one shared hash map from TASKS concurrent child
 * processes, verifying the contents from the parent in between.
 *
 * Exits with status 1 if the map cannot be created; any other failure
 * trips an assert. The map fd is closed before returning so that repeated
 * runs do not accumulate open maps.
 */
static void test_map_parallel(void)
{
	int i, fd, key = 0, value = 0;
	int data[2];

	fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
			    MAP_SIZE, map_flags);
	if (fd < 0) {
		printf("Failed to create map for parallel test '%s'!\n",
		       strerror(errno));
		exit(1);
	}

	/* Use the same fd in children to add elements to this map:
	 * child_0 adds key=0, key=1024, key=2048, ...
	 * child_1 adds key=1, key=1025, key=2049, ...
	 * child_1023 adds key=1023, ...
	 */
	data[0] = fd;
	data[1] = DO_UPDATE;
	run_parallel(TASKS, do_work, data);

	/* Check that key=0 is already there. */
	assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
	       errno == EEXIST);

	/* Check that all elements were inserted: starting from a key that
	 * is not stored, exactly MAP_SIZE steps must succeed.
	 */
	assert(bpf_map_get_next_key(fd, NULL, &key) == 0);
	key = -1;
	for (i = 0; i < MAP_SIZE; i++)
		assert(bpf_map_get_next_key(fd, &key, &key) == 0);
	assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT);

	/* Another check for all elements: each key maps to itself. */
	for (i = 0; i < MAP_SIZE; i++) {
		key = MAP_SIZE - i - 1;

		assert(bpf_map_lookup_elem(fd, &key, &value) == 0 &&
		       value == key);
	}

	/* Now let's delete all elements in parallel. */
	data[1] = DO_DELETE;
	run_parallel(TASKS, do_work, data);

	/* Nothing should be left. */
	key = -1;
	assert(bpf_map_get_next_key(fd, NULL, &key) == -1 && errno == ENOENT);
	assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT);

	/* Release the map; the suite runs more than once per process. */
	close(fd);
}
static void run_all_tests(void)
{
test_hashmap(0, NULL);
test_hashmap_percpu(0, NULL);
test_hashmap_walk(0, NULL);
test_arraymap(0, NULL);
test_arraymap_percpu(0, NULL);
test_arraymap_percpu_many_keys();
test_map_large();
test_map_parallel();
test_map_stress();
}
/* Entry point: raise the locked-memory limit, then run the whole suite
 * twice, first with default map flags and then with BPF_F_NO_PREALLOC.
 *
 * Returns 0 after printing "test_maps: OK"; a failing test terminates
 * the process before that point.
 */
int main(void)
{
	struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };

	/* Best effort: a failure is reported but not fatal, since the tests
	 * themselves will show whether the existing limit is sufficient.
	 */
	if (setrlimit(RLIMIT_MEMLOCK, &rinf) != 0)
		perror("setrlimit(RLIMIT_MEMLOCK)");

	map_flags = 0;
	run_all_tests();

	map_flags = BPF_F_NO_PREALLOC;
	run_all_tests();

	printf("test_maps: OK\n");
	return 0;
}