linux/tools/testing/selftests/bpf/test_maps.c

1142 lines
28 KiB
C
Raw Normal View History

/*
* Testsuite for eBPF maps
*
* Copyright (c) 2014 PLUMgrid, http://plumgrid.com
* Copyright (c) 2016 Facebook
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
#include <stdio.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <assert.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <sys/resource.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
bpf: fix multiple issues in selftest suite and samples 1) The test_lru_map and test_lru_dist fails building on my machine since the sys/resource.h header is not included. 2) test_verifier fails in one test case where we try to call an invalid function, since the verifier log output changed wrt printing function names. 3) Current selftest suite code relies on sysconf(_SC_NPROCESSORS_CONF) for retrieving the number of possible CPUs. This is broken at least in our scenario and really just doesn't work. glibc tries a number of things for retrieving _SC_NPROCESSORS_CONF. First it tries equivalent of /sys/devices/system/cpu/cpu[0-9]* | wc -l, if that fails, depending on the config, it either tries to count CPUs in /proc/cpuinfo, or returns the _SC_NPROCESSORS_ONLN value instead. If /proc/cpuinfo has some issue, it returns just 1 worst case. This oddity is nothing new [1], but semantics/behaviour seems to be settled. _SC_NPROCESSORS_ONLN will parse /sys/devices/system/cpu/online, if that fails it looks into /proc/stat for cpuX entries, and if also that fails for some reason, /proc/cpuinfo is consulted (and returning 1 if unlikely all breaks down). While that might match num_possible_cpus() from the kernel in some cases, it's really not guaranteed with CPU hotplugging, and can result in a buffer overflow since the array in user space could have too few number of slots, and on perpcu map lookup, the kernel will write beyond that memory of the value buffer. William Tu reported such mismatches: [...] The fact that sysconf(_SC_NPROCESSORS_CONF) != num_possible_cpu() happens when CPU hotadd is enabled. For example, in Fusion when setting vcpu.hotadd = "TRUE" or in KVM, setting ./qemu-system-x86_64 -smp 2, maxcpus=4 ... the num_possible_cpu() will be 4 and sysconf() will be 2 [2]. [...] Documentation/cputopology.txt says /sys/devices/system/cpu/possible outputs cpu_possible_mask. That is the same as in num_possible_cpus(), so first step would be to fix the _SC_NPROCESSORS_CONF calls with our own implementation. Later, we could add support to bpf(2) for passing a mask via CPU_SET(3), for example, to just select a subset of CPUs. BPF samples code needs this fix as well (at least so that people stop copying this). Thus, define bpf_num_possible_cpus() once in selftests and import it from there for the sample code to avoid duplicating it. The remaining sysconf(_SC_NPROCESSORS_CONF) in samples are unrelated. After all three issues are fixed, the test suite runs fine again: # make run_tests | grep self selftests: test_verifier [PASS] selftests: test_maps [PASS] selftests: test_lru_map [PASS] selftests: test_kmod.sh [PASS] [1] https://www.sourceware.org/ml/libc-alpha/2011-06/msg00079.html [2] https://www.mail-archive.com/netdev@vger.kernel.org/msg121183.html Fixes: 3059303f59cf ("samples/bpf: update tracex[23] examples to use per-cpu maps") Fixes: 86af8b4191d2 ("Add sample for adding simple drop program to link") Fixes: df570f577231 ("samples/bpf: unit test for BPF_MAP_TYPE_PERCPU_ARRAY") Fixes: e15596717948 ("samples/bpf: unit test for BPF_MAP_TYPE_PERCPU_HASH") Fixes: ebb676daa1a3 ("bpf: Print function name in addition to function id") Fixes: 5db58faf989f ("bpf: Add tests for the LRU bpf_htab") Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Cc: William Tu <u9012063@gmail.com> Acked-by: Alexei Starovoitov <ast@kernel.org> Signed-off-by: David S. Miller <davem@davemloft.net>
2016-11-26 08:28:09 +08:00
#include "bpf_util.h"
static int map_flags;
static void test_hashmap(int task, void *data)
{
long long key, next_key, first_key, value;
int fd;
fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
2, map_flags);
if (fd < 0) {
printf("Failed to create hashmap '%s'!\n", strerror(errno));
exit(1);
}
key = 1;
value = 1234;
/* Insert key=1 element. */
assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
value = 0;
/* BPF_NOEXIST means add new element if it doesn't exist. */
assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
/* key=1 already exists. */
errno == EEXIST);
/* -1 is an invalid flag. */
assert(bpf_map_update_elem(fd, &key, &value, -1) == -1 &&
errno == EINVAL);
/* Check that key=1 can be found. */
assert(bpf_map_lookup_elem(fd, &key, &value) == 0 && value == 1234);
key = 2;
/* Check that key=2 is not found. */
assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
/* BPF_EXIST means update existing element. */
assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) == -1 &&
/* key=2 is not there. */
errno == ENOENT);
/* Insert key=2 element. */
assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == 0);
/* key=1 and key=2 were inserted, check that key=0 cannot be
* inserted due to max_entries limit.
*/
key = 0;
assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
errno == E2BIG);
/* Update existing element, though the map is full. */
key = 1;
assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) == 0);
key = 2;
assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
bpf: fix hashmap extra_elems logic In both kmalloc and prealloc mode the bpf_map_update_elem() is using per-cpu extra_elems to do atomic update when the map is full. There are two issues with it. The logic can be misused, since it allows max_entries+num_cpus elements to be present in the map. And alloc_extra_elems() at map creation time can fail percpu alloc for large map values with a warn: WARNING: CPU: 3 PID: 2752 at ../mm/percpu.c:892 pcpu_alloc+0x119/0xa60 illegal size (32824) or align (8) for percpu allocation The fixes for both of these issues are different for kmalloc and prealloc modes. For prealloc mode allocate extra num_possible_cpus elements and store their pointers into extra_elems array instead of actual elements. Hence we can use these hidden(spare) elements not only when the map is full but during bpf_map_update_elem() that replaces existing element too. That also improves performance, since pcpu_freelist_pop/push is avoided. Unfortunately this approach cannot be used for kmalloc mode which needs to kfree elements after rcu grace period. Therefore switch it back to normal kmalloc even when full and old element exists like it was prior to commit 6c9059817432 ("bpf: pre-allocate hash map elements"). Add tests to check for over max_entries and large map values. Reported-by: Dave Jones <davej@codemonkey.org.uk> Fixes: 6c9059817432 ("bpf: pre-allocate hash map elements") Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: Daniel Borkmann <daniel@iogearbox.net> Acked-by: Martin KaFai Lau <kafai@fb.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2017-03-22 10:05:04 +08:00
key = 3;
assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
errno == E2BIG);
/* Check that key = 0 doesn't exist. */
key = 0;
assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
/* Iterate over two elements. */
assert(bpf_map_get_next_key(fd, NULL, &first_key) == 0 &&
(first_key == 1 || first_key == 2));
assert(bpf_map_get_next_key(fd, &key, &next_key) == 0 &&
(next_key == first_key));
assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 &&
(next_key == 1 || next_key == 2) &&
(next_key != first_key));
assert(bpf_map_get_next_key(fd, &next_key, &next_key) == -1 &&
errno == ENOENT);
/* Delete both elements. */
key = 1;
assert(bpf_map_delete_elem(fd, &key) == 0);
key = 2;
assert(bpf_map_delete_elem(fd, &key) == 0);
assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
key = 0;
/* Check that map is empty. */
assert(bpf_map_get_next_key(fd, NULL, &next_key) == -1 &&
errno == ENOENT);
assert(bpf_map_get_next_key(fd, &key, &next_key) == -1 &&
errno == ENOENT);
close(fd);
}
bpf: fix hashmap extra_elems logic In both kmalloc and prealloc mode the bpf_map_update_elem() is using per-cpu extra_elems to do atomic update when the map is full. There are two issues with it. The logic can be misused, since it allows max_entries+num_cpus elements to be present in the map. And alloc_extra_elems() at map creation time can fail percpu alloc for large map values with a warn: WARNING: CPU: 3 PID: 2752 at ../mm/percpu.c:892 pcpu_alloc+0x119/0xa60 illegal size (32824) or align (8) for percpu allocation The fixes for both of these issues are different for kmalloc and prealloc modes. For prealloc mode allocate extra num_possible_cpus elements and store their pointers into extra_elems array instead of actual elements. Hence we can use these hidden(spare) elements not only when the map is full but during bpf_map_update_elem() that replaces existing element too. That also improves performance, since pcpu_freelist_pop/push is avoided. Unfortunately this approach cannot be used for kmalloc mode which needs to kfree elements after rcu grace period. Therefore switch it back to normal kmalloc even when full and old element exists like it was prior to commit 6c9059817432 ("bpf: pre-allocate hash map elements"). Add tests to check for over max_entries and large map values. Reported-by: Dave Jones <davej@codemonkey.org.uk> Fixes: 6c9059817432 ("bpf: pre-allocate hash map elements") Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: Daniel Borkmann <daniel@iogearbox.net> Acked-by: Martin KaFai Lau <kafai@fb.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2017-03-22 10:05:04 +08:00
static void test_hashmap_sizes(int task, void *data)
{
int fd, i, j;
for (i = 1; i <= 512; i <<= 1)
for (j = 1; j <= 1 << 18; j <<= 1) {
fd = bpf_create_map(BPF_MAP_TYPE_HASH, i, j,
2, map_flags);
if (fd < 0) {
if (errno == ENOMEM)
return;
bpf: fix hashmap extra_elems logic In both kmalloc and prealloc mode the bpf_map_update_elem() is using per-cpu extra_elems to do atomic update when the map is full. There are two issues with it. The logic can be misused, since it allows max_entries+num_cpus elements to be present in the map. And alloc_extra_elems() at map creation time can fail percpu alloc for large map values with a warn: WARNING: CPU: 3 PID: 2752 at ../mm/percpu.c:892 pcpu_alloc+0x119/0xa60 illegal size (32824) or align (8) for percpu allocation The fixes for both of these issues are different for kmalloc and prealloc modes. For prealloc mode allocate extra num_possible_cpus elements and store their pointers into extra_elems array instead of actual elements. Hence we can use these hidden(spare) elements not only when the map is full but during bpf_map_update_elem() that replaces existing element too. That also improves performance, since pcpu_freelist_pop/push is avoided. Unfortunately this approach cannot be used for kmalloc mode which needs to kfree elements after rcu grace period. Therefore switch it back to normal kmalloc even when full and old element exists like it was prior to commit 6c9059817432 ("bpf: pre-allocate hash map elements"). Add tests to check for over max_entries and large map values. Reported-by: Dave Jones <davej@codemonkey.org.uk> Fixes: 6c9059817432 ("bpf: pre-allocate hash map elements") Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: Daniel Borkmann <daniel@iogearbox.net> Acked-by: Martin KaFai Lau <kafai@fb.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2017-03-22 10:05:04 +08:00
printf("Failed to create hashmap key=%d value=%d '%s'\n",
i, j, strerror(errno));
exit(1);
}
close(fd);
usleep(10); /* give kernel time to destroy */
}
}
static void test_hashmap_percpu(int task, void *data)
{
bpf: fix multiple issues in selftest suite and samples 1) The test_lru_map and test_lru_dist fails building on my machine since the sys/resource.h header is not included. 2) test_verifier fails in one test case where we try to call an invalid function, since the verifier log output changed wrt printing function names. 3) Current selftest suite code relies on sysconf(_SC_NPROCESSORS_CONF) for retrieving the number of possible CPUs. This is broken at least in our scenario and really just doesn't work. glibc tries a number of things for retrieving _SC_NPROCESSORS_CONF. First it tries equivalent of /sys/devices/system/cpu/cpu[0-9]* | wc -l, if that fails, depending on the config, it either tries to count CPUs in /proc/cpuinfo, or returns the _SC_NPROCESSORS_ONLN value instead. If /proc/cpuinfo has some issue, it returns just 1 worst case. This oddity is nothing new [1], but semantics/behaviour seems to be settled. _SC_NPROCESSORS_ONLN will parse /sys/devices/system/cpu/online, if that fails it looks into /proc/stat for cpuX entries, and if also that fails for some reason, /proc/cpuinfo is consulted (and returning 1 if unlikely all breaks down). While that might match num_possible_cpus() from the kernel in some cases, it's really not guaranteed with CPU hotplugging, and can result in a buffer overflow since the array in user space could have too few number of slots, and on perpcu map lookup, the kernel will write beyond that memory of the value buffer. William Tu reported such mismatches: [...] The fact that sysconf(_SC_NPROCESSORS_CONF) != num_possible_cpu() happens when CPU hotadd is enabled. For example, in Fusion when setting vcpu.hotadd = "TRUE" or in KVM, setting ./qemu-system-x86_64 -smp 2, maxcpus=4 ... the num_possible_cpu() will be 4 and sysconf() will be 2 [2]. [...] Documentation/cputopology.txt says /sys/devices/system/cpu/possible outputs cpu_possible_mask. That is the same as in num_possible_cpus(), so first step would be to fix the _SC_NPROCESSORS_CONF calls with our own implementation. Later, we could add support to bpf(2) for passing a mask via CPU_SET(3), for example, to just select a subset of CPUs. BPF samples code needs this fix as well (at least so that people stop copying this). Thus, define bpf_num_possible_cpus() once in selftests and import it from there for the sample code to avoid duplicating it. The remaining sysconf(_SC_NPROCESSORS_CONF) in samples are unrelated. After all three issues are fixed, the test suite runs fine again: # make run_tests | grep self selftests: test_verifier [PASS] selftests: test_maps [PASS] selftests: test_lru_map [PASS] selftests: test_kmod.sh [PASS] [1] https://www.sourceware.org/ml/libc-alpha/2011-06/msg00079.html [2] https://www.mail-archive.com/netdev@vger.kernel.org/msg121183.html Fixes: 3059303f59cf ("samples/bpf: update tracex[23] examples to use per-cpu maps") Fixes: 86af8b4191d2 ("Add sample for adding simple drop program to link") Fixes: df570f577231 ("samples/bpf: unit test for BPF_MAP_TYPE_PERCPU_ARRAY") Fixes: e15596717948 ("samples/bpf: unit test for BPF_MAP_TYPE_PERCPU_HASH") Fixes: ebb676daa1a3 ("bpf: Print function name in addition to function id") Fixes: 5db58faf989f ("bpf: Add tests for the LRU bpf_htab") Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Cc: William Tu <u9012063@gmail.com> Acked-by: Alexei Starovoitov <ast@kernel.org> Signed-off-by: David S. Miller <davem@davemloft.net>
2016-11-26 08:28:09 +08:00
unsigned int nr_cpus = bpf_num_possible_cpus();
BPF_DECLARE_PERCPU(long, value);
long long key, next_key, first_key;
int expected_key_mask = 0;
int fd, i;
fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_HASH, sizeof(key),
sizeof(bpf_percpu(value, 0)), 2, map_flags);
if (fd < 0) {
printf("Failed to create hashmap '%s'!\n", strerror(errno));
exit(1);
}
for (i = 0; i < nr_cpus; i++)
bpf_percpu(value, i) = i + 100;
key = 1;
/* Insert key=1 element. */
assert(!(expected_key_mask & key));
assert(bpf_map_update_elem(fd, &key, value, BPF_ANY) == 0);
expected_key_mask |= key;
/* BPF_NOEXIST means add new element if it doesn't exist. */
assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) == -1 &&
/* key=1 already exists. */
errno == EEXIST);
/* -1 is an invalid flag. */
assert(bpf_map_update_elem(fd, &key, value, -1) == -1 &&
errno == EINVAL);
/* Check that key=1 can be found. Value could be 0 if the lookup
* was run from a different CPU.
*/
bpf_percpu(value, 0) = 1;
assert(bpf_map_lookup_elem(fd, &key, value) == 0 &&
bpf_percpu(value, 0) == 100);
key = 2;
/* Check that key=2 is not found. */
assert(bpf_map_lookup_elem(fd, &key, value) == -1 && errno == ENOENT);
/* BPF_EXIST means update existing element. */
assert(bpf_map_update_elem(fd, &key, value, BPF_EXIST) == -1 &&
/* key=2 is not there. */
errno == ENOENT);
/* Insert key=2 element. */
assert(!(expected_key_mask & key));
assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) == 0);
expected_key_mask |= key;
/* key=1 and key=2 were inserted, check that key=0 cannot be
* inserted due to max_entries limit.
*/
key = 0;
assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) == -1 &&
errno == E2BIG);
/* Check that key = 0 doesn't exist. */
assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
/* Iterate over two elements. */
assert(bpf_map_get_next_key(fd, NULL, &first_key) == 0 &&
((expected_key_mask & first_key) == first_key));
while (!bpf_map_get_next_key(fd, &key, &next_key)) {
if (first_key) {
assert(next_key == first_key);
first_key = 0;
}
assert((expected_key_mask & next_key) == next_key);
expected_key_mask &= ~next_key;
assert(bpf_map_lookup_elem(fd, &next_key, value) == 0);
for (i = 0; i < nr_cpus; i++)
assert(bpf_percpu(value, i) == i + 100);
key = next_key;
}
assert(errno == ENOENT);
/* Update with BPF_EXIST. */
key = 1;
assert(bpf_map_update_elem(fd, &key, value, BPF_EXIST) == 0);
/* Delete both elements. */
key = 1;
assert(bpf_map_delete_elem(fd, &key) == 0);
key = 2;
assert(bpf_map_delete_elem(fd, &key) == 0);
assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
key = 0;
/* Check that map is empty. */
assert(bpf_map_get_next_key(fd, NULL, &next_key) == -1 &&
errno == ENOENT);
assert(bpf_map_get_next_key(fd, &key, &next_key) == -1 &&
errno == ENOENT);
close(fd);
}
static void test_hashmap_walk(int task, void *data)
{
int fd, i, max_entries = 1000;
long long key, value, next_key;
bool next_key_valid = true;
fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
max_entries, map_flags);
if (fd < 0) {
printf("Failed to create hashmap '%s'!\n", strerror(errno));
exit(1);
}
for (i = 0; i < max_entries; i++) {
key = i; value = key;
assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == 0);
}
for (i = 0; bpf_map_get_next_key(fd, !i ? NULL : &key,
&next_key) == 0; i++) {
key = next_key;
assert(bpf_map_lookup_elem(fd, &key, &value) == 0);
}
assert(i == max_entries);
assert(bpf_map_get_next_key(fd, NULL, &key) == 0);
for (i = 0; next_key_valid; i++) {
next_key_valid = bpf_map_get_next_key(fd, &key, &next_key) == 0;
assert(bpf_map_lookup_elem(fd, &key, &value) == 0);
value++;
assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) == 0);
key = next_key;
}
assert(i == max_entries);
for (i = 0; bpf_map_get_next_key(fd, !i ? NULL : &key,
&next_key) == 0; i++) {
key = next_key;
assert(bpf_map_lookup_elem(fd, &key, &value) == 0);
assert(value - 1 == key);
}
assert(i == max_entries);
close(fd);
}
static void test_arraymap(int task, void *data)
{
int key, next_key, fd;
long long value;
fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(key), sizeof(value),
2, 0);
if (fd < 0) {
printf("Failed to create arraymap '%s'!\n", strerror(errno));
exit(1);
}
key = 1;
value = 1234;
/* Insert key=1 element. */
assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
value = 0;
assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
errno == EEXIST);
/* Check that key=1 can be found. */
assert(bpf_map_lookup_elem(fd, &key, &value) == 0 && value == 1234);
key = 0;
/* Check that key=0 is also found and zero initialized. */
assert(bpf_map_lookup_elem(fd, &key, &value) == 0 && value == 0);
/* key=0 and key=1 were inserted, check that key=2 cannot be inserted
* due to max_entries limit.
*/
key = 2;
assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) == -1 &&
errno == E2BIG);
/* Check that key = 2 doesn't exist. */
assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
/* Iterate over two elements. */
assert(bpf_map_get_next_key(fd, NULL, &next_key) == 0 &&
next_key == 0);
assert(bpf_map_get_next_key(fd, &key, &next_key) == 0 &&
next_key == 0);
assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 &&
next_key == 1);
assert(bpf_map_get_next_key(fd, &next_key, &next_key) == -1 &&
errno == ENOENT);
/* Delete shouldn't succeed. */
key = 1;
assert(bpf_map_delete_elem(fd, &key) == -1 && errno == EINVAL);
close(fd);
}
static void test_arraymap_percpu(int task, void *data)
{
bpf: fix multiple issues in selftest suite and samples 1) The test_lru_map and test_lru_dist fails building on my machine since the sys/resource.h header is not included. 2) test_verifier fails in one test case where we try to call an invalid function, since the verifier log output changed wrt printing function names. 3) Current selftest suite code relies on sysconf(_SC_NPROCESSORS_CONF) for retrieving the number of possible CPUs. This is broken at least in our scenario and really just doesn't work. glibc tries a number of things for retrieving _SC_NPROCESSORS_CONF. First it tries equivalent of /sys/devices/system/cpu/cpu[0-9]* | wc -l, if that fails, depending on the config, it either tries to count CPUs in /proc/cpuinfo, or returns the _SC_NPROCESSORS_ONLN value instead. If /proc/cpuinfo has some issue, it returns just 1 worst case. This oddity is nothing new [1], but semantics/behaviour seems to be settled. _SC_NPROCESSORS_ONLN will parse /sys/devices/system/cpu/online, if that fails it looks into /proc/stat for cpuX entries, and if also that fails for some reason, /proc/cpuinfo is consulted (and returning 1 if unlikely all breaks down). While that might match num_possible_cpus() from the kernel in some cases, it's really not guaranteed with CPU hotplugging, and can result in a buffer overflow since the array in user space could have too few number of slots, and on perpcu map lookup, the kernel will write beyond that memory of the value buffer. William Tu reported such mismatches: [...] The fact that sysconf(_SC_NPROCESSORS_CONF) != num_possible_cpu() happens when CPU hotadd is enabled. For example, in Fusion when setting vcpu.hotadd = "TRUE" or in KVM, setting ./qemu-system-x86_64 -smp 2, maxcpus=4 ... the num_possible_cpu() will be 4 and sysconf() will be 2 [2]. [...] Documentation/cputopology.txt says /sys/devices/system/cpu/possible outputs cpu_possible_mask. That is the same as in num_possible_cpus(), so first step would be to fix the _SC_NPROCESSORS_CONF calls with our own implementation. Later, we could add support to bpf(2) for passing a mask via CPU_SET(3), for example, to just select a subset of CPUs. BPF samples code needs this fix as well (at least so that people stop copying this). Thus, define bpf_num_possible_cpus() once in selftests and import it from there for the sample code to avoid duplicating it. The remaining sysconf(_SC_NPROCESSORS_CONF) in samples are unrelated. After all three issues are fixed, the test suite runs fine again: # make run_tests | grep self selftests: test_verifier [PASS] selftests: test_maps [PASS] selftests: test_lru_map [PASS] selftests: test_kmod.sh [PASS] [1] https://www.sourceware.org/ml/libc-alpha/2011-06/msg00079.html [2] https://www.mail-archive.com/netdev@vger.kernel.org/msg121183.html Fixes: 3059303f59cf ("samples/bpf: update tracex[23] examples to use per-cpu maps") Fixes: 86af8b4191d2 ("Add sample for adding simple drop program to link") Fixes: df570f577231 ("samples/bpf: unit test for BPF_MAP_TYPE_PERCPU_ARRAY") Fixes: e15596717948 ("samples/bpf: unit test for BPF_MAP_TYPE_PERCPU_HASH") Fixes: ebb676daa1a3 ("bpf: Print function name in addition to function id") Fixes: 5db58faf989f ("bpf: Add tests for the LRU bpf_htab") Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Cc: William Tu <u9012063@gmail.com> Acked-by: Alexei Starovoitov <ast@kernel.org> Signed-off-by: David S. Miller <davem@davemloft.net>
2016-11-26 08:28:09 +08:00
unsigned int nr_cpus = bpf_num_possible_cpus();
BPF_DECLARE_PERCPU(long, values);
int key, next_key, fd, i;
fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key),
sizeof(bpf_percpu(values, 0)), 2, 0);
if (fd < 0) {
printf("Failed to create arraymap '%s'!\n", strerror(errno));
exit(1);
}
for (i = 0; i < nr_cpus; i++)
bpf_percpu(values, i) = i + 100;
key = 1;
/* Insert key=1 element. */
assert(bpf_map_update_elem(fd, &key, values, BPF_ANY) == 0);
bpf_percpu(values, 0) = 0;
assert(bpf_map_update_elem(fd, &key, values, BPF_NOEXIST) == -1 &&
errno == EEXIST);
/* Check that key=1 can be found. */
assert(bpf_map_lookup_elem(fd, &key, values) == 0 &&
bpf_percpu(values, 0) == 100);
key = 0;
/* Check that key=0 is also found and zero initialized. */
assert(bpf_map_lookup_elem(fd, &key, values) == 0 &&
bpf_percpu(values, 0) == 0 &&
bpf_percpu(values, nr_cpus - 1) == 0);
/* Check that key=2 cannot be inserted due to max_entries limit. */
key = 2;
assert(bpf_map_update_elem(fd, &key, values, BPF_EXIST) == -1 &&
errno == E2BIG);
/* Check that key = 2 doesn't exist. */
assert(bpf_map_lookup_elem(fd, &key, values) == -1 && errno == ENOENT);
/* Iterate over two elements. */
assert(bpf_map_get_next_key(fd, NULL, &next_key) == 0 &&
next_key == 0);
assert(bpf_map_get_next_key(fd, &key, &next_key) == 0 &&
next_key == 0);
assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 &&
next_key == 1);
assert(bpf_map_get_next_key(fd, &next_key, &next_key) == -1 &&
errno == ENOENT);
/* Delete shouldn't succeed. */
key = 1;
assert(bpf_map_delete_elem(fd, &key) == -1 && errno == EINVAL);
close(fd);
}
static void test_arraymap_percpu_many_keys(void)
{
bpf: fix multiple issues in selftest suite and samples 1) The test_lru_map and test_lru_dist fails building on my machine since the sys/resource.h header is not included. 2) test_verifier fails in one test case where we try to call an invalid function, since the verifier log output changed wrt printing function names. 3) Current selftest suite code relies on sysconf(_SC_NPROCESSORS_CONF) for retrieving the number of possible CPUs. This is broken at least in our scenario and really just doesn't work. glibc tries a number of things for retrieving _SC_NPROCESSORS_CONF. First it tries equivalent of /sys/devices/system/cpu/cpu[0-9]* | wc -l, if that fails, depending on the config, it either tries to count CPUs in /proc/cpuinfo, or returns the _SC_NPROCESSORS_ONLN value instead. If /proc/cpuinfo has some issue, it returns just 1 worst case. This oddity is nothing new [1], but semantics/behaviour seems to be settled. _SC_NPROCESSORS_ONLN will parse /sys/devices/system/cpu/online, if that fails it looks into /proc/stat for cpuX entries, and if also that fails for some reason, /proc/cpuinfo is consulted (and returning 1 if unlikely all breaks down). While that might match num_possible_cpus() from the kernel in some cases, it's really not guaranteed with CPU hotplugging, and can result in a buffer overflow since the array in user space could have too few number of slots, and on perpcu map lookup, the kernel will write beyond that memory of the value buffer. William Tu reported such mismatches: [...] The fact that sysconf(_SC_NPROCESSORS_CONF) != num_possible_cpu() happens when CPU hotadd is enabled. For example, in Fusion when setting vcpu.hotadd = "TRUE" or in KVM, setting ./qemu-system-x86_64 -smp 2, maxcpus=4 ... the num_possible_cpu() will be 4 and sysconf() will be 2 [2]. [...] Documentation/cputopology.txt says /sys/devices/system/cpu/possible outputs cpu_possible_mask. That is the same as in num_possible_cpus(), so first step would be to fix the _SC_NPROCESSORS_CONF calls with our own implementation. Later, we could add support to bpf(2) for passing a mask via CPU_SET(3), for example, to just select a subset of CPUs. BPF samples code needs this fix as well (at least so that people stop copying this). Thus, define bpf_num_possible_cpus() once in selftests and import it from there for the sample code to avoid duplicating it. The remaining sysconf(_SC_NPROCESSORS_CONF) in samples are unrelated. After all three issues are fixed, the test suite runs fine again: # make run_tests | grep self selftests: test_verifier [PASS] selftests: test_maps [PASS] selftests: test_lru_map [PASS] selftests: test_kmod.sh [PASS] [1] https://www.sourceware.org/ml/libc-alpha/2011-06/msg00079.html [2] https://www.mail-archive.com/netdev@vger.kernel.org/msg121183.html Fixes: 3059303f59cf ("samples/bpf: update tracex[23] examples to use per-cpu maps") Fixes: 86af8b4191d2 ("Add sample for adding simple drop program to link") Fixes: df570f577231 ("samples/bpf: unit test for BPF_MAP_TYPE_PERCPU_ARRAY") Fixes: e15596717948 ("samples/bpf: unit test for BPF_MAP_TYPE_PERCPU_HASH") Fixes: ebb676daa1a3 ("bpf: Print function name in addition to function id") Fixes: 5db58faf989f ("bpf: Add tests for the LRU bpf_htab") Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Cc: William Tu <u9012063@gmail.com> Acked-by: Alexei Starovoitov <ast@kernel.org> Signed-off-by: David S. Miller <davem@davemloft.net>
2016-11-26 08:28:09 +08:00
unsigned int nr_cpus = bpf_num_possible_cpus();
BPF_DECLARE_PERCPU(long, values);
bpf: fix hashmap extra_elems logic In both kmalloc and prealloc mode the bpf_map_update_elem() is using per-cpu extra_elems to do atomic update when the map is full. There are two issues with it. The logic can be misused, since it allows max_entries+num_cpus elements to be present in the map. And alloc_extra_elems() at map creation time can fail percpu alloc for large map values with a warn: WARNING: CPU: 3 PID: 2752 at ../mm/percpu.c:892 pcpu_alloc+0x119/0xa60 illegal size (32824) or align (8) for percpu allocation The fixes for both of these issues are different for kmalloc and prealloc modes. For prealloc mode allocate extra num_possible_cpus elements and store their pointers into extra_elems array instead of actual elements. Hence we can use these hidden(spare) elements not only when the map is full but during bpf_map_update_elem() that replaces existing element too. That also improves performance, since pcpu_freelist_pop/push is avoided. Unfortunately this approach cannot be used for kmalloc mode which needs to kfree elements after rcu grace period. Therefore switch it back to normal kmalloc even when full and old element exists like it was prior to commit 6c9059817432 ("bpf: pre-allocate hash map elements"). Add tests to check for over max_entries and large map values. Reported-by: Dave Jones <davej@codemonkey.org.uk> Fixes: 6c9059817432 ("bpf: pre-allocate hash map elements") Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: Daniel Borkmann <daniel@iogearbox.net> Acked-by: Martin KaFai Lau <kafai@fb.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2017-03-22 10:05:04 +08:00
/* nr_keys is not too large otherwise the test stresses percpu
* allocator more than anything else
*/
unsigned int nr_keys = 2000;
int key, fd, i;
fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key),
sizeof(bpf_percpu(values, 0)), nr_keys, 0);
if (fd < 0) {
printf("Failed to create per-cpu arraymap '%s'!\n",
strerror(errno));
exit(1);
}
for (i = 0; i < nr_cpus; i++)
bpf_percpu(values, i) = i + 10;
for (key = 0; key < nr_keys; key++)
assert(bpf_map_update_elem(fd, &key, values, BPF_ANY) == 0);
for (key = 0; key < nr_keys; key++) {
for (i = 0; i < nr_cpus; i++)
bpf_percpu(values, i) = 0;
assert(bpf_map_lookup_elem(fd, &key, values) == 0);
for (i = 0; i < nr_cpus; i++)
assert(bpf_percpu(values, i) == i + 10);
}
close(fd);
}
static void test_devmap(int task, void *data)
{
int fd;
__u32 key, value;
fd = bpf_create_map(BPF_MAP_TYPE_DEVMAP, sizeof(key), sizeof(value),
2, 0);
if (fd < 0) {
printf("Failed to create arraymap '%s'!\n", strerror(errno));
exit(1);
}
close(fd);
}
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <arpa/inet.h>
#include <sys/select.h>
#include <linux/err.h>
#define SOCKMAP_PARSE_PROG "./sockmap_parse_prog.o"
#define SOCKMAP_VERDICT_PROG "./sockmap_verdict_prog.o"
static void test_sockmap(int tasks, void *data)
{
int one = 1, map_fd_rx = 0, map_fd_tx = 0, map_fd_break, s, sc, rc;
struct bpf_map *bpf_map_rx, *bpf_map_tx, *bpf_map_break;
int ports[] = {50200, 50201, 50202, 50204};
int err, i, fd, udp, sfd[6] = {0xdeadbeef};
u8 buf[20] = {0x0, 0x5, 0x3, 0x2, 0x1, 0x0};
int parse_prog, verdict_prog;
struct sockaddr_in addr;
struct bpf_object *obj;
struct timeval to;
__u32 key, value;
pid_t pid[tasks];
fd_set w;
/* Create some sockets to use with sockmap */
for (i = 0; i < 2; i++) {
sfd[i] = socket(AF_INET, SOCK_STREAM, 0);
if (sfd[i] < 0)
goto out;
err = setsockopt(sfd[i], SOL_SOCKET, SO_REUSEADDR,
(char *)&one, sizeof(one));
if (err) {
printf("failed to setsockopt\n");
goto out;
}
err = ioctl(sfd[i], FIONBIO, (char *)&one);
if (err < 0) {
printf("failed to ioctl\n");
goto out;
}
memset(&addr, 0, sizeof(struct sockaddr_in));
addr.sin_family = AF_INET;
addr.sin_addr.s_addr = inet_addr("127.0.0.1");
addr.sin_port = htons(ports[i]);
err = bind(sfd[i], (struct sockaddr *)&addr, sizeof(addr));
if (err < 0) {
printf("failed to bind: err %i: %i:%i\n",
err, i, sfd[i]);
goto out;
}
err = listen(sfd[i], 32);
if (err < 0) {
printf("failed to listen\n");
goto out;
}
}
for (i = 2; i < 4; i++) {
sfd[i] = socket(AF_INET, SOCK_STREAM, 0);
if (sfd[i] < 0)
goto out;
err = setsockopt(sfd[i], SOL_SOCKET, SO_REUSEADDR,
(char *)&one, sizeof(one));
if (err) {
printf("set sock opt\n");
goto out;
}
memset(&addr, 0, sizeof(struct sockaddr_in));
addr.sin_family = AF_INET;
addr.sin_addr.s_addr = inet_addr("127.0.0.1");
addr.sin_port = htons(ports[i - 2]);
err = connect(sfd[i], (struct sockaddr *)&addr, sizeof(addr));
if (err) {
printf("failed to connect\n");
goto out;
}
}
for (i = 4; i < 6; i++) {
sfd[i] = accept(sfd[i - 4], NULL, NULL);
if (sfd[i] < 0) {
printf("accept failed\n");
goto out;
}
}
/* Test sockmap with connected sockets */
fd = bpf_create_map(BPF_MAP_TYPE_SOCKMAP,
sizeof(key), sizeof(value),
6, 0);
if (fd < 0) {
printf("Failed to create sockmap %i\n", fd);
goto out_sockmap;
}
/* Test update with unsupported UDP socket */
udp = socket(AF_INET, SOCK_DGRAM, 0);
i = 0;
err = bpf_map_update_elem(fd, &i, &udp, BPF_ANY);
if (!err) {
printf("Failed socket SOCK_DGRAM allowed '%i:%i'\n",
i, udp);
goto out_sockmap;
}
/* Test update without programs */
for (i = 0; i < 6; i++) {
err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY);
if (err) {
printf("Failed noprog update sockmap '%i:%i'\n",
i, sfd[i]);
goto out_sockmap;
}
}
/* Test attaching/detaching bad fds */
err = bpf_prog_attach(-1, fd, BPF_SK_SKB_STREAM_PARSER, 0);
if (!err) {
printf("Failed invalid parser prog attach\n");
goto out_sockmap;
}
err = bpf_prog_attach(-1, fd, BPF_SK_SKB_STREAM_VERDICT, 0);
if (!err) {
printf("Failed invalid verdict prog attach\n");
goto out_sockmap;
}
err = bpf_prog_attach(-1, fd, __MAX_BPF_ATTACH_TYPE, 0);
if (!err) {
printf("Failed unknown prog attach\n");
goto out_sockmap;
}
err = bpf_prog_detach(fd, BPF_SK_SKB_STREAM_PARSER);
if (err) {
printf("Failed empty parser prog detach\n");
goto out_sockmap;
}
err = bpf_prog_detach(fd, BPF_SK_SKB_STREAM_VERDICT);
if (err) {
printf("Failed empty verdict prog detach\n");
goto out_sockmap;
}
err = bpf_prog_detach(fd, __MAX_BPF_ATTACH_TYPE);
if (!err) {
printf("Detach invalid prog successful\n");
goto out_sockmap;
}
/* Load SK_SKB program and Attach */
err = bpf_prog_load(SOCKMAP_PARSE_PROG,
BPF_PROG_TYPE_SK_SKB, &obj, &parse_prog);
if (err) {
printf("Failed to load SK_SKB parse prog\n");
goto out_sockmap;
}
err = bpf_prog_load(SOCKMAP_VERDICT_PROG,
BPF_PROG_TYPE_SK_SKB, &obj, &verdict_prog);
if (err) {
printf("Failed to load SK_SKB verdict prog\n");
goto out_sockmap;
}
bpf_map_rx = bpf_object__find_map_by_name(obj, "sock_map_rx");
if (IS_ERR(bpf_map_rx)) {
printf("Failed to load map rx from verdict prog\n");
goto out_sockmap;
}
map_fd_rx = bpf_map__fd(bpf_map_rx);
if (map_fd_rx < 0) {
printf("Failed to get map fd\n");
goto out_sockmap;
}
bpf_map_tx = bpf_object__find_map_by_name(obj, "sock_map_tx");
if (IS_ERR(bpf_map_tx)) {
printf("Failed to load map tx from verdict prog\n");
goto out_sockmap;
}
map_fd_tx = bpf_map__fd(bpf_map_tx);
if (map_fd_tx < 0) {
printf("Failed to get map tx fd\n");
goto out_sockmap;
}
bpf_map_break = bpf_object__find_map_by_name(obj, "sock_map_break");
if (IS_ERR(bpf_map_break)) {
printf("Failed to load map tx from verdict prog\n");
goto out_sockmap;
}
map_fd_break = bpf_map__fd(bpf_map_break);
if (map_fd_break < 0) {
printf("Failed to get map tx fd\n");
goto out_sockmap;
}
err = bpf_prog_attach(parse_prog, map_fd_break,
BPF_SK_SKB_STREAM_PARSER, 0);
if (!err) {
printf("Allowed attaching SK_SKB program to invalid map\n");
goto out_sockmap;
}
err = bpf_prog_attach(parse_prog, map_fd_rx,
BPF_SK_SKB_STREAM_PARSER, 0);
if (err) {
printf("Failed stream parser bpf prog attach\n");
goto out_sockmap;
}
err = bpf_prog_attach(verdict_prog, map_fd_rx,
BPF_SK_SKB_STREAM_VERDICT, 0);
if (err) {
printf("Failed stream verdict bpf prog attach\n");
goto out_sockmap;
}
err = bpf_prog_attach(verdict_prog, map_fd_rx,
__MAX_BPF_ATTACH_TYPE, 0);
if (!err) {
printf("Attached unknown bpf prog\n");
goto out_sockmap;
}
/* Test map update elem afterwards fd lives in fd and map_fd */
for (i = 0; i < 6; i++) {
err = bpf_map_update_elem(map_fd_rx, &i, &sfd[i], BPF_ANY);
if (err) {
printf("Failed map_fd_rx update sockmap %i '%i:%i'\n",
err, i, sfd[i]);
goto out_sockmap;
}
err = bpf_map_update_elem(map_fd_tx, &i, &sfd[i], BPF_ANY);
if (err) {
printf("Failed map_fd_tx update sockmap %i '%i:%i'\n",
err, i, sfd[i]);
goto out_sockmap;
}
}
/* Test map delete elem and remove send/recv sockets */
for (i = 2; i < 4; i++) {
err = bpf_map_delete_elem(map_fd_rx, &i);
if (err) {
printf("Failed delete sockmap rx %i '%i:%i'\n",
err, i, sfd[i]);
goto out_sockmap;
}
err = bpf_map_delete_elem(map_fd_tx, &i);
if (err) {
printf("Failed delete sockmap tx %i '%i:%i'\n",
err, i, sfd[i]);
goto out_sockmap;
}
}
/* Test map send/recv */
for (i = 0; i < 2; i++) {
buf[0] = i;
buf[1] = 0x5;
sc = send(sfd[2], buf, 20, 0);
if (sc < 0) {
printf("Failed sockmap send\n");
goto out_sockmap;
}
FD_ZERO(&w);
FD_SET(sfd[3], &w);
to.tv_sec = 1;
to.tv_usec = 0;
s = select(sfd[3] + 1, &w, NULL, NULL, &to);
if (s == -1) {
perror("Failed sockmap select()");
goto out_sockmap;
} else if (!s) {
printf("Failed sockmap unexpected timeout\n");
goto out_sockmap;
}
if (!FD_ISSET(sfd[3], &w)) {
printf("Failed sockmap select/recv\n");
goto out_sockmap;
}
rc = recv(sfd[3], buf, sizeof(buf), 0);
if (rc < 0) {
printf("Failed sockmap recv\n");
goto out_sockmap;
}
}
/* Negative null entry lookup from datapath should be dropped */
buf[0] = 1;
buf[1] = 12;
sc = send(sfd[2], buf, 20, 0);
if (sc < 0) {
printf("Failed sockmap send\n");
goto out_sockmap;
}
/* Push fd into same slot */
i = 2;
err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_NOEXIST);
if (!err) {
printf("Failed allowed sockmap dup slot BPF_NOEXIST\n");
goto out_sockmap;
}
err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY);
if (err) {
printf("Failed sockmap update new slot BPF_ANY\n");
goto out_sockmap;
}
err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_EXIST);
if (err) {
printf("Failed sockmap update new slot BPF_EXIST\n");
goto out_sockmap;
}
/* Delete the elems without programs */
for (i = 0; i < 6; i++) {
err = bpf_map_delete_elem(fd, &i);
if (err) {
printf("Failed delete sockmap %i '%i:%i'\n",
err, i, sfd[i]);
}
}
/* Test having multiple maps open and set with programs on same fds */
err = bpf_prog_attach(parse_prog, fd,
BPF_SK_SKB_STREAM_PARSER, 0);
if (err) {
printf("Failed fd bpf parse prog attach\n");
goto out_sockmap;
}
err = bpf_prog_attach(verdict_prog, fd,
BPF_SK_SKB_STREAM_VERDICT, 0);
if (err) {
printf("Failed fd bpf verdict prog attach\n");
goto out_sockmap;
}
for (i = 4; i < 6; i++) {
err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY);
if (!err) {
printf("Failed allowed duplicate programs in update ANY sockmap %i '%i:%i'\n",
err, i, sfd[i]);
goto out_sockmap;
}
err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_NOEXIST);
if (!err) {
printf("Failed allowed duplicate program in update NOEXIST sockmap %i '%i:%i'\n",
err, i, sfd[i]);
goto out_sockmap;
}
err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_EXIST);
if (!err) {
printf("Failed allowed duplicate program in update EXIST sockmap %i '%i:%i'\n",
err, i, sfd[i]);
goto out_sockmap;
}
}
/* Test tasks number of forked operations */
for (i = 0; i < tasks; i++) {
pid[i] = fork();
if (pid[i] == 0) {
for (i = 0; i < 6; i++) {
bpf_map_delete_elem(map_fd_tx, &i);
bpf_map_delete_elem(map_fd_rx, &i);
bpf_map_update_elem(map_fd_tx, &i,
&sfd[i], BPF_ANY);
bpf_map_update_elem(map_fd_rx, &i,
&sfd[i], BPF_ANY);
}
exit(0);
} else if (pid[i] == -1) {
printf("Couldn't spawn #%d process!\n", i);
exit(1);
}
}
for (i = 0; i < tasks; i++) {
int status;
assert(waitpid(pid[i], &status, 0) == pid[i]);
assert(status == 0);
}
err = bpf_prog_detach(map_fd_rx, __MAX_BPF_ATTACH_TYPE);
if (!err) {
printf("Detached an invalid prog type.\n");
goto out_sockmap;
}
err = bpf_prog_detach(map_fd_rx, BPF_SK_SKB_STREAM_PARSER);
if (err) {
printf("Failed parser prog detach\n");
goto out_sockmap;
}
err = bpf_prog_detach(map_fd_rx, BPF_SK_SKB_STREAM_VERDICT);
if (err) {
printf("Failed parser prog detach\n");
goto out_sockmap;
}
/* Test map close sockets and empty maps */
for (i = 0; i < 6; i++) {
bpf_map_delete_elem(map_fd_tx, &i);
bpf_map_delete_elem(map_fd_rx, &i);
close(sfd[i]);
}
close(fd);
close(map_fd_rx);
bpf_object__close(obj);
return;
out:
for (i = 0; i < 6; i++)
close(sfd[i]);
printf("Failed to create sockmap '%i:%s'!\n", i, strerror(errno));
exit(1);
out_sockmap:
for (i = 0; i < 6; i++) {
if (map_fd_tx)
bpf_map_delete_elem(map_fd_tx, &i);
if (map_fd_rx)
bpf_map_delete_elem(map_fd_rx, &i);
close(sfd[i]);
}
close(fd);
exit(1);
}
#define MAP_SIZE (32 * 1024)
static void test_map_large(void)
{
struct bigkey {
int a;
char b[116];
long long c;
} key;
int fd, i, value;
fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
MAP_SIZE, map_flags);
if (fd < 0) {
printf("Failed to create large map '%s'!\n", strerror(errno));
exit(1);
}
for (i = 0; i < MAP_SIZE; i++) {
key = (struct bigkey) { .c = i };
value = i;
assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == 0);
}
key.c = -1;
assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
errno == E2BIG);
/* Iterate through all elements. */
assert(bpf_map_get_next_key(fd, NULL, &key) == 0);
key.c = -1;
for (i = 0; i < MAP_SIZE; i++)
assert(bpf_map_get_next_key(fd, &key, &key) == 0);
assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT);
key.c = 0;
assert(bpf_map_lookup_elem(fd, &key, &value) == 0 && value == 0);
key.a = 1;
assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
close(fd);
}
#define run_parallel(N, FN, DATA) \
printf("Fork %d tasks to '" #FN "'\n", N); \
__run_parallel(N, FN, DATA)
static void __run_parallel(int tasks, void (*fn)(int task, void *data),
void *data)
{
pid_t pid[tasks];
int i;
for (i = 0; i < tasks; i++) {
pid[i] = fork();
if (pid[i] == 0) {
fn(i, data);
exit(0);
} else if (pid[i] == -1) {
printf("Couldn't spawn #%d process!\n", i);
exit(1);
}
}
for (i = 0; i < tasks; i++) {
int status;
assert(waitpid(pid[i], &status, 0) == pid[i]);
assert(status == 0);
}
}
static void test_map_stress(void)
{
run_parallel(100, test_hashmap, NULL);
run_parallel(100, test_hashmap_percpu, NULL);
bpf: fix hashmap extra_elems logic In both kmalloc and prealloc mode the bpf_map_update_elem() is using per-cpu extra_elems to do atomic update when the map is full. There are two issues with it. The logic can be misused, since it allows max_entries+num_cpus elements to be present in the map. And alloc_extra_elems() at map creation time can fail percpu alloc for large map values with a warn: WARNING: CPU: 3 PID: 2752 at ../mm/percpu.c:892 pcpu_alloc+0x119/0xa60 illegal size (32824) or align (8) for percpu allocation The fixes for both of these issues are different for kmalloc and prealloc modes. For prealloc mode allocate extra num_possible_cpus elements and store their pointers into extra_elems array instead of actual elements. Hence we can use these hidden(spare) elements not only when the map is full but during bpf_map_update_elem() that replaces existing element too. That also improves performance, since pcpu_freelist_pop/push is avoided. Unfortunately this approach cannot be used for kmalloc mode which needs to kfree elements after rcu grace period. Therefore switch it back to normal kmalloc even when full and old element exists like it was prior to commit 6c9059817432 ("bpf: pre-allocate hash map elements"). Add tests to check for over max_entries and large map values. Reported-by: Dave Jones <davej@codemonkey.org.uk> Fixes: 6c9059817432 ("bpf: pre-allocate hash map elements") Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: Daniel Borkmann <daniel@iogearbox.net> Acked-by: Martin KaFai Lau <kafai@fb.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2017-03-22 10:05:04 +08:00
run_parallel(100, test_hashmap_sizes, NULL);
run_parallel(100, test_hashmap_walk, NULL);
run_parallel(100, test_arraymap, NULL);
run_parallel(100, test_arraymap_percpu, NULL);
}
#define TASKS 1024
#define DO_UPDATE 1
#define DO_DELETE 0
static void test_update_delete(int fn, void *data)
{
int do_update = ((int *)data)[1];
int fd = ((int *)data)[0];
int i, key, value;
for (i = fn; i < MAP_SIZE; i += TASKS) {
key = value = i;
if (do_update) {
assert(bpf_map_update_elem(fd, &key, &value,
BPF_NOEXIST) == 0);
assert(bpf_map_update_elem(fd, &key, &value,
BPF_EXIST) == 0);
} else {
assert(bpf_map_delete_elem(fd, &key) == 0);
}
}
}
static void test_map_parallel(void)
{
int i, fd, key = 0, value = 0;
int data[2];
fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
MAP_SIZE, map_flags);
if (fd < 0) {
printf("Failed to create map for parallel test '%s'!\n",
strerror(errno));
exit(1);
}
/* Use the same fd in children to add elements to this map:
* child_0 adds key=0, key=1024, key=2048, ...
* child_1 adds key=1, key=1025, key=2049, ...
* child_1023 adds key=1023, ...
*/
data[0] = fd;
data[1] = DO_UPDATE;
run_parallel(TASKS, test_update_delete, data);
/* Check that key=0 is already there. */
assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
errno == EEXIST);
/* Check that all elements were inserted. */
assert(bpf_map_get_next_key(fd, NULL, &key) == 0);
key = -1;
for (i = 0; i < MAP_SIZE; i++)
assert(bpf_map_get_next_key(fd, &key, &key) == 0);
assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT);
/* Another check for all elements */
for (i = 0; i < MAP_SIZE; i++) {
key = MAP_SIZE - i - 1;
assert(bpf_map_lookup_elem(fd, &key, &value) == 0 &&
value == key);
}
/* Now let's delete all elemenets in parallel. */
data[1] = DO_DELETE;
run_parallel(TASKS, test_update_delete, data);
/* Nothing should be left. */
key = -1;
assert(bpf_map_get_next_key(fd, NULL, &key) == -1 && errno == ENOENT);
assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT);
}
static void test_map_rdonly(void)
{
int fd, key = 0, value = 0;
fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
MAP_SIZE, map_flags | BPF_F_RDONLY);
if (fd < 0) {
printf("Failed to create map for read only test '%s'!\n",
strerror(errno));
exit(1);
}
key = 1;
value = 1234;
/* Insert key=1 element. */
assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == -1 &&
errno == EPERM);
/* Check that key=2 is not found. */
assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
assert(bpf_map_get_next_key(fd, &key, &value) == -1 && errno == ENOENT);
}
static void test_map_wronly(void)
{
int fd, key = 0, value = 0;
fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
MAP_SIZE, map_flags | BPF_F_WRONLY);
if (fd < 0) {
printf("Failed to create map for read only test '%s'!\n",
strerror(errno));
exit(1);
}
key = 1;
value = 1234;
/* Insert key=1 element. */
assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
/* Check that key=2 is not found. */
assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == EPERM);
assert(bpf_map_get_next_key(fd, &key, &value) == -1 && errno == EPERM);
}
static void run_all_tests(void)
{
test_hashmap(0, NULL);
test_hashmap_percpu(0, NULL);
test_hashmap_walk(0, NULL);
test_arraymap(0, NULL);
test_arraymap_percpu(0, NULL);
test_arraymap_percpu_many_keys();
test_devmap(0, NULL);
test_sockmap(0, NULL);
test_map_large();
test_map_parallel();
test_map_stress();
test_map_rdonly();
test_map_wronly();
}
int main(void)
{
struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
setrlimit(RLIMIT_MEMLOCK, &rinf);
map_flags = 0;
run_all_tests();
map_flags = BPF_F_NO_PREALLOC;
run_all_tests();
printf("test_maps: OK\n");
return 0;
}