mirror of https://gitee.com/openkylin/linux.git
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Alexei Starovoitov says:

====================
pull-request: bpf-next 2021-07-15

The following pull-request contains BPF updates for your *net-next* tree.

We've added 45 non-merge commits during the last 15 day(s) which contain
a total of 52 files changed, 3122 insertions(+), 384 deletions(-).

The main changes are:

1) Introduce bpf timers, from Alexei.

2) Add sockmap support for unix datagram socket, from Cong.

3) Fix potential memleak and UAF in the verifier, from He.

4) Add bpf_get_func_ip helper, from Jiri.

5) Improvements to generic XDP mode, from Kumar.

6) Support for passing xdp_md to XDP programs in bpf_prog_run, from Zvi.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
commit 82a1ffe57e
@@ -10387,6 +10387,7 @@ F: net/core/skmsg.c
F: net/core/sock_map.c
F: net/ipv4/tcp_bpf.c
F: net/ipv4/udp_bpf.c
F: net/unix/unix_bpf.c

LANDLOCK SECURITY MODULE
M: Mickaël Salaün <mic@digikod.net>

@@ -1954,6 +1954,9 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
    if (flags & BPF_TRAMP_F_CALL_ORIG)
        stack_size += 8; /* room for return value of orig_call */

    if (flags & BPF_TRAMP_F_IP_ARG)
        stack_size += 8; /* room for IP address argument */

    if (flags & BPF_TRAMP_F_SKIP_FRAME)
        /* skip patched call instruction and point orig_call to actual
         * body of the kernel function.
         */

@@ -1967,6 +1970,22 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
    EMIT4(0x48, 0x83, 0xEC, stack_size); /* sub rsp, stack_size */
    EMIT1(0x53);                         /* push rbx */

    if (flags & BPF_TRAMP_F_IP_ARG) {
        /* Store IP address of the traced function:
         * mov rax, QWORD PTR [rbp + 8]
         * sub rax, X86_PATCH_SIZE
         * mov QWORD PTR [rbp - stack_size], rax
         */
        emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, 8);
        EMIT4(0x48, 0x83, 0xe8, X86_PATCH_SIZE);
        emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -stack_size);

        /* Continue with stack_size for regs storage, stack will
         * be correctly restored with 'leave' instruction.
         */
        stack_size -= 8;
    }

    save_regs(m, &prog, nr_args, stack_size);

    if (flags & BPF_TRAMP_F_CALL_ORIG) {

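The BPF_TRAMP_F_IP_ARG handling above stashes the traced function's address on the trampoline stack so the new bpf_get_func_ip() helper can return it. A minimal hedged sketch of how a tracing program might consume it, assuming a kernel and headers built with this series; the attach target do_unlinkat and the program name are only examples:

// SPDX-License-Identifier: GPL-2.0
/* Illustrative fentry program: prints the traced function's address via
 * bpf_get_func_ip(). Attach point and names are examples, not part of
 * this patch set.
 */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

SEC("fentry/do_unlinkat")
int print_func_ip(void *ctx)
{
    u64 ip = bpf_get_func_ip(ctx); /* read from the trampoline's IP slot */

    bpf_printk("traced function at %lx", ip);
    return 0;
}

char LICENSE[] SEC("license") = "GPL";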
@@ -4,6 +4,7 @@

#include <asm/types.h>
#include <linux/bits.h>
#include <linux/typecheck.h>

#include <uapi/linux/kernel.h>

@@ -253,6 +254,55 @@ static __always_inline void __assign_bit(long nr, volatile unsigned long *addr,
        __clear_bit(nr, addr);
}

/**
 * __ptr_set_bit - Set bit in a pointer's value
 * @nr: the bit to set
 * @addr: the address of the pointer variable
 *
 * Example:
 *   void *p = foo();
 *   __ptr_set_bit(bit, &p);
 */
#define __ptr_set_bit(nr, addr)                 \
    ({                                          \
        typecheck_pointer(*(addr));             \
        __set_bit(nr, (unsigned long *)(addr)); \
    })

/**
 * __ptr_clear_bit - Clear bit in a pointer's value
 * @nr: the bit to clear
 * @addr: the address of the pointer variable
 *
 * Example:
 *   void *p = foo();
 *   __ptr_clear_bit(bit, &p);
 */
#define __ptr_clear_bit(nr, addr)                 \
    ({                                            \
        typecheck_pointer(*(addr));               \
        __clear_bit(nr, (unsigned long *)(addr)); \
    })

/**
 * __ptr_test_bit - Test bit in a pointer's value
 * @nr: the bit to test
 * @addr: the address of the pointer variable
 *
 * Example:
 *   void *p = foo();
 *   if (__ptr_test_bit(bit, &p)) {
 *     ...
 *   } else {
 *     ...
 *   }
 */
#define __ptr_test_bit(nr, addr)                \
    ({                                          \
        typecheck_pointer(*(addr));             \
        test_bit(nr, (unsigned long *)(addr));  \
    })

#ifdef __KERNEL__

#ifndef set_mask_bits

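These macros are used later in this series by cpumap to tag sk_buff pointers that share a ptr_ring with xdp_frame pointers. A small illustrative sketch of that tagging idiom; the helper names here are made up, not from the patch:

/* Illustrative only: use bit 0 of a pointer as a type tag, as cpumap
 * does to distinguish skbs from xdp_frames in the same void * slot.
 * Pointers are at least 2-byte aligned, so bit 0 is free to use.
 */
static void enqueue_skb(void **slot, struct sk_buff *skb)
{
    __ptr_set_bit(0, &skb);     /* typechecks that skb is a pointer */
    *slot = skb;
}

static struct sk_buff *dequeue_if_skb(void *entry)
{
    if (!__ptr_test_bit(0, &entry))
        return NULL;            /* an xdp_frame, not an skb */
    __ptr_clear_bit(0, &entry); /* restore the real pointer */
    return entry;
}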
@@ -168,6 +168,7 @@ struct bpf_map {
    u32 max_entries;
    u32 map_flags;
    int spin_lock_off; /* >=0 valid offset, <0 error */
    int timer_off; /* >=0 valid offset, <0 error */
    u32 id;
    int numa_node;
    u32 btf_key_type_id;

@@ -197,30 +198,53 @@ static inline bool map_value_has_spin_lock(const struct bpf_map *map)
    return map->spin_lock_off >= 0;
}

static inline void check_and_init_map_lock(struct bpf_map *map, void *dst)
static inline bool map_value_has_timer(const struct bpf_map *map)
{
    if (likely(!map_value_has_spin_lock(map)))
        return;
    *(struct bpf_spin_lock *)(dst + map->spin_lock_off) =
        (struct bpf_spin_lock){};
    return map->timer_off >= 0;
}

/* copy everything but bpf_spin_lock */
static inline void check_and_init_map_value(struct bpf_map *map, void *dst)
{
    if (unlikely(map_value_has_spin_lock(map)))
        *(struct bpf_spin_lock *)(dst + map->spin_lock_off) =
            (struct bpf_spin_lock){};
    if (unlikely(map_value_has_timer(map)))
        *(struct bpf_timer *)(dst + map->timer_off) =
            (struct bpf_timer){};
}

/* copy everything but bpf_spin_lock and bpf_timer. There could be one of each. */
static inline void copy_map_value(struct bpf_map *map, void *dst, void *src)
{
    if (unlikely(map_value_has_spin_lock(map))) {
        u32 off = map->spin_lock_off;
    u32 s_off = 0, s_sz = 0, t_off = 0, t_sz = 0;

        memcpy(dst, src, off);
        memcpy(dst + off + sizeof(struct bpf_spin_lock),
               src + off + sizeof(struct bpf_spin_lock),
               map->value_size - off - sizeof(struct bpf_spin_lock));
    if (unlikely(map_value_has_spin_lock(map))) {
        s_off = map->spin_lock_off;
        s_sz = sizeof(struct bpf_spin_lock);
    } else if (unlikely(map_value_has_timer(map))) {
        t_off = map->timer_off;
        t_sz = sizeof(struct bpf_timer);
    }

    if (unlikely(s_sz || t_sz)) {
        if (s_off < t_off || !s_sz) {
            swap(s_off, t_off);
            swap(s_sz, t_sz);
        }
        memcpy(dst, src, t_off);
        memcpy(dst + t_off + t_sz,
               src + t_off + t_sz,
               s_off - t_off - t_sz);
        memcpy(dst + s_off + s_sz,
               src + s_off + s_sz,
               map->value_size - s_off - s_sz);
    } else {
        memcpy(dst, src, map->value_size);
    }
}
void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
                           bool lock_src);
void bpf_timer_cancel_and_free(void *timer);
int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size);

struct bpf_offload_dev;

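For reference, copy_map_value() treats the lock and the timer as holes in the element. A hedged sketch of a BPF-side map value that carries both special fields; the struct and map names are illustrative, not from the patch:

/* Illustrative only: a map value holding both fields the kernel tracks.
 * copy_map_value() copies "counter" but skips "lock" (spin_lock_off)
 * and "timer" (timer_off).
 */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

struct elem {
    struct bpf_spin_lock lock;  /* located via btf_find_spin_lock() */
    struct bpf_timer timer;     /* located via btf_find_timer() */
    __u64 counter;              /* plain data, copied normally */
};

struct {
    __uint(type, BPF_MAP_TYPE_HASH);
    __uint(max_entries, 128);
    __type(key, __u32);
    __type(value, struct elem);
} elems SEC(".maps");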
@@ -314,6 +338,7 @@ enum bpf_arg_type {
    ARG_PTR_TO_FUNC,          /* pointer to a bpf program function */
    ARG_PTR_TO_STACK_OR_NULL, /* pointer to stack or NULL */
    ARG_PTR_TO_CONST_STR,     /* pointer to a null terminated read-only string */
    ARG_PTR_TO_TIMER,         /* pointer to bpf_timer */
    __BPF_ARG_TYPE_MAX,
};

@@ -554,6 +579,11 @@ struct btf_func_model {
 */
#define BPF_TRAMP_F_SKIP_FRAME BIT(2)

/* Store IP address of the caller on the trampoline stack,
 * so it's available for trampoline's programs.
 */
#define BPF_TRAMP_F_IP_ARG BIT(3)

/* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
 * bytes on x86. Pick a number to fit into BPF_IMAGE_SIZE / 2
 */

@@ -1509,12 +1539,12 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
                           struct bpf_prog *xdp_prog, struct bpf_map *map,
                           bool exclude_ingress);
bool dev_map_can_have_prog(struct bpf_map *map);

void __cpu_map_flush(void);
int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
                    struct net_device *dev_rx);
bool cpu_map_prog_allowed(struct bpf_map *map);
int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu,
                             struct sk_buff *skb);

/* Return map's numa specified by userspace */
static inline int bpf_map_attr_numa_node(const union bpf_attr *attr)

@@ -1711,6 +1741,12 @@ static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu,
    return 0;
}

static inline int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu,
                                           struct sk_buff *skb)
{
    return -EOPNOTSUPP;
}

static inline bool cpu_map_prog_allowed(struct bpf_map *map)
{
    return false;

@@ -1852,6 +1888,12 @@ void bpf_map_offload_map_free(struct bpf_map *map);
int bpf_prog_test_run_syscall(struct bpf_prog *prog,
                              const union bpf_attr *kattr,
                              union bpf_attr __user *uattr);

int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog);
int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype);
int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags);
void sock_map_unhash(struct sock *sk);
void sock_map_close(struct sock *sk, long timeout);
#else
static inline int bpf_prog_offload_init(struct bpf_prog *prog,
                                        union bpf_attr *attr)

@@ -1884,24 +1926,6 @@ static inline int bpf_prog_test_run_syscall(struct bpf_prog *prog,
{
    return -ENOTSUPP;
}
#endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */

#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog);
int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype);
int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags);
void sock_map_unhash(struct sock *sk);
void sock_map_close(struct sock *sk, long timeout);

void bpf_sk_reuseport_detach(struct sock *sk);
int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key,
                                       void *value);
int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key,
                                       void *value, u64 map_flags);
#else
static inline void bpf_sk_reuseport_detach(struct sock *sk)
{
}

#ifdef CONFIG_BPF_SYSCALL
static inline int sock_map_get_from_fd(const union bpf_attr *attr,

@@ -1921,7 +1945,21 @@ static inline int sock_map_update_elem_sys(struct bpf_map *map, void *key, void
{
    return -EOPNOTSUPP;
}
#endif /* CONFIG_BPF_SYSCALL */
#endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */

#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
void bpf_sk_reuseport_detach(struct sock *sk);
int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key,
                                       void *value);
int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key,
                                       void *value, u64 map_flags);
#else
static inline void bpf_sk_reuseport_detach(struct sock *sk)
{
}

#ifdef CONFIG_BPF_SYSCALL
static inline int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map,
                                                     void *key, void *value)
{

@@ -53,7 +53,14 @@ struct bpf_reg_state {
        /* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE |
         * PTR_TO_MAP_VALUE_OR_NULL
         */
        struct bpf_map *map_ptr;
        struct {
            struct bpf_map *map_ptr;
            /* To distinguish map lookups from outer map
             * the map_uid is non-zero for registers
             * pointing to inner maps.
             */
            u32 map_uid;
        };

        /* for PTR_TO_BTF_ID */
        struct {

@@ -201,12 +208,19 @@ struct bpf_func_state {
     * zero == main subprog
     */
    u32 subprogno;
    /* Every bpf_timer_start will increment async_entry_cnt.
     * It's used to distinguish:
     * void foo(void) { for(;;); }
     * void foo(void) { bpf_timer_set_callback(,foo); }
     */
    u32 async_entry_cnt;
    bool in_callback_fn;
    bool in_async_callback_fn;

    /* The following fields should be last. See copy_func_state() */
    int acquired_refs;
    struct bpf_reference_state *refs;
    int allocated_stack;
    bool in_callback_fn;
    struct bpf_stack_state *stack;
};

@@ -392,6 +406,7 @@ struct bpf_subprog_info {
    bool has_tail_call;
    bool tail_call_reachable;
    bool has_ld_abs;
    bool is_async_cb;
};

/* single container for all structs

@@ -99,6 +99,7 @@ bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s,
                           const struct btf_member *m,
                           u32 expected_offset, u32 expected_size);
int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t);
int btf_find_timer(const struct btf *btf, const struct btf_type *t);
bool btf_type_is_void(const struct btf_type *t);
s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind);
const struct btf_type *btf_type_skip_modifiers(const struct btf *btf,

@@ -559,7 +559,8 @@ struct bpf_prog {
                kprobe_override:1, /* Do we override a kprobe? */
                has_callchain_buf:1, /* callchain buffer allocated? */
                enforce_expected_attach_type:1, /* Enforce expected_attach_type checking at attach time */
                call_get_stack:1; /* Do we call bpf_get_stack() or bpf_get_stackid() */
                call_get_stack:1, /* Do we call bpf_get_stack() or bpf_get_stackid() */
                call_get_func_ip:1; /* Do we call get_func_ip() */
    enum bpf_prog_type type; /* Type of BPF program */
    enum bpf_attach_type expected_attach_type; /* For some prog types */
    u32 len; /* Number of filter blocks */

@@ -3984,6 +3984,8 @@ static inline void dev_consume_skb_any(struct sk_buff *skb)
    __dev_kfree_skb_any(skb, SKB_REASON_CONSUMED);
}

u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp,
                             struct bpf_prog *xdp_prog);
void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog);
int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb);
int netif_rx(struct sk_buff *skb);

@@ -863,8 +863,8 @@ struct sk_buff {
    __u8 tc_skip_classify:1;
    __u8 tc_at_ingress:1;
#endif
#ifdef CONFIG_NET_REDIRECT
    __u8 redirected:1;
#ifdef CONFIG_NET_REDIRECT
    __u8 from_ingress:1;
#endif
#ifdef CONFIG_TLS_DEVICE

@@ -4664,17 +4664,13 @@ static inline __wsum lco_csum(struct sk_buff *skb)

static inline bool skb_is_redirected(const struct sk_buff *skb)
{
#ifdef CONFIG_NET_REDIRECT
    return skb->redirected;
#else
    return false;
#endif
}

static inline void skb_set_redirected(struct sk_buff *skb, bool from_ingress)
{
#ifdef CONFIG_NET_REDIRECT
    skb->redirected = 1;
#ifdef CONFIG_NET_REDIRECT
    skb->from_ingress = from_ingress;
    if (skb->from_ingress)
        skb->tstamp = 0;

@@ -4683,9 +4679,7 @@ static inline void skb_set_redirected(struct sk_buff *skb, bool from_ingress)

static inline void skb_reset_redirect(struct sk_buff *skb)
{
#ifdef CONFIG_NET_REDIRECT
    skb->redirected = 0;
#endif
}

static inline bool skb_csum_is_sctp(struct sk_buff *skb)

@@ -22,4 +22,13 @@
    (void)__tmp; \
})

/*
 * Check at compile time that something is a pointer type.
 */
#define typecheck_pointer(x) \
({  typeof(x) __dummy; \
    (void)sizeof(*__dummy); \
    1; \
})

#endif /* TYPECHECK_H_INCLUDED */

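A tiny illustration (not from the patch) of what the macro enforces:

/* Illustrative only: typecheck_pointer() expands to 1 for pointer
 * expressions and breaks the build for anything else, because
 * sizeof(*__dummy) is invalid when __dummy is not a pointer.
 */
#include <linux/typecheck.h>
#include <linux/build_bug.h>

static inline void typecheck_pointer_demo(void)
{
    void *p = NULL;

    BUILD_BUG_ON(!typecheck_pointer(p)); /* compiles: p is a pointer */
    /* BUILD_BUG_ON(!typecheck_pointer(0)); would fail: 0 is an int */
}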
@@ -82,6 +82,8 @@ static inline struct unix_sock *unix_sk(const struct sock *sk)
long unix_inq_len(struct sock *sk);
long unix_outq_len(struct sock *sk);

int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
                         int flags);
#ifdef CONFIG_SYSCTL
int unix_sysctl_register(struct net *net);
void unix_sysctl_unregister(struct net *net);

@@ -89,4 +91,14 @@ void unix_sysctl_unregister(struct net *net);
static inline int unix_sysctl_register(struct net *net) { return 0; }
static inline void unix_sysctl_unregister(struct net *net) {}
#endif

#ifdef CONFIG_BPF_SYSCALL
extern struct proto unix_proto;

int unix_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
void __init unix_bpf_build_proto(void);
#else
static inline void __init unix_bpf_build_proto(void)
{}
#endif
#endif

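These hooks are what let AF_UNIX datagram sockets live in a sockmap (changelog item 2). A hedged user-space sketch of inserting such a socket; the map is assumed to already exist, key 0 is arbitrary, and error handling is trimmed:

/* Illustrative user-space sketch: put a connected AF_UNIX datagram
 * socket into a BPF_MAP_TYPE_SOCKMAP. map_fd is assumed to refer to an
 * existing sockmap.
 */
#include <string.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <bpf/bpf.h>

static int add_unix_dgram_to_sockmap(int map_fd, const char *path)
{
    struct sockaddr_un addr = { .sun_family = AF_UNIX };
    int fd = socket(AF_UNIX, SOCK_DGRAM, 0);
    __u32 key = 0;
    __u64 value;

    strncpy(addr.sun_path, path, sizeof(addr.sun_path) - 1);
    if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)))
        return -1;

    value = fd; /* sockmap values are socket FDs when updated from user space */
    return bpf_map_update_elem(map_fd, &key, &value, BPF_ANY);
}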
@@ -276,6 +276,11 @@ xdp_data_meta_unsupported(const struct xdp_buff *xdp)
    return unlikely(xdp->data_meta > xdp->data);
}

static inline bool xdp_metalen_invalid(unsigned long metalen)
{
    return (metalen & (sizeof(__u32) - 1)) || (metalen > 32);
}

struct xdp_attachment_info {
    struct bpf_prog *prog;
    u32 flags;

@@ -324,9 +324,6 @@ union bpf_iter_link_info {
 *    **BPF_PROG_TYPE_SK_LOOKUP**
 *      *data_in* and *data_out* must be NULL.
 *
 *    **BPF_PROG_TYPE_XDP**
 *      *ctx_in* and *ctx_out* must be NULL.
 *
 *    **BPF_PROG_TYPE_RAW_TRACEPOINT**,
 *    **BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE**
 *

@@ -3249,7 +3246,7 @@ union bpf_attr {
 * long bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags)
 *  Description
 *    Select a **SO_REUSEPORT** socket from a
 *    **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*.
 *    **BPF_MAP_TYPE_REUSEPORT_SOCKARRAY** *map*.
 *    It checks the selected socket is matching the incoming
 *    request in the socket buffer.
 *  Return

@@ -4780,6 +4777,76 @@ union bpf_attr {
 *    Execute close syscall for given FD.
 *  Return
 *    A syscall result.
 *
 * long bpf_timer_init(struct bpf_timer *timer, struct bpf_map *map, u64 flags)
 *  Description
 *    Initialize the timer.
 *    First 4 bits of *flags* specify clockid.
 *    Only CLOCK_MONOTONIC, CLOCK_REALTIME, CLOCK_BOOTTIME are allowed.
 *    All other bits of *flags* are reserved.
 *    The verifier will reject the program if *timer* is not from
 *    the same *map*.
 *  Return
 *    0 on success.
 *    **-EBUSY** if *timer* is already initialized.
 *    **-EINVAL** if invalid *flags* are passed.
 *    **-EPERM** if *timer* is in a map that doesn't have any user references.
 *    The user space should either hold a file descriptor to a map with timers
 *    or pin such map in bpffs. When map is unpinned or file descriptor is
 *    closed all timers in the map will be cancelled and freed.
 *
 * long bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn)
 *  Description
 *    Configure the timer to call *callback_fn* static function.
 *  Return
 *    0 on success.
 *    **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier.
 *    **-EPERM** if *timer* is in a map that doesn't have any user references.
 *    The user space should either hold a file descriptor to a map with timers
 *    or pin such map in bpffs. When map is unpinned or file descriptor is
 *    closed all timers in the map will be cancelled and freed.
 *
 * long bpf_timer_start(struct bpf_timer *timer, u64 nsecs, u64 flags)
 *  Description
 *    Set timer expiration N nanoseconds from the current time. The
 *    configured callback will be invoked in soft irq context on some cpu
 *    and will not repeat unless another bpf_timer_start() is made.
 *    In such case the next invocation can migrate to a different cpu.
 *    Since struct bpf_timer is a field inside map element the map
 *    owns the timer. The bpf_timer_set_callback() will increment refcnt
 *    of BPF program to make sure that callback_fn code stays valid.
 *    When user space reference to a map reaches zero all timers
 *    in a map are cancelled and corresponding program's refcnts are
 *    decremented. This is done to make sure that Ctrl-C of a user
 *    process doesn't leave any timers running. If map is pinned in
 *    bpffs the callback_fn can re-arm itself indefinitely.
 *    bpf_map_update/delete_elem() helpers and user space sys_bpf commands
 *    cancel and free the timer in the given map element.
 *    The map can contain timers that invoke callback_fn-s from different
 *    programs. The same callback_fn can serve different timers from
 *    different maps if key/value layout matches across maps.
 *    Every bpf_timer_set_callback() can have different callback_fn.
 *
 *  Return
 *    0 on success.
 *    **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier
 *    or invalid *flags* are passed.
 *
 * long bpf_timer_cancel(struct bpf_timer *timer)
 *  Description
 *    Cancel the timer and wait for callback_fn to finish if it was running.
 *  Return
 *    0 if the timer was not active.
 *    1 if the timer was active.
 *    **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier.
 *    **-EDEADLK** if callback_fn tried to call bpf_timer_cancel() on its
 *    own timer which would have led to a deadlock otherwise.
 *
 * u64 bpf_get_func_ip(void *ctx)
 *  Description
 *    Get address of the traced function (for tracing and kprobe programs).
 *  Return
 *    Address of the traced function.
 */
#define __BPF_FUNC_MAPPER(FN) \
    FN(unspec), \

@@ -4951,6 +5018,11 @@ union bpf_attr {
    FN(sys_bpf), \
    FN(btf_find_by_name_kind), \
    FN(sys_close), \
    FN(timer_init), \
    FN(timer_set_callback), \
    FN(timer_start), \
    FN(timer_cancel), \
    FN(get_func_ip), \
    /* */

/* integer value in 'imm' field of BPF_CALL instruction selects which helper

@@ -6077,6 +6149,11 @@ struct bpf_spin_lock {
    __u32 val;
};

struct bpf_timer {
    __u64 :64;
    __u64 :64;
} __attribute__((aligned(8)));

struct bpf_sysctl {
    __u32 write; /* Sysctl is being read (= 0) or written (= 1).
                  * Allows 1,2,4-byte read, but no write.

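Putting the documented helpers together, a minimal hedged sketch of arming a timer kept in a map element. Only struct bpf_timer and the four timer helpers come from this series; the map layout, attach point, and names are examples:

/* Minimal bpf_timer usage sketch. Requires headers generated against a
 * kernel with this series; names other than the helpers are made up.
 */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

#define CLOCK_MONOTONIC 1

struct elem {
    struct bpf_timer t;
};

struct {
    __uint(type, BPF_MAP_TYPE_ARRAY);
    __uint(max_entries, 1);
    __type(key, int);
    __type(value, struct elem);
} timer_map SEC(".maps");

static int timer_cb(void *map, int *key, struct elem *val)
{
    /* Runs later in softirq context; a callback may re-arm itself. */
    bpf_timer_start(&val->t, 1000000000 /* 1s */, 0);
    return 0;
}

SEC("fentry/bpf_fentry_test1")
int arm_timer(void *ctx)
{
    int key = 0;
    struct elem *val = bpf_map_lookup_elem(&timer_map, &key);

    if (!val)
        return 0;
    bpf_timer_init(&val->t, &timer_map, CLOCK_MONOTONIC);
    bpf_timer_set_callback(&val->t, timer_cb);
    bpf_timer_start(&val->t, 0 /* expire as soon as possible */, 0);
    return 0;
}

char LICENSE[] SEC("license") = "GPL";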
@@ -29,7 +29,7 @@ config BPF_SYSCALL
    select IRQ_WORK
    select TASKS_TRACE_RCU
    select BINARY_PRINTF
    select NET_SOCK_MSG if INET
    select NET_SOCK_MSG if NET
    default n
    help
      Enable the bpf() system call that allows to manipulate BPF programs

@@ -287,6 +287,12 @@ static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key
    return 0;
}

static void check_and_free_timer_in_array(struct bpf_array *arr, void *val)
{
    if (unlikely(map_value_has_timer(&arr->map)))
        bpf_timer_cancel_and_free(val + arr->map.timer_off);
}

/* Called from syscall or from eBPF program */
static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
                                 u64 map_flags)

@@ -321,6 +327,7 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
            copy_map_value_locked(map, val, value, false);
        else
            copy_map_value(map, val, value);
        check_and_free_timer_in_array(array, val);
    }
    return 0;
}

@@ -374,6 +381,19 @@ static void *array_map_vmalloc_addr(struct bpf_array *array)
    return (void *)round_down((unsigned long)array, PAGE_SIZE);
}

static void array_map_free_timers(struct bpf_map *map)
{
    struct bpf_array *array = container_of(map, struct bpf_array, map);
    int i;

    if (likely(!map_value_has_timer(map)))
        return;

    for (i = 0; i < array->map.max_entries; i++)
        bpf_timer_cancel_and_free(array->value + array->elem_size * i +
                                  map->timer_off);
}

/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
static void array_map_free(struct bpf_map *map)
{

@@ -668,6 +688,7 @@ const struct bpf_map_ops array_map_ops = {
    .map_alloc = array_map_alloc,
    .map_free = array_map_free,
    .map_get_next_key = array_map_get_next_key,
    .map_release_uref = array_map_free_timers,
    .map_lookup_elem = array_map_lookup_elem,
    .map_update_elem = array_map_update_elem,
    .map_delete_elem = array_map_delete_elem,

@@ -3046,43 +3046,92 @@ static void btf_struct_log(struct btf_verifier_env *env,
    btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t));
}

/* find 'struct bpf_spin_lock' in map value.
 * return >= 0 offset if found
 * and < 0 in case of error
 */
int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t)
static int btf_find_struct_field(const struct btf *btf, const struct btf_type *t,
                                 const char *name, int sz, int align)
{
    const struct btf_member *member;
    u32 i, off = -ENOENT;

    if (!__btf_type_is_struct(t))
        return -EINVAL;

    for_each_member(i, t, member) {
        const struct btf_type *member_type = btf_type_by_id(btf,
                                                            member->type);
        if (!__btf_type_is_struct(member_type))
            continue;
        if (member_type->size != sizeof(struct bpf_spin_lock))
        if (member_type->size != sz)
            continue;
        if (strcmp(__btf_name_by_offset(btf, member_type->name_off),
                   "bpf_spin_lock"))
        if (strcmp(__btf_name_by_offset(btf, member_type->name_off), name))
            continue;
        if (off != -ENOENT)
            /* only one 'struct bpf_spin_lock' is allowed */
            /* only one such field is allowed */
            return -E2BIG;
        off = btf_member_bit_offset(t, member);
        if (off % 8)
            /* valid C code cannot generate such BTF */
            return -EINVAL;
        off /= 8;
        if (off % __alignof__(struct bpf_spin_lock))
            /* valid struct bpf_spin_lock will be 4 byte aligned */
        if (off % align)
            return -EINVAL;
    }
    return off;
}

static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t,
                                const char *name, int sz, int align)
{
    const struct btf_var_secinfo *vsi;
    u32 i, off = -ENOENT;

    for_each_vsi(i, t, vsi) {
        const struct btf_type *var = btf_type_by_id(btf, vsi->type);
        const struct btf_type *var_type = btf_type_by_id(btf, var->type);

        if (!__btf_type_is_struct(var_type))
            continue;
        if (var_type->size != sz)
            continue;
        if (vsi->size != sz)
            continue;
        if (strcmp(__btf_name_by_offset(btf, var_type->name_off), name))
            continue;
        if (off != -ENOENT)
            /* only one such field is allowed */
            return -E2BIG;
        off = vsi->offset;
        if (off % align)
            return -EINVAL;
    }
    return off;
}

static int btf_find_field(const struct btf *btf, const struct btf_type *t,
                          const char *name, int sz, int align)
{

    if (__btf_type_is_struct(t))
        return btf_find_struct_field(btf, t, name, sz, align);
    else if (btf_type_is_datasec(t))
        return btf_find_datasec_var(btf, t, name, sz, align);
    return -EINVAL;
}

/* find 'struct bpf_spin_lock' in map value.
 * return >= 0 offset if found
 * and < 0 in case of error
 */
int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t)
{
    return btf_find_field(btf, t, "bpf_spin_lock",
                          sizeof(struct bpf_spin_lock),
                          __alignof__(struct bpf_spin_lock));
}

int btf_find_timer(const struct btf *btf, const struct btf_type *t)
{
    return btf_find_field(btf, t, "bpf_timer",
                          sizeof(struct bpf_timer),
                          __alignof__(struct bpf_timer));
}

static void __btf_struct_show(const struct btf *btf, const struct btf_type *t,
                              u32 type_id, void *data, u8 bits_offset,
                              struct btf_show *show)

@@ -16,6 +16,7 @@
 * netstack, and assigning dedicated CPUs for this stage. This
 * basically allows for 10G wirespeed pre-filtering via bpf.
 */
#include <linux/bitops.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/ptr_ring.h>

@@ -168,6 +169,46 @@ static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
    }
}

static void cpu_map_bpf_prog_run_skb(struct bpf_cpu_map_entry *rcpu,
                                     struct list_head *listp,
                                     struct xdp_cpumap_stats *stats)
{
    struct sk_buff *skb, *tmp;
    struct xdp_buff xdp;
    u32 act;
    int err;

    list_for_each_entry_safe(skb, tmp, listp, list) {
        act = bpf_prog_run_generic_xdp(skb, &xdp, rcpu->prog);
        switch (act) {
        case XDP_PASS:
            break;
        case XDP_REDIRECT:
            skb_list_del_init(skb);
            err = xdp_do_generic_redirect(skb->dev, skb, &xdp,
                                          rcpu->prog);
            if (unlikely(err)) {
                kfree_skb(skb);
                stats->drop++;
            } else {
                stats->redirect++;
            }
            return;
        default:
            bpf_warn_invalid_xdp_action(act);
            fallthrough;
        case XDP_ABORTED:
            trace_xdp_exception(skb->dev, rcpu->prog, act);
            fallthrough;
        case XDP_DROP:
            skb_list_del_init(skb);
            kfree_skb(skb);
            stats->drop++;
            return;
        }
    }
}

static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu,
                                    void **frames, int n,
                                    struct xdp_cpumap_stats *stats)

@@ -176,11 +217,6 @@ static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu,
    struct xdp_buff xdp;
    int i, nframes = 0;

    if (!rcpu->prog)
        return n;

    rcu_read_lock_bh();

    xdp_set_return_frame_no_direct();
    xdp.rxq = &rxq;

@@ -227,17 +263,37 @@ static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu,
        }
    }

    if (stats->redirect)
        xdp_do_flush_map();

    xdp_clear_return_frame_no_direct();

    return nframes;
}

#define CPUMAP_BATCH 8

static int cpu_map_bpf_prog_run(struct bpf_cpu_map_entry *rcpu, void **frames,
                                int xdp_n, struct xdp_cpumap_stats *stats,
                                struct list_head *list)
{
    int nframes;

    if (!rcpu->prog)
        return xdp_n;

    rcu_read_lock_bh();

    nframes = cpu_map_bpf_prog_run_xdp(rcpu, frames, xdp_n, stats);

    if (stats->redirect)
        xdp_do_flush();

    if (unlikely(!list_empty(list)))
        cpu_map_bpf_prog_run_skb(rcpu, list, stats);

    rcu_read_unlock_bh(); /* resched point, may call do_softirq() */

    return nframes;
}

#define CPUMAP_BATCH 8

static int cpu_map_kthread_run(void *data)
{

@@ -254,9 +310,9 @@ static int cpu_map_kthread_run(void *data)
        struct xdp_cpumap_stats stats = {}; /* zero stats */
        unsigned int kmem_alloc_drops = 0, sched = 0;
        gfp_t gfp = __GFP_ZERO | GFP_ATOMIC;
        int i, n, m, nframes, xdp_n;
        void *frames[CPUMAP_BATCH];
        void *skbs[CPUMAP_BATCH];
        int i, n, m, nframes;
        LIST_HEAD(list);

        /* Release CPU reschedule checks */

@@ -280,9 +336,20 @@ static int cpu_map_kthread_run(void *data)
         */
        n = __ptr_ring_consume_batched(rcpu->queue, frames,
                                       CPUMAP_BATCH);
        for (i = 0; i < n; i++) {
        for (i = 0, xdp_n = 0; i < n; i++) {
            void *f = frames[i];
            struct page *page = virt_to_page(f);
            struct page *page;

            if (unlikely(__ptr_test_bit(0, &f))) {
                struct sk_buff *skb = f;

                __ptr_clear_bit(0, &skb);
                list_add_tail(&skb->list, &list);
                continue;
            }

            frames[xdp_n++] = f;
            page = virt_to_page(f);

            /* Bring struct page memory area to curr CPU. Read by
             * build_skb_around via page_is_pfmemalloc(), and when

@@ -292,7 +359,7 @@ static int cpu_map_kthread_run(void *data)
        }

        /* Support running another XDP prog on this CPU */
        nframes = cpu_map_bpf_prog_run_xdp(rcpu, frames, n, &stats);
        nframes = cpu_map_bpf_prog_run(rcpu, frames, xdp_n, &stats, &list);
        if (nframes) {
            m = kmem_cache_alloc_bulk(skbuff_head_cache, gfp, nframes, skbs);
            if (unlikely(m == 0)) {

@@ -330,12 +397,6 @@ static int cpu_map_kthread_run(void *data)
    return 0;
}

bool cpu_map_prog_allowed(struct bpf_map *map)
{
    return map->map_type == BPF_MAP_TYPE_CPUMAP &&
           map->value_size != offsetofend(struct bpf_cpumap_val, qsize);
}

static int __cpu_map_load_bpf_program(struct bpf_cpu_map_entry *rcpu, int fd)
{
    struct bpf_prog *prog;

@@ -701,6 +762,25 @@ int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
    return 0;
}

int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu,
                             struct sk_buff *skb)
{
    int ret;

    __skb_pull(skb, skb->mac_len);
    skb_set_redirected(skb, false);
    __ptr_set_bit(0, &skb);

    ret = ptr_ring_produce(rcpu->queue, skb);
    if (ret < 0)
        goto trace;

    wake_up_process(rcpu->kthread);
trace:
    trace_xdp_cpumap_enqueue(rcpu->map_id, !ret, !!ret, rcpu->cpu);
    return ret;
}

void __cpu_map_flush(void)
{
    struct list_head *flush_list = this_cpu_ptr(&cpu_map_flush_list);

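With the cpumap changes above, packets redirected from the generic XDP path are now queued as tagged skbs and run through the program attached to the target CPU's map slot. A hedged user-space sketch of attaching such a program via struct bpf_cpumap_val; the file-descriptor variables and queue size are assumptions, not from the patch:

/* Illustrative user-space sketch: store a queue size and an XDP program
 * fd in a BPF_MAP_TYPE_CPUMAP slot, so redirected packets (including
 * generic-XDP skbs after this series) are processed by that program on
 * the remote CPU. cpumap_fd and prog_fd are assumed to be valid.
 */
#include <bpf/bpf.h>
#include <linux/bpf.h>

static int attach_prog_to_cpu(int cpumap_fd, int prog_fd, __u32 cpu)
{
    struct bpf_cpumap_val val = {
        .qsize = 2048,          /* per-CPU ptr_ring size */
        .bpf_prog.fd = prog_fd, /* program run on the target CPU */
    };

    return bpf_map_update_elem(cpumap_fd, &cpu, &val, BPF_ANY);
}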
@@ -322,16 +322,6 @@ static int dev_map_hash_get_next_key(struct bpf_map *map, void *key,
    return -ENOENT;
}

bool dev_map_can_have_prog(struct bpf_map *map)
{
    if ((map->map_type == BPF_MAP_TYPE_DEVMAP ||
         map->map_type == BPF_MAP_TYPE_DEVMAP_HASH) &&
        map->value_size != offsetofend(struct bpf_devmap_val, ifindex))
        return true;

    return false;
}

static int dev_map_bpf_prog_run(struct bpf_prog *xdp_prog,
                                struct xdp_frame **frames, int n,
                                struct net_device *dev)

@@ -499,6 +489,37 @@ static inline int __xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
    return 0;
}

static u32 dev_map_bpf_prog_run_skb(struct sk_buff *skb, struct bpf_dtab_netdev *dst)
{
    struct xdp_txq_info txq = { .dev = dst->dev };
    struct xdp_buff xdp;
    u32 act;

    if (!dst->xdp_prog)
        return XDP_PASS;

    __skb_pull(skb, skb->mac_len);
    xdp.txq = &txq;

    act = bpf_prog_run_generic_xdp(skb, &xdp, dst->xdp_prog);
    switch (act) {
    case XDP_PASS:
        __skb_push(skb, skb->mac_len);
        break;
    default:
        bpf_warn_invalid_xdp_action(act);
        fallthrough;
    case XDP_ABORTED:
        trace_xdp_exception(dst->dev, dst->xdp_prog, act);
        fallthrough;
    case XDP_DROP:
        kfree_skb(skb);
        break;
    }

    return act;
}

int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
                    struct net_device *dev_rx)
{

@@ -615,6 +636,14 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
    err = xdp_ok_fwd_dev(dst->dev, skb->len);
    if (unlikely(err))
        return err;

    /* Redirect has already succeeded semantically at this point, so we just
     * return 0 even if packet is dropped. Helper below takes care of
     * freeing skb.
     */
    if (dev_map_bpf_prog_run_skb(skb, dst) != XDP_PASS)
        return 0;

    skb->dev = dst->dev;
    generic_xdp_tx(skb, xdp_prog);

@ -228,6 +228,32 @@ static struct htab_elem *get_htab_elem(struct bpf_htab *htab, int i)
|
|||
return (struct htab_elem *) (htab->elems + i * (u64)htab->elem_size);
|
||||
}
|
||||
|
||||
static bool htab_has_extra_elems(struct bpf_htab *htab)
|
||||
{
|
||||
return !htab_is_percpu(htab) && !htab_is_lru(htab);
|
||||
}
|
||||
|
||||
static void htab_free_prealloced_timers(struct bpf_htab *htab)
|
||||
{
|
||||
u32 num_entries = htab->map.max_entries;
|
||||
int i;
|
||||
|
||||
if (likely(!map_value_has_timer(&htab->map)))
|
||||
return;
|
||||
if (htab_has_extra_elems(htab))
|
||||
num_entries += num_possible_cpus();
|
||||
|
||||
for (i = 0; i < num_entries; i++) {
|
||||
struct htab_elem *elem;
|
||||
|
||||
elem = get_htab_elem(htab, i);
|
||||
bpf_timer_cancel_and_free(elem->key +
|
||||
round_up(htab->map.key_size, 8) +
|
||||
htab->map.timer_off);
|
||||
cond_resched();
|
||||
}
|
||||
}
|
||||
|
||||
static void htab_free_elems(struct bpf_htab *htab)
|
||||
{
|
||||
int i;
|
||||
|
@ -265,8 +291,12 @@ static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key,
|
|||
struct htab_elem *l;
|
||||
|
||||
if (node) {
|
||||
u32 key_size = htab->map.key_size;
|
||||
|
||||
l = container_of(node, struct htab_elem, lru_node);
|
||||
memcpy(l->key, key, htab->map.key_size);
|
||||
memcpy(l->key, key, key_size);
|
||||
check_and_init_map_value(&htab->map,
|
||||
l->key + round_up(key_size, 8));
|
||||
return l;
|
||||
}
|
||||
|
||||
|
@ -278,7 +308,7 @@ static int prealloc_init(struct bpf_htab *htab)
|
|||
u32 num_entries = htab->map.max_entries;
|
||||
int err = -ENOMEM, i;
|
||||
|
||||
if (!htab_is_percpu(htab) && !htab_is_lru(htab))
|
||||
if (htab_has_extra_elems(htab))
|
||||
num_entries += num_possible_cpus();
|
||||
|
||||
htab->elems = bpf_map_area_alloc((u64)htab->elem_size * num_entries,
|
||||
|
@ -695,6 +725,14 @@ static int htab_lru_map_gen_lookup(struct bpf_map *map,
|
|||
return insn - insn_buf;
|
||||
}
|
||||
|
||||
static void check_and_free_timer(struct bpf_htab *htab, struct htab_elem *elem)
|
||||
{
|
||||
if (unlikely(map_value_has_timer(&htab->map)))
|
||||
bpf_timer_cancel_and_free(elem->key +
|
||||
round_up(htab->map.key_size, 8) +
|
||||
htab->map.timer_off);
|
||||
}
|
||||
|
||||
/* It is called from the bpf_lru_list when the LRU needs to delete
|
||||
* older elements from the htab.
|
||||
*/
|
||||
|
@ -719,6 +757,7 @@ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
|
|||
hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
|
||||
if (l == tgt_l) {
|
||||
hlist_nulls_del_rcu(&l->hash_node);
|
||||
check_and_free_timer(htab, l);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -790,6 +829,7 @@ static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l)
|
|||
{
|
||||
if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH)
|
||||
free_percpu(htab_elem_get_ptr(l, htab->map.key_size));
|
||||
check_and_free_timer(htab, l);
|
||||
kfree(l);
|
||||
}
|
||||
|
||||
|
@ -817,6 +857,7 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
|
|||
htab_put_fd_value(htab, l);
|
||||
|
||||
if (htab_is_prealloc(htab)) {
|
||||
check_and_free_timer(htab, l);
|
||||
__pcpu_freelist_push(&htab->freelist, &l->fnode);
|
||||
} else {
|
||||
atomic_dec(&htab->count);
|
||||
|
@ -920,8 +961,8 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
|
|||
l_new = ERR_PTR(-ENOMEM);
|
||||
goto dec_count;
|
||||
}
|
||||
check_and_init_map_lock(&htab->map,
|
||||
l_new->key + round_up(key_size, 8));
|
||||
check_and_init_map_value(&htab->map,
|
||||
l_new->key + round_up(key_size, 8));
|
||||
}
|
||||
|
||||
memcpy(l_new->key, key, key_size);
|
||||
|
@ -1062,6 +1103,8 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
|
|||
hlist_nulls_del_rcu(&l_old->hash_node);
|
||||
if (!htab_is_prealloc(htab))
|
||||
free_htab_elem(htab, l_old);
|
||||
else
|
||||
check_and_free_timer(htab, l_old);
|
||||
}
|
||||
ret = 0;
|
||||
err:
|
||||
|
@ -1069,6 +1112,12 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
|
|||
return ret;
|
||||
}
|
||||
|
||||
static void htab_lru_push_free(struct bpf_htab *htab, struct htab_elem *elem)
|
||||
{
|
||||
check_and_free_timer(htab, elem);
|
||||
bpf_lru_push_free(&htab->lru, &elem->lru_node);
|
||||
}
|
||||
|
||||
static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
|
||||
u64 map_flags)
|
||||
{
|
||||
|
@ -1102,7 +1151,8 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
|
|||
l_new = prealloc_lru_pop(htab, key, hash);
|
||||
if (!l_new)
|
||||
return -ENOMEM;
|
||||
memcpy(l_new->key + round_up(map->key_size, 8), value, map->value_size);
|
||||
copy_map_value(&htab->map,
|
||||
l_new->key + round_up(map->key_size, 8), value);
|
||||
|
||||
ret = htab_lock_bucket(htab, b, hash, &flags);
|
||||
if (ret)
|
||||
|
@ -1128,9 +1178,9 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
|
|||
htab_unlock_bucket(htab, b, hash, flags);
|
||||
|
||||
if (ret)
|
||||
bpf_lru_push_free(&htab->lru, &l_new->lru_node);
|
||||
htab_lru_push_free(htab, l_new);
|
||||
else if (l_old)
|
||||
bpf_lru_push_free(&htab->lru, &l_old->lru_node);
|
||||
htab_lru_push_free(htab, l_old);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -1339,7 +1389,7 @@ static int htab_lru_map_delete_elem(struct bpf_map *map, void *key)
|
|||
|
||||
htab_unlock_bucket(htab, b, hash, flags);
|
||||
if (l)
|
||||
bpf_lru_push_free(&htab->lru, &l->lru_node);
|
||||
htab_lru_push_free(htab, l);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -1359,6 +1409,35 @@ static void delete_all_elements(struct bpf_htab *htab)
|
|||
}
|
||||
}
|
||||
|
||||
static void htab_free_malloced_timers(struct bpf_htab *htab)
|
||||
{
|
||||
int i;
|
||||
|
||||
rcu_read_lock();
|
||||
for (i = 0; i < htab->n_buckets; i++) {
|
||||
struct hlist_nulls_head *head = select_bucket(htab, i);
|
||||
struct hlist_nulls_node *n;
|
||||
struct htab_elem *l;
|
||||
|
||||
hlist_nulls_for_each_entry(l, n, head, hash_node)
|
||||
check_and_free_timer(htab, l);
|
||||
cond_resched_rcu();
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
static void htab_map_free_timers(struct bpf_map *map)
|
||||
{
|
||||
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
|
||||
|
||||
if (likely(!map_value_has_timer(&htab->map)))
|
||||
return;
|
||||
if (!htab_is_prealloc(htab))
|
||||
htab_free_malloced_timers(htab);
|
||||
else
|
||||
htab_free_prealloced_timers(htab);
|
||||
}
|
||||
|
||||
/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
|
||||
static void htab_map_free(struct bpf_map *map)
|
||||
{
|
||||
|
@ -1456,7 +1535,7 @@ static int __htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
|
|||
else
|
||||
copy_map_value(map, value, l->key +
|
||||
roundup_key_size);
|
||||
check_and_init_map_lock(map, value);
|
||||
check_and_init_map_value(map, value);
|
||||
}
|
||||
|
||||
hlist_nulls_del_rcu(&l->hash_node);
|
||||
|
@ -1467,7 +1546,7 @@ static int __htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
|
|||
htab_unlock_bucket(htab, b, hash, bflags);
|
||||
|
||||
if (is_lru_map && l)
|
||||
bpf_lru_push_free(&htab->lru, &l->lru_node);
|
||||
htab_lru_push_free(htab, l);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -1645,7 +1724,7 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
|
|||
true);
|
||||
else
|
||||
copy_map_value(map, dst_val, value);
|
||||
check_and_init_map_lock(map, dst_val);
|
||||
check_and_init_map_value(map, dst_val);
|
||||
}
|
||||
if (do_delete) {
|
||||
hlist_nulls_del_rcu(&l->hash_node);
|
||||
|
@ -1672,7 +1751,7 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
|
|||
while (node_to_free) {
|
||||
l = node_to_free;
|
||||
node_to_free = node_to_free->batch_flink;
|
||||
bpf_lru_push_free(&htab->lru, &l->lru_node);
|
||||
htab_lru_push_free(htab, l);
|
||||
}
|
||||
|
||||
next_batch:
|
||||
|
@ -2034,6 +2113,7 @@ const struct bpf_map_ops htab_map_ops = {
|
|||
.map_alloc = htab_map_alloc,
|
||||
.map_free = htab_map_free,
|
||||
.map_get_next_key = htab_map_get_next_key,
|
||||
.map_release_uref = htab_map_free_timers,
|
||||
.map_lookup_elem = htab_map_lookup_elem,
|
||||
.map_lookup_and_delete_elem = htab_map_lookup_and_delete_elem,
|
||||
.map_update_elem = htab_map_update_elem,
|
||||
|
@ -2055,6 +2135,7 @@ const struct bpf_map_ops htab_lru_map_ops = {
|
|||
.map_alloc = htab_map_alloc,
|
||||
.map_free = htab_map_free,
|
||||
.map_get_next_key = htab_map_get_next_key,
|
||||
.map_release_uref = htab_map_free_timers,
|
||||
.map_lookup_elem = htab_lru_map_lookup_elem,
|
||||
.map_lookup_and_delete_elem = htab_lru_map_lookup_and_delete_elem,
|
||||
.map_lookup_elem_sys_only = htab_lru_map_lookup_elem_sys,
|
||||
|
|
|
@ -289,13 +289,18 @@ static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock)
|
|||
|
||||
static DEFINE_PER_CPU(unsigned long, irqsave_flags);
|
||||
|
||||
notrace BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock)
|
||||
static inline void __bpf_spin_lock_irqsave(struct bpf_spin_lock *lock)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
local_irq_save(flags);
|
||||
__bpf_spin_lock(lock);
|
||||
__this_cpu_write(irqsave_flags, flags);
|
||||
}
|
||||
|
||||
notrace BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock)
|
||||
{
|
||||
__bpf_spin_lock_irqsave(lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -306,13 +311,18 @@ const struct bpf_func_proto bpf_spin_lock_proto = {
|
|||
.arg1_type = ARG_PTR_TO_SPIN_LOCK,
|
||||
};
|
||||
|
||||
notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock)
|
||||
static inline void __bpf_spin_unlock_irqrestore(struct bpf_spin_lock *lock)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
flags = __this_cpu_read(irqsave_flags);
|
||||
__bpf_spin_unlock(lock);
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock)
|
||||
{
|
||||
__bpf_spin_unlock_irqrestore(lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -333,9 +343,9 @@ void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
|
|||
else
|
||||
lock = dst + map->spin_lock_off;
|
||||
preempt_disable();
|
||||
____bpf_spin_lock(lock);
|
||||
__bpf_spin_lock_irqsave(lock);
|
||||
copy_map_value(map, dst, src);
|
||||
____bpf_spin_unlock(lock);
|
||||
__bpf_spin_unlock_irqrestore(lock);
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
|
@ -989,6 +999,320 @@ const struct bpf_func_proto bpf_snprintf_proto = {
|
|||
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
|
||||
};
|
||||
|
||||
/* BPF map elements can contain 'struct bpf_timer'.
|
||||
* Such map owns all of its BPF timers.
|
||||
* 'struct bpf_timer' is allocated as part of map element allocation
|
||||
* and it's zero initialized.
|
||||
* That space is used to keep 'struct bpf_timer_kern'.
|
||||
* bpf_timer_init() allocates 'struct bpf_hrtimer', inits hrtimer, and
|
||||
* remembers 'struct bpf_map *' pointer it's part of.
|
||||
* bpf_timer_set_callback() increments prog refcnt and assign bpf callback_fn.
|
||||
* bpf_timer_start() arms the timer.
|
||||
* If user space reference to a map goes to zero at this point
|
||||
* ops->map_release_uref callback is responsible for cancelling the timers,
|
||||
* freeing their memory, and decrementing prog's refcnts.
|
||||
* bpf_timer_cancel() cancels the timer and decrements prog's refcnt.
|
||||
* Inner maps can contain bpf timers as well. ops->map_release_uref is
|
||||
* freeing the timers when inner map is replaced or deleted by user space.
|
||||
*/
|
||||
struct bpf_hrtimer {
|
||||
struct hrtimer timer;
|
||||
struct bpf_map *map;
|
||||
struct bpf_prog *prog;
|
||||
void __rcu *callback_fn;
|
||||
void *value;
|
||||
};
|
||||
|
||||
/* the actual struct hidden inside uapi struct bpf_timer */
|
||||
struct bpf_timer_kern {
|
||||
struct bpf_hrtimer *timer;
|
||||
/* bpf_spin_lock is used here instead of spinlock_t to make
|
||||
* sure that it always fits into space resereved by struct bpf_timer
|
||||
* regardless of LOCKDEP and spinlock debug flags.
|
||||
*/
|
||||
struct bpf_spin_lock lock;
|
||||
} __attribute__((aligned(8)));
|
||||
|
||||
static DEFINE_PER_CPU(struct bpf_hrtimer *, hrtimer_running);
|
||||
|
||||
static enum hrtimer_restart bpf_timer_cb(struct hrtimer *hrtimer)
|
||||
{
|
||||
struct bpf_hrtimer *t = container_of(hrtimer, struct bpf_hrtimer, timer);
|
||||
struct bpf_map *map = t->map;
|
||||
void *value = t->value;
|
||||
void *callback_fn;
|
||||
void *key;
|
||||
u32 idx;
|
||||
|
||||
callback_fn = rcu_dereference_check(t->callback_fn, rcu_read_lock_bh_held());
|
||||
if (!callback_fn)
|
||||
goto out;
|
||||
|
||||
/* bpf_timer_cb() runs in hrtimer_run_softirq. It doesn't migrate and
|
||||
* cannot be preempted by another bpf_timer_cb() on the same cpu.
|
||||
* Remember the timer this callback is servicing to prevent
|
||||
* deadlock if callback_fn() calls bpf_timer_cancel() or
|
||||
* bpf_map_delete_elem() on the same timer.
|
||||
*/
|
||||
this_cpu_write(hrtimer_running, t);
|
||||
if (map->map_type == BPF_MAP_TYPE_ARRAY) {
|
||||
struct bpf_array *array = container_of(map, struct bpf_array, map);
|
||||
|
||||
/* compute the key */
|
||||
idx = ((char *)value - array->value) / array->elem_size;
|
||||
key = &idx;
|
||||
} else { /* hash or lru */
|
||||
key = value - round_up(map->key_size, 8);
|
||||
}
|
||||
|
||||
BPF_CAST_CALL(callback_fn)((u64)(long)map, (u64)(long)key,
|
||||
(u64)(long)value, 0, 0);
|
||||
/* The verifier checked that return value is zero. */
|
||||
|
||||
this_cpu_write(hrtimer_running, NULL);
|
||||
out:
|
||||
return HRTIMER_NORESTART;
|
||||
}
|
||||
|
||||
BPF_CALL_3(bpf_timer_init, struct bpf_timer_kern *, timer, struct bpf_map *, map,
|
||||
u64, flags)
|
||||
{
|
||||
clockid_t clockid = flags & (MAX_CLOCKS - 1);
|
||||
struct bpf_hrtimer *t;
|
||||
int ret = 0;
|
||||
|
||||
BUILD_BUG_ON(MAX_CLOCKS != 16);
|
||||
BUILD_BUG_ON(sizeof(struct bpf_timer_kern) > sizeof(struct bpf_timer));
|
||||
BUILD_BUG_ON(__alignof__(struct bpf_timer_kern) != __alignof__(struct bpf_timer));
|
||||
|
||||
if (in_nmi())
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (flags >= MAX_CLOCKS ||
|
||||
/* similar to timerfd except _ALARM variants are not supported */
|
||||
(clockid != CLOCK_MONOTONIC &&
|
||||
clockid != CLOCK_REALTIME &&
|
||||
clockid != CLOCK_BOOTTIME))
|
||||
return -EINVAL;
|
||||
__bpf_spin_lock_irqsave(&timer->lock);
|
||||
t = timer->timer;
|
||||
if (t) {
|
||||
ret = -EBUSY;
|
||||
goto out;
|
||||
}
|
||||
if (!atomic64_read(&map->usercnt)) {
|
||||
/* maps with timers must be either held by user space
|
||||
* or pinned in bpffs.
|
||||
*/
|
||||
ret = -EPERM;
|
||||
goto out;
|
||||
}
|
||||
/* allocate hrtimer via map_kmalloc to use memcg accounting */
|
||||
t = bpf_map_kmalloc_node(map, sizeof(*t), GFP_ATOMIC, map->numa_node);
|
||||
if (!t) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
t->value = (void *)timer - map->timer_off;
|
||||
t->map = map;
|
||||
t->prog = NULL;
|
||||
rcu_assign_pointer(t->callback_fn, NULL);
|
||||
hrtimer_init(&t->timer, clockid, HRTIMER_MODE_REL_SOFT);
|
||||
t->timer.function = bpf_timer_cb;
|
||||
timer->timer = t;
|
||||
out:
|
||||
__bpf_spin_unlock_irqrestore(&timer->lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const struct bpf_func_proto bpf_timer_init_proto = {
|
||||
.func = bpf_timer_init,
|
||||
.gpl_only = true,
|
||||
.ret_type = RET_INTEGER,
|
||||
.arg1_type = ARG_PTR_TO_TIMER,
|
||||
.arg2_type = ARG_CONST_MAP_PTR,
|
||||
.arg3_type = ARG_ANYTHING,
|
||||
};
|
||||
|
||||
BPF_CALL_3(bpf_timer_set_callback, struct bpf_timer_kern *, timer, void *, callback_fn,
|
||||
struct bpf_prog_aux *, aux)
|
||||
{
|
||||
struct bpf_prog *prev, *prog = aux->prog;
|
||||
struct bpf_hrtimer *t;
|
||||
int ret = 0;
|
||||
|
||||
if (in_nmi())
|
||||
return -EOPNOTSUPP;
|
||||
__bpf_spin_lock_irqsave(&timer->lock);
|
||||
t = timer->timer;
|
||||
if (!t) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
if (!atomic64_read(&t->map->usercnt)) {
|
||||
/* maps with timers must be either held by user space
|
||||
* or pinned in bpffs. Otherwise timer might still be
|
||||
* running even when bpf prog is detached and user space
|
||||
* is gone, since map_release_uref won't ever be called.
|
||||
*/
|
||||
ret = -EPERM;
|
||||
goto out;
|
||||
}
|
||||
prev = t->prog;
|
||||
if (prev != prog) {
|
||||
/* Bump prog refcnt once. Every bpf_timer_set_callback()
|
||||
* can pick different callback_fn-s within the same prog.
|
||||
*/
|
||||
prog = bpf_prog_inc_not_zero(prog);
|
||||
if (IS_ERR(prog)) {
|
||||
ret = PTR_ERR(prog);
|
||||
goto out;
|
||||
}
|
||||
if (prev)
|
||||
/* Drop prev prog refcnt when swapping with new prog */
|
||||
bpf_prog_put(prev);
|
||||
t->prog = prog;
|
||||
}
|
||||
rcu_assign_pointer(t->callback_fn, callback_fn);
|
||||
out:
|
||||
__bpf_spin_unlock_irqrestore(&timer->lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const struct bpf_func_proto bpf_timer_set_callback_proto = {
|
||||
.func = bpf_timer_set_callback,
|
||||
.gpl_only = true,
|
||||
.ret_type = RET_INTEGER,
|
||||
.arg1_type = ARG_PTR_TO_TIMER,
|
||||
.arg2_type = ARG_PTR_TO_FUNC,
|
||||
};
|
||||
|
||||
BPF_CALL_3(bpf_timer_start, struct bpf_timer_kern *, timer, u64, nsecs, u64, flags)
|
||||
{
|
||||
struct bpf_hrtimer *t;
|
||||
int ret = 0;
|
||||
|
||||
if (in_nmi())
|
||||
return -EOPNOTSUPP;
|
||||
if (flags)
|
||||
return -EINVAL;
|
||||
__bpf_spin_lock_irqsave(&timer->lock);
|
||||
t = timer->timer;
|
||||
if (!t || !t->prog) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
hrtimer_start(&t->timer, ns_to_ktime(nsecs), HRTIMER_MODE_REL_SOFT);
|
||||
out:
|
||||
__bpf_spin_unlock_irqrestore(&timer->lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const struct bpf_func_proto bpf_timer_start_proto = {
|
||||
.func = bpf_timer_start,
|
||||
.gpl_only = true,
|
||||
.ret_type = RET_INTEGER,
|
||||
.arg1_type = ARG_PTR_TO_TIMER,
|
||||
.arg2_type = ARG_ANYTHING,
|
||||
.arg3_type = ARG_ANYTHING,
|
||||
};
|
||||
|
||||
static void drop_prog_refcnt(struct bpf_hrtimer *t)
|
||||
{
|
||||
struct bpf_prog *prog = t->prog;
|
||||
|
||||
if (prog) {
|
||||
bpf_prog_put(prog);
|
||||
t->prog = NULL;
|
||||
rcu_assign_pointer(t->callback_fn, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
BPF_CALL_1(bpf_timer_cancel, struct bpf_timer_kern *, timer)
|
||||
{
|
||||
struct bpf_hrtimer *t;
|
||||
int ret = 0;
|
||||
|
||||
if (in_nmi())
|
||||
return -EOPNOTSUPP;
|
||||
__bpf_spin_lock_irqsave(&timer->lock);
|
||||
t = timer->timer;
|
||||
if (!t) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
if (this_cpu_read(hrtimer_running) == t) {
|
||||
/* If bpf callback_fn is trying to bpf_timer_cancel()
|
||||
* its own timer the hrtimer_cancel() will deadlock
|
||||
* since it waits for callback_fn to finish
|
||||
*/
|
||||
ret = -EDEADLK;
|
||||
goto out;
|
||||
}
|
||||
drop_prog_refcnt(t);
|
||||
out:
|
||||
__bpf_spin_unlock_irqrestore(&timer->lock);
|
||||
/* Cancel the timer and wait for associated callback to finish
|
||||
* if it was running.
|
||||
*/
|
||||
ret = ret ?: hrtimer_cancel(&t->timer);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const struct bpf_func_proto bpf_timer_cancel_proto = {
|
||||
.func = bpf_timer_cancel,
|
||||
.gpl_only = true,
|
||||
.ret_type = RET_INTEGER,
|
||||
.arg1_type = ARG_PTR_TO_TIMER,
|
||||
};
|
||||
|
||||
/* This function is called by map_delete/update_elem for individual element and
|
||||
* by ops->map_release_uref when the user space reference to a map reaches zero.
|
||||
*/
|
||||
void bpf_timer_cancel_and_free(void *val)
|
||||
{
|
||||
struct bpf_timer_kern *timer = val;
|
||||
struct bpf_hrtimer *t;
|
||||
|
||||
/* Performance optimization: read timer->timer without lock first. */
|
||||
if (!READ_ONCE(timer->timer))
|
||||
return;
|
||||
|
||||
__bpf_spin_lock_irqsave(&timer->lock);
|
||||
/* re-read it under lock */
|
||||
t = timer->timer;
|
||||
if (!t)
|
||||
goto out;
|
||||
drop_prog_refcnt(t);
|
||||
/* The subsequent bpf_timer_start/cancel() helpers won't be able to use
|
||||
* this timer, since it won't be initialized.
|
||||
*/
|
||||
timer->timer = NULL;
|
||||
out:
|
||||
__bpf_spin_unlock_irqrestore(&timer->lock);
|
||||
if (!t)
|
||||
return;
|
||||
/* Cancel the timer and wait for callback to complete if it was running.
|
||||
* If hrtimer_cancel() can be safely called it's safe to call kfree(t)
|
||||
* right after for both preallocated and non-preallocated maps.
|
||||
* The timer->timer = NULL was already done and no code path can
|
||||
* see address 't' anymore.
|
||||
*
|
||||
* Check that bpf_map_delete/update_elem() wasn't called from timer
|
||||
* callback_fn. In such case don't call hrtimer_cancel() (since it will
|
||||
* deadlock) and don't call hrtimer_try_to_cancel() (since it will just
|
||||
* return -1). Though callback_fn is still running on this cpu it's
|
||||
* safe to do kfree(t) because bpf_timer_cb() read everything it needed
|
||||
* from 't'. The bpf subprog callback_fn won't be able to access 't',
|
||||
* since timer->timer = NULL was already done. The timer will be
|
||||
* effectively cancelled because bpf_timer_cb() will return
|
||||
* HRTIMER_NORESTART.
|
||||
*/
|
||||
if (this_cpu_read(hrtimer_running) != t)
|
||||
hrtimer_cancel(&t->timer);
|
||||
kfree(t);
|
||||
}
|
||||
|
||||
const struct bpf_func_proto bpf_get_current_task_proto __weak;
|
||||
const struct bpf_func_proto bpf_probe_read_user_proto __weak;
|
||||
const struct bpf_func_proto bpf_probe_read_user_str_proto __weak;
|
||||
|
@ -1055,6 +1379,14 @@ bpf_base_func_proto(enum bpf_func_id func_id)
|
|||
return &bpf_per_cpu_ptr_proto;
|
||||
case BPF_FUNC_this_cpu_ptr:
|
||||
return &bpf_this_cpu_ptr_proto;
|
||||
case BPF_FUNC_timer_init:
|
||||
return &bpf_timer_init_proto;
|
||||
case BPF_FUNC_timer_set_callback:
|
||||
return &bpf_timer_set_callback_proto;
|
||||
case BPF_FUNC_timer_start:
|
||||
return &bpf_timer_start_proto;
|
||||
case BPF_FUNC_timer_cancel:
|
||||
return &bpf_timer_cancel_proto;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -173,7 +173,7 @@ static int cgroup_storage_update_elem(struct bpf_map *map, void *key,
|
|||
return -ENOMEM;
|
||||
|
||||
memcpy(&new->data[0], value, map->value_size);
|
||||
check_and_init_map_lock(map, new->data);
|
||||
check_and_init_map_value(map, new->data);
|
||||
|
||||
new = xchg(&storage->buf, new);
|
||||
kfree_rcu(new, rcu);
|
||||
|
@ -509,7 +509,7 @@ struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog,
|
|||
map->numa_node);
|
||||
if (!storage->buf)
|
||||
goto enomem;
|
||||
check_and_init_map_lock(map, storage->buf->data);
|
||||
check_and_init_map_value(map, storage->buf->data);
|
||||
} else {
|
||||
storage->percpu_buf = bpf_map_alloc_percpu(map, size, 8, gfp);
|
||||
if (!storage->percpu_buf)
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
*/
|
||||
#include <linux/slab.h>
|
||||
#include <linux/bpf.h>
|
||||
#include <linux/btf.h>
|
||||
|
||||
#include "map_in_map.h"
|
||||
|
||||
|
@ -50,6 +51,11 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
|
|||
inner_map_meta->map_flags = inner_map->map_flags;
|
||||
inner_map_meta->max_entries = inner_map->max_entries;
|
||||
inner_map_meta->spin_lock_off = inner_map->spin_lock_off;
|
||||
inner_map_meta->timer_off = inner_map->timer_off;
|
||||
if (inner_map->btf) {
|
||||
btf_get(inner_map->btf);
|
||||
inner_map_meta->btf = inner_map->btf;
|
||||
}
|
||||
|
||||
/* Misc members not needed in bpf_map_meta_equal() check. */
|
||||
inner_map_meta->ops = inner_map->ops;
|
||||
|
@ -65,6 +71,7 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
|
|||
|
||||
void bpf_map_meta_free(struct bpf_map *map_meta)
|
||||
{
|
||||
btf_put(map_meta->btf);
|
||||
kfree(map_meta);
|
||||
}
|
||||
|
||||
|
@ -75,6 +82,7 @@ bool bpf_map_meta_equal(const struct bpf_map *meta0,
|
|||
return meta0->map_type == meta1->map_type &&
|
||||
meta0->key_size == meta1->key_size &&
|
||||
meta0->value_size == meta1->value_size &&
|
||||
meta0->timer_off == meta1->timer_off &&
|
||||
meta0->map_flags == meta1->map_flags;
|
||||
}
|
||||
|
||||
|
|
|
@ -260,8 +260,8 @@ static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value,
|
|||
copy_map_value_locked(map, value, ptr, true);
|
||||
else
|
||||
copy_map_value(map, value, ptr);
|
||||
/* mask lock, since value wasn't zero inited */
|
||||
check_and_init_map_lock(map, value);
|
||||
/* mask lock and timer, since value wasn't zero inited */
|
||||
check_and_init_map_value(map, value);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
@ -623,7 +623,8 @@ static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma)
|
|||
struct bpf_map *map = filp->private_data;
|
||||
int err;
|
||||
|
||||
if (!map->ops->map_mmap || map_value_has_spin_lock(map))
|
||||
if (!map->ops->map_mmap || map_value_has_spin_lock(map) ||
|
||||
map_value_has_timer(map))
|
||||
return -ENOTSUPP;
|
||||
|
||||
if (!(vma->vm_flags & VM_SHARED))
|
||||
|
@ -793,6 +794,16 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
|
|||
}
|
||||
}
|
||||
|
||||
map->timer_off = btf_find_timer(btf, value_type);
|
||||
if (map_value_has_timer(map)) {
|
||||
if (map->map_flags & BPF_F_RDONLY_PROG)
|
||||
return -EACCES;
|
||||
if (map->map_type != BPF_MAP_TYPE_HASH &&
|
||||
map->map_type != BPF_MAP_TYPE_LRU_HASH &&
|
||||
map->map_type != BPF_MAP_TYPE_ARRAY)
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
if (map->ops->map_check_btf)
|
||||
ret = map->ops->map_check_btf(map, btf, key_type, value_type);
|
||||
|
||||
|
@ -844,6 +855,7 @@ static int map_create(union bpf_attr *attr)
|
|||
mutex_init(&map->freeze_mutex);
|
||||
|
||||
map->spin_lock_off = -EINVAL;
|
||||
map->timer_off = -EINVAL;
|
||||
if (attr->btf_key_type_id || attr->btf_value_type_id ||
|
||||
/* Even the map's value is a kernel's struct,
|
||||
* the bpf_prog.o must have BTF to begin with
|
||||
|
@ -1591,7 +1603,8 @@ static int map_freeze(const union bpf_attr *attr)
|
|||
if (IS_ERR(map))
|
||||
return PTR_ERR(map);
|
||||
|
||||
if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
|
||||
if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS ||
|
||||
map_value_has_timer(map)) {
|
||||
fdput(f);
|
||||
return -ENOTSUPP;
|
||||
}
|
||||
|
@ -1699,6 +1712,8 @@ static int bpf_prog_alloc_id(struct bpf_prog *prog)
|
|||
|
||||
void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
/* cBPF to eBPF migrations are currently not in the idr store.
|
||||
* Offloaded programs are removed from the store when their device
|
||||
* disappears - even if someone grabs an fd to them they are unusable,
|
||||
|
@ -1708,7 +1723,7 @@ void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock)
|
|||
return;
|
||||
|
||||
if (do_idr_lock)
|
||||
spin_lock_bh(&prog_idr_lock);
|
||||
spin_lock_irqsave(&prog_idr_lock, flags);
|
||||
else
|
||||
__acquire(&prog_idr_lock);
|
||||
|
||||
|
@ -1716,7 +1731,7 @@ void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock)
|
|||
prog->aux->id = 0;
|
||||
|
||||
if (do_idr_lock)
|
||||
spin_unlock_bh(&prog_idr_lock);
|
||||
spin_unlock_irqrestore(&prog_idr_lock, flags);
|
||||
else
|
||||
__release(&prog_idr_lock);
|
||||
}
|
||||
|
@ -1752,14 +1767,32 @@ static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred)
|
|||
}
|
||||
}
|
||||
|
||||
static void bpf_prog_put_deferred(struct work_struct *work)
|
||||
{
|
||||
struct bpf_prog_aux *aux;
|
||||
struct bpf_prog *prog;
|
||||
|
||||
aux = container_of(work, struct bpf_prog_aux, work);
|
||||
prog = aux->prog;
|
||||
perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0);
|
||||
bpf_audit_prog(prog, BPF_AUDIT_UNLOAD);
|
||||
__bpf_prog_put_noref(prog, true);
|
||||
}
|
||||
|
||||
static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
|
||||
{
|
||||
if (atomic64_dec_and_test(&prog->aux->refcnt)) {
|
||||
perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0);
|
||||
bpf_audit_prog(prog, BPF_AUDIT_UNLOAD);
|
||||
struct bpf_prog_aux *aux = prog->aux;
|
||||
|
||||
if (atomic64_dec_and_test(&aux->refcnt)) {
|
||||
/* bpf_prog_free_id() must be called first */
|
||||
bpf_prog_free_id(prog, do_idr_lock);
|
||||
__bpf_prog_put_noref(prog, true);
|
||||
|
||||
if (in_irq() || irqs_disabled()) {
|
||||
INIT_WORK(&aux->work, bpf_prog_put_deferred);
|
||||
schedule_work(&aux->work);
|
||||
} else {
|
||||
bpf_prog_put_deferred(&aux->work);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -172,7 +172,7 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
|
|||
}
|
||||
|
||||
static struct bpf_tramp_progs *
|
||||
bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total)
|
||||
bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total, bool *ip_arg)
|
||||
{
|
||||
const struct bpf_prog_aux *aux;
|
||||
struct bpf_tramp_progs *tprogs;
|
||||
|
@ -189,8 +189,10 @@ bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total)
|
|||
*total += tr->progs_cnt[kind];
|
||||
progs = tprogs[kind].progs;
|
||||
|
||||
hlist_for_each_entry(aux, &tr->progs_hlist[kind], tramp_hlist)
|
||||
hlist_for_each_entry(aux, &tr->progs_hlist[kind], tramp_hlist) {
|
||||
*ip_arg |= aux->prog->call_get_func_ip;
|
||||
*progs++ = aux->prog;
|
||||
}
|
||||
}
|
||||
return tprogs;
|
||||
}
|
||||
|
@ -333,9 +335,10 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr)
|
|||
struct bpf_tramp_image *im;
|
||||
struct bpf_tramp_progs *tprogs;
|
||||
u32 flags = BPF_TRAMP_F_RESTORE_REGS;
|
||||
bool ip_arg = false;
|
||||
int err, total;
|
||||
|
||||
tprogs = bpf_trampoline_get_progs(tr, &total);
|
||||
tprogs = bpf_trampoline_get_progs(tr, &total, &ip_arg);
|
||||
if (IS_ERR(tprogs))
|
||||
return PTR_ERR(tprogs);
|
||||
|
||||
|
@ -357,6 +360,9 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr)
|
|||
tprogs[BPF_TRAMP_MODIFY_RETURN].nr_progs)
|
||||
flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME;
|
||||
|
||||
if (ip_arg)
|
||||
flags |= BPF_TRAMP_F_IP_ARG;
|
||||
|
||||
err = arch_prepare_bpf_trampoline(im, im->image, im->image + PAGE_SIZE,
|
||||
&tr->func.model, flags, tprogs,
|
||||
tr->func.addr);
|
||||
|
|
|
@ -255,6 +255,7 @@ struct bpf_call_arg_meta {
|
|||
int mem_size;
|
||||
u64 msize_max_value;
|
||||
int ref_obj_id;
|
||||
int map_uid;
|
||||
int func_id;
|
||||
struct btf *btf;
|
||||
u32 btf_id;
|
||||
|
@ -734,6 +735,10 @@ static void print_verifier_state(struct bpf_verifier_env *env,
|
|||
if (state->refs[i].id)
|
||||
verbose(env, ",%d", state->refs[i].id);
|
||||
}
|
||||
if (state->in_callback_fn)
|
||||
verbose(env, " cb");
|
||||
if (state->in_async_callback_fn)
|
||||
verbose(env, " async_cb");
|
||||
verbose(env, "\n");
|
||||
}
|
||||
|
||||
|
@ -1135,6 +1140,10 @@ static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)
|
|||
if (map->inner_map_meta) {
|
||||
reg->type = CONST_PTR_TO_MAP;
|
||||
reg->map_ptr = map->inner_map_meta;
|
||||
/* transfer reg's id which is unique for every map_lookup_elem
|
||||
* as UID of the inner map.
|
||||
*/
|
||||
reg->map_uid = reg->id;
|
||||
} else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
|
||||
reg->type = PTR_TO_XDP_SOCK;
|
||||
} else if (map->map_type == BPF_MAP_TYPE_SOCKMAP ||
|
||||
|
@ -1522,6 +1531,54 @@ static void init_func_state(struct bpf_verifier_env *env,
|
|||
init_reg_state(env, state);
|
||||
}
|
||||
|
||||
/* Similar to push_stack(), but for async callbacks */
|
||||
static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env,
|
||||
int insn_idx, int prev_insn_idx,
|
||||
int subprog)
|
||||
{
|
||||
struct bpf_verifier_stack_elem *elem;
|
||||
struct bpf_func_state *frame;
|
||||
|
||||
elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
|
||||
if (!elem)
|
||||
goto err;
|
||||
|
||||
elem->insn_idx = insn_idx;
|
||||
elem->prev_insn_idx = prev_insn_idx;
|
||||
elem->next = env->head;
|
||||
elem->log_pos = env->log.len_used;
|
||||
env->head = elem;
|
||||
env->stack_size++;
|
||||
if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
|
||||
verbose(env,
|
||||
"The sequence of %d jumps is too complex for async cb.\n",
|
||||
env->stack_size);
|
||||
goto err;
|
||||
}
|
||||
/* Unlike push_stack() do not copy_verifier_state().
|
||||
* The caller state doesn't matter.
|
||||
* This is async callback. It starts in a fresh stack.
|
||||
* Initialize it similar to do_check_common().
|
||||
*/
|
||||
elem->st.branches = 1;
|
||||
frame = kzalloc(sizeof(*frame), GFP_KERNEL);
|
||||
if (!frame)
|
||||
goto err;
|
||||
init_func_state(env, frame,
|
||||
BPF_MAIN_FUNC /* callsite */,
|
||||
0 /* frameno within this callchain */,
|
||||
subprog /* subprog number within this prog */);
|
||||
elem->st.frame[0] = frame;
|
||||
return &elem->st;
|
||||
err:
|
||||
free_verifier_state(env->cur_state, true);
|
||||
env->cur_state = NULL;
|
||||
/* pop all elements and return */
|
||||
while (!pop_stack(env, NULL, NULL, false));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
enum reg_arg_type {
|
||||
SRC_OP, /* register is used as source operand */
|
||||
DST_OP, /* register is used as destination operand */
|
||||
|
@ -3241,6 +3298,15 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno,
|
|||
return -EACCES;
|
||||
}
|
||||
}
|
||||
if (map_value_has_timer(map)) {
|
||||
u32 t = map->timer_off;
|
||||
|
||||
if (reg->smin_value + off < t + sizeof(struct bpf_timer) &&
|
||||
t < reg->umax_value + off + size) {
|
||||
verbose(env, "bpf_timer cannot be accessed directly by load/store\n");
|
||||
return -EACCES;
|
||||
}
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
|
@ -3643,6 +3709,8 @@ static int check_max_stack_depth(struct bpf_verifier_env *env)
|
|||
continue_func:
|
||||
subprog_end = subprog[idx + 1].start;
|
||||
for (; i < subprog_end; i++) {
|
||||
int next_insn;
|
||||
|
||||
if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i))
|
||||
continue;
|
||||
/* remember insn and function to return to */
|
||||
|
@ -3650,13 +3718,22 @@ static int check_max_stack_depth(struct bpf_verifier_env *env)
|
|||
ret_prog[frame] = idx;
|
||||
|
||||
/* find the callee */
|
||||
i = i + insn[i].imm + 1;
|
||||
idx = find_subprog(env, i);
|
||||
next_insn = i + insn[i].imm + 1;
|
||||
idx = find_subprog(env, next_insn);
|
||||
if (idx < 0) {
|
||||
WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
|
||||
i);
|
||||
next_insn);
|
||||
return -EFAULT;
|
||||
}
|
||||
if (subprog[idx].is_async_cb) {
|
||||
if (subprog[idx].has_tail_call) {
|
||||
verbose(env, "verifier bug. subprog has tail_call and async cb\n");
|
||||
return -EFAULT;
|
||||
}
|
||||
/* async callbacks don't increase bpf prog stack size */
|
||||
continue;
|
||||
}
|
||||
i = next_insn;
|
||||
|
||||
if (subprog[idx].has_tail_call)
|
||||
tail_call_reachable = true;
|
||||
|
@ -4656,6 +4733,54 @@ static int process_spin_lock(struct bpf_verifier_env *env, int regno,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int process_timer_func(struct bpf_verifier_env *env, int regno,
|
||||
struct bpf_call_arg_meta *meta)
|
||||
{
|
||||
struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno];
|
||||
bool is_const = tnum_is_const(reg->var_off);
|
||||
struct bpf_map *map = reg->map_ptr;
|
||||
u64 val = reg->var_off.value;
|
||||
|
||||
if (!is_const) {
|
||||
verbose(env,
|
||||
"R%d doesn't have constant offset. bpf_timer has to be at the constant offset\n",
|
||||
regno);
|
||||
return -EINVAL;
|
||||
}
|
||||
if (!map->btf) {
|
||||
verbose(env, "map '%s' has to have BTF in order to use bpf_timer\n",
|
||||
map->name);
|
||||
return -EINVAL;
|
||||
}
|
||||
if (!map_value_has_timer(map)) {
|
||||
if (map->timer_off == -E2BIG)
|
||||
verbose(env,
|
||||
"map '%s' has more than one 'struct bpf_timer'\n",
|
||||
map->name);
|
||||
else if (map->timer_off == -ENOENT)
|
||||
verbose(env,
|
||||
"map '%s' doesn't have 'struct bpf_timer'\n",
|
||||
map->name);
|
||||
else
|
||||
verbose(env,
|
||||
"map '%s' is not a struct type or bpf_timer is mangled\n",
|
||||
map->name);
|
||||
return -EINVAL;
|
||||
}
|
||||
if (map->timer_off != val + reg->off) {
|
||||
verbose(env, "off %lld doesn't point to 'struct bpf_timer' that is at %d\n",
|
||||
val + reg->off, map->timer_off);
|
||||
return -EINVAL;
|
||||
}
|
||||
if (meta->map_ptr) {
|
||||
verbose(env, "verifier bug. Two map pointers in a timer helper\n");
|
||||
return -EFAULT;
|
||||
}
|
||||
meta->map_uid = reg->map_uid;
|
||||
meta->map_ptr = map;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool arg_type_is_mem_ptr(enum bpf_arg_type type)
|
||||
{
|
||||
return type == ARG_PTR_TO_MEM ||
|
||||
|
@ -4788,6 +4913,7 @@ static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_PER
|
|||
static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } };
|
||||
static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
|
||||
static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } };
|
||||
static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } };
|
||||
|
||||
static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
|
||||
[ARG_PTR_TO_MAP_KEY] = &map_key_value_types,
|
||||
|
@ -4819,6 +4945,7 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
|
|||
[ARG_PTR_TO_FUNC] = &func_ptr_types,
|
||||
[ARG_PTR_TO_STACK_OR_NULL] = &stack_ptr_types,
|
||||
[ARG_PTR_TO_CONST_STR] = &const_str_ptr_types,
|
||||
[ARG_PTR_TO_TIMER] = &timer_types,
|
||||
};
|
||||
|
||||
static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
|
||||
|
@ -4948,7 +5075,29 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
|
|||
|
||||
if (arg_type == ARG_CONST_MAP_PTR) {
|
||||
/* bpf_map_xxx(map_ptr) call: remember that map_ptr */
|
||||
if (meta->map_ptr) {
|
||||
/* Use map_uid (which is unique id of inner map) to reject:
|
||||
* inner_map1 = bpf_map_lookup_elem(outer_map, key1)
|
||||
* inner_map2 = bpf_map_lookup_elem(outer_map, key2)
|
||||
* if (inner_map1 && inner_map2) {
|
||||
* timer = bpf_map_lookup_elem(inner_map1);
|
||||
* if (timer)
|
||||
* // mismatch would have been allowed
|
||||
* bpf_timer_init(timer, inner_map2);
|
||||
* }
|
||||
*
|
||||
* Comparing map_ptr is enough to distinguish normal and outer maps.
|
||||
*/
|
||||
if (meta->map_ptr != reg->map_ptr ||
|
||||
meta->map_uid != reg->map_uid) {
|
||||
verbose(env,
|
||||
"timer pointer in R1 map_uid=%d doesn't match map pointer in R2 map_uid=%d\n",
|
||||
meta->map_uid, reg->map_uid);
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
meta->map_ptr = reg->map_ptr;
|
||||
meta->map_uid = reg->map_uid;
|
||||
} else if (arg_type == ARG_PTR_TO_MAP_KEY) {
|
||||
/* bpf_map_xxx(..., map_ptr, ..., key) call:
|
||||
* check that [key, key + map->key_size) are within
|
||||
|
@ -5000,6 +5149,9 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
|
|||
verbose(env, "verifier internal error\n");
|
||||
return -EFAULT;
|
||||
}
|
||||
} else if (arg_type == ARG_PTR_TO_TIMER) {
|
||||
if (process_timer_func(env, regno, meta))
|
||||
return -EACCES;
|
||||
} else if (arg_type == ARG_PTR_TO_FUNC) {
|
||||
meta->subprogno = reg->subprogno;
|
||||
} else if (arg_type_is_mem_ptr(arg_type)) {
|
||||
|
@ -5615,6 +5767,31 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn
|
|||
}
|
||||
}
|
||||
|
||||
if (insn->code == (BPF_JMP | BPF_CALL) &&
|
||||
insn->imm == BPF_FUNC_timer_set_callback) {
|
||||
struct bpf_verifier_state *async_cb;
|
||||
|
||||
/* there is no real recursion here. timer callbacks are async */
|
||||
env->subprog_info[subprog].is_async_cb = true;
|
||||
async_cb = push_async_cb(env, env->subprog_info[subprog].start,
|
||||
*insn_idx, subprog);
|
||||
if (!async_cb)
|
||||
return -EFAULT;
|
||||
callee = async_cb->frame[0];
|
||||
callee->async_entry_cnt = caller->async_entry_cnt + 1;
|
||||
|
||||
/* Convert bpf_timer_set_callback() args into timer callback args */
|
||||
err = set_callee_state_cb(env, caller, callee, *insn_idx);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
clear_caller_saved_regs(env, caller->regs);
|
||||
mark_reg_unknown(env, caller->regs, BPF_REG_0);
|
||||
caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
|
||||
/* continue with next insn after call */
|
||||
return 0;
|
||||
}
|
||||
|
||||
callee = kzalloc(sizeof(*callee), GFP_KERNEL);
|
||||
if (!callee)
|
||||
return -ENOMEM;
|
||||
|
@ -5742,6 +5919,35 @@ static int set_map_elem_callback_state(struct bpf_verifier_env *env,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int set_timer_callback_state(struct bpf_verifier_env *env,
|
||||
struct bpf_func_state *caller,
|
||||
struct bpf_func_state *callee,
|
||||
int insn_idx)
|
||||
{
|
||||
struct bpf_map *map_ptr = caller->regs[BPF_REG_1].map_ptr;
|
||||
|
||||
/* bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn);
|
||||
* callback_fn(struct bpf_map *map, void *key, void *value);
|
||||
*/
|
||||
callee->regs[BPF_REG_1].type = CONST_PTR_TO_MAP;
|
||||
__mark_reg_known_zero(&callee->regs[BPF_REG_1]);
|
||||
callee->regs[BPF_REG_1].map_ptr = map_ptr;
|
||||
|
||||
callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
|
||||
__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
|
||||
callee->regs[BPF_REG_2].map_ptr = map_ptr;
|
||||
|
||||
callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
|
||||
__mark_reg_known_zero(&callee->regs[BPF_REG_3]);
|
||||
callee->regs[BPF_REG_3].map_ptr = map_ptr;
|
||||
|
||||
/* unused */
|
||||
__mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
|
||||
__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
|
||||
callee->in_async_callback_fn = true;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
|
||||
{
|
||||
struct bpf_verifier_state *state = env->cur_state;
|
||||
|
@ -5955,6 +6161,29 @@ static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
|
|||
return err;
|
||||
}
|
||||
|
||||
static int check_get_func_ip(struct bpf_verifier_env *env)
|
||||
{
|
||||
enum bpf_attach_type eatype = env->prog->expected_attach_type;
|
||||
enum bpf_prog_type type = resolve_prog_type(env->prog);
|
||||
int func_id = BPF_FUNC_get_func_ip;
|
||||
|
||||
if (type == BPF_PROG_TYPE_TRACING) {
|
||||
if (eatype != BPF_TRACE_FENTRY && eatype != BPF_TRACE_FEXIT &&
|
||||
eatype != BPF_MODIFY_RETURN) {
|
||||
verbose(env, "func %s#%d supported only for fentry/fexit/fmod_ret programs\n",
|
||||
func_id_name(func_id), func_id);
|
||||
return -ENOTSUPP;
|
||||
}
|
||||
return 0;
|
||||
} else if (type == BPF_PROG_TYPE_KPROBE) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
verbose(env, "func %s#%d not supported for program type %d\n",
|
||||
func_id_name(func_id), func_id, type);
|
||||
return -ENOTSUPP;
|
||||
}
|
||||
|
||||
static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
|
||||
int *insn_idx_p)
|
||||
{
|
||||
|
@ -6069,6 +6298,13 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
|
|||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (func_id == BPF_FUNC_timer_set_callback) {
|
||||
err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
|
||||
set_timer_callback_state);
|
||||
if (err < 0)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (func_id == BPF_FUNC_snprintf) {
|
||||
err = check_bpf_snprintf_call(env, regs);
|
||||
if (err < 0)
|
||||
|
@ -6104,6 +6340,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
|
|||
return -EINVAL;
|
||||
}
|
||||
regs[BPF_REG_0].map_ptr = meta.map_ptr;
|
||||
regs[BPF_REG_0].map_uid = meta.map_uid;
|
||||
if (fn->ret_type == RET_PTR_TO_MAP_VALUE) {
|
||||
regs[BPF_REG_0].type = PTR_TO_MAP_VALUE;
|
||||
if (map_value_has_spin_lock(meta.map_ptr))
|
||||
|
@ -6225,6 +6462,12 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
|
|||
if (func_id == BPF_FUNC_get_stackid || func_id == BPF_FUNC_get_stack)
|
||||
env->prog->call_get_stack = true;
|
||||
|
||||
if (func_id == BPF_FUNC_get_func_ip) {
|
||||
if (check_get_func_ip(env))
|
||||
return -ENOTSUPP;
|
||||
env->prog->call_get_func_ip = true;
|
||||
}
|
||||
|
||||
if (changes_data)
|
||||
clear_all_pkt_pointers(env);
|
||||
return 0;
|
||||
|
@ -9099,7 +9342,8 @@ static int check_return_code(struct bpf_verifier_env *env)
|
|||
struct tnum range = tnum_range(0, 1);
|
||||
enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
|
||||
int err;
|
||||
const bool is_subprog = env->cur_state->frame[0]->subprogno;
|
||||
struct bpf_func_state *frame = env->cur_state->frame[0];
|
||||
const bool is_subprog = frame->subprogno;
|
||||
|
||||
/* LSM and struct_ops func-ptr's return type could be "void" */
|
||||
if (!is_subprog &&
|
||||
|
@ -9124,6 +9368,22 @@ static int check_return_code(struct bpf_verifier_env *env)
|
|||
}
|
||||
|
||||
reg = cur_regs(env) + BPF_REG_0;
|
||||
|
||||
if (frame->in_async_callback_fn) {
|
||||
/* enforce return zero from async callbacks like timer */
|
||||
if (reg->type != SCALAR_VALUE) {
|
||||
verbose(env, "In async callback the register R0 is not a known value (%s)\n",
|
||||
reg_type_str[reg->type]);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (!tnum_in(tnum_const(0), reg->var_off)) {
|
||||
verbose_invalid_scalar(env, reg, &range, "async callback", "R0");
|
||||
return -EINVAL;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (is_subprog) {
|
||||
if (reg->type != SCALAR_VALUE) {
|
||||
verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n",
|
||||
|
@ -9338,8 +9598,12 @@ static int visit_func_call_insn(int t, int insn_cnt,
|
|||
init_explored_state(env, t + 1);
|
||||
if (visit_callee) {
|
||||
init_explored_state(env, t);
|
||||
ret = push_insn(t, t + insns[t].imm + 1, BRANCH,
|
||||
env, false);
|
||||
ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env,
|
||||
/* It's ok to allow recursion from CFG point of
|
||||
* view. __check_func_call() will do the actual
|
||||
* check.
|
||||
*/
|
||||
bpf_pseudo_func(insns + t));
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
@ -9367,6 +9631,13 @@ static int visit_insn(int t, int insn_cnt, struct bpf_verifier_env *env)
|
|||
return DONE_EXPLORING;
|
||||
|
||||
case BPF_CALL:
|
||||
if (insns[t].imm == BPF_FUNC_timer_set_callback)
|
||||
/* Mark this call insn to trigger is_state_visited() check
|
||||
* before call itself is processed by __check_func_call().
|
||||
* Otherwise new async state will be pushed for further
|
||||
* exploration.
|
||||
*/
|
||||
init_explored_state(env, t);
|
||||
return visit_func_call_insn(t, insn_cnt, insns, env,
|
||||
insns[t].src_reg == BPF_PSEUDO_CALL);
|
||||
|
||||
|
@ -10374,9 +10645,25 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
|
|||
states_cnt++;
|
||||
if (sl->state.insn_idx != insn_idx)
|
||||
goto next;
|
||||
|
||||
if (sl->state.branches) {
|
||||
if (states_maybe_looping(&sl->state, cur) &&
|
||||
states_equal(env, &sl->state, cur)) {
|
||||
struct bpf_func_state *frame = sl->state.frame[sl->state.curframe];
|
||||
|
||||
if (frame->in_async_callback_fn &&
|
||||
frame->async_entry_cnt != cur->frame[cur->curframe]->async_entry_cnt) {
|
||||
/* Different async_entry_cnt means that the verifier is
|
||||
* processing another entry into async callback.
|
||||
* Seeing the same state is not an indication of infinite
|
||||
* loop or infinite recursion.
|
||||
* But finding the same state doesn't mean that it's safe
|
||||
* to stop processing the current state. The previous state
|
||||
* hasn't yet reached bpf_exit, since state.branches > 0.
|
||||
* Checking in_async_callback_fn alone is not enough either.
|
||||
* Since the verifier still needs to catch infinite loops
|
||||
* inside async callbacks.
|
||||
*/
|
||||
} else if (states_maybe_looping(&sl->state, cur) &&
|
||||
states_equal(env, &sl->state, cur)) {
|
||||
verbose_linfo(env, insn_idx, "; ");
|
||||
verbose(env, "infinite loop detected at insn %d\n", insn_idx);
|
||||
return -EINVAL;
|
||||
|
@ -11425,10 +11712,11 @@ static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
|
|||
* insni[off, off + cnt). Adjust corresponding insn_aux_data by copying
|
||||
* [0, off) and [off, end) to new locations, so the patched range stays zero
|
||||
*/
|
||||
static int adjust_insn_aux_data(struct bpf_verifier_env *env,
|
||||
struct bpf_prog *new_prog, u32 off, u32 cnt)
|
||||
static void adjust_insn_aux_data(struct bpf_verifier_env *env,
|
||||
struct bpf_insn_aux_data *new_data,
|
||||
struct bpf_prog *new_prog, u32 off, u32 cnt)
|
||||
{
|
||||
struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data;
|
||||
struct bpf_insn_aux_data *old_data = env->insn_aux_data;
|
||||
struct bpf_insn *insn = new_prog->insnsi;
|
||||
u32 old_seen = old_data[off].seen;
|
||||
u32 prog_len;
|
||||
|
@ -11441,12 +11729,9 @@ static int adjust_insn_aux_data(struct bpf_verifier_env *env,
|
|||
old_data[off].zext_dst = insn_has_def32(env, insn + off + cnt - 1);
|
||||
|
||||
if (cnt == 1)
|
||||
return 0;
|
||||
return;
|
||||
prog_len = new_prog->len;
|
||||
new_data = vzalloc(array_size(prog_len,
|
||||
sizeof(struct bpf_insn_aux_data)));
|
||||
if (!new_data)
|
||||
return -ENOMEM;
|
||||
|
||||
memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
|
||||
memcpy(new_data + off + cnt - 1, old_data + off,
|
||||
sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
|
||||
|
@ -11457,7 +11742,6 @@ static int adjust_insn_aux_data(struct bpf_verifier_env *env,
|
|||
}
|
||||
env->insn_aux_data = new_data;
|
||||
vfree(old_data);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
|
||||
|
@ -11492,6 +11776,14 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of
|
|||
const struct bpf_insn *patch, u32 len)
|
||||
{
|
||||
struct bpf_prog *new_prog;
|
||||
struct bpf_insn_aux_data *new_data = NULL;
|
||||
|
||||
if (len > 1) {
|
||||
new_data = vzalloc(array_size(env->prog->len + len - 1,
|
||||
sizeof(struct bpf_insn_aux_data)));
|
||||
if (!new_data)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
|
||||
if (IS_ERR(new_prog)) {
|
||||
|
@ -11499,10 +11791,10 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of
|
|||
verbose(env,
|
||||
"insn %d cannot be patched due to 16-bit range\n",
|
||||
env->insn_aux_data[off].orig_idx);
|
||||
vfree(new_data);
|
||||
return NULL;
|
||||
}
|
||||
if (adjust_insn_aux_data(env, new_prog, off, len))
|
||||
return NULL;
|
||||
adjust_insn_aux_data(env, new_data, new_prog, off, len);
|
||||
adjust_subprog_starts(env, off, len);
|
||||
adjust_poke_descs(new_prog, off, len);
|
||||
return new_prog;
|
||||
|
@ -12351,6 +12643,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
|
|||
{
|
||||
struct bpf_prog *prog = env->prog;
|
||||
bool expect_blinding = bpf_jit_blinding_enabled(prog);
|
||||
enum bpf_prog_type prog_type = resolve_prog_type(prog);
|
||||
struct bpf_insn *insn = prog->insnsi;
|
||||
const struct bpf_func_proto *fn;
|
||||
const int insn_cnt = prog->len;
|
||||
|
@ -12568,6 +12861,39 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
|
|||
continue;
|
||||
}
|
||||
|
||||
if (insn->imm == BPF_FUNC_timer_set_callback) {
|
||||
/* The verifier will process callback_fn as many times as necessary
|
||||
* with different maps and the register states prepared by
|
||||
* set_timer_callback_state will be accurate.
|
||||
*
|
||||
* The following use case is valid:
|
||||
* map1 is shared by prog1, prog2, prog3.
|
||||
* prog1 calls bpf_timer_init for some map1 elements
|
||||
* prog2 calls bpf_timer_set_callback for some map1 elements.
|
||||
* Those that were not bpf_timer_init-ed will return -EINVAL.
|
||||
* prog3 calls bpf_timer_start for some map1 elements.
|
||||
* Those that were not both bpf_timer_init-ed and
|
||||
* bpf_timer_set_callback-ed will return -EINVAL.
|
||||
*/
|
||||
struct bpf_insn ld_addrs[2] = {
|
||||
BPF_LD_IMM64(BPF_REG_3, (long)prog->aux),
|
||||
};
|
||||
|
||||
insn_buf[0] = ld_addrs[0];
|
||||
insn_buf[1] = ld_addrs[1];
|
||||
insn_buf[2] = *insn;
|
||||
cnt = 3;
|
||||
|
||||
new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
|
||||
if (!new_prog)
|
||||
return -ENOMEM;
|
||||
|
||||
delta += cnt - 1;
|
||||
env->prog = prog = new_prog;
|
||||
insn = new_prog->insnsi + i + delta;
|
||||
goto patch_call_imm;
|
||||
}
|
||||
|
||||
/* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
|
||||
* and other inlining handlers are currently limited to 64 bit
|
||||
* only.
|
||||
|
@ -12684,6 +13010,21 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
|
|||
continue;
|
||||
}
|
||||
|
||||
/* Implement bpf_get_func_ip inline. */
|
||||
if (prog_type == BPF_PROG_TYPE_TRACING &&
|
||||
insn->imm == BPF_FUNC_get_func_ip) {
|
||||
/* Load IP address from ctx - 8 */
|
||||
insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
|
||||
|
||||
new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1);
|
||||
if (!new_prog)
|
||||
return -ENOMEM;
|
||||
|
||||
env->prog = prog = new_prog;
|
||||
insn = new_prog->insnsi + i + delta;
|
||||
continue;
|
||||
}
|
||||
|
||||
patch_call_imm:
|
||||
fn = env->ops->get_func_proto(insn->imm, env->prog);
|
||||
/* all functions that have prototype and verifier allowed
|
||||
|
|
|
@ -948,6 +948,33 @@ const struct bpf_func_proto bpf_snprintf_btf_proto = {
|
|||
.arg5_type = ARG_ANYTHING,
|
||||
};
|
||||
|
||||
BPF_CALL_1(bpf_get_func_ip_tracing, void *, ctx)
{
	/* This helper call is inlined by verifier. */
	return ((u64 *)ctx)[-1];
}

static const struct bpf_func_proto bpf_get_func_ip_proto_tracing = {
	.func		= bpf_get_func_ip_tracing,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};

BPF_CALL_1(bpf_get_func_ip_kprobe, struct pt_regs *, regs)
{
	struct kprobe *kp = kprobe_running();

	return kp ? (u64) kp->addr : 0;
}

static const struct bpf_func_proto bpf_get_func_ip_proto_kprobe = {
	.func		= bpf_get_func_ip_kprobe,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};
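
A hypothetical caller of the kprobe flavour above (not from this patch; the attach target is an assumption, and bpf_get_func_ip() is assumed to be present in the regenerated helper definitions):

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("kprobe/do_sys_openat2")
int probe_ip(void *ctx)
{
	__u64 ip = bpf_get_func_ip(ctx);	/* kp->addr via bpf_get_func_ip_kprobe() */

	bpf_printk("attached at %llx", ip);
	return 0;
}

char _license[] SEC("license") = "GPL";

For fentry/fexit programs the same call is not dispatched through a proto at all: the verifier inlines it into a single load of ctx[-1], as done in do_misc_fixups() earlier in this series.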
|
||||
|
||||
const struct bpf_func_proto *
|
||||
bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||
{
|
||||
|
@ -1058,8 +1085,10 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
|||
return &bpf_for_each_map_elem_proto;
|
||||
case BPF_FUNC_snprintf:
|
||||
return &bpf_snprintf_proto;
|
||||
case BPF_FUNC_get_func_ip:
|
||||
return &bpf_get_func_ip_proto_tracing;
|
||||
default:
|
||||
return NULL;
|
||||
return bpf_base_func_proto(func_id);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1077,6 +1106,8 @@ kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
|||
case BPF_FUNC_override_return:
|
||||
return &bpf_override_return_proto;
|
||||
#endif
|
||||
case BPF_FUNC_get_func_ip:
|
||||
return &bpf_get_func_ip_proto_kprobe;
|
||||
default:
|
||||
return bpf_tracing_func_proto(func_id, prog);
|
||||
}
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
#include <linux/error-injection.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/sock_diag.h>
|
||||
#include <net/xdp.h>
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include <trace/events/bpf_test_run.h>
|
||||
|
@ -687,6 +688,64 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
|
|||
return ret;
|
||||
}
|
||||
|
||||
static int xdp_convert_md_to_buff(struct xdp_md *xdp_md, struct xdp_buff *xdp)
{
	unsigned int ingress_ifindex, rx_queue_index;
	struct netdev_rx_queue *rxqueue;
	struct net_device *device;

	if (!xdp_md)
		return 0;

	if (xdp_md->egress_ifindex != 0)
		return -EINVAL;

	ingress_ifindex = xdp_md->ingress_ifindex;
	rx_queue_index = xdp_md->rx_queue_index;

	if (!ingress_ifindex && rx_queue_index)
		return -EINVAL;

	if (ingress_ifindex) {
		device = dev_get_by_index(current->nsproxy->net_ns,
					  ingress_ifindex);
		if (!device)
			return -ENODEV;

		if (rx_queue_index >= device->real_num_rx_queues)
			goto free_dev;

		rxqueue = __netif_get_rx_queue(device, rx_queue_index);

		if (!xdp_rxq_info_is_reg(&rxqueue->xdp_rxq))
			goto free_dev;

		xdp->rxq = &rxqueue->xdp_rxq;
		/* The device is now tracked in the xdp->rxq for later
		 * dev_put()
		 */
	}

	xdp->data = xdp->data_meta + xdp_md->data;
	return 0;

free_dev:
	dev_put(device);
	return -EINVAL;
}

static void xdp_convert_buff_to_md(struct xdp_buff *xdp, struct xdp_md *xdp_md)
{
	if (!xdp_md)
		return;

	xdp_md->data = xdp->data - xdp->data_meta;
	xdp_md->data_end = xdp->data_end - xdp->data_meta;

	if (xdp_md->ingress_ifindex)
		dev_put(xdp->rxq->dev);
}
|
||||
|
||||
int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
|
||||
union bpf_attr __user *uattr)
|
||||
{
|
||||
|
@ -697,35 +756,69 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
|
|||
struct netdev_rx_queue *rxqueue;
|
||||
struct xdp_buff xdp = {};
|
||||
u32 retval, duration;
|
||||
struct xdp_md *ctx;
|
||||
u32 max_data_sz;
|
||||
void *data;
|
||||
int ret;
|
||||
int ret = -EINVAL;
|
||||
|
||||
if (kattr->test.ctx_in || kattr->test.ctx_out)
|
||||
return -EINVAL;
|
||||
ctx = bpf_ctx_init(kattr, sizeof(struct xdp_md));
|
||||
if (IS_ERR(ctx))
|
||||
return PTR_ERR(ctx);
|
||||
|
||||
if (ctx) {
|
||||
/* There can't be user provided data before the meta data */
|
||||
if (ctx->data_meta || ctx->data_end != size ||
|
||||
ctx->data > ctx->data_end ||
|
||||
unlikely(xdp_metalen_invalid(ctx->data)))
|
||||
goto free_ctx;
|
||||
/* Meta data is allocated from the headroom */
|
||||
headroom -= ctx->data;
|
||||
}
|
||||
|
||||
/* XDP have extra tailroom as (most) drivers use full page */
|
||||
max_data_sz = 4096 - headroom - tailroom;
|
||||
|
||||
data = bpf_test_init(kattr, max_data_sz, headroom, tailroom);
|
||||
if (IS_ERR(data))
|
||||
return PTR_ERR(data);
|
||||
if (IS_ERR(data)) {
|
||||
ret = PTR_ERR(data);
|
||||
goto free_ctx;
|
||||
}
|
||||
|
||||
rxqueue = __netif_get_rx_queue(current->nsproxy->net_ns->loopback_dev, 0);
|
||||
xdp_init_buff(&xdp, headroom + max_data_sz + tailroom,
|
||||
&rxqueue->xdp_rxq);
|
||||
xdp_prepare_buff(&xdp, data, headroom, size, true);
|
||||
|
||||
ret = xdp_convert_md_to_buff(ctx, &xdp);
|
||||
if (ret)
|
||||
goto free_data;
|
||||
|
||||
bpf_prog_change_xdp(NULL, prog);
|
||||
ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration, true);
|
||||
/* We convert the xdp_buff back to an xdp_md before checking the return
|
||||
* code so the reference count of any held netdevice will be decremented
|
||||
* even if the test run failed.
|
||||
*/
|
||||
xdp_convert_buff_to_md(&xdp, ctx);
|
||||
if (ret)
|
||||
goto out;
|
||||
if (xdp.data != data + headroom || xdp.data_end != xdp.data + size)
|
||||
size = xdp.data_end - xdp.data;
|
||||
ret = bpf_test_finish(kattr, uattr, xdp.data, size, retval, duration);
|
||||
|
||||
if (xdp.data_meta != data + headroom ||
|
||||
xdp.data_end != xdp.data_meta + size)
|
||||
size = xdp.data_end - xdp.data_meta;
|
||||
|
||||
ret = bpf_test_finish(kattr, uattr, xdp.data_meta, size, retval,
|
||||
duration);
|
||||
if (!ret)
|
||||
ret = bpf_ctx_finish(kattr, uattr, ctx,
|
||||
sizeof(struct xdp_md));
|
||||
|
||||
out:
|
||||
bpf_prog_change_xdp(prog, NULL);
|
||||
free_data:
|
||||
kfree(data);
|
||||
free_ctx:
|
||||
kfree(ctx);
|
||||
return ret;
|
||||
}
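
User space can feed an xdp_md context into BPF_PROG_TEST_RUN roughly as below. A hedged sketch, not part of the patch: it assumes libbpf's bpf_prog_test_run_xattr() interface and made-up prog_fd/ifindex values; note that ctx.data_end must equal data_size_in and ctx.data selects the metadata length.

#include <linux/bpf.h>
#include <bpf/bpf.h>

static int run_xdp_prog(int prog_fd, int ifindex)
{
	char pkt[128] = { 0 }, out[128];
	struct xdp_md ctx = {
		.data		 = 8,		/* 8 bytes of metadata before data */
		.data_end	 = sizeof(pkt),	/* must match data_size_in */
		.ingress_ifindex = ifindex,
		.rx_queue_index	 = 0,
	};
	struct bpf_prog_test_run_attr attr = {
		.prog_fd	= prog_fd,
		.repeat		= 1,
		.data_in	= pkt,
		.data_size_in	= sizeof(pkt),
		.data_out	= out,
		.data_size_out	= sizeof(out),
		.ctx_in		= &ctx,
		.ctx_size_in	= sizeof(ctx),
		.ctx_out	= &ctx,
		.ctx_size_out	= sizeof(ctx),
	};

	return bpf_prog_test_run_xattr(&attr);	/* XDP verdict lands in attr.retval */
}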
|
||||
|
||||
|
|
|
@ -33,8 +33,6 @@ obj-$(CONFIG_HWBM) += hwbm.o
|
|||
obj-$(CONFIG_NET_DEVLINK) += devlink.o
|
||||
obj-$(CONFIG_GRO_CELLS) += gro_cells.o
|
||||
obj-$(CONFIG_FAILOVER) += failover.o
|
||||
ifeq ($(CONFIG_INET),y)
|
||||
obj-$(CONFIG_NET_SOCK_MSG) += skmsg.o
|
||||
obj-$(CONFIG_BPF_SYSCALL) += sock_map.o
|
||||
endif
|
||||
obj-$(CONFIG_BPF_SYSCALL) += bpf_sk_storage.o
|
||||
|
|
|
@ -4744,45 +4744,18 @@ static struct netdev_rx_queue *netif_get_rxqueue(struct sk_buff *skb)
|
|||
return rxqueue;
|
||||
}
|
||||
|
||||
static u32 netif_receive_generic_xdp(struct sk_buff *skb,
|
||||
struct xdp_buff *xdp,
|
||||
struct bpf_prog *xdp_prog)
|
||||
u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp,
|
||||
struct bpf_prog *xdp_prog)
|
||||
{
|
||||
void *orig_data, *orig_data_end, *hard_start;
|
||||
struct netdev_rx_queue *rxqueue;
|
||||
u32 metalen, act = XDP_DROP;
|
||||
bool orig_bcast, orig_host;
|
||||
u32 mac_len, frame_sz;
|
||||
__be16 orig_eth_type;
|
||||
struct ethhdr *eth;
|
||||
u32 metalen, act;
|
||||
int off;
|
||||
|
||||
/* Reinjected packets coming from act_mirred or similar should
|
||||
* not get XDP generic processing.
|
||||
*/
|
||||
if (skb_is_redirected(skb))
|
||||
return XDP_PASS;
|
||||
|
||||
/* XDP packets must be linear and must have sufficient headroom
|
||||
* of XDP_PACKET_HEADROOM bytes. This is the guarantee that also
|
||||
* native XDP provides, thus we need to do it here as well.
|
||||
*/
|
||||
if (skb_cloned(skb) || skb_is_nonlinear(skb) ||
|
||||
skb_headroom(skb) < XDP_PACKET_HEADROOM) {
|
||||
int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb);
|
||||
int troom = skb->tail + skb->data_len - skb->end;
|
||||
|
||||
/* In case we have to go down the path and also linearize,
|
||||
* then lets do the pskb_expand_head() work just once here.
|
||||
*/
|
||||
if (pskb_expand_head(skb,
|
||||
hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0,
|
||||
troom > 0 ? troom + 128 : 0, GFP_ATOMIC))
|
||||
goto do_drop;
|
||||
if (skb_linearize(skb))
|
||||
goto do_drop;
|
||||
}
|
||||
|
||||
/* The XDP program wants to see the packet starting at the MAC
|
||||
* header.
|
||||
*/
|
||||
|
@ -4837,6 +4810,13 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
|
|||
skb->protocol = eth_type_trans(skb, skb->dev);
|
||||
}
|
||||
|
||||
/* Redirect/Tx gives L2 packet, code that will reuse skb must __skb_pull
|
||||
* before calling us again on redirect path. We do not call do_redirect
|
||||
* as we leave that up to the caller.
|
||||
*
|
||||
* Caller is responsible for managing lifetime of skb (i.e. calling
|
||||
* kfree_skb in response to actions it cannot handle/XDP_DROP).
|
||||
*/
|
||||
switch (act) {
|
||||
case XDP_REDIRECT:
|
||||
case XDP_TX:
|
||||
|
@ -4847,6 +4827,49 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
|
|||
if (metalen)
|
||||
skb_metadata_set(skb, metalen);
|
||||
break;
|
||||
}
|
||||
|
||||
return act;
|
||||
}
|
||||
|
||||
static u32 netif_receive_generic_xdp(struct sk_buff *skb,
|
||||
struct xdp_buff *xdp,
|
||||
struct bpf_prog *xdp_prog)
|
||||
{
|
||||
u32 act = XDP_DROP;
|
||||
|
||||
/* Reinjected packets coming from act_mirred or similar should
|
||||
* not get XDP generic processing.
|
||||
*/
|
||||
if (skb_is_redirected(skb))
|
||||
return XDP_PASS;
|
||||
|
||||
/* XDP packets must be linear and must have sufficient headroom
|
||||
* of XDP_PACKET_HEADROOM bytes. This is the guarantee that also
|
||||
* native XDP provides, thus we need to do it here as well.
|
||||
*/
|
||||
if (skb_cloned(skb) || skb_is_nonlinear(skb) ||
|
||||
skb_headroom(skb) < XDP_PACKET_HEADROOM) {
|
||||
int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb);
|
||||
int troom = skb->tail + skb->data_len - skb->end;
|
||||
|
||||
/* In case we have to go down the path and also linearize,
|
||||
* then lets do the pskb_expand_head() work just once here.
|
||||
*/
|
||||
if (pskb_expand_head(skb,
|
||||
hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0,
|
||||
troom > 0 ? troom + 128 : 0, GFP_ATOMIC))
|
||||
goto do_drop;
|
||||
if (skb_linearize(skb))
|
||||
goto do_drop;
|
||||
}
|
||||
|
||||
act = bpf_prog_run_generic_xdp(skb, xdp, xdp_prog);
|
||||
switch (act) {
|
||||
case XDP_REDIRECT:
|
||||
case XDP_TX:
|
||||
case XDP_PASS:
|
||||
break;
|
||||
default:
|
||||
bpf_warn_invalid_xdp_action(act);
|
||||
fallthrough;
|
||||
|
@ -5312,7 +5335,6 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
|
|||
ret = NET_RX_DROP;
|
||||
goto out;
|
||||
}
|
||||
skb_reset_mac_len(skb);
|
||||
}
|
||||
|
||||
if (eth_type_vlan(skb->protocol)) {
|
||||
|
@ -5638,25 +5660,6 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
|
|||
struct bpf_prog *new = xdp->prog;
|
||||
int ret = 0;
|
||||
|
||||
if (new) {
|
||||
u32 i;
|
||||
|
||||
mutex_lock(&new->aux->used_maps_mutex);
|
||||
|
||||
/* generic XDP does not work with DEVMAPs that can
|
||||
* have a bpf_prog installed on an entry
|
||||
*/
|
||||
for (i = 0; i < new->aux->used_map_cnt; i++) {
|
||||
if (dev_map_can_have_prog(new->aux->used_maps[i]) ||
|
||||
cpu_map_prog_allowed(new->aux->used_maps[i])) {
|
||||
mutex_unlock(&new->aux->used_maps_mutex);
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
mutex_unlock(&new->aux->used_maps_mutex);
|
||||
}
|
||||
|
||||
switch (xdp->command) {
|
||||
case XDP_SETUP_PROG:
|
||||
rcu_assign_pointer(dev->xdp_prog, new);
|
||||
|
|
|
@ -77,6 +77,7 @@
|
|||
#include <net/transp_v6.h>
|
||||
#include <linux/btf_ids.h>
|
||||
#include <net/tls.h>
|
||||
#include <net/xdp.h>
|
||||
|
||||
static const struct bpf_func_proto *
|
||||
bpf_sk_base_func_proto(enum bpf_func_id func_id);
|
||||
|
@ -3880,8 +3881,7 @@ BPF_CALL_2(bpf_xdp_adjust_meta, struct xdp_buff *, xdp, int, offset)
|
|||
if (unlikely(meta < xdp_frame_end ||
|
||||
meta > xdp->data))
|
||||
return -EINVAL;
|
||||
if (unlikely((metalen & (sizeof(__u32) - 1)) ||
|
||||
(metalen > 32)))
|
||||
if (unlikely(xdp_metalen_invalid(metalen)))
|
||||
return -EACCES;
|
||||
|
||||
xdp->data_meta = meta;
|
||||
|
@ -4040,8 +4040,12 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
|
|||
goto err;
|
||||
consume_skb(skb);
|
||||
break;
|
||||
case BPF_MAP_TYPE_CPUMAP:
|
||||
err = cpu_map_generic_redirect(fwd, skb);
|
||||
if (unlikely(err))
|
||||
goto err;
|
||||
break;
|
||||
default:
|
||||
/* TODO: Handle BPF_MAP_TYPE_CPUMAP */
|
||||
err = -EBADRQC;
|
||||
goto err;
|
||||
}
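
With cpu_map_generic_redirect() wired up here, a generic-mode (skb) XDP program can redirect into a CPUMAP with the same program text as native XDP. A minimal program-side sketch, not part of this patch; the map size, CPU index and fallback action are made up:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_CPUMAP);
	__uint(max_entries, 64);
	__type(key, __u32);
	__type(value, struct bpf_cpumap_val);
} cpu_map SEC(".maps");

SEC("xdp")
int redirect_to_cpu(struct xdp_md *ctx)
{
	__u32 cpu = 0;	/* a real program would pick a CPU per flow */

	/* lower bits of flags select the action when the entry is empty */
	return bpf_redirect_map(&cpu_map, cpu, XDP_PASS);
}

char _license[] SEC("license") = "GPL";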
|
||||
|
|
|
@ -211,8 +211,6 @@ static struct sk_psock *sock_map_psock_get_checked(struct sock *sk)
|
|||
return psock;
|
||||
}
|
||||
|
||||
static bool sock_map_redirect_allowed(const struct sock *sk);
|
||||
|
||||
static int sock_map_link(struct bpf_map *map, struct sock *sk)
|
||||
{
|
||||
struct sk_psock_progs *progs = sock_map_progs(map);
|
||||
|
@ -223,13 +221,6 @@ static int sock_map_link(struct bpf_map *map, struct sock *sk)
|
|||
struct sk_psock *psock;
|
||||
int ret;
|
||||
|
||||
/* Only sockets we can redirect into/from in BPF need to hold
|
||||
* refs to parser/verdict progs and have their sk_data_ready
|
||||
* and sk_write_space callbacks overridden.
|
||||
*/
|
||||
if (!sock_map_redirect_allowed(sk))
|
||||
goto no_progs;
|
||||
|
||||
stream_verdict = READ_ONCE(progs->stream_verdict);
|
||||
if (stream_verdict) {
|
||||
stream_verdict = bpf_prog_inc_not_zero(stream_verdict);
|
||||
|
@ -264,7 +255,6 @@ static int sock_map_link(struct bpf_map *map, struct sock *sk)
|
|||
}
|
||||
}
|
||||
|
||||
no_progs:
|
||||
psock = sock_map_psock_get_checked(sk);
|
||||
if (IS_ERR(psock)) {
|
||||
ret = PTR_ERR(psock);
|
||||
|
@ -527,12 +517,6 @@ static bool sk_is_tcp(const struct sock *sk)
|
|||
sk->sk_protocol == IPPROTO_TCP;
|
||||
}
|
||||
|
||||
static bool sk_is_udp(const struct sock *sk)
|
||||
{
|
||||
return sk->sk_type == SOCK_DGRAM &&
|
||||
sk->sk_protocol == IPPROTO_UDP;
|
||||
}
|
||||
|
||||
static bool sock_map_redirect_allowed(const struct sock *sk)
|
||||
{
|
||||
if (sk_is_tcp(sk))
|
||||
|
@ -550,10 +534,7 @@ static bool sock_map_sk_state_allowed(const struct sock *sk)
|
|||
{
|
||||
if (sk_is_tcp(sk))
|
||||
return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_LISTEN);
|
||||
else if (sk_is_udp(sk))
|
||||
return sk_hashed(sk);
|
||||
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
static int sock_hash_update_common(struct bpf_map *map, void *key,
|
||||
|
@ -1536,6 +1517,7 @@ void sock_map_close(struct sock *sk, long timeout)
|
|||
release_sock(sk);
|
||||
saved_close(sk, timeout);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(sock_map_close);
|
||||
|
||||
static int sock_map_iter_attach_target(struct bpf_prog *prog,
|
||||
union bpf_iter_link_info *linfo,
|
||||
|
|
|
@ -112,7 +112,6 @@ static struct proto udp_bpf_prots[UDP_BPF_NUM_PROTS];
|
|||
static void udp_bpf_rebuild_protos(struct proto *prot, const struct proto *base)
|
||||
{
|
||||
*prot = *base;
|
||||
prot->unhash = sock_map_unhash;
|
||||
prot->close = sock_map_close;
|
||||
prot->recvmsg = udp_bpf_recvmsg;
|
||||
}
|
||||
|
|
|
@ -7,6 +7,7 @@ obj-$(CONFIG_UNIX) += unix.o
|
|||
|
||||
unix-y := af_unix.o garbage.o
|
||||
unix-$(CONFIG_SYSCTL) += sysctl_net_unix.o
|
||||
unix-$(CONFIG_BPF_SYSCALL) += unix_bpf.o
|
||||
|
||||
obj-$(CONFIG_UNIX_DIAG) += unix_diag.o
|
||||
unix_diag-y := diag.o
|
||||
|
|
|
@ -494,6 +494,7 @@ static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
|
|||
sk_error_report(other);
|
||||
}
|
||||
}
|
||||
sk->sk_state = other->sk_state = TCP_CLOSE;
|
||||
}
|
||||
|
||||
static void unix_sock_destructor(struct sock *sk)
|
||||
|
@ -669,6 +670,8 @@ static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos,
|
|||
unsigned int flags);
|
||||
static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
|
||||
static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
|
||||
static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
|
||||
sk_read_actor_t recv_actor);
|
||||
static int unix_dgram_connect(struct socket *, struct sockaddr *,
|
||||
int, int);
|
||||
static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
|
||||
|
@ -746,6 +749,7 @@ static const struct proto_ops unix_dgram_ops = {
|
|||
.listen = sock_no_listen,
|
||||
.shutdown = unix_shutdown,
|
||||
.sendmsg = unix_dgram_sendmsg,
|
||||
.read_sock = unix_read_sock,
|
||||
.recvmsg = unix_dgram_recvmsg,
|
||||
.mmap = sock_no_mmap,
|
||||
.sendpage = sock_no_sendpage,
|
||||
|
@ -777,10 +781,21 @@ static const struct proto_ops unix_seqpacket_ops = {
|
|||
.show_fdinfo = unix_show_fdinfo,
|
||||
};
|
||||
|
||||
static struct proto unix_proto = {
|
||||
static void unix_close(struct sock *sk, long timeout)
|
||||
{
|
||||
/* Nothing to do here, unix socket does not need a ->close().
|
||||
* This is merely for sockmap.
|
||||
*/
|
||||
}
|
||||
|
||||
struct proto unix_proto = {
|
||||
.name = "UNIX",
|
||||
.owner = THIS_MODULE,
|
||||
.obj_size = sizeof(struct unix_sock),
|
||||
.close = unix_close,
|
||||
#ifdef CONFIG_BPF_SYSCALL
|
||||
.psock_update_sk_prot = unix_bpf_update_proto,
|
||||
#endif
|
||||
};
|
||||
|
||||
static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
|
||||
|
@ -864,6 +879,7 @@ static int unix_release(struct socket *sock)
|
|||
if (!sk)
|
||||
return 0;
|
||||
|
||||
sk->sk_prot->close(sk, 0);
|
||||
unix_release_sock(sk, 0);
|
||||
sock->sk = NULL;
|
||||
|
||||
|
@ -1199,6 +1215,9 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
|
|||
unix_peer(sk) = other;
|
||||
unix_state_double_unlock(sk, other);
|
||||
}
|
||||
|
||||
if (unix_peer(sk))
|
||||
sk->sk_state = other->sk_state = TCP_ESTABLISHED;
|
||||
return 0;
|
||||
|
||||
out_unlock:
|
||||
|
@ -1431,12 +1450,10 @@ static int unix_socketpair(struct socket *socka, struct socket *sockb)
|
|||
init_peercred(ska);
|
||||
init_peercred(skb);
|
||||
|
||||
if (ska->sk_type != SOCK_DGRAM) {
|
||||
ska->sk_state = TCP_ESTABLISHED;
|
||||
skb->sk_state = TCP_ESTABLISHED;
|
||||
socka->state = SS_CONNECTED;
|
||||
sockb->state = SS_CONNECTED;
|
||||
}
|
||||
ska->sk_state = TCP_ESTABLISHED;
|
||||
skb->sk_state = TCP_ESTABLISHED;
|
||||
socka->state = SS_CONNECTED;
|
||||
sockb->state = SS_CONNECTED;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -2081,11 +2098,11 @@ static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
|
|||
}
|
||||
}
|
||||
|
||||
static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
|
||||
size_t size, int flags)
|
||||
int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
|
||||
int flags)
|
||||
{
|
||||
struct scm_cookie scm;
|
||||
struct sock *sk = sock->sk;
|
||||
struct socket *sock = sk->sk_socket;
|
||||
struct unix_sock *u = unix_sk(sk);
|
||||
struct sk_buff *skb, *last;
|
||||
long timeo;
|
||||
|
@ -2188,6 +2205,53 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
|
|||
return err;
|
||||
}
|
||||
|
||||
static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
|
||||
int flags)
|
||||
{
|
||||
struct sock *sk = sock->sk;
|
||||
|
||||
#ifdef CONFIG_BPF_SYSCALL
|
||||
if (sk->sk_prot != &unix_proto)
|
||||
return sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
|
||||
flags & ~MSG_DONTWAIT, NULL);
|
||||
#endif
|
||||
return __unix_dgram_recvmsg(sk, msg, size, flags);
|
||||
}
|
||||
|
||||
static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
|
||||
sk_read_actor_t recv_actor)
|
||||
{
|
||||
int copied = 0;
|
||||
|
||||
while (1) {
|
||||
struct unix_sock *u = unix_sk(sk);
|
||||
struct sk_buff *skb;
|
||||
int used, err;
|
||||
|
||||
mutex_lock(&u->iolock);
|
||||
skb = skb_recv_datagram(sk, 0, 1, &err);
|
||||
mutex_unlock(&u->iolock);
|
||||
if (!skb)
|
||||
return err;
|
||||
|
||||
used = recv_actor(desc, skb, 0, skb->len);
|
||||
if (used <= 0) {
|
||||
if (!copied)
|
||||
copied = used;
|
||||
kfree_skb(skb);
|
||||
break;
|
||||
} else if (used <= skb->len) {
|
||||
copied += used;
|
||||
}
|
||||
|
||||
kfree_skb(skb);
|
||||
if (!desc->count)
|
||||
break;
|
||||
}
|
||||
|
||||
return copied;
|
||||
}
|
||||
|
||||
/*
|
||||
* Sleep until more data has arrived. But check for races..
|
||||
*/
|
||||
|
@ -2925,6 +2989,7 @@ static int __init af_unix_init(void)
|
|||
|
||||
sock_register(&unix_family_ops);
|
||||
register_pernet_subsys(&unix_net_ops);
|
||||
unix_bpf_build_proto();
|
||||
out:
|
||||
return rc;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,122 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/* Copyright (c) 2021 Cong Wang <cong.wang@bytedance.com> */
|
||||
|
||||
#include <linux/skmsg.h>
|
||||
#include <linux/bpf.h>
|
||||
#include <net/sock.h>
|
||||
#include <net/af_unix.h>
|
||||
|
||||
#define unix_sk_has_data(__sk, __psock) \
|
||||
({ !skb_queue_empty(&__sk->sk_receive_queue) || \
|
||||
!skb_queue_empty(&__psock->ingress_skb) || \
|
||||
!list_empty(&__psock->ingress_msg); \
|
||||
})
|
||||
|
||||
static int unix_msg_wait_data(struct sock *sk, struct sk_psock *psock,
|
||||
long timeo)
|
||||
{
|
||||
DEFINE_WAIT_FUNC(wait, woken_wake_function);
|
||||
struct unix_sock *u = unix_sk(sk);
|
||||
int ret = 0;
|
||||
|
||||
if (sk->sk_shutdown & RCV_SHUTDOWN)
|
||||
return 1;
|
||||
|
||||
if (!timeo)
|
||||
return ret;
|
||||
|
||||
add_wait_queue(sk_sleep(sk), &wait);
|
||||
sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
|
||||
if (!unix_sk_has_data(sk, psock)) {
|
||||
mutex_unlock(&u->iolock);
|
||||
wait_woken(&wait, TASK_INTERRUPTIBLE, timeo);
|
||||
mutex_lock(&u->iolock);
|
||||
ret = unix_sk_has_data(sk, psock);
|
||||
}
|
||||
sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
|
||||
remove_wait_queue(sk_sleep(sk), &wait);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int unix_dgram_bpf_recvmsg(struct sock *sk, struct msghdr *msg,
|
||||
size_t len, int nonblock, int flags,
|
||||
int *addr_len)
|
||||
{
|
||||
struct unix_sock *u = unix_sk(sk);
|
||||
struct sk_psock *psock;
|
||||
int copied, ret;
|
||||
|
||||
psock = sk_psock_get(sk);
|
||||
if (unlikely(!psock))
|
||||
return __unix_dgram_recvmsg(sk, msg, len, flags);
|
||||
|
||||
mutex_lock(&u->iolock);
|
||||
if (!skb_queue_empty(&sk->sk_receive_queue) &&
|
||||
sk_psock_queue_empty(psock)) {
|
||||
ret = __unix_dgram_recvmsg(sk, msg, len, flags);
|
||||
goto out;
|
||||
}
|
||||
|
||||
msg_bytes_ready:
|
||||
copied = sk_msg_recvmsg(sk, psock, msg, len, flags);
|
||||
if (!copied) {
|
||||
long timeo;
|
||||
int data;
|
||||
|
||||
timeo = sock_rcvtimeo(sk, nonblock);
|
||||
data = unix_msg_wait_data(sk, psock, timeo);
|
||||
if (data) {
|
||||
if (!sk_psock_queue_empty(psock))
|
||||
goto msg_bytes_ready;
|
||||
ret = __unix_dgram_recvmsg(sk, msg, len, flags);
|
||||
goto out;
|
||||
}
|
||||
copied = -EAGAIN;
|
||||
}
|
||||
ret = copied;
|
||||
out:
|
||||
mutex_unlock(&u->iolock);
|
||||
sk_psock_put(sk, psock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct proto *unix_prot_saved __read_mostly;
|
||||
static DEFINE_SPINLOCK(unix_prot_lock);
|
||||
static struct proto unix_bpf_prot;
|
||||
|
||||
static void unix_bpf_rebuild_protos(struct proto *prot, const struct proto *base)
|
||||
{
|
||||
*prot = *base;
|
||||
prot->close = sock_map_close;
|
||||
prot->recvmsg = unix_dgram_bpf_recvmsg;
|
||||
}
|
||||
|
||||
static void unix_bpf_check_needs_rebuild(struct proto *ops)
|
||||
{
|
||||
if (unlikely(ops != smp_load_acquire(&unix_prot_saved))) {
|
||||
spin_lock_bh(&unix_prot_lock);
|
||||
if (likely(ops != unix_prot_saved)) {
|
||||
unix_bpf_rebuild_protos(&unix_bpf_prot, ops);
|
||||
smp_store_release(&unix_prot_saved, ops);
|
||||
}
|
||||
spin_unlock_bh(&unix_prot_lock);
|
||||
}
|
||||
}
|
||||
|
||||
int unix_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
|
||||
{
|
||||
if (restore) {
|
||||
sk->sk_write_space = psock->saved_write_space;
|
||||
WRITE_ONCE(sk->sk_prot, psock->sk_proto);
|
||||
return 0;
|
||||
}
|
||||
|
||||
unix_bpf_check_needs_rebuild(psock->sk_proto);
|
||||
WRITE_ONCE(sk->sk_prot, &unix_bpf_prot);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void __init unix_bpf_build_proto(void)
|
||||
{
|
||||
unix_bpf_rebuild_protos(&unix_bpf_prot, &unix_proto);
|
||||
}
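A minimal user-space sketch (not part of the patch) of what the new unix_bpf proto enables: one end of an AF_UNIX datagram socketpair is inserted into a BPF_MAP_TYPE_SOCKMAP via bpf_map_update_elem(), mirroring what the sockmap_listen selftest later in this series does with add_to_sockmap(). The map_fd parameter is assumed to come from elsewhere and error/cleanup handling is omitted.

	#include <sys/socket.h>
	#include <linux/types.h>
	#include <bpf/bpf.h>

	/* Insert one end of an AF_UNIX datagram socketpair into an existing
	 * sockmap.  'map_fd' is assumed to be a BPF_MAP_TYPE_SOCKMAP created
	 * and loaded elsewhere; closing the sockets is left to the caller.
	 */
	static int add_unix_dgram_to_sockmap(int map_fd)
	{
		__u64 value;
		__u32 key = 0;
		int sfd[2];

		if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0, sfd))
			return -1;

		value = sfd[1];	/* sockmap updates take the socket FD as value */
		return bpf_map_update_elem(map_fd, &key, &value, BPF_NOEXIST);
	}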
|
|
@ -792,13 +792,23 @@ int main(int argc, char **argv)
|
|||
|
||||
n_cpus = get_nprocs_conf();
|
||||
|
||||
/* Notice: choosing the queue size is very important with the
|
||||
* ixgbe driver, because its driver page recycling trick is
|
||||
* dependent on pages being returned quickly. The number of
|
||||
* outstanding packets in the system must be less than 2x
|
||||
* RX-ring size.
|
||||
/* Notice: Choosing the queue size is very important when the CPU is
|
||||
* configured with power-saving states.
|
||||
*
|
||||
* If the deepest state takes 133 usec to wake up from (133/10^6) and the link
|
||||
* speed is 10Gbit/s ((10*10^9/8) in bytes/sec), how many bytes can
|
||||
* arrive within 133 usec at this speed? (10*10^9/8)*(133/10^6) =
|
||||
* 166250 bytes. With MTU size packets this is 110 packets, and with
|
||||
* minimum Ethernet frames (MAC-preamble + interframe gap, 84 bytes) it is 1979 packets.
|
||||
*
|
||||
* Setting the default cpumap queue to 2048 covers the worst case (small packets):
|
||||
* 1979 packets plus up to 64 more due to the kthread wakeup call (xdp_do_flush),
|
||||
* for a worst case of 2043 packets.
|
||||
*
|
||||
* Sysadmins can configure the system to avoid deep sleep via:
|
||||
* tuned-adm profile network-latency
|
||||
*/
|
||||
qsize = 128+64;
|
||||
qsize = 2048;
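As a quick cross-check of the arithmetic in the comment above, a standalone sketch (not part of the sample) that reproduces the 166250-byte / 110-packet / 1979-packet numbers:

	#include <stdio.h>

	int main(void)
	{
		double bytes_per_sec = 10e9 / 8;	/* 10 Gbit/s in bytes/sec */
		double wakeup_sec = 133e-6;		/* deepest C-state exit latency */
		double bytes = bytes_per_sec * wakeup_sec;

		printf("bytes in flight: %.0f\n", bytes);		/* ~166250 */
		printf("MTU-size packets: %.0f\n", bytes / 1500);	/* ~110 */
		printf("min frames (84B on wire): %.0f\n", bytes / 84);	/* ~1979 */
		return 0;
	}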
|
||||
|
||||
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
|
||||
prog_load_attr.file = filename;
|
||||
|
|
|
@ -547,6 +547,7 @@ class PrinterHelpers(Printer):
|
|||
'struct inode',
|
||||
'struct socket',
|
||||
'struct file',
|
||||
'struct bpf_timer',
|
||||
]
|
||||
known_types = {
|
||||
'...',
|
||||
|
@ -594,6 +595,7 @@ class PrinterHelpers(Printer):
|
|||
'struct inode',
|
||||
'struct socket',
|
||||
'struct file',
|
||||
'struct bpf_timer',
|
||||
}
|
||||
mapped_types = {
|
||||
'u8': '__u8',
|
||||
|
|
|
@ -324,9 +324,6 @@ union bpf_iter_link_info {
|
|||
* **BPF_PROG_TYPE_SK_LOOKUP**
|
||||
* *data_in* and *data_out* must be NULL.
|
||||
*
|
||||
* **BPF_PROG_TYPE_XDP**
|
||||
* *ctx_in* and *ctx_out* must be NULL.
|
||||
*
|
||||
* **BPF_PROG_TYPE_RAW_TRACEPOINT**,
|
||||
* **BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE**
|
||||
*
|
||||
|
@ -3249,7 +3246,7 @@ union bpf_attr {
|
|||
* long bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags)
|
||||
* Description
|
||||
* Select a **SO_REUSEPORT** socket from a
|
||||
* **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*.
|
||||
* **BPF_MAP_TYPE_REUSEPORT_SOCKARRAY** *map*.
|
||||
* It checks that the selected socket matches the incoming
|
||||
* request in the socket buffer.
|
||||
* Return
|
||||
|
@ -4780,6 +4777,76 @@ union bpf_attr {
|
|||
* Execute close syscall for given FD.
|
||||
* Return
|
||||
* A syscall result.
|
||||
*
|
||||
* long bpf_timer_init(struct bpf_timer *timer, struct bpf_map *map, u64 flags)
|
||||
* Description
|
||||
* Initialize the timer.
|
||||
* First 4 bits of *flags* specify clockid.
|
||||
* Only CLOCK_MONOTONIC, CLOCK_REALTIME, CLOCK_BOOTTIME are allowed.
|
||||
* All other bits of *flags* are reserved.
|
||||
* The verifier will reject the program if *timer* is not from
|
||||
* the same *map*.
|
||||
* Return
|
||||
* 0 on success.
|
||||
* **-EBUSY** if *timer* is already initialized.
|
||||
* **-EINVAL** if invalid *flags* are passed.
|
||||
* **-EPERM** if *timer* is in a map that doesn't have any user references.
|
||||
* The user space should either hold a file descriptor to a map with timers
|
||||
* or pin such map in bpffs. When map is unpinned or file descriptor is
|
||||
* closed all timers in the map will be cancelled and freed.
|
||||
*
|
||||
* long bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn)
|
||||
* Description
|
||||
* Configure the timer to call the *callback_fn* static function.
|
||||
* Return
|
||||
* 0 on success.
|
||||
* **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier.
|
||||
* **-EPERM** if *timer* is in a map that doesn't have any user references.
|
||||
* The user space should either hold a file descriptor to a map with timers
|
||||
* or pin such map in bpffs. When map is unpinned or file descriptor is
|
||||
* closed all timers in the map will be cancelled and freed.
|
||||
*
|
||||
* long bpf_timer_start(struct bpf_timer *timer, u64 nsecs, u64 flags)
|
||||
* Description
|
||||
* Set timer expiration N nanoseconds from the current time. The
|
||||
* configured callback will be invoked in soft irq context on some cpu
|
||||
* and will not repeat unless another bpf_timer_start() is made.
|
||||
* In such a case the next invocation can migrate to a different CPU.
|
||||
* Since struct bpf_timer is a field inside map element the map
|
||||
* owns the timer. The bpf_timer_set_callback() will increment refcnt
|
||||
* of BPF program to make sure that callback_fn code stays valid.
|
||||
* When user space reference to a map reaches zero all timers
|
||||
* in a map are cancelled and corresponding program's refcnts are
|
||||
* decremented. This is done to make sure that Ctrl-C of a user
|
||||
* process doesn't leave any timers running. If map is pinned in
|
||||
* bpffs the callback_fn can re-arm itself indefinitely.
|
||||
* bpf_map_update/delete_elem() helpers and user space sys_bpf commands
|
||||
* cancel and free the timer in the given map element.
|
||||
* The map can contain timers that invoke callback_fn-s from different
|
||||
* programs. The same callback_fn can serve different timers from
|
||||
* different maps if key/value layout matches across maps.
|
||||
* Every bpf_timer_set_callback() can have different callback_fn.
|
||||
*
|
||||
* Return
|
||||
* 0 on success.
|
||||
* **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier
|
||||
* or invalid *flags* are passed.
|
||||
*
|
||||
* long bpf_timer_cancel(struct bpf_timer *timer)
|
||||
* Description
|
||||
* Cancel the timer and wait for callback_fn to finish if it was running.
|
||||
* Return
|
||||
* 0 if the timer was not active.
|
||||
* 1 if the timer was active.
|
||||
* **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier.
|
||||
* **-EDEADLK** if callback_fn tried to call bpf_timer_cancel() on its
|
||||
* own timer which would have led to a deadlock otherwise.
|
||||
*
|
||||
* u64 bpf_get_func_ip(void *ctx)
|
||||
* Description
|
||||
* Get address of the traced function (for tracing and kprobe programs).
|
||||
* Return
|
||||
* Address of the traced function.
|
||||
*/
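A minimal sketch of how a BPF program is expected to combine the four timer helpers documented above, assuming a map value that embeds a struct bpf_timer. The map name, callback name, and the fentry attach point are illustrative; the selftests later in this series show the complete, verified pattern.

	#include <linux/bpf.h>
	#include <time.h>
	#include <bpf/bpf_helpers.h>
	#include <bpf/bpf_tracing.h>

	char _license[] SEC("license") = "GPL";

	struct map_val {
		struct bpf_timer t;
	};

	struct {
		__uint(type, BPF_MAP_TYPE_ARRAY);
		__uint(max_entries, 1);
		__type(key, int);
		__type(value, struct map_val);
	} timers SEC(".maps");

	static int timer_cb(void *map, int *key, struct map_val *val)
	{
		/* runs in soft irq context; re-arm 1 msec from now */
		bpf_timer_start(&val->t, 1000000, 0);
		return 0;
	}

	SEC("fentry/bpf_fentry_test1")
	int BPF_PROG(arm_timer)
	{
		int key = 0;
		struct map_val *val = bpf_map_lookup_elem(&timers, &key);

		if (!val)
			return 0;
		bpf_timer_init(&val->t, &timers, CLOCK_MONOTONIC);
		bpf_timer_set_callback(&val->t, timer_cb);
		bpf_timer_start(&val->t, 0 /* fire asap */, 0);
		return 0;
	}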
|
||||
#define __BPF_FUNC_MAPPER(FN) \
|
||||
FN(unspec), \
|
||||
|
@ -4951,6 +5018,11 @@ union bpf_attr {
|
|||
FN(sys_bpf), \
|
||||
FN(btf_find_by_name_kind), \
|
||||
FN(sys_close), \
|
||||
FN(timer_init), \
|
||||
FN(timer_set_callback), \
|
||||
FN(timer_start), \
|
||||
FN(timer_cancel), \
|
||||
FN(get_func_ip), \
|
||||
/* */
|
||||
|
||||
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
|
||||
|
@ -6077,6 +6149,11 @@ struct bpf_spin_lock {
|
|||
__u32 val;
|
||||
};
|
||||
|
||||
struct bpf_timer {
|
||||
__u64 :64;
|
||||
__u64 :64;
|
||||
} __attribute__((aligned(8)));
|
||||
|
||||
struct bpf_sysctl {
|
||||
__u32 write; /* Sysctl is being read (= 0) or written (= 1).
|
||||
* Allows 1,2,4-byte read, but no write.
|
||||
|
|
|
@ -3894,6 +3894,42 @@ static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info)
|
||||
{
|
||||
char file[PATH_MAX], buff[4096];
|
||||
FILE *fp;
|
||||
__u32 val;
|
||||
int err;
|
||||
|
||||
snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
|
||||
memset(info, 0, sizeof(*info));
|
||||
|
||||
fp = fopen(file, "r");
|
||||
if (!fp) {
|
||||
err = -errno;
|
||||
pr_warn("failed to open %s: %d. No procfs support?\n", file,
|
||||
err);
|
||||
return err;
|
||||
}
|
||||
|
||||
while (fgets(buff, sizeof(buff), fp)) {
|
||||
if (sscanf(buff, "map_type:\t%u", &val) == 1)
|
||||
info->type = val;
|
||||
else if (sscanf(buff, "key_size:\t%u", &val) == 1)
|
||||
info->key_size = val;
|
||||
else if (sscanf(buff, "value_size:\t%u", &val) == 1)
|
||||
info->value_size = val;
|
||||
else if (sscanf(buff, "max_entries:\t%u", &val) == 1)
|
||||
info->max_entries = val;
|
||||
else if (sscanf(buff, "map_flags:\t%i", &val) == 1)
|
||||
info->map_flags = val;
|
||||
}
|
||||
|
||||
fclose(fp);
|
||||
|
||||
return 0;
|
||||
}
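For reference, the parser above expects /proc/<pid>/fdinfo/<fd> lines of the following shape for a BPF map fd (values here are illustrative, and additional fields such as memlock or map_id may also be present):

	map_type:	1
	key_size:	4
	value_size:	8
	max_entries:	2
	map_flags:	0x0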
|
||||
|
||||
int bpf_map__reuse_fd(struct bpf_map *map, int fd)
|
||||
{
|
||||
struct bpf_map_info info = {};
|
||||
|
@ -3902,6 +3938,8 @@ int bpf_map__reuse_fd(struct bpf_map *map, int fd)
|
|||
char *new_name;
|
||||
|
||||
err = bpf_obj_get_info_by_fd(fd, &info, &len);
|
||||
if (err && errno == EINVAL)
|
||||
err = bpf_get_map_info_from_fdinfo(fd, &info);
|
||||
if (err)
|
||||
return libbpf_err(err);
|
||||
|
||||
|
@ -4381,12 +4419,16 @@ static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
|
|||
struct bpf_map_info map_info = {};
|
||||
char msg[STRERR_BUFSIZE];
|
||||
__u32 map_info_len;
|
||||
int err;
|
||||
|
||||
map_info_len = sizeof(map_info);
|
||||
|
||||
if (bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len)) {
|
||||
pr_warn("failed to get map info for map FD %d: %s\n",
|
||||
map_fd, libbpf_strerror_r(errno, msg, sizeof(msg)));
|
||||
err = bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len);
|
||||
if (err && errno == EINVAL)
|
||||
err = bpf_get_map_info_from_fdinfo(map_fd, &map_info);
|
||||
if (err) {
|
||||
pr_warn("failed to get map info for map FD %d: %s\n", map_fd,
|
||||
libbpf_strerror_r(errno, msg, sizeof(msg)));
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -10304,19 +10346,25 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
|
|||
return pfd;
|
||||
}
|
||||
|
||||
struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
|
||||
bool retprobe,
|
||||
const char *func_name)
|
||||
struct bpf_program_attach_kprobe_opts {
|
||||
bool retprobe;
|
||||
unsigned long offset;
|
||||
};
|
||||
|
||||
static struct bpf_link*
|
||||
bpf_program__attach_kprobe_opts(struct bpf_program *prog,
|
||||
const char *func_name,
|
||||
struct bpf_program_attach_kprobe_opts *opts)
|
||||
{
|
||||
char errmsg[STRERR_BUFSIZE];
|
||||
struct bpf_link *link;
|
||||
int pfd, err;
|
||||
|
||||
pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name,
|
||||
0 /* offset */, -1 /* pid */);
|
||||
pfd = perf_event_open_probe(false /* uprobe */, opts->retprobe, func_name,
|
||||
opts->offset, -1 /* pid */);
|
||||
if (pfd < 0) {
|
||||
pr_warn("prog '%s': failed to create %s '%s' perf event: %s\n",
|
||||
prog->name, retprobe ? "kretprobe" : "kprobe", func_name,
|
||||
prog->name, opts->retprobe ? "kretprobe" : "kprobe", func_name,
|
||||
libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
|
||||
return libbpf_err_ptr(pfd);
|
||||
}
|
||||
|
@ -10325,23 +10373,53 @@ struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
|
|||
if (err) {
|
||||
close(pfd);
|
||||
pr_warn("prog '%s': failed to attach to %s '%s': %s\n",
|
||||
prog->name, retprobe ? "kretprobe" : "kprobe", func_name,
|
||||
prog->name, opts->retprobe ? "kretprobe" : "kprobe", func_name,
|
||||
libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
|
||||
return libbpf_err_ptr(err);
|
||||
}
|
||||
return link;
|
||||
}
|
||||
|
||||
struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
|
||||
bool retprobe,
|
||||
const char *func_name)
|
||||
{
|
||||
struct bpf_program_attach_kprobe_opts opts = {
|
||||
.retprobe = retprobe,
|
||||
};
|
||||
|
||||
return bpf_program__attach_kprobe_opts(prog, func_name, &opts);
|
||||
}
|
||||
|
||||
static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec,
|
||||
struct bpf_program *prog)
|
||||
{
|
||||
struct bpf_program_attach_kprobe_opts opts;
|
||||
unsigned long offset = 0;
|
||||
struct bpf_link *link;
|
||||
const char *func_name;
|
||||
bool retprobe;
|
||||
char *func;
|
||||
int n, err;
|
||||
|
||||
func_name = prog->sec_name + sec->len;
|
||||
retprobe = strcmp(sec->sec, "kretprobe/") == 0;
|
||||
opts.retprobe = strcmp(sec->sec, "kretprobe/") == 0;
|
||||
|
||||
return bpf_program__attach_kprobe(prog, retprobe, func_name);
|
||||
n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%lx", &func, &offset);
|
||||
if (n < 1) {
|
||||
err = -EINVAL;
|
||||
pr_warn("kprobe name is invalid: %s\n", func_name);
|
||||
return libbpf_err_ptr(err);
|
||||
}
|
||||
if (opts.retprobe && offset != 0) {
|
||||
err = -EINVAL;
|
||||
pr_warn("kretprobes do not support offset specification\n");
|
||||
return libbpf_err_ptr(err);
|
||||
}
|
||||
|
||||
opts.offset = offset;
|
||||
link = bpf_program__attach_kprobe_opts(prog, func, &opts);
|
||||
free(func);
|
||||
return link;
|
||||
}
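Illustrative only (placeholder symbol names): section names the new parsing above accepts. The offset after '+' is parsed as hex, and kretprobe sections must not carry an offset.

	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>
	#include <bpf/bpf_tracing.h>

	char _license[] SEC("license") = "GPL";

	SEC("kprobe/schedule")		/* plain function name, offset 0 */
	int BPF_KPROBE(at_entry)
	{
		return 0;
	}

	SEC("kprobe/schedule+0x8")	/* function name plus instruction offset */
	int BPF_KPROBE(past_entry)
	{
		return 0;
	}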
|
||||
|
||||
struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
|
||||
|
|
|
@ -0,0 +1,53 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
#include <test_progs.h>
|
||||
#include "get_func_ip_test.skel.h"
|
||||
|
||||
void test_get_func_ip_test(void)
|
||||
{
|
||||
struct get_func_ip_test *skel = NULL;
|
||||
__u32 duration = 0, retval;
|
||||
int err, prog_fd;
|
||||
|
||||
skel = get_func_ip_test__open();
|
||||
if (!ASSERT_OK_PTR(skel, "get_func_ip_test__open"))
|
||||
return;
|
||||
|
||||
/* test6 is x86_64 specific because of the instruction
|
||||
* offset, disabling it for all other archs
|
||||
*/
|
||||
#ifndef __x86_64__
|
||||
bpf_program__set_autoload(skel->progs.test6, false);
|
||||
#endif
|
||||
|
||||
err = get_func_ip_test__load(skel);
|
||||
if (!ASSERT_OK(err, "get_func_ip_test__load"))
|
||||
goto cleanup;
|
||||
|
||||
err = get_func_ip_test__attach(skel);
|
||||
if (!ASSERT_OK(err, "get_func_ip_test__attach"))
|
||||
goto cleanup;
|
||||
|
||||
prog_fd = bpf_program__fd(skel->progs.test1);
|
||||
err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
|
||||
NULL, NULL, &retval, &duration);
|
||||
ASSERT_OK(err, "test_run");
|
||||
ASSERT_EQ(retval, 0, "test_run");
|
||||
|
||||
prog_fd = bpf_program__fd(skel->progs.test5);
|
||||
err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
|
||||
NULL, NULL, &retval, &duration);
|
||||
|
||||
ASSERT_OK(err, "test_run");
|
||||
|
||||
ASSERT_EQ(skel->bss->test1_result, 1, "test1_result");
|
||||
ASSERT_EQ(skel->bss->test2_result, 1, "test2_result");
|
||||
ASSERT_EQ(skel->bss->test3_result, 1, "test3_result");
|
||||
ASSERT_EQ(skel->bss->test4_result, 1, "test4_result");
|
||||
ASSERT_EQ(skel->bss->test5_result, 1, "test5_result");
|
||||
#ifdef __x86_64__
|
||||
ASSERT_EQ(skel->bss->test6_result, 1, "test6_result");
|
||||
#endif
|
||||
|
||||
cleanup:
|
||||
get_func_ip_test__destroy(skel);
|
||||
}
|
|
@ -351,9 +351,11 @@ static void test_insert_opened(int family, int sotype, int mapfd)
|
|||
errno = 0;
|
||||
value = s;
|
||||
err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
|
||||
if (!err || errno != EOPNOTSUPP)
|
||||
FAIL_ERRNO("map_update: expected EOPNOTSUPP");
|
||||
|
||||
if (sotype == SOCK_STREAM) {
|
||||
if (!err || errno != EOPNOTSUPP)
|
||||
FAIL_ERRNO("map_update: expected EOPNOTSUPP");
|
||||
} else if (err)
|
||||
FAIL_ERRNO("map_update: expected success");
|
||||
xclose(s);
|
||||
}
|
||||
|
||||
|
@ -919,6 +921,23 @@ static const char *redir_mode_str(enum redir_mode mode)
|
|||
}
|
||||
}
|
||||
|
||||
static int add_to_sockmap(int sock_mapfd, int fd1, int fd2)
|
||||
{
|
||||
u64 value;
|
||||
u32 key;
|
||||
int err;
|
||||
|
||||
key = 0;
|
||||
value = fd1;
|
||||
err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
key = 1;
|
||||
value = fd2;
|
||||
return xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
|
||||
}
|
||||
|
||||
static void redir_to_connected(int family, int sotype, int sock_mapfd,
|
||||
int verd_mapfd, enum redir_mode mode)
|
||||
{
|
||||
|
@ -928,7 +947,6 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd,
|
|||
unsigned int pass;
|
||||
socklen_t len;
|
||||
int err, n;
|
||||
u64 value;
|
||||
u32 key;
|
||||
char b;
|
||||
|
||||
|
@ -965,15 +983,7 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd,
|
|||
if (p1 < 0)
|
||||
goto close_cli1;
|
||||
|
||||
key = 0;
|
||||
value = p0;
|
||||
err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
|
||||
if (err)
|
||||
goto close_peer1;
|
||||
|
||||
key = 1;
|
||||
value = p1;
|
||||
err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
|
||||
err = add_to_sockmap(sock_mapfd, p0, p1);
|
||||
if (err)
|
||||
goto close_peer1;
|
||||
|
||||
|
@ -1061,7 +1071,6 @@ static void redir_to_listening(int family, int sotype, int sock_mapfd,
|
|||
int s, c, p, err, n;
|
||||
unsigned int drop;
|
||||
socklen_t len;
|
||||
u64 value;
|
||||
u32 key;
|
||||
|
||||
zero_verdict_count(verd_mapfd);
|
||||
|
@ -1086,15 +1095,7 @@ static void redir_to_listening(int family, int sotype, int sock_mapfd,
|
|||
if (p < 0)
|
||||
goto close_cli;
|
||||
|
||||
key = 0;
|
||||
value = s;
|
||||
err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
|
||||
if (err)
|
||||
goto close_peer;
|
||||
|
||||
key = 1;
|
||||
value = p;
|
||||
err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
|
||||
err = add_to_sockmap(sock_mapfd, s, p);
|
||||
if (err)
|
||||
goto close_peer;
|
||||
|
||||
|
@ -1346,7 +1347,6 @@ static void test_reuseport_mixed_groups(int family, int sotype, int sock_map,
|
|||
int s1, s2, c, err;
|
||||
unsigned int drop;
|
||||
socklen_t len;
|
||||
u64 value;
|
||||
u32 key;
|
||||
|
||||
zero_verdict_count(verd_map);
|
||||
|
@ -1360,16 +1360,10 @@ static void test_reuseport_mixed_groups(int family, int sotype, int sock_map,
|
|||
if (s2 < 0)
|
||||
goto close_srv1;
|
||||
|
||||
key = 0;
|
||||
value = s1;
|
||||
err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
|
||||
err = add_to_sockmap(sock_map, s1, s2);
|
||||
if (err)
|
||||
goto close_srv2;
|
||||
|
||||
key = 1;
|
||||
value = s2;
|
||||
err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
|
||||
|
||||
/* Connect to s2, reuseport BPF selects s1 via sock_map[0] */
|
||||
len = sizeof(addr);
|
||||
err = xgetsockname(s2, sockaddr(&addr), &len);
|
||||
|
@ -1441,6 +1435,8 @@ static const char *family_str(sa_family_t family)
|
|||
return "IPv4";
|
||||
case AF_INET6:
|
||||
return "IPv6";
|
||||
case AF_UNIX:
|
||||
return "Unix";
|
||||
default:
|
||||
return "unknown";
|
||||
}
|
||||
|
@ -1563,6 +1559,99 @@ static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
|
|||
}
|
||||
}
|
||||
|
||||
static void unix_redir_to_connected(int sotype, int sock_mapfd,
|
||||
int verd_mapfd, enum redir_mode mode)
|
||||
{
|
||||
const char *log_prefix = redir_mode_str(mode);
|
||||
int c0, c1, p0, p1;
|
||||
unsigned int pass;
|
||||
int retries = 100;
|
||||
int err, n;
|
||||
int sfd[2];
|
||||
u32 key;
|
||||
char b;
|
||||
|
||||
zero_verdict_count(verd_mapfd);
|
||||
|
||||
if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd))
|
||||
return;
|
||||
c0 = sfd[0], p0 = sfd[1];
|
||||
|
||||
if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd))
|
||||
goto close0;
|
||||
c1 = sfd[0], p1 = sfd[1];
|
||||
|
||||
err = add_to_sockmap(sock_mapfd, p0, p1);
|
||||
if (err)
|
||||
goto close;
|
||||
|
||||
n = write(c1, "a", 1);
|
||||
if (n < 0)
|
||||
FAIL_ERRNO("%s: write", log_prefix);
|
||||
if (n == 0)
|
||||
FAIL("%s: incomplete write", log_prefix);
|
||||
if (n < 1)
|
||||
goto close;
|
||||
|
||||
key = SK_PASS;
|
||||
err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
|
||||
if (err)
|
||||
goto close;
|
||||
if (pass != 1)
|
||||
FAIL("%s: want pass count 1, have %d", log_prefix, pass);
|
||||
|
||||
again:
|
||||
n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1);
|
||||
if (n < 0) {
|
||||
if (errno == EAGAIN && retries--)
|
||||
goto again;
|
||||
FAIL_ERRNO("%s: read", log_prefix);
|
||||
}
|
||||
if (n == 0)
|
||||
FAIL("%s: incomplete read", log_prefix);
|
||||
|
||||
close:
|
||||
xclose(c1);
|
||||
xclose(p1);
|
||||
close0:
|
||||
xclose(c0);
|
||||
xclose(p0);
|
||||
}
|
||||
|
||||
static void unix_skb_redir_to_connected(struct test_sockmap_listen *skel,
|
||||
struct bpf_map *inner_map, int sotype)
|
||||
{
|
||||
int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
|
||||
int verdict_map = bpf_map__fd(skel->maps.verdict_map);
|
||||
int sock_map = bpf_map__fd(inner_map);
|
||||
int err;
|
||||
|
||||
err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
|
||||
if (err)
|
||||
return;
|
||||
|
||||
skel->bss->test_ingress = false;
|
||||
unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_EGRESS);
|
||||
skel->bss->test_ingress = true;
|
||||
unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_INGRESS);
|
||||
|
||||
xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
|
||||
}
|
||||
|
||||
static void test_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
|
||||
int sotype)
|
||||
{
|
||||
const char *family_name, *map_name;
|
||||
char s[MAX_TEST_NAME];
|
||||
|
||||
family_name = family_str(AF_UNIX);
|
||||
map_name = map_type_str(map);
|
||||
snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
|
||||
if (!test__start_subtest(s))
|
||||
return;
|
||||
unix_skb_redir_to_connected(skel, map, sotype);
|
||||
}
|
||||
|
||||
static void test_reuseport(struct test_sockmap_listen *skel,
|
||||
struct bpf_map *map, int family, int sotype)
|
||||
{
|
||||
|
@ -1603,33 +1692,27 @@ static void test_reuseport(struct test_sockmap_listen *skel,
|
|||
}
|
||||
}
|
||||
|
||||
static void udp_redir_to_connected(int family, int sotype, int sock_mapfd,
|
||||
int verd_mapfd, enum redir_mode mode)
|
||||
static int udp_socketpair(int family, int *s, int *c)
|
||||
{
|
||||
const char *log_prefix = redir_mode_str(mode);
|
||||
struct sockaddr_storage addr;
|
||||
int c0, c1, p0, p1;
|
||||
unsigned int pass;
|
||||
int retries = 100;
|
||||
socklen_t len;
|
||||
int err, n;
|
||||
u64 value;
|
||||
u32 key;
|
||||
char b;
|
||||
int p0, c0;
|
||||
int err;
|
||||
|
||||
zero_verdict_count(verd_mapfd);
|
||||
|
||||
p0 = socket_loopback(family, sotype | SOCK_NONBLOCK);
|
||||
p0 = socket_loopback(family, SOCK_DGRAM | SOCK_NONBLOCK);
|
||||
if (p0 < 0)
|
||||
return;
|
||||
return p0;
|
||||
|
||||
len = sizeof(addr);
|
||||
err = xgetsockname(p0, sockaddr(&addr), &len);
|
||||
if (err)
|
||||
goto close_peer0;
|
||||
|
||||
c0 = xsocket(family, sotype | SOCK_NONBLOCK, 0);
|
||||
if (c0 < 0)
|
||||
c0 = xsocket(family, SOCK_DGRAM | SOCK_NONBLOCK, 0);
|
||||
if (c0 < 0) {
|
||||
err = c0;
|
||||
goto close_peer0;
|
||||
}
|
||||
err = xconnect(c0, sockaddr(&addr), len);
|
||||
if (err)
|
||||
goto close_cli0;
|
||||
|
@ -1640,35 +1723,38 @@ static void udp_redir_to_connected(int family, int sotype, int sock_mapfd,
|
|||
if (err)
|
||||
goto close_cli0;
|
||||
|
||||
p1 = socket_loopback(family, sotype | SOCK_NONBLOCK);
|
||||
if (p1 < 0)
|
||||
goto close_cli0;
|
||||
err = xgetsockname(p1, sockaddr(&addr), &len);
|
||||
*s = p0;
|
||||
*c = c0;
|
||||
return 0;
|
||||
|
||||
close_cli0:
|
||||
xclose(c0);
|
||||
close_peer0:
|
||||
xclose(p0);
|
||||
return err;
|
||||
}
|
||||
|
||||
static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd,
|
||||
enum redir_mode mode)
|
||||
{
|
||||
const char *log_prefix = redir_mode_str(mode);
|
||||
int c0, c1, p0, p1;
|
||||
unsigned int pass;
|
||||
int retries = 100;
|
||||
int err, n;
|
||||
u32 key;
|
||||
char b;
|
||||
|
||||
zero_verdict_count(verd_mapfd);
|
||||
|
||||
err = udp_socketpair(family, &p0, &c0);
|
||||
if (err)
|
||||
return;
|
||||
err = udp_socketpair(family, &p1, &c1);
|
||||
if (err)
|
||||
goto close_cli0;
|
||||
|
||||
c1 = xsocket(family, sotype | SOCK_NONBLOCK, 0);
|
||||
if (c1 < 0)
|
||||
goto close_peer1;
|
||||
err = xconnect(c1, sockaddr(&addr), len);
|
||||
if (err)
|
||||
goto close_cli1;
|
||||
err = xgetsockname(c1, sockaddr(&addr), &len);
|
||||
if (err)
|
||||
goto close_cli1;
|
||||
err = xconnect(p1, sockaddr(&addr), len);
|
||||
if (err)
|
||||
goto close_cli1;
|
||||
|
||||
key = 0;
|
||||
value = p0;
|
||||
err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
|
||||
if (err)
|
||||
goto close_cli1;
|
||||
|
||||
key = 1;
|
||||
value = p1;
|
||||
err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
|
||||
err = add_to_sockmap(sock_mapfd, p0, p1);
|
||||
if (err)
|
||||
goto close_cli1;
|
||||
|
||||
|
@ -1699,11 +1785,9 @@ static void udp_redir_to_connected(int family, int sotype, int sock_mapfd,
|
|||
|
||||
close_cli1:
|
||||
xclose(c1);
|
||||
close_peer1:
|
||||
xclose(p1);
|
||||
close_cli0:
|
||||
xclose(c0);
|
||||
close_peer0:
|
||||
xclose(p0);
|
||||
}
|
||||
|
||||
|
@ -1720,11 +1804,9 @@ static void udp_skb_redir_to_connected(struct test_sockmap_listen *skel,
|
|||
return;
|
||||
|
||||
skel->bss->test_ingress = false;
|
||||
udp_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
|
||||
REDIR_EGRESS);
|
||||
udp_redir_to_connected(family, sock_map, verdict_map, REDIR_EGRESS);
|
||||
skel->bss->test_ingress = true;
|
||||
udp_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
|
||||
REDIR_INGRESS);
|
||||
udp_redir_to_connected(family, sock_map, verdict_map, REDIR_INGRESS);
|
||||
|
||||
xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
|
||||
}
|
||||
|
@ -1743,6 +1825,175 @@ static void test_udp_redir(struct test_sockmap_listen *skel, struct bpf_map *map
|
|||
udp_skb_redir_to_connected(skel, map, family);
|
||||
}
|
||||
|
||||
static void udp_unix_redir_to_connected(int family, int sock_mapfd,
|
||||
int verd_mapfd, enum redir_mode mode)
|
||||
{
|
||||
const char *log_prefix = redir_mode_str(mode);
|
||||
int c0, c1, p0, p1;
|
||||
unsigned int pass;
|
||||
int retries = 100;
|
||||
int err, n;
|
||||
int sfd[2];
|
||||
u32 key;
|
||||
char b;
|
||||
|
||||
zero_verdict_count(verd_mapfd);
|
||||
|
||||
if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0, sfd))
|
||||
return;
|
||||
c0 = sfd[0], p0 = sfd[1];
|
||||
|
||||
err = udp_socketpair(family, &p1, &c1);
|
||||
if (err)
|
||||
goto close;
|
||||
|
||||
err = add_to_sockmap(sock_mapfd, p0, p1);
|
||||
if (err)
|
||||
goto close_cli1;
|
||||
|
||||
n = write(c1, "a", 1);
|
||||
if (n < 0)
|
||||
FAIL_ERRNO("%s: write", log_prefix);
|
||||
if (n == 0)
|
||||
FAIL("%s: incomplete write", log_prefix);
|
||||
if (n < 1)
|
||||
goto close_cli1;
|
||||
|
||||
key = SK_PASS;
|
||||
err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
|
||||
if (err)
|
||||
goto close_cli1;
|
||||
if (pass != 1)
|
||||
FAIL("%s: want pass count 1, have %d", log_prefix, pass);
|
||||
|
||||
again:
|
||||
n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1);
|
||||
if (n < 0) {
|
||||
if (errno == EAGAIN && retries--)
|
||||
goto again;
|
||||
FAIL_ERRNO("%s: read", log_prefix);
|
||||
}
|
||||
if (n == 0)
|
||||
FAIL("%s: incomplete read", log_prefix);
|
||||
|
||||
close_cli1:
|
||||
xclose(c1);
|
||||
xclose(p1);
|
||||
close:
|
||||
xclose(c0);
|
||||
xclose(p0);
|
||||
}
|
||||
|
||||
static void udp_unix_skb_redir_to_connected(struct test_sockmap_listen *skel,
|
||||
struct bpf_map *inner_map, int family)
|
||||
{
|
||||
int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
|
||||
int verdict_map = bpf_map__fd(skel->maps.verdict_map);
|
||||
int sock_map = bpf_map__fd(inner_map);
|
||||
int err;
|
||||
|
||||
err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
|
||||
if (err)
|
||||
return;
|
||||
|
||||
skel->bss->test_ingress = false;
|
||||
udp_unix_redir_to_connected(family, sock_map, verdict_map, REDIR_EGRESS);
|
||||
skel->bss->test_ingress = true;
|
||||
udp_unix_redir_to_connected(family, sock_map, verdict_map, REDIR_INGRESS);
|
||||
|
||||
xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
|
||||
}
|
||||
|
||||
static void unix_udp_redir_to_connected(int family, int sock_mapfd,
|
||||
int verd_mapfd, enum redir_mode mode)
|
||||
{
|
||||
const char *log_prefix = redir_mode_str(mode);
|
||||
int c0, c1, p0, p1;
|
||||
unsigned int pass;
|
||||
int err, n;
|
||||
int sfd[2];
|
||||
u32 key;
|
||||
char b;
|
||||
|
||||
zero_verdict_count(verd_mapfd);
|
||||
|
||||
err = udp_socketpair(family, &p0, &c0);
|
||||
if (err)
|
||||
return;
|
||||
|
||||
if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0, sfd))
|
||||
goto close_cli0;
|
||||
c1 = sfd[0], p1 = sfd[1];
|
||||
|
||||
err = add_to_sockmap(sock_mapfd, p0, p1);
|
||||
if (err)
|
||||
goto close;
|
||||
|
||||
n = write(c1, "a", 1);
|
||||
if (n < 0)
|
||||
FAIL_ERRNO("%s: write", log_prefix);
|
||||
if (n == 0)
|
||||
FAIL("%s: incomplete write", log_prefix);
|
||||
if (n < 1)
|
||||
goto close;
|
||||
|
||||
key = SK_PASS;
|
||||
err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
|
||||
if (err)
|
||||
goto close;
|
||||
if (pass != 1)
|
||||
FAIL("%s: want pass count 1, have %d", log_prefix, pass);
|
||||
|
||||
n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1);
|
||||
if (n < 0)
|
||||
FAIL_ERRNO("%s: read", log_prefix);
|
||||
if (n == 0)
|
||||
FAIL("%s: incomplete read", log_prefix);
|
||||
|
||||
close:
|
||||
xclose(c1);
|
||||
xclose(p1);
|
||||
close_cli0:
|
||||
xclose(c0);
|
||||
xclose(p0);
|
||||
|
||||
}
|
||||
|
||||
static void unix_udp_skb_redir_to_connected(struct test_sockmap_listen *skel,
|
||||
struct bpf_map *inner_map, int family)
|
||||
{
|
||||
int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
|
||||
int verdict_map = bpf_map__fd(skel->maps.verdict_map);
|
||||
int sock_map = bpf_map__fd(inner_map);
|
||||
int err;
|
||||
|
||||
err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
|
||||
if (err)
|
||||
return;
|
||||
|
||||
skel->bss->test_ingress = false;
|
||||
unix_udp_redir_to_connected(family, sock_map, verdict_map, REDIR_EGRESS);
|
||||
skel->bss->test_ingress = true;
|
||||
unix_udp_redir_to_connected(family, sock_map, verdict_map, REDIR_INGRESS);
|
||||
|
||||
xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
|
||||
}
|
||||
|
||||
static void test_udp_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
|
||||
int family)
|
||||
{
|
||||
const char *family_name, *map_name;
|
||||
char s[MAX_TEST_NAME];
|
||||
|
||||
family_name = family_str(family);
|
||||
map_name = map_type_str(map);
|
||||
snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
|
||||
if (!test__start_subtest(s))
|
||||
return;
|
||||
udp_unix_skb_redir_to_connected(skel, map, family);
|
||||
unix_udp_skb_redir_to_connected(skel, map, family);
|
||||
}
|
||||
|
||||
static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
|
||||
int family)
|
||||
{
|
||||
|
@ -1752,6 +2003,7 @@ static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
|
|||
test_reuseport(skel, map, family, SOCK_STREAM);
|
||||
test_reuseport(skel, map, family, SOCK_DGRAM);
|
||||
test_udp_redir(skel, map, family);
|
||||
test_udp_unix_redir(skel, map, family);
|
||||
}
|
||||
|
||||
void test_sockmap_listen(void)
|
||||
|
@ -1767,10 +2019,12 @@ void test_sockmap_listen(void)
|
|||
skel->bss->test_sockmap = true;
|
||||
run_tests(skel, skel->maps.sock_map, AF_INET);
|
||||
run_tests(skel, skel->maps.sock_map, AF_INET6);
|
||||
test_unix_redir(skel, skel->maps.sock_map, SOCK_DGRAM);
|
||||
|
||||
skel->bss->test_sockmap = false;
|
||||
run_tests(skel, skel->maps.sock_hash, AF_INET);
|
||||
run_tests(skel, skel->maps.sock_hash, AF_INET6);
|
||||
test_unix_redir(skel, skel->maps.sock_hash, SOCK_DGRAM);
|
||||
|
||||
test_sockmap_listen__destroy(skel);
|
||||
}
|
||||
|
|
|
@ -0,0 +1,55 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/* Copyright (c) 2021 Facebook */
|
||||
#include <test_progs.h>
|
||||
#include "timer.skel.h"
|
||||
|
||||
static int timer(struct timer *timer_skel)
|
||||
{
|
||||
int err, prog_fd;
|
||||
__u32 duration = 0, retval;
|
||||
|
||||
err = timer__attach(timer_skel);
|
||||
if (!ASSERT_OK(err, "timer_attach"))
|
||||
return err;
|
||||
|
||||
ASSERT_EQ(timer_skel->data->callback_check, 52, "callback_check1");
|
||||
ASSERT_EQ(timer_skel->data->callback2_check, 52, "callback2_check1");
|
||||
|
||||
prog_fd = bpf_program__fd(timer_skel->progs.test1);
|
||||
err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
|
||||
NULL, NULL, &retval, &duration);
|
||||
ASSERT_OK(err, "test_run");
|
||||
ASSERT_EQ(retval, 0, "test_run");
|
||||
timer__detach(timer_skel);
|
||||
|
||||
usleep(50); /* 10 usecs should be enough, but give it extra */
|
||||
/* check that timer_cb2() was executed 10+10 times */
|
||||
ASSERT_EQ(timer_skel->data->callback_check, 42, "callback_check2");
|
||||
ASSERT_EQ(timer_skel->data->callback2_check, 42, "callback2_check2");
|
||||
|
||||
/* check that timer_cb1() was executed twice */
|
||||
ASSERT_EQ(timer_skel->bss->bss_data, 10, "bss_data");
|
||||
|
||||
/* check that there were no errors in timer execution */
|
||||
ASSERT_EQ(timer_skel->bss->err, 0, "err");
|
||||
|
||||
/* check that code paths completed */
|
||||
ASSERT_EQ(timer_skel->bss->ok, 1 | 2 | 4, "ok");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void test_timer(void)
|
||||
{
|
||||
struct timer *timer_skel = NULL;
|
||||
int err;
|
||||
|
||||
timer_skel = timer__open_and_load();
|
||||
if (!ASSERT_OK_PTR(timer_skel, "timer_skel_load"))
|
||||
goto cleanup;
|
||||
|
||||
err = timer(timer_skel);
|
||||
ASSERT_OK(err, "timer");
|
||||
cleanup:
|
||||
timer__destroy(timer_skel);
|
||||
}
|
|
@ -0,0 +1,69 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/* Copyright (c) 2021 Facebook */
|
||||
#include <test_progs.h>
|
||||
#include "timer_mim.skel.h"
|
||||
#include "timer_mim_reject.skel.h"
|
||||
|
||||
static int timer_mim(struct timer_mim *timer_skel)
|
||||
{
|
||||
__u32 duration = 0, retval;
|
||||
__u64 cnt1, cnt2;
|
||||
int err, prog_fd, key1 = 1;
|
||||
|
||||
err = timer_mim__attach(timer_skel);
|
||||
if (!ASSERT_OK(err, "timer_attach"))
|
||||
return err;
|
||||
|
||||
prog_fd = bpf_program__fd(timer_skel->progs.test1);
|
||||
err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
|
||||
NULL, NULL, &retval, &duration);
|
||||
ASSERT_OK(err, "test_run");
|
||||
ASSERT_EQ(retval, 0, "test_run");
|
||||
timer_mim__detach(timer_skel);
|
||||
|
||||
/* check that timer_cb[12] are incrementing 'cnt' */
|
||||
cnt1 = READ_ONCE(timer_skel->bss->cnt);
|
||||
usleep(200); /* 100 times more than interval */
|
||||
cnt2 = READ_ONCE(timer_skel->bss->cnt);
|
||||
ASSERT_GT(cnt2, cnt1, "cnt");
|
||||
|
||||
ASSERT_EQ(timer_skel->bss->err, 0, "err");
|
||||
/* check that code paths completed */
|
||||
ASSERT_EQ(timer_skel->bss->ok, 1 | 2, "ok");
|
||||
|
||||
close(bpf_map__fd(timer_skel->maps.inner_htab));
|
||||
err = bpf_map_delete_elem(bpf_map__fd(timer_skel->maps.outer_arr), &key1);
|
||||
ASSERT_EQ(err, 0, "delete inner map");
|
||||
|
||||
/* check that timer_cb[12] are no longer running */
|
||||
cnt1 = READ_ONCE(timer_skel->bss->cnt);
|
||||
usleep(200);
|
||||
cnt2 = READ_ONCE(timer_skel->bss->cnt);
|
||||
ASSERT_EQ(cnt2, cnt1, "cnt");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void test_timer_mim(void)
|
||||
{
|
||||
struct timer_mim_reject *timer_reject_skel = NULL;
|
||||
libbpf_print_fn_t old_print_fn = NULL;
|
||||
struct timer_mim *timer_skel = NULL;
|
||||
int err;
|
||||
|
||||
old_print_fn = libbpf_set_print(NULL);
|
||||
timer_reject_skel = timer_mim_reject__open_and_load();
|
||||
libbpf_set_print(old_print_fn);
|
||||
if (!ASSERT_ERR_PTR(timer_reject_skel, "timer_reject_skel_load"))
|
||||
goto cleanup;
|
||||
|
||||
timer_skel = timer_mim__open_and_load();
|
||||
if (!ASSERT_OK_PTR(timer_skel, "timer_skel_load"))
|
||||
goto cleanup;
|
||||
|
||||
err = timer_mim(timer_skel);
|
||||
ASSERT_OK(err, "timer_mim");
|
||||
cleanup:
|
||||
timer_mim__destroy(timer_skel);
|
||||
timer_mim_reject__destroy(timer_reject_skel);
|
||||
}
|
|
@ -0,0 +1,105 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
#include <test_progs.h>
|
||||
#include <network_helpers.h>
|
||||
#include "test_xdp_context_test_run.skel.h"
|
||||
|
||||
void test_xdp_context_error(int prog_fd, struct bpf_test_run_opts opts,
|
||||
__u32 data_meta, __u32 data, __u32 data_end,
|
||||
__u32 ingress_ifindex, __u32 rx_queue_index,
|
||||
__u32 egress_ifindex)
|
||||
{
|
||||
struct xdp_md ctx = {
|
||||
.data = data,
|
||||
.data_end = data_end,
|
||||
.data_meta = data_meta,
|
||||
.ingress_ifindex = ingress_ifindex,
|
||||
.rx_queue_index = rx_queue_index,
|
||||
.egress_ifindex = egress_ifindex,
|
||||
};
|
||||
int err;
|
||||
|
||||
opts.ctx_in = &ctx;
|
||||
opts.ctx_size_in = sizeof(ctx);
|
||||
err = bpf_prog_test_run_opts(prog_fd, &opts);
|
||||
ASSERT_EQ(errno, EINVAL, "errno-EINVAL");
|
||||
ASSERT_ERR(err, "bpf_prog_test_run");
|
||||
}
|
||||
|
||||
void test_xdp_context_test_run(void)
|
||||
{
|
||||
struct test_xdp_context_test_run *skel = NULL;
|
||||
char data[sizeof(pkt_v4) + sizeof(__u32)];
|
||||
char bad_ctx[sizeof(struct xdp_md) + 1];
|
||||
struct xdp_md ctx_in, ctx_out;
|
||||
DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
|
||||
.data_in = &data,
|
||||
.data_size_in = sizeof(data),
|
||||
.ctx_out = &ctx_out,
|
||||
.ctx_size_out = sizeof(ctx_out),
|
||||
.repeat = 1,
|
||||
);
|
||||
int err, prog_fd;
|
||||
|
||||
skel = test_xdp_context_test_run__open_and_load();
|
||||
if (!ASSERT_OK_PTR(skel, "skel"))
|
||||
return;
|
||||
prog_fd = bpf_program__fd(skel->progs.xdp_context);
|
||||
|
||||
/* Data past the end of the kernel's struct xdp_md must be 0 */
|
||||
bad_ctx[sizeof(bad_ctx) - 1] = 1;
|
||||
opts.ctx_in = bad_ctx;
|
||||
opts.ctx_size_in = sizeof(bad_ctx);
|
||||
err = bpf_prog_test_run_opts(prog_fd, &opts);
|
||||
ASSERT_EQ(errno, E2BIG, "extradata-errno");
|
||||
ASSERT_ERR(err, "bpf_prog_test_run(extradata)");
|
||||
|
||||
*(__u32 *)data = XDP_PASS;
|
||||
*(struct ipv4_packet *)(data + sizeof(__u32)) = pkt_v4;
|
||||
opts.ctx_in = &ctx_in;
|
||||
opts.ctx_size_in = sizeof(ctx_in);
|
||||
memset(&ctx_in, 0, sizeof(ctx_in));
|
||||
ctx_in.data_meta = 0;
|
||||
ctx_in.data = sizeof(__u32);
|
||||
ctx_in.data_end = ctx_in.data + sizeof(pkt_v4);
|
||||
err = bpf_prog_test_run_opts(prog_fd, &opts);
|
||||
ASSERT_OK(err, "bpf_prog_test_run(valid)");
|
||||
ASSERT_EQ(opts.retval, XDP_PASS, "valid-retval");
|
||||
ASSERT_EQ(opts.data_size_out, sizeof(pkt_v4), "valid-datasize");
|
||||
ASSERT_EQ(opts.ctx_size_out, opts.ctx_size_in, "valid-ctxsize");
|
||||
ASSERT_EQ(ctx_out.data_meta, 0, "valid-datameta");
|
||||
ASSERT_EQ(ctx_out.data, 0, "valid-data");
|
||||
ASSERT_EQ(ctx_out.data_end, sizeof(pkt_v4), "valid-dataend");
|
||||
|
||||
/* Meta data's size must be a multiple of 4 */
|
||||
test_xdp_context_error(prog_fd, opts, 0, 1, sizeof(data), 0, 0, 0);
|
||||
|
||||
/* data_meta must reference the start of data */
|
||||
test_xdp_context_error(prog_fd, opts, 4, sizeof(__u32), sizeof(data),
|
||||
0, 0, 0);
|
||||
|
||||
/* Meta data must be 32 bytes or smaller */
|
||||
test_xdp_context_error(prog_fd, opts, 0, 36, sizeof(data), 0, 0, 0);
|
||||
|
||||
/* Total size of data must match data_end - data_meta */
|
||||
test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32),
|
||||
sizeof(data) - 1, 0, 0, 0);
|
||||
test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32),
|
||||
sizeof(data) + 1, 0, 0, 0);
|
||||
|
||||
/* RX queue cannot be specified without specifying an ingress */
|
||||
test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32), sizeof(data),
|
||||
0, 1, 0);
|
||||
|
||||
/* Interface 1 is always the loopback interface which always has only
|
||||
* one RX queue (index 0). This makes index 1 an invalid rx queue index
|
||||
* for interface 1.
|
||||
*/
|
||||
test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32), sizeof(data),
|
||||
1, 1, 0);
|
||||
|
||||
/* The egress cannot be specified */
|
||||
test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32), sizeof(data),
|
||||
0, 0, 1);
|
||||
|
||||
test_xdp_context_test_run__destroy(skel);
|
||||
}
|
|
@ -7,64 +7,53 @@
|
|||
|
||||
#define IFINDEX_LO 1
|
||||
|
||||
void test_xdp_with_cpumap_helpers(void)
|
||||
void test_xdp_cpumap_attach(void)
|
||||
{
|
||||
struct test_xdp_with_cpumap_helpers *skel;
|
||||
struct bpf_prog_info info = {};
|
||||
__u32 len = sizeof(info);
|
||||
struct bpf_cpumap_val val = {
|
||||
.qsize = 192,
|
||||
};
|
||||
__u32 duration = 0, idx = 0;
|
||||
__u32 len = sizeof(info);
|
||||
int err, prog_fd, map_fd;
|
||||
__u32 idx = 0;
|
||||
|
||||
skel = test_xdp_with_cpumap_helpers__open_and_load();
|
||||
if (CHECK_FAIL(!skel)) {
|
||||
perror("test_xdp_with_cpumap_helpers__open_and_load");
|
||||
if (!ASSERT_OK_PTR(skel, "test_xdp_with_cpumap_helpers__open_and_load"))
|
||||
return;
|
||||
}
|
||||
|
||||
/* can not attach program with cpumaps that allow programs
|
||||
* as xdp generic
|
||||
*/
|
||||
prog_fd = bpf_program__fd(skel->progs.xdp_redir_prog);
|
||||
err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE);
|
||||
CHECK(err == 0, "Generic attach of program with 8-byte CPUMAP",
|
||||
"should have failed\n");
|
||||
if (!ASSERT_OK(err, "Generic attach of program with 8-byte CPUMAP"))
|
||||
goto out_close;
|
||||
|
||||
err = bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE);
|
||||
ASSERT_OK(err, "XDP program detach");
|
||||
|
||||
prog_fd = bpf_program__fd(skel->progs.xdp_dummy_cm);
|
||||
map_fd = bpf_map__fd(skel->maps.cpu_map);
|
||||
err = bpf_obj_get_info_by_fd(prog_fd, &info, &len);
|
||||
if (CHECK_FAIL(err))
|
||||
if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd"))
|
||||
goto out_close;
|
||||
|
||||
val.bpf_prog.fd = prog_fd;
|
||||
err = bpf_map_update_elem(map_fd, &idx, &val, 0);
|
||||
CHECK(err, "Add program to cpumap entry", "err %d errno %d\n",
|
||||
err, errno);
|
||||
ASSERT_OK(err, "Add program to cpumap entry");
|
||||
|
||||
err = bpf_map_lookup_elem(map_fd, &idx, &val);
|
||||
CHECK(err, "Read cpumap entry", "err %d errno %d\n", err, errno);
|
||||
CHECK(info.id != val.bpf_prog.id, "Expected program id in cpumap entry",
|
||||
"expected %u read %u\n", info.id, val.bpf_prog.id);
|
||||
ASSERT_OK(err, "Read cpumap entry");
|
||||
ASSERT_EQ(info.id, val.bpf_prog.id, "Match program id to cpumap entry prog_id");
|
||||
|
||||
/* can not attach BPF_XDP_CPUMAP program to a device */
|
||||
err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE);
|
||||
CHECK(err == 0, "Attach of BPF_XDP_CPUMAP program",
|
||||
"should have failed\n");
|
||||
if (!ASSERT_NEQ(err, 0, "Attach of BPF_XDP_CPUMAP program"))
|
||||
bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE);
|
||||
|
||||
val.qsize = 192;
|
||||
val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_prog);
|
||||
err = bpf_map_update_elem(map_fd, &idx, &val, 0);
|
||||
CHECK(err == 0, "Add non-BPF_XDP_CPUMAP program to cpumap entry",
|
||||
"should have failed\n");
|
||||
ASSERT_NEQ(err, 0, "Add non-BPF_XDP_CPUMAP program to cpumap entry");
|
||||
|
||||
out_close:
|
||||
test_xdp_with_cpumap_helpers__destroy(skel);
|
||||
}
|
||||
|
||||
void test_xdp_cpumap_attach(void)
|
||||
{
|
||||
if (test__start_subtest("cpumap_with_progs"))
|
||||
test_xdp_with_cpumap_helpers();
|
||||
}
|
||||
|
|
|
@ -16,50 +16,45 @@ void test_xdp_with_devmap_helpers(void)
|
|||
.ifindex = IFINDEX_LO,
|
||||
};
|
||||
__u32 len = sizeof(info);
|
||||
__u32 duration = 0, idx = 0;
|
||||
int err, dm_fd, map_fd;
|
||||
__u32 idx = 0;
|
||||
|
||||
|
||||
skel = test_xdp_with_devmap_helpers__open_and_load();
|
||||
if (CHECK_FAIL(!skel)) {
|
||||
perror("test_xdp_with_devmap_helpers__open_and_load");
|
||||
if (!ASSERT_OK_PTR(skel, "test_xdp_with_devmap_helpers__open_and_load"))
|
||||
return;
|
||||
}
|
||||
|
||||
/* can not attach program with DEVMAPs that allow programs
|
||||
* as xdp generic
|
||||
*/
|
||||
dm_fd = bpf_program__fd(skel->progs.xdp_redir_prog);
|
||||
err = bpf_set_link_xdp_fd(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE);
|
||||
CHECK(err == 0, "Generic attach of program with 8-byte devmap",
|
||||
"should have failed\n");
|
||||
if (!ASSERT_OK(err, "Generic attach of program with 8-byte devmap"))
|
||||
goto out_close;
|
||||
|
||||
err = bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE);
|
||||
ASSERT_OK(err, "XDP program detach");
|
||||
|
||||
dm_fd = bpf_program__fd(skel->progs.xdp_dummy_dm);
|
||||
map_fd = bpf_map__fd(skel->maps.dm_ports);
|
||||
err = bpf_obj_get_info_by_fd(dm_fd, &info, &len);
|
||||
if (CHECK_FAIL(err))
|
||||
if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd"))
|
||||
goto out_close;
|
||||
|
||||
val.bpf_prog.fd = dm_fd;
|
||||
err = bpf_map_update_elem(map_fd, &idx, &val, 0);
|
||||
CHECK(err, "Add program to devmap entry",
|
||||
"err %d errno %d\n", err, errno);
|
||||
ASSERT_OK(err, "Add program to devmap entry");
|
||||
|
||||
err = bpf_map_lookup_elem(map_fd, &idx, &val);
|
||||
CHECK(err, "Read devmap entry", "err %d errno %d\n", err, errno);
|
||||
CHECK(info.id != val.bpf_prog.id, "Expected program id in devmap entry",
|
||||
"expected %u read %u\n", info.id, val.bpf_prog.id);
|
||||
ASSERT_OK(err, "Read devmap entry");
|
||||
ASSERT_EQ(info.id, val.bpf_prog.id, "Match program id to devmap entry prog_id");
|
||||
|
||||
/* can not attach BPF_XDP_DEVMAP program to a device */
|
||||
err = bpf_set_link_xdp_fd(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE);
|
||||
CHECK(err == 0, "Attach of BPF_XDP_DEVMAP program",
|
||||
"should have failed\n");
|
||||
if (!ASSERT_NEQ(err, 0, "Attach of BPF_XDP_DEVMAP program"))
|
||||
bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE);
|
||||
|
||||
val.ifindex = 1;
|
||||
val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_prog);
|
||||
err = bpf_map_update_elem(map_fd, &idx, &val, 0);
|
||||
CHECK(err == 0, "Add non-BPF_XDP_DEVMAP program to devmap entry",
|
||||
"should have failed\n");
|
||||
ASSERT_NEQ(err, 0, "Add non-BPF_XDP_DEVMAP program to devmap entry");
|
||||
|
||||
out_close:
|
||||
test_xdp_with_devmap_helpers__destroy(skel);
|
||||
|
@ -68,12 +63,10 @@ void test_xdp_with_devmap_helpers(void)
|
|||
void test_neg_xdp_devmap_helpers(void)
|
||||
{
|
||||
struct test_xdp_devmap_helpers *skel;
|
||||
__u32 duration = 0;
|
||||
|
||||
skel = test_xdp_devmap_helpers__open_and_load();
|
||||
if (CHECK(skel,
|
||||
"Load of XDP program accessing egress ifindex without attach type",
|
||||
"should have failed\n")) {
|
||||
if (!ASSERT_EQ(skel, NULL,
|
||||
"Load of XDP program accessing egress ifindex without attach type")) {
|
||||
test_xdp_devmap_helpers__destroy(skel);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,73 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
#include <linux/bpf.h>
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <bpf/bpf_tracing.h>
|
||||
|
||||
char _license[] SEC("license") = "GPL";
|
||||
|
||||
extern const void bpf_fentry_test1 __ksym;
|
||||
extern const void bpf_fentry_test2 __ksym;
|
||||
extern const void bpf_fentry_test3 __ksym;
|
||||
extern const void bpf_fentry_test4 __ksym;
|
||||
extern const void bpf_modify_return_test __ksym;
|
||||
extern const void bpf_fentry_test6 __ksym;
|
||||
|
||||
__u64 test1_result = 0;
|
||||
SEC("fentry/bpf_fentry_test1")
|
||||
int BPF_PROG(test1, int a)
|
||||
{
|
||||
__u64 addr = bpf_get_func_ip(ctx);
|
||||
|
||||
test1_result = (const void *) addr == &bpf_fentry_test1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
__u64 test2_result = 0;
|
||||
SEC("fexit/bpf_fentry_test2")
|
||||
int BPF_PROG(test2, int a)
|
||||
{
|
||||
__u64 addr = bpf_get_func_ip(ctx);
|
||||
|
||||
test2_result = (const void *) addr == &bpf_fentry_test2;
|
||||
return 0;
|
||||
}
|
||||
|
||||
__u64 test3_result = 0;
|
||||
SEC("kprobe/bpf_fentry_test3")
|
||||
int test3(struct pt_regs *ctx)
|
||||
{
|
||||
__u64 addr = bpf_get_func_ip(ctx);
|
||||
|
||||
test3_result = (const void *) addr == &bpf_fentry_test3;
|
||||
return 0;
|
||||
}
|
||||
|
||||
__u64 test4_result = 0;
|
||||
SEC("kretprobe/bpf_fentry_test4")
|
||||
int BPF_KRETPROBE(test4)
|
||||
{
|
||||
__u64 addr = bpf_get_func_ip(ctx);
|
||||
|
||||
test4_result = (const void *) addr == &bpf_fentry_test4;
|
||||
return 0;
|
||||
}
|
||||
|
||||
__u64 test5_result = 0;
|
||||
SEC("fmod_ret/bpf_modify_return_test")
|
||||
int BPF_PROG(test5, int a, int *b, int ret)
|
||||
{
|
||||
__u64 addr = bpf_get_func_ip(ctx);
|
||||
|
||||
test5_result = (const void *) addr == &bpf_modify_return_test;
|
||||
return ret;
|
||||
}
|
||||
|
||||
__u64 test6_result = 0;
|
||||
SEC("kprobe/bpf_fentry_test6+0x5")
|
||||
int test6(struct pt_regs *ctx)
|
||||
{
|
||||
__u64 addr = bpf_get_func_ip(ctx);
|
||||
|
||||
test6_result = (const void *) addr == &bpf_fentry_test6 + 5;
|
||||
return 0;
|
||||
}
|
|
@ -528,7 +528,6 @@ int __encap_ip6vxlan_eth(struct __sk_buff *skb)
|
|||
|
||||
static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
|
||||
{
|
||||
char buf[sizeof(struct v6hdr)];
|
||||
struct gre_hdr greh;
|
||||
struct udphdr udph;
|
||||
int olen = len;
|
||||
|
|
|
@ -0,0 +1,20 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
#include <linux/bpf.h>
|
||||
#include <bpf/bpf_helpers.h>
|
||||
|
||||
SEC("xdp")
|
||||
int xdp_context(struct xdp_md *xdp)
|
||||
{
|
||||
void *data = (void *)(long)xdp->data;
|
||||
__u32 *metadata = (void *)(long)xdp->data_meta;
|
||||
__u32 ret;
|
||||
|
||||
if (metadata + 1 > data)
|
||||
return XDP_ABORTED;
|
||||
ret = *metadata;
|
||||
if (bpf_xdp_adjust_meta(xdp, 4))
|
||||
return XDP_ABORTED;
|
||||
return ret;
|
||||
}
|
||||
|
||||
char _license[] SEC("license") = "GPL";
|
|
@ -0,0 +1,297 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/* Copyright (c) 2021 Facebook */
|
||||
#include <linux/bpf.h>
|
||||
#include <time.h>
|
||||
#include <errno.h>
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include "bpf_tcp_helpers.h"
|
||||
|
||||
char _license[] SEC("license") = "GPL";
|
||||
struct hmap_elem {
|
||||
int counter;
|
||||
struct bpf_timer timer;
|
||||
struct bpf_spin_lock lock; /* unused */
|
||||
};
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_HASH);
|
||||
__uint(max_entries, 1000);
|
||||
__type(key, int);
|
||||
__type(value, struct hmap_elem);
|
||||
} hmap SEC(".maps");
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_HASH);
|
||||
__uint(map_flags, BPF_F_NO_PREALLOC);
|
||||
__uint(max_entries, 1000);
|
||||
__type(key, int);
|
||||
__type(value, struct hmap_elem);
|
||||
} hmap_malloc SEC(".maps");
|
||||
|
||||
struct elem {
|
||||
struct bpf_timer t;
|
||||
};
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_ARRAY);
|
||||
__uint(max_entries, 2);
|
||||
__type(key, int);
|
||||
__type(value, struct elem);
|
||||
} array SEC(".maps");
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_LRU_HASH);
|
||||
__uint(max_entries, 4);
|
||||
__type(key, int);
|
||||
__type(value, struct elem);
|
||||
} lru SEC(".maps");
|
||||
|
||||
__u64 bss_data;
|
||||
__u64 err;
|
||||
__u64 ok;
|
||||
__u64 callback_check = 52;
|
||||
__u64 callback2_check = 52;
|
||||
|
||||
#define ARRAY 1
|
||||
#define HTAB 2
|
||||
#define HTAB_MALLOC 3
|
||||
#define LRU 4
|
||||
|
||||
/* callback for array and lru timers */
|
||||
static int timer_cb1(void *map, int *key, struct bpf_timer *timer)
|
||||
{
|
||||
/* increment bss variable twice.
|
||||
* Once via array timer callback and once via lru timer callback
|
||||
*/
|
||||
bss_data += 5;
|
||||
|
||||
/* *key == 1 (ARRAY) - the callback was called for the array timer.
|
||||
* *key == 4 (LRU) - the callback was called from the lru timer.
|
||||
*/
|
||||
if (*key == ARRAY) {
|
||||
struct bpf_timer *lru_timer;
|
||||
int lru_key = LRU;
|
||||
|
||||
/* rearm array timer to be called again in ~35 seconds */
|
||||
if (bpf_timer_start(timer, 1ull << 35, 0) != 0)
|
||||
err |= 1;
|
||||
|
||||
lru_timer = bpf_map_lookup_elem(&lru, &lru_key);
|
||||
if (!lru_timer)
|
||||
return 0;
|
||||
bpf_timer_set_callback(lru_timer, timer_cb1);
|
||||
if (bpf_timer_start(lru_timer, 0, 0) != 0)
|
||||
err |= 2;
|
||||
} else if (*key == LRU) {
|
||||
int lru_key, i;
|
||||
|
||||
for (i = LRU + 1;
|
||||
i <= 100 /* for current LRU eviction algorithm this number
|
||||
* should be larger than ~ lru->max_entries * 2
|
||||
*/;
|
||||
i++) {
|
||||
struct elem init = {};
|
||||
|
||||
/* lru_key cannot be used as loop induction variable
|
||||
* otherwise the loop will be unbounded.
|
||||
*/
|
||||
lru_key = i;
|
||||
|
||||
/* add more elements into lru map to push out current
|
||||
* element and force deletion of this timer
|
||||
*/
|
||||
bpf_map_update_elem(map, &lru_key, &init, 0);
|
||||
/* look it up to bump it into active list */
|
||||
bpf_map_lookup_elem(map, &lru_key);
|
||||
|
||||
/* keep adding until *key changes underneath,
|
||||
* which means that key/timer memory was reused
|
||||
*/
|
||||
if (*key != LRU)
|
||||
break;
|
||||
}
|
||||
|
||||
/* check that the timer was removed */
|
||||
if (bpf_timer_cancel(timer) != -EINVAL)
|
||||
err |= 4;
|
||||
ok |= 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("fentry/bpf_fentry_test1")
|
||||
int BPF_PROG(test1, int a)
|
||||
{
|
||||
struct bpf_timer *arr_timer, *lru_timer;
|
||||
struct elem init = {};
|
||||
int lru_key = LRU;
|
||||
int array_key = ARRAY;
|
||||
|
||||
arr_timer = bpf_map_lookup_elem(&array, &array_key);
|
||||
if (!arr_timer)
|
||||
return 0;
|
||||
bpf_timer_init(arr_timer, &array, CLOCK_MONOTONIC);
|
||||
|
||||
bpf_map_update_elem(&lru, &lru_key, &init, 0);
|
||||
lru_timer = bpf_map_lookup_elem(&lru, &lru_key);
|
||||
if (!lru_timer)
|
||||
return 0;
|
||||
bpf_timer_init(lru_timer, &lru, CLOCK_MONOTONIC);
|
||||
|
||||
bpf_timer_set_callback(arr_timer, timer_cb1);
|
||||
bpf_timer_start(arr_timer, 0 /* call timer_cb1 asap */, 0);
|
||||
|
||||
/* init more timers to check that array destruction
|
||||
* doesn't leak timer memory.
|
||||
*/
|
||||
array_key = 0;
|
||||
arr_timer = bpf_map_lookup_elem(&array, &array_key);
|
||||
if (!arr_timer)
|
||||
return 0;
|
||||
bpf_timer_init(arr_timer, &array, CLOCK_MONOTONIC);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* callback for prealloc and non-prealloc hashtab timers */
static int timer_cb2(void *map, int *key, struct hmap_elem *val)
{
	if (*key == HTAB)
		callback_check--;
	else
		callback2_check--;
	if (val->counter > 0 && --val->counter) {
		/* re-arm the timer again to execute after 1 usec */
		bpf_timer_start(&val->timer, 1000, 0);
	} else if (*key == HTAB) {
		struct bpf_timer *arr_timer;
		int array_key = ARRAY;

		/* cancel arr_timer otherwise bpf_fentry_test1 prog
		 * will stay alive forever.
		 */
		arr_timer = bpf_map_lookup_elem(&array, &array_key);
		if (!arr_timer)
			return 0;
		if (bpf_timer_cancel(arr_timer) != 1)
			/* bpf_timer_cancel should return 1 to indicate
			 * that arr_timer was active at this time
			 */
			err |= 8;

		/* try to cancel ourself. It shouldn't deadlock. */
		if (bpf_timer_cancel(&val->timer) != -EDEADLK)
			err |= 16;

		/* delete this key and this timer anyway.
		 * It shouldn't deadlock either.
		 */
		bpf_map_delete_elem(map, key);

		/* in preallocated hashmap both 'key' and 'val' could have been
		 * reused to store another map element (like in LRU above),
		 * but in controlled test environment the below test works.
		 * It's not a use-after-free. The memory is owned by the map.
		 */
		if (bpf_timer_start(&val->timer, 1000, 0) != -EINVAL)
			err |= 32;
		ok |= 2;
	} else {
		if (*key != HTAB_MALLOC)
			err |= 64;

		/* try to cancel ourself. It shouldn't deadlock. */
		if (bpf_timer_cancel(&val->timer) != -EDEADLK)
			err |= 128;

		/* delete this key and this timer anyway.
		 * It shouldn't deadlock either.
		 */
		bpf_map_delete_elem(map, key);

		/* in non-preallocated hashmap both 'key' and 'val' are RCU
		 * protected and still valid though this element was deleted
		 * from the map. Arm this timer for ~35 seconds. When callback
		 * finishes the call_rcu will invoke:
		 *  htab_elem_free_rcu
		 *    check_and_free_timer
		 *      bpf_timer_cancel_and_free
		 * to cancel this 35 second sleep and delete the timer for real.
		 */
		if (bpf_timer_start(&val->timer, 1ull << 35, 0) != 0)
			err |= 256;
		ok |= 4;
	}
	return 0;
}

int bpf_timer_test(void)
{
	struct hmap_elem *val;
	int key = HTAB, key_malloc = HTAB_MALLOC;

	val = bpf_map_lookup_elem(&hmap, &key);
	if (val) {
		if (bpf_timer_init(&val->timer, &hmap, CLOCK_BOOTTIME) != 0)
			err |= 512;
		bpf_timer_set_callback(&val->timer, timer_cb2);
		bpf_timer_start(&val->timer, 1000, 0);
	}
	val = bpf_map_lookup_elem(&hmap_malloc, &key_malloc);
	if (val) {
		if (bpf_timer_init(&val->timer, &hmap_malloc, CLOCK_BOOTTIME) != 0)
			err |= 1024;
		bpf_timer_set_callback(&val->timer, timer_cb2);
		bpf_timer_start(&val->timer, 1000, 0);
	}
	return 0;
}

SEC("fentry/bpf_fentry_test2")
int BPF_PROG(test2, int a, int b)
{
	struct hmap_elem init = {}, *val;
	int key = HTAB, key_malloc = HTAB_MALLOC;

	init.counter = 10; /* number of times to trigger timer_cb2 */
	bpf_map_update_elem(&hmap, &key, &init, 0);
	val = bpf_map_lookup_elem(&hmap, &key);
	if (val)
		bpf_timer_init(&val->timer, &hmap, CLOCK_BOOTTIME);
	/* update the same key to free the timer */
	bpf_map_update_elem(&hmap, &key, &init, 0);

	bpf_map_update_elem(&hmap_malloc, &key_malloc, &init, 0);
	val = bpf_map_lookup_elem(&hmap_malloc, &key_malloc);
	if (val)
		bpf_timer_init(&val->timer, &hmap_malloc, CLOCK_BOOTTIME);
	/* update the same key to free the timer */
	bpf_map_update_elem(&hmap_malloc, &key_malloc, &init, 0);

	/* init more timers to check that htab operations
	 * don't leak timer memory.
	 */
	key = 0;
	bpf_map_update_elem(&hmap, &key, &init, 0);
	val = bpf_map_lookup_elem(&hmap, &key);
	if (val)
		bpf_timer_init(&val->timer, &hmap, CLOCK_BOOTTIME);
	bpf_map_delete_elem(&hmap, &key);
	bpf_map_update_elem(&hmap, &key, &init, 0);
	val = bpf_map_lookup_elem(&hmap, &key);
	if (val)
		bpf_timer_init(&val->timer, &hmap, CLOCK_BOOTTIME);

	/* and with non-prealloc htab */
	key_malloc = 0;
	bpf_map_update_elem(&hmap_malloc, &key_malloc, &init, 0);
	val = bpf_map_lookup_elem(&hmap_malloc, &key_malloc);
	if (val)
		bpf_timer_init(&val->timer, &hmap_malloc, CLOCK_BOOTTIME);
	bpf_map_delete_elem(&hmap_malloc, &key_malloc);
	bpf_map_update_elem(&hmap_malloc, &key_malloc, &init, 0);
	val = bpf_map_lookup_elem(&hmap_malloc, &key_malloc);
	if (val)
		bpf_timer_init(&val->timer, &hmap_malloc, CLOCK_BOOTTIME);

	return bpf_timer_test();
}
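
Editor's note: the fentry programs above are driven from user space in the selftests. A minimal runner sketch follows (hypothetical and not part of this diff: it assumes a bpftool-generated skeleton header named "timer.skel.h" and the libbpf bpf_prog_test_run() API available at the time of this series).

// Hypothetical user-space runner for the timer programs above (sketch only).
#include <stdio.h>
#include <unistd.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include "timer.skel.h"	/* assumed skeleton name, not part of this diff */

int main(void)
{
	struct timer *skel;
	__u32 duration = 0, retval = 0;
	int err, prog_fd;

	skel = timer__open_and_load();
	if (!skel)
		return 1;
	err = timer__attach(skel);
	if (err)
		goto out;

	/* Running the fentry/bpf_fentry_test1 program via BPF_PROG_TEST_RUN
	 * invokes test1 above, which arms the array and LRU timers.
	 */
	prog_fd = bpf_program__fd(skel->progs.test1);
	err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, NULL, &retval, &duration);
	if (err)
		goto out;

	/* Give the timer callbacks a chance to fire, then read back the
	 * global err/ok counters exported by the BPF object.
	 */
	sleep(1);
	printf("err=%llu ok=%llu\n",
	       (unsigned long long)skel->bss->err,
	       (unsigned long long)skel->bss->ok);
out:
	timer__destroy(skel);
	return err != 0;
}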

@@ -0,0 +1,88 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021 Facebook */
#include <linux/bpf.h>
#include <time.h>
#include <errno.h>
#include <bpf/bpf_helpers.h>
#include "bpf_tcp_helpers.h"

char _license[] SEC("license") = "GPL";
struct hmap_elem {
	int pad; /* unused */
	struct bpf_timer timer;
};

struct inner_map {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 1024);
	__type(key, int);
	__type(value, struct hmap_elem);
} inner_htab SEC(".maps");

#define ARRAY_KEY 1
#define HASH_KEY 1234

struct outer_arr {
	__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
	__uint(max_entries, 2);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(int));
	__array(values, struct inner_map);
} outer_arr SEC(".maps") = {
	.values = { [ARRAY_KEY] = &inner_htab },
};

__u64 err;
__u64 ok;
__u64 cnt;

static int timer_cb1(void *map, int *key, struct hmap_elem *val);

static int timer_cb2(void *map, int *key, struct hmap_elem *val)
{
	cnt++;
	bpf_timer_set_callback(&val->timer, timer_cb1);
	if (bpf_timer_start(&val->timer, 1000, 0))
		err |= 1;
	ok |= 1;
	return 0;
}

/* callback for inner hash map */
static int timer_cb1(void *map, int *key, struct hmap_elem *val)
{
	cnt++;
	bpf_timer_set_callback(&val->timer, timer_cb2);
	if (bpf_timer_start(&val->timer, 1000, 0))
		err |= 2;
	/* Do a lookup to make sure 'map' and 'key' pointers are correct */
	bpf_map_lookup_elem(map, key);
	ok |= 2;
	return 0;
}

SEC("fentry/bpf_fentry_test1")
int BPF_PROG(test1, int a)
{
	struct hmap_elem init = {};
	struct bpf_map *inner_map;
	struct hmap_elem *val;
	int array_key = ARRAY_KEY;
	int hash_key = HASH_KEY;

	inner_map = bpf_map_lookup_elem(&outer_arr, &array_key);
	if (!inner_map)
		return 0;

	bpf_map_update_elem(inner_map, &hash_key, &init, 0);
	val = bpf_map_lookup_elem(inner_map, &hash_key);
	if (!val)
		return 0;

	bpf_timer_init(&val->timer, inner_map, CLOCK_MONOTONIC);
	if (bpf_timer_set_callback(&val->timer, timer_cb1))
		err |= 4;
	if (bpf_timer_start(&val->timer, 0, 0))
		err |= 8;
	return 0;
}
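
Editor's note: in this map-in-map variant the interesting cleanup path is the release of the inner hash map itself, which must cancel and free any timers still armed in its elements. A hypothetical user-space fragment illustrating that step (skeleton name "timer_mim.skel.h" and map key value assumed, not part of this diff):

// Hypothetical fragment: assumes the timer_mim skeleton is already
// open, loaded and attached, as in the previous sketch.
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include "timer_mim.skel.h"	/* assumed skeleton name */

int release_inner_map(struct timer_mim *skel)
{
	int outer_fd = bpf_map__fd(skel->maps.outer_arr);
	int key = 1;	/* ARRAY_KEY: slot holding inner_htab */

	/* Dropping the last reference from the outer array releases the
	 * inner hash map; timers embedded in its elements are expected to
	 * be cancelled and freed by the kernel at that point.
	 */
	return bpf_map_delete_elem(outer_fd, &key);
}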

@@ -0,0 +1,74 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021 Facebook */
#include <linux/bpf.h>
#include <time.h>
#include <errno.h>
#include <bpf/bpf_helpers.h>
#include "bpf_tcp_helpers.h"

char _license[] SEC("license") = "GPL";
struct hmap_elem {
	int pad; /* unused */
	struct bpf_timer timer;
};

struct inner_map {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 1024);
	__type(key, int);
	__type(value, struct hmap_elem);
} inner_htab SEC(".maps");

#define ARRAY_KEY 1
#define ARRAY_KEY2 2
#define HASH_KEY 1234

struct outer_arr {
	__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
	__uint(max_entries, 2);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(int));
	__array(values, struct inner_map);
} outer_arr SEC(".maps") = {
	.values = { [ARRAY_KEY] = &inner_htab },
};

__u64 err;
__u64 ok;
__u64 cnt;

/* callback for inner hash map */
static int timer_cb(void *map, int *key, struct hmap_elem *val)
{
	return 0;
}

SEC("fentry/bpf_fentry_test1")
int BPF_PROG(test1, int a)
{
	struct hmap_elem init = {};
	struct bpf_map *inner_map, *inner_map2;
	struct hmap_elem *val;
	int array_key = ARRAY_KEY;
	int array_key2 = ARRAY_KEY2;
	int hash_key = HASH_KEY;

	inner_map = bpf_map_lookup_elem(&outer_arr, &array_key);
	if (!inner_map)
		return 0;

	inner_map2 = bpf_map_lookup_elem(&outer_arr, &array_key2);
	if (!inner_map2)
		return 0;
	bpf_map_update_elem(inner_map, &hash_key, &init, 0);
	val = bpf_map_lookup_elem(inner_map, &hash_key);
	if (!val)
		return 0;

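	/* Note: 'val' was looked up from inner_map, yet inner_map2 is passed
	 * to bpf_timer_init() below. That mismatch is what this "reject"
	 * test exercises.
	 */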
	bpf_timer_init(&val->timer, inner_map2, CLOCK_MONOTONIC);
	if (bpf_timer_set_callback(&val->timer, timer_cb))
		err |= 4;
	if (bpf_timer_start(&val->timer, 0, 0))
		err |= 8;
	return 0;
}