mirror of https://gitee.com/openkylin/linux.git
bpf: introduce new bpf AF_XDP map type BPF_MAP_TYPE_XSKMAP
The xskmap is yet another BPF map, very much inspired by dev/cpu/sockmap, and is a holder of AF_XDP sockets. A user application adds AF_XDP sockets into the map, and by using the bpf_redirect_map helper, an XDP program can redirect XDP frames to an AF_XDP socket. Note that a socket that is bound to a certain ifindex/queue index will *only* accept XDP frames from that netdev/queue index. If an XDP program tries to redirect from a netdev/queue index other than the one the socket is bound to, the frame will not be received on the socket. A socket can reside in multiple maps. v3: Fixed race and simplified code. v2: Removed one indirection in map lookup. Signed-off-by: Björn Töpel <bjorn.topel@intel.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
parent
c497176cb2
commit
fbfc504a24
|
@ -676,6 +676,31 @@ static inline int sock_map_prog(struct bpf_map *map,
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(CONFIG_XDP_SOCKETS)
|
||||||
|
struct xdp_sock;
|
||||||
|
struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map, u32 key);
|
||||||
|
int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
|
||||||
|
struct xdp_sock *xs);
|
||||||
|
void __xsk_map_flush(struct bpf_map *map);
|
||||||
|
#else
|
||||||
|
struct xdp_sock;
|
||||||
|
static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map,
|
||||||
|
u32 key)
|
||||||
|
{
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
|
||||||
|
struct xdp_sock *xs)
|
||||||
|
{
|
||||||
|
return -EOPNOTSUPP;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void __xsk_map_flush(struct bpf_map *map)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/* verifier prototypes for helper functions called from eBPF programs */
|
/* verifier prototypes for helper functions called from eBPF programs */
|
||||||
extern const struct bpf_func_proto bpf_map_lookup_elem_proto;
|
extern const struct bpf_func_proto bpf_map_lookup_elem_proto;
|
||||||
extern const struct bpf_func_proto bpf_map_update_elem_proto;
|
extern const struct bpf_func_proto bpf_map_update_elem_proto;
|
||||||
|
|
|
@ -49,4 +49,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops)
|
||||||
BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops)
|
BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops)
|
||||||
#endif
|
#endif
|
||||||
BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops)
|
BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops)
|
||||||
|
#if defined(CONFIG_XDP_SOCKETS)
|
||||||
|
BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -28,6 +28,7 @@ struct xdp_sock {
|
||||||
struct xsk_queue *rx;
|
struct xsk_queue *rx;
|
||||||
struct net_device *dev;
|
struct net_device *dev;
|
||||||
struct xdp_umem *umem;
|
struct xdp_umem *umem;
|
||||||
|
struct list_head flush_node;
|
||||||
u16 queue_id;
|
u16 queue_id;
|
||||||
/* Protects multiple processes in the control path */
|
/* Protects multiple processes in the control path */
|
||||||
struct mutex mutex;
|
struct mutex mutex;
|
||||||
|
@ -39,6 +40,7 @@ struct xdp_buff;
|
||||||
int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
|
int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
|
||||||
int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
|
int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
|
||||||
void xsk_flush(struct xdp_sock *xs);
|
void xsk_flush(struct xdp_sock *xs);
|
||||||
|
bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs);
|
||||||
#else
|
#else
|
||||||
static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
|
static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
|
||||||
{
|
{
|
||||||
|
@ -53,6 +55,11 @@ static inline int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
|
||||||
static inline void xsk_flush(struct xdp_sock *xs)
|
static inline void xsk_flush(struct xdp_sock *xs)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
#endif /* CONFIG_XDP_SOCKETS */
|
#endif /* CONFIG_XDP_SOCKETS */
|
||||||
|
|
||||||
#endif /* _LINUX_XDP_SOCK_H */
|
#endif /* _LINUX_XDP_SOCK_H */
|
||||||
|
|
|
@ -116,6 +116,7 @@ enum bpf_map_type {
|
||||||
BPF_MAP_TYPE_DEVMAP,
|
BPF_MAP_TYPE_DEVMAP,
|
||||||
BPF_MAP_TYPE_SOCKMAP,
|
BPF_MAP_TYPE_SOCKMAP,
|
||||||
BPF_MAP_TYPE_CPUMAP,
|
BPF_MAP_TYPE_CPUMAP,
|
||||||
|
BPF_MAP_TYPE_XSKMAP,
|
||||||
};
|
};
|
||||||
|
|
||||||
enum bpf_prog_type {
|
enum bpf_prog_type {
|
||||||
|
|
|
@ -8,6 +8,9 @@ obj-$(CONFIG_BPF_SYSCALL) += btf.o
|
||||||
ifeq ($(CONFIG_NET),y)
|
ifeq ($(CONFIG_NET),y)
|
||||||
obj-$(CONFIG_BPF_SYSCALL) += devmap.o
|
obj-$(CONFIG_BPF_SYSCALL) += devmap.o
|
||||||
obj-$(CONFIG_BPF_SYSCALL) += cpumap.o
|
obj-$(CONFIG_BPF_SYSCALL) += cpumap.o
|
||||||
|
ifeq ($(CONFIG_XDP_SOCKETS),y)
|
||||||
|
obj-$(CONFIG_BPF_SYSCALL) += xskmap.o
|
||||||
|
endif
|
||||||
obj-$(CONFIG_BPF_SYSCALL) += offload.o
|
obj-$(CONFIG_BPF_SYSCALL) += offload.o
|
||||||
ifeq ($(CONFIG_STREAM_PARSER),y)
|
ifeq ($(CONFIG_STREAM_PARSER),y)
|
||||||
ifeq ($(CONFIG_INET),y)
|
ifeq ($(CONFIG_INET),y)
|
||||||
|
|
|
@ -2070,8 +2070,11 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
|
||||||
if (func_id != BPF_FUNC_redirect_map)
|
if (func_id != BPF_FUNC_redirect_map)
|
||||||
goto error;
|
goto error;
|
||||||
break;
|
break;
|
||||||
/* Restrict bpf side of cpumap, open when use-cases appear */
|
/* Restrict bpf side of cpumap and xskmap, open when use-cases
|
||||||
|
* appear.
|
||||||
|
*/
|
||||||
case BPF_MAP_TYPE_CPUMAP:
|
case BPF_MAP_TYPE_CPUMAP:
|
||||||
|
case BPF_MAP_TYPE_XSKMAP:
|
||||||
if (func_id != BPF_FUNC_redirect_map)
|
if (func_id != BPF_FUNC_redirect_map)
|
||||||
goto error;
|
goto error;
|
||||||
break;
|
break;
|
||||||
|
@ -2118,7 +2121,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
|
||||||
break;
|
break;
|
||||||
case BPF_FUNC_redirect_map:
|
case BPF_FUNC_redirect_map:
|
||||||
if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
|
if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
|
||||||
map->map_type != BPF_MAP_TYPE_CPUMAP)
|
map->map_type != BPF_MAP_TYPE_CPUMAP &&
|
||||||
|
map->map_type != BPF_MAP_TYPE_XSKMAP)
|
||||||
goto error;
|
goto error;
|
||||||
break;
|
break;
|
||||||
case BPF_FUNC_sk_redirect_map:
|
case BPF_FUNC_sk_redirect_map:
|
||||||
|
|
|
@ -0,0 +1,239 @@
|
||||||
|
// SPDX-License-Identifier: GPL-2.0
|
||||||
|
/* XSKMAP used for AF_XDP sockets
|
||||||
|
* Copyright(c) 2018 Intel Corporation.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms and conditions of the GNU General Public License,
|
||||||
|
* version 2, as published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||||
|
* more details.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <linux/bpf.h>
|
||||||
|
#include <linux/capability.h>
|
||||||
|
#include <net/xdp_sock.h>
|
||||||
|
#include <linux/slab.h>
|
||||||
|
#include <linux/sched.h>
|
||||||
|
|
||||||
|
struct xsk_map {
|
||||||
|
struct bpf_map map;
|
||||||
|
struct xdp_sock **xsk_map;
|
||||||
|
struct list_head __percpu *flush_list;
|
||||||
|
};
|
||||||
|
|
||||||
|
static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
|
||||||
|
{
|
||||||
|
int cpu, err = -EINVAL;
|
||||||
|
struct xsk_map *m;
|
||||||
|
u64 cost;
|
||||||
|
|
||||||
|
if (!capable(CAP_NET_ADMIN))
|
||||||
|
return ERR_PTR(-EPERM);
|
||||||
|
|
||||||
|
if (attr->max_entries == 0 || attr->key_size != 4 ||
|
||||||
|
attr->value_size != 4 ||
|
||||||
|
attr->map_flags & ~(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY))
|
||||||
|
return ERR_PTR(-EINVAL);
|
||||||
|
|
||||||
|
m = kzalloc(sizeof(*m), GFP_USER);
|
||||||
|
if (!m)
|
||||||
|
return ERR_PTR(-ENOMEM);
|
||||||
|
|
||||||
|
bpf_map_init_from_attr(&m->map, attr);
|
||||||
|
|
||||||
|
cost = (u64)m->map.max_entries * sizeof(struct xdp_sock *);
|
||||||
|
cost += sizeof(struct list_head) * num_possible_cpus();
|
||||||
|
if (cost >= U32_MAX - PAGE_SIZE)
|
||||||
|
goto free_m;
|
||||||
|
|
||||||
|
m->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
|
||||||
|
|
||||||
|
/* Notice returns -EPERM on if map size is larger than memlock limit */
|
||||||
|
err = bpf_map_precharge_memlock(m->map.pages);
|
||||||
|
if (err)
|
||||||
|
goto free_m;
|
||||||
|
|
||||||
|
m->flush_list = alloc_percpu(struct list_head);
|
||||||
|
if (!m->flush_list)
|
||||||
|
goto free_m;
|
||||||
|
|
||||||
|
for_each_possible_cpu(cpu)
|
||||||
|
INIT_LIST_HEAD(per_cpu_ptr(m->flush_list, cpu));
|
||||||
|
|
||||||
|
m->xsk_map = bpf_map_area_alloc(m->map.max_entries *
|
||||||
|
sizeof(struct xdp_sock *),
|
||||||
|
m->map.numa_node);
|
||||||
|
if (!m->xsk_map)
|
||||||
|
goto free_percpu;
|
||||||
|
return &m->map;
|
||||||
|
|
||||||
|
free_percpu:
|
||||||
|
free_percpu(m->flush_list);
|
||||||
|
free_m:
|
||||||
|
kfree(m);
|
||||||
|
return ERR_PTR(err);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void xsk_map_free(struct bpf_map *map)
|
||||||
|
{
|
||||||
|
struct xsk_map *m = container_of(map, struct xsk_map, map);
|
||||||
|
int i;
|
||||||
|
|
||||||
|
synchronize_net();
|
||||||
|
|
||||||
|
for (i = 0; i < map->max_entries; i++) {
|
||||||
|
struct xdp_sock *xs;
|
||||||
|
|
||||||
|
xs = m->xsk_map[i];
|
||||||
|
if (!xs)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
sock_put((struct sock *)xs);
|
||||||
|
}
|
||||||
|
|
||||||
|
free_percpu(m->flush_list);
|
||||||
|
bpf_map_area_free(m->xsk_map);
|
||||||
|
kfree(m);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int xsk_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
|
||||||
|
{
|
||||||
|
struct xsk_map *m = container_of(map, struct xsk_map, map);
|
||||||
|
u32 index = key ? *(u32 *)key : U32_MAX;
|
||||||
|
u32 *next = next_key;
|
||||||
|
|
||||||
|
if (index >= m->map.max_entries) {
|
||||||
|
*next = 0;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (index == m->map.max_entries - 1)
|
||||||
|
return -ENOENT;
|
||||||
|
*next = index + 1;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * __xsk_map_lookup_elem - fetch the socket stored at index @key.
 * @map: generic map header.
 * @key: slot index.
 *
 * Return: the socket pointer read from the slot (NULL if the slot is
 * empty), or NULL when @key is not below max_entries.
 */
struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map, u32 key)
{
	struct xsk_map *m = container_of(map, struct xsk_map, map);

	if (key >= map->max_entries)
		return NULL;

	/* Slots are replaced with xchg(); read the pointer exactly once. */
	return READ_ONCE(m->xsk_map[key]);
}
|
||||||
|
|
||||||
|
int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
|
||||||
|
struct xdp_sock *xs)
|
||||||
|
{
|
||||||
|
struct xsk_map *m = container_of(map, struct xsk_map, map);
|
||||||
|
struct list_head *flush_list = this_cpu_ptr(m->flush_list);
|
||||||
|
int err;
|
||||||
|
|
||||||
|
err = xsk_rcv(xs, xdp);
|
||||||
|
if (err)
|
||||||
|
return err;
|
||||||
|
|
||||||
|
if (!xs->flush_node.prev)
|
||||||
|
list_add(&xs->flush_node, flush_list);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void __xsk_map_flush(struct bpf_map *map)
|
||||||
|
{
|
||||||
|
struct xsk_map *m = container_of(map, struct xsk_map, map);
|
||||||
|
struct list_head *flush_list = this_cpu_ptr(m->flush_list);
|
||||||
|
struct xdp_sock *xs, *tmp;
|
||||||
|
|
||||||
|
list_for_each_entry_safe(xs, tmp, flush_list, flush_node) {
|
||||||
|
xsk_flush(xs);
|
||||||
|
__list_del(xs->flush_node.prev, xs->flush_node.next);
|
||||||
|
xs->flush_node.prev = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void *xsk_map_lookup_elem(struct bpf_map *map, void *key)
|
||||||
|
{
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value,
|
||||||
|
u64 map_flags)
|
||||||
|
{
|
||||||
|
struct xsk_map *m = container_of(map, struct xsk_map, map);
|
||||||
|
u32 i = *(u32 *)key, fd = *(u32 *)value;
|
||||||
|
struct xdp_sock *xs, *old_xs;
|
||||||
|
struct socket *sock;
|
||||||
|
int err;
|
||||||
|
|
||||||
|
if (unlikely(map_flags > BPF_EXIST))
|
||||||
|
return -EINVAL;
|
||||||
|
if (unlikely(i >= m->map.max_entries))
|
||||||
|
return -E2BIG;
|
||||||
|
if (unlikely(map_flags == BPF_NOEXIST))
|
||||||
|
return -EEXIST;
|
||||||
|
|
||||||
|
sock = sockfd_lookup(fd, &err);
|
||||||
|
if (!sock)
|
||||||
|
return err;
|
||||||
|
|
||||||
|
if (sock->sk->sk_family != PF_XDP) {
|
||||||
|
sockfd_put(sock);
|
||||||
|
return -EOPNOTSUPP;
|
||||||
|
}
|
||||||
|
|
||||||
|
xs = (struct xdp_sock *)sock->sk;
|
||||||
|
|
||||||
|
if (!xsk_is_setup_for_bpf_map(xs)) {
|
||||||
|
sockfd_put(sock);
|
||||||
|
return -EOPNOTSUPP;
|
||||||
|
}
|
||||||
|
|
||||||
|
sock_hold(sock->sk);
|
||||||
|
|
||||||
|
old_xs = xchg(&m->xsk_map[i], xs);
|
||||||
|
if (old_xs) {
|
||||||
|
/* Make sure we've flushed everything. */
|
||||||
|
synchronize_net();
|
||||||
|
sock_put((struct sock *)old_xs);
|
||||||
|
}
|
||||||
|
|
||||||
|
sockfd_put(sock);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int xsk_map_delete_elem(struct bpf_map *map, void *key)
|
||||||
|
{
|
||||||
|
struct xsk_map *m = container_of(map, struct xsk_map, map);
|
||||||
|
struct xdp_sock *old_xs;
|
||||||
|
int k = *(u32 *)key;
|
||||||
|
|
||||||
|
if (k >= map->max_entries)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
old_xs = xchg(&m->xsk_map[k], NULL);
|
||||||
|
if (old_xs) {
|
||||||
|
/* Make sure we've flushed everything. */
|
||||||
|
synchronize_net();
|
||||||
|
sock_put((struct sock *)old_xs);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
const struct bpf_map_ops xsk_map_ops = {
|
||||||
|
.map_alloc = xsk_map_alloc,
|
||||||
|
.map_free = xsk_map_free,
|
||||||
|
.map_get_next_key = xsk_map_get_next_key,
|
||||||
|
.map_lookup_elem = xsk_map_lookup_elem,
|
||||||
|
.map_update_elem = xsk_map_update_elem,
|
||||||
|
.map_delete_elem = xsk_map_delete_elem,
|
||||||
|
};
|
||||||
|
|
||||||
|
|
|
@ -41,6 +41,11 @@ static struct xdp_sock *xdp_sk(struct sock *sk)
|
||||||
return (struct xdp_sock *)sk;
|
return (struct xdp_sock *)sk;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs)
|
||||||
|
{
|
||||||
|
return !!xs->rx;
|
||||||
|
}
|
||||||
|
|
||||||
static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
|
static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
|
||||||
{
|
{
|
||||||
u32 *id, len = xdp->data_end - xdp->data;
|
u32 *id, len = xdp->data_end - xdp->data;
|
||||||
|
|
Loading…
Reference in New Issue