2019-05-27 14:55:01 +08:00
|
|
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
2017-08-05 18:38:26 +08:00
|
|
|
/*
|
|
|
|
* SR-IPv6 implementation
|
|
|
|
*
|
ipv6: sr: Add seg6local action End.BPF
This patch adds the End.BPF action to the LWT seg6local infrastructure.
This action works like any other seg6local End action, meaning that an IPv6
header with SRH is needed, whose DA has to be equal to the SID of the
action. It will also advance the SRH to the next segment, the BPF program
does not have to take care of this.
Since the BPF program may not be a source of instability in the kernel, it
is important to ensure that the integrity of the packet is maintained
before yielding it back to the IPv6 layer. The hook hence keeps track if
the SRH has been altered through the helpers, and re-validates its
content if needed with seg6_validate_srh. The state kept for validation is
stored in a per-CPU buffer. The BPF program is not allowed to directly
write into the packet, and only some fields of the SRH can be altered
through the helper bpf_lwt_seg6_store_bytes.
Performance profiling has shown that the SRH re-validation does not induce
a significant overhead. If the altered SRH is deemed as invalid, the packet
is dropped.
This validation is also done before executing any action through
bpf_lwt_seg6_action, and will not be performed again if the SRH is not
modified after calling the action.
The BPF program may return 3 types of return codes:
- BPF_OK: the End.BPF action will look up the next destination through
seg6_lookup_nexthop.
- BPF_REDIRECT: if an action has been executed through the
bpf_lwt_seg6_action helper, the BPF program should return this
value, as the skb's destination is already set and the default
lookup should not be performed.
- BPF_DROP : the packet will be dropped.
Signed-off-by: Mathieu Xhonneux <m.xhonneux@gmail.com>
Acked-by: David Lebrun <dlebrun@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-05-20 21:58:16 +08:00
|
|
|
* Authors:
|
2017-08-05 18:38:26 +08:00
|
|
|
* David Lebrun <david.lebrun@uclouvain.be>
|
ipv6: sr: Add seg6local action End.BPF
This patch adds the End.BPF action to the LWT seg6local infrastructure.
This action works like any other seg6local End action, meaning that an IPv6
header with SRH is needed, whose DA has to be equal to the SID of the
action. It will also advance the SRH to the next segment, the BPF program
does not have to take care of this.
Since the BPF program may not be a source of instability in the kernel, it
is important to ensure that the integrity of the packet is maintained
before yielding it back to the IPv6 layer. The hook hence keeps track if
the SRH has been altered through the helpers, and re-validates its
content if needed with seg6_validate_srh. The state kept for validation is
stored in a per-CPU buffer. The BPF program is not allowed to directly
write into the packet, and only some fields of the SRH can be altered
through the helper bpf_lwt_seg6_store_bytes.
Performance profiling has shown that the SRH re-validation does not induce
a significant overhead. If the altered SRH is deemed as invalid, the packet
is dropped.
This validation is also done before executing any action through
bpf_lwt_seg6_action, and will not be performed again if the SRH is not
modified after calling the action.
The BPF program may return 3 types of return codes:
- BPF_OK: the End.BPF action will look up the next destination through
seg6_lookup_nexthop.
- BPF_REDIRECT: if an action has been executed through the
bpf_lwt_seg6_action helper, the BPF program should return this
value, as the skb's destination is already set and the default
lookup should not be performed.
- BPF_DROP : the packet will be dropped.
Signed-off-by: Mathieu Xhonneux <m.xhonneux@gmail.com>
Acked-by: David Lebrun <dlebrun@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-05-20 21:58:16 +08:00
|
|
|
* eBPF support: Mathieu Xhonneux <m.xhonneux@gmail.com>
|
2017-08-05 18:38:26 +08:00
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/types.h>
|
|
|
|
#include <linux/skbuff.h>
|
|
|
|
#include <linux/net.h>
|
|
|
|
#include <linux/module.h>
|
|
|
|
#include <net/ip.h>
|
|
|
|
#include <net/lwtunnel.h>
|
|
|
|
#include <net/netevent.h>
|
|
|
|
#include <net/netns/generic.h>
|
|
|
|
#include <net/ip6_fib.h>
|
|
|
|
#include <net/route.h>
|
|
|
|
#include <net/seg6.h>
|
|
|
|
#include <linux/seg6.h>
|
|
|
|
#include <linux/seg6_local.h>
|
|
|
|
#include <net/addrconf.h>
|
|
|
|
#include <net/ip6_route.h>
|
|
|
|
#include <net/dst_cache.h>
|
2020-01-20 12:48:37 +08:00
|
|
|
#include <net/ip_tunnels.h>
|
2017-08-05 18:38:26 +08:00
|
|
|
#ifdef CONFIG_IPV6_SEG6_HMAC
|
|
|
|
#include <net/seg6_hmac.h>
|
|
|
|
#endif
|
2018-05-20 21:58:13 +08:00
|
|
|
#include <net/seg6_local.h>
|
2017-08-25 15:58:17 +08:00
|
|
|
#include <linux/etherdevice.h>
|
ipv6: sr: Add seg6local action End.BPF
This patch adds the End.BPF action to the LWT seg6local infrastructure.
This action works like any other seg6local End action, meaning that an IPv6
header with SRH is needed, whose DA has to be equal to the SID of the
action. It will also advance the SRH to the next segment, the BPF program
does not have to take care of this.
Since the BPF program may not be a source of instability in the kernel, it
is important to ensure that the integrity of the packet is maintained
before yielding it back to the IPv6 layer. The hook hence keeps track if
the SRH has been altered through the helpers, and re-validates its
content if needed with seg6_validate_srh. The state kept for validation is
stored in a per-CPU buffer. The BPF program is not allowed to directly
write into the packet, and only some fields of the SRH can be altered
through the helper bpf_lwt_seg6_store_bytes.
Performance profiling has shown that the SRH re-validation does not induce
a significant overhead. If the altered SRH is deemed as invalid, the packet
is dropped.
This validation is also done before executing any action through
bpf_lwt_seg6_action, and will not be performed again if the SRH is not
modified after calling the action.
The BPF program may return 3 types of return codes:
- BPF_OK: the End.BPF action will look up the next destination through
seg6_lookup_nexthop.
- BPF_REDIRECT: if an action has been executed through the
bpf_lwt_seg6_action helper, the BPF program should return this
value, as the skb's destination is already set and the default
lookup should not be performed.
- BPF_DROP : the packet will be dropped.
Signed-off-by: Mathieu Xhonneux <m.xhonneux@gmail.com>
Acked-by: David Lebrun <dlebrun@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-05-20 21:58:16 +08:00
|
|
|
#include <linux/bpf.h>
|
2017-08-05 18:38:26 +08:00
|
|
|
|
|
|
|
struct seg6_local_lwt;

/* Per-behavior hooks that let a behavior customize how one of its instances
 * is created and torn down.
 */
struct seg6_local_lwtunnel_ops {
	/* called with the instance, an opaque configuration pointer and the
	 * netlink extended ack used for reporting errors
	 */
	int (*build_state)(struct seg6_local_lwt *slwt, const void *cfg,
			   struct netlink_ext_ack *extack);

	/* called with the instance that is being destroyed */
	void (*destroy_state)(struct seg6_local_lwt *slwt);
};
|
|
|
|
|
2017-08-05 18:38:26 +08:00
|
|
|
struct seg6_action_desc {
|
|
|
|
int action;
|
|
|
|
unsigned long attrs;
|
2020-12-02 21:05:12 +08:00
|
|
|
|
|
|
|
/* The optattrs field is used for specifying all the optional
|
|
|
|
* attributes supported by a specific behavior.
|
|
|
|
* It means that if one of these attributes is not provided in the
|
|
|
|
* netlink message during the behavior creation, no errors will be
|
|
|
|
* returned to the userspace.
|
|
|
|
*
|
|
|
|
* Each attribute can be only of two types (mutually exclusive):
|
|
|
|
* 1) required or 2) optional.
|
|
|
|
* Every user MUST obey to this rule! If you set an attribute as
|
|
|
|
* required the same attribute CANNOT be set as optional and vice
|
|
|
|
* versa.
|
|
|
|
*/
|
|
|
|
unsigned long optattrs;
|
|
|
|
|
2017-08-05 18:38:26 +08:00
|
|
|
int (*input)(struct sk_buff *skb, struct seg6_local_lwt *slwt);
|
|
|
|
int static_headroom;
|
2020-12-02 21:05:13 +08:00
|
|
|
|
|
|
|
struct seg6_local_lwtunnel_ops slwt_ops;
|
2017-08-05 18:38:26 +08:00
|
|
|
};
|
|
|
|
|
ipv6: sr: Add seg6local action End.BPF
This patch adds the End.BPF action to the LWT seg6local infrastructure.
This action works like any other seg6local End action, meaning that an IPv6
header with SRH is needed, whose DA has to be equal to the SID of the
action. It will also advance the SRH to the next segment, the BPF program
does not have to take care of this.
Since the BPF program may not be a source of instability in the kernel, it
is important to ensure that the integrity of the packet is maintained
before yielding it back to the IPv6 layer. The hook hence keeps track if
the SRH has been altered through the helpers, and re-validates its
content if needed with seg6_validate_srh. The state kept for validation is
stored in a per-CPU buffer. The BPF program is not allowed to directly
write into the packet, and only some fields of the SRH can be altered
through the helper bpf_lwt_seg6_store_bytes.
Performance profiling has shown that the SRH re-validation does not induce
a significant overhead. If the altered SRH is deemed as invalid, the packet
is dropped.
This validation is also done before executing any action through
bpf_lwt_seg6_action, and will not be performed again if the SRH is not
modified after calling the action.
The BPF program may return 3 types of return codes:
- BPF_OK: the End.BPF action will look up the next destination through
seg6_lookup_nexthop.
- BPF_REDIRECT: if an action has been executed through the
bpf_lwt_seg6_action helper, the BPF program should return this
value, as the skb's destination is already set and the default
lookup should not be performed.
- BPF_DROP : the packet will be dropped.
Signed-off-by: Mathieu Xhonneux <m.xhonneux@gmail.com>
Acked-by: David Lebrun <dlebrun@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-05-20 21:58:16 +08:00
|
|
|
/* A BPF program reference together with a name string. */
struct bpf_lwt_prog {
	struct bpf_prog *prog;
	char *name;
};
|
|
|
|
|
2020-12-02 21:05:14 +08:00
|
|
|
/* Operating mode of an End.DT* instance. The invalid value doubles as a
 * negative errno; the two valid modes are 0 (legacy) and 1 (VRF).
 */
enum seg6_end_dt_mode {
	DT_INVALID_MODE	= -EINVAL,
	DT_LEGACY_MODE	= 0,
	DT_VRF_MODE	= 1,
};
|
|
|
|
|
|
|
|
struct seg6_end_dt_info {
|
|
|
|
enum seg6_end_dt_mode mode;
|
|
|
|
|
|
|
|
struct net *net;
|
|
|
|
/* VRF device associated to the routing table used by the SRv6
|
|
|
|
* End.DT4/DT6 behavior for routing IPv4/IPv6 packets.
|
|
|
|
*/
|
|
|
|
int vrf_ifindex;
|
|
|
|
int vrf_table;
|
|
|
|
|
|
|
|
/* tunneled packet proto and family (IPv4 or IPv6) */
|
|
|
|
__be16 proto;
|
|
|
|
u16 family;
|
|
|
|
int hdrlen;
|
|
|
|
};
|
|
|
|
|
2017-08-05 18:38:26 +08:00
|
|
|
struct seg6_local_lwt {
|
|
|
|
int action;
|
|
|
|
struct ipv6_sr_hdr *srh;
|
|
|
|
int table;
|
|
|
|
struct in_addr nh4;
|
|
|
|
struct in6_addr nh6;
|
|
|
|
int iif;
|
|
|
|
int oif;
|
ipv6: sr: Add seg6local action End.BPF
This patch adds the End.BPF action to the LWT seg6local infrastructure.
This action works like any other seg6local End action, meaning that an IPv6
header with SRH is needed, whose DA has to be equal to the SID of the
action. It will also advance the SRH to the next segment, the BPF program
does not have to take care of this.
Since the BPF program may not be a source of instability in the kernel, it
is important to ensure that the integrity of the packet is maintained
before yielding it back to the IPv6 layer. The hook hence keeps track if
the SRH has been altered through the helpers, and re-validates its
content if needed with seg6_validate_srh. The state kept for validation is
stored in a per-CPU buffer. The BPF program is not allowed to directly
write into the packet, and only some fields of the SRH can be altered
through the helper bpf_lwt_seg6_store_bytes.
Performances profiling has shown that the SRH re-validation does not induce
a significant overhead. If the altered SRH is deemed as invalid, the packet
is dropped.
This validation is also done before executing any action through
bpf_lwt_seg6_action, and will not be performed again if the SRH is not
modified after calling the action.
The BPF program may return 3 types of return codes:
- BPF_OK: the End.BPF action will look up the next destination through
seg6_lookup_nexthop.
- BPF_REDIRECT: if an action has been executed through the
bpf_lwt_seg6_action helper, the BPF program should return this
value, as the skb's destination is already set and the default
lookup should not be performed.
- BPF_DROP : the packet will be dropped.
Signed-off-by: Mathieu Xhonneux <m.xhonneux@gmail.com>
Acked-by: David Lebrun <dlebrun@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-05-20 21:58:16 +08:00
|
|
|
struct bpf_lwt_prog bpf;
|
2020-12-02 21:05:14 +08:00
|
|
|
#ifdef CONFIG_NET_L3_MASTER_DEV
|
|
|
|
struct seg6_end_dt_info dt_info;
|
|
|
|
#endif
|
2017-08-05 18:38:26 +08:00
|
|
|
|
|
|
|
int headroom;
|
|
|
|
struct seg6_action_desc *desc;
|
2020-12-02 21:05:12 +08:00
|
|
|
/* unlike the required attrs, we have to track the optional attributes
|
|
|
|
* that have been effectively parsed.
|
|
|
|
*/
|
|
|
|
unsigned long parsed_optattrs;
|
2017-08-05 18:38:26 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
static struct seg6_local_lwt *seg6_local_lwtunnel(struct lwtunnel_state *lwt)
|
|
|
|
{
|
|
|
|
return (struct seg6_local_lwt *)lwt->data;
|
|
|
|
}
|
|
|
|
|
2017-08-05 18:39:48 +08:00
|
|
|
static struct ipv6_sr_hdr *get_srh(struct sk_buff *skb)
|
|
|
|
{
|
|
|
|
struct ipv6_sr_hdr *srh;
|
2017-08-30 16:50:37 +08:00
|
|
|
int len, srhoff = 0;
|
2017-08-05 18:39:48 +08:00
|
|
|
|
2017-08-30 16:50:37 +08:00
|
|
|
if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
if (!pskb_may_pull(skb, srhoff + sizeof(*srh)))
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
|
|
|
|
|
2017-08-05 18:39:48 +08:00
|
|
|
len = (srh->hdrlen + 1) << 3;
|
|
|
|
|
2017-08-30 16:50:37 +08:00
|
|
|
if (!pskb_may_pull(skb, srhoff + len))
|
2017-08-05 18:39:48 +08:00
|
|
|
return NULL;
|
|
|
|
|
2019-11-16 23:05:52 +08:00
|
|
|
/* note that pskb_may_pull may change pointers in header;
|
|
|
|
* for this reason it is necessary to reload them when needed.
|
|
|
|
*/
|
|
|
|
srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
|
|
|
|
|
2020-06-03 14:54:42 +08:00
|
|
|
if (!seg6_validate_srh(srh, len, true))
|
2017-08-05 18:39:48 +08:00
|
|
|
return NULL;
|
|
|
|
|
|
|
|
return srh;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct ipv6_sr_hdr *get_and_validate_srh(struct sk_buff *skb)
|
|
|
|
{
|
|
|
|
struct ipv6_sr_hdr *srh;
|
|
|
|
|
|
|
|
srh = get_srh(skb);
|
|
|
|
if (!srh)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
if (srh->segments_left == 0)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
#ifdef CONFIG_IPV6_SEG6_HMAC
|
|
|
|
if (!seg6_hmac_validate_skb(skb))
|
|
|
|
return NULL;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
return srh;
|
|
|
|
}
|
|
|
|
|
2017-08-25 15:56:47 +08:00
|
|
|
static bool decap_and_validate(struct sk_buff *skb, int proto)
|
|
|
|
{
|
|
|
|
struct ipv6_sr_hdr *srh;
|
|
|
|
unsigned int off = 0;
|
|
|
|
|
|
|
|
srh = get_srh(skb);
|
|
|
|
if (srh && srh->segments_left > 0)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
#ifdef CONFIG_IPV6_SEG6_HMAC
|
|
|
|
if (srh && !seg6_hmac_validate_skb(skb))
|
|
|
|
return false;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
if (ipv6_find_hdr(skb, &off, proto, NULL, NULL) < 0)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (!pskb_pull(skb, off))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
skb_postpull_rcsum(skb, skb_network_header(skb), off);
|
|
|
|
|
|
|
|
skb_reset_network_header(skb);
|
|
|
|
skb_reset_transport_header(skb);
|
2020-01-20 12:48:37 +08:00
|
|
|
if (iptunnel_pull_offloads(skb))
|
|
|
|
return false;
|
2017-08-25 15:56:47 +08:00
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void advance_nextseg(struct ipv6_sr_hdr *srh, struct in6_addr *daddr)
|
|
|
|
{
|
|
|
|
struct in6_addr *addr;
|
|
|
|
|
|
|
|
srh->segments_left--;
|
|
|
|
addr = srh->segments + srh->segments_left;
|
|
|
|
*daddr = *addr;
|
|
|
|
}
|
|
|
|
|
2019-11-23 00:22:42 +08:00
|
|
|
static int
|
|
|
|
seg6_lookup_any_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
|
|
|
|
u32 tbl_id, bool local_delivery)
|
2017-08-25 15:56:47 +08:00
|
|
|
{
|
|
|
|
struct net *net = dev_net(skb->dev);
|
|
|
|
struct ipv6hdr *hdr = ipv6_hdr(skb);
|
|
|
|
int flags = RT6_LOOKUP_F_HAS_SADDR;
|
|
|
|
struct dst_entry *dst = NULL;
|
|
|
|
struct rt6_info *rt;
|
|
|
|
struct flowi6 fl6;
|
2019-11-23 00:22:42 +08:00
|
|
|
int dev_flags = 0;
|
2017-08-25 15:56:47 +08:00
|
|
|
|
|
|
|
fl6.flowi6_iif = skb->dev->ifindex;
|
|
|
|
fl6.daddr = nhaddr ? *nhaddr : hdr->daddr;
|
|
|
|
fl6.saddr = hdr->saddr;
|
|
|
|
fl6.flowlabel = ip6_flowinfo(hdr);
|
|
|
|
fl6.flowi6_mark = skb->mark;
|
|
|
|
fl6.flowi6_proto = hdr->nexthdr;
|
|
|
|
|
|
|
|
if (nhaddr)
|
|
|
|
fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH;
|
|
|
|
|
|
|
|
if (!tbl_id) {
|
2018-03-03 00:32:17 +08:00
|
|
|
dst = ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags);
|
2017-08-25 15:56:47 +08:00
|
|
|
} else {
|
|
|
|
struct fib6_table *table;
|
|
|
|
|
|
|
|
table = fib6_get_table(net, tbl_id);
|
|
|
|
if (!table)
|
|
|
|
goto out;
|
|
|
|
|
2018-03-03 00:32:17 +08:00
|
|
|
rt = ip6_pol_route(net, table, 0, &fl6, skb, flags);
|
2017-08-25 15:56:47 +08:00
|
|
|
dst = &rt->dst;
|
|
|
|
}
|
|
|
|
|
2019-11-23 00:22:42 +08:00
|
|
|
/* we want to discard traffic destined for local packet processing,
|
|
|
|
* if @local_delivery is set to false.
|
|
|
|
*/
|
|
|
|
if (!local_delivery)
|
|
|
|
dev_flags |= IFF_LOOPBACK;
|
|
|
|
|
|
|
|
if (dst && (dst->dev->flags & dev_flags) && !dst->error) {
|
2017-08-25 15:56:47 +08:00
|
|
|
dst_release(dst);
|
|
|
|
dst = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
out:
|
|
|
|
if (!dst) {
|
|
|
|
rt = net->ipv6.ip6_blk_hole_entry;
|
|
|
|
dst = &rt->dst;
|
|
|
|
dst_hold(dst);
|
|
|
|
}
|
|
|
|
|
|
|
|
skb_dst_drop(skb);
|
|
|
|
skb_dst_set(skb, dst);
|
2018-05-20 21:58:13 +08:00
|
|
|
return dst->error;
|
2017-08-25 15:56:47 +08:00
|
|
|
}
|
|
|
|
|
2019-11-23 00:22:42 +08:00
|
|
|
/* Same as seg6_lookup_any_nexthop() with local delivery disabled.
 * Returns whatever that lookup returns.
 */
int seg6_lookup_nexthop(struct sk_buff *skb,
			struct in6_addr *nhaddr, u32 tbl_id)
{
	const bool local_delivery = false;

	return seg6_lookup_any_nexthop(skb, nhaddr, tbl_id, local_delivery);
}
|
|
|
|
|
2017-08-05 18:39:48 +08:00
|
|
|
/* regular endpoint function */
|
|
|
|
static int input_action_end(struct sk_buff *skb, struct seg6_local_lwt *slwt)
|
|
|
|
{
|
|
|
|
struct ipv6_sr_hdr *srh;
|
|
|
|
|
|
|
|
srh = get_and_validate_srh(skb);
|
|
|
|
if (!srh)
|
|
|
|
goto drop;
|
|
|
|
|
2017-08-25 15:56:47 +08:00
|
|
|
advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
|
2017-08-05 18:39:48 +08:00
|
|
|
|
2018-05-20 21:58:13 +08:00
|
|
|
seg6_lookup_nexthop(skb, NULL, 0);
|
2017-08-05 18:39:48 +08:00
|
|
|
|
|
|
|
return dst_input(skb);
|
|
|
|
|
|
|
|
drop:
|
|
|
|
kfree_skb(skb);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* regular endpoint, and forward to specified nexthop */
|
|
|
|
static int input_action_end_x(struct sk_buff *skb, struct seg6_local_lwt *slwt)
|
|
|
|
{
|
|
|
|
struct ipv6_sr_hdr *srh;
|
|
|
|
|
|
|
|
srh = get_and_validate_srh(skb);
|
|
|
|
if (!srh)
|
|
|
|
goto drop;
|
|
|
|
|
2017-08-25 15:56:47 +08:00
|
|
|
advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
|
2017-08-05 18:39:48 +08:00
|
|
|
|
2018-05-20 21:58:13 +08:00
|
|
|
seg6_lookup_nexthop(skb, &slwt->nh6, 0);
|
2017-08-05 18:39:48 +08:00
|
|
|
|
|
|
|
return dst_input(skb);
|
|
|
|
|
|
|
|
drop:
|
|
|
|
kfree_skb(skb);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2017-08-25 15:58:17 +08:00
|
|
|
static int input_action_end_t(struct sk_buff *skb, struct seg6_local_lwt *slwt)
|
|
|
|
{
|
|
|
|
struct ipv6_sr_hdr *srh;
|
|
|
|
|
|
|
|
srh = get_and_validate_srh(skb);
|
|
|
|
if (!srh)
|
|
|
|
goto drop;
|
|
|
|
|
|
|
|
advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
|
|
|
|
|
2018-05-20 21:58:13 +08:00
|
|
|
seg6_lookup_nexthop(skb, NULL, slwt->table);
|
2017-08-25 15:58:17 +08:00
|
|
|
|
|
|
|
return dst_input(skb);
|
|
|
|
|
|
|
|
drop:
|
|
|
|
kfree_skb(skb);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* decapsulate and forward inner L2 frame on specified interface */
|
|
|
|
static int input_action_end_dx2(struct sk_buff *skb,
|
|
|
|
struct seg6_local_lwt *slwt)
|
|
|
|
{
|
|
|
|
struct net *net = dev_net(skb->dev);
|
|
|
|
struct net_device *odev;
|
|
|
|
struct ethhdr *eth;
|
|
|
|
|
2020-03-12 00:54:06 +08:00
|
|
|
if (!decap_and_validate(skb, IPPROTO_ETHERNET))
|
2017-08-25 15:58:17 +08:00
|
|
|
goto drop;
|
|
|
|
|
|
|
|
if (!pskb_may_pull(skb, ETH_HLEN))
|
|
|
|
goto drop;
|
|
|
|
|
|
|
|
skb_reset_mac_header(skb);
|
|
|
|
eth = (struct ethhdr *)skb->data;
|
|
|
|
|
|
|
|
/* To determine the frame's protocol, we assume it is 802.3. This avoids
|
|
|
|
* a call to eth_type_trans(), which is not really relevant for our
|
|
|
|
* use case.
|
|
|
|
*/
|
|
|
|
if (!eth_proto_is_802_3(eth->h_proto))
|
|
|
|
goto drop;
|
|
|
|
|
|
|
|
odev = dev_get_by_index_rcu(net, slwt->oif);
|
|
|
|
if (!odev)
|
|
|
|
goto drop;
|
|
|
|
|
|
|
|
/* As we accept Ethernet frames, make sure the egress device is of
|
|
|
|
* the correct type.
|
|
|
|
*/
|
|
|
|
if (odev->type != ARPHRD_ETHER)
|
|
|
|
goto drop;
|
|
|
|
|
|
|
|
if (!(odev->flags & IFF_UP) || !netif_carrier_ok(odev))
|
|
|
|
goto drop;
|
|
|
|
|
|
|
|
skb_orphan(skb);
|
|
|
|
|
|
|
|
if (skb_warn_if_lro(skb))
|
|
|
|
goto drop;
|
|
|
|
|
|
|
|
skb_forward_csum(skb);
|
|
|
|
|
|
|
|
if (skb->len - ETH_HLEN > odev->mtu)
|
|
|
|
goto drop;
|
|
|
|
|
|
|
|
skb->dev = odev;
|
|
|
|
skb->protocol = eth->h_proto;
|
|
|
|
|
|
|
|
return dev_queue_xmit(skb);
|
|
|
|
|
|
|
|
drop:
|
|
|
|
kfree_skb(skb);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2017-08-05 18:39:48 +08:00
|
|
|
/* decapsulate and forward to specified nexthop */
|
|
|
|
static int input_action_end_dx6(struct sk_buff *skb,
|
|
|
|
struct seg6_local_lwt *slwt)
|
|
|
|
{
|
2017-08-25 15:56:47 +08:00
|
|
|
struct in6_addr *nhaddr = NULL;
|
2017-08-05 18:39:48 +08:00
|
|
|
|
|
|
|
/* this function accepts IPv6 encapsulated packets, with either
|
|
|
|
* an SRH with SL=0, or no SRH.
|
|
|
|
*/
|
|
|
|
|
2017-08-25 15:56:47 +08:00
|
|
|
if (!decap_and_validate(skb, IPPROTO_IPV6))
|
2017-08-05 18:39:48 +08:00
|
|
|
goto drop;
|
|
|
|
|
2017-08-25 15:56:47 +08:00
|
|
|
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
|
2017-08-05 18:39:48 +08:00
|
|
|
goto drop;
|
|
|
|
|
|
|
|
/* The inner packet is not associated to any local interface,
|
|
|
|
* so we do not call netif_rx().
|
|
|
|
*
|
|
|
|
* If slwt->nh6 is set to ::, then lookup the nexthop for the
|
|
|
|
* inner packet's DA. Otherwise, use the specified nexthop.
|
|
|
|
*/
|
|
|
|
|
2017-08-25 15:56:47 +08:00
|
|
|
if (!ipv6_addr_any(&slwt->nh6))
|
|
|
|
nhaddr = &slwt->nh6;
|
2017-08-05 18:39:48 +08:00
|
|
|
|
2019-11-16 23:05:53 +08:00
|
|
|
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
|
|
|
|
|
2018-05-20 21:58:13 +08:00
|
|
|
seg6_lookup_nexthop(skb, nhaddr, 0);
|
2017-08-05 18:39:48 +08:00
|
|
|
|
|
|
|
return dst_input(skb);
|
|
|
|
drop:
|
|
|
|
kfree_skb(skb);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2017-08-25 15:58:17 +08:00
|
|
|
static int input_action_end_dx4(struct sk_buff *skb,
|
|
|
|
struct seg6_local_lwt *slwt)
|
|
|
|
{
|
|
|
|
struct iphdr *iph;
|
|
|
|
__be32 nhaddr;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
if (!decap_and_validate(skb, IPPROTO_IPIP))
|
|
|
|
goto drop;
|
|
|
|
|
|
|
|
if (!pskb_may_pull(skb, sizeof(struct iphdr)))
|
|
|
|
goto drop;
|
|
|
|
|
|
|
|
skb->protocol = htons(ETH_P_IP);
|
|
|
|
|
|
|
|
iph = ip_hdr(skb);
|
|
|
|
|
|
|
|
nhaddr = slwt->nh4.s_addr ?: iph->daddr;
|
|
|
|
|
|
|
|
skb_dst_drop(skb);
|
|
|
|
|
2019-11-16 23:05:53 +08:00
|
|
|
skb_set_transport_header(skb, sizeof(struct iphdr));
|
|
|
|
|
2017-08-25 15:58:17 +08:00
|
|
|
err = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev);
|
|
|
|
if (err)
|
|
|
|
goto drop;
|
|
|
|
|
|
|
|
return dst_input(skb);
|
|
|
|
|
|
|
|
drop:
|
|
|
|
kfree_skb(skb);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2020-12-02 21:05:14 +08:00
|
|
|
#ifdef CONFIG_NET_L3_MASTER_DEV
|
|
|
|
static struct net *fib6_config_get_net(const struct fib6_config *fib6_cfg)
|
|
|
|
{
|
|
|
|
const struct nl_info *nli = &fib6_cfg->fc_nlinfo;
|
|
|
|
|
|
|
|
return nli->nl_net;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int __seg6_end_dt_vrf_build(struct seg6_local_lwt *slwt, const void *cfg,
|
|
|
|
u16 family, struct netlink_ext_ack *extack)
|
|
|
|
{
|
|
|
|
struct seg6_end_dt_info *info = &slwt->dt_info;
|
|
|
|
int vrf_ifindex;
|
|
|
|
struct net *net;
|
|
|
|
|
|
|
|
net = fib6_config_get_net(cfg);
|
|
|
|
|
|
|
|
/* note that vrf_table was already set by parse_nla_vrftable() */
|
|
|
|
vrf_ifindex = l3mdev_ifindex_lookup_by_table_id(L3MDEV_TYPE_VRF, net,
|
|
|
|
info->vrf_table);
|
|
|
|
if (vrf_ifindex < 0) {
|
|
|
|
if (vrf_ifindex == -EPERM) {
|
|
|
|
NL_SET_ERR_MSG(extack,
|
|
|
|
"Strict mode for VRF is disabled");
|
|
|
|
} else if (vrf_ifindex == -ENODEV) {
|
|
|
|
NL_SET_ERR_MSG(extack,
|
|
|
|
"Table has no associated VRF device");
|
|
|
|
} else {
|
|
|
|
pr_debug("seg6local: SRv6 End.DT* creation error=%d\n",
|
|
|
|
vrf_ifindex);
|
|
|
|
}
|
|
|
|
|
|
|
|
return vrf_ifindex;
|
|
|
|
}
|
|
|
|
|
|
|
|
info->net = net;
|
|
|
|
info->vrf_ifindex = vrf_ifindex;
|
|
|
|
|
|
|
|
switch (family) {
|
|
|
|
case AF_INET:
|
|
|
|
info->proto = htons(ETH_P_IP);
|
|
|
|
info->hdrlen = sizeof(struct iphdr);
|
|
|
|
break;
|
2020-12-02 21:05:15 +08:00
|
|
|
case AF_INET6:
|
|
|
|
info->proto = htons(ETH_P_IPV6);
|
|
|
|
info->hdrlen = sizeof(struct ipv6hdr);
|
|
|
|
break;
|
2020-12-02 21:05:14 +08:00
|
|
|
default:
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
info->family = family;
|
|
|
|
info->mode = DT_VRF_MODE;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* The SRv6 End.DT4/DT6 behavior extracts the inner (IPv4/IPv6) packet and
|
|
|
|
* routes the IPv4/IPv6 packet by looking at the configured routing table.
|
|
|
|
*
|
|
|
|
* In the SRv6 End.DT4/DT6 use case, we can receive traffic (IPv6+Segment
|
|
|
|
* Routing Header packets) from several interfaces and the outer IPv6
|
|
|
|
* destination address (DA) is used for retrieving the specific instance of the
|
|
|
|
* End.DT4/DT6 behavior that should process the packets.
|
|
|
|
*
|
|
|
|
* However, the inner IPv4/IPv6 packet is not really bound to any receiving
|
|
|
|
* interface and thus the End.DT4/DT6 sets the VRF (associated with the
|
|
|
|
* corresponding routing table) as the *receiving* interface.
|
|
|
|
* In other words, the End.DT4/DT6 processes a packet as if it has been received
|
|
|
|
* directly by the VRF (and not by one of its slave devices, if any).
|
|
|
|
* In this way, the VRF interface is used for routing the IPv4/IPv6 packet in
|
|
|
|
* according to the routing table configured by the End.DT4/DT6 instance.
|
|
|
|
*
|
|
|
|
* This design allows you to get some interesting features like:
|
|
|
|
* 1) the statistics on rx packets;
|
|
|
|
* 2) the possibility to install a packet sniffer on the receiving interface
|
|
|
|
* (the VRF one) for looking at the incoming packets;
|
|
|
|
* 3) the possibility to leverage the netfilter prerouting hook for the inner
|
|
|
|
* IPv4 packet.
|
|
|
|
*
|
|
|
|
* This function returns:
|
|
|
|
* - the sk_buff* when the VRF rcv handler has processed the packet correctly;
|
|
|
|
* - NULL when the skb is consumed by the VRF rcv handler;
|
|
|
|
* - a pointer which encodes a negative error number in case of error.
|
|
|
|
* Note that in this case, the function takes care of freeing the skb.
|
|
|
|
*/
|
|
|
|
static struct sk_buff *end_dt_vrf_rcv(struct sk_buff *skb, u16 family,
|
|
|
|
struct net_device *dev)
|
|
|
|
{
|
|
|
|
/* based on l3mdev_ip_rcv; we are only interested in the master */
|
|
|
|
if (unlikely(!netif_is_l3_master(dev) && !netif_has_l3_rx_handler(dev)))
|
|
|
|
goto drop;
|
|
|
|
|
|
|
|
if (unlikely(!dev->l3mdev_ops->l3mdev_l3_rcv))
|
|
|
|
goto drop;
|
|
|
|
|
|
|
|
/* the decap packet IPv4/IPv6 does not come with any mac header info.
|
|
|
|
* We must unset the mac header to allow the VRF device to rebuild it,
|
|
|
|
* just in case there is a sniffer attached on the device.
|
|
|
|
*/
|
|
|
|
skb_unset_mac_header(skb);
|
|
|
|
|
|
|
|
skb = dev->l3mdev_ops->l3mdev_l3_rcv(dev, skb, family);
|
|
|
|
if (!skb)
|
|
|
|
/* the skb buffer was consumed by the handler */
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
/* when a packet is received by a VRF or by one of its slaves, the
|
|
|
|
* master device reference is set into the skb.
|
|
|
|
*/
|
|
|
|
if (unlikely(skb->dev != dev || skb->skb_iif != dev->ifindex))
|
|
|
|
goto drop;
|
|
|
|
|
|
|
|
return skb;
|
|
|
|
|
|
|
|
drop:
|
|
|
|
kfree_skb(skb);
|
|
|
|
return ERR_PTR(-EINVAL);
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct net_device *end_dt_get_vrf_rcu(struct sk_buff *skb,
|
|
|
|
struct seg6_end_dt_info *info)
|
|
|
|
{
|
|
|
|
int vrf_ifindex = info->vrf_ifindex;
|
|
|
|
struct net *net = info->net;
|
|
|
|
|
|
|
|
if (unlikely(vrf_ifindex < 0))
|
|
|
|
goto error;
|
|
|
|
|
|
|
|
if (unlikely(!net_eq(dev_net(skb->dev), net)))
|
|
|
|
goto error;
|
|
|
|
|
|
|
|
return dev_get_by_index_rcu(net, vrf_ifindex);
|
|
|
|
|
|
|
|
error:
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct sk_buff *end_dt_vrf_core(struct sk_buff *skb,
|
|
|
|
struct seg6_local_lwt *slwt)
|
|
|
|
{
|
|
|
|
struct seg6_end_dt_info *info = &slwt->dt_info;
|
|
|
|
struct net_device *vrf;
|
|
|
|
|
|
|
|
vrf = end_dt_get_vrf_rcu(skb, info);
|
|
|
|
if (unlikely(!vrf))
|
|
|
|
goto drop;
|
|
|
|
|
|
|
|
skb->protocol = info->proto;
|
|
|
|
|
|
|
|
skb_dst_drop(skb);
|
|
|
|
|
|
|
|
skb_set_transport_header(skb, info->hdrlen);
|
|
|
|
|
|
|
|
return end_dt_vrf_rcv(skb, info->family, vrf);
|
|
|
|
|
|
|
|
drop:
|
|
|
|
kfree_skb(skb);
|
|
|
|
return ERR_PTR(-EINVAL);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int input_action_end_dt4(struct sk_buff *skb,
|
|
|
|
struct seg6_local_lwt *slwt)
|
|
|
|
{
|
|
|
|
struct iphdr *iph;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
if (!decap_and_validate(skb, IPPROTO_IPIP))
|
|
|
|
goto drop;
|
|
|
|
|
|
|
|
if (!pskb_may_pull(skb, sizeof(struct iphdr)))
|
|
|
|
goto drop;
|
|
|
|
|
|
|
|
skb = end_dt_vrf_core(skb, slwt);
|
|
|
|
if (!skb)
|
|
|
|
/* packet has been processed and consumed by the VRF */
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (IS_ERR(skb))
|
|
|
|
return PTR_ERR(skb);
|
|
|
|
|
|
|
|
iph = ip_hdr(skb);
|
|
|
|
|
|
|
|
err = ip_route_input(skb, iph->daddr, iph->saddr, 0, skb->dev);
|
|
|
|
if (unlikely(err))
|
|
|
|
goto drop;
|
|
|
|
|
|
|
|
return dst_input(skb);
|
|
|
|
|
|
|
|
drop:
|
|
|
|
kfree_skb(skb);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int seg6_end_dt4_build(struct seg6_local_lwt *slwt, const void *cfg,
|
|
|
|
struct netlink_ext_ack *extack)
|
|
|
|
{
|
|
|
|
return __seg6_end_dt_vrf_build(slwt, cfg, AF_INET, extack);
|
|
|
|
}
|
2020-12-02 21:05:15 +08:00
|
|
|
|
|
|
|
static enum
|
|
|
|
seg6_end_dt_mode seg6_end_dt6_parse_mode(struct seg6_local_lwt *slwt)
|
|
|
|
{
|
|
|
|
unsigned long parsed_optattrs = slwt->parsed_optattrs;
|
|
|
|
bool legacy, vrfmode;
|
|
|
|
|
|
|
|
legacy = !!(parsed_optattrs & (1 << SEG6_LOCAL_TABLE));
|
|
|
|
vrfmode = !!(parsed_optattrs & (1 << SEG6_LOCAL_VRFTABLE));
|
|
|
|
|
|
|
|
if (!(legacy ^ vrfmode))
|
|
|
|
/* both are absent or present: invalid DT6 mode */
|
|
|
|
return DT_INVALID_MODE;
|
|
|
|
|
|
|
|
return legacy ? DT_LEGACY_MODE : DT_VRF_MODE;
|
|
|
|
}
|
|
|
|
|
|
|
|
static enum seg6_end_dt_mode seg6_end_dt6_get_mode(struct seg6_local_lwt *slwt)
|
|
|
|
{
|
|
|
|
struct seg6_end_dt_info *info = &slwt->dt_info;
|
|
|
|
|
|
|
|
return info->mode;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int seg6_end_dt6_build(struct seg6_local_lwt *slwt, const void *cfg,
|
|
|
|
struct netlink_ext_ack *extack)
|
|
|
|
{
|
|
|
|
enum seg6_end_dt_mode mode = seg6_end_dt6_parse_mode(slwt);
|
|
|
|
struct seg6_end_dt_info *info = &slwt->dt_info;
|
|
|
|
|
|
|
|
switch (mode) {
|
|
|
|
case DT_LEGACY_MODE:
|
|
|
|
info->mode = DT_LEGACY_MODE;
|
|
|
|
return 0;
|
|
|
|
case DT_VRF_MODE:
|
|
|
|
return __seg6_end_dt_vrf_build(slwt, cfg, AF_INET6, extack);
|
|
|
|
default:
|
|
|
|
NL_SET_ERR_MSG(extack, "table or vrftable must be specified");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
}
|
2020-12-02 21:05:14 +08:00
|
|
|
#endif
|
|
|
|
|
2017-08-25 15:58:17 +08:00
|
|
|
static int input_action_end_dt6(struct sk_buff *skb,
|
|
|
|
struct seg6_local_lwt *slwt)
|
|
|
|
{
|
|
|
|
if (!decap_and_validate(skb, IPPROTO_IPV6))
|
|
|
|
goto drop;
|
|
|
|
|
|
|
|
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
|
|
|
|
goto drop;
|
|
|
|
|
2020-12-02 21:05:15 +08:00
|
|
|
#ifdef CONFIG_NET_L3_MASTER_DEV
|
|
|
|
if (seg6_end_dt6_get_mode(slwt) == DT_LEGACY_MODE)
|
|
|
|
goto legacy_mode;
|
|
|
|
|
|
|
|
/* DT6_VRF_MODE */
|
|
|
|
skb = end_dt_vrf_core(skb, slwt);
|
|
|
|
if (!skb)
|
|
|
|
/* packet has been processed and consumed by the VRF */
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (IS_ERR(skb))
|
|
|
|
return PTR_ERR(skb);
|
|
|
|
|
|
|
|
/* note: this time we do not need to specify the table because the VRF
|
|
|
|
* takes care of selecting the correct table.
|
|
|
|
*/
|
|
|
|
seg6_lookup_any_nexthop(skb, NULL, 0, true);
|
|
|
|
|
|
|
|
return dst_input(skb);
|
|
|
|
|
|
|
|
legacy_mode:
|
|
|
|
#endif
|
2019-11-16 23:05:53 +08:00
|
|
|
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
|
|
|
|
|
2019-11-23 00:22:42 +08:00
|
|
|
seg6_lookup_any_nexthop(skb, NULL, slwt->table, true);
|
2017-08-25 15:58:17 +08:00
|
|
|
|
|
|
|
return dst_input(skb);
|
|
|
|
|
|
|
|
drop:
|
|
|
|
kfree_skb(skb);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2017-08-05 18:39:48 +08:00
|
|
|
/* push an SRH on top of the current one */
|
|
|
|
static int input_action_end_b6(struct sk_buff *skb, struct seg6_local_lwt *slwt)
|
|
|
|
{
|
|
|
|
struct ipv6_sr_hdr *srh;
|
|
|
|
int err = -EINVAL;
|
|
|
|
|
|
|
|
srh = get_and_validate_srh(skb);
|
|
|
|
if (!srh)
|
|
|
|
goto drop;
|
|
|
|
|
|
|
|
err = seg6_do_srh_inline(skb, slwt->srh);
|
|
|
|
if (err)
|
|
|
|
goto drop;
|
|
|
|
|
|
|
|
ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
|
|
|
|
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
|
|
|
|
|
2018-05-20 21:58:13 +08:00
|
|
|
seg6_lookup_nexthop(skb, NULL, 0);
|
2017-08-05 18:39:48 +08:00
|
|
|
|
|
|
|
return dst_input(skb);
|
|
|
|
|
|
|
|
drop:
|
|
|
|
kfree_skb(skb);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* encapsulate within an outer IPv6 header and a specified SRH */
|
|
|
|
static int input_action_end_b6_encap(struct sk_buff *skb,
|
|
|
|
struct seg6_local_lwt *slwt)
|
|
|
|
{
|
|
|
|
struct ipv6_sr_hdr *srh;
|
|
|
|
int err = -EINVAL;
|
|
|
|
|
|
|
|
srh = get_and_validate_srh(skb);
|
|
|
|
if (!srh)
|
|
|
|
goto drop;
|
|
|
|
|
2017-08-25 15:56:47 +08:00
|
|
|
advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
|
2017-08-05 18:39:48 +08:00
|
|
|
|
|
|
|
skb_reset_inner_headers(skb);
|
|
|
|
skb->encapsulation = 1;
|
|
|
|
|
2017-08-25 15:56:44 +08:00
|
|
|
err = seg6_do_srh_encap(skb, slwt->srh, IPPROTO_IPV6);
|
2017-08-05 18:39:48 +08:00
|
|
|
if (err)
|
|
|
|
goto drop;
|
|
|
|
|
|
|
|
ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
|
|
|
|
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
|
|
|
|
|
2018-05-20 21:58:13 +08:00
|
|
|
seg6_lookup_nexthop(skb, NULL, 0);
|
2017-08-05 18:39:48 +08:00
|
|
|
|
|
|
|
return dst_input(skb);
|
|
|
|
|
|
|
|
drop:
|
|
|
|
kfree_skb(skb);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
bpf: Add IPv6 Segment Routing helpers
The BPF seg6local hook should be powerful enough to enable users to
implement most of the use-cases one could think of. After some thinking,
we figured out that the following actions should be possible on a SRv6
packet, requiring 3 specific helpers :
- bpf_lwt_seg6_store_bytes: Modify non-sensitive fields of the SRH
- bpf_lwt_seg6_adjust_srh: Allow to grow or shrink a SRH
(to add/delete TLVs)
- bpf_lwt_seg6_action: Apply some SRv6 network programming actions
(specifically End.X, End.T, End.B6 and
End.B6.Encap)
The specifications of these helpers are provided in the patch (see
include/uapi/linux/bpf.h).
The non-sensitive fields of the SRH are the following : flags, tag and
TLVs. The other fields can not be modified, to maintain the SRH
integrity. Flags, tag and TLVs can easily be modified as their validity
can be checked afterwards via seg6_validate_srh. It is not allowed to
modify the segments directly. If one wants to add segments on the path,
he should stack a new SRH using the End.B6 action via
bpf_lwt_seg6_action.
Growing, shrinking or editing TLVs via the helpers will flag the SRH as
invalid, and it will have to be re-validated before re-entering the IPv6
layer. This flag is stored in a per-CPU buffer, along with the current
header length in bytes.
Storing the SRH len in bytes in the control block is mandatory when using
bpf_lwt_seg6_adjust_srh. The Header Ext. Length field contains the SRH
len rounded to 8 bytes (a padding TLV can be inserted to ensure the 8-bytes
boundary). When adding/deleting TLVs within the BPF program, the SRH may
temporarily be in an invalid state where its length cannot be rounded to 8
bytes without remainder, hence the need to store the length in bytes
separately. The caller of the BPF program can then ensure that the SRH's
final length is valid using this value. Again, a final SRH modified by a
BPF program which doesn’t respect the 8-bytes boundary will be discarded
as it will be considered as invalid.
Finally, a fourth helper is provided, bpf_lwt_push_encap, which is
available from the LWT BPF IN hook, but not from the seg6local BPF one.
This helper allows to encapsulate a Segment Routing Header (either with
a new outer IPv6 header, or by inlining it directly in the existing IPv6
header) into a non-SRv6 packet. This helper is required if we want to
offer the possibility to dynamically encapsulate a SRH for non-SRv6 packet,
as the BPF seg6local hook only works on traffic already containing a SRH.
This is the BPF equivalent of the seg6 LWT infrastructure, which achieves
the same purpose but with a static SRH per route.
These helpers require CONFIG_IPV6=y (and not =m).
Signed-off-by: Mathieu Xhonneux <m.xhonneux@gmail.com>
Acked-by: David Lebrun <dlebrun@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-05-20 21:58:14 +08:00
|
|
|
/* Per-CPU bookkeeping for the SRH of the packet being processed by
 * input_action_end_bpf(); seg6_bpf_has_valid_srh() reads and updates it.
 */
DEFINE_PER_CPU(struct seg6_bpf_srh_state, seg6_bpf_srh_states);
|
|
|
|
|
2018-07-26 10:10:40 +08:00
|
|
|
bool seg6_bpf_has_valid_srh(struct sk_buff *skb)
|
|
|
|
{
|
|
|
|
struct seg6_bpf_srh_state *srh_state =
|
|
|
|
this_cpu_ptr(&seg6_bpf_srh_states);
|
|
|
|
struct ipv6_sr_hdr *srh = srh_state->srh;
|
|
|
|
|
|
|
|
if (unlikely(srh == NULL))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (unlikely(!srh_state->valid)) {
|
|
|
|
if ((srh_state->hdrlen & 7) != 0)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
srh->hdrlen = (u8)(srh_state->hdrlen >> 3);
|
2020-06-03 14:54:42 +08:00
|
|
|
if (!seg6_validate_srh(srh, (srh->hdrlen + 1) << 3, true))
|
2018-07-26 10:10:40 +08:00
|
|
|
return false;
|
|
|
|
|
|
|
|
srh_state->valid = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
ipv6: sr: Add seg6local action End.BPF
This patch adds the End.BPF action to the LWT seg6local infrastructure.
This action works like any other seg6local End action, meaning that an IPv6
header with SRH is needed, whose DA has to be equal to the SID of the
action. It will also advance the SRH to the next segment, the BPF program
does not have to take care of this.
Since the BPF program must not be a source of instability in the kernel, it
is important to ensure that the integrity of the packet is maintained
before yielding it back to the IPv6 layer. The hook hence keeps track if
the SRH has been altered through the helpers, and re-validates its
content if needed with seg6_validate_srh. The state kept for validation is
stored in a per-CPU buffer. The BPF program is not allowed to directly
write into the packet, and only some fields of the SRH can be altered
through the helper bpf_lwt_seg6_store_bytes.
Performance profiling has shown that the SRH re-validation does not induce
a significant overhead. If the altered SRH is deemed as invalid, the packet
is dropped.
This validation is also done before executing any action through
bpf_lwt_seg6_action, and will not be performed again if the SRH is not
modified after calling the action.
The BPF program may return 3 types of return codes:
- BPF_OK: the End.BPF action will look up the next destination through
seg6_lookup_nexthop.
- BPF_REDIRECT: if an action has been executed through the
bpf_lwt_seg6_action helper, the BPF program should return this
value, as the skb's destination is already set and the default
lookup should not be performed.
- BPF_DROP : the packet will be dropped.
Signed-off-by: Mathieu Xhonneux <m.xhonneux@gmail.com>
Acked-by: David Lebrun <dlebrun@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-05-20 21:58:16 +08:00
|
|
|
static int input_action_end_bpf(struct sk_buff *skb,
|
|
|
|
struct seg6_local_lwt *slwt)
|
|
|
|
{
|
|
|
|
struct seg6_bpf_srh_state *srh_state =
|
|
|
|
this_cpu_ptr(&seg6_bpf_srh_states);
|
|
|
|
struct ipv6_sr_hdr *srh;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
srh = get_and_validate_srh(skb);
|
2018-07-26 10:10:40 +08:00
|
|
|
if (!srh) {
|
|
|
|
kfree_skb(skb);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
ipv6: sr: Add seg6local action End.BPF
This patch adds the End.BPF action to the LWT seg6local infrastructure.
This action works like any other seg6local End action, meaning that an IPv6
header with SRH is needed, whose DA has to be equal to the SID of the
action. It will also advance the SRH to the next segment, the BPF program
does not have to take care of this.
Since the BPF program may not be a source of instability in the kernel, it
is important to ensure that the integrity of the packet is maintained
before yielding it back to the IPv6 layer. The hook hence keeps track if
the SRH has been altered through the helpers, and re-validates its
content if needed with seg6_validate_srh. The state kept for validation is
stored in a per-CPU buffer. The BPF program is not allowed to directly
write into the packet, and only some fields of the SRH can be altered
through the helper bpf_lwt_seg6_store_bytes.
Performances profiling has shown that the SRH re-validation does not induce
a significant overhead. If the altered SRH is deemed as invalid, the packet
is dropped.
This validation is also done before executing any action through
bpf_lwt_seg6_action, and will not be performed again if the SRH is not
modified after calling the action.
The BPF program may return 3 types of return codes:
- BPF_OK: the End.BPF action will look up the next destination through
seg6_lookup_nexthop.
- BPF_REDIRECT: if an action has been executed through the
bpf_lwt_seg6_action helper, the BPF program should return this
value, as the skb's destination is already set and the default
lookup should not be performed.
- BPF_DROP : the packet will be dropped.
Signed-off-by: Mathieu Xhonneux <m.xhonneux@gmail.com>
Acked-by: David Lebrun <dlebrun@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-05-20 21:58:16 +08:00
|
|
|
advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
|
|
|
|
|
|
|
|
/* preempt_disable is needed to protect the per-CPU buffer srh_state,
|
|
|
|
* which is also accessed by the bpf_lwt_seg6_* helpers
|
|
|
|
*/
|
|
|
|
preempt_disable();
|
2018-07-26 10:10:40 +08:00
|
|
|
srh_state->srh = srh;
|
ipv6: sr: Add seg6local action End.BPF
This patch adds the End.BPF action to the LWT seg6local infrastructure.
This action works like any other seg6local End action, meaning that an IPv6
header with SRH is needed, whose DA has to be equal to the SID of the
action. It will also advance the SRH to the next segment, the BPF program
does not have to take care of this.
Since the BPF program may not be a source of instability in the kernel, it
is important to ensure that the integrity of the packet is maintained
before yielding it back to the IPv6 layer. The hook hence keeps track if
the SRH has been altered through the helpers, and re-validates its
content if needed with seg6_validate_srh. The state kept for validation is
stored in a per-CPU buffer. The BPF program is not allowed to directly
write into the packet, and only some fields of the SRH can be altered
through the helper bpf_lwt_seg6_store_bytes.
Performances profiling has shown that the SRH re-validation does not induce
a significant overhead. If the altered SRH is deemed as invalid, the packet
is dropped.
This validation is also done before executing any action through
bpf_lwt_seg6_action, and will not be performed again if the SRH is not
modified after calling the action.
The BPF program may return 3 types of return codes:
- BPF_OK: the End.BPF action will look up the next destination through
seg6_lookup_nexthop.
- BPF_REDIRECT: if an action has been executed through the
bpf_lwt_seg6_action helper, the BPF program should return this
value, as the skb's destination is already set and the default
lookup should not be performed.
- BPF_DROP : the packet will be dropped.
Signed-off-by: Mathieu Xhonneux <m.xhonneux@gmail.com>
Acked-by: David Lebrun <dlebrun@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-05-20 21:58:16 +08:00
|
|
|
srh_state->hdrlen = srh->hdrlen << 3;
|
2018-07-26 10:10:40 +08:00
|
|
|
srh_state->valid = true;
|
ipv6: sr: Add seg6local action End.BPF
This patch adds the End.BPF action to the LWT seg6local infrastructure.
This action works like any other seg6local End action, meaning that an IPv6
header with SRH is needed, whose DA has to be equal to the SID of the
action. It will also advance the SRH to the next segment, the BPF program
does not have to take care of this.
Since the BPF program may not be a source of instability in the kernel, it
is important to ensure that the integrity of the packet is maintained
before yielding it back to the IPv6 layer. The hook hence keeps track if
the SRH has been altered through the helpers, and re-validates its
content if needed with seg6_validate_srh. The state kept for validation is
stored in a per-CPU buffer. The BPF program is not allowed to directly
write into the packet, and only some fields of the SRH can be altered
through the helper bpf_lwt_seg6_store_bytes.
Performances profiling has shown that the SRH re-validation does not induce
a significant overhead. If the altered SRH is deemed as invalid, the packet
is dropped.
This validation is also done before executing any action through
bpf_lwt_seg6_action, and will not be performed again if the SRH is not
modified after calling the action.
The BPF program may return 3 types of return codes:
- BPF_OK: the End.BPF action will look up the next destination through
seg6_lookup_nexthop.
- BPF_REDIRECT: if an action has been executed through the
bpf_lwt_seg6_action helper, the BPF program should return this
value, as the skb's destination is already set and the default
lookup should not be performed.
- BPF_DROP : the packet will be dropped.
Signed-off-by: Mathieu Xhonneux <m.xhonneux@gmail.com>
Acked-by: David Lebrun <dlebrun@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-05-20 21:58:16 +08:00
|
|
|
|
|
|
|
rcu_read_lock();
|
|
|
|
bpf_compute_data_pointers(skb);
|
|
|
|
ret = bpf_prog_run_save_cb(slwt->bpf.prog, skb);
|
|
|
|
rcu_read_unlock();
|
|
|
|
|
|
|
|
switch (ret) {
|
|
|
|
case BPF_OK:
|
|
|
|
case BPF_REDIRECT:
|
|
|
|
break;
|
|
|
|
case BPF_DROP:
|
|
|
|
goto drop;
|
|
|
|
default:
|
|
|
|
pr_warn_once("bpf-seg6local: Illegal return value %u\n", ret);
|
|
|
|
goto drop;
|
|
|
|
}
|
|
|
|
|
2018-07-26 10:10:40 +08:00
|
|
|
if (srh_state->srh && !seg6_bpf_has_valid_srh(skb))
|
ipv6: sr: Add seg6local action End.BPF
This patch adds the End.BPF action to the LWT seg6local infrastructure.
This action works like any other seg6local End action, meaning that an IPv6
header with SRH is needed, whose DA has to be equal to the SID of the
action. It will also advance the SRH to the next segment, the BPF program
does not have to take care of this.
Since the BPF program may not be a source of instability in the kernel, it
is important to ensure that the integrity of the packet is maintained
before yielding it back to the IPv6 layer. The hook hence keeps track if
the SRH has been altered through the helpers, and re-validates its
content if needed with seg6_validate_srh. The state kept for validation is
stored in a per-CPU buffer. The BPF program is not allowed to directly
write into the packet, and only some fields of the SRH can be altered
through the helper bpf_lwt_seg6_store_bytes.
Performances profiling has shown that the SRH re-validation does not induce
a significant overhead. If the altered SRH is deemed as invalid, the packet
is dropped.
This validation is also done before executing any action through
bpf_lwt_seg6_action, and will not be performed again if the SRH is not
modified after calling the action.
The BPF program may return 3 types of return codes:
- BPF_OK: the End.BPF action will look up the next destination through
seg6_lookup_nexthop.
- BPF_REDIRECT: if an action has been executed through the
bpf_lwt_seg6_action helper, the BPF program should return this
value, as the skb's destination is already set and the default
lookup should not be performed.
- BPF_DROP : the packet will be dropped.
Signed-off-by: Mathieu Xhonneux <m.xhonneux@gmail.com>
Acked-by: David Lebrun <dlebrun@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-05-20 21:58:16 +08:00
|
|
|
goto drop;
|
|
|
|
|
2018-07-26 10:10:40 +08:00
|
|
|
preempt_enable();
|
ipv6: sr: Add seg6local action End.BPF
This patch adds the End.BPF action to the LWT seg6local infrastructure.
This action works like any other seg6local End action, meaning that an IPv6
header with SRH is needed, whose DA has to be equal to the SID of the
action. It will also advance the SRH to the next segment, the BPF program
does not have to take care of this.
Since the BPF program may not be a source of instability in the kernel, it
is important to ensure that the integrity of the packet is maintained
before yielding it back to the IPv6 layer. The hook hence keeps track if
the SRH has been altered through the helpers, and re-validates its
content if needed with seg6_validate_srh. The state kept for validation is
stored in a per-CPU buffer. The BPF program is not allowed to directly
write into the packet, and only some fields of the SRH can be altered
through the helper bpf_lwt_seg6_store_bytes.
Performances profiling has shown that the SRH re-validation does not induce
a significant overhead. If the altered SRH is deemed as invalid, the packet
is dropped.
This validation is also done before executing any action through
bpf_lwt_seg6_action, and will not be performed again if the SRH is not
modified after calling the action.
The BPF program may return 3 types of return codes:
- BPF_OK: the End.BPF action will look up the next destination through
seg6_lookup_nexthop.
- BPF_REDIRECT: if an action has been executed through the
bpf_lwt_seg6_action helper, the BPF program should return this
value, as the skb's destination is already set and the default
lookup should not be performed.
- BPF_DROP : the packet will be dropped.
Signed-off-by: Mathieu Xhonneux <m.xhonneux@gmail.com>
Acked-by: David Lebrun <dlebrun@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-05-20 21:58:16 +08:00
|
|
|
if (ret != BPF_REDIRECT)
|
|
|
|
seg6_lookup_nexthop(skb, NULL, 0);
|
|
|
|
|
|
|
|
return dst_input(skb);
|
|
|
|
|
|
|
|
drop:
|
2018-07-26 10:10:40 +08:00
|
|
|
preempt_enable();
|
ipv6: sr: Add seg6local action End.BPF
This patch adds the End.BPF action to the LWT seg6local infrastructure.
This action works like any other seg6local End action, meaning that an IPv6
header with SRH is needed, whose DA has to be equal to the SID of the
action. It will also advance the SRH to the next segment, the BPF program
does not have to take care of this.
Since the BPF program may not be a source of instability in the kernel, it
is important to ensure that the integrity of the packet is maintained
before yielding it back to the IPv6 layer. The hook hence keeps track if
the SRH has been altered through the helpers, and re-validates its
content if needed with seg6_validate_srh. The state kept for validation is
stored in a per-CPU buffer. The BPF program is not allowed to directly
write into the packet, and only some fields of the SRH can be altered
through the helper bpf_lwt_seg6_store_bytes.
Performances profiling has shown that the SRH re-validation does not induce
a significant overhead. If the altered SRH is deemed as invalid, the packet
is dropped.
This validation is also done before executing any action through
bpf_lwt_seg6_action, and will not be performed again if the SRH is not
modified after calling the action.
The BPF program may return 3 types of return codes:
- BPF_OK: the End.BPF action will look up the next destination through
seg6_lookup_nexthop.
- BPF_REDIRECT: if an action has been executed through the
bpf_lwt_seg6_action helper, the BPF program should return this
value, as the skb's destination is already set and the default
lookup should not be performed.
- BPF_DROP : the packet will be dropped.
Signed-off-by: Mathieu Xhonneux <m.xhonneux@gmail.com>
Acked-by: David Lebrun <dlebrun@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-05-20 21:58:16 +08:00
|
|
|
kfree_skb(skb);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2017-08-05 18:38:26 +08:00
|
|
|
static struct seg6_action_desc seg6_action_table[] = {
|
|
|
|
{
|
|
|
|
.action = SEG6_LOCAL_ACTION_END,
|
|
|
|
.attrs = 0,
|
2017-08-05 18:39:48 +08:00
|
|
|
.input = input_action_end,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.action = SEG6_LOCAL_ACTION_END_X,
|
|
|
|
.attrs = (1 << SEG6_LOCAL_NH6),
|
|
|
|
.input = input_action_end_x,
|
2017-08-05 18:38:26 +08:00
|
|
|
},
|
2017-08-25 15:58:17 +08:00
|
|
|
{
|
|
|
|
.action = SEG6_LOCAL_ACTION_END_T,
|
|
|
|
.attrs = (1 << SEG6_LOCAL_TABLE),
|
|
|
|
.input = input_action_end_t,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.action = SEG6_LOCAL_ACTION_END_DX2,
|
|
|
|
.attrs = (1 << SEG6_LOCAL_OIF),
|
|
|
|
.input = input_action_end_dx2,
|
|
|
|
},
|
2017-08-05 18:39:48 +08:00
|
|
|
{
|
|
|
|
.action = SEG6_LOCAL_ACTION_END_DX6,
|
|
|
|
.attrs = (1 << SEG6_LOCAL_NH6),
|
|
|
|
.input = input_action_end_dx6,
|
|
|
|
},
|
2017-08-25 15:58:17 +08:00
|
|
|
{
|
|
|
|
.action = SEG6_LOCAL_ACTION_END_DX4,
|
|
|
|
.attrs = (1 << SEG6_LOCAL_NH4),
|
|
|
|
.input = input_action_end_dx4,
|
|
|
|
},
|
2020-12-02 21:05:14 +08:00
|
|
|
{
|
|
|
|
.action = SEG6_LOCAL_ACTION_END_DT4,
|
|
|
|
.attrs = (1 << SEG6_LOCAL_VRFTABLE),
|
|
|
|
#ifdef CONFIG_NET_L3_MASTER_DEV
|
|
|
|
.input = input_action_end_dt4,
|
|
|
|
.slwt_ops = {
|
|
|
|
.build_state = seg6_end_dt4_build,
|
|
|
|
},
|
|
|
|
#endif
|
|
|
|
},
|
2017-08-25 15:58:17 +08:00
|
|
|
{
|
|
|
|
.action = SEG6_LOCAL_ACTION_END_DT6,
|
2020-12-02 21:05:15 +08:00
|
|
|
#ifdef CONFIG_NET_L3_MASTER_DEV
|
|
|
|
.attrs = 0,
|
|
|
|
.optattrs = (1 << SEG6_LOCAL_TABLE) |
|
|
|
|
(1 << SEG6_LOCAL_VRFTABLE),
|
|
|
|
.slwt_ops = {
|
|
|
|
.build_state = seg6_end_dt6_build,
|
|
|
|
},
|
|
|
|
#else
|
2017-08-25 15:58:17 +08:00
|
|
|
.attrs = (1 << SEG6_LOCAL_TABLE),
|
2020-12-02 21:05:15 +08:00
|
|
|
#endif
|
2017-08-25 15:58:17 +08:00
|
|
|
.input = input_action_end_dt6,
|
|
|
|
},
|
2017-08-05 18:39:48 +08:00
|
|
|
{
|
|
|
|
.action = SEG6_LOCAL_ACTION_END_B6,
|
|
|
|
.attrs = (1 << SEG6_LOCAL_SRH),
|
|
|
|
.input = input_action_end_b6,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.action = SEG6_LOCAL_ACTION_END_B6_ENCAP,
|
|
|
|
.attrs = (1 << SEG6_LOCAL_SRH),
|
|
|
|
.input = input_action_end_b6_encap,
|
|
|
|
.static_headroom = sizeof(struct ipv6hdr),
|
ipv6: sr: Add seg6local action End.BPF
This patch adds the End.BPF action to the LWT seg6local infrastructure.
This action works like any other seg6local End action, meaning that an IPv6
header with SRH is needed, whose DA has to be equal to the SID of the
action. It will also advance the SRH to the next segment, the BPF program
does not have to take care of this.
Since the BPF program may not be a source of instability in the kernel, it
is important to ensure that the integrity of the packet is maintained
before yielding it back to the IPv6 layer. The hook hence keeps track if
the SRH has been altered through the helpers, and re-validates its
content if needed with seg6_validate_srh. The state kept for validation is
stored in a per-CPU buffer. The BPF program is not allowed to directly
write into the packet, and only some fields of the SRH can be altered
through the helper bpf_lwt_seg6_store_bytes.
Performances profiling has shown that the SRH re-validation does not induce
a significant overhead. If the altered SRH is deemed as invalid, the packet
is dropped.
This validation is also done before executing any action through
bpf_lwt_seg6_action, and will not be performed again if the SRH is not
modified after calling the action.
The BPF program may return 3 types of return codes:
- BPF_OK: the End.BPF action will look up the next destination through
seg6_lookup_nexthop.
- BPF_REDIRECT: if an action has been executed through the
bpf_lwt_seg6_action helper, the BPF program should return this
value, as the skb's destination is already set and the default
lookup should not be performed.
- BPF_DROP : the packet will be dropped.
Signed-off-by: Mathieu Xhonneux <m.xhonneux@gmail.com>
Acked-by: David Lebrun <dlebrun@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-05-20 21:58:16 +08:00
|
|
|
},
|
|
|
|
{
|
|
|
|
.action = SEG6_LOCAL_ACTION_END_BPF,
|
|
|
|
.attrs = (1 << SEG6_LOCAL_BPF),
|
|
|
|
.input = input_action_end_bpf,
|
|
|
|
},
|
|
|
|
|
2017-08-05 18:38:26 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
static struct seg6_action_desc *__get_action_desc(int action)
|
|
|
|
{
|
|
|
|
struct seg6_action_desc *desc;
|
|
|
|
int i, count;
|
|
|
|
|
2018-01-08 07:50:26 +08:00
|
|
|
count = ARRAY_SIZE(seg6_action_table);
|
2017-08-05 18:38:26 +08:00
|
|
|
for (i = 0; i < count; i++) {
|
|
|
|
desc = &seg6_action_table[i];
|
|
|
|
if (desc->action == action)
|
|
|
|
return desc;
|
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int seg6_local_input(struct sk_buff *skb)
|
|
|
|
{
|
|
|
|
struct dst_entry *orig_dst = skb_dst(skb);
|
|
|
|
struct seg6_action_desc *desc;
|
|
|
|
struct seg6_local_lwt *slwt;
|
|
|
|
|
2017-08-25 15:56:46 +08:00
|
|
|
if (skb->protocol != htons(ETH_P_IPV6)) {
|
|
|
|
kfree_skb(skb);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2017-08-05 18:38:26 +08:00
|
|
|
slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
|
|
|
|
desc = slwt->desc;
|
|
|
|
|
|
|
|
return desc->input(skb, slwt);
|
|
|
|
}
|
|
|
|
|
|
|
|
static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = {
|
|
|
|
[SEG6_LOCAL_ACTION] = { .type = NLA_U32 },
|
|
|
|
[SEG6_LOCAL_SRH] = { .type = NLA_BINARY },
|
|
|
|
[SEG6_LOCAL_TABLE] = { .type = NLA_U32 },
|
2020-12-02 21:05:14 +08:00
|
|
|
[SEG6_LOCAL_VRFTABLE] = { .type = NLA_U32 },
|
2017-08-05 18:38:26 +08:00
|
|
|
[SEG6_LOCAL_NH4] = { .type = NLA_BINARY,
|
|
|
|
.len = sizeof(struct in_addr) },
|
|
|
|
[SEG6_LOCAL_NH6] = { .type = NLA_BINARY,
|
|
|
|
.len = sizeof(struct in6_addr) },
|
|
|
|
[SEG6_LOCAL_IIF] = { .type = NLA_U32 },
|
|
|
|
[SEG6_LOCAL_OIF] = { .type = NLA_U32 },
|
ipv6: sr: Add seg6local action End.BPF
This patch adds the End.BPF action to the LWT seg6local infrastructure.
This action works like any other seg6local End action, meaning that an IPv6
header with SRH is needed, whose DA has to be equal to the SID of the
action. It will also advance the SRH to the next segment, the BPF program
does not have to take care of this.
Since the BPF program may not be a source of instability in the kernel, it
is important to ensure that the integrity of the packet is maintained
before yielding it back to the IPv6 layer. The hook hence keeps track if
the SRH has been altered through the helpers, and re-validates its
content if needed with seg6_validate_srh. The state kept for validation is
stored in a per-CPU buffer. The BPF program is not allowed to directly
write into the packet, and only some fields of the SRH can be altered
through the helper bpf_lwt_seg6_store_bytes.
Performances profiling has shown that the SRH re-validation does not induce
a significant overhead. If the altered SRH is deemed as invalid, the packet
is dropped.
This validation is also done before executing any action through
bpf_lwt_seg6_action, and will not be performed again if the SRH is not
modified after calling the action.
The BPF program may return 3 types of return codes:
- BPF_OK: the End.BPF action will look up the next destination through
seg6_lookup_nexthop.
- BPF_REDIRECT: if an action has been executed through the
bpf_lwt_seg6_action helper, the BPF program should return this
value, as the skb's destination is already set and the default
lookup should not be performed.
- BPF_DROP : the packet will be dropped.
Signed-off-by: Mathieu Xhonneux <m.xhonneux@gmail.com>
Acked-by: David Lebrun <dlebrun@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-05-20 21:58:16 +08:00
|
|
|
[SEG6_LOCAL_BPF] = { .type = NLA_NESTED },
|
2017-08-05 18:38:26 +08:00
|
|
|
};
|
|
|
|
|
2017-08-05 18:38:27 +08:00
|
|
|
static int parse_nla_srh(struct nlattr **attrs, struct seg6_local_lwt *slwt)
|
|
|
|
{
|
|
|
|
struct ipv6_sr_hdr *srh;
|
|
|
|
int len;
|
|
|
|
|
|
|
|
srh = nla_data(attrs[SEG6_LOCAL_SRH]);
|
|
|
|
len = nla_len(attrs[SEG6_LOCAL_SRH]);
|
|
|
|
|
|
|
|
/* SRH must contain at least one segment */
|
|
|
|
if (len < sizeof(*srh) + sizeof(struct in6_addr))
|
|
|
|
return -EINVAL;
|
|
|
|
|
2020-06-03 14:54:42 +08:00
|
|
|
if (!seg6_validate_srh(srh, len, false))
|
2017-08-05 18:38:27 +08:00
|
|
|
return -EINVAL;
|
|
|
|
|
2018-07-23 16:33:19 +08:00
|
|
|
slwt->srh = kmemdup(srh, len, GFP_KERNEL);
|
2017-08-05 18:38:27 +08:00
|
|
|
if (!slwt->srh)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
slwt->headroom += len;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int put_nla_srh(struct sk_buff *skb, struct seg6_local_lwt *slwt)
|
|
|
|
{
|
|
|
|
struct ipv6_sr_hdr *srh;
|
|
|
|
struct nlattr *nla;
|
|
|
|
int len;
|
|
|
|
|
|
|
|
srh = slwt->srh;
|
|
|
|
len = (srh->hdrlen + 1) << 3;
|
|
|
|
|
|
|
|
nla = nla_reserve(skb, SEG6_LOCAL_SRH, len);
|
|
|
|
if (!nla)
|
|
|
|
return -EMSGSIZE;
|
|
|
|
|
|
|
|
memcpy(nla_data(nla), srh, len);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int cmp_nla_srh(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
|
|
|
|
{
|
|
|
|
int len = (a->srh->hdrlen + 1) << 3;
|
|
|
|
|
|
|
|
if (len != ((b->srh->hdrlen + 1) << 3))
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
return memcmp(a->srh, b->srh, len);
|
|
|
|
}
|
|
|
|
|
seg6: improve management of behavior attributes
Depending on the attribute (i.e.: SEG6_LOCAL_SRH, SEG6_LOCAL_TABLE, etc),
the parse() callback performs some validity checks on the provided input
and updates the tunnel state (slwt) with the result of the parsing
operation. However, an attribute may also need to reserve some additional
resources (i.e.: memory or setting up an eBPF program) in the parse()
callback to complete the parsing operation.
The parse() callbacks are invoked by the parse_nla_action() for each
attribute belonging to a specific behavior. Given a behavior with N
attributes, if the parsing of the i-th attribute fails, the
parse_nla_action() returns immediately with an error. Nonetheless, the
resources acquired during the parsing of the i-1 attributes are not freed
by the parse_nla_action().
Attributes which acquire resources must release them *in an explicit way*
in both the seg6_local_{build/destroy}_state(). However, adding a new
attribute of this type requires changes to
seg6_local_{build/destroy}_state() to release the resources correctly.
The seg6local infrastructure still lacks a simple and structured way to
release the resources acquired in the parse() operations.
We introduced a new callback in the struct seg6_action_param named
destroy(). This callback releases any resource which may have been acquired
in the parse() counterpart. Each attribute may or may not implement the
destroy() callback depending on whether it needs to free some acquired
resources.
The destroy() callback comes with several advantages:
1) we can have many attributes as we want for a given behavior with no
need to explicitly free the taken resources;
2) As in case of the seg6_local_build_state(), the
seg6_local_destroy_state() does not need to handle the release of
resources directly. Indeed, it calls the destroy_attrs() function which
is in charge of calling the destroy() callback for every set attribute.
We do not need to patch seg6_local_{build/destroy}_state() anymore as
we add new attributes;
3) the code is more readable and better structured. Indeed, all the
information needed to handle a given attribute are contained in only
one place;
4) it facilitates the integration with new features introduced in further
patches.
Signed-off-by: Andrea Mayer <andrea.mayer@uniroma2.it>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-12-02 21:05:11 +08:00
|
|
|
static void destroy_attr_srh(struct seg6_local_lwt *slwt)
|
|
|
|
{
|
|
|
|
kfree(slwt->srh);
|
|
|
|
}
|
|
|
|
|
2017-08-05 18:38:27 +08:00
|
|
|
static int parse_nla_table(struct nlattr **attrs, struct seg6_local_lwt *slwt)
|
|
|
|
{
|
|
|
|
slwt->table = nla_get_u32(attrs[SEG6_LOCAL_TABLE]);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int put_nla_table(struct sk_buff *skb, struct seg6_local_lwt *slwt)
|
|
|
|
{
|
|
|
|
if (nla_put_u32(skb, SEG6_LOCAL_TABLE, slwt->table))
|
|
|
|
return -EMSGSIZE;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int cmp_nla_table(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
|
|
|
|
{
|
|
|
|
if (a->table != b->table)
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2020-12-02 21:05:14 +08:00
|
|
|
static struct
|
|
|
|
seg6_end_dt_info *seg6_possible_end_dt_info(struct seg6_local_lwt *slwt)
|
|
|
|
{
|
|
|
|
#ifdef CONFIG_NET_L3_MASTER_DEV
|
|
|
|
return &slwt->dt_info;
|
|
|
|
#else
|
|
|
|
return ERR_PTR(-EOPNOTSUPP);
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
|
|
|
static int parse_nla_vrftable(struct nlattr **attrs,
|
|
|
|
struct seg6_local_lwt *slwt)
|
|
|
|
{
|
|
|
|
struct seg6_end_dt_info *info = seg6_possible_end_dt_info(slwt);
|
|
|
|
|
|
|
|
if (IS_ERR(info))
|
|
|
|
return PTR_ERR(info);
|
|
|
|
|
|
|
|
info->vrf_table = nla_get_u32(attrs[SEG6_LOCAL_VRFTABLE]);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int put_nla_vrftable(struct sk_buff *skb, struct seg6_local_lwt *slwt)
|
|
|
|
{
|
|
|
|
struct seg6_end_dt_info *info = seg6_possible_end_dt_info(slwt);
|
|
|
|
|
|
|
|
if (IS_ERR(info))
|
|
|
|
return PTR_ERR(info);
|
|
|
|
|
|
|
|
if (nla_put_u32(skb, SEG6_LOCAL_VRFTABLE, info->vrf_table))
|
|
|
|
return -EMSGSIZE;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int cmp_nla_vrftable(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
|
|
|
|
{
|
|
|
|
struct seg6_end_dt_info *info_a = seg6_possible_end_dt_info(a);
|
|
|
|
struct seg6_end_dt_info *info_b = seg6_possible_end_dt_info(b);
|
|
|
|
|
|
|
|
if (info_a->vrf_table != info_b->vrf_table)
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2017-08-05 18:38:27 +08:00
|
|
|
static int parse_nla_nh4(struct nlattr **attrs, struct seg6_local_lwt *slwt)
|
|
|
|
{
|
|
|
|
memcpy(&slwt->nh4, nla_data(attrs[SEG6_LOCAL_NH4]),
|
|
|
|
sizeof(struct in_addr));
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int put_nla_nh4(struct sk_buff *skb, struct seg6_local_lwt *slwt)
|
|
|
|
{
|
|
|
|
struct nlattr *nla;
|
|
|
|
|
|
|
|
nla = nla_reserve(skb, SEG6_LOCAL_NH4, sizeof(struct in_addr));
|
|
|
|
if (!nla)
|
|
|
|
return -EMSGSIZE;
|
|
|
|
|
|
|
|
memcpy(nla_data(nla), &slwt->nh4, sizeof(struct in_addr));
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int cmp_nla_nh4(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
|
|
|
|
{
|
|
|
|
return memcmp(&a->nh4, &b->nh4, sizeof(struct in_addr));
|
|
|
|
}
|
|
|
|
|
|
|
|
static int parse_nla_nh6(struct nlattr **attrs, struct seg6_local_lwt *slwt)
|
|
|
|
{
|
|
|
|
memcpy(&slwt->nh6, nla_data(attrs[SEG6_LOCAL_NH6]),
|
|
|
|
sizeof(struct in6_addr));
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int put_nla_nh6(struct sk_buff *skb, struct seg6_local_lwt *slwt)
|
|
|
|
{
|
|
|
|
struct nlattr *nla;
|
|
|
|
|
|
|
|
nla = nla_reserve(skb, SEG6_LOCAL_NH6, sizeof(struct in6_addr));
|
|
|
|
if (!nla)
|
|
|
|
return -EMSGSIZE;
|
|
|
|
|
|
|
|
memcpy(nla_data(nla), &slwt->nh6, sizeof(struct in6_addr));
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int cmp_nla_nh6(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
|
|
|
|
{
|
|
|
|
return memcmp(&a->nh6, &b->nh6, sizeof(struct in6_addr));
|
|
|
|
}
|
|
|
|
|
|
|
|
static int parse_nla_iif(struct nlattr **attrs, struct seg6_local_lwt *slwt)
|
|
|
|
{
|
|
|
|
slwt->iif = nla_get_u32(attrs[SEG6_LOCAL_IIF]);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int put_nla_iif(struct sk_buff *skb, struct seg6_local_lwt *slwt)
|
|
|
|
{
|
|
|
|
if (nla_put_u32(skb, SEG6_LOCAL_IIF, slwt->iif))
|
|
|
|
return -EMSGSIZE;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int cmp_nla_iif(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
|
|
|
|
{
|
|
|
|
if (a->iif != b->iif)
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int parse_nla_oif(struct nlattr **attrs, struct seg6_local_lwt *slwt)
|
|
|
|
{
|
|
|
|
slwt->oif = nla_get_u32(attrs[SEG6_LOCAL_OIF]);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int put_nla_oif(struct sk_buff *skb, struct seg6_local_lwt *slwt)
|
|
|
|
{
|
|
|
|
if (nla_put_u32(skb, SEG6_LOCAL_OIF, slwt->oif))
|
|
|
|
return -EMSGSIZE;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int cmp_nla_oif(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
|
|
|
|
{
|
|
|
|
if (a->oif != b->oif)
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
ipv6: sr: Add seg6local action End.BPF
This patch adds the End.BPF action to the LWT seg6local infrastructure.
This action works like any other seg6local End action, meaning that an IPv6
header with SRH is needed, whose DA has to be equal to the SID of the
action. It will also advance the SRH to the next segment, the BPF program
does not have to take care of this.
Since the BPF program may not be a source of instability in the kernel, it
is important to ensure that the integrity of the packet is maintained
before yielding it back to the IPv6 layer. The hook hence keeps track if
the SRH has been altered through the helpers, and re-validates its
content if needed with seg6_validate_srh. The state kept for validation is
stored in a per-CPU buffer. The BPF program is not allowed to directly
write into the packet, and only some fields of the SRH can be altered
through the helper bpf_lwt_seg6_store_bytes.
Performance profiling has shown that the SRH re-validation does not induce
a significant overhead. If the altered SRH is deemed invalid, the packet
is dropped.
This validation is also done before executing any action through
bpf_lwt_seg6_action, and will not be performed again if the SRH is not
modified after calling the action.
The BPF program may return 3 types of return codes:
- BPF_OK: the End.BPF action will look up the next destination through
seg6_lookup_nexthop.
- BPF_REDIRECT: if an action has been executed through the
bpf_lwt_seg6_action helper, the BPF program should return this
value, as the skb's destination is already set and the default
lookup should not be performed.
- BPF_DROP : the packet will be dropped.
Signed-off-by: Mathieu Xhonneux <m.xhonneux@gmail.com>
Acked-by: David Lebrun <dlebrun@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-05-20 21:58:16 +08:00
|
|
|
#define MAX_PROG_NAME 256
|
|
|
|
static const struct nla_policy bpf_prog_policy[SEG6_LOCAL_BPF_PROG_MAX + 1] = {
|
|
|
|
[SEG6_LOCAL_BPF_PROG] = { .type = NLA_U32, },
|
|
|
|
[SEG6_LOCAL_BPF_PROG_NAME] = { .type = NLA_NUL_STRING,
|
|
|
|
.len = MAX_PROG_NAME },
|
|
|
|
};
|
|
|
|
|
|
|
|
static int parse_nla_bpf(struct nlattr **attrs, struct seg6_local_lwt *slwt)
|
|
|
|
{
|
|
|
|
struct nlattr *tb[SEG6_LOCAL_BPF_PROG_MAX + 1];
|
|
|
|
struct bpf_prog *p;
|
|
|
|
int ret;
|
|
|
|
u32 fd;
|
|
|
|
|
netlink: make validation more configurable for future strictness
We currently have two levels of strict validation:
1) liberal (default)
- undefined (type >= max) & NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
- garbage at end of message accepted
2) strict (opt-in)
- NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
Split out parsing strictness into four different options:
* TRAILING - check that there's no trailing data after parsing
attributes (in message or nested)
* MAXTYPE - reject attrs > max known type
* UNSPEC - reject attributes with NLA_UNSPEC policy entries
* STRICT_ATTRS - strictly validate attribute size
The default for future things should be *everything*.
The current *_strict() is a combination of TRAILING and MAXTYPE,
and is renamed to _deprecated_strict().
The current regular parsing has none of this, and is renamed to
*_parse_deprecated().
Additionally it allows us to selectively set one of the new flags
even on old policies. Notably, the UNSPEC flag could be useful in
this case, since it can be arranged (by filling in the policy) to
not be an incompatible userspace ABI change, but would then going
forward prevent forgetting attribute entries. Similar can apply
to the POLICY flag.
We end up with the following renames:
* nla_parse -> nla_parse_deprecated
* nla_parse_strict -> nla_parse_deprecated_strict
* nlmsg_parse -> nlmsg_parse_deprecated
* nlmsg_parse_strict -> nlmsg_parse_deprecated_strict
* nla_parse_nested -> nla_parse_nested_deprecated
* nla_validate_nested -> nla_validate_nested_deprecated
Using spatch, of course:
@@
expression TB, MAX, HEAD, LEN, POL, EXT;
@@
-nla_parse(TB, MAX, HEAD, LEN, POL, EXT)
+nla_parse_deprecated(TB, MAX, HEAD, LEN, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression TB, MAX, NLA, POL, EXT;
@@
-nla_parse_nested(TB, MAX, NLA, POL, EXT)
+nla_parse_nested_deprecated(TB, MAX, NLA, POL, EXT)
@@
expression START, MAX, POL, EXT;
@@
-nla_validate_nested(START, MAX, POL, EXT)
+nla_validate_nested_deprecated(START, MAX, POL, EXT)
@@
expression NLH, HDRLEN, MAX, POL, EXT;
@@
-nlmsg_validate(NLH, HDRLEN, MAX, POL, EXT)
+nlmsg_validate_deprecated(NLH, HDRLEN, MAX, POL, EXT)
For this patch, don't actually add the strict, non-renamed versions
yet so that it breaks compile if I get it wrong.
Also, while at it, make nla_validate and nla_parse go down to a
common __nla_validate_parse() function to avoid code duplication.
Ultimately, this allows us to have very strict validation for every
new caller of nla_parse()/nlmsg_parse() etc as re-introduced in the
next patch, while existing things will continue to work as is.
In effect then, this adds fully strict validation for any new command.
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-26 20:07:28 +08:00
|
|
|
ret = nla_parse_nested_deprecated(tb, SEG6_LOCAL_BPF_PROG_MAX,
|
|
|
|
attrs[SEG6_LOCAL_BPF],
|
|
|
|
bpf_prog_policy, NULL);
|
ipv6: sr: Add seg6local action End.BPF
This patch adds the End.BPF action to the LWT seg6local infrastructure.
This action works like any other seg6local End action, meaning that an IPv6
header with SRH is needed, whose DA has to be equal to the SID of the
action. It will also advance the SRH to the next segment, the BPF program
does not have to take care of this.
Since the BPF program may not be a source of instability in the kernel, it
is important to ensure that the integrity of the packet is maintained
before yielding it back to the IPv6 layer. The hook hence keeps track if
the SRH has been altered through the helpers, and re-validates its
content if needed with seg6_validate_srh. The state kept for validation is
stored in a per-CPU buffer. The BPF program is not allowed to directly
write into the packet, and only some fields of the SRH can be altered
through the helper bpf_lwt_seg6_store_bytes.
Performances profiling has shown that the SRH re-validation does not induce
a significant overhead. If the altered SRH is deemed as invalid, the packet
is dropped.
This validation is also done before executing any action through
bpf_lwt_seg6_action, and will not be performed again if the SRH is not
modified after calling the action.
The BPF program may return 3 types of return codes:
- BPF_OK: the End.BPF action will look up the next destination through
seg6_lookup_nexthop.
- BPF_REDIRECT: if an action has been executed through the
bpf_lwt_seg6_action helper, the BPF program should return this
value, as the skb's destination is already set and the default
lookup should not be performed.
- BPF_DROP : the packet will be dropped.
Signed-off-by: Mathieu Xhonneux <m.xhonneux@gmail.com>
Acked-by: David Lebrun <dlebrun@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-05-20 21:58:16 +08:00
|
|
|
if (ret < 0)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
if (!tb[SEG6_LOCAL_BPF_PROG] || !tb[SEG6_LOCAL_BPF_PROG_NAME])
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
slwt->bpf.name = nla_memdup(tb[SEG6_LOCAL_BPF_PROG_NAME], GFP_KERNEL);
|
|
|
|
if (!slwt->bpf.name)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
fd = nla_get_u32(tb[SEG6_LOCAL_BPF_PROG]);
|
|
|
|
p = bpf_prog_get_type(fd, BPF_PROG_TYPE_LWT_SEG6LOCAL);
|
|
|
|
if (IS_ERR(p)) {
|
|
|
|
kfree(slwt->bpf.name);
|
|
|
|
return PTR_ERR(p);
|
|
|
|
}
|
|
|
|
|
|
|
|
slwt->bpf.prog = p;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int put_nla_bpf(struct sk_buff *skb, struct seg6_local_lwt *slwt)
|
|
|
|
{
|
|
|
|
struct nlattr *nest;
|
|
|
|
|
|
|
|
if (!slwt->bpf.prog)
|
|
|
|
return 0;
|
|
|
|
|
2019-04-26 17:13:06 +08:00
|
|
|
nest = nla_nest_start_noflag(skb, SEG6_LOCAL_BPF);
|
ipv6: sr: Add seg6local action End.BPF
This patch adds the End.BPF action to the LWT seg6local infrastructure.
This action works like any other seg6local End action, meaning that an IPv6
header with SRH is needed, whose DA has to be equal to the SID of the
action. It will also advance the SRH to the next segment, the BPF program
does not have to take care of this.
Since the BPF program may not be a source of instability in the kernel, it
is important to ensure that the integrity of the packet is maintained
before yielding it back to the IPv6 layer. The hook hence keeps track if
the SRH has been altered through the helpers, and re-validates its
content if needed with seg6_validate_srh. The state kept for validation is
stored in a per-CPU buffer. The BPF program is not allowed to directly
write into the packet, and only some fields of the SRH can be altered
through the helper bpf_lwt_seg6_store_bytes.
Performances profiling has shown that the SRH re-validation does not induce
a significant overhead. If the altered SRH is deemed as invalid, the packet
is dropped.
This validation is also done before executing any action through
bpf_lwt_seg6_action, and will not be performed again if the SRH is not
modified after calling the action.
The BPF program may return 3 types of return codes:
- BPF_OK: the End.BPF action will look up the next destination through
seg6_lookup_nexthop.
- BPF_REDIRECT: if an action has been executed through the
bpf_lwt_seg6_action helper, the BPF program should return this
value, as the skb's destination is already set and the default
lookup should not be performed.
- BPF_DROP : the packet will be dropped.
Signed-off-by: Mathieu Xhonneux <m.xhonneux@gmail.com>
Acked-by: David Lebrun <dlebrun@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-05-20 21:58:16 +08:00
|
|
|
if (!nest)
|
|
|
|
return -EMSGSIZE;
|
|
|
|
|
|
|
|
if (nla_put_u32(skb, SEG6_LOCAL_BPF_PROG, slwt->bpf.prog->aux->id))
|
|
|
|
return -EMSGSIZE;
|
|
|
|
|
|
|
|
if (slwt->bpf.name &&
|
|
|
|
nla_put_string(skb, SEG6_LOCAL_BPF_PROG_NAME, slwt->bpf.name))
|
|
|
|
return -EMSGSIZE;
|
|
|
|
|
|
|
|
return nla_nest_end(skb, nest);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int cmp_nla_bpf(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
|
|
|
|
{
|
|
|
|
if (!a->bpf.name && !b->bpf.name)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (!a->bpf.name || !b->bpf.name)
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
return strcmp(a->bpf.name, b->bpf.name);
|
|
|
|
}
|
|
|
|
|
seg6: improve management of behavior attributes
Depending on the attribute (i.e.: SEG6_LOCAL_SRH, SEG6_LOCAL_TABLE, etc),
the parse() callback performs some validity checks on the provided input
and updates the tunnel state (slwt) with the result of the parsing
operation. However, an attribute may also need to reserve some additional
resources (i.e.: memory or setting up an eBPF program) in the parse()
callback to complete the parsing operation.
The parse() callbacks are invoked by the parse_nla_action() for each
attribute belonging to a specific behavior. Given a behavior with N
attributes, if the parsing of the i-th attribute fails, the
parse_nla_action() returns immediately with an error. Nonetheless, the
resources acquired during the parsing of the i-1 attributes are not freed
by the parse_nla_action().
Attributes which acquire resources must release them *in an explicit way*
in both the seg6_local_{build/destroy}_state(). However, adding a new
attribute of this type requires changes to
seg6_local_{build/destroy}_state() to release the resources correctly.
The seg6local infrastructure still lacks a simple and structured way to
release the resources acquired in the parse() operations.
We introduced a new callback in the struct seg6_action_param named
destroy(). This callback releases any resource which may have been acquired
in the parse() counterpart. Each attribute may or may not implement the
destroy() callback depending on whether it needs to free some acquired
resources.
The destroy() callback comes with several advantages:
1) we can have as many attributes as we want for a given behavior with no
need to explicitly free the taken resources;
2) As in case of the seg6_local_build_state(), the
seg6_local_destroy_state() does not need to handle the release of
resources directly. Indeed, it calls the destroy_attrs() function which
is in charge of calling the destroy() callback for every set attribute.
We do not need to patch seg6_local_{build/destroy}_state() anymore as
we add new attributes;
3) the code is more readable and better structured. Indeed, all the
information needed to handle a given attribute are contained in only
one place;
4) it facilitates the integration with new features introduced in further
patches.
Signed-off-by: Andrea Mayer <andrea.mayer@uniroma2.it>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-12-02 21:05:11 +08:00
|
|
|
static void destroy_attr_bpf(struct seg6_local_lwt *slwt)
|
|
|
|
{
|
|
|
|
kfree(slwt->bpf.name);
|
|
|
|
if (slwt->bpf.prog)
|
|
|
|
bpf_prog_put(slwt->bpf.prog);
|
|
|
|
}
|
|
|
|
|
2017-08-05 18:38:26 +08:00
|
|
|
/* Set of callbacks handling a single seg6local netlink attribute. */
struct seg6_action_param {
	/* read the attribute from @attrs and record it in @slwt */
	int (*parse)(struct nlattr **attrs, struct seg6_local_lwt *slwt);

	/* write the attribute stored in @slwt into @skb */
	int (*put)(struct sk_buff *skb, struct seg6_local_lwt *slwt);

	/* compare the attribute of @a and @b; 0 means they match */
	int (*cmp)(struct seg6_local_lwt *a, struct seg6_local_lwt *b);

	/* optional: release the resources which the matching parse()
	 * callback may have acquired.
	 */
	void (*destroy)(struct seg6_local_lwt *slwt);
};
|
|
|
|
|
|
|
|
static struct seg6_action_param seg6_action_params[SEG6_LOCAL_MAX + 1] = {
|
2017-08-05 18:38:27 +08:00
|
|
|
[SEG6_LOCAL_SRH] = { .parse = parse_nla_srh,
|
|
|
|
.put = put_nla_srh,
|
seg6: improve management of behavior attributes
Depending on the attribute (i.e.: SEG6_LOCAL_SRH, SEG6_LOCAL_TABLE, etc),
the parse() callback performs some validity checks on the provided input
and updates the tunnel state (slwt) with the result of the parsing
operation. However, an attribute may also need to reserve some additional
resources (i.e.: memory or setting up an eBPF program) in the parse()
callback to complete the parsing operation.
The parse() callbacks are invoked by the parse_nla_action() for each
attribute belonging to a specific behavior. Given a behavior with N
attributes, if the parsing of the i-th attribute fails, the
parse_nla_action() returns immediately with an error. Nonetheless, the
resources acquired during the parsing of the i-1 attributes are not freed
by the parse_nla_action().
Attributes which acquire resources must release them *in an explicit way*
in both the seg6_local_{build/destroy}_state(). However, adding a new
attribute of this type requires changes to
seg6_local_{build/destroy}_state() to release the resources correctly.
The seg6local infrastructure still lacks a simple and structured way to
release the resources acquired in the parse() operations.
We introduced a new callback in the struct seg6_action_param named
destroy(). This callback releases any resource which may have been acquired
in the parse() counterpart. Each attribute may or may not implement the
destroy() callback depending on whether it needs to free some acquired
resources.
The destroy() callback comes with several of advantages:
1) we can have many attributes as we want for a given behavior with no
need to explicitly free the taken resources;
2) As in case of the seg6_local_build_state(), the
seg6_local_destroy_state() does not need to handle the release of
resources directly. Indeed, it calls the destroy_attrs() function which
is in charge of calling the destroy() callback for every set attribute.
We do not need to patch seg6_local_{build/destroy}_state() anymore as
we add new attributes;
3) the code is more readable and better structured. Indeed, all the
information needed to handle a given attribute are contained in only
one place;
4) it facilitates the integration with new features introduced in further
patches.
Signed-off-by: Andrea Mayer <andrea.mayer@uniroma2.it>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-12-02 21:05:11 +08:00
|
|
|
.cmp = cmp_nla_srh,
|
|
|
|
.destroy = destroy_attr_srh },
|
2017-08-05 18:38:26 +08:00
|
|
|
|
2017-08-05 18:38:27 +08:00
|
|
|
[SEG6_LOCAL_TABLE] = { .parse = parse_nla_table,
|
|
|
|
.put = put_nla_table,
|
|
|
|
.cmp = cmp_nla_table },
|
2017-08-05 18:38:26 +08:00
|
|
|
|
2017-08-05 18:38:27 +08:00
|
|
|
[SEG6_LOCAL_NH4] = { .parse = parse_nla_nh4,
|
|
|
|
.put = put_nla_nh4,
|
|
|
|
.cmp = cmp_nla_nh4 },
|
2017-08-05 18:38:26 +08:00
|
|
|
|
2017-08-05 18:38:27 +08:00
|
|
|
[SEG6_LOCAL_NH6] = { .parse = parse_nla_nh6,
|
|
|
|
.put = put_nla_nh6,
|
|
|
|
.cmp = cmp_nla_nh6 },
|
2017-08-05 18:38:26 +08:00
|
|
|
|
2017-08-05 18:38:27 +08:00
|
|
|
[SEG6_LOCAL_IIF] = { .parse = parse_nla_iif,
|
|
|
|
.put = put_nla_iif,
|
|
|
|
.cmp = cmp_nla_iif },
|
2017-08-05 18:38:26 +08:00
|
|
|
|
2017-08-05 18:38:27 +08:00
|
|
|
[SEG6_LOCAL_OIF] = { .parse = parse_nla_oif,
|
|
|
|
.put = put_nla_oif,
|
|
|
|
.cmp = cmp_nla_oif },
|
ipv6: sr: Add seg6local action End.BPF
This patch adds the End.BPF action to the LWT seg6local infrastructure.
This action works like any other seg6local End action, meaning that an IPv6
header with SRH is needed, whose DA has to be equal to the SID of the
action. It will also advance the SRH to the next segment, the BPF program
does not have to take care of this.
Since the BPF program may not be a source of instability in the kernel, it
is important to ensure that the integrity of the packet is maintained
before yielding it back to the IPv6 layer. The hook hence keeps track if
the SRH has been altered through the helpers, and re-validates its
content if needed with seg6_validate_srh. The state kept for validation is
stored in a per-CPU buffer. The BPF program is not allowed to directly
write into the packet, and only some fields of the SRH can be altered
through the helper bpf_lwt_seg6_store_bytes.
Performances profiling has shown that the SRH re-validation does not induce
a significant overhead. If the altered SRH is deemed as invalid, the packet
is dropped.
This validation is also done before executing any action through
bpf_lwt_seg6_action, and will not be performed again if the SRH is not
modified after calling the action.
The BPF program may return 3 types of return codes:
- BPF_OK: the End.BPF action will look up the next destination through
seg6_lookup_nexthop.
- BPF_REDIRECT: if an action has been executed through the
bpf_lwt_seg6_action helper, the BPF program should return this
value, as the skb's destination is already set and the default
lookup should not be performed.
- BPF_DROP : the packet will be dropped.
Signed-off-by: Mathieu Xhonneux <m.xhonneux@gmail.com>
Acked-by: David Lebrun <dlebrun@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-05-20 21:58:16 +08:00
|
|
|
|
|
|
|
[SEG6_LOCAL_BPF] = { .parse = parse_nla_bpf,
|
|
|
|
.put = put_nla_bpf,
|
seg6: improve management of behavior attributes
Depending on the attribute (i.e.: SEG6_LOCAL_SRH, SEG6_LOCAL_TABLE, etc),
the parse() callback performs some validity checks on the provided input
and updates the tunnel state (slwt) with the result of the parsing
operation. However, an attribute may also need to reserve some additional
resources (i.e.: memory or setting up an eBPF program) in the parse()
callback to complete the parsing operation.
The parse() callbacks are invoked by the parse_nla_action() for each
attribute belonging to a specific behavior. Given a behavior with N
attributes, if the parsing of the i-th attribute fails, the
parse_nla_action() returns immediately with an error. Nonetheless, the
resources acquired during the parsing of the i-1 attributes are not freed
by the parse_nla_action().
Attributes which acquire resources must release them *in an explicit way*
in both the seg6_local_{build/destroy}_state(). However, adding a new
attribute of this type requires changes to
seg6_local_{build/destroy}_state() to release the resources correctly.
The seg6local infrastructure still lacks a simple and structured way to
release the resources acquired in the parse() operations.
We introduced a new callback in the struct seg6_action_param named
destroy(). This callback releases any resource which may have been acquired
in the parse() counterpart. Each attribute may or may not implement the
destroy() callback depending on whether it needs to free some acquired
resources.
The destroy() callback comes with several of advantages:
1) we can have many attributes as we want for a given behavior with no
need to explicitly free the taken resources;
2) As in case of the seg6_local_build_state(), the
seg6_local_destroy_state() does not need to handle the release of
resources directly. Indeed, it calls the destroy_attrs() function which
is in charge of calling the destroy() callback for every set attribute.
We do not need to patch seg6_local_{build/destroy}_state() anymore as
we add new attributes;
3) the code is more readable and better structured. Indeed, all the
information needed to handle a given attribute are contained in only
one place;
4) it facilitates the integration with new features introduced in further
patches.
Signed-off-by: Andrea Mayer <andrea.mayer@uniroma2.it>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-12-02 21:05:11 +08:00
|
|
|
.cmp = cmp_nla_bpf,
|
|
|
|
.destroy = destroy_attr_bpf },
|
ipv6: sr: Add seg6local action End.BPF
This patch adds the End.BPF action to the LWT seg6local infrastructure.
This action works like any other seg6local End action, meaning that an IPv6
header with SRH is needed, whose DA has to be equal to the SID of the
action. It will also advance the SRH to the next segment, the BPF program
does not have to take care of this.
Since the BPF program may not be a source of instability in the kernel, it
is important to ensure that the integrity of the packet is maintained
before yielding it back to the IPv6 layer. The hook hence keeps track if
the SRH has been altered through the helpers, and re-validates its
content if needed with seg6_validate_srh. The state kept for validation is
stored in a per-CPU buffer. The BPF program is not allowed to directly
write into the packet, and only some fields of the SRH can be altered
through the helper bpf_lwt_seg6_store_bytes.
Performances profiling has shown that the SRH re-validation does not induce
a significant overhead. If the altered SRH is deemed as invalid, the packet
is dropped.
This validation is also done before executing any action through
bpf_lwt_seg6_action, and will not be performed again if the SRH is not
modified after calling the action.
The BPF program may return 3 types of return codes:
- BPF_OK: the End.BPF action will look up the next destination through
seg6_lookup_nexthop.
- BPF_REDIRECT: if an action has been executed through the
bpf_lwt_seg6_action helper, the BPF program should return this
value, as the skb's destination is already set and the default
lookup should not be performed.
- BPF_DROP : the packet will be dropped.
Signed-off-by: Mathieu Xhonneux <m.xhonneux@gmail.com>
Acked-by: David Lebrun <dlebrun@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-05-20 21:58:16 +08:00
|
|
|
|
2020-12-02 21:05:14 +08:00
|
|
|
[SEG6_LOCAL_VRFTABLE] = { .parse = parse_nla_vrftable,
|
|
|
|
.put = put_nla_vrftable,
|
|
|
|
.cmp = cmp_nla_vrftable },
|
|
|
|
|
2017-08-05 18:38:26 +08:00
|
|
|
};
|
|
|
|
|
seg6: improve management of behavior attributes
Depending on the attribute (i.e.: SEG6_LOCAL_SRH, SEG6_LOCAL_TABLE, etc),
the parse() callback performs some validity checks on the provided input
and updates the tunnel state (slwt) with the result of the parsing
operation. However, an attribute may also need to reserve some additional
resources (i.e.: memory or setting up an eBPF program) in the parse()
callback to complete the parsing operation.
The parse() callbacks are invoked by the parse_nla_action() for each
attribute belonging to a specific behavior. Given a behavior with N
attributes, if the parsing of the i-th attribute fails, the
parse_nla_action() returns immediately with an error. Nonetheless, the
resources acquired during the parsing of the i-1 attributes are not freed
by the parse_nla_action().
Attributes which acquire resources must release them *in an explicit way*
in both the seg6_local_{build/destroy}_state(). However, adding a new
attribute of this type requires changes to
seg6_local_{build/destroy}_state() to release the resources correctly.
The seg6local infrastructure still lacks a simple and structured way to
release the resources acquired in the parse() operations.
We introduced a new callback in the struct seg6_action_param named
destroy(). This callback releases any resource which may have been acquired
in the parse() counterpart. Each attribute may or may not implement the
destroy() callback depending on whether it needs to free some acquired
resources.
The destroy() callback comes with several of advantages:
1) we can have many attributes as we want for a given behavior with no
need to explicitly free the taken resources;
2) As in case of the seg6_local_build_state(), the
seg6_local_destroy_state() does not need to handle the release of
resources directly. Indeed, it calls the destroy_attrs() function which
is in charge of calling the destroy() callback for every set attribute.
We do not need to patch seg6_local_{build/destroy}_state() anymore as
we add new attributes;
3) the code is more readable and better structured. Indeed, all the
information needed to handle a given attribute are contained in only
one place;
4) it facilitates the integration with new features introduced in further
patches.
Signed-off-by: Andrea Mayer <andrea.mayer@uniroma2.it>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-12-02 21:05:11 +08:00
|
|
|
/* call the destroy() callback (if available) for each set attribute in
|
2020-12-02 21:05:12 +08:00
|
|
|
* @parsed_attrs, starting from the first attribute up to the @max_parsed
|
|
|
|
* (excluded) attribute.
|
seg6: improve management of behavior attributes
Depending on the attribute (i.e.: SEG6_LOCAL_SRH, SEG6_LOCAL_TABLE, etc),
the parse() callback performs some validity checks on the provided input
and updates the tunnel state (slwt) with the result of the parsing
operation. However, an attribute may also need to reserve some additional
resources (i.e.: memory or setting up an eBPF program) in the parse()
callback to complete the parsing operation.
The parse() callbacks are invoked by the parse_nla_action() for each
attribute belonging to a specific behavior. Given a behavior with N
attributes, if the parsing of the i-th attribute fails, the
parse_nla_action() returns immediately with an error. Nonetheless, the
resources acquired during the parsing of the i-1 attributes are not freed
by the parse_nla_action().
Attributes which acquire resources must release them *in an explicit way*
in both the seg6_local_{build/destroy}_state(). However, adding a new
attribute of this type requires changes to
seg6_local_{build/destroy}_state() to release the resources correctly.
The seg6local infrastructure still lacks a simple and structured way to
release the resources acquired in the parse() operations.
We introduced a new callback in the struct seg6_action_param named
destroy(). This callback releases any resource which may have been acquired
in the parse() counterpart. Each attribute may or may not implement the
destroy() callback depending on whether it needs to free some acquired
resources.
The destroy() callback comes with several of advantages:
1) we can have many attributes as we want for a given behavior with no
need to explicitly free the taken resources;
2) As in case of the seg6_local_build_state(), the
seg6_local_destroy_state() does not need to handle the release of
resources directly. Indeed, it calls the destroy_attrs() function which
is in charge of calling the destroy() callback for every set attribute.
We do not need to patch seg6_local_{build/destroy}_state() anymore as
we add new attributes;
3) the code is more readable and better structured. Indeed, all the
information needed to handle a given attribute are contained in only
one place;
4) it facilitates the integration with new features introduced in further
patches.
Signed-off-by: Andrea Mayer <andrea.mayer@uniroma2.it>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-12-02 21:05:11 +08:00
|
|
|
*/
|
2020-12-02 21:05:12 +08:00
|
|
|
static void __destroy_attrs(unsigned long parsed_attrs, int max_parsed,
|
|
|
|
struct seg6_local_lwt *slwt)
|
seg6: improve management of behavior attributes
Depending on the attribute (i.e.: SEG6_LOCAL_SRH, SEG6_LOCAL_TABLE, etc),
the parse() callback performs some validity checks on the provided input
and updates the tunnel state (slwt) with the result of the parsing
operation. However, an attribute may also need to reserve some additional
resources (i.e.: memory or setting up an eBPF program) in the parse()
callback to complete the parsing operation.
The parse() callbacks are invoked by the parse_nla_action() for each
attribute belonging to a specific behavior. Given a behavior with N
attributes, if the parsing of the i-th attribute fails, the
parse_nla_action() returns immediately with an error. Nonetheless, the
resources acquired during the parsing of the i-1 attributes are not freed
by the parse_nla_action().
Attributes which acquire resources must release them *in an explicit way*
in both the seg6_local_{build/destroy}_state(). However, adding a new
attribute of this type requires changes to
seg6_local_{build/destroy}_state() to release the resources correctly.
The seg6local infrastructure still lacks a simple and structured way to
release the resources acquired in the parse() operations.
We introduced a new callback in the struct seg6_action_param named
destroy(). This callback releases any resource which may have been acquired
in the parse() counterpart. Each attribute may or may not implement the
destroy() callback depending on whether it needs to free some acquired
resources.
The destroy() callback comes with several of advantages:
1) we can have many attributes as we want for a given behavior with no
need to explicitly free the taken resources;
2) As in case of the seg6_local_build_state(), the
seg6_local_destroy_state() does not need to handle the release of
resources directly. Indeed, it calls the destroy_attrs() function which
is in charge of calling the destroy() callback for every set attribute.
We do not need to patch seg6_local_{build/destroy}_state() anymore as
we add new attributes;
3) the code is more readable and better structured. Indeed, all the
information needed to handle a given attribute are contained in only
one place;
4) it facilitates the integration with new features introduced in further
patches.
Signed-off-by: Andrea Mayer <andrea.mayer@uniroma2.it>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-12-02 21:05:11 +08:00
|
|
|
{
|
|
|
|
struct seg6_action_param *param;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
/* Every required seg6local attribute is identified by an ID which is
|
|
|
|
* encoded as a flag (i.e: 1 << ID) in the 'attrs' bitmask;
|
|
|
|
*
|
2020-12-02 21:05:12 +08:00
|
|
|
* We scan the 'parsed_attrs' bitmask, starting from the first attribute
|
seg6: improve management of behavior attributes
Depending on the attribute (i.e.: SEG6_LOCAL_SRH, SEG6_LOCAL_TABLE, etc),
the parse() callback performs some validity checks on the provided input
and updates the tunnel state (slwt) with the result of the parsing
operation. However, an attribute may also need to reserve some additional
resources (i.e.: memory or setting up an eBPF program) in the parse()
callback to complete the parsing operation.
The parse() callbacks are invoked by the parse_nla_action() for each
attribute belonging to a specific behavior. Given a behavior with N
attributes, if the parsing of the i-th attribute fails, the
parse_nla_action() returns immediately with an error. Nonetheless, the
resources acquired during the parsing of the i-1 attributes are not freed
by the parse_nla_action().
Attributes which acquire resources must release them *in an explicit way*
in both the seg6_local_{build/destroy}_state(). However, adding a new
attribute of this type requires changes to
seg6_local_{build/destroy}_state() to release the resources correctly.
The seg6local infrastructure still lacks a simple and structured way to
release the resources acquired in the parse() operations.
We introduced a new callback in the struct seg6_action_param named
destroy(). This callback releases any resource which may have been acquired
in the parse() counterpart. Each attribute may or may not implement the
destroy() callback depending on whether it needs to free some acquired
resources.
The destroy() callback comes with several of advantages:
1) we can have many attributes as we want for a given behavior with no
need to explicitly free the taken resources;
2) As in case of the seg6_local_build_state(), the
seg6_local_destroy_state() does not need to handle the release of
resources directly. Indeed, it calls the destroy_attrs() function which
is in charge of calling the destroy() callback for every set attribute.
We do not need to patch seg6_local_{build/destroy}_state() anymore as
we add new attributes;
3) the code is more readable and better structured. Indeed, all the
information needed to handle a given attribute are contained in only
one place;
4) it facilitates the integration with new features introduced in further
patches.
Signed-off-by: Andrea Mayer <andrea.mayer@uniroma2.it>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-12-02 21:05:11 +08:00
|
|
|
* up to the @max_parsed (excluded) attribute.
|
|
|
|
* For each set attribute, we retrieve the corresponding destroy()
|
|
|
|
* callback. If the callback is not available, then we skip to the next
|
|
|
|
* attribute; otherwise, we call the destroy() callback.
|
|
|
|
*/
|
|
|
|
for (i = 0; i < max_parsed; ++i) {
|
2020-12-02 21:05:12 +08:00
|
|
|
if (!(parsed_attrs & (1 << i)))
|
seg6: improve management of behavior attributes
Depending on the attribute (i.e.: SEG6_LOCAL_SRH, SEG6_LOCAL_TABLE, etc),
the parse() callback performs some validity checks on the provided input
and updates the tunnel state (slwt) with the result of the parsing
operation. However, an attribute may also need to reserve some additional
resources (i.e.: memory or setting up an eBPF program) in the parse()
callback to complete the parsing operation.
The parse() callbacks are invoked by the parse_nla_action() for each
attribute belonging to a specific behavior. Given a behavior with N
attributes, if the parsing of the i-th attribute fails, the
parse_nla_action() returns immediately with an error. Nonetheless, the
resources acquired during the parsing of the i-1 attributes are not freed
by the parse_nla_action().
Attributes which acquire resources must release them *in an explicit way*
in both the seg6_local_{build/destroy}_state(). However, adding a new
attribute of this type requires changes to
seg6_local_{build/destroy}_state() to release the resources correctly.
The seg6local infrastructure still lacks a simple and structured way to
release the resources acquired in the parse() operations.
We introduced a new callback in the struct seg6_action_param named
destroy(). This callback releases any resource which may have been acquired
in the parse() counterpart. Each attribute may or may not implement the
destroy() callback depending on whether it needs to free some acquired
resources.
The destroy() callback comes with several of advantages:
1) we can have many attributes as we want for a given behavior with no
need to explicitly free the taken resources;
2) As in case of the seg6_local_build_state(), the
seg6_local_destroy_state() does not need to handle the release of
resources directly. Indeed, it calls the destroy_attrs() function which
is in charge of calling the destroy() callback for every set attribute.
We do not need to patch seg6_local_{build/destroy}_state() anymore as
we add new attributes;
3) the code is more readable and better structured. Indeed, all the
information needed to handle a given attribute are contained in only
one place;
4) it facilitates the integration with new features introduced in further
patches.
Signed-off-by: Andrea Mayer <andrea.mayer@uniroma2.it>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-12-02 21:05:11 +08:00
|
|
|
continue;
|
|
|
|
|
|
|
|
param = &seg6_action_params[i];
|
|
|
|
|
|
|
|
if (param->destroy)
|
|
|
|
param->destroy(slwt);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* release all the resources that may have been acquired during parsing
|
|
|
|
* operations.
|
|
|
|
*/
|
|
|
|
static void destroy_attrs(struct seg6_local_lwt *slwt)
|
|
|
|
{
|
2020-12-02 21:05:12 +08:00
|
|
|
unsigned long attrs = slwt->desc->attrs | slwt->parsed_optattrs;
|
|
|
|
|
|
|
|
__destroy_attrs(attrs, SEG6_LOCAL_MAX + 1, slwt);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int parse_nla_optional_attrs(struct nlattr **attrs,
|
|
|
|
struct seg6_local_lwt *slwt)
|
|
|
|
{
|
|
|
|
struct seg6_action_desc *desc = slwt->desc;
|
|
|
|
unsigned long parsed_optattrs = 0;
|
|
|
|
struct seg6_action_param *param;
|
|
|
|
int err, i;
|
|
|
|
|
|
|
|
for (i = 0; i < SEG6_LOCAL_MAX + 1; ++i) {
|
|
|
|
if (!(desc->optattrs & (1 << i)) || !attrs[i])
|
|
|
|
continue;
|
|
|
|
|
|
|
|
/* once here, the i-th attribute is provided by the
|
|
|
|
* userspace AND it is identified optional as well.
|
|
|
|
*/
|
|
|
|
param = &seg6_action_params[i];
|
|
|
|
|
|
|
|
err = param->parse(attrs, slwt);
|
|
|
|
if (err < 0)
|
|
|
|
goto parse_optattrs_err;
|
|
|
|
|
|
|
|
/* current attribute has been correctly parsed */
|
|
|
|
parsed_optattrs |= (1 << i);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* store in the tunnel state all the optional attributed successfully
|
|
|
|
* parsed.
|
|
|
|
*/
|
|
|
|
slwt->parsed_optattrs = parsed_optattrs;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
parse_optattrs_err:
|
|
|
|
__destroy_attrs(parsed_optattrs, i, slwt);
|
|
|
|
|
|
|
|
return err;
|
seg6: improve management of behavior attributes
Depending on the attribute (i.e.: SEG6_LOCAL_SRH, SEG6_LOCAL_TABLE, etc),
the parse() callback performs some validity checks on the provided input
and updates the tunnel state (slwt) with the result of the parsing
operation. However, an attribute may also need to reserve some additional
resources (i.e.: memory or setting up an eBPF program) in the parse()
callback to complete the parsing operation.
The parse() callbacks are invoked by the parse_nla_action() for each
attribute belonging to a specific behavior. Given a behavior with N
attributes, if the parsing of the i-th attribute fails, the
parse_nla_action() returns immediately with an error. Nonetheless, the
resources acquired during the parsing of the i-1 attributes are not freed
by the parse_nla_action().
Attributes which acquire resources must release them *in an explicit way*
in both the seg6_local_{build/destroy}_state(). However, adding a new
attribute of this type requires changes to
seg6_local_{build/destroy}_state() to release the resources correctly.
The seg6local infrastructure still lacks a simple and structured way to
release the resources acquired in the parse() operations.
We introduced a new callback in the struct seg6_action_param named
destroy(). This callback releases any resource which may have been acquired
in the parse() counterpart. Each attribute may or may not implement the
destroy() callback depending on whether it needs to free some acquired
resources.
The destroy() callback comes with several of advantages:
1) we can have many attributes as we want for a given behavior with no
need to explicitly free the taken resources;
2) As in case of the seg6_local_build_state(), the
seg6_local_destroy_state() does not need to handle the release of
resources directly. Indeed, it calls the destroy_attrs() function which
is in charge of calling the destroy() callback for every set attribute.
We do not need to patch seg6_local_{build/destroy}_state() anymore as
we add new attributes;
3) the code is more readable and better structured. Indeed, all the
information needed to handle a given attribute are contained in only
one place;
4) it facilitates the integration with new features introduced in further
patches.
Signed-off-by: Andrea Mayer <andrea.mayer@uniroma2.it>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-12-02 21:05:11 +08:00
|
|
|
}
|
|
|
|
|
2020-12-02 21:05:13 +08:00
|
|
|
/* call the custom constructor of the behavior during its initialization phase
|
|
|
|
* and after that all its attributes have been parsed successfully.
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
seg6_local_lwtunnel_build_state(struct seg6_local_lwt *slwt, const void *cfg,
|
|
|
|
struct netlink_ext_ack *extack)
|
|
|
|
{
|
|
|
|
struct seg6_action_desc *desc = slwt->desc;
|
|
|
|
struct seg6_local_lwtunnel_ops *ops;
|
|
|
|
|
|
|
|
ops = &desc->slwt_ops;
|
|
|
|
if (!ops->build_state)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
return ops->build_state(slwt, cfg, extack);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* call the custom destructor of the behavior which is invoked before the
|
|
|
|
* tunnel is going to be destroyed.
|
|
|
|
*/
|
|
|
|
static void seg6_local_lwtunnel_destroy_state(struct seg6_local_lwt *slwt)
|
|
|
|
{
|
|
|
|
struct seg6_action_desc *desc = slwt->desc;
|
|
|
|
struct seg6_local_lwtunnel_ops *ops;
|
|
|
|
|
|
|
|
ops = &desc->slwt_ops;
|
|
|
|
if (!ops->destroy_state)
|
|
|
|
return;
|
|
|
|
|
|
|
|
ops->destroy_state(slwt);
|
|
|
|
}
|
|
|
|
|
2017-08-05 18:38:26 +08:00
|
|
|
static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt)
|
|
|
|
{
|
|
|
|
struct seg6_action_param *param;
|
|
|
|
struct seg6_action_desc *desc;
|
2020-12-02 21:05:12 +08:00
|
|
|
unsigned long invalid_attrs;
|
2017-08-05 18:38:26 +08:00
|
|
|
int i, err;
|
|
|
|
|
|
|
|
desc = __get_action_desc(slwt->action);
|
|
|
|
if (!desc)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
if (!desc->input)
|
|
|
|
return -EOPNOTSUPP;
|
|
|
|
|
|
|
|
slwt->desc = desc;
|
|
|
|
slwt->headroom += desc->static_headroom;
|
|
|
|
|
2020-12-02 21:05:12 +08:00
|
|
|
/* Forcing the desc->optattrs *set* and the desc->attrs *set* to be
|
|
|
|
* disjoined, this allow us to release acquired resources by optional
|
|
|
|
* attributes and by required attributes independently from each other
|
|
|
|
* without any interfarence.
|
|
|
|
* In other terms, we are sure that we do not release some the acquired
|
|
|
|
* resources twice.
|
|
|
|
*
|
|
|
|
* Note that if an attribute is configured both as required and as
|
|
|
|
* optional, it means that the user has messed something up in the
|
|
|
|
* seg6_action_table. Therefore, this check is required for SRv6
|
|
|
|
* behaviors to work properly.
|
|
|
|
*/
|
|
|
|
invalid_attrs = desc->attrs & desc->optattrs;
|
|
|
|
if (invalid_attrs) {
|
|
|
|
WARN_ONCE(1,
|
|
|
|
"An attribute cannot be both required AND optional");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* parse the required attributes */
|
2017-08-05 18:38:26 +08:00
|
|
|
for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) {
|
|
|
|
if (desc->attrs & (1 << i)) {
|
|
|
|
if (!attrs[i])
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
param = &seg6_action_params[i];
|
|
|
|
|
|
|
|
err = param->parse(attrs, slwt);
|
|
|
|
if (err < 0)
|
2020-12-02 21:05:12 +08:00
|
|
|
goto parse_attrs_err;
|
2017-08-05 18:38:26 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-12-02 21:05:12 +08:00
|
|
|
/* parse the optional attributes, if any */
|
|
|
|
err = parse_nla_optional_attrs(attrs, slwt);
|
|
|
|
if (err < 0)
|
|
|
|
goto parse_attrs_err;
|
|
|
|
|
2017-08-05 18:38:26 +08:00
|
|
|
return 0;
|
seg6: improve management of behavior attributes
Depending on the attribute (i.e.: SEG6_LOCAL_SRH, SEG6_LOCAL_TABLE, etc),
the parse() callback performs some validity checks on the provided input
and updates the tunnel state (slwt) with the result of the parsing
operation. However, an attribute may also need to reserve some additional
resources (i.e.: memory or setting up an eBPF program) in the parse()
callback to complete the parsing operation.
The parse() callbacks are invoked by the parse_nla_action() for each
attribute belonging to a specific behavior. Given a behavior with N
attributes, if the parsing of the i-th attribute fails, the
parse_nla_action() returns immediately with an error. Nonetheless, the
resources acquired during the parsing of the i-1 attributes are not freed
by the parse_nla_action().
Attributes which acquire resources must release them *in an explicit way*
in both the seg6_local_{build/destroy}_state(). However, adding a new
attribute of this type requires changes to
seg6_local_{build/destroy}_state() to release the resources correctly.
The seg6local infrastructure still lacks a simple and structured way to
release the resources acquired in the parse() operations.
We introduced a new callback in the struct seg6_action_param named
destroy(). This callback releases any resource which may have been acquired
in the parse() counterpart. Each attribute may or may not implement the
destroy() callback depending on whether it needs to free some acquired
resources.
The destroy() callback comes with several of advantages:
1) we can have many attributes as we want for a given behavior with no
need to explicitly free the taken resources;
2) As in case of the seg6_local_build_state(), the
seg6_local_destroy_state() does not need to handle the release of
resources directly. Indeed, it calls the destroy_attrs() function which
is in charge of calling the destroy() callback for every set attribute.
We do not need to patch seg6_local_{build/destroy}_state() anymore as
we add new attributes;
3) the code is more readable and better structured. Indeed, all the
information needed to handle a given attribute are contained in only
one place;
4) it facilitates the integration with new features introduced in further
patches.
Signed-off-by: Andrea Mayer <andrea.mayer@uniroma2.it>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-12-02 21:05:11 +08:00
|
|
|
|
2020-12-02 21:05:12 +08:00
|
|
|
parse_attrs_err:
|
seg6: improve management of behavior attributes
Depending on the attribute (i.e.: SEG6_LOCAL_SRH, SEG6_LOCAL_TABLE, etc),
the parse() callback performs some validity checks on the provided input
and updates the tunnel state (slwt) with the result of the parsing
operation. However, an attribute may also need to reserve some additional
resources (i.e.: memory or setting up an eBPF program) in the parse()
callback to complete the parsing operation.
The parse() callbacks are invoked by the parse_nla_action() for each
attribute belonging to a specific behavior. Given a behavior with N
attributes, if the parsing of the i-th attribute fails, the
parse_nla_action() returns immediately with an error. Nonetheless, the
resources acquired during the parsing of the i-1 attributes are not freed
by the parse_nla_action().
Attributes which acquire resources must release them *in an explicit way*
in both the seg6_local_{build/destroy}_state(). However, adding a new
attribute of this type requires changes to
seg6_local_{build/destroy}_state() to release the resources correctly.
The seg6local infrastructure still lacks a simple and structured way to
release the resources acquired in the parse() operations.
We introduced a new callback in the struct seg6_action_param named
destroy(). This callback releases any resource which may have been acquired
in the parse() counterpart. Each attribute may or may not implement the
destroy() callback depending on whether it needs to free some acquired
resources.
The destroy() callback comes with several of advantages:
1) we can have many attributes as we want for a given behavior with no
need to explicitly free the taken resources;
2) As in case of the seg6_local_build_state(), the
seg6_local_destroy_state() does not need to handle the release of
resources directly. Indeed, it calls the destroy_attrs() function which
is in charge of calling the destroy() callback for every set attribute.
We do not need to patch seg6_local_{build/destroy}_state() anymore as
we add new attributes;
3) the code is more readable and better structured. Indeed, all the
information needed to handle a given attribute are contained in only
one place;
4) it facilitates the integration with new features introduced in further
patches.
Signed-off-by: Andrea Mayer <andrea.mayer@uniroma2.it>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-12-02 21:05:11 +08:00
|
|
|
/* release any resource that may have been acquired during the i-1
|
|
|
|
* parse() operations.
|
|
|
|
*/
|
2020-12-02 21:05:12 +08:00
|
|
|
__destroy_attrs(desc->attrs, i, slwt);
|
seg6: improve management of behavior attributes
Depending on the attribute (i.e.: SEG6_LOCAL_SRH, SEG6_LOCAL_TABLE, etc),
the parse() callback performs some validity checks on the provided input
and updates the tunnel state (slwt) with the result of the parsing
operation. However, an attribute may also need to reserve some additional
resources (i.e.: memory or setting up an eBPF program) in the parse()
callback to complete the parsing operation.
The parse() callbacks are invoked by the parse_nla_action() for each
attribute belonging to a specific behavior. Given a behavior with N
attributes, if the parsing of the i-th attribute fails, the
parse_nla_action() returns immediately with an error. Nonetheless, the
resources acquired during the parsing of the i-1 attributes are not freed
by the parse_nla_action().
Attributes which acquire resources must release them *in an explicit way*
in both the seg6_local_{build/destroy}_state(). However, adding a new
attribute of this type requires changes to
seg6_local_{build/destroy}_state() to release the resources correctly.
The seg6local infrastructure still lacks a simple and structured way to
release the resources acquired in the parse() operations.
We introduced a new callback in the struct seg6_action_param named
destroy(). This callback releases any resource which may have been acquired
in the parse() counterpart. Each attribute may or may not implement the
destroy() callback depending on whether it needs to free some acquired
resources.
The destroy() callback comes with several of advantages:
1) we can have many attributes as we want for a given behavior with no
need to explicitly free the taken resources;
2) As in case of the seg6_local_build_state(), the
seg6_local_destroy_state() does not need to handle the release of
resources directly. Indeed, it calls the destroy_attrs() function which
is in charge of calling the destroy() callback for every set attribute.
We do not need to patch seg6_local_{build/destroy}_state() anymore as
we add new attributes;
3) the code is more readable and better structured. Indeed, all the
information needed to handle a given attribute are contained in only
one place;
4) it facilitates the integration with new features introduced in further
patches.
Signed-off-by: Andrea Mayer <andrea.mayer@uniroma2.it>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-12-02 21:05:11 +08:00
|
|
|
|
|
|
|
return err;
|
2017-08-05 18:38:26 +08:00
|
|
|
}
|
|
|
|
|
2020-03-28 06:00:21 +08:00
|
|
|
static int seg6_local_build_state(struct net *net, struct nlattr *nla,
|
|
|
|
unsigned int family, const void *cfg,
|
|
|
|
struct lwtunnel_state **ts,
|
2017-08-05 18:38:26 +08:00
|
|
|
struct netlink_ext_ack *extack)
|
|
|
|
{
|
|
|
|
struct nlattr *tb[SEG6_LOCAL_MAX + 1];
|
|
|
|
struct lwtunnel_state *newts;
|
|
|
|
struct seg6_local_lwt *slwt;
|
|
|
|
int err;
|
|
|
|
|
2017-08-25 15:56:46 +08:00
|
|
|
if (family != AF_INET6)
|
|
|
|
return -EINVAL;
|
|
|
|
|
netlink: make validation more configurable for future strictness
We currently have two levels of strict validation:
1) liberal (default)
- undefined (type >= max) & NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
- garbage at end of message accepted
2) strict (opt-in)
- NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
Split out parsing strictness into four different options:
* TRAILING - check that there's no trailing data after parsing
attributes (in message or nested)
* MAXTYPE - reject attrs > max known type
* UNSPEC - reject attributes with NLA_UNSPEC policy entries
* STRICT_ATTRS - strictly validate attribute size
The default for future things should be *everything*.
The current *_strict() is a combination of TRAILING and MAXTYPE,
and is renamed to _deprecated_strict().
The current regular parsing has none of this, and is renamed to
*_parse_deprecated().
Additionally it allows us to selectively set one of the new flags
even on old policies. Notably, the UNSPEC flag could be useful in
this case, since it can be arranged (by filling in the policy) to
not be an incompatible userspace ABI change, but would then going
forward prevent forgetting attribute entries. Similar can apply
to the POLICY flag.
We end up with the following renames:
* nla_parse -> nla_parse_deprecated
* nla_parse_strict -> nla_parse_deprecated_strict
* nlmsg_parse -> nlmsg_parse_deprecated
* nlmsg_parse_strict -> nlmsg_parse_deprecated_strict
* nla_parse_nested -> nla_parse_nested_deprecated
* nla_validate_nested -> nla_validate_nested_deprecated
Using spatch, of course:
@@
expression TB, MAX, HEAD, LEN, POL, EXT;
@@
-nla_parse(TB, MAX, HEAD, LEN, POL, EXT)
+nla_parse_deprecated(TB, MAX, HEAD, LEN, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression TB, MAX, NLA, POL, EXT;
@@
-nla_parse_nested(TB, MAX, NLA, POL, EXT)
+nla_parse_nested_deprecated(TB, MAX, NLA, POL, EXT)
@@
expression START, MAX, POL, EXT;
@@
-nla_validate_nested(START, MAX, POL, EXT)
+nla_validate_nested_deprecated(START, MAX, POL, EXT)
@@
expression NLH, HDRLEN, MAX, POL, EXT;
@@
-nlmsg_validate(NLH, HDRLEN, MAX, POL, EXT)
+nlmsg_validate_deprecated(NLH, HDRLEN, MAX, POL, EXT)
For this patch, don't actually add the strict, non-renamed versions
yet so that it breaks compile if I get it wrong.
Also, while at it, make nla_validate and nla_parse go down to a
common __nla_validate_parse() function to avoid code duplication.
Ultimately, this allows us to have very strict validation for every
new caller of nla_parse()/nlmsg_parse() etc as re-introduced in the
next patch, while existing things will continue to work as is.
In effect then, this adds fully strict validation for any new command.
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-26 20:07:28 +08:00
|
|
|
err = nla_parse_nested_deprecated(tb, SEG6_LOCAL_MAX, nla,
|
|
|
|
seg6_local_policy, extack);
|
2017-08-05 18:38:26 +08:00
|
|
|
|
|
|
|
if (err < 0)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
if (!tb[SEG6_LOCAL_ACTION])
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
newts = lwtunnel_state_alloc(sizeof(*slwt));
|
|
|
|
if (!newts)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
slwt = seg6_local_lwtunnel(newts);
|
|
|
|
slwt->action = nla_get_u32(tb[SEG6_LOCAL_ACTION]);
|
|
|
|
|
|
|
|
err = parse_nla_action(tb, slwt);
|
|
|
|
if (err < 0)
|
|
|
|
goto out_free;
|
|
|
|
|
2020-12-02 21:05:13 +08:00
|
|
|
err = seg6_local_lwtunnel_build_state(slwt, cfg, extack);
|
|
|
|
if (err < 0)
|
|
|
|
goto out_destroy_attrs;
|
|
|
|
|
2017-08-05 18:38:26 +08:00
|
|
|
newts->type = LWTUNNEL_ENCAP_SEG6_LOCAL;
|
|
|
|
newts->flags = LWTUNNEL_STATE_INPUT_REDIRECT;
|
|
|
|
newts->headroom = slwt->headroom;
|
|
|
|
|
|
|
|
*ts = newts;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
2020-12-02 21:05:13 +08:00
|
|
|
out_destroy_attrs:
|
|
|
|
destroy_attrs(slwt);
|
2017-08-05 18:38:26 +08:00
|
|
|
out_free:
|
|
|
|
kfree(newts);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Tear down a seg6local tunnel state.
 *
 * seg6_local_lwtunnel_destroy_state() runs first (presumably the
 * behavior-level counterpart of seg6_local_lwtunnel_build_state() - confirm
 * against its definition).  destroy_attrs() then invokes the destroy()
 * callback of every attribute set on this state, so resources taken by the
 * parse() callbacks are released without this function having to know about
 * individual attributes.
 */
static void seg6_local_destroy_state(struct lwtunnel_state *lwt)
{
	struct seg6_local_lwt *state = seg6_local_lwtunnel(lwt);

	seg6_local_lwtunnel_destroy_state(state);
	destroy_attrs(state);
}
|
|
|
|
|
|
|
|
static int seg6_local_fill_encap(struct sk_buff *skb,
|
|
|
|
struct lwtunnel_state *lwt)
|
|
|
|
{
|
|
|
|
struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt);
|
|
|
|
struct seg6_action_param *param;
|
2020-12-02 21:05:12 +08:00
|
|
|
unsigned long attrs;
|
2017-08-05 18:38:26 +08:00
|
|
|
int i, err;
|
|
|
|
|
|
|
|
if (nla_put_u32(skb, SEG6_LOCAL_ACTION, slwt->action))
|
|
|
|
return -EMSGSIZE;
|
|
|
|
|
2020-12-02 21:05:12 +08:00
|
|
|
attrs = slwt->desc->attrs | slwt->parsed_optattrs;
|
|
|
|
|
2017-08-05 18:38:26 +08:00
|
|
|
for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) {
|
2020-12-02 21:05:12 +08:00
|
|
|
if (attrs & (1 << i)) {
|
2017-08-05 18:38:26 +08:00
|
|
|
param = &seg6_action_params[i];
|
|
|
|
err = param->put(skb, slwt);
|
|
|
|
if (err < 0)
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int seg6_local_get_encap_size(struct lwtunnel_state *lwt)
|
|
|
|
{
|
|
|
|
struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt);
|
|
|
|
unsigned long attrs;
|
|
|
|
int nlsize;
|
|
|
|
|
|
|
|
nlsize = nla_total_size(4); /* action */
|
|
|
|
|
2020-12-02 21:05:12 +08:00
|
|
|
attrs = slwt->desc->attrs | slwt->parsed_optattrs;
|
2017-08-05 18:38:26 +08:00
|
|
|
|
|
|
|
if (attrs & (1 << SEG6_LOCAL_SRH))
|
|
|
|
nlsize += nla_total_size((slwt->srh->hdrlen + 1) << 3);
|
|
|
|
|
|
|
|
if (attrs & (1 << SEG6_LOCAL_TABLE))
|
|
|
|
nlsize += nla_total_size(4);
|
|
|
|
|
|
|
|
if (attrs & (1 << SEG6_LOCAL_NH4))
|
|
|
|
nlsize += nla_total_size(4);
|
|
|
|
|
|
|
|
if (attrs & (1 << SEG6_LOCAL_NH6))
|
|
|
|
nlsize += nla_total_size(16);
|
|
|
|
|
|
|
|
if (attrs & (1 << SEG6_LOCAL_IIF))
|
|
|
|
nlsize += nla_total_size(4);
|
|
|
|
|
|
|
|
if (attrs & (1 << SEG6_LOCAL_OIF))
|
|
|
|
nlsize += nla_total_size(4);
|
|
|
|
|
ipv6: sr: Add seg6local action End.BPF
This patch adds the End.BPF action to the LWT seg6local infrastructure.
This action works like any other seg6local End action, meaning that an IPv6
header with SRH is needed, whose DA has to be equal to the SID of the
action. It will also advance the SRH to the next segment, the BPF program
does not have to take care of this.
Since the BPF program may not be a source of instability in the kernel, it
is important to ensure that the integrity of the packet is maintained
before yielding it back to the IPv6 layer. The hook hence keeps track if
the SRH has been altered through the helpers, and re-validates its
content if needed with seg6_validate_srh. The state kept for validation is
stored in a per-CPU buffer. The BPF program is not allowed to directly
write into the packet, and only some fields of the SRH can be altered
through the helper bpf_lwt_seg6_store_bytes.
Performances profiling has shown that the SRH re-validation does not induce
a significant overhead. If the altered SRH is deemed as invalid, the packet
is dropped.
This validation is also done before executing any action through
bpf_lwt_seg6_action, and will not be performed again if the SRH is not
modified after calling the action.
The BPF program may return 3 types of return codes:
- BPF_OK: the End.BPF action will look up the next destination through
seg6_lookup_nexthop.
- BPF_REDIRECT: if an action has been executed through the
bpf_lwt_seg6_action helper, the BPF program should return this
value, as the skb's destination is already set and the default
lookup should not be performed.
- BPF_DROP : the packet will be dropped.
Signed-off-by: Mathieu Xhonneux <m.xhonneux@gmail.com>
Acked-by: David Lebrun <dlebrun@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-05-20 21:58:16 +08:00
|
|
|
if (attrs & (1 << SEG6_LOCAL_BPF))
|
|
|
|
nlsize += nla_total_size(sizeof(struct nlattr)) +
|
|
|
|
nla_total_size(MAX_PROG_NAME) +
|
|
|
|
nla_total_size(4);
|
|
|
|
|
2020-12-02 21:05:14 +08:00
|
|
|
if (attrs & (1 << SEG6_LOCAL_VRFTABLE))
|
|
|
|
nlsize += nla_total_size(4);
|
|
|
|
|
2017-08-05 18:38:26 +08:00
|
|
|
return nlsize;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int seg6_local_cmp_encap(struct lwtunnel_state *a,
|
|
|
|
struct lwtunnel_state *b)
|
|
|
|
{
|
|
|
|
struct seg6_local_lwt *slwt_a, *slwt_b;
|
|
|
|
struct seg6_action_param *param;
|
2020-12-02 21:05:12 +08:00
|
|
|
unsigned long attrs_a, attrs_b;
|
2017-08-05 18:38:26 +08:00
|
|
|
int i;
|
|
|
|
|
|
|
|
slwt_a = seg6_local_lwtunnel(a);
|
|
|
|
slwt_b = seg6_local_lwtunnel(b);
|
|
|
|
|
|
|
|
if (slwt_a->action != slwt_b->action)
|
|
|
|
return 1;
|
|
|
|
|
2020-12-02 21:05:12 +08:00
|
|
|
attrs_a = slwt_a->desc->attrs | slwt_a->parsed_optattrs;
|
|
|
|
attrs_b = slwt_b->desc->attrs | slwt_b->parsed_optattrs;
|
|
|
|
|
|
|
|
if (attrs_a != attrs_b)
|
2017-08-05 18:38:26 +08:00
|
|
|
return 1;
|
|
|
|
|
|
|
|
for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) {
|
2020-12-02 21:05:12 +08:00
|
|
|
if (attrs_a & (1 << i)) {
|
2017-08-05 18:38:26 +08:00
|
|
|
param = &seg6_action_params[i];
|
|
|
|
if (param->cmp(slwt_a, slwt_b))
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static const struct lwtunnel_encap_ops seg6_local_ops = {
|
|
|
|
.build_state = seg6_local_build_state,
|
|
|
|
.destroy_state = seg6_local_destroy_state,
|
|
|
|
.input = seg6_local_input,
|
|
|
|
.fill_encap = seg6_local_fill_encap,
|
|
|
|
.get_encap_size = seg6_local_get_encap_size,
|
|
|
|
.cmp_encap = seg6_local_cmp_encap,
|
|
|
|
.owner = THIS_MODULE,
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Register the seg6local callbacks with the lwtunnel layer under
 * LWTUNNEL_ENCAP_SEG6_LOCAL.
 *
 * Returns whatever lwtunnel_encap_add_ops() returns.
 */
int __init seg6_local_init(void)
{
	int ret;

	ret = lwtunnel_encap_add_ops(&seg6_local_ops,
				     LWTUNNEL_ENCAP_SEG6_LOCAL);

	return ret;
}
|
|
|
|
|
|
|
|
void seg6_local_exit(void)
|
|
|
|
{
|
|
|
|
lwtunnel_encap_del_ops(&seg6_local_ops, LWTUNNEL_ENCAP_SEG6_LOCAL);
|
|
|
|
}
|