Merge branch 'vxlan_group_policy_extension'
Thomas Graf says:

====================
VXLAN Group Policy Extension

Implements support for the Group Policy VXLAN extension [0] to provide a
lightweight and simple security label mechanism across network peers based
on VXLAN. The security context and associated metadata are mapped to/from
skb->mark. This allows further mapping to a SELinux context using SECMARK,
or implementing ACLs directly with nftables, iptables, OVS, tc, etc.

The extension is disabled by default and should be run on a distinct port
in mixed Linux VXLAN VTEP environments. Liberal VXLAN VTEPs which ignore
unknown reserved bits will be able to receive VXLAN-GBP frames.

Simple usage example:

10.1.1.1:
 # ip link add vxlan0 type vxlan id 10 remote 10.1.1.2 gbp
 # iptables -I OUTPUT -m owner --uid-owner 101 -j MARK --set-mark 0x200

10.1.1.2:
 # ip link add vxlan0 type vxlan id 10 remote 10.1.1.1 gbp
 # iptables -I INPUT -m mark --mark 0x200 -j DROP

iproute2 [1] and OVS [2] support will be provided in separate patches.

[0] https://tools.ietf.org/html/draft-smith-vxlan-group-policy
[1] https://github.com/tgraf/iproute2/tree/vxlan-gbp
[2] https://github.com/tgraf/ovs/tree/vxlan-gbp
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
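As a bridge between the usage example above and the VXLAN-GBP header definitions further down in the diff, the following is a minimal, self-contained sketch (plain C, not kernel code) of how a Group Policy ID and the D/A bits fold into skb->mark. The macro values mirror the VXLAN_GBP_* definitions added by this merge; the gbp_mark() helper and the small test program around it are illustrative assumptions, not part of the patch.

/* Standalone illustration (not kernel code): mirrors the skb->mark <-> GBP
 * bit mapping defined by the VXLAN_GBP_* macros added in this merge.
 */
#include <stdint.h>
#include <stdio.h>

#define VXLAN_GBP_DONT_LEARN     (1u << (16 + 6))  /* "D" bit */
#define VXLAN_GBP_POLICY_APPLIED (1u << (16 + 3))  /* "A" bit */
#define VXLAN_GBP_ID_MASK        0xffffu           /* 16-bit Group Policy ID */

/* Compute the mark a MARK target would set so that the egress VTEP encodes
 * the given policy ID and header bits (hypothetical helper name). */
static uint32_t gbp_mark(uint16_t id, int dont_learn, int policy_applied)
{
	uint32_t mark = id & VXLAN_GBP_ID_MASK;

	if (dont_learn)
		mark |= VXLAN_GBP_DONT_LEARN;
	if (policy_applied)
		mark |= VXLAN_GBP_POLICY_APPLIED;
	return mark;
}

int main(void)
{
	printf("0x%x\n", gbp_mark(512, 0, 0));	/* prints 0x200 */
	return 0;
}

Compiled and run, it prints 0x200 for policy ID 512 with no flag bits, the same mark value used by the iptables rules in the example above.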
commit 2e62fa699f
@@ -263,15 +263,19 @@ static inline struct vxlan_rdst *first_remote_rtnl(struct vxlan_fdb *fdb)
return list_first_entry(&fdb->remotes, struct vxlan_rdst, list);
}

/* Find VXLAN socket based on network namespace, address family and UDP port */
static struct vxlan_sock *vxlan_find_sock(struct net *net,
sa_family_t family, __be16 port)
/* Find VXLAN socket based on network namespace, address family and UDP port
* and enabled unshareable flags.
*/
static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family,
__be16 port, u32 flags)
{
struct vxlan_sock *vs;
u32 match_flags = flags & VXLAN_F_UNSHAREABLE;

hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) {
if (inet_sk(vs->sock->sk)->inet_sport == port &&
inet_sk(vs->sock->sk)->sk.sk_family == family)
inet_sk(vs->sock->sk)->sk.sk_family == family &&
(vs->flags & VXLAN_F_UNSHAREABLE) == match_flags)
return vs;
}
return NULL;
@@ -291,11 +295,12 @@ static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, u32 id)

/* Look up VNI in a per net namespace table */
static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id,
sa_family_t family, __be16 port)
sa_family_t family, __be16 port,
u32 flags)
{
struct vxlan_sock *vs;

vs = vxlan_find_sock(net, family, port);
vs = vxlan_find_sock(net, family, port, flags);
if (!vs)
return NULL;
@@ -620,7 +625,8 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head,
continue;

vh2 = (struct vxlanhdr *)(p->data + off_vx);
if (vh->vx_vni != vh2->vx_vni) {
if (vh->vx_flags != vh2->vx_flags ||
vh->vx_vni != vh2->vx_vni) {
NAPI_GRO_CB(p)->same_flow = 0;
continue;
}
@@ -1183,6 +1189,7 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
struct vxlan_sock *vs;
struct vxlanhdr *vxh;
u32 flags, vni;
struct vxlan_metadata md = {0};

/* Need Vxlan and inner Ethernet header to be present */
if (!pskb_may_pull(skb, VXLAN_HLEN))
@@ -1216,6 +1223,24 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
vni &= VXLAN_VID_MASK;
}

/* For backwards compatibility, only allow reserved fields to be
* used by VXLAN extensions if explicitly requested.
*/
if ((flags & VXLAN_HF_GBP) && (vs->flags & VXLAN_F_GBP)) {
struct vxlanhdr_gbp *gbp;

gbp = (struct vxlanhdr_gbp *)vxh;
md.gbp = ntohs(gbp->policy_id);

if (gbp->dont_learn)
md.gbp |= VXLAN_GBP_DONT_LEARN;

if (gbp->policy_applied)
md.gbp |= VXLAN_GBP_POLICY_APPLIED;

flags &= ~VXLAN_GBP_USED_BITS;
}

if (flags || (vni & ~VXLAN_VID_MASK)) {
/* If there are any unprocessed flags remaining treat
* this as a malformed packet. This behavior diverges from
@@ -1229,7 +1254,8 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
goto bad_flags;
}

vs->rcv(vs, skb, vxh->vx_vni);
md.vni = vxh->vx_vni;
vs->rcv(vs, skb, &md);
return 0;

drop:
@ -1246,8 +1272,8 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
|
|||
return 1;
|
||||
}
|
||||
|
||||
static void vxlan_rcv(struct vxlan_sock *vs,
|
||||
struct sk_buff *skb, __be32 vx_vni)
|
||||
static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb,
|
||||
struct vxlan_metadata *md)
|
||||
{
|
||||
struct iphdr *oip = NULL;
|
||||
struct ipv6hdr *oip6 = NULL;
|
||||
|
@ -1258,7 +1284,7 @@ static void vxlan_rcv(struct vxlan_sock *vs,
|
|||
int err = 0;
|
||||
union vxlan_addr *remote_ip;
|
||||
|
||||
vni = ntohl(vx_vni) >> 8;
|
||||
vni = ntohl(md->vni) >> 8;
|
||||
/* Is this VNI defined? */
|
||||
vxlan = vxlan_vs_find_vni(vs, vni);
|
||||
if (!vxlan)
|
||||
|
@ -1292,6 +1318,7 @@ static void vxlan_rcv(struct vxlan_sock *vs,
|
|||
goto drop;
|
||||
|
||||
skb_reset_network_header(skb);
|
||||
skb->mark = md->gbp;
|
||||
|
||||
if (oip6)
|
||||
err = IP6_ECN_decapsulate(oip6, skb);
|
||||
|
@@ -1641,13 +1668,30 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb)
return false;
}

static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, struct vxlan_sock *vs,
struct vxlan_metadata *md)
{
struct vxlanhdr_gbp *gbp;

gbp = (struct vxlanhdr_gbp *)vxh;
vxh->vx_flags |= htonl(VXLAN_HF_GBP);

if (md->gbp & VXLAN_GBP_DONT_LEARN)
gbp->dont_learn = 1;

if (md->gbp & VXLAN_GBP_POLICY_APPLIED)
gbp->policy_applied = 1;

gbp->policy_id = htons(md->gbp & VXLAN_GBP_ID_MASK);
}

#if IS_ENABLED(CONFIG_IPV6)
static int vxlan6_xmit_skb(struct vxlan_sock *vs,
struct dst_entry *dst, struct sk_buff *skb,
struct net_device *dev, struct in6_addr *saddr,
struct in6_addr *daddr, __u8 prio, __u8 ttl,
__be16 src_port, __be16 dst_port, __be32 vni,
bool xnet)
__be16 src_port, __be16 dst_port,
struct vxlan_metadata *md, bool xnet)
{
struct vxlanhdr *vxh;
int min_headroom;
@ -1696,7 +1740,7 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
|
|||
|
||||
vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
|
||||
vxh->vx_flags = htonl(VXLAN_HF_VNI);
|
||||
vxh->vx_vni = vni;
|
||||
vxh->vx_vni = md->vni;
|
||||
|
||||
if (type & SKB_GSO_TUNNEL_REMCSUM) {
|
||||
u32 data = (skb_checksum_start_offset(skb) - hdrlen) >>
|
||||
|
@ -1714,6 +1758,9 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
|
|||
}
|
||||
}
|
||||
|
||||
if (vs->flags & VXLAN_F_GBP)
|
||||
vxlan_build_gbp_hdr(vxh, vs, md);
|
||||
|
||||
skb_set_inner_protocol(skb, htons(ETH_P_TEB));
|
||||
|
||||
udp_tunnel6_xmit_skb(vs->sock, dst, skb, dev, saddr, daddr, prio,
|
||||
|
@ -1728,7 +1775,8 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
|
|||
int vxlan_xmit_skb(struct vxlan_sock *vs,
|
||||
struct rtable *rt, struct sk_buff *skb,
|
||||
__be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
|
||||
__be16 src_port, __be16 dst_port, __be32 vni, bool xnet)
|
||||
__be16 src_port, __be16 dst_port,
|
||||
struct vxlan_metadata *md, bool xnet)
|
||||
{
|
||||
struct vxlanhdr *vxh;
|
||||
int min_headroom;
|
||||
|
@ -1771,7 +1819,7 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
|
|||
|
||||
vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
|
||||
vxh->vx_flags = htonl(VXLAN_HF_VNI);
|
||||
vxh->vx_vni = vni;
|
||||
vxh->vx_vni = md->vni;
|
||||
|
||||
if (type & SKB_GSO_TUNNEL_REMCSUM) {
|
||||
u32 data = (skb_checksum_start_offset(skb) - hdrlen) >>
|
||||
|
@ -1789,6 +1837,9 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
|
|||
}
|
||||
}
|
||||
|
||||
if (vs->flags & VXLAN_F_GBP)
|
||||
vxlan_build_gbp_hdr(vxh, vs, md);
|
||||
|
||||
skb_set_inner_protocol(skb, htons(ETH_P_TEB));
|
||||
|
||||
return udp_tunnel_xmit_skb(vs->sock, rt, skb, src, dst, tos,
|
||||
|
@ -1849,6 +1900,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
|
|||
const struct iphdr *old_iph;
|
||||
struct flowi4 fl4;
|
||||
union vxlan_addr *dst;
|
||||
struct vxlan_metadata md;
|
||||
__be16 src_port = 0, dst_port;
|
||||
u32 vni;
|
||||
__be16 df = 0;
|
||||
|
@ -1910,7 +1962,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
|
|||
|
||||
ip_rt_put(rt);
|
||||
dst_vxlan = vxlan_find_vni(vxlan->net, vni,
|
||||
dst->sa.sa_family, dst_port);
|
||||
dst->sa.sa_family, dst_port,
|
||||
vxlan->flags);
|
||||
if (!dst_vxlan)
|
||||
goto tx_error;
|
||||
vxlan_encap_bypass(skb, vxlan, dst_vxlan);
|
||||
|
@ -1919,11 +1972,12 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
|
|||
|
||||
tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
|
||||
ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
|
||||
md.vni = htonl(vni << 8);
|
||||
md.gbp = skb->mark;
|
||||
|
||||
err = vxlan_xmit_skb(vxlan->vn_sock, rt, skb,
|
||||
fl4.saddr, dst->sin.sin_addr.s_addr,
|
||||
tos, ttl, df, src_port, dst_port,
|
||||
htonl(vni << 8),
|
||||
tos, ttl, df, src_port, dst_port, &md,
|
||||
!net_eq(vxlan->net, dev_net(vxlan->dev)));
|
||||
if (err < 0) {
|
||||
/* skb is already freed. */
|
||||
|
@ -1968,7 +2022,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
|
|||
|
||||
dst_release(ndst);
|
||||
dst_vxlan = vxlan_find_vni(vxlan->net, vni,
|
||||
dst->sa.sa_family, dst_port);
|
||||
dst->sa.sa_family, dst_port,
|
||||
vxlan->flags);
|
||||
if (!dst_vxlan)
|
||||
goto tx_error;
|
||||
vxlan_encap_bypass(skb, vxlan, dst_vxlan);
|
||||
|
@ -1976,10 +2031,12 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
|
|||
}
|
||||
|
||||
ttl = ttl ? : ip6_dst_hoplimit(ndst);
|
||||
md.vni = htonl(vni << 8);
|
||||
md.gbp = skb->mark;
|
||||
|
||||
err = vxlan6_xmit_skb(vxlan->vn_sock, ndst, skb,
|
||||
dev, &fl6.saddr, &fl6.daddr, 0, ttl,
|
||||
src_port, dst_port, htonl(vni << 8),
|
||||
src_port, dst_port, &md,
|
||||
!net_eq(vxlan->net, dev_net(vxlan->dev)));
|
||||
#endif
|
||||
}
|
||||
|
@ -2136,7 +2193,7 @@ static int vxlan_init(struct net_device *dev)
|
|||
|
||||
spin_lock(&vn->sock_lock);
|
||||
vs = vxlan_find_sock(vxlan->net, ipv6 ? AF_INET6 : AF_INET,
|
||||
vxlan->dst_port);
|
||||
vxlan->dst_port, vxlan->flags);
|
||||
if (vs && atomic_add_unless(&vs->refcnt, 1, 0)) {
|
||||
/* If we have a socket with same port already, reuse it */
|
||||
vxlan_vs_add_dev(vs, vxlan);
|
||||
|
@ -2382,6 +2439,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
|
|||
[IFLA_VXLAN_UDP_ZERO_CSUM6_RX] = { .type = NLA_U8 },
|
||||
[IFLA_VXLAN_REMCSUM_TX] = { .type = NLA_U8 },
|
||||
[IFLA_VXLAN_REMCSUM_RX] = { .type = NLA_U8 },
|
||||
[IFLA_VXLAN_GBP] = { .type = NLA_FLAG, },
|
||||
};
|
||||
|
||||
static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[])
|
||||
|
@ -2542,7 +2600,7 @@ struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
|
|||
return vs;
|
||||
|
||||
spin_lock(&vn->sock_lock);
|
||||
vs = vxlan_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port);
|
||||
vs = vxlan_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port, flags);
|
||||
if (vs && ((vs->rcv != rcv) ||
|
||||
!atomic_add_unless(&vs->refcnt, 1, 0)))
|
||||
vs = ERR_PTR(-EBUSY);
|
||||
|
@ -2706,8 +2764,11 @@ static int vxlan_newlink(struct net *net, struct net_device *dev,
|
|||
nla_get_u8(data[IFLA_VXLAN_REMCSUM_RX]))
|
||||
vxlan->flags |= VXLAN_F_REMCSUM_RX;
|
||||
|
||||
if (data[IFLA_VXLAN_GBP])
|
||||
vxlan->flags |= VXLAN_F_GBP;
|
||||
|
||||
if (vxlan_find_vni(net, vni, use_ipv6 ? AF_INET6 : AF_INET,
|
||||
vxlan->dst_port)) {
|
||||
vxlan->dst_port, vxlan->flags)) {
|
||||
pr_info("duplicate VNI %u\n", vni);
|
||||
return -EEXIST;
|
||||
}
|
||||
|
@ -2851,6 +2912,10 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
|
|||
if (nla_put(skb, IFLA_VXLAN_PORT_RANGE, sizeof(ports), &ports))
|
||||
goto nla_put_failure;
|
||||
|
||||
if (vxlan->flags & VXLAN_F_GBP &&
|
||||
nla_put_flag(skb, IFLA_VXLAN_GBP))
|
||||
goto nla_put_failure;
|
||||
|
||||
return 0;
|
||||
|
||||
nla_put_failure:
|
||||

@@ -97,7 +97,10 @@ struct ip_tunnel {
#define TUNNEL_DONT_FRAGMENT __cpu_to_be16(0x0100)
#define TUNNEL_OAM __cpu_to_be16(0x0200)
#define TUNNEL_CRIT_OPT __cpu_to_be16(0x0400)
#define TUNNEL_OPTIONS_PRESENT __cpu_to_be16(0x0800)
#define TUNNEL_GENEVE_OPT __cpu_to_be16(0x0800)
#define TUNNEL_VXLAN_OPT __cpu_to_be16(0x1000)

#define TUNNEL_OPTIONS_PRESENT (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT)

struct tnl_ptk_info {
__be16 flags;

@@ -11,15 +11,76 @@
#define VNI_HASH_BITS 10
#define VNI_HASH_SIZE (1<<VNI_HASH_BITS)

/* VXLAN protocol header */
/*
* VXLAN Group Based Policy Extension:
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* |1|-|-|-|1|-|-|-|R|D|R|R|A|R|R|R| Group Policy ID |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* | VXLAN Network Identifier (VNI) | Reserved |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*
* D = Don't Learn bit. When set, this bit indicates that the egress
* VTEP MUST NOT learn the source address of the encapsulated frame.
*
* A = Indicates that the group policy has already been applied to
* this packet. Policies MUST NOT be applied by devices when the
* A bit is set.
*
* [0] https://tools.ietf.org/html/draft-smith-vxlan-group-policy
*/
struct vxlanhdr_gbp {
__u8 vx_flags;
#ifdef __LITTLE_ENDIAN_BITFIELD
__u8 reserved_flags1:3,
policy_applied:1,
reserved_flags2:2,
dont_learn:1,
reserved_flags3:1;
#elif defined(__BIG_ENDIAN_BITFIELD)
__u8 reserved_flags1:1,
dont_learn:1,
reserved_flags2:2,
policy_applied:1,
reserved_flags3:3;
#else
#error "Please fix <asm/byteorder.h>"
#endif
__be16 policy_id;
__be32 vx_vni;
};

#define VXLAN_GBP_USED_BITS (VXLAN_HF_GBP | 0xFFFFFF)

/* skb->mark mapping
*
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* |R|R|R|R|R|R|R|R|R|D|R|R|A|R|R|R| Group Policy ID |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*/
#define VXLAN_GBP_DONT_LEARN (BIT(6) << 16)
#define VXLAN_GBP_POLICY_APPLIED (BIT(3) << 16)
#define VXLAN_GBP_ID_MASK (0xFFFF)

/* VXLAN protocol header:
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* |G|R|R|R|I|R|R|C| Reserved |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* | VXLAN Network Identifier (VNI) | Reserved |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*
* G = 1 Group Policy (VXLAN-GBP)
* I = 1 VXLAN Network Identifier (VNI) present
* C = 1 Remote checksum offload (RCO)
*/
struct vxlanhdr {
__be32 vx_flags;
__be32 vx_vni;
};

/* VXLAN header flags. */
#define VXLAN_HF_VNI 0x08000000
#define VXLAN_HF_RCO 0x00200000
#define VXLAN_HF_RCO BIT(24)
#define VXLAN_HF_VNI BIT(27)
#define VXLAN_HF_GBP BIT(31)

/* Remote checksum offload header option */
#define VXLAN_RCO_MASK 0x7f /* Last byte of vni field */
@@ -32,8 +93,14 @@ struct vxlanhdr {
#define VXLAN_VID_MASK (VXLAN_N_VID - 1)
#define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr))

struct vxlan_metadata {
__be32 vni;
u32 gbp;
};

struct vxlan_sock;
typedef void (vxlan_rcv_t)(struct vxlan_sock *vh, struct sk_buff *skb, __be32 key);
typedef void (vxlan_rcv_t)(struct vxlan_sock *vh, struct sk_buff *skb,
struct vxlan_metadata *md);

/* per UDP socket information */
struct vxlan_sock {
@@ -60,6 +127,10 @@ struct vxlan_sock {
#define VXLAN_F_UDP_ZERO_CSUM6_RX 0x100
#define VXLAN_F_REMCSUM_TX 0x200
#define VXLAN_F_REMCSUM_RX 0x400
#define VXLAN_F_GBP 0x800

/* These flags must match in order for a socket to be shareable */
#define VXLAN_F_UNSHAREABLE VXLAN_F_GBP

struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
vxlan_rcv_t *rcv, void *data,
@@ -70,7 +141,8 @@ void vxlan_sock_release(struct vxlan_sock *vs);
int vxlan_xmit_skb(struct vxlan_sock *vs,
struct rtable *rt, struct sk_buff *skb,
__be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
__be16 src_port, __be16 dst_port, __be32 vni, bool xnet);
__be16 src_port, __be16 dst_port, struct vxlan_metadata *md,
bool xnet);

static inline netdev_features_t vxlan_features_check(struct sk_buff *skb,
netdev_features_t features)

@@ -372,6 +372,7 @@ enum {
IFLA_VXLAN_UDP_ZERO_CSUM6_RX,
IFLA_VXLAN_REMCSUM_TX,
IFLA_VXLAN_REMCSUM_RX,
IFLA_VXLAN_GBP,
__IFLA_VXLAN_MAX
};
#define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1)

@@ -252,11 +252,21 @@ enum ovs_vport_attr {

#define OVS_VPORT_ATTR_MAX (__OVS_VPORT_ATTR_MAX - 1)

enum {
OVS_VXLAN_EXT_UNSPEC,
OVS_VXLAN_EXT_GBP, /* Flag or __u32 */
__OVS_VXLAN_EXT_MAX,
};

#define OVS_VXLAN_EXT_MAX (__OVS_VXLAN_EXT_MAX - 1)


/* OVS_VPORT_ATTR_OPTIONS attributes for tunnels.
*/
enum {
OVS_TUNNEL_ATTR_UNSPEC,
OVS_TUNNEL_ATTR_DST_PORT, /* 16-bit UDP port, used by L4 tunnels. */
OVS_TUNNEL_ATTR_EXTENSION,
__OVS_TUNNEL_ATTR_MAX
};

@@ -328,6 +338,7 @@ enum ovs_tunnel_key_attr {
OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, /* Array of Geneve options. */
OVS_TUNNEL_KEY_ATTR_TP_SRC, /* be16 src Transport Port. */
OVS_TUNNEL_KEY_ATTR_TP_DST, /* be16 dst Transport Port. */
OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS, /* Nested OVS_VXLAN_EXT_* */
__OVS_TUNNEL_KEY_ATTR_MAX
};

@ -691,7 +691,7 @@ int ovs_flow_key_extract(const struct ovs_tunnel_info *tun_info,
|
|||
BUILD_BUG_ON((1 << (sizeof(tun_info->options_len) *
|
||||
8)) - 1
|
||||
> sizeof(key->tun_opts));
|
||||
memcpy(GENEVE_OPTS(key, tun_info->options_len),
|
||||
memcpy(TUN_METADATA_OPTS(key, tun_info->options_len),
|
||||
tun_info->options, tun_info->options_len);
|
||||
key->tun_opts_len = tun_info->options_len;
|
||||
} else {
|
||||
|
|
|
@ -53,7 +53,7 @@ struct ovs_key_ipv4_tunnel {
|
|||
|
||||
struct ovs_tunnel_info {
|
||||
struct ovs_key_ipv4_tunnel tunnel;
|
||||
const struct geneve_opt *options;
|
||||
const void *options;
|
||||
u8 options_len;
|
||||
};
|
||||
|
||||
|
@ -61,10 +61,10 @@ struct ovs_tunnel_info {
|
|||
* maximum size. This allows us to get the benefits of variable length
|
||||
* matching for small options.
|
||||
*/
|
||||
#define GENEVE_OPTS(flow_key, opt_len) \
|
||||
((struct geneve_opt *)((flow_key)->tun_opts + \
|
||||
FIELD_SIZEOF(struct sw_flow_key, tun_opts) - \
|
||||
opt_len))
|
||||
#define TUN_METADATA_OFFSET(opt_len) \
|
||||
(FIELD_SIZEOF(struct sw_flow_key, tun_opts) - opt_len)
|
||||
#define TUN_METADATA_OPTS(flow_key, opt_len) \
|
||||
((void *)((flow_key)->tun_opts + TUN_METADATA_OFFSET(opt_len)))
|
||||
|
||||
static inline void __ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info,
|
||||
__be32 saddr, __be32 daddr,
|
||||
|
@ -73,7 +73,7 @@ static inline void __ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info,
|
|||
__be16 tp_dst,
|
||||
__be64 tun_id,
|
||||
__be16 tun_flags,
|
||||
const struct geneve_opt *opts,
|
||||
const void *opts,
|
||||
u8 opts_len)
|
||||
{
|
||||
tun_info->tunnel.tun_id = tun_id;
|
||||
|
@ -105,7 +105,7 @@ static inline void ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info,
|
|||
__be16 tp_dst,
|
||||
__be64 tun_id,
|
||||
__be16 tun_flags,
|
||||
const struct geneve_opt *opts,
|
||||
const void *opts,
|
||||
u8 opts_len)
|
||||
{
|
||||
__ovs_flow_tun_info_init(tun_info, iph->saddr, iph->daddr,
|
||||
|
|
|
@ -49,6 +49,14 @@
|
|||
#include <net/mpls.h>
|
||||
|
||||
#include "flow_netlink.h"
|
||||
#include "vport-vxlan.h"
|
||||
|
||||
struct ovs_len_tbl {
|
||||
int len;
|
||||
const struct ovs_len_tbl *next;
|
||||
};
|
||||
|
||||
#define OVS_ATTR_NESTED -1
|
||||
|
||||
static void update_range(struct sw_flow_match *match,
|
||||
size_t offset, size_t size, bool is_mask)
|
||||
|
@ -261,6 +269,9 @@ size_t ovs_tun_key_attr_size(void)
|
|||
+ nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */
|
||||
+ nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */
|
||||
+ nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */
|
||||
/* OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS is mutually exclusive with
|
||||
* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it.
|
||||
*/
|
||||
+ nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_SRC */
|
||||
+ nla_total_size(2); /* OVS_TUNNEL_KEY_ATTR_TP_DST */
|
||||
}
|
||||
|
@ -289,29 +300,45 @@ size_t ovs_key_attr_size(void)
|
|||
+ nla_total_size(28); /* OVS_KEY_ATTR_ND */
|
||||
}
|
||||
|
||||
static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = {
|
||||
[OVS_TUNNEL_KEY_ATTR_ID] = { .len = sizeof(u64) },
|
||||
[OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = { .len = sizeof(u32) },
|
||||
[OVS_TUNNEL_KEY_ATTR_IPV4_DST] = { .len = sizeof(u32) },
|
||||
[OVS_TUNNEL_KEY_ATTR_TOS] = { .len = 1 },
|
||||
[OVS_TUNNEL_KEY_ATTR_TTL] = { .len = 1 },
|
||||
[OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = { .len = 0 },
|
||||
[OVS_TUNNEL_KEY_ATTR_CSUM] = { .len = 0 },
|
||||
[OVS_TUNNEL_KEY_ATTR_TP_SRC] = { .len = sizeof(u16) },
|
||||
[OVS_TUNNEL_KEY_ATTR_TP_DST] = { .len = sizeof(u16) },
|
||||
[OVS_TUNNEL_KEY_ATTR_OAM] = { .len = 0 },
|
||||
[OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = { .len = OVS_ATTR_NESTED },
|
||||
[OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS] = { .len = OVS_ATTR_NESTED },
|
||||
};
|
||||
|
||||
/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */
|
||||
static const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
|
||||
[OVS_KEY_ATTR_ENCAP] = -1,
|
||||
[OVS_KEY_ATTR_PRIORITY] = sizeof(u32),
|
||||
[OVS_KEY_ATTR_IN_PORT] = sizeof(u32),
|
||||
[OVS_KEY_ATTR_SKB_MARK] = sizeof(u32),
|
||||
[OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet),
|
||||
[OVS_KEY_ATTR_VLAN] = sizeof(__be16),
|
||||
[OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16),
|
||||
[OVS_KEY_ATTR_IPV4] = sizeof(struct ovs_key_ipv4),
|
||||
[OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6),
|
||||
[OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp),
|
||||
[OVS_KEY_ATTR_TCP_FLAGS] = sizeof(__be16),
|
||||
[OVS_KEY_ATTR_UDP] = sizeof(struct ovs_key_udp),
|
||||
[OVS_KEY_ATTR_SCTP] = sizeof(struct ovs_key_sctp),
|
||||
[OVS_KEY_ATTR_ICMP] = sizeof(struct ovs_key_icmp),
|
||||
[OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6),
|
||||
[OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp),
|
||||
[OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd),
|
||||
[OVS_KEY_ATTR_RECIRC_ID] = sizeof(u32),
|
||||
[OVS_KEY_ATTR_DP_HASH] = sizeof(u32),
|
||||
[OVS_KEY_ATTR_TUNNEL] = -1,
|
||||
[OVS_KEY_ATTR_MPLS] = sizeof(struct ovs_key_mpls),
|
||||
static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
|
||||
[OVS_KEY_ATTR_ENCAP] = { .len = OVS_ATTR_NESTED },
|
||||
[OVS_KEY_ATTR_PRIORITY] = { .len = sizeof(u32) },
|
||||
[OVS_KEY_ATTR_IN_PORT] = { .len = sizeof(u32) },
|
||||
[OVS_KEY_ATTR_SKB_MARK] = { .len = sizeof(u32) },
|
||||
[OVS_KEY_ATTR_ETHERNET] = { .len = sizeof(struct ovs_key_ethernet) },
|
||||
[OVS_KEY_ATTR_VLAN] = { .len = sizeof(__be16) },
|
||||
[OVS_KEY_ATTR_ETHERTYPE] = { .len = sizeof(__be16) },
|
||||
[OVS_KEY_ATTR_IPV4] = { .len = sizeof(struct ovs_key_ipv4) },
|
||||
[OVS_KEY_ATTR_IPV6] = { .len = sizeof(struct ovs_key_ipv6) },
|
||||
[OVS_KEY_ATTR_TCP] = { .len = sizeof(struct ovs_key_tcp) },
|
||||
[OVS_KEY_ATTR_TCP_FLAGS] = { .len = sizeof(__be16) },
|
||||
[OVS_KEY_ATTR_UDP] = { .len = sizeof(struct ovs_key_udp) },
|
||||
[OVS_KEY_ATTR_SCTP] = { .len = sizeof(struct ovs_key_sctp) },
|
||||
[OVS_KEY_ATTR_ICMP] = { .len = sizeof(struct ovs_key_icmp) },
|
||||
[OVS_KEY_ATTR_ICMPV6] = { .len = sizeof(struct ovs_key_icmpv6) },
|
||||
[OVS_KEY_ATTR_ARP] = { .len = sizeof(struct ovs_key_arp) },
|
||||
[OVS_KEY_ATTR_ND] = { .len = sizeof(struct ovs_key_nd) },
|
||||
[OVS_KEY_ATTR_RECIRC_ID] = { .len = sizeof(u32) },
|
||||
[OVS_KEY_ATTR_DP_HASH] = { .len = sizeof(u32) },
|
||||
[OVS_KEY_ATTR_TUNNEL] = { .len = OVS_ATTR_NESTED,
|
||||
.next = ovs_tunnel_key_lens, },
|
||||
[OVS_KEY_ATTR_MPLS] = { .len = sizeof(struct ovs_key_mpls) },
|
||||
};
|
||||
|
||||
static bool is_all_zero(const u8 *fp, size_t size)
|
||||
|
@ -352,8 +379,8 @@ static int __parse_flow_nlattrs(const struct nlattr *attr,
|
|||
return -EINVAL;
|
||||
}
|
||||
|
||||
expected_len = ovs_key_lens[type];
|
||||
if (nla_len(nla) != expected_len && expected_len != -1) {
|
||||
expected_len = ovs_key_lens[type].len;
|
||||
if (nla_len(nla) != expected_len && expected_len != OVS_ATTR_NESTED) {
|
||||
OVS_NLERR(log, "Key %d has unexpected len %d expected %d",
|
||||
type, nla_len(nla), expected_len);
|
||||
return -EINVAL;
|
||||
|
@ -432,13 +459,47 @@ static int genev_tun_opt_from_nlattr(const struct nlattr *a,
|
|||
SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);
|
||||
}
|
||||
|
||||
opt_key_offset = (unsigned long)GENEVE_OPTS((struct sw_flow_key *)0,
|
||||
nla_len(a));
|
||||
opt_key_offset = TUN_METADATA_OFFSET(nla_len(a));
|
||||
SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, nla_data(a),
|
||||
nla_len(a), is_mask);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct nla_policy vxlan_opt_policy[OVS_VXLAN_EXT_MAX + 1] = {
|
||||
[OVS_VXLAN_EXT_GBP] = { .type = NLA_U32 },
|
||||
};
|
||||
|
||||
static int vxlan_tun_opt_from_nlattr(const struct nlattr *a,
|
||||
struct sw_flow_match *match, bool is_mask,
|
||||
bool log)
|
||||
{
|
||||
struct nlattr *tb[OVS_VXLAN_EXT_MAX+1];
|
||||
unsigned long opt_key_offset;
|
||||
struct ovs_vxlan_opts opts;
|
||||
int err;
|
||||
|
||||
BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts));
|
||||
|
||||
err = nla_parse_nested(tb, OVS_VXLAN_EXT_MAX, a, vxlan_opt_policy);
|
||||
if (err < 0)
|
||||
return err;
|
||||
|
||||
memset(&opts, 0, sizeof(opts));
|
||||
|
||||
if (tb[OVS_VXLAN_EXT_GBP])
|
||||
opts.gbp = nla_get_u32(tb[OVS_VXLAN_EXT_GBP]);
|
||||
|
||||
if (!is_mask)
|
||||
SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), false);
|
||||
else
|
||||
SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);
|
||||
|
||||
opt_key_offset = TUN_METADATA_OFFSET(sizeof(opts));
|
||||
SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, &opts, sizeof(opts),
|
||||
is_mask);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ipv4_tun_from_nlattr(const struct nlattr *attr,
|
||||
struct sw_flow_match *match, bool is_mask,
|
||||
bool log)
|
||||
|
@ -447,35 +508,22 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
|
|||
int rem;
|
||||
bool ttl = false;
|
||||
__be16 tun_flags = 0;
|
||||
int opts_type = 0;
|
||||
|
||||
nla_for_each_nested(a, attr, rem) {
|
||||
int type = nla_type(a);
|
||||
int err;
|
||||
|
||||
static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = {
|
||||
[OVS_TUNNEL_KEY_ATTR_ID] = sizeof(u64),
|
||||
[OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = sizeof(u32),
|
||||
[OVS_TUNNEL_KEY_ATTR_IPV4_DST] = sizeof(u32),
|
||||
[OVS_TUNNEL_KEY_ATTR_TOS] = 1,
|
||||
[OVS_TUNNEL_KEY_ATTR_TTL] = 1,
|
||||
[OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0,
|
||||
[OVS_TUNNEL_KEY_ATTR_CSUM] = 0,
|
||||
[OVS_TUNNEL_KEY_ATTR_TP_SRC] = sizeof(u16),
|
||||
[OVS_TUNNEL_KEY_ATTR_TP_DST] = sizeof(u16),
|
||||
[OVS_TUNNEL_KEY_ATTR_OAM] = 0,
|
||||
[OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = -1,
|
||||
};
|
||||
|
||||
if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
|
||||
OVS_NLERR(log, "Tunnel attr %d out of range max %d",
|
||||
type, OVS_TUNNEL_KEY_ATTR_MAX);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (ovs_tunnel_key_lens[type] != nla_len(a) &&
|
||||
ovs_tunnel_key_lens[type] != -1) {
|
||||
if (ovs_tunnel_key_lens[type].len != nla_len(a) &&
|
||||
ovs_tunnel_key_lens[type].len != OVS_ATTR_NESTED) {
|
||||
OVS_NLERR(log, "Tunnel attr %d has unexpected len %d expected %d",
|
||||
type, nla_len(a), ovs_tunnel_key_lens[type]);
|
||||
type, nla_len(a), ovs_tunnel_key_lens[type].len);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
|
@ -520,11 +568,30 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
|
|||
tun_flags |= TUNNEL_OAM;
|
||||
break;
|
||||
case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
|
||||
if (opts_type) {
|
||||
OVS_NLERR(log, "Multiple metadata blocks provided");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
err = genev_tun_opt_from_nlattr(a, match, is_mask, log);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
tun_flags |= TUNNEL_OPTIONS_PRESENT;
|
||||
tun_flags |= TUNNEL_GENEVE_OPT;
|
||||
opts_type = type;
|
||||
break;
|
||||
case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
|
||||
if (opts_type) {
|
||||
OVS_NLERR(log, "Multiple metadata blocks provided");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
err = vxlan_tun_opt_from_nlattr(a, match, is_mask, log);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
tun_flags |= TUNNEL_VXLAN_OPT;
|
||||
opts_type = type;
|
||||
break;
|
||||
default:
|
||||
OVS_NLERR(log, "Unknown IPv4 tunnel attribute %d",
|
||||
|
@ -553,13 +620,29 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
|
|||
}
|
||||
}
|
||||
|
||||
return opts_type;
|
||||
}
|
||||
|
||||
static int vxlan_opt_to_nlattr(struct sk_buff *skb,
|
||||
const void *tun_opts, int swkey_tun_opts_len)
|
||||
{
|
||||
const struct ovs_vxlan_opts *opts = tun_opts;
|
||||
struct nlattr *nla;
|
||||
|
||||
nla = nla_nest_start(skb, OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS);
|
||||
if (!nla)
|
||||
return -EMSGSIZE;
|
||||
|
||||
if (nla_put_u32(skb, OVS_VXLAN_EXT_GBP, opts->gbp) < 0)
|
||||
return -EMSGSIZE;
|
||||
|
||||
nla_nest_end(skb, nla);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __ipv4_tun_to_nlattr(struct sk_buff *skb,
|
||||
const struct ovs_key_ipv4_tunnel *output,
|
||||
const struct geneve_opt *tun_opts,
|
||||
int swkey_tun_opts_len)
|
||||
const void *tun_opts, int swkey_tun_opts_len)
|
||||
{
|
||||
if (output->tun_flags & TUNNEL_KEY &&
|
||||
nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id))
|
||||
|
@ -590,18 +673,22 @@ static int __ipv4_tun_to_nlattr(struct sk_buff *skb,
|
|||
if ((output->tun_flags & TUNNEL_OAM) &&
|
||||
nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM))
|
||||
return -EMSGSIZE;
|
||||
if (tun_opts &&
|
||||
nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
|
||||
swkey_tun_opts_len, tun_opts))
|
||||
return -EMSGSIZE;
|
||||
if (tun_opts) {
|
||||
if (output->tun_flags & TUNNEL_GENEVE_OPT &&
|
||||
nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
|
||||
swkey_tun_opts_len, tun_opts))
|
||||
return -EMSGSIZE;
|
||||
else if (output->tun_flags & TUNNEL_VXLAN_OPT &&
|
||||
vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len))
|
||||
return -EMSGSIZE;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ipv4_tun_to_nlattr(struct sk_buff *skb,
|
||||
const struct ovs_key_ipv4_tunnel *output,
|
||||
const struct geneve_opt *tun_opts,
|
||||
int swkey_tun_opts_len)
|
||||
const void *tun_opts, int swkey_tun_opts_len)
|
||||
{
|
||||
struct nlattr *nla;
|
||||
int err;
|
||||
|
@ -675,7 +762,7 @@ static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs,
|
|||
}
|
||||
if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) {
|
||||
if (ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
|
||||
is_mask, log))
|
||||
is_mask, log) < 0)
|
||||
return -EINVAL;
|
||||
*attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL);
|
||||
}
|
||||
|
@ -915,18 +1002,16 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static void nlattr_set(struct nlattr *attr, u8 val, bool is_attr_mask_key)
|
||||
static void nlattr_set(struct nlattr *attr, u8 val,
|
||||
const struct ovs_len_tbl *tbl)
|
||||
{
|
||||
struct nlattr *nla;
|
||||
int rem;
|
||||
|
||||
/* The nlattr stream should already have been validated */
|
||||
nla_for_each_nested(nla, attr, rem) {
|
||||
/* We assume that ovs_key_lens[type] == -1 means that type is a
|
||||
* nested attribute
|
||||
*/
|
||||
if (is_attr_mask_key && ovs_key_lens[nla_type(nla)] == -1)
|
||||
nlattr_set(nla, val, false);
|
||||
if (tbl && tbl[nla_type(nla)].len == OVS_ATTR_NESTED)
|
||||
nlattr_set(nla, val, tbl[nla_type(nla)].next);
|
||||
else
|
||||
memset(nla_data(nla), val, nla_len(nla));
|
||||
}
|
||||
|
@ -934,7 +1019,7 @@ static void nlattr_set(struct nlattr *attr, u8 val, bool is_attr_mask_key)
|
|||
|
||||
static void mask_set_nlattr(struct nlattr *attr, u8 val)
|
||||
{
|
||||
nlattr_set(attr, val, true);
|
||||
nlattr_set(attr, val, ovs_key_lens);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1148,10 +1233,10 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
|
|||
goto nla_put_failure;
|
||||
|
||||
if ((swkey->tun_key.ipv4_dst || is_mask)) {
|
||||
const struct geneve_opt *opts = NULL;
|
||||
const void *opts = NULL;
|
||||
|
||||
if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT)
|
||||
opts = GENEVE_OPTS(output, swkey->tun_opts_len);
|
||||
opts = TUN_METADATA_OPTS(output, swkey->tun_opts_len);
|
||||
|
||||
if (ipv4_tun_to_nlattr(skb, &output->tun_key, opts,
|
||||
swkey->tun_opts_len))
|
||||
|
@ -1540,6 +1625,34 @@ void ovs_match_init(struct sw_flow_match *match,
|
|||
}
|
||||
}
|
||||
|
||||
static int validate_geneve_opts(struct sw_flow_key *key)
|
||||
{
|
||||
struct geneve_opt *option;
|
||||
int opts_len = key->tun_opts_len;
|
||||
bool crit_opt = false;
|
||||
|
||||
option = (struct geneve_opt *)TUN_METADATA_OPTS(key, key->tun_opts_len);
|
||||
while (opts_len > 0) {
|
||||
int len;
|
||||
|
||||
if (opts_len < sizeof(*option))
|
||||
return -EINVAL;
|
||||
|
||||
len = sizeof(*option) + option->length * 4;
|
||||
if (len > opts_len)
|
||||
return -EINVAL;
|
||||
|
||||
crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE);
|
||||
|
||||
option = (struct geneve_opt *)((u8 *)option + len);
|
||||
opts_len -= len;
|
||||
};
|
||||
|
||||
key->tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int validate_and_copy_set_tun(const struct nlattr *attr,
|
||||
struct sw_flow_actions **sfa, bool log)
|
||||
{
|
||||
|
@ -1547,36 +1660,23 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
|
|||
struct sw_flow_key key;
|
||||
struct ovs_tunnel_info *tun_info;
|
||||
struct nlattr *a;
|
||||
int err, start;
|
||||
int err, start, opts_type;
|
||||
|
||||
ovs_match_init(&match, &key, NULL);
|
||||
err = ipv4_tun_from_nlattr(nla_data(attr), &match, false, log);
|
||||
if (err)
|
||||
return err;
|
||||
opts_type = ipv4_tun_from_nlattr(nla_data(attr), &match, false, log);
|
||||
if (opts_type < 0)
|
||||
return opts_type;
|
||||
|
||||
if (key.tun_opts_len) {
|
||||
struct geneve_opt *option = GENEVE_OPTS(&key,
|
||||
key.tun_opts_len);
|
||||
int opts_len = key.tun_opts_len;
|
||||
bool crit_opt = false;
|
||||
|
||||
while (opts_len > 0) {
|
||||
int len;
|
||||
|
||||
if (opts_len < sizeof(*option))
|
||||
return -EINVAL;
|
||||
|
||||
len = sizeof(*option) + option->length * 4;
|
||||
if (len > opts_len)
|
||||
return -EINVAL;
|
||||
|
||||
crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE);
|
||||
|
||||
option = (struct geneve_opt *)((u8 *)option + len);
|
||||
opts_len -= len;
|
||||
};
|
||||
|
||||
key.tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0;
|
||||
switch (opts_type) {
|
||||
case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
|
||||
err = validate_geneve_opts(&key);
|
||||
if (err < 0)
|
||||
return err;
|
||||
break;
|
||||
case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
|
||||
break;
|
||||
}
|
||||
};
|
||||
|
||||
start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET, log);
|
||||
|
@ -1597,9 +1697,9 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
|
|||
* everything else will go away after flow setup. We can append
|
||||
* it to tun_info and then point there.
|
||||
*/
|
||||
memcpy((tun_info + 1), GENEVE_OPTS(&key, key.tun_opts_len),
|
||||
key.tun_opts_len);
|
||||
tun_info->options = (struct geneve_opt *)(tun_info + 1);
|
||||
memcpy((tun_info + 1),
|
||||
TUN_METADATA_OPTS(&key, key.tun_opts_len), key.tun_opts_len);
|
||||
tun_info->options = (tun_info + 1);
|
||||
} else {
|
||||
tun_info->options = NULL;
|
||||
}
|
||||
|
@ -1622,8 +1722,8 @@ static int validate_set(const struct nlattr *a,
|
|||
return -EINVAL;
|
||||
|
||||
if (key_type > OVS_KEY_ATTR_MAX ||
|
||||
(ovs_key_lens[key_type] != nla_len(ovs_key) &&
|
||||
ovs_key_lens[key_type] != -1))
|
||||
(ovs_key_lens[key_type].len != nla_len(ovs_key) &&
|
||||
ovs_key_lens[key_type].len != OVS_ATTR_NESTED))
|
||||
return -EINVAL;
|
||||
|
||||
switch (key_type) {
|
||||
|
|
|
@ -88,7 +88,7 @@ static void geneve_rcv(struct geneve_sock *gs, struct sk_buff *skb)
|
|||
|
||||
opts_len = geneveh->opt_len * 4;
|
||||
|
||||
flags = TUNNEL_KEY | TUNNEL_OPTIONS_PRESENT |
|
||||
flags = TUNNEL_KEY | TUNNEL_GENEVE_OPT |
|
||||
(udp_hdr(skb)->check != 0 ? TUNNEL_CSUM : 0) |
|
||||
(geneveh->oam ? TUNNEL_OAM : 0) |
|
||||
(geneveh->critical ? TUNNEL_CRIT_OPT : 0);
|
||||
|
@ -178,7 +178,7 @@ static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb)
|
|||
__be16 sport;
|
||||
struct rtable *rt;
|
||||
struct flowi4 fl;
|
||||
u8 vni[3];
|
||||
u8 vni[3], opts_len, *opts;
|
||||
__be16 df;
|
||||
int err;
|
||||
|
||||
|
@ -200,11 +200,18 @@ static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb)
|
|||
tunnel_id_to_vni(tun_key->tun_id, vni);
|
||||
skb->ignore_df = 1;
|
||||
|
||||
if (tun_key->tun_flags & TUNNEL_GENEVE_OPT) {
|
||||
opts = (u8 *)tun_info->options;
|
||||
opts_len = tun_info->options_len;
|
||||
} else {
|
||||
opts = NULL;
|
||||
opts_len = 0;
|
||||
}
|
||||
|
||||
err = geneve_xmit_skb(geneve_port->gs, rt, skb, fl.saddr,
|
||||
tun_key->ipv4_dst, tun_key->ipv4_tos,
|
||||
tun_key->ipv4_ttl, df, sport, dport,
|
||||
tun_key->tun_flags, vni,
|
||||
tun_info->options_len, (u8 *)tun_info->options,
|
||||
tun_key->tun_flags, vni, opts_len, opts,
|
||||
false);
|
||||
if (err < 0)
|
||||
ip_rt_put(rt);
|
||||
|
|
|
@ -40,6 +40,7 @@
|
|||
|
||||
#include "datapath.h"
|
||||
#include "vport.h"
|
||||
#include "vport-vxlan.h"
|
||||
|
||||
/**
|
||||
* struct vxlan_port - Keeps track of open UDP ports
|
||||
|
@ -49,6 +50,7 @@
|
|||
struct vxlan_port {
|
||||
struct vxlan_sock *vs;
|
||||
char name[IFNAMSIZ];
|
||||
u32 exts; /* VXLAN_F_* in <net/vxlan.h> */
|
||||
};
|
||||
|
||||
static struct vport_ops ovs_vxlan_vport_ops;
|
||||
|
@ -59,19 +61,30 @@ static inline struct vxlan_port *vxlan_vport(const struct vport *vport)
|
|||
}
|
||||
|
||||
/* Called with rcu_read_lock and BH disabled. */
|
||||
static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni)
|
||||
static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb,
|
||||
struct vxlan_metadata *md)
|
||||
{
|
||||
struct ovs_tunnel_info tun_info;
|
||||
struct vxlan_port *vxlan_port;
|
||||
struct vport *vport = vs->data;
|
||||
struct iphdr *iph;
|
||||
struct ovs_vxlan_opts opts = {
|
||||
.gbp = md->gbp,
|
||||
};
|
||||
__be64 key;
|
||||
__be16 flags;
|
||||
|
||||
flags = TUNNEL_KEY;
|
||||
vxlan_port = vxlan_vport(vport);
|
||||
if (vxlan_port->exts & VXLAN_F_GBP)
|
||||
flags |= TUNNEL_VXLAN_OPT;
|
||||
|
||||
/* Save outer tunnel values */
|
||||
iph = ip_hdr(skb);
|
||||
key = cpu_to_be64(ntohl(vx_vni) >> 8);
|
||||
key = cpu_to_be64(ntohl(md->vni) >> 8);
|
||||
ovs_flow_tun_info_init(&tun_info, iph,
|
||||
udp_hdr(skb)->source, udp_hdr(skb)->dest,
|
||||
key, TUNNEL_KEY, NULL, 0);
|
||||
key, flags, &opts, sizeof(opts));
|
||||
|
||||
ovs_vport_receive(vport, skb, &tun_info);
|
||||
}
|
||||
|
@ -83,6 +96,21 @@ static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb)
|
|||
|
||||
if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(dst_port)))
|
||||
return -EMSGSIZE;
|
||||
|
||||
if (vxlan_port->exts) {
|
||||
struct nlattr *exts;
|
||||
|
||||
exts = nla_nest_start(skb, OVS_TUNNEL_ATTR_EXTENSION);
|
||||
if (!exts)
|
||||
return -EMSGSIZE;
|
||||
|
||||
if (vxlan_port->exts & VXLAN_F_GBP &&
|
||||
nla_put_flag(skb, OVS_VXLAN_EXT_GBP))
|
||||
return -EMSGSIZE;
|
||||
|
||||
nla_nest_end(skb, exts);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -95,6 +123,31 @@ static void vxlan_tnl_destroy(struct vport *vport)
|
|||
ovs_vport_deferred_free(vport);
|
||||
}
|
||||
|
||||
static const struct nla_policy exts_policy[OVS_VXLAN_EXT_MAX+1] = {
|
||||
[OVS_VXLAN_EXT_GBP] = { .type = NLA_FLAG, },
|
||||
};
|
||||
|
||||
static int vxlan_configure_exts(struct vport *vport, struct nlattr *attr)
|
||||
{
|
||||
struct nlattr *exts[OVS_VXLAN_EXT_MAX+1];
|
||||
struct vxlan_port *vxlan_port;
|
||||
int err;
|
||||
|
||||
if (nla_len(attr) < sizeof(struct nlattr))
|
||||
return -EINVAL;
|
||||
|
||||
err = nla_parse_nested(exts, OVS_VXLAN_EXT_MAX, attr, exts_policy);
|
||||
if (err < 0)
|
||||
return err;
|
||||
|
||||
vxlan_port = vxlan_vport(vport);
|
||||
|
||||
if (exts[OVS_VXLAN_EXT_GBP])
|
||||
vxlan_port->exts |= VXLAN_F_GBP;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
|
||||
{
|
||||
struct net *net = ovs_dp_get_net(parms->dp);
|
||||
|
@ -127,7 +180,17 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
|
|||
vxlan_port = vxlan_vport(vport);
|
||||
strncpy(vxlan_port->name, parms->name, IFNAMSIZ);
|
||||
|
||||
vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true, 0);
|
||||
a = nla_find_nested(options, OVS_TUNNEL_ATTR_EXTENSION);
|
||||
if (a) {
|
||||
err = vxlan_configure_exts(vport, a);
|
||||
if (err) {
|
||||
ovs_vport_free(vport);
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
||||
vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true,
|
||||
vxlan_port->exts);
|
||||
if (IS_ERR(vs)) {
|
||||
ovs_vport_free(vport);
|
||||
return (void *)vs;
|
||||
|
@ -140,12 +203,28 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
|
|||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
static int vxlan_ext_gbp(struct sk_buff *skb)
|
||||
{
|
||||
const struct ovs_tunnel_info *tun_info;
|
||||
const struct ovs_vxlan_opts *opts;
|
||||
|
||||
tun_info = OVS_CB(skb)->egress_tun_info;
|
||||
opts = tun_info->options;
|
||||
|
||||
if (tun_info->tunnel.tun_flags & TUNNEL_VXLAN_OPT &&
|
||||
tun_info->options_len >= sizeof(*opts))
|
||||
return opts->gbp;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
|
||||
{
|
||||
struct net *net = ovs_dp_get_net(vport->dp);
|
||||
struct vxlan_port *vxlan_port = vxlan_vport(vport);
|
||||
__be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport;
|
||||
const struct ovs_key_ipv4_tunnel *tun_key;
|
||||
struct vxlan_metadata md = {0};
|
||||
struct rtable *rt;
|
||||
struct flowi4 fl;
|
||||
__be16 src_port;
|
||||
|
@ -170,12 +249,14 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
|
|||
skb->ignore_df = 1;
|
||||
|
||||
src_port = udp_flow_src_port(net, skb, 0, 0, true);
|
||||
md.vni = htonl(be64_to_cpu(tun_key->tun_id) << 8);
|
||||
md.gbp = vxlan_ext_gbp(skb);
|
||||
|
||||
err = vxlan_xmit_skb(vxlan_port->vs, rt, skb,
|
||||
fl.saddr, tun_key->ipv4_dst,
|
||||
tun_key->ipv4_tos, tun_key->ipv4_ttl, df,
|
||||
src_port, dst_port,
|
||||
htonl(be64_to_cpu(tun_key->tun_id) << 8),
|
||||
&md,
|
||||
false);
|
||||
if (err < 0)
|
||||
ip_rt_put(rt);
|
||||
|

@@ -0,0 +1,11 @@
#ifndef VPORT_VXLAN_H
#define VPORT_VXLAN_H 1

#include <linux/kernel.h>
#include <linux/types.h>

struct ovs_vxlan_opts {
__u32 gbp;
};

#endif