From d4ac05ff3697e036dcb0e2e284c5f7eb77cc0966 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Tue, 16 Feb 2016 21:58:57 +0100 Subject: [PATCH 1/7] vxlan: introduce vxlan_hdr Currently, pointer to the vxlan header is kept in a local variable. It has to be reloaded whenever the pskb pull operations are performed which usually happens somewhere deep in called functions. Create a vxlan_hdr function and use it to reference the vxlan header instead. Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 17 +++++++---------- include/net/vxlan.h | 5 +++++ 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index ee1206d9f8df..524e3b139122 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1257,7 +1257,6 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) { struct metadata_dst *tun_dst = NULL; struct vxlan_sock *vs; - struct vxlanhdr *vxh; u32 flags, vni; struct vxlan_metadata _md; struct vxlan_metadata *md = &_md; @@ -1266,9 +1265,8 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) if (!pskb_may_pull(skb, VXLAN_HLEN)) goto error; - vxh = (struct vxlanhdr *)(udp_hdr(skb) + 1); - flags = ntohl(vxh->vx_flags); - vni = ntohl(vxh->vx_vni); + flags = ntohl(vxlan_hdr(skb)->vx_flags); + vni = ntohl(vxlan_hdr(skb)->vx_vni); if (flags & VXLAN_HF_VNI) { flags &= ~VXLAN_HF_VNI; @@ -1279,16 +1277,14 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB))) goto drop; - vxh = (struct vxlanhdr *)(udp_hdr(skb) + 1); vs = rcu_dereference_sk_user_data(sk); if (!vs) goto drop; if ((flags & VXLAN_HF_RCO) && (vs->flags & VXLAN_F_REMCSUM_RX)) { - vxh = vxlan_remcsum(skb, vxh, sizeof(struct vxlanhdr), vni, - !!(vs->flags & VXLAN_F_REMCSUM_NOPARTIAL)); - if (!vxh) + if (!vxlan_remcsum(skb, vxlan_hdr(skb), sizeof(struct vxlanhdr), vni, + !!(vs->flags & VXLAN_F_REMCSUM_NOPARTIAL))) goto drop; flags &= ~VXLAN_HF_RCO; @@ -1313,7 +1309,7 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) if ((flags & VXLAN_HF_GBP) && (vs->flags & VXLAN_F_GBP)) { struct vxlanhdr_gbp *gbp; - gbp = (struct vxlanhdr_gbp *)vxh; + gbp = (struct vxlanhdr_gbp *)vxlan_hdr(skb); md->gbp = ntohs(gbp->policy_id); if (tun_dst) @@ -1351,7 +1347,8 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) bad_flags: netdev_dbg(skb->dev, "invalid vxlan flags=%#x vni=%#x\n", - ntohl(vxh->vx_flags), ntohl(vxh->vx_vni)); + ntohl(vxlan_hdr(skb)->vx_flags), + ntohl(vxlan_hdr(skb)->vx_vni)); error: if (tun_dst) diff --git a/include/net/vxlan.h b/include/net/vxlan.h index b314e4af89c5..3f38b40ec4aa 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -262,6 +262,11 @@ static inline netdev_features_t vxlan_features_check(struct sk_buff *skb, /* IPv6 header + UDP + VXLAN + Ethernet header */ #define VXLAN6_HEADROOM (40 + 8 + 8 + 14) +static inline struct vxlanhdr *vxlan_hdr(struct sk_buff *skb) +{ + return (struct vxlanhdr *)(udp_hdr(skb) + 1); +} + #if IS_ENABLED(CONFIG_VXLAN) void vxlan_get_rx_port(struct net_device *netdev); #else From 54bfd872bf16d40b61bd0cd9b769b2fef67dd272 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Tue, 16 Feb 2016 21:58:58 +0100 Subject: [PATCH 2/7] vxlan: keep flags and vni in network byte order Prevent repeated conversions from and to network order in the fast path. To achieve this, define all flag constants in big endian order and store VNI as __be32. To prevent confusion between the actual VNI value and the VNI field from the header (which contains additional reserved byte), strictly distinguish between "vni" and "vni_field". Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 115 ++++++++++++++++++++++---------------------- include/net/vxlan.h | 70 ++++++++++++++++++++++----- 2 files changed, 116 insertions(+), 69 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 524e3b139122..4e3d3dfe2a0e 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -197,9 +197,9 @@ static int vxlan_nla_put_addr(struct sk_buff *skb, int attr, #endif /* Virtual Network hash table head */ -static inline struct hlist_head *vni_head(struct vxlan_sock *vs, u32 id) +static inline struct hlist_head *vni_head(struct vxlan_sock *vs, __be32 vni) { - return &vs->vni_list[hash_32(id, VNI_HASH_BITS)]; + return &vs->vni_list[hash_32((__force u32)vni, VNI_HASH_BITS)]; } /* Socket hash table head */ @@ -242,12 +242,12 @@ static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family, return NULL; } -static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, u32 id) +static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, __be32 vni) { struct vxlan_dev *vxlan; - hlist_for_each_entry_rcu(vxlan, vni_head(vs, id), hlist) { - if (vxlan->default_dst.remote_vni == id) + hlist_for_each_entry_rcu(vxlan, vni_head(vs, vni), hlist) { + if (vxlan->default_dst.remote_vni == vni) return vxlan; } @@ -255,7 +255,7 @@ static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, u32 id) } /* Look up VNI in a per net namespace table */ -static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id, +static struct vxlan_dev *vxlan_find_vni(struct net *net, __be32 vni, sa_family_t family, __be16 port, u32 flags) { @@ -265,7 +265,7 @@ static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id, if (!vs) return NULL; - return vxlan_vs_find_vni(vs, id); + return vxlan_vs_find_vni(vs, vni); } /* Fill in neighbour message in skbuff. */ @@ -315,7 +315,7 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, nla_put_be16(skb, NDA_PORT, rdst->remote_port)) goto nla_put_failure; if (rdst->remote_vni != vxlan->default_dst.remote_vni && - nla_put_u32(skb, NDA_VNI, rdst->remote_vni)) + nla_put_u32(skb, NDA_VNI, be32_to_cpu(rdst->remote_vni))) goto nla_put_failure; if (rdst->remote_ifindex && nla_put_u32(skb, NDA_IFINDEX, rdst->remote_ifindex)) @@ -383,7 +383,7 @@ static void vxlan_ip_miss(struct net_device *dev, union vxlan_addr *ipa) }; struct vxlan_rdst remote = { .remote_ip = *ipa, /* goes to NDA_DST */ - .remote_vni = VXLAN_N_VID, + .remote_vni = cpu_to_be32(VXLAN_N_VID), }; vxlan_fdb_notify(vxlan, &f, &remote, RTM_GETNEIGH); @@ -452,7 +452,7 @@ static struct vxlan_fdb *vxlan_find_mac(struct vxlan_dev *vxlan, /* caller should hold vxlan->hash_lock */ static struct vxlan_rdst *vxlan_fdb_find_rdst(struct vxlan_fdb *f, union vxlan_addr *ip, __be16 port, - __u32 vni, __u32 ifindex) + __be32 vni, __u32 ifindex) { struct vxlan_rdst *rd; @@ -469,7 +469,8 @@ static struct vxlan_rdst *vxlan_fdb_find_rdst(struct vxlan_fdb *f, /* Replace destination of unicast mac */ static int vxlan_fdb_replace(struct vxlan_fdb *f, - union vxlan_addr *ip, __be16 port, __u32 vni, __u32 ifindex) + union vxlan_addr *ip, __be16 port, __be32 vni, + __u32 ifindex) { struct vxlan_rdst *rd; @@ -491,7 +492,7 @@ static int vxlan_fdb_replace(struct vxlan_fdb *f, /* Add/update destinations for multicast */ static int vxlan_fdb_append(struct vxlan_fdb *f, - union vxlan_addr *ip, __be16 port, __u32 vni, + union vxlan_addr *ip, __be16 port, __be32 vni, __u32 ifindex, struct vxlan_rdst **rdp) { struct vxlan_rdst *rd; @@ -523,7 +524,8 @@ static int vxlan_fdb_append(struct vxlan_fdb *f, static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff *skb, unsigned int off, struct vxlanhdr *vh, size_t hdrlen, - u32 data, struct gro_remcsum *grc, + __be32 vni_field, + struct gro_remcsum *grc, bool nopartial) { size_t start, offset; @@ -534,10 +536,8 @@ static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff *skb, if (!NAPI_GRO_CB(skb)->csum_valid) return NULL; - start = (data & VXLAN_RCO_MASK) << VXLAN_RCO_SHIFT; - offset = start + ((data & VXLAN_RCO_UDP) ? - offsetof(struct udphdr, check) : - offsetof(struct tcphdr, check)); + start = vxlan_rco_start(vni_field); + offset = start + vxlan_rco_offset(vni_field); vh = skb_gro_remcsum_process(skb, (void *)vh, off, hdrlen, start, offset, grc, nopartial); @@ -557,7 +557,7 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head, int flush = 1; struct vxlan_sock *vs = container_of(uoff, struct vxlan_sock, udp_offloads); - u32 flags; + __be32 flags; struct gro_remcsum grc; skb_gro_remcsum_init(&grc); @@ -573,11 +573,11 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head, skb_gro_postpull_rcsum(skb, vh, sizeof(struct vxlanhdr)); - flags = ntohl(vh->vx_flags); + flags = vh->vx_flags; if ((flags & VXLAN_HF_RCO) && (vs->flags & VXLAN_F_REMCSUM_RX)) { vh = vxlan_gro_remcsum(skb, off_vx, vh, sizeof(struct vxlanhdr), - ntohl(vh->vx_vni), &grc, + vh->vx_vni, &grc, !!(vs->flags & VXLAN_F_REMCSUM_NOPARTIAL)); @@ -668,7 +668,7 @@ static void vxlan_notify_del_rx_port(struct vxlan_sock *vs) static int vxlan_fdb_create(struct vxlan_dev *vxlan, const u8 *mac, union vxlan_addr *ip, __u16 state, __u16 flags, - __be16 port, __u32 vni, __u32 ifindex, + __be16 port, __be32 vni, __u32 ifindex, __u8 ndm_flags) { struct vxlan_rdst *rd = NULL; @@ -777,7 +777,8 @@ static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f) } static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan, - union vxlan_addr *ip, __be16 *port, u32 *vni, u32 *ifindex) + union vxlan_addr *ip, __be16 *port, __be32 *vni, + u32 *ifindex) { struct net *net = dev_net(vxlan->dev); int err; @@ -810,7 +811,7 @@ static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan, if (tb[NDA_VNI]) { if (nla_len(tb[NDA_VNI]) != sizeof(u32)) return -EINVAL; - *vni = nla_get_u32(tb[NDA_VNI]); + *vni = cpu_to_be32(nla_get_u32(tb[NDA_VNI])); } else { *vni = vxlan->default_dst.remote_vni; } @@ -840,7 +841,8 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], /* struct net *net = dev_net(vxlan->dev); */ union vxlan_addr ip; __be16 port; - u32 vni, ifindex; + __be32 vni; + u32 ifindex; int err; if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_REACHABLE))) { @@ -877,7 +879,8 @@ static int vxlan_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], struct vxlan_rdst *rd = NULL; union vxlan_addr ip; __be16 port; - u32 vni, ifindex; + __be32 vni; + u32 ifindex; int err; err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &vni, &ifindex); @@ -1133,17 +1136,16 @@ static int vxlan_igmp_leave(struct vxlan_dev *vxlan) } static struct vxlanhdr *vxlan_remcsum(struct sk_buff *skb, struct vxlanhdr *vh, - size_t hdrlen, u32 data, bool nopartial) + size_t hdrlen, __be32 vni_field, + bool nopartial) { size_t start, offset, plen; if (skb->remcsum_offload) return vh; - start = (data & VXLAN_RCO_MASK) << VXLAN_RCO_SHIFT; - offset = start + ((data & VXLAN_RCO_UDP) ? - offsetof(struct udphdr, check) : - offsetof(struct tcphdr, check)); + start = vxlan_rco_start(vni_field); + offset = start + vxlan_rco_offset(vni_field); plen = hdrlen + offset + sizeof(u16); @@ -1159,7 +1161,7 @@ static struct vxlanhdr *vxlan_remcsum(struct sk_buff *skb, struct vxlanhdr *vh, } static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, - struct vxlan_metadata *md, u32 vni, + struct vxlan_metadata *md, __be32 vni, struct metadata_dst *tun_dst) { struct iphdr *oip = NULL; @@ -1257,7 +1259,7 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) { struct metadata_dst *tun_dst = NULL; struct vxlan_sock *vs; - u32 flags, vni; + __be32 flags, vni_field; struct vxlan_metadata _md; struct vxlan_metadata *md = &_md; @@ -1265,8 +1267,8 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) if (!pskb_may_pull(skb, VXLAN_HLEN)) goto error; - flags = ntohl(vxlan_hdr(skb)->vx_flags); - vni = ntohl(vxlan_hdr(skb)->vx_vni); + flags = vxlan_hdr(skb)->vx_flags; + vni_field = vxlan_hdr(skb)->vx_vni; if (flags & VXLAN_HF_VNI) { flags &= ~VXLAN_HF_VNI; @@ -1283,17 +1285,18 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) goto drop; if ((flags & VXLAN_HF_RCO) && (vs->flags & VXLAN_F_REMCSUM_RX)) { - if (!vxlan_remcsum(skb, vxlan_hdr(skb), sizeof(struct vxlanhdr), vni, + if (!vxlan_remcsum(skb, vxlan_hdr(skb), sizeof(struct vxlanhdr), + vni_field, !!(vs->flags & VXLAN_F_REMCSUM_NOPARTIAL))) goto drop; flags &= ~VXLAN_HF_RCO; - vni &= VXLAN_VNI_MASK; + vni_field &= VXLAN_VNI_MASK; } if (vxlan_collect_metadata(vs)) { tun_dst = udp_tun_rx_dst(skb, vxlan_get_sk_family(vs), TUNNEL_KEY, - cpu_to_be64(vni >> 8), sizeof(*md)); + vxlan_vni(vni_field), sizeof(*md)); if (!tun_dst) goto drop; @@ -1324,7 +1327,7 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) flags &= ~VXLAN_GBP_USED_BITS; } - if (flags || vni & ~VXLAN_VNI_MASK) { + if (flags || vni_field & ~VXLAN_VNI_MASK) { /* If there are any unprocessed flags remaining treat * this as a malformed packet. This behavior diverges from * VXLAN RFC (RFC7348) which stipulates that bits in reserved @@ -1337,7 +1340,7 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) goto bad_flags; } - vxlan_rcv(vs, skb, md, vni >> 8, tun_dst); + vxlan_rcv(vs, skb, md, vxlan_vni(vni_field), tun_dst); return 0; drop: @@ -1680,7 +1683,7 @@ static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, u32 vxflags, return; gbp = (struct vxlanhdr_gbp *)vxh; - vxh->vx_flags |= htonl(VXLAN_HF_GBP); + vxh->vx_flags |= VXLAN_HF_GBP; if (md->gbp & VXLAN_GBP_DONT_LEARN) gbp->dont_learn = 1; @@ -1700,7 +1703,6 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst, int min_headroom; int err; int type = udp_sum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; - u16 hdrlen = sizeof(struct vxlanhdr); if ((vxflags & VXLAN_F_REMCSUM_TX) && skb->ip_summed == CHECKSUM_PARTIAL) { @@ -1733,18 +1735,15 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst, return PTR_ERR(skb); vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); - vxh->vx_flags = htonl(VXLAN_HF_VNI); - vxh->vx_vni = vni; + vxh->vx_flags = VXLAN_HF_VNI; + vxh->vx_vni = vxlan_vni_field(vni); if (type & SKB_GSO_TUNNEL_REMCSUM) { - u32 data = (skb_checksum_start_offset(skb) - hdrlen) >> - VXLAN_RCO_SHIFT; + unsigned int start; - if (skb->csum_offset == offsetof(struct udphdr, check)) - data |= VXLAN_RCO_UDP; - - vxh->vx_vni |= htonl(data); - vxh->vx_flags |= htonl(VXLAN_HF_RCO); + start = skb_checksum_start_offset(skb) - sizeof(struct vxlanhdr); + vxh->vx_vni |= vxlan_compute_rco(start, skb->csum_offset); + vxh->vx_flags |= VXLAN_HF_RCO; if (!skb_is_gso(skb)) { skb->ip_summed = CHECKSUM_NONE; @@ -1892,7 +1891,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, struct vxlan_metadata _md; struct vxlan_metadata *md = &_md; __be16 src_port = 0, dst_port; - u32 vni; + __be32 vni; __be16 df = 0; __u8 tos, ttl; int err; @@ -1914,7 +1913,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, goto drop; } dst_port = info->key.tp_dst ? : vxlan->cfg.dst_port; - vni = be64_to_cpu(info->key.tun_id); + vni = vxlan_tun_id_to_vni(info->key.tun_id); remote_ip.sa.sa_family = ip_tunnel_info_af(info); if (remote_ip.sa.sa_family == AF_INET) remote_ip.sin.sin_addr.s_addr = info->key.u.ipv4.dst; @@ -2007,7 +2006,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, tos = ip_tunnel_ecn_encap(tos, old_iph, skb); ttl = ttl ? : ip4_dst_hoplimit(&rt->dst); err = vxlan_build_skb(skb, &rt->dst, sizeof(struct iphdr), - htonl(vni << 8), md, flags, udp_sum); + vni, md, flags, udp_sum); if (err < 0) goto xmit_tx_error; @@ -2065,7 +2064,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, ttl = ttl ? : ip6_dst_hoplimit(ndst); skb_scrub_packet(skb, xnet); err = vxlan_build_skb(skb, ndst, sizeof(struct ipv6hdr), - htonl(vni << 8), md, flags, udp_sum); + vni, md, flags, udp_sum); if (err < 0) { dst_release(ndst); return; @@ -2222,7 +2221,7 @@ static void vxlan_cleanup(unsigned long arg) static void vxlan_vs_add_dev(struct vxlan_sock *vs, struct vxlan_dev *vxlan) { struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id); - __u32 vni = vxlan->default_dst.remote_vni; + __be32 vni = vxlan->default_dst.remote_vni; spin_lock(&vn->sock_lock); hlist_add_head_rcu(&vxlan->hlist, vni_head(vs, vni)); @@ -2837,7 +2836,7 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev, memset(&conf, 0, sizeof(conf)); if (data[IFLA_VXLAN_ID]) - conf.vni = nla_get_u32(data[IFLA_VXLAN_ID]); + conf.vni = cpu_to_be32(nla_get_u32(data[IFLA_VXLAN_ID])); if (data[IFLA_VXLAN_GROUP]) { conf.remote_ip.sin.sin_addr.s_addr = nla_get_in_addr(data[IFLA_VXLAN_GROUP]); @@ -2941,7 +2940,7 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev, break; case -EEXIST: - pr_info("duplicate VNI %u\n", conf.vni); + pr_info("duplicate VNI %u\n", be32_to_cpu(conf.vni)); break; } @@ -2999,7 +2998,7 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev) .high = htons(vxlan->cfg.port_max), }; - if (nla_put_u32(skb, IFLA_VXLAN_ID, dst->remote_vni)) + if (nla_put_u32(skb, IFLA_VXLAN_ID, be32_to_cpu(dst->remote_vni))) goto nla_put_failure; if (!vxlan_addr_any(&dst->remote_ip)) { diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 3f38b40ec4aa..1b85a3b40c5a 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -24,11 +24,11 @@ struct vxlanhdr { }; /* VXLAN header flags. */ -#define VXLAN_HF_VNI BIT(27) +#define VXLAN_HF_VNI cpu_to_be32(BIT(27)) #define VXLAN_N_VID (1u << 24) #define VXLAN_VID_MASK (VXLAN_N_VID - 1) -#define VXLAN_VNI_MASK (VXLAN_VID_MASK << 8) +#define VXLAN_VNI_MASK cpu_to_be32(VXLAN_VID_MASK << 8) #define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr)) #define VNI_HASH_BITS 10 @@ -55,14 +55,14 @@ struct vxlanhdr { */ /* VXLAN-RCO header flags. */ -#define VXLAN_HF_RCO BIT(21) +#define VXLAN_HF_RCO cpu_to_be32(BIT(21)) /* Remote checksum offload header option */ -#define VXLAN_RCO_MASK 0x7f /* Last byte of vni field */ -#define VXLAN_RCO_UDP 0x80 /* Indicate UDP RCO (TCP when not set *) */ -#define VXLAN_RCO_SHIFT 1 /* Left shift of start */ +#define VXLAN_RCO_MASK cpu_to_be32(0x7f) /* Last byte of vni field */ +#define VXLAN_RCO_UDP cpu_to_be32(0x80) /* Indicate UDP RCO (TCP when not set *) */ +#define VXLAN_RCO_SHIFT 1 /* Left shift of start */ #define VXLAN_RCO_SHIFT_MASK ((1 << VXLAN_RCO_SHIFT) - 1) -#define VXLAN_MAX_REMCSUM_START (VXLAN_RCO_MASK << VXLAN_RCO_SHIFT) +#define VXLAN_MAX_REMCSUM_START (0x7f << VXLAN_RCO_SHIFT) /* * VXLAN Group Based Policy Extension (VXLAN_F_GBP): @@ -105,9 +105,9 @@ struct vxlanhdr_gbp { }; /* VXLAN-GBP header flags. */ -#define VXLAN_HF_GBP BIT(31) +#define VXLAN_HF_GBP cpu_to_be32(BIT(31)) -#define VXLAN_GBP_USED_BITS (VXLAN_HF_GBP | 0xFFFFFF) +#define VXLAN_GBP_USED_BITS (VXLAN_HF_GBP | cpu_to_be32(0xFFFFFF)) /* skb->mark mapping * @@ -144,7 +144,7 @@ union vxlan_addr { struct vxlan_rdst { union vxlan_addr remote_ip; __be16 remote_port; - u32 remote_vni; + __be32 remote_vni; u32 remote_ifindex; struct list_head list; struct rcu_head rcu; @@ -154,7 +154,7 @@ struct vxlan_rdst { struct vxlan_config { union vxlan_addr remote_ip; union vxlan_addr saddr; - u32 vni; + __be32 vni; int remote_ifindex; int mtu; __be16 dst_port; @@ -267,6 +267,54 @@ static inline struct vxlanhdr *vxlan_hdr(struct sk_buff *skb) return (struct vxlanhdr *)(udp_hdr(skb) + 1); } +static inline __be32 vxlan_vni(__be32 vni_field) +{ +#if defined(__BIG_ENDIAN) + return vni_field >> 8; +#else + return (vni_field & VXLAN_VNI_MASK) << 8; +#endif +} + +static inline __be32 vxlan_vni_field(__be32 vni) +{ +#if defined(__BIG_ENDIAN) + return vni << 8; +#else + return vni >> 8; +#endif +} + +static inline __be32 vxlan_tun_id_to_vni(__be64 tun_id) +{ +#if defined(__BIG_ENDIAN) + return tun_id; +#else + return tun_id >> 32; +#endif +} + +static inline size_t vxlan_rco_start(__be32 vni_field) +{ + return be32_to_cpu(vni_field & VXLAN_RCO_MASK) << VXLAN_RCO_SHIFT; +} + +static inline size_t vxlan_rco_offset(__be32 vni_field) +{ + return (vni_field & VXLAN_RCO_UDP) ? + offsetof(struct udphdr, check) : + offsetof(struct tcphdr, check); +} + +static inline __be32 vxlan_compute_rco(unsigned int start, unsigned int offset) +{ + __be32 vni_field = cpu_to_be32(start >> VXLAN_RCO_SHIFT); + + if (offset == offsetof(struct udphdr, check)) + vni_field |= VXLAN_RCO_UDP; + return vni_field; +} + #if IS_ENABLED(CONFIG_VXLAN) void vxlan_get_rx_port(struct net_device *netdev); #else From be5cfeab8f95995d5590ab919b9f4dde19d50ea7 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Tue, 16 Feb 2016 21:58:59 +0100 Subject: [PATCH 3/7] vxlan: simplify vxlan_remcsum Part of the parameters is not needed. Simplify the caller of this function in preparation of making vxlan rx more comprehensible. Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 4e3d3dfe2a0e..161e39ce3914 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1135,29 +1135,25 @@ static int vxlan_igmp_leave(struct vxlan_dev *vxlan) return ret; } -static struct vxlanhdr *vxlan_remcsum(struct sk_buff *skb, struct vxlanhdr *vh, - size_t hdrlen, __be32 vni_field, - bool nopartial) +static bool vxlan_remcsum(struct sk_buff *skb, u32 vxflags, __be32 vni_field) { size_t start, offset, plen; if (skb->remcsum_offload) - return vh; + return true; start = vxlan_rco_start(vni_field); offset = start + vxlan_rco_offset(vni_field); - plen = hdrlen + offset + sizeof(u16); + plen = sizeof(struct vxlanhdr) + offset + sizeof(u16); if (!pskb_may_pull(skb, plen)) - return NULL; + return false; - vh = (struct vxlanhdr *)(udp_hdr(skb) + 1); + skb_remcsum_process(skb, (void *)(vxlan_hdr(skb) + 1), start, offset, + !!(vxflags & VXLAN_F_REMCSUM_NOPARTIAL)); - skb_remcsum_process(skb, (void *)vh + hdrlen, start, offset, - nopartial); - - return vh; + return true; } static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, @@ -1285,9 +1281,7 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) goto drop; if ((flags & VXLAN_HF_RCO) && (vs->flags & VXLAN_F_REMCSUM_RX)) { - if (!vxlan_remcsum(skb, vxlan_hdr(skb), sizeof(struct vxlanhdr), - vni_field, - !!(vs->flags & VXLAN_F_REMCSUM_NOPARTIAL))) + if (!vxlan_remcsum(skb, vs->flags, vni_field)) goto drop; flags &= ~VXLAN_HF_RCO; From 3288af0892e322b019a4c98810959b2a1c6dae7b Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Tue, 16 Feb 2016 21:59:00 +0100 Subject: [PATCH 4/7] vxlan: move GBP header parsing to a separate function To make vxlan_udp_encap_recv shorter and more comprehensible. Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 161e39ce3914..f65a923c2999 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1156,6 +1156,24 @@ static bool vxlan_remcsum(struct sk_buff *skb, u32 vxflags, __be32 vni_field) return true; } +static void vxlan_parse_gbp_hdr(struct sk_buff *skb, struct vxlan_metadata *md, + struct metadata_dst *tun_dst) +{ + struct vxlanhdr_gbp *gbp; + + gbp = (struct vxlanhdr_gbp *)vxlan_hdr(skb); + md->gbp = ntohs(gbp->policy_id); + + if (tun_dst) + tun_dst->u.tun_info.key.tun_flags |= TUNNEL_VXLAN_OPT; + + if (gbp->dont_learn) + md->gbp |= VXLAN_GBP_DONT_LEARN; + + if (gbp->policy_applied) + md->gbp |= VXLAN_GBP_POLICY_APPLIED; +} + static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, struct vxlan_metadata *md, __be32 vni, struct metadata_dst *tun_dst) @@ -1304,20 +1322,7 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) * used by VXLAN extensions if explicitly requested. */ if ((flags & VXLAN_HF_GBP) && (vs->flags & VXLAN_F_GBP)) { - struct vxlanhdr_gbp *gbp; - - gbp = (struct vxlanhdr_gbp *)vxlan_hdr(skb); - md->gbp = ntohs(gbp->policy_id); - - if (tun_dst) - tun_dst->u.tun_info.key.tun_flags |= TUNNEL_VXLAN_OPT; - - if (gbp->dont_learn) - md->gbp |= VXLAN_GBP_DONT_LEARN; - - if (gbp->policy_applied) - md->gbp |= VXLAN_GBP_POLICY_APPLIED; - + vxlan_parse_gbp_hdr(skb, md, tun_dst); flags &= ~VXLAN_GBP_USED_BITS; } From f14ecebb3a4e83eb6233e0167aa4ba675c99e514 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Tue, 16 Feb 2016 21:59:01 +0100 Subject: [PATCH 5/7] vxlan: clean up extension handling on rx Bring the extension handling to a single place and move the actual handling logic out of vxlan_udp_encap_recv as much as possible. Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 62 +++++++++++++++++++++++---------------------- 1 file changed, 32 insertions(+), 30 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index f65a923c2999..ac688dc75c66 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1135,15 +1135,16 @@ static int vxlan_igmp_leave(struct vxlan_dev *vxlan) return ret; } -static bool vxlan_remcsum(struct sk_buff *skb, u32 vxflags, __be32 vni_field) +static bool vxlan_remcsum(struct vxlanhdr *unparsed, + struct sk_buff *skb, u32 vxflags) { size_t start, offset, plen; - if (skb->remcsum_offload) - return true; + if (!(unparsed->vx_flags & VXLAN_HF_RCO) || skb->remcsum_offload) + goto out; - start = vxlan_rco_start(vni_field); - offset = start + vxlan_rco_offset(vni_field); + start = vxlan_rco_start(unparsed->vx_vni); + offset = start + vxlan_rco_offset(unparsed->vx_vni); plen = sizeof(struct vxlanhdr) + offset + sizeof(u16); @@ -1152,16 +1153,21 @@ static bool vxlan_remcsum(struct sk_buff *skb, u32 vxflags, __be32 vni_field) skb_remcsum_process(skb, (void *)(vxlan_hdr(skb) + 1), start, offset, !!(vxflags & VXLAN_F_REMCSUM_NOPARTIAL)); - +out: + unparsed->vx_flags &= ~VXLAN_HF_RCO; + unparsed->vx_vni &= VXLAN_VNI_MASK; return true; } -static void vxlan_parse_gbp_hdr(struct sk_buff *skb, struct vxlan_metadata *md, +static void vxlan_parse_gbp_hdr(struct vxlanhdr *unparsed, + struct vxlan_metadata *md, struct metadata_dst *tun_dst) { - struct vxlanhdr_gbp *gbp; + struct vxlanhdr_gbp *gbp = (struct vxlanhdr_gbp *)unparsed; + + if (!(unparsed->vx_flags & VXLAN_HF_GBP)) + goto out; - gbp = (struct vxlanhdr_gbp *)vxlan_hdr(skb); md->gbp = ntohs(gbp->policy_id); if (tun_dst) @@ -1172,6 +1178,9 @@ static void vxlan_parse_gbp_hdr(struct sk_buff *skb, struct vxlan_metadata *md, if (gbp->policy_applied) md->gbp |= VXLAN_GBP_POLICY_APPLIED; + +out: + unparsed->vx_flags &= ~VXLAN_GBP_USED_BITS; } static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, @@ -1273,7 +1282,7 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) { struct metadata_dst *tun_dst = NULL; struct vxlan_sock *vs; - __be32 flags, vni_field; + struct vxlanhdr unparsed; struct vxlan_metadata _md; struct vxlan_metadata *md = &_md; @@ -1281,11 +1290,10 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) if (!pskb_may_pull(skb, VXLAN_HLEN)) goto error; - flags = vxlan_hdr(skb)->vx_flags; - vni_field = vxlan_hdr(skb)->vx_vni; - - if (flags & VXLAN_HF_VNI) { - flags &= ~VXLAN_HF_VNI; + unparsed = *vxlan_hdr(skb); + if (unparsed.vx_flags & VXLAN_HF_VNI) { + unparsed.vx_flags &= ~VXLAN_HF_VNI; + unparsed.vx_vni &= ~VXLAN_VNI_MASK; } else { /* VNI flag always required to be set */ goto bad_flags; @@ -1298,17 +1306,10 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) if (!vs) goto drop; - if ((flags & VXLAN_HF_RCO) && (vs->flags & VXLAN_F_REMCSUM_RX)) { - if (!vxlan_remcsum(skb, vs->flags, vni_field)) - goto drop; - - flags &= ~VXLAN_HF_RCO; - vni_field &= VXLAN_VNI_MASK; - } - if (vxlan_collect_metadata(vs)) { tun_dst = udp_tun_rx_dst(skb, vxlan_get_sk_family(vs), TUNNEL_KEY, - vxlan_vni(vni_field), sizeof(*md)); + vxlan_vni(vxlan_hdr(skb)->vx_vni), + sizeof(*md)); if (!tun_dst) goto drop; @@ -1321,12 +1322,13 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) /* For backwards compatibility, only allow reserved fields to be * used by VXLAN extensions if explicitly requested. */ - if ((flags & VXLAN_HF_GBP) && (vs->flags & VXLAN_F_GBP)) { - vxlan_parse_gbp_hdr(skb, md, tun_dst); - flags &= ~VXLAN_GBP_USED_BITS; - } + if (vs->flags & VXLAN_F_REMCSUM_RX) + if (!vxlan_remcsum(&unparsed, skb, vs->flags)) + goto drop; + if (vs->flags & VXLAN_F_GBP) + vxlan_parse_gbp_hdr(&unparsed, md, tun_dst); - if (flags || vni_field & ~VXLAN_VNI_MASK) { + if (unparsed.vx_flags || unparsed.vx_vni) { /* If there are any unprocessed flags remaining treat * this as a malformed packet. This behavior diverges from * VXLAN RFC (RFC7348) which stipulates that bits in reserved @@ -1339,7 +1341,7 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) goto bad_flags; } - vxlan_rcv(vs, skb, md, vxlan_vni(vni_field), tun_dst); + vxlan_rcv(vs, skb, md, vxlan_vni(vxlan_hdr(skb)->vx_vni), tun_dst); return 0; drop: From 288b01c8c46eda01cd3921c6e3aca6293e57cee8 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Tue, 16 Feb 2016 21:59:02 +0100 Subject: [PATCH 6/7] vxlan: clean up rx error path When there are unrecognized flags present in the vxlan header, it doesn't make much sense to return the packet for further UDP processing, especially considering that for other invalid flag combinations we drop the packet because of previous checks. This means we return positive value only at the beginning of the function where tun_dst is not yet allocated. This allows us to get rid of the bad_flags and error jump labels. When we're dropping packet, we need to free tun_dst now. Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 35 ++++++++++++++--------------------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index ac688dc75c66..dfbb3cbd14b5 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1288,16 +1288,19 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) /* Need Vxlan and inner Ethernet header to be present */ if (!pskb_may_pull(skb, VXLAN_HLEN)) - goto error; + return 1; unparsed = *vxlan_hdr(skb); - if (unparsed.vx_flags & VXLAN_HF_VNI) { - unparsed.vx_flags &= ~VXLAN_HF_VNI; - unparsed.vx_vni &= ~VXLAN_VNI_MASK; - } else { - /* VNI flag always required to be set */ - goto bad_flags; + /* VNI flag always required to be set */ + if (!(unparsed.vx_flags & VXLAN_HF_VNI)) { + netdev_dbg(skb->dev, "invalid vxlan flags=%#x vni=%#x\n", + ntohl(vxlan_hdr(skb)->vx_flags), + ntohl(vxlan_hdr(skb)->vx_vni)); + /* Return non vxlan pkt */ + return 1; } + unparsed.vx_flags &= ~VXLAN_HF_VNI; + unparsed.vx_vni &= ~VXLAN_VNI_MASK; if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB))) goto drop; @@ -1337,29 +1340,19 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) * is more robust and provides a little more security in * adding extensions to VXLAN. */ - - goto bad_flags; + goto drop; } vxlan_rcv(vs, skb, md, vxlan_vni(vxlan_hdr(skb)->vx_vni), tun_dst); return 0; drop: - /* Consume bad packet */ - kfree_skb(skb); - return 0; - -bad_flags: - netdev_dbg(skb->dev, "invalid vxlan flags=%#x vni=%#x\n", - ntohl(vxlan_hdr(skb)->vx_flags), - ntohl(vxlan_hdr(skb)->vx_vni)); - -error: if (tun_dst) dst_release((struct dst_entry *)tun_dst); - /* Return non vxlan pkt */ - return 1; + /* Consume bad packet */ + kfree_skb(skb); + return 0; } static int arp_reduce(struct net_device *dev, struct sk_buff *skb) From b9167b2e775999d267874b5a1a3a79020f54753a Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Tue, 16 Feb 2016 21:59:03 +0100 Subject: [PATCH 7/7] vxlan: treat vni in metadata based tunnels consistently For metadata based tunnels, VNI is ignored when doing vxlan device lookups (because such tunnel receives all VNIs). However, this was not honored by vxlan_xmit_one when doing encapsulation bypass. Move the check for metadata based tunnel to the common place where it belongs. Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index dfbb3cbd14b5..3a84680b5117 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -246,6 +246,10 @@ static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, __be32 vni) { struct vxlan_dev *vxlan; + /* For flow based devices, map all packets to VNI 0 */ + if (vs->flags & VXLAN_F_COLLECT_METADATA) + vni = 0; + hlist_for_each_entry_rcu(vxlan, vni_head(vs, vni), hlist) { if (vxlan->default_dst.remote_vni == vni) return vxlan; @@ -1194,10 +1198,6 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, union vxlan_addr saddr; int err = 0; - /* For flow based devices, map all packets to VNI 0 */ - if (vs->flags & VXLAN_F_COLLECT_METADATA) - vni = 0; - /* Is this VNI defined? */ vxlan = vxlan_vs_find_vni(vs, vni); if (!vxlan)