diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 65439188c582..65f52472a52c 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1684,18 +1684,14 @@ static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, u32 vxflags, gbp->policy_id = htons(md->gbp & VXLAN_GBP_ID_MASK); } -#if IS_ENABLED(CONFIG_IPV6) -static int vxlan6_xmit_skb(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, - struct net_device *dev, struct in6_addr *saddr, - struct in6_addr *daddr, __u8 prio, __u8 ttl, - __be16 src_port, __be16 dst_port, __be32 vni, - struct vxlan_metadata *md, bool xnet, u32 vxflags) +static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst, + int iphdr_len, __be32 vni, + struct vxlan_metadata *md, u32 vxflags, + bool udp_sum) { struct vxlanhdr *vxh; int min_headroom; int err; - bool udp_sum = !(vxflags & VXLAN_F_UDP_ZERO_CSUM6_TX); int type = udp_sum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; u16 hdrlen = sizeof(struct vxlanhdr); @@ -1712,93 +1708,8 @@ static int vxlan6_xmit_skb(struct dst_entry *dst, struct sock *sk, } } - skb_scrub_packet(skb, xnet); - min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len - + VXLAN_HLEN + sizeof(struct ipv6hdr) - + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0); - - /* Need space for new headers (invalidates iph ptr) */ - err = skb_cow_head(skb, min_headroom); - if (unlikely(err)) { - kfree_skb(skb); - goto err; - } - - skb = vlan_hwaccel_push_inside(skb); - if (WARN_ON(!skb)) { - err = -ENOMEM; - goto err; - } - - skb = iptunnel_handle_offloads(skb, udp_sum, type); - if (IS_ERR(skb)) { - err = -EINVAL; - goto err; - } - - vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); - vxh->vx_flags = htonl(VXLAN_HF_VNI); - vxh->vx_vni = vni; - - if (type & SKB_GSO_TUNNEL_REMCSUM) { - u32 data = (skb_checksum_start_offset(skb) - hdrlen) >> - VXLAN_RCO_SHIFT; - - if (skb->csum_offset == offsetof(struct udphdr, check)) - data |= VXLAN_RCO_UDP; - - vxh->vx_vni |= htonl(data); - vxh->vx_flags |= htonl(VXLAN_HF_RCO); - - if (!skb_is_gso(skb)) { - skb->ip_summed = CHECKSUM_NONE; - skb->encapsulation = 0; - } - } - - if (vxflags & VXLAN_F_GBP) - vxlan_build_gbp_hdr(vxh, vxflags, md); - - skb_set_inner_protocol(skb, htons(ETH_P_TEB)); - - udp_tunnel6_xmit_skb(dst, sk, skb, dev, saddr, daddr, prio, - ttl, src_port, dst_port, - !!(vxflags & VXLAN_F_UDP_ZERO_CSUM6_TX)); - return 0; -err: - dst_release(dst); - return err; -} -#endif - -static int vxlan_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, - __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, - __be16 src_port, __be16 dst_port, __be32 vni, - struct vxlan_metadata *md, bool xnet, u32 vxflags) -{ - struct vxlanhdr *vxh; - int min_headroom; - int err; - bool udp_sum = !!(vxflags & VXLAN_F_UDP_CSUM); - int type = udp_sum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; - u16 hdrlen = sizeof(struct vxlanhdr); - - if ((vxflags & VXLAN_F_REMCSUM_TX) && - skb->ip_summed == CHECKSUM_PARTIAL) { - int csum_start = skb_checksum_start_offset(skb); - - if (csum_start <= VXLAN_MAX_REMCSUM_START && - !(csum_start & VXLAN_RCO_SHIFT_MASK) && - (skb->csum_offset == offsetof(struct udphdr, check) || - skb->csum_offset == offsetof(struct tcphdr, check))) { - udp_sum = false; - type |= SKB_GSO_TUNNEL_REMCSUM; - } - } - - min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len - + VXLAN_HLEN + sizeof(struct iphdr) + + VXLAN_HLEN + iphdr_len + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0); /* Need space for new headers (invalidates iph ptr) */ @@ -1840,13 +1751,30 @@ static int vxlan_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *sk vxlan_build_gbp_hdr(vxh, vxflags, md); skb_set_inner_protocol(skb, htons(ETH_P_TEB)); - - udp_tunnel_xmit_skb(rt, sk, skb, src, dst, tos, ttl, df, - src_port, dst_port, xnet, - !(vxflags & VXLAN_F_UDP_CSUM)); return 0; } +static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan, + struct sk_buff *skb, int oif, u8 tos, + __be32 daddr, __be32 *saddr) +{ + struct rtable *rt = NULL; + struct flowi4 fl4; + + memset(&fl4, 0, sizeof(fl4)); + fl4.flowi4_oif = oif; + fl4.flowi4_tos = RT_TOS(tos); + fl4.flowi4_mark = skb->mark; + fl4.flowi4_proto = IPPROTO_UDP; + fl4.daddr = daddr; + fl4.saddr = vxlan->cfg.saddr.sin.sin_addr.s_addr; + + rt = ip_route_output_key(vxlan->net, &fl4); + if (!IS_ERR(rt)) + *saddr = fl4.saddr; + return rt; +} + #if IS_ENABLED(CONFIG_IPV6) static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan, struct sk_buff *skb, int oif, @@ -1928,7 +1856,6 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, struct sock *sk; struct rtable *rt = NULL; const struct iphdr *old_iph; - struct flowi4 fl4; union vxlan_addr *dst; union vxlan_addr remote_ip; struct vxlan_metadata _md; @@ -1939,6 +1866,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, __u8 tos, ttl; int err; u32 flags = vxlan->flags; + bool udp_sum = false; + bool xnet = !net_eq(vxlan->net, dev_net(vxlan->dev)); info = skb_tunnel_info(skb); @@ -1987,6 +1916,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, if (info) { ttl = info->key.ttl; tos = info->key.tos; + udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM); if (info->options_len) md = ip_tunnel_info_opts(info); @@ -1995,6 +1925,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, } if (dst->sa.sa_family == AF_INET) { + __be32 saddr; + if (!vxlan->vn4_sock) goto drop; sk = vxlan->vn4_sock->sock->sk; @@ -2002,22 +1934,13 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, if (info) { if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT) df = htons(IP_DF); - - if (info->key.tun_flags & TUNNEL_CSUM) - flags |= VXLAN_F_UDP_CSUM; - else - flags &= ~VXLAN_F_UDP_CSUM; + } else { + udp_sum = !!(flags & VXLAN_F_UDP_CSUM); } - memset(&fl4, 0, sizeof(fl4)); - fl4.flowi4_oif = rdst ? rdst->remote_ifindex : 0; - fl4.flowi4_tos = RT_TOS(tos); - fl4.flowi4_mark = skb->mark; - fl4.flowi4_proto = IPPROTO_UDP; - fl4.daddr = dst->sin.sin_addr.s_addr; - fl4.saddr = vxlan->cfg.saddr.sin.sin_addr.s_addr; - - rt = ip_route_output_key(vxlan->net, &fl4); + rt = vxlan_get_route(vxlan, skb, + rdst ? rdst->remote_ifindex : 0, tos, + dst->sin.sin_addr.s_addr, &saddr); if (IS_ERR(rt)) { netdev_dbg(dev, "no route to %pI4\n", &dst->sin.sin_addr.s_addr); @@ -2049,16 +1972,14 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, tos = ip_tunnel_ecn_encap(tos, old_iph, skb); ttl = ttl ? : ip4_dst_hoplimit(&rt->dst); - err = vxlan_xmit_skb(rt, sk, skb, fl4.saddr, - dst->sin.sin_addr.s_addr, tos, ttl, df, - src_port, dst_port, htonl(vni << 8), md, - !net_eq(vxlan->net, dev_net(vxlan->dev)), - flags); - if (err < 0) { - /* skb is already freed. */ - skb = NULL; - goto rt_tx_error; - } + err = vxlan_build_skb(skb, &rt->dst, sizeof(struct iphdr), + htonl(vni << 8), md, flags, udp_sum); + if (err < 0) + goto xmit_tx_error; + + udp_tunnel_xmit_skb(rt, sk, skb, saddr, + dst->sin.sin_addr.s_addr, tos, ttl, df, + src_port, dst_port, xnet, !udp_sum); #if IS_ENABLED(CONFIG_IPV6) } else { struct dst_entry *ndst; @@ -2103,18 +2024,20 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, return; } - if (info) { - if (info->key.tun_flags & TUNNEL_CSUM) - flags &= ~VXLAN_F_UDP_ZERO_CSUM6_TX; - else - flags |= VXLAN_F_UDP_ZERO_CSUM6_TX; - } + if (!info) + udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM6_TX); ttl = ttl ? : ip6_dst_hoplimit(ndst); - err = vxlan6_xmit_skb(ndst, sk, skb, dev, &saddr, &dst->sin6.sin6_addr, - 0, ttl, src_port, dst_port, htonl(vni << 8), md, - !net_eq(vxlan->net, dev_net(vxlan->dev)), - flags); + skb_scrub_packet(skb, xnet); + err = vxlan_build_skb(skb, ndst, sizeof(struct ipv6hdr), + htonl(vni << 8), md, flags, udp_sum); + if (err < 0) { + dst_release(ndst); + return; + } + udp_tunnel6_xmit_skb(ndst, sk, skb, dev, + &saddr, &dst->sin6.sin6_addr, + 0, ttl, src_port, dst_port, !udp_sum); #endif } @@ -2124,6 +2047,9 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, dev->stats.tx_dropped++; goto tx_free; +xmit_tx_error: + /* skb is already freed. */ + skb = NULL; rt_tx_error: ip_rt_put(rt); tx_error: @@ -2390,31 +2316,6 @@ static int vxlan_change_mtu(struct net_device *dev, int new_mtu) return 0; } -static int egress_ipv4_tun_info(struct net_device *dev, struct sk_buff *skb, - struct ip_tunnel_info *info, - __be16 sport, __be16 dport) -{ - struct vxlan_dev *vxlan = netdev_priv(dev); - struct rtable *rt; - struct flowi4 fl4; - - memset(&fl4, 0, sizeof(fl4)); - fl4.flowi4_tos = RT_TOS(info->key.tos); - fl4.flowi4_mark = skb->mark; - fl4.flowi4_proto = IPPROTO_UDP; - fl4.daddr = info->key.u.ipv4.dst; - - rt = ip_route_output_key(vxlan->net, &fl4); - if (IS_ERR(rt)) - return PTR_ERR(rt); - ip_rt_put(rt); - - info->key.u.ipv4.src = fl4.saddr; - info->key.tp_src = sport; - info->key.tp_dst = dport; - return 0; -} - static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) { struct vxlan_dev *vxlan = netdev_priv(dev); @@ -2426,9 +2327,16 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) dport = info->key.tp_dst ? : vxlan->cfg.dst_port; if (ip_tunnel_info_af(info) == AF_INET) { + struct rtable *rt; + if (!vxlan->vn4_sock) return -EINVAL; - return egress_ipv4_tun_info(dev, skb, info, sport, dport); + rt = vxlan_get_route(vxlan, skb, 0, info->key.tos, + info->key.u.ipv4.dst, + &info->key.u.ipv4.src); + if (IS_ERR(rt)) + return PTR_ERR(rt); + ip_rt_put(rt); } else { #if IS_ENABLED(CONFIG_IPV6) struct dst_entry *ndst; @@ -2441,13 +2349,12 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) if (IS_ERR(ndst)) return PTR_ERR(ndst); dst_release(ndst); - - info->key.tp_src = sport; - info->key.tp_dst = dport; #else /* !CONFIG_IPV6 */ return -EPFNOSUPPORT; #endif } + info->key.tp_src = sport; + info->key.tp_dst = dport; return 0; } diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 0fb86442544b..25bd919c9ef0 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -9,17 +9,71 @@ #include #include -#define VNI_HASH_BITS 10 -#define VNI_HASH_SIZE (1<mark mapping @@ -62,44 +119,6 @@ struct vxlanhdr_gbp { #define VXLAN_GBP_POLICY_APPLIED (BIT(3) << 16) #define VXLAN_GBP_ID_MASK (0xFFFF) -/* VXLAN protocol header: - * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * |G|R|R|R|I|R|R|C| Reserved | - * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * | VXLAN Network Identifier (VNI) | Reserved | - * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * - * G = 1 Group Policy (VXLAN-GBP) - * I = 1 VXLAN Network Identifier (VNI) present - * C = 1 Remote checksum offload (RCO) - */ -struct vxlanhdr { - __be32 vx_flags; - __be32 vx_vni; -}; - -/* VXLAN header flags. */ -#define VXLAN_HF_RCO BIT(21) -#define VXLAN_HF_VNI BIT(27) -#define VXLAN_HF_GBP BIT(31) - -/* Remote checksum offload header option */ -#define VXLAN_RCO_MASK 0x7f /* Last byte of vni field */ -#define VXLAN_RCO_UDP 0x80 /* Indicate UDP RCO (TCP when not set *) */ -#define VXLAN_RCO_SHIFT 1 /* Left shift of start */ -#define VXLAN_RCO_SHIFT_MASK ((1 << VXLAN_RCO_SHIFT) - 1) -#define VXLAN_MAX_REMCSUM_START (VXLAN_RCO_MASK << VXLAN_RCO_SHIFT) - -#define VXLAN_N_VID (1u << 24) -#define VXLAN_VID_MASK (VXLAN_N_VID - 1) -#define VXLAN_VNI_MASK (VXLAN_VID_MASK << 8) -#define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr)) - -#define VNI_HASH_BITS 10 -#define VNI_HASH_SIZE (1<