From b7fe10e5ebac2a3f37e95535e616494b65fa020f Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 19 Aug 2015 17:07:32 -0700 Subject: [PATCH 1/3] gro: Fix remcsum offload to deal with frags in GRO The remote checksum offload GRO did not consider the case that frag0 might be in use. This patch fixes that by accessing headers using the skb_gro functions and not saving offsets relative to skb->head. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 23 ++++++++------------ include/linux/netdevice.h | 44 ++++++++++++++++++++++++++++----------- net/ipv4/fou.c | 28 +++++++++++-------------- 3 files changed, 53 insertions(+), 42 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 54615bb9d916..64fcd2402562 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -519,10 +519,10 @@ static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff *skb, u32 data, struct gro_remcsum *grc, bool nopartial) { - size_t start, offset, plen; + size_t start, offset; if (skb->remcsum_offload) - return NULL; + return vh; if (!NAPI_GRO_CB(skb)->csum_valid) return NULL; @@ -532,17 +532,8 @@ static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff *skb, offsetof(struct udphdr, check) : offsetof(struct tcphdr, check)); - plen = hdrlen + offset + sizeof(u16); - - /* Pull checksum that will be written */ - if (skb_gro_header_hard(skb, off + plen)) { - vh = skb_gro_header_slow(skb, off + plen, off); - if (!vh) - return NULL; - } - - skb_gro_remcsum_process(skb, (void *)vh + hdrlen, - start, offset, grc, nopartial); + vh = skb_gro_remcsum_process(skb, (void *)vh, off, hdrlen, + start, offset, grc, nopartial); skb->remcsum_offload = 1; @@ -573,7 +564,6 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head, goto out; } - skb_gro_pull(skb, sizeof(struct vxlanhdr)); /* pull vxlan header */ skb_gro_postpull_rcsum(skb, vh, sizeof(struct vxlanhdr)); flags = ntohl(vh->vx_flags); @@ -588,6 +578,8 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head, goto out; } + skb_gro_pull(skb, sizeof(struct vxlanhdr)); /* pull vxlan header */ + flush = 0; for (p = *head; p; p = p->next) { @@ -1110,6 +1102,9 @@ static struct vxlanhdr *vxlan_remcsum(struct sk_buff *skb, struct vxlanhdr *vh, { size_t start, offset, plen; + if (skb->remcsum_offload) + return vh; + start = (data & VXLAN_RCO_MASK) << VXLAN_RCO_SHIFT; offset = start + ((data & VXLAN_RCO_UDP) ? offsetof(struct udphdr, check) : diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 4bd177faa90c..6abe0d6f1e1d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2311,8 +2311,7 @@ __sum16 __skb_gro_checksum_complete(struct sk_buff *skb); static inline bool skb_at_gro_remcsum_start(struct sk_buff *skb) { - return (NAPI_GRO_CB(skb)->gro_remcsum_start - skb_headroom(skb) == - skb_gro_offset(skb)); + return (NAPI_GRO_CB(skb)->gro_remcsum_start == skb_gro_offset(skb)); } static inline bool __skb_gro_checksum_validate_needed(struct sk_buff *skb, @@ -2408,37 +2407,58 @@ static inline void skb_gro_remcsum_init(struct gro_remcsum *grc) grc->delta = 0; } -static inline void skb_gro_remcsum_process(struct sk_buff *skb, void *ptr, - int start, int offset, - struct gro_remcsum *grc, - bool nopartial) +static inline void *skb_gro_remcsum_process(struct sk_buff *skb, void *ptr, + unsigned int off, size_t hdrlen, + int start, int offset, + struct gro_remcsum *grc, + bool nopartial) { __wsum delta; + size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start); BUG_ON(!NAPI_GRO_CB(skb)->csum_valid); if (!nopartial) { - NAPI_GRO_CB(skb)->gro_remcsum_start = - ((unsigned char *)ptr + start) - skb->head; - return; + NAPI_GRO_CB(skb)->gro_remcsum_start = off + hdrlen + start; + return ptr; } - delta = remcsum_adjust(ptr, NAPI_GRO_CB(skb)->csum, start, offset); + ptr = skb_gro_header_fast(skb, off); + if (skb_gro_header_hard(skb, off + plen)) { + ptr = skb_gro_header_slow(skb, off + plen, off); + if (!ptr) + return NULL; + } + + delta = remcsum_adjust(ptr + hdrlen, NAPI_GRO_CB(skb)->csum, + start, offset); /* Adjust skb->csum since we changed the packet */ NAPI_GRO_CB(skb)->csum = csum_add(NAPI_GRO_CB(skb)->csum, delta); - grc->offset = (ptr + offset) - (void *)skb->head; + grc->offset = off + hdrlen + offset; grc->delta = delta; + + return ptr; } static inline void skb_gro_remcsum_cleanup(struct sk_buff *skb, struct gro_remcsum *grc) { + void *ptr; + size_t plen = grc->offset + sizeof(u16); + if (!grc->delta) return; - remcsum_unadjust((__sum16 *)(skb->head + grc->offset), grc->delta); + ptr = skb_gro_header_fast(skb, grc->offset); + if (skb_gro_header_hard(skb, grc->offset + sizeof(u16))) { + ptr = skb_gro_header_slow(skb, plen, grc->offset); + if (!ptr) + return; + } + + remcsum_unadjust((__sum16 *)ptr, grc->delta); } static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 34968cd5c146..eb11f9506894 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -79,7 +79,11 @@ static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr, __be16 *pd = data; size_t start = ntohs(pd[0]); size_t offset = ntohs(pd[1]); - size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start); + size_t plen = sizeof(struct udphdr) + hdrlen + + max_t(size_t, offset + sizeof(u16), start); + + if (skb->remcsum_offload) + return guehdr; if (!pskb_may_pull(skb, plen)) return NULL; @@ -221,29 +225,21 @@ static int fou_gro_complete(struct sk_buff *skb, int nhoff, static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off, struct guehdr *guehdr, void *data, - size_t hdrlen, u8 ipproto, - struct gro_remcsum *grc, bool nopartial) + size_t hdrlen, struct gro_remcsum *grc, + bool nopartial) { __be16 *pd = data; size_t start = ntohs(pd[0]); size_t offset = ntohs(pd[1]); - size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start); if (skb->remcsum_offload) - return NULL; + return guehdr; if (!NAPI_GRO_CB(skb)->csum_valid) return NULL; - /* Pull checksum that will be written */ - if (skb_gro_header_hard(skb, off + plen)) { - guehdr = skb_gro_header_slow(skb, off + plen, off); - if (!guehdr) - return NULL; - } - - skb_gro_remcsum_process(skb, (void *)guehdr + hdrlen, - start, offset, grc, nopartial); + guehdr = skb_gro_remcsum_process(skb, (void *)guehdr, off, hdrlen, + start, offset, grc, nopartial); skb->remcsum_offload = 1; @@ -307,10 +303,10 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head, if (flags & GUE_PFLAG_REMCSUM) { guehdr = gue_gro_remcsum(skb, off, guehdr, - data + doffset, hdrlen, - guehdr->proto_ctype, &grc, + data + doffset, hdrlen, &grc, !!(fou->flags & FOU_F_REMCSUM_NOPARTIAL)); + if (!guehdr) goto out; From 58ce31cca1ffe057f4744c3f671e3e84606d3d4a Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 19 Aug 2015 17:07:33 -0700 Subject: [PATCH 2/3] vxlan: GRO support at tunnel layer Add calls to gro_cells infrastructure to do GRO when receiving on a tunnel. Testing: Ran 200 netperf TCP_STREAM instance - With fix (GRO enabled on VXLAN interface) Verify GRO is happening. 9084 MBps tput 3.44% CPU utilization - Without fix (GRO disabled on VXLAN interface) Verified no GRO is happening. 9084 MBps tput 5.54% CPU utilization Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 9 +++++++-- include/net/vxlan.h | 1 + 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 64fcd2402562..61b457b9ec00 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1208,7 +1208,7 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, stats->rx_bytes += skb->len; u64_stats_update_end(&stats->syncp); - netif_rx(skb); + gro_cells_receive(&vxlan->gro_cells, skb); return; drop: @@ -2446,6 +2446,8 @@ static void vxlan_setup(struct net_device *dev) vxlan->dev = dev; + gro_cells_init(&vxlan->gro_cells, dev); + for (h = 0; h < FDB_HASH_SIZE; ++h) INIT_HLIST_HEAD(&vxlan->fdb_head[h]); } @@ -2885,6 +2887,7 @@ static void vxlan_dellink(struct net_device *dev, struct list_head *head) hlist_del_rcu(&vxlan->hlist); spin_unlock(&vn->sock_lock); + gro_cells_destroy(&vxlan->gro_cells); list_del(&vxlan->next); unregister_netdevice_queue(dev, head); } @@ -3093,8 +3096,10 @@ static void __net_exit vxlan_exit_net(struct net *net) /* If vxlan->dev is in the same netns, it has already been added * to the list by the previous loop. */ - if (!net_eq(dev_net(vxlan->dev), net)) + if (!net_eq(dev_net(vxlan->dev), net)) { + gro_cells_destroy(&vxlan->gro_cells); unregister_netdevice_queue(vxlan->dev, &list); + } } unregister_netdevice_many(&list); diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 43677e6b9c43..6b3234599a2c 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -161,6 +161,7 @@ struct vxlan_dev { struct timer_list age_timer; spinlock_t hash_lock; unsigned int addrcnt; + struct gro_cells gro_cells; struct vxlan_config cfg; From 270136613bf7306e2b83457628e2b2f6c6be3989 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 19 Aug 2015 17:07:34 -0700 Subject: [PATCH 3/3] fou: Do WARN_ON_ONCE in gue_gro_receive for bad proto callbacks Do WARN_ON_ONCE instead of WARN_ON in gue_gro_receive when the offload callcaks are bad (either don't exist or gro_receive is not specified). Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/ipv4/fou.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index eb11f9506894..2d1646cff057 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -347,7 +347,7 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head, rcu_read_lock(); offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; ops = rcu_dereference(offloads[guehdr->proto_ctype]); - if (WARN_ON(!ops || !ops->callbacks.gro_receive)) + if (WARN_ON_ONCE(!ops || !ops->callbacks.gro_receive)) goto out_unlock; pp = ops->callbacks.gro_receive(head, skb);