From bf5c25d608613eaf4dcdba5a9cac5b2afe67d635 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn
Date: Fri, 22 Dec 2017 19:00:17 -0500
Subject: [PATCH 1/4] skbuff: in skb_segment, call zerocopy functions once per nskb

This is a net-next follow-up to commit 268b79067942 ("skbuff: orphan
frags before zerocopy clone"), which fixed a bug in net, but added a
call to skb_zerocopy_clone at each frag to do so.

When segmenting skbs with user frags, either the user frags must be
replaced with private copies and uarg released, or the uarg must have
its refcount increased for each new skb.

skb_orphan_frags does the first, except for cases that can handle
reference counting. skb_zerocopy_clone then does the second.

Call these once per nskb, instead of once per frag. That is, in the
common case. With a frag list, also refresh when the origin skb
(frag_skb) changes.

Signed-off-by: Willem de Bruijn
Signed-off-by: David S. Miller
---
 net/core/skbuff.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index a3cb0be4c6f3..00b0757830e2 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3656,6 +3656,10 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
 		skb_shinfo(nskb)->tx_flags |= skb_shinfo(head_skb)->tx_flags &
 					      SKBTX_SHARED_FRAG;
 
+		if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
+		    skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
+			goto err;
+
 		while (pos < offset + len) {
 			if (i >= nfrags) {
 				BUG_ON(skb_headlen(list_skb));
@@ -3667,6 +3671,11 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
 
 				BUG_ON(!nfrags);
 
+				if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
+				    skb_zerocopy_clone(nskb, frag_skb,
+						       GFP_ATOMIC))
+					goto err;
+
 				list_skb = list_skb->next;
 			}
 
@@ -3678,11 +3687,6 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
 				goto err;
 			}
 
-			if (unlikely(skb_orphan_frags(frag_skb, GFP_ATOMIC)))
-				goto err;
-			if (skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
-				goto err;
-
 			*nskb_frag = *frag;
 			__skb_frag_ref(nskb_frag);
 			size = skb_frag_size(nskb_frag);

From 111856c758d9a06145da446e0db8f71988eebf02 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn
Date: Fri, 22 Dec 2017 19:00:18 -0500
Subject: [PATCH 2/4] tcp: push full zerocopy packets

Skbs that reach MAX_SKB_FRAGS cannot be extended further. Do the same
for zerocopy frags as for non-zerocopy frags and set the PSH bit. This
improves GRO assembly.

Suggested-by: Eric Dumazet
Signed-off-by: Willem de Bruijn
Signed-off-by: David S. Miller
---
 net/ipv4/tcp.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 67d39b79c801..44102484a76f 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1371,8 +1371,10 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
 			pfrag->offset += copy;
 		} else {
 			err = skb_zerocopy_iter_stream(sk, skb, msg, copy, uarg);
-			if (err == -EMSGSIZE || err == -EEXIST)
+			if (err == -EMSGSIZE || err == -EEXIST) {
+				tcp_mark_push(tp, skb);
 				goto new_segment;
+			}
 			if (err < 0)
 				goto do_error;
 			copy = err;
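Both patches above concern skbs built by MSG_ZEROCOPY sends. For
context, this is the userspace API that exercises these paths, per
Documentation/networking/msg_zerocopy.rst. A minimal sketch, assuming
fd is a connected TCP socket; send_zerocopy is an illustrative helper,
not kernel or documented library code, and the fallback #defines are
only needed with older userspace headers:

#include <errno.h>
#include <sys/socket.h>

#ifndef SO_ZEROCOPY
#define SO_ZEROCOPY 60
#endif
#ifndef MSG_ZEROCOPY
#define MSG_ZEROCOPY 0x4000000
#endif

/* Enable zerocopy on the socket, then request it for this send.
 * The kernel pins the pages in buf until it posts a completion
 * notification on the socket error queue, so buf must not be
 * reused until that notification has been received.
 */
static int send_zerocopy(int fd, const char *buf, size_t len)
{
	int one = 1;

	if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &one, sizeof(one)))
		return -errno;

	if (send(fd, buf, len, MSG_ZEROCOPY) != (ssize_t)len)
		return -errno;

	return 0;
}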
From 02583adeff12fa0a4d72558853a926867afb226c Mon Sep 17 00:00:00 2001
From: Willem de Bruijn
Date: Fri, 22 Dec 2017 19:00:19 -0500
Subject: [PATCH 3/4] tcp: place all zerocopy payload in frags

This avoids an unnecessary copy of 1-2KB and improves tso_fragment,
which has to fall back to tcp_fragment if skb->len != skb->data_len.

It also avoids a surprising inconsistency in notifications: zerocopy
packets sent over loopback have their frags copied, so
SO_EE_CODE_ZEROCOPY_COPIED is set in the notification. But this
currently does not happen for small packets, because when all data
fits in the linear fragment, data is not copied in
skb_orphan_frags_rx.

Reported-by: Tom Deseyn
Signed-off-by: Willem de Bruijn
Signed-off-by: David S. Miller
---
 net/ipv4/tcp.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 44102484a76f..947348872c3e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1186,7 +1186,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
 	int flags, err, copied = 0;
 	int mss_now = 0, size_goal, copied_syn = 0;
 	bool process_backlog = false;
-	bool sg;
+	bool sg, zc = false;
 	long timeo;
 
 	flags = msg->msg_flags;
@@ -1204,7 +1204,8 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
 			goto out_err;
 		}
 
-		if (!(sk_check_csum_caps(sk) && sk->sk_route_caps & NETIF_F_SG))
+		zc = sk_check_csum_caps(sk) && sk->sk_route_caps & NETIF_F_SG;
+		if (!zc)
 			uarg->zerocopy = 0;
 	}
 
@@ -1325,13 +1326,13 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
 			copy = msg_data_left(msg);
 
 		/* Where to copy to? */
-		if (skb_availroom(skb) > 0) {
+		if (skb_availroom(skb) > 0 && !zc) {
 			/* We have some space in skb head. Superb! */
 			copy = min_t(int, copy, skb_availroom(skb));
 			err = skb_add_data_nocache(sk, skb, &msg->msg_iter, copy);
 			if (err)
 				goto do_fault;
-		} else if (!uarg || !uarg->zerocopy) {
+		} else if (!zc) {
 			bool merge = true;
 			int i = skb_shinfo(skb)->nr_frags;
 			struct page_frag *pfrag = sk_page_frag(sk);

From 8ddab50839e29e965460b2cf794fd2b06a946893 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn
Date: Fri, 22 Dec 2017 19:00:20 -0500
Subject: [PATCH 4/4] tcp: do not allocate linear memory for zerocopy skbs

Zerocopy payload is now always stored in frags, and space for headers
is reserved, so this memory is unused.

Signed-off-by: Willem de Bruijn
Signed-off-by: David S. Miller
---
 net/ipv4/tcp.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 947348872c3e..7ac583a2b9fe 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1104,12 +1104,15 @@ static int linear_payload_sz(bool first_skb)
 	return 0;
 }
 
-static int select_size(const struct sock *sk, bool sg, bool first_skb)
+static int select_size(const struct sock *sk, bool sg, bool first_skb, bool zc)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 	int tmp = tp->mss_cache;
 
 	if (sg) {
+		if (zc)
+			return 0;
+
 		if (sk_can_gso(sk)) {
 			tmp = linear_payload_sz(first_skb);
 		} else {
@@ -1282,6 +1285,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
 
 		if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) {
 			bool first_skb;
+			int linear;
 
 new_segment:
 			/* Allocate new segment. If the interface is SG,
@@ -1295,9 +1299,8 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
 				goto restart;
 			}
 			first_skb = tcp_rtx_and_write_queues_empty(sk);
-			skb = sk_stream_alloc_skb(sk,
-						  select_size(sk, sg, first_skb),
-						  sk->sk_allocation,
+			linear = select_size(sk, sg, first_skb, zc);
+			skb = sk_stream_alloc_skb(sk, linear, sk->sk_allocation,
 						  first_skb);
			if (!skb)
				goto wait_for_memory;
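With patch 3, loopback sends now consistently copy the frags and flag
SO_EE_CODE_ZEROCOPY_COPIED regardless of packet size. For reference, a
sketch of reaping such completion notifications from the error queue,
adapted from the example in Documentation/networking/msg_zerocopy.rst;
recv_zerocopy_notification is an illustrative helper, and the
IP_RECVERR/IPV6_RECVERR cmsg level checks are omitted for brevity:

#include <linux/errqueue.h>
#include <stdio.h>
#include <sys/socket.h>

/* Reap one zerocopy completion from the socket error queue.
 * Each notification covers the range [ee_info, ee_data] of
 * zerocopy send calls on this socket, counted from zero.
 * SO_EE_CODE_ZEROCOPY_COPIED in ee_code means the kernel fell
 * back to a copy (e.g. over loopback), so the buffers were not
 * sent zerocopy after all.
 */
static int recv_zerocopy_notification(int fd)
{
	char control[100];
	struct msghdr msg = { .msg_control = control,
			      .msg_controllen = sizeof(control) };
	struct sock_extended_err *serr;
	struct cmsghdr *cm;

	if (recvmsg(fd, &msg, MSG_ERRQUEUE) == -1)
		return -1;

	cm = CMSG_FIRSTHDR(&msg);
	if (!cm)
		return -1;

	serr = (struct sock_extended_err *)CMSG_DATA(cm);
	if (serr->ee_errno != 0 ||
	    serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY)
		return -1;

	printf("completed sends %u..%u%s\n",
	       serr->ee_info, serr->ee_data,
	       serr->ee_code & SO_EE_CODE_ZEROCOPY_COPIED ?
	       " (copied, not zerocopy)" : "");
	return 0;
}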