inet: frags: get rid of ipfrag_skb_cb/FRAG_CB

ip_defrag uses skb->cb[] to store the fragment offset, and unfortunately
this integer is currently in a different cache line than skb->next,
meaning that we use two cache lines per skb when finding the insertion point.
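
(To see why, here is a standalone sketch, not kernel code: field sizes and
the cb[] position below are simplified assumptions modeled on struct
sk_buff, printing which 64-byte cache line each field falls into.)

    #include <stdio.h>
    #include <stddef.h>

    struct mock_skb {
            struct mock_skb *next;  /* start of the first cache line */
            struct mock_skb *prev;
            void *dev;              /* the slot this patch reuses */
            char other[40];         /* stand-in for intervening fields */
            char cb[48];            /* control buffer, old home of the offset */
    };

    int main(void)
    {
            /* The old FRAG_CB() overlay kept the offset inside cb[], behind
             * a struct inet_skb_parm header (assumed 24 bytes here). */
            size_t old_off = offsetof(struct mock_skb, cb) + 24;

            printf("skb->next  : line %zu\n", offsetof(struct mock_skb, next) / 64);
            printf("old offset : line %zu\n", old_off / 64);
            printf("new offset : line %zu\n", offsetof(struct mock_skb, dev) / 64);
            return 0;
    }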

By aliasing skb->ip_defrag_offset and skb->dev, we pack all the fields
in a single cache line and save precious memory bandwidth.
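
As a minimal sketch of the trick, assuming a cut-down userspace mock of
the structures (names other than dev and ip_defrag_offset are invented,
and barrier() is approximated with a GCC/Clang compiler barrier):

    #include <stdio.h>

    #define barrier() asm volatile("" ::: "memory")

    struct mock_dev { int ifindex; };

    struct mock_skb {
            struct mock_skb *next;
            struct mock_skb *prev;
            union {                         /* one slot, two lifetimes */
                    struct mock_dev *dev;   /* valid until the skb is queued */
                    int ip_defrag_offset;   /* valid while on the frag queue */
            };
    };

    static void queue_fragment(struct mock_skb *skb, int offset, int *iif)
    {
            struct mock_dev *dev = skb->dev;

            /* Consume everything derived from skb->dev first... */
            if (dev)
                    *iif = dev->ifindex;
            /* ...keep the compiler from reordering across the aliased write... */
            barrier();
            /* ...then reuse the same bytes for the fragment offset. */
            skb->ip_defrag_offset = offset;
    }

    int main(void)
    {
            struct mock_dev eth0 = { .ifindex = 2 };
            struct mock_skb skb = { .dev = &eth0 };
            int iif = 0;

            queue_fragment(&skb, 1480, &iif);
            printf("iif=%d offset=%d\n", iif, skb.ip_defrag_offset);
            return 0;
    }

The same ordering constraint shows up in the patch below: ip_frag_queue()
must read skb->dev (to record qp->iif) before it overwrites the slot with
the offset.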

Note that after the fast path added by Changli Gao in commit
d6bebca92c ("fragment: add fast path for in-order fragments")
this change won't help the fast path, since we still need
to access prev->len (2nd cache line), but it will show great
benefits when the slow path is entered, since we perform
a linear scan of a potentially long list.

Also note that this potentially long list is an attack vector;
we might also consider using an rb-tree there eventually.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
commit bf66337140 (parent c2615cf5a7)
Author: Eric Dumazet, 2018-03-31 12:58:58 -07:00 (committed by David S. Miller)

2 files changed, 15 insertions(+), 21 deletions(-)

--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h

@@ -672,6 +672,7 @@ struct sk_buff {
 			 * UDP receive path is one user.
 			 */
 			unsigned long		dev_scratch;
+			int			ip_defrag_offset;
 		};
 	};
 	struct rb_node		rbnode; /* used in netem & tcp stack */
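
For context, the anonymous union this hunk extends then reads roughly as
follows (abridged from struct sk_buff of that era; the comment is the one
visible above):

    union {
            struct net_device	*dev;
            /* Some protocols might use this space to store information,
             * while device pointer would be NULL.
             * UDP receive path is one user.
             */
            unsigned long		dev_scratch;
            int			ip_defrag_offset;
    };

The new field therefore shares storage with skb->dev, which is what makes
the read-dev-then-write-offset ordering in ip_fragment.c below necessary.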

--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c

@@ -57,14 +57,6 @@
  */
 static const char ip_frag_cache_name[] = "ip4-frags";
 
-struct ipfrag_skb_cb
-{
-	struct inet_skb_parm	h;
-	int			offset;
-};
-
-#define FRAG_CB(skb)	((struct ipfrag_skb_cb *)((skb)->cb))
-
 /* Describe an entry in the "incomplete datagrams" queue. */
 struct ipq {
 	struct inet_frag_queue q;
@@ -353,13 +345,13 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 	 * this fragment, right?
 	 */
 	prev = qp->q.fragments_tail;
-	if (!prev || FRAG_CB(prev)->offset < offset) {
+	if (!prev || prev->ip_defrag_offset < offset) {
 		next = NULL;
 		goto found;
 	}
 	prev = NULL;
 	for (next = qp->q.fragments; next != NULL; next = next->next) {
-		if (FRAG_CB(next)->offset >= offset)
+		if (next->ip_defrag_offset >= offset)
 			break;	/* bingo! */
 		prev = next;
 	}
@@ -370,7 +362,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 	 * any overlaps are eliminated.
 	 */
 	if (prev) {
-		int i = (FRAG_CB(prev)->offset + prev->len) - offset;
+		int i = (prev->ip_defrag_offset + prev->len) - offset;
 
 		if (i > 0) {
 			offset += i;
@@ -387,8 +379,8 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 
 	err = -ENOMEM;
 
-	while (next && FRAG_CB(next)->offset < end) {
-		int i = end - FRAG_CB(next)->offset; /* overlap is 'i' bytes */
+	while (next && next->ip_defrag_offset < end) {
+		int i = end - next->ip_defrag_offset; /* overlap is 'i' bytes */
 
 		if (i < next->len) {
 			/* Eat head of the next overlapped fragment
@@ -396,7 +388,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 			 */
 			if (!pskb_pull(next, i))
 				goto err;
-			FRAG_CB(next)->offset += i;
+			next->ip_defrag_offset += i;
 			qp->q.meat -= i;
 			if (next->ip_summed != CHECKSUM_UNNECESSARY)
 				next->ip_summed = CHECKSUM_NONE;
@@ -420,7 +412,13 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 		}
 	}
 
-	FRAG_CB(skb)->offset = offset;
+	/* Note : skb->ip_defrag_offset and skb->dev share the same location */
+	dev = skb->dev;
+	if (dev)
+		qp->iif = dev->ifindex;
+	/* Makes sure compiler wont do silly aliasing games */
+	barrier();
+	skb->ip_defrag_offset = offset;
 
 	/* Insert this fragment in the chain of fragments. */
 	skb->next = next;
@@ -431,11 +429,6 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 	else
 		qp->q.fragments = skb;
 
-	dev = skb->dev;
-	if (dev) {
-		qp->iif = dev->ifindex;
-		skb->dev = NULL;
-	}
 	qp->q.stamp = skb->tstamp;
 	qp->q.meat += skb->len;
 	qp->ecn |= ecn;
@@ -511,7 +504,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 	}
 
 	WARN_ON(!head);
-	WARN_ON(FRAG_CB(head)->offset != 0);
+	WARN_ON(head->ip_defrag_offset != 0);
 
 	/* Allocate a new buffer for the datagram. */
 	ihlen = ip_hdrlen(head);