mirror of https://gitee.com/openkylin/linux.git
Merge branch 'skbuff-introduce-skbuff_heads-bulking-and-reusing'
Alexander Lobakin says: ==================== skbuff: introduce skbuff_heads bulking and reusing Currently, all sorts of skb allocation always allocate skbuff_heads one by one via kmem_cache_alloc(). On the other hand, we have the percpu napi_alloc_cache to store skbuff_heads queued up for freeing and flush them in bulk. We can use this cache not only for bulk-wiping, but also to obtain heads for new skbs and avoid unconditional allocations, as well as for bulk-allocating (like XDP's cpumap code and the veth driver already do). As this might affect latencies, cache pressure and lots of hardware and driver-dependent stuff, this new feature is mostly optional and can be enabled via: - a new napi_build_skb() function (as a replacement for build_skb()); - existing {,__}napi_alloc_skb() and napi_get_frags() functions; - __alloc_skb() with SKB_ALLOC_NAPI passed in flags. iperf3 showed 35-70 Mbps bumps for both TCP and UDP while performing VLAN NAT on a 1.2 GHz MIPS board. The boost is likely to be bigger on more powerful hosts and NICs with tens of Mpps. Note on skbuff_heads from distant slabs or pfmemalloc'ed slabs: - kmalloc()/kmem_cache_alloc() itself allows by default allocating memory from the remote nodes to defragment their slabs. This is controlled by sysctl, but according to this, skbuff_head from a remote node is an OK case; - The easiest way to check if the slab of skbuff_head is remote or pfmemalloc'ed is: if (!dev_page_is_reusable(virt_to_head_page(skb))) /* drop it */; ...*but*, given that most slabs are built of compound pages, virt_to_head_page() will hit the unlikely branch on every single call. This check cost at least 20 Mbps in test scenarios, and it seems better to _not_ do this. Since v5 [4]: - revert flags-to-bool conversion and simplify flags testing in __alloc_skb() (Alexander Duyck). Since v4 [3]: - rebase on top of net-next and address kernel build robot issue; - reorder checks a bit in __alloc_skb() to make the new condition even more harmless.
Since v3 [2]: - make the feature mostly optional, so driver developers can decide whether to use it or not (Paolo Abeni). This reuses the old flag for __alloc_skb() and introduces a new napi_build_skb(); - reduce bulk-allocation size from 32 to 16 elements (also Paolo). This equals the value of XDP's devmap and veth batch processing (which were tested a lot) and should be sane enough; - don't waste cycles on an explicit in_serving_softirq() check. Since v2 [1]: - also cover {,__}alloc_skb() and {,__}build_skb() cases (became handy after the changes that pass tiny skb requests to the kmalloc layer); - cover the cache with KASAN instrumentation (suggested by Eric Dumazet, with help from Dmitry Vyukov); - completely drop redundant __kfree_skb_flush() (also Eric); - lots of code cleanups; - expand the commit message with NUMA and pfmemalloc points (Jakub). Since v1 [0]: - use one unified cache instead of two separate ones to greatly simplify the logic and reduce hotpath overhead (Edward Cree); - new: also recycle GRO_MERGED_FREE skbs instead of freeing them immediately; - correct performance numbers after optimizations and performing lots of tests for different use cases. [0] https://lore.kernel.org/netdev/20210111182655.12159-1-alobakin@pm.me [1] https://lore.kernel.org/netdev/20210113133523.39205-1-alobakin@pm.me [2] https://lore.kernel.org/netdev/20210209204533.327360-1-alobakin@pm.me [3] https://lore.kernel.org/netdev/20210210162732.80467-1-alobakin@pm.me [4] https://lore.kernel.org/netdev/20210211185220.9753-1-alobakin@pm.me ==================== Reviewed-by: Alexander Duyck <alexanderduyck@fb.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
commit
c476299312
|
@ -1087,6 +1087,8 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size);
|
|||
struct sk_buff *build_skb_around(struct sk_buff *skb,
|
||||
void *data, unsigned int frag_size);
|
||||
|
||||
struct sk_buff *napi_build_skb(void *data, unsigned int frag_size);
|
||||
|
||||
/**
|
||||
* alloc_skb - allocate a network buffer
|
||||
* @size: size to allocate
|
||||
|
@ -2919,7 +2921,7 @@ static inline struct sk_buff *napi_alloc_skb(struct napi_struct *napi,
|
|||
}
|
||||
void napi_consume_skb(struct sk_buff *skb, int budget);
|
||||
|
||||
void __kfree_skb_flush(void);
|
||||
void napi_skb_free_stolen_head(struct sk_buff *skb);
|
||||
void __kfree_skb_defer(struct sk_buff *skb);
|
||||
|
||||
/**
|
||||
|
|
|
@ -4944,8 +4944,6 @@ static __latent_entropy void net_tx_action(struct softirq_action *h)
|
|||
else
|
||||
__kfree_skb_defer(skb);
|
||||
}
|
||||
|
||||
__kfree_skb_flush();
|
||||
}
|
||||
|
||||
if (sd->output_queue) {
|
||||
|
@ -6097,13 +6095,6 @@ struct packet_offload *gro_find_complete_by_type(__be16 type)
|
|||
}
|
||||
EXPORT_SYMBOL(gro_find_complete_by_type);
|
||||
|
||||
static void napi_skb_free_stolen_head(struct sk_buff *skb)
|
||||
{
|
||||
skb_dst_drop(skb);
|
||||
skb_ext_put(skb);
|
||||
kmem_cache_free(skbuff_head_cache, skb);
|
||||
}
|
||||
|
||||
static gro_result_t napi_skb_finish(struct napi_struct *napi,
|
||||
struct sk_buff *skb,
|
||||
gro_result_t ret)
|
||||
|
@ -6117,7 +6108,7 @@ static gro_result_t napi_skb_finish(struct napi_struct *napi,
|
|||
if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
|
||||
napi_skb_free_stolen_head(skb);
|
||||
else
|
||||
__kfree_skb(skb);
|
||||
__kfree_skb_defer(skb);
|
||||
break;
|
||||
|
||||
case GRO_HELD:
|
||||
|
@ -7012,7 +7003,6 @@ static int napi_threaded_poll(void *data)
|
|||
__napi_poll(napi, &repoll);
|
||||
netpoll_poll_unlock(have);
|
||||
|
||||
__kfree_skb_flush();
|
||||
local_bh_enable();
|
||||
|
||||
if (!repoll)
|
||||
|
@ -7042,7 +7032,7 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
|
|||
|
||||
if (list_empty(&list)) {
|
||||
if (!sd_has_rps_ipi_waiting(sd) && list_empty(&repoll))
|
||||
goto out;
|
||||
return;
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -7069,8 +7059,6 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
|
|||
__raise_softirq_irqoff(NET_RX_SOFTIRQ);
|
||||
|
||||
net_rps_action_and_irq_enable(sd);
|
||||
out:
|
||||
__kfree_skb_flush();
|
||||
}
|
||||
|
||||
struct netdev_adjacent {
|
||||
|
|
|
@ -119,6 +119,220 @@ static void skb_under_panic(struct sk_buff *skb, unsigned int sz, void *addr)
|
|||
skb_panic(skb, sz, addr, __func__);
|
||||
}
|
||||
|
||||
#define NAPI_SKB_CACHE_SIZE 64
|
||||
#define NAPI_SKB_CACHE_BULK 16
|
||||
#define NAPI_SKB_CACHE_HALF (NAPI_SKB_CACHE_SIZE / 2)
|
||||
|
||||
struct napi_alloc_cache {
|
||||
struct page_frag_cache page;
|
||||
unsigned int skb_count;
|
||||
void *skb_cache[NAPI_SKB_CACHE_SIZE];
|
||||
};
|
||||
|
||||
static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache);
|
||||
static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache);
|
||||
|
||||
static void *__alloc_frag_align(unsigned int fragsz, gfp_t gfp_mask,
|
||||
unsigned int align_mask)
|
||||
{
|
||||
struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
|
||||
|
||||
return page_frag_alloc_align(&nc->page, fragsz, gfp_mask, align_mask);
|
||||
}
|
||||
|
||||
void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
|
||||
{
|
||||
fragsz = SKB_DATA_ALIGN(fragsz);
|
||||
|
||||
return __alloc_frag_align(fragsz, GFP_ATOMIC, align_mask);
|
||||
}
|
||||
EXPORT_SYMBOL(__napi_alloc_frag_align);
|
||||
|
||||
void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
|
||||
{
|
||||
struct page_frag_cache *nc;
|
||||
void *data;
|
||||
|
||||
fragsz = SKB_DATA_ALIGN(fragsz);
|
||||
if (in_irq() || irqs_disabled()) {
|
||||
nc = this_cpu_ptr(&netdev_alloc_cache);
|
||||
data = page_frag_alloc_align(nc, fragsz, GFP_ATOMIC, align_mask);
|
||||
} else {
|
||||
local_bh_disable();
|
||||
data = __alloc_frag_align(fragsz, GFP_ATOMIC, align_mask);
|
||||
local_bh_enable();
|
||||
}
|
||||
return data;
|
||||
}
|
||||
EXPORT_SYMBOL(__netdev_alloc_frag_align);
|
||||
|
||||
static struct sk_buff *napi_skb_cache_get(void)
|
||||
{
|
||||
struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
|
||||
struct sk_buff *skb;
|
||||
|
||||
if (unlikely(!nc->skb_count))
|
||||
nc->skb_count = kmem_cache_alloc_bulk(skbuff_head_cache,
|
||||
GFP_ATOMIC,
|
||||
NAPI_SKB_CACHE_BULK,
|
||||
nc->skb_cache);
|
||||
if (unlikely(!nc->skb_count))
|
||||
return NULL;
|
||||
|
||||
skb = nc->skb_cache[--nc->skb_count];
|
||||
kasan_unpoison_object_data(skbuff_head_cache, skb);
|
||||
|
||||
return skb;
|
||||
}
|
||||
|
||||
/* Caller must provide SKB that is memset cleared */
|
||||
static void __build_skb_around(struct sk_buff *skb, void *data,
|
||||
unsigned int frag_size)
|
||||
{
|
||||
struct skb_shared_info *shinfo;
|
||||
unsigned int size = frag_size ? : ksize(data);
|
||||
|
||||
size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
|
||||
|
||||
/* Assumes caller memset cleared SKB */
|
||||
skb->truesize = SKB_TRUESIZE(size);
|
||||
refcount_set(&skb->users, 1);
|
||||
skb->head = data;
|
||||
skb->data = data;
|
||||
skb_reset_tail_pointer(skb);
|
||||
skb->end = skb->tail + size;
|
||||
skb->mac_header = (typeof(skb->mac_header))~0U;
|
||||
skb->transport_header = (typeof(skb->transport_header))~0U;
|
||||
|
||||
/* make sure we initialize shinfo sequentially */
|
||||
shinfo = skb_shinfo(skb);
|
||||
memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
|
||||
atomic_set(&shinfo->dataref, 1);
|
||||
|
||||
skb_set_kcov_handle(skb, kcov_common_handle());
|
||||
}
|
||||
|
||||
/**
|
||||
* __build_skb - build a network buffer
|
||||
* @data: data buffer provided by caller
|
||||
* @frag_size: size of data, or 0 if head was kmalloced
|
||||
*
|
||||
* Allocate a new &sk_buff. Caller provides space holding head and
|
||||
* skb_shared_info. @data must have been allocated by kmalloc() only if
|
||||
* @frag_size is 0, otherwise data should come from the page allocator
|
||||
* or vmalloc()
|
||||
* The return is the new skb buffer.
|
||||
* On a failure the return is %NULL, and @data is not freed.
|
||||
* Notes :
|
||||
* Before IO, driver allocates only data buffer where NIC put incoming frame
|
||||
* Driver should add room at head (NET_SKB_PAD) and
|
||||
* MUST add room at tail (SKB_DATA_ALIGN(skb_shared_info))
|
||||
* After IO, driver calls build_skb(), to allocate sk_buff and populate it
|
||||
* before giving packet to stack.
|
||||
* RX rings only contains data buffers, not full skbs.
|
||||
*/
|
||||
struct sk_buff *__build_skb(void *data, unsigned int frag_size)
|
||||
{
|
||||
struct sk_buff *skb;
|
||||
|
||||
skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC);
|
||||
if (unlikely(!skb))
|
||||
return NULL;
|
||||
|
||||
memset(skb, 0, offsetof(struct sk_buff, tail));
|
||||
__build_skb_around(skb, data, frag_size);
|
||||
|
||||
return skb;
|
||||
}
|
||||
|
||||
/* build_skb() is wrapper over __build_skb(), that specifically
|
||||
* takes care of skb->head and skb->pfmemalloc
|
||||
* This means that if @frag_size is not zero, then @data must be backed
|
||||
* by a page fragment, not kmalloc() or vmalloc()
|
||||
*/
|
||||
struct sk_buff *build_skb(void *data, unsigned int frag_size)
|
||||
{
|
||||
struct sk_buff *skb = __build_skb(data, frag_size);
|
||||
|
||||
if (skb && frag_size) {
|
||||
skb->head_frag = 1;
|
||||
if (page_is_pfmemalloc(virt_to_head_page(data)))
|
||||
skb->pfmemalloc = 1;
|
||||
}
|
||||
return skb;
|
||||
}
|
||||
EXPORT_SYMBOL(build_skb);
|
||||
|
||||
/**
|
||||
* build_skb_around - build a network buffer around provided skb
|
||||
* @skb: sk_buff provide by caller, must be memset cleared
|
||||
* @data: data buffer provided by caller
|
||||
* @frag_size: size of data, or 0 if head was kmalloced
|
||||
*/
|
||||
struct sk_buff *build_skb_around(struct sk_buff *skb,
|
||||
void *data, unsigned int frag_size)
|
||||
{
|
||||
if (unlikely(!skb))
|
||||
return NULL;
|
||||
|
||||
__build_skb_around(skb, data, frag_size);
|
||||
|
||||
if (frag_size) {
|
||||
skb->head_frag = 1;
|
||||
if (page_is_pfmemalloc(virt_to_head_page(data)))
|
||||
skb->pfmemalloc = 1;
|
||||
}
|
||||
return skb;
|
||||
}
|
||||
EXPORT_SYMBOL(build_skb_around);
|
||||
|
||||
/**
|
||||
* __napi_build_skb - build a network buffer
|
||||
* @data: data buffer provided by caller
|
||||
* @frag_size: size of data, or 0 if head was kmalloced
|
||||
*
|
||||
* Version of __build_skb() that uses NAPI percpu caches to obtain
|
||||
* skbuff_head instead of inplace allocation.
|
||||
*
|
||||
* Returns a new &sk_buff on success, %NULL on allocation failure.
|
||||
*/
|
||||
static struct sk_buff *__napi_build_skb(void *data, unsigned int frag_size)
|
||||
{
|
||||
struct sk_buff *skb;
|
||||
|
||||
skb = napi_skb_cache_get();
|
||||
if (unlikely(!skb))
|
||||
return NULL;
|
||||
|
||||
memset(skb, 0, offsetof(struct sk_buff, tail));
|
||||
__build_skb_around(skb, data, frag_size);
|
||||
|
||||
return skb;
|
||||
}
|
||||
|
||||
/**
|
||||
* napi_build_skb - build a network buffer
|
||||
* @data: data buffer provided by caller
|
||||
* @frag_size: size of data, or 0 if head was kmalloced
|
||||
*
|
||||
* Version of __napi_build_skb() that takes care of skb->head_frag
|
||||
* and skb->pfmemalloc when the data is a page or page fragment.
|
||||
*
|
||||
* Returns a new &sk_buff on success, %NULL on allocation failure.
|
||||
*/
|
||||
struct sk_buff *napi_build_skb(void *data, unsigned int frag_size)
|
||||
{
|
||||
struct sk_buff *skb = __napi_build_skb(data, frag_size);
|
||||
|
||||
if (likely(skb) && frag_size) {
|
||||
skb->head_frag = 1;
|
||||
skb_propagate_pfmemalloc(virt_to_head_page(data), skb);
|
||||
}
|
||||
|
||||
return skb;
|
||||
}
|
||||
EXPORT_SYMBOL(napi_build_skb);
|
||||
|
||||
/*
|
||||
* kmalloc_reserve is a wrapper around kmalloc_node_track_caller that tells
|
||||
* the caller if emergency pfmemalloc reserves are being used. If it is and
|
||||
|
@ -126,11 +340,8 @@ static void skb_under_panic(struct sk_buff *skb, unsigned int sz, void *addr)
|
|||
* may be used. Otherwise, the packet data may be discarded until enough
|
||||
* memory is free
|
||||
*/
|
||||
#define kmalloc_reserve(size, gfp, node, pfmemalloc) \
|
||||
__kmalloc_reserve(size, gfp, node, _RET_IP_, pfmemalloc)
|
||||
|
||||
static void *__kmalloc_reserve(size_t size, gfp_t flags, int node,
|
||||
unsigned long ip, bool *pfmemalloc)
|
||||
static void *kmalloc_reserve(size_t size, gfp_t flags, int node,
|
||||
bool *pfmemalloc)
|
||||
{
|
||||
void *obj;
|
||||
bool ret_pfmemalloc = false;
|
||||
|
@ -183,7 +394,6 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
|
|||
int flags, int node)
|
||||
{
|
||||
struct kmem_cache *cache;
|
||||
struct skb_shared_info *shinfo;
|
||||
struct sk_buff *skb;
|
||||
u8 *data;
|
||||
bool pfmemalloc;
|
||||
|
@ -195,9 +405,13 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
|
|||
gfp_mask |= __GFP_MEMALLOC;
|
||||
|
||||
/* Get the HEAD */
|
||||
skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node);
|
||||
if (!skb)
|
||||
goto out;
|
||||
if ((flags & (SKB_ALLOC_FCLONE | SKB_ALLOC_NAPI)) == SKB_ALLOC_NAPI &&
|
||||
likely(node == NUMA_NO_NODE || node == numa_mem_id()))
|
||||
skb = napi_skb_cache_get();
|
||||
else
|
||||
skb = kmem_cache_alloc_node(cache, gfp_mask & ~GFP_DMA, node);
|
||||
if (unlikely(!skb))
|
||||
return NULL;
|
||||
prefetchw(skb);
|
||||
|
||||
/* We do our best to align skb_shared_info on a separate cache
|
||||
|
@ -208,7 +422,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
|
|||
size = SKB_DATA_ALIGN(size);
|
||||
size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
|
||||
data = kmalloc_reserve(size, gfp_mask, node, &pfmemalloc);
|
||||
if (!data)
|
||||
if (unlikely(!data))
|
||||
goto nodata;
|
||||
/* kmalloc(size) might give us more room than requested.
|
||||
* Put skb_shared_info exactly at the end of allocated zone,
|
||||
|
@ -223,21 +437,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
|
|||
* the tail pointer in struct sk_buff!
|
||||
*/
|
||||
memset(skb, 0, offsetof(struct sk_buff, tail));
|
||||
/* Account for allocated memory : skb + skb->head */
|
||||
skb->truesize = SKB_TRUESIZE(size);
|
||||
__build_skb_around(skb, data, 0);
|
||||
skb->pfmemalloc = pfmemalloc;
|
||||
refcount_set(&skb->users, 1);
|
||||
skb->head = data;
|
||||
skb->data = data;
|
||||
skb_reset_tail_pointer(skb);
|
||||
skb->end = skb->tail + size;
|
||||
skb->mac_header = (typeof(skb->mac_header))~0U;
|
||||
skb->transport_header = (typeof(skb->transport_header))~0U;
|
||||
|
||||
/* make sure we initialize shinfo sequentially */
|
||||
shinfo = skb_shinfo(skb);
|
||||
memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
|
||||
atomic_set(&shinfo->dataref, 1);
|
||||
|
||||
if (flags & SKB_ALLOC_FCLONE) {
|
||||
struct sk_buff_fclones *fclones;
|
||||
|
@ -250,164 +451,14 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
|
|||
fclones->skb2.fclone = SKB_FCLONE_CLONE;
|
||||
}
|
||||
|
||||
skb_set_kcov_handle(skb, kcov_common_handle());
|
||||
|
||||
out:
|
||||
return skb;
|
||||
|
||||
nodata:
|
||||
kmem_cache_free(cache, skb);
|
||||
skb = NULL;
|
||||
goto out;
|
||||
return NULL;
|
||||
}
|
||||
EXPORT_SYMBOL(__alloc_skb);
|
||||
|
||||
/* Caller must provide SKB that is memset cleared */
|
||||
static struct sk_buff *__build_skb_around(struct sk_buff *skb,
|
||||
void *data, unsigned int frag_size)
|
||||
{
|
||||
struct skb_shared_info *shinfo;
|
||||
unsigned int size = frag_size ? : ksize(data);
|
||||
|
||||
size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
|
||||
|
||||
/* Assumes caller memset cleared SKB */
|
||||
skb->truesize = SKB_TRUESIZE(size);
|
||||
refcount_set(&skb->users, 1);
|
||||
skb->head = data;
|
||||
skb->data = data;
|
||||
skb_reset_tail_pointer(skb);
|
||||
skb->end = skb->tail + size;
|
||||
skb->mac_header = (typeof(skb->mac_header))~0U;
|
||||
skb->transport_header = (typeof(skb->transport_header))~0U;
|
||||
|
||||
/* make sure we initialize shinfo sequentially */
|
||||
shinfo = skb_shinfo(skb);
|
||||
memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
|
||||
atomic_set(&shinfo->dataref, 1);
|
||||
|
||||
skb_set_kcov_handle(skb, kcov_common_handle());
|
||||
|
||||
return skb;
|
||||
}
|
||||
|
||||
/**
|
||||
* __build_skb - build a network buffer
|
||||
* @data: data buffer provided by caller
|
||||
* @frag_size: size of data, or 0 if head was kmalloced
|
||||
*
|
||||
* Allocate a new &sk_buff. Caller provides space holding head and
|
||||
* skb_shared_info. @data must have been allocated by kmalloc() only if
|
||||
* @frag_size is 0, otherwise data should come from the page allocator
|
||||
* or vmalloc()
|
||||
* The return is the new skb buffer.
|
||||
* On a failure the return is %NULL, and @data is not freed.
|
||||
* Notes :
|
||||
* Before IO, driver allocates only data buffer where NIC put incoming frame
|
||||
* Driver should add room at head (NET_SKB_PAD) and
|
||||
* MUST add room at tail (SKB_DATA_ALIGN(skb_shared_info))
|
||||
* After IO, driver calls build_skb(), to allocate sk_buff and populate it
|
||||
* before giving packet to stack.
|
||||
* RX rings only contains data buffers, not full skbs.
|
||||
*/
|
||||
struct sk_buff *__build_skb(void *data, unsigned int frag_size)
|
||||
{
|
||||
struct sk_buff *skb;
|
||||
|
||||
skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC);
|
||||
if (unlikely(!skb))
|
||||
return NULL;
|
||||
|
||||
memset(skb, 0, offsetof(struct sk_buff, tail));
|
||||
|
||||
return __build_skb_around(skb, data, frag_size);
|
||||
}
|
||||
|
||||
/* build_skb() is wrapper over __build_skb(), that specifically
|
||||
* takes care of skb->head and skb->pfmemalloc
|
||||
* This means that if @frag_size is not zero, then @data must be backed
|
||||
* by a page fragment, not kmalloc() or vmalloc()
|
||||
*/
|
||||
struct sk_buff *build_skb(void *data, unsigned int frag_size)
|
||||
{
|
||||
struct sk_buff *skb = __build_skb(data, frag_size);
|
||||
|
||||
if (skb && frag_size) {
|
||||
skb->head_frag = 1;
|
||||
if (page_is_pfmemalloc(virt_to_head_page(data)))
|
||||
skb->pfmemalloc = 1;
|
||||
}
|
||||
return skb;
|
||||
}
|
||||
EXPORT_SYMBOL(build_skb);
|
||||
|
||||
/**
|
||||
* build_skb_around - build a network buffer around provided skb
|
||||
* @skb: sk_buff provide by caller, must be memset cleared
|
||||
* @data: data buffer provided by caller
|
||||
* @frag_size: size of data, or 0 if head was kmalloced
|
||||
*/
|
||||
struct sk_buff *build_skb_around(struct sk_buff *skb,
|
||||
void *data, unsigned int frag_size)
|
||||
{
|
||||
if (unlikely(!skb))
|
||||
return NULL;
|
||||
|
||||
skb = __build_skb_around(skb, data, frag_size);
|
||||
|
||||
if (skb && frag_size) {
|
||||
skb->head_frag = 1;
|
||||
if (page_is_pfmemalloc(virt_to_head_page(data)))
|
||||
skb->pfmemalloc = 1;
|
||||
}
|
||||
return skb;
|
||||
}
|
||||
EXPORT_SYMBOL(build_skb_around);
|
||||
|
||||
#define NAPI_SKB_CACHE_SIZE 64
|
||||
|
||||
struct napi_alloc_cache {
|
||||
struct page_frag_cache page;
|
||||
unsigned int skb_count;
|
||||
void *skb_cache[NAPI_SKB_CACHE_SIZE];
|
||||
};
|
||||
|
||||
static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache);
|
||||
static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache);
|
||||
|
||||
static void *__alloc_frag_align(unsigned int fragsz, gfp_t gfp_mask,
|
||||
unsigned int align_mask)
|
||||
{
|
||||
struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
|
||||
|
||||
return page_frag_alloc_align(&nc->page, fragsz, gfp_mask, align_mask);
|
||||
}
|
||||
|
||||
void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
|
||||
{
|
||||
fragsz = SKB_DATA_ALIGN(fragsz);
|
||||
|
||||
return __alloc_frag_align(fragsz, GFP_ATOMIC, align_mask);
|
||||
}
|
||||
EXPORT_SYMBOL(__napi_alloc_frag_align);
|
||||
|
||||
void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
|
||||
{
|
||||
struct page_frag_cache *nc;
|
||||
void *data;
|
||||
|
||||
fragsz = SKB_DATA_ALIGN(fragsz);
|
||||
if (in_irq() || irqs_disabled()) {
|
||||
nc = this_cpu_ptr(&netdev_alloc_cache);
|
||||
data = page_frag_alloc_align(nc, fragsz, GFP_ATOMIC, align_mask);
|
||||
} else {
|
||||
local_bh_disable();
|
||||
data = __alloc_frag_align(fragsz, GFP_ATOMIC, align_mask);
|
||||
local_bh_enable();
|
||||
}
|
||||
return data;
|
||||
}
|
||||
EXPORT_SYMBOL(__netdev_alloc_frag_align);
|
||||
|
||||
/**
|
||||
* __netdev_alloc_skb - allocate an skbuff for rx on a specific device
|
||||
* @dev: network device to receive on
|
||||
|
@ -511,7 +562,8 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
|
|||
if (len <= SKB_WITH_OVERHEAD(1024) ||
|
||||
len > SKB_WITH_OVERHEAD(PAGE_SIZE) ||
|
||||
(gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
|
||||
skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
|
||||
skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX | SKB_ALLOC_NAPI,
|
||||
NUMA_NO_NODE);
|
||||
if (!skb)
|
||||
goto skb_fail;
|
||||
goto skb_success;
|
||||
|
@ -528,7 +580,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
|
|||
if (unlikely(!data))
|
||||
return NULL;
|
||||
|
||||
skb = __build_skb(data, len);
|
||||
skb = __napi_build_skb(data, len);
|
||||
if (unlikely(!skb)) {
|
||||
skb_free_frag(data);
|
||||
return NULL;
|
||||
|
@ -859,43 +911,36 @@ void __consume_stateless_skb(struct sk_buff *skb)
|
|||
kfree_skbmem(skb);
|
||||
}
|
||||
|
||||
void __kfree_skb_flush(void)
|
||||
static void napi_skb_cache_put(struct sk_buff *skb)
|
||||
{
|
||||
struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
|
||||
u32 i;
|
||||
|
||||
/* flush skb_cache if containing objects */
|
||||
if (nc->skb_count) {
|
||||
kmem_cache_free_bulk(skbuff_head_cache, nc->skb_count,
|
||||
nc->skb_cache);
|
||||
nc->skb_count = 0;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void _kfree_skb_defer(struct sk_buff *skb)
|
||||
{
|
||||
struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
|
||||
|
||||
/* drop skb->head and call any destructors for packet */
|
||||
skb_release_all(skb);
|
||||
|
||||
/* record skb to CPU local list */
|
||||
kasan_poison_object_data(skbuff_head_cache, skb);
|
||||
nc->skb_cache[nc->skb_count++] = skb;
|
||||
|
||||
#ifdef CONFIG_SLUB
|
||||
/* SLUB writes into objects when freeing */
|
||||
prefetchw(skb);
|
||||
#endif
|
||||
|
||||
/* flush skb_cache if it is filled */
|
||||
if (unlikely(nc->skb_count == NAPI_SKB_CACHE_SIZE)) {
|
||||
kmem_cache_free_bulk(skbuff_head_cache, NAPI_SKB_CACHE_SIZE,
|
||||
nc->skb_cache);
|
||||
nc->skb_count = 0;
|
||||
for (i = NAPI_SKB_CACHE_HALF; i < NAPI_SKB_CACHE_SIZE; i++)
|
||||
kasan_unpoison_object_data(skbuff_head_cache,
|
||||
nc->skb_cache[i]);
|
||||
|
||||
kmem_cache_free_bulk(skbuff_head_cache, NAPI_SKB_CACHE_HALF,
|
||||
nc->skb_cache + NAPI_SKB_CACHE_HALF);
|
||||
nc->skb_count = NAPI_SKB_CACHE_HALF;
|
||||
}
|
||||
}
|
||||
|
||||
void __kfree_skb_defer(struct sk_buff *skb)
|
||||
{
|
||||
_kfree_skb_defer(skb);
|
||||
skb_release_all(skb);
|
||||
napi_skb_cache_put(skb);
|
||||
}
|
||||
|
||||
void napi_skb_free_stolen_head(struct sk_buff *skb)
|
||||
{
|
||||
skb_dst_drop(skb);
|
||||
skb_ext_put(skb);
|
||||
napi_skb_cache_put(skb);
|
||||
}
|
||||
|
||||
void napi_consume_skb(struct sk_buff *skb, int budget)
|
||||
|
@ -920,7 +965,8 @@ void napi_consume_skb(struct sk_buff *skb, int budget)
|
|||
return;
|
||||
}
|
||||
|
||||
_kfree_skb_defer(skb);
|
||||
skb_release_all(skb);
|
||||
napi_skb_cache_put(skb);
|
||||
}
|
||||
EXPORT_SYMBOL(napi_consume_skb);
|
||||
|
||||
|
|
Loading…
Reference in New Issue