From 5c2e26f6f674ee93719769d024cc72fad3ba9134 Mon Sep 17 00:00:00 2001
From: Sunil Goutham
Date: Mon, 14 Mar 2016 16:36:14 +0530
Subject: [PATCH 1/2] net: thunderx: Set receive buffer page usage count in
 bulk

Instead of calling get_page() for every receive buffer carved out of
a page, set the page's usage count once at the end, to reduce the
number of atomic calls.

Signed-off-by: Sunil Goutham
Signed-off-by: David S. Miller
---
 drivers/net/ethernet/cavium/thunder/nic.h    |  1 +
 .../ethernet/cavium/thunder/nicvf_queues.c   | 31 ++++++++++++++-----
 2 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h
index 092f097a5943..872b22d831ee 100644
--- a/drivers/net/ethernet/cavium/thunder/nic.h
+++ b/drivers/net/ethernet/cavium/thunder/nic.h
@@ -294,6 +294,7 @@ struct nicvf {
 	u32			speed;
 	struct page		*rb_page;
 	u32			rb_page_offset;
+	u16			rb_pageref;
 	bool			rb_alloc_fail;
 	bool			rb_work_scheduled;
 	struct delayed_work	rbdr_work;
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
index 0dd1abf86079..fa05e347262f 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
@@ -18,6 +18,15 @@
 #include "q_struct.h"
 #include "nicvf_queues.h"
 
+static void nicvf_get_page(struct nicvf *nic)
+{
+	if (!nic->rb_pageref || !nic->rb_page)
+		return;
+
+	atomic_add(nic->rb_pageref, &nic->rb_page->_count);
+	nic->rb_pageref = 0;
+}
+
 /* Poll a register for a specific value */
 static int nicvf_poll_reg(struct nicvf *nic, int qidx, u64 reg,
 			  int bit_pos, int bits, int val)
@@ -81,16 +90,15 @@ static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, gfp_t gfp,
 	int order = (PAGE_SIZE <= 4096) ? PAGE_ALLOC_COSTLY_ORDER : 0;
 
 	/* Check if request can be accomodated in previous allocated page */
-	if (nic->rb_page) {
-		if ((nic->rb_page_offset + buf_len + buf_len) >
-		    (PAGE_SIZE << order)) {
-			nic->rb_page = NULL;
-		} else {
-			nic->rb_page_offset += buf_len;
-			get_page(nic->rb_page);
-		}
+	if (nic->rb_page &&
+	    ((nic->rb_page_offset + buf_len) < (PAGE_SIZE << order))) {
+		nic->rb_pageref++;
+		goto ret;
 	}
 
+	nicvf_get_page(nic);
+	nic->rb_page = NULL;
+
 	/* Allocate a new page */
 	if (!nic->rb_page) {
 		nic->rb_page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN,
@@ -102,7 +110,9 @@ static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, gfp_t gfp,
 		nic->rb_page_offset = 0;
 	}
 
+ret:
 	*rbuf = (u64 *)((u64)page_address(nic->rb_page) + nic->rb_page_offset);
+	nic->rb_page_offset += buf_len;
 
 	return 0;
 }
@@ -158,6 +168,9 @@ static int nicvf_init_rbdr(struct nicvf *nic, struct rbdr *rbdr,
 		desc = GET_RBDR_DESC(rbdr, idx);
 		desc->buf_addr = virt_to_phys(rbuf) >> NICVF_RCV_BUF_ALIGN;
 	}
+
+	nicvf_get_page(nic);
+
 	return 0;
 }
 
@@ -241,6 +254,8 @@ static void nicvf_refill_rbdr(struct nicvf *nic, gfp_t gfp)
 		new_rb++;
 	}
 
+	nicvf_get_page(nic);
+
 	/* make sure all memory stores are done before ringing doorbell */
 	smp_wmb();
 
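The batching trick in the patch above is not kernel-specific. Below is a
minimal userspace sketch of the same idea, assuming C11 atomics; the names
page_ref, pending_refs, carve_buffer and publish_refs are invented for
illustration and are not the driver's. Each buffer carved from a shared page
bumps a plain local counter, and a single atomic_fetch_add() publishes all of
the references at once, mirroring what nicvf_get_page() does with atomic_add()
on page->_count.

/*
 * Sketch: batch per-buffer reference increments into one atomic RMW.
 * Compile with: cc -std=c11 sketch.c
 */
#include <stdatomic.h>
#include <stdio.h>

static atomic_int page_ref = 1;	/* stands in for page->_count */
static int pending_refs;	/* stands in for nic->rb_pageref */

static void carve_buffer(void)
{
	/* One buffer carved out of the shared page: just count it locally. */
	pending_refs++;
}

static void publish_refs(void)
{
	/* One atomic operation covers all buffers carved since last flush. */
	if (pending_refs) {
		atomic_fetch_add(&page_ref, pending_refs);
		pending_refs = 0;
	}
}

int main(void)
{
	for (int i = 0; i < 32; i++)	/* 32 buffers, but only ... */
		carve_buffer();
	publish_refs();			/* ... one atomic read-modify-write */
	printf("page refcount: %d\n", atomic_load(&page_ref));
	return 0;
}

The point is that the hot loop never touches the shared counter's cache line;
only the flush pays for one atomic operation, which is the effect the patch
obtains by replacing the per-buffer get_page() calls.
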
From 1d368790bc2d4111feae6fcc0b3b68d6ae92ff0f Mon Sep 17 00:00:00 2001
From: Sunil Goutham
Date: Mon, 14 Mar 2016 16:36:15 +0530
Subject: [PATCH 2/2] net: thunderx: Adjust nicvf structure to reduce cache
 misses

Adjusted the nicvf structure so that all elements used in the hot path,
such as napi, xmit etc., fall into the same cache line. This reduced the
number of cache misses and resulted in a ~2% increase in the number of
packets handled on a core. Also changed elements using the :1 bitfield
notation to boolean, to be consistent with the other element definitions.

Signed-off-by: Sunil Goutham
Signed-off-by: David S. Miller
---
 drivers/net/ethernet/cavium/thunder/nic.h | 60 +++++++++++++----------
 1 file changed, 34 insertions(+), 26 deletions(-)

diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h
index 872b22d831ee..83025bb4737c 100644
--- a/drivers/net/ethernet/cavium/thunder/nic.h
+++ b/drivers/net/ethernet/cavium/thunder/nic.h
@@ -272,46 +272,54 @@ struct nicvf {
 	struct nicvf		*pnicvf;
 	struct net_device	*netdev;
 	struct pci_dev		*pdev;
-	u8			vf_id;
-	u8			node;
-	u8			tns_mode:1;
-	u8			sqs_mode:1;
-	u8			loopback_supported:1;
-	bool			hw_tso;
-	u16			mtu;
-	struct queue_set	*qs;
-#define	MAX_SQS_PER_VF_SINGLE_NODE		5
-#define	MAX_SQS_PER_VF			11
-	u8			sqs_id;
-	u8			sqs_count; /* Secondary Qset count */
-	struct nicvf		*snicvf[MAX_SQS_PER_VF];
-	u8			rx_queues;
-	u8			tx_queues;
-	u8			max_queues;
 	void __iomem		*reg_base;
-	bool			link_up;
-	u8			duplex;
-	u32			speed;
-	struct page		*rb_page;
+	struct queue_set	*qs;
+	struct nicvf_cq_poll	*napi[8];
+	u8			vf_id;
+	u8			sqs_id;
+	bool			sqs_mode;
+	bool			hw_tso;
+
+	/* Receive buffer alloc */
 	u32			rb_page_offset;
 	u16			rb_pageref;
 	bool			rb_alloc_fail;
 	bool			rb_work_scheduled;
+	struct page		*rb_page;
 	struct delayed_work	rbdr_work;
 	struct tasklet_struct	rbdr_task;
-	struct tasklet_struct	qs_err_task;
-	struct tasklet_struct	cq_task;
-	struct nicvf_cq_poll	*napi[8];
-	struct nicvf_rss_info	rss_info;
+
+	/* Secondary Qset */
+	u8			sqs_count;
+#define	MAX_SQS_PER_VF_SINGLE_NODE		5
+#define	MAX_SQS_PER_VF			11
+	struct nicvf		*snicvf[MAX_SQS_PER_VF];
+
+	/* Queue count */
+	u8			rx_queues;
+	u8			tx_queues;
+	u8			max_queues;
+
+	u8			node;
 	u8			cpi_alg;
+	u16			mtu;
+	bool			link_up;
+	u8			duplex;
+	u32			speed;
+	bool			tns_mode;
+	bool			loopback_supported;
+	struct nicvf_rss_info	rss_info;
+	struct tasklet_struct	qs_err_task;
+	struct work_struct	reset_task;
+
 	/* Interrupt coalescing settings */
 	u32			cq_coalesce_usecs;
-
 	u32			msg_enable;
+
+	/* Stats */
 	struct nicvf_hw_stats	hw_stats;
 	struct nicvf_drv_stats	drv_stats;
 	struct bgx_stats	bgx_stats;
-	struct work_struct	reset_task;
 	/* MSI-X */
 	bool			msix_enabled;
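A rough userspace sketch of the layout idea in this patch, assuming C11
alignas; the struct fake_nicvf and its members are invented stand-ins for the
driver's structure, not its actual code. Hot-path fields are grouped at the
front, starting on a 64-byte boundary, and cold configuration fields follow.
offsetof() serves as a poor man's layout check here; in a kernel tree the
cache-line placement would normally be inspected with pahole instead.

/*
 * Sketch: hot/cold field grouping for cache locality.
 * Compile with: cc -std=c11 layout.c
 */
#include <stdalign.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct fake_nicvf {
	/* Hot path: fields touched per packet, kept together on one line. */
	alignas(64) void	*reg_base;
	void			*qs;
	void			*napi[8];
	uint8_t			vf_id;
	bool			hw_tso;

	/* Slow path / configuration: rarely touched after init. */
	uint16_t		mtu;
	uint32_t		speed;
	bool			link_up;
};

int main(void)
{
	/* Report which 64-byte cache line each field starts in. */
	printf("reg_base line: %zu\n", offsetof(struct fake_nicvf, reg_base) / 64);
	printf("napi     line: %zu\n", offsetof(struct fake_nicvf, napi) / 64);
	printf("mtu      line: %zu\n", offsetof(struct fake_nicvf, mtu) / 64);
	return 0;
}

The design choice mirrors the commit message: fields read or written together
in the napi/xmit path should share as few cache lines as possible, so per-core
packet processing takes fewer misses.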