net: hns: optimize XGE capability by reducing cpu usage

This patch improves the performance of XGE by:
1) changing the page management method for enet memory,
2) reducing the number of rmb() calls, and
3) adding memory prefetching.

Signed-off-by: Kejian Yan <yankejian@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
yankejian 2015-12-08 11:02:31 +08:00 committed by David S. Miller
parent bd1060a1d6
commit 9cbe9fd521
3 changed files with 55 additions and 30 deletions

View File

@ -341,7 +341,8 @@ struct hnae_queue {
void __iomem *io_base;
phys_addr_t phy_base;
struct hnae_ae_dev *dev; /* the device who use this queue */
struct hnae_ring rx_ring, tx_ring;
struct hnae_ring rx_ring ____cacheline_internodealigned_in_smp;
struct hnae_ring tx_ring ____cacheline_internodealigned_in_smp;
struct hnae_handle *handle;
};
@ -597,11 +598,9 @@ static inline void hnae_replace_buffer(struct hnae_ring *ring, int i,
struct hnae_desc_cb *res_cb)
{
struct hnae_buf_ops *bops = ring->q->handle->bops;
struct hnae_desc_cb tmp_cb = ring->desc_cb[i];
bops->unmap_buffer(ring, &ring->desc_cb[i]);
ring->desc_cb[i] = *res_cb;
*res_cb = tmp_cb;
ring->desc[i].addr = (__le64)ring->desc_cb[i].dma;
ring->desc[i].rx.ipoff_bnum_pid_flag = 0;
}

View File

@ -341,7 +341,6 @@ void hns_ae_toggle_ring_irq(struct hnae_ring *ring, u32 mask)
else
flag = RCB_INT_FLAG_RX;
hns_rcb_int_clr_hw(ring->q, flag);
hns_rcb_int_ctrl_hw(ring->q, flag, mask);
}

View File

@ -33,6 +33,7 @@
#define RCB_IRQ_NOT_INITED 0
#define RCB_IRQ_INITED 1
#define HNS_BUFFER_SIZE_2048 2048
#define BD_MAX_SEND_SIZE 8191
#define SKB_TMP_LEN(SKB) \
@ -491,13 +492,51 @@ static unsigned int hns_nic_get_headlen(unsigned char *data, u32 flag,
return max_size;
}
static void
hns_nic_reuse_page(struct hnae_desc_cb *desc_cb, int tsize, int last_offset)
static void hns_nic_reuse_page(struct sk_buff *skb, int i,
struct hnae_ring *ring, int pull_len,
struct hnae_desc_cb *desc_cb)
{
struct hnae_desc *desc;
int truesize, size;
int last_offset;
desc = &ring->desc[ring->next_to_clean];
size = le16_to_cpu(desc->rx.size);
#if (PAGE_SIZE < 8192)
if (hnae_buf_size(ring) == HNS_BUFFER_SIZE_2048) {
truesize = hnae_buf_size(ring);
} else {
truesize = ALIGN(size, L1_CACHE_BYTES);
last_offset = hnae_page_size(ring) - hnae_buf_size(ring);
}
#else
truesize = ALIGN(size, L1_CACHE_BYTES);
last_offset = hnae_page_size(ring) - hnae_buf_size(ring);
#endif
skb_add_rx_frag(skb, i, desc_cb->priv, desc_cb->page_offset + pull_len,
size - pull_len, truesize - pull_len);
/* avoid re-using remote pages,flag default unreuse */
if (likely(page_to_nid(desc_cb->priv) == numa_node_id())) {
#if (PAGE_SIZE < 8192)
if (hnae_buf_size(ring) == HNS_BUFFER_SIZE_2048) {
/* if we are only owner of page we can reuse it */
if (likely(page_count(desc_cb->priv) == 1)) {
/* flip page offset to other buffer */
desc_cb->page_offset ^= truesize;
desc_cb->reuse_flag = 1;
/* bump ref count on page before it is given*/
get_page(desc_cb->priv);
}
return;
}
#endif
/* move offset up to the next cache line */
desc_cb->page_offset += tsize;
desc_cb->page_offset += truesize;
if (desc_cb->page_offset <= last_offset) {
desc_cb->reuse_flag = 1;
@ -529,11 +568,10 @@ static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data,
struct hnae_desc *desc;
struct hnae_desc_cb *desc_cb;
unsigned char *va;
int bnum, length, size, i, truesize, last_offset;
int bnum, length, i;
int pull_len;
u32 bnum_flag;
last_offset = hnae_page_size(ring) - hnae_buf_size(ring);
desc = &ring->desc[ring->next_to_clean];
desc_cb = &ring->desc_cb[ring->next_to_clean];
@ -555,17 +593,12 @@ static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data,
return -ENOMEM;
}
prefetchw(skb->data);
length = le16_to_cpu(desc->rx.pkt_len);
bnum_flag = le32_to_cpu(desc->rx.ipoff_bnum_pid_flag);
priv->ops.get_rxd_bnum(bnum_flag, &bnum);
*out_bnum = bnum;
/* we will be copying header into skb->data in
* pskb_may_pull so it is in our interest to prefetch
* it now to avoid a possible cache miss
*/
prefetchw(skb->data);
if (length <= HNS_RX_HEAD_SIZE) {
memcpy(__skb_put(skb, length), va, ALIGN(length, sizeof(long)));
@ -588,13 +621,7 @@ static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data,
memcpy(__skb_put(skb, pull_len), va,
ALIGN(pull_len, sizeof(long)));
size = le16_to_cpu(desc->rx.size);
truesize = ALIGN(size, L1_CACHE_BYTES);
skb_add_rx_frag(skb, 0, desc_cb->priv,
desc_cb->page_offset + pull_len,
size - pull_len, truesize - pull_len);
hns_nic_reuse_page(desc_cb, truesize, last_offset);
hns_nic_reuse_page(skb, 0, ring, pull_len, desc_cb);
ring_ptr_move_fw(ring, next_to_clean);
if (unlikely(bnum >= (int)MAX_SKB_FRAGS)) { /* check err*/
@ -604,13 +631,8 @@ static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data,
for (i = 1; i < bnum; i++) {
desc = &ring->desc[ring->next_to_clean];
desc_cb = &ring->desc_cb[ring->next_to_clean];
size = le16_to_cpu(desc->rx.size);
truesize = ALIGN(size, L1_CACHE_BYTES);
skb_add_rx_frag(skb, i, desc_cb->priv,
desc_cb->page_offset,
size, truesize);
hns_nic_reuse_page(desc_cb, truesize, last_offset);
hns_nic_reuse_page(skb, i, ring, 0, desc_cb);
ring_ptr_move_fw(ring, next_to_clean);
}
}
@ -750,9 +772,10 @@ static int hns_nic_rx_poll_one(struct hns_nic_ring_data *ring_data,
/* make all data has been write before submit */
if (recv_pkts < budget) {
ex_num = readl_relaxed(ring->io_base + RCB_REG_FBDNUM);
rmb(); /*complete read rx ring bd number*/
if (ex_num > clean_count) {
num += ex_num - clean_count;
rmb(); /*complete read rx ring bd number*/
goto recv;
}
}
@ -849,8 +872,11 @@ static int hns_nic_tx_poll_one(struct hns_nic_ring_data *ring_data,
bytes = 0;
pkts = 0;
while (head != ring->next_to_clean)
while (head != ring->next_to_clean) {
hns_nic_reclaim_one_desc(ring, &bytes, &pkts);
/* issue prefetch for next Tx descriptor */
prefetch(&ring->desc_cb[ring->next_to_clean]);
}
NETIF_TX_UNLOCK(ndev);
@ -926,6 +952,7 @@ static int hns_nic_common_poll(struct napi_struct *napi, int budget)
ring_data->ring, 0);
ring_data->fini_process(ring_data);
return 0;
}
return clean_complete;