net: hns: optimize XGE capability by reducing cpu usage
This patch improves the performance of XGE by: 1) changing the page management method for enet memory, 2) reducing the number of rmb() barriers executed, and 3) adding memory prefetching. Signed-off-by: Kejian Yan <yankejian@huawei.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
bd1060a1d6
commit
9cbe9fd521
|
@ -341,7 +341,8 @@ struct hnae_queue {
|
||||||
void __iomem *io_base;
|
void __iomem *io_base;
|
||||||
phys_addr_t phy_base;
|
phys_addr_t phy_base;
|
||||||
struct hnae_ae_dev *dev; /* the device who use this queue */
|
struct hnae_ae_dev *dev; /* the device who use this queue */
|
||||||
struct hnae_ring rx_ring, tx_ring;
|
struct hnae_ring rx_ring ____cacheline_internodealigned_in_smp;
|
||||||
|
struct hnae_ring tx_ring ____cacheline_internodealigned_in_smp;
|
||||||
struct hnae_handle *handle;
|
struct hnae_handle *handle;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -597,11 +598,9 @@ static inline void hnae_replace_buffer(struct hnae_ring *ring, int i,
|
||||||
struct hnae_desc_cb *res_cb)
|
struct hnae_desc_cb *res_cb)
|
||||||
{
|
{
|
||||||
struct hnae_buf_ops *bops = ring->q->handle->bops;
|
struct hnae_buf_ops *bops = ring->q->handle->bops;
|
||||||
struct hnae_desc_cb tmp_cb = ring->desc_cb[i];
|
|
||||||
|
|
||||||
bops->unmap_buffer(ring, &ring->desc_cb[i]);
|
bops->unmap_buffer(ring, &ring->desc_cb[i]);
|
||||||
ring->desc_cb[i] = *res_cb;
|
ring->desc_cb[i] = *res_cb;
|
||||||
*res_cb = tmp_cb;
|
|
||||||
ring->desc[i].addr = (__le64)ring->desc_cb[i].dma;
|
ring->desc[i].addr = (__le64)ring->desc_cb[i].dma;
|
||||||
ring->desc[i].rx.ipoff_bnum_pid_flag = 0;
|
ring->desc[i].rx.ipoff_bnum_pid_flag = 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -341,7 +341,6 @@ void hns_ae_toggle_ring_irq(struct hnae_ring *ring, u32 mask)
|
||||||
else
|
else
|
||||||
flag = RCB_INT_FLAG_RX;
|
flag = RCB_INT_FLAG_RX;
|
||||||
|
|
||||||
hns_rcb_int_clr_hw(ring->q, flag);
|
|
||||||
hns_rcb_int_ctrl_hw(ring->q, flag, mask);
|
hns_rcb_int_ctrl_hw(ring->q, flag, mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -33,6 +33,7 @@
|
||||||
|
|
||||||
#define RCB_IRQ_NOT_INITED 0
|
#define RCB_IRQ_NOT_INITED 0
|
||||||
#define RCB_IRQ_INITED 1
|
#define RCB_IRQ_INITED 1
|
||||||
|
#define HNS_BUFFER_SIZE_2048 2048
|
||||||
|
|
||||||
#define BD_MAX_SEND_SIZE 8191
|
#define BD_MAX_SEND_SIZE 8191
|
||||||
#define SKB_TMP_LEN(SKB) \
|
#define SKB_TMP_LEN(SKB) \
|
||||||
|
@ -491,13 +492,51 @@ static unsigned int hns_nic_get_headlen(unsigned char *data, u32 flag,
|
||||||
return max_size;
|
return max_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void hns_nic_reuse_page(struct sk_buff *skb, int i,
|
||||||
hns_nic_reuse_page(struct hnae_desc_cb *desc_cb, int tsize, int last_offset)
|
struct hnae_ring *ring, int pull_len,
|
||||||
|
struct hnae_desc_cb *desc_cb)
|
||||||
{
|
{
|
||||||
|
struct hnae_desc *desc;
|
||||||
|
int truesize, size;
|
||||||
|
int last_offset;
|
||||||
|
|
||||||
|
desc = &ring->desc[ring->next_to_clean];
|
||||||
|
size = le16_to_cpu(desc->rx.size);
|
||||||
|
|
||||||
|
#if (PAGE_SIZE < 8192)
|
||||||
|
if (hnae_buf_size(ring) == HNS_BUFFER_SIZE_2048) {
|
||||||
|
truesize = hnae_buf_size(ring);
|
||||||
|
} else {
|
||||||
|
truesize = ALIGN(size, L1_CACHE_BYTES);
|
||||||
|
last_offset = hnae_page_size(ring) - hnae_buf_size(ring);
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
truesize = ALIGN(size, L1_CACHE_BYTES);
|
||||||
|
last_offset = hnae_page_size(ring) - hnae_buf_size(ring);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
skb_add_rx_frag(skb, i, desc_cb->priv, desc_cb->page_offset + pull_len,
|
||||||
|
size - pull_len, truesize - pull_len);
|
||||||
|
|
||||||
/* avoid re-using remote pages,flag default unreuse */
|
/* avoid re-using remote pages,flag default unreuse */
|
||||||
if (likely(page_to_nid(desc_cb->priv) == numa_node_id())) {
|
if (likely(page_to_nid(desc_cb->priv) == numa_node_id())) {
|
||||||
|
#if (PAGE_SIZE < 8192)
|
||||||
|
if (hnae_buf_size(ring) == HNS_BUFFER_SIZE_2048) {
|
||||||
|
/* if we are only owner of page we can reuse it */
|
||||||
|
if (likely(page_count(desc_cb->priv) == 1)) {
|
||||||
|
/* flip page offset to other buffer */
|
||||||
|
desc_cb->page_offset ^= truesize;
|
||||||
|
|
||||||
|
desc_cb->reuse_flag = 1;
|
||||||
|
/* bump ref count on page before it is given*/
|
||||||
|
get_page(desc_cb->priv);
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
/* move offset up to the next cache line */
|
/* move offset up to the next cache line */
|
||||||
desc_cb->page_offset += tsize;
|
desc_cb->page_offset += truesize;
|
||||||
|
|
||||||
if (desc_cb->page_offset <= last_offset) {
|
if (desc_cb->page_offset <= last_offset) {
|
||||||
desc_cb->reuse_flag = 1;
|
desc_cb->reuse_flag = 1;
|
||||||
|
@ -529,11 +568,10 @@ static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data,
|
||||||
struct hnae_desc *desc;
|
struct hnae_desc *desc;
|
||||||
struct hnae_desc_cb *desc_cb;
|
struct hnae_desc_cb *desc_cb;
|
||||||
unsigned char *va;
|
unsigned char *va;
|
||||||
int bnum, length, size, i, truesize, last_offset;
|
int bnum, length, i;
|
||||||
int pull_len;
|
int pull_len;
|
||||||
u32 bnum_flag;
|
u32 bnum_flag;
|
||||||
|
|
||||||
last_offset = hnae_page_size(ring) - hnae_buf_size(ring);
|
|
||||||
desc = &ring->desc[ring->next_to_clean];
|
desc = &ring->desc[ring->next_to_clean];
|
||||||
desc_cb = &ring->desc_cb[ring->next_to_clean];
|
desc_cb = &ring->desc_cb[ring->next_to_clean];
|
||||||
|
|
||||||
|
@ -555,17 +593,12 @@ static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data,
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
prefetchw(skb->data);
|
||||||
length = le16_to_cpu(desc->rx.pkt_len);
|
length = le16_to_cpu(desc->rx.pkt_len);
|
||||||
bnum_flag = le32_to_cpu(desc->rx.ipoff_bnum_pid_flag);
|
bnum_flag = le32_to_cpu(desc->rx.ipoff_bnum_pid_flag);
|
||||||
priv->ops.get_rxd_bnum(bnum_flag, &bnum);
|
priv->ops.get_rxd_bnum(bnum_flag, &bnum);
|
||||||
*out_bnum = bnum;
|
*out_bnum = bnum;
|
||||||
|
|
||||||
/* we will be copying header into skb->data in
|
|
||||||
* pskb_may_pull so it is in our interest to prefetch
|
|
||||||
* it now to avoid a possible cache miss
|
|
||||||
*/
|
|
||||||
prefetchw(skb->data);
|
|
||||||
|
|
||||||
if (length <= HNS_RX_HEAD_SIZE) {
|
if (length <= HNS_RX_HEAD_SIZE) {
|
||||||
memcpy(__skb_put(skb, length), va, ALIGN(length, sizeof(long)));
|
memcpy(__skb_put(skb, length), va, ALIGN(length, sizeof(long)));
|
||||||
|
|
||||||
|
@ -588,13 +621,7 @@ static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data,
|
||||||
memcpy(__skb_put(skb, pull_len), va,
|
memcpy(__skb_put(skb, pull_len), va,
|
||||||
ALIGN(pull_len, sizeof(long)));
|
ALIGN(pull_len, sizeof(long)));
|
||||||
|
|
||||||
size = le16_to_cpu(desc->rx.size);
|
hns_nic_reuse_page(skb, 0, ring, pull_len, desc_cb);
|
||||||
truesize = ALIGN(size, L1_CACHE_BYTES);
|
|
||||||
skb_add_rx_frag(skb, 0, desc_cb->priv,
|
|
||||||
desc_cb->page_offset + pull_len,
|
|
||||||
size - pull_len, truesize - pull_len);
|
|
||||||
|
|
||||||
hns_nic_reuse_page(desc_cb, truesize, last_offset);
|
|
||||||
ring_ptr_move_fw(ring, next_to_clean);
|
ring_ptr_move_fw(ring, next_to_clean);
|
||||||
|
|
||||||
if (unlikely(bnum >= (int)MAX_SKB_FRAGS)) { /* check err*/
|
if (unlikely(bnum >= (int)MAX_SKB_FRAGS)) { /* check err*/
|
||||||
|
@ -604,13 +631,8 @@ static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data,
|
||||||
for (i = 1; i < bnum; i++) {
|
for (i = 1; i < bnum; i++) {
|
||||||
desc = &ring->desc[ring->next_to_clean];
|
desc = &ring->desc[ring->next_to_clean];
|
||||||
desc_cb = &ring->desc_cb[ring->next_to_clean];
|
desc_cb = &ring->desc_cb[ring->next_to_clean];
|
||||||
size = le16_to_cpu(desc->rx.size);
|
|
||||||
truesize = ALIGN(size, L1_CACHE_BYTES);
|
|
||||||
skb_add_rx_frag(skb, i, desc_cb->priv,
|
|
||||||
desc_cb->page_offset,
|
|
||||||
size, truesize);
|
|
||||||
|
|
||||||
hns_nic_reuse_page(desc_cb, truesize, last_offset);
|
hns_nic_reuse_page(skb, i, ring, 0, desc_cb);
|
||||||
ring_ptr_move_fw(ring, next_to_clean);
|
ring_ptr_move_fw(ring, next_to_clean);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -750,9 +772,10 @@ static int hns_nic_rx_poll_one(struct hns_nic_ring_data *ring_data,
|
||||||
/* make all data has been write before submit */
|
/* make all data has been write before submit */
|
||||||
if (recv_pkts < budget) {
|
if (recv_pkts < budget) {
|
||||||
ex_num = readl_relaxed(ring->io_base + RCB_REG_FBDNUM);
|
ex_num = readl_relaxed(ring->io_base + RCB_REG_FBDNUM);
|
||||||
rmb(); /*complete read rx ring bd number*/
|
|
||||||
if (ex_num > clean_count) {
|
if (ex_num > clean_count) {
|
||||||
num += ex_num - clean_count;
|
num += ex_num - clean_count;
|
||||||
|
rmb(); /*complete read rx ring bd number*/
|
||||||
goto recv;
|
goto recv;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -849,8 +872,11 @@ static int hns_nic_tx_poll_one(struct hns_nic_ring_data *ring_data,
|
||||||
|
|
||||||
bytes = 0;
|
bytes = 0;
|
||||||
pkts = 0;
|
pkts = 0;
|
||||||
while (head != ring->next_to_clean)
|
while (head != ring->next_to_clean) {
|
||||||
hns_nic_reclaim_one_desc(ring, &bytes, &pkts);
|
hns_nic_reclaim_one_desc(ring, &bytes, &pkts);
|
||||||
|
/* issue prefetch for next Tx descriptor */
|
||||||
|
prefetch(&ring->desc_cb[ring->next_to_clean]);
|
||||||
|
}
|
||||||
|
|
||||||
NETIF_TX_UNLOCK(ndev);
|
NETIF_TX_UNLOCK(ndev);
|
||||||
|
|
||||||
|
@ -926,6 +952,7 @@ static int hns_nic_common_poll(struct napi_struct *napi, int budget)
|
||||||
ring_data->ring, 0);
|
ring_data->ring, 0);
|
||||||
|
|
||||||
ring_data->fini_process(ring_data);
|
ring_data->fini_process(ring_data);
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
return clean_complete;
|
return clean_complete;
|
||||||
|
|
Loading…
Reference in New Issue