mirror of https://gitee.com/openkylin/linux.git
i40e/i40evf: enable hardware feature head write back

The hardware supports a feature that avoids marking each completed
descriptor in the ring with a DD bit; instead, it writes a single
memory location with the position up to which the driver should clean.
Enable this feature.

Change-ID: I5da4e0681f0b581a6401c950a81808792267fe57
Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Signed-off-by: Mitch Williams <mitch.a.williams@intel.com>
Signed-off-by: Catherine Sullivan <catherine.sullivan@intel.com>
Tested-by: Kavindya Deegala <kavindya.s.deegala@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
commit 1943d8ba95
parent 6c167f582e
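In outline, head write-back trades per-descriptor completion marks for a single head value that the hardware DMA-writes just past the end of the ring. A minimal sketch of the two completion checks, using simplified placeholder types rather than the real i40e structures:

    #include <stdint.h>

    /* Placeholder types; the real driver uses struct i40e_ring and
     * struct i40e_tx_desc. */
    struct tx_desc {
            uint64_t cmd_type_offset_bsz;
    };

    struct tx_ring {
            struct tx_desc *desc; /* base of DMA-coherent descriptor block */
            uint16_t count;       /* number of descriptors in the ring */
    };

    #define DESC_DONE 0xFULL      /* illustrative "descriptor done" marker */

    /* DD-bit scheme: hardware marks every completed descriptor, so the
     * cleanup loop must read each descriptor it wants to reclaim. */
    static int tx_desc_done(const struct tx_ring *ring, uint16_t i)
    {
            return (ring->desc[i].cmd_type_offset_bsz & DESC_DONE) != 0;
    }

    /* Head write-back scheme: hardware instead DMA-writes the ring head
     * into a u32 placed immediately after the last descriptor, so one
     * volatile read tells the driver how far it may clean. */
    static uint32_t tx_get_head(const struct tx_ring *ring)
    {
            const volatile uint32_t *head =
                    (const volatile uint32_t *)(ring->desc + ring->count);

            return *head;
    }

The diff below wires this up in three places: queue-context setup (head_wb_ena/head_wb_addr), ring allocation (one extra u32 past the descriptors), and the cleanup loop (compare the current descriptor against the written-back head instead of testing DD bits).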
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -2181,6 +2181,11 @@ static int i40e_configure_tx_ring(struct i40e_ring *ring)
 	tx_ctx.fd_ena = !!(vsi->back->flags & (I40E_FLAG_FD_SB_ENABLED |
 					       I40E_FLAG_FD_ATR_ENABLED));
 	tx_ctx.timesync_ena = !!(vsi->back->flags & I40E_FLAG_PTP);
+	/* FDIR VSI tx ring can still use RS bit and writebacks */
+	if (vsi->type != I40E_VSI_FDIR)
+		tx_ctx.head_wb_ena = 1;
+	tx_ctx.head_wb_addr = ring->dma +
+			      (ring->count * sizeof(struct i40e_tx_desc));
 
 	/* As part of VSI creation/update, FW allocates certain
 	 * Tx arbitration queue sets for each TC enabled for
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -618,6 +618,20 @@ static bool i40e_check_tx_hang(struct i40e_ring *tx_ring)
 	return ret;
 }
 
+/**
+ * i40e_get_head - Retrieve head from head writeback
+ * @tx_ring: tx ring to fetch head of
+ *
+ * Returns value of Tx ring head based on value stored
+ * in head write-back location
+ **/
+static inline u32 i40e_get_head(struct i40e_ring *tx_ring)
+{
+	void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count;
+
+	return le32_to_cpu(*(volatile __le32 *)head);
+}
+
 /**
  * i40e_clean_tx_irq - Reclaim resources after transmit completes
  * @tx_ring: tx ring to clean
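Two details of i40e_get_head() are worth noting: the cast of tx_ring->desc to struct i40e_tx_desc * makes the + tx_ring->count arithmetic step in descriptor-sized units, landing exactly on the u32 past the last descriptor, and the volatile-qualified read keeps the compiler from caching a value the hardware updates by DMA.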
@@ -629,6 +643,7 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
 {
 	u16 i = tx_ring->next_to_clean;
 	struct i40e_tx_buffer *tx_buf;
+	struct i40e_tx_desc *tx_head;
 	struct i40e_tx_desc *tx_desc;
 	unsigned int total_packets = 0;
 	unsigned int total_bytes = 0;
@@ -637,6 +652,8 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
 	tx_desc = I40E_TX_DESC(tx_ring, i);
 	i -= tx_ring->count;
 
+	tx_head = I40E_TX_DESC(tx_ring, i40e_get_head(tx_ring));
+
 	do {
 		struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch;
 
@@ -647,9 +664,8 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
 		/* prevent any other reads prior to eop_desc */
 		read_barrier_depends();
 
-		/* if the descriptor isn't done, no work yet to do */
-		if (!(eop_desc->cmd_type_offset_bsz &
-		      cpu_to_le64(I40E_TX_DESC_DTYPE_DESC_DONE)))
+		/* we have caught up to head, no work left to do */
+		if (tx_head == tx_desc)
 			break;
 
 		/* clear next_to_watch to prevent false hangs */
@@ -905,6 +921,10 @@ int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
 
 	/* round up to nearest 4K */
 	tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc);
+	/* add u32 for head writeback, align after this takes care of
+	 * guaranteeing this is at least one cache line in size
+	 */
+	tx_ring->size += sizeof(u32);
 	tx_ring->size = ALIGN(tx_ring->size, 4096);
 	tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
 					   &tx_ring->dma, GFP_KERNEL);
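For the sizing arithmetic above: with, say, 512 descriptors of 16 bytes each (illustrative numbers, not taken from this diff), the ring occupies 8192 bytes; adding the u32 gives 8196, and ALIGN(8196, 4096) rounds up to 12288. The write-back word at offset 8192 therefore always falls inside the allocation, and the 4K-aligned padding is far larger than the one cacheline the comment asks for.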
@@ -2042,9 +2062,23 @@ static void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
 		tx_bi = &tx_ring->tx_bi[i];
 	}
 
-	tx_desc->cmd_type_offset_bsz =
-		build_ctob(td_cmd, td_offset, size, td_tag) |
-		cpu_to_le64((u64)I40E_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT);
+	/* Place RS bit on last descriptor of any packet that spans across the
+	 * 4th descriptor (WB_STRIDE aka 0x3) in a 64B cacheline.
+	 */
+#define WB_STRIDE 0x3
+	if (((i & WB_STRIDE) != WB_STRIDE) &&
+	    (first <= &tx_ring->tx_bi[i]) &&
+	    (first >= &tx_ring->tx_bi[i & ~WB_STRIDE])) {
+		tx_desc->cmd_type_offset_bsz =
+			build_ctob(td_cmd, td_offset, size, td_tag) |
+			cpu_to_le64((u64)I40E_TX_DESC_CMD_EOP <<
+					 I40E_TXD_QW1_CMD_SHIFT);
+	} else {
+		tx_desc->cmd_type_offset_bsz =
+			build_ctob(td_cmd, td_offset, size, td_tag) |
+			cpu_to_le64((u64)I40E_TXD_CMD <<
+					 I40E_TXD_QW1_CMD_SHIFT);
+	}
 
 	netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev,
 						 tx_ring->queue_index),
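The WB_STRIDE branch above is what throttles write-back traffic: per its comment, RS (and with it a head write-back) is requested only when a packet ends on or spans a 4-descriptor (64-byte) stride boundary, while packets completing within a single stride get EOP alone. The hardware thus updates the head location at most about once per cacheline of descriptors rather than once per packet.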
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -230,6 +230,9 @@ static int i40e_config_vsi_tx_queue(struct i40e_vf *vf, u16 vsi_idx,
 	tx_ctx.qlen = info->ring_len;
 	tx_ctx.rdylist = le16_to_cpu(pf->vsi[vsi_idx]->info.qs_handle[0]);
 	tx_ctx.rdylist_act = 0;
+	tx_ctx.head_wb_ena = 1;
+	tx_ctx.head_wb_addr = info->dma_ring_addr +
+			      (info->ring_len * sizeof(struct i40e_tx_desc));
 
 	/* clear the context in the HMC */
 	ret = i40e_clear_lan_tx_queue_context(hw, pf_queue_id);
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
@@ -169,6 +169,20 @@ static bool i40e_check_tx_hang(struct i40e_ring *tx_ring)
 	return ret;
 }
 
+/**
+ * i40e_get_head - Retrieve head from head writeback
+ * @tx_ring: tx ring to fetch head of
+ *
+ * Returns value of Tx ring head based on value stored
+ * in head write-back location
+ **/
+static inline u32 i40e_get_head(struct i40e_ring *tx_ring)
+{
+	void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count;
+
+	return le32_to_cpu(*(volatile __le32 *)head);
+}
+
 /**
  * i40e_clean_tx_irq - Reclaim resources after transmit completes
  * @tx_ring: tx ring to clean
@@ -180,6 +194,7 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
 {
 	u16 i = tx_ring->next_to_clean;
 	struct i40e_tx_buffer *tx_buf;
+	struct i40e_tx_desc *tx_head;
 	struct i40e_tx_desc *tx_desc;
 	unsigned int total_packets = 0;
 	unsigned int total_bytes = 0;
@@ -188,6 +203,8 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
 	tx_desc = I40E_TX_DESC(tx_ring, i);
 	i -= tx_ring->count;
 
+	tx_head = I40E_TX_DESC(tx_ring, i40e_get_head(tx_ring));
+
 	do {
 		struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch;
 
@@ -198,9 +215,8 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
 		/* prevent any other reads prior to eop_desc */
 		read_barrier_depends();
 
-		/* if the descriptor isn't done, no work yet to do */
-		if (!(eop_desc->cmd_type_offset_bsz &
-		      cpu_to_le64(I40E_TX_DESC_DTYPE_DESC_DONE)))
+		/* we have caught up to head, no work left to do */
+		if (tx_head == tx_desc)
 			break;
 
 		/* clear next_to_watch to prevent false hangs */
@@ -432,6 +448,10 @@ int i40evf_setup_tx_descriptors(struct i40e_ring *tx_ring)
 
 	/* round up to nearest 4K */
 	tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc);
+	/* add u32 for head writeback, align after this takes care of
+	 * guaranteeing this is at least one cache line in size
+	 */
+	tx_ring->size += sizeof(u32);
 	tx_ring->size = ALIGN(tx_ring->size, 4096);
 	tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
 					   &tx_ring->dma, GFP_KERNEL);
@@ -1377,9 +1397,23 @@ static void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
 		tx_bi = &tx_ring->tx_bi[i];
 	}
 
-	tx_desc->cmd_type_offset_bsz =
-		build_ctob(td_cmd, td_offset, size, td_tag) |
-		cpu_to_le64((u64)I40E_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT);
+	/* Place RS bit on last descriptor of any packet that spans across the
+	 * 4th descriptor (WB_STRIDE aka 0x3) in a 64B cacheline.
+	 */
+#define WB_STRIDE 0x3
+	if (((i & WB_STRIDE) != WB_STRIDE) &&
+	    (first <= &tx_ring->tx_bi[i]) &&
+	    (first >= &tx_ring->tx_bi[i & ~WB_STRIDE])) {
+		tx_desc->cmd_type_offset_bsz =
+			build_ctob(td_cmd, td_offset, size, td_tag) |
+			cpu_to_le64((u64)I40E_TX_DESC_CMD_EOP <<
+					 I40E_TXD_QW1_CMD_SHIFT);
+	} else {
+		tx_desc->cmd_type_offset_bsz =
+			build_ctob(td_cmd, td_offset, size, td_tag) |
+			cpu_to_le64((u64)I40E_TXD_CMD <<
+					 I40E_TXD_QW1_CMD_SHIFT);
+	}
 
 	netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev,
 						 tx_ring->queue_index),