diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index 8629a9f77e42..23fd11243583 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -245,16 +245,6 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget) tx_ring->q_vector->tx.total_bytes += total_bytes; tx_ring->q_vector->tx.total_packets += total_packets; - /* check to see if there are any non-cache aligned descriptors - * waiting to be written back, and kick the hardware to force - * them to be written back in case of napi polling - */ - if (budget && - !((i & WB_STRIDE) == WB_STRIDE) && - !test_bit(__I40E_DOWN, &tx_ring->vsi->state) && - (I40E_DESC_UNUSED(tx_ring) != tx_ring->count)) - tx_ring->arm_wb = true; - netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev, tx_ring->queue_index), total_packets, total_bytes); @@ -1770,6 +1760,9 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, u32 td_tag = 0; dma_addr_t dma; u16 gso_segs; + u16 desc_count = 0; + bool tail_bump = true; + bool do_rs = false; if (tx_flags & I40E_TX_FLAGS_HW_VLAN) { td_cmd |= I40E_TX_DESC_CMD_IL2TAG1; @@ -1810,6 +1803,8 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, tx_desc++; i++; + desc_count++; + if (i == tx_ring->count) { tx_desc = I40E_TX_DESC(tx_ring, 0); i = 0; @@ -1829,6 +1824,8 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, tx_desc++; i++; + desc_count++; + if (i == tx_ring->count) { tx_desc = I40E_TX_DESC(tx_ring, 0); i = 0; @@ -1843,35 +1840,7 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, tx_bi = &tx_ring->tx_bi[i]; } - /* Place RS bit on last descriptor of any packet that spans across the - * 4th descriptor (WB_STRIDE aka 0x3) in a 64B cacheline. - */ #define WB_STRIDE 0x3 - if (((i & WB_STRIDE) != WB_STRIDE) && - (first <= &tx_ring->tx_bi[i]) && - (first >= &tx_ring->tx_bi[i & ~WB_STRIDE])) { - tx_desc->cmd_type_offset_bsz = - build_ctob(td_cmd, td_offset, size, td_tag) | - cpu_to_le64((u64)I40E_TX_DESC_CMD_EOP << - I40E_TXD_QW1_CMD_SHIFT); - } else { - tx_desc->cmd_type_offset_bsz = - build_ctob(td_cmd, td_offset, size, td_tag) | - cpu_to_le64((u64)I40E_TXD_CMD << - I40E_TXD_QW1_CMD_SHIFT); - } - - netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->queue_index), - first->bytecount); - - /* Force memory writes to complete before letting h/w - * know there are new descriptors to fetch. (Only - * applicable for weak-ordered memory model archs, - * such as IA-64). - */ - wmb(); - /* set next_to_watch value indicating a packet is present */ first->next_to_watch = tx_desc; @@ -1881,15 +1850,78 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, tx_ring->next_to_use = i; + netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev, + tx_ring->queue_index), + first->bytecount); i40evf_maybe_stop_tx(tx_ring, DESC_NEEDED); + + /* Algorithm to optimize tail and RS bit setting: + * if xmit_more is supported + * if xmit_more is true + * do not update tail and do not mark RS bit. + * if xmit_more is false and last xmit_more was false + * if every packet spanned less than 4 desc + * then set RS bit on 4th packet and update tail + * on every packet + * else + * update tail and set RS bit on every packet. + * if xmit_more is false and last_xmit_more was true + * update tail and set RS bit. + * else (kernel < 3.18) + * if every packet spanned less than 4 desc + * then set RS bit on 4th packet and update tail + * on every packet + * else + * set RS bit on EOP for every packet and update tail + * + * Optimization: wmb to be issued only in case of tail update. + * Also optimize the Descriptor WB path for RS bit with the same + * algorithm. + * + * Note: If there are less than 4 packets + * pending and interrupts were disabled the service task will + * trigger a force WB. + */ + if (skb->xmit_more && + !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev, + tx_ring->queue_index))) { + tx_ring->flags |= I40E_TXR_FLAGS_LAST_XMIT_MORE_SET; + tail_bump = false; + } else if (!skb->xmit_more && + !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev, + tx_ring->queue_index)) && + (!(tx_ring->flags & I40E_TXR_FLAGS_LAST_XMIT_MORE_SET)) && + (tx_ring->packet_stride < WB_STRIDE) && + (desc_count < WB_STRIDE)) { + tx_ring->packet_stride++; + } else { + tx_ring->packet_stride = 0; + tx_ring->flags &= ~I40E_TXR_FLAGS_LAST_XMIT_MORE_SET; + do_rs = true; + } + if (do_rs) + tx_ring->packet_stride = 0; + + tx_desc->cmd_type_offset_bsz = + build_ctob(td_cmd, td_offset, size, td_tag) | + cpu_to_le64((u64)(do_rs ? I40E_TXD_CMD : + I40E_TX_DESC_CMD_EOP) << + I40E_TXD_QW1_CMD_SHIFT); + /* notify HW of packet */ - if (!skb->xmit_more || - netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->queue_index))) - writel(i, tx_ring->tail); - else + if (!tail_bump) prefetchw(tx_desc + 1); + if (tail_bump) { + /* Force memory writes to complete before letting h/w + * know there are new descriptors to fetch. (Only + * applicable for weak-ordered memory model archs, + * such as IA-64). + */ + wmb(); + writel(i, tx_ring->tail); + } + return; dma_error: diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h index 997b374e86af..929ddd9f8215 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h @@ -268,6 +268,8 @@ struct i40e_ring { bool ring_active; /* is ring online or not */ bool arm_wb; /* do something to arm write back */ + u8 packet_stride; +#define I40E_TXR_FLAGS_LAST_XMIT_MORE_SET BIT(2) u16 flags; #define I40E_TXR_FLAGS_WB_ON_ITR BIT(0)