diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index e7462793d48d..c64d18d4cb2d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -243,7 +243,6 @@ struct i40e_pf { struct pci_dev *pdev; struct i40e_hw hw; unsigned long state; - unsigned long link_check_timeout; struct msix_entry *msix_entries; bool fc_autoneg_status; @@ -667,7 +666,7 @@ struct i40e_mac_filter *i40e_add_filter(struct i40e_vsi *vsi, bool is_vf, bool is_netdev); void i40e_del_filter(struct i40e_vsi *vsi, u8 *macaddr, s16 vlan, bool is_vf, bool is_netdev); -int i40e_sync_vsi_filters(struct i40e_vsi *vsi); +int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl); struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type, u16 uplink, u32 param1); int i40e_vsi_release(struct i40e_vsi *vsi); diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index d7c15d17faa6..508efb034e87 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -1146,7 +1146,7 @@ static ssize_t i40e_dbg_command_write(struct file *filp, } f = i40e_add_filter(vsi, ma, vlan, false, false); - ret = i40e_sync_vsi_filters(vsi); + ret = i40e_sync_vsi_filters(vsi, true); if (f && !ret) dev_info(&pf->pdev->dev, "add macaddr: %pM vlan=%d added to VSI %d\n", @@ -1183,7 +1183,7 @@ static ssize_t i40e_dbg_command_write(struct file *filp, } i40e_del_filter(vsi, ma, vlan, false, false); - ret = i40e_sync_vsi_filters(vsi); + ret = i40e_sync_vsi_filters(vsi, true); if (!ret) dev_info(&pf->pdev->dev, "del macaddr: %pM vlan=%d removed from VSI %d\n", diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 530d8b6739f9..52e58f304b21 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -299,25 +299,69 @@ static void i40e_tx_timeout(struct net_device *netdev) struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; + struct i40e_ring *tx_ring = NULL; + unsigned int i, hung_queue = 0; + u32 head, val; pf->tx_timeout_count++; + /* find the stopped queue the same way the stack does */ + for (i = 0; i < netdev->num_tx_queues; i++) { + struct netdev_queue *q; + unsigned long trans_start; + + q = netdev_get_tx_queue(netdev, i); + trans_start = q->trans_start ? : netdev->trans_start; + if (netif_xmit_stopped(q) && + time_after(jiffies, + (trans_start + netdev->watchdog_timeo))) { + hung_queue = i; + break; + } + } + + if (i == netdev->num_tx_queues) { + netdev_info(netdev, "tx_timeout: no netdev hung queue found\n"); + } else { + /* now that we have an index, find the tx_ring struct */ + for (i = 0; i < vsi->num_queue_pairs; i++) { + if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc) { + if (hung_queue == + vsi->tx_rings[i]->queue_index) { + tx_ring = vsi->tx_rings[i]; + break; + } + } + } + } + if (time_after(jiffies, (pf->tx_timeout_last_recovery + HZ*20))) - pf->tx_timeout_recovery_level = 1; + pf->tx_timeout_recovery_level = 1; /* reset after some time */ + else if (time_before(jiffies, + (pf->tx_timeout_last_recovery + netdev->watchdog_timeo))) + return; /* don't do any new action before the next timeout */ + + if (tx_ring) { + head = i40e_get_head(tx_ring); + /* Read interrupt register */ + if (pf->flags & I40E_FLAG_MSIX_ENABLED) + val = rd32(&pf->hw, + I40E_PFINT_DYN_CTLN(tx_ring->q_vector->v_idx + + tx_ring->vsi->base_vector - 1)); + else + val = rd32(&pf->hw, I40E_PFINT_DYN_CTL0); + + netdev_info(netdev, "tx_timeout: VSI_seid: %d, Q %d, NTC: 0x%x, HWB: 0x%x, NTU: 0x%x, TAIL: 0x%x, INT: 0x%x\n", + vsi->seid, hung_queue, tx_ring->next_to_clean, + head, tx_ring->next_to_use, + readl(tx_ring->tail), val); + } + pf->tx_timeout_last_recovery = jiffies; - netdev_info(netdev, "tx_timeout recovery level %d\n", - pf->tx_timeout_recovery_level); + netdev_info(netdev, "tx_timeout recovery level %d, hung_queue %d\n", + pf->tx_timeout_recovery_level, hung_queue); switch (pf->tx_timeout_recovery_level) { - case 0: - /* disable and re-enable queues for the VSI */ - if (in_interrupt()) { - set_bit(__I40E_REINIT_REQUESTED, &pf->state); - set_bit(__I40E_REINIT_REQUESTED, &vsi->state); - } else { - i40e_vsi_reinit_locked(vsi); - } - break; case 1: set_bit(__I40E_PF_RESET_REQUESTED, &pf->state); break; @@ -329,10 +373,9 @@ static void i40e_tx_timeout(struct net_device *netdev) break; default: netdev_err(netdev, "tx_timeout recovery unsuccessful\n"); - set_bit(__I40E_DOWN_REQUESTED, &pf->state); - set_bit(__I40E_DOWN_REQUESTED, &vsi->state); break; } + i40e_service_event_schedule(pf); pf->tx_timeout_recovery_level++; } @@ -754,7 +797,6 @@ static void i40e_update_link_xoff_rx(struct i40e_pf *pf) struct i40e_hw_port_stats *nsd = &pf->stats; struct i40e_hw *hw = &pf->hw; u64 xoff = 0; - u16 i, v; if ((hw->fc.current_mode != I40E_FC_FULL) && (hw->fc.current_mode != I40E_FC_RX_PAUSE)) @@ -769,18 +811,6 @@ static void i40e_update_link_xoff_rx(struct i40e_pf *pf) if (!(nsd->link_xoff_rx - xoff)) return; - /* Clear the __I40E_HANG_CHECK_ARMED bit for all Tx rings */ - for (v = 0; v < pf->num_alloc_vsi; v++) { - struct i40e_vsi *vsi = pf->vsi[v]; - - if (!vsi || !vsi->tx_rings[0]) - continue; - - for (i = 0; i < vsi->num_queue_pairs; i++) { - struct i40e_ring *ring = vsi->tx_rings[i]; - clear_bit(__I40E_HANG_CHECK_ARMED, &ring->state); - } - } } /** @@ -796,7 +826,7 @@ static void i40e_update_prio_xoff_rx(struct i40e_pf *pf) bool xoff[I40E_MAX_TRAFFIC_CLASS] = {false}; struct i40e_dcbx_config *dcb_cfg; struct i40e_hw *hw = &pf->hw; - u16 i, v; + u16 i; u8 tc; dcb_cfg = &hw->local_dcbx_config; @@ -821,23 +851,6 @@ static void i40e_update_prio_xoff_rx(struct i40e_pf *pf) tc = dcb_cfg->etscfg.prioritytable[i]; xoff[tc] = true; } - - /* Clear the __I40E_HANG_CHECK_ARMED bit for Tx rings */ - for (v = 0; v < pf->num_alloc_vsi; v++) { - struct i40e_vsi *vsi = pf->vsi[v]; - - if (!vsi || !vsi->tx_rings[0]) - continue; - - for (i = 0; i < vsi->num_queue_pairs; i++) { - struct i40e_ring *ring = vsi->tx_rings[i]; - - tc = ring->dcb_tc; - if (xoff[tc]) - clear_bit(__I40E_HANG_CHECK_ARMED, - &ring->state); - } - } } /** @@ -1514,7 +1527,7 @@ static int i40e_set_mac(struct net_device *netdev, void *p) f->is_laa = true; } - i40e_sync_vsi_filters(vsi); + i40e_sync_vsi_filters(vsi, false); ether_addr_copy(netdev->dev_addr, addr->sa_data); return 0; @@ -1751,12 +1764,13 @@ static void i40e_set_rx_mode(struct net_device *netdev) /** * i40e_sync_vsi_filters - Update the VSI filter list to the HW * @vsi: ptr to the VSI + * @grab_rtnl: whether RTNL needs to be grabbed * * Push any outstanding VSI filter changes through the AdminQ. * * Returns 0 or error value **/ -int i40e_sync_vsi_filters(struct i40e_vsi *vsi) +int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl) { struct i40e_mac_filter *f, *ftmp; bool promisc_forced_on = false; @@ -1945,7 +1959,11 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) */ if (pf->cur_promisc != cur_promisc) { pf->cur_promisc = cur_promisc; - i40e_do_reset_safe(pf, + if (grab_rtnl) + i40e_do_reset_safe(pf, + BIT(__I40E_PF_RESET_REQUESTED)); + else + i40e_do_reset(pf, BIT(__I40E_PF_RESET_REQUESTED)); } } else { @@ -1996,7 +2014,7 @@ static void i40e_sync_filters_subtask(struct i40e_pf *pf) for (v = 0; v < pf->num_alloc_vsi; v++) { if (pf->vsi[v] && (pf->vsi[v]->flags & I40E_VSI_FLAG_FILTER_CHANGED)) - i40e_sync_vsi_filters(pf->vsi[v]); + i40e_sync_vsi_filters(pf->vsi[v], true); } } @@ -2203,7 +2221,7 @@ int i40e_vsi_add_vlan(struct i40e_vsi *vsi, s16 vid) test_bit(__I40E_RESET_RECOVERY_PENDING, &vsi->back->state)) return 0; - return i40e_sync_vsi_filters(vsi); + return i40e_sync_vsi_filters(vsi, false); } /** @@ -2275,7 +2293,7 @@ int i40e_vsi_kill_vlan(struct i40e_vsi *vsi, s16 vid) test_bit(__I40E_RESET_RECOVERY_PENDING, &vsi->back->state)) return 0; - return i40e_sync_vsi_filters(vsi); + return i40e_sync_vsi_filters(vsi, false); } /** @@ -2609,8 +2627,6 @@ static int i40e_configure_tx_ring(struct i40e_ring *ring) wr32(hw, I40E_QTX_CTL(pf_q), qtx_ctl); i40e_flush(hw); - clear_bit(__I40E_HANG_CHECK_ARMED, &ring->state); - /* cache tail off for easier writes later */ ring->tail = hw->hw_addr + I40E_QTX_TAIL(pf_q); @@ -4145,6 +4161,108 @@ static int i40e_pf_wait_txq_disabled(struct i40e_pf *pf) } #endif + +/** + * i40e_detect_recover_hung_queue - Function to detect and recover hung_queue + * @q_idx: TX queue number + * @vsi: Pointer to VSI struct + * + * This function checks specified queue for given VSI. Detects hung condition. + * Sets hung bit since it is two step process. Before next run of service task + * if napi_poll runs, it reset 'hung' bit for respective q_vector. If not, + * hung condition remain unchanged and during subsequent run, this function + * issues SW interrupt to recover from hung condition. + **/ +static void i40e_detect_recover_hung_queue(int q_idx, struct i40e_vsi *vsi) +{ + struct i40e_ring *tx_ring = NULL; + struct i40e_pf *pf; + u32 head, val, tx_pending; + int i; + + pf = vsi->back; + + /* now that we have an index, find the tx_ring struct */ + for (i = 0; i < vsi->num_queue_pairs; i++) { + if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc) { + if (q_idx == vsi->tx_rings[i]->queue_index) { + tx_ring = vsi->tx_rings[i]; + break; + } + } + } + + if (!tx_ring) + return; + + /* Read interrupt register */ + if (pf->flags & I40E_FLAG_MSIX_ENABLED) + val = rd32(&pf->hw, + I40E_PFINT_DYN_CTLN(tx_ring->q_vector->v_idx + + tx_ring->vsi->base_vector - 1)); + else + val = rd32(&pf->hw, I40E_PFINT_DYN_CTL0); + + head = i40e_get_head(tx_ring); + + tx_pending = i40e_get_tx_pending(tx_ring); + + /* Interrupts are disabled and TX pending is non-zero, + * trigger the SW interrupt (don't wait). Worst case + * there will be one extra interrupt which may result + * into not cleaning any queues because queues are cleaned. + */ + if (tx_pending && (!(val & I40E_PFINT_DYN_CTLN_INTENA_MASK))) + i40e_force_wb(vsi, tx_ring->q_vector); +} + +/** + * i40e_detect_recover_hung - Function to detect and recover hung_queues + * @pf: pointer to PF struct + * + * LAN VSI has netdev and netdev has TX queues. This function is to check + * each of those TX queues if they are hung, trigger recovery by issuing + * SW interrupt. + **/ +static void i40e_detect_recover_hung(struct i40e_pf *pf) +{ + struct net_device *netdev; + struct i40e_vsi *vsi; + int i; + + /* Only for LAN VSI */ + vsi = pf->vsi[pf->lan_vsi]; + + if (!vsi) + return; + + /* Make sure, VSI state is not DOWN/RECOVERY_PENDING */ + if (test_bit(__I40E_DOWN, &vsi->back->state) || + test_bit(__I40E_RESET_RECOVERY_PENDING, &vsi->back->state)) + return; + + /* Make sure type is MAIN VSI */ + if (vsi->type != I40E_VSI_MAIN) + return; + + netdev = vsi->netdev; + if (!netdev) + return; + + /* Bail out if netif_carrier is not OK */ + if (!netif_carrier_ok(netdev)) + return; + + /* Go thru' TX queues for netdev */ + for (i = 0; i < netdev->num_tx_queues; i++) { + struct netdev_queue *q; + + q = netdev_get_tx_queue(netdev, i); + if (q) + i40e_detect_recover_hung_queue(i, vsi); + } +} + /** * i40e_get_iscsi_tc_map - Return TC map for iSCSI APP * @pf: pointer to PF @@ -5758,68 +5876,6 @@ static void i40e_link_event(struct i40e_pf *pf) i40e_ptp_set_increment(pf); } -/** - * i40e_check_hang_subtask - Check for hung queues and dropped interrupts - * @pf: board private structure - * - * Set the per-queue flags to request a check for stuck queues in the irq - * clean functions, then force interrupts to be sure the irq clean is called. - **/ -static void i40e_check_hang_subtask(struct i40e_pf *pf) -{ - int i, v; - - /* If we're down or resetting, just bail */ - if (test_bit(__I40E_DOWN, &pf->state) || - test_bit(__I40E_CONFIG_BUSY, &pf->state)) - return; - - /* for each VSI/netdev - * for each Tx queue - * set the check flag - * for each q_vector - * force an interrupt - */ - for (v = 0; v < pf->num_alloc_vsi; v++) { - struct i40e_vsi *vsi = pf->vsi[v]; - int armed = 0; - - if (!pf->vsi[v] || - test_bit(__I40E_DOWN, &vsi->state) || - (vsi->netdev && !netif_carrier_ok(vsi->netdev))) - continue; - - for (i = 0; i < vsi->num_queue_pairs; i++) { - set_check_for_tx_hang(vsi->tx_rings[i]); - if (test_bit(__I40E_HANG_CHECK_ARMED, - &vsi->tx_rings[i]->state)) - armed++; - } - - if (armed) { - if (!(pf->flags & I40E_FLAG_MSIX_ENABLED)) { - wr32(&vsi->back->hw, I40E_PFINT_DYN_CTL0, - (I40E_PFINT_DYN_CTL0_INTENA_MASK | - I40E_PFINT_DYN_CTL0_SWINT_TRIG_MASK | - I40E_PFINT_DYN_CTL0_ITR_INDX_MASK | - I40E_PFINT_DYN_CTL0_SW_ITR_INDX_ENA_MASK | - I40E_PFINT_DYN_CTL0_SW_ITR_INDX_MASK)); - } else { - u16 vec = vsi->base_vector - 1; - u32 val = (I40E_PFINT_DYN_CTLN_INTENA_MASK | - I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK | - I40E_PFINT_DYN_CTLN_ITR_INDX_MASK | - I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK | - I40E_PFINT_DYN_CTLN_SW_ITR_INDX_MASK); - for (i = 0; i < vsi->num_q_vectors; i++, vec++) - wr32(&vsi->back->hw, - I40E_PFINT_DYN_CTLN(vec), val); - } - i40e_flush(&vsi->back->hw); - } - } -} - /** * i40e_watchdog_subtask - periodic checks not using event driven response * @pf: board private structure @@ -5839,7 +5895,6 @@ static void i40e_watchdog_subtask(struct i40e_pf *pf) return; pf->service_timer_previous = jiffies; - i40e_check_hang_subtask(pf); i40e_link_event(pf); /* Update the stats for active netdevs so the network stack @@ -6807,6 +6862,7 @@ static void i40e_service_task(struct work_struct *work) return; } + i40e_detect_recover_hung(pf); i40e_reset_subtask(pf); i40e_handle_mdd_event(pf); i40e_vc_process_vflr_event(pf); @@ -8770,7 +8826,7 @@ int i40e_vsi_release(struct i40e_vsi *vsi) list_for_each_entry_safe(f, ftmp, &vsi->mac_filter_list, list) i40e_del_filter(vsi, f->macaddr, f->vlan, f->is_vf, f->is_netdev); - i40e_sync_vsi_filters(vsi); + i40e_sync_vsi_filters(vsi, false); i40e_vsi_delete(vsi); i40e_vsi_free_q_vectors(vsi); @@ -10101,7 +10157,6 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) INIT_WORK(&pf->service_task, i40e_service_task); clear_bit(__I40E_SERVICE_SCHED, &pf->state); pf->flags |= I40E_FLAG_NEED_LINK_UPDATE; - pf->link_check_timeout = jiffies; /* WoL defaults to disabled */ pf->wol_en = false; diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 738aca68f665..3ce4900c0c43 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -600,20 +600,6 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring) } } -/** - * i40e_get_head - Retrieve head from head writeback - * @tx_ring: tx ring to fetch head of - * - * Returns value of Tx ring head based on value stored - * in head write-back location - **/ -static inline u32 i40e_get_head(struct i40e_ring *tx_ring) -{ - void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count; - - return le32_to_cpu(*(volatile __le32 *)head); -} - /** * i40e_get_tx_pending - how many tx descriptors not processed * @tx_ring: the ring of descriptors @@ -621,7 +607,7 @@ static inline u32 i40e_get_head(struct i40e_ring *tx_ring) * Since there is no access to the ring head register * in XL710, we need to use our local copies **/ -static u32 i40e_get_tx_pending(struct i40e_ring *ring) +u32 i40e_get_tx_pending(struct i40e_ring *ring) { u32 head, tail; @@ -635,50 +621,6 @@ static u32 i40e_get_tx_pending(struct i40e_ring *ring) return 0; } -/** - * i40e_check_tx_hang - Is there a hang in the Tx queue - * @tx_ring: the ring of descriptors - **/ -static bool i40e_check_tx_hang(struct i40e_ring *tx_ring) -{ - u32 tx_done = tx_ring->stats.packets; - u32 tx_done_old = tx_ring->tx_stats.tx_done_old; - u32 tx_pending = i40e_get_tx_pending(tx_ring); - struct i40e_pf *pf = tx_ring->vsi->back; - bool ret = false; - - clear_check_for_tx_hang(tx_ring); - - /* Check for a hung queue, but be thorough. This verifies - * that a transmit has been completed since the previous - * check AND there is at least one packet pending. The - * ARMED bit is set to indicate a potential hang. The - * bit is cleared if a pause frame is received to remove - * false hang detection due to PFC or 802.3x frames. By - * requiring this to fail twice we avoid races with - * PFC clearing the ARMED bit and conditions where we - * run the check_tx_hang logic with a transmit completion - * pending but without time to complete it yet. - */ - if ((tx_done_old == tx_done) && tx_pending) { - /* make sure it is true for two checks in a row */ - ret = test_and_set_bit(__I40E_HANG_CHECK_ARMED, - &tx_ring->state); - } else if (tx_done_old == tx_done && - (tx_pending < I40E_MIN_DESC_PENDING) && (tx_pending > 0)) { - if (I40E_DEBUG_FLOW & pf->hw.debug_mask) - dev_info(tx_ring->dev, "HW needs some more descs to do a cacheline flush. tx_pending %d, queue %d", - tx_pending, tx_ring->queue_index); - pf->tx_sluggish_count++; - } else { - /* update completed stats and disarm the hang check */ - tx_ring->tx_stats.tx_done_old = tx_done; - clear_bit(__I40E_HANG_CHECK_ARMED, &tx_ring->state); - } - - return ret; -} - #define WB_STRIDE 0x3 /** @@ -784,42 +726,21 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget) tx_ring->q_vector->tx.total_bytes += total_bytes; tx_ring->q_vector->tx.total_packets += total_packets; - /* check to see if there are any non-cache aligned descriptors - * waiting to be written back, and kick the hardware to force - * them to be written back in case of napi polling - */ - if (budget && - !((i & WB_STRIDE) == WB_STRIDE) && - !test_bit(__I40E_DOWN, &tx_ring->vsi->state) && - (I40E_DESC_UNUSED(tx_ring) != tx_ring->count)) - tx_ring->arm_wb = true; - else - tx_ring->arm_wb = false; + if (tx_ring->flags & I40E_TXR_FLAGS_WB_ON_ITR) { + unsigned int j = 0; - if (check_for_tx_hang(tx_ring) && i40e_check_tx_hang(tx_ring)) { - /* schedule immediate reset if we believe we hung */ - dev_info(tx_ring->dev, "Detected Tx Unit Hang\n" - " VSI <%d>\n" - " Tx Queue <%d>\n" - " next_to_use <%x>\n" - " next_to_clean <%x>\n", - tx_ring->vsi->seid, - tx_ring->queue_index, - tx_ring->next_to_use, i); - - netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index); - - dev_info(tx_ring->dev, - "tx hang detected on queue %d, reset requested\n", - tx_ring->queue_index); - - /* do not fire the reset immediately, wait for the stack to - * decide we are truly stuck, also prevents every queue from - * simultaneously requesting a reset + /* check to see if there are < 4 descriptors + * waiting to be written back, then kick the hardware to force + * them to be written back in case we stay in NAPI. + * In this mode on X722 we do not enable Interrupt. */ + j = i40e_get_tx_pending(tx_ring); - /* the adapter is about to reset, no point in enabling polling */ - budget = 1; + if (budget && + ((j / (WB_STRIDE + 1)) == 0) && (j != 0) && + !test_bit(__I40E_DOWN, &tx_ring->vsi->state) && + (I40E_DESC_UNUSED(tx_ring) != tx_ring->count)) + tx_ring->arm_wb = true; } netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev, @@ -851,7 +772,7 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget) * @q_vector: the vector on which to force writeback * **/ -static void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector) +void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector) { u16 flags = q_vector->tx.ring[0].flags; @@ -2324,6 +2245,9 @@ static void i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags, l4_tunnel = I40E_TXD_CTX_UDP_TUNNELING; *tx_flags |= I40E_TX_FLAGS_VXLAN_TUNNEL; break; + case IPPROTO_GRE: + l4_tunnel = I40E_TXD_CTX_GRE_TUNNELING; + break; default: return; } @@ -2581,6 +2505,9 @@ static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, u32 td_tag = 0; dma_addr_t dma; u16 gso_segs; + u16 desc_count = 0; + bool tail_bump = true; + bool do_rs = false; if (tx_flags & I40E_TX_FLAGS_HW_VLAN) { td_cmd |= I40E_TX_DESC_CMD_IL2TAG1; @@ -2621,6 +2548,8 @@ static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, tx_desc++; i++; + desc_count++; + if (i == tx_ring->count) { tx_desc = I40E_TX_DESC(tx_ring, 0); i = 0; @@ -2640,6 +2569,8 @@ static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, tx_desc++; i++; + desc_count++; + if (i == tx_ring->count) { tx_desc = I40E_TX_DESC(tx_ring, 0); i = 0; @@ -2654,34 +2585,6 @@ static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, tx_bi = &tx_ring->tx_bi[i]; } - /* Place RS bit on last descriptor of any packet that spans across the - * 4th descriptor (WB_STRIDE aka 0x3) in a 64B cacheline. - */ - if (((i & WB_STRIDE) != WB_STRIDE) && - (first <= &tx_ring->tx_bi[i]) && - (first >= &tx_ring->tx_bi[i & ~WB_STRIDE])) { - tx_desc->cmd_type_offset_bsz = - build_ctob(td_cmd, td_offset, size, td_tag) | - cpu_to_le64((u64)I40E_TX_DESC_CMD_EOP << - I40E_TXD_QW1_CMD_SHIFT); - } else { - tx_desc->cmd_type_offset_bsz = - build_ctob(td_cmd, td_offset, size, td_tag) | - cpu_to_le64((u64)I40E_TXD_CMD << - I40E_TXD_QW1_CMD_SHIFT); - } - - netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->queue_index), - first->bytecount); - - /* Force memory writes to complete before letting h/w - * know there are new descriptors to fetch. (Only - * applicable for weak-ordered memory model archs, - * such as IA-64). - */ - wmb(); - /* set next_to_watch value indicating a packet is present */ first->next_to_watch = tx_desc; @@ -2691,15 +2594,72 @@ static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, tx_ring->next_to_use = i; + netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev, + tx_ring->queue_index), + first->bytecount); i40e_maybe_stop_tx(tx_ring, DESC_NEEDED); + + /* Algorithm to optimize tail and RS bit setting: + * if xmit_more is supported + * if xmit_more is true + * do not update tail and do not mark RS bit. + * if xmit_more is false and last xmit_more was false + * if every packet spanned less than 4 desc + * then set RS bit on 4th packet and update tail + * on every packet + * else + * update tail and set RS bit on every packet. + * if xmit_more is false and last_xmit_more was true + * update tail and set RS bit. + * + * Optimization: wmb to be issued only in case of tail update. + * Also optimize the Descriptor WB path for RS bit with the same + * algorithm. + * + * Note: If there are less than 4 packets + * pending and interrupts were disabled the service task will + * trigger a force WB. + */ + if (skb->xmit_more && + !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev, + tx_ring->queue_index))) { + tx_ring->flags |= I40E_TXR_FLAGS_LAST_XMIT_MORE_SET; + tail_bump = false; + } else if (!skb->xmit_more && + !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev, + tx_ring->queue_index)) && + (!(tx_ring->flags & I40E_TXR_FLAGS_LAST_XMIT_MORE_SET)) && + (tx_ring->packet_stride < WB_STRIDE) && + (desc_count < WB_STRIDE)) { + tx_ring->packet_stride++; + } else { + tx_ring->packet_stride = 0; + tx_ring->flags &= ~I40E_TXR_FLAGS_LAST_XMIT_MORE_SET; + do_rs = true; + } + if (do_rs) + tx_ring->packet_stride = 0; + + tx_desc->cmd_type_offset_bsz = + build_ctob(td_cmd, td_offset, size, td_tag) | + cpu_to_le64((u64)(do_rs ? I40E_TXD_CMD : + I40E_TX_DESC_CMD_EOP) << + I40E_TXD_QW1_CMD_SHIFT); + /* notify HW of packet */ - if (!skb->xmit_more || - netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->queue_index))) - writel(i, tx_ring->tail); - else + if (!tail_bump) prefetchw(tx_desc + 1); + if (tail_bump) { + /* Force memory writes to complete before letting h/w + * know there are new descriptors to fetch. (Only + * applicable for weak-ordered memory model archs, + * such as IA-64). + */ + wmb(); + writel(i, tx_ring->tail); + } + return; dma_error: diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index f1385a1989fa..a3978c2b5fc9 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -199,8 +199,6 @@ struct i40e_rx_queue_stats { enum i40e_ring_state_t { __I40E_TX_FDIR_INIT_DONE, __I40E_TX_XPS_INIT_DONE, - __I40E_TX_DETECT_HANG, - __I40E_HANG_CHECK_ARMED, __I40E_RX_PS_ENABLED, __I40E_RX_16BYTE_DESC_ENABLED, }; @@ -211,12 +209,6 @@ enum i40e_ring_state_t { set_bit(__I40E_RX_PS_ENABLED, &(ring)->state) #define clear_ring_ps_enabled(ring) \ clear_bit(__I40E_RX_PS_ENABLED, &(ring)->state) -#define check_for_tx_hang(ring) \ - test_bit(__I40E_TX_DETECT_HANG, &(ring)->state) -#define set_check_for_tx_hang(ring) \ - set_bit(__I40E_TX_DETECT_HANG, &(ring)->state) -#define clear_check_for_tx_hang(ring) \ - clear_bit(__I40E_TX_DETECT_HANG, &(ring)->state) #define ring_is_16byte_desc_enabled(ring) \ test_bit(__I40E_RX_16BYTE_DESC_ENABLED, &(ring)->state) #define set_ring_16byte_desc_enabled(ring) \ @@ -264,10 +256,12 @@ struct i40e_ring { bool ring_active; /* is ring online or not */ bool arm_wb; /* do something to arm write back */ + u8 packet_stride; u16 flags; #define I40E_TXR_FLAGS_WB_ON_ITR BIT(0) #define I40E_TXR_FLAGS_OUTER_UDP_CSUM BIT(1) +#define I40E_TXR_FLAGS_LAST_XMIT_MORE_SET BIT(2) /* stats structs */ struct i40e_queue_stats stats; @@ -326,4 +320,20 @@ int i40e_xmit_descriptor_count(struct sk_buff *skb, struct i40e_ring *tx_ring); int i40e_tx_prepare_vlan_flags(struct sk_buff *skb, struct i40e_ring *tx_ring, u32 *flags); #endif +void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector); +u32 i40e_get_tx_pending(struct i40e_ring *ring); + +/** + * i40e_get_head - Retrieve head from head writeback + * @tx_ring: tx ring to fetch head of + * + * Returns value of Tx ring head based on value stored + * in head write-back location + **/ +static inline u32 i40e_get_head(struct i40e_ring *tx_ring) +{ + void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count; + + return le32_to_cpu(*(volatile __le32 *)head); +} #endif /* _I40E_TXRX_H_ */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index d99c116032f3..eacce9389962 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -561,7 +561,7 @@ static int i40e_alloc_vsi_res(struct i40e_vf *vf, enum i40e_vsi_type type) } /* program mac filter */ - ret = i40e_sync_vsi_filters(vsi); + ret = i40e_sync_vsi_filters(vsi, false); if (ret) dev_err(&pf->pdev->dev, "Unable to program ucast filters\n"); @@ -1605,7 +1605,7 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) } /* program the updated filter list */ - if (i40e_sync_vsi_filters(vsi)) + if (i40e_sync_vsi_filters(vsi, false)) dev_err(&pf->pdev->dev, "Unable to program VF MAC filters\n"); error_param: @@ -1656,7 +1656,7 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) I40E_VLAN_ANY, true, false); /* program the updated filter list */ - if (i40e_sync_vsi_filters(vsi)) + if (i40e_sync_vsi_filters(vsi, false)) dev_err(&pf->pdev->dev, "Unable to program VF MAC filters\n"); error_param: @@ -2062,7 +2062,7 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) dev_info(&pf->pdev->dev, "Setting MAC %pM on VF %d\n", mac, vf_id); /* program mac filter */ - if (i40e_sync_vsi_filters(vsi)) { + if (i40e_sync_vsi_filters(vsi, false)) { dev_err(&pf->pdev->dev, "Unable to program ucast filters\n"); ret = -EIO; goto error_param; diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index 7e91d825c760..830979380466 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -140,65 +140,6 @@ static inline u32 i40e_get_head(struct i40e_ring *tx_ring) return le32_to_cpu(*(volatile __le32 *)head); } -/** - * i40e_get_tx_pending - how many tx descriptors not processed - * @tx_ring: the ring of descriptors - * - * Since there is no access to the ring head register - * in XL710, we need to use our local copies - **/ -static u32 i40e_get_tx_pending(struct i40e_ring *ring) -{ - u32 head, tail; - - head = i40e_get_head(ring); - tail = readl(ring->tail); - - if (head != tail) - return (head < tail) ? - tail - head : (tail + ring->count - head); - - return 0; -} - -/** - * i40e_check_tx_hang - Is there a hang in the Tx queue - * @tx_ring: the ring of descriptors - **/ -static bool i40e_check_tx_hang(struct i40e_ring *tx_ring) -{ - u32 tx_done = tx_ring->stats.packets; - u32 tx_done_old = tx_ring->tx_stats.tx_done_old; - u32 tx_pending = i40e_get_tx_pending(tx_ring); - bool ret = false; - - clear_check_for_tx_hang(tx_ring); - - /* Check for a hung queue, but be thorough. This verifies - * that a transmit has been completed since the previous - * check AND there is at least one packet pending. The - * ARMED bit is set to indicate a potential hang. The - * bit is cleared if a pause frame is received to remove - * false hang detection due to PFC or 802.3x frames. By - * requiring this to fail twice we avoid races with - * PFC clearing the ARMED bit and conditions where we - * run the check_tx_hang logic with a transmit completion - * pending but without time to complete it yet. - */ - if ((tx_done_old == tx_done) && tx_pending) { - /* make sure it is true for two checks in a row */ - ret = test_and_set_bit(__I40E_HANG_CHECK_ARMED, - &tx_ring->state); - } else if (tx_done_old == tx_done && - (tx_pending < I40E_MIN_DESC_PENDING) && (tx_pending > 0)) { - /* update completed stats and disarm the hang check */ - tx_ring->tx_stats.tx_done_old = tx_done; - clear_bit(__I40E_HANG_CHECK_ARMED, &tx_ring->state); - } - - return ret; -} - #define WB_STRIDE 0x3 /** @@ -304,6 +245,10 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget) tx_ring->q_vector->tx.total_bytes += total_bytes; tx_ring->q_vector->tx.total_packets += total_packets; + /* check to see if there are any non-cache aligned descriptors + * waiting to be written back, and kick the hardware to force + * them to be written back in case of napi polling + */ if (budget && !((i & WB_STRIDE) == WB_STRIDE) && !test_bit(__I40E_DOWN, &tx_ring->vsi->state) && @@ -312,29 +257,6 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget) else tx_ring->arm_wb = false; - if (check_for_tx_hang(tx_ring) && i40e_check_tx_hang(tx_ring)) { - /* schedule immediate reset if we believe we hung */ - dev_info(tx_ring->dev, "Detected Tx Unit Hang\n" - " VSI <%d>\n" - " Tx Queue <%d>\n" - " next_to_use <%x>\n" - " next_to_clean <%x>\n", - tx_ring->vsi->seid, - tx_ring->queue_index, - tx_ring->next_to_use, i); - - netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index); - - dev_info(tx_ring->dev, - "tx hang detected on queue %d, resetting adapter\n", - tx_ring->queue_index); - - tx_ring->netdev->netdev_ops->ndo_tx_timeout(tx_ring->netdev); - - /* the adapter is about to reset, no point in enabling stuff */ - return true; - } - netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev, tx_ring->queue_index), total_packets, total_bytes); @@ -355,16 +277,16 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget) } } - return budget > 0; + return !!budget; } /** - * i40e_force_wb -Arm hardware to do a wb on noncache aligned descriptors + * i40evf_force_wb -Arm hardware to do a wb on noncache aligned descriptors * @vsi: the VSI we care about * @q_vector: the vector on which to force writeback * **/ -static void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector) +static void i40evf_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector) { u16 flags = q_vector->tx.ring[0].flags; @@ -1385,7 +1307,7 @@ int i40evf_napi_poll(struct napi_struct *napi, int budget) /* If work not completed, return budget and polling will return */ if (!clean_complete) { if (arm_wb) - i40e_force_wb(vsi, q_vector); + i40evf_force_wb(vsi, q_vector); return budget; } diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h index 9a30f5d8c089..d5cb7aca87b4 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h @@ -198,8 +198,6 @@ struct i40e_rx_queue_stats { enum i40e_ring_state_t { __I40E_TX_FDIR_INIT_DONE, __I40E_TX_XPS_INIT_DONE, - __I40E_TX_DETECT_HANG, - __I40E_HANG_CHECK_ARMED, __I40E_RX_PS_ENABLED, __I40E_RX_16BYTE_DESC_ENABLED, }; @@ -210,12 +208,6 @@ enum i40e_ring_state_t { set_bit(__I40E_RX_PS_ENABLED, &(ring)->state) #define clear_ring_ps_enabled(ring) \ clear_bit(__I40E_RX_PS_ENABLED, &(ring)->state) -#define check_for_tx_hang(ring) \ - test_bit(__I40E_TX_DETECT_HANG, &(ring)->state) -#define set_check_for_tx_hang(ring) \ - set_bit(__I40E_TX_DETECT_HANG, &(ring)->state) -#define clear_check_for_tx_hang(ring) \ - clear_bit(__I40E_TX_DETECT_HANG, &(ring)->state) #define ring_is_16byte_desc_enabled(ring) \ test_bit(__I40E_RX_16BYTE_DESC_ENABLED, &(ring)->state) #define set_ring_16byte_desc_enabled(ring) \ diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index e174fbbdba40..ba019fc87fd1 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -2986,6 +2986,9 @@ static int igb_sw_init(struct igb_adapter *adapter) } #endif /* CONFIG_PCI_IOV */ + /* Assume MSI-X interrupts, will be checked during IRQ allocation */ + adapter->flags |= IGB_FLAG_HAS_MSIX; + igb_probe_vfs(adapter); igb_init_queue_configuration(adapter);