fm10k: wait for queues to drain if stop_hw() fails once

It turns out that sometimes during a reset the Tx queues will be
temporarily stuck longer than .stop_hw() expects. Work around this issue
by attempting to .stop_hw() first. If it fails, wait a number of
attempts until the Tx queues appear to be drained. After this, attempt
stop_hw() again. This ensures that we avoid waiting if we don't need to,
such as during the first initialization of a VF, and give the proper
amount of time necessary to recover from most situations. It is possible
that the hardware is actually stuck. For PFs, this is usually fixed by
a datapath reset. Unfortunately the VF cannot request a similar reset
for itself.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Krishneil Singh <Krishneil.k.singh@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
This commit is contained in:
Jacob Keller 2016-06-07 16:08:51 -07:00 committed by Jeff Kirsher
parent 106ca42356
commit 94877768cf
3 changed files with 40 additions and 7 deletions

View File

@ -458,6 +458,7 @@ __be16 fm10k_tx_encap_offload(struct sk_buff *skb);
netdev_tx_t fm10k_xmit_frame_ring(struct sk_buff *skb,
struct fm10k_ring *tx_ring);
void fm10k_tx_timeout_reset(struct fm10k_intfc *interface);
u64 fm10k_get_tx_pending(struct fm10k_ring *ring);
bool fm10k_check_tx_hang(struct fm10k_ring *tx_ring);
void fm10k_alloc_rx_buffers(struct fm10k_ring *rx_ring, u16 cleaned_count);

View File

@ -1128,7 +1128,7 @@ static u64 fm10k_get_tx_completed(struct fm10k_ring *ring)
return ring->stats.packets;
}
static u64 fm10k_get_tx_pending(struct fm10k_ring *ring)
u64 fm10k_get_tx_pending(struct fm10k_ring *ring)
{
struct fm10k_intfc *interface = ring->q_vector->interface;
struct fm10k_hw *hw = &interface->hw;

View File

@ -1613,7 +1613,7 @@ void fm10k_down(struct fm10k_intfc *interface)
{
struct net_device *netdev = interface->netdev;
struct fm10k_hw *hw = &interface->hw;
int err;
int err, i = 0, count = 0;
/* signal that we are down to the interrupt handler and service task */
if (test_and_set_bit(__FM10K_DOWN, &interface->state))
@ -1629,9 +1629,6 @@ void fm10k_down(struct fm10k_intfc *interface)
/* reset Rx filters */
fm10k_reset_rx_state(interface);
/* allow 10ms for device to quiesce */
usleep_range(10000, 20000);
/* disable polling routines */
fm10k_napi_disable_all(interface);
@ -1642,11 +1639,46 @@ void fm10k_down(struct fm10k_intfc *interface)
while (test_and_set_bit(__FM10K_UPDATING_STATS, &interface->state))
usleep_range(1000, 2000);
/* skip waiting for TX DMA if we lost PCIe link */
if (FM10K_REMOVED(hw->hw_addr))
goto skip_tx_dma_drain;
/* In some rare circumstances it can take a while for Tx queues to
* quiesce and be fully disabled. Attempt to .stop_hw() first, and
* then if we get ERR_REQUESTS_PENDING, go ahead and wait in a loop
* until the Tx queues have emptied, or until a number of retries. If
* we fail to clear within the retry loop, we will issue a warning
* indicating that Tx DMA is probably hung. Note this means we call
* .stop_hw() twice but this shouldn't cause any problems.
*/
err = hw->mac.ops.stop_hw(hw);
if (err != FM10K_ERR_REQUESTS_PENDING)
goto skip_tx_dma_drain;
#define TX_DMA_DRAIN_RETRIES 25
for (count = 0; count < TX_DMA_DRAIN_RETRIES; count++) {
usleep_range(10000, 20000);
/* start checking at the last ring to have pending Tx */
for (; i < interface->num_tx_queues; i++)
if (fm10k_get_tx_pending(interface->tx_ring[i]))
break;
/* if all the queues are drained, we can break now */
if (i == interface->num_tx_queues)
break;
}
if (count >= TX_DMA_DRAIN_RETRIES)
dev_err(&interface->pdev->dev,
"Tx queues failed to drain after %d tries. Tx DMA is probably hung.\n",
count);
skip_tx_dma_drain:
/* Disable DMA engine for Tx/Rx */
err = hw->mac.ops.stop_hw(hw);
if (err == FM10K_ERR_REQUESTS_PENDING)
dev_info(&interface->pdev->dev,
"due to pending requests hw was not shut down gracefully\n");
dev_err(&interface->pdev->dev,
"due to pending requests hw was not shut down gracefully\n");
else if (err)
dev_err(&interface->pdev->dev, "stop_hw failed: %d\n", err);