mirror of https://gitee.com/openkylin/linux.git
net: ena: bug fix in lost tx packets detection mechanism
check_for_missing_tx_completions() is called from a timer
task and looking for lost tx packets.
The old implementation accumulate all the lost tx packets
and did not check if those packets were retrieved on a later stage.
This cause to a situation where the driver reset
the device for no reason.
Fixes: 1738cd3ed3
("Add a driver for Amazon Elastic Network Adapters (ENA)")
Signed-off-by: Netanel Belgazal <netanel@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
a2cc5198da
commit
800c55cb76
|
@ -80,7 +80,6 @@ static const struct ena_stats ena_stats_tx_strings[] = {
|
||||||
ENA_STAT_TX_ENTRY(tx_poll),
|
ENA_STAT_TX_ENTRY(tx_poll),
|
||||||
ENA_STAT_TX_ENTRY(doorbells),
|
ENA_STAT_TX_ENTRY(doorbells),
|
||||||
ENA_STAT_TX_ENTRY(prepare_ctx_err),
|
ENA_STAT_TX_ENTRY(prepare_ctx_err),
|
||||||
ENA_STAT_TX_ENTRY(missing_tx_comp),
|
|
||||||
ENA_STAT_TX_ENTRY(bad_req_id),
|
ENA_STAT_TX_ENTRY(bad_req_id),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -1995,6 +1995,7 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev)
|
||||||
|
|
||||||
tx_info->tx_descs = nb_hw_desc;
|
tx_info->tx_descs = nb_hw_desc;
|
||||||
tx_info->last_jiffies = jiffies;
|
tx_info->last_jiffies = jiffies;
|
||||||
|
tx_info->print_once = 0;
|
||||||
|
|
||||||
tx_ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use,
|
tx_ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use,
|
||||||
tx_ring->ring_size);
|
tx_ring->ring_size);
|
||||||
|
@ -2564,13 +2565,44 @@ static void ena_fw_reset_device(struct work_struct *work)
|
||||||
"Reset attempt failed. Can not reset the device\n");
|
"Reset attempt failed. Can not reset the device\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
static void check_for_missing_tx_completions(struct ena_adapter *adapter)
|
static int check_missing_comp_in_queue(struct ena_adapter *adapter,
|
||||||
|
struct ena_ring *tx_ring)
|
||||||
{
|
{
|
||||||
struct ena_tx_buffer *tx_buf;
|
struct ena_tx_buffer *tx_buf;
|
||||||
unsigned long last_jiffies;
|
unsigned long last_jiffies;
|
||||||
|
u32 missed_tx = 0;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for (i = 0; i < tx_ring->ring_size; i++) {
|
||||||
|
tx_buf = &tx_ring->tx_buffer_info[i];
|
||||||
|
last_jiffies = tx_buf->last_jiffies;
|
||||||
|
if (unlikely(last_jiffies &&
|
||||||
|
time_is_before_jiffies(last_jiffies + TX_TIMEOUT))) {
|
||||||
|
if (!tx_buf->print_once)
|
||||||
|
netif_notice(adapter, tx_err, adapter->netdev,
|
||||||
|
"Found a Tx that wasn't completed on time, qid %d, index %d.\n",
|
||||||
|
tx_ring->qid, i);
|
||||||
|
|
||||||
|
tx_buf->print_once = 1;
|
||||||
|
missed_tx++;
|
||||||
|
|
||||||
|
if (unlikely(missed_tx > MAX_NUM_OF_TIMEOUTED_PACKETS)) {
|
||||||
|
netif_err(adapter, tx_err, adapter->netdev,
|
||||||
|
"The number of lost tx completions is above the threshold (%d > %d). Reset the device\n",
|
||||||
|
missed_tx, MAX_NUM_OF_TIMEOUTED_PACKETS);
|
||||||
|
set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
|
||||||
|
return -EIO;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void check_for_missing_tx_completions(struct ena_adapter *adapter)
|
||||||
|
{
|
||||||
struct ena_ring *tx_ring;
|
struct ena_ring *tx_ring;
|
||||||
int i, j, budget;
|
int i, budget, rc;
|
||||||
u32 missed_tx;
|
|
||||||
|
|
||||||
/* Make sure the driver doesn't turn the device in other process */
|
/* Make sure the driver doesn't turn the device in other process */
|
||||||
smp_rmb();
|
smp_rmb();
|
||||||
|
@ -2586,31 +2618,9 @@ static void check_for_missing_tx_completions(struct ena_adapter *adapter)
|
||||||
for (i = adapter->last_monitored_tx_qid; i < adapter->num_queues; i++) {
|
for (i = adapter->last_monitored_tx_qid; i < adapter->num_queues; i++) {
|
||||||
tx_ring = &adapter->tx_ring[i];
|
tx_ring = &adapter->tx_ring[i];
|
||||||
|
|
||||||
for (j = 0; j < tx_ring->ring_size; j++) {
|
rc = check_missing_comp_in_queue(adapter, tx_ring);
|
||||||
tx_buf = &tx_ring->tx_buffer_info[j];
|
if (unlikely(rc))
|
||||||
last_jiffies = tx_buf->last_jiffies;
|
return;
|
||||||
if (unlikely(last_jiffies && time_is_before_jiffies(last_jiffies + TX_TIMEOUT))) {
|
|
||||||
netif_notice(adapter, tx_err, adapter->netdev,
|
|
||||||
"Found a Tx that wasn't completed on time, qid %d, index %d.\n",
|
|
||||||
tx_ring->qid, j);
|
|
||||||
|
|
||||||
u64_stats_update_begin(&tx_ring->syncp);
|
|
||||||
missed_tx = tx_ring->tx_stats.missing_tx_comp++;
|
|
||||||
u64_stats_update_end(&tx_ring->syncp);
|
|
||||||
|
|
||||||
/* Clear last jiffies so the lost buffer won't
|
|
||||||
* be counted twice.
|
|
||||||
*/
|
|
||||||
tx_buf->last_jiffies = 0;
|
|
||||||
|
|
||||||
if (unlikely(missed_tx > MAX_NUM_OF_TIMEOUTED_PACKETS)) {
|
|
||||||
netif_err(adapter, tx_err, adapter->netdev,
|
|
||||||
"The number of lost tx completion is above the threshold (%d > %d). Reset the device\n",
|
|
||||||
missed_tx, MAX_NUM_OF_TIMEOUTED_PACKETS);
|
|
||||||
set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
budget--;
|
budget--;
|
||||||
if (!budget)
|
if (!budget)
|
||||||
|
|
|
@ -146,7 +146,18 @@ struct ena_tx_buffer {
|
||||||
u32 tx_descs;
|
u32 tx_descs;
|
||||||
/* num of buffers used by this skb */
|
/* num of buffers used by this skb */
|
||||||
u32 num_of_bufs;
|
u32 num_of_bufs;
|
||||||
/* Save the last jiffies to detect missing tx packets */
|
|
||||||
|
/* Used for detect missing tx packets to limit the number of prints */
|
||||||
|
u32 print_once;
|
||||||
|
/* Save the last jiffies to detect missing tx packets
|
||||||
|
*
|
||||||
|
* sets to non zero value on ena_start_xmit and set to zero on
|
||||||
|
* napi and timer_Service_routine.
|
||||||
|
*
|
||||||
|
* while this value is not protected by lock,
|
||||||
|
* a given packet is not expected to be handled by ena_start_xmit
|
||||||
|
* and by napi/timer_service at the same time.
|
||||||
|
*/
|
||||||
unsigned long last_jiffies;
|
unsigned long last_jiffies;
|
||||||
struct ena_com_buf bufs[ENA_PKT_MAX_BUFS];
|
struct ena_com_buf bufs[ENA_PKT_MAX_BUFS];
|
||||||
} ____cacheline_aligned;
|
} ____cacheline_aligned;
|
||||||
|
@ -170,7 +181,6 @@ struct ena_stats_tx {
|
||||||
u64 napi_comp;
|
u64 napi_comp;
|
||||||
u64 tx_poll;
|
u64 tx_poll;
|
||||||
u64 doorbells;
|
u64 doorbells;
|
||||||
u64 missing_tx_comp;
|
|
||||||
u64 bad_req_id;
|
u64 bad_req_id;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue