Merge branch 'qed-doorbell-overflow-recovery'

Denis Bolotin says:

====================
qed: Fix the Doorbell Overflow Recovery mechanism

This patch series fixes and improves the doorbell recovery mechanism.
The main goals of this series are to fix missing attentions from the
doorbells block (DORQ) or not handling them properly, and execute the
recovery from periodic handler instead of the attention handler.

Please consider applying the series to net.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2019-04-14 13:59:49 -07:00
commit a6b16d8d2a
5 changed files with 105 additions and 76 deletions

View File

@ -431,12 +431,16 @@ struct qed_qm_info {
u8 num_pf_rls;
};
#define QED_OVERFLOW_BIT 1
struct qed_db_recovery_info {
struct list_head list;
/* Lock to protect the doorbell recovery mechanism list */
spinlock_t lock;
bool dorq_attn;
u32 db_recovery_counter;
unsigned long overflow;
};
struct storm_stats {
@ -920,8 +924,7 @@ u16 qed_get_cm_pq_idx_llt_mtc(struct qed_hwfn *p_hwfn, u8 tc);
/* doorbell recovery mechanism */
void qed_db_recovery_dp(struct qed_hwfn *p_hwfn);
void qed_db_recovery_execute(struct qed_hwfn *p_hwfn,
enum qed_db_rec_exec db_exec);
void qed_db_recovery_execute(struct qed_hwfn *p_hwfn);
bool qed_edpm_enabled(struct qed_hwfn *p_hwfn);
/* Other Linux specific common definitions */

View File

@ -102,11 +102,15 @@ static void qed_db_recovery_dp_entry(struct qed_hwfn *p_hwfn,
/* Doorbell address sanity (address within doorbell bar range) */
static bool qed_db_rec_sanity(struct qed_dev *cdev,
void __iomem *db_addr, void *db_data)
void __iomem *db_addr,
enum qed_db_rec_width db_width,
void *db_data)
{
u32 width = (db_width == DB_REC_WIDTH_32B) ? 32 : 64;
/* Make sure doorbell address is within the doorbell bar */
if (db_addr < cdev->doorbells ||
(u8 __iomem *)db_addr >
(u8 __iomem *)db_addr + width >
(u8 __iomem *)cdev->doorbells + cdev->db_size) {
WARN(true,
"Illegal doorbell address: %p. Legal range for doorbell addresses is [%p..%p]\n",
@ -159,7 +163,7 @@ int qed_db_recovery_add(struct qed_dev *cdev,
}
/* Sanitize doorbell address */
if (!qed_db_rec_sanity(cdev, db_addr, db_data))
if (!qed_db_rec_sanity(cdev, db_addr, db_width, db_data))
return -EINVAL;
/* Obtain hwfn from doorbell address */
@ -205,10 +209,6 @@ int qed_db_recovery_del(struct qed_dev *cdev,
return 0;
}
/* Sanitize doorbell address */
if (!qed_db_rec_sanity(cdev, db_addr, db_data))
return -EINVAL;
/* Obtain hwfn from doorbell address */
p_hwfn = qed_db_rec_find_hwfn(cdev, db_addr);
@ -300,31 +300,24 @@ void qed_db_recovery_dp(struct qed_hwfn *p_hwfn)
/* Ring the doorbell of a single doorbell recovery entry */
static void qed_db_recovery_ring(struct qed_hwfn *p_hwfn,
struct qed_db_recovery_entry *db_entry,
enum qed_db_rec_exec db_exec)
struct qed_db_recovery_entry *db_entry)
{
if (db_exec != DB_REC_ONCE) {
/* Print according to width */
if (db_entry->db_width == DB_REC_WIDTH_32B) {
DP_VERBOSE(p_hwfn, QED_MSG_SPQ,
"%s doorbell address %p data %x\n",
db_exec == DB_REC_DRY_RUN ?
"would have rung" : "ringing",
db_entry->db_addr,
*(u32 *)db_entry->db_data);
} else {
DP_VERBOSE(p_hwfn, QED_MSG_SPQ,
"%s doorbell address %p data %llx\n",
db_exec == DB_REC_DRY_RUN ?
"would have rung" : "ringing",
db_entry->db_addr,
*(u64 *)(db_entry->db_data));
}
/* Print according to width */
if (db_entry->db_width == DB_REC_WIDTH_32B) {
DP_VERBOSE(p_hwfn, QED_MSG_SPQ,
"ringing doorbell address %p data %x\n",
db_entry->db_addr,
*(u32 *)db_entry->db_data);
} else {
DP_VERBOSE(p_hwfn, QED_MSG_SPQ,
"ringing doorbell address %p data %llx\n",
db_entry->db_addr,
*(u64 *)(db_entry->db_data));
}
/* Sanity */
if (!qed_db_rec_sanity(p_hwfn->cdev, db_entry->db_addr,
db_entry->db_data))
db_entry->db_width, db_entry->db_data))
return;
/* Flush the write combined buffer. Since there are multiple doorbelling
@ -334,14 +327,12 @@ static void qed_db_recovery_ring(struct qed_hwfn *p_hwfn,
wmb();
/* Ring the doorbell */
if (db_exec == DB_REC_REAL_DEAL || db_exec == DB_REC_ONCE) {
if (db_entry->db_width == DB_REC_WIDTH_32B)
DIRECT_REG_WR(db_entry->db_addr,
*(u32 *)(db_entry->db_data));
else
DIRECT_REG_WR64(db_entry->db_addr,
*(u64 *)(db_entry->db_data));
}
if (db_entry->db_width == DB_REC_WIDTH_32B)
DIRECT_REG_WR(db_entry->db_addr,
*(u32 *)(db_entry->db_data));
else
DIRECT_REG_WR64(db_entry->db_addr,
*(u64 *)(db_entry->db_data));
/* Flush the write combined buffer. Next doorbell may come from a
* different entity to the same address...
@ -350,29 +341,21 @@ static void qed_db_recovery_ring(struct qed_hwfn *p_hwfn,
}
/* Traverse the doorbell recovery entry list and ring all the doorbells */
void qed_db_recovery_execute(struct qed_hwfn *p_hwfn,
enum qed_db_rec_exec db_exec)
void qed_db_recovery_execute(struct qed_hwfn *p_hwfn)
{
struct qed_db_recovery_entry *db_entry = NULL;
if (db_exec != DB_REC_ONCE) {
DP_NOTICE(p_hwfn,
"Executing doorbell recovery. Counter was %d\n",
p_hwfn->db_recovery_info.db_recovery_counter);
DP_NOTICE(p_hwfn, "Executing doorbell recovery. Counter was %d\n",
p_hwfn->db_recovery_info.db_recovery_counter);
/* Track amount of times recovery was executed */
p_hwfn->db_recovery_info.db_recovery_counter++;
}
/* Track amount of times recovery was executed */
p_hwfn->db_recovery_info.db_recovery_counter++;
/* Protect the list */
spin_lock_bh(&p_hwfn->db_recovery_info.lock);
list_for_each_entry(db_entry,
&p_hwfn->db_recovery_info.list, list_entry) {
qed_db_recovery_ring(p_hwfn, db_entry, db_exec);
if (db_exec == DB_REC_ONCE)
break;
}
&p_hwfn->db_recovery_info.list, list_entry)
qed_db_recovery_ring(p_hwfn, db_entry);
spin_unlock_bh(&p_hwfn->db_recovery_info.lock);
}

View File

@ -378,6 +378,9 @@ static int qed_db_rec_flush_queue(struct qed_hwfn *p_hwfn,
u32 count = QED_DB_REC_COUNT;
u32 usage = 1;
/* Flush any pending (e)dpms as they may never arrive */
qed_wr(p_hwfn, p_ptt, DORQ_REG_DPM_FORCE_ABORT, 0x1);
/* wait for usage to zero or count to run out. This is necessary since
* EDPM doorbell transactions can take multiple 64b cycles, and as such
* can "split" over the pci. Possibly, the doorbell drop can happen with
@ -406,51 +409,74 @@ static int qed_db_rec_flush_queue(struct qed_hwfn *p_hwfn,
int qed_db_rec_handler(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
{
u32 overflow;
u32 attn_ovfl, cur_ovfl;
int rc;
overflow = qed_rd(p_hwfn, p_ptt, DORQ_REG_PF_OVFL_STICKY);
DP_NOTICE(p_hwfn, "PF Overflow sticky 0x%x\n", overflow);
if (!overflow) {
qed_db_recovery_execute(p_hwfn, DB_REC_ONCE);
attn_ovfl = test_and_clear_bit(QED_OVERFLOW_BIT,
&p_hwfn->db_recovery_info.overflow);
cur_ovfl = qed_rd(p_hwfn, p_ptt, DORQ_REG_PF_OVFL_STICKY);
if (!cur_ovfl && !attn_ovfl)
return 0;
}
if (qed_edpm_enabled(p_hwfn)) {
DP_NOTICE(p_hwfn, "PF Overflow sticky: attn %u current %u\n",
attn_ovfl, cur_ovfl);
if (cur_ovfl && !p_hwfn->db_bar_no_edpm) {
rc = qed_db_rec_flush_queue(p_hwfn, p_ptt);
if (rc)
return rc;
}
/* Flush any pending (e)dpm as they may never arrive */
qed_wr(p_hwfn, p_ptt, DORQ_REG_DPM_FORCE_ABORT, 0x1);
/* Release overflow sticky indication (stop silently dropping everything) */
qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_OVFL_STICKY, 0x0);
/* Repeat all last doorbells (doorbell drop recovery) */
qed_db_recovery_execute(p_hwfn, DB_REC_REAL_DEAL);
qed_db_recovery_execute(p_hwfn);
return 0;
}
static int qed_dorq_attn_cb(struct qed_hwfn *p_hwfn)
static void qed_dorq_attn_overflow(struct qed_hwfn *p_hwfn)
{
struct qed_ptt *p_ptt = p_hwfn->p_dpc_ptt;
u32 overflow;
int rc;
overflow = qed_rd(p_hwfn, p_ptt, DORQ_REG_PF_OVFL_STICKY);
if (!overflow)
goto out;
/* Run PF doorbell recovery in next periodic handler */
set_bit(QED_OVERFLOW_BIT, &p_hwfn->db_recovery_info.overflow);
if (!p_hwfn->db_bar_no_edpm) {
rc = qed_db_rec_flush_queue(p_hwfn, p_ptt);
if (rc)
goto out;
}
qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_OVFL_STICKY, 0x0);
out:
/* Schedule the handler even if overflow was not detected */
qed_periodic_db_rec_start(p_hwfn);
}
static int qed_dorq_attn_int_sts(struct qed_hwfn *p_hwfn)
{
u32 int_sts, first_drop_reason, details, address, all_drops_reason;
struct qed_ptt *p_ptt = p_hwfn->p_dpc_ptt;
int rc;
int_sts = qed_rd(p_hwfn, p_ptt, DORQ_REG_INT_STS);
DP_NOTICE(p_hwfn->cdev, "DORQ attention. int_sts was %x\n", int_sts);
/* int_sts may be zero since all PFs were interrupted for doorbell
* overflow but another one already handled it. Can abort here. If
* This PF also requires overflow recovery we will be interrupted again.
* The masked almost full indication may also be set. Ignoring.
*/
int_sts = qed_rd(p_hwfn, p_ptt, DORQ_REG_INT_STS);
if (!(int_sts & ~DORQ_REG_INT_STS_DORQ_FIFO_AFULL))
return 0;
DP_NOTICE(p_hwfn->cdev, "DORQ attention. int_sts was %x\n", int_sts);
/* check if db_drop or overflow happened */
if (int_sts & (DORQ_REG_INT_STS_DB_DROP |
DORQ_REG_INT_STS_DORQ_FIFO_OVFL_ERR)) {
@ -477,11 +503,6 @@ static int qed_dorq_attn_cb(struct qed_hwfn *p_hwfn)
GET_FIELD(details, QED_DORQ_ATTENTION_SIZE) * 4,
first_drop_reason, all_drops_reason);
rc = qed_db_rec_handler(p_hwfn, p_ptt);
qed_periodic_db_rec_start(p_hwfn);
if (rc)
return rc;
/* Clear the doorbell drop details and prepare for next drop */
qed_wr(p_hwfn, p_ptt, DORQ_REG_DB_DROP_DETAILS_REL, 0);
@ -507,6 +528,25 @@ static int qed_dorq_attn_cb(struct qed_hwfn *p_hwfn)
return -EINVAL;
}
static int qed_dorq_attn_cb(struct qed_hwfn *p_hwfn)
{
p_hwfn->db_recovery_info.dorq_attn = true;
qed_dorq_attn_overflow(p_hwfn);
return qed_dorq_attn_int_sts(p_hwfn);
}
static void qed_dorq_attn_handler(struct qed_hwfn *p_hwfn)
{
if (p_hwfn->db_recovery_info.dorq_attn)
goto out;
/* Call DORQ callback if the attention was missed */
qed_dorq_attn_cb(p_hwfn);
out:
p_hwfn->db_recovery_info.dorq_attn = false;
}
/* Instead of major changes to the data-structure, we have a some 'special'
* identifiers for sources that changed meaning between adapters.
*/
@ -1080,6 +1120,9 @@ static int qed_int_deassertion(struct qed_hwfn *p_hwfn,
}
}
/* Handle missed DORQ attention */
qed_dorq_attn_handler(p_hwfn);
/* Clear IGU indication for the deasserted bits */
DIRECT_REG_WR((u8 __iomem *)p_hwfn->regview +
GTT_BAR0_MAP_REG_IGU_CMD +

View File

@ -192,8 +192,8 @@ void qed_int_disable_post_isr_release(struct qed_dev *cdev);
/**
* @brief - Doorbell Recovery handler.
* Run DB_REAL_DEAL doorbell recovery in case of PF overflow
* (and flush DORQ if needed), otherwise run DB_REC_ONCE.
* Run doorbell recovery in case of PF overflow (and flush DORQ if
* needed).
*
* @param p_hwfn
* @param p_ptt

View File

@ -970,7 +970,7 @@ static void qed_update_pf_params(struct qed_dev *cdev,
}
}
#define QED_PERIODIC_DB_REC_COUNT 100
#define QED_PERIODIC_DB_REC_COUNT 10
#define QED_PERIODIC_DB_REC_INTERVAL_MS 100
#define QED_PERIODIC_DB_REC_INTERVAL \
msecs_to_jiffies(QED_PERIODIC_DB_REC_INTERVAL_MS)