diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index fa41bf08a589..66ed39d6f357 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -740,12 +740,6 @@ struct qed_dbg_feature { u32 dumped_dwords; }; -struct qed_dbg_params { - struct qed_dbg_feature features[DBG_FEATURE_NUM]; - u8 engine_for_debug; - bool print_data; -}; - struct qed_dev { u32 dp_module; u8 dp_level; @@ -844,6 +838,9 @@ struct qed_dev { /* Recovery */ bool recov_in_prog; + /* Indicates whether should prevent attentions from being reasserted */ + bool attn_clr_en; + /* LLH info */ u8 ppfid_bitmap; struct qed_llh_info *p_llh_info; @@ -872,17 +869,18 @@ struct qed_dev { } protocol_ops; void *ops_cookie; - struct qed_dbg_params dbg_params; - #ifdef CONFIG_QED_LL2 struct qed_cb_ll2_info *ll2; u8 ll2_mac_address[ETH_ALEN]; #endif struct qed_dbg_feature dbg_features[DBG_FEATURE_NUM]; + u8 engine_for_debug; bool disable_ilt_dump; DECLARE_HASHTABLE(connections, 10); const struct firmware *firmware; + bool print_dbg_data; + u32 rdma_max_sge; u32 rdma_max_inline; u32 rdma_max_srq_sge; @@ -1020,6 +1018,8 @@ u32 qed_unzip_data(struct qed_hwfn *p_hwfn, u32 input_len, u8 *input_buf, u32 max_size, u8 *unzip_buf); void qed_schedule_recovery_handler(struct qed_hwfn *p_hwfn); +void qed_hw_error_occurred(struct qed_hwfn *p_hwfn, + enum qed_hw_err_type err_type); void qed_get_protocol_stats(struct qed_dev *cdev, enum qed_mcp_protocol_type type, union qed_mcp_protocol_stats *stats); diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.c b/drivers/net/ethernet/qlogic/qed/qed_debug.c index f4eebaabb6d0..57a0dab88431 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_debug.c +++ b/drivers/net/ethernet/qlogic/qed/qed_debug.c @@ -7453,7 +7453,7 @@ static enum dbg_status format_feature(struct qed_hwfn *p_hwfn, enum qed_dbg_features feature_idx) { struct qed_dbg_feature *feature = - &p_hwfn->cdev->dbg_params.features[feature_idx]; + &p_hwfn->cdev->dbg_features[feature_idx]; u32 text_size_bytes, null_char_pos, i; enum dbg_status rc; char *text_buf; @@ -7502,7 +7502,7 @@ static enum dbg_status format_feature(struct qed_hwfn *p_hwfn, text_buf[i] = '\n'; /* Dump printable feature to log */ - if (p_hwfn->cdev->dbg_params.print_data) + if (p_hwfn->cdev->print_dbg_data) qed_dbg_print_feature(text_buf, text_size_bytes); /* Free the old dump_buf and point the dump_buf to the newly allocagted @@ -7523,7 +7523,7 @@ static enum dbg_status qed_dbg_dump(struct qed_hwfn *p_hwfn, enum qed_dbg_features feature_idx) { struct qed_dbg_feature *feature = - &p_hwfn->cdev->dbg_params.features[feature_idx]; + &p_hwfn->cdev->dbg_features[feature_idx]; u32 buf_size_dwords; enum dbg_status rc; @@ -7648,7 +7648,7 @@ static int qed_dbg_nvm_image(struct qed_dev *cdev, void *buffer, enum qed_nvm_images image_id) { struct qed_hwfn *p_hwfn = - &cdev->hwfns[cdev->dbg_params.engine_for_debug]; + &cdev->hwfns[cdev->engine_for_debug]; u32 len_rounded, i; __be32 val; int rc; @@ -7780,7 +7780,7 @@ int qed_dbg_all_data(struct qed_dev *cdev, void *buffer) { u8 cur_engine, omit_engine = 0, org_engine; struct qed_hwfn *p_hwfn = - &cdev->hwfns[cdev->dbg_params.engine_for_debug]; + &cdev->hwfns[cdev->engine_for_debug]; struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; int grc_params[MAX_DBG_GRC_PARAMS], i; u32 offset = 0, feature_size; @@ -8000,7 +8000,7 @@ int qed_dbg_all_data(struct qed_dev *cdev, void *buffer) int qed_dbg_all_data_size(struct qed_dev *cdev) { struct qed_hwfn *p_hwfn = - &cdev->hwfns[cdev->dbg_params.engine_for_debug]; + &cdev->hwfns[cdev->engine_for_debug]; u32 regs_len = 0, image_len = 0, ilt_len = 0, total_ilt_len = 0; u8 cur_engine, org_engine; @@ -8059,9 +8059,9 @@ int qed_dbg_feature(struct qed_dev *cdev, void *buffer, enum qed_dbg_features feature, u32 *num_dumped_bytes) { struct qed_hwfn *p_hwfn = - &cdev->hwfns[cdev->dbg_params.engine_for_debug]; + &cdev->hwfns[cdev->engine_for_debug]; struct qed_dbg_feature *qed_feature = - &cdev->dbg_params.features[feature]; + &cdev->dbg_features[feature]; enum dbg_status dbg_rc; struct qed_ptt *p_ptt; int rc = 0; @@ -8084,7 +8084,7 @@ int qed_dbg_feature(struct qed_dev *cdev, void *buffer, DP_VERBOSE(cdev, QED_MSG_DEBUG, "copying debugfs feature to external buffer\n"); memcpy(buffer, qed_feature->dump_buf, qed_feature->buf_size); - *num_dumped_bytes = cdev->dbg_params.features[feature].dumped_dwords * + *num_dumped_bytes = cdev->dbg_features[feature].dumped_dwords * 4; out: @@ -8095,7 +8095,7 @@ int qed_dbg_feature(struct qed_dev *cdev, void *buffer, int qed_dbg_feature_size(struct qed_dev *cdev, enum qed_dbg_features feature) { struct qed_hwfn *p_hwfn = - &cdev->hwfns[cdev->dbg_params.engine_for_debug]; + &cdev->hwfns[cdev->engine_for_debug]; struct qed_dbg_feature *qed_feature = &cdev->dbg_features[feature]; struct qed_ptt *p_ptt = qed_ptt_acquire(p_hwfn); u32 buf_size_dwords; @@ -8120,14 +8120,14 @@ int qed_dbg_feature_size(struct qed_dev *cdev, enum qed_dbg_features feature) u8 qed_get_debug_engine(struct qed_dev *cdev) { - return cdev->dbg_params.engine_for_debug; + return cdev->engine_for_debug; } void qed_set_debug_engine(struct qed_dev *cdev, int engine_number) { DP_VERBOSE(cdev, QED_MSG_DEBUG, "set debug engine to %d\n", engine_number); - cdev->dbg_params.engine_for_debug = engine_number; + cdev->engine_for_debug = engine_number; } void qed_dbg_pf_init(struct qed_dev *cdev) @@ -8146,7 +8146,7 @@ void qed_dbg_pf_init(struct qed_dev *cdev) } /* Set the hwfn to be 0 as default */ - cdev->dbg_params.engine_for_debug = 0; + cdev->engine_for_debug = 0; } void qed_dbg_pf_exit(struct qed_dev *cdev) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index 7119a18af19e..6e857468e993 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -3085,7 +3085,9 @@ int qed_hw_init(struct qed_dev *cdev, struct qed_hw_init_params *p_params) rc = qed_final_cleanup(p_hwfn, p_hwfn->p_main_ptt, p_hwfn->rel_pf_id, false); if (rc) { - DP_NOTICE(p_hwfn, "Final cleanup failed\n"); + qed_hw_err_notify(p_hwfn, p_hwfn->p_main_ptt, + QED_HW_ERR_RAMROD_FAIL, + "Final cleanup failed\n"); goto load_err; } } diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h index 4597015b8bff..f00460d00cab 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h @@ -12400,6 +12400,13 @@ struct load_rsp_stc { #define LOAD_RSP_FLAGS0_DRV_EXISTS (0x1 << 0) }; +struct mdump_retain_data_stc { + u32 valid; + u32 epoch; + u32 pf; + u32 status; +}; + union drv_union_data { u32 ver_str[MCP_DRV_VER_STR_SIZE_DWORD]; struct mcp_mac wol_mac; @@ -12488,10 +12495,14 @@ struct public_drv_mb { #define DRV_MSG_CODE_BIST_TEST 0x001e0000 #define DRV_MSG_CODE_SET_LED_MODE 0x00200000 #define DRV_MSG_CODE_RESOURCE_CMD 0x00230000 +/* Send crash dump commands with param[3:0] - opcode */ +#define DRV_MSG_CODE_MDUMP_CMD 0x00250000 #define DRV_MSG_CODE_GET_TLV_DONE 0x002f0000 #define DRV_MSG_CODE_GET_ENGINE_CONFIG 0x00370000 #define DRV_MSG_CODE_GET_PPFID_BITMAP 0x43000000 +#define DRV_MSG_CODE_DEBUG_DATA_SEND 0xc0040000 + #define RESOURCE_CMD_REQ_RESC_MASK 0x0000001F #define RESOURCE_CMD_REQ_RESC_SHIFT 0 #define RESOURCE_CMD_REQ_OPCODE_MASK 0x000000E0 @@ -12517,6 +12528,21 @@ struct public_drv_mb { #define RESOURCE_DUMP 0 +/* DRV_MSG_CODE_MDUMP_CMD parameters */ +#define MDUMP_DRV_PARAM_OPCODE_MASK 0x0000000f +#define DRV_MSG_CODE_MDUMP_ACK 0x01 +#define DRV_MSG_CODE_MDUMP_SET_VALUES 0x02 +#define DRV_MSG_CODE_MDUMP_TRIGGER 0x03 +#define DRV_MSG_CODE_MDUMP_GET_CONFIG 0x04 +#define DRV_MSG_CODE_MDUMP_SET_ENABLE 0x05 +#define DRV_MSG_CODE_MDUMP_CLEAR_LOGS 0x06 +#define DRV_MSG_CODE_MDUMP_GET_RETAIN 0x07 +#define DRV_MSG_CODE_MDUMP_CLR_RETAIN 0x08 + +#define DRV_MSG_CODE_HW_DUMP_TRIGGER 0x0a +#define DRV_MSG_CODE_MDUMP_GEN_MDUMP2 0x0b +#define DRV_MSG_CODE_MDUMP_FREE_MDUMP2 0x0c + #define DRV_MSG_CODE_GET_PF_RDMA_PROTOCOL 0x002b0000 #define DRV_MSG_CODE_OS_WOL 0x002e0000 @@ -12626,6 +12652,17 @@ struct public_drv_mb { #define DRV_MB_PARAM_FEATURE_SUPPORT_PORT_EEE 0x00000002 #define DRV_MB_PARAM_FEATURE_SUPPORT_FUNC_VLINK 0x00010000 +/* DRV_MSG_CODE_DEBUG_DATA_SEND parameters */ +#define DRV_MSG_CODE_DEBUG_DATA_SEND_SIZE_OFFSET 0 +#define DRV_MSG_CODE_DEBUG_DATA_SEND_SIZE_MASK 0xFF + +/* Driver attributes params */ +#define DRV_MB_PARAM_ATTRIBUTE_KEY_OFFSET 0 +#define DRV_MB_PARAM_ATTRIBUTE_KEY_MASK 0x00FFFFFF +#define DRV_MB_PARAM_ATTRIBUTE_CMD_OFFSET 24 +#define DRV_MB_PARAM_ATTRIBUTE_CMD_MASK 0xFF000000 + +#define DRV_MB_PARAM_NVM_CFG_OPTION_ID_OFFSET 0 #define DRV_MB_PARAM_NVM_CFG_OPTION_ID_SHIFT 0 #define DRV_MB_PARAM_NVM_CFG_OPTION_ID_MASK 0x0000FFFF #define DRV_MB_PARAM_NVM_CFG_OPTION_ALL_SHIFT 16 @@ -12678,6 +12715,14 @@ struct public_drv_mb { #define FW_MSG_CODE_DRV_CFG_PF_VFS_MSIX_DONE 0x00870000 #define FW_MSG_SEQ_NUMBER_MASK 0x0000ffff +#define FW_MSG_CODE_DEBUG_DATA_SEND_INV_ARG 0xb0070000 +#define FW_MSG_CODE_DEBUG_DATA_SEND_BUF_FULL 0xb0080000 +#define FW_MSG_CODE_DEBUG_DATA_SEND_NO_BUF 0xb0090000 +#define FW_MSG_CODE_DEBUG_NOT_ENABLED 0xb00a0000 +#define FW_MSG_CODE_DEBUG_DATA_SEND_OK 0xb00b0000 + +#define FW_MSG_CODE_MDUMP_INVALID_CMD 0x00030000 + u32 fw_mb_param; #define FW_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR_MASK 0xFFFF0000 #define FW_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR_SHIFT 16 @@ -12742,9 +12787,9 @@ enum MFW_DRV_MSG_TYPE { MFW_DRV_MSG_GET_FCOE_STATS, MFW_DRV_MSG_GET_ISCSI_STATS, MFW_DRV_MSG_GET_RDMA_STATS, - MFW_DRV_MSG_BW_UPDATE10, + MFW_DRV_MSG_FAILURE_DETECTED, MFW_DRV_MSG_TRANSCEIVER_STATE_CHANGE, - MFW_DRV_MSG_BW_UPDATE11, + MFW_DRV_MSG_CRITICAL_ERROR_OCCURRED, MFW_DRV_MSG_RESERVED, MFW_DRV_MSG_GET_TLV_REQ, MFW_DRV_MSG_OEM_CFG_UPDATE, diff --git a/drivers/net/ethernet/qlogic/qed/qed_hw.c b/drivers/net/ethernet/qlogic/qed/qed_hw.c index 4ab8cfaf63d1..5fa251489536 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hw.c +++ b/drivers/net/ethernet/qlogic/qed/qed_hw.c @@ -762,9 +762,10 @@ static int qed_dmae_execute_command(struct qed_hwfn *p_hwfn, dst_type, length_cur); if (qed_status) { - DP_NOTICE(p_hwfn, - "qed_dmae_execute_sub_operation Failed with error 0x%x. source_addr 0x%llx, destination addr 0x%llx, size_in_dwords 0x%x\n", - qed_status, src_addr, dst_addr, length_cur); + qed_hw_err_notify(p_hwfn, p_ptt, QED_HW_ERR_DMAE_FAIL, + "qed_dmae_execute_sub_operation Failed with error 0x%x. source_addr 0x%llx, destination addr 0x%llx, size_in_dwords 0x%x\n", + qed_status, src_addr, + dst_addr, length_cur); break; } } @@ -837,6 +838,41 @@ int qed_dmae_host2host(struct qed_hwfn *p_hwfn, return rc; } +void qed_hw_err_notify(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + enum qed_hw_err_type err_type, char *fmt, ...) +{ + char buf[QED_HW_ERR_MAX_STR_SIZE]; + va_list vl; + int len; + + if (fmt) { + va_start(vl, fmt); + len = vsnprintf(buf, QED_HW_ERR_MAX_STR_SIZE, fmt, vl); + va_end(vl); + + if (len > QED_HW_ERR_MAX_STR_SIZE - 1) + len = QED_HW_ERR_MAX_STR_SIZE - 1; + + DP_NOTICE(p_hwfn, "%s", buf); + } + + /* Fan failure cannot be masked by handling of another HW error */ + if (p_hwfn->cdev->recov_in_prog && + err_type != QED_HW_ERR_FAN_FAIL) { + DP_VERBOSE(p_hwfn, + NETIF_MSG_DRV, + "Recovery is in progress. Avoid notifying about HW error %d.\n", + err_type); + return; + } + + qed_hw_error_occurred(p_hwfn, err_type); + + if (fmt) + qed_mcp_send_raw_debug_data(p_hwfn, p_ptt, buf, len); +} + int qed_dmae_sanity(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, const char *phase) { diff --git a/drivers/net/ethernet/qlogic/qed/qed_hw.h b/drivers/net/ethernet/qlogic/qed/qed_hw.h index 505e94db939d..f5b109b04b66 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hw.h +++ b/drivers/net/ethernet/qlogic/qed/qed_hw.h @@ -315,4 +315,19 @@ int qed_init_fw_data(struct qed_dev *cdev, int qed_dmae_sanity(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, const char *phase); +#define QED_HW_ERR_MAX_STR_SIZE 256 + +/** + * @brief qed_hw_err_notify - Notify upper layer driver and management FW + * about a HW error. + * + * @param p_hwfn + * @param p_ptt + * @param err_type + * @param fmt - debug data buffer to send to the MFW + * @param ... - buffer format args + */ +void qed_hw_err_notify(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + enum qed_hw_err_type err_type, char *fmt, ...); #endif diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c index 9f5113639eaf..b7b974f0ef21 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_int.c +++ b/drivers/net/ethernet/qlogic/qed/qed_int.c @@ -96,6 +96,7 @@ struct aeu_invert_reg_bit { #define ATTENTION_BB(value) (value << ATTENTION_BB_SHIFT) #define ATTENTION_BB_DIFFERENT BIT(23) +#define ATTENTION_CLEAR_ENABLE BIT(28) unsigned int flags; /* Callback to call if attention will be triggered */ @@ -363,6 +364,21 @@ static int qed_pglueb_rbc_attn_cb(struct qed_hwfn *p_hwfn) return qed_pglueb_rbc_attn_handler(p_hwfn, p_hwfn->p_dpc_ptt); } +static int qed_fw_assertion(struct qed_hwfn *p_hwfn) +{ + qed_hw_err_notify(p_hwfn, p_hwfn->p_dpc_ptt, QED_HW_ERR_FW_ASSERT, + "FW assertion!\n"); + + return -EINVAL; +} + +static int qed_general_attention_35(struct qed_hwfn *p_hwfn) +{ + DP_INFO(p_hwfn, "General attention 35!\n"); + + return 0; +} + #define QED_DORQ_ATTENTION_REASON_MASK (0xfffff) #define QED_DORQ_ATTENTION_OPAQUE_MASK (0xffff) #define QED_DORQ_ATTENTION_OPAQUE_SHIFT (0x0) @@ -605,13 +621,15 @@ static struct aeu_invert_reg aeu_descs[NUM_ATTN_REGS] = { { { /* After Invert 4 */ - {"General Attention 32", ATTENTION_SINGLE, - NULL, MAX_BLOCK_ID}, + {"General Attention 32", ATTENTION_SINGLE | + ATTENTION_CLEAR_ENABLE, qed_fw_assertion, + MAX_BLOCK_ID}, {"General Attention %d", (2 << ATTENTION_LENGTH_SHIFT) | (33 << ATTENTION_OFFSET_SHIFT), NULL, MAX_BLOCK_ID}, - {"General Attention 35", ATTENTION_SINGLE, - NULL, MAX_BLOCK_ID}, + {"General Attention 35", ATTENTION_SINGLE | + ATTENTION_CLEAR_ENABLE, qed_general_attention_35, + MAX_BLOCK_ID}, {"NWS Parity", ATTENTION_PAR | ATTENTION_BB_DIFFERENT | ATTENTION_BB(AEU_INVERT_REG_SPECIAL_CNIG_0), @@ -927,9 +945,12 @@ qed_int_deassertion_aeu_bit(struct qed_hwfn *p_hwfn, qed_int_attn_print(p_hwfn, p_aeu->block_index, ATTN_TYPE_INTERRUPT, !b_fatal); - - /* If the attention is benign, no need to prevent it */ - if (!rc) + /* Reach assertion if attention is fatal */ + if (b_fatal) + qed_hw_err_notify(p_hwfn, p_hwfn->p_dpc_ptt, QED_HW_ERR_HW_ATTN, + "`%s': Fatal attention\n", + p_bit_name); + else /* If the attention is benign, no need to prevent it */ goto out; /* Prevent this Attention from being asserted in the future */ @@ -2349,6 +2370,11 @@ void qed_int_disable_post_isr_release(struct qed_dev *cdev) cdev->hwfns[i].b_int_requested = false; } +void qed_int_attn_clr_enable(struct qed_dev *cdev, bool clr_enable) +{ + cdev->attn_clr_en = clr_enable; +} + int qed_int_set_timer_res(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u8 timer_res, u16 sb_id, bool tx) { diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.h b/drivers/net/ethernet/qlogic/qed/qed_int.h index 9ad568d93ae6..e09db3386367 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_int.h +++ b/drivers/net/ethernet/qlogic/qed/qed_int.h @@ -190,6 +190,17 @@ void qed_int_get_num_sbs(struct qed_hwfn *p_hwfn, */ void qed_int_disable_post_isr_release(struct qed_dev *cdev); +/** + * @brief qed_int_attn_clr_enable - sets whether the general behavior is + * preventing attentions from being reasserted, or following the + * attributes of the specific attention. + * + * @param cdev + * @param clr_enable + * + */ +void qed_int_attn_clr_enable(struct qed_dev *cdev, bool clr_enable); + /** * @brief - Doorbell Recovery handler. * Run doorbell recovery in case of PF overflow (and flush DORQ if diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 38a1d26ca9db..83e798d4eebb 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -2468,6 +2468,39 @@ void qed_schedule_recovery_handler(struct qed_hwfn *p_hwfn) ops->schedule_recovery_handler(cookie); } +char *qed_hw_err_type_descr[] = { + [QED_HW_ERR_FAN_FAIL] = "Fan Failure", + [QED_HW_ERR_MFW_RESP_FAIL] = "MFW Response Failure", + [QED_HW_ERR_HW_ATTN] = "HW Attention", + [QED_HW_ERR_DMAE_FAIL] = "DMAE Failure", + [QED_HW_ERR_RAMROD_FAIL] = "Ramrod Failure", + [QED_HW_ERR_FW_ASSERT] = "FW Assertion", + [QED_HW_ERR_LAST] = "Unknown", +}; + +void qed_hw_error_occurred(struct qed_hwfn *p_hwfn, + enum qed_hw_err_type err_type) +{ + struct qed_common_cb_ops *ops = p_hwfn->cdev->protocol_ops.common; + void *cookie = p_hwfn->cdev->ops_cookie; + char *err_str; + + if (err_type > QED_HW_ERR_LAST) + err_type = QED_HW_ERR_LAST; + err_str = qed_hw_err_type_descr[err_type]; + + DP_NOTICE(p_hwfn, "HW error occurred [%s]\n", err_str); + + /* Call the HW error handler of the protocol driver. + * If it is not available - perform a minimal handling of preventing + * HW attentions from being reasserted. + */ + if (ops && ops->schedule_hw_err_handler) + ops->schedule_hw_err_handler(cookie, err_type); + else + qed_int_attn_clr_enable(p_hwfn->cdev, true); +} + static int qed_set_coalesce(struct qed_dev *cdev, u16 rx_coal, u16 tx_coal, void *handle) { @@ -2689,6 +2722,7 @@ const struct qed_common_ops qed_common_ops_pass = { .set_led = &qed_set_led, .recovery_process = &qed_recovery_process, .recovery_prolog = &qed_recovery_prolog, + .attn_clr_enable = &qed_int_attn_clr_enable, .update_drv_state = &qed_update_drv_state, .update_mac = &qed_update_mac, .update_mtu = &qed_update_mtu, diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index 280527cc0578..9624616806e7 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -575,6 +575,8 @@ _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, if (!QED_MB_FLAGS_IS_SET(p_mb_params, AVOID_BLOCK)) qed_mcp_cmd_set_blocking(p_hwfn, true); + qed_hw_err_notify(p_hwfn, p_ptt, + QED_HW_ERR_MFW_RESP_FAIL, NULL); return -EAGAIN; } @@ -1704,6 +1706,127 @@ static void qed_mcp_update_stag(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) &resp, ¶m); } +static void qed_mcp_handle_fan_failure(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt) +{ + /* A single notification should be sent to upper driver in CMT mode */ + if (p_hwfn != QED_LEADING_HWFN(p_hwfn->cdev)) + return; + + qed_hw_err_notify(p_hwfn, p_ptt, QED_HW_ERR_FAN_FAIL, + "Fan failure was detected on the network interface card and it's going to be shut down.\n"); +} + +struct qed_mdump_cmd_params { + u32 cmd; + void *p_data_src; + u8 data_src_size; + void *p_data_dst; + u8 data_dst_size; + u32 mcp_resp; +}; + +static int +qed_mcp_mdump_cmd(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct qed_mdump_cmd_params *p_mdump_cmd_params) +{ + struct qed_mcp_mb_params mb_params; + int rc; + + memset(&mb_params, 0, sizeof(mb_params)); + mb_params.cmd = DRV_MSG_CODE_MDUMP_CMD; + mb_params.param = p_mdump_cmd_params->cmd; + mb_params.p_data_src = p_mdump_cmd_params->p_data_src; + mb_params.data_src_size = p_mdump_cmd_params->data_src_size; + mb_params.p_data_dst = p_mdump_cmd_params->p_data_dst; + mb_params.data_dst_size = p_mdump_cmd_params->data_dst_size; + rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params); + if (rc) + return rc; + + p_mdump_cmd_params->mcp_resp = mb_params.mcp_resp; + + if (p_mdump_cmd_params->mcp_resp == FW_MSG_CODE_MDUMP_INVALID_CMD) { + DP_INFO(p_hwfn, + "The mdump sub command is unsupported by the MFW [mdump_cmd 0x%x]\n", + p_mdump_cmd_params->cmd); + rc = -EOPNOTSUPP; + } else if (p_mdump_cmd_params->mcp_resp == FW_MSG_CODE_UNSUPPORTED) { + DP_INFO(p_hwfn, + "The mdump command is not supported by the MFW\n"); + rc = -EOPNOTSUPP; + } + + return rc; +} + +static int qed_mcp_mdump_ack(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) +{ + struct qed_mdump_cmd_params mdump_cmd_params; + + memset(&mdump_cmd_params, 0, sizeof(mdump_cmd_params)); + mdump_cmd_params.cmd = DRV_MSG_CODE_MDUMP_ACK; + + return qed_mcp_mdump_cmd(p_hwfn, p_ptt, &mdump_cmd_params); +} + +int +qed_mcp_mdump_get_retain(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct mdump_retain_data_stc *p_mdump_retain) +{ + struct qed_mdump_cmd_params mdump_cmd_params; + int rc; + + memset(&mdump_cmd_params, 0, sizeof(mdump_cmd_params)); + mdump_cmd_params.cmd = DRV_MSG_CODE_MDUMP_GET_RETAIN; + mdump_cmd_params.p_data_dst = p_mdump_retain; + mdump_cmd_params.data_dst_size = sizeof(*p_mdump_retain); + + rc = qed_mcp_mdump_cmd(p_hwfn, p_ptt, &mdump_cmd_params); + if (rc) + return rc; + + if (mdump_cmd_params.mcp_resp != FW_MSG_CODE_OK) { + DP_INFO(p_hwfn, + "Failed to get the mdump retained data [mcp_resp 0x%x]\n", + mdump_cmd_params.mcp_resp); + return -EINVAL; + } + + return 0; +} + +static void qed_mcp_handle_critical_error(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt) +{ + struct mdump_retain_data_stc mdump_retain; + int rc; + + /* In CMT mode - no need for more than a single acknowledgment to the + * MFW, and no more than a single notification to the upper driver. + */ + if (p_hwfn != QED_LEADING_HWFN(p_hwfn->cdev)) + return; + + rc = qed_mcp_mdump_get_retain(p_hwfn, p_ptt, &mdump_retain); + if (rc == 0 && mdump_retain.valid) + DP_NOTICE(p_hwfn, + "The MFW notified that a critical error occurred in the device [epoch 0x%08x, pf 0x%x, status 0x%08x]\n", + mdump_retain.epoch, + mdump_retain.pf, mdump_retain.status); + else + DP_NOTICE(p_hwfn, + "The MFW notified that a critical error occurred in the device\n"); + + DP_NOTICE(p_hwfn, + "Acknowledging the notification to not allow the MFW crash dump [driver debug data collection is preferable]\n"); + qed_mcp_mdump_ack(p_hwfn, p_ptt); + + qed_hw_err_notify(p_hwfn, p_ptt, QED_HW_ERR_HW_ATTN, NULL); +} + void qed_mcp_read_ufp_config(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { struct public_func shmem_info; @@ -1850,6 +1973,12 @@ int qed_mcp_handle_events(struct qed_hwfn *p_hwfn, case MFW_DRV_MSG_S_TAG_UPDATE: qed_mcp_update_stag(p_hwfn, p_ptt); break; + case MFW_DRV_MSG_FAILURE_DETECTED: + qed_mcp_handle_fan_failure(p_hwfn, p_ptt); + break; + case MFW_DRV_MSG_CRITICAL_ERROR_OCCURRED: + qed_mcp_handle_critical_error(p_hwfn, p_ptt); + break; case MFW_DRV_MSG_GET_TLV_REQ: qed_mfw_tlv_req(p_hwfn); break; @@ -3819,3 +3948,127 @@ int qed_mcp_nvm_set_cfg(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, DRV_MSG_CODE_SET_NVM_CFG_OPTION, mb_param, &resp, ¶m, len, (u32 *)p_buf); } + +#define QED_MCP_DBG_DATA_MAX_SIZE MCP_DRV_NVM_BUF_LEN +#define QED_MCP_DBG_DATA_MAX_HEADER_SIZE sizeof(u32) +#define QED_MCP_DBG_DATA_MAX_PAYLOAD_SIZE \ + (QED_MCP_DBG_DATA_MAX_SIZE - QED_MCP_DBG_DATA_MAX_HEADER_SIZE) + +static int +__qed_mcp_send_debug_data(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u8 *p_buf, u8 size) +{ + struct qed_mcp_mb_params mb_params; + int rc; + + if (size > QED_MCP_DBG_DATA_MAX_SIZE) { + DP_ERR(p_hwfn, + "Debug data size is %d while it should not exceed %d\n", + size, QED_MCP_DBG_DATA_MAX_SIZE); + return -EINVAL; + } + + memset(&mb_params, 0, sizeof(mb_params)); + mb_params.cmd = DRV_MSG_CODE_DEBUG_DATA_SEND; + SET_MFW_FIELD(mb_params.param, DRV_MSG_CODE_DEBUG_DATA_SEND_SIZE, size); + mb_params.p_data_src = p_buf; + mb_params.data_src_size = size; + rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params); + if (rc) + return rc; + + if (mb_params.mcp_resp == FW_MSG_CODE_UNSUPPORTED) { + DP_INFO(p_hwfn, + "The DEBUG_DATA_SEND command is unsupported by the MFW\n"); + return -EOPNOTSUPP; + } else if (mb_params.mcp_resp == (u32)FW_MSG_CODE_DEBUG_NOT_ENABLED) { + DP_INFO(p_hwfn, "The DEBUG_DATA_SEND command is not enabled\n"); + return -EBUSY; + } else if (mb_params.mcp_resp != (u32)FW_MSG_CODE_DEBUG_DATA_SEND_OK) { + DP_NOTICE(p_hwfn, + "Failed to send debug data to the MFW [resp 0x%08x]\n", + mb_params.mcp_resp); + return -EINVAL; + } + + return 0; +} + +enum qed_mcp_dbg_data_type { + QED_MCP_DBG_DATA_TYPE_RAW, +}; + +/* Header format: [31:28] PFID, [27:20] flags, [19:12] type, [11:0] S/N */ +#define QED_MCP_DBG_DATA_HDR_SN_OFFSET 0 +#define QED_MCP_DBG_DATA_HDR_SN_MASK 0x00000fff +#define QED_MCP_DBG_DATA_HDR_TYPE_OFFSET 12 +#define QED_MCP_DBG_DATA_HDR_TYPE_MASK 0x000ff000 +#define QED_MCP_DBG_DATA_HDR_FLAGS_OFFSET 20 +#define QED_MCP_DBG_DATA_HDR_FLAGS_MASK 0x0ff00000 +#define QED_MCP_DBG_DATA_HDR_PF_OFFSET 28 +#define QED_MCP_DBG_DATA_HDR_PF_MASK 0xf0000000 + +#define QED_MCP_DBG_DATA_HDR_FLAGS_FIRST 0x1 +#define QED_MCP_DBG_DATA_HDR_FLAGS_LAST 0x2 + +static int +qed_mcp_send_debug_data(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + enum qed_mcp_dbg_data_type type, u8 *p_buf, u32 size) +{ + u8 raw_data[QED_MCP_DBG_DATA_MAX_SIZE], *p_tmp_buf = p_buf; + u32 tmp_size = size, *p_header, *p_payload; + u8 flags = 0; + u16 seq; + int rc; + + p_header = (u32 *)raw_data; + p_payload = (u32 *)(raw_data + QED_MCP_DBG_DATA_MAX_HEADER_SIZE); + + seq = (u16)atomic_inc_return(&p_hwfn->mcp_info->dbg_data_seq); + + /* First chunk is marked as 'first' */ + flags |= QED_MCP_DBG_DATA_HDR_FLAGS_FIRST; + + *p_header = 0; + SET_MFW_FIELD(*p_header, QED_MCP_DBG_DATA_HDR_SN, seq); + SET_MFW_FIELD(*p_header, QED_MCP_DBG_DATA_HDR_TYPE, type); + SET_MFW_FIELD(*p_header, QED_MCP_DBG_DATA_HDR_FLAGS, flags); + SET_MFW_FIELD(*p_header, QED_MCP_DBG_DATA_HDR_PF, p_hwfn->abs_pf_id); + + while (tmp_size > QED_MCP_DBG_DATA_MAX_PAYLOAD_SIZE) { + memcpy(p_payload, p_tmp_buf, QED_MCP_DBG_DATA_MAX_PAYLOAD_SIZE); + rc = __qed_mcp_send_debug_data(p_hwfn, p_ptt, raw_data, + QED_MCP_DBG_DATA_MAX_SIZE); + if (rc) + return rc; + + /* Clear the 'first' marking after sending the first chunk */ + if (p_tmp_buf == p_buf) { + flags &= ~QED_MCP_DBG_DATA_HDR_FLAGS_FIRST; + SET_MFW_FIELD(*p_header, QED_MCP_DBG_DATA_HDR_FLAGS, + flags); + } + + p_tmp_buf += QED_MCP_DBG_DATA_MAX_PAYLOAD_SIZE; + tmp_size -= QED_MCP_DBG_DATA_MAX_PAYLOAD_SIZE; + } + + /* Last chunk is marked as 'last' */ + flags |= QED_MCP_DBG_DATA_HDR_FLAGS_LAST; + SET_MFW_FIELD(*p_header, QED_MCP_DBG_DATA_HDR_FLAGS, flags); + memcpy(p_payload, p_tmp_buf, tmp_size); + + /* Casting the left size to u8 is ok since at this point it is <= 32 */ + return __qed_mcp_send_debug_data(p_hwfn, p_ptt, raw_data, + (u8)(QED_MCP_DBG_DATA_MAX_HEADER_SIZE + + tmp_size)); +} + +int +qed_mcp_send_raw_debug_data(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u8 *p_buf, u32 size) +{ + return qed_mcp_send_debug_data(p_hwfn, p_ptt, + QED_MCP_DBG_DATA_TYPE_RAW, p_buf, size); +} diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h index 9c4c2763de8d..5750b4c5ef63 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h @@ -685,6 +685,18 @@ int qed_mcp_bist_nvm_get_image_att(struct qed_hwfn *p_hwfn, */ int qed_mfw_process_tlv_req(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt); +/** + * @brief Send raw debug data to the MFW + * + * @param p_hwfn + * @param p_ptt + * @param p_buf - raw debug data buffer + * @param size - buffer size + */ +int +qed_mcp_send_raw_debug_data(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u8 *p_buf, u32 size); + /* Using hwfn number (and not pf_num) is required since in CMT mode, * same pf_num may be used by two different hwfn * TODO - this shouldn't really be in .h file, but until all fields @@ -731,6 +743,9 @@ struct qed_mcp_info { /* Capabilties negotiated with the MFW */ u32 capabilities; + + /* S/N for debug data mailbox commands */ + atomic_t dbg_data_seq; }; struct qed_mcp_mb_params { @@ -1001,6 +1016,19 @@ int __qed_configure_pf_min_bandwidth(struct qed_hwfn *p_hwfn, int qed_mcp_mask_parities(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u32 mask_parities); +/* @brief - Gets the mdump retained data from the MFW. + * + * @param p_hwfn + * @param p_ptt + * @param p_mdump_retain + * + * @param return 0 upon success. + */ +int +qed_mcp_mdump_get_retain(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct mdump_retain_data_stc *p_mdump_retain); + /** * @brief - Sets the MFW's max value for the given resource * diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c index f5f3c03b9dd2..790c28d696a0 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_spq.c +++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c @@ -160,12 +160,16 @@ static int qed_spq_block(struct qed_hwfn *p_hwfn, return 0; } err: - DP_NOTICE(p_hwfn, - "Ramrod is stuck [CID %08x cmd %02x protocol %02x echo %04x]\n", - le32_to_cpu(p_ent->elem.hdr.cid), - p_ent->elem.hdr.cmd_id, - p_ent->elem.hdr.protocol_id, - le16_to_cpu(p_ent->elem.hdr.echo)); + p_ptt = qed_ptt_acquire(p_hwfn); + if (!p_ptt) + return -EBUSY; + qed_hw_err_notify(p_hwfn, p_ptt, QED_HW_ERR_RAMROD_FAIL, + "Ramrod is stuck [CID %08x cmd %02x protocol %02x echo %04x]\n", + le32_to_cpu(p_ent->elem.hdr.cid), + p_ent->elem.hdr.cmd_id, + p_ent->elem.hdr.protocol_id, + le16_to_cpu(p_ent->elem.hdr.echo)); + qed_ptt_release(p_hwfn, p_ptt); return -EBUSY; } diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index f6f0b51620ab..8857da1208d7 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -278,6 +278,14 @@ struct qede_dev { struct qede_rdma_dev rdma_info; struct bpf_prog *xdp_prog; + + unsigned long err_flags; +#define QEDE_ERR_IS_HANDLED 31 +#define QEDE_ERR_ATTN_CLR_EN 0 +#define QEDE_ERR_GET_DBG_INFO 1 +#define QEDE_ERR_IS_RECOVERABLE 2 +#define QEDE_ERR_WARN 3 + struct qede_dump_info dump_info; }; @@ -485,12 +493,15 @@ struct qede_fastpath { #define QEDE_SP_RECOVERY 0 #define QEDE_SP_RX_MODE 1 +#define QEDE_SP_RSVD1 2 +#define QEDE_SP_RSVD2 3 +#define QEDE_SP_HW_ERR 4 +#define QEDE_SP_ARFS_CONFIG 5 #define QEDE_SP_AER 7 #ifdef CONFIG_RFS_ACCEL int qede_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, u16 rxq_index, u32 flow_id); -#define QEDE_SP_ARFS_CONFIG 4 #define QEDE_SP_TASK_POLL_DELAY (5 * HZ) #endif @@ -522,7 +533,6 @@ u16 qede_select_queue(struct net_device *dev, struct sk_buff *skb, netdev_features_t qede_features_check(struct sk_buff *skb, struct net_device *dev, netdev_features_t features); -void qede_tx_log_print(struct qede_dev *edev, struct qede_fastpath *fp); int qede_alloc_rx_buffer(struct qede_rx_queue *rxq, bool allow_lazy); int qede_free_tx_pkt(struct qede_dev *edev, struct qede_tx_queue *txq, int *len); diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index 812c7766e096..24cc68391ac4 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -190,12 +190,14 @@ static const struct { enum { QEDE_PRI_FLAG_CMT, QEDE_PRI_FLAG_SMART_AN_SUPPORT, /* MFW supports SmartAN */ + QEDE_PRI_FLAG_RECOVER_ON_ERROR, QEDE_PRI_FLAG_LEN, }; static const char qede_private_arr[QEDE_PRI_FLAG_LEN][ETH_GSTRING_LEN] = { "Coupled-Function", "SmartAN capable", + "Recover on error", }; enum qede_ethtool_tests { @@ -417,9 +419,30 @@ static u32 qede_get_priv_flags(struct net_device *dev) if (edev->dev_info.common.smart_an) flags |= BIT(QEDE_PRI_FLAG_SMART_AN_SUPPORT); + if (edev->err_flags & BIT(QEDE_ERR_IS_RECOVERABLE)) + flags |= BIT(QEDE_PRI_FLAG_RECOVER_ON_ERROR); + return flags; } +static int qede_set_priv_flags(struct net_device *dev, u32 flags) +{ + struct qede_dev *edev = netdev_priv(dev); + u32 cflags = qede_get_priv_flags(dev); + u32 dflags = flags ^ cflags; + + /* can only change RECOVER_ON_ERROR flag */ + if (dflags & ~BIT(QEDE_PRI_FLAG_RECOVER_ON_ERROR)) + return -EINVAL; + + if (flags & BIT(QEDE_PRI_FLAG_RECOVER_ON_ERROR)) + set_bit(QEDE_ERR_IS_RECOVERABLE, &edev->err_flags); + else + clear_bit(QEDE_ERR_IS_RECOVERABLE, &edev->err_flags); + + return 0; +} + struct qede_link_mode_mapping { u32 qed_link_mode; u32 ethtool_link_mode; @@ -2098,6 +2121,7 @@ static const struct ethtool_ops qede_ethtool_ops = { .set_phys_id = qede_set_phys_id, .get_ethtool_stats = qede_get_ethtool_stats, .get_priv_flags = qede_get_priv_flags, + .set_priv_flags = qede_set_priv_flags, .get_sset_count = qede_get_sset_count, .get_rxnfc = qede_get_rxnfc, .set_rxnfc = qede_set_rxnfc, diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 300405369c37..f50d9a9b76be 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -139,10 +139,12 @@ static void qede_shutdown(struct pci_dev *pdev); static void qede_link_update(void *dev, struct qed_link_output *link); static void qede_schedule_recovery_handler(void *dev); static void qede_recovery_handler(struct qede_dev *edev); +static void qede_schedule_hw_err_handler(void *dev, + enum qed_hw_err_type err_type); static void qede_get_eth_tlv_data(void *edev, void *data); static void qede_get_generic_tlv_data(void *edev, struct qed_generic_tlvs *data); - +static void qede_generic_hw_err_handler(struct qede_dev *edev); #ifdef CONFIG_QED_SRIOV static int qede_set_vf_vlan(struct net_device *ndev, int vf, u16 vlan, u8 qos, __be16 vlan_proto) @@ -230,6 +232,7 @@ static struct qed_eth_cb_ops qede_ll_ops = { #endif .link_update = qede_link_update, .schedule_recovery_handler = qede_schedule_recovery_handler, + .schedule_hw_err_handler = qede_schedule_hw_err_handler, .get_generic_tlv_data = qede_get_generic_tlv_data, .get_protocol_tlv_data = qede_get_eth_tlv_data, }, @@ -536,6 +539,51 @@ static int qede_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) return 0; } +static void qede_tx_log_print(struct qede_dev *edev, struct qede_tx_queue *txq) +{ + DP_NOTICE(edev, + "Txq[%d]: FW cons [host] %04x, SW cons %04x, SW prod %04x [Jiffies %lu]\n", + txq->index, le16_to_cpu(*txq->hw_cons_ptr), + qed_chain_get_cons_idx(&txq->tx_pbl), + qed_chain_get_prod_idx(&txq->tx_pbl), + jiffies); +} + +static void qede_tx_timeout(struct net_device *dev, unsigned int txqueue) +{ + struct qede_dev *edev = netdev_priv(dev); + struct qede_tx_queue *txq; + int cos; + + netif_carrier_off(dev); + DP_NOTICE(edev, "TX timeout on queue %u!\n", txqueue); + + if (!(edev->fp_array[txqueue].type & QEDE_FASTPATH_TX)) + return; + + for_each_cos_in_txq(edev, cos) { + txq = &edev->fp_array[txqueue].txq[cos]; + + if (qed_chain_get_cons_idx(&txq->tx_pbl) != + qed_chain_get_prod_idx(&txq->tx_pbl)) + qede_tx_log_print(edev, txq); + } + + if (IS_VF(edev)) + return; + + if (test_and_set_bit(QEDE_ERR_IS_HANDLED, &edev->err_flags) || + edev->state == QEDE_STATE_RECOVERY) { + DP_INFO(edev, + "Avoid handling a Tx timeout while another HW error is being handled\n"); + return; + } + + set_bit(QEDE_ERR_GET_DBG_INFO, &edev->err_flags); + set_bit(QEDE_SP_HW_ERR, &edev->sp_flags); + schedule_delayed_work(&edev->sp_task, 0); +} + static int qede_setup_tc(struct net_device *ndev, u8 num_tc) { struct qede_dev *edev = netdev_priv(ndev); @@ -623,6 +671,7 @@ static const struct net_device_ops qede_netdev_ops = { .ndo_validate_addr = eth_validate_addr, .ndo_change_mtu = qede_change_mtu, .ndo_do_ioctl = qede_ioctl, + .ndo_tx_timeout = qede_tx_timeout, #ifdef CONFIG_QED_SRIOV .ndo_set_vf_mac = qede_set_vf_mac, .ndo_set_vf_vlan = qede_set_vf_vlan, @@ -1009,6 +1058,8 @@ static void qede_sp_task(struct work_struct *work) qede_process_arfs_filters(edev, false); } #endif + if (test_and_clear_bit(QEDE_SP_HW_ERR, &edev->sp_flags)) + qede_generic_hw_err_handler(edev); __qede_unlock(edev); if (test_and_clear_bit(QEDE_SP_AER, &edev->sp_flags)) { @@ -2509,6 +2560,100 @@ static void qede_recovery_handler(struct qede_dev *edev) qede_recovery_failed(edev); } +static void qede_atomic_hw_err_handler(struct qede_dev *edev) +{ + struct qed_dev *cdev = edev->cdev; + + DP_NOTICE(edev, + "Generic non-sleepable HW error handling started - err_flags 0x%lx\n", + edev->err_flags); + + /* Get a call trace of the flow that led to the error */ + WARN_ON(test_bit(QEDE_ERR_WARN, &edev->err_flags)); + + /* Prevent HW attentions from being reasserted */ + if (test_bit(QEDE_ERR_ATTN_CLR_EN, &edev->err_flags)) + edev->ops->common->attn_clr_enable(cdev, true); + + DP_NOTICE(edev, "Generic non-sleepable HW error handling is done\n"); +} + +static void qede_generic_hw_err_handler(struct qede_dev *edev) +{ + struct qed_dev *cdev = edev->cdev; + + DP_NOTICE(edev, + "Generic sleepable HW error handling started - err_flags 0x%lx\n", + edev->err_flags); + + /* Trigger a recovery process. + * This is placed in the sleep requiring section just to make + * sure it is the last one, and that all the other operations + * were completed. + */ + if (test_bit(QEDE_ERR_IS_RECOVERABLE, &edev->err_flags)) + edev->ops->common->recovery_process(cdev); + + clear_bit(QEDE_ERR_IS_HANDLED, &edev->err_flags); + + DP_NOTICE(edev, "Generic sleepable HW error handling is done\n"); +} + +static void qede_set_hw_err_flags(struct qede_dev *edev, + enum qed_hw_err_type err_type) +{ + unsigned long err_flags = 0; + + switch (err_type) { + case QED_HW_ERR_DMAE_FAIL: + set_bit(QEDE_ERR_WARN, &err_flags); + fallthrough; + case QED_HW_ERR_MFW_RESP_FAIL: + case QED_HW_ERR_HW_ATTN: + case QED_HW_ERR_RAMROD_FAIL: + case QED_HW_ERR_FW_ASSERT: + set_bit(QEDE_ERR_ATTN_CLR_EN, &err_flags); + set_bit(QEDE_ERR_GET_DBG_INFO, &err_flags); + break; + + default: + DP_NOTICE(edev, "Unexpected HW error [%d]\n", err_type); + break; + } + + edev->err_flags |= err_flags; +} + +static void qede_schedule_hw_err_handler(void *dev, + enum qed_hw_err_type err_type) +{ + struct qede_dev *edev = dev; + + /* Fan failure cannot be masked by handling of another HW error or by a + * concurrent recovery process. + */ + if ((test_and_set_bit(QEDE_ERR_IS_HANDLED, &edev->err_flags) || + edev->state == QEDE_STATE_RECOVERY) && + err_type != QED_HW_ERR_FAN_FAIL) { + DP_INFO(edev, + "Avoid scheduling an error handling while another HW error is being handled\n"); + return; + } + + if (err_type >= QED_HW_ERR_LAST) { + DP_NOTICE(edev, "Unknown HW error [%d]\n", err_type); + clear_bit(QEDE_ERR_IS_HANDLED, &edev->err_flags); + return; + } + + qede_set_hw_err_flags(edev, err_type); + qede_atomic_hw_err_handler(edev); + set_bit(QEDE_SP_HW_ERR, &edev->sp_flags); + schedule_delayed_work(&edev->sp_task, 0); + + DP_INFO(edev, "Scheduled a error handler [err_type %d]\n", err_type); +} + static bool qede_is_txq_full(struct qede_dev *edev, struct qede_tx_queue *txq) { struct netdev_queue *netdev_txq; diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 8f29e0d8a7b3..48325d7790f8 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -607,6 +607,16 @@ struct qed_sb_info { struct qed_dev *cdev; }; +enum qed_hw_err_type { + QED_HW_ERR_FAN_FAIL, + QED_HW_ERR_MFW_RESP_FAIL, + QED_HW_ERR_HW_ATTN, + QED_HW_ERR_DMAE_FAIL, + QED_HW_ERR_RAMROD_FAIL, + QED_HW_ERR_FW_ASSERT, + QED_HW_ERR_LAST, +}; + enum qed_dev_type { QED_DEV_TYPE_BB, QED_DEV_TYPE_AH, @@ -811,10 +821,11 @@ enum qed_nvm_flash_cmd { struct qed_common_cb_ops { void (*arfs_filter_op)(void *dev, void *fltr, u8 fw_rc); - void (*link_update)(void *dev, - struct qed_link_output *link); + void (*link_update)(void *dev, struct qed_link_output *link); void (*schedule_recovery_handler)(void *dev); - void (*dcbx_aen)(void *dev, struct qed_dcbx_get *get, u32 mib_type); + void (*schedule_hw_err_handler)(void *dev, + enum qed_hw_err_type err_type); + void (*dcbx_aen)(void *dev, struct qed_dcbx_get *get, u32 mib_type); void (*get_generic_tlv_data)(void *dev, struct qed_generic_tlvs *data); void (*get_protocol_tlv_data)(void *dev, void *data); }; @@ -1034,6 +1045,15 @@ struct qed_common_ops { */ int (*set_led)(struct qed_dev *cdev, enum qed_led_mode mode); + +/** + * @brief attn_clr_enable - Prevent attentions from being reasserted + * + * @param cdev + * @param clr_enable + */ + void (*attn_clr_enable)(struct qed_dev *cdev, bool clr_enable); + /** * @brief db_recovery_add - add doorbell information to the doorbell * recovery mechanism.