habanalabs: preboot hard reset support

FW hard reset capability indication is now moved to preboot stage.
Driver will check if HW is dirty only after it has validated that
preboot is up. If HW is dirty, driver will perform a hard reset according
to the FW capability.
In addition, FW defines a new message which the driver needs to send in
order to initiate a hard reset.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
This commit is contained in:
Ofir Bitton 2020-12-01 10:39:54 +02:00 committed by Oded Gabbay
parent 6585489e80
commit 9c9013cbd8
4 changed files with 31 additions and 25 deletions

View File

@ -627,7 +627,9 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg,
security_status = RREG32(cpu_security_boot_status_reg);
/* We read security status multiple times during boot:
* 1. preboot - we check if fw security feature is supported
* 1. preboot - a. Check whether the security status bits are valid
* b. Check whether fw security is enabled
* c. Check whether hard reset is done by fw
* 2. boot cpu - we get boot cpu security status
* 3. FW application - we get FW application security status
*
@ -637,13 +639,20 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg,
*/
if (security_status & CPU_BOOT_DEV_STS0_ENABLED) {
hdev->asic_prop.fw_security_status_valid = 1;
prop->fw_security_disabled =
!(security_status & CPU_BOOT_DEV_STS0_SECURITY_EN);
if (!(security_status & CPU_BOOT_DEV_STS0_SECURITY_EN))
prop->fw_security_disabled = true;
if (security_status & CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
hdev->asic_prop.hard_reset_done_by_fw = true;
} else {
hdev->asic_prop.fw_security_status_valid = 0;
prop->fw_security_disabled = true;
}
dev_dbg(hdev->dev, "Firmware hard-reset is %s\n",
hdev->asic_prop.hard_reset_done_by_fw ? "enabled" : "disabled");
dev_info(hdev->dev, "firmware-level security is %s\n",
prop->fw_security_disabled ? "disabled" : "enabled");
@ -797,18 +806,10 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
}
/* Read FW application security bits */
if (hdev->asic_prop.fw_security_status_valid) {
if (hdev->asic_prop.fw_security_status_valid)
hdev->asic_prop.fw_app_security_map =
RREG32(cpu_security_boot_status_reg);
if (hdev->asic_prop.fw_app_security_map &
CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
hdev->asic_prop.hard_reset_done_by_fw = true;
}
dev_dbg(hdev->dev, "Firmware hard-reset is %s\n",
hdev->asic_prop.hard_reset_done_by_fw ? "enabled" : "disabled");
dev_info(hdev->dev, "Successfully loaded firmware to device\n");
out:

View File

@ -654,12 +654,6 @@ static int gaudi_early_init(struct hl_device *hdev)
if (rc)
goto free_queue_props;
if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
dev_info(hdev->dev,
"H/W state is dirty, must reset before initializing\n");
hdev->asic_funcs->hw_fini(hdev, true);
}
/* Before continuing in the initialization, we need to read the preboot
* version to determine whether we run with a security-enabled firmware
*/
@ -672,6 +666,12 @@ static int gaudi_early_init(struct hl_device *hdev)
goto pci_fini;
}
if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
dev_info(hdev->dev,
"H/W state is dirty, must reset before initializing\n");
hdev->asic_funcs->hw_fini(hdev, true);
}
return 0;
pci_fini:
@ -3881,7 +3881,10 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
/* I don't know what is the state of the CPU so make sure it is
* stopped in any means necessary
*/
WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE);
if (hdev->asic_prop.hard_reset_done_by_fw)
WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_RST_DEV);
else
WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE);
WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_HALT_MACHINE);

View File

@ -613,12 +613,6 @@ static int goya_early_init(struct hl_device *hdev)
if (rc)
goto free_queue_props;
if (goya_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
dev_info(hdev->dev,
"H/W state is dirty, must reset before initializing\n");
hdev->asic_funcs->hw_fini(hdev, true);
}
/* Before continuing in the initialization, we need to read the preboot
* version to determine whether we run with a security-enabled firmware
*/
@ -631,6 +625,12 @@ static int goya_early_init(struct hl_device *hdev)
goto pci_fini;
}
if (goya_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
dev_info(hdev->dev,
"H/W state is dirty, must reset before initializing\n");
hdev->asic_funcs->hw_fini(hdev, true);
}
if (!hdev->pldm) {
val = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
if (val & PSOC_GLOBAL_CONF_BOOT_STRAP_PINS_SRIOV_EN_MASK)

View File

@ -204,6 +204,8 @@ enum kmd_msg {
KMD_MSG_GOTO_WFE,
KMD_MSG_FIT_RDY,
KMD_MSG_SKIP_BMC,
RESERVED,
KMD_MSG_RST_DEV,
};
enum cpu_msg_status {