IB/hfi1: Prevent LNI out of sync by resetting host interface version

When the link is disabled and re-enabled, the host version bit is not
set again, so the firmware behaves as though it’s interacting with an
old driver. This causes LNI to get out of sync. The host version bit
needs to be set at load_8051_firmware() and _dc_start(). Currently, it's
only set at load_8051_firmware().

Create a common function to set the bit, with the intent of making the
code more maintainable in the future; set the host version bit in
_dc_start(); and modify the 8051 command API to prevent a deadlock, since
_dc_start() already holds the dc8051 lock.

Fixes: 913cc67159 ("IB/hfi1: Always perform offline transition")
Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Sebastian Sanchez <sebastian.sanchez@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
This commit is contained in:
Sebastian Sanchez 2017-10-02 11:04:26 -07:00 committed by Doug Ledford
parent d7d626179f
commit 9be6a5d788
3 changed files with 101 additions and 40 deletions
drivers/infiniband/hw/hfi1

View File

@ -6518,12 +6518,11 @@ static void _dc_start(struct hfi1_devdata *dd)
if (!dd->dc_shutdown) if (!dd->dc_shutdown)
return; return;
/* Take the 8051 out of reset */ /*
write_csr(dd, DC_DC8051_CFG_RST, 0ull); * Take the 8051 out of reset, wait until 8051 is ready, and set host
/* Wait until 8051 is ready */ * version bit.
if (wait_fm_ready(dd, TIMEOUT_8051_START)) */
dd_dev_err(dd, "%s: timeout starting 8051 firmware\n", release_and_wait_ready_8051_firmware(dd);
__func__);
/* Take away reset for LCB and RX FPE (set in lcb_shutdown). */ /* Take away reset for LCB and RX FPE (set in lcb_shutdown). */
write_csr(dd, DCC_CFG_RESET, 0x10); write_csr(dd, DCC_CFG_RESET, 0x10);
@ -8591,30 +8590,23 @@ int write_lcb_csr(struct hfi1_devdata *dd, u32 addr, u64 data)
} }
/* /*
* If the 8051 is in reset mode (dd->dc_shutdown == 1), this function
* will still continue executing.
*
* Returns: * Returns:
* < 0 = Linux error, not able to get access * < 0 = Linux error, not able to get access
* > 0 = 8051 command RETURN_CODE * > 0 = 8051 command RETURN_CODE
*/ */
static int do_8051_command( static int _do_8051_command(struct hfi1_devdata *dd, u32 type, u64 in_data,
struct hfi1_devdata *dd, u64 *out_data)
u32 type,
u64 in_data,
u64 *out_data)
{ {
u64 reg, completed; u64 reg, completed;
int return_code; int return_code;
unsigned long timeout; unsigned long timeout;
lockdep_assert_held(&dd->dc8051_lock);
hfi1_cdbg(DC8051, "type %d, data 0x%012llx", type, in_data); hfi1_cdbg(DC8051, "type %d, data 0x%012llx", type, in_data);
mutex_lock(&dd->dc8051_lock);
/* We can't send any commands to the 8051 if it's in reset */
if (dd->dc_shutdown) {
return_code = -ENODEV;
goto fail;
}
/* /*
* If an 8051 host command timed out previously, then the 8051 is * If an 8051 host command timed out previously, then the 8051 is
* stuck. * stuck.
@ -8714,6 +8706,29 @@ static int do_8051_command(
*/ */
write_csr(dd, DC_DC8051_CFG_HOST_CMD_0, 0); write_csr(dd, DC_DC8051_CFG_HOST_CMD_0, 0);
fail:
return return_code;
}
/*
* Returns:
* < 0 = Linux error, not able to get access
* > 0 = 8051 command RETURN_CODE
*/
static int do_8051_command(struct hfi1_devdata *dd, u32 type, u64 in_data,
u64 *out_data)
{
int return_code;
mutex_lock(&dd->dc8051_lock);
/* We can't send any commands to the 8051 if it's in reset */
if (dd->dc_shutdown) {
return_code = -ENODEV;
goto fail;
}
return_code = _do_8051_command(dd, type, in_data, out_data);
fail: fail:
mutex_unlock(&dd->dc8051_lock); mutex_unlock(&dd->dc8051_lock);
return return_code; return return_code;
@ -8724,16 +8739,17 @@ static int set_physical_link_state(struct hfi1_devdata *dd, u64 state)
return do_8051_command(dd, HCMD_CHANGE_PHY_STATE, state, NULL); return do_8051_command(dd, HCMD_CHANGE_PHY_STATE, state, NULL);
} }
int load_8051_config(struct hfi1_devdata *dd, u8 field_id, int _load_8051_config(struct hfi1_devdata *dd, u8 field_id,
u8 lane_id, u32 config_data) u8 lane_id, u32 config_data)
{ {
u64 data; u64 data;
int ret; int ret;
lockdep_assert_held(&dd->dc8051_lock);
data = (u64)field_id << LOAD_DATA_FIELD_ID_SHIFT data = (u64)field_id << LOAD_DATA_FIELD_ID_SHIFT
| (u64)lane_id << LOAD_DATA_LANE_ID_SHIFT | (u64)lane_id << LOAD_DATA_LANE_ID_SHIFT
| (u64)config_data << LOAD_DATA_DATA_SHIFT; | (u64)config_data << LOAD_DATA_DATA_SHIFT;
ret = do_8051_command(dd, HCMD_LOAD_CONFIG_DATA, data, NULL); ret = _do_8051_command(dd, HCMD_LOAD_CONFIG_DATA, data, NULL);
if (ret != HCMD_SUCCESS) { if (ret != HCMD_SUCCESS) {
dd_dev_err(dd, dd_dev_err(dd,
"load 8051 config: field id %d, lane %d, err %d\n", "load 8051 config: field id %d, lane %d, err %d\n",
@ -8742,6 +8758,18 @@ int load_8051_config(struct hfi1_devdata *dd, u8 field_id,
return ret; return ret;
} }
/*
 * Locked front end for _load_8051_config(): acquire dd->dc8051_lock,
 * forward the request unchanged, release the lock, and hand back
 * whatever _load_8051_config() returned.
 */
int load_8051_config(struct hfi1_devdata *dd, u8 field_id,
		     u8 lane_id, u32 config_data)
{
	int rc;

	mutex_lock(&dd->dc8051_lock);
	rc = _load_8051_config(dd, field_id, lane_id, config_data);
	mutex_unlock(&dd->dc8051_lock);

	return rc;
}
/* /*
* Read the 8051 firmware "registers". Use the RAM directly. Always * Read the 8051 firmware "registers". Use the RAM directly. Always
* set the result, even on error. * set the result, even on error.
@ -8857,13 +8885,14 @@ int write_host_interface_version(struct hfi1_devdata *dd, u8 version)
u32 frame; u32 frame;
u32 mask; u32 mask;
lockdep_assert_held(&dd->dc8051_lock);
mask = (HOST_INTERFACE_VERSION_MASK << HOST_INTERFACE_VERSION_SHIFT); mask = (HOST_INTERFACE_VERSION_MASK << HOST_INTERFACE_VERSION_SHIFT);
read_8051_config(dd, RESERVED_REGISTERS, GENERAL_CONFIG, &frame); read_8051_config(dd, RESERVED_REGISTERS, GENERAL_CONFIG, &frame);
/* Clear, then set field */ /* Clear, then set field */
frame &= ~mask; frame &= ~mask;
frame |= ((u32)version << HOST_INTERFACE_VERSION_SHIFT); frame |= ((u32)version << HOST_INTERFACE_VERSION_SHIFT);
return load_8051_config(dd, RESERVED_REGISTERS, GENERAL_CONFIG, return _load_8051_config(dd, RESERVED_REGISTERS, GENERAL_CONFIG,
frame); frame);
} }
void read_misc_status(struct hfi1_devdata *dd, u8 *ver_major, u8 *ver_minor, void read_misc_status(struct hfi1_devdata *dd, u8 *ver_major, u8 *ver_minor,

View File

@ -709,6 +709,7 @@ void read_misc_status(struct hfi1_devdata *dd, u8 *ver_major, u8 *ver_minor,
u8 *ver_patch); u8 *ver_patch);
int write_host_interface_version(struct hfi1_devdata *dd, u8 version); int write_host_interface_version(struct hfi1_devdata *dd, u8 version);
void read_guid(struct hfi1_devdata *dd); void read_guid(struct hfi1_devdata *dd);
int release_and_wait_ready_8051_firmware(struct hfi1_devdata *dd);
int wait_fm_ready(struct hfi1_devdata *dd, u32 mstimeout); int wait_fm_ready(struct hfi1_devdata *dd, u32 mstimeout);
void set_link_down_reason(struct hfi1_pportdata *ppd, u8 lcl_reason, void set_link_down_reason(struct hfi1_pportdata *ppd, u8 lcl_reason,
u8 neigh_reason, u8 rem_reason); u8 neigh_reason, u8 rem_reason);

View File

@ -970,6 +970,46 @@ int wait_fm_ready(struct hfi1_devdata *dd, u32 mstimeout)
} }
} }
/*
 * Bring the 8051 out of reset and prepare it for host commands:
 *   1. clear every reset bit in DC_DC8051_CFG_RST,
 *   2. wait (up to TIMEOUT_8051_START) for the firmware to become ready,
 *   3. write the host interface version.
 *
 * No check of dd->dc_shutdown is made here, so this runs even while the
 * 8051 is flagged as being in reset (dd->dc_shutdown == 1).
 *
 * The caller must hold dd->dc8051_lock.
 *
 * Returns:
 *   0      on success
 *   the non-zero result of wait_fm_ready() if the firmware never
 *          became ready
 *   -EIO   if writing the host interface version did not return
 *          HCMD_SUCCESS
 */
int release_and_wait_ready_8051_firmware(struct hfi1_devdata *dd)
{
	int rc;
	int hcmd_status;

	lockdep_assert_held(&dd->dc8051_lock);

	/* Drop all reset bits so the 8051 starts running. */
	write_csr(dd, DC_DC8051_CFG_RST, 0ull);

	/* The firmware must be ready before it can accept host requests. */
	rc = wait_fm_ready(dd, TIMEOUT_8051_START);
	if (rc != 0) {
		dd_dev_err(dd, "8051 start timeout, current FW state 0x%x\n",
			   get_firmware_state(dd));
		return rc;
	}

	hcmd_status = write_host_interface_version(dd, HOST_INTERFACE_VERSION);
	if (hcmd_status == HCMD_SUCCESS)
		return 0;

	/* The 8051 command status is not an errno; report it, return -EIO. */
	dd_dev_err(dd,
		   "Failed to set host interface version, return 0x%x\n",
		   hcmd_status);
	return -EIO;
}
/* /*
* Load the 8051 firmware. * Load the 8051 firmware.
*/ */
@ -1035,31 +1075,22 @@ static int load_8051_firmware(struct hfi1_devdata *dd,
if (ret) if (ret)
return ret; return ret;
/* clear all reset bits, releasing the 8051 */
write_csr(dd, DC_DC8051_CFG_RST, 0ull);
/* /*
* Clear all reset bits, releasing the 8051.
* DC reset step 5. Wait for firmware to be ready to accept host * DC reset step 5. Wait for firmware to be ready to accept host
* requests. * requests.
* Then, set host version bit.
*/ */
ret = wait_fm_ready(dd, TIMEOUT_8051_START); mutex_lock(&dd->dc8051_lock);
if (ret) { /* timed out */ ret = release_and_wait_ready_8051_firmware(dd);
dd_dev_err(dd, "8051 start timeout, current state 0x%x\n", mutex_unlock(&dd->dc8051_lock);
get_firmware_state(dd)); if (ret)
return -ETIMEDOUT; return ret;
}
read_misc_status(dd, &ver_major, &ver_minor, &ver_patch); read_misc_status(dd, &ver_major, &ver_minor, &ver_patch);
dd_dev_info(dd, "8051 firmware version %d.%d.%d\n", dd_dev_info(dd, "8051 firmware version %d.%d.%d\n",
(int)ver_major, (int)ver_minor, (int)ver_patch); (int)ver_major, (int)ver_minor, (int)ver_patch);
dd->dc8051_ver = dc8051_ver(ver_major, ver_minor, ver_patch); dd->dc8051_ver = dc8051_ver(ver_major, ver_minor, ver_patch);
ret = write_host_interface_version(dd, HOST_INTERFACE_VERSION);
if (ret != HCMD_SUCCESS) {
dd_dev_err(dd,
"Failed to set host interface version, return 0x%x\n",
ret);
return -EIO;
}
return 0; return 0;
} }