mirror of https://gitee.com/openkylin/linux.git
powerpc/eeh: Cleanup control flow in eeh_handle_normal_event()
Rather than mixing "if (state)" blocks and gotos, convert entirely to
"if (state)" blocks to make the state machine behaviour clearer.

Signed-off-by: Sam Bobroff <sbobroff@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
This commit is contained in:
parent
fef7f90552
commit
b90484ec11
|
@ -808,10 +808,8 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
|
||||||
pr_err("EEH: PHB#%x-PE#%x has failed %d times in the last hour and has been permanently disabled.\n",
|
pr_err("EEH: PHB#%x-PE#%x has failed %d times in the last hour and has been permanently disabled.\n",
|
||||||
pe->phb->global_number, pe->addr,
|
pe->phb->global_number, pe->addr,
|
||||||
pe->freeze_count);
|
pe->freeze_count);
|
||||||
goto hard_fail;
|
result = PCI_ERS_RESULT_DISCONNECT;
|
||||||
}
|
}
|
||||||
pr_warn("EEH: This PCI device has failed %d times in the last hour and will be permanently disabled after %d failures.\n",
|
|
||||||
pe->freeze_count, eeh_max_freezes);
|
|
||||||
|
|
||||||
/* Walk the various device drivers attached to this slot through
|
/* Walk the various device drivers attached to this slot through
|
||||||
* a reset sequence, giving each an opportunity to do what it needs
|
* a reset sequence, giving each an opportunity to do what it needs
|
||||||
|
@ -823,31 +821,39 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
|
||||||
* the error. Override the result if necessary to have partially
|
* the error. Override the result if necessary to have partially
|
||||||
* hotplug for this case.
|
* hotplug for this case.
|
||||||
*/
|
*/
|
||||||
pr_info("EEH: Notify device drivers to shutdown\n");
|
if (result != PCI_ERS_RESULT_DISCONNECT) {
|
||||||
eeh_set_channel_state(pe, pci_channel_io_frozen);
|
pr_warn("EEH: This PCI device has failed %d times in the last hour and will be permanently disabled after %d failures.\n",
|
||||||
eeh_set_irq_state(pe, false);
|
pe->freeze_count, eeh_max_freezes);
|
||||||
eeh_pe_report("error_detected(IO frozen)", pe, eeh_report_error,
|
pr_info("EEH: Notify device drivers to shutdown\n");
|
||||||
&result);
|
eeh_set_channel_state(pe, pci_channel_io_frozen);
|
||||||
if ((pe->type & EEH_PE_PHB) &&
|
eeh_set_irq_state(pe, false);
|
||||||
result != PCI_ERS_RESULT_NONE &&
|
eeh_pe_report("error_detected(IO frozen)", pe,
|
||||||
result != PCI_ERS_RESULT_NEED_RESET)
|
eeh_report_error, &result);
|
||||||
result = PCI_ERS_RESULT_NEED_RESET;
|
if ((pe->type & EEH_PE_PHB) &&
|
||||||
|
result != PCI_ERS_RESULT_NONE &&
|
||||||
|
result != PCI_ERS_RESULT_NEED_RESET)
|
||||||
|
result = PCI_ERS_RESULT_NEED_RESET;
|
||||||
|
}
|
||||||
|
|
||||||
/* Get the current PCI slot state. This can take a long time,
|
/* Get the current PCI slot state. This can take a long time,
|
||||||
* sometimes over 300 seconds for certain systems.
|
* sometimes over 300 seconds for certain systems.
|
||||||
*/
|
*/
|
||||||
rc = eeh_wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000);
|
if (result != PCI_ERS_RESULT_DISCONNECT) {
|
||||||
if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
|
rc = eeh_wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000);
|
||||||
pr_warn("EEH: Permanent failure\n");
|
if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
|
||||||
goto hard_fail;
|
pr_warn("EEH: Permanent failure\n");
|
||||||
|
result = PCI_ERS_RESULT_DISCONNECT;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Since rtas may enable MMIO when posting the error log,
|
/* Since rtas may enable MMIO when posting the error log,
|
||||||
* don't post the error log until after all dev drivers
|
* don't post the error log until after all dev drivers
|
||||||
* have been informed.
|
* have been informed.
|
||||||
*/
|
*/
|
||||||
pr_info("EEH: Collect temporary log\n");
|
if (result != PCI_ERS_RESULT_DISCONNECT) {
|
||||||
eeh_slot_error_detail(pe, EEH_LOG_TEMP);
|
pr_info("EEH: Collect temporary log\n");
|
||||||
|
eeh_slot_error_detail(pe, EEH_LOG_TEMP);
|
||||||
|
}
|
||||||
|
|
||||||
/* If all device drivers were EEH-unaware, then shut
|
/* If all device drivers were EEH-unaware, then shut
|
||||||
* down all of the device drivers, and hope they
|
* down all of the device drivers, and hope they
|
||||||
|
@ -859,7 +865,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
|
||||||
if (rc) {
|
if (rc) {
|
||||||
pr_warn("%s: Unable to reset, err=%d\n",
|
pr_warn("%s: Unable to reset, err=%d\n",
|
||||||
__func__, rc);
|
__func__, rc);
|
||||||
goto hard_fail;
|
result = PCI_ERS_RESULT_DISCONNECT;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -868,9 +874,9 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
|
||||||
pr_info("EEH: Enable I/O for affected devices\n");
|
pr_info("EEH: Enable I/O for affected devices\n");
|
||||||
rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
|
rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
|
||||||
|
|
||||||
if (rc < 0)
|
if (rc < 0) {
|
||||||
goto hard_fail;
|
result = PCI_ERS_RESULT_DISCONNECT;
|
||||||
if (rc) {
|
} else if (rc) {
|
||||||
result = PCI_ERS_RESULT_NEED_RESET;
|
result = PCI_ERS_RESULT_NEED_RESET;
|
||||||
} else {
|
} else {
|
||||||
pr_info("EEH: Notify device drivers to resume I/O\n");
|
pr_info("EEH: Notify device drivers to resume I/O\n");
|
||||||
|
@ -884,9 +890,9 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
|
||||||
pr_info("EEH: Enabled DMA for affected devices\n");
|
pr_info("EEH: Enabled DMA for affected devices\n");
|
||||||
rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
|
rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
|
||||||
|
|
||||||
if (rc < 0)
|
if (rc < 0) {
|
||||||
goto hard_fail;
|
result = PCI_ERS_RESULT_DISCONNECT;
|
||||||
if (rc) {
|
} else if (rc) {
|
||||||
result = PCI_ERS_RESULT_NEED_RESET;
|
result = PCI_ERS_RESULT_NEED_RESET;
|
||||||
} else {
|
} else {
|
||||||
/*
|
/*
|
||||||
|
@ -899,12 +905,6 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* If any device has a hard failure, then shut off everything. */
|
|
||||||
if (result == PCI_ERS_RESULT_DISCONNECT) {
|
|
||||||
pr_warn("EEH: Device driver gave up\n");
|
|
||||||
goto hard_fail;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* If any device called out for a reset, then reset the slot */
|
/* If any device called out for a reset, then reset the slot */
|
||||||
if (result == PCI_ERS_RESULT_NEED_RESET) {
|
if (result == PCI_ERS_RESULT_NEED_RESET) {
|
||||||
pr_info("EEH: Reset without hotplug activity\n");
|
pr_info("EEH: Reset without hotplug activity\n");
|
||||||
|
@ -912,89 +912,81 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
|
||||||
if (rc) {
|
if (rc) {
|
||||||
pr_warn("%s: Cannot reset, err=%d\n",
|
pr_warn("%s: Cannot reset, err=%d\n",
|
||||||
__func__, rc);
|
__func__, rc);
|
||||||
goto hard_fail;
|
result = PCI_ERS_RESULT_DISCONNECT;
|
||||||
|
} else {
|
||||||
|
result = PCI_ERS_RESULT_NONE;
|
||||||
|
eeh_set_channel_state(pe, pci_channel_io_normal);
|
||||||
|
eeh_set_irq_state(pe, true);
|
||||||
|
eeh_pe_report("slot_reset", pe, eeh_report_reset,
|
||||||
|
&result);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((result == PCI_ERS_RESULT_RECOVERED) ||
|
||||||
|
(result == PCI_ERS_RESULT_NONE)) {
|
||||||
|
/*
|
||||||
|
* For those hot removed VFs, we should add back them after PF
|
||||||
|
* get recovered properly.
|
||||||
|
*/
|
||||||
|
list_for_each_entry_safe(edev, tmp, &rmv_data.removed_vf_list,
|
||||||
|
rmv_entry) {
|
||||||
|
eeh_add_virt_device(edev);
|
||||||
|
list_del(&edev->rmv_entry);
|
||||||
}
|
}
|
||||||
|
|
||||||
pr_info("EEH: Notify device drivers "
|
/* Tell all device drivers that they can resume operations */
|
||||||
"the completion of reset\n");
|
pr_info("EEH: Notify device driver to resume\n");
|
||||||
result = PCI_ERS_RESULT_NONE;
|
|
||||||
eeh_set_channel_state(pe, pci_channel_io_normal);
|
eeh_set_channel_state(pe, pci_channel_io_normal);
|
||||||
eeh_set_irq_state(pe, true);
|
eeh_set_irq_state(pe, true);
|
||||||
eeh_pe_report("slot_reset", pe, eeh_report_reset, &result);
|
eeh_pe_report("resume", pe, eeh_report_resume, NULL);
|
||||||
}
|
eeh_for_each_pe(pe, tmp_pe) {
|
||||||
|
eeh_pe_for_each_dev(tmp_pe, edev, tmp) {
|
||||||
|
edev->mode &= ~EEH_DEV_NO_HANDLER;
|
||||||
|
edev->in_error = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* All devices should claim they have recovered by now. */
|
pr_info("EEH: Recovery successful.\n");
|
||||||
if ((result != PCI_ERS_RESULT_RECOVERED) &&
|
} else {
|
||||||
(result != PCI_ERS_RESULT_NONE)) {
|
/*
|
||||||
pr_warn("EEH: Not recovered\n");
|
* About 90% of all real-life EEH failures in the field
|
||||||
goto hard_fail;
|
* are due to poorly seated PCI cards. Only 10% or so are
|
||||||
}
|
* due to actual, failed cards.
|
||||||
|
*/
|
||||||
|
pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n"
|
||||||
|
"Please try reseating or replacing it\n",
|
||||||
|
pe->phb->global_number, pe->addr);
|
||||||
|
|
||||||
/*
|
eeh_slot_error_detail(pe, EEH_LOG_PERM);
|
||||||
* For those hot removed VFs, we should add back them after PF get
|
|
||||||
* recovered properly.
|
|
||||||
*/
|
|
||||||
list_for_each_entry_safe(edev, tmp, &rmv_data.removed_vf_list,
|
|
||||||
rmv_entry) {
|
|
||||||
eeh_add_virt_device(edev);
|
|
||||||
list_del(&edev->rmv_entry);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Tell all device drivers that they can resume operations */
|
/* Notify all devices that they're about to go down. */
|
||||||
pr_info("EEH: Notify device driver to resume\n");
|
eeh_set_channel_state(pe, pci_channel_io_perm_failure);
|
||||||
eeh_set_channel_state(pe, pci_channel_io_normal);
|
eeh_set_irq_state(pe, false);
|
||||||
eeh_set_irq_state(pe, true);
|
eeh_pe_report("error_detected(permanent failure)", pe,
|
||||||
eeh_pe_report("resume", pe, eeh_report_resume, NULL);
|
eeh_report_failure, NULL);
|
||||||
eeh_for_each_pe(pe, tmp_pe) {
|
|
||||||
eeh_pe_for_each_dev(tmp_pe, edev, tmp) {
|
/* Mark the PE to be removed permanently */
|
||||||
edev->mode &= ~EEH_DEV_NO_HANDLER;
|
eeh_pe_state_mark(pe, EEH_PE_REMOVED);
|
||||||
edev->in_error = false;
|
|
||||||
|
/*
|
||||||
|
* Shut down the device drivers for good. We mark
|
||||||
|
* all removed devices correctly to avoid access
|
||||||
|
* the their PCI config any more.
|
||||||
|
*/
|
||||||
|
if (pe->type & EEH_PE_VF) {
|
||||||
|
eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
|
||||||
|
eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
|
||||||
|
} else {
|
||||||
|
eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
|
||||||
|
eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
|
||||||
|
|
||||||
|
pci_lock_rescan_remove();
|
||||||
|
pci_hp_remove_devices(bus);
|
||||||
|
pci_unlock_rescan_remove();
|
||||||
|
/* The passed PE should no longer be used */
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pr_info("EEH: Recovery successful.\n");
|
|
||||||
goto final;
|
|
||||||
|
|
||||||
hard_fail:
|
|
||||||
/*
|
|
||||||
* About 90% of all real-life EEH failures in the field
|
|
||||||
* are due to poorly seated PCI cards. Only 10% or so are
|
|
||||||
* due to actual, failed cards.
|
|
||||||
*/
|
|
||||||
pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n"
|
|
||||||
"Please try reseating or replacing it\n",
|
|
||||||
pe->phb->global_number, pe->addr);
|
|
||||||
|
|
||||||
eeh_slot_error_detail(pe, EEH_LOG_PERM);
|
|
||||||
|
|
||||||
/* Notify all devices that they're about to go down. */
|
|
||||||
eeh_set_channel_state(pe, pci_channel_io_perm_failure);
|
|
||||||
eeh_set_irq_state(pe, false);
|
|
||||||
eeh_pe_report("error_detected(permanent failure)", pe,
|
|
||||||
eeh_report_failure, NULL);
|
|
||||||
|
|
||||||
/* Mark the PE to be removed permanently */
|
|
||||||
eeh_pe_state_mark(pe, EEH_PE_REMOVED);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Shut down the device drivers for good. We mark
|
|
||||||
* all removed devices correctly to avoid access
|
|
||||||
* the their PCI config any more.
|
|
||||||
*/
|
|
||||||
if (pe->type & EEH_PE_VF) {
|
|
||||||
eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
|
|
||||||
eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
|
|
||||||
} else {
|
|
||||||
eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
|
|
||||||
eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
|
|
||||||
|
|
||||||
pci_lock_rescan_remove();
|
|
||||||
pci_hp_remove_devices(bus);
|
|
||||||
pci_unlock_rescan_remove();
|
|
||||||
/* The passed PE should no longer be used */
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
final:
|
|
||||||
eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
|
eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue