mirror of https://gitee.com/openkylin/linux.git
powerpc/eeh: Cleanup control flow in eeh_handle_normal_event()
Rather than mixing "if (state)" blocks and gotos, convert entirely to "if (state)" blocks to make the state machine behaviour clearer. Signed-off-by: Sam Bobroff <sbobroff@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
This commit is contained in:
parent
fef7f90552
commit
b90484ec11
|
@ -808,10 +808,8 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
|
|||
pr_err("EEH: PHB#%x-PE#%x has failed %d times in the last hour and has been permanently disabled.\n",
|
||||
pe->phb->global_number, pe->addr,
|
||||
pe->freeze_count);
|
||||
goto hard_fail;
|
||||
result = PCI_ERS_RESULT_DISCONNECT;
|
||||
}
|
||||
pr_warn("EEH: This PCI device has failed %d times in the last hour and will be permanently disabled after %d failures.\n",
|
||||
pe->freeze_count, eeh_max_freezes);
|
||||
|
||||
/* Walk the various device drivers attached to this slot through
|
||||
* a reset sequence, giving each an opportunity to do what it needs
|
||||
|
@ -823,31 +821,39 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
|
|||
* the error. Override the result if necessary to have partial
|
||||
* hotplug for this case.
|
||||
*/
|
||||
pr_info("EEH: Notify device drivers to shutdown\n");
|
||||
eeh_set_channel_state(pe, pci_channel_io_frozen);
|
||||
eeh_set_irq_state(pe, false);
|
||||
eeh_pe_report("error_detected(IO frozen)", pe, eeh_report_error,
|
||||
&result);
|
||||
if ((pe->type & EEH_PE_PHB) &&
|
||||
result != PCI_ERS_RESULT_NONE &&
|
||||
result != PCI_ERS_RESULT_NEED_RESET)
|
||||
result = PCI_ERS_RESULT_NEED_RESET;
|
||||
if (result != PCI_ERS_RESULT_DISCONNECT) {
|
||||
pr_warn("EEH: This PCI device has failed %d times in the last hour and will be permanently disabled after %d failures.\n",
|
||||
pe->freeze_count, eeh_max_freezes);
|
||||
pr_info("EEH: Notify device drivers to shutdown\n");
|
||||
eeh_set_channel_state(pe, pci_channel_io_frozen);
|
||||
eeh_set_irq_state(pe, false);
|
||||
eeh_pe_report("error_detected(IO frozen)", pe,
|
||||
eeh_report_error, &result);
|
||||
if ((pe->type & EEH_PE_PHB) &&
|
||||
result != PCI_ERS_RESULT_NONE &&
|
||||
result != PCI_ERS_RESULT_NEED_RESET)
|
||||
result = PCI_ERS_RESULT_NEED_RESET;
|
||||
}
|
||||
|
||||
/* Get the current PCI slot state. This can take a long time,
|
||||
* sometimes over 300 seconds for certain systems.
|
||||
*/
|
||||
rc = eeh_wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000);
|
||||
if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
|
||||
pr_warn("EEH: Permanent failure\n");
|
||||
goto hard_fail;
|
||||
if (result != PCI_ERS_RESULT_DISCONNECT) {
|
||||
rc = eeh_wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000);
|
||||
if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
|
||||
pr_warn("EEH: Permanent failure\n");
|
||||
result = PCI_ERS_RESULT_DISCONNECT;
|
||||
}
|
||||
}
|
||||
|
||||
/* Since rtas may enable MMIO when posting the error log,
|
||||
* don't post the error log until after all dev drivers
|
||||
* have been informed.
|
||||
*/
|
||||
pr_info("EEH: Collect temporary log\n");
|
||||
eeh_slot_error_detail(pe, EEH_LOG_TEMP);
|
||||
if (result != PCI_ERS_RESULT_DISCONNECT) {
|
||||
pr_info("EEH: Collect temporary log\n");
|
||||
eeh_slot_error_detail(pe, EEH_LOG_TEMP);
|
||||
}
|
||||
|
||||
/* If all device drivers were EEH-unaware, then shut
|
||||
* down all of the device drivers, and hope they
|
||||
|
@ -859,7 +865,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
|
|||
if (rc) {
|
||||
pr_warn("%s: Unable to reset, err=%d\n",
|
||||
__func__, rc);
|
||||
goto hard_fail;
|
||||
result = PCI_ERS_RESULT_DISCONNECT;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -868,9 +874,9 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
|
|||
pr_info("EEH: Enable I/O for affected devices\n");
|
||||
rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
|
||||
|
||||
if (rc < 0)
|
||||
goto hard_fail;
|
||||
if (rc) {
|
||||
if (rc < 0) {
|
||||
result = PCI_ERS_RESULT_DISCONNECT;
|
||||
} else if (rc) {
|
||||
result = PCI_ERS_RESULT_NEED_RESET;
|
||||
} else {
|
||||
pr_info("EEH: Notify device drivers to resume I/O\n");
|
||||
|
@ -884,9 +890,9 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
|
|||
pr_info("EEH: Enabled DMA for affected devices\n");
|
||||
rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
|
||||
|
||||
if (rc < 0)
|
||||
goto hard_fail;
|
||||
if (rc) {
|
||||
if (rc < 0) {
|
||||
result = PCI_ERS_RESULT_DISCONNECT;
|
||||
} else if (rc) {
|
||||
result = PCI_ERS_RESULT_NEED_RESET;
|
||||
} else {
|
||||
/*
|
||||
|
@ -899,12 +905,6 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
|
|||
}
|
||||
}
|
||||
|
||||
/* If any device has a hard failure, then shut off everything. */
|
||||
if (result == PCI_ERS_RESULT_DISCONNECT) {
|
||||
pr_warn("EEH: Device driver gave up\n");
|
||||
goto hard_fail;
|
||||
}
|
||||
|
||||
/* If any device called out for a reset, then reset the slot */
|
||||
if (result == PCI_ERS_RESULT_NEED_RESET) {
|
||||
pr_info("EEH: Reset without hotplug activity\n");
|
||||
|
@ -912,89 +912,81 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
|
|||
if (rc) {
|
||||
pr_warn("%s: Cannot reset, err=%d\n",
|
||||
__func__, rc);
|
||||
goto hard_fail;
|
||||
result = PCI_ERS_RESULT_DISCONNECT;
|
||||
} else {
|
||||
result = PCI_ERS_RESULT_NONE;
|
||||
eeh_set_channel_state(pe, pci_channel_io_normal);
|
||||
eeh_set_irq_state(pe, true);
|
||||
eeh_pe_report("slot_reset", pe, eeh_report_reset,
|
||||
&result);
|
||||
}
|
||||
}
|
||||
|
||||
if ((result == PCI_ERS_RESULT_RECOVERED) ||
|
||||
(result == PCI_ERS_RESULT_NONE)) {
|
||||
/*
|
||||
* For those hot removed VFs, we should add them back after PF
|
||||
* get recovered properly.
|
||||
*/
|
||||
list_for_each_entry_safe(edev, tmp, &rmv_data.removed_vf_list,
|
||||
rmv_entry) {
|
||||
eeh_add_virt_device(edev);
|
||||
list_del(&edev->rmv_entry);
|
||||
}
|
||||
|
||||
pr_info("EEH: Notify device drivers "
|
||||
"the completion of reset\n");
|
||||
result = PCI_ERS_RESULT_NONE;
|
||||
/* Tell all device drivers that they can resume operations */
|
||||
pr_info("EEH: Notify device driver to resume\n");
|
||||
eeh_set_channel_state(pe, pci_channel_io_normal);
|
||||
eeh_set_irq_state(pe, true);
|
||||
eeh_pe_report("slot_reset", pe, eeh_report_reset, &result);
|
||||
}
|
||||
eeh_pe_report("resume", pe, eeh_report_resume, NULL);
|
||||
eeh_for_each_pe(pe, tmp_pe) {
|
||||
eeh_pe_for_each_dev(tmp_pe, edev, tmp) {
|
||||
edev->mode &= ~EEH_DEV_NO_HANDLER;
|
||||
edev->in_error = false;
|
||||
}
|
||||
}
|
||||
|
||||
/* All devices should claim they have recovered by now. */
|
||||
if ((result != PCI_ERS_RESULT_RECOVERED) &&
|
||||
(result != PCI_ERS_RESULT_NONE)) {
|
||||
pr_warn("EEH: Not recovered\n");
|
||||
goto hard_fail;
|
||||
}
|
||||
pr_info("EEH: Recovery successful.\n");
|
||||
} else {
|
||||
/*
|
||||
* About 90% of all real-life EEH failures in the field
|
||||
* are due to poorly seated PCI cards. Only 10% or so are
|
||||
* due to actual, failed cards.
|
||||
*/
|
||||
pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n"
|
||||
"Please try reseating or replacing it\n",
|
||||
pe->phb->global_number, pe->addr);
|
||||
|
||||
/*
|
||||
* For those hot removed VFs, we should add them back after PF gets
|
||||
* recovered properly.
|
||||
*/
|
||||
list_for_each_entry_safe(edev, tmp, &rmv_data.removed_vf_list,
|
||||
rmv_entry) {
|
||||
eeh_add_virt_device(edev);
|
||||
list_del(&edev->rmv_entry);
|
||||
}
|
||||
eeh_slot_error_detail(pe, EEH_LOG_PERM);
|
||||
|
||||
/* Tell all device drivers that they can resume operations */
|
||||
pr_info("EEH: Notify device driver to resume\n");
|
||||
eeh_set_channel_state(pe, pci_channel_io_normal);
|
||||
eeh_set_irq_state(pe, true);
|
||||
eeh_pe_report("resume", pe, eeh_report_resume, NULL);
|
||||
eeh_for_each_pe(pe, tmp_pe) {
|
||||
eeh_pe_for_each_dev(tmp_pe, edev, tmp) {
|
||||
edev->mode &= ~EEH_DEV_NO_HANDLER;
|
||||
edev->in_error = false;
|
||||
/* Notify all devices that they're about to go down. */
|
||||
eeh_set_channel_state(pe, pci_channel_io_perm_failure);
|
||||
eeh_set_irq_state(pe, false);
|
||||
eeh_pe_report("error_detected(permanent failure)", pe,
|
||||
eeh_report_failure, NULL);
|
||||
|
||||
/* Mark the PE to be removed permanently */
|
||||
eeh_pe_state_mark(pe, EEH_PE_REMOVED);
|
||||
|
||||
/*
|
||||
* Shut down the device drivers for good. We mark
|
||||
* all removed devices correctly to avoid access
|
||||
* to their PCI config any more.
|
||||
*/
|
||||
if (pe->type & EEH_PE_VF) {
|
||||
eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
|
||||
eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
|
||||
} else {
|
||||
eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
|
||||
eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
|
||||
|
||||
pci_lock_rescan_remove();
|
||||
pci_hp_remove_devices(bus);
|
||||
pci_unlock_rescan_remove();
|
||||
/* The passed PE should no longer be used */
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
pr_info("EEH: Recovery successful.\n");
|
||||
goto final;
|
||||
|
||||
hard_fail:
|
||||
/*
|
||||
* About 90% of all real-life EEH failures in the field
|
||||
* are due to poorly seated PCI cards. Only 10% or so are
|
||||
* due to actual, failed cards.
|
||||
*/
|
||||
pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n"
|
||||
"Please try reseating or replacing it\n",
|
||||
pe->phb->global_number, pe->addr);
|
||||
|
||||
eeh_slot_error_detail(pe, EEH_LOG_PERM);
|
||||
|
||||
/* Notify all devices that they're about to go down. */
|
||||
eeh_set_channel_state(pe, pci_channel_io_perm_failure);
|
||||
eeh_set_irq_state(pe, false);
|
||||
eeh_pe_report("error_detected(permanent failure)", pe,
|
||||
eeh_report_failure, NULL);
|
||||
|
||||
/* Mark the PE to be removed permanently */
|
||||
eeh_pe_state_mark(pe, EEH_PE_REMOVED);
|
||||
|
||||
/*
|
||||
* Shut down the device drivers for good. We mark
|
||||
* all removed devices correctly to avoid access
|
||||
* to their PCI config any more.
|
||||
*/
|
||||
if (pe->type & EEH_PE_VF) {
|
||||
eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
|
||||
eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
|
||||
} else {
|
||||
eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
|
||||
eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
|
||||
|
||||
pci_lock_rescan_remove();
|
||||
pci_hp_remove_devices(bus);
|
||||
pci_unlock_rescan_remove();
|
||||
/* The passed PE should no longer be used */
|
||||
return;
|
||||
}
|
||||
final:
|
||||
eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue