powerpc/eeh: Add a debugfs interface to check if a driver supports recovery

If a PCI device's current driver implements the error handling callbacks
EEH can use them to recover the device after an error occurs. For devices
without the error handling callbacks we recover them by removing the device
and re-scanning it so the PCI core puts the device back into a known good
state.

Currently there's no way for userspace to determine if the driver supports
recovery or not which makes it difficult to write automated tests for EEH.
This patch addressing that by adding a debugfs interface for querying if
a specific device can be recovered or not.

Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20201103051512.919333-2-oohall@gmail.com
This commit is contained in:
Oliver O'Halloran 2020-11-03 16:15:12 +11:00 committed by Michael Ellerman
parent b5e904b830
commit 9e85741683
1 changed files with 50 additions and 0 deletions

View File

@ -1868,6 +1868,53 @@ static const struct file_operations eeh_dev_break_fops = {
.read = eeh_debugfs_dev_usage,
};
static ssize_t eeh_dev_can_recover(struct file *filp,
const char __user *user_buf,
size_t count, loff_t *ppos)
{
struct pci_driver *drv;
struct pci_dev *pdev;
size_t ret;
pdev = eeh_debug_lookup_pdev(filp, user_buf, count, ppos);
if (IS_ERR(pdev))
return PTR_ERR(pdev);
/*
* In order for error recovery to work the driver needs to implement
* .error_detected(), so it can quiesce IO to the device, and
* .slot_reset() so it can re-initialise the device after a reset.
*
* Ideally they'd implement .resume() too, but some drivers which
* we need to support (notably IPR) don't so I guess we can tolerate
* that.
*
* .mmio_enabled() is mostly there as a work-around for devices which
* take forever to re-init after a hot reset. Implementing that is
* strictly optional.
*/
drv = pci_dev_driver(pdev);
if (drv &&
drv->err_handler &&
drv->err_handler->error_detected &&
drv->err_handler->slot_reset) {
ret = count;
} else {
ret = -EOPNOTSUPP;
}
pci_dev_put(pdev);
return ret;
}
static const struct file_operations eeh_dev_can_recover_fops = {
.open = simple_open,
.llseek = no_llseek,
.write = eeh_dev_can_recover,
.read = eeh_debugfs_dev_usage,
};
#endif
static int __init eeh_init_proc(void)
@ -1892,6 +1939,9 @@ static int __init eeh_init_proc(void)
debugfs_create_file_unsafe("eeh_force_recover", 0600,
powerpc_debugfs_root, NULL,
&eeh_force_recover_fops);
debugfs_create_file_unsafe("eeh_dev_can_recover", 0600,
powerpc_debugfs_root, NULL,
&eeh_dev_can_recover_fops);
eeh_cache_debugfs_init();
#endif
}