mirror of https://gitee.com/openkylin/linux.git
Merge branch 'pci/aer'
- unify AER decoding for native and ACPI CPER sources (Alexandru Gagniuc) - add TLP header info to AER tracepoint (Thomas Tai) - add generic pcie_wait_for_link() interface (Oza Pawandeep) - handle AER ERR_FATAL by removing and re-enumerating devices, as Downstream Port Containment does (Oza Pawandeep) - factor out common code between AER and DPC recovery (Oza Pawandeep) - stop triggering DPC for ERR_NONFATAL errors (Oza Pawandeep) - share ERR_FATAL recovery path between AER and DPC (Oza Pawandeep) * pci/aer: PCI/AER: Replace struct pcie_device with pci_dev PCI/AER: Remove unused parameters PCI/AER: Decode Error Source Requester ID PCI/AER: Remove aer_recover_work_func() forward declaration PCI/DPC: Use the generic pcie_do_fatal_recovery() path PCI/AER: Pass service type to pcie_do_fatal_recovery() PCI/DPC: Disable ERR_NONFATAL handling by DPC PCI/portdrv: Add generic pcie_port_find_device() PCI/portdrv: Add generic pcie_port_find_service() PCI/AER: Factor out error reporting to drivers/pci/pcie/err.c PCI/AER: Rename error recovery interfaces to generic PCI naming PCI/AER: Handle ERR_FATAL with removal and re-enumeration of devices PCI: Add generic pcie_wait_for_link() interface PCI/AER: Add TLP header information to tracepoint PCI/AER: Unify error bit printing for native and CPER reporting
This commit is contained in:
commit
d7e02c08a9
|
@ -110,7 +110,7 @@ The actual steps taken by a platform to recover from a PCI error
|
|||
event will be platform-dependent, but will follow the general
|
||||
sequence described below.
|
||||
|
||||
STEP 0: Error Event
|
||||
STEP 0: Error Event: ERR_NONFATAL
|
||||
-------------------
|
||||
A PCI bus error is detected by the PCI hardware. On powerpc, the slot
|
||||
is isolated, in that all I/O is blocked: all reads return 0xffffffff,
|
||||
|
@ -228,13 +228,7 @@ proceeds to either STEP3 (Link Reset) or to STEP 5 (Resume Operations).
|
|||
If any driver returned PCI_ERS_RESULT_NEED_RESET, then the platform
|
||||
proceeds to STEP 4 (Slot Reset)
|
||||
|
||||
STEP 3: Link Reset
|
||||
------------------
|
||||
The platform resets the link. This is a PCI-Express specific step
|
||||
and is done whenever a fatal error has been detected that can be
|
||||
"solved" by resetting the link.
|
||||
|
||||
STEP 4: Slot Reset
|
||||
STEP 3: Slot Reset
|
||||
------------------
|
||||
|
||||
In response to a return value of PCI_ERS_RESULT_NEED_RESET, the
|
||||
|
@ -320,7 +314,7 @@ Failure).
|
|||
>>> However, it probably should.
|
||||
|
||||
|
||||
STEP 5: Resume Operations
|
||||
STEP 4: Resume Operations
|
||||
-------------------------
|
||||
The platform will call the resume() callback on all affected device
|
||||
drivers if all drivers on the segment have returned
|
||||
|
@ -332,7 +326,7 @@ a result code.
|
|||
At this point, if a new error happens, the platform will restart
|
||||
a new error recovery sequence.
|
||||
|
||||
STEP 6: Permanent Failure
|
||||
STEP 5: Permanent Failure
|
||||
-------------------------
|
||||
A "permanent failure" has occurred, and the platform cannot recover
|
||||
the device. The platform will call error_detected() with a
|
||||
|
@ -355,6 +349,27 @@ errors. See the discussion in powerpc/eeh-pci-error-recovery.txt
|
|||
for additional detail on real-life experience of the causes of
|
||||
software errors.
|
||||
|
||||
STEP 0: Error Event: ERR_FATAL
|
||||
-------------------
|
||||
PCI bus error is detected by the PCI hardware. On powerpc, the slot is
|
||||
isolated, in that all I/O is blocked: all reads return 0xffffffff, all
|
||||
writes are ignored.
|
||||
|
||||
STEP 1: Remove devices
|
||||
--------------------
|
||||
Platform removes the devices depending on the error agent, it could be
|
||||
this port for all subordinates or upstream component (likely downstream
|
||||
port)
|
||||
|
||||
STEP 2: Reset link
|
||||
--------------------
|
||||
The platform resets the link. This is a PCI-Express specific step and is
|
||||
done whenever a fatal error has been detected that can be "solved" by
|
||||
resetting the link.
|
||||
|
||||
STEP 3: Re-enumerate the devices
|
||||
--------------------
|
||||
Initiates the re-enumeration.
|
||||
|
||||
Conclusion; General Remarks
|
||||
---------------------------
|
||||
|
|
|
@ -231,25 +231,11 @@ bool pciehp_check_link_active(struct controller *ctrl)
|
|||
return ret;
|
||||
}
|
||||
|
||||
static void __pcie_wait_link_active(struct controller *ctrl, bool active)
|
||||
{
|
||||
int timeout = 1000;
|
||||
|
||||
if (pciehp_check_link_active(ctrl) == active)
|
||||
return;
|
||||
while (timeout > 0) {
|
||||
msleep(10);
|
||||
timeout -= 10;
|
||||
if (pciehp_check_link_active(ctrl) == active)
|
||||
return;
|
||||
}
|
||||
ctrl_dbg(ctrl, "Data Link Layer Link Active not %s in 1000 msec\n",
|
||||
active ? "set" : "cleared");
|
||||
}
|
||||
|
||||
static void pcie_wait_link_active(struct controller *ctrl)
|
||||
{
|
||||
__pcie_wait_link_active(ctrl, true);
|
||||
struct pci_dev *pdev = ctrl_dev(ctrl);
|
||||
|
||||
pcie_wait_for_link(pdev, true);
|
||||
}
|
||||
|
||||
static bool pci_bus_check_dev(struct pci_bus *bus, int devfn)
|
||||
|
|
|
@ -1535,7 +1535,7 @@ static int pci_uevent(struct device *dev, struct kobj_uevent_env *env)
|
|||
return 0;
|
||||
}
|
||||
|
||||
#if defined(CONFIG_PCIEAER) || defined(CONFIG_EEH)
|
||||
#if defined(CONFIG_PCIEPORTBUS) || defined(CONFIG_EEH)
|
||||
/**
|
||||
* pci_uevent_ers - emit a uevent during recovery path of PCI device
|
||||
* @pdev: PCI device undergoing error recovery
|
||||
|
|
|
@ -4138,6 +4138,35 @@ static int pci_pm_reset(struct pci_dev *dev, int probe)
|
|||
|
||||
return pci_dev_wait(dev, "PM D3->D0", PCIE_RESET_READY_POLL_MS);
|
||||
}
|
||||
/**
|
||||
* pcie_wait_for_link - Wait until link is active or inactive
|
||||
* @pdev: Bridge device
|
||||
* @active: waiting for active or inactive?
|
||||
*
|
||||
* Use this to wait till link becomes active or inactive.
|
||||
*/
|
||||
bool pcie_wait_for_link(struct pci_dev *pdev, bool active)
|
||||
{
|
||||
int timeout = 1000;
|
||||
bool ret;
|
||||
u16 lnk_status;
|
||||
|
||||
for (;;) {
|
||||
pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnk_status);
|
||||
ret = !!(lnk_status & PCI_EXP_LNKSTA_DLLLA);
|
||||
if (ret == active)
|
||||
return true;
|
||||
if (timeout <= 0)
|
||||
break;
|
||||
msleep(10);
|
||||
timeout -= 10;
|
||||
}
|
||||
|
||||
pci_info(pdev, "Data Link Layer Link Active not %s in 1000 msec\n",
|
||||
active ? "set" : "cleared");
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void pci_reset_secondary_bus(struct pci_dev *dev)
|
||||
{
|
||||
|
|
|
@ -353,6 +353,11 @@ static inline resource_size_t pci_resource_alignment(struct pci_dev *dev,
|
|||
|
||||
void pci_enable_acs(struct pci_dev *dev);
|
||||
|
||||
/* PCI error reporting and recovery */
|
||||
void pcie_do_fatal_recovery(struct pci_dev *dev, u32 service);
|
||||
void pcie_do_nonfatal_recovery(struct pci_dev *dev);
|
||||
|
||||
bool pcie_wait_for_link(struct pci_dev *pdev, bool active);
|
||||
#ifdef CONFIG_PCIEASPM
|
||||
void pcie_aspm_init_link_state(struct pci_dev *pdev);
|
||||
void pcie_aspm_exit_link_state(struct pci_dev *pdev);
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
#
|
||||
# Makefile for PCI Express features and port driver
|
||||
|
||||
pcieportdrv-y := portdrv_core.o portdrv_pci.o
|
||||
pcieportdrv-y := portdrv_core.o portdrv_pci.o err.o
|
||||
|
||||
obj-$(CONFIG_PCIEPORTBUS) += pcieportdrv.o
|
||||
|
||||
|
|
|
@ -94,7 +94,7 @@ static void set_downstream_devices_error_reporting(struct pci_dev *dev,
|
|||
*/
|
||||
static void aer_enable_rootport(struct aer_rpc *rpc)
|
||||
{
|
||||
struct pci_dev *pdev = rpc->rpd->port;
|
||||
struct pci_dev *pdev = rpc->rpd;
|
||||
int aer_pos;
|
||||
u16 reg16;
|
||||
u32 reg32;
|
||||
|
@ -136,7 +136,7 @@ static void aer_enable_rootport(struct aer_rpc *rpc)
|
|||
*/
|
||||
static void aer_disable_rootport(struct aer_rpc *rpc)
|
||||
{
|
||||
struct pci_dev *pdev = rpc->rpd->port;
|
||||
struct pci_dev *pdev = rpc->rpd;
|
||||
u32 reg32;
|
||||
int pos;
|
||||
|
||||
|
@ -232,7 +232,7 @@ static struct aer_rpc *aer_alloc_rpc(struct pcie_device *dev)
|
|||
/* Initialize Root lock access, e_lock, to Root Error Status Reg */
|
||||
spin_lock_init(&rpc->e_lock);
|
||||
|
||||
rpc->rpd = dev;
|
||||
rpc->rpd = dev->port;
|
||||
INIT_WORK(&rpc->dpc_handler, aer_isr);
|
||||
mutex_init(&rpc->rpc_mutex);
|
||||
|
||||
|
@ -353,10 +353,7 @@ static void aer_error_resume(struct pci_dev *dev)
|
|||
pos = dev->aer_cap;
|
||||
pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
|
||||
pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &mask);
|
||||
if (dev->error_state == pci_channel_io_normal)
|
||||
status &= ~mask; /* Clear corresponding nonfatal bits */
|
||||
else
|
||||
status &= mask; /* Clear corresponding fatal bits */
|
||||
status &= ~mask; /* Clear corresponding nonfatal bits */
|
||||
pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status);
|
||||
}
|
||||
|
||||
|
|
|
@ -58,7 +58,7 @@ struct aer_err_source {
|
|||
};
|
||||
|
||||
struct aer_rpc {
|
||||
struct pcie_device *rpd; /* Root Port device */
|
||||
struct pci_dev *rpd; /* Root Port device */
|
||||
struct work_struct dpc_handler;
|
||||
struct aer_err_source e_sources[AER_ERROR_SOURCES_MAX];
|
||||
struct aer_err_info e_info;
|
||||
|
@ -76,36 +76,6 @@ struct aer_rpc {
|
|||
*/
|
||||
};
|
||||
|
||||
struct aer_broadcast_data {
|
||||
enum pci_channel_state state;
|
||||
enum pci_ers_result result;
|
||||
};
|
||||
|
||||
static inline pci_ers_result_t merge_result(enum pci_ers_result orig,
|
||||
enum pci_ers_result new)
|
||||
{
|
||||
if (new == PCI_ERS_RESULT_NO_AER_DRIVER)
|
||||
return PCI_ERS_RESULT_NO_AER_DRIVER;
|
||||
|
||||
if (new == PCI_ERS_RESULT_NONE)
|
||||
return orig;
|
||||
|
||||
switch (orig) {
|
||||
case PCI_ERS_RESULT_CAN_RECOVER:
|
||||
case PCI_ERS_RESULT_RECOVERED:
|
||||
orig = new;
|
||||
break;
|
||||
case PCI_ERS_RESULT_DISCONNECT:
|
||||
if (new == PCI_ERS_RESULT_NEED_RESET)
|
||||
orig = PCI_ERS_RESULT_NEED_RESET;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return orig;
|
||||
}
|
||||
|
||||
extern struct bus_type pcie_port_bus_type;
|
||||
void aer_isr(struct work_struct *work);
|
||||
void aer_print_error(struct pci_dev *dev, struct aer_err_info *info);
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
#include <linux/slab.h>
|
||||
#include <linux/kfifo.h>
|
||||
#include "aerdrv.h"
|
||||
#include "../../pci.h"
|
||||
|
||||
#define PCI_EXP_AER_FLAGS (PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE | \
|
||||
PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE)
|
||||
|
@ -227,329 +228,14 @@ static bool find_source_device(struct pci_dev *parent,
|
|||
return true;
|
||||
}
|
||||
|
||||
static int report_error_detected(struct pci_dev *dev, void *data)
|
||||
{
|
||||
pci_ers_result_t vote;
|
||||
const struct pci_error_handlers *err_handler;
|
||||
struct aer_broadcast_data *result_data;
|
||||
result_data = (struct aer_broadcast_data *) data;
|
||||
|
||||
device_lock(&dev->dev);
|
||||
dev->error_state = result_data->state;
|
||||
|
||||
if (!dev->driver ||
|
||||
!dev->driver->err_handler ||
|
||||
!dev->driver->err_handler->error_detected) {
|
||||
if (result_data->state == pci_channel_io_frozen &&
|
||||
dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) {
|
||||
/*
|
||||
* In case of fatal recovery, if one of down-
|
||||
* stream device has no driver. We might be
|
||||
* unable to recover because a later insmod
|
||||
* of a driver for this device is unaware of
|
||||
* its hw state.
|
||||
*/
|
||||
pci_printk(KERN_DEBUG, dev, "device has %s\n",
|
||||
dev->driver ?
|
||||
"no AER-aware driver" : "no driver");
|
||||
}
|
||||
|
||||
/*
|
||||
* If there's any device in the subtree that does not
|
||||
* have an error_detected callback, returning
|
||||
* PCI_ERS_RESULT_NO_AER_DRIVER prevents calling of
|
||||
* the subsequent mmio_enabled/slot_reset/resume
|
||||
* callbacks of "any" device in the subtree. All the
|
||||
* devices in the subtree are left in the error state
|
||||
* without recovery.
|
||||
*/
|
||||
|
||||
if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE)
|
||||
vote = PCI_ERS_RESULT_NO_AER_DRIVER;
|
||||
else
|
||||
vote = PCI_ERS_RESULT_NONE;
|
||||
} else {
|
||||
err_handler = dev->driver->err_handler;
|
||||
vote = err_handler->error_detected(dev, result_data->state);
|
||||
pci_uevent_ers(dev, PCI_ERS_RESULT_NONE);
|
||||
}
|
||||
|
||||
result_data->result = merge_result(result_data->result, vote);
|
||||
device_unlock(&dev->dev);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int report_mmio_enabled(struct pci_dev *dev, void *data)
|
||||
{
|
||||
pci_ers_result_t vote;
|
||||
const struct pci_error_handlers *err_handler;
|
||||
struct aer_broadcast_data *result_data;
|
||||
result_data = (struct aer_broadcast_data *) data;
|
||||
|
||||
device_lock(&dev->dev);
|
||||
if (!dev->driver ||
|
||||
!dev->driver->err_handler ||
|
||||
!dev->driver->err_handler->mmio_enabled)
|
||||
goto out;
|
||||
|
||||
err_handler = dev->driver->err_handler;
|
||||
vote = err_handler->mmio_enabled(dev);
|
||||
result_data->result = merge_result(result_data->result, vote);
|
||||
out:
|
||||
device_unlock(&dev->dev);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int report_slot_reset(struct pci_dev *dev, void *data)
|
||||
{
|
||||
pci_ers_result_t vote;
|
||||
const struct pci_error_handlers *err_handler;
|
||||
struct aer_broadcast_data *result_data;
|
||||
result_data = (struct aer_broadcast_data *) data;
|
||||
|
||||
device_lock(&dev->dev);
|
||||
if (!dev->driver ||
|
||||
!dev->driver->err_handler ||
|
||||
!dev->driver->err_handler->slot_reset)
|
||||
goto out;
|
||||
|
||||
err_handler = dev->driver->err_handler;
|
||||
vote = err_handler->slot_reset(dev);
|
||||
result_data->result = merge_result(result_data->result, vote);
|
||||
out:
|
||||
device_unlock(&dev->dev);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int report_resume(struct pci_dev *dev, void *data)
|
||||
{
|
||||
const struct pci_error_handlers *err_handler;
|
||||
|
||||
device_lock(&dev->dev);
|
||||
dev->error_state = pci_channel_io_normal;
|
||||
|
||||
if (!dev->driver ||
|
||||
!dev->driver->err_handler ||
|
||||
!dev->driver->err_handler->resume)
|
||||
goto out;
|
||||
|
||||
err_handler = dev->driver->err_handler;
|
||||
err_handler->resume(dev);
|
||||
pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED);
|
||||
out:
|
||||
device_unlock(&dev->dev);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* broadcast_error_message - handle message broadcast to downstream drivers
|
||||
* @dev: pointer to from where in a hierarchy message is broadcasted down
|
||||
* @state: error state
|
||||
* @error_mesg: message to print
|
||||
* @cb: callback to be broadcasted
|
||||
*
|
||||
* Invoked during error recovery process. Once being invoked, the content
|
||||
* of error severity will be broadcasted to all downstream drivers in a
|
||||
* hierarchy in question.
|
||||
*/
|
||||
static pci_ers_result_t broadcast_error_message(struct pci_dev *dev,
|
||||
enum pci_channel_state state,
|
||||
char *error_mesg,
|
||||
int (*cb)(struct pci_dev *, void *))
|
||||
{
|
||||
struct aer_broadcast_data result_data;
|
||||
|
||||
pci_printk(KERN_DEBUG, dev, "broadcast %s message\n", error_mesg);
|
||||
result_data.state = state;
|
||||
if (cb == report_error_detected)
|
||||
result_data.result = PCI_ERS_RESULT_CAN_RECOVER;
|
||||
else
|
||||
result_data.result = PCI_ERS_RESULT_RECOVERED;
|
||||
|
||||
if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
|
||||
/*
|
||||
* If the error is reported by a bridge, we think this error
|
||||
* is related to the downstream link of the bridge, so we
|
||||
* do error recovery on all subordinates of the bridge instead
|
||||
* of the bridge and clear the error status of the bridge.
|
||||
*/
|
||||
if (cb == report_error_detected)
|
||||
dev->error_state = state;
|
||||
pci_walk_bus(dev->subordinate, cb, &result_data);
|
||||
if (cb == report_resume) {
|
||||
pci_cleanup_aer_uncorrect_error_status(dev);
|
||||
dev->error_state = pci_channel_io_normal;
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
* If the error is reported by an end point, we think this
|
||||
* error is related to the upstream link of the end point.
|
||||
*/
|
||||
if (state == pci_channel_io_normal)
|
||||
/*
|
||||
* the error is non fatal so the bus is ok, just invoke
|
||||
* the callback for the function that logged the error.
|
||||
*/
|
||||
cb(dev, &result_data);
|
||||
else
|
||||
pci_walk_bus(dev->bus, cb, &result_data);
|
||||
}
|
||||
|
||||
return result_data.result;
|
||||
}
|
||||
|
||||
/**
|
||||
* default_reset_link - default reset function
|
||||
* @dev: pointer to pci_dev data structure
|
||||
*
|
||||
* Invoked when performing link reset on a Downstream Port or a
|
||||
* Root Port with no aer driver.
|
||||
*/
|
||||
static pci_ers_result_t default_reset_link(struct pci_dev *dev)
|
||||
{
|
||||
pci_reset_bridge_secondary_bus(dev);
|
||||
pci_printk(KERN_DEBUG, dev, "downstream link has been reset\n");
|
||||
return PCI_ERS_RESULT_RECOVERED;
|
||||
}
|
||||
|
||||
static int find_aer_service_iter(struct device *device, void *data)
|
||||
{
|
||||
struct pcie_port_service_driver *service_driver, **drv;
|
||||
|
||||
drv = (struct pcie_port_service_driver **) data;
|
||||
|
||||
if (device->bus == &pcie_port_bus_type && device->driver) {
|
||||
service_driver = to_service_driver(device->driver);
|
||||
if (service_driver->service == PCIE_PORT_SERVICE_AER) {
|
||||
*drv = service_driver;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct pcie_port_service_driver *find_aer_service(struct pci_dev *dev)
|
||||
{
|
||||
struct pcie_port_service_driver *drv = NULL;
|
||||
|
||||
device_for_each_child(&dev->dev, &drv, find_aer_service_iter);
|
||||
|
||||
return drv;
|
||||
}
|
||||
|
||||
static pci_ers_result_t reset_link(struct pci_dev *dev)
|
||||
{
|
||||
struct pci_dev *udev;
|
||||
pci_ers_result_t status;
|
||||
struct pcie_port_service_driver *driver;
|
||||
|
||||
if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
|
||||
/* Reset this port for all subordinates */
|
||||
udev = dev;
|
||||
} else {
|
||||
/* Reset the upstream component (likely downstream port) */
|
||||
udev = dev->bus->self;
|
||||
}
|
||||
|
||||
/* Use the aer driver of the component firstly */
|
||||
driver = find_aer_service(udev);
|
||||
|
||||
if (driver && driver->reset_link) {
|
||||
status = driver->reset_link(udev);
|
||||
} else if (udev->has_secondary_link) {
|
||||
status = default_reset_link(udev);
|
||||
} else {
|
||||
pci_printk(KERN_DEBUG, dev, "no link-reset support at upstream device %s\n",
|
||||
pci_name(udev));
|
||||
return PCI_ERS_RESULT_DISCONNECT;
|
||||
}
|
||||
|
||||
if (status != PCI_ERS_RESULT_RECOVERED) {
|
||||
pci_printk(KERN_DEBUG, dev, "link reset at upstream device %s failed\n",
|
||||
pci_name(udev));
|
||||
return PCI_ERS_RESULT_DISCONNECT;
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
/**
|
||||
* do_recovery - handle nonfatal/fatal error recovery process
|
||||
* @dev: pointer to a pci_dev data structure of agent detecting an error
|
||||
* @severity: error severity type
|
||||
*
|
||||
* Invoked when an error is nonfatal/fatal. Once being invoked, broadcast
|
||||
* error detected message to all downstream drivers within a hierarchy in
|
||||
* question and return the returned code.
|
||||
*/
|
||||
static void do_recovery(struct pci_dev *dev, int severity)
|
||||
{
|
||||
pci_ers_result_t status, result = PCI_ERS_RESULT_RECOVERED;
|
||||
enum pci_channel_state state;
|
||||
|
||||
if (severity == AER_FATAL)
|
||||
state = pci_channel_io_frozen;
|
||||
else
|
||||
state = pci_channel_io_normal;
|
||||
|
||||
status = broadcast_error_message(dev,
|
||||
state,
|
||||
"error_detected",
|
||||
report_error_detected);
|
||||
|
||||
if (severity == AER_FATAL) {
|
||||
result = reset_link(dev);
|
||||
if (result != PCI_ERS_RESULT_RECOVERED)
|
||||
goto failed;
|
||||
}
|
||||
|
||||
if (status == PCI_ERS_RESULT_CAN_RECOVER)
|
||||
status = broadcast_error_message(dev,
|
||||
state,
|
||||
"mmio_enabled",
|
||||
report_mmio_enabled);
|
||||
|
||||
if (status == PCI_ERS_RESULT_NEED_RESET) {
|
||||
/*
|
||||
* TODO: Should call platform-specific
|
||||
* functions to reset slot before calling
|
||||
* drivers' slot_reset callbacks?
|
||||
*/
|
||||
status = broadcast_error_message(dev,
|
||||
state,
|
||||
"slot_reset",
|
||||
report_slot_reset);
|
||||
}
|
||||
|
||||
if (status != PCI_ERS_RESULT_RECOVERED)
|
||||
goto failed;
|
||||
|
||||
broadcast_error_message(dev,
|
||||
state,
|
||||
"resume",
|
||||
report_resume);
|
||||
|
||||
pci_info(dev, "AER: Device recovery successful\n");
|
||||
return;
|
||||
|
||||
failed:
|
||||
pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT);
|
||||
/* TODO: Should kernel panic here? */
|
||||
pci_info(dev, "AER: Device recovery failed\n");
|
||||
}
|
||||
|
||||
/**
|
||||
* handle_error_source - handle logging error into an event log
|
||||
* @aerdev: pointer to pcie_device data structure of the root port
|
||||
* @dev: pointer to pci_dev data structure of error source device
|
||||
* @info: comprehensive error information
|
||||
*
|
||||
* Invoked when an error being detected by Root Port.
|
||||
*/
|
||||
static void handle_error_source(struct pcie_device *aerdev,
|
||||
struct pci_dev *dev,
|
||||
struct aer_err_info *info)
|
||||
static void handle_error_source(struct pci_dev *dev, struct aer_err_info *info)
|
||||
{
|
||||
int pos;
|
||||
|
||||
|
@ -562,12 +248,13 @@ static void handle_error_source(struct pcie_device *aerdev,
|
|||
if (pos)
|
||||
pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS,
|
||||
info->status);
|
||||
} else
|
||||
do_recovery(dev, info->severity);
|
||||
} else if (info->severity == AER_NONFATAL)
|
||||
pcie_do_nonfatal_recovery(dev);
|
||||
else if (info->severity == AER_FATAL)
|
||||
pcie_do_fatal_recovery(dev, PCIE_PORT_SERVICE_AER);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_ACPI_APEI_PCIEAER
|
||||
static void aer_recover_work_func(struct work_struct *work);
|
||||
|
||||
#define AER_RECOVER_RING_ORDER 4
|
||||
#define AER_RECOVER_RING_SIZE (1 << AER_RECOVER_RING_ORDER)
|
||||
|
@ -582,6 +269,30 @@ struct aer_recover_entry {
|
|||
|
||||
static DEFINE_KFIFO(aer_recover_ring, struct aer_recover_entry,
|
||||
AER_RECOVER_RING_SIZE);
|
||||
|
||||
static void aer_recover_work_func(struct work_struct *work)
|
||||
{
|
||||
struct aer_recover_entry entry;
|
||||
struct pci_dev *pdev;
|
||||
|
||||
while (kfifo_get(&aer_recover_ring, &entry)) {
|
||||
pdev = pci_get_domain_bus_and_slot(entry.domain, entry.bus,
|
||||
entry.devfn);
|
||||
if (!pdev) {
|
||||
pr_err("AER recover: Can not find pci_dev for %04x:%02x:%02x:%x\n",
|
||||
entry.domain, entry.bus,
|
||||
PCI_SLOT(entry.devfn), PCI_FUNC(entry.devfn));
|
||||
continue;
|
||||
}
|
||||
cper_print_aer(pdev, entry.severity, entry.regs);
|
||||
if (entry.severity == AER_NONFATAL)
|
||||
pcie_do_nonfatal_recovery(pdev);
|
||||
else if (entry.severity == AER_FATAL)
|
||||
pcie_do_fatal_recovery(pdev, PCIE_PORT_SERVICE_AER);
|
||||
pci_dev_put(pdev);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Mutual exclusion for writers of aer_recover_ring, reader side don't
|
||||
* need lock, because there is only one reader and lock is not needed
|
||||
|
@ -611,27 +322,6 @@ void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn,
|
|||
spin_unlock_irqrestore(&aer_recover_ring_lock, flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(aer_recover_queue);
|
||||
|
||||
static void aer_recover_work_func(struct work_struct *work)
|
||||
{
|
||||
struct aer_recover_entry entry;
|
||||
struct pci_dev *pdev;
|
||||
|
||||
while (kfifo_get(&aer_recover_ring, &entry)) {
|
||||
pdev = pci_get_domain_bus_and_slot(entry.domain, entry.bus,
|
||||
entry.devfn);
|
||||
if (!pdev) {
|
||||
pr_err("AER recover: Can not find pci_dev for %04x:%02x:%02x:%x\n",
|
||||
entry.domain, entry.bus,
|
||||
PCI_SLOT(entry.devfn), PCI_FUNC(entry.devfn));
|
||||
continue;
|
||||
}
|
||||
cper_print_aer(pdev, entry.severity, entry.regs);
|
||||
if (entry.severity != AER_CORRECTABLE)
|
||||
do_recovery(pdev, entry.severity);
|
||||
pci_dev_put(pdev);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
|
@ -695,8 +385,7 @@ static int get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
|
|||
return 1;
|
||||
}
|
||||
|
||||
static inline void aer_process_err_devices(struct pcie_device *p_device,
|
||||
struct aer_err_info *e_info)
|
||||
static inline void aer_process_err_devices(struct aer_err_info *e_info)
|
||||
{
|
||||
int i;
|
||||
|
||||
|
@ -707,19 +396,19 @@ static inline void aer_process_err_devices(struct pcie_device *p_device,
|
|||
}
|
||||
for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) {
|
||||
if (get_device_error_info(e_info->dev[i], e_info))
|
||||
handle_error_source(p_device, e_info->dev[i], e_info);
|
||||
handle_error_source(e_info->dev[i], e_info);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* aer_isr_one_error - consume an error detected by root port
|
||||
* @p_device: pointer to error root port service device
|
||||
* @rpc: pointer to the root port which holds an error
|
||||
* @e_src: pointer to an error source
|
||||
*/
|
||||
static void aer_isr_one_error(struct pcie_device *p_device,
|
||||
static void aer_isr_one_error(struct aer_rpc *rpc,
|
||||
struct aer_err_source *e_src)
|
||||
{
|
||||
struct aer_rpc *rpc = get_service_data(p_device);
|
||||
struct pci_dev *pdev = rpc->rpd;
|
||||
struct aer_err_info *e_info = &rpc->e_info;
|
||||
|
||||
/*
|
||||
|
@ -734,11 +423,10 @@ static void aer_isr_one_error(struct pcie_device *p_device,
|
|||
e_info->multi_error_valid = 1;
|
||||
else
|
||||
e_info->multi_error_valid = 0;
|
||||
aer_print_port_info(pdev, e_info);
|
||||
|
||||
aer_print_port_info(p_device->port, e_info);
|
||||
|
||||
if (find_source_device(p_device->port, e_info))
|
||||
aer_process_err_devices(p_device, e_info);
|
||||
if (find_source_device(pdev, e_info))
|
||||
aer_process_err_devices(e_info);
|
||||
}
|
||||
|
||||
if (e_src->status & PCI_ERR_ROOT_UNCOR_RCV) {
|
||||
|
@ -754,10 +442,10 @@ static void aer_isr_one_error(struct pcie_device *p_device,
|
|||
else
|
||||
e_info->multi_error_valid = 0;
|
||||
|
||||
aer_print_port_info(p_device->port, e_info);
|
||||
aer_print_port_info(pdev, e_info);
|
||||
|
||||
if (find_source_device(p_device->port, e_info))
|
||||
aer_process_err_devices(p_device, e_info);
|
||||
if (find_source_device(pdev, e_info))
|
||||
aer_process_err_devices(e_info);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -799,11 +487,10 @@ static int get_e_source(struct aer_rpc *rpc, struct aer_err_source *e_src)
|
|||
void aer_isr(struct work_struct *work)
|
||||
{
|
||||
struct aer_rpc *rpc = container_of(work, struct aer_rpc, dpc_handler);
|
||||
struct pcie_device *p_device = rpc->rpd;
|
||||
struct aer_err_source uninitialized_var(e_src);
|
||||
|
||||
mutex_lock(&rpc->rpc_mutex);
|
||||
while (get_e_source(rpc, &e_src))
|
||||
aer_isr_one_error(p_device, &e_src);
|
||||
aer_isr_one_error(rpc, &e_src);
|
||||
mutex_unlock(&rpc->rpc_mutex);
|
||||
}
|
||||
|
|
|
@ -163,17 +163,17 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
|
|||
int id = ((dev->bus->number << 8) | dev->devfn);
|
||||
|
||||
if (!info->status) {
|
||||
pci_err(dev, "PCIe Bus Error: severity=%s, type=Unaccessible, id=%04x(Unregistered Agent ID)\n",
|
||||
aer_error_severity_string[info->severity], id);
|
||||
pci_err(dev, "PCIe Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n",
|
||||
aer_error_severity_string[info->severity]);
|
||||
goto out;
|
||||
}
|
||||
|
||||
layer = AER_GET_LAYER_ERROR(info->severity, info->status);
|
||||
agent = AER_GET_AGENT(info->severity, info->status);
|
||||
|
||||
pci_err(dev, "PCIe Bus Error: severity=%s, type=%s, id=%04x(%s)\n",
|
||||
pci_err(dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n",
|
||||
aer_error_severity_string[info->severity],
|
||||
aer_error_layer[layer], id, aer_agent_string[agent]);
|
||||
aer_error_layer[layer], aer_agent_string[agent]);
|
||||
|
||||
pci_err(dev, " device [%04x:%04x] error status/mask=%08x/%08x\n",
|
||||
dev->vendor, dev->device,
|
||||
|
@ -186,17 +186,21 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
|
|||
|
||||
out:
|
||||
if (info->id && info->error_dev_num > 1 && info->id == id)
|
||||
pci_err(dev, " Error of this Agent(%04x) is reported first\n", id);
|
||||
pci_err(dev, " Error of this Agent is reported first\n");
|
||||
|
||||
trace_aer_event(dev_name(&dev->dev), (info->status & ~info->mask),
|
||||
info->severity);
|
||||
info->severity, info->tlp_header_valid, &info->tlp);
|
||||
}
|
||||
|
||||
void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info)
|
||||
{
|
||||
pci_info(dev, "AER: %s%s error received: id=%04x\n",
|
||||
u8 bus = info->id >> 8;
|
||||
u8 devfn = info->id & 0xff;
|
||||
|
||||
pci_info(dev, "AER: %s%s error received: %04x:%02x:%02x.%d\n",
|
||||
info->multi_error_valid ? "Multiple " : "",
|
||||
aer_error_severity_string[info->severity], info->id);
|
||||
aer_error_severity_string[info->severity],
|
||||
pci_domain_nr(dev->bus), bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
|
||||
}
|
||||
|
||||
#ifdef CONFIG_ACPI_APEI_PCIEAER
|
||||
|
@ -216,28 +220,30 @@ EXPORT_SYMBOL_GPL(cper_severity_to_aer);
|
|||
void cper_print_aer(struct pci_dev *dev, int aer_severity,
|
||||
struct aer_capability_regs *aer)
|
||||
{
|
||||
int layer, agent, status_strs_size, tlp_header_valid = 0;
|
||||
int layer, agent, tlp_header_valid = 0;
|
||||
u32 status, mask;
|
||||
const char **status_strs;
|
||||
struct aer_err_info info;
|
||||
|
||||
if (aer_severity == AER_CORRECTABLE) {
|
||||
status = aer->cor_status;
|
||||
mask = aer->cor_mask;
|
||||
status_strs = aer_correctable_error_string;
|
||||
status_strs_size = ARRAY_SIZE(aer_correctable_error_string);
|
||||
} else {
|
||||
status = aer->uncor_status;
|
||||
mask = aer->uncor_mask;
|
||||
status_strs = aer_uncorrectable_error_string;
|
||||
status_strs_size = ARRAY_SIZE(aer_uncorrectable_error_string);
|
||||
tlp_header_valid = status & AER_LOG_TLP_MASKS;
|
||||
}
|
||||
|
||||
layer = AER_GET_LAYER_ERROR(aer_severity, status);
|
||||
agent = AER_GET_AGENT(aer_severity, status);
|
||||
|
||||
memset(&info, 0, sizeof(info));
|
||||
info.severity = aer_severity;
|
||||
info.status = status;
|
||||
info.mask = mask;
|
||||
info.first_error = PCI_ERR_CAP_FEP(aer->cap_control);
|
||||
|
||||
pci_err(dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n", status, mask);
|
||||
cper_print_bits("", status, status_strs, status_strs_size);
|
||||
__aer_print_error(dev, &info);
|
||||
pci_err(dev, "aer_layer=%s, aer_agent=%s\n",
|
||||
aer_error_layer[layer], aer_agent_string[agent]);
|
||||
|
||||
|
@ -249,6 +255,6 @@ void cper_print_aer(struct pci_dev *dev, int aer_severity,
|
|||
__print_tlp_header(dev, &aer->header_log);
|
||||
|
||||
trace_aer_event(dev_name(&dev->dev), (status & ~mask),
|
||||
aer_severity);
|
||||
aer_severity, tlp_header_valid, &aer->header_log);
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -68,44 +68,35 @@ static int dpc_wait_rp_inactive(struct dpc_dev *dpc)
|
|||
|
||||
static void dpc_wait_link_inactive(struct dpc_dev *dpc)
|
||||
{
|
||||
unsigned long timeout = jiffies + HZ;
|
||||
struct pci_dev *pdev = dpc->dev->port;
|
||||
struct device *dev = &dpc->dev->device;
|
||||
u16 lnk_status;
|
||||
|
||||
pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnk_status);
|
||||
while (lnk_status & PCI_EXP_LNKSTA_DLLLA &&
|
||||
!time_after(jiffies, timeout)) {
|
||||
msleep(10);
|
||||
pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnk_status);
|
||||
}
|
||||
if (lnk_status & PCI_EXP_LNKSTA_DLLLA)
|
||||
dev_warn(dev, "Link state not disabled for DPC event\n");
|
||||
pcie_wait_for_link(pdev, false);
|
||||
}
|
||||
|
||||
static void dpc_work(struct work_struct *work)
|
||||
static pci_ers_result_t dpc_reset_link(struct pci_dev *pdev)
|
||||
{
|
||||
struct dpc_dev *dpc = container_of(work, struct dpc_dev, work);
|
||||
struct pci_dev *dev, *temp, *pdev = dpc->dev->port;
|
||||
struct pci_bus *parent = pdev->subordinate;
|
||||
u16 cap = dpc->cap_pos, ctl;
|
||||
struct dpc_dev *dpc;
|
||||
struct pcie_device *pciedev;
|
||||
struct device *devdpc;
|
||||
u16 cap, ctl;
|
||||
|
||||
pci_lock_rescan_remove();
|
||||
list_for_each_entry_safe_reverse(dev, temp, &parent->devices,
|
||||
bus_list) {
|
||||
pci_dev_get(dev);
|
||||
pci_dev_set_disconnected(dev, NULL);
|
||||
if (pci_has_subordinate(dev))
|
||||
pci_walk_bus(dev->subordinate,
|
||||
pci_dev_set_disconnected, NULL);
|
||||
pci_stop_and_remove_bus_device(dev);
|
||||
pci_dev_put(dev);
|
||||
}
|
||||
pci_unlock_rescan_remove();
|
||||
/*
|
||||
* DPC disables the Link automatically in hardware, so it has
|
||||
* already been reset by the time we get here.
|
||||
*/
|
||||
devdpc = pcie_port_find_device(pdev, PCIE_PORT_SERVICE_DPC);
|
||||
pciedev = to_pcie_device(devdpc);
|
||||
dpc = get_service_data(pciedev);
|
||||
cap = dpc->cap_pos;
|
||||
|
||||
/*
|
||||
* Wait until the Link is inactive, then clear DPC Trigger Status
|
||||
* to allow the Port to leave DPC.
|
||||
*/
|
||||
dpc_wait_link_inactive(dpc);
|
||||
|
||||
if (dpc->rp_extensions && dpc_wait_rp_inactive(dpc))
|
||||
return;
|
||||
return PCI_ERS_RESULT_DISCONNECT;
|
||||
if (dpc->rp_extensions && dpc->rp_pio_status) {
|
||||
pci_write_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_STATUS,
|
||||
dpc->rp_pio_status);
|
||||
|
@ -118,6 +109,17 @@ static void dpc_work(struct work_struct *work)
|
|||
pci_read_config_word(pdev, cap + PCI_EXP_DPC_CTL, &ctl);
|
||||
pci_write_config_word(pdev, cap + PCI_EXP_DPC_CTL,
|
||||
ctl | PCI_EXP_DPC_CTL_INT_EN);
|
||||
|
||||
return PCI_ERS_RESULT_RECOVERED;
|
||||
}
|
||||
|
||||
static void dpc_work(struct work_struct *work)
|
||||
{
|
||||
struct dpc_dev *dpc = container_of(work, struct dpc_dev, work);
|
||||
struct pci_dev *pdev = dpc->dev->port;
|
||||
|
||||
/* We configure DPC so it only triggers on ERR_FATAL */
|
||||
pcie_do_fatal_recovery(pdev, PCIE_PORT_SERVICE_DPC);
|
||||
}
|
||||
|
||||
static void dpc_process_rp_pio_error(struct dpc_dev *dpc)
|
||||
|
@ -270,7 +272,7 @@ static int dpc_probe(struct pcie_device *dev)
|
|||
}
|
||||
}
|
||||
|
||||
ctl = (ctl & 0xfff4) | PCI_EXP_DPC_CTL_EN_NONFATAL | PCI_EXP_DPC_CTL_INT_EN;
|
||||
ctl = (ctl & 0xfff4) | PCI_EXP_DPC_CTL_EN_FATAL | PCI_EXP_DPC_CTL_INT_EN;
|
||||
pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, ctl);
|
||||
|
||||
dev_info(device, "DPC error containment capabilities: Int Msg #%d, RPExt%c PoisonedTLP%c SwTrigger%c RP PIO Log %d, DL_ActiveErr%c\n",
|
||||
|
@ -288,7 +290,7 @@ static void dpc_remove(struct pcie_device *dev)
|
|||
u16 ctl;
|
||||
|
||||
pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, &ctl);
|
||||
ctl &= ~(PCI_EXP_DPC_CTL_EN_NONFATAL | PCI_EXP_DPC_CTL_INT_EN);
|
||||
ctl &= ~(PCI_EXP_DPC_CTL_EN_FATAL | PCI_EXP_DPC_CTL_INT_EN);
|
||||
pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, ctl);
|
||||
}
|
||||
|
||||
|
@ -298,6 +300,7 @@ static struct pcie_port_service_driver dpcdriver = {
|
|||
.service = PCIE_PORT_SERVICE_DPC,
|
||||
.probe = dpc_probe,
|
||||
.remove = dpc_remove,
|
||||
.reset_link = dpc_reset_link,
|
||||
};
|
||||
|
||||
static int __init dpc_service_init(void)
|
||||
|
|
|
@ -0,0 +1,388 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* This file implements the error recovery as a core part of PCIe error
|
||||
* reporting. When a PCIe error is delivered, an error message will be
|
||||
* collected and printed to console, then, an error recovery procedure
|
||||
* will be executed by following the PCI error recovery rules.
|
||||
*
|
||||
* Copyright (C) 2006 Intel Corp.
|
||||
* Tom Long Nguyen (tom.l.nguyen@intel.com)
|
||||
* Zhang Yanmin (yanmin.zhang@intel.com)
|
||||
*/
|
||||
|
||||
#include <linux/pci.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/pci.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/aer.h>
|
||||
#include "portdrv.h"
|
||||
#include "../pci.h"
|
||||
|
||||
struct aer_broadcast_data {
|
||||
enum pci_channel_state state;
|
||||
enum pci_ers_result result;
|
||||
};
|
||||
|
||||
static pci_ers_result_t merge_result(enum pci_ers_result orig,
|
||||
enum pci_ers_result new)
|
||||
{
|
||||
if (new == PCI_ERS_RESULT_NO_AER_DRIVER)
|
||||
return PCI_ERS_RESULT_NO_AER_DRIVER;
|
||||
|
||||
if (new == PCI_ERS_RESULT_NONE)
|
||||
return orig;
|
||||
|
||||
switch (orig) {
|
||||
case PCI_ERS_RESULT_CAN_RECOVER:
|
||||
case PCI_ERS_RESULT_RECOVERED:
|
||||
orig = new;
|
||||
break;
|
||||
case PCI_ERS_RESULT_DISCONNECT:
|
||||
if (new == PCI_ERS_RESULT_NEED_RESET)
|
||||
orig = PCI_ERS_RESULT_NEED_RESET;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return orig;
|
||||
}
|
||||
|
||||
static int report_error_detected(struct pci_dev *dev, void *data)
|
||||
{
|
||||
pci_ers_result_t vote;
|
||||
const struct pci_error_handlers *err_handler;
|
||||
struct aer_broadcast_data *result_data;
|
||||
|
||||
result_data = (struct aer_broadcast_data *) data;
|
||||
|
||||
device_lock(&dev->dev);
|
||||
dev->error_state = result_data->state;
|
||||
|
||||
if (!dev->driver ||
|
||||
!dev->driver->err_handler ||
|
||||
!dev->driver->err_handler->error_detected) {
|
||||
if (result_data->state == pci_channel_io_frozen &&
|
||||
dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) {
|
||||
/*
|
||||
* In case of fatal recovery, if one of down-
|
||||
* stream device has no driver. We might be
|
||||
* unable to recover because a later insmod
|
||||
* of a driver for this device is unaware of
|
||||
* its hw state.
|
||||
*/
|
||||
pci_printk(KERN_DEBUG, dev, "device has %s\n",
|
||||
dev->driver ?
|
||||
"no AER-aware driver" : "no driver");
|
||||
}
|
||||
|
||||
/*
|
||||
* If there's any device in the subtree that does not
|
||||
* have an error_detected callback, returning
|
||||
* PCI_ERS_RESULT_NO_AER_DRIVER prevents calling of
|
||||
* the subsequent mmio_enabled/slot_reset/resume
|
||||
* callbacks of "any" device in the subtree. All the
|
||||
* devices in the subtree are left in the error state
|
||||
* without recovery.
|
||||
*/
|
||||
|
||||
if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE)
|
||||
vote = PCI_ERS_RESULT_NO_AER_DRIVER;
|
||||
else
|
||||
vote = PCI_ERS_RESULT_NONE;
|
||||
} else {
|
||||
err_handler = dev->driver->err_handler;
|
||||
vote = err_handler->error_detected(dev, result_data->state);
|
||||
pci_uevent_ers(dev, PCI_ERS_RESULT_NONE);
|
||||
}
|
||||
|
||||
result_data->result = merge_result(result_data->result, vote);
|
||||
device_unlock(&dev->dev);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int report_mmio_enabled(struct pci_dev *dev, void *data)
|
||||
{
|
||||
pci_ers_result_t vote;
|
||||
const struct pci_error_handlers *err_handler;
|
||||
struct aer_broadcast_data *result_data;
|
||||
|
||||
result_data = (struct aer_broadcast_data *) data;
|
||||
|
||||
device_lock(&dev->dev);
|
||||
if (!dev->driver ||
|
||||
!dev->driver->err_handler ||
|
||||
!dev->driver->err_handler->mmio_enabled)
|
||||
goto out;
|
||||
|
||||
err_handler = dev->driver->err_handler;
|
||||
vote = err_handler->mmio_enabled(dev);
|
||||
result_data->result = merge_result(result_data->result, vote);
|
||||
out:
|
||||
device_unlock(&dev->dev);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int report_slot_reset(struct pci_dev *dev, void *data)
|
||||
{
|
||||
pci_ers_result_t vote;
|
||||
const struct pci_error_handlers *err_handler;
|
||||
struct aer_broadcast_data *result_data;
|
||||
|
||||
result_data = (struct aer_broadcast_data *) data;
|
||||
|
||||
device_lock(&dev->dev);
|
||||
if (!dev->driver ||
|
||||
!dev->driver->err_handler ||
|
||||
!dev->driver->err_handler->slot_reset)
|
||||
goto out;
|
||||
|
||||
err_handler = dev->driver->err_handler;
|
||||
vote = err_handler->slot_reset(dev);
|
||||
result_data->result = merge_result(result_data->result, vote);
|
||||
out:
|
||||
device_unlock(&dev->dev);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int report_resume(struct pci_dev *dev, void *data)
|
||||
{
|
||||
const struct pci_error_handlers *err_handler;
|
||||
|
||||
device_lock(&dev->dev);
|
||||
dev->error_state = pci_channel_io_normal;
|
||||
|
||||
if (!dev->driver ||
|
||||
!dev->driver->err_handler ||
|
||||
!dev->driver->err_handler->resume)
|
||||
goto out;
|
||||
|
||||
err_handler = dev->driver->err_handler;
|
||||
err_handler->resume(dev);
|
||||
pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED);
|
||||
out:
|
||||
device_unlock(&dev->dev);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* default_reset_link - default reset function
|
||||
* @dev: pointer to pci_dev data structure
|
||||
*
|
||||
* Invoked when performing link reset on a Downstream Port or a
|
||||
* Root Port with no aer driver.
|
||||
*/
|
||||
static pci_ers_result_t default_reset_link(struct pci_dev *dev)
|
||||
{
|
||||
pci_reset_bridge_secondary_bus(dev);
|
||||
pci_printk(KERN_DEBUG, dev, "downstream link has been reset\n");
|
||||
return PCI_ERS_RESULT_RECOVERED;
|
||||
}
|
||||
|
||||
static pci_ers_result_t reset_link(struct pci_dev *dev, u32 service)
|
||||
{
|
||||
struct pci_dev *udev;
|
||||
pci_ers_result_t status;
|
||||
struct pcie_port_service_driver *driver = NULL;
|
||||
|
||||
if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
|
||||
/* Reset this port for all subordinates */
|
||||
udev = dev;
|
||||
} else {
|
||||
/* Reset the upstream component (likely downstream port) */
|
||||
udev = dev->bus->self;
|
||||
}
|
||||
|
||||
/* Use the aer driver of the component firstly */
|
||||
driver = pcie_port_find_service(udev, service);
|
||||
|
||||
if (driver && driver->reset_link) {
|
||||
status = driver->reset_link(udev);
|
||||
} else if (udev->has_secondary_link) {
|
||||
status = default_reset_link(udev);
|
||||
} else {
|
||||
pci_printk(KERN_DEBUG, dev, "no link-reset support at upstream device %s\n",
|
||||
pci_name(udev));
|
||||
return PCI_ERS_RESULT_DISCONNECT;
|
||||
}
|
||||
|
||||
if (status != PCI_ERS_RESULT_RECOVERED) {
|
||||
pci_printk(KERN_DEBUG, dev, "link reset at upstream device %s failed\n",
|
||||
pci_name(udev));
|
||||
return PCI_ERS_RESULT_DISCONNECT;
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
/**
|
||||
* broadcast_error_message - handle message broadcast to downstream drivers
|
||||
* @dev: pointer to from where in a hierarchy message is broadcasted down
|
||||
* @state: error state
|
||||
* @error_mesg: message to print
|
||||
* @cb: callback to be broadcasted
|
||||
*
|
||||
* Invoked during error recovery process. Once being invoked, the content
|
||||
* of error severity will be broadcasted to all downstream drivers in a
|
||||
* hierarchy in question.
|
||||
*/
|
||||
static pci_ers_result_t broadcast_error_message(struct pci_dev *dev,
|
||||
enum pci_channel_state state,
|
||||
char *error_mesg,
|
||||
int (*cb)(struct pci_dev *, void *))
|
||||
{
|
||||
struct aer_broadcast_data result_data;
|
||||
|
||||
pci_printk(KERN_DEBUG, dev, "broadcast %s message\n", error_mesg);
|
||||
result_data.state = state;
|
||||
if (cb == report_error_detected)
|
||||
result_data.result = PCI_ERS_RESULT_CAN_RECOVER;
|
||||
else
|
||||
result_data.result = PCI_ERS_RESULT_RECOVERED;
|
||||
|
||||
if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
|
||||
/*
|
||||
* If the error is reported by a bridge, we think this error
|
||||
* is related to the downstream link of the bridge, so we
|
||||
* do error recovery on all subordinates of the bridge instead
|
||||
* of the bridge and clear the error status of the bridge.
|
||||
*/
|
||||
if (cb == report_error_detected)
|
||||
dev->error_state = state;
|
||||
pci_walk_bus(dev->subordinate, cb, &result_data);
|
||||
if (cb == report_resume) {
|
||||
pci_cleanup_aer_uncorrect_error_status(dev);
|
||||
dev->error_state = pci_channel_io_normal;
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
* If the error is reported by an end point, we think this
|
||||
* error is related to the upstream link of the end point.
|
||||
*/
|
||||
if (state == pci_channel_io_normal)
|
||||
/*
|
||||
* the error is non fatal so the bus is ok, just invoke
|
||||
* the callback for the function that logged the error.
|
||||
*/
|
||||
cb(dev, &result_data);
|
||||
else
|
||||
pci_walk_bus(dev->bus, cb, &result_data);
|
||||
}
|
||||
|
||||
return result_data.result;
|
||||
}
|
||||
|
||||
/**
|
||||
* pcie_do_fatal_recovery - handle fatal error recovery process
|
||||
* @dev: pointer to a pci_dev data structure of agent detecting an error
|
||||
*
|
||||
* Invoked when an error is fatal. Once being invoked, removes the devices
|
||||
* beneath this AER agent, followed by reset link e.g. secondary bus reset
|
||||
* followed by re-enumeration of devices.
|
||||
*/
|
||||
void pcie_do_fatal_recovery(struct pci_dev *dev, u32 service)
|
||||
{
|
||||
struct pci_dev *udev;
|
||||
struct pci_bus *parent;
|
||||
struct pci_dev *pdev, *temp;
|
||||
pci_ers_result_t result;
|
||||
|
||||
if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)
|
||||
udev = dev;
|
||||
else
|
||||
udev = dev->bus->self;
|
||||
|
||||
parent = udev->subordinate;
|
||||
pci_lock_rescan_remove();
|
||||
list_for_each_entry_safe_reverse(pdev, temp, &parent->devices,
|
||||
bus_list) {
|
||||
pci_dev_get(pdev);
|
||||
pci_dev_set_disconnected(pdev, NULL);
|
||||
if (pci_has_subordinate(pdev))
|
||||
pci_walk_bus(pdev->subordinate,
|
||||
pci_dev_set_disconnected, NULL);
|
||||
pci_stop_and_remove_bus_device(pdev);
|
||||
pci_dev_put(pdev);
|
||||
}
|
||||
|
||||
result = reset_link(udev, service);
|
||||
|
||||
if ((service == PCIE_PORT_SERVICE_AER) &&
|
||||
(dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)) {
|
||||
/*
|
||||
* If the error is reported by a bridge, we think this error
|
||||
* is related to the downstream link of the bridge, so we
|
||||
* do error recovery on all subordinates of the bridge instead
|
||||
* of the bridge and clear the error status of the bridge.
|
||||
*/
|
||||
pci_cleanup_aer_uncorrect_error_status(dev);
|
||||
}
|
||||
|
||||
if (result == PCI_ERS_RESULT_RECOVERED) {
|
||||
if (pcie_wait_for_link(udev, true))
|
||||
pci_rescan_bus(udev->bus);
|
||||
pci_info(dev, "Device recovery from fatal error successful\n");
|
||||
} else {
|
||||
pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT);
|
||||
pci_info(dev, "Device recovery from fatal error failed\n");
|
||||
}
|
||||
|
||||
pci_unlock_rescan_remove();
|
||||
}
|
||||
|
||||
/**
|
||||
* pcie_do_nonfatal_recovery - handle nonfatal error recovery process
|
||||
* @dev: pointer to a pci_dev data structure of agent detecting an error
|
||||
*
|
||||
* Invoked when an error is nonfatal/fatal. Once being invoked, broadcast
|
||||
* error detected message to all downstream drivers within a hierarchy in
|
||||
* question and return the returned code.
|
||||
*/
|
||||
void pcie_do_nonfatal_recovery(struct pci_dev *dev)
|
||||
{
|
||||
pci_ers_result_t status;
|
||||
enum pci_channel_state state;
|
||||
|
||||
state = pci_channel_io_normal;
|
||||
|
||||
status = broadcast_error_message(dev,
|
||||
state,
|
||||
"error_detected",
|
||||
report_error_detected);
|
||||
|
||||
if (status == PCI_ERS_RESULT_CAN_RECOVER)
|
||||
status = broadcast_error_message(dev,
|
||||
state,
|
||||
"mmio_enabled",
|
||||
report_mmio_enabled);
|
||||
|
||||
if (status == PCI_ERS_RESULT_NEED_RESET) {
|
||||
/*
|
||||
* TODO: Should call platform-specific
|
||||
* functions to reset slot before calling
|
||||
* drivers' slot_reset callbacks?
|
||||
*/
|
||||
status = broadcast_error_message(dev,
|
||||
state,
|
||||
"slot_reset",
|
||||
report_slot_reset);
|
||||
}
|
||||
|
||||
if (status != PCI_ERS_RESULT_RECOVERED)
|
||||
goto failed;
|
||||
|
||||
broadcast_error_message(dev,
|
||||
state,
|
||||
"resume",
|
||||
report_resume);
|
||||
|
||||
pci_info(dev, "AER: Device recovery successful\n");
|
||||
return;
|
||||
|
||||
failed:
|
||||
pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT);
|
||||
|
||||
/* TODO: Should kernel panic here? */
|
||||
pci_info(dev, "AER: Device recovery failed\n");
|
||||
}
|
|
@ -112,4 +112,7 @@ static inline bool pcie_pme_no_msi(void) { return false; }
|
|||
static inline void pcie_pme_interrupt_enable(struct pci_dev *dev, bool en) {}
|
||||
#endif /* !CONFIG_PCIE_PME */
|
||||
|
||||
struct pcie_port_service_driver *pcie_port_find_service(struct pci_dev *dev,
|
||||
u32 service);
|
||||
struct device *pcie_port_find_device(struct pci_dev *dev, u32 service);
|
||||
#endif /* _PORTDRV_H_ */
|
||||
|
|
|
@ -19,6 +19,12 @@
|
|||
#include "../pci.h"
|
||||
#include "portdrv.h"
|
||||
|
||||
struct portdrv_service_data {
|
||||
struct pcie_port_service_driver *drv;
|
||||
struct device *dev;
|
||||
u32 service;
|
||||
};
|
||||
|
||||
/**
|
||||
* release_pcie_device - free PCI Express port service device structure
|
||||
* @dev: Port service device to release
|
||||
|
@ -398,6 +404,69 @@ static int remove_iter(struct device *dev, void *data)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int find_service_iter(struct device *device, void *data)
|
||||
{
|
||||
struct pcie_port_service_driver *service_driver;
|
||||
struct portdrv_service_data *pdrvs;
|
||||
u32 service;
|
||||
|
||||
pdrvs = (struct portdrv_service_data *) data;
|
||||
service = pdrvs->service;
|
||||
|
||||
if (device->bus == &pcie_port_bus_type && device->driver) {
|
||||
service_driver = to_service_driver(device->driver);
|
||||
if (service_driver->service == service) {
|
||||
pdrvs->drv = service_driver;
|
||||
pdrvs->dev = device;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* pcie_port_find_service - find the service driver
|
||||
* @dev: PCI Express port the service is associated with
|
||||
* @service: Service to find
|
||||
*
|
||||
* Find PCI Express port service driver associated with given service
|
||||
*/
|
||||
struct pcie_port_service_driver *pcie_port_find_service(struct pci_dev *dev,
|
||||
u32 service)
|
||||
{
|
||||
struct pcie_port_service_driver *drv;
|
||||
struct portdrv_service_data pdrvs;
|
||||
|
||||
pdrvs.drv = NULL;
|
||||
pdrvs.service = service;
|
||||
device_for_each_child(&dev->dev, &pdrvs, find_service_iter);
|
||||
|
||||
drv = pdrvs.drv;
|
||||
return drv;
|
||||
}
|
||||
|
||||
/**
|
||||
* pcie_port_find_device - find the struct device
|
||||
* @dev: PCI Express port the service is associated with
|
||||
* @service: For the service to find
|
||||
*
|
||||
* Find the struct device associated with given service on a pci_dev
|
||||
*/
|
||||
struct device *pcie_port_find_device(struct pci_dev *dev,
|
||||
u32 service)
|
||||
{
|
||||
struct device *device;
|
||||
struct portdrv_service_data pdrvs;
|
||||
|
||||
pdrvs.dev = NULL;
|
||||
pdrvs.service = service;
|
||||
device_for_each_child(&dev->dev, &pdrvs, find_service_iter);
|
||||
|
||||
device = pdrvs.dev;
|
||||
return device;
|
||||
}
|
||||
|
||||
/**
|
||||
* pcie_port_device_remove - unregister PCI Express port service devices
|
||||
* @dev: PCI Express port the service devices to unregister are associated with
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
#define AER_NONFATAL 0
|
||||
#define AER_FATAL 1
|
||||
#define AER_CORRECTABLE 2
|
||||
#define DPC_FATAL 3
|
||||
|
||||
struct pci_dev;
|
||||
|
||||
|
|
|
@ -2284,7 +2284,7 @@ static inline bool pci_is_thunderbolt_attached(struct pci_dev *pdev)
|
|||
return false;
|
||||
}
|
||||
|
||||
#if defined(CONFIG_PCIEAER) || defined(CONFIG_EEH)
|
||||
#if defined(CONFIG_PCIEPORTBUS) || defined(CONFIG_EEH)
|
||||
void pci_uevent_ers(struct pci_dev *pdev, enum pci_ers_result err_type);
|
||||
#endif
|
||||
|
||||
|
|
|
@ -298,30 +298,44 @@ TRACE_EVENT(non_standard_event,
|
|||
TRACE_EVENT(aer_event,
|
||||
TP_PROTO(const char *dev_name,
|
||||
const u32 status,
|
||||
const u8 severity),
|
||||
const u8 severity,
|
||||
const u8 tlp_header_valid,
|
||||
struct aer_header_log_regs *tlp),
|
||||
|
||||
TP_ARGS(dev_name, status, severity),
|
||||
TP_ARGS(dev_name, status, severity, tlp_header_valid, tlp),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__string( dev_name, dev_name )
|
||||
__field( u32, status )
|
||||
__field( u8, severity )
|
||||
__field( u8, tlp_header_valid)
|
||||
__array( u32, tlp_header, 4 )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__assign_str(dev_name, dev_name);
|
||||
__entry->status = status;
|
||||
__entry->severity = severity;
|
||||
__entry->tlp_header_valid = tlp_header_valid;
|
||||
if (tlp_header_valid) {
|
||||
__entry->tlp_header[0] = tlp->dw0;
|
||||
__entry->tlp_header[1] = tlp->dw1;
|
||||
__entry->tlp_header[2] = tlp->dw2;
|
||||
__entry->tlp_header[3] = tlp->dw3;
|
||||
}
|
||||
),
|
||||
|
||||
TP_printk("%s PCIe Bus Error: severity=%s, %s\n",
|
||||
TP_printk("%s PCIe Bus Error: severity=%s, %s, TLP Header=%s\n",
|
||||
__get_str(dev_name),
|
||||
__entry->severity == AER_CORRECTABLE ? "Corrected" :
|
||||
__entry->severity == AER_FATAL ?
|
||||
"Fatal" : "Uncorrected, non-fatal",
|
||||
__entry->severity == AER_CORRECTABLE ?
|
||||
__print_flags(__entry->status, "|", aer_correctable_errors) :
|
||||
__print_flags(__entry->status, "|", aer_uncorrectable_errors))
|
||||
__print_flags(__entry->status, "|", aer_uncorrectable_errors),
|
||||
__entry->tlp_header_valid ?
|
||||
__print_array(__entry->tlp_header, 4, 4) :
|
||||
"Not available")
|
||||
);
|
||||
|
||||
/*
|
||||
|
|
|
@ -981,6 +981,7 @@
|
|||
#define PCI_EXP_DPC_CAP_DL_ACTIVE 0x1000 /* ERR_COR signal on DL_Active supported */
|
||||
|
||||
#define PCI_EXP_DPC_CTL 6 /* DPC control */
|
||||
#define PCI_EXP_DPC_CTL_EN_FATAL 0x0001 /* Enable trigger on ERR_FATAL message */
|
||||
#define PCI_EXP_DPC_CTL_EN_NONFATAL 0x0002 /* Enable trigger on ERR_NONFATAL message */
|
||||
#define PCI_EXP_DPC_CTL_INT_EN 0x0008 /* DPC Interrupt Enable */
|
||||
|
||||
|
|
Loading…
Reference in New Issue