Merge branch 'pci/aer'

- unify AER decoding for native and ACPI CPER sources (Alexandru Gagniuc)

  - add TLP header info to AER tracepoint (Thomas Tai)

  - add generic pcie_wait_for_link() interface (Oza Pawandeep)

  - handle AER ERR_FATAL by removing and re-enumerating devices, as
    Downstream Port Containment does (Oza Pawandeep)

  - factor out common code between AER and DPC recovery (Oza Pawandeep)

  - stop triggering DPC for ERR_NONFATAL errors (Oza Pawandeep)

  - share ERR_FATAL recovery path between AER and DPC (Oza Pawandeep)

* pci/aer:
  PCI/AER: Replace struct pcie_device with pci_dev
  PCI/AER: Remove unused parameters
  PCI/AER: Decode Error Source Requester ID
  PCI/AER: Remove aer_recover_work_func() forward declaration
  PCI/DPC: Use the generic pcie_do_fatal_recovery() path
  PCI/AER: Pass service type to pcie_do_fatal_recovery()
  PCI/DPC: Disable ERR_NONFATAL handling by DPC
  PCI/portdrv: Add generic pcie_port_find_device()
  PCI/portdrv: Add generic pcie_port_find_service()
  PCI/AER: Factor out error reporting to drivers/pci/pcie/err.c
  PCI/AER: Rename error recovery interfaces to generic PCI naming
  PCI/AER: Handle ERR_FATAL with removal and re-enumeration of devices
  PCI: Add generic pcie_wait_for_link() interface
  PCI/AER: Add TLP header information to tracepoint
  PCI/AER: Unify error bit printing for native and CPER reporting
This commit is contained in:
Bjorn Helgaas 2018-06-06 16:10:03 -05:00
commit d7e02c08a9
18 changed files with 648 additions and 474 deletions

View File

@ -110,7 +110,7 @@ The actual steps taken by a platform to recover from a PCI error
event will be platform-dependent, but will follow the general
sequence described below.
STEP 0: Error Event
STEP 0: Error Event: ERR_NONFATAL
-------------------
A PCI bus error is detected by the PCI hardware. On powerpc, the slot
is isolated, in that all I/O is blocked: all reads return 0xffffffff,
@ -228,13 +228,7 @@ proceeds to either STEP3 (Link Reset) or to STEP 5 (Resume Operations).
If any driver returned PCI_ERS_RESULT_NEED_RESET, then the platform
proceeds to STEP 4 (Slot Reset)
STEP 3: Link Reset
------------------
The platform resets the link. This is a PCI-Express specific step
and is done whenever a fatal error has been detected that can be
"solved" by resetting the link.
STEP 4: Slot Reset
STEP 3: Slot Reset
------------------
In response to a return value of PCI_ERS_RESULT_NEED_RESET, the
@ -320,7 +314,7 @@ Failure).
>>> However, it probably should.
STEP 5: Resume Operations
STEP 4: Resume Operations
-------------------------
The platform will call the resume() callback on all affected device
drivers if all drivers on the segment have returned
@ -332,7 +326,7 @@ a result code.
At this point, if a new error happens, the platform will restart
a new error recovery sequence.
STEP 6: Permanent Failure
STEP 5: Permanent Failure
-------------------------
A "permanent failure" has occurred, and the platform cannot recover
the device. The platform will call error_detected() with a
@ -355,6 +349,27 @@ errors. See the discussion in powerpc/eeh-pci-error-recovery.txt
for additional detail on real-life experience of the causes of
software errors.
STEP 0: Error Event: ERR_FATAL
-------------------
A PCI bus error is detected by the PCI hardware. On powerpc, the slot is
isolated, in that all I/O is blocked: all reads return 0xffffffff, all
writes are ignored.
STEP 1: Remove devices
--------------------
The platform removes the devices. Depending on the error agent, this is
done either at this port, for all of its subordinates, or at the upstream
component (likely a downstream port).
STEP 2: Reset link
--------------------
The platform resets the link. This is a PCI-Express specific step and is
done whenever a fatal error has been detected that can be "solved" by
resetting the link.
STEP 3: Re-enumerate the devices
--------------------
The platform initiates re-enumeration of the devices.
Conclusion; General Remarks
---------------------------

View File

@ -231,25 +231,11 @@ bool pciehp_check_link_active(struct controller *ctrl)
return ret;
}
static void __pcie_wait_link_active(struct controller *ctrl, bool active)
{
int timeout = 1000;
if (pciehp_check_link_active(ctrl) == active)
return;
while (timeout > 0) {
msleep(10);
timeout -= 10;
if (pciehp_check_link_active(ctrl) == active)
return;
}
ctrl_dbg(ctrl, "Data Link Layer Link Active not %s in 1000 msec\n",
active ? "set" : "cleared");
}
static void pcie_wait_link_active(struct controller *ctrl)
{
__pcie_wait_link_active(ctrl, true);
struct pci_dev *pdev = ctrl_dev(ctrl);
pcie_wait_for_link(pdev, true);
}
static bool pci_bus_check_dev(struct pci_bus *bus, int devfn)

View File

@ -1535,7 +1535,7 @@ static int pci_uevent(struct device *dev, struct kobj_uevent_env *env)
return 0;
}
#if defined(CONFIG_PCIEAER) || defined(CONFIG_EEH)
#if defined(CONFIG_PCIEPORTBUS) || defined(CONFIG_EEH)
/**
* pci_uevent_ers - emit a uevent during recovery path of PCI device
* @pdev: PCI device undergoing error recovery

View File

@ -4138,6 +4138,35 @@ static int pci_pm_reset(struct pci_dev *dev, int probe)
return pci_dev_wait(dev, "PM D3->D0", PCIE_RESET_READY_POLL_MS);
}
/**
 * pcie_wait_for_link - Wait until link is active or inactive
 * @pdev: Bridge device
 * @active: waiting for active or inactive?
 *
 * Poll the Data Link Layer Link Active bit of the Link Status register
 * every 10 msec, for up to 1000 msec, until it matches @active.
 *
 * Return: true if the link reached the requested state within the timeout,
 * false otherwise (a message is logged in that case).
 */
bool pcie_wait_for_link(struct pci_dev *pdev, bool active)
{
	const int timeout_ms = 1000;	/* total time budget */
	const int poll_ms = 10;		/* delay between two polls */
	int remaining = timeout_ms;
	bool ret;
	u16 lnk_status;

	for (;;) {
		pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnk_status);
		ret = !!(lnk_status & PCI_EXP_LNKSTA_DLLLA);
		if (ret == active)
			return true;
		/* The state is sampled once more after the budget is used up */
		if (remaining <= 0)
			break;
		msleep(poll_ms);
		remaining -= poll_ms;
	}

	/* Report the same budget that the loop actually used */
	pci_info(pdev, "Data Link Layer Link Active not %s in %d msec\n",
		 active ? "set" : "cleared", timeout_ms);

	return false;
}
void pci_reset_secondary_bus(struct pci_dev *dev)
{

View File

@ -353,6 +353,11 @@ static inline resource_size_t pci_resource_alignment(struct pci_dev *dev,
void pci_enable_acs(struct pci_dev *dev);
/* PCI error reporting and recovery */
void pcie_do_fatal_recovery(struct pci_dev *dev, u32 service);
void pcie_do_nonfatal_recovery(struct pci_dev *dev);
bool pcie_wait_for_link(struct pci_dev *pdev, bool active);
#ifdef CONFIG_PCIEASPM
void pcie_aspm_init_link_state(struct pci_dev *pdev);
void pcie_aspm_exit_link_state(struct pci_dev *pdev);

View File

@ -2,7 +2,7 @@
#
# Makefile for PCI Express features and port driver
pcieportdrv-y := portdrv_core.o portdrv_pci.o
pcieportdrv-y := portdrv_core.o portdrv_pci.o err.o
obj-$(CONFIG_PCIEPORTBUS) += pcieportdrv.o

View File

@ -94,7 +94,7 @@ static void set_downstream_devices_error_reporting(struct pci_dev *dev,
*/
static void aer_enable_rootport(struct aer_rpc *rpc)
{
struct pci_dev *pdev = rpc->rpd->port;
struct pci_dev *pdev = rpc->rpd;
int aer_pos;
u16 reg16;
u32 reg32;
@ -136,7 +136,7 @@ static void aer_enable_rootport(struct aer_rpc *rpc)
*/
static void aer_disable_rootport(struct aer_rpc *rpc)
{
struct pci_dev *pdev = rpc->rpd->port;
struct pci_dev *pdev = rpc->rpd;
u32 reg32;
int pos;
@ -232,7 +232,7 @@ static struct aer_rpc *aer_alloc_rpc(struct pcie_device *dev)
/* Initialize Root lock access, e_lock, to Root Error Status Reg */
spin_lock_init(&rpc->e_lock);
rpc->rpd = dev;
rpc->rpd = dev->port;
INIT_WORK(&rpc->dpc_handler, aer_isr);
mutex_init(&rpc->rpc_mutex);
@ -353,10 +353,7 @@ static void aer_error_resume(struct pci_dev *dev)
pos = dev->aer_cap;
pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &mask);
if (dev->error_state == pci_channel_io_normal)
status &= ~mask; /* Clear corresponding nonfatal bits */
else
status &= mask; /* Clear corresponding fatal bits */
status &= ~mask; /* Clear corresponding nonfatal bits */
pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status);
}

View File

@ -58,7 +58,7 @@ struct aer_err_source {
};
struct aer_rpc {
struct pcie_device *rpd; /* Root Port device */
struct pci_dev *rpd; /* Root Port device */
struct work_struct dpc_handler;
struct aer_err_source e_sources[AER_ERROR_SOURCES_MAX];
struct aer_err_info e_info;
@ -76,36 +76,6 @@ struct aer_rpc {
*/
};
struct aer_broadcast_data {
enum pci_channel_state state;
enum pci_ers_result result;
};
static inline pci_ers_result_t merge_result(enum pci_ers_result orig,
enum pci_ers_result new)
{
if (new == PCI_ERS_RESULT_NO_AER_DRIVER)
return PCI_ERS_RESULT_NO_AER_DRIVER;
if (new == PCI_ERS_RESULT_NONE)
return orig;
switch (orig) {
case PCI_ERS_RESULT_CAN_RECOVER:
case PCI_ERS_RESULT_RECOVERED:
orig = new;
break;
case PCI_ERS_RESULT_DISCONNECT:
if (new == PCI_ERS_RESULT_NEED_RESET)
orig = PCI_ERS_RESULT_NEED_RESET;
break;
default:
break;
}
return orig;
}
extern struct bus_type pcie_port_bus_type;
void aer_isr(struct work_struct *work);
void aer_print_error(struct pci_dev *dev, struct aer_err_info *info);

View File

@ -20,6 +20,7 @@
#include <linux/slab.h>
#include <linux/kfifo.h>
#include "aerdrv.h"
#include "../../pci.h"
#define PCI_EXP_AER_FLAGS (PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE | \
PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE)
@ -227,329 +228,14 @@ static bool find_source_device(struct pci_dev *parent,
return true;
}
static int report_error_detected(struct pci_dev *dev, void *data)
{
pci_ers_result_t vote;
const struct pci_error_handlers *err_handler;
struct aer_broadcast_data *result_data;
result_data = (struct aer_broadcast_data *) data;
device_lock(&dev->dev);
dev->error_state = result_data->state;
if (!dev->driver ||
!dev->driver->err_handler ||
!dev->driver->err_handler->error_detected) {
if (result_data->state == pci_channel_io_frozen &&
dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) {
/*
* In case of fatal recovery, if one of down-
* stream device has no driver. We might be
* unable to recover because a later insmod
* of a driver for this device is unaware of
* its hw state.
*/
pci_printk(KERN_DEBUG, dev, "device has %s\n",
dev->driver ?
"no AER-aware driver" : "no driver");
}
/*
* If there's any device in the subtree that does not
* have an error_detected callback, returning
* PCI_ERS_RESULT_NO_AER_DRIVER prevents calling of
* the subsequent mmio_enabled/slot_reset/resume
* callbacks of "any" device in the subtree. All the
* devices in the subtree are left in the error state
* without recovery.
*/
if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE)
vote = PCI_ERS_RESULT_NO_AER_DRIVER;
else
vote = PCI_ERS_RESULT_NONE;
} else {
err_handler = dev->driver->err_handler;
vote = err_handler->error_detected(dev, result_data->state);
pci_uevent_ers(dev, PCI_ERS_RESULT_NONE);
}
result_data->result = merge_result(result_data->result, vote);
device_unlock(&dev->dev);
return 0;
}
static int report_mmio_enabled(struct pci_dev *dev, void *data)
{
pci_ers_result_t vote;
const struct pci_error_handlers *err_handler;
struct aer_broadcast_data *result_data;
result_data = (struct aer_broadcast_data *) data;
device_lock(&dev->dev);
if (!dev->driver ||
!dev->driver->err_handler ||
!dev->driver->err_handler->mmio_enabled)
goto out;
err_handler = dev->driver->err_handler;
vote = err_handler->mmio_enabled(dev);
result_data->result = merge_result(result_data->result, vote);
out:
device_unlock(&dev->dev);
return 0;
}
static int report_slot_reset(struct pci_dev *dev, void *data)
{
pci_ers_result_t vote;
const struct pci_error_handlers *err_handler;
struct aer_broadcast_data *result_data;
result_data = (struct aer_broadcast_data *) data;
device_lock(&dev->dev);
if (!dev->driver ||
!dev->driver->err_handler ||
!dev->driver->err_handler->slot_reset)
goto out;
err_handler = dev->driver->err_handler;
vote = err_handler->slot_reset(dev);
result_data->result = merge_result(result_data->result, vote);
out:
device_unlock(&dev->dev);
return 0;
}
static int report_resume(struct pci_dev *dev, void *data)
{
const struct pci_error_handlers *err_handler;
device_lock(&dev->dev);
dev->error_state = pci_channel_io_normal;
if (!dev->driver ||
!dev->driver->err_handler ||
!dev->driver->err_handler->resume)
goto out;
err_handler = dev->driver->err_handler;
err_handler->resume(dev);
pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED);
out:
device_unlock(&dev->dev);
return 0;
}
/**
* broadcast_error_message - handle message broadcast to downstream drivers
* @dev: pointer to from where in a hierarchy message is broadcasted down
* @state: error state
* @error_mesg: message to print
* @cb: callback to be broadcasted
*
* Invoked during error recovery process. Once being invoked, the content
* of error severity will be broadcasted to all downstream drivers in a
* hierarchy in question.
*/
static pci_ers_result_t broadcast_error_message(struct pci_dev *dev,
enum pci_channel_state state,
char *error_mesg,
int (*cb)(struct pci_dev *, void *))
{
struct aer_broadcast_data result_data;
pci_printk(KERN_DEBUG, dev, "broadcast %s message\n", error_mesg);
result_data.state = state;
if (cb == report_error_detected)
result_data.result = PCI_ERS_RESULT_CAN_RECOVER;
else
result_data.result = PCI_ERS_RESULT_RECOVERED;
if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
/*
* If the error is reported by a bridge, we think this error
* is related to the downstream link of the bridge, so we
* do error recovery on all subordinates of the bridge instead
* of the bridge and clear the error status of the bridge.
*/
if (cb == report_error_detected)
dev->error_state = state;
pci_walk_bus(dev->subordinate, cb, &result_data);
if (cb == report_resume) {
pci_cleanup_aer_uncorrect_error_status(dev);
dev->error_state = pci_channel_io_normal;
}
} else {
/*
* If the error is reported by an end point, we think this
* error is related to the upstream link of the end point.
*/
if (state == pci_channel_io_normal)
/*
* the error is non fatal so the bus is ok, just invoke
* the callback for the function that logged the error.
*/
cb(dev, &result_data);
else
pci_walk_bus(dev->bus, cb, &result_data);
}
return result_data.result;
}
/**
* default_reset_link - default reset function
* @dev: pointer to pci_dev data structure
*
* Invoked when performing link reset on a Downstream Port or a
* Root Port with no aer driver.
*/
static pci_ers_result_t default_reset_link(struct pci_dev *dev)
{
pci_reset_bridge_secondary_bus(dev);
pci_printk(KERN_DEBUG, dev, "downstream link has been reset\n");
return PCI_ERS_RESULT_RECOVERED;
}
static int find_aer_service_iter(struct device *device, void *data)
{
struct pcie_port_service_driver *service_driver, **drv;
drv = (struct pcie_port_service_driver **) data;
if (device->bus == &pcie_port_bus_type && device->driver) {
service_driver = to_service_driver(device->driver);
if (service_driver->service == PCIE_PORT_SERVICE_AER) {
*drv = service_driver;
return 1;
}
}
return 0;
}
static struct pcie_port_service_driver *find_aer_service(struct pci_dev *dev)
{
struct pcie_port_service_driver *drv = NULL;
device_for_each_child(&dev->dev, &drv, find_aer_service_iter);
return drv;
}
static pci_ers_result_t reset_link(struct pci_dev *dev)
{
struct pci_dev *udev;
pci_ers_result_t status;
struct pcie_port_service_driver *driver;
if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
/* Reset this port for all subordinates */
udev = dev;
} else {
/* Reset the upstream component (likely downstream port) */
udev = dev->bus->self;
}
/* Use the aer driver of the component firstly */
driver = find_aer_service(udev);
if (driver && driver->reset_link) {
status = driver->reset_link(udev);
} else if (udev->has_secondary_link) {
status = default_reset_link(udev);
} else {
pci_printk(KERN_DEBUG, dev, "no link-reset support at upstream device %s\n",
pci_name(udev));
return PCI_ERS_RESULT_DISCONNECT;
}
if (status != PCI_ERS_RESULT_RECOVERED) {
pci_printk(KERN_DEBUG, dev, "link reset at upstream device %s failed\n",
pci_name(udev));
return PCI_ERS_RESULT_DISCONNECT;
}
return status;
}
/**
* do_recovery - handle nonfatal/fatal error recovery process
* @dev: pointer to a pci_dev data structure of agent detecting an error
* @severity: error severity type
*
* Invoked when an error is nonfatal/fatal. Once being invoked, broadcast
* error detected message to all downstream drivers within a hierarchy in
* question and return the returned code.
*/
static void do_recovery(struct pci_dev *dev, int severity)
{
pci_ers_result_t status, result = PCI_ERS_RESULT_RECOVERED;
enum pci_channel_state state;
if (severity == AER_FATAL)
state = pci_channel_io_frozen;
else
state = pci_channel_io_normal;
status = broadcast_error_message(dev,
state,
"error_detected",
report_error_detected);
if (severity == AER_FATAL) {
result = reset_link(dev);
if (result != PCI_ERS_RESULT_RECOVERED)
goto failed;
}
if (status == PCI_ERS_RESULT_CAN_RECOVER)
status = broadcast_error_message(dev,
state,
"mmio_enabled",
report_mmio_enabled);
if (status == PCI_ERS_RESULT_NEED_RESET) {
/*
* TODO: Should call platform-specific
* functions to reset slot before calling
* drivers' slot_reset callbacks?
*/
status = broadcast_error_message(dev,
state,
"slot_reset",
report_slot_reset);
}
if (status != PCI_ERS_RESULT_RECOVERED)
goto failed;
broadcast_error_message(dev,
state,
"resume",
report_resume);
pci_info(dev, "AER: Device recovery successful\n");
return;
failed:
pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT);
/* TODO: Should kernel panic here? */
pci_info(dev, "AER: Device recovery failed\n");
}
/**
* handle_error_source - handle logging error into an event log
* @aerdev: pointer to pcie_device data structure of the root port
* @dev: pointer to pci_dev data structure of error source device
* @info: comprehensive error information
*
* Invoked when an error being detected by Root Port.
*/
static void handle_error_source(struct pcie_device *aerdev,
struct pci_dev *dev,
struct aer_err_info *info)
static void handle_error_source(struct pci_dev *dev, struct aer_err_info *info)
{
int pos;
@ -562,12 +248,13 @@ static void handle_error_source(struct pcie_device *aerdev,
if (pos)
pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS,
info->status);
} else
do_recovery(dev, info->severity);
} else if (info->severity == AER_NONFATAL)
pcie_do_nonfatal_recovery(dev);
else if (info->severity == AER_FATAL)
pcie_do_fatal_recovery(dev, PCIE_PORT_SERVICE_AER);
}
#ifdef CONFIG_ACPI_APEI_PCIEAER
static void aer_recover_work_func(struct work_struct *work);
#define AER_RECOVER_RING_ORDER 4
#define AER_RECOVER_RING_SIZE (1 << AER_RECOVER_RING_ORDER)
@ -582,6 +269,30 @@ struct aer_recover_entry {
static DEFINE_KFIFO(aer_recover_ring, struct aer_recover_entry,
AER_RECOVER_RING_SIZE);
static void aer_recover_work_func(struct work_struct *work)
{
struct aer_recover_entry entry;
struct pci_dev *pdev;
while (kfifo_get(&aer_recover_ring, &entry)) {
pdev = pci_get_domain_bus_and_slot(entry.domain, entry.bus,
entry.devfn);
if (!pdev) {
pr_err("AER recover: Can not find pci_dev for %04x:%02x:%02x:%x\n",
entry.domain, entry.bus,
PCI_SLOT(entry.devfn), PCI_FUNC(entry.devfn));
continue;
}
cper_print_aer(pdev, entry.severity, entry.regs);
if (entry.severity == AER_NONFATAL)
pcie_do_nonfatal_recovery(pdev);
else if (entry.severity == AER_FATAL)
pcie_do_fatal_recovery(pdev, PCIE_PORT_SERVICE_AER);
pci_dev_put(pdev);
}
}
/*
* Mutual exclusion for writers of aer_recover_ring, reader side don't
* need lock, because there is only one reader and lock is not needed
@ -611,27 +322,6 @@ void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn,
spin_unlock_irqrestore(&aer_recover_ring_lock, flags);
}
EXPORT_SYMBOL_GPL(aer_recover_queue);
static void aer_recover_work_func(struct work_struct *work)
{
struct aer_recover_entry entry;
struct pci_dev *pdev;
while (kfifo_get(&aer_recover_ring, &entry)) {
pdev = pci_get_domain_bus_and_slot(entry.domain, entry.bus,
entry.devfn);
if (!pdev) {
pr_err("AER recover: Can not find pci_dev for %04x:%02x:%02x:%x\n",
entry.domain, entry.bus,
PCI_SLOT(entry.devfn), PCI_FUNC(entry.devfn));
continue;
}
cper_print_aer(pdev, entry.severity, entry.regs);
if (entry.severity != AER_CORRECTABLE)
do_recovery(pdev, entry.severity);
pci_dev_put(pdev);
}
}
#endif
/**
@ -695,8 +385,7 @@ static int get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
return 1;
}
static inline void aer_process_err_devices(struct pcie_device *p_device,
struct aer_err_info *e_info)
static inline void aer_process_err_devices(struct aer_err_info *e_info)
{
int i;
@ -707,19 +396,19 @@ static inline void aer_process_err_devices(struct pcie_device *p_device,
}
for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) {
if (get_device_error_info(e_info->dev[i], e_info))
handle_error_source(p_device, e_info->dev[i], e_info);
handle_error_source(e_info->dev[i], e_info);
}
}
/**
* aer_isr_one_error - consume an error detected by root port
* @p_device: pointer to error root port service device
* @rpc: pointer to the root port which holds an error
* @e_src: pointer to an error source
*/
static void aer_isr_one_error(struct pcie_device *p_device,
static void aer_isr_one_error(struct aer_rpc *rpc,
struct aer_err_source *e_src)
{
struct aer_rpc *rpc = get_service_data(p_device);
struct pci_dev *pdev = rpc->rpd;
struct aer_err_info *e_info = &rpc->e_info;
/*
@ -734,11 +423,10 @@ static void aer_isr_one_error(struct pcie_device *p_device,
e_info->multi_error_valid = 1;
else
e_info->multi_error_valid = 0;
aer_print_port_info(pdev, e_info);
aer_print_port_info(p_device->port, e_info);
if (find_source_device(p_device->port, e_info))
aer_process_err_devices(p_device, e_info);
if (find_source_device(pdev, e_info))
aer_process_err_devices(e_info);
}
if (e_src->status & PCI_ERR_ROOT_UNCOR_RCV) {
@ -754,10 +442,10 @@ static void aer_isr_one_error(struct pcie_device *p_device,
else
e_info->multi_error_valid = 0;
aer_print_port_info(p_device->port, e_info);
aer_print_port_info(pdev, e_info);
if (find_source_device(p_device->port, e_info))
aer_process_err_devices(p_device, e_info);
if (find_source_device(pdev, e_info))
aer_process_err_devices(e_info);
}
}
@ -799,11 +487,10 @@ static int get_e_source(struct aer_rpc *rpc, struct aer_err_source *e_src)
void aer_isr(struct work_struct *work)
{
struct aer_rpc *rpc = container_of(work, struct aer_rpc, dpc_handler);
struct pcie_device *p_device = rpc->rpd;
struct aer_err_source uninitialized_var(e_src);
mutex_lock(&rpc->rpc_mutex);
while (get_e_source(rpc, &e_src))
aer_isr_one_error(p_device, &e_src);
aer_isr_one_error(rpc, &e_src);
mutex_unlock(&rpc->rpc_mutex);
}

View File

@ -163,17 +163,17 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
int id = ((dev->bus->number << 8) | dev->devfn);
if (!info->status) {
pci_err(dev, "PCIe Bus Error: severity=%s, type=Unaccessible, id=%04x(Unregistered Agent ID)\n",
aer_error_severity_string[info->severity], id);
pci_err(dev, "PCIe Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n",
aer_error_severity_string[info->severity]);
goto out;
}
layer = AER_GET_LAYER_ERROR(info->severity, info->status);
agent = AER_GET_AGENT(info->severity, info->status);
pci_err(dev, "PCIe Bus Error: severity=%s, type=%s, id=%04x(%s)\n",
pci_err(dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n",
aer_error_severity_string[info->severity],
aer_error_layer[layer], id, aer_agent_string[agent]);
aer_error_layer[layer], aer_agent_string[agent]);
pci_err(dev, " device [%04x:%04x] error status/mask=%08x/%08x\n",
dev->vendor, dev->device,
@ -186,17 +186,21 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
out:
if (info->id && info->error_dev_num > 1 && info->id == id)
pci_err(dev, " Error of this Agent(%04x) is reported first\n", id);
pci_err(dev, " Error of this Agent is reported first\n");
trace_aer_event(dev_name(&dev->dev), (info->status & ~info->mask),
info->severity);
info->severity, info->tlp_header_valid, &info->tlp);
}
void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info)
{
pci_info(dev, "AER: %s%s error received: id=%04x\n",
u8 bus = info->id >> 8;
u8 devfn = info->id & 0xff;
pci_info(dev, "AER: %s%s error received: %04x:%02x:%02x.%d\n",
info->multi_error_valid ? "Multiple " : "",
aer_error_severity_string[info->severity], info->id);
aer_error_severity_string[info->severity],
pci_domain_nr(dev->bus), bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
}
#ifdef CONFIG_ACPI_APEI_PCIEAER
@ -216,28 +220,30 @@ EXPORT_SYMBOL_GPL(cper_severity_to_aer);
void cper_print_aer(struct pci_dev *dev, int aer_severity,
struct aer_capability_regs *aer)
{
int layer, agent, status_strs_size, tlp_header_valid = 0;
int layer, agent, tlp_header_valid = 0;
u32 status, mask;
const char **status_strs;
struct aer_err_info info;
if (aer_severity == AER_CORRECTABLE) {
status = aer->cor_status;
mask = aer->cor_mask;
status_strs = aer_correctable_error_string;
status_strs_size = ARRAY_SIZE(aer_correctable_error_string);
} else {
status = aer->uncor_status;
mask = aer->uncor_mask;
status_strs = aer_uncorrectable_error_string;
status_strs_size = ARRAY_SIZE(aer_uncorrectable_error_string);
tlp_header_valid = status & AER_LOG_TLP_MASKS;
}
layer = AER_GET_LAYER_ERROR(aer_severity, status);
agent = AER_GET_AGENT(aer_severity, status);
memset(&info, 0, sizeof(info));
info.severity = aer_severity;
info.status = status;
info.mask = mask;
info.first_error = PCI_ERR_CAP_FEP(aer->cap_control);
pci_err(dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n", status, mask);
cper_print_bits("", status, status_strs, status_strs_size);
__aer_print_error(dev, &info);
pci_err(dev, "aer_layer=%s, aer_agent=%s\n",
aer_error_layer[layer], aer_agent_string[agent]);
@ -249,6 +255,6 @@ void cper_print_aer(struct pci_dev *dev, int aer_severity,
__print_tlp_header(dev, &aer->header_log);
trace_aer_event(dev_name(&dev->dev), (status & ~mask),
aer_severity);
aer_severity, tlp_header_valid, &aer->header_log);
}
#endif

View File

@ -68,44 +68,35 @@ static int dpc_wait_rp_inactive(struct dpc_dev *dpc)
static void dpc_wait_link_inactive(struct dpc_dev *dpc)
{
unsigned long timeout = jiffies + HZ;
struct pci_dev *pdev = dpc->dev->port;
struct device *dev = &dpc->dev->device;
u16 lnk_status;
pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnk_status);
while (lnk_status & PCI_EXP_LNKSTA_DLLLA &&
!time_after(jiffies, timeout)) {
msleep(10);
pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnk_status);
}
if (lnk_status & PCI_EXP_LNKSTA_DLLLA)
dev_warn(dev, "Link state not disabled for DPC event\n");
pcie_wait_for_link(pdev, false);
}
static void dpc_work(struct work_struct *work)
static pci_ers_result_t dpc_reset_link(struct pci_dev *pdev)
{
struct dpc_dev *dpc = container_of(work, struct dpc_dev, work);
struct pci_dev *dev, *temp, *pdev = dpc->dev->port;
struct pci_bus *parent = pdev->subordinate;
u16 cap = dpc->cap_pos, ctl;
struct dpc_dev *dpc;
struct pcie_device *pciedev;
struct device *devdpc;
u16 cap, ctl;
pci_lock_rescan_remove();
list_for_each_entry_safe_reverse(dev, temp, &parent->devices,
bus_list) {
pci_dev_get(dev);
pci_dev_set_disconnected(dev, NULL);
if (pci_has_subordinate(dev))
pci_walk_bus(dev->subordinate,
pci_dev_set_disconnected, NULL);
pci_stop_and_remove_bus_device(dev);
pci_dev_put(dev);
}
pci_unlock_rescan_remove();
/*
* DPC disables the Link automatically in hardware, so it has
* already been reset by the time we get here.
*/
devdpc = pcie_port_find_device(pdev, PCIE_PORT_SERVICE_DPC);
pciedev = to_pcie_device(devdpc);
dpc = get_service_data(pciedev);
cap = dpc->cap_pos;
/*
* Wait until the Link is inactive, then clear DPC Trigger Status
* to allow the Port to leave DPC.
*/
dpc_wait_link_inactive(dpc);
if (dpc->rp_extensions && dpc_wait_rp_inactive(dpc))
return;
return PCI_ERS_RESULT_DISCONNECT;
if (dpc->rp_extensions && dpc->rp_pio_status) {
pci_write_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_STATUS,
dpc->rp_pio_status);
@ -118,6 +109,17 @@ static void dpc_work(struct work_struct *work)
pci_read_config_word(pdev, cap + PCI_EXP_DPC_CTL, &ctl);
pci_write_config_word(pdev, cap + PCI_EXP_DPC_CTL,
ctl | PCI_EXP_DPC_CTL_INT_EN);
return PCI_ERS_RESULT_RECOVERED;
}
static void dpc_work(struct work_struct *work)
{
struct dpc_dev *dpc = container_of(work, struct dpc_dev, work);
struct pci_dev *pdev = dpc->dev->port;
/* We configure DPC so it only triggers on ERR_FATAL */
pcie_do_fatal_recovery(pdev, PCIE_PORT_SERVICE_DPC);
}
static void dpc_process_rp_pio_error(struct dpc_dev *dpc)
@ -270,7 +272,7 @@ static int dpc_probe(struct pcie_device *dev)
}
}
ctl = (ctl & 0xfff4) | PCI_EXP_DPC_CTL_EN_NONFATAL | PCI_EXP_DPC_CTL_INT_EN;
ctl = (ctl & 0xfff4) | PCI_EXP_DPC_CTL_EN_FATAL | PCI_EXP_DPC_CTL_INT_EN;
pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, ctl);
dev_info(device, "DPC error containment capabilities: Int Msg #%d, RPExt%c PoisonedTLP%c SwTrigger%c RP PIO Log %d, DL_ActiveErr%c\n",
@ -288,7 +290,7 @@ static void dpc_remove(struct pcie_device *dev)
u16 ctl;
pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, &ctl);
ctl &= ~(PCI_EXP_DPC_CTL_EN_NONFATAL | PCI_EXP_DPC_CTL_INT_EN);
ctl &= ~(PCI_EXP_DPC_CTL_EN_FATAL | PCI_EXP_DPC_CTL_INT_EN);
pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, ctl);
}
@ -298,6 +300,7 @@ static struct pcie_port_service_driver dpcdriver = {
.service = PCIE_PORT_SERVICE_DPC,
.probe = dpc_probe,
.remove = dpc_remove,
.reset_link = dpc_reset_link,
};
static int __init dpc_service_init(void)

388
drivers/pci/pcie/err.c Normal file
View File

@ -0,0 +1,388 @@
// SPDX-License-Identifier: GPL-2.0
/*
* This file implements the error recovery as a core part of PCIe error
* reporting. When a PCIe error is delivered, an error message will be
* collected and printed to console, then, an error recovery procedure
* will be executed by following the PCI error recovery rules.
*
* Copyright (C) 2006 Intel Corp.
* Tom Long Nguyen (tom.l.nguyen@intel.com)
* Zhang Yanmin (yanmin.zhang@intel.com)
*/
#include <linux/pci.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/aer.h>
#include "portdrv.h"
#include "../pci.h"
/*
 * Data handed to the report_*() callbacks through their void *data
 * argument.
 */
struct aer_broadcast_data {
/* channel state stored in each device and passed to error_detected() */
enum pci_channel_state state;
/* combined vote of the callbacks run so far, folded by merge_result() */
enum pci_ers_result result;
};
/*
 * merge_result - fold one more vote into the result collected so far
 * @orig: result accumulated from earlier votes
 * @new: vote to merge in
 *
 * Returns the combined result.
 */
static pci_ers_result_t merge_result(enum pci_ers_result orig,
				     enum pci_ers_result new)
{
	/* A NO_AER_DRIVER vote overrides whatever was collected before */
	if (new == PCI_ERS_RESULT_NO_AER_DRIVER)
		return PCI_ERS_RESULT_NO_AER_DRIVER;

	/* A NONE vote never changes the outcome */
	if (new == PCI_ERS_RESULT_NONE)
		return orig;

	/* While things still look recoverable, the newest vote wins */
	if (orig == PCI_ERS_RESULT_CAN_RECOVER ||
	    orig == PCI_ERS_RESULT_RECOVERED)
		return new;

	/* A pending disconnect is only changed by a reset request */
	if (orig == PCI_ERS_RESULT_DISCONNECT &&
	    new == PCI_ERS_RESULT_NEED_RESET)
		return PCI_ERS_RESULT_NEED_RESET;

	return orig;
}
/*
 * report_error_detected - record the error state and collect the driver's vote
 * @dev: device being notified
 * @data: pointer to the struct aer_broadcast_data of this broadcast
 *
 * Stores the broadcast channel state in @dev, calls the driver's
 * error_detected() handler if there is one and merges the resulting vote
 * into the broadcast result.  Always returns 0.
 */
static int report_error_detected(struct pci_dev *dev, void *data)
{
	struct aer_broadcast_data *bcast = data;
	const struct pci_error_handlers *handlers;
	pci_ers_result_t vote;

	device_lock(&dev->dev);
	dev->error_state = bcast->state;

	handlers = dev->driver ? dev->driver->err_handler : NULL;
	if (handlers && handlers->error_detected) {
		vote = handlers->error_detected(dev, bcast->state);
		pci_uevent_ers(dev, PCI_ERS_RESULT_NONE);
	} else if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
		/* A bridge without an error_detected callback abstains */
		vote = PCI_ERS_RESULT_NONE;
	} else {
		/*
		 * During fatal recovery a downstream device without a
		 * driver may keep us from recovering: a driver loaded
		 * later for this device is unaware of its hardware state.
		 */
		if (bcast->state == pci_channel_io_frozen)
			pci_printk(KERN_DEBUG, dev, "device has %s\n",
				   dev->driver ?
				   "no AER-aware driver" : "no driver");

		/*
		 * Any non-bridge device in the subtree that lacks an
		 * error_detected callback votes
		 * PCI_ERS_RESULT_NO_AER_DRIVER.  That prevents the
		 * subsequent mmio_enabled/slot_reset/resume callbacks
		 * from being called for "any" device in the subtree, so
		 * all of them are left in the error state without recovery.
		 */
		vote = PCI_ERS_RESULT_NO_AER_DRIVER;
	}

	bcast->result = merge_result(bcast->result, vote);
	device_unlock(&dev->dev);

	return 0;
}
/*
 * report_mmio_enabled - collect the driver's mmio_enabled() vote
 * @dev: device being notified
 * @data: pointer to the struct aer_broadcast_data of this broadcast
 *
 * Devices whose driver has no mmio_enabled() handler leave the broadcast
 * result untouched.  Always returns 0.
 */
static int report_mmio_enabled(struct pci_dev *dev, void *data)
{
	struct aer_broadcast_data *bcast = data;
	const struct pci_error_handlers *handlers;
	pci_ers_result_t vote;

	device_lock(&dev->dev);

	handlers = dev->driver ? dev->driver->err_handler : NULL;
	if (handlers && handlers->mmio_enabled) {
		vote = handlers->mmio_enabled(dev);
		bcast->result = merge_result(bcast->result, vote);
	}

	device_unlock(&dev->dev);

	return 0;
}
static int report_slot_reset(struct pci_dev *dev, void *data)
{
pci_ers_result_t vote;
const struct pci_error_handlers *err_handler;
struct aer_broadcast_data *result_data;
result_data = (struct aer_broadcast_data *) data;
device_lock(&dev->dev);
if (!dev->driver ||
!dev->driver->err_handler ||
!dev->driver->err_handler->slot_reset)
goto out;
err_handler = dev->driver->err_handler;
vote = err_handler->slot_reset(dev);
result_data->result = merge_result(result_data->result, vote);
out:
device_unlock(&dev->dev);
return 0;
}
static int report_resume(struct pci_dev *dev, void *data)
{
const struct pci_error_handlers *err_handler;
device_lock(&dev->dev);
dev->error_state = pci_channel_io_normal;
if (!dev->driver ||
!dev->driver->err_handler ||
!dev->driver->err_handler->resume)
goto out;
err_handler = dev->driver->err_handler;
err_handler->resume(dev);
pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED);
out:
device_unlock(&dev->dev);
return 0;
}
/**
 * default_reset_link - fallback link reset via secondary bus reset
 * @dev: port whose secondary bus is to be reset
 *
 * Used by reset_link() when the port has a secondary link but no port
 * service driver supplying its own reset_link method.  Resets the
 * secondary bus of @dev, logs the fact and reports
 * PCI_ERS_RESULT_RECOVERED unconditionally.
 */
static pci_ers_result_t default_reset_link(struct pci_dev *dev)
{
	pci_reset_bridge_secondary_bus(dev);
	pci_printk(KERN_DEBUG, dev, "downstream link has been reset\n");

	return PCI_ERS_RESULT_RECOVERED;
}
/*
 * reset_link - reset the link associated with an erroring device
 * @dev:     device that reported the error
 * @service: port service whose driver is asked to perform the reset
 *
 * Picks the port to reset (@dev itself if it is a bridge, otherwise the
 * bridge above it), then resets it using, in order of preference, the
 * service driver's reset_link method or default_reset_link().
 *
 * Returns PCI_ERS_RESULT_RECOVERED on success and
 * PCI_ERS_RESULT_DISCONNECT when no reset method exists or the reset
 * did not report recovery.
 */
static pci_ers_result_t reset_link(struct pci_dev *dev, u32 service)
{
	struct pcie_port_service_driver *svc;
	struct pci_dev *port;
	pci_ers_result_t status;

	/*
	 * A bridge resets the link for all of its subordinates; any other
	 * device defers to its upstream component (likely a downstream port).
	 */
	port = (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) ?
		dev : dev->bus->self;

	/* Prefer the reset method of the port's own service driver */
	svc = pcie_port_find_service(port, service);
	if (svc && svc->reset_link) {
		status = svc->reset_link(port);
	} else if (port->has_secondary_link) {
		status = default_reset_link(port);
	} else {
		pci_printk(KERN_DEBUG, dev, "no link-reset support at upstream device %s\n",
			   pci_name(port));
		return PCI_ERS_RESULT_DISCONNECT;
	}

	if (status == PCI_ERS_RESULT_RECOVERED)
		return status;

	pci_printk(KERN_DEBUG, dev, "link reset at upstream device %s failed\n",
		   pci_name(port));
	return PCI_ERS_RESULT_DISCONNECT;
}
/**
 * broadcast_error_message - run one recovery callback over a hierarchy
 * @dev: device at which the broadcast starts
 * @state: channel state handed to the callbacks
 * @error_mesg: name of the recovery phase, used only for the debug log
 * @cb: per-device callback to invoke
 *
 * Invoked for each phase of error recovery.  The accumulated verdict
 * starts as PCI_ERS_RESULT_CAN_RECOVER for the error_detected phase and
 * PCI_ERS_RESULT_RECOVERED for every other phase; each visited device may
 * then change it through @cb.  Returns the final accumulated verdict.
 */
static pci_ers_result_t broadcast_error_message(struct pci_dev *dev,
	enum pci_channel_state state,
	char *error_mesg,
	int (*cb)(struct pci_dev *, void *))
{
	struct aer_broadcast_data result_data;

	pci_printk(KERN_DEBUG, dev, "broadcast %s message\n", error_mesg);

	result_data.state = state;
	result_data.result = (cb == report_error_detected) ?
		PCI_ERS_RESULT_CAN_RECOVER : PCI_ERS_RESULT_RECOVERED;

	if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) {
		/*
		 * An error reported by an end point is attributed to its
		 * upstream link.  When the error is non-fatal the bus is
		 * fine, so only the reporting function is called back;
		 * otherwise every device on its bus is visited.
		 */
		if (state == pci_channel_io_normal)
			cb(dev, &result_data);
		else
			pci_walk_bus(dev->bus, cb, &result_data);

		return result_data.result;
	}

	/*
	 * An error reported by a bridge is attributed to its downstream
	 * link: recovery runs on all subordinates rather than on the bridge
	 * itself, and the bridge's own error status is cleared at resume.
	 */
	if (cb == report_error_detected)
		dev->error_state = state;

	pci_walk_bus(dev->subordinate, cb, &result_data);

	if (cb == report_resume) {
		pci_cleanup_aer_uncorrect_error_status(dev);
		dev->error_state = pci_channel_io_normal;
	}

	return result_data.result;
}
/**
 * pcie_do_fatal_recovery - handle fatal error recovery process
 * @dev: pointer to a pci_dev data structure of agent detecting an error
 * @service: port service (e.g. PCIE_PORT_SERVICE_AER) on whose behalf the
 *	     recovery runs; selects the service driver used for link reset
 *
 * Invoked when an error is fatal.  Removes the devices beneath the
 * affected port, resets the link (e.g. by secondary bus reset) and then
 * re-enumerates the devices once the link is back up.
 *
 * If no port with a subordinate bus can be determined for @dev, nothing
 * is removed or reset and the recovery is reported as failed.
 */
void pcie_do_fatal_recovery(struct pci_dev *dev, u32 service)
{
	struct pci_dev *udev;
	struct pci_bus *parent;
	struct pci_dev *pdev, *temp;
	pci_ers_result_t result;

	/* The port to recover is @dev itself, or else the bridge above it */
	if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)
		udev = dev;
	else
		udev = dev->bus->self;

	/*
	 * dev->bus->self is NULL when @dev sits on a bus that has no
	 * upstream bridge, and a bridge's subordinate pointer may be NULL
	 * as well (the removal loop below guards against exactly that with
	 * pci_has_subordinate()).  Without a port and a bus to operate on
	 * there is nothing to remove or reset, so report failure instead of
	 * dereferencing a NULL pointer.
	 */
	if (!udev || !udev->subordinate) {
		pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT);
		pci_info(dev, "Device recovery from fatal error failed\n");
		return;
	}

	parent = udev->subordinate;
	pci_lock_rescan_remove();

	/* Mark everything below the port disconnected, then remove it */
	list_for_each_entry_safe_reverse(pdev, temp, &parent->devices,
					 bus_list) {
		/* Hold a reference so pdev stays valid across its removal */
		pci_dev_get(pdev);
		pci_dev_set_disconnected(pdev, NULL);
		if (pci_has_subordinate(pdev))
			pci_walk_bus(pdev->subordinate,
				     pci_dev_set_disconnected, NULL);
		pci_stop_and_remove_bus_device(pdev);
		pci_dev_put(pdev);
	}

	result = reset_link(udev, service);

	if ((service == PCIE_PORT_SERVICE_AER) &&
	    (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)) {
		/*
		 * If the error is reported by a bridge, we think this error
		 * is related to the downstream link of the bridge, so we
		 * do error recovery on all subordinates of the bridge instead
		 * of the bridge and clear the error status of the bridge.
		 */
		pci_cleanup_aer_uncorrect_error_status(dev);
	}

	if (result == PCI_ERS_RESULT_RECOVERED) {
		/*
		 * NOTE(review): success is logged even when
		 * pcie_wait_for_link() fails and no rescan happens -
		 * confirm that this is intended.
		 */
		if (pcie_wait_for_link(udev, true))
			pci_rescan_bus(udev->bus);
		pci_info(dev, "Device recovery from fatal error successful\n");
	} else {
		pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT);
		pci_info(dev, "Device recovery from fatal error failed\n");
	}

	pci_unlock_rescan_remove();
}
/**
* pcie_do_nonfatal_recovery - handle nonfatal error recovery process
* @dev: pointer to a pci_dev data structure of agent detecting an error
*
* Invoked when an error is nonfatal/fatal. Once being invoked, broadcast
* error detected message to all downstream drivers within a hierarchy in
* question and return the returned code.
*/
void pcie_do_nonfatal_recovery(struct pci_dev *dev)
{
pci_ers_result_t status;
enum pci_channel_state state;
state = pci_channel_io_normal;
status = broadcast_error_message(dev,
state,
"error_detected",
report_error_detected);
if (status == PCI_ERS_RESULT_CAN_RECOVER)
status = broadcast_error_message(dev,
state,
"mmio_enabled",
report_mmio_enabled);
if (status == PCI_ERS_RESULT_NEED_RESET) {
/*
* TODO: Should call platform-specific
* functions to reset slot before calling
* drivers' slot_reset callbacks?
*/
status = broadcast_error_message(dev,
state,
"slot_reset",
report_slot_reset);
}
if (status != PCI_ERS_RESULT_RECOVERED)
goto failed;
broadcast_error_message(dev,
state,
"resume",
report_resume);
pci_info(dev, "AER: Device recovery successful\n");
return;
failed:
pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT);
/* TODO: Should kernel panic here? */
pci_info(dev, "AER: Device recovery failed\n");
}

View File

@ -112,4 +112,7 @@ static inline bool pcie_pme_no_msi(void) { return false; }
static inline void pcie_pme_interrupt_enable(struct pci_dev *dev, bool en) {}
#endif /* !CONFIG_PCIE_PME */
/*
 * Look up the port service driver bound for @service beneath port @dev;
 * NULL when no matching service device with a driver is found.
 */
struct pcie_port_service_driver *pcie_port_find_service(struct pci_dev *dev,
u32 service);
/*
 * Look up the struct device of the @service port service beneath port @dev;
 * NULL when no matching service device with a driver is found.
 */
struct device *pcie_port_find_device(struct pci_dev *dev, u32 service);
#endif /* _PORTDRV_H_ */

View File

@ -19,6 +19,12 @@
#include "../pci.h"
#include "portdrv.h"
/*
 * Search request and result exchanged with find_service_iter(): the caller
 * fills in @service, the iterator fills in @drv and @dev on a match.
 */
struct portdrv_service_data {
/* out: service driver bound to the matching service device */
struct pcie_port_service_driver *drv;
/* out: the matching service device itself */
struct device *dev;
/* in: service being searched for */
u32 service;
};
/**
* release_pcie_device - free PCI Express port service device structure
* @dev: Port service device to release
@ -398,6 +404,69 @@ static int remove_iter(struct device *dev, void *data)
return 0;
}
/*
 * Child-device iterator used with device_for_each_child() to locate a port
 * service.
 *
 * @device: child device being examined
 * @data:   struct portdrv_service_data naming the wanted service
 *
 * A child matches when it lives on the PCIe port bus, has a driver bound
 * and that driver serves the requested service.  On a match the driver and
 * device are stored in @data and 1 is returned (presumably ending the
 * walk - confirm against device_for_each_child()); otherwise returns 0.
 */
static int find_service_iter(struct device *device, void *data)
{
	struct portdrv_service_data *pdrvs = data;
	struct pcie_port_service_driver *service_driver;

	if (device->bus != &pcie_port_bus_type || !device->driver)
		return 0;

	service_driver = to_service_driver(device->driver);
	if (service_driver->service != pdrvs->service)
		return 0;

	pdrvs->drv = service_driver;
	pdrvs->dev = device;

	return 1;
}
/**
 * pcie_port_find_service - find the service driver
 * @dev: PCI Express port the service is associated with
 * @service: Service to find
 *
 * Search the child devices of @dev for a port service device of the given
 * @service that has a driver bound.  Returns that service driver, or NULL
 * if none is found.
 */
struct pcie_port_service_driver *pcie_port_find_service(struct pci_dev *dev,
							u32 service)
{
	struct portdrv_service_data pdrvs = {
		.drv = NULL,
		.service = service,
	};

	device_for_each_child(&dev->dev, &pdrvs, find_service_iter);

	return pdrvs.drv;
}
/**
 * pcie_port_find_device - find the struct device
 * @dev: PCI Express port the service is associated with
 * @service: For the service to find
 *
 * Search the child devices of @dev for a port service device of the given
 * @service that has a driver bound.  Returns its struct device, or NULL if
 * none is found.
 */
struct device *pcie_port_find_device(struct pci_dev *dev,
				     u32 service)
{
	struct portdrv_service_data pdrvs = {
		.dev = NULL,
		.service = service,
	};

	device_for_each_child(&dev->dev, &pdrvs, find_service_iter);

	return pdrvs.dev;
}
/**
* pcie_port_device_remove - unregister PCI Express port service devices
* @dev: PCI Express port the service devices to unregister are associated with

View File

@ -14,6 +14,7 @@
/*
 * Error severity codes.  The aer_event tracepoint prints AER_CORRECTABLE
 * as "Corrected", AER_FATAL as "Fatal" and any other value as
 * "Uncorrected, non-fatal".
 */
#define AER_NONFATAL 0
#define AER_FATAL 1
#define AER_CORRECTABLE 2
/* NOTE(review): presumably a fatal error signalled through DPC - confirm */
#define DPC_FATAL 3
struct pci_dev;

View File

@ -2284,7 +2284,7 @@ static inline bool pci_is_thunderbolt_attached(struct pci_dev *pdev)
return false;
}
#if defined(CONFIG_PCIEAER) || defined(CONFIG_EEH)
#if defined(CONFIG_PCIEPORTBUS) || defined(CONFIG_EEH)
void pci_uevent_ers(struct pci_dev *pdev, enum pci_ers_result err_type);
#endif

View File

@ -298,30 +298,44 @@ TRACE_EVENT(non_standard_event,
TRACE_EVENT(aer_event,
TP_PROTO(const char *dev_name,
const u32 status,
const u8 severity),
const u8 severity,
const u8 tlp_header_valid,
struct aer_header_log_regs *tlp),
TP_ARGS(dev_name, status, severity),
TP_ARGS(dev_name, status, severity, tlp_header_valid, tlp),
TP_STRUCT__entry(
__string( dev_name, dev_name )
__field( u32, status )
__field( u8, severity )
__field( u8, tlp_header_valid)
__array( u32, tlp_header, 4 )
),
TP_fast_assign(
__assign_str(dev_name, dev_name);
__entry->status = status;
__entry->severity = severity;
__entry->tlp_header_valid = tlp_header_valid;
if (tlp_header_valid) {
__entry->tlp_header[0] = tlp->dw0;
__entry->tlp_header[1] = tlp->dw1;
__entry->tlp_header[2] = tlp->dw2;
__entry->tlp_header[3] = tlp->dw3;
}
),
TP_printk("%s PCIe Bus Error: severity=%s, %s\n",
TP_printk("%s PCIe Bus Error: severity=%s, %s, TLP Header=%s\n",
__get_str(dev_name),
__entry->severity == AER_CORRECTABLE ? "Corrected" :
__entry->severity == AER_FATAL ?
"Fatal" : "Uncorrected, non-fatal",
__entry->severity == AER_CORRECTABLE ?
__print_flags(__entry->status, "|", aer_correctable_errors) :
__print_flags(__entry->status, "|", aer_uncorrectable_errors))
__print_flags(__entry->status, "|", aer_uncorrectable_errors),
__entry->tlp_header_valid ?
__print_array(__entry->tlp_header, 4, 4) :
"Not available")
);
/*

View File

@ -981,6 +981,7 @@
#define PCI_EXP_DPC_CAP_DL_ACTIVE 0x1000 /* ERR_COR signal on DL_Active supported */
#define PCI_EXP_DPC_CTL 6 /* DPC control */
#define PCI_EXP_DPC_CTL_EN_FATAL 0x0001 /* Enable trigger on ERR_FATAL message */
#define PCI_EXP_DPC_CTL_EN_NONFATAL 0x0002 /* Enable trigger on ERR_NONFATAL message */
#define PCI_EXP_DPC_CTL_INT_EN 0x0008 /* DPC Interrupt Enable */