linux/drivers/pci/remove.c

184 lines
4.4 KiB
C
Raw Normal View History

#include <linux/pci.h>
#include <linux/module.h>
#include <linux/pci-aspm.h>
#include "pci.h"
static void pci_free_resources(struct pci_dev *dev)
{
int i;
msi_remove_pci_irq_vectors(dev);
pci_cleanup_rom(dev);
for (i = 0; i < PCI_NUM_RESOURCES; i++) {
struct resource *res = dev->resource + i;
if (res->parent)
release_resource(res);
}
}
static void pci_stop_dev(struct pci_dev *dev)
{
if (dev->is_added) {
pci_proc_detach_device(dev);
pci_remove_sysfs_dev_files(dev);
device_unregister(&dev->dev);
dev->is_added = 0;
}
if (dev->bus->self)
pcie_aspm_exit_link_state(dev);
}
static void pci_destroy_dev(struct pci_dev *dev)
{
/* Remove the device from the device lists, and prevent any further
* list accesses from this device */
down_write(&pci_bus_sem);
list_del(&dev->bus_list);
dev->bus_list.next = dev->bus_list.prev = NULL;
up_write(&pci_bus_sem);
pci_free_resources(dev);
pci_dev_put(dev);
}
/**
* pci_remove_device_safe - remove an unused hotplug device
* @dev: the device to remove
*
* Delete the device structure from the device lists and
* notify userspace (/sbin/hotplug), but only if the device
* in question is not being used by a driver.
* Returns 0 on success.
*/
#if 0
int pci_remove_device_safe(struct pci_dev *dev)
{
if (pci_dev_driver(dev))
return -EBUSY;
pci_destroy_dev(dev);
return 0;
}
#endif /* 0 */
void pci_remove_bus(struct pci_bus *pci_bus)
{
pci_proc_detach_bus(pci_bus);
down_write(&pci_bus_sem);
list_del(&pci_bus->node);
pci_bus_release_busn_res(pci_bus);
up_write(&pci_bus_sem);
if (!pci_bus->is_added)
return;
pci_remove_legacy_files(pci_bus);
device_unregister(&pci_bus->dev);
}
EXPORT_SYMBOL(pci_remove_bus);
static void __pci_remove_behind_bridge(struct pci_dev *dev);
/**
* pci_stop_and_remove_bus_device - remove a PCI device and any children
* @dev: the device to remove
*
* Remove a PCI device from the device lists, informing the drivers
* that the device has been removed. We also remove any subordinate
* buses and children in a depth-first manner.
*
* For each device we remove, delete the device structure from the
* device lists, remove the /proc entry, and notify userspace
* (/sbin/hotplug).
*/
void __pci_remove_bus_device(struct pci_dev *dev)
{
if (dev->subordinate) {
struct pci_bus *b = dev->subordinate;
__pci_remove_behind_bridge(dev);
pci_remove_bus(b);
dev->subordinate = NULL;
}
pci_destroy_dev(dev);
}
EXPORT_SYMBOL(__pci_remove_bus_device);
void pci_stop_and_remove_bus_device(struct pci_dev *dev)
{
pci_stop_bus_device(dev);
__pci_remove_bus_device(dev);
}
static void __pci_remove_behind_bridge(struct pci_dev *dev)
{
struct list_head *l, *n;
if (dev->subordinate)
list_for_each_safe(l, n, &dev->subordinate->devices)
__pci_remove_bus_device(pci_dev_b(l));
}
static void pci_stop_behind_bridge(struct pci_dev *dev)
{
struct list_head *l, *n;
if (dev->subordinate)
list_for_each_safe(l, n, &dev->subordinate->devices)
pci_stop_bus_device(pci_dev_b(l));
}
/**
* pci_stop_and_remove_behind_bridge - stop and remove all devices behind
* a PCI bridge
* @dev: PCI bridge device
*
* Remove all devices on the bus, except for the parent bridge.
* This also removes any child buses, and any devices they may
* contain in a depth-first manner.
*/
void pci_stop_and_remove_behind_bridge(struct pci_dev *dev)
{
pci_stop_behind_bridge(dev);
__pci_remove_behind_bridge(dev);
}
static void pci_stop_bus_devices(struct pci_bus *bus)
{
struct list_head *l, *n;
PCI: make sriov work with hotplug remove When hot removing a pci express module that has a pcie switch and supports SRIOV, we got: [ 5918.610127] pciehp 0000:80:02.2:pcie04: pcie_isr: intr_loc 1 [ 5918.615779] pciehp 0000:80:02.2:pcie04: Attention button interrupt received [ 5918.622730] pciehp 0000:80:02.2:pcie04: Button pressed on Slot(3) [ 5918.629002] pciehp 0000:80:02.2:pcie04: pciehp_get_power_status: SLOTCTRL a8 value read 1f9 [ 5918.637416] pciehp 0000:80:02.2:pcie04: PCI slot #3 - powering off due to button press. [ 5918.647125] pciehp 0000:80:02.2:pcie04: pcie_isr: intr_loc 10 [ 5918.653039] pciehp 0000:80:02.2:pcie04: pciehp_green_led_blink: SLOTCTRL a8 write cmd 200 [ 5918.661229] pciehp 0000:80:02.2:pcie04: pciehp_set_attention_status: SLOTCTRL a8 write cmd c0 [ 5924.667627] pciehp 0000:80:02.2:pcie04: Disabling domain:bus:device=0000:b0:00 [ 5924.674909] pciehp 0000:80:02.2:pcie04: pciehp_get_power_status: SLOTCTRL a8 value read 2f9 [ 5924.683262] pciehp 0000:80:02.2:pcie04: pciehp_unconfigure_device: domain:bus:dev = 0000:b0:00 [ 5924.693976] libfcoe_device_notification: NETDEV_UNREGISTER eth6 [ 5924.764979] libfcoe_device_notification: NETDEV_UNREGISTER eth14 [ 5924.873539] libfcoe_device_notification: NETDEV_UNREGISTER eth15 [ 5924.995209] libfcoe_device_notification: NETDEV_UNREGISTER eth16 [ 5926.114407] sxge 0000:b2:00.0: PCI INT A disabled [ 5926.119342] BUG: unable to handle kernel NULL pointer dereference at (null) [ 5926.127189] IP: [<ffffffff81353a3b>] pci_stop_bus_device+0x33/0x83 [ 5926.133377] PGD 0 [ 5926.135402] Oops: 0000 [#1] SMP [ 5926.138659] CPU 2 [ 5926.140499] Modules linked in: ... [ 5926.143754] [ 5926.275823] Call Trace: [ 5926.278267] [<ffffffff81353a38>] pci_stop_bus_device+0x30/0x83 [ 5926.284180] [<ffffffff81353af4>] pci_remove_bus_device+0x1a/0xba [ 5926.290264] [<ffffffff81366311>] pciehp_unconfigure_device+0x110/0x17b [ 5926.296866] [<ffffffff81365dd9>] ? pciehp_disable_slot+0x188/0x188 [ 5926.303123] [<ffffffff81365d6f>] pciehp_disable_slot+0x11e/0x188 [ 5926.309206] [<ffffffff81365e68>] pciehp_power_thread+0x8f/0xe0 ... +-[0000:80]-+-00.0-[81-8f]-- | +-01.0-[90-9f]-- | +-02.0-[a0-af]-- | +-02.2-[b0-bf]----00.0-[b1-b3]--+-02.0-[b2]--+-00.0 Device | | | +-00.1 Device | | | +-00.2 Device | | | \-00.3 Device | | \-03.0-[b3]--+-00.0 Device | | +-00.1 Device | | +-00.2 Device | | \-00.3 Device root complex: 80:02.2 pci express modules: have pcie switch and are listed as b0:00.0, b1:02.0 and b1:03.0. end devices are b2:00.0 and b3.00.0. VFs are: b2:00.1,... b2:00.3, and b3:00.1,...,b3:00.3 Root cause: when doing pci_stop_bus_device() with phys fn, it will stop virt fn and remove the fn, so list_for_each_safe(l, n, &bus->devices) will have problem to refer freed n that is pointed to vf entry. Solution is just replacing list_for_each_safe() with list_for_each_prev_safe(). This will make sure we can get valid n pointer to PF instead of the freed VF pointer (because newly added devices are inserted to the bus->devices list tail). During reviewing the patch, Bjorn said: | The PCI hot-remove path calls pci_stop_bus_devices() via | pci_remove_bus_device(). | | pci_stop_bus_devices() traverses the bus->devices list (point A below), | stopping each device in turn, which calls the driver remove() method. When | the device is an SR-IOV PF, the driver calls pci_disable_sriov(), which | also uses pci_remove_bus_device() to remove the VF devices from the | bus->devices list (point B). | | pci_remove_bus_device | pci_stop_bus_device | pci_stop_bus_devices(subordinate) | list_for_each(bus->devices) <-- A | pci_stop_bus_device(PF) | ... | driver->remove | pci_disable_sriov | ... | pci_remove_bus_device(VF) | <remove from bus_list> <-- B | | At B, we're changing the same list we're iterating through at A, so when | the driver remove() method returns, the pci_stop_bus_devices() iterator has | a pointer to a list entry that has already been freed. Discussion thread can be found : https://lkml.org/lkml/2011/10/15/141 https://lkml.org/lkml/2012/1/23/360 -v5: According to Linus to make remove more robust, Change to list_for_each_prev_safe instead. That is more reasonable, because those devices are added to tail of the list before. Signed-off-by: Yinghai Lu <yinghai@kernel.org> Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2012-01-28 02:55:09 +08:00
/*
* VFs could be removed by pci_stop_and_remove_bus_device() in the
PCI: make sriov work with hotplug remove When hot removing a pci express module that has a pcie switch and supports SRIOV, we got: [ 5918.610127] pciehp 0000:80:02.2:pcie04: pcie_isr: intr_loc 1 [ 5918.615779] pciehp 0000:80:02.2:pcie04: Attention button interrupt received [ 5918.622730] pciehp 0000:80:02.2:pcie04: Button pressed on Slot(3) [ 5918.629002] pciehp 0000:80:02.2:pcie04: pciehp_get_power_status: SLOTCTRL a8 value read 1f9 [ 5918.637416] pciehp 0000:80:02.2:pcie04: PCI slot #3 - powering off due to button press. [ 5918.647125] pciehp 0000:80:02.2:pcie04: pcie_isr: intr_loc 10 [ 5918.653039] pciehp 0000:80:02.2:pcie04: pciehp_green_led_blink: SLOTCTRL a8 write cmd 200 [ 5918.661229] pciehp 0000:80:02.2:pcie04: pciehp_set_attention_status: SLOTCTRL a8 write cmd c0 [ 5924.667627] pciehp 0000:80:02.2:pcie04: Disabling domain:bus:device=0000:b0:00 [ 5924.674909] pciehp 0000:80:02.2:pcie04: pciehp_get_power_status: SLOTCTRL a8 value read 2f9 [ 5924.683262] pciehp 0000:80:02.2:pcie04: pciehp_unconfigure_device: domain:bus:dev = 0000:b0:00 [ 5924.693976] libfcoe_device_notification: NETDEV_UNREGISTER eth6 [ 5924.764979] libfcoe_device_notification: NETDEV_UNREGISTER eth14 [ 5924.873539] libfcoe_device_notification: NETDEV_UNREGISTER eth15 [ 5924.995209] libfcoe_device_notification: NETDEV_UNREGISTER eth16 [ 5926.114407] sxge 0000:b2:00.0: PCI INT A disabled [ 5926.119342] BUG: unable to handle kernel NULL pointer dereference at (null) [ 5926.127189] IP: [<ffffffff81353a3b>] pci_stop_bus_device+0x33/0x83 [ 5926.133377] PGD 0 [ 5926.135402] Oops: 0000 [#1] SMP [ 5926.138659] CPU 2 [ 5926.140499] Modules linked in: ... [ 5926.143754] [ 5926.275823] Call Trace: [ 5926.278267] [<ffffffff81353a38>] pci_stop_bus_device+0x30/0x83 [ 5926.284180] [<ffffffff81353af4>] pci_remove_bus_device+0x1a/0xba [ 5926.290264] [<ffffffff81366311>] pciehp_unconfigure_device+0x110/0x17b [ 5926.296866] [<ffffffff81365dd9>] ? pciehp_disable_slot+0x188/0x188 [ 5926.303123] [<ffffffff81365d6f>] pciehp_disable_slot+0x11e/0x188 [ 5926.309206] [<ffffffff81365e68>] pciehp_power_thread+0x8f/0xe0 ... +-[0000:80]-+-00.0-[81-8f]-- | +-01.0-[90-9f]-- | +-02.0-[a0-af]-- | +-02.2-[b0-bf]----00.0-[b1-b3]--+-02.0-[b2]--+-00.0 Device | | | +-00.1 Device | | | +-00.2 Device | | | \-00.3 Device | | \-03.0-[b3]--+-00.0 Device | | +-00.1 Device | | +-00.2 Device | | \-00.3 Device root complex: 80:02.2 pci express modules: have pcie switch and are listed as b0:00.0, b1:02.0 and b1:03.0. end devices are b2:00.0 and b3.00.0. VFs are: b2:00.1,... b2:00.3, and b3:00.1,...,b3:00.3 Root cause: when doing pci_stop_bus_device() with phys fn, it will stop virt fn and remove the fn, so list_for_each_safe(l, n, &bus->devices) will have problem to refer freed n that is pointed to vf entry. Solution is just replacing list_for_each_safe() with list_for_each_prev_safe(). This will make sure we can get valid n pointer to PF instead of the freed VF pointer (because newly added devices are inserted to the bus->devices list tail). During reviewing the patch, Bjorn said: | The PCI hot-remove path calls pci_stop_bus_devices() via | pci_remove_bus_device(). | | pci_stop_bus_devices() traverses the bus->devices list (point A below), | stopping each device in turn, which calls the driver remove() method. When | the device is an SR-IOV PF, the driver calls pci_disable_sriov(), which | also uses pci_remove_bus_device() to remove the VF devices from the | bus->devices list (point B). | | pci_remove_bus_device | pci_stop_bus_device | pci_stop_bus_devices(subordinate) | list_for_each(bus->devices) <-- A | pci_stop_bus_device(PF) | ... | driver->remove | pci_disable_sriov | ... | pci_remove_bus_device(VF) | <remove from bus_list> <-- B | | At B, we're changing the same list we're iterating through at A, so when | the driver remove() method returns, the pci_stop_bus_devices() iterator has | a pointer to a list entry that has already been freed. Discussion thread can be found : https://lkml.org/lkml/2011/10/15/141 https://lkml.org/lkml/2012/1/23/360 -v5: According to Linus to make remove more robust, Change to list_for_each_prev_safe instead. That is more reasonable, because those devices are added to tail of the list before. Signed-off-by: Yinghai Lu <yinghai@kernel.org> Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2012-01-28 02:55:09 +08:00
* pci_stop_bus_devices() code path for PF.
* aka, bus->devices get updated in the process.
* but VFs are inserted after PFs when SRIOV is enabled for PF,
* We can iterate the list backwards to get prev valid PF instead
* of removed VF.
*/
list_for_each_prev_safe(l, n, &bus->devices) {
struct pci_dev *dev = pci_dev_b(l);
pci_stop_bus_device(dev);
}
}
/**
* pci_stop_bus_device - stop a PCI device and any children
* @dev: the device to stop
*
* Stop a PCI device (detach the driver, remove from the global list
* and so on). This also stop any subordinate buses and children in a
* depth-first manner.
*/
void pci_stop_bus_device(struct pci_dev *dev)
{
if (dev->subordinate)
pci_stop_bus_devices(dev->subordinate);
pci_stop_dev(dev);
}
EXPORT_SYMBOL(pci_stop_and_remove_bus_device);
EXPORT_SYMBOL(pci_stop_and_remove_behind_bridge);
EXPORT_SYMBOL_GPL(pci_stop_bus_device);