linux/drivers/pci/hotplug/acpiphp_glue.c

1015 lines
25 KiB
C
Raw Normal View History

/*
* ACPI PCI HotPlug glue functions to ACPI CA subsystem
*
* Copyright (C) 2002,2003 Takayoshi Kochi (t-kochi@bq.jp.nec.com)
* Copyright (C) 2002 Hiroshi Aono (h-aono@ap.jp.nec.com)
* Copyright (C) 2002,2003 NEC Corporation
* Copyright (C) 2003-2005 Matthew Wilcox (matthew.wilcox@hp.com)
* Copyright (C) 2003-2005 Hewlett Packard
* Copyright (C) 2005 Rajesh Shah (rajesh.shah@intel.com)
* Copyright (C) 2005 Intel Corporation
*
* All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or (at
* your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* Send feedback to <kristen.c.accardi@intel.com>
*
*/
/*
* Lifetime rules for pci_dev:
* - The one in acpiphp_bridge has its refcount elevated by pci_get_slot()
* when the bridge is scanned and it loses a refcount when the bridge
* is removed.
* - When a P2P bridge is present, we elevate the refcount on the subordinate
* bus. It loses the refcount when the the driver unloads.
*/
#define pr_fmt(fmt) "acpiphp_glue: " fmt
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/pci_hotplug.h>
#include <linux/pci-acpi.h>
ACPI / hotplug / PCI: Check for new devices on enabled slots The current implementation of acpiphp_check_bridge() is pretty dumb: - It enables a slot if it's not enabled and the slot status is ACPI_STA_ALL. - It disables a slot if it's enabled and the slot status is not ACPI_STA_ALL. This behavior is not sufficient to handle the Thunderbolt daisy chaining case properly, however, because in that case the bus behind the already enabled slot needs to be rescanned for new devices. For this reason, modify acpiphp_check_bridge() so that slots are disabled and stopped if they are not in the ACPI_STA_ALL state. For slots in the ACPI_STA_ALL state, devices behind them that don't respond are trimmed using a new function, trim_stale_devices(), introduced specifically for this purpose. That function walks the given bus and checks each device on it. If the device doesn't respond, it is assumed to be gone and is removed. Once all of the stale devices directy behind the slot have been removed, acpiphp_check_bridge() will start looking for new devices that might have appeared on the given bus. It will do that even if the slot is already enabled (SLOT_ENABLED is set for it). In addition to that, make the bus check notification ignore SLOT_ENABLED and go for enable_device() directly if bridge is NULL, so that devices behind the slot are re-enumerated in that case too. This change is based on earlier patches from Kirill A Shutemov and Mika Westerberg. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
2013-07-17 04:10:35 +08:00
#include <linux/pm_runtime.h>
#include <linux/mutex.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 16:04:11 +08:00
#include <linux/slab.h>
PCI hotplug: acpiphp: Prevent deadlock on PCI-to-PCI bridge remove I originally submitted a patch to workaround this by pushing all Ejection Requests and Device Checks onto the kacpi_hotplug queue. http://marc.info/?l=linux-acpi&m=131678270930105&w=2 The patch is still insufficient in that Bus Checks also need to be added. Rather than add all events, including non-PCI-hotplug events, to the hotplug queue, mjg suggested that a better approach would be to modify the acpiphp driver so only acpiphp events would be added to the kacpi_hotplug queue. It's a longer patch, but at least we maintain the benefit of having separate queues in ACPI. This, of course, is still only a workaround the problem. As Bjorn and mjg pointed out, we have to refactor a lot of this code to do the right thing but at this point it is a better to have this code working. The acpi core places all events on the kacpi_notify queue. When the acpiphp driver is loaded and a PCI card with a PCI-to-PCI bridge is removed the following call sequence occurs: cleanup_p2p_bridge() -> cleanup_bridge() -> acpi_remove_notify_handler() -> acpi_os_wait_events_complete() -> flush_workqueue(kacpi_notify_wq) which is the queue we are currently executing on and the process will hang. Move all hotplug acpiphp events onto the kacpi_hotplug workqueue. In handle_hotplug_event_bridge() and handle_hotplug_event_func() we can simply push the rest of the work onto the kacpi_hotplug queue and then avoid the deadlock. Signed-off-by: Prarit Bhargava <prarit@redhat.com> Cc: mjg@redhat.com Cc: bhelgaas@google.com Cc: linux-acpi@vger.kernel.org Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2011-09-29 07:40:53 +08:00
#include <linux/acpi.h>
#include "../pci.h"
#include "acpiphp.h"
static LIST_HEAD(bridge_list);
static DEFINE_MUTEX(bridge_mutex);
static int acpiphp_hotplug_notify(struct acpi_device *adev, u32 type);
static void acpiphp_post_dock_fixup(struct acpi_device *adev);
static void acpiphp_sanitize_bus(struct pci_bus *bus);
static void hotplug_event(u32 type, struct acpiphp_context *context);
static void free_bridge(struct kref *kref);
/**
* acpiphp_init_context - Create hotplug context and grab a reference to it.
* @adev: ACPI device object to create the context for.
*
* Call under acpi_hp_context_lock.
*/
static struct acpiphp_context *acpiphp_init_context(struct acpi_device *adev)
{
struct acpiphp_context *context;
context = kzalloc(sizeof(*context), GFP_KERNEL);
if (!context)
return NULL;
context->refcount = 1;
context->hp.notify = acpiphp_hotplug_notify;
context->hp.fixup = acpiphp_post_dock_fixup;
acpi_set_hp_context(adev, &context->hp);
return context;
}
/**
* acpiphp_get_context - Get hotplug context and grab a reference to it.
ACPI / hotplug / PCI: Consolidate ACPIPHP with ACPI core hotplug The ACPI-based PCI hotplug (ACPIPHP) code currently attaches its hotplug context objects directly to ACPI namespace nodes representing hotplug devices. However, after recent changes causing struct acpi_device to be created for every namespace node representing a device (regardless of its status), that is not necessary any more. Moreover, it's vulnerable to the theoretical issue that the ACPI handle passed in the context between handle_hotplug_event() and hotplug_event_work() may become invalid in the meantime (as a result of a concurrent table unload). In principle, this issue might be addressed by adding a non-empty release handler for ACPIPHP hotplug context objects analogous to acpi_scan_drop_device(), but that would duplicate the code in that function and in acpi_device_del_work_fn(). For this reason, it's better to modify ACPIPHP to attach its device hotplug contexts to struct device objects representing hotplug devices and make it use acpi_hotplug_notify_cb() as its notify handler. At the same time, acpi_device_hotplug() can be modified to dispatch the new .hp.event() callback pointing to acpiphp_hotplug_event() from ACPI device objects associated with PCI devices or use the generic ACPI device hotplug code for device objects with matching scan handlers. This allows the existing code duplication between ACPIPHP and the ACPI core to be reduced too and makes further ACPI-based device hotplug consolidation possible. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2014-02-07 00:31:37 +08:00
* @adev: ACPI device object to get the context for.
*
* Call under acpi_hp_context_lock.
*/
ACPI / hotplug / PCI: Consolidate ACPIPHP with ACPI core hotplug The ACPI-based PCI hotplug (ACPIPHP) code currently attaches its hotplug context objects directly to ACPI namespace nodes representing hotplug devices. However, after recent changes causing struct acpi_device to be created for every namespace node representing a device (regardless of its status), that is not necessary any more. Moreover, it's vulnerable to the theoretical issue that the ACPI handle passed in the context between handle_hotplug_event() and hotplug_event_work() may become invalid in the meantime (as a result of a concurrent table unload). In principle, this issue might be addressed by adding a non-empty release handler for ACPIPHP hotplug context objects analogous to acpi_scan_drop_device(), but that would duplicate the code in that function and in acpi_device_del_work_fn(). For this reason, it's better to modify ACPIPHP to attach its device hotplug contexts to struct device objects representing hotplug devices and make it use acpi_hotplug_notify_cb() as its notify handler. At the same time, acpi_device_hotplug() can be modified to dispatch the new .hp.event() callback pointing to acpiphp_hotplug_event() from ACPI device objects associated with PCI devices or use the generic ACPI device hotplug code for device objects with matching scan handlers. This allows the existing code duplication between ACPIPHP and the ACPI core to be reduced too and makes further ACPI-based device hotplug consolidation possible. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2014-02-07 00:31:37 +08:00
static struct acpiphp_context *acpiphp_get_context(struct acpi_device *adev)
{
ACPI / hotplug / PCI: Consolidate ACPIPHP with ACPI core hotplug The ACPI-based PCI hotplug (ACPIPHP) code currently attaches its hotplug context objects directly to ACPI namespace nodes representing hotplug devices. However, after recent changes causing struct acpi_device to be created for every namespace node representing a device (regardless of its status), that is not necessary any more. Moreover, it's vulnerable to the theoretical issue that the ACPI handle passed in the context between handle_hotplug_event() and hotplug_event_work() may become invalid in the meantime (as a result of a concurrent table unload). In principle, this issue might be addressed by adding a non-empty release handler for ACPIPHP hotplug context objects analogous to acpi_scan_drop_device(), but that would duplicate the code in that function and in acpi_device_del_work_fn(). For this reason, it's better to modify ACPIPHP to attach its device hotplug contexts to struct device objects representing hotplug devices and make it use acpi_hotplug_notify_cb() as its notify handler. At the same time, acpi_device_hotplug() can be modified to dispatch the new .hp.event() callback pointing to acpiphp_hotplug_event() from ACPI device objects associated with PCI devices or use the generic ACPI device hotplug code for device objects with matching scan handlers. This allows the existing code duplication between ACPIPHP and the ACPI core to be reduced too and makes further ACPI-based device hotplug consolidation possible. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2014-02-07 00:31:37 +08:00
struct acpiphp_context *context;
ACPI / hotplug / PCI: Consolidate ACPIPHP with ACPI core hotplug The ACPI-based PCI hotplug (ACPIPHP) code currently attaches its hotplug context objects directly to ACPI namespace nodes representing hotplug devices. However, after recent changes causing struct acpi_device to be created for every namespace node representing a device (regardless of its status), that is not necessary any more. Moreover, it's vulnerable to the theoretical issue that the ACPI handle passed in the context between handle_hotplug_event() and hotplug_event_work() may become invalid in the meantime (as a result of a concurrent table unload). In principle, this issue might be addressed by adding a non-empty release handler for ACPIPHP hotplug context objects analogous to acpi_scan_drop_device(), but that would duplicate the code in that function and in acpi_device_del_work_fn(). For this reason, it's better to modify ACPIPHP to attach its device hotplug contexts to struct device objects representing hotplug devices and make it use acpi_hotplug_notify_cb() as its notify handler. At the same time, acpi_device_hotplug() can be modified to dispatch the new .hp.event() callback pointing to acpiphp_hotplug_event() from ACPI device objects associated with PCI devices or use the generic ACPI device hotplug code for device objects with matching scan handlers. This allows the existing code duplication between ACPIPHP and the ACPI core to be reduced too and makes further ACPI-based device hotplug consolidation possible. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2014-02-07 00:31:37 +08:00
if (!adev->hp)
return NULL;
context = to_acpiphp_context(adev->hp);
context->refcount++;
return context;
}
/**
* acpiphp_put_context - Drop a reference to ACPI hotplug context.
* @context: ACPI hotplug context to drop a reference to.
*
* The context object is removed if there are no more references to it.
*
* Call under acpi_hp_context_lock.
*/
static void acpiphp_put_context(struct acpiphp_context *context)
{
if (--context->refcount)
return;
WARN_ON(context->bridge);
ACPI / hotplug / PCI: Consolidate ACPIPHP with ACPI core hotplug The ACPI-based PCI hotplug (ACPIPHP) code currently attaches its hotplug context objects directly to ACPI namespace nodes representing hotplug devices. However, after recent changes causing struct acpi_device to be created for every namespace node representing a device (regardless of its status), that is not necessary any more. Moreover, it's vulnerable to the theoretical issue that the ACPI handle passed in the context between handle_hotplug_event() and hotplug_event_work() may become invalid in the meantime (as a result of a concurrent table unload). In principle, this issue might be addressed by adding a non-empty release handler for ACPIPHP hotplug context objects analogous to acpi_scan_drop_device(), but that would duplicate the code in that function and in acpi_device_del_work_fn(). For this reason, it's better to modify ACPIPHP to attach its device hotplug contexts to struct device objects representing hotplug devices and make it use acpi_hotplug_notify_cb() as its notify handler. At the same time, acpi_device_hotplug() can be modified to dispatch the new .hp.event() callback pointing to acpiphp_hotplug_event() from ACPI device objects associated with PCI devices or use the generic ACPI device hotplug code for device objects with matching scan handlers. This allows the existing code duplication between ACPIPHP and the ACPI core to be reduced too and makes further ACPI-based device hotplug consolidation possible. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2014-02-07 00:31:37 +08:00
context->hp.self->hp = NULL;
kfree(context);
}
static inline void get_bridge(struct acpiphp_bridge *bridge)
{
kref_get(&bridge->ref);
}
static inline void put_bridge(struct acpiphp_bridge *bridge)
{
kref_put(&bridge->ref, free_bridge);
}
static struct acpiphp_context *acpiphp_grab_context(struct acpi_device *adev)
{
struct acpiphp_context *context;
acpi_lock_hp_context();
context = acpiphp_get_context(adev);
if (!context || context->func.parent->is_going_away) {
acpi_unlock_hp_context();
return NULL;
}
get_bridge(context->func.parent);
acpiphp_put_context(context);
acpi_unlock_hp_context();
return context;
}
static void acpiphp_let_context_go(struct acpiphp_context *context)
{
put_bridge(context->func.parent);
}
static void free_bridge(struct kref *kref)
{
struct acpiphp_context *context;
struct acpiphp_bridge *bridge;
struct acpiphp_slot *slot, *next;
struct acpiphp_func *func, *tmp;
acpi_lock_hp_context();
bridge = container_of(kref, struct acpiphp_bridge, ref);
list_for_each_entry_safe(slot, next, &bridge->slots, node) {
list_for_each_entry_safe(func, tmp, &slot->funcs, sibling)
acpiphp_put_context(func_to_context(func));
kfree(slot);
}
context = bridge->context;
/* Root bridges will not have hotplug context. */
if (context) {
/* Release the reference taken by acpiphp_enumerate_slots(). */
put_bridge(context->func.parent);
context->bridge = NULL;
acpiphp_put_context(context);
}
put_device(&bridge->pci_bus->dev);
pci_dev_put(bridge->pci_dev);
kfree(bridge);
acpi_unlock_hp_context();
}
/**
* acpiphp_post_dock_fixup - Post-dock fixups for PCI devices.
* @adev: ACPI device object corresponding to a PCI device.
*
* TBD - figure out a way to only call fixups for systems that require them.
*/
static void acpiphp_post_dock_fixup(struct acpi_device *adev)
{
struct acpiphp_context *context = acpiphp_grab_context(adev);
struct pci_bus *bus;
u32 buses;
if (!context)
return;
bus = context->func.slot->bus;
if (!bus->self)
goto out;
/* fixup bad _DCK function that rewrites
* secondary bridge on slot
*/
pci_read_config_dword(bus->self, PCI_PRIMARY_BUS, &buses);
if (((buses >> 8) & 0xff) != bus->busn_res.start) {
buses = (buses & 0xff000000)
| ((unsigned int)(bus->primary) << 0)
| ((unsigned int)(bus->busn_res.start) << 8)
| ((unsigned int)(bus->busn_res.end) << 16);
pci_write_config_dword(bus->self, PCI_PRIMARY_BUS, buses);
}
out:
acpiphp_let_context_go(context);
ACPI / hotplug / PCI: Fix bridge removal race vs dock events If a PCI bridge with an ACPIPHP context attached is removed via sysfs, the code path executed as a result is the following: pci_stop_and_remove_bus_device_locked pci_remove_bus pcibios_remove_bus acpi_pci_remove_bus acpiphp_remove_slots cleanup_bridge unregister_hotplug_dock_device (drops dock references to the bridge) put_bridge free_bridge acpiphp_put_context (for each child, under context lock) kfree (context) Now, if a dock event affecting one of the bridge's child devices occurs (roughly at the same time), it will lead to the following code path: acpi_dock_deferred_cb dock_notify handle_eject_request hot_remove_dock_devices dock_hotplug_event hotplug_event (dereferences context) That may lead to a kernel crash in hotplug_event() if it is executed after the last kfree() in the bridge removal code path. To prevent that from happening, add a wrapper around hotplug_event() called dock_event() and point the .handler pointer in acpiphp_dock_ops to it. Make that wrapper retrieve the device's ACPIPHP context using acpiphp_get_context() (instead of taking it from the data argument) under acpiphp_context_lock and check if the parent bridge's is_going_away flag is set. If that flag is set, it will return immediately and if it is not set it will grab a reference to the device's parent bridge before executing hotplug_event(). Then, in the above scenario, the reference to the parent bridge held by dock_event() will prevent free_bridge() from being executed for it until hotplug_event() returns. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2014-02-04 05:30:15 +08:00
}
/**
* acpiphp_add_context - Add ACPIPHP context to an ACPI device object.
* @handle: ACPI handle of the object to add a context to.
* @lvl: Not used.
* @data: The object's parent ACPIPHP bridge.
* @rv: Not used.
*/
static acpi_status acpiphp_add_context(acpi_handle handle, u32 lvl, void *data,
void **rv)
{
struct acpiphp_bridge *bridge = data;
struct acpiphp_context *context;
struct acpi_device *adev;
struct acpiphp_slot *slot;
struct acpiphp_func *newfunc;
acpi_status status = AE_OK;
unsigned long long adr;
int device, function;
struct pci_bus *pbus = bridge->pci_bus;
struct pci_dev *pdev = bridge->pci_dev;
u32 val;
status = acpi_evaluate_integer(handle, "_ADR", NULL, &adr);
if (ACPI_FAILURE(status)) {
if (status != AE_NOT_FOUND)
acpi_handle_warn(handle,
"can't evaluate _ADR (%#x)\n", status);
return AE_OK;
}
if (acpi_bus_get_device(handle, &adev))
return AE_OK;
device = (adr >> 16) & 0xffff;
function = adr & 0xffff;
acpi_lock_hp_context();
context = acpiphp_init_context(adev);
if (!context) {
acpi_unlock_hp_context();
acpi_handle_err(handle, "No hotplug context\n");
return AE_NOT_EXIST;
}
newfunc = &context->func;
newfunc->function = function;
newfunc->parent = bridge;
acpi_unlock_hp_context();
/*
* If this is a dock device, its _EJ0 should be executed by the dock
* notify handler after calling _DCK.
*/
if (!is_dock_device(adev) && acpi_has_method(handle, "_EJ0"))
newfunc->flags = FUNC_HAS_EJ0;
if (acpi_has_method(handle, "_STA"))
newfunc->flags |= FUNC_HAS_STA;
/* search for objects that share the same slot */
list_for_each_entry(slot, &bridge->slots, node)
if (slot->device == device)
goto slot_found;
slot = kzalloc(sizeof(struct acpiphp_slot), GFP_KERNEL);
if (!slot) {
acpi_lock_hp_context();
acpiphp_put_context(context);
acpi_unlock_hp_context();
return AE_NO_MEMORY;
}
slot->bus = bridge->pci_bus;
slot->device = device;
INIT_LIST_HEAD(&slot->funcs);
list_add_tail(&slot->node, &bridge->slots);
/*
* Expose slots to user space for functions that have _EJ0 or _RMV or
* are located in dock stations. Do not expose them for devices handled
* by the native PCIe hotplug (PCIeHP), becuase that code is supposed to
* expose slots to user space in those cases.
*/
if ((acpi_pci_check_ejectable(pbus, handle) || is_dock_device(adev))
2016-10-28 16:52:06 +08:00
&& !(pdev && pdev->is_hotplug_bridge && pciehp_is_native(pdev))) {
unsigned long long sun;
int retval;
bridge->nr_slots++;
status = acpi_evaluate_integer(handle, "_SUN", NULL, &sun);
if (ACPI_FAILURE(status))
sun = bridge->nr_slots;
pr_debug("found ACPI PCI Hotplug slot %llu at PCI %04x:%02x:%02x\n",
sun, pci_domain_nr(pbus), pbus->number, device);
retval = acpiphp_register_hotplug_slot(slot, sun);
if (retval) {
slot->slot = NULL;
bridge->nr_slots--;
if (retval == -EBUSY)
pr_warn("Slot %llu already registered by another hotplug driver\n", sun);
else
pr_warn("acpiphp_register_hotplug_slot failed (err code = 0x%x)\n", retval);
}
/* Even if the slot registration fails, we can still use it. */
}
slot_found:
newfunc->slot = slot;
list_add_tail(&newfunc->sibling, &slot->funcs);
if (pci_bus_read_dev_vendor_id(pbus, PCI_DEVFN(device, function),
&val, 60*1000))
slot->flags |= SLOT_ENABLED;
return AE_OK;
}
static void cleanup_bridge(struct acpiphp_bridge *bridge)
{
struct acpiphp_slot *slot;
struct acpiphp_func *func;
list_for_each_entry(slot, &bridge->slots, node) {
list_for_each_entry(func, &slot->funcs, sibling) {
struct acpi_device *adev = func_to_acpi_device(func);
acpi_lock_hp_context();
adev->hp->notify = NULL;
adev->hp->fixup = NULL;
acpi_unlock_hp_context();
}
slot->flags |= SLOT_IS_GOING_AWAY;
if (slot->slot)
acpiphp_unregister_hotplug_slot(slot);
}
mutex_lock(&bridge_mutex);
list_del(&bridge->list);
mutex_unlock(&bridge_mutex);
acpi_lock_hp_context();
bridge->is_going_away = true;
acpi_unlock_hp_context();
}
/**
* acpiphp_max_busnr - return the highest reserved bus number under the given bus.
* @bus: bus to start search with
*/
static unsigned char acpiphp_max_busnr(struct pci_bus *bus)
{
struct pci_bus *tmp;
unsigned char max, n;
/*
* pci_bus_max_busnr will return the highest
* reserved busnr for all these children.
* that is equivalent to the bus->subordinate
* value. We don't want to use the parent's
* bus->subordinate value because it could have
* padding in it.
*/
max = bus->busn_res.start;
list_for_each_entry(tmp, &bus->children, node) {
n = pci_bus_max_busnr(tmp);
if (n > max)
max = n;
}
return max;
}
static void acpiphp_set_acpi_region(struct acpiphp_slot *slot)
{
struct acpiphp_func *func;
union acpi_object params[2];
struct acpi_object_list arg_list;
list_for_each_entry(func, &slot->funcs, sibling) {
arg_list.count = 2;
arg_list.pointer = params;
params[0].type = ACPI_TYPE_INTEGER;
params[0].integer.value = ACPI_ADR_SPACE_PCI_CONFIG;
params[1].type = ACPI_TYPE_INTEGER;
params[1].integer.value = 1;
/* _REG is optional, we don't care about if there is failure */
acpi_evaluate_object(func_to_handle(func), "_REG", &arg_list,
NULL);
}
}
static void check_hotplug_bridge(struct acpiphp_slot *slot, struct pci_dev *dev)
{
struct acpiphp_func *func;
/* quirk, or pcie could set it already */
if (dev->is_hotplug_bridge)
return;
list_for_each_entry(func, &slot->funcs, sibling) {
if (PCI_FUNC(dev->devfn) == func->function) {
dev->is_hotplug_bridge = 1;
break;
}
}
}
ACPI / hotplug / PCI: Avoid parent bus rescans on spurious device checks In the current ACPIPHP notify handler we always go directly for a rescan of the parent bus if we get a device check notification for a device that is not a bridge. However, this obviously is overzealous if nothing really changes, because this way we may rescan the whole PCI hierarchy pretty much in vain. That happens on Alex Williamson's machine whose ACPI tables contain device objects that are supposed to coresspond to PCIe root ports, but those ports aren't physically present (or at least they aren't visible in the PCI config space to us). The BIOS generates multiple device check notifies for those objects during boot and for each of them we go straight for the parent bus rescan, but the parent bus is the root bus in this particular case. In consequence, we rescan the whole PCI bus from the top several times in a row, which is completely unnecessary, increases boot time by 50% (after previous fixes) and generates excess dmesg output from the PCI subsystem. Fix the problem by checking if we can find anything new in the slot corresponding to the device we've got a device check notify for and doing nothig if that's not the case. The spec (ACPI 5.0, Section 5.6.6) appears to mandate this behavior, as it says: Device Check. Used to notify OSPM that the device either appeared or disappeared. If the device has appeared, OSPM will re-enumerate from the parent. If the device has disappeared, OSPM will invalidate the state of the device. OSPM may optimize out re-enumeration. Therefore, according to the spec, we are free to do nothing if nothing changes. References: https://bugzilla.kernel.org/show_bug.cgi?id=60865 Reported-and-tested-by: Alex Williamson <alex.williamson@redhat.com> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2013-09-08 06:07:28 +08:00
static int acpiphp_rescan_slot(struct acpiphp_slot *slot)
{
struct acpiphp_func *func;
list_for_each_entry(func, &slot->funcs, sibling) {
struct acpi_device *adev = func_to_acpi_device(func);
ACPI / hotplug / PCI: Avoid parent bus rescans on spurious device checks In the current ACPIPHP notify handler we always go directly for a rescan of the parent bus if we get a device check notification for a device that is not a bridge. However, this obviously is overzealous if nothing really changes, because this way we may rescan the whole PCI hierarchy pretty much in vain. That happens on Alex Williamson's machine whose ACPI tables contain device objects that are supposed to coresspond to PCIe root ports, but those ports aren't physically present (or at least they aren't visible in the PCI config space to us). The BIOS generates multiple device check notifies for those objects during boot and for each of them we go straight for the parent bus rescan, but the parent bus is the root bus in this particular case. In consequence, we rescan the whole PCI bus from the top several times in a row, which is completely unnecessary, increases boot time by 50% (after previous fixes) and generates excess dmesg output from the PCI subsystem. Fix the problem by checking if we can find anything new in the slot corresponding to the device we've got a device check notify for and doing nothig if that's not the case. The spec (ACPI 5.0, Section 5.6.6) appears to mandate this behavior, as it says: Device Check. Used to notify OSPM that the device either appeared or disappeared. If the device has appeared, OSPM will re-enumerate from the parent. If the device has disappeared, OSPM will invalidate the state of the device. OSPM may optimize out re-enumeration. Therefore, according to the spec, we are free to do nothing if nothing changes. References: https://bugzilla.kernel.org/show_bug.cgi?id=60865 Reported-and-tested-by: Alex Williamson <alex.williamson@redhat.com> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2013-09-08 06:07:28 +08:00
acpi_bus_scan(adev->handle);
if (acpi_device_enumerated(adev))
acpi_device_set_power(adev, ACPI_STATE_D0);
}
ACPI / hotplug / PCI: Avoid parent bus rescans on spurious device checks In the current ACPIPHP notify handler we always go directly for a rescan of the parent bus if we get a device check notification for a device that is not a bridge. However, this obviously is overzealous if nothing really changes, because this way we may rescan the whole PCI hierarchy pretty much in vain. That happens on Alex Williamson's machine whose ACPI tables contain device objects that are supposed to coresspond to PCIe root ports, but those ports aren't physically present (or at least they aren't visible in the PCI config space to us). The BIOS generates multiple device check notifies for those objects during boot and for each of them we go straight for the parent bus rescan, but the parent bus is the root bus in this particular case. In consequence, we rescan the whole PCI bus from the top several times in a row, which is completely unnecessary, increases boot time by 50% (after previous fixes) and generates excess dmesg output from the PCI subsystem. Fix the problem by checking if we can find anything new in the slot corresponding to the device we've got a device check notify for and doing nothig if that's not the case. The spec (ACPI 5.0, Section 5.6.6) appears to mandate this behavior, as it says: Device Check. Used to notify OSPM that the device either appeared or disappeared. If the device has appeared, OSPM will re-enumerate from the parent. If the device has disappeared, OSPM will invalidate the state of the device. OSPM may optimize out re-enumeration. Therefore, according to the spec, we are free to do nothing if nothing changes. References: https://bugzilla.kernel.org/show_bug.cgi?id=60865 Reported-and-tested-by: Alex Williamson <alex.williamson@redhat.com> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2013-09-08 06:07:28 +08:00
return pci_scan_slot(slot->bus, PCI_DEVFN(slot->device, 0));
}
/**
* enable_slot - enable, configure a slot
* @slot: slot to be enabled
*
* This function should be called per *physical slot*,
* not per each slot object in ACPI namespace.
*/
static void enable_slot(struct acpiphp_slot *slot)
{
struct pci_dev *dev;
struct pci_bus *bus = slot->bus;
struct acpiphp_func *func;
int max, pass;
PCI / ACPI: Use boot-time resource allocation rules during hotplug On x86 platforms, the kernel respects PCI resource assignments from the BIOS and only reassigns resources for unassigned BARs at boot time. However, with the ACPI-based hotplug (acpiphp), it ignores the BIOS' PCI resource assignments completely and reassigns all resources by itself. This causes differences in PCI resource allocation between boot time and runtime hotplug to occur, which is generally undesirable and sometimes actively breaks things. Namely, if there are enough resources, reassigning all PCI resources during runtime hotplug should work, but it may fail if the resources are constrained. This may happen, for instance, when some PCI devices with huge MMIO BARs are involved in the runtime hotplug operations, because the current PCI MMIO alignment algorithm may waste huge chunks of MMIO address space in those cases. On the Alexander's Sony VAIO VPCZ23A4R the BIOS allocates limited MMIO resources for the dock station which contains a device (graphics adapter) with a 256MB MMIO BAR. An attempt to reassign that during runtime hotplug causes the dock station MMIO window to be exhausted and acpiphp fails to allocate resources for the majority of devices on the dock station as a result. To prevent that from happening, modify acpiphp to follow the boot time resources allocation behavior so that the BIOS' resource assignments are respected during runtime hotplug too. [rjw: Changelog] References: https://bugzilla.kernel.org/show_bug.cgi?id=56531 Reported-and-tested-by: Alexander E. Patrakov <patrakov@gmail.com> Tested-by: Illya Klymov <xanf@xanf.me> Signed-off-by: Jiang Liu <jiang.liu@huawei.com> Acked-by: Yinghai Lu <yinghai@kernel.org> Cc: 3.9+ <stable@vger.kernel.org> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2013-06-23 07:01:35 +08:00
LIST_HEAD(add_list);
acpiphp_rescan_slot(slot);
max = acpiphp_max_busnr(bus);
for (pass = 0; pass < 2; pass++) {
list_for_each_entry(dev, &bus->devices, bus_list) {
if (PCI_SLOT(dev->devfn) != slot->device)
continue;
if (pci_is_bridge(dev)) {
max = pci_scan_bridge(bus, dev, max, pass);
if (pass && dev->subordinate) {
check_hotplug_bridge(slot, dev);
PCI / ACPI: Use boot-time resource allocation rules during hotplug On x86 platforms, the kernel respects PCI resource assignments from the BIOS and only reassigns resources for unassigned BARs at boot time. However, with the ACPI-based hotplug (acpiphp), it ignores the BIOS' PCI resource assignments completely and reassigns all resources by itself. This causes differences in PCI resource allocation between boot time and runtime hotplug to occur, which is generally undesirable and sometimes actively breaks things. Namely, if there are enough resources, reassigning all PCI resources during runtime hotplug should work, but it may fail if the resources are constrained. This may happen, for instance, when some PCI devices with huge MMIO BARs are involved in the runtime hotplug operations, because the current PCI MMIO alignment algorithm may waste huge chunks of MMIO address space in those cases. On the Alexander's Sony VAIO VPCZ23A4R the BIOS allocates limited MMIO resources for the dock station which contains a device (graphics adapter) with a 256MB MMIO BAR. An attempt to reassign that during runtime hotplug causes the dock station MMIO window to be exhausted and acpiphp fails to allocate resources for the majority of devices on the dock station as a result. To prevent that from happening, modify acpiphp to follow the boot time resources allocation behavior so that the BIOS' resource assignments are respected during runtime hotplug too. [rjw: Changelog] References: https://bugzilla.kernel.org/show_bug.cgi?id=56531 Reported-and-tested-by: Alexander E. Patrakov <patrakov@gmail.com> Tested-by: Illya Klymov <xanf@xanf.me> Signed-off-by: Jiang Liu <jiang.liu@huawei.com> Acked-by: Yinghai Lu <yinghai@kernel.org> Cc: 3.9+ <stable@vger.kernel.org> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2013-06-23 07:01:35 +08:00
pcibios_resource_survey_bus(dev->subordinate);
__pci_bus_size_bridges(dev->subordinate,
&add_list);
}
}
}
}
PCI / ACPI: Use boot-time resource allocation rules during hotplug On x86 platforms, the kernel respects PCI resource assignments from the BIOS and only reassigns resources for unassigned BARs at boot time. However, with the ACPI-based hotplug (acpiphp), it ignores the BIOS' PCI resource assignments completely and reassigns all resources by itself. This causes differences in PCI resource allocation between boot time and runtime hotplug to occur, which is generally undesirable and sometimes actively breaks things. Namely, if there are enough resources, reassigning all PCI resources during runtime hotplug should work, but it may fail if the resources are constrained. This may happen, for instance, when some PCI devices with huge MMIO BARs are involved in the runtime hotplug operations, because the current PCI MMIO alignment algorithm may waste huge chunks of MMIO address space in those cases. On the Alexander's Sony VAIO VPCZ23A4R the BIOS allocates limited MMIO resources for the dock station which contains a device (graphics adapter) with a 256MB MMIO BAR. An attempt to reassign that during runtime hotplug causes the dock station MMIO window to be exhausted and acpiphp fails to allocate resources for the majority of devices on the dock station as a result. To prevent that from happening, modify acpiphp to follow the boot time resources allocation behavior so that the BIOS' resource assignments are respected during runtime hotplug too. [rjw: Changelog] References: https://bugzilla.kernel.org/show_bug.cgi?id=56531 Reported-and-tested-by: Alexander E. Patrakov <patrakov@gmail.com> Tested-by: Illya Klymov <xanf@xanf.me> Signed-off-by: Jiang Liu <jiang.liu@huawei.com> Acked-by: Yinghai Lu <yinghai@kernel.org> Cc: 3.9+ <stable@vger.kernel.org> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2013-06-23 07:01:35 +08:00
__pci_bus_assign_resources(bus, &add_list, NULL);
acpiphp_sanitize_bus(bus);
pcie_bus_configure_settings(bus);
acpiphp_set_acpi_region(slot);
list_for_each_entry(dev, &bus->devices, bus_list) {
/* Assume that newly added devices are powered on already. */
if (!dev->is_added)
dev->current_state = PCI_D0;
}
pci_bus_add_devices(bus);
slot->flags |= SLOT_ENABLED;
list_for_each_entry(func, &slot->funcs, sibling) {
PCI Hotplug: acpiphp: don't store a pci_dev in acpiphp_func An oops can occur if a user attempts to use both PCI logical hotplug and the ACPI physical hotplug driver (acpiphp) in this sequence, where $slot/address == $device. In other words, if acpiphp has claimed a PCI device, and that device is logically removed, then acpiphp may oops when it attempts to access it again. # echo 1 > /sys/bus/pci/devices/$device/remove # echo 0 > /sys/bus/pci/slots/$slot/power Unable to handle kernel NULL pointer dereference (address 0000000000000000) Call Trace: [<a000000100016390>] show_stack+0x50/0xa0 [<a000000100016c60>] show_regs+0x820/0x860 [<a00000010003b390>] die+0x190/0x2a0 [<a000000100066a40>] ia64_do_page_fault+0x8e0/0xa40 [<a00000010000c7a0>] ia64_native_leave_kernel+0x0/0x270 [<a0000001003b2660>] pci_remove_bus_device+0x120/0x260 [<a0000002060549f0>] acpiphp_disable_slot+0x410/0x540 [acpiphp] [<a0000002060505c0>] disable_slot+0xc0/0x120 [acpiphp] [<a0000002040d21c0>] power_write_file+0x1e0/0x2a0 [pci_hotplug] [<a0000001003bb820>] pci_slot_attr_store+0x60/0xa0 [<a000000100240f70>] sysfs_write_file+0x230/0x2c0 [<a000000100195750>] vfs_write+0x190/0x2e0 [<a0000001001961a0>] sys_write+0x80/0x100 [<a00000010000c600>] ia64_ret_from_syscall+0x0/0x20 [<a000000000010720>] __kernel_syscall_via_break+0x0/0x20 The root cause of this oops is that the logical remove ("echo 1 > /sys/bus/pci/devices/$device/remove") destroyed the pci_dev. The pci_dev struct itself wasn't deallocated because acpiphp kept a reference, but some of its fields became invalid. acpiphp doesn't have any real reason to keep a pointer to a pci_dev around. It can always derive it using pci_get_slot(). If a logical remove destroys the pci_dev, acpiphp won't find it and is thus prevented from causing mischief. Reviewed-by: Matthew Wilcox <willy@linux.intel.com> Reviewed-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com> Tested-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com> Reported-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com> Acked-by: Bjorn Helgaas <bjorn.helgaas@hp.com> Signed-off-by: Alex Chiang <achiang@hp.com> Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2009-05-22 06:21:15 +08:00
dev = pci_get_slot(bus, PCI_DEVFN(slot->device,
func->function));
if (!dev) {
/* Do not set SLOT_ENABLED flag if some funcs
are not added. */
slot->flags &= (~SLOT_ENABLED);
continue;
}
}
}
/**
* disable_slot - disable a slot
* @slot: ACPI PHP slot
*/
static void disable_slot(struct acpiphp_slot *slot)
{
struct pci_bus *bus = slot->bus;
struct pci_dev *dev, *prev;
struct acpiphp_func *func;
/*
* enable_slot() enumerates all functions in this device via
* pci_scan_slot(), whether they have associated ACPI hotplug
* methods (_EJ0, etc.) or not. Therefore, we remove all functions
* here.
*/
list_for_each_entry_safe_reverse(dev, prev, &bus->devices, bus_list)
if (PCI_SLOT(dev->devfn) == slot->device)
pci_stop_and_remove_bus_device(dev);
list_for_each_entry(func, &slot->funcs, sibling)
acpi_bus_trim(func_to_acpi_device(func));
slot->flags &= (~SLOT_ENABLED);
}
ACPIPHP / radeon / nouveau: Fix VGA switcheroo problem related to hotplug The changes in the ACPI-based PCI hotplug (ACPIPHP) subsystem made during the 3.12 development cycle uncovered a problem with VGA switcheroo that on some systems, when the device-specific method (ATPX in the radeon case, _DSM in the nouveau case) is used to turn off the discrete graphics, the BIOS generates ACPI hotplug events for that device and those events cause ACPIPHP to attempt to remove the device from the system (they are events for a device that was present previously and is not present any more, so that's what should be done according to the spec). Then, the system stops functioning correctly. Since the hotplug events in question were simply silently ignored previously, the least intrusive way to address that problem is to make ACPIPHP ignore them again. For this purpose, introduce a new ACPI device flag, no_hotplug, and modify ACPIPHP to ignore hotplug events for PCI devices whose ACPI companions have that flag set. Next, make the radeon and nouveau switcheroo detection code set the no_hotplug flag for the discrete graphics' ACPI companion. Fixes: bbd34fcdd1b2 (ACPI / hotplug / PCI: Register all devices under the given bridge) References: https://bugzilla.kernel.org/show_bug.cgi?id=61891 References: https://bugzilla.kernel.org/show_bug.cgi?id=64891 Reported-and-tested-by: Mike Lothian <mike@fireburn.co.uk> Reported-and-tested-by: <madcatx@atlas.cz> Reported-and-tested-by: Joaquín Aramendía <samsagax@gmail.com> Cc: Alex Deucher <alexdeucher@gmail.com> Cc: Dave Airlie <airlied@linux.ie> Cc: Takashi Iwai <tiwai@suse.de> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Cc: 3.12+ <stable@vger.kernel.org> # 3.12+
2013-12-31 20:39:42 +08:00
static bool slot_no_hotplug(struct acpiphp_slot *slot)
{
PCI: Add pci_ignore_hotplug() to ignore hotplug events for a device Powering off a hot-pluggable device, e.g., with pci_set_power_state(D3cold), normally generates a hot-remove event that unbinds the driver. Some drivers expect to remain bound to a device even while they power it off and back on again. This can be dangerous, because if the device is removed or replaced while it is powered off, the driver doesn't know that anything changed. But some drivers accept that risk. Add pci_ignore_hotplug() for use by drivers that know their device cannot be removed. Using pci_ignore_hotplug() tells the PCI core that hot-plug events for the device should be ignored. The radeon and nouveau drivers use this to switch between a low-power, integrated GPU and a higher-power, higher-performance discrete GPU. They power off the unused GPU, but they want to remain bound to it. This is a reimplementation of f244d8b623da ("ACPIPHP / radeon / nouveau: Fix VGA switcheroo problem related to hotplug") but extends it to work with both acpiphp and pciehp. This fixes a problem where systems with dual GPUs using the radeon drivers become unusable, freezing every few seconds (see bugzillas below). The resume of the radeon device may also fail, e.g., This fixes problems on dual GPU systems where the radeon driver becomes unusable because of problems while suspending the device, as in bug 79701: [drm] radeon: finishing device. radeon 0000:01:00.0: Userspace still has active objects ! radeon 0000:01:00.0: ffff8800cb4ec288 ffff8800cb4ec000 16384 4294967297 force free ... WARNING: CPU: 0 PID: 67 at /home/apw/COD/linux/drivers/gpu/drm/radeon/radeon_gart.c:234 radeon_gart_unbind+0xd2/0xe0 [radeon]() trying to unbind memory from uninitialized GART ! or while resuming it, as in bug 77261: radeon 0000:01:00.0: ring 0 stalled for more than 10158msec radeon 0000:01:00.0: GPU lockup ... radeon 0000:01:00.0: GPU pci config reset pciehp 0000:00:01.0:pcie04: Card not present on Slot(1-1) radeon 0000:01:00.0: GPU reset succeeded, trying to resume *ERROR* radeon: dpm resume failed radeon 0000:01:00.0: Wait for MC idle timedout ! Link: https://bugzilla.kernel.org/show_bug.cgi?id=77261 Link: https://bugzilla.kernel.org/show_bug.cgi?id=79701 Reported-by: Shawn Starr <shawn.starr@rogers.com> Reported-by: Jose P. <lbdkmjdf@sharklasers.com> Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> Acked-by: Alex Deucher <alexander.deucher@amd.com> Acked-by: Rajat Jain <rajatxjain@gmail.com> Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Acked-by: Dave Airlie <airlied@redhat.com> CC: stable@vger.kernel.org # v3.15+
2014-09-11 03:45:01 +08:00
struct pci_bus *bus = slot->bus;
struct pci_dev *dev;
ACPIPHP / radeon / nouveau: Fix VGA switcheroo problem related to hotplug The changes in the ACPI-based PCI hotplug (ACPIPHP) subsystem made during the 3.12 development cycle uncovered a problem with VGA switcheroo that on some systems, when the device-specific method (ATPX in the radeon case, _DSM in the nouveau case) is used to turn off the discrete graphics, the BIOS generates ACPI hotplug events for that device and those events cause ACPIPHP to attempt to remove the device from the system (they are events for a device that was present previously and is not present any more, so that's what should be done according to the spec). Then, the system stops functioning correctly. Since the hotplug events in question were simply silently ignored previously, the least intrusive way to address that problem is to make ACPIPHP ignore them again. For this purpose, introduce a new ACPI device flag, no_hotplug, and modify ACPIPHP to ignore hotplug events for PCI devices whose ACPI companions have that flag set. Next, make the radeon and nouveau switcheroo detection code set the no_hotplug flag for the discrete graphics' ACPI companion. Fixes: bbd34fcdd1b2 (ACPI / hotplug / PCI: Register all devices under the given bridge) References: https://bugzilla.kernel.org/show_bug.cgi?id=61891 References: https://bugzilla.kernel.org/show_bug.cgi?id=64891 Reported-and-tested-by: Mike Lothian <mike@fireburn.co.uk> Reported-and-tested-by: <madcatx@atlas.cz> Reported-and-tested-by: Joaquín Aramendía <samsagax@gmail.com> Cc: Alex Deucher <alexdeucher@gmail.com> Cc: Dave Airlie <airlied@linux.ie> Cc: Takashi Iwai <tiwai@suse.de> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Cc: 3.12+ <stable@vger.kernel.org> # 3.12+
2013-12-31 20:39:42 +08:00
PCI: Add pci_ignore_hotplug() to ignore hotplug events for a device Powering off a hot-pluggable device, e.g., with pci_set_power_state(D3cold), normally generates a hot-remove event that unbinds the driver. Some drivers expect to remain bound to a device even while they power it off and back on again. This can be dangerous, because if the device is removed or replaced while it is powered off, the driver doesn't know that anything changed. But some drivers accept that risk. Add pci_ignore_hotplug() for use by drivers that know their device cannot be removed. Using pci_ignore_hotplug() tells the PCI core that hot-plug events for the device should be ignored. The radeon and nouveau drivers use this to switch between a low-power, integrated GPU and a higher-power, higher-performance discrete GPU. They power off the unused GPU, but they want to remain bound to it. This is a reimplementation of f244d8b623da ("ACPIPHP / radeon / nouveau: Fix VGA switcheroo problem related to hotplug") but extends it to work with both acpiphp and pciehp. This fixes a problem where systems with dual GPUs using the radeon drivers become unusable, freezing every few seconds (see bugzillas below). The resume of the radeon device may also fail, e.g., This fixes problems on dual GPU systems where the radeon driver becomes unusable because of problems while suspending the device, as in bug 79701: [drm] radeon: finishing device. radeon 0000:01:00.0: Userspace still has active objects ! radeon 0000:01:00.0: ffff8800cb4ec288 ffff8800cb4ec000 16384 4294967297 force free ... WARNING: CPU: 0 PID: 67 at /home/apw/COD/linux/drivers/gpu/drm/radeon/radeon_gart.c:234 radeon_gart_unbind+0xd2/0xe0 [radeon]() trying to unbind memory from uninitialized GART ! or while resuming it, as in bug 77261: radeon 0000:01:00.0: ring 0 stalled for more than 10158msec radeon 0000:01:00.0: GPU lockup ... radeon 0000:01:00.0: GPU pci config reset pciehp 0000:00:01.0:pcie04: Card not present on Slot(1-1) radeon 0000:01:00.0: GPU reset succeeded, trying to resume *ERROR* radeon: dpm resume failed radeon 0000:01:00.0: Wait for MC idle timedout ! Link: https://bugzilla.kernel.org/show_bug.cgi?id=77261 Link: https://bugzilla.kernel.org/show_bug.cgi?id=79701 Reported-by: Shawn Starr <shawn.starr@rogers.com> Reported-by: Jose P. <lbdkmjdf@sharklasers.com> Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> Acked-by: Alex Deucher <alexander.deucher@amd.com> Acked-by: Rajat Jain <rajatxjain@gmail.com> Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Acked-by: Dave Airlie <airlied@redhat.com> CC: stable@vger.kernel.org # v3.15+
2014-09-11 03:45:01 +08:00
list_for_each_entry(dev, &bus->devices, bus_list) {
if (PCI_SLOT(dev->devfn) == slot->device && dev->ignore_hotplug)
ACPIPHP / radeon / nouveau: Fix VGA switcheroo problem related to hotplug The changes in the ACPI-based PCI hotplug (ACPIPHP) subsystem made during the 3.12 development cycle uncovered a problem with VGA switcheroo that on some systems, when the device-specific method (ATPX in the radeon case, _DSM in the nouveau case) is used to turn off the discrete graphics, the BIOS generates ACPI hotplug events for that device and those events cause ACPIPHP to attempt to remove the device from the system (they are events for a device that was present previously and is not present any more, so that's what should be done according to the spec). Then, the system stops functioning correctly. Since the hotplug events in question were simply silently ignored previously, the least intrusive way to address that problem is to make ACPIPHP ignore them again. For this purpose, introduce a new ACPI device flag, no_hotplug, and modify ACPIPHP to ignore hotplug events for PCI devices whose ACPI companions have that flag set. Next, make the radeon and nouveau switcheroo detection code set the no_hotplug flag for the discrete graphics' ACPI companion. Fixes: bbd34fcdd1b2 (ACPI / hotplug / PCI: Register all devices under the given bridge) References: https://bugzilla.kernel.org/show_bug.cgi?id=61891 References: https://bugzilla.kernel.org/show_bug.cgi?id=64891 Reported-and-tested-by: Mike Lothian <mike@fireburn.co.uk> Reported-and-tested-by: <madcatx@atlas.cz> Reported-and-tested-by: Joaquín Aramendía <samsagax@gmail.com> Cc: Alex Deucher <alexdeucher@gmail.com> Cc: Dave Airlie <airlied@linux.ie> Cc: Takashi Iwai <tiwai@suse.de> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Cc: 3.12+ <stable@vger.kernel.org> # 3.12+
2013-12-31 20:39:42 +08:00
return true;
PCI: Add pci_ignore_hotplug() to ignore hotplug events for a device Powering off a hot-pluggable device, e.g., with pci_set_power_state(D3cold), normally generates a hot-remove event that unbinds the driver. Some drivers expect to remain bound to a device even while they power it off and back on again. This can be dangerous, because if the device is removed or replaced while it is powered off, the driver doesn't know that anything changed. But some drivers accept that risk. Add pci_ignore_hotplug() for use by drivers that know their device cannot be removed. Using pci_ignore_hotplug() tells the PCI core that hot-plug events for the device should be ignored. The radeon and nouveau drivers use this to switch between a low-power, integrated GPU and a higher-power, higher-performance discrete GPU. They power off the unused GPU, but they want to remain bound to it. This is a reimplementation of f244d8b623da ("ACPIPHP / radeon / nouveau: Fix VGA switcheroo problem related to hotplug") but extends it to work with both acpiphp and pciehp. This fixes a problem where systems with dual GPUs using the radeon drivers become unusable, freezing every few seconds (see bugzillas below). The resume of the radeon device may also fail, e.g., This fixes problems on dual GPU systems where the radeon driver becomes unusable because of problems while suspending the device, as in bug 79701: [drm] radeon: finishing device. radeon 0000:01:00.0: Userspace still has active objects ! radeon 0000:01:00.0: ffff8800cb4ec288 ffff8800cb4ec000 16384 4294967297 force free ... WARNING: CPU: 0 PID: 67 at /home/apw/COD/linux/drivers/gpu/drm/radeon/radeon_gart.c:234 radeon_gart_unbind+0xd2/0xe0 [radeon]() trying to unbind memory from uninitialized GART ! or while resuming it, as in bug 77261: radeon 0000:01:00.0: ring 0 stalled for more than 10158msec radeon 0000:01:00.0: GPU lockup ... radeon 0000:01:00.0: GPU pci config reset pciehp 0000:00:01.0:pcie04: Card not present on Slot(1-1) radeon 0000:01:00.0: GPU reset succeeded, trying to resume *ERROR* radeon: dpm resume failed radeon 0000:01:00.0: Wait for MC idle timedout ! Link: https://bugzilla.kernel.org/show_bug.cgi?id=77261 Link: https://bugzilla.kernel.org/show_bug.cgi?id=79701 Reported-by: Shawn Starr <shawn.starr@rogers.com> Reported-by: Jose P. <lbdkmjdf@sharklasers.com> Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> Acked-by: Alex Deucher <alexander.deucher@amd.com> Acked-by: Rajat Jain <rajatxjain@gmail.com> Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Acked-by: Dave Airlie <airlied@redhat.com> CC: stable@vger.kernel.org # v3.15+
2014-09-11 03:45:01 +08:00
}
ACPIPHP / radeon / nouveau: Fix VGA switcheroo problem related to hotplug The changes in the ACPI-based PCI hotplug (ACPIPHP) subsystem made during the 3.12 development cycle uncovered a problem with VGA switcheroo that on some systems, when the device-specific method (ATPX in the radeon case, _DSM in the nouveau case) is used to turn off the discrete graphics, the BIOS generates ACPI hotplug events for that device and those events cause ACPIPHP to attempt to remove the device from the system (they are events for a device that was present previously and is not present any more, so that's what should be done according to the spec). Then, the system stops functioning correctly. Since the hotplug events in question were simply silently ignored previously, the least intrusive way to address that problem is to make ACPIPHP ignore them again. For this purpose, introduce a new ACPI device flag, no_hotplug, and modify ACPIPHP to ignore hotplug events for PCI devices whose ACPI companions have that flag set. Next, make the radeon and nouveau switcheroo detection code set the no_hotplug flag for the discrete graphics' ACPI companion. Fixes: bbd34fcdd1b2 (ACPI / hotplug / PCI: Register all devices under the given bridge) References: https://bugzilla.kernel.org/show_bug.cgi?id=61891 References: https://bugzilla.kernel.org/show_bug.cgi?id=64891 Reported-and-tested-by: Mike Lothian <mike@fireburn.co.uk> Reported-and-tested-by: <madcatx@atlas.cz> Reported-and-tested-by: Joaquín Aramendía <samsagax@gmail.com> Cc: Alex Deucher <alexdeucher@gmail.com> Cc: Dave Airlie <airlied@linux.ie> Cc: Takashi Iwai <tiwai@suse.de> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Cc: 3.12+ <stable@vger.kernel.org> # 3.12+
2013-12-31 20:39:42 +08:00
return false;
}
/**
* get_slot_status - get ACPI slot status
* @slot: ACPI PHP slot
*
* If a slot has _STA for each function and if any one of them
* returned non-zero status, return it.
*
* If a slot doesn't have _STA and if any one of its functions'
* configuration space is configured, return 0x0f as a _STA.
*
* Otherwise return 0.
*/
static unsigned int get_slot_status(struct acpiphp_slot *slot)
{
unsigned long long sta = 0;
struct acpiphp_func *func;
list_for_each_entry(func, &slot->funcs, sibling) {
if (func->flags & FUNC_HAS_STA) {
acpi_status status;
status = acpi_evaluate_integer(func_to_handle(func),
"_STA", NULL, &sta);
if (ACPI_SUCCESS(status) && sta)
break;
} else {
u32 dvid;
pci_bus_read_config_dword(slot->bus,
PCI_DEVFN(slot->device,
func->function),
PCI_VENDOR_ID, &dvid);
if (dvid != 0xffffffff) {
sta = ACPI_STA_ALL;
break;
}
}
}
return (unsigned int)sta;
}
static inline bool device_status_valid(unsigned int sta)
{
/*
* ACPI spec says that _STA may return bit 0 clear with bit 3 set
* if the device is valid but does not require a device driver to be
* loaded (Section 6.3.7 of ACPI 5.0A).
*/
unsigned int mask = ACPI_STA_DEVICE_ENABLED | ACPI_STA_DEVICE_FUNCTIONING;
return (sta & mask) == mask;
}
ACPI / hotplug / PCI: Check for new devices on enabled slots The current implementation of acpiphp_check_bridge() is pretty dumb: - It enables a slot if it's not enabled and the slot status is ACPI_STA_ALL. - It disables a slot if it's enabled and the slot status is not ACPI_STA_ALL. This behavior is not sufficient to handle the Thunderbolt daisy chaining case properly, however, because in that case the bus behind the already enabled slot needs to be rescanned for new devices. For this reason, modify acpiphp_check_bridge() so that slots are disabled and stopped if they are not in the ACPI_STA_ALL state. For slots in the ACPI_STA_ALL state, devices behind them that don't respond are trimmed using a new function, trim_stale_devices(), introduced specifically for this purpose. That function walks the given bus and checks each device on it. If the device doesn't respond, it is assumed to be gone and is removed. Once all of the stale devices directy behind the slot have been removed, acpiphp_check_bridge() will start looking for new devices that might have appeared on the given bus. It will do that even if the slot is already enabled (SLOT_ENABLED is set for it). In addition to that, make the bus check notification ignore SLOT_ENABLED and go for enable_device() directly if bridge is NULL, so that devices behind the slot are re-enumerated in that case too. This change is based on earlier patches from Kirill A Shutemov and Mika Westerberg. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
2013-07-17 04:10:35 +08:00
/**
* trim_stale_devices - remove PCI devices that are not responding.
* @dev: PCI device to start walking the hierarchy from.
*/
static void trim_stale_devices(struct pci_dev *dev)
{
struct acpi_device *adev = ACPI_COMPANION(&dev->dev);
ACPI / hotplug / PCI: Check for new devices on enabled slots The current implementation of acpiphp_check_bridge() is pretty dumb: - It enables a slot if it's not enabled and the slot status is ACPI_STA_ALL. - It disables a slot if it's enabled and the slot status is not ACPI_STA_ALL. This behavior is not sufficient to handle the Thunderbolt daisy chaining case properly, however, because in that case the bus behind the already enabled slot needs to be rescanned for new devices. For this reason, modify acpiphp_check_bridge() so that slots are disabled and stopped if they are not in the ACPI_STA_ALL state. For slots in the ACPI_STA_ALL state, devices behind them that don't respond are trimmed using a new function, trim_stale_devices(), introduced specifically for this purpose. That function walks the given bus and checks each device on it. If the device doesn't respond, it is assumed to be gone and is removed. Once all of the stale devices directy behind the slot have been removed, acpiphp_check_bridge() will start looking for new devices that might have appeared on the given bus. It will do that even if the slot is already enabled (SLOT_ENABLED is set for it). In addition to that, make the bus check notification ignore SLOT_ENABLED and go for enable_device() directly if bridge is NULL, so that devices behind the slot are re-enumerated in that case too. This change is based on earlier patches from Kirill A Shutemov and Mika Westerberg. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
2013-07-17 04:10:35 +08:00
struct pci_bus *bus = dev->subordinate;
bool alive = dev->ignore_hotplug;
ACPI / hotplug / PCI: Check for new devices on enabled slots The current implementation of acpiphp_check_bridge() is pretty dumb: - It enables a slot if it's not enabled and the slot status is ACPI_STA_ALL. - It disables a slot if it's enabled and the slot status is not ACPI_STA_ALL. This behavior is not sufficient to handle the Thunderbolt daisy chaining case properly, however, because in that case the bus behind the already enabled slot needs to be rescanned for new devices. For this reason, modify acpiphp_check_bridge() so that slots are disabled and stopped if they are not in the ACPI_STA_ALL state. For slots in the ACPI_STA_ALL state, devices behind them that don't respond are trimmed using a new function, trim_stale_devices(), introduced specifically for this purpose. That function walks the given bus and checks each device on it. If the device doesn't respond, it is assumed to be gone and is removed. Once all of the stale devices directy behind the slot have been removed, acpiphp_check_bridge() will start looking for new devices that might have appeared on the given bus. It will do that even if the slot is already enabled (SLOT_ENABLED is set for it). In addition to that, make the bus check notification ignore SLOT_ENABLED and go for enable_device() directly if bridge is NULL, so that devices behind the slot are re-enumerated in that case too. This change is based on earlier patches from Kirill A Shutemov and Mika Westerberg. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
2013-07-17 04:10:35 +08:00
if (adev) {
ACPI / hotplug / PCI: Check for new devices on enabled slots The current implementation of acpiphp_check_bridge() is pretty dumb: - It enables a slot if it's not enabled and the slot status is ACPI_STA_ALL. - It disables a slot if it's enabled and the slot status is not ACPI_STA_ALL. This behavior is not sufficient to handle the Thunderbolt daisy chaining case properly, however, because in that case the bus behind the already enabled slot needs to be rescanned for new devices. For this reason, modify acpiphp_check_bridge() so that slots are disabled and stopped if they are not in the ACPI_STA_ALL state. For slots in the ACPI_STA_ALL state, devices behind them that don't respond are trimmed using a new function, trim_stale_devices(), introduced specifically for this purpose. That function walks the given bus and checks each device on it. If the device doesn't respond, it is assumed to be gone and is removed. Once all of the stale devices directy behind the slot have been removed, acpiphp_check_bridge() will start looking for new devices that might have appeared on the given bus. It will do that even if the slot is already enabled (SLOT_ENABLED is set for it). In addition to that, make the bus check notification ignore SLOT_ENABLED and go for enable_device() directly if bridge is NULL, so that devices behind the slot are re-enumerated in that case too. This change is based on earlier patches from Kirill A Shutemov and Mika Westerberg. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
2013-07-17 04:10:35 +08:00
acpi_status status;
unsigned long long sta;
status = acpi_evaluate_integer(adev->handle, "_STA", NULL, &sta);
alive = alive || (ACPI_SUCCESS(status) && device_status_valid(sta));
ACPI / hotplug / PCI: Check for new devices on enabled slots The current implementation of acpiphp_check_bridge() is pretty dumb: - It enables a slot if it's not enabled and the slot status is ACPI_STA_ALL. - It disables a slot if it's enabled and the slot status is not ACPI_STA_ALL. This behavior is not sufficient to handle the Thunderbolt daisy chaining case properly, however, because in that case the bus behind the already enabled slot needs to be rescanned for new devices. For this reason, modify acpiphp_check_bridge() so that slots are disabled and stopped if they are not in the ACPI_STA_ALL state. For slots in the ACPI_STA_ALL state, devices behind them that don't respond are trimmed using a new function, trim_stale_devices(), introduced specifically for this purpose. That function walks the given bus and checks each device on it. If the device doesn't respond, it is assumed to be gone and is removed. Once all of the stale devices directy behind the slot have been removed, acpiphp_check_bridge() will start looking for new devices that might have appeared on the given bus. It will do that even if the slot is already enabled (SLOT_ENABLED is set for it). In addition to that, make the bus check notification ignore SLOT_ENABLED and go for enable_device() directly if bridge is NULL, so that devices behind the slot are re-enumerated in that case too. This change is based on earlier patches from Kirill A Shutemov and Mika Westerberg. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
2013-07-17 04:10:35 +08:00
}
if (!alive)
alive = pci_device_is_present(dev);
ACPI / hotplug / PCI: Check for new devices on enabled slots The current implementation of acpiphp_check_bridge() is pretty dumb: - It enables a slot if it's not enabled and the slot status is ACPI_STA_ALL. - It disables a slot if it's enabled and the slot status is not ACPI_STA_ALL. This behavior is not sufficient to handle the Thunderbolt daisy chaining case properly, however, because in that case the bus behind the already enabled slot needs to be rescanned for new devices. For this reason, modify acpiphp_check_bridge() so that slots are disabled and stopped if they are not in the ACPI_STA_ALL state. For slots in the ACPI_STA_ALL state, devices behind them that don't respond are trimmed using a new function, trim_stale_devices(), introduced specifically for this purpose. That function walks the given bus and checks each device on it. If the device doesn't respond, it is assumed to be gone and is removed. Once all of the stale devices directy behind the slot have been removed, acpiphp_check_bridge() will start looking for new devices that might have appeared on the given bus. It will do that even if the slot is already enabled (SLOT_ENABLED is set for it). In addition to that, make the bus check notification ignore SLOT_ENABLED and go for enable_device() directly if bridge is NULL, so that devices behind the slot are re-enumerated in that case too. This change is based on earlier patches from Kirill A Shutemov and Mika Westerberg. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
2013-07-17 04:10:35 +08:00
if (!alive) {
pci_stop_and_remove_bus_device(dev);
if (adev)
acpi_bus_trim(adev);
ACPI / hotplug / PCI: Check for new devices on enabled slots The current implementation of acpiphp_check_bridge() is pretty dumb: - It enables a slot if it's not enabled and the slot status is ACPI_STA_ALL. - It disables a slot if it's enabled and the slot status is not ACPI_STA_ALL. This behavior is not sufficient to handle the Thunderbolt daisy chaining case properly, however, because in that case the bus behind the already enabled slot needs to be rescanned for new devices. For this reason, modify acpiphp_check_bridge() so that slots are disabled and stopped if they are not in the ACPI_STA_ALL state. For slots in the ACPI_STA_ALL state, devices behind them that don't respond are trimmed using a new function, trim_stale_devices(), introduced specifically for this purpose. That function walks the given bus and checks each device on it. If the device doesn't respond, it is assumed to be gone and is removed. Once all of the stale devices directy behind the slot have been removed, acpiphp_check_bridge() will start looking for new devices that might have appeared on the given bus. It will do that even if the slot is already enabled (SLOT_ENABLED is set for it). In addition to that, make the bus check notification ignore SLOT_ENABLED and go for enable_device() directly if bridge is NULL, so that devices behind the slot are re-enumerated in that case too. This change is based on earlier patches from Kirill A Shutemov and Mika Westerberg. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
2013-07-17 04:10:35 +08:00
} else if (bus) {
struct pci_dev *child, *tmp;
/* The device is a bridge. so check the bus below it. */
pm_runtime_get_sync(&dev->dev);
list_for_each_entry_safe_reverse(child, tmp, &bus->devices, bus_list)
ACPI / hotplug / PCI: Check for new devices on enabled slots The current implementation of acpiphp_check_bridge() is pretty dumb: - It enables a slot if it's not enabled and the slot status is ACPI_STA_ALL. - It disables a slot if it's enabled and the slot status is not ACPI_STA_ALL. This behavior is not sufficient to handle the Thunderbolt daisy chaining case properly, however, because in that case the bus behind the already enabled slot needs to be rescanned for new devices. For this reason, modify acpiphp_check_bridge() so that slots are disabled and stopped if they are not in the ACPI_STA_ALL state. For slots in the ACPI_STA_ALL state, devices behind them that don't respond are trimmed using a new function, trim_stale_devices(), introduced specifically for this purpose. That function walks the given bus and checks each device on it. If the device doesn't respond, it is assumed to be gone and is removed. Once all of the stale devices directy behind the slot have been removed, acpiphp_check_bridge() will start looking for new devices that might have appeared on the given bus. It will do that even if the slot is already enabled (SLOT_ENABLED is set for it). In addition to that, make the bus check notification ignore SLOT_ENABLED and go for enable_device() directly if bridge is NULL, so that devices behind the slot are re-enumerated in that case too. This change is based on earlier patches from Kirill A Shutemov and Mika Westerberg. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
2013-07-17 04:10:35 +08:00
trim_stale_devices(child);
pm_runtime_put(&dev->dev);
}
}
/**
* acpiphp_check_bridge - re-enumerate devices
* @bridge: where to begin re-enumeration
*
* Iterate over all slots under this bridge and make sure that if a
* card is present they are enabled, and if not they are disabled.
*/
ACPI / hotplug / PCI: Check for new devices on enabled slots The current implementation of acpiphp_check_bridge() is pretty dumb: - It enables a slot if it's not enabled and the slot status is ACPI_STA_ALL. - It disables a slot if it's enabled and the slot status is not ACPI_STA_ALL. This behavior is not sufficient to handle the Thunderbolt daisy chaining case properly, however, because in that case the bus behind the already enabled slot needs to be rescanned for new devices. For this reason, modify acpiphp_check_bridge() so that slots are disabled and stopped if they are not in the ACPI_STA_ALL state. For slots in the ACPI_STA_ALL state, devices behind them that don't respond are trimmed using a new function, trim_stale_devices(), introduced specifically for this purpose. That function walks the given bus and checks each device on it. If the device doesn't respond, it is assumed to be gone and is removed. Once all of the stale devices directy behind the slot have been removed, acpiphp_check_bridge() will start looking for new devices that might have appeared on the given bus. It will do that even if the slot is already enabled (SLOT_ENABLED is set for it). In addition to that, make the bus check notification ignore SLOT_ENABLED and go for enable_device() directly if bridge is NULL, so that devices behind the slot are re-enumerated in that case too. This change is based on earlier patches from Kirill A Shutemov and Mika Westerberg. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
2013-07-17 04:10:35 +08:00
static void acpiphp_check_bridge(struct acpiphp_bridge *bridge)
{
struct acpiphp_slot *slot;
/* Bail out if the bridge is going away. */
if (bridge->is_going_away)
return;
if (bridge->pci_dev)
pm_runtime_get_sync(&bridge->pci_dev->dev);
list_for_each_entry(slot, &bridge->slots, node) {
ACPI / hotplug / PCI: Check for new devices on enabled slots The current implementation of acpiphp_check_bridge() is pretty dumb: - It enables a slot if it's not enabled and the slot status is ACPI_STA_ALL. - It disables a slot if it's enabled and the slot status is not ACPI_STA_ALL. This behavior is not sufficient to handle the Thunderbolt daisy chaining case properly, however, because in that case the bus behind the already enabled slot needs to be rescanned for new devices. For this reason, modify acpiphp_check_bridge() so that slots are disabled and stopped if they are not in the ACPI_STA_ALL state. For slots in the ACPI_STA_ALL state, devices behind them that don't respond are trimmed using a new function, trim_stale_devices(), introduced specifically for this purpose. That function walks the given bus and checks each device on it. If the device doesn't respond, it is assumed to be gone and is removed. Once all of the stale devices directy behind the slot have been removed, acpiphp_check_bridge() will start looking for new devices that might have appeared on the given bus. It will do that even if the slot is already enabled (SLOT_ENABLED is set for it). In addition to that, make the bus check notification ignore SLOT_ENABLED and go for enable_device() directly if bridge is NULL, so that devices behind the slot are re-enumerated in that case too. This change is based on earlier patches from Kirill A Shutemov and Mika Westerberg. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
2013-07-17 04:10:35 +08:00
struct pci_bus *bus = slot->bus;
struct pci_dev *dev, *tmp;
ACPIPHP / radeon / nouveau: Fix VGA switcheroo problem related to hotplug The changes in the ACPI-based PCI hotplug (ACPIPHP) subsystem made during the 3.12 development cycle uncovered a problem with VGA switcheroo that on some systems, when the device-specific method (ATPX in the radeon case, _DSM in the nouveau case) is used to turn off the discrete graphics, the BIOS generates ACPI hotplug events for that device and those events cause ACPIPHP to attempt to remove the device from the system (they are events for a device that was present previously and is not present any more, so that's what should be done according to the spec). Then, the system stops functioning correctly. Since the hotplug events in question were simply silently ignored previously, the least intrusive way to address that problem is to make ACPIPHP ignore them again. For this purpose, introduce a new ACPI device flag, no_hotplug, and modify ACPIPHP to ignore hotplug events for PCI devices whose ACPI companions have that flag set. Next, make the radeon and nouveau switcheroo detection code set the no_hotplug flag for the discrete graphics' ACPI companion. Fixes: bbd34fcdd1b2 (ACPI / hotplug / PCI: Register all devices under the given bridge) References: https://bugzilla.kernel.org/show_bug.cgi?id=61891 References: https://bugzilla.kernel.org/show_bug.cgi?id=64891 Reported-and-tested-by: Mike Lothian <mike@fireburn.co.uk> Reported-and-tested-by: <madcatx@atlas.cz> Reported-and-tested-by: Joaquín Aramendía <samsagax@gmail.com> Cc: Alex Deucher <alexdeucher@gmail.com> Cc: Dave Airlie <airlied@linux.ie> Cc: Takashi Iwai <tiwai@suse.de> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Cc: 3.12+ <stable@vger.kernel.org> # 3.12+
2013-12-31 20:39:42 +08:00
if (slot_no_hotplug(slot)) {
; /* do nothing */
} else if (device_status_valid(get_slot_status(slot))) {
ACPI / hotplug / PCI: Check for new devices on enabled slots The current implementation of acpiphp_check_bridge() is pretty dumb: - It enables a slot if it's not enabled and the slot status is ACPI_STA_ALL. - It disables a slot if it's enabled and the slot status is not ACPI_STA_ALL. This behavior is not sufficient to handle the Thunderbolt daisy chaining case properly, however, because in that case the bus behind the already enabled slot needs to be rescanned for new devices. For this reason, modify acpiphp_check_bridge() so that slots are disabled and stopped if they are not in the ACPI_STA_ALL state. For slots in the ACPI_STA_ALL state, devices behind them that don't respond are trimmed using a new function, trim_stale_devices(), introduced specifically for this purpose. That function walks the given bus and checks each device on it. If the device doesn't respond, it is assumed to be gone and is removed. Once all of the stale devices directy behind the slot have been removed, acpiphp_check_bridge() will start looking for new devices that might have appeared on the given bus. It will do that even if the slot is already enabled (SLOT_ENABLED is set for it). In addition to that, make the bus check notification ignore SLOT_ENABLED and go for enable_device() directly if bridge is NULL, so that devices behind the slot are re-enumerated in that case too. This change is based on earlier patches from Kirill A Shutemov and Mika Westerberg. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
2013-07-17 04:10:35 +08:00
/* remove stale devices if any */
list_for_each_entry_safe_reverse(dev, tmp,
&bus->devices, bus_list)
ACPI / hotplug / PCI: Check for new devices on enabled slots The current implementation of acpiphp_check_bridge() is pretty dumb: - It enables a slot if it's not enabled and the slot status is ACPI_STA_ALL. - It disables a slot if it's enabled and the slot status is not ACPI_STA_ALL. This behavior is not sufficient to handle the Thunderbolt daisy chaining case properly, however, because in that case the bus behind the already enabled slot needs to be rescanned for new devices. For this reason, modify acpiphp_check_bridge() so that slots are disabled and stopped if they are not in the ACPI_STA_ALL state. For slots in the ACPI_STA_ALL state, devices behind them that don't respond are trimmed using a new function, trim_stale_devices(), introduced specifically for this purpose. That function walks the given bus and checks each device on it. If the device doesn't respond, it is assumed to be gone and is removed. Once all of the stale devices directy behind the slot have been removed, acpiphp_check_bridge() will start looking for new devices that might have appeared on the given bus. It will do that even if the slot is already enabled (SLOT_ENABLED is set for it). In addition to that, make the bus check notification ignore SLOT_ENABLED and go for enable_device() directly if bridge is NULL, so that devices behind the slot are re-enumerated in that case too. This change is based on earlier patches from Kirill A Shutemov and Mika Westerberg. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
2013-07-17 04:10:35 +08:00
if (PCI_SLOT(dev->devfn) == slot->device)
trim_stale_devices(dev);
/* configure all functions */
enable_slot(slot);
} else {
disable_slot(slot);
}
}
if (bridge->pci_dev)
pm_runtime_put(&bridge->pci_dev->dev);
}
/*
* Remove devices for which we could not assign resources, call
* arch specific code to fix-up the bus
*/
static void acpiphp_sanitize_bus(struct pci_bus *bus)
{
struct pci_dev *dev, *tmp;
int i;
unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM;
list_for_each_entry_safe_reverse(dev, tmp, &bus->devices, bus_list) {
for (i = 0; i < PCI_BRIDGE_RESOURCES; i++) {
struct resource *res = &dev->resource[i];
if ((res->flags & type_mask) && !res->start &&
res->end) {
/* Could not assign a required resources
* for this device, remove it */
pci_stop_and_remove_bus_device(dev);
break;
}
}
}
}
/*
* ACPI event handlers
*/
void acpiphp_check_host_bridge(struct acpi_device *adev)
{
struct acpiphp_bridge *bridge = NULL;
acpi_lock_hp_context();
if (adev->hp) {
bridge = to_acpiphp_root_context(adev->hp)->root_bridge;
if (bridge)
get_bridge(bridge);
}
acpi_unlock_hp_context();
if (bridge) {
pci_lock_rescan_remove();
acpiphp_check_bridge(bridge);
pci_unlock_rescan_remove();
put_bridge(bridge);
}
}
static int acpiphp_disable_and_eject_slot(struct acpiphp_slot *slot);
static void hotplug_event(u32 type, struct acpiphp_context *context)
{
ACPI / hotplug / PCI: Consolidate ACPIPHP with ACPI core hotplug The ACPI-based PCI hotplug (ACPIPHP) code currently attaches its hotplug context objects directly to ACPI namespace nodes representing hotplug devices. However, after recent changes causing struct acpi_device to be created for every namespace node representing a device (regardless of its status), that is not necessary any more. Moreover, it's vulnerable to the theoretical issue that the ACPI handle passed in the context between handle_hotplug_event() and hotplug_event_work() may become invalid in the meantime (as a result of a concurrent table unload). In principle, this issue might be addressed by adding a non-empty release handler for ACPIPHP hotplug context objects analogous to acpi_scan_drop_device(), but that would duplicate the code in that function and in acpi_device_del_work_fn(). For this reason, it's better to modify ACPIPHP to attach its device hotplug contexts to struct device objects representing hotplug devices and make it use acpi_hotplug_notify_cb() as its notify handler. At the same time, acpi_device_hotplug() can be modified to dispatch the new .hp.event() callback pointing to acpiphp_hotplug_event() from ACPI device objects associated with PCI devices or use the generic ACPI device hotplug code for device objects with matching scan handlers. This allows the existing code duplication between ACPIPHP and the ACPI core to be reduced too and makes further ACPI-based device hotplug consolidation possible. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2014-02-07 00:31:37 +08:00
acpi_handle handle = context->hp.self->handle;
struct acpiphp_func *func = &context->func;
struct acpiphp_slot *slot = func->slot;
struct acpiphp_bridge *bridge;
PCI hotplug: acpiphp: Prevent deadlock on PCI-to-PCI bridge remove I originally submitted a patch to workaround this by pushing all Ejection Requests and Device Checks onto the kacpi_hotplug queue. http://marc.info/?l=linux-acpi&m=131678270930105&w=2 The patch is still insufficient in that Bus Checks also need to be added. Rather than add all events, including non-PCI-hotplug events, to the hotplug queue, mjg suggested that a better approach would be to modify the acpiphp driver so only acpiphp events would be added to the kacpi_hotplug queue. It's a longer patch, but at least we maintain the benefit of having separate queues in ACPI. This, of course, is still only a workaround the problem. As Bjorn and mjg pointed out, we have to refactor a lot of this code to do the right thing but at this point it is a better to have this code working. The acpi core places all events on the kacpi_notify queue. When the acpiphp driver is loaded and a PCI card with a PCI-to-PCI bridge is removed the following call sequence occurs: cleanup_p2p_bridge() -> cleanup_bridge() -> acpi_remove_notify_handler() -> acpi_os_wait_events_complete() -> flush_workqueue(kacpi_notify_wq) which is the queue we are currently executing on and the process will hang. Move all hotplug acpiphp events onto the kacpi_hotplug workqueue. In handle_hotplug_event_bridge() and handle_hotplug_event_func() we can simply push the rest of the work onto the kacpi_hotplug queue and then avoid the deadlock. Signed-off-by: Prarit Bhargava <prarit@redhat.com> Cc: mjg@redhat.com Cc: bhelgaas@google.com Cc: linux-acpi@vger.kernel.org Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2011-09-29 07:40:53 +08:00
acpi_lock_hp_context();
bridge = context->bridge;
if (bridge)
get_bridge(bridge);
acpi_unlock_hp_context();
ACPI / hotplug: Fix concurrency issues and memory leaks This changeset is aimed at fixing a few different but related problems in the ACPI hotplug infrastructure. First of all, since notify handlers may be run in parallel with acpi_bus_scan(), acpi_bus_trim() and acpi_bus_hot_remove_device() and some of them are installed for ACPI handles that have no struct acpi_device objects attached (i.e. before those objects are created), those notify handlers have to take acpi_scan_lock to prevent races from taking place (e.g. a struct acpi_device is found to be present for the given ACPI handle, but right after that it is removed by acpi_bus_trim() running in parallel to the given notify handler). Moreover, since some of them call acpi_bus_scan() and acpi_bus_trim(), this leads to the conclusion that acpi_scan_lock should be acquired by the callers of these two funtions rather by these functions themselves. For these reasons, make all notify handlers that can handle device addition and eject events take acpi_scan_lock and remove the acpi_scan_lock locking from acpi_bus_scan() and acpi_bus_trim(). Accordingly, update all of their users to make sure that they are always called under acpi_scan_lock. Furthermore, since eject operations are carried out asynchronously with respect to the notify events that trigger them, with the help of acpi_bus_hot_remove_device(), even if notify handlers take the ACPI scan lock, it still is possible that, for example, acpi_bus_trim() will run between acpi_bus_hot_remove_device() and the notify handler that scheduled its execution and that acpi_bus_trim() will remove the device node passed to acpi_bus_hot_remove_device() for ejection. In that case, the struct acpi_device object obtained by acpi_bus_hot_remove_device() will be invalid and not-so-funny things will ensue. To protect agaist that, make the users of acpi_bus_hot_remove_device() run get_device() on ACPI device node objects that are about to be passed to it and make acpi_bus_hot_remove_device() run put_device() on them and check if their ACPI handles are not NULL (make acpi_device_unregister() clear the device nodes' ACPI handles for that check to work). Finally, observe that acpi_os_hotplug_execute() actually can fail, in which case its caller ought to free memory allocated for the context object to prevent leaks from happening. It also needs to run put_device() on the device node that it ran get_device() on previously in that case. Modify the code accordingly. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Acked-by: Yinghai Lu <yinghai@kernel.org>
2013-02-13 21:36:47 +08:00
pci_lock_rescan_remove();
switch (type) {
case ACPI_NOTIFY_BUS_CHECK:
/* bus re-enumerate */
acpi_handle_debug(handle, "Bus check in %s()\n", __func__);
if (bridge)
acpiphp_check_bridge(bridge);
else if (!(slot->flags & SLOT_IS_GOING_AWAY))
enable_slot(slot);
ACPI / hotplug / PCI: Check for new devices on enabled slots The current implementation of acpiphp_check_bridge() is pretty dumb: - It enables a slot if it's not enabled and the slot status is ACPI_STA_ALL. - It disables a slot if it's enabled and the slot status is not ACPI_STA_ALL. This behavior is not sufficient to handle the Thunderbolt daisy chaining case properly, however, because in that case the bus behind the already enabled slot needs to be rescanned for new devices. For this reason, modify acpiphp_check_bridge() so that slots are disabled and stopped if they are not in the ACPI_STA_ALL state. For slots in the ACPI_STA_ALL state, devices behind them that don't respond are trimmed using a new function, trim_stale_devices(), introduced specifically for this purpose. That function walks the given bus and checks each device on it. If the device doesn't respond, it is assumed to be gone and is removed. Once all of the stale devices directy behind the slot have been removed, acpiphp_check_bridge() will start looking for new devices that might have appeared on the given bus. It will do that even if the slot is already enabled (SLOT_ENABLED is set for it). In addition to that, make the bus check notification ignore SLOT_ENABLED and go for enable_device() directly if bridge is NULL, so that devices behind the slot are re-enumerated in that case too. This change is based on earlier patches from Kirill A Shutemov and Mika Westerberg. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
2013-07-17 04:10:35 +08:00
break;
case ACPI_NOTIFY_DEVICE_CHECK:
/* device check */
acpi_handle_debug(handle, "Device check in %s()\n", __func__);
ACPI / hotplug / PCI: Avoid parent bus rescans on spurious device checks In the current ACPIPHP notify handler we always go directly for a rescan of the parent bus if we get a device check notification for a device that is not a bridge. However, this obviously is overzealous if nothing really changes, because this way we may rescan the whole PCI hierarchy pretty much in vain. That happens on Alex Williamson's machine whose ACPI tables contain device objects that are supposed to coresspond to PCIe root ports, but those ports aren't physically present (or at least they aren't visible in the PCI config space to us). The BIOS generates multiple device check notifies for those objects during boot and for each of them we go straight for the parent bus rescan, but the parent bus is the root bus in this particular case. In consequence, we rescan the whole PCI bus from the top several times in a row, which is completely unnecessary, increases boot time by 50% (after previous fixes) and generates excess dmesg output from the PCI subsystem. Fix the problem by checking if we can find anything new in the slot corresponding to the device we've got a device check notify for and doing nothig if that's not the case. The spec (ACPI 5.0, Section 5.6.6) appears to mandate this behavior, as it says: Device Check. Used to notify OSPM that the device either appeared or disappeared. If the device has appeared, OSPM will re-enumerate from the parent. If the device has disappeared, OSPM will invalidate the state of the device. OSPM may optimize out re-enumeration. Therefore, according to the spec, we are free to do nothing if nothing changes. References: https://bugzilla.kernel.org/show_bug.cgi?id=60865 Reported-and-tested-by: Alex Williamson <alex.williamson@redhat.com> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2013-09-08 06:07:28 +08:00
if (bridge) {
acpiphp_check_bridge(bridge);
} else if (!(slot->flags & SLOT_IS_GOING_AWAY)) {
ACPI / hotplug / PCI: Avoid parent bus rescans on spurious device checks In the current ACPIPHP notify handler we always go directly for a rescan of the parent bus if we get a device check notification for a device that is not a bridge. However, this obviously is overzealous if nothing really changes, because this way we may rescan the whole PCI hierarchy pretty much in vain. That happens on Alex Williamson's machine whose ACPI tables contain device objects that are supposed to coresspond to PCIe root ports, but those ports aren't physically present (or at least they aren't visible in the PCI config space to us). The BIOS generates multiple device check notifies for those objects during boot and for each of them we go straight for the parent bus rescan, but the parent bus is the root bus in this particular case. In consequence, we rescan the whole PCI bus from the top several times in a row, which is completely unnecessary, increases boot time by 50% (after previous fixes) and generates excess dmesg output from the PCI subsystem. Fix the problem by checking if we can find anything new in the slot corresponding to the device we've got a device check notify for and doing nothig if that's not the case. The spec (ACPI 5.0, Section 5.6.6) appears to mandate this behavior, as it says: Device Check. Used to notify OSPM that the device either appeared or disappeared. If the device has appeared, OSPM will re-enumerate from the parent. If the device has disappeared, OSPM will invalidate the state of the device. OSPM may optimize out re-enumeration. Therefore, according to the spec, we are free to do nothing if nothing changes. References: https://bugzilla.kernel.org/show_bug.cgi?id=60865 Reported-and-tested-by: Alex Williamson <alex.williamson@redhat.com> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2013-09-08 06:07:28 +08:00
/*
* Check if anything has changed in the slot and rescan
* from the parent if that's the case.
*/
if (acpiphp_rescan_slot(slot))
ACPI / hotplug / PCI: Avoid parent bus rescans on spurious device checks In the current ACPIPHP notify handler we always go directly for a rescan of the parent bus if we get a device check notification for a device that is not a bridge. However, this obviously is overzealous if nothing really changes, because this way we may rescan the whole PCI hierarchy pretty much in vain. That happens on Alex Williamson's machine whose ACPI tables contain device objects that are supposed to coresspond to PCIe root ports, but those ports aren't physically present (or at least they aren't visible in the PCI config space to us). The BIOS generates multiple device check notifies for those objects during boot and for each of them we go straight for the parent bus rescan, but the parent bus is the root bus in this particular case. In consequence, we rescan the whole PCI bus from the top several times in a row, which is completely unnecessary, increases boot time by 50% (after previous fixes) and generates excess dmesg output from the PCI subsystem. Fix the problem by checking if we can find anything new in the slot corresponding to the device we've got a device check notify for and doing nothig if that's not the case. The spec (ACPI 5.0, Section 5.6.6) appears to mandate this behavior, as it says: Device Check. Used to notify OSPM that the device either appeared or disappeared. If the device has appeared, OSPM will re-enumerate from the parent. If the device has disappeared, OSPM will invalidate the state of the device. OSPM may optimize out re-enumeration. Therefore, according to the spec, we are free to do nothing if nothing changes. References: https://bugzilla.kernel.org/show_bug.cgi?id=60865 Reported-and-tested-by: Alex Williamson <alex.williamson@redhat.com> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2013-09-08 06:07:28 +08:00
acpiphp_check_bridge(func->parent);
}
break;
case ACPI_NOTIFY_EJECT_REQUEST:
/* request device eject */
acpi_handle_debug(handle, "Eject request in %s()\n", __func__);
acpiphp_disable_and_eject_slot(slot);
break;
}
PCI hotplug: acpiphp: Prevent deadlock on PCI-to-PCI bridge remove I originally submitted a patch to workaround this by pushing all Ejection Requests and Device Checks onto the kacpi_hotplug queue. http://marc.info/?l=linux-acpi&m=131678270930105&w=2 The patch is still insufficient in that Bus Checks also need to be added. Rather than add all events, including non-PCI-hotplug events, to the hotplug queue, mjg suggested that a better approach would be to modify the acpiphp driver so only acpiphp events would be added to the kacpi_hotplug queue. It's a longer patch, but at least we maintain the benefit of having separate queues in ACPI. This, of course, is still only a workaround the problem. As Bjorn and mjg pointed out, we have to refactor a lot of this code to do the right thing but at this point it is a better to have this code working. The acpi core places all events on the kacpi_notify queue. When the acpiphp driver is loaded and a PCI card with a PCI-to-PCI bridge is removed the following call sequence occurs: cleanup_p2p_bridge() -> cleanup_bridge() -> acpi_remove_notify_handler() -> acpi_os_wait_events_complete() -> flush_workqueue(kacpi_notify_wq) which is the queue we are currently executing on and the process will hang. Move all hotplug acpiphp events onto the kacpi_hotplug workqueue. In handle_hotplug_event_bridge() and handle_hotplug_event_func() we can simply push the rest of the work onto the kacpi_hotplug queue and then avoid the deadlock. Signed-off-by: Prarit Bhargava <prarit@redhat.com> Cc: mjg@redhat.com Cc: bhelgaas@google.com Cc: linux-acpi@vger.kernel.org Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2011-09-29 07:40:53 +08:00
pci_unlock_rescan_remove();
if (bridge)
put_bridge(bridge);
ACPI / dock / PCI: Synchronous handling of dock events for PCI devices The interactions between the ACPI dock driver and the ACPI-based PCI hotplug (acpiphp) are currently problematic because of ordering issues during hot-remove operations. First of all, the current ACPI glue code expects that physical devices will always be deleted before deleting the companion ACPI device objects. Otherwise, acpi_unbind_one() will fail with a warning message printed to the kernel log, for example: [ 185.026073] usb usb5: Oops, 'acpi_handle' corrupt [ 185.035150] pci 0000:1b:00.0: Oops, 'acpi_handle' corrupt [ 185.035515] pci 0000:18:02.0: Oops, 'acpi_handle' corrupt [ 180.013656] port1: Oops, 'acpi_handle' corrupt This means, in particular, that struct pci_dev objects have to be deleted before the struct acpi_device objects they are "glued" with. Now, the following happens the during the undocking of an ACPI-based dock station: 1) hotplug_dock_devices() invokes registered hotplug callbacks to destroy physical devices associated with the ACPI device objects depending on the dock station. It calls dd->ops->handler() for each of those device objects. 2) For PCI devices dd->ops->handler() points to handle_hotplug_event_func() that queues up a separate work item to execute _handle_hotplug_event_func() for the given device and returns immediately. That work item will be executed later. 3) hotplug_dock_devices() calls dock_remove_acpi_device() for each device depending on the dock station. This runs acpi_bus_trim() for each of them, which causes the underlying ACPI device object to be destroyed, but the work items queued up by handle_hotplug_event_func() haven't been started yet. 4) _handle_hotplug_event_func() queued up in step 2) are executed and cause the above failure to happen, because the PCI devices they handle do not have the companion ACPI device objects any more (those objects have been deleted in step 3). The possible breakage doesn't end here, though, because hotplug_dock_devices() may return before at least some of the _handle_hotplug_event_func() work items spawned by it have a chance to complete and then undock() will cause _DCK to be evaluated and that will cause the devices handled by the _handle_hotplug_event_func() to go away possibly while they are being accessed. This means that dd->ops->handler() for PCI devices should not point to handle_hotplug_event_func(). Instead, it should point to a function that will do the work of _handle_hotplug_event_func() synchronously. For this reason, introduce such a function, hotplug_event_func(), and modity acpiphp_dock_ops to point to it as the handler. Unfortunately, however, this is not sufficient, because if the dock code were not changed further, hotplug_event_func() would now deadlock with hotplug_dock_devices() that called it, since it would run unregister_hotplug_dock_device() which in turn would attempt to acquire the dock station's hp_lock mutex already acquired by hotplug_dock_devices(). To resolve that deadlock use the observation that unregister_hotplug_dock_device() won't need to acquire hp_lock if PCI bridges the devices on the dock station depend on are prevented from being removed prematurely while the first loop in hotplug_dock_devices() is in progress. To make that possible, introduce a mechanism by which the callers of register_hotplug_dock_device() can provide "init" and "release" routines that will be executed, respectively, during the addition and removal of the physical device object associated with the given ACPI device handle. Make acpiphp use two new functions, acpiphp_dock_init() and acpiphp_dock_release(), that call get_bridge() and put_bridge(), respectively, on the acpiphp bridge holding the given device, for this purpose. In addition to that, remove the dock station's list of "hotplug devices" and make the dock code always walk the whole list of "dependent devices" instead in such a way that the loops in hotplug_dock_devices() and dock_event() (replacing the loops over "hotplug devices") will take references to the list entries that register_hotplug_dock_device() has been called for. That prevents the "release" routines associated with those entries from being called while the given entry is being processed and for PCI devices this means that their bridges won't be removed (by a concurrent thread) while hotplug_event_func() handling them is being executed. This change is based on two earlier patches from Jiang Liu. References: https://bugzilla.kernel.org/show_bug.cgi?id=59501 Reported-and-tested-by: Alexander E. Patrakov <patrakov@gmail.com> Tracked-down-by: Jiang Liu <jiang.liu@huawei.com> Tested-by: Illya Klymov <xanf@xanf.me> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Acked-by: Yinghai Lu <yinghai@kernel.org> Cc: 3.9+ <stable@vger.kernel.org>
2013-06-24 17:22:53 +08:00
}
static int acpiphp_hotplug_notify(struct acpi_device *adev, u32 type)
PCI hotplug: acpiphp: Prevent deadlock on PCI-to-PCI bridge remove I originally submitted a patch to workaround this by pushing all Ejection Requests and Device Checks onto the kacpi_hotplug queue. http://marc.info/?l=linux-acpi&m=131678270930105&w=2 The patch is still insufficient in that Bus Checks also need to be added. Rather than add all events, including non-PCI-hotplug events, to the hotplug queue, mjg suggested that a better approach would be to modify the acpiphp driver so only acpiphp events would be added to the kacpi_hotplug queue. It's a longer patch, but at least we maintain the benefit of having separate queues in ACPI. This, of course, is still only a workaround the problem. As Bjorn and mjg pointed out, we have to refactor a lot of this code to do the right thing but at this point it is a better to have this code working. The acpi core places all events on the kacpi_notify queue. When the acpiphp driver is loaded and a PCI card with a PCI-to-PCI bridge is removed the following call sequence occurs: cleanup_p2p_bridge() -> cleanup_bridge() -> acpi_remove_notify_handler() -> acpi_os_wait_events_complete() -> flush_workqueue(kacpi_notify_wq) which is the queue we are currently executing on and the process will hang. Move all hotplug acpiphp events onto the kacpi_hotplug workqueue. In handle_hotplug_event_bridge() and handle_hotplug_event_func() we can simply push the rest of the work onto the kacpi_hotplug queue and then avoid the deadlock. Signed-off-by: Prarit Bhargava <prarit@redhat.com> Cc: mjg@redhat.com Cc: bhelgaas@google.com Cc: linux-acpi@vger.kernel.org Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2011-09-29 07:40:53 +08:00
{
struct acpiphp_context *context;
context = acpiphp_grab_context(adev);
if (!context)
ACPI / hotplug / PCI: Consolidate ACPIPHP with ACPI core hotplug The ACPI-based PCI hotplug (ACPIPHP) code currently attaches its hotplug context objects directly to ACPI namespace nodes representing hotplug devices. However, after recent changes causing struct acpi_device to be created for every namespace node representing a device (regardless of its status), that is not necessary any more. Moreover, it's vulnerable to the theoretical issue that the ACPI handle passed in the context between handle_hotplug_event() and hotplug_event_work() may become invalid in the meantime (as a result of a concurrent table unload). In principle, this issue might be addressed by adding a non-empty release handler for ACPIPHP hotplug context objects analogous to acpi_scan_drop_device(), but that would duplicate the code in that function and in acpi_device_del_work_fn(). For this reason, it's better to modify ACPIPHP to attach its device hotplug contexts to struct device objects representing hotplug devices and make it use acpi_hotplug_notify_cb() as its notify handler. At the same time, acpi_device_hotplug() can be modified to dispatch the new .hp.event() callback pointing to acpiphp_hotplug_event() from ACPI device objects associated with PCI devices or use the generic ACPI device hotplug code for device objects with matching scan handlers. This allows the existing code duplication between ACPIPHP and the ACPI core to be reduced too and makes further ACPI-based device hotplug consolidation possible. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2014-02-07 00:31:37 +08:00
return -ENODATA;
ACPI / hotplug / PCI: Fix bridge removal race in handle_hotplug_event() If a PCI bridge with an ACPIPHP context attached is removed via sysfs, the code path executed as a result is the following: pci_stop_and_remove_bus_device_locked pci_remove_bus pcibios_remove_bus acpi_pci_remove_bus acpiphp_remove_slots cleanup_bridge put_bridge free_bridge acpiphp_put_context (for each child, under context lock) kfree (child context) Now, if a hotplug notify is dispatched for one of the bridge's children and the timing is such that handle_hotplug_event() for that notify is executed while free_bridge() above is running, the get_bridge(context->func.parent) in handle_hotplug_event() will not really help, because it is too late to prevent the bridge from going away and the child's context may be freed before hotplug_event_work() scheduled from handle_hotplug_event() dereferences the pointer to it passed via the data argument. That will cause a kernel crash to happpen in hotplug_event_work(). To prevent that from happening, make handle_hotplug_event() check the is_going_away flag of the function's parent bridge (under acpiphp_context_lock) and bail out if it's set. Also, make cleanup_bridge() set the bridge's is_going_away flag under acpiphp_context_lock so that it cannot be changed between the check and the subsequent get_bridge(context->func.parent) in handle_hotplug_event(). Then, in the above scenario, handle_hotplug_event() will notice that context->func.parent->is_going_away is already set and it will exit immediately preventing the crash from happening. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2014-02-04 05:30:06 +08:00
ACPI / hotplug / PCI: Consolidate ACPIPHP with ACPI core hotplug The ACPI-based PCI hotplug (ACPIPHP) code currently attaches its hotplug context objects directly to ACPI namespace nodes representing hotplug devices. However, after recent changes causing struct acpi_device to be created for every namespace node representing a device (regardless of its status), that is not necessary any more. Moreover, it's vulnerable to the theoretical issue that the ACPI handle passed in the context between handle_hotplug_event() and hotplug_event_work() may become invalid in the meantime (as a result of a concurrent table unload). In principle, this issue might be addressed by adding a non-empty release handler for ACPIPHP hotplug context objects analogous to acpi_scan_drop_device(), but that would duplicate the code in that function and in acpi_device_del_work_fn(). For this reason, it's better to modify ACPIPHP to attach its device hotplug contexts to struct device objects representing hotplug devices and make it use acpi_hotplug_notify_cb() as its notify handler. At the same time, acpi_device_hotplug() can be modified to dispatch the new .hp.event() callback pointing to acpiphp_hotplug_event() from ACPI device objects associated with PCI devices or use the generic ACPI device hotplug code for device objects with matching scan handlers. This allows the existing code duplication between ACPIPHP and the ACPI core to be reduced too and makes further ACPI-based device hotplug consolidation possible. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2014-02-07 00:31:37 +08:00
hotplug_event(type, context);
acpiphp_let_context_go(context);
ACPI / hotplug / PCI: Consolidate ACPIPHP with ACPI core hotplug The ACPI-based PCI hotplug (ACPIPHP) code currently attaches its hotplug context objects directly to ACPI namespace nodes representing hotplug devices. However, after recent changes causing struct acpi_device to be created for every namespace node representing a device (regardless of its status), that is not necessary any more. Moreover, it's vulnerable to the theoretical issue that the ACPI handle passed in the context between handle_hotplug_event() and hotplug_event_work() may become invalid in the meantime (as a result of a concurrent table unload). In principle, this issue might be addressed by adding a non-empty release handler for ACPIPHP hotplug context objects analogous to acpi_scan_drop_device(), but that would duplicate the code in that function and in acpi_device_del_work_fn(). For this reason, it's better to modify ACPIPHP to attach its device hotplug contexts to struct device objects representing hotplug devices and make it use acpi_hotplug_notify_cb() as its notify handler. At the same time, acpi_device_hotplug() can be modified to dispatch the new .hp.event() callback pointing to acpiphp_hotplug_event() from ACPI device objects associated with PCI devices or use the generic ACPI device hotplug code for device objects with matching scan handlers. This allows the existing code duplication between ACPIPHP and the ACPI core to be reduced too and makes further ACPI-based device hotplug consolidation possible. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2014-02-07 00:31:37 +08:00
return 0;
}
/**
* acpiphp_enumerate_slots - Enumerate PCI slots for a given bus.
* @bus: PCI bus to enumerate the slots for.
*
* A "slot" is an object associated with a PCI device number. All functions
* (PCI devices) with the same bus and device number belong to the same slot.
*/
void acpiphp_enumerate_slots(struct pci_bus *bus)
{
struct acpiphp_bridge *bridge;
struct acpi_device *adev;
acpi_handle handle;
acpi_status status;
if (acpiphp_disabled)
return;
adev = ACPI_COMPANION(bus->bridge);
if (!adev)
return;
handle = adev->handle;
bridge = kzalloc(sizeof(struct acpiphp_bridge), GFP_KERNEL);
if (!bridge) {
acpi_handle_err(handle, "No memory for bridge object\n");
return;
}
INIT_LIST_HEAD(&bridge->slots);
kref_init(&bridge->ref);
bridge->pci_dev = pci_dev_get(bus->self);
bridge->pci_bus = bus;
/*
* Grab a ref to the subordinate PCI bus in case the bus is
* removed via PCI core logical hotplug. The ref pins the bus
* (which we access during module unload).
*/
get_device(&bus->dev);
acpi_lock_hp_context();
if (pci_is_root_bus(bridge->pci_bus)) {
struct acpiphp_root_context *root_context;
root_context = kzalloc(sizeof(*root_context), GFP_KERNEL);
if (!root_context)
goto err;
root_context->root_bridge = bridge;
acpi_set_hp_context(adev, &root_context->hp);
} else {
struct acpiphp_context *context;
/*
* This bridge should have been registered as a hotplug function
* under its parent, so the context should be there, unless the
* parent is going to be handled by pciehp, in which case this
* bridge is not interesting to us either.
*/
ACPI / hotplug / PCI: Consolidate ACPIPHP with ACPI core hotplug The ACPI-based PCI hotplug (ACPIPHP) code currently attaches its hotplug context objects directly to ACPI namespace nodes representing hotplug devices. However, after recent changes causing struct acpi_device to be created for every namespace node representing a device (regardless of its status), that is not necessary any more. Moreover, it's vulnerable to the theoretical issue that the ACPI handle passed in the context between handle_hotplug_event() and hotplug_event_work() may become invalid in the meantime (as a result of a concurrent table unload). In principle, this issue might be addressed by adding a non-empty release handler for ACPIPHP hotplug context objects analogous to acpi_scan_drop_device(), but that would duplicate the code in that function and in acpi_device_del_work_fn(). For this reason, it's better to modify ACPIPHP to attach its device hotplug contexts to struct device objects representing hotplug devices and make it use acpi_hotplug_notify_cb() as its notify handler. At the same time, acpi_device_hotplug() can be modified to dispatch the new .hp.event() callback pointing to acpiphp_hotplug_event() from ACPI device objects associated with PCI devices or use the generic ACPI device hotplug code for device objects with matching scan handlers. This allows the existing code duplication between ACPIPHP and the ACPI core to be reduced too and makes further ACPI-based device hotplug consolidation possible. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2014-02-07 00:31:37 +08:00
context = acpiphp_get_context(adev);
if (!context)
goto err;
bridge->context = context;
context->bridge = bridge;
/* Get a reference to the parent bridge. */
get_bridge(context->func.parent);
}
acpi_unlock_hp_context();
/* Must be added to the list prior to calling acpiphp_add_context(). */
mutex_lock(&bridge_mutex);
list_add(&bridge->list, &bridge_list);
mutex_unlock(&bridge_mutex);
/* register all slot objects under this bridge */
status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, 1,
acpiphp_add_context, NULL, bridge, NULL);
if (ACPI_FAILURE(status)) {
acpi_handle_err(handle, "failed to register slots\n");
cleanup_bridge(bridge);
put_bridge(bridge);
}
return;
err:
acpi_unlock_hp_context();
put_device(&bus->dev);
pci_dev_put(bridge->pci_dev);
kfree(bridge);
}
static void acpiphp_drop_bridge(struct acpiphp_bridge *bridge)
{
if (pci_is_root_bus(bridge->pci_bus)) {
struct acpiphp_root_context *root_context;
struct acpi_device *adev;
acpi_lock_hp_context();
adev = ACPI_COMPANION(bridge->pci_bus->bridge);
root_context = to_acpiphp_root_context(adev->hp);
adev->hp = NULL;
acpi_unlock_hp_context();
kfree(root_context);
}
cleanup_bridge(bridge);
put_bridge(bridge);
}
/**
* acpiphp_remove_slots - Remove slot objects associated with a given bus.
* @bus: PCI bus to remove the slot objects for.
*/
void acpiphp_remove_slots(struct pci_bus *bus)
{
struct acpiphp_bridge *bridge;
if (acpiphp_disabled)
return;
mutex_lock(&bridge_mutex);
list_for_each_entry(bridge, &bridge_list, list)
if (bridge->pci_bus == bus) {
mutex_unlock(&bridge_mutex);
acpiphp_drop_bridge(bridge);
return;
}
mutex_unlock(&bridge_mutex);
}
/**
* acpiphp_enable_slot - power on slot
* @slot: ACPI PHP slot
*/
int acpiphp_enable_slot(struct acpiphp_slot *slot)
{
pci_lock_rescan_remove();
if (slot->flags & SLOT_IS_GOING_AWAY) {
pci_unlock_rescan_remove();
return -ENODEV;
}
/* configure all functions */
if (!(slot->flags & SLOT_ENABLED))
enable_slot(slot);
pci_unlock_rescan_remove();
return 0;
}
/**
* acpiphp_disable_and_eject_slot - power off and eject slot
* @slot: ACPI PHP slot
*/
static int acpiphp_disable_and_eject_slot(struct acpiphp_slot *slot)
{
struct acpiphp_func *func;
if (slot->flags & SLOT_IS_GOING_AWAY)
return -ENODEV;
/* unconfigure all functions */
disable_slot(slot);
list_for_each_entry(func, &slot->funcs, sibling)
if (func->flags & FUNC_HAS_EJ0) {
acpi_handle handle = func_to_handle(func);
if (ACPI_FAILURE(acpi_evaluate_ej0(handle)))
acpi_handle_err(handle, "_EJ0 failed\n");
break;
}
return 0;
}
int acpiphp_disable_slot(struct acpiphp_slot *slot)
{
int ret;
/*
* Acquire acpi_scan_lock to ensure that the execution of _EJ0 in
* acpiphp_disable_and_eject_slot() will be synchronized properly.
*/
acpi_scan_lock_acquire();
pci_lock_rescan_remove();
ret = acpiphp_disable_and_eject_slot(slot);
pci_unlock_rescan_remove();
acpi_scan_lock_release();
return ret;
}
/*
* slot enabled: 1
* slot disabled: 0
*/
u8 acpiphp_get_power_status(struct acpiphp_slot *slot)
{
return (slot->flags & SLOT_ENABLED);
}
/*
* latch open: 1
* latch closed: 0
*/
u8 acpiphp_get_latch_status(struct acpiphp_slot *slot)
{
return !(get_slot_status(slot) & ACPI_STA_DEVICE_UI);
}
/*
* adapter presence : 1
* absence : 0
*/
u8 acpiphp_get_adapter_status(struct acpiphp_slot *slot)
{
return !!get_slot_status(slot);
}