mirror of https://gitee.com/openkylin/linux.git
NVMe: Device resume error handling
Adds controller error handling to the power-management resume path. If the device fails to initialize, it is queued for a reset. If the reset fails, a thread is spawned to remove the PCI device. If the device resumes as "busy", it is responding to admin commands but will not create IO queues. In this case, we need to remove the gendisks and free the IO queues, since they can't be used and may be holding bios in their lists. Testing showed that the DMA pools require a PCI device, so the PCI driver's 'remove' callback had to change to release the DMA resources in line with that call, instead of after all references to the device are released. Signed-off-by: Keith Busch <keith.busch@intel.com> Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
This commit is contained in:
parent
68608c268b
commit
9a6b94584d
|
@ -58,6 +58,7 @@ module_param(use_threaded_interrupts, int, 0);
|
|||
static DEFINE_SPINLOCK(dev_list_lock);
|
||||
static LIST_HEAD(dev_list);
|
||||
static struct task_struct *nvme_thread;
|
||||
static struct workqueue_struct *nvme_workq;
|
||||
|
||||
/*
|
||||
* An NVM Express queue. Each device has at least two (one for admin
|
||||
|
@ -1968,7 +1969,6 @@ static int nvme_dev_map(struct nvme_dev *dev)
|
|||
dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)))
|
||||
goto disable;
|
||||
|
||||
pci_set_drvdata(pdev, dev);
|
||||
dev->bar = ioremap(pci_resource_start(pdev, 0), 8192);
|
||||
if (!dev->bar)
|
||||
goto disable;
|
||||
|
@ -1995,9 +1995,9 @@ static void nvme_dev_unmap(struct nvme_dev *dev)
|
|||
if (dev->bar) {
|
||||
iounmap(dev->bar);
|
||||
dev->bar = NULL;
|
||||
pci_release_regions(dev->pci_dev);
|
||||
}
|
||||
|
||||
pci_release_regions(dev->pci_dev);
|
||||
if (pci_is_enabled(dev->pci_dev))
|
||||
pci_disable_device(dev->pci_dev);
|
||||
}
|
||||
|
@ -2085,11 +2085,6 @@ static void nvme_release_instance(struct nvme_dev *dev)
|
|||
static void nvme_free_dev(struct kref *kref)
|
||||
{
|
||||
struct nvme_dev *dev = container_of(kref, struct nvme_dev, kref);
|
||||
nvme_dev_remove(dev);
|
||||
nvme_dev_shutdown(dev);
|
||||
nvme_free_queues(dev);
|
||||
nvme_release_instance(dev);
|
||||
nvme_release_prp_pools(dev);
|
||||
kfree(dev->queues);
|
||||
kfree(dev->entry);
|
||||
kfree(dev);
|
||||
|
@ -2161,6 +2156,70 @@ static int nvme_dev_start(struct nvme_dev *dev)
|
|||
return result;
|
||||
}
|
||||
|
||||
static int nvme_remove_dead_ctrl(void *arg)
|
||||
{
|
||||
struct nvme_dev *dev = (struct nvme_dev *)arg;
|
||||
struct pci_dev *pdev = dev->pci_dev;
|
||||
|
||||
if (pci_get_drvdata(pdev))
|
||||
pci_stop_and_remove_bus_device(pdev);
|
||||
kref_put(&dev->kref, nvme_free_dev);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void nvme_remove_disks(struct work_struct *ws)
|
||||
{
|
||||
int i;
|
||||
struct nvme_dev *dev = container_of(ws, struct nvme_dev, reset_work);
|
||||
|
||||
nvme_dev_remove(dev);
|
||||
spin_lock(&dev_list_lock);
|
||||
for (i = dev->queue_count - 1; i > 0; i--) {
|
||||
BUG_ON(!dev->queues[i] || !dev->queues[i]->q_suspended);
|
||||
nvme_free_queue(dev->queues[i]);
|
||||
dev->queue_count--;
|
||||
dev->queues[i] = NULL;
|
||||
}
|
||||
spin_unlock(&dev_list_lock);
|
||||
}
|
||||
|
||||
static int nvme_dev_resume(struct nvme_dev *dev)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = nvme_dev_start(dev);
|
||||
if (ret && ret != -EBUSY)
|
||||
return ret;
|
||||
if (ret == -EBUSY) {
|
||||
spin_lock(&dev_list_lock);
|
||||
INIT_WORK(&dev->reset_work, nvme_remove_disks);
|
||||
queue_work(nvme_workq, &dev->reset_work);
|
||||
spin_unlock(&dev_list_lock);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void nvme_dev_reset(struct nvme_dev *dev)
|
||||
{
|
||||
nvme_dev_shutdown(dev);
|
||||
if (nvme_dev_resume(dev)) {
|
||||
dev_err(&dev->pci_dev->dev, "Device failed to resume\n");
|
||||
kref_get(&dev->kref);
|
||||
if (IS_ERR(kthread_run(nvme_remove_dead_ctrl, dev, "nvme%d",
|
||||
dev->instance))) {
|
||||
dev_err(&dev->pci_dev->dev,
|
||||
"Failed to start controller remove task\n");
|
||||
kref_put(&dev->kref, nvme_free_dev);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void nvme_reset_failed_dev(struct work_struct *ws)
|
||||
{
|
||||
struct nvme_dev *dev = container_of(ws, struct nvme_dev, reset_work);
|
||||
nvme_dev_reset(dev);
|
||||
}
|
||||
|
||||
static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
|
||||
{
|
||||
int result = -ENOMEM;
|
||||
|
@ -2180,7 +2239,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
|
|||
|
||||
INIT_LIST_HEAD(&dev->namespaces);
|
||||
dev->pci_dev = pdev;
|
||||
|
||||
pci_set_drvdata(pdev, dev);
|
||||
result = nvme_set_instance(dev);
|
||||
if (result)
|
||||
goto free;
|
||||
|
@ -2232,7 +2291,19 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
|
|||
static void nvme_remove(struct pci_dev *pdev)
|
||||
{
|
||||
struct nvme_dev *dev = pci_get_drvdata(pdev);
|
||||
|
||||
spin_lock(&dev_list_lock);
|
||||
list_del_init(&dev->node);
|
||||
spin_unlock(&dev_list_lock);
|
||||
|
||||
pci_set_drvdata(pdev, NULL);
|
||||
flush_work(&dev->reset_work);
|
||||
misc_deregister(&dev->miscdev);
|
||||
nvme_dev_remove(dev);
|
||||
nvme_dev_shutdown(dev);
|
||||
nvme_free_queues(dev);
|
||||
nvme_release_instance(dev);
|
||||
nvme_release_prp_pools(dev);
|
||||
kref_put(&dev->kref, nvme_free_dev);
|
||||
}
|
||||
|
||||
|
@ -2256,13 +2327,12 @@ static int nvme_resume(struct device *dev)
|
|||
{
|
||||
struct pci_dev *pdev = to_pci_dev(dev);
|
||||
struct nvme_dev *ndev = pci_get_drvdata(pdev);
|
||||
int ret;
|
||||
|
||||
ret = nvme_dev_start(ndev);
|
||||
/* XXX: should remove gendisks if resume fails */
|
||||
if (ret)
|
||||
nvme_free_queues(ndev);
|
||||
return ret;
|
||||
if (nvme_dev_resume(ndev) && !work_busy(&ndev->reset_work)) {
|
||||
INIT_WORK(&ndev->reset_work, nvme_reset_failed_dev);
|
||||
queue_work(nvme_workq, &ndev->reset_work);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static SIMPLE_DEV_PM_OPS(nvme_dev_pm_ops, nvme_suspend, nvme_resume);
|
||||
|
@ -2303,9 +2373,14 @@ static int __init nvme_init(void)
|
|||
if (IS_ERR(nvme_thread))
|
||||
return PTR_ERR(nvme_thread);
|
||||
|
||||
result = -ENOMEM;
|
||||
nvme_workq = create_singlethread_workqueue("nvme");
|
||||
if (!nvme_workq)
|
||||
goto kill_kthread;
|
||||
|
||||
result = register_blkdev(nvme_major, "nvme");
|
||||
if (result < 0)
|
||||
goto kill_kthread;
|
||||
goto kill_workq;
|
||||
else if (result > 0)
|
||||
nvme_major = result;
|
||||
|
||||
|
@ -2316,6 +2391,8 @@ static int __init nvme_init(void)
|
|||
|
||||
unregister_blkdev:
|
||||
unregister_blkdev(nvme_major, "nvme");
|
||||
kill_workq:
|
||||
destroy_workqueue(nvme_workq);
|
||||
kill_kthread:
|
||||
kthread_stop(nvme_thread);
|
||||
return result;
|
||||
|
@ -2325,6 +2402,7 @@ static void __exit nvme_exit(void)
|
|||
{
|
||||
pci_unregister_driver(&nvme_driver);
|
||||
unregister_blkdev(nvme_major, "nvme");
|
||||
destroy_workqueue(nvme_workq);
|
||||
kthread_stop(nvme_thread);
|
||||
}
|
||||
|
||||
|
|
|
@ -87,6 +87,7 @@ struct nvme_dev {
|
|||
struct list_head namespaces;
|
||||
struct kref kref;
|
||||
struct miscdevice miscdev;
|
||||
struct work_struct reset_work;
|
||||
char name[12];
|
||||
char serial[20];
|
||||
char model[40];
|
||||
|
|
Loading…
Reference in New Issue