From 94d22763207ac6633612b8d8e0ca4fba0f7aa139 Mon Sep 17 00:00:00 2001 From: Long Li Date: Wed, 12 May 2021 01:06:40 -0700 Subject: [PATCH 1/2] PCI: hv: Fix a race condition when removing the device On removing the device, any work item (hv_pci_devices_present() or hv_pci_eject_device()) scheduled on workqueue hbus->wq may still be running and race with hv_pci_remove(). This can happen because the host may send PCI_EJECT or PCI_BUS_RELATIONS(2) and decide to rescind the channel immediately after that. Fix this by flushing/destroying the workqueue of hbus before doing hbus remove. Link: https://lore.kernel.org/r/1620806800-30983-1-git-send-email-longli@linuxonhyperv.com Signed-off-by: Long Li Signed-off-by: Lorenzo Pieralisi Reviewed-by: Michael Kelley --- drivers/pci/controller/pci-hyperv.c | 30 ++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index 6511648271b2..272c63ac49f9 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -444,7 +444,6 @@ enum hv_pcibus_state { hv_pcibus_probed, hv_pcibus_installed, hv_pcibus_removing, - hv_pcibus_removed, hv_pcibus_maximum }; @@ -3243,8 +3242,9 @@ static int hv_pci_bus_exit(struct hv_device *hdev, bool keep_devs) struct pci_packet teardown_packet; u8 buffer[sizeof(struct pci_message)]; } pkt; - struct hv_dr_state *dr; struct hv_pci_compl comp_pkt; + struct hv_pci_dev *hpdev, *tmp; + unsigned long flags; int ret; /* @@ -3256,9 +3256,16 @@ static int hv_pci_bus_exit(struct hv_device *hdev, bool keep_devs) if (!keep_devs) { /* Delete any children which might still exist. */ - dr = kzalloc(sizeof(*dr), GFP_KERNEL); - if (dr && hv_pci_start_relations_work(hbus, dr)) - kfree(dr); + spin_lock_irqsave(&hbus->device_list_lock, flags); + list_for_each_entry_safe(hpdev, tmp, &hbus->children, list_entry) { + list_del(&hpdev->list_entry); + if (hpdev->pci_slot) + pci_destroy_slot(hpdev->pci_slot); + /* For the two refs got in new_pcichild_device() */ + put_pcichild(hpdev); + put_pcichild(hpdev); + } + spin_unlock_irqrestore(&hbus->device_list_lock, flags); } ret = hv_send_resources_released(hdev); @@ -3301,13 +3308,23 @@ static int hv_pci_remove(struct hv_device *hdev) hbus = hv_get_drvdata(hdev); if (hbus->state == hv_pcibus_installed) { + tasklet_disable(&hdev->channel->callback_event); + hbus->state = hv_pcibus_removing; + tasklet_enable(&hdev->channel->callback_event); + destroy_workqueue(hbus->wq); + hbus->wq = NULL; + /* + * At this point, no work is running or can be scheduled + * on hbus-wq. We can't race with hv_pci_devices_present() + * or hv_pci_eject_device(), it's safe to proceed. + */ + /* Remove the bus from PCI's point of view. */ pci_lock_rescan_remove(); pci_stop_root_bus(hbus->pci_bus); hv_pci_remove_slots(hbus); pci_remove_root_bus(hbus->pci_bus); pci_unlock_rescan_remove(); - hbus->state = hv_pcibus_removed; } ret = hv_pci_bus_exit(hdev, false); @@ -3322,7 +3339,6 @@ static int hv_pci_remove(struct hv_device *hdev) irq_domain_free_fwnode(hbus->sysdata.fwnode); put_hvpcibus(hbus); wait_for_completion(&hbus->remove_event); - destroy_workqueue(hbus->wq); hv_put_dom_num(hbus->sysdata.domain); From 326dc2e1e59a98c61c3c71616496422af522678c Mon Sep 17 00:00:00 2001 From: Long Li Date: Wed, 12 May 2021 01:06:49 -0700 Subject: [PATCH 2/2] PCI: hv: Remove bus device removal unused refcount/functions With the new method of flushing/stopping the workqueue before doing bus removal, the old mechanism of using refcount and wait for completion is no longer needed. Remove those dead code. Link: https://lore.kernel.org/r/1620806809-31055-1-git-send-email-longli@linuxonhyperv.com Signed-off-by: Long Li [lorenzo.pieralisi@arm.com: Reworded subject] Signed-off-by: Lorenzo Pieralisi Reviewed-by: Michael Kelley --- drivers/pci/controller/pci-hyperv.c | 34 +++-------------------------- 1 file changed, 3 insertions(+), 31 deletions(-) diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index 272c63ac49f9..8807b6247e9e 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -452,7 +452,6 @@ struct hv_pcibus_device { /* Protocol version negotiated with the host */ enum pci_protocol_version_t protocol_version; enum hv_pcibus_state state; - refcount_t remove_lock; struct hv_device *hdev; resource_size_t low_mmio_space; resource_size_t high_mmio_space; @@ -460,7 +459,6 @@ struct hv_pcibus_device { struct resource *low_mmio_res; struct resource *high_mmio_res; struct completion *survey_event; - struct completion remove_event; struct pci_bus *pci_bus; spinlock_t config_lock; /* Avoid two threads writing index page */ spinlock_t device_list_lock; /* Protect lists below */ @@ -592,9 +590,6 @@ static void put_pcichild(struct hv_pci_dev *hpdev) kfree(hpdev); } -static void get_hvpcibus(struct hv_pcibus_device *hv_pcibus); -static void put_hvpcibus(struct hv_pcibus_device *hv_pcibus); - /* * There is no good way to get notified from vmbus_onoffer_rescind(), * so let's use polling here, since this is not a hot path. @@ -2063,10 +2058,8 @@ static void pci_devices_present_work(struct work_struct *work) } spin_unlock_irqrestore(&hbus->device_list_lock, flags); - if (!dr) { - put_hvpcibus(hbus); + if (!dr) return; - } /* First, mark all existing children as reported missing. */ spin_lock_irqsave(&hbus->device_list_lock, flags); @@ -2149,7 +2142,6 @@ static void pci_devices_present_work(struct work_struct *work) break; } - put_hvpcibus(hbus); kfree(dr); } @@ -2190,12 +2182,10 @@ static int hv_pci_start_relations_work(struct hv_pcibus_device *hbus, list_add_tail(&dr->list_entry, &hbus->dr_list); spin_unlock_irqrestore(&hbus->device_list_lock, flags); - if (pending_dr) { + if (pending_dr) kfree(dr_wrk); - } else { - get_hvpcibus(hbus); + else queue_work(hbus->wq, &dr_wrk->wrk); - } return 0; } @@ -2338,8 +2328,6 @@ static void hv_eject_device_work(struct work_struct *work) put_pcichild(hpdev); put_pcichild(hpdev); /* hpdev has been freed. Do not use it any more. */ - - put_hvpcibus(hbus); } /** @@ -2363,7 +2351,6 @@ static void hv_pci_eject_device(struct hv_pci_dev *hpdev) hpdev->state = hv_pcichild_ejecting; get_pcichild(hpdev); INIT_WORK(&hpdev->wrk, hv_eject_device_work); - get_hvpcibus(hbus); queue_work(hbus->wq, &hpdev->wrk); } @@ -2963,17 +2950,6 @@ static int hv_send_resources_released(struct hv_device *hdev) return 0; } -static void get_hvpcibus(struct hv_pcibus_device *hbus) -{ - refcount_inc(&hbus->remove_lock); -} - -static void put_hvpcibus(struct hv_pcibus_device *hbus) -{ - if (refcount_dec_and_test(&hbus->remove_lock)) - complete(&hbus->remove_event); -} - #define HVPCI_DOM_MAP_SIZE (64 * 1024) static DECLARE_BITMAP(hvpci_dom_map, HVPCI_DOM_MAP_SIZE); @@ -3093,14 +3069,12 @@ static int hv_pci_probe(struct hv_device *hdev, hbus->sysdata.domain = dom; hbus->hdev = hdev; - refcount_set(&hbus->remove_lock, 1); INIT_LIST_HEAD(&hbus->children); INIT_LIST_HEAD(&hbus->dr_list); INIT_LIST_HEAD(&hbus->resources_for_children); spin_lock_init(&hbus->config_lock); spin_lock_init(&hbus->device_list_lock); spin_lock_init(&hbus->retarget_msi_interrupt_lock); - init_completion(&hbus->remove_event); hbus->wq = alloc_ordered_workqueue("hv_pci_%x", 0, hbus->sysdata.domain); if (!hbus->wq) { @@ -3337,8 +3311,6 @@ static int hv_pci_remove(struct hv_device *hdev) hv_pci_free_bridge_windows(hbus); irq_domain_remove(hbus->irq_domain); irq_domain_free_fwnode(hbus->sysdata.fwnode); - put_hvpcibus(hbus); - wait_for_completion(&hbus->remove_event); hv_put_dom_num(hbus->sysdata.domain);