From 5aba6c47401d14247a44c3ae94d1bdab5653f692 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 4 Mar 2019 11:07:37 +0200 Subject: [PATCH 01/45] iommu/vt-d: Switch to bitmap_zalloc() Switch to bitmap_zalloc() to show clearly what we are allocating. Besides that it returns pointer of bitmap type instead of opaque void *. Signed-off-by: Andy Shevchenko Signed-off-by: Joerg Roedel --- drivers/iommu/intel_irq_remapping.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index 2d74641b7f7b..634d8f059019 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -548,8 +548,7 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu) goto out_free_table; } - bitmap = kcalloc(BITS_TO_LONGS(INTR_REMAP_TABLE_ENTRIES), - sizeof(long), GFP_ATOMIC); + bitmap = bitmap_zalloc(INTR_REMAP_TABLE_ENTRIES, GFP_ATOMIC); if (bitmap == NULL) { pr_err("IR%d: failed to allocate bitmap\n", iommu->seq_id); goto out_free_pages; @@ -616,7 +615,7 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu) return 0; out_free_bitmap: - kfree(bitmap); + bitmap_free(bitmap); out_free_pages: __free_pages(pages, INTR_REMAP_PAGE_ORDER); out_free_table: @@ -640,7 +639,7 @@ static void intel_teardown_irq_remapping(struct intel_iommu *iommu) } free_pages((unsigned long)iommu->ir_table->base, INTR_REMAP_PAGE_ORDER); - kfree(iommu->ir_table->bitmap); + bitmap_free(iommu->ir_table->bitmap); kfree(iommu->ir_table); iommu->ir_table = NULL; } From 8cec63e52966b6c1242f8323535532d791d440e8 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 20 Mar 2019 09:40:24 +0800 Subject: [PATCH 02/45] iommu: Remove iommu_callback_data The iommu_callback_data is not used anywhere, remove it to make the code more concise. Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 33a982e33716..1164b9926a2b 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -45,10 +45,6 @@ static unsigned int iommu_def_domain_type = IOMMU_DOMAIN_DMA; #endif static bool iommu_dma_strict __read_mostly = true; -struct iommu_callback_data { - const struct iommu_ops *ops; -}; - struct iommu_group { struct kobject kobj; struct kobject *devices_kobj; @@ -1215,9 +1211,6 @@ static int iommu_bus_init(struct bus_type *bus, const struct iommu_ops *ops) { int err; struct notifier_block *nb; - struct iommu_callback_data cb = { - .ops = ops, - }; nb = kzalloc(sizeof(struct notifier_block), GFP_KERNEL); if (!nb) @@ -1229,7 +1222,7 @@ static int iommu_bus_init(struct bus_type *bus, const struct iommu_ops *ops) if (err) goto out_free; - err = bus_for_each_dev(bus, NULL, &cb, add_iommu_group); + err = bus_for_each_dev(bus, NULL, NULL, add_iommu_group); if (err) goto out_err; @@ -1238,7 +1231,7 @@ static int iommu_bus_init(struct bus_type *bus, const struct iommu_ops *ops) out_err: /* Clean up */ - bus_for_each_dev(bus, NULL, &cb, remove_iommu_group); + bus_for_each_dev(bus, NULL, NULL, remove_iommu_group); bus_unregister_notifier(bus, nb); out_free: From 954a03be033c7cef80ddc232e7cbdb17df735663 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Fri, 1 Mar 2019 11:20:17 -0800 Subject: [PATCH 03/45] iommu/arm-smmu: Break insecure users by disabling bypass by default If you're bisecting why your peripherals stopped working, it's probably this CL. Specifically if you see this in your dmesg: Unexpected global fault, this could be serious ...then it's almost certainly this CL. Running your IOMMU-enabled peripherals with the IOMMU in bypass mode is insecure and effectively disables the protection they provide. There are few reasons to allow unmatched stream bypass, and even fewer good ones. This patch starts the transition over to make it much harder to run your system insecurely. Expected steps: 1. By default disable bypass (so anyone insecure will notice) but make it easy for someone to re-enable bypass with just a KConfig change. That's this patch. 2. After people have had a little time to come to grips with the fact that they need to set their IOMMUs properly and have had time to dig into how to do this, the KConfig will be eliminated and bypass will simply be disabled. Folks who are truly upset and still haven't fixed their system can either figure out how to add 'arm-smmu.disable_bypass=n' to their command line or revert the patch in their own private kernel. Of course these folks will be less secure. Suggested-by: Robin Murphy Reviewed-by: Marc Gonzalez Tested-by: Marc Gonzalez Signed-off-by: Douglas Anderson Signed-off-by: Will Deacon --- drivers/iommu/Kconfig | 25 +++++++++++++++++++++++++ drivers/iommu/arm-smmu.c | 3 ++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 6f07f3b21816..15b831113ded 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -359,6 +359,31 @@ config ARM_SMMU Say Y here if your SoC includes an IOMMU device implementing the ARM SMMU architecture. +config ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT + bool "Default to disabling bypass on ARM SMMU v1 and v2" + depends on ARM_SMMU + default y + help + Say Y here to (by default) disable bypass streams such that + incoming transactions from devices that are not attached to + an iommu domain will report an abort back to the device and + will not be allowed to pass through the SMMU. + + Any old kernels that existed before this KConfig was + introduced would default to _allowing_ bypass (AKA the + equivalent of NO for this config). However the default for + this option is YES because the old behavior is insecure. + + There are few reasons to allow unmatched stream bypass, and + even fewer good ones. If saying YES here breaks your board + you should work on fixing your board. This KConfig option + is expected to be removed in the future and we'll simply + hardcode the bypass disable in the code. + + NOTE: the kernel command line parameter + 'arm-smmu.disable_bypass' will continue to override this + config. + config ARM_SMMU_V3 bool "ARM Ltd. System MMU Version 3 (SMMUv3) Support" depends on ARM64 diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 045d93884164..930c07635956 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -110,7 +110,8 @@ static int force_stage; module_param(force_stage, int, S_IRUGO); MODULE_PARM_DESC(force_stage, "Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation."); -static bool disable_bypass; +static bool disable_bypass = + IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT); module_param(disable_bypass, bool, S_IRUGO); MODULE_PARM_DESC(disable_bypass, "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU."); From 43a0541e312f7136e081e6bf58f6c8a2e9672688 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 7 Mar 2019 01:50:07 +0300 Subject: [PATCH 04/45] iommu/tegra-smmu: Fix invalid ASID bits on Tegra30/114 Both Tegra30 and Tegra114 have 4 ASID's and the corresponding bitfield of the TLB_FLUSH register differs from later Tegra generations that have 128 ASID's. In a result the PTE's are now flushed correctly from TLB and this fixes problems with graphics (randomly failing tests) on Tegra30. Cc: stable Signed-off-by: Dmitry Osipenko Acked-by: Thierry Reding Signed-off-by: Joerg Roedel --- drivers/iommu/tegra-smmu.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 5182c7d6171e..8d30653cd13a 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -102,7 +102,6 @@ static inline u32 smmu_readl(struct tegra_smmu *smmu, unsigned long offset) #define SMMU_TLB_FLUSH_VA_MATCH_ALL (0 << 0) #define SMMU_TLB_FLUSH_VA_MATCH_SECTION (2 << 0) #define SMMU_TLB_FLUSH_VA_MATCH_GROUP (3 << 0) -#define SMMU_TLB_FLUSH_ASID(x) (((x) & 0x7f) << 24) #define SMMU_TLB_FLUSH_VA_SECTION(addr) ((((addr) & 0xffc00000) >> 12) | \ SMMU_TLB_FLUSH_VA_MATCH_SECTION) #define SMMU_TLB_FLUSH_VA_GROUP(addr) ((((addr) & 0xffffc000) >> 12) | \ @@ -205,8 +204,12 @@ static inline void smmu_flush_tlb_asid(struct tegra_smmu *smmu, { u32 value; - value = SMMU_TLB_FLUSH_ASID_MATCH | SMMU_TLB_FLUSH_ASID(asid) | - SMMU_TLB_FLUSH_VA_MATCH_ALL; + if (smmu->soc->num_asids == 4) + value = (asid & 0x3) << 29; + else + value = (asid & 0x7f) << 24; + + value |= SMMU_TLB_FLUSH_ASID_MATCH | SMMU_TLB_FLUSH_VA_MATCH_ALL; smmu_writel(smmu, value, SMMU_TLB_FLUSH); } @@ -216,8 +219,12 @@ static inline void smmu_flush_tlb_section(struct tegra_smmu *smmu, { u32 value; - value = SMMU_TLB_FLUSH_ASID_MATCH | SMMU_TLB_FLUSH_ASID(asid) | - SMMU_TLB_FLUSH_VA_SECTION(iova); + if (smmu->soc->num_asids == 4) + value = (asid & 0x3) << 29; + else + value = (asid & 0x7f) << 24; + + value |= SMMU_TLB_FLUSH_ASID_MATCH | SMMU_TLB_FLUSH_VA_SECTION(iova); smmu_writel(smmu, value, SMMU_TLB_FLUSH); } @@ -227,8 +234,12 @@ static inline void smmu_flush_tlb_group(struct tegra_smmu *smmu, { u32 value; - value = SMMU_TLB_FLUSH_ASID_MATCH | SMMU_TLB_FLUSH_ASID(asid) | - SMMU_TLB_FLUSH_VA_GROUP(iova); + if (smmu->soc->num_asids == 4) + value = (asid & 0x3) << 29; + else + value = (asid & 0x7f) << 24; + + value |= SMMU_TLB_FLUSH_ASID_MATCH | SMMU_TLB_FLUSH_VA_GROUP(iova); smmu_writel(smmu, value, SMMU_TLB_FLUSH); } From 4f97031ff8605c1e8388a650bc5708ceda4357d5 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 7 Mar 2019 01:50:08 +0300 Subject: [PATCH 05/45] iommu/tegra-smmu: Properly release domain resources Release all memory allocations associated with a released domain and emit warning if domain is in-use at the time of destruction. Signed-off-by: Dmitry Osipenko Acked-by: Thierry Reding Signed-off-by: Joerg Roedel --- drivers/iommu/tegra-smmu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 8d30653cd13a..27b1249f0773 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -327,6 +327,9 @@ static void tegra_smmu_domain_free(struct iommu_domain *domain) /* TODO: free page directory and page tables */ + WARN_ON_ONCE(as->use_count); + kfree(as->count); + kfree(as->pts); kfree(as); } From 43d957b1334670a9b232f9a4a027d51e05155c83 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 7 Mar 2019 01:50:09 +0300 Subject: [PATCH 06/45] iommu/tegra-smmu: Respect IOMMU API read-write protections Set PTE read/write attributes accordingly to the the protections requested by IOMMU API. Signed-off-by: Dmitry Osipenko Acked-by: Thierry Reding Signed-off-by: Joerg Roedel --- drivers/iommu/tegra-smmu.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 27b1249f0773..463ee08f7d3a 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -145,8 +145,6 @@ static inline u32 smmu_readl(struct tegra_smmu *smmu, unsigned long offset) #define SMMU_PDE_ATTR (SMMU_PDE_READABLE | SMMU_PDE_WRITABLE | \ SMMU_PDE_NONSECURE) -#define SMMU_PTE_ATTR (SMMU_PTE_READABLE | SMMU_PTE_WRITABLE | \ - SMMU_PTE_NONSECURE) static unsigned int iova_pd_index(unsigned long iova) { @@ -659,6 +657,7 @@ static int tegra_smmu_map(struct iommu_domain *domain, unsigned long iova, { struct tegra_smmu_as *as = to_smmu_as(domain); dma_addr_t pte_dma; + u32 pte_attrs; u32 *pte; pte = as_get_pte(as, iova, &pte_dma); @@ -669,8 +668,16 @@ static int tegra_smmu_map(struct iommu_domain *domain, unsigned long iova, if (*pte == 0) tegra_smmu_pte_get_use(as, iova); + pte_attrs = SMMU_PTE_NONSECURE; + + if (prot & IOMMU_READ) + pte_attrs |= SMMU_PTE_READABLE; + + if (prot & IOMMU_WRITE) + pte_attrs |= SMMU_PTE_WRITABLE; + tegra_smmu_set_pte(as, iova, pte, pte_dma, - __phys_to_pfn(paddr) | SMMU_PTE_ATTR); + __phys_to_pfn(paddr) | pte_attrs); return 0; } From 14bd9a607f9082e7b5690c27e69072f2aeae0de4 Mon Sep 17 00:00:00 2001 From: Jinyu Qi Date: Wed, 3 Apr 2019 16:35:21 +0800 Subject: [PATCH 07/45] iommu/iova: Separate atomic variables to improve performance In struct iova_domain, there are three atomic variables, the former two are about TLB flush counters which use atomic_add operation, anoter is used to flush timer that use cmpxhg operation. These variables are in the same cache line, so it will cause some performance loss under the condition that many cores call queue_iova function, Let's isolate the two type atomic variables to different cache line to reduce cache line conflict. Cc: Joerg Roedel Signed-off-by: Jinyu Qi Signed-off-by: Joerg Roedel --- include/linux/iova.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/include/linux/iova.h b/include/linux/iova.h index 0b93bf96693e..28a5128405f8 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -76,6 +76,14 @@ struct iova_domain { unsigned long start_pfn; /* Lower limit for this domain */ unsigned long dma_32bit_pfn; unsigned long max32_alloc_size; /* Size of last failed allocation */ + struct iova_fq __percpu *fq; /* Flush Queue */ + + atomic64_t fq_flush_start_cnt; /* Number of TLB flushes that + have been started */ + + atomic64_t fq_flush_finish_cnt; /* Number of TLB flushes that + have been finished */ + struct iova anchor; /* rbtree lookup anchor */ struct iova_rcache rcaches[IOVA_RANGE_CACHE_MAX_SIZE]; /* IOVA range caches */ @@ -85,14 +93,6 @@ struct iova_domain { iova_entry_dtor entry_dtor; /* IOMMU driver specific destructor for iova entry */ - struct iova_fq __percpu *fq; /* Flush Queue */ - - atomic64_t fq_flush_start_cnt; /* Number of TLB flushes that - have been started */ - - atomic64_t fq_flush_finish_cnt; /* Number of TLB flushes that - have been finished */ - struct timer_list fq_timer; /* Timer to regularily empty the flush-queues */ atomic_t fq_timer_on; /* 1 when timer is active, 0 From a3a195929d40b38833ffd0f82b2db2cc898641eb Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 25 Mar 2019 09:30:28 +0800 Subject: [PATCH 08/45] iommu: Add APIs for multiple domains per device Sharing a physical PCI device in a finer-granularity way is becoming a consensus in the industry. IOMMU vendors are also engaging efforts to support such sharing as well as possible. Among the efforts, the capability of support finer-granularity DMA isolation is a common requirement due to the security consideration. With finer-granularity DMA isolation, subsets of a PCI function can be isolated from each others by the IOMMU. As a result, there is a request in software to attach multiple domains to a physical PCI device. One example of such use model is the Intel Scalable IOV [1] [2]. The Intel vt-d 3.0 spec [3] introduces the scalable mode which enables PASID granularity DMA isolation. This adds the APIs to support multiple domains per device. In order to ease the discussions, we call it 'a domain in auxiliary mode' or simply 'auxiliary domain' when multiple domains are attached to a physical device. The APIs include: * iommu_dev_has_feature(dev, IOMMU_DEV_FEAT_AUX) - Detect both IOMMU and PCI endpoint devices supporting the feature (aux-domain here) without the host driver dependency. * iommu_dev_feature_enabled(dev, IOMMU_DEV_FEAT_AUX) - Check the enabling status of the feature (aux-domain here). The aux-domain interfaces are available only if this returns true. * iommu_dev_enable/disable_feature(dev, IOMMU_DEV_FEAT_AUX) - Enable/disable device specific aux-domain feature. * iommu_aux_attach_device(domain, dev) - Attaches @domain to @dev in the auxiliary mode. Multiple domains could be attached to a single device in the auxiliary mode with each domain representing an isolated address space for an assignable subset of the device. * iommu_aux_detach_device(domain, dev) - Detach @domain which has been attached to @dev in the auxiliary mode. * iommu_aux_get_pasid(domain, dev) - Return ID used for finer-granularity DMA translation. For the Intel Scalable IOV usage model, this will be a PASID. The device which supports Scalable IOV needs to write this ID to the device register so that DMA requests could be tagged with a right PASID prefix. This has been updated with the latest proposal from Joerg posted here [5]. Many people involved in discussions of this design. Kevin Tian Liu Yi L Ashok Raj Sanjay Kumar Jacob Pan Alex Williamson Jean-Philippe Brucker Joerg Roedel and some discussions can be found here [4] [5]. [1] https://software.intel.com/en-us/download/intel-scalable-io-virtualization-technical-specification [2] https://schd.ws/hosted_files/lc32018/00/LC3-SIOV-final.pdf [3] https://software.intel.com/en-us/download/intel-virtualization-technology-for-directed-io-architecture-specification [4] https://lkml.org/lkml/2018/7/26/4 [5] https://www.spinics.net/lists/iommu/msg31874.html Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Cc: Liu Yi L Suggested-by: Kevin Tian Suggested-by: Jean-Philippe Brucker Suggested-by: Joerg Roedel Signed-off-by: Lu Baolu Reviewed-by: Jean-Philippe Brucker Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 96 +++++++++++++++++++++++++++++++++++++++++++ include/linux/iommu.h | 70 +++++++++++++++++++++++++++++++ 2 files changed, 166 insertions(+) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 109de67d5d72..344e27e8f188 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2039,3 +2039,99 @@ int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids) return 0; } EXPORT_SYMBOL_GPL(iommu_fwspec_add_ids); + +/* + * Per device IOMMU features. + */ +bool iommu_dev_has_feature(struct device *dev, enum iommu_dev_features feat) +{ + const struct iommu_ops *ops = dev->bus->iommu_ops; + + if (ops && ops->dev_has_feat) + return ops->dev_has_feat(dev, feat); + + return false; +} +EXPORT_SYMBOL_GPL(iommu_dev_has_feature); + +int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features feat) +{ + const struct iommu_ops *ops = dev->bus->iommu_ops; + + if (ops && ops->dev_enable_feat) + return ops->dev_enable_feat(dev, feat); + + return -ENODEV; +} +EXPORT_SYMBOL_GPL(iommu_dev_enable_feature); + +/* + * The device drivers should do the necessary cleanups before calling this. + * For example, before disabling the aux-domain feature, the device driver + * should detach all aux-domains. Otherwise, this will return -EBUSY. + */ +int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat) +{ + const struct iommu_ops *ops = dev->bus->iommu_ops; + + if (ops && ops->dev_disable_feat) + return ops->dev_disable_feat(dev, feat); + + return -EBUSY; +} +EXPORT_SYMBOL_GPL(iommu_dev_disable_feature); + +bool iommu_dev_feature_enabled(struct device *dev, enum iommu_dev_features feat) +{ + const struct iommu_ops *ops = dev->bus->iommu_ops; + + if (ops && ops->dev_feat_enabled) + return ops->dev_feat_enabled(dev, feat); + + return false; +} +EXPORT_SYMBOL_GPL(iommu_dev_feature_enabled); + +/* + * Aux-domain specific attach/detach. + * + * Only works if iommu_dev_feature_enabled(dev, IOMMU_DEV_FEAT_AUX) returns + * true. Also, as long as domains are attached to a device through this + * interface, any tries to call iommu_attach_device() should fail + * (iommu_detach_device() can't fail, so we fail when trying to re-attach). + * This should make us safe against a device being attached to a guest as a + * whole while there are still pasid users on it (aux and sva). + */ +int iommu_aux_attach_device(struct iommu_domain *domain, struct device *dev) +{ + int ret = -ENODEV; + + if (domain->ops->aux_attach_dev) + ret = domain->ops->aux_attach_dev(domain, dev); + + if (!ret) + trace_attach_device_to_domain(dev); + + return ret; +} +EXPORT_SYMBOL_GPL(iommu_aux_attach_device); + +void iommu_aux_detach_device(struct iommu_domain *domain, struct device *dev) +{ + if (domain->ops->aux_detach_dev) { + domain->ops->aux_detach_dev(domain, dev); + trace_detach_device_from_domain(dev); + } +} +EXPORT_SYMBOL_GPL(iommu_aux_detach_device); + +int iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev) +{ + int ret = -ENODEV; + + if (domain->ops->aux_get_pasid) + ret = domain->ops->aux_get_pasid(domain, dev); + + return ret; +} +EXPORT_SYMBOL_GPL(iommu_aux_get_pasid); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index ffbbc7e39cee..8239ece9fdfc 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -156,6 +156,11 @@ struct iommu_resv_region { enum iommu_resv_type type; }; +/* Per device IOMMU features */ +enum iommu_dev_features { + IOMMU_DEV_FEAT_AUX, /* Aux-domain feature */ +}; + #ifdef CONFIG_IOMMU_API /** @@ -186,6 +191,11 @@ struct iommu_resv_region { * @of_xlate: add OF master IDs to iommu grouping * @is_attach_deferred: Check if domain attach should be deferred from iommu * driver init to device driver init (default no) + * @dev_has/enable/disable_feat: per device entries to check/enable/disable + * iommu specific features. + * @dev_feat_enabled: check enabled feature + * @aux_attach/detach_dev: aux-domain specific attach/detach entries. + * @aux_get_pasid: get the pasid given an aux-domain * @pgsize_bitmap: bitmap of all possible supported page sizes */ struct iommu_ops { @@ -230,6 +240,17 @@ struct iommu_ops { int (*of_xlate)(struct device *dev, struct of_phandle_args *args); bool (*is_attach_deferred)(struct iommu_domain *domain, struct device *dev); + /* Per device IOMMU features */ + bool (*dev_has_feat)(struct device *dev, enum iommu_dev_features f); + bool (*dev_feat_enabled)(struct device *dev, enum iommu_dev_features f); + int (*dev_enable_feat)(struct device *dev, enum iommu_dev_features f); + int (*dev_disable_feat)(struct device *dev, enum iommu_dev_features f); + + /* Aux-domain specific attach/detach entries */ + int (*aux_attach_dev)(struct iommu_domain *domain, struct device *dev); + void (*aux_detach_dev)(struct iommu_domain *domain, struct device *dev); + int (*aux_get_pasid)(struct iommu_domain *domain, struct device *dev); + unsigned long pgsize_bitmap; }; @@ -416,6 +437,14 @@ static inline void dev_iommu_fwspec_set(struct device *dev, int iommu_probe_device(struct device *dev); void iommu_release_device(struct device *dev); +bool iommu_dev_has_feature(struct device *dev, enum iommu_dev_features f); +int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features f); +int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features f); +bool iommu_dev_feature_enabled(struct device *dev, enum iommu_dev_features f); +int iommu_aux_attach_device(struct iommu_domain *domain, struct device *dev); +void iommu_aux_detach_device(struct iommu_domain *domain, struct device *dev); +int iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev); + #else /* CONFIG_IOMMU_API */ struct iommu_ops {}; @@ -700,6 +729,47 @@ const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode) return NULL; } +static inline bool +iommu_dev_has_feature(struct device *dev, enum iommu_dev_features feat) +{ + return false; +} + +static inline bool +iommu_dev_feature_enabled(struct device *dev, enum iommu_dev_features feat) +{ + return false; +} + +static inline int +iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features feat) +{ + return -ENODEV; +} + +static inline int +iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat) +{ + return -ENODEV; +} + +static inline int +iommu_aux_attach_device(struct iommu_domain *domain, struct device *dev) +{ + return -ENODEV; +} + +static inline void +iommu_aux_detach_device(struct iommu_domain *domain, struct device *dev) +{ +} + +static inline int +iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev) +{ + return -ENODEV; +} + #endif /* CONFIG_IOMMU_API */ #ifdef CONFIG_IOMMU_DEBUGFS From 26b25a2b98e45aeb40eedcedc586ad5034cbd984 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Wed, 10 Apr 2019 16:15:16 +0100 Subject: [PATCH 09/45] iommu: Bind process address spaces to devices Add bind() and unbind() operations to the IOMMU API. iommu_sva_bind_device() binds a device to an mm, and returns a handle to the bond, which is released by calling iommu_sva_unbind_device(). Each mm bound to devices gets a PASID (by convention, a 20-bit system-wide ID representing the address space), which can be retrieved with iommu_sva_get_pasid(). When programming DMA addresses, device drivers include this PASID in a device-specific manner, to let the device access the given address space. Since the process memory may be paged out, device and IOMMU must support I/O page faults (e.g. PCI PRI). Using iommu_sva_set_ops(), device drivers provide an mm_exit() callback that is called by the IOMMU driver if the process exits before the device driver called unbind(). In mm_exit(), device driver should disable DMA from the given context, so that the core IOMMU can reallocate the PASID. Whether the process exited or nor, the device driver should always release the handle with unbind(). To use these functions, device driver must first enable the IOMMU_DEV_FEAT_SVA device feature with iommu_dev_enable_feature(). Signed-off-by: Jean-Philippe Brucker Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 104 ++++++++++++++++++++++++++++++++++++++++++ include/linux/iommu.h | 70 ++++++++++++++++++++++++++++ 2 files changed, 174 insertions(+) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 344e27e8f188..f8fe112e507a 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2135,3 +2135,107 @@ int iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev) return ret; } EXPORT_SYMBOL_GPL(iommu_aux_get_pasid); + +/** + * iommu_sva_bind_device() - Bind a process address space to a device + * @dev: the device + * @mm: the mm to bind, caller must hold a reference to it + * + * Create a bond between device and address space, allowing the device to access + * the mm using the returned PASID. If a bond already exists between @device and + * @mm, it is returned and an additional reference is taken. Caller must call + * iommu_sva_unbind_device() to release each reference. + * + * iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA) must be called first, to + * initialize the required SVA features. + * + * On error, returns an ERR_PTR value. + */ +struct iommu_sva * +iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void *drvdata) +{ + struct iommu_group *group; + struct iommu_sva *handle = ERR_PTR(-EINVAL); + const struct iommu_ops *ops = dev->bus->iommu_ops; + + if (!ops || !ops->sva_bind) + return ERR_PTR(-ENODEV); + + group = iommu_group_get(dev); + if (!group) + return ERR_PTR(-ENODEV); + + /* Ensure device count and domain don't change while we're binding */ + mutex_lock(&group->mutex); + + /* + * To keep things simple, SVA currently doesn't support IOMMU groups + * with more than one device. Existing SVA-capable systems are not + * affected by the problems that required IOMMU groups (lack of ACS + * isolation, device ID aliasing and other hardware issues). + */ + if (iommu_group_device_count(group) != 1) + goto out_unlock; + + handle = ops->sva_bind(dev, mm, drvdata); + +out_unlock: + mutex_unlock(&group->mutex); + iommu_group_put(group); + + return handle; +} +EXPORT_SYMBOL_GPL(iommu_sva_bind_device); + +/** + * iommu_sva_unbind_device() - Remove a bond created with iommu_sva_bind_device + * @handle: the handle returned by iommu_sva_bind_device() + * + * Put reference to a bond between device and address space. The device should + * not be issuing any more transaction for this PASID. All outstanding page + * requests for this PASID must have been flushed to the IOMMU. + * + * Returns 0 on success, or an error value + */ +void iommu_sva_unbind_device(struct iommu_sva *handle) +{ + struct iommu_group *group; + struct device *dev = handle->dev; + const struct iommu_ops *ops = dev->bus->iommu_ops; + + if (!ops || !ops->sva_unbind) + return; + + group = iommu_group_get(dev); + if (!group) + return; + + mutex_lock(&group->mutex); + ops->sva_unbind(handle); + mutex_unlock(&group->mutex); + + iommu_group_put(group); +} +EXPORT_SYMBOL_GPL(iommu_sva_unbind_device); + +int iommu_sva_set_ops(struct iommu_sva *handle, + const struct iommu_sva_ops *sva_ops) +{ + if (handle->ops && handle->ops != sva_ops) + return -EEXIST; + + handle->ops = sva_ops; + return 0; +} +EXPORT_SYMBOL_GPL(iommu_sva_set_ops); + +int iommu_sva_get_pasid(struct iommu_sva *handle) +{ + const struct iommu_ops *ops = handle->dev->bus->iommu_ops; + + if (!ops || !ops->sva_get_pasid) + return IOMMU_PASID_INVALID; + + return ops->sva_get_pasid(handle); +} +EXPORT_SYMBOL_GPL(iommu_sva_get_pasid); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 8239ece9fdfc..480921dfbadf 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -48,6 +48,7 @@ struct bus_type; struct device; struct iommu_domain; struct notifier_block; +struct iommu_sva; /* iommu fault flags */ #define IOMMU_FAULT_READ 0x0 @@ -55,6 +56,8 @@ struct notifier_block; typedef int (*iommu_fault_handler_t)(struct iommu_domain *, struct device *, unsigned long, int, void *); +typedef int (*iommu_mm_exit_handler_t)(struct device *dev, struct iommu_sva *, + void *); struct iommu_domain_geometry { dma_addr_t aperture_start; /* First address that can be mapped */ @@ -159,6 +162,28 @@ struct iommu_resv_region { /* Per device IOMMU features */ enum iommu_dev_features { IOMMU_DEV_FEAT_AUX, /* Aux-domain feature */ + IOMMU_DEV_FEAT_SVA, /* Shared Virtual Addresses */ +}; + +#define IOMMU_PASID_INVALID (-1U) + +/** + * struct iommu_sva_ops - device driver callbacks for an SVA context + * + * @mm_exit: called when the mm is about to be torn down by exit_mmap. After + * @mm_exit returns, the device must not issue any more transaction + * with the PASID given as argument. + * + * The @mm_exit handler is allowed to sleep. Be careful about the + * locks taken in @mm_exit, because they might lead to deadlocks if + * they are also held when dropping references to the mm. Consider the + * following call chain: + * mutex_lock(A); mmput(mm) -> exit_mm() -> @mm_exit() -> mutex_lock(A) + * Using mmput_async() prevents this scenario. + * + */ +struct iommu_sva_ops { + iommu_mm_exit_handler_t mm_exit; }; #ifdef CONFIG_IOMMU_API @@ -196,6 +221,9 @@ enum iommu_dev_features { * @dev_feat_enabled: check enabled feature * @aux_attach/detach_dev: aux-domain specific attach/detach entries. * @aux_get_pasid: get the pasid given an aux-domain + * @sva_bind: Bind process address space to device + * @sva_unbind: Unbind process address space from device + * @sva_get_pasid: Get PASID associated to a SVA handle * @pgsize_bitmap: bitmap of all possible supported page sizes */ struct iommu_ops { @@ -251,6 +279,11 @@ struct iommu_ops { void (*aux_detach_dev)(struct iommu_domain *domain, struct device *dev); int (*aux_get_pasid)(struct iommu_domain *domain, struct device *dev); + struct iommu_sva *(*sva_bind)(struct device *dev, struct mm_struct *mm, + void *drvdata); + void (*sva_unbind)(struct iommu_sva *handle); + int (*sva_get_pasid)(struct iommu_sva *handle); + unsigned long pgsize_bitmap; }; @@ -417,6 +450,14 @@ struct iommu_fwspec { u32 ids[1]; }; +/** + * struct iommu_sva - handle to a device-mm bond + */ +struct iommu_sva { + struct device *dev; + const struct iommu_sva_ops *ops; +}; + int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode, const struct iommu_ops *ops); void iommu_fwspec_free(struct device *dev); @@ -445,6 +486,14 @@ int iommu_aux_attach_device(struct iommu_domain *domain, struct device *dev); void iommu_aux_detach_device(struct iommu_domain *domain, struct device *dev); int iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev); +struct iommu_sva *iommu_sva_bind_device(struct device *dev, + struct mm_struct *mm, + void *drvdata); +void iommu_sva_unbind_device(struct iommu_sva *handle); +int iommu_sva_set_ops(struct iommu_sva *handle, + const struct iommu_sva_ops *ops); +int iommu_sva_get_pasid(struct iommu_sva *handle); + #else /* CONFIG_IOMMU_API */ struct iommu_ops {}; @@ -770,6 +819,27 @@ iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev) return -ENODEV; } +static inline struct iommu_sva * +iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void *drvdata) +{ + return NULL; +} + +static inline void iommu_sva_unbind_device(struct iommu_sva *handle) +{ +} + +static inline int iommu_sva_set_ops(struct iommu_sva *handle, + const struct iommu_sva_ops *ops) +{ + return -EINVAL; +} + +static inline int iommu_sva_get_pasid(struct iommu_sva *handle) +{ + return IOMMU_PASID_INVALID; +} + #endif /* CONFIG_IOMMU_API */ #ifdef CONFIG_IOMMU_DEBUGFS From d7cbc0f3220fabbdfa9b3aa79275baa5b16fef5d Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 25 Mar 2019 09:30:29 +0800 Subject: [PATCH 10/45] iommu/vt-d: Make intel_iommu_enable_pasid() more generic This moves intel_iommu_enable_pasid() out of the scope of CONFIG_INTEL_IOMMU_SVM with more and more features requiring pasid function. Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 21 +++++++-------------- drivers/iommu/intel-svm.c | 19 ++++++++++++++++++- include/linux/intel-iommu.h | 2 +- 3 files changed, 26 insertions(+), 16 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 28cb713d728c..d2e613875b3a 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -5307,8 +5307,7 @@ static void intel_iommu_put_resv_regions(struct device *dev, } } -#ifdef CONFIG_INTEL_IOMMU_SVM -int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev) +int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev) { struct device_domain_info *info; struct context_entry *context; @@ -5317,7 +5316,7 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sd u64 ctx_lo; int ret; - domain = get_valid_domain_for_dev(sdev->dev); + domain = get_valid_domain_for_dev(dev); if (!domain) return -EINVAL; @@ -5325,7 +5324,7 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sd spin_lock(&iommu->lock); ret = -EINVAL; - info = sdev->dev->archdata.iommu; + info = dev->archdata.iommu; if (!info || !info->pasid_supported) goto out; @@ -5335,14 +5334,13 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sd ctx_lo = context[0].lo; - sdev->did = FLPT_DEFAULT_DID; - sdev->sid = PCI_DEVID(info->bus, info->devfn); - if (!(ctx_lo & CONTEXT_PASIDE)) { ctx_lo |= CONTEXT_PASIDE; context[0].lo = ctx_lo; wmb(); - iommu->flush.flush_context(iommu, sdev->did, sdev->sid, + iommu->flush.flush_context(iommu, + domain->iommu_did[iommu->seq_id], + PCI_DEVID(info->bus, info->devfn), DMA_CCMD_MASK_NOBIT, DMA_CCMD_DEVICE_INVL); } @@ -5351,12 +5349,6 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sd if (!info->pasid_enabled) iommu_enable_dev_iotlb(info); - if (info->ats_enabled) { - sdev->dev_iotlb = 1; - sdev->qdep = info->ats_qdep; - if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS) - sdev->qdep = 0; - } ret = 0; out: @@ -5366,6 +5358,7 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sd return ret; } +#ifdef CONFIG_INTEL_IOMMU_SVM struct intel_iommu *intel_svm_device_to_iommu(struct device *dev) { struct intel_iommu *iommu; diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index 3a4b09ae8561..8f87304f915c 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -228,6 +228,7 @@ static LIST_HEAD(global_svm_list); int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ops *ops) { struct intel_iommu *iommu = intel_svm_device_to_iommu(dev); + struct device_domain_info *info; struct intel_svm_dev *sdev; struct intel_svm *svm = NULL; struct mm_struct *mm = NULL; @@ -291,13 +292,29 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ } sdev->dev = dev; - ret = intel_iommu_enable_pasid(iommu, sdev); + ret = intel_iommu_enable_pasid(iommu, dev); if (ret || !pasid) { /* If they don't actually want to assign a PASID, this is * just an enabling check/preparation. */ kfree(sdev); goto out; } + + info = dev->archdata.iommu; + if (!info || !info->pasid_supported) { + kfree(sdev); + goto out; + } + + sdev->did = FLPT_DEFAULT_DID; + sdev->sid = PCI_DEVID(info->bus, info->devfn); + if (info->ats_enabled) { + sdev->dev_iotlb = 1; + sdev->qdep = info->ats_qdep; + if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS) + sdev->qdep = 0; + } + /* Finish the setup now we know we're keeping it */ sdev->users = 1; sdev->ops = ops; diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index fa364de9db18..b7d1e2fbb9ca 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -650,6 +650,7 @@ struct intel_iommu *domain_get_iommu(struct dmar_domain *domain); int for_each_device_domain(int (*fn)(struct device_domain_info *info, void *data), void *data); void iommu_flush_write_buffer(struct intel_iommu *iommu); +int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev); #ifdef CONFIG_INTEL_IOMMU_SVM int intel_svm_init(struct intel_iommu *iommu); @@ -679,7 +680,6 @@ struct intel_svm { struct list_head list; }; -extern int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev); extern struct intel_iommu *intel_svm_device_to_iommu(struct device *dev); #endif From 95587a75de179da35aea03735843be8278cf1857 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 25 Mar 2019 09:30:30 +0800 Subject: [PATCH 11/45] iommu/vt-d: Add per-device IOMMU feature ops entries This adds the iommu ops entries for aux-domain per-device feature query and enable/disable. Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Signed-off-by: Sanjay Kumar Signed-off-by: Liu Yi L Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 159 ++++++++++++++++++++++++++++++++++++ include/linux/intel-iommu.h | 1 + 2 files changed, 160 insertions(+) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index d2e613875b3a..ba5f2e5b3a03 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2485,6 +2485,7 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, info->domain = domain; info->iommu = iommu; info->pasid_table = NULL; + info->auxd_enabled = 0; if (dev && dev_is_pci(dev)) { struct pci_dev *pdev = to_pci_dev(info->dev); @@ -5223,6 +5224,42 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, return phys; } +static inline bool scalable_mode_support(void) +{ + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + bool ret = true; + + rcu_read_lock(); + for_each_active_iommu(iommu, drhd) { + if (!sm_supported(iommu)) { + ret = false; + break; + } + } + rcu_read_unlock(); + + return ret; +} + +static inline bool iommu_pasid_support(void) +{ + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + bool ret = true; + + rcu_read_lock(); + for_each_active_iommu(iommu, drhd) { + if (!pasid_supported(iommu)) { + ret = false; + break; + } + } + rcu_read_unlock(); + + return ret; +} + static bool intel_iommu_capable(enum iommu_cap cap) { if (cap == IOMMU_CAP_CACHE_COHERENCY) @@ -5380,6 +5417,124 @@ struct intel_iommu *intel_svm_device_to_iommu(struct device *dev) } #endif /* CONFIG_INTEL_IOMMU_SVM */ +static int intel_iommu_enable_auxd(struct device *dev) +{ + struct device_domain_info *info; + struct intel_iommu *iommu; + unsigned long flags; + u8 bus, devfn; + int ret; + + iommu = device_to_iommu(dev, &bus, &devfn); + if (!iommu || dmar_disabled) + return -EINVAL; + + if (!sm_supported(iommu) || !pasid_supported(iommu)) + return -EINVAL; + + ret = intel_iommu_enable_pasid(iommu, dev); + if (ret) + return -ENODEV; + + spin_lock_irqsave(&device_domain_lock, flags); + info = dev->archdata.iommu; + info->auxd_enabled = 1; + spin_unlock_irqrestore(&device_domain_lock, flags); + + return 0; +} + +static int intel_iommu_disable_auxd(struct device *dev) +{ + struct device_domain_info *info; + unsigned long flags; + + spin_lock_irqsave(&device_domain_lock, flags); + info = dev->archdata.iommu; + if (!WARN_ON(!info)) + info->auxd_enabled = 0; + spin_unlock_irqrestore(&device_domain_lock, flags); + + return 0; +} + +/* + * A PCI express designated vendor specific extended capability is defined + * in the section 3.7 of Intel scalable I/O virtualization technical spec + * for system software and tools to detect endpoint devices supporting the + * Intel scalable IO virtualization without host driver dependency. + * + * Returns the address of the matching extended capability structure within + * the device's PCI configuration space or 0 if the device does not support + * it. + */ +static int siov_find_pci_dvsec(struct pci_dev *pdev) +{ + int pos; + u16 vendor, id; + + pos = pci_find_next_ext_capability(pdev, 0, 0x23); + while (pos) { + pci_read_config_word(pdev, pos + 4, &vendor); + pci_read_config_word(pdev, pos + 8, &id); + if (vendor == PCI_VENDOR_ID_INTEL && id == 5) + return pos; + + pos = pci_find_next_ext_capability(pdev, pos, 0x23); + } + + return 0; +} + +static bool +intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat) +{ + if (feat == IOMMU_DEV_FEAT_AUX) { + int ret; + + if (!dev_is_pci(dev) || dmar_disabled || + !scalable_mode_support() || !iommu_pasid_support()) + return false; + + ret = pci_pasid_features(to_pci_dev(dev)); + if (ret < 0) + return false; + + return !!siov_find_pci_dvsec(to_pci_dev(dev)); + } + + return false; +} + +static int +intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat) +{ + if (feat == IOMMU_DEV_FEAT_AUX) + return intel_iommu_enable_auxd(dev); + + return -ENODEV; +} + +static int +intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat) +{ + if (feat == IOMMU_DEV_FEAT_AUX) + return intel_iommu_disable_auxd(dev); + + return -ENODEV; +} + +static bool +intel_iommu_dev_feat_enabled(struct device *dev, enum iommu_dev_features feat) +{ + struct device_domain_info *info = dev->archdata.iommu; + + if (feat == IOMMU_DEV_FEAT_AUX) + return scalable_mode_support() && info && info->auxd_enabled; + + return false; +} + const struct iommu_ops intel_iommu_ops = { .capable = intel_iommu_capable, .domain_alloc = intel_iommu_domain_alloc, @@ -5394,6 +5549,10 @@ const struct iommu_ops intel_iommu_ops = { .get_resv_regions = intel_iommu_get_resv_regions, .put_resv_regions = intel_iommu_put_resv_regions, .device_group = pci_device_group, + .dev_has_feat = intel_iommu_dev_has_feat, + .dev_feat_enabled = intel_iommu_dev_feat_enabled, + .dev_enable_feat = intel_iommu_dev_enable_feat, + .dev_disable_feat = intel_iommu_dev_disable_feat, .pgsize_bitmap = INTEL_IOMMU_PGSIZES, }; diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index b7d1e2fbb9ca..4f0745479b6d 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -568,6 +568,7 @@ struct device_domain_info { u8 pri_enabled:1; u8 ats_supported:1; u8 ats_enabled:1; + u8 auxd_enabled:1; /* Multiple domains per device */ u8 ats_qdep; struct device *dev; /* it's NULL for PCIe-to-PCI bridge */ struct intel_iommu *iommu; /* IOMMU used by this device */ From 8cc3759a6c4931fca2ad9c1b0ead24dc0c30041c Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 25 Mar 2019 09:30:31 +0800 Subject: [PATCH 12/45] iommu/vt-d: Move common code out of iommu_attch_device() This part of code could be used by both normal and aux domain specific attach entries. Hence move them into a common function to avoid duplication. Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 60 ++++++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 24 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index ba5f2e5b3a03..a0f9c748ca9f 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -5066,35 +5066,14 @@ static void intel_iommu_domain_free(struct iommu_domain *domain) domain_exit(to_dmar_domain(domain)); } -static int intel_iommu_attach_device(struct iommu_domain *domain, - struct device *dev) +static int prepare_domain_attach_device(struct iommu_domain *domain, + struct device *dev) { struct dmar_domain *dmar_domain = to_dmar_domain(domain); struct intel_iommu *iommu; int addr_width; u8 bus, devfn; - if (device_is_rmrr_locked(dev)) { - dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n"); - return -EPERM; - } - - /* normally dev is not mapped */ - if (unlikely(domain_context_mapped(dev))) { - struct dmar_domain *old_domain; - - old_domain = find_domain(dev); - if (old_domain) { - rcu_read_lock(); - dmar_remove_one_dev_info(dev); - rcu_read_unlock(); - - if (!domain_type_is_vm_or_si(old_domain) && - list_empty(&old_domain->devices)) - domain_exit(old_domain); - } - } - iommu = device_to_iommu(dev, &bus, &devfn); if (!iommu) return -ENODEV; @@ -5127,7 +5106,40 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, dmar_domain->agaw--; } - return domain_add_dev_info(dmar_domain, dev); + return 0; +} + +static int intel_iommu_attach_device(struct iommu_domain *domain, + struct device *dev) +{ + int ret; + + if (device_is_rmrr_locked(dev)) { + dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n"); + return -EPERM; + } + + /* normally dev is not mapped */ + if (unlikely(domain_context_mapped(dev))) { + struct dmar_domain *old_domain; + + old_domain = find_domain(dev); + if (old_domain) { + rcu_read_lock(); + dmar_remove_one_dev_info(dev); + rcu_read_unlock(); + + if (!domain_type_is_vm_or_si(old_domain) && + list_empty(&old_domain->devices)) + domain_exit(old_domain); + } + } + + ret = prepare_domain_attach_device(domain, dev); + if (ret) + return ret; + + return domain_add_dev_info(to_dmar_domain(domain), dev); } static void intel_iommu_detach_device(struct iommu_domain *domain, From 67b8e02b5e76159a4f94f85bee370af1d9f442f9 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 25 Mar 2019 09:30:32 +0800 Subject: [PATCH 13/45] iommu/vt-d: Aux-domain specific domain attach/detach When multiple domains per device has been enabled by the device driver, the device will tag the default PASID for the domain to all DMA traffics out of the subset of this device; and the IOMMU should translate the DMA requests in PASID granularity. This adds the intel_iommu_aux_attach/detach_device() ops to support managing PASID granular translation structures when the device driver has enabled multiple domains per device. Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Signed-off-by: Sanjay Kumar Signed-off-by: Liu Yi L Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 152 ++++++++++++++++++++++++++++++++++++ include/linux/intel-iommu.h | 10 +++ 2 files changed, 162 insertions(+) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index a0f9c748ca9f..28a998afaf74 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2486,6 +2486,7 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, info->iommu = iommu; info->pasid_table = NULL; info->auxd_enabled = 0; + INIT_LIST_HEAD(&info->auxiliary_domains); if (dev && dev_is_pci(dev)) { struct pci_dev *pdev = to_pci_dev(info->dev); @@ -5066,6 +5067,131 @@ static void intel_iommu_domain_free(struct iommu_domain *domain) domain_exit(to_dmar_domain(domain)); } +/* + * Check whether a @domain could be attached to the @dev through the + * aux-domain attach/detach APIs. + */ +static inline bool +is_aux_domain(struct device *dev, struct iommu_domain *domain) +{ + struct device_domain_info *info = dev->archdata.iommu; + + return info && info->auxd_enabled && + domain->type == IOMMU_DOMAIN_UNMANAGED; +} + +static void auxiliary_link_device(struct dmar_domain *domain, + struct device *dev) +{ + struct device_domain_info *info = dev->archdata.iommu; + + assert_spin_locked(&device_domain_lock); + if (WARN_ON(!info)) + return; + + domain->auxd_refcnt++; + list_add(&domain->auxd, &info->auxiliary_domains); +} + +static void auxiliary_unlink_device(struct dmar_domain *domain, + struct device *dev) +{ + struct device_domain_info *info = dev->archdata.iommu; + + assert_spin_locked(&device_domain_lock); + if (WARN_ON(!info)) + return; + + list_del(&domain->auxd); + domain->auxd_refcnt--; + + if (!domain->auxd_refcnt && domain->default_pasid > 0) + intel_pasid_free_id(domain->default_pasid); +} + +static int aux_domain_add_dev(struct dmar_domain *domain, + struct device *dev) +{ + int ret; + u8 bus, devfn; + unsigned long flags; + struct intel_iommu *iommu; + + iommu = device_to_iommu(dev, &bus, &devfn); + if (!iommu) + return -ENODEV; + + if (domain->default_pasid <= 0) { + int pasid; + + pasid = intel_pasid_alloc_id(domain, PASID_MIN, + pci_max_pasids(to_pci_dev(dev)), + GFP_KERNEL); + if (pasid <= 0) { + pr_err("Can't allocate default pasid\n"); + return -ENODEV; + } + domain->default_pasid = pasid; + } + + spin_lock_irqsave(&device_domain_lock, flags); + /* + * iommu->lock must be held to attach domain to iommu and setup the + * pasid entry for second level translation. + */ + spin_lock(&iommu->lock); + ret = domain_attach_iommu(domain, iommu); + if (ret) + goto attach_failed; + + /* Setup the PASID entry for mediated devices: */ + ret = intel_pasid_setup_second_level(iommu, domain, dev, + domain->default_pasid); + if (ret) + goto table_failed; + spin_unlock(&iommu->lock); + + auxiliary_link_device(domain, dev); + + spin_unlock_irqrestore(&device_domain_lock, flags); + + return 0; + +table_failed: + domain_detach_iommu(domain, iommu); +attach_failed: + spin_unlock(&iommu->lock); + spin_unlock_irqrestore(&device_domain_lock, flags); + if (!domain->auxd_refcnt && domain->default_pasid > 0) + intel_pasid_free_id(domain->default_pasid); + + return ret; +} + +static void aux_domain_remove_dev(struct dmar_domain *domain, + struct device *dev) +{ + struct device_domain_info *info; + struct intel_iommu *iommu; + unsigned long flags; + + if (!is_aux_domain(dev, &domain->domain)) + return; + + spin_lock_irqsave(&device_domain_lock, flags); + info = dev->archdata.iommu; + iommu = info->iommu; + + auxiliary_unlink_device(domain, dev); + + spin_lock(&iommu->lock); + intel_pasid_tear_down_entry(iommu, dev, domain->default_pasid); + domain_detach_iommu(domain, iommu); + spin_unlock(&iommu->lock); + + spin_unlock_irqrestore(&device_domain_lock, flags); +} + static int prepare_domain_attach_device(struct iommu_domain *domain, struct device *dev) { @@ -5119,6 +5245,9 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, return -EPERM; } + if (is_aux_domain(dev, domain)) + return -EPERM; + /* normally dev is not mapped */ if (unlikely(domain_context_mapped(dev))) { struct dmar_domain *old_domain; @@ -5142,12 +5271,33 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, return domain_add_dev_info(to_dmar_domain(domain), dev); } +static int intel_iommu_aux_attach_device(struct iommu_domain *domain, + struct device *dev) +{ + int ret; + + if (!is_aux_domain(dev, domain)) + return -EPERM; + + ret = prepare_domain_attach_device(domain, dev); + if (ret) + return ret; + + return aux_domain_add_dev(to_dmar_domain(domain), dev); +} + static void intel_iommu_detach_device(struct iommu_domain *domain, struct device *dev) { dmar_remove_one_dev_info(dev); } +static void intel_iommu_aux_detach_device(struct iommu_domain *domain, + struct device *dev) +{ + aux_domain_remove_dev(to_dmar_domain(domain), dev); +} + static int intel_iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t hpa, size_t size, int iommu_prot) @@ -5553,6 +5703,8 @@ const struct iommu_ops intel_iommu_ops = { .domain_free = intel_iommu_domain_free, .attach_dev = intel_iommu_attach_device, .detach_dev = intel_iommu_detach_device, + .aux_attach_dev = intel_iommu_aux_attach_device, + .aux_detach_dev = intel_iommu_aux_detach_device, .map = intel_iommu_map, .unmap = intel_iommu_unmap, .iova_to_phys = intel_iommu_iova_to_phys, diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 4f0745479b6d..6925a18a5ca3 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -489,9 +489,11 @@ struct dmar_domain { /* Domain ids per IOMMU. Use u16 since * domain ids are 16 bit wide according * to VT-d spec, section 9.3 */ + unsigned int auxd_refcnt; /* Refcount of auxiliary attaching */ bool has_iotlb_device; struct list_head devices; /* all devices' list */ + struct list_head auxd; /* link to device's auxiliary list */ struct iova_domain iovad; /* iova's that belong to this domain */ struct dma_pte *pgd; /* virtual address */ @@ -510,6 +512,11 @@ struct dmar_domain { 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */ u64 max_addr; /* maximum mapped address */ + int default_pasid; /* + * The default pasid used for non-SVM + * traffic on mediated devices. + */ + struct iommu_domain domain; /* generic domain data structure for iommu core */ }; @@ -559,6 +566,9 @@ struct device_domain_info { struct list_head link; /* link to domain siblings */ struct list_head global; /* link to global list */ struct list_head table; /* link to pasid table */ + struct list_head auxiliary_domains; /* auxiliary domains + * attached to this device + */ u8 bus; /* PCI bus number */ u8 devfn; /* PCI devfn number */ u16 pfsid; /* SRIOV physical function source ID */ From 0e8000f8f616df653d0d7fcc43fafae7e5b07c0b Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 25 Mar 2019 09:30:33 +0800 Subject: [PATCH 14/45] iommu/vt-d: Return ID associated with an auxiliary domain This adds support to return the default pasid associated with an auxiliary domain. The PCI device which is bound with this domain should use this value as the pasid for all DMA requests of the subset of device which is isolated and protected with this domain. Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Signed-off-by: Sanjay Kumar Signed-off-by: Liu Yi L Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 28a998afaf74..c137f0f2cf49 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -5697,6 +5697,15 @@ intel_iommu_dev_feat_enabled(struct device *dev, enum iommu_dev_features feat) return false; } +static int +intel_iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev) +{ + struct dmar_domain *dmar_domain = to_dmar_domain(domain); + + return dmar_domain->default_pasid > 0 ? + dmar_domain->default_pasid : -EINVAL; +} + const struct iommu_ops intel_iommu_ops = { .capable = intel_iommu_capable, .domain_alloc = intel_iommu_domain_alloc, @@ -5705,6 +5714,7 @@ const struct iommu_ops intel_iommu_ops = { .detach_dev = intel_iommu_detach_device, .aux_attach_dev = intel_iommu_aux_attach_device, .aux_detach_dev = intel_iommu_aux_detach_device, + .aux_get_pasid = intel_iommu_aux_get_pasid, .map = intel_iommu_map, .unmap = intel_iommu_unmap, .iova_to_phys = intel_iommu_iova_to_phys, From 83d18bdff18f680ce2c0af10a663da19f7dede93 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Wed, 10 Apr 2019 16:21:08 +0100 Subject: [PATCH 15/45] iommu/amd: Use pci_prg_resp_pasid_required() Commit e5567f5f6762 ("PCI/ATS: Add pci_prg_resp_pasid_required() interface.") added a common interface to check the PASID bit in the PRI capability. Use it in the AMD driver. Signed-off-by: Jean-Philippe Brucker Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index f7cdd2ab7f11..7a0de274934c 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2122,23 +2122,6 @@ static int pdev_iommuv2_enable(struct pci_dev *pdev) return ret; } -/* FIXME: Move this to PCI code */ -#define PCI_PRI_TLP_OFF (1 << 15) - -static bool pci_pri_tlp_required(struct pci_dev *pdev) -{ - u16 status; - int pos; - - pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); - if (!pos) - return false; - - pci_read_config_word(pdev, pos + PCI_PRI_STATUS, &status); - - return (status & PCI_PRI_TLP_OFF) ? true : false; -} - /* * If a device is not yet associated with a domain, this function makes the * device visible in the domain @@ -2167,7 +2150,7 @@ static int attach_device(struct device *dev, dev_data->ats.enabled = true; dev_data->ats.qdep = pci_ats_queue_depth(pdev); - dev_data->pri_tlp = pci_pri_tlp_required(pdev); + dev_data->pri_tlp = pci_prg_resp_pasid_required(pdev); } } else if (amd_iommu_iotlb_sup && pci_enable_ats(pdev, PAGE_SHIFT) == 0) { From 7a5dbf3ab2f04905cf8468c66fcdbfb643068bcb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 10 Apr 2019 18:50:14 +0200 Subject: [PATCH 16/45] iommu/amd: Remove the leftover of bypass support The AMD iommu dma_ops are only attached on a per-device basis when an actual translation is needed. Remove the leftover bypass support which in parts was already broken (e.g. it always returns 0 from ->map_sg). Use the opportunity to remove a few local variables and move assignments into the declaration line where they were previously separated by the bypass check. Signed-off-by: Christoph Hellwig Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 80 +++++++++------------------------------ 1 file changed, 17 insertions(+), 63 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 7a0de274934c..f467cc4b498e 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2481,20 +2481,10 @@ static dma_addr_t map_page(struct device *dev, struct page *page, unsigned long attrs) { phys_addr_t paddr = page_to_phys(page) + offset; - struct protection_domain *domain; - struct dma_ops_domain *dma_dom; - u64 dma_mask; + struct protection_domain *domain = get_domain(dev); + struct dma_ops_domain *dma_dom = to_dma_ops_domain(domain); - domain = get_domain(dev); - if (PTR_ERR(domain) == -EINVAL) - return (dma_addr_t)paddr; - else if (IS_ERR(domain)) - return DMA_MAPPING_ERROR; - - dma_mask = *dev->dma_mask; - dma_dom = to_dma_ops_domain(domain); - - return __map_single(dev, dma_dom, paddr, size, dir, dma_mask); + return __map_single(dev, dma_dom, paddr, size, dir, *dev->dma_mask); } /* @@ -2503,14 +2493,8 @@ static dma_addr_t map_page(struct device *dev, struct page *page, static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { - struct protection_domain *domain; - struct dma_ops_domain *dma_dom; - - domain = get_domain(dev); - if (IS_ERR(domain)) - return; - - dma_dom = to_dma_ops_domain(domain); + struct protection_domain *domain = get_domain(dev); + struct dma_ops_domain *dma_dom = to_dma_ops_domain(domain); __unmap_single(dma_dom, dma_addr, size, dir); } @@ -2550,20 +2534,13 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, unsigned long attrs) { int mapped_pages = 0, npages = 0, prot = 0, i; - struct protection_domain *domain; - struct dma_ops_domain *dma_dom; + struct protection_domain *domain = get_domain(dev); + struct dma_ops_domain *dma_dom = to_dma_ops_domain(domain); struct scatterlist *s; unsigned long address; - u64 dma_mask; + u64 dma_mask = *dev->dma_mask; int ret; - domain = get_domain(dev); - if (IS_ERR(domain)) - return 0; - - dma_dom = to_dma_ops_domain(domain); - dma_mask = *dev->dma_mask; - npages = sg_num_pages(dev, sglist, nelems); address = dma_ops_alloc_iova(dev, dma_dom, npages, dma_mask); @@ -2635,20 +2612,11 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, int nelems, enum dma_data_direction dir, unsigned long attrs) { - struct protection_domain *domain; - struct dma_ops_domain *dma_dom; - unsigned long startaddr; - int npages = 2; + struct protection_domain *domain = get_domain(dev); + struct dma_ops_domain *dma_dom = to_dma_ops_domain(domain); - domain = get_domain(dev); - if (IS_ERR(domain)) - return; - - startaddr = sg_dma_address(sglist) & PAGE_MASK; - dma_dom = to_dma_ops_domain(domain); - npages = sg_num_pages(dev, sglist, nelems); - - __unmap_single(dma_dom, startaddr, npages << PAGE_SHIFT, dir); + __unmap_single(dma_dom, sg_dma_address(sglist) & PAGE_MASK, + sg_num_pages(dev, sglist, nelems) << PAGE_SHIFT, dir); } /* @@ -2659,16 +2627,11 @@ static void *alloc_coherent(struct device *dev, size_t size, unsigned long attrs) { u64 dma_mask = dev->coherent_dma_mask; - struct protection_domain *domain; + struct protection_domain *domain = get_domain(dev); struct dma_ops_domain *dma_dom; struct page *page; - domain = get_domain(dev); - if (PTR_ERR(domain) == -EINVAL) { - page = alloc_pages(flag, get_order(size)); - *dma_addr = page_to_phys(page); - return page_address(page); - } else if (IS_ERR(domain)) + if (IS_ERR(domain)) return NULL; dma_dom = to_dma_ops_domain(domain); @@ -2714,22 +2677,13 @@ static void free_coherent(struct device *dev, size_t size, void *virt_addr, dma_addr_t dma_addr, unsigned long attrs) { - struct protection_domain *domain; - struct dma_ops_domain *dma_dom; - struct page *page; + struct protection_domain *domain = get_domain(dev); + struct dma_ops_domain *dma_dom = to_dma_ops_domain(domain); + struct page *page = virt_to_page(virt_addr); - page = virt_to_page(virt_addr); size = PAGE_ALIGN(size); - domain = get_domain(dev); - if (IS_ERR(domain)) - goto free_mem; - - dma_dom = to_dma_ops_domain(domain); - __unmap_single(dma_dom, dma_addr, size, DMA_BIDIRECTIONAL); - -free_mem: if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT)) __free_pages(page, get_order(size)); } From 48b2c937ea37a3bece0094b46450ed5267525289 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 10 Apr 2019 18:14:06 +0200 Subject: [PATCH 17/45] iommu/vt-d: Clean up iommu_no_mapping Invert the return value to avoid double negatives, use a bool instead of int as the return value, and reduce some indentation after early returns. Signed-off-by: Christoph Hellwig Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 47 +++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 26 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index c137f0f2cf49..46337a4ea33e 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3608,45 +3608,40 @@ struct dmar_domain *get_valid_domain_for_dev(struct device *dev) } /* Check if the dev needs to go through non-identity map and unmap process.*/ -static int iommu_no_mapping(struct device *dev) +static bool iommu_need_mapping(struct device *dev) { int found; if (iommu_dummy(dev)) - return 1; + return false; if (!iommu_identity_mapping) - return 0; + return true; found = identity_mapping(dev); if (found) { if (iommu_should_identity_map(dev, 0)) - return 1; - else { - /* - * 32 bit DMA is removed from si_domain and fall back - * to non-identity mapping. - */ - dmar_remove_one_dev_info(dev); - dev_info(dev, "32bit DMA uses non-identity mapping\n"); - return 0; - } + return false; + + /* + * 32 bit DMA is removed from si_domain and fall back to + * non-identity mapping. + */ + dmar_remove_one_dev_info(dev); + dev_info(dev, "32bit DMA uses non-identity mapping\n"); } else { /* * In case of a detached 64 bit DMA device from vm, the device * is put into si_domain for identity mapping. */ - if (iommu_should_identity_map(dev, 0)) { - int ret; - ret = domain_add_dev_info(si_domain, dev); - if (!ret) { - dev_info(dev, "64bit DMA uses identity mapping\n"); - return 1; - } + if (iommu_should_identity_map(dev, 0) && + !domain_add_dev_info(si_domain, dev)) { + dev_info(dev, "64bit DMA uses identity mapping\n"); + return false; } } - return 0; + return true; } static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr, @@ -3662,7 +3657,7 @@ static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr, BUG_ON(dir == DMA_NONE); - if (iommu_no_mapping(dev)) + if (!iommu_need_mapping(dev)) return paddr; domain = get_valid_domain_for_dev(dev); @@ -3733,7 +3728,7 @@ static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size) struct intel_iommu *iommu; struct page *freelist; - if (iommu_no_mapping(dev)) + if (!iommu_need_mapping(dev)) return; domain = find_domain(dev); @@ -3784,7 +3779,7 @@ static void *intel_alloc_coherent(struct device *dev, size_t size, size = PAGE_ALIGN(size); order = get_order(size); - if (!iommu_no_mapping(dev)) + if (iommu_need_mapping(dev)) flags &= ~(GFP_DMA | GFP_DMA32); else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) { if (dev->coherent_dma_mask < DMA_BIT_MASK(32)) @@ -3798,7 +3793,7 @@ static void *intel_alloc_coherent(struct device *dev, size_t size, page = dma_alloc_from_contiguous(dev, count, order, flags & __GFP_NOWARN); - if (page && iommu_no_mapping(dev) && + if (page && !iommu_need_mapping(dev) && page_to_phys(page) + size > dev->coherent_dma_mask) { dma_release_from_contiguous(dev, page, count); page = NULL; @@ -3880,7 +3875,7 @@ static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nele struct intel_iommu *iommu; BUG_ON(dir == DMA_NONE); - if (iommu_no_mapping(dev)) + if (!iommu_need_mapping(dev)) return intel_nontranslate_map_sg(dev, sglist, nelems, dir); domain = get_valid_domain_for_dev(dev); From 9cc0c2af8d0387f3f99572bd7fe15ab41eb89bb9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 10 Apr 2019 18:14:07 +0200 Subject: [PATCH 18/45] iommu/vt-d: Use dma_direct for bypass devices The intel-iommu driver currently has a partial reimplementation of the direct mapping code for devices that use pass through mode. Replace that code with calls to the relevant dma_direct routines at the highest level. This means we have exactly the same behvior as the dma direct code itself, and can prepare for eventually only attaching the intel_iommu ops to devices that actually need dynamic iommu mappings. Signed-off-by: Christoph Hellwig Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 71 ++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 40 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 46337a4ea33e..1b7dd11e5581 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3657,9 +3657,6 @@ static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr, BUG_ON(dir == DMA_NONE); - if (!iommu_need_mapping(dev)) - return paddr; - domain = get_valid_domain_for_dev(dev); if (!domain) return DMA_MAPPING_ERROR; @@ -3708,15 +3705,20 @@ static dma_addr_t intel_map_page(struct device *dev, struct page *page, enum dma_data_direction dir, unsigned long attrs) { - return __intel_map_single(dev, page_to_phys(page) + offset, size, - dir, *dev->dma_mask); + if (iommu_need_mapping(dev)) + return __intel_map_single(dev, page_to_phys(page) + offset, + size, dir, *dev->dma_mask); + return dma_direct_map_page(dev, page, offset, size, dir, attrs); } static dma_addr_t intel_map_resource(struct device *dev, phys_addr_t phys_addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { - return __intel_map_single(dev, phys_addr, size, dir, *dev->dma_mask); + if (iommu_need_mapping(dev)) + return __intel_map_single(dev, phys_addr, size, dir, + *dev->dma_mask); + return dma_direct_map_resource(dev, phys_addr, size, dir, attrs); } static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size) @@ -3728,9 +3730,6 @@ static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size) struct intel_iommu *iommu; struct page *freelist; - if (!iommu_need_mapping(dev)) - return; - domain = find_domain(dev); BUG_ON(!domain); @@ -3766,7 +3765,17 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { - intel_unmap(dev, dev_addr, size); + if (iommu_need_mapping(dev)) + intel_unmap(dev, dev_addr, size); + else + dma_direct_unmap_page(dev, dev_addr, size, dir, attrs); +} + +static void intel_unmap_resource(struct device *dev, dma_addr_t dev_addr, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ + if (iommu_need_mapping(dev)) + intel_unmap(dev, dev_addr, size); } static void *intel_alloc_coherent(struct device *dev, size_t size, @@ -3776,28 +3785,18 @@ static void *intel_alloc_coherent(struct device *dev, size_t size, struct page *page = NULL; int order; + if (!iommu_need_mapping(dev)) + return dma_direct_alloc(dev, size, dma_handle, flags, attrs); + size = PAGE_ALIGN(size); order = get_order(size); - - if (iommu_need_mapping(dev)) - flags &= ~(GFP_DMA | GFP_DMA32); - else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) { - if (dev->coherent_dma_mask < DMA_BIT_MASK(32)) - flags |= GFP_DMA; - else - flags |= GFP_DMA32; - } + flags &= ~(GFP_DMA | GFP_DMA32); if (gfpflags_allow_blocking(flags)) { unsigned int count = size >> PAGE_SHIFT; page = dma_alloc_from_contiguous(dev, count, order, flags & __GFP_NOWARN); - if (page && !iommu_need_mapping(dev) && - page_to_phys(page) + size > dev->coherent_dma_mask) { - dma_release_from_contiguous(dev, page, count); - page = NULL; - } } if (!page) @@ -3823,6 +3822,9 @@ static void intel_free_coherent(struct device *dev, size_t size, void *vaddr, int order; struct page *page = virt_to_page(vaddr); + if (!iommu_need_mapping(dev)) + return dma_direct_free(dev, size, vaddr, dma_handle, attrs); + size = PAGE_ALIGN(size); order = get_order(size); @@ -3840,6 +3842,9 @@ static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist, struct scatterlist *sg; int i; + if (!iommu_need_mapping(dev)) + return dma_direct_unmap_sg(dev, sglist, nelems, dir, attrs); + for_each_sg(sglist, sg, nelems, i) { nrpages += aligned_nrpages(sg_dma_address(sg), sg_dma_len(sg)); } @@ -3847,20 +3852,6 @@ static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist, intel_unmap(dev, startaddr, nrpages << VTD_PAGE_SHIFT); } -static int intel_nontranslate_map_sg(struct device *hddev, - struct scatterlist *sglist, int nelems, int dir) -{ - int i; - struct scatterlist *sg; - - for_each_sg(sglist, sg, nelems, i) { - BUG_ON(!sg_page(sg)); - sg->dma_address = sg_phys(sg); - sg->dma_length = sg->length; - } - return nelems; -} - static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems, enum dma_data_direction dir, unsigned long attrs) { @@ -3876,7 +3867,7 @@ static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nele BUG_ON(dir == DMA_NONE); if (!iommu_need_mapping(dev)) - return intel_nontranslate_map_sg(dev, sglist, nelems, dir); + return dma_direct_map_sg(dev, sglist, nelems, dir, attrs); domain = get_valid_domain_for_dev(dev); if (!domain) @@ -3926,7 +3917,7 @@ static const struct dma_map_ops intel_dma_ops = { .map_page = intel_map_page, .unmap_page = intel_unmap_page, .map_resource = intel_map_resource, - .unmap_resource = intel_unmap_page, + .unmap_resource = intel_unmap_resource, .dma_supported = dma_direct_supported, }; From f7ae70a5e347f7a314e266c66c67876adbab3245 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 10 Apr 2019 18:14:08 +0200 Subject: [PATCH 19/45] iommu/vt-d: Don't clear GFP_DMA and GFP_DMA32 flags We already do this in the caller. Signed-off-by: Christoph Hellwig Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 1b7dd11e5581..599a25809440 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3790,7 +3790,6 @@ static void *intel_alloc_coherent(struct device *dev, size_t size, size = PAGE_ALIGN(size); order = get_order(size); - flags &= ~(GFP_DMA | GFP_DMA32); if (gfpflags_allow_blocking(flags)) { unsigned int count = size >> PAGE_SHIFT; From 8ac13175cbe985e78bd41f9637b613b59df36936 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Fri, 12 Apr 2019 12:13:24 +0800 Subject: [PATCH 20/45] vfio/mdev: Add iommu related member in mdev_device A parent device might create different types of mediated devices. For example, a mediated device could be created by the parent device with full isolation and protection provided by the IOMMU. One usage case could be found on Intel platforms where a mediated device is an assignable subset of a PCI, the DMA requests on behalf of it are all tagged with a PASID. Since IOMMU supports PASID-granular translations (scalable mode in VT-d 3.0), this mediated device could be individually protected and isolated by an IOMMU. This patch adds a new member in the struct mdev_device to indicate that the mediated device represented by mdev could be isolated and protected by attaching a domain to a device represented by mdev->iommu_device. It also adds a helper to add or set the iommu device. * mdev_device->iommu_device - This, if set, indicates that the mediated device could be fully isolated and protected by IOMMU via attaching an iommu domain to this device. If empty, it indicates using vendor defined isolation, hence bypass IOMMU. * mdev_set/get_iommu_device(dev, iommu_device) - Set or get the iommu device which represents this mdev in IOMMU's device scope. Drivers don't need to set the iommu device if it uses vendor defined isolation. Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Cc: Liu Yi L Suggested-by: Kevin Tian Suggested-by: Alex Williamson Signed-off-by: Lu Baolu Reviewed-by: Jean-Philippe Brucker Reviewed-by: Kirti Wankhede Acked-by: Alex Williamson Signed-off-by: Joerg Roedel --- drivers/vfio/mdev/mdev_core.c | 18 ++++++++++++++++++ drivers/vfio/mdev/mdev_private.h | 1 + include/linux/mdev.h | 14 ++++++++++++++ 3 files changed, 33 insertions(+) diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c index b96fedc77ee5..1b6435529166 100644 --- a/drivers/vfio/mdev/mdev_core.c +++ b/drivers/vfio/mdev/mdev_core.c @@ -390,6 +390,24 @@ int mdev_device_remove(struct device *dev, bool force_remove) return 0; } +int mdev_set_iommu_device(struct device *dev, struct device *iommu_device) +{ + struct mdev_device *mdev = to_mdev_device(dev); + + mdev->iommu_device = iommu_device; + + return 0; +} +EXPORT_SYMBOL(mdev_set_iommu_device); + +struct device *mdev_get_iommu_device(struct device *dev) +{ + struct mdev_device *mdev = to_mdev_device(dev); + + return mdev->iommu_device; +} +EXPORT_SYMBOL(mdev_get_iommu_device); + static int __init mdev_init(void) { return mdev_bus_register(); diff --git a/drivers/vfio/mdev/mdev_private.h b/drivers/vfio/mdev/mdev_private.h index 379758c52b1b..f4eba723fea5 100644 --- a/drivers/vfio/mdev/mdev_private.h +++ b/drivers/vfio/mdev/mdev_private.h @@ -33,6 +33,7 @@ struct mdev_device { struct kref ref; struct list_head next; struct kobject *type_kobj; + struct device *iommu_device; bool active; }; diff --git a/include/linux/mdev.h b/include/linux/mdev.h index d7aee90e5da5..df2ea39f47ee 100644 --- a/include/linux/mdev.h +++ b/include/linux/mdev.h @@ -15,6 +15,20 @@ struct mdev_device; +/* + * Called by the parent device driver to set the device which represents + * this mdev in iommu protection scope. By default, the iommu device is + * NULL, that indicates using vendor defined isolation. + * + * @dev: the mediated device that iommu will isolate. + * @iommu_device: a pci device which represents the iommu for @dev. + * + * Return 0 for success, otherwise negative error value. + */ +int mdev_set_iommu_device(struct device *dev, struct device *iommu_device); + +struct device *mdev_get_iommu_device(struct device *dev); + /** * struct mdev_parent_ops - Structure to be registered for each parent device to * register the device to mdev module. From 7bd50f0cd2fd5e8cc910bc0b7db93681c1d325be Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Fri, 12 Apr 2019 12:13:25 +0800 Subject: [PATCH 21/45] vfio/type1: Add domain at(de)taching group helpers This adds helpers to attach or detach a domain to a group. This will replace iommu_attach_group() which only works for non-mdev devices. If a domain is attaching to a group which includes the mediated devices, it should attach to the iommu device (a pci device which represents the mdev in iommu scope) instead. The added helper supports attaching domain to groups for both pci and mdev devices. Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Signed-off-by: Sanjay Kumar Signed-off-by: Liu Yi L Signed-off-by: Lu Baolu Reviewed-by: Jean-Philippe Brucker Acked-by: Alex Williamson Signed-off-by: Joerg Roedel --- drivers/vfio/vfio_iommu_type1.c | 84 ++++++++++++++++++++++++++++++--- 1 file changed, 77 insertions(+), 7 deletions(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index d0f731c9920a..c956b85264ae 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -97,6 +97,7 @@ struct vfio_dma { struct vfio_group { struct iommu_group *iommu_group; struct list_head next; + bool mdev_group; /* An mdev group */ }; /* @@ -1311,6 +1312,75 @@ static bool vfio_iommu_has_sw_msi(struct iommu_group *group, phys_addr_t *base) return ret; } +static struct device *vfio_mdev_get_iommu_device(struct device *dev) +{ + struct device *(*fn)(struct device *dev); + struct device *iommu_device; + + fn = symbol_get(mdev_get_iommu_device); + if (fn) { + iommu_device = fn(dev); + symbol_put(mdev_get_iommu_device); + + return iommu_device; + } + + return NULL; +} + +static int vfio_mdev_attach_domain(struct device *dev, void *data) +{ + struct iommu_domain *domain = data; + struct device *iommu_device; + + iommu_device = vfio_mdev_get_iommu_device(dev); + if (iommu_device) { + if (iommu_dev_feature_enabled(iommu_device, IOMMU_DEV_FEAT_AUX)) + return iommu_aux_attach_device(domain, iommu_device); + else + return iommu_attach_device(domain, iommu_device); + } + + return -EINVAL; +} + +static int vfio_mdev_detach_domain(struct device *dev, void *data) +{ + struct iommu_domain *domain = data; + struct device *iommu_device; + + iommu_device = vfio_mdev_get_iommu_device(dev); + if (iommu_device) { + if (iommu_dev_feature_enabled(iommu_device, IOMMU_DEV_FEAT_AUX)) + iommu_aux_detach_device(domain, iommu_device); + else + iommu_detach_device(domain, iommu_device); + } + + return 0; +} + +static int vfio_iommu_attach_group(struct vfio_domain *domain, + struct vfio_group *group) +{ + if (group->mdev_group) + return iommu_group_for_each_dev(group->iommu_group, + domain->domain, + vfio_mdev_attach_domain); + else + return iommu_attach_group(domain->domain, group->iommu_group); +} + +static void vfio_iommu_detach_group(struct vfio_domain *domain, + struct vfio_group *group) +{ + if (group->mdev_group) + iommu_group_for_each_dev(group->iommu_group, domain->domain, + vfio_mdev_detach_domain); + else + iommu_detach_group(domain->domain, group->iommu_group); +} + static int vfio_iommu_type1_attach_group(void *iommu_data, struct iommu_group *iommu_group) { @@ -1386,7 +1456,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, goto out_domain; } - ret = iommu_attach_group(domain->domain, iommu_group); + ret = vfio_iommu_attach_group(domain, group); if (ret) goto out_domain; @@ -1418,8 +1488,8 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, list_for_each_entry(d, &iommu->domain_list, next) { if (d->domain->ops == domain->domain->ops && d->prot == domain->prot) { - iommu_detach_group(domain->domain, iommu_group); - if (!iommu_attach_group(d->domain, iommu_group)) { + vfio_iommu_detach_group(domain, group); + if (!vfio_iommu_attach_group(d, group)) { list_add(&group->next, &d->group_list); iommu_domain_free(domain->domain); kfree(domain); @@ -1427,7 +1497,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, return 0; } - ret = iommu_attach_group(domain->domain, iommu_group); + ret = vfio_iommu_attach_group(domain, group); if (ret) goto out_domain; } @@ -1453,7 +1523,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, return 0; out_detach: - iommu_detach_group(domain->domain, iommu_group); + vfio_iommu_detach_group(domain, group); out_domain: iommu_domain_free(domain->domain); out_free: @@ -1544,7 +1614,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, if (!group) continue; - iommu_detach_group(domain->domain, iommu_group); + vfio_iommu_detach_group(domain, group); list_del(&group->next); kfree(group); /* @@ -1610,7 +1680,7 @@ static void vfio_release_domain(struct vfio_domain *domain, bool external) list_for_each_entry_safe(group, group_tmp, &domain->group_list, next) { if (!external) - iommu_detach_group(domain->domain, group->iommu_group); + vfio_iommu_detach_group(domain, group); list_del(&group->next); kfree(group); } From be068fa236c3d697c77f02f3dc9cd7afeb0538a6 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Fri, 12 Apr 2019 12:13:26 +0800 Subject: [PATCH 22/45] vfio/type1: Handle different mdev isolation type This adds the support to determine the isolation type of a mediated device group by checking whether it has an iommu device. If an iommu device exists, an iommu domain will be allocated and then attached to the iommu device. Otherwise, keep the same behavior as it is. Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Signed-off-by: Sanjay Kumar Signed-off-by: Liu Yi L Signed-off-by: Lu Baolu Reviewed-by: Jean-Philippe Brucker Reviewed-by: Kirti Wankhede Acked-by: Alex Williamson Signed-off-by: Joerg Roedel --- drivers/vfio/vfio_iommu_type1.c | 55 +++++++++++++++++++++++++-------- 1 file changed, 42 insertions(+), 13 deletions(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index c956b85264ae..3be1db3501cc 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -565,7 +565,7 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, mutex_lock(&iommu->lock); /* Fail if notifier list is empty */ - if ((!iommu->external_domain) || (!iommu->notifier.head)) { + if (!iommu->notifier.head) { ret = -EINVAL; goto pin_done; } @@ -647,11 +647,6 @@ static int vfio_iommu_type1_unpin_pages(void *iommu_data, mutex_lock(&iommu->lock); - if (!iommu->external_domain) { - mutex_unlock(&iommu->lock); - return -EINVAL; - } - do_accounting = !IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu); for (i = 0; i < npage; i++) { struct vfio_dma *dma; @@ -1381,13 +1376,40 @@ static void vfio_iommu_detach_group(struct vfio_domain *domain, iommu_detach_group(domain->domain, group->iommu_group); } +static bool vfio_bus_is_mdev(struct bus_type *bus) +{ + struct bus_type *mdev_bus; + bool ret = false; + + mdev_bus = symbol_get(mdev_bus_type); + if (mdev_bus) { + ret = (bus == mdev_bus); + symbol_put(mdev_bus_type); + } + + return ret; +} + +static int vfio_mdev_iommu_device(struct device *dev, void *data) +{ + struct device **old = data, *new; + + new = vfio_mdev_get_iommu_device(dev); + if (!new || (*old && *old != new)) + return -EINVAL; + + *old = new; + + return 0; +} + static int vfio_iommu_type1_attach_group(void *iommu_data, struct iommu_group *iommu_group) { struct vfio_iommu *iommu = iommu_data; struct vfio_group *group; struct vfio_domain *domain, *d; - struct bus_type *bus = NULL, *mdev_bus; + struct bus_type *bus = NULL; int ret; bool resv_msi, msi_remap; phys_addr_t resv_msi_base; @@ -1422,23 +1444,30 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, if (ret) goto out_free; - mdev_bus = symbol_get(mdev_bus_type); + if (vfio_bus_is_mdev(bus)) { + struct device *iommu_device = NULL; - if (mdev_bus) { - if ((bus == mdev_bus) && !iommu_present(bus)) { - symbol_put(mdev_bus_type); + group->mdev_group = true; + + /* Determine the isolation type */ + ret = iommu_group_for_each_dev(iommu_group, &iommu_device, + vfio_mdev_iommu_device); + if (ret || !iommu_device) { if (!iommu->external_domain) { INIT_LIST_HEAD(&domain->group_list); iommu->external_domain = domain; - } else + } else { kfree(domain); + } list_add(&group->next, &iommu->external_domain->group_list); mutex_unlock(&iommu->lock); + return 0; } - symbol_put(mdev_bus_type); + + bus = iommu_device->bus; } domain->domain = iommu_domain_alloc(bus); From f7b0c4ce8cb3c09cb3cbfc0c663268bf99e5fa9c Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Fri, 12 Apr 2019 12:26:13 +0800 Subject: [PATCH 23/45] iommu/vt-d: Flush IOTLB for untrusted device in time By default, for performance consideration, Intel IOMMU driver won't flush IOTLB immediately after a buffer is unmapped. It schedules a thread and flushes IOTLB in a batched mode. This isn't suitable for untrusted device since it still can access the memory even if it isn't supposed to do so. Cc: Ashok Raj Cc: Jacob Pan Signed-off-by: Lu Baolu Tested-by: Xu Pengfei Tested-by: Mika Westerberg Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 599a25809440..d93c4bd7de75 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3729,6 +3729,7 @@ static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size) unsigned long iova_pfn; struct intel_iommu *iommu; struct page *freelist; + struct pci_dev *pdev = NULL; domain = find_domain(dev); BUG_ON(!domain); @@ -3741,11 +3742,14 @@ static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size) start_pfn = mm_to_dma_pfn(iova_pfn); last_pfn = start_pfn + nrpages - 1; + if (dev_is_pci(dev)) + pdev = to_pci_dev(dev); + dev_dbg(dev, "Device unmapping: pfn %lx-%lx\n", start_pfn, last_pfn); freelist = domain_unmap(domain, start_pfn, last_pfn); - if (intel_iommu_strict) { + if (intel_iommu_strict || (pdev && pdev->untrusted)) { iommu_flush_iotlb_psi(iommu, domain, start_pfn, nrpages, !freelist, 0); /* free iova */ From 6e1ffbb7c2ab80df54ecbb01cf3d2c64aafed74a Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Wed, 17 Apr 2019 19:24:40 +0100 Subject: [PATCH 24/45] PCI: Move ATS declarations outside of CONFIG_PCI At the moment, the ATS functions are only defined when CONFIG_PCI is enabled. Since we're about to use them in the Arm SMMUv3 driver, which could be built with CONFIG_PCI disabled, and they are already guarded by CONFIG_PCI_ATS which depends on CONFIG_PCI, move the definitions outside of CONFIG_PCI. Acked-by: Bjorn Helgaas Signed-off-by: Jean-Philippe Brucker Signed-off-by: Will Deacon --- include/linux/pci.h | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/include/linux/pci.h b/include/linux/pci.h index 77448215ef5b..169c6a18d0b0 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1521,21 +1521,6 @@ static inline void pcie_ecrc_get_policy(char *str) { } bool pci_ats_disabled(void); -#ifdef CONFIG_PCI_ATS -/* Address Translation Service */ -void pci_ats_init(struct pci_dev *dev); -int pci_enable_ats(struct pci_dev *dev, int ps); -void pci_disable_ats(struct pci_dev *dev); -int pci_ats_queue_depth(struct pci_dev *dev); -int pci_ats_page_aligned(struct pci_dev *dev); -#else -static inline void pci_ats_init(struct pci_dev *d) { } -static inline int pci_enable_ats(struct pci_dev *d, int ps) { return -ENODEV; } -static inline void pci_disable_ats(struct pci_dev *d) { } -static inline int pci_ats_queue_depth(struct pci_dev *d) { return -ENODEV; } -static inline int pci_ats_page_aligned(struct pci_dev *dev) { return 0; } -#endif - #ifdef CONFIG_PCIE_PTM int pci_enable_ptm(struct pci_dev *dev, u8 *granularity); #else @@ -1730,6 +1715,21 @@ static inline const struct pci_device_id *pci_match_id(const struct pci_device_i { return NULL; } #endif /* CONFIG_PCI */ +#ifdef CONFIG_PCI_ATS +/* Address Translation Service */ +void pci_ats_init(struct pci_dev *dev); +int pci_enable_ats(struct pci_dev *dev, int ps); +void pci_disable_ats(struct pci_dev *dev); +int pci_ats_queue_depth(struct pci_dev *dev); +int pci_ats_page_aligned(struct pci_dev *dev); +#else +static inline void pci_ats_init(struct pci_dev *d) { } +static inline int pci_enable_ats(struct pci_dev *d, int ps) { return -ENODEV; } +static inline void pci_disable_ats(struct pci_dev *d) { } +static inline int pci_ats_queue_depth(struct pci_dev *d) { return -ENODEV; } +static inline int pci_ats_page_aligned(struct pci_dev *dev) { return 0; } +#endif + /* Include architecture-dependent settings and functions */ #include From b9ae16d80a357c88fcb7ff6831cfdc196f0a34dd Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Wed, 17 Apr 2019 19:24:41 +0100 Subject: [PATCH 25/45] PCI: Add a stub for pci_ats_disabled() Currently pci_ats_disabled() is only defined when CONFIG_PCI is enabled. Since we're about to use the function in the Arm SMMUv3 driver, which could be built with CONFIG_PCI disabled, add a definition of pci_ats_disabled() for !CONFIG_PCI. Acked-by: Bjorn Helgaas Signed-off-by: Jean-Philippe Brucker Signed-off-by: Will Deacon --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/pci.h b/include/linux/pci.h index 169c6a18d0b0..61d7cd888bad 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1713,6 +1713,7 @@ static inline int pci_irqd_intx_xlate(struct irq_domain *d, static inline const struct pci_device_id *pci_match_id(const struct pci_device_id *ids, struct pci_dev *dev) { return NULL; } +static inline bool pci_ats_disabled(void) { return true; } #endif /* CONFIG_PCI */ #ifdef CONFIG_PCI_ATS From 5702ee24182f9b3e33476b74b5c92a4f913ad9bd Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Wed, 17 Apr 2019 19:24:42 +0100 Subject: [PATCH 26/45] ACPI/IORT: Check ATS capability in root complex nodes Root complex node in IORT has a bit telling whether it supports ATS or not. Store this bit in the IOMMU fwspec when setting up a device, so it can be accessed later by an IOMMU driver. In the future we'll probably want to store this bit at the host bridge or SMMU rather than in each endpoint. Acked-by: Lorenzo Pieralisi Signed-off-by: Jean-Philippe Brucker Signed-off-by: Will Deacon --- drivers/acpi/arm64/iort.c | 11 +++++++++++ include/linux/iommu.h | 4 ++++ 2 files changed, 15 insertions(+) diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index e48894e002ba..4000902e57f0 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -1028,6 +1028,14 @@ void iort_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size) dev_dbg(dev, "dma_pfn_offset(%#08llx)\n", offset); } +static bool iort_pci_rc_supports_ats(struct acpi_iort_node *node) +{ + struct acpi_iort_root_complex *pci_rc; + + pci_rc = (struct acpi_iort_root_complex *)node->node_data; + return pci_rc->ats_attribute & ACPI_IORT_ATS_SUPPORTED; +} + /** * iort_iommu_configure - Set-up IOMMU configuration for a device. * @@ -1063,6 +1071,9 @@ const struct iommu_ops *iort_iommu_configure(struct device *dev) info.node = node; err = pci_for_each_dma_alias(to_pci_dev(dev), iort_pci_iommu_init, &info); + + if (!err && iort_pci_rc_supports_ats(node)) + dev->iommu_fwspec->flags |= IOMMU_FWSPEC_PCI_RC_ATS; } else { int i = 0; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index ffbbc7e39cee..6c1b4c900191 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -392,10 +392,14 @@ struct iommu_fwspec { const struct iommu_ops *ops; struct fwnode_handle *iommu_fwnode; void *iommu_priv; + u32 flags; unsigned int num_ids; u32 ids[1]; }; +/* ATS is supported */ +#define IOMMU_FWSPEC_PCI_RC_ATS (1 << 0) + int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode, const struct iommu_ops *ops); void iommu_fwspec_free(struct device *dev); From b54f4260c7df878c231e7b3a3687636d74d83c8f Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Wed, 17 Apr 2019 19:24:43 +0100 Subject: [PATCH 27/45] iommu/arm-smmu-v3: Rename arm_smmu_master_data to arm_smmu_master The arm_smmu_master_data structure already represents more than just the firmware data associated to a master, and will be used extensively to represent a device's state when implementing more SMMU features. Rename the structure to arm_smmu_master. Signed-off-by: Jean-Philippe Brucker Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index d3880010c6cf..50cb037f3d8a 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -591,7 +591,7 @@ struct arm_smmu_device { }; /* SMMU private data for each master */ -struct arm_smmu_master_data { +struct arm_smmu_master { struct arm_smmu_device *smmu; struct arm_smmu_strtab_ent ste; }; @@ -1691,7 +1691,7 @@ static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid) static void arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec) { int i, j; - struct arm_smmu_master_data *master = fwspec->iommu_priv; + struct arm_smmu_master *master = fwspec->iommu_priv; struct arm_smmu_device *smmu = master->smmu; for (i = 0; i < fwspec->num_ids; ++i) { @@ -1712,7 +1712,7 @@ static void arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec) static void arm_smmu_detach_dev(struct device *dev) { struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); - struct arm_smmu_master_data *master = fwspec->iommu_priv; + struct arm_smmu_master *master = fwspec->iommu_priv; master->ste.assigned = false; arm_smmu_install_ste_for_dev(fwspec); @@ -1724,7 +1724,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); struct arm_smmu_device *smmu; struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); - struct arm_smmu_master_data *master; + struct arm_smmu_master *master; struct arm_smmu_strtab_ent *ste; if (!fwspec) @@ -1860,7 +1860,7 @@ static int arm_smmu_add_device(struct device *dev) { int i, ret; struct arm_smmu_device *smmu; - struct arm_smmu_master_data *master; + struct arm_smmu_master *master; struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); struct iommu_group *group; @@ -1913,7 +1913,7 @@ static int arm_smmu_add_device(struct device *dev) static void arm_smmu_remove_device(struct device *dev) { struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); - struct arm_smmu_master_data *master; + struct arm_smmu_master *master; struct arm_smmu_device *smmu; if (!fwspec || fwspec->ops != &arm_smmu_ops) From bcecaee434733d98a9e7a45834f7439b64142eb3 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Wed, 17 Apr 2019 19:24:44 +0100 Subject: [PATCH 28/45] iommu/arm-smmu-v3: Store SteamIDs in master Simplify the attach/detach code a bit by keeping a pointer to the stream IDs in the master structure. Although not completely obvious here, it does make the subsequent support for ATS, PRI and PASID a bit simpler. Signed-off-by: Jean-Philippe Brucker Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 50cb037f3d8a..25ba546cae7f 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -594,6 +594,8 @@ struct arm_smmu_device { struct arm_smmu_master { struct arm_smmu_device *smmu; struct arm_smmu_strtab_ent ste; + u32 *sids; + unsigned int num_sids; }; /* SMMU private data for an IOMMU domain */ @@ -1688,19 +1690,18 @@ static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid) return step; } -static void arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec) +static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master) { int i, j; - struct arm_smmu_master *master = fwspec->iommu_priv; struct arm_smmu_device *smmu = master->smmu; - for (i = 0; i < fwspec->num_ids; ++i) { - u32 sid = fwspec->ids[i]; + for (i = 0; i < master->num_sids; ++i) { + u32 sid = master->sids[i]; __le64 *step = arm_smmu_get_step_for_sid(smmu, sid); /* Bridged PCI devices may end up with duplicated IDs */ for (j = 0; j < i; j++) - if (fwspec->ids[j] == sid) + if (master->sids[j] == sid) break; if (j < i) continue; @@ -1709,13 +1710,10 @@ static void arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec) } } -static void arm_smmu_detach_dev(struct device *dev) +static void arm_smmu_detach_dev(struct arm_smmu_master *master) { - struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); - struct arm_smmu_master *master = fwspec->iommu_priv; - master->ste.assigned = false; - arm_smmu_install_ste_for_dev(fwspec); + arm_smmu_install_ste_for_dev(master); } static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) @@ -1736,7 +1734,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) /* Already attached to a different domain? */ if (ste->assigned) - arm_smmu_detach_dev(dev); + arm_smmu_detach_dev(master); mutex_lock(&smmu_domain->init_mutex); @@ -1770,7 +1768,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) ste->s2_cfg = &smmu_domain->s2_cfg; } - arm_smmu_install_ste_for_dev(fwspec); + arm_smmu_install_ste_for_dev(master); out_unlock: mutex_unlock(&smmu_domain->init_mutex); return ret; @@ -1883,12 +1881,14 @@ static int arm_smmu_add_device(struct device *dev) return -ENOMEM; master->smmu = smmu; + master->sids = fwspec->ids; + master->num_sids = fwspec->num_ids; fwspec->iommu_priv = master; } /* Check the SIDs are in range of the SMMU and our stream table */ - for (i = 0; i < fwspec->num_ids; i++) { - u32 sid = fwspec->ids[i]; + for (i = 0; i < master->num_sids; i++) { + u32 sid = master->sids[i]; if (!arm_smmu_sid_in_range(smmu, sid)) return -ERANGE; @@ -1922,7 +1922,7 @@ static void arm_smmu_remove_device(struct device *dev) master = fwspec->iommu_priv; smmu = master->smmu; if (master && master->ste.assigned) - arm_smmu_detach_dev(dev); + arm_smmu_detach_dev(master); iommu_group_remove_device(dev); iommu_device_unlink(&smmu->iommu, dev); kfree(master); From 8be39a1a04c1491a6a408c1549dfd4e191f3a287 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Wed, 17 Apr 2019 19:24:45 +0100 Subject: [PATCH 29/45] iommu/arm-smmu-v3: Add a master->domain pointer As we're going to track domain-master links more closely for ATS and CD invalidation, add pointer to the attached domain in struct arm_smmu_master. As a result, arm_smmu_strtab_ent is redundant and can be removed. Signed-off-by: Jean-Philippe Brucker Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 92 ++++++++++++++++++------------------- 1 file changed, 45 insertions(+), 47 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 25ba546cae7f..7b425483f4b6 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -505,19 +505,6 @@ struct arm_smmu_s2_cfg { u64 vtcr; }; -struct arm_smmu_strtab_ent { - /* - * An STE is "assigned" if the master emitting the corresponding SID - * is attached to a domain. The behaviour of an unassigned STE is - * determined by the disable_bypass parameter, whereas an assigned - * STE behaves according to s1_cfg/s2_cfg, which themselves are - * configured according to the domain type. - */ - bool assigned; - struct arm_smmu_s1_cfg *s1_cfg; - struct arm_smmu_s2_cfg *s2_cfg; -}; - struct arm_smmu_strtab_cfg { __le64 *strtab; dma_addr_t strtab_dma; @@ -593,7 +580,7 @@ struct arm_smmu_device { /* SMMU private data for each master */ struct arm_smmu_master { struct arm_smmu_device *smmu; - struct arm_smmu_strtab_ent ste; + struct arm_smmu_domain *domain; u32 *sids; unsigned int num_sids; }; @@ -1087,8 +1074,8 @@ static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid) arm_smmu_cmdq_issue_sync(smmu); } -static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid, - __le64 *dst, struct arm_smmu_strtab_ent *ste) +static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, + __le64 *dst) { /* * This is hideously complicated, but we only really care about @@ -1108,6 +1095,10 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid, */ u64 val = le64_to_cpu(dst[0]); bool ste_live = false; + struct arm_smmu_device *smmu = NULL; + struct arm_smmu_s1_cfg *s1_cfg = NULL; + struct arm_smmu_s2_cfg *s2_cfg = NULL; + struct arm_smmu_domain *smmu_domain = NULL; struct arm_smmu_cmdq_ent prefetch_cmd = { .opcode = CMDQ_OP_PREFETCH_CFG, .prefetch = { @@ -1115,6 +1106,25 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid, }, }; + if (master) { + smmu_domain = master->domain; + smmu = master->smmu; + } + + if (smmu_domain) { + switch (smmu_domain->stage) { + case ARM_SMMU_DOMAIN_S1: + s1_cfg = &smmu_domain->s1_cfg; + break; + case ARM_SMMU_DOMAIN_S2: + case ARM_SMMU_DOMAIN_NESTED: + s2_cfg = &smmu_domain->s2_cfg; + break; + default: + break; + } + } + if (val & STRTAB_STE_0_V) { switch (FIELD_GET(STRTAB_STE_0_CFG, val)) { case STRTAB_STE_0_CFG_BYPASS: @@ -1135,8 +1145,8 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid, val = STRTAB_STE_0_V; /* Bypass/fault */ - if (!ste->assigned || !(ste->s1_cfg || ste->s2_cfg)) { - if (!ste->assigned && disable_bypass) + if (!smmu_domain || !(s1_cfg || s2_cfg)) { + if (!smmu_domain && disable_bypass) val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT); else val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS); @@ -1154,7 +1164,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid, return; } - if (ste->s1_cfg) { + if (s1_cfg) { BUG_ON(ste_live); dst[1] = cpu_to_le64( FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) | @@ -1169,22 +1179,22 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid, !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE)) dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD); - val |= (ste->s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK) | + val |= (s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK) | FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS); } - if (ste->s2_cfg) { + if (s2_cfg) { BUG_ON(ste_live); dst[2] = cpu_to_le64( - FIELD_PREP(STRTAB_STE_2_S2VMID, ste->s2_cfg->vmid) | - FIELD_PREP(STRTAB_STE_2_VTCR, ste->s2_cfg->vtcr) | + FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) | + FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) | #ifdef __BIG_ENDIAN STRTAB_STE_2_S2ENDI | #endif STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 | STRTAB_STE_2_S2R); - dst[3] = cpu_to_le64(ste->s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK); + dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK); val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS); } @@ -1201,10 +1211,9 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid, static void arm_smmu_init_bypass_stes(u64 *strtab, unsigned int nent) { unsigned int i; - struct arm_smmu_strtab_ent ste = { .assigned = false }; for (i = 0; i < nent; ++i) { - arm_smmu_write_strtab_ent(NULL, -1, strtab, &ste); + arm_smmu_write_strtab_ent(NULL, -1, strtab); strtab += STRTAB_STE_DWORDS; } } @@ -1706,13 +1715,16 @@ static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master) if (j < i) continue; - arm_smmu_write_strtab_ent(smmu, sid, step, &master->ste); + arm_smmu_write_strtab_ent(master, sid, step); } } static void arm_smmu_detach_dev(struct arm_smmu_master *master) { - master->ste.assigned = false; + if (!master->domain) + return; + + master->domain = NULL; arm_smmu_install_ste_for_dev(master); } @@ -1723,18 +1735,14 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) struct arm_smmu_device *smmu; struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); struct arm_smmu_master *master; - struct arm_smmu_strtab_ent *ste; if (!fwspec) return -ENOENT; master = fwspec->iommu_priv; smmu = master->smmu; - ste = &master->ste; - /* Already attached to a different domain? */ - if (ste->assigned) - arm_smmu_detach_dev(master); + arm_smmu_detach_dev(master); mutex_lock(&smmu_domain->init_mutex); @@ -1754,19 +1762,10 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) goto out_unlock; } - ste->assigned = true; + master->domain = smmu_domain; - if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS) { - ste->s1_cfg = NULL; - ste->s2_cfg = NULL; - } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { - ste->s1_cfg = &smmu_domain->s1_cfg; - ste->s2_cfg = NULL; - arm_smmu_write_ctx_desc(smmu, ste->s1_cfg); - } else { - ste->s1_cfg = NULL; - ste->s2_cfg = &smmu_domain->s2_cfg; - } + if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) + arm_smmu_write_ctx_desc(smmu, &smmu_domain->s1_cfg); arm_smmu_install_ste_for_dev(master); out_unlock: @@ -1921,8 +1920,7 @@ static void arm_smmu_remove_device(struct device *dev) master = fwspec->iommu_priv; smmu = master->smmu; - if (master && master->ste.assigned) - arm_smmu_detach_dev(master); + arm_smmu_detach_dev(master); iommu_group_remove_device(dev); iommu_device_unlink(&smmu->iommu, dev); kfree(master); From 2a7e62f51696ad476b7fb8157d73307e20257df7 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Wed, 17 Apr 2019 19:24:46 +0100 Subject: [PATCH 30/45] iommu/arm-smmu-v3: Link domains and devices When removing a mapping from a domain, we need to send an invalidation to all devices that might have stored it in their Address Translation Cache (ATC). In addition when updating the context descriptor of a live domain, we'll need to send invalidations for all devices attached to it. Maintain a list of devices in each domain, protected by a spinlock. It is updated every time we attach or detach devices to and from domains. It needs to be a spinlock because we'll invalidate ATC entries from within hardirq-safe contexts, but it may be possible to relax the read side with RCU later. Signed-off-by: Jean-Philippe Brucker Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 7b425483f4b6..3e7198ee9530 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -581,6 +581,7 @@ struct arm_smmu_device { struct arm_smmu_master { struct arm_smmu_device *smmu; struct arm_smmu_domain *domain; + struct list_head domain_head; u32 *sids; unsigned int num_sids; }; @@ -607,6 +608,9 @@ struct arm_smmu_domain { }; struct iommu_domain domain; + + struct list_head devices; + spinlock_t devices_lock; }; struct arm_smmu_option_prop { @@ -1504,6 +1508,9 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type) } mutex_init(&smmu_domain->init_mutex); + INIT_LIST_HEAD(&smmu_domain->devices); + spin_lock_init(&smmu_domain->devices_lock); + return &smmu_domain->domain; } @@ -1721,9 +1728,16 @@ static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master) static void arm_smmu_detach_dev(struct arm_smmu_master *master) { - if (!master->domain) + unsigned long flags; + struct arm_smmu_domain *smmu_domain = master->domain; + + if (!smmu_domain) return; + spin_lock_irqsave(&smmu_domain->devices_lock, flags); + list_del(&master->domain_head); + spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); + master->domain = NULL; arm_smmu_install_ste_for_dev(master); } @@ -1731,6 +1745,7 @@ static void arm_smmu_detach_dev(struct arm_smmu_master *master) static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) { int ret = 0; + unsigned long flags; struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); struct arm_smmu_device *smmu; struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); @@ -1764,6 +1779,10 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) master->domain = smmu_domain; + spin_lock_irqsave(&smmu_domain->devices_lock, flags); + list_add(&master->domain_head, &smmu_domain->devices); + spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); + if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) arm_smmu_write_ctx_desc(smmu, &smmu_domain->s1_cfg); From 9ce27afc0830fca07daa3baeb4da44423b0673ef Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Wed, 17 Apr 2019 19:24:47 +0100 Subject: [PATCH 31/45] iommu/arm-smmu-v3: Add support for PCI ATS PCIe devices can implement their own TLB, named Address Translation Cache (ATC). Enable Address Translation Service (ATS) for devices that support it and send them invalidation requests whenever we invalidate the IOTLBs. ATC invalidation is allowed to take up to 90 seconds, according to the PCIe spec, so it is possible to get a SMMU command queue timeout during normal operations. However we expect implementations to complete invalidation in reasonable time. We only enable ATS for "trusted" devices, and currently rely on the pci_dev->untrusted bit. For ATS we have to trust that: (a) The device doesn't issue "translated" memory requests for addresses that weren't returned by the SMMU in a Translation Completion. In particular, if we give control of a device or device partition to a VM or userspace, software cannot program the device to access arbitrary "translated" addresses. (b) The device follows permissions granted by the SMMU in a Translation Completion. If the device requested read+write permission and only got read, then it doesn't write. (c) The device doesn't send Translated transactions for an address that was invalidated by an ATC invalidation. Note that the PCIe specification explicitly requires all of these, so we can assume that implementations will cleanly shield ATCs from software. All ATS translated requests still go through the SMMU, to walk the stream table and check that the device is actually allowed to send translated requests. Signed-off-by: Jean-Philippe Brucker Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 201 ++++++++++++++++++++++++++++++++++-- 1 file changed, 195 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 3e7198ee9530..3bde137a3755 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -86,6 +87,7 @@ #define IDR5_VAX_52_BIT 1 #define ARM_SMMU_CR0 0x20 +#define CR0_ATSCHK (1 << 4) #define CR0_CMDQEN (1 << 3) #define CR0_EVTQEN (1 << 2) #define CR0_PRIQEN (1 << 1) @@ -294,6 +296,7 @@ #define CMDQ_ERR_CERROR_NONE_IDX 0 #define CMDQ_ERR_CERROR_ILL_IDX 1 #define CMDQ_ERR_CERROR_ABT_IDX 2 +#define CMDQ_ERR_CERROR_ATC_INV_IDX 3 #define CMDQ_0_OP GENMASK_ULL(7, 0) #define CMDQ_0_SSV (1UL << 11) @@ -312,6 +315,12 @@ #define CMDQ_TLBI_1_VA_MASK GENMASK_ULL(63, 12) #define CMDQ_TLBI_1_IPA_MASK GENMASK_ULL(51, 12) +#define CMDQ_ATC_0_SSID GENMASK_ULL(31, 12) +#define CMDQ_ATC_0_SID GENMASK_ULL(63, 32) +#define CMDQ_ATC_0_GLOBAL (1UL << 9) +#define CMDQ_ATC_1_SIZE GENMASK_ULL(5, 0) +#define CMDQ_ATC_1_ADDR_MASK GENMASK_ULL(63, 12) + #define CMDQ_PRI_0_SSID GENMASK_ULL(31, 12) #define CMDQ_PRI_0_SID GENMASK_ULL(63, 32) #define CMDQ_PRI_1_GRPID GENMASK_ULL(8, 0) @@ -433,6 +442,16 @@ struct arm_smmu_cmdq_ent { u64 addr; } tlbi; + #define CMDQ_OP_ATC_INV 0x40 + #define ATC_INV_SIZE_ALL 52 + struct { + u32 sid; + u32 ssid; + u64 addr; + u8 size; + bool global; + } atc; + #define CMDQ_OP_PRI_RESP 0x41 struct { u32 sid; @@ -580,10 +599,12 @@ struct arm_smmu_device { /* SMMU private data for each master */ struct arm_smmu_master { struct arm_smmu_device *smmu; + struct device *dev; struct arm_smmu_domain *domain; struct list_head domain_head; u32 *sids; unsigned int num_sids; + bool ats_enabled :1; }; /* SMMU private data for an IOMMU domain */ @@ -813,6 +834,14 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent) case CMDQ_OP_TLBI_S12_VMALL: cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid); break; + case CMDQ_OP_ATC_INV: + cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid); + cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global); + cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid); + cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid); + cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size); + cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK; + break; case CMDQ_OP_PRI_RESP: cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid); cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid); @@ -857,6 +886,7 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu) [CMDQ_ERR_CERROR_NONE_IDX] = "No error", [CMDQ_ERR_CERROR_ILL_IDX] = "Illegal command", [CMDQ_ERR_CERROR_ABT_IDX] = "Abort on command fetch", + [CMDQ_ERR_CERROR_ATC_INV_IDX] = "ATC invalidate timeout", }; int i; @@ -876,6 +906,14 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu) dev_err(smmu->dev, "retrying command fetch\n"); case CMDQ_ERR_CERROR_NONE_IDX: return; + case CMDQ_ERR_CERROR_ATC_INV_IDX: + /* + * ATC Invalidation Completion timeout. CONS is still pointing + * at the CMD_SYNC. Attempt to complete other pending commands + * by repeating the CMD_SYNC, though we might well end up back + * here since the ATC invalidation may still be pending. + */ + return; case CMDQ_ERR_CERROR_ILL_IDX: /* Fallthrough */ default: @@ -992,7 +1030,7 @@ static int __arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu) return ret; } -static void arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu) +static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu) { int ret; bool msi = (smmu->features & ARM_SMMU_FEAT_MSI) && @@ -1002,6 +1040,7 @@ static void arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu) : __arm_smmu_cmdq_issue_sync(smmu); if (ret) dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n"); + return ret; } /* Context descriptor manipulation functions */ @@ -1174,9 +1213,6 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) | FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) | FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) | -#ifdef CONFIG_PCI_ATS - FIELD_PREP(STRTAB_STE_1_EATS, STRTAB_STE_1_EATS_TRANS) | -#endif FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1)); if (smmu->features & ARM_SMMU_FEAT_STALLS && @@ -1203,6 +1239,10 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS); } + if (master->ats_enabled) + dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS, + STRTAB_STE_1_EATS_TRANS)); + arm_smmu_sync_ste_for_sid(smmu, sid); dst[0] = cpu_to_le64(val); arm_smmu_sync_ste_for_sid(smmu, sid); @@ -1405,6 +1445,96 @@ static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev) return IRQ_WAKE_THREAD; } +static void +arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size, + struct arm_smmu_cmdq_ent *cmd) +{ + size_t log2_span; + size_t span_mask; + /* ATC invalidates are always on 4096-bytes pages */ + size_t inval_grain_shift = 12; + unsigned long page_start, page_end; + + *cmd = (struct arm_smmu_cmdq_ent) { + .opcode = CMDQ_OP_ATC_INV, + .substream_valid = !!ssid, + .atc.ssid = ssid, + }; + + if (!size) { + cmd->atc.size = ATC_INV_SIZE_ALL; + return; + } + + page_start = iova >> inval_grain_shift; + page_end = (iova + size - 1) >> inval_grain_shift; + + /* + * In an ATS Invalidate Request, the address must be aligned on the + * range size, which must be a power of two number of page sizes. We + * thus have to choose between grossly over-invalidating the region, or + * splitting the invalidation into multiple commands. For simplicity + * we'll go with the first solution, but should refine it in the future + * if multiple commands are shown to be more efficient. + * + * Find the smallest power of two that covers the range. The most + * significant differing bit between the start and end addresses, + * fls(start ^ end), indicates the required span. For example: + * + * We want to invalidate pages [8; 11]. This is already the ideal range: + * x = 0b1000 ^ 0b1011 = 0b11 + * span = 1 << fls(x) = 4 + * + * To invalidate pages [7; 10], we need to invalidate [0; 15]: + * x = 0b0111 ^ 0b1010 = 0b1101 + * span = 1 << fls(x) = 16 + */ + log2_span = fls_long(page_start ^ page_end); + span_mask = (1ULL << log2_span) - 1; + + page_start &= ~span_mask; + + cmd->atc.addr = page_start << inval_grain_shift; + cmd->atc.size = log2_span; +} + +static int arm_smmu_atc_inv_master(struct arm_smmu_master *master, + struct arm_smmu_cmdq_ent *cmd) +{ + int i; + + if (!master->ats_enabled) + return 0; + + for (i = 0; i < master->num_sids; i++) { + cmd->atc.sid = master->sids[i]; + arm_smmu_cmdq_issue_cmd(master->smmu, cmd); + } + + return arm_smmu_cmdq_issue_sync(master->smmu); +} + +static int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, + int ssid, unsigned long iova, size_t size) +{ + int ret = 0; + unsigned long flags; + struct arm_smmu_cmdq_ent cmd; + struct arm_smmu_master *master; + + if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS)) + return 0; + + arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd); + + spin_lock_irqsave(&smmu_domain->devices_lock, flags); + list_for_each_entry(master, &smmu_domain->devices, domain_head) + ret |= arm_smmu_atc_inv_master(master, &cmd); + spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); + + return ret ? -ETIMEDOUT : 0; +} + /* IO_PGTABLE API */ static void arm_smmu_tlb_sync(void *cookie) { @@ -1726,6 +1856,42 @@ static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master) } } +static int arm_smmu_enable_ats(struct arm_smmu_master *master) +{ + int ret; + size_t stu; + struct pci_dev *pdev; + struct arm_smmu_device *smmu = master->smmu; + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev); + + if (!(smmu->features & ARM_SMMU_FEAT_ATS) || !dev_is_pci(master->dev) || + !(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS) || pci_ats_disabled()) + return -ENXIO; + + pdev = to_pci_dev(master->dev); + if (pdev->untrusted) + return -EPERM; + + /* Smallest Translation Unit: log2 of the smallest supported granule */ + stu = __ffs(smmu->pgsize_bitmap); + + ret = pci_enable_ats(pdev, stu); + if (ret) + return ret; + + master->ats_enabled = true; + return 0; +} + +static void arm_smmu_disable_ats(struct arm_smmu_master *master) +{ + if (!master->ats_enabled || !dev_is_pci(master->dev)) + return; + + pci_disable_ats(to_pci_dev(master->dev)); + master->ats_enabled = false; +} + static void arm_smmu_detach_dev(struct arm_smmu_master *master) { unsigned long flags; @@ -1740,6 +1906,9 @@ static void arm_smmu_detach_dev(struct arm_smmu_master *master) master->domain = NULL; arm_smmu_install_ste_for_dev(master); + + /* Disabling ATS invalidates all ATC entries */ + arm_smmu_disable_ats(master); } static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) @@ -1783,6 +1952,9 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) list_add(&master->domain_head, &smmu_domain->devices); spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); + if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS) + arm_smmu_enable_ats(master); + if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) arm_smmu_write_ctx_desc(smmu, &smmu_domain->s1_cfg); @@ -1806,12 +1978,18 @@ static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova, static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) { - struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops; + int ret; + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops; if (!ops) return 0; - return ops->unmap(ops, iova, size); + ret = ops->unmap(ops, iova, size); + if (ret && arm_smmu_atc_inv_domain(smmu_domain, 0, iova, size)) + return 0; + + return ret; } static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain) @@ -1898,6 +2076,7 @@ static int arm_smmu_add_device(struct device *dev) if (!master) return -ENOMEM; + master->dev = dev; master->smmu = smmu; master->sids = fwspec->ids; master->num_sids = fwspec->num_ids; @@ -2564,6 +2743,16 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass) } } + if (smmu->features & ARM_SMMU_FEAT_ATS) { + enables |= CR0_ATSCHK; + ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0, + ARM_SMMU_CR0ACK); + if (ret) { + dev_err(smmu->dev, "failed to enable ATS check\n"); + return ret; + } + } + ret = arm_smmu_setup_irqs(smmu); if (ret) { dev_err(smmu->dev, "failed to setup irqs\n"); From b2fc9b4b7ff4e6d237b0118e98573c9153363ecd Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Wed, 17 Apr 2019 19:24:48 +0100 Subject: [PATCH 32/45] iommu/arm-smmu-v3: Disable tagged pointers The ARM architecture has a "Top Byte Ignore" (TBI) option that makes the MMU mask out bits [63:56] of an address, allowing a userspace application to store data in its pointers. This option is incompatible with PCI ATS. If TBI is enabled in the SMMU and userspace triggers DMA transactions on tagged pointers, the endpoint might create ATC entries for addresses that include a tag. Software would then have to send ATC invalidation packets for each 255 possible alias of an address, or just wipe the whole address space. This is not a viable option, so disable TBI. The impact of this change is unclear, since there are very few users of tagged pointers, much less SVA. But the requirement introduced by this patch doesn't seem excessive: a userspace application using both tagged pointers and SVA should now sanitize addresses (clear the tag) before using them for device DMA. Signed-off-by: Jean-Philippe Brucker Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 3bde137a3755..99b83fda11e4 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1057,7 +1057,6 @@ static u64 arm_smmu_cpu_tcr_to_cd(u64 tcr) val |= ARM_SMMU_TCR2CD(tcr, EPD0); val |= ARM_SMMU_TCR2CD(tcr, EPD1); val |= ARM_SMMU_TCR2CD(tcr, IPS); - val |= ARM_SMMU_TCR2CD(tcr, TBI0); return val; } From 3f54c447df34ff9efac7809a4a80fd3208efc619 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 23 Apr 2019 11:59:36 +0100 Subject: [PATCH 33/45] iommu/arm-smmu-v3: Don't disable SMMU in kdump kernel Disabling the SMMU when probing from within a kdump kernel so that all incoming transactions are terminated can prevent the core of the crashed kernel from being transferred off the machine if all I/O devices are behind the SMMU. Instead, continue to probe the SMMU after it is disabled so that we can reinitialise it entirely and re-attach the DMA masters as they are reset. Since the kdump kernel may not have drivers for all of the active DMA masters, we suppress fault reporting to avoid spamming the console and swamping the IRQ threads. Reported-by: "Leizhen (ThunderTown)" Tested-by: "Leizhen (ThunderTown)" Tested-by: Bhupesh Sharma Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 99b83fda11e4..4d5a694f02c2 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -2649,13 +2649,9 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass) /* Clear CR0 and sync (disables SMMU and queue processing) */ reg = readl_relaxed(smmu->base + ARM_SMMU_CR0); if (reg & CR0_SMMUEN) { - if (is_kdump_kernel()) { - arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0); - arm_smmu_device_disable(smmu); - return -EBUSY; - } - dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n"); + WARN_ON(is_kdump_kernel() && !disable_bypass); + arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0); } ret = arm_smmu_device_disable(smmu); @@ -2758,6 +2754,8 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass) return ret; } + if (is_kdump_kernel()) + enables &= ~(CR0_EVTQEN | CR0_PRIQEN); /* Enable the SMMU interface, or ensure bypass */ if (!bypass || disable_bypass) { From bc580b56cb7888d1f09fff8a50270228fb834ae8 Mon Sep 17 00:00:00 2001 From: Vivek Gautam Date: Mon, 22 Apr 2019 12:40:36 +0530 Subject: [PATCH 34/45] iommu/arm-smmu: Log CBFRSYNRA register on context fault Bits[15:0] in CBFRSYNRA register contain information about StreamID of the incoming transaction that generated the fault. Dump CBFRSYNRA register to get this info. This is specially useful in a distributed SMMU architecture where multiple masters are connected to the SMMU. SID information helps to quickly identify the faulting master device. Reviewed-by: Bjorn Andersson Reviewed-by: Robin Murphy Acked-by: Ard Biesheuvel Signed-off-by: Vivek Gautam Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-regs.h | 2 ++ drivers/iommu/arm-smmu.c | 8 +++++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/arm-smmu-regs.h b/drivers/iommu/arm-smmu-regs.h index a1226e4ab5f8..e9132a926761 100644 --- a/drivers/iommu/arm-smmu-regs.h +++ b/drivers/iommu/arm-smmu-regs.h @@ -147,6 +147,8 @@ enum arm_smmu_s2cr_privcfg { #define CBAR_IRPTNDX_SHIFT 24 #define CBAR_IRPTNDX_MASK 0xff +#define ARM_SMMU_GR1_CBFRSYNRA(n) (0x400 + ((n) << 2)) + #define ARM_SMMU_GR1_CBA2R(n) (0x800 + ((n) << 2)) #define CBA2R_RW64_32BIT (0 << 0) #define CBA2R_RW64_64BIT (1 << 0) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 930c07635956..5e54cc0a28b3 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -570,12 +570,13 @@ static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v1 = { static irqreturn_t arm_smmu_context_fault(int irq, void *dev) { - u32 fsr, fsynr; + u32 fsr, fsynr, cbfrsynra; unsigned long iova; struct iommu_domain *domain = dev; struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); struct arm_smmu_cfg *cfg = &smmu_domain->cfg; struct arm_smmu_device *smmu = smmu_domain->smmu; + void __iomem *gr1_base = ARM_SMMU_GR1(smmu); void __iomem *cb_base; cb_base = ARM_SMMU_CB(smmu, cfg->cbndx); @@ -586,10 +587,11 @@ static irqreturn_t arm_smmu_context_fault(int irq, void *dev) fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0); iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR); + cbfrsynra = readl_relaxed(gr1_base + ARM_SMMU_GR1_CBFRSYNRA(cfg->cbndx)); dev_err_ratelimited(smmu->dev, - "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cb=%d\n", - fsr, iova, fsynr, cfg->cbndx); + "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n", + fsr, iova, fsynr, cbfrsynra, cfg->cbndx); writel(fsr, cb_base + ARM_SMMU_CB_FSR); return IRQ_HANDLED; From c805b428f206a9097ef8c905d8b7e40bc411100e Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 26 Apr 2019 15:17:20 +0200 Subject: [PATCH 35/45] iommu/amd: Remove amd_iommu_pd_list This variable hold a global list of allocated protection domains in the AMD IOMMU driver. By now this list is never traversed anymore, so the list and the lock protecting it can be removed. Cc: Tom Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 33 --------------------------------- drivers/iommu/amd_iommu_init.c | 8 -------- drivers/iommu/amd_iommu_types.h | 6 ------ 3 files changed, 47 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index f467cc4b498e..3e0696cf965c 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -1723,31 +1723,6 @@ static void dma_ops_free_iova(struct dma_ops_domain *dma_dom, * ****************************************************************************/ -/* - * This function adds a protection domain to the global protection domain list - */ -static void add_domain_to_list(struct protection_domain *domain) -{ - unsigned long flags; - - spin_lock_irqsave(&amd_iommu_pd_lock, flags); - list_add(&domain->list, &amd_iommu_pd_list); - spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); -} - -/* - * This function removes a protection domain to the global - * protection domain list - */ -static void del_domain_from_list(struct protection_domain *domain) -{ - unsigned long flags; - - spin_lock_irqsave(&amd_iommu_pd_lock, flags); - list_del(&domain->list); - spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); -} - static u16 domain_id_alloc(void) { int id; @@ -1838,8 +1813,6 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom) if (!dom) return; - del_domain_from_list(&dom->domain); - put_iova_domain(&dom->iovad); free_pagetable(&dom->domain); @@ -1880,8 +1853,6 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void) /* Initialize reserved ranges */ copy_reserved_iova(&reserved_iova_ranges, &dma_dom->iovad); - add_domain_to_list(&dma_dom->domain); - return dma_dom; free_dma_dom: @@ -2834,8 +2805,6 @@ static void protection_domain_free(struct protection_domain *domain) if (!domain) return; - del_domain_from_list(domain); - if (domain->id) domain_id_free(domain->id); @@ -2865,8 +2834,6 @@ static struct protection_domain *protection_domain_alloc(void) if (protection_domain_init(domain)) goto out_err; - add_domain_to_list(domain); - return domain; out_err: diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 1b1378619fc9..4497c6bb9e70 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -188,12 +188,6 @@ static bool amd_iommu_pc_present __read_mostly; bool amd_iommu_force_isolation __read_mostly; -/* - * List of protection domains - used during resume - */ -LIST_HEAD(amd_iommu_pd_list); -spinlock_t amd_iommu_pd_lock; - /* * Pointer to the device table which is shared by all AMD IOMMUs * it is indexed by the PCI device id or the HT unit id and contains @@ -2526,8 +2520,6 @@ static int __init early_amd_iommu_init(void) */ __set_bit(0, amd_iommu_pd_alloc_bitmap); - spin_lock_init(&amd_iommu_pd_lock); - /* * now the data structures are allocated and basically initialized * start the real acpi table scan diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index 87965e4d9647..85c488b8daea 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -674,12 +674,6 @@ extern struct list_head amd_iommu_list; */ extern struct amd_iommu *amd_iommus[MAX_IOMMUS]; -/* - * Declarations for the global list of all protection domains - */ -extern spinlock_t amd_iommu_pd_lock; -extern struct list_head amd_iommu_pd_list; - /* * Structure defining one entry in the device table */ From 1eb8e4e2b35b4d83cc40cbf379493490741eb5b8 Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Wed, 17 Apr 2019 10:41:19 +0800 Subject: [PATCH 36/45] iommu/mediatek: Fix leaked of_node references The call to of_parse_phandle returns a node pointer with refcount incremented thus it must be explicitly decremented after the last usage. 581 static int mtk_iommu_probe(struct platform_device *pdev) 582 { ... 626 for (i = 0; i < larb_nr; i++) { 627 struct device_node *larbnode; ... 631 larbnode = of_parse_phandle(...); 632 if (!larbnode) 633 return -EINVAL; 634 635 if (!of_device_is_available(larbnode)) 636 continue; ---> leaked here 637 ... 643 if (!plarbdev) 644 return -EPROBE_DEFER; ---> leaked here ... 647 component_match_add_release(dev, &match, release_of, 648 compare_of, larbnode); ---> release_of will call of_node_put 649 } ... 650 Detected by coccinelle with the following warnings: ./drivers/iommu/mtk_iommu.c:644:3-9: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 631, but without a corresponding object release within this function. Signed-off-by: Wen Yang Cc: Joerg Roedel Cc: Matthias Brugger Cc: iommu@lists.linux-foundation.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-mediatek@lists.infradead.org Cc: linux-kernel@vger.kernel.org Reviewed-by: Matthias Brugger Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index de3e02277b70..b66d11b0286e 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -632,16 +632,20 @@ static int mtk_iommu_probe(struct platform_device *pdev) if (!larbnode) return -EINVAL; - if (!of_device_is_available(larbnode)) + if (!of_device_is_available(larbnode)) { + of_node_put(larbnode); continue; + } ret = of_property_read_u32(larbnode, "mediatek,larb-id", &id); if (ret)/* The id is consecutive if there is no this property */ id = i; plarbdev = of_find_device_by_node(larbnode); - if (!plarbdev) + if (!plarbdev) { + of_node_put(larbnode); return -EPROBE_DEFER; + } data->smi_imu.larb_imu[id].dev = &plarbdev->dev; component_match_add_release(dev, &match, release_of, From 553d66cb1e8667aadb57e3804775c5ce1724a49b Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 18 Apr 2019 13:46:24 -0500 Subject: [PATCH 37/45] iommu/vt-d: Use struct_size() helper Make use of the struct_size() helper instead of an open-coded version in order to avoid any potential type mistakes, in particular in the context in which this code is being used. So, replace code of the following form: size = sizeof(*info) + level * sizeof(info->path[0]); with: size = struct_size(info, path, level); Signed-off-by: Gustavo A. R. Silva Signed-off-by: Joerg Roedel --- drivers/iommu/dmar.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 9c49300e9fb7..6d969a172fbb 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -145,7 +145,7 @@ dmar_alloc_pci_notify_info(struct pci_dev *dev, unsigned long event) for (tmp = dev; tmp; tmp = tmp->bus->self) level++; - size = sizeof(*info) + level * sizeof(info->path[0]); + size = struct_size(info, path, level); if (size <= sizeof(dmar_pci_notify_info_buf)) { info = (struct dmar_pci_notify_info *)dmar_pci_notify_info_buf; } else { From a7755c3cfa5df755e39447b08c28203e011fb98c Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Fri, 19 Apr 2019 14:43:29 +0800 Subject: [PATCH 38/45] iommu/vt-d: Don't request page request irq under dmar_global_lock Requesting page reqest irq under dmar_global_lock could cause potential lock race condition (caught by lockdep). [ 4.100055] ====================================================== [ 4.100063] WARNING: possible circular locking dependency detected [ 4.100072] 5.1.0-rc4+ #2169 Not tainted [ 4.100078] ------------------------------------------------------ [ 4.100086] swapper/0/1 is trying to acquire lock: [ 4.100094] 000000007dcbe3c3 (dmar_lock){+.+.}, at: dmar_alloc_hwirq+0x35/0x140 [ 4.100112] but task is already holding lock: [ 4.100120] 0000000060bbe946 (dmar_global_lock){++++}, at: intel_iommu_init+0x191/0x1438 [ 4.100136] which lock already depends on the new lock. [ 4.100146] the existing dependency chain (in reverse order) is: [ 4.100155] -> #2 (dmar_global_lock){++++}: [ 4.100169] down_read+0x44/0xa0 [ 4.100178] intel_irq_remapping_alloc+0xb2/0x7b0 [ 4.100186] mp_irqdomain_alloc+0x9e/0x2e0 [ 4.100195] __irq_domain_alloc_irqs+0x131/0x330 [ 4.100203] alloc_isa_irq_from_domain.isra.4+0x9a/0xd0 [ 4.100212] mp_map_pin_to_irq+0x244/0x310 [ 4.100221] setup_IO_APIC+0x757/0x7ed [ 4.100229] x86_late_time_init+0x17/0x1c [ 4.100238] start_kernel+0x425/0x4e3 [ 4.100247] secondary_startup_64+0xa4/0xb0 [ 4.100254] -> #1 (irq_domain_mutex){+.+.}: [ 4.100265] __mutex_lock+0x7f/0x9d0 [ 4.100273] __irq_domain_add+0x195/0x2b0 [ 4.100280] irq_domain_create_hierarchy+0x3d/0x40 [ 4.100289] msi_create_irq_domain+0x32/0x110 [ 4.100297] dmar_alloc_hwirq+0x111/0x140 [ 4.100305] dmar_set_interrupt.part.14+0x1a/0x70 [ 4.100314] enable_drhd_fault_handling+0x2c/0x6c [ 4.100323] apic_bsp_setup+0x75/0x7a [ 4.100330] x86_late_time_init+0x17/0x1c [ 4.100338] start_kernel+0x425/0x4e3 [ 4.100346] secondary_startup_64+0xa4/0xb0 [ 4.100352] -> #0 (dmar_lock){+.+.}: [ 4.100364] lock_acquire+0xb4/0x1c0 [ 4.100372] __mutex_lock+0x7f/0x9d0 [ 4.100379] dmar_alloc_hwirq+0x35/0x140 [ 4.100389] intel_svm_enable_prq+0x61/0x180 [ 4.100397] intel_iommu_init+0x1128/0x1438 [ 4.100406] pci_iommu_init+0x16/0x3f [ 4.100414] do_one_initcall+0x5d/0x2be [ 4.100422] kernel_init_freeable+0x1f0/0x27c [ 4.100431] kernel_init+0xa/0x110 [ 4.100438] ret_from_fork+0x3a/0x50 [ 4.100444] other info that might help us debug this: [ 4.100454] Chain exists of: dmar_lock --> irq_domain_mutex --> dmar_global_lock [ 4.100469] Possible unsafe locking scenario: [ 4.100476] CPU0 CPU1 [ 4.100483] ---- ---- [ 4.100488] lock(dmar_global_lock); [ 4.100495] lock(irq_domain_mutex); [ 4.100503] lock(dmar_global_lock); [ 4.100512] lock(dmar_lock); [ 4.100518] *** DEADLOCK *** Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Reported-by: Dave Jiang Fixes: a222a7f0bb6c9 ("iommu/vt-d: Implement page request handling") Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index d93c4bd7de75..52311a6d9d3f 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3498,7 +3498,13 @@ static int __init init_dmars(void) #ifdef CONFIG_INTEL_IOMMU_SVM if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) { + /* + * Call dmar_alloc_hwirq() with dmar_global_lock held, + * could cause possible lock race condition. + */ + up_write(&dmar_global_lock); ret = intel_svm_enable_prq(iommu); + down_write(&dmar_global_lock); if (ret) goto free_iommu; } From 095303e0eb566e408435ef1cfa25aef2af2e403e Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 29 Apr 2019 09:16:02 +0800 Subject: [PATCH 39/45] iommu/vt-d: Cleanup: no spaces at the start of a line Replace the whitespaces at the start of a line with tabs. No functional changes. Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 47 +++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 52311a6d9d3f..de47bdb57e48 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2341,32 +2341,33 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, } static int domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, - struct scatterlist *sg, unsigned long phys_pfn, - unsigned long nr_pages, int prot) + struct scatterlist *sg, unsigned long phys_pfn, + unsigned long nr_pages, int prot) { - int ret; - struct intel_iommu *iommu; + int ret; + struct intel_iommu *iommu; - /* Do the real mapping first */ - ret = __domain_mapping(domain, iov_pfn, sg, phys_pfn, nr_pages, prot); - if (ret) - return ret; + /* Do the real mapping first */ + ret = __domain_mapping(domain, iov_pfn, sg, phys_pfn, nr_pages, prot); + if (ret) + return ret; - /* Notify about the new mapping */ - if (domain_type_is_vm(domain)) { - /* VM typed domains can have more than one IOMMUs */ - int iommu_id; - for_each_domain_iommu(iommu_id, domain) { - iommu = g_iommus[iommu_id]; - __mapping_notify_one(iommu, domain, iov_pfn, nr_pages); - } - } else { - /* General domains only have one IOMMU */ - iommu = domain_get_iommu(domain); - __mapping_notify_one(iommu, domain, iov_pfn, nr_pages); - } + /* Notify about the new mapping */ + if (domain_type_is_vm(domain)) { + /* VM typed domains can have more than one IOMMUs */ + int iommu_id; - return 0; + for_each_domain_iommu(iommu_id, domain) { + iommu = g_iommus[iommu_id]; + __mapping_notify_one(iommu, domain, iov_pfn, nr_pages); + } + } else { + /* General domains only have one IOMMU */ + iommu = domain_get_iommu(domain); + __mapping_notify_one(iommu, domain, iov_pfn, nr_pages); + } + + return 0; } static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn, @@ -4083,7 +4084,7 @@ static int init_iommu_hw(void) iommu_disable_protect_mem_regions(iommu); continue; } - + iommu_flush_write_buffer(iommu); iommu_set_root_entry(iommu); From 1a1079011da32db87e19fcb39e70d082f89da921 Mon Sep 17 00:00:00 2001 From: Tom Murphy Date: Mon, 29 Apr 2019 00:47:02 +0100 Subject: [PATCH 40/45] iommu/amd: Flush not present cache in iommu_map_page check if there is a not-present cache present and flush it if there is. Signed-off-by: Tom Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 3e0696cf965c..bc98de5fa867 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -1307,6 +1307,16 @@ static void domain_flush_complete(struct protection_domain *domain) } } +/* Flush the not present cache if it exists */ +static void domain_flush_np_cache(struct protection_domain *domain, + dma_addr_t iova, size_t size) +{ + if (unlikely(amd_iommu_np_cache)) { + domain_flush_pages(domain, iova, size); + domain_flush_complete(domain); + } +} + /* * This function flushes the DTEs for all devices in domain @@ -2389,10 +2399,7 @@ static dma_addr_t __map_single(struct device *dev, } address += offset; - if (unlikely(amd_iommu_np_cache)) { - domain_flush_pages(&dma_dom->domain, address, size); - domain_flush_complete(&dma_dom->domain); - } + domain_flush_np_cache(&dma_dom->domain, address, size); out: return address; @@ -2548,6 +2555,8 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, s->dma_length = s->length; } + domain_flush_np_cache(domain, s->dma_address, s->dma_length); + return nelems; out_unmap: @@ -3005,6 +3014,8 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova, ret = iommu_map_page(domain, iova, paddr, page_size, prot, GFP_KERNEL); mutex_unlock(&domain->api_lock); + domain_flush_np_cache(domain, iova, page_size); + return ret; } From cf1ec4539a50bdfe688caad4615ca47646884316 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 2 May 2019 09:34:25 +0800 Subject: [PATCH 41/45] iommu/vt-d: Set intel_iommu_gfx_mapped correctly The intel_iommu_gfx_mapped flag is exported by the Intel IOMMU driver to indicate whether an IOMMU is used for the graphic device. In a virtualized IOMMU environment (e.g. QEMU), an include-all IOMMU is used for graphic device. This flag is found to be clear even the IOMMU is used. Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Reported-by: Zhenyu Wang Fixes: c0771df8d5297 ("intel-iommu: Export a flag indicating that the IOMMU is used for iGFX.") Suggested-by: Kevin Tian Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index de47bdb57e48..a814c95f73ce 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -4053,9 +4053,7 @@ static void __init init_no_remapping_devices(void) /* This IOMMU has *only* gfx devices. Either bypass it or set the gfx_mapped flag, as appropriate */ - if (dmar_map_gfx) { - intel_iommu_gfx_mapped = 1; - } else { + if (!dmar_map_gfx) { drhd->ignored = 1; for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) @@ -4894,6 +4892,9 @@ int __init intel_iommu_init(void) goto out_free_reserved_range; } + if (dmar_map_gfx) + intel_iommu_gfx_mapped = 1; + init_no_remapping_devices(); ret = init_dmars(); From 5daab58043ee2bca861068e2595564828f3bc663 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 2 May 2019 09:34:26 +0800 Subject: [PATCH 42/45] iommu/vt-d: Make kernel parameter igfx_off work with vIOMMU The kernel parameter igfx_off is used by users to disable DMA remapping for the Intel integrated graphic device. It was designed for bare metal cases where a dedicated IOMMU is used for graphic. This doesn't apply to virtual IOMMU case where an include-all IOMMU is used. This makes the kernel parameter work with virtual IOMMU as well. Cc: Ashok Raj Cc: Jacob Pan Suggested-by: Kevin Tian Fixes: c0771df8d5297 ("intel-iommu: Export a flag indicating that the IOMMU is used for iGFX.") Signed-off-by: Lu Baolu Tested-by: Zhenyu Wang Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index a814c95f73ce..a320bda2c305 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3415,9 +3415,12 @@ static int __init init_dmars(void) iommu_identity_mapping |= IDENTMAP_ALL; #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA - iommu_identity_mapping |= IDENTMAP_GFX; + dmar_map_gfx = 0; #endif + if (!dmar_map_gfx) + iommu_identity_mapping |= IDENTMAP_GFX; + check_tylersburg_isoch(); if (iommu_identity_mapping) { From dca4d60f5f8c9263f8f66724fa4750abd32d8db0 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Tue, 30 Apr 2019 09:30:04 +0200 Subject: [PATCH 43/45] iommu/vt-d: Fix leak in intel_pasid_alloc_table on error path If alloc_pages_node() fails, pasid_table is leaked. Free it. Fixes: cc580e41260db ("iommu/vt-d: Per PCI device pasid table interfaces") Signed-off-by: Eric Auger Signed-off-by: Joerg Roedel --- drivers/iommu/intel-pasid.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c index 03b12d2ee213..2fefeafda437 100644 --- a/drivers/iommu/intel-pasid.c +++ b/drivers/iommu/intel-pasid.c @@ -154,8 +154,10 @@ int intel_pasid_alloc_table(struct device *dev) order = size ? get_order(size) : 0; pages = alloc_pages_node(info->iommu->node, GFP_KERNEL | __GFP_ZERO, order); - if (!pages) + if (!pages) { + kfree(pasid_table); return -ENOMEM; + } pasid_table->table = page_address(pages); pasid_table->order = order; From 89736a0ee81d14439d085c8d4653bc1d86fe64d8 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 6 May 2019 14:24:18 +0200 Subject: [PATCH 44/45] Revert "iommu/amd: Remove the leftover of bypass support" This reverts commit 7a5dbf3ab2f04905cf8468c66fcdbfb643068bcb. This commit not only removes the leftovers of bypass support, it also mostly removes the checking of the return value of the get_domain() function. This can lead to silent data corruption bugs when a device is not attached to its dma_ops domain and a DMA-API function is called for that device. Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 80 ++++++++++++++++++++++++++++++--------- 1 file changed, 63 insertions(+), 17 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index bc98de5fa867..23c1a7eebb06 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2459,10 +2459,20 @@ static dma_addr_t map_page(struct device *dev, struct page *page, unsigned long attrs) { phys_addr_t paddr = page_to_phys(page) + offset; - struct protection_domain *domain = get_domain(dev); - struct dma_ops_domain *dma_dom = to_dma_ops_domain(domain); + struct protection_domain *domain; + struct dma_ops_domain *dma_dom; + u64 dma_mask; - return __map_single(dev, dma_dom, paddr, size, dir, *dev->dma_mask); + domain = get_domain(dev); + if (PTR_ERR(domain) == -EINVAL) + return (dma_addr_t)paddr; + else if (IS_ERR(domain)) + return DMA_MAPPING_ERROR; + + dma_mask = *dev->dma_mask; + dma_dom = to_dma_ops_domain(domain); + + return __map_single(dev, dma_dom, paddr, size, dir, dma_mask); } /* @@ -2471,8 +2481,14 @@ static dma_addr_t map_page(struct device *dev, struct page *page, static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { - struct protection_domain *domain = get_domain(dev); - struct dma_ops_domain *dma_dom = to_dma_ops_domain(domain); + struct protection_domain *domain; + struct dma_ops_domain *dma_dom; + + domain = get_domain(dev); + if (IS_ERR(domain)) + return; + + dma_dom = to_dma_ops_domain(domain); __unmap_single(dma_dom, dma_addr, size, dir); } @@ -2512,13 +2528,20 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, unsigned long attrs) { int mapped_pages = 0, npages = 0, prot = 0, i; - struct protection_domain *domain = get_domain(dev); - struct dma_ops_domain *dma_dom = to_dma_ops_domain(domain); + struct protection_domain *domain; + struct dma_ops_domain *dma_dom; struct scatterlist *s; unsigned long address; - u64 dma_mask = *dev->dma_mask; + u64 dma_mask; int ret; + domain = get_domain(dev); + if (IS_ERR(domain)) + return 0; + + dma_dom = to_dma_ops_domain(domain); + dma_mask = *dev->dma_mask; + npages = sg_num_pages(dev, sglist, nelems); address = dma_ops_alloc_iova(dev, dma_dom, npages, dma_mask); @@ -2592,11 +2615,20 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, int nelems, enum dma_data_direction dir, unsigned long attrs) { - struct protection_domain *domain = get_domain(dev); - struct dma_ops_domain *dma_dom = to_dma_ops_domain(domain); + struct protection_domain *domain; + struct dma_ops_domain *dma_dom; + unsigned long startaddr; + int npages = 2; - __unmap_single(dma_dom, sg_dma_address(sglist) & PAGE_MASK, - sg_num_pages(dev, sglist, nelems) << PAGE_SHIFT, dir); + domain = get_domain(dev); + if (IS_ERR(domain)) + return; + + startaddr = sg_dma_address(sglist) & PAGE_MASK; + dma_dom = to_dma_ops_domain(domain); + npages = sg_num_pages(dev, sglist, nelems); + + __unmap_single(dma_dom, startaddr, npages << PAGE_SHIFT, dir); } /* @@ -2607,11 +2639,16 @@ static void *alloc_coherent(struct device *dev, size_t size, unsigned long attrs) { u64 dma_mask = dev->coherent_dma_mask; - struct protection_domain *domain = get_domain(dev); + struct protection_domain *domain; struct dma_ops_domain *dma_dom; struct page *page; - if (IS_ERR(domain)) + domain = get_domain(dev); + if (PTR_ERR(domain) == -EINVAL) { + page = alloc_pages(flag, get_order(size)); + *dma_addr = page_to_phys(page); + return page_address(page); + } else if (IS_ERR(domain)) return NULL; dma_dom = to_dma_ops_domain(domain); @@ -2657,13 +2694,22 @@ static void free_coherent(struct device *dev, size_t size, void *virt_addr, dma_addr_t dma_addr, unsigned long attrs) { - struct protection_domain *domain = get_domain(dev); - struct dma_ops_domain *dma_dom = to_dma_ops_domain(domain); - struct page *page = virt_to_page(virt_addr); + struct protection_domain *domain; + struct dma_ops_domain *dma_dom; + struct page *page; + page = virt_to_page(virt_addr); size = PAGE_ALIGN(size); + domain = get_domain(dev); + if (IS_ERR(domain)) + goto free_mem; + + dma_dom = to_dma_ops_domain(domain); + __unmap_single(dma_dom, dma_addr, size, DMA_BIDIRECTIONAL); + +free_mem: if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT)) __free_pages(page, get_order(size)); } From 97a18f548548a6ee1b9be14c6fc72090b8839875 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 7 May 2019 09:39:32 +0200 Subject: [PATCH 45/45] Revert "iommu/amd: Flush not present cache in iommu_map_page" This reverts commit 1a1079011da32db87e19fcb39e70d082f89da921. This commit caused a NULL-ptr deference bug and must be reverted for now. Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 23c1a7eebb06..fde16c5b0a70 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -1307,16 +1307,6 @@ static void domain_flush_complete(struct protection_domain *domain) } } -/* Flush the not present cache if it exists */ -static void domain_flush_np_cache(struct protection_domain *domain, - dma_addr_t iova, size_t size) -{ - if (unlikely(amd_iommu_np_cache)) { - domain_flush_pages(domain, iova, size); - domain_flush_complete(domain); - } -} - /* * This function flushes the DTEs for all devices in domain @@ -2399,7 +2389,10 @@ static dma_addr_t __map_single(struct device *dev, } address += offset; - domain_flush_np_cache(&dma_dom->domain, address, size); + if (unlikely(amd_iommu_np_cache)) { + domain_flush_pages(&dma_dom->domain, address, size); + domain_flush_complete(&dma_dom->domain); + } out: return address; @@ -2578,8 +2571,6 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, s->dma_length = s->length; } - domain_flush_np_cache(domain, s->dma_address, s->dma_length); - return nelems; out_unmap: @@ -3060,8 +3051,6 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova, ret = iommu_map_page(domain, iova, paddr, page_size, prot, GFP_KERNEL); mutex_unlock(&domain->api_lock); - domain_flush_np_cache(domain, iova, page_size); - return ret; }