From 4b21a503adf597773e4b37db05db0e9b16a81d53 Mon Sep 17 00:00:00 2001 From: Alexander Monakov Date: Tue, 4 May 2021 13:22:20 +0300 Subject: [PATCH 01/66] iommu/amd: Fix extended features logging print_iommu_info prints the EFR register and then the decoded list of features on a separate line: pci 0000:00:00.2: AMD-Vi: Extended features (0x206d73ef22254ade): PPR X2APIC NX GT IA GA PC GA_vAPIC The second line is emitted via 'pr_cont', which causes it to have a different ('warn') loglevel compared to the previous line ('info'). Commit 9a295ff0ffc9 attempted to rectify this by removing the newline from the pci_info format string, but this doesn't work, as pci_info calls implicitly append a newline anyway. Printing the decoded features on the same line would make it quite long. Instead, change pci_info() to pr_info() to omit PCI bus location info, which is also shown in the preceding message. This results in: pci 0000:00:00.2: AMD-Vi: Found IOMMU cap 0x40 AMD-Vi: Extended features (0x206d73ef22254ade): PPR X2APIC NX GT IA GA PC GA_vAPIC AMD-Vi: Interrupt remapping enabled Fixes: 9a295ff0ffc9 ("iommu/amd: Print extended features in one line to fix divergent log levels") Link: https://lore.kernel.org/lkml/alpine.LNX.2.20.13.2104112326460.11104@monopod.intra.ispras.ru Signed-off-by: Alexander Monakov Cc: Paul Menzel Cc: Joerg Roedel Cc: Suravee Suthikulpanit Cc: iommu@lists.linux-foundation.org Reviewed-by: Paul Menzel Link: https://lore.kernel.org/r/20210504102220.1793-1-amonakov@ispras.ru Signed-off-by: Joerg Roedel --- drivers/iommu/amd/init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index d006724f4dc2..1ded8a69c246 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -1908,8 +1908,8 @@ static void print_iommu_info(void) pci_info(pdev, "Found IOMMU cap 0x%x\n", iommu->cap_ptr); if (iommu->cap & (1 << IOMMU_CAP_EFR)) { - pci_info(pdev, "Extended features (%#llx):", - iommu->features); + pr_info("Extended features (%#llx):", iommu->features); + for (i = 0; i < ARRAY_SIZE(feat_str); ++i) { if (iommu_feature(iommu, (1ULL << i))) pr_cont(" %s", feat_str[i]); From b1e650db2cc4acca6e7c9974f6a2ca232261173a Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 3 Jun 2021 15:02:03 +0200 Subject: [PATCH 02/66] iommu/amd: Add amd_iommu=force_enable option Add this option to enable the IOMMU on platforms like AMD Stoney, where the kernel usually disables it because it may cause problems in some scenarios. Signed-off-by: Joerg Roedel Acked-by: Alex Deucher Link: https://lore.kernel.org/r/20210603130203.29016-1-joro@8bytes.org --- Documentation/admin-guide/kernel-parameters.txt | 3 +++ drivers/iommu/amd/init.c | 9 ++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index cb89dbdedc46..f6bf4e87df80 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -301,6 +301,9 @@ allowed anymore to lift isolation requirements as needed. This option does not override iommu=pt + force_enable - Force enable the IOMMU on platforms known + to be buggy with IOMMU enabled. Use this + option with care. 
amd_iommu_dump= [HW,X86-64] Enable AMD IOMMU driver option to dump the ACPI table diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 1ded8a69c246..00ee9e3caedc 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -153,7 +153,8 @@ int amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC; static int amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE; static bool amd_iommu_detected; -static bool __initdata amd_iommu_disabled; +static bool amd_iommu_disabled __initdata; +static bool amd_iommu_force_enable __initdata; static int amd_iommu_target_ivhd_type; u16 amd_iommu_last_bdf; /* largest PCI device id we have @@ -2834,6 +2835,9 @@ static bool detect_ivrs(void) acpi_put_table(ivrs_base); + if (amd_iommu_force_enable) + goto out; + /* Don't use IOMMU if there is Stoney Ridge graphics */ for (i = 0; i < 32; i++) { u32 pci_id; @@ -2845,6 +2849,7 @@ static bool detect_ivrs(void) } } +out: /* Make sure ACS will be enabled during PCI probe */ pci_request_acs(); @@ -3100,6 +3105,8 @@ static int __init parse_amd_iommu_options(char *str) for (; *str; ++str) { if (strncmp(str, "fullflush", 9) == 0) amd_iommu_unmap_flush = true; + if (strncmp(str, "force_enable", 12) == 0) + amd_iommu_force_enable = true; if (strncmp(str, "off", 3) == 0) amd_iommu_disabled = true; if (strncmp(str, "force_isolation", 15) == 0) From 9197bc958fc98ce17341636b3368f180a23d9653 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 21 May 2021 14:49:39 +0100 Subject: [PATCH 03/66] iommu/rockchip: Remove redundant DMA syncs When we allocate new pagetable pages, we don't modify them between the initial dma_map_single() call and the dma_sync_single_for_device() call via rk_iommu_flush(), so the latter is entirely pointless. Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/5c29a9ff0a20df0167635b1901f94b5195c1fb28.1621604979.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/rockchip-iommu.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index 7a2932772fdf..e5116d6a6260 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -682,7 +682,6 @@ static u32 *rk_dte_get_page_table(struct rk_iommu_domain *rk_domain, dte = rk_mk_dte(pt_dma); *dte_addr = dte; - rk_table_flush(rk_domain, pt_dma, NUM_PT_ENTRIES); rk_table_flush(rk_domain, rk_domain->dt_dma + dte_index * sizeof(u32), 1); done: @@ -1004,8 +1003,6 @@ static struct iommu_domain *rk_iommu_domain_alloc(unsigned type) goto err_free_dt; } - rk_table_flush(rk_domain, rk_domain->dt_dma, NUM_DT_ENTRIES); - spin_lock_init(&rk_domain->iommus_lock); spin_lock_init(&rk_domain->dt_lock); INIT_LIST_HEAD(&rk_domain->iommus); From be227f8e99a663d097536e9f9bc935fb26bdbc41 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 3 Jun 2021 14:48:21 +0100 Subject: [PATCH 04/66] iommu/amd: Tidy up DMA ops init Now that DMA ops are part of the core API via iommu-dma, fold the vestigial remains of the IOMMU_DMA_OPS init state into the IOMMU API phase, and clean up a few other leftovers. This should also close the race window wherein bus_set_iommu() effectively makes the DMA ops state visible before its nominal initialisation - it seems this was previously fairly benign, but since commit a250c23f15c2 ("iommu: remove DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE") it can now lead to the strict flush queue policy inadvertently being picked for default domains allocated during that window, with a corresponding unexpected performance impact. 
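To illustrate the ordering this establishes (a condensed sketch of the init path from the diff below, not a complete listing), the flush policy is now committed before the ops become visible on the bus:

	int __init amd_iommu_init_api(void)
	{
		/* Publish the strict/lazy flush policy first... */
		amd_iommu_init_dma_ops();

		/*
		 * ...so that default domains allocated once the ops are
		 * visible observe the intended policy.
		 */
		return bus_set_iommu(&pci_bus_type, &amd_iommu_ops);
	}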
Reported-by: Jussi Maki Tested-by: Jussi Maki Signed-off-by: Robin Murphy Fixes: a250c23f15c2 ("iommu: remove DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE") Link: https://lore.kernel.org/r/665db61e23ff8d54ac5eb391bef520b3a803fcb9.1622727974.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu.h | 2 -- drivers/iommu/amd/init.c | 5 ----- drivers/iommu/amd/iommu.c | 31 +++++++++++++------------------ 3 files changed, 13 insertions(+), 25 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 55dd38d814d9..416815a525d6 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -11,8 +11,6 @@ #include "amd_iommu_types.h" -extern int amd_iommu_init_dma_ops(void); -extern int amd_iommu_init_passthrough(void); extern irqreturn_t amd_iommu_int_thread(int irq, void *data); extern irqreturn_t amd_iommu_int_handler(int irq, void *data); extern void amd_iommu_apply_erratum_63(u16 devid); diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index d006724f4dc2..a418bf560a4b 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -231,7 +231,6 @@ enum iommu_init_state { IOMMU_ENABLED, IOMMU_PCI_INIT, IOMMU_INTERRUPTS_EN, - IOMMU_DMA_OPS, IOMMU_INITIALIZED, IOMMU_NOT_FOUND, IOMMU_INIT_ERROR, @@ -2895,10 +2894,6 @@ static int __init state_next(void) init_state = ret ? IOMMU_INIT_ERROR : IOMMU_INTERRUPTS_EN; break; case IOMMU_INTERRUPTS_EN: - ret = amd_iommu_init_dma_ops(); - init_state = ret ? IOMMU_INIT_ERROR : IOMMU_DMA_OPS; - break; - case IOMMU_DMA_OPS: init_state = IOMMU_INITIALIZED; break; case IOMMU_INITIALIZED: diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 3ac42bbdefc6..c46dde88a132 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #include #include @@ -1773,13 +1772,22 @@ void amd_iommu_domain_update(struct protection_domain *domain) amd_iommu_domain_flush_complete(domain); } +static void __init amd_iommu_init_dma_ops(void) +{ + swiotlb = (iommu_default_passthrough() || sme_me_mask) ? 1 : 0; + + if (amd_iommu_unmap_flush) + pr_info("IO/TLB flush on unmap enabled\n"); + else + pr_info("Lazy IO/TLB flushing enabled\n"); + iommu_set_dma_strict(amd_iommu_unmap_flush); +} + int __init amd_iommu_init_api(void) { - int ret, err = 0; + int err = 0; - ret = iova_cache_get(); - if (ret) - return ret; + amd_iommu_init_dma_ops(); err = bus_set_iommu(&pci_bus_type, &amd_iommu_ops); if (err) @@ -1796,19 +1804,6 @@ int __init amd_iommu_init_api(void) return 0; } -int __init amd_iommu_init_dma_ops(void) -{ - swiotlb = (iommu_default_passthrough() || sme_me_mask) ? 1 : 0; - - if (amd_iommu_unmap_flush) - pr_info("IO/TLB flush on unmap enabled\n"); - else - pr_info("Lazy IO/TLB flushing enabled\n"); - iommu_set_dma_strict(amd_iommu_unmap_flush); - return 0; - -} - /***************************************************************************** * * The following functions belong to the exported interface of AMD IOMMU From 340ec061f76502befea92d9913886991a60cf15e Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Wed, 19 May 2021 11:37:27 +0800 Subject: [PATCH 05/66] iommu/amd: Remove redundant assignment of err 'err' is always assigned before it is used, so remove the redundant initialization. 
Cc: Joerg Roedel Signed-off-by: Shaokun Zhang Link: https://lore.kernel.org/r/1621395447-34738-1-git-send-email-zhangshaokun@hisilicon.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index c46dde88a132..b1fbf2c83df5 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -1785,7 +1785,7 @@ static void __init amd_iommu_init_dma_ops(void) int __init amd_iommu_init_api(void) { - int err = 0; + int err; amd_iommu_init_dma_ops(); From a51627c5df58480543b0feefbde27d622a49361f Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Sun, 9 May 2021 10:26:07 +0800 Subject: [PATCH 06/66] iommu/arm-smmu-qcom: hook up qcom_smmu_impl for ACPI boot The hookup with qcom_smmu_impl is required for ACPI boot on SC8180X-based devices such as the Lenovo Flex 5G laptop and the Microsoft Surface Pro X. Define an acpi_platform_list for these platforms, match them using acpi_match_platform_list(), and create the qcom_smmu_impl accordingly. (np == NULL) is used to detect ACPI boot, because the SMMU device's fwnode is a static allocation, so helpers like has_acpi_companion() don't work here. Signed-off-by: Shawn Guo Reviewed-by: Robin Murphy Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20210509022607.17534-1-shawn.guo@linaro.org Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index 98b3a1c2a181..f7e187f3fa66 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -3,6 +3,7 @@ * Copyright (c) 2019, The Linux Foundation. All rights reserved. 
*/ +#include #include #include #include @@ -339,10 +340,22 @@ static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = { { } }; +static struct acpi_platform_list qcom_acpi_platlist[] = { + { "LENOVO", "CB-01 ", 0x8180, ACPI_SIG_IORT, equal, "QCOM SMMU" }, + { "QCOM ", "QCOMEDK2", 0x8180, ACPI_SIG_IORT, equal, "QCOM SMMU" }, + { } +}; + struct arm_smmu_device *qcom_smmu_impl_init(struct arm_smmu_device *smmu) { const struct device_node *np = smmu->dev->of_node; + if (np == NULL) { + /* Match platform for ACPI boot */ + if (acpi_match_platform_list(qcom_acpi_platlist) >= 0) + return qcom_smmu_create(smmu, &qcom_smmu_impl); + } + if (of_match_node(qcom_smmu_impl_of_match, np)) return qcom_smmu_create(smmu, &qcom_smmu_impl); From d56d5162e31760ab5b6ffe592aea8494d5567220 Mon Sep 17 00:00:00 2001 From: Bixuan Cui Date: Thu, 20 May 2021 16:42:19 +0800 Subject: [PATCH 07/66] iommu/arm-smmu-v3: Change *array into *const array Fix checkpatch warning in arm-smmu-v3.c: static const char * array should probably be static const char * const Signed-off-by: Bixuan Cui Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 54b2f27b81d4..70afe1de2de9 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -352,7 +352,7 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu, static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu) { - static const char *cerror_str[] = { + static const char * const cerror_str[] = { [CMDQ_ERR_CERROR_NONE_IDX] = "No error", [CMDQ_ERR_CERROR_ILL_IDX] = "Illegal command", [CMDQ_ERR_CERROR_ABT_IDX] = "Abort on command fetch", From 0d97174aeadfc49ad8d281bbcd69fdfeb5fd2fcb Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Thu, 3 Jun 2021 18:46:26 +0200 Subject: [PATCH 08/66] iommu/arm-smmu: Implement ->probe_finalize() Implement a ->probe_finalize() callback that can be used by vendor implementations to perform extra programming necessary after devices have been attached to the SMMU. 
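As a hedged sketch of how a vendor implementation would use the new hook (the callback signature comes from this patch; the names and body below are invented for illustration):

	static void example_probe_finalize(struct arm_smmu_device *smmu,
					   struct device *dev)
	{
		/* extra programming now that dev is attached to the SMMU */
	}

	static const struct arm_smmu_impl example_impl = {
		.probe_finalize = example_probe_finalize,
	};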
Signed-off-by: Thierry Reding Link: https://lore.kernel.org/r/20210603164632.1000458-4-thierry.reding@gmail.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu.c | 13 +++++++++++++ drivers/iommu/arm/arm-smmu/arm-smmu.h | 1 + 2 files changed, 14 insertions(+) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index 6f72c4d208ca..d20ce4d57df2 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -1450,6 +1450,18 @@ static void arm_smmu_release_device(struct device *dev) iommu_fwspec_free(dev); } +static void arm_smmu_probe_finalize(struct device *dev) +{ + struct arm_smmu_master_cfg *cfg; + struct arm_smmu_device *smmu; + + cfg = dev_iommu_priv_get(dev); + smmu = cfg->smmu; + + if (smmu->impl->probe_finalize) + smmu->impl->probe_finalize(smmu, dev); +} + static struct iommu_group *arm_smmu_device_group(struct device *dev) { struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev); @@ -1569,6 +1581,7 @@ static struct iommu_ops arm_smmu_ops = { .iova_to_phys = arm_smmu_iova_to_phys, .probe_device = arm_smmu_probe_device, .release_device = arm_smmu_release_device, + .probe_finalize = arm_smmu_probe_finalize, .device_group = arm_smmu_device_group, .enable_nesting = arm_smmu_enable_nesting, .set_pgtable_quirks = arm_smmu_set_pgtable_quirks, diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h index c31a59d35c64..147c95e7c59c 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h @@ -439,6 +439,7 @@ struct arm_smmu_impl { struct device *dev, int start); void (*write_s2cr)(struct arm_smmu_device *smmu, int idx); void (*write_sctlr)(struct arm_smmu_device *smmu, int idx, u32 reg); + void (*probe_finalize)(struct arm_smmu_device *smmu, struct device *dev); }; #define INVALID_SMENDX -1 From 6321484d1c2416ce08f4ffc47b9f3d2bacc6819e Mon Sep 17 00:00:00 2001 From: Martin Botka Date: Sun, 23 May 2021 23:25:33 +0200 Subject: [PATCH 09/66] iommu/arm-smmu-qcom: Add sm6125 compatible Add compatible for SM6125 SoC Signed-off-by: Martin Botka Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20210523212535.740979-1-martin.botka@somainline.org Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index f7e187f3fa66..70870a9bf87c 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -334,6 +334,7 @@ static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = { { .compatible = "qcom,sc8180x-smmu-500" }, { .compatible = "qcom,sdm630-smmu-v2" }, { .compatible = "qcom,sdm845-smmu-500" }, + { .compatible = "qcom,sm6125-smmu-500" }, { .compatible = "qcom,sm8150-smmu-500" }, { .compatible = "qcom,sm8250-smmu-500" }, { .compatible = "qcom,sm8350-smmu-500" }, From a242f4297cfe3f4589a7620dcd42cc503607fc6b Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 26 Mar 2021 16:13:02 -0700 Subject: [PATCH 10/66] iommu/arm-smmu-qcom: Skip the TTBR1 quirk for db820c. db820c wants to use the qcom smmu path to get HUPCF set (which keeps the GPU from wedging and then sometimes wedging the kernel after a page fault), but it doesn't have separate pagetables support yet in drm/msm so we can't go all the way to the TTBR1 path. 
Signed-off-by: Eric Anholt Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20210326231303.3071950-1-eric@anholt.net Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index 70870a9bf87c..6f70f0e57c64 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -131,6 +131,16 @@ static int qcom_adreno_smmu_alloc_context_bank(struct arm_smmu_domain *smmu_doma return __arm_smmu_alloc_bitmap(smmu->context_map, start, count); } +static bool qcom_adreno_can_do_ttbr1(struct arm_smmu_device *smmu) +{ + const struct device_node *np = smmu->dev->of_node; + + if (of_device_is_compatible(np, "qcom,msm8996-smmu-v2")) + return false; + + return true; +} + static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain, struct io_pgtable_cfg *pgtbl_cfg, struct device *dev) { @@ -145,7 +155,8 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain, * be AARCH64 stage 1 but double check because the arm-smmu code assumes * that is the case when the TTBR1 quirk is enabled */ - if ((smmu_domain->stage == ARM_SMMU_DOMAIN_S1) && + if (qcom_adreno_can_do_ttbr1(smmu_domain->smmu) && + (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) && (smmu_domain->cfg.fmt == ARM_SMMU_CTX_FMT_AARCH64)) pgtbl_cfg->quirks |= IO_PGTABLE_QUIRK_ARM_TTBR1; From 19c07b91f85dfcf058ce304374988c9f83b5066c Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 26 Mar 2021 16:13:03 -0700 Subject: [PATCH 11/66] arm64: dts: msm8996: Mark the GPU's SMMU as an adreno one. This enables the adreno-specific SMMU path that sets HUPCF so (user-managed) page faults don't wedge the GPU. Signed-off-by: Eric Anholt Acked-by: Bjorn Andersson Link: https://lore.kernel.org/r/20210326231303.3071950-2-eric@anholt.net Signed-off-by: Will Deacon --- arch/arm64/boot/dts/qcom/msm8996.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi index ce430ba9c118..5d06216c5c1c 100644 --- a/arch/arm64/boot/dts/qcom/msm8996.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi @@ -1136,7 +1136,7 @@ cci_i2c1: i2c-bus@1 { }; adreno_smmu: iommu@b40000 { - compatible = "qcom,msm8996-smmu-v2", "qcom,smmu-v2"; + compatible = "qcom,msm8996-smmu-v2", "qcom,adreno-smmu", "qcom,smmu-v2"; reg = <0x00b40000 0x10000>; #global-interrupts = <1>; From ed1d08b9d0c9baed54a74073eae6c28d1e5422e8 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Wed, 26 May 2021 18:19:26 +0200 Subject: [PATCH 12/66] dt-bindings: Document stall property for IOMMU masters On ARM systems, some platform devices behind an IOMMU may support stall, which is the ability to recover from page faults. Let the firmware tell us when a device supports stall. 
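For context, a driver consumes the property through the generic device-property API; a minimal sketch, mirroring what the SMMUv3 driver does later in this series:

	/* Only opt into stalling when firmware says the master supports it */
	if (device_property_read_bool(dev, "dma-can-stall"))
		master->stall_enabled = true;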
Reviewed-by: Eric Auger Reviewed-by: Rob Herring Signed-off-by: Jean-Philippe Brucker Link: https://lore.kernel.org/r/20210526161927.24268-2-jean-philippe@linaro.org Signed-off-by: Will Deacon --- .../devicetree/bindings/iommu/iommu.txt | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/Documentation/devicetree/bindings/iommu/iommu.txt b/Documentation/devicetree/bindings/iommu/iommu.txt index 3c36334e4f94..26ba9e530f13 100644 --- a/Documentation/devicetree/bindings/iommu/iommu.txt +++ b/Documentation/devicetree/bindings/iommu/iommu.txt @@ -92,6 +92,24 @@ Optional properties: tagging DMA transactions with an address space identifier. By default, this is 0, which means that the device only has one address space. +- dma-can-stall: When present, the master can wait for a transaction to + complete for an indefinite amount of time. Upon translation fault some + IOMMUs, instead of aborting the translation immediately, may first + notify the driver and keep the transaction in flight. This allows the OS + to inspect the fault and, for example, make physical pages resident + before updating the mappings and completing the transaction. Such IOMMU + accepts a limited number of simultaneous stalled transactions before + having to either put back-pressure on the master, or abort new faulting + transactions. + + Firmware has to opt-in stalling, because most buses and masters don't + support it. In particular it isn't compatible with PCI, where + transactions have to complete before a time limit. More generally it + won't work in systems and masters that haven't been designed for + stalling. For example the OS, in order to handle a stalled transaction, + may attempt to retrieve pages from secondary storage in a stalled + domain, leading to a deadlock. + Notes: ====== From 6522b1e0c78fc6947b58178446ca851690374f0d Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Wed, 26 May 2021 18:19:27 +0200 Subject: [PATCH 13/66] ACPI/IORT: Enable stall support for platform devices Copy the "Stall supported" bit, that tells whether a named component supports stall, into the dma-can-stall device property. 
Acked-by: Hanjun Guo Acked-by: Jonathan Cameron Acked-by: Lorenzo Pieralisi Signed-off-by: Jean-Philippe Brucker Link: https://lore.kernel.org/r/20210526161927.24268-3-jean-philippe@linaro.org Signed-off-by: Will Deacon --- drivers/acpi/arm64/iort.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index 3912a1f6058e..0828f70cb782 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -968,13 +968,15 @@ static int iort_pci_iommu_init(struct pci_dev *pdev, u16 alias, void *data) static void iort_named_component_init(struct device *dev, struct acpi_iort_node *node) { - struct property_entry props[2] = {}; + struct property_entry props[3] = {}; struct acpi_iort_named_component *nc; nc = (struct acpi_iort_named_component *)node->node_data; props[0] = PROPERTY_ENTRY_U32("pasid-num-bits", FIELD_GET(ACPI_IORT_NC_PASID_BITS, nc->node_flags)); + if (nc->node_flags & ACPI_IORT_NC_STALL_SUPPORTED) + props[1] = PROPERTY_ENTRY_BOOL("dma-can-stall"); if (device_add_properties(dev, props)) dev_warn(dev, "Could not add device properties\n"); From 395ad89d11fd93f79a6b942e91fc409807a63c4b Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Wed, 26 May 2021 18:19:28 +0200 Subject: [PATCH 14/66] iommu/arm-smmu-v3: Add stall support for platform devices The SMMU provides a Stall model for handling page faults in platform devices. It is similar to PCIe PRI, but doesn't require devices to have their own translation cache. Instead, faulting transactions are parked and the OS is given a chance to fix the page tables and retry the transaction. Enable stall for devices that support it (opt-in by firmware). When an event corresponds to a translation error, call the IOMMU fault handler. If the fault is recoverable, it will call us back to terminate or continue the stall. To use stall, device drivers need to enable IOMMU_DEV_FEAT_IOPF, which initializes the fault queue for the device. 
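From a device driver's point of view, the opt-in sequence looks roughly like this (a minimal sketch using the generic IOMMU feature API; error handling trimmed):

	/* IOPF must be enabled before SVA, since SVA relies on the fault queue */
	ret = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_IOPF);
	if (ret)
		return ret;

	ret = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA);
	if (ret) {
		iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_IOPF);
		return ret;
	}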
Tested-by: Zhangfei Gao Reviewed-by: Eric Auger Reviewed-by: Jonathan Cameron Signed-off-by: Jean-Philippe Brucker Link: https://lore.kernel.org/r/20210526161927.24268-4-jean-philippe@linaro.org Signed-off-by: Will Deacon --- .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 59 +++++- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 196 +++++++++++++++++- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 43 ++++ 3 files changed, 283 insertions(+), 15 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index bb251cab61f3..ee66d1f4cb81 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -435,9 +435,13 @@ bool arm_smmu_sva_supported(struct arm_smmu_device *smmu) return true; } -static bool arm_smmu_iopf_supported(struct arm_smmu_master *master) +bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master) { - return false; + /* We're not keeping track of SIDs in fault events */ + if (master->num_streams != 1) + return false; + + return master->stall_enabled; } bool arm_smmu_master_sva_supported(struct arm_smmu_master *master) @@ -445,8 +449,8 @@ bool arm_smmu_master_sva_supported(struct arm_smmu_master *master) if (!(master->smmu->features & ARM_SMMU_FEAT_SVA)) return false; - /* SSID and IOPF support are mandatory for the moment */ - return master->ssid_bits && arm_smmu_iopf_supported(master); + /* SSID support is mandatory for the moment */ + return master->ssid_bits; } bool arm_smmu_master_sva_enabled(struct arm_smmu_master *master) @@ -459,13 +463,55 @@ bool arm_smmu_master_sva_enabled(struct arm_smmu_master *master) return enabled; } +static int arm_smmu_master_sva_enable_iopf(struct arm_smmu_master *master) +{ + int ret; + struct device *dev = master->dev; + + /* + * Drivers for devices supporting PRI or stall should enable IOPF first. + * Others have device-specific fault handlers and don't need IOPF. 
+ */ + if (!arm_smmu_master_iopf_supported(master)) + return 0; + + if (!master->iopf_enabled) + return -EINVAL; + + ret = iopf_queue_add_device(master->smmu->evtq.iopf, dev); + if (ret) + return ret; + + ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev); + if (ret) { + iopf_queue_remove_device(master->smmu->evtq.iopf, dev); + return ret; + } + return 0; +} + +static void arm_smmu_master_sva_disable_iopf(struct arm_smmu_master *master) +{ + struct device *dev = master->dev; + + if (!master->iopf_enabled) + return; + + iommu_unregister_device_fault_handler(dev); + iopf_queue_remove_device(master->smmu->evtq.iopf, dev); +} + int arm_smmu_master_enable_sva(struct arm_smmu_master *master) { + int ret; + mutex_lock(&sva_lock); - master->sva_enabled = true; + ret = arm_smmu_master_sva_enable_iopf(master); + if (!ret) + master->sva_enabled = true; mutex_unlock(&sva_lock); - return 0; + return ret; } int arm_smmu_master_disable_sva(struct arm_smmu_master *master) @@ -476,6 +522,7 @@ int arm_smmu_master_disable_sva(struct arm_smmu_master *master) mutex_unlock(&sva_lock); return -EBUSY; } + arm_smmu_master_sva_disable_iopf(master); master->sva_enabled = false; mutex_unlock(&sva_lock); diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 70afe1de2de9..67866bb2b3fd 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -32,6 +32,7 @@ #include #include "arm-smmu-v3.h" +#include "../../iommu-sva-lib.h" static bool disable_bypass = true; module_param(disable_bypass, bool, 0444); @@ -313,6 +314,11 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent) } cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp); break; + case CMDQ_OP_RESUME: + cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_SID, ent->resume.sid); + cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_RESP, ent->resume.resp); + cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag); + break; case CMDQ_OP_CMD_SYNC: if (ent->sync.msiaddr) { cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ); @@ -876,6 +882,44 @@ static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu, return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true); } +static int arm_smmu_page_response(struct device *dev, + struct iommu_fault_event *unused, + struct iommu_page_response *resp) +{ + struct arm_smmu_cmdq_ent cmd = {0}; + struct arm_smmu_master *master = dev_iommu_priv_get(dev); + int sid = master->streams[0].id; + + if (master->stall_enabled) { + cmd.opcode = CMDQ_OP_RESUME; + cmd.resume.sid = sid; + cmd.resume.stag = resp->grpid; + switch (resp->code) { + case IOMMU_PAGE_RESP_INVALID: + case IOMMU_PAGE_RESP_FAILURE: + cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT; + break; + case IOMMU_PAGE_RESP_SUCCESS: + cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY; + break; + default: + return -EINVAL; + } + } else { + return -ENODEV; + } + + arm_smmu_cmdq_issue_cmd(master->smmu, &cmd); + /* + * Don't send a SYNC, it doesn't do anything for RESUME or PRI_RESP. + * RESUME consumption guarantees that the stalled transaction will be + * terminated... at some point in the future. PRI_RESP is fire and + * forget. 
+ */ + + return 0; +} + /* Context descriptor manipulation functions */ void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid) { @@ -986,7 +1030,6 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid, u64 val; bool cd_live; __le64 *cdptr; - struct arm_smmu_device *smmu = smmu_domain->smmu; if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax))) return -E2BIG; @@ -1031,8 +1074,7 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid, FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) | CTXDESC_CD_0_V; - /* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */ - if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE) + if (smmu_domain->stall_enabled) val |= CTXDESC_CD_0_S; } @@ -1276,7 +1318,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, FIELD_PREP(STRTAB_STE_1_STRW, strw)); if (smmu->features & ARM_SMMU_FEAT_STALLS && - !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE)) + !master->stall_enabled) dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD); val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) | @@ -1353,7 +1395,6 @@ static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid) return 0; } -__maybe_unused static struct arm_smmu_master * arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid) { @@ -1377,9 +1418,103 @@ arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid) } /* IRQ and event handlers */ +static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt) +{ + int ret; + u32 reason; + u32 perm = 0; + struct arm_smmu_master *master; + bool ssid_valid = evt[0] & EVTQ_0_SSV; + u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]); + struct iommu_fault_event fault_evt = { }; + struct iommu_fault *flt = &fault_evt.fault; + + switch (FIELD_GET(EVTQ_0_ID, evt[0])) { + case EVT_ID_TRANSLATION_FAULT: + reason = IOMMU_FAULT_REASON_PTE_FETCH; + break; + case EVT_ID_ADDR_SIZE_FAULT: + reason = IOMMU_FAULT_REASON_OOR_ADDRESS; + break; + case EVT_ID_ACCESS_FAULT: + reason = IOMMU_FAULT_REASON_ACCESS; + break; + case EVT_ID_PERMISSION_FAULT: + reason = IOMMU_FAULT_REASON_PERMISSION; + break; + default: + return -EOPNOTSUPP; + } + + /* Stage-2 is always pinned at the moment */ + if (evt[1] & EVTQ_1_S2) + return -EFAULT; + + if (evt[1] & EVTQ_1_RnW) + perm |= IOMMU_FAULT_PERM_READ; + else + perm |= IOMMU_FAULT_PERM_WRITE; + + if (evt[1] & EVTQ_1_InD) + perm |= IOMMU_FAULT_PERM_EXEC; + + if (evt[1] & EVTQ_1_PnU) + perm |= IOMMU_FAULT_PERM_PRIV; + + if (evt[1] & EVTQ_1_STALL) { + flt->type = IOMMU_FAULT_PAGE_REQ; + flt->prm = (struct iommu_fault_page_request) { + .flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE, + .grpid = FIELD_GET(EVTQ_1_STAG, evt[1]), + .perm = perm, + .addr = FIELD_GET(EVTQ_2_ADDR, evt[2]), + }; + + if (ssid_valid) { + flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID; + flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]); + } + } else { + flt->type = IOMMU_FAULT_DMA_UNRECOV; + flt->event = (struct iommu_fault_unrecoverable) { + .reason = reason, + .flags = IOMMU_FAULT_UNRECOV_ADDR_VALID, + .perm = perm, + .addr = FIELD_GET(EVTQ_2_ADDR, evt[2]), + }; + + if (ssid_valid) { + flt->event.flags |= IOMMU_FAULT_UNRECOV_PASID_VALID; + flt->event.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]); + } + } + + mutex_lock(&smmu->streams_mutex); + master = arm_smmu_find_master(smmu, sid); + if (!master) { + ret = -EINVAL; + goto out_unlock; + } + + ret = iommu_report_device_fault(master->dev, &fault_evt); + if (ret && flt->type == IOMMU_FAULT_PAGE_REQ) { + /* Nobody cared, abort the access */ + struct iommu_page_response 
resp = { + .pasid = flt->prm.pasid, + .grpid = flt->prm.grpid, + .code = IOMMU_PAGE_RESP_FAILURE, + }; + arm_smmu_page_response(master->dev, &fault_evt, &resp); + } + +out_unlock: + mutex_unlock(&smmu->streams_mutex); + return ret; +} + static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev) { - int i; + int i, ret; struct arm_smmu_device *smmu = dev; struct arm_smmu_queue *q = &smmu->evtq.q; struct arm_smmu_ll_queue *llq = &q->llq; @@ -1389,6 +1524,10 @@ static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev) while (!queue_remove_raw(q, evt)) { u8 id = FIELD_GET(EVTQ_0_ID, evt[0]); + ret = arm_smmu_handle_evt(smmu, evt); + if (!ret) + continue; + dev_info(smmu->dev, "event 0x%02x received:\n", id); for (i = 0; i < ARRAY_SIZE(evt); ++i) dev_info(smmu->dev, "\t0x%016llx\n", @@ -1923,6 +2062,8 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain, cfg->s1cdmax = master->ssid_bits; + smmu_domain->stall_enabled = master->stall_enabled; + ret = arm_smmu_alloc_cd_tables(smmu_domain); if (ret) goto out_free_asid; @@ -2270,6 +2411,12 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) smmu_domain->s1_cfg.s1cdmax, master->ssid_bits); ret = -EINVAL; goto out_unlock; + } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 && + smmu_domain->stall_enabled != master->stall_enabled) { + dev_err(dev, "cannot attach to stall-%s domain\n", + smmu_domain->stall_enabled ? "enabled" : "disabled"); + ret = -EINVAL; + goto out_unlock; } master->domain = smmu_domain; @@ -2508,6 +2655,11 @@ static struct iommu_device *arm_smmu_probe_device(struct device *dev) master->ssid_bits = min_t(u8, master->ssid_bits, CTXDESC_LINEAR_CDMAX); + if ((smmu->features & ARM_SMMU_FEAT_STALLS && + device_property_read_bool(dev, "dma-can-stall")) || + smmu->features & ARM_SMMU_FEAT_STALL_FORCE) + master->stall_enabled = true; + return &smmu->iommu; err_free_master: @@ -2525,7 +2677,8 @@ static void arm_smmu_release_device(struct device *dev) return; master = dev_iommu_priv_get(dev); - WARN_ON(arm_smmu_master_sva_enabled(master)); + if (WARN_ON(arm_smmu_master_sva_enabled(master))) + iopf_queue_remove_device(master->smmu->evtq.iopf, dev); arm_smmu_detach_dev(master); arm_smmu_disable_pasid(master); arm_smmu_remove_master(master); @@ -2595,6 +2748,8 @@ static bool arm_smmu_dev_has_feature(struct device *dev, return false; switch (feat) { + case IOMMU_DEV_FEAT_IOPF: + return arm_smmu_master_iopf_supported(master); case IOMMU_DEV_FEAT_SVA: return arm_smmu_master_sva_supported(master); default: @@ -2611,6 +2766,8 @@ static bool arm_smmu_dev_feature_enabled(struct device *dev, return false; switch (feat) { + case IOMMU_DEV_FEAT_IOPF: + return master->iopf_enabled; case IOMMU_DEV_FEAT_SVA: return arm_smmu_master_sva_enabled(master); default: @@ -2621,6 +2778,8 @@ static bool arm_smmu_dev_feature_enabled(struct device *dev, static int arm_smmu_dev_enable_feature(struct device *dev, enum iommu_dev_features feat) { + struct arm_smmu_master *master = dev_iommu_priv_get(dev); + if (!arm_smmu_dev_has_feature(dev, feat)) return -ENODEV; @@ -2628,8 +2787,11 @@ static int arm_smmu_dev_enable_feature(struct device *dev, return -EBUSY; switch (feat) { + case IOMMU_DEV_FEAT_IOPF: + master->iopf_enabled = true; + return 0; case IOMMU_DEV_FEAT_SVA: - return arm_smmu_master_enable_sva(dev_iommu_priv_get(dev)); + return arm_smmu_master_enable_sva(master); default: return -EINVAL; } @@ -2638,12 +2800,19 @@ static int arm_smmu_dev_enable_feature(struct device *dev, static int 
arm_smmu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat) { + struct arm_smmu_master *master = dev_iommu_priv_get(dev); + if (!arm_smmu_dev_feature_enabled(dev, feat)) return -EINVAL; switch (feat) { + case IOMMU_DEV_FEAT_IOPF: + if (master->sva_enabled) + return -EBUSY; + master->iopf_enabled = false; + return 0; case IOMMU_DEV_FEAT_SVA: - return arm_smmu_master_disable_sva(dev_iommu_priv_get(dev)); + return arm_smmu_master_disable_sva(master); default: return -EINVAL; } @@ -2673,6 +2842,7 @@ static struct iommu_ops arm_smmu_ops = { .sva_bind = arm_smmu_sva_bind, .sva_unbind = arm_smmu_sva_unbind, .sva_get_pasid = arm_smmu_sva_get_pasid, + .page_response = arm_smmu_page_response, .pgsize_bitmap = -1UL, /* Restricted during device attach */ .owner = THIS_MODULE, }; @@ -2771,6 +2941,13 @@ static int arm_smmu_init_queues(struct arm_smmu_device *smmu) if (ret) return ret; + if ((smmu->features & ARM_SMMU_FEAT_SVA) && + (smmu->features & ARM_SMMU_FEAT_STALLS)) { + smmu->evtq.iopf = iopf_queue_alloc(dev_name(smmu->dev)); + if (!smmu->evtq.iopf) + return -ENOMEM; + } + /* priq */ if (!(smmu->features & ARM_SMMU_FEAT_PRI)) return 0; @@ -3683,6 +3860,7 @@ static int arm_smmu_device_remove(struct platform_device *pdev) iommu_device_unregister(&smmu->iommu); iommu_device_sysfs_remove(&smmu->iommu); arm_smmu_device_disable(smmu); + iopf_queue_free(smmu->evtq.iopf); return 0; } diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 46e8c49214a8..cd48590ada30 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -354,6 +354,13 @@ #define CMDQ_PRI_1_GRPID GENMASK_ULL(8, 0) #define CMDQ_PRI_1_RESP GENMASK_ULL(13, 12) +#define CMDQ_RESUME_0_RESP_TERM 0UL +#define CMDQ_RESUME_0_RESP_RETRY 1UL +#define CMDQ_RESUME_0_RESP_ABORT 2UL +#define CMDQ_RESUME_0_RESP GENMASK_ULL(13, 12) +#define CMDQ_RESUME_0_SID GENMASK_ULL(63, 32) +#define CMDQ_RESUME_1_STAG GENMASK_ULL(15, 0) + #define CMDQ_SYNC_0_CS GENMASK_ULL(13, 12) #define CMDQ_SYNC_0_CS_NONE 0 #define CMDQ_SYNC_0_CS_IRQ 1 @@ -370,6 +377,25 @@ #define EVTQ_0_ID GENMASK_ULL(7, 0) +#define EVT_ID_TRANSLATION_FAULT 0x10 +#define EVT_ID_ADDR_SIZE_FAULT 0x11 +#define EVT_ID_ACCESS_FAULT 0x12 +#define EVT_ID_PERMISSION_FAULT 0x13 + +#define EVTQ_0_SSV (1UL << 11) +#define EVTQ_0_SSID GENMASK_ULL(31, 12) +#define EVTQ_0_SID GENMASK_ULL(63, 32) +#define EVTQ_1_STAG GENMASK_ULL(15, 0) +#define EVTQ_1_STALL (1UL << 31) +#define EVTQ_1_PnU (1UL << 33) +#define EVTQ_1_InD (1UL << 34) +#define EVTQ_1_RnW (1UL << 35) +#define EVTQ_1_S2 (1UL << 39) +#define EVTQ_1_CLASS GENMASK_ULL(41, 40) +#define EVTQ_1_TT_READ (1UL << 44) +#define EVTQ_2_ADDR GENMASK_ULL(63, 0) +#define EVTQ_3_IPA GENMASK_ULL(51, 12) + /* PRI queue */ #define PRIQ_ENT_SZ_SHIFT 4 #define PRIQ_ENT_DWORDS ((1 << PRIQ_ENT_SZ_SHIFT) >> 3) @@ -462,6 +488,13 @@ struct arm_smmu_cmdq_ent { enum pri_resp resp; } pri; + #define CMDQ_OP_RESUME 0x44 + struct { + u32 sid; + u16 stag; + u8 resp; + } resume; + #define CMDQ_OP_CMD_SYNC 0x46 struct { u64 msiaddr; @@ -520,6 +553,7 @@ struct arm_smmu_cmdq_batch { struct arm_smmu_evtq { struct arm_smmu_queue q; + struct iopf_queue *iopf; u32 max_stalls; }; @@ -657,7 +691,9 @@ struct arm_smmu_master { struct arm_smmu_stream *streams; unsigned int num_streams; bool ats_enabled; + bool stall_enabled; bool sva_enabled; + bool iopf_enabled; struct list_head bonds; unsigned int ssid_bits; }; @@ -675,6 +711,7 @@ struct arm_smmu_domain { struct mutex 
init_mutex; /* Protects smmu pointer */ struct io_pgtable_ops *pgtbl_ops; + bool stall_enabled; atomic_t nr_ats_masters; enum arm_smmu_domain_stage stage; @@ -716,6 +753,7 @@ bool arm_smmu_master_sva_supported(struct arm_smmu_master *master); bool arm_smmu_master_sva_enabled(struct arm_smmu_master *master); int arm_smmu_master_enable_sva(struct arm_smmu_master *master); int arm_smmu_master_disable_sva(struct arm_smmu_master *master); +bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master); struct iommu_sva *arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm, void *drvdata); void arm_smmu_sva_unbind(struct iommu_sva *handle); @@ -747,6 +785,11 @@ static inline int arm_smmu_master_disable_sva(struct arm_smmu_master *master) return -ENODEV; } +static inline bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master) +{ + return false; +} + static inline struct iommu_sva * arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm, void *drvdata) { From 9cff922bba429b310507eac3b6cb5eb1b57e9ad1 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Mon, 31 May 2021 11:56:50 +0200 Subject: [PATCH 15/66] iommu/arm-smmu-v3: Ratelimit event dump When a device or driver misbehaves, it is possible to receive DMA fault events much faster than we can print them out, causing a lock up of the system and inability to cancel the source of the problem. Ratelimit printing of events to help recovery. Tested-by: Aaro Koskinen Signed-off-by: Jean-Philippe Brucker Link: https://lore.kernel.org/r/20210531095648.118282-1-jean-philippe@linaro.org Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 67866bb2b3fd..9ca9c5373591 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1518,6 +1518,8 @@ static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev) struct arm_smmu_device *smmu = dev; struct arm_smmu_queue *q = &smmu->evtq.q; struct arm_smmu_ll_queue *llq = &q->llq; + static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); u64 evt[EVTQ_ENT_DWORDS]; do { @@ -1525,7 +1527,7 @@ static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev) u8 id = FIELD_GET(EVTQ_0_ID, evt[0]); ret = arm_smmu_handle_evt(smmu, evt); - if (!ret) + if (!ret || !__ratelimit(&rs)) continue; dev_info(smmu->dev, "event 0x%02x received:\n", id); From f115f3c0d5d846f69b2bc2d86653117f305b6066 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Mon, 31 May 2021 20:35:53 +0800 Subject: [PATCH 16/66] iommu/arm-smmu-v3: Decrease the queue size of evtq and priq Commit d25f6ead162e ("iommu/arm-smmu-v3: Increase maximum size of queues") expands the cmdq queue size to improve the success rate of concurrent command queue space allocation by multiple cores. However, this extension does not apply to evtq and priq, because for both of them, the SMMU driver is the consumer. Instead, memory resources are wasted. Therefore, the queue size of evtq and priq is restored to the original setting, one page. 
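To make the numbers concrete, a worked example assuming 4KiB pages (PAGE_SHIFT == 12), using the entry sizes from the header:

	/*
	 * evtq: EVTQ_MAX_SZ_SHIFT = 12 - 5 = 7 -> 128 entries of 32 bytes = 4KiB
	 * priq: PRIQ_MAX_SZ_SHIFT = 12 - 4 = 8 -> 256 entries of 16 bytes = 4KiB
	 */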
Fixes: d25f6ead162e ("iommu/arm-smmu-v3: Increase maximum size of queues") Signed-off-by: Zhen Lei Link: https://lore.kernel.org/r/20210531123553.9602-1-thunder.leizhen@huawei.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index cd48590ada30..4cb136f07914 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -184,6 +184,7 @@ #else #define Q_MAX_SZ_SHIFT (PAGE_SHIFT + MAX_ORDER - 1) #endif +#define Q_MIN_SZ_SHIFT (PAGE_SHIFT) /* * Stream table. @@ -373,7 +374,7 @@ /* Event queue */ #define EVTQ_ENT_SZ_SHIFT 5 #define EVTQ_ENT_DWORDS ((1 << EVTQ_ENT_SZ_SHIFT) >> 3) -#define EVTQ_MAX_SZ_SHIFT (Q_MAX_SZ_SHIFT - EVTQ_ENT_SZ_SHIFT) +#define EVTQ_MAX_SZ_SHIFT (Q_MIN_SZ_SHIFT - EVTQ_ENT_SZ_SHIFT) #define EVTQ_0_ID GENMASK_ULL(7, 0) @@ -399,7 +400,7 @@ /* PRI queue */ #define PRIQ_ENT_SZ_SHIFT 4 #define PRIQ_ENT_DWORDS ((1 << PRIQ_ENT_SZ_SHIFT) >> 3) -#define PRIQ_MAX_SZ_SHIFT (Q_MAX_SZ_SHIFT - PRIQ_ENT_SZ_SHIFT) +#define PRIQ_MAX_SZ_SHIFT (Q_MIN_SZ_SHIFT - PRIQ_ENT_SZ_SHIFT) #define PRIQ_0_SID GENMASK_ULL(31, 0) #define PRIQ_0_SSID GENMASK_ULL(51, 32) From 571f316074a203e979ea90211d9acf423dfe5f46 Mon Sep 17 00:00:00 2001 From: Srinath Mannam Date: Mon, 14 Sep 2020 12:53:19 +0530 Subject: [PATCH 17/66] iommu/dma: Fix IOVA reserve dma ranges Fix an IOVA reserve failure in the case where the address of the first memory region listed in dma-ranges is equal to 0x0. Fixes: aadad097cd46f ("iommu/dma: Reserve IOVA for PCIe inaccessible DMA address") Signed-off-by: Srinath Mannam Reviewed-by: Robin Murphy Tested-by: Sven Peter Link: https://lore.kernel.org/r/20200914072319.6091-1-srinath.mannam@broadcom.com Signed-off-by: Joerg Roedel --- drivers/iommu/dma-iommu.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 7bcdd1205535..95e7349ac3f1 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -243,9 +243,11 @@ static int iova_reserve_pci_windows(struct pci_dev *dev, lo = iova_pfn(iovad, start); hi = iova_pfn(iovad, end); reserve_iova(iovad, lo, hi); - } else { + } else if (end < start) { /* dma_ranges list should be sorted */ - dev_err(&dev->dev, "Failed to reserve IOVA\n"); + dev_err(&dev->dev, + "Failed to reserve IOVA [%#010llx-%#010llx]\n", + start, end); return -EINVAL; } From 7978724f399ae7eba5b6d36ae5a7224d5bf3859a Mon Sep 17 00:00:00 2001 From: Xiang Chen Date: Mon, 10 May 2021 19:53:02 +0800 Subject: [PATCH 18/66] iommu/iova: Put free_iova_mem() outside of spinlock iova_rbtree_lock It is not necessary to call free_iova_mem() inside the iova_rbtree_lock spinlock; doing so only adds contention on the lock. Moving it out gives a small performance improvement. Also rename private_free_iova() to remove_iova(), because after this change the function no longer frees the iova. 
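The resulting pattern, condensed here for illustration from the hunks below, is:

	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
	remove_iova(iovad, iova);	/* rbtree manipulation needs the lock */
	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
	free_iova_mem(iova);		/* the slab free does not */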
Signed-off-by: Xiang Chen Reviewed-by: John Garry Acked-by: Robin Murphy Link: https://lore.kernel.org/r/1620647582-194621-1-git-send-email-chenxiang66@hisilicon.com Signed-off-by: Joerg Roedel --- drivers/iommu/iova.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index b7ecd5b08039..b6cf5f16123b 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -412,12 +412,11 @@ private_find_iova(struct iova_domain *iovad, unsigned long pfn) return NULL; } -static void private_free_iova(struct iova_domain *iovad, struct iova *iova) +static void remove_iova(struct iova_domain *iovad, struct iova *iova) { assert_spin_locked(&iovad->iova_rbtree_lock); __cached_rbnode_delete_update(iovad, iova); rb_erase(&iova->node, &iovad->rbroot); - free_iova_mem(iova); } /** @@ -452,8 +451,9 @@ __free_iova(struct iova_domain *iovad, struct iova *iova) unsigned long flags; spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); - private_free_iova(iovad, iova); + remove_iova(iovad, iova); spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); + free_iova_mem(iova); } EXPORT_SYMBOL_GPL(__free_iova); @@ -472,10 +472,13 @@ free_iova(struct iova_domain *iovad, unsigned long pfn) spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); iova = private_find_iova(iovad, pfn); - if (iova) - private_free_iova(iovad, iova); + if (!iova) { + spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); + return; + } + remove_iova(iovad, iova); spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); - + free_iova_mem(iova); } EXPORT_SYMBOL_GPL(free_iova); @@ -825,7 +828,8 @@ iova_magazine_free_pfns(struct iova_magazine *mag, struct iova_domain *iovad) if (WARN_ON(!iova)) continue; - private_free_iova(iovad, iova); + remove_iova(iovad, iova); + free_iova_mem(iova); } spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); From bb6bfd79d9bc69f0808a4156ec3ca9fb78694039 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 27 May 2021 14:37:09 -0500 Subject: [PATCH 19/66] iommu: Remove unused of_get_dma_window() of_get_dma_window() was added in 2012, and its last user was removed in 2014 by commit 891846516317 ("memory: Add NVIDIA Tegra memory controller support"). Remove it and simplify the header to use forward declarations for structs rather than includes. Cc: Joerg Roedel Cc: Will Deacon Cc: Frank Rowand Cc: iommu@lists.linux-foundation.org Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20210527193710.1281746-1-robh@kernel.org Signed-off-by: Joerg Roedel --- drivers/iommu/of_iommu.c | 68 ---------------------------------------- include/linux/of_iommu.h | 17 ++-------- 2 files changed, 3 insertions(+), 82 deletions(-) diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index a9d2df001149..5696314ae69e 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -19,74 +19,6 @@ #define NO_IOMMU 1 -/** - * of_get_dma_window - Parse *dma-window property and returns 0 if found. - * - * @dn: device node - * @prefix: prefix for property name if any - * @index: index to start to parse - * @busno: Returns busno if supported. Otherwise pass NULL - * @addr: Returns address that DMA starts - * @size: Returns the range that DMA can handle - * - * This supports different formats flexibly. "prefix" can be - * configured if any. "busno" and "index" are optionally - * specified. Set 0(or NULL) if not used. 
- */ -int of_get_dma_window(struct device_node *dn, const char *prefix, int index, - unsigned long *busno, dma_addr_t *addr, size_t *size) -{ - const __be32 *dma_window, *end; - int bytes, cur_index = 0; - char propname[NAME_MAX], addrname[NAME_MAX], sizename[NAME_MAX]; - - if (!dn || !addr || !size) - return -EINVAL; - - if (!prefix) - prefix = ""; - - snprintf(propname, sizeof(propname), "%sdma-window", prefix); - snprintf(addrname, sizeof(addrname), "%s#dma-address-cells", prefix); - snprintf(sizename, sizeof(sizename), "%s#dma-size-cells", prefix); - - dma_window = of_get_property(dn, propname, &bytes); - if (!dma_window) - return -ENODEV; - end = dma_window + bytes / sizeof(*dma_window); - - while (dma_window < end) { - u32 cells; - const void *prop; - - /* busno is one cell if supported */ - if (busno) - *busno = be32_to_cpup(dma_window++); - - prop = of_get_property(dn, addrname, NULL); - if (!prop) - prop = of_get_property(dn, "#address-cells", NULL); - - cells = prop ? be32_to_cpup(prop) : of_n_addr_cells(dn); - if (!cells) - return -EINVAL; - *addr = of_read_number(dma_window, cells); - dma_window += cells; - - prop = of_get_property(dn, sizename, NULL); - cells = prop ? be32_to_cpup(prop) : of_n_size_cells(dn); - if (!cells) - return -EINVAL; - *size = of_read_number(dma_window, cells); - dma_window += cells; - - if (cur_index++ == index) - break; - } - return 0; -} -EXPORT_SYMBOL_GPL(of_get_dma_window); - static int of_iommu_xlate(struct device *dev, struct of_phandle_args *iommu_spec) { diff --git a/include/linux/of_iommu.h b/include/linux/of_iommu.h index 16f4b3e87f20..55c1eb300a86 100644 --- a/include/linux/of_iommu.h +++ b/include/linux/of_iommu.h @@ -2,29 +2,18 @@ #ifndef __OF_IOMMU_H #define __OF_IOMMU_H -#include -#include -#include +struct device; +struct device_node; +struct iommu_ops; #ifdef CONFIG_OF_IOMMU -extern int of_get_dma_window(struct device_node *dn, const char *prefix, - int index, unsigned long *busno, dma_addr_t *addr, - size_t *size); - extern const struct iommu_ops *of_iommu_configure(struct device *dev, struct device_node *master_np, const u32 *id); #else -static inline int of_get_dma_window(struct device_node *dn, const char *prefix, - int index, unsigned long *busno, dma_addr_t *addr, - size_t *size) -{ - return -EINVAL; -} - static inline const struct iommu_ops *of_iommu_configure(struct device *dev, struct device_node *master_np, const u32 *id) From a4099d47535fa588e663a29cb1a725a24edae463 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 27 May 2021 14:37:10 -0500 Subject: [PATCH 20/66] iommu: Drop unnecessary of_iommu.h includes The only place of_iommu.h is needed is in drivers/of/device.c. Remove it from everywhere else. 
Cc: Will Deacon Cc: Robin Murphy Cc: Joerg Roedel Cc: Rob Clark Cc: Marek Szyprowski Cc: Krzysztof Kozlowski Cc: Bjorn Andersson Cc: Yong Wu Cc: Matthias Brugger Cc: Heiko Stuebner Cc: Jean-Philippe Brucker Cc: Frank Rowand Cc: linux-arm-kernel@lists.infradead.org Cc: iommu@lists.linux-foundation.org Signed-off-by: Rob Herring Reviewed-by: Robin Murphy Acked-by: Heiko Stuebner Link: https://lore.kernel.org/r/20210527193710.1281746-2-robh@kernel.org Signed-off-by: Joerg Roedel --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 1 - drivers/iommu/arm/arm-smmu/arm-smmu.c | 1 - drivers/iommu/arm/arm-smmu/qcom_iommu.c | 1 - drivers/iommu/exynos-iommu.c | 1 - drivers/iommu/ipmmu-vmsa.c | 1 - drivers/iommu/msm_iommu.c | 1 - drivers/iommu/mtk_iommu.c | 1 - drivers/iommu/mtk_iommu_v1.c | 1 - drivers/iommu/omap-iommu.c | 1 - drivers/iommu/rockchip-iommu.c | 1 - drivers/iommu/virtio-iommu.c | 1 - drivers/of/platform.c | 1 - 12 files changed, 12 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 54b2f27b81d4..2ddc3cd5a7d1 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index 6f72c4d208ca..dba15f312cbd 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/iommu/arm/arm-smmu/qcom_iommu.c b/drivers/iommu/arm/arm-smmu/qcom_iommu.c index 4294abe389b2..021cf8f65ffc 100644 --- a/drivers/iommu/arm/arm-smmu/qcom_iommu.c +++ b/drivers/iommu/arm/arm-smmu/qcom_iommu.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 7623d8c371f5..d0fbf1d10e18 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index aaa6a4d59057..51ea6f00db2f 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index 7880f307cb2d..3a38352b603f 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index e06b8a0e2b56..6f7c69688ce2 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index 5915d7b38211..778e66f5f1aa 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index 26e517eb0dd3..91749654fd49 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index 7a2932772fdf..bb50e015b1d5 100644 
--- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c index c6e5ee4d9cef..1bcdd4cf29cd 100644 --- a/drivers/iommu/virtio-iommu.c +++ b/drivers/iommu/virtio-iommu.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/of/platform.c b/drivers/of/platform.c index 25d448f5af91..74afbb7a4f5e 100644 --- a/drivers/of/platform.c +++ b/drivers/of/platform.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include From 7154cbd31c2069726cf730b0ed94e2e79a221602 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 7 Jun 2021 14:49:05 +0200 Subject: [PATCH 21/66] iommu/dma: Fix compile warning in 32-bit builds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compiling the recent dma-iommu changes under 32-bit x86 triggers this compile warning: drivers/iommu/dma-iommu.c:249:5: warning: format ‘%llx’ expects argument of type ‘long long unsigned int’, but argument 3 has type ‘phys_addr_t’ {aka ‘unsigned int’} [-Wformat=] The reason is that %llx is used to print a variable of type phys_addr_t. Fix it by using the correct %pa format specifier for phys_addr_t. Cc: Srinath Mannam Cc: Robin Murphy Cc: Oza Pawandeep Fixes: 571f316074a20 ("iommu/dma: Fix IOVA reserve dma ranges") Signed-off-by: Joerg Roedel Link: https://lore.kernel.org/r/20210607124905.27525-1-joro@8bytes.org --- drivers/iommu/dma-iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 95e7349ac3f1..5d96fcc45fec 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -246,8 +246,8 @@ static int iova_reserve_pci_windows(struct pci_dev *dev, } else if (end < start) { /* dma_ranges list should be sorted */ dev_err(&dev->dev, - "Failed to reserve IOVA [%#010llx-%#010llx]\n", - start, end); + "Failed to reserve IOVA [%pa-%pa]\n", + &start, &end); return -EINVAL; } From b65412c25fa600d8a4085e820bdfadb9d9bab6b9 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 8 Jun 2021 14:28:43 +0200 Subject: [PATCH 22/66] iommu/amd: Fix section mismatch warning for detect_ivrs() A recent commit introduced this section mismatch warning: WARNING: modpost: vmlinux.o(.text.unlikely+0x22a1f): Section mismatch in reference from the function detect_ivrs() to the variable .init.data:amd_iommu_force_enable The reason is that detect_ivrs() is not marked __init while it should be, because it is only called from another __init function. Mark detect_ivrs() __init to get rid of the warning. 
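The underlying rule, sketched here with the symbols from this patch (the function body is illustrative, not the real one):

	static bool amd_iommu_force_enable __initdata;	/* lives in .init.data */

	static bool __init detect_ivrs(void)		/* lives in .init.text */
	{
		/*
		 * May reference __initdata freely: .init.text and .init.data
		 * are discarded together after boot.
		 */
		return amd_iommu_force_enable;
	}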
Fixes: b1e650db2cc4 ("iommu/amd: Add amd_iommu=force_enable option") Signed-off-by: Joerg Roedel Link: https://lore.kernel.org/r/20210608122843.8413-1-joro@8bytes.org --- drivers/iommu/amd/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 00ee9e3caedc..72f2384fd08c 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -2818,7 +2818,7 @@ static int amd_iommu_enable_interrupts(void) return ret; } -static bool detect_ivrs(void) +static bool __init detect_ivrs(void) { struct acpi_table_header *ivrs_base; acpi_status status; From 0b779f562b1473db3d96a751f38b185e92fd6426 Mon Sep 17 00:00:00 2001 From: Sai Prakash Ranjan Date: Tue, 8 Jun 2021 18:00:06 +0530 Subject: [PATCH 23/66] iommu/arm-smmu-qcom: Add SC7280 SMMU compatible Add compatible for SC7280 SMMU to use the Qualcomm Technologies, Inc. specific implementation. Signed-off-by: Sai Prakash Ranjan Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/53a50cd91c97b5b598a73941985b79b51acefa14.1623155117.git.saiprakash.ranjan@codeaurora.org Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index 6f70f0e57c64..e93b5dbda7de 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -178,6 +178,7 @@ static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = { { .compatible = "qcom,mdss" }, { .compatible = "qcom,sc7180-mdss" }, { .compatible = "qcom,sc7180-mss-pil" }, + { .compatible = "qcom,sc7280-mdss" }, { .compatible = "qcom,sc8180x-mdss" }, { .compatible = "qcom,sdm845-mdss" }, { .compatible = "qcom,sdm845-mss-pil" }, @@ -342,6 +343,7 @@ static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu, static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = { { .compatible = "qcom,msm8998-smmu-v2" }, { .compatible = "qcom,sc7180-smmu-500" }, + { .compatible = "qcom,sc7280-smmu-500" }, { .compatible = "qcom,sc8180x-smmu-500" }, { .compatible = "qcom,sdm630-smmu-v2" }, { .compatible = "qcom,sdm845-smmu-500" }, From ab9a77a141cc14ed607b4bf7a83ab6b0057ae084 Mon Sep 17 00:00:00 2001 From: Sai Prakash Ranjan Date: Tue, 8 Jun 2021 18:00:07 +0530 Subject: [PATCH 24/66] iommu/arm-smmu-qcom: Move the adreno smmu specific impl Adreno (GPU) SMMU and APSS (Application Processor SubSystem) SMMU both implement "arm,mmu-500" on some QTI SoCs. To run through the adreno smmu specific implementation, such as enabling split pagetables support, we need to match the "qcom,adreno-smmu" compatible first, before the apss smmu; otherwise we will run the apss smmu implementation for the adreno smmu and the additional features for the adreno smmu will never be set. For example, we have the "qcom,sc7280-smmu-500" compatible for both the apss and adreno smmu implementing "arm,mmu-500", so the adreno smmu implementation is never reached because the current sequence checks for the apss smmu compatible (qcom,sc7280-smmu-500) first and runs that specific impl, and we never reach the adreno smmu specific implementation.
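To make the ordering hazard concrete, consider an illustrative device node (sketched here, not taken from the patch) whose compatible list carries both strings:

/*
 * adreno_smmu: iommu@3da0000 {
 *         compatible = "qcom,sc7280-smmu-500",
 *                      "qcom,adreno-smmu", "arm,mmu-500";
 * };
 *
 * of_match_node(qcom_smmu_impl_of_match, np) matches this node via
 * its "qcom,sc7280-smmu-500" entry, so if that lookup runs first,
 * the more specific of_device_is_compatible(np, "qcom,adreno-smmu")
 * test can never be reached for the GPU SMMU.
 */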
Suggested-by: Akhil P Oommen Signed-off-by: Sai Prakash Ranjan Reviewed-by: Bjorn Andersson Acked-by: Jordan Crouse Link: https://lore.kernel.org/r/c42181d313fdd440011541a28cde8cd10fffb9d3.1623155117.git.saiprakash.ranjan@codeaurora.org Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index e93b5dbda7de..83c32566bf64 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -370,11 +370,17 @@ struct arm_smmu_device *qcom_smmu_impl_init(struct arm_smmu_device *smmu) return qcom_smmu_create(smmu, &qcom_smmu_impl); } - if (of_match_node(qcom_smmu_impl_of_match, np)) - return qcom_smmu_create(smmu, &qcom_smmu_impl); - + /* + * Do not change this order of implementation, i.e., first adreno + * smmu impl and then apss smmu since we can have both implementing + * arm,mmu-500 in which case we will miss setting adreno smmu specific + * features if the order is changed. + */ if (of_device_is_compatible(np, "qcom,adreno-smmu")) return qcom_smmu_create(smmu, &qcom_adreno_smmu_impl); + if (of_match_node(qcom_smmu_impl_of_match, np)) + return qcom_smmu_create(smmu, &qcom_smmu_impl); + return smmu; } From 249c9dc6aa0db74a0f7908efd04acf774e19b155 Mon Sep 17 00:00:00 2001 From: Amey Narkhede Date: Tue, 8 Jun 2021 22:15:59 +0530 Subject: [PATCH 25/66] iommu/arm: Cleanup resources in case of probe error path If device registration fails, remove sysfs attribute and if setting bus callbacks fails, unregister the device and cleanup the sysfs attribute. Signed-off-by: Amey Narkhede Link: https://lore.kernel.org/r/20210608164559.204023-1-ameynarkhede03@gmail.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 14 ++++++++++++-- drivers/iommu/arm/arm-smmu/arm-smmu.c | 15 ++++++++++++--- drivers/iommu/arm/arm-smmu/qcom_iommu.c | 13 +++++++++++-- 3 files changed, 35 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 9ca9c5373591..141cb7d0949e 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -3848,10 +3848,20 @@ static int arm_smmu_device_probe(struct platform_device *pdev) ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev); if (ret) { dev_err(dev, "Failed to register iommu\n"); - return ret; + goto err_sysfs_remove; } - return arm_smmu_set_bus_ops(&arm_smmu_ops); + ret = arm_smmu_set_bus_ops(&arm_smmu_ops); + if (ret) + goto err_unregister_device; + + return 0; + +err_unregister_device: + iommu_device_unregister(&smmu->iommu); +err_sysfs_remove: + iommu_device_sysfs_remove(&smmu->iommu); + return ret; } static int arm_smmu_device_remove(struct platform_device *pdev) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index 6f72c4d208ca..88a3023676ce 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -2164,7 +2164,7 @@ static int arm_smmu_device_probe(struct platform_device *pdev) err = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev); if (err) { dev_err(dev, "Failed to register iommu\n"); - return err; + goto err_sysfs_remove; } platform_set_drvdata(pdev, smmu); @@ -2187,10 +2187,19 @@ static int arm_smmu_device_probe(struct platform_device *pdev) * any device which might need it, so we want the bus ops 
in place * ready to handle default domain setup as soon as any SMMU exists. */ - if (!using_legacy_binding) - return arm_smmu_bus_init(&arm_smmu_ops); + if (!using_legacy_binding) { + err = arm_smmu_bus_init(&arm_smmu_ops); + if (err) + goto err_unregister_device; + } return 0; + +err_unregister_device: + iommu_device_unregister(&smmu->iommu); +err_sysfs_remove: + iommu_device_sysfs_remove(&smmu->iommu); + return err; } static int arm_smmu_device_remove(struct platform_device *pdev) diff --git a/drivers/iommu/arm/arm-smmu/qcom_iommu.c b/drivers/iommu/arm/arm-smmu/qcom_iommu.c index 4294abe389b2..b785d9fb7602 100644 --- a/drivers/iommu/arm/arm-smmu/qcom_iommu.c +++ b/drivers/iommu/arm/arm-smmu/qcom_iommu.c @@ -850,10 +850,12 @@ static int qcom_iommu_device_probe(struct platform_device *pdev) ret = iommu_device_register(&qcom_iommu->iommu, &qcom_iommu_ops, dev); if (ret) { dev_err(dev, "Failed to register iommu\n"); - return ret; + goto err_sysfs_remove; } - bus_set_iommu(&platform_bus_type, &qcom_iommu_ops); + ret = bus_set_iommu(&platform_bus_type, &qcom_iommu_ops); + if (ret) + goto err_unregister_device; if (qcom_iommu->local_base) { pm_runtime_get_sync(dev); @@ -862,6 +864,13 @@ static int qcom_iommu_device_probe(struct platform_device *pdev) } return 0; + +err_unregister_device: + iommu_device_unregister(&qcom_iommu->iommu); + +err_sysfs_remove: + iommu_device_sysfs_remove(&qcom_iommu->iommu); + return ret; } static int qcom_iommu_device_remove(struct platform_device *pdev) From e86b041ffeff554c9458882ebabc2aba5c864186 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Thu, 13 May 2021 15:58:15 +0800 Subject: [PATCH 26/66] iommu: Delete a duplicate check in iommu_change_dev_def_domain() Function iommu_group_store_type() is the only caller of the static function iommu_change_dev_def_domain() and has performed "if (WARN_ON(!group))" detection before calling it. So the one here is redundant. 
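Reduced to a sketch (bodies elided; the store_type signature is assumed from context), the relationship is:

static int iommu_change_dev_def_domain(struct iommu_group *group,
				       struct device *prev_dev, int type)
{
	/* group is guaranteed non-NULL: the only caller verified it */
	mutex_lock(&group->mutex);
	/* ... */
}

static ssize_t iommu_group_store_type(struct iommu_group *group,
				      const char *buf, size_t count)
{
	if (WARN_ON(!group))	/* the check that remains */
		return -EINVAL;
	/* ... eventually calls iommu_change_dev_def_domain(group, ...) */
}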
Signed-off-by: Zhen Lei Acked-by: Will Deacon Link: https://lore.kernel.org/r/20210513075815.6382-1-thunder.leizhen@huawei.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 808ab70d5df5..5419c4b9f27a 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -3059,9 +3059,6 @@ static int iommu_change_dev_def_domain(struct iommu_group *group, int ret, dev_def_dom; struct device *dev; - if (!group) - return -EINVAL; - mutex_lock(&group->mutex); if (group->default_domain != group->domain) { From 7e0fc5072e0b7a3e4dfbc39ad54aefa494bdf3da Mon Sep 17 00:00:00 2001 From: Benjamin Gaignard Date: Fri, 4 Jun 2021 18:44:38 +0200 Subject: [PATCH 27/66] dt-bindings: iommu: rockchip: Convert IOMMU to DT schema Convert Rockchip IOMMU to DT schema Signed-off-by: Benjamin Gaignard Reviewed-by: Rob Herring Reviewed-by: Heiko Stuebner Link: https://lore.kernel.org/r/20210604164441.798362-2-benjamin.gaignard@collabora.com Signed-off-by: Joerg Roedel --- .../bindings/iommu/rockchip,iommu.txt | 38 --------- .../bindings/iommu/rockchip,iommu.yaml | 80 +++++++++++++++++++ 2 files changed, 80 insertions(+), 38 deletions(-) delete mode 100644 Documentation/devicetree/bindings/iommu/rockchip,iommu.txt create mode 100644 Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml diff --git a/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt b/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt deleted file mode 100644 index 6ecefea1c6f9..000000000000 --- a/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt +++ /dev/null @@ -1,38 +0,0 @@ -Rockchip IOMMU -============== - -A Rockchip DRM iommu translates io virtual addresses to physical addresses for -its master device. Each slave device is bound to a single master device, and -shares its clocks, power domain and irq. - -Required properties: -- compatible : Should be "rockchip,iommu" -- reg : Address space for the configuration registers -- interrupts : Interrupt specifier for the IOMMU instance -- interrupt-names : Interrupt name for the IOMMU instance -- #iommu-cells : Should be <0>. This indicates the iommu is a - "single-master" device, and needs no additional information - to associate with its master device. See: - Documentation/devicetree/bindings/iommu/iommu.txt -- clocks : A list of clocks required for the IOMMU to be accessible by - the host CPU. -- clock-names : Should contain the following: - "iface" - Main peripheral bus clock (PCLK/HCL) (required) - "aclk" - AXI bus clock (required) - -Optional properties: -- rockchip,disable-mmu-reset : Don't use the mmu reset operation. - Some mmu instances may produce unexpected results - when the reset operation is used. 
- -Example: - - vopl_mmu: iommu@ff940300 { - compatible = "rockchip,iommu"; - reg = <0xff940300 0x100>; - interrupts = ; - interrupt-names = "vopl_mmu"; - clocks = <&cru ACLK_VOP1>, <&cru HCLK_VOP1>; - clock-names = "aclk", "iface"; - #iommu-cells = <0>; - }; diff --git a/Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml b/Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml new file mode 100644 index 000000000000..099fc2578b54 --- /dev/null +++ b/Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml @@ -0,0 +1,80 @@ +# SPDX-License-Identifier: GPL-2.0-only +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/iommu/rockchip,iommu.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Rockchip IOMMU + +maintainers: + - Heiko Stuebner + +description: |+ + A Rockchip DRM iommu translates io virtual addresses to physical addresses for + its master device. Each slave device is bound to a single master device and + shares its clocks, power domain and irq. + + For information on assigning IOMMU controller to its peripheral devices, + see generic IOMMU bindings. + +properties: + compatible: + const: rockchip,iommu + + reg: + items: + - description: configuration registers for MMU instance 0 + - description: configuration registers for MMU instance 1 + minItems: 1 + maxItems: 2 + + interrupts: + items: + - description: interruption for MMU instance 0 + - description: interruption for MMU instance 1 + minItems: 1 + maxItems: 2 + + clocks: + items: + - description: Core clock + - description: Interface clock + + clock-names: + items: + - const: aclk + - const: iface + + "#iommu-cells": + const: 0 + + rockchip,disable-mmu-reset: + $ref: /schemas/types.yaml#/definitions/flag + description: | + Do not use the mmu reset operation. + Some mmu instances may produce unexpected results + when the reset operation is used. + +required: + - compatible + - reg + - interrupts + - clocks + - clock-names + - "#iommu-cells" + +additionalProperties: false + +examples: + - | + #include + #include + + vopl_mmu: iommu@ff940300 { + compatible = "rockchip,iommu"; + reg = <0xff940300 0x100>; + interrupts = ; + clocks = <&cru ACLK_VOP1>, <&cru HCLK_VOP1>; + clock-names = "aclk", "iface"; + #iommu-cells = <0>; + }; From 9e6f3cd589cb711b5949cfbeb84e5440906195bd Mon Sep 17 00:00:00 2001 From: Benjamin Gaignard Date: Fri, 4 Jun 2021 18:44:39 +0200 Subject: [PATCH 28/66] dt-bindings: iommu: rockchip: Add compatible for v2 Add a compatible for the second version of the IOMMU hardware block. The RK356x IOMMU can also be linked to a power domain.
Signed-off-by: Benjamin Gaignard Reviewed-by: Rob Herring Reviewed-by: Heiko Stuebner Link: https://lore.kernel.org/r/20210604164441.798362-3-benjamin.gaignard@collabora.com Signed-off-by: Joerg Roedel --- .../devicetree/bindings/iommu/rockchip,iommu.yaml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml b/Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml index 099fc2578b54..d2e28a9e3545 100644 --- a/Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml +++ b/Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml @@ -19,7 +19,9 @@ description: |+ properties: compatible: - const: rockchip,iommu + enum: + - rockchip,iommu + - rockchip,rk3568-iommu reg: items: @@ -48,6 +50,9 @@ properties: "#iommu-cells": const: 0 + power-domains: + maxItems: 1 + rockchip,disable-mmu-reset: $ref: /schemas/types.yaml#/definitions/flag description: | From 227014b33f62f93a462f8125038ef6609d7a1572 Mon Sep 17 00:00:00 2001 From: Benjamin Gaignard Date: Fri, 4 Jun 2021 18:44:40 +0200 Subject: [PATCH 29/66] iommu: rockchip: Add internal ops to handle variants Add internal ops to be able to handle the incoming variant v2. The goal is to keep the overall structure of the framework while allowing for the evolution of this hardware block. The ops are global for a SoC because iommu domains are not attached to a specific device if they are for a virtual device like drm. Using a global variable shouldn't be a problem, since a SoC usually doesn't embed different versions of the iommu hardware block. If that ever happens, a WARN_ON will be displayed at probe time. Signed-off-by: Benjamin Gaignard Reviewed-by: Heiko Stuebner Link: https://lore.kernel.org/r/20210604164441.798362-4-benjamin.gaignard@collabora.com Signed-off-by: Joerg Roedel --- drivers/iommu/rockchip-iommu.c | 86 +++++++++++++++++++++++++--------- 1 file changed, 64 insertions(+), 22 deletions(-) diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index e5116d6a6260..98639f8d21d6 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -96,6 +96,15 @@ static const char * const rk_iommu_clocks[] = { "aclk", "iface", }; +struct rk_iommu_ops { + phys_addr_t (*pt_address)(u32 dte); + u32 (*mk_dtentries)(dma_addr_t pt_dma); + u32 (*mk_ptentries)(phys_addr_t page, int prot); + phys_addr_t (*dte_addr_phys)(u32 addr); + u32 (*dma_addr_dte)(dma_addr_t dt_dma); + u64 dma_bit_mask; +}; + struct rk_iommu { struct device *dev; void __iomem **bases; @@ -116,6 +125,7 @@ struct rk_iommudata { }; static struct device *dma_dev; +static const struct rk_iommu_ops *rk_ops; static inline void rk_table_flush(struct rk_iommu_domain *dom, dma_addr_t dma, unsigned int count) @@ -215,11 +225,6 @@ static inline u32 rk_mk_dte(dma_addr_t pt_dma) #define RK_PTE_PAGE_READABLE BIT(1) #define RK_PTE_PAGE_VALID BIT(0) -static inline phys_addr_t rk_pte_page_address(u32 pte) -{ - return (phys_addr_t)pte & RK_PTE_PAGE_ADDRESS_MASK; -} - static inline bool rk_pte_is_page_valid(u32 pte) { return pte & RK_PTE_PAGE_VALID; @@ -448,10 +453,10 @@ static int rk_iommu_force_reset(struct rk_iommu *iommu) * and verifying that upper 5 nybbles are read back.
*/ for (i = 0; i < iommu->num_mmu; i++) { - rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR, DTE_ADDR_DUMMY); + dte_addr = rk_ops->pt_address(DTE_ADDR_DUMMY); + rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR, dte_addr); - dte_addr = rk_iommu_read(iommu->bases[i], RK_MMU_DTE_ADDR); - if (dte_addr != (DTE_ADDR_DUMMY & RK_DTE_PT_ADDRESS_MASK)) { + if (dte_addr != rk_iommu_read(iommu->bases[i], RK_MMU_DTE_ADDR)) { dev_err(iommu->dev, "Error during raw reset. MMU_DTE_ADDR is not functioning\n"); return -EFAULT; } @@ -470,6 +475,16 @@ static int rk_iommu_force_reset(struct rk_iommu *iommu) return 0; } +static inline phys_addr_t rk_dte_addr_phys(u32 addr) +{ + return (phys_addr_t)addr; +} + +static inline u32 rk_dma_addr_dte(dma_addr_t dt_dma) +{ + return dt_dma; +} + static void log_iova(struct rk_iommu *iommu, int index, dma_addr_t iova) { void __iomem *base = iommu->bases[index]; @@ -489,7 +504,7 @@ static void log_iova(struct rk_iommu *iommu, int index, dma_addr_t iova) page_offset = rk_iova_page_offset(iova); mmu_dte_addr = rk_iommu_read(base, RK_MMU_DTE_ADDR); - mmu_dte_addr_phys = (phys_addr_t)mmu_dte_addr; + mmu_dte_addr_phys = rk_ops->dte_addr_phys(mmu_dte_addr); dte_addr_phys = mmu_dte_addr_phys + (4 * dte_index); dte_addr = phys_to_virt(dte_addr_phys); @@ -498,14 +513,14 @@ static void log_iova(struct rk_iommu *iommu, int index, dma_addr_t iova) if (!rk_dte_is_pt_valid(dte)) goto print_it; - pte_addr_phys = rk_dte_pt_address(dte) + (pte_index * 4); + pte_addr_phys = rk_ops->pt_address(dte) + (pte_index * 4); pte_addr = phys_to_virt(pte_addr_phys); pte = *pte_addr; if (!rk_pte_is_page_valid(pte)) goto print_it; - page_addr_phys = rk_pte_page_address(pte) + page_offset; + page_addr_phys = rk_ops->pt_address(pte) + page_offset; page_flags = pte & RK_PTE_PAGE_FLAGS_MASK; print_it: @@ -601,13 +616,13 @@ static phys_addr_t rk_iommu_iova_to_phys(struct iommu_domain *domain, if (!rk_dte_is_pt_valid(dte)) goto out; - pt_phys = rk_dte_pt_address(dte); + pt_phys = rk_ops->pt_address(dte); page_table = (u32 *)phys_to_virt(pt_phys); pte = page_table[rk_iova_pte_index(iova)]; if (!rk_pte_is_page_valid(pte)) goto out; - phys = rk_pte_page_address(pte) + rk_iova_page_offset(iova); + phys = rk_ops->pt_address(pte) + rk_iova_page_offset(iova); out: spin_unlock_irqrestore(&rk_domain->dt_lock, flags); @@ -679,13 +694,13 @@ static u32 *rk_dte_get_page_table(struct rk_iommu_domain *rk_domain, return ERR_PTR(-ENOMEM); } - dte = rk_mk_dte(pt_dma); + dte = rk_ops->mk_dtentries(pt_dma); *dte_addr = dte; rk_table_flush(rk_domain, rk_domain->dt_dma + dte_index * sizeof(u32), 1); done: - pt_phys = rk_dte_pt_address(dte); + pt_phys = rk_ops->pt_address(dte); return (u32 *)phys_to_virt(pt_phys); } @@ -727,7 +742,7 @@ static int rk_iommu_map_iova(struct rk_iommu_domain *rk_domain, u32 *pte_addr, if (rk_pte_is_page_valid(pte)) goto unwind; - pte_addr[pte_count] = rk_mk_pte(paddr, prot); + pte_addr[pte_count] = rk_ops->mk_ptentries(paddr, prot); paddr += SPAGE_SIZE; } @@ -749,7 +764,7 @@ static int rk_iommu_map_iova(struct rk_iommu_domain *rk_domain, u32 *pte_addr, pte_count * SPAGE_SIZE); iova += pte_count * SPAGE_SIZE; - page_phys = rk_pte_page_address(pte_addr[pte_count]); + page_phys = rk_ops->pt_address(pte_addr[pte_count]); pr_err("iova: %pad already mapped to %pa cannot remap to phys: %pa prot: %#x\n", &iova, &page_phys, &paddr, prot); @@ -784,7 +799,8 @@ static int rk_iommu_map(struct iommu_domain *domain, unsigned long _iova, dte_index = rk_domain->dt[rk_iova_dte_index(iova)]; pte_index = 
rk_iova_pte_index(iova); pte_addr = &page_table[pte_index]; - pte_dma = rk_dte_pt_address(dte_index) + pte_index * sizeof(u32); + + pte_dma = rk_ops->pt_address(dte_index) + pte_index * sizeof(u32); ret = rk_iommu_map_iova(rk_domain, pte_addr, pte_dma, iova, paddr, size, prot); @@ -820,7 +836,7 @@ static size_t rk_iommu_unmap(struct iommu_domain *domain, unsigned long _iova, return 0; } - pt_phys = rk_dte_pt_address(dte); + pt_phys = rk_ops->pt_address(dte); pte_addr = (u32 *)phys_to_virt(pt_phys) + rk_iova_pte_index(iova); pte_dma = pt_phys + rk_iova_pte_index(iova) * sizeof(u32); unmap_size = rk_iommu_unmap_iova(rk_domain, pte_addr, pte_dma, size); @@ -878,7 +894,7 @@ static int rk_iommu_enable(struct rk_iommu *iommu) for (i = 0; i < iommu->num_mmu; i++) { rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR, - rk_domain->dt_dma); + rk_ops->dma_addr_dte(rk_domain->dt_dma)); rk_iommu_base_command(iommu->bases[i], RK_MMU_CMD_ZAP_CACHE); rk_iommu_write(iommu->bases[i], RK_MMU_INT_MASK, RK_MMU_IRQ_MASK); } @@ -1034,7 +1050,7 @@ static void rk_iommu_domain_free(struct iommu_domain *domain) for (i = 0; i < NUM_DT_ENTRIES; i++) { u32 dte = rk_domain->dt[i]; if (rk_dte_is_pt_valid(dte)) { - phys_addr_t pt_phys = rk_dte_pt_address(dte); + phys_addr_t pt_phys = rk_ops->pt_address(dte); u32 *page_table = phys_to_virt(pt_phys); dma_unmap_single(dma_dev, pt_phys, SPAGE_SIZE, DMA_TO_DEVICE); @@ -1124,6 +1140,7 @@ static int rk_iommu_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct rk_iommu *iommu; struct resource *res; + const struct rk_iommu_ops *ops; int num_res = pdev->num_resources; int err, i; @@ -1135,6 +1152,17 @@ static int rk_iommu_probe(struct platform_device *pdev) iommu->dev = dev; iommu->num_mmu = 0; + ops = of_device_get_match_data(dev); + if (!rk_ops) + rk_ops = ops; + + /* + * That should not happen unless different versions of the + * hardware block are embedded the same SoC + */ + if (WARN_ON(rk_ops != ops)) + return -EINVAL; + iommu->bases = devm_kcalloc(dev, num_res, sizeof(*iommu->bases), GFP_KERNEL); if (!iommu->bases) @@ -1223,6 +1251,8 @@ static int rk_iommu_probe(struct platform_device *pdev) } } + dma_set_mask_and_coherent(dev, rk_ops->dma_bit_mask); + return 0; err_remove_sysfs: iommu_device_sysfs_remove(&iommu->iommu); @@ -1274,8 +1304,20 @@ static const struct dev_pm_ops rk_iommu_pm_ops = { pm_runtime_force_resume) }; +static struct rk_iommu_ops iommu_data_ops_v1 = { + .pt_address = &rk_dte_pt_address, + .mk_dtentries = &rk_mk_dte, + .mk_ptentries = &rk_mk_pte, + .dte_addr_phys = &rk_dte_addr_phys, + .dma_addr_dte = &rk_dma_addr_dte, + .dma_bit_mask = DMA_BIT_MASK(32), +}; + + static const struct of_device_id rk_iommu_dt_ids[] = { - { .compatible = "rockchip,iommu" }, + { .compatible = "rockchip,iommu", + .data = &iommu_data_ops_v1, + }, { /* sentinel */ } }; From c55356c534aa651ccc3053ef2d5d8d810adacf5f Mon Sep 17 00:00:00 2001 From: Benjamin Gaignard Date: Fri, 4 Jun 2021 18:44:41 +0200 Subject: [PATCH 30/66] iommu: rockchip: Add support for iommu v2 This second version of the hardware block has a different bits mapping for page table entries. Add the ops matching to this new mapping. Define a new compatible to distinguish it from the first version. 
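Because the v2 bit packing in the diff below is easy to misread, here is a small standalone userspace sketch of the documented layout (masks spelled out as plain hex, values illustrative):

#include <stdint.h>
#include <stdio.h>

static uint32_t mk_dte_v2(uint64_t pt_dma)
{
	uint32_t dte = (uint32_t)(pt_dma & 0xfffff000);	/* bits 31:12 */

	dte |= ((pt_dma >> 32) & 0xf) << 8;	/* bits 35:32 -> 11:8 */
	dte |= ((pt_dma >> 36) & 0xf) << 4;	/* bits 39:36 -> 7:4 */
	return dte | 1;				/* valid bit */
}

static uint64_t pt_address_v2(uint32_t dte)
{
	uint64_t pa = dte & 0xfffff000;

	pa |= (uint64_t)((dte >> 8) & 0xf) << 32;	/* 11:8 -> 35:32 */
	pa |= (uint64_t)((dte >> 4) & 0xf) << 36;	/* 7:4 -> 39:36 */
	return pa;
}

int main(void)
{
	uint64_t pt = 0xabcd123000ULL;	/* a page-aligned 40-bit address */
	uint32_t dte = mk_dte_v2(pt);

	/* prints dte=0xcd123ba1 round-trip=0xabcd123000 */
	printf("dte=%#x round-trip=%#llx\n", dte,
	       (unsigned long long)pt_address_v2(dte));
	return 0;
}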
Signed-off-by: Benjamin Gaignard Reviewed-by: Heiko Stuebner Link: https://lore.kernel.org/r/20210604164441.798362-5-benjamin.gaignard@collabora.com Signed-off-by: Joerg Roedel --- drivers/iommu/rockchip-iommu.c | 85 ++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index 98639f8d21d6..4808360abf6c 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -189,6 +189,33 @@ static inline phys_addr_t rk_dte_pt_address(u32 dte) return (phys_addr_t)dte & RK_DTE_PT_ADDRESS_MASK; } +/* + * In v2: + * 31:12 - PT address bit 31:0 + * 11: 8 - PT address bit 35:32 + * 7: 4 - PT address bit 39:36 + * 3: 1 - Reserved + * 0 - 1 if PT @ PT address is valid + */ +#define RK_DTE_PT_ADDRESS_MASK_V2 GENMASK_ULL(31, 4) +#define DTE_HI_MASK1 GENMASK(11, 8) +#define DTE_HI_MASK2 GENMASK(7, 4) +#define DTE_HI_SHIFT1 24 /* shift bit 8 to bit 32 */ +#define DTE_HI_SHIFT2 32 /* shift bit 4 to bit 36 */ +#define PAGE_DESC_HI_MASK1 GENMASK_ULL(39, 36) +#define PAGE_DESC_HI_MASK2 GENMASK_ULL(35, 32) + +static inline phys_addr_t rk_dte_pt_address_v2(u32 dte) +{ + u64 dte_v2 = dte; + + dte_v2 = ((dte_v2 & DTE_HI_MASK2) << DTE_HI_SHIFT2) | + ((dte_v2 & DTE_HI_MASK1) << DTE_HI_SHIFT1) | + (dte_v2 & RK_DTE_PT_ADDRESS_MASK); + + return (phys_addr_t)dte_v2; +} + static inline bool rk_dte_is_pt_valid(u32 dte) { return dte & RK_DTE_PT_VALID; @@ -199,6 +226,15 @@ static inline u32 rk_mk_dte(dma_addr_t pt_dma) return (pt_dma & RK_DTE_PT_ADDRESS_MASK) | RK_DTE_PT_VALID; } +static inline u32 rk_mk_dte_v2(dma_addr_t pt_dma) +{ + pt_dma = (pt_dma & RK_DTE_PT_ADDRESS_MASK) | + ((pt_dma & PAGE_DESC_HI_MASK1) >> DTE_HI_SHIFT1) | + (pt_dma & PAGE_DESC_HI_MASK2) >> DTE_HI_SHIFT2; + + return (pt_dma & RK_DTE_PT_ADDRESS_MASK_V2) | RK_DTE_PT_VALID; +} + /* * Each PTE has a Page address, some flags and a valid bit: * +---------------------+---+-------+-+ @@ -240,6 +276,29 @@ static u32 rk_mk_pte(phys_addr_t page, int prot) return page | flags | RK_PTE_PAGE_VALID; } +/* + * In v2: + * 31:12 - Page address bit 31:0 + * 11:9 - Page address bit 34:32 + * 8:4 - Page address bit 39:35 + * 3 - Security + * 2 - Readable + * 1 - Writable + * 0 - 1 if Page @ Page address is valid + */ +#define RK_PTE_PAGE_READABLE_V2 BIT(2) +#define RK_PTE_PAGE_WRITABLE_V2 BIT(1) + +static u32 rk_mk_pte_v2(phys_addr_t page, int prot) +{ + u32 flags = 0; + + flags |= (prot & IOMMU_READ) ? RK_PTE_PAGE_READABLE_V2 : 0; + flags |= (prot & IOMMU_WRITE) ? 
RK_PTE_PAGE_WRITABLE_V2 : 0; + + return rk_mk_dte_v2(page) | flags; +} + static u32 rk_mk_pte_invalid(u32 pte) { return pte & ~RK_PTE_PAGE_VALID; @@ -485,6 +544,21 @@ static inline u32 rk_dma_addr_dte(dma_addr_t dt_dma) return dt_dma; } +#define DT_HI_MASK GENMASK_ULL(39, 32) +#define DT_SHIFT 28 + +static inline phys_addr_t rk_dte_addr_phys_v2(u32 addr) +{ + return (phys_addr_t)(addr & RK_DTE_PT_ADDRESS_MASK) | + ((addr & DT_HI_MASK) << DT_SHIFT); +} + +static inline u32 rk_dma_addr_dte_v2(dma_addr_t dt_dma) +{ + return (dt_dma & RK_DTE_PT_ADDRESS_MASK) | + ((dt_dma & DT_HI_MASK) >> DT_SHIFT); +} + static void log_iova(struct rk_iommu *iommu, int index, dma_addr_t iova) { void __iomem *base = iommu->bases[index]; @@ -1313,11 +1387,22 @@ static struct rk_iommu_ops iommu_data_ops_v1 = { .dma_bit_mask = DMA_BIT_MASK(32), }; +static struct rk_iommu_ops iommu_data_ops_v2 = { + .pt_address = &rk_dte_pt_address_v2, + .mk_dtentries = &rk_mk_dte_v2, + .mk_ptentries = &rk_mk_pte_v2, + .dte_addr_phys = &rk_dte_addr_phys_v2, + .dma_addr_dte = &rk_dma_addr_dte_v2, + .dma_bit_mask = DMA_BIT_MASK(40), +}; static const struct of_device_id rk_iommu_dt_ids[] = { { .compatible = "rockchip,iommu", .data = &iommu_data_ops_v1, }, + { .compatible = "rockchip,rk3568-iommu", + .data = &iommu_data_ops_v2, + }, { /* sentinel */ } }; From 22c2d71891558170ed6d1a6bfe08caba254eafc6 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Wed, 9 Jun 2021 09:55:11 +0800 Subject: [PATCH 31/66] iommu/arm-smmu-qcom: Protect acpi_match_platform_list() call with CONFIG_ACPI The struct acpi_platform_list and function acpi_match_platform_list() defined in include/linux/acpi.h are available only when CONFIG_ACPI is enabled. Add protection to fix the build issues with !CONFIG_ACPI. Reported-by: kernel test robot Signed-off-by: Shawn Guo Link: https://lore.kernel.org/r/20210609015511.3955-1-shawn.guo@linaro.org Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index 83c32566bf64..7771d40176de 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -354,21 +354,25 @@ static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = { { } }; +#ifdef CONFIG_ACPI static struct acpi_platform_list qcom_acpi_platlist[] = { { "LENOVO", "CB-01 ", 0x8180, ACPI_SIG_IORT, equal, "QCOM SMMU" }, { "QCOM ", "QCOMEDK2", 0x8180, ACPI_SIG_IORT, equal, "QCOM SMMU" }, { } }; +#endif struct arm_smmu_device *qcom_smmu_impl_init(struct arm_smmu_device *smmu) { const struct device_node *np = smmu->dev->of_node; +#ifdef CONFIG_ACPI if (np == NULL) { /* Match platform for ACPI boot */ if (acpi_match_platform_list(qcom_acpi_platlist) >= 0) return qcom_smmu_create(smmu, &qcom_smmu_impl); } +#endif /* * Do not change this order of implementation, i.e., first adreno From 05d2cbf969be465027d645f855419dbe49ded849 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 10 Jun 2021 10:00:53 +0800 Subject: [PATCH 32/66] iommu/vt-d: Remove redundant assignment to variable agaw The variable agaw is initialized with a value that is never read and it is being updated later with a new value as a counter in a for-loop. The initialization is redundant and can be removed. 
Addresses-Coverity: ("Unused value") Signed-off-by: Colin Ian King Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210416171826.64091-1-colin.king@canonical.com Link: https://lore.kernel.org/r/20210610020115.1637656-2-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index be35284a2016..65c2ed760060 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -564,7 +564,7 @@ static inline int domain_pfn_supported(struct dmar_domain *domain, static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw) { unsigned long sagaw; - int agaw = -1; + int agaw; sagaw = cap_sagaw(iommu->cap); for (agaw = width_to_agaw(max_gaw); From 367f82de5a9c8ee77058f40bb470b83bfa521e3d Mon Sep 17 00:00:00 2001 From: Aditya Srivastava Date: Thu, 10 Jun 2021 10:00:54 +0800 Subject: [PATCH 33/66] iommu/vt-d: Fix kernel-doc syntax in file header The opening comment mark '/**' is used for highlighting the beginning of kernel-doc comments. The header for drivers/iommu/intel/pasid.c follows this syntax, but the content inside does not comply with kernel-doc. This line was probably not meant for kernel-doc parsing, but is parsed due to the presence of kernel-doc-like comment syntax (i.e., '/**'), which causes unexpected warnings from kernel-doc: warning: Function parameter or member 'fmt' not described in 'pr_fmt' Provide a simple fix by replacing this occurrence with the general comment format, i.e. '/*', to prevent kernel-doc from parsing it. Signed-off-by: Aditya Srivastava Acked-by: Randy Dunlap Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210523143245.19040-1-yashsri421@gmail.com Link: https://lore.kernel.org/r/20210610020115.1637656-3-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/pasid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index 72dc84821dad..c6cf44a6c923 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/** +/* * intel-pasid.c - PASID idr, table and entry manipulation * * Copyright (C) 2018 Intel Corporation From 719a19335692083c0f4ebdc6eef65131e7394ac8 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 10 Jun 2021 10:00:55 +0800 Subject: [PATCH 34/66] iommu/vt-d: Tweak the description of a DMA fault The Intel IOMMU driver reports the DMA fault reason in a decimal number while the VT-d specification uses a hexadecimal one. It's inconvenient that users need to convert them every time before consulting the spec. Let's use a hexadecimal number for a DMA fault reason. The fault message uses 0xffffffff as PASID for DMA requests w/o PASID. This is confusing. Tweak this by adding "NO_PASID" explicitly.
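The practical difference is one format character; a tiny standalone sketch with a made-up reason code:

#include <stdio.h>

int main(void)
{
	int reason = 0x26;	/* hypothetical fault reason code */

	printf("[fault reason %02d]\n", reason);	/* old: prints 38 */
	printf("[fault reason 0x%02x]\n", reason);	/* new: prints 0x26 */
	return 0;
}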
Reviewed-by: Ashok Raj Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210517065425.4953-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20210610020115.1637656-4-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/dmar.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 84057cb9596c..cae1078cbfec 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -1913,16 +1913,23 @@ static int dmar_fault_do_one(struct intel_iommu *iommu, int type, reason = dmar_get_fault_reason(fault_reason, &fault_type); if (fault_type == INTR_REMAP) - pr_err("[INTR-REMAP] Request device [%02x:%02x.%d] fault index %llx [fault reason %02d] %s\n", - source_id >> 8, PCI_SLOT(source_id & 0xFF), - PCI_FUNC(source_id & 0xFF), addr >> 48, - fault_reason, reason); - else - pr_err("[%s] Request device [%02x:%02x.%d] PASID %x fault addr %llx [fault reason %02d] %s\n", + pr_err("[INTR-REMAP] Request device [0x%02x:0x%02x.%d] fault index 0x%llx [fault reason 0x%02x] %s\n", + source_id >> 8, PCI_SLOT(source_id & 0xFF), + PCI_FUNC(source_id & 0xFF), addr >> 48, + fault_reason, reason); + else if (pasid == INVALID_IOASID) + pr_err("[%s NO_PASID] Request device [0x%02x:0x%02x.%d] fault addr 0x%llx [fault reason 0x%02x] %s\n", type ? "DMA Read" : "DMA Write", source_id >> 8, PCI_SLOT(source_id & 0xFF), - PCI_FUNC(source_id & 0xFF), pasid, addr, + PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason); + else + pr_err("[%s PASID 0x%x] Request device [0x%02x:0x%02x.%d] fault addr 0x%llx [fault reason 0x%02x] %s\n", + type ? "DMA Read" : "DMA Write", pasid, + source_id >> 8, PCI_SLOT(source_id & 0xFF), + PCI_FUNC(source_id & 0xFF), addr, + fault_reason, reason); + return 0; } @@ -1989,7 +1996,7 @@ irqreturn_t dmar_fault(int irq, void *dev_id) if (!ratelimited) /* Using pasid -1 if pasid is not present */ dmar_fault_do_one(iommu, type, fault_reason, - pasid_present ? pasid : -1, + pasid_present ? pasid : INVALID_IOASID, source_id, guest_addr); fault_index++; From 879fcc6bda6956ce9574421c5f68ebbc794bf06b Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 10 Jun 2021 10:00:56 +0800 Subject: [PATCH 35/66] iommu/vt-d: Select PCI_ATS explicitly The Intel VT-d implementation supports device TLB management. Select PCI_ATS explicitly so that the pci_ats helpers are always available. Signed-off-by: Lu Baolu Acked-by: Will Deacon Link: https://lore.kernel.org/r/20210512065313.3441309-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20210610020115.1637656-5-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig index 28a3d1596c76..7e5b240b801d 100644 --- a/drivers/iommu/intel/Kconfig +++ b/drivers/iommu/intel/Kconfig @@ -14,6 +14,7 @@ config INTEL_IOMMU select SWIOTLB select IOASID select IOMMU_DMA + select PCI_ATS help DMA remapping (DMAR) devices support enables independent address translations for Direct Memory Access (DMA) from devices. From 521f546b4e4cedfbfbb5787f940a592dd20dd1f2 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 10 Jun 2021 10:00:57 +0800 Subject: [PATCH 36/66] iommu/vt-d: Support asynchronous IOMMU nested capabilities Current VT-d implementation supports nested translation only if all underlying IOMMUs support the nested capability. 
This is unnecessary as the upper layer is allowed to create different containers and set them with different types of iommu backends. The IOMMU driver needs to guarantee that devices attached to a nested-mode iommu_domain support the nested capability. Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210517065701.5078-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20210610020115.1637656-6-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 65c2ed760060..b0ba187cb7f8 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4757,6 +4757,13 @@ static int prepare_domain_attach_device(struct iommu_domain *domain, if (!iommu) return -ENODEV; + if ((dmar_domain->flags & DOMAIN_FLAG_NESTING_MODE) && + !ecap_nest(iommu->ecap)) { + dev_err(dev, "%s: iommu not support nested translation\n", + iommu->name); + return -EINVAL; + } + /* check if this iommu agaw is sufficient for max mapped address */ addr_width = agaw_to_width(iommu->agaw); if (addr_width > cap_mgaw(iommu->cap)) @@ -5457,7 +5464,7 @@ intel_iommu_enable_nesting(struct iommu_domain *domain) int ret = -ENODEV; spin_lock_irqsave(&device_domain_lock, flags); - if (nested_mode_support() && list_empty(&dmar_domain->devices)) { + if (list_empty(&dmar_domain->devices)) { dmar_domain->flags |= DOMAIN_FLAG_NESTING_MODE; dmar_domain->flags &= ~DOMAIN_FLAG_USE_FIRST_LEVEL; ret = 0; From 100b8a14a37074796a467d6a9147317538ee70cf Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 10 Jun 2021 10:00:58 +0800 Subject: [PATCH 37/66] iommu/vt-d: Add pasid private data helpers We are about to use the iommu_sva_alloc/free_pasid() helpers in the iommu core. That means the pasid life cycle will be managed by the iommu core. Use a local array to save the per-pasid private data instead of attaching it to the real pasid.
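One detail worth spelling out: xa_alloc() with XA_LIMIT(pasid, pasid) restricts the allocator to a single index, so it acts as "store at exactly this id, fail if the slot is occupied". A hedged sketch of the same pattern with illustrative names:

static DEFINE_XARRAY_ALLOC(private_array);

static int private_add(u32 id, void *priv)
{
	/* Only id itself is in the allowed range, so this either
	 * installs priv at index id or fails (-EBUSY) if taken. */
	return xa_alloc(&private_array, &id, priv,
			XA_LIMIT(id, id), GFP_ATOMIC);
}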
Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210520031531.712333-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20210610020115.1637656-7-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/svm.c | 62 ++++++++++++++++++++++++++------------- 1 file changed, 41 insertions(+), 21 deletions(-) diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 5165cea90421..82b0627ad7e7 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -28,6 +29,23 @@ static void intel_svm_drain_prq(struct device *dev, u32 pasid); #define PRQ_ORDER 0 +static DEFINE_XARRAY_ALLOC(pasid_private_array); +static int pasid_private_add(ioasid_t pasid, void *priv) +{ + return xa_alloc(&pasid_private_array, &pasid, priv, + XA_LIMIT(pasid, pasid), GFP_ATOMIC); +} + +static void pasid_private_remove(ioasid_t pasid) +{ + xa_erase(&pasid_private_array, pasid); +} + +static void *pasid_private_find(ioasid_t pasid) +{ + return xa_load(&pasid_private_array, pasid); +} + int intel_svm_enable_prq(struct intel_iommu *iommu) { struct page *pages; @@ -224,7 +242,7 @@ static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid, if (pasid == INVALID_IOASID || pasid >= PASID_MAX) return -EINVAL; - svm = ioasid_find(NULL, pasid, NULL); + svm = pasid_private_find(pasid); if (IS_ERR(svm)) return PTR_ERR(svm); @@ -334,7 +352,7 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev, svm->gpasid = data->gpasid; svm->flags |= SVM_FLAG_GUEST_PASID; } - ioasid_set_data(data->hpasid, svm); + pasid_private_add(data->hpasid, svm); INIT_LIST_HEAD_RCU(&svm->devs); mmput(svm->mm); } @@ -388,7 +406,7 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev, list_add_rcu(&sdev->list, &svm->devs); out: if (!IS_ERR_OR_NULL(svm) && list_empty(&svm->devs)) { - ioasid_set_data(data->hpasid, NULL); + pasid_private_remove(data->hpasid); kfree(svm); } @@ -431,7 +449,7 @@ int intel_svm_unbind_gpasid(struct device *dev, u32 pasid) * the unbind, IOMMU driver will get notified * and perform cleanup. 
*/ - ioasid_set_data(pasid, NULL); + pasid_private_remove(pasid); kfree(svm); } } @@ -547,8 +565,7 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, svm = kzalloc(sizeof(*svm), GFP_KERNEL); if (!svm) { ret = -ENOMEM; - kfree(sdev); - goto out; + goto sdev_err; } if (pasid_max > intel_pasid_max_id) @@ -556,13 +573,16 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, /* Do not use PASID 0, reserved for RID to PASID */ svm->pasid = ioasid_alloc(NULL, PASID_MIN, - pasid_max - 1, svm); + pasid_max - 1, NULL); if (svm->pasid == INVALID_IOASID) { - kfree(svm); - kfree(sdev); ret = -ENOSPC; - goto out; + goto svm_err; } + + ret = pasid_private_add(svm->pasid, svm); + if (ret) + goto pasid_err; + svm->notifier.ops = &intel_mmuops; svm->mm = mm; svm->flags = flags; @@ -571,12 +591,8 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, ret = -ENOMEM; if (mm) { ret = mmu_notifier_register(&svm->notifier, mm); - if (ret) { - ioasid_put(svm->pasid); - kfree(svm); - kfree(sdev); - goto out; - } + if (ret) + goto priv_err; } spin_lock_irqsave(&iommu->lock, iflags); @@ -590,8 +606,13 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, if (ret) { if (mm) mmu_notifier_unregister(&svm->notifier, mm); +priv_err: + pasid_private_remove(svm->pasid); +pasid_err: ioasid_put(svm->pasid); +svm_err: kfree(svm); +sdev_err: kfree(sdev); goto out; } @@ -614,10 +635,8 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, (cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0)); spin_unlock_irqrestore(&iommu->lock, iflags); - if (ret) { - kfree(sdev); - goto out; - } + if (ret) + goto sdev_err; } list_add_rcu(&sdev->list, &svm->devs); success: @@ -670,6 +689,7 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid) load_pasid(svm->mm, PASID_DISABLED); } list_del(&svm->list); + pasid_private_remove(svm->pasid); /* We mandate that no page faults may be outstanding * for the PASID when intel_svm_unbind_mm() is called. * If that is not obeyed, subtle errors will happen. @@ -924,7 +944,7 @@ static irqreturn_t prq_event_thread(int irq, void *d) } if (!svm || svm->pasid != req->pasid) { rcu_read_lock(); - svm = ioasid_find(NULL, req->pasid, NULL); + svm = pasid_private_find(req->pasid); /* It *can't* go away, because the driver is not permitted * to unbind the mm while any page faults are outstanding. * So we only need RCU to protect the internal idr code. */ From 40483774141625b9685b177fb6e1f36de48d33f8 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 10 Jun 2021 10:00:59 +0800 Subject: [PATCH 38/66] iommu/vt-d: Use iommu_sva_alloc(free)_pasid() helpers Align the pasid alloc/free code with the generic helpers defined in the iommu core. This also refactored the SVA binding code to improve the readability. 
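Condensed to its core (error unwinding trimmed), the bind path now follows the iommu-sva-lib contract, with the pasid kept in mm->pasid and reference counted by the shared helpers; a sketch of the resulting intel_svm_bind() flow:

	/* intel_svm_alloc_pasid() wraps iommu_sva_alloc_pasid() */
	ret = intel_svm_alloc_pasid(dev, mm, flags);
	if (ret)
		return ERR_PTR(ret);

	sva = intel_svm_bind_mm(iommu, dev, mm, flags);	/* uses mm->pasid */
	if (IS_ERR_OR_NULL(sva))
		intel_svm_free_pasid(mm);	/* wraps iommu_sva_free_pasid() */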
Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210520031531.712333-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20210610020115.1637656-8-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/Kconfig | 1 + drivers/iommu/intel/iommu.c | 3 + drivers/iommu/intel/svm.c | 284 +++++++++++++++--------------------- include/linux/intel-iommu.h | 1 - 4 files changed, 123 insertions(+), 166 deletions(-) diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig index 7e5b240b801d..a37bd54c5b90 100644 --- a/drivers/iommu/intel/Kconfig +++ b/drivers/iommu/intel/Kconfig @@ -42,6 +42,7 @@ config INTEL_IOMMU_SVM select PCI_PRI select MMU_NOTIFIER select IOASID + select IOMMU_SVA_LIB help Shared Virtual Memory (SVM) provides a facility for devices to access DMA resources through process address space by diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index b0ba187cb7f8..0ca7f8a2f38e 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -5411,6 +5411,9 @@ intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat) if (!info) return -EINVAL; + if (intel_iommu_enable_pasid(info->iommu, dev)) + return -ENODEV; + if (!info->pasid_enabled || !info->pri_enabled || !info->ats_enabled) return -EINVAL; diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 82b0627ad7e7..da4310686ed3 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -23,9 +23,11 @@ #include #include "pasid.h" +#include "../iommu-sva-lib.h" static irqreturn_t prq_event_thread(int irq, void *d); static void intel_svm_drain_prq(struct device *dev, u32 pasid); +#define to_intel_svm_dev(handle) container_of(handle, struct intel_svm_dev, sva) #define PRQ_ORDER 0 @@ -222,7 +224,6 @@ static const struct mmu_notifier_ops intel_mmuops = { }; static DEFINE_MUTEX(pasid_mutex); -static LIST_HEAD(global_svm_list); #define for_each_svm_dev(sdev, svm, d) \ list_for_each_entry((sdev), &(svm)->devs, list) \ @@ -477,79 +478,80 @@ static void load_pasid(struct mm_struct *mm, u32 pasid) mutex_unlock(&mm->context.lock); } -/* Caller must hold pasid_mutex, mm reference */ -static int -intel_svm_bind_mm(struct device *dev, unsigned int flags, - struct mm_struct *mm, struct intel_svm_dev **sd) +static int intel_svm_alloc_pasid(struct device *dev, struct mm_struct *mm, + unsigned int flags) { - struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL); - struct intel_svm *svm = NULL, *t; - struct device_domain_info *info; + ioasid_t max_pasid = dev_is_pci(dev) ? 
+ pci_max_pasids(to_pci_dev(dev)) : intel_pasid_max_id; + + return iommu_sva_alloc_pasid(mm, PASID_MIN, max_pasid - 1); +} + +static void intel_svm_free_pasid(struct mm_struct *mm) +{ + iommu_sva_free_pasid(mm); +} + +static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu, + struct device *dev, + struct mm_struct *mm, + unsigned int flags) +{ + struct device_domain_info *info = get_domain_info(dev); + unsigned long iflags, sflags; struct intel_svm_dev *sdev; - unsigned long iflags; - int pasid_max; - int ret; + struct intel_svm *svm; + int ret = 0; - if (!iommu || dmar_disabled) - return -EINVAL; + svm = pasid_private_find(mm->pasid); + if (!svm) { + svm = kzalloc(sizeof(*svm), GFP_KERNEL); + if (!svm) + return ERR_PTR(-ENOMEM); - if (!intel_svm_capable(iommu)) - return -ENOTSUPP; + svm->pasid = mm->pasid; + svm->mm = mm; + svm->flags = flags; + INIT_LIST_HEAD_RCU(&svm->devs); - if (dev_is_pci(dev)) { - pasid_max = pci_max_pasids(to_pci_dev(dev)); - if (pasid_max < 0) - return -EINVAL; - } else - pasid_max = 1 << 20; + if (!(flags & SVM_FLAG_SUPERVISOR_MODE)) { + svm->notifier.ops = &intel_mmuops; + ret = mmu_notifier_register(&svm->notifier, mm); + if (ret) { + kfree(svm); + return ERR_PTR(ret); + } + } - /* Bind supervisor PASID shuld have mm = NULL */ - if (flags & SVM_FLAG_SUPERVISOR_MODE) { - if (!ecap_srs(iommu->ecap) || mm) { - pr_err("Supervisor PASID with user provided mm.\n"); - return -EINVAL; + ret = pasid_private_add(svm->pasid, svm); + if (ret) { + if (svm->notifier.ops) + mmu_notifier_unregister(&svm->notifier, mm); + kfree(svm); + return ERR_PTR(ret); } } - list_for_each_entry(t, &global_svm_list, list) { - if (t->mm != mm) - continue; - - svm = t; - if (svm->pasid >= pasid_max) { - dev_warn(dev, - "Limited PASID width. Cannot use existing PASID %d\n", - svm->pasid); - ret = -ENOSPC; - goto out; - } - - /* Find the matching device in svm list */ - for_each_svm_dev(sdev, svm, dev) { - sdev->users++; - goto success; - } - - break; + /* Find the matching device in svm list */ + for_each_svm_dev(sdev, svm, dev) { + sdev->users++; + goto success; } sdev = kzalloc(sizeof(*sdev), GFP_KERNEL); if (!sdev) { ret = -ENOMEM; - goto out; + goto free_svm; } + sdev->dev = dev; sdev->iommu = iommu; - - ret = intel_iommu_enable_pasid(iommu, dev); - if (ret) { - kfree(sdev); - goto out; - } - - info = get_domain_info(dev); sdev->did = FLPT_DEFAULT_DID; sdev->sid = PCI_DEVID(info->bus, info->devfn); + sdev->users = 1; + sdev->pasid = svm->pasid; + sdev->sva.dev = dev; + init_rcu_head(&sdev->rcu); if (info->ats_enabled) { sdev->dev_iotlb = 1; sdev->qdep = info->ats_qdep; @@ -557,96 +559,37 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, sdev->qdep = 0; } - /* Finish the setup now we know we're keeping it */ - sdev->users = 1; - init_rcu_head(&sdev->rcu); + /* Setup the pasid table: */ + sflags = (flags & SVM_FLAG_SUPERVISOR_MODE) ? + PASID_FLAG_SUPERVISOR_MODE : 0; + sflags |= cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0; + spin_lock_irqsave(&iommu->lock, iflags); + ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd, mm->pasid, + FLPT_DEFAULT_DID, sflags); + spin_unlock_irqrestore(&iommu->lock, iflags); - if (!svm) { - svm = kzalloc(sizeof(*svm), GFP_KERNEL); - if (!svm) { - ret = -ENOMEM; - goto sdev_err; - } + if (ret) + goto free_sdev; - if (pasid_max > intel_pasid_max_id) - pasid_max = intel_pasid_max_id; + /* The newly allocated pasid is loaded to the mm. 
*/ + if (!(flags & SVM_FLAG_SUPERVISOR_MODE) && list_empty(&svm->devs)) + load_pasid(mm, svm->pasid); - /* Do not use PASID 0, reserved for RID to PASID */ - svm->pasid = ioasid_alloc(NULL, PASID_MIN, - pasid_max - 1, NULL); - if (svm->pasid == INVALID_IOASID) { - ret = -ENOSPC; - goto svm_err; - } - - ret = pasid_private_add(svm->pasid, svm); - if (ret) - goto pasid_err; - - svm->notifier.ops = &intel_mmuops; - svm->mm = mm; - svm->flags = flags; - INIT_LIST_HEAD_RCU(&svm->devs); - INIT_LIST_HEAD(&svm->list); - ret = -ENOMEM; - if (mm) { - ret = mmu_notifier_register(&svm->notifier, mm); - if (ret) - goto priv_err; - } - - spin_lock_irqsave(&iommu->lock, iflags); - ret = intel_pasid_setup_first_level(iommu, dev, - mm ? mm->pgd : init_mm.pgd, - svm->pasid, FLPT_DEFAULT_DID, - (mm ? 0 : PASID_FLAG_SUPERVISOR_MODE) | - (cpu_feature_enabled(X86_FEATURE_LA57) ? - PASID_FLAG_FL5LP : 0)); - spin_unlock_irqrestore(&iommu->lock, iflags); - if (ret) { - if (mm) - mmu_notifier_unregister(&svm->notifier, mm); -priv_err: - pasid_private_remove(svm->pasid); -pasid_err: - ioasid_put(svm->pasid); -svm_err: - kfree(svm); -sdev_err: - kfree(sdev); - goto out; - } - - list_add_tail(&svm->list, &global_svm_list); - if (mm) { - /* The newly allocated pasid is loaded to the mm. */ - load_pasid(mm, svm->pasid); - } - } else { - /* - * Binding a new device with existing PASID, need to setup - * the PASID entry. - */ - spin_lock_irqsave(&iommu->lock, iflags); - ret = intel_pasid_setup_first_level(iommu, dev, - mm ? mm->pgd : init_mm.pgd, - svm->pasid, FLPT_DEFAULT_DID, - (mm ? 0 : PASID_FLAG_SUPERVISOR_MODE) | - (cpu_feature_enabled(X86_FEATURE_LA57) ? - PASID_FLAG_FL5LP : 0)); - spin_unlock_irqrestore(&iommu->lock, iflags); - if (ret) - goto sdev_err; - } list_add_rcu(&sdev->list, &svm->devs); success: - sdev->pasid = svm->pasid; - sdev->sva.dev = dev; - if (sd) - *sd = sdev; - ret = 0; -out: - return ret; + return &sdev->sva; + +free_sdev: + kfree(sdev); +free_svm: + if (list_empty(&svm->devs)) { + if (svm->notifier.ops) + mmu_notifier_unregister(&svm->notifier, mm); + pasid_private_remove(mm->pasid); + kfree(svm); + } + + return ERR_PTR(ret); } /* Caller must hold pasid_mutex */ @@ -655,6 +598,7 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid) struct intel_svm_dev *sdev; struct intel_iommu *iommu; struct intel_svm *svm; + struct mm_struct *mm; int ret = -EINVAL; iommu = device_to_iommu(dev, NULL, NULL); @@ -664,6 +608,7 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid) ret = pasid_to_svm_sdev(dev, pasid, &svm, &sdev); if (ret) goto out; + mm = svm->mm; if (sdev) { sdev->users--; @@ -682,13 +627,12 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid) kfree_rcu(sdev, rcu); if (list_empty(&svm->devs)) { - ioasid_put(svm->pasid); - if (svm->mm) { - mmu_notifier_unregister(&svm->notifier, svm->mm); + intel_svm_free_pasid(mm); + if (svm->notifier.ops) { + mmu_notifier_unregister(&svm->notifier, mm); /* Clear mm's pasid. */ - load_pasid(svm->mm, PASID_DISABLED); + load_pasid(mm, PASID_DISABLED); } - list_del(&svm->list); pasid_private_remove(svm->pasid); /* We mandate that no page faults may be outstanding * for the PASID when intel_svm_unbind_mm() is called. 
@@ -1073,31 +1017,42 @@ static irqreturn_t prq_event_thread(int irq, void *d) return IRQ_RETVAL(handled); } -#define to_intel_svm_dev(handle) container_of(handle, struct intel_svm_dev, sva) -struct iommu_sva * -intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata) +struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata) { - struct iommu_sva *sva = ERR_PTR(-EINVAL); - struct intel_svm_dev *sdev = NULL; + struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL); unsigned int flags = 0; + struct iommu_sva *sva; int ret; - /* - * TODO: Consolidate with generic iommu-sva bind after it is merged. - * It will require shared SVM data structures, i.e. combine io_mm - * and intel_svm etc. - */ if (drvdata) flags = *(unsigned int *)drvdata; - mutex_lock(&pasid_mutex); - ret = intel_svm_bind_mm(dev, flags, mm, &sdev); - if (ret) - sva = ERR_PTR(ret); - else if (sdev) - sva = &sdev->sva; - else - WARN(!sdev, "SVM bind succeeded with no sdev!\n"); + if (flags & SVM_FLAG_SUPERVISOR_MODE) { + if (!ecap_srs(iommu->ecap)) { + dev_err(dev, "%s: Supervisor PASID not supported\n", + iommu->name); + return ERR_PTR(-EOPNOTSUPP); + } + + if (mm) { + dev_err(dev, "%s: Supervisor PASID with user provided mm\n", + iommu->name); + return ERR_PTR(-EINVAL); + } + + mm = &init_mm; + } + + mutex_lock(&pasid_mutex); + ret = intel_svm_alloc_pasid(dev, mm, flags); + if (ret) { + mutex_unlock(&pasid_mutex); + return ERR_PTR(ret); + } + + sva = intel_svm_bind_mm(iommu, dev, mm, flags); + if (IS_ERR_OR_NULL(sva)) + intel_svm_free_pasid(mm); mutex_unlock(&pasid_mutex); return sva; @@ -1105,10 +1060,9 @@ intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata) void intel_svm_unbind(struct iommu_sva *sva) { - struct intel_svm_dev *sdev; + struct intel_svm_dev *sdev = to_intel_svm_dev(sva); mutex_lock(&pasid_mutex); - sdev = to_intel_svm_dev(sva); intel_svm_unbind_mm(sdev->dev, sdev->pasid); mutex_unlock(&pasid_mutex); } diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 03faf20a6817..4e8bb186daa7 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -791,7 +791,6 @@ struct intel_svm { u32 pasid; int gpasid; /* In case that guest PASID is different from host PASID */ struct list_head devs; - struct list_head list; }; #else static inline void intel_svm_check(struct intel_iommu *iommu) {} From 9e52cc0fedb0fd6187a2bd01b859ea8150b588c2 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 10 Jun 2021 10:01:00 +0800 Subject: [PATCH 39/66] iommu/vt-d: Use common helper to lookup svm devices It's common to iterate the svm device list and find a matched device. Add common helpers to do this and consolidate the code. 
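Both helpers share one RCU walk, sketched generically below; note that the result is used after rcu_read_unlock(), which is only safe under the driver's existing rule that a device may not unbind while its page faults are outstanding, so the sdev cannot be freed under the caller:

	rcu_read_lock();
	list_for_each_entry_rcu(t, &svm->devs, list) {
		if (t->sid == sid) {	/* or t->dev == dev */
			sdev = t;
			break;
		}
	}
	rcu_read_unlock();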
Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210520031531.712333-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20210610020115.1637656-9-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/svm.c | 68 +++++++++++++++++++++++---------------- 1 file changed, 40 insertions(+), 28 deletions(-) diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index da4310686ed3..57867ff97bc2 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -48,6 +48,40 @@ static void *pasid_private_find(ioasid_t pasid) return xa_load(&pasid_private_array, pasid); } +static struct intel_svm_dev * +svm_lookup_device_by_sid(struct intel_svm *svm, u16 sid) +{ + struct intel_svm_dev *sdev = NULL, *t; + + rcu_read_lock(); + list_for_each_entry_rcu(t, &svm->devs, list) { + if (t->sid == sid) { + sdev = t; + break; + } + } + rcu_read_unlock(); + + return sdev; +} + +static struct intel_svm_dev * +svm_lookup_device_by_dev(struct intel_svm *svm, struct device *dev) +{ + struct intel_svm_dev *sdev = NULL, *t; + + rcu_read_lock(); + list_for_each_entry_rcu(t, &svm->devs, list) { + if (t->dev == dev) { + sdev = t; + break; + } + } + rcu_read_unlock(); + + return sdev; +} + int intel_svm_enable_prq(struct intel_iommu *iommu) { struct page *pages; @@ -225,15 +259,11 @@ static const struct mmu_notifier_ops intel_mmuops = { static DEFINE_MUTEX(pasid_mutex); -#define for_each_svm_dev(sdev, svm, d) \ - list_for_each_entry((sdev), &(svm)->devs, list) \ - if ((d) != (sdev)->dev) {} else - static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid, struct intel_svm **rsvm, struct intel_svm_dev **rsdev) { - struct intel_svm_dev *d, *sdev = NULL; + struct intel_svm_dev *sdev = NULL; struct intel_svm *svm; /* The caller should hold the pasid_mutex lock */ @@ -256,15 +286,7 @@ static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid, */ if (WARN_ON(list_empty(&svm->devs))) return -EINVAL; - - rcu_read_lock(); - list_for_each_entry_rcu(d, &svm->devs, list) { - if (d->dev == dev) { - sdev = d; - break; - } - } - rcu_read_unlock(); + sdev = svm_lookup_device_by_dev(svm, dev); out: *rsvm = svm; @@ -533,7 +555,8 @@ static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu, } /* Find the matching device in svm list */ - for_each_svm_dev(sdev, svm, dev) { + sdev = svm_lookup_device_by_dev(svm, dev); + if (sdev) { sdev->users++; goto success; } @@ -901,19 +924,8 @@ static irqreturn_t prq_event_thread(int irq, void *d) } } - if (!sdev || sdev->sid != req->rid) { - struct intel_svm_dev *t; - - sdev = NULL; - rcu_read_lock(); - list_for_each_entry_rcu(t, &svm->devs, list) { - if (t->sid == req->rid) { - sdev = t; - break; - } - } - rcu_read_unlock(); - } + if (!sdev || sdev->sid != req->rid) + sdev = svm_lookup_device_by_sid(svm, req->rid); /* Since we're using init_mm.pgd directly, we should never take * any faults on kernel addresses. */ From ae7f09b14b4ff18f65790a906d7c2fe2561568b5 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 10 Jun 2021 10:01:01 +0800 Subject: [PATCH 40/66] iommu/vt-d: Refactor prq_event_thread() Refactor prq_event_thread() by moving handling single prq event out of the main loop. 
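One C aside about the refactored loop below: the bad_req: label sits inside the first if block and is entered by goto from later checks. Labels have function scope, so jumping into another block is legal C (provided no variably modified declarations are skipped); a minimal standalone demonstration:

#include <stdio.h>

int main(void)
{
	int ok = 1;

	if (!ok) {
fail:
		puts("common error path");
		return 1;
	}
	if (ok)
		goto fail;	/* enters the block above */
	return 0;
}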
Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210520031531.712333-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20210610020115.1637656-10-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/svm.c | 239 ++++++++++++++++++++++---------------- 1 file changed, 136 insertions(+), 103 deletions(-) diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 57867ff97bc2..d51ddece4259 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -866,141 +866,174 @@ intel_svm_prq_report(struct device *dev, struct page_req_dsc *desc) return iommu_report_device_fault(dev, &event); } +static void handle_bad_prq_event(struct intel_iommu *iommu, + struct page_req_dsc *req, int result) +{ + struct qi_desc desc; + + pr_err("%s: Invalid page request: %08llx %08llx\n", + iommu->name, ((unsigned long long *)req)[0], + ((unsigned long long *)req)[1]); + + /* + * Per VT-d spec. v3.0 ch7.7, system software must + * respond with page group response if private data + * is present (PDP) or last page in group (LPIG) bit + * is set. This is an additional VT-d feature beyond + * PCI ATS spec. + */ + if (!req->lpig && !req->priv_data_present) + return; + + desc.qw0 = QI_PGRP_PASID(req->pasid) | + QI_PGRP_DID(req->rid) | + QI_PGRP_PASID_P(req->pasid_present) | + QI_PGRP_PDP(req->priv_data_present) | + QI_PGRP_RESP_CODE(result) | + QI_PGRP_RESP_TYPE; + desc.qw1 = QI_PGRP_IDX(req->prg_index) | + QI_PGRP_LPIG(req->lpig); + desc.qw2 = 0; + desc.qw3 = 0; + + if (req->priv_data_present) + memcpy(&desc.qw2, req->priv_data, sizeof(req->priv_data)); + qi_submit_sync(iommu, &desc, 1, 0); +} + +static void handle_single_prq_event(struct intel_iommu *iommu, + struct mm_struct *mm, + struct page_req_dsc *req) +{ + u64 address = (u64)req->addr << VTD_PAGE_SHIFT; + int result = QI_RESP_INVALID; + struct vm_area_struct *vma; + struct qi_desc desc; + unsigned int flags; + vm_fault_t ret; + + /* If the mm is already defunct, don't handle faults. */ + if (!mmget_not_zero(mm)) + goto response; + + mmap_read_lock(mm); + vma = find_extend_vma(mm, address); + if (!vma || address < vma->vm_start) + goto invalid; + + if (access_error(vma, req)) + goto invalid; + + flags = FAULT_FLAG_USER | FAULT_FLAG_REMOTE; + if (req->wr_req) + flags |= FAULT_FLAG_WRITE; + + ret = handle_mm_fault(vma, address, flags, NULL); + if (!(ret & VM_FAULT_ERROR)) + result = QI_RESP_SUCCESS; +invalid: + mmap_read_unlock(mm); + mmput(mm); + +response: + if (!(req->lpig || req->priv_data_present)) + return; + + desc.qw0 = QI_PGRP_PASID(req->pasid) | + QI_PGRP_DID(req->rid) | + QI_PGRP_PASID_P(req->pasid_present) | + QI_PGRP_PDP(req->priv_data_present) | + QI_PGRP_RESP_CODE(result) | + QI_PGRP_RESP_TYPE; + desc.qw1 = QI_PGRP_IDX(req->prg_index) | + QI_PGRP_LPIG(req->lpig); + desc.qw2 = 0; + desc.qw3 = 0; + + if (req->priv_data_present) + memcpy(&desc.qw2, req->priv_data, sizeof(req->priv_data)); + + qi_submit_sync(iommu, &desc, 1, 0); +} + static irqreturn_t prq_event_thread(int irq, void *d) { struct intel_svm_dev *sdev = NULL; struct intel_iommu *iommu = d; struct intel_svm *svm = NULL; - int head, tail, handled = 0; - unsigned int flags = 0; + struct page_req_dsc *req; + int head, tail, handled; + u64 address; - /* Clear PPR bit before reading head/tail registers, to - * ensure that we get a new interrupt if needed. */ + /* + * Clear PPR bit before reading head/tail registers, to ensure that + * we get a new interrupt if needed. 
+ */ writel(DMA_PRS_PPR, iommu->reg + DMAR_PRS_REG); tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK; head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK; + handled = (head != tail); while (head != tail) { - struct vm_area_struct *vma; - struct page_req_dsc *req; - struct qi_desc resp; - int result; - vm_fault_t ret; - u64 address; - - handled = 1; req = &iommu->prq[head / sizeof(*req)]; - result = QI_RESP_INVALID; address = (u64)req->addr << VTD_PAGE_SHIFT; - if (!req->pasid_present) { - pr_err("%s: Page request without PASID: %08llx %08llx\n", - iommu->name, ((unsigned long long *)req)[0], - ((unsigned long long *)req)[1]); - goto no_pasid; + + if (unlikely(!req->pasid_present)) { + pr_err("IOMMU: %s: Page request without PASID\n", + iommu->name); +bad_req: + svm = NULL; + sdev = NULL; + handle_bad_prq_event(iommu, req, QI_RESP_INVALID); + goto prq_advance; } - /* We shall not receive page request for supervisor SVM */ - if (req->pm_req && (req->rd_req | req->wr_req)) { - pr_err("Unexpected page request in Privilege Mode"); - /* No need to find the matching sdev as for bad_req */ - goto no_pasid; + + if (unlikely(!is_canonical_address(address))) { + pr_err("IOMMU: %s: Address is not canonical\n", + iommu->name); + goto bad_req; } - /* DMA read with exec requeset is not supported. */ - if (req->exe_req && req->rd_req) { - pr_err("Execution request not supported\n"); - goto no_pasid; + + if (unlikely(req->pm_req && (req->rd_req | req->wr_req))) { + pr_err("IOMMU: %s: Page request in Privilege Mode\n", + iommu->name); + goto bad_req; } + + if (unlikely(req->exe_req && req->rd_req)) { + pr_err("IOMMU: %s: Execution request not supported\n", + iommu->name); + goto bad_req; + } + if (!svm || svm->pasid != req->pasid) { - rcu_read_lock(); - svm = pasid_private_find(req->pasid); - /* It *can't* go away, because the driver is not permitted + /* + * It can't go away, because the driver is not permitted * to unbind the mm while any page faults are outstanding. - * So we only need RCU to protect the internal idr code. */ - rcu_read_unlock(); - if (IS_ERR_OR_NULL(svm)) { - pr_err("%s: Page request for invalid PASID %d: %08llx %08llx\n", - iommu->name, req->pasid, ((unsigned long long *)req)[0], - ((unsigned long long *)req)[1]); - goto no_pasid; - } + */ + svm = pasid_private_find(req->pasid); + if (IS_ERR_OR_NULL(svm) || (svm->flags & SVM_FLAG_SUPERVISOR_MODE)) + goto bad_req; } - if (!sdev || sdev->sid != req->rid) + if (!sdev || sdev->sid != req->rid) { sdev = svm_lookup_device_by_sid(svm, req->rid); - - /* Since we're using init_mm.pgd directly, we should never take - * any faults on kernel addresses. */ - if (!svm->mm) - goto bad_req; - - /* If address is not canonical, return invalid response */ - if (!is_canonical_address(address)) - goto bad_req; + if (!sdev) + goto bad_req; + } /* * If prq is to be handled outside iommu driver via receiver of * the fault notifiers, we skip the page response here. */ if (svm->flags & SVM_FLAG_GUEST_MODE) { - if (sdev && !intel_svm_prq_report(sdev->dev, req)) + if (!intel_svm_prq_report(sdev->dev, req)) goto prq_advance; else goto bad_req; } - /* If the mm is already defunct, don't handle faults. 
*/ - if (!mmget_not_zero(svm->mm)) - goto bad_req; - - mmap_read_lock(svm->mm); - vma = find_extend_vma(svm->mm, address); - if (!vma || address < vma->vm_start) - goto invalid; - - if (access_error(vma, req)) - goto invalid; - - flags = FAULT_FLAG_USER | FAULT_FLAG_REMOTE; - if (req->wr_req) - flags |= FAULT_FLAG_WRITE; - - ret = handle_mm_fault(vma, address, flags, NULL); - if (ret & VM_FAULT_ERROR) - goto invalid; - - result = QI_RESP_SUCCESS; -invalid: - mmap_read_unlock(svm->mm); - mmput(svm->mm); -bad_req: - /* We get here in the error case where the PASID lookup failed, - and these can be NULL. Do not use them below this point! */ - sdev = NULL; - svm = NULL; -no_pasid: - if (req->lpig || req->priv_data_present) { - /* - * Per VT-d spec. v3.0 ch7.7, system software must - * respond with page group response if private data - * is present (PDP) or last page in group (LPIG) bit - * is set. This is an additional VT-d feature beyond - * PCI ATS spec. - */ - resp.qw0 = QI_PGRP_PASID(req->pasid) | - QI_PGRP_DID(req->rid) | - QI_PGRP_PASID_P(req->pasid_present) | - QI_PGRP_PDP(req->priv_data_present) | - QI_PGRP_RESP_CODE(result) | - QI_PGRP_RESP_TYPE; - resp.qw1 = QI_PGRP_IDX(req->prg_index) | - QI_PGRP_LPIG(req->lpig); - resp.qw2 = 0; - resp.qw3 = 0; - - if (req->priv_data_present) - memcpy(&resp.qw2, req->priv_data, - sizeof(req->priv_data)); - qi_submit_sync(iommu, &resp, 1, 0); - } + handle_single_prq_event(iommu, svm->mm, req); prq_advance: head = (head + sizeof(*req)) & PRQ_RING_MASK; } From 4c82b88696ac57810ab923b3c5b0734646b9b69f Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 10 Jun 2021 10:01:02 +0800 Subject: [PATCH 41/66] iommu/vt-d: Allocate/register iopf queue for sva devices This allocates and registers the iopf queue infrastructure for devices which want to support IO page fault for SVA. 
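In sketch form, the setup pairs one queue per IOMMU with per-device registration (names as introduced in the diff below; error handling elided):

	/* At PRQ setup time: allocate one iopf queue per IOMMU. */
	iommu->iopf_queue = iopf_queue_alloc(iommu->iopfq_name);

	/* At SVA enable time: attach the device to that queue. */
	ret = iopf_queue_add_device(iommu->iopf_queue, dev);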
Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210520031531.712333-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20210610020115.1637656-11-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 66 ++++++++++++++++++++++++++----------- drivers/iommu/intel/svm.c | 37 +++++++++++++++++---- include/linux/intel-iommu.h | 2 ++ 3 files changed, 79 insertions(+), 26 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 0ca7f8a2f38e..e78773d46d7d 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -46,6 +46,7 @@ #include #include "../irq_remapping.h" +#include "../iommu-sva-lib.h" #include "pasid.h" #include "cap_audit.h" @@ -5338,6 +5339,34 @@ static int intel_iommu_disable_auxd(struct device *dev) return 0; } +static int intel_iommu_enable_sva(struct device *dev) +{ + struct device_domain_info *info = get_domain_info(dev); + struct intel_iommu *iommu = info->iommu; + + if (!info || !iommu || dmar_disabled) + return -EINVAL; + + if (!(iommu->flags & VTD_FLAG_SVM_CAPABLE)) + return -ENODEV; + + if (intel_iommu_enable_pasid(iommu, dev)) + return -ENODEV; + + if (!info->pasid_enabled || !info->pri_enabled || !info->ats_enabled) + return -EINVAL; + + return iopf_queue_add_device(iommu->iopf_queue, dev); +} + +static int intel_iommu_disable_sva(struct device *dev) +{ + struct device_domain_info *info = get_domain_info(dev); + struct intel_iommu *iommu = info->iommu; + + return iopf_queue_remove_device(iommu->iopf_queue, dev); +} + /* * A PCI express designated vendor specific extended capability is defined * in the section 3.7 of Intel scalable I/O virtualization technical spec @@ -5399,38 +5428,37 @@ intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat) static int intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat) { - if (feat == IOMMU_DEV_FEAT_AUX) + switch (feat) { + case IOMMU_DEV_FEAT_AUX: return intel_iommu_enable_auxd(dev); - if (feat == IOMMU_DEV_FEAT_IOPF) + case IOMMU_DEV_FEAT_IOPF: return intel_iommu_dev_has_feat(dev, feat) ? 
0 : -ENODEV; - if (feat == IOMMU_DEV_FEAT_SVA) { - struct device_domain_info *info = get_domain_info(dev); + case IOMMU_DEV_FEAT_SVA: + return intel_iommu_enable_sva(dev); - if (!info) - return -EINVAL; - - if (intel_iommu_enable_pasid(info->iommu, dev)) - return -ENODEV; - - if (!info->pasid_enabled || !info->pri_enabled || !info->ats_enabled) - return -EINVAL; - - if (info->iommu->flags & VTD_FLAG_SVM_CAPABLE) - return 0; + default: + return -ENODEV; } - - return -ENODEV; } static int intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat) { - if (feat == IOMMU_DEV_FEAT_AUX) + switch (feat) { + case IOMMU_DEV_FEAT_AUX: return intel_iommu_disable_auxd(dev); - return -ENODEV; + case IOMMU_DEV_FEAT_IOPF: + return 0; + + case IOMMU_DEV_FEAT_SVA: + return intel_iommu_disable_sva(dev); + + default: + return -ENODEV; + } } static bool diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index d51ddece4259..4dc3ab36e9ae 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -84,6 +84,7 @@ svm_lookup_device_by_dev(struct intel_svm *svm, struct device *dev) int intel_svm_enable_prq(struct intel_iommu *iommu) { + struct iopf_queue *iopfq; struct page *pages; int irq, ret; @@ -100,13 +101,20 @@ int intel_svm_enable_prq(struct intel_iommu *iommu) pr_err("IOMMU: %s: Failed to create IRQ vector for page request queue\n", iommu->name); ret = -EINVAL; - err: - free_pages((unsigned long)iommu->prq, PRQ_ORDER); - iommu->prq = NULL; - return ret; + goto free_prq; } iommu->pr_irq = irq; + snprintf(iommu->iopfq_name, sizeof(iommu->iopfq_name), + "dmar%d-iopfq", iommu->seq_id); + iopfq = iopf_queue_alloc(iommu->iopfq_name); + if (!iopfq) { + pr_err("IOMMU: %s: Failed to allocate iopf queue\n", iommu->name); + ret = -ENOMEM; + goto free_hwirq; + } + iommu->iopf_queue = iopfq; + snprintf(iommu->prq_name, sizeof(iommu->prq_name), "dmar%d-prq", iommu->seq_id); ret = request_threaded_irq(irq, NULL, prq_event_thread, IRQF_ONESHOT, @@ -114,9 +122,7 @@ int intel_svm_enable_prq(struct intel_iommu *iommu) if (ret) { pr_err("IOMMU: %s: Failed to request IRQ for page request queue\n", iommu->name); - dmar_free_hwirq(irq); - iommu->pr_irq = 0; - goto err; + goto free_iopfq; } dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL); dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL); @@ -125,6 +131,18 @@ int intel_svm_enable_prq(struct intel_iommu *iommu) init_completion(&iommu->prq_complete); return 0; + +free_iopfq: + iopf_queue_free(iommu->iopf_queue); + iommu->iopf_queue = NULL; +free_hwirq: + dmar_free_hwirq(irq); + iommu->pr_irq = 0; +free_prq: + free_pages((unsigned long)iommu->prq, PRQ_ORDER); + iommu->prq = NULL; + + return ret; } int intel_svm_finish_prq(struct intel_iommu *iommu) @@ -139,6 +157,11 @@ int intel_svm_finish_prq(struct intel_iommu *iommu) iommu->pr_irq = 0; } + if (iommu->iopf_queue) { + iopf_queue_free(iommu->iopf_queue); + iommu->iopf_queue = NULL; + } + free_pages((unsigned long)iommu->prq, PRQ_ORDER); iommu->prq = NULL; diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 4e8bb186daa7..222520d149c1 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -606,6 +606,8 @@ struct intel_iommu { struct completion prq_complete; struct ioasid_allocator_ops pasid_allocator; /* Custom allocator for PASIDs */ #endif + struct iopf_queue *iopf_queue; + unsigned char iopfq_name[16]; struct q_inval *qi; /* Queued invalidation info */ u32 *iommu_state; /* Store iommu states between suspend and resume.*/ From 
d5b9e4bfe0d8848aaf428bb4bbcc270fecadef35 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 10 Jun 2021 10:01:03 +0800 Subject: [PATCH 42/66] iommu/vt-d: Report prq to io-pgfault framework Let the IO page fault requests get handled through the io-pgfault framework. Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210520031531.712333-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20210610020115.1637656-12-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 14 ++++++- drivers/iommu/intel/svm.c | 84 +++---------------------------------- 2 files changed, 17 insertions(+), 81 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index e78773d46d7d..c45d4946f92d 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -5343,6 +5343,7 @@ static int intel_iommu_enable_sva(struct device *dev) { struct device_domain_info *info = get_domain_info(dev); struct intel_iommu *iommu = info->iommu; + int ret; if (!info || !iommu || dmar_disabled) return -EINVAL; @@ -5356,15 +5357,24 @@ static int intel_iommu_enable_sva(struct device *dev) if (!info->pasid_enabled || !info->pri_enabled || !info->ats_enabled) return -EINVAL; - return iopf_queue_add_device(iommu->iopf_queue, dev); + ret = iopf_queue_add_device(iommu->iopf_queue, dev); + if (!ret) + ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev); + + return ret; } static int intel_iommu_disable_sva(struct device *dev) { struct device_domain_info *info = get_domain_info(dev); struct intel_iommu *iommu = info->iommu; + int ret; - return iopf_queue_remove_device(iommu->iopf_queue, dev); + ret = iommu_unregister_device_fault_handler(dev); + if (!ret) + ret = iopf_queue_remove_device(iommu->iopf_queue, dev); + + return ret; } /* diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 4dc3ab36e9ae..ade157b64ce7 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -724,22 +724,6 @@ struct page_req_dsc { #define PRQ_RING_MASK ((0x1000 << PRQ_ORDER) - 0x20) -static bool access_error(struct vm_area_struct *vma, struct page_req_dsc *req) -{ - unsigned long requested = 0; - - if (req->exe_req) - requested |= VM_EXEC; - - if (req->rd_req) - requested |= VM_READ; - - if (req->wr_req) - requested |= VM_WRITE; - - return (requested & ~vma->vm_flags) != 0; -} - static bool is_canonical_address(u64 addr) { int shift = 64 - (__VIRTUAL_MASK_SHIFT + 1); @@ -809,6 +793,8 @@ static void intel_svm_drain_prq(struct device *dev, u32 pasid) goto prq_retry; } + iopf_queue_flush_dev(dev); + /* * Perform steps described in VT-d spec CH7.10 to drain page * requests and responses in hardware. @@ -924,61 +910,6 @@ static void handle_bad_prq_event(struct intel_iommu *iommu, qi_submit_sync(iommu, &desc, 1, 0); } -static void handle_single_prq_event(struct intel_iommu *iommu, - struct mm_struct *mm, - struct page_req_dsc *req) -{ - u64 address = (u64)req->addr << VTD_PAGE_SHIFT; - int result = QI_RESP_INVALID; - struct vm_area_struct *vma; - struct qi_desc desc; - unsigned int flags; - vm_fault_t ret; - - /* If the mm is already defunct, don't handle faults. 
*/ - if (!mmget_not_zero(mm)) - goto response; - - mmap_read_lock(mm); - vma = find_extend_vma(mm, address); - if (!vma || address < vma->vm_start) - goto invalid; - - if (access_error(vma, req)) - goto invalid; - - flags = FAULT_FLAG_USER | FAULT_FLAG_REMOTE; - if (req->wr_req) - flags |= FAULT_FLAG_WRITE; - - ret = handle_mm_fault(vma, address, flags, NULL); - if (!(ret & VM_FAULT_ERROR)) - result = QI_RESP_SUCCESS; -invalid: - mmap_read_unlock(mm); - mmput(mm); - -response: - if (!(req->lpig || req->priv_data_present)) - return; - - desc.qw0 = QI_PGRP_PASID(req->pasid) | - QI_PGRP_DID(req->rid) | - QI_PGRP_PASID_P(req->pasid_present) | - QI_PGRP_PDP(req->priv_data_present) | - QI_PGRP_RESP_CODE(result) | - QI_PGRP_RESP_TYPE; - desc.qw1 = QI_PGRP_IDX(req->prg_index) | - QI_PGRP_LPIG(req->lpig); - desc.qw2 = 0; - desc.qw3 = 0; - - if (req->priv_data_present) - memcpy(&desc.qw2, req->priv_data, sizeof(req->priv_data)); - - qi_submit_sync(iommu, &desc, 1, 0); -} - static irqreturn_t prq_event_thread(int irq, void *d) { struct intel_svm_dev *sdev = NULL; @@ -1049,14 +980,8 @@ static irqreturn_t prq_event_thread(int irq, void *d) * If prq is to be handled outside iommu driver via receiver of * the fault notifiers, we skip the page response here. */ - if (svm->flags & SVM_FLAG_GUEST_MODE) { - if (!intel_svm_prq_report(sdev->dev, req)) - goto prq_advance; - else - goto bad_req; - } - - handle_single_prq_event(iommu, svm->mm, req); + if (intel_svm_prq_report(sdev->dev, req)) + handle_bad_prq_event(iommu, req, QI_RESP_INVALID); prq_advance: head = (head + sizeof(*req)) & PRQ_RING_MASK; } @@ -1073,6 +998,7 @@ static irqreturn_t prq_event_thread(int irq, void *d) head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK; tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK; if (head == tail) { + iopf_queue_discard_partial(iommu->iopf_queue); writel(DMA_PRS_PRO, iommu->reg + DMAR_PRS_REG); pr_info_ratelimited("IOMMU: %s: PRQ overflow cleared", iommu->name); From e93a67f5a0eef3e9ab5b4649cac5c3b831c6a9db Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 10 Jun 2021 10:01:04 +0800 Subject: [PATCH 43/66] iommu/vt-d: Add prq_report trace event This adds a new trace event to track the page fault request report. This event will provide almost all information defined in a page request descriptor. A sample output: | prq_report: dmar0/0000:00:0a.0 seq# 1: rid=0x50 addr=0x559ef6f97 r---- pasid=0x2 index=0x1 | prq_report: dmar0/0000:00:0a.0 seq# 2: rid=0x50 addr=0x559ef6f9c rw--l pasid=0x2 index=0x1 | prq_report: dmar0/0000:00:0a.0 seq# 3: rid=0x50 addr=0x559ef6f98 r---- pasid=0x2 index=0x1 | prq_report: dmar0/0000:00:0a.0 seq# 4: rid=0x50 addr=0x559ef6f9d rw--l pasid=0x2 index=0x1 | prq_report: dmar0/0000:00:0a.0 seq# 5: rid=0x50 addr=0x559ef6f99 r---- pasid=0x2 index=0x1 | prq_report: dmar0/0000:00:0a.0 seq# 6: rid=0x50 addr=0x559ef6f9e rw--l pasid=0x2 index=0x1 | prq_report: dmar0/0000:00:0a.0 seq# 7: rid=0x50 addr=0x559ef6f9a r---- pasid=0x2 index=0x1 | prq_report: dmar0/0000:00:0a.0 seq# 8: rid=0x50 addr=0x559ef6f9f rw--l pasid=0x2 index=0x1 This will be helpful for I/O page fault related debugging. 
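In the sample output, the five flag characters correspond to the read, write, execute, privileged-mode and last-page bits of the descriptor ('-' when the bit is clear). The decode helper added below can also be called directly; a minimal sketch (the buffer size is an arbitrary choice for this example):

	char buf[256];	/* arbitrary size for this sketch */

	pr_debug("%s\n", decode_prq_descriptor(buf, sizeof(buf),
					       dw0, dw1, dw2, dw3));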
Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210520031531.712333-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20210610020115.1637656-13-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/svm.c | 7 ++++++ include/linux/intel-iommu.h | 29 +++++++++++++++++++++++ include/trace/events/intel_iommu.h | 37 ++++++++++++++++++++++++++++++ 3 files changed, 73 insertions(+) diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index ade157b64ce7..d3d028c6a727 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "pasid.h" #include "../iommu-sva-lib.h" @@ -976,12 +977,18 @@ static irqreturn_t prq_event_thread(int irq, void *d) goto bad_req; } + sdev->prq_seq_number++; + /* * If prq is to be handled outside iommu driver via receiver of * the fault notifiers, we skip the page response here. */ if (intel_svm_prq_report(sdev->dev, req)) handle_bad_prq_event(iommu, req, QI_RESP_INVALID); + + trace_prq_report(iommu, sdev->dev, req->qw_0, req->qw_1, + req->priv_data[0], req->priv_data[1], + sdev->prq_seq_number); prq_advance: head = (head + sizeof(*req)) & PRQ_RING_MASK; } diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 222520d149c1..98b04fa9373e 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -778,6 +778,7 @@ struct intel_svm_dev { struct device *dev; struct intel_iommu *iommu; struct iommu_sva sva; + unsigned long prq_seq_number; u32 pasid; int users; u16 did; @@ -828,4 +829,32 @@ static inline int iommu_calculate_max_sagaw(struct intel_iommu *iommu) #define intel_iommu_enabled (0) #endif +static inline const char *decode_prq_descriptor(char *str, size_t size, + u64 dw0, u64 dw1, u64 dw2, u64 dw3) +{ + char *buf = str; + int bytes; + + bytes = snprintf(buf, size, + "rid=0x%llx addr=0x%llx %c%c%c%c%c pasid=0x%llx index=0x%llx", + FIELD_GET(GENMASK_ULL(31, 16), dw0), + FIELD_GET(GENMASK_ULL(63, 12), dw1), + dw1 & BIT_ULL(0) ? 'r' : '-', + dw1 & BIT_ULL(1) ? 'w' : '-', + dw0 & BIT_ULL(52) ? 'x' : '-', + dw0 & BIT_ULL(53) ? 'p' : '-', + dw1 & BIT_ULL(2) ? 
'l' : '-', + FIELD_GET(GENMASK_ULL(51, 32), dw0), + FIELD_GET(GENMASK_ULL(11, 3), dw1)); + + /* Private Data */ + if (dw0 & BIT_ULL(9)) { + size -= bytes; + buf += bytes; + snprintf(buf, size, " private=0x%llx/0x%llx\n", dw2, dw3); + } + + return str; +} + #endif diff --git a/include/trace/events/intel_iommu.h b/include/trace/events/intel_iommu.h index d233f2916584..e5c1ca6d16ee 100644 --- a/include/trace/events/intel_iommu.h +++ b/include/trace/events/intel_iommu.h @@ -15,6 +15,8 @@ #include #include +#define MSG_MAX 256 + TRACE_EVENT(qi_submit, TP_PROTO(struct intel_iommu *iommu, u64 qw0, u64 qw1, u64 qw2, u64 qw3), @@ -51,6 +53,41 @@ TRACE_EVENT(qi_submit, __entry->qw0, __entry->qw1, __entry->qw2, __entry->qw3 ) ); + +TRACE_EVENT(prq_report, + TP_PROTO(struct intel_iommu *iommu, struct device *dev, + u64 dw0, u64 dw1, u64 dw2, u64 dw3, + unsigned long seq), + + TP_ARGS(iommu, dev, dw0, dw1, dw2, dw3, seq), + + TP_STRUCT__entry( + __field(u64, dw0) + __field(u64, dw1) + __field(u64, dw2) + __field(u64, dw3) + __field(unsigned long, seq) + __string(iommu, iommu->name) + __string(dev, dev_name(dev)) + __dynamic_array(char, buff, MSG_MAX) + ), + + TP_fast_assign( + __entry->dw0 = dw0; + __entry->dw1 = dw1; + __entry->dw2 = dw2; + __entry->dw3 = dw3; + __entry->seq = seq; + __assign_str(iommu, iommu->name); + __assign_str(dev, dev_name(dev)); + ), + + TP_printk("%s/%s seq# %ld: %s", + __get_str(iommu), __get_str(dev), __entry->seq, + decode_prq_descriptor(__get_str(buff), MSG_MAX, __entry->dw0, + __entry->dw1, __entry->dw2, __entry->dw3) + ) +); #endif /* _TRACE_INTEL_IOMMU_H */ /* This part must be outside protection */ From 55ee5e67a59a1b6f388d7a1c7b24022145f47a3e Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 10 Jun 2021 10:01:05 +0800 Subject: [PATCH 44/66] iommu/vt-d: Add common code for dmar latency performance monitors The execution time of some operations is very performance critical, such as cache invalidation and PRQ processing time. This adds some common code to monitor the execution time range of those operations. The interfaces include enabling/disabling, checking status, updating sampling data and providing a common string format for users. 
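The intended calling pattern is a cheap enabled-check around a pair of ktime samples; in sketch form (this mirrors how the later patches in this series use the interface):

	s64 start = 0;

	if (dmar_latency_enabled(iommu, DMAR_LATENCY_INV_IOTLB))
		start = ktime_to_ns(ktime_get());

	/* ... submit the invalidation and wait for completion ... */

	if (start)
		dmar_latency_update(iommu, DMAR_LATENCY_INV_IOTLB,
				    ktime_to_ns(ktime_get()) - start);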
Signed-off-by: Fenghua Yu Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210520031531.712333-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20210610020115.1637656-14-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/Kconfig | 3 + drivers/iommu/intel/Makefile | 1 + drivers/iommu/intel/perf.c | 166 +++++++++++++++++++++++++++++++++++ drivers/iommu/intel/perf.h | 73 +++++++++++++++ include/linux/intel-iommu.h | 1 + 5 files changed, 244 insertions(+) create mode 100644 drivers/iommu/intel/perf.c create mode 100644 drivers/iommu/intel/perf.h diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig index a37bd54c5b90..59be5447b775 100644 --- a/drivers/iommu/intel/Kconfig +++ b/drivers/iommu/intel/Kconfig @@ -3,6 +3,9 @@ config DMAR_TABLE bool +config DMAR_PERF + bool + config INTEL_IOMMU bool "Support for Intel IOMMU using DMA Remapping Devices" depends on PCI_MSI && ACPI && (X86 || IA64) diff --git a/drivers/iommu/intel/Makefile b/drivers/iommu/intel/Makefile index ae236ec7d219..fa0dae16441c 100644 --- a/drivers/iommu/intel/Makefile +++ b/drivers/iommu/intel/Makefile @@ -2,6 +2,7 @@ obj-$(CONFIG_DMAR_TABLE) += dmar.o obj-$(CONFIG_INTEL_IOMMU) += iommu.o pasid.o obj-$(CONFIG_DMAR_TABLE) += trace.o cap_audit.o +obj-$(CONFIG_DMAR_PERF) += perf.o obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += debugfs.o obj-$(CONFIG_INTEL_IOMMU_SVM) += svm.o obj-$(CONFIG_IRQ_REMAP) += irq_remapping.o diff --git a/drivers/iommu/intel/perf.c b/drivers/iommu/intel/perf.c new file mode 100644 index 000000000000..faaa96dda437 --- /dev/null +++ b/drivers/iommu/intel/perf.c @@ -0,0 +1,166 @@ +// SPDX-License-Identifier: GPL-2.0 +/** + * perf.c - performance monitor + * + * Copyright (C) 2021 Intel Corporation + * + * Author: Lu Baolu + * Fenghua Yu + */ + +#include +#include + +#include "perf.h" + +static DEFINE_SPINLOCK(latency_lock); + +bool dmar_latency_enabled(struct intel_iommu *iommu, enum latency_type type) +{ + struct latency_statistic *lstat = iommu->perf_statistic; + + return lstat && lstat[type].enabled; +} + +int dmar_latency_enable(struct intel_iommu *iommu, enum latency_type type) +{ + struct latency_statistic *lstat; + unsigned long flags; + int ret = -EBUSY; + + if (dmar_latency_enabled(iommu, type)) + return 0; + + spin_lock_irqsave(&latency_lock, flags); + if (!iommu->perf_statistic) { + iommu->perf_statistic = kzalloc(sizeof(*lstat) * DMAR_LATENCY_NUM, + GFP_ATOMIC); + if (!iommu->perf_statistic) { + ret = -ENOMEM; + goto unlock_out; + } + } + + lstat = iommu->perf_statistic; + + if (!lstat[type].enabled) { + lstat[type].enabled = true; + lstat[type].counter[COUNTS_MIN] = UINT_MAX; + ret = 0; + } +unlock_out: + spin_unlock_irqrestore(&latency_lock, flags); + + return ret; +} + +void dmar_latency_disable(struct intel_iommu *iommu, enum latency_type type) +{ + struct latency_statistic *lstat = iommu->perf_statistic; + unsigned long flags; + + if (!dmar_latency_enabled(iommu, type)) + return; + + spin_lock_irqsave(&latency_lock, flags); + memset(&lstat[type], 0, sizeof(*lstat) * DMAR_LATENCY_NUM); + spin_unlock_irqrestore(&latency_lock, flags); +} + +void dmar_latency_update(struct intel_iommu *iommu, enum latency_type type, u64 latency) +{ + struct latency_statistic *lstat = iommu->perf_statistic; + unsigned long flags; + u64 min, max; + + if (!dmar_latency_enabled(iommu, type)) + return; + + spin_lock_irqsave(&latency_lock, flags); + if (latency < 100) + lstat[type].counter[COUNTS_10e2]++; + else if (latency < 1000) + 
lstat[type].counter[COUNTS_10e3]++; + else if (latency < 10000) + lstat[type].counter[COUNTS_10e4]++; + else if (latency < 100000) + lstat[type].counter[COUNTS_10e5]++; + else if (latency < 1000000) + lstat[type].counter[COUNTS_10e6]++; + else if (latency < 10000000) + lstat[type].counter[COUNTS_10e7]++; + else + lstat[type].counter[COUNTS_10e8_plus]++; + + min = lstat[type].counter[COUNTS_MIN]; + max = lstat[type].counter[COUNTS_MAX]; + lstat[type].counter[COUNTS_MIN] = min_t(u64, min, latency); + lstat[type].counter[COUNTS_MAX] = max_t(u64, max, latency); + lstat[type].counter[COUNTS_SUM] += latency; + lstat[type].samples++; + spin_unlock_irqrestore(&latency_lock, flags); +} + +static char *latency_counter_names[] = { + " <0.1us", + " 0.1us-1us", " 1us-10us", " 10us-100us", + " 100us-1ms", " 1ms-10ms", " >=10ms", + " min(us)", " max(us)", " average(us)" +}; + +static char *latency_type_names[] = { + " inv_iotlb", " inv_devtlb", " inv_iec", + " svm_prq" +}; + +int dmar_latency_snapshot(struct intel_iommu *iommu, char *str, size_t size) +{ + struct latency_statistic *lstat = iommu->perf_statistic; + unsigned long flags; + int bytes = 0, i, j; + + memset(str, 0, size); + + for (i = 0; i < COUNTS_NUM; i++) + bytes += snprintf(str + bytes, size - bytes, + "%s", latency_counter_names[i]); + + spin_lock_irqsave(&latency_lock, flags); + for (i = 0; i < DMAR_LATENCY_NUM; i++) { + if (!dmar_latency_enabled(iommu, i)) + continue; + + bytes += snprintf(str + bytes, size - bytes, + "\n%s", latency_type_names[i]); + + for (j = 0; j < COUNTS_NUM; j++) { + u64 val = lstat[i].counter[j]; + + switch (j) { + case COUNTS_MIN: + if (val == UINT_MAX) + val = 0; + else + val /= 1000; + break; + case COUNTS_MAX: + val /= 1000; + break; + case COUNTS_SUM: + if (lstat[i].samples) + val /= (lstat[i].samples * 1000); + else + val = 0; + break; + default: + break; + } + + bytes += snprintf(str + bytes, size - bytes, + "%12lld", val); + } + } + spin_unlock_irqrestore(&latency_lock, flags); + + return bytes; +} diff --git a/drivers/iommu/intel/perf.h b/drivers/iommu/intel/perf.h new file mode 100644 index 000000000000..fd6db8049d1a --- /dev/null +++ b/drivers/iommu/intel/perf.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * perf.h - performance monitor header + * + * Copyright (C) 2021 Intel Corporation + * + * Author: Lu Baolu + */ + +enum latency_type { + DMAR_LATENCY_INV_IOTLB = 0, + DMAR_LATENCY_INV_DEVTLB, + DMAR_LATENCY_INV_IEC, + DMAR_LATENCY_PRQ, + DMAR_LATENCY_NUM +}; + +enum latency_count { + COUNTS_10e2 = 0, /* < 0.1us */ + COUNTS_10e3, /* 0.1us ~ 1us */ + COUNTS_10e4, /* 1us ~ 10us */ + COUNTS_10e5, /* 10us ~ 100us */ + COUNTS_10e6, /* 100us ~ 1ms */ + COUNTS_10e7, /* 1ms ~ 10ms */ + COUNTS_10e8_plus, /* 10ms and plus*/ + COUNTS_MIN, + COUNTS_MAX, + COUNTS_SUM, + COUNTS_NUM +}; + +struct latency_statistic { + bool enabled; + u64 counter[COUNTS_NUM]; + u64 samples; +}; + +#ifdef CONFIG_DMAR_PERF +int dmar_latency_enable(struct intel_iommu *iommu, enum latency_type type); +void dmar_latency_disable(struct intel_iommu *iommu, enum latency_type type); +bool dmar_latency_enabled(struct intel_iommu *iommu, enum latency_type type); +void dmar_latency_update(struct intel_iommu *iommu, enum latency_type type, + u64 latency); +int dmar_latency_snapshot(struct intel_iommu *iommu, char *str, size_t size); +#else +static inline int +dmar_latency_enable(struct intel_iommu *iommu, enum latency_type type) +{ + return -EINVAL; +} + +static inline void +dmar_latency_disable(struct intel_iommu *iommu, enum 
latency_type type) +{ +} + +static inline bool +dmar_latency_enabled(struct intel_iommu *iommu, enum latency_type type) +{ + return false; +} + +static inline void +dmar_latency_update(struct intel_iommu *iommu, enum latency_type type, u64 latency) +{ +} + +static inline int +dmar_latency_snapshot(struct intel_iommu *iommu, char *str, size_t size) +{ + return 0; +} +#endif /* CONFIG_DMAR_PERF */ diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 98b04fa9373e..f5cf31dd7280 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -621,6 +621,7 @@ struct intel_iommu { u32 flags; /* Software defined flags */ struct dmar_drhd_unit *drhd; + void *perf_statistic; }; /* Per subdevice private data */ From 456bb0b97f00fe8defba155c0a4c48d951635395 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 10 Jun 2021 10:01:06 +0800 Subject: [PATCH 45/66] iommu/vt-d: Expose latency monitor data through debugfs A debugfs interface /sys/kernel/debug/iommu/intel/dmar_perf_latency is created to control and show counts of execution time ranges for various types per DMAR. The interface may help debug any potential performance issue. By default, the interface is disabled. Possible write value of /sys/kernel/debug/iommu/intel/dmar_perf_latency 0 - disable sampling all latency data 1 - enable sampling IOTLB invalidation latency data 2 - enable sampling devTLB invalidation latency data 3 - enable sampling intr entry cache invalidation latency data 4 - enable sampling prq handling latency data Read /sys/kernel/debug/iommu/intel/dmar_perf_latency gives a snapshot of sampling result of all enabled monitors. Signed-off-by: Fenghua Yu Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210520031531.712333-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20210610020115.1637656-15-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/Kconfig | 1 + drivers/iommu/intel/debugfs.c | 111 ++++++++++++++++++++++++++++++++++ 2 files changed, 112 insertions(+) diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig index 59be5447b775..43ebd8af11c5 100644 --- a/drivers/iommu/intel/Kconfig +++ b/drivers/iommu/intel/Kconfig @@ -28,6 +28,7 @@ config INTEL_IOMMU config INTEL_IOMMU_DEBUGFS bool "Export Intel IOMMU internals in Debugfs" depends on INTEL_IOMMU && IOMMU_DEBUGFS + select DMAR_PERF help !!!WARNING!!! 
diff --git a/drivers/iommu/intel/debugfs.c b/drivers/iommu/intel/debugfs.c index efea7f02abd9..62e23ff3c987 100644 --- a/drivers/iommu/intel/debugfs.c +++ b/drivers/iommu/intel/debugfs.c @@ -16,6 +16,7 @@ #include #include "pasid.h" +#include "perf.h" struct tbl_walk { u16 bus; @@ -31,6 +32,9 @@ struct iommu_regset { const char *regs; }; +#define DEBUG_BUFFER_SIZE 1024 +static char debug_buf[DEBUG_BUFFER_SIZE]; + #define IOMMU_REGSET_ENTRY(_reg_) \ { DMAR_##_reg_##_REG, __stringify(_reg_) } @@ -538,6 +542,111 @@ static int ir_translation_struct_show(struct seq_file *m, void *unused) DEFINE_SHOW_ATTRIBUTE(ir_translation_struct); #endif +static void latency_show_one(struct seq_file *m, struct intel_iommu *iommu, + struct dmar_drhd_unit *drhd) +{ + int ret; + + seq_printf(m, "IOMMU: %s Register Base Address: %llx\n", + iommu->name, drhd->reg_base_addr); + + ret = dmar_latency_snapshot(iommu, debug_buf, DEBUG_BUFFER_SIZE); + if (ret < 0) + seq_puts(m, "Failed to get latency snapshot"); + else + seq_puts(m, debug_buf); + seq_puts(m, "\n"); +} + +static int latency_show(struct seq_file *m, void *v) +{ + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + + rcu_read_lock(); + for_each_active_iommu(iommu, drhd) + latency_show_one(m, iommu, drhd); + rcu_read_unlock(); + + return 0; +} + +static int dmar_perf_latency_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, latency_show, NULL); +} + +static ssize_t dmar_perf_latency_write(struct file *filp, + const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + int counting; + char buf[64]; + + if (cnt > 63) + cnt = 63; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + + if (kstrtoint(buf, 0, &counting)) + return -EINVAL; + + switch (counting) { + case 0: + rcu_read_lock(); + for_each_active_iommu(iommu, drhd) { + dmar_latency_disable(iommu, DMAR_LATENCY_INV_IOTLB); + dmar_latency_disable(iommu, DMAR_LATENCY_INV_DEVTLB); + dmar_latency_disable(iommu, DMAR_LATENCY_INV_IEC); + dmar_latency_disable(iommu, DMAR_LATENCY_PRQ); + } + rcu_read_unlock(); + break; + case 1: + rcu_read_lock(); + for_each_active_iommu(iommu, drhd) + dmar_latency_enable(iommu, DMAR_LATENCY_INV_IOTLB); + rcu_read_unlock(); + break; + case 2: + rcu_read_lock(); + for_each_active_iommu(iommu, drhd) + dmar_latency_enable(iommu, DMAR_LATENCY_INV_DEVTLB); + rcu_read_unlock(); + break; + case 3: + rcu_read_lock(); + for_each_active_iommu(iommu, drhd) + dmar_latency_enable(iommu, DMAR_LATENCY_INV_IEC); + rcu_read_unlock(); + break; + case 4: + rcu_read_lock(); + for_each_active_iommu(iommu, drhd) + dmar_latency_enable(iommu, DMAR_LATENCY_PRQ); + rcu_read_unlock(); + break; + default: + return -EINVAL; + } + + *ppos += cnt; + return cnt; +} + +static const struct file_operations dmar_perf_latency_fops = { + .open = dmar_perf_latency_open, + .write = dmar_perf_latency_write, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + void __init intel_iommu_debugfs_init(void) { struct dentry *intel_iommu_debug = debugfs_create_dir("intel", @@ -556,4 +665,6 @@ void __init intel_iommu_debugfs_init(void) debugfs_create_file("ir_translation_struct", 0444, intel_iommu_debug, NULL, &ir_translation_struct_fops); #endif + debugfs_create_file("dmar_perf_latency", 0644, intel_iommu_debug, + NULL, &dmar_perf_latency_fops); } From 74eb87a0f9eb8c81264f2eb1b13d578c951b7533 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 10 Jun 2021 10:01:07 +0800 Subject: [PATCH 
46/66] iommu/vt-d: Add cache invalidation latency sampling

Queued invalidation execution time is performance critical and needs
to be monitored. This adds code to sample the execution time of
IOTLB/devTLB/IEC cache invalidation.

Signed-off-by: Lu Baolu
Link: https://lore.kernel.org/r/20210520031531.712333-1-baolu.lu@linux.intel.com
Link: https://lore.kernel.org/r/20210610020115.1637656-16-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel
---
 drivers/iommu/intel/dmar.c | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
index cae1078cbfec..d66f79acd14d 100644
--- a/drivers/iommu/intel/dmar.c
+++ b/drivers/iommu/intel/dmar.c
@@ -34,6 +34,7 @@
 #include
 
 #include "../irq_remapping.h"
+#include "perf.h"
 
 typedef int (*dmar_res_handler_t)(struct acpi_dmar_header *, void *);
 struct dmar_res_callback {
@@ -1342,15 +1343,33 @@ int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc,
 		   unsigned int count, unsigned long options)
 {
 	struct q_inval *qi = iommu->qi;
+	s64 devtlb_start_ktime = 0;
+	s64 iotlb_start_ktime = 0;
+	s64 iec_start_ktime = 0;
 	struct qi_desc wait_desc;
 	int wait_index, index;
 	unsigned long flags;
 	int offset, shift;
 	int rc, i;
+	u64 type;
 
 	if (!qi)
 		return 0;
 
+	type = desc->qw0 & GENMASK_ULL(3, 0);
+
+	if ((type == QI_IOTLB_TYPE || type == QI_EIOTLB_TYPE) &&
+	    dmar_latency_enabled(iommu, DMAR_LATENCY_INV_IOTLB))
+		iotlb_start_ktime = ktime_to_ns(ktime_get());
+
+	if ((type == QI_DIOTLB_TYPE || type == QI_DEIOTLB_TYPE) &&
+	    dmar_latency_enabled(iommu, DMAR_LATENCY_INV_DEVTLB))
+		devtlb_start_ktime = ktime_to_ns(ktime_get());
+
+	if (type == QI_IEC_TYPE &&
+	    dmar_latency_enabled(iommu, DMAR_LATENCY_INV_IEC))
+		iec_start_ktime = ktime_to_ns(ktime_get());
+
 restart:
 	rc = 0;
 
@@ -1425,6 +1444,18 @@ int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc,
 	if (rc == -EAGAIN)
 		goto restart;
 
+	if (iotlb_start_ktime)
+		dmar_latency_update(iommu, DMAR_LATENCY_INV_IOTLB,
+				ktime_to_ns(ktime_get()) - iotlb_start_ktime);
+
+	if (devtlb_start_ktime)
+		dmar_latency_update(iommu, DMAR_LATENCY_INV_DEVTLB,
+				ktime_to_ns(ktime_get()) - devtlb_start_ktime);
+
+	if (iec_start_ktime)
+		dmar_latency_update(iommu, DMAR_LATENCY_INV_IEC,
+				ktime_to_ns(ktime_get()) - iec_start_ktime);
+
 	return rc;
 }

From 0f4834ab255bf488f20544e120713decfa77843e Mon Sep 17 00:00:00 2001
From: Lu Baolu
Date: Thu, 10 Jun 2021 10:01:08 +0800
Subject: [PATCH 47/66] iommu/vt-d: Add PRQ handling latency sampling

The execution time for page fault request handling is performance
critical and needs to be monitored. This adds code to sample the
execution time of page fault request handling.
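The sample covers the full round trip from fault report to page response, stashing the start time in an otherwise-unused private-data slot; in condensed sketch form (both fragments appear in the diff below):

	/* Report side: only when hardware does not use private data. */
	event.fault.prm.private_data[0] = ktime_to_ns(ktime_get());

	/* Response side: compute the elapsed handling time. */
	dmar_latency_update(iommu, DMAR_LATENCY_PRQ,
			    ktime_to_ns(ktime_get()) - prm->private_data[0]);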
Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210520031531.712333-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20210610020115.1637656-17-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/svm.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index d3d028c6a727..6bff9a7f9133 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -24,6 +24,7 @@ #include #include "pasid.h" +#include "perf.h" #include "../iommu-sva-lib.h" static irqreturn_t prq_event_thread(int irq, void *d); @@ -838,8 +839,8 @@ static int prq_to_iommu_prot(struct page_req_dsc *req) return prot; } -static int -intel_svm_prq_report(struct device *dev, struct page_req_dsc *desc) +static int intel_svm_prq_report(struct intel_iommu *iommu, struct device *dev, + struct page_req_dsc *desc) { struct iommu_fault_event event; @@ -871,6 +872,12 @@ intel_svm_prq_report(struct device *dev, struct page_req_dsc *desc) event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA; memcpy(event.fault.prm.private_data, desc->priv_data, sizeof(desc->priv_data)); + } else if (dmar_latency_enabled(iommu, DMAR_LATENCY_PRQ)) { + /* + * If the private data fields are not used by hardware, use it + * to monitor the prq handle latency. + */ + event.fault.prm.private_data[0] = ktime_to_ns(ktime_get()); } return iommu_report_device_fault(dev, &event); @@ -983,7 +990,7 @@ static irqreturn_t prq_event_thread(int irq, void *d) * If prq is to be handled outside iommu driver via receiver of * the fault notifiers, we skip the page response here. */ - if (intel_svm_prq_report(sdev->dev, req)) + if (intel_svm_prq_report(iommu, sdev->dev, req)) handle_bad_prq_event(iommu, req, QI_RESP_INVALID); trace_prq_report(iommu, sdev->dev, req->qw_0, req->qw_1, @@ -1172,6 +1179,9 @@ int intel_svm_page_response(struct device *dev, if (private_present) memcpy(&desc.qw2, prm->private_data, sizeof(prm->private_data)); + else if (prm->private_data[0]) + dmar_latency_update(iommu, DMAR_LATENCY_PRQ, + ktime_to_ns(ktime_get()) - prm->private_data[0]); qi_submit_sync(iommu, &desc, 1, 0); } From 606636dcbdbb73b1a4ed61be77c76ea1087f042d Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 10 Jun 2021 10:01:09 +0800 Subject: [PATCH 48/66] iommu/vt-d: Fix out-bounds-warning in intel/svm.c Replace a couple of calls to memcpy() with simple assignments in order to fix the following out-of-bounds warning: drivers/iommu/intel/svm.c:1198:4: warning: 'memcpy' offset [25, 32] from the object at 'desc' is out of the bounds of referenced subobject 'qw2' with type 'long long unsigned int' at offset 16 [-Warray-bounds] The problem is that the original code is trying to copy data into a couple of struct members adjacent to each other in a single call to memcpy(). This causes a legitimate compiler warning because memcpy() overruns the length of &desc.qw2 and &resp.qw2, respectively. This helps with the ongoing efforts to globally enable -Warray-bounds and get us closer to being able to tighten the FORTIFY_SOURCE routines on memcpy(). Link: https://github.com/KSPP/linux/issues/109 Signed-off-by: Gustavo A. R. 
Silva Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210414201403.GA392764@embeddedor Link: https://lore.kernel.org/r/20210610020115.1637656-18-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/svm.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 6bff9a7f9133..9b0f22bc0514 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -870,8 +870,8 @@ static int intel_svm_prq_report(struct intel_iommu *iommu, struct device *dev, */ event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE; event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA; - memcpy(event.fault.prm.private_data, desc->priv_data, - sizeof(desc->priv_data)); + event.fault.prm.private_data[0] = desc->priv_data[0]; + event.fault.prm.private_data[1] = desc->priv_data[1]; } else if (dmar_latency_enabled(iommu, DMAR_LATENCY_PRQ)) { /* * If the private data fields are not used by hardware, use it @@ -910,11 +910,15 @@ static void handle_bad_prq_event(struct intel_iommu *iommu, QI_PGRP_RESP_TYPE; desc.qw1 = QI_PGRP_IDX(req->prg_index) | QI_PGRP_LPIG(req->lpig); - desc.qw2 = 0; - desc.qw3 = 0; - if (req->priv_data_present) - memcpy(&desc.qw2, req->priv_data, sizeof(req->priv_data)); + if (req->priv_data_present) { + desc.qw2 = req->priv_data[0]; + desc.qw3 = req->priv_data[1]; + } else { + desc.qw2 = 0; + desc.qw3 = 0; + } + qi_submit_sync(iommu, &desc, 1, 0); } @@ -1176,12 +1180,14 @@ int intel_svm_page_response(struct device *dev, desc.qw1 = QI_PGRP_IDX(prm->grpid) | QI_PGRP_LPIG(last_page); desc.qw2 = 0; desc.qw3 = 0; - if (private_present) - memcpy(&desc.qw2, prm->private_data, - sizeof(prm->private_data)); - else if (prm->private_data[0]) + + if (private_present) { + desc.qw2 = prm->private_data[0]; + desc.qw3 = prm->private_data[1]; + } else if (prm->private_data[0]) { dmar_latency_update(iommu, DMAR_LATENCY_PRQ, ktime_to_ns(ktime_get()) - prm->private_data[0]); + } qi_submit_sync(iommu, &desc, 1, 0); } From 3bc770b0e998a69c7085ec36405eda246b5b10d8 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 10 Jun 2021 10:01:10 +0800 Subject: [PATCH 49/66] iommu/vt-d: Use DEVICE_ATTR_RO macro Use DEVICE_ATTR_RO() helper instead of plain DEVICE_ATTR(), which makes the code a bit shorter and easier to read. 
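DEVICE_ATTR_RO() derives both the callback name and the attribute variable from a single identifier; a minimal sketch using a hypothetical attribute "foo":

	/* DEVICE_ATTR_RO(foo) expects foo_show() and defines dev_attr_foo
	 * with 0444 (read-only) permissions. */
	static ssize_t foo_show(struct device *dev,
				struct device_attribute *attr, char *buf)
	{
		return sprintf(buf, "example\n");
	}
	static DEVICE_ATTR_RO(foo);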
Signed-off-by: YueHaibing Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210528130229.22108-1-yuehaibing@huawei.com Link: https://lore.kernel.org/r/20210610020115.1637656-19-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 42 ++++++++++++++++--------------------- 1 file changed, 18 insertions(+), 24 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index c45d4946f92d..65458aee0d95 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4139,62 +4139,56 @@ static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev) return container_of(iommu_dev, struct intel_iommu, iommu); } -static ssize_t intel_iommu_show_version(struct device *dev, - struct device_attribute *attr, - char *buf) +static ssize_t version_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct intel_iommu *iommu = dev_to_intel_iommu(dev); u32 ver = readl(iommu->reg + DMAR_VER_REG); return sprintf(buf, "%d:%d\n", DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver)); } -static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL); +static DEVICE_ATTR_RO(version); -static ssize_t intel_iommu_show_address(struct device *dev, - struct device_attribute *attr, - char *buf) +static ssize_t address_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct intel_iommu *iommu = dev_to_intel_iommu(dev); return sprintf(buf, "%llx\n", iommu->reg_phys); } -static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL); +static DEVICE_ATTR_RO(address); -static ssize_t intel_iommu_show_cap(struct device *dev, - struct device_attribute *attr, - char *buf) +static ssize_t cap_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct intel_iommu *iommu = dev_to_intel_iommu(dev); return sprintf(buf, "%llx\n", iommu->cap); } -static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL); +static DEVICE_ATTR_RO(cap); -static ssize_t intel_iommu_show_ecap(struct device *dev, - struct device_attribute *attr, - char *buf) +static ssize_t ecap_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct intel_iommu *iommu = dev_to_intel_iommu(dev); return sprintf(buf, "%llx\n", iommu->ecap); } -static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL); +static DEVICE_ATTR_RO(ecap); -static ssize_t intel_iommu_show_ndoms(struct device *dev, - struct device_attribute *attr, - char *buf) +static ssize_t domains_supported_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct intel_iommu *iommu = dev_to_intel_iommu(dev); return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap)); } -static DEVICE_ATTR(domains_supported, S_IRUGO, intel_iommu_show_ndoms, NULL); +static DEVICE_ATTR_RO(domains_supported); -static ssize_t intel_iommu_show_ndoms_used(struct device *dev, - struct device_attribute *attr, - char *buf) +static ssize_t domains_used_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct intel_iommu *iommu = dev_to_intel_iommu(dev); return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids, cap_ndoms(iommu->cap))); } -static DEVICE_ATTR(domains_used, S_IRUGO, intel_iommu_show_ndoms_used, NULL); +static DEVICE_ATTR_RO(domains_used); static struct attribute *intel_iommu_attrs[] = { &dev_attr_version.attr, From 1f106ff0ea2782a6bc49bb927e4789681a2ec507 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 10 Jun 2021 10:01:11 +0800 Subject: [PATCH 50/66] iommu/vt-d: Use bitfields for DMAR capabilities IOTLB device 
presence, iommu coherency and snooping are boolean capabilities. Use them as bits and keep them adjacent. Structure layout before the reorg. $ pahole -C dmar_domain drivers/iommu/intel/dmar.o struct dmar_domain { int nid; /* 0 4 */ unsigned int iommu_refcnt[128]; /* 4 512 */ /* --- cacheline 8 boundary (512 bytes) was 4 bytes ago --- */ u16 iommu_did[128]; /* 516 256 */ /* --- cacheline 12 boundary (768 bytes) was 4 bytes ago --- */ bool has_iotlb_device; /* 772 1 */ /* XXX 3 bytes hole, try to pack */ struct list_head devices; /* 776 16 */ struct list_head subdevices; /* 792 16 */ struct iova_domain iovad __attribute__((__aligned__(8))); /* 808 2320 */ /* --- cacheline 48 boundary (3072 bytes) was 56 bytes ago --- */ struct dma_pte * pgd; /* 3128 8 */ /* --- cacheline 49 boundary (3136 bytes) --- */ int gaw; /* 3136 4 */ int agaw; /* 3140 4 */ int flags; /* 3144 4 */ int iommu_coherency; /* 3148 4 */ int iommu_snooping; /* 3152 4 */ int iommu_count; /* 3156 4 */ int iommu_superpage; /* 3160 4 */ /* XXX 4 bytes hole, try to pack */ u64 max_addr; /* 3168 8 */ u32 default_pasid; /* 3176 4 */ /* XXX 4 bytes hole, try to pack */ struct iommu_domain domain; /* 3184 72 */ /* size: 3256, cachelines: 51, members: 18 */ /* sum members: 3245, holes: 3, sum holes: 11 */ /* forced alignments: 1 */ /* last cacheline: 56 bytes */ } __attribute__((__aligned__(8))); After arranging it for natural padding and to make flags as u8 bits, it saves 8 bytes for the struct. struct dmar_domain { int nid; /* 0 4 */ unsigned int iommu_refcnt[128]; /* 4 512 */ /* --- cacheline 8 boundary (512 bytes) was 4 bytes ago --- */ u16 iommu_did[128]; /* 516 256 */ /* --- cacheline 12 boundary (768 bytes) was 4 bytes ago --- */ u8 has_iotlb_device:1; /* 772: 0 1 */ u8 iommu_coherency:1; /* 772: 1 1 */ u8 iommu_snooping:1; /* 772: 2 1 */ /* XXX 5 bits hole, try to pack */ /* XXX 3 bytes hole, try to pack */ struct list_head devices; /* 776 16 */ struct list_head subdevices; /* 792 16 */ struct iova_domain iovad __attribute__((__aligned__(8))); /* 808 2320 */ /* --- cacheline 48 boundary (3072 bytes) was 56 bytes ago --- */ struct dma_pte * pgd; /* 3128 8 */ /* --- cacheline 49 boundary (3136 bytes) --- */ int gaw; /* 3136 4 */ int agaw; /* 3140 4 */ int flags; /* 3144 4 */ int iommu_count; /* 3148 4 */ int iommu_superpage; /* 3152 4 */ /* XXX 4 bytes hole, try to pack */ u64 max_addr; /* 3160 8 */ u32 default_pasid; /* 3168 4 */ /* XXX 4 bytes hole, try to pack */ struct iommu_domain domain; /* 3176 72 */ /* size: 3248, cachelines: 51, members: 18 */ /* sum members: 3236, holes: 3, sum holes: 11 */ /* sum bitfield members: 3 bits, bit holes: 1, sum bit holes: 5 bits */ /* forced alignments: 1 */ /* last cacheline: 48 bytes */ } __attribute__((__aligned__(8))); Signed-off-by: Parav Pandit Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210530075053.264218-1-parav@nvidia.com Link: https://lore.kernel.org/r/20210610020115.1637656-20-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 18 +++++++++--------- include/linux/intel-iommu.h | 8 ++++---- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 65458aee0d95..430ef2232d47 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -626,12 +626,12 @@ static void domain_update_iommu_coherency(struct dmar_domain *domain) bool found = false; int i; - domain->iommu_coherency = 1; + domain->iommu_coherency = true; for_each_domain_iommu(i, domain) { 
found = true; if (!iommu_paging_structure_coherency(g_iommus[i])) { - domain->iommu_coherency = 0; + domain->iommu_coherency = false; break; } } @@ -642,18 +642,18 @@ static void domain_update_iommu_coherency(struct dmar_domain *domain) rcu_read_lock(); for_each_active_iommu(iommu, drhd) { if (!iommu_paging_structure_coherency(iommu)) { - domain->iommu_coherency = 0; + domain->iommu_coherency = false; break; } } rcu_read_unlock(); } -static int domain_update_iommu_snooping(struct intel_iommu *skip) +static bool domain_update_iommu_snooping(struct intel_iommu *skip) { struct dmar_drhd_unit *drhd; struct intel_iommu *iommu; - int ret = 1; + bool ret = true; rcu_read_lock(); for_each_active_iommu(iommu, drhd) { @@ -666,7 +666,7 @@ static int domain_update_iommu_snooping(struct intel_iommu *skip) */ if (!sm_supported(iommu) && !ecap_sc_support(iommu->ecap)) { - ret = 0; + ret = false; break; } } @@ -4506,8 +4506,8 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width) adjust_width = guestwidth_to_adjustwidth(guest_width); domain->agaw = width_to_agaw(adjust_width); - domain->iommu_coherency = 0; - domain->iommu_snooping = 0; + domain->iommu_coherency = false; + domain->iommu_snooping = false; domain->iommu_superpage = 0; domain->max_addr = 0; @@ -5131,7 +5131,7 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, static bool intel_iommu_capable(enum iommu_cap cap) { if (cap == IOMMU_CAP_CACHE_COHERENCY) - return domain_update_iommu_snooping(NULL) == 1; + return domain_update_iommu_snooping(NULL); if (cap == IOMMU_CAP_INTR_REMAP) return irq_remapping_enabled == 1; diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index f5cf31dd7280..2621eff04c82 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -546,7 +546,10 @@ struct dmar_domain { * domain ids are 16 bit wide according * to VT-d spec, section 9.3 */ - bool has_iotlb_device; + u8 has_iotlb_device: 1; + u8 iommu_coherency: 1; /* indicate coherency of iommu access */ + u8 iommu_snooping: 1; /* indicate snooping control feature */ + struct list_head devices; /* all devices' list */ struct list_head subdevices; /* all subdevices' list */ struct iova_domain iovad; /* iova's that belong to this domain */ @@ -558,9 +561,6 @@ struct dmar_domain { int agaw; int flags; /* flags to find out type of domain */ - - int iommu_coherency;/* indicate coherency of iommu access */ - int iommu_snooping; /* indicate snooping control feature*/ int iommu_count; /* reference count of iommu */ int iommu_superpage;/* Level of superpages supported: 0 == 4KiB (no superpages), 1 == 2MiB, From 74f6d776ae0b8498cfdb574ab24992bd50a2a2f1 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 10 Jun 2021 10:01:12 +0800 Subject: [PATCH 51/66] iommu/vt-d: Removed unused iommu_count in dmar domain DMAR domain uses per DMAR refcount. It is indexed by iommu seq_id. Older iommu_count is only incremented and decremented but no decisions are taken based on this refcount. This is not of much use. Hence, remove iommu_count and further simplify domain_detach_iommu() by returning void. 
Signed-off-by: Parav Pandit Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210530075053.264218-1-parav@nvidia.com Link: https://lore.kernel.org/r/20210610020115.1637656-21-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 11 +++-------- include/linux/intel-iommu.h | 1 - 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 430ef2232d47..dd8ecfbfdb23 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1920,7 +1920,6 @@ static int domain_attach_iommu(struct dmar_domain *domain, assert_spin_locked(&iommu->lock); domain->iommu_refcnt[iommu->seq_id] += 1; - domain->iommu_count += 1; if (domain->iommu_refcnt[iommu->seq_id] == 1) { ndomains = cap_ndoms(iommu->cap); num = find_first_zero_bit(iommu->domain_ids, ndomains); @@ -1928,7 +1927,6 @@ static int domain_attach_iommu(struct dmar_domain *domain, if (num >= ndomains) { pr_err("%s: No free domain ids\n", iommu->name); domain->iommu_refcnt[iommu->seq_id] -= 1; - domain->iommu_count -= 1; return -ENOSPC; } @@ -1944,16 +1942,15 @@ static int domain_attach_iommu(struct dmar_domain *domain, return 0; } -static int domain_detach_iommu(struct dmar_domain *domain, - struct intel_iommu *iommu) +static void domain_detach_iommu(struct dmar_domain *domain, + struct intel_iommu *iommu) { - int num, count; + int num; assert_spin_locked(&device_domain_lock); assert_spin_locked(&iommu->lock); domain->iommu_refcnt[iommu->seq_id] -= 1; - count = --domain->iommu_count; if (domain->iommu_refcnt[iommu->seq_id] == 0) { num = domain->iommu_did[iommu->seq_id]; clear_bit(num, iommu->domain_ids); @@ -1962,8 +1959,6 @@ static int domain_detach_iommu(struct dmar_domain *domain, domain_update_iommu_cap(domain); domain->iommu_did[iommu->seq_id] = 0; } - - return count; } static inline int guestwidth_to_adjustwidth(int gaw) diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 2621eff04c82..574b932dfe86 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -561,7 +561,6 @@ struct dmar_domain { int agaw; int flags; /* flags to find out type of domain */ - int iommu_count; /* reference count of iommu */ int iommu_superpage;/* Level of superpages supported: 0 == 4KiB (no superpages), 1 == 2MiB, 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */ From cee57d4fe74e82e784f6566bad3e3bb1ca51a211 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 10 Jun 2021 10:01:13 +0800 Subject: [PATCH 52/66] iommu/vt-d: Remove unnecessary braces No need for braces for single line statement under if() block. 
Signed-off-by: Parav Pandit Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210530075053.264218-1-parav@nvidia.com Link: https://lore.kernel.org/r/20210610020115.1637656-22-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index dd8ecfbfdb23..d9a248ee5779 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -683,9 +683,8 @@ static int domain_update_iommu_superpage(struct dmar_domain *domain, struct intel_iommu *iommu; int mask = 0x3; - if (!intel_iommu_superpage) { + if (!intel_iommu_superpage) return 0; - } /* set iommu_superpage to the smallest common denominator */ rcu_read_lock(); From 9739ba327c01e26f672661ea751132c29a54d3d9 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 10 Jun 2021 10:01:14 +0800 Subject: [PATCH 53/66] iommu/vt-d: Define counter explicitly as unsigned int Avoid below checkpatch warning. WARNING: Prefer 'unsigned int' to bare use of 'unsigned' + unsigned iommu_refcnt[DMAR_UNITS_SUPPORTED]; Fixes: 29a27719abaa ("iommu/vt-d: Replace iommu_bmp with a refcount") Signed-off-by: Parav Pandit Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210530075053.264218-1-parav@nvidia.com Link: https://lore.kernel.org/r/20210610020115.1637656-23-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- include/linux/intel-iommu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 574b932dfe86..d0fa0b31994d 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -537,7 +537,7 @@ struct context_entry { struct dmar_domain { int nid; /* node id */ - unsigned iommu_refcnt[DMAR_UNITS_SUPPORTED]; + unsigned int iommu_refcnt[DMAR_UNITS_SUPPORTED]; /* Refcount of devices per iommu */ From 7a0f06c197cb5e2106da8507b5610c1bb9c7daac Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 10 Jun 2021 10:01:15 +0800 Subject: [PATCH 54/66] iommu/vt-d: No need to typecast Page directory assignment by alloc_pgtable_page() or phys_to_virt() doesn't need typecasting as both routines return void*. Hence, remove typecasting from both the calls. 
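The casts were redundant because C, unlike C++, converts void * to any object pointer type implicitly. A standalone illustration of the rule (not driver code; dma_pte_like is an invented type):

	#include <stdlib.h>

	struct dma_pte_like { unsigned long long val; };

	int main(void)
	{
		void *page = malloc(4096);		/* returns void * */
		struct dma_pte_like *pgd = page;	/* no cast needed in C */

		free(pgd);
		return 0;
	}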
Signed-off-by: Parav Pandit
Signed-off-by: Lu Baolu
Link: https://lore.kernel.org/r/20210530075053.264218-1-parav@nvidia.com
Link: https://lore.kernel.org/r/20210610020115.1637656-24-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel
---
 drivers/iommu/intel/iommu.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index d9a248ee5779..bd93c7ec879e 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -4506,7 +4506,7 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
 	domain->max_addr = 0;
 
 	/* always allocate the top pgd */
-	domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
+	domain->pgd = alloc_pgtable_page(domain->nid);
 	if (!domain->pgd)
 		return -ENOMEM;
 	domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
@@ -4774,8 +4774,7 @@ static int prepare_domain_attach_device(struct iommu_domain *domain,
 		pte = dmar_domain->pgd;
 		if (dma_pte_present(pte)) {
-			dmar_domain->pgd = (struct dma_pte *)
-				phys_to_virt(dma_pte_addr(pte));
+			dmar_domain->pgd = phys_to_virt(dma_pte_addr(pte));
 			free_pgtable_page(pte);
 		}
 		dmar_domain->agaw--;

From d6a9642bd673dd0bb2839274fe83eaa979b9207e Mon Sep 17 00:00:00 2001
From: Joerg Roedel
Date: Thu, 10 Jun 2021 10:31:20 +0200
Subject: [PATCH 55/66] iommu/vt-d: Fix linker error on 32-bit

A recent commit broke the build on 32-bit x86. The linker throws these
messages:

ld: drivers/iommu/intel/perf.o: in function `dmar_latency_snapshot':
perf.c:(.text+0x40c): undefined reference to `__udivdi3'
ld: perf.c:(.text+0x458): undefined reference to `__udivdi3'

The reason is the 64-bit divisions in dmar_latency_snapshot(), for
which the 32-bit build emits calls to the libgcc helper __udivdi3.
Use the div_u64() helper function for those.

Fixes: 55ee5e67a59a ("iommu/vt-d: Add common code for dmar latency performance monitors")
Signed-off-by: Joerg Roedel
Link: https://lore.kernel.org/r/20210610083120.29224-1-joro@8bytes.org
---
 drivers/iommu/intel/perf.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/intel/perf.c b/drivers/iommu/intel/perf.c
index faaa96dda437..73b7ec705552 100644
--- a/drivers/iommu/intel/perf.c
+++ b/drivers/iommu/intel/perf.c
@@ -141,14 +141,14 @@ int dmar_latency_snapshot(struct intel_iommu *iommu, char *str, size_t size)
 			if (val == UINT_MAX)
 				val = 0;
 			else
-				val /= 1000;
+				val = div_u64(val, 1000);
 			break;
 		case COUNTS_MAX:
-			val /= 1000;
+			val = div_u64(val, 1000);
 			break;
 		case COUNTS_SUM:
 			if (lstat[i].samples)
-				val /= (lstat[i].samples * 1000);
+				val = div_u64(val, (lstat[i].samples * 1000));
 			else
 				val = 0;
 			break;

From 1adf30f198c26539a62d761e45af72cde570413d Mon Sep 17 00:00:00 2001
From: Xiyu Yang
Date: Thu, 10 Jun 2021 10:54:29 +0800
Subject: [PATCH 56/66] iommu/arm-smmu: Fix arm_smmu_device refcount leak when arm_smmu_rpm_get fails

arm_smmu_rpm_get() invokes pm_runtime_get_sync(), which increments the
refcount of the "smmu" device even when its return value is less than
0. The reference counting issue shows up in some error handling paths
of arm_smmu_rpm_get() in its caller functions: when arm_smmu_rpm_get()
fails, the callers forget to drop the refcount it has already taken,
causing a refcount leak.

Fix this issue by calling pm_runtime_resume_and_get() instead of
pm_runtime_get_sync() in arm_smmu_rpm_get(), which keeps the refcount
balanced in case of failure.
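The asymmetry being fixed is worth spelling out. A hedged sketch of the two call shapes in a generic driver (illustrative code, not from this patch):

	/* pm_runtime_get_sync() bumps the usage count even on failure,
	 * so every error path needs an explicit put: */
	ret = pm_runtime_get_sync(dev);
	if (ret < 0) {
		pm_runtime_put_noidle(dev);	/* easy to forget */
		return ret;
	}

	/* pm_runtime_resume_and_get() drops the count itself on failure,
	 * so the error path stays trivial: */
	ret = pm_runtime_resume_and_get(dev);
	if (ret < 0)
		return ret;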
Signed-off-by: Xiyu Yang
Signed-off-by: Xin Tan
Link: https://lore.kernel.org/r/1623293672-17954-1-git-send-email-xiyuyang19@fudan.edu.cn
Signed-off-by: Will Deacon
---
 drivers/iommu/arm/arm-smmu/arm-smmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
index 88a3023676ce..5de5cf5fcc50 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -74,7 +74,7 @@ static bool using_legacy_binding, using_generic_binding;
 static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
 {
 	if (pm_runtime_enabled(smmu->dev))
-		return pm_runtime_get_sync(smmu->dev);
+		return pm_runtime_resume_and_get(smmu->dev);
 
 	return 0;
 }

From 7c8f176d6a3fa18aa0f8875da6f7c672ed2a8554 Mon Sep 17 00:00:00 2001
From: Xiyu Yang
Date: Thu, 10 Jun 2021 10:49:20 +0800
Subject: [PATCH 57/66] iommu/arm-smmu: Fix arm_smmu_device refcount leak in address translation

The reference counting issue happens in several exception handling
paths of arm_smmu_iova_to_phys_hard(). When those error scenarios
occur, the function forgets to decrease the refcount of "smmu"
increased by arm_smmu_rpm_get(), causing a refcount leak.

Fix this issue by jumping to the "out" label when those error
scenarios occur.

Signed-off-by: Xiyu Yang
Signed-off-by: Xin Tan
Reviewed-by: Rob Clark
Link: https://lore.kernel.org/r/1623293391-17261-1-git-send-email-xiyuyang19@fudan.edu.cn
Signed-off-by: Will Deacon
---
 drivers/iommu/arm/arm-smmu/arm-smmu.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
index 5de5cf5fcc50..22be819d6b7f 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -1271,6 +1271,7 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
 	u64 phys;
 	unsigned long va, flags;
 	int ret, idx = cfg->cbndx;
+	phys_addr_t addr = 0;
 
 	ret = arm_smmu_rpm_get(smmu);
 	if (ret < 0)
@@ -1290,6 +1291,7 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
 		dev_err(dev,
 			"iova to phys timed out on %pad. Falling back to software table walk.\n",
 			&iova);
+		arm_smmu_rpm_put(smmu);
 		return ops->iova_to_phys(ops, iova);
 	}
 
@@ -1298,12 +1300,14 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
 	if (phys & ARM_SMMU_CB_PAR_F) {
 		dev_err(dev, "translation fault!\n");
 		dev_err(dev, "PAR = 0x%llx\n", phys);
-		return 0;
+		goto out;
 	}
 
+	addr = (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
+out:
 	arm_smmu_rpm_put(smmu);
 
-	return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
+	return addr;
 }
 
 static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,

From affa909571b0036b5d46aeff940ad4358f4abe1d Mon Sep 17 00:00:00 2001
From: Zhen Lei
Date: Wed, 9 Jun 2021 20:54:38 +0800
Subject: [PATCH 58/66] iommu/arm-smmu-v3: Remove unnecessary oom message

Fixes this scripts/checkpatch.pl warning:

WARNING: Possible unnecessary 'out of memory' message

Removing it also saves a bit of memory.
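For reference, the warning comes from running checkpatch over the file, along these lines (invocation shown for illustration):

	$ ./scripts/checkpatch.pl -f drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
	WARNING: Possible unnecessary 'out of memory' message

The message is redundant because the slab and page allocators already dump diagnostics on allocation failure (unless __GFP_NOWARN is passed), so the extra dev_err() strings only add to the kernel image size.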
Signed-off-by: Zhen Lei Link: https://lore.kernel.org/r/20210609125438.14369-1-thunder.leizhen@huawei.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 141cb7d0949e..4562214caf5f 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2967,10 +2967,8 @@ static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu) void *strtab = smmu->strtab_cfg.strtab; cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL); - if (!cfg->l1_desc) { - dev_err(smmu->dev, "failed to allocate l1 stream table desc\n"); + if (!cfg->l1_desc) return -ENOMEM; - } for (i = 0; i < cfg->num_l1_ents; ++i) { arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]); @@ -3761,10 +3759,8 @@ static int arm_smmu_device_probe(struct platform_device *pdev) bool bypass; smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL); - if (!smmu) { - dev_err(dev, "failed to allocate arm_smmu_device\n"); + if (!smmu) return -ENOMEM; - } smmu->dev = dev; if (dev->of_node) { From b472191f0a0ce6d98d61e939118cfd6ad0ff91e7 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 15 Jun 2021 19:35:46 +0100 Subject: [PATCH 59/66] iommu/arm-smmu: Check smmu->impl pointer before dereferencing Commit 0d97174aeadf ("iommu/arm-smmu: Implement ->probe_finalize()") added a new optional ->probe_finalize callback to 'struct arm_smmu_impl' but neglected to check that 'smmu->impl' is present prior to checking if the new callback is present. Add the missing check, which avoids dereferencing NULL when probing an SMMU which doesn't require any implementation-specific callbacks: | Unable to handle kernel NULL pointer dereference at virtual address | 0000000000000070 | | Call trace: | arm_smmu_probe_finalize+0x14/0x48 | of_iommu_configure+0xe4/0x1b8 | of_dma_configure_id+0xf8/0x2d8 | pci_dma_configure+0x44/0x88 | really_probe+0xc0/0x3c0 Fixes: 0d97174aeadf ("iommu/arm-smmu: Implement ->probe_finalize()") Reported-by: Marek Szyprowski Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index d20ce4d57df2..2fe73a88e500 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -1458,7 +1458,7 @@ static void arm_smmu_probe_finalize(struct device *dev) cfg = dev_iommu_priv_get(dev); smmu = cfg->smmu; - if (smmu->impl->probe_finalize) + if (smmu->impl && smmu->impl->probe_finalize) smmu->impl->probe_finalize(smmu, dev); } From 531353e650db087e61ce84f8e6b7e0982c1dc1b0 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 14 Jun 2021 15:57:26 +0100 Subject: [PATCH 60/66] iommu: Update "iommu.strict" documentation Consolidating the flush queue logic also meant that the "iommu.strict" option started taking effect on x86 as well. Make sure we document that. 
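As a usage note: with this change, booting with the common parameter, for example

	iommu.strict=0

requests lazy invalidation on x86 too, subject to the precedence rule documented below; previously only the driver-specific options (such as intel_iommu=strict) had any effect there.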
Fixes: a250c23f15c2 ("iommu: remove DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE")
Signed-off-by: Robin Murphy
Reviewed-by: Lu Baolu
Reviewed-by: John Garry
Link: https://lore.kernel.org/r/2c8c06e1b449d6b060c5bf9ad3b403cd142f405d.1623682646.git.robin.murphy@arm.com
Signed-off-by: Joerg Roedel
---
 Documentation/admin-guide/kernel-parameters.txt | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index cb89dbdedc46..20a32de990ed 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1987,7 +1987,7 @@
			forcing Dual Address Cycle for PCI cards supporting
			greater than 32-bit addressing.

-	iommu.strict=	[ARM64] Configure TLB invalidation behaviour
+	iommu.strict=	[ARM64, X86] Configure TLB invalidation behaviour
			Format: { "0" | "1" }
			0 - Lazy mode.
			    Request that DMA unmap operations use deferred
@@ -1998,6 +1998,10 @@
			1 - Strict mode (default).
			    DMA unmap operations invalidate IOMMU hardware TLBs
			    synchronously.
+			Note: on x86, the default behaviour depends on the
+			equivalent driver-specific parameters, but a strict
+			mode explicitly specified by either method takes
+			precedence.

	iommu.passthrough=
			[ARM64, X86] Configure DMA to bypass the IOMMU by
			default.

From 934ed4580c0a13a49ab7c4cbf94cd1958c0679ed Mon Sep 17 00:00:00 2001
From: Colin Ian King
Date: Fri, 11 Jun 2021 14:50:24 +0100
Subject: [PATCH 61/66] iommu/vt-d: Fix dereference of pointer info before it is null checked

iommu is assigned from info->iommu before info itself is null checked,
leading to a potential null pointer dereference. Fix this by assigning
iommu, and checking it for null, only after info has been null checked.

Addresses-Coverity: ("Dereference before null check")
Fixes: 4c82b88696ac ("iommu/vt-d: Allocate/register iopf queue for sva devices")
Signed-off-by: Colin Ian King
Link: https://lore.kernel.org/r/20210611135024.32781-1-colin.king@canonical.com
Signed-off-by: Joerg Roedel
---
 drivers/iommu/intel/iommu.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index bd93c7ec879e..76a58b8ad6c3 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -5329,10 +5329,14 @@ static int intel_iommu_disable_auxd(struct device *dev)
 static int intel_iommu_enable_sva(struct device *dev)
 {
 	struct device_domain_info *info = get_domain_info(dev);
-	struct intel_iommu *iommu = info->iommu;
+	struct intel_iommu *iommu;
 	int ret;
 
-	if (!info || !iommu || dmar_disabled)
+	if (!info || dmar_disabled)
+		return -EINVAL;
+
+	iommu = info->iommu;
+	if (!iommu)
 		return -EINVAL;
 
 	if (!(iommu->flags & VTD_FLAG_SVM_CAPABLE))

From db59e1b6e49201beacdbd0622aa3594f2de4f727 Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker
Date: Fri, 18 Jun 2021 17:20:56 +0200
Subject: [PATCH 62/66] ACPI: arm64: Move DMA setup operations out of IORT

Extract the generic DMA setup code out of IORT so that it can be reused
by VIOT. Keep it in drivers/acpi/arm64 for now, since moving it further
could break x86 platforms that haven't run this code so far, if they
have invalid tables.
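The mask arithmetic being moved is easiest to check with concrete numbers (hypothetical firmware values; the code itself follows in the diff below):

	/* If firmware reports dmaaddr = 0 and size = 1ULL << 32, then:
	 *	end  = dmaaddr + size - 1           = 0xffffffff
	 *	mask = DMA_BIT_MASK(ilog2(end) + 1) = DMA_BIT_MASK(32)
	 * so bus_dma_limit ends up at 0xffffffff and both the coherent
	 * and streaming DMA masks are clamped to 32 bits. */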
Reviewed-by: Eric Auger Signed-off-by: Jean-Philippe Brucker Reviewed-by: Robin Murphy Link: https://lore.kernel.org/r/20210618152059.1194210-2-jean-philippe@linaro.org Signed-off-by: Joerg Roedel --- drivers/acpi/arm64/Makefile | 1 + drivers/acpi/arm64/dma.c | 50 ++++++++++++++++++++++++++++++++++ drivers/acpi/arm64/iort.c | 54 ++++++------------------------------- drivers/acpi/scan.c | 2 +- include/linux/acpi.h | 3 +++ include/linux/acpi_iort.h | 6 ++--- 6 files changed, 66 insertions(+), 50 deletions(-) create mode 100644 drivers/acpi/arm64/dma.c diff --git a/drivers/acpi/arm64/Makefile b/drivers/acpi/arm64/Makefile index 6ff50f4ed947..66acbe77f46e 100644 --- a/drivers/acpi/arm64/Makefile +++ b/drivers/acpi/arm64/Makefile @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_ACPI_IORT) += iort.o obj-$(CONFIG_ACPI_GTDT) += gtdt.o +obj-y += dma.o diff --git a/drivers/acpi/arm64/dma.c b/drivers/acpi/arm64/dma.c new file mode 100644 index 000000000000..f16739ad3cc0 --- /dev/null +++ b/drivers/acpi/arm64/dma.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include +#include + +void acpi_arch_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size) +{ + int ret; + u64 end, mask; + u64 dmaaddr = 0, size = 0, offset = 0; + + /* + * If @dev is expected to be DMA-capable then the bus code that created + * it should have initialised its dma_mask pointer by this point. For + * now, we'll continue the legacy behaviour of coercing it to the + * coherent mask if not, but we'll no longer do so quietly. + */ + if (!dev->dma_mask) { + dev_warn(dev, "DMA mask not set\n"); + dev->dma_mask = &dev->coherent_dma_mask; + } + + if (dev->coherent_dma_mask) + size = max(dev->coherent_dma_mask, dev->coherent_dma_mask + 1); + else + size = 1ULL << 32; + + ret = acpi_dma_get_range(dev, &dmaaddr, &offset, &size); + if (ret == -ENODEV) + ret = iort_dma_get_ranges(dev, &size); + if (!ret) { + /* + * Limit coherent and dma mask based on size retrieved from + * firmware. + */ + end = dmaaddr + size - 1; + mask = DMA_BIT_MASK(ilog2(end) + 1); + dev->bus_dma_limit = end; + dev->coherent_dma_mask = min(dev->coherent_dma_mask, mask); + *dev->dma_mask = min(*dev->dma_mask, mask); + } + + *dma_addr = dmaaddr; + *dma_size = size; + + ret = dma_direct_set_offset(dev, dmaaddr + offset, dmaaddr, size); + + dev_dbg(dev, "dma_offset(%#08llx)%s\n", offset, ret ? " failed!" : ""); +} diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index 3912a1f6058e..a940be1cf2af 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -1144,56 +1144,18 @@ static int rc_dma_get_range(struct device *dev, u64 *size) } /** - * iort_dma_setup() - Set-up device DMA parameters. + * iort_dma_get_ranges() - Look up DMA addressing limit for the device + * @dev: device to lookup + * @size: DMA range size result pointer * - * @dev: device to configure - * @dma_addr: device DMA address result pointer - * @dma_size: DMA range size result pointer + * Return: 0 on success, an error otherwise. */ -void iort_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size) +int iort_dma_get_ranges(struct device *dev, u64 *size) { - u64 end, mask, dmaaddr = 0, size = 0, offset = 0; - int ret; - - /* - * If @dev is expected to be DMA-capable then the bus code that created - * it should have initialised its dma_mask pointer by this point. For - * now, we'll continue the legacy behaviour of coercing it to the - * coherent mask if not, but we'll no longer do so quietly. 
- */ - if (!dev->dma_mask) { - dev_warn(dev, "DMA mask not set\n"); - dev->dma_mask = &dev->coherent_dma_mask; - } - - if (dev->coherent_dma_mask) - size = max(dev->coherent_dma_mask, dev->coherent_dma_mask + 1); + if (dev_is_pci(dev)) + return rc_dma_get_range(dev, size); else - size = 1ULL << 32; - - ret = acpi_dma_get_range(dev, &dmaaddr, &offset, &size); - if (ret == -ENODEV) - ret = dev_is_pci(dev) ? rc_dma_get_range(dev, &size) - : nc_dma_get_range(dev, &size); - - if (!ret) { - /* - * Limit coherent and dma mask based on size retrieved from - * firmware. - */ - end = dmaaddr + size - 1; - mask = DMA_BIT_MASK(ilog2(end) + 1); - dev->bus_dma_limit = end; - dev->coherent_dma_mask = min(dev->coherent_dma_mask, mask); - *dev->dma_mask = min(*dev->dma_mask, mask); - } - - *dma_addr = dmaaddr; - *dma_size = size; - - ret = dma_direct_set_offset(dev, dmaaddr + offset, dmaaddr, size); - - dev_dbg(dev, "dma_offset(%#08llx)%s\n", offset, ret ? " failed!" : ""); + return nc_dma_get_range(dev, size); } static void __init acpi_iort_register_irq(int hwirq, const char *name, diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index e10d38ac7cf2..ea613df8f913 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1537,7 +1537,7 @@ int acpi_dma_configure_id(struct device *dev, enum dev_dma_attr attr, return 0; } - iort_dma_setup(dev, &dma_addr, &size); + acpi_arch_dma_setup(dev, &dma_addr, &size); iommu = iort_iommu_configure_id(dev, input_id); if (PTR_ERR(iommu) == -EPROBE_DEFER) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index c60745f657e9..7aaa9559cc19 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -259,9 +259,12 @@ void acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa); #ifdef CONFIG_ARM64 void acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa); +void acpi_arch_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size); #else static inline void acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa) { } +static inline void +acpi_arch_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size) { } #endif int acpi_numa_memory_affinity_init (struct acpi_srat_mem_affinity *ma); diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h index 1a12baa58e40..f7f054833afd 100644 --- a/include/linux/acpi_iort.h +++ b/include/linux/acpi_iort.h @@ -34,7 +34,7 @@ struct irq_domain *iort_get_device_domain(struct device *dev, u32 id, void acpi_configure_pmsi_domain(struct device *dev); int iort_pmsi_get_dev_id(struct device *dev, u32 *dev_id); /* IOMMU interface */ -void iort_dma_setup(struct device *dev, u64 *dma_addr, u64 *size); +int iort_dma_get_ranges(struct device *dev, u64 *size); const struct iommu_ops *iort_iommu_configure_id(struct device *dev, const u32 *id_in); int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head); @@ -48,8 +48,8 @@ static inline struct irq_domain *iort_get_device_domain( { return NULL; } static inline void acpi_configure_pmsi_domain(struct device *dev) { } /* IOMMU interface */ -static inline void iort_dma_setup(struct device *dev, u64 *dma_addr, - u64 *size) { } +static inline int iort_dma_get_ranges(struct device *dev, u64 *size) +{ return -ENODEV; } static inline const struct iommu_ops *iort_iommu_configure_id( struct device *dev, const u32 *id_in) { return NULL; } From 11a8c5e3a94b12848f24d9c63b5c175ce0b80729 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Fri, 18 Jun 2021 17:20:57 +0200 Subject: [PATCH 63/66] ACPI: Move IOMMU setup code out of 
IORT Extract the code that sets up the IOMMU infrastructure from IORT, since it can be reused by VIOT. Move it one level up into a new acpi_iommu_configure_id() function, which calls the IORT parsing function which in turn calls the acpi_iommu_fwspec_init() helper. Signed-off-by: Jean-Philippe Brucker Reviewed-by: Eric Auger Reviewed-by: Robin Murphy Link: https://lore.kernel.org/r/20210618152059.1194210-3-jean-philippe@linaro.org Signed-off-by: Joerg Roedel --- drivers/acpi/arm64/iort.c | 74 +++++---------------------------------- drivers/acpi/scan.c | 73 +++++++++++++++++++++++++++++++++++++- include/acpi/acpi_bus.h | 3 ++ include/linux/acpi_iort.h | 8 ++--- 4 files changed, 86 insertions(+), 72 deletions(-) diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index a940be1cf2af..487d1095030d 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -806,23 +806,6 @@ static struct acpi_iort_node *iort_get_msi_resv_iommu(struct device *dev) return NULL; } -static inline const struct iommu_ops *iort_fwspec_iommu_ops(struct device *dev) -{ - struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); - - return (fwspec && fwspec->ops) ? fwspec->ops : NULL; -} - -static inline int iort_add_device_replay(struct device *dev) -{ - int err = 0; - - if (dev->bus && !device_iommu_mapped(dev)) - err = iommu_probe_device(dev); - - return err; -} - /** * iort_iommu_msi_get_resv_regions - Reserved region driver helper * @dev: Device from iommu_get_resv_regions() @@ -900,18 +883,6 @@ static inline bool iort_iommu_driver_enabled(u8 type) } } -static int arm_smmu_iort_xlate(struct device *dev, u32 streamid, - struct fwnode_handle *fwnode, - const struct iommu_ops *ops) -{ - int ret = iommu_fwspec_init(dev, fwnode, ops); - - if (!ret) - ret = iommu_fwspec_add_ids(dev, &streamid, 1); - - return ret; -} - static bool iort_pci_rc_supports_ats(struct acpi_iort_node *node) { struct acpi_iort_root_complex *pci_rc; @@ -946,7 +917,7 @@ static int iort_iommu_xlate(struct device *dev, struct acpi_iort_node *node, return iort_iommu_driver_enabled(node->type) ? -EPROBE_DEFER : -ENODEV; - return arm_smmu_iort_xlate(dev, streamid, iort_fwnode, ops); + return acpi_iommu_fwspec_init(dev, streamid, iort_fwnode, ops); } struct iort_pci_alias_info { @@ -1020,24 +991,13 @@ static int iort_nc_iommu_map_id(struct device *dev, * @dev: device to configure * @id_in: optional input id const value pointer * - * Returns: iommu_ops pointer on configuration success - * NULL on configuration failure + * Returns: 0 on success, <0 on failure */ -const struct iommu_ops *iort_iommu_configure_id(struct device *dev, - const u32 *id_in) +int iort_iommu_configure_id(struct device *dev, const u32 *id_in) { struct acpi_iort_node *node; - const struct iommu_ops *ops; int err = -ENODEV; - /* - * If we already translated the fwspec there - * is nothing left to do, return the iommu_ops. 
- */ - ops = iort_fwspec_iommu_ops(dev); - if (ops) - return ops; - if (dev_is_pci(dev)) { struct iommu_fwspec *fwspec; struct pci_bus *bus = to_pci_dev(dev)->bus; @@ -1046,7 +1006,7 @@ const struct iommu_ops *iort_iommu_configure_id(struct device *dev, node = iort_scan_node(ACPI_IORT_NODE_PCI_ROOT_COMPLEX, iort_match_node_callback, &bus->dev); if (!node) - return NULL; + return -ENODEV; info.node = node; err = pci_for_each_dma_alias(to_pci_dev(dev), @@ -1059,7 +1019,7 @@ const struct iommu_ops *iort_iommu_configure_id(struct device *dev, node = iort_scan_node(ACPI_IORT_NODE_NAMED_COMPONENT, iort_match_node_callback, dev); if (!node) - return NULL; + return -ENODEV; err = id_in ? iort_nc_iommu_map_id(dev, node, id_in) : iort_nc_iommu_map(dev, node); @@ -1068,32 +1028,14 @@ const struct iommu_ops *iort_iommu_configure_id(struct device *dev, iort_named_component_init(dev, node); } - /* - * If we have reason to believe the IOMMU driver missed the initial - * add_device callback for dev, replay it to get things in order. - */ - if (!err) { - ops = iort_fwspec_iommu_ops(dev); - err = iort_add_device_replay(dev); - } - - /* Ignore all other errors apart from EPROBE_DEFER */ - if (err == -EPROBE_DEFER) { - ops = ERR_PTR(err); - } else if (err) { - dev_dbg(dev, "Adding to IOMMU failed: %d\n", err); - ops = NULL; - } - - return ops; + return err; } #else int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head) { return 0; } -const struct iommu_ops *iort_iommu_configure_id(struct device *dev, - const u32 *input_id) -{ return NULL; } +int iort_iommu_configure_id(struct device *dev, const u32 *input_id) +{ return -ENODEV; } #endif static int nc_dma_get_range(struct device *dev, u64 *size) diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index ea613df8f913..2a2e690040e9 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -1520,6 +1521,76 @@ int acpi_dma_get_range(struct device *dev, u64 *dma_addr, u64 *offset, return ret >= 0 ? 0 : ret; } +#ifdef CONFIG_IOMMU_API +int acpi_iommu_fwspec_init(struct device *dev, u32 id, + struct fwnode_handle *fwnode, + const struct iommu_ops *ops) +{ + int ret = iommu_fwspec_init(dev, fwnode, ops); + + if (!ret) + ret = iommu_fwspec_add_ids(dev, &id, 1); + + return ret; +} + +static inline const struct iommu_ops *acpi_iommu_fwspec_ops(struct device *dev) +{ + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); + + return fwspec ? fwspec->ops : NULL; +} + +static const struct iommu_ops *acpi_iommu_configure_id(struct device *dev, + const u32 *id_in) +{ + int err; + const struct iommu_ops *ops; + + /* + * If we already translated the fwspec there is nothing left to do, + * return the iommu_ops. + */ + ops = acpi_iommu_fwspec_ops(dev); + if (ops) + return ops; + + err = iort_iommu_configure_id(dev, id_in); + + /* + * If we have reason to believe the IOMMU driver missed the initial + * iommu_probe_device() call for dev, replay it to get things in order. 
+ */ + if (!err && dev->bus && !device_iommu_mapped(dev)) + err = iommu_probe_device(dev); + + /* Ignore all other errors apart from EPROBE_DEFER */ + if (err == -EPROBE_DEFER) { + return ERR_PTR(err); + } else if (err) { + dev_dbg(dev, "Adding to IOMMU failed: %d\n", err); + return NULL; + } + return acpi_iommu_fwspec_ops(dev); +} + +#else /* !CONFIG_IOMMU_API */ + +int acpi_iommu_fwspec_init(struct device *dev, u32 id, + struct fwnode_handle *fwnode, + const struct iommu_ops *ops) +{ + return -ENODEV; +} + +static const struct iommu_ops *acpi_iommu_configure_id(struct device *dev, + const u32 *id_in) +{ + return NULL; +} + +#endif /* !CONFIG_IOMMU_API */ + /** * acpi_dma_configure_id - Set-up DMA configuration for the device. * @dev: The pointer to the device @@ -1539,7 +1610,7 @@ int acpi_dma_configure_id(struct device *dev, enum dev_dma_attr attr, acpi_arch_dma_setup(dev, &dma_addr, &size); - iommu = iort_iommu_configure_id(dev, input_id); + iommu = acpi_iommu_configure_id(dev, input_id); if (PTR_ERR(iommu) == -EPROBE_DEFER) return -EPROBE_DEFER; diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 3a82faac5767..41f092a269f6 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -588,6 +588,9 @@ struct acpi_pci_root { bool acpi_dma_supported(struct acpi_device *adev); enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev); +int acpi_iommu_fwspec_init(struct device *dev, u32 id, + struct fwnode_handle *fwnode, + const struct iommu_ops *ops); int acpi_dma_get_range(struct device *dev, u64 *dma_addr, u64 *offset, u64 *size); int acpi_dma_configure_id(struct device *dev, enum dev_dma_attr attr, diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h index f7f054833afd..f1f0842a2cb2 100644 --- a/include/linux/acpi_iort.h +++ b/include/linux/acpi_iort.h @@ -35,8 +35,7 @@ void acpi_configure_pmsi_domain(struct device *dev); int iort_pmsi_get_dev_id(struct device *dev, u32 *dev_id); /* IOMMU interface */ int iort_dma_get_ranges(struct device *dev, u64 *size); -const struct iommu_ops *iort_iommu_configure_id(struct device *dev, - const u32 *id_in); +int iort_iommu_configure_id(struct device *dev, const u32 *id_in); int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head); phys_addr_t acpi_iort_dma_get_max_cpu_address(void); #else @@ -50,9 +49,8 @@ static inline void acpi_configure_pmsi_domain(struct device *dev) { } /* IOMMU interface */ static inline int iort_dma_get_ranges(struct device *dev, u64 *size) { return -ENODEV; } -static inline const struct iommu_ops *iort_iommu_configure_id( - struct device *dev, const u32 *id_in) -{ return NULL; } +static inline int iort_iommu_configure_id(struct device *dev, const u32 *id_in) +{ return -ENODEV; } static inline int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head) { return 0; } From 3cf485540e7b8550936ce3602edf2f58e4007304 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Fri, 18 Jun 2021 17:20:58 +0200 Subject: [PATCH 64/66] ACPI: Add driver for the VIOT table The ACPI Virtual I/O Translation Table describes topology of para-virtual platforms, similarly to vendor tables DMAR, IVRS and IORT. For now it describes the relation between virtio-iommu and the endpoints it manages. Three steps are needed to configure DMA of endpoints: (1) acpi_viot_init(): parse the VIOT table, find or create the fwnode associated to each vIOMMU device. 
This needs to happen after acpi_scan_init(), because it relies on the struct device and their fwnode to be available. (2) When probing the vIOMMU device, the driver registers its IOMMU ops within the IOMMU subsystem. This step doesn't require any intervention from the VIOT driver. (3) viot_iommu_configure(): before binding the endpoint to a driver, find the associated IOMMU ops. Register them, along with the endpoint ID, into the device's iommu_fwspec. If step (3) happens before step (2), it is deferred until the IOMMU is initialized, then retried. Tested-by: Eric Auger Reviewed-by: Eric Auger Signed-off-by: Jean-Philippe Brucker Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20210618152059.1194210-4-jean-philippe@linaro.org Signed-off-by: Joerg Roedel --- MAINTAINERS | 8 + drivers/acpi/Kconfig | 3 + drivers/acpi/Makefile | 2 + drivers/acpi/bus.c | 2 + drivers/acpi/scan.c | 3 + drivers/acpi/viot.c | 366 ++++++++++++++++++++++++++++++++++++++ drivers/iommu/Kconfig | 1 + include/linux/acpi_viot.h | 19 ++ 8 files changed, 404 insertions(+) create mode 100644 drivers/acpi/viot.c create mode 100644 include/linux/acpi_viot.h diff --git a/MAINTAINERS b/MAINTAINERS index 503fd21901f1..9f06619f7e41 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -431,6 +431,14 @@ W: https://01.org/linux-acpi B: https://bugzilla.kernel.org F: drivers/acpi/acpi_video.c +ACPI VIOT DRIVER +M: Jean-Philippe Brucker +L: linux-acpi@vger.kernel.org +L: iommu@lists.linux-foundation.org +S: Maintained +F: drivers/acpi/viot.c +F: include/linux/acpi_viot.h + ACPI WMI DRIVER L: platform-driver-x86@vger.kernel.org S: Orphan diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index eedec61e3476..3758c6940ed7 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -526,6 +526,9 @@ endif source "drivers/acpi/pmic/Kconfig" +config ACPI_VIOT + bool + endif # ACPI config X86_PM_TIMER diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index 700b41adf2db..a6e644c48987 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -118,3 +118,5 @@ video-objs += acpi_video.o video_detect.o obj-y += dptf/ obj-$(CONFIG_ARM64) += arm64/ + +obj-$(CONFIG_ACPI_VIOT) += viot.o diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index be7da23fad76..4a96b7097a62 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -27,6 +27,7 @@ #include #endif #include +#include #include #include #include @@ -1345,6 +1346,7 @@ static int __init acpi_init(void) acpi_wakeup_device_init(); acpi_debugger_init(); acpi_setup_sb_notify_handler(); + acpi_viot_init(); return 0; } diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 2a2e690040e9..3e2bb04ab528 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -1556,6 +1557,8 @@ static const struct iommu_ops *acpi_iommu_configure_id(struct device *dev, return ops; err = iort_iommu_configure_id(dev, id_in); + if (err && err != -EPROBE_DEFER) + err = viot_iommu_configure(dev); /* * If we have reason to believe the IOMMU driver missed the initial diff --git a/drivers/acpi/viot.c b/drivers/acpi/viot.c new file mode 100644 index 000000000000..d2256326c73a --- /dev/null +++ b/drivers/acpi/viot.c @@ -0,0 +1,366 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Virtual I/O topology + * + * The Virtual I/O Translation Table (VIOT) describes the topology of + * para-virtual IOMMUs and the endpoints they manage. 
The OS uses it to + * initialize devices in the right order, preventing endpoints from issuing DMA + * before their IOMMU is ready. + * + * When binding a driver to a device, before calling the device driver's probe() + * method, the driver infrastructure calls dma_configure(). At that point the + * VIOT driver looks for an IOMMU associated to the device in the VIOT table. + * If an IOMMU exists and has been initialized, the VIOT driver initializes the + * device's IOMMU fwspec, allowing the DMA infrastructure to invoke the IOMMU + * ops when the device driver configures DMA mappings. If an IOMMU exists and + * hasn't yet been initialized, VIOT returns -EPROBE_DEFER to postpone probing + * the device until the IOMMU is available. + */ +#define pr_fmt(fmt) "ACPI: VIOT: " fmt + +#include +#include +#include +#include +#include +#include +#include + +struct viot_iommu { + /* Node offset within the table */ + unsigned int offset; + struct fwnode_handle *fwnode; + struct list_head list; +}; + +struct viot_endpoint { + union { + /* PCI range */ + struct { + u16 segment_start; + u16 segment_end; + u16 bdf_start; + u16 bdf_end; + }; + /* MMIO */ + u64 address; + }; + u32 endpoint_id; + struct viot_iommu *viommu; + struct list_head list; +}; + +static struct acpi_table_viot *viot; +static LIST_HEAD(viot_iommus); +static LIST_HEAD(viot_pci_ranges); +static LIST_HEAD(viot_mmio_endpoints); + +static int __init viot_check_bounds(const struct acpi_viot_header *hdr) +{ + struct acpi_viot_header *start, *end, *hdr_end; + + start = ACPI_ADD_PTR(struct acpi_viot_header, viot, + max_t(size_t, sizeof(*viot), viot->node_offset)); + end = ACPI_ADD_PTR(struct acpi_viot_header, viot, viot->header.length); + hdr_end = ACPI_ADD_PTR(struct acpi_viot_header, hdr, sizeof(*hdr)); + + if (hdr < start || hdr_end > end) { + pr_err(FW_BUG "Node pointer overflows\n"); + return -EOVERFLOW; + } + if (hdr->length < sizeof(*hdr)) { + pr_err(FW_BUG "Empty node\n"); + return -EINVAL; + } + return 0; +} + +static int __init viot_get_pci_iommu_fwnode(struct viot_iommu *viommu, + u16 segment, u16 bdf) +{ + struct pci_dev *pdev; + struct fwnode_handle *fwnode; + + pdev = pci_get_domain_bus_and_slot(segment, PCI_BUS_NUM(bdf), + bdf & 0xff); + if (!pdev) { + pr_err("Could not find PCI IOMMU\n"); + return -ENODEV; + } + + fwnode = pdev->dev.fwnode; + if (!fwnode) { + /* + * PCI devices aren't necessarily described by ACPI. Create a + * fwnode so the IOMMU subsystem can identify this device. 
+ */ + fwnode = acpi_alloc_fwnode_static(); + if (!fwnode) { + pci_dev_put(pdev); + return -ENOMEM; + } + set_primary_fwnode(&pdev->dev, fwnode); + } + viommu->fwnode = pdev->dev.fwnode; + pci_dev_put(pdev); + return 0; +} + +static int __init viot_get_mmio_iommu_fwnode(struct viot_iommu *viommu, + u64 address) +{ + struct acpi_device *adev; + struct resource res = { + .start = address, + .end = address, + .flags = IORESOURCE_MEM, + }; + + adev = acpi_resource_consumer(&res); + if (!adev) { + pr_err("Could not find MMIO IOMMU\n"); + return -EINVAL; + } + viommu->fwnode = &adev->fwnode; + return 0; +} + +static struct viot_iommu * __init viot_get_iommu(unsigned int offset) +{ + int ret; + struct viot_iommu *viommu; + struct acpi_viot_header *hdr = ACPI_ADD_PTR(struct acpi_viot_header, + viot, offset); + union { + struct acpi_viot_virtio_iommu_pci pci; + struct acpi_viot_virtio_iommu_mmio mmio; + } *node = (void *)hdr; + + list_for_each_entry(viommu, &viot_iommus, list) + if (viommu->offset == offset) + return viommu; + + if (viot_check_bounds(hdr)) + return NULL; + + viommu = kzalloc(sizeof(*viommu), GFP_KERNEL); + if (!viommu) + return NULL; + + viommu->offset = offset; + switch (hdr->type) { + case ACPI_VIOT_NODE_VIRTIO_IOMMU_PCI: + if (hdr->length < sizeof(node->pci)) + goto err_free; + + ret = viot_get_pci_iommu_fwnode(viommu, node->pci.segment, + node->pci.bdf); + break; + case ACPI_VIOT_NODE_VIRTIO_IOMMU_MMIO: + if (hdr->length < sizeof(node->mmio)) + goto err_free; + + ret = viot_get_mmio_iommu_fwnode(viommu, + node->mmio.base_address); + break; + default: + ret = -EINVAL; + } + if (ret) + goto err_free; + + list_add(&viommu->list, &viot_iommus); + return viommu; + +err_free: + kfree(viommu); + return NULL; +} + +static int __init viot_parse_node(const struct acpi_viot_header *hdr) +{ + int ret = -EINVAL; + struct list_head *list; + struct viot_endpoint *ep; + union { + struct acpi_viot_mmio mmio; + struct acpi_viot_pci_range pci; + } *node = (void *)hdr; + + if (viot_check_bounds(hdr)) + return -EINVAL; + + if (hdr->type == ACPI_VIOT_NODE_VIRTIO_IOMMU_PCI || + hdr->type == ACPI_VIOT_NODE_VIRTIO_IOMMU_MMIO) + return 0; + + ep = kzalloc(sizeof(*ep), GFP_KERNEL); + if (!ep) + return -ENOMEM; + + switch (hdr->type) { + case ACPI_VIOT_NODE_PCI_RANGE: + if (hdr->length < sizeof(node->pci)) { + pr_err(FW_BUG "Invalid PCI node size\n"); + goto err_free; + } + + ep->segment_start = node->pci.segment_start; + ep->segment_end = node->pci.segment_end; + ep->bdf_start = node->pci.bdf_start; + ep->bdf_end = node->pci.bdf_end; + ep->endpoint_id = node->pci.endpoint_start; + ep->viommu = viot_get_iommu(node->pci.output_node); + list = &viot_pci_ranges; + break; + case ACPI_VIOT_NODE_MMIO: + if (hdr->length < sizeof(node->mmio)) { + pr_err(FW_BUG "Invalid MMIO node size\n"); + goto err_free; + } + + ep->address = node->mmio.base_address; + ep->endpoint_id = node->mmio.endpoint; + ep->viommu = viot_get_iommu(node->mmio.output_node); + list = &viot_mmio_endpoints; + break; + default: + pr_warn("Unsupported node %x\n", hdr->type); + ret = 0; + goto err_free; + } + + if (!ep->viommu) { + pr_warn("No IOMMU node found\n"); + /* + * A future version of the table may use the node for other + * purposes. Keep parsing. + */ + ret = 0; + goto err_free; + } + + list_add(&ep->list, list); + return 0; + +err_free: + kfree(ep); + return ret; +} + +/** + * acpi_viot_init - Parse the VIOT table + * + * Parse the VIOT table, prepare the list of endpoints to be used during DMA + * setup of devices. 
+ */ +void __init acpi_viot_init(void) +{ + int i; + acpi_status status; + struct acpi_table_header *hdr; + struct acpi_viot_header *node; + + status = acpi_get_table(ACPI_SIG_VIOT, 0, &hdr); + if (ACPI_FAILURE(status)) { + if (status != AE_NOT_FOUND) { + const char *msg = acpi_format_exception(status); + + pr_err("Failed to get table, %s\n", msg); + } + return; + } + + viot = (void *)hdr; + + node = ACPI_ADD_PTR(struct acpi_viot_header, viot, viot->node_offset); + for (i = 0; i < viot->node_count; i++) { + if (viot_parse_node(node)) + return; + + node = ACPI_ADD_PTR(struct acpi_viot_header, node, + node->length); + } + + acpi_put_table(hdr); +} + +static int viot_dev_iommu_init(struct device *dev, struct viot_iommu *viommu, + u32 epid) +{ + const struct iommu_ops *ops; + + if (!viommu) + return -ENODEV; + + /* We're not translating ourself */ + if (viommu->fwnode == dev->fwnode) + return -EINVAL; + + ops = iommu_ops_from_fwnode(viommu->fwnode); + if (!ops) + return IS_ENABLED(CONFIG_VIRTIO_IOMMU) ? + -EPROBE_DEFER : -ENODEV; + + return acpi_iommu_fwspec_init(dev, epid, viommu->fwnode, ops); +} + +static int viot_pci_dev_iommu_init(struct pci_dev *pdev, u16 dev_id, void *data) +{ + u32 epid; + struct viot_endpoint *ep; + u32 domain_nr = pci_domain_nr(pdev->bus); + + list_for_each_entry(ep, &viot_pci_ranges, list) { + if (domain_nr >= ep->segment_start && + domain_nr <= ep->segment_end && + dev_id >= ep->bdf_start && + dev_id <= ep->bdf_end) { + epid = ((domain_nr - ep->segment_start) << 16) + + dev_id - ep->bdf_start + ep->endpoint_id; + + /* + * If we found a PCI range managed by the viommu, we're + * the one that has to request ACS. + */ + pci_request_acs(); + + return viot_dev_iommu_init(&pdev->dev, ep->viommu, + epid); + } + } + return -ENODEV; +} + +static int viot_mmio_dev_iommu_init(struct platform_device *pdev) +{ + struct resource *mem; + struct viot_endpoint *ep; + + mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!mem) + return -ENODEV; + + list_for_each_entry(ep, &viot_mmio_endpoints, list) { + if (ep->address == mem->start) + return viot_dev_iommu_init(&pdev->dev, ep->viommu, + ep->endpoint_id); + } + return -ENODEV; +} + +/** + * viot_iommu_configure - Setup IOMMU ops for an endpoint described by VIOT + * @dev: the endpoint + * + * Return: 0 on success, <0 on failure + */ +int viot_iommu_configure(struct device *dev) +{ + if (dev_is_pci(dev)) + return pci_for_each_dma_alias(to_pci_dev(dev), + viot_pci_dev_iommu_init, NULL); + else if (dev_is_platform(dev)) + return viot_mmio_dev_iommu_init(to_platform_device(dev)); + return -ENODEV; +} diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 1f111b399bca..aff8a4830dd1 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -403,6 +403,7 @@ config VIRTIO_IOMMU depends on ARM64 select IOMMU_API select INTERVAL_TREE + select ACPI_VIOT if ACPI help Para-virtualised IOMMU driver with virtio. 
diff --git a/include/linux/acpi_viot.h b/include/linux/acpi_viot.h new file mode 100644 index 000000000000..1eb8ee5b0e5f --- /dev/null +++ b/include/linux/acpi_viot.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __ACPI_VIOT_H__ +#define __ACPI_VIOT_H__ + +#include + +#ifdef CONFIG_ACPI_VIOT +void __init acpi_viot_init(void); +int viot_iommu_configure(struct device *dev); +#else +static inline void acpi_viot_init(void) {} +static inline int viot_iommu_configure(struct device *dev) +{ + return -ENODEV; +} +#endif + +#endif /* __ACPI_VIOT_H__ */ From ac6d704679d343e55615551f19e9b2e18d68518b Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Fri, 18 Jun 2021 17:20:59 +0200 Subject: [PATCH 65/66] iommu/dma: Pass address limit rather than size to iommu_setup_dma_ops() Passing a 64-bit address width to iommu_setup_dma_ops() is valid on virtual platforms, but isn't currently possible. The overflow check in iommu_dma_init_domain() prevents this even when @dma_base isn't 0. Pass a limit address instead of a size, so callers don't have to fake a size to work around the check. The base and limit parameters are being phased out, because: * they are redundant for x86 callers. dma-iommu already reserves the first page, and the upper limit is already in domain->geometry. * they can now be obtained from dev->dma_range_map on Arm. But removing them on Arm isn't completely straightforward so is left for future work. As an intermediate step, simplify the x86 callers by passing dummy limits. Signed-off-by: Jean-Philippe Brucker Reviewed-by: Eric Auger Reviewed-by: Robin Murphy Link: https://lore.kernel.org/r/20210618152059.1194210-5-jean-philippe@linaro.org Signed-off-by: Joerg Roedel --- arch/arm64/mm/dma-mapping.c | 2 +- drivers/iommu/amd/iommu.c | 2 +- drivers/iommu/dma-iommu.c | 12 ++++++------ drivers/iommu/intel/iommu.c | 5 +---- include/linux/dma-iommu.h | 4 ++-- 5 files changed, 11 insertions(+), 14 deletions(-) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 4bf1dd3eb041..6719f9efea09 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -50,7 +50,7 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, dev->dma_coherent = coherent; if (iommu) - iommu_setup_dma_ops(dev, dma_base, size); + iommu_setup_dma_ops(dev, dma_base, dma_base + size - 1); #ifdef CONFIG_XEN if (xen_swiotlb_detect()) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 3ac42bbdefc6..216323fb27ef 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -1713,7 +1713,7 @@ static void amd_iommu_probe_finalize(struct device *dev) /* Domains are initialized for this device - have a look what we ended up with */ domain = iommu_get_domain_for_dev(dev); if (domain->type == IOMMU_DOMAIN_DMA) - iommu_setup_dma_ops(dev, IOVA_START_PFN << PAGE_SHIFT, 0); + iommu_setup_dma_ops(dev, 0, U64_MAX); else set_dma_ops(dev, NULL); } diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 7bcdd1205535..c62e19bed302 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -319,16 +319,16 @@ static bool dev_is_untrusted(struct device *dev) * iommu_dma_init_domain - Initialise a DMA mapping domain * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie() * @base: IOVA at which the mappable address space starts - * @size: Size of IOVA space + * @limit: Last address of the IOVA space * @dev: Device the domain is being initialised for * - * @base and @size should be exact 
multiples of IOMMU page granularity to + * @base and @limit + 1 should be exact multiples of IOMMU page granularity to * avoid rounding surprises. If necessary, we reserve the page at address 0 * to ensure it is an invalid IOVA. It is safe to reinitialise a domain, but * any change which could make prior IOVAs invalid will fail. */ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, - u64 size, struct device *dev) + dma_addr_t limit, struct device *dev) { struct iommu_dma_cookie *cookie = domain->iova_cookie; unsigned long order, base_pfn; @@ -346,7 +346,7 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, /* Check the domain allows at least some access to the device... */ if (domain->geometry.force_aperture) { if (base > domain->geometry.aperture_end || - base + size <= domain->geometry.aperture_start) { + limit < domain->geometry.aperture_start) { pr_warn("specified DMA range outside IOMMU capability\n"); return -EFAULT; } @@ -1308,7 +1308,7 @@ static const struct dma_map_ops iommu_dma_ops = { * The IOMMU core code allocates the default DMA domain, which the underlying * IOMMU driver needs to support via the dma-iommu layer. */ -void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size) +void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit) { struct iommu_domain *domain = iommu_get_domain_for_dev(dev); @@ -1320,7 +1320,7 @@ void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size) * underlying IOMMU driver needs to support via the dma-iommu layer. */ if (domain->type == IOMMU_DOMAIN_DMA) { - if (iommu_dma_init_domain(domain, dma_base, size, dev)) + if (iommu_dma_init_domain(domain, dma_base, dma_limit, dev)) goto out_err; dev->dma_ops = &iommu_dma_ops; } diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index be35284a2016..2f7213f0e7a1 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -5165,13 +5165,10 @@ static void intel_iommu_release_device(struct device *dev) static void intel_iommu_probe_finalize(struct device *dev) { - dma_addr_t base = IOVA_START_PFN << VTD_PAGE_SHIFT; struct iommu_domain *domain = iommu_get_domain_for_dev(dev); - struct dmar_domain *dmar_domain = to_dmar_domain(domain); if (domain && domain->type == IOMMU_DOMAIN_DMA) - iommu_setup_dma_ops(dev, base, - __DOMAIN_MAX_ADDR(dmar_domain->gaw) - base); + iommu_setup_dma_ops(dev, 0, U64_MAX); else set_dma_ops(dev, NULL); } diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h index 6e75a2d689b4..758ca4694257 100644 --- a/include/linux/dma-iommu.h +++ b/include/linux/dma-iommu.h @@ -19,7 +19,7 @@ int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base); void iommu_put_dma_cookie(struct iommu_domain *domain); /* Setup call for arch DMA mapping code */ -void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size); +void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit); /* The DMA API isn't _quite_ the whole story, though... */ /* @@ -50,7 +50,7 @@ struct msi_msg; struct device; static inline void iommu_setup_dma_ops(struct device *dev, u64 dma_base, - u64 size) + u64 dma_limit) { } From 8ce4904bfd22de04ac3cd35d469c0a3337bdeb7b Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Fri, 18 Jun 2021 17:21:00 +0200 Subject: [PATCH 66/66] iommu/virtio: Enable x86 support With the VIOT support in place, x86 platforms can now use the virtio-iommu. 
Because the other x86 IOMMU drivers aren't yet ready to use the acpi_dma_setup() path, x86 doesn't implement arch_setup_dma_ops() at the moment. Similarly to Vt-d and AMD IOMMU, clear the DMA ops and call iommu_setup_dma_ops() from probe_finalize(). Acked-by: Joerg Roedel Acked-by: Michael S. Tsirkin Tested-by: Eric Auger Reviewed-by: Eric Auger Signed-off-by: Jean-Philippe Brucker Link: https://lore.kernel.org/r/20210618152059.1194210-6-jean-philippe@linaro.org Signed-off-by: Joerg Roedel --- drivers/iommu/Kconfig | 3 ++- drivers/iommu/dma-iommu.c | 1 + drivers/iommu/virtio-iommu.c | 11 +++++++++++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index aff8a4830dd1..07b7c25cbed8 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -400,8 +400,9 @@ config HYPERV_IOMMU config VIRTIO_IOMMU tristate "Virtio IOMMU driver" depends on VIRTIO - depends on ARM64 + depends on (ARM64 || X86) select IOMMU_API + select IOMMU_DMA select INTERVAL_TREE select ACPI_VIOT if ACPI help diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index c62e19bed302..9dbbc95c8189 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -1330,6 +1330,7 @@ void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit) pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n", dev_name(dev)); } +EXPORT_SYMBOL_GPL(iommu_setup_dma_ops); static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev, phys_addr_t msi_addr, struct iommu_domain *domain) diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c index c6e5ee4d9cef..fe581f0c9b3a 100644 --- a/drivers/iommu/virtio-iommu.c +++ b/drivers/iommu/virtio-iommu.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -904,6 +905,15 @@ static struct iommu_device *viommu_probe_device(struct device *dev) return ERR_PTR(ret); } +static void viommu_probe_finalize(struct device *dev) +{ +#ifndef CONFIG_ARCH_HAS_SETUP_DMA_OPS + /* First clear the DMA ops in case we're switching from a DMA domain */ + set_dma_ops(dev, NULL); + iommu_setup_dma_ops(dev, 0, U64_MAX); +#endif +} + static void viommu_release_device(struct device *dev) { struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); @@ -940,6 +950,7 @@ static struct iommu_ops viommu_ops = { .iova_to_phys = viommu_iova_to_phys, .iotlb_sync = viommu_iotlb_sync, .probe_device = viommu_probe_device, + .probe_finalize = viommu_probe_finalize, .release_device = viommu_release_device, .device_group = viommu_device_group, .get_resv_regions = viommu_get_resv_regions,
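One closing note on the limit-based interface introduced two patches earlier: passing the last usable address instead of a size lets callers express a full 64-bit IOVA space without overflow. A hedged illustration (not code from the tree):

	/* Old interface: (dev, base, size); a 64-bit space was not
	 * representable because base + size wraps to zero. */

	/* New interface: (dev, base, limit). */
	iommu_setup_dma_ops(dev, 0, U64_MAX);		/* no limit */
	iommu_setup_dma_ops(dev, 0, DMA_BIT_MASK(42));	/* 42-bit space */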