From adf5e5168bd51c42332ebaa709351fa6ed65ea73 Mon Sep 17 00:00:00 2001
From: Robin Murphy
Date: Fri, 27 Jan 2017 12:22:54 +0000
Subject: [PATCH 01/14] iommu: Better document the IOMMU_PRIV flag

This is a fairly subtle thing - let's make sure it's described as
clearly as possible to avoid potential misunderstandings.

Signed-off-by: Robin Murphy
Signed-off-by: Will Deacon
---
 include/linux/iommu.h | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 2e4de0deee53..88ec8c6580d3 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -32,10 +32,13 @@
 #define IOMMU_NOEXEC	(1 << 3)
 #define IOMMU_MMIO	(1 << 4) /* e.g. things like MSI doorbells */
 /*
- * This is to make the IOMMU API setup privileged
- * mapppings accessible by the master only at higher
- * privileged execution level and inaccessible at
- * less privileged levels.
+ * Where the bus hardware includes a privilege level as part of its access type
+ * markings, and certain devices are capable of issuing transactions marked as
+ * either 'supervisor' or 'user', the IOMMU_PRIV flag requests that the other
+ * given permission flags only apply to accesses at the higher privilege level,
+ * and that unprivileged transactions should have as little access as possible.
+ * This would usually imply the same permissions as kernel mappings on the CPU,
+ * if the IOMMU page table format is equivalent.
 */
 #define IOMMU_PRIV	(1 << 5)

From 53c35dce45713d2a554109c21a8cd617d09eba50 Mon Sep 17 00:00:00 2001
From: Robert Richter
Date: Mon, 13 Mar 2017 11:39:01 +0100
Subject: [PATCH 02/14] iommu/arm-smmu: Print message when Cavium erratum
 27704 was detected

Firmware is responsible for properly enabling SMMU workarounds. Print
a message for better diagnostics when Cavium erratum 27704 is detected.

Reviewed-by: Robin Murphy
Signed-off-by: Robert Richter
Signed-off-by: Will Deacon
---
 drivers/iommu/arm-smmu.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index b493c99e17f7..8e16da64e72e 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -1887,6 +1887,7 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
 			atomic_add_return(smmu->num_context_banks,
 					  &cavium_smmu_context_count);
 		smmu->cavium_id_base -= smmu->num_context_banks;
+		dev_notice(smmu->dev, "\tenabling workaround for Cavium erratum 27704\n");
 	}

 	/* ID2 */

From 125458ab3aefe9cf2f72dcfe7338dc9ad967da0b Mon Sep 17 00:00:00 2001
From: Sunil Goutham
Date: Tue, 28 Mar 2017 16:11:12 +0530
Subject: [PATCH 03/14] iommu/arm-smmu: Fix 16-bit ASID configuration

The 16-bit ASID must be enabled before TTBR0/1 are initialized,
otherwise only the lower 8 bits of the ASID are taken into account.
Hence, move the configuration of the TTBCR register ahead of TTBR0/1
when initializing a context bank.

Signed-off-by: Sunil Goutham
[will: rewrote comment]
Signed-off-by: Will Deacon
---
 drivers/iommu/arm-smmu.c | 42 +++++++++++++++++++++++-------------------
 1 file changed, 23 insertions(+), 19 deletions(-)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 8e16da64e72e..e021b360e315 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -758,6 +758,29 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
 	}
 	writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(cfg->cbndx));

+	/*
+	 * TTBCR
+	 * We must write this before the TTBRs, since it determines the
+	 * access behaviour of some fields (in particular, ASID[15:8]).
+ */ + if (stage1) { + if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) { + reg = pgtbl_cfg->arm_v7s_cfg.tcr; + reg2 = 0; + } else { + reg = pgtbl_cfg->arm_lpae_s1_cfg.tcr; + reg2 = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32; + reg2 |= TTBCR2_SEP_UPSTREAM; + if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) + reg2 |= TTBCR2_AS; + } + if (smmu->version > ARM_SMMU_V1) + writel_relaxed(reg2, cb_base + ARM_SMMU_CB_TTBCR2); + } else { + reg = pgtbl_cfg->arm_lpae_s2_cfg.vtcr; + } + writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBCR); + /* TTBRs */ if (stage1) { u16 asid = ARM_SMMU_CB_ASID(smmu, cfg); @@ -781,25 +804,6 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR0); } - /* TTBCR */ - if (stage1) { - if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) { - reg = pgtbl_cfg->arm_v7s_cfg.tcr; - reg2 = 0; - } else { - reg = pgtbl_cfg->arm_lpae_s1_cfg.tcr; - reg2 = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32; - reg2 |= TTBCR2_SEP_UPSTREAM; - if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) - reg2 |= TTBCR2_AS; - } - if (smmu->version > ARM_SMMU_V1) - writel_relaxed(reg2, cb_base + ARM_SMMU_CB_TTBCR2); - } else { - reg = pgtbl_cfg->arm_lpae_s2_cfg.vtcr; - } - writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBCR); - /* MAIRs (stage-1 only) */ if (stage1) { if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) { From 280b683ceaceb7508dc1e9c7e148ea1dcdf36543 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 30 Mar 2017 17:56:29 +0100 Subject: [PATCH 04/14] iommu/arm-smmu: Simplify ASID/VMID handling Calculating ASIDs/VMIDs dynamically from arm_smmu_cfg was a neat trick, but the global uniqueness workaround makes it somewhat more awkward, and means we end up having to pass extra state around in certain cases just to keep a handle on the offset. We already have 16 bits going spare in arm_smmu_cfg; let's just precalculate an ASID/VMID, plop it in there, and tidy up the users accordingly. We'd also need something like this anyway if we ever get near to thinking about SVM, so it's no bad thing. 
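To illustrate the idea, a condensed sketch of the relevant fields and
the precalculation (simplified; the full structure and stage handling
are in the diff below):

	struct arm_smmu_cfg {
		u8 cbndx;		/* context bank index */
		union {			/* the 16 spare bits: one per stage */
			u16 asid;	/* stage 1 */
			u16 vmid;	/* stage 2 */
		};
		/* ... */
	};

	/* computed once at domain init, instead of at every TLBI */
	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
		cfg->vmid = cfg->cbndx + 1 + smmu->cavium_id_base;
	else
		cfg->asid = cfg->cbndx + smmu->cavium_id_base;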
Reviewed-by: Jordan Crouse Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu.c | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index e021b360e315..05f17ebd70f6 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -404,14 +404,15 @@ enum arm_smmu_context_fmt { struct arm_smmu_cfg { u8 cbndx; u8 irptndx; + union { + u16 asid; + u16 vmid; + }; u32 cbar; enum arm_smmu_context_fmt fmt; }; #define INVALID_IRPTNDX 0xff -#define ARM_SMMU_CB_ASID(smmu, cfg) ((u16)(smmu)->cavium_id_base + (cfg)->cbndx) -#define ARM_SMMU_CB_VMID(smmu, cfg) ((u16)(smmu)->cavium_id_base + (cfg)->cbndx + 1) - enum arm_smmu_domain_stage { ARM_SMMU_DOMAIN_S1 = 0, ARM_SMMU_DOMAIN_S2, @@ -603,12 +604,10 @@ static void arm_smmu_tlb_inv_context(void *cookie) if (stage1) { base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx); - writel_relaxed(ARM_SMMU_CB_ASID(smmu, cfg), - base + ARM_SMMU_CB_S1_TLBIASID); + writel_relaxed(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID); } else { base = ARM_SMMU_GR0(smmu); - writel_relaxed(ARM_SMMU_CB_VMID(smmu, cfg), - base + ARM_SMMU_GR0_TLBIVMID); + writel_relaxed(cfg->vmid, base + ARM_SMMU_GR0_TLBIVMID); } __arm_smmu_tlb_sync(smmu); @@ -629,14 +628,14 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) { iova &= ~12UL; - iova |= ARM_SMMU_CB_ASID(smmu, cfg); + iova |= cfg->asid; do { writel_relaxed(iova, reg); iova += granule; } while (size -= granule); } else { iova >>= 12; - iova |= (u64)ARM_SMMU_CB_ASID(smmu, cfg) << 48; + iova |= (u64)cfg->asid << 48; do { writeq_relaxed(iova, reg); iova += granule >> 12; @@ -653,7 +652,7 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, } while (size -= granule); } else { reg = ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_TLBIVMID; - writel_relaxed(ARM_SMMU_CB_VMID(smmu, cfg), reg); + writel_relaxed(cfg->vmid, reg); } } @@ -735,7 +734,7 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, reg = CBA2R_RW64_32BIT; /* 16-bit VMIDs live in CBA2R */ if (smmu->features & ARM_SMMU_FEAT_VMID16) - reg |= ARM_SMMU_CB_VMID(smmu, cfg) << CBA2R_VMID_SHIFT; + reg |= cfg->vmid << CBA2R_VMID_SHIFT; writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBA2R(cfg->cbndx)); } @@ -754,7 +753,7 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, (CBAR_S1_MEMATTR_WB << CBAR_S1_MEMATTR_SHIFT); } else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) { /* 8-bit VMIDs live in CBAR */ - reg |= ARM_SMMU_CB_VMID(smmu, cfg) << CBAR_VMID_SHIFT; + reg |= cfg->vmid << CBAR_VMID_SHIFT; } writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(cfg->cbndx)); @@ -783,20 +782,18 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, /* TTBRs */ if (stage1) { - u16 asid = ARM_SMMU_CB_ASID(smmu, cfg); - if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) { reg = pgtbl_cfg->arm_v7s_cfg.ttbr[0]; writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0); reg = pgtbl_cfg->arm_v7s_cfg.ttbr[1]; writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1); - writel_relaxed(asid, cb_base + ARM_SMMU_CB_CONTEXTIDR); + writel_relaxed(cfg->asid, cb_base + ARM_SMMU_CB_CONTEXTIDR); } else { reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0]; - reg64 |= (u64)asid << TTBRn_ASID_SHIFT; + reg64 |= (u64)cfg->asid << TTBRn_ASID_SHIFT; writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR0); reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1]; - reg64 |= 
(u64)asid << TTBRn_ASID_SHIFT;
+			reg64 |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
 			writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR1);
 		}
 	} else {
@@ -945,6 +942,11 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
 		cfg->irptndx = cfg->cbndx;
 	}

+	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
+		cfg->vmid = cfg->cbndx + 1 + smmu->cavium_id_base;
+	else
+		cfg->asid = cfg->cbndx + smmu->cavium_id_base;
+
 	pgtbl_cfg = (struct io_pgtable_cfg) {
 		.pgsize_bitmap	= smmu->pgsize_bitmap,
 		.ias		= ias,

From 452107c79035c6654b963e83c4862d9faa195af4 Mon Sep 17 00:00:00 2001
From: Robin Murphy
Date: Thu, 30 Mar 2017 17:56:30 +0100
Subject: [PATCH 05/14] iommu/arm-smmu: Tidy up context bank indexing

ARM_SMMU_CB() is calculated relative to ARM_SMMU_CB_BASE(), but the
latter is never of use on its own, and what we end up with is the same
ARM_SMMU_CB_BASE() + ARM_SMMU_CB() expression being duplicated at every
callsite. Folding the two together gives us a self-contained context
bank accessor which is much more pleasant to work with.

Secondly, we might as well simplify CB_BASE itself at the same time. We
use the address space size for its own sake precisely once, at probe
time, and every other usage is to dynamically calculate CB_BASE over
and over and over again. Let's flip things around so that we just
maintain the CB_BASE address directly.

Reviewed-by: Jordan Crouse
Signed-off-by: Robin Murphy
Signed-off-by: Will Deacon
---
 drivers/iommu/arm-smmu.c | 31 +++++++++++++++----------------
 1 file changed, 15 insertions(+), 16 deletions(-)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 05f17ebd70f6..27922ffd330c 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -216,8 +216,7 @@ enum arm_smmu_s2cr_privcfg {
 #define CBA2R_VMID_MASK			0xffff

 /* Translation context bank */
-#define ARM_SMMU_CB_BASE(smmu)		((smmu)->base + ((smmu)->size >> 1))
-#define ARM_SMMU_CB(smmu, n)		((n) * (1 << (smmu)->pgshift))
+#define ARM_SMMU_CB(smmu, n)	((smmu)->cb_base + ((n) << (smmu)->pgshift))

 #define ARM_SMMU_CB_SCTLR		0x0
 #define ARM_SMMU_CB_ACTLR		0x4
@@ -344,7 +343,7 @@ struct arm_smmu_device {
 	struct device			*dev;

 	void __iomem			*base;
-	unsigned long			size;
+	void __iomem			*cb_base;
 	unsigned long			pgshift;

 #define ARM_SMMU_FEAT_COHERENT_WALK	(1 << 0)
@@ -603,7 +602,7 @@ static void arm_smmu_tlb_inv_context(void *cookie)
 	void __iomem *base;

 	if (stage1) {
-		base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
+		base = ARM_SMMU_CB(smmu, cfg->cbndx);
 		writel_relaxed(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID);
 	} else {
 		base = ARM_SMMU_GR0(smmu);
@@ -623,7 +622,7 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
 	void __iomem *reg;

 	if (stage1) {
-		reg = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
+		reg = ARM_SMMU_CB(smmu, cfg->cbndx);
 		reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;

 		if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
@@ -642,7 +641,7 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
 		} while (size -= granule);
 	} else if (smmu->version == ARM_SMMU_V2) {
-		reg = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
+		reg = ARM_SMMU_CB(smmu, cfg->cbndx);
 		reg += leaf ?
ARM_SMMU_CB_S2_TLBIIPAS2L : ARM_SMMU_CB_S2_TLBIIPAS2; iova >>= 12; @@ -672,7 +671,7 @@ static irqreturn_t arm_smmu_context_fault(int irq, void *dev) struct arm_smmu_device *smmu = smmu_domain->smmu; void __iomem *cb_base; - cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx); + cb_base = ARM_SMMU_CB(smmu, cfg->cbndx); fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR); if (!(fsr & FSR_FAULT)) @@ -725,7 +724,7 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, gr1_base = ARM_SMMU_GR1(smmu); stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS; - cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx); + cb_base = ARM_SMMU_CB(smmu, cfg->cbndx); if (smmu->version > ARM_SMMU_V1) { if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) @@ -1011,7 +1010,7 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain) * Disable the context bank and free the page tables before freeing * it. */ - cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx); + cb_base = ARM_SMMU_CB(smmu, cfg->cbndx); writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR); if (cfg->irptndx != INVALID_IRPTNDX) { @@ -1362,7 +1361,7 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain, u64 phys; unsigned long va; - cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx); + cb_base = ARM_SMMU_CB(smmu, cfg->cbndx); /* ATS1 registers can only be written atomically */ va = iova & ~0xfffUL; @@ -1689,7 +1688,7 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu) /* Make sure all context banks are disabled and clear CB_FSR */ for (i = 0; i < smmu->num_context_banks; ++i) { - cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, i); + cb_base = ARM_SMMU_CB(smmu, i); writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR); writel_relaxed(FSR_FAULT, cb_base + ARM_SMMU_CB_FSR); /* @@ -1869,11 +1868,11 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) /* Check for size mismatch of SMMU address space from mapped region */ size = 1 << (((id >> ID1_NUMPAGENDXB_SHIFT) & ID1_NUMPAGENDXB_MASK) + 1); - size *= 2 << smmu->pgshift; - if (smmu->size != size) + size <<= smmu->pgshift; + if (smmu->cb_base != gr0_base + size) dev_warn(smmu->dev, - "SMMU address space size (0x%lx) differs from mapped region size (0x%lx)!\n", - size, smmu->size); + "SMMU address space size (0x%lx) differs from mapped region size (0x%tx)!\n", + size * 2, (smmu->cb_base - gr0_base) * 2); smmu->num_s2_context_banks = (id >> ID1_NUMS2CB_SHIFT) & ID1_NUMS2CB_MASK; smmu->num_context_banks = (id >> ID1_NUMCB_SHIFT) & ID1_NUMCB_MASK; @@ -2110,7 +2109,7 @@ static int arm_smmu_device_probe(struct platform_device *pdev) smmu->base = devm_ioremap_resource(dev, res); if (IS_ERR(smmu->base)) return PTR_ERR(smmu->base); - smmu->size = resource_size(res); + smmu->cb_base = smmu->base + resource_size(res) / 2; num_irqs = 0; while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) { From 11febfca2419652f28804879a58ffd32adce2372 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 30 Mar 2017 17:56:31 +0100 Subject: [PATCH 06/14] iommu/arm-smmu: Use per-context TLB sync as appropriate TLB synchronisation typically involves the SMMU blocking all incoming transactions until the TLBs report completion of all outstanding operations. In the common SMMUv2 configuration of a single distributed SMMU serving multiple peripherals, that means that a single unmap request has the potential to bring the hammer down on the entire system if synchronised globally. 
Since stage 1 contexts, and stage 2 contexts under SMMUv2, offer local sync operations, let's make use of those wherever we can in the hope of minimising global disruption. To that end, rather than add any more branches to the already unwieldy monolithic TLB maintenance ops, break them up into smaller, neater, functions which we can then mix and match as appropriate. Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu.c | 115 +++++++++++++++++++++++++++------------ 1 file changed, 81 insertions(+), 34 deletions(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 27922ffd330c..83f50142e63e 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -237,6 +237,8 @@ enum arm_smmu_s2cr_privcfg { #define ARM_SMMU_CB_S1_TLBIVAL 0x620 #define ARM_SMMU_CB_S2_TLBIIPAS2 0x630 #define ARM_SMMU_CB_S2_TLBIIPAS2L 0x638 +#define ARM_SMMU_CB_TLBSYNC 0x7f0 +#define ARM_SMMU_CB_TLBSTATUS 0x7f4 #define ARM_SMMU_CB_ATS1PR 0x800 #define ARM_SMMU_CB_ATSR 0x8f0 @@ -569,14 +571,13 @@ static void __arm_smmu_free_bitmap(unsigned long *map, int idx) } /* Wait for any pending TLB invalidations to complete */ -static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu) +static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu, + void __iomem *sync, void __iomem *status) { int count = 0; - void __iomem *gr0_base = ARM_SMMU_GR0(smmu); - writel_relaxed(0, gr0_base + ARM_SMMU_GR0_sTLBGSYNC); - while (readl_relaxed(gr0_base + ARM_SMMU_GR0_sTLBGSTATUS) - & sTLBGSTATUS_GSACTIVE) { + writel_relaxed(0, sync); + while (readl_relaxed(status) & sTLBGSTATUS_GSACTIVE) { cpu_relax(); if (++count == TLB_LOOP_TIMEOUT) { dev_err_ratelimited(smmu->dev, @@ -587,29 +588,49 @@ static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu) } } -static void arm_smmu_tlb_sync(void *cookie) +static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu) { - struct arm_smmu_domain *smmu_domain = cookie; - __arm_smmu_tlb_sync(smmu_domain->smmu); + void __iomem *base = ARM_SMMU_GR0(smmu); + + __arm_smmu_tlb_sync(smmu, base + ARM_SMMU_GR0_sTLBGSYNC, + base + ARM_SMMU_GR0_sTLBGSTATUS); } -static void arm_smmu_tlb_inv_context(void *cookie) +static void arm_smmu_tlb_sync_context(void *cookie) +{ + struct arm_smmu_domain *smmu_domain = cookie; + struct arm_smmu_device *smmu = smmu_domain->smmu; + void __iomem *base = ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx); + + __arm_smmu_tlb_sync(smmu, base + ARM_SMMU_CB_TLBSYNC, + base + ARM_SMMU_CB_TLBSTATUS); +} + +static void arm_smmu_tlb_sync_vmid(void *cookie) +{ + struct arm_smmu_domain *smmu_domain = cookie; + + arm_smmu_tlb_sync_global(smmu_domain->smmu); +} + +static void arm_smmu_tlb_inv_context_s1(void *cookie) { struct arm_smmu_domain *smmu_domain = cookie; struct arm_smmu_cfg *cfg = &smmu_domain->cfg; + void __iomem *base = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx); + + writel_relaxed(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID); + arm_smmu_tlb_sync_context(cookie); +} + +static void arm_smmu_tlb_inv_context_s2(void *cookie) +{ + struct arm_smmu_domain *smmu_domain = cookie; struct arm_smmu_device *smmu = smmu_domain->smmu; - bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS; - void __iomem *base; + void __iomem *base = ARM_SMMU_GR0(smmu); - if (stage1) { - base = ARM_SMMU_CB(smmu, cfg->cbndx); - writel_relaxed(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID); - } else { - base = ARM_SMMU_GR0(smmu); - writel_relaxed(cfg->vmid, base + ARM_SMMU_GR0_TLBIVMID); - } - - __arm_smmu_tlb_sync(smmu); + writel_relaxed(smmu_domain->cfg.vmid, base + 
ARM_SMMU_GR0_TLBIVMID);
+	arm_smmu_tlb_sync_global(smmu);
 }

 static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
@@ -617,12 +638,10 @@
 {
 	struct arm_smmu_domain *smmu_domain = cookie;
 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
-	struct arm_smmu_device *smmu = smmu_domain->smmu;
 	bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
-	void __iomem *reg;
+	void __iomem *reg = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);

 	if (stage1) {
-		reg = ARM_SMMU_CB(smmu, cfg->cbndx);
 		reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;

 		if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
@@ -640,8 +659,7 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
 			iova += granule >> 12;
 		} while (size -= granule);
 		}
-	} else if (smmu->version == ARM_SMMU_V2) {
-		reg = ARM_SMMU_CB(smmu, cfg->cbndx);
+	} else {
 		reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L :
 			      ARM_SMMU_CB_S2_TLBIIPAS2;
 		iova >>= 12;
@@ -649,16 +667,40 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
 			smmu_write_atomic_lq(iova, reg);
 			iova += granule >> 12;
 		} while (size -= granule);
-	} else {
-		reg = ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_TLBIVMID;
-		writel_relaxed(cfg->vmid, reg);
 	}
 }

-static const struct iommu_gather_ops arm_smmu_gather_ops = {
-	.tlb_flush_all	= arm_smmu_tlb_inv_context,
+/*
+ * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
+ * almost negligible, but the benefit of getting the first one in as far ahead
+ * of the sync as possible is significant, hence we don't just make this a
+ * no-op and set .tlb_sync to arm_smmu_tlb_inv_context_s2() as you might think.
+ */
+static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size,
+					 size_t granule, bool leaf, void *cookie)
+{
+	struct arm_smmu_domain *smmu_domain = cookie;
+	void __iomem *base = ARM_SMMU_GR0(smmu_domain->smmu);
+
+	writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
+}
+
+static const struct iommu_gather_ops arm_smmu_s1_tlb_ops = {
+	.tlb_flush_all	= arm_smmu_tlb_inv_context_s1,
 	.tlb_add_flush	= arm_smmu_tlb_inv_range_nosync,
-	.tlb_sync	= arm_smmu_tlb_sync,
+	.tlb_sync	= arm_smmu_tlb_sync_context,
+};
+
+static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v2 = {
+	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
+	.tlb_add_flush	= arm_smmu_tlb_inv_range_nosync,
+	.tlb_sync	= arm_smmu_tlb_sync_context,
+};
+
+static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v1 = {
+	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
+	.tlb_add_flush	= arm_smmu_tlb_inv_vmid_nosync,
+	.tlb_sync	= arm_smmu_tlb_sync_vmid,
 };

 static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
@@ -833,6 +875,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
 	enum io_pgtable_fmt fmt;
 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
+	const struct iommu_gather_ops *tlb_ops;

 	mutex_lock(&smmu_domain->init_mutex);
 	if (smmu_domain->smmu)
@@ -904,6 +947,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
 			ias = min(ias, 32UL);
 			oas = min(oas, 32UL);
 		}
+		tlb_ops = &arm_smmu_s1_tlb_ops;
 		break;
 	case ARM_SMMU_DOMAIN_NESTED:
 		/*
@@ -922,12 +966,15 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
 			ias = min(ias, 40UL);
 			oas = min(oas, 40UL);
 		}
+		if (smmu->version == ARM_SMMU_V2)
+			tlb_ops = &arm_smmu_s2_tlb_ops_v2;
+		else
+			tlb_ops = &arm_smmu_s2_tlb_ops_v1;
 		break;
 	default:
 		ret = -EINVAL;
 		goto out_unlock;
 	}
-
 	ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
 				      smmu->num_context_banks);
 	if (ret < 0)
@@ -950,7 +997,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
 		.pgsize_bitmap	= smmu->pgsize_bitmap,
 		.ias		= ias,
 		.oas		= oas,
-		.tlb		= &arm_smmu_gather_ops,
+		.tlb		= tlb_ops,
 		.iommu_dev	= smmu->dev,
 	};

@@ -1734,7 +1781,7 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
 		reg |= sCR0_EXIDENABLE;

 	/* Push the button */
-	__arm_smmu_tlb_sync(smmu);
+	arm_smmu_tlb_sync_global(smmu);
 	writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
 }

From 8513c89300696a68963cc504c21d034c6369e183 Mon Sep 17 00:00:00 2001
From: Robin Murphy
Date: Thu, 30 Mar 2017 17:56:32 +0100
Subject: [PATCH 07/14] iommu/arm-smmu: Poll for TLB sync completion more
 effectively

On relatively slow development platforms and software models, the
inefficiency of our TLB sync loop tends not to show up - for instance
on a Juno r1 board I typically see that the TLBI has completed of its
own accord by the time we get to the sync, such that the latter
finishes instantly.

However, on larger systems doing real I/O, it's less realistic for the
TLBs to go idle immediately, and at that point falling into the 1MHz
polling loop turns out to throw away performance drastically. Let's
strike a balance by polling more than once between pauses, such that we
have much more chance of catching normal operations completing before
committing to the fixed delay, but also backing off exponentially,
since if a sync really hasn't completed within one or two "reasonable
time" periods, it becomes increasingly unlikely that it ever will.

Reviewed-by: Jordan Crouse
Signed-off-by: Robin Murphy
Signed-off-by: Will Deacon
---
 drivers/iommu/arm-smmu.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 83f50142e63e..618e133f643a 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -162,6 +162,7 @@
 #define ARM_SMMU_GR0_sTLBGSTATUS	0x74
 #define sTLBGSTATUS_GSACTIVE		(1 << 0)
 #define TLB_LOOP_TIMEOUT		1000000	/* 1s! */
+#define TLB_SPIN_COUNT			10

 /* Stream mapping registers */
 #define ARM_SMMU_GR0_SMR(n)		(0x800 + ((n) << 2))
@@ -574,18 +575,19 @@ static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu,
 				void __iomem *sync, void __iomem *status)
 {
-	int count = 0;
+	unsigned int spin_cnt, delay;

 	writel_relaxed(0, sync);
-	while (readl_relaxed(status) & sTLBGSTATUS_GSACTIVE) {
-		cpu_relax();
-		if (++count == TLB_LOOP_TIMEOUT) {
-			dev_err_ratelimited(smmu->dev,
-			"TLB sync timed out -- SMMU may be deadlocked\n");
-			return;
+	for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
+		for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
+			if (!(readl_relaxed(status) & sTLBGSTATUS_GSACTIVE))
+				return;
+			cpu_relax();
 		}
-		udelay(1);
+		udelay(delay);
 	}
+	dev_err_ratelimited(smmu->dev,
+			    "TLB sync timed out -- SMMU may be deadlocked\n");
 }

 static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)

From 56fbf600dd8e2f32a5317437fe310b56719f7d2b Mon Sep 17 00:00:00 2001
From: Robin Murphy
Date: Fri, 31 Mar 2017 12:03:33 +0100
Subject: [PATCH 08/14] iommu/arm-smmu: Add global SMR masking property

The current SMR masking support using a 2-cell iommu-specifier is
primarily intended to handle individual masters with large and/or
complex Stream ID assignments; it quickly gets a bit clunky in other
SMR use-cases where we just want to consistently mask out the same
part of every Stream ID (e.g. for MMU-500 configurations where the
appended TBU number gets in the way unnecessarily). Let's add a new
property to allow a single global mask value to better fit the latter
situation.

Acked-by: Mark Rutland
Tested-by: Nipun Gupta
Signed-off-by: Robin Murphy
Signed-off-by: Will Deacon
---
 .../devicetree/bindings/iommu/arm,smmu.txt | 28 ++++++++++++++++++++++++++++
 drivers/iommu/arm-smmu.c                   |  4 +++-
 2 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.txt b/Documentation/devicetree/bindings/iommu/arm,smmu.txt
index 6cdf32d037fc..8a6ffce12af5 100644
--- a/Documentation/devicetree/bindings/iommu/arm,smmu.txt
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu.txt
@@ -60,6 +60,17 @@ conditions.
                   aliases of secure registers have to be used during
                   SMMU configuration.

+- stream-match-mask : For SMMUs supporting stream matching and using
+                  #iommu-cells = <1>, specifies a mask of bits to ignore
+                  when matching stream IDs (e.g. this may be programmed
+                  into the SMRn.MASK field of every stream match register
+                  used) in cases where it is desirable to ignore some
+                  portion of every Stream ID (e.g. for certain MMU-500
+                  configurations given globally unique input IDs). This
+                  property is not valid for SMMUs using stream indexing,
+                  or using stream matching with #iommu-cells = <2>, and
+                  may be ignored if present in such cases.
+
 ** Deprecated properties:

 - mmu-masters (deprecated in favour of the generic "iommus" binding) :
@@ -109,3 +120,20 @@ conditions.
         master3 {
                 iommus = <&smmu2 1 0x30>;
         };
+
+
+        /* ARM MMU-500 with 10-bit stream ID input configuration */
+        smmu3: iommu {
+                compatible = "arm,mmu-500", "arm,smmu-v2";
+                ...
+                #iommu-cells = <1>;
+                /* always ignore appended 5-bit TBU number */
+                stream-match-mask = <0x7c00>;
+        };
+
+        bus {
+                /* bus whose child devices emit one unique 10-bit stream
+                   ID each, but may master through multiple SMMU TBUs */
+                iommu-map = <0 &smmu3 0 0x400>;
+                ...
+ }; diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 618e133f643a..6560c4a34b96 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -1644,13 +1644,15 @@ static int arm_smmu_domain_set_attr(struct iommu_domain *domain, static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args) { - u32 fwid = 0; + u32 mask, fwid = 0; if (args->args_count > 0) fwid |= (u16)args->args[0]; if (args->args_count > 1) fwid |= (u16)args->args[1] << SMR_MASK_SHIFT; + else if (!of_property_read_u32(args->np, "stream-match-mask", &mask)) + fwid |= (u16)mask << SMR_MASK_SHIFT; return iommu_fwspec_add_ids(dev, &fwid, 1); } From 0834cc28fa56c65887c614b6c045be2ba06fdcb0 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 6 Jan 2017 16:28:17 +0000 Subject: [PATCH 09/14] iommu/arm-smmu: Restrict domain attributes to UNMANAGED domains The ARM SMMU drivers provide a DOMAIN_ATTR_NESTING domain attribute, which allows callers of the IOMMU API to request that the page table for a domain is installed at stage-2, if supported by the hardware. Since setting this attribute only makes sense for UNMANAGED domains, this patch returns -ENODEV if the domain_{get,set}_attr operations are called on other domain types. Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 6 ++++++ drivers/iommu/arm-smmu.c | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 591bb96047c9..b47a88757c18 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1837,6 +1837,9 @@ static int arm_smmu_domain_get_attr(struct iommu_domain *domain, { struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + if (domain->type != IOMMU_DOMAIN_UNMANAGED) + return -EINVAL; + switch (attr) { case DOMAIN_ATTR_NESTING: *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED); @@ -1852,6 +1855,9 @@ static int arm_smmu_domain_set_attr(struct iommu_domain *domain, int ret = 0; struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + if (domain->type != IOMMU_DOMAIN_UNMANAGED) + return -EINVAL; + mutex_lock(&smmu_domain->init_mutex); switch (attr) { diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 6560c4a34b96..099215bbff89 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -1603,6 +1603,9 @@ static int arm_smmu_domain_get_attr(struct iommu_domain *domain, { struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + if (domain->type != IOMMU_DOMAIN_UNMANAGED) + return -EINVAL; + switch (attr) { case DOMAIN_ATTR_NESTING: *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED); @@ -1618,6 +1621,9 @@ static int arm_smmu_domain_set_attr(struct iommu_domain *domain, int ret = 0; struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + if (domain->type != IOMMU_DOMAIN_UNMANAGED) + return -EINVAL; + mutex_lock(&smmu_domain->init_mutex); switch (attr) { From 61bc671179f19060be883068b6d3d82ae0b24bc0 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 6 Jan 2017 16:56:03 +0000 Subject: [PATCH 10/14] iommu/arm-smmu: Install bypass S2CRs for IOMMU_DOMAIN_IDENTITY domains In preparation for allowing the default domain type to be overridden, this patch adds support for IOMMU_DOMAIN_IDENTITY domains to the ARM SMMU driver. An identity domain is created by placing the corresponding S2CR registers into "bypass" mode, which allows transactions to flow through the SMMU without any translation. 
Reviewed-by: Robin Murphy
Signed-off-by: Will Deacon
---
 drivers/iommu/arm-smmu.c | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 099215bbff89..dbd4998661c6 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -419,6 +419,7 @@ enum arm_smmu_domain_stage {
 	ARM_SMMU_DOMAIN_S1 = 0,
 	ARM_SMMU_DOMAIN_S2,
 	ARM_SMMU_DOMAIN_NESTED,
+	ARM_SMMU_DOMAIN_BYPASS,
 };

 struct arm_smmu_domain {
@@ -883,6 +884,12 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
 	if (smmu_domain->smmu)
 		goto out_unlock;

+	if (domain->type == IOMMU_DOMAIN_IDENTITY) {
+		smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
+		smmu_domain->smmu = smmu;
+		goto out_unlock;
+	}
+
 	/*
 	 * Mapping the requested stage onto what we support is surprisingly
 	 * complicated, mainly because the spec allows S1+S2 SMMUs without
@@ -1052,7 +1059,7 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
 	void __iomem *cb_base;
 	int irq;

-	if (!smmu)
+	if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
 		return;

 	/*
@@ -1075,7 +1082,9 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
 {
 	struct arm_smmu_domain *smmu_domain;

-	if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA)
+	if (type != IOMMU_DOMAIN_UNMANAGED &&
+	    type != IOMMU_DOMAIN_DMA &&
+	    type != IOMMU_DOMAIN_IDENTITY)
 		return NULL;
 	/*
 	 * Allocate the domain and initialise some of its data structures.
@@ -1304,10 +1313,15 @@ static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
 {
 	struct arm_smmu_device *smmu = smmu_domain->smmu;
 	struct arm_smmu_s2cr *s2cr = smmu->s2crs;
-	enum arm_smmu_s2cr_type type = S2CR_TYPE_TRANS;
 	u8 cbndx = smmu_domain->cfg.cbndx;
+	enum arm_smmu_s2cr_type type;
 	int i, idx;

+	if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS)
+		type = S2CR_TYPE_BYPASS;
+	else
+		type = S2CR_TYPE_TRANS;
+
 	for_each_cfg_sme(fwspec, i, idx) {
 		if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
 			continue;

From 67560edcd8e5c57eccec4df562abbfc21c17ad75 Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Wed, 1 Mar 2017 21:11:29 +0000
Subject: [PATCH 11/14] iommu/arm-smmu-v3: Make arm_smmu_install_ste_for_dev
 return void

arm_smmu_install_ste_for_dev cannot fail and always returns 0; however,
the fact that it returns int means that callers end up implementing
redundant error-handling code which complicates STE tracking and is
never executed.

This patch changes the return type of arm_smmu_install_ste_for_dev
to void, to make it explicit that it cannot fail.
Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index b47a88757c18..97be8de3e834 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1579,7 +1579,7 @@ static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid) return step; } -static int arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec) +static void arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec) { int i; struct arm_smmu_master_data *master = fwspec->iommu_priv; @@ -1591,8 +1591,6 @@ static int arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec) arm_smmu_write_strtab_ent(smmu, sid, step, &master->ste); } - - return 0; } static void arm_smmu_detach_dev(struct device *dev) @@ -1600,8 +1598,7 @@ static void arm_smmu_detach_dev(struct device *dev) struct arm_smmu_master_data *master = dev->iommu_fwspec->iommu_priv; master->ste.bypass = true; - if (arm_smmu_install_ste_for_dev(dev->iommu_fwspec) < 0) - dev_warn(dev, "failed to install bypass STE\n"); + arm_smmu_install_ste_for_dev(dev->iommu_fwspec); } static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) @@ -1653,10 +1650,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) ste->s2_cfg = &smmu_domain->s2_cfg; } - ret = arm_smmu_install_ste_for_dev(dev->iommu_fwspec); - if (ret < 0) - ste->valid = false; - + arm_smmu_install_ste_for_dev(dev->iommu_fwspec); out_unlock: mutex_unlock(&smmu_domain->init_mutex); return ret; From beb3c6a066bff1ba412f983cb9d1a42f4cd8f76a Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 6 Jan 2017 16:27:30 +0000 Subject: [PATCH 12/14] iommu/arm-smmu-v3: Install bypass STEs for IOMMU_DOMAIN_IDENTITY domains In preparation for allowing the default domain type to be overridden, this patch adds support for IOMMU_DOMAIN_IDENTITY domains to the ARM SMMUv3 driver. An identity domain is created by placing the corresponding stream table entries into "bypass" mode, which allows transactions to flow through the SMMU without any translation. Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 58 +++++++++++++++++++++++-------------- 1 file changed, 37 insertions(+), 21 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 97be8de3e834..803352d78d43 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -554,9 +554,14 @@ struct arm_smmu_s2_cfg { }; struct arm_smmu_strtab_ent { - bool valid; - - bool bypass; /* Overrides s1/s2 config */ + /* + * An STE is "assigned" if the master emitting the corresponding SID + * is attached to a domain. The behaviour of an unassigned STE is + * determined by the disable_bypass parameter, whereas an assigned + * STE behaves according to s1_cfg/s2_cfg, which themselves are + * configured according to the domain type. + */ + bool assigned; struct arm_smmu_s1_cfg *s1_cfg; struct arm_smmu_s2_cfg *s2_cfg; }; @@ -632,6 +637,7 @@ enum arm_smmu_domain_stage { ARM_SMMU_DOMAIN_S1 = 0, ARM_SMMU_DOMAIN_S2, ARM_SMMU_DOMAIN_NESTED, + ARM_SMMU_DOMAIN_BYPASS, }; struct arm_smmu_domain { @@ -1005,9 +1011,9 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid, * This is hideously complicated, but we only really care about * three cases at the moment: * - * 1. Invalid (all zero) -> bypass (init) - * 2. Bypass -> translation (attach) - * 3. Translation -> bypass (detach) + * 1. 
Invalid (all zero) -> bypass/fault (init) + * 2. Bypass/fault -> translation/bypass (attach) + * 3. Translation/bypass -> bypass/fault (detach) * * Given that we can't update the STE atomically and the SMMU * doesn't read the thing in a defined order, that leaves us @@ -1046,11 +1052,15 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid, } /* Nuke the existing STE_0 value, as we're going to rewrite it */ - val = ste->valid ? STRTAB_STE_0_V : 0; + val = STRTAB_STE_0_V; + + /* Bypass/fault */ + if (!ste->assigned || !(ste->s1_cfg || ste->s2_cfg)) { + if (!ste->assigned && disable_bypass) + val |= STRTAB_STE_0_CFG_ABORT; + else + val |= STRTAB_STE_0_CFG_BYPASS; - if (ste->bypass) { - val |= disable_bypass ? STRTAB_STE_0_CFG_ABORT - : STRTAB_STE_0_CFG_BYPASS; dst[0] = cpu_to_le64(val); dst[1] = cpu_to_le64(STRTAB_STE_1_SHCFG_INCOMING << STRTAB_STE_1_SHCFG_SHIFT); @@ -1111,10 +1121,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid, static void arm_smmu_init_bypass_stes(u64 *strtab, unsigned int nent) { unsigned int i; - struct arm_smmu_strtab_ent ste = { - .valid = true, - .bypass = true, - }; + struct arm_smmu_strtab_ent ste = { .assigned = false }; for (i = 0; i < nent; ++i) { arm_smmu_write_strtab_ent(NULL, -1, strtab, &ste); @@ -1378,7 +1385,9 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type) { struct arm_smmu_domain *smmu_domain; - if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA) + if (type != IOMMU_DOMAIN_UNMANAGED && + type != IOMMU_DOMAIN_DMA && + type != IOMMU_DOMAIN_IDENTITY) return NULL; /* @@ -1509,6 +1518,11 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain) struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); struct arm_smmu_device *smmu = smmu_domain->smmu; + if (domain->type == IOMMU_DOMAIN_IDENTITY) { + smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS; + return 0; + } + /* Restrict the stage to what we can actually support */ if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1)) smmu_domain->stage = ARM_SMMU_DOMAIN_S2; @@ -1597,7 +1611,7 @@ static void arm_smmu_detach_dev(struct device *dev) { struct arm_smmu_master_data *master = dev->iommu_fwspec->iommu_priv; - master->ste.bypass = true; + master->ste.assigned = false; arm_smmu_install_ste_for_dev(dev->iommu_fwspec); } @@ -1617,7 +1631,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) ste = &master->ste; /* Already attached to a different domain? 
*/ - if (!ste->bypass) + if (ste->assigned) arm_smmu_detach_dev(dev); mutex_lock(&smmu_domain->init_mutex); @@ -1638,10 +1652,12 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) goto out_unlock; } - ste->bypass = false; - ste->valid = true; + ste->assigned = true; - if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { + if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS) { + ste->s1_cfg = NULL; + ste->s2_cfg = NULL; + } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { ste->s1_cfg = &smmu_domain->s1_cfg; ste->s2_cfg = NULL; arm_smmu_write_ctx_desc(smmu, ste->s1_cfg); @@ -1801,7 +1817,7 @@ static void arm_smmu_remove_device(struct device *dev) master = fwspec->iommu_priv; smmu = master->smmu; - if (master && master->ste.valid) + if (master && master->ste.assigned) arm_smmu_detach_dev(dev); iommu_group_remove_device(dev); iommu_device_unlink(&smmu->iommu, dev); From fccb4e3b8ab0957628abec82675691c72f67003e Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 5 Jan 2017 18:38:26 +0000 Subject: [PATCH 13/14] iommu: Allow default domain type to be set on the kernel command line The IOMMU core currently initialises the default domain for each group to IOMMU_DOMAIN_DMA, under the assumption that devices will use IOMMU-backed DMA ops by default. However, in some cases it is desirable for the DMA ops to bypass the IOMMU for performance reasons, reserving use of translation for subsystems such as VFIO that require it for enforcing device isolation. Rather than modify each IOMMU driver to provide different semantics for DMA domains, instead we introduce a command line parameter that can be used to change the type of the default domain. Passthrough can then be specified using "iommu.passthrough=1" on the kernel command line. Signed-off-by: Will Deacon --- .../admin-guide/kernel-parameters.txt | 6 ++++ drivers/iommu/iommu.c | 28 +++++++++++++++++-- 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 2ba45caabada..187ac5bc8b40 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1644,6 +1644,12 @@ nobypass [PPC/POWERNV] Disable IOMMU bypass, using IOMMU for PCI devices. + iommu.passthrough= + [ARM64] Configure DMA to bypass the IOMMU by default. + Format: { "0" | "1" } + 0 - Use IOMMU translation for DMA. + 1 - Bypass the IOMMU for DMA. + unset - Use IOMMU translation for DMA. io7= [HW] IO7 for Marvel based alpha systems See comment before marvel_specify_io7 in diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 3b67144dead2..770ba7e7ef4d 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -36,6 +36,7 @@ static struct kset *iommu_group_kset; static DEFINE_IDA(iommu_group_ida); +static unsigned int iommu_def_domain_type = IOMMU_DOMAIN_DMA; struct iommu_callback_data { const struct iommu_ops *ops; @@ -112,6 +113,18 @@ static int __iommu_attach_group(struct iommu_domain *domain, static void __iommu_detach_group(struct iommu_domain *domain, struct iommu_group *group); +static int __init iommu_set_def_domain_type(char *str) +{ + bool pt; + + if (!str || strtobool(str, &pt)) + return -EINVAL; + + iommu_def_domain_type = pt ? 
IOMMU_DOMAIN_IDENTITY : IOMMU_DOMAIN_DMA; + return 0; +} +early_param("iommu.passthrough", iommu_set_def_domain_type); + static ssize_t iommu_group_attr_show(struct kobject *kobj, struct attribute *__attr, char *buf) { @@ -1015,10 +1028,19 @@ struct iommu_group *iommu_group_get_for_dev(struct device *dev) * IOMMU driver. */ if (!group->default_domain) { - group->default_domain = __iommu_domain_alloc(dev->bus, - IOMMU_DOMAIN_DMA); + struct iommu_domain *dom; + + dom = __iommu_domain_alloc(dev->bus, iommu_def_domain_type); + if (!dom && iommu_def_domain_type != IOMMU_DOMAIN_DMA) { + dev_warn(dev, + "failed to allocate default IOMMU domain of type %u; falling back to IOMMU_DOMAIN_DMA", + iommu_def_domain_type); + dom = __iommu_domain_alloc(dev->bus, IOMMU_DOMAIN_DMA); + } + + group->default_domain = dom; if (!group->domain) - group->domain = group->default_domain; + group->domain = dom; } ret = iommu_group_add_device(group, dev); From 022f4e4f31fea69702f3ec810dc567af6a6d86d8 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 3 Apr 2017 13:12:10 +0100 Subject: [PATCH 14/14] iommu/io-pgtable-arm: Avoid shift overflow in block size The recursive nature of __arm_lpae_{map,unmap}() means that ARM_LPAE_BLOCK_SIZE() is evaluated for every level, including those where block mappings aren't possible. This in itself is harmless enough, as we will only ever be called with valid sizes from the pgsize_bitmap, and thus always recurse down past any imaginary block sizes. The only problem is that most of those imaginary sizes overflow the type used for the calculation, and thus trigger warnings under UBsan: [ 63.020939] ================================================================================ [ 63.021284] UBSAN: Undefined behaviour in drivers/iommu/io-pgtable-arm.c:312:22 [ 63.021602] shift exponent 39 is too large for 32-bit type 'int' [ 63.021909] CPU: 0 PID: 1119 Comm: lkvm Not tainted 4.7.0-rc3+ #819 [ 63.022163] Hardware name: FVP Base (DT) [ 63.022345] Call trace: [ 63.022629] [] dump_backtrace+0x0/0x3a8 [ 63.022975] [] show_stack+0x14/0x20 [ 63.023294] [] dump_stack+0x104/0x148 [ 63.023609] [] ubsan_epilogue+0x18/0x68 [ 63.023956] [] __ubsan_handle_shift_out_of_bounds+0x18c/0x1bc [ 63.024365] [] __arm_lpae_map+0x720/0xae0 [ 63.024732] [] arm_lpae_map+0x100/0x190 [ 63.025049] [] arm_smmu_map+0x78/0xc8 [ 63.025390] [] iommu_map+0x130/0x230 [ 63.025763] [] vfio_iommu_type1_attach_group+0x4bc/0xa00 [ 63.026156] [] vfio_fops_unl_ioctl+0x320/0x580 [ 63.026515] [] do_vfs_ioctl+0x140/0xd28 [ 63.026858] [] SyS_ioctl+0x8c/0xa0 [ 63.027179] [] el0_svc_naked+0x24/0x28 [ 63.027412] ================================================================================ Perform the shift in a 64-bit type to prevent the theoretical overflow and keep the peace. As it turns out, this generates identical code for 32-bit ARM, and marginally shorter AArch64 code, so it's good all round. Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/iommu/io-pgtable-arm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index f9bc6ebb8140..6e5df5e0a3bd 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -74,7 +74,7 @@ /* Calculate the block/page mapping size at level l for pagetable in d. */ #define ARM_LPAE_BLOCK_SIZE(l,d) \ - (1 << (ilog2(sizeof(arm_lpae_iopte)) + \ + (1ULL << (ilog2(sizeof(arm_lpae_iopte)) + \ ((ARM_LPAE_MAX_LEVELS - (l)) * (d)->bits_per_level))) /* Page table bits */
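For a concrete feel for the numbers behind this last fix: with the
common 4K-granule LPAE configuration, bits_per_level is 9,
sizeof(arm_lpae_iopte) is 8 and ARM_LPAE_MAX_LEVELS is 4, so at level 0
the shift exponent is 3 + (4 - 0) * 9 = 39 - exactly the "shift
exponent 39" in the UBSAN splat above, and undefined behaviour for a
32-bit int. A minimal standalone sketch (ordinary userspace C with the
values assumed as above, not driver code):

	#include <stdio.h>

	int main(void)
	{
		const int bits_per_level = 9;	/* 4K granule: 12 - ilog2(8) */
		const int max_levels = 4;	/* ARM_LPAE_MAX_LEVELS */
		int l = 0;			/* top level */

		/* pre-patch:  1 <<  shift would be evaluated in int (UB at 39) */
		/* post-patch: 1ULL << shift forces the shift into a 64-bit type */
		unsigned long long block_size =
			1ULL << (3 + (max_levels - l) * bits_per_level);

		printf("level %d block size: 0x%llx\n", l, block_size);
		return 0;
	}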