gpu: host1x: Add IOMMU support

Add support for the Host1x unit to be located behind
an IOMMU. This is required when gather buffers may be
allocated non-contiguously in physical memory, as can
be the case when TegraDRM is also using the IOMMU.

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
This commit is contained in:
Mikko Perttunen 2016-12-14 13:16:14 +02:00 committed by Thierry Reding
parent 8cadb01d2c
commit 404bfb78da
7 changed files with 177 additions and 39 deletions

View File

@ -51,9 +51,15 @@ static void host1x_pushbuffer_destroy(struct push_buffer *pb)
struct host1x_cdma *cdma = pb_to_cdma(pb); struct host1x_cdma *cdma = pb_to_cdma(pb);
struct host1x *host1x = cdma_to_host1x(cdma); struct host1x *host1x = cdma_to_host1x(cdma);
if (pb->phys != 0) if (!pb->phys)
dma_free_wc(host1x->dev, pb->size_bytes + 4, pb->mapped, return;
pb->phys);
if (host1x->domain) {
iommu_unmap(host1x->domain, pb->dma, pb->alloc_size);
free_iova(&host1x->iova, iova_pfn(&host1x->iova, pb->dma));
}
dma_free_wc(host1x->dev, pb->alloc_size, pb->mapped, pb->phys);
pb->mapped = NULL; pb->mapped = NULL;
pb->phys = 0; pb->phys = 0;
@ -66,28 +72,64 @@ static int host1x_pushbuffer_init(struct push_buffer *pb)
{ {
struct host1x_cdma *cdma = pb_to_cdma(pb); struct host1x_cdma *cdma = pb_to_cdma(pb);
struct host1x *host1x = cdma_to_host1x(cdma); struct host1x *host1x = cdma_to_host1x(cdma);
struct iova *alloc;
u32 size;
int err;
pb->mapped = NULL; pb->mapped = NULL;
pb->phys = 0; pb->phys = 0;
pb->size_bytes = HOST1X_PUSHBUFFER_SLOTS * 8; pb->size = HOST1X_PUSHBUFFER_SLOTS * 8;
size = pb->size + 4;
/* initialize buffer pointers */ /* initialize buffer pointers */
pb->fence = pb->size_bytes - 8; pb->fence = pb->size - 8;
pb->pos = 0; pb->pos = 0;
/* allocate and map pushbuffer memory */ if (host1x->domain) {
pb->mapped = dma_alloc_wc(host1x->dev, pb->size_bytes + 4, &pb->phys, unsigned long shift;
GFP_KERNEL);
if (!pb->mapped) size = iova_align(&host1x->iova, size);
goto fail;
pb->mapped = dma_alloc_wc(host1x->dev, size, &pb->phys,
GFP_KERNEL);
if (!pb->mapped)
return -ENOMEM;
shift = iova_shift(&host1x->iova);
alloc = alloc_iova(&host1x->iova, size >> shift,
host1x->iova_end >> shift, true);
if (!alloc) {
err = -ENOMEM;
goto iommu_free_mem;
}
pb->dma = iova_dma_addr(&host1x->iova, alloc);
err = iommu_map(host1x->domain, pb->dma, pb->phys, size,
IOMMU_READ);
if (err)
goto iommu_free_iova;
} else {
pb->mapped = dma_alloc_wc(host1x->dev, size, &pb->phys,
GFP_KERNEL);
if (!pb->mapped)
return -ENOMEM;
pb->dma = pb->phys;
}
pb->alloc_size = size;
host1x_hw_pushbuffer_init(host1x, pb); host1x_hw_pushbuffer_init(host1x, pb);
return 0; return 0;
fail: iommu_free_iova:
host1x_pushbuffer_destroy(pb); __free_iova(&host1x->iova, alloc);
return -ENOMEM; iommu_free_mem:
dma_free_wc(host1x->dev, pb->alloc_size, pb->mapped, pb->phys);
return err;
} }
/* /*
@ -101,7 +143,7 @@ static void host1x_pushbuffer_push(struct push_buffer *pb, u32 op1, u32 op2)
WARN_ON(pb->pos == pb->fence); WARN_ON(pb->pos == pb->fence);
*(p++) = op1; *(p++) = op1;
*(p++) = op2; *(p++) = op2;
pb->pos = (pb->pos + 8) & (pb->size_bytes - 1); pb->pos = (pb->pos + 8) & (pb->size - 1);
} }
/* /*
@ -111,7 +153,7 @@ static void host1x_pushbuffer_push(struct push_buffer *pb, u32 op1, u32 op2)
static void host1x_pushbuffer_pop(struct push_buffer *pb, unsigned int slots) static void host1x_pushbuffer_pop(struct push_buffer *pb, unsigned int slots)
{ {
/* Advance the next write position */ /* Advance the next write position */
pb->fence = (pb->fence + slots * 8) & (pb->size_bytes - 1); pb->fence = (pb->fence + slots * 8) & (pb->size - 1);
} }
/* /*
@ -119,7 +161,7 @@ static void host1x_pushbuffer_pop(struct push_buffer *pb, unsigned int slots)
*/ */
static u32 host1x_pushbuffer_space(struct push_buffer *pb) static u32 host1x_pushbuffer_space(struct push_buffer *pb)
{ {
return ((pb->fence - pb->pos) & (pb->size_bytes - 1)) / 8; return ((pb->fence - pb->pos) & (pb->size - 1)) / 8;
} }
/* /*

View File

@ -43,10 +43,12 @@ struct host1x_job;
/*
 * Push buffer state, reached from the CDMA code as cdma->push_buffer.
 * The right-hand (new) column adds a separate device address and the
 * size of the underlying allocation.
 */
struct push_buffer { struct push_buffer {
void *mapped; /* mapped pushbuffer memory */ void *mapped; /* mapped pushbuffer memory */
dma_addr_t phys; /* physical address of pushbuffer */ dma_addr_t dma; /* device address of pushbuffer */
phys_addr_t phys; /* physical address of pushbuffer */
u32 fence; /* index we've written */ u32 fence; /* index we've written */
u32 pos; /* index to write to */ u32 pos; /* index to write to */
u32 size_bytes; u32 size; /* ring size in bytes (HOST1X_PUSHBUFFER_SLOTS * 8), without the 4 extra bytes */
u32 alloc_size; /* bytes actually allocated: size + 4, IOVA-aligned when an IOMMU domain is used */
}; };
struct buffer_timeout { struct buffer_timeout {

View File

@ -27,6 +27,7 @@
#define CREATE_TRACE_POINTS #define CREATE_TRACE_POINTS
#include <trace/events/host1x.h> #include <trace/events/host1x.h>
#undef CREATE_TRACE_POINTS
#include "bus.h" #include "bus.h"
#include "dev.h" #include "dev.h"
@ -168,16 +169,37 @@ static int host1x_probe(struct platform_device *pdev)
return err; return err;
} }
if (iommu_present(&platform_bus_type)) {
struct iommu_domain_geometry *geometry;
unsigned long order;
host->domain = iommu_domain_alloc(&platform_bus_type);
if (!host->domain)
return -ENOMEM;
err = iommu_attach_device(host->domain, &pdev->dev);
if (err)
goto fail_free_domain;
geometry = &host->domain->geometry;
order = __ffs(host->domain->pgsize_bitmap);
init_iova_domain(&host->iova, 1UL << order,
geometry->aperture_start >> order,
geometry->aperture_end >> order);
host->iova_end = geometry->aperture_end;
}
err = host1x_channel_list_init(host); err = host1x_channel_list_init(host);
if (err) { if (err) {
dev_err(&pdev->dev, "failed to initialize channel list\n"); dev_err(&pdev->dev, "failed to initialize channel list\n");
return err; goto fail_detach_device;
} }
err = clk_prepare_enable(host->clk); err = clk_prepare_enable(host->clk);
if (err < 0) { if (err < 0) {
dev_err(&pdev->dev, "failed to enable clock\n"); dev_err(&pdev->dev, "failed to enable clock\n");
return err; goto fail_detach_device;
} }
err = host1x_syncpt_init(host); err = host1x_syncpt_init(host);
@ -206,6 +228,15 @@ static int host1x_probe(struct platform_device *pdev)
host1x_syncpt_deinit(host); host1x_syncpt_deinit(host);
fail_unprepare_disable: fail_unprepare_disable:
clk_disable_unprepare(host->clk); clk_disable_unprepare(host->clk);
fail_detach_device:
if (host->domain) {
put_iova_domain(&host->iova);
iommu_detach_device(host->domain, &pdev->dev);
}
fail_free_domain:
if (host->domain)
iommu_domain_free(host->domain);
return err; return err;
} }
@ -218,6 +249,12 @@ static int host1x_remove(struct platform_device *pdev)
host1x_syncpt_deinit(host); host1x_syncpt_deinit(host);
clk_disable_unprepare(host->clk); clk_disable_unprepare(host->clk);
if (host->domain) {
put_iova_domain(&host->iova);
iommu_detach_device(host->domain, &pdev->dev);
iommu_domain_free(host->domain);
}
return 0; return 0;
} }

View File

@ -19,6 +19,8 @@
#include <linux/platform_device.h> #include <linux/platform_device.h>
#include <linux/device.h> #include <linux/device.h>
#include <linux/iommu.h>
#include <linux/iova.h>
#include "channel.h" #include "channel.h"
#include "syncpt.h" #include "syncpt.h"
@ -108,6 +110,10 @@ struct host1x {
struct device *dev; struct device *dev;
struct clk *clk; struct clk *clk;
struct iommu_domain *domain;
struct iova_domain iova;
dma_addr_t iova_end;
struct mutex intr_mutex; struct mutex intr_mutex;
int intr_syncpt_irq; int intr_syncpt_irq;

View File

@ -30,7 +30,7 @@
*/ */
static void push_buffer_init(struct push_buffer *pb) static void push_buffer_init(struct push_buffer *pb)
{ {
*(u32 *)(pb->mapped + pb->size_bytes) = host1x_opcode_restart(0); *(u32 *)(pb->mapped + pb->size) = host1x_opcode_restart(0);
} }
/* /*
@ -55,8 +55,8 @@ static void cdma_timeout_cpu_incr(struct host1x_cdma *cdma, u32 getptr,
*(p++) = HOST1X_OPCODE_NOP; *(p++) = HOST1X_OPCODE_NOP;
*(p++) = HOST1X_OPCODE_NOP; *(p++) = HOST1X_OPCODE_NOP;
dev_dbg(host1x->dev, "%s: NOP at %pad+%#x\n", __func__, dev_dbg(host1x->dev, "%s: NOP at %pad+%#x\n", __func__,
&pb->phys, getptr); &pb->dma, getptr);
getptr = (getptr + 8) & (pb->size_bytes - 1); getptr = (getptr + 8) & (pb->size - 1);
} }
wmb(); wmb();
@ -78,10 +78,9 @@ static void cdma_start(struct host1x_cdma *cdma)
HOST1X_CHANNEL_DMACTRL); HOST1X_CHANNEL_DMACTRL);
/* set base, put and end pointer */ /* set base, put and end pointer */
host1x_ch_writel(ch, cdma->push_buffer.phys, HOST1X_CHANNEL_DMASTART); host1x_ch_writel(ch, cdma->push_buffer.dma, HOST1X_CHANNEL_DMASTART);
host1x_ch_writel(ch, cdma->push_buffer.pos, HOST1X_CHANNEL_DMAPUT); host1x_ch_writel(ch, cdma->push_buffer.pos, HOST1X_CHANNEL_DMAPUT);
host1x_ch_writel(ch, cdma->push_buffer.phys + host1x_ch_writel(ch, cdma->push_buffer.dma + cdma->push_buffer.size + 4,
cdma->push_buffer.size_bytes + 4,
HOST1X_CHANNEL_DMAEND); HOST1X_CHANNEL_DMAEND);
/* reset GET */ /* reset GET */
@ -115,9 +114,8 @@ static void cdma_timeout_restart(struct host1x_cdma *cdma, u32 getptr)
HOST1X_CHANNEL_DMACTRL); HOST1X_CHANNEL_DMACTRL);
/* set base, end pointer (all of memory) */ /* set base, end pointer (all of memory) */
host1x_ch_writel(ch, cdma->push_buffer.phys, HOST1X_CHANNEL_DMASTART); host1x_ch_writel(ch, cdma->push_buffer.dma, HOST1X_CHANNEL_DMASTART);
host1x_ch_writel(ch, cdma->push_buffer.phys + host1x_ch_writel(ch, cdma->push_buffer.dma + cdma->push_buffer.size,
cdma->push_buffer.size_bytes,
HOST1X_CHANNEL_DMAEND); HOST1X_CHANNEL_DMAEND);
/* set GET, by loading the value in PUT (then reset GET) */ /* set GET, by loading the value in PUT (then reset GET) */

View File

@ -174,9 +174,10 @@ static int do_waitchks(struct host1x_job *job, struct host1x *host,
return 0; return 0;
} }
static unsigned int pin_job(struct host1x_job *job) static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
{ {
unsigned int i; unsigned int i;
int err;
job->num_unpins = 0; job->num_unpins = 0;
@ -186,12 +187,16 @@ static unsigned int pin_job(struct host1x_job *job)
dma_addr_t phys_addr; dma_addr_t phys_addr;
reloc->target.bo = host1x_bo_get(reloc->target.bo); reloc->target.bo = host1x_bo_get(reloc->target.bo);
if (!reloc->target.bo) if (!reloc->target.bo) {
err = -EINVAL;
goto unpin; goto unpin;
}
phys_addr = host1x_bo_pin(reloc->target.bo, &sgt); phys_addr = host1x_bo_pin(reloc->target.bo, &sgt);
if (!phys_addr) if (!phys_addr) {
err = -EINVAL;
goto unpin; goto unpin;
}
job->addr_phys[job->num_unpins] = phys_addr; job->addr_phys[job->num_unpins] = phys_addr;
job->unpins[job->num_unpins].bo = reloc->target.bo; job->unpins[job->num_unpins].bo = reloc->target.bo;
@ -201,28 +206,67 @@ static unsigned int pin_job(struct host1x_job *job)
for (i = 0; i < job->num_gathers; i++) { for (i = 0; i < job->num_gathers; i++) {
struct host1x_job_gather *g = &job->gathers[i]; struct host1x_job_gather *g = &job->gathers[i];
size_t gather_size = 0;
struct scatterlist *sg;
struct sg_table *sgt; struct sg_table *sgt;
dma_addr_t phys_addr; dma_addr_t phys_addr;
unsigned long shift;
struct iova *alloc;
unsigned int j;
g->bo = host1x_bo_get(g->bo); g->bo = host1x_bo_get(g->bo);
if (!g->bo) if (!g->bo) {
err = -EINVAL;
goto unpin; goto unpin;
}
phys_addr = host1x_bo_pin(g->bo, &sgt); phys_addr = host1x_bo_pin(g->bo, &sgt);
if (!phys_addr) if (!phys_addr) {
err = -EINVAL;
goto unpin; goto unpin;
}
if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && host->domain) {
for_each_sg(sgt->sgl, sg, sgt->nents, j)
gather_size += sg->length;
gather_size = iova_align(&host->iova, gather_size);
shift = iova_shift(&host->iova);
alloc = alloc_iova(&host->iova, gather_size >> shift,
host->iova_end >> shift, true);
if (!alloc) {
err = -ENOMEM;
goto unpin;
}
err = iommu_map_sg(host->domain,
iova_dma_addr(&host->iova, alloc),
sgt->sgl, sgt->nents, IOMMU_READ);
if (err == 0) {
__free_iova(&host->iova, alloc);
err = -EINVAL;
goto unpin;
}
job->addr_phys[job->num_unpins] =
iova_dma_addr(&host->iova, alloc);
job->unpins[job->num_unpins].size = gather_size;
} else {
job->addr_phys[job->num_unpins] = phys_addr;
}
job->gather_addr_phys[i] = job->addr_phys[job->num_unpins];
job->addr_phys[job->num_unpins] = phys_addr;
job->unpins[job->num_unpins].bo = g->bo; job->unpins[job->num_unpins].bo = g->bo;
job->unpins[job->num_unpins].sgt = sgt; job->unpins[job->num_unpins].sgt = sgt;
job->num_unpins++; job->num_unpins++;
} }
return job->num_unpins; return 0;
unpin: unpin:
host1x_job_unpin(job); host1x_job_unpin(job);
return 0; return err;
} }
static int do_relocs(struct host1x_job *job, struct host1x_bo *cmdbuf) static int do_relocs(struct host1x_job *job, struct host1x_bo *cmdbuf)
@ -525,8 +569,8 @@ int host1x_job_pin(struct host1x_job *job, struct device *dev)
host1x_syncpt_load(host->syncpt + i); host1x_syncpt_load(host->syncpt + i);
/* pin memory */ /* pin memory */
err = pin_job(job); err = pin_job(host, job);
if (!err) if (err)
goto out; goto out;
/* patch gathers */ /* patch gathers */
@ -572,11 +616,19 @@ EXPORT_SYMBOL(host1x_job_pin);
void host1x_job_unpin(struct host1x_job *job) void host1x_job_unpin(struct host1x_job *job)
{ {
struct host1x *host = dev_get_drvdata(job->channel->dev->parent);
unsigned int i; unsigned int i;
for (i = 0; i < job->num_unpins; i++) { for (i = 0; i < job->num_unpins; i++) {
struct host1x_job_unpin_data *unpin = &job->unpins[i]; struct host1x_job_unpin_data *unpin = &job->unpins[i];
if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && host->domain) {
iommu_unmap(host->domain, job->addr_phys[i],
unpin->size);
free_iova(&host->iova,
iova_pfn(&host->iova, job->addr_phys[i]));
}
host1x_bo_unpin(unpin->bo, unpin->sgt); host1x_bo_unpin(unpin->bo, unpin->sgt);
host1x_bo_put(unpin->bo); host1x_bo_put(unpin->bo);
} }

View File

@ -44,6 +44,7 @@ struct host1x_waitchk {
/* Per-buffer bookkeeping that host1x_job_unpin() walks to undo pin_job(). */
struct host1x_job_unpin_data { struct host1x_job_unpin_data {
struct host1x_bo *bo; struct host1x_bo *bo;
struct sg_table *sgt; struct sg_table *sgt;
size_t size; /* IOVA-aligned length of the IOMMU mapping; pin_job() sets it only for gathers mapped through the IOMMU, and host1x_job_unpin() passes it to iommu_unmap() */
}; };
/* /*