From 7aeb7448d8d02868ef30a6d08e856b2220319273 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Thu, 24 Oct 2013 01:24:54 -0700 Subject: [PATCH 1/2] drm/ttm: Enable the dma page pool also for intel IOMMUs Used by the vmwgfx driver Signed-off-by: Thomas Hellstrom Reviewed-by: Jakob Bornecrantz Reviewed-by: Konrad Rzeszutek Wilk --- drivers/gpu/drm/ttm/Makefile | 6 +----- drivers/gpu/drm/ttm/ttm_page_alloc_dma.c | 3 +++ include/drm/ttm/ttm_page_alloc.h | 11 ++++++++++- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/ttm/Makefile b/drivers/gpu/drm/ttm/Makefile index b2b33dde2afb..b433b9f040c9 100644 --- a/drivers/gpu/drm/ttm/Makefile +++ b/drivers/gpu/drm/ttm/Makefile @@ -5,10 +5,6 @@ ccflags-y := -Iinclude/drm ttm-y := ttm_agp_backend.o ttm_memory.o ttm_tt.o ttm_bo.o \ ttm_bo_util.o ttm_bo_vm.o ttm_module.o \ ttm_object.o ttm_lock.o ttm_execbuf_util.o ttm_page_alloc.o \ - ttm_bo_manager.o - -ifeq ($(CONFIG_SWIOTLB),y) -ttm-y += ttm_page_alloc_dma.o -endif + ttm_bo_manager.o ttm_page_alloc_dma.o obj-$(CONFIG_DRM_TTM) += ttm.o diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c index 7957beeeaf73..fb8259f69839 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c @@ -33,6 +33,7 @@ * when freed). */ +#if defined(CONFIG_SWIOTLB) || defined(CONFIG_INTEL_IOMMU) #define pr_fmt(fmt) "[TTM] " fmt #include @@ -1142,3 +1143,5 @@ int ttm_dma_page_alloc_debugfs(struct seq_file *m, void *data) return 0; } EXPORT_SYMBOL_GPL(ttm_dma_page_alloc_debugfs); + +#endif diff --git a/include/drm/ttm/ttm_page_alloc.h b/include/drm/ttm/ttm_page_alloc.h index 706b962c6467..d1f61bfe0ebe 100644 --- a/include/drm/ttm/ttm_page_alloc.h +++ b/include/drm/ttm/ttm_page_alloc.h @@ -62,7 +62,7 @@ extern void ttm_pool_unpopulate(struct ttm_tt *ttm); extern int ttm_page_alloc_debugfs(struct seq_file *m, void *data); -#ifdef CONFIG_SWIOTLB +#if defined(CONFIG_SWIOTLB) || defined(CONFIG_INTEL_IOMMU) /** * Initialize pool allocator. */ @@ -94,6 +94,15 @@ static inline int ttm_dma_page_alloc_debugfs(struct seq_file *m, void *data) { return 0; } +static inline int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, + struct device *dev) +{ + return -ENOMEM; +} +static inline void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, + struct device *dev) +{ +} #endif #endif From d92d985177c495aab53c7167f310a7efb1853918 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Thu, 24 Oct 2013 01:49:26 -0700 Subject: [PATCH 2/2] drm/vmwgfx: Use the linux DMA api to get valid device addresses of pages The code handles three different cases: 1) physical page addresses. The ttm page array is used. 2) DMA subsystem addresses. A scatter-gather list is used. 3) Coherent pages. The ttm dma pool is used, together with the dma_ttm array os dma_addr_t Signed-off-by: Thomas Hellstrom Reviewed-by: Jakob Bornecrantz --- drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c | 375 ++++++++++++++++++++++++- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 87 +++++- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 98 ++++++- drivers/gpu/drm/vmwgfx/vmwgfx_gmr.c | 150 +++++----- 4 files changed, 618 insertions(+), 92 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c index 96dc84dc34d0..7776e6f0aef6 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c @@ -141,37 +141,374 @@ struct ttm_placement vmw_srf_placement = { }; struct vmw_ttm_tt { - struct ttm_tt ttm; + struct ttm_dma_tt dma_ttm; struct vmw_private *dev_priv; int gmr_id; + struct sg_table sgt; + struct vmw_sg_table vsgt; + uint64_t sg_alloc_size; + bool mapped; }; +/** + * Helper functions to advance a struct vmw_piter iterator. + * + * @viter: Pointer to the iterator. + * + * These functions return false if past the end of the list, + * true otherwise. Functions are selected depending on the current + * DMA mapping mode. + */ +static bool __vmw_piter_non_sg_next(struct vmw_piter *viter) +{ + return ++(viter->i) < viter->num_pages; +} + +static bool __vmw_piter_sg_next(struct vmw_piter *viter) +{ + return __sg_page_iter_next(&viter->iter); +} + + +/** + * Helper functions to return a pointer to the current page. + * + * @viter: Pointer to the iterator + * + * These functions return a pointer to the page currently + * pointed to by @viter. Functions are selected depending on the + * current mapping mode. + */ +static struct page *__vmw_piter_non_sg_page(struct vmw_piter *viter) +{ + return viter->pages[viter->i]; +} + +static struct page *__vmw_piter_sg_page(struct vmw_piter *viter) +{ + return sg_page_iter_page(&viter->iter); +} + + +/** + * Helper functions to return the DMA address of the current page. + * + * @viter: Pointer to the iterator + * + * These functions return the DMA address of the page currently + * pointed to by @viter. Functions are selected depending on the + * current mapping mode. + */ +static dma_addr_t __vmw_piter_phys_addr(struct vmw_piter *viter) +{ + return page_to_phys(viter->pages[viter->i]); +} + +static dma_addr_t __vmw_piter_dma_addr(struct vmw_piter *viter) +{ + return viter->addrs[viter->i]; +} + +static dma_addr_t __vmw_piter_sg_addr(struct vmw_piter *viter) +{ + return sg_page_iter_dma_address(&viter->iter); +} + + +/** + * vmw_piter_start - Initialize a struct vmw_piter. + * + * @viter: Pointer to the iterator to initialize + * @vsgt: Pointer to a struct vmw_sg_table to initialize from + * + * Note that we're following the convention of __sg_page_iter_start, so that + * the iterator doesn't point to a valid page after initialization; it has + * to be advanced one step first. + */ +void vmw_piter_start(struct vmw_piter *viter, const struct vmw_sg_table *vsgt, + unsigned long p_offset) +{ + viter->i = p_offset - 1; + viter->num_pages = vsgt->num_pages; + switch (vsgt->mode) { + case vmw_dma_phys: + viter->next = &__vmw_piter_non_sg_next; + viter->dma_address = &__vmw_piter_phys_addr; + viter->page = &__vmw_piter_non_sg_page; + viter->pages = vsgt->pages; + break; + case vmw_dma_alloc_coherent: + viter->next = &__vmw_piter_non_sg_next; + viter->dma_address = &__vmw_piter_dma_addr; + viter->page = &__vmw_piter_non_sg_page; + viter->addrs = vsgt->addrs; + break; + case vmw_dma_map_populate: + case vmw_dma_map_bind: + viter->next = &__vmw_piter_sg_next; + viter->dma_address = &__vmw_piter_sg_addr; + viter->page = &__vmw_piter_sg_page; + __sg_page_iter_start(&viter->iter, vsgt->sgt->sgl, + vsgt->sgt->orig_nents, p_offset); + break; + default: + BUG(); + } +} + +/** + * vmw_ttm_unmap_from_dma - unmap device addresses previsouly mapped for + * TTM pages + * + * @vmw_tt: Pointer to a struct vmw_ttm_backend + * + * Used to free dma mappings previously mapped by vmw_ttm_map_for_dma. + */ +static void vmw_ttm_unmap_from_dma(struct vmw_ttm_tt *vmw_tt) +{ + struct device *dev = vmw_tt->dev_priv->dev->dev; + + dma_unmap_sg(dev, vmw_tt->sgt.sgl, vmw_tt->sgt.nents, + DMA_BIDIRECTIONAL); + vmw_tt->sgt.nents = vmw_tt->sgt.orig_nents; +} + +/** + * vmw_ttm_map_for_dma - map TTM pages to get device addresses + * + * @vmw_tt: Pointer to a struct vmw_ttm_backend + * + * This function is used to get device addresses from the kernel DMA layer. + * However, it's violating the DMA API in that when this operation has been + * performed, it's illegal for the CPU to write to the pages without first + * unmapping the DMA mappings, or calling dma_sync_sg_for_cpu(). It is + * therefore only legal to call this function if we know that the function + * dma_sync_sg_for_cpu() is a NOP, and dma_sync_sg_for_device() is at most + * a CPU write buffer flush. + */ +static int vmw_ttm_map_for_dma(struct vmw_ttm_tt *vmw_tt) +{ + struct device *dev = vmw_tt->dev_priv->dev->dev; + int ret; + + ret = dma_map_sg(dev, vmw_tt->sgt.sgl, vmw_tt->sgt.orig_nents, + DMA_BIDIRECTIONAL); + if (unlikely(ret == 0)) + return -ENOMEM; + + vmw_tt->sgt.nents = ret; + + return 0; +} + +/** + * vmw_ttm_map_dma - Make sure TTM pages are visible to the device + * + * @vmw_tt: Pointer to a struct vmw_ttm_tt + * + * Select the correct function for and make sure the TTM pages are + * visible to the device. Allocate storage for the device mappings. + * If a mapping has already been performed, indicated by the storage + * pointer being non NULL, the function returns success. + */ +static int vmw_ttm_map_dma(struct vmw_ttm_tt *vmw_tt) +{ + struct vmw_private *dev_priv = vmw_tt->dev_priv; + struct ttm_mem_global *glob = vmw_mem_glob(dev_priv); + struct vmw_sg_table *vsgt = &vmw_tt->vsgt; + struct vmw_piter iter; + dma_addr_t old; + int ret = 0; + static size_t sgl_size; + static size_t sgt_size; + + if (vmw_tt->mapped) + return 0; + + vsgt->mode = dev_priv->map_mode; + vsgt->pages = vmw_tt->dma_ttm.ttm.pages; + vsgt->num_pages = vmw_tt->dma_ttm.ttm.num_pages; + vsgt->addrs = vmw_tt->dma_ttm.dma_address; + vsgt->sgt = &vmw_tt->sgt; + + switch (dev_priv->map_mode) { + case vmw_dma_map_bind: + case vmw_dma_map_populate: + if (unlikely(!sgl_size)) { + sgl_size = ttm_round_pot(sizeof(struct scatterlist)); + sgt_size = ttm_round_pot(sizeof(struct sg_table)); + } + vmw_tt->sg_alloc_size = sgt_size + sgl_size * vsgt->num_pages; + ret = ttm_mem_global_alloc(glob, vmw_tt->sg_alloc_size, false, + true); + if (unlikely(ret != 0)) + return ret; + + ret = sg_alloc_table_from_pages(&vmw_tt->sgt, vsgt->pages, + vsgt->num_pages, 0, + (unsigned long) + vsgt->num_pages << PAGE_SHIFT, + GFP_KERNEL); + if (unlikely(ret != 0)) + goto out_sg_alloc_fail; + + if (vsgt->num_pages > vmw_tt->sgt.nents) { + uint64_t over_alloc = + sgl_size * (vsgt->num_pages - + vmw_tt->sgt.nents); + + ttm_mem_global_free(glob, over_alloc); + vmw_tt->sg_alloc_size -= over_alloc; + } + + ret = vmw_ttm_map_for_dma(vmw_tt); + if (unlikely(ret != 0)) + goto out_map_fail; + + break; + default: + break; + } + + old = ~((dma_addr_t) 0); + vmw_tt->vsgt.num_regions = 0; + for (vmw_piter_start(&iter, vsgt, 0); vmw_piter_next(&iter);) { + dma_addr_t cur = vmw_piter_dma_addr(&iter); + + if (cur != old + PAGE_SIZE) + vmw_tt->vsgt.num_regions++; + old = cur; + } + + vmw_tt->mapped = true; + return 0; + +out_map_fail: + sg_free_table(vmw_tt->vsgt.sgt); + vmw_tt->vsgt.sgt = NULL; +out_sg_alloc_fail: + ttm_mem_global_free(glob, vmw_tt->sg_alloc_size); + return ret; +} + +/** + * vmw_ttm_unmap_dma - Tear down any TTM page device mappings + * + * @vmw_tt: Pointer to a struct vmw_ttm_tt + * + * Tear down any previously set up device DMA mappings and free + * any storage space allocated for them. If there are no mappings set up, + * this function is a NOP. + */ +static void vmw_ttm_unmap_dma(struct vmw_ttm_tt *vmw_tt) +{ + struct vmw_private *dev_priv = vmw_tt->dev_priv; + + if (!vmw_tt->vsgt.sgt) + return; + + switch (dev_priv->map_mode) { + case vmw_dma_map_bind: + case vmw_dma_map_populate: + vmw_ttm_unmap_from_dma(vmw_tt); + sg_free_table(vmw_tt->vsgt.sgt); + vmw_tt->vsgt.sgt = NULL; + ttm_mem_global_free(vmw_mem_glob(dev_priv), + vmw_tt->sg_alloc_size); + break; + default: + break; + } + vmw_tt->mapped = false; +} + static int vmw_ttm_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem) { - struct vmw_ttm_tt *vmw_be = container_of(ttm, struct vmw_ttm_tt, ttm); + struct vmw_ttm_tt *vmw_be = + container_of(ttm, struct vmw_ttm_tt, dma_ttm.ttm); + int ret; + + ret = vmw_ttm_map_dma(vmw_be); + if (unlikely(ret != 0)) + return ret; vmw_be->gmr_id = bo_mem->start; - return vmw_gmr_bind(vmw_be->dev_priv, ttm->pages, + return vmw_gmr_bind(vmw_be->dev_priv, &vmw_be->vsgt, ttm->num_pages, vmw_be->gmr_id); } static int vmw_ttm_unbind(struct ttm_tt *ttm) { - struct vmw_ttm_tt *vmw_be = container_of(ttm, struct vmw_ttm_tt, ttm); + struct vmw_ttm_tt *vmw_be = + container_of(ttm, struct vmw_ttm_tt, dma_ttm.ttm); vmw_gmr_unbind(vmw_be->dev_priv, vmw_be->gmr_id); + + if (vmw_be->dev_priv->map_mode == vmw_dma_map_bind) + vmw_ttm_unmap_dma(vmw_be); + return 0; } static void vmw_ttm_destroy(struct ttm_tt *ttm) { - struct vmw_ttm_tt *vmw_be = container_of(ttm, struct vmw_ttm_tt, ttm); + struct vmw_ttm_tt *vmw_be = + container_of(ttm, struct vmw_ttm_tt, dma_ttm.ttm); - ttm_tt_fini(ttm); + vmw_ttm_unmap_dma(vmw_be); + if (vmw_be->dev_priv->map_mode == vmw_dma_alloc_coherent) + ttm_dma_tt_fini(&vmw_be->dma_ttm); + else + ttm_tt_fini(ttm); kfree(vmw_be); } +static int vmw_ttm_populate(struct ttm_tt *ttm) +{ + struct vmw_ttm_tt *vmw_tt = + container_of(ttm, struct vmw_ttm_tt, dma_ttm.ttm); + struct vmw_private *dev_priv = vmw_tt->dev_priv; + struct ttm_mem_global *glob = vmw_mem_glob(dev_priv); + int ret; + + if (ttm->state != tt_unpopulated) + return 0; + + if (dev_priv->map_mode == vmw_dma_alloc_coherent) { + size_t size = + ttm_round_pot(ttm->num_pages * sizeof(dma_addr_t)); + ret = ttm_mem_global_alloc(glob, size, false, true); + if (unlikely(ret != 0)) + return ret; + + ret = ttm_dma_populate(&vmw_tt->dma_ttm, dev_priv->dev->dev); + if (unlikely(ret != 0)) + ttm_mem_global_free(glob, size); + } else + ret = ttm_pool_populate(ttm); + + return ret; +} + +static void vmw_ttm_unpopulate(struct ttm_tt *ttm) +{ + struct vmw_ttm_tt *vmw_tt = container_of(ttm, struct vmw_ttm_tt, + dma_ttm.ttm); + struct vmw_private *dev_priv = vmw_tt->dev_priv; + struct ttm_mem_global *glob = vmw_mem_glob(dev_priv); + + vmw_ttm_unmap_dma(vmw_tt); + if (dev_priv->map_mode == vmw_dma_alloc_coherent) { + size_t size = + ttm_round_pot(ttm->num_pages * sizeof(dma_addr_t)); + + ttm_dma_unpopulate(&vmw_tt->dma_ttm, dev_priv->dev->dev); + ttm_mem_global_free(glob, size); + } else + ttm_pool_unpopulate(ttm); +} + static struct ttm_backend_func vmw_ttm_func = { .bind = vmw_ttm_bind, .unbind = vmw_ttm_unbind, @@ -183,20 +520,28 @@ struct ttm_tt *vmw_ttm_tt_create(struct ttm_bo_device *bdev, struct page *dummy_read_page) { struct vmw_ttm_tt *vmw_be; + int ret; - vmw_be = kmalloc(sizeof(*vmw_be), GFP_KERNEL); + vmw_be = kzalloc(sizeof(*vmw_be), GFP_KERNEL); if (!vmw_be) return NULL; - vmw_be->ttm.func = &vmw_ttm_func; + vmw_be->dma_ttm.ttm.func = &vmw_ttm_func; vmw_be->dev_priv = container_of(bdev, struct vmw_private, bdev); - if (ttm_tt_init(&vmw_be->ttm, bdev, size, page_flags, dummy_read_page)) { - kfree(vmw_be); - return NULL; - } + if (vmw_be->dev_priv->map_mode == vmw_dma_alloc_coherent) + ret = ttm_dma_tt_init(&vmw_be->dma_ttm, bdev, size, page_flags, + dummy_read_page); + else + ret = ttm_tt_init(&vmw_be->dma_ttm.ttm, bdev, size, page_flags, + dummy_read_page); + if (unlikely(ret != 0)) + goto out_no_init; - return &vmw_be->ttm; + return &vmw_be->dma_ttm.ttm; +out_no_init: + kfree(vmw_be); + return NULL; } int vmw_invalidate_caches(struct ttm_bo_device *bdev, uint32_t flags) @@ -332,8 +677,8 @@ static int vmw_sync_obj_wait(void *sync_obj, bool lazy, bool interruptible) struct ttm_bo_driver vmw_bo_driver = { .ttm_tt_create = &vmw_ttm_tt_create, - .ttm_tt_populate = &ttm_pool_populate, - .ttm_tt_unpopulate = &ttm_pool_unpopulate, + .ttm_tt_populate = &vmw_ttm_populate, + .ttm_tt_unpopulate = &vmw_ttm_unpopulate, .invalidate_caches = vmw_invalidate_caches, .init_mem_type = vmw_init_mem_type, .evict_flags = vmw_evict_flags, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 1a90f0a2f7e5..0b5c7818ebfb 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -32,6 +32,7 @@ #include #include #include +#include #define VMWGFX_DRIVER_NAME "vmwgfx" #define VMWGFX_DRIVER_DESC "Linux drm driver for VMware graphics devices" @@ -185,6 +186,9 @@ static struct pci_device_id vmw_pci_id_list[] = { MODULE_DEVICE_TABLE(pci, vmw_pci_id_list); static int enable_fbdev = IS_ENABLED(CONFIG_DRM_VMWGFX_FBCON); +static int vmw_force_iommu; +static int vmw_restrict_iommu; +static int vmw_force_coherent; static int vmw_probe(struct pci_dev *, const struct pci_device_id *); static void vmw_master_init(struct vmw_master *); @@ -193,6 +197,13 @@ static int vmwgfx_pm_notifier(struct notifier_block *nb, unsigned long val, MODULE_PARM_DESC(enable_fbdev, "Enable vmwgfx fbdev"); module_param_named(enable_fbdev, enable_fbdev, int, 0600); +MODULE_PARM_DESC(force_dma_api, "Force using the DMA API for TTM pages"); +module_param_named(force_dma_api, vmw_force_iommu, int, 0600); +MODULE_PARM_DESC(restrict_iommu, "Try to limit IOMMU usage for TTM pages"); +module_param_named(restrict_iommu, vmw_restrict_iommu, int, 0600); +MODULE_PARM_DESC(force_coherent, "Force coherent TTM pages"); +module_param_named(force_coherent, vmw_force_coherent, int, 0600); + static void vmw_print_capabilities(uint32_t capabilities) { @@ -427,12 +438,78 @@ static void vmw_get_initial_size(struct vmw_private *dev_priv) dev_priv->initial_height = height; } +/** + * vmw_dma_select_mode - Determine how DMA mappings should be set up for this + * system. + * + * @dev_priv: Pointer to a struct vmw_private + * + * This functions tries to determine the IOMMU setup and what actions + * need to be taken by the driver to make system pages visible to the + * device. + * If this function decides that DMA is not possible, it returns -EINVAL. + * The driver may then try to disable features of the device that require + * DMA. + */ +static int vmw_dma_select_mode(struct vmw_private *dev_priv) +{ + const struct dma_map_ops *dma_ops = get_dma_ops(dev_priv->dev->dev); + static const char *names[vmw_dma_map_max] = { + [vmw_dma_phys] = "Using physical TTM page addresses.", + [vmw_dma_alloc_coherent] = "Using coherent TTM pages.", + [vmw_dma_map_populate] = "Keeping DMA mappings.", + [vmw_dma_map_bind] = "Giving up DMA mappings early."}; + +#ifdef CONFIG_INTEL_IOMMU + if (intel_iommu_enabled) { + dev_priv->map_mode = vmw_dma_map_populate; + goto out_fixup; + } +#endif + + if (!(vmw_force_iommu || vmw_force_coherent)) { + dev_priv->map_mode = vmw_dma_phys; + DRM_INFO("DMA map mode: %s\n", names[dev_priv->map_mode]); + return 0; + } + + dev_priv->map_mode = vmw_dma_map_populate; + + if (dma_ops->sync_single_for_cpu) + dev_priv->map_mode = vmw_dma_alloc_coherent; +#ifdef CONFIG_SWIOTLB + if (swiotlb_nr_tbl() == 0) + dev_priv->map_mode = vmw_dma_map_populate; +#endif + +out_fixup: + if (dev_priv->map_mode == vmw_dma_map_populate && + vmw_restrict_iommu) + dev_priv->map_mode = vmw_dma_map_bind; + + if (vmw_force_coherent) + dev_priv->map_mode = vmw_dma_alloc_coherent; + +#if !defined(CONFIG_SWIOTLB) && !defined(CONFIG_INTEL_IOMMU) + /* + * No coherent page pool + */ + if (dev_priv->map_mode == vmw_dma_alloc_coherent) + return -EINVAL; +#endif + + DRM_INFO("DMA map mode: %s\n", names[dev_priv->map_mode]); + + return 0; +} + static int vmw_driver_load(struct drm_device *dev, unsigned long chipset) { struct vmw_private *dev_priv; int ret; uint32_t svga_id; enum vmw_res_type i; + bool refuse_dma = false; dev_priv = kzalloc(sizeof(*dev_priv), GFP_KERNEL); if (unlikely(dev_priv == NULL)) { @@ -481,6 +558,11 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset) } dev_priv->capabilities = vmw_read(dev_priv, SVGA_REG_CAPABILITIES); + ret = vmw_dma_select_mode(dev_priv); + if (unlikely(ret != 0)) { + DRM_INFO("Restricting capabilities due to IOMMU setup.\n"); + refuse_dma = true; + } dev_priv->vram_size = vmw_read(dev_priv, SVGA_REG_VRAM_SIZE); dev_priv->mmio_size = vmw_read(dev_priv, SVGA_REG_MEM_SIZE); @@ -558,8 +640,9 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset) } dev_priv->has_gmr = true; - if (ttm_bo_init_mm(&dev_priv->bdev, VMW_PL_GMR, - dev_priv->max_gmr_ids) != 0) { + if (((dev_priv->capabilities & (SVGA_CAP_GMR | SVGA_CAP_GMR2)) == 0) || + refuse_dma || ttm_bo_init_mm(&dev_priv->bdev, VMW_PL_GMR, + dev_priv->max_gmr_ids) != 0) { DRM_INFO("No GMR memory available. " "Graphics memory resources are very limited.\n"); dev_priv->has_gmr = false; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 150ec64af617..e401d5dbcb96 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -177,6 +177,58 @@ struct vmw_res_cache_entry { struct vmw_resource_val_node *node; }; +/** + * enum vmw_dma_map_mode - indicate how to perform TTM page dma mappings. + */ +enum vmw_dma_map_mode { + vmw_dma_phys, /* Use physical page addresses */ + vmw_dma_alloc_coherent, /* Use TTM coherent pages */ + vmw_dma_map_populate, /* Unmap from DMA just after unpopulate */ + vmw_dma_map_bind, /* Unmap from DMA just before unbind */ + vmw_dma_map_max +}; + +/** + * struct vmw_sg_table - Scatter/gather table for binding, with additional + * device-specific information. + * + * @sgt: Pointer to a struct sg_table with binding information + * @num_regions: Number of regions with device-address contigous pages + */ +struct vmw_sg_table { + enum vmw_dma_map_mode mode; + struct page **pages; + const dma_addr_t *addrs; + struct sg_table *sgt; + unsigned long num_regions; + unsigned long num_pages; +}; + +/** + * struct vmw_piter - Page iterator that iterates over a list of pages + * and DMA addresses that could be either a scatter-gather list or + * arrays + * + * @pages: Array of page pointers to the pages. + * @addrs: DMA addresses to the pages if coherent pages are used. + * @iter: Scatter-gather page iterator. Current position in SG list. + * @i: Current position in arrays. + * @num_pages: Number of pages total. + * @next: Function to advance the iterator. Returns false if past the list + * of pages, true otherwise. + * @dma_address: Function to return the DMA address of the current page. + */ +struct vmw_piter { + struct page **pages; + const dma_addr_t *addrs; + struct sg_page_iter iter; + unsigned long i; + unsigned long num_pages; + bool (*next)(struct vmw_piter *); + dma_addr_t (*dma_address)(struct vmw_piter *); + struct page *(*page)(struct vmw_piter *); +}; + struct vmw_sw_context{ struct drm_open_hash res_ht; bool res_ht_initialized; @@ -358,6 +410,11 @@ struct vmw_private { struct list_head res_lru[vmw_res_max]; uint32_t used_memory_size; + + /* + * DMA mapping stuff. + */ + enum vmw_dma_map_mode map_mode; }; static inline struct vmw_surface *vmw_res_to_srf(struct vmw_resource *res) @@ -405,7 +462,7 @@ void vmw_3d_resource_dec(struct vmw_private *dev_priv, bool hide_svga); */ extern int vmw_gmr_bind(struct vmw_private *dev_priv, - struct page *pages[], + const struct vmw_sg_table *vsgt, unsigned long num_pages, int gmr_id); extern void vmw_gmr_unbind(struct vmw_private *dev_priv, int gmr_id); @@ -568,6 +625,45 @@ extern struct ttm_placement vmw_evictable_placement; extern struct ttm_placement vmw_srf_placement; extern struct ttm_bo_driver vmw_bo_driver; extern int vmw_dma_quiescent(struct drm_device *dev); +extern void vmw_piter_start(struct vmw_piter *viter, + const struct vmw_sg_table *vsgt, + unsigned long p_offs); + +/** + * vmw_piter_next - Advance the iterator one page. + * + * @viter: Pointer to the iterator to advance. + * + * Returns false if past the list of pages, true otherwise. + */ +static inline bool vmw_piter_next(struct vmw_piter *viter) +{ + return viter->next(viter); +} + +/** + * vmw_piter_dma_addr - Return the DMA address of the current page. + * + * @viter: Pointer to the iterator + * + * Returns the DMA address of the page pointed to by @viter. + */ +static inline dma_addr_t vmw_piter_dma_addr(struct vmw_piter *viter) +{ + return viter->dma_address(viter); +} + +/** + * vmw_piter_page - Return a pointer to the current page. + * + * @viter: Pointer to the iterator + * + * Returns the DMA address of the page pointed to by @viter. + */ +static inline struct page *vmw_piter_page(struct vmw_piter *viter) +{ + return viter->page(viter); +} /** * Command submission - vmwgfx_execbuf.c diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gmr.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gmr.c index 1a0bf07fe54b..6d0952366f91 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_gmr.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gmr.c @@ -32,9 +32,11 @@ #define VMW_PPN_SIZE (sizeof(unsigned long)) /* A future safe maximum remap size. */ #define VMW_PPN_PER_REMAP ((31 * 1024) / VMW_PPN_SIZE) +#define DMA_ADDR_INVALID ((dma_addr_t) 0) +#define DMA_PAGE_INVALID 0UL static int vmw_gmr2_bind(struct vmw_private *dev_priv, - struct page *pages[], + struct vmw_piter *iter, unsigned long num_pages, int gmr_id) { @@ -81,11 +83,13 @@ static int vmw_gmr2_bind(struct vmw_private *dev_priv, for (i = 0; i < nr; ++i) { if (VMW_PPN_SIZE <= 4) - *cmd = page_to_pfn(*pages++); + *cmd = vmw_piter_dma_addr(iter) >> PAGE_SHIFT; else - *((uint64_t *)cmd) = page_to_pfn(*pages++); + *((uint64_t *)cmd) = vmw_piter_dma_addr(iter) >> + PAGE_SHIFT; cmd += VMW_PPN_SIZE / sizeof(*cmd); + vmw_piter_next(iter); } num_pages -= nr; @@ -120,22 +124,54 @@ static void vmw_gmr2_unbind(struct vmw_private *dev_priv, vmw_fifo_commit(dev_priv, define_size); } + +static void vmw_gmr_free_descriptors(struct device *dev, dma_addr_t desc_dma, + struct list_head *desc_pages) +{ + struct page *page, *next; + struct svga_guest_mem_descriptor *page_virtual; + unsigned int desc_per_page = PAGE_SIZE / + sizeof(struct svga_guest_mem_descriptor) - 1; + + if (list_empty(desc_pages)) + return; + + list_for_each_entry_safe(page, next, desc_pages, lru) { + list_del_init(&page->lru); + + if (likely(desc_dma != DMA_ADDR_INVALID)) { + dma_unmap_page(dev, desc_dma, PAGE_SIZE, + DMA_TO_DEVICE); + } + + page_virtual = kmap_atomic(page); + desc_dma = page_virtual[desc_per_page].ppn << PAGE_SHIFT; + kunmap_atomic(page_virtual); + + __free_page(page); + } +} + /** * FIXME: Adjust to the ttm lowmem / highmem storage to minimize * the number of used descriptors. + * */ -static int vmw_gmr_build_descriptors(struct list_head *desc_pages, - struct page *pages[], - unsigned long num_pages) +static int vmw_gmr_build_descriptors(struct device *dev, + struct list_head *desc_pages, + struct vmw_piter *iter, + unsigned long num_pages, + dma_addr_t *first_dma) { - struct page *page, *next; + struct page *page; struct svga_guest_mem_descriptor *page_virtual = NULL; struct svga_guest_mem_descriptor *desc_virtual = NULL; unsigned int desc_per_page; unsigned long prev_pfn; unsigned long pfn; int ret; + dma_addr_t desc_dma; desc_per_page = PAGE_SIZE / sizeof(struct svga_guest_mem_descriptor) - 1; @@ -148,23 +184,12 @@ static int vmw_gmr_build_descriptors(struct list_head *desc_pages, } list_add_tail(&page->lru, desc_pages); - - /* - * Point previous page terminating descriptor to this - * page before unmapping it. - */ - - if (likely(page_virtual != NULL)) { - desc_virtual->ppn = page_to_pfn(page); - kunmap_atomic(page_virtual); - } - page_virtual = kmap_atomic(page); desc_virtual = page_virtual - 1; prev_pfn = ~(0UL); while (likely(num_pages != 0)) { - pfn = page_to_pfn(*pages); + pfn = vmw_piter_dma_addr(iter) >> PAGE_SHIFT; if (pfn != prev_pfn + 1) { @@ -181,104 +206,81 @@ static int vmw_gmr_build_descriptors(struct list_head *desc_pages, } prev_pfn = pfn; --num_pages; - ++pages; + vmw_piter_next(iter); } - (++desc_virtual)->ppn = cpu_to_le32(0); + (++desc_virtual)->ppn = DMA_PAGE_INVALID; desc_virtual->num_pages = cpu_to_le32(0); + kunmap_atomic(page_virtual); } - if (likely(page_virtual != NULL)) + desc_dma = 0; + list_for_each_entry_reverse(page, desc_pages, lru) { + page_virtual = kmap_atomic(page); + page_virtual[desc_per_page].ppn = desc_dma >> PAGE_SHIFT; kunmap_atomic(page_virtual); + desc_dma = dma_map_page(dev, page, 0, PAGE_SIZE, + DMA_TO_DEVICE); + + if (unlikely(dma_mapping_error(dev, desc_dma))) + goto out_err; + } + *first_dma = desc_dma; return 0; out_err: - list_for_each_entry_safe(page, next, desc_pages, lru) { - list_del_init(&page->lru); - __free_page(page); - } + vmw_gmr_free_descriptors(dev, DMA_ADDR_INVALID, desc_pages); return ret; } -static inline void vmw_gmr_free_descriptors(struct list_head *desc_pages) -{ - struct page *page, *next; - - list_for_each_entry_safe(page, next, desc_pages, lru) { - list_del_init(&page->lru); - __free_page(page); - } -} - static void vmw_gmr_fire_descriptors(struct vmw_private *dev_priv, - int gmr_id, struct list_head *desc_pages) + int gmr_id, dma_addr_t desc_dma) { - struct page *page; - - if (unlikely(list_empty(desc_pages))) - return; - - page = list_entry(desc_pages->next, struct page, lru); - mutex_lock(&dev_priv->hw_mutex); vmw_write(dev_priv, SVGA_REG_GMR_ID, gmr_id); wmb(); - vmw_write(dev_priv, SVGA_REG_GMR_DESCRIPTOR, page_to_pfn(page)); + vmw_write(dev_priv, SVGA_REG_GMR_DESCRIPTOR, desc_dma >> PAGE_SHIFT); mb(); mutex_unlock(&dev_priv->hw_mutex); } -/** - * FIXME: Adjust to the ttm lowmem / highmem storage to minimize - * the number of used descriptors. - */ - -static unsigned long vmw_gmr_count_descriptors(struct page *pages[], - unsigned long num_pages) -{ - unsigned long prev_pfn = ~(0UL); - unsigned long pfn; - unsigned long descriptors = 0; - - while (num_pages--) { - pfn = page_to_pfn(*pages++); - if (prev_pfn + 1 != pfn) - ++descriptors; - prev_pfn = pfn; - } - - return descriptors; -} - int vmw_gmr_bind(struct vmw_private *dev_priv, - struct page *pages[], + const struct vmw_sg_table *vsgt, unsigned long num_pages, int gmr_id) { struct list_head desc_pages; + dma_addr_t desc_dma = 0; + struct device *dev = dev_priv->dev->dev; + struct vmw_piter data_iter; int ret; + vmw_piter_start(&data_iter, vsgt, 0); + + if (unlikely(!vmw_piter_next(&data_iter))) + return 0; + if (likely(dev_priv->capabilities & SVGA_CAP_GMR2)) - return vmw_gmr2_bind(dev_priv, pages, num_pages, gmr_id); + return vmw_gmr2_bind(dev_priv, &data_iter, num_pages, gmr_id); if (unlikely(!(dev_priv->capabilities & SVGA_CAP_GMR))) return -EINVAL; - if (vmw_gmr_count_descriptors(pages, num_pages) > - dev_priv->max_gmr_descriptors) + if (vsgt->num_regions > dev_priv->max_gmr_descriptors) return -EINVAL; INIT_LIST_HEAD(&desc_pages); - ret = vmw_gmr_build_descriptors(&desc_pages, pages, num_pages); + ret = vmw_gmr_build_descriptors(dev, &desc_pages, &data_iter, + num_pages, &desc_dma); if (unlikely(ret != 0)) return ret; - vmw_gmr_fire_descriptors(dev_priv, gmr_id, &desc_pages); - vmw_gmr_free_descriptors(&desc_pages); + vmw_gmr_fire_descriptors(dev_priv, gmr_id, desc_dma); + vmw_gmr_free_descriptors(dev, desc_dma, &desc_pages); return 0; }