From 13ccf3ad99a45052664f2c1a6c64899f9d778152 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 19 Nov 2009 15:07:04 +0000 Subject: [PATCH 01/10] ARM: dma-mapping: split out vmregion code from dma coherent mapping code Signed-off-by: Russell King Acked-by: Greg Ungerer --- arch/arm/mm/Makefile | 2 +- arch/arm/mm/dma-mapping.c | 132 ++++---------------------------------- arch/arm/mm/vmregion.c | 131 +++++++++++++++++++++++++++++++++++++ arch/arm/mm/vmregion.h | 29 +++++++++ 4 files changed, 174 insertions(+), 120 deletions(-) create mode 100644 arch/arm/mm/vmregion.c create mode 100644 arch/arm/mm/vmregion.h diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile index 055cb2aa8134..42352e75742b 100644 --- a/arch/arm/mm/Makefile +++ b/arch/arm/mm/Makefile @@ -6,7 +6,7 @@ obj-y := dma-mapping.o extable.o fault.o init.o \ iomap.o obj-$(CONFIG_MMU) += fault-armv.o flush.o ioremap.o mmap.o \ - pgd.o mmu.o + pgd.o mmu.o vmregion.o ifneq ($(CONFIG_MMU),y) obj-y += nommu.o diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index b9590a7085ca..c54f1acf92c8 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -68,106 +68,16 @@ static u64 get_coherent_dma_mask(struct device *dev) * These are the page tables (2MB each) covering uncached, DMA consistent allocations */ static pte_t *consistent_pte[NUM_CONSISTENT_PTES]; -static DEFINE_SPINLOCK(consistent_lock); -/* - * VM region handling support. - * - * This should become something generic, handling VM region allocations for - * vmalloc and similar (ioremap, module space, etc). - * - * I envisage vmalloc()'s supporting vm_struct becoming: - * - * struct vm_struct { - * struct vm_region region; - * unsigned long flags; - * struct page **pages; - * unsigned int nr_pages; - * unsigned long phys_addr; - * }; - * - * get_vm_area() would then call vm_region_alloc with an appropriate - * struct vm_region head (eg): - * - * struct vm_region vmalloc_head = { - * .vm_list = LIST_HEAD_INIT(vmalloc_head.vm_list), - * .vm_start = VMALLOC_START, - * .vm_end = VMALLOC_END, - * }; - * - * However, vmalloc_head.vm_start is variable (typically, it is dependent on - * the amount of RAM found at boot time.) I would imagine that get_vm_area() - * would have to initialise this each time prior to calling vm_region_alloc(). - */ -struct arm_vm_region { - struct list_head vm_list; - unsigned long vm_start; - unsigned long vm_end; - struct page *vm_pages; - int vm_active; -}; +#include "vmregion.h" -static struct arm_vm_region consistent_head = { +static struct arm_vmregion_head consistent_head = { + .vm_lock = __SPIN_LOCK_UNLOCKED(&consistent_head.vm_lock), .vm_list = LIST_HEAD_INIT(consistent_head.vm_list), .vm_start = CONSISTENT_BASE, .vm_end = CONSISTENT_END, }; -static struct arm_vm_region * -arm_vm_region_alloc(struct arm_vm_region *head, size_t size, gfp_t gfp) -{ - unsigned long addr = head->vm_start, end = head->vm_end - size; - unsigned long flags; - struct arm_vm_region *c, *new; - - new = kmalloc(sizeof(struct arm_vm_region), gfp); - if (!new) - goto out; - - spin_lock_irqsave(&consistent_lock, flags); - - list_for_each_entry(c, &head->vm_list, vm_list) { - if ((addr + size) < addr) - goto nospc; - if ((addr + size) <= c->vm_start) - goto found; - addr = c->vm_end; - if (addr > end) - goto nospc; - } - - found: - /* - * Insert this entry _before_ the one we found. 
- */ - list_add_tail(&new->vm_list, &c->vm_list); - new->vm_start = addr; - new->vm_end = addr + size; - new->vm_active = 1; - - spin_unlock_irqrestore(&consistent_lock, flags); - return new; - - nospc: - spin_unlock_irqrestore(&consistent_lock, flags); - kfree(new); - out: - return NULL; -} - -static struct arm_vm_region *arm_vm_region_find(struct arm_vm_region *head, unsigned long addr) -{ - struct arm_vm_region *c; - - list_for_each_entry(c, &head->vm_list, vm_list) { - if (c->vm_active && c->vm_start == addr) - goto out; - } - c = NULL; - out: - return c; -} - #ifdef CONFIG_HUGETLB_PAGE #error ARM Coherent DMA allocator does not (yet) support huge TLB #endif @@ -177,7 +87,7 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, pgprot_t prot) { struct page *page; - struct arm_vm_region *c; + struct arm_vmregion *c; unsigned long order; u64 mask = get_coherent_dma_mask(dev); u64 limit; @@ -191,13 +101,9 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, if (!mask) goto no_page; - /* - * Sanity check the allocation size. - */ size = PAGE_ALIGN(size); limit = (mask + 1) & ~mask; - if ((limit && size >= limit) || - size >= (CONSISTENT_END - CONSISTENT_BASE)) { + if (limit && size >= limit) { printk(KERN_WARNING "coherent allocation too big " "(requested %#x mask %#llx)\n", size, mask); goto no_page; @@ -226,7 +132,7 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, /* * Allocate a virtual address in the consistent mapping region. */ - c = arm_vm_region_alloc(&consistent_head, size, + c = arm_vmregion_alloc(&consistent_head, size, gfp & ~(__GFP_DMA | __GFP_HIGHMEM)); if (c) { pte_t *pte; @@ -349,15 +255,12 @@ static int dma_mmap(struct device *dev, struct vm_area_struct *vma, { int ret = -ENXIO; #ifdef CONFIG_MMU - unsigned long flags, user_size, kern_size; - struct arm_vm_region *c; + unsigned long user_size, kern_size; + struct arm_vmregion *c; user_size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; - spin_lock_irqsave(&consistent_lock, flags); - c = arm_vm_region_find(&consistent_head, (unsigned long)cpu_addr); - spin_unlock_irqrestore(&consistent_lock, flags); - + c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr); if (c) { unsigned long off = vma->vm_pgoff; @@ -399,8 +302,8 @@ EXPORT_SYMBOL(dma_mmap_writecombine); #ifdef CONFIG_MMU void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle) { - struct arm_vm_region *c; - unsigned long flags, addr; + struct arm_vmregion *c; + unsigned long addr; pte_t *ptep; int idx; u32 off; @@ -417,14 +320,10 @@ void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr size = PAGE_ALIGN(size); - spin_lock_irqsave(&consistent_lock, flags); - c = arm_vm_region_find(&consistent_head, (unsigned long)cpu_addr); + c = arm_vmregion_find_remove(&consistent_head, (unsigned long)cpu_addr); if (!c) goto no_area; - c->vm_active = 0; - spin_unlock_irqrestore(&consistent_lock, flags); - if ((c->vm_end - c->vm_start) != size) { printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n", __func__, c->vm_end - c->vm_start, size); @@ -470,15 +369,10 @@ void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr flush_tlb_kernel_range(c->vm_start, c->vm_end); - spin_lock_irqsave(&consistent_lock, flags); - list_del(&c->vm_list); - spin_unlock_irqrestore(&consistent_lock, flags); - - kfree(c); + arm_vmregion_free(&consistent_head, c); return; no_area: - spin_unlock_irqrestore(&consistent_lock, 
flags); printk(KERN_ERR "%s: trying to free invalid coherent area: %p\n", __func__, cpu_addr); dump_stack(); diff --git a/arch/arm/mm/vmregion.c b/arch/arm/mm/vmregion.c new file mode 100644 index 000000000000..19e09bdb1b8a --- /dev/null +++ b/arch/arm/mm/vmregion.c @@ -0,0 +1,131 @@ +#include +#include +#include + +#include "vmregion.h" + +/* + * VM region handling support. + * + * This should become something generic, handling VM region allocations for + * vmalloc and similar (ioremap, module space, etc). + * + * I envisage vmalloc()'s supporting vm_struct becoming: + * + * struct vm_struct { + * struct vmregion region; + * unsigned long flags; + * struct page **pages; + * unsigned int nr_pages; + * unsigned long phys_addr; + * }; + * + * get_vm_area() would then call vmregion_alloc with an appropriate + * struct vmregion head (eg): + * + * struct vmregion vmalloc_head = { + * .vm_list = LIST_HEAD_INIT(vmalloc_head.vm_list), + * .vm_start = VMALLOC_START, + * .vm_end = VMALLOC_END, + * }; + * + * However, vmalloc_head.vm_start is variable (typically, it is dependent on + * the amount of RAM found at boot time.) I would imagine that get_vm_area() + * would have to initialise this each time prior to calling vmregion_alloc(). + */ + +struct arm_vmregion * +arm_vmregion_alloc(struct arm_vmregion_head *head, size_t size, gfp_t gfp) +{ + unsigned long addr = head->vm_start, end = head->vm_end - size; + unsigned long flags; + struct arm_vmregion *c, *new; + + if (head->vm_end - head->vm_start < size) { + printk(KERN_WARNING "%s: allocation too big (requested %#x)\n", + __func__, size); + goto out; + } + + new = kmalloc(sizeof(struct arm_vmregion), gfp); + if (!new) + goto out; + + spin_lock_irqsave(&head->vm_lock, flags); + + list_for_each_entry(c, &head->vm_list, vm_list) { + if ((addr + size) < addr) + goto nospc; + if ((addr + size) <= c->vm_start) + goto found; + addr = c->vm_end; + if (addr > end) + goto nospc; + } + + found: + /* + * Insert this entry _before_ the one we found. 
+ */ + list_add_tail(&new->vm_list, &c->vm_list); + new->vm_start = addr; + new->vm_end = addr + size; + new->vm_active = 1; + + spin_unlock_irqrestore(&head->vm_lock, flags); + return new; + + nospc: + spin_unlock_irqrestore(&head->vm_lock, flags); + kfree(new); + out: + return NULL; +} + +static struct arm_vmregion *__arm_vmregion_find(struct arm_vmregion_head *head, unsigned long addr) +{ + struct arm_vmregion *c; + + list_for_each_entry(c, &head->vm_list, vm_list) { + if (c->vm_active && c->vm_start == addr) + goto out; + } + c = NULL; + out: + return c; +} + +struct arm_vmregion *arm_vmregion_find(struct arm_vmregion_head *head, unsigned long addr) +{ + struct arm_vmregion *c; + unsigned long flags; + + spin_lock_irqsave(&head->vm_lock, flags); + c = __arm_vmregion_find(head, addr); + spin_unlock_irqrestore(&head->vm_lock, flags); + return c; +} + +struct arm_vmregion *arm_vmregion_find_remove(struct arm_vmregion_head *head, unsigned long addr) +{ + struct arm_vmregion *c; + unsigned long flags; + + spin_lock_irqsave(&head->vm_lock, flags); + c = __arm_vmregion_find(head, addr); + if (c) + c->vm_active = 0; + spin_unlock_irqrestore(&head->vm_lock, flags); + return c; +} + +void arm_vmregion_free(struct arm_vmregion_head *head, struct arm_vmregion *c) +{ + unsigned long flags; + + spin_lock_irqsave(&head->vm_lock, flags); + list_del(&c->vm_list); + spin_unlock_irqrestore(&head->vm_lock, flags); + + kfree(c); +} diff --git a/arch/arm/mm/vmregion.h b/arch/arm/mm/vmregion.h new file mode 100644 index 000000000000..6b2cdbdf3a85 --- /dev/null +++ b/arch/arm/mm/vmregion.h @@ -0,0 +1,29 @@ +#ifndef VMREGION_H +#define VMREGION_H + +#include +#include + +struct page; + +struct arm_vmregion_head { + spinlock_t vm_lock; + struct list_head vm_list; + unsigned long vm_start; + unsigned long vm_end; +}; + +struct arm_vmregion { + struct list_head vm_list; + unsigned long vm_start; + unsigned long vm_end; + struct page *vm_pages; + int vm_active; +}; + +struct arm_vmregion *arm_vmregion_alloc(struct arm_vmregion_head *, size_t, gfp_t); +struct arm_vmregion *arm_vmregion_find(struct arm_vmregion_head *, unsigned long); +struct arm_vmregion *arm_vmregion_find_remove(struct arm_vmregion_head *, unsigned long); +void arm_vmregion_free(struct arm_vmregion_head *, struct arm_vmregion *); + +#endif From 7a9a32a9533fa01de911e1d056142ddd27360782 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 19 Nov 2009 15:31:07 +0000 Subject: [PATCH 02/10] ARM: dma-mapping: functions to allocate/free a coherent buffer Signed-off-by: Russell King Acked-by: Greg Ungerer --- arch/arm/mm/dma-mapping.c | 110 +++++++++++++++++++++++--------------- 1 file changed, 66 insertions(+), 44 deletions(-) diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index c54f1acf92c8..dab2d7f04adf 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -63,6 +63,68 @@ static u64 get_coherent_dma_mask(struct device *dev) return mask; } +/* + * Allocate a DMA buffer for 'dev' of size 'size' using the + * specified gfp mask. Note that 'size' must be page aligned. 
+ */ +static struct page *__dma_alloc_buffer(struct device *dev, size_t size, gfp_t gfp) +{ + unsigned long order = get_order(size); + struct page *page, *p, *e; + void *ptr; + u64 mask = get_coherent_dma_mask(dev); + +#ifdef CONFIG_DMA_API_DEBUG + u64 limit = (mask + 1) & ~mask; + if (limit && size >= limit) { + dev_warn(dev, "coherent allocation too big (requested %#x mask %#llx)\n", + size, mask); + return NULL; + } +#endif + + if (!mask) + return NULL; + + if (mask < 0xffffffffULL) + gfp |= GFP_DMA; + + page = alloc_pages(gfp, order); + if (!page) + return NULL; + + /* + * Now split the huge page and free the excess pages + */ + split_page(page, order); + for (p = page + (size >> PAGE_SHIFT), e = page + (1 << order); p < e; p++) + __free_page(p); + + /* + * Ensure that the allocated pages are zeroed, and that any data + * lurking in the kernel direct-mapped region is invalidated. + */ + ptr = page_address(page); + memset(ptr, 0, size); + dmac_flush_range(ptr, ptr + size); + outer_flush_range(__pa(ptr), __pa(ptr) + size); + + return page; +} + +/* + * Free a DMA buffer. 'size' must be page aligned. + */ +static void __dma_free_buffer(struct page *page, size_t size) +{ + struct page *e = page + (size >> PAGE_SHIFT); + + while (page < e) { + __free_page(page); + page++; + } +} + #ifdef CONFIG_MMU /* * These are the page tables (2MB each) covering uncached, DMA consistent allocations @@ -88,9 +150,6 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, { struct page *page; struct arm_vmregion *c; - unsigned long order; - u64 mask = get_coherent_dma_mask(dev); - u64 limit; if (!consistent_pte[0]) { printk(KERN_ERR "%s: not initialised\n", __func__); @@ -98,37 +157,12 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, return NULL; } - if (!mask) - goto no_page; - size = PAGE_ALIGN(size); - limit = (mask + 1) & ~mask; - if (limit && size >= limit) { - printk(KERN_WARNING "coherent allocation too big " - "(requested %#x mask %#llx)\n", size, mask); - goto no_page; - } - order = get_order(size); - - if (mask < 0xffffffffULL) - gfp |= GFP_DMA; - - page = alloc_pages(gfp, order); + page = __dma_alloc_buffer(dev, size, gfp); if (!page) goto no_page; - /* - * Invalidate any data that might be lurking in the - * kernel direct-mapped region for device DMA. - */ - { - void *ptr = page_address(page); - memset(ptr, 0, size); - dmac_flush_range(ptr, ptr + size); - outer_flush_range(__pa(ptr), __pa(ptr) + size); - } - /* * Allocate a virtual address in the consistent mapping region. */ @@ -136,15 +170,12 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, gfp & ~(__GFP_DMA | __GFP_HIGHMEM)); if (c) { pte_t *pte; - struct page *end = page + (1 << order); int idx = CONSISTENT_PTE_INDEX(c->vm_start); u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1); pte = consistent_pte[idx] + off; c->vm_pages = page; - split_page(page, order); - /* * Set the "dma handle" */ @@ -167,19 +198,11 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, } } while (size -= PAGE_SIZE); - /* - * Free the otherwise unused pages. - */ - while (page < end) { - __free_page(page); - page++; - } - return (void *)c->vm_start; } if (page) - __free_pages(page, order); + __dma_free_buffer(page, size); no_page: *handle = ~0; return NULL; @@ -357,12 +380,9 @@ void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr * x86 does not mark the pages reserved... 
*/ ClearPageReserved(page); - - __free_page(page); continue; } } - printk(KERN_CRIT "%s: bad page in kernel page table\n", __func__); } while (size -= PAGE_SIZE); @@ -370,6 +390,8 @@ void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr flush_tlb_kernel_range(c->vm_start, c->vm_end); arm_vmregion_free(&consistent_head, c); + + __dma_free_buffer(dma_to_page(dev, handle), size); return; no_area: From 3e82d012e9281a0b6388ff2356e8396b9d781e1c Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 19 Nov 2009 15:38:12 +0000 Subject: [PATCH 03/10] ARM: dma-mapping: fix coherent arch dma_alloc_coherent() The coherent architecture dma_alloc_coherent was using kmalloc/kfree to manage the memory. dma_alloc_coherent() is expected to work with a granularity of a page, so this is wrong. Fix it by using the helper functions now provided. Signed-off-by: Russell King Acked-by: Greg Ungerer --- arch/arm/mm/dma-mapping.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index dab2d7f04adf..a67130873431 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -246,14 +246,16 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gf return memory; if (arch_is_coherent()) { - void *virt; + struct page *page; - virt = kmalloc(size, gfp); - if (!virt) + page = __dma_alloc_buffer(dev, PAGE_ALIGN(size), gfp); + if (!page) { + *handle = ~0; return NULL; - *handle = virt_to_dma(dev, virt); + } - return virt; + *handle = page_to_dma(dev, page); + return page_address(page); } return __dma_alloc(dev, size, handle, gfp, @@ -336,13 +338,13 @@ void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr if (dma_release_from_coherent(dev, get_order(size), cpu_addr)) return; + size = PAGE_ALIGN(size); + if (arch_is_coherent()) { - kfree(cpu_addr); + __dma_free_buffer(dma_to_page(dev, handle), size); return; } - size = PAGE_ALIGN(size); - c = arm_vmregion_find_remove(&consistent_head, (unsigned long)cpu_addr); if (!c) goto no_area; From 04da56943b416dd9fe7058abf8d5b9153164b3e9 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 19 Nov 2009 15:54:45 +0000 Subject: [PATCH 04/10] ARM: dma-mapping: fix nommu dma_alloc_coherent() The nommu version of dma_alloc_coherent was using kmalloc/kfree to manage the memory. dma_alloc_coherent() is expected to work with a granularity of a page, so this is wrong. Fix it by using the helper functions now provided. 
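For illustration only (a hypothetical caller, not part of this patch): the contract being restored is that even a sub-page request comes back page-aligned and owns PAGE_ALIGN(size) bytes outright, because the pages may later be mmap()ed to userspace or released page by page - guarantees kmalloc() does not provide for small sizes.

	#include <linux/device.h>
	#include <linux/dma-mapping.h>
	#include <linux/gfp.h>
	#include <linux/mm.h>

	/*
	 * Hypothetical driver fragment: even a 100-byte coherent buffer
	 * must start on a page boundary once the page-based helpers are
	 * used for allocation and free.
	 */
	static void *tiny_coherent_buf(struct device *dev, dma_addr_t *dma)
	{
		void *cpu = dma_alloc_coherent(dev, 100, dma, GFP_KERNEL);

		WARN_ON(cpu && ((unsigned long)cpu & ~PAGE_MASK));
		return cpu;
	}
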
Signed-off-by: Russell King Acked-by: Greg Ungerer --- arch/arm/mm/dma-mapping.c | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index a67130873431..62b4240b34b3 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -212,24 +212,17 @@ static void * __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, pgprot_t prot) { - void *virt; - u64 mask = get_coherent_dma_mask(dev); + struct page *page; - if (!mask) - goto error; - - if (mask < 0xffffffffULL) - gfp |= GFP_DMA; - virt = kmalloc(size, gfp); - if (!virt) - goto error; - - *handle = virt_to_dma(dev, virt); - return virt; - -error: *handle = ~0; - return NULL; + size = PAGE_ALIGN(size); + + page = __dma_alloc_buffer(dev, size, gfp); + if (!page) + return NULL; + + *handle = page_to_dma(dev, page); + return page_address(page); } #endif /* CONFIG_MMU */ @@ -406,7 +399,7 @@ void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr { if (dma_release_from_coherent(dev, get_order(size), cpu_addr)) return; - kfree(cpu_addr); + __dma_free_buffer(dma_to_page(dev, handle), PAGE_ALIGN(size)); } #endif /* CONFIG_MMU */ EXPORT_SYMBOL(dma_free_coherent); From 695ae0af5a52df09dffcc2ce2d625d56ef36ce14 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 19 Nov 2009 16:31:39 +0000 Subject: [PATCH 05/10] ARM: dma-mapping: factor dma_free_coherent() common code We effectively have three implementations of dma_free_coherent() mixed up in the code; the incoherent MMU, coherent MMU and noMMU versions. The coherent MMU and noMMU versions are actually functionally identical. The incoherent MMU version is almost the same, but with the additional step of unmapping the secondary mapping. Separate out this additional step into __dma_free_remap() and simplify the resulting dma_free_coherent() code. Signed-off-by: Russell King Acked-by: Greg Ungerer --- arch/arm/mm/dma-mapping.c | 141 ++++++++++++++++++-------------------- 1 file changed, 68 insertions(+), 73 deletions(-) diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 62b4240b34b3..6b24e5ed85c2 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -207,7 +207,70 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, *handle = ~0; return NULL; } + +static void __dma_free_remap(void *cpu_addr, size_t size) +{ + struct arm_vmregion *c; + unsigned long addr; + pte_t *ptep; + int idx; + u32 off; + + c = arm_vmregion_find_remove(&consistent_head, (unsigned long)cpu_addr); + if (!c) { + printk(KERN_ERR "%s: trying to free invalid coherent area: %p\n", + __func__, cpu_addr); + dump_stack(); + return; + } + + if ((c->vm_end - c->vm_start) != size) { + printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n", + __func__, c->vm_end - c->vm_start, size); + dump_stack(); + size = c->vm_end - c->vm_start; + } + + idx = CONSISTENT_PTE_INDEX(c->vm_start); + off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1); + ptep = consistent_pte[idx] + off; + addr = c->vm_start; + do { + pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep); + unsigned long pfn; + + ptep++; + addr += PAGE_SIZE; + off++; + if (off >= PTRS_PER_PTE) { + off = 0; + ptep = consistent_pte[++idx]; + } + + if (!pte_none(pte) && pte_present(pte)) { + pfn = pte_pfn(pte); + + if (pfn_valid(pfn)) { + struct page *page = pfn_to_page(pfn); + + /* + * x86 does not mark the pages reserved... 
+ */ + ClearPageReserved(page); + continue; + } + } + printk(KERN_CRIT "%s: bad page in kernel page table\n", + __func__); + } while (size -= PAGE_SIZE); + + flush_tlb_kernel_range(c->vm_start, c->vm_end); + + arm_vmregion_free(&consistent_head, c); +} + #else /* !CONFIG_MMU */ + static void * __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, pgprot_t prot) @@ -224,6 +287,9 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, *handle = page_to_dma(dev, page); return page_address(page); } + +#define __dma_free_remap(addr, size) do { } while (0) + #endif /* CONFIG_MMU */ /* @@ -317,15 +383,8 @@ EXPORT_SYMBOL(dma_mmap_writecombine); * free a page as defined by the above mapping. * Must not be called with IRQs disabled. */ -#ifdef CONFIG_MMU void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle) { - struct arm_vmregion *c; - unsigned long addr; - pte_t *ptep; - int idx; - u32 off; - WARN_ON(irqs_disabled()); if (dma_release_from_coherent(dev, get_order(size), cpu_addr)) @@ -333,75 +392,11 @@ void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr size = PAGE_ALIGN(size); - if (arch_is_coherent()) { - __dma_free_buffer(dma_to_page(dev, handle), size); - return; - } - - c = arm_vmregion_find_remove(&consistent_head, (unsigned long)cpu_addr); - if (!c) - goto no_area; - - if ((c->vm_end - c->vm_start) != size) { - printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n", - __func__, c->vm_end - c->vm_start, size); - dump_stack(); - size = c->vm_end - c->vm_start; - } - - idx = CONSISTENT_PTE_INDEX(c->vm_start); - off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1); - ptep = consistent_pte[idx] + off; - addr = c->vm_start; - do { - pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep); - unsigned long pfn; - - ptep++; - addr += PAGE_SIZE; - off++; - if (off >= PTRS_PER_PTE) { - off = 0; - ptep = consistent_pte[++idx]; - } - - if (!pte_none(pte) && pte_present(pte)) { - pfn = pte_pfn(pte); - - if (pfn_valid(pfn)) { - struct page *page = pfn_to_page(pfn); - - /* - * x86 does not mark the pages reserved... - */ - ClearPageReserved(page); - continue; - } - } - printk(KERN_CRIT "%s: bad page in kernel page table\n", - __func__); - } while (size -= PAGE_SIZE); - - flush_tlb_kernel_range(c->vm_start, c->vm_end); - - arm_vmregion_free(&consistent_head, c); + if (!arch_is_coherent()) + __dma_free_remap(cpu_addr, size); __dma_free_buffer(dma_to_page(dev, handle), size); - return; - - no_area: - printk(KERN_ERR "%s: trying to free invalid coherent area: %p\n", - __func__, cpu_addr); - dump_stack(); } -#else /* !CONFIG_MMU */ -void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle) -{ - if (dma_release_from_coherent(dev, get_order(size), cpu_addr)) - return; - __dma_free_buffer(dma_to_page(dev, handle), PAGE_ALIGN(size)); -} -#endif /* CONFIG_MMU */ EXPORT_SYMBOL(dma_free_coherent); /* From 88c58f3b92bc7c26439802c300d39b6377739d81 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 19 Nov 2009 16:46:02 +0000 Subject: [PATCH 06/10] ARM: dma-mapping: move consistent_init into CONFIG_MMU section No point wrapping the contents of this function with #ifdef CONFIG_MMU when we can place it and the core_initcall() entirely within the existing conditional block. 
Signed-off-by: Russell King Acked-by: Greg Ungerer --- arch/arm/mm/dma-mapping.c | 78 +++++++++++++++++++-------------------- 1 file changed, 38 insertions(+), 40 deletions(-) diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 6b24e5ed85c2..19357f76ce89 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -144,6 +144,44 @@ static struct arm_vmregion_head consistent_head = { #error ARM Coherent DMA allocator does not (yet) support huge TLB #endif +/* + * Initialise the consistent memory allocation. + */ +static int __init consistent_init(void) +{ + int ret = 0; + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + int i = 0; + u32 base = CONSISTENT_BASE; + + do { + pgd = pgd_offset(&init_mm, base); + pmd = pmd_alloc(&init_mm, pgd, base); + if (!pmd) { + printk(KERN_ERR "%s: no pmd tables\n", __func__); + ret = -ENOMEM; + break; + } + WARN_ON(!pmd_none(*pmd)); + + pte = pte_alloc_kernel(pmd, base); + if (!pte) { + printk(KERN_ERR "%s: no pte tables\n", __func__); + ret = -ENOMEM; + break; + } + + consistent_pte[i++] = pte; + base += (1 << PGDIR_SHIFT); + } while (base < CONSISTENT_END); + + return ret; +} + +core_initcall(consistent_init); + static void * __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, pgprot_t prot) @@ -399,46 +437,6 @@ void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr } EXPORT_SYMBOL(dma_free_coherent); -/* - * Initialise the consistent memory allocation. - */ -static int __init consistent_init(void) -{ - int ret = 0; -#ifdef CONFIG_MMU - pgd_t *pgd; - pmd_t *pmd; - pte_t *pte; - int i = 0; - u32 base = CONSISTENT_BASE; - - do { - pgd = pgd_offset(&init_mm, base); - pmd = pmd_alloc(&init_mm, pgd, base); - if (!pmd) { - printk(KERN_ERR "%s: no pmd tables\n", __func__); - ret = -ENOMEM; - break; - } - WARN_ON(!pmd_none(*pmd)); - - pte = pte_alloc_kernel(pmd, base); - if (!pte) { - printk(KERN_ERR "%s: no pte tables\n", __func__); - ret = -ENOMEM; - break; - } - - consistent_pte[i++] = pte; - base += (1 << PGDIR_SHIFT); - } while (base < CONSISTENT_END); -#endif /* !CONFIG_MMU */ - - return ret; -} - -core_initcall(consistent_init); - /* * Make an area consistent for devices. * Note: Drivers should NOT use this function directly, as it will break From ebd7a845fa4332da3ebcbe8cf1b09bb43413420e Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 19 Nov 2009 20:58:31 +0000 Subject: [PATCH 07/10] ARM: dma-mapping: clean up coherent arch dma allocation IXP23xx added support for dma_alloc_coherent() for DMA arches with an exception in dma_alloc_coherent(). This is a subset of what goes on in __dma_alloc(), and there is no reason why dma_alloc_writecombine() should not be given the same treatment (except, maybe, that IXP23xx doesn't use it.) We can better deal with this by moving the arch_is_coherent() test inside __dma_alloc() and killing the code duplication. 
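A condensed sketch (omitting the dma_alloc_from_coherent() hook that dma_alloc_coherent() checks first) of how both public allocators end up sharing the arch_is_coherent() handling once it lives inside __dma_alloc(); only the page protection differs:

	void *dma_alloc_coherent(struct device *dev, size_t size,
				 dma_addr_t *handle, gfp_t gfp)
	{
		return __dma_alloc(dev, size, handle, gfp,
				   pgprot_noncached(pgprot_kernel));
	}

	void *dma_alloc_writecombine(struct device *dev, size_t size,
				     dma_addr_t *handle, gfp_t gfp)
	{
		return __dma_alloc(dev, size, handle, gfp,
				   pgprot_writecombine(pgprot_kernel));
	}
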
Signed-off-by: Russell King Acked-by: Greg Ungerer --- arch/arm/mm/dma-mapping.c | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 19357f76ce89..3a8e527c774c 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -189,18 +189,24 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, struct page *page; struct arm_vmregion *c; - if (!consistent_pte[0]) { - printk(KERN_ERR "%s: not initialised\n", __func__); - dump_stack(); - return NULL; - } - size = PAGE_ALIGN(size); page = __dma_alloc_buffer(dev, size, gfp); if (!page) goto no_page; + if (arch_is_coherent()) { + *handle = page_to_dma(dev, page); + return page_address(page); + } + + if (!consistent_pte[0]) { + printk(KERN_ERR "%s: not initialised\n", __func__); + dump_stack(); + __dma_free_buffer(page, size); + return NULL; + } + /* * Allocate a virtual address in the consistent mapping region. */ @@ -342,19 +348,6 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gf if (dma_alloc_from_coherent(dev, size, handle, &memory)) return memory; - if (arch_is_coherent()) { - struct page *page; - - page = __dma_alloc_buffer(dev, PAGE_ALIGN(size), gfp); - if (!page) { - *handle = ~0; - return NULL; - } - - *handle = page_to_dma(dev, page); - return page_address(page); - } - return __dma_alloc(dev, size, handle, gfp, pgprot_noncached(pgprot_kernel)); } From 31ebf94435f74294523683867fe0b89000e61521 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 19 Nov 2009 21:12:17 +0000 Subject: [PATCH 08/10] ARM: dma-mapping: Factor out noMMU dma buffer allocation code This entirely separates the DMA coherent buffer remapping code from the allocation code, and gets rid of the duplicate copy in the !MMU section. 
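For reference, the allocation path this patch arrives at, shown as one piece rather than as scattered hunks (sketch; on !MMU builds __dma_alloc_remap() is defined away to page_address()):

	static void *
	__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
		    gfp_t gfp, pgprot_t prot)
	{
		struct page *page;
		void *addr;

		*handle = ~0;
		size = PAGE_ALIGN(size);

		page = __dma_alloc_buffer(dev, size, gfp);
		if (!page)
			return NULL;

		/* only non-coherent hardware needs the uncached remap */
		if (!arch_is_coherent())
			addr = __dma_alloc_remap(page, size, gfp, prot);
		else
			addr = page_address(page);

		if (addr)
			*handle = page_to_dma(dev, page);

		return addr;
	}
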
Signed-off-by: Russell King Acked-by: Greg Ungerer --- arch/arm/mm/dma-mapping.c | 47 +++++++++++++-------------------------- 1 file changed, 16 insertions(+), 31 deletions(-) diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 3a8e527c774c..707d81247630 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -183,27 +183,13 @@ static int __init consistent_init(void) core_initcall(consistent_init); static void * -__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, - pgprot_t prot) +__dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot) { - struct page *page; struct arm_vmregion *c; - size = PAGE_ALIGN(size); - - page = __dma_alloc_buffer(dev, size, gfp); - if (!page) - goto no_page; - - if (arch_is_coherent()) { - *handle = page_to_dma(dev, page); - return page_address(page); - } - if (!consistent_pte[0]) { printk(KERN_ERR "%s: not initialised\n", __func__); dump_stack(); - __dma_free_buffer(page, size); return NULL; } @@ -220,11 +206,6 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, pte = consistent_pte[idx] + off; c->vm_pages = page; - /* - * Set the "dma handle" - */ - *handle = page_to_dma(dev, page); - do { BUG_ON(!pte_none(*pte)); @@ -244,11 +225,6 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, return (void *)c->vm_start; } - - if (page) - __dma_free_buffer(page, size); - no_page: - *handle = ~0; return NULL; } @@ -315,11 +291,17 @@ static void __dma_free_remap(void *cpu_addr, size_t size) #else /* !CONFIG_MMU */ +#define __dma_alloc_remap(page, size, gfp, prot) page_address(page) +#define __dma_free_remap(addr, size) do { } while (0) + +#endif /* CONFIG_MMU */ + static void * __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, pgprot_t prot) { struct page *page; + void *addr; *handle = ~0; size = PAGE_ALIGN(size); @@ -328,14 +310,17 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, if (!page) return NULL; - *handle = page_to_dma(dev, page); - return page_address(page); + if (!arch_is_coherent()) + addr = __dma_alloc_remap(page, size, gfp, prot); + else + addr = page_address(page); + + if (addr) + *handle = page_to_dma(dev, page); + + return addr; } -#define __dma_free_remap(addr, size) do { } while (0) - -#endif /* CONFIG_MMU */ - /* * Allocate DMA-coherent memory space and return both the kernel remapped * virtual and bus address for that space. From acaac256b3a14a09ab278409a72d119f2d75b02b Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 20 Nov 2009 18:19:52 +0000 Subject: [PATCH 09/10] ARM: dma-mapping: get rid of setting/clearing the reserved page bit It's unnecessary; x86 doesn't do it, and ALSA doesn't require it anymore. Signed-off-by: Russell King Acked-by: Greg Ungerer --- arch/arm/mm/dma-mapping.c | 23 +++-------------------- 1 file changed, 3 insertions(+), 20 deletions(-) diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 707d81247630..6fac793329c6 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -209,10 +209,6 @@ __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot) do { BUG_ON(!pte_none(*pte)); - /* - * x86 does not mark the pages reserved... 
- */ - SetPageReserved(page); set_pte_ext(pte, mk_pte(page, prot), 0); page++; pte++; @@ -257,7 +253,6 @@ static void __dma_free_remap(void *cpu_addr, size_t size) addr = c->vm_start; do { pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep); - unsigned long pfn; ptep++; addr += PAGE_SIZE; @@ -267,21 +262,9 @@ static void __dma_free_remap(void *cpu_addr, size_t size) ptep = consistent_pte[++idx]; } - if (!pte_none(pte) && pte_present(pte)) { - pfn = pte_pfn(pte); - - if (pfn_valid(pfn)) { - struct page *page = pfn_to_page(pfn); - - /* - * x86 does not mark the pages reserved... - */ - ClearPageReserved(page); - continue; - } - } - printk(KERN_CRIT "%s: bad page in kernel page table\n", - __func__); + if (pte_none(pte) || !pte_present(pte)) + printk(KERN_CRIT "%s: bad page in kernel page table\n", + __func__); } while (size -= PAGE_SIZE); flush_tlb_kernel_range(c->vm_start, c->vm_end); From 26a26d329688ab018e068b412b03d43d7c299f0a Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 20 Nov 2009 21:06:43 +0000 Subject: [PATCH 10/10] ARM: dma-mapping: switch ARMv7 DMA mappings to retain 'memory' attribute On ARMv7, it is invalid to map the same physical address multiple times with different memory types. Since system RAM is already mapped as 'memory', subsequent remapping of it must retain this attribute. However, DMA memory maps it as "strongly ordered". Fix this by introducing 'pgprot_dmacoherent()' which provides the necessary page table bits for DMA mappings. Signed-off-by: Russell King Acked-by: Greg Ungerer Reviewed-by: Catalin Marinas --- arch/arm/include/asm/pgtable.h | 14 ++++++++++++-- arch/arm/include/asm/system.h | 19 ++++++++++++------- arch/arm/mm/dma-mapping.c | 4 ++-- 3 files changed, 26 insertions(+), 11 deletions(-) diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index 201ccaa11f61..11397687f42c 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -304,13 +304,23 @@ PTE_BIT_FUNC(mkyoung, |= L_PTE_YOUNG); static inline pte_t pte_mkspecial(pte_t pte) { return pte; } +#define __pgprot_modify(prot,mask,bits) \ + __pgprot((pgprot_val(prot) & ~(mask)) | (bits)) + /* * Mark the prot value as uncacheable and unbufferable. 
*/ #define pgprot_noncached(prot) \ - __pgprot((pgprot_val(prot) & ~L_PTE_MT_MASK) | L_PTE_MT_UNCACHED) + __pgprot_modify(prot, L_PTE_MT_MASK, L_PTE_MT_UNCACHED) #define pgprot_writecombine(prot) \ - __pgprot((pgprot_val(prot) & ~L_PTE_MT_MASK) | L_PTE_MT_BUFFERABLE) + __pgprot_modify(prot, L_PTE_MT_MASK, L_PTE_MT_BUFFERABLE) +#if __LINUX_ARM_ARCH__ >= 7 +#define pgprot_dmacoherent(prot) \ + __pgprot_modify(prot, L_PTE_MT_MASK|L_PTE_EXEC, L_PTE_MT_BUFFERABLE) +#else +#define pgprot_dmacoherent(prot) \ + __pgprot_modify(prot, L_PTE_MT_MASK|L_PTE_EXEC, L_PTE_MT_UNCACHED) +#endif #define pmd_none(pmd) (!pmd_val(pmd)) #define pmd_present(pmd) (pmd_val(pmd)) diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h index d65b2f5bf41f..058e7e90881d 100644 --- a/arch/arm/include/asm/system.h +++ b/arch/arm/include/asm/system.h @@ -138,21 +138,26 @@ extern unsigned int user_debug; #define dmb() __asm__ __volatile__ ("" : : : "memory") #endif -#ifndef CONFIG_SMP +#if __LINUX_ARM_ARCH__ >= 7 || defined(CONFIG_SMP) +#define mb() dmb() +#define rmb() dmb() +#define wmb() dmb() +#else #define mb() do { if (arch_is_coherent()) dmb(); else barrier(); } while (0) #define rmb() do { if (arch_is_coherent()) dmb(); else barrier(); } while (0) #define wmb() do { if (arch_is_coherent()) dmb(); else barrier(); } while (0) +#endif + +#ifndef CONFIG_SMP #define smp_mb() barrier() #define smp_rmb() barrier() #define smp_wmb() barrier() #else -#define mb() dmb() -#define rmb() dmb() -#define wmb() dmb() -#define smp_mb() dmb() -#define smp_rmb() dmb() -#define smp_wmb() dmb() +#define smp_mb() mb() +#define smp_rmb() rmb() +#define smp_wmb() wmb() #endif + #define read_barrier_depends() do { } while(0) #define smp_read_barrier_depends() do { } while(0) diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 6fac793329c6..26325cb5d368 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -317,7 +317,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gf return memory; return __dma_alloc(dev, size, handle, gfp, - pgprot_noncached(pgprot_kernel)); + pgprot_dmacoherent(pgprot_kernel)); } EXPORT_SYMBOL(dma_alloc_coherent); @@ -365,7 +365,7 @@ static int dma_mmap(struct device *dev, struct vm_area_struct *vma, int dma_mmap_coherent(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size) { - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + vma->vm_page_prot = pgprot_dmacoherent(vma->vm_page_prot); return dma_mmap(dev, vma, cpu_addr, dma_addr, size); } EXPORT_SYMBOL(dma_mmap_coherent);
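
Taken together, the series leaves the driver-facing API unchanged. A minimal, hypothetical user of the reworked allocator (names and sizes are illustrative, not from the patches):

	#include <linux/device.h>
	#include <linux/dma-mapping.h>
	#include <linux/errno.h>
	#include <linux/gfp.h>
	#include <linux/mm.h>

	struct example_ring {
		void		*virt;
		dma_addr_t	 dma;
		size_t		 size;
	};

	/*
	 * Allocate a descriptor ring for a device.  On ARMv7 the kernel-side
	 * mapping set up by __dma_alloc_remap() now uses pgprot_dmacoherent()
	 * (normal, uncacheable memory) rather than a strongly-ordered mapping.
	 */
	static int example_ring_alloc(struct device *dev, struct example_ring *r,
				      size_t size)
	{
		r->size = PAGE_ALIGN(size);
		r->virt = dma_alloc_coherent(dev, r->size, &r->dma, GFP_KERNEL);
		return r->virt ? 0 : -ENOMEM;
	}

	static void example_ring_free(struct device *dev, struct example_ring *r)
	{
		dma_free_coherent(dev, r->size, r->virt, r->dma);
	}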