From 81b23ba645e6b2b446093b2d927c261a17f7dee3 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Tue, 30 Jul 2019 14:08:07 +0800 Subject: [PATCH 01/14] csky: Fixup mb() synchronization problem The mb() is the superset of dma and smp. Using bar.xxx to implement mb() will cause problem when sync data with dma device, because bar.xxx couldn't guarantee bus transactions finished at outside bus level. We must use sync.s instead of bar.xxx for dma data synchronization and it will guarantee retirement after getting the bus bresponse. Changes for V2: - Use sync.s for all mb, rmb, wmb, dma_wmb, dma_rmb. Signed-off-by: Guo Ren Cc: Arnd Bergmann --- arch/csky/include/asm/barrier.h | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/csky/include/asm/barrier.h b/arch/csky/include/asm/barrier.h index 476eb786f22d..a430e7fddf35 100644 --- a/arch/csky/include/asm/barrier.h +++ b/arch/csky/include/asm/barrier.h @@ -9,11 +9,12 @@ #define nop() asm volatile ("nop\n":::"memory") /* - * sync: completion barrier - * sync.s: completion barrier and shareable to other cores - * sync.i: completion barrier with flush cpu pipeline - * sync.is: completion barrier with flush cpu pipeline and shareable to - * other cores + * sync: completion barrier, all sync.xx instructions + * guarantee the last response received by bus transaction + * made by ld/st instructions before sync.s + * sync.s: inherit from sync, but also shareable to other cores + * sync.i: inherit from sync, but also flush cpu pipeline + * sync.is: the same with sync.i + sync.s * * bar.brwarw: ordering barrier for all load/store instructions before it * bar.brwarws: ordering barrier for all load/store instructions before it @@ -27,9 +28,7 @@ */ #ifdef CONFIG_CPU_HAS_CACHEV2 -#define mb() asm volatile ("bar.brwarw\n":::"memory") -#define rmb() asm volatile ("bar.brar\n":::"memory") -#define wmb() asm volatile ("bar.bwaw\n":::"memory") +#define mb() asm volatile ("sync.s\n":::"memory") #ifdef CONFIG_SMP 
#define __smp_mb() asm volatile ("bar.brwarws\n":::"memory") From 7f80fe207de9602aaff028c79345caa68c90cd31 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Tue, 30 Jul 2019 14:43:22 +0800 Subject: [PATCH 02/14] csky: Fixup dma_alloc_coherent with PAGE_SO attribute This bug is from commit: 2b070ccdf8c0 (fixup abiv2 mmap(... O_SYNC) failed). In that patch we remove the _PAGE_SO for memory noncache mapping and this will cause problem when drivers use dma descriptors to control the transactions without dma_w/rmb(). After referencing other archs' implementation, pgprot_writecombine is introduced for mmap(... O_SYNC). Signed-off-by: Guo Ren --- arch/csky/include/asm/pgtable.h | 10 ++++++++++ arch/csky/mm/ioremap.c | 6 ++---- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/arch/csky/include/asm/pgtable.h b/arch/csky/include/asm/pgtable.h index c429a6f347de..fc19ba446d62 100644 --- a/arch/csky/include/asm/pgtable.h +++ b/arch/csky/include/asm/pgtable.h @@ -258,6 +258,16 @@ static inline pgprot_t pgprot_noncached(pgprot_t _prot) { unsigned long prot = pgprot_val(_prot); + prot = (prot & ~_CACHE_MASK) | _CACHE_UNCACHED | _PAGE_SO; + + return __pgprot(prot); +} + +#define pgprot_writecombine pgprot_writecombine +static inline pgprot_t pgprot_writecombine(pgprot_t _prot) +{ + unsigned long prot = pgprot_val(_prot); + prot = (prot & ~_CACHE_MASK) | _CACHE_UNCACHED; return __pgprot(prot); diff --git a/arch/csky/mm/ioremap.c b/arch/csky/mm/ioremap.c index 8473b6bdf512..48531115fd9d 100644 --- a/arch/csky/mm/ioremap.c +++ b/arch/csky/mm/ioremap.c @@ -29,8 +29,7 @@ void __iomem *ioremap(phys_addr_t addr, size_t size) vaddr = (unsigned long)area->addr; - prot = __pgprot(_PAGE_PRESENT | __READABLE | __WRITEABLE | - _PAGE_GLOBAL | _CACHE_UNCACHED | _PAGE_SO); + prot = pgprot_noncached(PAGE_KERNEL); if (ioremap_page_range(vaddr, vaddr + size, addr, prot)) { free_vm_area(area); @@ -51,10 +50,9 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned 
long size, pgprot_t vma_prot) { if (!pfn_valid(pfn)) { - vma_prot.pgprot |= _PAGE_SO; return pgprot_noncached(vma_prot); } else if (file->f_flags & O_SYNC) { - return pgprot_noncached(vma_prot); + return pgprot_writecombine(vma_prot); } return vma_prot; From 4af9027d3f4061992c0b065102a0a666b72f073b Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Tue, 30 Jul 2019 17:02:26 +0800 Subject: [PATCH 03/14] csky/dma: Fixup cache_op failed when cross memory ZONEs If the paddr and size are cross between NORMAL_ZONE and HIGHMEM_ZONE memory range, cache_op will panic in do_page_fault with bad_area. Optimize the code to support the range which cross memory ZONEs. Changes for V2: - Revert back to postcore_initcall Signed-off-by: Guo Ren Cc: Christoph Hellwig Cc: Arnd Bergmann --- arch/csky/mm/dma-mapping.c | 71 ++++++++++++++------------------------ 1 file changed, 26 insertions(+), 45 deletions(-) diff --git a/arch/csky/mm/dma-mapping.c b/arch/csky/mm/dma-mapping.c index 80783bb71c5c..65f531d54814 100644 --- a/arch/csky/mm/dma-mapping.c +++ b/arch/csky/mm/dma-mapping.c @@ -20,69 +20,50 @@ static int __init atomic_pool_init(void) } postcore_initcall(atomic_pool_init); -void arch_dma_prep_coherent(struct page *page, size_t size) -{ - if (PageHighMem(page)) { - unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; - - do { - void *ptr = kmap_atomic(page); - size_t _size = (size < PAGE_SIZE) ? 
size : PAGE_SIZE; - - memset(ptr, 0, _size); - dma_wbinv_range((unsigned long)ptr, - (unsigned long)ptr + _size); - - kunmap_atomic(ptr); - - page++; - size -= PAGE_SIZE; - count--; - } while (count); - } else { - void *ptr = page_address(page); - - memset(ptr, 0, size); - dma_wbinv_range((unsigned long)ptr, (unsigned long)ptr + size); - } -} - static inline void cache_op(phys_addr_t paddr, size_t size, void (*fn)(unsigned long start, unsigned long end)) { - struct page *page = pfn_to_page(paddr >> PAGE_SHIFT); - unsigned int offset = paddr & ~PAGE_MASK; - size_t left = size; - unsigned long start; + struct page *page = phys_to_page(paddr); + void *start = __va(page_to_phys(page)); + unsigned long offset = offset_in_page(paddr); + size_t left = size; do { size_t len = left; + if (offset + len > PAGE_SIZE) + len = PAGE_SIZE - offset; + if (PageHighMem(page)) { - void *addr; + start = kmap_atomic(page); - if (offset + len > PAGE_SIZE) { - if (offset >= PAGE_SIZE) { - page += offset >> PAGE_SHIFT; - offset &= ~PAGE_MASK; - } - len = PAGE_SIZE - offset; - } + fn((unsigned long)start + offset, + (unsigned long)start + offset + len); - addr = kmap_atomic(page); - start = (unsigned long)(addr + offset); - fn(start, start + len); - kunmap_atomic(addr); + kunmap_atomic(start); } else { - start = (unsigned long)phys_to_virt(paddr); - fn(start, start + size); + fn((unsigned long)start + offset, + (unsigned long)start + offset + len); } offset = 0; + page++; + start += PAGE_SIZE; left -= len; } while (left); } +static void dma_wbinv_set_zero_range(unsigned long start, unsigned long end) +{ + memset((void *)start, 0, end - start); + dma_wbinv_range(start, end); +} + +void arch_dma_prep_coherent(struct page *page, size_t size) +{ + cache_op(page_to_phys(page), size, dma_wbinv_set_zero_range); +} + void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr, size_t size, enum dma_data_direction dir) { From ae76f635d4e1cffa6870cc5472567ca9d6940a22 Mon Sep 17 00:00:00 2001 
From: Guo Ren Date: Tue, 30 Jul 2019 17:16:28 +0800 Subject: [PATCH 04/14] csky: Optimize arch_sync_dma_for_cpu/device with dma_inv_range DMA_FROM_DEVICE only need to read dma data of memory into CPU cache, so there is no need to clear cache before. Also clear + inv for DMA_FROM_DEVICE won't cause problem, because the memory range for dma won't be touched by software during dma working. Changes for V2: - Remove clr cache and ignore the DMA_TO_DEVICE in _for_cpu. - Change inv to wbinv cache with DMA_FROM_DEVICE in _for_device. Signed-off-by: Guo Ren Cc: Arnd Bergmann --- arch/csky/include/asm/cache.h | 1 + arch/csky/mm/cachev1.c | 7 ++++++- arch/csky/mm/cachev2.c | 11 ++++++++++- arch/csky/mm/dma-mapping.c | 5 ++--- 4 files changed, 19 insertions(+), 5 deletions(-) diff --git a/arch/csky/include/asm/cache.h b/arch/csky/include/asm/cache.h index d68373463676..1d5fc2f78fd7 100644 --- a/arch/csky/include/asm/cache.h +++ b/arch/csky/include/asm/cache.h @@ -24,6 +24,7 @@ void cache_wbinv_range(unsigned long start, unsigned long end); void cache_wbinv_all(void); void dma_wbinv_range(unsigned long start, unsigned long end); +void dma_inv_range(unsigned long start, unsigned long end); void dma_wb_range(unsigned long start, unsigned long end); #endif diff --git a/arch/csky/mm/cachev1.c b/arch/csky/mm/cachev1.c index b8a75cce0b8c..494ec912abff 100644 --- a/arch/csky/mm/cachev1.c +++ b/arch/csky/mm/cachev1.c @@ -120,7 +120,12 @@ void dma_wbinv_range(unsigned long start, unsigned long end) cache_op_range(start, end, DATA_CACHE|CACHE_CLR|CACHE_INV, 1); } +void dma_inv_range(unsigned long start, unsigned long end) +{ + cache_op_range(start, end, DATA_CACHE|CACHE_CLR|CACHE_INV, 1); +} + void dma_wb_range(unsigned long start, unsigned long end) { - cache_op_range(start, end, DATA_CACHE|CACHE_INV, 1); + cache_op_range(start, end, DATA_CACHE|CACHE_CLR|CACHE_INV, 1); } diff --git a/arch/csky/mm/cachev2.c b/arch/csky/mm/cachev2.c index baaf05d69f44..b61be6518e21 100644 --- 
a/arch/csky/mm/cachev2.c +++ b/arch/csky/mm/cachev2.c @@ -69,11 +69,20 @@ void dma_wbinv_range(unsigned long start, unsigned long end) sync_is(); } +void dma_inv_range(unsigned long start, unsigned long end) +{ + unsigned long i = start & ~(L1_CACHE_BYTES - 1); + + for (; i < end; i += L1_CACHE_BYTES) + asm volatile("dcache.iva %0\n"::"r"(i):"memory"); + sync_is(); +} + void dma_wb_range(unsigned long start, unsigned long end) { unsigned long i = start & ~(L1_CACHE_BYTES - 1); for (; i < end; i += L1_CACHE_BYTES) - asm volatile("dcache.civa %0\n"::"r"(i):"memory"); + asm volatile("dcache.cva %0\n"::"r"(i):"memory"); sync_is(); } diff --git a/arch/csky/mm/dma-mapping.c b/arch/csky/mm/dma-mapping.c index 65f531d54814..106ef02a8f89 100644 --- a/arch/csky/mm/dma-mapping.c +++ b/arch/csky/mm/dma-mapping.c @@ -85,11 +85,10 @@ void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr, { switch (dir) { case DMA_TO_DEVICE: - cache_op(paddr, size, dma_wb_range); - break; + return; case DMA_FROM_DEVICE: case DMA_BIDIRECTIONAL: - cache_op(paddr, size, dma_wbinv_range); + cache_op(paddr, size, dma_inv_range); break; default: BUG(); From 5336c17928cc464845ff765ce45b368c22f848e0 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Thu, 15 Aug 2019 16:24:56 +0800 Subject: [PATCH 05/14] csky: Fixup ioremap function losing Implement the following apis to meet usage in different scenarios. - ioremap (NonCache + StrongOrder) - ioremap_nocache (NonCache + StrongOrder) - ioremap_wc (NonCache + WeakOrder ) - ioremap_cache ( Cache + WeakOrder ) Also change flag VM_ALLOC to VM_IOREMAP in get_vm_area_caller. 
Signed-off-by: Guo Ren Cc: Arnd Bergmann Cc: Christoph Hellwig --- arch/csky/include/asm/io.h | 23 ++++++++++++----------- arch/csky/mm/ioremap.c | 23 +++++++++++++++++------ 2 files changed, 29 insertions(+), 17 deletions(-) diff --git a/arch/csky/include/asm/io.h b/arch/csky/include/asm/io.h index c1dfa9c10e36..80d071e2567f 100644 --- a/arch/csky/include/asm/io.h +++ b/arch/csky/include/asm/io.h @@ -4,17 +4,10 @@ #ifndef __ASM_CSKY_IO_H #define __ASM_CSKY_IO_H -#include +#include #include #include -extern void __iomem *ioremap(phys_addr_t offset, size_t size); - -extern void iounmap(void *addr); - -extern int remap_area_pages(unsigned long address, phys_addr_t phys_addr, - size_t size, unsigned long flags); - /* * I/O memory access primitives. Reads are ordered relative to any * following Normal memory access. Writes are ordered relative to any prior @@ -40,9 +33,17 @@ extern int remap_area_pages(unsigned long address, phys_addr_t phys_addr, #define writel(v,c) ({ wmb(); writel_relaxed((v),(c)); mb(); }) #endif -#define ioremap_nocache(phy, sz) ioremap(phy, sz) -#define ioremap_wc ioremap_nocache -#define ioremap_wt ioremap_nocache +/* + * I/O memory mapping functions. 
+ */ +extern void __iomem *ioremap_cache(phys_addr_t addr, size_t size); +extern void __iomem *__ioremap(phys_addr_t addr, size_t size, pgprot_t prot); +extern void iounmap(void *addr); + +#define ioremap(addr, size) __ioremap((addr), (size), pgprot_noncached(PAGE_KERNEL)) +#define ioremap_wc(addr, size) __ioremap((addr), (size), pgprot_writecombine(PAGE_KERNEL)) +#define ioremap_nocache(addr, size) ioremap((addr), (size)) +#define ioremap_cache ioremap_cache #include diff --git a/arch/csky/mm/ioremap.c b/arch/csky/mm/ioremap.c index 48531115fd9d..e13cd3497628 100644 --- a/arch/csky/mm/ioremap.c +++ b/arch/csky/mm/ioremap.c @@ -8,12 +8,12 @@ #include -void __iomem *ioremap(phys_addr_t addr, size_t size) +static void __iomem *__ioremap_caller(phys_addr_t addr, size_t size, + pgprot_t prot, void *caller) { phys_addr_t last_addr; unsigned long offset, vaddr; struct vm_struct *area; - pgprot_t prot; last_addr = addr + size - 1; if (!size || last_addr < addr) @@ -23,14 +23,12 @@ void __iomem *ioremap(phys_addr_t addr, size_t size) addr &= PAGE_MASK; size = PAGE_ALIGN(size + offset); - area = get_vm_area_caller(size, VM_ALLOC, __builtin_return_address(0)); + area = get_vm_area_caller(size, VM_IOREMAP, caller); if (!area) return NULL; vaddr = (unsigned long)area->addr; - prot = pgprot_noncached(PAGE_KERNEL); - if (ioremap_page_range(vaddr, vaddr + size, addr, prot)) { free_vm_area(area); return NULL; @@ -38,7 +36,20 @@ void __iomem *ioremap(phys_addr_t addr, size_t size) return (void __iomem *)(vaddr + offset); } -EXPORT_SYMBOL(ioremap); + +void __iomem *__ioremap(phys_addr_t phys_addr, size_t size, pgprot_t prot) +{ + return __ioremap_caller(phys_addr, size, prot, + __builtin_return_address(0)); +} +EXPORT_SYMBOL(__ioremap); + +void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size) +{ + return __ioremap_caller(phys_addr, size, PAGE_KERNEL, + __builtin_return_address(0)); +} +EXPORT_SYMBOL(ioremap_cache); void iounmap(void __iomem *addr) { From 
be819aa6f11145de32dab8690ec6055348488c18 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Tue, 20 Aug 2019 15:31:29 +0800 Subject: [PATCH 06/14] csky: Fixup arch_get_unmapped_area() implementation Current arch_get_unmapped_area() of abiv1 doesn't use standard kernel api. After referring to the implementation of arch/arm, we implement it with vm_unmapped_area() from linux/mm.h. Signed-off-by: Guo Ren Cc: Arnd Bergmann --- arch/csky/abiv1/inc/abi/page.h | 5 ++- arch/csky/abiv1/mmap.c | 77 ++++++++++++++++++---------------- 2 files changed, 44 insertions(+), 38 deletions(-) diff --git a/arch/csky/abiv1/inc/abi/page.h b/arch/csky/abiv1/inc/abi/page.h index 6336e92a103a..c864519117c7 100644 --- a/arch/csky/abiv1/inc/abi/page.h +++ b/arch/csky/abiv1/inc/abi/page.h @@ -1,13 +1,14 @@ /* SPDX-License-Identifier: GPL-2.0 */ // Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. -extern unsigned long shm_align_mask; +#include + extern void flush_dcache_page(struct page *page); static inline unsigned long pages_do_alias(unsigned long addr1, unsigned long addr2) { - return (addr1 ^ addr2) & shm_align_mask; + return (addr1 ^ addr2) & (SHMLBA-1); } static inline void clear_user_page(void *addr, unsigned long vaddr, diff --git a/arch/csky/abiv1/mmap.c b/arch/csky/abiv1/mmap.c index b462fd50b23a..6792aca49999 100644 --- a/arch/csky/abiv1/mmap.c +++ b/arch/csky/abiv1/mmap.c @@ -9,58 +9,63 @@ #include #include -unsigned long shm_align_mask = (0x4000 >> 1) - 1; /* Sane caches */ +#define COLOUR_ALIGN(addr,pgoff) \ + ((((addr)+SHMLBA-1)&~(SHMLBA-1)) + \ + (((pgoff)<mm; + struct vm_area_struct *vma; + int do_align = 0; + struct vm_unmapped_area_info info; + /* + * We only need to do colour alignment if either the I or D + * caches alias. + */ + do_align = filp || (flags & MAP_SHARED); + + /* + * We enforce the MAP_FIXED case. + */ if (flags & MAP_FIXED) { - /* - * We do not accept a shared mapping if it would violate - * cache aliasing constraints. 
- */ - if ((flags & MAP_SHARED) && - ((addr - (pgoff << PAGE_SHIFT)) & shm_align_mask)) + if (flags & MAP_SHARED && + (addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1)) return -EINVAL; return addr; } if (len > TASK_SIZE) return -ENOMEM; - do_color_align = 0; - if (filp || (flags & MAP_SHARED)) - do_color_align = 1; + if (addr) { - if (do_color_align) + if (do_align) addr = COLOUR_ALIGN(addr, pgoff); else addr = PAGE_ALIGN(addr); - vmm = find_vma(current->mm, addr); - if (TASK_SIZE - len >= addr && - (!vmm || addr + len <= vmm->vm_start)) - return addr; - } - addr = TASK_UNMAPPED_BASE; - if (do_color_align) - addr = COLOUR_ALIGN(addr, pgoff); - else - addr = PAGE_ALIGN(addr); - for (vmm = find_vma(current->mm, addr); ; vmm = vmm->vm_next) { - /* At this point: (!vmm || addr < vmm->vm_end). */ - if (TASK_SIZE - len < addr) - return -ENOMEM; - if (!vmm || addr + len <= vmm->vm_start) + vma = find_vma(mm, addr); + if (TASK_SIZE - len >= addr && + (!vma || addr + len <= vm_start_gap(vma))) return addr; - addr = vmm->vm_end; - if (do_color_align) - addr = COLOUR_ALIGN(addr, pgoff); } + + info.flags = 0; + info.length = len; + info.low_limit = mm->mmap_base; + info.high_limit = TASK_SIZE; + info.align_mask = do_align ? (PAGE_MASK & (SHMLBA - 1)) : 0; + info.align_offset = pgoff << PAGE_SHIFT; + return vm_unmapped_area(&info); } From dc140045c0cace809af872e3799e8fbe1b7d7f86 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Tue, 20 Aug 2019 12:47:24 +0800 Subject: [PATCH 07/14] csky: Fixup defer cache flush for 610 We use defer cache flush mechanism to improve the performance of 610, but the implementation is wrong. We fix it up now and update the mechanism: - Zero page needn't be flushed. - If page is file mapping & non-touched in user space, defer flush. - If page is anon mapping or dirty file mapping, flush immediately. - In update_mmu_cache finish the defer flush by flush_dcache_page(). 
For 610 we need take care the dcache aliasing issue: - VIPT cache with 8K-bytes size per way in 4K page granularity. Signed-off-by: Guo Ren Cc: Arnd Bergmann --- arch/csky/abiv1/cacheflush.c | 50 +++++++++++++++------------- arch/csky/abiv1/inc/abi/cacheflush.h | 4 +-- 2 files changed, 29 insertions(+), 25 deletions(-) diff --git a/arch/csky/abiv1/cacheflush.c b/arch/csky/abiv1/cacheflush.c index 10af8b6fe322..fee99fc6612f 100644 --- a/arch/csky/abiv1/cacheflush.c +++ b/arch/csky/abiv1/cacheflush.c @@ -11,42 +11,46 @@ #include #include +#define PG_dcache_clean PG_arch_1 + void flush_dcache_page(struct page *page) { - struct address_space *mapping = page_mapping(page); - unsigned long addr; + struct address_space *mapping; - if (mapping && !mapping_mapped(mapping)) { - set_bit(PG_arch_1, &(page)->flags); + if (page == ZERO_PAGE(0)) return; + + mapping = page_mapping_file(page); + + if (mapping && !page_mapcount(page)) + clear_bit(PG_dcache_clean, &page->flags); + else { + dcache_wbinv_all(); + if (mapping) + icache_inv_all(); + set_bit(PG_dcache_clean, &page->flags); } - - /* - * We could delay the flush for the !page_mapping case too. But that - * case is for exec env/arg pages and those are %99 certainly going to - * get faulted into the tlb (and thus flushed) anyways. 
- */ - addr = (unsigned long) page_address(page); - dcache_wb_range(addr, addr + PAGE_SIZE); } +EXPORT_SYMBOL(flush_dcache_page); -void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, - pte_t *pte) +void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr, + pte_t *ptep) { - unsigned long addr; + unsigned long pfn = pte_pfn(*ptep); struct page *page; - unsigned long pfn; - pfn = pte_pfn(*pte); - if (unlikely(!pfn_valid(pfn))) + if (!pfn_valid(pfn)) return; page = pfn_to_page(pfn); - addr = (unsigned long) page_address(page); + if (page == ZERO_PAGE(0)) + return; - if (vma->vm_flags & VM_EXEC || - pages_do_alias(addr, address & PAGE_MASK)) - cache_wbinv_all(); + if (!test_and_set_bit(PG_dcache_clean, &page->flags)) + dcache_wbinv_all(); - clear_bit(PG_arch_1, &(page)->flags); + if (page_mapping_file(page)) { + if (vma->vm_flags & VM_EXEC) + icache_inv_all(); + } } diff --git a/arch/csky/abiv1/inc/abi/cacheflush.h b/arch/csky/abiv1/inc/abi/cacheflush.h index 5f663aef9b1b..fce5604cef40 100644 --- a/arch/csky/abiv1/inc/abi/cacheflush.h +++ b/arch/csky/abiv1/inc/abi/cacheflush.h @@ -26,8 +26,8 @@ extern void flush_dcache_page(struct page *); #define flush_icache_page(vma, page) cache_wbinv_all() #define flush_icache_range(start, end) cache_wbinv_range(start, end) -#define flush_icache_user_range(vma, pg, adr, len) \ - cache_wbinv_range(adr, adr + len) +#define flush_icache_user_range(vma,page,addr,len) \ + flush_dcache_page(page) #define copy_from_user_page(vma, page, vaddr, dst, src, len) \ do { \ From c7e6f0e99227b3dcdc5e62f789119e000887ff79 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Tue, 20 Aug 2019 20:15:44 +0800 Subject: [PATCH 08/14] csky: Support kernel non-aligned access We prohibit non-aligned access in kernel mode, but some special NIC driver needs to support kernel-state unaligned access. 
For example, when the bus does not support unaligned access, IP header parsing will cause non-aligned access and driver does not recopy the skb buffer to dma for performance reasons. Added kernel_enable & user_enable to control unaligned access and added kernel_count & user_count for statistical unaligned access. Signed-off-by: Guo Ren Cc: Arnd Bergmann --- arch/csky/abiv1/alignment.c | 62 +++++++++++++++++++++++++++---------- 1 file changed, 45 insertions(+), 17 deletions(-) diff --git a/arch/csky/abiv1/alignment.c b/arch/csky/abiv1/alignment.c index 27ef5b2c43ab..cb2a0d94a144 100644 --- a/arch/csky/abiv1/alignment.c +++ b/arch/csky/abiv1/alignment.c @@ -5,8 +5,10 @@ #include #include -static int align_enable = 1; -static int align_count; +static int align_kern_enable = 1; +static int align_usr_enable = 1; +static int align_kern_count = 0; +static int align_usr_count = 0; static inline uint32_t get_ptreg(struct pt_regs *regs, uint32_t rx) { @@ -32,9 +34,6 @@ static int ldb_asm(uint32_t addr, uint32_t *valp) uint32_t val; int err; - if (!access_ok((void *)addr, 1)) - return 1; - asm volatile ( "movi %0, 0\n" "1:\n" @@ -67,9 +66,6 @@ static int stb_asm(uint32_t addr, uint32_t val) { int err; - if (!access_ok((void *)addr, 1)) - return 1; - asm volatile ( "movi %0, 0\n" "1:\n" @@ -203,8 +199,6 @@ static int stw_c(struct pt_regs *regs, uint32_t rz, uint32_t addr) if (stb_asm(addr, byte3)) return 1; - align_count++; - return 0; } @@ -226,7 +220,14 @@ void csky_alignment(struct pt_regs *regs) uint32_t addr = 0; if (!user_mode(regs)) + goto kernel_area; + + if (!align_usr_enable) { + pr_err("%s user disabled.\n", __func__); goto bad_area; + } + + align_usr_count++; ret = get_user(tmp, (uint16_t *)instruction_pointer(regs)); if (ret) { @@ -234,6 +235,19 @@ void csky_alignment(struct pt_regs *regs) goto bad_area; } + goto good_area; + +kernel_area: + if (!align_kern_enable) { + pr_err("%s kernel disabled.\n", __func__); + goto bad_area; + } + + align_kern_count++; + + tmp 
= *(uint16_t *)instruction_pointer(regs); + +good_area: opcode = (uint32_t)tmp; rx = opcode & 0xf; @@ -286,18 +300,32 @@ void csky_alignment(struct pt_regs *regs) force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)addr); } -static struct ctl_table alignment_tbl[4] = { +static struct ctl_table alignment_tbl[5] = { { - .procname = "enable", - .data = &align_enable, - .maxlen = sizeof(align_enable), + .procname = "kernel_enable", + .data = &align_kern_enable, + .maxlen = sizeof(align_kern_enable), .mode = 0666, .proc_handler = &proc_dointvec }, { - .procname = "count", - .data = &align_count, - .maxlen = sizeof(align_count), + .procname = "user_enable", + .data = &align_usr_enable, + .maxlen = sizeof(align_usr_enable), + .mode = 0666, + .proc_handler = &proc_dointvec + }, + { + .procname = "kernel_count", + .data = &align_kern_count, + .maxlen = sizeof(align_kern_count), + .mode = 0666, + .proc_handler = &proc_dointvec + }, + { + .procname = "user_count", + .data = &align_usr_count, + .maxlen = sizeof(align_usr_count), .mode = 0666, .proc_handler = &proc_dointvec }, From 4ad35c1f56386c8e7019c921bba1af109fde9693 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Wed, 21 Aug 2019 19:15:52 +0800 Subject: [PATCH 09/14] csky: Fixup 610 vipt cache flush mechanism 610 has vipt aliasing issue, so we need to finish the cache flush apis mentioned in cachetlb.rst to avoid data corruption. 
Here is the list of modified apis in the patch: - flush_kernel_dcache_page (new add) - flush_dcache_mmap_lock (new add) - flush_dcache_mmap_unlock (new add) - flush_kernel_vmap_range (new add) - invalidate_kernel_vmap_range (new add) - flush_anon_page (new add) - flush_cache_range (new add) - flush_cache_vmap (flush all) - flush_cache_vunmap (flush all) - flush_cache_mm (only dcache flush) - flush_icache_page (just nop) - copy_from_user_page (remove no need flush) - copy_to_user_page (remove no need flush) Change to V2: - Fixup compile error with xa_lock*(&mapping->i_pages) Signed-off-by: Guo Ren Cc: Arnd Bergmann Cc: Christoph Hellwig --- arch/csky/abiv1/cacheflush.c | 20 ++++++++++++++ arch/csky/abiv1/inc/abi/cacheflush.h | 41 ++++++++++++++++++++-------- 2 files changed, 49 insertions(+), 12 deletions(-) diff --git a/arch/csky/abiv1/cacheflush.c b/arch/csky/abiv1/cacheflush.c index fee99fc6612f..9f1fe80cc847 100644 --- a/arch/csky/abiv1/cacheflush.c +++ b/arch/csky/abiv1/cacheflush.c @@ -54,3 +54,23 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr, icache_inv_all(); } } + +void flush_kernel_dcache_page(struct page *page) +{ + struct address_space *mapping; + + mapping = page_mapping_file(page); + + if (!mapping || mapping_mapped(mapping)) + dcache_wbinv_all(); +} +EXPORT_SYMBOL(flush_kernel_dcache_page); + +void flush_cache_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + dcache_wbinv_all(); + + if (vma->vm_flags & VM_EXEC) + icache_inv_all(); +} diff --git a/arch/csky/abiv1/inc/abi/cacheflush.h b/arch/csky/abiv1/inc/abi/cacheflush.h index fce5604cef40..79ef9e8c1afd 100644 --- a/arch/csky/abiv1/inc/abi/cacheflush.h +++ b/arch/csky/abiv1/inc/abi/cacheflush.h @@ -4,26 +4,49 @@ #ifndef __ABI_CSKY_CACHEFLUSH_H #define __ABI_CSKY_CACHEFLUSH_H -#include +#include #include #include #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 extern void flush_dcache_page(struct page *); -#define flush_cache_mm(mm) cache_wbinv_all() 
+#define flush_cache_mm(mm) dcache_wbinv_all() #define flush_cache_page(vma, page, pfn) cache_wbinv_all() #define flush_cache_dup_mm(mm) cache_wbinv_all() +#define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE +extern void flush_kernel_dcache_page(struct page *); + +#define flush_dcache_mmap_lock(mapping) xa_lock_irq(&mapping->i_pages) +#define flush_dcache_mmap_unlock(mapping) xa_unlock_irq(&mapping->i_pages) + +static inline void flush_kernel_vmap_range(void *addr, int size) +{ + dcache_wbinv_all(); +} +static inline void invalidate_kernel_vmap_range(void *addr, int size) +{ + dcache_wbinv_all(); +} + +#define ARCH_HAS_FLUSH_ANON_PAGE +static inline void flush_anon_page(struct vm_area_struct *vma, + struct page *page, unsigned long vmaddr) +{ + if (PageAnon(page)) + cache_wbinv_all(); +} + /* * if (current_mm != vma->mm) cache_wbinv_range(start, end) will be broken. * Use cache_wbinv_all() here and need to be improved in future. */ -#define flush_cache_range(vma, start, end) cache_wbinv_all() -#define flush_cache_vmap(start, end) cache_wbinv_range(start, end) -#define flush_cache_vunmap(start, end) cache_wbinv_range(start, end) +extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); +#define flush_cache_vmap(start, end) cache_wbinv_all() +#define flush_cache_vunmap(start, end) cache_wbinv_all() -#define flush_icache_page(vma, page) cache_wbinv_all() +#define flush_icache_page(vma, page) do {} while (0); #define flush_icache_range(start, end) cache_wbinv_range(start, end) #define flush_icache_user_range(vma,page,addr,len) \ @@ -31,19 +54,13 @@ extern void flush_dcache_page(struct page *); #define copy_from_user_page(vma, page, vaddr, dst, src, len) \ do { \ - cache_wbinv_all(); \ memcpy(dst, src, len); \ - cache_wbinv_all(); \ } while (0) #define copy_to_user_page(vma, page, vaddr, dst, src, len) \ do { \ - cache_wbinv_all(); \ memcpy(dst, src, len); \ cache_wbinv_all(); \ } while (0) -#define flush_dcache_mmap_lock(mapping) do {} 
while (0) -#define flush_dcache_mmap_unlock(mapping) do {} while (0) - #endif /* __ABI_CSKY_CACHEFLUSH_H */ From fdbdcddc2c93096e9b956de930d2d710a1342502 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 28 Aug 2019 16:35:19 +0300 Subject: [PATCH 10/14] csky: Use generic free_initrd_mem() The csky implementation of free_initrd_mem() is an open-coded version of free_reserved_area() without poisoning. Remove it and make csky use the generic version of free_initrd_mem(). Signed-off-by: Mike Rapoport Signed-off-by: Guo Ren --- arch/csky/mm/init.c | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/arch/csky/mm/init.c b/arch/csky/mm/init.c index eb0dc9e5065f..d4c2292ea46b 100644 --- a/arch/csky/mm/init.c +++ b/arch/csky/mm/init.c @@ -60,22 +60,6 @@ void __init mem_init(void) mem_init_print_info(NULL); } -#ifdef CONFIG_BLK_DEV_INITRD -void free_initrd_mem(unsigned long start, unsigned long end) -{ - if (start < end) - pr_info("Freeing initrd memory: %ldk freed\n", - (end - start) >> 10); - - for (; start < end; start += PAGE_SIZE) { - ClearPageReserved(virt_to_page(start)); - init_page_count(virt_to_page(start)); - free_page(start); - totalram_pages_inc(); - } -} -#endif - extern char __init_begin[], __init_end[]; void free_initmem(void) From 48ede51fd94fe9251058fc85626b2aeb5cbb5884 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Wed, 25 Sep 2019 19:56:16 +0800 Subject: [PATCH 11/14] csky: Fixup add zero_fp fixup perf backtrace panic We need set fp zero to let backtrace know the end. The patch fixup perf callchain panic problem, because backtrace didn't know what is the end of fp. 
Signed-off-by: Guo Ren Reported-by: Mao Han --- arch/csky/kernel/entry.S | 50 +++++++++++++++++++++++--------------- arch/csky/kernel/process.c | 2 +- 2 files changed, 31 insertions(+), 21 deletions(-) diff --git a/arch/csky/kernel/entry.S b/arch/csky/kernel/entry.S index a7e84ccccbd8..564dab2fabaa 100644 --- a/arch/csky/kernel/entry.S +++ b/arch/csky/kernel/entry.S @@ -17,6 +17,12 @@ #define PTE_INDX_SHIFT 10 #define _PGDIR_SHIFT 22 +.macro zero_fp +#ifdef CONFIG_STACKTRACE + movi r8, 0 +#endif +.endm + .macro tlbop_begin name, val0, val1, val2 ENTRY(csky_\name) mtcr a3, ss2 @@ -96,6 +102,7 @@ ENTRY(csky_\name) SAVE_ALL 0 .endm .macro tlbop_end is_write + zero_fp RD_MEH a2 psrset ee, ie mov a0, sp @@ -120,6 +127,7 @@ tlbop_end 1 ENTRY(csky_systemcall) SAVE_ALL TRAP0_SIZE + zero_fp psrset ee, ie @@ -136,9 +144,9 @@ ENTRY(csky_systemcall) mov r9, sp bmaski r10, THREAD_SHIFT andn r9, r10 - ldw r8, (r9, TINFO_FLAGS) - ANDI_R3 r8, (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT) - cmpnei r8, 0 + ldw r12, (r9, TINFO_FLAGS) + ANDI_R3 r12, (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT) + cmpnei r12, 0 bt csky_syscall_trace #if defined(__CSKYABIV2__) subi sp, 8 @@ -180,7 +188,7 @@ csky_syscall_trace: ENTRY(ret_from_kernel_thread) jbsr schedule_tail - mov a0, r8 + mov a0, r10 jsr r9 jbsr ret_from_exception @@ -189,9 +197,9 @@ ENTRY(ret_from_fork) mov r9, sp bmaski r10, THREAD_SHIFT andn r9, r10 - ldw r8, (r9, TINFO_FLAGS) - ANDI_R3 r8, (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT) - cmpnei r8, 0 + ldw r12, (r9, TINFO_FLAGS) + ANDI_R3 r12, (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT) + cmpnei r12, 0 bf ret_from_exception mov a0, sp /* sp = pt_regs pointer */ jbsr syscall_trace_exit @@ -209,9 +217,9 @@ ret_from_exception: bmaski r10, THREAD_SHIFT andn r9, r10 - ldw r8, (r9, TINFO_FLAGS) - andi r8, (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED) - cmpnei r8, 0 + ldw r12, (r9, 
TINFO_FLAGS) + andi r12, (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED) + cmpnei r12, 0 bt exit_work 1: RESTORE_ALL @@ -220,11 +228,11 @@ exit_work: lrw syscallid, ret_from_exception mov lr, syscallid - btsti r8, TIF_NEED_RESCHED + btsti r12, TIF_NEED_RESCHED bt work_resched mov a0, sp - mov a1, r8 + mov a1, r12 jmpi do_notify_resume work_resched: @@ -232,6 +240,7 @@ work_resched: ENTRY(csky_trap) SAVE_ALL 0 + zero_fp psrset ee mov a0, sp /* Push Stack pointer arg */ jbsr trap_c /* Call C-level trap handler */ @@ -265,6 +274,7 @@ ENTRY(csky_get_tls) ENTRY(csky_irq) SAVE_ALL 0 + zero_fp psrset ee #ifdef CONFIG_PREEMPT @@ -276,21 +286,21 @@ ENTRY(csky_irq) * Get task_struct->stack.preempt_count for current, * and increase 1. */ - ldw r8, (r9, TINFO_PREEMPT) - addi r8, 1 - stw r8, (r9, TINFO_PREEMPT) + ldw r12, (r9, TINFO_PREEMPT) + addi r12, 1 + stw r12, (r9, TINFO_PREEMPT) #endif mov a0, sp jbsr csky_do_IRQ #ifdef CONFIG_PREEMPT - subi r8, 1 - stw r8, (r9, TINFO_PREEMPT) - cmpnei r8, 0 + subi r12, 1 + stw r12, (r9, TINFO_PREEMPT) + cmpnei r12, 0 bt 2f - ldw r8, (r9, TINFO_FLAGS) - btsti r8, TIF_NEED_RESCHED + ldw r12, (r9, TINFO_FLAGS) + btsti r12, TIF_NEED_RESCHED bf 2f 1: jbsr preempt_schedule_irq /* irq en/disable is done inside */ diff --git a/arch/csky/kernel/process.c b/arch/csky/kernel/process.c index e555740c0be5..f320d9248a22 100644 --- a/arch/csky/kernel/process.c +++ b/arch/csky/kernel/process.c @@ -55,7 +55,7 @@ int copy_thread(unsigned long clone_flags, if (unlikely(p->flags & PF_KTHREAD)) { memset(childregs, 0, sizeof(struct pt_regs)); childstack->r15 = (unsigned long) ret_from_kernel_thread; - childstack->r8 = kthread_arg; + childstack->r10 = kthread_arg; childstack->r9 = usp; childregs->sr = mfcr("psr"); } else { From 3a09d8e2893b2403a043890e5832966e8640feaf Mon Sep 17 00:00:00 2001 From: Mao Han Date: Wed, 25 Sep 2019 17:23:02 +0800 Subject: [PATCH 12/14] csky: Fixup csky_pmu.max_period assignment The csky_pmu.max_period has type u64, 
and BIT() can only return a 32-bit unsigned long on C-SKY. The initialization of max_period will be incorrect when count_width is bigger than 32. Use BIT_ULL() instead. Signed-off-by: Mao Han Signed-off-by: Guo Ren --- arch/csky/kernel/perf_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/csky/kernel/perf_event.c b/arch/csky/kernel/perf_event.c index 4c1a1934d76a..7570109cddc6 100644 --- a/arch/csky/kernel/perf_event.c +++ b/arch/csky/kernel/perf_event.c @@ -1306,7 +1306,7 @@ int csky_pmu_device_probe(struct platform_device *pdev, &csky_pmu.count_width)) { csky_pmu.count_width = DEFAULT_COUNT_WIDTH; } - csky_pmu.max_period = BIT(csky_pmu.count_width) - 1; + csky_pmu.max_period = BIT_ULL(csky_pmu.count_width) - 1; csky_pmu.plat_device = pdev; From a2139d3b4fd7ef26a363a1b1eb6cd55be2c1bcd1 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Mon, 23 Sep 2019 15:36:14 +0100 Subject: [PATCH 13/14] csky: entry: Remove unneeded need_resched() loop Since the enabling and disabling of IRQs within preempt_schedule_irq() is contained in a need_resched() loop, we don't need the outer arch code loop. 
Signed-off-by: Valentin Schneider Signed-off-by: Guo Ren --- arch/csky/kernel/entry.S | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/csky/kernel/entry.S b/arch/csky/kernel/entry.S index 564dab2fabaa..a7a5b67df898 100644 --- a/arch/csky/kernel/entry.S +++ b/arch/csky/kernel/entry.S @@ -302,11 +302,7 @@ ENTRY(csky_irq) ldw r12, (r9, TINFO_FLAGS) btsti r12, TIF_NEED_RESCHED bf 2f -1: jbsr preempt_schedule_irq /* irq en/disable is done inside */ - ldw r7, (r9, TINFO_FLAGS) /* get new tasks TI_FLAGS */ - btsti r7, TIF_NEED_RESCHED - bt 1b /* go again */ #endif 2: jmpi ret_from_exception From 9af032a30172e119a5935f802b066631f8ded2d6 Mon Sep 17 00:00:00 2001 From: Krzysztof Wilczynski Date: Tue, 3 Sep 2019 13:36:51 +0200 Subject: [PATCH 14/14] csky: Move static keyword to the front of declaration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the static keyword to the front of declaration of csky_pmu_of_device_ids, and resolve the following compiler warning that can be seen when building with warnings enabled (W=1): arch/csky/kernel/perf_event.c:1340:1: warning: ‘static’ is not at beginning of declaration [-Wold-style-declaration] Signed-off-by: Krzysztof Wilczynski Signed-off-by: Guo Ren --- arch/csky/kernel/perf_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/csky/kernel/perf_event.c b/arch/csky/kernel/perf_event.c index 7570109cddc6..1a29f1157449 100644 --- a/arch/csky/kernel/perf_event.c +++ b/arch/csky/kernel/perf_event.c @@ -1337,7 +1337,7 @@ int csky_pmu_device_probe(struct platform_device *pdev, return ret; } -const static struct of_device_id csky_pmu_of_device_ids[] = { +static const struct of_device_id csky_pmu_of_device_ids[] = { {.compatible = "csky,csky-pmu"}, {}, };