mirror of https://gitee.com/openkylin/linux.git
parisc: Optimize cache flush algorithms
The attached patch implements three optimizations: 1) Loops in flush_user_dcache_range_asm, flush_kernel_dcache_range_asm, purge_kernel_dcache_range_asm, flush_user_icache_range_asm, and flush_kernel_icache_range_asm are unrolled to reduce branch overhead. 2) The static branch prediction for cmpb instructions in pacache.S has been reviewed and the operand order adjusted where necessary. 3) For flush routines in cache.c, we purge rather than flush when we have no context. The pdc instruction at level 0 is not required to write back dirty lines to memory. This provides a performance improvement over the fdc instruction if the feature is implemented. Version 2 adds alternative patching. The patch provides an average improvement of about 2%. Signed-off-by: John David Anglin <dave.anglin@bell.net> Signed-off-by: Helge Deller <deller@gmx.de>
This commit is contained in:
parent
5a23237f14
commit
4c5fe5db1a
|
@ -36,6 +36,7 @@ EXPORT_SYMBOL(dcache_stride);
|
|||
|
||||
void flush_dcache_page_asm(unsigned long phys_addr, unsigned long vaddr);
|
||||
EXPORT_SYMBOL(flush_dcache_page_asm);
|
||||
void purge_dcache_page_asm(unsigned long phys_addr, unsigned long vaddr);
|
||||
void flush_icache_page_asm(unsigned long phys_addr, unsigned long vaddr);
|
||||
|
||||
|
||||
|
@ -303,6 +304,17 @@ __flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr,
|
|||
preempt_enable();
|
||||
}
|
||||
|
||||
/*
 * __purge_cache_page - purge the data cache for one page and, for
 * executable mappings, flush the instruction cache as well.
 *
 * @vma:      mapping the page belongs to; only vm_flags is examined here
 * @vmaddr:   virtual address of the page, passed through to the asm helpers
 * @physaddr: physical address of the page, passed through to the asm helpers
 *
 * Counterpart of __flush_cache_page() that calls purge_dcache_page_asm()
 * for the data cache. Per the commit message, it is used when the mm has
 * no context, where dirty lines need not be written back to memory.
 * Preemption is disabled for the duration of the cache operations.
 */
static inline void
|
||||
__purge_cache_page(struct vm_area_struct *vma, unsigned long vmaddr,
|
||||
unsigned long physaddr)
|
||||
{
|
||||
preempt_disable();
|
||||
purge_dcache_page_asm(physaddr, vmaddr);
|
||||
/* The instruction cache is only touched for executable mappings. */
if (vma->vm_flags & VM_EXEC)
|
||||
flush_icache_page_asm(physaddr, vmaddr);
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
void flush_dcache_page(struct page *page)
|
||||
{
|
||||
struct address_space *mapping = page_mapping_file(page);
|
||||
|
@ -563,9 +575,12 @@ void flush_cache_mm(struct mm_struct *mm)
|
|||
pfn = pte_pfn(*ptep);
|
||||
if (!pfn_valid(pfn))
|
||||
continue;
|
||||
if (unlikely(mm->context))
|
||||
if (unlikely(mm->context)) {
|
||||
flush_tlb_page(vma, addr);
|
||||
__flush_cache_page(vma, addr, PFN_PHYS(pfn));
|
||||
__flush_cache_page(vma, addr, PFN_PHYS(pfn));
|
||||
} else {
|
||||
__purge_cache_page(vma, addr, PFN_PHYS(pfn));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -600,9 +615,12 @@ void flush_cache_range(struct vm_area_struct *vma,
|
|||
continue;
|
||||
pfn = pte_pfn(*ptep);
|
||||
if (pfn_valid(pfn)) {
|
||||
if (unlikely(vma->vm_mm->context))
|
||||
if (unlikely(vma->vm_mm->context)) {
|
||||
flush_tlb_page(vma, addr);
|
||||
__flush_cache_page(vma, addr, PFN_PHYS(pfn));
|
||||
__flush_cache_page(vma, addr, PFN_PHYS(pfn));
|
||||
} else {
|
||||
__purge_cache_page(vma, addr, PFN_PHYS(pfn));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -611,9 +629,12 @@ void
|
|||
flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long pfn)
|
||||
{
|
||||
if (pfn_valid(pfn)) {
|
||||
if (likely(vma->vm_mm->context))
|
||||
if (likely(vma->vm_mm->context)) {
|
||||
flush_tlb_page(vma, vmaddr);
|
||||
__flush_cache_page(vma, vmaddr, PFN_PHYS(pfn));
|
||||
__flush_cache_page(vma, vmaddr, PFN_PHYS(pfn));
|
||||
} else {
|
||||
__purge_cache_page(vma, vmaddr, PFN_PHYS(pfn));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -838,8 +838,7 @@ ENTRY_CFI(flush_dcache_page_asm)
|
|||
add %r28, %r25, %r25
|
||||
sub %r25, r31, %r25
|
||||
|
||||
|
||||
1: fdc,m r31(%r28)
|
||||
1: fdc,m r31(%r28)
|
||||
fdc,m r31(%r28)
|
||||
fdc,m r31(%r28)
|
||||
fdc,m r31(%r28)
|
||||
|
@ -854,7 +853,7 @@ ENTRY_CFI(flush_dcache_page_asm)
|
|||
fdc,m r31(%r28)
|
||||
fdc,m r31(%r28)
|
||||
fdc,m r31(%r28)
|
||||
cmpb,COND(<<) %r28, %r25,1b
|
||||
cmpb,COND(>>) %r25, %r28, 1b /* predict taken */
|
||||
fdc,m r31(%r28)
|
||||
|
||||
89: ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
|
||||
|
@ -863,6 +862,67 @@ ENTRY_CFI(flush_dcache_page_asm)
|
|||
nop
|
||||
ENDPROC_CFI(flush_dcache_page_asm)
|
||||
|
||||
/*
 * purge_dcache_page_asm(phys_addr, vaddr)
 *
 * Purge the data cache lines of one page through a temporary alias
 * mapping based at TMPALIAS_MAP_START.
 *
 *   %r26 - physical address of the page (converted to TLB insert format)
 *   %r25 - virtual address, used to form the aliased address in %r28
 *
 * The sequence is: build the aliased address in %r28, purge any stale
 * TLB entry for it, load dcache_stride into r31, compute the loop bound
 * in %r25 (page end minus one stride), then issue pdc over the page.
 * Per the commit message, pdc is used because it is not required to
 * write dirty lines back to memory.
 */
ENTRY_CFI(purge_dcache_page_asm)
|
||||
ldil L%(TMPALIAS_MAP_START), %r28
|
||||
#ifdef CONFIG_64BIT
|
||||
#if (TMPALIAS_MAP_START >= 0x80000000)
|
||||
depdi 0, 31,32, %r28 /* clear any sign extension */
|
||||
#endif
|
||||
convert_phys_for_tlb_insert20 %r26 /* convert phys addr to tlb insert format */
|
||||
depd %r25, 63,22, %r28 /* Form aliased virtual address 'to' */
|
||||
depdi 0, 63,PAGE_SHIFT, %r28 /* Clear any offset bits */
|
||||
#else
|
||||
extrw,u %r26, 24,25, %r26 /* convert phys addr to tlb insert format */
|
||||
depw %r25, 31,22, %r28 /* Form aliased virtual address 'to' */
|
||||
depwi 0, 31,PAGE_SHIFT, %r28 /* Clear any offset bits */
|
||||
#endif
|
||||
|
||||
/* Purge any old translation */
|
||||
|
||||
#ifdef CONFIG_PA20
|
||||
pdtlb,l %r0(%r28)
|
||||
#else
|
||||
tlb_lock %r20,%r21,%r22
|
||||
0: pdtlb %r0(%r28)
|
||||
tlb_unlock %r20,%r21,%r22
|
||||
ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
|
||||
#endif
|
||||
|
||||
/* Labels 88 and 89 bracket the range named by the ALTERNATIVE entry below. */
88: ldil L%dcache_stride, %r1
|
||||
ldw R%dcache_stride(%r1), r31
|
||||
|
||||
#ifdef CONFIG_64BIT
|
||||
depdi,z 1, 63-PAGE_SHIFT,1, %r25
|
||||
#else
|
||||
depwi,z 1, 31-PAGE_SHIFT,1, %r25
|
||||
#endif
|
||||
add %r28, %r25, %r25
|
||||
sub %r25, r31, %r25
|
||||
|
||||
/*
 * Unrolled purge loop: 16 pdc,m per pass (15 before the cmpb plus the
 * one following it). The loop repeats while %r25 >> %r28 (unsigned).
 */
1: pdc,m r31(%r28)
|
||||
pdc,m r31(%r28)
|
||||
pdc,m r31(%r28)
|
||||
pdc,m r31(%r28)
|
||||
pdc,m r31(%r28)
|
||||
pdc,m r31(%r28)
|
||||
pdc,m r31(%r28)
|
||||
pdc,m r31(%r28)
|
||||
pdc,m r31(%r28)
|
||||
pdc,m r31(%r28)
|
||||
pdc,m r31(%r28)
|
||||
pdc,m r31(%r28)
|
||||
pdc,m r31(%r28)
|
||||
pdc,m r31(%r28)
|
||||
pdc,m r31(%r28)
|
||||
cmpb,COND(>>) %r25, %r28, 1b /* predict taken */
|
||||
pdc,m r31(%r28)
|
||||
|
||||
/* NOTE(review): presumably replaces the 88..89 range with NOPs when ALT_COND_NO_DCACHE applies - confirm against the ALTERNATIVE macro. */
89: ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
|
||||
sync
|
||||
bv %r0(%r2)
|
||||
nop
|
||||
ENDPROC_CFI(purge_dcache_page_asm)
|
||||
|
||||
ENTRY_CFI(flush_icache_page_asm)
|
||||
ldil L%(TMPALIAS_MAP_START), %r28
|
||||
#ifdef CONFIG_64BIT
|
||||
|
@ -908,7 +968,6 @@ ENTRY_CFI(flush_icache_page_asm)
|
|||
add %r28, %r25, %r25
|
||||
sub %r25, %r31, %r25
|
||||
|
||||
|
||||
/* fic only has the type 26 form on PA1.1, requiring an
|
||||
* explicit space specification, so use %sr4 */
|
||||
1: fic,m %r31(%sr4,%r28)
|
||||
|
@ -926,7 +985,7 @@ ENTRY_CFI(flush_icache_page_asm)
|
|||
fic,m %r31(%sr4,%r28)
|
||||
fic,m %r31(%sr4,%r28)
|
||||
fic,m %r31(%sr4,%r28)
|
||||
cmpb,COND(<<) %r28, %r25,1b
|
||||
cmpb,COND(>>) %r25, %r28, 1b /* predict taken */
|
||||
fic,m %r31(%sr4,%r28)
|
||||
|
||||
89: ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
|
||||
|
@ -947,7 +1006,6 @@ ENTRY_CFI(flush_kernel_dcache_page_asm)
|
|||
add %r26, %r25, %r25
|
||||
sub %r25, %r23, %r25
|
||||
|
||||
|
||||
1: fdc,m %r23(%r26)
|
||||
fdc,m %r23(%r26)
|
||||
fdc,m %r23(%r26)
|
||||
|
@ -963,7 +1021,7 @@ ENTRY_CFI(flush_kernel_dcache_page_asm)
|
|||
fdc,m %r23(%r26)
|
||||
fdc,m %r23(%r26)
|
||||
fdc,m %r23(%r26)
|
||||
cmpb,COND(<<) %r26, %r25,1b
|
||||
cmpb,COND(>>) %r25, %r26, 1b /* predict taken */
|
||||
fdc,m %r23(%r26)
|
||||
|
||||
89: ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
|
||||
|
@ -999,7 +1057,7 @@ ENTRY_CFI(purge_kernel_dcache_page_asm)
|
|||
pdc,m %r23(%r26)
|
||||
pdc,m %r23(%r26)
|
||||
pdc,m %r23(%r26)
|
||||
cmpb,COND(<<) %r26, %r25, 1b
|
||||
cmpb,COND(>>) %r25, %r26, 1b /* predict taken */
|
||||
pdc,m %r23(%r26)
|
||||
|
||||
89: ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
|
||||
|
@ -1014,7 +1072,33 @@ ENTRY_CFI(flush_user_dcache_range_asm)
|
|||
ldo -1(%r23), %r21
|
||||
ANDCM %r26, %r21, %r26
|
||||
|
||||
1: cmpb,COND(<<),n %r26, %r25, 1b
|
||||
#ifdef CONFIG_64BIT
|
||||
depd,z %r23, 59, 60, %r21
|
||||
#else
|
||||
depw,z %r23, 27, 28, %r21
|
||||
#endif
|
||||
add %r26, %r21, %r22
|
||||
cmpb,COND(>>),n %r22, %r25, 2f /* predict not taken */
|
||||
1: add %r22, %r21, %r22
|
||||
fdc,m %r23(%sr3, %r26)
|
||||
fdc,m %r23(%sr3, %r26)
|
||||
fdc,m %r23(%sr3, %r26)
|
||||
fdc,m %r23(%sr3, %r26)
|
||||
fdc,m %r23(%sr3, %r26)
|
||||
fdc,m %r23(%sr3, %r26)
|
||||
fdc,m %r23(%sr3, %r26)
|
||||
fdc,m %r23(%sr3, %r26)
|
||||
fdc,m %r23(%sr3, %r26)
|
||||
fdc,m %r23(%sr3, %r26)
|
||||
fdc,m %r23(%sr3, %r26)
|
||||
fdc,m %r23(%sr3, %r26)
|
||||
fdc,m %r23(%sr3, %r26)
|
||||
fdc,m %r23(%sr3, %r26)
|
||||
fdc,m %r23(%sr3, %r26)
|
||||
cmpb,COND(<<=) %r22, %r25, 1b /* predict taken */
|
||||
fdc,m %r23(%sr3, %r26)
|
||||
|
||||
2: cmpb,COND(>>),n %r25, %r26, 2b
|
||||
fdc,m %r23(%sr3, %r26)
|
||||
|
||||
89: ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
|
||||
|
@ -1029,7 +1113,33 @@ ENTRY_CFI(flush_kernel_dcache_range_asm)
|
|||
ldo -1(%r23), %r21
|
||||
ANDCM %r26, %r21, %r26
|
||||
|
||||
1: cmpb,COND(<<),n %r26, %r25,1b
|
||||
#ifdef CONFIG_64BIT
|
||||
depd,z %r23, 59, 60, %r21
|
||||
#else
|
||||
depw,z %r23, 27, 28, %r21
|
||||
#endif
|
||||
add %r26, %r21, %r22
|
||||
cmpb,COND(>>),n %r22, %r25, 2f /* predict not taken */
|
||||
1: add %r22, %r21, %r22
|
||||
fdc,m %r23(%r26)
|
||||
fdc,m %r23(%r26)
|
||||
fdc,m %r23(%r26)
|
||||
fdc,m %r23(%r26)
|
||||
fdc,m %r23(%r26)
|
||||
fdc,m %r23(%r26)
|
||||
fdc,m %r23(%r26)
|
||||
fdc,m %r23(%r26)
|
||||
fdc,m %r23(%r26)
|
||||
fdc,m %r23(%r26)
|
||||
fdc,m %r23(%r26)
|
||||
fdc,m %r23(%r26)
|
||||
fdc,m %r23(%r26)
|
||||
fdc,m %r23(%r26)
|
||||
fdc,m %r23(%r26)
|
||||
cmpb,COND(<<=) %r22, %r25, 1b /* predict taken */
|
||||
fdc,m %r23(%r26)
|
||||
|
||||
2: cmpb,COND(>>),n %r25, %r26, 2b /* predict taken */
|
||||
fdc,m %r23(%r26)
|
||||
|
||||
sync
|
||||
|
@ -1045,7 +1155,33 @@ ENTRY_CFI(purge_kernel_dcache_range_asm)
|
|||
ldo -1(%r23), %r21
|
||||
ANDCM %r26, %r21, %r26
|
||||
|
||||
1: cmpb,COND(<<),n %r26, %r25,1b
|
||||
#ifdef CONFIG_64BIT
|
||||
depd,z %r23, 59, 60, %r21
|
||||
#else
|
||||
depw,z %r23, 27, 28, %r21
|
||||
#endif
|
||||
add %r26, %r21, %r22
|
||||
cmpb,COND(>>),n %r22, %r25, 2f /* predict not taken */
|
||||
1: add %r22, %r21, %r22
|
||||
pdc,m %r23(%r26)
|
||||
pdc,m %r23(%r26)
|
||||
pdc,m %r23(%r26)
|
||||
pdc,m %r23(%r26)
|
||||
pdc,m %r23(%r26)
|
||||
pdc,m %r23(%r26)
|
||||
pdc,m %r23(%r26)
|
||||
pdc,m %r23(%r26)
|
||||
pdc,m %r23(%r26)
|
||||
pdc,m %r23(%r26)
|
||||
pdc,m %r23(%r26)
|
||||
pdc,m %r23(%r26)
|
||||
pdc,m %r23(%r26)
|
||||
pdc,m %r23(%r26)
|
||||
pdc,m %r23(%r26)
|
||||
cmpb,COND(<<=) %r22, %r25, 1b /* predict taken */
|
||||
pdc,m %r23(%r26)
|
||||
|
||||
2: cmpb,COND(>>),n %r25, %r26, 2b /* predict taken */
|
||||
pdc,m %r23(%r26)
|
||||
|
||||
sync
|
||||
|
@ -1061,7 +1197,33 @@ ENTRY_CFI(flush_user_icache_range_asm)
|
|||
ldo -1(%r23), %r21
|
||||
ANDCM %r26, %r21, %r26
|
||||
|
||||
1: cmpb,COND(<<),n %r26, %r25,1b
|
||||
#ifdef CONFIG_64BIT
|
||||
depd,z %r23, 59, 60, %r21
|
||||
#else
|
||||
depw,z %r23, 27, 28, %r21
|
||||
#endif
|
||||
add %r26, %r21, %r22
|
||||
cmpb,COND(>>),n %r22, %r25, 2f /* predict not taken */
|
||||
1: add %r22, %r21, %r22
|
||||
fic,m %r23(%sr3, %r26)
|
||||
fic,m %r23(%sr3, %r26)
|
||||
fic,m %r23(%sr3, %r26)
|
||||
fic,m %r23(%sr3, %r26)
|
||||
fic,m %r23(%sr3, %r26)
|
||||
fic,m %r23(%sr3, %r26)
|
||||
fic,m %r23(%sr3, %r26)
|
||||
fic,m %r23(%sr3, %r26)
|
||||
fic,m %r23(%sr3, %r26)
|
||||
fic,m %r23(%sr3, %r26)
|
||||
fic,m %r23(%sr3, %r26)
|
||||
fic,m %r23(%sr3, %r26)
|
||||
fic,m %r23(%sr3, %r26)
|
||||
fic,m %r23(%sr3, %r26)
|
||||
fic,m %r23(%sr3, %r26)
|
||||
cmpb,COND(<<=) %r22, %r25, 1b /* predict taken */
|
||||
fic,m %r23(%sr3, %r26)
|
||||
|
||||
2: cmpb,COND(>>),n %r25, %r26, 2b
|
||||
fic,m %r23(%sr3, %r26)
|
||||
|
||||
89: ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
|
||||
|
@ -1098,7 +1260,7 @@ ENTRY_CFI(flush_kernel_icache_page)
|
|||
fic,m %r23(%sr4, %r26)
|
||||
fic,m %r23(%sr4, %r26)
|
||||
fic,m %r23(%sr4, %r26)
|
||||
cmpb,COND(<<) %r26, %r25, 1b
|
||||
cmpb,COND(>>) %r25, %r26, 1b /* predict taken */
|
||||
fic,m %r23(%sr4, %r26)
|
||||
|
||||
89: ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
|
||||
|
@ -1113,7 +1275,33 @@ ENTRY_CFI(flush_kernel_icache_range_asm)
|
|||
ldo -1(%r23), %r21
|
||||
ANDCM %r26, %r21, %r26
|
||||
|
||||
1: cmpb,COND(<<),n %r26, %r25, 1b
|
||||
#ifdef CONFIG_64BIT
|
||||
depd,z %r23, 59, 60, %r21
|
||||
#else
|
||||
depw,z %r23, 27, 28, %r21
|
||||
#endif
|
||||
add %r26, %r21, %r22
|
||||
cmpb,COND(>>),n %r22, %r25, 2f /* predict not taken */
|
||||
1: add %r22, %r21, %r22
|
||||
fic,m %r23(%sr4, %r26)
|
||||
fic,m %r23(%sr4, %r26)
|
||||
fic,m %r23(%sr4, %r26)
|
||||
fic,m %r23(%sr4, %r26)
|
||||
fic,m %r23(%sr4, %r26)
|
||||
fic,m %r23(%sr4, %r26)
|
||||
fic,m %r23(%sr4, %r26)
|
||||
fic,m %r23(%sr4, %r26)
|
||||
fic,m %r23(%sr4, %r26)
|
||||
fic,m %r23(%sr4, %r26)
|
||||
fic,m %r23(%sr4, %r26)
|
||||
fic,m %r23(%sr4, %r26)
|
||||
fic,m %r23(%sr4, %r26)
|
||||
fic,m %r23(%sr4, %r26)
|
||||
fic,m %r23(%sr4, %r26)
|
||||
cmpb,COND(<<=) %r22, %r25, 1b /* predict taken */
|
||||
fic,m %r23(%sr4, %r26)
|
||||
|
||||
2: cmpb,COND(>>),n %r25, %r26, 2b /* predict taken */
|
||||
fic,m %r23(%sr4, %r26)
|
||||
|
||||
89: ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
|
||||
|
|
Loading…
Reference in New Issue