ARM: decompressor: switch to by-VA cache maintenance for v7 cores

Update the v7 cache_clean_flush routine to take into account the
memory range passed in r0/r1, and perform cache maintenance by
virtual address on this range instead of set/way maintenance, which
is inappropriate for the purpose of maintaining the cached state of
memory contents.

Since this removes any use of the stack in the implementation of
cache_clean_flush(), we can also drop some code that manages the
value of the stack pointer before calling it.

Tested-by: Tony Lindgren <tony@atomide.com>
Tested-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
This commit is contained in:
Ard Biesheuvel 2020-02-18 17:06:14 +01:00
parent e114412f61
commit 401b368caa
1 changed files with 30 additions and 53 deletions

View File

@ -530,11 +530,6 @@ dtb_check_done:
mov r0, r9 @ start of relocated zImage
add r1, sp, r6 @ end of relocated zImage
#ifndef CONFIG_ZBOOT_ROM
/* cache_clean_flush may use the stack, so relocate it */
add sp, sp, r6
#endif
bl cache_clean_flush
badr r0, restart
@ -683,6 +678,24 @@ params: ldr r0, =0x10000100 @ params_phys for RPC
.align
#endif
/*
* dcache_line_size - get the minimum D-cache line size from the CTR register
* on ARMv7.
*/
.macro dcache_line_size, reg, tmp
#ifdef CONFIG_CPU_V7M
movw \tmp, #:lower16:BASEADDR_V7M_SCB + V7M_SCB_CTR
movt \tmp, #:upper16:BASEADDR_V7M_SCB + V7M_SCB_CTR
ldr \tmp, [\tmp]
#else
mrc p15, 0, \tmp, c0, c0, 1 @ read ctr
#endif
lsr \tmp, \tmp, #16
and \tmp, \tmp, #0xf @ cache line size encoding
mov \reg, #4 @ bytes per word
mov \reg, \reg, lsl \tmp @ actual cache line size
.endm
/*
* Turn on the cache. We need to setup some page tables so that we
* can have both the I and D caches on.
@ -1175,8 +1188,6 @@ __armv7_mmu_cache_off:
bic r0, r0, #0x000c
#endif
mcr p15, 0, r0, c1, c0 @ turn MMU and cache off
mov r12, lr
bl __armv7_mmu_cache_flush
mov r0, #0
#ifdef CONFIG_MMU
mcr p15, 0, r0, c8, c7, 0 @ invalidate whole TLB
@ -1184,7 +1195,7 @@ __armv7_mmu_cache_off:
mcr p15, 0, r0, c7, c5, 6 @ invalidate BTC
mcr p15, 0, r0, c7, c10, 4 @ DSB
mcr p15, 0, r0, c7, c5, 4 @ ISB
mov pc, r12
mov pc, lr
/*
* Clean and flush the cache to maintain consistency.
@ -1200,6 +1211,7 @@ __armv7_mmu_cache_off:
.align 5
cache_clean_flush:
mov r3, #16
mov r11, r1
b call_cache_fn
__armv4_mpu_cache_flush:
@ -1250,51 +1262,16 @@ __armv7_mmu_cache_flush:
mcr p15, 0, r10, c7, c14, 0 @ clean+invalidate D
b iflush
hierarchical:
mcr p15, 0, r10, c7, c10, 5 @ DMB
stmfd sp!, {r0-r7, r9-r11}
mrc p15, 1, r0, c0, c0, 1 @ read clidr
ands r3, r0, #0x7000000 @ extract loc from clidr
mov r3, r3, lsr #23 @ left align loc bit field
beq finished @ if loc is 0, then no need to clean
mov r10, #0 @ start clean at cache level 0
loop1:
add r2, r10, r10, lsr #1 @ work out 3x current cache level
mov r1, r0, lsr r2 @ extract cache type bits from clidr
and r1, r1, #7 @ mask of the bits for current cache only
cmp r1, #2 @ see what cache we have at this level
blt skip @ skip if no cache, or just i-cache
mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr
mcr p15, 0, r10, c7, c5, 4 @ isb to sych the new cssr&csidr
mrc p15, 1, r1, c0, c0, 0 @ read the new csidr
and r2, r1, #7 @ extract the length of the cache lines
add r2, r2, #4 @ add 4 (line length offset)
ldr r4, =0x3ff
ands r4, r4, r1, lsr #3 @ find maximum number on the way size
clz r5, r4 @ find bit position of way size increment
ldr r7, =0x7fff
ands r7, r7, r1, lsr #13 @ extract max number of the index size
loop2:
mov r9, r4 @ create working copy of max way size
loop3:
ARM( orr r11, r10, r9, lsl r5 ) @ factor way and cache number into r11
ARM( orr r11, r11, r7, lsl r2 ) @ factor index number into r11
THUMB( lsl r6, r9, r5 )
THUMB( orr r11, r10, r6 ) @ factor way and cache number into r11
THUMB( lsl r6, r7, r2 )
THUMB( orr r11, r11, r6 ) @ factor index number into r11
mcr p15, 0, r11, c7, c14, 2 @ clean & invalidate by set/way
subs r9, r9, #1 @ decrement the way
bge loop3
subs r7, r7, #1 @ decrement the index
bge loop2
skip:
add r10, r10, #2 @ increment cache number
cmp r3, r10
bgt loop1
finished:
ldmfd sp!, {r0-r7, r9-r11}
mov r10, #0 @ switch back to cache level 0
mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr
dcache_line_size r1, r2 @ r1 := dcache min line size
sub r2, r1, #1 @ r2 := line size mask
bic r0, r0, r2 @ round down start to line size
sub r11, r11, #1 @ end address is exclusive
bic r11, r11, r2 @ round down end to line size
0: cmp r0, r11 @ finished?
bgt iflush
mcr p15, 0, r0, c7, c14, 1 @ Dcache clean/invalidate by VA
add r0, r0, r1
b 0b
iflush:
mcr p15, 0, r10, c7, c10, 4 @ DSB
mcr p15, 0, r10, c7, c5, 0 @ invalidate I+BTB