Commit 6c386e58, authored by Nicolas Pitre, committed by Lennert Buytenhek

[ARM] Feroceon: speed up flushing of the entire cache

Flushing the L1 D cache with a test/clean/invalidate loop is very
easy in software, but it is not the quickest way of doing it, as
there is a lot of overhead involved in re-scanning the cache from
the beginning every time we hit a dirty line.

This patch makes proc-feroceon.S use "clean+invalidate by set/way"
loops that match the possible cache configurations of Feroceon CPUs
(either direct-mapped or 4-way set associative).

Signed-off-by: Nicolas Pitre <nico@marvell.com>
Signed-off-by: Lennert Buytenhek <buytenh@marvell.com>
parent 79e90dd5
...@@ -44,11 +44,31 @@ ...@@ -44,11 +44,31 @@
*/ */
#define CACHE_DLINESIZE 32 #define CACHE_DLINESIZE 32
/*
 * Eight bytes (two 32-bit words) of zero-initialised storage in .bss.
 * cpu_feroceon_proc_init fills both words with "stmia r1, {r2, r3}" and
 * __flush_whole_cache reads them back with "ldmia r1, {r1, r3}".
 */
.bss
.align 3
__cache_params_loc:
.space 8
.text .text
/*
 * A word kept in .text that holds the address of __cache_params_loc, so
 * that code can obtain that address with "ldr r1, __cache_params".
 */
__cache_params:
.word __cache_params_loc
/* /*
* cpu_feroceon_proc_init() * cpu_feroceon_proc_init()
*/ */
ENTRY(cpu_feroceon_proc_init) ENTRY(cpu_feroceon_proc_init)
/*
 * Derive the two loop parameters used by the set/way flush in
 * __flush_whole_cache and store them at __cache_params_loc:
 *   word 0 (r2): cache size per way minus one 32-byte line, i.e. the
 *                highest set index already positioned at bit 5
 *   word 1 (r3): highest way index positioned at bit 30
 *                ((4 - 1) << 30 for 4-way, 0 for 1-way)
 * Clobbers r0-r3 and the condition flags; returns through lr.
 */
mrc p15, 0, r0, c0, c0, 1 @ read cache type register
/* r1 = address of the two-word parameter area (__cache_params_loc) */
ldr r1, __cache_params
/* r2 = 16 << 5 (512), the base value that is scaled by the size order below */
mov r2, #(16 << 5)
/*
 * The flags set by this tst are consumed by the movne/moveq/movne below.
 * The mov and and instructions in between have no "s" suffix, so they
 * leave the flags untouched.
 */
tst r0, #(1 << 16) @ get way
mov r0, r0, lsr #18 @ get cache size order
movne r3, #((4 - 1) << 30) @ 4-way
/* keep only the low four bits of the shifted register value */
and r0, r0, #0xf
moveq r3, #0 @ 1-way
mov r2, r2, lsl r0 @ actual cache size
/* 4-way only: divide by the number of ways; the result is still scaled by 1 << 5 */
movne r2, r2, lsr #2 @ turned into # of sets
/* step back by one line so r2 is the last set index, not the count */
sub r2, r2, #(1 << 5)
/* store r2 then r3 into the two consecutive words at [r1] */
stmia r1, {r2, r3}
mov pc, lr mov pc, lr
/* /*
...@@ -117,11 +137,19 @@ ENTRY(feroceon_flush_user_cache_all) ...@@ -117,11 +137,19 @@ ENTRY(feroceon_flush_user_cache_all)
*/ */
ENTRY(feroceon_flush_kern_cache_all) ENTRY(feroceon_flush_kern_cache_all)
mov r2, #VM_EXEC mov r2, #VM_EXEC
mov ip, #0
__flush_whole_cache: __flush_whole_cache:
1: mrc p15, 0, r15, c7, c14, 3 @ test,clean,invalidate ldr r1, __cache_params
bne 1b ldmia r1, {r1, r3}
1: orr ip, r1, r3
2: mcr p15, 0, ip, c7, c14, 2 @ clean + invalidate D set/way
subs ip, ip, #(1 << 30) @ next way
bcs 2b
subs r1, r1, #(1 << 5) @ next set
bcs 1b
tst r2, #VM_EXEC tst r2, #VM_EXEC
mov ip, #0
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
mcrne p15, 0, ip, c7, c10, 4 @ drain WB mcrne p15, 0, ip, c7, c10, 4 @ drain WB
mov pc, lr mov pc, lr
...@@ -138,7 +166,6 @@ __flush_whole_cache: ...@@ -138,7 +166,6 @@ __flush_whole_cache:
*/ */
.align 5 .align 5
ENTRY(feroceon_flush_user_cache_range) ENTRY(feroceon_flush_user_cache_range)
mov ip, #0
sub r3, r1, r0 @ calculate total size sub r3, r1, r0 @ calculate total size
cmp r3, #CACHE_DLIMIT cmp r3, #CACHE_DLIMIT
bgt __flush_whole_cache bgt __flush_whole_cache
...@@ -152,6 +179,7 @@ ENTRY(feroceon_flush_user_cache_range) ...@@ -152,6 +179,7 @@ ENTRY(feroceon_flush_user_cache_range)
cmp r0, r1 cmp r0, r1
blo 1b blo 1b
tst r2, #VM_EXEC tst r2, #VM_EXEC
mov ip, #0
mcrne p15, 0, ip, c7, c10, 4 @ drain WB mcrne p15, 0, ip, c7, c10, 4 @ drain WB
mov pc, lr mov pc, lr
...@@ -306,16 +334,25 @@ ENTRY(cpu_feroceon_dcache_clean_area) ...@@ -306,16 +334,25 @@ ENTRY(cpu_feroceon_dcache_clean_area)
.align 5 .align 5
ENTRY(cpu_feroceon_switch_mm) ENTRY(cpu_feroceon_switch_mm)
#ifdef CONFIG_MMU #ifdef CONFIG_MMU
mov ip, #0 /*
@ && 'Clean & Invalidate whole DCache' * Note: we wish to call __flush_whole_cache but we need to preserve
1: mrc p15, 0, r15, c7, c14, 3 @ test,clean,invalidate * lr to do so. The only way without touching main memory is to
bne 1b * use r2 which is normally used to test the VM_EXEC flag, and
mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache * compensate locally for the skipped ops if it is not set.
mcr p15, 0, ip, c7, c10, 4 @ drain WB */
mov r2, lr @ abuse r2 to preserve lr
bl __flush_whole_cache
@ if r2 contains the VM_EXEC bit then the next 2 ops are done already
tst r2, #VM_EXEC
mcreq p15, 0, ip, c7, c5, 0 @ invalidate I cache
mcreq p15, 0, ip, c7, c10, 4 @ drain WB
mcr p15, 0, r0, c2, c0, 0 @ load page table pointer mcr p15, 0, r0, c2, c0, 0 @ load page table pointer
mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
#endif mov pc, r2
#else
mov pc, lr mov pc, lr
#endif
/* /*
* cpu_feroceon_set_pte_ext(ptep, pte, ext) * cpu_feroceon_set_pte_ext(ptep, pte, ext)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment