Commit 2de8b4cc authored by John David Anglin, committed by Helge Deller

parisc: Rewrite cache flush code for PA8800/PA8900

Originally, I was convinced that we needed to use tmpalias flushes
everywhere, for both user and kernel flushes. However, when I modified
flush_kernel_dcache_page_addr to use a tmpalias flush, my c8000
would crash quite early when booting.

The PDC returns alias values of 0 for the icache and dcache. This
indicates that either the alias boundary is greater than 16MB or
equivalent aliasing doesn't work. I modified the tmpalias code to
make it easy to try alternate boundaries. I tried boundaries up to
128MB, but kernel tmpalias flushes still didn't work on the c8000.

This led me to conclude that tmpalias flushes don't work on PA8800
and PA8900 machines, and that we needed to flush directly using the
virtual address of user and kernel pages. This is likely the major
cause of instability on the c8000 and rp34xx machines.

Flushing user pages requires a temporary context switch, as we have
to flush pages that don't belong to the current context. Further,
we have to deal with pages that aren't present. If a page isn't
present, the flush instructions fault on every line.
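The check that makes flushing by virtual address safe shows up in the
cache.c hunk below as pte_needs_flush(); in essence, a user page is
only flushed when its PTE is present and accessed, and not marked
uncacheable:

    static inline bool pte_needs_flush(pte_t pte)
    {
    	/* Skip pages whose flush would fault or is pointless. */
    	return (pte_val(pte) & (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_NO_CACHE))
    		== (_PAGE_PRESENT | _PAGE_ACCESSED);
    }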

Other code has been rearranged and simplified based on testing. For
example, I introduced a flush_cache_dup_mm routine. flush_cache_mm
and flush_cache_dup_mm differ in that flush_cache_mm calls
purge_cache_pages and flush_cache_dup_mm calls flush_cache_pages.
In some implementations, pdc is more efficient than fdc. Based on
my testing, I don't believe there's any performance benefit on the
c8000.
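
For reference, the new flush_cache_dup_mm entry point amounts to the
per-VMA walk sketched here. This is a minimal sketch based on the
description above: flush_cache_dup_mm and purge_cache_pages are not
part of the excerpt below, and the flush-all fast path is omitted.

    void flush_cache_dup_mm(struct mm_struct *mm)
    {
    	struct vm_area_struct *vma;

    	/* fork still needs the parent's dirty lines written back, so
    	   use the fdc-based flush_cache_pages() rather than a
    	   pdc-based purge. */
    	for (vma = mm->mmap; vma; vma = vma->vm_next)
    		flush_cache_pages(vma, vma->vm_start, vma->vm_end);
    }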

Signed-off-by: John David Anglin <dave.anglin@bell.net>
Signed-off-by: Helge Deller <deller@gmx.de>
parent 67c35a3b
arch/parisc/include/asm/cacheflush.h
@@ -59,20 +59,12 @@ void flush_dcache_page(struct page *page);
 	flush_kernel_icache_range_asm(s,e); 		\
 } while (0)
 
-#define copy_to_user_page(vma, page, vaddr, dst, src, len) \
-do { \
-	flush_cache_page(vma, vaddr, page_to_pfn(page)); \
-	memcpy(dst, src, len); \
-	flush_kernel_dcache_range_asm((unsigned long)dst, (unsigned long)dst + len); \
-} while (0)
-
-#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
-do { \
-	flush_cache_page(vma, vaddr, page_to_pfn(page)); \
-	memcpy(dst, src, len); \
-} while (0)
-
-void flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long pfn);
+void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
+		unsigned long user_vaddr, void *dst, void *src, int len);
+void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
+		unsigned long user_vaddr, void *dst, void *src, int len);
+void flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr,
+		unsigned long pfn);
 void flush_cache_range(struct vm_area_struct *vma,
 		unsigned long start, unsigned long end);
@@ -80,16 +72,7 @@ void flush_cache_range(struct vm_area_struct *vma,
 void flush_dcache_page_asm(unsigned long phys_addr, unsigned long vaddr);
 
 #define ARCH_HAS_FLUSH_ANON_PAGE
-static inline void
-flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr)
-{
-	if (PageAnon(page)) {
-		flush_tlb_page(vma, vmaddr);
-		preempt_disable();
-		flush_dcache_page_asm(page_to_phys(page), vmaddr);
-		preempt_enable();
-	}
-}
+void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr);
 
 #define ARCH_HAS_FLUSH_ON_KUNMAP
 static inline void kunmap_flush_on_unmap(void *addr)
arch/parisc/include/asm/page.h
@@ -26,12 +26,14 @@
 #define copy_page(to, from)	copy_page_asm((void *)(to), (void *)(from))
 
 struct page;
+struct vm_area_struct;
 
 void clear_page_asm(void *page);
 void copy_page_asm(void *to, void *from);
 #define clear_user_page(vto, vaddr, page) clear_page_asm(vto)
-void copy_user_page(void *vto, void *vfrom, unsigned long vaddr,
-			struct page *pg);
+void copy_user_highpage(struct page *to, struct page *from, unsigned long vaddr,
+		struct vm_area_struct *vma);
+#define __HAVE_ARCH_COPY_USER_HIGHPAGE
 
 /*
  * These are used to make use of C type-checking..
arch/parisc/kernel/cache.c
@@ -27,6 +27,7 @@
 #include <asm/processor.h>
 #include <asm/sections.h>
 #include <asm/shmparam.h>
+#include <asm/mmu_context.h>
 
 int split_tlb __ro_after_init;
 int dcache_stride __ro_after_init;
@@ -91,7 +92,7 @@ static inline void flush_data_cache(void)
 }
 
-/* Virtual address of pfn. */
+/* Kernel virtual address of pfn. */
 #define pfn_va(pfn)	__va(PFN_PHYS(pfn))
 
 void
@@ -124,11 +125,13 @@ show_cache_info(struct seq_file *m)
 			cache_info.ic_size/1024 );
 	if (cache_info.dc_loop != 1)
 		snprintf(buf, 32, "%lu-way associative", cache_info.dc_loop);
-	seq_printf(m, "D-cache\t\t: %ld KB (%s%s, %s)\n",
+	seq_printf(m, "D-cache\t\t: %ld KB (%s%s, %s, alias=%d)\n",
 		cache_info.dc_size/1024,
 		(cache_info.dc_conf.cc_wt ? "WT":"WB"),
 		(cache_info.dc_conf.cc_sh ? ", shared I/D":""),
-		((cache_info.dc_loop == 1) ? "direct mapped" : buf));
+		((cache_info.dc_loop == 1) ? "direct mapped" : buf),
+		cache_info.dc_conf.cc_alias
+	);
 	seq_printf(m, "ITLB entries\t: %ld\n" "DTLB entries\t: %ld%s\n",
 		cache_info.it_size,
 		cache_info.dt_size,
@@ -324,25 +327,81 @@ __flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr,
 	preempt_enable();
 }
 
-static inline void
-__purge_cache_page(struct vm_area_struct *vma, unsigned long vmaddr,
-		   unsigned long physaddr)
+static void flush_user_cache_page(struct vm_area_struct *vma, unsigned long vmaddr)
 {
-	if (!static_branch_likely(&parisc_has_cache))
-		return;
+	unsigned long flags, space, pgd, prot;
+#ifdef CONFIG_TLB_PTLOCK
+	unsigned long pgd_lock;
+#endif
+
+	vmaddr &= PAGE_MASK;
 
 	preempt_disable();
-	purge_dcache_page_asm(physaddr, vmaddr);
+
+	/* Set context for flush */
+	local_irq_save(flags);
+	prot = mfctl(8);
+	space = mfsp(SR_USER);
+	pgd = mfctl(25);
+#ifdef CONFIG_TLB_PTLOCK
+	pgd_lock = mfctl(28);
+#endif
+	switch_mm_irqs_off(NULL, vma->vm_mm, NULL);
+	local_irq_restore(flags);
+
+	flush_user_dcache_range_asm(vmaddr, vmaddr + PAGE_SIZE);
 	if (vma->vm_flags & VM_EXEC)
-		flush_icache_page_asm(physaddr, vmaddr);
+		flush_user_icache_range_asm(vmaddr, vmaddr + PAGE_SIZE);
+	flush_tlb_page(vma, vmaddr);
+
+	/* Restore previous context */
+	local_irq_save(flags);
+#ifdef CONFIG_TLB_PTLOCK
+	mtctl(pgd_lock, 28);
+#endif
+	mtctl(pgd, 25);
+	mtsp(space, SR_USER);
+	mtctl(prot, 8);
+	local_irq_restore(flags);
+
 	preempt_enable();
 }
 
+static inline pte_t *get_ptep(struct mm_struct *mm, unsigned long addr)
+{
+	pte_t *ptep = NULL;
+	pgd_t *pgd = mm->pgd;
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	if (!pgd_none(*pgd)) {
+		p4d = p4d_offset(pgd, addr);
+		if (!p4d_none(*p4d)) {
+			pud = pud_offset(p4d, addr);
+			if (!pud_none(*pud)) {
+				pmd = pmd_offset(pud, addr);
+				if (!pmd_none(*pmd))
+					ptep = pte_offset_map(pmd, addr);
+			}
+		}
+	}
+	return ptep;
+}
+
+static inline bool pte_needs_flush(pte_t pte)
+{
+	return (pte_val(pte) & (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_NO_CACHE))
+		== (_PAGE_PRESENT | _PAGE_ACCESSED);
+}
+
 void flush_dcache_page(struct page *page)
 {
 	struct address_space *mapping = page_mapping_file(page);
 	struct vm_area_struct *mpnt;
 	unsigned long offset;
 	unsigned long addr, old_addr = 0;
+	unsigned long count = 0;
 	pgoff_t pgoff;
 
 	if (mapping && !mapping_mapped(mapping)) {
@@ -357,33 +416,52 @@ void flush_dcache_page(struct page *page)
 	pgoff = page->index;
 
-	/* We have carefully arranged in arch_get_unmapped_area() that
+	/*
+	 * We have carefully arranged in arch_get_unmapped_area() that
 	 * *any* mappings of a file are always congruently mapped (whether
 	 * declared as MAP_PRIVATE or MAP_SHARED), so we only need
-	 * to flush one address here for them all to become coherent */
-
+	 * to flush one address here for them all to become coherent
+	 * on machines that support equivalent aliasing
+	 */
 	flush_dcache_mmap_lock(mapping);
 	vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) {
 		offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT;
 		addr = mpnt->vm_start + offset;
+		if (parisc_requires_coherency()) {
+			pte_t *ptep;
 
-		/* The TLB is the engine of coherence on parisc: The
-		 * CPU is entitled to speculate any page with a TLB
-		 * mapping, so here we kill the mapping then flush the
-		 * page along a special flush only alias mapping.
-		 * This guarantees that the page is no-longer in the
-		 * cache for any process and nor may it be
-		 * speculatively read in (until the user or kernel
-		 * specifically accesses it, of course) */
-
-		flush_tlb_page(mpnt, addr);
-		if (old_addr == 0 || (old_addr & (SHM_COLOUR - 1))
-				      != (addr & (SHM_COLOUR - 1))) {
-			__flush_cache_page(mpnt, addr, page_to_phys(page));
-			if (parisc_requires_coherency() && old_addr)
-				printk(KERN_ERR "INEQUIVALENT ALIASES 0x%lx and 0x%lx in file %pD\n", old_addr, addr, mpnt->vm_file);
-			old_addr = addr;
+			ptep = get_ptep(mpnt->vm_mm, addr);
+			if (ptep && pte_needs_flush(*ptep))
+				flush_user_cache_page(mpnt, addr);
+		} else {
+			/*
+			 * The TLB is the engine of coherence on parisc:
+			 * The CPU is entitled to speculate any page
+			 * with a TLB mapping, so here we kill the
+			 * mapping then flush the page along a special
+			 * flush only alias mapping. This guarantees that
+			 * the page is no-longer in the cache for any
+			 * process and nor may it be speculatively read
+			 * in (until the user or kernel specifically
+			 * accesses it, of course)
+			 */
+			flush_tlb_page(mpnt, addr);
+			if (old_addr == 0 || (old_addr & (SHM_COLOUR - 1))
+					!= (addr & (SHM_COLOUR - 1))) {
+				__flush_cache_page(mpnt, addr, page_to_phys(page));
+				/*
+				 * Software is allowed to have any number
+				 * of private mappings to a page.
+				 */
+				if (!(mpnt->vm_flags & VM_SHARED))
+					continue;
+				if (old_addr)
+					pr_err("INEQUIVALENT ALIASES 0x%lx and 0x%lx in file %pD\n",
+						old_addr, addr, mpnt->vm_file);
+				old_addr = addr;
+			}
 		}
+		WARN_ON(++count == 4096);
 	}
 	flush_dcache_mmap_unlock(mapping);
 }
@@ -403,7 +481,7 @@ void __init parisc_setup_cache_timing(void)
 {
 	unsigned long rangetime, alltime;
 	unsigned long size;
-	unsigned long threshold;
+	unsigned long threshold, threshold2;
 
 	alltime = mfctl(16);
 	flush_data_cache();
@@ -417,11 +495,16 @@
 	printk(KERN_DEBUG "Whole cache flush %lu cycles, flushing %lu bytes %lu cycles\n",
 		alltime, size, rangetime);
 
-	threshold = L1_CACHE_ALIGN(size * alltime / rangetime);
-	if (threshold > cache_info.dc_size)
-		threshold = cache_info.dc_size;
-	if (threshold)
-		parisc_cache_flush_threshold = threshold;
+	threshold = L1_CACHE_ALIGN((unsigned long)((uint64_t)size * alltime / rangetime));
+	pr_info("Calculated flush threshold is %lu KiB\n",
+		threshold/1024);
+
+	/*
+	 * The threshold computed above isn't very reliable. The following
+	 * heuristic works reasonably well on c8000/rp3440.
+	 */
+	threshold2 = cache_info.dc_size * num_online_cpus();
+	parisc_cache_flush_threshold = threshold2;
 	printk(KERN_INFO "Cache flush threshold set to %lu KiB\n",
 		parisc_cache_flush_threshold/1024);
@@ -477,19 +560,47 @@ void flush_kernel_dcache_page_addr(void *addr)
 }
 EXPORT_SYMBOL(flush_kernel_dcache_page_addr);
 
-void copy_user_page(void *vto, void *vfrom, unsigned long vaddr,
-	struct page *pg)
+static void flush_cache_page_if_present(struct vm_area_struct *vma,
+	unsigned long vmaddr, unsigned long pfn)
 {
-	/* Copy using kernel mapping.  No coherency is needed (all in
-	  kunmap) for the `to' page.  However, the `from' page needs to
-	  be flushed through a mapping equivalent to the user mapping
-	  before it can be accessed through the kernel mapping. */
-	preempt_disable();
-	flush_dcache_page_asm(__pa(vfrom), vaddr);
-	copy_page_asm(vto, vfrom);
-	preempt_enable();
+	pte_t *ptep = get_ptep(vma->vm_mm, vmaddr);
+
+	/*
+	 * The pte check is racy and sometimes the flush will trigger
+	 * a non-access TLB miss. Hopefully, the page has already been
+	 * flushed.
+	 */
+	if (ptep && pte_needs_flush(*ptep))
+		flush_cache_page(vma, vmaddr, pfn);
+}
+
+void copy_user_highpage(struct page *to, struct page *from,
+	unsigned long vaddr, struct vm_area_struct *vma)
+{
+	void *kto, *kfrom;
+
+	kfrom = kmap_local_page(from);
+	kto = kmap_local_page(to);
+	flush_cache_page_if_present(vma, vaddr, page_to_pfn(from));
+	copy_page_asm(kto, kfrom);
+	kunmap_local(kto);
+	kunmap_local(kfrom);
+}
+
+void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
+	unsigned long user_vaddr, void *dst, void *src, int len)
+{
+	flush_cache_page_if_present(vma, user_vaddr, page_to_pfn(page));
+	memcpy(dst, src, len);
+	flush_kernel_dcache_range_asm((unsigned long)dst, (unsigned long)dst + len);
+}
+
+void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
+	unsigned long user_vaddr, void *dst, void *src, int len)
+{
+	flush_cache_page_if_present(vma, user_vaddr, page_to_pfn(page));
+	memcpy(dst, src, len);
 }
-EXPORT_SYMBOL(copy_user_page);
 
 /* __flush_tlb_range()
  *
@@ -520,92 +631,105 @@ int __flush_tlb_range(unsigned long sid, unsigned long start,
 	return 0;
 }
 
-static inline unsigned long mm_total_size(struct mm_struct *mm)
+static void flush_cache_pages(struct vm_area_struct *vma, unsigned long start, unsigned long end)
 {
-	struct vm_area_struct *vma;
-	unsigned long usize = 0;
-
-	for (vma = mm->mmap; vma; vma = vma->vm_next)
-		usize += vma->vm_end - vma->vm_start;
-	return usize;
-}
-
-static inline pte_t *get_ptep(pgd_t *pgd, unsigned long addr)
-{
-	pte_t *ptep = NULL;
+	unsigned long addr, pfn;
+	pte_t *ptep;
 
-	if (!pgd_none(*pgd)) {
-		p4d_t *p4d = p4d_offset(pgd, addr);
-		if (!p4d_none(*p4d)) {
-			pud_t *pud = pud_offset(p4d, addr);
-			if (!pud_none(*pud)) {
-				pmd_t *pmd = pmd_offset(pud, addr);
-				if (!pmd_none(*pmd))
-					ptep = pte_offset_map(pmd, addr);
+	for (addr = start; addr < end; addr += PAGE_SIZE) {
+		/*
+		 * The vma can contain pages that aren't present. Although
+		 * the pte search is expensive, we need the pte to find the
+		 * page pfn and to check whether the page should be flushed.
+		 */
+		ptep = get_ptep(vma->vm_mm, addr);
+		if (ptep && pte_needs_flush(*ptep)) {
+			if (parisc_requires_coherency()) {
+				flush_user_cache_page(vma, addr);
+			} else {
+				pfn = pte_pfn(*ptep);
+				if (WARN_ON(!pfn_valid(pfn)))
+					return;
+				__flush_cache_page(vma, addr, PFN_PHYS(pfn));
 			}
 		}
 	}
-	return ptep;
 }
 
-static void flush_cache_pages(struct vm_area_struct *vma, struct mm_struct *mm,
-			      unsigned long start, unsigned long end)
+static inline unsigned long mm_total_size(struct mm_struct *mm)
 {
-	unsigned long addr, pfn;
-	pte_t *ptep;
+	struct vm_area_struct *vma;
+	unsigned long usize = 0;
 
-	for (addr = start; addr < end; addr += PAGE_SIZE) {
-		ptep = get_ptep(mm->pgd, addr);
-		if (ptep) {
-			pfn = pte_pfn(*ptep);
-			flush_cache_page(vma, addr, pfn);
-		}
-	}
+	for (vma = mm->mmap; vma && usize < parisc_cache_flush_threshold; vma = vma->vm_next)
+		usize += vma->vm_end - vma->vm_start;
+	return usize;
 }
 
 void flush_cache_mm(struct mm_struct *mm)
 {
 	struct vm_area_struct *vma;
 
-	/* Flushing the whole cache on each cpu takes forever on
-	   rp3440, etc.  So, avoid it if the mm isn't too big.  */
-	if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) &&
-	    mm_total_size(mm) >= parisc_cache_flush_threshold) {
-		if (mm->context.space_id)
-			flush_tlb_all();
+	/*
+	 * Flushing the whole cache on each cpu takes forever on
	 * rp3440, etc. So, avoid it if the mm isn't too big.
+	 *
+	 * Note that we must flush the entire cache on machines
+	 * with aliasing caches to prevent random segmentation
+	 * faults.
+	 */
+	if (!parisc_requires_coherency()
+	    || mm_total_size(mm) >= parisc_cache_flush_threshold) {
+		if (WARN_ON(IS_ENABLED(CONFIG_SMP) && arch_irqs_disabled()))
+			return;
+		flush_tlb_all();
 		flush_cache_all();
 		return;
 	}
 
+	/* Flush mm */
 	for (vma = mm->mmap; vma; vma = vma->vm_next)
-		flush_cache_pages(vma, mm, vma->vm_start, vma->vm_end);
+		flush_cache_pages(vma, vma->vm_start, vma->vm_end);
 }
 
-void flush_cache_range(struct vm_area_struct *vma,
-		unsigned long start, unsigned long end)
+void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
 {
-	if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) &&
-	    end - start >= parisc_cache_flush_threshold) {
-		if (vma->vm_mm->context.space_id)
-			flush_tlb_range(vma, start, end);
+	if (!parisc_requires_coherency()
+	    || end - start >= parisc_cache_flush_threshold) {
+		if (WARN_ON(IS_ENABLED(CONFIG_SMP) && arch_irqs_disabled()))
+			return;
+		flush_tlb_range(vma, start, end);
 		flush_cache_all();
 		return;
 	}
 
-	flush_cache_pages(vma, vma->vm_mm, start, end);
+	flush_cache_pages(vma, start, end);
 }
 
-void
-flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long pfn)
+void flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long pfn)
 {
-	if (pfn_valid(pfn)) {
-		if (likely(vma->vm_mm->context.space_id)) {
-			flush_tlb_page(vma, vmaddr);
-			__flush_cache_page(vma, vmaddr, PFN_PHYS(pfn));
-		} else {
-			__purge_cache_page(vma, vmaddr, PFN_PHYS(pfn));
-		}
-	}
+	if (WARN_ON(!pfn_valid(pfn)))
+		return;
+	if (parisc_requires_coherency())
+		flush_user_cache_page(vma, vmaddr);
+	else
+		__flush_cache_page(vma, vmaddr, PFN_PHYS(pfn));
+}
+
+void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr)
+{
+	if (!PageAnon(page))
+		return;
+
+	if (parisc_requires_coherency()) {
+		flush_user_cache_page(vma, vmaddr);
+		return;
+	}
+
+	flush_tlb_page(vma, vmaddr);
+	preempt_disable();
+	flush_dcache_page_asm(page_to_phys(page), vmaddr);
+	preempt_enable();
 }
 
 void flush_kernel_vmap_range(void *vaddr, int size)