Commit 750737f6 authored by Anton Blanchard

[PATCH] ppc64: TLB flush rework

ppc64 tlb flush rework from Paul Mackerras

Instead of doing a double pass of the pagetables, we batch things
up in the pte flush routines and then shoot the batch down in
flush_tlb_pending.

Our page aging was broken: we never flushed entries out of the ppc64
hashtable. We now flush in ptep_test_and_clear_young.

A number of other things were fixed up in the process:

- change ppc64_tlb_batch to per cpu data
- remove some LPAR debug code
- be more careful with ioremap_mm inits
- clean up arch/ppc64/mm/init.c, create tlb.c
parent e2801b51
......@@ -300,7 +300,7 @@ static void pSeries_flush_hash_range(unsigned long context,
int i, j;
HPTE *hptep;
Hpte_dword0 dw0;
struct ppc64_tlb_batch *batch = &ppc64_tlb_batch[smp_processor_id()];
struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
/* XXX fix for large ptes */
unsigned long large = 0;
......
......@@ -432,10 +432,8 @@ static long pSeries_lpar_hpte_updatepp(unsigned long slot, unsigned long newpp,
lpar_rc = plpar_pte_protect(flags, slot, (avpn << 7));
if (lpar_rc == H_Not_Found) {
udbg_printf("updatepp missed\n");
if (lpar_rc == H_Not_Found)
return -1;
}
if (lpar_rc != H_Success)
panic("bad return code from pte protect rc = %lx\n", lpar_rc);
......@@ -533,10 +531,8 @@ static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long va,
lpar_rc = plpar_pte_remove(H_AVPN, slot, (avpn << 7), &dummy1,
&dummy2);
if (lpar_rc == H_Not_Found) {
udbg_printf("invalidate missed\n");
if (lpar_rc == H_Not_Found)
return;
}
if (lpar_rc != H_Success)
panic("Bad return code from invalidate rc = %lx\n", lpar_rc);
......@@ -551,7 +547,7 @@ void pSeries_lpar_flush_hash_range(unsigned long context, unsigned long number,
{
int i;
unsigned long flags;
struct ppc64_tlb_batch *batch = &ppc64_tlb_batch[smp_processor_id()];
struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
......
......@@ -49,14 +49,20 @@
#include <asm/hardirq.h>
#include <asm/cputable.h>
#include <asm/sections.h>
#include <asm/tlbflush.h>
#ifndef CONFIG_SMP
struct task_struct *last_task_used_math = NULL;
struct task_struct *last_task_used_altivec = NULL;
#endif
struct mm_struct ioremap_mm = { pgd : ioremap_dir
,page_table_lock : SPIN_LOCK_UNLOCKED };
struct mm_struct ioremap_mm = {
.pgd = ioremap_dir,
.mm_users = ATOMIC_INIT(2),
.mm_count = ATOMIC_INIT(1),
.cpu_vm_mask = CPU_MASK_ALL,
.page_table_lock = SPIN_LOCK_UNLOCKED,
};
char *sysmap = NULL;
unsigned long sysmap_size = 0;
......@@ -146,6 +152,8 @@ struct task_struct *__switch_to(struct task_struct *prev,
new->thread.regs->msr |= MSR_VEC;
#endif /* CONFIG_ALTIVEC */
flush_tlb_pending();
new_thread = &new->thread;
old_thread = &current->thread;
......
......@@ -4,6 +4,6 @@
EXTRA_CFLAGS += -mno-minimal-toc
obj-y := fault.o init.o imalloc.o hash_utils.o hash_low.o
obj-y := fault.o init.o imalloc.o hash_utils.o hash_low.o tlb.o
obj-$(CONFIG_DISCONTIGMEM) += numa.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
......@@ -353,8 +353,7 @@ void flush_hash_range(unsigned long context, unsigned long number, int local)
ppc_md.flush_hash_range(context, number, local);
} else {
int i;
struct ppc64_tlb_batch *batch =
&ppc64_tlb_batch[smp_processor_id()];
struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
for (i = 0; i < number; i++)
flush_hash_page(context, batch->addr[i], batch->pte[i],
......
This diff is collapsed.
/*
* This file contains the routines for flushing entries from the
* TLB and MMU hash table.
*
* Derived from arch/ppc64/mm/init.c:
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
*
* Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
* and Cort Dougan (PReP) (cort@cs.nmt.edu)
* Copyright (C) 1996 Paul Mackerras
* Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
*
* Derived from "arch/i386/mm/init.c"
* Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
*
* Dave Engebretsen <engebret@us.ibm.com>
* Rework for PPC64 port.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/percpu.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <asm/hardirq.h>
#include <linux/highmem.h>
#include <asm/rmap.h>
DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);
/* This is declared as we are using the more or less generic
* include/asm-ppc64/tlb.h file -- tgall
*/
DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
unsigned long pte_freelist_forced_free;
/*
 * Queue a hash-table/TLB invalidation for a Linux PTE that has changed.
 *
 * ptep:   the Linux PTE that was modified; its page and address are used
 *         to find the owning mm and the effective address it maps.
 * pte:    the PTE value to record for the flush (callers in pgtable.h
 *         pass the value read before the update).
 * wrprot: if true, it is permissible to change the existing HPTE to
 *         read-only rather than removing it (if we remove it we should
 *         clear the _PTE_HPTEFLAGS bits).  This implementation does not
 *         consult wrprot; every entry is simply queued.
 *
 * The entry is appended to this CPU's ppc64_tlb_batch.  The batch is
 * flushed when it reaches PPC64_TLB_BATCH_NR entries, or before
 * appending if it already holds entries for a different context.
 */
void hpte_update(pte_t *ptep, unsigned long pte, int wrprot)
{
	struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
	struct page *ptepage = virt_to_page(ptep);
	struct mm_struct *mm = (struct mm_struct *) ptepage->mapping;
	unsigned long addr = ptep_to_address(ptep);
	unsigned long context = 0;
	int slot;

	/* Only user-region addresses take their context from the mm. */
	if (REGION_ID(addr) == USER_REGION_ID)
		context = mm->context;

	slot = batch->index;

	/*
	 * This can happen when we are in the middle of a TLB batch and
	 * we encounter memory pressure (eg copy_page_range when it tries
	 * to allocate a new pte). If we have to reclaim memory and end
	 * up scanning and resetting referenced bits then our batch context
	 * will change mid stream.
	 */
	if (unlikely(slot != 0 && context != batch->context)) {
		flush_tlb_pending();
		slot = 0;
	}

	/* First entry of a fresh batch: remember who it belongs to. */
	if (slot == 0) {
		batch->context = context;
		batch->mm = mm;
	}

	batch->pte[slot] = __pte(pte);
	batch->addr[slot] = addr;
	slot++;
	batch->index = slot;

	/* Batch is full: push it out now. */
	if (slot >= PPC64_TLB_BATCH_NR)
		flush_tlb_pending();
}
/*
 * Flush every entry queued in @batch and mark the batch empty.
 *
 * The flush is treated as local when the batch's mm has only the
 * current CPU set in its cpu_vm_mask.  A batch with exactly one entry
 * goes through flush_hash_page(); any other count goes through
 * flush_hash_range().  Must not be called from interrupt context
 * (enforced with BUG_ON).
 */
void __flush_tlb_pending(struct ppc64_tlb_batch *batch)
{
	cpumask_t self = cpumask_of_cpu(smp_processor_id());
	int local;
	int count;

	BUG_ON(in_interrupt());

	count = batch->index;
	local = cpus_equal(batch->mm->cpu_vm_mask, self) ? 1 : 0;

	if (count != 1)
		flush_hash_range(batch->context, count, local);
	else
		flush_hash_page(batch->context, batch->addr[0],
				batch->pte[0], local);

	batch->index = 0;
}
#ifdef CONFIG_SMP
/*
 * Empty cross-CPU handler passed to smp_call_function() by pte_free_now().
 * @arg is unused.
 */
static void pte_free_smp_sync(void *arg)
{
/* Do nothing, just ensure we sync with all CPUs */
}
#endif
/*
 * Free a PTE page immediately instead of deferring it through a batch.
 *
 * This is only called when we are critically out of memory
 * (and fail to get a page in pte_free_tlb).  Each call is counted in
 * pte_freelist_forced_free.  The empty pte_free_smp_sync handler is
 * sent to the other CPUs before the page is handed to pte_free().
 * NOTE(review): the trailing (0, 1) arguments presumably mean
 * "no retry, wait for completion" -- confirm against smp_call_function().
 */
void pte_free_now(struct page *ptepage)
{
	++pte_freelist_forced_free;

	smp_call_function(pte_free_smp_sync, NULL, 0, 1);

	pte_free(ptepage);
}
/*
 * RCU callback registered by pte_free_submit().
 *
 * @arg is the struct pte_freelist_batch being retired: every page
 * recorded in pages[0 .. index-1] is released with pte_free(), then the
 * batch itself is released with free_page().
 */
static void pte_free_rcu_callback(void *arg)
{
	struct pte_freelist_batch *done = arg;
	unsigned int n = 0;

	while (n < done->index) {
		pte_free(done->pages[n]);
		n++;
	}

	free_page((unsigned long)done);
}
/*
 * Hand a batch of PTE pages to RCU for deferred freeing.
 *
 * Initialises the batch's embedded RCU head and registers
 * pte_free_rcu_callback() with the batch as its argument; the callback
 * frees the batched pages and the batch itself.
 */
void pte_free_submit(struct pte_freelist_batch *batch)
{
INIT_RCU_HEAD(&batch->rcu);
call_rcu(&batch->rcu, pte_free_rcu_callback, batch);
}
/*
 * Submit this CPU's current PTE free batch, if there is one, and clear
 * the per-CPU pte_freelist_cur pointer.  Does nothing when no batch is
 * pending.
 */
void pte_free_finish(void)
{
	/* This is safe as we are holding page_table_lock */
	struct pte_freelist_batch **slot = &__get_cpu_var(pte_freelist_cur);
	struct pte_freelist_batch *pending = *slot;

	if (pending != NULL) {
		pte_free_submit(pending);
		*slot = NULL;
	}
}
......@@ -12,6 +12,7 @@
#include <asm/processor.h> /* For TASK_SIZE */
#include <asm/mmu.h>
#include <asm/page.h>
#include <asm/tlbflush.h>
#endif /* __ASSEMBLY__ */
/* PMD_SHIFT determines what a second-level page table entry can map */
......@@ -288,72 +289,141 @@ static inline pte_t pte_mkyoung(pte_t pte) {
pte_val(pte) |= _PAGE_ACCESSED; return pte; }
/* Atomic PTE updates */
static inline unsigned long pte_update( pte_t *p, unsigned long clr,
unsigned long set )
static inline unsigned long pte_update(pte_t *p, unsigned long clr)
{
unsigned long old, tmp;
__asm__ __volatile__(
"1: ldarx %0,0,%3 # pte_update\n\
andi. %1,%0,%7\n\
andi. %1,%0,%6\n\
bne- 1b \n\
andc %1,%0,%4 \n\
or %1,%1,%5 \n\
stdcx. %1,0,%3 \n\
bne- 1b"
: "=&r" (old), "=&r" (tmp), "=m" (*p)
: "r" (p), "r" (clr), "r" (set), "m" (*p), "i" (_PAGE_BUSY)
: "r" (p), "r" (clr), "m" (*p), "i" (_PAGE_BUSY)
: "cc" );
return old;
}
/* PTE updating functions */
extern void hpte_update(pte_t *ptep, unsigned long pte, int wrprot);
static inline int ptep_test_and_clear_young(pte_t *ptep)
{
return (pte_update(ptep, _PAGE_ACCESSED, 0) & _PAGE_ACCESSED) != 0;
unsigned long old;
old = pte_update(ptep, _PAGE_ACCESSED | _PAGE_HPTEFLAGS);
if (old & _PAGE_HASHPTE) {
hpte_update(ptep, old, 0);
flush_tlb_pending(); /* XXX generic code doesn't flush */
}
return (old & _PAGE_ACCESSED) != 0;
}
/*
* On RW/DIRTY bit transitions we can avoid flushing the hpte. For the
* moment we always flush but we need to fix hpte_update and test if the
* optimisation is worth it.
*/
#if 1
static inline int ptep_test_and_clear_dirty(pte_t *ptep)
{
return (pte_update(ptep, _PAGE_DIRTY, 0) & _PAGE_DIRTY) != 0;
unsigned long old;
old = pte_update(ptep, _PAGE_DIRTY | _PAGE_HPTEFLAGS);
if (old & _PAGE_HASHPTE)
hpte_update(ptep, old, 0);
return (old & _PAGE_DIRTY) != 0;
}
static inline pte_t ptep_get_and_clear(pte_t *ptep)
static inline void ptep_set_wrprotect(pte_t *ptep)
{
return __pte(pte_update(ptep, ~_PAGE_HPTEFLAGS, 0));
unsigned long old;
old = pte_update(ptep, _PAGE_RW | _PAGE_HPTEFLAGS);
if (old & _PAGE_HASHPTE)
hpte_update(ptep, old, 0);
}
/*
* We currently remove entries from the hashtable regardless of whether
* the entry was young or dirty. The generic routines only flush if the
* entry was young or dirty which is not good enough.
*
* We should be more intelligent about this but for the moment we override
* these functions and force a tlb flush unconditionally
*/
#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
#define ptep_clear_flush_young(__vma, __address, __ptep) \
({ \
int __young = ptep_test_and_clear_young(__ptep); \
flush_tlb_page(__vma, __address); \
__young; \
})
#define __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH
#define ptep_clear_flush_dirty(__vma, __address, __ptep) \
({ \
int __dirty = ptep_test_and_clear_dirty(__ptep); \
flush_tlb_page(__vma, __address); \
__dirty; \
})
#else
static inline int ptep_test_and_clear_dirty(pte_t *ptep)
{
unsigned long old;
old = pte_update(ptep, _PAGE_DIRTY);
if ((~old & (_PAGE_HASHPTE | _PAGE_RW | _PAGE_DIRTY)) == 0)
hpte_update(ptep, old, 1);
return (old & _PAGE_DIRTY) != 0;
}
static inline void ptep_set_wrprotect(pte_t *ptep)
{
pte_update(ptep, _PAGE_RW, 0);
unsigned long old;
old = pte_update(ptep, _PAGE_RW);
if ((~old & (_PAGE_HASHPTE | _PAGE_RW | _PAGE_DIRTY)) == 0)
hpte_update(ptep, old, 1);
}
#endif
static inline void ptep_mkdirty(pte_t *ptep)
static inline pte_t ptep_get_and_clear(pte_t *ptep)
{
pte_update(ptep, 0, _PAGE_DIRTY);
unsigned long old = pte_update(ptep, ~0UL);
if (old & _PAGE_HASHPTE)
hpte_update(ptep, old, 0);
return __pte(old);
}
/*
* Macro to mark a page protection value as "uncacheable".
*/
#define pgprot_noncached(prot) (__pgprot(pgprot_val(prot) | _PAGE_NO_CACHE | _PAGE_GUARDED))
static inline void pte_clear(pte_t * ptep)
{
unsigned long old = pte_update(ptep, ~0UL);
#define pte_same(A,B) (((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0)
if (old & _PAGE_HASHPTE)
hpte_update(ptep, old, 0);
}
/*
* set_pte stores a linux PTE into the linux page table.
* On machines which use an MMU hash table we avoid changing the
* _PAGE_HASHPTE bit.
*/
static inline void set_pte(pte_t *ptep, pte_t pte)
{
pte_update(ptep, ~_PAGE_HPTEFLAGS, pte_val(pte) & ~_PAGE_HPTEFLAGS);
if (pte_present(*ptep))
pte_clear(ptep);
*ptep = __pte(pte_val(pte)) & ~_PAGE_HPTEFLAGS;
}
static inline void pte_clear(pte_t * ptep)
{
pte_update(ptep, ~_PAGE_HPTEFLAGS, 0);
}
/*
* Macro to mark a page protection value as "uncacheable".
*/
#define pgprot_noncached(prot) (__pgprot(pgprot_val(prot) | _PAGE_NO_CACHE | _PAGE_GUARDED))
#define pte_same(A,B) (((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0)
extern unsigned long ioremap_bot, ioremap_base;
......
......@@ -12,11 +12,9 @@
#ifndef _PPC64_TLB_H
#define _PPC64_TLB_H
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/page.h>
#include <asm/mmu.h>
struct mmu_gather;
static inline void tlb_flush(struct mmu_gather *tlb);
/* Avoid pulling in another include just for this */
......@@ -29,66 +27,13 @@ static inline void tlb_flush(struct mmu_gather *tlb);
#define tlb_start_vma(tlb, vma) do { } while (0)
#define tlb_end_vma(tlb, vma) do { } while (0)
/* Should make this at least as large as the generic batch size, but it
* takes up too much space */
#define PPC64_TLB_BATCH_NR 192
struct ppc64_tlb_batch {
unsigned long index;
pte_t pte[PPC64_TLB_BATCH_NR];
unsigned long addr[PPC64_TLB_BATCH_NR];
unsigned long vaddr[PPC64_TLB_BATCH_NR];
};
extern struct ppc64_tlb_batch ppc64_tlb_batch[NR_CPUS];
static inline void __tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep,
unsigned long address)
{
int cpu = smp_processor_id();
struct ppc64_tlb_batch *batch = &ppc64_tlb_batch[cpu];
unsigned long i = batch->index;
pte_t pte;
cpumask_t local_cpumask = cpumask_of_cpu(cpu);
if (pte_val(*ptep) & _PAGE_HASHPTE) {
pte = __pte(pte_update(ptep, _PAGE_HPTEFLAGS, 0));
if (pte_val(pte) & _PAGE_HASHPTE) {
batch->pte[i] = pte;
batch->addr[i] = address;
i++;
if (i == PPC64_TLB_BATCH_NR) {
int local = 0;
if (cpus_equal(tlb->mm->cpu_vm_mask, local_cpumask))
local = 1;
flush_hash_range(tlb->mm->context, i, local);
i = 0;
}
}
}
batch->index = i;
}
#define __tlb_remove_tlb_entry(tlb, pte, address) do { } while (0)
extern void pte_free_finish(void);
static inline void tlb_flush(struct mmu_gather *tlb)
{
int cpu = smp_processor_id();
struct ppc64_tlb_batch *batch = &ppc64_tlb_batch[cpu];
int local = 0;
cpumask_t local_cpumask = cpumask_of_cpu(smp_processor_id());
if (cpus_equal(tlb->mm->cpu_vm_mask, local_cpumask))
local = 1;
flush_hash_range(tlb->mm->context, batch->index, local);
batch->index = 0;
flush_tlb_pending();
pte_free_finish();
}
......
#ifndef _PPC64_TLBFLUSH_H
#define _PPC64_TLBFLUSH_H
#include <linux/threads.h>
#include <linux/mm.h>
#include <asm/page.h>
/*
* TLB flushing:
*
......@@ -15,22 +11,39 @@
* - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
*/
extern void flush_tlb_mm(struct mm_struct *mm);
extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
extern void __flush_tlb_range(struct mm_struct *mm,
unsigned long start, unsigned long end);
#define flush_tlb_range(vma, start, end) \
__flush_tlb_range(vma->vm_mm, start, end)
#include <linux/percpu.h>
#include <asm/page.h>
#define PPC64_TLB_BATCH_NR 192
#define flush_tlb_kernel_range(start, end) \
__flush_tlb_range(&init_mm, (start), (end))
struct mm_struct;
/*
 * Per-CPU queue of pending hash-table/TLB invalidations, filled by
 * hpte_update() and drained by __flush_tlb_pending().
 */
struct ppc64_tlb_batch {
/* Number of queued entries; 0 means nothing is pending. */
unsigned long index;
/* Context of the queued entries, recorded when the first one is added. */
unsigned long context;
/* mm owning the queued entries; its cpu_vm_mask decides local vs. global flush. */
struct mm_struct *mm;
/* PTE value recorded for each queued entry. */
pte_t pte[PPC64_TLB_BATCH_NR];
/* Address recorded for each queued entry (from ptep_to_address()). */
unsigned long addr[PPC64_TLB_BATCH_NR];
/* NOTE(review): not filled in by hpte_update(); presumably scratch space for the flush_hash_range implementations -- confirm. */
unsigned long vaddr[PPC64_TLB_BATCH_NR];
};
DECLARE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);
static inline void flush_tlb_pgtables(struct mm_struct *mm,
unsigned long start, unsigned long end)
extern void __flush_tlb_pending(struct ppc64_tlb_batch *batch);
static inline void flush_tlb_pending(void)
{
/* PPC has hw page tables. */
struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
if (batch->index)
__flush_tlb_pending(batch);
}
#define flush_tlb_mm(mm) flush_tlb_pending()
#define flush_tlb_page(vma, addr) flush_tlb_pending()
#define flush_tlb_range(vma, start, end) \
do { (void)(start); flush_tlb_pending(); } while (0)
#define flush_tlb_kernel_range(start, end) flush_tlb_pending()
#define flush_tlb_pgtables(mm, start, end) do { } while (0)
extern void flush_hash_page(unsigned long context, unsigned long ea, pte_t pte,
int local);
void flush_hash_range(unsigned long context, unsigned long number, int local);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment