Commit 750737f6 authored by Anton Blanchard

[PATCH] ppc64: TLB flush rework

ppc64 tlb flush rework from Paul Mackerras

Instead of doing a double pass of the pagetables, we batch things
up in the pte flush routines and then shoot the batch down in
flush_tlb_pending.

Our page aging was broken, we never flushed entries out of the ppc64
hashtable. We now flush in ptep_test_and_clear_young.

A number of other things were fixed up in the process:

- change ppc64_tlb_batch to per cpu data
- remove some LPAR debug code
- be more careful with ioremap_mm inits
- clean up arch/ppc64/mm/init.c, create tlb.c
parent e2801b51
......@@ -300,7 +300,7 @@ static void pSeries_flush_hash_range(unsigned long context,
int i, j;
HPTE *hptep;
Hpte_dword0 dw0;
struct ppc64_tlb_batch *batch = &ppc64_tlb_batch[smp_processor_id()];
struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
/* XXX fix for large ptes */
unsigned long large = 0;
......
......@@ -432,10 +432,8 @@ static long pSeries_lpar_hpte_updatepp(unsigned long slot, unsigned long newpp,
lpar_rc = plpar_pte_protect(flags, slot, (avpn << 7));
if (lpar_rc == H_Not_Found) {
udbg_printf("updatepp missed\n");
if (lpar_rc == H_Not_Found)
return -1;
}
if (lpar_rc != H_Success)
panic("bad return code from pte protect rc = %lx\n", lpar_rc);
......@@ -533,10 +531,8 @@ static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long va,
lpar_rc = plpar_pte_remove(H_AVPN, slot, (avpn << 7), &dummy1,
&dummy2);
if (lpar_rc == H_Not_Found) {
udbg_printf("invalidate missed\n");
if (lpar_rc == H_Not_Found)
return;
}
if (lpar_rc != H_Success)
panic("Bad return code from invalidate rc = %lx\n", lpar_rc);
......@@ -551,7 +547,7 @@ void pSeries_lpar_flush_hash_range(unsigned long context, unsigned long number,
{
int i;
unsigned long flags;
struct ppc64_tlb_batch *batch = &ppc64_tlb_batch[smp_processor_id()];
struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
......
......@@ -49,14 +49,20 @@
#include <asm/hardirq.h>
#include <asm/cputable.h>
#include <asm/sections.h>
#include <asm/tlbflush.h>
#ifndef CONFIG_SMP
struct task_struct *last_task_used_math = NULL;
struct task_struct *last_task_used_altivec = NULL;
#endif
struct mm_struct ioremap_mm = { pgd : ioremap_dir
,page_table_lock : SPIN_LOCK_UNLOCKED };
struct mm_struct ioremap_mm = {
.pgd = ioremap_dir,
.mm_users = ATOMIC_INIT(2),
.mm_count = ATOMIC_INIT(1),
.cpu_vm_mask = CPU_MASK_ALL,
.page_table_lock = SPIN_LOCK_UNLOCKED,
};
char *sysmap = NULL;
unsigned long sysmap_size = 0;
......@@ -146,6 +152,8 @@ struct task_struct *__switch_to(struct task_struct *prev,
new->thread.regs->msr |= MSR_VEC;
#endif /* CONFIG_ALTIVEC */
flush_tlb_pending();
new_thread = &new->thread;
old_thread = &current->thread;
......
......@@ -4,6 +4,6 @@
EXTRA_CFLAGS += -mno-minimal-toc
obj-y := fault.o init.o imalloc.o hash_utils.o hash_low.o
obj-y := fault.o init.o imalloc.o hash_utils.o hash_low.o tlb.o
obj-$(CONFIG_DISCONTIGMEM) += numa.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
......@@ -353,8 +353,7 @@ void flush_hash_range(unsigned long context, unsigned long number, int local)
ppc_md.flush_hash_range(context, number, local);
} else {
int i;
struct ppc64_tlb_batch *batch =
&ppc64_tlb_batch[smp_processor_id()];
struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
for (i = 0; i < number; i++)
flush_hash_page(context, batch->addr[i], batch->pte[i],
......
......@@ -74,11 +74,6 @@ extern struct task_struct *current_set[NR_CPUS];
extern pgd_t ioremap_dir[];
pgd_t * ioremap_pgd = (pgd_t *)&ioremap_dir;
static void * __ioremap_com(unsigned long addr, unsigned long pa,
unsigned long ea, unsigned long size,
unsigned long flags);
static void map_io_page(unsigned long va, unsigned long pa, int flags);
unsigned long klimit = (unsigned long)_end;
HPTE *Hash=0;
......@@ -94,59 +89,6 @@ unsigned long io_hole_start;
unsigned long io_hole_size;
unsigned long top_of_ram;
/* This is declared as we are using the more or less generic
* include/asm-ppc64/tlb.h file -- tgall
*/
DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
unsigned long pte_freelist_forced_free;
#ifdef CONFIG_SMP
static void pte_free_smp_sync(void *arg)
{
/* Do nothing, just ensure we sync with all CPUs */
}
#endif
/* This is only called when we are critically out of memory
* (and fail to get a page in pte_free_tlb).
*/
void pte_free_now(struct page *ptepage)
{
pte_freelist_forced_free++;
smp_call_function(pte_free_smp_sync, NULL, 0, 1);
pte_free(ptepage);
}
static void pte_free_rcu_callback(void *arg)
{
struct pte_freelist_batch *batch = arg;
unsigned int i;
for (i = 0; i < batch->index; i++)
pte_free(batch->pages[i]);
free_page((unsigned long)batch);
}
void pte_free_submit(struct pte_freelist_batch *batch)
{
INIT_RCU_HEAD(&batch->rcu);
call_rcu(&batch->rcu, pte_free_rcu_callback, batch);
}
void pte_free_finish(void)
{
/* This is safe as we are holding page_table_lock */
struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur);
if (*batchp == NULL)
return;
pte_free_submit(*batchp);
*batchp = NULL;
}
void show_mem(void)
{
int total = 0, reserved = 0;
......@@ -176,17 +118,99 @@ void show_mem(void)
printk("%d pages swap cached\n",cached);
}
#ifdef CONFIG_PPC_ISERIES
/* iSeries: I/O addresses are usable directly, so ioremap is an identity map */
void *ioremap(unsigned long addr, unsigned long size)
{
return (void *)addr;
}
/* iSeries: flags are ignored; the address is returned unchanged */
extern void *__ioremap(unsigned long addr, unsigned long size,
unsigned long flags)
{
return (void *)addr;
}
/* iSeries: nothing was mapped, so there is nothing to unmap */
void iounmap(void *addr)
{
return;
}
#else
/*
* map_io_page currently only called by __ioremap
* map_io_page adds an entry to the ioremap page table
* and adds an entry to the HPT, possibly bolting it
*/
static void map_io_page(unsigned long ea, unsigned long pa, int flags)
{
pgd_t *pgdp;
pmd_t *pmdp;
pte_t *ptep;
unsigned long vsid;
if (mem_init_done) {
/* mm is up: record the mapping in ioremap_mm's page table
* under its page_table_lock; the HPTE is faulted in later */
spin_lock(&ioremap_mm.page_table_lock);
pgdp = pgd_offset_i(ea);
pmdp = pmd_alloc(&ioremap_mm, pgdp, ea);
ptep = pte_alloc_kernel(&ioremap_mm, pmdp, ea);
pa = absolute_to_phys(pa);
set_pte(ptep, pfn_pte(pa >> PAGE_SHIFT, __pgprot(flags)));
spin_unlock(&ioremap_mm.page_table_lock);
} else {
unsigned long va, vpn, hash, hpteg;
/*
* If the mm subsystem is not fully up, we cannot create a
* linux page table entry for this mapping. Simply bolt an
* entry in the hardware page table.
*/
vsid = get_kernel_vsid(ea);
va = (vsid << 28) | (ea & 0xFFFFFFF);
vpn = va >> PAGE_SHIFT;
hash = hpt_hash(vpn, 0);
hpteg = ((hash & htab_data.htab_hash_mask)*HPTES_PER_GROUP);
/* Panic if a pte group is full */
if (ppc_md.hpte_insert(hpteg, va, pa >> PAGE_SHIFT, 0,
_PAGE_NO_CACHE|_PAGE_GUARDED|PP_RWXX,
1, 0) == -1) {
panic("map_io_page: could not insert mapping");
}
}
}
/*
* Map the physical range [pa, pa + size) at virtual address ea, one
* page at a time, and return ea adjusted by the sub-page offset of
* the caller's original address.
*/
static void * __ioremap_com(unsigned long addr, unsigned long pa,
unsigned long ea, unsigned long size,
unsigned long flags)
{
unsigned long i;
/* supply default kernel protection if the caller gave none */
if ((flags & _PAGE_PRESENT) == 0)
flags |= pgprot_val(PAGE_KERNEL);
/* non-cacheable or write-through I/O mappings must also be guarded */
if (flags & (_PAGE_NO_CACHE | _PAGE_WRITETHRU))
flags |= _PAGE_GUARDED;
for (i = 0; i < size; i += PAGE_SIZE) {
map_io_page(ea+i, pa+i, flags);
}
return (void *) (ea + (addr & ~PAGE_MASK));
}
/*
* NOTE(review): this hunk interleaves the removed iSeries #ifdef path
* with the pSeries path from the diff — both variants are shown below.
* Maps addr uncached; once mem_init_done, EEH may remap the result.
*/
void *
ioremap(unsigned long addr, unsigned long size)
{
#ifdef CONFIG_PPC_ISERIES
return (void*)addr;
#else
void *ret = __ioremap(addr, size, _PAGE_NO_CACHE);
if(mem_init_done)
return eeh_ioremap(addr, ret); /* may remap the addr */
return ret;
#endif
}
void *
......@@ -332,7 +356,7 @@ static void unmap_im_area_pmd(pgd_t *dir, unsigned long address,
*
* XXX what about calls before mem_init_done (ie python_countermeasures())
*/
void pSeries_iounmap(void *addr)
void iounmap(void *addr)
{
unsigned long address, start, end, size;
struct mm_struct *mm;
......@@ -358,29 +382,18 @@ void pSeries_iounmap(void *addr)
spin_lock(&mm->page_table_lock);
dir = pgd_offset_i(address);
flush_cache_all();
flush_cache_vunmap(address, end);
do {
unmap_im_area_pmd(dir, address, end - address);
address = (address + PGDIR_SIZE) & PGDIR_MASK;
dir++;
} while (address && (address < end));
__flush_tlb_range(mm, start, end);
flush_tlb_kernel_range(start, end);
spin_unlock(&mm->page_table_lock);
return;
}
/* Dispatch iounmap: no-op on iSeries, pSeries path otherwise */
void iounmap(void *addr)
{
#ifdef CONFIG_PPC_ISERIES
/* iSeries I/O Remap is a noop */
return;
#else
/* DRENG / PPPBBB todo */
return pSeries_iounmap(addr);
#endif
}
int iounmap_explicit(void *addr, unsigned long size)
{
struct vm_struct *area;
......@@ -405,216 +418,7 @@ int iounmap_explicit(void *addr, unsigned long size)
return 0;
}
/*
* Removed-side copy from the diff: map [pa, pa + size) at ea page by
* page and return ea plus the sub-page offset of addr.
*/
static void * __ioremap_com(unsigned long addr, unsigned long pa,
unsigned long ea, unsigned long size,
unsigned long flags)
{
unsigned long i;
if ((flags & _PAGE_PRESENT) == 0)
flags |= pgprot_val(PAGE_KERNEL);
if (flags & (_PAGE_NO_CACHE | _PAGE_WRITETHRU))
flags |= _PAGE_GUARDED;
for (i = 0; i < size; i += PAGE_SIZE) {
map_io_page(ea+i, pa+i, flags);
}
return (void *) (ea + (addr & ~PAGE_MASK));
}
/*
* map_io_page currently only called by __ioremap
* map_io_page adds an entry to the ioremap page table
* and adds an entry to the HPT, possibly bolting it
*/
static void map_io_page(unsigned long ea, unsigned long pa, int flags)
{
pgd_t *pgdp;
pmd_t *pmdp;
pte_t *ptep;
unsigned long vsid;
if (mem_init_done) {
/* mm is up: install a Linux PTE in the ioremap page table */
spin_lock(&ioremap_mm.page_table_lock);
pgdp = pgd_offset_i(ea);
pmdp = pmd_alloc(&ioremap_mm, pgdp, ea);
ptep = pte_alloc_kernel(&ioremap_mm, pmdp, ea);
pa = absolute_to_phys(pa);
set_pte(ptep, pfn_pte(pa >> PAGE_SHIFT, __pgprot(flags)));
spin_unlock(&ioremap_mm.page_table_lock);
} else {
unsigned long va, vpn, hash, hpteg;
/*
* If the mm subsystem is not fully up, we cannot create a
* linux page table entry for this mapping. Simply bolt an
* entry in the hardware page table.
*/
vsid = get_kernel_vsid(ea);
va = (vsid << 28) | (ea & 0xFFFFFFF);
vpn = va >> PAGE_SHIFT;
hash = hpt_hash(vpn, 0);
hpteg = ((hash & htab_data.htab_hash_mask)*HPTES_PER_GROUP);
/* Panic if a pte group is full */
if (ppc_md.hpte_insert(hpteg, va, pa >> PAGE_SHIFT, 0,
_PAGE_NO_CACHE|_PAGE_GUARDED|PP_RWXX,
1, 0) == -1) {
panic("map_io_page: could not insert mapping");
}
}
}
/*
* Flush every hashed PTE of every VMA in mm, then clear cpu_vm_mask.
* Takes mm->page_table_lock for the duration of the walk.
*/
void
flush_tlb_mm(struct mm_struct *mm)
{
struct vm_area_struct *mp;
spin_lock(&mm->page_table_lock);
for (mp = mm->mmap; mp != NULL; mp = mp->vm_next)
__flush_tlb_range(mm, mp->vm_start, mp->vm_end);
/* XXX are there races with checking cpu_vm_mask? - Anton */
cpus_clear(mm->cpu_vm_mask);
spin_unlock(&mm->page_table_lock);
}
/*
* Callers should hold the mm->page_table_lock
*
* Flush a single page: pick the page table root from the address
* region, clear the HPTE flag bits in the Linux PTE, and if an HPTE
* had been created, invalidate it from the hash table.
*/
void
flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
unsigned long context = 0;
pgd_t *pgd;
pmd_t *pmd;
pte_t *ptep;
pte_t pte;
int local = 0;
cpumask_t tmp;
switch( REGION_ID(vmaddr) ) {
case VMALLOC_REGION_ID:
pgd = pgd_offset_k( vmaddr );
break;
case IO_REGION_ID:
pgd = pgd_offset_i( vmaddr );
break;
case USER_REGION_ID:
pgd = pgd_offset( vma->vm_mm, vmaddr );
context = vma->vm_mm->context;
/* XXX are there races with checking cpu_vm_mask? - Anton */
tmp = cpumask_of_cpu(smp_processor_id());
/* only this CPU has used the mm -> local tlbie suffices */
if (cpus_equal(vma->vm_mm->cpu_vm_mask, tmp))
local = 1;
break;
default:
panic("flush_tlb_page: invalid region 0x%016lx", vmaddr);
}
if (!pgd_none(*pgd)) {
pmd = pmd_offset(pgd, vmaddr);
if (pmd_present(*pmd)) {
ptep = pte_offset_kernel(pmd, vmaddr);
/* Check if HPTE might exist and flush it if so */
pte = __pte(pte_update(ptep, _PAGE_HPTEFLAGS, 0));
if ( pte_val(pte) & _PAGE_HASHPTE ) {
flush_hash_page(context, vmaddr, pte, local);
}
}
WARN_ON(pmd_hugepage(*pmd));
}
}
struct ppc64_tlb_batch ppc64_tlb_batch[NR_CPUS];
/*
* Walk the page tables for [start, end), collecting PTEs that have a
* hash table entry into the per-CPU batch and shooting the batch down
* via flush_hash_range() whenever it fills (PPC64_TLB_BATCH_NR) and
* once more at the end for any remainder.
*/
void
__flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long end)
{
pgd_t *pgd;
pmd_t *pmd;
pte_t *ptep;
pte_t pte;
unsigned long pgd_end, pmd_end;
unsigned long context = 0;
struct ppc64_tlb_batch *batch = &ppc64_tlb_batch[smp_processor_id()];
unsigned long i = 0;
int local = 0;
cpumask_t tmp;
/* choose the page table root from the address region */
switch(REGION_ID(start)) {
case VMALLOC_REGION_ID:
pgd = pgd_offset_k(start);
break;
case IO_REGION_ID:
pgd = pgd_offset_i(start);
break;
case USER_REGION_ID:
pgd = pgd_offset(mm, start);
context = mm->context;
/* XXX are there races with checking cpu_vm_mask? - Anton */
tmp = cpumask_of_cpu(smp_processor_id());
/* mm only ever ran here -> a local flush is enough */
if (cpus_equal(mm->cpu_vm_mask, tmp))
local = 1;
break;
default:
panic("flush_tlb_range: invalid region for start (%016lx) and end (%016lx)\n", start, end);
}
/* outer loop: one PGD entry at a time, clamped to end */
do {
pgd_end = (start + PGDIR_SIZE) & PGDIR_MASK;
if (pgd_end > end)
pgd_end = end;
if (!pgd_none(*pgd)) {
pmd = pmd_offset(pgd, start);
do {
pmd_end = (start + PMD_SIZE) & PMD_MASK;
if (pmd_end > end)
pmd_end = end;
if (pmd_present(*pmd)) {
ptep = pte_offset_kernel(pmd, start);
do {
if (pte_val(*ptep) & _PAGE_HASHPTE) {
/* clear HPTE flags atomically; re-check since
* another CPU may have raced with us */
pte = __pte(pte_update(ptep, _PAGE_HPTEFLAGS, 0));
if (pte_val(pte) & _PAGE_HASHPTE) {
batch->pte[i] = pte;
batch->addr[i] = start;
i++;
/* batch full: flush now and restart fill */
if (i == PPC64_TLB_BATCH_NR) {
flush_hash_range(context, i, local);
i = 0;
}
}
}
start += PAGE_SIZE;
++ptep;
} while (start < pmd_end);
} else {
WARN_ON(pmd_hugepage(*pmd));
start = pmd_end;
}
++pmd;
} while (start < pgd_end);
} else {
start = pgd_end;
}
++pgd;
} while (start < end);
/* flush any leftover partial batch */
if (i)
flush_hash_range(context, i, local);
}
#endif
void free_initmem(void)
{
......
/*
* This file contains the routines for flushing entries from the
* TLB and MMU hash table.
*
* Derived from arch/ppc64/mm/init.c:
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
*
* Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
* and Cort Dougan (PReP) (cort@cs.nmt.edu)
* Copyright (C) 1996 Paul Mackerras
* Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
*
* Derived from "arch/i386/mm/init.c"
* Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
*
* Dave Engebretsen <engebret@us.ibm.com>
* Rework for PPC64 port.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/percpu.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <asm/hardirq.h>
#include <linux/highmem.h>
#include <asm/rmap.h>
DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);
/* This is declared as we are using the more or less generic
* include/asm-ppc64/tlb.h file -- tgall
*/
DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
unsigned long pte_freelist_forced_free;
/*
* Update the MMU hash table to correspond with a change to
* a Linux PTE. If wrprot is true, it is permissible to
* change the existing HPTE to read-only rather than removing it
* (if we remove it we should clear the _PTE_HPTEFLAGS bits).
*
* Queues the (address, pte) pair into the per-CPU batch; the batch is
* flushed when full or when the batch context changes mid-stream.
*/
void hpte_update(pte_t *ptep, unsigned long pte, int wrprot)
{
struct page *ptepage;
struct mm_struct *mm;
unsigned long addr;
int i;
unsigned long context = 0;
struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
/* recover the owning mm and virtual address from the pte pointer */
ptepage = virt_to_page(ptep);
mm = (struct mm_struct *) ptepage->mapping;
addr = ptep_to_address(ptep);
if (REGION_ID(addr) == USER_REGION_ID)
context = mm->context;
i = batch->index;
/*
* This can happen when we are in the middle of a TLB batch and
* we encounter memory pressure (eg copy_page_range when it tries
* to allocate a new pte). If we have to reclaim memory and end
* up scanning and resetting referenced bits then our batch context
* will change mid stream.
*/
if (unlikely(i != 0 && context != batch->context)) {
flush_tlb_pending();
i = 0;
}
if (i == 0) {
/* first entry of a new batch: record its context and mm */
batch->context = context;
batch->mm = mm;
}
batch->pte[i] = __pte(pte);
batch->addr[i] = addr;
batch->index = ++i;
if (i >= PPC64_TLB_BATCH_NR)
flush_tlb_pending();
}
/*
* Shoot down everything queued in the given per-CPU batch: a single
* entry goes through flush_hash_page(), more through flush_hash_range().
* Must not be called from interrupt context.
*/
void __flush_tlb_pending(struct ppc64_tlb_batch *batch)
{
int i;
cpumask_t tmp = cpumask_of_cpu(smp_processor_id());
int local = 0;
BUG_ON(in_interrupt());
i = batch->index;
/* mm only ever ran on this CPU -> local tlbie suffices */
if (cpus_equal(batch->mm->cpu_vm_mask, tmp))
local = 1;
if (i == 1)
flush_hash_page(batch->context, batch->addr[0], batch->pte[0],
local);
else
flush_hash_range(batch->context, i, local);
batch->index = 0;
}
#ifdef CONFIG_SMP
/* Empty IPI target used by pte_free_now() purely for synchronization */
static void pte_free_smp_sync(void *arg)
{
/* Do nothing, just ensure we sync with all CPUs */
}
#endif
/* This is only called when we are critically out of memory
* (and fail to get a page in pte_free_tlb).
*/
void pte_free_now(struct page *ptepage)
{
pte_freelist_forced_free++;
/* wait for every CPU to pass through the empty IPI handler */
smp_call_function(pte_free_smp_sync, NULL, 0, 1);
pte_free(ptepage);
}
/* RCU callback: free every queued pte page, then the batch page itself */
static void pte_free_rcu_callback(void *arg)
{
struct pte_freelist_batch *batch = arg;
unsigned int i;
for (i = 0; i < batch->index; i++)
pte_free(batch->pages[i]);
free_page((unsigned long)batch);
}
/* Hand a full batch to RCU; pages are freed after a grace period */
void pte_free_submit(struct pte_freelist_batch *batch)
{
INIT_RCU_HEAD(&batch->rcu);
call_rcu(&batch->rcu, pte_free_rcu_callback, batch);
}
/* Submit this CPU's partially-filled pte freelist batch, if any */
void pte_free_finish(void)
{
/* This is safe as we are holding page_table_lock */
struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur);
if (*batchp == NULL)
return;
pte_free_submit(*batchp);
*batchp = NULL;
}
......@@ -12,6 +12,7 @@
#include <asm/processor.h> /* For TASK_SIZE */
#include <asm/mmu.h>
#include <asm/page.h>
#include <asm/tlbflush.h>
#endif /* __ASSEMBLY__ */
/* PMD_SHIFT determines what a second-level page table entry can map */
......@@ -288,72 +289,141 @@ static inline pte_t pte_mkyoung(pte_t pte) {
pte_val(pte) |= _PAGE_ACCESSED; return pte; }
/* Atomic PTE updates */
static inline unsigned long pte_update( pte_t *p, unsigned long clr,
unsigned long set )
static inline unsigned long pte_update(pte_t *p, unsigned long clr)
{
unsigned long old, tmp;
__asm__ __volatile__(
"1: ldarx %0,0,%3 # pte_update\n\
andi. %1,%0,%7\n\
andi. %1,%0,%6\n\
bne- 1b \n\
andc %1,%0,%4 \n\
or %1,%1,%5 \n\
stdcx. %1,0,%3 \n\
bne- 1b"
: "=&r" (old), "=&r" (tmp), "=m" (*p)
: "r" (p), "r" (clr), "r" (set), "m" (*p), "i" (_PAGE_BUSY)
: "r" (p), "r" (clr), "m" (*p), "i" (_PAGE_BUSY)
: "cc" );
return old;
}
/* PTE updating functions */
extern void hpte_update(pte_t *ptep, unsigned long pte, int wrprot);
static inline int ptep_test_and_clear_young(pte_t *ptep)
{
return (pte_update(ptep, _PAGE_ACCESSED, 0) & _PAGE_ACCESSED) != 0;
unsigned long old;
old = pte_update(ptep, _PAGE_ACCESSED | _PAGE_HPTEFLAGS);
if (old & _PAGE_HASHPTE) {
hpte_update(ptep, old, 0);
flush_tlb_pending(); /* XXX generic code doesn't flush */
}
return (old & _PAGE_ACCESSED) != 0;
}
/*
* On RW/DIRTY bit transitions we can avoid flushing the hpte. For the
* moment we always flush but we need to fix hpte_update and test if the
* optimisation is worth it.
*/
#if 1
static inline int ptep_test_and_clear_dirty(pte_t *ptep)
{
return (pte_update(ptep, _PAGE_DIRTY, 0) & _PAGE_DIRTY) != 0;
unsigned long old;
old = pte_update(ptep, _PAGE_DIRTY | _PAGE_HPTEFLAGS);
if (old & _PAGE_HASHPTE)
hpte_update(ptep, old, 0);
return (old & _PAGE_DIRTY) != 0;
}
static inline pte_t ptep_get_and_clear(pte_t *ptep)
static inline void ptep_set_wrprotect(pte_t *ptep)
{
return __pte(pte_update(ptep, ~_PAGE_HPTEFLAGS, 0));
unsigned long old;
old = pte_update(ptep, _PAGE_RW | _PAGE_HPTEFLAGS);
if (old & _PAGE_HASHPTE)
hpte_update(ptep, old, 0);
}
/*
* We currently remove entries from the hashtable regardless of whether
* the entry was young or dirty. The generic routines only flush if the
* entry was young or dirty which is not good enough.
*
* We should be more intelligent about this but for the moment we override
* these functions and force a tlb flush unconditionally
*/
#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
#define ptep_clear_flush_young(__vma, __address, __ptep) \
({ \
int __young = ptep_test_and_clear_young(__ptep); \
flush_tlb_page(__vma, __address); \
__young; \
})
#define __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH
#define ptep_clear_flush_dirty(__vma, __address, __ptep) \
({ \
int __dirty = ptep_test_and_clear_dirty(__ptep); \
flush_tlb_page(__vma, __address); \
__dirty; \
})
#else
/*
* Optimised variant (currently compiled out by the #if 1 above):
* clears _PAGE_DIRTY and only wrprot-updates the HPTE when the PTE
* was simultaneously hashed, writable and dirty.
*/
static inline int ptep_test_and_clear_dirty(pte_t *ptep)
{
unsigned long old;
old = pte_update(ptep, _PAGE_DIRTY);
if ((~old & (_PAGE_HASHPTE | _PAGE_RW | _PAGE_DIRTY)) == 0)
hpte_update(ptep, old, 1);
return (old & _PAGE_DIRTY) != 0;
}
static inline void ptep_set_wrprotect(pte_t *ptep)
{
pte_update(ptep, _PAGE_RW, 0);
unsigned long old;
old = pte_update(ptep, _PAGE_RW);
if ((~old & (_PAGE_HASHPTE | _PAGE_RW | _PAGE_DIRTY)) == 0)
hpte_update(ptep, old, 1);
}
#endif
static inline void ptep_mkdirty(pte_t *ptep)
static inline pte_t ptep_get_and_clear(pte_t *ptep)
{
pte_update(ptep, 0, _PAGE_DIRTY);
unsigned long old = pte_update(ptep, ~0UL);
if (old & _PAGE_HASHPTE)
hpte_update(ptep, old, 0);
return __pte(old);
}
/*
* Macro to mark a page protection value as "uncacheable".
*/
#define pgprot_noncached(prot) (__pgprot(pgprot_val(prot) | _PAGE_NO_CACHE | _PAGE_GUARDED))
static inline void pte_clear(pte_t * ptep)
{
unsigned long old = pte_update(ptep, ~0UL);
#define pte_same(A,B) (((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0)
if (old & _PAGE_HASHPTE)
hpte_update(ptep, old, 0);
}
/*
* set_pte stores a linux PTE into the linux page table.
* On machines which use an MMU hash table we avoid changing the
* _PAGE_HASHPTE bit.
*/
static inline void set_pte(pte_t *ptep, pte_t pte)
{
pte_update(ptep, ~_PAGE_HPTEFLAGS, pte_val(pte) & ~_PAGE_HPTEFLAGS);
if (pte_present(*ptep))
pte_clear(ptep);
*ptep = __pte(pte_val(pte)) & ~_PAGE_HPTEFLAGS;
}
static inline void pte_clear(pte_t * ptep)
{
pte_update(ptep, ~_PAGE_HPTEFLAGS, 0);
}
/*
* Macro to mark a page protection value as "uncacheable".
*/
#define pgprot_noncached(prot) (__pgprot(pgprot_val(prot) | _PAGE_NO_CACHE | _PAGE_GUARDED))
#define pte_same(A,B) (((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0)
extern unsigned long ioremap_bot, ioremap_base;
......
......@@ -12,11 +12,9 @@
#ifndef _PPC64_TLB_H
#define _PPC64_TLB_H
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/page.h>
#include <asm/mmu.h>
struct mmu_gather;
static inline void tlb_flush(struct mmu_gather *tlb);
/* Avoid pulling in another include just for this */
......@@ -29,66 +27,13 @@ static inline void tlb_flush(struct mmu_gather *tlb);
#define tlb_start_vma(tlb, vma) do { } while (0)
#define tlb_end_vma(tlb, vma) do { } while (0)
/* Should make this at least as large as the generic batch size, but it
* takes up too much space */
#define PPC64_TLB_BATCH_NR 192
struct ppc64_tlb_batch {
unsigned long index;
pte_t pte[PPC64_TLB_BATCH_NR];
unsigned long addr[PPC64_TLB_BATCH_NR];
unsigned long vaddr[PPC64_TLB_BATCH_NR];
};
extern struct ppc64_tlb_batch ppc64_tlb_batch[NR_CPUS];
static inline void __tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep,
unsigned long address)
{
int cpu = smp_processor_id();
struct ppc64_tlb_batch *batch = &ppc64_tlb_batch[cpu];
unsigned long i = batch->index;
pte_t pte;
cpumask_t local_cpumask = cpumask_of_cpu(cpu);
if (pte_val(*ptep) & _PAGE_HASHPTE) {
pte = __pte(pte_update(ptep, _PAGE_HPTEFLAGS, 0));
if (pte_val(pte) & _PAGE_HASHPTE) {
batch->pte[i] = pte;
batch->addr[i] = address;
i++;
if (i == PPC64_TLB_BATCH_NR) {
int local = 0;
if (cpus_equal(tlb->mm->cpu_vm_mask, local_cpumask))
local = 1;
flush_hash_range(tlb->mm->context, i, local);
i = 0;
}
}
}
batch->index = i;
}
#define __tlb_remove_tlb_entry(tlb, pte, address) do { } while (0)
extern void pte_free_finish(void);
static inline void tlb_flush(struct mmu_gather *tlb)
{
int cpu = smp_processor_id();
struct ppc64_tlb_batch *batch = &ppc64_tlb_batch[cpu];
int local = 0;
cpumask_t local_cpumask = cpumask_of_cpu(smp_processor_id());
if (cpus_equal(tlb->mm->cpu_vm_mask, local_cpumask))
local = 1;
flush_hash_range(tlb->mm->context, batch->index, local);
batch->index = 0;
flush_tlb_pending();
pte_free_finish();
}
......
#ifndef _PPC64_TLBFLUSH_H
#define _PPC64_TLBFLUSH_H
#include <linux/threads.h>
#include <linux/mm.h>
#include <asm/page.h>
/*
* TLB flushing:
*
......@@ -15,22 +11,39 @@
* - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
*/
extern void flush_tlb_mm(struct mm_struct *mm);
extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
extern void __flush_tlb_range(struct mm_struct *mm,
unsigned long start, unsigned long end);
#define flush_tlb_range(vma, start, end) \
__flush_tlb_range(vma->vm_mm, start, end)
#include <linux/percpu.h>
#include <asm/page.h>
#define PPC64_TLB_BATCH_NR 192
#define flush_tlb_kernel_range(start, end) \
__flush_tlb_range(&init_mm, (start), (end))
struct mm_struct;
struct ppc64_tlb_batch {
unsigned long index;
unsigned long context;
struct mm_struct *mm;
pte_t pte[PPC64_TLB_BATCH_NR];
unsigned long addr[PPC64_TLB_BATCH_NR];
unsigned long vaddr[PPC64_TLB_BATCH_NR];
};
DECLARE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);
static inline void flush_tlb_pgtables(struct mm_struct *mm,
unsigned long start, unsigned long end)
extern void __flush_tlb_pending(struct ppc64_tlb_batch *batch);
static inline void flush_tlb_pending(void)
{
/* PPC has hw page tables. */
struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
if (batch->index)
__flush_tlb_pending(batch);
}
#define flush_tlb_mm(mm) flush_tlb_pending()
#define flush_tlb_page(vma, addr) flush_tlb_pending()
#define flush_tlb_range(vma, start, end) \
do { (void)(start); flush_tlb_pending(); } while (0)
#define flush_tlb_kernel_range(start, end) flush_tlb_pending()
#define flush_tlb_pgtables(mm, start, end) do { } while (0)
extern void flush_hash_page(unsigned long context, unsigned long ea, pte_t pte,
int local);
void flush_hash_range(unsigned long context, unsigned long number, int local);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment