Commit e28f7faf authored by David Gibson, committed by Paul Mackerras

[PATCH] Four level pagetables for ppc64

Implement 4-level pagetables for ppc64

This patch implements full four-level page tables for ppc64, thereby
extending the usable user address range to 44 bits (16T).

The patch uses a full page for the tables at the bottom and top level,
and a quarter page for the intermediate levels.  It uses full 64-bit
pointers at every level, thus also increasing the addressable range of
physical memory.  This patch also tweaks the VSID allocation to allow a
matching range for user addresses (this halves the number of available
contexts) and adds some #if and BUILD_BUG sanity checks.
Signed-off-by: David Gibson <dwg@au1.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
parent decd300b
...@@ -302,7 +302,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) ...@@ -302,7 +302,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
int local = 0; int local = 0;
cpumask_t tmp; cpumask_t tmp;
if ((ea & ~REGION_MASK) > EADDR_MASK) if ((ea & ~REGION_MASK) >= PGTABLE_RANGE)
return 1; return 1;
switch (REGION_ID(ea)) { switch (REGION_ID(ea)) {
......
...@@ -27,124 +27,91 @@ ...@@ -27,124 +27,91 @@
#include <linux/sysctl.h> #include <linux/sysctl.h>
#define HUGEPGDIR_SHIFT (HPAGE_SHIFT + PAGE_SHIFT - 3) /* Modelled after find_linux_pte() */
#define HUGEPGDIR_SIZE (1UL << HUGEPGDIR_SHIFT) pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
#define HUGEPGDIR_MASK (~(HUGEPGDIR_SIZE-1))
#define HUGEPTE_INDEX_SIZE 9
#define HUGEPGD_INDEX_SIZE 10
#define PTRS_PER_HUGEPTE (1 << HUGEPTE_INDEX_SIZE)
#define PTRS_PER_HUGEPGD (1 << HUGEPGD_INDEX_SIZE)
static inline int hugepgd_index(unsigned long addr)
{
return (addr & ~REGION_MASK) >> HUGEPGDIR_SHIFT;
}
static pud_t *hugepgd_offset(struct mm_struct *mm, unsigned long addr)
{ {
int index; pgd_t *pg;
pud_t *pu;
if (! mm->context.huge_pgdir) pmd_t *pm;
return NULL; pte_t *pt;
BUG_ON(! in_hugepage_area(mm->context, addr));
index = hugepgd_index(addr); addr &= HPAGE_MASK;
BUG_ON(index >= PTRS_PER_HUGEPGD);
return (pud_t *)(mm->context.huge_pgdir + index); pg = pgd_offset(mm, addr);
} if (!pgd_none(*pg)) {
pu = pud_offset(pg, addr);
static inline pte_t *hugepte_offset(pud_t *dir, unsigned long addr) if (!pud_none(*pu)) {
{ pm = pmd_offset(pu, addr);
int index; pt = (pte_t *)pm;
BUG_ON(!pmd_none(*pm)
&& !(pte_present(*pt) && pte_huge(*pt)));
return pt;
}
}
if (pud_none(*dir))
return NULL; return NULL;
index = (addr >> HPAGE_SHIFT) % PTRS_PER_HUGEPTE;
return (pte_t *)pud_page(*dir) + index;
} }
static pud_t *hugepgd_alloc(struct mm_struct *mm, unsigned long addr) pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
{ {
BUG_ON(! in_hugepage_area(mm->context, addr)); pgd_t *pg;
pud_t *pu;
if (! mm->context.huge_pgdir) { pmd_t *pm;
pgd_t *new; pte_t *pt;
spin_unlock(&mm->page_table_lock);
/* Don't use pgd_alloc(), because we want __GFP_REPEAT */
new = kmem_cache_alloc(zero_cache, GFP_KERNEL | __GFP_REPEAT);
BUG_ON(memcmp(new, empty_zero_page, PAGE_SIZE));
spin_lock(&mm->page_table_lock);
/* BUG_ON(! in_hugepage_area(mm->context, addr));
* Because we dropped the lock, we should re-check the
* entry, as somebody else could have populated it..
*/
if (mm->context.huge_pgdir)
pgd_free(new);
else
mm->context.huge_pgdir = new;
}
return hugepgd_offset(mm, addr);
}
static pte_t *hugepte_alloc(struct mm_struct *mm, pud_t *dir, unsigned long addr) addr &= HPAGE_MASK;
{
if (! pud_present(*dir)) {
pte_t *new;
spin_unlock(&mm->page_table_lock); pg = pgd_offset(mm, addr);
new = kmem_cache_alloc(zero_cache, GFP_KERNEL | __GFP_REPEAT); pu = pud_alloc(mm, pg, addr);
BUG_ON(memcmp(new, empty_zero_page, PAGE_SIZE));
spin_lock(&mm->page_table_lock);
/*
* Because we dropped the lock, we should re-check the
* entry, as somebody else could have populated it..
*/
if (pud_present(*dir)) {
if (new)
kmem_cache_free(zero_cache, new);
} else {
struct page *ptepage;
if (! new) if (pu) {
return NULL; pm = pmd_alloc(mm, pu, addr);
ptepage = virt_to_page(new); if (pm) {
ptepage->mapping = (void *) mm; pt = (pte_t *)pm;
ptepage->index = addr & HUGEPGDIR_MASK; BUG_ON(!pmd_none(*pm)
pud_populate(mm, dir, new); && !(pte_present(*pt) && pte_huge(*pt)));
return pt;
} }
} }
return hugepte_offset(dir, addr); return NULL;
} }
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) #define HUGEPTE_BATCH_SIZE (HPAGE_SIZE / PMD_SIZE)
{
pud_t *pud;
BUG_ON(! in_hugepage_area(mm->context, addr)); void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
{
int i;
pud = hugepgd_offset(mm, addr); if (pte_present(*ptep)) {
if (! pud) pte_clear(mm, addr, ptep);
return NULL; flush_tlb_pending();
}
return hugepte_offset(pud, addr); for (i = 0; i < HUGEPTE_BATCH_SIZE; i++) {
*ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
ptep++;
}
} }
pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
{ {
pud_t *pud; unsigned long old = pte_update(ptep, ~0UL);
int i;
BUG_ON(! in_hugepage_area(mm->context, addr)); if (old & _PAGE_HASHPTE)
hpte_update(mm, addr, old, 0);
pud = hugepgd_alloc(mm, addr); for (i = 1; i < HUGEPTE_BATCH_SIZE; i++)
if (! pud) ptep[i] = __pte(0);
return NULL;
return hugepte_alloc(mm, pud, addr); return __pte(old);
} }
/* /*
...@@ -541,42 +508,6 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, ...@@ -541,42 +508,6 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
} }
} }
void hugetlb_mm_free_pgd(struct mm_struct *mm)
{
int i;
pgd_t *pgdir;
spin_lock(&mm->page_table_lock);
pgdir = mm->context.huge_pgdir;
if (! pgdir)
goto out;
mm->context.huge_pgdir = NULL;
/* cleanup any hugepte pages leftover */
for (i = 0; i < PTRS_PER_HUGEPGD; i++) {
pud_t *pud = (pud_t *)(pgdir + i);
if (! pud_none(*pud)) {
pte_t *pte = (pte_t *)pud_page(*pud);
struct page *ptepage = virt_to_page(pte);
ptepage->mapping = NULL;
BUG_ON(memcmp(pte, empty_zero_page, PAGE_SIZE));
kmem_cache_free(zero_cache, pte);
}
pud_clear(pud);
}
BUG_ON(memcmp(pgdir, empty_zero_page, PAGE_SIZE));
kmem_cache_free(zero_cache, pgdir);
out:
spin_unlock(&mm->page_table_lock);
}
int hash_huge_page(struct mm_struct *mm, unsigned long access, int hash_huge_page(struct mm_struct *mm, unsigned long access,
unsigned long ea, unsigned long vsid, int local) unsigned long ea, unsigned long vsid, int local)
{ {
......
...@@ -31,7 +31,7 @@ static int get_free_im_addr(unsigned long size, unsigned long *im_addr) ...@@ -31,7 +31,7 @@ static int get_free_im_addr(unsigned long size, unsigned long *im_addr)
break; break;
if ((unsigned long)tmp->addr >= ioremap_bot) if ((unsigned long)tmp->addr >= ioremap_bot)
addr = tmp->size + (unsigned long) tmp->addr; addr = tmp->size + (unsigned long) tmp->addr;
if (addr > IMALLOC_END-size) if (addr >= IMALLOC_END-size)
return 1; return 1;
} }
*im_addr = addr; *im_addr = addr;
......
...@@ -66,6 +66,14 @@ ...@@ -66,6 +66,14 @@
#include <asm/vdso.h> #include <asm/vdso.h>
#include <asm/imalloc.h> #include <asm/imalloc.h>
#if PGTABLE_RANGE > USER_VSID_RANGE
#warning Limited user VSID range means pagetable space is wasted
#endif
#if (TASK_SIZE_USER64 < PGTABLE_RANGE) && (TASK_SIZE_USER64 < USER_VSID_RANGE)
#warning TASK_SIZE is smaller than it needs to be.
#endif
int mem_init_done; int mem_init_done;
unsigned long ioremap_bot = IMALLOC_BASE; unsigned long ioremap_bot = IMALLOC_BASE;
static unsigned long phbs_io_bot = PHBS_IO_BASE; static unsigned long phbs_io_bot = PHBS_IO_BASE;
...@@ -226,7 +234,7 @@ void __iomem * __ioremap(unsigned long addr, unsigned long size, ...@@ -226,7 +234,7 @@ void __iomem * __ioremap(unsigned long addr, unsigned long size,
* Before that, we map using addresses going * Before that, we map using addresses going
* up from ioremap_bot. imalloc will use * up from ioremap_bot. imalloc will use
* the addresses from ioremap_bot through * the addresses from ioremap_bot through
* IMALLOC_END (0xE000001fffffffff) * IMALLOC_END
* *
*/ */
pa = addr & PAGE_MASK; pa = addr & PAGE_MASK;
...@@ -417,12 +425,6 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) ...@@ -417,12 +425,6 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
int index; int index;
int err; int err;
#ifdef CONFIG_HUGETLB_PAGE
/* We leave htlb_segs as it was, but for a fork, we need to
* clear the huge_pgdir. */
mm->context.huge_pgdir = NULL;
#endif
again: again:
if (!idr_pre_get(&mmu_context_idr, GFP_KERNEL)) if (!idr_pre_get(&mmu_context_idr, GFP_KERNEL))
return -ENOMEM; return -ENOMEM;
...@@ -453,8 +455,6 @@ void destroy_context(struct mm_struct *mm) ...@@ -453,8 +455,6 @@ void destroy_context(struct mm_struct *mm)
spin_unlock(&mmu_context_lock); spin_unlock(&mmu_context_lock);
mm->context.id = NO_CONTEXT; mm->context.id = NO_CONTEXT;
hugetlb_mm_free_pgd(mm);
} }
/* /*
...@@ -833,23 +833,43 @@ void __iomem * reserve_phb_iospace(unsigned long size) ...@@ -833,23 +833,43 @@ void __iomem * reserve_phb_iospace(unsigned long size)
return virt_addr; return virt_addr;
} }
kmem_cache_t *zero_cache; static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags)
static void zero_ctor(void *pte, kmem_cache_t *cache, unsigned long flags)
{ {
memset(pte, 0, PAGE_SIZE); memset(addr, 0, kmem_cache_size(cache));
} }
static const int pgtable_cache_size[2] = {
PTE_TABLE_SIZE, PMD_TABLE_SIZE
};
static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = {
"pgd_pte_cache", "pud_pmd_cache",
};
kmem_cache_t *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)];
void pgtable_cache_init(void) void pgtable_cache_init(void)
{ {
zero_cache = kmem_cache_create("zero", int i;
PAGE_SIZE,
0, BUILD_BUG_ON(PTE_TABLE_SIZE != pgtable_cache_size[PTE_CACHE_NUM]);
SLAB_HWCACHE_ALIGN | SLAB_MUST_HWCACHE_ALIGN, BUILD_BUG_ON(PMD_TABLE_SIZE != pgtable_cache_size[PMD_CACHE_NUM]);
BUILD_BUG_ON(PUD_TABLE_SIZE != pgtable_cache_size[PUD_CACHE_NUM]);
BUILD_BUG_ON(PGD_TABLE_SIZE != pgtable_cache_size[PGD_CACHE_NUM]);
for (i = 0; i < ARRAY_SIZE(pgtable_cache_size); i++) {
int size = pgtable_cache_size[i];
const char *name = pgtable_cache_name[i];
pgtable_cache[i] = kmem_cache_create(name,
size, size,
SLAB_HWCACHE_ALIGN
| SLAB_MUST_HWCACHE_ALIGN,
zero_ctor, zero_ctor,
NULL); NULL);
if (!zero_cache) if (! pgtable_cache[i])
panic("pgtable_cache_init(): could not create zero_cache!\n"); panic("pgtable_cache_init(): could not create %s!\n",
name);
}
} }
pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr, pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr,
......
...@@ -91,7 +91,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE) ...@@ -91,7 +91,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
0: /* user address: proto-VSID = context<<15 | ESID */ 0: /* user address: proto-VSID = context<<15 | ESID */
li r11,SLB_VSID_USER li r11,SLB_VSID_USER
srdi. r9,r3,13 srdi. r9,r3,USER_ESID_BITS
bne- 8f /* invalid ea bits set */ bne- 8f /* invalid ea bits set */
#ifdef CONFIG_HUGETLB_PAGE #ifdef CONFIG_HUGETLB_PAGE
......
...@@ -41,7 +41,58 @@ DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); ...@@ -41,7 +41,58 @@ DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur); DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
unsigned long pte_freelist_forced_free; unsigned long pte_freelist_forced_free;
void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage) struct pte_freelist_batch
{
struct rcu_head rcu;
unsigned int index;
pgtable_free_t tables[0];
};
DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
unsigned long pte_freelist_forced_free;
#define PTE_FREELIST_SIZE \
((PAGE_SIZE - sizeof(struct pte_freelist_batch)) \
/ sizeof(pgtable_free_t))
#ifdef CONFIG_SMP
static void pte_free_smp_sync(void *arg)
{
/* Do nothing, just ensure we sync with all CPUs */
}
#endif
/* This is only called when we are critically out of memory
* (and fail to get a page in pte_free_tlb).
*/
static void pgtable_free_now(pgtable_free_t pgf)
{
pte_freelist_forced_free++;
smp_call_function(pte_free_smp_sync, NULL, 0, 1);
pgtable_free(pgf);
}
static void pte_free_rcu_callback(struct rcu_head *head)
{
struct pte_freelist_batch *batch =
container_of(head, struct pte_freelist_batch, rcu);
unsigned int i;
for (i = 0; i < batch->index; i++)
pgtable_free(batch->tables[i]);
free_page((unsigned long)batch);
}
static void pte_free_submit(struct pte_freelist_batch *batch)
{
INIT_RCU_HEAD(&batch->rcu);
call_rcu(&batch->rcu, pte_free_rcu_callback);
}
void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf)
{ {
/* This is safe as we are holding page_table_lock */ /* This is safe as we are holding page_table_lock */
cpumask_t local_cpumask = cpumask_of_cpu(smp_processor_id()); cpumask_t local_cpumask = cpumask_of_cpu(smp_processor_id());
...@@ -49,19 +100,19 @@ void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage) ...@@ -49,19 +100,19 @@ void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage)
if (atomic_read(&tlb->mm->mm_users) < 2 || if (atomic_read(&tlb->mm->mm_users) < 2 ||
cpus_equal(tlb->mm->cpu_vm_mask, local_cpumask)) { cpus_equal(tlb->mm->cpu_vm_mask, local_cpumask)) {
pte_free(ptepage); pgtable_free(pgf);
return; return;
} }
if (*batchp == NULL) { if (*batchp == NULL) {
*batchp = (struct pte_freelist_batch *)__get_free_page(GFP_ATOMIC); *batchp = (struct pte_freelist_batch *)__get_free_page(GFP_ATOMIC);
if (*batchp == NULL) { if (*batchp == NULL) {
pte_free_now(ptepage); pgtable_free_now(pgf);
return; return;
} }
(*batchp)->index = 0; (*batchp)->index = 0;
} }
(*batchp)->pages[(*batchp)->index++] = ptepage; (*batchp)->tables[(*batchp)->index++] = pgf;
if ((*batchp)->index == PTE_FREELIST_SIZE) { if ((*batchp)->index == PTE_FREELIST_SIZE) {
pte_free_submit(*batchp); pte_free_submit(*batchp);
*batchp = NULL; *batchp = NULL;
...@@ -132,42 +183,6 @@ void __flush_tlb_pending(struct ppc64_tlb_batch *batch) ...@@ -132,42 +183,6 @@ void __flush_tlb_pending(struct ppc64_tlb_batch *batch)
put_cpu(); put_cpu();
} }
#ifdef CONFIG_SMP
static void pte_free_smp_sync(void *arg)
{
/* Do nothing, just ensure we sync with all CPUs */
}
#endif
/* This is only called when we are critically out of memory
* (and fail to get a page in pte_free_tlb).
*/
void pte_free_now(struct page *ptepage)
{
pte_freelist_forced_free++;
smp_call_function(pte_free_smp_sync, NULL, 0, 1);
pte_free(ptepage);
}
static void pte_free_rcu_callback(struct rcu_head *head)
{
struct pte_freelist_batch *batch =
container_of(head, struct pte_freelist_batch, rcu);
unsigned int i;
for (i = 0; i < batch->index; i++)
pte_free(batch->pages[i]);
free_page((unsigned long)batch);
}
void pte_free_submit(struct pte_freelist_batch *batch)
{
INIT_RCU_HEAD(&batch->rcu);
call_rcu(&batch->rcu, pte_free_rcu_callback);
}
void pte_free_finish(void) void pte_free_finish(void)
{ {
/* This is safe as we are holding page_table_lock */ /* This is safe as we are holding page_table_lock */
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
*/ */
#define PHBS_IO_BASE VMALLOC_END #define PHBS_IO_BASE VMALLOC_END
#define IMALLOC_BASE (PHBS_IO_BASE + 0x80000000ul) /* Reserve 2 gigs for PHBs */ #define IMALLOC_BASE (PHBS_IO_BASE + 0x80000000ul) /* Reserve 2 gigs for PHBs */
#define IMALLOC_END (VMALLOC_START + EADDR_MASK) #define IMALLOC_END (VMALLOC_START + PGTABLE_RANGE)
/* imalloc region types */ /* imalloc region types */
......
...@@ -259,8 +259,10 @@ extern void stabs_alloc(void); ...@@ -259,8 +259,10 @@ extern void stabs_alloc(void);
#define VSID_BITS 36 #define VSID_BITS 36
#define VSID_MODULUS ((1UL<<VSID_BITS)-1) #define VSID_MODULUS ((1UL<<VSID_BITS)-1)
#define CONTEXT_BITS 20 #define CONTEXT_BITS 19
#define USER_ESID_BITS 15 #define USER_ESID_BITS 16
#define USER_VSID_RANGE (1UL << (USER_ESID_BITS + SID_SHIFT))
/* /*
* This macro generates asm code to compute the VSID scramble * This macro generates asm code to compute the VSID scramble
...@@ -302,7 +304,6 @@ typedef unsigned long mm_context_id_t; ...@@ -302,7 +304,6 @@ typedef unsigned long mm_context_id_t;
typedef struct { typedef struct {
mm_context_id_t id; mm_context_id_t id;
#ifdef CONFIG_HUGETLB_PAGE #ifdef CONFIG_HUGETLB_PAGE
pgd_t *huge_pgdir;
u16 htlb_segs; /* bitmask */ u16 htlb_segs; /* bitmask */
#endif #endif
} mm_context_t; } mm_context_t;
......
...@@ -46,6 +46,7 @@ ...@@ -46,6 +46,7 @@
#define ARCH_HAS_HUGEPAGE_ONLY_RANGE #define ARCH_HAS_HUGEPAGE_ONLY_RANGE
#define ARCH_HAS_PREPARE_HUGEPAGE_RANGE #define ARCH_HAS_PREPARE_HUGEPAGE_RANGE
#define ARCH_HAS_SETCLEAR_HUGE_PTE
#define touches_hugepage_low_range(mm, addr, len) \ #define touches_hugepage_low_range(mm, addr, len) \
(LOW_ESID_MASK((addr), (len)) & mm->context.htlb_segs) (LOW_ESID_MASK((addr), (len)) & mm->context.htlb_segs)
...@@ -125,36 +126,42 @@ extern void copy_user_page(void *to, void *from, unsigned long vaddr, struct pag ...@@ -125,36 +126,42 @@ extern void copy_user_page(void *to, void *from, unsigned long vaddr, struct pag
* Entries in the pte table are 64b, while entries in the pgd & pmd are 32b. * Entries in the pte table are 64b, while entries in the pgd & pmd are 32b.
*/ */
typedef struct { unsigned long pte; } pte_t; typedef struct { unsigned long pte; } pte_t;
typedef struct { unsigned int pmd; } pmd_t; typedef struct { unsigned long pmd; } pmd_t;
typedef struct { unsigned int pgd; } pgd_t; typedef struct { unsigned long pud; } pud_t;
typedef struct { unsigned long pgd; } pgd_t;
typedef struct { unsigned long pgprot; } pgprot_t; typedef struct { unsigned long pgprot; } pgprot_t;
#define pte_val(x) ((x).pte) #define pte_val(x) ((x).pte)
#define pmd_val(x) ((x).pmd) #define pmd_val(x) ((x).pmd)
#define pud_val(x) ((x).pud)
#define pgd_val(x) ((x).pgd) #define pgd_val(x) ((x).pgd)
#define pgprot_val(x) ((x).pgprot) #define pgprot_val(x) ((x).pgprot)
#define __pte(x) ((pte_t) { (x) } ) #define __pte(x) ((pte_t) { (x) })
#define __pmd(x) ((pmd_t) { (x) } ) #define __pmd(x) ((pmd_t) { (x) })
#define __pgd(x) ((pgd_t) { (x) } ) #define __pud(x) ((pud_t) { (x) })
#define __pgprot(x) ((pgprot_t) { (x) } ) #define __pgd(x) ((pgd_t) { (x) })
#define __pgprot(x) ((pgprot_t) { (x) })
#else #else
/* /*
* .. while these make it easier on the compiler * .. while these make it easier on the compiler
*/ */
typedef unsigned long pte_t; typedef unsigned long pte_t;
typedef unsigned int pmd_t; typedef unsigned long pmd_t;
typedef unsigned int pgd_t; typedef unsigned long pud_t;
typedef unsigned long pgd_t;
typedef unsigned long pgprot_t; typedef unsigned long pgprot_t;
#define pte_val(x) (x) #define pte_val(x) (x)
#define pmd_val(x) (x) #define pmd_val(x) (x)
#define pud_val(x) (x)
#define pgd_val(x) (x) #define pgd_val(x) (x)
#define pgprot_val(x) (x) #define pgprot_val(x) (x)
#define __pte(x) (x) #define __pte(x) (x)
#define __pmd(x) (x) #define __pmd(x) (x)
#define __pud(x) (x)
#define __pgd(x) (x) #define __pgd(x) (x)
#define __pgprot(x) (x) #define __pgprot(x) (x)
...@@ -208,9 +215,6 @@ extern u64 ppc64_pft_size; /* Log 2 of page table size */ ...@@ -208,9 +215,6 @@ extern u64 ppc64_pft_size; /* Log 2 of page table size */
#define USER_REGION_ID (0UL) #define USER_REGION_ID (0UL)
#define REGION_ID(ea) (((unsigned long)(ea)) >> REGION_SHIFT) #define REGION_ID(ea) (((unsigned long)(ea)) >> REGION_SHIFT)
#define __bpn_to_ba(x) ((((unsigned long)(x)) << PAGE_SHIFT) + KERNELBASE)
#define __ba_to_bpn(x) ((((unsigned long)(x)) & ~REGION_MASK) >> PAGE_SHIFT)
#define __va(x) ((void *)((unsigned long)(x) + KERNELBASE)) #define __va(x) ((void *)((unsigned long)(x) + KERNELBASE))
#ifdef CONFIG_DISCONTIGMEM #ifdef CONFIG_DISCONTIGMEM
......
...@@ -6,7 +6,12 @@ ...@@ -6,7 +6,12 @@
#include <linux/cpumask.h> #include <linux/cpumask.h>
#include <linux/percpu.h> #include <linux/percpu.h>
extern kmem_cache_t *zero_cache; extern kmem_cache_t *pgtable_cache[];
#define PTE_CACHE_NUM 0
#define PMD_CACHE_NUM 1
#define PUD_CACHE_NUM 1
#define PGD_CACHE_NUM 0
/* /*
* This program is free software; you can redistribute it and/or * This program is free software; you can redistribute it and/or
...@@ -15,30 +20,40 @@ extern kmem_cache_t *zero_cache; ...@@ -15,30 +20,40 @@ extern kmem_cache_t *zero_cache;
* 2 of the License, or (at your option) any later version. * 2 of the License, or (at your option) any later version.
*/ */
static inline pgd_t * static inline pgd_t *pgd_alloc(struct mm_struct *mm)
pgd_alloc(struct mm_struct *mm)
{ {
return kmem_cache_alloc(zero_cache, GFP_KERNEL); return kmem_cache_alloc(pgtable_cache[PGD_CACHE_NUM], GFP_KERNEL);
} }
static inline void static inline void pgd_free(pgd_t *pgd)
pgd_free(pgd_t *pgd)
{ {
kmem_cache_free(zero_cache, pgd); kmem_cache_free(pgtable_cache[PGD_CACHE_NUM], pgd);
}
#define pgd_populate(MM, PGD, PUD) pgd_set(PGD, PUD)
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
{
return kmem_cache_alloc(pgtable_cache[PUD_CACHE_NUM],
GFP_KERNEL|__GFP_REPEAT);
}
static inline void pud_free(pud_t *pud)
{
kmem_cache_free(pgtable_cache[PUD_CACHE_NUM], pud);
} }
#define pud_populate(MM, PUD, PMD) pud_set(PUD, PMD) #define pud_populate(MM, PUD, PMD) pud_set(PUD, PMD)
static inline pmd_t * static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
{ {
return kmem_cache_alloc(zero_cache, GFP_KERNEL|__GFP_REPEAT); return kmem_cache_alloc(pgtable_cache[PMD_CACHE_NUM],
GFP_KERNEL|__GFP_REPEAT);
} }
static inline void static inline void pmd_free(pmd_t *pmd)
pmd_free(pmd_t *pmd)
{ {
kmem_cache_free(zero_cache, pmd); kmem_cache_free(pgtable_cache[PMD_CACHE_NUM], pmd);
} }
#define pmd_populate_kernel(mm, pmd, pte) pmd_set(pmd, pte) #define pmd_populate_kernel(mm, pmd, pte) pmd_set(pmd, pte)
...@@ -47,44 +62,58 @@ pmd_free(pmd_t *pmd) ...@@ -47,44 +62,58 @@ pmd_free(pmd_t *pmd)
static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{ {
return kmem_cache_alloc(zero_cache, GFP_KERNEL|__GFP_REPEAT); return kmem_cache_alloc(pgtable_cache[PTE_CACHE_NUM],
GFP_KERNEL|__GFP_REPEAT);
} }
static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
{ {
pte_t *pte = kmem_cache_alloc(zero_cache, GFP_KERNEL|__GFP_REPEAT); return virt_to_page(pte_alloc_one_kernel(mm, address));
if (pte)
return virt_to_page(pte);
return NULL;
} }
static inline void pte_free_kernel(pte_t *pte) static inline void pte_free_kernel(pte_t *pte)
{ {
kmem_cache_free(zero_cache, pte); kmem_cache_free(pgtable_cache[PTE_CACHE_NUM], pte);
} }
static inline void pte_free(struct page *ptepage) static inline void pte_free(struct page *ptepage)
{ {
kmem_cache_free(zero_cache, page_address(ptepage)); pte_free_kernel(page_address(ptepage));
} }
struct pte_freelist_batch #define PGF_CACHENUM_MASK 0xf
typedef struct pgtable_free {
unsigned long val;
} pgtable_free_t;
static inline pgtable_free_t pgtable_free_cache(void *p, int cachenum,
unsigned long mask)
{ {
struct rcu_head rcu; BUG_ON(cachenum > PGF_CACHENUM_MASK);
unsigned int index;
struct page * pages[0];
};
#define PTE_FREELIST_SIZE ((PAGE_SIZE - sizeof(struct pte_freelist_batch)) / \ return (pgtable_free_t){.val = ((unsigned long) p & ~mask) | cachenum};
sizeof(struct page *)) }
extern void pte_free_now(struct page *ptepage); static inline void pgtable_free(pgtable_free_t pgf)
extern void pte_free_submit(struct pte_freelist_batch *batch); {
void *p = (void *)(pgf.val & ~PGF_CACHENUM_MASK);
int cachenum = pgf.val & PGF_CACHENUM_MASK;
DECLARE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur); kmem_cache_free(pgtable_cache[cachenum], p);
}
void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage); void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf);
#define __pmd_free_tlb(tlb, pmd) __pte_free_tlb(tlb, virt_to_page(pmd))
#define __pte_free_tlb(tlb, ptepage) \
pgtable_free_tlb(tlb, pgtable_free_cache(page_address(ptepage), \
PTE_CACHE_NUM, PTE_TABLE_SIZE-1))
#define __pmd_free_tlb(tlb, pmd) \
pgtable_free_tlb(tlb, pgtable_free_cache(pmd, \
PMD_CACHE_NUM, PMD_TABLE_SIZE-1))
#define __pud_free_tlb(tlb, pmd) \
pgtable_free_tlb(tlb, pgtable_free_cache(pud, \
PUD_CACHE_NUM, PUD_TABLE_SIZE-1))
#define check_pgt_cache() do { } while (0) #define check_pgt_cache() do { } while (0)
......
...@@ -15,19 +15,24 @@ ...@@ -15,19 +15,24 @@
#include <asm/tlbflush.h> #include <asm/tlbflush.h>
#endif /* __ASSEMBLY__ */ #endif /* __ASSEMBLY__ */
#include <asm-generic/pgtable-nopud.h>
/* /*
* Entries per page directory level. The PTE level must use a 64b record * Entries per page directory level. The PTE level must use a 64b record
* for each page table entry. The PMD and PGD level use a 32b record for * for each page table entry. The PMD and PGD level use a 32b record for
* each entry by assuming that each entry is page aligned. * each entry by assuming that each entry is page aligned.
*/ */
#define PTE_INDEX_SIZE 9 #define PTE_INDEX_SIZE 9
#define PMD_INDEX_SIZE 10 #define PMD_INDEX_SIZE 7
#define PGD_INDEX_SIZE 10 #define PUD_INDEX_SIZE 7
#define PGD_INDEX_SIZE 9
#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE)
#define PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE)
#define PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE)
#define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE)
#define PTRS_PER_PTE (1 << PTE_INDEX_SIZE) #define PTRS_PER_PTE (1 << PTE_INDEX_SIZE)
#define PTRS_PER_PMD (1 << PMD_INDEX_SIZE) #define PTRS_PER_PMD (1 << PMD_INDEX_SIZE)
#define PTRS_PER_PUD (1 << PMD_INDEX_SIZE)
#define PTRS_PER_PGD (1 << PGD_INDEX_SIZE) #define PTRS_PER_PGD (1 << PGD_INDEX_SIZE)
/* PMD_SHIFT determines what a second-level page table entry can map */ /* PMD_SHIFT determines what a second-level page table entry can map */
...@@ -35,8 +40,13 @@ ...@@ -35,8 +40,13 @@
#define PMD_SIZE (1UL << PMD_SHIFT) #define PMD_SIZE (1UL << PMD_SHIFT)
#define PMD_MASK (~(PMD_SIZE-1)) #define PMD_MASK (~(PMD_SIZE-1))
/* PGDIR_SHIFT determines what a third-level page table entry can map */ /* PUD_SHIFT determines what a third-level page table entry can map */
#define PGDIR_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE) #define PUD_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE)
#define PUD_SIZE (1UL << PUD_SHIFT)
#define PUD_MASK (~(PUD_SIZE-1))
/* PGDIR_SHIFT determines what a fourth-level page table entry can map */
#define PGDIR_SHIFT (PUD_SHIFT + PUD_INDEX_SIZE)
#define PGDIR_SIZE (1UL << PGDIR_SHIFT) #define PGDIR_SIZE (1UL << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE-1)) #define PGDIR_MASK (~(PGDIR_SIZE-1))
...@@ -45,15 +55,23 @@ ...@@ -45,15 +55,23 @@
/* /*
* Size of EA range mapped by our pagetables. * Size of EA range mapped by our pagetables.
*/ */
#define EADDR_SIZE (PTE_INDEX_SIZE + PMD_INDEX_SIZE + \ #define PGTABLE_EADDR_SIZE (PTE_INDEX_SIZE + PMD_INDEX_SIZE + \
PGD_INDEX_SIZE + PAGE_SHIFT) PUD_INDEX_SIZE + PGD_INDEX_SIZE + PAGE_SHIFT)
#define EADDR_MASK ((1UL << EADDR_SIZE) - 1) #define PGTABLE_RANGE (1UL << PGTABLE_EADDR_SIZE)
#if TASK_SIZE_USER64 > PGTABLE_RANGE
#error TASK_SIZE_USER64 exceeds pagetable range
#endif
#if TASK_SIZE_USER64 > (1UL << (USER_ESID_BITS + SID_SHIFT))
#error TASK_SIZE_USER64 exceeds user VSID range
#endif
/* /*
* Define the address range of the vmalloc VM area. * Define the address range of the vmalloc VM area.
*/ */
#define VMALLOC_START (0xD000000000000000ul) #define VMALLOC_START (0xD000000000000000ul)
#define VMALLOC_SIZE (0x10000000000UL) #define VMALLOC_SIZE (0x80000000000UL)
#define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE) #define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE)
/* /*
...@@ -154,8 +172,6 @@ extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)]; ...@@ -154,8 +172,6 @@ extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
#ifndef __ASSEMBLY__ #ifndef __ASSEMBLY__
int hash_huge_page(struct mm_struct *mm, unsigned long access, int hash_huge_page(struct mm_struct *mm, unsigned long access,
unsigned long ea, unsigned long vsid, int local); unsigned long ea, unsigned long vsid, int local);
void hugetlb_mm_free_pgd(struct mm_struct *mm);
#endif /* __ASSEMBLY__ */ #endif /* __ASSEMBLY__ */
#define HAVE_ARCH_UNMAPPED_AREA #define HAVE_ARCH_UNMAPPED_AREA
...@@ -163,7 +179,6 @@ void hugetlb_mm_free_pgd(struct mm_struct *mm); ...@@ -163,7 +179,6 @@ void hugetlb_mm_free_pgd(struct mm_struct *mm);
#else #else
#define hash_huge_page(mm,a,ea,vsid,local) -1 #define hash_huge_page(mm,a,ea,vsid,local) -1
#define hugetlb_mm_free_pgd(mm) do {} while (0)
#endif #endif
...@@ -197,39 +212,45 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot) ...@@ -197,39 +212,45 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
#define pte_pfn(x) ((unsigned long)((pte_val(x) >> PTE_SHIFT))) #define pte_pfn(x) ((unsigned long)((pte_val(x) >> PTE_SHIFT)))
#define pte_page(x) pfn_to_page(pte_pfn(x)) #define pte_page(x) pfn_to_page(pte_pfn(x))
#define pmd_set(pmdp, ptep) \ #define pmd_set(pmdp, ptep) ({BUG_ON((u64)ptep < KERNELBASE); pmd_val(*(pmdp)) = (unsigned long)(ptep);})
(pmd_val(*(pmdp)) = __ba_to_bpn(ptep))
#define pmd_none(pmd) (!pmd_val(pmd)) #define pmd_none(pmd) (!pmd_val(pmd))
#define pmd_bad(pmd) (pmd_val(pmd) == 0) #define pmd_bad(pmd) (pmd_val(pmd) == 0)
#define pmd_present(pmd) (pmd_val(pmd) != 0) #define pmd_present(pmd) (pmd_val(pmd) != 0)
#define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0) #define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0)
#define pmd_page_kernel(pmd) (__bpn_to_ba(pmd_val(pmd))) #define pmd_page_kernel(pmd) (pmd_val(pmd))
#define pmd_page(pmd) virt_to_page(pmd_page_kernel(pmd)) #define pmd_page(pmd) virt_to_page(pmd_page_kernel(pmd))
#define pud_set(pudp, pmdp) (pud_val(*(pudp)) = (__ba_to_bpn(pmdp))) #define pud_set(pudp, pmdp) (pud_val(*(pudp)) = (unsigned long)(pmdp))
#define pud_none(pud) (!pud_val(pud)) #define pud_none(pud) (!pud_val(pud))
#define pud_bad(pud) ((pud_val(pud)) == 0UL) #define pud_bad(pud) ((pud_val(pud)) == 0)
#define pud_present(pud) (pud_val(pud) != 0UL) #define pud_present(pud) (pud_val(pud) != 0)
#define pud_clear(pudp) (pud_val(*(pudp)) = 0UL) #define pud_clear(pudp) (pud_val(*(pudp)) = 0)
#define pud_page(pud) (__bpn_to_ba(pud_val(pud))) #define pud_page(pud) (pud_val(pud))
#define pgd_set(pgdp, pudp) ({pgd_val(*(pgdp)) = (unsigned long)(pudp);})
#define pgd_none(pgd) (!pgd_val(pgd))
#define pgd_bad(pgd) (pgd_val(pgd) == 0)
#define pgd_present(pgd) (pgd_val(pgd) != 0)
#define pgd_clear(pgdp) (pgd_val(*(pgdp)) = 0)
#define pgd_page(pgd) (pgd_val(pgd))
/* /*
* Find an entry in a page-table-directory. We combine the address region * Find an entry in a page-table-directory. We combine the address region
* (the high order N bits) and the pgd portion of the address. * (the high order N bits) and the pgd portion of the address.
*/ */
/* to avoid overflow in free_pgtables we don't use PTRS_PER_PGD here */ /* to avoid overflow in free_pgtables we don't use PTRS_PER_PGD here */
#define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & 0x7ff) #define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & 0x1ff)
#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) #define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address))
/* Find an entry in the second-level page table.. */ #define pud_offset(pgdp, addr) \
(((pud_t *) pgd_page(*(pgdp))) + (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)))
#define pmd_offset(pudp,addr) \ #define pmd_offset(pudp,addr) \
((pmd_t *) pud_page(*(pudp)) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))) (((pmd_t *) pud_page(*(pudp))) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)))
/* Find an entry in the third-level page table.. */
#define pte_offset_kernel(dir,addr) \ #define pte_offset_kernel(dir,addr) \
((pte_t *) pmd_page_kernel(*(dir)) \ (((pte_t *) pmd_page_kernel(*(dir))) + (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)))
+ (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)))
#define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr)) #define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr))
#define pte_offset_map_nested(dir,addr) pte_offset_kernel((dir), (addr)) #define pte_offset_map_nested(dir,addr) pte_offset_kernel((dir), (addr))
...@@ -458,23 +479,18 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr, ...@@ -458,23 +479,18 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr,
#define pte_same(A,B) (((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0) #define pte_same(A,B) (((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0)
#define pmd_ERROR(e) \ #define pmd_ERROR(e) \
printk("%s:%d: bad pmd %08x.\n", __FILE__, __LINE__, pmd_val(e)) printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e))
#define pud_ERROR(e) \
printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pud_val(e))
#define pgd_ERROR(e) \ #define pgd_ERROR(e) \
printk("%s:%d: bad pgd %08x.\n", __FILE__, __LINE__, pgd_val(e)) printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
extern pgd_t swapper_pg_dir[]; extern pgd_t swapper_pg_dir[];
extern void paging_init(void); extern void paging_init(void);
/*
* Because the huge pgtables are only 2 level, they can take
* at most around 4M, much less than one hugepage which the
* process is presumably entitled to use. So we don't bother
* freeing up the pagetables on unmap, and wait until
* destroy_context() to clean up the lot.
*/
#define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) \ #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) \
do { } while (0) free_pgd_range(tlb, addr, end, floor, ceiling)
/* /*
* This gets called at the end of handling a page fault, when * This gets called at the end of handling a page fault, when
......
...@@ -382,8 +382,8 @@ extern long kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); ...@@ -382,8 +382,8 @@ extern long kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
extern struct task_struct *last_task_used_math; extern struct task_struct *last_task_used_math;
extern struct task_struct *last_task_used_altivec; extern struct task_struct *last_task_used_altivec;
/* 64-bit user address space is 41-bits (2TBs user VM) */ /* 64-bit user address space is 44-bits (16TB user VM) */
#define TASK_SIZE_USER64 (0x0000020000000000UL) #define TASK_SIZE_USER64 (0x0000100000000000UL)
/* /*
* 32-bit user address space is 4GB - 1 page * 32-bit user address space is 4GB - 1 page
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment