Commit 6e036c06 authored by Anton Blanchard

ppc64: ppc64 Hugepage support from David Gibson

parent 4f6b41e5
......@@ -69,6 +69,17 @@ config PPC64
bool
default y
config HUGETLB_PAGE
bool "Huge TLB Page Support"
help
This enables support for huge pages. User space applications
can make use of this support with the sys_alloc_hugepages and
sys_free_hugepages system calls. If your applications are
huge page aware and your processor supports this (only POWER4),
then say Y here.
Otherwise, say N.
config SMP
bool "Symmetric multi-processing support"
---help---
......
......@@ -197,7 +197,7 @@ pte_t *find_linux_pte(pgd_t *pgdir, unsigned long ea)
if (!pgd_none(*pg)) {
pm = pmd_offset(pg, ea);
if (!pmd_none(*pm)) {
if (pmd_present(*pm)) {
pt = pte_offset_kernel(pm, ea);
pte = *pt;
if (!pte_present(pte))
......@@ -436,8 +436,12 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
if (user_region && cpus_equal(mm->cpu_vm_mask, tmp))
local = 1;
ret = hash_huge_page(mm, access, ea, vsid, local);
if (ret < 0) {
ptep = find_linux_pte(pgdir, ea);
ret = __hash_page(ea, access, vsid, ptep, trap, local);
}
spin_unlock(&mm->page_table_lock);
return ret;
......
......@@ -221,15 +221,18 @@ void make_slbe(unsigned long esid, unsigned long vsid, int large,
}
static inline void __ste_allocate(unsigned long esid, unsigned long vsid,
int kernel_segment)
int kernel_segment, mm_context_t context)
{
if (cur_cpu_spec->cpu_features & CPU_FTR_SLB) {
int large = 0;
#ifndef CONFIG_PPC_ISERIES
if (REGION_ID(esid << SID_SHIFT) == KERNEL_REGION_ID)
make_slbe(esid, vsid, 1, kernel_segment);
else
large = 1;
else if (REGION_ID(esid << SID_SHIFT) == USER_REGION_ID)
large = in_hugepage_area(context, esid << SID_SHIFT);
#endif
make_slbe(esid, vsid, 0, kernel_segment);
make_slbe(esid, vsid, large, kernel_segment);
} else {
unsigned char top_entry, stab_entry, *segments;
......@@ -255,6 +258,7 @@ int ste_allocate(unsigned long ea)
{
unsigned long vsid, esid;
int kernel_segment = 0;
mm_context_t context;
PMC_SW_PROCESSOR(stab_faults);
......@@ -266,16 +270,18 @@ int ste_allocate(unsigned long ea)
if (REGION_ID(ea) >= KERNEL_REGION_ID) {
kernel_segment = 1;
vsid = get_kernel_vsid(ea);
context = REGION_ID(ea);
} else {
struct mm_struct *mm = current->mm;
if (mm)
vsid = get_vsid(mm->context, ea);
else
if (! current->mm)
return 1;
context = current->mm->context;
vsid = get_vsid(context, ea);
}
esid = GET_ESID(ea);
__ste_allocate(esid, vsid, kernel_segment);
__ste_allocate(esid, vsid, kernel_segment, context);
if (!(cur_cpu_spec->cpu_features & CPU_FTR_SLB)) {
/* Order update */
asm volatile("sync":::"memory");
......@@ -302,7 +308,7 @@ static void preload_stab(struct task_struct *tsk, struct mm_struct *mm)
for (esid = 0; esid < 16; esid++) {
unsigned long ea = esid << SID_SHIFT;
vsid = get_vsid(mm->context, ea);
__ste_allocate(esid, vsid, 0);
__ste_allocate(esid, vsid, 0, mm->context);
}
} else {
unsigned long pc = KSTK_EIP(tsk);
......@@ -316,7 +322,7 @@ static void preload_stab(struct task_struct *tsk, struct mm_struct *mm)
(REGION_ID(pc) >= KERNEL_REGION_ID))
return;
vsid = get_vsid(mm->context, pc);
__ste_allocate(GET_ESID(pc), vsid, 0);
__ste_allocate(GET_ESID(pc), vsid, 0, mm->context);
}
if (stack && (pc_segment != stack_segment)) {
......@@ -324,7 +330,7 @@ static void preload_stab(struct task_struct *tsk, struct mm_struct *mm)
(REGION_ID(stack) >= KERNEL_REGION_ID))
return;
vsid = get_vsid(mm->context, stack);
__ste_allocate(GET_ESID(stack), vsid, 0);
__ste_allocate(GET_ESID(stack), vsid, 0, mm->context);
}
}
......
......@@ -6,3 +6,4 @@ EXTRA_CFLAGS += -mno-minimal-toc
obj-y := fault.o init.o extable.o imalloc.o
obj-$(CONFIG_DISCONTIGMEM) += numa.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
......@@ -290,7 +290,7 @@ flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
if (!pgd_none(*pgd)) {
pmd = pmd_offset(pgd, vmaddr);
if (!pmd_none(*pmd)) {
if (pmd_present(*pmd)) {
ptep = pte_offset_kernel(pmd, vmaddr);
/* Check if HPTE might exist and flush it if so */
pte = __pte(pte_update(ptep, _PAGE_HPTEFLAGS, 0));
......@@ -298,6 +298,7 @@ flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
flush_hash_page(context, vmaddr, pte, local);
}
}
WARN_ON(pmd_hugepage(*pmd));
}
}
......@@ -348,7 +349,7 @@ __flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long end)
pmd_end = (start + PMD_SIZE) & PMD_MASK;
if (pmd_end > end)
pmd_end = end;
if (!pmd_none(*pmd)) {
if (pmd_present(*pmd)) {
ptep = pte_offset_kernel(pmd, start);
do {
if (pte_val(*ptep) & _PAGE_HASHPTE) {
......@@ -367,6 +368,7 @@ __flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long end)
++ptep;
} while (start < pmd_end);
} else {
WARN_ON(pmd_hugepage(*pmd));
start = pmd_end;
}
++pmd;
......
......@@ -18,6 +18,12 @@
/* Default "unsigned long" context */
typedef unsigned long mm_context_t;
/*
 * CONTEXT_LOW_HPAGES is a flag carried in the top bit (bit 63) of an
 * mm_context_t when huge page support is configured.  It is a flag, not
 * part of the context number: code that uses the context as an id
 * (get_vsid(), destroy_context()) masks it off first, and
 * in_hugepage_area() tests it when checking the 32-bit hugepage range.
 * Without CONFIG_HUGETLB_PAGE it is 0, so those masks and tests are no-ops.
 */
#ifdef CONFIG_HUGETLB_PAGE
#define CONTEXT_LOW_HPAGES (1UL<<63)
#else
#define CONTEXT_LOW_HPAGES 0
#endif
/*
* Define the size of the cache used for segment table entries. The first
* entry is used as a cache pointer, therefore the actual number of entries
......
......@@ -127,7 +127,8 @@ destroy_context(struct mm_struct *mm)
#endif
mmu_context_queue.size++;
mmu_context_queue.elements[index] = mm->context;
mmu_context_queue.elements[index] =
mm->context & ~CONTEXT_LOW_HPAGES;
spin_unlock_irqrestore(&mmu_context_queue.lock, flags);
}
......@@ -189,6 +190,8 @@ get_vsid( unsigned long context, unsigned long ea )
{
unsigned long ordinal, vsid;
context &= ~CONTEXT_LOW_HPAGES;
ordinal = (((ea >> 28) & 0x1fffff) * LAST_USER_CONTEXT) | context;
vsid = (ordinal * VSID_RANDOMIZER) & VSID_MASK;
......
......@@ -22,6 +22,39 @@
#define PAGE_MASK (~(PAGE_SIZE-1))
#define PAGE_OFFSET_MASK (PAGE_SIZE-1)
#ifdef CONFIG_HUGETLB_PAGE

/* Huge pages are 2^24 bytes (16MB). */
#define HPAGE_SHIFT 24
#define HPAGE_SIZE ((1UL) << HPAGE_SHIFT)
#define HPAGE_MASK (~(HPAGE_SIZE - 1))
#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)

/* For 64-bit processes the hugepage range is 1T-1.5T */
#define TASK_HPAGE_BASE (0x0000010000000000UL)
#define TASK_HPAGE_END (0x0000018000000000UL)
/* For 32-bit processes the hugepage range is 2-3G */
#define TASK_HPAGE_BASE_32 (0x80000000UL)
#define TASK_HPAGE_END_32 (0xc0000000UL)

#define ARCH_HAS_HUGEPAGE_ONLY_RANGE
/*
 * Non-zero when the range starting at addr with length len reaches into
 * the 64-bit hugepage window, or into the 32-bit window while the current
 * mm has CONTEXT_LOW_HPAGES set in its context.
 *
 * Both arguments are parenthesized so that expression arguments (for
 * example "a + b" as len) keep their intended grouping inside the
 * subtraction and comparisons.  Note that addr and len are each evaluated
 * more than once, so they must be free of side effects.
 */
#define is_hugepage_only_range(addr, len) \
	( (((addr) > (TASK_HPAGE_BASE-(len))) && ((addr) < TASK_HPAGE_END)) || \
	  ((current->mm->context & CONTEXT_LOW_HPAGES) && \
	   ((addr) > (TASK_HPAGE_BASE_32-(len))) && ((addr) < TASK_HPAGE_END_32)) )
#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA

/*
 * Non-zero when the CPU advertises CPU_FTR_16M_PAGE and addr lies in the
 * 64-bit hugepage window, or in the 32-bit window while the given context
 * has CONTEXT_LOW_HPAGES set.  addr and context are evaluated more than
 * once.
 */
#define in_hugepage_area(context, addr) \
	((cur_cpu_spec->cpu_features & CPU_FTR_16M_PAGE) && \
	 ((((addr) >= TASK_HPAGE_BASE) && ((addr) < TASK_HPAGE_END)) || \
	  (((context) & CONTEXT_LOW_HPAGES) && \
	   (((addr) >= TASK_HPAGE_BASE_32) && ((addr) < TASK_HPAGE_END_32)))))

#else /* !CONFIG_HUGETLB_PAGE */

/* No huge pages configured: no address is ever in a hugepage area. */
#define in_hugepage_area(context, addr) 0

#endif /* !CONFIG_HUGETLB_PAGE */
#define SID_SHIFT 28
#define SID_MASK 0xfffffffff
#define GET_ESID(x) (((x) >> SID_SHIFT) & SID_MASK)
......
......@@ -149,6 +149,25 @@ extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
/* shift to put page number into pte */
#define PTE_SHIFT (16)
/* We allow 2^41 bytes of real memory, so we need 29 bits in the PMD
* to give the PTE page number. The bottom two bits are for flags. */
#define PMD_TO_PTEPAGE_SHIFT (2)
#ifdef CONFIG_HUGETLB_PAGE

/* Flag in the low bits of a PMD entry; pmd_hugepage() tests it. */
#define _PMD_HUGEPAGE 0x00000001U
#define HUGEPTE_BATCH_SIZE (1<<(HPAGE_SHIFT-PMD_SHIFT))

/*
 * The prototype is C only; keep it away from assembly files that include
 * this header.
 */
#ifndef __ASSEMBLY__
int hash_huge_page(struct mm_struct *mm, unsigned long access,
		   unsigned long ea, unsigned long vsid, int local);
#endif /* !__ASSEMBLY__ */

#define HAVE_ARCH_UNMAPPED_AREA

#else

/*
 * Without huge page support hash_huge_page() always yields a negative
 * value, which hash_page() treats as "fall back to the normal page path".
 * The expansion is parenthesized so it stays a single operand wherever the
 * macro is used inside a larger expression.
 */
#define hash_huge_page(mm,a,ea,vsid,local) (-1)
#define _PMD_HUGEPAGE 0

#endif
#ifndef __ASSEMBLY__
/*
......@@ -178,12 +197,16 @@ extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
#define pte_pfn(x) ((unsigned long)((pte_val(x) >> PTE_SHIFT)))
#define pte_page(x) pfn_to_page(pte_pfn(x))
#define pmd_set(pmdp, ptep) (pmd_val(*(pmdp)) = (__ba_to_bpn(ptep)))
#define pmd_set(pmdp, ptep) \
(pmd_val(*(pmdp)) = (__ba_to_bpn(ptep) << PMD_TO_PTEPAGE_SHIFT))
#define pmd_none(pmd) (!pmd_val(pmd))
#define pmd_bad(pmd) ((pmd_val(pmd)) == 0)
#define pmd_present(pmd) ((pmd_val(pmd)) != 0)
#define pmd_hugepage(pmd) (!!(pmd_val(pmd) & _PMD_HUGEPAGE))
#define pmd_bad(pmd) (((pmd_val(pmd)) == 0) || pmd_hugepage(pmd))
#define pmd_present(pmd) ((!pmd_hugepage(pmd)) \
&& (pmd_val(pmd) & ~_PMD_HUGEPAGE) != 0)
#define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0)
#define pmd_page_kernel(pmd) (__bpn_to_ba(pmd_val(pmd)))
#define pmd_page_kernel(pmd) \
(__bpn_to_ba(pmd_val(pmd) >> PMD_TO_PTEPAGE_SHIFT))
#define pmd_page(pmd) virt_to_page(pmd_page_kernel(pmd))
#define pgd_set(pgdp, pmdp) (pgd_val(*(pgdp)) = (__ba_to_bpn(pmdp)))
#define pgd_none(pgd) (!pgd_val(pgd))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment