Commit fac4e0cb authored by Anton Blanchard

Merge samba.org:/scratch/anton/junk/foo

into samba.org:/scratch/anton/linux-2.5_ppc64
parents 095de27b 6e036c06
@@ -69,6 +69,17 @@ config PPC64
 	bool
 	default y
 
+config HUGETLB_PAGE
+	bool "Huge TLB Page Support"
+	help
+	  This enables support for huge pages.  User space applications
+	  can make use of this support with the sys_alloc_hugepages and
+	  sys_free_hugepages system calls.  If your applications are
+	  huge page aware and your processor supports this (only POWER4),
+	  then say Y here.
+	  Otherwise, say N.
+
 config SMP
 	bool "Symmetric multi-processing support"
 	---help---
......
@@ -197,7 +197,7 @@ pte_t *find_linux_pte(pgd_t *pgdir, unsigned long ea)
 	if (!pgd_none(*pg)) {
 		pm = pmd_offset(pg, ea);
-		if (!pmd_none(*pm)) {
+		if (pmd_present(*pm)) {
 			pt = pte_offset_kernel(pm, ea);
 			pte = *pt;
 			if (!pte_present(pte))
@@ -436,8 +436,12 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
 	if (user_region && cpus_equal(mm->cpu_vm_mask, tmp))
 		local = 1;
 
+	ret = hash_huge_page(mm, access, ea, vsid, local);
+	if (ret < 0) {
 		ptep = find_linux_pte(pgdir, ea);
 		ret = __hash_page(ea, access, vsid, ptep, trap, local);
+	}
 
 	spin_unlock(&mm->page_table_lock);
 
 	return ret;
......
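Note on the hash_page() change above: the fault path now tries hash_huge_page() first and falls back to the normal find_linux_pte()/__hash_page() path only when it returns a negative value; with CONFIG_HUGETLB_PAGE disabled, the hash_huge_page() stub added in pgtable.h (end of this diff) expands to -1, so the fallback always runs. A minimal userspace sketch of that dispatch contract, using hypothetical model functions and the hugepage window from the page.h hunk below:

#include <stdio.h>

/* Hypothetical stand-ins for kernel helpers; the window matches the
 * TASK_HPAGE_BASE/TASK_HPAGE_END values added in page.h below. */
#define HPAGE_BASE 0x0000010000000000UL
#define HPAGE_END  0x0000018000000000UL

/* Mirrors hash_huge_page()'s contract: a negative return means "not a
 * hugepage address", telling the caller to use the normal PTE path. */
static int model_hash_huge_page(unsigned long ea)
{
	return (ea >= HPAGE_BASE && ea < HPAGE_END) ? 0 : -1;
}

static const char *model_hash_page(unsigned long ea)
{
	if (model_hash_huge_page(ea) < 0)
		return "fallback: normal PTE path";
	return "handled as 16MB hugepage";
}

int main(void)
{
	printf("%s\n", model_hash_page(0x0000010000010000UL));
	printf("%s\n", model_hash_page(0x0000000010000000UL));
	return 0;
}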
@@ -221,15 +221,18 @@ void make_slbe(unsigned long esid, unsigned long vsid, int large,
 }
 
 static inline void __ste_allocate(unsigned long esid, unsigned long vsid,
-				  int kernel_segment)
+				  int kernel_segment, mm_context_t context)
 {
 	if (cur_cpu_spec->cpu_features & CPU_FTR_SLB) {
+		int large = 0;
+
 #ifndef CONFIG_PPC_ISERIES
 		if (REGION_ID(esid << SID_SHIFT) == KERNEL_REGION_ID)
-			make_slbe(esid, vsid, 1, kernel_segment);
-		else
+			large = 1;
+		else if (REGION_ID(esid << SID_SHIFT) == USER_REGION_ID)
+			large = in_hugepage_area(context, esid << SID_SHIFT);
 #endif
-		make_slbe(esid, vsid, 0, kernel_segment);
+		make_slbe(esid, vsid, large, kernel_segment);
 	} else {
 		unsigned char top_entry, stab_entry, *segments;
@@ -255,6 +258,7 @@ int ste_allocate(unsigned long ea)
 {
 	unsigned long vsid, esid;
 	int kernel_segment = 0;
+	mm_context_t context;
 
 	PMC_SW_PROCESSOR(stab_faults);
@@ -266,16 +270,18 @@ int ste_allocate(unsigned long ea)
 	if (REGION_ID(ea) >= KERNEL_REGION_ID) {
 		kernel_segment = 1;
 		vsid = get_kernel_vsid(ea);
+		context = REGION_ID(ea);
 	} else {
-		struct mm_struct *mm = current->mm;
-
-		if (mm)
-			vsid = get_vsid(mm->context, ea);
-		else
+		if (! current->mm)
 			return 1;
+
+		context = current->mm->context;
+		vsid = get_vsid(context, ea);
 	}
 
 	esid = GET_ESID(ea);
-	__ste_allocate(esid, vsid, kernel_segment);
+	__ste_allocate(esid, vsid, kernel_segment, context);
 	if (!(cur_cpu_spec->cpu_features & CPU_FTR_SLB)) {
 		/* Order update */
 		asm volatile("sync":::"memory");
@@ -302,7 +308,7 @@ static void preload_stab(struct task_struct *tsk, struct mm_struct *mm)
 		for (esid = 0; esid < 16; esid++) {
 			unsigned long ea = esid << SID_SHIFT;
 			vsid = get_vsid(mm->context, ea);
-			__ste_allocate(esid, vsid, 0);
+			__ste_allocate(esid, vsid, 0, mm->context);
 		}
 	} else {
 		unsigned long pc = KSTK_EIP(tsk);
@@ -316,7 +322,7 @@ static void preload_stab(struct task_struct *tsk, struct mm_struct *mm)
 		    (REGION_ID(pc) >= KERNEL_REGION_ID))
 			return;
 		vsid = get_vsid(mm->context, pc);
-		__ste_allocate(GET_ESID(pc), vsid, 0);
+		__ste_allocate(GET_ESID(pc), vsid, 0, mm->context);
 	}
 
 	if (stack && (pc_segment != stack_segment)) {
@@ -324,7 +330,7 @@ static void preload_stab(struct task_struct *tsk, struct mm_struct *mm)
 		    (REGION_ID(stack) >= KERNEL_REGION_ID))
 			return;
 		vsid = get_vsid(mm->context, stack);
-		__ste_allocate(GET_ESID(stack), vsid, 0);
+		__ste_allocate(GET_ESID(stack), vsid, 0, mm->context);
 	}
 }
......
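Note on the __ste_allocate() hunks above: the "large" bit of an SLB entry is now chosen per segment rather than hard-coded; kernel segments keep large pages, and a user segment gets the large bit only when in_hugepage_area() (defined in page.h below) reports that the segment lies in a hugepage range. A simplified userspace sketch of that selection, with stand-ins for REGION_ID() and in_hugepage_area():

#include <stdio.h>

/* Simplified model: the region tags and the hugepage test below are
 * illustrative stand-ins, not the kernel's actual definitions. */
enum region { USER_REGION, KERNEL_REGION };

static int model_in_hugepage_area(unsigned long seg_base)
{
	return seg_base >= 0x0000010000000000UL &&
	       seg_base <  0x0000018000000000UL;
}

/* Value that would be passed as the "large" argument of make_slbe(). */
static int slbe_large_bit(enum region r, unsigned long seg_base)
{
	if (r == KERNEL_REGION)
		return 1;	/* kernel segments always map large */
	return model_in_hugepage_area(seg_base);
}

int main(void)
{
	printf("%d\n", slbe_large_bit(KERNEL_REGION, 0xc000000000000000UL));
	printf("%d\n", slbe_large_bit(USER_REGION, 0x0000010000000000UL));
	printf("%d\n", slbe_large_bit(USER_REGION, 0x0000000010000000UL));
	return 0;
}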
@@ -6,3 +6,4 @@ EXTRA_CFLAGS += -mno-minimal-toc
 obj-y := fault.o init.o extable.o imalloc.o
 obj-$(CONFIG_DISCONTIGMEM) += numa.o
+obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
@@ -290,7 +290,7 @@ flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
 	if (!pgd_none(*pgd)) {
 		pmd = pmd_offset(pgd, vmaddr);
-		if (!pmd_none(*pmd)) {
+		if (pmd_present(*pmd)) {
 			ptep = pte_offset_kernel(pmd, vmaddr);
 			/* Check if HPTE might exist and flush it if so */
 			pte = __pte(pte_update(ptep, _PAGE_HPTEFLAGS, 0));
@@ -298,6 +298,7 @@ flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
 				flush_hash_page(context, vmaddr, pte, local);
 		}
+		WARN_ON(pmd_hugepage(*pmd));
 	}
 }
@@ -348,7 +349,7 @@ __flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long end)
 		pmd_end = (start + PMD_SIZE) & PMD_MASK;
 		if (pmd_end > end)
 			pmd_end = end;
-		if (!pmd_none(*pmd)) {
+		if (pmd_present(*pmd)) {
 			ptep = pte_offset_kernel(pmd, start);
 			do {
 				if (pte_val(*ptep) & _PAGE_HASHPTE) {
@@ -367,6 +368,7 @@ __flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long end)
 				++ptep;
 			} while (start < pmd_end);
 		} else {
+			WARN_ON(pmd_hugepage(*pmd));
 			start = pmd_end;
 		}
 		++pmd;
......
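Note on the two init.c hunks above: these paths switch from !pmd_none() to pmd_present() because a hugepage PMD is a third state; it is not none (its low bit carries _PMD_HUGEPAGE, per the pgtable.h hunk at the end of this diff), but it does not point at a PTE page either, so it must never be walked with pte_offset_kernel(). The WARN_ON(pmd_hugepage(*pmd)) calls catch hugepage PMDs that unexpectedly reach these normal-page flush routines. A compact model of the three states, using the predicate definitions from the pgtable.h hunk (everything else is illustrative):

#include <stdio.h>

/* The shift and flag values match the pgtable.h hunk below. */
#define _PMD_HUGEPAGE		0x00000001UL
#define PMD_TO_PTEPAGE_SHIFT	2

#define pmd_none(v)	((v) == 0)
#define pmd_hugepage(v)	(!!((v) & _PMD_HUGEPAGE))
#define pmd_present(v)	(!pmd_hugepage(v) && ((v) & ~_PMD_HUGEPAGE) != 0)

static void show(const char *name, unsigned long v)
{
	printf("%-7s none=%d present=%d hugepage=%d\n",
	       name, pmd_none(v), pmd_present(v), pmd_hugepage(v));
}

int main(void)
{
	show("empty", 0);				   /* none */
	show("normal", 0x1234UL << PMD_TO_PTEPAGE_SHIFT);  /* walkable */
	show("huge", _PMD_HUGEPAGE);			   /* neither */
	return 0;
}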
@@ -18,6 +18,12 @@
 /* Default "unsigned long" context */
 typedef unsigned long mm_context_t;
 
+#ifdef CONFIG_HUGETLB_PAGE
+#define CONTEXT_LOW_HPAGES	(1UL<<63)
+#else
+#define CONTEXT_LOW_HPAGES	0
+#endif
+
 /*
  * Define the size of the cache used for segment table entries.  The first
  * entry is used as a cache pointer, therefore the actual number of entries
......
@@ -127,7 +127,8 @@ destroy_context(struct mm_struct *mm)
 #endif
 
 	mmu_context_queue.size++;
-	mmu_context_queue.elements[index] = mm->context;
+	mmu_context_queue.elements[index] =
+		mm->context & ~CONTEXT_LOW_HPAGES;
 
 	spin_unlock_irqrestore(&mmu_context_queue.lock, flags);
 }
@@ -189,6 +190,8 @@ get_vsid( unsigned long context, unsigned long ea )
 {
 	unsigned long ordinal, vsid;
 
+	context &= ~CONTEXT_LOW_HPAGES;
+
 	ordinal = (((ea >> 28) & 0x1fffff) * LAST_USER_CONTEXT) | context;
 	vsid = (ordinal * VSID_RANDOMIZER) & VSID_MASK;
......
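Note on the mmu.h and context hunks above: CONTEXT_LOW_HPAGES repurposes bit 63 of the otherwise-numeric mm_context_t as a per-process flag meaning "the low 32-bit address range contains hugepage mappings". Because the real context number lives in the low bits, every consumer of the numeric value must mask the flag off, which is exactly what get_vsid() and the context-recycling code in destroy_context() now do. A small check of that masking, assuming the same bit layout:

#include <assert.h>

#define CONTEXT_LOW_HPAGES (1UL << 63)

int main(void)
{
	unsigned long context = 42 | CONTEXT_LOW_HPAGES; /* flagged context */

	/* the flag must not leak into the VSID or the free-context queue */
	assert((context & ~CONTEXT_LOW_HPAGES) == 42);
	/* but it stays visible to in_hugepage_area()-style tests */
	assert(context & CONTEXT_LOW_HPAGES);
	return 0;
}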
@@ -22,6 +22,39 @@
 #define PAGE_MASK	(~(PAGE_SIZE-1))
 #define PAGE_OFFSET_MASK (PAGE_SIZE-1)
 
+#ifdef CONFIG_HUGETLB_PAGE
+
+#define HPAGE_SHIFT	24
+#define HPAGE_SIZE	((1UL) << HPAGE_SHIFT)
+#define HPAGE_MASK	(~(HPAGE_SIZE - 1))
+#define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)
+
+/* For 64-bit processes the hugepage range is 1T-1.5T */
+#define TASK_HPAGE_BASE		(0x0000010000000000UL)
+#define TASK_HPAGE_END		(0x0000018000000000UL)
+/* For 32-bit processes the hugepage range is 2-3G */
+#define TASK_HPAGE_BASE_32	(0x80000000UL)
+#define TASK_HPAGE_END_32	(0xc0000000UL)
+
+#define ARCH_HAS_HUGEPAGE_ONLY_RANGE
+#define is_hugepage_only_range(addr, len) \
+	( ((addr > (TASK_HPAGE_BASE-len)) && (addr < TASK_HPAGE_END)) || \
+	  ((current->mm->context & CONTEXT_LOW_HPAGES) && \
+	   (addr > (TASK_HPAGE_BASE_32-len)) && (addr < TASK_HPAGE_END_32)) )
+#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+
+#define in_hugepage_area(context, addr) \
+	((cur_cpu_spec->cpu_features & CPU_FTR_16M_PAGE) && \
+	 ((((addr) >= TASK_HPAGE_BASE) && ((addr) < TASK_HPAGE_END)) || \
+	  (((context) & CONTEXT_LOW_HPAGES) && \
+	   (((addr) >= TASK_HPAGE_BASE_32) && ((addr) < TASK_HPAGE_END_32)))))
+
+#else /* !CONFIG_HUGETLB_PAGE */
+
+#define in_hugepage_area(mm, addr)	0
+
+#endif /* !CONFIG_HUGETLB_PAGE */
+
 #define SID_SHIFT           28
 #define SID_MASK            0xfffffffff
 #define GET_ESID(x)         (((x) >> SID_SHIFT) & SID_MASK)
......
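Note on the page.h hunk above: HPAGE_SHIFT of 24 means 16MB hugepages, and with 4K base pages (PAGE_SHIFT 12, an assumption here) HUGETLB_PAGE_ORDER comes out to 24 - 12 = 12, i.e. 4096 base pages per hugepage. The 64-bit window spans 1TB to 1.5TB and the optional 32-bit window 2GB to 3GB. A quick arithmetic check:

#include <stdio.h>

#define PAGE_SHIFT	12	/* assumed 4K base pages */
#define HPAGE_SHIFT	24
#define HPAGE_SIZE	(1UL << HPAGE_SHIFT)
#define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)

int main(void)
{
	printf("HPAGE_SIZE          = %lu MB\n", HPAGE_SIZE >> 20);	/* 16 */
	printf("HUGETLB_PAGE_ORDER  = %d\n", HUGETLB_PAGE_ORDER);	/* 12 */
	printf("base pages/hugepage = %lu\n", 1UL << HUGETLB_PAGE_ORDER);
	printf("64-bit window       = %lu GB\n",
	       (0x0000018000000000UL - 0x0000010000000000UL) >> 30);	/* 512 */
	return 0;
}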
@@ -149,6 +149,25 @@ extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
 /* shift to put page number into pte */
 #define PTE_SHIFT (16)
 
+/* We allow 2^41 bytes of real memory, so we need 29 bits in the PMD
+ * to give the PTE page number.  The bottom two bits are for flags. */
+#define PMD_TO_PTEPAGE_SHIFT	(2)
+
+#ifdef CONFIG_HUGETLB_PAGE
+#define _PMD_HUGEPAGE	0x00000001U
+#define HUGEPTE_BATCH_SIZE	(1<<(HPAGE_SHIFT-PMD_SHIFT))
+
+int hash_huge_page(struct mm_struct *mm, unsigned long access,
+		   unsigned long ea, unsigned long vsid, int local);
+
+#define HAVE_ARCH_UNMAPPED_AREA
+#else
+
+#define hash_huge_page(mm,a,ea,vsid,local)	-1
+#define _PMD_HUGEPAGE	0
+
+#endif
+
 #ifndef __ASSEMBLY__
 
 /*
@@ -178,12 +197,16 @@ extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
 #define pte_pfn(x)		((unsigned long)((pte_val(x) >> PTE_SHIFT)))
 #define pte_page(x)		pfn_to_page(pte_pfn(x))
 
-#define pmd_set(pmdp, ptep)	(pmd_val(*(pmdp)) = (__ba_to_bpn(ptep)))
+#define pmd_set(pmdp, ptep) \
+	(pmd_val(*(pmdp)) = (__ba_to_bpn(ptep) << PMD_TO_PTEPAGE_SHIFT))
 #define pmd_none(pmd)		(!pmd_val(pmd))
-#define pmd_bad(pmd)		((pmd_val(pmd)) == 0)
-#define pmd_present(pmd)	((pmd_val(pmd)) != 0)
+#define pmd_hugepage(pmd)	(!!(pmd_val(pmd) & _PMD_HUGEPAGE))
+#define pmd_bad(pmd)		(((pmd_val(pmd)) == 0) || pmd_hugepage(pmd))
+#define pmd_present(pmd)	((!pmd_hugepage(pmd)) \
+				 && (pmd_val(pmd) & ~_PMD_HUGEPAGE) != 0)
 #define pmd_clear(pmdp)		(pmd_val(*(pmdp)) = 0)
-#define pmd_page_kernel(pmd)	(__bpn_to_ba(pmd_val(pmd)))
+#define pmd_page_kernel(pmd) \
+	(__bpn_to_ba(pmd_val(pmd) >> PMD_TO_PTEPAGE_SHIFT))
 #define pmd_page(pmd)		virt_to_page(pmd_page_kernel(pmd))
 #define pgd_set(pgdp, pmdp)	(pgd_val(*(pgdp)) = (__ba_to_bpn(pmdp)))
 #define pgd_none(pgd)		(!pgd_val(pgd))
......
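Note on the pgtable.h hunks above: the PMD now stores the PTE-page number shifted left by PMD_TO_PTEPAGE_SHIFT, freeing the bottom two bits for flags such as _PMD_HUGEPAGE; pmd_set() applies the shift on store and pmd_page_kernel() undoes it on load. A round-trip sketch of that encoding (__ba_to_bpn()/__bpn_to_ba() are modelled here as identity on a bare page number):

#include <assert.h>

#define PMD_TO_PTEPAGE_SHIFT	2
#define _PMD_HUGEPAGE		0x00000001UL

/* pmd_set(): store a (model) page number shifted above the flag bits */
static unsigned long model_pmd_set(unsigned long bpn)
{
	return bpn << PMD_TO_PTEPAGE_SHIFT;
}

/* pmd_page_kernel(): recover the page number, dropping the flag bits */
static unsigned long model_pmd_page(unsigned long pmd)
{
	return pmd >> PMD_TO_PTEPAGE_SHIFT;
}

int main(void)
{
	unsigned long bpn = 0x12345678UL;	/* fits in 29 bits */
	unsigned long pmd = model_pmd_set(bpn);

	assert(model_pmd_page(pmd) == bpn);	/* round trip is lossless */
	assert(!(pmd & _PMD_HUGEPAGE));		/* flag bits stay clear */
	return 0;
}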