Commit 81c31b89 authored by Andrew Morton, committed by Linus Torvalds

[PATCH] ppc64: allow hugepages anywhere in low 4GB

From: David Gibson <david@gibson.dropbear.id.au>

On PPC64, to deal with the restrictions imposed by the PPC MMU's segment
design, hugepages are only allowed to be mapped in two fixed address
ranges, one at 2-3G (for use by 32-bit processes) and one at 1-1.5T (for use
by 64-bit processes).  This is quite limiting, particularly for 32-bit
processes which want to use a lot of large page memory.

This patch relaxes this restriction, and allows any of the low 16 segments
(i.e.  those below 4G) to be individually switched over to allow hugepage
mappings (provided the segment does not already have any normal page
mappings).  The 1-1.5T fixed range for 64-bit processes remains.
parent 29178051
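
For readers unfamiliar with the segment arithmetic, here is an illustrative sketch (ordinary userspace C, not part of the patch) of how the new GET_ESID()/LOW_ESID_MASK() macros turn an address range below 4GB into the 16-bit per-segment mask the patch stores in mm->context.htlb_segs. The macro bodies are copied from the diff below; the driver program and the example ranges are assumptions for illustration only.

/* Sketch: compute the low-segment bitmask for an address range.
 * SID_SHIFT/SID_MASK/GET_ESID/LOW_ESID_MASK mirror the patch;
 * main() and the sample ranges are illustrative only. */
#include <stdio.h>

#define SID_SHIFT 28
#define SID_MASK 0xfffffffff
#define GET_ESID(x) (((x) >> SID_SHIFT) & SID_MASK)
#define LOW_ESID_MASK(addr, len) (((1U << (GET_ESID(addr+len-1)+1)) \
                                   - (1U << GET_ESID(addr))) & 0xffff)

int main(void)
{
	/* Each segment is 256MB (1 << SID_SHIFT); segments 0-15 cover 0-4GB. */
	/* 512MB at 2GB touches segments 8 and 9 -> expect 0x0300 */
	printf("0x%04x\n", (unsigned)LOW_ESID_MASK(0x80000000UL, 0x20000000UL));
	/* One 16MB hugepage at 3.5GB touches only segment 14 -> expect 0x4000 */
	printf("0x%04x\n", (unsigned)LOW_ESID_MASK(0xe0000000UL, 0x01000000UL));
	return 0;
}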
@@ -241,31 +241,25 @@ static void do_slbia(void *unused)
asm volatile ("isync; slbia; isync":::"memory");
}
/* Activate the low hpage region for 32bit processes. mmap_sem must
* be held*/
static int open_32bit_htlbpage_range(struct mm_struct *mm)
static int prepare_low_seg_for_htlb(struct mm_struct *mm, unsigned long seg)
{
unsigned long start = seg << SID_SHIFT;
unsigned long end = (seg+1) << SID_SHIFT;
struct vm_area_struct *vma;
unsigned long addr;
struct mmu_gather *tlb;
if (mm->context.low_hpages)
return 0; /* The window is already open */
/* Check no VMAs are in the region */
vma = find_vma(mm, TASK_HPAGE_BASE_32);
BUG_ON(seg >= 16);
if (vma && (vma->vm_start < TASK_HPAGE_END_32)) {
printk(KERN_DEBUG "Low HTLB region busy: PID=%d vma @ %lx-%lx\n",
current->pid, vma->vm_start, vma->vm_end);
/* Check no VMAs are in the region */
vma = find_vma(mm, start);
if (vma && (vma->vm_start < end))
return -EBUSY;
}
/* Clean up any leftover PTE pages in the region */
spin_lock(&mm->page_table_lock);
tlb = tlb_gather_mmu(mm, 0);
for (addr = TASK_HPAGE_BASE_32; addr < TASK_HPAGE_END_32;
addr += PMD_SIZE) {
for (addr = start; addr < end; addr += PMD_SIZE) {
pgd_t *pgd = pgd_offset(mm, addr);
pmd_t *pmd;
struct page *page;
@@ -293,15 +287,29 @@ static int open_32bit_htlbpage_range(struct mm_struct *mm)
pgtable_remove_rmap(page);
pte_free_tlb(tlb, page);
}
tlb_finish_mmu(tlb, TASK_HPAGE_BASE_32, TASK_HPAGE_END_32);
tlb_finish_mmu(tlb, start, end);
spin_unlock(&mm->page_table_lock);
mm->context.low_hpages = 1;
return 0;
}
static int open_low_hpage_segs(struct mm_struct *mm, u16 newsegs)
{
unsigned long i;
newsegs &= ~(mm->context.htlb_segs);
if (! newsegs)
return 0; /* The segments we want are already open */
for (i = 0; i < 16; i++)
if ((1 << i) & newsegs)
if (prepare_low_seg_for_htlb(mm, i) != 0)
return -EBUSY;
mm->context.htlb_segs |= newsegs;
/* the context change must make it to memory before the slbia,
* so that further SLB misses do the right thing. */
mb();
on_each_cpu(do_slbia, NULL, 0, 1);
return 0;
@@ -311,8 +319,18 @@ int prepare_hugepage_range(unsigned long addr, unsigned long len)
{
if (within_hugepage_high_range(addr, len))
return 0;
else if (within_hugepage_low_range(addr, len))
return open_32bit_htlbpage_range(current->mm);
else if ((addr < 0x100000000) && ((addr+len) < 0x100000000)) {
int err;
/* Yes, we need both tests, in case addr+len overflows
* 64-bit arithmetic */
err = open_low_hpage_segs(current->mm,
LOW_ESID_MASK(addr, len));
if (err)
printk(KERN_DEBUG "prepare_hugepage_range(%lx, %lx)"
" failed (segs: 0x%04hx)\n", addr, len,
LOW_ESID_MASK(addr, len));
return err;
}
return -EINVAL;
}
@@ -559,7 +577,7 @@ int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)
/* Because we have an exclusive hugepage region which lies within the
* normal user address space, we have to take special measures to make
* non-huge mmap()s evade the hugepage reserved region. */
* non-huge mmap()s evade the hugepage reserved regions. */
unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
unsigned long len, unsigned long pgoff,
unsigned long flags)
@@ -574,36 +592,29 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
if (addr) {
addr = PAGE_ALIGN(addr);
vma = find_vma(mm, addr);
if (TASK_SIZE - len >= addr &&
(!vma || addr + len <= vma->vm_start) &&
!is_hugepage_only_range(addr,len))
if (((TASK_SIZE - len) >= addr)
&& (!vma || (addr+len) <= vma->vm_start)
&& !is_hugepage_only_range(addr,len))
return addr;
}
start_addr = addr = mm->free_area_cache;
full_search:
for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
/* At this point: (!vma || addr < vma->vm_end). */
if (TASK_SIZE - len < addr) {
/*
* Start a new search - just in case we missed
* some holes.
*/
if (start_addr != TASK_UNMAPPED_BASE) {
start_addr = addr = TASK_UNMAPPED_BASE;
goto full_search;
}
return -ENOMEM;
vma = find_vma(mm, addr);
while (TASK_SIZE - len >= addr) {
BUG_ON(vma && (addr >= vma->vm_end));
if (touches_hugepage_low_range(addr, len)) {
addr = ALIGN(addr+1, 1<<SID_SHIFT);
vma = find_vma(mm, addr);
continue;
}
if (touches_hugepage_high_range(addr, len)) {
addr = TASK_HPAGE_END;
vma = find_vma(mm, addr);
continue;
}
if (!vma || addr + len <= vma->vm_start) {
if (is_hugepage_only_range(addr, len)) {
if (addr < TASK_HPAGE_END_32)
addr = TASK_HPAGE_END_32;
else
addr = TASK_HPAGE_END;
continue;
}
/*
* Remember the place where we stopped the search:
*/
@@ -611,16 +622,70 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
return addr;
}
addr = vma->vm_end;
vma = vma->vm_next;
}
/* Make sure we didn't miss any holes */
if (start_addr != TASK_UNMAPPED_BASE) {
start_addr = addr = TASK_UNMAPPED_BASE;
goto full_search;
}
return -ENOMEM;
}
static unsigned long htlb_get_low_area(unsigned long len, u16 segmask)
{
unsigned long addr = 0;
struct vm_area_struct *vma;
vma = find_vma(current->mm, addr);
while (addr + len <= 0x100000000UL) {
BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */
if (! __within_hugepage_low_range(addr, len, segmask)) {
addr = ALIGN(addr+1, 1<<SID_SHIFT);
vma = find_vma(current->mm, addr);
continue;
}
if (!vma || (addr + len) <= vma->vm_start)
return addr;
addr = ALIGN(vma->vm_end, HPAGE_SIZE);
/* Depending on segmask this might not be a confirmed
* hugepage region, so the ALIGN could have skipped
* some VMAs */
vma = find_vma(current->mm, addr);
}
return -ENOMEM;
}
static unsigned long htlb_get_high_area(unsigned long len)
{
unsigned long addr = TASK_HPAGE_BASE;
struct vm_area_struct *vma;
vma = find_vma(current->mm, addr);
for (vma = find_vma(current->mm, addr);
addr + len <= TASK_HPAGE_END;
vma = vma->vm_next) {
BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */
BUG_ON(! within_hugepage_high_range(addr, len));
if (!vma || (addr + len) <= vma->vm_start)
return addr;
addr = ALIGN(vma->vm_end, HPAGE_SIZE);
/* Because we're in a hugepage region, this alignment
* should not skip us over any VMAs */
}
return -ENOMEM;
}
unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
unsigned long len, unsigned long pgoff,
unsigned long flags)
{
struct vm_area_struct *vma;
unsigned long base, end;
if (len & ~HPAGE_MASK)
return -EINVAL;
@@ -628,34 +693,30 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
return -EINVAL;
if (test_thread_flag(TIF_32BIT)) {
int err;
err = open_32bit_htlbpage_range(current->mm);
if (err)
return err; /* Should this just be EINVAL? */
base = TASK_HPAGE_BASE_32;
end = TASK_HPAGE_END_32;
} else {
base = TASK_HPAGE_BASE;
end = TASK_HPAGE_END;
}
if (!in_hugepage_area(current->mm->context, addr)
|| (addr & (HPAGE_SIZE - 1)))
addr = base;
int lastshift = 0;
u16 segmask, cursegs = current->mm->context.htlb_segs;
for (vma = find_vma(current->mm, addr); ; vma = vma->vm_next) {
/* At this point: (!vma || addr < vma->vm_end). */
if (addr + len > end)
return -ENOMEM;
if (!vma || (addr + len) <= vma->vm_start)
/* First see if we can do the mapping in the existing
* low hpage segments */
addr = htlb_get_low_area(len, cursegs);
if (addr != -ENOMEM)
return addr;
addr = ALIGN(vma->vm_end, HPAGE_SIZE);
/* Because we're in an exclusively hugepage region,
* this alignment shouldn't have skipped over any
* other vmas */
for (segmask = LOW_ESID_MASK(0x100000000UL-len, len);
! lastshift; segmask >>=1) {
if (segmask & 1)
lastshift = 1;
addr = htlb_get_low_area(len, cursegs | segmask);
if ((addr != -ENOMEM)
&& open_low_hpage_segs(current->mm, segmask) == 0)
return addr;
}
printk(KERN_DEBUG "hugetlb_get_unmapped_area() unable to open"
" enough segments\n");
return -ENOMEM;
} else {
return htlb_get_high_area(len);
}
}
......
@@ -23,12 +23,12 @@ typedef unsigned long mm_context_id_t;
typedef struct {
mm_context_id_t id;
#ifdef CONFIG_HUGETLB_PAGE
int low_hpages;
u16 htlb_segs; /* bitmask */
#endif
} mm_context_t;
#ifdef CONFIG_HUGETLB_PAGE
#define KERNEL_LOW_HPAGES .low_hpages = 0,
#define KERNEL_LOW_HPAGES .htlb_segs = 0,
#else
#define KERNEL_LOW_HPAGES
#endif
......
@@ -22,6 +22,10 @@
#define PAGE_MASK (~(PAGE_SIZE-1))
#define PAGE_OFFSET_MASK (PAGE_SIZE-1)
#define SID_SHIFT 28
#define SID_MASK 0xfffffffff
#define GET_ESID(x) (((x) >> SID_SHIFT) & SID_MASK)
#ifdef CONFIG_HUGETLB_PAGE
#define HPAGE_SHIFT 24
@@ -33,34 +37,36 @@
#define TASK_HPAGE_BASE (0x0000010000000000UL)
#define TASK_HPAGE_END (0x0000018000000000UL)
/* For 32-bit processes the hugepage range is 2-3G */
#define TASK_HPAGE_BASE_32 (0x80000000UL)
#define TASK_HPAGE_END_32 (0xc0000000UL)
#define LOW_ESID_MASK(addr, len) (((1U << (GET_ESID(addr+len-1)+1)) \
- (1U << GET_ESID(addr))) & 0xffff)
#define ARCH_HAS_HUGEPAGE_ONLY_RANGE
#define ARCH_HAS_PREPARE_HUGEPAGE_RANGE
#define touches_hugepage_low_range(addr, len) \
(((addr) > (TASK_HPAGE_BASE_32-(len))) && ((addr) < TASK_HPAGE_END_32))
(LOW_ESID_MASK((addr), (len)) & current->mm->context.htlb_segs)
#define touches_hugepage_high_range(addr, len) \
(((addr) > (TASK_HPAGE_BASE-(len))) && ((addr) < TASK_HPAGE_END))
#define within_hugepage_low_range(addr, len) (((addr) >= TASK_HPAGE_BASE_32) \
&& ((addr)+(len) <= TASK_HPAGE_END_32) && ((addr)+(len) >= (addr)))
#define __within_hugepage_low_range(addr, len, segmask) \
((LOW_ESID_MASK((addr), (len)) | (segmask)) == (segmask))
#define within_hugepage_low_range(addr, len) \
__within_hugepage_low_range((addr), (len), \
current->mm->context.htlb_segs)
#define within_hugepage_high_range(addr, len) (((addr) >= TASK_HPAGE_BASE) \
&& ((addr)+(len) <= TASK_HPAGE_END) && ((addr)+(len) >= (addr)))
#define is_hugepage_only_range(addr, len) \
(touches_hugepage_high_range((addr), (len)) || \
(current->mm->context.low_hpages \
&& touches_hugepage_low_range((addr), (len))))
touches_hugepage_low_range((addr), (len)))
#define hugetlb_free_pgtables free_pgtables
#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
#define in_hugepage_area(context, addr) \
((cur_cpu_spec->cpu_features & CPU_FTR_16M_PAGE) && \
((((addr) >= TASK_HPAGE_BASE) && ((addr) < TASK_HPAGE_END)) || \
((context).low_hpages && \
(((addr) >= TASK_HPAGE_BASE_32) && ((addr) < TASK_HPAGE_END_32)))))
( (((addr) >= TASK_HPAGE_BASE) && ((addr) < TASK_HPAGE_END)) || \
( ((addr) < 0x100000000L) && \
((1 << GET_ESID(addr)) & (context).htlb_segs) ) ) )
#else /* !CONFIG_HUGETLB_PAGE */
@@ -68,10 +74,6 @@
#endif /* !CONFIG_HUGETLB_PAGE */
#define SID_SHIFT 28
#define SID_MASK 0xfffffffff
#define GET_ESID(x) (((x) >> SID_SHIFT) & SID_MASK)
/* align addr on a size boundary - adjust address up/down if needed */
#define _ALIGN_UP(addr,size) (((addr)+((size)-1))&(~((size)-1)))
#define _ALIGN_DOWN(addr,size) ((addr)&(~((size)-1)))
......
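
As a usage note (not part of the commit): with these changes a 32-bit process can place a hugetlbfs mapping in any free 256MB segment below 4GB, rather than only in the old fixed 2-3GB window; the kernel switches the touched segments over to hugepage mode on demand, provided they hold no ordinary 4K mappings. A minimal sketch, assuming a hugetlbfs mount at /mnt/huge and a free segment at 1GB (both hypothetical):

#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>

#define SEG_SIZE   (1UL << 28)   /* 256MB PPC segment */
#define HPAGE_SIZE (1UL << 24)   /* 16MB hugepage */

int main(void)
{
	int fd = open("/mnt/huge/file", O_CREAT | O_RDWR, 0600);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* MAP_FIXED at segment 4 (1GB): previously rejected, now allowed
	 * once prepare_hugepage_range() opens the segment for hugepages. */
	void *p = mmap((void *)(4 * SEG_SIZE), HPAGE_SIZE,
		       PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, fd, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	printf("hugepage mapped at %p\n", p);
	return 0;
}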