Commit c37f9fb1, authored by Andy Whitcroft, committed by Linus Torvalds

hugetlb: allow huge page mappings to be created without reservations

By default all shared mappings and most private mappings now have
reservations associated with them.  This improves semantics by providing
allocation guarantees to the mapper.  However a small number of
applications may attempt to make very large sparse mappings; with these
strict reservations the system will never be able to honour the mapping.

This patch set brings MAP_NORESERVE support to hugetlb files.  This allows
new mappings to be made to hugetlbfs files without an associated
reservation, for both shared and private mappings.  This allows
applications which want to create very sparse mappings to opt-out of the
reservation system.  Obviously as there is no reservation they are liable
to fault at runtime if the huge page pool becomes exhausted; buyer beware.
Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Adam Litke <agl@us.ibm.com>
Cc: Johannes Weiner <hannes@saeurebad.de>
Cc: Andy Whitcroft <apw@shadowen.org>
Cc: William Lee Irwin III <wli@holomorphy.com>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 96822904
...@@ -247,6 +247,9 @@ static int is_vma_resv_set(struct vm_area_struct *vma, unsigned long flag) ...@@ -247,6 +247,9 @@ static int is_vma_resv_set(struct vm_area_struct *vma, unsigned long flag)
/* Decrement the reserved pages in the hugepage pool by one */ /* Decrement the reserved pages in the hugepage pool by one */
static void decrement_hugepage_resv_vma(struct vm_area_struct *vma) static void decrement_hugepage_resv_vma(struct vm_area_struct *vma)
{ {
if (vma->vm_flags & VM_NORESERVE)
return;
if (vma->vm_flags & VM_SHARED) { if (vma->vm_flags & VM_SHARED) {
/* Shared mappings always use reserves */ /* Shared mappings always use reserves */
resv_huge_pages--; resv_huge_pages--;
...@@ -720,25 +723,65 @@ static void return_unused_surplus_pages(unsigned long unused_resv_pages) ...@@ -720,25 +723,65 @@ static void return_unused_surplus_pages(unsigned long unused_resv_pages)
} }
} }
/*
 * Determine if the huge page at addr within the vma has an associated
 * reservation.  Where it does not we will need to logically increase
 * reservation and actually increase quota before an allocation can occur.
 * Where any new reservation would be required the reservation change is
 * prepared, but not committed.  Once the page has been quota'd, allocated
 * and instantiated the change should be committed via
 * vma_commit_reservation().  No action is required on failure.
 *
 * Returns:
 *  - shared mapping:  whatever region_chg() reports for the single-page
 *    range [idx, idx + 1) on the inode's private_list;
 *  - private mapping: 1 when the vma does not have HPAGE_RESV_OWNER set,
 *    otherwise 0.
 */
static int vma_needs_reservation(struct vm_area_struct *vma, unsigned long addr)
{
	struct inode *inode = vma->vm_file->f_mapping->host;
	pgoff_t idx;

	/* Private mappings: only the reservation owner is already covered. */
	if (!(vma->vm_flags & VM_SHARED))
		return is_vma_resv_set(vma, HPAGE_RESV_OWNER) ? 0 : 1;

	/* Shared mappings: consult the per-inode region list for this page. */
	idx = vma_pagecache_offset(vma, addr);
	return region_chg(&inode->i_mapping->private_list, idx, idx + 1);
}
/*
 * Commit the reservation change previously prepared by
 * vma_needs_reservation() for the huge page at addr within the vma.
 * For shared mappings the single-page range [idx, idx + 1) is added to
 * the inode's private_list via region_add(); for private mappings there
 * is nothing to record and this is a no-op.
 */
static void vma_commit_reservation(struct vm_area_struct *vma,
				   unsigned long addr)
{
	struct inode *inode = vma->vm_file->f_mapping->host;
	pgoff_t idx;

	/* Only shared mappings track reservations in the region list. */
	if (!(vma->vm_flags & VM_SHARED))
		return;

	idx = vma_pagecache_offset(vma, addr);
	region_add(&inode->i_mapping->private_list, idx, idx + 1);
}
static struct page *alloc_huge_page(struct vm_area_struct *vma, static struct page *alloc_huge_page(struct vm_area_struct *vma,
unsigned long addr, int avoid_reserve) unsigned long addr, int avoid_reserve)
{ {
struct page *page; struct page *page;
struct address_space *mapping = vma->vm_file->f_mapping; struct address_space *mapping = vma->vm_file->f_mapping;
struct inode *inode = mapping->host; struct inode *inode = mapping->host;
unsigned int chg = 0; unsigned int chg;
/* /*
* Processes that did not create the mapping will have no reserves and * Processes that did not create the mapping will have no reserves and
* will not have accounted against quota. Check that the quota can be * will not have accounted against quota. Check that the quota can be
* made before satisfying the allocation * made before satisfying the allocation
* MAP_NORESERVE mappings may also need pages and quota allocated
* if no reserve mapping overlaps.
*/ */
if (!(vma->vm_flags & VM_SHARED) && chg = vma_needs_reservation(vma, addr);
!is_vma_resv_set(vma, HPAGE_RESV_OWNER)) { if (chg < 0)
chg = 1; return ERR_PTR(chg);
if (chg)
if (hugetlb_get_quota(inode->i_mapping, chg)) if (hugetlb_get_quota(inode->i_mapping, chg))
return ERR_PTR(-ENOSPC); return ERR_PTR(-ENOSPC);
}
spin_lock(&hugetlb_lock); spin_lock(&hugetlb_lock);
page = dequeue_huge_page_vma(vma, addr, avoid_reserve); page = dequeue_huge_page_vma(vma, addr, avoid_reserve);
...@@ -755,6 +798,8 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma, ...@@ -755,6 +798,8 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
set_page_refcounted(page); set_page_refcounted(page);
set_page_private(page, (unsigned long) mapping); set_page_private(page, (unsigned long) mapping);
vma_commit_reservation(vma, addr);
return page; return page;
} }
...@@ -1560,6 +1605,9 @@ int hugetlb_reserve_pages(struct inode *inode, ...@@ -1560,6 +1605,9 @@ int hugetlb_reserve_pages(struct inode *inode,
{ {
long ret, chg; long ret, chg;
if (vma && vma->vm_flags & VM_NORESERVE)
return 0;
/* /*
* Shared mappings base their reservation on the number of pages that * Shared mappings base their reservation on the number of pages that
* are already allocated on behalf of the file. Private mappings need * are already allocated on behalf of the file. Private mappings need
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment