Commit c1c0d518 authored by Andrew Morton's avatar Andrew Morton Committed by Linus Torvalds

[PATCH] Fix hugetlb-vs-memory overcommit

From: Andy Whitcroft <apw@shadowen.org>

Two problems:

a) The memory overcommit code fails to take into account all the pages
   which are pinned by being reserved for the hugetlbpage pool

b) We're performing overcommit accounting and checking on behalf of
   hugetlbpage vmas.

The main thrust is to ensure that VM_ACCOUNT actually only gets set on
vma's which are indeed accountable.  With that ensured much of the rest
comes out in the wash.  It also excludes the hugetlb memory from the
overcommit calculation in the overcommit_memory=2 case.
parent 112347bb
...@@ -527,6 +527,12 @@ int is_hugepage_mem_enough(size_t size) ...@@ -527,6 +527,12 @@ int is_hugepage_mem_enough(size_t size)
return (size + ~HPAGE_MASK)/HPAGE_SIZE <= htlbpagemem; return (size + ~HPAGE_MASK)/HPAGE_SIZE <= htlbpagemem;
} }
/* Return the number of pages of memory we physically have, in PAGE_SIZE units. */
unsigned long hugetlb_total_pages(void)
{
	unsigned long pages_per_hpage = HPAGE_SIZE / PAGE_SIZE;

	return htlbzone_pages * pages_per_hpage;
}
/* /*
* We cannot handle pagefaults against hugetlb pages at all. They cause * We cannot handle pagefaults against hugetlb pages at all. They cause
* handle_mm_fault() to try to instantiate regular-sized pages in the * handle_mm_fault() to try to instantiate regular-sized pages in the
......
...@@ -592,6 +592,12 @@ int is_hugepage_mem_enough(size_t size) ...@@ -592,6 +592,12 @@ int is_hugepage_mem_enough(size_t size)
return 1; return 1;
} }
/* Total memory set aside for huge pages, expressed in PAGE_SIZE units. */
unsigned long hugetlb_total_pages(void)
{
	return (HPAGE_SIZE / PAGE_SIZE) * htlbzone_pages;
}
static struct page *hugetlb_nopage(struct vm_area_struct * area, unsigned long address, int *unused) static struct page *hugetlb_nopage(struct vm_area_struct * area, unsigned long address, int *unused)
{ {
BUG(); BUG();
......
...@@ -928,6 +928,12 @@ int is_hugepage_mem_enough(size_t size) ...@@ -928,6 +928,12 @@ int is_hugepage_mem_enough(size_t size)
return (size + ~HPAGE_MASK)/HPAGE_SIZE <= htlbpage_free; return (size + ~HPAGE_MASK)/HPAGE_SIZE <= htlbpage_free;
} }
/* Return the number of pages of memory we physically have, in PAGE_SIZE units. */
unsigned long hugetlb_total_pages(void)
{
	unsigned long base_pages_per_hpage = HPAGE_SIZE / PAGE_SIZE;

	return base_pages_per_hpage * htlbpage_total;
}
/* /*
* We cannot handle pagefaults against hugetlb pages at all. They cause * We cannot handle pagefaults against hugetlb pages at all. They cause
* handle_mm_fault() to try to instantiate regular-sized pages in the * handle_mm_fault() to try to instantiate regular-sized pages in the
......
...@@ -501,6 +501,12 @@ int is_hugepage_mem_enough(size_t size) ...@@ -501,6 +501,12 @@ int is_hugepage_mem_enough(size_t size)
return (size + ~HPAGE_MASK)/HPAGE_SIZE <= htlbpagemem; return (size + ~HPAGE_MASK)/HPAGE_SIZE <= htlbpagemem;
} }
/* How much memory the huge page pool pins, counted in PAGE_SIZE units. */
unsigned long hugetlb_total_pages(void)
{
	return (HPAGE_SIZE / PAGE_SIZE) * htlbzone_pages;
}
/* /*
* We cannot handle pagefaults against hugetlb pages at all. They cause * We cannot handle pagefaults against hugetlb pages at all. They cause
* handle_mm_fault() to try to instantiate regular-sized pages in the * handle_mm_fault() to try to instantiate regular-sized pages in the
......
...@@ -497,6 +497,12 @@ int is_hugepage_mem_enough(size_t size) ...@@ -497,6 +497,12 @@ int is_hugepage_mem_enough(size_t size)
return (size + ~HPAGE_MASK)/HPAGE_SIZE <= htlbpagemem; return (size + ~HPAGE_MASK)/HPAGE_SIZE <= htlbpagemem;
} }
/* Return the number of pages of memory we physically have, in PAGE_SIZE units. */
unsigned long hugetlb_total_pages(void)
{
	unsigned long nr_small_pages = HPAGE_SIZE / PAGE_SIZE;

	return nr_small_pages * htlbzone_pages;
}
/* /*
* We cannot handle pagefaults against hugetlb pages at all. They cause * We cannot handle pagefaults against hugetlb pages at all. They cause
* handle_mm_fault() to try to instantiate regular-sized pages in the * handle_mm_fault() to try to instantiate regular-sized pages in the
......
...@@ -19,6 +19,7 @@ int hugetlb_prefault(struct address_space *, struct vm_area_struct *); ...@@ -19,6 +19,7 @@ int hugetlb_prefault(struct address_space *, struct vm_area_struct *);
void huge_page_release(struct page *); void huge_page_release(struct page *);
int hugetlb_report_meminfo(char *); int hugetlb_report_meminfo(char *);
int is_hugepage_mem_enough(size_t); int is_hugepage_mem_enough(size_t);
unsigned long hugetlb_total_pages(void);
struct page *follow_huge_addr(struct mm_struct *mm, struct vm_area_struct *vma, struct page *follow_huge_addr(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, int write); unsigned long address, int write);
struct vm_area_struct *hugepage_vma(struct mm_struct *mm, struct vm_area_struct *hugepage_vma(struct mm_struct *mm,
...@@ -55,6 +56,10 @@ static inline int is_vm_hugetlb_page(struct vm_area_struct *vma) ...@@ -55,6 +56,10 @@ static inline int is_vm_hugetlb_page(struct vm_area_struct *vma)
{ {
return 0; return 0;
} }
/* No hugetlb support configured: no memory is set aside for huge pages. */
static inline unsigned long hugetlb_total_pages(void)
{
	return 0UL;
}
#define follow_hugetlb_page(m,v,p,vs,a,b,i) ({ BUG(); 0; }) #define follow_hugetlb_page(m,v,p,vs,a,b,i) ({ BUG(); 0; })
#define follow_huge_addr(mm, vma, addr, write) 0 #define follow_huge_addr(mm, vma, addr, write) 0
......
...@@ -112,6 +112,9 @@ struct vm_area_struct { ...@@ -112,6 +112,9 @@ struct vm_area_struct {
#define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */ #define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */
#define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */ #define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */
/* It makes sense to apply VM_ACCOUNT to this vma. */
/*
 * NOTE(review): this expression is true exactly when VM_HUGETLB is set,
 * yet hugetlb vmas are the ones this patch exempts from accounting --
 * the comment above appears inverted relative to the macro body.
 * Confirm the intended sense against the mprotect_fixup() caller.
 */
#define VM_MAYACCT(vma) (!!((vma)->vm_flags & VM_HUGETLB))
#ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */ #ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */
#define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
#endif #endif
......
...@@ -489,9 +489,13 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, ...@@ -489,9 +489,13 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr,
int correct_wcount = 0; int correct_wcount = 0;
int error; int error;
struct rb_node ** rb_link, * rb_parent; struct rb_node ** rb_link, * rb_parent;
int accountable = 1;
unsigned long charged = 0; unsigned long charged = 0;
if (file) { if (file) {
if (is_file_hugepages(file))
accountable = 0;
if (!file->f_op || !file->f_op->mmap) if (!file->f_op || !file->f_op->mmap)
return -ENODEV; return -ENODEV;
...@@ -608,7 +612,8 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, ...@@ -608,7 +612,8 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr,
> current->rlim[RLIMIT_AS].rlim_cur) > current->rlim[RLIMIT_AS].rlim_cur)
return -ENOMEM; return -ENOMEM;
if (!(flags & MAP_NORESERVE) || sysctl_overcommit_memory > 1) { if (accountable && (!(flags & MAP_NORESERVE) ||
sysctl_overcommit_memory > 1)) {
if (vm_flags & VM_SHARED) { if (vm_flags & VM_SHARED) {
/* Check memory availability in shmem_file_setup? */ /* Check memory availability in shmem_file_setup? */
vm_flags |= VM_ACCOUNT; vm_flags |= VM_ACCOUNT;
......
...@@ -173,7 +173,8 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, ...@@ -173,7 +173,8 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
* a MAP_NORESERVE private mapping to writable will now reserve. * a MAP_NORESERVE private mapping to writable will now reserve.
*/ */
if (newflags & VM_WRITE) { if (newflags & VM_WRITE) {
if (!(vma->vm_flags & (VM_ACCOUNT|VM_WRITE|VM_SHARED))) { if (!(vma->vm_flags & (VM_ACCOUNT|VM_WRITE|VM_SHARED))
&& VM_MAYACCT(vma)) {
charged = (end - start) >> PAGE_SHIFT; charged = (end - start) >> PAGE_SHIFT;
if (security_vm_enough_memory(charged)) if (security_vm_enough_memory(charged))
return -ENOMEM; return -ENOMEM;
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include <linux/netlink.h> #include <linux/netlink.h>
#include <linux/ptrace.h> #include <linux/ptrace.h>
#include <linux/xattr.h> #include <linux/xattr.h>
#include <linux/hugetlb.h>
int cap_capable (struct task_struct *tsk, int cap) int cap_capable (struct task_struct *tsk, int cap)
{ {
...@@ -358,7 +359,8 @@ int cap_vm_enough_memory(long pages) ...@@ -358,7 +359,8 @@ int cap_vm_enough_memory(long pages)
return -ENOMEM; return -ENOMEM;
} }
allowed = totalram_pages * sysctl_overcommit_ratio / 100; allowed = (totalram_pages - hugetlb_total_pages())
* sysctl_overcommit_ratio / 100;
allowed += total_swap_pages; allowed += total_swap_pages;
if (atomic_read(&vm_committed_space) < allowed) if (atomic_read(&vm_committed_space) < allowed)
......
...@@ -25,6 +25,7 @@ ...@@ -25,6 +25,7 @@
#include <linux/netlink.h> #include <linux/netlink.h>
#include <net/sock.h> #include <net/sock.h>
#include <linux/xattr.h> #include <linux/xattr.h>
#include <linux/hugetlb.h>
static int dummy_ptrace (struct task_struct *parent, struct task_struct *child) static int dummy_ptrace (struct task_struct *parent, struct task_struct *child)
{ {
...@@ -146,7 +147,8 @@ static int dummy_vm_enough_memory(long pages) ...@@ -146,7 +147,8 @@ static int dummy_vm_enough_memory(long pages)
return -ENOMEM; return -ENOMEM;
} }
allowed = totalram_pages * sysctl_overcommit_ratio / 100; allowed = (totalram_pages - hugetlb_total_pages())
* sysctl_overcommit_ratio / 100;
allowed += total_swap_pages; allowed += total_swap_pages;
if (atomic_read(&vm_committed_space) < allowed) if (atomic_read(&vm_committed_space) < allowed)
......
...@@ -59,6 +59,7 @@ ...@@ -59,6 +59,7 @@
#include <net/af_unix.h> /* for Unix socket types */ #include <net/af_unix.h> /* for Unix socket types */
#include <linux/parser.h> #include <linux/parser.h>
#include <linux/nfs_mount.h> #include <linux/nfs_mount.h>
#include <linux/hugetlb.h>
#include "avc.h" #include "avc.h"
#include "objsec.h" #include "objsec.h"
...@@ -1544,7 +1545,8 @@ static int selinux_vm_enough_memory(long pages) ...@@ -1544,7 +1545,8 @@ static int selinux_vm_enough_memory(long pages)
return -ENOMEM; return -ENOMEM;
} }
allowed = totalram_pages * sysctl_overcommit_ratio / 100; allowed = (totalram_pages - hugetlb_total_pages())
* sysctl_overcommit_ratio / 100;
allowed += total_swap_pages; allowed += total_swap_pages;
if (atomic_read(&vm_committed_space) < allowed) if (atomic_read(&vm_committed_space) < allowed)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment