Commit 0b14c179 authored by Hugh Dickins, committed by Linus Torvalds

[PATCH] unpaged: VM_UNPAGED

Although we tend to associate VM_RESERVED with remap_pfn_range, quite a few
drivers set VM_RESERVED on areas which are then populated by nopage.  The
PageReserved removal in 2.6.15-rc1 changed zap_pte_range not to free pages
in VM_RESERVED areas, without changing those drivers to stop setting the
flag: so their pages just leak away.
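
To illustrate the pattern at issue, here is a minimal sketch of such a
driver's mmap (the mydrv_* names are invented, not taken from any real
driver):

	/* Pages are supplied later by the ->nopage handler, yet the vma is
	 * marked VM_RESERVED: after the 2.6.15-rc1 change, zap_pte_range
	 * no longer frees pages in such areas, so they leak at munmap/exit.
	 */
	static struct vm_operations_struct mydrv_vm_ops = {
		.nopage	= mydrv_nopage,	/* allocates and returns struct pages */
	};

	static int mydrv_mmap(struct file *file, struct vm_area_struct *vma)
	{
		vma->vm_ops = &mydrv_vm_ops;
		vma->vm_flags |= VM_RESERVED;	/* 2.4 habit: keep swapout away */
		return 0;
	}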

Let's not change miscellaneous drivers now: introduce VM_UNPAGED at the core,
to flag the special areas where the ptes may have no struct page, or, if they
do have one, it is not to be touched.  Replace most instances of VM_RESERVED
in core mm by VM_UNPAGED.  Force it on in remap_pfn_range, and in the sparc
and sparc64 io_remap_pfn_range.
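
A driver that maps raw pfns needs no change at all, since remap_pfn_range
itself now forces the flag on.  A minimal sketch (hypothetical mydrv_io_mmap,
assuming the caller passes the physical base via vm_pgoff):

	static int mydrv_io_mmap(struct file *file, struct vm_area_struct *vma)
	{
		/* remap_pfn_range sets VM_IO | VM_RESERVED | VM_UNPAGED on
		 * the vma, so the core mm leaves these ptes unmanaged. */
		return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
				       vma->vm_end - vma->vm_start,
				       vma->vm_page_prot);
	}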

Revert the addition of VM_RESERVED to the powerpc vdso; it's not needed
there.  Is it needed anywhere?  It still governs the mm->reserved_vm
statistic, keeps special vmas from being merged, and keeps areas out of core
dumps; but it could probably be eliminated later (the drivers are probably
specifying it because in 2.4 it kept swapout off the vma, but in 2.6 we work
from the LRU, which these pages never get onto).

Use the VM_SHM slot for VM_UNPAGED, and define VM_SHM to 0: it serves no
purpose whatsoever, and should be removed from drivers when we clean up.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Acked-by: William Irwin <wli@holomorphy.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 664beed0
@@ -285,8 +285,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm,
 	 * It's fine to use that for setting breakpoints in the vDSO code
 	 * pages though
 	 */
-	vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE |
-			VM_MAYEXEC | VM_RESERVED;
+	vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
 	vma->vm_flags |= mm->def_flags;
 	vma->vm_page_prot = protection_map[vma->vm_flags & 0x7];
 	vma->vm_ops = &vdso_vmops;
...
@@ -74,7 +74,7 @@ int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
 	unsigned long offset = GET_PFN(pfn) << PAGE_SHIFT;
 	/* See comment in mm/memory.c remap_pfn_range */
-	vma->vm_flags |= VM_IO | VM_RESERVED;
+	vma->vm_flags |= VM_IO | VM_RESERVED | VM_UNPAGED;
 	prot = __pgprot(pg_iobits);
 	offset -= from;
...
@@ -128,7 +128,7 @@ int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
 	unsigned long offset = GET_PFN(pfn) << PAGE_SHIFT;
 	/* See comment in mm/memory.c remap_pfn_range */
-	vma->vm_flags |= VM_IO | VM_RESERVED;
+	vma->vm_flags |= VM_IO | VM_RESERVED | VM_UNPAGED;
 	prot = __pgprot(pg_iobits);
 	offset -= from;
...
@@ -144,7 +144,8 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_GROWSDOWN	0x00000100	/* general info on the segment */
 #define VM_GROWSUP	0x00000200
-#define VM_SHM		0x00000400	/* shared memory area, don't swap out */
+#define VM_SHM		0x00000000	/* Means nothing: delete it later */
+#define VM_UNPAGED	0x00000400	/* Pages managed without map count */
 #define VM_DENYWRITE	0x00000800	/* ETXTBSY on write attempts.. */
 #define VM_EXECUTABLE	0x00001000
@@ -157,7 +158,7 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_DONTCOPY	0x00020000	/* Do not copy this vma on fork */
 #define VM_DONTEXPAND	0x00040000	/* Cannot expand with mremap() */
-#define VM_RESERVED	0x00080000	/* Pages managed in a special way */
+#define VM_RESERVED	0x00080000	/* Count as reserved_vm like IO */
 #define VM_ACCOUNT	0x00100000	/* Is a VM accounted object */
 #define VM_HUGETLB	0x00400000	/* Huge TLB Page VM */
 #define VM_NONLINEAR	0x00800000	/* Is non-linear (remap_file_pages) */
...
@@ -65,7 +65,7 @@ int install_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	pte_t pte_val;
 	spinlock_t *ptl;
-	BUG_ON(vma->vm_flags & VM_RESERVED);
+	BUG_ON(vma->vm_flags & VM_UNPAGED);
 	pgd = pgd_offset(mm, addr);
 	pud = pud_alloc(mm, pgd, addr);
@@ -122,7 +122,7 @@ int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma,
 	pte_t pte_val;
 	spinlock_t *ptl;
-	BUG_ON(vma->vm_flags & VM_RESERVED);
+	BUG_ON(vma->vm_flags & VM_UNPAGED);
 	pgd = pgd_offset(mm, addr);
 	pud = pud_alloc(mm, pgd, addr);
...
@@ -126,7 +126,7 @@ static long madvise_dontneed(struct vm_area_struct * vma,
 			     unsigned long start, unsigned long end)
 {
 	*prev = vma;
-	if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_RESERVED))
+	if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_UNPAGED))
 		return -EINVAL;
 	if (unlikely(vma->vm_flags & VM_NONLINEAR)) {
...
@@ -334,7 +334,7 @@ static inline void add_mm_rss(struct mm_struct *mm, int file_rss, int anon_rss)
 /*
  * This function is called to print an error when a pte in a
- * !VM_RESERVED region is found pointing to an invalid pfn (which
+ * !VM_UNPAGED region is found pointing to an invalid pfn (which
  * is an error.
  *
  * The calling function must still handle the error.
@@ -381,15 +381,15 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		goto out_set_pte;
 	}
-	/* If the region is VM_RESERVED, the mapping is not
+	/* If the region is VM_UNPAGED, the mapping is not
 	 * mapped via rmap - duplicate the pte as is.
 	 */
-	if (vm_flags & VM_RESERVED)
+	if (vm_flags & VM_UNPAGED)
 		goto out_set_pte;
 	pfn = pte_pfn(pte);
 	/* If the pte points outside of valid memory but
-	 * the region is not VM_RESERVED, we have a problem.
+	 * the region is not VM_UNPAGED, we have a problem.
 	 */
 	if (unlikely(!pfn_valid(pfn))) {
 		print_bad_pte(vma, pte, addr);
@@ -528,7 +528,7 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	 * readonly mappings. The tradeoff is that copy_page_range is more
 	 * efficient than faulting.
 	 */
-	if (!(vma->vm_flags & (VM_HUGETLB|VM_NONLINEAR|VM_RESERVED))) {
+	if (!(vma->vm_flags & (VM_HUGETLB|VM_NONLINEAR|VM_UNPAGED))) {
 		if (!vma->anon_vma)
 			return 0;
 	}
@@ -572,7 +572,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 		(*zap_work) -= PAGE_SIZE;
-		if (!(vma->vm_flags & VM_RESERVED)) {
+		if (!(vma->vm_flags & VM_UNPAGED)) {
 			unsigned long pfn = pte_pfn(ptent);
 			if (unlikely(!pfn_valid(pfn)))
 				print_bad_pte(vma, ptent, addr);
@@ -1191,10 +1191,16 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
 	 * rest of the world about it:
 	 *   VM_IO tells people not to look at these pages
 	 *	(accesses can have side effects).
-	 *   VM_RESERVED tells the core MM not to "manage" these pages
-	 *	(e.g. refcount, mapcount, try to swap them out).
+	 *   VM_RESERVED is specified all over the place, because
+	 *	in 2.4 it kept swapout's vma scan off this vma; but
+	 *	in 2.6 the LRU scan won't even find its pages, so this
+	 *	flag means no more than count its pages in reserved_vm,
+	 *	and omit it from core dump, even when VM_IO turned off.
+	 *   VM_UNPAGED tells the core MM not to "manage" these pages
+	 *	(e.g. refcount, mapcount, try to swap them out): in
+	 *	particular, zap_pte_range does not try to free them.
 	 */
-	vma->vm_flags |= VM_IO | VM_RESERVED;
+	vma->vm_flags |= VM_IO | VM_RESERVED | VM_UNPAGED;
 	BUG_ON(addr >= end);
 	pfn -= addr >> PAGE_SHIFT;
@@ -1276,7 +1282,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	pte_t entry;
 	int ret = VM_FAULT_MINOR;
-	BUG_ON(vma->vm_flags & VM_RESERVED);
+	BUG_ON(vma->vm_flags & VM_UNPAGED);
 	if (unlikely(!pfn_valid(pfn))) {
 		/*
@@ -1924,7 +1930,7 @@ static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			inc_mm_counter(mm, anon_rss);
 			lru_cache_add_active(new_page);
 			page_add_anon_rmap(new_page, vma, address);
-		} else if (!(vma->vm_flags & VM_RESERVED)) {
+		} else if (!(vma->vm_flags & VM_UNPAGED)) {
 			inc_mm_counter(mm, file_rss);
 			page_add_file_rmap(new_page);
 		}
@@ -2203,7 +2209,7 @@ static int __init gate_vma_init(void)
 	gate_vma.vm_start = FIXADDR_USER_START;
 	gate_vma.vm_end = FIXADDR_USER_END;
 	gate_vma.vm_page_prot = PAGE_READONLY;
-	gate_vma.vm_flags = VM_RESERVED;
+	gate_vma.vm_flags = 0;
 	return 0;
 }
 __initcall(gate_vma_init);
...
@@ -269,7 +269,7 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
 	first = find_vma(mm, start);
 	if (!first)
 		return ERR_PTR(-EFAULT);
-	if (first->vm_flags & VM_RESERVED)
+	if (first->vm_flags & VM_UNPAGED)
 		return ERR_PTR(-EACCES);
 	prev = NULL;
 	for (vma = first; vma && vma->vm_start < end; vma = vma->vm_next) {
...
@@ -97,9 +97,9 @@ static void msync_page_range(struct vm_area_struct *vma,
 	/* For hugepages we can't go walking the page table normally,
 	 * but that's ok, hugetlbfs is memory based, so we don't need
 	 * to do anything more on an msync().
-	 * Can't do anything with VM_RESERVED regions either.
+	 * Can't do anything with VM_UNPAGED regions either.
 	 */
-	if (vma->vm_flags & (VM_HUGETLB|VM_RESERVED))
+	if (vma->vm_flags & (VM_HUGETLB|VM_UNPAGED))
 		return;
 	BUG_ON(addr >= end);
...