Commit 053b534a authored by Peter Feiner's avatar Peter Feiner Committed by Luis Henriques

mm: softdirty: unmapped addresses between VMAs are clean

commit 81d0fa62 upstream.

If a /proc/pid/pagemap read spans a [VMA, an unmapped region, then a
VM_SOFTDIRTY VMA], the virtual pages in the unmapped region are reported
as softdirty.  Here's a program to demonstrate the bug:

int main() {
	const uint64_t PAGEMAP_SOFTDIRTY = 1ul << 55;
	uint64_t pme[3];
	int fd = open("/proc/self/pagemap", O_RDONLY);;
	char *m = mmap(NULL, 3 * getpagesize(), PROT_READ,
	               MAP_ANONYMOUS | MAP_SHARED, -1, 0);
	munmap(m + getpagesize(), getpagesize());
	pread(fd, pme, 24, (unsigned long) m / getpagesize() * 8);
	assert(pme[0] & PAGEMAP_SOFTDIRTY);    /* passes */
	assert(!(pme[1] & PAGEMAP_SOFTDIRTY)); /* fails */
	assert(pme[2] & PAGEMAP_SOFTDIRTY);    /* passes */
	return 0;
}

(Note that all pages in new VMAs are softdirty until cleared).

Tested:
	Used the program given above. I'm going to include this code in
	a selftest in the future.

[n-horiguchi@ah.jp.nec.com: prevent pagemap_pte_range() from overrunning]
Signed-off-by: default avatarPeter Feiner <pfeiner@google.com>
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Jamie Liu <jamieliu@google.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: default avatarNaoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
[ luis: 3.16-stable prereq for:
  05fbf357 "proc/pagemap: walk page tables under pte lock" ]
Signed-off-by: default avatarLuis Henriques <luis.henriques@canonical.com>
parent 4b75fa0b
...@@ -1005,7 +1005,6 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, ...@@ -1005,7 +1005,6 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
spinlock_t *ptl; spinlock_t *ptl;
pte_t *pte; pte_t *pte;
int err = 0; int err = 0;
pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2));
/* find the first VMA at or above 'addr' */ /* find the first VMA at or above 'addr' */
vma = find_vma(walk->mm, addr); vma = find_vma(walk->mm, addr);
...@@ -1019,6 +1018,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, ...@@ -1019,6 +1018,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
for (; addr != end; addr += PAGE_SIZE) { for (; addr != end; addr += PAGE_SIZE) {
unsigned long offset; unsigned long offset;
pagemap_entry_t pme;
offset = (addr & ~PAGEMAP_WALK_MASK) >> offset = (addr & ~PAGEMAP_WALK_MASK) >>
PAGE_SHIFT; PAGE_SHIFT;
...@@ -1033,34 +1033,53 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, ...@@ -1033,34 +1033,53 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
if (pmd_trans_unstable(pmd)) if (pmd_trans_unstable(pmd))
return 0; return 0;
for (; addr != end; addr += PAGE_SIZE) {
int flags2;
/* check to see if we've left 'vma' behind while (1) {
* and need a new, higher one */ /* End of address space hole, which we mark as non-present. */
if (vma && (addr >= vma->vm_end)) { unsigned long hole_end;
vma = find_vma(walk->mm, addr);
if (vma && (vma->vm_flags & VM_SOFTDIRTY)) if (vma)
flags2 = __PM_SOFT_DIRTY; hole_end = min(end, vma->vm_start);
else else
flags2 = 0; hole_end = end;
pme = make_pme(PM_NOT_PRESENT(pm->v2) | PM_STATUS2(pm->v2, flags2));
for (; addr < hole_end; addr += PAGE_SIZE) {
pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2));
err = add_to_pagemap(addr, &pme, pm);
if (err)
return err;
} }
/* check that 'vma' actually covers this address, if (!vma || vma->vm_start >= end)
* and that it isn't a huge page vma */ break;
if (vma && (vma->vm_start <= addr) && /*
!is_vm_hugetlb_page(vma)) { * We can't possibly be in a hugetlb VMA. In general,
* for a mm_walk with a pmd_entry and a hugetlb_entry,
* the pmd_entry can only be called on addresses in a
* hugetlb if the walk starts in a non-hugetlb VMA and
* spans a hugepage VMA. Since pagemap_read walks are
* PMD-sized and PMD-aligned, this will never be true.
*/
BUG_ON(is_vm_hugetlb_page(vma));
/* Addresses in the VMA. */
for (; addr < min(end, vma->vm_end); addr += PAGE_SIZE) {
pagemap_entry_t pme;
pte = pte_offset_map(pmd, addr); pte = pte_offset_map(pmd, addr);
pte_to_pagemap_entry(&pme, pm, vma, addr, *pte); pte_to_pagemap_entry(&pme, pm, vma, addr, *pte);
/* unmap before userspace copy */
pte_unmap(pte); pte_unmap(pte);
}
err = add_to_pagemap(addr, &pme, pm); err = add_to_pagemap(addr, &pme, pm);
if (err) if (err)
return err; return err;
} }
if (addr == end)
break;
vma = find_vma(walk->mm, addr);
}
cond_resched(); cond_resched();
return err; return err;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment