Commit be1db475, authored by Daniel Axtens, committed by Linus Torvalds

mm/memory.c: add apply_to_existing_page_range() helper

apply_to_page_range() takes an address range, and if any parts of it are
not covered by the existing page table hierarchy, it allocates memory to
fill them in.

In some use cases, this is not what we want - we want to be able to
operate exclusively on PTEs that are already in the tables.

Add apply_to_existing_page_range() for this.  Adjust the walker
functions for apply_to_page_range to take 'create', which switches them
between the old and new modes.

This will be used in KASAN vmalloc.

[akpm@linux-foundation.org: reduce code duplication]
[akpm@linux-foundation.org: s/apply_to_existing_pages/apply_to_existing_page_range/]
[akpm@linux-foundation.org: initialize __apply_to_page_range::err]
Link: http://lkml.kernel.org/r/20191205140407.1874-1-dja@axtens.net
Signed-off-by: Daniel Axtens <dja@axtens.net>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Uladzislau Rezki (Sony) <urezki@gmail.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Daniel Axtens <dja@axtens.net>
Cc: Qian Cai <cai@lca.pw>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent d98c9e83
...@@ -2621,6 +2621,9 @@ static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags) ...@@ -2621,6 +2621,9 @@ static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags)
typedef int (*pte_fn_t)(pte_t *pte, unsigned long addr, void *data); typedef int (*pte_fn_t)(pte_t *pte, unsigned long addr, void *data);
extern int apply_to_page_range(struct mm_struct *mm, unsigned long address, extern int apply_to_page_range(struct mm_struct *mm, unsigned long address,
unsigned long size, pte_fn_t fn, void *data); unsigned long size, pte_fn_t fn, void *data);
extern int apply_to_existing_page_range(struct mm_struct *mm,
unsigned long address, unsigned long size,
pte_fn_t fn, void *data);
#ifdef CONFIG_PAGE_POISONING #ifdef CONFIG_PAGE_POISONING
extern bool page_poisoning_enabled(void); extern bool page_poisoning_enabled(void);
......
...@@ -2021,26 +2021,34 @@ EXPORT_SYMBOL(vm_iomap_memory); ...@@ -2021,26 +2021,34 @@ EXPORT_SYMBOL(vm_iomap_memory);
static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
unsigned long addr, unsigned long end, unsigned long addr, unsigned long end,
pte_fn_t fn, void *data) pte_fn_t fn, void *data, bool create)
{ {
pte_t *pte; pte_t *pte;
int err; int err = 0;
spinlock_t *uninitialized_var(ptl); spinlock_t *uninitialized_var(ptl);
if (create) {
pte = (mm == &init_mm) ? pte = (mm == &init_mm) ?
pte_alloc_kernel(pmd, addr) : pte_alloc_kernel(pmd, addr) :
pte_alloc_map_lock(mm, pmd, addr, &ptl); pte_alloc_map_lock(mm, pmd, addr, &ptl);
if (!pte) if (!pte)
return -ENOMEM; return -ENOMEM;
} else {
pte = (mm == &init_mm) ?
pte_offset_kernel(pmd, addr) :
pte_offset_map_lock(mm, pmd, addr, &ptl);
}
BUG_ON(pmd_huge(*pmd)); BUG_ON(pmd_huge(*pmd));
arch_enter_lazy_mmu_mode(); arch_enter_lazy_mmu_mode();
do { do {
if (create || !pte_none(*pte)) {
err = fn(pte++, addr, data); err = fn(pte++, addr, data);
if (err) if (err)
break; break;
}
} while (addr += PAGE_SIZE, addr != end); } while (addr += PAGE_SIZE, addr != end);
arch_leave_lazy_mmu_mode(); arch_leave_lazy_mmu_mode();
...@@ -2052,77 +2060,95 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, ...@@ -2052,77 +2060,95 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud, static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud,
unsigned long addr, unsigned long end, unsigned long addr, unsigned long end,
pte_fn_t fn, void *data) pte_fn_t fn, void *data, bool create)
{ {
pmd_t *pmd; pmd_t *pmd;
unsigned long next; unsigned long next;
int err; int err = 0;
BUG_ON(pud_huge(*pud)); BUG_ON(pud_huge(*pud));
if (create) {
pmd = pmd_alloc(mm, pud, addr); pmd = pmd_alloc(mm, pud, addr);
if (!pmd) if (!pmd)
return -ENOMEM; return -ENOMEM;
} else {
pmd = pmd_offset(pud, addr);
}
do { do {
next = pmd_addr_end(addr, end); next = pmd_addr_end(addr, end);
err = apply_to_pte_range(mm, pmd, addr, next, fn, data); if (create || !pmd_none_or_clear_bad(pmd)) {
err = apply_to_pte_range(mm, pmd, addr, next, fn, data,
create);
if (err) if (err)
break; break;
}
} while (pmd++, addr = next, addr != end); } while (pmd++, addr = next, addr != end);
return err; return err;
} }
static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d, static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d,
unsigned long addr, unsigned long end, unsigned long addr, unsigned long end,
pte_fn_t fn, void *data) pte_fn_t fn, void *data, bool create)
{ {
pud_t *pud; pud_t *pud;
unsigned long next; unsigned long next;
int err; int err = 0;
if (create) {
pud = pud_alloc(mm, p4d, addr); pud = pud_alloc(mm, p4d, addr);
if (!pud) if (!pud)
return -ENOMEM; return -ENOMEM;
} else {
pud = pud_offset(p4d, addr);
}
do { do {
next = pud_addr_end(addr, end); next = pud_addr_end(addr, end);
err = apply_to_pmd_range(mm, pud, addr, next, fn, data); if (create || !pud_none_or_clear_bad(pud)) {
err = apply_to_pmd_range(mm, pud, addr, next, fn, data,
create);
if (err) if (err)
break; break;
}
} while (pud++, addr = next, addr != end); } while (pud++, addr = next, addr != end);
return err; return err;
} }
static int apply_to_p4d_range(struct mm_struct *mm, pgd_t *pgd, static int apply_to_p4d_range(struct mm_struct *mm, pgd_t *pgd,
unsigned long addr, unsigned long end, unsigned long addr, unsigned long end,
pte_fn_t fn, void *data) pte_fn_t fn, void *data, bool create)
{ {
p4d_t *p4d; p4d_t *p4d;
unsigned long next; unsigned long next;
int err; int err = 0;
if (create) {
p4d = p4d_alloc(mm, pgd, addr); p4d = p4d_alloc(mm, pgd, addr);
if (!p4d) if (!p4d)
return -ENOMEM; return -ENOMEM;
} else {
p4d = p4d_offset(pgd, addr);
}
do { do {
next = p4d_addr_end(addr, end); next = p4d_addr_end(addr, end);
err = apply_to_pud_range(mm, p4d, addr, next, fn, data); if (create || !p4d_none_or_clear_bad(p4d)) {
err = apply_to_pud_range(mm, p4d, addr, next, fn, data,
create);
if (err) if (err)
break; break;
}
} while (p4d++, addr = next, addr != end); } while (p4d++, addr = next, addr != end);
return err; return err;
} }
/* static int __apply_to_page_range(struct mm_struct *mm, unsigned long addr,
* Scan a region of virtual memory, filling in page tables as necessary unsigned long size, pte_fn_t fn,
* and calling a provided function on each leaf page table. void *data, bool create)
*/
int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
unsigned long size, pte_fn_t fn, void *data)
{ {
pgd_t *pgd; pgd_t *pgd;
unsigned long next; unsigned long next;
unsigned long end = addr + size; unsigned long end = addr + size;
int err; int err = 0;
if (WARN_ON(addr >= end)) if (WARN_ON(addr >= end))
return -EINVAL; return -EINVAL;
...@@ -2130,15 +2156,41 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr, ...@@ -2130,15 +2156,41 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
pgd = pgd_offset(mm, addr); pgd = pgd_offset(mm, addr);
do { do {
next = pgd_addr_end(addr, end); next = pgd_addr_end(addr, end);
err = apply_to_p4d_range(mm, pgd, addr, next, fn, data); if (!create && pgd_none_or_clear_bad(pgd))
continue;
err = apply_to_p4d_range(mm, pgd, addr, next, fn, data, create);
if (err) if (err)
break; break;
} while (pgd++, addr = next, addr != end); } while (pgd++, addr = next, addr != end);
return err; return err;
} }
/*
 * Walk the virtual address range [addr, addr + size) of @mm and hand every
 * PTE slot in it to @fn, together with its address and the caller's @data.
 *
 * This is the allocating flavour of the walk: page-table levels that are
 * absent from the range are filled in before @fn is invoked.
 *
 * Returns whatever __apply_to_page_range() reports: 0 on success, otherwise
 * the first non-zero error produced during the walk, which stops there.
 */
int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
			unsigned long size, pte_fn_t fn, void *data)
{
	const bool create = true;	/* populate missing page-table levels */

	return __apply_to_page_range(mm, addr, size, fn, data, create);
}
EXPORT_SYMBOL_GPL(apply_to_page_range); EXPORT_SYMBOL_GPL(apply_to_page_range);
/*
 * Walk the virtual address range [addr, addr + size) of @mm and hand the
 * PTEs that are already present in the page tables to @fn, together with
 * their address and the caller's @data.
 *
 * In contrast to apply_to_page_range(), nothing is allocated here: parts of
 * the range with no page-table coverage are skipped rather than filled in.
 *
 * Returns whatever __apply_to_page_range() reports: 0 on success, otherwise
 * the first non-zero error produced during the walk, which stops there.
 *
 * NOTE(review): in this non-allocating mode apply_to_pte_range() advances
 * its pte cursor only when @fn is actually called, while the address always
 * moves on - confirm that a pte_none() slot in the middle of a range cannot
 * leave the cursor and the address out of step.
 */
int apply_to_existing_page_range(struct mm_struct *mm, unsigned long addr,
				 unsigned long size, pte_fn_t fn, void *data)
{
	const bool create = false;	/* never allocate page-table levels */

	return __apply_to_page_range(mm, addr, size, fn, data, create);
}
EXPORT_SYMBOL_GPL(apply_to_existing_page_range);
/* /*
* handle_pte_fault chooses page fault handler according to an entry which was * handle_pte_fault chooses page fault handler according to an entry which was
* read non-atomically. Before making any commitment, on those architectures * read non-atomically. Before making any commitment, on those architectures
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment