Commit 7b86ac33 authored by Christoph Hellwig, committed by Jason Gunthorpe

pagewalk: separate function pointers from iterator data

The mm_walk structure currently mixes data and code.  Split the
operations vector out into a new mm_walk_ops structure, and while we are
changing the API also declare the mm_walk structure inside the
walk_page_range and walk_page_vma functions.

Based on patch from Linus Torvalds.

Link: https://lore.kernel.org/r/20190828141955.22210-3-hch@lst.de
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Thomas Hellstrom <thellstrom@vmware.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
parent a520110e
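For readers converting out-of-tree callers, the pattern repeated in every file below is the same: the callbacks move out of the on-stack struct mm_walk into a file-scope const struct mm_walk_ops, and the mm plus the private pointer become explicit arguments of walk_page_range(). A minimal sketch of the new calling convention follows; my_data, my_pmd_entry and my_walk are hypothetical names used only for illustration and are not part of this patch.

	/* Hypothetical caller showing the post-patch API shape. */
	struct my_data {
		unsigned long nr_pmds;
	};

	static int my_pmd_entry(pmd_t *pmd, unsigned long addr,
				unsigned long next, struct mm_walk *walk)
	{
		struct my_data *data = walk->private;	/* private data still reaches callbacks */

		data->nr_pmds++;
		return 0;	/* 0 = keep walking */
	}

	/* Function pointers now live in a const ops table, not in struct mm_walk. */
	static const struct mm_walk_ops my_walk_ops = {
		.pmd_entry = my_pmd_entry,
	};

	static unsigned long my_walk(struct mm_struct *mm, unsigned long start,
				     unsigned long end)
	{
		struct my_data data = { 0 };

		/* Callers must still hold mmap_sem around the walk. */
		down_read(&mm->mmap_sem);
		walk_page_range(mm, start, end, &my_walk_ops, &data);
		up_read(&mm->mmap_sem);
		return data.nr_pmds;
	}

Compared with the old API, the struct mm_walk itself is now built inside walk_page_range()/walk_page_vma(), so callers no longer set .mm or .private by hand.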
@@ -44,6 +44,10 @@ page_set_nocache(pte_t *pte, unsigned long addr,
 	return 0;
 }
 
+static const struct mm_walk_ops set_nocache_walk_ops = {
+	.pte_entry = page_set_nocache,
+};
+
 static int
 page_clear_nocache(pte_t *pte, unsigned long addr,
 		   unsigned long next, struct mm_walk *walk)
@@ -59,6 +63,10 @@ page_clear_nocache(pte_t *pte, unsigned long addr,
 	return 0;
 }
 
+static const struct mm_walk_ops clear_nocache_walk_ops = {
+	.pte_entry = page_clear_nocache,
+};
+
 /*
  * Alloc "coherent" memory, which for OpenRISC means simply uncached.
  *
@@ -81,10 +89,6 @@ arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
 {
 	unsigned long va;
 	void *page;
-	struct mm_walk walk = {
-		.pte_entry = page_set_nocache,
-		.mm = &init_mm
-	};
 
 	page = alloc_pages_exact(size, gfp | __GFP_ZERO);
 	if (!page)
@@ -99,7 +103,8 @@ arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
 	 * We need to iterate through the pages, clearing the dcache for
 	 * them and setting the cache-inhibit bit.
 	 */
-	if (walk_page_range(va, va + size, &walk)) {
+	if (walk_page_range(&init_mm, va, va + size, &set_nocache_walk_ops,
+			NULL)) {
 		free_pages_exact(page, size);
 		return NULL;
 	}
@@ -112,13 +117,10 @@ arch_dma_free(struct device *dev, size_t size, void *vaddr,
 		dma_addr_t dma_handle, unsigned long attrs)
 {
 	unsigned long va = (unsigned long)vaddr;
-	struct mm_walk walk = {
-		.pte_entry = page_clear_nocache,
-		.mm = &init_mm
-	};
 
 	/* walk_page_range shouldn't be able to fail here */
-	WARN_ON(walk_page_range(va, va + size, &walk));
+	WARN_ON(walk_page_range(&init_mm, va, va + size,
+			&clear_nocache_walk_ops, NULL));
 
 	free_pages_exact(vaddr, size);
 }
......
@@ -139,14 +139,14 @@ static int subpage_walk_pmd_entry(pmd_t *pmd, unsigned long addr,
 	return 0;
 }
 
+static const struct mm_walk_ops subpage_walk_ops = {
+	.pmd_entry = subpage_walk_pmd_entry,
+};
+
 static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr,
 				    unsigned long len)
 {
 	struct vm_area_struct *vma;
-	struct mm_walk subpage_proto_walk = {
-		.mm = mm,
-		.pmd_entry = subpage_walk_pmd_entry,
-	};
 
 	/*
 	 * We don't try too hard, we just mark all the vma in that range
@@ -163,7 +163,7 @@ static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr,
 		if (vma->vm_start >= (addr + len))
 			break;
 		vma->vm_flags |= VM_NOHUGEPAGE;
-		walk_page_vma(vma, &subpage_proto_walk);
+		walk_page_vma(vma, &subpage_walk_ops, NULL);
 		vma = vma->vm_next;
 	}
 }
......
@@ -2521,13 +2521,9 @@ static int __zap_zero_pages(pmd_t *pmd, unsigned long start,
 	return 0;
 }
 
-static inline void zap_zero_pages(struct mm_struct *mm)
-{
-	struct mm_walk walk = { .pmd_entry = __zap_zero_pages };
-
-	walk.mm = mm;
-	walk_page_range(0, TASK_SIZE, &walk);
-}
+static const struct mm_walk_ops zap_zero_walk_ops = {
+	.pmd_entry = __zap_zero_pages,
+};
 
 /*
  * switch on pgstes for its userspace process (for kvm)
@@ -2546,7 +2542,7 @@ int s390_enable_sie(void)
 	mm->context.has_pgste = 1;
 	/* split thp mappings and disable thp for future mappings */
 	thp_split_mm(mm);
-	zap_zero_pages(mm);
+	walk_page_range(mm, 0, TASK_SIZE, &zap_zero_walk_ops, NULL);
 	up_write(&mm->mmap_sem);
 	return 0;
 }
@@ -2589,12 +2585,13 @@ static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr,
 	return 0;
 }
 
-int s390_enable_skey(void)
-{
-	struct mm_walk walk = {
-		.hugetlb_entry = __s390_enable_skey_hugetlb,
-		.pte_entry = __s390_enable_skey_pte,
-	};
+static const struct mm_walk_ops enable_skey_walk_ops = {
+	.hugetlb_entry = __s390_enable_skey_hugetlb,
+	.pte_entry = __s390_enable_skey_pte,
+};
+
+int s390_enable_skey(void)
+{
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
 	int rc = 0;
@@ -2614,8 +2611,7 @@ int s390_enable_skey(void)
 	}
 	mm->def_flags &= ~VM_MERGEABLE;
-	walk.mm = mm;
-	walk_page_range(0, TASK_SIZE, &walk);
+	walk_page_range(mm, 0, TASK_SIZE, &enable_skey_walk_ops, NULL);
 
 out_up:
 	up_write(&mm->mmap_sem);
@@ -2633,13 +2629,14 @@ static int __s390_reset_cmma(pte_t *pte, unsigned long addr,
 	return 0;
 }
 
+static const struct mm_walk_ops reset_cmma_walk_ops = {
+	.pte_entry = __s390_reset_cmma,
+};
+
 void s390_reset_cmma(struct mm_struct *mm)
 {
-	struct mm_walk walk = { .pte_entry = __s390_reset_cmma };
-
 	down_write(&mm->mmap_sem);
-	walk.mm = mm;
-	walk_page_range(0, TASK_SIZE, &walk);
+	walk_page_range(mm, 0, TASK_SIZE, &reset_cmma_walk_ops, NULL);
 	up_write(&mm->mmap_sem);
 }
 EXPORT_SYMBOL_GPL(s390_reset_cmma);
@@ -513,7 +513,9 @@ static int smaps_pte_hole(unsigned long addr, unsigned long end,
 
 	return 0;
 }
-#endif
+#else
+#define smaps_pte_hole NULL
+#endif /* CONFIG_SHMEM */
 
 static void smaps_pte_entry(pte_t *pte, unsigned long addr,
 		struct mm_walk *walk)
@@ -729,21 +731,24 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
 	}
 	return 0;
 }
+#else
+#define smaps_hugetlb_range NULL
 #endif /* HUGETLB_PAGE */
 
-static void smap_gather_stats(struct vm_area_struct *vma,
-		struct mem_size_stats *mss)
-{
-	struct mm_walk smaps_walk = {
-		.pmd_entry = smaps_pte_range,
-#ifdef CONFIG_HUGETLB_PAGE
-		.hugetlb_entry = smaps_hugetlb_range,
-#endif
-		.mm = vma->vm_mm,
-	};
+static const struct mm_walk_ops smaps_walk_ops = {
+	.pmd_entry = smaps_pte_range,
+	.hugetlb_entry = smaps_hugetlb_range,
+};
 
-	smaps_walk.private = mss;
+static const struct mm_walk_ops smaps_shmem_walk_ops = {
+	.pmd_entry = smaps_pte_range,
+	.hugetlb_entry = smaps_hugetlb_range,
+	.pte_hole = smaps_pte_hole,
+};
 
+static void smap_gather_stats(struct vm_area_struct *vma,
+		struct mem_size_stats *mss)
+{
 #ifdef CONFIG_SHMEM
 	/* In case of smaps_rollup, reset the value from previous vma */
 	mss->check_shmem_swap = false;
@@ -765,12 +770,13 @@ static void smap_gather_stats(struct vm_area_struct *vma,
 			mss->swap += shmem_swapped;
 		} else {
 			mss->check_shmem_swap = true;
-			smaps_walk.pte_hole = smaps_pte_hole;
+			walk_page_vma(vma, &smaps_shmem_walk_ops, mss);
+			return;
 		}
 	}
 #endif
 
 	/* mmap_sem is held in m_start */
-	walk_page_vma(vma, &smaps_walk);
+	walk_page_vma(vma, &smaps_walk_ops, mss);
 }
 
 #define SEQ_PUT_DEC(str, val) \
@@ -1118,6 +1124,11 @@ static int clear_refs_test_walk(unsigned long start, unsigned long end,
 	return 0;
 }
 
+static const struct mm_walk_ops clear_refs_walk_ops = {
+	.pmd_entry = clear_refs_pte_range,
+	.test_walk = clear_refs_test_walk,
+};
+
 static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 				size_t count, loff_t *ppos)
 {
@@ -1151,12 +1162,6 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 		struct clear_refs_private cp = {
 			.type = type,
 		};
-		struct mm_walk clear_refs_walk = {
-			.pmd_entry = clear_refs_pte_range,
-			.test_walk = clear_refs_test_walk,
-			.mm = mm,
-			.private = &cp,
-		};
 
 		if (type == CLEAR_REFS_MM_HIWATER_RSS) {
 			if (down_write_killable(&mm->mmap_sem)) {
@@ -1217,7 +1222,8 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 					0, NULL, mm, 0, -1UL);
 			mmu_notifier_invalidate_range_start(&range);
 		}
-		walk_page_range(0, mm->highest_vm_end, &clear_refs_walk);
+		walk_page_range(mm, 0, mm->highest_vm_end, &clear_refs_walk_ops,
+				&cp);
 		if (type == CLEAR_REFS_SOFT_DIRTY)
 			mmu_notifier_invalidate_range_end(&range);
 		tlb_finish_mmu(&tlb, 0, -1);
@@ -1489,8 +1495,16 @@ static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask,
 
 	return err;
 }
+#else
+#define pagemap_hugetlb_range NULL
 #endif /* HUGETLB_PAGE */
 
+static const struct mm_walk_ops pagemap_ops = {
+	.pmd_entry = pagemap_pmd_range,
+	.pte_hole = pagemap_pte_hole,
+	.hugetlb_entry = pagemap_hugetlb_range,
+};
+
 /*
  * /proc/pid/pagemap - an array mapping virtual pages to pfns
  *
@@ -1522,7 +1536,6 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
 {
 	struct mm_struct *mm = file->private_data;
 	struct pagemapread pm;
-	struct mm_walk pagemap_walk = {};
 	unsigned long src;
 	unsigned long svpfn;
 	unsigned long start_vaddr;
@@ -1550,14 +1563,6 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
 	if (!pm.buffer)
 		goto out_mm;
 
-	pagemap_walk.pmd_entry = pagemap_pmd_range;
-	pagemap_walk.pte_hole = pagemap_pte_hole;
-#ifdef CONFIG_HUGETLB_PAGE
-	pagemap_walk.hugetlb_entry = pagemap_hugetlb_range;
-#endif
-	pagemap_walk.mm = mm;
-	pagemap_walk.private = &pm;
-
 	src = *ppos;
 	svpfn = src / PM_ENTRY_BYTES;
 	start_vaddr = svpfn << PAGE_SHIFT;
@@ -1586,7 +1591,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
 		ret = down_read_killable(&mm->mmap_sem);
 		if (ret)
 			goto out_free;
-		ret = walk_page_range(start_vaddr, end, &pagemap_walk);
+		ret = walk_page_range(mm, start_vaddr, end, &pagemap_ops, &pm);
 		up_read(&mm->mmap_sem);
 		start_vaddr = end;
@@ -1798,6 +1803,11 @@ static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
 }
 #endif
 
+static const struct mm_walk_ops show_numa_ops = {
+	.hugetlb_entry = gather_hugetlb_stats,
+	.pmd_entry = gather_pte_stats,
+};
+
 /*
  * Display pages allocated per node and memory policy via /proc.
  */
@@ -1809,12 +1819,6 @@ static int show_numa_map(struct seq_file *m, void *v)
 	struct numa_maps *md = &numa_priv->md;
 	struct file *file = vma->vm_file;
 	struct mm_struct *mm = vma->vm_mm;
-	struct mm_walk walk = {
-		.hugetlb_entry = gather_hugetlb_stats,
-		.pmd_entry = gather_pte_stats,
-		.private = md,
-		.mm = mm,
-	};
 	struct mempolicy *pol;
 	char buffer[64];
 	int nid;
@@ -1848,7 +1852,7 @@ static int show_numa_map(struct seq_file *m, void *v)
 		seq_puts(m, " huge");
 
 	/* mmap_sem is held by m_start */
-	walk_page_vma(vma, &walk);
+	walk_page_vma(vma, &show_numa_ops, md);
 
 	if (!md->pages)
 		goto out;
......
@@ -4,8 +4,10 @@
 
 #include <linux/mm.h>
 
+struct mm_walk;
+
 /**
- * mm_walk - callbacks for walk_page_range
+ * mm_walk_ops - callbacks for walk_page_range
  * @pud_entry: if set, called for each non-empty PUD (2nd-level) entry
  *             this handler should only handle pud_trans_huge() puds.
  *             the pmd_entry or pte_entry callbacks will be used for
@@ -18,17 +20,12 @@
 * @pte_hole: if set, called for each hole at all levels
 * @hugetlb_entry: if set, called for each hugetlb entry
 * @test_walk: caller specific callback function to determine whether
- *             we walk over the current vma or not. Returning 0
- *             value means "do page table walk over the current vma,"
- *             and a negative one means "abort current page table walk
- *             right now." 1 means "skip the current vma."
- * @mm:        mm_struct representing the target process of page table walk
- * @vma:       vma currently walked (NULL if walking outside vmas)
- * @private:   private data for callbacks' usage
- *
- * (see the comment on walk_page_range() for more details)
+ *             we walk over the current vma or not. Returning 0 means
+ *             "do page table walk over the current vma", returning
+ *             a negative value means "abort current page table walk
+ *             right now" and returning 1 means "skip the current vma"
 */
-struct mm_walk {
+struct mm_walk_ops {
 	int (*pud_entry)(pud_t *pud, unsigned long addr,
 			 unsigned long next, struct mm_walk *walk);
 	int (*pmd_entry)(pmd_t *pmd, unsigned long addr,
@@ -42,13 +39,28 @@ struct mm_walk {
 			struct mm_walk *walk);
 	int (*test_walk)(unsigned long addr, unsigned long next,
 			struct mm_walk *walk);
+};
+
+/**
+ * mm_walk - walk_page_range data
+ * @ops:	operation to call during the walk
+ * @mm:		mm_struct representing the target process of page table walk
+ * @vma:	vma currently walked (NULL if walking outside vmas)
+ * @private:	private data for callbacks' usage
+ *
+ * (see the comment on walk_page_range() for more details)
+ */
+struct mm_walk {
+	const struct mm_walk_ops *ops;
 	struct mm_struct *mm;
 	struct vm_area_struct *vma;
 	void *private;
 };
 
-int walk_page_range(unsigned long addr, unsigned long end,
-		struct mm_walk *walk);
-int walk_page_vma(struct vm_area_struct *vma, struct mm_walk *walk);
+int walk_page_range(struct mm_struct *mm, unsigned long start,
+		unsigned long end, const struct mm_walk_ops *ops,
+		void *private);
+int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops,
+		void *private);
 
 #endif /* _LINUX_PAGEWALK_H */
@@ -852,6 +852,13 @@ void hmm_range_unregister(struct hmm_range *range)
 }
 EXPORT_SYMBOL(hmm_range_unregister);
 
+static const struct mm_walk_ops hmm_walk_ops = {
+	.pud_entry = hmm_vma_walk_pud,
+	.pmd_entry = hmm_vma_walk_pmd,
+	.pte_hole = hmm_vma_walk_hole,
+	.hugetlb_entry = hmm_vma_walk_hugetlb_entry,
+};
+
 /**
  * hmm_range_fault - try to fault some address in a virtual address range
  * @range: range being faulted
@@ -887,7 +894,6 @@ long hmm_range_fault(struct hmm_range *range, unsigned int flags)
 	struct hmm_vma_walk hmm_vma_walk;
 	struct hmm *hmm = range->hmm;
 	struct vm_area_struct *vma;
-	struct mm_walk mm_walk;
 	int ret;
 
 	lockdep_assert_held(&hmm->mmu_notifier.mm->mmap_sem);
@@ -916,21 +922,14 @@ long hmm_range_fault(struct hmm_range *range, unsigned int flags)
 		hmm_vma_walk.last = start;
 		hmm_vma_walk.flags = flags;
 		hmm_vma_walk.range = range;
-		mm_walk.private = &hmm_vma_walk;
 		end = min(range->end, vma->vm_end);
 
-		mm_walk.vma = vma;
-		mm_walk.mm = vma->vm_mm;
-		mm_walk.pte_entry = NULL;
-		mm_walk.test_walk = NULL;
-		mm_walk.hugetlb_entry = NULL;
-		mm_walk.pud_entry = hmm_vma_walk_pud;
-		mm_walk.pmd_entry = hmm_vma_walk_pmd;
-		mm_walk.pte_hole = hmm_vma_walk_hole;
-		mm_walk.hugetlb_entry = hmm_vma_walk_hugetlb_entry;
+		walk_page_range(vma->vm_mm, start, end, &hmm_walk_ops,
+				&hmm_vma_walk);
 
 		do {
-			ret = walk_page_range(start, end, &mm_walk);
+			ret = walk_page_range(vma->vm_mm, start, end,
+					&hmm_walk_ops, &hmm_vma_walk);
 			start = hmm_vma_walk.last;
 
 			/* Keep trying while the range is valid. */
......
@@ -226,19 +226,9 @@ static int swapin_walk_pmd_entry(pmd_t *pmd, unsigned long start,
 	return 0;
 }
 
-static void force_swapin_readahead(struct vm_area_struct *vma,
-		unsigned long start, unsigned long end)
-{
-	struct mm_walk walk = {
-		.mm = vma->vm_mm,
-		.pmd_entry = swapin_walk_pmd_entry,
-		.private = vma,
-	};
-
-	walk_page_range(start, end, &walk);
-	lru_add_drain();	/* Push any new pages onto the LRU now */
-}
+static const struct mm_walk_ops swapin_walk_ops = {
+	.pmd_entry = swapin_walk_pmd_entry,
+};
 
 static void force_shm_swapin_readahead(struct vm_area_struct *vma,
 		unsigned long start, unsigned long end,
@@ -280,7 +270,8 @@ static long madvise_willneed(struct vm_area_struct *vma,
 	*prev = vma;
 #ifdef CONFIG_SWAP
 	if (!file) {
-		force_swapin_readahead(vma, start, end);
+		walk_page_range(vma->vm_mm, start, end, &swapin_walk_ops, vma);
+		lru_add_drain(); /* Push any new pages onto the LRU now */
 		return 0;
 	}
@@ -441,20 +432,9 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
 	return 0;
 }
 
-static void madvise_free_page_range(struct mmu_gather *tlb,
-			     struct vm_area_struct *vma,
-			     unsigned long addr, unsigned long end)
-{
-	struct mm_walk free_walk = {
-		.pmd_entry = madvise_free_pte_range,
-		.mm = vma->vm_mm,
-		.private = tlb,
-	};
-
-	tlb_start_vma(tlb, vma);
-	walk_page_range(addr, end, &free_walk);
-	tlb_end_vma(tlb, vma);
-}
+static const struct mm_walk_ops madvise_free_walk_ops = {
+	.pmd_entry = madvise_free_pte_range,
+};
 
 static int madvise_free_single_vma(struct vm_area_struct *vma,
 			unsigned long start_addr, unsigned long end_addr)
@@ -481,7 +461,10 @@ static int madvise_free_single_vma(struct vm_area_struct *vma,
 	update_hiwater_rss(mm);
 
 	mmu_notifier_invalidate_range_start(&range);
-	madvise_free_page_range(&tlb, vma, range.start, range.end);
+	tlb_start_vma(&tlb, vma);
+	walk_page_range(vma->vm_mm, range.start, range.end,
+			&madvise_free_walk_ops, &tlb);
+	tlb_end_vma(&tlb, vma);
 	mmu_notifier_invalidate_range_end(&range);
 	tlb_finish_mmu(&tlb, range.start, range.end);
......
@@ -5283,17 +5283,16 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
 	return 0;
 }
 
+static const struct mm_walk_ops precharge_walk_ops = {
+	.pmd_entry = mem_cgroup_count_precharge_pte_range,
+};
+
 static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm)
 {
 	unsigned long precharge;
-	struct mm_walk mem_cgroup_count_precharge_walk = {
-		.pmd_entry = mem_cgroup_count_precharge_pte_range,
-		.mm = mm,
-	};
 
 	down_read(&mm->mmap_sem);
-	walk_page_range(0, mm->highest_vm_end,
-			&mem_cgroup_count_precharge_walk);
+	walk_page_range(mm, 0, mm->highest_vm_end, &precharge_walk_ops, NULL);
 	up_read(&mm->mmap_sem);
 
 	precharge = mc.precharge;
@@ -5562,13 +5561,12 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
 	return ret;
 }
 
-static void mem_cgroup_move_charge(void)
-{
-	struct mm_walk mem_cgroup_move_charge_walk = {
-		.pmd_entry = mem_cgroup_move_charge_pte_range,
-		.mm = mc.mm,
-	};
+static const struct mm_walk_ops charge_walk_ops = {
+	.pmd_entry = mem_cgroup_move_charge_pte_range,
+};
 
+static void mem_cgroup_move_charge(void)
+{
 	lru_add_drain_all();
 	/*
 	 * Signal lock_page_memcg() to take the memcg's move_lock
@@ -5594,7 +5592,8 @@ static void mem_cgroup_move_charge(void)
 	 * When we have consumed all precharges and failed in doing
 	 * additional charge, the page walk just aborts.
 	 */
-	walk_page_range(0, mc.mm->highest_vm_end, &mem_cgroup_move_charge_walk);
+	walk_page_range(mc.mm, 0, mc.mm->highest_vm_end, &charge_walk_ops,
+			NULL);
 
 	up_read(&mc.mm->mmap_sem);
 	atomic_dec(&mc.from->moving_account);
......
@@ -655,6 +655,12 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end,
 	return 1;
 }
 
+static const struct mm_walk_ops queue_pages_walk_ops = {
+	.hugetlb_entry = queue_pages_hugetlb,
+	.pmd_entry = queue_pages_pte_range,
+	.test_walk = queue_pages_test_walk,
+};
+
 /*
  * Walk through page tables and collect pages to be migrated.
  *
@@ -679,15 +685,8 @@ queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
 		.nmask = nodes,
 		.prev = NULL,
 	};
-	struct mm_walk queue_pages_walk = {
-		.hugetlb_entry = queue_pages_hugetlb,
-		.pmd_entry = queue_pages_pte_range,
-		.test_walk = queue_pages_test_walk,
-		.mm = mm,
-		.private = &qp,
-	};
 
-	return walk_page_range(start, end, &queue_pages_walk);
+	return walk_page_range(mm, start, end, &queue_pages_walk_ops, &qp);
 }
 
 /*
......
@@ -2320,6 +2320,11 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
 	return 0;
 }
 
+static const struct mm_walk_ops migrate_vma_walk_ops = {
+	.pmd_entry = migrate_vma_collect_pmd,
+	.pte_hole = migrate_vma_collect_hole,
+};
+
 /*
  * migrate_vma_collect() - collect pages over a range of virtual addresses
  * @migrate: migrate struct containing all migration information
@@ -2331,21 +2336,15 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
 static void migrate_vma_collect(struct migrate_vma *migrate)
 {
 	struct mmu_notifier_range range;
-	struct mm_walk mm_walk = {
-		.pmd_entry = migrate_vma_collect_pmd,
-		.pte_hole = migrate_vma_collect_hole,
-		.vma = migrate->vma,
-		.mm = migrate->vma->vm_mm,
-		.private = migrate,
-	};
 
-	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, NULL, mm_walk.mm,
-				migrate->start,
-				migrate->end);
+	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, NULL,
+			migrate->vma->vm_mm, migrate->start, migrate->end);
 	mmu_notifier_invalidate_range_start(&range);
-	walk_page_range(migrate->start, migrate->end, &mm_walk);
-	mmu_notifier_invalidate_range_end(&range);
 
+	walk_page_range(migrate->vma->vm_mm, migrate->start, migrate->end,
+			&migrate_vma_walk_ops, migrate);
+	mmu_notifier_invalidate_range_end(&range);
 	migrate->end = migrate->start + (migrate->npages << PAGE_SHIFT);
 }
......
@@ -193,6 +193,12 @@ static inline bool can_do_mincore(struct vm_area_struct *vma)
 		inode_permission(file_inode(vma->vm_file), MAY_WRITE) == 0;
 }
 
+static const struct mm_walk_ops mincore_walk_ops = {
+	.pmd_entry = mincore_pte_range,
+	.pte_hole = mincore_unmapped_range,
+	.hugetlb_entry = mincore_hugetlb,
+};
+
 /*
  * Do a chunk of "sys_mincore()". We've already checked
  * all the arguments, we hold the mmap semaphore: we should
@@ -203,12 +209,6 @@ static long do_mincore(unsigned long addr, unsigned long pages, unsigned char *v
 	struct vm_area_struct *vma;
 	unsigned long end;
 	int err;
-	struct mm_walk mincore_walk = {
-		.pmd_entry = mincore_pte_range,
-		.pte_hole = mincore_unmapped_range,
-		.hugetlb_entry = mincore_hugetlb,
-		.private = vec,
-	};
 
 	vma = find_vma(current->mm, addr);
 	if (!vma || addr < vma->vm_start)
@@ -219,8 +219,7 @@ static long do_mincore(unsigned long addr, unsigned long pages, unsigned char *v
 		memset(vec, 1, pages);
 		return pages;
 	}
-	mincore_walk.mm = vma->vm_mm;
-	err = walk_page_range(addr, end, &mincore_walk);
+	err = walk_page_range(vma->vm_mm, addr, end, &mincore_walk_ops, vec);
 	if (err < 0)
 		return err;
 	return (end - addr) >> PAGE_SHIFT;
......
@@ -329,20 +329,11 @@ static int prot_none_test(unsigned long addr, unsigned long next,
 	return 0;
 }
 
-static int prot_none_walk(struct vm_area_struct *vma, unsigned long start,
-		unsigned long end, unsigned long newflags)
-{
-	pgprot_t new_pgprot = vm_get_page_prot(newflags);
-	struct mm_walk prot_none_walk = {
-		.pte_entry = prot_none_pte_entry,
-		.hugetlb_entry = prot_none_hugetlb_entry,
-		.test_walk = prot_none_test,
-		.mm = current->mm,
-		.private = &new_pgprot,
-	};
-
-	return walk_page_range(start, end, &prot_none_walk);
-}
+static const struct mm_walk_ops prot_none_walk_ops = {
+	.pte_entry = prot_none_pte_entry,
+	.hugetlb_entry = prot_none_hugetlb_entry,
+	.test_walk = prot_none_test,
+};
 
 int
 mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
@@ -369,7 +360,10 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
 	if (arch_has_pfn_modify_check() &&
 	    (vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) &&
 	    (newflags & (VM_READ|VM_WRITE|VM_EXEC)) == 0) {
-		error = prot_none_walk(vma, start, end, newflags);
+		pgprot_t new_pgprot = vm_get_page_prot(newflags);
+
+		error = walk_page_range(current->mm, start, end,
+				&prot_none_walk_ops, &new_pgprot);
 		if (error)
 			return error;
 	}
......
@@ -9,10 +9,11 @@ static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 {
 	pte_t *pte;
 	int err = 0;
+	const struct mm_walk_ops *ops = walk->ops;
 
 	pte = pte_offset_map(pmd, addr);
 	for (;;) {
-		err = walk->pte_entry(pte, addr, addr + PAGE_SIZE, walk);
+		err = ops->pte_entry(pte, addr, addr + PAGE_SIZE, walk);
 		if (err)
 			break;
 		addr += PAGE_SIZE;
@@ -30,6 +31,7 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
 {
 	pmd_t *pmd;
 	unsigned long next;
+	const struct mm_walk_ops *ops = walk->ops;
 	int err = 0;
 
 	pmd = pmd_offset(pud, addr);
@@ -37,8 +39,8 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
 again:
 		next = pmd_addr_end(addr, end);
 		if (pmd_none(*pmd) || !walk->vma) {
-			if (walk->pte_hole)
-				err = walk->pte_hole(addr, next, walk);
+			if (ops->pte_hole)
+				err = ops->pte_hole(addr, next, walk);
 			if (err)
 				break;
 			continue;
@@ -47,8 +49,8 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
 		 * This implies that each ->pmd_entry() handler
 		 * needs to know about pmd_trans_huge() pmds
 		 */
-		if (walk->pmd_entry)
-			err = walk->pmd_entry(pmd, addr, next, walk);
+		if (ops->pmd_entry)
+			err = ops->pmd_entry(pmd, addr, next, walk);
 		if (err)
 			break;
@@ -56,7 +58,7 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
 		 * Check this here so we only break down trans_huge
 		 * pages when we _need_ to
 		 */
-		if (!walk->pte_entry)
+		if (!ops->pte_entry)
 			continue;
 
 		split_huge_pmd(walk->vma, pmd, addr);
@@ -75,6 +77,7 @@ static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
 {
 	pud_t *pud;
 	unsigned long next;
+	const struct mm_walk_ops *ops = walk->ops;
 	int err = 0;
 
 	pud = pud_offset(p4d, addr);
@@ -82,18 +85,18 @@ static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
 again:
 		next = pud_addr_end(addr, end);
 		if (pud_none(*pud) || !walk->vma) {
-			if (walk->pte_hole)
-				err = walk->pte_hole(addr, next, walk);
+			if (ops->pte_hole)
+				err = ops->pte_hole(addr, next, walk);
 			if (err)
 				break;
 			continue;
 		}
 
-		if (walk->pud_entry) {
+		if (ops->pud_entry) {
 			spinlock_t *ptl = pud_trans_huge_lock(pud, walk->vma);
 
 			if (ptl) {
-				err = walk->pud_entry(pud, addr, next, walk);
+				err = ops->pud_entry(pud, addr, next, walk);
 				spin_unlock(ptl);
 				if (err)
 					break;
@@ -105,7 +108,7 @@ static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
 		if (pud_none(*pud))
 			goto again;
 
-		if (walk->pmd_entry || walk->pte_entry)
+		if (ops->pmd_entry || ops->pte_entry)
 			err = walk_pmd_range(pud, addr, next, walk);
 		if (err)
 			break;
@@ -119,19 +122,20 @@ static int walk_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
 {
 	p4d_t *p4d;
 	unsigned long next;
+	const struct mm_walk_ops *ops = walk->ops;
 	int err = 0;
 
 	p4d = p4d_offset(pgd, addr);
 	do {
 		next = p4d_addr_end(addr, end);
 		if (p4d_none_or_clear_bad(p4d)) {
-			if (walk->pte_hole)
-				err = walk->pte_hole(addr, next, walk);
+			if (ops->pte_hole)
+				err = ops->pte_hole(addr, next, walk);
 			if (err)
 				break;
 			continue;
 		}
-		if (walk->pmd_entry || walk->pte_entry)
+		if (ops->pmd_entry || ops->pte_entry)
 			err = walk_pud_range(p4d, addr, next, walk);
 		if (err)
 			break;
@@ -145,19 +149,20 @@ static int walk_pgd_range(unsigned long addr, unsigned long end,
 {
 	pgd_t *pgd;
 	unsigned long next;
+	const struct mm_walk_ops *ops = walk->ops;
 	int err = 0;
 
 	pgd = pgd_offset(walk->mm, addr);
 	do {
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd)) {
-			if (walk->pte_hole)
-				err = walk->pte_hole(addr, next, walk);
+			if (ops->pte_hole)
+				err = ops->pte_hole(addr, next, walk);
 			if (err)
 				break;
 			continue;
 		}
-		if (walk->pmd_entry || walk->pte_entry)
+		if (ops->pmd_entry || ops->pte_entry)
 			err = walk_p4d_range(pgd, addr, next, walk);
 		if (err)
 			break;
@@ -183,6 +188,7 @@ static int walk_hugetlb_range(unsigned long addr, unsigned long end,
 	unsigned long hmask = huge_page_mask(h);
 	unsigned long sz = huge_page_size(h);
 	pte_t *pte;
+	const struct mm_walk_ops *ops = walk->ops;
 	int err = 0;
 
 	do {
@@ -190,9 +196,9 @@ static int walk_hugetlb_range(unsigned long addr, unsigned long end,
 		pte = huge_pte_offset(walk->mm, addr & hmask, sz);
 
 		if (pte)
-			err = walk->hugetlb_entry(pte, hmask, addr, next, walk);
-		else if (walk->pte_hole)
-			err = walk->pte_hole(addr, next, walk);
+			err = ops->hugetlb_entry(pte, hmask, addr, next, walk);
+		else if (ops->pte_hole)
+			err = ops->pte_hole(addr, next, walk);
 
 		if (err)
 			break;
@@ -220,9 +226,10 @@ static int walk_page_test(unsigned long start, unsigned long end,
 			struct mm_walk *walk)
 {
 	struct vm_area_struct *vma = walk->vma;
+	const struct mm_walk_ops *ops = walk->ops;
 
-	if (walk->test_walk)
-		return walk->test_walk(start, end, walk);
+	if (ops->test_walk)
+		return ops->test_walk(start, end, walk);
 
 	/*
 	 * vma(VM_PFNMAP) doesn't have any valid struct pages behind VM_PFNMAP
@@ -234,8 +241,8 @@ static int walk_page_test(unsigned long start, unsigned long end,
 	 */
 	if (vma->vm_flags & VM_PFNMAP) {
 		int err = 1;
-		if (walk->pte_hole)
-			err = walk->pte_hole(start, end, walk);
+		if (ops->pte_hole)
+			err = ops->pte_hole(start, end, walk);
 		return err ? err : 1;
 	}
 	return 0;
@@ -248,7 +255,7 @@ static int __walk_page_range(unsigned long start, unsigned long end,
 	struct vm_area_struct *vma = walk->vma;
 
 	if (vma && is_vm_hugetlb_page(vma)) {
-		if (walk->hugetlb_entry)
+		if (walk->ops->hugetlb_entry)
 			err = walk_hugetlb_range(start, end, walk);
 	} else
 		err = walk_pgd_range(start, end, walk);
@@ -258,11 +265,13 @@ static int __walk_page_range(unsigned long start, unsigned long end,
 
 /**
  * walk_page_range - walk page table with caller specific callbacks
+ * @mm:		mm_struct representing the target process of page table walk
  * @start:	start address of the virtual address range
  * @end:	end address of the virtual address range
- * @walk:	mm_walk structure defining the callbacks and the target address space
+ * @ops:	operation to call during the walk
+ * @private:	private data for callbacks' usage
 *
- * Recursively walk the page table tree of the process represented by @walk->mm
+ * Recursively walk the page table tree of the process represented by @mm
 * within the virtual address range [@start, @end). During walking, we can do
 * some caller-specific works for each entry, by setting up pmd_entry(),
 * pte_entry(), and/or hugetlb_entry(). If you don't set up for some of these
@@ -278,47 +287,52 @@ static int __walk_page_range(unsigned long start, unsigned long end,
 *
 * Before starting to walk page table, some callers want to check whether
 * they really want to walk over the current vma, typically by checking
- * its vm_flags. walk_page_test() and @walk->test_walk() are used for this
+ * its vm_flags. walk_page_test() and @ops->test_walk() are used for this
 * purpose.
 *
 * struct mm_walk keeps current values of some common data like vma and pmd,
 * which are useful for the access from callbacks. If you want to pass some
- * caller-specific data to callbacks, @walk->private should be helpful.
+ * caller-specific data to callbacks, @private should be helpful.
 *
 * Locking:
- *   Callers of walk_page_range() and walk_page_vma() should hold
- *   @walk->mm->mmap_sem, because these function traverse vma list and/or
- *   access to vma's data.
+ *   Callers of walk_page_range() and walk_page_vma() should hold @mm->mmap_sem,
+ *   because these function traverse vma list and/or access to vma's data.
 */
-int walk_page_range(unsigned long start, unsigned long end,
-		struct mm_walk *walk)
+int walk_page_range(struct mm_struct *mm, unsigned long start,
+		unsigned long end, const struct mm_walk_ops *ops,
+		void *private)
 {
 	int err = 0;
 	unsigned long next;
 	struct vm_area_struct *vma;
+	struct mm_walk walk = {
+		.ops = ops,
+		.mm = mm,
+		.private = private,
+	};
 
 	if (start >= end)
 		return -EINVAL;
 
-	if (!walk->mm)
+	if (!walk.mm)
 		return -EINVAL;
 
-	VM_BUG_ON_MM(!rwsem_is_locked(&walk->mm->mmap_sem), walk->mm);
+	VM_BUG_ON_MM(!rwsem_is_locked(&walk.mm->mmap_sem), walk.mm);
 
-	vma = find_vma(walk->mm, start);
+	vma = find_vma(walk.mm, start);
 	do {
 		if (!vma) { /* after the last vma */
-			walk->vma = NULL;
+			walk.vma = NULL;
 			next = end;
 		} else if (start < vma->vm_start) { /* outside vma */
-			walk->vma = NULL;
+			walk.vma = NULL;
 			next = min(end, vma->vm_start);
 		} else { /* inside vma */
-			walk->vma = vma;
+			walk.vma = vma;
 			next = min(end, vma->vm_end);
 			vma = vma->vm_next;
 
-			err = walk_page_test(start, next, walk);
+			err = walk_page_test(start, next, &walk);
 			if (err > 0) {
 				/*
 				 * positive return values are purely for
@@ -331,28 +345,34 @@ int walk_page_range(unsigned long start, unsigned long end,
 			if (err < 0)
 				break;
 		}
-		if (walk->vma || walk->pte_hole)
-			err = __walk_page_range(start, next, walk);
+		if (walk.vma || walk.ops->pte_hole)
+			err = __walk_page_range(start, next, &walk);
 		if (err)
 			break;
 	} while (start = next, start < end);
 	return err;
 }
 
-int walk_page_vma(struct vm_area_struct *vma, struct mm_walk *walk)
+int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops,
+		void *private)
 {
+	struct mm_walk walk = {
+		.ops = ops,
+		.mm = vma->vm_mm,
+		.vma = vma,
+		.private = private,
+	};
 	int err;
 
-	if (!walk->mm)
+	if (!walk.mm)
 		return -EINVAL;
 
-	VM_BUG_ON(!rwsem_is_locked(&walk->mm->mmap_sem));
-	VM_BUG_ON(!vma);
-
-	walk->vma = vma;
-	err = walk_page_test(vma->vm_start, vma->vm_end, walk);
+	VM_BUG_ON(!rwsem_is_locked(&vma->vm_mm->mmap_sem));
 
+	err = walk_page_test(vma->vm_start, vma->vm_end, &walk);
 	if (err > 0)
 		return 0;
 	if (err < 0)
 		return err;
-	return __walk_page_range(vma->vm_start, vma->vm_end, walk);
+	return __walk_page_range(vma->vm_start, vma->vm_end, &walk);
 }