Commit b7a16c7a authored by Steven Price's avatar Steven Price Committed by Linus Torvalds

mm: pagewalk: add 'depth' parameter to pte_hole

The pte_hole() callback is called at multiple levels of the page tables.
Code dumping the kernel page tables needs to know at what depth the
missing entry is.  Add this as an extra parameter to pte_hole().  When the
depth isn't known (e.g.  processing a vma) then -1 is passed.

The depth that is reported is the actual level where the entry is missing
(ignoring any folding that is in place), i.e.  any levels where
PTRS_PER_P?D is set to 1 are ignored.

Note that depth starts at 0 for a PGD so that PUD/PMD/PTE retain their
natural numbers as levels 2/3/4.

Link: http://lkml.kernel.org/r/20191218162402.45610-16-steven.price@arm.com
Signed-off-by: Steven Price <steven.price@arm.com>
Tested-by: Zong Li <zong.li@sifive.com>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Alexandre Ghiti <alex@ghiti.fr>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Hogan <jhogan@kernel.org>
Cc: James Morse <james.morse@arm.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: "Liang, Kan" <kan.liang@linux.intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Paul Burton <paul.burton@mips.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent c02a9875
...@@ -505,7 +505,7 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page, ...@@ -505,7 +505,7 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page,
#ifdef CONFIG_SHMEM #ifdef CONFIG_SHMEM
static int smaps_pte_hole(unsigned long addr, unsigned long end, static int smaps_pte_hole(unsigned long addr, unsigned long end,
struct mm_walk *walk) __always_unused int depth, struct mm_walk *walk)
{ {
struct mem_size_stats *mss = walk->private; struct mem_size_stats *mss = walk->private;
...@@ -1282,7 +1282,7 @@ static int add_to_pagemap(unsigned long addr, pagemap_entry_t *pme, ...@@ -1282,7 +1282,7 @@ static int add_to_pagemap(unsigned long addr, pagemap_entry_t *pme,
} }
static int pagemap_pte_hole(unsigned long start, unsigned long end, static int pagemap_pte_hole(unsigned long start, unsigned long end,
struct mm_walk *walk) __always_unused int depth, struct mm_walk *walk)
{ {
struct pagemapread *pm = walk->private; struct pagemapread *pm = walk->private;
unsigned long addr = start; unsigned long addr = start;
......
...@@ -17,7 +17,10 @@ struct mm_walk; ...@@ -17,7 +17,10 @@ struct mm_walk;
* split_huge_page() instead of handling it explicitly. * split_huge_page() instead of handling it explicitly.
* @pte_entry: if set, called for each non-empty PTE (lowest-level) * @pte_entry: if set, called for each non-empty PTE (lowest-level)
* entry * entry
* @pte_hole: if set, called for each hole at all levels * @pte_hole: if set, called for each hole at all levels,
* depth is -1 if not known, 0:PGD, 1:P4D, 2:PUD, 3:PMD,
* 4:PTE. Any folded depths (where PTRS_PER_P?D is equal
* to 1) are skipped.
* @hugetlb_entry: if set, called for each hugetlb entry * @hugetlb_entry: if set, called for each hugetlb entry
* @test_walk: caller specific callback function to determine whether * @test_walk: caller specific callback function to determine whether
* we walk over the current vma or not. Returning 0 means * we walk over the current vma or not. Returning 0 means
...@@ -43,7 +46,7 @@ struct mm_walk_ops { ...@@ -43,7 +46,7 @@ struct mm_walk_ops {
int (*pte_entry)(pte_t *pte, unsigned long addr, int (*pte_entry)(pte_t *pte, unsigned long addr,
unsigned long next, struct mm_walk *walk); unsigned long next, struct mm_walk *walk);
int (*pte_hole)(unsigned long addr, unsigned long next, int (*pte_hole)(unsigned long addr, unsigned long next,
struct mm_walk *walk); int depth, struct mm_walk *walk);
int (*hugetlb_entry)(pte_t *pte, unsigned long hmask, int (*hugetlb_entry)(pte_t *pte, unsigned long hmask,
unsigned long addr, unsigned long next, unsigned long addr, unsigned long next,
struct mm_walk *walk); struct mm_walk *walk);
......
...@@ -186,7 +186,7 @@ static void hmm_range_need_fault(const struct hmm_vma_walk *hmm_vma_walk, ...@@ -186,7 +186,7 @@ static void hmm_range_need_fault(const struct hmm_vma_walk *hmm_vma_walk,
} }
static int hmm_vma_walk_hole(unsigned long addr, unsigned long end, static int hmm_vma_walk_hole(unsigned long addr, unsigned long end,
struct mm_walk *walk) __always_unused int depth, struct mm_walk *walk)
{ {
struct hmm_vma_walk *hmm_vma_walk = walk->private; struct hmm_vma_walk *hmm_vma_walk = walk->private;
struct hmm_range *range = hmm_vma_walk->range; struct hmm_range *range = hmm_vma_walk->range;
...@@ -380,7 +380,7 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp, ...@@ -380,7 +380,7 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
again: again:
pmd = READ_ONCE(*pmdp); pmd = READ_ONCE(*pmdp);
if (pmd_none(pmd)) if (pmd_none(pmd))
return hmm_vma_walk_hole(start, end, walk); return hmm_vma_walk_hole(start, end, -1, walk);
if (thp_migration_supported() && is_pmd_migration_entry(pmd)) { if (thp_migration_supported() && is_pmd_migration_entry(pmd)) {
bool fault, write_fault; bool fault, write_fault;
...@@ -487,7 +487,7 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end, ...@@ -487,7 +487,7 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end,
pud = READ_ONCE(*pudp); pud = READ_ONCE(*pudp);
if (pud_none(pud)) { if (pud_none(pud)) {
ret = hmm_vma_walk_hole(start, end, walk); ret = hmm_vma_walk_hole(start, end, -1, walk);
goto out_unlock; goto out_unlock;
} }
...@@ -497,7 +497,7 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end, ...@@ -497,7 +497,7 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end,
bool fault, write_fault; bool fault, write_fault;
if (!pud_present(pud)) { if (!pud_present(pud)) {
ret = hmm_vma_walk_hole(start, end, walk); ret = hmm_vma_walk_hole(start, end, -1, walk);
goto out_unlock; goto out_unlock;
} }
......
...@@ -2151,6 +2151,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, ...@@ -2151,6 +2151,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
#ifdef CONFIG_DEVICE_PRIVATE #ifdef CONFIG_DEVICE_PRIVATE
static int migrate_vma_collect_hole(unsigned long start, static int migrate_vma_collect_hole(unsigned long start,
unsigned long end, unsigned long end,
__always_unused int depth,
struct mm_walk *walk) struct mm_walk *walk)
{ {
struct migrate_vma *migrate = walk->private; struct migrate_vma *migrate = walk->private;
...@@ -2195,7 +2196,7 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp, ...@@ -2195,7 +2196,7 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
again: again:
if (pmd_none(*pmdp)) if (pmd_none(*pmdp))
return migrate_vma_collect_hole(start, end, walk); return migrate_vma_collect_hole(start, end, -1, walk);
if (pmd_trans_huge(*pmdp)) { if (pmd_trans_huge(*pmdp)) {
struct page *page; struct page *page;
...@@ -2228,7 +2229,7 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp, ...@@ -2228,7 +2229,7 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
return migrate_vma_collect_skip(start, end, return migrate_vma_collect_skip(start, end,
walk); walk);
if (pmd_none(*pmdp)) if (pmd_none(*pmdp))
return migrate_vma_collect_hole(start, end, return migrate_vma_collect_hole(start, end, -1,
walk); walk);
} }
} }
......
...@@ -112,6 +112,7 @@ static int __mincore_unmapped_range(unsigned long addr, unsigned long end, ...@@ -112,6 +112,7 @@ static int __mincore_unmapped_range(unsigned long addr, unsigned long end,
} }
static int mincore_unmapped_range(unsigned long addr, unsigned long end, static int mincore_unmapped_range(unsigned long addr, unsigned long end,
__always_unused int depth,
struct mm_walk *walk) struct mm_walk *walk)
{ {
walk->private += __mincore_unmapped_range(addr, end, walk->private += __mincore_unmapped_range(addr, end,
......
...@@ -4,6 +4,22 @@ ...@@ -4,6 +4,22 @@
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/hugetlb.h> #include <linux/hugetlb.h>
/*
 * Translate a nominal page-table level into the level that really exists once
 * folded levels are discounted. A level counts as folded when its
 * PTRS_PER_P?D is 1; a hole seen at such a level is attributed to the level
 * above it, repeating until a level that is not folded (or the pgd) is
 * reached. For example, if p4d is folded then a missing entry found at
 * level 1 (p4d) is actually at level 0 (pgd).
 *
 * Only depths 1 (p4d), 2 (pud) and 3 (pmd) can be adjusted; any other value
 * is returned unchanged.
 */
static int real_depth(int depth)
{
	switch (depth) {
	case 3:
		/* pmd is a real level: report it as-is. */
		if (PTRS_PER_PMD != 1)
			break;
		depth = 2;
		/* fall through */
	case 2:
		/* pud is a real level: stop collapsing here. */
		if (PTRS_PER_PUD != 1)
			break;
		depth = 1;
		/* fall through */
	case 1:
		/* p4d is a real level: stop collapsing here. */
		if (PTRS_PER_P4D != 1)
			break;
		depth = 0;
		break;
	default:
		break;
	}

	return depth;
}
static int walk_pte_range_inner(pte_t *pte, unsigned long addr, static int walk_pte_range_inner(pte_t *pte, unsigned long addr,
unsigned long end, struct mm_walk *walk) unsigned long end, struct mm_walk *walk)
{ {
...@@ -49,6 +65,7 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end, ...@@ -49,6 +65,7 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
unsigned long next; unsigned long next;
const struct mm_walk_ops *ops = walk->ops; const struct mm_walk_ops *ops = walk->ops;
int err = 0; int err = 0;
int depth = real_depth(3);
pmd = pmd_offset(pud, addr); pmd = pmd_offset(pud, addr);
do { do {
...@@ -56,7 +73,7 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end, ...@@ -56,7 +73,7 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
next = pmd_addr_end(addr, end); next = pmd_addr_end(addr, end);
if (pmd_none(*pmd) || (!walk->vma && !walk->no_vma)) { if (pmd_none(*pmd) || (!walk->vma && !walk->no_vma)) {
if (ops->pte_hole) if (ops->pte_hole)
err = ops->pte_hole(addr, next, walk); err = ops->pte_hole(addr, next, depth, walk);
if (err) if (err)
break; break;
continue; continue;
...@@ -106,6 +123,7 @@ static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end, ...@@ -106,6 +123,7 @@ static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
unsigned long next; unsigned long next;
const struct mm_walk_ops *ops = walk->ops; const struct mm_walk_ops *ops = walk->ops;
int err = 0; int err = 0;
int depth = real_depth(2);
pud = pud_offset(p4d, addr); pud = pud_offset(p4d, addr);
do { do {
...@@ -113,7 +131,7 @@ static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end, ...@@ -113,7 +131,7 @@ static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
next = pud_addr_end(addr, end); next = pud_addr_end(addr, end);
if (pud_none(*pud) || (!walk->vma && !walk->no_vma)) { if (pud_none(*pud) || (!walk->vma && !walk->no_vma)) {
if (ops->pte_hole) if (ops->pte_hole)
err = ops->pte_hole(addr, next, walk); err = ops->pte_hole(addr, next, depth, walk);
if (err) if (err)
break; break;
continue; continue;
...@@ -154,13 +172,14 @@ static int walk_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end, ...@@ -154,13 +172,14 @@ static int walk_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
unsigned long next; unsigned long next;
const struct mm_walk_ops *ops = walk->ops; const struct mm_walk_ops *ops = walk->ops;
int err = 0; int err = 0;
int depth = real_depth(1);
p4d = p4d_offset(pgd, addr); p4d = p4d_offset(pgd, addr);
do { do {
next = p4d_addr_end(addr, end); next = p4d_addr_end(addr, end);
if (p4d_none_or_clear_bad(p4d)) { if (p4d_none_or_clear_bad(p4d)) {
if (ops->pte_hole) if (ops->pte_hole)
err = ops->pte_hole(addr, next, walk); err = ops->pte_hole(addr, next, depth, walk);
if (err) if (err)
break; break;
continue; continue;
...@@ -192,7 +211,7 @@ static int walk_pgd_range(unsigned long addr, unsigned long end, ...@@ -192,7 +211,7 @@ static int walk_pgd_range(unsigned long addr, unsigned long end,
next = pgd_addr_end(addr, end); next = pgd_addr_end(addr, end);
if (pgd_none_or_clear_bad(pgd)) { if (pgd_none_or_clear_bad(pgd)) {
if (ops->pte_hole) if (ops->pte_hole)
err = ops->pte_hole(addr, next, walk); err = ops->pte_hole(addr, next, 0, walk);
if (err) if (err)
break; break;
continue; continue;
...@@ -239,7 +258,7 @@ static int walk_hugetlb_range(unsigned long addr, unsigned long end, ...@@ -239,7 +258,7 @@ static int walk_hugetlb_range(unsigned long addr, unsigned long end,
if (pte) if (pte)
err = ops->hugetlb_entry(pte, hmask, addr, next, walk); err = ops->hugetlb_entry(pte, hmask, addr, next, walk);
else if (ops->pte_hole) else if (ops->pte_hole)
err = ops->pte_hole(addr, next, walk); err = ops->pte_hole(addr, next, -1, walk);
if (err) if (err)
break; break;
...@@ -283,7 +302,7 @@ static int walk_page_test(unsigned long start, unsigned long end, ...@@ -283,7 +302,7 @@ static int walk_page_test(unsigned long start, unsigned long end,
if (vma->vm_flags & VM_PFNMAP) { if (vma->vm_flags & VM_PFNMAP) {
int err = 1; int err = 1;
if (ops->pte_hole) if (ops->pte_hole)
err = ops->pte_hole(start, end, walk); err = ops->pte_hole(start, end, -1, walk);
return err ? err : 1; return err ? err : 1;
} }
return 0; return 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment