Commit 3218f871 authored by Alex Sierra, committed by Andrew Morton (akpm)

mm: handling Non-LRU pages returned by vm_normal_pages

With DEVICE_COHERENT, we'll soon have vm_normal_pages() return
device-managed anonymous pages that are not LRU pages.  Although they
behave like normal pages for the purposes of mapping in CPU page tables
and for COW, they do not support LRU lists, NUMA migration or THP.

Callers to follow_page() currently don't expect ZONE_DEVICE pages,
however, with DEVICE_COHERENT we might now return ZONE_DEVICE.  Check for
ZONE_DEVICE pages in applicable users of follow_page() as well.

Link: https://lkml.kernel.org/r/20220715150521.18165-5-alex.sierra@amd.com
Signed-off-by: Alex Sierra <alex.sierra@amd.com>
Acked-by: Felix Kuehling <Felix.Kuehling@amd.com>	[v2]
Reviewed-by: Alistair Popple <apopple@nvidia.com>	[v6]
Cc: Christoph Hellwig <hch@lst.de>
Cc: David Hildenbrand <david@redhat.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent f25cbb7a
...@@ -1795,7 +1795,7 @@ static struct page *can_gather_numa_stats(pte_t pte, struct vm_area_struct *vma, ...@@ -1795,7 +1795,7 @@ static struct page *can_gather_numa_stats(pte_t pte, struct vm_area_struct *vma,
return NULL; return NULL;
page = vm_normal_page(vma, addr, pte); page = vm_normal_page(vma, addr, pte);
if (!page) if (!page || is_zone_device_page(page))
return NULL; return NULL;
if (PageReserved(page)) if (PageReserved(page))
......
...@@ -2910,7 +2910,7 @@ static int split_huge_pages_pid(int pid, unsigned long vaddr_start, ...@@ -2910,7 +2910,7 @@ static int split_huge_pages_pid(int pid, unsigned long vaddr_start,
if (IS_ERR(page)) if (IS_ERR(page))
continue; continue;
if (!page) if (!page || is_zone_device_page(page))
continue; continue;
if (!is_transparent_hugepage(page)) if (!is_transparent_hugepage(page))
......
...@@ -611,7 +611,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, ...@@ -611,7 +611,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
goto out; goto out;
} }
page = vm_normal_page(vma, address, pteval); page = vm_normal_page(vma, address, pteval);
if (unlikely(!page)) { if (unlikely(!page) || unlikely(is_zone_device_page(page))) {
result = SCAN_PAGE_NULL; result = SCAN_PAGE_NULL;
goto out; goto out;
} }
...@@ -1261,7 +1261,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, ...@@ -1261,7 +1261,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
writable = true; writable = true;
page = vm_normal_page(vma, _address, pteval); page = vm_normal_page(vma, _address, pteval);
if (unlikely(!page)) { if (unlikely(!page) || unlikely(is_zone_device_page(page))) {
result = SCAN_PAGE_NULL; result = SCAN_PAGE_NULL;
goto out_unmap; goto out_unmap;
} }
...@@ -1472,7 +1472,8 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr) ...@@ -1472,7 +1472,8 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
goto abort; goto abort;
page = vm_normal_page(vma, addr, *pte); page = vm_normal_page(vma, addr, *pte);
if (WARN_ON_ONCE(page && is_zone_device_page(page)))
page = NULL;
/* /*
* Note that uprobe, debugger, or MAP_PRIVATE may change the * Note that uprobe, debugger, or MAP_PRIVATE may change the
* page table, but the new page will not be a subpage of hpage. * page table, but the new page will not be a subpage of hpage.
...@@ -1490,6 +1491,8 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr) ...@@ -1490,6 +1491,8 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
if (pte_none(*pte)) if (pte_none(*pte))
continue; continue;
page = vm_normal_page(vma, addr, *pte); page = vm_normal_page(vma, addr, *pte);
if (WARN_ON_ONCE(page && is_zone_device_page(page)))
goto abort;
page_remove_rmap(page, vma, false); page_remove_rmap(page, vma, false);
} }
......
...@@ -475,7 +475,7 @@ static int break_ksm(struct vm_area_struct *vma, unsigned long addr) ...@@ -475,7 +475,7 @@ static int break_ksm(struct vm_area_struct *vma, unsigned long addr)
cond_resched(); cond_resched();
page = follow_page(vma, addr, page = follow_page(vma, addr,
FOLL_GET | FOLL_MIGRATION | FOLL_REMOTE); FOLL_GET | FOLL_MIGRATION | FOLL_REMOTE);
if (IS_ERR_OR_NULL(page)) if (IS_ERR_OR_NULL(page) || is_zone_device_page(page))
break; break;
if (PageKsm(page)) if (PageKsm(page))
ret = handle_mm_fault(vma, addr, ret = handle_mm_fault(vma, addr,
...@@ -560,7 +560,7 @@ static struct page *get_mergeable_page(struct rmap_item *rmap_item) ...@@ -560,7 +560,7 @@ static struct page *get_mergeable_page(struct rmap_item *rmap_item)
goto out; goto out;
page = follow_page(vma, addr, FOLL_GET); page = follow_page(vma, addr, FOLL_GET);
if (IS_ERR_OR_NULL(page)) if (IS_ERR_OR_NULL(page) || is_zone_device_page(page))
goto out; goto out;
if (PageAnon(page)) { if (PageAnon(page)) {
flush_anon_page(vma, page, addr); flush_anon_page(vma, page, addr);
...@@ -2308,7 +2308,7 @@ static struct rmap_item *scan_get_next_rmap_item(struct page **page) ...@@ -2308,7 +2308,7 @@ static struct rmap_item *scan_get_next_rmap_item(struct page **page)
if (ksm_test_exit(mm)) if (ksm_test_exit(mm))
break; break;
*page = follow_page(vma, ksm_scan.address, FOLL_GET); *page = follow_page(vma, ksm_scan.address, FOLL_GET);
if (IS_ERR_OR_NULL(*page)) { if (IS_ERR_OR_NULL(*page) || is_zone_device_page(*page)) {
ksm_scan.address += PAGE_SIZE; ksm_scan.address += PAGE_SIZE;
cond_resched(); cond_resched();
continue; continue;
......
...@@ -421,7 +421,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd, ...@@ -421,7 +421,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
continue; continue;
page = vm_normal_page(vma, addr, ptent); page = vm_normal_page(vma, addr, ptent);
if (!page) if (!page || is_zone_device_page(page))
continue; continue;
/* /*
...@@ -639,7 +639,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr, ...@@ -639,7 +639,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
} }
page = vm_normal_page(vma, addr, ptent); page = vm_normal_page(vma, addr, ptent);
if (!page) if (!page || is_zone_device_page(page))
continue; continue;
/* /*
......
...@@ -624,6 +624,14 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, ...@@ -624,6 +624,14 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
if (is_zero_pfn(pfn)) if (is_zero_pfn(pfn))
return NULL; return NULL;
if (pte_devmap(pte)) if (pte_devmap(pte))
/*
* NOTE: New users of ZONE_DEVICE will not set pte_devmap()
* and will have refcounts incremented on their struct pages
* when they are inserted into PTEs, thus they are safe to
* return here. Legacy ZONE_DEVICE pages that set pte_devmap()
* do not have refcounts. Example of legacy ZONE_DEVICE is
* MEMORY_DEVICE_FS_DAX type in pmem or virtio_fs drivers.
*/
return NULL; return NULL;
print_bad_pte(vma, addr, pte, NULL); print_bad_pte(vma, addr, pte, NULL);
...@@ -4693,7 +4701,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf) ...@@ -4693,7 +4701,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
pte = pte_modify(old_pte, vma->vm_page_prot); pte = pte_modify(old_pte, vma->vm_page_prot);
page = vm_normal_page(vma, vmf->address, pte); page = vm_normal_page(vma, vmf->address, pte);
if (!page) if (!page || is_zone_device_page(page))
goto out_map; goto out_map;
/* TODO: handle PTE-mapped THP */ /* TODO: handle PTE-mapped THP */
......
...@@ -523,7 +523,7 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr, ...@@ -523,7 +523,7 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
if (!pte_present(*pte)) if (!pte_present(*pte))
continue; continue;
page = vm_normal_page(vma, addr, *pte); page = vm_normal_page(vma, addr, *pte);
if (!page) if (!page || is_zone_device_page(page))
continue; continue;
/* /*
* vm_normal_page() filters out zero pages, but there might * vm_normal_page() filters out zero pages, but there might
......
...@@ -1630,7 +1630,7 @@ static int add_page_for_migration(struct mm_struct *mm, unsigned long addr, ...@@ -1630,7 +1630,7 @@ static int add_page_for_migration(struct mm_struct *mm, unsigned long addr,
goto out; goto out;
err = -ENOENT; err = -ENOENT;
if (!page) if (!page || is_zone_device_page(page))
goto out; goto out;
err = 0; err = 0;
...@@ -1821,7 +1821,7 @@ static void do_pages_stat_array(struct mm_struct *mm, unsigned long nr_pages, ...@@ -1821,7 +1821,7 @@ static void do_pages_stat_array(struct mm_struct *mm, unsigned long nr_pages,
if (IS_ERR(page)) if (IS_ERR(page))
goto set_status; goto set_status;
if (page) { if (page && !is_zone_device_page(page)) {
err = page_to_nid(page); err = page_to_nid(page);
put_page(page); put_page(page);
} else { } else {
......
...@@ -333,7 +333,7 @@ static int mlock_pte_range(pmd_t *pmd, unsigned long addr, ...@@ -333,7 +333,7 @@ static int mlock_pte_range(pmd_t *pmd, unsigned long addr,
if (!pte_present(*pte)) if (!pte_present(*pte))
continue; continue;
page = vm_normal_page(vma, addr, *pte); page = vm_normal_page(vma, addr, *pte);
if (!page) if (!page || is_zone_device_page(page))
continue; continue;
if (PageTransCompound(page)) if (PageTransCompound(page))
continue; continue;
......
...@@ -127,7 +127,7 @@ static unsigned long change_pte_range(struct mmu_gather *tlb, ...@@ -127,7 +127,7 @@ static unsigned long change_pte_range(struct mmu_gather *tlb,
continue; continue;
page = vm_normal_page(vma, addr, oldpte); page = vm_normal_page(vma, addr, oldpte);
if (!page || PageKsm(page)) if (!page || is_zone_device_page(page) || PageKsm(page))
continue; continue;
/* Also skip shared copy-on-write pages */ /* Also skip shared copy-on-write pages */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment