Commit bc70fbf2 authored by Peter Xu, committed by Andrew Morton

mm/hugetlb: handle uffd-wp during fork()

Firstly, we need to pass dst_vma into copy_hugetlb_page_range(),
because for uffd-wp it is the dst vma that matters when deciding how we
should treat uffd-wp protected ptes.

We should recognize pte markers during fork and do the pte copy if needed.

[lkp@intel.com: vma_needs_copy can be static]
  Link: https://lkml.kernel.org/r/Ylb0CGeFJlc4EzLk@7ec4ff11d4ae
Link: https://lkml.kernel.org/r/20220405014918.14932-1-peterx@redhat.com
Signed-off-by: Peter Xu <peterx@redhat.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: "Kirill A . Shutemov" <kirill@shutemov.name>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Nadav Amit <nadav.amit@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 05e90bd0
...@@ -137,7 +137,8 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma, ...@@ -137,7 +137,8 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma,
struct vm_area_struct *new_vma, struct vm_area_struct *new_vma,
unsigned long old_addr, unsigned long new_addr, unsigned long old_addr, unsigned long new_addr,
unsigned long len); unsigned long len);
int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *); int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *,
struct vm_area_struct *, struct vm_area_struct *);
long follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, long follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *,
struct page **, struct vm_area_struct **, struct page **, struct vm_area_struct **,
unsigned long *, unsigned long *, long, unsigned int, unsigned long *, unsigned long *, long, unsigned int,
...@@ -269,7 +270,9 @@ static inline struct page *follow_huge_addr(struct mm_struct *mm, ...@@ -269,7 +270,9 @@ static inline struct page *follow_huge_addr(struct mm_struct *mm,
} }
static inline int copy_hugetlb_page_range(struct mm_struct *dst, static inline int copy_hugetlb_page_range(struct mm_struct *dst,
struct mm_struct *src, struct vm_area_struct *vma) struct mm_struct *src,
struct vm_area_struct *dst_vma,
struct vm_area_struct *src_vma)
{ {
BUG(); BUG();
return 0; return 0;
......
...@@ -4719,23 +4719,24 @@ hugetlb_install_page(struct vm_area_struct *vma, pte_t *ptep, unsigned long addr ...@@ -4719,23 +4719,24 @@ hugetlb_install_page(struct vm_area_struct *vma, pte_t *ptep, unsigned long addr
} }
int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
struct vm_area_struct *vma) struct vm_area_struct *dst_vma,
struct vm_area_struct *src_vma)
{ {
pte_t *src_pte, *dst_pte, entry, dst_entry; pte_t *src_pte, *dst_pte, entry, dst_entry;
struct page *ptepage; struct page *ptepage;
unsigned long addr; unsigned long addr;
bool cow = is_cow_mapping(vma->vm_flags); bool cow = is_cow_mapping(src_vma->vm_flags);
struct hstate *h = hstate_vma(vma); struct hstate *h = hstate_vma(src_vma);
unsigned long sz = huge_page_size(h); unsigned long sz = huge_page_size(h);
unsigned long npages = pages_per_huge_page(h); unsigned long npages = pages_per_huge_page(h);
struct address_space *mapping = vma->vm_file->f_mapping; struct address_space *mapping = src_vma->vm_file->f_mapping;
struct mmu_notifier_range range; struct mmu_notifier_range range;
int ret = 0; int ret = 0;
if (cow) { if (cow) {
mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, src, mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, src_vma, src,
vma->vm_start, src_vma->vm_start,
vma->vm_end); src_vma->vm_end);
mmu_notifier_invalidate_range_start(&range); mmu_notifier_invalidate_range_start(&range);
mmap_assert_write_locked(src); mmap_assert_write_locked(src);
raw_write_seqcount_begin(&src->write_protect_seq); raw_write_seqcount_begin(&src->write_protect_seq);
...@@ -4749,12 +4750,12 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, ...@@ -4749,12 +4750,12 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
i_mmap_lock_read(mapping); i_mmap_lock_read(mapping);
} }
for (addr = vma->vm_start; addr < vma->vm_end; addr += sz) { for (addr = src_vma->vm_start; addr < src_vma->vm_end; addr += sz) {
spinlock_t *src_ptl, *dst_ptl; spinlock_t *src_ptl, *dst_ptl;
src_pte = huge_pte_offset(src, addr, sz); src_pte = huge_pte_offset(src, addr, sz);
if (!src_pte) if (!src_pte)
continue; continue;
dst_pte = huge_pte_alloc(dst, vma, addr, sz); dst_pte = huge_pte_alloc(dst, dst_vma, addr, sz);
if (!dst_pte) { if (!dst_pte) {
ret = -ENOMEM; ret = -ENOMEM;
break; break;
...@@ -4789,6 +4790,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, ...@@ -4789,6 +4790,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
} else if (unlikely(is_hugetlb_entry_migration(entry) || } else if (unlikely(is_hugetlb_entry_migration(entry) ||
is_hugetlb_entry_hwpoisoned(entry))) { is_hugetlb_entry_hwpoisoned(entry))) {
swp_entry_t swp_entry = pte_to_swp_entry(entry); swp_entry_t swp_entry = pte_to_swp_entry(entry);
bool uffd_wp = huge_pte_uffd_wp(entry);
if (!is_readable_migration_entry(swp_entry) && cow) { if (!is_readable_migration_entry(swp_entry) && cow) {
/* /*
...@@ -4798,10 +4800,21 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, ...@@ -4798,10 +4800,21 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
swp_entry = make_readable_migration_entry( swp_entry = make_readable_migration_entry(
swp_offset(swp_entry)); swp_offset(swp_entry));
entry = swp_entry_to_pte(swp_entry); entry = swp_entry_to_pte(swp_entry);
if (userfaultfd_wp(src_vma) && uffd_wp)
entry = huge_pte_mkuffd_wp(entry);
set_huge_swap_pte_at(src, addr, src_pte, set_huge_swap_pte_at(src, addr, src_pte,
entry, sz); entry, sz);
} }
if (!userfaultfd_wp(dst_vma) && uffd_wp)
entry = huge_pte_clear_uffd_wp(entry);
set_huge_swap_pte_at(dst, addr, dst_pte, entry, sz); set_huge_swap_pte_at(dst, addr, dst_pte, entry, sz);
} else if (unlikely(is_pte_marker(entry))) {
/*
* We copy the pte marker only if the dst vma has
* uffd-wp enabled.
*/
if (userfaultfd_wp(dst_vma))
set_huge_pte_at(dst, addr, dst_pte, entry);
} else { } else {
entry = huge_ptep_get(src_pte); entry = huge_ptep_get(src_pte);
ptepage = pte_page(entry); ptepage = pte_page(entry);
...@@ -4819,20 +4832,21 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, ...@@ -4819,20 +4832,21 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
*/ */
if (!PageAnon(ptepage)) { if (!PageAnon(ptepage)) {
page_dup_file_rmap(ptepage, true); page_dup_file_rmap(ptepage, true);
} else if (page_try_dup_anon_rmap(ptepage, true, vma)) { } else if (page_try_dup_anon_rmap(ptepage, true,
src_vma)) {
pte_t src_pte_old = entry; pte_t src_pte_old = entry;
struct page *new; struct page *new;
spin_unlock(src_ptl); spin_unlock(src_ptl);
spin_unlock(dst_ptl); spin_unlock(dst_ptl);
/* Do not use reserve as it's private owned */ /* Do not use reserve as it's private owned */
new = alloc_huge_page(vma, addr, 1); new = alloc_huge_page(dst_vma, addr, 1);
if (IS_ERR(new)) { if (IS_ERR(new)) {
put_page(ptepage); put_page(ptepage);
ret = PTR_ERR(new); ret = PTR_ERR(new);
break; break;
} }
copy_user_huge_page(new, ptepage, addr, vma, copy_user_huge_page(new, ptepage, addr, dst_vma,
npages); npages);
put_page(ptepage); put_page(ptepage);
...@@ -4842,13 +4856,13 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, ...@@ -4842,13 +4856,13 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
entry = huge_ptep_get(src_pte); entry = huge_ptep_get(src_pte);
if (!pte_same(src_pte_old, entry)) { if (!pte_same(src_pte_old, entry)) {
restore_reserve_on_error(h, vma, addr, restore_reserve_on_error(h, dst_vma, addr,
new); new);
put_page(new); put_page(new);
/* dst_entry won't change as in child */ /* dst_entry won't change as in child */
goto again; goto again;
} }
hugetlb_install_page(vma, dst_pte, addr, new); hugetlb_install_page(dst_vma, dst_pte, addr, new);
spin_unlock(src_ptl); spin_unlock(src_ptl);
spin_unlock(dst_ptl); spin_unlock(dst_ptl);
continue; continue;
......
...@@ -1234,7 +1234,7 @@ copy_p4d_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma, ...@@ -1234,7 +1234,7 @@ copy_p4d_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma,
* false when we can speed up fork() by allowing lazy page faults later until * false when we can speed up fork() by allowing lazy page faults later until
* when the child accesses the memory range. * when the child accesses the memory range.
*/ */
bool static bool
vma_needs_copy(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma) vma_needs_copy(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma)
{ {
/* /*
...@@ -1278,7 +1278,7 @@ copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma) ...@@ -1278,7 +1278,7 @@ copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma)
return 0; return 0;
if (is_vm_hugetlb_page(src_vma)) if (is_vm_hugetlb_page(src_vma))
return copy_hugetlb_page_range(dst_mm, src_mm, src_vma); return copy_hugetlb_page_range(dst_mm, src_mm, dst_vma, src_vma);
if (unlikely(src_vma->vm_flags & VM_PFNMAP)) { if (unlikely(src_vma->vm_flags & VM_PFNMAP)) {
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment