Commit f6191471, authored by Axel Rasmussen, committed by Linus Torvalds

userfaultfd: add UFFDIO_CONTINUE ioctl

This ioctl is how userspace ought to resolve "minor" userfaults.  The
idea is, userspace is notified that a minor fault has occurred.  It
might change the contents of the page using its second non-UFFD mapping,
or not.  Then, it calls UFFDIO_CONTINUE to tell the kernel "I have
ensured the page contents are correct, carry on setting up the mapping".

Note that it doesn't make much sense to use UFFDIO_{COPY,ZEROPAGE} for
MINOR registered VMAs.  ZEROPAGE maps the VMA to the zero page; but in
the minor fault case, we already have some pre-existing underlying page.
Likewise, UFFDIO_COPY isn't useful if we have a second non-UFFD mapping.
We'd just use memcpy() or similar instead.

It turns out hugetlb_mcopy_atomic_pte() already does very close to what
we want, if an existing page is provided via `struct page **pagep`.  We
already special-case the behavior a bit for the UFFDIO_ZEROPAGE case, so
just extend that design: add an enum for the three modes of operation,
and make the small adjustments needed for the MCOPY_ATOMIC_CONTINUE
case.  (Basically, look up the existing page, and avoid adding the
existing page to the page cache or calling set_page_huge_active() on
it.)

Link: https://lkml.kernel.org/r/20210301222728.176417-5-axelrasmussen@google.com
Signed-off-by: Axel Rasmussen <axelrasmussen@google.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Cc: Adam Ruprecht <ruprecht@google.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Cannon Matthews <cannonmatthews@google.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chinwen Chang <chinwen.chang@mediatek.com>
Cc: David Rientjes <rientjes@google.com>
Cc: "Dr . David Alan Gilbert" <dgilbert@redhat.com>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jann Horn <jannh@google.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: Kirill A. Shutemov <kirill@shutemov.name>
Cc: Lokesh Gidra <lokeshgidra@google.com>
Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: "Michal Koutný" <mkoutny@suse.com>
Cc: Michel Lespinasse <walken@google.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Mina Almasry <almasrymina@google.com>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Oliver Upton <oupton@google.com>
Cc: Shaohua Li <shli@fb.com>
Cc: Shawn Anastasio <shawn@anastas.io>
Cc: Steven Price <steven.price@arm.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 714c1891
...@@ -1487,6 +1487,10 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, ...@@ -1487,6 +1487,10 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
if (!(uffdio_register.mode & UFFDIO_REGISTER_MODE_WP)) if (!(uffdio_register.mode & UFFDIO_REGISTER_MODE_WP))
ioctls_out &= ~((__u64)1 << _UFFDIO_WRITEPROTECT); ioctls_out &= ~((__u64)1 << _UFFDIO_WRITEPROTECT);
/* CONTINUE ioctl is only supported for MINOR ranges. */
if (!(uffdio_register.mode & UFFDIO_REGISTER_MODE_MINOR))
ioctls_out &= ~((__u64)1 << _UFFDIO_CONTINUE);
/* /*
* Now that we scanned all vmas we can already tell * Now that we scanned all vmas we can already tell
* userland which ioctls methods are guaranteed to * userland which ioctls methods are guaranteed to
...@@ -1840,6 +1844,66 @@ static int userfaultfd_writeprotect(struct userfaultfd_ctx *ctx, ...@@ -1840,6 +1844,66 @@ static int userfaultfd_writeprotect(struct userfaultfd_ctx *ctx,
return ret; return ret;
} }
static int userfaultfd_continue(struct userfaultfd_ctx *ctx, unsigned long arg)
{
__s64 ret;
struct uffdio_continue uffdio_continue;
struct uffdio_continue __user *user_uffdio_continue;
struct userfaultfd_wake_range range;
user_uffdio_continue = (struct uffdio_continue __user *)arg;
ret = -EAGAIN;
if (READ_ONCE(ctx->mmap_changing))
goto out;
ret = -EFAULT;
if (copy_from_user(&uffdio_continue, user_uffdio_continue,
/* don't copy the output fields */
sizeof(uffdio_continue) - (sizeof(__s64))))
goto out;
ret = validate_range(ctx->mm, &uffdio_continue.range.start,
uffdio_continue.range.len);
if (ret)
goto out;
ret = -EINVAL;
/* double check for wraparound just in case. */
if (uffdio_continue.range.start + uffdio_continue.range.len <=
uffdio_continue.range.start) {
goto out;
}
if (uffdio_continue.mode & ~UFFDIO_CONTINUE_MODE_DONTWAKE)
goto out;
if (mmget_not_zero(ctx->mm)) {
ret = mcopy_continue(ctx->mm, uffdio_continue.range.start,
uffdio_continue.range.len,
&ctx->mmap_changing);
mmput(ctx->mm);
} else {
return -ESRCH;
}
if (unlikely(put_user(ret, &user_uffdio_continue->mapped)))
return -EFAULT;
if (ret < 0)
goto out;
/* len == 0 would wake all */
BUG_ON(!ret);
range.len = ret;
if (!(uffdio_continue.mode & UFFDIO_CONTINUE_MODE_DONTWAKE)) {
range.start = uffdio_continue.range.start;
wake_userfault(ctx, &range);
}
ret = range.len == uffdio_continue.range.len ? 0 : -EAGAIN;
out:
return ret;
}
static inline unsigned int uffd_ctx_features(__u64 user_features) static inline unsigned int uffd_ctx_features(__u64 user_features)
{ {
/* /*
...@@ -1927,6 +1991,9 @@ static long userfaultfd_ioctl(struct file *file, unsigned cmd, ...@@ -1927,6 +1991,9 @@ static long userfaultfd_ioctl(struct file *file, unsigned cmd,
case UFFDIO_WRITEPROTECT: case UFFDIO_WRITEPROTECT:
ret = userfaultfd_writeprotect(ctx, arg); ret = userfaultfd_writeprotect(ctx, arg);
break; break;
case UFFDIO_CONTINUE:
ret = userfaultfd_continue(ctx, arg);
break;
} }
return ret; return ret;
} }
......
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
#include <linux/kref.h> #include <linux/kref.h>
#include <linux/pgtable.h> #include <linux/pgtable.h>
#include <linux/gfp.h> #include <linux/gfp.h>
#include <linux/userfaultfd_k.h>
struct ctl_table; struct ctl_table;
struct user_struct; struct user_struct;
...@@ -139,6 +140,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, pte_t *dst_pte, ...@@ -139,6 +140,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, pte_t *dst_pte,
struct vm_area_struct *dst_vma, struct vm_area_struct *dst_vma,
unsigned long dst_addr, unsigned long dst_addr,
unsigned long src_addr, unsigned long src_addr,
enum mcopy_atomic_mode mode,
struct page **pagep); struct page **pagep);
#endif /* CONFIG_USERFAULTFD */ #endif /* CONFIG_USERFAULTFD */
bool hugetlb_reserve_pages(struct inode *inode, long from, long to, bool hugetlb_reserve_pages(struct inode *inode, long from, long to,
...@@ -318,6 +320,7 @@ static inline int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, ...@@ -318,6 +320,7 @@ static inline int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
struct vm_area_struct *dst_vma, struct vm_area_struct *dst_vma,
unsigned long dst_addr, unsigned long dst_addr,
unsigned long src_addr, unsigned long src_addr,
enum mcopy_atomic_mode mode,
struct page **pagep) struct page **pagep)
{ {
BUG(); BUG();
......
...@@ -37,6 +37,22 @@ extern int sysctl_unprivileged_userfaultfd; ...@@ -37,6 +37,22 @@ extern int sysctl_unprivileged_userfaultfd;
extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason); extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason);
/*
 * Selects what __mcopy_atomic and its helpers do with the destination range.
 *
 * mcopy_atomic below does not take this as a parameter, so it is close to
 * being an implementation detail; it is declared in this header because
 * memory-kind-specific implementations (e.g. hugetlbfs) need to know which
 * operation was requested.
 */
enum mcopy_atomic_mode {
	/* Fill the destination range with a normal copy_from_user. */
	MCOPY_ATOMIC_NORMAL = 0,
	/* No copy: the destination range is mapped to the zero page. */
	MCOPY_ATOMIC_ZEROPAGE = 1,
	/* No copy: install pte(s) for the page(s) already in the page cache. */
	MCOPY_ATOMIC_CONTINUE = 2,
};
extern ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start, extern ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
unsigned long src_start, unsigned long len, unsigned long src_start, unsigned long len,
bool *mmap_changing, __u64 mode); bool *mmap_changing, __u64 mode);
...@@ -44,6 +60,8 @@ extern ssize_t mfill_zeropage(struct mm_struct *dst_mm, ...@@ -44,6 +60,8 @@ extern ssize_t mfill_zeropage(struct mm_struct *dst_mm,
unsigned long dst_start, unsigned long dst_start,
unsigned long len, unsigned long len,
bool *mmap_changing); bool *mmap_changing);
extern ssize_t mcopy_continue(struct mm_struct *dst_mm, unsigned long dst_start,
unsigned long len, bool *mmap_changing);
extern int mwriteprotect_range(struct mm_struct *dst_mm, extern int mwriteprotect_range(struct mm_struct *dst_mm,
unsigned long start, unsigned long len, unsigned long start, unsigned long len,
bool enable_wp, bool *mmap_changing); bool enable_wp, bool *mmap_changing);
......
...@@ -40,10 +40,12 @@ ...@@ -40,10 +40,12 @@
((__u64)1 << _UFFDIO_WAKE | \ ((__u64)1 << _UFFDIO_WAKE | \
(__u64)1 << _UFFDIO_COPY | \ (__u64)1 << _UFFDIO_COPY | \
(__u64)1 << _UFFDIO_ZEROPAGE | \ (__u64)1 << _UFFDIO_ZEROPAGE | \
(__u64)1 << _UFFDIO_WRITEPROTECT) (__u64)1 << _UFFDIO_WRITEPROTECT | \
(__u64)1 << _UFFDIO_CONTINUE)
#define UFFD_API_RANGE_IOCTLS_BASIC \ #define UFFD_API_RANGE_IOCTLS_BASIC \
((__u64)1 << _UFFDIO_WAKE | \ ((__u64)1 << _UFFDIO_WAKE | \
(__u64)1 << _UFFDIO_COPY) (__u64)1 << _UFFDIO_COPY | \
(__u64)1 << _UFFDIO_CONTINUE)
/* /*
* Valid ioctl command number range with this API is from 0x00 to * Valid ioctl command number range with this API is from 0x00 to
...@@ -59,6 +61,7 @@ ...@@ -59,6 +61,7 @@
#define _UFFDIO_COPY (0x03) #define _UFFDIO_COPY (0x03)
#define _UFFDIO_ZEROPAGE (0x04) #define _UFFDIO_ZEROPAGE (0x04)
#define _UFFDIO_WRITEPROTECT (0x06) #define _UFFDIO_WRITEPROTECT (0x06)
#define _UFFDIO_CONTINUE (0x07)
#define _UFFDIO_API (0x3F) #define _UFFDIO_API (0x3F)
/* userfaultfd ioctl ids */ /* userfaultfd ioctl ids */
...@@ -77,6 +80,8 @@ ...@@ -77,6 +80,8 @@
struct uffdio_zeropage) struct uffdio_zeropage)
#define UFFDIO_WRITEPROTECT _IOWR(UFFDIO, _UFFDIO_WRITEPROTECT, \ #define UFFDIO_WRITEPROTECT _IOWR(UFFDIO, _UFFDIO_WRITEPROTECT, \
struct uffdio_writeprotect) struct uffdio_writeprotect)
/*
 * UFFDIO_CONTINUE is a read/write request: the kernel reads the range and
 * mode from struct uffdio_continue and writes the result back into its
 * "mapped" field, so it is encoded with _IOWR like UFFDIO_WRITEPROTECT.
 */
#define UFFDIO_CONTINUE		_IOWR(UFFDIO, _UFFDIO_CONTINUE,	\
				      struct uffdio_continue)
/* read() structure */ /* read() structure */
struct uffd_msg { struct uffd_msg {
...@@ -268,6 +273,18 @@ struct uffdio_writeprotect { ...@@ -268,6 +273,18 @@ struct uffdio_writeprotect {
__u64 mode; __u64 mode;
}; };
/*
 * Argument of the UFFDIO_CONTINUE ioctl.
 *
 * The ioctl handler copies in everything except the trailing __s64, so
 * "range" and "mode" are inputs and "mapped" is output-only.
 */
struct uffdio_continue {
/* Range to operate on; the handler validates it before use. */
struct uffdio_range range;
/* When set, the handler does not wake waiters on the range afterwards. */
#define UFFDIO_CONTINUE_MODE_DONTWAKE ((__u64)1<<0)
/* Any bit other than UFFDIO_CONTINUE_MODE_DONTWAKE is rejected with -EINVAL. */
__u64 mode;
/*
 * Fields below here are written by the ioctl and must be at the end:
 * the copy_from_user will not read past here.
 */
/* Receives the value returned by mcopy_continue(), including negative errors. */
__s64 mapped;
};
/* /*
* Flags for the userfaultfd(2) system call itself. * Flags for the userfaultfd(2) system call itself.
*/ */
......
...@@ -39,7 +39,6 @@ ...@@ -39,7 +39,6 @@
#include <linux/hugetlb.h> #include <linux/hugetlb.h>
#include <linux/hugetlb_cgroup.h> #include <linux/hugetlb_cgroup.h>
#include <linux/node.h> #include <linux/node.h>
#include <linux/userfaultfd_k.h>
#include <linux/page_owner.h> #include <linux/page_owner.h>
#include "internal.h" #include "internal.h"
...@@ -4865,8 +4864,10 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, ...@@ -4865,8 +4864,10 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
struct vm_area_struct *dst_vma, struct vm_area_struct *dst_vma,
unsigned long dst_addr, unsigned long dst_addr,
unsigned long src_addr, unsigned long src_addr,
enum mcopy_atomic_mode mode,
struct page **pagep) struct page **pagep)
{ {
bool is_continue = (mode == MCOPY_ATOMIC_CONTINUE);
struct address_space *mapping; struct address_space *mapping;
pgoff_t idx; pgoff_t idx;
unsigned long size; unsigned long size;
...@@ -4876,8 +4877,17 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, ...@@ -4876,8 +4877,17 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
spinlock_t *ptl; spinlock_t *ptl;
int ret; int ret;
struct page *page; struct page *page;
int writable;
if (!*pagep) { mapping = dst_vma->vm_file->f_mapping;
idx = vma_hugecache_offset(h, dst_vma, dst_addr);
if (is_continue) {
ret = -EFAULT;
page = find_lock_page(mapping, idx);
if (!page)
goto out;
} else if (!*pagep) {
ret = -ENOMEM; ret = -ENOMEM;
page = alloc_huge_page(dst_vma, dst_addr, 0); page = alloc_huge_page(dst_vma, dst_addr, 0);
if (IS_ERR(page)) if (IS_ERR(page))
...@@ -4906,13 +4916,8 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, ...@@ -4906,13 +4916,8 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
*/ */
__SetPageUptodate(page); __SetPageUptodate(page);
mapping = dst_vma->vm_file->f_mapping; /* Add shared, newly allocated pages to the page cache. */
idx = vma_hugecache_offset(h, dst_vma, dst_addr); if (vm_shared && !is_continue) {
/*
* If shared, add to page cache
*/
if (vm_shared) {
size = i_size_read(mapping->host) >> huge_page_shift(h); size = i_size_read(mapping->host) >> huge_page_shift(h);
ret = -EFAULT; ret = -EFAULT;
if (idx >= size) if (idx >= size)
...@@ -4957,8 +4962,14 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, ...@@ -4957,8 +4962,14 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
hugepage_add_new_anon_rmap(page, dst_vma, dst_addr); hugepage_add_new_anon_rmap(page, dst_vma, dst_addr);
} }
_dst_pte = make_huge_pte(dst_vma, page, dst_vma->vm_flags & VM_WRITE); /* For CONTINUE on a non-shared VMA, don't set VM_WRITE for CoW. */
if (dst_vma->vm_flags & VM_WRITE) if (is_continue && !vm_shared)
writable = 0;
else
writable = dst_vma->vm_flags & VM_WRITE;
_dst_pte = make_huge_pte(dst_vma, page, writable);
if (writable)
_dst_pte = huge_pte_mkdirty(_dst_pte); _dst_pte = huge_pte_mkdirty(_dst_pte);
_dst_pte = pte_mkyoung(_dst_pte); _dst_pte = pte_mkyoung(_dst_pte);
...@@ -4972,15 +4983,16 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, ...@@ -4972,15 +4983,16 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
update_mmu_cache(dst_vma, dst_addr, dst_pte); update_mmu_cache(dst_vma, dst_addr, dst_pte);
spin_unlock(ptl); spin_unlock(ptl);
SetHPageMigratable(page); if (!is_continue)
if (vm_shared) SetHPageMigratable(page);
if (vm_shared || is_continue)
unlock_page(page); unlock_page(page);
ret = 0; ret = 0;
out: out:
return ret; return ret;
out_release_unlock: out_release_unlock:
spin_unlock(ptl); spin_unlock(ptl);
if (vm_shared) if (vm_shared || is_continue)
unlock_page(page); unlock_page(page);
out_release_nounlock: out_release_nounlock:
put_page(page); put_page(page);
......
...@@ -207,7 +207,7 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm, ...@@ -207,7 +207,7 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
unsigned long dst_start, unsigned long dst_start,
unsigned long src_start, unsigned long src_start,
unsigned long len, unsigned long len,
bool zeropage) enum mcopy_atomic_mode mode)
{ {
int vm_alloc_shared = dst_vma->vm_flags & VM_SHARED; int vm_alloc_shared = dst_vma->vm_flags & VM_SHARED;
int vm_shared = dst_vma->vm_flags & VM_SHARED; int vm_shared = dst_vma->vm_flags & VM_SHARED;
...@@ -227,7 +227,7 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm, ...@@ -227,7 +227,7 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
* by THP. Since we can not reliably insert a zero page, this * by THP. Since we can not reliably insert a zero page, this
* feature is not supported. * feature is not supported.
*/ */
if (zeropage) { if (mode == MCOPY_ATOMIC_ZEROPAGE) {
mmap_read_unlock(dst_mm); mmap_read_unlock(dst_mm);
return -EINVAL; return -EINVAL;
} }
...@@ -273,8 +273,6 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm, ...@@ -273,8 +273,6 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
} }
while (src_addr < src_start + len) { while (src_addr < src_start + len) {
pte_t dst_pteval;
BUG_ON(dst_addr >= dst_start + len); BUG_ON(dst_addr >= dst_start + len);
/* /*
...@@ -297,16 +295,16 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm, ...@@ -297,16 +295,16 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
goto out_unlock; goto out_unlock;
} }
err = -EEXIST; if (mode != MCOPY_ATOMIC_CONTINUE &&
dst_pteval = huge_ptep_get(dst_pte); !huge_pte_none(huge_ptep_get(dst_pte))) {
if (!huge_pte_none(dst_pteval)) { err = -EEXIST;
mutex_unlock(&hugetlb_fault_mutex_table[hash]); mutex_unlock(&hugetlb_fault_mutex_table[hash]);
i_mmap_unlock_read(mapping); i_mmap_unlock_read(mapping);
goto out_unlock; goto out_unlock;
} }
err = hugetlb_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma, err = hugetlb_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma,
dst_addr, src_addr, &page); dst_addr, src_addr, mode, &page);
mutex_unlock(&hugetlb_fault_mutex_table[hash]); mutex_unlock(&hugetlb_fault_mutex_table[hash]);
i_mmap_unlock_read(mapping); i_mmap_unlock_read(mapping);
...@@ -408,7 +406,7 @@ extern ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm, ...@@ -408,7 +406,7 @@ extern ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
unsigned long dst_start, unsigned long dst_start,
unsigned long src_start, unsigned long src_start,
unsigned long len, unsigned long len,
bool zeropage); enum mcopy_atomic_mode mode);
#endif /* CONFIG_HUGETLB_PAGE */ #endif /* CONFIG_HUGETLB_PAGE */
static __always_inline ssize_t mfill_atomic_pte(struct mm_struct *dst_mm, static __always_inline ssize_t mfill_atomic_pte(struct mm_struct *dst_mm,
...@@ -458,7 +456,7 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm, ...@@ -458,7 +456,7 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
unsigned long dst_start, unsigned long dst_start,
unsigned long src_start, unsigned long src_start,
unsigned long len, unsigned long len,
bool zeropage, enum mcopy_atomic_mode mcopy_mode,
bool *mmap_changing, bool *mmap_changing,
__u64 mode) __u64 mode)
{ {
...@@ -469,6 +467,7 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm, ...@@ -469,6 +467,7 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
long copied; long copied;
struct page *page; struct page *page;
bool wp_copy; bool wp_copy;
bool zeropage = (mcopy_mode == MCOPY_ATOMIC_ZEROPAGE);
/* /*
* Sanitize the command parameters: * Sanitize the command parameters:
...@@ -527,10 +526,12 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm, ...@@ -527,10 +526,12 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
*/ */
if (is_vm_hugetlb_page(dst_vma)) if (is_vm_hugetlb_page(dst_vma))
return __mcopy_atomic_hugetlb(dst_mm, dst_vma, dst_start, return __mcopy_atomic_hugetlb(dst_mm, dst_vma, dst_start,
src_start, len, zeropage); src_start, len, mcopy_mode);
if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma)) if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma))
goto out_unlock; goto out_unlock;
if (mcopy_mode == MCOPY_ATOMIC_CONTINUE)
goto out_unlock;
/* /*
* Ensure the dst_vma has a anon_vma or this page * Ensure the dst_vma has a anon_vma or this page
...@@ -626,14 +627,22 @@ ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start, ...@@ -626,14 +627,22 @@ ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
unsigned long src_start, unsigned long len, unsigned long src_start, unsigned long len,
bool *mmap_changing, __u64 mode) bool *mmap_changing, __u64 mode)
{ {
return __mcopy_atomic(dst_mm, dst_start, src_start, len, false, return __mcopy_atomic(dst_mm, dst_start, src_start, len,
mmap_changing, mode); MCOPY_ATOMIC_NORMAL, mmap_changing, mode);
} }
ssize_t mfill_zeropage(struct mm_struct *dst_mm, unsigned long start, ssize_t mfill_zeropage(struct mm_struct *dst_mm, unsigned long start,
unsigned long len, bool *mmap_changing) unsigned long len, bool *mmap_changing)
{ {
return __mcopy_atomic(dst_mm, start, 0, len, true, mmap_changing, 0); return __mcopy_atomic(dst_mm, start, 0, len, MCOPY_ATOMIC_ZEROPAGE,
mmap_changing, 0);
}
/*
 * Entry point for the CONTINUE operation: runs __mcopy_atomic in
 * MCOPY_ATOMIC_CONTINUE mode over [start, start + len) of @dst_mm and
 * returns whatever __mcopy_atomic returns.
 */
ssize_t mcopy_continue(struct mm_struct *dst_mm, unsigned long start,
		       unsigned long len, bool *mmap_changing)
{
	/* CONTINUE passes neither a source address nor any mode flags. */
	const unsigned long no_src = 0;
	const __u64 no_flags = 0;

	return __mcopy_atomic(dst_mm, start, no_src, len,
			      MCOPY_ATOMIC_CONTINUE, mmap_changing, no_flags);
}
int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start, int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment