Commit fa5bb209, authored by Kirill A. Shutemov, committed by Linus Torvalds

mm: cleanup __get_user_pages()

Get rid of two nested loops over nr_pages, extract the vma flags checking
into a separate function, and make other random cleanups.
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 16744483
...@@ -315,6 +315,44 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma, ...@@ -315,6 +315,44 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
return 0; return 0;
} }
/*
 * check_vma_flags() - decide whether a vma may be accessed as requested
 * @vma:	the vma whose vm_flags are inspected
 * @gup_flags:	FOLL_* flags describing the access (FOLL_WRITE, FOLL_FORCE)
 *
 * Returns 0 when the access is permitted by the vma's flags and -EFAULT
 * otherwise.
 */
static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
{
	vm_flags_t flags = vma->vm_flags;
	int want_write = !!(gup_flags & FOLL_WRITE);
	int forced = !!(gup_flags & FOLL_FORCE);

	/* VM_IO and VM_PFNMAP vmas are rejected whatever the request is. */
	if (flags & (VM_IO | VM_PFNMAP))
		return -EFAULT;

	if (want_write) {
		if (flags & VM_WRITE)
			return 0;

		/* Writing to a !VM_WRITE vma is only considered with FOLL_FORCE. */
		if (!forced)
			return -EFAULT;

		/*
		 * Historically the write,force case was allowed to COW in a
		 * VM_MAYWRITE VM_SHARED !VM_WRITE vma, so that ptrace could
		 * plant a breakpoint in a read-only mapping of an executable
		 * without corrupting the file (yet only when that file had
		 * been opened for writing!).  Anon pages in shared mappings
		 * are surprising, so such requests are now simply rejected.
		 */
		if (is_cow_mapping(flags))
			return 0;

		WARN_ON_ONCE(flags & VM_MAYWRITE);
		return -EFAULT;
	}

	if (flags & VM_READ)
		return 0;

	/* Reading from a !VM_READ vma is only considered with FOLL_FORCE. */
	if (!forced)
		return -EFAULT;

	/*
	 * Is there actually any vma we can reach here which does not have
	 * VM_MAYREAD set?
	 */
	return (flags & VM_MAYREAD) ? 0 : -EFAULT;
}
/** /**
* __get_user_pages() - pin user pages in memory * __get_user_pages() - pin user pages in memory
* @tsk: task_struct of target task * @tsk: task_struct of target task
...@@ -369,9 +407,9 @@ long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, ...@@ -369,9 +407,9 @@ long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
unsigned int gup_flags, struct page **pages, unsigned int gup_flags, struct page **pages,
struct vm_area_struct **vmas, int *nonblocking) struct vm_area_struct **vmas, int *nonblocking)
{ {
long i; long i = 0;
unsigned long vm_flags;
unsigned int page_mask; unsigned int page_mask;
struct vm_area_struct *vma = NULL;
if (!nr_pages) if (!nr_pages)
return 0; return 0;
...@@ -386,124 +424,82 @@ long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, ...@@ -386,124 +424,82 @@ long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
if (!(gup_flags & FOLL_FORCE)) if (!(gup_flags & FOLL_FORCE))
gup_flags |= FOLL_NUMA; gup_flags |= FOLL_NUMA;
i = 0;
do { do {
struct vm_area_struct *vma; struct page *page;
unsigned int foll_flags = gup_flags;
vma = find_extend_vma(mm, start); unsigned int page_increm;
if (!vma && in_gate_area(mm, start)) {
int ret; /* first iteration or cross vma bound */
ret = get_gate_page(mm, start & PAGE_MASK, gup_flags, if (!vma || start >= vma->vm_end) {
&vma, pages ? &pages[i] : NULL); vma = find_extend_vma(mm, start);
if (ret) if (!vma && in_gate_area(mm, start)) {
goto efault; int ret;
page_mask = 0; ret = get_gate_page(mm, start & PAGE_MASK,
goto next_page; gup_flags, &vma,
} pages ? &pages[i] : NULL);
if (ret)
return i ? : ret;
page_mask = 0;
goto next_page;
}
if (!vma) if (!vma || check_vma_flags(vma, gup_flags))
goto efault; return i ? : -EFAULT;
vm_flags = vma->vm_flags; if (is_vm_hugetlb_page(vma)) {
if (vm_flags & (VM_IO | VM_PFNMAP)) i = follow_hugetlb_page(mm, vma, pages, vmas,
goto efault; &start, &nr_pages, i,
gup_flags);
if (gup_flags & FOLL_WRITE) { continue;
if (!(vm_flags & VM_WRITE)) {
if (!(gup_flags & FOLL_FORCE))
goto efault;
/*
* We used to let the write,force case do COW
* in a VM_MAYWRITE VM_SHARED !VM_WRITE vma, so
* ptrace could set a breakpoint in a read-only
* mapping of an executable, without corrupting
* the file (yet only when that file had been
* opened for writing!). Anon pages in shared
* mappings are surprising: now just reject it.
*/
if (!is_cow_mapping(vm_flags)) {
WARN_ON_ONCE(vm_flags & VM_MAYWRITE);
goto efault;
}
} }
} else { }
if (!(vm_flags & VM_READ)) { retry:
if (!(gup_flags & FOLL_FORCE)) /*
goto efault; * If we have a pending SIGKILL, don't keep faulting pages and
/* * potentially allocating memory.
* Is there actually any vma we can reach here */
* which does not have VM_MAYREAD set? if (unlikely(fatal_signal_pending(current)))
*/ return i ? i : -ERESTARTSYS;
if (!(vm_flags & VM_MAYREAD)) cond_resched();
goto efault; page = follow_page_mask(vma, start, foll_flags, &page_mask);
if (!page) {
int ret;
ret = faultin_page(tsk, vma, start, &foll_flags,
nonblocking);
switch (ret) {
case 0:
goto retry;
case -EFAULT:
case -ENOMEM:
case -EHWPOISON:
return i ? i : ret;
case -EBUSY:
return i;
case -ENOENT:
goto next_page;
} }
BUG();
} }
if (IS_ERR(page))
if (is_vm_hugetlb_page(vma)) { return i ? i : PTR_ERR(page);
i = follow_hugetlb_page(mm, vma, pages, vmas, if (pages) {
&start, &nr_pages, i, gup_flags); pages[i] = page;
continue; flush_anon_page(vma, page, start);
flush_dcache_page(page);
page_mask = 0;
} }
do {
struct page *page;
unsigned int foll_flags = gup_flags;
unsigned int page_increm;
/*
* If we have a pending SIGKILL, don't keep faulting
* pages and potentially allocating memory.
*/
if (unlikely(fatal_signal_pending(current)))
return i ? i : -ERESTARTSYS;
cond_resched();
while (!(page = follow_page_mask(vma, start,
foll_flags, &page_mask))) {
int ret;
ret = faultin_page(tsk, vma, start, &foll_flags,
nonblocking);
switch (ret) {
case 0:
break;
case -EFAULT:
case -ENOMEM:
case -EHWPOISON:
return i ? i : ret;
case -EBUSY:
return i;
case -ENOENT:
goto next_page;
default:
BUG();
}
cond_resched();
}
if (IS_ERR(page))
return i ? i : PTR_ERR(page);
if (pages) {
pages[i] = page;
flush_anon_page(vma, page, start);
flush_dcache_page(page);
page_mask = 0;
}
next_page: next_page:
if (vmas) { if (vmas) {
vmas[i] = vma; vmas[i] = vma;
page_mask = 0; page_mask = 0;
} }
page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask); page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask);
if (page_increm > nr_pages) if (page_increm > nr_pages)
page_increm = nr_pages; page_increm = nr_pages;
i += page_increm; i += page_increm;
start += page_increm * PAGE_SIZE; start += page_increm * PAGE_SIZE;
nr_pages -= page_increm; nr_pages -= page_increm;
} while (nr_pages && start < vma->vm_end);
} while (nr_pages); } while (nr_pages);
return i; return i;
efault:
return i ? : -EFAULT;
} }
EXPORT_SYMBOL(__get_user_pages); EXPORT_SYMBOL(__get_user_pages);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment