Commit 1e987790, authored by Dave Hansen, committed by Ingo Molnar

mm/gup: Introduce get_user_pages_remote()

For protection keys, we need to understand whether protections
should be enforced in software or not.  In general, we enforce
protections when working on our own task, but not when on others.
We call these "current" and "remote" operations.

This patch introduces a new get_user_pages() variant:

        get_user_pages_remote()

Which is a replacement for when get_user_pages() is called on
non-current tsk/mm.

We also introduce a new gup flag: FOLL_REMOTE which can be used
for the "__" gup variants to get this new behavior.

The uprobes is_trap_at_addr() location holds mmap_sem and
calls get_user_pages(current->mm) on an instruction address.  This
makes it a pretty unique gup caller.  Being an instruction access
and also really originating from the kernel (vs. the app), I opted
to consider this a 'remote' access where protection keys will not
be enforced.

Without protection keys, this patch should not change any behavior.
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave@sr71.net>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: jack@suse.cz
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/20160212210154.3F0E51EA@viggo.jf.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent 1fe3f29e
...@@ -753,7 +753,7 @@ static struct page **etnaviv_gem_userptr_do_get_pages( ...@@ -753,7 +753,7 @@ static struct page **etnaviv_gem_userptr_do_get_pages(
down_read(&mm->mmap_sem); down_read(&mm->mmap_sem);
while (pinned < npages) { while (pinned < npages) {
ret = get_user_pages(task, mm, ptr, npages - pinned, ret = get_user_pages_remote(task, mm, ptr, npages - pinned,
!etnaviv_obj->userptr.ro, 0, !etnaviv_obj->userptr.ro, 0,
pvec + pinned, NULL); pvec + pinned, NULL);
if (ret < 0) if (ret < 0)
......
...@@ -584,7 +584,7 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work) ...@@ -584,7 +584,7 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
down_read(&mm->mmap_sem); down_read(&mm->mmap_sem);
while (pinned < npages) { while (pinned < npages) {
ret = get_user_pages(work->task, mm, ret = get_user_pages_remote(work->task, mm,
obj->userptr.ptr + pinned * PAGE_SIZE, obj->userptr.ptr + pinned * PAGE_SIZE,
npages - pinned, npages - pinned,
!obj->userptr.read_only, 0, !obj->userptr.read_only, 0,
......
...@@ -572,10 +572,10 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt, ...@@ -572,10 +572,10 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
* complex (and doesn't gain us much performance in most use * complex (and doesn't gain us much performance in most use
* cases). * cases).
*/ */
npages = get_user_pages(owning_process, owning_mm, user_virt, npages = get_user_pages_remote(owning_process, owning_mm,
gup_num_pages, user_virt, gup_num_pages,
access_mask & ODP_WRITE_ALLOWED_BIT, 0, access_mask & ODP_WRITE_ALLOWED_BIT,
local_page_list, NULL); 0, local_page_list, NULL);
up_read(&owning_mm->mmap_sem); up_read(&owning_mm->mmap_sem);
if (npages < 0) if (npages < 0)
......
...@@ -198,8 +198,12 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, ...@@ -198,8 +198,12 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
return NULL; return NULL;
} }
#endif #endif
ret = get_user_pages(current, bprm->mm, pos, /*
1, write, 1, &page, NULL); * We are doing an exec(). 'current' is the process
* doing the exec and bprm->mm is the new process's mm.
*/
ret = get_user_pages_remote(current, bprm->mm, pos, 1, write,
1, &page, NULL);
if (ret <= 0) if (ret <= 0)
return NULL; return NULL;
......
...@@ -1225,6 +1225,10 @@ long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, ...@@ -1225,6 +1225,10 @@ long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
unsigned long start, unsigned long nr_pages, unsigned long start, unsigned long nr_pages,
unsigned int foll_flags, struct page **pages, unsigned int foll_flags, struct page **pages,
struct vm_area_struct **vmas, int *nonblocking); struct vm_area_struct **vmas, int *nonblocking);
long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
unsigned long start, unsigned long nr_pages,
int write, int force, struct page **pages,
struct vm_area_struct **vmas);
long get_user_pages(struct task_struct *tsk, struct mm_struct *mm, long get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
unsigned long start, unsigned long nr_pages, unsigned long start, unsigned long nr_pages,
int write, int force, struct page **pages, int write, int force, struct page **pages,
...@@ -2170,6 +2174,7 @@ static inline struct page *follow_page(struct vm_area_struct *vma, ...@@ -2170,6 +2174,7 @@ static inline struct page *follow_page(struct vm_area_struct *vma,
#define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */ #define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */
#define FOLL_TRIED 0x800 /* a retry, previous pass started an IO */ #define FOLL_TRIED 0x800 /* a retry, previous pass started an IO */
#define FOLL_MLOCK 0x1000 /* lock present pages */ #define FOLL_MLOCK 0x1000 /* lock present pages */
#define FOLL_REMOTE 0x2000 /* we are working on non-current tsk/mm */
typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
void *data); void *data);
......
...@@ -299,7 +299,7 @@ int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr, ...@@ -299,7 +299,7 @@ int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr,
retry: retry:
/* Read the page with vaddr into memory */ /* Read the page with vaddr into memory */
ret = get_user_pages(NULL, mm, vaddr, 1, 0, 1, &old_page, &vma); ret = get_user_pages_remote(NULL, mm, vaddr, 1, 0, 1, &old_page, &vma);
if (ret <= 0) if (ret <= 0)
return ret; return ret;
...@@ -1700,7 +1700,13 @@ static int is_trap_at_addr(struct mm_struct *mm, unsigned long vaddr) ...@@ -1700,7 +1700,13 @@ static int is_trap_at_addr(struct mm_struct *mm, unsigned long vaddr)
if (likely(result == 0)) if (likely(result == 0))
goto out; goto out;
result = get_user_pages(NULL, mm, vaddr, 1, 0, 1, &page, NULL); /*
* The NULL 'tsk' here ensures that any faults that occur here
* will not be accounted to the task. 'mm' *is* current->mm,
* but we treat this as a 'remote' access since it is
* essentially a kernel access to the memory.
*/
result = get_user_pages_remote(NULL, mm, vaddr, 1, 0, 1, &page, NULL);
if (result < 0) if (result < 0)
return result; return result;
......
...@@ -870,7 +870,7 @@ long get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, ...@@ -870,7 +870,7 @@ long get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm,
EXPORT_SYMBOL(get_user_pages_unlocked); EXPORT_SYMBOL(get_user_pages_unlocked);
/* /*
* get_user_pages() - pin user pages in memory * get_user_pages_remote() - pin user pages in memory
* @tsk: the task_struct to use for page fault accounting, or * @tsk: the task_struct to use for page fault accounting, or
* NULL if faults are not to be recorded. * NULL if faults are not to be recorded.
* @mm: mm_struct of target mm * @mm: mm_struct of target mm
...@@ -924,12 +924,29 @@ EXPORT_SYMBOL(get_user_pages_unlocked); ...@@ -924,12 +924,29 @@ EXPORT_SYMBOL(get_user_pages_unlocked);
* should use get_user_pages because it cannot pass * should use get_user_pages because it cannot pass
* FAULT_FLAG_ALLOW_RETRY to handle_mm_fault. * FAULT_FLAG_ALLOW_RETRY to handle_mm_fault.
*/ */
long get_user_pages(struct task_struct *tsk, struct mm_struct *mm, long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
unsigned long start, unsigned long nr_pages, int write, unsigned long start, unsigned long nr_pages,
int force, struct page **pages, struct vm_area_struct **vmas) int write, int force, struct page **pages,
struct vm_area_struct **vmas)
{ {
return __get_user_pages_locked(tsk, mm, start, nr_pages, write, force, return __get_user_pages_locked(tsk, mm, start, nr_pages, write, force,
pages, vmas, NULL, false, FOLL_TOUCH); pages, vmas, NULL, false,
FOLL_TOUCH | FOLL_REMOTE);
}
EXPORT_SYMBOL(get_user_pages_remote);
/*
* This is the same as get_user_pages_remote() for the time
* being.
*/
long get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
unsigned long start, unsigned long nr_pages,
int write, int force, struct page **pages,
struct vm_area_struct **vmas)
{
return __get_user_pages_locked(tsk, mm, start, nr_pages,
write, force, pages, vmas, NULL, false,
FOLL_TOUCH);
} }
EXPORT_SYMBOL(get_user_pages); EXPORT_SYMBOL(get_user_pages);
......
...@@ -3685,7 +3685,7 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, ...@@ -3685,7 +3685,7 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
void *maddr; void *maddr;
struct page *page = NULL; struct page *page = NULL;
ret = get_user_pages(tsk, mm, addr, 1, ret = get_user_pages_remote(tsk, mm, addr, 1,
write, 1, &page, &vma); write, 1, &page, &vma);
if (ret <= 0) { if (ret <= 0) {
#ifndef CONFIG_HAVE_IOREMAP_PROT #ifndef CONFIG_HAVE_IOREMAP_PROT
......
...@@ -98,9 +98,14 @@ static int process_vm_rw_single_vec(unsigned long addr, ...@@ -98,9 +98,14 @@ static int process_vm_rw_single_vec(unsigned long addr,
int pages = min(nr_pages, max_pages_per_loop); int pages = min(nr_pages, max_pages_per_loop);
size_t bytes; size_t bytes;
/* Get the pages we're interested in */ /*
pages = get_user_pages_unlocked(task, mm, pa, pages, * Get the pages we're interested in. We must
vm_write, 0, process_pages); * add FOLL_REMOTE because task/mm might not
* be current/current->mm
*/
pages = __get_user_pages_unlocked(task, mm, pa, pages,
vm_write, 0, process_pages,
FOLL_REMOTE);
if (pages <= 0) if (pages <= 0)
return -EFAULT; return -EFAULT;
......
...@@ -874,7 +874,14 @@ bool tomoyo_dump_page(struct linux_binprm *bprm, unsigned long pos, ...@@ -874,7 +874,14 @@ bool tomoyo_dump_page(struct linux_binprm *bprm, unsigned long pos,
} }
/* Same with get_arg_page(bprm, pos, 0) in fs/exec.c */ /* Same with get_arg_page(bprm, pos, 0) in fs/exec.c */
#ifdef CONFIG_MMU #ifdef CONFIG_MMU
if (get_user_pages(current, bprm->mm, pos, 1, 0, 1, &page, NULL) <= 0) /*
* This is called at execve() time in order to dig around
* in the argv/environment of the new process
* (represented by bprm). 'current' is the process doing
* the execve().
*/
if (get_user_pages_remote(current, bprm->mm, pos, 1,
0, 1, &page, NULL) <= 0)
return false; return false;
#else #else
page = bprm->page[pos / PAGE_SIZE]; page = bprm->page[pos / PAGE_SIZE];
......
...@@ -79,7 +79,13 @@ static void async_pf_execute(struct work_struct *work) ...@@ -79,7 +79,13 @@ static void async_pf_execute(struct work_struct *work)
might_sleep(); might_sleep();
get_user_pages_unlocked(NULL, mm, addr, 1, 1, 0, NULL); /*
* This work is run asynchronously to the task which owns
* mm and might be done in another context, so we must
* use FOLL_REMOTE.
*/
__get_user_pages_unlocked(NULL, mm, addr, 1, 1, 0, NULL, FOLL_REMOTE);
kvm_async_page_present_sync(vcpu, apf); kvm_async_page_present_sync(vcpu, apf);
spin_lock(&vcpu->async_pf.lock); spin_lock(&vcpu->async_pf.lock);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment