Commit 65ac1320 authored by Liam R. Howlett, committed by Andrew Morton

userfaultfd: fix regression in userfaultfd_unmap_prep()

Android reported a performance regression in the userfaultfd unmap path.
A closer inspection of the userfaultfd_unmap_prep() change showed that
the reworked code requires a second walk of the VMA tree.

Fix the regression by passing each VMA that will be unmapped through to
userfaultfd_unmap_prep() as it is added to the unmap list, instead of
re-walking the tree to look the VMA up again.
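
As a rough illustration of that pattern change, here is a small user-space
sketch (struct fake_vma, prep_old() and prep_new() are made-up stand-ins,
not kernel code): the old prep step re-walked the whole address range on
its own, while the new one only inspects the single VMA the unmap loop has
already found.

    /*
     * Rough user-space analogy of the change described above; struct
     * fake_vma, prep_old() and prep_new() are illustrative stand-ins,
     * not kernel code.
     */
    #include <stdio.h>

    struct fake_vma {
        unsigned long start;
        unsigned long end;
        int uffd_armed;   /* stands in for vma->vm_userfaultfd_ctx.ctx */
    };

    static struct fake_vma vmas[] = {
        { 0x1000, 0x2000, 1 },
        { 0x2000, 0x3000, 0 },
        { 0x3000, 0x4000, 1 },
    };

    #define NR_VMAS (sizeof(vmas) / sizeof(vmas[0]))

    /* Old shape: the prep step walks the whole range again by itself. */
    static int prep_old(unsigned long start, unsigned long end)
    {
        int hits = 0;

        for (unsigned i = 0; i < NR_VMAS; i++) {   /* the second walk */
            if (vmas[i].end <= start || vmas[i].start >= end)
                continue;
            hits += vmas[i].uffd_armed;
        }
        return hits;
    }

    /* New shape: the caller hands in the VMA it is already holding. */
    static int prep_new(const struct fake_vma *vma)
    {
        return vma->uffd_armed;
    }

    int main(void)
    {
        unsigned long start = 0x1000, end = 0x4000;
        int hits = 0;

        /* The unmap loop already visits each VMA once; reuse that pass. */
        for (unsigned i = 0; i < NR_VMAS; i++) {
            if (vmas[i].end <= start || vmas[i].start >= end)
                continue;
            hits += prep_new(&vmas[i]);
        }

        printf("second walk: %d, single pass: %d\n",
               prep_old(start, end), hits);
        return 0;
    }

The kernel change below is the real version of the same idea:
do_vmi_align_munmap() now calls userfaultfd_unmap_prep(next, ...) on each
VMA as it gathers them, and the prep function no longer needs a VMA
iterator at all.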

Link: https://lkml.kernel.org/r/20230601015402.2819343-1-Liam.Howlett@oracle.com
Fixes: 69dbe6da ("userfaultfd: use maple tree iterator to iterate VMAs")
Signed-off-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Reported-by: Suren Baghdasaryan <surenb@google.com>
Suggested-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 1e3be485
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -852,31 +852,26 @@ static bool has_unmap_ctx(struct userfaultfd_ctx *ctx, struct list_head *unmaps,
 	return false;
 }
 
-int userfaultfd_unmap_prep(struct mm_struct *mm, unsigned long start,
+int userfaultfd_unmap_prep(struct vm_area_struct *vma, unsigned long start,
 			   unsigned long end, struct list_head *unmaps)
 {
-	VMA_ITERATOR(vmi, mm, start);
-	struct vm_area_struct *vma;
-
-	for_each_vma_range(vmi, vma, end) {
-		struct userfaultfd_unmap_ctx *unmap_ctx;
-		struct userfaultfd_ctx *ctx = vma->vm_userfaultfd_ctx.ctx;
+	struct userfaultfd_unmap_ctx *unmap_ctx;
+	struct userfaultfd_ctx *ctx = vma->vm_userfaultfd_ctx.ctx;
 
-		if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_UNMAP) ||
-		    has_unmap_ctx(ctx, unmaps, start, end))
-			continue;
+	if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_UNMAP) ||
+	    has_unmap_ctx(ctx, unmaps, start, end))
+		return 0;
 
-		unmap_ctx = kzalloc(sizeof(*unmap_ctx), GFP_KERNEL);
-		if (!unmap_ctx)
-			return -ENOMEM;
+	unmap_ctx = kzalloc(sizeof(*unmap_ctx), GFP_KERNEL);
+	if (!unmap_ctx)
+		return -ENOMEM;
 
-		userfaultfd_ctx_get(ctx);
-		atomic_inc(&ctx->mmap_changing);
-		unmap_ctx->ctx = ctx;
-		unmap_ctx->start = start;
-		unmap_ctx->end = end;
-		list_add_tail(&unmap_ctx->list, unmaps);
-	}
+	userfaultfd_ctx_get(ctx);
+	atomic_inc(&ctx->mmap_changing);
+	unmap_ctx->ctx = ctx;
+	unmap_ctx->start = start;
+	unmap_ctx->end = end;
+	list_add_tail(&unmap_ctx->list, unmaps);
 
 	return 0;
 }
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -188,8 +188,8 @@ extern bool userfaultfd_remove(struct vm_area_struct *vma,
 			       unsigned long start,
 			       unsigned long end);
 
-extern int userfaultfd_unmap_prep(struct mm_struct *mm, unsigned long start,
-				  unsigned long end, struct list_head *uf);
+extern int userfaultfd_unmap_prep(struct vm_area_struct *vma,
+		unsigned long start, unsigned long end, struct list_head *uf);
 extern void userfaultfd_unmap_complete(struct mm_struct *mm,
 				       struct list_head *uf);
 extern bool userfaultfd_wp_unpopulated(struct vm_area_struct *vma);
@@ -271,7 +271,7 @@ static inline bool userfaultfd_remove(struct vm_area_struct *vma,
 	return true;
 }
 
-static inline int userfaultfd_unmap_prep(struct mm_struct *mm,
+static inline int userfaultfd_unmap_prep(struct vm_area_struct *vma,
 					 unsigned long start, unsigned long end,
 					 struct list_head *uf)
 {
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2417,6 +2417,21 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
 				goto munmap_sidetree_failed;
 
 		count++;
+		if (unlikely(uf)) {
+			/*
+			 * If userfaultfd_unmap_prep returns an error the vmas
+			 * will remain split, but userland will get a
+			 * highly unexpected error anyway. This is no
+			 * different than the case where the first of the two
+			 * __split_vma fails, but we don't undo the first
+			 * split, despite we could. This is unlikely enough
+			 * failure that it's not worth optimizing it for.
+			 */
+			error = userfaultfd_unmap_prep(next, start, end, uf);
+
+			if (error)
+				goto userfaultfd_error;
+		}
 #ifdef CONFIG_DEBUG_VM_MAPLE_TREE
 		BUG_ON(next->vm_start < start);
 		BUG_ON(next->vm_start > end);
@@ -2429,22 +2444,6 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
 	if (!next)
 		next = vma_next(vmi);
 
-	if (unlikely(uf)) {
-		/*
-		 * If userfaultfd_unmap_prep returns an error the vmas
-		 * will remain split, but userland will get a
-		 * highly unexpected error anyway. This is no
-		 * different than the case where the first of the two
-		 * __split_vma fails, but we don't undo the first
-		 * split, despite we could. This is unlikely enough
-		 * failure that it's not worth optimizing it for.
-		 */
-		error = userfaultfd_unmap_prep(mm, start, end, uf);
-
-		if (error)
-			goto userfaultfd_error;
-	}
-
 #if defined(CONFIG_DEBUG_VM_MAPLE_TREE)
 	/* Make sure no VMAs are about to be lost. */
 	{