Commit 66850be5 authored by Liam R. Howlett, committed by Andrew Morton

mm/mempolicy: use vma iterator & maple state instead of vma linked list

Reworked the way mbind_range() finds the first VMA to reuse the maple
state and limit the number of tree walks needed.

Note that this drops the VM_BUG_ON(!vma) call, which would catch a start
address higher than the last VMA.  The code was already written in a way
that allows no VMA updates to occur while still returning success, so there
should be no functional change in this scenario with the new code.
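
For illustration, a condensed sketch of the new lookup and walk (simplified
from the mbind_range() hunk below; splitting, merging and error handling are
omitted, so this is illustrative rather than the exact upstream code):

	MA_STATE(mas, &mm->mm_mt, start - 1, start - 1);
	struct vm_area_struct *prev, *vma;
	int err = 0;

	prev = mas_find_rev(&mas, 0);          /* VMA at or below start - 1 */
	if (prev && (start < prev->vm_end))
		vma = prev;                    /* start lies inside prev */
	else
		vma = mas_next(&mas, end - 1); /* next VMA that starts before end */

	/*
	 * If start is above the last VMA, both lookups return NULL, the loop
	 * body never runs and err stays 0, which is the scenario the dropped
	 * VM_BUG_ON(!vma) used to assert against.
	 */
	for (; vma; vma = mas_next(&mas, end - 1)) {
		/* apply new_pol to [max(start, vma->vm_start), min(end, vma->vm_end)) */
		prev = vma;                    /* the real code feeds prev to vma_merge() */
	}
	return err;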

Link: https://lkml.kernel.org/r/20220906194824.2110408-57-Liam.Howlett@oracle.com
Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Tested-by: Yu Zhao <yuzhao@google.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: SeongJae Park <sj@kernel.org>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent ba0aff8e
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -381,9 +381,10 @@ void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new)
 void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new)
 {
 	struct vm_area_struct *vma;
+	VMA_ITERATOR(vmi, mm, 0);
 
 	mmap_write_lock(mm);
-	for (vma = mm->mmap; vma; vma = vma->vm_next)
+	for_each_vma(vmi, vma)
 		mpol_rebind_policy(vma->vm_policy, new);
 	mmap_write_unlock(mm);
 }
@@ -654,7 +655,7 @@ static unsigned long change_prot_numa(struct vm_area_struct *vma,
 static int queue_pages_test_walk(unsigned long start, unsigned long end,
 				struct mm_walk *walk)
 {
-	struct vm_area_struct *vma = walk->vma;
+	struct vm_area_struct *next, *vma = walk->vma;
 	struct queue_pages *qp = walk->private;
 	unsigned long endvma = vma->vm_end;
 	unsigned long flags = qp->flags;
@@ -669,9 +670,10 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end,
 			/* hole at head side of range */
 			return -EFAULT;
 	}
+	next = find_vma(vma->vm_mm, vma->vm_end);
 	if (!(flags & MPOL_MF_DISCONTIG_OK) &&
 		((vma->vm_end < qp->end) &&
-		(!vma->vm_next || vma->vm_end < vma->vm_next->vm_start)))
+		(!next || vma->vm_end < next->vm_start)))
 		/* hole at middle or tail of range */
 		return -EFAULT;
 
@@ -785,26 +787,24 @@ static int vma_replace_policy(struct vm_area_struct *vma,
 static int mbind_range(struct mm_struct *mm, unsigned long start,
 		       unsigned long end, struct mempolicy *new_pol)
 {
+	MA_STATE(mas, &mm->mm_mt, start - 1, start - 1);
 	struct vm_area_struct *prev;
 	struct vm_area_struct *vma;
 	int err = 0;
 	pgoff_t pgoff;
-	unsigned long vmstart;
-	unsigned long vmend;
-
-	vma = find_vma(mm, start);
-	VM_BUG_ON(!vma);
 
-	prev = vma->vm_prev;
-	if (start > vma->vm_start)
-		prev = vma;
+	prev = mas_find_rev(&mas, 0);
+	if (prev && (start < prev->vm_end))
+		vma = prev;
+	else
+		vma = mas_next(&mas, end - 1);
 
-	for (; vma && vma->vm_start < end; prev = vma, vma = vma->vm_next) {
-		vmstart = max(start, vma->vm_start);
-		vmend   = min(end, vma->vm_end);
+	for (; vma; vma = mas_next(&mas, end - 1)) {
+		unsigned long vmstart = max(start, vma->vm_start);
+		unsigned long vmend = min(end, vma->vm_end);
 
 		if (mpol_equal(vma_policy(vma), new_pol))
-			continue;
+			goto next;
 
 		pgoff = vma->vm_pgoff +
 			((vmstart - vma->vm_start) >> PAGE_SHIFT);
@@ -813,6 +813,8 @@ static int mbind_range(struct mm_struct *mm, unsigned long start,
 				 new_pol, vma->vm_userfaultfd_ctx,
 				 anon_vma_name(vma));
 		if (prev) {
+			/* vma_merge() invalidated the mas */
+			mas_pause(&mas);
 			vma = prev;
 			goto replace;
 		}
@@ -820,19 +822,25 @@ static int mbind_range(struct mm_struct *mm, unsigned long start,
 			err = split_vma(vma->vm_mm, vma, vmstart, 1);
 			if (err)
 				goto out;
+			/* split_vma() invalidated the mas */
+			mas_pause(&mas);
 		}
 		if (vma->vm_end != vmend) {
 			err = split_vma(vma->vm_mm, vma, vmend, 0);
 			if (err)
 				goto out;
+			/* split_vma() invalidated the mas */
+			mas_pause(&mas);
 		}
 replace:
 		err = vma_replace_policy(vma, new_pol);
 		if (err)
 			goto out;
+next:
+		prev = vma;
 	}
 
 out:
 	return err;
 }
@@ -1049,6 +1057,7 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest,
 			   int flags)
 {
 	nodemask_t nmask;
+	struct vm_area_struct *vma;
 	LIST_HEAD(pagelist);
 	int err = 0;
 	struct migration_target_control mtc = {
@@ -1064,8 +1073,9 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest,
 	 * need migration.  Between passing in the full user address
 	 * space range and MPOL_MF_DISCONTIG_OK, this call can not fail.
 	 */
+	vma = find_vma(mm, 0);
 	VM_BUG_ON(!(flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)));
-	queue_pages_range(mm, mm->mmap->vm_start, mm->task_size, &nmask,
+	queue_pages_range(mm, vma->vm_start, mm->task_size, &nmask,
 			flags | MPOL_MF_DISCONTIG_OK, &pagelist);
 
 	if (!list_empty(&pagelist)) {
@@ -1195,14 +1205,13 @@ static struct page *new_page(struct page *page, unsigned long start)
 	struct folio *dst, *src = page_folio(page);
 	struct vm_area_struct *vma;
 	unsigned long address;
+	VMA_ITERATOR(vmi, current->mm, start);
 	gfp_t gfp = GFP_HIGHUSER_MOVABLE | __GFP_RETRY_MAYFAIL;
 
-	vma = find_vma(current->mm, start);
-	while (vma) {
+	for_each_vma(vmi, vma) {
 		address = page_address_in_vma(page, vma);
 		if (address != -EFAULT)
 			break;
-		vma = vma->vm_next;
 	}
 
 	if (folio_test_hugetlb(src))
@@ -1480,6 +1489,7 @@ SYSCALL_DEFINE4(set_mempolicy_home_node, unsigned long, start, unsigned long, le
 	unsigned long vmend;
 	unsigned long end;
 	int err = -ENOENT;
+	VMA_ITERATOR(vmi, mm, start);
 
 	start = untagged_addr(start);
 	if (start & ~PAGE_MASK)
@@ -1505,9 +1515,7 @@ SYSCALL_DEFINE4(set_mempolicy_home_node, unsigned long, start, unsigned long, le
 	if (end == start)
 		return 0;
 	mmap_write_lock(mm);
-	vma = find_vma(mm, start);
-	for (; vma && vma->vm_start < end; vma = vma->vm_next) {
+	for_each_vma_range(vmi, vma, end) {
 		vmstart = max(start, vma->vm_start);
 		vmend = min(end, vma->vm_end);
 		new = mpol_dup(vma_policy(vma));