Commit 5dfab109 authored by Huang Ying, committed by Andrew Morton

migrate_pages: batch _unmap and _move

In this patch, the _unmap and _move stages of the folio migration are
batched.  That is, previously the flow was,

  for each folio
    _unmap()
    _move()

Now, it is,

  for each folio
    _unmap()
  for each folio
    _move()

Based on this, we can batch the TLB flushing and use a hardware
accelerator to copy folios between the batched _unmap and batched _move
stages.
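
For illustration only (not part of the patch), here is a minimal
userspace C sketch of the batched control flow; unmap_one(), move_one(),
and flush_tlb_batched() are hypothetical stand-ins for the real
per-folio work done in mm/migrate.c:

#include <stdbool.h>
#include <stdio.h>

struct item { int id; bool unmapped; };

/* Hypothetical stand-ins for the real unmap/flush/move operations. */
static bool unmap_one(struct item *it) { it->unmapped = true; return true; }
static void flush_tlb_batched(void) { puts("one TLB flush for the whole batch"); }
static void move_one(struct item *it) { printf("move folio %d\n", it->id); }

static void migrate_batch(struct item *items, int n)
{
	int i;

	/* Stage 1: unmap every folio in the batch, deferring the TLB flush. */
	for (i = 0; i < n; i++)
		unmap_one(&items[i]);

	/*
	 * A single batched TLB flush (or a hardware-accelerated bulk copy)
	 * can be issued here, once per batch instead of once per folio.
	 */
	flush_tlb_batched();

	/* Stage 2: move every folio that was successfully unmapped. */
	for (i = 0; i < n; i++)
		if (items[i].unmapped)
			move_one(&items[i]);
}

int main(void)
{
	struct item batch[] = { {0, false}, {1, false}, {2, false} };

	migrate_batch(batch, 3);
	return 0;
}

migrate_pages_batch() in the diff below implements this same two-stage
structure over folio lists, with per-stage retry loops and failure
handling added.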

Link: https://lkml.kernel.org/r/20230213123444.155149-6-ying.huang@intel.com
Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Tested-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Bharata B Rao <bharata@amd.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Xin Hao <xhao@linux.alibaba.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 64c8902e
mm/migrate.c

@@ -1051,6 +1051,33 @@ static void __migrate_folio_extract(struct folio *dst,
 	dst->private = NULL;
 }
 
+/* Restore the source folio to the original state upon failure */
+static void migrate_folio_undo_src(struct folio *src,
+				   int page_was_mapped,
+				   struct anon_vma *anon_vma,
+				   struct list_head *ret)
+{
+	if (page_was_mapped)
+		remove_migration_ptes(src, src, false);
+	/* Drop an anon_vma reference if we took one */
+	if (anon_vma)
+		put_anon_vma(anon_vma);
+	folio_unlock(src);
+	list_move_tail(&src->lru, ret);
+}
+
+/* Restore the destination folio to the original state upon failure */
+static void migrate_folio_undo_dst(struct folio *dst,
+				   free_page_t put_new_page,
+				   unsigned long private)
+{
+	folio_unlock(dst);
+	if (put_new_page)
+		put_new_page(&dst->page, private);
+	else
+		folio_put(dst);
+}
+
 /* Cleanup src folio upon migration success */
 static void migrate_folio_done(struct folio *src,
 			       enum migrate_reason reason)
@@ -1069,8 +1096,8 @@ static void migrate_folio_done(struct folio *src,
 	folio_put(src);
 }
 
-static int __migrate_folio_unmap(struct folio *src, struct folio *dst,
-				 int force, enum migrate_mode mode)
+static int __migrate_folio_unmap(struct folio *src, struct folio *dst, int force,
+				 bool avoid_force_lock, enum migrate_mode mode)
 {
 	int rc = -EAGAIN;
 	int page_was_mapped = 0;
@@ -1097,6 +1124,17 @@ static int __migrate_folio_unmap(struct folio *src, struct folio *dst,
 		if (current->flags & PF_MEMALLOC)
 			goto out;
 
+		/*
+		 * We have locked some folios and are going to wait to lock
+		 * this folio.  To avoid a potential deadlock, let's bail
+		 * out and not do that.  The locked folios will be moved and
+		 * unlocked, then we can wait to lock this folio.
+		 */
+		if (avoid_force_lock) {
+			rc = -EDEADLOCK;
+			goto out;
+		}
+
 		folio_lock(src);
 	}
 
@@ -1205,10 +1243,20 @@ static int __migrate_folio_move(struct folio *src, struct folio *dst,
 	int page_was_mapped = 0;
 	struct anon_vma *anon_vma = NULL;
 	bool is_lru = !__PageMovable(&src->page);
+	struct list_head *prev;
 
 	__migrate_folio_extract(dst, &page_was_mapped, &anon_vma);
+	prev = dst->lru.prev;
+	list_del(&dst->lru);
 
 	rc = move_to_new_folio(dst, src, mode);
+	if (rc == -EAGAIN) {
+		list_add(&dst->lru, prev);
+		__migrate_folio_record(dst, page_was_mapped, anon_vma);
+		return rc;
+	}
+
 	if (unlikely(!is_lru))
 		goto out_unlock_both;
 
@@ -1251,7 +1299,7 @@ static int __migrate_folio_move(struct folio *src, struct folio *dst,
 /* Obtain the lock on page, remove all ptes. */
 static int migrate_folio_unmap(new_page_t get_new_page, free_page_t put_new_page,
 			       unsigned long private, struct folio *src,
-			       struct folio **dstp, int force,
+			       struct folio **dstp, int force, bool avoid_force_lock,
 			       enum migrate_mode mode, enum migrate_reason reason,
 			       struct list_head *ret)
 {
@@ -1279,7 +1327,7 @@ static int migrate_folio_unmap(new_page_t get_new_page, free_page_t put_new_page
 	*dstp = dst;
 
 	dst->private = NULL;
-	rc = __migrate_folio_unmap(src, dst, force, mode);
+	rc = __migrate_folio_unmap(src, dst, force, avoid_force_lock, mode);
 	if (rc == MIGRATEPAGE_UNMAP)
 		return rc;
 
@@ -1287,7 +1335,7 @@ static int migrate_folio_unmap(new_page_t get_new_page, free_page_t put_new_page
 	 * A folio that has not been unmapped will be restored to
 	 * right list unless we want to retry.
 	 */
-	if (rc != -EAGAIN)
+	if (rc != -EAGAIN && rc != -EDEADLOCK)
 		list_move_tail(&src->lru, ret);
 
 	if (put_new_page)
@@ -1326,9 +1374,8 @@ static int migrate_folio_move(free_page_t put_new_page, unsigned long private,
 	 */
 	if (rc == MIGRATEPAGE_SUCCESS) {
 		migrate_folio_done(src, reason);
-	} else {
-		if (rc != -EAGAIN)
-			list_add_tail(&src->lru, ret);
+	} else if (rc != -EAGAIN) {
+		list_add_tail(&src->lru, ret);
 
 		if (put_new_page)
 			put_new_page(&dst->page, private);
@@ -1603,12 +1650,16 @@ static int migrate_hugetlbs(struct list_head *from, new_page_t get_new_page,
 	return nr_failed;
 }
 
+/*
+ * migrate_pages_batch() first unmaps folios in the from list as many as
+ * possible, then move the unmapped folios.
+ */
 static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page,
 		free_page_t put_new_page, unsigned long private,
 		enum migrate_mode mode, int reason, struct list_head *ret_folios,
 		struct migrate_pages_stats *stats)
 {
-	int retry = 1;
+	int retry;
 	int large_retry = 1;
 	int thp_retry = 1;
 	int nr_failed = 0;
@@ -1617,13 +1668,19 @@ static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page,
 	int pass = 0;
 	bool is_large = false;
 	bool is_thp = false;
-	struct folio *folio, *folio2, *dst = NULL;
-	int rc, nr_pages;
+	struct folio *folio, *folio2, *dst = NULL, *dst2;
+	int rc, rc_saved, nr_pages;
 	LIST_HEAD(split_folios);
+	LIST_HEAD(unmap_folios);
+	LIST_HEAD(dst_folios);
 	bool nosplit = (reason == MR_NUMA_MISPLACED);
 	bool no_split_folio_counting = false;
+	bool avoid_force_lock;
 
-split_folio_migration:
+retry:
+	rc_saved = 0;
+	avoid_force_lock = false;
+	retry = 1;
 	for (pass = 0;
 	     pass < NR_MAX_MIGRATE_PAGES_RETRY && (retry || large_retry);
 	     pass++) {
@@ -1645,16 +1702,15 @@ static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page,
 			cond_resched();
 
 			rc = migrate_folio_unmap(get_new_page, put_new_page, private,
-						 folio, &dst, pass > 2, mode,
-						 reason, ret_folios);
-			if (rc == MIGRATEPAGE_UNMAP)
-				rc = migrate_folio_move(put_new_page, private,
-							folio, dst, mode,
-							reason, ret_folios);
+						 folio, &dst, pass > 2, avoid_force_lock,
+						 mode, reason, ret_folios);
 			/*
 			 * The rules are:
 			 *	Success: folio will be freed
+			 *	Unmap: folio will be put on unmap_folios list,
+			 *	       dst folio put on dst_folios list
 			 *	-EAGAIN: stay on the from list
+			 *	-EDEADLOCK: stay on the from list
 			 *	-ENOMEM: stay on the from list
 			 *	-ENOSYS: stay on the from list
 			 *	Other errno: put on ret_folios list
@@ -1689,7 +1745,7 @@ static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page,
 			case -ENOMEM:
 				/*
 				 * When memory is low, don't bother to try to migrate
-				 * other folios, just exit.
+				 * other folios, move unmapped folios, then exit.
 				 */
 				if (is_large) {
 					nr_large_failed++;
@@ -1728,7 +1784,19 @@ static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page,
 				/* nr_failed isn't updated for not used */
 				nr_large_failed += large_retry;
 				stats->nr_thp_failed += thp_retry;
-				goto out;
+				rc_saved = rc;
+				if (list_empty(&unmap_folios))
+					goto out;
+				else
+					goto move;
+			case -EDEADLOCK:
+				/*
+				 * The folio cannot be locked for potential deadlock.
+				 * Go move (and unlock) all locked folios.  Then we can
+				 * try again.
+				 */
+				rc_saved = rc;
+				goto move;
 			case -EAGAIN:
 				if (is_large) {
 					large_retry++;
@@ -1742,6 +1810,15 @@ static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page,
 				stats->nr_succeeded += nr_pages;
 				stats->nr_thp_succeeded += is_thp;
 				break;
+			case MIGRATEPAGE_UNMAP:
+				/*
+				 * We have locked some folios, don't force lock
+				 * to avoid deadlock.
+				 */
+				avoid_force_lock = true;
+				list_move_tail(&folio->lru, &unmap_folios);
+				list_add_tail(&dst->lru, &dst_folios);
+				break;
 			default:
 				/*
 				 * Permanent failure (-EBUSY, etc.):
@@ -1765,12 +1842,95 @@ static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page,
 	nr_large_failed += large_retry;
 	stats->nr_thp_failed += thp_retry;
 	stats->nr_failed_pages += nr_retry_pages;
+move:
+	retry = 1;
+	for (pass = 0;
+	     pass < NR_MAX_MIGRATE_PAGES_RETRY && (retry || large_retry);
+	     pass++) {
+		retry = 0;
+		large_retry = 0;
+		thp_retry = 0;
+		nr_retry_pages = 0;
+
+		dst = list_first_entry(&dst_folios, struct folio, lru);
+		dst2 = list_next_entry(dst, lru);
+		list_for_each_entry_safe(folio, folio2, &unmap_folios, lru) {
+			is_large = folio_test_large(folio);
+			is_thp = is_large && folio_test_pmd_mappable(folio);
+			nr_pages = folio_nr_pages(folio);
+
+			cond_resched();
+
+			rc = migrate_folio_move(put_new_page, private,
+						folio, dst, mode,
+						reason, ret_folios);
+			/*
+			 * The rules are:
+			 *	Success: folio will be freed
+			 *	-EAGAIN: stay on the unmap_folios list
+			 *	Other errno: put on ret_folios list
+			 */
+			switch(rc) {
+			case -EAGAIN:
+				if (is_large) {
+					large_retry++;
+					thp_retry += is_thp;
+				} else if (!no_split_folio_counting) {
+					retry++;
+				}
+				nr_retry_pages += nr_pages;
+				break;
+			case MIGRATEPAGE_SUCCESS:
+				stats->nr_succeeded += nr_pages;
+				stats->nr_thp_succeeded += is_thp;
+				break;
+			default:
+				if (is_large) {
+					nr_large_failed++;
+					stats->nr_thp_failed += is_thp;
+				} else if (!no_split_folio_counting) {
+					nr_failed++;
+				}
+
+				stats->nr_failed_pages += nr_pages;
+				break;
+			}
+			dst = dst2;
+			dst2 = list_next_entry(dst, lru);
+		}
+	}
+	nr_failed += retry;
+	nr_large_failed += large_retry;
+	stats->nr_thp_failed += thp_retry;
+	stats->nr_failed_pages += nr_retry_pages;
+
+	if (rc_saved)
+		rc = rc_saved;
+	else
+		rc = nr_failed + nr_large_failed;
+out:
+	/* Cleanup remaining folios */
+	dst = list_first_entry(&dst_folios, struct folio, lru);
+	dst2 = list_next_entry(dst, lru);
+	list_for_each_entry_safe(folio, folio2, &unmap_folios, lru) {
+		int page_was_mapped = 0;
+		struct anon_vma *anon_vma = NULL;
+
+		__migrate_folio_extract(dst, &page_was_mapped, &anon_vma);
+		migrate_folio_undo_src(folio, page_was_mapped, anon_vma,
+				       ret_folios);
+		list_del(&dst->lru);
+		migrate_folio_undo_dst(dst, put_new_page, private);
+		dst = dst2;
+		dst2 = list_next_entry(dst, lru);
+	}
+
 	/*
 	 * Try to migrate split folios of fail-to-migrate large folios, no
 	 * nr_failed counting in this round, since all split folios of a
 	 * large folio is counted as 1 failure in the first round.
 	 */
-	if (!list_empty(&split_folios)) {
+	if (rc >= 0 && !list_empty(&split_folios)) {
 		/*
 		 * Move non-migrated folios (after NR_MAX_MIGRATE_PAGES_RETRY
 		 * retries) to ret_folios to avoid migrating them again.
@@ -1778,12 +1938,16 @@ static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page,
 		list_splice_init(from, ret_folios);
 		list_splice_init(&split_folios, from);
 		no_split_folio_counting = true;
-		retry = 1;
-		goto split_folio_migration;
+		goto retry;
 	}
 
-	rc = nr_failed + nr_large_failed;
-out:
+	/*
+	 * We have unlocked all locked folios, so we can force lock now, let's
+	 * try again.
+	 */
+	if (rc == -EDEADLOCK)
+		goto retry;
+
 	return rc;
 }