Commit 5b855937 authored by Huang Ying's avatar Huang Ying Committed by Andrew Morton

migrate_pages: organize stats with struct migrate_pages_stats

Patch series "migrate_pages(): batch TLB flushing", v5.

Now, migrate_pages() migrates folios one by one, like the fake code as
follows,

  for each folio
    unmap
    flush TLB
    copy
    restore map

If multiple folios are passed to migrate_pages(), there are opportunities
to batch the TLB flushing and copying.  That is, we can change the code to
something as follows,

  for each folio
    unmap
  for each folio
    flush TLB
  for each folio
    copy
  for each folio
    restore map

The total number of TLB flushing IPI can be reduced considerably.  And we
may use some hardware accelerator such as DSA to accelerate the folio
copying.

So in this patch, we refactor the migrate_pages() implementation and
implement the TLB flushing batching.  Base on this, hardware accelerated
folio copying can be implemented.

If too many folios are passed to migrate_pages(), in the naive batched
implementation, we may unmap too many folios at the same time.  The
possibility for a task to wait for the migrated folios to be mapped again
increases.  So the latency may be hurt.  To deal with this issue, the max
number of folios be unmapped in batch is restricted to no more than
HPAGE_PMD_NR in the unit of page.  That is, the influence is at the same
level of THP migration.

We use the following test to measure the performance impact of the
patchset,

On a 2-socket Intel server,

 - Run pmbench memory accessing benchmark

 - Run `migratepages` to migrate pages of pmbench between node 0 and
   node 1 back and forth.

With the patch, the TLB flushing IPI reduces 99.1% during the test and
the number of pages migrated successfully per second increases 291.7%.

Xin Hao helped to test the patchset on an ARM64 server with 128 cores,
2 NUMA nodes.  Test results show that the page migration performance
increases up to 78%.


This patch (of 9):

Define struct migrate_pages_stats to organize the various statistics in
migrate_pages().  This makes it easier to collect and consume the
statistics in multiple functions.  This will be needed in the following
patches in the series.

Link: https://lkml.kernel.org/r/20230213123444.155149-1-ying.huang@intel.com
Link: https://lkml.kernel.org/r/20230213123444.155149-2-ying.huang@intel.comSigned-off-by: default avatar"Huang, Ying" <ying.huang@intel.com>
Reviewed-by: default avatarAlistair Popple <apopple@nvidia.com>
Reviewed-by: default avatarZi Yan <ziy@nvidia.com>
Reviewed-by: default avatarBaolin Wang <baolin.wang@linux.alibaba.com>
Reviewed-by: default avatarXin Hao <xhao@linux.alibaba.com>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Bharata B Rao <bharata@amd.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
parent 0621d160
......@@ -1414,6 +1414,16 @@ static inline int try_split_folio(struct folio *folio, struct list_head *split_f
return rc;
}
struct migrate_pages_stats {
int nr_succeeded; /* Normal and large folios migrated successfully, in
units of base pages */
int nr_failed_pages; /* Normal and large folios failed to be migrated, in
units of base pages. Untried folios aren't counted */
int nr_thp_succeeded; /* THP migrated successfully */
int nr_thp_failed; /* THP failed to be migrated */
int nr_thp_split; /* THP split before migrating */
};
/*
* migrate_pages - migrate the folios specified in a list, to the free folios
* supplied as the target for the page migration
......@@ -1448,13 +1458,8 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
int large_retry = 1;
int thp_retry = 1;
int nr_failed = 0;
int nr_failed_pages = 0;
int nr_retry_pages = 0;
int nr_succeeded = 0;
int nr_thp_succeeded = 0;
int nr_large_failed = 0;
int nr_thp_failed = 0;
int nr_thp_split = 0;
int pass = 0;
bool is_large = false;
bool is_thp = false;
......@@ -1464,9 +1469,11 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
LIST_HEAD(split_folios);
bool nosplit = (reason == MR_NUMA_MISPLACED);
bool no_split_folio_counting = false;
struct migrate_pages_stats stats;
trace_mm_migrate_pages_start(mode, reason);
memset(&stats, 0, sizeof(stats));
split_folio_migration:
for (pass = 0; pass < 10 && (retry || large_retry); pass++) {
retry = 0;
......@@ -1520,9 +1527,9 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
/* Large folio migration is unsupported */
if (is_large) {
nr_large_failed++;
nr_thp_failed += is_thp;
stats.nr_thp_failed += is_thp;
if (!try_split_folio(folio, &split_folios)) {
nr_thp_split += is_thp;
stats.nr_thp_split += is_thp;
break;
}
/* Hugetlb migration is unsupported */
......@@ -1530,7 +1537,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
nr_failed++;
}
nr_failed_pages += nr_pages;
stats.nr_failed_pages += nr_pages;
list_move_tail(&folio->lru, &ret_folios);
break;
case -ENOMEM:
......@@ -1540,13 +1547,13 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
*/
if (is_large) {
nr_large_failed++;
nr_thp_failed += is_thp;
stats.nr_thp_failed += is_thp;
/* Large folio NUMA faulting doesn't split to retry. */
if (!nosplit) {
int ret = try_split_folio(folio, &split_folios);
if (!ret) {
nr_thp_split += is_thp;
stats.nr_thp_split += is_thp;
break;
} else if (reason == MR_LONGTERM_PIN &&
ret == -EAGAIN) {
......@@ -1564,7 +1571,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
nr_failed++;
}
nr_failed_pages += nr_pages + nr_retry_pages;
stats.nr_failed_pages += nr_pages + nr_retry_pages;
/*
* There might be some split folios of fail-to-migrate large
* folios left in split_folios list. Move them back to migration
......@@ -1574,7 +1581,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
list_splice_init(&split_folios, from);
/* nr_failed isn't updated for not used */
nr_large_failed += large_retry;
nr_thp_failed += thp_retry;
stats.nr_thp_failed += thp_retry;
goto out;
case -EAGAIN:
if (is_large) {
......@@ -1586,8 +1593,8 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
nr_retry_pages += nr_pages;
break;
case MIGRATEPAGE_SUCCESS:
nr_succeeded += nr_pages;
nr_thp_succeeded += is_thp;
stats.nr_succeeded += nr_pages;
stats.nr_thp_succeeded += is_thp;
break;
default:
/*
......@@ -1598,20 +1605,20 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
*/
if (is_large) {
nr_large_failed++;
nr_thp_failed += is_thp;
stats.nr_thp_failed += is_thp;
} else if (!no_split_folio_counting) {
nr_failed++;
}
nr_failed_pages += nr_pages;
stats.nr_failed_pages += nr_pages;
break;
}
}
}
nr_failed += retry;
nr_large_failed += large_retry;
nr_thp_failed += thp_retry;
nr_failed_pages += nr_retry_pages;
stats.nr_thp_failed += thp_retry;
stats.nr_failed_pages += nr_retry_pages;
/*
* Try to migrate split folios of fail-to-migrate large folios, no
* nr_failed counting in this round, since all split folios of a
......@@ -1644,16 +1651,17 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
if (list_empty(from))
rc = 0;
count_vm_events(PGMIGRATE_SUCCESS, nr_succeeded);
count_vm_events(PGMIGRATE_FAIL, nr_failed_pages);
count_vm_events(THP_MIGRATION_SUCCESS, nr_thp_succeeded);
count_vm_events(THP_MIGRATION_FAIL, nr_thp_failed);
count_vm_events(THP_MIGRATION_SPLIT, nr_thp_split);
trace_mm_migrate_pages(nr_succeeded, nr_failed_pages, nr_thp_succeeded,
nr_thp_failed, nr_thp_split, mode, reason);
count_vm_events(PGMIGRATE_SUCCESS, stats.nr_succeeded);
count_vm_events(PGMIGRATE_FAIL, stats.nr_failed_pages);
count_vm_events(THP_MIGRATION_SUCCESS, stats.nr_thp_succeeded);
count_vm_events(THP_MIGRATION_FAIL, stats.nr_thp_failed);
count_vm_events(THP_MIGRATION_SPLIT, stats.nr_thp_split);
trace_mm_migrate_pages(stats.nr_succeeded, stats.nr_failed_pages,
stats.nr_thp_succeeded, stats.nr_thp_failed,
stats.nr_thp_split, mode, reason);
if (ret_succeeded)
*ret_succeeded = nr_succeeded;
*ret_succeeded = stats.nr_succeeded;
return rc;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment