Commit 340ef390 authored by Hugh Dickins's avatar Hugh Dickins Committed by Linus Torvalds

mm: numa: cleanup flow of transhuge page migration

When correcting commit 04fa5d6a ("mm: migrate: check page_count of
THP before migrating") Hugh Dickins noted that the control flow for
transhuge migration was difficult to follow.  Unconditionally calling
put_page() in numamigrate_isolate_page() made the failure paths of both
migrate_misplaced_transhuge_page() and migrate_misplaced_page() more
complex that they should be.  Further, he was extremely wary that an
unlock_page() should ever happen after a put_page() even if the
put_page() should never be the final put_page.

Hugh implemented the following cleanup to simplify the path by calling
putback_lru_page() inside numamigrate_isolate_page() if it failed to
isolate and always calling unlock_page() within
migrate_misplaced_transhuge_page().

There is no functional change after this patch is applied but the code
is easier to follow and unlock_page() always happens before put_page().

[mgorman@suse.de: changelog only]
Signed-off-by: default avatarMel Gorman <mgorman@suse.de>
Signed-off-by: default avatarHugh Dickins <hughd@google.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Simon Jeons <simon.jeons@gmail.com>
Cc: Wanpeng Li <liwanp@linux.vnet.ibm.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent 75980e97
...@@ -1296,7 +1296,6 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -1296,7 +1296,6 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
int target_nid; int target_nid;
int current_nid = -1; int current_nid = -1;
bool migrated; bool migrated;
bool page_locked = false;
spin_lock(&mm->page_table_lock); spin_lock(&mm->page_table_lock);
if (unlikely(!pmd_same(pmd, *pmdp))) if (unlikely(!pmd_same(pmd, *pmdp)))
...@@ -1318,7 +1317,6 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -1318,7 +1317,6 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
/* Acquire the page lock to serialise THP migrations */ /* Acquire the page lock to serialise THP migrations */
spin_unlock(&mm->page_table_lock); spin_unlock(&mm->page_table_lock);
lock_page(page); lock_page(page);
page_locked = true;
/* Confirm the PTE did not while locked */ /* Confirm the PTE did not while locked */
spin_lock(&mm->page_table_lock); spin_lock(&mm->page_table_lock);
...@@ -1331,34 +1329,26 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -1331,34 +1329,26 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
/* Migrate the THP to the requested node */ /* Migrate the THP to the requested node */
migrated = migrate_misplaced_transhuge_page(mm, vma, migrated = migrate_misplaced_transhuge_page(mm, vma,
pmdp, pmd, addr, pmdp, pmd, addr, page, target_nid);
page, target_nid); if (!migrated)
if (migrated) goto check_same;
current_nid = target_nid;
else {
spin_lock(&mm->page_table_lock);
if (unlikely(!pmd_same(pmd, *pmdp))) {
unlock_page(page);
goto out_unlock;
}
goto clear_pmdnuma;
}
task_numa_fault(current_nid, HPAGE_PMD_NR, migrated); task_numa_fault(target_nid, HPAGE_PMD_NR, true);
return 0; return 0;
check_same:
spin_lock(&mm->page_table_lock);
if (unlikely(!pmd_same(pmd, *pmdp)))
goto out_unlock;
clear_pmdnuma: clear_pmdnuma:
pmd = pmd_mknonnuma(pmd); pmd = pmd_mknonnuma(pmd);
set_pmd_at(mm, haddr, pmdp, pmd); set_pmd_at(mm, haddr, pmdp, pmd);
VM_BUG_ON(pmd_numa(*pmdp)); VM_BUG_ON(pmd_numa(*pmdp));
update_mmu_cache_pmd(vma, addr, pmdp); update_mmu_cache_pmd(vma, addr, pmdp);
if (page_locked)
unlock_page(page);
out_unlock: out_unlock:
spin_unlock(&mm->page_table_lock); spin_unlock(&mm->page_table_lock);
if (current_nid != -1) if (current_nid != -1)
task_numa_fault(current_nid, HPAGE_PMD_NR, migrated); task_numa_fault(current_nid, HPAGE_PMD_NR, false);
return 0; return 0;
} }
......
...@@ -1557,41 +1557,40 @@ bool numamigrate_update_ratelimit(pg_data_t *pgdat, unsigned long nr_pages) ...@@ -1557,41 +1557,40 @@ bool numamigrate_update_ratelimit(pg_data_t *pgdat, unsigned long nr_pages)
int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page) int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
{ {
int ret = 0; int page_lru;
VM_BUG_ON(compound_order(page) && !PageTransHuge(page)); VM_BUG_ON(compound_order(page) && !PageTransHuge(page));
/* Avoid migrating to a node that is nearly full */ /* Avoid migrating to a node that is nearly full */
if (migrate_balanced_pgdat(pgdat, 1UL << compound_order(page))) { if (!migrate_balanced_pgdat(pgdat, 1UL << compound_order(page)))
int page_lru; return 0;
if (isolate_lru_page(page)) { if (isolate_lru_page(page))
put_page(page); return 0;
return 0;
}
/* Page is isolated */ /*
ret = 1; * migrate_misplaced_transhuge_page() skips page migration's usual
page_lru = page_is_file_cache(page); * check on page_count(), so we must do it here, now that the page
if (!PageTransHuge(page)) * has been isolated: a GUP pin, or any other pin, prevents migration.
inc_zone_page_state(page, NR_ISOLATED_ANON + page_lru); * The expected page count is 3: 1 for page's mapcount and 1 for the
else * caller's pin and 1 for the reference taken by isolate_lru_page().
mod_zone_page_state(page_zone(page), */
NR_ISOLATED_ANON + page_lru, if (PageTransHuge(page) && page_count(page) != 3) {
HPAGE_PMD_NR); putback_lru_page(page);
return 0;
} }
page_lru = page_is_file_cache(page);
mod_zone_page_state(page_zone(page), NR_ISOLATED_ANON + page_lru,
hpage_nr_pages(page));
/* /*
* Page is either isolated or there is not enough space on the target * Isolating the page has taken another reference, so the
* node. If isolated, then it has taken a reference count and the * caller's reference can be safely dropped without the page
* callers reference can be safely dropped without the page * disappearing underneath us during migration.
* disappearing underneath us during migration. Otherwise the page is
* not to be migrated but the callers reference should still be
* dropped so it does not leak.
*/ */
put_page(page); put_page(page);
return 1;
return ret;
} }
/* /*
...@@ -1602,7 +1601,7 @@ int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page) ...@@ -1602,7 +1601,7 @@ int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
int migrate_misplaced_page(struct page *page, int node) int migrate_misplaced_page(struct page *page, int node)
{ {
pg_data_t *pgdat = NODE_DATA(node); pg_data_t *pgdat = NODE_DATA(node);
int isolated = 0; int isolated;
int nr_remaining; int nr_remaining;
LIST_HEAD(migratepages); LIST_HEAD(migratepages);
...@@ -1610,20 +1609,16 @@ int migrate_misplaced_page(struct page *page, int node) ...@@ -1610,20 +1609,16 @@ int migrate_misplaced_page(struct page *page, int node)
* Don't migrate pages that are mapped in multiple processes. * Don't migrate pages that are mapped in multiple processes.
* TODO: Handle false sharing detection instead of this hammer * TODO: Handle false sharing detection instead of this hammer
*/ */
if (page_mapcount(page) != 1) { if (page_mapcount(page) != 1)
put_page(page);
goto out; goto out;
}
/* /*
* Rate-limit the amount of data that is being migrated to a node. * Rate-limit the amount of data that is being migrated to a node.
* Optimal placement is no good if the memory bus is saturated and * Optimal placement is no good if the memory bus is saturated and
* all the time is being spent migrating! * all the time is being spent migrating!
*/ */
if (numamigrate_update_ratelimit(pgdat, 1)) { if (numamigrate_update_ratelimit(pgdat, 1))
put_page(page);
goto out; goto out;
}
isolated = numamigrate_isolate_page(pgdat, page); isolated = numamigrate_isolate_page(pgdat, page);
if (!isolated) if (!isolated)
...@@ -1640,12 +1635,19 @@ int migrate_misplaced_page(struct page *page, int node) ...@@ -1640,12 +1635,19 @@ int migrate_misplaced_page(struct page *page, int node)
} else } else
count_vm_numa_event(NUMA_PAGE_MIGRATE); count_vm_numa_event(NUMA_PAGE_MIGRATE);
BUG_ON(!list_empty(&migratepages)); BUG_ON(!list_empty(&migratepages));
out:
return isolated; return isolated;
out:
put_page(page);
return 0;
} }
#endif /* CONFIG_NUMA_BALANCING */ #endif /* CONFIG_NUMA_BALANCING */
#if defined(CONFIG_NUMA_BALANCING) && defined(CONFIG_TRANSPARENT_HUGEPAGE) #if defined(CONFIG_NUMA_BALANCING) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
/*
* Migrates a THP to a given target node. page must be locked and is unlocked
* before returning.
*/
int migrate_misplaced_transhuge_page(struct mm_struct *mm, int migrate_misplaced_transhuge_page(struct mm_struct *mm,
struct vm_area_struct *vma, struct vm_area_struct *vma,
pmd_t *pmd, pmd_t entry, pmd_t *pmd, pmd_t entry,
...@@ -1676,29 +1678,15 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, ...@@ -1676,29 +1678,15 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
new_page = alloc_pages_node(node, new_page = alloc_pages_node(node,
(GFP_TRANSHUGE | GFP_THISNODE) & ~__GFP_WAIT, HPAGE_PMD_ORDER); (GFP_TRANSHUGE | GFP_THISNODE) & ~__GFP_WAIT, HPAGE_PMD_ORDER);
if (!new_page) { if (!new_page)
count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR); goto out_fail;
goto out_dropref;
}
page_xchg_last_nid(new_page, page_last_nid(page)); page_xchg_last_nid(new_page, page_last_nid(page));
isolated = numamigrate_isolate_page(pgdat, page); isolated = numamigrate_isolate_page(pgdat, page);
if (!isolated) {
/*
* Failing to isolate or a GUP pin prevents migration. The expected
* page count is 2. 1 for anonymous pages without a mapping and 1
* for the callers pin. If the page was isolated, the page will
* need to be put back on the LRU.
*/
if (!isolated || page_count(page) != 2) {
count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
put_page(new_page); put_page(new_page);
if (isolated) { goto out_fail;
putback_lru_page(page);
isolated = 0;
goto out;
}
goto out_keep_locked;
} }
/* Prepare a page as a migration target */ /* Prepare a page as a migration target */
...@@ -1730,6 +1718,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, ...@@ -1730,6 +1718,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
putback_lru_page(page); putback_lru_page(page);
count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR); count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
isolated = 0;
goto out; goto out;
} }
...@@ -1774,9 +1763,11 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, ...@@ -1774,9 +1763,11 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
-HPAGE_PMD_NR); -HPAGE_PMD_NR);
return isolated; return isolated;
out_fail:
count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
out_dropref: out_dropref:
unlock_page(page);
put_page(page); put_page(page);
out_keep_locked:
return 0; return 0;
} }
#endif /* CONFIG_NUMA_BALANCING */ #endif /* CONFIG_NUMA_BALANCING */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment