Commit bcc54222 authored by Naoya Horiguchi, committed by Linus Torvalds

mm: hugetlb: introduce page_huge_active

It is not safe to call isolate_huge_page() on a hugepage concurrently:
doing so can leave the victim hugepage in an invalid state and result in
a BUG_ON().

The root problem is that struct page carries no easily accessible
information about a hugepage's activeness.  Note that a hugepage's
activeness just means being linked to hstate->hugepage_activelist, which
is not the same as a normal page's activeness, represented by the
PageActive flag.

Normal pages are isolated by isolate_lru_page(), which prechecks PageLRU
before isolation, so let's do the same for hugetlb with a new
page_huge_active().

set/clear_page_huge_active() should be called within hugetlb_lock.  But
hugetlb_cow() and hugetlb_no_page() don't do this, being justified because
in these functions set_page_huge_active() is called right after the
hugepage is allocated and no other thread tries to isolate it.

[akpm@linux-foundation.org: s/PageHugeActive/page_huge_active/, make it return bool]
[fengguang.wu@intel.com: set_page_huge_active() can be static]
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Hugh Dickins <hughd@google.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 822fc613
...@@ -924,6 +924,31 @@ struct hstate *size_to_hstate(unsigned long size) ...@@ -924,6 +924,31 @@ struct hstate *size_to_hstate(unsigned long size)
return NULL; return NULL;
} }
/*
* Test to determine whether the hugepage is "active/in-use" (i.e. being linked
* to hstate->hugepage_activelist.)
*
* The state is read from the PagePrivate flag of the page that follows the
* head page (page[1]); set_page_huge_active() and clear_page_huge_active()
* write that same flag.
*
* This function can be called for tail pages, but never returns true for them.
*
* Returns true only when @page is a head page and the flag on page[1] is set.
*/
bool page_huge_active(struct page *page)
{
/* Asserts that PageHuge(page) holds for the page passed in. */
VM_BUG_ON_PAGE(!PageHuge(page), page);
/* PageHead() is tested first, so page[1] is only inspected for a head page. */
return PageHead(page) && PagePrivate(&page[1]);
}
/*
* Mark a hugepage as active by setting the PagePrivate flag on page[1], the
* flag that page_huge_active() tests.
*
* Never called for tail page: the assertion below requires PageHeadHuge(page).
*
* Per the commit description, this should be called within hugetlb_lock;
* hugetlb_cow() and hugetlb_no_page() call it without the lock, right after
* the hugepage is allocated, when no other thread tries to isolate it.
*/
static void set_page_huge_active(struct page *page)
{
VM_BUG_ON_PAGE(!PageHeadHuge(page), page);
SetPagePrivate(&page[1]);
}
/*
* Mark a hugepage as no longer active by clearing the PagePrivate flag on
* page[1], the flag that page_huge_active() tests.
*
* @page must be a head page: the assertion below requires PageHeadHuge(page).
* Both callers visible in this change (free_huge_page() and
* isolate_huge_page()) invoke it with hugetlb_lock held.
*/
static void clear_page_huge_active(struct page *page)
{
VM_BUG_ON_PAGE(!PageHeadHuge(page), page);
ClearPagePrivate(&page[1]);
}
void free_huge_page(struct page *page) void free_huge_page(struct page *page)
{ {
/* /*
...@@ -952,6 +977,7 @@ void free_huge_page(struct page *page) ...@@ -952,6 +977,7 @@ void free_huge_page(struct page *page)
restore_reserve = true; restore_reserve = true;
spin_lock(&hugetlb_lock); spin_lock(&hugetlb_lock);
clear_page_huge_active(page);
hugetlb_cgroup_uncharge_page(hstate_index(h), hugetlb_cgroup_uncharge_page(hstate_index(h),
pages_per_huge_page(h), page); pages_per_huge_page(h), page);
if (restore_reserve) if (restore_reserve)
...@@ -2972,6 +2998,7 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -2972,6 +2998,7 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
copy_user_huge_page(new_page, old_page, address, vma, copy_user_huge_page(new_page, old_page, address, vma,
pages_per_huge_page(h)); pages_per_huge_page(h));
__SetPageUptodate(new_page); __SetPageUptodate(new_page);
set_page_huge_active(new_page);
mmun_start = address & huge_page_mask(h); mmun_start = address & huge_page_mask(h);
mmun_end = mmun_start + huge_page_size(h); mmun_end = mmun_start + huge_page_size(h);
...@@ -3084,6 +3111,7 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -3084,6 +3111,7 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
} }
clear_huge_page(page, address, pages_per_huge_page(h)); clear_huge_page(page, address, pages_per_huge_page(h));
__SetPageUptodate(page); __SetPageUptodate(page);
set_page_huge_active(page);
if (vma->vm_flags & VM_MAYSHARE) { if (vma->vm_flags & VM_MAYSHARE) {
int err; int err;
...@@ -3913,19 +3941,26 @@ int dequeue_hwpoisoned_huge_page(struct page *hpage) ...@@ -3913,19 +3941,26 @@ int dequeue_hwpoisoned_huge_page(struct page *hpage)
bool isolate_huge_page(struct page *page, struct list_head *list) bool isolate_huge_page(struct page *page, struct list_head *list)
{ {
bool ret = true;
VM_BUG_ON_PAGE(!PageHead(page), page); VM_BUG_ON_PAGE(!PageHead(page), page);
if (!get_page_unless_zero(page))
return false;
spin_lock(&hugetlb_lock); spin_lock(&hugetlb_lock);
if (!page_huge_active(page) || !get_page_unless_zero(page)) {
ret = false;
goto unlock;
}
clear_page_huge_active(page);
list_move_tail(&page->lru, list); list_move_tail(&page->lru, list);
unlock:
spin_unlock(&hugetlb_lock); spin_unlock(&hugetlb_lock);
return true; return ret;
} }
void putback_active_hugepage(struct page *page) void putback_active_hugepage(struct page *page)
{ {
VM_BUG_ON_PAGE(!PageHead(page), page); VM_BUG_ON_PAGE(!PageHead(page), page);
spin_lock(&hugetlb_lock); spin_lock(&hugetlb_lock);
set_page_huge_active(page);
list_move_tail(&page->lru, &(page_hstate(page))->hugepage_activelist); list_move_tail(&page->lru, &(page_hstate(page))->hugepage_activelist);
spin_unlock(&hugetlb_lock); spin_unlock(&hugetlb_lock);
put_page(page); put_page(page);
......
...@@ -1586,8 +1586,18 @@ static int soft_offline_huge_page(struct page *page, int flags) ...@@ -1586,8 +1586,18 @@ static int soft_offline_huge_page(struct page *page, int flags)
} }
unlock_page(hpage); unlock_page(hpage);
/* Keep page count to indicate a given hugepage is isolated. */ ret = isolate_huge_page(hpage, &pagelist);
list_move(&hpage->lru, &pagelist); if (ret) {
/*
* get_any_page() and isolate_huge_page() takes a refcount each,
* so need to drop one here.
*/
put_page(hpage);
} else {
pr_info("soft offline: %#lx hugepage failed to isolate\n", pfn);
return -EBUSY;
}
ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL, ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL,
MIGRATE_SYNC, MR_MEMORY_FAILURE); MIGRATE_SYNC, MR_MEMORY_FAILURE);
if (ret) { if (ret) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment