Commit b37ff71c, authored by Naoya Horiguchi, committed by Linus Torvalds

mm: hwpoison: change PageHWPoison behavior on hugetlb pages

We'd like to narrow down the error region when a memory error occurs on a
hugetlb page.  However, we currently set the PageHWPoison flag on all
subpages of the affected hugepage and add the number of subpages to
num_poisoned_pages, which doesn't fit our purpose.

So this patch changes the behavior: we set PageHWPoison only on the head
page and increase num_poisoned_pages by just 1.  This is preparation for
the narrowing-down part, which comes in later patches.

Link: http://lkml.kernel.org/r/1496305019-5493-4-git-send-email-n-horiguchi@ah.jp.nec.com
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 09612fa6
...@@ -196,15 +196,6 @@ static inline void num_poisoned_pages_dec(void) ...@@ -196,15 +196,6 @@ static inline void num_poisoned_pages_dec(void)
atomic_long_dec(&num_poisoned_pages); atomic_long_dec(&num_poisoned_pages);
} }
static inline void num_poisoned_pages_add(long num)
{
atomic_long_add(num, &num_poisoned_pages);
}
static inline void num_poisoned_pages_sub(long num)
{
atomic_long_sub(num, &num_poisoned_pages);
}
#else #else
static inline swp_entry_t make_hwpoison_entry(struct page *page) static inline swp_entry_t make_hwpoison_entry(struct page *page)
......
...@@ -1009,22 +1009,6 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn, ...@@ -1009,22 +1009,6 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
return unmap_success; return unmap_success;
} }
static void set_page_hwpoison_huge_page(struct page *hpage)
{
int i;
int nr_pages = 1 << compound_order(hpage);
for (i = 0; i < nr_pages; i++)
SetPageHWPoison(hpage + i);
}
static void clear_page_hwpoison_huge_page(struct page *hpage)
{
int i;
int nr_pages = 1 << compound_order(hpage);
for (i = 0; i < nr_pages; i++)
ClearPageHWPoison(hpage + i);
}
/** /**
* memory_failure - Handle memory failure of a page. * memory_failure - Handle memory failure of a page.
* @pfn: Page Number of the corrupted page * @pfn: Page Number of the corrupted page
...@@ -1050,7 +1034,6 @@ int memory_failure(unsigned long pfn, int trapno, int flags) ...@@ -1050,7 +1034,6 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
struct page *hpage; struct page *hpage;
struct page *orig_head; struct page *orig_head;
int res; int res;
unsigned int nr_pages;
unsigned long page_flags; unsigned long page_flags;
if (!sysctl_memory_failure_recovery) if (!sysctl_memory_failure_recovery)
...@@ -1064,24 +1047,23 @@ int memory_failure(unsigned long pfn, int trapno, int flags) ...@@ -1064,24 +1047,23 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
p = pfn_to_page(pfn); p = pfn_to_page(pfn);
orig_head = hpage = compound_head(p); orig_head = hpage = compound_head(p);
/* tmporary check code, to be updated in later patches */
if (PageHuge(p)) {
if (TestSetPageHWPoison(hpage)) {
pr_err("Memory failure: %#lx: already hardware poisoned\n", pfn);
return 0;
}
goto tmp;
}
if (TestSetPageHWPoison(p)) { if (TestSetPageHWPoison(p)) {
pr_err("Memory failure: %#lx: already hardware poisoned\n", pr_err("Memory failure: %#lx: already hardware poisoned\n",
pfn); pfn);
return 0; return 0;
} }
/* tmp:
* Currently errors on hugetlbfs pages are measured in hugepage units, num_poisoned_pages_inc();
* so nr_pages should be 1 << compound_order. OTOH when errors are on
* transparent hugepages, they are supposed to be split and error
* measurement is done in normal page units. So nr_pages should be one
* in this case.
*/
if (PageHuge(p))
nr_pages = 1 << compound_order(hpage);
else /* normal page or thp */
nr_pages = 1;
num_poisoned_pages_add(nr_pages);
/* /*
* We need/can do nothing about count=0 pages. * We need/can do nothing about count=0 pages.
...@@ -1109,12 +1091,11 @@ int memory_failure(unsigned long pfn, int trapno, int flags) ...@@ -1109,12 +1091,11 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
if (PageHWPoison(hpage)) { if (PageHWPoison(hpage)) {
if ((hwpoison_filter(p) && TestClearPageHWPoison(p)) if ((hwpoison_filter(p) && TestClearPageHWPoison(p))
|| (p != hpage && TestSetPageHWPoison(hpage))) { || (p != hpage && TestSetPageHWPoison(hpage))) {
num_poisoned_pages_sub(nr_pages); num_poisoned_pages_dec();
unlock_page(hpage); unlock_page(hpage);
return 0; return 0;
} }
} }
set_page_hwpoison_huge_page(hpage);
res = dequeue_hwpoisoned_huge_page(hpage); res = dequeue_hwpoisoned_huge_page(hpage);
action_result(pfn, MF_MSG_FREE_HUGE, action_result(pfn, MF_MSG_FREE_HUGE,
res ? MF_IGNORED : MF_DELAYED); res ? MF_IGNORED : MF_DELAYED);
...@@ -1137,7 +1118,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags) ...@@ -1137,7 +1118,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
pr_err("Memory failure: %#lx: thp split failed\n", pr_err("Memory failure: %#lx: thp split failed\n",
pfn); pfn);
if (TestClearPageHWPoison(p)) if (TestClearPageHWPoison(p))
num_poisoned_pages_sub(nr_pages); num_poisoned_pages_dec();
put_hwpoison_page(p); put_hwpoison_page(p);
return -EBUSY; return -EBUSY;
} }
...@@ -1193,14 +1174,14 @@ int memory_failure(unsigned long pfn, int trapno, int flags) ...@@ -1193,14 +1174,14 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
*/ */
if (!PageHWPoison(p)) { if (!PageHWPoison(p)) {
pr_err("Memory failure: %#lx: just unpoisoned\n", pfn); pr_err("Memory failure: %#lx: just unpoisoned\n", pfn);
num_poisoned_pages_sub(nr_pages); num_poisoned_pages_dec();
unlock_page(hpage); unlock_page(hpage);
put_hwpoison_page(hpage); put_hwpoison_page(hpage);
return 0; return 0;
} }
if (hwpoison_filter(p)) { if (hwpoison_filter(p)) {
if (TestClearPageHWPoison(p)) if (TestClearPageHWPoison(p))
num_poisoned_pages_sub(nr_pages); num_poisoned_pages_dec();
unlock_page(hpage); unlock_page(hpage);
put_hwpoison_page(hpage); put_hwpoison_page(hpage);
return 0; return 0;
...@@ -1219,14 +1200,6 @@ int memory_failure(unsigned long pfn, int trapno, int flags) ...@@ -1219,14 +1200,6 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
put_hwpoison_page(hpage); put_hwpoison_page(hpage);
return 0; return 0;
} }
/*
* Set PG_hwpoison on all pages in an error hugepage,
* because containment is done in hugepage unit for now.
* Since we have done TestSetPageHWPoison() for the head page with
* page lock held, we can safely set PG_hwpoison bits on tail pages.
*/
if (PageHuge(p))
set_page_hwpoison_huge_page(hpage);
/* /*
* It's very difficult to mess with pages currently under IO * It's very difficult to mess with pages currently under IO
...@@ -1397,7 +1370,6 @@ int unpoison_memory(unsigned long pfn) ...@@ -1397,7 +1370,6 @@ int unpoison_memory(unsigned long pfn)
struct page *page; struct page *page;
struct page *p; struct page *p;
int freeit = 0; int freeit = 0;
unsigned int nr_pages;
static DEFINE_RATELIMIT_STATE(unpoison_rs, DEFAULT_RATELIMIT_INTERVAL, static DEFINE_RATELIMIT_STATE(unpoison_rs, DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST); DEFAULT_RATELIMIT_BURST);
...@@ -1442,8 +1414,6 @@ int unpoison_memory(unsigned long pfn) ...@@ -1442,8 +1414,6 @@ int unpoison_memory(unsigned long pfn)
return 0; return 0;
} }
nr_pages = 1 << compound_order(page);
if (!get_hwpoison_page(p)) { if (!get_hwpoison_page(p)) {
/* /*
* Since HWPoisoned hugepage should have non-zero refcount, * Since HWPoisoned hugepage should have non-zero refcount,
...@@ -1473,10 +1443,8 @@ int unpoison_memory(unsigned long pfn) ...@@ -1473,10 +1443,8 @@ int unpoison_memory(unsigned long pfn)
if (TestClearPageHWPoison(page)) { if (TestClearPageHWPoison(page)) {
unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n", unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n",
pfn, &unpoison_rs); pfn, &unpoison_rs);
num_poisoned_pages_sub(nr_pages); num_poisoned_pages_dec();
freeit = 1; freeit = 1;
if (PageHuge(page))
clear_page_hwpoison_huge_page(page);
} }
unlock_page(page); unlock_page(page);
...@@ -1608,15 +1576,11 @@ static int soft_offline_huge_page(struct page *page, int flags) ...@@ -1608,15 +1576,11 @@ static int soft_offline_huge_page(struct page *page, int flags)
ret = -EIO; ret = -EIO;
} else { } else {
/* overcommit hugetlb page will be freed to buddy */ /* overcommit hugetlb page will be freed to buddy */
if (PageHuge(page)) {
set_page_hwpoison_huge_page(hpage);
dequeue_hwpoisoned_huge_page(hpage);
num_poisoned_pages_add(1 << compound_order(hpage));
} else {
SetPageHWPoison(page); SetPageHWPoison(page);
if (PageHuge(page))
dequeue_hwpoisoned_huge_page(hpage);
num_poisoned_pages_inc(); num_poisoned_pages_inc();
} }
}
return ret; return ret;
} }
...@@ -1731,15 +1695,12 @@ static int soft_offline_in_use_page(struct page *page, int flags) ...@@ -1731,15 +1695,12 @@ static int soft_offline_in_use_page(struct page *page, int flags)
static void soft_offline_free_page(struct page *page) static void soft_offline_free_page(struct page *page)
{ {
if (PageHuge(page)) { struct page *head = compound_head(page);
struct page *hpage = compound_head(page);
set_page_hwpoison_huge_page(hpage); if (!TestSetPageHWPoison(head)) {
if (!dequeue_hwpoisoned_huge_page(hpage))
num_poisoned_pages_add(1 << compound_order(hpage));
} else {
if (!TestSetPageHWPoison(page))
num_poisoned_pages_inc(); num_poisoned_pages_inc();
if (PageHuge(head))
dequeue_hwpoisoned_huge_page(head);
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment