Commit a3d0a918, authored by Kirill A. Shutemov, committed by Linus Torvalds

thp: make split_queue per-node

Andrea Arcangeli suggested making the split queue per-node to improve
scalability.  Let's do it.
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Suggested-by: Andrea Arcangeli <aarcange@redhat.com>
Reviewed-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rik van Riel <riel@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Jerome Marchand <jmarchan@redhat.com>
Cc: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 34229b27
...@@ -682,6 +682,12 @@ typedef struct pglist_data { ...@@ -682,6 +682,12 @@ typedef struct pglist_data {
*/ */
unsigned long first_deferred_pfn; unsigned long first_deferred_pfn;
#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
spinlock_t split_queue_lock;
struct list_head split_queue;
unsigned long split_queue_len;
#endif
} pg_data_t; } pg_data_t;
#define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages) #define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages)
......
...@@ -138,9 +138,6 @@ static struct khugepaged_scan khugepaged_scan = { ...@@ -138,9 +138,6 @@ static struct khugepaged_scan khugepaged_scan = {
.mm_head = LIST_HEAD_INIT(khugepaged_scan.mm_head), .mm_head = LIST_HEAD_INIT(khugepaged_scan.mm_head),
}; };
static DEFINE_SPINLOCK(split_queue_lock);
static LIST_HEAD(split_queue);
static unsigned long split_queue_len;
static struct shrinker deferred_split_shrinker; static struct shrinker deferred_split_shrinker;
static void set_recommended_min_free_kbytes(void) static void set_recommended_min_free_kbytes(void)
...@@ -3358,6 +3355,7 @@ int total_mapcount(struct page *page) ...@@ -3358,6 +3355,7 @@ int total_mapcount(struct page *page)
int split_huge_page_to_list(struct page *page, struct list_head *list) int split_huge_page_to_list(struct page *page, struct list_head *list)
{ {
struct page *head = compound_head(page); struct page *head = compound_head(page);
struct pglist_data *pgdata = NODE_DATA(page_to_nid(head));
struct anon_vma *anon_vma; struct anon_vma *anon_vma;
int count, mapcount, ret; int count, mapcount, ret;
bool mlocked; bool mlocked;
...@@ -3401,19 +3399,19 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) ...@@ -3401,19 +3399,19 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
lru_add_drain(); lru_add_drain();
/* Prevent deferred_split_scan() touching ->_count */ /* Prevent deferred_split_scan() touching ->_count */
spin_lock_irqsave(&split_queue_lock, flags); spin_lock_irqsave(&pgdata->split_queue_lock, flags);
count = page_count(head); count = page_count(head);
mapcount = total_mapcount(head); mapcount = total_mapcount(head);
if (!mapcount && count == 1) { if (!mapcount && count == 1) {
if (!list_empty(page_deferred_list(head))) { if (!list_empty(page_deferred_list(head))) {
split_queue_len--; pgdata->split_queue_len--;
list_del(page_deferred_list(head)); list_del(page_deferred_list(head));
} }
spin_unlock_irqrestore(&split_queue_lock, flags); spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
__split_huge_page(page, list); __split_huge_page(page, list);
ret = 0; ret = 0;
} else if (IS_ENABLED(CONFIG_DEBUG_VM) && mapcount) { } else if (IS_ENABLED(CONFIG_DEBUG_VM) && mapcount) {
spin_unlock_irqrestore(&split_queue_lock, flags); spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
pr_alert("total_mapcount: %u, page_count(): %u\n", pr_alert("total_mapcount: %u, page_count(): %u\n",
mapcount, count); mapcount, count);
if (PageTail(page)) if (PageTail(page))
...@@ -3421,7 +3419,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) ...@@ -3421,7 +3419,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
dump_page(page, "total_mapcount(head) > 0"); dump_page(page, "total_mapcount(head) > 0");
BUG(); BUG();
} else { } else {
spin_unlock_irqrestore(&split_queue_lock, flags); spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
unfreeze_page(anon_vma, head); unfreeze_page(anon_vma, head);
ret = -EBUSY; ret = -EBUSY;
} }
...@@ -3436,52 +3434,56 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) ...@@ -3436,52 +3434,56 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
void free_transhuge_page(struct page *page) void free_transhuge_page(struct page *page)
{ {
struct pglist_data *pgdata = NODE_DATA(page_to_nid(page));
unsigned long flags; unsigned long flags;
spin_lock_irqsave(&split_queue_lock, flags); spin_lock_irqsave(&pgdata->split_queue_lock, flags);
if (!list_empty(page_deferred_list(page))) { if (!list_empty(page_deferred_list(page))) {
split_queue_len--; pgdata->split_queue_len--;
list_del(page_deferred_list(page)); list_del(page_deferred_list(page));
} }
spin_unlock_irqrestore(&split_queue_lock, flags); spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
free_compound_page(page); free_compound_page(page);
} }
void deferred_split_huge_page(struct page *page) void deferred_split_huge_page(struct page *page)
{ {
struct pglist_data *pgdata = NODE_DATA(page_to_nid(page));
unsigned long flags; unsigned long flags;
VM_BUG_ON_PAGE(!PageTransHuge(page), page); VM_BUG_ON_PAGE(!PageTransHuge(page), page);
spin_lock_irqsave(&split_queue_lock, flags); spin_lock_irqsave(&pgdata->split_queue_lock, flags);
if (list_empty(page_deferred_list(page))) { if (list_empty(page_deferred_list(page))) {
list_add_tail(page_deferred_list(page), &split_queue); list_add_tail(page_deferred_list(page), &pgdata->split_queue);
split_queue_len++; pgdata->split_queue_len++;
} }
spin_unlock_irqrestore(&split_queue_lock, flags); spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
} }
static unsigned long deferred_split_count(struct shrinker *shrink, static unsigned long deferred_split_count(struct shrinker *shrink,
struct shrink_control *sc) struct shrink_control *sc)
{ {
struct pglist_data *pgdata = NODE_DATA(sc->nid);
/* /*
* Split a page from split_queue will free up at least one page, * Split a page from split_queue will free up at least one page,
* at most HPAGE_PMD_NR - 1. We don't track exact number. * at most HPAGE_PMD_NR - 1. We don't track exact number.
* Let's use HPAGE_PMD_NR / 2 as ballpark. * Let's use HPAGE_PMD_NR / 2 as ballpark.
*/ */
return ACCESS_ONCE(split_queue_len) * HPAGE_PMD_NR / 2; return ACCESS_ONCE(pgdata->split_queue_len) * HPAGE_PMD_NR / 2;
} }
static unsigned long deferred_split_scan(struct shrinker *shrink, static unsigned long deferred_split_scan(struct shrinker *shrink,
struct shrink_control *sc) struct shrink_control *sc)
{ {
struct pglist_data *pgdata = NODE_DATA(sc->nid);
unsigned long flags; unsigned long flags;
LIST_HEAD(list), *pos, *next; LIST_HEAD(list), *pos, *next;
struct page *page; struct page *page;
int split = 0; int split = 0;
spin_lock_irqsave(&split_queue_lock, flags); spin_lock_irqsave(&pgdata->split_queue_lock, flags);
list_splice_init(&split_queue, &list); list_splice_init(&pgdata->split_queue, &list);
/* Take pin on all head pages to avoid freeing them under us */ /* Take pin on all head pages to avoid freeing them under us */
list_for_each_safe(pos, next, &list) { list_for_each_safe(pos, next, &list) {
...@@ -3490,10 +3492,10 @@ static unsigned long deferred_split_scan(struct shrinker *shrink, ...@@ -3490,10 +3492,10 @@ static unsigned long deferred_split_scan(struct shrinker *shrink,
/* race with put_compound_page() */ /* race with put_compound_page() */
if (!get_page_unless_zero(page)) { if (!get_page_unless_zero(page)) {
list_del_init(page_deferred_list(page)); list_del_init(page_deferred_list(page));
split_queue_len--; pgdata->split_queue_len--;
} }
} }
spin_unlock_irqrestore(&split_queue_lock, flags); spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
list_for_each_safe(pos, next, &list) { list_for_each_safe(pos, next, &list) {
page = list_entry((void *)pos, struct page, mapping); page = list_entry((void *)pos, struct page, mapping);
...@@ -3505,9 +3507,9 @@ static unsigned long deferred_split_scan(struct shrinker *shrink, ...@@ -3505,9 +3507,9 @@ static unsigned long deferred_split_scan(struct shrinker *shrink,
put_page(page); put_page(page);
} }
spin_lock_irqsave(&split_queue_lock, flags); spin_lock_irqsave(&pgdata->split_queue_lock, flags);
list_splice_tail(&list, &split_queue); list_splice_tail(&list, &pgdata->split_queue);
spin_unlock_irqrestore(&split_queue_lock, flags); spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
return split * HPAGE_PMD_NR / 2; return split * HPAGE_PMD_NR / 2;
} }
...@@ -3516,6 +3518,7 @@ static struct shrinker deferred_split_shrinker = { ...@@ -3516,6 +3518,7 @@ static struct shrinker deferred_split_shrinker = {
.count_objects = deferred_split_count, .count_objects = deferred_split_count,
.scan_objects = deferred_split_scan, .scan_objects = deferred_split_scan,
.seeks = DEFAULT_SEEKS, .seeks = DEFAULT_SEEKS,
.flags = SHRINKER_NUMA_AWARE,
}; };
#ifdef CONFIG_DEBUG_FS #ifdef CONFIG_DEBUG_FS
......
...@@ -5209,6 +5209,11 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat) ...@@ -5209,6 +5209,11 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
spin_lock_init(&pgdat->numabalancing_migrate_lock); spin_lock_init(&pgdat->numabalancing_migrate_lock);
pgdat->numabalancing_migrate_nr_pages = 0; pgdat->numabalancing_migrate_nr_pages = 0;
pgdat->numabalancing_migrate_next_window = jiffies; pgdat->numabalancing_migrate_next_window = jiffies;
#endif
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
spin_lock_init(&pgdat->split_queue_lock);
INIT_LIST_HEAD(&pgdat->split_queue);
pgdat->split_queue_len = 0;
#endif #endif
init_waitqueue_head(&pgdat->kswapd_wait); init_waitqueue_head(&pgdat->kswapd_wait);
init_waitqueue_head(&pgdat->pfmemalloc_wait); init_waitqueue_head(&pgdat->pfmemalloc_wait);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment