Commit b2261026 authored by Joonsoo Kim, committed by Linus Torvalds

mm, hugetlb: fix and clean-up node iteration code to alloc or free

The current node iteration code has a minor problem: it performs one extra node
rotation when allocation fails on every node.  For example, if we start
allocating at node 0, iteration also stops at node 0, yet the next allocation
then starts at node 1 instead of node 0.
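
The extra rotation is easiest to see in a minimal user-space sketch
(illustration only, not kernel code): next_node() stands in for
hstate_next_node_to_alloc(), which returns the saved next node and then
advances it, and try_alloc() is assumed to fail on every node:

#include <stdio.h>

#define NR_NODES 2

static int saved_next;	/* models h->next_nid_to_alloc */

/* Return the saved "next node" and advance it, wrapping around. */
static int next_node(void)
{
	int nid = saved_next;

	saved_next = (saved_next + 1) % NR_NODES;
	return nid;
}

/* Assume every allocation fails, as on a fully exhausted system. */
static int try_alloc(int nid)
{
	(void)nid;
	return 0;
}

int main(void)
{
	int start_nid, next_nid;

	/* The old do/while pattern from alloc_fresh_huge_page(). */
	start_nid = next_node();
	next_nid = start_nid;
	do {
		if (try_alloc(next_nid))
			break;
		/* Advances once more on the last, failing iteration. */
		next_nid = next_node();
	} while (next_nid != start_nid);

	/* Three next_node() calls for two nodes: prints 1, not 0. */
	printf("next allocation would start at node %d\n", saved_next);
	return 0;
}

A pass that starts at node 0 and fails everywhere should leave the saved
pointer at node 0 again; instead it leaves it at node 1.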

I introduce the new macros "for_each_node_mask_to_[alloc|free]" and use them
to fix and clean up the node iteration code for allocating and freeing.  This
makes the code easier to understand.
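
A note on the macros' loop condition: (node = hstate_next_node_to_alloc(hs,
mask)) evaluates to the selected node id, which is falsy when node 0 is
chosen, so the "|| 1" keeps the condition true and leaves termination to the
nr_nodes counter alone.  The same toy model as above (next_node() again
standing in for the hstate helpers, two nodes assumed) shows that the new
idiom calls the helper exactly nodes_weight times, so a fully failed pass ends
back where it started:

#include <stdio.h>

#define NR_NODES 2

static int saved_next;	/* models h->next_nid_to_alloc */

static int next_node(void)
{
	int nid = saved_next;

	saved_next = (saved_next + 1) % NR_NODES;
	return nid;
}

/*
 * User-space mock of for_each_node_mask_to_alloc(): the "|| 1" keeps
 * the condition true even when next_node() returns node 0.
 */
#define for_each_node_to_alloc(nr_nodes, node)			\
	for (nr_nodes = NR_NODES;				\
	     nr_nodes > 0 && ((node = next_node()) || 1);	\
	     nr_nodes--)

static int try_alloc(int nid)
{
	(void)nid;
	return 0;	/* every allocation fails */
}

int main(void)
{
	int nr_nodes, node;

	for_each_node_to_alloc(nr_nodes, node) {
		if (try_alloc(node))
			break;
	}

	/* Exactly NR_NODES calls to next_node(): prints 0. */
	printf("next allocation starts at node %d\n", saved_next);
	return 0;
}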
Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Acked-by: Hillf Danton <dhillf@gmail.com>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Wanpeng Li <liwanp@linux.vnet.ibm.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Davidlohr Bueso <davidlohr.bueso@hp.com>
Cc: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 81a6fcae
@@ -772,33 +772,6 @@ static int hstate_next_node_to_alloc(struct hstate *h,
 	return nid;
 }
 
-static int alloc_fresh_huge_page(struct hstate *h, nodemask_t *nodes_allowed)
-{
-	struct page *page;
-	int start_nid;
-	int next_nid;
-	int ret = 0;
-
-	start_nid = hstate_next_node_to_alloc(h, nodes_allowed);
-	next_nid = start_nid;
-
-	do {
-		page = alloc_fresh_huge_page_node(h, next_nid);
-		if (page) {
-			ret = 1;
-			break;
-		}
-		next_nid = hstate_next_node_to_alloc(h, nodes_allowed);
-	} while (next_nid != start_nid);
-
-	if (ret)
-		count_vm_event(HTLB_BUDDY_PGALLOC);
-	else
-		count_vm_event(HTLB_BUDDY_PGALLOC_FAIL);
-
-	return ret;
-}
-
 /*
  * helper for free_pool_huge_page() - return the previously saved
  * node ["this node"] from which to free a huge page.  Advance the
@@ -817,6 +790,40 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
 	return nid;
 }
 
+#define for_each_node_mask_to_alloc(hs, nr_nodes, node, mask)		\
+	for (nr_nodes = nodes_weight(*mask);				\
+		nr_nodes > 0 &&						\
+		((node = hstate_next_node_to_alloc(hs, mask)) || 1);	\
+		nr_nodes--)
+
+#define for_each_node_mask_to_free(hs, nr_nodes, node, mask)		\
+	for (nr_nodes = nodes_weight(*mask);				\
+		nr_nodes > 0 &&						\
+		((node = hstate_next_node_to_free(hs, mask)) || 1);	\
+		nr_nodes--)
+
+static int alloc_fresh_huge_page(struct hstate *h, nodemask_t *nodes_allowed)
+{
+	struct page *page;
+	int nr_nodes, node;
+	int ret = 0;
+
+	for_each_node_mask_to_alloc(h, nr_nodes, node, nodes_allowed) {
+		page = alloc_fresh_huge_page_node(h, node);
+		if (page) {
+			ret = 1;
+			break;
+		}
+	}
+
+	if (ret)
+		count_vm_event(HTLB_BUDDY_PGALLOC);
+	else
+		count_vm_event(HTLB_BUDDY_PGALLOC_FAIL);
+
+	return ret;
+}
+
 /*
  * Free huge page from pool from next node to free.
  * Attempt to keep persistent huge pages more or less
@@ -826,36 +833,31 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
 static int free_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed,
 						 bool acct_surplus)
 {
-	int start_nid;
-	int next_nid;
+	int nr_nodes, node;
 	int ret = 0;
 
-	start_nid = hstate_next_node_to_free(h, nodes_allowed);
-	next_nid = start_nid;
-
-	do {
+	for_each_node_mask_to_free(h, nr_nodes, node, nodes_allowed) {
 		/*
 		 * If we're returning unused surplus pages, only examine
 		 * nodes with surplus pages.
 		 */
-		if ((!acct_surplus || h->surplus_huge_pages_node[next_nid]) &&
-		    !list_empty(&h->hugepage_freelists[next_nid])) {
+		if ((!acct_surplus || h->surplus_huge_pages_node[node]) &&
+		    !list_empty(&h->hugepage_freelists[node])) {
 			struct page *page =
-				list_entry(h->hugepage_freelists[next_nid].next,
+				list_entry(h->hugepage_freelists[node].next,
 					  struct page, lru);
 			list_del(&page->lru);
 			h->free_huge_pages--;
-			h->free_huge_pages_node[next_nid]--;
+			h->free_huge_pages_node[node]--;
 			if (acct_surplus) {
 				h->surplus_huge_pages--;
-				h->surplus_huge_pages_node[next_nid]--;
+				h->surplus_huge_pages_node[node]--;
 			}
 			update_and_free_page(h, page);
 			ret = 1;
 			break;
 		}
-		next_nid = hstate_next_node_to_free(h, nodes_allowed);
-	} while (next_nid != start_nid);
+	}
 
 	return ret;
 }
@@ -1192,14 +1194,12 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
 int __weak alloc_bootmem_huge_page(struct hstate *h)
 {
 	struct huge_bootmem_page *m;
-	int nr_nodes = nodes_weight(node_states[N_MEMORY]);
+	int nr_nodes, node;
 
-	while (nr_nodes) {
+	for_each_node_mask_to_alloc(h, nr_nodes, node, &node_states[N_MEMORY]) {
 		void *addr;
 
-		addr = __alloc_bootmem_node_nopanic(
-				NODE_DATA(hstate_next_node_to_alloc(h,
-						&node_states[N_MEMORY])),
+		addr = __alloc_bootmem_node_nopanic(NODE_DATA(node),
 				huge_page_size(h), huge_page_size(h), 0);
 
 		if (addr) {
@@ -1211,7 +1211,6 @@ int __weak alloc_bootmem_huge_page(struct hstate *h)
 			m = addr;
 			goto found;
 		}
-		nr_nodes--;
 	}
 	return 0;
@@ -1350,48 +1349,28 @@ static inline void try_to_free_low(struct hstate *h, unsigned long count,
 static int adjust_pool_surplus(struct hstate *h, nodemask_t *nodes_allowed,
 				int delta)
 {
-	int start_nid, next_nid;
-	int ret = 0;
+	int nr_nodes, node;
 
 	VM_BUG_ON(delta != -1 && delta != 1);
 
-	if (delta < 0)
-		start_nid = hstate_next_node_to_alloc(h, nodes_allowed);
-	else
-		start_nid = hstate_next_node_to_free(h, nodes_allowed);
-	next_nid = start_nid;
-
-	do {
-		int nid = next_nid;
-		if (delta < 0)  {
-			/*
-			 * To shrink on this node, there must be a surplus page
-			 */
-			if (!h->surplus_huge_pages_node[nid]) {
-				next_nid = hstate_next_node_to_alloc(h,
-								nodes_allowed);
-				continue;
-			}
+	if (delta < 0) {
+		for_each_node_mask_to_alloc(h, nr_nodes, node, nodes_allowed) {
+			if (h->surplus_huge_pages_node[node])
+				goto found;
 		}
-		if (delta > 0) {
-			/*
-			 * Surplus cannot exceed the total number of pages
-			 */
-			if (h->surplus_huge_pages_node[nid] >=
-						h->nr_huge_pages_node[nid]) {
-				next_nid = hstate_next_node_to_free(h,
-								nodes_allowed);
-				continue;
-			}
+	} else {
+		for_each_node_mask_to_free(h, nr_nodes, node, nodes_allowed) {
+			if (h->surplus_huge_pages_node[node] <
+					h->nr_huge_pages_node[node])
+				goto found;
 		}
+	}
+	return 0;
 
-		h->surplus_huge_pages += delta;
-		h->surplus_huge_pages_node[nid] += delta;
-		ret = 1;
-		break;
-	} while (next_nid != start_nid);
-
-	return ret;
+found:
+	h->surplus_huge_pages += delta;
+	h->surplus_huge_pages_node[node] += delta;
+	return 1;
 }
 
 #define persistent_huge_pages(h) (h->nr_huge_pages - h->surplus_huge_pages)
...