Commit c0cd6f55, authored by Johannes Weiner, committed by Andrew Morton

mm: page_alloc: fix freelist movement during block conversion

Currently, page block type conversion during fallbacks, atomic
reservations and isolation can strand various amounts of free pages on
incorrect freelists.

For example, fallback stealing moves free pages in the block to the new
type's freelists, but then may not actually claim the block for that type
if there aren't enough compatible pages already allocated.

In all cases, free page moving might fail if the block straddles more than
one zone, in which case no free pages are moved at all, but the block type
is changed anyway.

This is detrimental to type hygiene on the freelists.  It encourages
incompatible page mixing down the line (ask for one type, get another) and
thus contributes to long-term fragmentation.

Split the process into a proper transaction: check first if conversion
will happen, then try to move the free pages, and only if that was
successful convert the block to the new type.

[baolin.wang@linux.alibaba.com: fix allocation failures with CONFIG_CMA]
  Link: https://lkml.kernel.org/r/a97697e0-45b0-4f71-b087-fdc7a1d43c0e@linux.alibaba.com
Link: https://lkml.kernel.org/r/20240320180429.678181-7-hannes@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Tested-by: "Huang, Ying" <ying.huang@intel.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Tested-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 2dd482ba
...@@ -34,8 +34,7 @@ static inline bool is_migrate_isolate(int migratetype) ...@@ -34,8 +34,7 @@ static inline bool is_migrate_isolate(int migratetype)
#define REPORT_FAILURE 0x2 #define REPORT_FAILURE 0x2
void set_pageblock_migratetype(struct page *page, int migratetype); void set_pageblock_migratetype(struct page *page, int migratetype);
int move_freepages_block(struct zone *zone, struct page *page, int move_freepages_block(struct zone *zone, struct page *page, int migratetype);
int migratetype, int *num_movable);
int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
int migratetype, int flags, gfp_t gfp_flags); int migratetype, int flags, gfp_t gfp_flags);
......
...@@ -1601,9 +1601,8 @@ static inline struct page *__rmqueue_cma_fallback(struct zone *zone, ...@@ -1601,9 +1601,8 @@ static inline struct page *__rmqueue_cma_fallback(struct zone *zone,
* Note that start_page and end_pages are not aligned on a pageblock * Note that start_page and end_pages are not aligned on a pageblock
* boundary. If alignment is required, use move_freepages_block() * boundary. If alignment is required, use move_freepages_block()
*/ */
static int move_freepages(struct zone *zone, static int move_freepages(struct zone *zone, unsigned long start_pfn,
unsigned long start_pfn, unsigned long end_pfn, unsigned long end_pfn, int migratetype)
int migratetype, int *num_movable)
{ {
struct page *page; struct page *page;
unsigned long pfn; unsigned long pfn;
...@@ -1613,14 +1612,6 @@ static int move_freepages(struct zone *zone, ...@@ -1613,14 +1612,6 @@ static int move_freepages(struct zone *zone,
for (pfn = start_pfn; pfn <= end_pfn;) { for (pfn = start_pfn; pfn <= end_pfn;) {
page = pfn_to_page(pfn); page = pfn_to_page(pfn);
if (!PageBuddy(page)) { if (!PageBuddy(page)) {
/*
* We assume that pages that could be isolated for
* migration are movable. But we don't actually try
* isolating, as that would be expensive.
*/
if (num_movable &&
(PageLRU(page) || __PageMovable(page)))
(*num_movable)++;
pfn++; pfn++;
continue; continue;
} }
...@@ -1638,17 +1629,16 @@ static int move_freepages(struct zone *zone, ...@@ -1638,17 +1629,16 @@ static int move_freepages(struct zone *zone,
return pages_moved; return pages_moved;
} }
int move_freepages_block(struct zone *zone, struct page *page, static bool prep_move_freepages_block(struct zone *zone, struct page *page,
int migratetype, int *num_movable) unsigned long *start_pfn,
unsigned long *end_pfn,
int *num_free, int *num_movable)
{ {
unsigned long start_pfn, end_pfn, pfn; unsigned long pfn, start, end;
if (num_movable)
*num_movable = 0;
pfn = page_to_pfn(page); pfn = page_to_pfn(page);
start_pfn = pageblock_start_pfn(pfn); start = pageblock_start_pfn(pfn);
end_pfn = pageblock_end_pfn(pfn) - 1; end = pageblock_end_pfn(pfn) - 1;
/* /*
* The caller only has the lock for @zone, don't touch ranges * The caller only has the lock for @zone, don't touch ranges
...@@ -1657,13 +1647,50 @@ int move_freepages_block(struct zone *zone, struct page *page, ...@@ -1657,13 +1647,50 @@ int move_freepages_block(struct zone *zone, struct page *page,
* accompanied by other operations such as migratetype updates * accompanied by other operations such as migratetype updates
* which also should be locked. * which also should be locked.
*/ */
if (!zone_spans_pfn(zone, start_pfn)) if (!zone_spans_pfn(zone, start))
return 0; return false;
if (!zone_spans_pfn(zone, end_pfn)) if (!zone_spans_pfn(zone, end))
return 0; return false;
*start_pfn = start;
*end_pfn = end;
if (num_free) {
*num_free = 0;
*num_movable = 0;
for (pfn = start; pfn <= end;) {
page = pfn_to_page(pfn);
if (PageBuddy(page)) {
int nr = 1 << buddy_order(page);
*num_free += nr;
pfn += nr;
continue;
}
/*
* We assume that pages that could be isolated for
* migration are movable. But we don't actually try
* isolating, as that would be expensive.
*/
if (PageLRU(page) || __PageMovable(page))
(*num_movable)++;
pfn++;
}
}
return true;
}
int move_freepages_block(struct zone *zone, struct page *page,
int migratetype)
{
unsigned long start_pfn, end_pfn;
return move_freepages(zone, start_pfn, end_pfn, migratetype, if (!prep_move_freepages_block(zone, page, &start_pfn, &end_pfn,
num_movable); NULL, NULL))
return -1;
return move_freepages(zone, start_pfn, end_pfn, migratetype);
} }
static void change_pageblock_range(struct page *pageblock_page, static void change_pageblock_range(struct page *pageblock_page,
...@@ -1748,33 +1775,37 @@ static inline bool boost_watermark(struct zone *zone) ...@@ -1748,33 +1775,37 @@ static inline bool boost_watermark(struct zone *zone)
} }
/* /*
* This function implements actual steal behaviour. If order is large enough, * This function implements actual steal behaviour. If order is large enough, we
* we can steal whole pageblock. If not, we first move freepages in this * can claim the whole pageblock for the requested migratetype. If not, we check
* pageblock to our migratetype and determine how many already-allocated pages * the pageblock for constituent pages; if at least half of the pages are free
* are there in the pageblock with a compatible migratetype. If at least half * or compatible, we can still claim the whole block, so pages freed in the
* of pages are free or compatible, we can change migratetype of the pageblock * future will be put on the correct free list. Otherwise, we isolate exactly
* itself, so pages freed in the future will be put on the correct free list. * the order we need from the fallback block and leave its migratetype alone.
*/ */
static void steal_suitable_fallback(struct zone *zone, struct page *page, static struct page *
unsigned int alloc_flags, int start_type, bool whole_block) steal_suitable_fallback(struct zone *zone, struct page *page,
int current_order, int order, int start_type,
unsigned int alloc_flags, bool whole_block)
{ {
unsigned int current_order = buddy_order(page);
int free_pages, movable_pages, alike_pages; int free_pages, movable_pages, alike_pages;
int old_block_type; unsigned long start_pfn, end_pfn;
int block_type;
old_block_type = get_pageblock_migratetype(page); block_type = get_pageblock_migratetype(page);
/* /*
* This can happen due to races and we want to prevent broken * This can happen due to races and we want to prevent broken
* highatomic accounting. * highatomic accounting.
*/ */
if (is_migrate_highatomic(old_block_type)) if (is_migrate_highatomic(block_type))
goto single_page; goto single_page;
/* Take ownership for orders >= pageblock_order */ /* Take ownership for orders >= pageblock_order */
if (current_order >= pageblock_order) { if (current_order >= pageblock_order) {
del_page_from_free_list(page, zone, current_order);
change_pageblock_range(page, current_order, start_type); change_pageblock_range(page, current_order, start_type);
goto single_page; expand(zone, page, order, current_order, start_type);
return page;
} }
/* /*
...@@ -1789,10 +1820,9 @@ static void steal_suitable_fallback(struct zone *zone, struct page *page, ...@@ -1789,10 +1820,9 @@ static void steal_suitable_fallback(struct zone *zone, struct page *page,
if (!whole_block) if (!whole_block)
goto single_page; goto single_page;
free_pages = move_freepages_block(zone, page, start_type,
&movable_pages);
/* moving whole block can fail due to zone boundary conditions */ /* moving whole block can fail due to zone boundary conditions */
if (!free_pages) if (!prep_move_freepages_block(zone, page, &start_pfn, &end_pfn,
&free_pages, &movable_pages))
goto single_page; goto single_page;
/* /*
...@@ -1810,7 +1840,7 @@ static void steal_suitable_fallback(struct zone *zone, struct page *page, ...@@ -1810,7 +1840,7 @@ static void steal_suitable_fallback(struct zone *zone, struct page *page,
* vice versa, be conservative since we can't distinguish the * vice versa, be conservative since we can't distinguish the
* exact migratetype of non-movable pages. * exact migratetype of non-movable pages.
*/ */
if (old_block_type == MIGRATE_MOVABLE) if (block_type == MIGRATE_MOVABLE)
alike_pages = pageblock_nr_pages alike_pages = pageblock_nr_pages
- (free_pages + movable_pages); - (free_pages + movable_pages);
else else
...@@ -1821,13 +1851,16 @@ static void steal_suitable_fallback(struct zone *zone, struct page *page, ...@@ -1821,13 +1851,16 @@ static void steal_suitable_fallback(struct zone *zone, struct page *page,
* compatible migratability as our allocation, claim the whole block. * compatible migratability as our allocation, claim the whole block.
*/ */
if (free_pages + alike_pages >= (1 << (pageblock_order-1)) || if (free_pages + alike_pages >= (1 << (pageblock_order-1)) ||
page_group_by_mobility_disabled) page_group_by_mobility_disabled) {
move_freepages(zone, start_pfn, end_pfn, start_type);
set_pageblock_migratetype(page, start_type); set_pageblock_migratetype(page, start_type);
return __rmqueue_smallest(zone, order, start_type);
return; }
single_page: single_page:
move_to_free_list(page, zone, current_order, start_type); del_page_from_free_list(page, zone, current_order);
expand(zone, page, order, current_order, block_type);
return page;
} }
/* /*
...@@ -1895,9 +1928,10 @@ static void reserve_highatomic_pageblock(struct page *page, struct zone *zone) ...@@ -1895,9 +1928,10 @@ static void reserve_highatomic_pageblock(struct page *page, struct zone *zone)
mt = get_pageblock_migratetype(page); mt = get_pageblock_migratetype(page);
/* Only reserve normal pageblocks (i.e., they can merge with others) */ /* Only reserve normal pageblocks (i.e., they can merge with others) */
if (migratetype_is_mergeable(mt)) { if (migratetype_is_mergeable(mt)) {
zone->nr_reserved_highatomic += pageblock_nr_pages; if (move_freepages_block(zone, page, MIGRATE_HIGHATOMIC) != -1) {
set_pageblock_migratetype(page, MIGRATE_HIGHATOMIC); set_pageblock_migratetype(page, MIGRATE_HIGHATOMIC);
move_freepages_block(zone, page, MIGRATE_HIGHATOMIC, NULL); zone->nr_reserved_highatomic += pageblock_nr_pages;
}
} }
out_unlock: out_unlock:
...@@ -1922,7 +1956,7 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac, ...@@ -1922,7 +1956,7 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
struct zone *zone; struct zone *zone;
struct page *page; struct page *page;
int order; int order;
bool ret; int ret;
for_each_zone_zonelist_nodemask(zone, z, zonelist, ac->highest_zoneidx, for_each_zone_zonelist_nodemask(zone, z, zonelist, ac->highest_zoneidx,
ac->nodemask) { ac->nodemask) {
...@@ -1971,10 +2005,14 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac, ...@@ -1971,10 +2005,14 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
* of pageblocks that cannot be completely freed * of pageblocks that cannot be completely freed
* may increase. * may increase.
*/ */
ret = move_freepages_block(zone, page, ac->migratetype);
/*
* Reserving this block already succeeded, so this should
* not fail on zone boundaries.
*/
WARN_ON_ONCE(ret == -1);
set_pageblock_migratetype(page, ac->migratetype); set_pageblock_migratetype(page, ac->migratetype);
ret = move_freepages_block(zone, page, ac->migratetype, if (ret > 0) {
NULL);
if (ret) {
spin_unlock_irqrestore(&zone->lock, flags); spin_unlock_irqrestore(&zone->lock, flags);
return ret; return ret;
} }
...@@ -1995,7 +2033,7 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac, ...@@ -1995,7 +2033,7 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
* deviation from the rest of this file, to make the for loop * deviation from the rest of this file, to make the for loop
* condition simpler. * condition simpler.
*/ */
static __always_inline bool static __always_inline struct page *
__rmqueue_fallback(struct zone *zone, int order, int start_migratetype, __rmqueue_fallback(struct zone *zone, int order, int start_migratetype,
unsigned int alloc_flags) unsigned int alloc_flags)
{ {
...@@ -2042,7 +2080,7 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype, ...@@ -2042,7 +2080,7 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype,
goto do_steal; goto do_steal;
} }
return false; return NULL;
find_smallest: find_smallest:
for (current_order = order; current_order < NR_PAGE_ORDERS; current_order++) { for (current_order = order; current_order < NR_PAGE_ORDERS; current_order++) {
...@@ -2062,14 +2100,14 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype, ...@@ -2062,14 +2100,14 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype,
do_steal: do_steal:
page = get_page_from_free_area(area, fallback_mt); page = get_page_from_free_area(area, fallback_mt);
steal_suitable_fallback(zone, page, alloc_flags, start_migratetype, /* take off list, maybe claim block, expand remainder */
can_steal); page = steal_suitable_fallback(zone, page, current_order, order,
start_migratetype, alloc_flags, can_steal);
trace_mm_page_alloc_extfrag(page, order, current_order, trace_mm_page_alloc_extfrag(page, order, current_order,
start_migratetype, fallback_mt); start_migratetype, fallback_mt);
return true; return page;
} }
/* /*
...@@ -2096,15 +2134,15 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype, ...@@ -2096,15 +2134,15 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype,
return page; return page;
} }
} }
retry:
page = __rmqueue_smallest(zone, order, migratetype); page = __rmqueue_smallest(zone, order, migratetype);
if (unlikely(!page)) { if (unlikely(!page)) {
if (alloc_flags & ALLOC_CMA) if (alloc_flags & ALLOC_CMA)
page = __rmqueue_cma_fallback(zone, order); page = __rmqueue_cma_fallback(zone, order);
if (!page && __rmqueue_fallback(zone, order, migratetype, if (!page)
alloc_flags)) page = __rmqueue_fallback(zone, order, migratetype,
goto retry; alloc_flags);
} }
return page; return page;
} }
...@@ -2659,12 +2697,10 @@ int __isolate_free_page(struct page *page, unsigned int order) ...@@ -2659,12 +2697,10 @@ int __isolate_free_page(struct page *page, unsigned int order)
* Only change normal pageblocks (i.e., they can merge * Only change normal pageblocks (i.e., they can merge
* with others) * with others)
*/ */
if (migratetype_is_mergeable(mt)) { if (migratetype_is_mergeable(mt) &&
set_pageblock_migratetype(page,
MIGRATE_MOVABLE);
move_freepages_block(zone, page, move_freepages_block(zone, page,
MIGRATE_MOVABLE, NULL); MIGRATE_MOVABLE) != -1)
} set_pageblock_migratetype(page, MIGRATE_MOVABLE);
} }
} }
......
...@@ -178,15 +178,18 @@ static int set_migratetype_isolate(struct page *page, int migratetype, int isol_ ...@@ -178,15 +178,18 @@ static int set_migratetype_isolate(struct page *page, int migratetype, int isol_
unmovable = has_unmovable_pages(check_unmovable_start, check_unmovable_end, unmovable = has_unmovable_pages(check_unmovable_start, check_unmovable_end,
migratetype, isol_flags); migratetype, isol_flags);
if (!unmovable) { if (!unmovable) {
unsigned long nr_pages; int nr_pages;
int mt = get_pageblock_migratetype(page); int mt = get_pageblock_migratetype(page);
nr_pages = move_freepages_block(zone, page, MIGRATE_ISOLATE);
/* Block spans zone boundaries? */
if (nr_pages == -1) {
spin_unlock_irqrestore(&zone->lock, flags);
return -EBUSY;
}
__mod_zone_freepage_state(zone, -nr_pages, mt);
set_pageblock_migratetype(page, MIGRATE_ISOLATE); set_pageblock_migratetype(page, MIGRATE_ISOLATE);
zone->nr_isolate_pageblock++; zone->nr_isolate_pageblock++;
nr_pages = move_freepages_block(zone, page, MIGRATE_ISOLATE,
NULL);
__mod_zone_freepage_state(zone, -nr_pages, mt);
spin_unlock_irqrestore(&zone->lock, flags); spin_unlock_irqrestore(&zone->lock, flags);
return 0; return 0;
} }
...@@ -206,7 +209,7 @@ static int set_migratetype_isolate(struct page *page, int migratetype, int isol_ ...@@ -206,7 +209,7 @@ static int set_migratetype_isolate(struct page *page, int migratetype, int isol_
static void unset_migratetype_isolate(struct page *page, int migratetype) static void unset_migratetype_isolate(struct page *page, int migratetype)
{ {
struct zone *zone; struct zone *zone;
unsigned long flags, nr_pages; unsigned long flags;
bool isolated_page = false; bool isolated_page = false;
unsigned int order; unsigned int order;
struct page *buddy; struct page *buddy;
...@@ -252,7 +255,12 @@ static void unset_migratetype_isolate(struct page *page, int migratetype) ...@@ -252,7 +255,12 @@ static void unset_migratetype_isolate(struct page *page, int migratetype)
* allocation. * allocation.
*/ */
if (!isolated_page) { if (!isolated_page) {
nr_pages = move_freepages_block(zone, page, migratetype, NULL); int nr_pages = move_freepages_block(zone, page, migratetype);
/*
* Isolating this block already succeeded, so this
* should not fail on zone boundaries.
*/
WARN_ON_ONCE(nr_pages == -1);
__mod_zone_freepage_state(zone, nr_pages, migratetype); __mod_zone_freepage_state(zone, nr_pages, migratetype);
} }
set_pageblock_migratetype(page, migratetype); set_pageblock_migratetype(page, migratetype);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment