Commit 066b2393 authored by Mel Gorman, committed by Linus Torvalds

mm, page_alloc: split buffered_rmqueue()

Patch series "Use per-cpu allocator for !irq requests and prepare for a
bulk allocator", v5.

This series is motivated by a conversation led by Jesper Dangaard Brouer
at the last LSF/MM proposing a generic page pool for DMA-coherent pages.
Part of his motivation was the overhead of allocating multiple order-0
pages, which led some drivers to use high-order allocations and split
them.  This is very slow in some cases.

The first two patches in this series restructure the page allocator so
that it is relatively easy to introduce an order-0 bulk page allocator.
A patch exists to do that and has been handed over to Jesper until an
in-kernel user is created.  The third patch prevents the per-cpu
allocator from being drained from IPI context, as that could corrupt the
list after patch four is merged.  The final patch alters the per-cpu
allocator to make it exclusive to !irq requests.  This cuts
allocation/free overhead by roughly 30%.

Performance tests from both Jesper and me are included in the patch.

This patch (of 4):

buffered_rmqueue removes a page from a given zone and uses the per-cpu
list for order-0 allocations.  This is fine, but a hypothetical caller
that wanted multiple order-0 pages would have to disable/re-enable
interrupts multiple times.  This patch restructures buffered_rmqueue so
that it is relatively easy to build a bulk order-0 page allocator on top
of it.  There is no functional change.
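
For illustration only, and not part of this patch: a minimal sketch of how
a hypothetical bulk allocator could sit on top of the __rmqueue_pcplist()
helper introduced below, taking several order-0 pages while disabling
interrupts only once.  The name rmqueue_pcplist_bulk() and its signature
are invented for this example.

	/*
	 * Hypothetical sketch only; rmqueue_pcplist_bulk() is not part of
	 * this patch.  It reuses __rmqueue_pcplist() to pull nr_pages
	 * order-0 pages from the per-cpu list under a single
	 * local_irq_save/restore pair.
	 */
	static unsigned int rmqueue_pcplist_bulk(struct zone *preferred_zone,
				struct zone *zone, gfp_t gfp_flags,
				int migratetype, unsigned int nr_pages,
				struct list_head *page_list)
	{
		bool cold = ((gfp_flags & __GFP_COLD) != 0);
		struct per_cpu_pages *pcp;
		struct list_head *list;
		unsigned int allocated = 0;
		unsigned long flags;

		/* Interrupts are disabled once for the whole batch */
		local_irq_save(flags);
		pcp = &this_cpu_ptr(zone->pageset)->pcp;
		list = &pcp->lists[migratetype];

		while (allocated < nr_pages) {
			struct page *page;

			page = __rmqueue_pcplist(zone, migratetype, cold,
						 pcp, list);
			if (!page)
				break;

			/* page->lru is unlinked by __rmqueue_pcplist() */
			list_add_tail(&page->lru, page_list);
			allocated++;
		}

		if (allocated) {
			__count_zid_vm_events(PGALLOC, zone_idx(zone), allocated);
			zone_statistics(preferred_zone, zone);
		}
		local_irq_restore(flags);

		return allocated;
	}

Because the list manipulation is factored out into __rmqueue_pcplist(),
the caller decides the IRQ/locking policy around the whole batch.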

[mgorman@techsingularity.net: failed per-cpu refill may blow up]
  Link: http://lkml.kernel.org/r/20170124112723.mshmgwq2ihxku2um@techsingularity.net
Link: http://lkml.kernel.org/r/20170123153906.3122-2-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent c55e8d03
@@ -2600,74 +2600,104 @@ static inline void zone_statistics(struct zone *preferred_zone, struct zone *z)
 #endif
 }
 
+/* Remove page from the per-cpu list, caller must protect the list */
+static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype,
+			bool cold, struct per_cpu_pages *pcp,
+			struct list_head *list)
+{
+	struct page *page;
+
+	do {
+		if (list_empty(list)) {
+			pcp->count += rmqueue_bulk(zone, 0,
+					pcp->batch, list,
+					migratetype, cold);
+			if (unlikely(list_empty(list)))
+				return NULL;
+		}
+
+		if (cold)
+			page = list_last_entry(list, struct page, lru);
+		else
+			page = list_first_entry(list, struct page, lru);
+
+		list_del(&page->lru);
+		pcp->count--;
+	} while (check_new_pcp(page));
+
+	return page;
+}
+
+/* Lock and remove page from the per-cpu list */
+static struct page *rmqueue_pcplist(struct zone *preferred_zone,
+			struct zone *zone, unsigned int order,
+			gfp_t gfp_flags, int migratetype)
+{
+	struct per_cpu_pages *pcp;
+	struct list_head *list;
+	bool cold = ((gfp_flags & __GFP_COLD) != 0);
+	struct page *page;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	pcp = &this_cpu_ptr(zone->pageset)->pcp;
+	list = &pcp->lists[migratetype];
+	page = __rmqueue_pcplist(zone, migratetype, cold, pcp, list);
+	if (page) {
+		__count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
+		zone_statistics(preferred_zone, zone);
+	}
+	local_irq_restore(flags);
+	return page;
+}
+
 /*
  * Allocate a page from the given zone. Use pcplists for order-0 allocations.
  */
 static inline
-struct page *buffered_rmqueue(struct zone *preferred_zone,
+struct page *rmqueue(struct zone *preferred_zone,
 			struct zone *zone, unsigned int order,
 			gfp_t gfp_flags, unsigned int alloc_flags,
 			int migratetype)
 {
 	unsigned long flags;
 	struct page *page;
-	bool cold = ((gfp_flags & __GFP_COLD) != 0);
 
 	if (likely(order == 0)) {
-		struct per_cpu_pages *pcp;
-		struct list_head *list;
-
-		local_irq_save(flags);
-		do {
-			pcp = &this_cpu_ptr(zone->pageset)->pcp;
-			list = &pcp->lists[migratetype];
-			if (list_empty(list)) {
-				pcp->count += rmqueue_bulk(zone, 0,
-						pcp->batch, list,
-						migratetype, cold);
-				if (unlikely(list_empty(list)))
-					goto failed;
-			}
-
-			if (cold)
-				page = list_last_entry(list, struct page, lru);
-			else
-				page = list_first_entry(list, struct page, lru);
-
-			list_del(&page->lru);
-			pcp->count--;
-		} while (check_new_pcp(page));
-	} else {
-		/*
-		 * We most definitely don't want callers attempting to
-		 * allocate greater than order-1 page units with __GFP_NOFAIL.
-		 */
-		WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1));
-		spin_lock_irqsave(&zone->lock, flags);
+		page = rmqueue_pcplist(preferred_zone, zone, order,
+				gfp_flags, migratetype);
+		goto out;
+	}
 
-		do {
-			page = NULL;
-			if (alloc_flags & ALLOC_HARDER) {
-				page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
-				if (page)
-					trace_mm_page_alloc_zone_locked(page, order, migratetype);
-			}
-			if (!page)
-				page = __rmqueue(zone, order, migratetype);
-		} while (page && check_new_pages(page, order));
-		spin_unlock(&zone->lock);
-		if (!page)
-			goto failed;
-		__mod_zone_freepage_state(zone, -(1 << order),
-					  get_pcppage_migratetype(page));
-	}
+	/*
+	 * We most definitely don't want callers attempting to
+	 * allocate greater than order-1 page units with __GFP_NOFAIL.
+	 */
+	WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1));
+	spin_lock_irqsave(&zone->lock, flags);
+
+	do {
+		page = NULL;
+		if (alloc_flags & ALLOC_HARDER) {
+			page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
+			if (page)
+				trace_mm_page_alloc_zone_locked(page, order, migratetype);
+		}
+		if (!page)
+			page = __rmqueue(zone, order, migratetype);
+	} while (page && check_new_pages(page, order));
+	spin_unlock(&zone->lock);
+	if (!page)
+		goto failed;
+	__mod_zone_freepage_state(zone, -(1 << order),
+				  get_pcppage_migratetype(page));
 
 	__count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
 	zone_statistics(preferred_zone, zone);
 	local_irq_restore(flags);
 
-	VM_BUG_ON_PAGE(bad_range(zone, page), page);
+out:
+	VM_BUG_ON_PAGE(page && bad_range(zone, page), page);
 	return page;
 
 failed:
@@ -2972,7 +3002,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
 	}
 
 try_this_zone:
-	page = buffered_rmqueue(ac->preferred_zoneref->zone, zone, order,
+	page = rmqueue(ac->preferred_zoneref->zone, zone, order,
 			gfp_mask, alloc_flags, ac->migratetype);
 	if (page) {
 		prep_new_page(page, order, gfp_mask, alloc_flags);
...