Commit 3c7011b3 authored by Andrew Morton, committed by Linus Torvalds

[PATCH] use compound pages for hugetlb pages only

The compound page logic is a little fragile - it relies on additional
metadata in the pageframes which some other kernel code likes to stomp on
(xfs was doing this).

Also, because we're treating all higher-order pages as compound pages it is
no longer possible to free individual lower-order pages from the middle of
higher-order pages.  At least one ARM driver insists on doing this.

We only really need the compound page logic for higher-order pages which can
be mapped into user pagetables and placed under direct-io.  This covers
hugetlb pages and, conceivably, soundcard DMA buffers which were allocated
with a higher-order allocation but which weren't marked PageReserved.

The patch arranges for the hugetlb implementations to allocate their pages with
compound page metadata, and all other higher-order allocations go back to the
old way.

(Andrea supplied the GFP_LEVEL_MASK fix)
parent 60af4464
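
To make the intended split concrete, here is a minimal sketch of the two
allocation styles after this patch. It uses 2.6-era APIs; the sub-page
freeing half is illustrative of the driver pattern described above, not
the actual ARM code.

#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>

static void compound_vs_plain_example(void)
{
	/* Pages that may reach user pagetables or direct-io ask for
	 * compound metadata explicitly, as hugetlb now does. */
	struct page *huge = alloc_pages(GFP_HIGHUSER | __GFP_COMP,
					HUGETLB_PAGE_ORDER);

	/* A plain higher-order allocation carries no compound metadata,
	 * so 0-order pages can again be freed out of its middle (the
	 * ARM driver pattern).  Illustrative only. */
	struct page *block = alloc_pages(GFP_KERNEL, 2);  /* 2^2 = 4 pages */

	if (block) {
		set_page_count(block + 2, 1);	/* give sub-page its own ref */
		__free_page(block + 2);		/* free it from the middle */
	}
	if (huge)
		__free_pages(huge, HUGETLB_PAGE_ORDER);
}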
@@ -54,7 +54,8 @@ static struct page *alloc_fresh_huge_page(void)
 {
 	static int nid = 0;
 	struct page *page;
-	page = alloc_pages_node(nid, GFP_HIGHUSER, HUGETLB_PAGE_ORDER);
+	page = alloc_pages_node(nid, GFP_HIGHUSER|__GFP_COMP,
+				HUGETLB_PAGE_ORDER);
 	nid = (nid + 1) % numnodes;
 	return page;
 }
@@ -58,7 +58,8 @@ static struct page *alloc_fresh_huge_page(void)
 {
 	static int nid = 0;
 	struct page *page;
-	page = alloc_pages_node(nid, GFP_HIGHUSER, HUGETLB_PAGE_ORDER);
+	page = alloc_pages_node(nid, GFP_HIGHUSER|__GFP_COMP,
+				HUGETLB_PAGE_ORDER);
 	nid = (nid + 1) % numnodes;
 	return page;
 }
@@ -78,7 +78,8 @@ static struct page *alloc_fresh_huge_page(void)
 	static int nid = 0;
 	struct page *page;
-	page = alloc_pages_node(nid, GFP_HIGHUSER, HUGETLB_PAGE_ORDER);
+	page = alloc_pages_node(nid, GFP_HIGHUSER|__GFP_COMP,
+				HUGETLB_PAGE_ORDER);
 	if (!page)
 		return NULL;
@@ -60,7 +60,8 @@ static struct page *alloc_fresh_huge_page(void)
 {
 	static int nid = 0;
 	struct page *page;
-	page = alloc_pages_node(nid, GFP_HIGHUSER, HUGETLB_PAGE_ORDER);
+	page = alloc_pages_node(nid, GFP_HIGHUSER|__GFP_COMP,
+				HUGETLB_PAGE_ORDER);
 	nid = (nid + 1) % numnodes;
 	return page;
 }
@@ -56,7 +56,8 @@ static struct page *alloc_fresh_huge_page(void)
 {
 	static int nid = 0;
 	struct page *page;
-	page = alloc_pages_node(nid, GFP_HIGHUSER, HUGETLB_PAGE_ORDER);
+	page = alloc_pages_node(nid, GFP_HIGHUSER|__GFP_COMP,
+				HUGETLB_PAGE_ORDER);
 	nid = (nid + 1) % numnodes;
 	return page;
 }
@@ -32,10 +32,16 @@
 #define __GFP_NOFAIL	0x800	/* Retry for ever.  Cannot fail */
 #define __GFP_NORETRY	0x1000	/* Do not retry.  Might fail */
 #define __GFP_NO_GROW	0x2000	/* Slab internal usage */
+#define __GFP_COMP	0x4000	/* Add compound page metadata */
 
 #define __GFP_BITS_SHIFT 16	/* Room for 16 __GFP_FOO bits */
 #define __GFP_BITS_MASK ((1 << __GFP_BITS_SHIFT) - 1)
 
+/* if you forget to add the bitmask here kernel will crash, period */
+#define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \
+			__GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \
+			__GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP)
+
 #define GFP_ATOMIC	(__GFP_HIGH)
 #define GFP_NOIO	(__GFP_WAIT)
 #define GFP_NOFS	(__GFP_WAIT | __GFP_IO)
@@ -247,14 +247,14 @@ static inline int page_count(struct page *p)
 static inline void get_page(struct page *page)
 {
-	if (PageCompound(page))
+	if (unlikely(PageCompound(page)))
 		page = (struct page *)page->private;
 	atomic_inc(&page->count);
 }
 
 static inline void put_page(struct page *page)
 {
-	if (PageCompound(page)) {
+	if (unlikely(PageCompound(page))) {
 		page = (struct page *)page->private;
 		if (put_page_testzero(page)) {
 			if (page[1].mapping) {	/* destructor? */
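
The get_page()/put_page() hunks above depend on the compound metadata
layout: each constituent page's ->private points back at the head page,
page[1].index stores the allocation order, and page[1].mapping may hold
a destructor. A sketch of the setup side, reconstructed from the checks
visible in these hunks rather than quoted from the patch:

/* Reconstructed sketch of prep_compound_page(), as implied by the
 * checks in get_page()/put_page() and destroy_compound_page(). */
static void prep_compound_page(struct page *page, unsigned long order)
{
	int i;
	int nr_pages = 1 << order;

	page[1].mapping = NULL;	/* no destructor by default */
	page[1].index = order;	/* destroy_compound_page() checks this */

	for (i = 0; i < nr_pages; i++) {
		struct page *p = page + i;

		SetPageCompound(p);
		p->private = (unsigned long)page;  /* back-pointer to head */
	}
}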
@@ -25,9 +25,7 @@ typedef struct kmem_cache_s kmem_cache_t;
 #define	SLAB_KERNEL		GFP_KERNEL
 #define	SLAB_DMA		GFP_DMA
-#define SLAB_LEVEL_MASK		(__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS|\
-				__GFP_COLD|__GFP_NOWARN|__GFP_REPEAT|\
-				__GFP_NOFAIL|__GFP_NORETRY)
+#define SLAB_LEVEL_MASK		GFP_LEVEL_MASK
 
 #define	SLAB_NO_GROW		__GFP_NO_GROW	/* don't grow a cache */
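
The point of routing SLAB_LEVEL_MASK through the shared GFP_LEVEL_MASK
is the "kernel will crash" warning in gfp.h: any __GFP_* bit absent from
the mask is silently stripped wherever caller flags are sanitized. A
hedged illustration of such a masking site; the function name is an
assumption, not the patch's code:

/* Illustration only: a slab-style flag sanitizer.  Had __GFP_COMP been
 * left out of GFP_LEVEL_MASK (and thus SLAB_LEVEL_MASK), the bit would
 * vanish here and the allocation would come back without compound
 * metadata -- the failure mode the gfp.h comment warns about. */
static inline unsigned int sanitize_slab_flags(unsigned int flags)
{
	return flags & (SLAB_DMA | SLAB_LEVEL_MASK | SLAB_NO_GROW);
}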
@@ -130,6 +130,9 @@ static void destroy_compound_page(struct page *page, unsigned long order)
 	int i;
 	int nr_pages = 1 << order;
 
+	if (!PageCompound(page))
+		return;
+
 	if (page[1].index != order)
 		bad_page(__FUNCTION__, page);
@@ -487,10 +490,12 @@ void fastcall free_cold_page(struct page *page)
  * or two.
  */
-static struct page *buffered_rmqueue(struct zone *zone, int order, int cold)
+static struct page *
+buffered_rmqueue(struct zone *zone, int order, int gfp_flags)
 {
 	unsigned long flags;
 	struct page *page = NULL;
+	int cold = !!(gfp_flags & __GFP_COLD);
 
 	if (order == 0) {
 		struct per_cpu_pages *pcp;
@@ -519,7 +524,7 @@ static struct page *buffered_rmqueue(struct zone *zone, int order, int cold)
 		BUG_ON(bad_range(zone, page));
 		mod_page_state_zone(zone, pgalloc, 1 << order);
 		prep_new_page(page, order);
-		if (order)
+		if (order && (gfp_flags & __GFP_COMP))
 			prep_compound_page(page, order);
 	}
 	return page;
@@ -552,16 +557,11 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order,
 	struct reclaim_state reclaim_state;
 	struct task_struct *p = current;
 	int i;
-	int cold;
 	int alloc_type;
 	int do_retry;
 
 	might_sleep_if(wait);
 
-	cold = 0;
-	if (gfp_mask & __GFP_COLD)
-		cold = 1;
-
 	zones = zonelist->zones;  /* the list of zones suitable for gfp_mask */
 	if (zones[0] == NULL)     /* no zones in the zonelist */
 		return NULL;
@@ -583,7 +583,7 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order,
 		if (z->free_pages >= min ||
 				(!wait && z->free_pages >= z->pages_high)) {
-			page = buffered_rmqueue(z, order, cold);
+			page = buffered_rmqueue(z, order, gfp_mask);
 			if (page)
 				goto got_pg;
 		}
@@ -606,7 +606,7 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order,
 		if (z->free_pages >= min ||
 				(!wait && z->free_pages >= z->pages_high)) {
-			page = buffered_rmqueue(z, order, cold);
+			page = buffered_rmqueue(z, order, gfp_mask);
 			if (page)
 				goto got_pg;
 		}
@@ -620,7 +620,7 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order,
 	for (i = 0; zones[i] != NULL; i++) {
 		struct zone *z = zones[i];
 
-		page = buffered_rmqueue(z, order, cold);
+		page = buffered_rmqueue(z, order, gfp_mask);
 		if (page)
 			goto got_pg;
 	}
@@ -648,7 +648,7 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order,
 		if (z->free_pages >= min ||
 				(!wait && z->free_pages >= z->pages_high)) {
-			page = buffered_rmqueue(z, order, cold);
+			page = buffered_rmqueue(z, order, gfp_mask);
 			if (page)
 				goto got_pg;
 		}