Commit 371151c9 authored by Andrew Morton, committed by Linus Torvalds

[PATCH] resurrect __GFP_HIGH

This patch reinstates __GFP_HIGH functionality.

__GFP_HIGH means "able to dip into the emergency pools".  However,
somewhere along the line this got broken.  __GFP_HIGH ceased to do
anything.  Instead, !__GFP_WAIT is used to tell the page allocator to
try harder.

__GFP_HIGH makes sense.  The concepts of "unable to sleep" and "should
try harder" are quite separate, and overloading !__GFP_WAIT to mean
"should access emergency pools" seems wrong.

This patch fixes a problem in mempool_alloc().  mempool_alloc() tries
the first allocation with __GFP_WAIT cleared.  If that fails, it tries
again with __GFP_WAIT enabled (if the caller can support __GFP_WAIT).
So it is currently performing an atomic allocation first, even though
the caller said that they're prepared to go in and call the page
stealer.

I thought this was a mempool bug, but Ingo said:

> no, it's not GFP_ATOMIC. The important difference is __GFP_HIGH, which
> triggers the intrusive highprio allocation mode. Otherwise gfp_nowait is
> just a nonblocking allocation of the same type as the original gfp_mask.
> ...
> what i've added is a bit more subtle allocation method, with both
> performance and balancing-correctness in mind:
>
> 1. allocate via gfp_mask, but nonblocking
> 2. if failure => try to get from the pool if the pool is 'full enough'.
> 3. if failure => allocate with gfp_mask [which might block]
>
> there is performance data that this method improves bounce-IO performance
> significantly, because even under VM pressure (when gfp_mask would block)
> we can still use up to 50% of the memory pool without blocking (and
> without endangering deadlock-free allocation). Ie. the memory pool is also
> a fast 'frontside cache' of memory elements.

Ingo was assuming that __GFP_HIGH was still functional.  It isn't, and the
mempool design wants it.
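
A condensed sketch of that three-step ordering, as it behaves once
__GFP_HIGH is honoured again (not the verbatim mempool code: the two pool
helpers are illustrative, and the wait/retry logic is omitted):

	void *mempool_alloc(mempool_t *pool, int gfp_mask)
	{
		void *element;

		/* 1: nonblocking attempt with the caller's own mask.
		 *    Before this patch, !__GFP_WAIT alone switched the
		 *    page allocator into highprio mode, so this step
		 *    raided the emergency pools behind our back. */
		element = pool->alloc(gfp_mask & ~__GFP_WAIT, pool->pool_data);
		if (element)
			return element;

		/* 2: the pool doubles as a fast 'frontside cache' -
		 *    hand out a reserved element while the pool is
		 *    still 'full enough' (up to 50%, per the quote) */
		if (pool_is_full_enough(pool))		/* illustrative helper */
			return take_pool_element(pool);	/* illustrative helper */

		/* 3: last resort - the full mask, which may block in
		 *    page reclaim when __GFP_WAIT is set */
		return pool->alloc(gfp_mask, pool->pool_data);
	}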
parent 9bd6f86b
@@ -74,8 +74,10 @@ int scsi_init_io(Scsi_Cmnd *SCpnt)
 	SCpnt->use_sg = count;
 	gfp_mask = GFP_NOIO;
-	if (in_interrupt())
+	if (in_interrupt()) {
 		gfp_mask &= ~__GFP_WAIT;
+		gfp_mask |= __GFP_HIGH;
+	}
 
 	/*
 	 * if sg table allocation fails, requeue request later.
@@ -18,14 +18,14 @@
 #define __GFP_HIGHIO	0x80	/* Can start high mem physical IO? */
 #define __GFP_FS	0x100	/* Can call down to low-level FS? */
 
-#define GFP_NOHIGHIO	(__GFP_HIGH | __GFP_WAIT | __GFP_IO)
-#define GFP_NOIO	(__GFP_HIGH | __GFP_WAIT)
-#define GFP_NOFS	(__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO)
+#define GFP_NOHIGHIO	(             __GFP_WAIT | __GFP_IO)
+#define GFP_NOIO	(             __GFP_WAIT)
+#define GFP_NOFS	(             __GFP_WAIT | __GFP_IO | __GFP_HIGHIO)
 #define GFP_ATOMIC	(__GFP_HIGH)
 #define GFP_USER	(             __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
 #define GFP_HIGHUSER	(             __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS | __GFP_HIGHMEM)
-#define GFP_KERNEL	(__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
-#define GFP_NFS		(__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
+#define GFP_KERNEL	(             __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
+#define GFP_NFS		(             __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
 #define GFP_KSWAPD	(             __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
 
 /* Flag - indicates that the buffer will be suitable for DMA.  Ignored on some
@@ -272,8 +272,6 @@ static struct page * balance_classzone(zone_t * classzone, unsigned int gfp_mask
 	struct page * page = NULL;
 	int __freed = 0;
 
-	if (!(gfp_mask & __GFP_WAIT))
-		goto out;
 	if (in_interrupt())
 		BUG();
@@ -333,7 +331,6 @@ static struct page * balance_classzone(zone_t * classzone, unsigned int gfp_mask
 		}
 		current->nr_local_pages = 0;
 	}
-out:
 	*freed = __freed;
 	return page;
 }
@@ -380,7 +377,7 @@ struct page * __alloc_pages(unsigned int gfp_mask, unsigned int order, zonelist_
 			break;
 
 		local_min = z->pages_min;
-		if (!(gfp_mask & __GFP_WAIT))
+		if (gfp_mask & __GFP_HIGH)
 			local_min >>= 2;
 		min += local_min;
 		if (z->free_pages > min) {
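
To put numbers on the restored check, simplified to a single zone (the real
loop accumulates min across the zonelist) and with an assumed pages_min:

	unsigned long local_min = z->pages_min;	/* say, 1024 pages (assumed) */
	if (gfp_mask & __GFP_HIGH)
		local_min >>= 2;		/* 1024 -> 256 */
	/* A __GFP_HIGH allocation keeps succeeding while free_pages > 256;
	 * ordinary allocations need free_pages > 1024.  The gap between
	 * the two watermarks is the emergency reserve that GFP_ATOMIC
	 * and friends are allowed to consume. */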
@@ -1153,12 +1153,12 @@ static int kmem_cache_grow (kmem_cache_t * cachep, int flags)
 	 * in kmem_cache_alloc(). If a caller is seriously mis-behaving they
 	 * will eventually be caught here (where it matters).
 	 */
-	if (in_interrupt() && (flags & SLAB_LEVEL_MASK) != SLAB_ATOMIC)
+	if (in_interrupt() && (flags & __GFP_WAIT))
 		BUG();
 
 	ctor_flags = SLAB_CTOR_CONSTRUCTOR;
 	local_flags = (flags & SLAB_LEVEL_MASK);
-	if (local_flags == SLAB_ATOMIC)
+	if (!(local_flags & __GFP_WAIT))
 		/*
 		 * Not allowed to sleep. Need to tell a constructor about
 		 * this - it might need to know...
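
Why the slab tests change (a hedged reading; the changelog itself does not
spell this out): "atomic" is no longer one exact flag value, it is the
absence of __GFP_WAIT.

	/* The old check BUG()ed unless the mask was literally SLAB_ATOMIC
	 * (== __GFP_HIGH).  A nonblocking mask of 0 - legal, it simply
	 * gets no reserves - would have tripped it from interrupt
	 * context.  Testing the __GFP_WAIT bit instead asks the only
	 * question that matters here: can this allocation sleep? */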
@@ -52,6 +52,9 @@ static inline int is_page_cache_freeable(struct page * page)
  * So PF_MEMALLOC is dropped here.  This causes the slab allocations to fail
  * earlier, so radix-tree nodes will then be allocated from the mempool
  * reserves.
+ *
+ * We're still using __GFP_HIGH for radix-tree node allocations, so some of
+ * the emergency pools are available - just not all of them.
  */
 static inline int
 swap_out_add_to_swap_cache(struct page *page, swp_entry_t entry)