Commit 0e0b864e authored by Mel Gorman, committed by Linus Torvalds

[PATCH] Account for memmap and optionally the kernel image as holes

The x86_64 code accounted for memmap and some portions of the DMA zone as
holes.  This was because those areas would never be reclaimed and accounting
for them as memory affects min watermarks.  This patch will account for the
memmap as a memory hole.  Architectures may optionally use set_dma_reserve()
if they wish to account for a portion of memory in ZONE_DMA as a hole.
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Cc: Dave Hansen <haveblue@us.ibm.com>
Cc: Andy Whitcroft <apw@shadowen.org>
Cc: Andi Kleen <ak@muc.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: "Keith Mannthey" <kmannth@gmail.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Yasunori Goto <y-goto@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 05e0caad
...@@ -655,8 +655,10 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len) ...@@ -655,8 +655,10 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
#else #else
reserve_bootmem(phys, len); reserve_bootmem(phys, len);
#endif #endif
if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
dma_reserve += len / PAGE_SIZE; dma_reserve += len / PAGE_SIZE;
set_dma_reserve(dma_reserve);
}
} }
int kern_addr_valid(unsigned long addr) int kern_addr_valid(unsigned long addr)
......
...@@ -984,6 +984,7 @@ extern void sparse_memory_present_with_active_regions(int nid); ...@@ -984,6 +984,7 @@ extern void sparse_memory_present_with_active_regions(int nid);
extern int early_pfn_to_nid(unsigned long pfn); extern int early_pfn_to_nid(unsigned long pfn);
#endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */ #endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */
#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ #endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
extern void set_dma_reserve(unsigned long new_dma_reserve);
extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long); extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long);
extern void setup_per_zone_pages_min(void); extern void setup_per_zone_pages_min(void);
extern void mem_init(void); extern void mem_init(void);
......
...@@ -104,6 +104,7 @@ int min_free_kbytes = 1024; ...@@ -104,6 +104,7 @@ int min_free_kbytes = 1024;
unsigned long __meminitdata nr_kernel_pages; unsigned long __meminitdata nr_kernel_pages;
unsigned long __meminitdata nr_all_pages; unsigned long __meminitdata nr_all_pages;
static unsigned long __initdata dma_reserve;
#ifdef CONFIG_ARCH_POPULATES_NODE_MAP #ifdef CONFIG_ARCH_POPULATES_NODE_MAP
/* /*
...@@ -2213,6 +2214,20 @@ unsigned long __init zone_absent_pages_in_node(int nid, ...@@ -2213,6 +2214,20 @@ unsigned long __init zone_absent_pages_in_node(int nid,
arch_zone_lowest_possible_pfn[zone_type], arch_zone_lowest_possible_pfn[zone_type],
arch_zone_highest_possible_pfn[zone_type]); arch_zone_highest_possible_pfn[zone_type]);
} }
/*
 * Report which zone the memmap itself lives in.
 *
 * The virtual address of lmem_map is translated to a physical address and
 * then to a page frame number. The result is the index of the first zone
 * whose arch_zone_highest_possible_pfn[] entry lies above that PFN, or
 * MAX_NR_ZONES when no zone satisfies that.
 */
int memmap_zone_idx(struct page *lmem_map)
{
	unsigned long memmap_paddr = virt_to_phys(lmem_map);
	unsigned long memmap_pfn = memmap_paddr >> PAGE_SHIFT;
	int zone = 0;

	/* Skip every zone that ends at or below the memmap's PFN */
	while (zone < MAX_NR_ZONES &&
	       memmap_pfn >= arch_zone_highest_possible_pfn[zone])
		zone++;

	return zone;
}
#else #else
static inline unsigned long zone_spanned_pages_in_node(int nid, static inline unsigned long zone_spanned_pages_in_node(int nid,
unsigned long zone_type, unsigned long zone_type,
...@@ -2230,6 +2245,11 @@ static inline unsigned long zone_absent_pages_in_node(int nid, ...@@ -2230,6 +2245,11 @@ static inline unsigned long zone_absent_pages_in_node(int nid,
return zholes_size[zone_type]; return zholes_size[zone_type];
} }
/*
 * Stub variant: ignores lmem_map and always reports MAX_NR_ZONES, which is
 * one past the last valid zone index.
 * NOTE(review): presumably the fallback used when
 * CONFIG_ARCH_POPULATES_NODE_MAP is not set - confirm against the
 * enclosing #ifdef.
 */
static inline int memmap_zone_idx(struct page *lmem_map)
{
return MAX_NR_ZONES;
}
#endif #endif
static void __init calculate_node_totalpages(struct pglist_data *pgdat, static void __init calculate_node_totalpages(struct pglist_data *pgdat,
...@@ -2274,12 +2294,35 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat, ...@@ -2274,12 +2294,35 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat,
for (j = 0; j < MAX_NR_ZONES; j++) { for (j = 0; j < MAX_NR_ZONES; j++) {
struct zone *zone = pgdat->node_zones + j; struct zone *zone = pgdat->node_zones + j;
unsigned long size, realsize; unsigned long size, realsize, memmap_pages;
size = zone_spanned_pages_in_node(nid, j, zones_size); size = zone_spanned_pages_in_node(nid, j, zones_size);
realsize = size - zone_absent_pages_in_node(nid, j, realsize = size - zone_absent_pages_in_node(nid, j,
zholes_size); zholes_size);
/*
* Adjust realsize so that it accounts for how much memory
* is used by this zone for memmap. This affects the watermark
* and per-cpu initialisations
*/
memmap_pages = (size * sizeof(struct page)) >> PAGE_SHIFT;
if (realsize >= memmap_pages) {
realsize -= memmap_pages;
printk(KERN_DEBUG
" %s zone: %lu pages used for memmap\n",
zone_names[j], memmap_pages);
} else
printk(KERN_WARNING
" %s zone: %lu pages exceeds realsize %lu\n",
zone_names[j], memmap_pages, realsize);
/* Account for reserved DMA pages */
if (j == ZONE_DMA && realsize > dma_reserve) {
realsize -= dma_reserve;
printk(KERN_DEBUG " DMA zone: %lu pages reserved\n",
dma_reserve);
}
if (!is_highmem_idx(j)) if (!is_highmem_idx(j))
nr_kernel_pages += realsize; nr_kernel_pages += realsize;
nr_all_pages += realsize; nr_all_pages += realsize;
...@@ -2596,6 +2639,21 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn) ...@@ -2596,6 +2639,21 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
} }
#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ #endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
/**
 * set_dma_reserve - Account the specified number of pages reserved in ZONE_DMA
 * @new_dma_reserve: The number of pages to mark reserved
 *
 * The per-cpu batchsize and zone watermarks are determined by present_pages.
 * In the DMA zone, a significant percentage may be consumed by kernel image
 * and other unfreeable allocations which can skew the watermarks badly. This
 * function may optionally be used to account for unfreeable pages in
 * ZONE_DMA. The effect will be lower watermarks and smaller per-cpu batchsize.
 *
 * The stored reserve is overwritten, not accumulated, so a caller that
 * reserves memory in several steps must pass its running total each time.
 */
void __init set_dma_reserve(unsigned long new_dma_reserve)
{
dma_reserve = new_dma_reserve;
}
#ifndef CONFIG_NEED_MULTIPLE_NODES #ifndef CONFIG_NEED_MULTIPLE_NODES
static bootmem_data_t contig_bootmem_data; static bootmem_data_t contig_bootmem_data;
struct pglist_data contig_page_data = { .bdata = &contig_bootmem_data }; struct pglist_data contig_page_data = { .bdata = &contig_bootmem_data };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment