Commit 9278aa39 authored by Martin J. Bligh, committed by Linus Torvalds

[PATCH] fix NUMA boundary between ZONE_NORMAL and HIGHMEM

From: Andy Whitcroft <apw@shadowen.org>

This patch eliminates the false hole which can form between ZONE_NORMAL and
ZONE_HIGHMEM.  This is most easily seen when the 4g/4g split is enabled, but
it is always broken; we just happen not to hit it most of the time.
Basically, the patch changes the allocation of the NUMA remap regions (the
source of the holes) so that they officially fall within VMALLOC space, where
they belong.  (A standalone sketch of the boundary arithmetic follows the
commit metadata below.)

Tested in -mjb for a couple of months, and again against 2.6.7-mm1.
Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Martin J. Bligh <mbligh@aracnet.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 4736ba03
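
To make the before/after arithmetic concrete, here is a minimal user-space
sketch (not kernel code; PAGE_OFFSET, the PFN values and the 4MB reserve are
invented) of how subtracting reserve_pages before highstart_pfn is computed
removes the hole between ZONE_NORMAL and ZONE_HIGHMEM, with the remap KVA
accounted for in vmalloc_earlyreserve instead:

/*
 * Standalone illustration only -- not kernel code.  PAGE_OFFSET, the PFNs
 * and the reserve size below are invented for a hypothetical i386 box.
 */
#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define PAGE_OFFSET	0xC0000000UL	/* classic 3G/1G split */
#define pfn_to_kaddr(pfn) (PAGE_OFFSET + ((unsigned long)(pfn) << PAGE_SHIFT))

int main(void)
{
	unsigned long low_pfn       = 0x38000;	/* find_max_low_pfn(): ~896MB */
	unsigned long reserve_pages = 0x400;	/* 4MB of node remap space    */

	/* Old ordering: highstart_pfn was fixed first and lowmem shrunk
	 * afterwards, leaving a reserve_pages-sized hole between the zones. */
	unsigned long old_highstart_pfn = low_pfn;
	unsigned long old_max_low_pfn   = low_pfn - reserve_pages;

	/* New ordering: shrink lowmem first, derive highstart_pfn from it,
	 * and account for the remap area as vmalloc_earlyreserve. */
	unsigned long new_max_low_pfn      = low_pfn - reserve_pages;
	unsigned long new_highstart_pfn    = new_max_low_pfn;
	unsigned long vmalloc_earlyreserve = reserve_pages * PAGE_SIZE;

	printf("old: hole of %#lx pages between ZONE_NORMAL and ZONE_HIGHMEM\n",
	       old_highstart_pfn - old_max_low_pfn);
	printf("new: hole of %#lx pages; remap KVA starts at %#lx,\n"
	       "     VMALLOC_START pushed up by %#lx bytes\n",
	       new_highstart_pfn - new_max_low_pfn,
	       pfn_to_kaddr(new_highstart_pfn), vmalloc_earlyreserve);
	return 0;
}

With the old ordering the boundary adjustment happened after highstart_pfn was
fixed, so ZONE_HIGHMEM started reserve_pages above the end of ZONE_NORMAL; the
new ordering makes the two coincide.
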
@@ -236,6 +236,13 @@ unsigned long __init setup_memory(void)
 	unsigned long bootmap_size, system_start_pfn, system_max_low_pfn;
 	unsigned long reserve_pages;
+	/*
+	 * When mapping a NUMA machine we allocate the node_mem_map arrays
+	 * from node local memory.  They are then mapped directly into KVA
+	 * between zone normal and vmalloc space.  Calculate the size of
+	 * this space and use it to adjust the boundary between ZONE_NORMAL
+	 * and ZONE_HIGHMEM.
+	 */
 	get_memcfg_numa();
 	reserve_pages = calculate_numa_remap_pages();
@@ -243,7 +250,10 @@ unsigned long __init setup_memory(void)
 	system_start_pfn = min_low_pfn = PFN_UP(init_pg_tables_end);
 	find_max_pfn();
-	system_max_low_pfn = max_low_pfn = find_max_low_pfn();
+	system_max_low_pfn = max_low_pfn = find_max_low_pfn() - reserve_pages;
+	printk("reserve_pages = %ld find_max_low_pfn() ~ %ld\n",
+			reserve_pages, max_low_pfn + reserve_pages);
+	printk("max_pfn = %ld\n", max_pfn);
 #ifdef CONFIG_HIGHMEM
 	highstart_pfn = highend_pfn = max_pfn;
 	if (max_pfn > system_max_low_pfn)
@@ -251,7 +261,6 @@ unsigned long __init setup_memory(void)
 	printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
 			pages_to_mb(highend_pfn - highstart_pfn));
 #endif
-	system_max_low_pfn = max_low_pfn = max_low_pfn - reserve_pages;
 	printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
 			pages_to_mb(system_max_low_pfn));
 	printk("min_low_pfn = %ld, max_low_pfn = %ld, highstart_pfn = %ld\n",
@@ -261,15 +270,16 @@ unsigned long __init setup_memory(void)
 			(ulong) pfn_to_kaddr(max_low_pfn));
 	for (nid = 0; nid < numnodes; nid++) {
 		node_remap_start_vaddr[nid] = pfn_to_kaddr(
-			highstart_pfn - node_remap_offset[nid]);
+			(highstart_pfn + reserve_pages) - node_remap_offset[nid]);
 		allocate_pgdat(nid);
 		printk ("node %d will remap to vaddr %08lx - %08lx\n", nid,
 			(ulong) node_remap_start_vaddr[nid],
-			(ulong) pfn_to_kaddr(highstart_pfn
+			(ulong) pfn_to_kaddr(highstart_pfn + reserve_pages
 			 - node_remap_offset[nid] + node_remap_size[nid]));
 	}
 	printk("High memory starts at vaddr %08lx\n",
 			(ulong) pfn_to_kaddr(highstart_pfn));
+	vmalloc_earlyreserve = reserve_pages * PAGE_SIZE;
 	for (nid = 0; nid < numnodes; nid++)
 		find_max_pfn_node(nid);
...
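
The loop above now stacks each node's remap window just above the lowered
highstart_pfn, i.e. inside the KVA that VMALLOC_START will skip.  A small
user-space sketch of that placement follows; the two-node configuration, the
per-node sizes and the offsets (assumed to accumulate to reserve_pages) are
all invented for illustration:

/* Sketch only -- not kernel code; all node sizes/offsets are invented. */
#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_OFFSET	0xC0000000UL
#define pfn_to_kaddr(pfn) (PAGE_OFFSET + ((unsigned long)(pfn) << PAGE_SHIFT))
#define MAX_NUMNODES	2

int main(void)
{
	unsigned long highstart_pfn = 0x37c00;	/* lowmem already shrunk      */
	unsigned long reserve_pages = 0x400;	/* sum of node_remap_size     */
	unsigned long node_remap_size[MAX_NUMNODES]   = { 0x200, 0x200 };
	unsigned long node_remap_offset[MAX_NUMNODES] = { 0x200, 0x400 };
	unsigned long node_remap_start_vaddr[MAX_NUMNODES];
	int nid;

	for (nid = 0; nid < MAX_NUMNODES; nid++) {
		/* Same computation as the patched loop above. */
		node_remap_start_vaddr[nid] = pfn_to_kaddr(
			(highstart_pfn + reserve_pages) - node_remap_offset[nid]);
		printf("node %d will remap to vaddr %08lx - %08lx\n", nid,
		       node_remap_start_vaddr[nid],
		       pfn_to_kaddr(highstart_pfn + reserve_pages
				    - node_remap_offset[nid] + node_remap_size[nid]));
	}
	printf("high memory starts at vaddr %08lx\n", pfn_to_kaddr(highstart_pfn));
	return 0;
}

With these example values the remap windows exactly fill the range from
pfn_to_kaddr(highstart_pfn) up to pfn_to_kaddr(highstart_pfn + reserve_pages),
so the space comes out of the early vmalloc reservation rather than leaving a
hole in ZONE_NORMAL.
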
@@ -83,8 +83,8 @@ void paging_init(void);
  * area for the same reason. ;)
  */
 #define VMALLOC_OFFSET	(8*1024*1024)
-#define VMALLOC_START	(((unsigned long) high_memory + 2*VMALLOC_OFFSET-1) & \
-			~(VMALLOC_OFFSET-1))
+#define VMALLOC_START	(((unsigned long) high_memory + vmalloc_earlyreserve + \
+			2*VMALLOC_OFFSET-1) & ~(VMALLOC_OFFSET-1))
 #ifdef CONFIG_HIGHMEM
 # define VMALLOC_END	(PKMAP_BASE-2*PAGE_SIZE)
 #else
...
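
The macro change simply adds vmalloc_earlyreserve before the usual 8MB guard
and rounding, so the vmalloc allocator never hands out the remap window.  A
quick user-space sketch of the arithmetic (the high_memory value and the 4MB
reserve are invented):

/* Sketch of the patched VMALLOC_START arithmetic; values are invented. */
#include <stdio.h>

#define VMALLOC_OFFSET (8*1024*1024)

static unsigned long vmalloc_start(unsigned long high_memory,
				   unsigned long vmalloc_earlyreserve)
{
	/* Mirrors the patched macro: skip the early-reserved remap area,
	 * then leave the 8MB guard and round up to VMALLOC_OFFSET. */
	return (high_memory + vmalloc_earlyreserve + 2*VMALLOC_OFFSET - 1)
		& ~(VMALLOC_OFFSET - 1);
}

int main(void)
{
	unsigned long high_memory = 0xF8000000UL;	/* hypothetical: 3G + 896MB */
	unsigned long reserve     = 4*1024*1024;	/* 4MB of remap KVA         */

	printf("VMALLOC_START without reserve: %#lx\n",
	       vmalloc_start(high_memory, 0));
	printf("VMALLOC_START with reserve:    %#lx\n",
	       vmalloc_start(high_memory, reserve));
	return 0;
}
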
@@ -23,6 +23,7 @@ extern unsigned long max_mapnr;
 extern unsigned long num_physpages;
 extern void * high_memory;
+extern unsigned long vmalloc_earlyreserve;
 extern int page_cluster;
 #include <asm/page.h>
...
@@ -66,12 +66,21 @@ EXPORT_SYMBOL(mem_map);
 #endif
 unsigned long num_physpages;
+/*
+ * A number of key systems in x86, including ioremap(), rely on the assumption
+ * that high_memory defines the upper bound on direct map memory, the end
+ * of ZONE_NORMAL.  Under CONFIG_DISCONTIG this means that max_low_pfn and
+ * highstart_pfn must be the same; there must be no gap between ZONE_NORMAL
+ * and ZONE_HIGHMEM.
+ */
 void * high_memory;
 struct page *highmem_start_page;
+unsigned long vmalloc_earlyreserve;
 EXPORT_SYMBOL(num_physpages);
 EXPORT_SYMBOL(highmem_start_page);
 EXPORT_SYMBOL(high_memory);
+EXPORT_SYMBOL(vmalloc_earlyreserve);
 /*
  * We special-case the C-O-W ZERO_PAGE, because it's such
...