Commit 5cb248ab authored by Mel Gorman, committed by Linus Torvalds

[PATCH] Have x86_64 use add_active_range() and free_area_init_nodes

Size zones and holes in an architecture-independent manner for x86_64.
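
The mechanics: instead of x86_64 computing zone sizes and holes itself (size_zones() and e820_hole_size()), the architecture now registers its usable PFN ranges and lets the generic code do the sizing. Roughly, the resulting boot flow looks like this (an outline only, not literal kernel code; e820_register_active_regions() is the helper this patch adds):

    /* 1. The arch walks the e820 map and reports usable PFN ranges per node: */
    e820_register_active_regions(nid, node_start_pfn, node_end_pfn);
            /* -> add_active_range(nid, clamped_start_pfn, clamped_end_pfn) */

    /* 2. The arch states only where each zone may end: */
    unsigned long max_zone_pfns[MAX_NR_ZONES] = {
            MAX_DMA_PFN,    /* ZONE_DMA:    up to 16MB  */
            MAX_DMA32_PFN,  /* ZONE_DMA32:  up to 4GB   */
            end_pfn         /* ZONE_NORMAL: rest of RAM */
    };

    /* 3. Generic code sizes zones and holes for every node: */
    free_area_init_nodes(max_zone_pfns);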
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Cc: Dave Hansen <haveblue@us.ibm.com>
Cc: Andy Whitcroft <apw@shadowen.org>
Cc: Andi Kleen <ak@muc.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: "Keith Mannthey" <kmannth@gmail.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Yasunori Goto <y-goto@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 4cfee88a
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -85,6 +85,9 @@ config ARCH_MAY_HAVE_PC_FDC
 	bool
 	default y
 
+config ARCH_POPULATES_NODE_MAP
+	def_bool y
+
 config DMI
 	bool
 	default y
--- a/arch/x86_64/kernel/e820.c
+++ b/arch/x86_64/kernel/e820.c
@@ -162,59 +162,14 @@ unsigned long __init find_e820_area(unsigned long start, unsigned long end, unsi
 	return -1UL;
 }
 
-/*
- * Free bootmem based on the e820 table for a node.
- */
-void __init e820_bootmem_free(pg_data_t *pgdat, unsigned long start,unsigned long end)
-{
-	int i;
-	for (i = 0; i < e820.nr_map; i++) {
-		struct e820entry *ei = &e820.map[i];
-		unsigned long last, addr;
-
-		if (ei->type != E820_RAM ||
-		    ei->addr+ei->size <= start ||
-		    ei->addr >= end)
-			continue;
-
-		addr = round_up(ei->addr, PAGE_SIZE);
-		if (addr < start)
-			addr = start;
-
-		last = round_down(ei->addr + ei->size, PAGE_SIZE);
-		if (last >= end)
-			last = end;
-
-		if (last > addr && last-addr >= PAGE_SIZE)
-			free_bootmem_node(pgdat, addr, last-addr);
-	}
-}
-
 /*
  * Find the highest page frame number we have available
  */
 unsigned long __init e820_end_of_ram(void)
 {
-	int i;
 	unsigned long end_pfn = 0;
+	end_pfn = find_max_pfn_with_active_regions();
 
-	for (i = 0; i < e820.nr_map; i++) {
-		struct e820entry *ei = &e820.map[i];
-		unsigned long start, end;
-
-		start = round_up(ei->addr, PAGE_SIZE);
-		end = round_down(ei->addr + ei->size, PAGE_SIZE);
-		if (start >= end)
-			continue;
-		if (ei->type == E820_RAM) {
-			if (end > end_pfn<<PAGE_SHIFT)
-				end_pfn = end>>PAGE_SHIFT;
-		} else {
-			if (end > end_pfn_map<<PAGE_SHIFT)
-				end_pfn_map = end>>PAGE_SHIFT;
-		}
-	}
-
 	if (end_pfn > end_pfn_map)
 		end_pfn_map = end_pfn;
 	if (end_pfn_map > MAXMEM>>PAGE_SHIFT)
@@ -224,43 +179,10 @@ unsigned long __init e820_end_of_ram(void)
 	if (end_pfn > end_pfn_map)
 		end_pfn = end_pfn_map;
 
+	printk("end_pfn_map = %lu\n", end_pfn_map);
 	return end_pfn;
 }
 
-/*
- * Compute how much memory is missing in a range.
- * Unlike the other functions in this file the arguments are in page numbers.
- */
-unsigned long __init
-e820_hole_size(unsigned long start_pfn, unsigned long end_pfn)
-{
-	unsigned long ram = 0;
-	unsigned long start = start_pfn << PAGE_SHIFT;
-	unsigned long end = end_pfn << PAGE_SHIFT;
-	int i;
-
-	for (i = 0; i < e820.nr_map; i++) {
-		struct e820entry *ei = &e820.map[i];
-		unsigned long last, addr;
-
-		if (ei->type != E820_RAM ||
-		    ei->addr+ei->size <= start ||
-		    ei->addr >= end)
-			continue;
-
-		addr = round_up(ei->addr, PAGE_SIZE);
-		if (addr < start)
-			addr = start;
-
-		last = round_down(ei->addr + ei->size, PAGE_SIZE);
-		if (last >= end)
-			last = end;
-
-		if (last > addr)
-			ram += last - addr;
-	}
-	return ((end - start) - ram) >> PAGE_SHIFT;
-}
-
 /*
  * Mark e820 reserved areas as busy for the resource manager.
  */
@@ -342,6 +264,49 @@ void __init e820_mark_nosave_regions(void)
 	}
 }
 
+/* Walk the e820 map and register active regions within a node */
+void __init
+e820_register_active_regions(int nid, unsigned long start_pfn,
+						unsigned long end_pfn)
+{
+	int i;
+	unsigned long ei_startpfn, ei_endpfn;
+
+	for (i = 0; i < e820.nr_map; i++) {
+		struct e820entry *ei = &e820.map[i];
+		ei_startpfn = round_up(ei->addr, PAGE_SIZE) >> PAGE_SHIFT;
+		ei_endpfn = round_down(ei->addr + ei->size, PAGE_SIZE)
+								>> PAGE_SHIFT;
+
+		/* Skip map entries smaller than a page */
+		if (ei_startpfn > ei_endpfn)
+			continue;
+
+		/* Check if end_pfn_map should be updated */
+		if (ei->type != E820_RAM && ei_endpfn > end_pfn_map)
+			end_pfn_map = ei_endpfn;
+
+		/* Skip if map is outside the node */
+		if (ei->type != E820_RAM ||
+				ei_endpfn <= start_pfn ||
+				ei_startpfn >= end_pfn)
+			continue;
+
+		/* Check for overlaps */
+		if (ei_startpfn < start_pfn)
+			ei_startpfn = start_pfn;
+		if (ei_endpfn > end_pfn)
+			ei_endpfn = end_pfn;
+
+		/* Obey end_user_pfn to save on memmap */
+		if (ei_startpfn >= end_user_pfn)
+			continue;
+		if (ei_endpfn > end_user_pfn)
+			ei_endpfn = end_user_pfn;
+
+		add_active_range(nid, ei_startpfn, ei_endpfn);
+	}
+}
+
 /*
  * Add a memory region to the kernel e820 map.
  */
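A note on the clamping in e820_register_active_regions(): each e820 entry is rounded inward to whole pages, intersected with the node's [start_pfn, end_pfn) window, and finally capped at end_user_pfn (the mem= limit) before being handed to add_active_range(). A standalone userspace sketch of that arithmetic, with made-up values (the classic 639K low-memory RAM block):

    #include <stdio.h>

    #define PAGE_SHIFT 12
    #define PAGE_SIZE  (1UL << PAGE_SHIFT)

    int main(void)
    {
            /* Hypothetical e820 RAM entry: 639K at physical address 0 */
            unsigned long long addr = 0x0, size = 0x9fc00;
            /* Node window and a mem=-style cap (invented values) */
            unsigned long start_pfn = 0, end_pfn = 0x100000;
            unsigned long end_user_pfn = 0x100000;

            /* Partial pages are unusable: round the start up, the end down */
            unsigned long ei_start = (addr + PAGE_SIZE - 1) >> PAGE_SHIFT;
            unsigned long ei_end   = (addr + size) >> PAGE_SHIFT;

            if (ei_start < start_pfn)    ei_start = start_pfn; /* clip to node */
            if (ei_end > end_pfn)        ei_end = end_pfn;
            if (ei_end > end_user_pfn)   ei_end = end_user_pfn; /* obey mem= */
            if (ei_start < ei_end)
                    printf("add_active_range(nid, %#lx, %#lx)\n",
                           ei_start, ei_end);
            return 0;
    }

With these inputs it prints add_active_range(nid, 0, 0x9f): the 0x9fc00-byte entry yields 0x9f complete pages.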
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -292,7 +292,8 @@ contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
 	if (bootmap == -1L)
 		panic("Cannot find bootmem map of size %ld\n",bootmap_size);
 	bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
-	e820_bootmem_free(NODE_DATA(0), 0, end_pfn << PAGE_SHIFT);
+	e820_register_active_regions(0, start_pfn, end_pfn);
+	free_bootmem_with_active_regions(0, end_pfn);
 	reserve_bootmem(bootmap, bootmap_size);
 }
 #endif
@@ -384,6 +385,7 @@ void __init setup_arch(char **cmdline_p)
 
 	finish_e820_parsing();
 
+	e820_register_active_regions(0, 0, -1UL);
 	/*
 	 * partially used pages are not usable - thus
 	 * we are rounding upwards:
@@ -414,6 +416,9 @@ void __init setup_arch(char **cmdline_p)
 	max_pfn = end_pfn;
 	high_memory = (void *)__va(end_pfn * PAGE_SIZE - 1) + 1;
 
+	/* Remove active ranges so rediscovery with NUMA-awareness happens */
+	remove_all_active_ranges();
+
 #ifdef CONFIG_ACPI_NUMA
 	/*
 	 * Parse SRAT to discover nodes.
--- a/arch/x86_64/mm/init.c
+++ b/arch/x86_64/mm/init.c
@@ -403,69 +403,15 @@ void __cpuinit zap_low_mappings(int cpu)
 	__flush_tlb_all();
 }
 
-/* Compute zone sizes for the DMA and DMA32 zones in a node. */
-__init void
-size_zones(unsigned long *z, unsigned long *h,
-	   unsigned long start_pfn, unsigned long end_pfn)
-{
-	int i;
-	unsigned long w;
-
-	for (i = 0; i < MAX_NR_ZONES; i++)
-		z[i] = 0;
-
-	if (start_pfn < MAX_DMA_PFN)
-		z[ZONE_DMA] = MAX_DMA_PFN - start_pfn;
-	if (start_pfn < MAX_DMA32_PFN) {
-		unsigned long dma32_pfn = MAX_DMA32_PFN;
-		if (dma32_pfn > end_pfn)
-			dma32_pfn = end_pfn;
-		z[ZONE_DMA32] = dma32_pfn - start_pfn;
-	}
-	z[ZONE_NORMAL] = end_pfn - start_pfn;
-
-	/* Remove lower zones from higher ones. */
-	w = 0;
-	for (i = 0; i < MAX_NR_ZONES; i++) {
-		if (z[i])
-			z[i] -= w;
-		w += z[i];
-	}
-
-	/* Compute holes */
-	w = start_pfn;
-	for (i = 0; i < MAX_NR_ZONES; i++) {
-		unsigned long s = w;
-		w += z[i];
-		h[i] = e820_hole_size(s, w);
-	}
-
-	/* Add the space pace needed for mem_map to the holes too. */
-	for (i = 0; i < MAX_NR_ZONES; i++)
-		h[i] += (z[i] * sizeof(struct page)) / PAGE_SIZE;
-
-	/* The 16MB DMA zone has the kernel and other misc mappings.
-	   Account them too */
-	if (h[ZONE_DMA]) {
-		h[ZONE_DMA] += dma_reserve;
-		if (h[ZONE_DMA] >= z[ZONE_DMA]) {
-			printk(KERN_WARNING
-				"Kernel too large and filling up ZONE_DMA?\n");
-			h[ZONE_DMA] = z[ZONE_DMA];
-		}
-	}
-}
-
 #ifndef CONFIG_NUMA
 void __init paging_init(void)
 {
-	unsigned long zones[MAX_NR_ZONES], holes[MAX_NR_ZONES];
+	unsigned long max_zone_pfns[MAX_NR_ZONES] = {MAX_DMA_PFN,
+		MAX_DMA32_PFN,
+		end_pfn};
 	memory_present(0, 0, end_pfn);
 	sparse_init();
-	size_zones(zones, holes, 0, end_pfn);
-	free_area_init_node(0, NODE_DATA(0), zones,
-			    __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes);
+	free_area_init_nodes(max_zone_pfns);
 }
 #endif
 
@@ -608,7 +554,8 @@ void __init mem_init(void)
 #else
 	totalram_pages = free_all_bootmem();
 #endif
-	reservedpages = end_pfn - totalram_pages - e820_hole_size(0, end_pfn);
+	reservedpages = end_pfn - totalram_pages -
+					absent_pages_in_range(0, end_pfn);
 
 	after_bootmem = 1;
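What replaces size_zones(): the architecture now supplies only max_zone_pfns[], the highest PFN each zone may reach (16MB for ZONE_DMA, 4GB for ZONE_DMA32, end of RAM for ZONE_NORMAL), and free_area_init_nodes() intersects every node's registered active ranges with those boundaries; holes fall out of absent_pages_in_range(). A userspace sketch of the zone-splitting step, using the x86_64 boundary PFNs and a hypothetical 5GB node:

    #include <stdio.h>

    int main(void)
    {
            const char *names[] = { "DMA", "DMA32", "NORMAL" };
            /* 16MB, 4GB, end of RAM, expressed as PFNs (4K pages) */
            unsigned long max_zone_pfns[] = { 0x1000, 0x100000, 0x140000 };
            unsigned long node_start = 0x0, node_end = 0x140000; /* 5GB node */

            unsigned long prev = 0;
            for (int i = 0; i < 3; i++) {
                    /* Intersect the node span with this zone's PFN window */
                    unsigned long lo = node_start > prev ? node_start : prev;
                    unsigned long hi = node_end < max_zone_pfns[i] ?
                                       node_end : max_zone_pfns[i];
                    if (lo < hi)
                            printf("ZONE_%s: pfn %#lx-%#lx\n", names[i], lo, hi);
                    prev = max_zone_pfns[i];
            }
            return 0;
    }

This prints DMA up to pfn 0x1000, DMA32 up to 0x100000, and NORMAL for the remainder; a node starting above a boundary simply gets an empty lower zone.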
--- a/arch/x86_64/mm/k8topology.c
+++ b/arch/x86_64/mm/k8topology.c
@@ -149,6 +149,9 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
 
 		nodes[nodeid].start = base;
 		nodes[nodeid].end = limit;
+		e820_register_active_regions(nodeid,
+				nodes[nodeid].start >> PAGE_SHIFT,
+				nodes[nodeid].end >> PAGE_SHIFT);
 
 		prevbase = base;
--- a/arch/x86_64/mm/numa.c
+++ b/arch/x86_64/mm/numa.c
@@ -161,7 +161,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long en
 				 bootmap_start >> PAGE_SHIFT,
 				 start_pfn, end_pfn);
 
-	e820_bootmem_free(NODE_DATA(nodeid), start, end);
+	free_bootmem_with_active_regions(nodeid, end);
 
 	reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size);
 	reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, bootmap_pages<<PAGE_SHIFT);
@@ -175,13 +175,11 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long en
 void __init setup_node_zones(int nodeid)
 {
 	unsigned long start_pfn, end_pfn, memmapsize, limit;
-	unsigned long zones[MAX_NR_ZONES];
-	unsigned long holes[MAX_NR_ZONES];
 
 	start_pfn = node_start_pfn(nodeid);
 	end_pfn = node_end_pfn(nodeid);
 
-	Dprintk(KERN_INFO "Setting up node %d %lx-%lx\n",
+	Dprintk(KERN_INFO "Setting up memmap for node %d %lx-%lx\n",
 		nodeid, start_pfn, end_pfn);
 
 	/* Try to allocate mem_map at end to not fill up precious <4GB
@@ -195,10 +193,6 @@ void __init setup_node_zones(int nodeid)
 			round_down(limit - memmapsize, PAGE_SIZE),
 			limit);
 #endif
-
-	size_zones(zones, holes, start_pfn, end_pfn);
-	free_area_init_node(nodeid, NODE_DATA(nodeid), zones,
-			    start_pfn, holes);
 }
 
 void __init numa_init_array(void)
@@ -259,8 +253,11 @@ static int __init numa_emulation(unsigned long start_pfn, unsigned long end_pfn)
 		printk(KERN_ERR "No NUMA hash function found. Emulation disabled.\n");
 		return -1;
 	}
-	for_each_online_node(i)
+	for_each_online_node(i) {
+		e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT,
+						nodes[i].end >> PAGE_SHIFT);
 		setup_node_bootmem(i, nodes[i].start, nodes[i].end);
+	}
 	numa_init_array();
 	return 0;
 }
@@ -299,6 +296,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
 	for (i = 0; i < NR_CPUS; i++)
 		numa_set_node(i, 0);
 	node_to_cpumask[0] = cpumask_of_cpu(0);
+	e820_register_active_regions(0, start_pfn, end_pfn);
 	setup_node_bootmem(0, start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT);
 }
 
@@ -340,12 +338,17 @@ static void __init arch_sparse_init(void)
 void __init paging_init(void)
 {
 	int i;
+	unsigned long max_zone_pfns[MAX_NR_ZONES] = { MAX_DMA_PFN,
+		MAX_DMA32_PFN,
+		end_pfn};
 
 	arch_sparse_init();
 
 	for_each_online_node(i) {
 		setup_node_zones(i);
 	}
+
+	free_area_init_nodes(max_zone_pfns);
 }
 
 static __init int numa_setup(char *opt)
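Why setup_arch() registers all of memory as node 0 and then calls remove_all_active_ranges(): e820_end_of_ram() needs active ranges before the NUMA topology is known, so a flat view is registered first and discarded once the real per-node discovery runs. The resulting call order, assembled from the hunks above and the srat.c hunks below (an outline, not literal code):

    setup_arch()
      e820_register_active_regions(0, 0, -1UL);   /* flat, for e820_end_of_ram() */
      end_pfn = e820_end_of_ram();
      remove_all_active_ranges();                 /* forget the flat view */
      numa_initmem_init()                         /* re-register per node via  */
        k8_scan_nodes() / ACPI SRAT / numa_emulation() / flat fallback
          e820_register_active_regions(nid, start_pfn, end_pfn);
          setup_node_bootmem() -> free_bootmem_with_active_regions(nid, end);
    paging_init()
      free_area_init_nodes(max_zone_pfns);        /* one call sizes all nodes */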
--- a/arch/x86_64/mm/srat.c
+++ b/arch/x86_64/mm/srat.c
@@ -93,6 +93,7 @@ static __init void bad_srat(void)
 		apicid_to_node[i] = NUMA_NO_NODE;
 	for (i = 0; i < MAX_NUMNODES; i++)
 		nodes_add[i].start = nodes[i].end = 0;
+	remove_all_active_ranges();
 }
 
 static __init inline int srat_disabled(void)
@@ -175,7 +176,7 @@ static int hotadd_enough_memory(struct bootnode *nd)
 	if (mem < 0)
 		return 0;
-	allowed = (end_pfn - e820_hole_size(0, end_pfn)) * PAGE_SIZE;
+	allowed = (end_pfn - absent_pages_in_range(0, end_pfn)) * PAGE_SIZE;
 	allowed = (allowed / 100) * hotadd_percent;
 	if (allocated + mem > allowed) {
 		unsigned long range;
@@ -225,7 +226,7 @@ static int reserve_hotadd(int node, unsigned long start, unsigned long end)
 	}
 
 	/* This check might be a bit too strict, but I'm keeping it for now. */
-	if (e820_hole_size(s_pfn, e_pfn) != e_pfn - s_pfn) {
+	if (absent_pages_in_range(s_pfn, e_pfn) != e_pfn - s_pfn) {
 		printk(KERN_ERR "SRAT: Hotplug area has existing memory\n");
 		return -1;
 	}
@@ -319,6 +320,8 @@ acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma)
 	printk(KERN_INFO "SRAT: Node %u PXM %u %Lx-%Lx\n", node, pxm,
 	       nd->start, nd->end);
+	e820_register_active_regions(node, nd->start >> PAGE_SHIFT,
+						nd->end >> PAGE_SHIFT);
 
 #ifdef RESERVE_HOTADD
 	if (ma->flags.hot_pluggable && reserve_hotadd(node, start, end) < 0) {
@@ -343,13 +346,13 @@ static int nodes_cover_memory(void)
 		unsigned long s = nodes[i].start >> PAGE_SHIFT;
 		unsigned long e = nodes[i].end >> PAGE_SHIFT;
 		pxmram += e - s;
-		pxmram -= e820_hole_size(s, e);
+		pxmram -= absent_pages_in_range(s, e);
 		pxmram -= nodes_add[i].end - nodes_add[i].start;
 		if ((long)pxmram < 0)
 			pxmram = 0;
 	}
 
-	e820ram = end_pfn - e820_hole_size(0, end_pfn);
+	e820ram = end_pfn - absent_pages_in_range(0, end_pfn);
 	/* We seem to lose 3 pages somewhere. Allow a bit of slack. */
 	if ((long)(e820ram - pxmram) >= 1*1024*1024) {
 		printk(KERN_ERR
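absent_pages_in_range(start_pfn, end_pfn) is the generic replacement for e820_hole_size(): it counts the PFNs in [start_pfn, end_pfn) covered by no registered active range, so the SRAT sanity checks keep their semantics without re-walking the e820 map. A self-contained illustration with two invented ranges:

    #include <stdio.h>

    struct range { unsigned long start, end; };

    /* Count PFNs in [start, end) not covered by any active range */
    static unsigned long absent(struct range *r, int n,
                                unsigned long start, unsigned long end)
    {
            unsigned long present = 0;
            for (int i = 0; i < n; i++) {
                    unsigned long lo = r[i].start > start ? r[i].start : start;
                    unsigned long hi = r[i].end < end ? r[i].end : end;
                    if (lo < hi)
                            present += hi - lo;
            }
            return (end - start) - present;
    }

    int main(void)
    {
            /* Hypothetical ranges: 0x9f..0x100 is a hole (legacy ISA window) */
            struct range active[] = { { 0, 0x9f }, { 0x100, 0x1000 } };
            printf("absent = %#lx pfns\n", absent(active, 2, 0, 0x1000));
            return 0;
    }

Here it reports 0x61 absent PFNs: exactly the 0x9f..0x100 gap between the two registered ranges.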
--- a/include/asm-x86_64/e820.h
+++ b/include/asm-x86_64/e820.h
@@ -47,10 +47,9 @@ extern void e820_print_map(char *who);
 extern int e820_any_mapped(unsigned long start, unsigned long end, unsigned type);
 extern int e820_all_mapped(unsigned long start, unsigned long end, unsigned type);
 
-extern void e820_bootmem_free(pg_data_t *pgdat, unsigned long start,unsigned long end);
 extern void e820_setup_gap(void);
-extern unsigned long e820_hole_size(unsigned long start_pfn,
-				    unsigned long end_pfn);
+extern void e820_register_active_regions(int nid,
+				unsigned long start_pfn, unsigned long end_pfn);
 
 extern void finish_e820_parsing(void);
--- a/include/asm-x86_64/proto.h
+++ b/include/asm-x86_64/proto.h
@@ -24,8 +24,6 @@ extern void mtrr_bp_init(void);
 #define mtrr_bp_init() do {} while (0)
 #endif
 extern void init_memory_mapping(unsigned long start, unsigned long end);
-extern void size_zones(unsigned long *z, unsigned long *h,
-			unsigned long start_pfn, unsigned long end_pfn);
 
 extern void system_call(void);
 extern int kernel_syscall(void);