Commit 6f167ec7, authored by Dave Hansen, committed by Linus Torvalds

[PATCH] sparsemem base: simple NUMA remap space allocator

Introduce a simple allocator for the NUMA remap space.  This space is very
scarce, used for structures which are best allocated node local.

This mechanism is also used on non-NUMA ia64 systems with a vmem_map to keep
the pgdat->node_mem_map initialized in a consistent place for all
architectures.

Issues:
o alloc_remap takes a node_id where we might expect a pgdat.  This was intended
  to allow us to allocate the pgdats themselves using this mechanism, which we
  do not yet do.  We could have alloc_remap_node() and alloc_remap_nid() for
  this purpose.
Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent c2ebaa42
...@@ -803,6 +803,11 @@ config NEED_NODE_MEMMAP_SIZE ...@@ -803,6 +803,11 @@ config NEED_NODE_MEMMAP_SIZE
depends on DISCONTIGMEM depends on DISCONTIGMEM
default y default y
# When set, the architecture provides a real alloc_remap(); otherwise the
# generic header supplies an inline stub that always returns NULL.
config HAVE_ARCH_ALLOC_REMAP
bool
depends on NUMA
default y
config HIGHPTE config HIGHPTE
bool "Allocate 3rd-level pagetables from highmem" bool "Allocate 3rd-level pagetables from highmem"
depends on HIGHMEM4G || HIGHMEM64G depends on HIGHMEM4G || HIGHMEM64G
......
...@@ -108,6 +108,9 @@ unsigned long node_remap_offset[MAX_NUMNODES]; ...@@ -108,6 +108,9 @@ unsigned long node_remap_offset[MAX_NUMNODES];
void *node_remap_start_vaddr[MAX_NUMNODES]; void *node_remap_start_vaddr[MAX_NUMNODES];
void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
/*
 * Per-node bookkeeping for the remap-space allocator.
 *
 * node_remap_end_vaddr[nid]   - upper bound of the node's remap window;
 *                               setup_memory() sets it to the start address
 *                               plus node_remap_size[nid] pages.
 * node_remap_alloc_vaddr[nid] - allocation cursor; setup_memory() starts it
 *                               one page-aligned pg_data_t past the window
 *                               start, and alloc_remap() advances it.
 */
void *node_remap_end_vaddr[MAX_NUMNODES];
void *node_remap_alloc_vaddr[MAX_NUMNODES];
/* /*
* FLAT - support for basic PC memory model with discontig enabled, essentially * FLAT - support for basic PC memory model with discontig enabled, essentially
* a single node with all available processors in it with a flat * a single node with all available processors in it with a flat
...@@ -178,6 +181,21 @@ static void __init allocate_pgdat(int nid) ...@@ -178,6 +181,21 @@ static void __init allocate_pgdat(int nid)
} }
} }
/*
 * alloc_remap - carve a zeroed chunk out of a node's NUMA remap space.
 * @nid:  node to allocate from; used directly as an index into the per-node
 *        arrays, so the caller must pass a valid node id.
 * @size: number of bytes wanted; rounded up to a multiple of L1_CACHE_BYTES.
 *
 * Simple bump allocator: this function only ever moves the per-node cursor
 * node_remap_alloc_vaddr[nid] forward, and no matching free operation is
 * provided here.
 *
 * Returns a pointer to the zero-filled allocation, or NULL when the node has
 * no allocation cursor set up or the rounded request does not fit below
 * node_remap_end_vaddr[nid].
 */
void *alloc_remap(int nid, unsigned long size)
{
	void *allocation = node_remap_alloc_vaddr[nid];

	size = ALIGN(size, L1_CACHE_BYTES);

	/*
	 * Note the ">=": a request that would end exactly at the end of the
	 * window is refused too, so the bound is deliberately conservative.
	 */
	if (!allocation || (allocation + size) >= node_remap_end_vaddr[nid])
		return NULL;

	node_remap_alloc_vaddr[nid] += size;
	memset(allocation, 0, size);

	return allocation;
}
void __init remap_numa_kva(void) void __init remap_numa_kva(void)
{ {
void *vaddr; void *vaddr;
...@@ -185,8 +203,6 @@ void __init remap_numa_kva(void) ...@@ -185,8 +203,6 @@ void __init remap_numa_kva(void)
int node; int node;
for_each_online_node(node) { for_each_online_node(node) {
if (node == 0)
continue;
for (pfn=0; pfn < node_remap_size[node]; pfn += PTRS_PER_PTE) { for (pfn=0; pfn < node_remap_size[node]; pfn += PTRS_PER_PTE) {
vaddr = node_remap_start_vaddr[node]+(pfn<<PAGE_SHIFT); vaddr = node_remap_start_vaddr[node]+(pfn<<PAGE_SHIFT);
set_pmd_pfn((ulong) vaddr, set_pmd_pfn((ulong) vaddr,
...@@ -202,11 +218,6 @@ static unsigned long calculate_numa_remap_pages(void) ...@@ -202,11 +218,6 @@ static unsigned long calculate_numa_remap_pages(void)
unsigned long size, reserve_pages = 0; unsigned long size, reserve_pages = 0;
for_each_online_node(nid) { for_each_online_node(nid) {
if (nid == 0)
continue;
if (!node_remap_size[nid])
continue;
/* /*
* The acpi/srat node info can show hot-add memory zones * The acpi/srat node info can show hot-add memory zones
* where memory could be added but not currently present. * where memory could be added but not currently present.
...@@ -226,8 +237,8 @@ static unsigned long calculate_numa_remap_pages(void) ...@@ -226,8 +237,8 @@ static unsigned long calculate_numa_remap_pages(void)
printk("Reserving %ld pages of KVA for lmem_map of node %d\n", printk("Reserving %ld pages of KVA for lmem_map of node %d\n",
size, nid); size, nid);
node_remap_size[nid] = size; node_remap_size[nid] = size;
reserve_pages += size;
node_remap_offset[nid] = reserve_pages; node_remap_offset[nid] = reserve_pages;
reserve_pages += size;
printk("Shrinking node %d from %ld pages to %ld pages\n", printk("Shrinking node %d from %ld pages to %ld pages\n",
nid, node_end_pfn[nid], node_end_pfn[nid] - size); nid, node_end_pfn[nid], node_end_pfn[nid] - size);
node_end_pfn[nid] -= size; node_end_pfn[nid] -= size;
...@@ -280,12 +291,18 @@ unsigned long __init setup_memory(void) ...@@ -280,12 +291,18 @@ unsigned long __init setup_memory(void)
(ulong) pfn_to_kaddr(max_low_pfn)); (ulong) pfn_to_kaddr(max_low_pfn));
for_each_online_node(nid) { for_each_online_node(nid) {
node_remap_start_vaddr[nid] = pfn_to_kaddr( node_remap_start_vaddr[nid] = pfn_to_kaddr(
(highstart_pfn + reserve_pages) - node_remap_offset[nid]); highstart_pfn + node_remap_offset[nid]);
/* Init the node remap allocator */
node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] +
(node_remap_size[nid] * PAGE_SIZE);
node_remap_alloc_vaddr[nid] = node_remap_start_vaddr[nid] +
ALIGN(sizeof(pg_data_t), PAGE_SIZE);
allocate_pgdat(nid); allocate_pgdat(nid);
printk ("node %d will remap to vaddr %08lx - %08lx\n", nid, printk ("node %d will remap to vaddr %08lx - %08lx\n", nid,
(ulong) node_remap_start_vaddr[nid], (ulong) node_remap_start_vaddr[nid],
(ulong) pfn_to_kaddr(highstart_pfn + reserve_pages (ulong) pfn_to_kaddr(highstart_pfn
- node_remap_offset[nid] + node_remap_size[nid])); + node_remap_offset[nid] + node_remap_size[nid]));
} }
printk("High memory starts at vaddr %08lx\n", printk("High memory starts at vaddr %08lx\n",
(ulong) pfn_to_kaddr(highstart_pfn)); (ulong) pfn_to_kaddr(highstart_pfn));
...@@ -348,23 +365,9 @@ void __init zone_sizes_init(void) ...@@ -348,23 +365,9 @@ void __init zone_sizes_init(void)
} }
zholes_size = get_zholes_size(nid); zholes_size = get_zholes_size(nid);
/*
* We let the lmem_map for node 0 be allocated from the free_area_init_node(nid, NODE_DATA(nid), zones_size, start,
* normal bootmem allocator, but other nodes come from the zholes_size);
* remapped KVA area - mbligh
*/
if (!nid)
free_area_init_node(nid, NODE_DATA(nid),
zones_size, start, zholes_size);
else {
unsigned long lmem_map;
lmem_map = (unsigned long)node_remap_start_vaddr[nid];
lmem_map += sizeof(pg_data_t) + PAGE_SIZE - 1;
lmem_map &= PAGE_MASK;
NODE_DATA(nid)->node_mem_map = (struct page *)lmem_map;
free_area_init_node(nid, NODE_DATA(nid), zones_size,
start, zholes_size);
}
} }
return; return;
} }
......
...@@ -67,6 +67,15 @@ extern void * __init __alloc_bootmem_node (pg_data_t *pgdat, unsigned long size, ...@@ -67,6 +67,15 @@ extern void * __init __alloc_bootmem_node (pg_data_t *pgdat, unsigned long size,
__alloc_bootmem_node((pgdat), (x), PAGE_SIZE, 0) __alloc_bootmem_node((pgdat), (x), PAGE_SIZE, 0)
#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
/*
 * alloc_remap() - allocate @size bytes from the remap space of node @nid.
 *
 * Configurations without CONFIG_HAVE_ARCH_ALLOC_REMAP get an inline stub
 * that always reports failure by returning NULL; the others link against an
 * out-of-line implementation declared here.
 */
#ifndef CONFIG_HAVE_ARCH_ALLOC_REMAP
static inline void *alloc_remap(int nid, unsigned long size)
{
	/* No arch-provided remap allocator in this configuration. */
	return NULL;
}
#else
extern void *alloc_remap(int nid, unsigned long size);
#endif /* !CONFIG_HAVE_ARCH_ALLOC_REMAP */
extern unsigned long __initdata nr_kernel_pages; extern unsigned long __initdata nr_kernel_pages;
extern unsigned long __initdata nr_all_pages; extern unsigned long __initdata nr_all_pages;
......
...@@ -1936,6 +1936,7 @@ static void __init free_area_init_core(struct pglist_data *pgdat, ...@@ -1936,6 +1936,7 @@ static void __init free_area_init_core(struct pglist_data *pgdat,
static void __init alloc_node_mem_map(struct pglist_data *pgdat) static void __init alloc_node_mem_map(struct pglist_data *pgdat)
{ {
unsigned long size; unsigned long size;
struct page *map;
/* Skip empty nodes */ /* Skip empty nodes */
if (!pgdat->node_spanned_pages) if (!pgdat->node_spanned_pages)
...@@ -1944,7 +1945,10 @@ static void __init alloc_node_mem_map(struct pglist_data *pgdat) ...@@ -1944,7 +1945,10 @@ static void __init alloc_node_mem_map(struct pglist_data *pgdat)
/* ia64 gets its own node_mem_map, before this, without bootmem */ /* ia64 gets its own node_mem_map, before this, without bootmem */
if (!pgdat->node_mem_map) { if (!pgdat->node_mem_map) {
size = (pgdat->node_spanned_pages + 1) * sizeof(struct page); size = (pgdat->node_spanned_pages + 1) * sizeof(struct page);
pgdat->node_mem_map = alloc_bootmem_node(pgdat, size); map = alloc_remap(pgdat->node_id, size);
if (!map)
map = alloc_bootmem_node(pgdat, size);
pgdat->node_mem_map = map;
} }
#ifndef CONFIG_DISCONTIGMEM #ifndef CONFIG_DISCONTIGMEM
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment