Commit cc1050ba, authored by Yinghai Lu, committed by Ingo Molnar

x86: replace shrink_active_range() with remove_active_range()

In case we have KVA before the ramdisk on a node, we still need to use
those ranges.

v2: reserve_early() the KVA RAM area in case there are holes in highmem, so
    that those areas are not treated as free high pages.
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent d2dbf343
...@@ -230,8 +230,8 @@ static unsigned long calculate_numa_remap_pages(void) ...@@ -230,8 +230,8 @@ static unsigned long calculate_numa_remap_pages(void)
unsigned long size, reserve_pages = 0; unsigned long size, reserve_pages = 0;
for_each_online_node(nid) { for_each_online_node(nid) {
u64 node_end_target; u64 node_kva_target;
u64 node_end_final; u64 node_kva_final;
/* /*
* The acpi/srat node info can show hot-add memroy zones * The acpi/srat node info can show hot-add memroy zones
...@@ -254,42 +254,45 @@ static unsigned long calculate_numa_remap_pages(void) ...@@ -254,42 +254,45 @@ static unsigned long calculate_numa_remap_pages(void)
/* now the roundup is correct, convert to PAGE_SIZE pages */ /* now the roundup is correct, convert to PAGE_SIZE pages */
size = size * PTRS_PER_PTE; size = size * PTRS_PER_PTE;
node_end_target = round_down(node_end_pfn[nid] - size, node_kva_target = round_down(node_end_pfn[nid] - size,
PTRS_PER_PTE); PTRS_PER_PTE);
node_end_target <<= PAGE_SHIFT; node_kva_target <<= PAGE_SHIFT;
do { do {
node_end_final = find_e820_area(node_end_target, node_kva_final = find_e820_area(node_kva_target,
((u64)node_end_pfn[nid])<<PAGE_SHIFT, ((u64)node_end_pfn[nid])<<PAGE_SHIFT,
((u64)size)<<PAGE_SHIFT, ((u64)size)<<PAGE_SHIFT,
LARGE_PAGE_BYTES); LARGE_PAGE_BYTES);
node_end_target -= LARGE_PAGE_BYTES; node_kva_target -= LARGE_PAGE_BYTES;
} while (node_end_final == -1ULL && } while (node_kva_final == -1ULL &&
(node_end_target>>PAGE_SHIFT) > (node_start_pfn[nid])); (node_kva_target>>PAGE_SHIFT) > (node_start_pfn[nid]));
if (node_end_final == -1ULL) if (node_kva_final == -1ULL)
panic("Can not get kva ram\n"); panic("Can not get kva ram\n");
printk("Reserving %ld pages of KVA for lmem_map of node %d\n",
size, nid);
node_remap_size[nid] = size; node_remap_size[nid] = size;
node_remap_offset[nid] = reserve_pages; node_remap_offset[nid] = reserve_pages;
reserve_pages += size; reserve_pages += size;
printk("Shrinking node %d from %ld pages to %lld pages\n", printk("Reserving %ld pages of KVA for lmem_map of node %d at %llx\n",
nid, node_end_pfn[nid], node_end_final>>PAGE_SHIFT); size, nid, node_kva_final>>PAGE_SHIFT);
/* /*
* prevent kva address below max_low_pfn want it on system * prevent kva address below max_low_pfn want it on system
* with less memory later. * with less memory later.
* layout will be: KVA address , KVA RAM * layout will be: KVA address , KVA RAM
*
* we are supposed to only record the one less then max_low_pfn
* but we could have some hole in high memory, and it will only
* check page_is_ram(pfn) && !page_is_reserved_early(pfn) to decide
* to use it as free.
* So reserve_early here, hope we don't run out of that array
*/ */
if ((node_end_final>>PAGE_SHIFT) < max_low_pfn) reserve_early(node_kva_final,
reserve_early(node_end_final, node_kva_final+(((u64)size)<<PAGE_SHIFT),
node_end_final+(((u64)size)<<PAGE_SHIFT), "KVA RAM");
"KVA RAM");
node_remap_start_pfn[nid] = node_kva_final>>PAGE_SHIFT;
node_end_pfn[nid] = node_end_final>>PAGE_SHIFT; remove_active_range(nid, node_remap_start_pfn[nid],
node_remap_start_pfn[nid] = node_end_pfn[nid]; node_remap_start_pfn[nid] + size);
shrink_active_range(nid, node_end_pfn[nid]);
} }
printk("Reserving total of %ld pages for numa KVA remap\n", printk("Reserving total of %ld pages for numa KVA remap\n",
reserve_pages); reserve_pages);
......
...@@ -998,7 +998,8 @@ extern void free_area_init_node(int nid, pg_data_t *pgdat, ...@@ -998,7 +998,8 @@ extern void free_area_init_node(int nid, pg_data_t *pgdat,
extern void free_area_init_nodes(unsigned long *max_zone_pfn); extern void free_area_init_nodes(unsigned long *max_zone_pfn);
extern void add_active_range(unsigned int nid, unsigned long start_pfn, extern void add_active_range(unsigned int nid, unsigned long start_pfn,
unsigned long end_pfn); unsigned long end_pfn);
extern void shrink_active_range(unsigned int nid, unsigned long new_end_pfn); extern void remove_active_range(unsigned int nid, unsigned long start_pfn,
unsigned long end_pfn);
extern void push_node_boundaries(unsigned int nid, unsigned long start_pfn, extern void push_node_boundaries(unsigned int nid, unsigned long start_pfn,
unsigned long end_pfn); unsigned long end_pfn);
extern void remove_all_active_ranges(void); extern void remove_all_active_ranges(void);
......
...@@ -3552,30 +3552,47 @@ void __init add_active_range(unsigned int nid, unsigned long start_pfn, ...@@ -3552,30 +3552,47 @@ void __init add_active_range(unsigned int nid, unsigned long start_pfn,
} }
/** /**
* shrink_active_range - Shrink an existing registered range of PFNs * remove_active_range - Shrink an existing registered range of PFNs
* @nid: The node id the range is on that should be shrunk * @nid: The node id the range is on that should be shrunk
* @new_end_pfn: The new PFN of the range * @start_pfn: The new PFN of the range
* @end_pfn: The new PFN of the range
* *
* i386 with NUMA use alloc_remap() to store a node_mem_map on a local node. * i386 with NUMA use alloc_remap() to store a node_mem_map on a local node.
* The map is kept near the end physical page range that has already been * The map is kept near the end physical page range that has already been
* registered. This function allows an arch to shrink an existing registered * registered. This function allows an arch to shrink an existing registered
* range. * range.
*/ */
void __init shrink_active_range(unsigned int nid, unsigned long new_end_pfn) void __init remove_active_range(unsigned int nid, unsigned long start_pfn,
unsigned long end_pfn)
{ {
int i, j; int i, j;
int removed = 0; int removed = 0;
printk(KERN_DEBUG "remove_active_range (%d, %lu, %lu)\n",
nid, start_pfn, end_pfn);
/* Find the old active region end and shrink */ /* Find the old active region end and shrink */
for_each_active_range_index_in_nid(i, nid) { for_each_active_range_index_in_nid(i, nid) {
if (early_node_map[i].start_pfn >= new_end_pfn) { if (early_node_map[i].start_pfn >= start_pfn &&
early_node_map[i].end_pfn <= end_pfn) {
/* clear it */ /* clear it */
early_node_map[i].start_pfn = 0;
early_node_map[i].end_pfn = 0; early_node_map[i].end_pfn = 0;
removed = 1; removed = 1;
continue; continue;
} }
if (early_node_map[i].end_pfn > new_end_pfn) { if (early_node_map[i].start_pfn < start_pfn &&
early_node_map[i].end_pfn = new_end_pfn; early_node_map[i].end_pfn > start_pfn) {
unsigned long temp_end_pfn = early_node_map[i].end_pfn;
early_node_map[i].end_pfn = start_pfn;
if (temp_end_pfn > end_pfn)
add_active_range(nid, end_pfn, temp_end_pfn);
continue;
}
if (early_node_map[i].start_pfn >= start_pfn &&
early_node_map[i].end_pfn > end_pfn &&
early_node_map[i].start_pfn < end_pfn) {
early_node_map[i].start_pfn = end_pfn;
continue; continue;
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment