Commit d4388840, authored by Andrew Morton, committed by Linus Torvalds

[PATCH] NUMA memory reporting fix

From: Dave Hansen <haveblue@us.ibm.com>

The current numa meminfo code exports (via sysfs) pgdat->node_size as
totalram.  This variable is consistently used elsewhere to mean "the number
of physical pages that this particular node spans".  This is _not_ what we
want to see from meminfo, which is: "how much actual memory does this node
have?"

The following patch removes pgdat->node_size, and replaces it with
->node_spanned_pages.  This is to avoid confusion with a new variable,
node_present_pages, which is the _actual_ value that we want to export in
meminfo.  Most of the patch is a simple s/node_size/node_spanned_pages/.
The node_size() macro is also removed, and replaced with new ones for
node_{spanned,present}_pages() to avoid confusion.

We were bitten by this problem in this bug:
	http://bugme.osdl.org/show_bug.cgi?id=818

Compiled and tested on NUMA-Q.
parent 98eb235b
......@@ -338,7 +338,7 @@ void __init mem_init(void)
lmem_map = node_mem_map(nid);
pfn = NODE_DATA(nid)->node_start_pfn;
for (i = 0; i < node_size(nid); i++, pfn++)
for (i = 0; i < node_spanned_pages(nid); i++, pfn++)
if (page_is_ram(pfn) && PageReserved(lmem_map+i))
reservedpages++;
}
......@@ -372,7 +372,7 @@ show_mem(void)
printk("Free swap: %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10));
for (nid = 0; nid < numnodes; nid++) {
struct page * lmem_map = node_mem_map(nid);
i = node_size(nid);
i = node_spanned_pages(nid);
while (i-- > 0) {
total++;
if (PageReserved(lmem_map+i))
......
......@@ -79,7 +79,7 @@ void show_mem(void)
struct page *page, *end;
page = NODE_MEM_MAP(node);
end = page + NODE_DATA(node)->node_size;
end = page + NODE_DATA(node)->node_spanned_pages;
do {
total++;
......@@ -576,7 +576,7 @@ void __init mem_init(void)
for (node = 0; node < numnodes; node++) {
pg_data_t *pgdat = NODE_DATA(node);
if (pgdat->node_size != 0)
if (pgdat->node_spanned_pages != 0)
totalram_pages += free_all_bootmem_node(pgdat);
}
......
......@@ -68,7 +68,7 @@ void show_mem(void)
page = NODE_MEM_MAP(0);
end = page + NODE_DATA(0)->node_size;
end = page + NODE_DATA(0)->node_spanned_pages;
do {
total++;
......@@ -353,7 +353,7 @@ void __init mem_init(void)
max_mapnr = virt_to_page(high_memory) - mem_map;
/* this will put all unused low memory onto the freelists */
if (pgdat->node_size != 0)
if (pgdat->node_spanned_pages != 0)
totalram_pages += free_all_bootmem_node(pgdat);
printk(KERN_INFO "Memory:");
......
......@@ -34,7 +34,7 @@ void show_mem(void)
show_free_areas();
printk("Free swap: %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10));
for_each_pgdat(pgdat) {
for (i = 0; i < pgdat->node_size; ++i) {
for (i = 0; i < pgdat->node_spanned_pages; ++i) {
page = pgdat->node_mem_map + i;
total++;
if (PageHighMem(page))
......
......@@ -232,7 +232,7 @@ show_mem(void)
printk("Free swap: %6dkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
for_each_pgdat(pgdat) {
printk("Node ID: %d\n", pgdat->node_id);
for(i = 0; i < pgdat->node_size; i++) {
for(i = 0; i < pgdat->node_spanned_pages; i++) {
if (PageReserved(pgdat->node_mem_map+i))
reserved++;
else if (PageSwapCache(pgdat->node_mem_map+i))
......@@ -240,7 +240,7 @@ show_mem(void)
else if (page_count(pgdat->node_mem_map + i))
shared += page_count(pgdat->node_mem_map + i) - 1;
}
printk("\t%d pages of RAM\n", pgdat->node_size);
printk("\t%d pages of RAM\n", pgdat->node_spanned_pages);
printk("\t%d reserved pages\n", reserved);
printk("\t%d pages shared\n", shared);
printk("\t%d pages swap cached\n", cached);
......
......@@ -109,7 +109,7 @@ void show_mem(void)
show_free_areas();
printk("Free swap: %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10));
for_each_pgdat(pgdat) {
for (i = 0; i < pgdat->node_size; i++) {
for (i = 0; i < pgdat->node_spanned_pages; i++) {
page = pgdat->node_mem_map + i;
total++;
if (PageReserved(page))
......@@ -564,7 +564,7 @@ void __init mem_init(void)
int nid;
for (nid = 0; nid < numnodes; nid++) {
if (node_data[nid].node_size != 0) {
if (node_data[nid].node_spanned_pages != 0) {
printk("freeing bootmem node %x\n", nid);
totalram_pages +=
free_all_bootmem_node(NODE_DATA(nid));
......
......@@ -160,21 +160,21 @@ static int __init parse_numa_properties(void)
* this simple case and complain if there is a gap in
* memory
*/
if (node_data[numa_domain].node_size) {
if (node_data[numa_domain].node_spanned_pages) {
unsigned long shouldstart =
node_data[numa_domain].node_start_pfn +
node_data[numa_domain].node_size;
node_data[numa_domain].node_spanned_pages;
if (shouldstart != (start / PAGE_SIZE)) {
printk(KERN_ERR "Hole in node, disabling "
"region start %lx length %lx\n",
start, size);
continue;
}
node_data[numa_domain].node_size += size / PAGE_SIZE;
node_data[numa_domain].node_spanned_pages += size / PAGE_SIZE;
} else {
node_data[numa_domain].node_start_pfn =
start / PAGE_SIZE;
node_data[numa_domain].node_size = size / PAGE_SIZE;
node_data[numa_domain].node_spanned_pages = size / PAGE_SIZE;
}
for (i = start ; i < (start+size); i += MEMORY_INCREMENT)
......@@ -202,7 +202,7 @@ void setup_nonnuma(void)
map_cpu_to_node(i, 0);
node_data[0].node_start_pfn = 0;
node_data[0].node_size = lmb_end_of_DRAM() / PAGE_SIZE;
node_data[0].node_spanned_pages = lmb_end_of_DRAM() / PAGE_SIZE;
for (i = 0 ; i < lmb_end_of_DRAM(); i += MEMORY_INCREMENT)
numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = 0;
......@@ -224,12 +224,12 @@ void __init do_init_bootmem(void)
unsigned long bootmem_paddr;
unsigned long bootmap_pages;
if (node_data[nid].node_size == 0)
if (node_data[nid].node_spanned_pages == 0)
continue;
start_paddr = node_data[nid].node_start_pfn * PAGE_SIZE;
end_paddr = start_paddr +
(node_data[nid].node_size * PAGE_SIZE);
(node_data[nid].node_spanned_pages * PAGE_SIZE);
dbg("node %d\n", nid);
dbg("start_paddr = %lx\n", start_paddr);
......@@ -311,7 +311,7 @@ void __init paging_init(void)
unsigned long start_pfn;
unsigned long end_pfn;
if (node_data[nid].node_size == 0)
if (node_data[nid].node_spanned_pages == 0)
continue;
start_pfn = plat_node_bdata[nid].node_boot_start >> PAGE_SHIFT;
......
......@@ -64,7 +64,7 @@ void show_mem(void)
printk("Free swap: %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10));
for_each_pgdat(pgdat) {
for (i = 0; i < pgdat->node_size; ++i) {
for (i = 0; i < pgdat->node_spanned_pages; ++i) {
page = pgdat->node_mem_map + i;
total++;
if (PageReserved(page))
......
......@@ -86,7 +86,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long en
memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t));
NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid];
NODE_DATA(nodeid)->node_start_pfn = start_pfn;
NODE_DATA(nodeid)->node_size = end_pfn - start_pfn;
NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn;
/* Find a place for the bootmem map */
bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
......
......@@ -31,7 +31,6 @@ extern pg_data_t node_data[];
#define pa_to_nid(pa) alpha_pa_to_nid(pa)
#define NODE_DATA(nid) (&node_data[(nid)])
#define node_size(nid) (NODE_DATA(nid)->node_size)
#define node_localnr(pfn, nid) ((pfn) - NODE_DATA(nid)->node_start_pfn)
......@@ -124,7 +123,7 @@ PLAT_NODE_DATA_LOCALNR(unsigned long p, int n)
#define pfn_to_nid(pfn) pa_to_nid(((u64)pfn << PAGE_SHIFT))
#define pfn_valid(pfn) \
(((pfn) - node_start_pfn(pfn_to_nid(pfn))) < \
node_size(pfn_to_nid(pfn))) \
node_spanned_pages(pfn_to_nid(pfn))) \
#define virt_addr_valid(kaddr) pfn_valid((__pa(kaddr) >> PAGE_SHIFT))
......
......@@ -32,8 +32,7 @@ extern struct pglist_data *node_data[];
#define alloc_bootmem_low_pages_node(ignore, x) \
__alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, 0)
#define node_size(nid) (node_data[nid]->node_size)
#define node_localnr(pfn, nid) ((pfn) - node_data[nid]->node_start_pfn)
#define node_localnr(pfn, nid) ((pfn) - node_data[nid]->node_start_pfn)
/*
* Following are macros that each numa implementation must define.
......@@ -54,7 +53,7 @@ extern struct pglist_data *node_data[];
#define node_end_pfn(nid) \
({ \
pg_data_t *__pgdat = NODE_DATA(nid); \
__pgdat->node_start_pfn + __pgdat->node_size; \
__pgdat->node_start_pfn + __pgdat->node_spanned_pages; \
})
#define local_mapnr(kvaddr) \
......
......@@ -24,7 +24,7 @@ extern plat_pg_data_t *plat_node_data[];
#define PHYSADDR_TO_NID(pa) NASID_TO_COMPACT_NODEID(NASID_GET(pa))
#define PLAT_NODE_DATA(n) (plat_node_data[n])
#define PLAT_NODE_DATA_SIZE(n) (PLAT_NODE_DATA(n)->gendata.node_size)
#define PLAT_NODE_DATA_SIZE(n) (PLAT_NODE_DATA(n)->gendata.node_spanned_pages)
#define PLAT_NODE_DATA_LOCALNR(p, n) \
(((p) >> PAGE_SHIFT) - PLAT_NODE_DATA(n)->gendata.node_start_pfn)
......
......@@ -54,7 +54,6 @@ static inline int pa_to_nid(unsigned long pa)
*/
#define NODE_DATA(nid) (&node_data[nid])
#define node_size(nid) (NODE_DATA(nid)->node_size)
#define node_localnr(pfn, nid) ((pfn) - NODE_DATA(nid)->node_start_pfn)
/*
......
......@@ -40,8 +40,7 @@ static inline __attribute__((pure)) int phys_to_nid(unsigned long addr)
#define node_mem_map(nid) (NODE_DATA(nid)->node_mem_map)
#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn)
#define node_end_pfn(nid) (NODE_DATA(nid)->node_start_pfn + \
NODE_DATA(nid)->node_size)
#define node_size(nid) (NODE_DATA(nid)->node_size)
NODE_DATA(nid)->node_spanned_pages)
#define local_mapnr(kvaddr) \
( (__pa(kvaddr) >> PAGE_SHIFT) - node_start_pfn(kvaddr_to_nid(kvaddr)) )
......
......@@ -184,12 +184,17 @@ typedef struct pglist_data {
unsigned long *valid_addr_bitmap;
struct bootmem_data *bdata;
unsigned long node_start_pfn;
unsigned long node_size;
unsigned long node_present_pages; /* total number of physical pages */
unsigned long node_spanned_pages; /* total size of physical page
range, including holes */
int node_id;
struct pglist_data *pgdat_next;
wait_queue_head_t kswapd_wait;
} pg_data_t;
#define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages)
#define node_spanned_pages(nid) (NODE_DATA(nid)->node_spanned_pages)
extern int numnodes;
extern struct pglist_data *pgdat_list;
......
......@@ -903,7 +903,7 @@ void si_meminfo_node(struct sysinfo *val, int nid)
{
pg_data_t *pgdat = NODE_DATA(nid);
val->totalram = pgdat->node_size;
val->totalram = pgdat->node_present_pages;
val->freeram = nr_free_pages_pgdat(pgdat);
val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].present_pages;
val->freehigh = pgdat->node_zones[ZONE_HIGHMEM].free_pages;
......@@ -1138,12 +1138,13 @@ static void __init calculate_zone_totalpages(struct pglist_data *pgdat,
for (i = 0; i < MAX_NR_ZONES; i++)
totalpages += zones_size[i];
pgdat->node_size = totalpages;
pgdat->node_spanned_pages = totalpages;
realtotalpages = totalpages;
if (zholes_size)
for (i = 0; i < MAX_NR_ZONES; i++)
realtotalpages -= zholes_size[i];
pgdat->node_present_pages = realtotalpages;
printk("On node %d totalpages: %lu\n", pgdat->node_id, realtotalpages);
}
......@@ -1349,7 +1350,7 @@ void __init free_area_init_node(int nid, struct pglist_data *pgdat,
pgdat->node_start_pfn = node_start_pfn;
calculate_zone_totalpages(pgdat, zones_size, zholes_size);
if (!node_mem_map) {
size = (pgdat->node_size + 1) * sizeof(struct page);
size = (pgdat->node_spanned_pages + 1) * sizeof(struct page);
node_mem_map = alloc_bootmem_node(pgdat, size);
}
pgdat->node_mem_map = node_mem_map;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment