Commit d4388840 authored by Andrew Morton, committed by Linus Torvalds

[PATCH] NUMA memory reporting fix

From: Dave Hansen <haveblue@us.ibm.com>

The current NUMA meminfo code exports pgdat->node_size (via sysfs) as
totalram.  This variable is consistently used elsewhere to mean "the number
of physical pages that this particular node spans".  This is _not_ what we
want to see from meminfo, which should answer: "how much actual memory does
this node have?"

The following patch removes pgdat->node_size, and replaces it with
->node_spanned_pages.  This is to avoid confusion with a new variable,
node_present_pages, which is the _actual_ value that we want to export in
meminfo.  Most of the patch is a simple s/node_size/node_spanned_pages/.
The node_size() macro is also removed, and replaced with new ones for
node_{spanned,present}_pages() to avoid confusion.

We were bitten by this problem in this bug:
	http://bugme.osdl.org/show_bug.cgi?id=818

Compiled and tested on NUMA-Q.
parent 98eb235b
...@@ -338,7 +338,7 @@ void __init mem_init(void) ...@@ -338,7 +338,7 @@ void __init mem_init(void)
lmem_map = node_mem_map(nid); lmem_map = node_mem_map(nid);
pfn = NODE_DATA(nid)->node_start_pfn; pfn = NODE_DATA(nid)->node_start_pfn;
for (i = 0; i < node_size(nid); i++, pfn++) for (i = 0; i < node_spanned_pages(nid); i++, pfn++)
if (page_is_ram(pfn) && PageReserved(lmem_map+i)) if (page_is_ram(pfn) && PageReserved(lmem_map+i))
reservedpages++; reservedpages++;
} }
...@@ -372,7 +372,7 @@ show_mem(void) ...@@ -372,7 +372,7 @@ show_mem(void)
printk("Free swap: %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10)); printk("Free swap: %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10));
for (nid = 0; nid < numnodes; nid++) { for (nid = 0; nid < numnodes; nid++) {
struct page * lmem_map = node_mem_map(nid); struct page * lmem_map = node_mem_map(nid);
i = node_size(nid); i = node_spanned_pages(nid);
while (i-- > 0) { while (i-- > 0) {
total++; total++;
if (PageReserved(lmem_map+i)) if (PageReserved(lmem_map+i))
......
...@@ -79,7 +79,7 @@ void show_mem(void) ...@@ -79,7 +79,7 @@ void show_mem(void)
struct page *page, *end; struct page *page, *end;
page = NODE_MEM_MAP(node); page = NODE_MEM_MAP(node);
end = page + NODE_DATA(node)->node_size; end = page + NODE_DATA(node)->node_spanned_pages;
do { do {
total++; total++;
...@@ -576,7 +576,7 @@ void __init mem_init(void) ...@@ -576,7 +576,7 @@ void __init mem_init(void)
for (node = 0; node < numnodes; node++) { for (node = 0; node < numnodes; node++) {
pg_data_t *pgdat = NODE_DATA(node); pg_data_t *pgdat = NODE_DATA(node);
if (pgdat->node_size != 0) if (pgdat->node_spanned_pages != 0)
totalram_pages += free_all_bootmem_node(pgdat); totalram_pages += free_all_bootmem_node(pgdat);
} }
......
...@@ -68,7 +68,7 @@ void show_mem(void) ...@@ -68,7 +68,7 @@ void show_mem(void)
page = NODE_MEM_MAP(0); page = NODE_MEM_MAP(0);
end = page + NODE_DATA(0)->node_size; end = page + NODE_DATA(0)->node_spanned_pages;
do { do {
total++; total++;
...@@ -353,7 +353,7 @@ void __init mem_init(void) ...@@ -353,7 +353,7 @@ void __init mem_init(void)
max_mapnr = virt_to_page(high_memory) - mem_map; max_mapnr = virt_to_page(high_memory) - mem_map;
/* this will put all unused low memory onto the freelists */ /* this will put all unused low memory onto the freelists */
if (pgdat->node_size != 0) if (pgdat->node_spanned_pages != 0)
totalram_pages += free_all_bootmem_node(pgdat); totalram_pages += free_all_bootmem_node(pgdat);
printk(KERN_INFO "Memory:"); printk(KERN_INFO "Memory:");
......
...@@ -34,7 +34,7 @@ void show_mem(void) ...@@ -34,7 +34,7 @@ void show_mem(void)
show_free_areas(); show_free_areas();
printk("Free swap: %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10)); printk("Free swap: %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10));
for_each_pgdat(pgdat) { for_each_pgdat(pgdat) {
for (i = 0; i < pgdat->node_size; ++i) { for (i = 0; i < pgdat->node_spanned_pages; ++i) {
page = pgdat->node_mem_map + i; page = pgdat->node_mem_map + i;
total++; total++;
if (PageHighMem(page)) if (PageHighMem(page))
......
...@@ -232,7 +232,7 @@ show_mem(void) ...@@ -232,7 +232,7 @@ show_mem(void)
printk("Free swap: %6dkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); printk("Free swap: %6dkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
for_each_pgdat(pgdat) { for_each_pgdat(pgdat) {
printk("Node ID: %d\n", pgdat->node_id); printk("Node ID: %d\n", pgdat->node_id);
for(i = 0; i < pgdat->node_size; i++) { for(i = 0; i < pgdat->node_spanned_pages; i++) {
if (PageReserved(pgdat->node_mem_map+i)) if (PageReserved(pgdat->node_mem_map+i))
reserved++; reserved++;
else if (PageSwapCache(pgdat->node_mem_map+i)) else if (PageSwapCache(pgdat->node_mem_map+i))
...@@ -240,7 +240,7 @@ show_mem(void) ...@@ -240,7 +240,7 @@ show_mem(void)
else if (page_count(pgdat->node_mem_map + i)) else if (page_count(pgdat->node_mem_map + i))
shared += page_count(pgdat->node_mem_map + i) - 1; shared += page_count(pgdat->node_mem_map + i) - 1;
} }
printk("\t%d pages of RAM\n", pgdat->node_size); printk("\t%d pages of RAM\n", pgdat->node_spanned_pages);
printk("\t%d reserved pages\n", reserved); printk("\t%d reserved pages\n", reserved);
printk("\t%d pages shared\n", shared); printk("\t%d pages shared\n", shared);
printk("\t%d pages swap cached\n", cached); printk("\t%d pages swap cached\n", cached);
......
...@@ -109,7 +109,7 @@ void show_mem(void) ...@@ -109,7 +109,7 @@ void show_mem(void)
show_free_areas(); show_free_areas();
printk("Free swap: %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10)); printk("Free swap: %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10));
for_each_pgdat(pgdat) { for_each_pgdat(pgdat) {
for (i = 0; i < pgdat->node_size; i++) { for (i = 0; i < pgdat->node_spanned_pages; i++) {
page = pgdat->node_mem_map + i; page = pgdat->node_mem_map + i;
total++; total++;
if (PageReserved(page)) if (PageReserved(page))
...@@ -564,7 +564,7 @@ void __init mem_init(void) ...@@ -564,7 +564,7 @@ void __init mem_init(void)
int nid; int nid;
for (nid = 0; nid < numnodes; nid++) { for (nid = 0; nid < numnodes; nid++) {
if (node_data[nid].node_size != 0) { if (node_data[nid].node_spanned_pages != 0) {
printk("freeing bootmem node %x\n", nid); printk("freeing bootmem node %x\n", nid);
totalram_pages += totalram_pages +=
free_all_bootmem_node(NODE_DATA(nid)); free_all_bootmem_node(NODE_DATA(nid));
......
...@@ -160,21 +160,21 @@ static int __init parse_numa_properties(void) ...@@ -160,21 +160,21 @@ static int __init parse_numa_properties(void)
* this simple case and complain if there is a gap in * this simple case and complain if there is a gap in
* memory * memory
*/ */
if (node_data[numa_domain].node_size) { if (node_data[numa_domain].node_spanned_pages) {
unsigned long shouldstart = unsigned long shouldstart =
node_data[numa_domain].node_start_pfn + node_data[numa_domain].node_start_pfn +
node_data[numa_domain].node_size; node_data[numa_domain].node_spanned_pages;
if (shouldstart != (start / PAGE_SIZE)) { if (shouldstart != (start / PAGE_SIZE)) {
printk(KERN_ERR "Hole in node, disabling " printk(KERN_ERR "Hole in node, disabling "
"region start %lx length %lx\n", "region start %lx length %lx\n",
start, size); start, size);
continue; continue;
} }
node_data[numa_domain].node_size += size / PAGE_SIZE; node_data[numa_domain].node_spanned_pages += size / PAGE_SIZE;
} else { } else {
node_data[numa_domain].node_start_pfn = node_data[numa_domain].node_start_pfn =
start / PAGE_SIZE; start / PAGE_SIZE;
node_data[numa_domain].node_size = size / PAGE_SIZE; node_data[numa_domain].node_spanned_pages = size / PAGE_SIZE;
} }
for (i = start ; i < (start+size); i += MEMORY_INCREMENT) for (i = start ; i < (start+size); i += MEMORY_INCREMENT)
...@@ -202,7 +202,7 @@ void setup_nonnuma(void) ...@@ -202,7 +202,7 @@ void setup_nonnuma(void)
map_cpu_to_node(i, 0); map_cpu_to_node(i, 0);
node_data[0].node_start_pfn = 0; node_data[0].node_start_pfn = 0;
node_data[0].node_size = lmb_end_of_DRAM() / PAGE_SIZE; node_data[0].node_spanned_pages = lmb_end_of_DRAM() / PAGE_SIZE;
for (i = 0 ; i < lmb_end_of_DRAM(); i += MEMORY_INCREMENT) for (i = 0 ; i < lmb_end_of_DRAM(); i += MEMORY_INCREMENT)
numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = 0; numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = 0;
...@@ -224,12 +224,12 @@ void __init do_init_bootmem(void) ...@@ -224,12 +224,12 @@ void __init do_init_bootmem(void)
unsigned long bootmem_paddr; unsigned long bootmem_paddr;
unsigned long bootmap_pages; unsigned long bootmap_pages;
if (node_data[nid].node_size == 0) if (node_data[nid].node_spanned_pages == 0)
continue; continue;
start_paddr = node_data[nid].node_start_pfn * PAGE_SIZE; start_paddr = node_data[nid].node_start_pfn * PAGE_SIZE;
end_paddr = start_paddr + end_paddr = start_paddr +
(node_data[nid].node_size * PAGE_SIZE); (node_data[nid].node_spanned_pages * PAGE_SIZE);
dbg("node %d\n", nid); dbg("node %d\n", nid);
dbg("start_paddr = %lx\n", start_paddr); dbg("start_paddr = %lx\n", start_paddr);
...@@ -311,7 +311,7 @@ void __init paging_init(void) ...@@ -311,7 +311,7 @@ void __init paging_init(void)
unsigned long start_pfn; unsigned long start_pfn;
unsigned long end_pfn; unsigned long end_pfn;
if (node_data[nid].node_size == 0) if (node_data[nid].node_spanned_pages == 0)
continue; continue;
start_pfn = plat_node_bdata[nid].node_boot_start >> PAGE_SHIFT; start_pfn = plat_node_bdata[nid].node_boot_start >> PAGE_SHIFT;
......
...@@ -64,7 +64,7 @@ void show_mem(void) ...@@ -64,7 +64,7 @@ void show_mem(void)
printk("Free swap: %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10)); printk("Free swap: %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10));
for_each_pgdat(pgdat) { for_each_pgdat(pgdat) {
for (i = 0; i < pgdat->node_size; ++i) { for (i = 0; i < pgdat->node_spanned_pages; ++i) {
page = pgdat->node_mem_map + i; page = pgdat->node_mem_map + i;
total++; total++;
if (PageReserved(page)) if (PageReserved(page))
......
...@@ -86,7 +86,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long en ...@@ -86,7 +86,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long en
memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t)); memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t));
NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid]; NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid];
NODE_DATA(nodeid)->node_start_pfn = start_pfn; NODE_DATA(nodeid)->node_start_pfn = start_pfn;
NODE_DATA(nodeid)->node_size = end_pfn - start_pfn; NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn;
/* Find a place for the bootmem map */ /* Find a place for the bootmem map */
bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
......
...@@ -31,7 +31,6 @@ extern pg_data_t node_data[]; ...@@ -31,7 +31,6 @@ extern pg_data_t node_data[];
#define pa_to_nid(pa) alpha_pa_to_nid(pa) #define pa_to_nid(pa) alpha_pa_to_nid(pa)
#define NODE_DATA(nid) (&node_data[(nid)]) #define NODE_DATA(nid) (&node_data[(nid)])
#define node_size(nid) (NODE_DATA(nid)->node_size)
#define node_localnr(pfn, nid) ((pfn) - NODE_DATA(nid)->node_start_pfn) #define node_localnr(pfn, nid) ((pfn) - NODE_DATA(nid)->node_start_pfn)
...@@ -124,7 +123,7 @@ PLAT_NODE_DATA_LOCALNR(unsigned long p, int n) ...@@ -124,7 +123,7 @@ PLAT_NODE_DATA_LOCALNR(unsigned long p, int n)
#define pfn_to_nid(pfn) pa_to_nid(((u64)pfn << PAGE_SHIFT)) #define pfn_to_nid(pfn) pa_to_nid(((u64)pfn << PAGE_SHIFT))
#define pfn_valid(pfn) \ #define pfn_valid(pfn) \
(((pfn) - node_start_pfn(pfn_to_nid(pfn))) < \ (((pfn) - node_start_pfn(pfn_to_nid(pfn))) < \
node_size(pfn_to_nid(pfn))) \ node_spanned_pages(pfn_to_nid(pfn))) \
#define virt_addr_valid(kaddr) pfn_valid((__pa(kaddr) >> PAGE_SHIFT)) #define virt_addr_valid(kaddr) pfn_valid((__pa(kaddr) >> PAGE_SHIFT))
......
...@@ -32,7 +32,6 @@ extern struct pglist_data *node_data[]; ...@@ -32,7 +32,6 @@ extern struct pglist_data *node_data[];
#define alloc_bootmem_low_pages_node(ignore, x) \ #define alloc_bootmem_low_pages_node(ignore, x) \
__alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, 0) __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, 0)
#define node_size(nid) (node_data[nid]->node_size)
#define node_localnr(pfn, nid) ((pfn) - node_data[nid]->node_start_pfn) #define node_localnr(pfn, nid) ((pfn) - node_data[nid]->node_start_pfn)
/* /*
...@@ -54,7 +53,7 @@ extern struct pglist_data *node_data[]; ...@@ -54,7 +53,7 @@ extern struct pglist_data *node_data[];
#define node_end_pfn(nid) \ #define node_end_pfn(nid) \
({ \ ({ \
pg_data_t *__pgdat = NODE_DATA(nid); \ pg_data_t *__pgdat = NODE_DATA(nid); \
__pgdat->node_start_pfn + __pgdat->node_size; \ __pgdat->node_start_pfn + __pgdat->node_spanned_pages; \
}) })
#define local_mapnr(kvaddr) \ #define local_mapnr(kvaddr) \
......
...@@ -24,7 +24,7 @@ extern plat_pg_data_t *plat_node_data[]; ...@@ -24,7 +24,7 @@ extern plat_pg_data_t *plat_node_data[];
#define PHYSADDR_TO_NID(pa) NASID_TO_COMPACT_NODEID(NASID_GET(pa)) #define PHYSADDR_TO_NID(pa) NASID_TO_COMPACT_NODEID(NASID_GET(pa))
#define PLAT_NODE_DATA(n) (plat_node_data[n]) #define PLAT_NODE_DATA(n) (plat_node_data[n])
#define PLAT_NODE_DATA_SIZE(n) (PLAT_NODE_DATA(n)->gendata.node_size) #define PLAT_NODE_DATA_SIZE(n) (PLAT_NODE_DATA(n)->gendata.node_spanned_pages)
#define PLAT_NODE_DATA_LOCALNR(p, n) \ #define PLAT_NODE_DATA_LOCALNR(p, n) \
(((p) >> PAGE_SHIFT) - PLAT_NODE_DATA(n)->gendata.node_start_pfn) (((p) >> PAGE_SHIFT) - PLAT_NODE_DATA(n)->gendata.node_start_pfn)
......
...@@ -54,7 +54,6 @@ static inline int pa_to_nid(unsigned long pa) ...@@ -54,7 +54,6 @@ static inline int pa_to_nid(unsigned long pa)
*/ */
#define NODE_DATA(nid) (&node_data[nid]) #define NODE_DATA(nid) (&node_data[nid])
#define node_size(nid) (NODE_DATA(nid)->node_size)
#define node_localnr(pfn, nid) ((pfn) - NODE_DATA(nid)->node_start_pfn) #define node_localnr(pfn, nid) ((pfn) - NODE_DATA(nid)->node_start_pfn)
/* /*
......
...@@ -40,8 +40,7 @@ static inline __attribute__((pure)) int phys_to_nid(unsigned long addr) ...@@ -40,8 +40,7 @@ static inline __attribute__((pure)) int phys_to_nid(unsigned long addr)
#define node_mem_map(nid) (NODE_DATA(nid)->node_mem_map) #define node_mem_map(nid) (NODE_DATA(nid)->node_mem_map)
#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) #define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn)
#define node_end_pfn(nid) (NODE_DATA(nid)->node_start_pfn + \ #define node_end_pfn(nid) (NODE_DATA(nid)->node_start_pfn + \
NODE_DATA(nid)->node_size) NODE_DATA(nid)->node_spanned_pages)
#define node_size(nid) (NODE_DATA(nid)->node_size)
#define local_mapnr(kvaddr) \ #define local_mapnr(kvaddr) \
( (__pa(kvaddr) >> PAGE_SHIFT) - node_start_pfn(kvaddr_to_nid(kvaddr)) ) ( (__pa(kvaddr) >> PAGE_SHIFT) - node_start_pfn(kvaddr_to_nid(kvaddr)) )
......
...@@ -184,12 +184,17 @@ typedef struct pglist_data { ...@@ -184,12 +184,17 @@ typedef struct pglist_data {
unsigned long *valid_addr_bitmap; unsigned long *valid_addr_bitmap;
struct bootmem_data *bdata; struct bootmem_data *bdata;
unsigned long node_start_pfn; unsigned long node_start_pfn;
unsigned long node_size; unsigned long node_present_pages; /* total number of physical pages */
unsigned long node_spanned_pages; /* total size of physical page
range, including holes */
int node_id; int node_id;
struct pglist_data *pgdat_next; struct pglist_data *pgdat_next;
wait_queue_head_t kswapd_wait; wait_queue_head_t kswapd_wait;
} pg_data_t; } pg_data_t;
#define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages)
#define node_spanned_pages(nid) (NODE_DATA(nid)->node_spanned_pages)
extern int numnodes; extern int numnodes;
extern struct pglist_data *pgdat_list; extern struct pglist_data *pgdat_list;
......
...@@ -903,7 +903,7 @@ void si_meminfo_node(struct sysinfo *val, int nid) ...@@ -903,7 +903,7 @@ void si_meminfo_node(struct sysinfo *val, int nid)
{ {
pg_data_t *pgdat = NODE_DATA(nid); pg_data_t *pgdat = NODE_DATA(nid);
val->totalram = pgdat->node_size; val->totalram = pgdat->node_present_pages;
val->freeram = nr_free_pages_pgdat(pgdat); val->freeram = nr_free_pages_pgdat(pgdat);
val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].present_pages; val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].present_pages;
val->freehigh = pgdat->node_zones[ZONE_HIGHMEM].free_pages; val->freehigh = pgdat->node_zones[ZONE_HIGHMEM].free_pages;
...@@ -1138,12 +1138,13 @@ static void __init calculate_zone_totalpages(struct pglist_data *pgdat, ...@@ -1138,12 +1138,13 @@ static void __init calculate_zone_totalpages(struct pglist_data *pgdat,
for (i = 0; i < MAX_NR_ZONES; i++) for (i = 0; i < MAX_NR_ZONES; i++)
totalpages += zones_size[i]; totalpages += zones_size[i];
pgdat->node_size = totalpages; pgdat->node_spanned_pages = totalpages;
realtotalpages = totalpages; realtotalpages = totalpages;
if (zholes_size) if (zholes_size)
for (i = 0; i < MAX_NR_ZONES; i++) for (i = 0; i < MAX_NR_ZONES; i++)
realtotalpages -= zholes_size[i]; realtotalpages -= zholes_size[i];
pgdat->node_present_pages = realtotalpages;
printk("On node %d totalpages: %lu\n", pgdat->node_id, realtotalpages); printk("On node %d totalpages: %lu\n", pgdat->node_id, realtotalpages);
} }
...@@ -1349,7 +1350,7 @@ void __init free_area_init_node(int nid, struct pglist_data *pgdat, ...@@ -1349,7 +1350,7 @@ void __init free_area_init_node(int nid, struct pglist_data *pgdat,
pgdat->node_start_pfn = node_start_pfn; pgdat->node_start_pfn = node_start_pfn;
calculate_zone_totalpages(pgdat, zones_size, zholes_size); calculate_zone_totalpages(pgdat, zones_size, zholes_size);
if (!node_mem_map) { if (!node_mem_map) {
size = (pgdat->node_size + 1) * sizeof(struct page); size = (pgdat->node_spanned_pages + 1) * sizeof(struct page);
node_mem_map = alloc_bootmem_node(pgdat, size); node_mem_map = alloc_bootmem_node(pgdat, size);
} }
pgdat->node_mem_map = node_mem_map; pgdat->node_mem_map = node_mem_map;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment