[PATCH] ppc64: physical RAM accounting fix

On machines with an IO hole (like an Apple G5 with more than 2GB of RAM,
but also some pSeries) we failed to properly account for the real
amount of physical RAM and to inform the zone allocator of our hole size.

In the process, I included Anton's slaughtering of the guard page we
had in the first 256MB kernel segment, thus allowing this segment to be
mapped with large pages as it should be.
parent 9f9f56a2
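
To make the accounting change easier to follow, here is a stand-alone,
user-space sketch (not kernel code) of what the patch now computes from the
LMB region list: total RAM as the sum of all regions, the end of DRAM as the
last region's end, and the IO hole as the first discontinuity between regions.
The region layout and all values below are made up for illustration (a rough
G5-like setup), and the struct is only a simplified stand-in for struct lmb.

#include <stdio.h>

/* Simplified stand-in for the kernel's LMB region list. */
struct region {
	unsigned long long base;	/* physical base of the region */
	unsigned long long size;	/* size of the region in bytes */
};

int main(void)
{
	/* made-up G5-like layout: 2GB below the IO hole, 1GB starting at 4GB */
	struct region mem[] = {
		{ 0x000000000ULL, 0x80000000ULL },
		{ 0x100000000ULL, 0x40000000ULL },
	};
	int cnt = 2, i;
	unsigned long long total = 0, top_of_ram;
	unsigned long long hole_start = 0, hole_size = 0;

	/* total RAM: sum of all regions (the old code effectively used
	 * only the last region's base + size) */
	for (i = 0; i < cnt; i++)
		total += mem[i].size;

	/* top of RAM: end of the last region, IO hole included */
	top_of_ram = mem[cnt - 1].base + mem[cnt - 1].size;

	/* assume the first discontinuity between regions is the IO hole */
	for (i = 1; i < cnt; i++) {
		unsigned long long prev_end = mem[i - 1].base + mem[i - 1].size;
		if (mem[i].base > prev_end) {
			hole_start = prev_end;
			hole_size = mem[i].base - prev_end;
			break;
		}
	}

	printf("Top of RAM: 0x%llx, Total RAM: 0x%llx\n", top_of_ram, total);
	printf("IO hole: 0x%llx -> 0x%llx (%llu MB)\n", hole_start,
	       hole_start + hole_size - 1, hole_size >> 20);
	/* the zone allocator is then told about a hole of
	 * (top_of_ram - total) bytes in ZONE_DMA */
	return 0;
}

With this layout the sketch reports a 5GB top of RAM, 3GB of actual RAM and a
2GB hole; that difference is exactly what the new zholes_size[ZONE_DMA]
accounting below feeds to free_area_init_node().
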
@@ -269,11 +269,13 @@ lmb_phys_mem_size(void)
 	return _lmb->memory.size;
 #else
 	struct lmb_region *_mem = &(_lmb->memory);
-	unsigned long idx = _mem->cnt-1;
-	unsigned long lastbase = _mem->region[idx].physbase;
-	unsigned long lastsize = _mem->region[idx].size;
+	unsigned long total = 0;
+	int i;
 
-	return (lastbase + lastsize);
+	/* add all physical memory to the bootmem map */
+	for (i=0; i < _mem->cnt; i++)
+		total += _mem->region[i].size;
+	return total;
 #endif /* CONFIG_MSCHUNKS */
 }
@@ -283,15 +285,13 @@ lmb_end_of_DRAM(void)
 	unsigned long offset = reloc_offset();
 	struct lmb *_lmb = PTRRELOC(&lmb);
 	struct lmb_region *_mem = &(_lmb->memory);
-	unsigned long idx;
+	int idx = _mem->cnt - 1;
 
-	for(idx=_mem->cnt-1; idx >= 0; idx--) {
 #ifdef CONFIG_MSCHUNKS
 	return (_mem->region[idx].physbase + _mem->region[idx].size);
 #else
 	return (_mem->region[idx].base + _mem->region[idx].size);
 #endif /* CONFIG_MSCHUNKS */
-	}
 
 	return 0;
 }
...
@@ -32,10 +32,14 @@ static void make_slbe(unsigned long esid, unsigned long vsid, int large,
 void stab_initialize(unsigned long stab)
 {
 	unsigned long esid, vsid;
+	int seg0_largepages = 0;
 
 	esid = GET_ESID(KERNELBASE);
 	vsid = get_kernel_vsid(esid << SID_SHIFT);
 
+	if (cur_cpu_spec->cpu_features & CPU_FTR_16M_PAGE)
+		seg0_largepages = 1;
+
 	if (cur_cpu_spec->cpu_features & CPU_FTR_SLB) {
 		/* Invalidate the entire SLB & all the ERATS */
 #ifdef CONFIG_PPC_ISERIES
@@ -44,7 +48,7 @@ void stab_initialize(unsigned long stab)
 		asm volatile("isync":::"memory");
 		asm volatile("slbmte %0,%0"::"r" (0) : "memory");
 		asm volatile("isync; slbia; isync":::"memory");
-		make_slbe(esid, vsid, 0, 1);
+		make_slbe(esid, vsid, seg0_largepages, 1);
 		asm volatile("isync":::"memory");
 #endif
 	} else {
...
@@ -123,6 +123,7 @@ void __init htab_initialize(void)
 	unsigned long table, htab_size_bytes;
 	unsigned long pteg_count;
 	unsigned long mode_rw;
+	int i, use_largepages = 0;
 
 	/*
 	 * Calculate the required size of the htab. We want the number of
@@ -165,18 +166,21 @@ void __init htab_initialize(void)
 
 	mode_rw = _PAGE_ACCESSED | _PAGE_COHERENT | PP_RWXX;
 
-	/* XXX we currently map kernel text rw, should fix this */
-	if ((cur_cpu_spec->cpu_features & CPU_FTR_16M_PAGE)
-	    && systemcfg->physicalMemorySize > 256*MB) {
-		create_pte_mapping((unsigned long)KERNELBASE,
-				   KERNELBASE + 256*MB, mode_rw, 0);
-		create_pte_mapping((unsigned long)KERNELBASE + 256*MB,
-				   KERNELBASE + (systemcfg->physicalMemorySize),
-				   mode_rw, 1);
-	} else {
-		create_pte_mapping((unsigned long)KERNELBASE,
-				   KERNELBASE+(systemcfg->physicalMemorySize),
-				   mode_rw, 0);
+	/* On U3 based machines, we need to reserve the DART area and
+	 * _NOT_ map it to avoid cache paradoxes as it's remapped non
+	 * cacheable later on
+	 */
+	if (cur_cpu_spec->cpu_features & CPU_FTR_16M_PAGE)
+		use_largepages = 1;
+
+	/* add all physical memory to the bootmem map */
+	for (i=0; i < lmb.memory.cnt; i++) {
+		unsigned long base, size;
+
+		base = lmb.memory.region[i].physbase + KERNELBASE;
+		size = lmb.memory.region[i].size;
+		create_pte_mapping(base, base + size, mode_rw, use_largepages);
 	}
 }
 #undef KB
...
@@ -91,6 +91,11 @@ unsigned long _ASR=0;
 /* max amount of RAM to use */
 unsigned long __max_memory;
 
+/* info on what we think the IO hole is */
+unsigned long io_hole_start;
+unsigned long io_hole_size;
+unsigned long top_of_ram;
+
 /* This is declared as we are using the more or less generic
  * include/asm-ppc64/tlb.h file -- tgall
  */
@@ -647,8 +652,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
  */
 void __init mm_init_ppc64(void)
 {
-	struct paca_struct *lpaca;
-	unsigned long guard_page, index;
+	unsigned long i;
 
 	ppc64_boot_msg(0x100, "MM Init");
@@ -660,20 +664,63 @@ void __init mm_init_ppc64(void)
 	mmu_context_queue.head = 0;
 	mmu_context_queue.tail = NUM_USER_CONTEXT-1;
 	mmu_context_queue.size = NUM_USER_CONTEXT;
-	for(index=0; index < NUM_USER_CONTEXT ;index++) {
-		mmu_context_queue.elements[index] = index+FIRST_USER_CONTEXT;
-	}
-
-	/* Setup guard pages for the Paca's */
-	for (index = 0; index < NR_CPUS; index++) {
-		lpaca = &paca[index];
-		guard_page = ((unsigned long)lpaca) + 0x1000;
-		ppc_md.hpte_updateboltedpp(PP_RXRX, guard_page);
-	}
+	for (i = 0; i < NUM_USER_CONTEXT; i++)
+		mmu_context_queue.elements[i] = i + FIRST_USER_CONTEXT;
+
+	/* This is the story of the IO hole... please, keep seated,
+	 * unfortunately, we are out of oxygen masks at the moment.
+	 * So we need some rough way to tell where your big IO hole
+	 * is. On pmac, it's between 2G and 4G, on POWER3, it's around
+	 * that area as well, on POWER4 we don't have one, etc...
+	 * We need that to implement something approx. decent for
+	 * page_is_ram() so that /dev/mem doesn't map cacheable IO space
+	 * when XFree requests some IO regions without using O_SYNC, and we
+	 * also need that as a "hint" when sizing the TCE table on POWER3.
+	 * So far, the simplest way that seems to work well enough for us is
+	 * to just assume that the first discontinuity in our physical
+	 * RAM layout is the IO hole. That may not be correct in the future
+	 * (and isn't on iSeries but then we don't care ;)
+	 */
+	top_of_ram = lmb_end_of_DRAM();
+#ifndef CONFIG_PPC_ISERIES
+	for (i = 1; i < lmb.memory.cnt; i++) {
+		unsigned long base, prevbase, prevsize;
+
+		prevbase = lmb.memory.region[i-1].physbase;
+		prevsize = lmb.memory.region[i-1].size;
+		base = lmb.memory.region[i].physbase;
+		if (base > (prevbase + prevsize)) {
+			io_hole_start = prevbase + prevsize;
+			io_hole_size = base - (prevbase + prevsize);
+			break;
+		}
+	}
+#endif /* CONFIG_PPC_ISERIES */
+	if (io_hole_start)
+		printk("IO Hole assumed to be %lx -> %lx\n",
+		       io_hole_start, io_hole_start + io_hole_size - 1);
 
 	ppc64_boot_msg(0x100, "MM Init Done");
 }
 
+/*
+ * This is called by /dev/mem to know if a given address has to
+ * be mapped non-cacheable or not
+ */
+int page_is_ram(unsigned long physaddr)
+{
+#ifdef CONFIG_PPC_ISERIES
+	return 1;
+#endif
+	if (physaddr >= top_of_ram)
+		return 0;
+	return io_hole_start == 0 || physaddr < io_hole_start ||
+		physaddr >= (io_hole_start + io_hole_size);
+}
+
 /*
  * Initialize the bootmem system and give it all the memory we
  * have available.
@@ -698,7 +745,7 @@ void __init do_init_bootmem(void)
 
 	boot_mapsize = init_bootmem(start >> PAGE_SHIFT, total_pages);
 
-	/* add all physical memory to the bootmem map */
+	/* add all physical memory to the bootmem map. Also find the first */
 	for (i=0; i < lmb.memory.cnt; i++) {
 		unsigned long physbase, size;
@@ -721,17 +768,28 @@ void __init do_init_bootmem(void)
  */
 void __init paging_init(void)
 {
-	unsigned long zones_size[MAX_NR_ZONES], i;
+	unsigned long zones_size[MAX_NR_ZONES];
+	unsigned long zholes_size[MAX_NR_ZONES];
+	unsigned long total_ram = lmb_phys_mem_size();
+
+	printk(KERN_INFO "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
+	       top_of_ram, total_ram);
+	printk(KERN_INFO "Memory hole size: %ldMB\n",
+	       (top_of_ram - total_ram) >> 20);
 
 	/*
 	 * All pages are DMA-able so we put them all in the DMA zone.
 	 */
-	zones_size[ZONE_DMA] = lmb_end_of_DRAM() >> PAGE_SHIFT;
-	for (i = 1; i < MAX_NR_ZONES; i++)
-		zones_size[i] = 0;
+	memset(zones_size, 0, sizeof(zones_size));
+	memset(zholes_size, 0, sizeof(zholes_size));
 
-	free_area_init(zones_size);
+	zones_size[ZONE_DMA] = top_of_ram >> PAGE_SHIFT;
+	zholes_size[ZONE_DMA] = (top_of_ram - total_ram) >> PAGE_SHIFT;
+
+	free_area_init_node(0, &contig_page_data, NULL, zones_size,
+			    __pa(PAGE_OFFSET) >> PAGE_SHIFT, zholes_size);
+	mem_map = contig_page_data.node_mem_map;
 }
-#endif
+#endif /* CONFIG_DISCONTIGMEM */
 
 static struct kcore_list kcore_vmem;
...
@@ -30,6 +30,7 @@ int nr_cpus_in_node[MAX_NUMNODES] = { [0 ... (MAX_NUMNODES -1)] = 0};
 struct pglist_data node_data[MAX_NUMNODES];
 bootmem_data_t plat_node_bdata[MAX_NUMNODES];
+static unsigned long node0_io_hole_size;
 
 EXPORT_SYMBOL(node_data);
 EXPORT_SYMBOL(numa_memory_lookup_table);
@@ -205,8 +206,15 @@ static int __init parse_numa_properties(void)
 static void __init setup_nonnuma(void)
 {
+	unsigned long top_of_ram = lmb_end_of_DRAM();
+	unsigned long total_ram = lmb_phys_mem_size();
 	unsigned long i;
 
+	printk(KERN_INFO "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
+	       top_of_ram, total_ram);
+	printk(KERN_INFO "Memory hole size: %ldMB\n",
+	       (top_of_ram - total_ram) >> 20);
+
 	for (i = 0; i < NR_CPUS; i++)
 		map_cpu_to_node(i, 0);
@@ -215,8 +223,10 @@ static void __init setup_nonnuma(void)
 	node_data[0].node_start_pfn = 0;
 	node_data[0].node_spanned_pages = lmb_end_of_DRAM() / PAGE_SIZE;
 
-	for (i = 0 ; i < lmb_end_of_DRAM(); i += MEMORY_INCREMENT)
+	for (i = 0 ; i < top_of_ram; i += MEMORY_INCREMENT)
 		numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = 0;
+
+	node0_io_hole_size = top_of_ram - total_ram;
 }
 
 void __init do_init_bootmem(void)
@@ -309,11 +319,12 @@ void __init do_init_bootmem(void)
 void __init paging_init(void)
 {
 	unsigned long zones_size[MAX_NR_ZONES];
-	int i, nid;
+	unsigned long zholes_size[MAX_NR_ZONES];
 	struct page *node_mem_map;
+	int nid;
 
-	for (i = 1; i < MAX_NR_ZONES; i++)
-		zones_size[i] = 0;
+	memset(zones_size, 0, sizeof(zones_size));
+	memset(zholes_size, 0, sizeof(zholes_size));
 
 	for (nid = 0; nid < numnodes; nid++) {
 		unsigned long start_pfn;
@@ -323,8 +334,12 @@ void __init paging_init(void)
 		end_pfn = plat_node_bdata[nid].node_low_pfn;
 
 		zones_size[ZONE_DMA] = end_pfn - start_pfn;
-		dbg("free_area_init node %d %lx %lx\n", nid,
-		    zones_size[ZONE_DMA], start_pfn);
+		zholes_size[ZONE_DMA] = 0;
+		if (nid == 0)
+			zholes_size[ZONE_DMA] = node0_io_hole_size;
+
+		dbg("free_area_init node %d %lx %lx (hole: %lx)\n", nid,
+		    zones_size[ZONE_DMA], start_pfn, zholes_size[ZONE_DMA]);
 
 		/*
 		 * Give this empty node a dummy struct page to avoid
@@ -337,6 +352,6 @@ void __init paging_init(void)
 			node_mem_map = NULL;
 
 		free_area_init_node(nid, NODE_DATA(nid), node_mem_map,
-				    zones_size, start_pfn, NULL);
+				    zones_size, start_pfn, zholes_size);
 	}
 }
@@ -163,6 +163,9 @@ static inline int get_order(unsigned long size)
 
 #define __pa(x) ((unsigned long)(x)-PAGE_OFFSET)
 
+/* Not 100% correct, for use by /dev/mem only */
+extern int page_is_ram(unsigned long physaddr);
+
 #endif /* __ASSEMBLY__ */
 
 #ifdef MODULE
...
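
As a footnote on the new page_is_ram() helper: its decision reduces to a range
check against top_of_ram and the detected hole. The stand-alone sketch below
(user-space, with made-up hole values matching the earlier example, not the
kernel function itself) walks a few sample addresses through the same test.

#include <stdio.h>

/* made-up values standing in for the globals set up in mm_init_ppc64() */
static unsigned long long top_of_ram    = 0x140000000ULL; /* 5GB */
static unsigned long long io_hole_start = 0x080000000ULL; /* 2GB */
static unsigned long long io_hole_size  = 0x080000000ULL; /* 2GB wide */

/* same range check as the patched page_is_ram(), minus the iSeries case */
static int page_is_ram(unsigned long long physaddr)
{
	if (physaddr >= top_of_ram)
		return 0;
	return io_hole_start == 0 || physaddr < io_hole_start ||
	       physaddr >= (io_hole_start + io_hole_size);
}

int main(void)
{
	unsigned long long samples[] = {
		0x010000000ULL,	/* below the hole: RAM */
		0x090000000ULL,	/* inside the hole: IO space */
		0x100000000ULL,	/* above the hole, below top_of_ram: RAM */
		0x180000000ULL,	/* beyond top_of_ram: not RAM */
	};
	int i, n = (int)(sizeof(samples) / sizeof(samples[0]));

	for (i = 0; i < n; i++)
		printf("0x%llx -> %s\n", samples[i],
		       page_is_ram(samples[i]) ? "RAM" : "not RAM");
	return 0;
}

Anything beyond top_of_ram or inside the hole is treated as IO space, which is
what keeps /dev/mem from handing out cacheable mappings of it.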