Commit c9e37353 authored by Heiko Carstens's avatar Heiko Carstens Committed by Linus Torvalds

[PATCH] s390: fix memory holes and cleanup setup_arch

The memory setup didn't take care of memory holes and this makes the memory
management think there would be more memory available than there is in
reality.  That causes the OOM killer to kill processes even if there is enough
memory left that can be written to the swap space.

The patch fixes this by using free_area_init_node with an array of memory
holes instead of free_area_init.  Further the patch cleans up the code in
setup.c by splitting setup_arch into smaller pieces.
Signed-off-by: default avatarMartin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: default avatarAndrew Morton <akpm@osdl.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@osdl.org>
parent 4b7e0706
...@@ -60,6 +60,8 @@ struct { ...@@ -60,6 +60,8 @@ struct {
#define CHUNK_READ_WRITE 0 #define CHUNK_READ_WRITE 0
#define CHUNK_READ_ONLY 1 #define CHUNK_READ_ONLY 1
volatile int __cpu_logical_map[NR_CPUS]; /* logical cpu to cpu address */ volatile int __cpu_logical_map[NR_CPUS]; /* logical cpu to cpu address */
unsigned long __initdata zholes_size[MAX_NR_ZONES];
static unsigned long __initdata memory_end;
/* /*
* Setup options * Setup options
...@@ -78,11 +80,15 @@ static char command_line[COMMAND_LINE_SIZE] = { 0, }; ...@@ -78,11 +80,15 @@ static char command_line[COMMAND_LINE_SIZE] = { 0, };
static struct resource code_resource = { static struct resource code_resource = {
.name = "Kernel code", .name = "Kernel code",
.start = (unsigned long) &_text,
.end = (unsigned long) &_etext - 1,
.flags = IORESOURCE_BUSY | IORESOURCE_MEM, .flags = IORESOURCE_BUSY | IORESOURCE_MEM,
}; };
static struct resource data_resource = { static struct resource data_resource = {
.name = "Kernel data", .name = "Kernel data",
.start = (unsigned long) &_etext,
.end = (unsigned long) &_edata - 1,
.flags = IORESOURCE_BUSY | IORESOURCE_MEM, .flags = IORESOURCE_BUSY | IORESOURCE_MEM,
}; };
...@@ -310,90 +316,50 @@ void machine_power_off(void) ...@@ -310,90 +316,50 @@ void machine_power_off(void)
EXPORT_SYMBOL(machine_power_off); EXPORT_SYMBOL(machine_power_off);
/* static void __init
* Setup function called from init/main.c just after the banner add_memory_hole(unsigned long start, unsigned long end)
* was printed.
*/
extern char _pstart, _pend, _stext;
void __init setup_arch(char **cmdline_p)
{ {
unsigned long bootmap_size; unsigned long dma_pfn = MAX_DMA_ADDRESS >> PAGE_SHIFT;
unsigned long memory_start, memory_end;
char c = ' ', cn, *to = command_line, *from = COMMAND_LINE; if (end <= dma_pfn)
unsigned long start_pfn, end_pfn; zholes_size[ZONE_DMA] += end - start + 1;
static unsigned int smptrap=0; else if (start > dma_pfn)
unsigned long delay = 0; zholes_size[ZONE_NORMAL] += end - start + 1;
struct _lowcore *lc; else {
int i; zholes_size[ZONE_DMA] += dma_pfn - start + 1;
zholes_size[ZONE_NORMAL] += end - dma_pfn;
}
}
if (smptrap) static void __init
return; parse_cmdline_early(char **cmdline_p)
smptrap=1; {
char c = ' ', cn, *to = command_line, *from = COMMAND_LINE;
unsigned long delay = 0;
/* /* Save unparsed command line copy for /proc/cmdline */
* print what head.S has found out about the machine memcpy(saved_command_line, COMMAND_LINE, COMMAND_LINE_SIZE);
*/ saved_command_line[COMMAND_LINE_SIZE-1] = '\0';
#ifndef CONFIG_ARCH_S390X
printk((MACHINE_IS_VM) ?
"We are running under VM (31 bit mode)\n" :
"We are running native (31 bit mode)\n");
printk((MACHINE_HAS_IEEE) ?
"This machine has an IEEE fpu\n" :
"This machine has no IEEE fpu\n");
#else /* CONFIG_ARCH_S390X */
printk((MACHINE_IS_VM) ?
"We are running under VM (64 bit mode)\n" :
"We are running native (64 bit mode)\n");
#endif /* CONFIG_ARCH_S390X */
ROOT_DEV = Root_RAM0; for (;;) {
memory_start = (unsigned long) &_end; /* fixit if use $CODELO etc*/ /*
#ifndef CONFIG_ARCH_S390X * "mem=XXX[kKmM]" sets memsize
memory_end = memory_size & ~0x400000UL; /* align memory end to 4MB */ */
/* if (c == ' ' && strncmp(from, "mem=", 4) == 0) {
* We need some free virtual space to be able to do vmalloc. memory_end = simple_strtoul(from+4, &from, 0);
* On a machine with 2GB memory we make sure that we have at if ( *from == 'K' || *from == 'k' ) {
* least 128 MB free space for vmalloc. memory_end = memory_end << 10;
*/ from++;
if (memory_end > 1920*1024*1024) } else if ( *from == 'M' || *from == 'm' ) {
memory_end = 1920*1024*1024; memory_end = memory_end << 20;
#else /* CONFIG_ARCH_S390X */ from++;
memory_end = memory_size & ~0x200000UL; /* detected in head.s */ }
#endif /* CONFIG_ARCH_S390X */ }
init_mm.start_code = PAGE_OFFSET; /*
init_mm.end_code = (unsigned long) &_etext; * "ipldelay=XXX[sm]" sets ipl delay in seconds or minutes
init_mm.end_data = (unsigned long) &_edata; */
init_mm.brk = (unsigned long) &_end; if (c == ' ' && strncmp(from, "ipldelay=", 9) == 0) {
delay = simple_strtoul(from+9, &from, 0);
code_resource.start = (unsigned long) &_text;
code_resource.end = (unsigned long) &_etext - 1;
data_resource.start = (unsigned long) &_etext;
data_resource.end = (unsigned long) &_edata - 1;
/* Save unparsed command line copy for /proc/cmdline */
memcpy(saved_command_line, COMMAND_LINE, COMMAND_LINE_SIZE);
saved_command_line[COMMAND_LINE_SIZE-1] = '\0';
for (;;) {
/*
* "mem=XXX[kKmM]" sets memsize
*/
if (c == ' ' && strncmp(from, "mem=", 4) == 0) {
memory_end = simple_strtoul(from+4, &from, 0);
if ( *from == 'K' || *from == 'k' ) {
memory_end = memory_end << 10;
from++;
} else if ( *from == 'M' || *from == 'm' ) {
memory_end = memory_end << 20;
from++;
}
}
/*
* "ipldelay=XXX[sm]" sets ipl delay in seconds or minutes
*/
if (c == ' ' && strncmp(from, "ipldelay=", 9) == 0) {
delay = simple_strtoul(from+9, &from, 0);
if (*from == 's' || *from == 'S') { if (*from == 's' || *from == 'S') {
delay = delay*1000000; delay = delay*1000000;
from++; from++;
...@@ -403,24 +369,110 @@ void __init setup_arch(char **cmdline_p) ...@@ -403,24 +369,110 @@ void __init setup_arch(char **cmdline_p)
} }
/* now wait for the requested amount of time */ /* now wait for the requested amount of time */
udelay(delay); udelay(delay);
} }
cn = *(from++); cn = *(from++);
if (!cn) if (!cn)
break; break;
if (cn == '\n') if (cn == '\n')
cn = ' '; /* replace newlines with space */ cn = ' '; /* replace newlines with space */
if (cn == 0x0d) if (cn == 0x0d)
cn = ' '; /* replace 0x0d with space */ cn = ' '; /* replace 0x0d with space */
if (cn == ' ' && c == ' ') if (cn == ' ' && c == ' ')
continue; /* remove additional spaces */ continue; /* remove additional spaces */
c = cn; c = cn;
if (to - command_line >= COMMAND_LINE_SIZE) if (to - command_line >= COMMAND_LINE_SIZE)
break; break;
*(to++) = c; *(to++) = c;
} }
if (c == ' ' && to > command_line) to--; if (c == ' ' && to > command_line) to--;
*to = '\0'; *to = '\0';
*cmdline_p = command_line; *cmdline_p = command_line;
}
static void __init
setup_lowcore(void)
{
struct _lowcore *lc;
int lc_pages;
/*
* Setup lowcore for boot cpu
*/
lc_pages = sizeof(void *) == 8 ? 2 : 1;
lc = (struct _lowcore *)
__alloc_bootmem(lc_pages * PAGE_SIZE, lc_pages * PAGE_SIZE, 0);
memset(lc, 0, lc_pages * PAGE_SIZE);
lc->restart_psw.mask = PSW_BASE_BITS;
lc->restart_psw.addr =
PSW_ADDR_AMODE | (unsigned long) restart_int_handler;
lc->external_new_psw.mask = PSW_KERNEL_BITS;
lc->external_new_psw.addr =
PSW_ADDR_AMODE | (unsigned long) ext_int_handler;
lc->svc_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_IO | PSW_MASK_EXT;
lc->svc_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) system_call;
lc->program_new_psw.mask = PSW_KERNEL_BITS;
lc->program_new_psw.addr =
PSW_ADDR_AMODE | (unsigned long)pgm_check_handler;
lc->mcck_new_psw.mask = PSW_KERNEL_BITS;
lc->mcck_new_psw.addr =
PSW_ADDR_AMODE | (unsigned long) mcck_int_handler;
lc->io_new_psw.mask = PSW_KERNEL_BITS;
lc->io_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) io_int_handler;
lc->ipl_device = S390_lowcore.ipl_device;
lc->jiffy_timer = -1LL;
lc->kernel_stack = ((unsigned long) &init_thread_union) + THREAD_SIZE;
lc->async_stack = (unsigned long)
__alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0) + ASYNC_SIZE;
#ifdef CONFIG_CHECK_STACK
lc->panic_stack = (unsigned long)
__alloc_bootmem(PAGE_SIZE, PAGE_SIZE, 0) + PAGE_SIZE;
#endif
lc->current_task = (unsigned long) init_thread_union.thread_info.task;
lc->thread_info = (unsigned long) &init_thread_union;
#ifdef CONFIG_ARCH_S390X
if (MACHINE_HAS_DIAG44)
lc->diag44_opcode = 0x83000044;
else
lc->diag44_opcode = 0x07000700;
#endif /* CONFIG_ARCH_S390X */
set_prefix((u32)(unsigned long) lc);
}
static void __init
setup_resources(void)
{
struct resource *res;
int i;
for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) {
res = alloc_bootmem_low(sizeof(struct resource));
res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
switch (memory_chunk[i].type) {
case CHUNK_READ_WRITE:
res->name = "System RAM";
break;
case CHUNK_READ_ONLY:
res->name = "System ROM";
res->flags |= IORESOURCE_READONLY;
break;
default:
res->name = "reserved";
}
res->start = memory_chunk[i].addr;
res->end = memory_chunk[i].addr + memory_chunk[i].size - 1;
request_resource(&iomem_resource, res);
request_resource(res, &code_resource);
request_resource(res, &data_resource);
}
}
static void __init
setup_memory(void)
{
unsigned long bootmap_size;
unsigned long start_pfn, end_pfn;
unsigned long last_rw_end;
int i;
/* /*
* partially used pages are not usable - thus * partially used pages are not usable - thus
...@@ -437,6 +489,8 @@ void __init setup_arch(char **cmdline_p) ...@@ -437,6 +489,8 @@ void __init setup_arch(char **cmdline_p)
/* /*
* Register RAM areas with the bootmem allocator. * Register RAM areas with the bootmem allocator.
*/ */
last_rw_end = start_pfn;
for (i = 0; i < 16 && memory_chunk[i].size > 0; i++) { for (i = 0; i < 16 && memory_chunk[i].size > 0; i++) {
unsigned long start_chunk, end_chunk; unsigned long start_chunk, end_chunk;
...@@ -450,102 +504,91 @@ void __init setup_arch(char **cmdline_p) ...@@ -450,102 +504,91 @@ void __init setup_arch(char **cmdline_p)
start_chunk = start_pfn; start_chunk = start_pfn;
if (end_chunk > end_pfn) if (end_chunk > end_pfn)
end_chunk = end_pfn; end_chunk = end_pfn;
if (start_chunk < end_chunk) if (start_chunk < end_chunk) {
free_bootmem(start_chunk << PAGE_SHIFT, free_bootmem(start_chunk << PAGE_SHIFT,
(end_chunk - start_chunk) << PAGE_SHIFT); (end_chunk - start_chunk) << PAGE_SHIFT);
if (last_rw_end < start_chunk)
add_memory_hole(last_rw_end, start_chunk - 1);
last_rw_end = end_chunk;
}
} }
/* if (last_rw_end < end_pfn - 1)
* Reserve the bootmem bitmap itself as well. We do this in two add_memory_hole(last_rw_end, end_pfn - 1);
* steps (first step was init_bootmem()) because this catches
* the (very unlikely) case of us accidentally initializing the /*
* bootmem allocator with an invalid RAM area. * Reserve the bootmem bitmap itself as well. We do this in two
*/ * steps (first step was init_bootmem()) because this catches
reserve_bootmem(start_pfn << PAGE_SHIFT, bootmap_size); * the (very unlikely) case of us accidentally initializing the
* bootmem allocator with an invalid RAM area.
*/
reserve_bootmem(start_pfn << PAGE_SHIFT, bootmap_size);
#ifdef CONFIG_BLK_DEV_INITRD #ifdef CONFIG_BLK_DEV_INITRD
if (INITRD_START) { if (INITRD_START) {
if (INITRD_START + INITRD_SIZE <= memory_end) { if (INITRD_START + INITRD_SIZE <= memory_end) {
reserve_bootmem(INITRD_START, INITRD_SIZE); reserve_bootmem(INITRD_START, INITRD_SIZE);
initrd_start = INITRD_START; initrd_start = INITRD_START;
initrd_end = initrd_start + INITRD_SIZE; initrd_end = initrd_start + INITRD_SIZE;
} else { } else {
printk("initrd extends beyond end of memory " printk("initrd extends beyond end of memory "
"(0x%08lx > 0x%08lx)\ndisabling initrd\n", "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
initrd_start + INITRD_SIZE, memory_end); initrd_start + INITRD_SIZE, memory_end);
initrd_start = initrd_end = 0; initrd_start = initrd_end = 0;
} }
} }
#endif #endif
}
for (i = 0; i < 16 && memory_chunk[i].size > 0; i++) { /*
struct resource *res; * Setup function called from init/main.c just after the banner
* was printed.
res = alloc_bootmem_low(sizeof(struct resource)); */
res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
switch (memory_chunk[i].type) {
case CHUNK_READ_WRITE:
res->name = "System RAM";
break;
case CHUNK_READ_ONLY:
res->name = "System ROM";
res->flags |= IORESOURCE_READONLY;
break;
default:
res->name = "reserved";
}
res->start = memory_chunk[i].addr;
res->end = memory_chunk[i].addr + memory_chunk[i].size - 1;
request_resource(&iomem_resource, res);
request_resource(res, &code_resource);
request_resource(res, &data_resource);
}
void __init
setup_arch(char **cmdline_p)
{
/* /*
* Setup lowcore for boot cpu * print what head.S has found out about the machine
*/ */
#ifndef CONFIG_ARCH_S390X #ifndef CONFIG_ARCH_S390X
lc = (struct _lowcore *) __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, 0); printk((MACHINE_IS_VM) ?
memset(lc, 0, PAGE_SIZE); "We are running under VM (31 bit mode)\n" :
"We are running native (31 bit mode)\n");
printk((MACHINE_HAS_IEEE) ?
"This machine has an IEEE fpu\n" :
"This machine has no IEEE fpu\n");
#else /* CONFIG_ARCH_S390X */ #else /* CONFIG_ARCH_S390X */
lc = (struct _lowcore *) __alloc_bootmem(2*PAGE_SIZE, 2*PAGE_SIZE, 0); printk((MACHINE_IS_VM) ?
memset(lc, 0, 2*PAGE_SIZE); "We are running under VM (64 bit mode)\n" :
"We are running native (64 bit mode)\n");
#endif /* CONFIG_ARCH_S390X */ #endif /* CONFIG_ARCH_S390X */
lc->restart_psw.mask = PSW_BASE_BITS;
lc->restart_psw.addr = ROOT_DEV = Root_RAM0;
PSW_ADDR_AMODE | (unsigned long) restart_int_handler; #ifndef CONFIG_ARCH_S390X
lc->external_new_psw.mask = PSW_KERNEL_BITS; memory_end = memory_size & ~0x400000UL; /* align memory end to 4MB */
lc->external_new_psw.addr = /*
PSW_ADDR_AMODE | (unsigned long) ext_int_handler; * We need some free virtual space to be able to do vmalloc.
lc->svc_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_IO | PSW_MASK_EXT; * On a machine with 2GB memory we make sure that we have at
lc->svc_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) system_call; * least 128 MB free space for vmalloc.
lc->program_new_psw.mask = PSW_KERNEL_BITS; */
lc->program_new_psw.addr = if (memory_end > 1920*1024*1024)
PSW_ADDR_AMODE | (unsigned long)pgm_check_handler; memory_end = 1920*1024*1024;
lc->mcck_new_psw.mask = PSW_KERNEL_BITS; #else /* CONFIG_ARCH_S390X */
lc->mcck_new_psw.addr = memory_end = memory_size & ~0x200000UL; /* detected in head.s */
PSW_ADDR_AMODE | (unsigned long) mcck_int_handler;
lc->io_new_psw.mask = PSW_KERNEL_BITS;
lc->io_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) io_int_handler;
lc->ipl_device = S390_lowcore.ipl_device;
lc->jiffy_timer = -1LL;
lc->kernel_stack = ((unsigned long) &init_thread_union) + THREAD_SIZE;
lc->async_stack = (unsigned long)
__alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0) + ASYNC_SIZE;
#ifdef CONFIG_CHECK_STACK
lc->panic_stack = (unsigned long)
__alloc_bootmem(PAGE_SIZE, PAGE_SIZE, 0) + PAGE_SIZE;
#endif
lc->current_task = (unsigned long) init_thread_union.thread_info.task;
lc->thread_info = (unsigned long) &init_thread_union;
#ifdef CONFIG_ARCH_S390X
if (MACHINE_HAS_DIAG44)
lc->diag44_opcode = 0x83000044;
else
lc->diag44_opcode = 0x07000700;
#endif /* CONFIG_ARCH_S390X */ #endif /* CONFIG_ARCH_S390X */
set_prefix((u32)(unsigned long) lc);
init_mm.start_code = PAGE_OFFSET;
init_mm.end_code = (unsigned long) &_etext;
init_mm.end_data = (unsigned long) &_edata;
init_mm.brk = (unsigned long) &_end;
parse_cmdline_early(cmdline_p);
setup_memory();
setup_resources();
setup_lowcore();
cpu_init(); cpu_init();
__cpu_logical_map[0] = S390_lowcore.cpu_data.cpu_addr; __cpu_logical_map[0] = S390_lowcore.cpu_data.cpu_addr;
......
...@@ -101,6 +101,7 @@ extern unsigned long _end; ...@@ -101,6 +101,7 @@ extern unsigned long _end;
extern unsigned long __init_begin; extern unsigned long __init_begin;
extern unsigned long __init_end; extern unsigned long __init_end;
extern unsigned long __initdata zholes_size[];
/* /*
* paging_init() sets up the page tables * paging_init() sets up the page tables
*/ */
...@@ -163,10 +164,13 @@ void __init paging_init(void) ...@@ -163,10 +164,13 @@ void __init paging_init(void)
local_flush_tlb(); local_flush_tlb();
{ {
unsigned long zones_size[MAX_NR_ZONES] = { 0, 0, 0}; unsigned long zones_size[MAX_NR_ZONES];
memset(zones_size, 0, sizeof(zones_size));
zones_size[ZONE_DMA] = max_low_pfn; zones_size[ZONE_DMA] = max_low_pfn;
free_area_init(zones_size); free_area_init_node(0, &contig_page_data, zones_size,
__pa(PAGE_OFFSET) >> PAGE_SHIFT,
zholes_size);
} }
return; return;
} }
...@@ -184,9 +188,10 @@ void __init paging_init(void) ...@@ -184,9 +188,10 @@ void __init paging_init(void)
_KERN_REGION_TABLE; _KERN_REGION_TABLE;
static const int ssm_mask = 0x04000000L; static const int ssm_mask = 0x04000000L;
unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; unsigned long zones_size[MAX_NR_ZONES];
unsigned long dma_pfn, high_pfn; unsigned long dma_pfn, high_pfn;
memset(zones_size, 0, sizeof(zones_size));
dma_pfn = MAX_DMA_ADDRESS >> PAGE_SHIFT; dma_pfn = MAX_DMA_ADDRESS >> PAGE_SHIFT;
high_pfn = max_low_pfn; high_pfn = max_low_pfn;
...@@ -198,8 +203,8 @@ void __init paging_init(void) ...@@ -198,8 +203,8 @@ void __init paging_init(void)
} }
/* Initialize mem_map[]. */ /* Initialize mem_map[]. */
free_area_init(zones_size); free_area_init_node(0, &contig_page_data, zones_size,
__pa(PAGE_OFFSET) >> PAGE_SHIFT, zholes_size);
/* /*
* map whole physical memory to virtual memory (identity mapping) * map whole physical memory to virtual memory (identity mapping)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment