Commit fb06dbb1, authored by Andi Kleen and committed by Linus Torvalds

[PATCH] x86-64 - new memory map handling

New e820 memory map handling for x86-64.  Move it all to a new file and
clean it up a lot.  Add some simple allocator functions to deal with
holey e820 mappings cleanly.  A lot of this is preparation for NUMA (which
works in 2.4, but is not ported to 2.5 yet).
parent dbb2ae18
/*
* Handle the memory map.
* The functions here do the job until bootmem takes over.
* $Id: e820.c,v 1.4 2002/09/19 19:25:32 ak Exp $
* AK: some of these functions are not used in 2.5 yet but they will be when
* NUMA is completely merged.
*/
#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/ioport.h>
#include <linux/string.h>
#include <asm/page.h>
#include <asm/e820.h>
#include <asm/proto.h>
#include <asm/bootsetup.h>
/*
 * Bounds used for the "direct mapping tables of the kernel" check; in this
 * file they are only referenced from the disabled (#if 0) part of bad_addr().
 */
extern unsigned long table_start, table_end;
/* End of the kernel image; bad_addr() uses __pa_symbol(&_end) as the upper bound of the area it protects. */
extern char _end[];
/*
 * Resources defined outside this file.  e820_reserve_resources() requests
 * code_resource and data_resource underneath each RAM region; vram_resource
 * is declared here but not referenced by the code in this file.
 */
extern struct resource code_resource, data_resource, vram_resource;
/*
 * Check a candidate allocation against hardcoded areas that early boot
 * is not allowed to touch.
 *
 * addrp: in:  start address of the candidate area.
 *        out: when a conflict is reported, the first address past the
 *             conflicting area; otherwise unchanged.
 * size:  length of the candidate area in bytes.
 *
 * Returns 1 if one of the checks fired (and *addrp was moved), 0 if none
 * did.  At most one conflict is reported per call, so a caller that gets 1
 * has to check the new address again.
 */
static inline int bad_addr(unsigned long *addrp, unsigned long size)
{
	const unsigned long first = *addrp;
	const unsigned long past = first + size;
	unsigned long kernel_end;

	/* The first 7 pages hold data needed for SMP startup. */
	if (first < 7*PAGE_SIZE) {
		*addrp = 7*PAGE_SIZE;
		return 1;
	}

#if 0
	/* direct mapping tables of the kernel */
	if (past >= table_start<<PAGE_SHIFT && first < table_end<<PAGE_SHIFT) {
		*addrp = table_end << PAGE_SHIFT;
		return 1;
	}
#endif

#ifdef CONFIG_BLK_DEV_INITRD
	/* initrd image handed over by the boot loader */
	if (LOADER_TYPE && INITRD_START &&
	    past >= INITRD_START && first < INITRD_START+INITRD_SIZE) {
		*addrp = INITRD_START + INITRD_SIZE;
		return 1;
	}
#endif

	/*
	 * Everything from 640k up to the end of the kernel image: the
	 * 640k memory hole plus the kernel code (later this should not be
	 * needed, but be paranoid for now).
	 */
	kernel_end = __pa_symbol(&_end);
	if (past >= 640*1024 && first < kernel_end) {
		*addrp = kernel_end;
		return 1;
	}

	/* XXX ramdisk image here? */
	return 0;
}
/*
 * Check whether any e820 entry overlaps the range [start, end).
 *
 * start: first byte of the range.
 * end:   first byte past the range (exclusive).
 * type:  e820 type an entry must have to be considered, or 0 to consider
 *        entries of every type.
 *
 * Returns 1 if at least one matching entry intersects the range, else 0.
 */
int __init e820_mapped(unsigned long start, unsigned long end, int type)
{
	int i;

	for (i = 0; i < e820.nr_map; i++) {
		struct e820entry *ei = &e820.map[i];

		if (type && ei->type != type)
			continue;
		/*
		 * Both the entry and the queried range are half-open.  An
		 * entry that ends exactly at 'start' (or begins at 'end')
		 * merely touches the range and must not count as mapped,
		 * hence <= rather than < on the lower bound.
		 */
		if (ei->addr >= end || ei->addr + ei->size <= start)
			continue;
		return 1;
	}
	return 0;
}
/*
 * Find a free area in a specific range.
 *
 * start: lowest acceptable start address.
 * end:   the area must not extend past this address.
 * size:  number of bytes needed.
 *
 * The area has to fit completely inside a single E820_RAM entry and must
 * not be rejected by bad_addr().
 *
 * Returns the start address of the area, or -1UL if no entry can hold it.
 */
unsigned long __init find_e820_area(unsigned long start, unsigned long end, unsigned size)
{
	int i;

	for (i = 0; i < e820.nr_map; i++) {
		struct e820entry *ei = &e820.map[i];
		unsigned long ei_last, addr, last;

		if (ei->type != E820_RAM)
			continue;

		ei_last = ei->addr + ei->size;
		addr = ei->addr;
		if (addr < start)
			addr = start;
		if (addr > ei_last)
			continue;

		/*
		 * bad_addr() reports one conflict per call and moves addr
		 * past it, so keep re-checking while the moved candidate
		 * still fits into this entry.  The bound must be inclusive:
		 * a candidate that ends exactly at the end of the entry
		 * still fits and would otherwise be returned below without
		 * having been checked against the remaining forbidden areas.
		 * bad_addr() only ever moves addr forward, so this ends.
		 */
		while (bad_addr(&addr, size) && addr + size <= ei_last)
			;

		last = addr + size;
		if (last > ei_last)
			continue;
		if (last > end)
			continue;
		return addr;
	}
	return -1UL;
}
/*
 * Hand the usable RAM of a node over to the bootmem allocator.
 *
 * For every E820_RAM entry that intersects [start, end] the page-aligned
 * part lying inside that window is released with free_bootmem_node() on
 * pgdat.  Pieces that shrink below one page after alignment and clipping
 * are skipped.
 */
void __init e820_bootmem_free(pg_data_t *pgdat, unsigned long start,unsigned long end)
{
	int idx;

	for (idx = 0; idx < e820.nr_map; idx++) {
		struct e820entry *ent = &e820.map[idx];
		unsigned long first, limit;

		if (ent->type != E820_RAM)
			continue;
		/* entirely outside of the requested window? */
		if (ent->addr + ent->size <= start || ent->addr > end)
			continue;

		/* shrink to whole pages, then clip to the window */
		first = round_up(ent->addr, PAGE_SIZE);
		if (first < start)
			first = start;

		limit = round_down(ent->addr + ent->size, PAGE_SIZE);
		if (limit >= end)
			limit = end;

		if (limit > first && limit - first >= PAGE_SIZE)
			free_bootmem_node(pgdat, first, limit - first);
	}
}
/*
 * Compute end_pfn, the highest page frame number covered by the e820 map.
 *
 * Entries of every type are taken into account for now (this makes it
 * easy to map ACPI areas); entries that do not contain a whole page are
 * ignored.  The result is capped at MAXMEM >> PAGE_SHIFT.
 */
void __init e820_end_of_ram(void)
{
	unsigned long top_pfn = 0;
	int idx;

	for (idx = 0; idx < e820.nr_map; idx++) {
		struct e820entry *ent = &e820.map[idx];
		unsigned long first = round_up(ent->addr, PAGE_SIZE);
		unsigned long limit = round_down(ent->addr + ent->size, PAGE_SIZE);

		/* at least one full page, and reaching higher than before? */
		if (first < limit && limit > top_pfn<<PAGE_SHIFT)
			top_pfn = limit>>PAGE_SHIFT;
	}

	if (top_pfn > MAXMEM >> PAGE_SHIFT)
		top_pfn = MAXMEM >> PAGE_SHIFT;

	end_pfn = top_pfn;
}
/*
 * Register the e820 regions as busy memory resources.
 *
 * Each entry that ends at or below 4GB gets a struct resource (taken from
 * low bootmem) named after its type and is requested under
 * iomem_resource.  The kernel code and data resources are then requested
 * below every RAM region.
 */
void __init e820_reserve_resources(void)
{
	int idx;

	for (idx = 0; idx < e820.nr_map; idx++) {
		struct e820entry *ent = &e820.map[idx];
		struct resource *res;

		/* entries reaching above 4GB are not registered */
		if (ent->addr + ent->size > 0x100000000ULL)
			continue;

		res = alloc_bootmem_low(sizeof(struct resource));
		switch (ent->type) {
		case E820_RAM:
			res->name = "System RAM";
			break;
		case E820_ACPI:
			res->name = "ACPI Tables";
			break;
		case E820_NVS:
			res->name = "ACPI Non-volatile Storage";
			break;
		default:
			res->name = "reserved";
		}
		res->start = ent->addr;
		res->end = res->start + ent->size - 1;
		res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
		request_resource(&iomem_resource, res);

		if (ent->type != E820_RAM)
			continue;

		/*
		 * We don't know which RAM region contains the kernel, so
		 * the request is made against each of them and the
		 * resource manager is left to test whether it fits.
		 */
		request_resource(res, &code_resource);
		request_resource(res, &data_resource);
	}
}
/*
 * Append a region to the kernel e820 map.
 *
 * start/size/type are stored in the next free slot of e820.map.  If the
 * map already holds E820MAX entries an error is logged and the region is
 * dropped.
 */
void __init add_memory_region(unsigned long start, unsigned long size, int type)
{
	struct e820entry *slot;

	if (e820.nr_map == E820MAX) {
		printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
		return;
	}

	slot = &e820.map[e820.nr_map];
	slot->addr = start;
	slot->size = size;
	slot->type = type;
	e820.nr_map++;
}
/*
 * Print the kernel e820 map, one line per entry.
 *
 * who: tag printed in front of every line to identify the map's origin.
 *
 * Each line shows the start address, the end address (addr + size) and a
 * description of the entry type; unknown types are printed numerically.
 */
void __init e820_print_map(char *who)
{
	int idx;

	for (idx = 0; idx < e820.nr_map; idx++) {
		struct e820entry *ent = &e820.map[idx];
		unsigned long long from = ent->addr;
		unsigned long long to = ent->addr + ent->size;

		printk(" %s: %016Lx - %016Lx ", who, from, to);

		switch (ent->type) {
		case E820_RAM:
			printk("(usable)\n");
			break;
		case E820_RESERVED:
			printk("(reserved)\n");
			break;
		case E820_ACPI:
			printk("(ACPI data)\n");
			break;
		case E820_NVS:
			printk("(ACPI NVS)\n");
			break;
		default:
			printk("type %u\n", ent->type);
			break;
		}
	}
}
/*
 * Sanitize the BIOS e820 map.
 *
 * Some e820 responses include overlapping entries. The following
 * replaces the original e820 map with a new one, removing overlaps.
 *
 * biosmap: array of BIOS entries; overwritten in place on success.
 * pnr_map: in:  number of entries in biosmap.
 *          out: number of entries after sanitizing (only written on
 *               success).
 *
 * Returns 0 on success.  Returns -1 and leaves the map untouched if it
 * has fewer than two entries or if addr + size wraps around for any entry.
 *
 * Approach: every entry contributes two "change points", its start and
 * its end address.  The change points are sorted by address and then
 * walked in order while keeping the list of entries that cover the
 * current address.  Whenever the winning type (the numerically largest
 * one among the covering entries) changes, the current output entry is
 * closed and, if some entry still covers the address, a new one is opened.
 */
static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
{
struct change_member {
struct e820entry *pbios; /* pointer to original bios entry */
unsigned long long addr; /* address for this change point */
};
/*
 * Work tables have static storage and are marked __initdata.
 * NOTE(review): presumably to keep these large arrays off the boot stack
 * and let them be discarded after init -- confirm.
 */
static struct change_member change_point_list[2*E820MAX] __initdata;
static struct change_member *change_point[2*E820MAX] __initdata;
static struct e820entry *overlap_list[E820MAX] __initdata;
static struct e820entry new_bios[E820MAX] __initdata;
struct change_member *change_tmp;
unsigned long current_type, last_type;
unsigned long long last_addr;
int chgidx, still_changing;
int overlap_entries;
int new_bios_entry;
int old_nr, new_nr;
int i;
/*
Visually we're performing the following (1,2,3,4 = memory types)...
Sample memory map (w/overlaps):
____22__________________
______________________4_
____1111________________
_44_____________________
11111111________________
____________________33__
___________44___________
__________33333_________
______________22________
___________________2222_
_________111111111______
_____________________11_
_________________4______
Sanitized equivalent (no overlap):
1_______________________
_44_____________________
___1____________________
____22__________________
______11________________
_________1______________
__________3_____________
___________44___________
_____________33_________
_______________2________
________________1_______
_________________4______
___________________2____
____________________33__
______________________4_
*/
/* if there's only one memory region, don't bother */
if (*pnr_map < 2)
return -1;
old_nr = *pnr_map;
/* bail out if we find any unreasonable addresses in bios map */
/* (an entry is unreasonable when addr + size wraps around) */
for (i=0; i<old_nr; i++)
if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
return -1;
/* create pointers for initial change-point information (for sorting) */
/* (the sort below swaps these pointers, not the records themselves) */
for (i=0; i < 2*old_nr; i++)
change_point[i] = &change_point_list[i];
/* record all known change-points (starting and ending addresses) */
chgidx = 0;
for (i=0; i < old_nr; i++) {
change_point[chgidx]->addr = biosmap[i].addr;
change_point[chgidx++]->pbios = &biosmap[i];
change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
change_point[chgidx++]->pbios = &biosmap[i];
}
/* sort change-point list by memory addresses (low -> high) */
/* (bubble sort: repeat passes until one pass performs no swap) */
still_changing = 1;
while (still_changing) {
still_changing = 0;
for (i=1; i < 2*old_nr; i++) {
/* if current address < previous address, swap */
/* or, if the addresses are equal, current is a start point and */
/* previous is an end point, swap (starts sort before ends) */
if ((change_point[i]->addr < change_point[i-1]->addr) ||
((change_point[i]->addr == change_point[i-1]->addr) &&
(change_point[i]->addr == change_point[i]->pbios->addr) &&
(change_point[i-1]->addr != change_point[i-1]->pbios->addr))
)
{
change_tmp = change_point[i];
change_point[i] = change_point[i-1];
change_point[i-1] = change_tmp;
still_changing=1;
}
}
}
/* create a new bios memory map, removing overlaps */
overlap_entries=0; /* number of entries in the overlap table */
new_bios_entry=0; /* index for creating new bios map entries */
last_type = 0; /* start with undefined memory type */
last_addr = 0; /* start with 0 as last starting address */
/* loop through change-points, determining effect on the new bios map */
for (chgidx=0; chgidx < 2*old_nr; chgidx++)
{
/* keep track of all overlapping bios entries */
/*
 * A change point is taken to be a start point when its address equals
 * the start address of its entry.
 * NOTE(review): for a zero-size entry both of its change points satisfy
 * this test, so both are added and neither is removed -- confirm that
 * zero-size entries cannot reach this function.
 */
if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
{
/* add map entry to overlap list (> 1 entry implies an overlap) */
overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
}
else
{
/* remove entry from list (order independent, so swap with last) */
for (i=0; i<overlap_entries; i++)
{
if (overlap_list[i] == change_point[chgidx]->pbios)
overlap_list[i] = overlap_list[overlap_entries-1];
}
/* NOTE(review): decremented even when no match was found above */
overlap_entries--;
}
/* if there are overlapping entries, decide which "type" to use */
/* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
/* current_type stays 0 when no entry covers this address */
current_type = 0;
for (i=0; i<overlap_entries; i++)
if (overlap_list[i]->type > current_type)
current_type = overlap_list[i]->type;
/* continue building up new bios map based on this information */
if (current_type != last_type) {
/* close the entry that was open up to this change point */
if (last_type != 0) {
new_bios[new_bios_entry].size =
change_point[chgidx]->addr - last_addr;
/* move forward only if the new size was non-zero */
if (new_bios[new_bios_entry].size != 0)
if (++new_bios_entry >= E820MAX)
break; /* no more space left for new bios entries */
}
/* open a new entry starting here if something covers this address */
if (current_type != 0) {
new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
new_bios[new_bios_entry].type = current_type;
last_addr=change_point[chgidx]->addr;
}
last_type = current_type;
}
}
new_nr = new_bios_entry; /* retain count for new bios entries */
/* copy new bios mapping into original location */
memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
*pnr_map = new_nr;
return 0;
}
/*
 * Copy the BIOS e820 map into the kernel's own map, sanity-checking it
 * on the way.
 *
 * If we're lucky and live on a modern system, the setup code will have
 * given us a memory map that we can use to properly set up memory.  If
 * we aren't, the caller has to fake one.
 *
 * A map with fewer than two entries is not trusted: the detection code
 * in setup.S may not be perfect, and most every PC known to man has two
 * memory regions, one from 0 to 640k and one from 1mb up.  (The IBM
 * thinkpad 560x, for example, does not cooperate with the memory
 * detection code.)
 *
 * Returns 0 on success, -1 if the map has fewer than two entries or an
 * entry overflows.  Entries copied before an overflow was detected stay
 * in the kernel map.
 */
static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
{
	int idx;

	/* Only one memory region (or negative)? Ignore it */
	if (nr_map < 2)
		return -1;

	for (idx = 0; idx < nr_map; idx++) {
		struct e820entry *ent = &biosmap[idx];
		unsigned long base = ent->addr;
		unsigned long len = ent->size;
		unsigned long top = base + len;
		unsigned long kind = ent->type;

		/* Overflow in 64 bits? Ignore the memory map. */
		if (base > top)
			return -1;

		/*
		 * Some BIOSes claim RAM in the 640k - 1M region.
		 * Not right. Cut that window out of the entry.
		 *
		 * This should be removed on Hammer which is supposed to not
		 * have non e820 covered ISA mappings there, but I had some
		 * strange problems so it stays for now. -AK
		 */
		if (kind == E820_RAM && base < 0x100000ULL && top > 0xA0000ULL) {
			/* keep the part below 640k */
			if (base < 0xA0000ULL)
				add_memory_region(base, 0xA0000ULL-base, kind);
			/* nothing left above 1M */
			if (top <= 0x100000ULL)
				continue;
			base = 0x100000ULL;
			len = top - base;
		}
		add_memory_region(base, len, kind);
	}
	return 0;
}
/*
 * Build the kernel memory map from what the BIOS reported.
 *
 * The BIOS e820 map is sanitized and copied.  If the copy is rejected, a
 * two-region map is faked instead: 0 .. LOWMEMSIZE() plus a region that
 * starts at HIGH_MEMORY and is sized by the larger of the two legacy BIOS
 * size values.  The resulting map is printed.
 */
void __init setup_memory_region(void)
{
	char *source = "BIOS-e820";

	sanitize_e820_map(E820_MAP, &E820_MAP_NR);

	if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0) {
		unsigned long kbytes;

		/* compare results from other methods and take the greater */
		if (EXT_MEM_K > ALT_MEM_K) {
			source = "BIOS-88";
			kbytes = EXT_MEM_K;
		} else {
			source = "BIOS-e801";
			kbytes = ALT_MEM_K;
		}

		/* drop whatever was copied so far and fake the map */
		e820.nr_map = 0;
		add_memory_region(0, LOWMEMSIZE(), E820_RAM);
		add_memory_region(HIGH_MEMORY, kbytes << 10, E820_RAM);
	}

	printk(KERN_INFO "BIOS-provided physical RAM map:\n");
	e820_print_map(source);
}
/* Non-zero while the memory map is considered user-defined; see print_user_map(). */
static int usermem __initdata;

/*
 * Handle the value part of a "mem=" option.
 *
 * p: text following "mem=".
 *
 *   exactmap    - empty the map; the user supplies every region.
 *   size@start  - add a RAM region of 'size' bytes at 'start'.
 *   size        - add a RAM region from HIGH_MEMORY up to 'size'.
 *
 * The first size option seen while the map is not yet user-defined
 * replaces the automatically generated map with the standard low-memory
 * region before adding its own region.
 */
void __init parse_memopt(char *p)
{
	unsigned long long base, bytes;

	if (strncmp(p,"exactmap",8) == 0) {
		e820.nr_map = 0;
		usermem = 1;
		return;
	}

	/*
	 * If the user specifies memory size, we blow away any
	 * automatically generated size.
	 */
	if (!usermem) {
		e820.nr_map = 0;
		usermem = 1;
		add_memory_region(0, LOWMEMSIZE(), E820_RAM);
	}

	bytes = memparse(p, &p);
	if (*p == '@') {
		base = memparse(p+1, &p);
	} else {
		/*
		 * NOTE(review): the subtraction wraps when the given size is
		 * below HIGH_MEMORY, and usermem is cleared again on this
		 * path -- confirm both are intended.
		 */
		base = HIGH_MEMORY;
		bytes -= HIGH_MEMORY;
		usermem = 0;
	}
	add_memory_region(base, bytes, E820_RAM);
}
/*
 * Print the memory map under the "user" tag, but only if usermem is set,
 * i.e. parse_memopt() left the map marked as user-defined.
 */
void __init print_user_map(void)
{
	if (!usermem)
		return;

	printk(KERN_INFO "user-defined physical RAM map:\n");
	e820_print_map("user");
}
......@@ -50,18 +50,18 @@
#include <asm/mmu_context.h>
#include <asm/bootsetup.h>
#include <asm/smp.h>
#include <asm/proto.h>
/*
* Machine setup..
*/
extern void mcheck_init(struct cpuinfo_x86 *c);
extern void init_memory_mapping(void);
struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
struct cpuinfo_x86 boot_cpu_data;
unsigned long mmu_cr4_features;
int acpi_disabled __initdata = 0;
/* For PCI or other memory-mapped resources */
unsigned long pci_mem_start = 0x10000000;
......@@ -86,8 +86,8 @@ extern char _text, _etext, _edata, _end;
static int disable_x86_fxsr __initdata = 0;
static char command_line[COMMAND_LINE_SIZE];
char saved_command_line[COMMAND_LINE_SIZE];
char command_line[COMMAND_LINE_SIZE];
char saved_command_line[COMMAND_LINE_SIZE];
struct resource standard_io_resources[] = {
{ "dma1", 0x00, 0x1f, IORESOURCE_BUSY },
......@@ -102,9 +102,9 @@ struct resource standard_io_resources[] = {
#define STANDARD_IO_RESOURCES (sizeof(standard_io_resources)/sizeof(struct resource))
static struct resource code_resource = { "Kernel code", 0x100000, 0 };
static struct resource data_resource = { "Kernel data", 0, 0 };
static struct resource vram_resource = { "Video RAM area", 0xa0000, 0xbffff, IORESOURCE_BUSY };
struct resource code_resource = { "Kernel code", 0x100000, 0 };
struct resource data_resource = { "Kernel data", 0, 0 };
struct resource vram_resource = { "Video RAM area", 0xa0000, 0xbffff, IORESOURCE_BUSY };
/* System ROM resources */
#define MAXROMS 6
......@@ -178,358 +178,33 @@ static void __init probe_roms(void)
}
}
void __init add_memory_region(unsigned long long start,
unsigned long long size, int type)
{
int x = e820.nr_map;
if (x == E820MAX) {
printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
return;
}
e820.map[x].addr = start;
e820.map[x].size = size;
e820.map[x].type = type;
e820.nr_map++;
} /* add_memory_region */
#define E820_DEBUG 1
static void __init print_memory_map(char *who)
{
int i;
for (i = 0; i < e820.nr_map; i++) {
printk(" %s: %016Lx - %016Lx ", who,
(unsigned long long) e820.map[i].addr,
(unsigned long long) (e820.map[i].addr + e820.map[i].size));
switch (e820.map[i].type) {
case E820_RAM: printk("(usable)\n");
break;
case E820_RESERVED:
printk("(reserved)\n");
break;
case E820_ACPI:
printk("(ACPI data)\n");
break;
case E820_NVS:
printk("(ACPI NVS)\n");
break;
default: printk("type %u\n", e820.map[i].type);
break;
}
}
}
/*
* Sanitize the BIOS e820 map.
*
* Some e820 responses include overlapping entries. The following
* replaces the original e820 map with a new one, removing overlaps.
*
*/
static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
{
struct change_member {
struct e820entry *pbios; /* pointer to original bios entry */
unsigned long long addr; /* address for this change point */
};
struct change_member change_point_list[2*E820MAX];
struct change_member *change_point[2*E820MAX];
struct e820entry *overlap_list[E820MAX];
struct e820entry new_bios[E820MAX];
struct change_member *change_tmp;
unsigned long current_type, last_type;
unsigned long long last_addr;
int chgidx, still_changing;
int overlap_entries;
int new_bios_entry;
int old_nr, new_nr;
int i;
/*
Visually we're performing the following (1,2,3,4 = memory types)...
Sample memory map (w/overlaps):
____22__________________
______________________4_
____1111________________
_44_____________________
11111111________________
____________________33__
___________44___________
__________33333_________
______________22________
___________________2222_
_________111111111______
_____________________11_
_________________4______
Sanitized equivalent (no overlap):
1_______________________
_44_____________________
___1____________________
____22__________________
______11________________
_________1______________
__________3_____________
___________44___________
_____________33_________
_______________2________
________________1_______
_________________4______
___________________2____
____________________33__
______________________4_
*/
/* if there's only one memory region, don't bother */
if (*pnr_map < 2)
return -1;
old_nr = *pnr_map;
/* bail out if we find any unreasonable addresses in bios map */
for (i=0; i<old_nr; i++)
if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
return -1;
/* create pointers for initial change-point information (for sorting) */
for (i=0; i < 2*old_nr; i++)
change_point[i] = &change_point_list[i];
/* record all known change-points (starting and ending addresses) */
chgidx = 0;
for (i=0; i < old_nr; i++) {
change_point[chgidx]->addr = biosmap[i].addr;
change_point[chgidx++]->pbios = &biosmap[i];
change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
change_point[chgidx++]->pbios = &biosmap[i];
}
/* sort change-point list by memory addresses (low -> high) */
still_changing = 1;
while (still_changing) {
still_changing = 0;
for (i=1; i < 2*old_nr; i++) {
/* if <current_addr> > <last_addr>, swap */
/* or, if current=<start_addr> & last=<end_addr>, swap */
if ((change_point[i]->addr < change_point[i-1]->addr) ||
((change_point[i]->addr == change_point[i-1]->addr) &&
(change_point[i]->addr == change_point[i]->pbios->addr) &&
(change_point[i-1]->addr != change_point[i-1]->pbios->addr))
)
{
change_tmp = change_point[i];
change_point[i] = change_point[i-1];
change_point[i-1] = change_tmp;
still_changing=1;
}
}
}
/* create a new bios memory map, removing overlaps */
overlap_entries=0; /* number of entries in the overlap table */
new_bios_entry=0; /* index for creating new bios map entries */
last_type = 0; /* start with undefined memory type */
last_addr = 0; /* start with 0 as last starting address */
/* loop through change-points, determining affect on the new bios map */
for (chgidx=0; chgidx < 2*old_nr; chgidx++)
{
/* keep track of all overlapping bios entries */
if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
{
/* add map entry to overlap list (> 1 entry implies an overlap) */
overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
}
else
{
/* remove entry from list (order independent, so swap with last) */
for (i=0; i<overlap_entries; i++)
{
if (overlap_list[i] == change_point[chgidx]->pbios)
overlap_list[i] = overlap_list[overlap_entries-1];
}
overlap_entries--;
}
/* if there are overlapping entries, decide which "type" to use */
/* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
current_type = 0;
for (i=0; i<overlap_entries; i++)
if (overlap_list[i]->type > current_type)
current_type = overlap_list[i]->type;
/* continue building up new bios map based on this information */
if (current_type != last_type) {
if (last_type != 0) {
new_bios[new_bios_entry].size =
change_point[chgidx]->addr - last_addr;
/* move forward only if the new size was non-zero */
if (new_bios[new_bios_entry].size != 0)
if (++new_bios_entry >= E820MAX)
break; /* no more space left for new bios entries */
}
if (current_type != 0) {
new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
new_bios[new_bios_entry].type = current_type;
last_addr=change_point[chgidx]->addr;
}
last_type = current_type;
}
}
new_nr = new_bios_entry; /* retain count for new bios entries */
/* copy new bios mapping into original location */
memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
*pnr_map = new_nr;
return 0;
}
/*
* Copy the BIOS e820 map into a safe place.
*
* Sanity-check it while we're at it..
*
* If we're lucky and live on a modern system, the setup code
* will have given us a memory map that we can use to properly
* set up memory. If we aren't, we'll fake a memory map.
*
* We check to see that the memory map contains at least 2 elements
* before we'll use it, because the detection code in setup.S may
* not be perfect and most every PC known to man has two memory
* regions: one from 0 to 640k, and one from 1mb up. (The IBM
* thinkpad 560x, for example, does not cooperate with the memory
* detection code.)
*/
static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
{
/* Only one memory region (or negative)? Ignore it */
if (nr_map < 2)
return -1;
do {
unsigned long long start = biosmap->addr;
unsigned long long size = biosmap->size;
unsigned long long end = start + size;
unsigned long type = biosmap->type;
/* Overflow in 64 bits? Ignore the memory map. */
if (start > end)
return -1;
/*
* Some BIOSes claim RAM in the 640k - 1M region.
* Not right. Fix it up.
*/
if (type == E820_RAM) {
if (start < 0x100000ULL && end > 0xA0000ULL) {
if (start < 0xA0000ULL)
add_memory_region(start, 0xA0000ULL-start, type);
if (end <= 0x100000ULL)
continue;
start = 0x100000ULL;
size = end - start;
}
}
add_memory_region(start, size, type);
} while (biosmap++,--nr_map);
return 0;
}
/*
* Do NOT EVER look at the BIOS memory size location.
* It does not work on many machines.
*/
#define LOWMEMSIZE() (0x9f000)
void __init setup_memory_region(void)
{
char *who = "BIOS-e820";
/*
* Try to copy the BIOS-supplied E820-map.
*
* Otherwise fake a memory map; one section from 0k->640k,
* the next section from 1mb->appropriate_mem_k
*/
sanitize_e820_map(E820_MAP, &E820_MAP_NR);
if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0) {
unsigned long mem_size;
/* compare results from other methods and take the greater */
if (ALT_MEM_K < EXT_MEM_K) {
mem_size = EXT_MEM_K;
who = "BIOS-88";
} else {
mem_size = ALT_MEM_K;
who = "BIOS-e801";
}
e820.nr_map = 0;
add_memory_region(0, LOWMEMSIZE(), E820_RAM);
add_memory_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
}
printk(KERN_INFO "BIOS-provided physical RAM map:\n");
print_memory_map(who);
} /* setup_memory_region */
static inline void parse_mem_cmdline (char ** cmdline_p)
static __init void parse_cmdline_early (char ** cmdline_p)
{
char c = ' ', *to = command_line, *from = COMMAND_LINE;
int len = 0;
int usermem = 0;
/* Save unparsed command line copy for /proc/cmdline */
memcpy(saved_command_line, COMMAND_LINE, COMMAND_LINE_SIZE);
saved_command_line[COMMAND_LINE_SIZE-1] = '\0';
for (;;) {
/*
* "mem=nopentium" disables the 4MB page tables.
* "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
* to <mem>, overriding the bios size.
* "mem=XXX[KkmM]@XXX[KkmM]" defines a memory region from
* <start> to <start>+<mem>, overriding the bios size.
*/
if (c == ' ' && !memcmp(from, "mem=", 4)) {
if (to != command_line)
to--;
if (!memcmp(from+4, "nopentium", 9)) {
from += 9+4;
clear_bit(X86_FEATURE_PSE, &boot_cpu_data.x86_capability);
} else if (!memcmp(from+4, "exactmap", 8)) {
from += 8+4;
e820.nr_map = 0;
usermem = 1;
} else {
/* If the user specifies memory size, we
* blow away any automatically generated
* size
*/
unsigned long long start_at, mem_size;
if (c != ' ')
goto next_char;
if (usermem == 0) {
/* first time in: zap the whitelist
* and reinitialize it with the
* standard low-memory region.
*/
e820.nr_map = 0;
usermem = 1;
add_memory_region(0, LOWMEMSIZE(), E820_RAM);
}
mem_size = memparse(from+4, &from);
if (*from == '@')
start_at = memparse(from+1, &from);
else {
start_at = HIGH_MEMORY;
mem_size -= HIGH_MEMORY;
usermem=0;
}
add_memory_region(start_at, mem_size, E820_RAM);
}
/* "acpi=off" disables both ACPI table parsing and interpreter init */
if (!memcmp(from, "acpi=off", 8))
acpi_disabled = 1;
if (!memcmp(from, "mem=", 4))
parse_memopt(from+4);
#ifdef CONFIG_GART_IOMMU
if (!memcmp(from,"iommu=",6)) {
iommu_setup(from+6);
}
#endif
next_char:
c = *(from++);
if (!c)
break;
......@@ -539,15 +214,23 @@ static inline void parse_mem_cmdline (char ** cmdline_p)
}
*to = '\0';
*cmdline_p = command_line;
if (usermem) {
printk(KERN_INFO "user-defined physical RAM map:\n");
print_memory_map("user");
}
print_user_map();
}
unsigned long start_pfn, end_pfn;
#ifndef CONFIG_DISCONTIGMEM
/*
 * Set up the bootmem allocator for the contiguous-memory configuration.
 *
 * A place for the bootmem bitmap is looked up in the e820 map below
 * end_pfn, bootmem is initialised there, all e820 RAM up to end_pfn is
 * released to it and finally the bitmap itself is reserved again.
 * Panics if no area large enough for the bitmap can be found.
 */
static void __init contig_initmem_init(void)
{
	unsigned long map_bytes;
	unsigned long map_addr;

	map_bytes = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT;
	map_addr = find_e820_area(0, end_pfn<<PAGE_SHIFT, map_bytes);
	if (map_addr == -1L)
		panic("Cannot find bootmem map of size %ld\n",map_bytes);

	/* init_bootmem() returns the size that is reserved below */
	map_bytes = init_bootmem(map_addr >> PAGE_SHIFT, end_pfn);
	e820_bootmem_free(&contig_page_data, 0, end_pfn << PAGE_SHIFT);
	reserve_bootmem(map_addr, map_bytes);
}
#endif
extern void exception_table_check(void);
void __init setup_arch(char **cmdline_p)
{
......@@ -579,7 +262,7 @@ void __init setup_arch(char **cmdline_p)
data_resource.start = virt_to_phys(&_etext);
data_resource.end = virt_to_phys(&_edata)-1;
parse_mem_cmdline(cmdline_p);
parse_cmdline_early(cmdline_p);
#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
#define PFN_DOWN(x) ((x) >> PAGE_SHIFT)
......@@ -595,68 +278,12 @@ void __init setup_arch(char **cmdline_p)
*/
start_pfn = PFN_UP(__pa_symbol(&_end));
/*
* Find the highest page frame number we have available
*/
end_pfn = 0;
for (i = 0; i < e820.nr_map; i++) {
unsigned long start, end;
/* RAM? */
if (e820.map[i].type != E820_RAM)
continue;
start = PFN_UP(e820.map[i].addr);
end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
if (start >= end)
continue;
if (end > end_pfn)
end_pfn = end;
}
if (end_pfn > MAXMEM_PFN) {
end_pfn = MAXMEM_PFN;
}
e820_end_of_ram();
init_memory_mapping();
/*
* Initialize the boot-time allocator (with low memory only):
*/
bootmap_size = init_bootmem(start_pfn, end_pfn);
contig_initmem_init();
/*
* Register fully available low RAM pages with the bootmem allocator.
*/
for (i = 0; i < e820.nr_map; i++) {
unsigned long curr_pfn, last_pfn, size;
/*
* Reserve usable low memory
*/
if (e820.map[i].type != E820_RAM)
continue;
/*
* We are rounding up the start address of usable memory:
*/
curr_pfn = PFN_UP(e820.map[i].addr);
if (curr_pfn >= end_pfn)
continue;
/*
* ... and at the end of the usable range downwards:
*/
last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
if (last_pfn > end_pfn)
last_pfn = end_pfn;
/*
* .. finally, did all the rounding and playing
* around just make the area go away?
*/
if (last_pfn <= curr_pfn)
continue;
size = last_pfn - curr_pfn;
free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
}
/*
* Reserve the bootmem bitmap itself as well. We do this in two
* steps (first step was init_bootmem()) because this catches
......@@ -694,7 +321,7 @@ void __init setup_arch(char **cmdline_p)
#endif
#ifdef CONFIG_BLK_DEV_INITRD
if (LOADER_TYPE && INITRD_START) {
if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) {
reserve_bootmem(INITRD_START, INITRD_SIZE);
initrd_start =
INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
......@@ -704,7 +331,7 @@ void __init setup_arch(char **cmdline_p)
printk(KERN_ERR "initrd extends beyond end of memory "
"(0x%08lx > 0x%08lx)\ndisabling initrd\n",
(unsigned long)(INITRD_START + INITRD_SIZE),
(unsigned long)(max_low_pfn << PAGE_SHIFT));
(unsigned long)(end_pfn << PAGE_SHIFT));
initrd_start = 0;
}
}
......@@ -726,6 +353,7 @@ void __init setup_arch(char **cmdline_p)
* the bootmem allocator) but before get_smp_config (to allow parsing
* of MADT).
*/
if (!acpi_disabled)
acpi_boot_init(*cmdline_p);
#endif
#ifdef CONFIG_X86_LOCAL_APIC
......@@ -737,48 +365,24 @@ void __init setup_arch(char **cmdline_p)
init_apic_mappings();
#endif
/*
* Request address space for all standard RAM and ROM resources
* and also for regions reported as reserved by the e820.
*/
probe_roms();
for (i = 0; i < e820.nr_map; i++) {
struct resource *res;
if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
continue;
res = alloc_bootmem_low(sizeof(struct resource));
switch (e820.map[i].type) {
case E820_RAM: res->name = "System RAM"; break;
case E820_ACPI: res->name = "ACPI Tables"; break;
case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
default: res->name = "reserved";
}
res->start = e820.map[i].addr;
res->end = res->start + e820.map[i].size - 1;
res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
request_resource(&iomem_resource, res);
if (e820.map[i].type == E820_RAM) {
/*
* We dont't know which RAM region contains kernel data,
* so we try it repeatedly and let the resource manager
* test it.
*/
request_resource(res, &code_resource);
request_resource(res, &data_resource);
}
}
e820_reserve_resources();
request_resource(&iomem_resource, &vram_resource);
/* request I/O space for devices used on all i[345]86 PCs */
for (i = 0; i < STANDARD_IO_RESOURCES; i++)
request_resource(&ioport_resource, standard_io_resources+i);
/* Tell the PCI layer not to allocate too close to the RAM area.. */
/* ??? move this up on x86-64 */
low_mem_size = ((max_low_pfn << PAGE_SHIFT) + 0xfffff) & ~0xfffff;
if (low_mem_size > pci_mem_start)
pci_mem_start = low_mem_size;
pci_mem_start = IOMAP_START;
#ifdef CONFIG_GART_IOMMU
iommu_hole_init();
#endif
#ifdef CONFIG_VT
#if defined(CONFIG_VGA_CONSOLE)
......@@ -919,11 +523,14 @@ void __init identify_cpu(struct cpuinfo_x86 *c)
/* Intel-defined flags: level 0x00000001 */
if ( c->cpuid_level >= 0x00000001 ) {
cpuid(0x00000001, &tfms, &junk, &junk,
__u32 misc;
cpuid(0x00000001, &tfms, &misc, &junk,
&c->x86_capability[0]);
c->x86 = (tfms >> 8) & 15;
c->x86_model = (tfms >> 4) & 15;
c->x86_mask = tfms & 15;
if (c->x86_capability[0] & (1<<19))
c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
} else {
/* Have CPUID level 0 only - unheard of */
c->x86 = 4;
......@@ -946,12 +553,6 @@ void __init identify_cpu(struct cpuinfo_x86 *c)
}
printk(KERN_DEBUG "CPU: Before vendor init, caps: %08x %08x %08x, vendor = %d\n",
c->x86_capability[0],
c->x86_capability[1],
c->x86_capability[2],
c->x86_vendor);
/*
* Vendor-specific initialization. In this section we
* canonicalize the feature flags, meaning if there are
......@@ -1017,11 +618,6 @@ void __init identify_cpu(struct cpuinfo_x86 *c)
boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
}
printk(KERN_DEBUG "CPU: Common caps: %08x %08x %08x %08x\n",
boot_cpu_data.x86_capability[0],
boot_cpu_data.x86_capability[1],
boot_cpu_data.x86_capability[2],
boot_cpu_data.x86_capability[3]);
}
......@@ -1147,8 +743,8 @@ static void c_stop(struct seq_file *m, void *v)
}
struct seq_operations cpuinfo_op = {
start: c_start,
next: c_next,
stop: c_stop,
show: show_cpuinfo,
.start =c_start,
.next = c_next,
.stop = c_stop,
.show = show_cpuinfo,
};
......@@ -20,10 +20,11 @@
#include <asm/mmu_context.h>
#include <asm/smp.h>
#include <asm/i387.h>
#include <asm/percpu.h>
char x86_boot_params[2048] __initdata = {0,};
static unsigned long cpu_initialized __initdata = 0;
unsigned long cpu_initialized __initdata = 0;
struct x8664_pda cpu_pda[NR_CPUS] __cacheline_aligned;
......@@ -34,11 +35,15 @@ extern struct task_struct init_task;
extern unsigned char __per_cpu_start[], __per_cpu_end[];
struct desc_ptr gdt_descr = { 0 /* filled in */, (unsigned long) gdt_table };
extern struct desc_ptr cpu_gdt_descr[];
struct desc_ptr idt_descr = { 256 * 16, (unsigned long) idt_table };
char boot_cpu_stack[IRQSTACKSIZE] __cacheline_aligned;
#ifndef __GENERIC_PER_CPU
unsigned long __per_cpu_offset[NR_CPUS];
void __init setup_per_cpu_areas(void)
{
unsigned long size, i;
......@@ -52,10 +57,15 @@ void __init setup_per_cpu_areas(void)
ptr = alloc_bootmem(size * NR_CPUS);
for (i = 0; i < NR_CPUS; i++, ptr += size) {
cpu_pda[cpu_logical_map(i)].cpudata_offset = ptr - __per_cpu_start;
/* hide this from the compiler to avoid problems */
unsigned long offset;
asm("subq %[b],%0" : "=r" (offset) : "0" (ptr), [b] "r" (&__per_cpu_start));
__per_cpu_offset[i] = offset;
cpu_pda[i].cpudata_offset = offset;
memcpy(ptr, __per_cpu_start, size);
}
}
#endif
void pda_init(int cpu)
{
......@@ -111,38 +121,48 @@ char boot_exception_stacks[N_EXCEPTION_STACKS*EXCEPTION_STKSZ];
void __init cpu_init (void)
{
#ifdef CONFIG_SMP
int nr = stack_smp_processor_id();
int cpu = stack_smp_processor_id();
#else
int nr = smp_processor_id();
int cpu = smp_processor_id();
#endif
struct tss_struct * t = &init_tss[nr];
struct tss_struct * t = &init_tss[cpu];
unsigned long v;
char *estacks;
struct task_struct *me;
/* CPU 0 is initialised in head64.c */
if (nr != 0) {
if (cpu != 0) {
estacks = (char *)__get_free_pages(GFP_ATOMIC, 0);
if (!estacks)
panic("Can't allocate exception stacks for CPU %d\n",nr);
pda_init(nr);
panic("Can't allocate exception stacks for CPU %d\n",cpu);
pda_init(cpu);
} else
estacks = boot_exception_stacks;
me = current;
if (test_and_set_bit(nr, &cpu_initialized))
panic("CPU#%d already initialized!\n", nr);
if (test_and_set_bit(cpu, &cpu_initialized))
panic("CPU#%d already initialized!\n", cpu);
printk("Initializing CPU#%d\n", nr);
printk("Initializing CPU#%d\n", cpu);
clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
gdt_descr.size = (__u8*) gdt_end - (__u8*)gdt_table;
/*
* Initialize the per-CPU GDT with the boot GDT,
* and set up the GDT descriptor:
*/
if (cpu) {
memcpy(cpu_gdt_table[cpu], cpu_gdt_table[0], GDT_SIZE);
}
__asm__ __volatile__("lgdt %0": "=m" (gdt_descr));
cpu_gdt_descr[cpu].size = GDT_SIZE;
cpu_gdt_descr[cpu].address = (unsigned long)cpu_gdt_table[cpu];
__asm__ __volatile__("lgdt %0": "=m" (cpu_gdt_descr[cpu]));
__asm__ __volatile__("lidt %0": "=m" (idt_descr));
memcpy(me->thread.tls_array, cpu_gdt_table[cpu], GDT_ENTRY_TLS_ENTRIES * 8);
/*
* Delete NT
*/
......@@ -177,14 +197,16 @@ void __init cpu_init (void)
estacks += EXCEPTION_STKSZ;
}
t->io_map_base = INVALID_IO_BITMAP_OFFSET;
atomic_inc(&init_mm.mm_count);
me->active_mm = &init_mm;
if (me->mm)
BUG();
enter_lazy_tlb(&init_mm, me, nr);
enter_lazy_tlb(&init_mm, me, cpu);
set_tss_desc(nr, t);
load_TR(nr);
set_tss_desc(cpu, t);
load_TR_desc();
load_LDT(&init_mm.context);
/*
......
......@@ -2,16 +2,17 @@
* structures and definitions for the int 15, ax=e820 memory map
* scheme.
*
* In a nutshell, arch/x86_64/boot/setup.S populates a scratch table
* in the empty_zero_block that contains a list of usable address/size
* duples. In arch/x86_64/kernel/setup.c, this information is
* transferred into the e820map, and in arch/i386/x86_64/init.c, that
* new information is used to mark pages reserved or not.
*
* In a nutshell, setup.S populates a scratch table in the
* empty_zero_block that contains a list of usable address/size
* pairs. In setup.c, this information is transferred into the e820map,
* and in init.c/numa.c, that new information is used to mark pages
* reserved or not.
*/
#ifndef __E820_HEADER
#define __E820_HEADER
#include <linux/mmzone.h>
#define E820MAP 0x2d0 /* our map */
#define E820MAX 32 /* number of entries in E820MAP */
#define E820NR 0x1e8 /* # entries in E820MAP */
......@@ -23,17 +24,39 @@
#define HIGH_MEMORY (1024*1024)
#define LOWMEMSIZE() (0x9f000)
#define MAXMEM (120UL * 1024 * 1024 * 1024 * 1024) /* 120TB */
#ifndef __ASSEMBLY__
/*
 * One entry of the e820 memory map (see struct e820map, which holds an
 * array of these).
 *
 * The structure is packed, so the three fields occupy 8 + 8 + 4 = 20 bytes
 * with no padding between or after them.
 * NOTE(review): presumably this is required so the layout matches the
 * entries stored at E820MAP by the boot code -- confirm against setup.S.
 */
struct e820entry {
u64 addr; /* start of memory segment */
u64 size; /* size of memory segment */
u32 type; /* type of memory segment */
} __attribute__((packed));
struct e820map {
int nr_map;
struct e820entry {
u64 addr __attribute__((packed)); /* start of memory segment */
u64 size __attribute__((packed)); /* size of memory segment */
u32 type __attribute__((packed)); /* type of memory segment */
} map[E820MAX];
struct e820entry map[E820MAX];
};
extern unsigned long find_e820_area(unsigned long start, unsigned long end,
unsigned size);
extern void add_memory_region(unsigned long start, unsigned long size,
int type);
extern void setup_memory_region(void);
extern void contig_e820_setup(void);
extern void e820_end_of_ram(void);
extern void e820_reserve_resources(void);
extern void e820_print_map(char *who);
extern int e820_mapped(unsigned long start, unsigned long end, int type);
extern void e820_bootmem_free(pg_data_t *pgdat, unsigned long start,unsigned long end);
extern void __init parse_memopt(char *p);
extern void __init print_user_map(void);
extern struct e820map e820;
#endif/*!__ASSEMBLY__*/
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment