Commit dd1a7550 authored by David Mosberger

ia64: Patch by Jesse Barnes:

This patch fixes the combination of CONFIG_DISCONTIGMEM and
CONFIG_VIRTUAL_MEM_MAP so that generic kernels will work on all ia64
platforms, including sn2, and also makes sn2 specific kernels work
(which I think is a first).

I've cleaned this patch up heavily based on feedback from yourself,
Christoph and others.  I've tested sn2, zx1, and dig (thanks Xavier!)
specific configurations, as well as a generic configuration that allows
the same binary to boot on zx1, dig, and sn2.
parent 051a53b0
......@@ -220,24 +220,8 @@ config NUMA
Access). This option is for configuring high-end multiprocessor
server systems. If in doubt, say N.
choice
prompt "Maximum Memory per NUMA Node" if NUMA && IA64_DIG
depends on NUMA && IA64_DIG
default IA64_NODESIZE_16GB
config IA64_NODESIZE_16GB
bool "16GB"
config IA64_NODESIZE_64GB
bool "64GB"
config IA64_NODESIZE_256GB
bool "256GB"
endchoice
config DISCONTIGMEM
bool "Discontiguous memory support" if (IA64_DIG || IA64_SGI_SN2 || IA64_GENERIC) && NUMA
bool "Discontiguous memory support" if (IA64_DIG || IA64_SGI_SN2 || IA64_GENERIC) && NUMA && VIRTUAL_MEM_MAP
default y if (IA64_SGI_SN2 || IA64_GENERIC) && NUMA
help
Say Y to support efficient handling of discontiguous physical memory,
......@@ -250,14 +234,10 @@ config VIRTUAL_MEM_MAP
default y if !IA64_HP_SIM
help
Say Y to compile the kernel with support for a virtual mem map.
This is an alternate method of supporting large holes in the
physical address space on non NUMA machines. Since the DISCONTIGMEM
option is not supported on machines with the ZX1 chipset, this is
the only way of supporting more than 1 Gb of memory on those
machines. This code also only takes effect if a memory hole of
greater than 1 Gb is found during boot, so it is safe to enable
unless you require the DISCONTIGMEM option for your machine. If you
are unsure, say Y.
This code only takes effect if a memory hole larger than 1 GB is
found during boot. You must turn this option on if you require
the DISCONTIGMEM option for your machine. If you are unsure,
say Y.
config IA64_MCA
bool "Enable IA-64 Machine Check Abort"
......
......@@ -380,7 +380,7 @@ acpi_numa_processor_affinity_init (struct acpi_table_processor_affinity *pa)
void __init
acpi_numa_memory_affinity_init (struct acpi_table_memory_affinity *ma)
{
unsigned long paddr, size, hole_size, min_hole_size;
unsigned long paddr, size;
u8 pxm;
struct node_memblk_s *p, *q, *pend;
......@@ -402,34 +402,6 @@ acpi_numa_memory_affinity_init (struct acpi_table_memory_affinity *ma)
if (!ma->flags.enabled)
return;
/*
* When the chunk is not the first one in the node, check distance
* from the other chunks. When the hole is too huge ignore the chunk.
* This restriction should be removed when multiple chunks per node
* is supported.
*/
pend = &node_memblk[num_memblks];
min_hole_size = 0;
for (p = &node_memblk[0]; p < pend; p++) {
if (p->nid != pxm)
continue;
if (p->start_paddr < paddr)
hole_size = paddr - (p->start_paddr + p->size);
else
hole_size = p->start_paddr - (paddr + size);
if (!min_hole_size || hole_size < min_hole_size)
min_hole_size = hole_size;
}
if (min_hole_size) {
if (min_hole_size > size) {
printk(KERN_ERR "Too huge memory hole. Ignoring %ld MBytes at %lx\n",
size/(1024*1024), paddr);
return;
}
}
/* record this node in proximity bitmap */
pxm_bit_set(pxm);
......
......@@ -101,7 +101,7 @@ int
filter_rsvd_memory (unsigned long start, unsigned long end, void *arg)
{
unsigned long range_start, range_end, prev_start;
void (*func)(unsigned long, unsigned long);
void (*func)(unsigned long, unsigned long, int);
int i;
#if IGNORE_PFN0
......@@ -122,11 +122,7 @@ filter_rsvd_memory (unsigned long start, unsigned long end, void *arg)
range_end = min(end, rsvd_region[i].start);
if (range_start < range_end)
#ifdef CONFIG_DISCONTIGMEM
call_pernode_memory(__pa(range_start), __pa(range_end), func);
#else
(*func)(__pa(range_start), range_end - range_start);
#endif
call_pernode_memory(__pa(range_start), range_end - range_start, func);
/* nothing more available in this segment */
if (range_end == end) return 0;
......@@ -544,28 +540,7 @@ cpu_init (void)
struct cpuinfo_ia64 *cpu_info;
void *cpu_data;
#ifdef CONFIG_SMP
int cpu;
/*
* get_free_pages() cannot be used before cpu_init() done. BSP allocates
* "NR_CPUS" pages for all CPUs to avoid that AP calls get_zeroed_page().
*/
if (smp_processor_id() == 0) {
cpu_data = __alloc_bootmem(PERCPU_PAGE_SIZE * NR_CPUS, PERCPU_PAGE_SIZE,
__pa(MAX_DMA_ADDRESS));
for (cpu = 0; cpu < NR_CPUS; cpu++) {
memcpy(cpu_data, __phys_per_cpu_start, __per_cpu_end - __per_cpu_start);
__per_cpu_offset[cpu] = (char *) cpu_data - __per_cpu_start;
cpu_data += PERCPU_PAGE_SIZE;
per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
}
}
cpu_data = __per_cpu_start + __per_cpu_offset[smp_processor_id()];
#else /* !CONFIG_SMP */
cpu_data = __phys_per_cpu_start;
#endif /* !CONFIG_SMP */
cpu_data = per_cpu_init();
get_max_cacheline_size();
......@@ -576,9 +551,6 @@ cpu_init (void)
* accessing cpu_data() through the canonical per-CPU address.
*/
cpu_info = cpu_data + ((char *) &__ia64_per_cpu_var(cpu_info) - __per_cpu_start);
#ifdef CONFIG_NUMA
cpu_info->node_data = get_node_data_ptr();
#endif
identify_cpu(cpu_info);
#ifdef CONFIG_MCKINLEY
......
......@@ -166,6 +166,46 @@ find_memory (void)
find_initrd();
}
#ifdef CONFIG_SMP
/**
 * per_cpu_init - set up the per-CPU data areas
 *
 * On the CPU whose smp_processor_id() is 0, one bootmem allocation of
 * NR_CPUS slots (PERCPU_PAGE_SIZE each) is made, every slot is filled
 * with a copy of the data at __phys_per_cpu_start, and the slot's offset
 * is recorded in both __per_cpu_offset[] and local_per_cpu_offset.
 *
 * Returns the address of the calling CPU's per-CPU area.
 */
void *
per_cpu_init (void)
{
	void *slot;
	int i;

	/*
	 * get_free_pages() is not usable until cpu_init() has finished, so
	 * the BSP sets aside the areas for all NR_CPUS CPUs up front; that
	 * way no AP ever has to call get_zeroed_page().
	 */
	if (smp_processor_id() == 0) {
		slot = __alloc_bootmem(PERCPU_PAGE_SIZE * NR_CPUS, PERCPU_PAGE_SIZE,
				       __pa(MAX_DMA_ADDRESS));
		for (i = 0; i < NR_CPUS; i++, slot += PERCPU_PAGE_SIZE) {
			memcpy(slot, __phys_per_cpu_start, __per_cpu_end - __per_cpu_start);
			__per_cpu_offset[i] = (char *) slot - __per_cpu_start;
			per_cpu(local_per_cpu_offset, i) = __per_cpu_offset[i];
		}
	}
	return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
}
#endif /* CONFIG_SMP */
/*
 * Add the number of whole pages contained in [start, end) to the
 * unsigned long counter that @arg points to.  Always returns 0.
 */
static int
count_pages (u64 start, u64 end, void *arg)
{
	unsigned long *total = arg;
	u64 npages = (end - start) >> PAGE_SHIFT;

	*total += npages;
	return 0;
}
#ifdef CONFIG_VIRTUAL_MEM_MAP
static int
count_dma_pages (u64 start, u64 end, void *arg)
......
This diff is collapsed.
......@@ -450,15 +450,6 @@ find_largest_hole (u64 start, u64 end, void *arg)
}
#endif /* CONFIG_VIRTUAL_MEM_MAP */
/*
 * Add the number of whole pages contained in [start, end) to the
 * unsigned long counter that @arg points to.  Always returns 0.
 */
int
count_pages (u64 start, u64 end, void *arg)
{
	unsigned long *total = arg;
	u64 npages = (end - start) >> PAGE_SHIFT;

	*total += npages;
	return 0;
}
static int
count_reserved_pages (u64 start, u64 end, void *arg)
{
......
......@@ -8,7 +8,6 @@
*/
#include <linux/config.h>
#include <linux/mm.h>
/*
* Entries defined so far:
......@@ -34,19 +33,28 @@ extern void find_memory (void);
extern void reserve_memory (void);
extern void find_initrd (void);
extern int filter_rsvd_memory (unsigned long start, unsigned long end, void *arg);
extern int count_pages (u64 start, u64 end, void *arg);
/*
* For rounding an address to the next IA64_GRANULE_SIZE or order
*/
#define GRANULEROUNDDOWN(n) ((n) & ~(IA64_GRANULE_SIZE-1))
#define GRANULEROUNDUP(n) (((n)+IA64_GRANULE_SIZE-1) & ~(IA64_GRANULE_SIZE-1))
#define ORDERROUNDDOWN(n) ((n) & ~((PAGE_SIZE<<MAX_ORDER)-1))
#ifdef CONFIG_DISCONTIGMEM
extern void call_pernode_memory (unsigned long start, unsigned long end, void *arg);
extern void call_pernode_memory (unsigned long start, unsigned long len, void *func);
#else
/*
 * !CONFIG_DISCONTIGMEM: call func directly on (start, len), passing 0 as
 * its third argument.  Every macro argument is parenthesized so that
 * arbitrary expressions (e.g. a conditional selecting the callback) expand
 * correctly.
 */
# define call_pernode_memory(start, len, func) (*(func))((start), (len), 0)
#endif
#define IGNORE_PFN0 1 /* XXX fix me: ignore pfn 0 until TLB miss handler is updated... */
#ifdef CONFIG_VIRTUAL_MEM_MAP
#define LARGE_GAP 0x40000000 /* Use virtual mem map if a hole is larger than this */
extern struct page *vmem_map;
extern int find_largest_hole (u64 start, u64 end, void *arg);
extern int create_mem_map_page_table (u64 start, u64 end, void *arg);
# define LARGE_GAP 0x40000000 /* Use virtual mem map if a hole is larger than this */
extern unsigned long vmalloc_end;
extern struct page *vmem_map;
extern int find_largest_hole (u64 start, u64 end, void *arg);
extern int create_mem_map_page_table (u64 start, u64 end, void *arg);
#endif
#endif /* meminit_h */
......@@ -3,7 +3,7 @@
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*
* Copyright (c) 2000 Silicon Graphics, Inc. All rights reserved.
* Copyright (c) 2000,2003 Silicon Graphics, Inc. All rights reserved.
* Copyright (c) 2002 NEC Corp.
* Copyright (c) 2002 Erich Focht <efocht@ess.nec.de>
* Copyright (c) 2002 Kimio Suganuma <k-suganuma@da.jp.nec.com>
......@@ -12,148 +12,26 @@
#define _ASM_IA64_MMZONE_H
#include <linux/config.h>
#include <linux/init.h>
/*
* Given a kaddr, find the base mem_map address for the start of the mem_map
* entries for the bank containing the kaddr.
*/
#define BANK_MEM_MAP_BASE(kaddr) local_node_data->bank_mem_map_base[BANK_MEM_MAP_INDEX(kaddr)]
/*
* Given a kaddr, this macro returns the relative map number
* within the bank.
*/
#define BANK_MAP_NR(kaddr) (BANK_OFFSET(kaddr) >> PAGE_SHIFT)
/*
* Given a pte, this macro returns a pointer to the page struct for the pte.
*/
#define pte_page(pte) virt_to_page(PAGE_OFFSET | (pte_val(pte)&_PFN_MASK))
/*
* Determine if a kaddr is a valid memory address of memory that
* actually exists.
*
* The check consists of 2 parts:
* - verify that the address is a region 7 address & does not
* contain any bits that preclude it from being a valid platform
* memory address
* - verify that the chunk actually exists.
*
* Note that IO addresses are NOT considered valid addresses.
*
* Note, many platforms can simply check if kaddr exceeds a specific size.
* (However, this won't work on SGI platforms since IO space is embedded
* within the range of valid memory addresses & nodes have holes in the
* address range between banks).
*/
#define kern_addr_valid(kaddr) ({long _kav=(long)(kaddr); \
VALID_MEM_KADDR(_kav);})
/*
* Given a kaddr, return a pointer to the page struct for the page.
* If the kaddr does not represent RAM memory that potentially exists, return
* a pointer to the page struct for max_mapnr. IO addresses will
* return the page for max_nr. Addresses in unpopulated RAM banks may
* return undefined results OR may panic the system.
*
*/
#define virt_to_page(kaddr) ({long _kvtp=(long)(kaddr); \
(VALID_MEM_KADDR(_kvtp)) \
? BANK_MEM_MAP_BASE(_kvtp) + BANK_MAP_NR(_kvtp) \
: NULL;})
/*
* Given a page struct entry, return the physical address that the page struct represents.
* Since IA64 has all memory in the DMA zone, the following works:
*/
#define page_to_phys(page) __pa(page_address(page))
#define node_mem_map(nid) (NODE_DATA(nid)->node_mem_map)
#define node_localnr(pfn, nid) ((pfn) - NODE_DATA(nid)->node_start_pfn)
#define pfn_to_page(pfn) (struct page *)(node_mem_map(pfn_to_nid(pfn)) + node_localnr(pfn, pfn_to_nid(pfn)))
#define pfn_to_nid(pfn) local_node_data->node_id_map[(pfn << PAGE_SHIFT) >> BANKSHIFT]
#define page_to_pfn(page) (long)((page - page_zone(page)->zone_mem_map) + page_zone(page)->zone_start_pfn)
/*
* pfn_valid should be made as fast as possible, and the current definition
* is valid for machines that are NUMA, but still contiguous, which is what
* is currently supported. A more generalised, but slower definition would
* be something like this - mbligh:
* ( pfn_to_pgdat(pfn) && (pfn < node_end_pfn(pfn_to_nid(pfn))) )
*/
#define pfn_valid(pfn) (pfn < max_low_pfn)
extern unsigned long max_low_pfn;
#if defined(CONFIG_IA64_DIG)
/*
* Platform definitions for DIG platform with contiguous memory.
*/
#define MAX_PHYSNODE_ID 8 /* Maximum node number +1 */
#define MAX_PHYS_MEMORY (1UL << 40) /* 1 TB */
/*
* Bank definitions.
* Configurable settings for DIG: 512MB/bank: 16GB/node,
* 2048MB/bank: 64GB/node,
* 8192MB/bank: 256GB/node.
*/
#define NR_BANKS_PER_NODE 32
#if defined(CONFIG_IA64_NODESIZE_16GB)
# define BANKSHIFT 29
#elif defined(CONFIG_IA64_NODESIZE_64GB)
# define BANKSHIFT 31
#elif defined(CONFIG_IA64_NODESIZE_256GB)
# define BANKSHIFT 33
#else
# error Unsupported bank and nodesize!
#include <asm/page.h>
#include <asm/meminit.h>
#ifdef CONFIG_DISCONTIGMEM
#ifdef CONFIG_IA64_DIG /* DIG systems are small */
# define MAX_PHYSNODE_ID 8
# define NR_NODES 8
# define NR_MEMBLKS (NR_NODES * 32)
#else /* sn2 is the biggest case, so we use that if !DIG */
# define MAX_PHYSNODE_ID 2048
# define NR_NODES 256
# define NR_MEMBLKS (NR_NODES)
#endif
#define BANKSIZE (1UL << BANKSHIFT)
#elif defined(CONFIG_IA64_SGI_SN2)
/*
* SGI SN2 discontig definitions
*/
#define MAX_PHYSNODE_ID 2048 /* 2048 node ids (also called nasid) */
#define MAX_PHYS_MEMORY (1UL << 49)
#define NR_BANKS_PER_NODE 4
#define BANKSHIFT 38
#define SN2_NODE_SIZE (64UL*1024*1024*1024) /* 64GB per node */
#define BANKSIZE (SN2_NODE_SIZE/NR_BANKS_PER_NODE)
#endif /* CONFIG_IA64_DIG */
#if defined(CONFIG_IA64_DIG) || defined (CONFIG_IA64_SGI_SN2)
/* Common defines for both platforms */
#include <asm/numnodes.h>
#define BANK_OFFSET(addr) ((unsigned long)(addr) & (BANKSIZE-1))
#define NR_BANKS (NR_BANKS_PER_NODE * (1 << NODES_SHIFT))
#define NR_MEMBLKS (NR_BANKS)
/*
* VALID_MEM_KADDR returns a boolean to indicate if a kaddr is
* potentially a valid cacheable identity mapped RAM memory address.
* Note that the RAM may or may not actually be present!!
*/
#define VALID_MEM_KADDR(kaddr) 1
/*
* Given a nodeid & a bank number, find the address of the mem_map
* entry for the first page of the bank.
*/
#define BANK_MEM_MAP_INDEX(kaddr) \
(((unsigned long)(kaddr) & (MAX_PHYS_MEMORY-1)) >> BANKSHIFT)
extern unsigned long max_low_pfn;
#endif /* CONFIG_IA64_DIG || CONFIG_IA64_SGI_SN2 */
#define pfn_valid(pfn) (((pfn) < max_low_pfn) && ia64_pfn_valid(pfn))
/*
 * Index of @page relative to vmem_map.  The argument is parenthesized so
 * that any pointer-valued expression can be passed safely.
 */
#define page_to_pfn(page) ((unsigned long) ((page) - vmem_map))
#define pfn_to_page(pfn) (vmem_map + (pfn))
#endif /* CONFIG_DISCONTIGMEM */
#endif /* _ASM_IA64_MMZONE_H */
......@@ -11,9 +11,14 @@
#ifndef _ASM_IA64_NODEDATA_H
#define _ASM_IA64_NODEDATA_H
#include <linux/config.h>
#include <linux/numa.h>
#include <asm/percpu.h>
#include <asm/mmzone.h>
#ifdef CONFIG_DISCONTIGMEM
/*
* Node Data. One of these structures is located on each node of a NUMA system.
*/
......@@ -22,10 +27,7 @@ struct pglist_data;
struct ia64_node_data {
short active_cpu_count;
short node;
struct pglist_data *pg_data_ptrs[MAX_NUMNODES];
struct page *bank_mem_map_base[NR_BANKS];
struct ia64_node_data *node_data_ptrs[MAX_NUMNODES];
short node_id_map[NR_BANKS];
struct pglist_data *pg_data_ptrs[NR_NODES];
};
......@@ -34,41 +36,17 @@ struct ia64_node_data {
*/
#define local_node_data (local_cpu_data->node_data)
/*
* Return a pointer to the node_data structure for the specified node.
*/
#define node_data(node) (local_node_data->node_data_ptrs[node])
/*
* Get a pointer to the node_id/node_data for the current cpu.
* (boot time only)
*/
extern int boot_get_local_nodeid(void);
extern struct ia64_node_data *get_node_data_ptr(void);
/*
* Given a node id, return a pointer to the pg_data_t for the node.
* The following 2 macros are similar.
*
* NODE_DATA - should be used in all code not related to system
* initialization. It uses pernode data structures to minimize
* offnode memory references. However, these structures are not
* present during boot. This macro can be used once cpu_init
* completes.
*
* BOOT_NODE_DATA
* - should be used during system initialization
* prior to freeing __initdata. It does not depend on the percpu
* area being present.
*
* NOTE: The names of these macros are misleading but are difficult to change
* since they are used in generic Linux & on other architectures.
*/
#define NODE_DATA(nid) (local_node_data->pg_data_ptrs[nid])
#define BOOT_NODE_DATA(nid) boot_get_pg_data_ptr((long)(nid))
struct pglist_data;
extern struct pglist_data * __init boot_get_pg_data_ptr(long);
#endif /* CONFIG_DISCONTIGMEM */
#endif /* _ASM_IA64_NODEDATA_H */
......@@ -12,12 +12,17 @@
#define _ASM_IA64_NUMA_H
#include <linux/config.h>
#include <linux/cpumask.h>
#ifdef CONFIG_NUMA
#include <linux/numa.h>
#include <linux/cache.h>
#include <linux/cache.h>
#include <linux/cpumask.h>
#include <linux/numa.h>
#include <linux/smp.h>
#include <linux/threads.h>
#include <asm/mmzone.h>
extern volatile char cpu_to_node_map[NR_CPUS] __cacheline_aligned;
extern volatile cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned;
......
......@@ -94,18 +94,20 @@ do { \
#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
#ifdef CONFIG_VIRTUAL_MEM_MAP
extern int ia64_pfn_valid (unsigned long pfn);
#else
# define ia64_pfn_valid(pfn) 1
#endif
#ifndef CONFIG_DISCONTIGMEM
# ifdef CONFIG_VIRTUAL_MEM_MAP
extern int ia64_pfn_valid (unsigned long pfn);
# define pfn_valid(pfn) (((pfn) < max_mapnr) && ia64_pfn_valid(pfn))
# else
# define pfn_valid(pfn) ((pfn) < max_mapnr)
# endif
#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
#define pfn_valid(pfn) (((pfn) < max_mapnr) && ia64_pfn_valid(pfn))
/*
 * Index of @page relative to mem_map.  The argument is parenthesized so
 * that any pointer-valued expression can be passed safely.
 */
#define page_to_pfn(page) ((unsigned long) ((page) - mem_map))
#define pfn_to_page(pfn) (mem_map + (pfn))
#endif /* CONFIG_DISCONTIGMEM */
#define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT)
#endif
#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
typedef union ia64_va {
struct {
......
......@@ -46,11 +46,13 @@ DECLARE_PER_CPU(unsigned long, local_per_cpu_offset);
extern void percpu_modcopy(void *pcpudst, const void *src, unsigned long size);
extern void setup_per_cpu_areas (void);
extern void *per_cpu_init(void);
#else /* ! SMP */
#define per_cpu(var, cpu) ((void)cpu, per_cpu__##var)
#define __get_cpu_var(var) per_cpu__##var
#define per_cpu_init() (__phys_per_cpu_start)
#endif /* SMP */
......
......@@ -174,7 +174,6 @@ ia64_phys_addr_valid (unsigned long addr)
return (addr & (local_cpu_data->unimpl_pa_mask)) == 0;
}
#ifndef CONFIG_DISCONTIGMEM
/*
* kern_addr_valid(ADDR) tests if ADDR is pointing to valid kernel
* memory. For the return value to be meaningful, ADDR must be >=
......@@ -190,7 +189,6 @@ ia64_phys_addr_valid (unsigned long addr)
*/
#define kern_addr_valid(addr) (1)
#endif
/*
* Now come the defines and routines to manage and access the three-level
......@@ -240,10 +238,8 @@ ia64_phys_addr_valid (unsigned long addr)
#define pte_none(pte) (!pte_val(pte))
#define pte_present(pte) (pte_val(pte) & (_PAGE_P | _PAGE_PROTNONE))
#define pte_clear(pte) (pte_val(*(pte)) = 0UL)
#ifndef CONFIG_DISCONTIGMEM
/* pte_page() returns the "struct page *" corresponding to the PTE: */
#define pte_page(pte) virt_to_page(((pte_val(pte) & _PFN_MASK) + PAGE_OFFSET))
#endif
#define pmd_none(pmd) (!pmd_val(pmd))
#define pmd_bad(pmd) (!ia64_phys_addr_valid(pmd_val(pmd)))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment