Commit 2caaad41 authored by Christoph Lameter, committed by Linus Torvalds

[PATCH] Reduce size of huge boot per_cpu_pageset

Reduce size of the huge per_cpu_pageset structure in __initdata introduced
into mm1 with the pageset localization patchset.  Use one specially
configured pageset per cpu for all zones and nodes during bootup.

- Avoid duplication of pageset initialization code.
- Do the adding to the pageset list before the potential free_pages_bulk
  in free_hot_cold_page (otherwise we would have to hold a page
  in a pageset during the period that the boot pagesets are in use).
- Remove the mistaken __cpuinitdata attribute and revert back to __initdata
  for the boot pageset. A boot pageset is not necessary for cpu hotplug.

Tested on x86_64 (2.6.12-rc6-mm1): UP, SMP, NUMA.
Tested on IA64 (2.6.12-rc5-mm2): NUMA (2.6.12-rc6-mm1 is broken for IA64
because of the sparsemem patches).
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 4ae7c039
...@@ -71,11 +71,6 @@ EXPORT_SYMBOL(nr_swap_pages); ...@@ -71,11 +71,6 @@ EXPORT_SYMBOL(nr_swap_pages);
struct zone *zone_table[1 << (ZONES_SHIFT + NODES_SHIFT)]; struct zone *zone_table[1 << (ZONES_SHIFT + NODES_SHIFT)];
EXPORT_SYMBOL(zone_table); EXPORT_SYMBOL(zone_table);
#ifdef CONFIG_NUMA
static struct per_cpu_pageset
pageset_table[MAX_NR_ZONES*MAX_NUMNODES*NR_CPUS] __initdata;
#endif
static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" }; static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" };
int min_free_kbytes = 1024; int min_free_kbytes = 1024;
...@@ -652,10 +647,10 @@ static void fastcall free_hot_cold_page(struct page *page, int cold) ...@@ -652,10 +647,10 @@ static void fastcall free_hot_cold_page(struct page *page, int cold)
free_pages_check(__FUNCTION__, page); free_pages_check(__FUNCTION__, page);
pcp = &zone_pcp(zone, get_cpu())->pcp[cold]; pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
local_irq_save(flags); local_irq_save(flags);
if (pcp->count >= pcp->high)
pcp->count -= free_pages_bulk(zone, pcp->batch, &pcp->list, 0);
list_add(&page->lru, &pcp->list); list_add(&page->lru, &pcp->list);
pcp->count++; pcp->count++;
if (pcp->count >= pcp->high)
pcp->count -= free_pages_bulk(zone, pcp->batch, &pcp->list, 0);
local_irq_restore(flags); local_irq_restore(flags);
put_cpu(); put_cpu();
} }
...@@ -1714,57 +1709,55 @@ static int __devinit zone_batchsize(struct zone *zone) ...@@ -1714,57 +1709,55 @@ static int __devinit zone_batchsize(struct zone *zone)
return batch; return batch;
} }
/*
 * setup_pageset - put both per-cpu page lists of a pageset into their
 * initial, empty state.
 * @p:     pageset to initialise
 * @batch: base batch size from which the watermarks are derived
 *
 *   pcp[0] (hot):  low = 2 * batch, high = 6 * batch
 *   pcp[1] (cold): low = 0,         high = 2 * batch
 *
 * Both lists start with a count of 0 and an empty list head. The per-list
 * batch is clamped to at least 1, so a @batch of 0 yields high == 0
 * together with a batch of 1.
 */
inline void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
{
	struct per_cpu_pages *hot = &p->pcp[0];
	struct per_cpu_pages *cold = &p->pcp[1];
	/* Never let the bulk transfer size drop to zero. */
	unsigned long chunk = max(1UL, 1 * batch);

	/* Hot list: keeps the larger reserve. */
	INIT_LIST_HEAD(&hot->list);
	hot->count = 0;
	hot->low = 2 * batch;
	hot->high = 6 * batch;
	hot->batch = chunk;

	/* Cold list: no low watermark, smaller high watermark. */
	INIT_LIST_HEAD(&cold->list);
	cold->count = 0;
	cold->low = 0;
	cold->high = 2 * batch;
	cold->batch = chunk;
}
#ifdef CONFIG_NUMA #ifdef CONFIG_NUMA
/* /*
* Dynamicaly allocate memory for the * Boot pageset table. One per cpu which is going to be used for all
* zones and all nodes. The parameters will be set in such a way
* that an item put on a list will immediately be handed over to
* the buddy list. This is safe since pageset manipulation is done
* with interrupts disabled.
*
* Some NUMA counter updates may also be caught by the boot pagesets.
* These will be discarded when bootup is complete.
*/
static struct per_cpu_pageset
boot_pageset[NR_CPUS] __initdata;
/*
* Dynamically allocate memory for the
* per cpu pageset array in struct zone. * per cpu pageset array in struct zone.
*/ */
static int __devinit process_zones(int cpu) static int __devinit process_zones(int cpu)
{ {
struct zone *zone, *dzone; struct zone *zone, *dzone;
int i;
for_each_zone(zone) { for_each_zone(zone) {
struct per_cpu_pageset *npageset = NULL;
npageset = kmalloc_node(sizeof(struct per_cpu_pageset), zone->pageset[cpu] = kmalloc_node(sizeof(struct per_cpu_pageset),
GFP_KERNEL, cpu_to_node(cpu)); GFP_KERNEL, cpu_to_node(cpu));
if (!npageset) { if (!zone->pageset[cpu])
zone->pageset[cpu] = NULL;
goto bad; goto bad;
}
if (zone->pageset[cpu]) {
memcpy(npageset, zone->pageset[cpu],
sizeof(struct per_cpu_pageset));
/* Relocate lists */ setup_pageset(zone->pageset[cpu], zone_batchsize(zone));
for (i = 0; i < 2; i++) {
INIT_LIST_HEAD(&npageset->pcp[i].list);
list_splice(&zone->pageset[cpu]->pcp[i].list,
&npageset->pcp[i].list);
}
} else {
struct per_cpu_pages *pcp;
unsigned long batch;
batch = zone_batchsize(zone);
pcp = &npageset->pcp[0]; /* hot */
pcp->count = 0;
pcp->low = 2 * batch;
pcp->high = 6 * batch;
pcp->batch = 1 * batch;
INIT_LIST_HEAD(&pcp->list);
pcp = &npageset->pcp[1]; /* cold*/
pcp->count = 0;
pcp->low = 0;
pcp->high = 2 * batch;
pcp->batch = 1 * batch;
INIT_LIST_HEAD(&pcp->list);
}
zone->pageset[cpu] = npageset;
} }
return 0; return 0;
...@@ -1878,30 +1871,13 @@ static void __init free_area_init_core(struct pglist_data *pgdat, ...@@ -1878,30 +1871,13 @@ static void __init free_area_init_core(struct pglist_data *pgdat,
batch = zone_batchsize(zone); batch = zone_batchsize(zone);
for (cpu = 0; cpu < NR_CPUS; cpu++) { for (cpu = 0; cpu < NR_CPUS; cpu++) {
struct per_cpu_pages *pcp;
#ifdef CONFIG_NUMA #ifdef CONFIG_NUMA
struct per_cpu_pageset *pgset; /* Early boot. Slab allocator not functional yet */
pgset = &pageset_table[nid*MAX_NR_ZONES*NR_CPUS + zone->pageset[cpu] = &boot_pageset[cpu];
(j * NR_CPUS) + cpu]; setup_pageset(&boot_pageset[cpu],0);
zone->pageset[cpu] = pgset;
#else #else
struct per_cpu_pageset *pgset = zone_pcp(zone, cpu); setup_pageset(zone_pcp(zone,cpu), batch);
#endif #endif
pcp = &pgset->pcp[0]; /* hot */
pcp->count = 0;
pcp->low = 2 * batch;
pcp->high = 6 * batch;
pcp->batch = 1 * batch;
INIT_LIST_HEAD(&pcp->list);
pcp = &pgset->pcp[1]; /* cold */
pcp->count = 0;
pcp->low = 0;
pcp->high = 2 * batch;
pcp->batch = 1 * batch;
INIT_LIST_HEAD(&pcp->list);
} }
printk(KERN_DEBUG " %s zone: %lu pages, LIFO batch:%lu\n", printk(KERN_DEBUG " %s zone: %lu pages, LIFO batch:%lu\n",
zone_names[j], realsize, batch); zone_names[j], realsize, batch);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment