Commit 69fba2dd authored by Kamezawa Hiroyuki, committed by Linus Torvalds

[PATCH] no buddy bitmap patch revisit: for mm/page_alloc.c

This patch removes the buddy bitmaps from the page allocator in mm/page_alloc.c.

The buddy system now uses the page->private field to record a free page's
order instead of using bitmaps.

The algorithm of the buddy system is unchanged. Only bitmaps are removed.
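
As a rough illustration of the new bookkeeping (page->private holding the
order, PG_private marking the head of a free block), here is a sketch only,
not part of the patch: free_block_pages() is an invented helper, and it
assumes the page_order()/PagePrivate() helpers added below and that
zone->lock is held:

    /* Sketch: 'page' must be the head of a block that is already on a
     * free list; zone->lock must be held, as noted in the patch. */
    static unsigned long free_block_pages(struct page *page)
    {
            if (!PagePrivate(page))         /* not on the buddy lists */
                    return 0;
            return 1UL << page_order(page); /* free run is 2^order pages */
    }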

In this buddy system, two pages (a page and its "buddy") can be coalesced when

PagePrivate(buddy) &&
page_order(page) == page_order(buddy) &&
!PageReserved(buddy) &&
page_count(buddy) == 0

This also means "buddy" is the head of a contiguous run of free pages
of length (1 << page_order(buddy)).
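
For illustration, a minimal self-contained sketch (plain userspace C, not
kernel code; buddy_index() and combined_index() are invented names) of the
index arithmetic that the new __free_pages_bulk() relies on:

    #include <assert.h>

    /* The buddy of the free block starting at page_idx with the given
     * order differs from it only in bit 'order' of the index. */
    static unsigned long buddy_index(unsigned long page_idx, unsigned int order)
    {
            return page_idx ^ (1UL << order);
    }

    /* After a merge, the combined block starts at the lower of the two
     * indices, i.e. page_idx & buddy_idx. */
    static unsigned long combined_index(unsigned long page_idx, unsigned int order)
    {
            return page_idx & buddy_index(page_idx, order);
    }

    int main(void)
    {
            /* Freeing the order-2 block at page 12: its buddy is the
             * order-2 block at page 8; merged, they form the order-3
             * block starting at page 8. */
            assert(buddy_index(12, 2) == 8);
            assert(combined_index(12, 2) == 8);
            return 0;
    }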

bad_range() is called from the inner loop of __free_pages_bulk().
On many architectures, bad_range() is only a sanity check and will always
return 0. But if a zone's memmap has a hole, it sometimes returns 1.
An architecture with memory holes inside a zone has to define
CONFIG_HOLES_IN_ZONE. When CONFIG_HOLES_IN_ZONE is defined, pfn_valid() is
called to check whether a buddy page is valid or not.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 6951e82f
@@ -71,6 +71,10 @@ static int bad_range(struct zone *zone, struct page *page)
                 return 1;
         if (page_to_pfn(page) < zone->zone_start_pfn)
                 return 1;
+#ifdef CONFIG_HOLES_IN_ZONE
+        if (!pfn_valid(page_to_pfn(page)))
+                return 1;
+#endif
         if (zone != page_zone(page))
                 return 1;
         return 0;
@@ -158,6 +162,45 @@ static void destroy_compound_page(struct page *page, unsigned long order)
 }
 #endif /* CONFIG_HUGETLB_PAGE */
 
+/*
+ * function for dealing with page's order in buddy system.
+ * zone->lock is already acquired when we use these.
+ * So, we don't need atomic page->flags operations here.
+ */
+static inline unsigned long page_order(struct page *page) {
+        return page->private;
+}
+
+static inline void set_page_order(struct page *page, int order) {
+        page->private = order;
+        __SetPagePrivate(page);
+}
+
+static inline void rmv_page_order(struct page *page)
+{
+        __ClearPagePrivate(page);
+        page->private = 0;
+}
+
+/*
+ * This function checks whether a page is free && is the buddy
+ * we can do coalesce a page and its buddy if
+ * (a) the buddy is free &&
+ * (b) the buddy is on the buddy system &&
+ * (c) a page and its buddy have the same order.
+ * for recording page's order, we use page->private and PG_private.
+ *
+ */
+static inline int page_is_buddy(struct page *page, int order)
+{
+        if (PagePrivate(page)           &&
+            (page_order(page) == order) &&
+            !PageReserved(page)         &&
+             page_count(page) == 0)
+                return 1;
+        return 0;
+}
+
 /*
  * Freeing function for a buddy system allocator.
  *
@@ -170,9 +213,10 @@ static void destroy_compound_page(struct page *page, unsigned long order)
  * at the bottom level available, and propagating the changes upward
  * as necessary, plus some accounting needed to play nicely with other
  * parts of the VM system.
- * At each level, we keep one bit for each pair of blocks, which
- * is set to 1 iff only one of the pair is allocated.  So when we
- * are allocating or freeing one, we can derive the state of the
+ * At each level, we keep a list of pages, which are heads of continuous
+ * free pages of length of (1 << order) and marked with PG_Private.Page's
+ * order is recorded in page->private field.
+ * So when we are allocating or freeing one, we can derive the state of the
  * other.  That is, if we allocate a small block, and both were
  * free, the remainder of the region must be split into blocks.
  * If a block is freed, and its buddy is also free, then this
@@ -182,44 +226,44 @@ static void destroy_compound_page(struct page *page, unsigned long order)
  */
 
 static inline void __free_pages_bulk (struct page *page, struct page *base,
-                struct zone *zone, struct free_area *area, unsigned int order)
+                struct zone *zone, unsigned int order)
 {
-        unsigned long page_idx, index, mask;
+        unsigned long page_idx;
+        struct page *coalesced;
+        int order_size = 1 << order;
 
-        if (order)
+        if (unlikely(order))
                 destroy_compound_page(page, order);
-        mask = (~0UL) << order;
         page_idx = page - base;
-        if (page_idx & ~mask)
-                BUG();
-        index = page_idx >> (1 + order);
 
-        zone->free_pages += 1 << order;
+        BUG_ON(page_idx & (order_size - 1));
+        BUG_ON(bad_range(zone, page));
+
+        zone->free_pages += order_size;
         while (order < MAX_ORDER-1) {
-                struct page *buddy1, *buddy2;
+                struct free_area *area;
+                struct page *buddy;
+                int buddy_idx;
 
-                BUG_ON(area >= zone->free_area + MAX_ORDER);
-                if (!__test_and_change_bit(index, area->map))
-                        /*
-                         * the buddy page is still allocated.
-                         */
+                buddy_idx = (page_idx ^ (1 << order));
+                buddy = base + buddy_idx;
+                if (bad_range(zone, buddy))
+                        break;
+                if (!page_is_buddy(buddy, order))
                         break;
                 /* Move the buddy up one level. */
-                buddy1 = base + (page_idx ^ (1 << order));
-                buddy2 = base + page_idx;
-                BUG_ON(bad_range(zone, buddy1));
-                BUG_ON(bad_range(zone, buddy2));
-                list_del(&buddy1->lru);
+                list_del(&buddy->lru);
+                area = zone->free_area + order;
                 area->nr_free--;
-                mask <<= 1;
+                rmv_page_order(buddy);
+                page_idx &= buddy_idx;
                 order++;
-                area++;
-                index >>= 1;
-                page_idx &= mask;
         }
-        list_add(&(base + page_idx)->lru, &area->free_list);
-        area->nr_free++;
+        coalesced = base + page_idx;
+        set_page_order(coalesced, order);
+        list_add(&coalesced->lru, &zone->free_area[order].free_list);
+        zone->free_area[order].nr_free++;
 }
static inline void free_pages_check(const char *function, struct page *page) static inline void free_pages_check(const char *function, struct page *page)
@@ -257,12 +301,10 @@ free_pages_bulk(struct zone *zone, int count,
                 struct list_head *list, unsigned int order)
 {
         unsigned long flags;
-        struct free_area *area;
         struct page *base, *page = NULL;
         int ret = 0;
 
         base = zone->zone_mem_map;
-        area = zone->free_area + order;
         spin_lock_irqsave(&zone->lock, flags);
         zone->all_unreclaimable = 0;
         zone->pages_scanned = 0;
@@ -270,7 +312,7 @@ free_pages_bulk(struct zone *zone, int count,
                 page = list_entry(list->prev, struct page, lru);
                 /* have to delete it as __free_pages_bulk list manipulates */
                 list_del(&page->lru);
-                __free_pages_bulk(page, base, zone, area, order);
+                __free_pages_bulk(page, base, zone, order);
                 ret++;
         }
         spin_unlock_irqrestore(&zone->lock, flags);
@@ -299,8 +341,6 @@ void __free_pages_ok(struct page *page, unsigned int order)
         free_pages_bulk(page_zone(page), 1, &list, order);
 }
 
-#define MARK_USED(index, order, area) \
-        __change_bit((index) >> (1+(order)), (area)->map)
 
 /*
  * The order of subdivision here is critical for the IO subsystem.
@@ -318,7 +358,7 @@ void __free_pages_ok(struct page *page, unsigned int order)
  */
 static inline struct page *
 expand(struct zone *zone, struct page *page,
-        unsigned long index, int low, int high, struct free_area *area)
+        int low, int high, struct free_area *area)
 {
         unsigned long size = 1 << high;
 
@@ -329,7 +369,7 @@ expand(struct zone *zone, struct page *page,
                 BUG_ON(bad_range(zone, &page[size]));
                 list_add(&page[size].lru, &area->free_list);
                 area->nr_free++;
-                MARK_USED(index + size, high, area);
+                set_page_order(&page[size], high);
         }
         return page;
 }
@@ -384,7 +424,6 @@ static struct page *__rmqueue(struct zone *zone, unsigned int order)
         struct free_area * area;
         unsigned int current_order;
         struct page *page;
-        unsigned int index;
 
         for (current_order = order; current_order < MAX_ORDER; ++current_order) {
                 area = zone->free_area + current_order;
@@ -393,12 +432,10 @@ static struct page *__rmqueue(struct zone *zone, unsigned int order)
                 page = list_entry(area->free_list.next, struct page, lru);
                 list_del(&page->lru);
+                rmv_page_order(page);
                 area->nr_free--;
-                index = page - zone->zone_mem_map;
-                if (current_order != MAX_ORDER-1)
-                        MARK_USED(index, current_order, area);
                 zone->free_pages -= 1UL << order;
-                return expand(zone, page, index, order, current_order, area);
+                return expand(zone, page, order, current_order, area);
         }
 
         return NULL;
@@ -1473,49 +1510,12 @@ void __init memmap_init_zone(unsigned long size, int nid, unsigned long zone,
         }
 }
 
-/*
- * Page buddy system uses "index >> (i+1)", where "index" is
- * at most "size-1".
- *
- * The extra "+3" is to round down to byte size (8 bits per byte
- * assumption). Thus we get "(size-1) >> (i+4)" as the last byte
- * we can access.
- *
- * The "+1" is because we want to round the byte allocation up
- * rather than down. So we should have had a "+7" before we shifted
- * down by three. Also, we have to add one as we actually _use_ the
- * last bit (it's [0,n] inclusive, not [0,n[).
- *
- * So we actually had +7+1 before we shift down by 3. But
- * (n+8) >> 3 == (n >> 3) + 1 (modulo overflows, which we do not have).
- *
- * Finally, we LONG_ALIGN because all bitmap operations are on longs.
- */
-unsigned long pages_to_bitmap_size(unsigned long order, unsigned long nr_pages)
-{
-        unsigned long bitmap_size;
-
-        bitmap_size = (nr_pages-1) >> (order+4);
-        bitmap_size = LONG_ALIGN(bitmap_size+1);
-        return bitmap_size;
-}
-
-void zone_init_free_lists(struct pglist_data *pgdat, struct zone *zone, unsigned long size)
+void zone_init_free_lists(struct pglist_data *pgdat, struct zone *zone,
+                                unsigned long size)
 {
         int order;
-        for (order = 0; ; order++) {
-                unsigned long bitmap_size;
-
+        for (order = 0; order < MAX_ORDER ; order++) {
                 INIT_LIST_HEAD(&zone->free_area[order].free_list);
-                if (order == MAX_ORDER-1) {
-                        zone->free_area[order].map = NULL;
-                        break;
-                }
-
-                bitmap_size = pages_to_bitmap_size(order, size);
-                zone->free_area[order].map =
-                  (unsigned long *) alloc_bootmem_node(pgdat, bitmap_size);
                 zone->free_area[order].nr_free = 0;
         }
 }