Commit b539b87f authored by Tejun Heo

percpu: implement pcpu_nr_empty_pop_pages and chunk->nr_populated

pcpu_nr_empty_pop_pages counts the number of empty populated pages
across all chunks and chunk->nr_populated counts the number of
populated pages in a chunk.  Both will be used to implement pre/async
population for atomic allocations.

pcpu_chunk_[de]populated() are added to update chunk->populated,
chunk->nr_populated and pcpu_nr_empty_pop_pages together.  All
successful chunk [de]populations should be followed by the
corresponding pcpu_chunk_[de]populated() calls.
Signed-off-by: Tejun Heo <tj@kernel.org>
parent 9c824b6a
...@@ -69,7 +69,7 @@ static struct pcpu_chunk *pcpu_create_chunk(void) ...@@ -69,7 +69,7 @@ static struct pcpu_chunk *pcpu_create_chunk(void)
chunk->base_addr = page_address(pages) - pcpu_group_offsets[0]; chunk->base_addr = page_address(pages) - pcpu_group_offsets[0];
spin_lock_irq(&pcpu_lock); spin_lock_irq(&pcpu_lock);
bitmap_fill(chunk->populated, nr_pages); pcpu_chunk_populated(chunk, 0, nr_pages);
spin_unlock_irq(&pcpu_lock); spin_unlock_irq(&pcpu_lock);
return chunk; return chunk;
......
...@@ -113,6 +113,7 @@ struct pcpu_chunk { ...@@ -113,6 +113,7 @@ struct pcpu_chunk {
void *data; /* chunk data */ void *data; /* chunk data */
int first_free; /* no free below this */ int first_free; /* no free below this */
bool immutable; /* no [de]population allowed */ bool immutable; /* no [de]population allowed */
int nr_populated; /* # of populated pages */
unsigned long populated[]; /* populated bitmap */ unsigned long populated[]; /* populated bitmap */
}; };
...@@ -161,6 +162,12 @@ static DEFINE_MUTEX(pcpu_alloc_mutex); /* chunk create/destroy, [de]pop */ ...@@ -161,6 +162,12 @@ static DEFINE_MUTEX(pcpu_alloc_mutex); /* chunk create/destroy, [de]pop */
static struct list_head *pcpu_slot __read_mostly; /* chunk list slots */ static struct list_head *pcpu_slot __read_mostly; /* chunk list slots */
/*
* The number of empty populated pages, protected by pcpu_lock. The
* reserved chunk doesn't contribute to the count.
*/
static int pcpu_nr_empty_pop_pages;
/* reclaim work to release fully free chunks, scheduled from free path */ /* reclaim work to release fully free chunks, scheduled from free path */
static void pcpu_reclaim(struct work_struct *work); static void pcpu_reclaim(struct work_struct *work);
static DECLARE_WORK(pcpu_reclaim_work, pcpu_reclaim); static DECLARE_WORK(pcpu_reclaim_work, pcpu_reclaim);
...@@ -295,6 +302,38 @@ static void pcpu_mem_free(void *ptr, size_t size) ...@@ -295,6 +302,38 @@ static void pcpu_mem_free(void *ptr, size_t size)
vfree(ptr); vfree(ptr);
} }
/**
 * pcpu_count_occupied_pages - number of whole pages attributable to an area
 * @chunk: chunk of interest
 * @i: index of the area in @chunk->map
 *
 * Work out how many pages the @i'th area of @chunk occupies.  The area
 * spans from map[@i] to map[@i + 1] with the low flag bit masked off.  If
 * either boundary is not page aligned, the page straddling that boundary
 * is counted only when the neighbouring area is free and covers the
 * remainder of that page.
 *
 * RETURNS:
 * The number of pages, never negative.
 */
static int pcpu_count_occupied_pages(struct pcpu_chunk *chunk, int i)
{
	int start = chunk->map[i] & ~1;
	int stop = chunk->map[i + 1] & ~1;
	int nr_pages;

	/*
	 * Unaligned head: pull the start back to the page boundary when
	 * the preceding area is free (low bit clear) and begins at or
	 * before that boundary.
	 */
	if (i > 0 && !PAGE_ALIGNED(start)) {
		int page_start = round_down(start, PAGE_SIZE);
		int prev_entry = chunk->map[i - 1];

		if (!(prev_entry & 1) && prev_entry <= page_start)
			start = page_start;
	}

	/*
	 * Unaligned tail: push the end forward to the page boundary when
	 * the following area is free and extends at least to that boundary.
	 */
	if (i + 1 < chunk->map_used && !PAGE_ALIGNED(stop)) {
		int page_stop = round_up(stop, PAGE_SIZE);
		int next_entry = chunk->map[i + 1];
		int next_stop = chunk->map[i + 2] & ~1;

		if (!(next_entry & 1) && next_stop >= page_stop)
			stop = page_stop;
	}

	/* an area living entirely inside a shared page yields zero */
	nr_pages = PFN_DOWN(stop) - PFN_UP(start);
	return max_t(int, nr_pages, 0);
}
/** /**
* pcpu_chunk_relocate - put chunk in the appropriate chunk slot * pcpu_chunk_relocate - put chunk in the appropriate chunk slot
* @chunk: chunk of interest * @chunk: chunk of interest
...@@ -483,6 +522,7 @@ static int pcpu_fit_in_area(struct pcpu_chunk *chunk, int off, int this_size, ...@@ -483,6 +522,7 @@ static int pcpu_fit_in_area(struct pcpu_chunk *chunk, int off, int this_size,
* @size: wanted size in bytes * @size: wanted size in bytes
* @align: wanted align * @align: wanted align
* @pop_only: allocate only from the populated area * @pop_only: allocate only from the populated area
* @occ_pages_p: out param for the number of pages the area occupies
* *
* Try to allocate @size bytes area aligned at @align from @chunk. * Try to allocate @size bytes area aligned at @align from @chunk.
* Note that this function only allocates the offset. It doesn't * Note that this function only allocates the offset. It doesn't
...@@ -498,7 +538,7 @@ static int pcpu_fit_in_area(struct pcpu_chunk *chunk, int off, int this_size, ...@@ -498,7 +538,7 @@ static int pcpu_fit_in_area(struct pcpu_chunk *chunk, int off, int this_size,
* found. * found.
*/ */
static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align, static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align,
bool pop_only) bool pop_only, int *occ_pages_p)
{ {
int oslot = pcpu_chunk_slot(chunk); int oslot = pcpu_chunk_slot(chunk);
int max_contig = 0; int max_contig = 0;
...@@ -587,6 +627,7 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align, ...@@ -587,6 +627,7 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align,
chunk->free_size -= size; chunk->free_size -= size;
*p |= 1; *p |= 1;
*occ_pages_p = pcpu_count_occupied_pages(chunk, i);
pcpu_chunk_relocate(chunk, oslot); pcpu_chunk_relocate(chunk, oslot);
return off; return off;
} }
...@@ -602,6 +643,7 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align, ...@@ -602,6 +643,7 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align,
* pcpu_free_area - free area to a pcpu_chunk * pcpu_free_area - free area to a pcpu_chunk
* @chunk: chunk of interest * @chunk: chunk of interest
* @freeme: offset of area to free * @freeme: offset of area to free
* @occ_pages_p: out param for the number of pages the area occupies
* *
* Free area starting from @freeme to @chunk. Note that this function * Free area starting from @freeme to @chunk. Note that this function
* only modifies the allocation map. It doesn't depopulate or unmap * only modifies the allocation map. It doesn't depopulate or unmap
...@@ -610,7 +652,8 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align, ...@@ -610,7 +652,8 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align,
* CONTEXT: * CONTEXT:
* pcpu_lock. * pcpu_lock.
*/ */
static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme) static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme,
int *occ_pages_p)
{ {
int oslot = pcpu_chunk_slot(chunk); int oslot = pcpu_chunk_slot(chunk);
int off = 0; int off = 0;
...@@ -641,6 +684,8 @@ static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme) ...@@ -641,6 +684,8 @@ static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme)
*p = off &= ~1; *p = off &= ~1;
chunk->free_size += (p[1] & ~1) - off; chunk->free_size += (p[1] & ~1) - off;
*occ_pages_p = pcpu_count_occupied_pages(chunk, i);
/* merge with next? */ /* merge with next? */
if (!(p[1] & 1)) if (!(p[1] & 1))
to_free++; to_free++;
...@@ -696,6 +741,50 @@ static void pcpu_free_chunk(struct pcpu_chunk *chunk) ...@@ -696,6 +741,50 @@ static void pcpu_free_chunk(struct pcpu_chunk *chunk)
pcpu_mem_free(chunk, pcpu_chunk_struct_size); pcpu_mem_free(chunk, pcpu_chunk_struct_size);
} }
/**
 * pcpu_chunk_populated - account for pages newly populated in a chunk
 * @chunk: pcpu_chunk which got populated
 * @page_start: first page of the populated range (inclusive)
 * @page_end: end of the populated range (exclusive)
 *
 * Record that pages [@page_start,@page_end) of @chunk are now populated:
 * the matching bits are set in @chunk->populated and the page count is
 * added to both @chunk->nr_populated and pcpu_nr_empty_pop_pages.  Must
 * be called after each successful population.
 *
 * CONTEXT:
 * pcpu_lock.
 */
static void pcpu_chunk_populated(struct pcpu_chunk *chunk,
				 int page_start, int page_end)
{
	int nr_pages = page_end - page_start;

	lockdep_assert_held(&pcpu_lock);

	pcpu_nr_empty_pop_pages += nr_pages;
	chunk->nr_populated += nr_pages;
	bitmap_set(chunk->populated, page_start, nr_pages);
}
/**
 * pcpu_chunk_depopulated - account for pages removed from a chunk
 * @chunk: pcpu_chunk which got depopulated
 * @page_start: first page of the depopulated range (inclusive)
 * @page_end: end of the depopulated range (exclusive)
 *
 * Record that pages [@page_start,@page_end) of @chunk are no longer
 * populated: the matching bits are cleared in @chunk->populated and the
 * page count is subtracted from both @chunk->nr_populated and
 * pcpu_nr_empty_pop_pages.  Must be called after each successful
 * depopulation.
 *
 * CONTEXT:
 * pcpu_lock.
 */
static void pcpu_chunk_depopulated(struct pcpu_chunk *chunk,
				   int page_start, int page_end)
{
	int nr_pages = page_end - page_start;

	lockdep_assert_held(&pcpu_lock);

	pcpu_nr_empty_pop_pages -= nr_pages;
	chunk->nr_populated -= nr_pages;
	bitmap_clear(chunk->populated, page_start, nr_pages);
}
/* /*
* Chunk management implementation. * Chunk management implementation.
* *
...@@ -772,6 +861,7 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved, ...@@ -772,6 +861,7 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
struct pcpu_chunk *chunk; struct pcpu_chunk *chunk;
const char *err; const char *err;
bool is_atomic = !(gfp & GFP_KERNEL); bool is_atomic = !(gfp & GFP_KERNEL);
int occ_pages = 0;
int slot, off, new_alloc, cpu, ret; int slot, off, new_alloc, cpu, ret;
unsigned long flags; unsigned long flags;
void __percpu *ptr; void __percpu *ptr;
...@@ -812,7 +902,8 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved, ...@@ -812,7 +902,8 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
spin_lock_irqsave(&pcpu_lock, flags); spin_lock_irqsave(&pcpu_lock, flags);
} }
off = pcpu_alloc_area(chunk, size, align, is_atomic); off = pcpu_alloc_area(chunk, size, align, is_atomic,
&occ_pages);
if (off >= 0) if (off >= 0)
goto area_found; goto area_found;
...@@ -845,7 +936,8 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved, ...@@ -845,7 +936,8 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
goto restart; goto restart;
} }
off = pcpu_alloc_area(chunk, size, align, is_atomic); off = pcpu_alloc_area(chunk, size, align, is_atomic,
&occ_pages);
if (off >= 0) if (off >= 0)
goto area_found; goto area_found;
} }
...@@ -899,17 +991,20 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved, ...@@ -899,17 +991,20 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
spin_lock_irqsave(&pcpu_lock, flags); spin_lock_irqsave(&pcpu_lock, flags);
if (ret) { if (ret) {
mutex_unlock(&pcpu_alloc_mutex); mutex_unlock(&pcpu_alloc_mutex);
pcpu_free_area(chunk, off); pcpu_free_area(chunk, off, &occ_pages);
err = "failed to populate"; err = "failed to populate";
goto fail_unlock; goto fail_unlock;
} }
bitmap_set(chunk->populated, rs, re - rs); pcpu_chunk_populated(chunk, rs, re);
spin_unlock_irqrestore(&pcpu_lock, flags); spin_unlock_irqrestore(&pcpu_lock, flags);
} }
mutex_unlock(&pcpu_alloc_mutex); mutex_unlock(&pcpu_alloc_mutex);
} }
if (chunk != pcpu_reserved_chunk)
pcpu_nr_empty_pop_pages -= occ_pages;
/* clear the areas and return address relative to base address */ /* clear the areas and return address relative to base address */
for_each_possible_cpu(cpu) for_each_possible_cpu(cpu)
memset((void *)pcpu_chunk_addr(chunk, cpu, 0) + off, 0, size); memset((void *)pcpu_chunk_addr(chunk, cpu, 0) + off, 0, size);
...@@ -1019,7 +1114,9 @@ static void pcpu_reclaim(struct work_struct *work) ...@@ -1019,7 +1114,9 @@ static void pcpu_reclaim(struct work_struct *work)
pcpu_for_each_pop_region(chunk, rs, re, 0, pcpu_unit_pages) { pcpu_for_each_pop_region(chunk, rs, re, 0, pcpu_unit_pages) {
pcpu_depopulate_chunk(chunk, rs, re); pcpu_depopulate_chunk(chunk, rs, re);
bitmap_clear(chunk->populated, rs, re - rs); spin_lock_irq(&pcpu_lock);
pcpu_chunk_depopulated(chunk, rs, re);
spin_unlock_irq(&pcpu_lock);
} }
pcpu_destroy_chunk(chunk); pcpu_destroy_chunk(chunk);
} }
...@@ -1041,7 +1138,7 @@ void free_percpu(void __percpu *ptr) ...@@ -1041,7 +1138,7 @@ void free_percpu(void __percpu *ptr)
void *addr; void *addr;
struct pcpu_chunk *chunk; struct pcpu_chunk *chunk;
unsigned long flags; unsigned long flags;
int off; int off, occ_pages;
if (!ptr) if (!ptr)
return; return;
...@@ -1055,7 +1152,10 @@ void free_percpu(void __percpu *ptr) ...@@ -1055,7 +1152,10 @@ void free_percpu(void __percpu *ptr)
chunk = pcpu_chunk_addr_search(addr); chunk = pcpu_chunk_addr_search(addr);
off = addr - chunk->base_addr; off = addr - chunk->base_addr;
pcpu_free_area(chunk, off); pcpu_free_area(chunk, off, &occ_pages);
if (chunk != pcpu_reserved_chunk)
pcpu_nr_empty_pop_pages += occ_pages;
/* if there are more than one fully free chunks, wake up grim reaper */ /* if there are more than one fully free chunks, wake up grim reaper */
if (chunk->free_size == pcpu_unit_size) { if (chunk->free_size == pcpu_unit_size) {
...@@ -1459,6 +1559,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, ...@@ -1459,6 +1559,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
schunk->map_alloc = ARRAY_SIZE(smap); schunk->map_alloc = ARRAY_SIZE(smap);
schunk->immutable = true; schunk->immutable = true;
bitmap_fill(schunk->populated, pcpu_unit_pages); bitmap_fill(schunk->populated, pcpu_unit_pages);
schunk->nr_populated = pcpu_unit_pages;
if (ai->reserved_size) { if (ai->reserved_size) {
schunk->free_size = ai->reserved_size; schunk->free_size = ai->reserved_size;
...@@ -1488,6 +1589,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, ...@@ -1488,6 +1589,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
dchunk->map_alloc = ARRAY_SIZE(dmap); dchunk->map_alloc = ARRAY_SIZE(dmap);
dchunk->immutable = true; dchunk->immutable = true;
bitmap_fill(dchunk->populated, pcpu_unit_pages); bitmap_fill(dchunk->populated, pcpu_unit_pages);
dchunk->nr_populated = pcpu_unit_pages;
dchunk->contig_hint = dchunk->free_size = dyn_size; dchunk->contig_hint = dchunk->free_size = dyn_size;
dchunk->map[0] = 1; dchunk->map[0] = 1;
...@@ -1498,6 +1600,8 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, ...@@ -1498,6 +1600,8 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
/* link the first chunk in */ /* link the first chunk in */
pcpu_first_chunk = dchunk ?: schunk; pcpu_first_chunk = dchunk ?: schunk;
pcpu_nr_empty_pop_pages +=
pcpu_count_occupied_pages(pcpu_first_chunk, 1);
pcpu_chunk_relocate(pcpu_first_chunk, -1); pcpu_chunk_relocate(pcpu_first_chunk, -1);
/* we're done */ /* we're done */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment