Commit c550033c authored by Ingo Molnar's avatar Ingo Molnar

Merge branch 'core/percpu' into x86/core

parents a98fe7f3 7a46c594
...@@ -64,6 +64,7 @@ SECTIONS ...@@ -64,6 +64,7 @@ SECTIONS
__initramfs_end = .; __initramfs_end = .;
#endif #endif
. = ALIGN(4096); . = ALIGN(4096);
__per_cpu_load = .;
__per_cpu_start = .; __per_cpu_start = .;
*(.data.percpu.page_aligned) *(.data.percpu.page_aligned)
*(.data.percpu) *(.data.percpu)
......
...@@ -213,17 +213,9 @@ SECTIONS ...@@ -213,17 +213,9 @@ SECTIONS
{ *(.data.cacheline_aligned) } { *(.data.cacheline_aligned) }
/* Per-cpu data: */ /* Per-cpu data: */
percpu : { } :percpu
. = ALIGN(PERCPU_PAGE_SIZE); . = ALIGN(PERCPU_PAGE_SIZE);
__phys_per_cpu_start = .; PERCPU_VADDR(PERCPU_ADDR, :percpu)
.data.percpu PERCPU_ADDR : AT(__phys_per_cpu_start - LOAD_OFFSET) __phys_per_cpu_start = __per_cpu_load;
{
__per_cpu_start = .;
*(.data.percpu.page_aligned)
*(.data.percpu)
*(.data.percpu.shared_aligned)
__per_cpu_end = .;
}
. = __phys_per_cpu_start + PERCPU_PAGE_SIZE; /* ensure percpu data fits . = __phys_per_cpu_start + PERCPU_PAGE_SIZE; /* ensure percpu data fits
* into percpu page size * into percpu page size
*/ */
......
...@@ -181,14 +181,7 @@ SECTIONS ...@@ -181,14 +181,7 @@ SECTIONS
__initramfs_end = .; __initramfs_end = .;
} }
#endif #endif
. = ALIGN(PAGE_SIZE); PERCPU(PAGE_SIZE)
.data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) {
__per_cpu_start = .;
*(.data.percpu.page_aligned)
*(.data.percpu)
*(.data.percpu.shared_aligned)
__per_cpu_end = .;
}
. = ALIGN(8); . = ALIGN(8);
.machine.desc : AT(ADDR(.machine.desc) - LOAD_OFFSET) { .machine.desc : AT(ADDR(.machine.desc) - LOAD_OFFSET) {
......
...@@ -43,14 +43,6 @@ ...@@ -43,14 +43,6 @@
#else /* ...!ASSEMBLY */ #else /* ...!ASSEMBLY */
#include <linux/stringify.h> #include <linux/stringify.h>
#include <asm/sections.h>
#define __addr_to_pcpu_ptr(addr) \
(void *)((unsigned long)(addr) - (unsigned long)pcpu_base_addr \
+ (unsigned long)__per_cpu_start)
#define __pcpu_ptr_to_addr(ptr) \
(void *)((unsigned long)(ptr) + (unsigned long)pcpu_base_addr \
- (unsigned long)__per_cpu_start)
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
#define __percpu_arg(x) "%%"__stringify(__percpu_seg)":%P" #x #define __percpu_arg(x) "%%"__stringify(__percpu_seg)":%P" #x
......
...@@ -233,8 +233,8 @@ static ssize_t __init setup_pcpu_remap(size_t static_size) ...@@ -233,8 +233,8 @@ static ssize_t __init setup_pcpu_remap(size_t static_size)
"%zu bytes\n", vm.addr, static_size); "%zu bytes\n", vm.addr, static_size);
ret = pcpu_setup_first_chunk(pcpur_get_page, static_size, ret = pcpu_setup_first_chunk(pcpur_get_page, static_size,
PERCPU_FIRST_CHUNK_RESERVE, PERCPU_FIRST_CHUNK_RESERVE, dyn_size,
PMD_SIZE, dyn_size, vm.addr, NULL); PMD_SIZE, vm.addr, NULL);
goto out_free_ar; goto out_free_ar;
enomem: enomem:
...@@ -257,31 +257,13 @@ static ssize_t __init setup_pcpu_remap(size_t static_size) ...@@ -257,31 +257,13 @@ static ssize_t __init setup_pcpu_remap(size_t static_size)
* Embedding allocator * Embedding allocator
* *
* The first chunk is sized to just contain the static area plus * The first chunk is sized to just contain the static area plus
* module and dynamic reserves, and allocated as a contiguous area * module and dynamic reserves and embedded into linear physical
* using bootmem allocator and used as-is without being mapped into * mapping so that it can use PMD mapping without additional TLB
* vmalloc area. This enables the first chunk to piggy back on the * pressure.
* linear physical PMD mapping and doesn't add any additional pressure
* to TLB. Note that if the needed size is smaller than the minimum
* unit size, the leftover is returned to the bootmem allocator.
*/ */
static void *pcpue_ptr __initdata;
static size_t pcpue_size __initdata;
static size_t pcpue_unit_size __initdata;
static struct page * __init pcpue_get_page(unsigned int cpu, int pageno)
{
size_t off = (size_t)pageno << PAGE_SHIFT;
if (off >= pcpue_size)
return NULL;
return virt_to_page(pcpue_ptr + cpu * pcpue_unit_size + off);
}
static ssize_t __init setup_pcpu_embed(size_t static_size) static ssize_t __init setup_pcpu_embed(size_t static_size)
{ {
unsigned int cpu; size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE;
size_t dyn_size;
/* /*
* If large page isn't supported, there's no benefit in doing * If large page isn't supported, there's no benefit in doing
...@@ -291,33 +273,8 @@ static ssize_t __init setup_pcpu_embed(size_t static_size) ...@@ -291,33 +273,8 @@ static ssize_t __init setup_pcpu_embed(size_t static_size)
if (!cpu_has_pse || pcpu_need_numa()) if (!cpu_has_pse || pcpu_need_numa())
return -EINVAL; return -EINVAL;
/* allocate and copy */ return pcpu_embed_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE,
pcpue_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + reserve - PERCPU_FIRST_CHUNK_RESERVE, -1);
PERCPU_DYNAMIC_RESERVE);
pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE);
dyn_size = pcpue_size - static_size - PERCPU_FIRST_CHUNK_RESERVE;
pcpue_ptr = pcpu_alloc_bootmem(0, num_possible_cpus() * pcpue_unit_size,
PAGE_SIZE);
if (!pcpue_ptr)
return -ENOMEM;
for_each_possible_cpu(cpu) {
void *ptr = pcpue_ptr + cpu * pcpue_unit_size;
free_bootmem(__pa(ptr + pcpue_size),
pcpue_unit_size - pcpue_size);
memcpy(ptr, __per_cpu_load, static_size);
}
/* we're ready, commit */
pr_info("PERCPU: Embedded %zu pages at %p, static data %zu bytes\n",
pcpue_size >> PAGE_SHIFT, pcpue_ptr, static_size);
return pcpu_setup_first_chunk(pcpue_get_page, static_size,
PERCPU_FIRST_CHUNK_RESERVE,
pcpue_unit_size, dyn_size,
pcpue_ptr, NULL);
} }
/* /*
...@@ -375,8 +332,8 @@ static ssize_t __init setup_pcpu_4k(size_t static_size) ...@@ -375,8 +332,8 @@ static ssize_t __init setup_pcpu_4k(size_t static_size)
pcpu4k_nr_static_pages, static_size); pcpu4k_nr_static_pages, static_size);
ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size,
PERCPU_FIRST_CHUNK_RESERVE, -1, -1, NULL, PERCPU_FIRST_CHUNK_RESERVE, -1,
pcpu4k_populate_pte); -1, NULL, pcpu4k_populate_pte);
goto out_free_ar; goto out_free_ar;
enomem: enomem:
......
...@@ -107,10 +107,14 @@ typedef void (*pcpu_populate_pte_fn_t)(unsigned long addr); ...@@ -107,10 +107,14 @@ typedef void (*pcpu_populate_pte_fn_t)(unsigned long addr);
extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
size_t static_size, size_t reserved_size, size_t static_size, size_t reserved_size,
ssize_t unit_size, ssize_t dyn_size, ssize_t dyn_size, ssize_t unit_size,
void *base_addr, void *base_addr,
pcpu_populate_pte_fn_t populate_pte_fn); pcpu_populate_pte_fn_t populate_pte_fn);
/*
 * Helper for setting up an embedded first percpu chunk.  The size
 * arguments have the same meaning as the same-named arguments of
 * pcpu_setup_first_chunk(); @dyn_size and @unit_size accept -1 for auto.
 * Returns the determined unit size on success, -errno on failure.
 */
extern ssize_t __init pcpu_embed_first_chunk(
size_t static_size, size_t reserved_size,
ssize_t dyn_size, ssize_t unit_size);
/* /*
* Use this to get to a cpu's version of the per-cpu object * Use this to get to a cpu's version of the per-cpu object
* dynamically allocated. Non-atomic access to the current CPU's * dynamically allocated. Non-atomic access to the current CPU's
......
...@@ -9599,10 +9599,11 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime) ...@@ -9599,10 +9599,11 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
cpu = task_cpu(tsk); cpu = task_cpu(tsk);
ca = task_ca(tsk); ca = task_ca(tsk);
for (; ca; ca = ca->parent) { do {
u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
*cpuusage += cputime; *cpuusage += cputime;
} ca = ca->parent;
} while (ca);
} }
struct cgroup_subsys cpuacct_subsys = { struct cgroup_subsys cpuacct_subsys = {
......
...@@ -120,7 +120,7 @@ void *__alloc_percpu(size_t size, size_t align) ...@@ -120,7 +120,7 @@ void *__alloc_percpu(size_t size, size_t align)
* on it. Larger alignment should only be used for module * on it. Larger alignment should only be used for module
* percpu sections on SMP for which this path isn't used. * percpu sections on SMP for which this path isn't used.
*/ */
WARN_ON_ONCE(align > __alignof__(unsigned long long)); WARN_ON_ONCE(align > SMP_CACHE_BYTES);
if (unlikely(!pdata)) if (unlikely(!pdata))
return NULL; return NULL;
......
...@@ -46,7 +46,8 @@ ...@@ -46,7 +46,8 @@
* - define CONFIG_HAVE_DYNAMIC_PER_CPU_AREA * - define CONFIG_HAVE_DYNAMIC_PER_CPU_AREA
* *
* - define __addr_to_pcpu_ptr() and __pcpu_ptr_to_addr() to translate * - define __addr_to_pcpu_ptr() and __pcpu_ptr_to_addr() to translate
* regular address to percpu pointer and back * regular address to percpu pointer and back if they need to be
* different from the default
* *
* - use pcpu_setup_first_chunk() during percpu area initialization to * - use pcpu_setup_first_chunk() during percpu area initialization to
* setup the first chunk containing the kernel static percpu area * setup the first chunk containing the kernel static percpu area
...@@ -67,11 +68,24 @@ ...@@ -67,11 +68,24 @@
#include <linux/workqueue.h> #include <linux/workqueue.h>
#include <asm/cacheflush.h> #include <asm/cacheflush.h>
#include <asm/sections.h>
#include <asm/tlbflush.h> #include <asm/tlbflush.h>
#define PCPU_SLOT_BASE_SHIFT 5 /* 1-31 shares the same slot */ #define PCPU_SLOT_BASE_SHIFT 5 /* 1-31 shares the same slot */
#define PCPU_DFL_MAP_ALLOC 16 /* start a map with 16 ents */ #define PCPU_DFL_MAP_ALLOC 16 /* start a map with 16 ents */
/* default addr <-> pcpu_ptr mapping, override in asm/percpu.h if necessary */
#ifndef __addr_to_pcpu_ptr
/*
 * Default translation from a regular address to a percpu pointer:
 * subtract pcpu_base_addr and add __per_cpu_start, using unsigned long
 * arithmetic.  The whole expansion is parenthesized so that the cast
 * cannot be split apart by a neighbouring operator (e.g. sizeof).
 */
#define __addr_to_pcpu_ptr(addr)					\
	((void *)((unsigned long)(addr) - (unsigned long)pcpu_base_addr	\
		  + (unsigned long)__per_cpu_start))
#endif
#ifndef __pcpu_ptr_to_addr
/*
 * Default translation from a percpu pointer back to a regular address:
 * add pcpu_base_addr and subtract __per_cpu_start, using unsigned long
 * arithmetic.  The whole expansion is parenthesized so that the cast
 * cannot be split apart by a neighbouring operator (e.g. sizeof).
 */
#define __pcpu_ptr_to_addr(ptr)						\
	((void *)((unsigned long)(ptr) + (unsigned long)pcpu_base_addr	\
		  - (unsigned long)__per_cpu_start))
#endif
struct pcpu_chunk { struct pcpu_chunk {
struct list_head list; /* linked to pcpu_slot lists */ struct list_head list; /* linked to pcpu_slot lists */
struct rb_node rb_node; /* key is chunk->vm->addr */ struct rb_node rb_node; /* key is chunk->vm->addr */
...@@ -1013,8 +1027,8 @@ EXPORT_SYMBOL_GPL(free_percpu); ...@@ -1013,8 +1027,8 @@ EXPORT_SYMBOL_GPL(free_percpu);
* @get_page_fn: callback to fetch page pointer * @get_page_fn: callback to fetch page pointer
* @static_size: the size of static percpu area in bytes * @static_size: the size of static percpu area in bytes
* @reserved_size: the size of reserved percpu area in bytes * @reserved_size: the size of reserved percpu area in bytes
* @unit_size: unit size in bytes, must be multiple of PAGE_SIZE, -1 for auto
* @dyn_size: free size for dynamic allocation in bytes, -1 for auto * @dyn_size: free size for dynamic allocation in bytes, -1 for auto
* @unit_size: unit size in bytes, must be multiple of PAGE_SIZE, -1 for auto
* @base_addr: mapped address, NULL for auto * @base_addr: mapped address, NULL for auto
* @populate_pte_fn: callback to allocate pagetable, NULL if unnecessary * @populate_pte_fn: callback to allocate pagetable, NULL if unnecessary
* *
...@@ -1039,14 +1053,14 @@ EXPORT_SYMBOL_GPL(free_percpu); ...@@ -1039,14 +1053,14 @@ EXPORT_SYMBOL_GPL(free_percpu);
* limited offset range for symbol relocations to guarantee module * limited offset range for symbol relocations to guarantee module
* percpu symbols fall inside the relocatable range. * percpu symbols fall inside the relocatable range.
* *
* @dyn_size, if non-negative, determines the number of bytes
* available for dynamic allocation in the first chunk. Specifying
* non-negative value makes percpu leave alone the area beyond
* @static_size + @reserved_size + @dyn_size.
*
* @unit_size, if non-negative, specifies unit size and must be * @unit_size, if non-negative, specifies unit size and must be
* aligned to PAGE_SIZE and equal to or larger than @static_size + * aligned to PAGE_SIZE and equal to or larger than @static_size +
* @reserved_size + @dyn_size. * @reserved_size + if non-negative, @dyn_size.
*
* @dyn_size, if non-negative, limits the number of bytes available
* for dynamic allocation in the first chunk. Specifying non-negative
* value make percpu leave alone the area beyond @static_size +
* @reserved_size + @dyn_size.
* *
* Non-null @base_addr means that the caller already allocated virtual * Non-null @base_addr means that the caller already allocated virtual
* region for the first chunk and mapped it. percpu must not mess * region for the first chunk and mapped it. percpu must not mess
...@@ -1069,12 +1083,14 @@ EXPORT_SYMBOL_GPL(free_percpu); ...@@ -1069,12 +1083,14 @@ EXPORT_SYMBOL_GPL(free_percpu);
*/ */
size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
size_t static_size, size_t reserved_size, size_t static_size, size_t reserved_size,
ssize_t unit_size, ssize_t dyn_size, ssize_t dyn_size, ssize_t unit_size,
void *base_addr, void *base_addr,
pcpu_populate_pte_fn_t populate_pte_fn) pcpu_populate_pte_fn_t populate_pte_fn)
{ {
static struct vm_struct first_vm; static struct vm_struct first_vm;
static int smap[2], dmap[2]; static int smap[2], dmap[2];
size_t size_sum = static_size + reserved_size +
(dyn_size >= 0 ? dyn_size : 0);
struct pcpu_chunk *schunk, *dchunk = NULL; struct pcpu_chunk *schunk, *dchunk = NULL;
unsigned int cpu; unsigned int cpu;
int nr_pages; int nr_pages;
...@@ -1085,20 +1101,18 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, ...@@ -1085,20 +1101,18 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
ARRAY_SIZE(dmap) >= PCPU_DFL_MAP_ALLOC); ARRAY_SIZE(dmap) >= PCPU_DFL_MAP_ALLOC);
BUG_ON(!static_size); BUG_ON(!static_size);
if (unit_size >= 0) { if (unit_size >= 0) {
BUG_ON(unit_size < static_size + reserved_size + BUG_ON(unit_size < size_sum);
(dyn_size >= 0 ? dyn_size : 0));
BUG_ON(unit_size & ~PAGE_MASK); BUG_ON(unit_size & ~PAGE_MASK);
} else { BUG_ON(unit_size < PCPU_MIN_UNIT_SIZE);
BUG_ON(dyn_size >= 0); } else
BUG_ON(base_addr); BUG_ON(base_addr);
}
BUG_ON(base_addr && populate_pte_fn); BUG_ON(base_addr && populate_pte_fn);
if (unit_size >= 0) if (unit_size >= 0)
pcpu_unit_pages = unit_size >> PAGE_SHIFT; pcpu_unit_pages = unit_size >> PAGE_SHIFT;
else else
pcpu_unit_pages = max_t(int, PCPU_MIN_UNIT_SIZE >> PAGE_SHIFT, pcpu_unit_pages = max_t(int, PCPU_MIN_UNIT_SIZE >> PAGE_SHIFT,
PFN_UP(static_size + reserved_size)); PFN_UP(size_sum));
pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT; pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT;
pcpu_chunk_size = num_possible_cpus() * pcpu_unit_size; pcpu_chunk_size = num_possible_cpus() * pcpu_unit_size;
...@@ -1224,3 +1238,89 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, ...@@ -1224,3 +1238,89 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
pcpu_base_addr = (void *)pcpu_chunk_addr(schunk, 0, 0); pcpu_base_addr = (void *)pcpu_chunk_addr(schunk, 0, 0);
return pcpu_unit_size; return pcpu_unit_size;
} }
/*
* Embedding first chunk setup helper.
*/
/* start of the bootmem area returned by __alloc_bootmem_nopanic() */
static void *pcpue_ptr __initdata;
/* page-aligned number of bytes kept in each unit (static + reserved + dyn) */
static size_t pcpue_size __initdata;
/* distance in bytes between the units of consecutive cpus */
static size_t pcpue_unit_size __initdata;
/*
 * Page lookup callback handed to pcpu_setup_first_chunk(): map
 * (@cpu, @pageno) to the page backing that offset inside @cpu's unit of
 * the embedded area.  Offsets at or beyond pcpue_size yield NULL.
 */
static struct page * __init pcpue_get_page(unsigned int cpu, int pageno)
{
	size_t unit_off = (size_t)pageno << PAGE_SHIFT;
	void *unit_base = pcpue_ptr + cpu * pcpue_unit_size;

	if (unit_off < pcpue_size)
		return virt_to_page(unit_base + unit_off);

	return NULL;
}
/**
 * pcpu_embed_first_chunk - embed the first percpu chunk into bootmem
 * @static_size: the size of static percpu area in bytes
 * @reserved_size: the size of reserved percpu area in bytes
 * @dyn_size: free size for dynamic allocation in bytes, -1 for auto
 * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE, -1 for auto
 *
 * Convenience wrapper for callers that want an embedded first chunk;
 * it may be used wherever pcpu_setup_first_chunk() would be called.
 *
 * The first chunk is carved out of one contiguous bootmem allocation
 * and used in place, without being remapped into the vmalloc area, so
 * it rides on the linear physical mapping, which often uses a larger
 * page size.
 *
 * A positive @dyn_size may be rounded up so that the unit contents end
 * on a page boundary.  With @dyn_size set to auto, the dynamic area
 * only covers the page-alignment padding that follows the static and
 * reserved areas rather than the rest of the chunk.
 *
 * Whatever part of a unit lies beyond the needed size (because of the
 * minimum or requested unit size) is handed back to the bootmem
 * allocator.
 *
 * RETURNS:
 * The determined pcpu_unit_size which can be used to initialize
 * percpu access on success, -errno on failure.
 */
ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size,
				      ssize_t dyn_size, ssize_t unit_size)
{
	size_t payload = static_size + reserved_size;
	unsigned int cpu;

	/* size of the populated part of each unit, rounded to whole pages */
	if (dyn_size >= 0)
		payload += dyn_size;
	pcpue_size = PFN_ALIGN(payload);

	/* unless explicitly zero, the dynamic area takes up the padding */
	if (dyn_size != 0)
		dyn_size = pcpue_size - static_size - reserved_size;

	/* pick the per-cpu stride */
	if (unit_size < 0) {
		pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE);
	} else {
		BUG_ON(unit_size < pcpue_size);
		pcpue_unit_size = unit_size;
	}

	/* grab one contiguous area sized for all possible cpus */
	pcpue_ptr = __alloc_bootmem_nopanic(num_possible_cpus() *
					    pcpue_unit_size,
					    PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
	if (pcpue_ptr == NULL)
		return -ENOMEM;

	for_each_possible_cpu(cpu) {
		void *unit = pcpue_ptr + cpu * pcpue_unit_size;
		void *tail = unit + pcpue_size;

		/* release the unused tail, then seed the static data */
		free_bootmem(__pa(tail), pcpue_unit_size - pcpue_size);
		memcpy(unit, __per_cpu_load, static_size);
	}

	/* everything is in place, hand over to the generic setup */
	pr_info("PERCPU: Embedded %zu pages at %p, static data %zu bytes\n",
		pcpue_size >> PAGE_SHIFT, pcpue_ptr, static_size);

	return pcpu_setup_first_chunk(pcpue_get_page, static_size,
				      reserved_size, dyn_size,
				      pcpue_unit_size, pcpue_ptr, NULL);
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment