Commit 8f5d36ed authored by Ingo Molnar

Merge branch 'tj-percpu' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/misc into core/percpu

parents 5cdc5e9e 6b7c38d5
#ifndef _ASM_X86_PDA_H
#define _ASM_X86_PDA_H
#ifndef __ASSEMBLY__
#include <linux/stddef.h>
#include <linux/types.h>
#include <linux/cache.h>
#include <linux/threads.h>
#include <asm/page.h>
#include <asm/percpu.h>
/*
 * Per processor datastructure. %gs points to it while the kernel runs.
 *
 * Only stack_canary and in_bootmem still carry meaning here; the unusedN
 * members are placeholders. NOTE(review): presumably they are kept purely as
 * padding so that stack_canary stays at byte offset 40 (4 * 8-byte long +
 * 2 * 4-byte int, assuming x86-64 type sizes) -- confirm before reordering.
 */
struct x8664_pda {
unsigned long unused1;
unsigned long unused2;
unsigned long unused3;
unsigned long unused4;
int unused5;
unsigned int unused6; /* 36 was cpunumber */
unsigned long stack_canary; /* 40 stack canary value */
/*
 * gcc ABI: the compiler expects the stack canary at a fixed offset from
 * %gs, so this member MUST remain at offset 40. Do not add, remove or
 * resize any member above it.
 */
short in_bootmem; /* pda lives in bootmem */
} ____cacheline_aligned_in_smp;
/* Declaration of the per-CPU PDA instance, named __pda. */
DECLARE_PER_CPU(struct x8664_pda, __pda);
/* PDA setup routine; takes an int argument and is defined outside this header. */
extern void pda_init(int);
/* Address of the __pda instance belonging to the given cpu. */
#define cpu_pda(cpu) (&per_cpu(__pda, cpu))
/*
 * Field accessors: each one forwards __pda.<field> to the percpu_* operation
 * of the same name (read, write, add, sub, or).
 */
#define read_pda(field) percpu_read(__pda.field)
#define write_pda(field, val) percpu_write(__pda.field, val)
#define add_pda(field, val) percpu_add(__pda.field, val)
#define sub_pda(field, val) percpu_sub(__pda.field, val)
#define or_pda(field, val) percpu_or(__pda.field, val)
/*
 * Forwards to x86_test_and_clear_bit_percpu() on __pda.<field>.
 * This is not atomic against other CPUs -- CPU preemption needs to be off.
 */
#define test_and_clear_bit_pda(bit, field) \
x86_test_and_clear_bit_percpu(bit, __pda.field)
#endif
/*
 * Copy current->stack_canary into the stack_canary field of the PDA.
 *
 * Note that this macro sits after the #endif closing the !__ASSEMBLY__
 * section, so it is defined for assembly includers too even though it
 * expands to C. NOTE(review): `current` is not visibly provided by the
 * headers included here -- the user of this macro presumably has to pull it
 * in; confirm.
 */
#define refresh_stack_canary() write_pda(stack_canary, current->stack_canary)
#endif /* _ASM_X86_PDA_H */
...@@ -133,12 +133,6 @@ do { \ ...@@ -133,12 +133,6 @@ do { \
/* We can use this directly for local CPU (faster). */ /* We can use this directly for local CPU (faster). */
DECLARE_PER_CPU(unsigned long, this_cpu_off); DECLARE_PER_CPU(unsigned long, this_cpu_off);
#ifdef CONFIG_X86_64
extern void load_pda_offset(int cpu);
#else
static inline void load_pda_offset(int cpu) { }
#endif
#endif /* !__ASSEMBLY__ */ #endif /* !__ASSEMBLY__ */
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
......
...@@ -11,7 +11,6 @@ ...@@ -11,7 +11,6 @@
#include <asm/processor.h> #include <asm/processor.h>
#include <linux/bitops.h> #include <linux/bitops.h>
#include <linux/threads.h> #include <linux/threads.h>
#include <asm/pda.h>
extern pud_t level3_kernel_pgt[512]; extern pud_t level3_kernel_pgt[512];
extern pud_t level3_ident_pgt[512]; extern pud_t level3_ident_pgt[512];
......
...@@ -379,8 +379,29 @@ union thread_xstate { ...@@ -379,8 +379,29 @@ union thread_xstate {
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
DECLARE_PER_CPU(struct orig_ist, orig_ist); DECLARE_PER_CPU(struct orig_ist, orig_ist);
DECLARE_PER_CPU(char[IRQ_STACK_SIZE], irq_stack); union irq_stack_union {
char irq_stack[IRQ_STACK_SIZE];
/*
* GCC hardcodes the stack canary as %gs:40. Since the
* irq_stack is the object at %gs:0, we reserve the bottom
* 48 bytes of the irq stack for the canary.
*/
struct {
char gs_base[40];
unsigned long stack_canary;
};
};
DECLARE_PER_CPU(union irq_stack_union, irq_stack_union);
DECLARE_PER_CPU(char *, irq_stack_ptr); DECLARE_PER_CPU(char *, irq_stack_ptr);
/*
 * load_gs_base - write MSR_GS_BASE for the given cpu's per-cpu area
 * @cpu: index of the CPU whose irq_stack_union address is loaded
 *
 * The value written is the address of @cpu's irq_stack_union.gs_base, i.e.
 * the first member of the union, cast to unsigned long. The MSR write is
 * bracketed by mb() on both sides; keep that order intact.
 */
static inline void load_gs_base(int cpu)
{
/* Memory clobbers used to order pda/percpu accesses */
mb();
wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu));
mb();
}
#endif #endif
extern void print_cpu_info(struct cpuinfo_x86 *); extern void print_cpu_info(struct cpuinfo_x86 *);
......
...@@ -15,7 +15,6 @@ ...@@ -15,7 +15,6 @@
# include <asm/io_apic.h> # include <asm/io_apic.h>
# endif # endif
#endif #endif
#include <asm/pda.h>
#include <asm/thread_info.h> #include <asm/thread_info.h>
#include <asm/cpumask.h> #include <asm/cpumask.h>
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
#define _ASM_STACKPROTECTOR_H 1 #define _ASM_STACKPROTECTOR_H 1
#include <asm/tsc.h> #include <asm/tsc.h>
#include <asm/pda.h> #include <asm/processor.h>
/* /*
* Initialize the stackprotector canary value. * Initialize the stackprotector canary value.
...@@ -16,13 +16,12 @@ static __always_inline void boot_init_stack_canary(void) ...@@ -16,13 +16,12 @@ static __always_inline void boot_init_stack_canary(void)
u64 tsc; u64 tsc;
/* /*
* If we're the non-boot CPU, nothing set the PDA stack * Build time only check to make sure the stack_canary is at
* canary up for us - and if we are the boot CPU we have * offset 40 in the pda; this is a gcc ABI requirement
* a 0 stack canary. This is a good place for updating */
* it, as we wont ever return from this function (so the BUILD_BUG_ON(offsetof(union irq_stack_union, stack_canary) != 40);
* invalid canaries already on the stack wont ever
* trigger). /*
*
* We both use the random pool and the current TSC as a source * We both use the random pool and the current TSC as a source
* of randomness. The TSC only matters for very early init, * of randomness. The TSC only matters for very early init,
* there it already has some randomness on most systems. Later * there it already has some randomness on most systems. Later
...@@ -33,7 +32,7 @@ static __always_inline void boot_init_stack_canary(void) ...@@ -33,7 +32,7 @@ static __always_inline void boot_init_stack_canary(void)
canary += tsc + (tsc << 32UL); canary += tsc + (tsc << 32UL);
current->stack_canary = canary; current->stack_canary = canary;
write_pda(stack_canary, canary); percpu_write(irq_stack_union.stack_canary, canary);
} }
#endif #endif
...@@ -86,17 +86,28 @@ do { \ ...@@ -86,17 +86,28 @@ do { \
, "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \ , "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \
"r12", "r13", "r14", "r15" "r12", "r13", "r14", "r15"
/*
 * Inline-asm fragments for switch_to():
 *
 * __switch_canary       - instructions that load the stack_canary member of
 *                         the task_struct addressed by %rsi into %r8 and
 *                         store it at %gs:<offset of stack_canary inside
 *                         union irq_stack_union>. %r8 is overwritten.
 * __switch_canary_param - the matching "i" operands ([task_canary] and
 *                         [gs_canary]). It begins with a comma, so it must
 *                         be placed directly after the last existing operand.
 *
 * Both expand to nothing when CONFIG_CC_STACKPROTECTOR is not defined.
 */
#ifdef CONFIG_CC_STACKPROTECTOR
#define __switch_canary \
"movq %P[task_canary](%%rsi),%%r8\n\t" \
"movq %%r8,%%gs:%P[gs_canary]\n\t"
#define __switch_canary_param \
, [task_canary] "i" (offsetof(struct task_struct, stack_canary)) \
, [gs_canary] "i" (offsetof(union irq_stack_union, stack_canary))
#else /* CC_STACKPROTECTOR */
#define __switch_canary
#define __switch_canary_param
#endif /* CC_STACKPROTECTOR */
/* Save restore flags to clear handle leaking NT */ /* Save restore flags to clear handle leaking NT */
#define switch_to(prev, next, last) \ #define switch_to(prev, next, last) \
asm volatile(SAVE_CONTEXT \ asm volatile(SAVE_CONTEXT \
"movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \
"movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \
"call __switch_to\n\t" \ "call __switch_to\n\t" \
".globl thread_return\n" \ ".globl thread_return\n" \
"thread_return:\n\t" \ "thread_return:\n\t" \
"movq "__percpu_arg([current_task])",%%rsi\n\t" \ "movq "__percpu_arg([current_task])",%%rsi\n\t" \
"movq %P[task_canary](%%rsi),%%r8\n\t" \ __switch_canary \
"movq %%r8,%%gs:%P[pda_canary]\n\t" \
"movq %P[thread_info](%%rsi),%%r8\n\t" \ "movq %P[thread_info](%%rsi),%%r8\n\t" \
LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \ LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \
"movq %%rax,%%rdi\n\t" \ "movq %%rax,%%rdi\n\t" \
...@@ -108,9 +119,8 @@ do { \ ...@@ -108,9 +119,8 @@ do { \
[ti_flags] "i" (offsetof(struct thread_info, flags)), \ [ti_flags] "i" (offsetof(struct thread_info, flags)), \
[tif_fork] "i" (TIF_FORK), \ [tif_fork] "i" (TIF_FORK), \
[thread_info] "i" (offsetof(struct task_struct, stack)), \ [thread_info] "i" (offsetof(struct task_struct, stack)), \
[task_canary] "i" (offsetof(struct task_struct, stack_canary)),\ [current_task] "m" (per_cpu_var(current_task)) \
[current_task] "m" (per_cpu_var(current_task)), \ __switch_canary_param \
[pda_canary] "i" (offsetof(struct x8664_pda, stack_canary))\
: "memory", "cc" __EXTRA_CLOBBER) : "memory", "cc" __EXTRA_CLOBBER)
#endif #endif
......
...@@ -11,7 +11,6 @@ ...@@ -11,7 +11,6 @@
#include <linux/hardirq.h> #include <linux/hardirq.h>
#include <linux/suspend.h> #include <linux/suspend.h>
#include <linux/kbuild.h> #include <linux/kbuild.h>
#include <asm/pda.h>
#include <asm/processor.h> #include <asm/processor.h>
#include <asm/segment.h> #include <asm/segment.h>
#include <asm/thread_info.h> #include <asm/thread_info.h>
...@@ -48,10 +47,6 @@ int main(void) ...@@ -48,10 +47,6 @@ int main(void)
#endif #endif
BLANK(); BLANK();
#undef ENTRY #undef ENTRY
#define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry))
DEFINE(pda_size, sizeof(struct x8664_pda));
BLANK();
#undef ENTRY
#ifdef CONFIG_PARAVIRT #ifdef CONFIG_PARAVIRT
BLANK(); BLANK();
OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled); OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
......
...@@ -30,7 +30,6 @@ ...@@ -30,7 +30,6 @@
#include <asm/genapic.h> #include <asm/genapic.h>
#endif #endif
#include <asm/pda.h>
#include <asm/pgtable.h> #include <asm/pgtable.h>
#include <asm/processor.h> #include <asm/processor.h>
#include <asm/desc.h> #include <asm/desc.h>
...@@ -881,12 +880,13 @@ __setup("clearcpuid=", setup_disablecpuid); ...@@ -881,12 +880,13 @@ __setup("clearcpuid=", setup_disablecpuid);
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
DEFINE_PER_CPU_PAGE_ALIGNED(char[IRQ_STACK_SIZE], irq_stack); DEFINE_PER_CPU_FIRST(union irq_stack_union,
irq_stack_union) __aligned(PAGE_SIZE);
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
DEFINE_PER_CPU(char *, irq_stack_ptr); /* will be set during per cpu init */ DEFINE_PER_CPU(char *, irq_stack_ptr); /* will be set during per cpu init */
#else #else
DEFINE_PER_CPU(char *, irq_stack_ptr) = DEFINE_PER_CPU(char *, irq_stack_ptr) =
per_cpu_var(irq_stack) + IRQ_STACK_SIZE - 64; per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
#endif #endif
DEFINE_PER_CPU(unsigned long, kernel_stack) = DEFINE_PER_CPU(unsigned long, kernel_stack) =
...@@ -895,15 +895,6 @@ EXPORT_PER_CPU_SYMBOL(kernel_stack); ...@@ -895,15 +895,6 @@ EXPORT_PER_CPU_SYMBOL(kernel_stack);
DEFINE_PER_CPU(unsigned int, irq_count) = -1; DEFINE_PER_CPU(unsigned int, irq_count) = -1;
void __cpuinit pda_init(int cpu)
{
/* Setup up data that may be needed in __get_free_pages early */
loadsegment(fs, 0);
loadsegment(gs, 0);
load_pda_offset(cpu);
}
static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]) [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ])
__aligned(PAGE_SIZE); __aligned(PAGE_SIZE);
...@@ -967,9 +958,9 @@ void __cpuinit cpu_init(void) ...@@ -967,9 +958,9 @@ void __cpuinit cpu_init(void)
struct task_struct *me; struct task_struct *me;
int i; int i;
/* CPU 0 is initialised in head64.c */ loadsegment(fs, 0);
if (cpu != 0) loadsegment(gs, 0);
pda_init(cpu); load_gs_base(cpu);
#ifdef CONFIG_NUMA #ifdef CONFIG_NUMA
if (cpu != 0 && percpu_read(node_number) == 0 && if (cpu != 0 && percpu_read(node_number) == 0 &&
......
...@@ -91,8 +91,6 @@ void __init x86_64_start_kernel(char * real_mode_data) ...@@ -91,8 +91,6 @@ void __init x86_64_start_kernel(char * real_mode_data)
if (console_loglevel == 10) if (console_loglevel == 10)
early_printk("Kernel alive\n"); early_printk("Kernel alive\n");
pda_init(0);
x86_64_start_reservations(real_mode_data); x86_64_start_reservations(real_mode_data);
} }
......
...@@ -207,19 +207,15 @@ ENTRY(secondary_startup_64) ...@@ -207,19 +207,15 @@ ENTRY(secondary_startup_64)
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
/* /*
* early_gdt_base should point to the gdt_page in static percpu init * Fix up static pointers that need __per_cpu_load added. The assembler
* data area. Computing this requires two symbols - __per_cpu_load * is unable to do this directly. This is only needed for the boot cpu.
* and per_cpu__gdt_page. As linker can't do no such relocation, do * These values are set up with the correct base addresses by C code for
* it by hand. As early_gdt_descr is manipulated by C code for * secondary cpus.
* secondary CPUs, this should be done only once for the boot CPU
* when early_gdt_descr_base contains zero.
*/ */
movq early_gdt_descr_base(%rip), %rax movq initial_gs(%rip), %rax
testq %rax, %rax cmpl $0, per_cpu__cpu_number(%rax)
jnz 1f jne 1f
movq $__per_cpu_load, %rax addq %rax, early_gdt_descr_base(%rip)
addq $per_cpu__gdt_page, %rax
movq %rax, early_gdt_descr_base(%rip)
1: 1:
#endif #endif
/* /*
...@@ -246,13 +242,10 @@ ENTRY(secondary_startup_64) ...@@ -246,13 +242,10 @@ ENTRY(secondary_startup_64)
/* Set up %gs. /* Set up %gs.
* *
* On SMP, %gs should point to the per-cpu area. For initial * The base of %gs always points to the bottom of the irqstack
* boot, make %gs point to the init data section. For a * union. If the stack protector canary is enabled, it is
* secondary CPU,initial_gs should be set to its pda address * located at %gs:40. Note that, on SMP, the boot cpu uses
* before the CPU runs this code. * init data section till per cpu areas are set up.
*
* On UP, initial_gs points to PER_CPU_VAR(__pda) and doesn't
* change.
*/ */
movl $MSR_GS_BASE,%ecx movl $MSR_GS_BASE,%ecx
movq initial_gs(%rip),%rax movq initial_gs(%rip),%rax
...@@ -285,7 +278,7 @@ ENTRY(secondary_startup_64) ...@@ -285,7 +278,7 @@ ENTRY(secondary_startup_64)
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
.quad __per_cpu_load .quad __per_cpu_load
#else #else
.quad PER_CPU_VAR(__pda) .quad PER_CPU_VAR(irq_stack_union)
#endif #endif
__FINITDATA __FINITDATA
...@@ -431,12 +424,8 @@ NEXT_PAGE(level2_spare_pgt) ...@@ -431,12 +424,8 @@ NEXT_PAGE(level2_spare_pgt)
.globl early_gdt_descr .globl early_gdt_descr
early_gdt_descr: early_gdt_descr:
.word GDT_ENTRIES*8-1 .word GDT_ENTRIES*8-1
#ifdef CONFIG_SMP
early_gdt_descr_base: early_gdt_descr_base:
.quad 0x0000000000000000
#else
.quad per_cpu__gdt_page .quad per_cpu__gdt_page
#endif
ENTRY(phys_base) ENTRY(phys_base)
/* This must match the first entry in level2_kernel_pgt */ /* This must match the first entry in level2_kernel_pgt */
......
...@@ -47,7 +47,6 @@ ...@@ -47,7 +47,6 @@
#include <asm/processor.h> #include <asm/processor.h>
#include <asm/i387.h> #include <asm/i387.h>
#include <asm/mmu_context.h> #include <asm/mmu_context.h>
#include <asm/pda.h>
#include <asm/prctl.h> #include <asm/prctl.h>
#include <asm/desc.h> #include <asm/desc.h>
#include <asm/proto.h> #include <asm/proto.h>
...@@ -638,13 +637,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) ...@@ -638,13 +637,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
percpu_write(kernel_stack, percpu_write(kernel_stack,
(unsigned long)task_stack_page(next_p) + (unsigned long)task_stack_page(next_p) +
THREAD_SIZE - KERNEL_STACK_OFFSET); THREAD_SIZE - KERNEL_STACK_OFFSET);
#ifdef CONFIG_CC_STACKPROTECTOR
/*
* Build time only check to make sure the stack_canary is at
* offset 40 in the pda; this is a gcc ABI requirement
*/
BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
#endif
/* /*
* Now maybe reload the debug registers and handle I/O bitmaps * Now maybe reload the debug registers and handle I/O bitmaps
......
...@@ -77,30 +77,6 @@ static void __init setup_node_to_cpumask_map(void); ...@@ -77,30 +77,6 @@ static void __init setup_node_to_cpumask_map(void);
static inline void setup_node_to_cpumask_map(void) { } static inline void setup_node_to_cpumask_map(void) { }
#endif #endif
/*
* Define load_pda_offset() and per-cpu __pda for x86_64.
* load_pda_offset() is responsible for loading the offset of pda into
* %gs.
*
* On SMP, pda offset also duals as percpu base address and thus it
* should be at the start of per-cpu area. To achieve this, it's
* preallocated in vmlinux_64.lds.S directly instead of using
* DEFINE_PER_CPU().
*/
#ifdef CONFIG_X86_64
void __cpuinit load_pda_offset(int cpu)
{
/* Memory clobbers used to order pda/percpu accesses */
mb();
wrmsrl(MSR_GS_BASE, cpu_pda(cpu));
mb();
}
#ifndef CONFIG_SMP
DEFINE_PER_CPU(struct x8664_pda, __pda);
#endif
EXPORT_PER_CPU_SYMBOL(__pda);
#endif /* CONFIG_SMP && CONFIG_X86_64 */
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
/* correctly size the local cpu masks */ /* correctly size the local cpu masks */
...@@ -207,15 +183,13 @@ void __init setup_per_cpu_areas(void) ...@@ -207,15 +183,13 @@ void __init setup_per_cpu_areas(void)
per_cpu(cpu_number, cpu) = cpu; per_cpu(cpu_number, cpu) = cpu;
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
per_cpu(irq_stack_ptr, cpu) = per_cpu(irq_stack_ptr, cpu) =
(char *)per_cpu(irq_stack, cpu) + IRQ_STACK_SIZE - 64; per_cpu(irq_stack_union.irq_stack, cpu) + IRQ_STACK_SIZE - 64;
/* /*
* CPU0 modified pda in the init data area, reload pda * Up to this point, CPU0 has been using .data.init
* offset for CPU0 and clear the area for others. * area. Reload %gs offset for CPU0.
*/ */
if (cpu == 0) if (cpu == 0)
load_pda_offset(0); load_gs_base(cpu);
else
memset(cpu_pda(cpu), 0, sizeof(*cpu_pda(cpu)));
#endif #endif
DBG("PERCPU: cpu %4d %p\n", cpu, ptr); DBG("PERCPU: cpu %4d %p\n", cpu, ptr);
......
...@@ -59,7 +59,6 @@ ...@@ -59,7 +59,6 @@
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
#include <asm/pgalloc.h> #include <asm/pgalloc.h>
#include <asm/proto.h> #include <asm/proto.h>
#include <asm/pda.h>
#else #else
#include <asm/processor-flags.h> #include <asm/processor-flags.h>
#include <asm/arch_hooks.h> #include <asm/arch_hooks.h>
......
...@@ -220,8 +220,7 @@ SECTIONS ...@@ -220,8 +220,7 @@ SECTIONS
* so that it can be accessed as a percpu variable. * so that it can be accessed as a percpu variable.
*/ */
. = ALIGN(PAGE_SIZE); . = ALIGN(PAGE_SIZE);
PERCPU_VADDR_PREALLOC(0, :percpu, pda_size) PERCPU_VADDR(0, :percpu)
per_cpu____pda = __per_cpu_start;
#else #else
PERCPU(PAGE_SIZE) PERCPU(PAGE_SIZE)
#endif #endif
...@@ -262,3 +261,8 @@ SECTIONS ...@@ -262,3 +261,8 @@ SECTIONS
*/ */
ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
"kernel image bigger than KERNEL_IMAGE_SIZE") "kernel image bigger than KERNEL_IMAGE_SIZE")
#ifdef CONFIG_SMP
ASSERT((per_cpu__irq_stack_union == 0),
"irq_stack_union is not at start of per-cpu area");
#endif
...@@ -1645,7 +1645,6 @@ asmlinkage void __init xen_start_kernel(void) ...@@ -1645,7 +1645,6 @@ asmlinkage void __init xen_start_kernel(void)
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
/* Disable until direct per-cpu data access. */ /* Disable until direct per-cpu data access. */
have_vcpu_info_placement = 0; have_vcpu_info_placement = 0;
pda_init(0);
#endif #endif
xen_smp_init(); xen_smp_init();
......
...@@ -430,22 +430,10 @@ ...@@ -430,22 +430,10 @@
*(.initcall7.init) \ *(.initcall7.init) \
*(.initcall7s.init) *(.initcall7s.init)
#define PERCPU_PROLOG(vaddr) \
VMLINUX_SYMBOL(__per_cpu_load) = .; \
.data.percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load) \
- LOAD_OFFSET) { \
VMLINUX_SYMBOL(__per_cpu_start) = .;
#define PERCPU_EPILOG(phdr) \
VMLINUX_SYMBOL(__per_cpu_end) = .; \
} phdr \
. = VMLINUX_SYMBOL(__per_cpu_load) + SIZEOF(.data.percpu);
/** /**
* PERCPU_VADDR_PREALLOC - define output section for percpu area with prealloc * PERCPU_VADDR - define output section for percpu area
* @vaddr: explicit base address (optional) * @vaddr: explicit base address (optional)
* @phdr: destination PHDR (optional) * @phdr: destination PHDR (optional)
* @prealloc: the size of prealloc area
* *
* Macro which expands to output section for percpu area. If @vaddr * Macro which expands to output section for percpu area. If @vaddr
* is not blank, it specifies explicit base address and all percpu * is not blank, it specifies explicit base address and all percpu
...@@ -457,39 +445,23 @@ ...@@ -457,39 +445,23 @@
* section in the linker script will go there too. @phdr should have * section in the linker script will go there too. @phdr should have
* a leading colon. * a leading colon.
* *
* If @prealloc is non-zero, the specified number of bytes will be
* reserved at the start of percpu area. As the prealloc area is
* likely to break alignment, this macro puts areas in increasing
* alignment order.
*
* This macro defines three symbols, __per_cpu_load, __per_cpu_start * This macro defines three symbols, __per_cpu_load, __per_cpu_start
* and __per_cpu_end. The first one is the vaddr of loaded percpu * and __per_cpu_end. The first one is the vaddr of loaded percpu
* init data. __per_cpu_start equals @vaddr and __per_cpu_end is the * init data. __per_cpu_start equals @vaddr and __per_cpu_end is the
* end offset. * end offset.
*/ */
#define PERCPU_VADDR_PREALLOC(vaddr, segment, prealloc) \
PERCPU_PROLOG(vaddr) \
. += prealloc; \
*(.data.percpu) \
*(.data.percpu.shared_aligned) \
*(.data.percpu.page_aligned) \
PERCPU_EPILOG(segment)
/**
* PERCPU_VADDR - define output section for percpu area
* @vaddr: explicit base address (optional)
* @phdr: destination PHDR (optional)
*
* Macro which expands to output section for percpu area. Mostly
* identical to PERCPU_VADDR_PREALLOC(@vaddr, @phdr, 0) other than
* using slighly different layout.
*/
#define PERCPU_VADDR(vaddr, phdr) \ #define PERCPU_VADDR(vaddr, phdr) \
PERCPU_PROLOG(vaddr) \ VMLINUX_SYMBOL(__per_cpu_load) = .; \
.data.percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load) \
- LOAD_OFFSET) { \
VMLINUX_SYMBOL(__per_cpu_start) = .; \
*(.data.percpu.first) \
*(.data.percpu.page_aligned) \ *(.data.percpu.page_aligned) \
*(.data.percpu) \ *(.data.percpu) \
*(.data.percpu.shared_aligned) \ *(.data.percpu.shared_aligned) \
PERCPU_EPILOG(phdr) VMLINUX_SYMBOL(__per_cpu_end) = .; \
} phdr \
. = VMLINUX_SYMBOL(__per_cpu_load) + SIZEOF(.data.percpu);
/** /**
* PERCPU - define output section for percpu area, simple version * PERCPU - define output section for percpu area, simple version
......
...@@ -9,34 +9,39 @@ ...@@ -9,34 +9,39 @@
#include <asm/percpu.h> #include <asm/percpu.h>
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
#define DEFINE_PER_CPU(type, name) \ #define PER_CPU_BASE_SECTION ".data.percpu"
__attribute__((__section__(".data.percpu"))) \
PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name
#ifdef MODULE #ifdef MODULE
#define SHARED_ALIGNED_SECTION ".data.percpu" #define PER_CPU_SHARED_ALIGNED_SECTION ""
#else #else
#define SHARED_ALIGNED_SECTION ".data.percpu.shared_aligned" #define PER_CPU_SHARED_ALIGNED_SECTION ".shared_aligned"
#endif #endif
#define PER_CPU_FIRST_SECTION ".first"
#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ #else
__attribute__((__section__(SHARED_ALIGNED_SECTION))) \
PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name \ #define PER_CPU_BASE_SECTION ".data"
____cacheline_aligned_in_smp #define PER_CPU_SHARED_ALIGNED_SECTION ""
#define PER_CPU_FIRST_SECTION ""
#endif
#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \ #define DEFINE_PER_CPU_SECTION(type, name, section) \
__attribute__((__section__(".data.percpu.page_aligned"))) \ __attribute__((__section__(PER_CPU_BASE_SECTION section))) \
PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name
#else
#define DEFINE_PER_CPU(type, name) \ #define DEFINE_PER_CPU(type, name) \
PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name DEFINE_PER_CPU_SECTION(type, name, "")
#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ #define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
DEFINE_PER_CPU(type, name) DEFINE_PER_CPU_SECTION(type, name, PER_CPU_SHARED_ALIGNED_SECTION) \
____cacheline_aligned_in_smp
#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \ #define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \
DEFINE_PER_CPU(type, name) DEFINE_PER_CPU_SECTION(type, name, ".page_aligned")
#endif
#define DEFINE_PER_CPU_FIRST(type, name) \
DEFINE_PER_CPU_SECTION(type, name, PER_CPU_FIRST_SECTION)
#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var) #define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var)
#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var) #define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment