Commit 55e0715f authored by Linus Torvalds

Merge branch 'x86-percpu-for-linus' of...

Merge branch 'x86-percpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'x86-percpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  x86, percpu: Collect hot percpu variables into one cacheline
  x86, percpu: Fix DECLARE/DEFINE_PER_CPU_PAGE_ALIGNED()
  x86, percpu: Add 'percpu_read_stable()' interface for cacheable accesses
parents 7dfd54a9 bdf977b3
...@@ -11,7 +11,7 @@ DECLARE_PER_CPU(struct task_struct *, current_task); ...@@ -11,7 +11,7 @@ DECLARE_PER_CPU(struct task_struct *, current_task);
static __always_inline struct task_struct *get_current(void) static __always_inline struct task_struct *get_current(void)
{ {
return percpu_read(current_task); return percpu_read_stable(current_task);
} }
#define current get_current() #define current get_current()
......
...@@ -49,7 +49,7 @@ ...@@ -49,7 +49,7 @@
#define __percpu_arg(x) "%%"__stringify(__percpu_seg)":%P" #x #define __percpu_arg(x) "%%"__stringify(__percpu_seg)":%P" #x
#define __my_cpu_offset percpu_read(this_cpu_off) #define __my_cpu_offset percpu_read(this_cpu_off)
#else #else
#define __percpu_arg(x) "%" #x #define __percpu_arg(x) "%P" #x
#endif #endif
/* /*
...@@ -104,36 +104,48 @@ do { \ ...@@ -104,36 +104,48 @@ do { \
} \ } \
} while (0) } while (0)
#define percpu_from_op(op, var) \ #define percpu_from_op(op, var, constraint) \
({ \ ({ \
typeof(var) ret__; \ typeof(var) ret__; \
switch (sizeof(var)) { \ switch (sizeof(var)) { \
case 1: \ case 1: \
asm(op "b "__percpu_arg(1)",%0" \ asm(op "b "__percpu_arg(1)",%0" \
: "=q" (ret__) \ : "=q" (ret__) \
: "m" (var)); \ : constraint); \
break; \ break; \
case 2: \ case 2: \
asm(op "w "__percpu_arg(1)",%0" \ asm(op "w "__percpu_arg(1)",%0" \
: "=r" (ret__) \ : "=r" (ret__) \
: "m" (var)); \ : constraint); \
break; \ break; \
case 4: \ case 4: \
asm(op "l "__percpu_arg(1)",%0" \ asm(op "l "__percpu_arg(1)",%0" \
: "=r" (ret__) \ : "=r" (ret__) \
: "m" (var)); \ : constraint); \
break; \ break; \
case 8: \ case 8: \
asm(op "q "__percpu_arg(1)",%0" \ asm(op "q "__percpu_arg(1)",%0" \
: "=r" (ret__) \ : "=r" (ret__) \
: "m" (var)); \ : constraint); \
break; \ break; \
default: __bad_percpu_size(); \ default: __bad_percpu_size(); \
} \ } \
ret__; \ ret__; \
}) })
#define percpu_read(var) percpu_from_op("mov", per_cpu__##var) /*
* percpu_read() makes gcc load the percpu variable every time it is
* accessed while percpu_read_stable() allows the value to be cached.
* percpu_read_stable() is more efficient and can be used if its value
* is guaranteed to be valid across cpus. The current users include
* get_current() and get_thread_info() both of which are actually
* per-thread variables implemented as per-cpu variables and thus
* stable for the duration of the respective task.
*/
#define percpu_read(var) percpu_from_op("mov", per_cpu__##var, \
"m" (per_cpu__##var))
#define percpu_read_stable(var) percpu_from_op("mov", per_cpu__##var, \
"p" (&per_cpu__##var))
#define percpu_write(var, val) percpu_to_op("mov", per_cpu__##var, val) #define percpu_write(var, val) percpu_to_op("mov", per_cpu__##var, val)
#define percpu_add(var, val) percpu_to_op("add", per_cpu__##var, val) #define percpu_add(var, val) percpu_to_op("add", per_cpu__##var, val)
#define percpu_sub(var, val) percpu_to_op("sub", per_cpu__##var, val) #define percpu_sub(var, val) percpu_to_op("sub", per_cpu__##var, val)
......
...@@ -214,7 +214,7 @@ DECLARE_PER_CPU(unsigned long, kernel_stack); ...@@ -214,7 +214,7 @@ DECLARE_PER_CPU(unsigned long, kernel_stack);
static inline struct thread_info *current_thread_info(void) static inline struct thread_info *current_thread_info(void)
{ {
struct thread_info *ti; struct thread_info *ti;
ti = (void *)(percpu_read(kernel_stack) + ti = (void *)(percpu_read_stable(kernel_stack) +
KERNEL_STACK_OFFSET - THREAD_SIZE); KERNEL_STACK_OFFSET - THREAD_SIZE);
return ti; return ti;
} }
......
...@@ -987,13 +987,21 @@ struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table }; ...@@ -987,13 +987,21 @@ struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table };
DEFINE_PER_CPU_FIRST(union irq_stack_union, DEFINE_PER_CPU_FIRST(union irq_stack_union,
irq_stack_union) __aligned(PAGE_SIZE); irq_stack_union) __aligned(PAGE_SIZE);
DEFINE_PER_CPU(char *, irq_stack_ptr) = /*
init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; * The following four percpu variables are hot. Align current_task to
* cacheline size such that all four fall in the same cacheline.
*/
DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
&init_task;
EXPORT_PER_CPU_SYMBOL(current_task);
DEFINE_PER_CPU(unsigned long, kernel_stack) = DEFINE_PER_CPU(unsigned long, kernel_stack) =
(unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE; (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
EXPORT_PER_CPU_SYMBOL(kernel_stack); EXPORT_PER_CPU_SYMBOL(kernel_stack);
DEFINE_PER_CPU(char *, irq_stack_ptr) =
init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
DEFINE_PER_CPU(unsigned int, irq_count) = -1; DEFINE_PER_CPU(unsigned int, irq_count) = -1;
/* /*
...@@ -1008,8 +1016,7 @@ static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = { ...@@ -1008,8 +1016,7 @@ static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
}; };
static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]) [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
__aligned(PAGE_SIZE);
/* May not be marked __init: used by software suspend */ /* May not be marked __init: used by software suspend */
void syscall_init(void) void syscall_init(void)
...@@ -1042,6 +1049,9 @@ DEFINE_PER_CPU(struct orig_ist, orig_ist); ...@@ -1042,6 +1049,9 @@ DEFINE_PER_CPU(struct orig_ist, orig_ist);
#else /* CONFIG_X86_64 */ #else /* CONFIG_X86_64 */
DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
EXPORT_PER_CPU_SYMBOL(current_task);
#ifdef CONFIG_CC_STACKPROTECTOR #ifdef CONFIG_CC_STACKPROTECTOR
DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary); DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
#endif #endif
......
...@@ -61,9 +61,6 @@ ...@@ -61,9 +61,6 @@
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
EXPORT_PER_CPU_SYMBOL(current_task);
/* /*
* Return saved PC of a blocked thread. * Return saved PC of a blocked thread.
*/ */
......
...@@ -55,9 +55,6 @@ ...@@ -55,9 +55,6 @@
asmlinkage extern void ret_from_fork(void); asmlinkage extern void ret_from_fork(void);
DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
EXPORT_PER_CPU_SYMBOL(current_task);
DEFINE_PER_CPU(unsigned long, old_rsp); DEFINE_PER_CPU(unsigned long, old_rsp);
static DEFINE_PER_CPU(unsigned char, is_idle); static DEFINE_PER_CPU(unsigned char, is_idle);
......
...@@ -78,10 +78,12 @@ ...@@ -78,10 +78,12 @@
* Declaration/definition used for per-CPU variables that must be page aligned. * Declaration/definition used for per-CPU variables that must be page aligned.
*/ */
#define DECLARE_PER_CPU_PAGE_ALIGNED(type, name) \ #define DECLARE_PER_CPU_PAGE_ALIGNED(type, name) \
DECLARE_PER_CPU_SECTION(type, name, ".page_aligned") DECLARE_PER_CPU_SECTION(type, name, ".page_aligned") \
__aligned(PAGE_SIZE)
#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \ #define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \
DEFINE_PER_CPU_SECTION(type, name, ".page_aligned") DEFINE_PER_CPU_SECTION(type, name, ".page_aligned") \
__aligned(PAGE_SIZE)
/* /*
* Intermodule exports for per-CPU variables. * Intermodule exports for per-CPU variables.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment