Commit 5a5488d3 authored by David S. Miller

sparc64: Store per-cpu offset in trap_block[]

Surprisingly this actually makes LOAD_PER_CPU_BASE() a little
more efficient.
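
The saving comes from replacing two dependent loads (of __per_cpu_base and
__per_cpu_shift) and a variable-distance shift with one constant-distance
shift and a single load from trap_block[]. In C terms, the lookup changes
roughly as sketched below (an illustrative paraphrase of the diff, not code
from the patch):

	/* Before: load two globals, then shift and add. */
	offset = __per_cpu_base + ((unsigned long)cpu << __per_cpu_shift);

	/* After: one indexed load; the scaling shift is the compile-time
	 * constant TRAP_BLOCK_SZ_SHIFT, so no extra memory accesses.
	 */
	offset = trap_block[cpu].__per_cpu_base;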
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 19f0fa3f
@@ -7,12 +7,12 @@ register unsigned long __local_per_cpu_offset asm("g5");
 
 #ifdef CONFIG_SMP
 
+#include <asm/trap_block.h>
+
 extern void real_setup_per_cpu_areas(void);
 
-extern unsigned long __per_cpu_base;
-extern unsigned long __per_cpu_shift;
 #define __per_cpu_offset(__cpu) \
-	(__per_cpu_base + ((unsigned long)(__cpu) << __per_cpu_shift))
+	(trap_block[(__cpu)].__per_cpu_base)
 #define per_cpu_offset(x) (__per_cpu_offset(x))
 
 #define __my_cpu_offset __local_per_cpu_offset

@@ -48,7 +48,7 @@ struct trap_per_cpu {
 	unsigned int		dev_mondo_qmask;
 	unsigned int		resum_qmask;
 	unsigned int		nonresum_qmask;
-	unsigned long		__unused;
+	unsigned long		__per_cpu_base;
 } __attribute__((aligned(64)));
 extern struct trap_per_cpu trap_block[NR_CPUS];
 extern void init_cur_cpu_trap(struct thread_info *);

@@ -101,6 +101,7 @@ extern struct sun4v_2insn_patch_entry __sun4v_2insn_patch,
 #define TRAP_PER_CPU_DEV_MONDO_QMASK	0xec
 #define TRAP_PER_CPU_RESUM_QMASK	0xf0
 #define TRAP_PER_CPU_NONRESUM_QMASK	0xf4
+#define TRAP_PER_CPU_PER_CPU_BASE	0xf8
 
 #define TRAP_BLOCK_SZ_SHIFT		8
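
Why 0xf8: the three unsigned int qmasks sit at 0xec, 0xf0 and 0xf4, so the
unsigned long that replaces __unused starts at the next 8-byte boundary,
0xf8, and ends at 0x100. That exactly fills the 256-byte entry implied by
TRAP_BLOCK_SZ_SHIFT == 8, which is the stride LOAD_PER_CPU_BASE uses below
to index trap_block[].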

@@ -172,12 +173,11 @@ extern struct sun4v_2insn_patch_entry __sun4v_2insn_patch,
  */
 #define LOAD_PER_CPU_BASE(DEST, THR, REG1, REG2, REG3)	\
 	lduh	[THR + TI_CPU], REG1;			\
-	sethi	%hi(__per_cpu_shift), REG3;		\
-	sethi	%hi(__per_cpu_base), REG2;		\
-	ldx	[REG3 + %lo(__per_cpu_shift)], REG3;	\
-	ldx	[REG2 + %lo(__per_cpu_base)], REG2;	\
-	sllx	REG1, REG3, REG3;			\
-	add	REG3, REG2, DEST;
+	sethi	%hi(trap_block), REG2;			\
+	sllx	REG1, TRAP_BLOCK_SZ_SHIFT, REG1;	\
+	or	REG2, %lo(trap_block), REG2;		\
+	add	REG2, REG1, REG2;			\
+	ldx	[REG2 + TRAP_PER_CPU_PER_CPU_BASE], DEST;
 
 #else
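
Spelled out in C, the rewritten macro body computes the following (an
illustrative paraphrase of the assembly above, not code from the patch):

	/* lduh: REG1 = this thread's cpu number. */
	unsigned long ent = (unsigned long)cpu << TRAP_BLOCK_SZ_SHIFT;  /* sllx */
	unsigned long tb  = (unsigned long)trap_block + ent;            /* sethi; or; add */
	dest = *(unsigned long *)(tb + TRAP_PER_CPU_PER_CPU_BASE);      /* ldx */

One load instead of two, no variable-distance shift, and REG3 goes unused
even though the macro keeps its five-register signature.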

@@ -641,28 +641,6 @@ tlb_fixup_done:
 	/* Not reached... */
 
 1:
-	/* If we boot on a non-zero cpu, all of the per-cpu
-	 * variable references we make before setting up the
-	 * per-cpu areas will use a bogus offset.  Put a
-	 * compensating factor into __per_cpu_base to handle
-	 * this cleanly.
-	 *
-	 * What the per-cpu code calculates is:
-	 *
-	 *	__per_cpu_base + (cpu << __per_cpu_shift)
-	 *
-	 * These two variables are zero initially, so to
-	 * make it all cancel out to zero we need to put
-	 * "0 - (cpu << 0)" into __per_cpu_base so that the
-	 * above formula evaluates to zero.
-	 *
-	 * We cannot even perform a printk() until this stuff
-	 * is setup as that calls cpu_clock() which uses
-	 * per-cpu variables.
-	 */
-	sub	%g0, %o0, %o1
-	sethi	%hi(__per_cpu_base), %o2
-	stx	%o1, [%o2 + %lo(__per_cpu_base)]
 #else
 	mov	0, %o0
 #endif
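
The deleted fixup existed only because the old formula mixed a global base
with a cpu-scaled term: booting on cpu N, before real_setup_per_cpu_areas()
runs, the lookup would otherwise compute 0 + (N << 0) = N. Storing
-(N << 0) in __per_cpu_base made the sum (0 - N) + N = 0. With the offset
kept per-cpu in trap_block[], the zero-initialized entries already read as
0 at this point, so no compensation is needed.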

@@ -1371,23 +1371,17 @@ void smp_send_stop(void)
 {
 }
 
-unsigned long __per_cpu_base __read_mostly;
-unsigned long __per_cpu_shift __read_mostly;
-
-EXPORT_SYMBOL(__per_cpu_base);
-EXPORT_SYMBOL(__per_cpu_shift);
-
 void __init real_setup_per_cpu_areas(void)
 {
-	unsigned long paddr, goal, size, i;
+	unsigned long base, shift, paddr, goal, size, i;
 	char *ptr;
 
 	/* Copy section for each CPU (we discard the original) */
 	goal = PERCPU_ENOUGH_ROOM;
 
-	__per_cpu_shift = PAGE_SHIFT;
+	shift = PAGE_SHIFT;
 	for (size = PAGE_SIZE; size < goal; size <<= 1UL)
-		__per_cpu_shift++;
+		shift++;
 
 	paddr = lmb_alloc(size * NR_CPUS, PAGE_SIZE);
 	if (!paddr) {
@@ -1396,10 +1390,12 @@ void __init real_setup_per_cpu_areas(void)
 	}
 
 	ptr = __va(paddr);
-	__per_cpu_base = ptr - __per_cpu_start;
+	base = ptr - __per_cpu_start;
 
-	for (i = 0; i < NR_CPUS; i++, ptr += size)
+	for (i = 0; i < NR_CPUS; i++, ptr += size) {
+		__per_cpu_offset(i) = base + (i * size);
 		memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
+	}
 
 	/* Setup %g5 for the boot cpu.  */
 	__local_per_cpu_offset = __per_cpu_offset(smp_processor_id());
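
The sizing loop rounds PERCPU_ENOUGH_ROOM up to a power of two while
tracking its log2 in shift, and each cpu's area then starts size bytes
after the previous one. A minimal user-space sketch of that computation
(goal and the page-size values here are illustrative stand-ins, not the
kernel's actual constants):

	#include <stdio.h>

	int main(void)
	{
		unsigned long goal  = 128 * 1024;  /* stand-in for PERCPU_ENOUGH_ROOM */
		unsigned long shift = 13;          /* stand-in for PAGE_SHIFT */
		unsigned long size, base = 0, i;

		/* Double size until it covers goal, as the patch does. */
		for (size = 1UL << shift; size < goal; size <<= 1UL)
			shift++;

		/* Offsets assigned per cpu, mirroring the patch's loop. */
		for (i = 0; i < 4; i++)
			printf("cpu %lu: offset %#lx\n", i, base + i * size);
		return 0;
	}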

@@ -2509,6 +2509,7 @@ void do_getpsr(struct pt_regs *regs)
 }
 
 struct trap_per_cpu trap_block[NR_CPUS];
+EXPORT_SYMBOL(trap_block);
 
 /* This can get invoked before sched_init() so play it super safe
  * and use hard_smp_processor_id().

@@ -2592,7 +2593,9 @@ void __init trap_init(void)
 	    (TRAP_PER_CPU_RESUM_QMASK !=
 	     offsetof(struct trap_per_cpu, resum_qmask)) ||
 	    (TRAP_PER_CPU_NONRESUM_QMASK !=
-	     offsetof(struct trap_per_cpu, nonresum_qmask)))
+	     offsetof(struct trap_per_cpu, nonresum_qmask)) ||
+	    (TRAP_PER_CPU_PER_CPU_BASE !=
+	     offsetof(struct trap_per_cpu, __per_cpu_base)))
 		trap_per_cpu_offsets_are_bolixed_dave();
 
 	if ((TSB_CONFIG_TSB !=
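
These checks compare the hand-maintained assembly offsets against
offsetof() on the real struct, with trap_per_cpu_offsets_are_bolixed_dave()
as the failure hook, so the new 0xf8 constant gets the same protection as
the existing fields. A stand-alone sketch of the same invariant, written
with a C11 compile-time assertion instead of the kernel's runtime call
(hypothetical code, not from the patch):

	#include <stddef.h>

	struct trap_per_cpu_example {          /* abbreviated stand-in layout */
		char          pad[0xec];
		unsigned int  dev_mondo_qmask;
		unsigned int  resum_qmask;
		unsigned int  nonresum_qmask;
		unsigned long __per_cpu_base;
	} __attribute__((aligned(64)));

	_Static_assert(offsetof(struct trap_per_cpu_example, __per_cpu_base) == 0xf8,
		       "TRAP_PER_CPU_PER_CPU_BASE must match the C layout");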