Commit f5e58aa1 authored by Nicolas Pitre, committed by Russell King

[ARM PATCH] 2430/3: TLS support for ARM

Patch from Nicolas Pitre

This implements TLS support in the most efficient way for all ARM
processors in use today.  The trick is to define a specific address in
kernel area that is made readable from user space to hold the TLS
pointer so it is highly efficient to retrieve it with no overhead.
Since the kernel already maps a page at 0xffff0000 to hold the exception
vectors, we can use the top of that page for storing the TLS ptr at
0xffff0ffc. This address has the advantage of fitting optimally with the
ARM load addressing mode as follows:
	mov	rd, #0xffff0fff
	ldr	rd, [rd, #-3]
Considering load scheduling, this means 2 cycles to retrieve the TLS
value which is even faster than a coprocessor access.  Even gcc
generates the above assembly when given:
	void *tls = *((void **)0xffff0ffc);
It is fine to make the vector page readable from user space since it
contains nothing that could compromise security, and doing so doesn't
require an extra memory page to be allocated.
On SMP (which should be ARMv6 and above only) the special reg for
TLS will be available.  Since ARMv6 binaries are most likely to use
strex/ldrex insns instead of swp to implement user space atomic
primitives, those ARMv6 binaries won't execute on pre ARMv6 processors
anyway.  So the absence of a TLS reg is a non-issue for them already.
Also on SMP targets, since the hivec page can't be relied upon to get
the TLS value, we'll have the kernel emulate access to it through the
data abort vector for backward compatibility with pre ARMv6 binaries.
Otherwise, non SMP ARMv6 kernels will support both methods
simultaneously.
So the decision to use hivec or tlsreg could depend on whether a given
library is optimized for ARMv6+ or not. And since ARM SMP devices are
still far from becoming mainstream we can safely go with the hivec (and
hivec emulation on SMP) for the time being.

Signed-off-by: Nicolas Pitre
Signed-off-by: Russell King
parent 186bbc72
...@@ -59,6 +59,7 @@ int main(void) ...@@ -59,6 +59,7 @@ int main(void)
DEFINE(TI_CPU_DOMAIN, offsetof(struct thread_info, cpu_domain)); DEFINE(TI_CPU_DOMAIN, offsetof(struct thread_info, cpu_domain));
DEFINE(TI_CPU_SAVE, offsetof(struct thread_info, cpu_context)); DEFINE(TI_CPU_SAVE, offsetof(struct thread_info, cpu_context));
DEFINE(TI_USED_CP, offsetof(struct thread_info, used_cp)); DEFINE(TI_USED_CP, offsetof(struct thread_info, used_cp));
DEFINE(TI_TP_VALUE, offsetof(struct thread_info, tp_value));
DEFINE(TI_FPSTATE, offsetof(struct thread_info, fpstate)); DEFINE(TI_FPSTATE, offsetof(struct thread_info, fpstate));
DEFINE(TI_VFPSTATE, offsetof(struct thread_info, vfpstate)); DEFINE(TI_VFPSTATE, offsetof(struct thread_info, vfpstate));
DEFINE(TI_IWMMXT_STATE, (offsetof(struct thread_info, fpstate)+4)&~7); DEFINE(TI_IWMMXT_STATE, (offsetof(struct thread_info, fpstate)+4)&~7);
......
...@@ -270,7 +270,7 @@ __syscall_start: ...@@ -270,7 +270,7 @@ __syscall_start:
.long sys_remap_file_pages .long sys_remap_file_pages
.long sys_ni_syscall /* sys_set_thread_area */ .long sys_ni_syscall /* sys_set_thread_area */
/* 255 */ .long sys_ni_syscall /* sys_get_thread_area */ /* 255 */ .long sys_ni_syscall /* sys_get_thread_area */
.long sys_ni_syscall /* sys_set_tid_address */ .long sys_set_tid_address
.long sys_timer_create .long sys_timer_create
.long sys_timer_settime .long sys_timer_settime
.long sys_timer_gettime .long sys_timer_gettime
......
...@@ -490,13 +490,16 @@ ENTRY(ret_from_exception) ...@@ -490,13 +490,16 @@ ENTRY(ret_from_exception)
*/ */
ENTRY(__switch_to) ENTRY(__switch_to)
add ip, r1, #TI_CPU_SAVE add ip, r1, #TI_CPU_SAVE
ldr r3, [r2, #TI_CPU_DOMAIN]! ldr r3, [r2, #TI_TP_VALUE]
stmia ip!, {r4 - sl, fp, sp, lr} @ Store most regs on stack stmia ip!, {r4 - sl, fp, sp, lr} @ Store most regs on stack
ldr r6, [r2, #TI_CPU_DOMAIN]!
#if defined(CONFIG_CPU_XSCALE) && !defined(CONFIG_IWMMXT) #if defined(CONFIG_CPU_XSCALE) && !defined(CONFIG_IWMMXT)
mra r4, r5, acc0 mra r4, r5, acc0
stmia ip, {r4, r5} stmia ip, {r4, r5}
#endif #endif
mcr p15, 0, r3, c3, c0, 0 @ Set domain register mov r4, #0xffff0fff
str r3, [r4, #-3] @ Set TLS ptr
mcr p15, 0, r6, c3, c0, 0 @ Set domain register
#ifdef CONFIG_VFP #ifdef CONFIG_VFP
@ Always disable VFP so we can lazily save/restore the old @ Always disable VFP so we can lazily save/restore the old
@ state. This occurs in the context of the previous thread. @ state. This occurs in the context of the previous thread.
......
...@@ -212,7 +212,8 @@ sys_execve_wrapper: ...@@ -212,7 +212,8 @@ sys_execve_wrapper:
b sys_execve b sys_execve
sys_clone_wapper: sys_clone_wapper:
add r2, sp, #S_OFF add ip, sp, #S_OFF
str ip, [sp, #4]
b sys_clone b sys_clone
sys_sigsuspend_wrapper: sys_sigsuspend_wrapper:
......
...@@ -352,6 +352,9 @@ copy_thread(int nr, unsigned long clone_flags, unsigned long stack_start, ...@@ -352,6 +352,9 @@ copy_thread(int nr, unsigned long clone_flags, unsigned long stack_start,
thread->cpu_context.sp = (unsigned long)childregs; thread->cpu_context.sp = (unsigned long)childregs;
thread->cpu_context.pc = (unsigned long)ret_from_fork; thread->cpu_context.pc = (unsigned long)ret_from_fork;
if (clone_flags & CLONE_SETTLS)
thread->tp_value = regs->ARM_r3;
return 0; return 0;
} }
......
...@@ -719,6 +719,11 @@ static int do_ptrace(int request, struct task_struct *child, long addr, long dat ...@@ -719,6 +719,11 @@ static int do_ptrace(int request, struct task_struct *child, long addr, long dat
ret = ptrace_setfpregs(child, (void __user *)data); ret = ptrace_setfpregs(child, (void __user *)data);
break; break;
case PTRACE_GET_THREAD_AREA:
ret = put_user(child->thread_info->tp_value,
(unsigned long __user *) data);
break;
default: default:
ret = ptrace_request(child, request, addr, data); ret = ptrace_request(child, request, addr, data);
break; break;
......
...@@ -241,18 +241,14 @@ asmlinkage int sys_fork(struct pt_regs *regs) ...@@ -241,18 +241,14 @@ asmlinkage int sys_fork(struct pt_regs *regs)
/* Clone a task - this clones the calling program thread. /* Clone a task - this clones the calling program thread.
* This is called indirectly via a small wrapper * This is called indirectly via a small wrapper
*/ */
asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp, struct pt_regs *regs) asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp,
int *parent_tidptr, int tls_val, int *child_tidptr,
struct pt_regs *regs)
{ {
/*
* We don't support SETTID / CLEARTID
*/
if (clone_flags & (CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID))
return -EINVAL;
if (!newsp) if (!newsp)
newsp = regs->ARM_sp; newsp = regs->ARM_sp;
return do_fork(clone_flags, newsp, regs, 0, NULL, NULL); return do_fork(clone_flags, newsp, regs, 0, parent_tidptr, child_tidptr);
} }
asmlinkage int sys_vfork(struct pt_regs *regs) asmlinkage int sys_vfork(struct pt_regs *regs)
......
...@@ -393,6 +393,7 @@ do_cache_op(unsigned long start, unsigned long end, int flags) ...@@ -393,6 +393,7 @@ do_cache_op(unsigned long start, unsigned long end, int flags)
#define NR(x) ((__ARM_NR_##x) - __ARM_NR_BASE) #define NR(x) ((__ARM_NR_##x) - __ARM_NR_BASE)
asmlinkage int arm_syscall(int no, struct pt_regs *regs) asmlinkage int arm_syscall(int no, struct pt_regs *regs)
{ {
struct thread_info *thread = current_thread_info();
siginfo_t info; siginfo_t info;
if ((no >> 16) != 0x9f) if ((no >> 16) != 0x9f)
...@@ -445,6 +446,17 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs) ...@@ -445,6 +446,17 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs)
regs->ARM_cpsr |= MODE32_BIT; regs->ARM_cpsr |= MODE32_BIT;
return regs->ARM_r0; return regs->ARM_r0;
case NR(set_tls):
thread->tp_value = regs->ARM_r0;
/*
* Our user accessible TLS ptr is located at 0xffff0ffc.
* On SMP read access to this address must raise a fault
 * and be emulated from the data abort handler.
*/
*((unsigned long *)0xffff0ffc) = thread->tp_value;
return 0;
default: default:
/* Calls 9f00xx..9f07ff are defined to return -ENOSYS /* Calls 9f00xx..9f07ff are defined to return -ENOSYS
if not implemented, rather than raising SIGILL. This if not implemented, rather than raising SIGILL. This
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#define PTRACE_OLDSETOPTIONS 21 #define PTRACE_OLDSETOPTIONS 21
#define PTRACE_GET_THREAD_AREA 22
/* /*
* PSR bits * PSR bits
*/ */
......
...@@ -53,6 +53,7 @@ struct thread_info { ...@@ -53,6 +53,7 @@ struct thread_info {
__u32 cpu_domain; /* cpu domain */ __u32 cpu_domain; /* cpu domain */
struct cpu_context_save cpu_context; /* cpu context */ struct cpu_context_save cpu_context; /* cpu context */
__u8 used_cp[16]; /* thread used copro */ __u8 used_cp[16]; /* thread used copro */
unsigned long tp_value;
union fp_state fpstate; union fp_state fpstate;
union vfp_state vfpstate; union vfp_state vfpstate;
struct restart_block restart_block; struct restart_block restart_block;
......
...@@ -281,7 +281,7 @@ ...@@ -281,7 +281,7 @@
#define __NR_remap_file_pages (__NR_SYSCALL_BASE+253) #define __NR_remap_file_pages (__NR_SYSCALL_BASE+253)
/* 254 for set_thread_area */ /* 254 for set_thread_area */
/* 255 for get_thread_area */ /* 255 for get_thread_area */
/* 256 for set_tid_address */ #define __NR_set_tid_address (__NR_SYSCALL_BASE+256)
#define __NR_timer_create (__NR_SYSCALL_BASE+257) #define __NR_timer_create (__NR_SYSCALL_BASE+257)
#define __NR_timer_settime (__NR_SYSCALL_BASE+258) #define __NR_timer_settime (__NR_SYSCALL_BASE+258)
#define __NR_timer_gettime (__NR_SYSCALL_BASE+259) #define __NR_timer_gettime (__NR_SYSCALL_BASE+259)
...@@ -316,6 +316,8 @@ ...@@ -316,6 +316,8 @@
#define __ARM_NR_usr26 (__ARM_NR_BASE+3) #define __ARM_NR_usr26 (__ARM_NR_BASE+3)
#define __ARM_NR_usr32 (__ARM_NR_BASE+4) #define __ARM_NR_usr32 (__ARM_NR_BASE+4)
#define __ARM_NR_set_tls (__ARM_NR_BASE+0x800)
#define __sys2(x) #x #define __sys2(x) #x
#define __sys1(x) __sys2(x) #define __sys1(x) __sys2(x)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment