Commit bc8c6490 authored by David Mosberger, committed by Tony Luck

[IA64] Improve ia64_leave_syscall() for McKinley-type cores.

Optimize ia64_leave_syscall() a bit better for McKinley-type cores.
The patch looks big, but that's mostly due to renaming r16/r17 to r2/r3.
Good for a 13 cycle improvement.
Signed-off-by: David Mosberger-Tang <davidm@hpl.hp.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
parent b6f4b744
...@@ -633,10 +633,12 @@ END(ia64_ret_from_syscall) ...@@ -633,10 +633,12 @@ END(ia64_ret_from_syscall)
* r13: restored (user-level thread pointer) * r13: restored (user-level thread pointer)
* r14: cleared * r14: cleared
* r15: restored (syscall #) * r15: restored (syscall #)
* r16-r19: cleared * r16-r17: cleared
* r18: user-level b6
* r19: cleared
* r20: user-level ar.fpsr * r20: user-level ar.fpsr
* r21: user-level b0 * r21: user-level b0
* r22: user-level b6 * r22: cleared
* r23: user-level ar.bspstore * r23: user-level ar.bspstore
* r24: user-level ar.rnat * r24: user-level ar.rnat
* r25: user-level ar.unat * r25: user-level ar.unat
...@@ -661,7 +663,7 @@ END(ia64_ret_from_syscall) ...@@ -661,7 +663,7 @@ END(ia64_ret_from_syscall)
* ar.csd: cleared * ar.csd: cleared
* ar.ssd: cleared * ar.ssd: cleared
*/ */
GLOBAL_ENTRY(ia64_leave_syscall) ENTRY(ia64_leave_syscall)
PT_REGS_UNWIND_INFO(0) PT_REGS_UNWIND_INFO(0)
/* /*
* work.need_resched etc. mustn't get changed by this CPU before it returns to * work.need_resched etc. mustn't get changed by this CPU before it returns to
...@@ -690,79 +692,80 @@ GLOBAL_ENTRY(ia64_leave_syscall) ...@@ -690,79 +692,80 @@ GLOBAL_ENTRY(ia64_leave_syscall)
(pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk (pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk
#endif #endif
.work_processed_syscall: .work_processed_syscall:
adds r16=PT(LOADRS)+16,r12 adds r2=PT(LOADRS)+16,r12
adds r17=PT(AR_BSPSTORE)+16,r12 adds r3=PT(AR_BSPSTORE)+16,r12
adds r18=TI_FLAGS+IA64_TASK_SIZE,r13 adds r18=TI_FLAGS+IA64_TASK_SIZE,r13
;; ;;
(p6) ld4 r31=[r18] // load current_thread_info()->flags (p6) ld4 r31=[r18] // load current_thread_info()->flags
ld8 r19=[r16],PT(B6)-PT(LOADRS) // load ar.rsc value for "loadrs" ld8 r19=[r2],PT(B6)-PT(LOADRS) // load ar.rsc value for "loadrs"
nop.i 0 mov b7=r0 // clear b7
;; ;;
ld8 r23=[r17],PT(R9)-PT(AR_BSPSTORE) // load ar.bspstore (may be garbage) ld8 r23=[r3],PT(R9)-PT(AR_BSPSTORE) // load ar.bspstore (may be garbage)
ld8 r22=[r16],PT(R8)-PT(B6) // load b6 ld8 r18=[r2],PT(R8)-PT(B6) // load b6
(p6) and r15=TIF_WORK_MASK,r31 // any work other than TIF_SYSCALL_TRACE? (p6) and r15=TIF_WORK_MASK,r31 // any work other than TIF_SYSCALL_TRACE?
;; ;;
mov r16=ar.bsp // M2 get existing backing store pointer
mov.m ar.ccv=r0 // clear ar.ccv
(p6) cmp4.ne.unc p6,p0=r15, r0 // any special work pending? (p6) cmp4.ne.unc p6,p0=r15, r0 // any special work pending?
(p6) br.cond.spnt .work_pending (p6) br.cond.spnt .work_pending
;; ;;
// start restoring the state saved on the kernel stack (struct pt_regs): // start restoring the state saved on the kernel stack (struct pt_regs):
ld8.fill r8=[r16],16 ld8.fill r8=[r2],16
ld8.fill r9=[r17],16 ld8.fill r9=[r3],16
mov f6=f0 // clear f6 mov f6=f0 // clear f6
;; ;;
ld8.fill r10=[r16],16 invala // M0|1 invalidate ALAT
ld8.fill r11=[r17],16 rsm psr.i | psr.ic // M2 initiate turning off of interrupt and interruption collection
mov f9=f0 // clear f9
ld8.fill r10=[r2],16
ld8.fill r11=[r3],16
mov f7=f0 // clear f7 mov f7=f0 // clear f7
;; ;;
ld8 r29=[r16],16 // load cr.ipsr ld8 r29=[r2],16 // load cr.ipsr
ld8 r28=[r17],16 // load cr.iip ld8 r28=[r3],16 // load cr.iip
mov f8=f0 // clear f8 mov f8=f0 // clear f8
;; ;;
ld8 r30=[r16],16 // load cr.ifs ld8 r30=[r2],16 // M0|1 load cr.ifs
ld8 r25=[r17],16 // load ar.unat mov.m ar.ssd=r0 // M2 clear ar.ssd
cmp.eq p9,p0=r0,r0 // set p9 to indicate that we should restore cr.ifs cmp.eq p9,p0=r0,r0 // set p9 to indicate that we should restore cr.ifs
;; ;;
rsm psr.i | psr.ic // initiate turning off of interrupt and interruption collection ld8 r25=[r3],16 // M0|1 load ar.unat
invala // invalidate ALAT mov.m ar.csd=r0 // M2 clear ar.csd
mov f9=f0 // clear f9 mov r22=r0 // clear r22
;;
mov.m ar.ssd=r0 // clear ar.ssd ld8 r26=[r2],PT(B0)-PT(AR_PFS) // M0|1 load ar.pfs
mov.m ar.csd=r0 // clear ar.csd nop.m 0
mov f10=f0 // clear f10 mov f10=f0 // clear f10
;; ;;
ld8 r26=[r16],16 // load ar.pfs ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // load b0
ld8 r27=[r17],PT(PR)-PT(AR_RSC) // load ar.rsc ld8 r27=[r3],PT(PR)-PT(AR_RSC) // load ar.rsc
mov f11=f0 // clear f11 mov f11=f0 // clear f11
;; ;;
ld8 r24=[r16],PT(B0)-PT(AR_RNAT) // load ar.rnat (may be garbage) ld8 r24=[r2],PT(AR_FPSR)-PT(AR_RNAT) // load ar.rnat (may be garbage)
ld8 r31=[r17],PT(R1)-PT(PR) // load predicates ld8 r31=[r3],PT(R1)-PT(PR) // load predicates
(pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13 (pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
;; ;;
ld8 r21=[r16],PT(R12)-PT(B0) // load b0 ld8 r20=[r2],PT(R12)-PT(AR_FPSR) // load ar.fpsr
ld8.fill r1=[r17],16 // load r1 ld8.fill r1=[r3],16 // load r1
(pUStk) mov r3=1 (pUStk) mov r17=1
;; ;;
ld8.fill r12=[r16],16 srlz.i // M0 ensure interruption collection is off
ld8.fill r13=[r17],16 ld8.fill r13=[r3],16
mov r2=r0 // clear r2 nop.i 0
;; ;;
ld8 r20=[r16] // load ar.fpsr ld8.fill r12=[r2] // restore r12 (sp)
ld8.fill r15=[r17] // load r15 ld8.fill r15=[r3] // restore r15
mov b7=r0 // clear b7 addl r3=THIS_CPU(ia64_phys_stacked_size_p8),r0
;; ;;
(pUStk) st1 [r14]=r3 (pUStk) ld4 r3=[r3] // r3 = cpu_data->phys_stacked_size_p8
addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0 (pUStk) st1 [r14]=r17
mov b6=r18 // I0 restore b6
;; ;;
mov r16=ar.bsp // get existing backing store pointer shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition
srlz.i // ensure interruption collection is off
mov r14=r0 // clear r14 mov r14=r0 // clear r14
;;
ld4 r17=[r17] // r17 = cpu_data->phys_stacked_size_p8
mov b6=r22 // restore b6
shr.u r18=r19,16 // get byte size of existing "dirty" partition
(pKStk) br.cond.dpnt.many skip_rbs_switch (pKStk) br.cond.dpnt.many skip_rbs_switch
mov.m ar.ccv=r0 // clear ar.ccv
(pNonSys) br.cond.dpnt.many dont_preserve_current_frame (pNonSys) br.cond.dpnt.many dont_preserve_current_frame
br.cond.sptk.many rbs_switch br.cond.sptk.many rbs_switch
END(ia64_leave_syscall) END(ia64_leave_syscall)
...@@ -1054,7 +1057,7 @@ skip_rbs_switch: ...@@ -1054,7 +1057,7 @@ skip_rbs_switch:
;; ;;
(pUStk) mov ar.rnat=r24 // M2 must happen with RSE in lazy mode (pUStk) mov ar.rnat=r24 // M2 must happen with RSE in lazy mode
nop 0 nop 0
nop 0 (pLvSys)mov r2=r0
mov ar.rsc=r27 // M2 mov ar.rsc=r27 // M2
mov pr=r31,-1 // I0 mov pr=r31,-1 // I0
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment