Commit 70929a57, authored by David Mosberger-Tang, committed by Tony Luck

[IA64] Reschedule __kernel_syscall_via_epc().

Avoid some stalls, which is good for about 2 cycles when invoking a
light-weight handler.  When invoking a heavy-weight handler, this
helps by about 7 cycles, with most of the improvement coming from the
improved branch-prediction achieved by splitting the BBB bundle into
two MIB bundles.
Signed-off-by: David Mosberger-Tang <davidm@hpl.hp.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
parent f8fa5448
...@@ -79,31 +79,34 @@ GLOBAL_ENTRY(__kernel_syscall_via_epc) ...@@ -79,31 +79,34 @@ GLOBAL_ENTRY(__kernel_syscall_via_epc)
;; ;;
rsm psr.be // note: on McKinley "rsm psr.be/srlz.d" is slightly faster than "rum psr.be" rsm psr.be // note: on McKinley "rsm psr.be/srlz.d" is slightly faster than "rum psr.be"
LOAD_FSYSCALL_TABLE(r14) LOAD_FSYSCALL_TABLE(r14)
;;
mov r16=IA64_KR(CURRENT) // 12 cycle read latency mov r16=IA64_KR(CURRENT) // 12 cycle read latency
tnat.nz p10,p9=r15 shladd r18=r17,3,r14
mov r19=NR_syscalls-1 mov r19=NR_syscalls-1
;; ;;
shladd r18=r17,3,r14 lfetch [r18] // M0|1
mov r29=psr // read psr (12 cyc load latency)
srlz.d
cmp.ne p8,p0=r0,r0 // p8 <- FALSE
/* Note: if r17 is a NaT, p6 will be set to zero. */ /* Note: if r17 is a NaT, p6 will be set to zero. */
cmp.geu p6,p7=r19,r17 // (syscall > 0 && syscall < 1024+NR_syscalls)? cmp.geu p6,p7=r19,r17 // (syscall > 0 && syscall < 1024+NR_syscalls)?
;; ;;
(p6) ld8 r18=[r18]
mov r21=ar.fpsr mov r21=ar.fpsr
add r14=-8,r14 // r14 <- addr of fsys_bubble_down entry tnat.nz p10,p9=r15
mov r26=ar.pfs
;; ;;
srlz.d
(p6) ld8 r18=[r18]
nop.i 0
;;
nop.m 0
(p6) mov b7=r18 (p6) mov b7=r18
(p6) tbit.z p8,p0=r18,0 (p6) tbit.z.unc p8,p0=r18,0
nop.m 0
nop.i 0
(p8) br.dptk.many b7 (p8) br.dptk.many b7
(p6) rsm psr.i
mov r27=ar.rsc mov r27=ar.rsc
mov r26=ar.pfs (p6) rsm psr.i
;;
mov r29=psr // read psr (12 cyc load latency)
/* /*
* brl.cond doesn't work as intended because the linker would convert this branch * brl.cond doesn't work as intended because the linker would convert this branch
* into a branch to a PLT. Perhaps there will be a way to avoid this with some * into a branch to a PLT. Perhaps there will be a way to avoid this with some
...@@ -111,6 +114,8 @@ GLOBAL_ENTRY(__kernel_syscall_via_epc) ...@@ -111,6 +114,8 @@ GLOBAL_ENTRY(__kernel_syscall_via_epc)
* instead. * instead.
*/ */
#ifdef CONFIG_ITANIUM #ifdef CONFIG_ITANIUM
add r14=-8,r14 // r14 <- addr of fsys_bubble_down entry
;;
(p6) ld8 r14=[r14] // r14 <- fsys_bubble_down (p6) ld8 r14=[r14] // r14 <- fsys_bubble_down
;; ;;
(p6) mov b7=r14 (p6) mov b7=r14
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment