Commit ba9ccbcf authored by David Mosberger's avatar David Mosberger

ia64: Finish the fsyscall support (finally!). Now fsyscall stubs will

	run faster than break-based syscall stubs, even if there is
	no light-weight syscall handler.
	Adds a new boot command-line option "nolwsys" which can be used
	to turn off light-weight system call handlers.  Good for
	performance measurement and (potentially) for debugging.
parent ce2070ec
...@@ -1464,3 +1464,6 @@ sys_call_table: ...@@ -1464,3 +1464,6 @@ sys_call_table:
data8 ia64_ni_syscall data8 ia64_ni_syscall
data8 ia64_ni_syscall data8 ia64_ni_syscall
data8 ia64_ni_syscall data8 ia64_ni_syscall
data8 ia64_ni_syscall
.org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include <asm/thread_info.h> #include <asm/thread_info.h>
#include <asm/sal.h> #include <asm/sal.h>
#include <asm/system.h> #include <asm/system.h>
#include <asm/unistd.h>
#include "entry.h" #include "entry.h"
...@@ -41,59 +42,10 @@ ...@@ -41,59 +42,10 @@
* ar.pfs = previous frame-state (as passed into the fsyscall handler) * ar.pfs = previous frame-state (as passed into the fsyscall handler)
*/ */
#if 1
ENTRY(fsys_fallback_syscall)
/*
* This is called for system calls which are entered via epc, but don't
* have a light-weight handler. We need to bubble down into the kernel,
* and that requires setting up a minimal pt_regs structure, and initializing
* the CPU state more or less as if an interruption had occurred. To make
* syscall-restarts work, we setup pt_regs such that cr_iip points to the
* second instruction in syscall_via_break. Decrementing the IP hence will
* restart the syscall via break and not decrementing IP will return us
* to the caller, as usual.
*/
# define PSR_PRESERVED_BITS (IA64_PSR_UP | IA64_PSR_MFL | IA64_PSR_MFH | IA64_PSR_PK \
| IA64_PSR_DT | IA64_PSR_RT)
/*
* Reading psr.l gives us only bits 0-31, psr.it, and psr.mc. The rest we have
* to synthesize.
*/
# define PSR_ONE_BITS ((3 << IA64_PSR_CPL0_BIT) | (0x1 << IA64_PSR_RI_BIT) \
| IA64_PSR_BN)
// Two values are derived from the current psr (r29):
//   r9  = psr & PSR_PRESERVED_BITS  -> written to psr.l below
//   r29 = psr | PSR_ONE_BITS        -> kept in r29 for the syscall entry path
mov r29=psr
movl r9=PSR_PRESERVED_BITS
mov r20=r1 // keep the incoming r1 in r20; r1 is overwritten just below
movl r8=PSR_ONE_BITS
;;
mov r1=IA64_KR(CURRENT) // r1 = current (physical); 12 cycle read lat.
and r9=r9,r29
or r29=r8,r29
;;
mov psr.l=r9 // slam the door
// Stash the application registers and b6 in scratch registers while the
// psr.l write is still pending (it is serialized by the srlz.i below).
mov r21=ar.fpsr
mov r26=ar.pfs
mov r25=ar.unat
mov r27=ar.rsc
mov r19=b6
;;
srlz.i // ensure new psr.l has been established
movl r28=GATE_ADDR // cr.iip XXX fix me!! Should be: GATE_ADDR(syscall_via_break)
invala
mov r31=pr
adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r1 // r16 = &current->thread.on_ustack
;;
st1 [r16]=r0 // clear current->thread.on_ustack flag
cmp.ne pKStk,pUStk=r0,r0 // set pKStk <- 0, pUStk <- 1
br.cond.sptk.many ia64_enter_syscall // continue at the ia64_enter_syscall label
END(fsys_fallback_syscall)
#endif
ENTRY(fsys_ni_syscall) ENTRY(fsys_ni_syscall)
.prologue
.altrp b6
.body
mov r8=ENOSYS mov r8=ENOSYS
mov r10=-1 mov r10=-1
MCKINLEY_E9_WORKAROUND MCKINLEY_E9_WORKAROUND
...@@ -101,6 +53,9 @@ ENTRY(fsys_ni_syscall) ...@@ -101,6 +53,9 @@ ENTRY(fsys_ni_syscall)
END(fsys_ni_syscall) END(fsys_ni_syscall)
ENTRY(fsys_getpid) ENTRY(fsys_getpid)
.prologue
.altrp b6
.body
add r9=TI_FLAGS+IA64_TASK_SIZE,r16 add r9=TI_FLAGS+IA64_TASK_SIZE,r16
;; ;;
ld4 r9=[r9] ld4 r9=[r9]
...@@ -116,6 +71,9 @@ ENTRY(fsys_getpid) ...@@ -116,6 +71,9 @@ ENTRY(fsys_getpid)
END(fsys_getpid) END(fsys_getpid)
ENTRY(fsys_getppid) ENTRY(fsys_getppid)
.prologue
.altrp b6
.body
add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16 add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16
;; ;;
ld8 r17=[r17] // r17 = current->group_leader ld8 r17=[r17] // r17 = current->group_leader
...@@ -161,6 +119,9 @@ ENTRY(fsys_getppid) ...@@ -161,6 +119,9 @@ ENTRY(fsys_getppid)
END(fsys_getppid) END(fsys_getppid)
ENTRY(fsys_set_tid_address) ENTRY(fsys_set_tid_address)
.prologue
.altrp b6
.body
add r9=TI_FLAGS+IA64_TASK_SIZE,r16 add r9=TI_FLAGS+IA64_TASK_SIZE,r16
;; ;;
ld4 r9=[r9] ld4 r9=[r9]
...@@ -200,6 +161,9 @@ END(fsys_set_tid_address) ...@@ -200,6 +161,9 @@ END(fsys_set_tid_address)
*/ */
ENTRY(fsys_gettimeofday) ENTRY(fsys_gettimeofday)
.prologue
.altrp b6
.body
add r9=TI_FLAGS+IA64_TASK_SIZE,r16 add r9=TI_FLAGS+IA64_TASK_SIZE,r16
movl r3=THIS_CPU(cpu_info) movl r3=THIS_CPU(cpu_info)
...@@ -213,7 +177,7 @@ ENTRY(fsys_gettimeofday) ...@@ -213,7 +177,7 @@ ENTRY(fsys_gettimeofday)
movl r19=xtime // xtime is a timespec struct movl r19=xtime // xtime is a timespec struct
ld8 r10=[r10] // r10 <- __per_cpu_offset[0] ld8 r10=[r10] // r10 <- __per_cpu_offset[0]
movl r21=cpu_info__per_cpu movl r21=THIS_CPU(cpu_info)
;; ;;
add r10=r21, r10 // r10 <- &cpu_data(time_keeper_id) add r10=r21, r10 // r10 <- &cpu_data(time_keeper_id)
tbit.nz p8,p0 = r2, IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT_BIT tbit.nz p8,p0 = r2, IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT_BIT
...@@ -368,262 +332,373 @@ EX(.fail, st8 [r9]=r3) // store them in the timeval struct ...@@ -368,262 +332,373 @@ EX(.fail, st8 [r9]=r3) // store them in the timeval struct
br.ret.spnt.many b6 // return with r8 set to EINVAL br.ret.spnt.many b6 // return with r8 set to EINVAL
END(fsys_gettimeofday) END(fsys_gettimeofday)
ENTRY(fsys_fallback_syscall)
.prologue
.altrp b6
.body
/*
* We only get here from light-weight syscall handlers. Thus, we already
* know that r15 contains a valid syscall number. No need to re-check.
*
* This stub looks up the heavy-weight handler for syscall r15 in
* sys_call_table and loads psr, ar.rsc, ar.fpsr and ar.pfs into r29, r27,
* r21 and r26, which are the registers fsys_bubble_down lists in its
* "On entry" contract (together with r18).  There is no branch at the end:
* execution runs straight on into the code placed after this routine.
*/
adds r17=-1024,r15 // r17 = syscall number - 1024 (index into sys_call_table)
movl r14=sys_call_table
;;
shladd r18=r17,3,r14 // r18 = sys_call_table + 8*r17 (address of the table entry)
;;
ld8 r18=[r18] // load normal (heavy-weight) syscall entry-point
mov r29=psr // read psr (12 cyc load latency)
mov r27=ar.rsc
mov r21=ar.fpsr
mov r26=ar.pfs
END(fsys_fallback_syscall)
/* FALL THROUGH */
GLOBAL_ENTRY(fsys_bubble_down)
.prologue
.altrp b6
.body
/*
* We get here for syscalls that don't have a lightweight handler. For those, we
* need to bubble down into the kernel and that requires setting up a minimal
* pt_regs structure, and initializing the CPU state more or less as if an
* interruption had occurred. To make syscall-restarts work, we setup pt_regs
* such that cr_iip points to the second instruction in syscall_via_break.
* Decrementing the IP hence will restart the syscall via break and not
* decrementing IP will return us to the caller, as usual. Note that we preserve
* the value of psr.pp rather than initializing it from dcr.pp. This makes it
* possible to distinguish fsyscall execution from other privileged execution.
*
* On entry:
* - normal fsyscall handler register usage, except that we also have:
* - r18: address of syscall entry point
* - r21: ar.fpsr
* - r26: ar.pfs
* - r27: ar.rsc
* - r29: psr
*
* Outline of the code below:
* 1. save ar.unat, ar.bspstore, pr, gp, b6 and ar.rnat in scratch registers;
* 2. write the masked psr to psr.l and serialize it with srlz.i;
* 3. switch to the kernel register backing store and compute the memory-stack
*    base in r1;
* 4. call ia64_syscall_setup, set psr.i, point rp at ia64_ret_from_syscall and
*    dispatch to the handler in b6, or to ia64_trace_syscall when
*    TIF_SYSCALL_TRACE is set.
*/
# define PSR_PRESERVED_BITS (IA64_PSR_UP | IA64_PSR_MFL | IA64_PSR_MFH | IA64_PSR_PK \
| IA64_PSR_DT | IA64_PSR_PP | IA64_PSR_RT | IA64_PSR_IC)
/*
* Reading psr.l gives us only bits 0-31, psr.it, and psr.mc. The rest we have
* to synthesize.
*/
# define PSR_ONE_BITS ((3 << IA64_PSR_CPL0_BIT) | (0x1 << IA64_PSR_RI_BIT) \
| IA64_PSR_BN)
invala
movl r8=PSR_ONE_BITS
mov r25=ar.unat // save ar.unat (5 cyc)
movl r9=PSR_PRESERVED_BITS
mov ar.rsc=0 // set enforced lazy mode, pl 0, little-endian, loadrs=0
movl r28=GATE_ADDR // cr.iip XXX fix me!! Should be: GATE_ADDR(syscall_via_break)
;;
mov r23=ar.bspstore // save ar.bspstore (12 cyc)
mov r31=pr // save pr (2 cyc)
mov r20=r1 // save caller's gp in r20
;;
mov r2=r16 // copy current task addr to addl-addressable register
and r9=r9,r29 // r9 = psr & PSR_PRESERVED_BITS (value written to psr.l)
mov r19=b6 // save b6 (2 cyc)
;;
mov psr.l=r9 // slam the door (17 cyc to srlz.i)
or r29=r8,r29 // construct cr.ipsr value to save
addl r22=IA64_RBS_OFFSET,r2 // compute base of RBS
;;
mov.m r24=ar.rnat // read ar.rnat (5 cyc lat)
lfetch.fault.excl.nt1 [r22]
adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r2 // r16 = &current->thread.on_ustack
// ensure previous insn group is issued before we stall for srlz.i:
;;
srlz.i // ensure new psr.l has been established
/////////////////////////////////////////////////////////////////////////////
////////// from this point on, execution is not interruptible anymore
/////////////////////////////////////////////////////////////////////////////
addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 // compute base of memory stack
cmp.ne pKStk,pUStk=r0,r0 // set pKStk <- 0, pUStk <- 1
;;
st1 [r16]=r0 // clear current->thread.on_ustack flag
mov ar.bspstore=r22 // switch to kernel RBS
mov b6=r18 // copy syscall entry-point to b6 (7 cyc)
add r3=TI_FLAGS+IA64_TASK_SIZE,r2 // r3 = &current_thread_info()->flags
;;
ld4 r3=[r3] // r3 = current_thread_info()->flags
mov r18=ar.bsp // save (kernel) ar.bsp (12 cyc)
mov ar.rsc=0x3 // set eager mode, pl 0, little-endian, loadrs=0
br.call.sptk.many b7=ia64_syscall_setup
;;
ssm psr.i // set psr.i now that ia64_syscall_setup has returned
movl r2=ia64_ret_from_syscall
;;
mov rp=r2 // set the real return addr
tbit.z p8,p0=r3,TIF_SYSCALL_TRACE // p8 <- TRUE if TIF_SYSCALL_TRACE is clear in the flags
(p8) br.call.sptk.many b6=b6 // ignore this return addr
br.cond.sptk ia64_trace_syscall // reached only when p8 is FALSE, i.e. the trace flag is set
END(fsys_bubble_down)
.rodata .rodata
.align 8 .align 8
.globl fsyscall_table .globl fsyscall_table
data8 fsys_bubble_down
fsyscall_table: fsyscall_table:
data8 fsys_ni_syscall data8 fsys_ni_syscall
data8 fsys_fallback_syscall // exit // 1025 data8 0 // exit // 1025
data8 fsys_fallback_syscall // read data8 0 // read
data8 fsys_fallback_syscall // write data8 0 // write
data8 fsys_fallback_syscall // open data8 0 // open
data8 fsys_fallback_syscall // close data8 0 // close
data8 fsys_fallback_syscall // creat // 1030 data8 0 // creat // 1030
data8 fsys_fallback_syscall // link data8 0 // link
data8 fsys_fallback_syscall // unlink data8 0 // unlink
data8 fsys_fallback_syscall // execve data8 0 // execve
data8 fsys_fallback_syscall // chdir data8 0 // chdir
data8 fsys_fallback_syscall // fchdir // 1035 data8 0 // fchdir // 1035
data8 fsys_fallback_syscall // utimes data8 0 // utimes
data8 fsys_fallback_syscall // mknod data8 0 // mknod
data8 fsys_fallback_syscall // chmod data8 0 // chmod
data8 fsys_fallback_syscall // chown data8 0 // chown
data8 fsys_fallback_syscall // lseek // 1040 data8 0 // lseek // 1040
data8 fsys_getpid // getpid data8 fsys_getpid // getpid
data8 fsys_getppid // getppid data8 fsys_getppid // getppid
data8 fsys_fallback_syscall // mount data8 0 // mount
data8 fsys_fallback_syscall // umount data8 0 // umount
data8 fsys_fallback_syscall // setuid // 1045 data8 0 // setuid // 1045
data8 fsys_fallback_syscall // getuid data8 0 // getuid
data8 fsys_fallback_syscall // geteuid data8 0 // geteuid
data8 fsys_fallback_syscall // ptrace data8 0 // ptrace
data8 fsys_fallback_syscall // access data8 0 // access
data8 fsys_fallback_syscall // sync // 1050 data8 0 // sync // 1050
data8 fsys_fallback_syscall // fsync data8 0 // fsync
data8 fsys_fallback_syscall // fdatasync data8 0 // fdatasync
data8 fsys_fallback_syscall // kill data8 0 // kill
data8 fsys_fallback_syscall // rename data8 0 // rename
data8 fsys_fallback_syscall // mkdir // 1055 data8 0 // mkdir // 1055
data8 fsys_fallback_syscall // rmdir data8 0 // rmdir
data8 fsys_fallback_syscall // dup data8 0 // dup
data8 fsys_fallback_syscall // pipe data8 0 // pipe
data8 fsys_fallback_syscall // times data8 0 // times
data8 fsys_fallback_syscall // brk // 1060 data8 0 // brk // 1060
data8 fsys_fallback_syscall // setgid data8 0 // setgid
data8 fsys_fallback_syscall // getgid data8 0 // getgid
data8 fsys_fallback_syscall // getegid data8 0 // getegid
data8 fsys_fallback_syscall // acct data8 0 // acct
data8 fsys_fallback_syscall // ioctl // 1065 data8 0 // ioctl // 1065
data8 fsys_fallback_syscall // fcntl data8 0 // fcntl
data8 fsys_fallback_syscall // umask data8 0 // umask
data8 fsys_fallback_syscall // chroot data8 0 // chroot
data8 fsys_fallback_syscall // ustat data8 0 // ustat
data8 fsys_fallback_syscall // dup2 // 1070 data8 0 // dup2 // 1070
data8 fsys_fallback_syscall // setreuid data8 0 // setreuid
data8 fsys_fallback_syscall // setregid data8 0 // setregid
data8 fsys_fallback_syscall // getresuid data8 0 // getresuid
data8 fsys_fallback_syscall // setresuid data8 0 // setresuid
data8 fsys_fallback_syscall // getresgid // 1075 data8 0 // getresgid // 1075
data8 fsys_fallback_syscall // setresgid data8 0 // setresgid
data8 fsys_fallback_syscall // getgroups data8 0 // getgroups
data8 fsys_fallback_syscall // setgroups data8 0 // setgroups
data8 fsys_fallback_syscall // getpgid data8 0 // getpgid
data8 fsys_fallback_syscall // setpgid // 1080 data8 0 // setpgid // 1080
data8 fsys_fallback_syscall // setsid data8 0 // setsid
data8 fsys_fallback_syscall // getsid data8 0 // getsid
data8 fsys_fallback_syscall // sethostname data8 0 // sethostname
data8 fsys_fallback_syscall // setrlimit data8 0 // setrlimit
data8 fsys_fallback_syscall // getrlimit // 1085 data8 0 // getrlimit // 1085
data8 fsys_fallback_syscall // getrusage data8 0 // getrusage
data8 fsys_gettimeofday // gettimeofday data8 fsys_gettimeofday // gettimeofday
data8 fsys_fallback_syscall // settimeofday data8 0 // settimeofday
data8 fsys_fallback_syscall // select data8 0 // select
data8 fsys_fallback_syscall // poll // 1090 data8 0 // poll // 1090
data8 fsys_fallback_syscall // symlink data8 0 // symlink
data8 fsys_fallback_syscall // readlink data8 0 // readlink
data8 fsys_fallback_syscall // uselib data8 0 // uselib
data8 fsys_fallback_syscall // swapon data8 0 // swapon
data8 fsys_fallback_syscall // swapoff // 1095 data8 0 // swapoff // 1095
data8 fsys_fallback_syscall // reboot data8 0 // reboot
data8 fsys_fallback_syscall // truncate data8 0 // truncate
data8 fsys_fallback_syscall // ftruncate data8 0 // ftruncate
data8 fsys_fallback_syscall // fchmod data8 0 // fchmod
data8 fsys_fallback_syscall // fchown // 1100 data8 0 // fchown // 1100
data8 fsys_fallback_syscall // getpriority data8 0 // getpriority
data8 fsys_fallback_syscall // setpriority data8 0 // setpriority
data8 fsys_fallback_syscall // statfs data8 0 // statfs
data8 fsys_fallback_syscall // fstatfs data8 0 // fstatfs
data8 fsys_fallback_syscall // gettid // 1105 data8 0 // gettid // 1105
data8 fsys_fallback_syscall // semget data8 0 // semget
data8 fsys_fallback_syscall // semop data8 0 // semop
data8 fsys_fallback_syscall // semctl data8 0 // semctl
data8 fsys_fallback_syscall // msgget data8 0 // msgget
data8 fsys_fallback_syscall // msgsnd // 1110 data8 0 // msgsnd // 1110
data8 fsys_fallback_syscall // msgrcv data8 0 // msgrcv
data8 fsys_fallback_syscall // msgctl data8 0 // msgctl
data8 fsys_fallback_syscall // shmget data8 0 // shmget
data8 fsys_fallback_syscall // shmat data8 0 // shmat
data8 fsys_fallback_syscall // shmdt // 1115 data8 0 // shmdt // 1115
data8 fsys_fallback_syscall // shmctl data8 0 // shmctl
data8 fsys_fallback_syscall // syslog data8 0 // syslog
data8 fsys_fallback_syscall // setitimer data8 0 // setitimer
data8 fsys_fallback_syscall // getitimer data8 0 // getitimer
data8 fsys_fallback_syscall // 1120 data8 0 // 1120
data8 fsys_fallback_syscall data8 0
data8 fsys_fallback_syscall data8 0
data8 fsys_fallback_syscall // vhangup data8 0 // vhangup
data8 fsys_fallback_syscall // lchown data8 0 // lchown
data8 fsys_fallback_syscall // remap_file_pages // 1125 data8 0 // remap_file_pages // 1125
data8 fsys_fallback_syscall // wait4 data8 0 // wait4
data8 fsys_fallback_syscall // sysinfo data8 0 // sysinfo
data8 fsys_fallback_syscall // clone data8 0 // clone
data8 fsys_fallback_syscall // setdomainname data8 0 // setdomainname
data8 fsys_fallback_syscall // newuname // 1130 data8 0 // newuname // 1130
data8 fsys_fallback_syscall // adjtimex data8 0 // adjtimex
data8 fsys_fallback_syscall data8 0
data8 fsys_fallback_syscall // init_module data8 0 // init_module
data8 fsys_fallback_syscall // delete_module data8 0 // delete_module
data8 fsys_fallback_syscall // 1135 data8 0 // 1135
data8 fsys_fallback_syscall data8 0
data8 fsys_fallback_syscall // quotactl data8 0 // quotactl
data8 fsys_fallback_syscall // bdflush data8 0 // bdflush
data8 fsys_fallback_syscall // sysfs data8 0 // sysfs
data8 fsys_fallback_syscall // personality // 1140 data8 0 // personality // 1140
data8 fsys_fallback_syscall // afs_syscall data8 0 // afs_syscall
data8 fsys_fallback_syscall // setfsuid data8 0 // setfsuid
data8 fsys_fallback_syscall // setfsgid data8 0 // setfsgid
data8 fsys_fallback_syscall // getdents data8 0 // getdents
data8 fsys_fallback_syscall // flock // 1145 data8 0 // flock // 1145
data8 fsys_fallback_syscall // readv data8 0 // readv
data8 fsys_fallback_syscall // writev data8 0 // writev
data8 fsys_fallback_syscall // pread64 data8 0 // pread64
data8 fsys_fallback_syscall // pwrite64 data8 0 // pwrite64
data8 fsys_fallback_syscall // sysctl // 1150 data8 0 // sysctl // 1150
data8 fsys_fallback_syscall // mmap data8 0 // mmap
data8 fsys_fallback_syscall // munmap data8 0 // munmap
data8 fsys_fallback_syscall // mlock data8 0 // mlock
data8 fsys_fallback_syscall // mlockall data8 0 // mlockall
data8 fsys_fallback_syscall // mprotect // 1155 data8 0 // mprotect // 1155
data8 fsys_fallback_syscall // mremap data8 0 // mremap
data8 fsys_fallback_syscall // msync data8 0 // msync
data8 fsys_fallback_syscall // munlock data8 0 // munlock
data8 fsys_fallback_syscall // munlockall data8 0 // munlockall
data8 fsys_fallback_syscall // sched_getparam // 1160 data8 0 // sched_getparam // 1160
data8 fsys_fallback_syscall // sched_setparam data8 0 // sched_setparam
data8 fsys_fallback_syscall // sched_getscheduler data8 0 // sched_getscheduler
data8 fsys_fallback_syscall // sched_setscheduler data8 0 // sched_setscheduler
data8 fsys_fallback_syscall // sched_yield data8 0 // sched_yield
data8 fsys_fallback_syscall // sched_get_priority_max // 1165 data8 0 // sched_get_priority_max // 1165
data8 fsys_fallback_syscall // sched_get_priority_min data8 0 // sched_get_priority_min
data8 fsys_fallback_syscall // sched_rr_get_interval data8 0 // sched_rr_get_interval
data8 fsys_fallback_syscall // nanosleep data8 0 // nanosleep
data8 fsys_fallback_syscall // nfsservctl data8 0 // nfsservctl
data8 fsys_fallback_syscall // prctl // 1170 data8 0 // prctl // 1170
data8 fsys_fallback_syscall // getpagesize data8 0 // getpagesize
data8 fsys_fallback_syscall // mmap2 data8 0 // mmap2
data8 fsys_fallback_syscall // pciconfig_read data8 0 // pciconfig_read
data8 fsys_fallback_syscall // pciconfig_write data8 0 // pciconfig_write
data8 fsys_fallback_syscall // perfmonctl // 1175 data8 0 // perfmonctl // 1175
data8 fsys_fallback_syscall // sigaltstack data8 0 // sigaltstack
data8 fsys_fallback_syscall // rt_sigaction data8 0 // rt_sigaction
data8 fsys_fallback_syscall // rt_sigpending data8 0 // rt_sigpending
data8 fsys_fallback_syscall // rt_sigprocmask data8 0 // rt_sigprocmask
data8 fsys_fallback_syscall // rt_sigqueueinfo // 1180 data8 0 // rt_sigqueueinfo // 1180
data8 fsys_fallback_syscall // rt_sigreturn data8 0 // rt_sigreturn
data8 fsys_fallback_syscall // rt_sigsuspend data8 0 // rt_sigsuspend
data8 fsys_fallback_syscall // rt_sigtimedwait data8 0 // rt_sigtimedwait
data8 fsys_fallback_syscall // getcwd data8 0 // getcwd
data8 fsys_fallback_syscall // capget // 1185 data8 0 // capget // 1185
data8 fsys_fallback_syscall // capset data8 0 // capset
data8 fsys_fallback_syscall // sendfile data8 0 // sendfile
data8 fsys_fallback_syscall data8 0
data8 fsys_fallback_syscall data8 0
data8 fsys_fallback_syscall // socket // 1190 data8 0 // socket // 1190
data8 fsys_fallback_syscall // bind data8 0 // bind
data8 fsys_fallback_syscall // connect data8 0 // connect
data8 fsys_fallback_syscall // listen data8 0 // listen
data8 fsys_fallback_syscall // accept data8 0 // accept
data8 fsys_fallback_syscall // getsockname // 1195 data8 0 // getsockname // 1195
data8 fsys_fallback_syscall // getpeername data8 0 // getpeername
data8 fsys_fallback_syscall // socketpair data8 0 // socketpair
data8 fsys_fallback_syscall // send data8 0 // send
data8 fsys_fallback_syscall // sendto data8 0 // sendto
data8 fsys_fallback_syscall // recv // 1200 data8 0 // recv // 1200
data8 fsys_fallback_syscall // recvfrom data8 0 // recvfrom
data8 fsys_fallback_syscall // shutdown data8 0 // shutdown
data8 fsys_fallback_syscall // setsockopt data8 0 // setsockopt
data8 fsys_fallback_syscall // getsockopt data8 0 // getsockopt
data8 fsys_fallback_syscall // sendmsg // 1205 data8 0 // sendmsg // 1205
data8 fsys_fallback_syscall // recvmsg data8 0 // recvmsg
data8 fsys_fallback_syscall // pivot_root data8 0 // pivot_root
data8 fsys_fallback_syscall // mincore data8 0 // mincore
data8 fsys_fallback_syscall // madvise data8 0 // madvise
data8 fsys_fallback_syscall // newstat // 1210 data8 0 // newstat // 1210
data8 fsys_fallback_syscall // newlstat data8 0 // newlstat
data8 fsys_fallback_syscall // newfstat data8 0 // newfstat
data8 fsys_fallback_syscall // clone2 data8 0 // clone2
data8 fsys_fallback_syscall // getdents64 data8 0 // getdents64
data8 fsys_fallback_syscall // getunwind // 1215 data8 0 // getunwind // 1215
data8 fsys_fallback_syscall // readahead data8 0 // readahead
data8 fsys_fallback_syscall // setxattr data8 0 // setxattr
data8 fsys_fallback_syscall // lsetxattr data8 0 // lsetxattr
data8 fsys_fallback_syscall // fsetxattr data8 0 // fsetxattr
data8 fsys_fallback_syscall // getxattr // 1220 data8 0 // getxattr // 1220
data8 fsys_fallback_syscall // lgetxattr data8 0 // lgetxattr
data8 fsys_fallback_syscall // fgetxattr data8 0 // fgetxattr
data8 fsys_fallback_syscall // listxattr data8 0 // listxattr
data8 fsys_fallback_syscall // llistxattr data8 0 // llistxattr
data8 fsys_fallback_syscall // flistxattr // 1225 data8 0 // flistxattr // 1225
data8 fsys_fallback_syscall // removexattr data8 0 // removexattr
data8 fsys_fallback_syscall // lremovexattr data8 0 // lremovexattr
data8 fsys_fallback_syscall // fremovexattr data8 0 // fremovexattr
data8 fsys_fallback_syscall // tkill data8 0 // tkill
data8 fsys_fallback_syscall // futex // 1230 data8 0 // futex // 1230
data8 fsys_fallback_syscall // sched_setaffinity data8 0 // sched_setaffinity
data8 fsys_fallback_syscall // sched_getaffinity data8 0 // sched_getaffinity
data8 fsys_set_tid_address // set_tid_address data8 fsys_set_tid_address // set_tid_address
data8 fsys_fallback_syscall // unused data8 0 // unused
data8 fsys_fallback_syscall // unused // 1235 data8 0 // unused // 1235
data8 fsys_fallback_syscall // exit_group data8 0 // exit_group
data8 fsys_fallback_syscall // lookup_dcookie data8 0 // lookup_dcookie
data8 fsys_fallback_syscall // io_setup data8 0 // io_setup
data8 fsys_fallback_syscall // io_destroy data8 0 // io_destroy
data8 fsys_fallback_syscall // io_getevents // 1240 data8 0 // io_getevents // 1240
data8 fsys_fallback_syscall // io_submit data8 0 // io_submit
data8 fsys_fallback_syscall // io_cancel data8 0 // io_cancel
data8 fsys_fallback_syscall // epoll_create data8 0 // epoll_create
data8 fsys_fallback_syscall // epoll_ctl data8 0 // epoll_ctl
data8 fsys_fallback_syscall // epoll_wait // 1245 data8 0 // epoll_wait // 1245
data8 fsys_fallback_syscall // restart_syscall data8 0 // restart_syscall
data8 fsys_fallback_syscall // semtimedop data8 0 // semtimedop
data8 fsys_fallback_syscall // timer_create data8 0 // timer_create
data8 fsys_fallback_syscall // timer_settime data8 0 // timer_settime
data8 fsys_fallback_syscall // timer_gettime // 1250 data8 0 // timer_gettime // 1250
data8 fsys_fallback_syscall // timer_getoverrun data8 0 // timer_getoverrun
data8 fsys_fallback_syscall // timer_delete data8 0 // timer_delete
data8 fsys_fallback_syscall // clock_settime data8 0 // clock_settime
data8 fsys_fallback_syscall // clock_gettime data8 0 // clock_gettime
data8 fsys_fallback_syscall // clock_getres // 1255 data8 0 // clock_getres // 1255
data8 fsys_fallback_syscall // clock_nanosleep data8 0 // clock_nanosleep
data8 fsys_fallback_syscall data8 0
data8 fsys_fallback_syscall data8 0
data8 fsys_fallback_syscall data8 0
data8 fsys_fallback_syscall // 1260 data8 0 // 1260
data8 fsys_fallback_syscall data8 0
data8 fsys_fallback_syscall data8 0
data8 fsys_fallback_syscall data8 0
data8 fsys_fallback_syscall data8 0
data8 fsys_fallback_syscall // 1265 data8 0 // 1265
data8 fsys_fallback_syscall data8 0
data8 fsys_fallback_syscall data8 0
data8 fsys_fallback_syscall data8 0
data8 fsys_fallback_syscall data8 0
data8 fsys_fallback_syscall // 1270 data8 0 // 1270
data8 fsys_fallback_syscall data8 0
data8 fsys_fallback_syscall data8 0
data8 fsys_fallback_syscall data8 0
data8 fsys_fallback_syscall data8 0
data8 fsys_fallback_syscall // 1275 data8 0 // 1275
data8 fsys_fallback_syscall data8 0
data8 fsys_fallback_syscall data8 0
data8 fsys_fallback_syscall data8 0
data8 0
.org fsyscall_table + 8*NR_syscalls // guard against failures to increase NR_syscalls
...@@ -11,12 +11,10 @@ ...@@ -11,12 +11,10 @@
#include <asm/sigcontext.h> #include <asm/sigcontext.h>
#include <asm/system.h> #include <asm/system.h>
#include <asm/unistd.h> #include <asm/unistd.h>
#include <asm/page.h>
.section .text.gate, "ax" .section .text.gate, "ax"
.start_gate: .start_gate:
#if CONFIG_FSYS #if CONFIG_FSYS
#include <asm/errno.h> #include <asm/errno.h>
...@@ -49,6 +47,7 @@ END(syscall_via_break) ...@@ -49,6 +47,7 @@ END(syscall_via_break)
* all other "scratch" registers: undefined * all other "scratch" registers: undefined
* all "preserved" registers: same as on entry * all "preserved" registers: same as on entry
*/ */
GLOBAL_ENTRY(syscall_via_epc) GLOBAL_ENTRY(syscall_via_epc)
.prologue .prologue
.altrp b6 .altrp b6
...@@ -65,19 +64,38 @@ GLOBAL_ENTRY(syscall_via_epc) ...@@ -65,19 +64,38 @@ GLOBAL_ENTRY(syscall_via_epc)
} }
;; ;;
rsm psr.be rsm psr.be
movl r18=fsyscall_table movl r14=fsyscall_table
mov r16=IA64_KR(CURRENT) mov r16=IA64_KR(CURRENT) // 12 cycle read latency
mov r19=255 mov r19=NR_syscalls-1
;;
shladd r18=r17,3,r18
cmp.geu p6,p0=r19,r17 // (syscall > 0 && syscall <= 1024+255)?
;; ;;
shladd r18=r17,3,r14
srlz.d // ensure little-endian byteorder is in effect srlz.d // ensure little-endian byteorder is in effect
cmp.ne p8,p0=r0,r0 // p8 <- FALSE
/* Note: if r17 is a NaT, p6 will be set to zero. */
cmp.geu p6,p7=r19,r17 // (syscall > 0 && syscall < 1024+NR_syscalls)?
;;
(p6) ld8 r18=[r18] (p6) ld8 r18=[r18]
mov r29=psr // read psr (12 cyc load latency)
add r14=-8,r14 // r14 <- addr of fsys_bubble_down entry
;; ;;
(p6) mov b7=r18 (p6) mov b7=r18
(p6) tbit.z p8,p0=r18,0
(p8) br.dptk.many b7
mov r27=ar.rsc
mov r21=ar.fpsr
mov r26=ar.pfs
#if 1/*def CONFIG_ITANIUM*/
(p6) ld8 r14=[r14] // r14 <- fsys_bubble_down
;;
(p6) mov b7=r14
(p6) br.sptk.many b7 (p6) br.sptk.many b7
#else
/* We can't do this until gate is a proper ELF DSO. */
(p6) brl.cond.sptk fsys_bubble_down
#endif
mov r10=-1 mov r10=-1
mov r8=ENOSYS mov r8=ENOSYS
...@@ -85,24 +103,6 @@ GLOBAL_ENTRY(syscall_via_epc) ...@@ -85,24 +103,6 @@ GLOBAL_ENTRY(syscall_via_epc)
br.ret.sptk.many b6 br.ret.sptk.many b6
END(syscall_via_epc) END(syscall_via_epc)
#if 0
GLOBAL_ENTRY(fsys_fallback_syscall)
/*
* It would be better/faster to do the SAVE_MIN magic directly here, but for now
* we simply fall back on doing a system-call via break. Good enough
* to get started. (Note: we have to do this through the gate page again, since
* the br.ret will switch us back to user-level privilege.)
*
* XXX Move this back to fsys.S after changing it over to avoid break 0x100000.
*/
movl r2=(syscall_via_break - .start_gate) + GATE_ADDR // r2 = GATE_ADDR + offset of syscall_via_break from .start_gate
;;
MCKINLEY_E9_WORKAROUND
mov b7=r2
br.ret.sptk.many b7 // transfer to syscall_via_break via br.ret (see the note above)
END(fsys_fallback_syscall)
#endif
#endif /* CONFIG_FSYS */ #endif /* CONFIG_FSYS */
# define ARG0_OFF (16 + IA64_SIGFRAME_ARG0_OFFSET) # define ARG0_OFF (16 + IA64_SIGFRAME_ARG0_OFFSET)
......
...@@ -637,7 +637,6 @@ END(daccess_bit) ...@@ -637,7 +637,6 @@ END(daccess_bit)
///////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////
// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33) // 0x2c00 Entry 11 (size 64 bundles) Break instruction (33)
ENTRY(break_fault) ENTRY(break_fault)
.global ia64_enter_syscall
/* /*
* The streamlined system call entry/exit paths only save/restore the initial part * The streamlined system call entry/exit paths only save/restore the initial part
* of pt_regs. This implies that the callers of system-calls must adhere to the * of pt_regs. This implies that the callers of system-calls must adhere to the
...@@ -654,7 +653,7 @@ ENTRY(break_fault) ...@@ -654,7 +653,7 @@ ENTRY(break_fault)
* to prevent leaking bits from kernel to user level. * to prevent leaking bits from kernel to user level.
*/ */
DBG_FAULT(11) DBG_FAULT(11)
mov r16=IA64_KR(CURRENT) // r16 = current (physical); 12 cycle read lat. mov r16=IA64_KR(CURRENT) // r16 = current task; 12 cycle read lat.
mov r17=cr.iim mov r17=cr.iim
mov r18=__IA64_BREAK_SYSCALL mov r18=__IA64_BREAK_SYSCALL
mov r21=ar.fpsr mov r21=ar.fpsr
...@@ -673,7 +672,7 @@ ENTRY(break_fault) ...@@ -673,7 +672,7 @@ ENTRY(break_fault)
;; ;;
ld1 r17=[r16] // load current->thread.on_ustack flag ld1 r17=[r16] // load current->thread.on_ustack flag
st1 [r16]=r0 // clear current->thread.on_ustack flag st1 [r16]=r0 // clear current->thread.on_ustack flag
adds r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 // set r1 for MINSTATE_START_SAVE_MIN_VIRT add r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 // set r1 for MINSTATE_START_SAVE_MIN_VIRT
;; ;;
invala invala
...@@ -682,6 +681,7 @@ ENTRY(break_fault) ...@@ -682,6 +681,7 @@ ENTRY(break_fault)
extr.u r8=r29,41,2 // extract ei field from cr.ipsr extr.u r8=r29,41,2 // extract ei field from cr.ipsr
;; ;;
cmp.eq p6,p7=2,r8 // isr.ei==2? cmp.eq p6,p7=2,r8 // isr.ei==2?
mov r2=r1 // setup r2 for ia64_syscall_setup
;; ;;
(p6) mov r8=0 // clear ei to 0 (p6) mov r8=0 // clear ei to 0
(p6) adds r28=16,r28 // switch cr.iip to next bundle cr.ipsr.ei wrapped (p6) adds r28=16,r28 // switch cr.iip to next bundle cr.ipsr.ei wrapped
...@@ -691,19 +691,25 @@ ENTRY(break_fault) ...@@ -691,19 +691,25 @@ ENTRY(break_fault)
dep r29=r8,r29,41,2 // insert new ei into cr.ipsr dep r29=r8,r29,41,2 // insert new ei into cr.ipsr
;; ;;
ia64_enter_syscall:
// switch from user to kernel RBS: // switch from user to kernel RBS:
MINSTATE_START_SAVE_MIN_VIRT MINSTATE_START_SAVE_MIN_VIRT
br.call.sptk.many b7=setup_syscall_via_break br.call.sptk.many b7=ia64_syscall_setup
;; ;;
mov r3=255 MINSTATE_END_SAVE_MIN_VIRT // switch to bank 1
ssm psr.ic | PSR_DEFAULT_BITS
;;
srlz.i // guarantee that interruption collection is on
;;
(p15) ssm psr.i // restore psr.i
;;
mov r3=NR_syscalls - 1
movl r16=sys_call_table movl r16=sys_call_table
adds r15=-1024,r15 // r15 contains the syscall number---subtract 1024 adds r15=-1024,r15 // r15 contains the syscall number---subtract 1024
movl r2=ia64_ret_from_syscall movl r2=ia64_ret_from_syscall
;; ;;
shladd r20=r15,3,r16 // r20 = sys_call_table + 8*(syscall-1024) shladd r20=r15,3,r16 // r20 = sys_call_table + 8*(syscall-1024)
cmp.geu p0,p7=r3,r15 // (syscall > 0 && syscall <= 1024+255) ? cmp.geu p0,p7=r3,r15 // (syscall > 0 && syscall < 1024 + NR_syscalls) ?
mov rp=r2 // set the real return addr mov rp=r2 // set the real return addr
;; ;;
(p7) add r20=(__NR_ni_syscall-1024)*8,r16 // force __NR_ni_syscall (p7) add r20=(__NR_ni_syscall-1024)*8,r16 // force __NR_ni_syscall
...@@ -764,11 +770,44 @@ END(interrupt) ...@@ -764,11 +770,44 @@ END(interrupt)
* fault ever gets "unreserved", simply moved the following code to a more * fault ever gets "unreserved", simply moved the following code to a more
* suitable spot... * suitable spot...
* *
* setup_syscall_via_break() is a separate subroutine so that it can * ia64_syscall_setup() is a separate subroutine so that it can
* allocate stacked registers so it can safely demine any * allocate stacked registers so it can safely demine any
* potential NaT values from the input registers. * potential NaT values from the input registers.
*
* On entry:
* - executing on bank 0 or bank 1 register set (doesn't matter)
* - r1: stack pointer
* - r2: current task pointer
* - r3: preserved
* - r11: original contents (saved ar.pfs to be saved)
* - r12: original contents (sp to be saved)
* - r13: original contents (tp to be saved)
* - r15: original contents (syscall # to be saved)
* - r18: saved bsp (after switching to kernel stack)
* - r19: saved b6
* - r20: saved r1 (gp)
* - r21: saved ar.fpsr
* - r22: kernel's register backing store base (krbs_base)
* - r23: saved ar.bspstore
* - r24: saved ar.rnat
* - r25: saved ar.unat
* - r26: saved ar.pfs
* - r27: saved ar.rsc
* - r28: saved cr.iip
* - r29: saved cr.ipsr
* - r31: saved pr
* - b0: original contents (to be saved)
* On exit:
* - executing on bank 1 registers
* - psr.ic enabled, interrupts restored
* - r1: kernel's gp
* - r3: preserved (same as on entry)
* - r12: points to kernel stack
* - r13: points to current task
* - p15: TRUE if interrupts need to be re-enabled
* - ar.fpsr: set to kernel settings
*/ */
ENTRY(setup_syscall_via_break) GLOBAL_ENTRY(ia64_syscall_setup)
#if PT(B6) != 0 #if PT(B6) != 0
# error This code assumes that b6 is the first field in pt_regs. # error This code assumes that b6 is the first field in pt_regs.
#endif #endif
...@@ -786,7 +825,7 @@ ENTRY(setup_syscall_via_break) ...@@ -786,7 +825,7 @@ ENTRY(setup_syscall_via_break)
;; ;;
st8 [r17]=r28,PT(AR_UNAT)-PT(CR_IIP) // save cr.iip st8 [r17]=r28,PT(AR_UNAT)-PT(CR_IIP) // save cr.iip
mov r28=b0 mov r28=b0 // save b0 (2 cyc)
(p8) mov in0=-1 (p8) mov in0=-1
;; ;;
...@@ -824,8 +863,8 @@ ENTRY(setup_syscall_via_break) ...@@ -824,8 +863,8 @@ ENTRY(setup_syscall_via_break)
(p13) mov in5=-1 (p13) mov in5=-1
;; ;;
.mem.offset 0,0; st8.spill [r16]=r12,PT(AR_FPSR)-PT(R12) // save r12 .mem.offset 0,0; st8.spill [r16]=r12,PT(AR_FPSR)-PT(R12) // save r12
.mem.offset 8,0; st8.spill [r17]=r13,PT(R15)-PT(R13) // save r13 .mem.offset 8,0; st8.spill [r17]=r13,PT(R15)-PT(R13) // save r13
tnat.nz p14,p0=in6 tnat.nz p14,p0=in6
;; ;;
st8 [r16]=r21,PT(R8)-PT(AR_FPSR) // save ar.fpsr st8 [r16]=r21,PT(R8)-PT(AR_FPSR) // save ar.fpsr
...@@ -836,23 +875,19 @@ ENTRY(setup_syscall_via_break) ...@@ -836,23 +875,19 @@ ENTRY(setup_syscall_via_break)
adds r12=-16,r1 // switch to kernel memory stack (with 16 bytes of scratch) adds r12=-16,r1 // switch to kernel memory stack (with 16 bytes of scratch)
(p14) mov in6=-1 (p14) mov in6=-1
mov r13=IA64_KR(CURRENT) // establish `current' mov r13=r2 // establish `current'
movl r1=__gp // establish kernel global pointer movl r1=__gp // establish kernel global pointer
;; ;;
(p8) mov in7=-1 (p8) mov in7=-1
tnat.nz p9,p0=r15 tnat.nz p9,p0=r15
MINSTATE_END_SAVE_MIN_VIRT // switch to bank 1
ssm psr.ic | PSR_DEFAULT_BITS cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0
movl r17=FPSR_DEFAULT movl r17=FPSR_DEFAULT
;; ;;
srlz.i // guarantee that interruption collection is on
cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0
(p9) mov r15=-1
(p15) ssm psr.i // restore psr.i
mov.m ar.fpsr=r17 // set ar.fpsr to kernel default value mov.m ar.fpsr=r17 // set ar.fpsr to kernel default value
(p9) mov r15=-1
br.ret.sptk.many b7 br.ret.sptk.many b7
END(setup_syscall_via_break) END(ia64_syscall_setup)
.org ia64_ivt+0x3c00 .org ia64_ivt+0x3c00
///////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////
......
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
#include <asm/sal.h> #include <asm/sal.h>
#include <asm/system.h> #include <asm/system.h>
#include <asm/uaccess.h> #include <asm/uaccess.h>
#include <asm/unistd.h>
#include <asm/tlb.h> #include <asm/tlb.h>
DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
...@@ -569,6 +570,29 @@ count_reserved_pages (u64 start, u64 end, void *arg) ...@@ -569,6 +570,29 @@ count_reserved_pages (u64 start, u64 end, void *arg)
return 0; return 0;
} }
#ifdef CONFIG_FSYS
/*
* Boot command-line option "nolwsys" can be used to disable the use of any light-weight
* system call handler. When this option is in effect, all fsyscalls will end up bubbling
* down into the kernel and calling the normal (heavy-weight) syscall handler. This is
* useful for performance testing, but conceivably could also come in handy for debugging
* purposes.
*/
static int nolwsys;
/*
 * Handler registered for the "nolwsys" boot command-line option: it sets the
 * nolwsys flag.  The option string `s' is not examined.
 */
static int __init
nolwsys_setup (char *s)
{
nolwsys = 1;
return 1;	/* NOTE(review): presumably reports the option as handled to the __setup machinery -- confirm */
}
__setup("nolwsys", nolwsys_setup);
#endif /* CONFIG_FSYS */
void void
mem_init (void) mem_init (void)
{ {
...@@ -622,6 +646,25 @@ mem_init (void) ...@@ -622,6 +646,25 @@ mem_init (void)
if (num_pgt_pages > (u64) pgt_cache_water[1]) if (num_pgt_pages > (u64) pgt_cache_water[1])
pgt_cache_water[1] = num_pgt_pages; pgt_cache_water[1] = num_pgt_pages;
#ifdef CONFIG_FSYS
{
int i;
/*
* For fsyscall entry points with no light-weight handler, use the ordinary
* (heavy-weight) handler, but mark it by setting bit 0, so the fsyscall entry
* code can tell them apart.
*/
for (i = 0; i < NR_syscalls; ++i) {
extern unsigned long fsyscall_table[NR_syscalls];
extern unsigned long sys_call_table[NR_syscalls];
if (!fsyscall_table[i] || nolwsys)
fsyscall_table[i] = sys_call_table[i] | 1;
}
}
#endif
/* install the gate page in the global page table: */ /* install the gate page in the global page table: */
put_gate_page(virt_to_page(ia64_imva(__start_gate_section)), GATE_ADDR); put_gate_page(virt_to_page(ia64_imva(__start_gate_section)), GATE_ADDR);
......
...@@ -247,6 +247,8 @@ ...@@ -247,6 +247,8 @@
#define __NR_sys_clock_getres 1255 #define __NR_sys_clock_getres 1255
#define __NR_sys_clock_nanosleep 1256 #define __NR_sys_clock_nanosleep 1256
#define NR_syscalls 256 /* length of syscall table */
#if !defined(__ASSEMBLY__) && !defined(ASSEMBLER) #if !defined(__ASSEMBLY__) && !defined(ASSEMBLER)
extern long __ia64_syscall (long a0, long a1, long a2, long a3, long a4, long nr); extern long __ia64_syscall (long a0, long a1, long a2, long a3, long a4, long nr);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment