diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 2510b76bed898e171aaf70734a8649db994bccf6..6debecd9783448baecfed349cd833d6a94b83d43 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -1464,3 +1464,6 @@ sys_call_table:
 	data8 ia64_ni_syscall
 	data8 ia64_ni_syscall
 	data8 ia64_ni_syscall
+	data8 ia64_ni_syscall
+
+	.org sys_call_table + 8*NR_syscalls	// guard against failures to increase NR_syscalls
diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S
index 24ab7b26bc3b154eb02de15cf76f473542a9672f..bec306ad60711050bdb5ff44fb776d52f9317fdd 100644
--- a/arch/ia64/kernel/fsys.S
+++ b/arch/ia64/kernel/fsys.S
@@ -16,6 +16,7 @@
 #include <asm/thread_info.h>
 #include <asm/sal.h>
 #include <asm/system.h>
+#include <asm/unistd.h>
 
 #include "entry.h"
 
@@ -41,59 +42,10 @@
  *   ar.pfs	= previous frame-state (as passed into the fsyscall handler)
  */
 
-#if 1
-ENTRY(fsys_fallback_syscall)
-	/*
-	 * This is called for system calls which are entered via epc, but don't
-	 * have a light-weight handler.  We need to bubble down into the kernel,
-	 * and that requires setting up a minimal pt_regs structure, and initializing
-	 * the CPU state more or less as if an interruption had occurred.  To make
-	 * syscall-restarts work, we setup pt_regs such that cr_iip points to the
-	 * second instruction in syscall_via_break.  Decrementing the IP hence will
-	 * restart the syscall via break and not decrementing IP will return us
-	 * to the caller, as usual.
-	 */
-#	define PSR_PRESERVED_BITS	(IA64_PSR_UP | IA64_PSR_MFL | IA64_PSR_MFH | IA64_PSR_PK \
-					 | IA64_PSR_DT | IA64_PSR_RT)
-	/*
-	 * Reading psr.l gives us only bits 0-31, psr.it, and psr.mc.  The rest we have
-	 * to synthesize.
-	 */
-#	define PSR_ONE_BITS		((3 << IA64_PSR_CPL0_BIT) | (0x1 << IA64_PSR_RI_BIT) \
-					 | IA64_PSR_BN)
-	mov r29=psr
-	movl r9=PSR_PRESERVED_BITS
-
-	mov r20=r1
-	movl r8=PSR_ONE_BITS
-	;;
-	mov r1=IA64_KR(CURRENT)		// r16 = current (physical); 12 cycle read lat.
-	and r9=r9,r29
-	or r29=r8,r29
-	;;
-	mov psr.l=r9			// slam the door
-	mov r21=ar.fpsr
-	mov r26=ar.pfs
-
-	mov r25=ar.unat
-	mov r27=ar.rsc
-	mov r19=b6
-	;;
-
-	srlz.i				// ensure new psr.l has been established
-	movl r28=GATE_ADDR	// cr.iip XXX fix me!! Should be: GATE_ADDR(syscall_via_break)
-
-	invala
-	mov r31=pr
-	adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r1
-	;;
-	st1 [r16]=r0				// clear current->thread.on_ustack flag
-	cmp.ne pKStk,pUStk=r0,r0		// set pKStk <- 0, pUStk <- 1
-	br.cond.sptk.many ia64_enter_syscall
-END(fsys_fallback_syscall)
-#endif
-
 ENTRY(fsys_ni_syscall)
+	.prologue
+	.altrp b6
+	.body
 	mov r8=ENOSYS
 	mov r10=-1
 	MCKINLEY_E9_WORKAROUND
@@ -101,6 +53,9 @@ ENTRY(fsys_ni_syscall)
 END(fsys_ni_syscall)
 
 ENTRY(fsys_getpid)
+	.prologue
+	.altrp b6
+	.body
 	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
 	;;
 	ld4 r9=[r9]
@@ -116,6 +71,9 @@ ENTRY(fsys_getpid)
 END(fsys_getpid)
 
 ENTRY(fsys_getppid)
+	.prologue
+	.altrp b6
+	.body
 	add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16
 	;;
 	ld8 r17=[r17]				// r17 = current->group_leader
@@ -161,6 +119,9 @@ ENTRY(fsys_getppid)
 END(fsys_getppid)
 
 ENTRY(fsys_set_tid_address)
+	.prologue
+	.altrp b6
+	.body
 	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
 	;;
 	ld4 r9=[r9]
@@ -200,6 +161,9 @@ END(fsys_set_tid_address)
  */
 
 ENTRY(fsys_gettimeofday)
+	.prologue
+	.altrp b6
+	.body
 	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
 	movl r3=THIS_CPU(cpu_info)
 
@@ -213,7 +177,7 @@ ENTRY(fsys_gettimeofday)
 	movl r19=xtime			// xtime is a timespec struct
 
 	ld8 r10=[r10]			// r10 <- __per_cpu_offset[0]
-	movl r21=cpu_info__per_cpu
+	movl r21=THIS_CPU(cpu_info)
 	;;
 	add r10=r21, r10		// r10 <- &cpu_data(time_keeper_id)
 	tbit.nz p8,p0 = r2, IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT_BIT
@@ -368,262 +332,373 @@ EX(.fail, st8 [r9]=r3)			// store them in the timeval struct
 	br.ret.spnt.many b6		// return with r8 set to EINVAL
 END(fsys_gettimeofday)
 
+ENTRY(fsys_fallback_syscall)
+	.prologue
+	.altrp b6				// unwind info: return address is kept in b6
+	.body
+	/*
+	 * We only get here from light-weight syscall handlers.  Thus, we already
+	 * know that r15 contains a valid syscall number.  No need to re-check.
+	 */
+	adds r17=-1024,r15			// r17 <- syscall number - 1024 (index into sys_call_table)
+	movl r14=sys_call_table
+	;;
+	shladd r18=r17,3,r14			// r18 <- sys_call_table + 8*r17 (address of the table slot)
+	;;
+	ld8 r18=[r18]				// load normal (heavy-weight) syscall entry-point
+	mov r29=psr				// read psr (12 cyc load latency)
+	mov r27=ar.rsc				// r27/r21/r26: inputs that fsys_bubble_down expects on entry
+	mov r21=ar.fpsr
+	mov r26=ar.pfs
+END(fsys_fallback_syscall)
+	/* FALL THROUGH into fsys_bubble_down with r18, r21, r26, r27 and r29 set up as above */
+GLOBAL_ENTRY(fsys_bubble_down)
+	.prologue
+	.altrp b6				// unwind info: return address is kept in b6
+	.body
+	/*
+	 * We get here for syscalls that don't have a lightweight handler.  For those, we
+	 * need to bubble down into the kernel and that requires setting up a minimal
+	 * pt_regs structure, and initializing the CPU state more or less as if an
+	 * interruption had occurred.  To make syscall-restarts work, we setup pt_regs
+	 * such that cr_iip points to the second instruction in syscall_via_break.
+	 * Decrementing the IP hence will restart the syscall via break and not
+	 * decrementing IP will return us to the caller, as usual.  Note that we preserve
+	 * the value of psr.pp rather than initializing it from dcr.pp.  This makes it
+	 * possible to distinguish fsyscall execution from other privileged execution.
+	 *
+	 * On entry:
+	 *	- normal fsyscall handler register usage, except that we also have:
+	 *	- r18: address of syscall entry point
+	 *	- r21: ar.fpsr
+	 *	- r26: ar.pfs
+	 *	- r27: ar.rsc
+	 *	- r29: psr
+	 */
+#	define PSR_PRESERVED_BITS	(IA64_PSR_UP | IA64_PSR_MFL | IA64_PSR_MFH | IA64_PSR_PK \
+					 | IA64_PSR_DT | IA64_PSR_PP | IA64_PSR_RT | IA64_PSR_IC)
+	/*
+	 * Reading psr.l gives us only bits 0-31, psr.it, and psr.mc.  The rest we have
+	 * to synthesize.
+	 */
+#	define PSR_ONE_BITS		((3 << IA64_PSR_CPL0_BIT) | (0x1 << IA64_PSR_RI_BIT) \
+					 | IA64_PSR_BN)
+
+	invala
+	movl r8=PSR_ONE_BITS
+
+	mov r25=ar.unat			// save ar.unat (5 cyc)
+	movl r9=PSR_PRESERVED_BITS
+
+	mov ar.rsc=0			// set enforced lazy mode, pl 0, little-endian, loadrs=0
+	movl r28=GATE_ADDR	// cr.iip XXX fix me!! Should be: GATE_ADDR(syscall_via_break)
+	;;
+	mov r23=ar.bspstore		// save ar.bspstore (12 cyc)
+	mov r31=pr			// save pr (2 cyc)
+	mov r20=r1			// save caller's gp in r20
+	;;
+	mov r2=r16			// copy current task addr to addl-addressable register
+	and r9=r9,r29
+	mov r19=b6			// save b6 (2 cyc)
+	;;
+	mov psr.l=r9			// slam the door (17 cyc to srlz.i)
+	or r29=r8,r29			// construct cr.ipsr value to save
+	addl r22=IA64_RBS_OFFSET,r2	// compute base of RBS
+	;;
+	mov.m r24=ar.rnat		// read ar.rnat (5 cyc lat)
+	lfetch.fault.excl.nt1 [r22]
+	adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r2
+
+	// ensure previous insn group is issued before we stall for srlz.i:
+	;;
+	srlz.i				// ensure new psr.l has been established
+	/////////////////////////////////////////////////////////////////////////////
+	////////// from this point on, execution is not interruptible anymore
+	/////////////////////////////////////////////////////////////////////////////
+	addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2	// compute base of memory stack
+	cmp.ne pKStk,pUStk=r0,r0	// set pKStk <- 0, pUStk <- 1
+	;;
+	st1 [r16]=r0			// clear current->thread.on_ustack flag
+	mov ar.bspstore=r22		// switch to kernel RBS
+	mov b6=r18			// copy syscall entry-point to b6 (7 cyc)
+	add r3=TI_FLAGS+IA64_TASK_SIZE,r2
+	;;
+	ld4 r3=[r3]				// r3 = current_thread_info()->flags
+	mov r18=ar.bsp			// save (kernel) ar.bsp (12 cyc)
+	mov ar.rsc=0x3			// set eager mode, pl 0, little-endian, loadrs=0
+	br.call.sptk.many b7=ia64_syscall_setup	// fill in pt_regs; the routine returns through b7
+	;;
+	ssm psr.i				// set psr.i: interrupts back on
+	movl r2=ia64_ret_from_syscall
+	;;
+	mov rp=r2				// set the real return addr
+	tbit.z p8,p0=r3,TIF_SYSCALL_TRACE	// p8 <- TRUE if the TIF_SYSCALL_TRACE flag is clear
+
+(p8)	br.call.sptk.many b6=b6			// ignore this return addr
+	br.cond.sptk ia64_trace_syscall		// p8 FALSE: tracing flag set, take the traced path
+END(fsys_bubble_down)
+
 	.rodata
 	.align 8
 	.globl fsyscall_table
+
+	data8 fsys_bubble_down		// slot at fsyscall_table-8; syscall_via_epc loads the bubble-down address from here
 fsyscall_table:
 	data8 fsys_ni_syscall
-	data8 fsys_fallback_syscall	// exit			// 1025
-	data8 fsys_fallback_syscall	// read
-	data8 fsys_fallback_syscall	// write
-	data8 fsys_fallback_syscall	// open
-	data8 fsys_fallback_syscall	// close
-	data8 fsys_fallback_syscall	// creat		// 1030
-	data8 fsys_fallback_syscall	// link
-	data8 fsys_fallback_syscall	// unlink
-	data8 fsys_fallback_syscall	// execve
-	data8 fsys_fallback_syscall	// chdir
-	data8 fsys_fallback_syscall	// fchdir		// 1035
-	data8 fsys_fallback_syscall	// utimes
-	data8 fsys_fallback_syscall	// mknod
-	data8 fsys_fallback_syscall	// chmod
-	data8 fsys_fallback_syscall	// chown
-	data8 fsys_fallback_syscall	// lseek		// 1040
+	data8 0				// exit			// 1025
+	data8 0				// read
+	data8 0				// write
+	data8 0				// open
+	data8 0				// close
+	data8 0				// creat		// 1030
+	data8 0				// link
+	data8 0				// unlink
+	data8 0				// execve
+	data8 0				// chdir
+	data8 0				// fchdir		// 1035
+	data8 0				// utimes
+	data8 0				// mknod
+	data8 0				// chmod
+	data8 0				// chown
+	data8 0				// lseek		// 1040
 	data8 fsys_getpid		// getpid
 	data8 fsys_getppid		// getppid
-	data8 fsys_fallback_syscall	// mount
-	data8 fsys_fallback_syscall	// umount
-	data8 fsys_fallback_syscall	// setuid		// 1045
-	data8 fsys_fallback_syscall	// getuid
-	data8 fsys_fallback_syscall	// geteuid
-	data8 fsys_fallback_syscall	// ptrace
-	data8 fsys_fallback_syscall	// access
-	data8 fsys_fallback_syscall	// sync			// 1050
-	data8 fsys_fallback_syscall	// fsync
-	data8 fsys_fallback_syscall	// fdatasync
-	data8 fsys_fallback_syscall	// kill
-	data8 fsys_fallback_syscall	// rename
-	data8 fsys_fallback_syscall	// mkdir		// 1055
-	data8 fsys_fallback_syscall	// rmdir
-	data8 fsys_fallback_syscall	// dup
-	data8 fsys_fallback_syscall	// pipe
-	data8 fsys_fallback_syscall	// times
-	data8 fsys_fallback_syscall	// brk			// 1060
-	data8 fsys_fallback_syscall	// setgid
-	data8 fsys_fallback_syscall	// getgid
-	data8 fsys_fallback_syscall	// getegid
-	data8 fsys_fallback_syscall	// acct
-	data8 fsys_fallback_syscall	// ioctl		// 1065
-	data8 fsys_fallback_syscall	// fcntl
-	data8 fsys_fallback_syscall	// umask
-	data8 fsys_fallback_syscall	// chroot
-	data8 fsys_fallback_syscall	// ustat
-	data8 fsys_fallback_syscall	// dup2			// 1070
-	data8 fsys_fallback_syscall	// setreuid
-	data8 fsys_fallback_syscall	// setregid
-	data8 fsys_fallback_syscall	// getresuid
-	data8 fsys_fallback_syscall	// setresuid
-	data8 fsys_fallback_syscall	// getresgid		// 1075
-	data8 fsys_fallback_syscall	// setresgid
-	data8 fsys_fallback_syscall	// getgroups
-	data8 fsys_fallback_syscall	// setgroups
-	data8 fsys_fallback_syscall	// getpgid
-	data8 fsys_fallback_syscall	// setpgid		// 1080
-	data8 fsys_fallback_syscall	// setsid
-	data8 fsys_fallback_syscall	// getsid
-	data8 fsys_fallback_syscall	// sethostname
-	data8 fsys_fallback_syscall	// setrlimit
-	data8 fsys_fallback_syscall	// getrlimit		// 1085
-	data8 fsys_fallback_syscall	// getrusage
+	data8 0				// mount
+	data8 0				// umount
+	data8 0				// setuid		// 1045
+	data8 0				// getuid
+	data8 0				// geteuid
+	data8 0				// ptrace
+	data8 0				// access
+	data8 0				// sync			// 1050
+	data8 0				// fsync
+	data8 0				// fdatasync
+	data8 0				// kill
+	data8 0				// rename
+	data8 0				// mkdir		// 1055
+	data8 0				// rmdir
+	data8 0				// dup
+	data8 0				// pipe
+	data8 0				// times
+	data8 0				// brk			// 1060
+	data8 0				// setgid
+	data8 0				// getgid
+	data8 0				// getegid
+	data8 0				// acct
+	data8 0				// ioctl		// 1065
+	data8 0				// fcntl
+	data8 0				// umask
+	data8 0				// chroot
+	data8 0				// ustat
+	data8 0				// dup2			// 1070
+	data8 0				// setreuid
+	data8 0				// setregid
+	data8 0				// getresuid
+	data8 0				// setresuid
+	data8 0				// getresgid		// 1075
+	data8 0				// setresgid
+	data8 0				// getgroups
+	data8 0				// setgroups
+	data8 0				// getpgid
+	data8 0				// setpgid		// 1080
+	data8 0				// setsid
+	data8 0				// getsid
+	data8 0				// sethostname
+	data8 0				// setrlimit
+	data8 0				// getrlimit		// 1085
+	data8 0				// getrusage
 	data8 fsys_gettimeofday		// gettimeofday
-	data8 fsys_fallback_syscall	// settimeofday
-	data8 fsys_fallback_syscall	// select
-	data8 fsys_fallback_syscall	// poll			// 1090
-	data8 fsys_fallback_syscall	// symlink
-	data8 fsys_fallback_syscall	// readlink
-	data8 fsys_fallback_syscall	// uselib
-	data8 fsys_fallback_syscall	// swapon
-	data8 fsys_fallback_syscall	// swapoff		// 1095
-	data8 fsys_fallback_syscall	// reboot
-	data8 fsys_fallback_syscall	// truncate
-	data8 fsys_fallback_syscall	// ftruncate
-	data8 fsys_fallback_syscall	// fchmod
-	data8 fsys_fallback_syscall	// fchown		// 1100
-	data8 fsys_fallback_syscall	// getpriority
-	data8 fsys_fallback_syscall	// setpriority
-	data8 fsys_fallback_syscall	// statfs
-	data8 fsys_fallback_syscall	// fstatfs
-	data8 fsys_fallback_syscall	// gettid		// 1105
-	data8 fsys_fallback_syscall	// semget
-	data8 fsys_fallback_syscall	// semop
-	data8 fsys_fallback_syscall	// semctl
-	data8 fsys_fallback_syscall	// msgget
-	data8 fsys_fallback_syscall	// msgsnd		// 1110
-	data8 fsys_fallback_syscall	// msgrcv
-	data8 fsys_fallback_syscall	// msgctl
-	data8 fsys_fallback_syscall	// shmget
-	data8 fsys_fallback_syscall	// shmat
-	data8 fsys_fallback_syscall	// shmdt		// 1115
-	data8 fsys_fallback_syscall	// shmctl
-	data8 fsys_fallback_syscall	// syslog
-	data8 fsys_fallback_syscall	// setitimer
-	data8 fsys_fallback_syscall	// getitimer
-	data8 fsys_fallback_syscall		 		// 1120
-	data8 fsys_fallback_syscall
-	data8 fsys_fallback_syscall
-	data8 fsys_fallback_syscall	// vhangup
-	data8 fsys_fallback_syscall	// lchown
-	data8 fsys_fallback_syscall	// remap_file_pages	// 1125
-	data8 fsys_fallback_syscall	// wait4
-	data8 fsys_fallback_syscall	// sysinfo
-	data8 fsys_fallback_syscall	// clone
-	data8 fsys_fallback_syscall	// setdomainname
-	data8 fsys_fallback_syscall	// newuname		// 1130
-	data8 fsys_fallback_syscall	// adjtimex
-	data8 fsys_fallback_syscall
-	data8 fsys_fallback_syscall	// init_module
-	data8 fsys_fallback_syscall	// delete_module
-	data8 fsys_fallback_syscall				// 1135
-	data8 fsys_fallback_syscall
-	data8 fsys_fallback_syscall	// quotactl
-	data8 fsys_fallback_syscall	// bdflush
-	data8 fsys_fallback_syscall	// sysfs
-	data8 fsys_fallback_syscall	// personality		// 1140
-	data8 fsys_fallback_syscall	// afs_syscall
-	data8 fsys_fallback_syscall	// setfsuid
-	data8 fsys_fallback_syscall	// setfsgid
-	data8 fsys_fallback_syscall	// getdents
-	data8 fsys_fallback_syscall	// flock		// 1145
-	data8 fsys_fallback_syscall	// readv
-	data8 fsys_fallback_syscall	// writev
-	data8 fsys_fallback_syscall	// pread64
-	data8 fsys_fallback_syscall	// pwrite64
-	data8 fsys_fallback_syscall	// sysctl		// 1150
-	data8 fsys_fallback_syscall	// mmap
-	data8 fsys_fallback_syscall	// munmap
-	data8 fsys_fallback_syscall	// mlock
-	data8 fsys_fallback_syscall	// mlockall
-	data8 fsys_fallback_syscall	// mprotect		// 1155
-	data8 fsys_fallback_syscall	// mremap
-	data8 fsys_fallback_syscall	// msync
-	data8 fsys_fallback_syscall	// munlock
-	data8 fsys_fallback_syscall	// munlockall
-	data8 fsys_fallback_syscall	// sched_getparam	// 1160
-	data8 fsys_fallback_syscall	// sched_setparam
-	data8 fsys_fallback_syscall	// sched_getscheduler
-	data8 fsys_fallback_syscall	// sched_setscheduler
-	data8 fsys_fallback_syscall	// sched_yield
-	data8 fsys_fallback_syscall	// sched_get_priority_max	// 1165
-	data8 fsys_fallback_syscall	// sched_get_priority_min
-	data8 fsys_fallback_syscall	// sched_rr_get_interval
-	data8 fsys_fallback_syscall	// nanosleep
-	data8 fsys_fallback_syscall	// nfsservctl
-	data8 fsys_fallback_syscall	// prctl		// 1170
-	data8 fsys_fallback_syscall	// getpagesize
-	data8 fsys_fallback_syscall	// mmap2
-	data8 fsys_fallback_syscall	// pciconfig_read
-	data8 fsys_fallback_syscall	// pciconfig_write
-	data8 fsys_fallback_syscall	// perfmonctl		// 1175
-	data8 fsys_fallback_syscall	// sigaltstack
-	data8 fsys_fallback_syscall	// rt_sigaction
-	data8 fsys_fallback_syscall	// rt_sigpending
-	data8 fsys_fallback_syscall	// rt_sigprocmask
-	data8 fsys_fallback_syscall	// rt_sigqueueinfo	// 1180
-	data8 fsys_fallback_syscall	// rt_sigreturn
-	data8 fsys_fallback_syscall	// rt_sigsuspend
-	data8 fsys_fallback_syscall	// rt_sigtimedwait
-	data8 fsys_fallback_syscall	// getcwd
-	data8 fsys_fallback_syscall	// capget		// 1185
-	data8 fsys_fallback_syscall	// capset
-	data8 fsys_fallback_syscall	// sendfile
-	data8 fsys_fallback_syscall
-	data8 fsys_fallback_syscall
-	data8 fsys_fallback_syscall	// socket		// 1190
-	data8 fsys_fallback_syscall	// bind
-	data8 fsys_fallback_syscall	// connect
-	data8 fsys_fallback_syscall	// listen
-	data8 fsys_fallback_syscall	// accept
-	data8 fsys_fallback_syscall	// getsockname		// 1195
-	data8 fsys_fallback_syscall	// getpeername
-	data8 fsys_fallback_syscall	// socketpair
-	data8 fsys_fallback_syscall	// send
-	data8 fsys_fallback_syscall	// sendto
-	data8 fsys_fallback_syscall	// recv			// 1200
-	data8 fsys_fallback_syscall	// recvfrom
-	data8 fsys_fallback_syscall	// shutdown
-	data8 fsys_fallback_syscall	// setsockopt
-	data8 fsys_fallback_syscall	// getsockopt
-	data8 fsys_fallback_syscall	// sendmsg		// 1205
-	data8 fsys_fallback_syscall	// recvmsg
-	data8 fsys_fallback_syscall	// pivot_root
-	data8 fsys_fallback_syscall	// mincore
-	data8 fsys_fallback_syscall	// madvise
-	data8 fsys_fallback_syscall	// newstat		// 1210
-	data8 fsys_fallback_syscall	// newlstat
-	data8 fsys_fallback_syscall	// newfstat
-	data8 fsys_fallback_syscall	// clone2
-	data8 fsys_fallback_syscall	// getdents64
-	data8 fsys_fallback_syscall	// getunwind		// 1215
-	data8 fsys_fallback_syscall	// readahead
-	data8 fsys_fallback_syscall	// setxattr
-	data8 fsys_fallback_syscall	// lsetxattr
-	data8 fsys_fallback_syscall	// fsetxattr
-	data8 fsys_fallback_syscall	// getxattr		// 1220
-	data8 fsys_fallback_syscall	// lgetxattr
-	data8 fsys_fallback_syscall	// fgetxattr
-	data8 fsys_fallback_syscall	// listxattr
-	data8 fsys_fallback_syscall	// llistxattr
-	data8 fsys_fallback_syscall	// flistxattr		// 1225
-	data8 fsys_fallback_syscall	// removexattr
-	data8 fsys_fallback_syscall	// lremovexattr
-	data8 fsys_fallback_syscall	// fremovexattr
-	data8 fsys_fallback_syscall	// tkill
-	data8 fsys_fallback_syscall	// futex		// 1230
-	data8 fsys_fallback_syscall	// sched_setaffinity
-	data8 fsys_fallback_syscall	// sched_getaffinity
+	data8 0				// settimeofday
+	data8 0				// select
+	data8 0				// poll			// 1090
+	data8 0				// symlink
+	data8 0				// readlink
+	data8 0				// uselib
+	data8 0				// swapon
+	data8 0				// swapoff		// 1095
+	data8 0				// reboot
+	data8 0				// truncate
+	data8 0				// ftruncate
+	data8 0				// fchmod
+	data8 0				// fchown		// 1100
+	data8 0				// getpriority
+	data8 0				// setpriority
+	data8 0				// statfs
+	data8 0				// fstatfs
+	data8 0				// gettid		// 1105
+	data8 0				// semget
+	data8 0				// semop
+	data8 0				// semctl
+	data8 0				// msgget
+	data8 0				// msgsnd		// 1110
+	data8 0				// msgrcv
+	data8 0				// msgctl
+	data8 0				// shmget
+	data8 0				// shmat
+	data8 0				// shmdt		// 1115
+	data8 0				// shmctl
+	data8 0				// syslog
+	data8 0				// setitimer
+	data8 0				// getitimer
+	data8 0					 		// 1120
+	data8 0
+	data8 0
+	data8 0				// vhangup
+	data8 0				// lchown
+	data8 0				// remap_file_pages	// 1125
+	data8 0				// wait4
+	data8 0				// sysinfo
+	data8 0				// clone
+	data8 0				// setdomainname
+	data8 0				// newuname		// 1130
+	data8 0				// adjtimex
+	data8 0
+	data8 0				// init_module
+	data8 0				// delete_module
+	data8 0							// 1135
+	data8 0
+	data8 0				// quotactl
+	data8 0				// bdflush
+	data8 0				// sysfs
+	data8 0				// personality		// 1140
+	data8 0				// afs_syscall
+	data8 0				// setfsuid
+	data8 0				// setfsgid
+	data8 0				// getdents
+	data8 0				// flock		// 1145
+	data8 0				// readv
+	data8 0				// writev
+	data8 0				// pread64
+	data8 0				// pwrite64
+	data8 0				// sysctl		// 1150
+	data8 0				// mmap
+	data8 0				// munmap
+	data8 0				// mlock
+	data8 0				// mlockall
+	data8 0				// mprotect		// 1155
+	data8 0				// mremap
+	data8 0				// msync
+	data8 0				// munlock
+	data8 0				// munlockall
+	data8 0				// sched_getparam	// 1160
+	data8 0				// sched_setparam
+	data8 0				// sched_getscheduler
+	data8 0				// sched_setscheduler
+	data8 0				// sched_yield
+	data8 0				// sched_get_priority_max	// 1165
+	data8 0				// sched_get_priority_min
+	data8 0				// sched_rr_get_interval
+	data8 0				// nanosleep
+	data8 0				// nfsservctl
+	data8 0				// prctl		// 1170
+	data8 0				// getpagesize
+	data8 0				// mmap2
+	data8 0				// pciconfig_read
+	data8 0				// pciconfig_write
+	data8 0				// perfmonctl		// 1175
+	data8 0				// sigaltstack
+	data8 0				// rt_sigaction
+	data8 0				// rt_sigpending
+	data8 0				// rt_sigprocmask
+	data8 0				// rt_sigqueueinfo	// 1180
+	data8 0				// rt_sigreturn
+	data8 0				// rt_sigsuspend
+	data8 0				// rt_sigtimedwait
+	data8 0				// getcwd
+	data8 0				// capget		// 1185
+	data8 0				// capset
+	data8 0				// sendfile
+	data8 0
+	data8 0
+	data8 0				// socket		// 1190
+	data8 0				// bind
+	data8 0				// connect
+	data8 0				// listen
+	data8 0				// accept
+	data8 0				// getsockname		// 1195
+	data8 0				// getpeername
+	data8 0				// socketpair
+	data8 0				// send
+	data8 0				// sendto
+	data8 0				// recv			// 1200
+	data8 0				// recvfrom
+	data8 0				// shutdown
+	data8 0				// setsockopt
+	data8 0				// getsockopt
+	data8 0				// sendmsg		// 1205
+	data8 0				// recvmsg
+	data8 0				// pivot_root
+	data8 0				// mincore
+	data8 0				// madvise
+	data8 0				// newstat		// 1210
+	data8 0				// newlstat
+	data8 0				// newfstat
+	data8 0				// clone2
+	data8 0				// getdents64
+	data8 0				// getunwind		// 1215
+	data8 0				// readahead
+	data8 0				// setxattr
+	data8 0				// lsetxattr
+	data8 0				// fsetxattr
+	data8 0				// getxattr		// 1220
+	data8 0				// lgetxattr
+	data8 0				// fgetxattr
+	data8 0				// listxattr
+	data8 0				// llistxattr
+	data8 0				// flistxattr		// 1225
+	data8 0				// removexattr
+	data8 0				// lremovexattr
+	data8 0				// fremovexattr
+	data8 0				// tkill
+	data8 0				// futex		// 1230
+	data8 0				// sched_setaffinity
+	data8 0				// sched_getaffinity
 	data8 fsys_set_tid_address	// set_tid_address
-	data8 fsys_fallback_syscall	// unused
-	data8 fsys_fallback_syscall	// unused		// 1235
-	data8 fsys_fallback_syscall	// exit_group
-	data8 fsys_fallback_syscall	// lookup_dcookie
-	data8 fsys_fallback_syscall	// io_setup
-	data8 fsys_fallback_syscall	// io_destroy
-	data8 fsys_fallback_syscall	// io_getevents		// 1240
-	data8 fsys_fallback_syscall	// io_submit
-	data8 fsys_fallback_syscall	// io_cancel
-	data8 fsys_fallback_syscall	// epoll_create
-	data8 fsys_fallback_syscall	// epoll_ctl
-	data8 fsys_fallback_syscall	// epoll_wait		// 1245
-	data8 fsys_fallback_syscall	// restart_syscall
-	data8 fsys_fallback_syscall	// semtimedop
-	data8 fsys_fallback_syscall	// timer_create
-	data8 fsys_fallback_syscall	// timer_settime
-	data8 fsys_fallback_syscall	// timer_gettime 	// 1250
-	data8 fsys_fallback_syscall	// timer_getoverrun
-	data8 fsys_fallback_syscall	// timer_delete
-	data8 fsys_fallback_syscall	// clock_settime
-	data8 fsys_fallback_syscall	// clock_gettime
-	data8 fsys_fallback_syscall	// clock_getres		// 1255
-	data8 fsys_fallback_syscall	// clock_nanosleep
-	data8 fsys_fallback_syscall
-	data8 fsys_fallback_syscall
-	data8 fsys_fallback_syscall
-	data8 fsys_fallback_syscall				// 1260
-	data8 fsys_fallback_syscall
-	data8 fsys_fallback_syscall
-	data8 fsys_fallback_syscall
-	data8 fsys_fallback_syscall
-	data8 fsys_fallback_syscall				// 1265
-	data8 fsys_fallback_syscall
-	data8 fsys_fallback_syscall
-	data8 fsys_fallback_syscall
-	data8 fsys_fallback_syscall
-	data8 fsys_fallback_syscall				// 1270
-	data8 fsys_fallback_syscall
-	data8 fsys_fallback_syscall
-	data8 fsys_fallback_syscall
-	data8 fsys_fallback_syscall
-	data8 fsys_fallback_syscall				// 1275
-	data8 fsys_fallback_syscall
-	data8 fsys_fallback_syscall
-	data8 fsys_fallback_syscall
+	data8 0				// unused
+	data8 0				// unused		// 1235
+	data8 0				// exit_group
+	data8 0				// lookup_dcookie
+	data8 0				// io_setup
+	data8 0				// io_destroy
+	data8 0				// io_getevents		// 1240
+	data8 0				// io_submit
+	data8 0				// io_cancel
+	data8 0				// epoll_create
+	data8 0				// epoll_ctl
+	data8 0				// epoll_wait		// 1245
+	data8 0				// restart_syscall
+	data8 0				// semtimedop
+	data8 0				// timer_create
+	data8 0				// timer_settime
+	data8 0				// timer_gettime 	// 1250
+	data8 0				// timer_getoverrun
+	data8 0				// timer_delete
+	data8 0				// clock_settime
+	data8 0				// clock_gettime
+	data8 0				// clock_getres		// 1255
+	data8 0				// clock_nanosleep
+	data8 0
+	data8 0
+	data8 0
+	data8 0							// 1260
+	data8 0
+	data8 0
+	data8 0
+	data8 0
+	data8 0							// 1265
+	data8 0
+	data8 0
+	data8 0
+	data8 0
+	data8 0							// 1270
+	data8 0
+	data8 0
+	data8 0
+	data8 0
+	data8 0							// 1275
+	data8 0
+	data8 0
+	data8 0
+	data8 0
+
+	.org fsyscall_table + 8*NR_syscalls	// guard against failures to increase NR_syscalls
diff --git a/arch/ia64/kernel/gate.S b/arch/ia64/kernel/gate.S
index d88a79cc30ff5900f732c2706bc60a874d201c75..12bd82ac4546f73ca849d19eb72748a4bb6efef3 100644
--- a/arch/ia64/kernel/gate.S
+++ b/arch/ia64/kernel/gate.S
@@ -11,12 +11,10 @@
 #include <asm/sigcontext.h>
 #include <asm/system.h>
 #include <asm/unistd.h>
-#include <asm/page.h>
 
 	.section .text.gate, "ax"
 .start_gate:
 
-
 #if CONFIG_FSYS
 
 #include <asm/errno.h>
@@ -49,6 +47,7 @@ END(syscall_via_break)
  *	all other "scratch" registers:	undefined
  *	all "preserved" registers:	same as on entry
  */
+
 GLOBAL_ENTRY(syscall_via_epc)
 	.prologue
 	.altrp b6
@@ -65,19 +64,38 @@ GLOBAL_ENTRY(syscall_via_epc)
 }
 	;;
 	rsm psr.be
-	movl r18=fsyscall_table
+	movl r14=fsyscall_table
 
-	mov r16=IA64_KR(CURRENT)
-	mov r19=255
-	;;
-	shladd r18=r17,3,r18
-	cmp.geu p6,p0=r19,r17			// (syscall > 0 && syscall <= 1024+255)?
+	mov r16=IA64_KR(CURRENT)		// 12 cycle read latency
+	mov r19=NR_syscalls-1
 	;;
+	shladd r18=r17,3,r14
+
 	srlz.d					// ensure little-endian byteorder is in effect
+	cmp.ne p8,p0=r0,r0			// p8 <- FALSE
+	/* Note: if r17 is a NaT, p6 will be set to zero.  */
+	cmp.geu p6,p7=r19,r17			// (syscall > 0 && syscall < 1024+NR_syscalls)?
+	;;
 (p6)	ld8 r18=[r18]
+	mov r29=psr				// read psr (12 cyc load latency)
+	add r14=-8,r14				// r14 <- addr of fsys_bubble_down entry
 	;;
 (p6)	mov b7=r18
+(p6)	tbit.z p8,p0=r18,0
+(p8)	br.dptk.many b7
+
+	mov r27=ar.rsc
+	mov r21=ar.fpsr
+	mov r26=ar.pfs
+#if 1/*def CONFIG_ITANIUM*/
+(p6)	ld8 r14=[r14]				// r14 <- fsys_bubble_down
+	;;
+(p6)	mov b7=r14
 (p6)	br.sptk.many b7
+#else
+	/* We can't do this until gate is a proper ELF DSO.  */
+(p6)	brl.cond.sptk fsys_bubble_down
+#endif
 
 	mov r10=-1
 	mov r8=ENOSYS
@@ -85,24 +103,6 @@ GLOBAL_ENTRY(syscall_via_epc)
 	br.ret.sptk.many b6
 END(syscall_via_epc)
 
-#if 0
-GLOBAL_ENTRY(fsys_fallback_syscall)
-	/*
-	 * It would be better/fsyser to do the SAVE_MIN magic directly here, but for now
-	 * we simply fall back on doing a system-call via break.  Good enough
-	 * to get started.  (Note: we have to do this through the gate page again, since
-	 * the br.ret will switch us back to user-level privilege.)
-	 *
-	 * XXX Move this back to fsys.S after changing it over to avoid break 0x100000.
-	 */
-	movl r2=(syscall_via_break - .start_gate) + GATE_ADDR
-	;;
-	MCKINLEY_E9_WORKAROUND
-	mov b7=r2
-	br.ret.sptk.many b7
-END(fsys_fallback_syscall)
-#endif
-
 #endif /* CONFIG_FSYS */
 
 #	define ARG0_OFF		(16 + IA64_SIGFRAME_ARG0_OFFSET)
diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S
index e3930f29066755ba248e8f44799ed26b92c4603c..b0c48a8f8c7c12a5a87014ec3cb91f1db5ce6ff1 100644
--- a/arch/ia64/kernel/ivt.S
+++ b/arch/ia64/kernel/ivt.S
@@ -637,7 +637,6 @@ END(daccess_bit)
 /////////////////////////////////////////////////////////////////////////////////////////
 // 0x2c00 Entry 11 (size 64 bundles) Break instruction (33)
 ENTRY(break_fault)
-	.global ia64_enter_syscall
 	/*
 	 * The streamlined system call entry/exit paths only save/restore the initial part
 	 * of pt_regs.  This implies that the callers of system-calls must adhere to the
@@ -654,7 +653,7 @@ ENTRY(break_fault)
 	 * to prevent leaking bits from kernel to user level.
 	 */
 	DBG_FAULT(11)
-	mov r16=IA64_KR(CURRENT)		// r16 = current (physical); 12 cycle read lat.
+	mov r16=IA64_KR(CURRENT)		// r16 = current task; 12 cycle read lat.
 	mov r17=cr.iim
 	mov r18=__IA64_BREAK_SYSCALL
 	mov r21=ar.fpsr
@@ -673,7 +672,7 @@ ENTRY(break_fault)
 	;;
 	ld1 r17=[r16]				// load current->thread.on_ustack flag
 	st1 [r16]=r0				// clear current->thread.on_ustack flag
-	adds r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16	// set r1 for MINSTATE_START_SAVE_MIN_VIRT
+	add r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16	// set r1 for MINSTATE_START_SAVE_MIN_VIRT
 	;;
 	invala
 
@@ -682,6 +681,7 @@ ENTRY(break_fault)
 	extr.u r8=r29,41,2			// extract ei field from cr.ipsr
 	;;
 	cmp.eq p6,p7=2,r8			// isr.ei==2?
+	mov r2=r1				// setup r2 for ia64_syscall_setup
 	;;
 (p6)	mov r8=0				// clear ei to 0
 (p6)	adds r28=16,r28				// switch cr.iip to next bundle cr.ipsr.ei wrapped
@@ -691,19 +691,25 @@ ENTRY(break_fault)
 	dep r29=r8,r29,41,2			// insert new ei into cr.ipsr
 	;;
 
-ia64_enter_syscall:
 	// switch from user to kernel RBS:
 	MINSTATE_START_SAVE_MIN_VIRT
-	br.call.sptk.many b7=setup_syscall_via_break
+	br.call.sptk.many b7=ia64_syscall_setup
 	;;
-	mov r3=255
+	MINSTATE_END_SAVE_MIN_VIRT		// switch to bank 1
+	ssm psr.ic | PSR_DEFAULT_BITS
+	;;
+	srlz.i					// guarantee that interruption collection is on
+	;;
+(p15)	ssm psr.i				// restore psr.i
+	;;
+	mov r3=NR_syscalls - 1
 	movl r16=sys_call_table
 
 	adds r15=-1024,r15			// r15 contains the syscall number---subtract 1024
 	movl r2=ia64_ret_from_syscall
 	;;
 	shladd r20=r15,3,r16			// r20 = sys_call_table + 8*(syscall-1024)
-	cmp.geu p0,p7=r3,r15			// (syscall > 0 && syscall <= 1024+255) ?
+	cmp.geu p0,p7=r3,r15			// (syscall > 0 && syscall < 1024 + NR_syscalls) ?
 	mov rp=r2				// set the real return addr
 	;;
 (p7)	add r20=(__NR_ni_syscall-1024)*8,r16	// force __NR_ni_syscall
@@ -764,11 +770,44 @@ END(interrupt)
 	 * fault ever gets "unreserved", simply moved the following code to a more
 	 * suitable spot...
 	 *
-	 * setup_syscall_via_break() is a separate subroutine so that it can
+	 * ia64_syscall_setup() is a separate subroutine so that it can
 	 *	allocate stacked registers so it can safely demine any
 	 *	potential NaT values from the input registers.
+	 *
+	 * On entry:
+	 *	- executing on bank 0 or bank 1 register set (doesn't matter)
+	 *	-  r1: stack pointer
+	 *	-  r2: current task pointer
+	 *	-  r3: preserved
+	 *	- r11: original contents (saved ar.pfs to be saved)
+	 *	- r12: original contents (sp to be saved)
+	 *	- r13: original contents (tp to be saved)
+	 *	- r15: original contents (syscall # to be saved)
+	 *	- r18: saved bsp (after switching to kernel stack)
+	 *	- r19: saved b6
+	 *	- r20: saved r1 (gp)
+	 *	- r21: saved ar.fpsr
+	 *	- r22: kernel's register backing store base (krbs_base)
+	 *	- r23: saved ar.bspstore
+	 *	- r24: saved ar.rnat
+	 *	- r25: saved ar.unat
+	 *	- r26: saved ar.pfs
+	 *	- r27: saved ar.rsc
+	 *	- r28: saved cr.iip
+	 *	- r29: saved cr.ipsr
+	 *	- r31: saved pr
+	 *	-  b0: original contents (to be saved)
+	 * On exit:
+	 *	- still on the register bank the caller entered with; the caller
+	 *	  switches to bank 1 and turns on psr.ic/psr.i afterwards as needed
+	 *	-  r1: kernel's gp
+	 *	-  r3: preserved (same as on entry)
+	 *	- r12: points to kernel stack
+	 *	- r13: points to current task
+	 *	- p15: TRUE if interrupts need to be re-enabled
+	 *	- ar.fpsr: set to kernel settings
 	 */
-ENTRY(setup_syscall_via_break)
+GLOBAL_ENTRY(ia64_syscall_setup)
 #if PT(B6) != 0
 # error This code assumes that b6 is the first field in pt_regs.
 #endif
@@ -786,7 +825,7 @@ ENTRY(setup_syscall_via_break)
 	;;
 
 	st8 [r17]=r28,PT(AR_UNAT)-PT(CR_IIP)	// save cr.iip
-	mov r28=b0
+	mov r28=b0				// save b0 (2 cyc)
 (p8)	mov in0=-1
 	;;
 
@@ -824,8 +863,8 @@ ENTRY(setup_syscall_via_break)
 (p13)	mov in5=-1
 	;;
 
-.mem.offset 0,0; st8.spill [r16]=r12,PT(AR_FPSR)-PT(R12) // save r12
-.mem.offset 8,0; st8.spill [r17]=r13,PT(R15)-PT(R13) // save r13
+.mem.offset 0,0; st8.spill [r16]=r12,PT(AR_FPSR)-PT(R12)	// save r12
+.mem.offset 8,0; st8.spill [r17]=r13,PT(R15)-PT(R13)		// save r13
 	tnat.nz p14,p0=in6
 	;;
 	st8 [r16]=r21,PT(R8)-PT(AR_FPSR)	// save ar.fpsr
@@ -836,23 +875,19 @@ ENTRY(setup_syscall_via_break)
 	adds r12=-16,r1		// switch to kernel memory stack (with 16 bytes of scratch)
 (p14)	mov in6=-1
 
-	mov r13=IA64_KR(CURRENT)		// establish `current'
+	mov r13=r2				// establish `current'
 	movl r1=__gp				// establish kernel global pointer
 	;;
 (p8)	mov in7=-1
 	tnat.nz p9,p0=r15
-	MINSTATE_END_SAVE_MIN_VIRT		// switch to bank 1
 
-	ssm psr.ic | PSR_DEFAULT_BITS
+	cmp.eq pSys,pNonSys=r0,r0		// set pSys=1, pNonSys=0
 	movl r17=FPSR_DEFAULT
 	;;
-	srlz.i					// guarantee that interruption collection is on
-	cmp.eq pSys,pNonSys=r0,r0		// set pSys=1, pNonSys=0
-(p9)	mov r15=-1
-(p15)	ssm psr.i				// restore psr.i
 	mov.m ar.fpsr=r17			// set ar.fpsr to kernel default value
+(p9)	mov r15=-1
 	br.ret.sptk.many b7
-END(setup_syscall_via_break)
+END(ia64_syscall_setup)
 
 	.org ia64_ivt+0x3c00
 /////////////////////////////////////////////////////////////////////////////////////////
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index d06ee28d4a3bae81449f10ffeefc78fa37ef8e15..235e856177230d09ec3a3231713415ac242f8ab3 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -27,6 +27,7 @@
 #include <asm/sal.h>
 #include <asm/system.h>
 #include <asm/uaccess.h>
+#include <asm/unistd.h>
 #include <asm/tlb.h>
 
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
@@ -569,6 +570,29 @@ count_reserved_pages (u64 start, u64 end, void *arg)
 	return 0;
 }
 
+#ifdef CONFIG_FSYS
+
+/*
+ * Boot command-line option "nolwsys" can be used to disable the use of any light-weight
+ * system call handler.  When this option is in effect, all fsyscalls will end up bubbling
+ * down into the kernel and calling the normal (heavy-weight) syscall handler.  This is
+ * useful for performance testing, but conceivably could also come in handy for debugging
+ * purposes.
+ */
+
+static int nolwsys;	/* non-zero once "nolwsys" was seen on the command line; tested in mem_init() */
+
+static int __init
+nolwsys_setup (char *s)
+{
+	nolwsys = 1;	/* presence of the option is enough; its argument text (s) is not examined */
+	return 1;
+}
+
+__setup("nolwsys", nolwsys_setup);
+
+#endif /* CONFIG_FSYS */
+
 void
 mem_init (void)
 {
@@ -622,6 +646,25 @@ mem_init (void)
 	if (num_pgt_pages > (u64) pgt_cache_water[1])
 		pgt_cache_water[1] = num_pgt_pages;
 
+#ifdef CONFIG_FSYS
+	{
+		int i;
+
+		/*
+		 * For fsyscall entry points with no light-weight handler, use the ordinary
+		 * (heavy-weight) handler, but mark it by setting bit 0, so the fsyscall entry
+		 * code can tell them apart.
+		 */
+		for (i = 0; i < NR_syscalls; ++i) {
+			extern unsigned long fsyscall_table[NR_syscalls];
+			extern unsigned long sys_call_table[NR_syscalls];
+
+			if (!fsyscall_table[i] || nolwsys)
+				fsyscall_table[i] = sys_call_table[i] | 1;
+		}
+	}
+#endif
+
 	/* install the gate page in the global page table: */
 	put_gate_page(virt_to_page(ia64_imva(__start_gate_section)), GATE_ADDR);
 
diff --git a/include/asm-ia64/unistd.h b/include/asm-ia64/unistd.h
index 01aebf7f38d30f09a7b7604070fa40b8667b9407..76ba8a920b636daa922fe6e383c4cea35671eb49 100644
--- a/include/asm-ia64/unistd.h
+++ b/include/asm-ia64/unistd.h
@@ -247,6 +247,8 @@
 #define __NR_sys_clock_getres		1255
 #define __NR_sys_clock_nanosleep	1256
 
+#define NR_syscalls			256 /* length of syscall table */
+
 #if !defined(__ASSEMBLY__) && !defined(ASSEMBLER)
 
 extern long __ia64_syscall (long a0, long a1, long a2, long a3, long a4, long nr);