Commit b835996f authored by Dipankar Sarma, committed by Linus Torvalds

[PATCH] files: lock-free fd look-up

With the use of RCU in the files structure, the look-up of files using fds can
now be lock-free.  The lookup is protected by rcu_read_lock()/rcu_read_unlock().
This patch changes the readers to use lock-free lookup.
Signed-off-by: Maneesh Soni <maneesh@in.ibm.com>
Signed-off-by: Ravikiran Thirumalai <kiran_th@gmail.com>
Signed-off-by: Dipankar Sarma <dipankar@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent ab2af1f5
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include <linux/syscalls.h> #include <linux/syscalls.h>
#include <linux/tty.h> #include <linux/tty.h>
#include <linux/file.h> #include <linux/file.h>
#include <linux/rcupdate.h>
#include <asm/uaccess.h> #include <asm/uaccess.h>
#include <asm/ioctl.h> #include <asm/ioctl.h>
...@@ -33,7 +34,7 @@ static struct tty_struct *get_tty(int fd) ...@@ -33,7 +34,7 @@ static struct tty_struct *get_tty(int fd)
struct file *filp; struct file *filp;
struct tty_struct *ttyp = NULL; struct tty_struct *ttyp = NULL;
spin_lock(&current->files->file_lock); rcu_read_lock();
filp = fcheck(fd); filp = fcheck(fd);
if(filp && filp->private_data) { if(filp && filp->private_data) {
ttyp = (struct tty_struct *) filp->private_data; ttyp = (struct tty_struct *) filp->private_data;
...@@ -41,7 +42,7 @@ static struct tty_struct *get_tty(int fd) ...@@ -41,7 +42,7 @@ static struct tty_struct *get_tty(int fd)
if(ttyp->magic != TTY_MAGIC) if(ttyp->magic != TTY_MAGIC)
ttyp =NULL; ttyp =NULL;
} }
spin_unlock(&current->files->file_lock); rcu_read_unlock();
return ttyp; return ttyp;
} }
......
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#include <linux/netdevice.h> #include <linux/netdevice.h>
#include <linux/mtio.h> #include <linux/mtio.h>
#include <linux/time.h> #include <linux/time.h>
#include <linux/rcupdate.h>
#include <linux/compat.h> #include <linux/compat.h>
#include <net/sock.h> #include <net/sock.h>
...@@ -295,16 +296,16 @@ static inline int solaris_sockmod(unsigned int fd, unsigned int cmd, u32 arg) ...@@ -295,16 +296,16 @@ static inline int solaris_sockmod(unsigned int fd, unsigned int cmd, u32 arg)
struct inode *ino; struct inode *ino;
struct fdtable *fdt; struct fdtable *fdt;
/* I wonder which of these tests are superfluous... --patrik */ /* I wonder which of these tests are superfluous... --patrik */
spin_lock(&current->files->file_lock); rcu_read_lock();
fdt = files_fdtable(current->files); fdt = files_fdtable(current->files);
if (! fdt->fd[fd] || if (! fdt->fd[fd] ||
! fdt->fd[fd]->f_dentry || ! fdt->fd[fd]->f_dentry ||
! (ino = fdt->fd[fd]->f_dentry->d_inode) || ! (ino = fdt->fd[fd]->f_dentry->d_inode) ||
! S_ISSOCK(ino->i_mode)) { ! S_ISSOCK(ino->i_mode)) {
spin_unlock(&current->files->file_lock); rcu_read_unlock();
return TBADF; return TBADF;
} }
spin_unlock(&current->files->file_lock); rcu_read_unlock();
switch (cmd & 0xff) { switch (cmd & 0xff) {
case 109: /* SI_SOCKPARAMS */ case 109: /* SI_SOCKPARAMS */
......
...@@ -2480,7 +2480,7 @@ static void __do_SAK(void *arg) ...@@ -2480,7 +2480,7 @@ static void __do_SAK(void *arg)
} }
task_lock(p); task_lock(p);
if (p->files) { if (p->files) {
spin_lock(&p->files->file_lock); rcu_read_lock();
fdt = files_fdtable(p->files); fdt = files_fdtable(p->files);
for (i=0; i < fdt->max_fds; i++) { for (i=0; i < fdt->max_fds; i++) {
filp = fcheck_files(p->files, i); filp = fcheck_files(p->files, i);
...@@ -2495,7 +2495,7 @@ static void __do_SAK(void *arg) ...@@ -2495,7 +2495,7 @@ static void __do_SAK(void *arg)
break; break;
} }
} }
spin_unlock(&p->files->file_lock); rcu_read_unlock();
} }
task_unlock(p); task_unlock(p);
} while_each_task_pid(session, PIDTYPE_SID, p); } while_each_task_pid(session, PIDTYPE_SID, p);
......
...@@ -40,10 +40,10 @@ static inline int get_close_on_exec(unsigned int fd) ...@@ -40,10 +40,10 @@ static inline int get_close_on_exec(unsigned int fd)
struct files_struct *files = current->files; struct files_struct *files = current->files;
struct fdtable *fdt; struct fdtable *fdt;
int res; int res;
spin_lock(&files->file_lock); rcu_read_lock();
fdt = files_fdtable(files); fdt = files_fdtable(files);
res = FD_ISSET(fd, fdt->close_on_exec); res = FD_ISSET(fd, fdt->close_on_exec);
spin_unlock(&files->file_lock); rcu_read_unlock();
return res; return res;
} }
......
...@@ -62,6 +62,7 @@ ...@@ -62,6 +62,7 @@
#include <linux/namespace.h> #include <linux/namespace.h>
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/smp_lock.h> #include <linux/smp_lock.h>
#include <linux/rcupdate.h>
#include <linux/kallsyms.h> #include <linux/kallsyms.h>
#include <linux/mount.h> #include <linux/mount.h>
#include <linux/security.h> #include <linux/security.h>
...@@ -283,16 +284,16 @@ static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsm ...@@ -283,16 +284,16 @@ static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsm
files = get_files_struct(task); files = get_files_struct(task);
if (files) { if (files) {
spin_lock(&files->file_lock); rcu_read_lock();
file = fcheck_files(files, fd); file = fcheck_files(files, fd);
if (file) { if (file) {
*mnt = mntget(file->f_vfsmnt); *mnt = mntget(file->f_vfsmnt);
*dentry = dget(file->f_dentry); *dentry = dget(file->f_dentry);
spin_unlock(&files->file_lock); rcu_read_unlock();
put_files_struct(files); put_files_struct(files);
return 0; return 0;
} }
spin_unlock(&files->file_lock); rcu_read_unlock();
put_files_struct(files); put_files_struct(files);
} }
return -ENOENT; return -ENOENT;
...@@ -1062,7 +1063,7 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) ...@@ -1062,7 +1063,7 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
files = get_files_struct(p); files = get_files_struct(p);
if (!files) if (!files)
goto out; goto out;
spin_lock(&files->file_lock); rcu_read_lock();
fdt = files_fdtable(files); fdt = files_fdtable(files);
for (fd = filp->f_pos-2; for (fd = filp->f_pos-2;
fd < fdt->max_fds; fd < fdt->max_fds;
...@@ -1071,7 +1072,7 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) ...@@ -1071,7 +1072,7 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
if (!fcheck_files(files, fd)) if (!fcheck_files(files, fd))
continue; continue;
spin_unlock(&files->file_lock); rcu_read_unlock();
j = NUMBUF; j = NUMBUF;
i = fd; i = fd;
...@@ -1083,12 +1084,12 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) ...@@ -1083,12 +1084,12 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
ino = fake_ino(tid, PROC_TID_FD_DIR + fd); ino = fake_ino(tid, PROC_TID_FD_DIR + fd);
if (filldir(dirent, buf+j, NUMBUF-j, fd+2, ino, DT_LNK) < 0) { if (filldir(dirent, buf+j, NUMBUF-j, fd+2, ino, DT_LNK) < 0) {
spin_lock(&files->file_lock); rcu_read_lock();
break; break;
} }
spin_lock(&files->file_lock); rcu_read_lock();
} }
spin_unlock(&files->file_lock); rcu_read_unlock();
put_files_struct(files); put_files_struct(files);
} }
out: out:
...@@ -1263,9 +1264,9 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) ...@@ -1263,9 +1264,9 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
files = get_files_struct(task); files = get_files_struct(task);
if (files) { if (files) {
spin_lock(&files->file_lock); rcu_read_lock();
if (fcheck_files(files, fd)) { if (fcheck_files(files, fd)) {
spin_unlock(&files->file_lock); rcu_read_unlock();
put_files_struct(files); put_files_struct(files);
if (task_dumpable(task)) { if (task_dumpable(task)) {
inode->i_uid = task->euid; inode->i_uid = task->euid;
...@@ -1277,7 +1278,7 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) ...@@ -1277,7 +1278,7 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
security_task_to_inode(task, inode); security_task_to_inode(task, inode);
return 1; return 1;
} }
spin_unlock(&files->file_lock); rcu_read_unlock();
put_files_struct(files); put_files_struct(files);
} }
d_drop(dentry); d_drop(dentry);
...@@ -1369,7 +1370,7 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, ...@@ -1369,7 +1370,7 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry,
if (!files) if (!files)
goto out_unlock; goto out_unlock;
inode->i_mode = S_IFLNK; inode->i_mode = S_IFLNK;
spin_lock(&files->file_lock); rcu_read_lock();
file = fcheck_files(files, fd); file = fcheck_files(files, fd);
if (!file) if (!file)
goto out_unlock2; goto out_unlock2;
...@@ -1377,7 +1378,7 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, ...@@ -1377,7 +1378,7 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry,
inode->i_mode |= S_IRUSR | S_IXUSR; inode->i_mode |= S_IRUSR | S_IXUSR;
if (file->f_mode & 2) if (file->f_mode & 2)
inode->i_mode |= S_IWUSR | S_IXUSR; inode->i_mode |= S_IWUSR | S_IXUSR;
spin_unlock(&files->file_lock); rcu_read_unlock();
put_files_struct(files); put_files_struct(files);
inode->i_op = &proc_pid_link_inode_operations; inode->i_op = &proc_pid_link_inode_operations;
inode->i_size = 64; inode->i_size = 64;
...@@ -1387,7 +1388,7 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, ...@@ -1387,7 +1388,7 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry,
return NULL; return NULL;
out_unlock2: out_unlock2:
spin_unlock(&files->file_lock); rcu_read_unlock();
put_files_struct(files); put_files_struct(files);
out_unlock: out_unlock:
iput(inode); iput(inode);
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include <linux/personality.h> /* for STICKY_TIMEOUTS */ #include <linux/personality.h> /* for STICKY_TIMEOUTS */
#include <linux/file.h> #include <linux/file.h>
#include <linux/fs.h> #include <linux/fs.h>
#include <linux/rcupdate.h>
#include <asm/uaccess.h> #include <asm/uaccess.h>
...@@ -185,9 +186,9 @@ int do_select(int n, fd_set_bits *fds, long *timeout) ...@@ -185,9 +186,9 @@ int do_select(int n, fd_set_bits *fds, long *timeout)
int retval, i; int retval, i;
long __timeout = *timeout; long __timeout = *timeout;
spin_lock(&current->files->file_lock); rcu_read_lock();
retval = max_select_fd(n, fds); retval = max_select_fd(n, fds);
spin_unlock(&current->files->file_lock); rcu_read_unlock();
if (retval < 0) if (retval < 0)
return retval; return retval;
...@@ -329,8 +330,10 @@ sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, s ...@@ -329,8 +330,10 @@ sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, s
goto out_nofds; goto out_nofds;
/* max_fdset can increase, so grab it once to avoid race */ /* max_fdset can increase, so grab it once to avoid race */
rcu_read_lock();
fdt = files_fdtable(current->files); fdt = files_fdtable(current->files);
max_fdset = fdt->max_fdset; max_fdset = fdt->max_fdset;
rcu_read_unlock();
if (n > max_fdset) if (n > max_fdset)
n = max_fdset; n = max_fdset;
...@@ -469,10 +472,14 @@ asmlinkage long sys_poll(struct pollfd __user * ufds, unsigned int nfds, long ti ...@@ -469,10 +472,14 @@ asmlinkage long sys_poll(struct pollfd __user * ufds, unsigned int nfds, long ti
struct poll_list *head; struct poll_list *head;
struct poll_list *walk; struct poll_list *walk;
struct fdtable *fdt; struct fdtable *fdt;
int max_fdset;
/* Do a sanity check on nfds ... */ /* Do a sanity check on nfds ... */
rcu_read_lock();
fdt = files_fdtable(current->files); fdt = files_fdtable(current->files);
if (nfds > fdt->max_fdset && nfds > OPEN_MAX) max_fdset = fdt->max_fdset;
rcu_read_unlock();
if (nfds > max_fdset && nfds > OPEN_MAX)
return -EINVAL; return -EINVAL;
if (timeout) { if (timeout) {
......
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
#include <linux/module.h> #include <linux/module.h>
#include <linux/skbuff.h> #include <linux/skbuff.h>
#include <linux/file.h> #include <linux/file.h>
#include <linux/rcupdate.h>
#include <net/sock.h> #include <net/sock.h>
#include <linux/netfilter_ipv4/ipt_owner.h> #include <linux/netfilter_ipv4/ipt_owner.h>
......
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
#include <linux/module.h> #include <linux/module.h>
#include <linux/skbuff.h> #include <linux/skbuff.h>
#include <linux/file.h> #include <linux/file.h>
#include <linux/rcupdate.h>
#include <net/sock.h> #include <net/sock.h>
#include <linux/netfilter_ipv6/ip6t_owner.h> #include <linux/netfilter_ipv6/ip6t_owner.h>
......
...@@ -1652,7 +1652,7 @@ static inline void flush_unauthorized_files(struct files_struct * files) ...@@ -1652,7 +1652,7 @@ static inline void flush_unauthorized_files(struct files_struct * files)
continue; continue;
} }
if (devnull) { if (devnull) {
atomic_inc(&devnull->f_count); rcuref_inc(&devnull->f_count);
} else { } else {
devnull = dentry_open(dget(selinux_null), mntget(selinuxfs_mount), O_RDWR); devnull = dentry_open(dget(selinux_null), mntget(selinuxfs_mount), O_RDWR);
if (!devnull) { if (!devnull) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment