Commit cea39746 authored by Andrew Morton's avatar Andrew Morton Committed by Linus Torvalds

[PATCH] add file_operations.fcntl

From: Chuck Lever <cel@citi.umich.edu>

O_DIRECT|O_APPEND cannot possibly work on NFS, so NFS needs some way of
preventing the user from setting this combination.  We felt that the best
way of implementing this restriction is to allow the filesystem to implement
its own fcntl() handler.

This patch does that, and provides the appropriate handler for NFS.

Additional details from Chuck:

Forgetting O_DIRECT for a moment, O_APPEND writes on NFS don't work in any
case when multiple clients are writing to a file, since an NFS client can
never guarantee it knows where the true end of file is 100% of the time.
It works as expected iff only one client writes to an O_APPEND file at a
time.

Multi-client O_APPEND writing doesn't seem to be a problem for any
application I'm aware of.  Since it can be made to behave in the
multi-client case with careful application logic or by using file locking,
I don't think we should disallow it.

I want to drop the inode semaphore when doing NFS direct I/O because it is
synchronous; holding the i_sem means we reduce direct I/O concurrency to
one I/O per file at a time.  The important thing sct was worried about was
the case where a single client is writing with O_APPEND and O_DIRECT, and
we don't hold the i_sem during the write.

We must at least hold the i_sem when determining where the end of file is
to do the O_APPEND write.  In 2.6, I believe that is handled correctly in
the VFS layer, so this is not an issue for 2.6, right?
parent 3f66b056
...@@ -282,80 +282,88 @@ void f_delown(struct file *filp) ...@@ -282,80 +282,88 @@ void f_delown(struct file *filp)
EXPORT_SYMBOL(f_delown); EXPORT_SYMBOL(f_delown);
static long do_fcntl(unsigned int fd, unsigned int cmd, long generic_file_fcntl(int fd, unsigned int cmd,
unsigned long arg, struct file * filp) unsigned long arg, struct file *filp)
{ {
long err = -EINVAL; long err = -EINVAL;
switch (cmd) { switch (cmd) {
case F_DUPFD: case F_DUPFD:
get_file(filp); get_file(filp);
err = dupfd(filp, arg); err = dupfd(filp, arg);
break; break;
case F_GETFD: case F_GETFD:
err = get_close_on_exec(fd) ? FD_CLOEXEC : 0; err = get_close_on_exec(fd) ? FD_CLOEXEC : 0;
break; break;
case F_SETFD: case F_SETFD:
err = 0; err = 0;
set_close_on_exec(fd, arg & FD_CLOEXEC); set_close_on_exec(fd, arg & FD_CLOEXEC);
break; break;
case F_GETFL: case F_GETFL:
err = filp->f_flags; err = filp->f_flags;
break; break;
case F_SETFL: case F_SETFL:
err = setfl(fd, filp, arg); err = setfl(fd, filp, arg);
break; break;
case F_GETLK: case F_GETLK:
err = fcntl_getlk(filp, (struct flock __user *) arg); err = fcntl_getlk(filp, (struct flock __user *) arg);
break; break;
case F_SETLK: case F_SETLK:
case F_SETLKW: case F_SETLKW:
err = fcntl_setlk(filp, cmd, (struct flock __user *) arg); err = fcntl_setlk(filp, cmd, (struct flock __user *) arg);
break; break;
case F_GETOWN: case F_GETOWN:
/* /*
* XXX If f_owner is a process group, the * XXX If f_owner is a process group, the
* negative return value will get converted * negative return value will get converted
* into an error. Oops. If we keep the * into an error. Oops. If we keep the
* current syscall conventions, the only way * current syscall conventions, the only way
* to fix this will be in libc. * to fix this will be in libc.
*/ */
err = filp->f_owner.pid; err = filp->f_owner.pid;
force_successful_syscall_return(); force_successful_syscall_return();
break; break;
case F_SETOWN: case F_SETOWN:
err = f_setown(filp, arg, 1); err = f_setown(filp, arg, 1);
break; break;
case F_GETSIG: case F_GETSIG:
err = filp->f_owner.signum; err = filp->f_owner.signum;
break; break;
case F_SETSIG: case F_SETSIG:
/* arg == 0 restores default behaviour. */ /* arg == 0 restores default behaviour. */
if (arg < 0 || arg > _NSIG) { if (arg < 0 || arg > _NSIG) {
break;
}
err = 0;
filp->f_owner.signum = arg;
break;
case F_GETLEASE:
err = fcntl_getlease(filp);
break;
case F_SETLEASE:
err = fcntl_setlease(fd, filp, arg);
break;
case F_NOTIFY:
err = fcntl_dirnotify(fd, filp, arg);
break;
default:
break; break;
}
err = 0;
filp->f_owner.signum = arg;
break;
case F_GETLEASE:
err = fcntl_getlease(filp);
break;
case F_SETLEASE:
err = fcntl_setlease(fd, filp, arg);
break;
case F_NOTIFY:
err = fcntl_dirnotify(fd, filp, arg);
break;
default:
break;
} }
return err; return err;
} }
EXPORT_SYMBOL(generic_file_fcntl);
asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg) static long do_fcntl(int fd, unsigned int cmd,
unsigned long arg, struct file *filp)
{
if (filp->f_op && filp->f_op->fcntl)
return filp->f_op->fcntl(fd, cmd, arg, filp);
return generic_file_fcntl(fd, cmd, arg, filp);
}
asmlinkage long sys_fcntl(int fd, unsigned int cmd, unsigned long arg)
{ {
struct file * filp; struct file *filp;
long err = -EBADF; long err = -EBADF;
filp = fget(fd); filp = fget(fd);
......
...@@ -33,6 +33,8 @@ ...@@ -33,6 +33,8 @@
#define NFSDBG_FACILITY NFSDBG_FILE #define NFSDBG_FACILITY NFSDBG_FILE
static long nfs_file_fcntl(int fd, unsigned int cmd,
unsigned long arg, struct file *filp);
static int nfs_file_open(struct inode *, struct file *); static int nfs_file_open(struct inode *, struct file *);
static int nfs_file_release(struct inode *, struct file *); static int nfs_file_release(struct inode *, struct file *);
static int nfs_file_mmap(struct file *, struct vm_area_struct *); static int nfs_file_mmap(struct file *, struct vm_area_struct *);
...@@ -55,6 +57,7 @@ struct file_operations nfs_file_operations = { ...@@ -55,6 +57,7 @@ struct file_operations nfs_file_operations = {
.fsync = nfs_fsync, .fsync = nfs_fsync,
.lock = nfs_lock, .lock = nfs_lock,
.sendfile = nfs_file_sendfile, .sendfile = nfs_file_sendfile,
.fcntl = nfs_file_fcntl,
}; };
struct inode_operations nfs_file_inode_operations = { struct inode_operations nfs_file_inode_operations = {
...@@ -68,6 +71,28 @@ struct inode_operations nfs_file_inode_operations = { ...@@ -68,6 +71,28 @@ struct inode_operations nfs_file_inode_operations = {
# define IS_SWAPFILE(inode) (0) # define IS_SWAPFILE(inode) (0)
#endif #endif
#define nfs_invalid_flags (O_APPEND | O_DIRECT)
/*
 * NFS-specific fcntl() handler.
 *
 * Rejects the requests NFS cannot support and hands everything else
 * to generic_file_fcntl() unchanged.
 *
 * @fd:   descriptor the request was issued on
 * @cmd:  fcntl command (F_SETFL is the only one inspected here)
 * @arg:  command argument; for F_SETFL, the file status flags the
 *        caller is asking to set
 * @filp: open file the descriptor refers to
 *
 * Returns -EINVAL when F_SETFL requests O_APPEND and O_DIRECT together,
 * otherwise whatever generic_file_fcntl() returns.
 */
static long
nfs_file_fcntl(int fd, unsigned int cmd,
		unsigned long arg, struct file *filp)
{
	switch (cmd) {
	case F_SETFL:
		/*
		 * Validate the flags being requested (arg), not the flags
		 * currently on the file.  nfs_file_open() already refuses
		 * O_APPEND|O_DIRECT at open time, so testing filp->f_flags
		 * here would let a caller add the second flag afterwards.
		 */
		if ((arg & nfs_invalid_flags) == nfs_invalid_flags)
			return -EINVAL;
		break;
	default:
		break;
	}

	return generic_file_fcntl(fd, cmd, arg, filp);
}
/* /*
* Open file * Open file
*/ */
...@@ -78,6 +103,9 @@ nfs_file_open(struct inode *inode, struct file *filp) ...@@ -78,6 +103,9 @@ nfs_file_open(struct inode *inode, struct file *filp)
int (*open)(struct inode *, struct file *); int (*open)(struct inode *, struct file *);
int res = 0; int res = 0;
if ((filp->f_flags & nfs_invalid_flags) == nfs_invalid_flags)
return -EINVAL;
lock_kernel(); lock_kernel();
/* Do NFSv4 open() call */ /* Do NFSv4 open() call */
if ((open = server->rpc_ops->file_open) != NULL) if ((open = server->rpc_ops->file_open) != NULL)
......
...@@ -621,6 +621,9 @@ extern struct list_head file_lock_list; ...@@ -621,6 +621,9 @@ extern struct list_head file_lock_list;
#include <linux/fcntl.h> #include <linux/fcntl.h>
extern long generic_file_fcntl(int fd, unsigned int cmd,
unsigned long arg, struct file *filp);
extern int fcntl_getlk(struct file *, struct flock __user *); extern int fcntl_getlk(struct file *, struct flock __user *);
extern int fcntl_setlk(struct file *, unsigned int, struct flock __user *); extern int fcntl_setlk(struct file *, unsigned int, struct flock __user *);
...@@ -830,6 +833,8 @@ struct file_operations { ...@@ -830,6 +833,8 @@ struct file_operations {
ssize_t (*sendfile) (struct file *, loff_t *, size_t, read_actor_t, void __user *); ssize_t (*sendfile) (struct file *, loff_t *, size_t, read_actor_t, void __user *);
ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int); ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
long (*fcntl)(int fd, unsigned int cmd,
unsigned long arg, struct file *filp);
}; };
struct inode_operations { struct inode_operations {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment