Commit b8a7f3cd authored by Linus Torvalds

Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6

* 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6:
  kill I_LOCK
  fold do_sync_file_range into sys_sync_file_range
  fix up O_SYNC comments
  VFS/fsstack: handle 32-bit smp + preempt + large files in fsstack_copy_inode_size
  fsstack/ecryptfs: remove unused get_nlinks param to fsstack_copy_attr_all
  vfs: remove extraneous NULL d_inode check from do_filp_open
  fs: no games with DCACHE_UNHASHED
  fs: anon_inodes implement dname
  dio: fix use-after-free
parents a377d181 eaff8079
...@@ -16,7 +16,7 @@ ...@@ -16,7 +16,7 @@
#define O_NOATIME 04000000 #define O_NOATIME 04000000
#define O_CLOEXEC 010000000 /* set close_on_exec */ #define O_CLOEXEC 010000000 /* set close_on_exec */
/* /*
* Before Linux 2.6.32 only O_DSYNC semantics were implemented, but using * Before Linux 2.6.33 only O_DSYNC semantics were implemented, but using
* the O_SYNC flag. We continue to use the existing numerical value * the O_SYNC flag. We continue to use the existing numerical value
* for O_DSYNC semantics now, but using the correct symbolic name for it. * for O_DSYNC semantics now, but using the correct symbolic name for it.
* This new value is used to request true Posix O_SYNC semantics. It is * This new value is used to request true Posix O_SYNC semantics. It is
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
#define FASYNC 0x1000 /* fcntl, for BSD compatibility */ #define FASYNC 0x1000 /* fcntl, for BSD compatibility */
#define O_LARGEFILE 0x2000 /* allow large file opens */ #define O_LARGEFILE 0x2000 /* allow large file opens */
/* /*
* Before Linux 2.6.32 only O_DSYNC semantics were implemented, but using * Before Linux 2.6.33 only O_DSYNC semantics were implemented, but using
* the O_SYNC flag. We continue to use the existing numerical value * the O_SYNC flag. We continue to use the existing numerical value
* for O_DSYNC semantics now, but using the correct symbolic name for it. * for O_DSYNC semantics now, but using the correct symbolic name for it.
* This new value is used to request true Posix O_SYNC semantics. It is * This new value is used to request true Posix O_SYNC semantics. It is
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
#define O_NOATIME 0x200000 #define O_NOATIME 0x200000
#define O_CLOEXEC 0x400000 #define O_CLOEXEC 0x400000
/* /*
* Before Linux 2.6.32 only O_DSYNC semantics were implemented, but using * Before Linux 2.6.33 only O_DSYNC semantics were implemented, but using
* the O_SYNC flag. We continue to use the existing numerical value * the O_SYNC flag. We continue to use the existing numerical value
* for O_DSYNC semantics now, but using the correct symbolic name for it. * for O_DSYNC semantics now, but using the correct symbolic name for it.
* This new value is used to request true Posix O_SYNC semantics. It is * This new value is used to request true Posix O_SYNC semantics. It is
......
...@@ -35,14 +35,13 @@ static int anon_inodefs_get_sb(struct file_system_type *fs_type, int flags, ...@@ -35,14 +35,13 @@ static int anon_inodefs_get_sb(struct file_system_type *fs_type, int flags,
mnt); mnt);
} }
static int anon_inodefs_delete_dentry(struct dentry *dentry) /*
{ * anon_inodefs_dname() is called from d_path().
/*
* We faked vfs to believe the dentry was hashed when we created it.
* Now we restore the flag so that dput() will work correctly.
*/ */
dentry->d_flags |= DCACHE_UNHASHED; static char *anon_inodefs_dname(struct dentry *dentry, char *buffer, int buflen)
return 1; {
return dynamic_dname(dentry, buffer, buflen, "anon_inode:%s",
dentry->d_name.name);
} }
static struct file_system_type anon_inode_fs_type = { static struct file_system_type anon_inode_fs_type = {
...@@ -51,7 +50,7 @@ static struct file_system_type anon_inode_fs_type = { ...@@ -51,7 +50,7 @@ static struct file_system_type anon_inode_fs_type = {
.kill_sb = kill_anon_super, .kill_sb = kill_anon_super,
}; };
static const struct dentry_operations anon_inodefs_dentry_operations = { static const struct dentry_operations anon_inodefs_dentry_operations = {
.d_delete = anon_inodefs_delete_dentry, .d_dname = anon_inodefs_dname,
}; };
/* /*
...@@ -119,8 +118,6 @@ struct file *anon_inode_getfile(const char *name, ...@@ -119,8 +118,6 @@ struct file *anon_inode_getfile(const char *name,
atomic_inc(&anon_inode_inode->i_count); atomic_inc(&anon_inode_inode->i_count);
path.dentry->d_op = &anon_inodefs_dentry_operations; path.dentry->d_op = &anon_inodefs_dentry_operations;
/* Do not publish this dentry inside the global dentry hash table */
path.dentry->d_flags &= ~DCACHE_UNHASHED;
d_instantiate(path.dentry, anon_inode_inode); d_instantiate(path.dentry, anon_inode_inode);
error = -ENFILE; error = -ENFILE;
......
...@@ -1206,7 +1206,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, ...@@ -1206,7 +1206,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
* NOTE: filesystems with their own locking have to handle this * NOTE: filesystems with their own locking have to handle this
* on their own. * on their own.
*/ */
if (dio->flags & DIO_LOCKING) { if (flags & DIO_LOCKING) {
if (unlikely((rw & WRITE) && retval < 0)) { if (unlikely((rw & WRITE) && retval < 0)) {
loff_t isize = i_size_read(inode); loff_t isize = i_size_read(inode);
if (end > isize) if (end > isize)
......
...@@ -62,7 +62,7 @@ static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd) ...@@ -62,7 +62,7 @@ static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
struct inode *lower_inode = struct inode *lower_inode =
ecryptfs_inode_to_lower(dentry->d_inode); ecryptfs_inode_to_lower(dentry->d_inode);
fsstack_copy_attr_all(dentry->d_inode, lower_inode, NULL); fsstack_copy_attr_all(dentry->d_inode, lower_inode);
} }
out: out:
return rc; return rc;
......
...@@ -626,9 +626,9 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry, ...@@ -626,9 +626,9 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
lower_new_dir_dentry->d_inode, lower_new_dentry); lower_new_dir_dentry->d_inode, lower_new_dentry);
if (rc) if (rc)
goto out_lock; goto out_lock;
fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode, NULL); fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode);
if (new_dir != old_dir) if (new_dir != old_dir)
fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode, NULL); fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode);
out_lock: out_lock:
unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry); unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
dput(lower_new_dentry->d_parent); dput(lower_new_dentry->d_parent);
...@@ -967,7 +967,7 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia) ...@@ -967,7 +967,7 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
rc = notify_change(lower_dentry, ia); rc = notify_change(lower_dentry, ia);
mutex_unlock(&lower_dentry->d_inode->i_mutex); mutex_unlock(&lower_dentry->d_inode->i_mutex);
out: out:
fsstack_copy_attr_all(inode, lower_inode, NULL); fsstack_copy_attr_all(inode, lower_inode);
return rc; return rc;
} }
......
...@@ -189,7 +189,7 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry, ...@@ -189,7 +189,7 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
init_special_inode(inode, lower_inode->i_mode, init_special_inode(inode, lower_inode->i_mode,
lower_inode->i_rdev); lower_inode->i_rdev);
dentry->d_op = &ecryptfs_dops; dentry->d_op = &ecryptfs_dops;
fsstack_copy_attr_all(inode, lower_inode, NULL); fsstack_copy_attr_all(inode, lower_inode);
/* This size will be overwritten for real files w/ headers and /* This size will be overwritten for real files w/ headers and
* other metadata */ * other metadata */
fsstack_copy_inode_size(inode, lower_inode); fsstack_copy_inode_size(inode, lower_inode);
......
...@@ -125,7 +125,7 @@ static struct inode *gfs2_iget_skip(struct super_block *sb, ...@@ -125,7 +125,7 @@ static struct inode *gfs2_iget_skip(struct super_block *sb,
* directory entry when gfs2_inode_lookup() is invoked. Part of the code * directory entry when gfs2_inode_lookup() is invoked. Part of the code
* segment inside gfs2_inode_lookup code needs to get moved around. * segment inside gfs2_inode_lookup code needs to get moved around.
* *
* Clean up I_LOCK and I_NEW as well. * Clears I_NEW as well.
**/ **/
void gfs2_set_iop(struct inode *inode) void gfs2_set_iop(struct inode *inode)
......
...@@ -113,7 +113,7 @@ static void wake_up_inode(struct inode *inode) ...@@ -113,7 +113,7 @@ static void wake_up_inode(struct inode *inode)
* Prevent speculative execution through spin_unlock(&inode_lock); * Prevent speculative execution through spin_unlock(&inode_lock);
*/ */
smp_mb(); smp_mb();
wake_up_bit(&inode->i_state, __I_LOCK); wake_up_bit(&inode->i_state, __I_NEW);
} }
/** /**
...@@ -690,17 +690,17 @@ void unlock_new_inode(struct inode *inode) ...@@ -690,17 +690,17 @@ void unlock_new_inode(struct inode *inode)
} }
#endif #endif
/* /*
* This is special! We do not need the spinlock when clearing I_LOCK, * This is special! We do not need the spinlock when clearing I_NEW,
* because we're guaranteed that nobody else tries to do anything about * because we're guaranteed that nobody else tries to do anything about
* the state of the inode when it is locked, as we just created it (so * the state of the inode when it is locked, as we just created it (so
* there can be no old holders that haven't tested I_LOCK). * there can be no old holders that haven't tested I_NEW).
* However we must emit the memory barrier so that other CPUs reliably * However we must emit the memory barrier so that other CPUs reliably
* see the clearing of I_LOCK after the other inode initialisation has * see the clearing of I_NEW after the other inode initialisation has
* completed. * completed.
*/ */
smp_mb(); smp_mb();
WARN_ON((inode->i_state & (I_LOCK|I_NEW)) != (I_LOCK|I_NEW)); WARN_ON(!(inode->i_state & I_NEW));
inode->i_state &= ~(I_LOCK|I_NEW); inode->i_state &= ~I_NEW;
wake_up_inode(inode); wake_up_inode(inode);
} }
EXPORT_SYMBOL(unlock_new_inode); EXPORT_SYMBOL(unlock_new_inode);
...@@ -731,7 +731,7 @@ static struct inode *get_new_inode(struct super_block *sb, ...@@ -731,7 +731,7 @@ static struct inode *get_new_inode(struct super_block *sb,
goto set_failed; goto set_failed;
__inode_add_to_lists(sb, head, inode); __inode_add_to_lists(sb, head, inode);
inode->i_state = I_LOCK|I_NEW; inode->i_state = I_NEW;
spin_unlock(&inode_lock); spin_unlock(&inode_lock);
/* Return the locked inode with I_NEW set, the /* Return the locked inode with I_NEW set, the
...@@ -778,7 +778,7 @@ static struct inode *get_new_inode_fast(struct super_block *sb, ...@@ -778,7 +778,7 @@ static struct inode *get_new_inode_fast(struct super_block *sb,
if (!old) { if (!old) {
inode->i_ino = ino; inode->i_ino = ino;
__inode_add_to_lists(sb, head, inode); __inode_add_to_lists(sb, head, inode);
inode->i_state = I_LOCK|I_NEW; inode->i_state = I_NEW;
spin_unlock(&inode_lock); spin_unlock(&inode_lock);
/* Return the locked inode with I_NEW set, the /* Return the locked inode with I_NEW set, the
...@@ -1083,7 +1083,7 @@ int insert_inode_locked(struct inode *inode) ...@@ -1083,7 +1083,7 @@ int insert_inode_locked(struct inode *inode)
ino_t ino = inode->i_ino; ino_t ino = inode->i_ino;
struct hlist_head *head = inode_hashtable + hash(sb, ino); struct hlist_head *head = inode_hashtable + hash(sb, ino);
inode->i_state |= I_LOCK|I_NEW; inode->i_state |= I_NEW;
while (1) { while (1) {
struct hlist_node *node; struct hlist_node *node;
struct inode *old = NULL; struct inode *old = NULL;
...@@ -1120,7 +1120,7 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval, ...@@ -1120,7 +1120,7 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
struct super_block *sb = inode->i_sb; struct super_block *sb = inode->i_sb;
struct hlist_head *head = inode_hashtable + hash(sb, hashval); struct hlist_head *head = inode_hashtable + hash(sb, hashval);
inode->i_state |= I_LOCK|I_NEW; inode->i_state |= I_NEW;
while (1) { while (1) {
struct hlist_node *node; struct hlist_node *node;
...@@ -1510,7 +1510,7 @@ EXPORT_SYMBOL(inode_wait); ...@@ -1510,7 +1510,7 @@ EXPORT_SYMBOL(inode_wait);
* until the deletion _might_ have completed. Callers are responsible * until the deletion _might_ have completed. Callers are responsible
* to recheck inode state. * to recheck inode state.
* *
* It doesn't matter if I_LOCK is not set initially, a call to * It doesn't matter if I_NEW is not set initially, a call to
* wake_up_inode() after removing from the hash list will DTRT. * wake_up_inode() after removing from the hash list will DTRT.
* *
* This is called with inode_lock held. * This is called with inode_lock held.
...@@ -1518,8 +1518,8 @@ EXPORT_SYMBOL(inode_wait); ...@@ -1518,8 +1518,8 @@ EXPORT_SYMBOL(inode_wait);
static void __wait_on_freeing_inode(struct inode *inode) static void __wait_on_freeing_inode(struct inode *inode)
{ {
wait_queue_head_t *wq; wait_queue_head_t *wq;
DEFINE_WAIT_BIT(wait, &inode->i_state, __I_LOCK); DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
wq = bit_waitqueue(&inode->i_state, __I_LOCK); wq = bit_waitqueue(&inode->i_state, __I_NEW);
prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE); prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
spin_unlock(&inode_lock); spin_unlock(&inode_lock);
schedule(); schedule();
......
...@@ -1292,7 +1292,7 @@ int txCommit(tid_t tid, /* transaction identifier */ ...@@ -1292,7 +1292,7 @@ int txCommit(tid_t tid, /* transaction identifier */
*/ */
/* /*
* I believe this code is no longer needed. Splitting I_LOCK * I believe this code is no longer needed. Splitting I_LOCK
* into two bits, I_LOCK and I_SYNC should prevent this * into two bits, I_NEW and I_SYNC should prevent this
* deadlock as well. But since I don't have a JFS testload * deadlock as well. But since I don't have a JFS testload
* to verify this, only a trivial s/I_LOCK/I_SYNC/ was done. * to verify this, only a trivial s/I_LOCK/I_SYNC/ was done.
* Joern * Joern
......
...@@ -1764,7 +1764,7 @@ struct file *do_filp_open(int dfd, const char *pathname, ...@@ -1764,7 +1764,7 @@ struct file *do_filp_open(int dfd, const char *pathname,
path_to_nameidata(&path, &nd); path_to_nameidata(&path, &nd);
error = -EISDIR; error = -EISDIR;
if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode)) if (S_ISDIR(path.dentry->d_inode->i_mode))
goto exit; goto exit;
ok: ok:
/* /*
......
...@@ -530,7 +530,7 @@ static int ntfs_is_extended_system_file(ntfs_attr_search_ctx *ctx) ...@@ -530,7 +530,7 @@ static int ntfs_is_extended_system_file(ntfs_attr_search_ctx *ctx)
* the ntfs inode. * the ntfs inode.
* *
* Q: What locks are held when the function is called? * Q: What locks are held when the function is called?
* A: i_state has I_LOCK set, hence the inode is locked, also * A: i_state has I_NEW set, hence the inode is locked, also
* i_count is set to 1, so it is not going to go away * i_count is set to 1, so it is not going to go away
* i_flags is set to 0 and we have no business touching it. Only an ioctl() * i_flags is set to 0 and we have no business touching it. Only an ioctl()
* is allowed to write to them. We should of course be honouring them but * is allowed to write to them. We should of course be honouring them but
...@@ -1207,7 +1207,7 @@ static int ntfs_read_locked_inode(struct inode *vi) ...@@ -1207,7 +1207,7 @@ static int ntfs_read_locked_inode(struct inode *vi)
* necessary fields in @vi as well as initializing the ntfs inode. * necessary fields in @vi as well as initializing the ntfs inode.
* *
* Q: What locks are held when the function is called? * Q: What locks are held when the function is called?
* A: i_state has I_LOCK set, hence the inode is locked, also * A: i_state has I_NEW set, hence the inode is locked, also
* i_count is set to 1, so it is not going to go away * i_count is set to 1, so it is not going to go away
* *
* Return 0 on success and -errno on error. In the error case, the inode will * Return 0 on success and -errno on error. In the error case, the inode will
...@@ -1474,7 +1474,7 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi) ...@@ -1474,7 +1474,7 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
* normal directory inodes. * normal directory inodes.
* *
* Q: What locks are held when the function is called? * Q: What locks are held when the function is called?
* A: i_state has I_LOCK set, hence the inode is locked, also * A: i_state has I_NEW set, hence the inode is locked, also
* i_count is set to 1, so it is not going to go away * i_count is set to 1, so it is not going to go away
* *
* Return 0 on success and -errno on error. In the error case, the inode will * Return 0 on success and -errno on error. In the error case, the inode will
......
...@@ -906,17 +906,6 @@ void free_pipe_info(struct inode *inode) ...@@ -906,17 +906,6 @@ void free_pipe_info(struct inode *inode)
} }
static struct vfsmount *pipe_mnt __read_mostly; static struct vfsmount *pipe_mnt __read_mostly;
static int pipefs_delete_dentry(struct dentry *dentry)
{
/*
* At creation time, we pretended this dentry was hashed
* (by clearing DCACHE_UNHASHED bit in d_flags)
* At delete time, we restore the truth : not hashed.
* (so that dput() can proceed correctly)
*/
dentry->d_flags |= DCACHE_UNHASHED;
return 0;
}
/* /*
* pipefs_dname() is called from d_path(). * pipefs_dname() is called from d_path().
...@@ -928,7 +917,6 @@ static char *pipefs_dname(struct dentry *dentry, char *buffer, int buflen) ...@@ -928,7 +917,6 @@ static char *pipefs_dname(struct dentry *dentry, char *buffer, int buflen)
} }
static const struct dentry_operations pipefs_dentry_operations = { static const struct dentry_operations pipefs_dentry_operations = {
.d_delete = pipefs_delete_dentry,
.d_dname = pipefs_dname, .d_dname = pipefs_dname,
}; };
...@@ -989,12 +977,6 @@ struct file *create_write_pipe(int flags) ...@@ -989,12 +977,6 @@ struct file *create_write_pipe(int flags)
path.mnt = mntget(pipe_mnt); path.mnt = mntget(pipe_mnt);
path.dentry->d_op = &pipefs_dentry_operations; path.dentry->d_op = &pipefs_dentry_operations;
/*
* We dont want to publish this dentry into global dentry hash table.
* We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
* This permits a working /proc/$pid/fd/XXX on pipes
*/
path.dentry->d_flags &= ~DCACHE_UNHASHED;
d_instantiate(path.dentry, inode); d_instantiate(path.dentry, inode);
err = -ENFILE; err = -ENFILE;
......
...@@ -7,18 +7,63 @@ ...@@ -7,18 +7,63 @@
* This function cannot be inlined since i_size_{read,write} is rather * This function cannot be inlined since i_size_{read,write} is rather
* heavy-weight on 32-bit systems * heavy-weight on 32-bit systems
*/ */
void fsstack_copy_inode_size(struct inode *dst, const struct inode *src) void fsstack_copy_inode_size(struct inode *dst, struct inode *src)
{ {
i_size_write(dst, i_size_read((struct inode *)src)); loff_t i_size;
dst->i_blocks = src->i_blocks; blkcnt_t i_blocks;
/*
* i_size_read() includes its own seqlocking and protection from
* preemption (see include/linux/fs.h): we need nothing extra for
* that here, and prefer to avoid nesting locks than attempt to keep
* i_size and i_blocks in sync together.
*/
i_size = i_size_read(src);
/*
* But if CONFIG_LBDAF (on 32-bit), we ought to make an effort to
* keep the two halves of i_blocks in sync despite SMP or PREEMPT -
* though stat's generic_fillattr() doesn't bother, and we won't be
* applying quotas (where i_blocks does become important) at the
* upper level.
*
* We don't actually know what locking is used at the lower level;
* but if it's a filesystem that supports quotas, it will be using
* i_lock as in inode_add_bytes(). tmpfs uses other locking, and
* its 32-bit is (just) able to exceed 2TB i_size with the aid of
* holes; but its i_blocks cannot carry into the upper long without
* almost 2TB swap - let's ignore that case.
*/
if (sizeof(i_blocks) > sizeof(long))
spin_lock(&src->i_lock);
i_blocks = src->i_blocks;
if (sizeof(i_blocks) > sizeof(long))
spin_unlock(&src->i_lock);
/*
* If CONFIG_SMP or CONFIG_PREEMPT on 32-bit, it's vital for
* fsstack_copy_inode_size() to hold some lock around
* i_size_write(), otherwise i_size_read() may spin forever (see
* include/linux/fs.h). We don't necessarily hold i_mutex when this
* is called, so take i_lock for that case.
*
* And if CONFIG_LBDAF (on 32-bit), continue our effort to keep the
* two halves of i_blocks in sync despite SMP or PREEMPT: use i_lock
* for that case too, and do both at once by combining the tests.
*
* There is none of this locking overhead in the 64-bit case.
*/
if (sizeof(i_size) > sizeof(long) || sizeof(i_blocks) > sizeof(long))
spin_lock(&dst->i_lock);
i_size_write(dst, i_size);
dst->i_blocks = i_blocks;
if (sizeof(i_size) > sizeof(long) || sizeof(i_blocks) > sizeof(long))
spin_unlock(&dst->i_lock);
} }
EXPORT_SYMBOL_GPL(fsstack_copy_inode_size); EXPORT_SYMBOL_GPL(fsstack_copy_inode_size);
/* copy all attributes; get_nlinks is optional way to override the i_nlink /* copy all attributes */
* copying void fsstack_copy_attr_all(struct inode *dest, const struct inode *src)
*/
void fsstack_copy_attr_all(struct inode *dest, const struct inode *src,
int (*get_nlinks)(struct inode *))
{ {
dest->i_mode = src->i_mode; dest->i_mode = src->i_mode;
dest->i_uid = src->i_uid; dest->i_uid = src->i_uid;
...@@ -29,14 +74,6 @@ void fsstack_copy_attr_all(struct inode *dest, const struct inode *src, ...@@ -29,14 +74,6 @@ void fsstack_copy_attr_all(struct inode *dest, const struct inode *src,
dest->i_ctime = src->i_ctime; dest->i_ctime = src->i_ctime;
dest->i_blkbits = src->i_blkbits; dest->i_blkbits = src->i_blkbits;
dest->i_flags = src->i_flags; dest->i_flags = src->i_flags;
/*
* Update the nlinks AFTER updating the above fields, because the
* get_links callback may depend on them.
*/
if (!get_nlinks)
dest->i_nlink = src->i_nlink; dest->i_nlink = src->i_nlink;
else
dest->i_nlink = (*get_nlinks)(dest);
} }
EXPORT_SYMBOL_GPL(fsstack_copy_attr_all); EXPORT_SYMBOL_GPL(fsstack_copy_attr_all);
...@@ -355,6 +355,7 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes, ...@@ -355,6 +355,7 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes,
{ {
int ret; int ret;
struct file *file; struct file *file;
struct address_space *mapping;
loff_t endbyte; /* inclusive */ loff_t endbyte; /* inclusive */
int fput_needed; int fput_needed;
umode_t i_mode; umode_t i_mode;
...@@ -405,7 +406,28 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes, ...@@ -405,7 +406,28 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes,
!S_ISLNK(i_mode)) !S_ISLNK(i_mode))
goto out_put; goto out_put;
ret = do_sync_mapping_range(file->f_mapping, offset, endbyte, flags); mapping = file->f_mapping;
if (!mapping) {
ret = -EINVAL;
goto out_put;
}
ret = 0;
if (flags & SYNC_FILE_RANGE_WAIT_BEFORE) {
ret = filemap_fdatawait_range(mapping, offset, endbyte);
if (ret < 0)
goto out_put;
}
if (flags & SYNC_FILE_RANGE_WRITE) {
ret = filemap_fdatawrite_range(mapping, offset, endbyte);
if (ret < 0)
goto out_put;
}
if (flags & SYNC_FILE_RANGE_WAIT_AFTER)
ret = filemap_fdatawait_range(mapping, offset, endbyte);
out_put: out_put:
fput_light(file, fput_needed); fput_light(file, fput_needed);
out: out:
...@@ -437,38 +459,3 @@ asmlinkage long SyS_sync_file_range2(long fd, long flags, ...@@ -437,38 +459,3 @@ asmlinkage long SyS_sync_file_range2(long fd, long flags,
} }
SYSCALL_ALIAS(sys_sync_file_range2, SyS_sync_file_range2); SYSCALL_ALIAS(sys_sync_file_range2, SyS_sync_file_range2);
#endif #endif
/*
* `endbyte' is inclusive
*/
int do_sync_mapping_range(struct address_space *mapping, loff_t offset,
loff_t endbyte, unsigned int flags)
{
int ret;
if (!mapping) {
ret = -EINVAL;
goto out;
}
ret = 0;
if (flags & SYNC_FILE_RANGE_WAIT_BEFORE) {
ret = filemap_fdatawait_range(mapping, offset, endbyte);
if (ret < 0)
goto out;
}
if (flags & SYNC_FILE_RANGE_WRITE) {
ret = __filemap_fdatawrite_range(mapping, offset, endbyte,
WB_SYNC_ALL);
if (ret < 0)
goto out;
}
if (flags & SYNC_FILE_RANGE_WAIT_AFTER) {
ret = filemap_fdatawait_range(mapping, offset, endbyte);
}
out:
return ret;
}
EXPORT_SYMBOL_GPL(do_sync_mapping_range);
...@@ -45,7 +45,7 @@ ...@@ -45,7 +45,7 @@
* *
* Similarly, @i_mutex is not always locked in 'ubifs_readpage()', e.g., the * Similarly, @i_mutex is not always locked in 'ubifs_readpage()', e.g., the
* read-ahead path does not lock it ("sys_read -> generic_file_aio_read -> * read-ahead path does not lock it ("sys_read -> generic_file_aio_read ->
* ondemand_readahead -> readpage"). In case of readahead, @I_LOCK flag is not * ondemand_readahead -> readpage"). In case of readahead, @I_SYNC flag is not
* set as well. However, UBIFS disables readahead. * set as well. However, UBIFS disables readahead.
*/ */
......
...@@ -794,7 +794,7 @@ xfs_setup_inode( ...@@ -794,7 +794,7 @@ xfs_setup_inode(
struct inode *inode = &ip->i_vnode; struct inode *inode = &ip->i_vnode;
inode->i_ino = ip->i_ino; inode->i_ino = ip->i_ino;
inode->i_state = I_NEW|I_LOCK; inode->i_state = I_NEW;
inode_add_to_lists(ip->i_mount->m_super, inode); inode_add_to_lists(ip->i_mount->m_super, inode);
inode->i_mode = ip->i_d.di_mode; inode->i_mode = ip->i_d.di_mode;
......
...@@ -91,7 +91,7 @@ xfs_inode_alloc( ...@@ -91,7 +91,7 @@ xfs_inode_alloc(
ip->i_new_size = 0; ip->i_new_size = 0;
/* prevent anyone from using this yet */ /* prevent anyone from using this yet */
VFS_I(ip)->i_state = I_NEW|I_LOCK; VFS_I(ip)->i_state = I_NEW;
return ip; return ip;
} }
...@@ -217,7 +217,7 @@ xfs_iget_cache_hit( ...@@ -217,7 +217,7 @@ xfs_iget_cache_hit(
trace_xfs_iget_reclaim(ip); trace_xfs_iget_reclaim(ip);
goto out_error; goto out_error;
} }
inode->i_state = I_LOCK|I_NEW; inode->i_state = I_NEW;
} else { } else {
/* If the VFS inode is being torn down, pause and try again. */ /* If the VFS inode is being torn down, pause and try again. */
if (!igrab(inode)) { if (!igrab(inode)) {
......
...@@ -51,7 +51,7 @@ ...@@ -51,7 +51,7 @@
#endif #endif
/* /*
* Before Linux 2.6.32 only O_DSYNC semantics were implemented, but using * Before Linux 2.6.33 only O_DSYNC semantics were implemented, but using
* the O_SYNC flag. We continue to use the existing numerical value * the O_SYNC flag. We continue to use the existing numerical value
* for O_DSYNC semantics now, but using the correct symbolic name for it. * for O_DSYNC semantics now, but using the correct symbolic name for it.
* This new value is used to request true Posix O_SYNC semantics. It is * This new value is used to request true Posix O_SYNC semantics. It is
......
...@@ -1095,10 +1095,6 @@ struct file_lock { ...@@ -1095,10 +1095,6 @@ struct file_lock {
extern void send_sigio(struct fown_struct *fown, int fd, int band); extern void send_sigio(struct fown_struct *fown, int fd, int band);
/* fs/sync.c */
extern int do_sync_mapping_range(struct address_space *mapping, loff_t offset,
loff_t endbyte, unsigned int flags);
#ifdef CONFIG_FILE_LOCKING #ifdef CONFIG_FILE_LOCKING
extern int fcntl_getlk(struct file *, struct flock __user *); extern int fcntl_getlk(struct file *, struct flock __user *);
extern int fcntl_setlk(unsigned int, struct file *, unsigned int, extern int fcntl_setlk(unsigned int, struct file *, unsigned int,
...@@ -1591,7 +1587,7 @@ struct super_operations { ...@@ -1591,7 +1587,7 @@ struct super_operations {
* until that flag is cleared. I_WILL_FREE, I_FREEING and I_CLEAR are set at * until that flag is cleared. I_WILL_FREE, I_FREEING and I_CLEAR are set at
* various stages of removing an inode. * various stages of removing an inode.
* *
* Two bits are used for locking and completion notification, I_LOCK and I_SYNC. * Two bits are used for locking and completion notification, I_NEW and I_SYNC.
* *
* I_DIRTY_SYNC Inode is dirty, but doesn't have to be written on * I_DIRTY_SYNC Inode is dirty, but doesn't have to be written on
* fdatasync(). i_atime is the usual cause. * fdatasync(). i_atime is the usual cause.
...@@ -1600,8 +1596,14 @@ struct super_operations { ...@@ -1600,8 +1596,14 @@ struct super_operations {
* don't have to write inode on fdatasync() when only * don't have to write inode on fdatasync() when only
* mtime has changed in it. * mtime has changed in it.
* I_DIRTY_PAGES Inode has dirty pages. Inode itself may be clean. * I_DIRTY_PAGES Inode has dirty pages. Inode itself may be clean.
* I_NEW get_new_inode() sets i_state to I_LOCK|I_NEW. Both * I_NEW Serves as both a mutex and completion notification.
* are cleared by unlock_new_inode(), called from iget(). * New inodes set I_NEW. If two processes both create
* the same inode, one of them will release its inode and
* wait for I_NEW to be released before returning.
* Inodes in I_WILL_FREE, I_FREEING or I_CLEAR state can
* also cause waiting on I_NEW, without I_NEW actually
* being set. find_inode() uses this to prevent returning
* nearly-dead inodes.
* I_WILL_FREE Must be set when calling write_inode_now() if i_count * I_WILL_FREE Must be set when calling write_inode_now() if i_count
* is zero. I_FREEING must be set when I_WILL_FREE is * is zero. I_FREEING must be set when I_WILL_FREE is
* cleared. * cleared.
...@@ -1615,20 +1617,11 @@ struct super_operations { ...@@ -1615,20 +1617,11 @@ struct super_operations {
* prohibited for many purposes. iget() must wait for * prohibited for many purposes. iget() must wait for
* the inode to be completely released, then create it * the inode to be completely released, then create it
* anew. Other functions will just ignore such inodes, * anew. Other functions will just ignore such inodes,
* if appropriate. I_LOCK is used for waiting. * if appropriate. I_NEW is used for waiting.
* *
* I_LOCK Serves as both a mutex and completion notification. * I_SYNC Synchronized write of dirty inode data. The bit is
* New inodes set I_LOCK. If two processes both create * set during data writeback, and cleared with a wakeup
* the same inode, one of them will release its inode and * on the bit address once it is done.
* wait for I_LOCK to be released before returning.
* Inodes in I_WILL_FREE, I_FREEING or I_CLEAR state can
* also cause waiting on I_LOCK, without I_LOCK actually
* being set. find_inode() uses this to prevent returning
* nearly-dead inodes.
* I_SYNC Similar to I_LOCK, but limited in scope to writeback
* of inode dirty data. Having a separate lock for this
* purpose reduces latency and prevents some filesystem-
* specific deadlocks.
* *
* Q: What is the difference between I_WILL_FREE and I_FREEING? * Q: What is the difference between I_WILL_FREE and I_FREEING?
* Q: igrab() only checks on (I_FREEING|I_WILL_FREE). Should it also check on * Q: igrab() only checks on (I_FREEING|I_WILL_FREE). Should it also check on
...@@ -1637,13 +1630,12 @@ struct super_operations { ...@@ -1637,13 +1630,12 @@ struct super_operations {
#define I_DIRTY_SYNC 1 #define I_DIRTY_SYNC 1
#define I_DIRTY_DATASYNC 2 #define I_DIRTY_DATASYNC 2
#define I_DIRTY_PAGES 4 #define I_DIRTY_PAGES 4
#define I_NEW 8 #define __I_NEW 3
#define I_NEW (1 << __I_NEW)
#define I_WILL_FREE 16 #define I_WILL_FREE 16
#define I_FREEING 32 #define I_FREEING 32
#define I_CLEAR 64 #define I_CLEAR 64
#define __I_LOCK 7 #define __I_SYNC 7
#define I_LOCK (1 << __I_LOCK)
#define __I_SYNC 8
#define I_SYNC (1 << __I_SYNC) #define I_SYNC (1 << __I_SYNC)
#define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES) #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES)
......
...@@ -8,10 +8,8 @@ ...@@ -8,10 +8,8 @@
#include <linux/fs.h> #include <linux/fs.h>
/* externs for fs/stack.c */ /* externs for fs/stack.c */
extern void fsstack_copy_attr_all(struct inode *dest, const struct inode *src, extern void fsstack_copy_attr_all(struct inode *dest, const struct inode *src);
int (*get_nlinks)(struct inode *)); extern void fsstack_copy_inode_size(struct inode *dst, struct inode *src);
extern void fsstack_copy_inode_size(struct inode *dst, const struct inode *src);
/* inlines */ /* inlines */
static inline void fsstack_copy_attr_atime(struct inode *dest, static inline void fsstack_copy_attr_atime(struct inode *dest,
......
...@@ -79,8 +79,7 @@ void wakeup_flusher_threads(long nr_pages); ...@@ -79,8 +79,7 @@ void wakeup_flusher_threads(long nr_pages);
static inline void wait_on_inode(struct inode *inode) static inline void wait_on_inode(struct inode *inode)
{ {
might_sleep(); might_sleep();
wait_on_bit(&inode->i_state, __I_LOCK, inode_wait, wait_on_bit(&inode->i_state, __I_NEW, inode_wait, TASK_UNINTERRUPTIBLE);
TASK_UNINTERRUPTIBLE);
} }
static inline void inode_sync_wait(struct inode *inode) static inline void inode_sync_wait(struct inode *inode)
{ {
......
...@@ -312,18 +312,6 @@ static struct file_system_type sock_fs_type = { ...@@ -312,18 +312,6 @@ static struct file_system_type sock_fs_type = {
.kill_sb = kill_anon_super, .kill_sb = kill_anon_super,
}; };
static int sockfs_delete_dentry(struct dentry *dentry)
{
/*
* At creation time, we pretended this dentry was hashed
* (by clearing DCACHE_UNHASHED bit in d_flags)
* At delete time, we restore the truth : not hashed.
* (so that dput() can proceed correctly)
*/
dentry->d_flags |= DCACHE_UNHASHED;
return 0;
}
/* /*
* sockfs_dname() is called from d_path(). * sockfs_dname() is called from d_path().
*/ */
...@@ -334,7 +322,6 @@ static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen) ...@@ -334,7 +322,6 @@ static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
} }
static const struct dentry_operations sockfs_dentry_operations = { static const struct dentry_operations sockfs_dentry_operations = {
.d_delete = sockfs_delete_dentry,
.d_dname = sockfs_dname, .d_dname = sockfs_dname,
}; };
...@@ -374,12 +361,6 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags) ...@@ -374,12 +361,6 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags)
path.mnt = mntget(sock_mnt); path.mnt = mntget(sock_mnt);
path.dentry->d_op = &sockfs_dentry_operations; path.dentry->d_op = &sockfs_dentry_operations;
/*
* We dont want to push this dentry into global dentry hash table.
* We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
* This permits a working /proc/$pid/fd/XXX on sockets
*/
path.dentry->d_flags &= ~DCACHE_UNHASHED;
d_instantiate(path.dentry, SOCK_INODE(sock)); d_instantiate(path.dentry, SOCK_INODE(sock));
SOCK_INODE(sock)->i_fop = &socket_file_ops; SOCK_INODE(sock)->i_fop = &socket_file_ops;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment