Commit 4142e0d1 authored by Linus Torvalds

Merge branch 'osync_cleanup' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs-2.6

* 'osync_cleanup' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs-2.6:
  fsync: wait for data writeout completion before calling ->fsync
  vfs: Remove generic_osync_inode() and sync_page_range{_nolock}()
  fat: Opencode sync_page_range_nolock()
  pohmelfs: Use new syncing helper
  xfs: Convert sync_page_range() to simple filemap_write_and_wait_range()
  ocfs2: Update syncing after splicing to match generic version
  ntfs: Use new syncing helpers and update comments
  ext4: Remove syncing logic from ext4_file_write
  ext3: Remove syncing logic from ext3_file_write
  ext2: Update comment about generic_osync_inode
  vfs: Introduce new helpers for syncing after writing to O_SYNC file or IS_SYNC inode
  vfs: Rename generic_file_aio_write_nolock
  ocfs2: Use __generic_file_aio_write instead of generic_file_aio_write_nolock
  pohmelfs: Use __generic_file_aio_write instead of generic_file_aio_write_nolock
  vfs: Remove syncing from generic_file_direct_write() and generic_file_buffered_write()
  vfs: Export __generic_file_aio_write() and add some comments
  vfs: Introduce filemap_fdatawait_range
parents 33f1de69 2daea67e
...@@ -246,7 +246,7 @@ static const struct file_operations raw_fops = { ...@@ -246,7 +246,7 @@ static const struct file_operations raw_fops = {
.read = do_sync_read, .read = do_sync_read,
.aio_read = generic_file_aio_read, .aio_read = generic_file_aio_read,
.write = do_sync_write, .write = do_sync_write,
.aio_write = generic_file_aio_write_nolock, .aio_write = blkdev_aio_write,
.open = raw_open, .open = raw_open,
.release= raw_release, .release= raw_release,
.ioctl = raw_ioctl, .ioctl = raw_ioctl,
......
...@@ -921,16 +921,16 @@ ssize_t pohmelfs_write(struct file *file, const char __user *buf, ...@@ -921,16 +921,16 @@ ssize_t pohmelfs_write(struct file *file, const char __user *buf,
if (ret) if (ret)
goto err_out_unlock; goto err_out_unlock;
ret = generic_file_aio_write_nolock(&kiocb, &iov, 1, pos); ret = __generic_file_aio_write(&kiocb, &iov, 1, &kiocb.ki_pos);
*ppos = kiocb.ki_pos; *ppos = kiocb.ki_pos;
mutex_unlock(&inode->i_mutex); mutex_unlock(&inode->i_mutex);
WARN_ON(ret < 0); WARN_ON(ret < 0);
if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { if (ret > 0) {
ssize_t err; ssize_t err;
err = sync_page_range(inode, mapping, pos, ret); err = generic_write_sync(file, pos, ret);
if (err < 0) if (err < 0)
ret = err; ret = err;
WARN_ON(ret < 0); WARN_ON(ret < 0);
......
...@@ -1404,6 +1404,33 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) ...@@ -1404,6 +1404,33 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
return blkdev_ioctl(bdev, mode, cmd, arg); return blkdev_ioctl(bdev, mode, cmd, arg);
} }
/*
 * blkdev_aio_write - write to a block device, then sync the written range
 * @iocb:    kiocb for the write; iocb->ki_filp is the target file and
 *           iocb->ki_pos must equal @pos on entry (checked with BUG_ON)
 * @iov:     I/O vector handed through to __generic_file_aio_write()
 * @nr_segs: number of segments in @iov
 * @pos:     file offset at which the write starts
 *
 * Meant only for the block device itself and for the raw driver, which is
 * basically a fake block device.  i_mutex is not taken for the write, so
 * this must not be used as a general purpose write path.
 *
 * Returns the result of __generic_file_aio_write(), except that when bytes
 * were written (result > 0) and generic_write_sync() fails, the sync error
 * is returned instead.
 */
ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
			 unsigned long nr_segs, loff_t pos)
{
	struct file *file = iocb->ki_filp;
	ssize_t ret;
	ssize_t sync_err;

	BUG_ON(iocb->ki_pos != pos);

	ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);

	/* Only a positive byte count or a queued request goes on to sync. */
	if (ret <= 0 && ret != -EIOCBQUEUED)
		return ret;

	sync_err = generic_write_sync(file, pos, ret);

	/* A queued (-EIOCBQUEUED) result is never replaced by a sync error. */
	if (sync_err < 0 && ret > 0)
		ret = sync_err;

	return ret;
}
EXPORT_SYMBOL_GPL(blkdev_aio_write);
/* /*
* Try to release a page associated with block device when the system * Try to release a page associated with block device when the system
* is under memory pressure. * is under memory pressure.
...@@ -1436,7 +1463,7 @@ const struct file_operations def_blk_fops = { ...@@ -1436,7 +1463,7 @@ const struct file_operations def_blk_fops = {
.read = do_sync_read, .read = do_sync_read,
.write = do_sync_write, .write = do_sync_write,
.aio_read = generic_file_aio_read, .aio_read = generic_file_aio_read,
.aio_write = generic_file_aio_write_nolock, .aio_write = blkdev_aio_write,
.mmap = generic_file_mmap, .mmap = generic_file_mmap,
.fsync = block_fsync, .fsync = block_fsync,
.unlocked_ioctl = block_ioctl, .unlocked_ioctl = block_ioctl,
......
...@@ -482,7 +482,7 @@ static int ext2_alloc_branch(struct inode *inode, ...@@ -482,7 +482,7 @@ static int ext2_alloc_branch(struct inode *inode,
unlock_buffer(bh); unlock_buffer(bh);
mark_buffer_dirty_inode(bh, inode); mark_buffer_dirty_inode(bh, inode);
/* We used to sync bh here if IS_SYNC(inode). /* We used to sync bh here if IS_SYNC(inode).
* But we now rely upon generic_osync_inode() * But we now rely upon generic_write_sync()
* and b_inode_buffers. But not for directories. * and b_inode_buffers. But not for directories.
*/ */
if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode)) if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode))
......
...@@ -51,71 +51,12 @@ static int ext3_release_file (struct inode * inode, struct file * filp) ...@@ -51,71 +51,12 @@ static int ext3_release_file (struct inode * inode, struct file * filp)
return 0; return 0;
} }
/*
 * ext3_file_write - write through generic_file_aio_write() and, for
 * synchronous files, force a journal commit afterwards.
 * @iocb:    kiocb for the write; iocb->ki_filp is the target file
 * @iov:     I/O vector handed through to generic_file_aio_write()
 * @nr_segs: number of segments in @iov
 * @pos:     file offset handed through to generic_file_aio_write()
 *
 * Returns the result of generic_file_aio_write(), or the error from
 * ext3_force_commit() when a commit was required and failed.
 */
static ssize_t
ext3_file_write(struct kiocb *iocb, const struct iovec *iov,
		unsigned long nr_segs, loff_t pos)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_path.dentry->d_inode;
	ssize_t ret;
	int need_commit;
	int err;

	ret = generic_file_aio_write(iocb, iov, nr_segs, pos);

	/* An error, or a write of nothing, leaves nothing to flush. */
	if (ret <= 0)
		return ret;

	/*
	 * The transaction has to be forced to disk, so that all metadata is
	 * kept up to date synchronously, when the inode is IS_SYNC, or when
	 * the file is O_SYNC and data journalling is in use.
	 */
	if (file->f_flags & O_SYNC) {
		/*
		 * Without data journalling the dirty data has already been
		 * flushed to backing store by generic_osync_inode, and so has
		 * the inode if anything other than timestamps changed, so no
		 * commit is needed.
		 *
		 * Open question --- do we care about flushing timestamps too
		 * if the inode is IS_SYNC?
		 */
		need_commit = ext3_should_journal_data(inode);
	} else {
		/*
		 * No forced data flush has happened on this path; an IS_SYNC
		 * inode needs one forced here.
		 *
		 * Open question #2 --- should data be forced to disk here
		 * too?  If not, data=writeback filesystems only flush
		 * metadata automatically on IS_SYNC, not data (historically
		 * that is what ext2 has done).
		 */
		need_commit = IS_SYNC(inode);
	}

	if (!need_commit)
		return ret;

	err = ext3_force_commit(inode->i_sb);
	return err ? err : ret;
}
const struct file_operations ext3_file_operations = { const struct file_operations ext3_file_operations = {
.llseek = generic_file_llseek, .llseek = generic_file_llseek,
.read = do_sync_read, .read = do_sync_read,
.write = do_sync_write, .write = do_sync_write,
.aio_read = generic_file_aio_read, .aio_read = generic_file_aio_read,
.aio_write = ext3_file_write, .aio_write = generic_file_aio_write,
.unlocked_ioctl = ext3_ioctl, .unlocked_ioctl = ext3_ioctl,
#ifdef CONFIG_COMPAT #ifdef CONFIG_COMPAT
.compat_ioctl = ext3_compat_ioctl, .compat_ioctl = ext3_compat_ioctl,
......
...@@ -58,10 +58,7 @@ static ssize_t ...@@ -58,10 +58,7 @@ static ssize_t
ext4_file_write(struct kiocb *iocb, const struct iovec *iov, ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos) unsigned long nr_segs, loff_t pos)
{ {
struct file *file = iocb->ki_filp; struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
struct inode *inode = file->f_path.dentry->d_inode;
ssize_t ret;
int err;
/* /*
* If we have encountered a bitmap-format file, the size limit * If we have encountered a bitmap-format file, the size limit
...@@ -81,53 +78,7 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov, ...@@ -81,53 +78,7 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
} }
} }
ret = generic_file_aio_write(iocb, iov, nr_segs, pos); return generic_file_aio_write(iocb, iov, nr_segs, pos);
/*
* Skip flushing if there was an error, or if nothing was written.
*/
if (ret <= 0)
return ret;
/*
* If the inode is IS_SYNC, or is O_SYNC and we are doing data
* journalling then we need to make sure that we force the transaction
* to disk to keep all metadata uptodate synchronously.
*/
if (file->f_flags & O_SYNC) {
/*
* If we are non-data-journaled, then the dirty data has
* already been flushed to backing store by generic_osync_inode,
* and the inode has been flushed too if there have been any
* modifications other than mere timestamp updates.
*
* Open question --- do we care about flushing timestamps too
* if the inode is IS_SYNC?
*/
if (!ext4_should_journal_data(inode))
return ret;
goto force_commit;
}
/*
* So we know that there has been no forced data flush. If the inode
* is marked IS_SYNC, we need to force one ourselves.
*/
if (!IS_SYNC(inode))
return ret;
/*
* Open question #2 --- should we force data to disk here too? If we
* don't, the only impact is that data=writeback filesystems won't
* flush data to disk automatically on IS_SYNC, only metadata (but
* historically, that is what ext2 has done.)
*/
force_commit:
err = ext4_force_commit(inode->i_sb);
if (err)
return err;
return ret;
} }
static struct vm_operations_struct ext4_file_vm_ops = { static struct vm_operations_struct ext4_file_vm_ops = {
......
...@@ -176,8 +176,26 @@ static int fat_cont_expand(struct inode *inode, loff_t size) ...@@ -176,8 +176,26 @@ static int fat_cont_expand(struct inode *inode, loff_t size)
inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC; inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC;
mark_inode_dirty(inode); mark_inode_dirty(inode);
if (IS_SYNC(inode)) if (IS_SYNC(inode)) {
err = sync_page_range_nolock(inode, mapping, start, count); int err2;
/*
* Opencode syncing since we don't have a file open to use
* standard fsync path.
*/
err = filemap_fdatawrite_range(mapping, start,
start + count - 1);
err2 = sync_mapping_buffers(mapping);
if (!err)
err = err2;
err2 = write_inode_now(inode, 1);
if (!err)
err = err2;
if (!err) {
err = filemap_fdatawait_range(mapping, start,
start + count - 1);
}
}
out: out:
return err; return err;
} }
......
...@@ -119,8 +119,8 @@ int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster) ...@@ -119,8 +119,8 @@ int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster)
MSDOS_I(inode)->i_start = new_dclus; MSDOS_I(inode)->i_start = new_dclus;
MSDOS_I(inode)->i_logstart = new_dclus; MSDOS_I(inode)->i_logstart = new_dclus;
/* /*
* Since generic_osync_inode() synchronize later if * Since generic_write_sync() synchronizes regular files later,
* this is not directory, we don't here. * we sync here only directories.
*/ */
if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode)) { if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode)) {
ret = fat_sync_inode(inode); ret = fat_sync_inode(inode);
......
...@@ -1242,57 +1242,3 @@ int sync_inode(struct inode *inode, struct writeback_control *wbc) ...@@ -1242,57 +1242,3 @@ int sync_inode(struct inode *inode, struct writeback_control *wbc)
return ret; return ret;
} }
EXPORT_SYMBOL(sync_inode); EXPORT_SYMBOL(sync_inode);
/**
 * generic_osync_inode - flush all dirty data for a given inode to disk
 * @inode: inode to write
 * @mapping: the address_space that should be flushed
 * @what:  bitmask selecting what to write and wait upon
 *
 * File write functions may call this for files opened with O_SYNC in order
 * to push dirty writes out to disk.
 *
 * Bits understood in @what:
 *
 * OSYNC_DATA:     i_mapping's dirty data
 * OSYNC_METADATA: the buffers at i_mapping->private_list
 * OSYNC_INODE:    the inode itself
 *
 * Every selected step runs even if an earlier one failed; the first
 * non-zero error seen is the one returned.
 */
int generic_osync_inode(struct inode *inode, struct address_space *mapping, int what)
{
	const int want_data = what & OSYNC_DATA;
	const int want_buffers = what & (OSYNC_METADATA|OSYNC_DATA);
	int first_err = 0;
	int write_inode;
	int rc;

	/* Start data writeout, sync the buffers, then wait for the data. */
	if (want_data)
		first_err = filemap_fdatawrite(mapping);

	if (want_buffers) {
		rc = sync_mapping_buffers(mapping);
		first_err = first_err ? first_err : rc;
	}

	if (want_data) {
		rc = filemap_fdatawait(mapping);
		first_err = first_err ? first_err : rc;
	}

	/*
	 * A dirty inode is written when the caller asked for OSYNC_INODE or
	 * when I_DIRTY_DATASYNC is set; i_state is sampled under inode_lock.
	 */
	spin_lock(&inode_lock);
	write_inode = (inode->i_state & I_DIRTY) &&
		      ((what & OSYNC_INODE) ||
		       (inode->i_state & I_DIRTY_DATASYNC));
	spin_unlock(&inode_lock);

	if (!write_inode) {
		inode_sync_wait(inode);
		return first_err;
	}

	rc = write_inode_now(inode, 1);
	return first_err ? first_err : rc;
}
EXPORT_SYMBOL(generic_osync_inode);
...@@ -2076,14 +2076,6 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb, ...@@ -2076,14 +2076,6 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
*ppos = pos; *ppos = pos;
if (cached_page) if (cached_page)
page_cache_release(cached_page); page_cache_release(cached_page);
/* For now, when the user asks for O_SYNC, we actually give O_DSYNC. */
if (likely(!status)) {
if (unlikely((file->f_flags & O_SYNC) || IS_SYNC(vi))) {
if (!mapping->a_ops->writepage || !is_sync_kiocb(iocb))
status = generic_osync_inode(vi, mapping,
OSYNC_METADATA|OSYNC_DATA);
}
}
pagevec_lru_add_file(&lru_pvec); pagevec_lru_add_file(&lru_pvec);
ntfs_debug("Done. Returning %s (written 0x%lx, status %li).", ntfs_debug("Done. Returning %s (written 0x%lx, status %li).",
written ? "written" : "status", (unsigned long)written, written ? "written" : "status", (unsigned long)written,
...@@ -2145,8 +2137,8 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, ...@@ -2145,8 +2137,8 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
mutex_lock(&inode->i_mutex); mutex_lock(&inode->i_mutex);
ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos); ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos);
mutex_unlock(&inode->i_mutex); mutex_unlock(&inode->i_mutex);
if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { if (ret > 0) {
int err = sync_page_range(inode, mapping, pos, ret); int err = generic_write_sync(file, pos, ret);
if (err < 0) if (err < 0)
ret = err; ret = err;
} }
...@@ -2173,8 +2165,8 @@ static ssize_t ntfs_file_writev(struct file *file, const struct iovec *iov, ...@@ -2173,8 +2165,8 @@ static ssize_t ntfs_file_writev(struct file *file, const struct iovec *iov,
if (ret == -EIOCBQUEUED) if (ret == -EIOCBQUEUED)
ret = wait_on_sync_kiocb(&kiocb); ret = wait_on_sync_kiocb(&kiocb);
mutex_unlock(&inode->i_mutex); mutex_unlock(&inode->i_mutex);
if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { if (ret > 0) {
int err = sync_page_range(inode, mapping, *ppos - ret, ret); int err = generic_write_sync(file, *ppos - ret, ret);
if (err < 0) if (err < 0)
ret = err; ret = err;
} }
......
...@@ -384,13 +384,12 @@ MFT_RECORD *map_extent_mft_record(ntfs_inode *base_ni, MFT_REF mref, ...@@ -384,13 +384,12 @@ MFT_RECORD *map_extent_mft_record(ntfs_inode *base_ni, MFT_REF mref,
* it is dirty in the inode meta data rather than the data page cache of the * it is dirty in the inode meta data rather than the data page cache of the
* inode, and thus there are no data pages that need writing out. Therefore, a * inode, and thus there are no data pages that need writing out. Therefore, a
* full mark_inode_dirty() is overkill. A mark_inode_dirty_sync(), on the * full mark_inode_dirty() is overkill. A mark_inode_dirty_sync(), on the
* other hand, is not sufficient, because I_DIRTY_DATASYNC needs to be set to * other hand, is not sufficient, because ->write_inode needs to be called even
* ensure ->write_inode is called from generic_osync_inode() and this needs to * in case of fdatasync. This needs to happen or the file data would not
* happen or the file data would not necessarily hit the device synchronously, * necessarily hit the device synchronously, even though the vfs inode has the
* even though the vfs inode has the O_SYNC flag set. Also, I_DIRTY_DATASYNC * O_SYNC flag set. Also, I_DIRTY_DATASYNC simply "feels" better than just
* simply "feels" better than just I_DIRTY_SYNC, since the file data has not * I_DIRTY_SYNC, since the file data has not actually hit the block device yet,
* actually hit the block device yet, which is not what I_DIRTY_SYNC on its own * which is not what I_DIRTY_SYNC on its own would suggest.
* would suggest.
*/ */
void __mark_mft_record_dirty(ntfs_inode *ni) void __mark_mft_record_dirty(ntfs_inode *ni)
{ {
......
...@@ -1871,8 +1871,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, ...@@ -1871,8 +1871,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
goto out_dio; goto out_dio;
} }
} else { } else {
written = generic_file_aio_write_nolock(iocb, iov, nr_segs, written = __generic_file_aio_write(iocb, iov, nr_segs, ppos);
*ppos);
} }
out_dio: out_dio:
...@@ -1880,18 +1879,21 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, ...@@ -1880,18 +1879,21 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT)); BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT));
if ((file->f_flags & O_SYNC && !direct_io) || IS_SYNC(inode)) { if ((file->f_flags & O_SYNC && !direct_io) || IS_SYNC(inode)) {
/* ret = filemap_fdatawrite_range(file->f_mapping, pos,
* The generic write paths have handled getting data pos + count - 1);
* to disk, but since we don't make use of the dirty if (ret < 0)
* inode list, a manual journal commit is necessary written = ret;
* here.
*/ if (!ret && (old_size != i_size_read(inode) ||
if (old_size != i_size_read(inode) || old_clusters != OCFS2_I(inode)->ip_clusters)) {
old_clusters != OCFS2_I(inode)->ip_clusters) {
ret = jbd2_journal_force_commit(osb->journal->j_journal); ret = jbd2_journal_force_commit(osb->journal->j_journal);
if (ret < 0) if (ret < 0)
written = ret; written = ret;
} }
if (!ret)
ret = filemap_fdatawait_range(file->f_mapping, pos,
pos + count - 1);
} }
/* /*
...@@ -1991,31 +1993,16 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe, ...@@ -1991,31 +1993,16 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
if (ret > 0) { if (ret > 0) {
unsigned long nr_pages; unsigned long nr_pages;
*ppos += ret;
nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
/*
* If file or inode is SYNC and we actually wrote some data,
* sync it.
*/
if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) {
int err; int err;
mutex_lock(&inode->i_mutex); nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
err = ocfs2_rw_lock(inode, 1);
if (err < 0) {
mlog_errno(err);
} else {
err = generic_osync_inode(inode, mapping,
OSYNC_METADATA|OSYNC_DATA);
ocfs2_rw_unlock(inode, 1);
}
mutex_unlock(&inode->i_mutex);
err = generic_write_sync(out, *ppos, ret);
if (err) if (err)
ret = err; ret = err;
} else
*ppos += ret;
balance_dirty_pages_ratelimited_nr(mapping, nr_pages); balance_dirty_pages_ratelimited_nr(mapping, nr_pages);
} }
......
...@@ -976,25 +976,15 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, ...@@ -976,25 +976,15 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
if (ret > 0) { if (ret > 0) {
unsigned long nr_pages; unsigned long nr_pages;
*ppos += ret;
nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
/*
* If file or inode is SYNC and we actually wrote some data,
* sync it.
*/
if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) {
int err; int err;
mutex_lock(&inode->i_mutex); nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
err = generic_osync_inode(inode, mapping,
OSYNC_METADATA|OSYNC_DATA);
mutex_unlock(&inode->i_mutex);
err = generic_write_sync(out, *ppos, ret);
if (err) if (err)
ret = err; ret = err;
} else
*ppos += ret;
balance_dirty_pages_ratelimited_nr(mapping, nr_pages); balance_dirty_pages_ratelimited_nr(mapping, nr_pages);
} }
......
...@@ -178,19 +178,23 @@ int file_fsync(struct file *filp, struct dentry *dentry, int datasync) ...@@ -178,19 +178,23 @@ int file_fsync(struct file *filp, struct dentry *dentry, int datasync)
} }
/** /**
* vfs_fsync - perform a fsync or fdatasync on a file * vfs_fsync_range - helper to sync a range of data & metadata to disk
* @file: file to sync * @file: file to sync
* @dentry: dentry of @file * @dentry: dentry of @file
* @data: only perform a fdatasync operation * @start: offset in bytes of the beginning of data range to sync
* @end: offset in bytes of the end of data range (inclusive)
* @datasync: perform only datasync
* *
* Write back data and metadata for @file to disk. If @datasync is * Write back data in range @start..@end and metadata for @file to disk. If
* set only metadata needed to access modified file data is written. * @datasync is set only metadata needed to access modified file data is
* written.
* *
* In case this function is called from nfsd @file may be %NULL and * In case this function is called from nfsd @file may be %NULL and
* only @dentry is set. This can only happen when the filesystem * only @dentry is set. This can only happen when the filesystem
* implements the export_operations API. * implements the export_operations API.
*/ */
int vfs_fsync(struct file *file, struct dentry *dentry, int datasync) int vfs_fsync_range(struct file *file, struct dentry *dentry, loff_t start,
loff_t end, int datasync)
{ {
const struct file_operations *fop; const struct file_operations *fop;
struct address_space *mapping; struct address_space *mapping;
...@@ -214,7 +218,7 @@ int vfs_fsync(struct file *file, struct dentry *dentry, int datasync) ...@@ -214,7 +218,7 @@ int vfs_fsync(struct file *file, struct dentry *dentry, int datasync)
goto out; goto out;
} }
ret = filemap_fdatawrite(mapping); ret = filemap_write_and_wait_range(mapping, start, end);
/* /*
* We need to protect against concurrent writers, which could cause * We need to protect against concurrent writers, which could cause
...@@ -225,12 +229,29 @@ int vfs_fsync(struct file *file, struct dentry *dentry, int datasync) ...@@ -225,12 +229,29 @@ int vfs_fsync(struct file *file, struct dentry *dentry, int datasync)
if (!ret) if (!ret)
ret = err; ret = err;
mutex_unlock(&mapping->host->i_mutex); mutex_unlock(&mapping->host->i_mutex);
err = filemap_fdatawait(mapping);
if (!ret)
ret = err;
out: out:
return ret; return ret;
} }
EXPORT_SYMBOL(vfs_fsync_range);
/**
* vfs_fsync - perform a fsync or fdatasync on a file
* @file: file to sync
* @dentry: dentry of @file
* @datasync: only perform a fdatasync operation
*
* Write back data and metadata for @file to disk. If @datasync is
* set only metadata needed to access modified file data is written.
*
* In case this function is called from nfsd @file may be %NULL and
* only @dentry is set. This can only happen when the filesystem
* implements the export_operations API.
*/
int vfs_fsync(struct file *file, struct dentry *dentry, int datasync)
{
return vfs_fsync_range(file, dentry, 0, LLONG_MAX, datasync);
}
EXPORT_SYMBOL(vfs_fsync); EXPORT_SYMBOL(vfs_fsync);
static int do_fsync(unsigned int fd, int datasync) static int do_fsync(unsigned int fd, int datasync)
...@@ -256,6 +277,23 @@ SYSCALL_DEFINE1(fdatasync, unsigned int, fd) ...@@ -256,6 +277,23 @@ SYSCALL_DEFINE1(fdatasync, unsigned int, fd)
return do_fsync(fd, 1); return do_fsync(fd, 1);
} }
/**
 * generic_write_sync - perform syncing after a write if file / inode is sync
 * @file:	file to which the write happened
 * @pos:	offset where the write started
 * @count:	length of the write
 *
 * Thin wrapper around vfs_fsync_range().  Returns 0 without doing anything
 * unless @file has O_SYNC set or its mapping's host inode is IS_SYNC;
 * otherwise syncs bytes @pos..@pos + @count - 1 with datasync set and
 * returns the result of vfs_fsync_range().
 */
int generic_write_sync(struct file *file, loff_t pos, loff_t count)
{
	if ((file->f_flags & O_SYNC) || IS_SYNC(file->f_mapping->host)) {
		/* The range end handed to vfs_fsync_range() is inclusive. */
		loff_t last_byte = pos + count - 1;

		return vfs_fsync_range(file, file->f_path.dentry, pos,
				       last_byte, 1);
	}

	return 0;
}
EXPORT_SYMBOL(generic_write_sync);
/* /*
* sys_sync_file_range() permits finely controlled syncing over a segment of * sys_sync_file_range() permits finely controlled syncing over a segment of
* a file in the range offset .. (offset+nbytes-1) inclusive. If nbytes is * a file in the range offset .. (offset+nbytes-1) inclusive. If nbytes is
......
...@@ -817,7 +817,8 @@ xfs_write( ...@@ -817,7 +817,8 @@ xfs_write(
xfs_iunlock(xip, iolock); xfs_iunlock(xip, iolock);
if (need_i_mutex) if (need_i_mutex)
mutex_unlock(&inode->i_mutex); mutex_unlock(&inode->i_mutex);
error2 = sync_page_range(inode, mapping, pos, ret); error2 = filemap_write_and_wait_range(mapping, pos,
pos + ret - 1);
if (!error) if (!error)
error = error2; error = error2;
if (need_i_mutex) if (need_i_mutex)
......
...@@ -1455,11 +1455,6 @@ int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags); ...@@ -1455,11 +1455,6 @@ int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags);
#define DT_SOCK 12 #define DT_SOCK 12
#define DT_WHT 14 #define DT_WHT 14
#define OSYNC_METADATA (1<<0)
#define OSYNC_DATA (1<<1)
#define OSYNC_INODE (1<<2)
int generic_osync_inode(struct inode *, struct address_space *, int);
/* /*
* This is the "filldir" function type, used by readdir() to let * This is the "filldir" function type, used by readdir() to let
* the kernel specify what kind of dirent layout it wants to have. * the kernel specify what kind of dirent layout it wants to have.
...@@ -2086,6 +2081,8 @@ extern int write_inode_now(struct inode *, int); ...@@ -2086,6 +2081,8 @@ extern int write_inode_now(struct inode *, int);
extern int filemap_fdatawrite(struct address_space *); extern int filemap_fdatawrite(struct address_space *);
extern int filemap_flush(struct address_space *); extern int filemap_flush(struct address_space *);
extern int filemap_fdatawait(struct address_space *); extern int filemap_fdatawait(struct address_space *);
extern int filemap_fdatawait_range(struct address_space *, loff_t lstart,
loff_t lend);
extern int filemap_write_and_wait(struct address_space *mapping); extern int filemap_write_and_wait(struct address_space *mapping);
extern int filemap_write_and_wait_range(struct address_space *mapping, extern int filemap_write_and_wait_range(struct address_space *mapping,
loff_t lstart, loff_t lend); loff_t lstart, loff_t lend);
...@@ -2096,7 +2093,10 @@ extern int __filemap_fdatawrite_range(struct address_space *mapping, ...@@ -2096,7 +2093,10 @@ extern int __filemap_fdatawrite_range(struct address_space *mapping,
extern int filemap_fdatawrite_range(struct address_space *mapping, extern int filemap_fdatawrite_range(struct address_space *mapping,
loff_t start, loff_t end); loff_t start, loff_t end);
extern int vfs_fsync_range(struct file *file, struct dentry *dentry,
loff_t start, loff_t end, int datasync);
extern int vfs_fsync(struct file *file, struct dentry *dentry, int datasync); extern int vfs_fsync(struct file *file, struct dentry *dentry, int datasync);
extern int generic_write_sync(struct file *file, loff_t pos, loff_t count);
extern void sync_supers(void); extern void sync_supers(void);
extern void emergency_sync(void); extern void emergency_sync(void);
extern void emergency_remount(void); extern void emergency_remount(void);
...@@ -2202,9 +2202,9 @@ extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); ...@@ -2202,9 +2202,9 @@ extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *);
extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size); extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size);
int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk); int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk);
extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t); extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t);
extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long,
loff_t *);
extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t);
extern ssize_t generic_file_aio_write_nolock(struct kiocb *, const struct iovec *,
unsigned long, loff_t);
extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *, extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *,
unsigned long *, loff_t, loff_t *, size_t, size_t); unsigned long *, loff_t, loff_t *, size_t, size_t);
extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec *, extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec *,
...@@ -2214,6 +2214,10 @@ extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t l ...@@ -2214,6 +2214,10 @@ extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t l
extern int generic_segment_checks(const struct iovec *iov, extern int generic_segment_checks(const struct iovec *iov,
unsigned long *nr_segs, size_t *count, int access_flags); unsigned long *nr_segs, size_t *count, int access_flags);
/* fs/block_dev.c */
extern ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos);
/* fs/splice.c */ /* fs/splice.c */
extern ssize_t generic_file_splice_read(struct file *, loff_t *, extern ssize_t generic_file_splice_read(struct file *, loff_t *,
struct pipe_inode_info *, size_t, unsigned int); struct pipe_inode_info *, size_t, unsigned int);
......
...@@ -150,10 +150,6 @@ int write_cache_pages(struct address_space *mapping, ...@@ -150,10 +150,6 @@ int write_cache_pages(struct address_space *mapping,
struct writeback_control *wbc, writepage_t writepage, struct writeback_control *wbc, writepage_t writepage,
void *data); void *data);
int do_writepages(struct address_space *mapping, struct writeback_control *wbc); int do_writepages(struct address_space *mapping, struct writeback_control *wbc);
int sync_page_range(struct inode *inode, struct address_space *mapping,
loff_t pos, loff_t count);
int sync_page_range_nolock(struct inode *inode, struct address_space *mapping,
loff_t pos, loff_t count);
void set_page_dirty_balance(struct page *page, int page_mkwrite); void set_page_dirty_balance(struct page *page, int page_mkwrite);
void writeback_set_ratelimit(void); void writeback_set_ratelimit(void);
......
...@@ -39,11 +39,10 @@ ...@@ -39,11 +39,10 @@
/* /*
* FIXME: remove all knowledge of the buffer layer from the core VM * FIXME: remove all knowledge of the buffer layer from the core VM
*/ */
#include <linux/buffer_head.h> /* for generic_osync_inode */ #include <linux/buffer_head.h> /* for try_to_free_buffers */
#include <asm/mman.h> #include <asm/mman.h>
/* /*
* Shared mappings implemented 30.11.1994. It's not fully working yet, * Shared mappings implemented 30.11.1994. It's not fully working yet,
* though. * though.
...@@ -307,68 +306,24 @@ int wait_on_page_writeback_range(struct address_space *mapping, ...@@ -307,68 +306,24 @@ int wait_on_page_writeback_range(struct address_space *mapping,
} }
/** /**
* sync_page_range - write and wait on all pages in the passed range * filemap_fdatawait_range - wait for all under-writeback pages to complete in a given range
* @inode: target inode * @mapping: address space structure to wait for
* @mapping: target address_space * @start: offset in bytes where the range starts
* @pos: beginning offset in pages to write * @end: offset in bytes where the range ends (inclusive)
* @count: number of bytes to write
*
* Write and wait upon all the pages in the passed range. This is a "data
* integrity" operation. It waits upon in-flight writeout before starting and
* waiting upon new writeout. If there was an IO error, return it.
* *
* We need to re-take i_mutex during the generic_osync_inode list walk because * Walk the list of under-writeback pages of the given address space
* it is otherwise livelockable. * in the given range and wait for all of them.
*/
int sync_page_range(struct inode *inode, struct address_space *mapping,
loff_t pos, loff_t count)
{
pgoff_t start = pos >> PAGE_CACHE_SHIFT;
pgoff_t end = (pos + count - 1) >> PAGE_CACHE_SHIFT;
int ret;
if (!mapping_cap_writeback_dirty(mapping) || !count)
return 0;
ret = filemap_fdatawrite_range(mapping, pos, pos + count - 1);
if (ret == 0) {
mutex_lock(&inode->i_mutex);
ret = generic_osync_inode(inode, mapping, OSYNC_METADATA);
mutex_unlock(&inode->i_mutex);
}
if (ret == 0)
ret = wait_on_page_writeback_range(mapping, start, end);
return ret;
}
EXPORT_SYMBOL(sync_page_range);
/**
* sync_page_range_nolock - write & wait on all pages in the passed range without locking
* @inode: target inode
* @mapping: target address_space
* @pos: beginning offset in pages to write
* @count: number of bytes to write
* *
* Note: Holding i_mutex across sync_page_range_nolock() is not a good idea * This is just a simple wrapper so that callers don't have to convert offsets
* as it forces O_SYNC writers to different parts of the same file * to page indexes themselves
* to be serialised right until io completion.
*/ */
int sync_page_range_nolock(struct inode *inode, struct address_space *mapping, int filemap_fdatawait_range(struct address_space *mapping, loff_t start,
loff_t pos, loff_t count) loff_t end)
{ {
pgoff_t start = pos >> PAGE_CACHE_SHIFT; return wait_on_page_writeback_range(mapping, start >> PAGE_CACHE_SHIFT,
pgoff_t end = (pos + count - 1) >> PAGE_CACHE_SHIFT; end >> PAGE_CACHE_SHIFT);
int ret;
if (!mapping_cap_writeback_dirty(mapping) || !count)
return 0;
ret = filemap_fdatawrite_range(mapping, pos, pos + count - 1);
if (ret == 0)
ret = generic_osync_inode(inode, mapping, OSYNC_METADATA);
if (ret == 0)
ret = wait_on_page_writeback_range(mapping, start, end);
return ret;
} }
EXPORT_SYMBOL(sync_page_range_nolock); EXPORT_SYMBOL(filemap_fdatawait_range);
/** /**
* filemap_fdatawait - wait for all under-writeback pages to complete * filemap_fdatawait - wait for all under-writeback pages to complete
...@@ -2167,20 +2122,7 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov, ...@@ -2167,20 +2122,7 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
} }
*ppos = end; *ppos = end;
} }
/*
* Sync the fs metadata but not the minor inode changes and
* of course not the data as we did direct DMA for the IO.
* i_mutex is held, which protects generic_osync_inode() from
* livelocking. AIO O_DIRECT ops attempt to sync metadata here.
*/
out: out:
if ((written >= 0 || written == -EIOCBQUEUED) &&
((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
int err = generic_osync_inode(inode, mapping, OSYNC_METADATA);
if (err < 0)
written = err;
}
return written; return written;
} }
EXPORT_SYMBOL(generic_file_direct_write); EXPORT_SYMBOL(generic_file_direct_write);
...@@ -2312,8 +2254,6 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, ...@@ -2312,8 +2254,6 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
{ {
struct file *file = iocb->ki_filp; struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping; struct address_space *mapping = file->f_mapping;
const struct address_space_operations *a_ops = mapping->a_ops;
struct inode *inode = mapping->host;
ssize_t status; ssize_t status;
struct iov_iter i; struct iov_iter i;
...@@ -2323,16 +2263,6 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, ...@@ -2323,16 +2263,6 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
if (likely(status >= 0)) { if (likely(status >= 0)) {
written += status; written += status;
*ppos = pos + status; *ppos = pos + status;
/*
* For now, when the user asks for O_SYNC, we'll actually give
* O_DSYNC
*/
if (unlikely((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
if (!a_ops->writepage || !is_sync_kiocb(iocb))
status = generic_osync_inode(inode, mapping,
OSYNC_METADATA|OSYNC_DATA);
}
} }
/* /*
...@@ -2348,8 +2278,26 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, ...@@ -2348,8 +2278,26 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
} }
EXPORT_SYMBOL(generic_file_buffered_write); EXPORT_SYMBOL(generic_file_buffered_write);
static ssize_t /**
__generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov, * __generic_file_aio_write - write data to a file
* @iocb: IO state structure (file, offset, etc.)
* @iov: vector with data to write
* @nr_segs: number of segments in the vector
* @ppos: position where to write
*
* This function does all the work needed for actually writing data to a
* file. It does all basic checks, removes SUID from the file, updates
* modification times and calls proper subroutines depending on whether we
* do direct IO or a standard buffered write.
*
* It expects i_mutex to be grabbed unless we work on a block device or similar
* object which does not need locking at all.
*
* This function does *not* take care of syncing data in case of O_SYNC write.
* A caller has to handle it. This is mainly due to the fact that we want to
* avoid syncing under i_mutex.
*/
ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t *ppos) unsigned long nr_segs, loff_t *ppos)
{ {
struct file *file = iocb->ki_filp; struct file *file = iocb->ki_filp;
...@@ -2447,51 +2395,37 @@ __generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov, ...@@ -2447,51 +2395,37 @@ __generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov,
current->backing_dev_info = NULL; current->backing_dev_info = NULL;
return written ? written : err; return written ? written : err;
} }
EXPORT_SYMBOL(__generic_file_aio_write);
ssize_t generic_file_aio_write_nolock(struct kiocb *iocb, /**
const struct iovec *iov, unsigned long nr_segs, loff_t pos) * generic_file_aio_write - write data to a file
{ * @iocb: IO state structure
struct file *file = iocb->ki_filp; * @iov: vector with data to write
struct address_space *mapping = file->f_mapping; * @nr_segs: number of segments in the vector
struct inode *inode = mapping->host; * @pos: position in file where to write
ssize_t ret; *
* This is a wrapper around __generic_file_aio_write() to be used by most
BUG_ON(iocb->ki_pos != pos); * filesystems. It takes care of syncing the file in case of O_SYNC file
* and acquires i_mutex as needed.
ret = __generic_file_aio_write_nolock(iocb, iov, nr_segs, */
&iocb->ki_pos);
if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
ssize_t err;
err = sync_page_range_nolock(inode, mapping, pos, ret);
if (err < 0)
ret = err;
}
return ret;
}
EXPORT_SYMBOL(generic_file_aio_write_nolock);
ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos) unsigned long nr_segs, loff_t pos)
{ {
struct file *file = iocb->ki_filp; struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping; struct inode *inode = file->f_mapping->host;
struct inode *inode = mapping->host;
ssize_t ret; ssize_t ret;
BUG_ON(iocb->ki_pos != pos); BUG_ON(iocb->ki_pos != pos);
mutex_lock(&inode->i_mutex); mutex_lock(&inode->i_mutex);
ret = __generic_file_aio_write_nolock(iocb, iov, nr_segs, ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
&iocb->ki_pos);
mutex_unlock(&inode->i_mutex); mutex_unlock(&inode->i_mutex);
if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { if (ret > 0 || ret == -EIOCBQUEUED) {
ssize_t err; ssize_t err;
err = sync_page_range(inode, mapping, pos, ret); err = generic_write_sync(file, pos, ret);
if (err < 0) if (err < 0 && ret > 0)
ret = err; ret = err;
} }
return ret; return ret;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment