Commit 6c760c07 authored by Josef Bacik's avatar Josef Bacik Committed by Chris Mason

Btrfs: do not call file_update_time in aio_write

This starts a transaction and dirties the inode everytime we call it, which
is super expensive if you have a write heavy workload.  We will be updating
the inode when the IO completes and we reserve the space for the inode
update when we reserve space for the write, so there is no chance of loss of
information or enospc issues.  Thanks,
Signed-off-by: default avatarJosef Bacik <jbacik@fusionio.com>
Signed-off-by: default avatarChris Mason <chris.mason@fusionio.com>
parent 5124e00e
......@@ -1464,6 +1464,24 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb,
return written ? written : err;
}
static void update_time_for_write(struct inode *inode)
{
struct timespec now;
if (IS_NOCMTIME(inode))
return;
now = current_fs_time(inode->i_sb);
if (!timespec_equal(&inode->i_mtime, &now))
inode->i_mtime = now;
if (!timespec_equal(&inode->i_ctime, &now))
inode->i_ctime = now;
if (IS_I_VERSION(inode))
inode_inc_iversion(inode);
}
static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
......@@ -1519,11 +1537,13 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
goto out;
}
err = file_update_time(file);
if (err) {
mutex_unlock(&inode->i_mutex);
goto out;
}
/*
* We reserve space for updating the inode when we reserve space for the
* extent we are going to write, so we will enospc out there. We don't
* need to start yet another transaction to update the inode as we will
* update the inode when we finish writing whatever data we write.
*/
update_time_for_write(inode);
start_pos = round_down(pos, root->sectorsize);
if (start_pos > i_size_read(inode)) {
......@@ -1563,8 +1583,13 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
* this will either be one more than the running transaction
* or the generation used for the next transaction if there isn't
* one running right now.
*
* We also have to set last_sub_trans to the current log transid,
* otherwise subsequent syncs to a file that's been synced in this
* transaction will appear to have already occured.
*/
BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
BTRFS_I(inode)->last_sub_trans = root->log_transid;
if (num_written > 0 || num_written == -EIOCBQUEUED) {
err = generic_write_sync(file, pos, num_written);
if (err < 0 && num_written > 0)
......
......@@ -1922,8 +1922,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */
ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
if (!ret) {
btrfs_ordered_update_i_size(inode, 0, ordered_extent);
if (nolock)
trans = btrfs_join_transaction_nolock(root);
else
......@@ -1937,7 +1936,6 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
ret = btrfs_update_inode_fallback(trans, root, inode);
if (ret) /* -ENOMEM or corruption */
btrfs_abort_transaction(trans, root, ret);
}
goto out;
}
......@@ -1986,16 +1984,12 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
add_pending_csums(trans, inode, ordered_extent->file_offset,
&ordered_extent->list);
ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
btrfs_ordered_update_i_size(inode, 0, ordered_extent);
ret = btrfs_update_inode_fallback(trans, root, inode);
if (ret) { /* -ENOMEM or corruption */
btrfs_abort_transaction(trans, root, ret);
goto out_unlock;
}
} else {
btrfs_set_inode_last_trans(trans, inode);
}
ret = 0;
out_unlock:
unlock_extent_cached(io_tree, ordered_extent->file_offset,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment