Commit 4bd809db authored by Zheng Liu, committed by Theodore Ts'o

ext4: don't take the i_mutex lock when doing DIO overwrites

Aligned, overwriting direct I/O can be parallelized.  In
ext4_file_dio_write(), we first check whether these conditions are
satisfied.  If so, we take i_data_sem and release the i_mutex lock
directly.  Meanwhile, iocb->private is set to indicate that this is a
DIO overwrite, and it is handled in ext4_ext_direct_IO().

[ Added fix from Dan Carpenter to fix locking bug on the error path. ]

CC: Tao Ma <tm@tao.ma>
CC: Eric Sandeen <sandeen@redhat.com>
CC: Robin Dong <hao.bigrat@gmail.com>
Signed-off-by: Zheng Liu <wenqing.lz@taobao.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
parent 729f52c6
...@@ -93,9 +93,13 @@ static ssize_t ...@@ -93,9 +93,13 @@ static ssize_t
ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov, ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos) unsigned long nr_segs, loff_t pos)
{ {
struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
struct blk_plug plug;
int unaligned_aio = 0; int unaligned_aio = 0;
ssize_t ret; ssize_t ret;
int overwrite = 0;
size_t length = iov_length(iov, nr_segs);
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) && if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) &&
!is_sync_kiocb(iocb)) !is_sync_kiocb(iocb))
...@@ -115,7 +119,50 @@ ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov, ...@@ -115,7 +119,50 @@ ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov,
ext4_aiodio_wait(inode); ext4_aiodio_wait(inode);
} }
ret = generic_file_aio_write(iocb, iov, nr_segs, pos); BUG_ON(iocb->ki_pos != pos);
mutex_lock(&inode->i_mutex);
blk_start_plug(&plug);
iocb->private = &overwrite;
/* check whether we do a DIO overwrite or not */
if (ext4_should_dioread_nolock(inode) && !unaligned_aio &&
!file->f_mapping->nrpages && pos + length <= i_size_read(inode)) {
struct ext4_map_blocks map;
unsigned int blkbits = inode->i_blkbits;
int err, len;
map.m_lblk = pos >> blkbits;
map.m_len = (EXT4_BLOCK_ALIGN(pos + length, blkbits) >> blkbits)
- map.m_lblk;
len = map.m_len;
err = ext4_map_blocks(NULL, inode, &map, 0);
/*
* 'err==len' means that all of the blocks have been preallocated,
* regardless of whether they are initialized or not. To exclude
* uninitialized extents, we need to check m_flags. There are
* two conditions that indicate initialized extents:
* 1) If we hit the extent cache, the EXT4_MAP_MAPPED flag is returned;
* 2) If we do a real lookup, no flags are returned.
* So we should check these two conditions.
*/
if (err == len && (map.m_flags & EXT4_MAP_MAPPED))
overwrite = 1;
}
ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
mutex_unlock(&inode->i_mutex);
if (ret > 0 || ret == -EIOCBQUEUED) {
ssize_t err;
err = generic_write_sync(file, pos, ret);
if (err < 0 && ret > 0)
ret = err;
}
blk_finish_plug(&plug);
if (unaligned_aio) if (unaligned_aio)
mutex_unlock(ext4_aio_mutex(inode)); mutex_unlock(ext4_aio_mutex(inode));
......
...@@ -2996,6 +2996,16 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, ...@@ -2996,6 +2996,16 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
if (rw == WRITE && final_size <= inode->i_size) { if (rw == WRITE && final_size <= inode->i_size) {
int overwrite = 0; int overwrite = 0;
BUG_ON(iocb->private == NULL);
/* If we do an overwrite dio, the i_mutex lock can be released */
overwrite = *((int *)iocb->private);
if (overwrite) {
down_read(&EXT4_I(inode)->i_data_sem);
mutex_unlock(&inode->i_mutex);
}
/* /*
* We could direct write to holes and fallocate. * We could direct write to holes and fallocate.
* *
...@@ -3021,8 +3031,10 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, ...@@ -3021,8 +3031,10 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
if (!is_sync_kiocb(iocb)) { if (!is_sync_kiocb(iocb)) {
ext4_io_end_t *io_end = ext4_io_end_t *io_end =
ext4_init_io_end(inode, GFP_NOFS); ext4_init_io_end(inode, GFP_NOFS);
if (!io_end) if (!io_end) {
return -ENOMEM; ret = -ENOMEM;
goto retake_lock;
}
io_end->flag |= EXT4_IO_END_DIRECT; io_end->flag |= EXT4_IO_END_DIRECT;
iocb->private = io_end; iocb->private = io_end;
/* /*
...@@ -3083,6 +3095,14 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, ...@@ -3083,6 +3095,14 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
ret = err; ret = err;
ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
} }
retake_lock:
/* take the i_mutex lock again if we do an overwrite dio */
if (overwrite) {
up_read(&EXT4_I(inode)->i_data_sem);
mutex_lock(&inode->i_mutex);
}
return ret; return ret;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment