Commit dee1f973 authored by Dmitry Monakhov's avatar Dmitry Monakhov Committed by Theodore Ts'o

ext4: race-condition protection for ext4_convert_unwritten_extents_endio

We assumed that at the time we call ext4_convert_unwritten_extents_endio()
extent in question is fully inside [map.m_lblk, map->m_len] because
it was already split during submission.  But this may not be true due to
a race between writeback vs fallocate.

If extent in question is larger than requested we will split it again.
Special precautions should being done if zeroout required because
[map.m_lblk, map->m_len] already contains valid data.
Signed-off-by: default avatarDmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: default avatar"Theodore Ts'o" <tytso@mit.edu>
Cc: stable@vger.kernel.org
parent 60d4616f
...@@ -52,6 +52,9 @@ ...@@ -52,6 +52,9 @@
#define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */ #define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */
#define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */ #define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */
#define EXT4_EXT_DATA_VALID1 0x8 /* first half contains valid data */
#define EXT4_EXT_DATA_VALID2 0x10 /* second half contains valid data */
static __le32 ext4_extent_block_csum(struct inode *inode, static __le32 ext4_extent_block_csum(struct inode *inode,
struct ext4_extent_header *eh) struct ext4_extent_header *eh)
{ {
...@@ -2914,6 +2917,9 @@ static int ext4_split_extent_at(handle_t *handle, ...@@ -2914,6 +2917,9 @@ static int ext4_split_extent_at(handle_t *handle,
unsigned int ee_len, depth; unsigned int ee_len, depth;
int err = 0; int err = 0;
BUG_ON((split_flag & (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2)) ==
(EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2));
ext_debug("ext4_split_extents_at: inode %lu, logical" ext_debug("ext4_split_extents_at: inode %lu, logical"
"block %llu\n", inode->i_ino, (unsigned long long)split); "block %llu\n", inode->i_ino, (unsigned long long)split);
...@@ -2972,7 +2978,14 @@ static int ext4_split_extent_at(handle_t *handle, ...@@ -2972,7 +2978,14 @@ static int ext4_split_extent_at(handle_t *handle,
err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) { if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) {
if (split_flag & EXT4_EXT_DATA_VALID1)
err = ext4_ext_zeroout(inode, ex2);
else
err = ext4_ext_zeroout(inode, ex);
} else
err = ext4_ext_zeroout(inode, &orig_ex); err = ext4_ext_zeroout(inode, &orig_ex);
if (err) if (err)
goto fix_extent_len; goto fix_extent_len;
/* update the extent length and mark as initialized */ /* update the extent length and mark as initialized */
...@@ -3025,12 +3038,13 @@ static int ext4_split_extent(handle_t *handle, ...@@ -3025,12 +3038,13 @@ static int ext4_split_extent(handle_t *handle,
uninitialized = ext4_ext_is_uninitialized(ex); uninitialized = ext4_ext_is_uninitialized(ex);
if (map->m_lblk + map->m_len < ee_block + ee_len) { if (map->m_lblk + map->m_len < ee_block + ee_len) {
split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT ? split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT;
EXT4_EXT_MAY_ZEROOUT : 0;
flags1 = flags | EXT4_GET_BLOCKS_PRE_IO; flags1 = flags | EXT4_GET_BLOCKS_PRE_IO;
if (uninitialized) if (uninitialized)
split_flag1 |= EXT4_EXT_MARK_UNINIT1 | split_flag1 |= EXT4_EXT_MARK_UNINIT1 |
EXT4_EXT_MARK_UNINIT2; EXT4_EXT_MARK_UNINIT2;
if (split_flag & EXT4_EXT_DATA_VALID2)
split_flag1 |= EXT4_EXT_DATA_VALID1;
err = ext4_split_extent_at(handle, inode, path, err = ext4_split_extent_at(handle, inode, path,
map->m_lblk + map->m_len, split_flag1, flags1); map->m_lblk + map->m_len, split_flag1, flags1);
if (err) if (err)
...@@ -3043,8 +3057,8 @@ static int ext4_split_extent(handle_t *handle, ...@@ -3043,8 +3057,8 @@ static int ext4_split_extent(handle_t *handle,
return PTR_ERR(path); return PTR_ERR(path);
if (map->m_lblk >= ee_block) { if (map->m_lblk >= ee_block) {
split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT ? split_flag1 = split_flag & (EXT4_EXT_MAY_ZEROOUT |
EXT4_EXT_MAY_ZEROOUT : 0; EXT4_EXT_DATA_VALID2);
if (uninitialized) if (uninitialized)
split_flag1 |= EXT4_EXT_MARK_UNINIT1; split_flag1 |= EXT4_EXT_MARK_UNINIT1;
if (split_flag & EXT4_EXT_MARK_UNINIT2) if (split_flag & EXT4_EXT_MARK_UNINIT2)
...@@ -3323,26 +3337,47 @@ static int ext4_split_unwritten_extents(handle_t *handle, ...@@ -3323,26 +3337,47 @@ static int ext4_split_unwritten_extents(handle_t *handle,
split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0; split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0;
split_flag |= EXT4_EXT_MARK_UNINIT2; split_flag |= EXT4_EXT_MARK_UNINIT2;
if (flags & EXT4_GET_BLOCKS_CONVERT)
split_flag |= EXT4_EXT_DATA_VALID2;
flags |= EXT4_GET_BLOCKS_PRE_IO; flags |= EXT4_GET_BLOCKS_PRE_IO;
return ext4_split_extent(handle, inode, path, map, split_flag, flags); return ext4_split_extent(handle, inode, path, map, split_flag, flags);
} }
static int ext4_convert_unwritten_extents_endio(handle_t *handle, static int ext4_convert_unwritten_extents_endio(handle_t *handle,
struct inode *inode, struct inode *inode,
struct ext4_map_blocks *map,
struct ext4_ext_path *path) struct ext4_ext_path *path)
{ {
struct ext4_extent *ex; struct ext4_extent *ex;
ext4_lblk_t ee_block;
unsigned int ee_len;
int depth; int depth;
int err = 0; int err = 0;
depth = ext_depth(inode); depth = ext_depth(inode);
ex = path[depth].p_ext; ex = path[depth].p_ext;
ee_block = le32_to_cpu(ex->ee_block);
ee_len = ext4_ext_get_actual_len(ex);
ext_debug("ext4_convert_unwritten_extents_endio: inode %lu, logical" ext_debug("ext4_convert_unwritten_extents_endio: inode %lu, logical"
"block %llu, max_blocks %u\n", inode->i_ino, "block %llu, max_blocks %u\n", inode->i_ino,
(unsigned long long)le32_to_cpu(ex->ee_block), (unsigned long long)ee_block, ee_len);
ext4_ext_get_actual_len(ex));
/* If extent is larger than requested then split is required */
if (ee_block != map->m_lblk || ee_len > map->m_len) {
err = ext4_split_unwritten_extents(handle, inode, map, path,
EXT4_GET_BLOCKS_CONVERT);
if (err < 0)
goto out;
ext4_ext_drop_refs(path);
path = ext4_ext_find_extent(inode, map->m_lblk, path);
if (IS_ERR(path)) {
err = PTR_ERR(path);
goto out;
}
depth = ext_depth(inode);
ex = path[depth].p_ext;
}
err = ext4_ext_get_access(handle, inode, path + depth); err = ext4_ext_get_access(handle, inode, path + depth);
if (err) if (err)
...@@ -3652,7 +3687,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, ...@@ -3652,7 +3687,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
} }
/* IO end_io complete, convert the filled extent to written */ /* IO end_io complete, convert the filled extent to written */
if ((flags & EXT4_GET_BLOCKS_CONVERT)) { if ((flags & EXT4_GET_BLOCKS_CONVERT)) {
ret = ext4_convert_unwritten_extents_endio(handle, inode, ret = ext4_convert_unwritten_extents_endio(handle, inode, map,
path); path);
if (ret >= 0) { if (ret >= 0) {
ext4_update_inode_fsync_trans(handle, inode, 1); ext4_update_inode_fsync_trans(handle, inode, 1);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment