Commit 418b0902 authored by Filipe Manana, committed by David Sterba

btrfs: ensure fiemap doesn't race with writes when FIEMAP_FLAG_SYNC is given

When FIEMAP_FLAG_SYNC is given to fiemap, the expectation is that there
are no concurrent writes and we get a stable view of the inode's extent
layout.

When the flag is given we flush all IO (and wait for ordered extents to
complete) and then lock the inode in shared mode. However, that leaves open
the possibility that a write might happen right after the flushing and
before locking the inode. So fix this by flushing again after locking the
inode - we leave the initial flush before locking the inode to avoid
holding the lock and blocking other RO operations while waiting for IO
and ordered extents to complete. The second flush, done while holding the
inode's lock, will most of the time do nothing or very little, since the
time window for new writes to have happened is small.
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
parent a1a4a9ca
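
Before the diff itself, a minimal userspace sketch may help show what FIEMAP_FLAG_SYNC means to callers: it asks the kernel to flush dirty data before reporting the extent layout, which is exactly the path this patch makes race-free. The sketch uses only the standard FIEMAP ioctl interface; the file path and the extent count of 32 are arbitrary choices for illustration, and error handling is kept minimal.

/*
 * Minimal sketch: request a file's extent layout via the FIEMAP ioctl
 * with FIEMAP_FLAG_SYNC, so the filesystem flushes dirty data first.
 */
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>
#include <linux/fiemap.h>

int main(int argc, char **argv)
{
        const char *path = argc > 1 ? argv[1] : "testfile";
        int fd = open(path, O_RDONLY);

        if (fd < 0) {
                perror("open");
                return 1;
        }

        /* Room for up to 32 extents in a single call. */
        size_t sz = sizeof(struct fiemap) + 32 * sizeof(struct fiemap_extent);
        struct fiemap *fm = calloc(1, sz);

        if (!fm) {
                close(fd);
                return 1;
        }

        fm->fm_start = 0;
        fm->fm_length = FIEMAP_MAX_OFFSET;  /* map the whole file */
        fm->fm_flags = FIEMAP_FLAG_SYNC;    /* flush before mapping */
        fm->fm_extent_count = 32;

        if (ioctl(fd, FS_IOC_FIEMAP, fm) < 0) {
                perror("FS_IOC_FIEMAP");
        } else {
                for (unsigned int i = 0; i < fm->fm_mapped_extents; i++) {
                        const struct fiemap_extent *fe = &fm->fm_extents[i];

                        printf("logical %llu physical %llu len %llu flags 0x%x\n",
                               (unsigned long long)fe->fe_logical,
                               (unsigned long long)fe->fe_physical,
                               (unsigned long long)fe->fe_length,
                               (unsigned int)fe->fe_flags);
                }
        }

        free(fm);
        close(fd);
        return 0;
}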
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2996,17 +2996,15 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
 	range_end = round_up(start + len, sectorsize);
 	prev_extent_end = range_start;
 
-	btrfs_inode_lock(inode, BTRFS_ILOCK_SHARED);
-
 	ret = fiemap_find_last_extent_offset(inode, path, &last_extent_end);
 	if (ret < 0)
-		goto out_unlock;
+		goto out;
 	btrfs_release_path(path);
 
 	path->reada = READA_FORWARD;
 	ret = fiemap_search_slot(inode, path, range_start);
 	if (ret < 0) {
-		goto out_unlock;
+		goto out;
 	} else if (ret > 0) {
 		/*
 		 * No file extent item found, but we may have delalloc between
@@ -3053,7 +3051,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
 						  backref_ctx, 0, 0, 0,
 						  prev_extent_end, hole_end);
 			if (ret < 0) {
-				goto out_unlock;
+				goto out;
 			} else if (ret > 0) {
 				/* fiemap_fill_next_extent() told us to stop. */
 				stopped = true;
@@ -3109,7 +3107,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
 								  extent_gen,
 								  backref_ctx);
 				if (ret < 0)
-					goto out_unlock;
+					goto out;
 				else if (ret > 0)
 					flags |= FIEMAP_EXTENT_SHARED;
 			}
@@ -3120,7 +3118,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
 		}
 
 		if (ret < 0) {
-			goto out_unlock;
+			goto out;
 		} else if (ret > 0) {
 			/* fiemap_fill_next_extent() told us to stop. */
 			stopped = true;
@@ -3131,12 +3129,12 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
 next_item:
 		if (fatal_signal_pending(current)) {
 			ret = -EINTR;
-			goto out_unlock;
+			goto out;
 		}
 
 		ret = fiemap_next_leaf_item(inode, path);
 		if (ret < 0) {
-			goto out_unlock;
+			goto out;
 		} else if (ret > 0) {
 			/* No more file extent items for this inode. */
 			break;
@@ -3160,7 +3158,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
 					  &delalloc_cached_state, backref_ctx,
 					  0, 0, 0, prev_extent_end, range_end - 1);
 		if (ret < 0)
-			goto out_unlock;
+			goto out;
 		prev_extent_end = range_end;
 	}
 
@@ -3198,9 +3196,6 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
 	}
 
 	ret = emit_last_fiemap_cache(fieinfo, &cache);
-
-out_unlock:
-	btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
 out:
 	free_extent_state(delalloc_cached_state);
 	btrfs_free_backref_share_ctx(backref_ctx);
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7835,6 +7835,7 @@ struct iomap_dio *btrfs_dio_write(struct kiocb *iocb, struct iov_iter *iter,
 static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 			u64 start, u64 len)
 {
+	struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
 	int	ret;
 
 	ret = fiemap_prep(inode, fieinfo, start, &len, 0);
@@ -7860,7 +7861,26 @@ static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 			return ret;
 	}
 
-	return extent_fiemap(BTRFS_I(inode), fieinfo, start, len);
+	btrfs_inode_lock(btrfs_inode, BTRFS_ILOCK_SHARED);
+
+	/*
+	 * We did an initial flush to avoid holding the inode's lock while
+	 * triggering writeback and waiting for the completion of IO and ordered
+	 * extents. Now after we locked the inode we do it again, because it's
+	 * possible a new write may have happened in between those two steps.
+	 */
+	if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC) {
+		ret = btrfs_wait_ordered_range(inode, 0, LLONG_MAX);
+		if (ret) {
+			btrfs_inode_unlock(btrfs_inode, BTRFS_ILOCK_SHARED);
+			return ret;
+		}
+	}
+
+	ret = extent_fiemap(btrfs_inode, fieinfo, start, len);
+	btrfs_inode_unlock(btrfs_inode, BTRFS_ILOCK_SHARED);
+
+	return ret;
 }
 
 static int btrfs_writepages(struct address_space *mapping,