Commit 8da8d884 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-6.8-rc5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:

 - Fix a deadlock in fiemap.

   There was a big lock around the whole operation that can interfere
   with a page fault and mkwrite.

   Reducing the lock scope can also speed up fiemap

 - Fix range condition for extent defragmentation which could lead to
   worse layout in some cases

* tag 'for-6.8-rc5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: fix deadlock with fiemap and extent locking
  btrfs: defrag: avoid unnecessary defrag caused by incorrect extent size
parents d8be5a55 b0ad381f
......@@ -1046,7 +1046,7 @@ static int defrag_collect_targets(struct btrfs_inode *inode,
goto add;
/* Skip too large extent */
if (range_len >= extent_thresh)
if (em->len >= extent_thresh)
goto next;
/*
......
......@@ -2689,16 +2689,34 @@ static int fiemap_process_hole(struct btrfs_inode *inode,
* it beyond i_size.
*/
while (cur_offset < end && cur_offset < i_size) {
struct extent_state *cached_state = NULL;
u64 delalloc_start;
u64 delalloc_end;
u64 prealloc_start;
u64 lockstart;
u64 lockend;
u64 prealloc_len = 0;
bool delalloc;
lockstart = round_down(cur_offset, inode->root->fs_info->sectorsize);
lockend = round_up(end, inode->root->fs_info->sectorsize);
/*
* We are only locking for the delalloc range because that's the
* only thing that can change here. With fiemap we have a lock
* on the inode, so no buffered or direct writes can happen.
*
* However mmaps and normal page writeback will cause this to
* change arbitrarily. We have to lock the extent lock here to
* make sure that nobody messes with the tree while we're doing
* btrfs_find_delalloc_in_range.
*/
lock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
delalloc = btrfs_find_delalloc_in_range(inode, cur_offset, end,
delalloc_cached_state,
&delalloc_start,
&delalloc_end);
unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
if (!delalloc)
break;
......@@ -2866,15 +2884,15 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len)
{
const u64 ino = btrfs_ino(inode);
struct extent_state *cached_state = NULL;
struct extent_state *delalloc_cached_state = NULL;
struct btrfs_path *path;
struct fiemap_cache cache = { 0 };
struct btrfs_backref_share_check_ctx *backref_ctx;
u64 last_extent_end;
u64 prev_extent_end;
u64 lockstart;
u64 lockend;
u64 range_start;
u64 range_end;
const u64 sectorsize = inode->root->fs_info->sectorsize;
bool stopped = false;
int ret;
......@@ -2885,12 +2903,11 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
goto out;
}
lockstart = round_down(start, inode->root->fs_info->sectorsize);
lockend = round_up(start + len, inode->root->fs_info->sectorsize);
prev_extent_end = lockstart;
range_start = round_down(start, sectorsize);
range_end = round_up(start + len, sectorsize);
prev_extent_end = range_start;
btrfs_inode_lock(inode, BTRFS_ILOCK_SHARED);
lock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
ret = fiemap_find_last_extent_offset(inode, path, &last_extent_end);
if (ret < 0)
......@@ -2898,7 +2915,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
btrfs_release_path(path);
path->reada = READA_FORWARD;
ret = fiemap_search_slot(inode, path, lockstart);
ret = fiemap_search_slot(inode, path, range_start);
if (ret < 0) {
goto out_unlock;
} else if (ret > 0) {
......@@ -2910,7 +2927,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
goto check_eof_delalloc;
}
while (prev_extent_end < lockend) {
while (prev_extent_end < range_end) {
struct extent_buffer *leaf = path->nodes[0];
struct btrfs_file_extent_item *ei;
struct btrfs_key key;
......@@ -2933,19 +2950,19 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
* The first iteration can leave us at an extent item that ends
* before our range's start. Move to the next item.
*/
if (extent_end <= lockstart)
if (extent_end <= range_start)
goto next_item;
backref_ctx->curr_leaf_bytenr = leaf->start;
/* We have in implicit hole (NO_HOLES feature enabled). */
if (prev_extent_end < key.offset) {
const u64 range_end = min(key.offset, lockend) - 1;
const u64 hole_end = min(key.offset, range_end) - 1;
ret = fiemap_process_hole(inode, fieinfo, &cache,
&delalloc_cached_state,
backref_ctx, 0, 0, 0,
prev_extent_end, range_end);
prev_extent_end, hole_end);
if (ret < 0) {
goto out_unlock;
} else if (ret > 0) {
......@@ -2955,7 +2972,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
}
/* We've reached the end of the fiemap range, stop. */
if (key.offset >= lockend) {
if (key.offset >= range_end) {
stopped = true;
break;
}
......@@ -3049,29 +3066,41 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
btrfs_free_path(path);
path = NULL;
if (!stopped && prev_extent_end < lockend) {
if (!stopped && prev_extent_end < range_end) {
ret = fiemap_process_hole(inode, fieinfo, &cache,
&delalloc_cached_state, backref_ctx,
0, 0, 0, prev_extent_end, lockend - 1);
0, 0, 0, prev_extent_end, range_end - 1);
if (ret < 0)
goto out_unlock;
prev_extent_end = lockend;
prev_extent_end = range_end;
}
if (cache.cached && cache.offset + cache.len >= last_extent_end) {
const u64 i_size = i_size_read(&inode->vfs_inode);
if (prev_extent_end < i_size) {
struct extent_state *cached_state = NULL;
u64 delalloc_start;
u64 delalloc_end;
u64 lockstart;
u64 lockend;
bool delalloc;
lockstart = round_down(prev_extent_end, sectorsize);
lockend = round_up(i_size, sectorsize);
/*
* See the comment in fiemap_process_hole as to why
* we're doing the locking here.
*/
lock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
delalloc = btrfs_find_delalloc_in_range(inode,
prev_extent_end,
i_size - 1,
&delalloc_cached_state,
&delalloc_start,
&delalloc_end);
unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
if (!delalloc)
cache.flags |= FIEMAP_EXTENT_LAST;
} else {
......@@ -3082,7 +3111,6 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
ret = emit_last_fiemap_cache(fieinfo, &cache);
out_unlock:
unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
out:
free_extent_state(delalloc_cached_state);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment