Commit 07978330 authored by Linus Torvalds

Merge tag 'for-6.10-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:

 - fix handling of folio private changes.

   The private value holds a pointer to our extent buffer structure
   representing a metadata range. Release and creation of the range were
   not properly synchronized when updating the private bit, which ended
   up in a double folio_put, leading to all sorts of breakage (see the
   first sketch after this list)

 - fix a crash, reported as a duplicate key in metadata, but caused by a
   race between fsync and a size-extending write. It requires a
   preallocated target range plus fsync, and other conditions (log tree
   state, timing); see the second sketch after this list

 - fix leak of qgroup extent records after transaction abort
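
A minimal sketch of the first item (not btrfs code; hypothetical names, userspace
pthreads standing in for mapping->i_private_lock): the folio private pointer and
the eb reference count are only changed together inside one critical section, so
a racing attach and release can never both believe they dropped the last
reference and double-put the folio.

	/* Simplified model, not the kernel implementation. */
	#include <pthread.h>
	#include <stdio.h>

	struct fake_folio {
		pthread_mutex_t lock;	/* stands in for mapping->i_private_lock */
		void *private;		/* stands in for the folio private pointer */
		int eb_refs;		/* stands in for the eb refs on the folio */
	};

	/* Publish the pointer and take a reference in one critical section. */
	static void attach_eb(struct fake_folio *f, void *eb)
	{
		pthread_mutex_lock(&f->lock);
		if (!f->private)
			f->private = eb;
		f->eb_refs++;
		pthread_mutex_unlock(&f->lock);
	}

	/*
	 * Drop a reference; only the last user clears the pointer.  Without
	 * the lock covering both fields, two threads could each decide they
	 * hold the last reference and both release the folio -- the double
	 * folio_put described above.
	 */
	static void detach_eb(struct fake_folio *f)
	{
		pthread_mutex_lock(&f->lock);
		if (--f->eb_refs == 0)
			f->private = NULL;
		pthread_mutex_unlock(&f->lock);
	}

	int main(void)
	{
		struct fake_folio f = { PTHREAD_MUTEX_INITIALIZER, NULL, 0 };
		int eb = 0;

		attach_eb(&f, &eb);	/* eb being created for this range */
		attach_eb(&f, &eb);	/* racing allocation reuses the same folio */
		detach_eb(&f);
		detach_eb(&f);
		printf("private=%p refs=%d\n", f.private, f.eb_refs);
		return 0;
	}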
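
A second sketch, for the fsync/prealloc race (again not btrfs code; the names are
hypothetical): the first extent copied into the log always drops leftovers from a
past fsync, and a later extent only forces another truncation when it starts
before the end of the last extent copied, truncating from the smaller of the two
offsets so overlapping log items cannot produce duplicate keys.

	/* Standalone model of the overlap check; not the kernel implementation. */
	#include <stdbool.h>
	#include <stdio.h>

	typedef unsigned long long u64;

	/*
	 * Decide whether log items must be truncated before copying an extent
	 * item starting at key_offset.  truncate_offset is the end of the last
	 * extent already copied into the log.
	 */
	static bool need_truncate(bool dropped_extents, u64 key_offset,
				  u64 truncate_offset, u64 *from)
	{
		if (!dropped_extents || key_offset < truncate_offset) {
			/* Drop everything from the overlap point onwards. */
			*from = key_offset < truncate_offset ? key_offset
							     : truncate_offset;
			return true;
		}
		return false;
	}

	int main(void)
	{
		u64 from = 0;

		/* First extent: always drop items logged by a past fsync. */
		printf("%d\n", need_truncate(false, 8192, 8192, &from));
		/* Overlapping extent (tree changed while it was unlocked). */
		printf("%d from=%llu\n",
		       need_truncate(true, 4096, 12288, &from), from);
		/* Non-overlapping extent: nothing to drop. */
		printf("%d\n", need_truncate(true, 16384, 12288, &from));
		return 0;
	}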

* tag 'for-6.10-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: protect folio::private when attaching extent buffer folios
  btrfs: fix leak of qgroup extent records after transaction abort
  btrfs: fix crash on racing fsync and size-extending write into prealloc
parents eecba7c0 f3a5367c
@@ -4538,18 +4538,10 @@ static void btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
 				       struct btrfs_fs_info *fs_info)
 {
 	struct rb_node *node;
-	struct btrfs_delayed_ref_root *delayed_refs;
+	struct btrfs_delayed_ref_root *delayed_refs = &trans->delayed_refs;
 	struct btrfs_delayed_ref_node *ref;
 
-	delayed_refs = &trans->delayed_refs;
-
 	spin_lock(&delayed_refs->lock);
-	if (atomic_read(&delayed_refs->num_entries) == 0) {
-		spin_unlock(&delayed_refs->lock);
-		btrfs_debug(fs_info, "delayed_refs has NO entry");
-		return;
-	}
-
 	while ((node = rb_first_cached(&delayed_refs->href_root)) != NULL) {
 		struct btrfs_delayed_ref_head *head;
 		struct rb_node *n;
@@ -3689,6 +3689,8 @@ static struct extent_buffer *grab_extent_buffer(
 	struct folio *folio = page_folio(page);
 	struct extent_buffer *exists;
 
+	lockdep_assert_held(&page->mapping->i_private_lock);
+
 	/*
 	 * For subpage case, we completely rely on radix tree to ensure we
 	 * don't try to insert two ebs for the same bytenr. So here we always
@@ -3756,13 +3758,14 @@ static int check_eb_alignment(struct btrfs_fs_info *fs_info, u64 start)
  * The caller needs to free the existing folios and retry using the same order.
  */
 static int attach_eb_folio_to_filemap(struct extent_buffer *eb, int i,
+				      struct btrfs_subpage *prealloc,
 				      struct extent_buffer **found_eb_ret)
 {
 
 	struct btrfs_fs_info *fs_info = eb->fs_info;
 	struct address_space *mapping = fs_info->btree_inode->i_mapping;
 	const unsigned long index = eb->start >> PAGE_SHIFT;
-	struct folio *existing_folio;
+	struct folio *existing_folio = NULL;
 	int ret;
 
 	ASSERT(found_eb_ret);
@@ -3774,12 +3777,14 @@ static int attach_eb_folio_to_filemap(struct extent_buffer *eb, int i,
 	ret = filemap_add_folio(mapping, eb->folios[i], index + i,
 				GFP_NOFS | __GFP_NOFAIL);
 	if (!ret)
-		return 0;
+		goto finish;
 
 	existing_folio = filemap_lock_folio(mapping, index + i);
 	/* The page cache only exists for a very short time, just retry. */
-	if (IS_ERR(existing_folio))
+	if (IS_ERR(existing_folio)) {
+		existing_folio = NULL;
 		goto retry;
+	}
 
 	/* For now, we should only have single-page folios for btree inode. */
 	ASSERT(folio_nr_pages(existing_folio) == 1);
@@ -3790,14 +3795,13 @@ static int attach_eb_folio_to_filemap(struct extent_buffer *eb, int i,
 		return -EAGAIN;
 	}
 
-	if (fs_info->nodesize < PAGE_SIZE) {
-		/*
-		 * We're going to reuse the existing page, can drop our page
-		 * and subpage structure now.
-		 */
+finish:
+	spin_lock(&mapping->i_private_lock);
+	if (existing_folio && fs_info->nodesize < PAGE_SIZE) {
+		/* We're going to reuse the existing page, can drop our folio now. */
 		__free_page(folio_page(eb->folios[i], 0));
 		eb->folios[i] = existing_folio;
-	} else {
+	} else if (existing_folio) {
 		struct extent_buffer *existing_eb;
 
 		existing_eb = grab_extent_buffer(fs_info,
@@ -3805,6 +3809,7 @@ static int attach_eb_folio_to_filemap(struct extent_buffer *eb, int i,
 		if (existing_eb) {
 			/* The extent buffer still exists, we can use it directly. */
 			*found_eb_ret = existing_eb;
+			spin_unlock(&mapping->i_private_lock);
 			folio_unlock(existing_folio);
 			folio_put(existing_folio);
 			return 1;
@@ -3813,6 +3818,22 @@ static int attach_eb_folio_to_filemap(struct extent_buffer *eb, int i,
 		__free_page(folio_page(eb->folios[i], 0));
 		eb->folios[i] = existing_folio;
 	}
+	eb->folio_size = folio_size(eb->folios[i]);
+	eb->folio_shift = folio_shift(eb->folios[i]);
+	/* Should not fail, as we have preallocated the memory. */
+	ret = attach_extent_buffer_folio(eb, eb->folios[i], prealloc);
+	ASSERT(!ret);
+	/*
+	 * To inform we have an extra eb under allocation, so that
+	 * detach_extent_buffer_page() won't release the folio private when the
+	 * eb hasn't been inserted into radix tree yet.
+	 *
+	 * The ref will be decreased when the eb releases the page, in
+	 * detach_extent_buffer_page(). Thus needs no special handling in the
+	 * error path.
+	 */
+	btrfs_folio_inc_eb_refs(fs_info, eb->folios[i]);
+	spin_unlock(&mapping->i_private_lock);
 	return 0;
 }
 
@@ -3824,7 +3845,6 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 	int attached = 0;
 	struct extent_buffer *eb;
 	struct extent_buffer *existing_eb = NULL;
-	struct address_space *mapping = fs_info->btree_inode->i_mapping;
 	struct btrfs_subpage *prealloc = NULL;
 	u64 lockdep_owner = owner_root;
 	bool page_contig = true;
@@ -3890,7 +3910,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 	for (int i = 0; i < num_folios; i++) {
 		struct folio *folio;
 
-		ret = attach_eb_folio_to_filemap(eb, i, &existing_eb);
+		ret = attach_eb_folio_to_filemap(eb, i, prealloc, &existing_eb);
 		if (ret > 0) {
 			ASSERT(existing_eb);
 			goto out;
@@ -3927,24 +3947,6 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 		 * and free the allocated page.
 		 */
 		folio = eb->folios[i];
-		eb->folio_size = folio_size(folio);
-		eb->folio_shift = folio_shift(folio);
-		spin_lock(&mapping->i_private_lock);
-		/* Should not fail, as we have preallocated the memory */
-		ret = attach_extent_buffer_folio(eb, folio, prealloc);
-		ASSERT(!ret);
-		/*
-		 * To inform we have extra eb under allocation, so that
-		 * detach_extent_buffer_page() won't release the folio private
-		 * when the eb hasn't yet been inserted into radix tree.
-		 *
-		 * The ref will be decreased when the eb released the page, in
-		 * detach_extent_buffer_page().
-		 * Thus needs no special handling in error path.
-		 */
-		btrfs_folio_inc_eb_refs(fs_info, folio);
-		spin_unlock(&mapping->i_private_lock);
-
 		WARN_ON(btrfs_folio_test_dirty(fs_info, folio, eb->start, eb->len));
 
 		/*
@@ -4860,18 +4860,23 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans,
 			path->slots[0]++;
 			continue;
 		}
-		if (!dropped_extents) {
-			/*
-			 * Avoid logging extent items logged in past fsync calls
-			 * and leading to duplicate keys in the log tree.
-			 */
+		/*
+		 * Avoid overlapping items in the log tree. The first time we
+		 * get here, get rid of everything from a past fsync. After
+		 * that, if the current extent starts before the end of the last
+		 * extent we copied, truncate the last one. This can happen if
+		 * an ordered extent completion modifies the subvolume tree
+		 * while btrfs_next_leaf() has the tree unlocked.
+		 */
+		if (!dropped_extents || key.offset < truncate_offset) {
 			ret = truncate_inode_items(trans, root->log_root, inode,
-						   truncate_offset,
+						   min(key.offset, truncate_offset),
 						   BTRFS_EXTENT_DATA_KEY);
 			if (ret)
 				goto out;
 			dropped_extents = true;
 		}
+		truncate_offset = btrfs_file_extent_end(path);
 		if (ins_nr == 0)
 			start_slot = slot;
 		ins_nr++;