Commit 46670259 authored by Linus Torvalds

Merge tag 'for-6.5-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:
 "Stable fixes:

   - fix race between balance and cancel/pause

   - various iput() fixes

   - fix use-after-free of new block group that became unused

   - fix warning when putting transaction with qgroups enabled after
     abort

   - fix crash in subpage mode when page could be released between map
     and map read

   - when scrubbing raid56 verify the P/Q stripes unconditionally

   - fix minor memory leak in zoned mode when a block group with an
     unexpected superblock is found

  Regression fixes:

   - fix ordered extent split error handling when submitting direct IO

   - use irq-safe locking when adding delayed iputs"

* tag 'for-6.5-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: fix warning when putting transaction with qgroups enabled after abort
  btrfs: fix ordered extent split error handling in btrfs_dio_submit_io
  btrfs: set_page_extent_mapped after read_folio in btrfs_cont_expand
  btrfs: raid56: always verify the P/Q contents for scrub
  btrfs: use irq safe locking when running and adding delayed iputs
  btrfs: fix iput() on error pointer after error during orphan cleanup
  btrfs: fix double iput() on inode after an error during orphan cleanup
  btrfs: zoned: fix memory leak after finding block group with super blocks
  btrfs: fix use-after-free of new block group that became unused
  btrfs: be a bit more careful when setting mirror_num_ret in btrfs_map_block
  btrfs: fix race between balance and cancel/pause
parents 2922800a aa84ce8a
...@@ -1640,13 +1640,14 @@ void btrfs_mark_bg_unused(struct btrfs_block_group *bg) ...@@ -1640,13 +1640,14 @@ void btrfs_mark_bg_unused(struct btrfs_block_group *bg)
{ {
struct btrfs_fs_info *fs_info = bg->fs_info; struct btrfs_fs_info *fs_info = bg->fs_info;
trace_btrfs_add_unused_block_group(bg);
spin_lock(&fs_info->unused_bgs_lock); spin_lock(&fs_info->unused_bgs_lock);
if (list_empty(&bg->bg_list)) { if (list_empty(&bg->bg_list)) {
btrfs_get_block_group(bg); btrfs_get_block_group(bg);
trace_btrfs_add_unused_block_group(bg);
list_add_tail(&bg->bg_list, &fs_info->unused_bgs); list_add_tail(&bg->bg_list, &fs_info->unused_bgs);
} else { } else if (!test_bit(BLOCK_GROUP_FLAG_NEW, &bg->runtime_flags)) {
/* Pull out the block group from the reclaim_bgs list. */ /* Pull out the block group from the reclaim_bgs list. */
trace_btrfs_add_unused_block_group(bg);
list_move_tail(&bg->bg_list, &fs_info->unused_bgs); list_move_tail(&bg->bg_list, &fs_info->unused_bgs);
} }
spin_unlock(&fs_info->unused_bgs_lock); spin_unlock(&fs_info->unused_bgs_lock);
...@@ -2087,6 +2088,7 @@ static int exclude_super_stripes(struct btrfs_block_group *cache) ...@@ -2087,6 +2088,7 @@ static int exclude_super_stripes(struct btrfs_block_group *cache)
/* Shouldn't have super stripes in sequential zones */ /* Shouldn't have super stripes in sequential zones */
if (zoned && nr) { if (zoned && nr) {
kfree(logical);
btrfs_err(fs_info, btrfs_err(fs_info,
"zoned: block group %llu must not contain super block", "zoned: block group %llu must not contain super block",
cache->start); cache->start);
...@@ -2668,6 +2670,7 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans) ...@@ -2668,6 +2670,7 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans)
next: next:
btrfs_delayed_refs_rsv_release(fs_info, 1); btrfs_delayed_refs_rsv_release(fs_info, 1);
list_del_init(&block_group->bg_list); list_del_init(&block_group->bg_list);
clear_bit(BLOCK_GROUP_FLAG_NEW, &block_group->runtime_flags);
} }
btrfs_trans_release_chunk_metadata(trans); btrfs_trans_release_chunk_metadata(trans);
} }
...@@ -2707,6 +2710,13 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran ...@@ -2707,6 +2710,13 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
if (!cache) if (!cache)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
/*
* Mark it as new before adding it to the rbtree of block groups or any
* list, so that no other task finds it and calls btrfs_mark_bg_unused()
* before the new flag is set.
*/
set_bit(BLOCK_GROUP_FLAG_NEW, &cache->runtime_flags);
cache->length = size; cache->length = size;
set_free_space_tree_thresholds(cache); set_free_space_tree_thresholds(cache);
cache->flags = type; cache->flags = type;
......
...@@ -70,6 +70,11 @@ enum btrfs_block_group_flags { ...@@ -70,6 +70,11 @@ enum btrfs_block_group_flags {
BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE,
/* Indicate that the block group is placed on a sequential zone */ /* Indicate that the block group is placed on a sequential zone */
BLOCK_GROUP_FLAG_SEQUENTIAL_ZONE, BLOCK_GROUP_FLAG_SEQUENTIAL_ZONE,
/*
* Indicate that block group is in the list of new block groups of a
* transaction.
*/
BLOCK_GROUP_FLAG_NEW,
}; };
enum btrfs_caching_type { enum btrfs_caching_type {
......
...@@ -3482,15 +3482,21 @@ bool btrfs_data_csum_ok(struct btrfs_bio *bbio, struct btrfs_device *dev, ...@@ -3482,15 +3482,21 @@ bool btrfs_data_csum_ok(struct btrfs_bio *bbio, struct btrfs_device *dev,
void btrfs_add_delayed_iput(struct btrfs_inode *inode) void btrfs_add_delayed_iput(struct btrfs_inode *inode)
{ {
struct btrfs_fs_info *fs_info = inode->root->fs_info; struct btrfs_fs_info *fs_info = inode->root->fs_info;
unsigned long flags;
if (atomic_add_unless(&inode->vfs_inode.i_count, -1, 1)) if (atomic_add_unless(&inode->vfs_inode.i_count, -1, 1))
return; return;
atomic_inc(&fs_info->nr_delayed_iputs); atomic_inc(&fs_info->nr_delayed_iputs);
spin_lock(&fs_info->delayed_iput_lock); /*
* Need to be irq safe here because we can be called from either an irq
* context (see bio.c and btrfs_put_ordered_extent()) or a non-irq
* context.
*/
spin_lock_irqsave(&fs_info->delayed_iput_lock, flags);
ASSERT(list_empty(&inode->delayed_iput)); ASSERT(list_empty(&inode->delayed_iput));
list_add_tail(&inode->delayed_iput, &fs_info->delayed_iputs); list_add_tail(&inode->delayed_iput, &fs_info->delayed_iputs);
spin_unlock(&fs_info->delayed_iput_lock); spin_unlock_irqrestore(&fs_info->delayed_iput_lock, flags);
if (!test_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags)) if (!test_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags))
wake_up_process(fs_info->cleaner_kthread); wake_up_process(fs_info->cleaner_kthread);
} }
...@@ -3499,37 +3505,46 @@ static void run_delayed_iput_locked(struct btrfs_fs_info *fs_info, ...@@ -3499,37 +3505,46 @@ static void run_delayed_iput_locked(struct btrfs_fs_info *fs_info,
struct btrfs_inode *inode) struct btrfs_inode *inode)
{ {
list_del_init(&inode->delayed_iput); list_del_init(&inode->delayed_iput);
spin_unlock(&fs_info->delayed_iput_lock); spin_unlock_irq(&fs_info->delayed_iput_lock);
iput(&inode->vfs_inode); iput(&inode->vfs_inode);
if (atomic_dec_and_test(&fs_info->nr_delayed_iputs)) if (atomic_dec_and_test(&fs_info->nr_delayed_iputs))
wake_up(&fs_info->delayed_iputs_wait); wake_up(&fs_info->delayed_iputs_wait);
spin_lock(&fs_info->delayed_iput_lock); spin_lock_irq(&fs_info->delayed_iput_lock);
} }
static void btrfs_run_delayed_iput(struct btrfs_fs_info *fs_info, static void btrfs_run_delayed_iput(struct btrfs_fs_info *fs_info,
struct btrfs_inode *inode) struct btrfs_inode *inode)
{ {
if (!list_empty(&inode->delayed_iput)) { if (!list_empty(&inode->delayed_iput)) {
spin_lock(&fs_info->delayed_iput_lock); spin_lock_irq(&fs_info->delayed_iput_lock);
if (!list_empty(&inode->delayed_iput)) if (!list_empty(&inode->delayed_iput))
run_delayed_iput_locked(fs_info, inode); run_delayed_iput_locked(fs_info, inode);
spin_unlock(&fs_info->delayed_iput_lock); spin_unlock_irq(&fs_info->delayed_iput_lock);
} }
} }
void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info) void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info)
{ {
/*
spin_lock(&fs_info->delayed_iput_lock); * btrfs_put_ordered_extent() can run in irq context (see bio.c), which
* calls btrfs_add_delayed_iput() and that needs to lock
* fs_info->delayed_iput_lock. So we need to disable irqs here to
* prevent a deadlock.
*/
spin_lock_irq(&fs_info->delayed_iput_lock);
while (!list_empty(&fs_info->delayed_iputs)) { while (!list_empty(&fs_info->delayed_iputs)) {
struct btrfs_inode *inode; struct btrfs_inode *inode;
inode = list_first_entry(&fs_info->delayed_iputs, inode = list_first_entry(&fs_info->delayed_iputs,
struct btrfs_inode, delayed_iput); struct btrfs_inode, delayed_iput);
run_delayed_iput_locked(fs_info, inode); run_delayed_iput_locked(fs_info, inode);
cond_resched_lock(&fs_info->delayed_iput_lock); if (need_resched()) {
spin_unlock_irq(&fs_info->delayed_iput_lock);
cond_resched();
spin_lock_irq(&fs_info->delayed_iput_lock);
}
} }
spin_unlock(&fs_info->delayed_iput_lock); spin_unlock_irq(&fs_info->delayed_iput_lock);
} }
/* /*
...@@ -3659,11 +3674,14 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) ...@@ -3659,11 +3674,14 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
found_key.type = BTRFS_INODE_ITEM_KEY; found_key.type = BTRFS_INODE_ITEM_KEY;
found_key.offset = 0; found_key.offset = 0;
inode = btrfs_iget(fs_info->sb, last_objectid, root); inode = btrfs_iget(fs_info->sb, last_objectid, root);
ret = PTR_ERR_OR_ZERO(inode); if (IS_ERR(inode)) {
if (ret && ret != -ENOENT) ret = PTR_ERR(inode);
goto out; inode = NULL;
if (ret != -ENOENT)
goto out;
}
if (ret == -ENOENT && root == fs_info->tree_root) { if (!inode && root == fs_info->tree_root) {
struct btrfs_root *dead_root; struct btrfs_root *dead_root;
int is_dead_root = 0; int is_dead_root = 0;
...@@ -3724,17 +3742,17 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) ...@@ -3724,17 +3742,17 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
* deleted but wasn't. The inode number may have been reused, * deleted but wasn't. The inode number may have been reused,
* but either way, we can delete the orphan item. * but either way, we can delete the orphan item.
*/ */
if (ret == -ENOENT || inode->i_nlink) { if (!inode || inode->i_nlink) {
if (!ret) { if (inode) {
ret = btrfs_drop_verity_items(BTRFS_I(inode)); ret = btrfs_drop_verity_items(BTRFS_I(inode));
iput(inode); iput(inode);
inode = NULL;
if (ret) if (ret)
goto out; goto out;
} }
trans = btrfs_start_transaction(root, 1); trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) { if (IS_ERR(trans)) {
ret = PTR_ERR(trans); ret = PTR_ERR(trans);
iput(inode);
goto out; goto out;
} }
btrfs_debug(fs_info, "auto deleting %Lu", btrfs_debug(fs_info, "auto deleting %Lu",
...@@ -3742,10 +3760,8 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) ...@@ -3742,10 +3760,8 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
ret = btrfs_del_orphan_item(trans, root, ret = btrfs_del_orphan_item(trans, root,
found_key.objectid); found_key.objectid);
btrfs_end_transaction(trans); btrfs_end_transaction(trans);
if (ret) { if (ret)
iput(inode);
goto out; goto out;
}
continue; continue;
} }
...@@ -4847,9 +4863,6 @@ int btrfs_truncate_block(struct btrfs_inode *inode, loff_t from, loff_t len, ...@@ -4847,9 +4863,6 @@ int btrfs_truncate_block(struct btrfs_inode *inode, loff_t from, loff_t len,
ret = -ENOMEM; ret = -ENOMEM;
goto out; goto out;
} }
ret = set_page_extent_mapped(page);
if (ret < 0)
goto out_unlock;
if (!PageUptodate(page)) { if (!PageUptodate(page)) {
ret = btrfs_read_folio(NULL, page_folio(page)); ret = btrfs_read_folio(NULL, page_folio(page));
...@@ -4864,6 +4877,17 @@ int btrfs_truncate_block(struct btrfs_inode *inode, loff_t from, loff_t len, ...@@ -4864,6 +4877,17 @@ int btrfs_truncate_block(struct btrfs_inode *inode, loff_t from, loff_t len,
goto out_unlock; goto out_unlock;
} }
} }
/*
* We unlock the page after the io is completed and then re-lock it
* above. release_folio() could have come in between that and cleared
* PagePrivate(), but left the page in the mapping. Set the page mapped
* here to make sure it's properly set for the subpage stuff.
*/
ret = set_page_extent_mapped(page);
if (ret < 0)
goto out_unlock;
wait_on_page_writeback(page); wait_on_page_writeback(page);
lock_extent(io_tree, block_start, block_end, &cached_state); lock_extent(io_tree, block_start, block_end, &cached_state);
...@@ -7849,8 +7873,11 @@ static void btrfs_dio_submit_io(const struct iomap_iter *iter, struct bio *bio, ...@@ -7849,8 +7873,11 @@ static void btrfs_dio_submit_io(const struct iomap_iter *iter, struct bio *bio,
ret = btrfs_extract_ordered_extent(bbio, dio_data->ordered); ret = btrfs_extract_ordered_extent(bbio, dio_data->ordered);
if (ret) { if (ret) {
bbio->bio.bi_status = errno_to_blk_status(ret); btrfs_finish_ordered_extent(dio_data->ordered, NULL,
btrfs_dio_end_io(bbio); file_offset, dip->bytes,
!ret);
bio->bi_status = errno_to_blk_status(ret);
iomap_dio_bio_end_io(bio);
return; return;
} }
} }
......
...@@ -4445,4 +4445,5 @@ void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans) ...@@ -4445,4 +4445,5 @@ void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans)
ulist_free(entry->old_roots); ulist_free(entry->old_roots);
kfree(entry); kfree(entry);
} }
*root = RB_ROOT;
} }
...@@ -71,7 +71,7 @@ static void rmw_rbio_work_locked(struct work_struct *work); ...@@ -71,7 +71,7 @@ static void rmw_rbio_work_locked(struct work_struct *work);
static void index_rbio_pages(struct btrfs_raid_bio *rbio); static void index_rbio_pages(struct btrfs_raid_bio *rbio);
static int alloc_rbio_pages(struct btrfs_raid_bio *rbio); static int alloc_rbio_pages(struct btrfs_raid_bio *rbio);
static int finish_parity_scrub(struct btrfs_raid_bio *rbio, int need_check); static int finish_parity_scrub(struct btrfs_raid_bio *rbio);
static void scrub_rbio_work_locked(struct work_struct *work); static void scrub_rbio_work_locked(struct work_struct *work);
static void free_raid_bio_pointers(struct btrfs_raid_bio *rbio) static void free_raid_bio_pointers(struct btrfs_raid_bio *rbio)
...@@ -2404,7 +2404,7 @@ static int alloc_rbio_essential_pages(struct btrfs_raid_bio *rbio) ...@@ -2404,7 +2404,7 @@ static int alloc_rbio_essential_pages(struct btrfs_raid_bio *rbio)
return 0; return 0;
} }
static int finish_parity_scrub(struct btrfs_raid_bio *rbio, int need_check) static int finish_parity_scrub(struct btrfs_raid_bio *rbio)
{ {
struct btrfs_io_context *bioc = rbio->bioc; struct btrfs_io_context *bioc = rbio->bioc;
const u32 sectorsize = bioc->fs_info->sectorsize; const u32 sectorsize = bioc->fs_info->sectorsize;
...@@ -2445,9 +2445,6 @@ static int finish_parity_scrub(struct btrfs_raid_bio *rbio, int need_check) ...@@ -2445,9 +2445,6 @@ static int finish_parity_scrub(struct btrfs_raid_bio *rbio, int need_check)
*/ */
clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags); clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
if (!need_check)
goto writeback;
p_sector.page = alloc_page(GFP_NOFS); p_sector.page = alloc_page(GFP_NOFS);
if (!p_sector.page) if (!p_sector.page)
return -ENOMEM; return -ENOMEM;
...@@ -2516,7 +2513,6 @@ static int finish_parity_scrub(struct btrfs_raid_bio *rbio, int need_check) ...@@ -2516,7 +2513,6 @@ static int finish_parity_scrub(struct btrfs_raid_bio *rbio, int need_check)
q_sector.page = NULL; q_sector.page = NULL;
} }
writeback:
/* /*
* time to start writing. Make bios for everything from the * time to start writing. Make bios for everything from the
* higher layers (the bio_list in our rbio) and our p/q. Ignore * higher layers (the bio_list in our rbio) and our p/q. Ignore
...@@ -2699,7 +2695,6 @@ static int scrub_assemble_read_bios(struct btrfs_raid_bio *rbio) ...@@ -2699,7 +2695,6 @@ static int scrub_assemble_read_bios(struct btrfs_raid_bio *rbio)
static void scrub_rbio(struct btrfs_raid_bio *rbio) static void scrub_rbio(struct btrfs_raid_bio *rbio)
{ {
bool need_check = false;
int sector_nr; int sector_nr;
int ret; int ret;
...@@ -2722,7 +2717,7 @@ static void scrub_rbio(struct btrfs_raid_bio *rbio) ...@@ -2722,7 +2717,7 @@ static void scrub_rbio(struct btrfs_raid_bio *rbio)
* We have every sector properly prepared. Can finish the scrub * We have every sector properly prepared. Can finish the scrub
* and writeback the good content. * and writeback the good content.
*/ */
ret = finish_parity_scrub(rbio, need_check); ret = finish_parity_scrub(rbio);
wait_event(rbio->io_wait, atomic_read(&rbio->stripes_pending) == 0); wait_event(rbio->io_wait, atomic_read(&rbio->stripes_pending) == 0);
for (sector_nr = 0; sector_nr < rbio->stripe_nsectors; sector_nr++) { for (sector_nr = 0; sector_nr < rbio->stripe_nsectors; sector_nr++) {
int found_errors; int found_errors;
......
...@@ -4078,14 +4078,6 @@ static int alloc_profile_is_valid(u64 flags, int extended) ...@@ -4078,14 +4078,6 @@ static int alloc_profile_is_valid(u64 flags, int extended)
return has_single_bit_set(flags); return has_single_bit_set(flags);
} }
static inline int balance_need_close(struct btrfs_fs_info *fs_info)
{
/* cancel requested || normal exit path */
return atomic_read(&fs_info->balance_cancel_req) ||
(atomic_read(&fs_info->balance_pause_req) == 0 &&
atomic_read(&fs_info->balance_cancel_req) == 0);
}
/* /*
* Validate target profile against allowed profiles and return true if it's OK. * Validate target profile against allowed profiles and return true if it's OK.
* Otherwise print the error message and return false. * Otherwise print the error message and return false.
...@@ -4275,6 +4267,7 @@ int btrfs_balance(struct btrfs_fs_info *fs_info, ...@@ -4275,6 +4267,7 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
u64 num_devices; u64 num_devices;
unsigned seq; unsigned seq;
bool reducing_redundancy; bool reducing_redundancy;
bool paused = false;
int i; int i;
if (btrfs_fs_closing(fs_info) || if (btrfs_fs_closing(fs_info) ||
...@@ -4405,6 +4398,7 @@ int btrfs_balance(struct btrfs_fs_info *fs_info, ...@@ -4405,6 +4398,7 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
if (ret == -ECANCELED && atomic_read(&fs_info->balance_pause_req)) { if (ret == -ECANCELED && atomic_read(&fs_info->balance_pause_req)) {
btrfs_info(fs_info, "balance: paused"); btrfs_info(fs_info, "balance: paused");
btrfs_exclop_balance(fs_info, BTRFS_EXCLOP_BALANCE_PAUSED); btrfs_exclop_balance(fs_info, BTRFS_EXCLOP_BALANCE_PAUSED);
paused = true;
} }
/* /*
* Balance can be canceled by: * Balance can be canceled by:
...@@ -4433,8 +4427,8 @@ int btrfs_balance(struct btrfs_fs_info *fs_info, ...@@ -4433,8 +4427,8 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
btrfs_update_ioctl_balance_args(fs_info, bargs); btrfs_update_ioctl_balance_args(fs_info, bargs);
} }
if ((ret && ret != -ECANCELED && ret != -ENOSPC) || /* We didn't pause, we can clean everything up. */
balance_need_close(fs_info)) { if (!paused) {
reset_balance_state(fs_info); reset_balance_state(fs_info);
btrfs_exclop_finish(fs_info); btrfs_exclop_finish(fs_info);
} }
...@@ -6404,7 +6398,8 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, ...@@ -6404,7 +6398,8 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
(op == BTRFS_MAP_READ || !dev_replace_is_ongoing || (op == BTRFS_MAP_READ || !dev_replace_is_ongoing ||
!dev_replace->tgtdev)) { !dev_replace->tgtdev)) {
set_io_stripe(smap, map, stripe_index, stripe_offset, stripe_nr); set_io_stripe(smap, map, stripe_index, stripe_offset, stripe_nr);
*mirror_num_ret = mirror_num; if (mirror_num_ret)
*mirror_num_ret = mirror_num;
*bioc_ret = NULL; *bioc_ret = NULL;
ret = 0; ret = 0;
goto out; goto out;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment