Commit bc00965d authored by Matthew Wilcox (Oracle), committed by David Sterba

btrfs: count super block write errors in device instead of tracking folio error state

Currently the error status of a super block write is tracked in the
page/folio Error status bit. For that we need to keep a reference to the
folio for the whole duration of the write and the wait.

Count the number of superblock writeback errors in the btrfs_device.
That means we don't need the folio to stay around until it's waited for,
and can avoid the extra call to folio_get/put.

Also remove a mention of PageError in a comment as it's the last mention
of the page Error state.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
parent 617fb10e
...@@ -3634,11 +3634,15 @@ static void btrfs_end_super_write(struct bio *bio) ...@@ -3634,11 +3634,15 @@ static void btrfs_end_super_write(struct bio *bio)
"lost super block write due to IO error on %s (%d)", "lost super block write due to IO error on %s (%d)",
btrfs_dev_name(device), btrfs_dev_name(device),
blk_status_to_errno(bio->bi_status)); blk_status_to_errno(bio->bi_status));
folio_set_error(fi.folio);
btrfs_dev_stat_inc_and_print(device, btrfs_dev_stat_inc_and_print(device,
BTRFS_DEV_STAT_WRITE_ERRS); BTRFS_DEV_STAT_WRITE_ERRS);
/* Ensure failure if the primary sb fails. */
if (bio->bi_opf & REQ_FUA)
atomic_add(BTRFS_SUPER_PRIMARY_WRITE_ERROR,
&device->sb_write_errors);
else
atomic_inc(&device->sb_write_errors);
} }
folio_unlock(fi.folio); folio_unlock(fi.folio);
folio_put(fi.folio); folio_put(fi.folio);
} }
...@@ -3742,10 +3746,11 @@ static int write_dev_supers(struct btrfs_device *device, ...@@ -3742,10 +3746,11 @@ static int write_dev_supers(struct btrfs_device *device,
struct address_space *mapping = device->bdev->bd_inode->i_mapping; struct address_space *mapping = device->bdev->bd_inode->i_mapping;
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
int i; int i;
int errors = 0;
int ret; int ret;
u64 bytenr, bytenr_orig; u64 bytenr, bytenr_orig;
atomic_set(&device->sb_write_errors, 0);
if (max_mirrors == 0) if (max_mirrors == 0)
max_mirrors = BTRFS_SUPER_MIRROR_MAX; max_mirrors = BTRFS_SUPER_MIRROR_MAX;
...@@ -3765,7 +3770,7 @@ static int write_dev_supers(struct btrfs_device *device, ...@@ -3765,7 +3770,7 @@ static int write_dev_supers(struct btrfs_device *device,
btrfs_err(device->fs_info, btrfs_err(device->fs_info,
"couldn't get super block location for mirror %d", "couldn't get super block location for mirror %d",
i); i);
errors++; atomic_inc(&device->sb_write_errors);
continue; continue;
} }
if (bytenr + BTRFS_SUPER_INFO_SIZE >= if (bytenr + BTRFS_SUPER_INFO_SIZE >=
...@@ -3785,14 +3790,11 @@ static int write_dev_supers(struct btrfs_device *device, ...@@ -3785,14 +3790,11 @@ static int write_dev_supers(struct btrfs_device *device,
btrfs_err(device->fs_info, btrfs_err(device->fs_info,
"couldn't get super block page for bytenr %llu", "couldn't get super block page for bytenr %llu",
bytenr); bytenr);
errors++; atomic_inc(&device->sb_write_errors);
continue; continue;
} }
ASSERT(folio_order(folio) == 0); ASSERT(folio_order(folio) == 0);
/* Bump the refcount for wait_dev_supers() */
folio_get(folio);
offset = offset_in_folio(folio, bytenr); offset = offset_in_folio(folio, bytenr);
disk_super = folio_address(folio) + offset; disk_super = folio_address(folio) + offset;
memcpy(disk_super, sb, BTRFS_SUPER_INFO_SIZE); memcpy(disk_super, sb, BTRFS_SUPER_INFO_SIZE);
...@@ -3820,16 +3822,17 @@ static int write_dev_supers(struct btrfs_device *device, ...@@ -3820,16 +3822,17 @@ static int write_dev_supers(struct btrfs_device *device,
submit_bio(bio); submit_bio(bio);
if (btrfs_advance_sb_log(device, i)) if (btrfs_advance_sb_log(device, i))
errors++; atomic_inc(&device->sb_write_errors);
} }
return errors < i ? 0 : -1; return atomic_read(&device->sb_write_errors) < i ? 0 : -1;
} }
/* /*
* Wait for write completion of superblocks done by write_dev_supers, * Wait for write completion of superblocks done by write_dev_supers,
* @max_mirrors same for write and wait phases. * @max_mirrors same for write and wait phases.
* *
* Return number of errors when folio is not found or not marked up to date. * Return -1 if primary super block write failed or when there were no super block
* copies written. Otherwise 0.
*/ */
static int wait_dev_supers(struct btrfs_device *device, int max_mirrors) static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
{ {
...@@ -3860,30 +3863,19 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors) ...@@ -3860,30 +3863,19 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
folio = filemap_get_folio(device->bdev->bd_inode->i_mapping, folio = filemap_get_folio(device->bdev->bd_inode->i_mapping,
bytenr >> PAGE_SHIFT); bytenr >> PAGE_SHIFT);
if (IS_ERR(folio)) { /* If the folio has been removed, then we know it completed. */
errors++; if (IS_ERR(folio))
if (i == 0)
primary_failed = true;
continue; continue;
}
ASSERT(folio_order(folio) == 0); ASSERT(folio_order(folio) == 0);
/* Folio will be unlocked once the write completes. */ /* Folio will be unlocked once the write completes. */
folio_wait_locked(folio); folio_wait_locked(folio);
if (folio_test_error(folio)) {
errors++;
if (i == 0)
primary_failed = true;
}
/* Drop our reference */
folio_put(folio);
/* Drop the reference from the writing run */
folio_put(folio); folio_put(folio);
} }
/* log error, force error return */ errors += atomic_read(&device->sb_write_errors);
if (errors >= BTRFS_SUPER_PRIMARY_WRITE_ERROR)
primary_failed = true;
if (primary_failed) { if (primary_failed) {
btrfs_err(device->fs_info, "error writing primary super block to device %llu", btrfs_err(device->fs_info, "error writing primary super block to device %llu",
device->devid); device->devid);
......
...@@ -1602,7 +1602,7 @@ static void set_btree_ioerr(struct extent_buffer *eb) ...@@ -1602,7 +1602,7 @@ static void set_btree_ioerr(struct extent_buffer *eb)
* can be no longer dirty nor marked anymore for writeback (if a * can be no longer dirty nor marked anymore for writeback (if a
* subsequent modification to the extent buffer didn't happen before the * subsequent modification to the extent buffer didn't happen before the
* transaction commit), which makes filemap_fdata[write|wait]_range not * transaction commit), which makes filemap_fdata[write|wait]_range not
* able to find the pages tagged with SetPageError at transaction * able to find the pages which contain errors at transaction
* commit time. So if this happens we must abort the transaction, * commit time. So if this happens we must abort the transaction,
* otherwise we commit a super block with btree roots that point to * otherwise we commit a super block with btree roots that point to
* btree nodes/leafs whose content on disk is invalid - either garbage * btree nodes/leafs whose content on disk is invalid - either garbage
......
...@@ -92,6 +92,9 @@ enum btrfs_raid_types { ...@@ -92,6 +92,9 @@ enum btrfs_raid_types {
#define BTRFS_DEV_STATE_FLUSH_SENT (4) #define BTRFS_DEV_STATE_FLUSH_SENT (4)
#define BTRFS_DEV_STATE_NO_READA (5) #define BTRFS_DEV_STATE_NO_READA (5)
/* Special value encoding failure to write primary super block. */
#define BTRFS_SUPER_PRIMARY_WRITE_ERROR (INT_MAX / 2)
struct btrfs_fs_devices; struct btrfs_fs_devices;
struct btrfs_device { struct btrfs_device {
...@@ -142,6 +145,12 @@ struct btrfs_device { ...@@ -142,6 +145,12 @@ struct btrfs_device {
/* type and info about this device */ /* type and info about this device */
u64 type; u64 type;
/*
* Counter of super block write errors, values greater than or equal to
* BTRFS_SUPER_PRIMARY_WRITE_ERROR encode primary super block write failure.
*/
atomic_t sb_write_errors;
/* minimal io size for this device */ /* minimal io size for this device */
u32 sector_size; u32 sector_size;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment