Commit 8f32380d authored by Johannes Thumshirn's avatar Johannes Thumshirn Committed by David Sterba

btrfs: use the page cache for super block reading

Super-block reading in BTRFS is done using buffer_heads. Buffer_heads
have some drawbacks, like not being able to propagate errors from the
lower layers.

Directly use the page cache for reading the super blocks from disk or
invalidating an on-disk super block. We have to use the page cache so to
avoid races between mkfs and udev. See also 6f60cbd3 ("btrfs: access
superblock via pagecache in scan_one_device").

This patch unwraps the buffer head API and does not change the way the
super block is actually read.
Signed-off-by: default avatarJohannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: default avatarDavid Sterba <dsterba@suse.com>
Signed-off-by: default avatarDavid Sterba <dsterba@suse.com>
parent 6fbceb9f
......@@ -2846,7 +2846,6 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
u64 features;
u16 csum_type;
struct btrfs_key location;
struct buffer_head *bh;
struct btrfs_super_block *disk_super;
struct btrfs_fs_info *fs_info = btrfs_sb(sb);
struct btrfs_root *tree_root;
......@@ -2887,9 +2886,9 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
/*
* Read super block and check the signature bytes only
*/
bh = btrfs_read_dev_super(fs_devices->latest_bdev);
if (IS_ERR(bh)) {
err = PTR_ERR(bh);
disk_super = btrfs_read_dev_super(fs_devices->latest_bdev);
if (IS_ERR(disk_super)) {
err = PTR_ERR(disk_super);
goto fail_alloc;
}
......@@ -2897,18 +2896,19 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
* Verify the type first, if that or the the checksum value are
* corrupted, we'll find out
*/
csum_type = btrfs_super_csum_type((struct btrfs_super_block *)bh->b_data);
csum_type = btrfs_super_csum_type(disk_super);
if (!btrfs_supported_super_csum(csum_type)) {
btrfs_err(fs_info, "unsupported checksum algorithm: %u",
csum_type);
err = -EINVAL;
brelse(bh);
btrfs_release_disk_super(disk_super);
goto fail_alloc;
}
ret = btrfs_init_csum_hash(fs_info, csum_type);
if (ret) {
err = ret;
btrfs_release_disk_super(disk_super);
goto fail_alloc;
}
......@@ -2916,10 +2916,10 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
* We want to check superblock checksum, the type is stored inside.
* Pass the whole disk block of size BTRFS_SUPER_INFO_SIZE (4k).
*/
if (btrfs_check_super_csum(fs_info, bh->b_data)) {
if (btrfs_check_super_csum(fs_info, (u8 *)disk_super)) {
btrfs_err(fs_info, "superblock checksum mismatch");
err = -EINVAL;
brelse(bh);
btrfs_release_disk_super(disk_super);
goto fail_alloc;
}
......@@ -2928,8 +2928,8 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
* following bytes up to INFO_SIZE, the checksum is calculated from
* the whole block of INFO_SIZE
*/
memcpy(fs_info->super_copy, bh->b_data, sizeof(*fs_info->super_copy));
brelse(bh);
memcpy(fs_info->super_copy, disk_super, sizeof(*fs_info->super_copy));
btrfs_release_disk_super(disk_super);
disk_super = fs_info->super_copy;
......@@ -3416,45 +3416,38 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
put_bh(bh);
}
int btrfs_read_dev_one_super(struct block_device *bdev, int copy_num,
struct buffer_head **bh_ret)
struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev,
int copy_num)
{
struct buffer_head *bh;
struct btrfs_super_block *super;
struct page *page;
u64 bytenr;
struct address_space *mapping = bdev->bd_inode->i_mapping;
bytenr = btrfs_sb_offset(copy_num);
if (bytenr + BTRFS_SUPER_INFO_SIZE >= i_size_read(bdev->bd_inode))
return -EINVAL;
return ERR_PTR(-EINVAL);
bh = __bread(bdev, bytenr / BTRFS_BDEV_BLOCKSIZE, BTRFS_SUPER_INFO_SIZE);
/*
* If we fail to read from the underlying devices, as of now
* the best option we have is to mark it EIO.
*/
if (!bh)
return -EIO;
page = read_cache_page_gfp(mapping, bytenr >> PAGE_SHIFT, GFP_NOFS);
if (IS_ERR(page))
return ERR_CAST(page);
super = (struct btrfs_super_block *)bh->b_data;
super = page_address(page);
if (btrfs_super_bytenr(super) != bytenr ||
btrfs_super_magic(super) != BTRFS_MAGIC) {
brelse(bh);
return -EINVAL;
btrfs_release_disk_super(super);
return ERR_PTR(-EINVAL);
}
*bh_ret = bh;
return 0;
return super;
}
struct buffer_head *btrfs_read_dev_super(struct block_device *bdev)
struct btrfs_super_block *btrfs_read_dev_super(struct block_device *bdev)
{
struct buffer_head *bh;
struct buffer_head *latest = NULL;
struct btrfs_super_block *super;
struct btrfs_super_block *super, *latest = NULL;
int i;
u64 transid = 0;
int ret = -EINVAL;
/* we would like to check all the supers, but that would make
* a btrfs mount succeed after a mkfs from a different FS.
......@@ -3462,25 +3455,20 @@ struct buffer_head *btrfs_read_dev_super(struct block_device *bdev)
* later supers, using BTRFS_SUPER_MIRROR_MAX instead
*/
for (i = 0; i < 1; i++) {
ret = btrfs_read_dev_one_super(bdev, i, &bh);
if (ret)
super = btrfs_read_dev_one_super(bdev, i);
if (IS_ERR(super))
continue;
super = (struct btrfs_super_block *)bh->b_data;
if (!latest || btrfs_super_generation(super) > transid) {
brelse(latest);
latest = bh;
if (latest)
btrfs_release_disk_super(super);
latest = super;
transid = btrfs_super_generation(super);
} else {
brelse(bh);
}
}
if (!latest)
return ERR_PTR(ret);
return latest;
return super;
}
/*
......
......@@ -56,9 +56,9 @@ int __cold open_ctree(struct super_block *sb,
char *options);
void __cold close_ctree(struct btrfs_fs_info *fs_info);
int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors);
struct buffer_head *btrfs_read_dev_super(struct block_device *bdev);
int btrfs_read_dev_one_super(struct block_device *bdev, int copy_num,
struct buffer_head **bh_ret);
struct btrfs_super_block *btrfs_read_dev_super(struct block_device *bdev);
struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev,
int copy_num);
int btrfs_commit_super(struct btrfs_fs_info *fs_info);
struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
struct btrfs_key *key);
......
......@@ -6,7 +6,6 @@
#include <linux/sched.h>
#include <linux/bio.h>
#include <linux/slab.h>
#include <linux/buffer_head.h>
#include <linux/blkdev.h>
#include <linux/ratelimit.h>
#include <linux/kthread.h>
......@@ -500,7 +499,7 @@ static struct btrfs_fs_devices *find_fsid_with_metadata_uuid(
static int
btrfs_get_bdev_and_sb(const char *device_path, fmode_t flags, void *holder,
int flush, struct block_device **bdev,
struct buffer_head **bh)
struct btrfs_super_block **disk_super)
{
int ret;
......@@ -519,9 +518,9 @@ btrfs_get_bdev_and_sb(const char *device_path, fmode_t flags, void *holder,
goto error;
}
invalidate_bdev(*bdev);
*bh = btrfs_read_dev_super(*bdev);
if (IS_ERR(*bh)) {
ret = PTR_ERR(*bh);
*disk_super = btrfs_read_dev_super(*bdev);
if (IS_ERR(*disk_super)) {
ret = PTR_ERR(*disk_super);
blkdev_put(*bdev, flags);
goto error;
}
......@@ -530,7 +529,6 @@ btrfs_get_bdev_and_sb(const char *device_path, fmode_t flags, void *holder,
error:
*bdev = NULL;
*bh = NULL;
return ret;
}
......@@ -611,7 +609,6 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
{
struct request_queue *q;
struct block_device *bdev;
struct buffer_head *bh;
struct btrfs_super_block *disk_super;
u64 devid;
int ret;
......@@ -622,17 +619,16 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
return -EINVAL;
ret = btrfs_get_bdev_and_sb(device->name->str, flags, holder, 1,
&bdev, &bh);
&bdev, &disk_super);
if (ret)
return ret;
disk_super = (struct btrfs_super_block *)bh->b_data;
devid = btrfs_stack_device_id(&disk_super->dev_item);
if (devid != device->devid)
goto error_brelse;
goto error_free_page;
if (memcmp(device->uuid, disk_super->dev_item.uuid, BTRFS_UUID_SIZE))
goto error_brelse;
goto error_free_page;
device->generation = btrfs_super_generation(disk_super);
......@@ -641,7 +637,7 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
BTRFS_FEATURE_INCOMPAT_METADATA_UUID) {
pr_err(
"BTRFS: Invalid seeding and uuid-changed device detected\n");
goto error_brelse;
goto error_free_page;
}
clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
......@@ -667,12 +663,12 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
fs_devices->rw_devices++;
list_add_tail(&device->dev_alloc_list, &fs_devices->alloc_list);
}
brelse(bh);
btrfs_release_disk_super(disk_super);
return 0;
error_brelse:
brelse(bh);
error_free_page:
btrfs_release_disk_super(disk_super);
blkdev_put(bdev, flags);
return -EINVAL;
......@@ -1247,8 +1243,10 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
return ret;
}
void btrfs_release_disk_super(struct page *page)
void btrfs_release_disk_super(struct btrfs_super_block *super)
{
struct page *page = virt_to_page(super);
put_page(page);
}
......@@ -1286,7 +1284,7 @@ static int btrfs_read_disk_super(struct block_device *bdev, u64 bytenr,
if (btrfs_super_bytenr(*disk_super) != bytenr ||
btrfs_super_magic(*disk_super) != BTRFS_MAGIC) {
btrfs_release_disk_super(*page);
btrfs_release_disk_super(p);
return 1;
}
......@@ -1349,7 +1347,7 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags,
btrfs_free_stale_devices(path, device);
}
btrfs_release_disk_super(page);
btrfs_release_disk_super(disk_super);
error_bdev_put:
blkdev_put(bdev, flags);
......@@ -1948,10 +1946,10 @@ static u64 btrfs_num_devices(struct btrfs_fs_info *fs_info)
return num_devices;
}
static void btrfs_scratch_superblocks(struct block_device *bdev,
static void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info,
struct block_device *bdev,
const char *device_path)
{
struct buffer_head *bh;
struct btrfs_super_block *disk_super;
int copy_num;
......@@ -1959,15 +1957,26 @@ static void btrfs_scratch_superblocks(struct block_device *bdev,
return;
for (copy_num = 0; copy_num < BTRFS_SUPER_MIRROR_MAX; copy_num++) {
if (btrfs_read_dev_one_super(bdev, copy_num, &bh))
continue;
struct page *page;
int ret;
disk_super = (struct btrfs_super_block *)bh->b_data;
disk_super = btrfs_read_dev_one_super(bdev, copy_num);
if (IS_ERR(disk_super))
continue;
memset(&disk_super->magic, 0, sizeof(disk_super->magic));
set_buffer_dirty(bh);
sync_dirty_buffer(bh);
brelse(bh);
page = virt_to_page(disk_super);
set_page_dirty(page);
lock_page(page);
/* write_on_page() unlocks the page */
ret = write_one_page(page);
if (ret)
btrfs_warn(fs_info,
"error clearing superblock number %d (%d)",
copy_num, ret);
btrfs_release_disk_super(disk_super);
}
/* Notify udev that device has changed */
......@@ -2095,7 +2104,8 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
* supers and free the device.
*/
if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
btrfs_scratch_superblocks(device->bdev, device->name->str);
btrfs_scratch_superblocks(fs_info, device->bdev,
device->name->str);
btrfs_close_bdev(device);
synchronize_rcu();
......@@ -2163,7 +2173,8 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_device *srcdev)
if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &srcdev->dev_state)) {
/* zero out the old super if it is writable */
btrfs_scratch_superblocks(srcdev->bdev, srcdev->name->str);
btrfs_scratch_superblocks(fs_info, srcdev->bdev,
srcdev->name->str);
}
btrfs_close_bdev(srcdev);
......@@ -2222,7 +2233,8 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev)
* is already out of device list, so we don't have to hold
* the device_list_mutex lock.
*/
btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str);
btrfs_scratch_superblocks(tgtdev->fs_info, tgtdev->bdev,
tgtdev->name->str);
btrfs_close_bdev(tgtdev);
synchronize_rcu();
......@@ -2237,14 +2249,13 @@ static struct btrfs_device *btrfs_find_device_by_path(
u64 devid;
u8 *dev_uuid;
struct block_device *bdev;
struct buffer_head *bh;
struct btrfs_device *device;
ret = btrfs_get_bdev_and_sb(device_path, FMODE_READ,
fs_info->bdev_holder, 0, &bdev, &bh);
fs_info->bdev_holder, 0, &bdev, &disk_super);
if (ret)
return ERR_PTR(ret);
disk_super = (struct btrfs_super_block *)bh->b_data;
devid = btrfs_stack_device_id(&disk_super->dev_item);
dev_uuid = disk_super->dev_item.uuid;
if (btrfs_fs_incompat(fs_info, METADATA_UUID))
......@@ -2254,7 +2265,7 @@ static struct btrfs_device *btrfs_find_device_by_path(
device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid,
disk_super->fsid, true);
brelse(bh);
btrfs_release_disk_super(disk_super);
if (!device)
device = ERR_PTR(-ENOENT);
blkdev_put(bdev, FMODE_READ);
......
......@@ -17,8 +17,6 @@ extern struct mutex uuid_mutex;
#define BTRFS_STRIPE_LEN SZ_64K
struct buffer_head;
struct btrfs_io_geometry {
/* remaining bytes before crossing a stripe */
u64 len;
......@@ -483,7 +481,7 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset);
struct extent_map *btrfs_get_chunk_map(struct btrfs_fs_info *fs_info,
u64 logical, u64 length);
void btrfs_release_disk_super(struct page *page);
void btrfs_release_disk_super(struct btrfs_super_block *super);
static inline void btrfs_dev_stat_inc(struct btrfs_device *dev,
int index)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment