Commit 2fe6303e, authored by Miao Xie, committed by Chris Mason

Btrfs: split bio_readpage_error into several functions

Data repair for direct reads will be implemented in a later patch, and it will
reuse some of the code in bio_readpage_error(). Split bio_readpage_error() into
several helper functions (btrfs_get_io_failure_record(), btrfs_check_repairable()
and btrfs_create_repair_bio()) so that the direct read repair path can call them.
Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Signed-off-by: Chris Mason <clm@fb.com>
parent 454ff3de
...@@ -1962,25 +1962,6 @@ static void check_page_uptodate(struct extent_io_tree *tree, struct page *page) ...@@ -1962,25 +1962,6 @@ static void check_page_uptodate(struct extent_io_tree *tree, struct page *page)
SetPageUptodate(page); SetPageUptodate(page);
} }
/*
* When IO fails, either with EIO or csum verification fails, we
* try other mirrors that might have a good copy of the data. This
* io_failure_record is used to record state as we go through all the
* mirrors. If another mirror has good data, the page is set up to date
* and things continue. If a good mirror can't be found, the original
* bio end_io callback is called to indicate things have failed.
*/
struct io_failure_record {
/* NOTE(review): not assigned in the visible hunks; presumably the page being repaired - confirm. */
struct page *page;
/* Start of the failed range; also the key under which the record is looked up in the inode's io_failure_tree. */
u64 start;
/* Length of the failed range in bytes, computed as end - start + 1 (end is inclusive). */
u64 len;
/* em->block_start + (start - em->start); shifted right by 9 to get the repair bio's sector. */
u64 logical;
/* Carries the extent's compress type when the extent map has EXTENT_FLAG_COMPRESSED set. */
unsigned long bio_flags;
/* Mirror the next repair read is submitted to; starts at 0 and is advanced past failed_mirror. */
int this_mirror;
/* NOTE(review): not assigned in the visible hunks; presumably the mirror whose read failed - confirm. */
int failed_mirror;
/* Set to 1 on one of the retry paths, which BUG()s if it is already set; the condition is elided here. */
int in_validation;
};
static int free_io_failure(struct inode *inode, struct io_failure_record *rec) static int free_io_failure(struct inode *inode, struct io_failure_record *rec)
{ {
int ret; int ret;
...@@ -2156,40 +2137,24 @@ static int clean_io_failure(u64 start, struct page *page) ...@@ -2156,40 +2137,24 @@ static int clean_io_failure(u64 start, struct page *page)
return 0; return 0;
} }
/* int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
* this is a generic handler for readpage errors (default struct io_failure_record **failrec_ret)
* readpage_io_failed_hook). if other copies exist, read those and write back
* good data to the failed position. does not investigate in remapping the
* failed extent elsewhere, hoping the device will be smart enough to do this as
* needed
*/
static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
struct page *page, u64 start, u64 end,
int failed_mirror)
{ {
struct io_failure_record *failrec = NULL; struct io_failure_record *failrec;
u64 private; u64 private;
struct extent_map *em; struct extent_map *em;
struct inode *inode = page->mapping->host;
struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
struct bio *bio;
struct btrfs_io_bio *btrfs_failed_bio;
struct btrfs_io_bio *btrfs_bio;
int num_copies;
int ret; int ret;
int read_mode;
u64 logical; u64 logical;
BUG_ON(failed_bio->bi_rw & REQ_WRITE);
ret = get_state_private(failure_tree, start, &private); ret = get_state_private(failure_tree, start, &private);
if (ret) { if (ret) {
failrec = kzalloc(sizeof(*failrec), GFP_NOFS); failrec = kzalloc(sizeof(*failrec), GFP_NOFS);
if (!failrec) if (!failrec)
return -ENOMEM; return -ENOMEM;
failrec->start = start; failrec->start = start;
failrec->len = end - start + 1; failrec->len = end - start + 1;
failrec->this_mirror = 0; failrec->this_mirror = 0;
...@@ -2209,11 +2174,11 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, ...@@ -2209,11 +2174,11 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
em = NULL; em = NULL;
} }
read_unlock(&em_tree->lock); read_unlock(&em_tree->lock);
if (!em) { if (!em) {
kfree(failrec); kfree(failrec);
return -EIO; return -EIO;
} }
logical = start - em->start; logical = start - em->start;
logical = em->block_start + logical; logical = em->block_start + logical;
if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
...@@ -2222,8 +2187,10 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, ...@@ -2222,8 +2187,10 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
extent_set_compress_type(&failrec->bio_flags, extent_set_compress_type(&failrec->bio_flags,
em->compress_type); em->compress_type);
} }
pr_debug("bio_readpage_error: (new) logical=%llu, start=%llu, "
"len=%llu\n", logical, start, failrec->len); pr_debug("Get IO Failure Record: (new) logical=%llu, start=%llu, len=%llu\n",
logical, start, failrec->len);
failrec->logical = logical; failrec->logical = logical;
free_extent_map(em); free_extent_map(em);
...@@ -2243,8 +2210,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, ...@@ -2243,8 +2210,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
} }
} else { } else {
failrec = (struct io_failure_record *)(unsigned long)private; failrec = (struct io_failure_record *)(unsigned long)private;
pr_debug("bio_readpage_error: (found) logical=%llu, " pr_debug("Get IO Failure Record: (found) logical=%llu, start=%llu, len=%llu, validation=%d\n",
"start=%llu, len=%llu, validation=%d\n",
failrec->logical, failrec->start, failrec->len, failrec->logical, failrec->start, failrec->len,
failrec->in_validation); failrec->in_validation);
/* /*
...@@ -2253,6 +2219,17 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, ...@@ -2253,6 +2219,17 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
* clean_io_failure() clean all those errors at once. * clean_io_failure() clean all those errors at once.
*/ */
} }
*failrec_ret = failrec;
return 0;
}
int btrfs_check_repairable(struct inode *inode, struct bio *failed_bio,
struct io_failure_record *failrec, int failed_mirror)
{
int num_copies;
num_copies = btrfs_num_copies(BTRFS_I(inode)->root->fs_info, num_copies = btrfs_num_copies(BTRFS_I(inode)->root->fs_info,
failrec->logical, failrec->len); failrec->logical, failrec->len);
if (num_copies == 1) { if (num_copies == 1) {
...@@ -2261,10 +2238,9 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, ...@@ -2261,10 +2238,9 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
* all the retry and error correction code that follows. no * all the retry and error correction code that follows. no
* matter what the error is, it is very likely to persist. * matter what the error is, it is very likely to persist.
*/ */
pr_debug("bio_readpage_error: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d\n", pr_debug("Check Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d\n",
num_copies, failrec->this_mirror, failed_mirror); num_copies, failrec->this_mirror, failed_mirror);
free_io_failure(inode, failrec); return 0;
return -EIO;
} }
/* /*
...@@ -2284,7 +2260,6 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, ...@@ -2284,7 +2260,6 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
BUG_ON(failrec->in_validation); BUG_ON(failrec->in_validation);
failrec->in_validation = 1; failrec->in_validation = 1;
failrec->this_mirror = failed_mirror; failrec->this_mirror = failed_mirror;
read_mode = READ_SYNC | REQ_FAILFAST_DEV;
} else { } else {
/* /*
* we're ready to fulfill a) and b) alongside. get a good copy * we're ready to fulfill a) and b) alongside. get a good copy
...@@ -2300,22 +2275,32 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, ...@@ -2300,22 +2275,32 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
failrec->this_mirror++; failrec->this_mirror++;
if (failrec->this_mirror == failed_mirror) if (failrec->this_mirror == failed_mirror)
failrec->this_mirror++; failrec->this_mirror++;
read_mode = READ_SYNC;
} }
if (failrec->this_mirror > num_copies) { if (failrec->this_mirror > num_copies) {
pr_debug("bio_readpage_error: (fail) num_copies=%d, next_mirror %d, failed_mirror %d\n", pr_debug("Check Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d\n",
num_copies, failrec->this_mirror, failed_mirror); num_copies, failrec->this_mirror, failed_mirror);
free_io_failure(inode, failrec); return 0;
return -EIO;
} }
return 1;
}
struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio,
struct io_failure_record *failrec,
struct page *page, int pg_offset, int icsum,
bio_end_io_t *endio_func)
{
struct bio *bio;
struct btrfs_io_bio *btrfs_failed_bio;
struct btrfs_io_bio *btrfs_bio;
bio = btrfs_io_bio_alloc(GFP_NOFS, 1); bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
if (!bio) { if (!bio)
free_io_failure(inode, failrec); return NULL;
return -EIO;
} bio->bi_end_io = endio_func;
bio->bi_end_io = failed_bio->bi_end_io;
bio->bi_iter.bi_sector = failrec->logical >> 9; bio->bi_iter.bi_sector = failrec->logical >> 9;
bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
bio->bi_iter.bi_size = 0; bio->bi_iter.bi_size = 0;
...@@ -2327,17 +2312,63 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, ...@@ -2327,17 +2312,63 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
btrfs_bio = btrfs_io_bio(bio); btrfs_bio = btrfs_io_bio(bio);
btrfs_bio->csum = btrfs_bio->csum_inline; btrfs_bio->csum = btrfs_bio->csum_inline;
phy_offset >>= inode->i_sb->s_blocksize_bits; icsum *= csum_size;
phy_offset *= csum_size; memcpy(btrfs_bio->csum, btrfs_failed_bio->csum + icsum,
memcpy(btrfs_bio->csum, btrfs_failed_bio->csum + phy_offset,
csum_size); csum_size);
} }
bio_add_page(bio, page, failrec->len, start - page_offset(page)); bio_add_page(bio, page, failrec->len, pg_offset);
return bio;
}
/*
* this is a generic handler for readpage errors (default
* readpage_io_failed_hook). if other copies exist, read those and write back
* good data to the failed position. does not investigate in remapping the
* failed extent elsewhere, hoping the device will be smart enough to do this as
* needed
*/
static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
struct page *page, u64 start, u64 end,
int failed_mirror)
{
struct io_failure_record *failrec;
struct inode *inode = page->mapping->host;
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
struct bio *bio;
int read_mode;
int ret;
BUG_ON(failed_bio->bi_rw & REQ_WRITE);
ret = btrfs_get_io_failure_record(inode, start, end, &failrec);
if (ret)
return ret;
ret = btrfs_check_repairable(inode, failed_bio, failrec, failed_mirror);
if (!ret) {
free_io_failure(inode, failrec);
return -EIO;
}
if (failed_bio->bi_vcnt > 1)
read_mode = READ_SYNC | REQ_FAILFAST_DEV;
else
read_mode = READ_SYNC;
phy_offset >>= inode->i_sb->s_blocksize_bits;
bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page,
start - page_offset(page),
(int)phy_offset, failed_bio->bi_end_io);
if (!bio) {
free_io_failure(inode, failrec);
return -EIO;
}
pr_debug("bio_readpage_error: submitting new read[%#x] to " pr_debug("Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d\n",
"this_mirror=%d, num_copies=%d, in_validation=%d\n", read_mode, read_mode, failrec->this_mirror, failrec->in_validation);
failrec->this_mirror, num_copies, failrec->in_validation);
ret = tree->ops->submit_bio_hook(inode, read_mode, bio, ret = tree->ops->submit_bio_hook(inode, read_mode, bio,
failrec->this_mirror, failrec->this_mirror,
......
...@@ -344,6 +344,34 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start, ...@@ -344,6 +344,34 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
int end_extent_writepage(struct page *page, int err, u64 start, u64 end); int end_extent_writepage(struct page *page, int err, u64 start, u64 end);
int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb, int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb,
int mirror_num); int mirror_num);
/*
* When IO fails, either with EIO or csum verification fails, we
* try other mirrors that might have a good copy of the data. This
* io_failure_record is used to record state as we go through all the
* mirrors. If another mirror has good data, the page is set up to date
* and things continue. If a good mirror can't be found, the original
* bio end_io callback is called to indicate things have failed.
*/
struct io_failure_record {
/* NOTE(review): not assigned in the visible hunks; presumably the page being repaired - confirm. */
struct page *page;
/* Start of the failed range; also the key under which the record is looked up in the inode's io_failure_tree. */
u64 start;
/* Length of the failed range in bytes, computed as end - start + 1 (end is inclusive). */
u64 len;
/* em->block_start + (start - em->start); shifted right by 9 to get the repair bio's sector. */
u64 logical;
/* Carries the extent's compress type when the extent map has EXTENT_FLAG_COMPRESSED set. */
unsigned long bio_flags;
/* Mirror the next repair read is submitted to; starts at 0 and is advanced by btrfs_check_repairable(). */
int this_mirror;
/* NOTE(review): not assigned in the visible hunks; presumably the mirror whose read failed - confirm. */
int failed_mirror;
/* Set to 1 by btrfs_check_repairable() on one retry path, which BUG()s if it is already set. */
int in_validation;
};
/*
 * Fetch the io_failure_record keyed by @start in the inode's io_failure_tree.
 * If none is recorded yet, a new one is allocated and initialised for the
 * inclusive range [@start, @end].
 *
 * Returns 0 and stores the record in *@failrec_ret on success, -ENOMEM if the
 * record cannot be allocated, and -EIO if no usable extent map is found for
 * the range. Part of the definition is elided in this view, so further error
 * returns may exist.
 */
int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
struct io_failure_record **failrec_ret);
/*
 * Decide whether a failed read can be retried from another mirror, and update
 * @failrec->this_mirror to the mirror to try next.
 *
 * Returns 1 if a retry should be attempted. Returns 0 if not: either only one
 * copy exists, or this_mirror has moved past the number of copies. @failrec is
 * not freed here; the caller releases it when 0 is returned.
 *
 * NOTE(review): the last parameter is named fail_mirror here but
 * failed_mirror in the definition.
 */
int btrfs_check_repairable(struct inode *inode, struct bio *failed_bio,
struct io_failure_record *failrec, int fail_mirror);
/*
 * Allocate a one-page bio for re-reading the range described by @failrec.
 *
 * The bio completes through @endio_func and starts at sector
 * @failrec->logical >> 9. @page is added at @pg_offset for @failrec->len
 * bytes. @icsum is the index (in checksum entries) of the checksum to copy
 * from @failed_bio into the new bio; the condition guarding that copy is
 * elided in this view.
 *
 * Returns the new bio, or NULL if allocation fails.
 */
struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio,
struct io_failure_record *failrec,
struct page *page, int pg_offset, int icsum,
bio_end_io_t *endio_func);
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
noinline u64 find_lock_delalloc_range(struct inode *inode, noinline u64 find_lock_delalloc_range(struct inode *inode,
struct extent_io_tree *tree, struct extent_io_tree *tree,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment