Commit 94e8c95c authored by Qu Wenruo's avatar Qu Wenruo Committed by David Sterba

btrfs: add structure to keep track of extent range in end_bio_extent_readpage

In end_bio_extent_readpage() we had a strange dance around
extent_start/extent_len.

Hidden behind the strange dance is, it's just calling
endio_readpage_release_extent() on each bvec range.

Here is an example to explain the original work flow:

  Bio is for inode 257, containing 2 pages, for range [1M, 1M+8K)

  end_bio_extent_readpage() entered
  |- extent_start = 0;
  |- extent_end = 0;
  |- bio_for_each_segment_all() {
  |  |- /* Got the 1st bvec */
  |  |- start = SZ_1M;
  |  |- end = SZ_1M + SZ_4K - 1;
  |  |- update = 1;
  |  |- if (extent_len == 0) {
  |  |  |- extent_start = start; /* SZ_1M */
  |  |  |- extent_len = end + 1 - start; /* SZ_4K */
  |  |  }
  |  |
  |  |- /* Got the 2nd bvec */
  |  |- start = SZ_1M + 4K;
  |  |- end = SZ_1M + 8K - 1;
  |  |- update = 1;
  |  |- if (extent_start + extent_len == start) {
  |  |  |- extent_len += end + 1 - start; /* SZ_8K */
  |  |  }
  |  } /* All bio vec iterated */
  |
  |- if (extent_len) {
     |- endio_readpage_release_extent(tree, extent_start, extent_len,
				      update);
	/* extent_start == SZ_1M, extent_len == SZ_8K, uptodate = 1 */

As the above flow shows, the existing code in end_bio_extent_readpage()
accumulates extent_start/extent_len, and when the contiguous range
stops, calls endio_readpage_release_extent() for the range.

However, the current behavior has several cases that are not really considered:

- The inode can change
  For bio, its pages don't need to have contiguous page_offset.
  This means, even pages from different inodes can be packed into one
  bio.

- bvec cross page boundary
  There is a feature called multi-page bvec, where bvec->bv_len can go
  beyond bvec->bv_page boundary.

- Poor readability

This patch will address the problem:

- Introduce a proper structure, processed_extent, to record processed
  extent range

- Integrate inode/start/end/uptodate check into
  endio_readpage_release_extent()

- Add more comment on each step.
  This should greatly improve the readability, now in
  end_bio_extent_readpage() there are only two
  endio_readpage_release_extent() calls.

- Add inode check for contiguity
  Now we also ensure the inode is the same one before checking if the
  range is contiguous.
Reviewed-by: default avatarJosef Bacik <josef@toxicpanda.com>
Signed-off-by: default avatarQu Wenruo <wqu@suse.com>
Reviewed-by: default avatarDavid Sterba <dsterba@suse.com>
Signed-off-by: default avatarDavid Sterba <dsterba@suse.com>
parent b1d51f67
...@@ -2769,16 +2769,77 @@ static void end_bio_extent_writepage(struct bio *bio) ...@@ -2769,16 +2769,77 @@ static void end_bio_extent_writepage(struct bio *bio)
bio_put(bio); bio_put(bio);
} }
static void /*
endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len, * Record previously processed extent range
int uptodate) *
* For endio_readpage_release_extent() to handle a full extent range, reducing
* the extent io operations.
*/
struct processed_extent {
struct btrfs_inode *inode;
/* Start of the range in @inode */
u64 start;
/* End of the range in @inode */
u64 end;
bool uptodate;
};
/*
* Try to release processed extent range
*
* May not release the extent range right now if the current range is
* contiguous to processed extent.
*
* Will release the processed extent when @inode or @uptodate changes, or when
* the range is no longer contiguous to the processed range.
*
* Passing @inode == NULL will force processed extent to be released.
*/
static void endio_readpage_release_extent(struct processed_extent *processed,
struct btrfs_inode *inode, u64 start, u64 end,
bool uptodate)
{ {
struct extent_state *cached = NULL; struct extent_state *cached = NULL;
u64 end = start + len - 1; struct extent_io_tree *tree;
/* The first extent, initialize @processed */
if (!processed->inode)
goto update;
if (uptodate && tree->track_uptodate) /*
set_extent_uptodate(tree, start, end, &cached, GFP_ATOMIC); * Contiguous to processed extent, just uptodate the end.
unlock_extent_cached_atomic(tree, start, end, &cached); *
* Several things to notice:
*
* - bio can be merged as long as on-disk bytenr is contiguous
* This means we can have page belonging to other inodes, thus need to
* check if the inode still matches.
* - bvec can contain range beyond current page for multi-page bvec
* Thus we need to do processed->end + 1 >= start check
*/
if (processed->inode == inode && processed->uptodate == uptodate &&
processed->end + 1 >= start && end >= processed->end) {
processed->end = end;
return;
}
tree = &processed->inode->io_tree;
/*
* Now we don't have range contiguous to the processed range, release
* the processed range now.
*/
if (processed->uptodate && tree->track_uptodate)
set_extent_uptodate(tree, processed->start, processed->end,
&cached, GFP_ATOMIC);
unlock_extent_cached_atomic(tree, processed->start, processed->end,
&cached);
update:
/* Update processed to current range */
processed->inode = inode;
processed->start = start;
processed->end = end;
processed->uptodate = uptodate;
} }
/* /*
...@@ -2798,12 +2859,11 @@ static void end_bio_extent_readpage(struct bio *bio) ...@@ -2798,12 +2859,11 @@ static void end_bio_extent_readpage(struct bio *bio)
int uptodate = !bio->bi_status; int uptodate = !bio->bi_status;
struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
struct extent_io_tree *tree, *failure_tree; struct extent_io_tree *tree, *failure_tree;
struct processed_extent processed = { 0 };
u64 offset = 0; u64 offset = 0;
u64 start; u64 start;
u64 end; u64 end;
u64 len; u64 len;
u64 extent_start = 0;
u64 extent_len = 0;
int mirror; int mirror;
int ret; int ret;
struct bvec_iter_all iter_all; struct bvec_iter_all iter_all;
...@@ -2912,32 +2972,11 @@ static void end_bio_extent_readpage(struct bio *bio) ...@@ -2912,32 +2972,11 @@ static void end_bio_extent_readpage(struct bio *bio)
unlock_page(page); unlock_page(page);
offset += len; offset += len;
if (unlikely(!uptodate)) { endio_readpage_release_extent(&processed, BTRFS_I(inode),
if (extent_len) { start, end, uptodate);
endio_readpage_release_extent(tree,
extent_start,
extent_len, 1);
extent_start = 0;
extent_len = 0;
}
endio_readpage_release_extent(tree, start,
end - start + 1, 0);
} else if (!extent_len) {
extent_start = start;
extent_len = end + 1 - start;
} else if (extent_start + extent_len == start) {
extent_len += end + 1 - start;
} else {
endio_readpage_release_extent(tree, extent_start,
extent_len, uptodate);
extent_start = start;
extent_len = end + 1 - start;
}
} }
/* Release the last extent */
if (extent_len) endio_readpage_release_extent(&processed, NULL, 0, 0, false);
endio_readpage_release_extent(tree, extent_start, extent_len,
uptodate);
btrfs_io_bio_free_csum(io_bio); btrfs_io_bio_free_csum(io_bio);
bio_put(bio); bio_put(bio);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment