Commit e6c4efd8 authored by Qu Wenruo's avatar Qu Wenruo Committed by Chris Mason

btrfs: Fix and enhance merge_extent_mapping() to insert best fitted extent map

The following commit enhanced merge_extent_mapping() to reduce
fragmentation in the extent map tree, but it can't handle the case where
the existing extent lies before map_start:
51f39 btrfs: Use right extent length when inserting overlap extent map.

[BUG]
When the existing extent map's start is before map_start,
em->len will be negative, which will corrupt the extent map and fail to
insert the new extent map.
This will happen when someone gets a large extent map, but by the time it
is about to be inserted into the extent map tree, someone else has already
committed some writes and split the huge extent into small parts.

[REPRODUCER]
It is very easy to trigger using filebench with the randomrw personality.
It reproduces nearly 100% of the time when using an 8G preallocated file
in a 60s randomrw test.

[FIX]
This patch can now handle any existing extent position.
Since it no longer uses existing->start directly, it instead finds the
previous and next extents around map_start.
So the old existing->start < map_start bug will never happen again.

[ENHANCE]
This patch will insert the best fitted extent map into the extent map tree,
rather than the oldest [map_start, map_start + sectorsize) or the
relatively newer but not perfect [map_start, existing->start).

The patch will first search for an existing extent that does not intersect
the desired map range [map_start, map_start + len).
The existing extent will be either before or after map_start, and based
on the existing extent, we can find the previous and next extents
around map_start.

So the best fitted extent would be [prev->end, next->start), clamped to
the range covered by em.
If prev is not found, em->start is used in place of prev->end; if next is
not found, em->end is used in place of next->start.

With this patch, fragmentation in the extent map tree should be reduced much
more than with commit 51f39, and an unneeded extent map tree search is avoided.
Reported-by: default avatarTsutomu Itoh <t-itoh@jp.fujitsu.com>
Signed-off-by: default avatarQu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: default avatarChris Mason <clm@fb.com>
parent 4d1a40c6
...@@ -6249,21 +6249,60 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) ...@@ -6249,21 +6249,60 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
goto out_fail; goto out_fail;
} }
/*
 * Return the extent map whose rb_node follows @em's, as reported by
 * rb_next(), or NULL when rb_next() finds no successor.
 * The caller needs to ensure locks.
 */
static struct extent_map *next_extent_map(struct extent_map *em)
{
	struct rb_node *node = rb_next(&em->rb_node);

	/* Only convert a real node; container_of() on NULL would be bogus. */
	return node ? container_of(node, struct extent_map, rb_node) : NULL;
}
/*
 * Return the extent map whose rb_node precedes @em's, as reported by
 * rb_prev(), or NULL when rb_prev() finds no predecessor.
 */
static struct extent_map *prev_extent_map(struct extent_map *em)
{
	struct rb_node *node = rb_prev(&em->rb_node);

	/* Only convert a real node; container_of() on NULL would be bogus. */
	return node ? container_of(node, struct extent_map, rb_node) : NULL;
}
/* helper for btfs_get_extent. Given an existing extent in the tree, /* helper for btfs_get_extent. Given an existing extent in the tree,
* the existing extent is the nearest extent to map_start,
* and an extent that you want to insert, deal with overlap and insert * and an extent that you want to insert, deal with overlap and insert
* the new extent into the tree. * the best fitted new extent into the tree.
*/ */
static int merge_extent_mapping(struct extent_map_tree *em_tree, static int merge_extent_mapping(struct extent_map_tree *em_tree,
struct extent_map *existing, struct extent_map *existing,
struct extent_map *em, struct extent_map *em,
u64 map_start) u64 map_start)
{ {
struct extent_map *prev;
struct extent_map *next;
u64 start;
u64 end;
u64 start_diff; u64 start_diff;
BUG_ON(map_start < em->start || map_start >= extent_map_end(em)); BUG_ON(map_start < em->start || map_start >= extent_map_end(em));
start_diff = map_start - em->start;
em->start = map_start; if (existing->start > map_start) {
em->len = existing->start - em->start; next = existing;
prev = prev_extent_map(next);
} else {
prev = existing;
next = next_extent_map(prev);
}
start = prev ? extent_map_end(prev) : em->start;
start = max_t(u64, start, em->start);
end = next ? next->start : extent_map_end(em);
end = min_t(u64, end, extent_map_end(em));
start_diff = start - em->start;
em->start = start;
em->len = end - start;
if (em->block_start < EXTENT_MAP_LAST_BYTE && if (em->block_start < EXTENT_MAP_LAST_BYTE &&
!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { !test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
em->block_start += start_diff; em->block_start += start_diff;
...@@ -6540,16 +6579,17 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, ...@@ -6540,16 +6579,17 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
ret = 0; ret = 0;
existing = lookup_extent_mapping(em_tree, start, len); existing = search_extent_mapping(em_tree, start, len);
if (existing && (existing->start > start || /*
existing->start + existing->len <= start)) { * existing will always be non-NULL, since there must be
free_extent_map(existing); * extent causing the -EEXIST.
existing = NULL; */
} if (start >= extent_map_end(existing) ||
if (!existing) { start + len <= existing->start) {
existing = lookup_extent_mapping(em_tree, em->start, /*
em->len); * The existing extent map is the one nearest to
if (existing) { * the [start, start + len) range which overlaps
*/
err = merge_extent_mapping(em_tree, existing, err = merge_extent_mapping(em_tree, existing,
em, start); em, start);
free_extent_map(existing); free_extent_map(existing);
...@@ -6557,11 +6597,6 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, ...@@ -6557,11 +6597,6 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
free_extent_map(em); free_extent_map(em);
em = NULL; em = NULL;
} }
} else {
err = -EIO;
free_extent_map(em);
em = NULL;
}
} else { } else {
free_extent_map(em); free_extent_map(em);
em = existing; em = existing;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment