Commit 3173a18f authored by Josef Bacik

Btrfs: add an incompatible format change for smaller metadata extent refs

We currently store the first key of the tree block inside the reference for the
tree block in the extent tree.  This takes up quite a bit of space.  Make a new
key type for metadata which holds the level as the offset and completely removes
storing the btrfs_tree_block_info inside the extent ref.  This reduces the size
from 51 bytes to 33 bytes per extent reference for each tree block.  In practice
this results in a 30-35% decrease in the size of our extent tree, which means we
COW less and can keep more of the extent tree in memory which makes our heavy
metadata operations go much faster.  This is not an automatic format change; you
must enable it at mkfs time or with btrfstune.  This patch deals with having
metadata stored as either the old format or the new format so it is easy to
convert.  Thanks,
Signed-off-by: Josef Bacik <jbacik@fusionio.com>
parent be283b2e
...@@ -867,7 +867,8 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, ...@@ -867,7 +867,8 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
if (btrfs_block_can_be_shared(root, buf)) { if (btrfs_block_can_be_shared(root, buf)) {
ret = btrfs_lookup_extent_info(trans, root, buf->start, ret = btrfs_lookup_extent_info(trans, root, buf->start,
buf->len, &refs, &flags); btrfs_header_level(buf), 1,
&refs, &flags);
if (ret) if (ret)
return ret; return ret;
if (refs == 0) { if (refs == 0) {
......
...@@ -509,6 +509,7 @@ struct btrfs_super_block { ...@@ -509,6 +509,7 @@ struct btrfs_super_block {
#define BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF (1ULL << 6) #define BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF (1ULL << 6)
#define BTRFS_FEATURE_INCOMPAT_RAID56 (1ULL << 7) #define BTRFS_FEATURE_INCOMPAT_RAID56 (1ULL << 7)
#define BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA (1ULL << 8)
#define BTRFS_FEATURE_COMPAT_SUPP 0ULL #define BTRFS_FEATURE_COMPAT_SUPP 0ULL
#define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL #define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL
...@@ -519,7 +520,8 @@ struct btrfs_super_block { ...@@ -519,7 +520,8 @@ struct btrfs_super_block {
BTRFS_FEATURE_INCOMPAT_BIG_METADATA | \ BTRFS_FEATURE_INCOMPAT_BIG_METADATA | \
BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO | \ BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO | \
BTRFS_FEATURE_INCOMPAT_RAID56 | \ BTRFS_FEATURE_INCOMPAT_RAID56 | \
BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF) BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF | \
BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)
/* /*
* A leaf is full of items. offset and size tell us where to find * A leaf is full of items. offset and size tell us where to find
...@@ -1809,6 +1811,12 @@ struct btrfs_ioctl_defrag_range_args { ...@@ -1809,6 +1811,12 @@ struct btrfs_ioctl_defrag_range_args {
*/ */
#define BTRFS_EXTENT_ITEM_KEY 168 #define BTRFS_EXTENT_ITEM_KEY 168
/*
* The same as BTRFS_EXTENT_ITEM_KEY, except that for metadata we already know
* the length, so we store the level in key->offset instead of the length.
*/
#define BTRFS_METADATA_ITEM_KEY 169
#define BTRFS_TREE_BLOCK_REF_KEY 176 #define BTRFS_TREE_BLOCK_REF_KEY 176
#define BTRFS_EXTENT_DATA_REF_KEY 178 #define BTRFS_EXTENT_DATA_REF_KEY 178
...@@ -3006,7 +3014,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, ...@@ -3006,7 +3014,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len);
int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 bytenr, struct btrfs_root *root, u64 bytenr,
u64 num_bytes, u64 *refs, u64 *flags); u64 offset, int metadata, u64 *refs, u64 *flags);
int btrfs_pin_extent(struct btrfs_root *root, int btrfs_pin_extent(struct btrfs_root *root,
u64 bytenr, u64 num, int reserved); u64 bytenr, u64 num, int reserved);
int btrfs_pin_extent_for_log_replay(struct btrfs_root *root, int btrfs_pin_extent_for_log_replay(struct btrfs_root *root,
...@@ -3669,6 +3677,16 @@ static inline void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info, ...@@ -3669,6 +3677,16 @@ static inline void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info,
} }
} }
/*
 * Convenience wrapper: @opt is the feature name without the
 * BTRFS_FEATURE_INCOMPAT_ prefix, which is pasted on here before the
 * flag is handed to __btrfs_fs_incompat().
 */
#define btrfs_fs_incompat(fs_info, opt) \
	__btrfs_fs_incompat((fs_info), BTRFS_FEATURE_INCOMPAT_##opt)

/*
 * Report whether any bit of @flag is set in the incompat flags of the
 * super block referenced by fs_info->super_copy.
 *
 * Returns 1 if at least one requested bit is set, 0 otherwise.
 */
static inline int __btrfs_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag)
{
	u64 incompat = btrfs_super_incompat_flags(fs_info->super_copy);

	/* Collapse the masked value to a strict 0/1 result. */
	return (incompat & flag) != 0;
}
/* /*
* Call btrfs_abort_transaction as early as possible when an error condition is * Call btrfs_abort_transaction as early as possible when an error condition is
* detected, that way the exact line number is reported. * detected, that way the exact line number is reported.
......
...@@ -2290,6 +2290,9 @@ int open_ctree(struct super_block *sb, ...@@ -2290,6 +2290,9 @@ int open_ctree(struct super_block *sb,
if (tree_root->fs_info->compress_type == BTRFS_COMPRESS_LZO) if (tree_root->fs_info->compress_type == BTRFS_COMPRESS_LZO)
features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)
printk(KERN_ERR "btrfs: has skinny extents\n");
/* /*
* flag our filesystem as having big metadata blocks if * flag our filesystem as having big metadata blocks if
* they are bigger than the page size * they are bigger than the page size
......
This diff is collapsed.
...@@ -3660,7 +3660,7 @@ static int check_path_shared(struct btrfs_root *root, ...@@ -3660,7 +3660,7 @@ static int check_path_shared(struct btrfs_root *root,
eb = path->nodes[level]; eb = path->nodes[level];
if (!btrfs_block_can_be_shared(root, eb)) if (!btrfs_block_can_be_shared(root, eb))
continue; continue;
ret = btrfs_lookup_extent_info(NULL, root, eb->start, eb->len, ret = btrfs_lookup_extent_info(NULL, root, eb->start, level, 1,
&refs, NULL); &refs, NULL);
if (refs > 1) if (refs > 1)
return 1; return 1;
......
...@@ -619,10 +619,13 @@ static noinline_for_stack ...@@ -619,10 +619,13 @@ static noinline_for_stack
int find_inline_backref(struct extent_buffer *leaf, int slot, int find_inline_backref(struct extent_buffer *leaf, int slot,
unsigned long *ptr, unsigned long *end) unsigned long *ptr, unsigned long *end)
{ {
struct btrfs_key key;
struct btrfs_extent_item *ei; struct btrfs_extent_item *ei;
struct btrfs_tree_block_info *bi; struct btrfs_tree_block_info *bi;
u32 item_size; u32 item_size;
btrfs_item_key_to_cpu(leaf, &key, slot);
item_size = btrfs_item_size_nr(leaf, slot); item_size = btrfs_item_size_nr(leaf, slot);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
if (item_size < sizeof(*ei)) { if (item_size < sizeof(*ei)) {
...@@ -634,13 +637,18 @@ int find_inline_backref(struct extent_buffer *leaf, int slot, ...@@ -634,13 +637,18 @@ int find_inline_backref(struct extent_buffer *leaf, int slot,
WARN_ON(!(btrfs_extent_flags(leaf, ei) & WARN_ON(!(btrfs_extent_flags(leaf, ei) &
BTRFS_EXTENT_FLAG_TREE_BLOCK)); BTRFS_EXTENT_FLAG_TREE_BLOCK));
if (item_size <= sizeof(*ei) + sizeof(*bi)) { if (key.type == BTRFS_EXTENT_ITEM_KEY &&
item_size <= sizeof(*ei) + sizeof(*bi)) {
WARN_ON(item_size < sizeof(*ei) + sizeof(*bi)); WARN_ON(item_size < sizeof(*ei) + sizeof(*bi));
return 1; return 1;
} }
bi = (struct btrfs_tree_block_info *)(ei + 1); if (key.type == BTRFS_EXTENT_ITEM_KEY) {
*ptr = (unsigned long)(bi + 1); bi = (struct btrfs_tree_block_info *)(ei + 1);
*ptr = (unsigned long)(bi + 1);
} else {
*ptr = (unsigned long)(ei + 1);
}
*end = (unsigned long)ei + item_size; *end = (unsigned long)ei + item_size;
return 0; return 0;
} }
...@@ -708,7 +716,7 @@ struct backref_node *build_backref_tree(struct reloc_control *rc, ...@@ -708,7 +716,7 @@ struct backref_node *build_backref_tree(struct reloc_control *rc,
end = 0; end = 0;
ptr = 0; ptr = 0;
key.objectid = cur->bytenr; key.objectid = cur->bytenr;
key.type = BTRFS_EXTENT_ITEM_KEY; key.type = BTRFS_METADATA_ITEM_KEY;
key.offset = (u64)-1; key.offset = (u64)-1;
path1->search_commit_root = 1; path1->search_commit_root = 1;
...@@ -766,7 +774,8 @@ struct backref_node *build_backref_tree(struct reloc_control *rc, ...@@ -766,7 +774,8 @@ struct backref_node *build_backref_tree(struct reloc_control *rc,
break; break;
} }
if (key.type == BTRFS_EXTENT_ITEM_KEY) { if (key.type == BTRFS_EXTENT_ITEM_KEY ||
key.type == BTRFS_METADATA_ITEM_KEY) {
ret = find_inline_backref(eb, path1->slots[0], ret = find_inline_backref(eb, path1->slots[0],
&ptr, &end); &ptr, &end);
if (ret) if (ret)
...@@ -2768,8 +2777,13 @@ static int reada_tree_block(struct reloc_control *rc, ...@@ -2768,8 +2777,13 @@ static int reada_tree_block(struct reloc_control *rc,
struct tree_block *block) struct tree_block *block)
{ {
BUG_ON(block->key_ready); BUG_ON(block->key_ready);
readahead_tree_block(rc->extent_root, block->bytenr, if (block->key.type == BTRFS_METADATA_ITEM_KEY)
block->key.objectid, block->key.offset); readahead_tree_block(rc->extent_root, block->bytenr,
block->key.objectid,
rc->extent_root->leafsize);
else
readahead_tree_block(rc->extent_root, block->bytenr,
block->key.objectid, block->key.offset);
return 0; return 0;
} }
...@@ -3176,12 +3190,17 @@ static int add_tree_block(struct reloc_control *rc, ...@@ -3176,12 +3190,17 @@ static int add_tree_block(struct reloc_control *rc,
eb = path->nodes[0]; eb = path->nodes[0];
item_size = btrfs_item_size_nr(eb, path->slots[0]); item_size = btrfs_item_size_nr(eb, path->slots[0]);
if (item_size >= sizeof(*ei) + sizeof(*bi)) { if (extent_key->type == BTRFS_METADATA_ITEM_KEY ||
item_size >= sizeof(*ei) + sizeof(*bi)) {
ei = btrfs_item_ptr(eb, path->slots[0], ei = btrfs_item_ptr(eb, path->slots[0],
struct btrfs_extent_item); struct btrfs_extent_item);
bi = (struct btrfs_tree_block_info *)(ei + 1); if (extent_key->type == BTRFS_EXTENT_ITEM_KEY) {
bi = (struct btrfs_tree_block_info *)(ei + 1);
level = btrfs_tree_block_level(eb, bi);
} else {
level = (int)extent_key->offset;
}
generation = btrfs_extent_generation(eb, ei); generation = btrfs_extent_generation(eb, ei);
level = btrfs_tree_block_level(eb, bi);
} else { } else {
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
u64 ref_owner; u64 ref_owner;
...@@ -3210,7 +3229,7 @@ static int add_tree_block(struct reloc_control *rc, ...@@ -3210,7 +3229,7 @@ static int add_tree_block(struct reloc_control *rc,
return -ENOMEM; return -ENOMEM;
block->bytenr = extent_key->objectid; block->bytenr = extent_key->objectid;
block->key.objectid = extent_key->offset; block->key.objectid = rc->extent_root->leafsize;
block->key.offset = generation; block->key.offset = generation;
block->level = level; block->level = level;
block->key_ready = 0; block->key_ready = 0;
...@@ -3252,9 +3271,15 @@ static int __add_tree_block(struct reloc_control *rc, ...@@ -3252,9 +3271,15 @@ static int __add_tree_block(struct reloc_control *rc,
ret = btrfs_search_slot(NULL, rc->extent_root, &key, path, 0, 0); ret = btrfs_search_slot(NULL, rc->extent_root, &key, path, 0, 0);
if (ret < 0) if (ret < 0)
goto out; goto out;
BUG_ON(ret);
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
if (ret > 0) {
if (key.objectid == bytenr &&
key.type == BTRFS_METADATA_ITEM_KEY)
ret = 0;
}
BUG_ON(ret);
ret = add_tree_block(rc, &key, path, blocks); ret = add_tree_block(rc, &key, path, blocks);
out: out:
btrfs_free_path(path); btrfs_free_path(path);
...@@ -3275,7 +3300,8 @@ static int block_use_full_backref(struct reloc_control *rc, ...@@ -3275,7 +3300,8 @@ static int block_use_full_backref(struct reloc_control *rc,
return 1; return 1;
ret = btrfs_lookup_extent_info(NULL, rc->extent_root, ret = btrfs_lookup_extent_info(NULL, rc->extent_root,
eb->start, eb->len, NULL, &flags); eb->start, btrfs_header_level(eb), 1,
NULL, &flags);
BUG_ON(ret); BUG_ON(ret);
if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
...@@ -3644,12 +3670,25 @@ int find_next_extent(struct btrfs_trans_handle *trans, ...@@ -3644,12 +3670,25 @@ int find_next_extent(struct btrfs_trans_handle *trans,
break; break;
} }
if (key.type != BTRFS_EXTENT_ITEM_KEY || if (key.type != BTRFS_EXTENT_ITEM_KEY &&
key.type != BTRFS_METADATA_ITEM_KEY) {
path->slots[0]++;
goto next;
}
if (key.type == BTRFS_EXTENT_ITEM_KEY &&
key.objectid + key.offset <= rc->search_start) { key.objectid + key.offset <= rc->search_start) {
path->slots[0]++; path->slots[0]++;
goto next; goto next;
} }
if (key.type == BTRFS_METADATA_ITEM_KEY &&
key.objectid + rc->extent_root->leafsize <=
rc->search_start) {
path->slots[0]++;
goto next;
}
ret = find_first_extent_bit(&rc->processed_blocks, ret = find_first_extent_bit(&rc->processed_blocks,
key.objectid, &start, &end, key.objectid, &start, &end,
EXTENT_DIRTY, NULL); EXTENT_DIRTY, NULL);
...@@ -3658,7 +3697,11 @@ int find_next_extent(struct btrfs_trans_handle *trans, ...@@ -3658,7 +3697,11 @@ int find_next_extent(struct btrfs_trans_handle *trans,
btrfs_release_path(path); btrfs_release_path(path);
rc->search_start = end + 1; rc->search_start = end + 1;
} else { } else {
rc->search_start = key.objectid + key.offset; if (key.type == BTRFS_EXTENT_ITEM_KEY)
rc->search_start = key.objectid + key.offset;
else
rc->search_start = key.objectid +
rc->extent_root->leafsize;
memcpy(extent_key, &key, sizeof(key)); memcpy(extent_key, &key, sizeof(key));
return 0; return 0;
} }
......
...@@ -2312,8 +2312,8 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, ...@@ -2312,8 +2312,8 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
key_start.type = BTRFS_EXTENT_ITEM_KEY; key_start.type = BTRFS_EXTENT_ITEM_KEY;
key_start.offset = (u64)0; key_start.offset = (u64)0;
key_end.objectid = base + offset + nstripes * increment; key_end.objectid = base + offset + nstripes * increment;
key_end.type = BTRFS_EXTENT_ITEM_KEY; key_end.type = BTRFS_METADATA_ITEM_KEY;
key_end.offset = (u64)0; key_end.offset = (u64)-1;
reada1 = btrfs_reada_add(root, &key_start, &key_end); reada1 = btrfs_reada_add(root, &key_start, &key_end);
key_start.objectid = BTRFS_EXTENT_CSUM_OBJECTID; key_start.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
...@@ -2401,6 +2401,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, ...@@ -2401,6 +2401,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0) if (ret < 0)
goto out; goto out;
if (ret > 0) { if (ret > 0) {
ret = btrfs_previous_item(root, path, 0, ret = btrfs_previous_item(root, path, 0,
BTRFS_EXTENT_ITEM_KEY); BTRFS_EXTENT_ITEM_KEY);
...@@ -2418,6 +2419,8 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, ...@@ -2418,6 +2419,8 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
} }
while (1) { while (1) {
u64 bytes;
l = path->nodes[0]; l = path->nodes[0];
slot = path->slots[0]; slot = path->slots[0];
if (slot >= btrfs_header_nritems(l)) { if (slot >= btrfs_header_nritems(l)) {
...@@ -2431,14 +2434,21 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, ...@@ -2431,14 +2434,21 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
} }
btrfs_item_key_to_cpu(l, &key, slot); btrfs_item_key_to_cpu(l, &key, slot);
if (key.objectid + key.offset <= logical) if (key.type != BTRFS_EXTENT_ITEM_KEY &&
key.type != BTRFS_METADATA_ITEM_KEY)
goto next;
if (key.type == BTRFS_METADATA_ITEM_KEY)
bytes = root->leafsize;
else
bytes = key.offset;
if (key.objectid + bytes <= logical)
goto next; goto next;
if (key.objectid >= logical + map->stripe_len) if (key.objectid >= logical + map->stripe_len)
break; break;
if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY)
goto next;
extent = btrfs_item_ptr(l, slot, extent = btrfs_item_ptr(l, slot,
struct btrfs_extent_item); struct btrfs_extent_item);
...@@ -2459,18 +2469,18 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, ...@@ -2459,18 +2469,18 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
* trim extent to this stripe * trim extent to this stripe
*/ */
if (key.objectid < logical) { if (key.objectid < logical) {
key.offset -= logical - key.objectid; bytes -= logical - key.objectid;
key.objectid = logical; key.objectid = logical;
} }
if (key.objectid + key.offset > if (key.objectid + bytes >
logical + map->stripe_len) { logical + map->stripe_len) {
key.offset = logical + map->stripe_len - bytes = logical + map->stripe_len -
key.objectid; key.objectid;
} }
extent_logical = key.objectid; extent_logical = key.objectid;
extent_physical = key.objectid - logical + physical; extent_physical = key.objectid - logical + physical;
extent_len = key.offset; extent_len = bytes;
extent_dev = scrub_dev; extent_dev = scrub_dev;
extent_mirror_num = mirror_num; extent_mirror_num = mirror_num;
if (is_dev_replace) if (is_dev_replace)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment