Commit f0486c68 authored by Yan, Zheng; committed by Chris Mason

Btrfs: Introduce contexts for metadata reservation

Introducing metadata reservation contexts has two major advantages.
First, it makes metadata reservation more traceable. Second, a context
can reclaim freed space and re-add it to itself after the transaction
is committed.

Besides adding the btrfs_block_rsv structure and related helper functions,
this patch contains the following changes:

Move the code that decides whether a freed tree block should be pinned
into btrfs_free_tree_block().

Make space accounting more accurate, mainly for handling read-only
block groups.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
parent 2ead6ae7
...@@ -280,7 +280,8 @@ int btrfs_block_can_be_shared(struct btrfs_root *root, ...@@ -280,7 +280,8 @@ int btrfs_block_can_be_shared(struct btrfs_root *root,
static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_root *root,
struct extent_buffer *buf, struct extent_buffer *buf,
struct extent_buffer *cow) struct extent_buffer *cow,
int *last_ref)
{ {
u64 refs; u64 refs;
u64 owner; u64 owner;
...@@ -366,6 +367,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, ...@@ -366,6 +367,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
BUG_ON(ret); BUG_ON(ret);
} }
clean_tree_block(trans, root, buf); clean_tree_block(trans, root, buf);
*last_ref = 1;
} }
return 0; return 0;
} }
...@@ -392,6 +394,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, ...@@ -392,6 +394,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
struct btrfs_disk_key disk_key; struct btrfs_disk_key disk_key;
struct extent_buffer *cow; struct extent_buffer *cow;
int level; int level;
int last_ref = 0;
int unlock_orig = 0; int unlock_orig = 0;
u64 parent_start; u64 parent_start;
...@@ -442,7 +445,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, ...@@ -442,7 +445,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
(unsigned long)btrfs_header_fsid(cow), (unsigned long)btrfs_header_fsid(cow),
BTRFS_FSID_SIZE); BTRFS_FSID_SIZE);
update_ref_for_cow(trans, root, buf, cow); update_ref_for_cow(trans, root, buf, cow, &last_ref);
if (buf == root->node) { if (buf == root->node) {
WARN_ON(parent && parent != buf); WARN_ON(parent && parent != buf);
...@@ -457,8 +460,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, ...@@ -457,8 +460,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
extent_buffer_get(cow); extent_buffer_get(cow);
spin_unlock(&root->node_lock); spin_unlock(&root->node_lock);
btrfs_free_tree_block(trans, root, buf->start, buf->len, btrfs_free_tree_block(trans, root, buf, parent_start,
parent_start, root->root_key.objectid, level); last_ref);
free_extent_buffer(buf); free_extent_buffer(buf);
add_root_to_dirty_list(root); add_root_to_dirty_list(root);
} else { } else {
...@@ -473,8 +476,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, ...@@ -473,8 +476,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
btrfs_set_node_ptr_generation(parent, parent_slot, btrfs_set_node_ptr_generation(parent, parent_slot,
trans->transid); trans->transid);
btrfs_mark_buffer_dirty(parent); btrfs_mark_buffer_dirty(parent);
btrfs_free_tree_block(trans, root, buf->start, buf->len, btrfs_free_tree_block(trans, root, buf, parent_start,
parent_start, root->root_key.objectid, level); last_ref);
} }
if (unlock_orig) if (unlock_orig)
btrfs_tree_unlock(buf); btrfs_tree_unlock(buf);
...@@ -949,6 +952,22 @@ int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, ...@@ -949,6 +952,22 @@ int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
return bin_search(eb, key, level, slot); return bin_search(eb, key, level, slot);
} }
/*
 * Grow the used-bytes counter stored in the root item by @size.
 * The read-modify-write is performed under root->accounting_lock.
 */
static void root_add_used(struct btrfs_root *root, u32 size)
{
	u64 used;

	spin_lock(&root->accounting_lock);
	used = btrfs_root_used(&root->root_item);
	btrfs_set_root_used(&root->root_item, used + size);
	spin_unlock(&root->accounting_lock);
}
/*
 * Shrink the used-bytes counter stored in the root item by @size.
 * The read-modify-write is performed under root->accounting_lock.
 */
static void root_sub_used(struct btrfs_root *root, u32 size)
{
	u64 used;

	spin_lock(&root->accounting_lock);
	used = btrfs_root_used(&root->root_item);
	btrfs_set_root_used(&root->root_item, used - size);
	spin_unlock(&root->accounting_lock);
}
/* given a node and slot number, this reads the blocks it points to. The /* given a node and slot number, this reads the blocks it points to. The
* extent buffer is returned with a reference taken (but unlocked). * extent buffer is returned with a reference taken (but unlocked).
* NULL is returned on error. * NULL is returned on error.
...@@ -1019,7 +1038,11 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, ...@@ -1019,7 +1038,11 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
btrfs_tree_lock(child); btrfs_tree_lock(child);
btrfs_set_lock_blocking(child); btrfs_set_lock_blocking(child);
ret = btrfs_cow_block(trans, root, child, mid, 0, &child); ret = btrfs_cow_block(trans, root, child, mid, 0, &child);
BUG_ON(ret); if (ret) {
btrfs_tree_unlock(child);
free_extent_buffer(child);
goto enospc;
}
spin_lock(&root->node_lock); spin_lock(&root->node_lock);
root->node = child; root->node = child;
...@@ -1034,11 +1057,12 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, ...@@ -1034,11 +1057,12 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
btrfs_tree_unlock(mid); btrfs_tree_unlock(mid);
/* once for the path */ /* once for the path */
free_extent_buffer(mid); free_extent_buffer(mid);
ret = btrfs_free_tree_block(trans, root, mid->start, mid->len,
0, root->root_key.objectid, level); root_sub_used(root, mid->len);
btrfs_free_tree_block(trans, root, mid, 0, 1);
/* once for the root ptr */ /* once for the root ptr */
free_extent_buffer(mid); free_extent_buffer(mid);
return ret; return 0;
} }
if (btrfs_header_nritems(mid) > if (btrfs_header_nritems(mid) >
BTRFS_NODEPTRS_PER_BLOCK(root) / 4) BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
...@@ -1088,23 +1112,16 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, ...@@ -1088,23 +1112,16 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
if (wret < 0 && wret != -ENOSPC) if (wret < 0 && wret != -ENOSPC)
ret = wret; ret = wret;
if (btrfs_header_nritems(right) == 0) { if (btrfs_header_nritems(right) == 0) {
u64 bytenr = right->start;
u32 blocksize = right->len;
clean_tree_block(trans, root, right); clean_tree_block(trans, root, right);
btrfs_tree_unlock(right); btrfs_tree_unlock(right);
free_extent_buffer(right);
right = NULL;
wret = del_ptr(trans, root, path, level + 1, pslot + wret = del_ptr(trans, root, path, level + 1, pslot +
1); 1);
if (wret) if (wret)
ret = wret; ret = wret;
wret = btrfs_free_tree_block(trans, root, root_sub_used(root, right->len);
bytenr, blocksize, 0, btrfs_free_tree_block(trans, root, right, 0, 1);
root->root_key.objectid, free_extent_buffer(right);
level); right = NULL;
if (wret)
ret = wret;
} else { } else {
struct btrfs_disk_key right_key; struct btrfs_disk_key right_key;
btrfs_node_key(right, &right_key, 0); btrfs_node_key(right, &right_key, 0);
...@@ -1136,21 +1153,15 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, ...@@ -1136,21 +1153,15 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
BUG_ON(wret == 1); BUG_ON(wret == 1);
} }
if (btrfs_header_nritems(mid) == 0) { if (btrfs_header_nritems(mid) == 0) {
/* we've managed to empty the middle node, drop it */
u64 bytenr = mid->start;
u32 blocksize = mid->len;
clean_tree_block(trans, root, mid); clean_tree_block(trans, root, mid);
btrfs_tree_unlock(mid); btrfs_tree_unlock(mid);
free_extent_buffer(mid);
mid = NULL;
wret = del_ptr(trans, root, path, level + 1, pslot); wret = del_ptr(trans, root, path, level + 1, pslot);
if (wret) if (wret)
ret = wret; ret = wret;
wret = btrfs_free_tree_block(trans, root, bytenr, blocksize, root_sub_used(root, mid->len);
0, root->root_key.objectid, level); btrfs_free_tree_block(trans, root, mid, 0, 1);
if (wret) free_extent_buffer(mid);
ret = wret; mid = NULL;
} else { } else {
/* update the parent key to reflect our changes */ /* update the parent key to reflect our changes */
struct btrfs_disk_key mid_key; struct btrfs_disk_key mid_key;
...@@ -1740,7 +1751,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root ...@@ -1740,7 +1751,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
p->nodes[level + 1], p->nodes[level + 1],
p->slots[level + 1], &b); p->slots[level + 1], &b);
if (err) { if (err) {
free_extent_buffer(b);
ret = err; ret = err;
goto done; goto done;
} }
...@@ -2076,6 +2086,8 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, ...@@ -2076,6 +2086,8 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
if (IS_ERR(c)) if (IS_ERR(c))
return PTR_ERR(c); return PTR_ERR(c);
root_add_used(root, root->nodesize);
memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header)); memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
btrfs_set_header_nritems(c, 1); btrfs_set_header_nritems(c, 1);
btrfs_set_header_level(c, level); btrfs_set_header_level(c, level);
...@@ -2134,6 +2146,7 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root ...@@ -2134,6 +2146,7 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
int nritems; int nritems;
BUG_ON(!path->nodes[level]); BUG_ON(!path->nodes[level]);
btrfs_assert_tree_locked(path->nodes[level]);
lower = path->nodes[level]; lower = path->nodes[level];
nritems = btrfs_header_nritems(lower); nritems = btrfs_header_nritems(lower);
BUG_ON(slot > nritems); BUG_ON(slot > nritems);
...@@ -2202,6 +2215,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans, ...@@ -2202,6 +2215,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
if (IS_ERR(split)) if (IS_ERR(split))
return PTR_ERR(split); return PTR_ERR(split);
root_add_used(root, root->nodesize);
memset_extent_buffer(split, 0, 0, sizeof(struct btrfs_header)); memset_extent_buffer(split, 0, 0, sizeof(struct btrfs_header));
btrfs_set_header_level(split, btrfs_header_level(c)); btrfs_set_header_level(split, btrfs_header_level(c));
btrfs_set_header_bytenr(split, split->start); btrfs_set_header_bytenr(split, split->start);
...@@ -2415,6 +2430,9 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans, ...@@ -2415,6 +2430,9 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
if (left_nritems) if (left_nritems)
btrfs_mark_buffer_dirty(left); btrfs_mark_buffer_dirty(left);
else
clean_tree_block(trans, root, left);
btrfs_mark_buffer_dirty(right); btrfs_mark_buffer_dirty(right);
btrfs_item_key(right, &disk_key, 0); btrfs_item_key(right, &disk_key, 0);
...@@ -2660,6 +2678,8 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, ...@@ -2660,6 +2678,8 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(left); btrfs_mark_buffer_dirty(left);
if (right_nritems) if (right_nritems)
btrfs_mark_buffer_dirty(right); btrfs_mark_buffer_dirty(right);
else
clean_tree_block(trans, root, right);
btrfs_item_key(right, &disk_key, 0); btrfs_item_key(right, &disk_key, 0);
wret = fixup_low_keys(trans, root, path, &disk_key, 1); wret = fixup_low_keys(trans, root, path, &disk_key, 1);
...@@ -2669,8 +2689,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, ...@@ -2669,8 +2689,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
/* then fixup the leaf pointer in the path */ /* then fixup the leaf pointer in the path */
if (path->slots[0] < push_items) { if (path->slots[0] < push_items) {
path->slots[0] += old_left_nritems; path->slots[0] += old_left_nritems;
if (btrfs_header_nritems(path->nodes[0]) == 0)
clean_tree_block(trans, root, path->nodes[0]);
btrfs_tree_unlock(path->nodes[0]); btrfs_tree_unlock(path->nodes[0]);
free_extent_buffer(path->nodes[0]); free_extent_buffer(path->nodes[0]);
path->nodes[0] = left; path->nodes[0] = left;
...@@ -2932,10 +2950,10 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans, ...@@ -2932,10 +2950,10 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
right = btrfs_alloc_free_block(trans, root, root->leafsize, 0, right = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
root->root_key.objectid, root->root_key.objectid,
&disk_key, 0, l->start, 0); &disk_key, 0, l->start, 0);
if (IS_ERR(right)) { if (IS_ERR(right))
BUG_ON(1);
return PTR_ERR(right); return PTR_ERR(right);
}
root_add_used(root, root->leafsize);
memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header)); memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
btrfs_set_header_bytenr(right, right->start); btrfs_set_header_bytenr(right, right->start);
...@@ -3054,7 +3072,8 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans, ...@@ -3054,7 +3072,8 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
btrfs_set_path_blocking(path); btrfs_set_path_blocking(path);
ret = split_leaf(trans, root, &key, path, ins_len, 1); ret = split_leaf(trans, root, &key, path, ins_len, 1);
BUG_ON(ret); if (ret)
goto err;
path->keep_locks = 0; path->keep_locks = 0;
btrfs_unlock_up_safe(path, 1); btrfs_unlock_up_safe(path, 1);
...@@ -3796,9 +3815,10 @@ static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans, ...@@ -3796,9 +3815,10 @@ static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
*/ */
btrfs_unlock_up_safe(path, 0); btrfs_unlock_up_safe(path, 0);
ret = btrfs_free_tree_block(trans, root, leaf->start, leaf->len, root_sub_used(root, leaf->len);
0, root->root_key.objectid, 0);
return ret; btrfs_free_tree_block(trans, root, leaf, 0, 1);
return 0;
} }
/* /*
* delete the item at the leaf level in path. If that empties * delete the item at the leaf level in path. If that empties
...@@ -3865,6 +3885,8 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, ...@@ -3865,6 +3885,8 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
if (leaf == root->node) { if (leaf == root->node) {
btrfs_set_header_level(leaf, 0); btrfs_set_header_level(leaf, 0);
} else { } else {
btrfs_set_path_blocking(path);
clean_tree_block(trans, root, leaf);
ret = btrfs_del_leaf(trans, root, path, leaf); ret = btrfs_del_leaf(trans, root, path, leaf);
BUG_ON(ret); BUG_ON(ret);
} }
......
...@@ -707,6 +707,20 @@ struct btrfs_space_info { ...@@ -707,6 +707,20 @@ struct btrfs_space_info {
atomic_t caching_threads; atomic_t caching_threads;
}; };
/*
 * A metadata reservation context: a pool of bytes set aside in a
 * space_info for a particular purpose.
 */
struct btrfs_block_rsv {
/* target size; block_rsv_add_bytes() grows it, block_rsv_release_bytes() shrinks it */
u64 size;
/* bytes currently held; consumed by block_rsv_use_bytes() */
u64 reserved;
/* NOTE(review): presumably freed bytes awaiting reclaim, two slots - confirm against users */
u64 freed[2];
/* space_info whose counters this reservation is charged against */
struct btrfs_space_info *space_info;
/* NOTE(review): presumably links into fs_info->durable_block_rsv_list - confirm */
struct list_head list;
/* taken by the helpers that modify size, reserved and full */
spinlock_t lock;
/* set to 1 by btrfs_init_block_rsv(); presumably a reference count - TODO confirm */
atomic_t usage;
/*
 * weight used by reserve_metadata_bytes(): values >= 10 may take any
 * available space, lower values are limited proportionally.
 * btrfs_init_block_rsv() defaults it to 6.
 */
unsigned int priority:8;
/* NOTE(review): presumably marks a reservation that spans transactions - confirm */
unsigned int durable:1;
/* NOTE(review): usage not visible here - confirm semantics */
unsigned int refill_used:1;
/* set when reserved >= size, cleared when reserved drops below size */
unsigned int full:1;
};
/* /*
* free clusters are used to claim free space in relatively large chunks, * free clusters are used to claim free space in relatively large chunks,
* allowing us to do less seeky writes. They are used for all metadata * allowing us to do less seeky writes. They are used for all metadata
...@@ -757,6 +771,7 @@ struct btrfs_block_group_cache { ...@@ -757,6 +771,7 @@ struct btrfs_block_group_cache {
spinlock_t lock; spinlock_t lock;
u64 pinned; u64 pinned;
u64 reserved; u64 reserved;
u64 reserved_pinned;
u64 bytes_super; u64 bytes_super;
u64 flags; u64 flags;
u64 sectorsize; u64 sectorsize;
...@@ -822,6 +837,22 @@ struct btrfs_fs_info { ...@@ -822,6 +837,22 @@ struct btrfs_fs_info {
/* logical->physical extent mapping */ /* logical->physical extent mapping */
struct btrfs_mapping_tree mapping_tree; struct btrfs_mapping_tree mapping_tree;
/* block reservation for extent, checksum and root tree */
struct btrfs_block_rsv global_block_rsv;
/* block reservation for delay allocation */
struct btrfs_block_rsv delalloc_block_rsv;
/* block reservation for metadata operations */
struct btrfs_block_rsv trans_block_rsv;
/* block reservation for chunk tree */
struct btrfs_block_rsv chunk_block_rsv;
struct btrfs_block_rsv empty_block_rsv;
/* list of block reservations that cross multiple transactions */
struct list_head durable_block_rsv_list;
struct mutex durable_block_rsv_mutex;
u64 generation; u64 generation;
u64 last_trans_committed; u64 last_trans_committed;
...@@ -1008,6 +1039,9 @@ struct btrfs_root { ...@@ -1008,6 +1039,9 @@ struct btrfs_root {
struct completion kobj_unregister; struct completion kobj_unregister;
struct mutex objectid_mutex; struct mutex objectid_mutex;
spinlock_t accounting_lock;
struct btrfs_block_rsv *block_rsv;
struct mutex log_mutex; struct mutex log_mutex;
wait_queue_head_t log_writer_wait; wait_queue_head_t log_writer_wait;
wait_queue_head_t log_commit_wait[2]; wait_queue_head_t log_commit_wait[2];
...@@ -1980,10 +2014,10 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, ...@@ -1980,10 +2014,10 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
u64 parent, u64 root_objectid, u64 parent, u64 root_objectid,
struct btrfs_disk_key *key, int level, struct btrfs_disk_key *key, int level,
u64 hint, u64 empty_size); u64 hint, u64 empty_size);
int btrfs_free_tree_block(struct btrfs_trans_handle *trans, void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_root *root,
u64 bytenr, u32 blocksize, struct extent_buffer *buf,
u64 parent, u64 root_objectid, int level); u64 parent, int last_ref);
struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_root *root,
u64 bytenr, u32 blocksize, u64 bytenr, u32 blocksize,
...@@ -2037,9 +2071,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, ...@@ -2037,9 +2071,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
u64 size); u64 size);
int btrfs_remove_block_group(struct btrfs_trans_handle *trans, int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 group_start); struct btrfs_root *root, u64 group_start);
int btrfs_prepare_block_group_relocation(struct btrfs_root *root,
struct btrfs_block_group_cache *group);
u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags);
void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde);
void btrfs_clear_space_info_full(struct btrfs_fs_info *info); void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
...@@ -2058,6 +2089,30 @@ void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode, ...@@ -2058,6 +2089,30 @@ void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
u64 bytes); u64 bytes);
void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
u64 bytes); u64 bytes);
void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv);
struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root);
void btrfs_free_block_rsv(struct btrfs_root *root,
struct btrfs_block_rsv *rsv);
void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *rsv);
int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
u64 num_bytes, int *retries);
int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
u64 min_reserved, int min_factor);
int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
struct btrfs_block_rsv *dst_rsv,
u64 num_bytes);
void btrfs_block_rsv_release(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
u64 num_bytes);
int btrfs_set_block_group_ro(struct btrfs_root *root,
struct btrfs_block_group_cache *cache);
int btrfs_set_block_group_rw(struct btrfs_root *root,
struct btrfs_block_group_cache *cache);
/* ctree.c */ /* ctree.c */
int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
int level, int *slot); int level, int *slot);
......
...@@ -903,6 +903,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, ...@@ -903,6 +903,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
root->name = NULL; root->name = NULL;
root->in_sysfs = 0; root->in_sysfs = 0;
root->inode_tree = RB_ROOT; root->inode_tree = RB_ROOT;
root->block_rsv = NULL;
INIT_LIST_HEAD(&root->dirty_list); INIT_LIST_HEAD(&root->dirty_list);
INIT_LIST_HEAD(&root->orphan_list); INIT_LIST_HEAD(&root->orphan_list);
...@@ -910,6 +911,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, ...@@ -910,6 +911,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
spin_lock_init(&root->node_lock); spin_lock_init(&root->node_lock);
spin_lock_init(&root->list_lock); spin_lock_init(&root->list_lock);
spin_lock_init(&root->inode_lock); spin_lock_init(&root->inode_lock);
spin_lock_init(&root->accounting_lock);
mutex_init(&root->objectid_mutex); mutex_init(&root->objectid_mutex);
mutex_init(&root->log_mutex); mutex_init(&root->log_mutex);
init_waitqueue_head(&root->log_writer_wait); init_waitqueue_head(&root->log_writer_wait);
...@@ -1620,6 +1622,13 @@ struct btrfs_root *open_ctree(struct super_block *sb, ...@@ -1620,6 +1622,13 @@ struct btrfs_root *open_ctree(struct super_block *sb,
INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
INIT_LIST_HEAD(&fs_info->space_info); INIT_LIST_HEAD(&fs_info->space_info);
btrfs_mapping_init(&fs_info->mapping_tree); btrfs_mapping_init(&fs_info->mapping_tree);
btrfs_init_block_rsv(&fs_info->global_block_rsv);
btrfs_init_block_rsv(&fs_info->delalloc_block_rsv);
btrfs_init_block_rsv(&fs_info->trans_block_rsv);
btrfs_init_block_rsv(&fs_info->chunk_block_rsv);
btrfs_init_block_rsv(&fs_info->empty_block_rsv);
INIT_LIST_HEAD(&fs_info->durable_block_rsv_list);
mutex_init(&fs_info->durable_block_rsv_mutex);
atomic_set(&fs_info->nr_async_submits, 0); atomic_set(&fs_info->nr_async_submits, 0);
atomic_set(&fs_info->async_delalloc_pages, 0); atomic_set(&fs_info->async_delalloc_pages, 0);
atomic_set(&fs_info->async_submit_draining, 0); atomic_set(&fs_info->async_submit_draining, 0);
......
...@@ -35,10 +35,9 @@ ...@@ -35,10 +35,9 @@
static int update_block_group(struct btrfs_trans_handle *trans, static int update_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_root *root,
u64 bytenr, u64 num_bytes, int alloc, u64 bytenr, u64 num_bytes, int alloc);
int mark_free); static int update_reserved_bytes(struct btrfs_block_group_cache *cache,
static int update_reserved_extents(struct btrfs_block_group_cache *cache, u64 num_bytes, int reserve, int sinfo);
u64 num_bytes, int reserve);
static int __btrfs_free_extent(struct btrfs_trans_handle *trans, static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent, u64 bytenr, u64 num_bytes, u64 parent,
...@@ -61,12 +60,6 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, ...@@ -61,12 +60,6 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
static int do_chunk_alloc(struct btrfs_trans_handle *trans, static int do_chunk_alloc(struct btrfs_trans_handle *trans,
struct btrfs_root *extent_root, u64 alloc_bytes, struct btrfs_root *extent_root, u64 alloc_bytes,
u64 flags, int force); u64 flags, int force);
static int pin_down_bytes(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
u64 bytenr, u64 num_bytes,
int is_data, int reserved,
struct extent_buffer **must_clean);
static int find_next_key(struct btrfs_path *path, int level, static int find_next_key(struct btrfs_path *path, int level,
struct btrfs_key *key); struct btrfs_key *key);
static void dump_space_info(struct btrfs_space_info *info, u64 bytes, static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
...@@ -97,8 +90,12 @@ void btrfs_get_block_group(struct btrfs_block_group_cache *cache) ...@@ -97,8 +90,12 @@ void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
void btrfs_put_block_group(struct btrfs_block_group_cache *cache) void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
{ {
if (atomic_dec_and_test(&cache->count)) if (atomic_dec_and_test(&cache->count)) {
WARN_ON(cache->pinned > 0);
WARN_ON(cache->reserved > 0);
WARN_ON(cache->reserved_pinned > 0);
kfree(cache); kfree(cache);
}
} }
/* /*
...@@ -325,7 +322,7 @@ static int caching_kthread(void *data) ...@@ -325,7 +322,7 @@ static int caching_kthread(void *data)
exclude_super_stripes(extent_root, block_group); exclude_super_stripes(extent_root, block_group);
spin_lock(&block_group->space_info->lock); spin_lock(&block_group->space_info->lock);
block_group->space_info->bytes_super += block_group->bytes_super; block_group->space_info->bytes_readonly += block_group->bytes_super;
spin_unlock(&block_group->space_info->lock); spin_unlock(&block_group->space_info->lock);
last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
...@@ -1880,7 +1877,6 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, ...@@ -1880,7 +1877,6 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
return ret; return ret;
} }
/* helper function to actually process a single delayed ref entry */ /* helper function to actually process a single delayed ref entry */
static int run_one_delayed_ref(struct btrfs_trans_handle *trans, static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_root *root,
...@@ -1900,32 +1896,14 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans, ...@@ -1900,32 +1896,14 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
BUG_ON(extent_op); BUG_ON(extent_op);
head = btrfs_delayed_node_to_head(node); head = btrfs_delayed_node_to_head(node);
if (insert_reserved) { if (insert_reserved) {
int mark_free = 0; btrfs_pin_extent(root, node->bytenr,
struct extent_buffer *must_clean = NULL; node->num_bytes, 1);
ret = pin_down_bytes(trans, root, NULL,
node->bytenr, node->num_bytes,
head->is_data, 1, &must_clean);
if (ret > 0)
mark_free = 1;
if (must_clean) {
clean_tree_block(NULL, root, must_clean);
btrfs_tree_unlock(must_clean);
free_extent_buffer(must_clean);
}
if (head->is_data) { if (head->is_data) {
ret = btrfs_del_csums(trans, root, ret = btrfs_del_csums(trans, root,
node->bytenr, node->bytenr,
node->num_bytes); node->num_bytes);
BUG_ON(ret); BUG_ON(ret);
} }
if (mark_free) {
ret = btrfs_free_reserved_extent(root,
node->bytenr,
node->num_bytes);
BUG_ON(ret);
}
} }
mutex_unlock(&head->mutex); mutex_unlock(&head->mutex);
return 0; return 0;
...@@ -2356,6 +2334,8 @@ int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, ...@@ -2356,6 +2334,8 @@ int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
ret = 0; ret = 0;
out: out:
btrfs_free_path(path); btrfs_free_path(path);
if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
WARN_ON(ret > 0);
return ret; return ret;
} }
...@@ -2706,7 +2686,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, ...@@ -2706,7 +2686,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
found->bytes_pinned = 0; found->bytes_pinned = 0;
found->bytes_reserved = 0; found->bytes_reserved = 0;
found->bytes_readonly = 0; found->bytes_readonly = 0;
found->bytes_delalloc = 0; found->bytes_may_use = 0;
found->full = 0; found->full = 0;
found->force_alloc = 0; found->force_alloc = 0;
*space_info = found; *space_info = found;
...@@ -2731,19 +2711,6 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) ...@@ -2731,19 +2711,6 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
} }
} }
static void set_block_group_readonly(struct btrfs_block_group_cache *cache)
{
spin_lock(&cache->space_info->lock);
spin_lock(&cache->lock);
if (!cache->ro) {
cache->space_info->bytes_readonly += cache->key.offset -
btrfs_block_group_used(&cache->item);
cache->ro = 1;
}
spin_unlock(&cache->lock);
spin_unlock(&cache->space_info->lock);
}
u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
{ {
u64 num_devices = root->fs_info->fs_devices->rw_devices; u64 num_devices = root->fs_info->fs_devices->rw_devices;
...@@ -2802,11 +2769,8 @@ static u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data) ...@@ -2802,11 +2769,8 @@ static u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data)
void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode) void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode)
{ {
u64 alloc_target;
alloc_target = btrfs_get_alloc_profile(root, 1);
BTRFS_I(inode)->space_info = __find_space_info(root->fs_info, BTRFS_I(inode)->space_info = __find_space_info(root->fs_info,
alloc_target); BTRFS_BLOCK_GROUP_DATA);
} }
static u64 calculate_bytes_needed(struct btrfs_root *root, int num_items) static u64 calculate_bytes_needed(struct btrfs_root *root, int num_items)
...@@ -3412,10 +3376,334 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans, ...@@ -3412,10 +3376,334 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
return reclaimed >= to_reclaim; return reclaimed >= to_reclaim;
} }
/*
 * Decide whether a failed metadata reservation of @num_bytes is worth
 * retrying, trying to make room along the way.
 *
 * Returns 1 when the caller should retry, -EAGAIN when a running
 * transaction handle was passed in and nothing more can be done with it
 * here, -ENOSPC when retrying is pointless, or whatever non-zero value
 * shrink_delalloc() returned.  *retries is incremented once per call
 * that gets past the pinned-bytes check.
 */
static int should_retry_reserve(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
u64 num_bytes, int *retries)
{
struct btrfs_space_info *space_info = block_rsv->space_info;
int ret;
/* give up once the retry counter exceeds 2 */
if ((*retries) > 2)
return -ENOSPC;
/* a non-zero result from the chunk allocation attempt means: retry */
ret = maybe_allocate_chunk(trans, root, space_info, num_bytes);
if (ret)
return 1;
/* nothing further is attempted while the transaction is committing */
if (trans && trans->transaction->in_commit)
return -ENOSPC;
/* any non-zero result from shrink_delalloc() is handed back as-is */
ret = shrink_delalloc(trans, root, space_info, num_bytes);
if (ret)
return ret;
/* ret is 0 here; a commit only helps if enough bytes are pinned */
spin_lock(&space_info->lock);
if (space_info->bytes_pinned < num_bytes)
ret = 1;
spin_unlock(&space_info->lock);
if (ret)
return -ENOSPC;
(*retries)++;
/* caller holds a transaction handle: it is not committed here */
if (trans)
return -EAGAIN;
/* no handle supplied: join and commit a transaction, then retry */
trans = btrfs_join_transaction(root, 1);
BUG_ON(IS_ERR(trans));
ret = btrfs_commit_transaction(trans, root);
BUG_ON(ret);
return 1;
}
/*
 * Try to charge @num_bytes to the space_info behind @block_rsv.
 *
 * The free space is total_bytes minus used, reserved, pinned and
 * read-only bytes (clamped at zero).  A reservation with priority >= 10
 * succeeds whenever that much space is free; lower priorities must also
 * satisfy
 *   (free + rsv->reserved) * priority >= (num_bytes + rsv->reserved) * 10.
 *
 * Returns 0 and bumps space_info->bytes_reserved on success, -ENOSPC
 * otherwise.
 */
static int reserve_metadata_bytes(struct btrfs_block_rsv *block_rsv,
				  u64 num_bytes)
{
	struct btrfs_space_info *sinfo = block_rsv->space_info;
	u64 in_use;
	u64 avail;
	int ret = -ENOSPC;

	spin_lock(&sinfo->lock);

	in_use = sinfo->bytes_used + sinfo->bytes_reserved +
		 sinfo->bytes_pinned + sinfo->bytes_readonly;
	avail = (in_use < sinfo->total_bytes) ?
		sinfo->total_bytes - in_use : 0;

	if (avail >= num_bytes &&
	    (block_rsv->priority >= 10 ||
	     (avail + block_rsv->reserved) * block_rsv->priority >=
	     (num_bytes + block_rsv->reserved) * 10)) {
		sinfo->bytes_reserved += num_bytes;
		ret = 0;
	}

	spin_unlock(&sinfo->lock);
	return ret;
}
/*
 * Select the reservation to use for @root: the transaction handle's
 * block_rsv when root->ref_cows is set, the root's own block_rsv
 * otherwise.  When the selected pointer is NULL, fs_info's
 * empty_block_rsv is returned instead.
 */
static struct btrfs_block_rsv *get_block_rsv(struct btrfs_trans_handle *trans,
					     struct btrfs_root *root)
{
	struct btrfs_block_rsv *rsv;

	rsv = root->ref_cows ? trans->block_rsv : root->block_rsv;
	if (rsv)
		return rsv;

	return &root->fs_info->empty_block_rsv;
}
/*
 * Consume @num_bytes from @block_rsv.
 *
 * Returns 0 on success.  If fewer than @num_bytes are reserved, nothing
 * is changed and -ENOSPC is returned.  The full flag is cleared when
 * the remaining reservation drops below the target size.
 */
static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
			       u64 num_bytes)
{
	int enough;

	spin_lock(&block_rsv->lock);
	enough = block_rsv->reserved >= num_bytes;
	if (enough) {
		block_rsv->reserved -= num_bytes;
		if (block_rsv->reserved < block_rsv->size)
			block_rsv->full = 0;
	}
	spin_unlock(&block_rsv->lock);

	return enough ? 0 : -ENOSPC;
}
/*
 * Credit num_bytes to the reserved counter of block_rsv.
 *
 * With update_size set, the target size grows by the same amount.
 * Without it, the size stays put and the reservation is flagged 'full'
 * as soon as the reserved bytes reach the size.
 */
static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv,
				u64 num_bytes, int update_size)
{
	spin_lock(&block_rsv->lock);

	block_rsv->reserved += num_bytes;
	if (!update_size) {
		if (block_rsv->reserved >= block_rsv->size)
			block_rsv->full = 1;
	} else {
		block_rsv->size += num_bytes;
	}

	spin_unlock(&block_rsv->lock);
}
/*
 * Shrink the size of block_rsv by num_bytes and hand back whatever is
 * then reserved in excess of the new size.
 *
 * num_bytes == (u64)-1 means "the whole current size".  If the reserved
 * bytes end up at or above the new size, they are trimmed to it and the
 * reservation is flagged 'full'.  The trimmed excess is added to dest
 * when dest is non-NULL; otherwise it is subtracted from bytes_reserved
 * of block_rsv's space_info.
 */
void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv,
			     struct btrfs_block_rsv *dest, u64 num_bytes)
{
	struct btrfs_space_info *sinfo = block_rsv->space_info;
	u64 excess = 0;

	spin_lock(&block_rsv->lock);
	if (num_bytes == (u64)-1)
		num_bytes = block_rsv->size;
	block_rsv->size -= num_bytes;
	if (block_rsv->reserved >= block_rsv->size) {
		excess = block_rsv->reserved - block_rsv->size;
		block_rsv->reserved = block_rsv->size;
		block_rsv->full = 1;
	}
	spin_unlock(&block_rsv->lock);

	if (excess == 0)
		return;

	if (dest) {
		/* dest keeps its size; it only gains reserved bytes */
		block_rsv_add_bytes(dest, excess, 0);
		return;
	}

	spin_lock(&sinfo->lock);
	sinfo->bytes_reserved -= excess;
	spin_unlock(&sinfo->lock);
}
/*
 * Move num_bytes of reserved space from src to dst.  dst's size grows by
 * the same amount.
 *
 * Returns 0 on success, or the error from block_rsv_use_bytes() when src
 * does not hold enough reserved bytes (dst is untouched in that case).
 */
static int block_rsv_migrate_bytes(struct btrfs_block_rsv *src,
				   struct btrfs_block_rsv *dst, u64 num_bytes)
{
	int ret = block_rsv_use_bytes(src, num_bytes);

	if (!ret)
		block_rsv_add_bytes(dst, num_bytes, 1);

	return ret;
}
/*
 * Bring a block reservation into its initial state: every field zeroed,
 * usage count 1, priority 6, lock initialised and list head empty.
 */
void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv)
{
	memset(rsv, 0, sizeof(*rsv));

	rsv->priority = 6;
	atomic_set(&rsv->usage, 1);
	spin_lock_init(&rsv->lock);
	INIT_LIST_HEAD(&rsv->list);
}
/*
 * Allocate and initialise a block reservation that draws from the
 * metadata space_info of root's filesystem.
 *
 * Returns the new reservation, or NULL if the allocation failed.
 */
struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_block_rsv *block_rsv;

	block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS);
	if (!block_rsv)
		return NULL;

	btrfs_init_block_rsv(block_rsv);
	block_rsv->space_info = __find_space_info(fs_info,
						  BTRFS_BLOCK_GROUP_METADATA);
	return block_rsv;
}
/*
 * Drop one usage reference on rsv.  rsv may be NULL.
 *
 * When the last reference goes away, the whole reservation is released.
 * The structure itself is freed here only if it is not marked durable.
 */
void btrfs_free_block_rsv(struct btrfs_root *root,
			  struct btrfs_block_rsv *rsv)
{
	if (!rsv || !atomic_dec_and_test(&rsv->usage))
		return;

	btrfs_block_rsv_release(root, rsv, (u64)-1);
	if (!rsv->durable)
		kfree(rsv);
}
/*
 * Make the block_rsv able to capture freed space.  The captured space is
 * added back to the block_rsv after the transaction commits.
 *
 * The reservation is marked durable and appended to the filesystem's
 * durable_block_rsv_list under durable_block_rsv_mutex.
 */
void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info,
				 struct btrfs_block_rsv *block_rsv)
{
	block_rsv->durable = 1;

	mutex_lock(&fs_info->durable_block_rsv_mutex);
	list_add_tail(&block_rsv->list, &fs_info->durable_block_rsv_list);
	mutex_unlock(&fs_info->durable_block_rsv_mutex);
}
/*
 * Grow block_rsv by num_bytes: reserve the bytes in its space_info and
 * add them to both the reserved amount and the size of the reservation.
 *
 * While the reservation fails, should_retry_reserve() is consulted; a
 * positive answer triggers another attempt, anything else is returned
 * to the caller.
 *
 * Returns 0 on success (or when num_bytes is 0), a negative value
 * otherwise.
 */
int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
			struct btrfs_root *root,
			struct btrfs_block_rsv *block_rsv,
			u64 num_bytes, int *retries)
{
	int ret;

	if (num_bytes == 0)
		return 0;

	do {
		ret = reserve_metadata_bytes(block_rsv, num_bytes);
		if (!ret) {
			block_rsv_add_bytes(block_rsv, num_bytes, 1);
			return 0;
		}

		ret = should_retry_reserve(trans, root, block_rsv,
					   num_bytes, retries);
	} while (ret > 0);

	return ret;
}
/*
 * Make sure block_rsv holds a minimum amount of reserved space.
 *
 * The required minimum is the larger of min_reserved and, when
 * min_factor > 0, div_factor(block_rsv->size, min_factor)
 * (NOTE(review): presumably a fraction of the size in tenths; confirm
 * against div_factor()).
 *
 * If the reservation is short:
 *  - with refill_used set, the missing bytes are reserved from the
 *    space_info and added without growing the reservation's size;
 *  - otherwise, if the reservation is durable and its freed[] counters
 *    cover the shortfall, a transaction commit is used to get them back.
 *
 * Returns:
 *   0        - block_rsv is NULL, already holds enough, or was refilled
 *   -EAGAIN  - a commit is needed but the caller passed a transaction
 *              handle, so the caller has to commit
 *   -ENOSPC  - the shortfall could not be covered
 *   other    - result of btrfs_commit_transaction() when a commit was
 *              performed here (0 on success)
 */
int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root,
			  struct btrfs_block_rsv *block_rsv,
			  u64 min_reserved, int min_factor)
{
	u64 num_bytes = 0;
	int commit_trans = 0;
	int ret = -ENOSPC;

	if (!block_rsv)
		return 0;

	spin_lock(&block_rsv->lock);
	if (min_factor > 0)
		num_bytes = div_factor(block_rsv->size, min_factor);
	if (min_reserved > num_bytes)
		num_bytes = min_reserved;

	if (block_rsv->reserved >= num_bytes) {
		ret = 0;
	} else {
		/* from here on num_bytes is the shortfall */
		num_bytes -= block_rsv->reserved;
		if (block_rsv->durable &&
		    block_rsv->freed[0] + block_rsv->freed[1] >= num_bytes)
			commit_trans = 1;
	}
	spin_unlock(&block_rsv->lock);
	if (!ret)
		return 0;

	if (block_rsv->refill_used) {
		ret = reserve_metadata_bytes(block_rsv, num_bytes);
		if (!ret) {
			block_rsv_add_bytes(block_rsv, num_bytes, 0);
			return 0;
		}
	}

	if (commit_trans) {
		if (trans)
			return -EAGAIN;

		trans = btrfs_join_transaction(root, 1);
		BUG_ON(IS_ERR(trans));
		/* report a failed commit instead of claiming success */
		return btrfs_commit_transaction(trans, root);
	}

	WARN_ON(1);
	printk(KERN_INFO"block_rsv size %llu reserved %llu freed %llu %llu\n",
	       (unsigned long long)block_rsv->size,
	       (unsigned long long)block_rsv->reserved,
	       (unsigned long long)block_rsv->freed[0],
	       (unsigned long long)block_rsv->freed[1]);

	return -ENOSPC;
}
/*
 * Exported wrapper around block_rsv_migrate_bytes(): move num_bytes of
 * reserved space from src_rsv to dst_rsv and return its result.
 */
int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
			    struct btrfs_block_rsv *dst_rsv,
			    u64 num_bytes)
{
	int ret;

	ret = block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);

	return ret;
}
/*
 * Release num_bytes of size from block_rsv.
 *
 * Excess reserved bytes go to the filesystem's global_block_rsv when it
 * is not full, is not block_rsv itself, and shares block_rsv's
 * space_info.  In every other case no destination is passed on and
 * block_rsv_release_bytes() gives the excess back to the space_info.
 */
void btrfs_block_rsv_release(struct btrfs_root *root,
			     struct btrfs_block_rsv *block_rsv,
			     u64 num_bytes)
{
	struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
	struct btrfs_block_rsv *dest = NULL;

	if (!global_rsv->full && global_rsv != block_rsv &&
	    block_rsv->space_info == global_rsv->space_info)
		dest = global_rsv;

	block_rsv_release_bytes(block_rsv, dest, num_bytes);
}
/*
 * Wire the filesystem-wide block reservations to their space_infos:
 *  - chunk_block_rsv uses the SYSTEM space_info, priority 10, and becomes
 *    the reservation of the chunk root;
 *  - trans_block_rsv and empty_block_rsv use the METADATA space_info;
 *    empty_block_rsv also gets priority 10.
 */
static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
{
	struct btrfs_space_info *sinfo;

	sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
	fs_info->chunk_block_rsv.priority = 10;
	fs_info->chunk_block_rsv.space_info = sinfo;

	sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
	fs_info->empty_block_rsv.priority = 10;
	fs_info->empty_block_rsv.space_info = sinfo;
	fs_info->trans_block_rsv.space_info = sinfo;

	fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv;
}
static int update_block_group(struct btrfs_trans_handle *trans, static int update_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_root *root,
u64 bytenr, u64 num_bytes, int alloc, u64 bytenr, u64 num_bytes, int alloc)
int mark_free)
{ {
struct btrfs_block_group_cache *cache; struct btrfs_block_group_cache *cache;
struct btrfs_fs_info *info = root->fs_info; struct btrfs_fs_info *info = root->fs_info;
...@@ -3459,30 +3747,21 @@ static int update_block_group(struct btrfs_trans_handle *trans, ...@@ -3459,30 +3747,21 @@ static int update_block_group(struct btrfs_trans_handle *trans,
cache->space_info->bytes_reserved -= num_bytes; cache->space_info->bytes_reserved -= num_bytes;
cache->space_info->bytes_used += num_bytes; cache->space_info->bytes_used += num_bytes;
cache->space_info->disk_used += num_bytes * factor; cache->space_info->disk_used += num_bytes * factor;
if (cache->ro)
cache->space_info->bytes_readonly -= num_bytes;
spin_unlock(&cache->lock); spin_unlock(&cache->lock);
spin_unlock(&cache->space_info->lock); spin_unlock(&cache->space_info->lock);
} else { } else {
old_val -= num_bytes; old_val -= num_bytes;
btrfs_set_block_group_used(&cache->item, old_val); btrfs_set_block_group_used(&cache->item, old_val);
cache->pinned += num_bytes;
cache->space_info->bytes_pinned += num_bytes;
cache->space_info->bytes_used -= num_bytes; cache->space_info->bytes_used -= num_bytes;
cache->space_info->disk_used -= num_bytes * factor; cache->space_info->disk_used -= num_bytes * factor;
if (cache->ro)
cache->space_info->bytes_readonly += num_bytes;
spin_unlock(&cache->lock); spin_unlock(&cache->lock);
spin_unlock(&cache->space_info->lock); spin_unlock(&cache->space_info->lock);
if (mark_free) {
int ret;
ret = btrfs_discard_extent(root, bytenr,
num_bytes);
WARN_ON(ret);
ret = btrfs_add_free_space(cache, bytenr, set_extent_dirty(info->pinned_extents,
num_bytes); bytenr, bytenr + num_bytes - 1,
WARN_ON(ret); GFP_NOFS | __GFP_NOFAIL);
}
} }
btrfs_put_block_group(cache); btrfs_put_block_group(cache);
total -= num_bytes; total -= num_bytes;
...@@ -3506,18 +3785,10 @@ static u64 first_logical_byte(struct btrfs_root *root, u64 search_start) ...@@ -3506,18 +3785,10 @@ static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
return bytenr; return bytenr;
} }
/* static int pin_down_extent(struct btrfs_root *root,
* this function must be called within transaction struct btrfs_block_group_cache *cache,
*/
int btrfs_pin_extent(struct btrfs_root *root,
u64 bytenr, u64 num_bytes, int reserved) u64 bytenr, u64 num_bytes, int reserved)
{ {
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_block_group_cache *cache;
cache = btrfs_lookup_block_group(fs_info, bytenr);
BUG_ON(!cache);
spin_lock(&cache->space_info->lock); spin_lock(&cache->space_info->lock);
spin_lock(&cache->lock); spin_lock(&cache->lock);
cache->pinned += num_bytes; cache->pinned += num_bytes;
...@@ -3529,28 +3800,68 @@ int btrfs_pin_extent(struct btrfs_root *root, ...@@ -3529,28 +3800,68 @@ int btrfs_pin_extent(struct btrfs_root *root,
spin_unlock(&cache->lock); spin_unlock(&cache->lock);
spin_unlock(&cache->space_info->lock); spin_unlock(&cache->space_info->lock);
btrfs_put_block_group(cache); set_extent_dirty(root->fs_info->pinned_extents, bytenr,
bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
return 0;
}
set_extent_dirty(fs_info->pinned_extents, /*
bytenr, bytenr + num_bytes - 1, GFP_NOFS); * this function must be called within transaction
*/
int btrfs_pin_extent(struct btrfs_root *root,
u64 bytenr, u64 num_bytes, int reserved)
{
struct btrfs_block_group_cache *cache;
cache = btrfs_lookup_block_group(root->fs_info, bytenr);
BUG_ON(!cache);
pin_down_extent(root, cache, bytenr, num_bytes, reserved);
btrfs_put_block_group(cache);
return 0; return 0;
} }
static int update_reserved_extents(struct btrfs_block_group_cache *cache, /*
u64 num_bytes, int reserve) * update size of reserved extents. this function may return -EAGAIN
* if 'reserve' is true or 'sinfo' is false.
*/
static int update_reserved_bytes(struct btrfs_block_group_cache *cache,
u64 num_bytes, int reserve, int sinfo)
{ {
spin_lock(&cache->space_info->lock); int ret = 0;
if (sinfo) {
struct btrfs_space_info *space_info = cache->space_info;
spin_lock(&space_info->lock);
spin_lock(&cache->lock); spin_lock(&cache->lock);
if (reserve) { if (reserve) {
if (cache->ro) {
ret = -EAGAIN;
} else {
cache->reserved += num_bytes; cache->reserved += num_bytes;
cache->space_info->bytes_reserved += num_bytes; space_info->bytes_reserved += num_bytes;
}
} else { } else {
if (cache->ro)
space_info->bytes_readonly += num_bytes;
cache->reserved -= num_bytes; cache->reserved -= num_bytes;
cache->space_info->bytes_reserved -= num_bytes; space_info->bytes_reserved -= num_bytes;
} }
spin_unlock(&cache->lock); spin_unlock(&cache->lock);
spin_unlock(&cache->space_info->lock); spin_unlock(&space_info->lock);
return 0; } else {
spin_lock(&cache->lock);
if (cache->ro) {
ret = -EAGAIN;
} else {
if (reserve)
cache->reserved += num_bytes;
else
cache->reserved -= num_bytes;
}
spin_unlock(&cache->lock);
}
return ret;
} }
int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
...@@ -3607,14 +3918,21 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) ...@@ -3607,14 +3918,21 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
btrfs_add_free_space(cache, start, len); btrfs_add_free_space(cache, start, len);
} }
start += len;
spin_lock(&cache->space_info->lock); spin_lock(&cache->space_info->lock);
spin_lock(&cache->lock); spin_lock(&cache->lock);
cache->pinned -= len; cache->pinned -= len;
cache->space_info->bytes_pinned -= len; cache->space_info->bytes_pinned -= len;
if (cache->ro) {
cache->space_info->bytes_readonly += len;
} else if (cache->reserved_pinned > 0) {
len = min(len, cache->reserved_pinned);
cache->reserved_pinned -= len;
cache->space_info->bytes_reserved += len;
}
spin_unlock(&cache->lock); spin_unlock(&cache->lock);
spin_unlock(&cache->space_info->lock); spin_unlock(&cache->space_info->lock);
start += len;
} }
if (cache) if (cache)
...@@ -3627,8 +3945,11 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, ...@@ -3627,8 +3945,11 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
{ {
struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_fs_info *fs_info = root->fs_info;
struct extent_io_tree *unpin; struct extent_io_tree *unpin;
struct btrfs_block_rsv *block_rsv;
struct btrfs_block_rsv *next_rsv;
u64 start; u64 start;
u64 end; u64 end;
int idx;
int ret; int ret;
if (fs_info->pinned_extents == &fs_info->freed_extents[0]) if (fs_info->pinned_extents == &fs_info->freed_extents[0])
...@@ -3638,70 +3959,41 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, ...@@ -3638,70 +3959,41 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
while (1) { while (1) {
ret = find_first_extent_bit(unpin, 0, &start, &end, ret = find_first_extent_bit(unpin, 0, &start, &end,
EXTENT_DIRTY); EXTENT_DIRTY);
if (ret) if (ret)
break; break;
ret = btrfs_discard_extent(root, start, end + 1 - start);
clear_extent_dirty(unpin, start, end, GFP_NOFS);
unpin_extent_range(root, start, end);
cond_resched();
}
return ret;
}
static int pin_down_bytes(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
u64 bytenr, u64 num_bytes,
int is_data, int reserved,
struct extent_buffer **must_clean)
{
int err = 0;
struct extent_buffer *buf;
if (is_data) ret = btrfs_discard_extent(root, start, end + 1 - start);
goto pinit;
/* clear_extent_dirty(unpin, start, end, GFP_NOFS);
* discard is sloooow, and so triggering discards on unpin_extent_range(root, start, end);
* individual btree blocks isn't a good plan. Just cond_resched();
* pin everything in discard mode. }
*/
if (btrfs_test_opt(root, DISCARD))
goto pinit;
buf = btrfs_find_tree_block(root, bytenr, num_bytes); mutex_lock(&fs_info->durable_block_rsv_mutex);
if (!buf) list_for_each_entry_safe(block_rsv, next_rsv,
goto pinit; &fs_info->durable_block_rsv_list, list) {
/* we can reuse a block if it hasn't been written idx = trans->transid & 0x1;
* and it is from this transaction. We can't if (block_rsv->freed[idx] > 0) {
* reuse anything from the tree log root because block_rsv_add_bytes(block_rsv,
* it has tiny sub-transactions. block_rsv->freed[idx], 0);
*/ block_rsv->freed[idx] = 0;
if (btrfs_buffer_uptodate(buf, 0) &&
btrfs_try_tree_lock(buf)) {
u64 header_owner = btrfs_header_owner(buf);
u64 header_transid = btrfs_header_generation(buf);
if (header_owner != BTRFS_TREE_LOG_OBJECTID &&
header_transid == trans->transid &&
!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
*must_clean = buf;
return 1;
} }
btrfs_tree_unlock(buf); if (atomic_read(&block_rsv->usage) == 0) {
btrfs_block_rsv_release(root, block_rsv, (u64)-1);
if (block_rsv->freed[0] == 0 &&
block_rsv->freed[1] == 0) {
list_del_init(&block_rsv->list);
kfree(block_rsv);
} }
free_extent_buffer(buf); } else {
pinit: btrfs_block_rsv_release(root, block_rsv, 0);
if (path) }
btrfs_set_path_blocking(path); }
/* unlocks the pinned mutex */ mutex_unlock(&fs_info->durable_block_rsv_mutex);
btrfs_pin_extent(root, bytenr, num_bytes, reserved);
BUG_ON(err < 0);
return 0; return 0;
} }
...@@ -3862,9 +4154,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, ...@@ -3862,9 +4154,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
BUG_ON(ret); BUG_ON(ret);
} }
} else { } else {
int mark_free = 0;
struct extent_buffer *must_clean = NULL;
if (found_extent) { if (found_extent) {
BUG_ON(is_data && refs_to_drop != BUG_ON(is_data && refs_to_drop !=
extent_data_ref_count(root, path, iref)); extent_data_ref_count(root, path, iref));
...@@ -3877,31 +4166,11 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, ...@@ -3877,31 +4166,11 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
} }
} }
ret = pin_down_bytes(trans, root, path, bytenr,
num_bytes, is_data, 0, &must_clean);
if (ret > 0)
mark_free = 1;
BUG_ON(ret < 0);
/*
* it is going to be very rare for someone to be waiting
* on the block we're freeing. del_items might need to
* schedule, so rather than get fancy, just force it
* to blocking here
*/
if (must_clean)
btrfs_set_lock_blocking(must_clean);
ret = btrfs_del_items(trans, extent_root, path, path->slots[0], ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
num_to_del); num_to_del);
BUG_ON(ret); BUG_ON(ret);
btrfs_release_path(extent_root, path); btrfs_release_path(extent_root, path);
if (must_clean) {
clean_tree_block(NULL, root, must_clean);
btrfs_tree_unlock(must_clean);
free_extent_buffer(must_clean);
}
if (is_data) { if (is_data) {
ret = btrfs_del_csums(trans, root, bytenr, num_bytes); ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
BUG_ON(ret); BUG_ON(ret);
...@@ -3911,8 +4180,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, ...@@ -3911,8 +4180,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
(bytenr + num_bytes - 1) >> PAGE_CACHE_SHIFT); (bytenr + num_bytes - 1) >> PAGE_CACHE_SHIFT);
} }
ret = update_block_group(trans, root, bytenr, num_bytes, 0, ret = update_block_group(trans, root, bytenr, num_bytes, 0);
mark_free);
BUG_ON(ret); BUG_ON(ret);
} }
btrfs_free_path(path); btrfs_free_path(path);
...@@ -3920,7 +4188,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, ...@@ -3920,7 +4188,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
} }
/* /*
* when we free an extent, it is possible (and likely) that we free the last * when we free a block, it is possible (and likely) that we free the last
* delayed ref for that extent as well. This searches the delayed ref tree for * delayed ref for that extent as well. This searches the delayed ref tree for
* a given extent, and if there are no other delayed refs to be processed, it * a given extent, and if there are no other delayed refs to be processed, it
* removes it from the tree. * removes it from the tree.
...@@ -3932,7 +4200,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, ...@@ -3932,7 +4200,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_root *delayed_refs; struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_delayed_ref_node *ref; struct btrfs_delayed_ref_node *ref;
struct rb_node *node; struct rb_node *node;
int ret; int ret = 0;
delayed_refs = &trans->transaction->delayed_refs; delayed_refs = &trans->transaction->delayed_refs;
spin_lock(&delayed_refs->lock); spin_lock(&delayed_refs->lock);
...@@ -3984,17 +4252,99 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, ...@@ -3984,17 +4252,99 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
list_del_init(&head->cluster); list_del_init(&head->cluster);
spin_unlock(&delayed_refs->lock); spin_unlock(&delayed_refs->lock);
ret = run_one_delayed_ref(trans, root->fs_info->tree_root, BUG_ON(head->extent_op);
&head->node, head->extent_op, if (head->must_insert_reserved)
head->must_insert_reserved); ret = 1;
BUG_ON(ret);
mutex_unlock(&head->mutex);
btrfs_put_delayed_ref(&head->node); btrfs_put_delayed_ref(&head->node);
return 0; return ret;
out: out:
spin_unlock(&delayed_refs->lock); spin_unlock(&delayed_refs->lock);
return 0; return 0;
} }
/*
 * Drop a reference on tree block buf and, if it was the last one, settle
 * the space accounting for the block.
 *
 * For every root except the tree log, a delayed DROP ref is queued first.
 * Nothing else happens unless last_ref is set.
 *
 * With last_ref set:
 *  - A block created in the running transaction whose delayed ref could
 *    be cleaned up (check_ref_cleanup() returned non-zero; tree-log
 *    blocks skip that check) and which does not carry the WRITTEN header
 *    flag is returned to the free space of its block group right away.
 *    Its bytes go back to the block reservation when that is below its
 *    size, otherwise they are dropped from the space_info's reserved
 *    count.
 *  - A WRITTEN block from this transaction is pinned.
 *  - In the remaining cases, and after pinning, a durable reservation
 *    on a writable block group records the bytes in
 *    freed[transid & 1] and in the group's reserved_pinned counter.
 */
void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root,
			   struct extent_buffer *buf,
			   u64 parent, int last_ref)
{
	struct btrfs_block_rsv *block_rsv;
	struct btrfs_block_group_cache *cache = NULL;
	int ret;

	if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
		ret = btrfs_add_delayed_tree_ref(trans, buf->start, buf->len,
						parent, root->root_key.objectid,
						btrfs_header_level(buf),
						BTRFS_DROP_DELAYED_REF, NULL);
		BUG_ON(ret);
	}

	if (!last_ref)
		return;

	block_rsv = get_block_rsv(trans, root);
	cache = btrfs_lookup_block_group(root->fs_info, buf->start);
	/* fail the same way btrfs_pin_extent() does on a missing group */
	BUG_ON(!cache);
	BUG_ON(block_rsv->space_info != cache->space_info);

	if (btrfs_header_generation(buf) == trans->transid) {
		if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
			ret = check_ref_cleanup(trans, root, buf->start);
			if (!ret)
				goto pin;
		}

		if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
			pin_down_extent(root, cache, buf->start, buf->len, 1);
			goto pin;
		}

		WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));

		btrfs_add_free_space(cache, buf->start, buf->len);
		ret = update_reserved_bytes(cache, buf->len, 0, 0);
		if (ret == -EAGAIN) {
			/* block group became read-only */
			update_reserved_bytes(cache, buf->len, 0, 1);
			goto out;
		}

		/* ret == 1: the reservation did not take the bytes back */
		ret = 1;
		spin_lock(&block_rsv->lock);
		if (block_rsv->reserved < block_rsv->size) {
			block_rsv->reserved += buf->len;
			ret = 0;
		}
		spin_unlock(&block_rsv->lock);

		if (ret) {
			spin_lock(&cache->space_info->lock);
			cache->space_info->bytes_reserved -= buf->len;
			spin_unlock(&cache->space_info->lock);
		}
		goto out;
	}
pin:
	if (block_rsv->durable && !cache->ro) {
		ret = 0;
		/* re-check ->ro under the block group lock */
		spin_lock(&cache->lock);
		if (!cache->ro) {
			cache->reserved_pinned += buf->len;
			ret = 1;
		}
		spin_unlock(&cache->lock);

		if (ret) {
			spin_lock(&block_rsv->lock);
			block_rsv->freed[trans->transid & 0x1] += buf->len;
			spin_unlock(&block_rsv->lock);
		}
	}
out:
	btrfs_put_block_group(cache);
}
int btrfs_free_extent(struct btrfs_trans_handle *trans, int btrfs_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent, u64 bytenr, u64 num_bytes, u64 parent,
...@@ -4016,8 +4366,6 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, ...@@ -4016,8 +4366,6 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
parent, root_objectid, (int)owner, parent, root_objectid, (int)owner,
BTRFS_DROP_DELAYED_REF, NULL); BTRFS_DROP_DELAYED_REF, NULL);
BUG_ON(ret); BUG_ON(ret);
ret = check_ref_cleanup(trans, root, bytenr);
BUG_ON(ret);
} else { } else {
ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes, ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes,
parent, root_objectid, owner, parent, root_objectid, owner,
...@@ -4027,21 +4375,6 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, ...@@ -4027,21 +4375,6 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
return ret; return ret;
} }
int btrfs_free_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u32 blocksize,
u64 parent, u64 root_objectid, int level)
{
u64 used;
spin_lock(&root->node_lock);
used = btrfs_root_used(&root->root_item) - blocksize;
btrfs_set_root_used(&root->root_item, used);
spin_unlock(&root->node_lock);
return btrfs_free_extent(trans, root, bytenr, blocksize,
parent, root_objectid, level, 0);
}
static u64 stripe_align(struct btrfs_root *root, u64 val) static u64 stripe_align(struct btrfs_root *root, u64 val)
{ {
u64 mask = ((u64)root->stripesize - 1); u64 mask = ((u64)root->stripesize - 1);
...@@ -4131,7 +4464,6 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, ...@@ -4131,7 +4464,6 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
u64 num_bytes, u64 empty_size, u64 num_bytes, u64 empty_size,
u64 search_start, u64 search_end, u64 search_start, u64 search_end,
u64 hint_byte, struct btrfs_key *ins, u64 hint_byte, struct btrfs_key *ins,
u64 exclude_start, u64 exclude_nr,
int data) int data)
{ {
int ret = 0; int ret = 0;
...@@ -4143,8 +4475,8 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, ...@@ -4143,8 +4475,8 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
int done_chunk_alloc = 0; int done_chunk_alloc = 0;
struct btrfs_space_info *space_info; struct btrfs_space_info *space_info;
int last_ptr_loop = 0; int last_ptr_loop = 0;
int index = 0;
int loop = 0; int loop = 0;
int index = 0;
bool found_uncached_bg = false; bool found_uncached_bg = false;
bool failed_cluster_refill = false; bool failed_cluster_refill = false;
bool failed_alloc = false; bool failed_alloc = false;
...@@ -4415,23 +4747,22 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, ...@@ -4415,23 +4747,22 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
goto loop; goto loop;
} }
if (exclude_nr > 0 && ins->objectid = search_start;
(search_start + num_bytes > exclude_start && ins->offset = num_bytes;
search_start < exclude_start + exclude_nr)) {
search_start = exclude_start + exclude_nr; if (offset < search_start)
btrfs_add_free_space(block_group, offset,
search_start - offset);
BUG_ON(offset > search_start);
ret = update_reserved_bytes(block_group, num_bytes, 1,
(data & BTRFS_BLOCK_GROUP_DATA));
if (ret == -EAGAIN) {
btrfs_add_free_space(block_group, offset, num_bytes); btrfs_add_free_space(block_group, offset, num_bytes);
/*
* if search_start is still in this block group
* then we just re-search this block group
*/
if (search_start >= block_group->key.objectid &&
search_start < (block_group->key.objectid +
block_group->key.offset))
goto have_block_group;
goto loop; goto loop;
} }
/* we are all good, lets return */
ins->objectid = search_start; ins->objectid = search_start;
ins->offset = num_bytes; ins->offset = num_bytes;
...@@ -4439,10 +4770,6 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, ...@@ -4439,10 +4770,6 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
btrfs_add_free_space(block_group, offset, btrfs_add_free_space(block_group, offset,
search_start - offset); search_start - offset);
BUG_ON(offset > search_start); BUG_ON(offset > search_start);
update_reserved_extents(block_group, num_bytes, 1);
/* we are all good, lets return */
break; break;
loop: loop:
failed_cluster_refill = false; failed_cluster_refill = false;
...@@ -4616,9 +4943,8 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans, ...@@ -4616,9 +4943,8 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
WARN_ON(num_bytes < root->sectorsize); WARN_ON(num_bytes < root->sectorsize);
ret = find_free_extent(trans, root, num_bytes, empty_size, ret = find_free_extent(trans, root, num_bytes, empty_size,
search_start, search_end, hint_byte, ins, search_start, search_end, hint_byte,
trans->alloc_exclude_start, ins, data);
trans->alloc_exclude_nr, data);
if (ret == -ENOSPC && num_bytes > min_alloc_size) { if (ret == -ENOSPC && num_bytes > min_alloc_size) {
num_bytes = num_bytes >> 1; num_bytes = num_bytes >> 1;
...@@ -4656,7 +4982,7 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) ...@@ -4656,7 +4982,7 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
ret = btrfs_discard_extent(root, start, len); ret = btrfs_discard_extent(root, start, len);
btrfs_add_free_space(cache, start, len); btrfs_add_free_space(cache, start, len);
update_reserved_extents(cache, len, 0); update_reserved_bytes(cache, len, 0, 1);
btrfs_put_block_group(cache); btrfs_put_block_group(cache);
return ret; return ret;
...@@ -4719,8 +5045,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, ...@@ -4719,8 +5045,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(path->nodes[0]); btrfs_mark_buffer_dirty(path->nodes[0]);
btrfs_free_path(path); btrfs_free_path(path);
ret = update_block_group(trans, root, ins->objectid, ins->offset, ret = update_block_group(trans, root, ins->objectid, ins->offset, 1);
1, 0);
if (ret) { if (ret) {
printk(KERN_ERR "btrfs update block group failed for %llu " printk(KERN_ERR "btrfs update block group failed for %llu "
"%llu\n", (unsigned long long)ins->objectid, "%llu\n", (unsigned long long)ins->objectid,
...@@ -4780,8 +5105,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, ...@@ -4780,8 +5105,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(leaf); btrfs_mark_buffer_dirty(leaf);
btrfs_free_path(path); btrfs_free_path(path);
ret = update_block_group(trans, root, ins->objectid, ins->offset, ret = update_block_group(trans, root, ins->objectid, ins->offset, 1);
1, 0);
if (ret) { if (ret) {
printk(KERN_ERR "btrfs update block group failed for %llu " printk(KERN_ERR "btrfs update block group failed for %llu "
"%llu\n", (unsigned long long)ins->objectid, "%llu\n", (unsigned long long)ins->objectid,
...@@ -4857,73 +5181,14 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, ...@@ -4857,73 +5181,14 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
put_caching_control(caching_ctl); put_caching_control(caching_ctl);
} }
update_reserved_extents(block_group, ins->offset, 1); ret = update_reserved_bytes(block_group, ins->offset, 1, 1);
BUG_ON(ret);
btrfs_put_block_group(block_group); btrfs_put_block_group(block_group);
ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
0, owner, offset, ins, 1); 0, owner, offset, ins, 1);
return ret; return ret;
} }
/*
* finds a free extent and does all the dirty work required for allocation
* returns the key for the extent through ins, and a tree buffer for
* the first block of the extent through buf.
*
* returns 0 if everything worked, non-zero otherwise.
*/
static int alloc_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 num_bytes, u64 parent, u64 root_objectid,
struct btrfs_disk_key *key, int level,
u64 empty_size, u64 hint_byte, u64 search_end,
struct btrfs_key *ins)
{
int ret;
u64 flags = 0;
ret = btrfs_reserve_extent(trans, root, num_bytes, num_bytes,
empty_size, hint_byte, search_end,
ins, 0);
if (ret)
return ret;
if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
if (parent == 0)
parent = ins->objectid;
flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
} else
BUG_ON(parent > 0);
if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
struct btrfs_delayed_extent_op *extent_op;
extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
BUG_ON(!extent_op);
if (key)
memcpy(&extent_op->key, key, sizeof(extent_op->key));
else
memset(&extent_op->key, 0, sizeof(extent_op->key));
extent_op->flags_to_set = flags;
extent_op->update_key = 1;
extent_op->update_flags = 1;
extent_op->is_data = 0;
ret = btrfs_add_delayed_tree_ref(trans, ins->objectid,
ins->offset, parent, root_objectid,
level, BTRFS_ADD_DELAYED_EXTENT,
extent_op);
BUG_ON(ret);
}
if (root_objectid == root->root_key.objectid) {
u64 used;
spin_lock(&root->node_lock);
used = btrfs_root_used(&root->root_item) + num_bytes;
btrfs_set_root_used(&root->root_item, used);
spin_unlock(&root->node_lock);
}
return ret;
}
struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_root *root,
u64 bytenr, u32 blocksize, u64 bytenr, u32 blocksize,
...@@ -4962,8 +5227,45 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, ...@@ -4962,8 +5227,45 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
return buf; return buf;
} }
/*
 * Charge blocksize bytes for a new tree block to the reservation that
 * get_block_rsv() selects for (trans, root).
 *
 * A reservation whose size is 0 has no budget of its own, so the bytes
 * are reserved directly from its space_info.  Otherwise they are taken
 * from the reservation's reserved bytes.
 *
 * Returns the reservation that was charged, ERR_PTR(err) if the direct
 * reservation failed, or ERR_PTR(-ENOSPC) (after a warning and a dump of
 * the counters) if the reservation did not hold enough bytes.
 */
static struct btrfs_block_rsv *
use_block_rsv(struct btrfs_trans_handle *trans,
	      struct btrfs_root *root, u32 blocksize)
{
	struct btrfs_block_rsv *block_rsv;
	int ret;

	block_rsv = get_block_rsv(trans, root);

	if (block_rsv->size == 0) {
		ret = reserve_metadata_bytes(block_rsv, blocksize);
		if (ret)
			return ERR_PTR(ret);
		return block_rsv;
	}

	ret = block_rsv_use_bytes(block_rsv, blocksize);
	if (!ret)
		return block_rsv;

	WARN_ON(1);
	printk(KERN_INFO"block_rsv size %llu reserved %llu freed %llu %llu\n",
	       (unsigned long long)block_rsv->size,
	       (unsigned long long)block_rsv->reserved,
	       (unsigned long long)block_rsv->freed[0],
	       (unsigned long long)block_rsv->freed[1]);

	return ERR_PTR(-ENOSPC);
}
/*
 * Give blocksize bytes back to block_rsv.
 *
 * The bytes are first added to the reserved amount without growing the
 * size.  The zero-byte release that follows (no destination) then trims
 * anything reserved beyond the size.
 */
static void unuse_block_rsv(struct btrfs_block_rsv *block_rsv, u32 blocksize)
{
	/* step 1: put the bytes back */
	block_rsv_add_bytes(block_rsv, blocksize, 0);

	/* step 2: drop whatever now exceeds the reservation's size */
	block_rsv_release_bytes(block_rsv, NULL, 0);
}
/* /*
* helper function to allocate a block for a given tree * finds a free extent and does all the dirty work required for allocation
* returns the key for the extent through ins, and a tree buffer for
* the first block of the extent through buf.
*
* returns the tree buffer or NULL. * returns the tree buffer or NULL.
*/ */
struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
...@@ -4973,18 +5275,53 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, ...@@ -4973,18 +5275,53 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
u64 hint, u64 empty_size) u64 hint, u64 empty_size)
{ {
struct btrfs_key ins; struct btrfs_key ins;
int ret; struct btrfs_block_rsv *block_rsv;
struct extent_buffer *buf; struct extent_buffer *buf;
u64 flags = 0;
int ret;
ret = alloc_tree_block(trans, root, blocksize, parent, root_objectid,
key, level, empty_size, hint, (u64)-1, &ins); block_rsv = use_block_rsv(trans, root, blocksize);
if (IS_ERR(block_rsv))
return ERR_CAST(block_rsv);
ret = btrfs_reserve_extent(trans, root, blocksize, blocksize,
empty_size, hint, (u64)-1, &ins, 0);
if (ret) { if (ret) {
BUG_ON(ret > 0); unuse_block_rsv(block_rsv, blocksize);
return ERR_PTR(ret); return ERR_PTR(ret);
} }
buf = btrfs_init_new_buffer(trans, root, ins.objectid, buf = btrfs_init_new_buffer(trans, root, ins.objectid,
blocksize, level); blocksize, level);
BUG_ON(IS_ERR(buf));
if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
if (parent == 0)
parent = ins.objectid;
flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
} else
BUG_ON(parent > 0);
if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
struct btrfs_delayed_extent_op *extent_op;
extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
BUG_ON(!extent_op);
if (key)
memcpy(&extent_op->key, key, sizeof(extent_op->key));
else
memset(&extent_op->key, 0, sizeof(extent_op->key));
extent_op->flags_to_set = flags;
extent_op->update_key = 1;
extent_op->update_flags = 1;
extent_op->is_data = 0;
ret = btrfs_add_delayed_tree_ref(trans, ins.objectid,
ins.offset, parent, root_objectid,
level, BTRFS_ADD_DELAYED_EXTENT,
extent_op);
BUG_ON(ret);
}
return buf; return buf;
} }
...@@ -5309,7 +5646,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, ...@@ -5309,7 +5646,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
struct btrfs_path *path, struct btrfs_path *path,
struct walk_control *wc) struct walk_control *wc)
{ {
int ret = 0; int ret;
int level = wc->level; int level = wc->level;
struct extent_buffer *eb = path->nodes[level]; struct extent_buffer *eb = path->nodes[level];
u64 parent = 0; u64 parent = 0;
...@@ -5387,13 +5724,11 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, ...@@ -5387,13 +5724,11 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
btrfs_header_owner(path->nodes[level + 1])); btrfs_header_owner(path->nodes[level + 1]));
} }
ret = btrfs_free_extent(trans, root, eb->start, eb->len, parent, btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1);
root->root_key.objectid, level, 0);
BUG_ON(ret);
out: out:
wc->refs[level] = 0; wc->refs[level] = 0;
wc->flags[level] = 0; wc->flags[level] = 0;
return ret; return 0;
} }
static noinline int walk_down_tree(struct btrfs_trans_handle *trans, static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
...@@ -7216,48 +7551,80 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) ...@@ -7216,48 +7551,80 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
return flags; return flags;
} }
static int __alloc_chunk_for_shrink(struct btrfs_root *root, static int set_block_group_ro(struct btrfs_block_group_cache *cache)
struct btrfs_block_group_cache *shrink_block_group,
int force)
{ {
struct btrfs_trans_handle *trans; struct btrfs_space_info *sinfo = cache->space_info;
u64 new_alloc_flags; u64 num_bytes;
u64 calc; int ret = -ENOSPC;
spin_lock(&shrink_block_group->lock);
if (btrfs_block_group_used(&shrink_block_group->item) +
shrink_block_group->reserved > 0) {
spin_unlock(&shrink_block_group->lock);
trans = btrfs_start_transaction(root, 1); if (cache->ro)
spin_lock(&shrink_block_group->lock); return 0;
new_alloc_flags = update_block_group_flags(root, spin_lock(&sinfo->lock);
shrink_block_group->flags); spin_lock(&cache->lock);
if (new_alloc_flags != shrink_block_group->flags) { num_bytes = cache->key.offset - cache->reserved - cache->pinned -
calc = cache->bytes_super - btrfs_block_group_used(&cache->item);
btrfs_block_group_used(&shrink_block_group->item);
} else { if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned +
calc = shrink_block_group->key.offset; sinfo->bytes_may_use + sinfo->bytes_readonly +
cache->reserved_pinned + num_bytes < sinfo->total_bytes) {
sinfo->bytes_readonly += num_bytes;
sinfo->bytes_reserved += cache->reserved_pinned;
cache->reserved_pinned = 0;
cache->ro = 1;
ret = 0;
} }
spin_unlock(&shrink_block_group->lock); spin_unlock(&cache->lock);
spin_unlock(&sinfo->lock);
return ret;
}
do_chunk_alloc(trans, root->fs_info->extent_root, int btrfs_set_block_group_ro(struct btrfs_root *root,
calc + 2 * 1024 * 1024, new_alloc_flags, force); struct btrfs_block_group_cache *cache)
{
struct btrfs_trans_handle *trans;
u64 alloc_flags;
int ret;
BUG_ON(cache->ro);
trans = btrfs_join_transaction(root, 1);
BUG_ON(IS_ERR(trans));
alloc_flags = update_block_group_flags(root, cache->flags);
if (alloc_flags != cache->flags)
do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
ret = set_block_group_ro(cache);
if (!ret)
goto out;
alloc_flags = get_alloc_profile(root, cache->space_info->flags);
ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
if (ret < 0)
goto out;
ret = set_block_group_ro(cache);
out:
btrfs_end_transaction(trans, root); btrfs_end_transaction(trans, root);
} else return ret;
spin_unlock(&shrink_block_group->lock);
return 0;
} }
int btrfs_set_block_group_rw(struct btrfs_root *root,
struct btrfs_block_group_cache *cache)
{
struct btrfs_space_info *sinfo = cache->space_info;
u64 num_bytes;
int btrfs_prepare_block_group_relocation(struct btrfs_root *root, BUG_ON(!cache->ro);
struct btrfs_block_group_cache *group)
{ spin_lock(&sinfo->lock);
__alloc_chunk_for_shrink(root, group, 1); spin_lock(&cache->lock);
set_block_group_readonly(group); num_bytes = cache->key.offset - cache->reserved - cache->pinned -
cache->bytes_super - btrfs_block_group_used(&cache->item);
sinfo->bytes_readonly -= num_bytes;
cache->ro = 0;
spin_unlock(&cache->lock);
spin_unlock(&sinfo->lock);
return 0; return 0;
} }
...@@ -7428,7 +7795,11 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) ...@@ -7428,7 +7795,11 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
space_info = list_entry(info->space_info.next, space_info = list_entry(info->space_info.next,
struct btrfs_space_info, struct btrfs_space_info,
list); list);
if (space_info->bytes_pinned > 0 ||
space_info->bytes_reserved > 0) {
WARN_ON(1);
dump_space_info(space_info, 0, 0);
}
list_del(&space_info->list); list_del(&space_info->list);
kfree(space_info); kfree(space_info);
} }
...@@ -7476,7 +7847,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) ...@@ -7476,7 +7847,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
cache = kzalloc(sizeof(*cache), GFP_NOFS); cache = kzalloc(sizeof(*cache), GFP_NOFS);
if (!cache) { if (!cache) {
ret = -ENOMEM; ret = -ENOMEM;
break; goto error;
} }
atomic_set(&cache->count, 1); atomic_set(&cache->count, 1);
...@@ -7533,7 +7904,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) ...@@ -7533,7 +7904,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
BUG_ON(ret); BUG_ON(ret);
cache->space_info = space_info; cache->space_info = space_info;
spin_lock(&cache->space_info->lock); spin_lock(&cache->space_info->lock);
cache->space_info->bytes_super += cache->bytes_super; cache->space_info->bytes_readonly += cache->bytes_super;
spin_unlock(&cache->space_info->lock); spin_unlock(&cache->space_info->lock);
__link_block_group(space_info, cache); __link_block_group(space_info, cache);
...@@ -7543,7 +7914,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) ...@@ -7543,7 +7914,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
set_avail_alloc_bits(root->fs_info, cache->flags); set_avail_alloc_bits(root->fs_info, cache->flags);
if (btrfs_chunk_readonly(root, cache->key.objectid)) if (btrfs_chunk_readonly(root, cache->key.objectid))
set_block_group_readonly(cache); set_block_group_ro(cache);
} }
list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) { list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) {
...@@ -7557,10 +7928,12 @@ int btrfs_read_block_groups(struct btrfs_root *root) ...@@ -7557,10 +7928,12 @@ int btrfs_read_block_groups(struct btrfs_root *root)
* mirrored block groups. * mirrored block groups.
*/ */
list_for_each_entry(cache, &space_info->block_groups[3], list) list_for_each_entry(cache, &space_info->block_groups[3], list)
set_block_group_readonly(cache); set_block_group_ro(cache);
list_for_each_entry(cache, &space_info->block_groups[4], list) list_for_each_entry(cache, &space_info->block_groups[4], list)
set_block_group_readonly(cache); set_block_group_ro(cache);
} }
init_global_block_rsv(info);
ret = 0; ret = 0;
error: error:
btrfs_free_path(path); btrfs_free_path(path);
...@@ -7621,7 +7994,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, ...@@ -7621,7 +7994,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
BUG_ON(ret); BUG_ON(ret);
spin_lock(&cache->space_info->lock); spin_lock(&cache->space_info->lock);
cache->space_info->bytes_super += cache->bytes_super; cache->space_info->bytes_readonly += cache->bytes_super;
spin_unlock(&cache->space_info->lock); spin_unlock(&cache->space_info->lock);
__link_block_group(cache->space_info, cache); __link_block_group(cache->space_info, cache);
......
...@@ -3514,6 +3514,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) ...@@ -3514,6 +3514,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
struct btrfs_fs_info *fs_info = extent_root->fs_info; struct btrfs_fs_info *fs_info = extent_root->fs_info;
struct reloc_control *rc; struct reloc_control *rc;
int ret; int ret;
int rw = 0;
int err = 0; int err = 0;
rc = kzalloc(sizeof(*rc), GFP_NOFS); rc = kzalloc(sizeof(*rc), GFP_NOFS);
...@@ -3524,15 +3525,22 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) ...@@ -3524,15 +3525,22 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
extent_io_tree_init(&rc->processed_blocks, NULL, GFP_NOFS); extent_io_tree_init(&rc->processed_blocks, NULL, GFP_NOFS);
INIT_LIST_HEAD(&rc->reloc_roots); INIT_LIST_HEAD(&rc->reloc_roots);
rc->extent_root = extent_root;
rc->block_group = btrfs_lookup_block_group(fs_info, group_start); rc->block_group = btrfs_lookup_block_group(fs_info, group_start);
BUG_ON(!rc->block_group); BUG_ON(!rc->block_group);
if (!rc->block_group->ro) {
ret = btrfs_set_block_group_ro(extent_root, rc->block_group);
if (ret) {
err = ret;
goto out;
}
rw = 1;
}
btrfs_init_workers(&rc->workers, "relocate", btrfs_init_workers(&rc->workers, "relocate",
fs_info->thread_pool_size, NULL); fs_info->thread_pool_size, NULL);
rc->extent_root = extent_root;
btrfs_prepare_block_group_relocation(extent_root, rc->block_group);
rc->data_inode = create_reloc_inode(fs_info, rc->block_group); rc->data_inode = create_reloc_inode(fs_info, rc->block_group);
if (IS_ERR(rc->data_inode)) { if (IS_ERR(rc->data_inode)) {
err = PTR_ERR(rc->data_inode); err = PTR_ERR(rc->data_inode);
...@@ -3597,6 +3605,8 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) ...@@ -3597,6 +3605,8 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
WARN_ON(rc->block_group->reserved > 0); WARN_ON(rc->block_group->reserved > 0);
WARN_ON(btrfs_block_group_used(&rc->block_group->item) > 0); WARN_ON(btrfs_block_group_used(&rc->block_group->item) > 0);
out: out:
if (err && rw)
btrfs_set_block_group_rw(extent_root, rc->block_group);
iput(rc->data_inode); iput(rc->data_inode);
btrfs_stop_workers(&rc->workers); btrfs_stop_workers(&rc->workers);
btrfs_put_block_group(rc->block_group); btrfs_put_block_group(rc->block_group);
......
...@@ -185,9 +185,8 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, ...@@ -185,9 +185,8 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
h->blocks_reserved = num_blocks; h->blocks_reserved = num_blocks;
h->blocks_used = 0; h->blocks_used = 0;
h->block_group = 0; h->block_group = 0;
h->alloc_exclude_nr = 0;
h->alloc_exclude_start = 0;
h->delayed_ref_updates = 0; h->delayed_ref_updates = 0;
h->block_rsv = NULL;
if (!current->journal_info && type != TRANS_USERSPACE) if (!current->journal_info && type != TRANS_USERSPACE)
current->journal_info = h; current->journal_info = h;
......
...@@ -45,13 +45,13 @@ struct btrfs_transaction { ...@@ -45,13 +45,13 @@ struct btrfs_transaction {
struct btrfs_trans_handle { struct btrfs_trans_handle {
u64 transid; u64 transid;
u64 block_group;
u64 bytes_reserved;
unsigned long blocks_reserved; unsigned long blocks_reserved;
unsigned long blocks_used; unsigned long blocks_used;
struct btrfs_transaction *transaction;
u64 block_group;
u64 alloc_exclude_start;
u64 alloc_exclude_nr;
unsigned long delayed_ref_updates; unsigned long delayed_ref_updates;
struct btrfs_transaction *transaction;
struct btrfs_block_rsv *block_rsv;
}; };
struct btrfs_pending_snapshot { struct btrfs_pending_snapshot {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment