Commit f0486c68 authored by Yan, Zheng's avatar Yan, Zheng Committed by Chris Mason

Btrfs: Introduce contexts for metadata reservation

Introducing metadata reseravtion contexts has two major advantages.
First, it makes metadata reseravtion more traceable. Second, it can
reclaim freed space and re-add them to the itself after transaction
committed.

Besides add btrfs_block_rsv structure and related helper functions,
This patch contains following changes:

Move code that decides if freed tree block should be pinned into
btrfs_free_tree_block().

Make space accounting more accurate, mainly for handling read only
block groups.
Signed-off-by: default avatarChris Mason <chris.mason@oracle.com>
parent 2ead6ae7
......@@ -280,7 +280,8 @@ int btrfs_block_can_be_shared(struct btrfs_root *root,
static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *buf,
struct extent_buffer *cow)
struct extent_buffer *cow,
int *last_ref)
{
u64 refs;
u64 owner;
......@@ -366,6 +367,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
BUG_ON(ret);
}
clean_tree_block(trans, root, buf);
*last_ref = 1;
}
return 0;
}
......@@ -392,6 +394,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
struct btrfs_disk_key disk_key;
struct extent_buffer *cow;
int level;
int last_ref = 0;
int unlock_orig = 0;
u64 parent_start;
......@@ -442,7 +445,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
(unsigned long)btrfs_header_fsid(cow),
BTRFS_FSID_SIZE);
update_ref_for_cow(trans, root, buf, cow);
update_ref_for_cow(trans, root, buf, cow, &last_ref);
if (buf == root->node) {
WARN_ON(parent && parent != buf);
......@@ -457,8 +460,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
extent_buffer_get(cow);
spin_unlock(&root->node_lock);
btrfs_free_tree_block(trans, root, buf->start, buf->len,
parent_start, root->root_key.objectid, level);
btrfs_free_tree_block(trans, root, buf, parent_start,
last_ref);
free_extent_buffer(buf);
add_root_to_dirty_list(root);
} else {
......@@ -473,8 +476,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
btrfs_set_node_ptr_generation(parent, parent_slot,
trans->transid);
btrfs_mark_buffer_dirty(parent);
btrfs_free_tree_block(trans, root, buf->start, buf->len,
parent_start, root->root_key.objectid, level);
btrfs_free_tree_block(trans, root, buf, parent_start,
last_ref);
}
if (unlock_orig)
btrfs_tree_unlock(buf);
......@@ -949,6 +952,22 @@ int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
return bin_search(eb, key, level, slot);
}
static void root_add_used(struct btrfs_root *root, u32 size)
{
spin_lock(&root->accounting_lock);
btrfs_set_root_used(&root->root_item,
btrfs_root_used(&root->root_item) + size);
spin_unlock(&root->accounting_lock);
}
static void root_sub_used(struct btrfs_root *root, u32 size)
{
spin_lock(&root->accounting_lock);
btrfs_set_root_used(&root->root_item,
btrfs_root_used(&root->root_item) - size);
spin_unlock(&root->accounting_lock);
}
/* given a node and slot number, this reads the blocks it points to. The
* extent buffer is returned with a reference taken (but unlocked).
* NULL is returned on error.
......@@ -1019,7 +1038,11 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
btrfs_tree_lock(child);
btrfs_set_lock_blocking(child);
ret = btrfs_cow_block(trans, root, child, mid, 0, &child);
BUG_ON(ret);
if (ret) {
btrfs_tree_unlock(child);
free_extent_buffer(child);
goto enospc;
}
spin_lock(&root->node_lock);
root->node = child;
......@@ -1034,11 +1057,12 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
btrfs_tree_unlock(mid);
/* once for the path */
free_extent_buffer(mid);
ret = btrfs_free_tree_block(trans, root, mid->start, mid->len,
0, root->root_key.objectid, level);
root_sub_used(root, mid->len);
btrfs_free_tree_block(trans, root, mid, 0, 1);
/* once for the root ptr */
free_extent_buffer(mid);
return ret;
return 0;
}
if (btrfs_header_nritems(mid) >
BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
......@@ -1088,23 +1112,16 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
if (wret < 0 && wret != -ENOSPC)
ret = wret;
if (btrfs_header_nritems(right) == 0) {
u64 bytenr = right->start;
u32 blocksize = right->len;
clean_tree_block(trans, root, right);
btrfs_tree_unlock(right);
free_extent_buffer(right);
right = NULL;
wret = del_ptr(trans, root, path, level + 1, pslot +
1);
if (wret)
ret = wret;
wret = btrfs_free_tree_block(trans, root,
bytenr, blocksize, 0,
root->root_key.objectid,
level);
if (wret)
ret = wret;
root_sub_used(root, right->len);
btrfs_free_tree_block(trans, root, right, 0, 1);
free_extent_buffer(right);
right = NULL;
} else {
struct btrfs_disk_key right_key;
btrfs_node_key(right, &right_key, 0);
......@@ -1136,21 +1153,15 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
BUG_ON(wret == 1);
}
if (btrfs_header_nritems(mid) == 0) {
/* we've managed to empty the middle node, drop it */
u64 bytenr = mid->start;
u32 blocksize = mid->len;
clean_tree_block(trans, root, mid);
btrfs_tree_unlock(mid);
free_extent_buffer(mid);
mid = NULL;
wret = del_ptr(trans, root, path, level + 1, pslot);
if (wret)
ret = wret;
wret = btrfs_free_tree_block(trans, root, bytenr, blocksize,
0, root->root_key.objectid, level);
if (wret)
ret = wret;
root_sub_used(root, mid->len);
btrfs_free_tree_block(trans, root, mid, 0, 1);
free_extent_buffer(mid);
mid = NULL;
} else {
/* update the parent key to reflect our changes */
struct btrfs_disk_key mid_key;
......@@ -1740,7 +1751,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
p->nodes[level + 1],
p->slots[level + 1], &b);
if (err) {
free_extent_buffer(b);
ret = err;
goto done;
}
......@@ -2076,6 +2086,8 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
if (IS_ERR(c))
return PTR_ERR(c);
root_add_used(root, root->nodesize);
memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
btrfs_set_header_nritems(c, 1);
btrfs_set_header_level(c, level);
......@@ -2134,6 +2146,7 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
int nritems;
BUG_ON(!path->nodes[level]);
btrfs_assert_tree_locked(path->nodes[level]);
lower = path->nodes[level];
nritems = btrfs_header_nritems(lower);
BUG_ON(slot > nritems);
......@@ -2202,6 +2215,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
if (IS_ERR(split))
return PTR_ERR(split);
root_add_used(root, root->nodesize);
memset_extent_buffer(split, 0, 0, sizeof(struct btrfs_header));
btrfs_set_header_level(split, btrfs_header_level(c));
btrfs_set_header_bytenr(split, split->start);
......@@ -2415,6 +2430,9 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
if (left_nritems)
btrfs_mark_buffer_dirty(left);
else
clean_tree_block(trans, root, left);
btrfs_mark_buffer_dirty(right);
btrfs_item_key(right, &disk_key, 0);
......@@ -2660,6 +2678,8 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(left);
if (right_nritems)
btrfs_mark_buffer_dirty(right);
else
clean_tree_block(trans, root, right);
btrfs_item_key(right, &disk_key, 0);
wret = fixup_low_keys(trans, root, path, &disk_key, 1);
......@@ -2669,8 +2689,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
/* then fixup the leaf pointer in the path */
if (path->slots[0] < push_items) {
path->slots[0] += old_left_nritems;
if (btrfs_header_nritems(path->nodes[0]) == 0)
clean_tree_block(trans, root, path->nodes[0]);
btrfs_tree_unlock(path->nodes[0]);
free_extent_buffer(path->nodes[0]);
path->nodes[0] = left;
......@@ -2932,10 +2950,10 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
right = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
root->root_key.objectid,
&disk_key, 0, l->start, 0);
if (IS_ERR(right)) {
BUG_ON(1);
if (IS_ERR(right))
return PTR_ERR(right);
}
root_add_used(root, root->leafsize);
memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
btrfs_set_header_bytenr(right, right->start);
......@@ -3054,7 +3072,8 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
btrfs_set_path_blocking(path);
ret = split_leaf(trans, root, &key, path, ins_len, 1);
BUG_ON(ret);
if (ret)
goto err;
path->keep_locks = 0;
btrfs_unlock_up_safe(path, 1);
......@@ -3796,9 +3815,10 @@ static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
*/
btrfs_unlock_up_safe(path, 0);
ret = btrfs_free_tree_block(trans, root, leaf->start, leaf->len,
0, root->root_key.objectid, 0);
return ret;
root_sub_used(root, leaf->len);
btrfs_free_tree_block(trans, root, leaf, 0, 1);
return 0;
}
/*
* delete the item at the leaf level in path. If that empties
......@@ -3865,6 +3885,8 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
if (leaf == root->node) {
btrfs_set_header_level(leaf, 0);
} else {
btrfs_set_path_blocking(path);
clean_tree_block(trans, root, leaf);
ret = btrfs_del_leaf(trans, root, path, leaf);
BUG_ON(ret);
}
......
......@@ -707,6 +707,20 @@ struct btrfs_space_info {
atomic_t caching_threads;
};
struct btrfs_block_rsv {
u64 size;
u64 reserved;
u64 freed[2];
struct btrfs_space_info *space_info;
struct list_head list;
spinlock_t lock;
atomic_t usage;
unsigned int priority:8;
unsigned int durable:1;
unsigned int refill_used:1;
unsigned int full:1;
};
/*
* free clusters are used to claim free space in relatively large chunks,
* allowing us to do less seeky writes. They are used for all metadata
......@@ -757,6 +771,7 @@ struct btrfs_block_group_cache {
spinlock_t lock;
u64 pinned;
u64 reserved;
u64 reserved_pinned;
u64 bytes_super;
u64 flags;
u64 sectorsize;
......@@ -822,6 +837,22 @@ struct btrfs_fs_info {
/* logical->physical extent mapping */
struct btrfs_mapping_tree mapping_tree;
/* block reservation for extent, checksum and root tree */
struct btrfs_block_rsv global_block_rsv;
/* block reservation for delay allocation */
struct btrfs_block_rsv delalloc_block_rsv;
/* block reservation for metadata operations */
struct btrfs_block_rsv trans_block_rsv;
/* block reservation for chunk tree */
struct btrfs_block_rsv chunk_block_rsv;
struct btrfs_block_rsv empty_block_rsv;
/* list of block reservations that cross multiple transactions */
struct list_head durable_block_rsv_list;
struct mutex durable_block_rsv_mutex;
u64 generation;
u64 last_trans_committed;
......@@ -1008,6 +1039,9 @@ struct btrfs_root {
struct completion kobj_unregister;
struct mutex objectid_mutex;
spinlock_t accounting_lock;
struct btrfs_block_rsv *block_rsv;
struct mutex log_mutex;
wait_queue_head_t log_writer_wait;
wait_queue_head_t log_commit_wait[2];
......@@ -1980,10 +2014,10 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
u64 parent, u64 root_objectid,
struct btrfs_disk_key *key, int level,
u64 hint, u64 empty_size);
int btrfs_free_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u32 blocksize,
u64 parent, u64 root_objectid, int level);
void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *buf,
u64 parent, int last_ref);
struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u32 blocksize,
......@@ -2037,9 +2071,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
u64 size);
int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 group_start);
int btrfs_prepare_block_group_relocation(struct btrfs_root *root,
struct btrfs_block_group_cache *group);
u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags);
void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde);
void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
......@@ -2058,6 +2089,30 @@ void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
u64 bytes);
void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
u64 bytes);
void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv);
struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root);
void btrfs_free_block_rsv(struct btrfs_root *root,
struct btrfs_block_rsv *rsv);
void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *rsv);
int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
u64 num_bytes, int *retries);
int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
u64 min_reserved, int min_factor);
int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
struct btrfs_block_rsv *dst_rsv,
u64 num_bytes);
void btrfs_block_rsv_release(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
u64 num_bytes);
int btrfs_set_block_group_ro(struct btrfs_root *root,
struct btrfs_block_group_cache *cache);
int btrfs_set_block_group_rw(struct btrfs_root *root,
struct btrfs_block_group_cache *cache);
/* ctree.c */
int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
int level, int *slot);
......
......@@ -903,6 +903,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
root->name = NULL;
root->in_sysfs = 0;
root->inode_tree = RB_ROOT;
root->block_rsv = NULL;
INIT_LIST_HEAD(&root->dirty_list);
INIT_LIST_HEAD(&root->orphan_list);
......@@ -910,6 +911,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
spin_lock_init(&root->node_lock);
spin_lock_init(&root->list_lock);
spin_lock_init(&root->inode_lock);
spin_lock_init(&root->accounting_lock);
mutex_init(&root->objectid_mutex);
mutex_init(&root->log_mutex);
init_waitqueue_head(&root->log_writer_wait);
......@@ -1620,6 +1622,13 @@ struct btrfs_root *open_ctree(struct super_block *sb,
INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
INIT_LIST_HEAD(&fs_info->space_info);
btrfs_mapping_init(&fs_info->mapping_tree);
btrfs_init_block_rsv(&fs_info->global_block_rsv);
btrfs_init_block_rsv(&fs_info->delalloc_block_rsv);
btrfs_init_block_rsv(&fs_info->trans_block_rsv);
btrfs_init_block_rsv(&fs_info->chunk_block_rsv);
btrfs_init_block_rsv(&fs_info->empty_block_rsv);
INIT_LIST_HEAD(&fs_info->durable_block_rsv_list);
mutex_init(&fs_info->durable_block_rsv_mutex);
atomic_set(&fs_info->nr_async_submits, 0);
atomic_set(&fs_info->async_delalloc_pages, 0);
atomic_set(&fs_info->async_submit_draining, 0);
......
......@@ -35,10 +35,9 @@
static int update_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, int alloc,
int mark_free);
static int update_reserved_extents(struct btrfs_block_group_cache *cache,
u64 num_bytes, int reserve);
u64 bytenr, u64 num_bytes, int alloc);
static int update_reserved_bytes(struct btrfs_block_group_cache *cache,
u64 num_bytes, int reserve, int sinfo);
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent,
......@@ -61,12 +60,6 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
static int do_chunk_alloc(struct btrfs_trans_handle *trans,
struct btrfs_root *extent_root, u64 alloc_bytes,
u64 flags, int force);
static int pin_down_bytes(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
u64 bytenr, u64 num_bytes,
int is_data, int reserved,
struct extent_buffer **must_clean);
static int find_next_key(struct btrfs_path *path, int level,
struct btrfs_key *key);
static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
......@@ -97,8 +90,12 @@ void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
{
if (atomic_dec_and_test(&cache->count))
if (atomic_dec_and_test(&cache->count)) {
WARN_ON(cache->pinned > 0);
WARN_ON(cache->reserved > 0);
WARN_ON(cache->reserved_pinned > 0);
kfree(cache);
}
}
/*
......@@ -325,7 +322,7 @@ static int caching_kthread(void *data)
exclude_super_stripes(extent_root, block_group);
spin_lock(&block_group->space_info->lock);
block_group->space_info->bytes_super += block_group->bytes_super;
block_group->space_info->bytes_readonly += block_group->bytes_super;
spin_unlock(&block_group->space_info->lock);
last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
......@@ -1880,7 +1877,6 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
return ret;
}
/* helper function to actually process a single delayed ref entry */
static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
......@@ -1900,32 +1896,14 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
BUG_ON(extent_op);
head = btrfs_delayed_node_to_head(node);
if (insert_reserved) {
int mark_free = 0;
struct extent_buffer *must_clean = NULL;
ret = pin_down_bytes(trans, root, NULL,
node->bytenr, node->num_bytes,
head->is_data, 1, &must_clean);
if (ret > 0)
mark_free = 1;
if (must_clean) {
clean_tree_block(NULL, root, must_clean);
btrfs_tree_unlock(must_clean);
free_extent_buffer(must_clean);
}
btrfs_pin_extent(root, node->bytenr,
node->num_bytes, 1);
if (head->is_data) {
ret = btrfs_del_csums(trans, root,
node->bytenr,
node->num_bytes);
BUG_ON(ret);
}
if (mark_free) {
ret = btrfs_free_reserved_extent(root,
node->bytenr,
node->num_bytes);
BUG_ON(ret);
}
}
mutex_unlock(&head->mutex);
return 0;
......@@ -2356,6 +2334,8 @@ int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
ret = 0;
out:
btrfs_free_path(path);
if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
WARN_ON(ret > 0);
return ret;
}
......@@ -2706,7 +2686,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
found->bytes_pinned = 0;
found->bytes_reserved = 0;
found->bytes_readonly = 0;
found->bytes_delalloc = 0;
found->bytes_may_use = 0;
found->full = 0;
found->force_alloc = 0;
*space_info = found;
......@@ -2731,19 +2711,6 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
}
}
static void set_block_group_readonly(struct btrfs_block_group_cache *cache)
{
spin_lock(&cache->space_info->lock);
spin_lock(&cache->lock);
if (!cache->ro) {
cache->space_info->bytes_readonly += cache->key.offset -
btrfs_block_group_used(&cache->item);
cache->ro = 1;
}
spin_unlock(&cache->lock);
spin_unlock(&cache->space_info->lock);
}
u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
{
u64 num_devices = root->fs_info->fs_devices->rw_devices;
......@@ -2802,11 +2769,8 @@ static u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data)
void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode)
{
u64 alloc_target;
alloc_target = btrfs_get_alloc_profile(root, 1);
BTRFS_I(inode)->space_info = __find_space_info(root->fs_info,
alloc_target);
BTRFS_BLOCK_GROUP_DATA);
}
static u64 calculate_bytes_needed(struct btrfs_root *root, int num_items)
......@@ -3412,10 +3376,334 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
return reclaimed >= to_reclaim;
}
static int should_retry_reserve(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
u64 num_bytes, int *retries)
{
struct btrfs_space_info *space_info = block_rsv->space_info;
int ret;
if ((*retries) > 2)
return -ENOSPC;
ret = maybe_allocate_chunk(trans, root, space_info, num_bytes);
if (ret)
return 1;
if (trans && trans->transaction->in_commit)
return -ENOSPC;
ret = shrink_delalloc(trans, root, space_info, num_bytes);
if (ret)
return ret;
spin_lock(&space_info->lock);
if (space_info->bytes_pinned < num_bytes)
ret = 1;
spin_unlock(&space_info->lock);
if (ret)
return -ENOSPC;
(*retries)++;
if (trans)
return -EAGAIN;
trans = btrfs_join_transaction(root, 1);
BUG_ON(IS_ERR(trans));
ret = btrfs_commit_transaction(trans, root);
BUG_ON(ret);
return 1;
}
static int reserve_metadata_bytes(struct btrfs_block_rsv *block_rsv,
u64 num_bytes)
{
struct btrfs_space_info *space_info = block_rsv->space_info;
u64 unused;
int ret = -ENOSPC;
spin_lock(&space_info->lock);
unused = space_info->bytes_used + space_info->bytes_reserved +
space_info->bytes_pinned + space_info->bytes_readonly;
if (unused < space_info->total_bytes)
unused = space_info->total_bytes - unused;
else
unused = 0;
if (unused >= num_bytes) {
if (block_rsv->priority >= 10) {
space_info->bytes_reserved += num_bytes;
ret = 0;
} else {
if ((unused + block_rsv->reserved) *
block_rsv->priority >=
(num_bytes + block_rsv->reserved) * 10) {
space_info->bytes_reserved += num_bytes;
ret = 0;
}
}
}
spin_unlock(&space_info->lock);
return ret;
}
static struct btrfs_block_rsv *get_block_rsv(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
struct btrfs_block_rsv *block_rsv;
if (root->ref_cows)
block_rsv = trans->block_rsv;
else
block_rsv = root->block_rsv;
if (!block_rsv)
block_rsv = &root->fs_info->empty_block_rsv;
return block_rsv;
}
static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
u64 num_bytes)
{
int ret = -ENOSPC;
spin_lock(&block_rsv->lock);
if (block_rsv->reserved >= num_bytes) {
block_rsv->reserved -= num_bytes;
if (block_rsv->reserved < block_rsv->size)
block_rsv->full = 0;
ret = 0;
}
spin_unlock(&block_rsv->lock);
return ret;
}
static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv,
u64 num_bytes, int update_size)
{
spin_lock(&block_rsv->lock);
block_rsv->reserved += num_bytes;
if (update_size)
block_rsv->size += num_bytes;
else if (block_rsv->reserved >= block_rsv->size)
block_rsv->full = 1;
spin_unlock(&block_rsv->lock);
}
void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv,
struct btrfs_block_rsv *dest, u64 num_bytes)
{
struct btrfs_space_info *space_info = block_rsv->space_info;
spin_lock(&block_rsv->lock);
if (num_bytes == (u64)-1)
num_bytes = block_rsv->size;
block_rsv->size -= num_bytes;
if (block_rsv->reserved >= block_rsv->size) {
num_bytes = block_rsv->reserved - block_rsv->size;
block_rsv->reserved = block_rsv->size;
block_rsv->full = 1;
} else {
num_bytes = 0;
}
spin_unlock(&block_rsv->lock);
if (num_bytes > 0) {
if (dest) {
block_rsv_add_bytes(dest, num_bytes, 0);
} else {
spin_lock(&space_info->lock);
space_info->bytes_reserved -= num_bytes;
spin_unlock(&space_info->lock);
}
}
}
static int block_rsv_migrate_bytes(struct btrfs_block_rsv *src,
struct btrfs_block_rsv *dst, u64 num_bytes)
{
int ret;
ret = block_rsv_use_bytes(src, num_bytes);
if (ret)
return ret;
block_rsv_add_bytes(dst, num_bytes, 1);
return 0;
}
void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv)
{
memset(rsv, 0, sizeof(*rsv));
spin_lock_init(&rsv->lock);
atomic_set(&rsv->usage, 1);
rsv->priority = 6;
INIT_LIST_HEAD(&rsv->list);
}
struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root)
{
struct btrfs_block_rsv *block_rsv;
struct btrfs_fs_info *fs_info = root->fs_info;
u64 alloc_target;
block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS);
if (!block_rsv)
return NULL;
btrfs_init_block_rsv(block_rsv);
alloc_target = btrfs_get_alloc_profile(root, 0);
block_rsv->space_info = __find_space_info(fs_info,
BTRFS_BLOCK_GROUP_METADATA);
return block_rsv;
}
void btrfs_free_block_rsv(struct btrfs_root *root,
struct btrfs_block_rsv *rsv)
{
if (rsv && atomic_dec_and_test(&rsv->usage)) {
btrfs_block_rsv_release(root, rsv, (u64)-1);
if (!rsv->durable)
kfree(rsv);
}
}
/*
* make the block_rsv struct be able to capture freed space.
* the captured space will re-add to the the block_rsv struct
* after transaction commit
*/
void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *block_rsv)
{
block_rsv->durable = 1;
mutex_lock(&fs_info->durable_block_rsv_mutex);
list_add_tail(&block_rsv->list, &fs_info->durable_block_rsv_list);
mutex_unlock(&fs_info->durable_block_rsv_mutex);
}
int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
u64 num_bytes, int *retries)
{
int ret;
if (num_bytes == 0)
return 0;
again:
ret = reserve_metadata_bytes(block_rsv, num_bytes);
if (!ret) {
block_rsv_add_bytes(block_rsv, num_bytes, 1);
return 0;
}
ret = should_retry_reserve(trans, root, block_rsv, num_bytes, retries);
if (ret > 0)
goto again;
return ret;
}
int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
u64 min_reserved, int min_factor)
{
u64 num_bytes = 0;
int commit_trans = 0;
int ret = -ENOSPC;
if (!block_rsv)
return 0;
spin_lock(&block_rsv->lock);
if (min_factor > 0)
num_bytes = div_factor(block_rsv->size, min_factor);
if (min_reserved > num_bytes)
num_bytes = min_reserved;
if (block_rsv->reserved >= num_bytes) {
ret = 0;
} else {
num_bytes -= block_rsv->reserved;
if (block_rsv->durable &&
block_rsv->freed[0] + block_rsv->freed[1] >= num_bytes)
commit_trans = 1;
}
spin_unlock(&block_rsv->lock);
if (!ret)
return 0;
if (block_rsv->refill_used) {
ret = reserve_metadata_bytes(block_rsv, num_bytes);
if (!ret) {
block_rsv_add_bytes(block_rsv, num_bytes, 0);
return 0;
}
}
if (commit_trans) {
if (trans)
return -EAGAIN;
trans = btrfs_join_transaction(root, 1);
BUG_ON(IS_ERR(trans));
ret = btrfs_commit_transaction(trans, root);
return 0;
}
WARN_ON(1);
printk(KERN_INFO"block_rsv size %llu reserved %llu freed %llu %llu\n",
block_rsv->size, block_rsv->reserved,
block_rsv->freed[0], block_rsv->freed[1]);
return -ENOSPC;
}
int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
struct btrfs_block_rsv *dst_rsv,
u64 num_bytes)
{
return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
}
void btrfs_block_rsv_release(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
u64 num_bytes)
{
struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
if (global_rsv->full || global_rsv == block_rsv ||
block_rsv->space_info != global_rsv->space_info)
global_rsv = NULL;
block_rsv_release_bytes(block_rsv, global_rsv, num_bytes);
}
static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
{
struct btrfs_space_info *space_info;
space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
fs_info->chunk_block_rsv.space_info = space_info;
fs_info->chunk_block_rsv.priority = 10;
space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
fs_info->trans_block_rsv.space_info = space_info;
fs_info->empty_block_rsv.space_info = space_info;
fs_info->empty_block_rsv.priority = 10;
fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv;
}
static int update_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, int alloc,
int mark_free)
u64 bytenr, u64 num_bytes, int alloc)
{
struct btrfs_block_group_cache *cache;
struct btrfs_fs_info *info = root->fs_info;
......@@ -3459,30 +3747,21 @@ static int update_block_group(struct btrfs_trans_handle *trans,
cache->space_info->bytes_reserved -= num_bytes;
cache->space_info->bytes_used += num_bytes;
cache->space_info->disk_used += num_bytes * factor;
if (cache->ro)
cache->space_info->bytes_readonly -= num_bytes;
spin_unlock(&cache->lock);
spin_unlock(&cache->space_info->lock);
} else {
old_val -= num_bytes;
btrfs_set_block_group_used(&cache->item, old_val);
cache->pinned += num_bytes;
cache->space_info->bytes_pinned += num_bytes;
cache->space_info->bytes_used -= num_bytes;
cache->space_info->disk_used -= num_bytes * factor;
if (cache->ro)
cache->space_info->bytes_readonly += num_bytes;
spin_unlock(&cache->lock);
spin_unlock(&cache->space_info->lock);
if (mark_free) {
int ret;
ret = btrfs_discard_extent(root, bytenr,
num_bytes);
WARN_ON(ret);
ret = btrfs_add_free_space(cache, bytenr,
num_bytes);
WARN_ON(ret);
}
set_extent_dirty(info->pinned_extents,
bytenr, bytenr + num_bytes - 1,
GFP_NOFS | __GFP_NOFAIL);
}
btrfs_put_block_group(cache);
total -= num_bytes;
......@@ -3506,18 +3785,10 @@ static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
return bytenr;
}
/*
* this function must be called within transaction
*/
int btrfs_pin_extent(struct btrfs_root *root,
u64 bytenr, u64 num_bytes, int reserved)
static int pin_down_extent(struct btrfs_root *root,
struct btrfs_block_group_cache *cache,
u64 bytenr, u64 num_bytes, int reserved)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_block_group_cache *cache;
cache = btrfs_lookup_block_group(fs_info, bytenr);
BUG_ON(!cache);
spin_lock(&cache->space_info->lock);
spin_lock(&cache->lock);
cache->pinned += num_bytes;
......@@ -3529,28 +3800,68 @@ int btrfs_pin_extent(struct btrfs_root *root,
spin_unlock(&cache->lock);
spin_unlock(&cache->space_info->lock);
btrfs_put_block_group(cache);
set_extent_dirty(root->fs_info->pinned_extents, bytenr,
bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
return 0;
}
set_extent_dirty(fs_info->pinned_extents,
bytenr, bytenr + num_bytes - 1, GFP_NOFS);
/*
* this function must be called within transaction
*/
int btrfs_pin_extent(struct btrfs_root *root,
u64 bytenr, u64 num_bytes, int reserved)
{
struct btrfs_block_group_cache *cache;
cache = btrfs_lookup_block_group(root->fs_info, bytenr);
BUG_ON(!cache);
pin_down_extent(root, cache, bytenr, num_bytes, reserved);
btrfs_put_block_group(cache);
return 0;
}
static int update_reserved_extents(struct btrfs_block_group_cache *cache,
u64 num_bytes, int reserve)
/*
* update size of reserved extents. this function may return -EAGAIN
* if 'reserve' is true or 'sinfo' is false.
*/
static int update_reserved_bytes(struct btrfs_block_group_cache *cache,
u64 num_bytes, int reserve, int sinfo)
{
spin_lock(&cache->space_info->lock);
spin_lock(&cache->lock);
if (reserve) {
cache->reserved += num_bytes;
cache->space_info->bytes_reserved += num_bytes;
int ret = 0;
if (sinfo) {
struct btrfs_space_info *space_info = cache->space_info;
spin_lock(&space_info->lock);
spin_lock(&cache->lock);
if (reserve) {
if (cache->ro) {
ret = -EAGAIN;
} else {
cache->reserved += num_bytes;
space_info->bytes_reserved += num_bytes;
}
} else {
if (cache->ro)
space_info->bytes_readonly += num_bytes;
cache->reserved -= num_bytes;
space_info->bytes_reserved -= num_bytes;
}
spin_unlock(&cache->lock);
spin_unlock(&space_info->lock);
} else {
cache->reserved -= num_bytes;
cache->space_info->bytes_reserved -= num_bytes;
spin_lock(&cache->lock);
if (cache->ro) {
ret = -EAGAIN;
} else {
if (reserve)
cache->reserved += num_bytes;
else
cache->reserved -= num_bytes;
}
spin_unlock(&cache->lock);
}
spin_unlock(&cache->lock);
spin_unlock(&cache->space_info->lock);
return 0;
return ret;
}
int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
......@@ -3607,14 +3918,21 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
btrfs_add_free_space(cache, start, len);
}
start += len;
spin_lock(&cache->space_info->lock);
spin_lock(&cache->lock);
cache->pinned -= len;
cache->space_info->bytes_pinned -= len;
if (cache->ro) {
cache->space_info->bytes_readonly += len;
} else if (cache->reserved_pinned > 0) {
len = min(len, cache->reserved_pinned);
cache->reserved_pinned -= len;
cache->space_info->bytes_reserved += len;
}
spin_unlock(&cache->lock);
spin_unlock(&cache->space_info->lock);
start += len;
}
if (cache)
......@@ -3627,8 +3945,11 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct extent_io_tree *unpin;
struct btrfs_block_rsv *block_rsv;
struct btrfs_block_rsv *next_rsv;
u64 start;
u64 end;
int idx;
int ret;
if (fs_info->pinned_extents == &fs_info->freed_extents[0])
......@@ -3649,59 +3970,30 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
cond_resched();
}
return ret;
}
static int pin_down_bytes(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
u64 bytenr, u64 num_bytes,
int is_data, int reserved,
struct extent_buffer **must_clean)
{
int err = 0;
struct extent_buffer *buf;
if (is_data)
goto pinit;
mutex_lock(&fs_info->durable_block_rsv_mutex);
list_for_each_entry_safe(block_rsv, next_rsv,
&fs_info->durable_block_rsv_list, list) {
/*
* discard is sloooow, and so triggering discards on
* individual btree blocks isn't a good plan. Just
* pin everything in discard mode.
*/
if (btrfs_test_opt(root, DISCARD))
goto pinit;
buf = btrfs_find_tree_block(root, bytenr, num_bytes);
if (!buf)
goto pinit;
idx = trans->transid & 0x1;
if (block_rsv->freed[idx] > 0) {
block_rsv_add_bytes(block_rsv,
block_rsv->freed[idx], 0);
block_rsv->freed[idx] = 0;
}
if (atomic_read(&block_rsv->usage) == 0) {
btrfs_block_rsv_release(root, block_rsv, (u64)-1);
/* we can reuse a block if it hasn't been written
* and it is from this transaction. We can't
* reuse anything from the tree log root because
* it has tiny sub-transactions.
*/
if (btrfs_buffer_uptodate(buf, 0) &&
btrfs_try_tree_lock(buf)) {
u64 header_owner = btrfs_header_owner(buf);
u64 header_transid = btrfs_header_generation(buf);
if (header_owner != BTRFS_TREE_LOG_OBJECTID &&
header_transid == trans->transid &&
!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
*must_clean = buf;
return 1;
if (block_rsv->freed[0] == 0 &&
block_rsv->freed[1] == 0) {
list_del_init(&block_rsv->list);
kfree(block_rsv);
}
} else {
btrfs_block_rsv_release(root, block_rsv, 0);
}
btrfs_tree_unlock(buf);
}
free_extent_buffer(buf);
pinit:
if (path)
btrfs_set_path_blocking(path);
/* unlocks the pinned mutex */
btrfs_pin_extent(root, bytenr, num_bytes, reserved);
mutex_unlock(&fs_info->durable_block_rsv_mutex);
BUG_ON(err < 0);
return 0;
}
......@@ -3862,9 +4154,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
BUG_ON(ret);
}
} else {
int mark_free = 0;
struct extent_buffer *must_clean = NULL;
if (found_extent) {
BUG_ON(is_data && refs_to_drop !=
extent_data_ref_count(root, path, iref));
......@@ -3877,31 +4166,11 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
}
}
ret = pin_down_bytes(trans, root, path, bytenr,
num_bytes, is_data, 0, &must_clean);
if (ret > 0)
mark_free = 1;
BUG_ON(ret < 0);
/*
* it is going to be very rare for someone to be waiting
* on the block we're freeing. del_items might need to
* schedule, so rather than get fancy, just force it
* to blocking here
*/
if (must_clean)
btrfs_set_lock_blocking(must_clean);
ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
num_to_del);
BUG_ON(ret);
btrfs_release_path(extent_root, path);
if (must_clean) {
clean_tree_block(NULL, root, must_clean);
btrfs_tree_unlock(must_clean);
free_extent_buffer(must_clean);
}
if (is_data) {
ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
BUG_ON(ret);
......@@ -3911,8 +4180,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
(bytenr + num_bytes - 1) >> PAGE_CACHE_SHIFT);
}
ret = update_block_group(trans, root, bytenr, num_bytes, 0,
mark_free);
ret = update_block_group(trans, root, bytenr, num_bytes, 0);
BUG_ON(ret);
}
btrfs_free_path(path);
......@@ -3920,7 +4188,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
}
/*
* when we free an extent, it is possible (and likely) that we free the last
* when we free an block, it is possible (and likely) that we free the last
* delayed ref for that extent as well. This searches the delayed ref tree for
* a given extent, and if there are no other delayed refs to be processed, it
* removes it from the tree.
......@@ -3932,7 +4200,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_delayed_ref_node *ref;
struct rb_node *node;
int ret;
int ret = 0;
delayed_refs = &trans->transaction->delayed_refs;
spin_lock(&delayed_refs->lock);
......@@ -3984,17 +4252,99 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
list_del_init(&head->cluster);
spin_unlock(&delayed_refs->lock);
ret = run_one_delayed_ref(trans, root->fs_info->tree_root,
&head->node, head->extent_op,
head->must_insert_reserved);
BUG_ON(ret);
BUG_ON(head->extent_op);
if (head->must_insert_reserved)
ret = 1;
mutex_unlock(&head->mutex);
btrfs_put_delayed_ref(&head->node);
return 0;
return ret;
out:
spin_unlock(&delayed_refs->lock);
return 0;
}
void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *buf,
u64 parent, int last_ref)
{
struct btrfs_block_rsv *block_rsv;
struct btrfs_block_group_cache *cache = NULL;
int ret;
if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
ret = btrfs_add_delayed_tree_ref(trans, buf->start, buf->len,
parent, root->root_key.objectid,
btrfs_header_level(buf),
BTRFS_DROP_DELAYED_REF, NULL);
BUG_ON(ret);
}
if (!last_ref)
return;
block_rsv = get_block_rsv(trans, root);
cache = btrfs_lookup_block_group(root->fs_info, buf->start);
BUG_ON(block_rsv->space_info != cache->space_info);
if (btrfs_header_generation(buf) == trans->transid) {
if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
ret = check_ref_cleanup(trans, root, buf->start);
if (!ret)
goto pin;
}
if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
pin_down_extent(root, cache, buf->start, buf->len, 1);
goto pin;
}
WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
btrfs_add_free_space(cache, buf->start, buf->len);
ret = update_reserved_bytes(cache, buf->len, 0, 0);
if (ret == -EAGAIN) {
/* block group became read-only */
update_reserved_bytes(cache, buf->len, 0, 1);
goto out;
}
ret = 1;
spin_lock(&block_rsv->lock);
if (block_rsv->reserved < block_rsv->size) {
block_rsv->reserved += buf->len;
ret = 0;
}
spin_unlock(&block_rsv->lock);
if (ret) {
spin_lock(&cache->space_info->lock);
cache->space_info->bytes_reserved -= buf->len;
spin_unlock(&cache->space_info->lock);
}
goto out;
}
pin:
if (block_rsv->durable && !cache->ro) {
ret = 0;
spin_lock(&cache->lock);
if (!cache->ro) {
cache->reserved_pinned += buf->len;
ret = 1;
}
spin_unlock(&cache->lock);
if (ret) {
spin_lock(&block_rsv->lock);
block_rsv->freed[trans->transid & 0x1] += buf->len;
spin_unlock(&block_rsv->lock);
}
}
out:
btrfs_put_block_group(cache);
}
int btrfs_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent,
......@@ -4016,8 +4366,6 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
parent, root_objectid, (int)owner,
BTRFS_DROP_DELAYED_REF, NULL);
BUG_ON(ret);
ret = check_ref_cleanup(trans, root, bytenr);
BUG_ON(ret);
} else {
ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes,
parent, root_objectid, owner,
......@@ -4027,21 +4375,6 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
return ret;
}
int btrfs_free_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u32 blocksize,
u64 parent, u64 root_objectid, int level)
{
u64 used;
spin_lock(&root->node_lock);
used = btrfs_root_used(&root->root_item) - blocksize;
btrfs_set_root_used(&root->root_item, used);
spin_unlock(&root->node_lock);
return btrfs_free_extent(trans, root, bytenr, blocksize,
parent, root_objectid, level, 0);
}
static u64 stripe_align(struct btrfs_root *root, u64 val)
{
u64 mask = ((u64)root->stripesize - 1);
......@@ -4131,7 +4464,6 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
u64 num_bytes, u64 empty_size,
u64 search_start, u64 search_end,
u64 hint_byte, struct btrfs_key *ins,
u64 exclude_start, u64 exclude_nr,
int data)
{
int ret = 0;
......@@ -4143,8 +4475,8 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
int done_chunk_alloc = 0;
struct btrfs_space_info *space_info;
int last_ptr_loop = 0;
int index = 0;
int loop = 0;
int index = 0;
bool found_uncached_bg = false;
bool failed_cluster_refill = false;
bool failed_alloc = false;
......@@ -4415,23 +4747,22 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
goto loop;
}
if (exclude_nr > 0 &&
(search_start + num_bytes > exclude_start &&
search_start < exclude_start + exclude_nr)) {
search_start = exclude_start + exclude_nr;
ins->objectid = search_start;
ins->offset = num_bytes;
if (offset < search_start)
btrfs_add_free_space(block_group, offset,
search_start - offset);
BUG_ON(offset > search_start);
ret = update_reserved_bytes(block_group, num_bytes, 1,
(data & BTRFS_BLOCK_GROUP_DATA));
if (ret == -EAGAIN) {
btrfs_add_free_space(block_group, offset, num_bytes);
/*
* if search_start is still in this block group
* then we just re-search this block group
*/
if (search_start >= block_group->key.objectid &&
search_start < (block_group->key.objectid +
block_group->key.offset))
goto have_block_group;
goto loop;
}
/* we are all good, lets return */
ins->objectid = search_start;
ins->offset = num_bytes;
......@@ -4439,10 +4770,6 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
btrfs_add_free_space(block_group, offset,
search_start - offset);
BUG_ON(offset > search_start);
update_reserved_extents(block_group, num_bytes, 1);
/* we are all good, lets return */
break;
loop:
failed_cluster_refill = false;
......@@ -4616,9 +4943,8 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
WARN_ON(num_bytes < root->sectorsize);
ret = find_free_extent(trans, root, num_bytes, empty_size,
search_start, search_end, hint_byte, ins,
trans->alloc_exclude_start,
trans->alloc_exclude_nr, data);
search_start, search_end, hint_byte,
ins, data);
if (ret == -ENOSPC && num_bytes > min_alloc_size) {
num_bytes = num_bytes >> 1;
......@@ -4656,7 +4982,7 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
ret = btrfs_discard_extent(root, start, len);
btrfs_add_free_space(cache, start, len);
update_reserved_extents(cache, len, 0);
update_reserved_bytes(cache, len, 0, 1);
btrfs_put_block_group(cache);
return ret;
......@@ -4719,8 +5045,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(path->nodes[0]);
btrfs_free_path(path);
ret = update_block_group(trans, root, ins->objectid, ins->offset,
1, 0);
ret = update_block_group(trans, root, ins->objectid, ins->offset, 1);
if (ret) {
printk(KERN_ERR "btrfs update block group failed for %llu "
"%llu\n", (unsigned long long)ins->objectid,
......@@ -4780,8 +5105,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(leaf);
btrfs_free_path(path);
ret = update_block_group(trans, root, ins->objectid, ins->offset,
1, 0);
ret = update_block_group(trans, root, ins->objectid, ins->offset, 1);
if (ret) {
printk(KERN_ERR "btrfs update block group failed for %llu "
"%llu\n", (unsigned long long)ins->objectid,
......@@ -4857,73 +5181,14 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
put_caching_control(caching_ctl);
}
update_reserved_extents(block_group, ins->offset, 1);
ret = update_reserved_bytes(block_group, ins->offset, 1, 1);
BUG_ON(ret);
btrfs_put_block_group(block_group);
ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
0, owner, offset, ins, 1);
return ret;
}
/*
* finds a free extent and does all the dirty work required for allocation
* returns the key for the extent through ins, and a tree buffer for
* the first block of the extent through buf.
*
* returns 0 if everything worked, non-zero otherwise.
*/
static int alloc_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 num_bytes, u64 parent, u64 root_objectid,
struct btrfs_disk_key *key, int level,
u64 empty_size, u64 hint_byte, u64 search_end,
struct btrfs_key *ins)
{
int ret;
u64 flags = 0;
ret = btrfs_reserve_extent(trans, root, num_bytes, num_bytes,
empty_size, hint_byte, search_end,
ins, 0);
if (ret)
return ret;
if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
if (parent == 0)
parent = ins->objectid;
flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
} else
BUG_ON(parent > 0);
if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
struct btrfs_delayed_extent_op *extent_op;
extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
BUG_ON(!extent_op);
if (key)
memcpy(&extent_op->key, key, sizeof(extent_op->key));
else
memset(&extent_op->key, 0, sizeof(extent_op->key));
extent_op->flags_to_set = flags;
extent_op->update_key = 1;
extent_op->update_flags = 1;
extent_op->is_data = 0;
ret = btrfs_add_delayed_tree_ref(trans, ins->objectid,
ins->offset, parent, root_objectid,
level, BTRFS_ADD_DELAYED_EXTENT,
extent_op);
BUG_ON(ret);
}
if (root_objectid == root->root_key.objectid) {
u64 used;
spin_lock(&root->node_lock);
used = btrfs_root_used(&root->root_item) + num_bytes;
btrfs_set_root_used(&root->root_item, used);
spin_unlock(&root->node_lock);
}
return ret;
}
struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u32 blocksize,
......@@ -4962,8 +5227,45 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
return buf;
}
static struct btrfs_block_rsv *
use_block_rsv(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u32 blocksize)
{
struct btrfs_block_rsv *block_rsv;
int ret;
block_rsv = get_block_rsv(trans, root);
if (block_rsv->size == 0) {
ret = reserve_metadata_bytes(block_rsv, blocksize);
if (ret)
return ERR_PTR(ret);
return block_rsv;
}
ret = block_rsv_use_bytes(block_rsv, blocksize);
if (!ret)
return block_rsv;
WARN_ON(1);
printk(KERN_INFO"block_rsv size %llu reserved %llu freed %llu %llu\n",
block_rsv->size, block_rsv->reserved,
block_rsv->freed[0], block_rsv->freed[1]);
return ERR_PTR(-ENOSPC);
}
static void unuse_block_rsv(struct btrfs_block_rsv *block_rsv, u32 blocksize)
{
block_rsv_add_bytes(block_rsv, blocksize, 0);
block_rsv_release_bytes(block_rsv, NULL, 0);
}
/*
* helper function to allocate a block for a given tree
* finds a free extent and does all the dirty work required for allocation
* returns the key for the extent through ins, and a tree buffer for
* the first block of the extent through buf.
*
* returns the tree buffer or NULL.
*/
struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
......@@ -4973,18 +5275,53 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
u64 hint, u64 empty_size)
{
struct btrfs_key ins;
int ret;
struct btrfs_block_rsv *block_rsv;
struct extent_buffer *buf;
u64 flags = 0;
int ret;
ret = alloc_tree_block(trans, root, blocksize, parent, root_objectid,
key, level, empty_size, hint, (u64)-1, &ins);
block_rsv = use_block_rsv(trans, root, blocksize);
if (IS_ERR(block_rsv))
return ERR_CAST(block_rsv);
ret = btrfs_reserve_extent(trans, root, blocksize, blocksize,
empty_size, hint, (u64)-1, &ins, 0);
if (ret) {
BUG_ON(ret > 0);
unuse_block_rsv(block_rsv, blocksize);
return ERR_PTR(ret);
}
buf = btrfs_init_new_buffer(trans, root, ins.objectid,
blocksize, level);
BUG_ON(IS_ERR(buf));
if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
if (parent == 0)
parent = ins.objectid;
flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
} else
BUG_ON(parent > 0);
if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
struct btrfs_delayed_extent_op *extent_op;
extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
BUG_ON(!extent_op);
if (key)
memcpy(&extent_op->key, key, sizeof(extent_op->key));
else
memset(&extent_op->key, 0, sizeof(extent_op->key));
extent_op->flags_to_set = flags;
extent_op->update_key = 1;
extent_op->update_flags = 1;
extent_op->is_data = 0;
ret = btrfs_add_delayed_tree_ref(trans, ins.objectid,
ins.offset, parent, root_objectid,
level, BTRFS_ADD_DELAYED_EXTENT,
extent_op);
BUG_ON(ret);
}
return buf;
}
......@@ -5309,7 +5646,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
struct btrfs_path *path,
struct walk_control *wc)
{
int ret = 0;
int ret;
int level = wc->level;
struct extent_buffer *eb = path->nodes[level];
u64 parent = 0;
......@@ -5387,13 +5724,11 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
btrfs_header_owner(path->nodes[level + 1]));
}
ret = btrfs_free_extent(trans, root, eb->start, eb->len, parent,
root->root_key.objectid, level, 0);
BUG_ON(ret);
btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1);
out:
wc->refs[level] = 0;
wc->flags[level] = 0;
return ret;
return 0;
}
static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
......@@ -7216,48 +7551,80 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
return flags;
}
static int __alloc_chunk_for_shrink(struct btrfs_root *root,
struct btrfs_block_group_cache *shrink_block_group,
int force)
static int set_block_group_ro(struct btrfs_block_group_cache *cache)
{
struct btrfs_trans_handle *trans;
u64 new_alloc_flags;
u64 calc;
struct btrfs_space_info *sinfo = cache->space_info;
u64 num_bytes;
int ret = -ENOSPC;
spin_lock(&shrink_block_group->lock);
if (btrfs_block_group_used(&shrink_block_group->item) +
shrink_block_group->reserved > 0) {
spin_unlock(&shrink_block_group->lock);
if (cache->ro)
return 0;
trans = btrfs_start_transaction(root, 1);
spin_lock(&shrink_block_group->lock);
spin_lock(&sinfo->lock);
spin_lock(&cache->lock);
num_bytes = cache->key.offset - cache->reserved - cache->pinned -
cache->bytes_super - btrfs_block_group_used(&cache->item);
if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned +
sinfo->bytes_may_use + sinfo->bytes_readonly +
cache->reserved_pinned + num_bytes < sinfo->total_bytes) {
sinfo->bytes_readonly += num_bytes;
sinfo->bytes_reserved += cache->reserved_pinned;
cache->reserved_pinned = 0;
cache->ro = 1;
ret = 0;
}
spin_unlock(&cache->lock);
spin_unlock(&sinfo->lock);
return ret;
}
new_alloc_flags = update_block_group_flags(root,
shrink_block_group->flags);
if (new_alloc_flags != shrink_block_group->flags) {
calc =
btrfs_block_group_used(&shrink_block_group->item);
} else {
calc = shrink_block_group->key.offset;
}
spin_unlock(&shrink_block_group->lock);
int btrfs_set_block_group_ro(struct btrfs_root *root,
struct btrfs_block_group_cache *cache)
do_chunk_alloc(trans, root->fs_info->extent_root,
calc + 2 * 1024 * 1024, new_alloc_flags, force);
{
struct btrfs_trans_handle *trans;
u64 alloc_flags;
int ret;
btrfs_end_transaction(trans, root);
} else
spin_unlock(&shrink_block_group->lock);
return 0;
}
BUG_ON(cache->ro);
trans = btrfs_join_transaction(root, 1);
BUG_ON(IS_ERR(trans));
int btrfs_prepare_block_group_relocation(struct btrfs_root *root,
struct btrfs_block_group_cache *group)
alloc_flags = update_block_group_flags(root, cache->flags);
if (alloc_flags != cache->flags)
do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
ret = set_block_group_ro(cache);
if (!ret)
goto out;
alloc_flags = get_alloc_profile(root, cache->space_info->flags);
ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
if (ret < 0)
goto out;
ret = set_block_group_ro(cache);
out:
btrfs_end_transaction(trans, root);
return ret;
}
int btrfs_set_block_group_rw(struct btrfs_root *root,
struct btrfs_block_group_cache *cache)
{
__alloc_chunk_for_shrink(root, group, 1);
set_block_group_readonly(group);
struct btrfs_space_info *sinfo = cache->space_info;
u64 num_bytes;
BUG_ON(!cache->ro);
spin_lock(&sinfo->lock);
spin_lock(&cache->lock);
num_bytes = cache->key.offset - cache->reserved - cache->pinned -
cache->bytes_super - btrfs_block_group_used(&cache->item);
sinfo->bytes_readonly -= num_bytes;
cache->ro = 0;
spin_unlock(&cache->lock);
spin_unlock(&sinfo->lock);
return 0;
}
......@@ -7428,7 +7795,11 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
space_info = list_entry(info->space_info.next,
struct btrfs_space_info,
list);
if (space_info->bytes_pinned > 0 ||
space_info->bytes_reserved > 0) {
WARN_ON(1);
dump_space_info(space_info, 0, 0);
}
list_del(&space_info->list);
kfree(space_info);
}
......@@ -7476,7 +7847,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
cache = kzalloc(sizeof(*cache), GFP_NOFS);
if (!cache) {
ret = -ENOMEM;
break;
goto error;
}
atomic_set(&cache->count, 1);
......@@ -7533,7 +7904,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
BUG_ON(ret);
cache->space_info = space_info;
spin_lock(&cache->space_info->lock);
cache->space_info->bytes_super += cache->bytes_super;
cache->space_info->bytes_readonly += cache->bytes_super;
spin_unlock(&cache->space_info->lock);
__link_block_group(space_info, cache);
......@@ -7543,7 +7914,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
set_avail_alloc_bits(root->fs_info, cache->flags);
if (btrfs_chunk_readonly(root, cache->key.objectid))
set_block_group_readonly(cache);
set_block_group_ro(cache);
}
list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) {
......@@ -7557,10 +7928,12 @@ int btrfs_read_block_groups(struct btrfs_root *root)
* mirrored block groups.
*/
list_for_each_entry(cache, &space_info->block_groups[3], list)
set_block_group_readonly(cache);
set_block_group_ro(cache);
list_for_each_entry(cache, &space_info->block_groups[4], list)
set_block_group_readonly(cache);
set_block_group_ro(cache);
}
init_global_block_rsv(info);
ret = 0;
error:
btrfs_free_path(path);
......@@ -7621,7 +7994,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
BUG_ON(ret);
spin_lock(&cache->space_info->lock);
cache->space_info->bytes_super += cache->bytes_super;
cache->space_info->bytes_readonly += cache->bytes_super;
spin_unlock(&cache->space_info->lock);
__link_block_group(cache->space_info, cache);
......
......@@ -3514,6 +3514,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
struct btrfs_fs_info *fs_info = extent_root->fs_info;
struct reloc_control *rc;
int ret;
int rw = 0;
int err = 0;
rc = kzalloc(sizeof(*rc), GFP_NOFS);
......@@ -3524,15 +3525,22 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
extent_io_tree_init(&rc->processed_blocks, NULL, GFP_NOFS);
INIT_LIST_HEAD(&rc->reloc_roots);
rc->extent_root = extent_root;
rc->block_group = btrfs_lookup_block_group(fs_info, group_start);
BUG_ON(!rc->block_group);
if (!rc->block_group->ro) {
ret = btrfs_set_block_group_ro(extent_root, rc->block_group);
if (ret) {
err = ret;
goto out;
}
rw = 1;
}
btrfs_init_workers(&rc->workers, "relocate",
fs_info->thread_pool_size, NULL);
rc->extent_root = extent_root;
btrfs_prepare_block_group_relocation(extent_root, rc->block_group);
rc->data_inode = create_reloc_inode(fs_info, rc->block_group);
if (IS_ERR(rc->data_inode)) {
err = PTR_ERR(rc->data_inode);
......@@ -3597,6 +3605,8 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
WARN_ON(rc->block_group->reserved > 0);
WARN_ON(btrfs_block_group_used(&rc->block_group->item) > 0);
out:
if (err && rw)
btrfs_set_block_group_rw(extent_root, rc->block_group);
iput(rc->data_inode);
btrfs_stop_workers(&rc->workers);
btrfs_put_block_group(rc->block_group);
......
......@@ -185,9 +185,8 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
h->blocks_reserved = num_blocks;
h->blocks_used = 0;
h->block_group = 0;
h->alloc_exclude_nr = 0;
h->alloc_exclude_start = 0;
h->delayed_ref_updates = 0;
h->block_rsv = NULL;
if (!current->journal_info && type != TRANS_USERSPACE)
current->journal_info = h;
......
......@@ -45,13 +45,13 @@ struct btrfs_transaction {
struct btrfs_trans_handle {
u64 transid;
u64 block_group;
u64 bytes_reserved;
unsigned long blocks_reserved;
unsigned long blocks_used;
struct btrfs_transaction *transaction;
u64 block_group;
u64 alloc_exclude_start;
u64 alloc_exclude_nr;
unsigned long delayed_ref_updates;
struct btrfs_transaction *transaction;
struct btrfs_block_rsv *block_rsv;
};
struct btrfs_pending_snapshot {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment