Commit d2fb3437, authored by Yan Zheng, committed by Chris Mason

Btrfs: fix leaking block group on balance

The block group structs are referenced in many different
places, and it's not safe to free them while balancing.  So, those block
group structs were simply leaked instead.

This patch replaces the block group pointer in the inode with the starting byte
offset of the block group and adds reference counting to the block group
struct.
Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
parent cfc8ea87
......@@ -28,11 +28,6 @@ struct btrfs_inode {
/* which subvolume this inode belongs to */
struct btrfs_root *root;
/* the block group preferred for allocations. This pointer is buggy
* and needs to be replaced with a bytenr instead
*/
struct btrfs_block_group_cache *block_group;
/* key used to find this inode on disk. This is used by the code
* to read in roots of subvolumes
*/
......@@ -115,6 +110,9 @@ struct btrfs_inode {
*/
u64 index_cnt;
/* the start of block group preferred for allocations. */
u64 block_group;
struct inode vfs_inode;
};
......
......@@ -653,6 +653,9 @@ struct btrfs_block_group_cache {
/* for block groups in the same raid type */
struct list_head list;
/* usage count */
atomic_t count;
};
struct btrfs_leaf_ref_tree {
......@@ -1706,10 +1709,8 @@ int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy);
struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
btrfs_fs_info *info,
u64 bytenr);
struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
struct btrfs_block_group_cache
*hint, u64 search_start,
int data, int owner);
u64 btrfs_find_block_group(struct btrfs_root *root,
u64 search_start, u64 search_hint, int owner);
struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u32 blocksize, u64 parent,
......@@ -1770,6 +1771,7 @@ int btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
u64 owner_objectid);
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr);
int btrfs_free_block_groups(struct btrfs_fs_info *info);
int btrfs_read_block_groups(struct btrfs_root *root);
int btrfs_make_block_group(struct btrfs_trans_handle *trans,
......@@ -2019,10 +2021,9 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root);
int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end);
int btrfs_writepages(struct address_space *mapping,
struct writeback_control *wbc);
int btrfs_create_subvol_root(struct btrfs_root *new_root, struct dentry *dentry,
struct btrfs_trans_handle *trans, u64 new_dirid,
struct btrfs_block_group_cache *block_group);
int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
struct btrfs_root *new_root, struct dentry *dentry,
u64 new_dirid, u64 alloc_hint);
int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
size_t size, struct bio *bio, unsigned long bio_flags);
......
......@@ -53,10 +53,6 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct
btrfs_root *extent_root, int all);
static int del_pending_extents(struct btrfs_trans_handle *trans, struct
btrfs_root *extent_root, int all);
static struct btrfs_block_group_cache *
__btrfs_find_block_group(struct btrfs_root *root,
struct btrfs_block_group_cache *hint,
u64 search_start, int data, int owner);
static int pin_down_bytes(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, int is_data);
......@@ -142,6 +138,8 @@ block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
break;
}
}
if (ret)
atomic_inc(&ret->count);
spin_unlock(&info->block_group_cache_lock);
return ret;
......@@ -318,6 +316,12 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
return cache;
}
/*
 * Drop one usage count on a block group.  The struct is freed with kfree()
 * by whichever caller brings the count down to zero.
 */
static inline void put_block_group(struct btrfs_block_group_cache *cache)
{
	/* other holders still exist, nothing more to do */
	if (!atomic_dec_and_test(&cache->count))
		return;

	kfree(cache);
}
static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
u64 flags)
{
......@@ -341,54 +345,16 @@ static u64 div_factor(u64 num, int factor)
return num;
}
static struct btrfs_block_group_cache *
__btrfs_find_block_group(struct btrfs_root *root,
struct btrfs_block_group_cache *hint,
u64 search_start, int data, int owner)
u64 btrfs_find_block_group(struct btrfs_root *root,
u64 search_start, u64 search_hint, int owner)
{
struct btrfs_block_group_cache *cache;
struct btrfs_block_group_cache *found_group = NULL;
struct btrfs_fs_info *info = root->fs_info;
u64 used;
u64 last = 0;
u64 free_check;
u64 last = max(search_hint, search_start);
u64 group_start = 0;
int full_search = 0;
int factor = 10;
int factor = 9;
int wrapped = 0;
if (data & BTRFS_BLOCK_GROUP_METADATA)
factor = 9;
if (search_start) {
struct btrfs_block_group_cache *shint;
shint = btrfs_lookup_first_block_group(info, search_start);
if (shint && block_group_bits(shint, data)) {
spin_lock(&shint->lock);
used = btrfs_block_group_used(&shint->item);
if (used + shint->pinned + shint->reserved <
div_factor(shint->key.offset, factor)) {
spin_unlock(&shint->lock);
return shint;
}
spin_unlock(&shint->lock);
}
}
if (hint && block_group_bits(hint, data)) {
spin_lock(&hint->lock);
used = btrfs_block_group_used(&hint->item);
if (used + hint->pinned + hint->reserved <
div_factor(hint->key.offset, factor)) {
spin_unlock(&hint->lock);
return hint;
}
spin_unlock(&hint->lock);
last = hint->key.objectid + hint->key.offset;
} else {
if (hint)
last = max(hint->key.objectid, search_start);
else
last = search_start;
}
again:
while (1) {
cache = btrfs_lookup_first_block_group(root->fs_info, last);
......@@ -399,16 +365,18 @@ __btrfs_find_block_group(struct btrfs_root *root,
last = cache->key.objectid + cache->key.offset;
used = btrfs_block_group_used(&cache->item);
if (block_group_bits(cache, data)) {
free_check = div_factor(cache->key.offset, factor);
if ((full_search || !cache->ro) &&
block_group_bits(cache, BTRFS_BLOCK_GROUP_METADATA)) {
if (used + cache->pinned + cache->reserved <
free_check) {
found_group = cache;
div_factor(cache->key.offset, factor)) {
group_start = cache->key.objectid;
spin_unlock(&cache->lock);
put_block_group(cache);
goto found;
}
}
spin_unlock(&cache->lock);
put_block_group(cache);
cond_resched();
}
if (!wrapped) {
......@@ -423,18 +391,7 @@ __btrfs_find_block_group(struct btrfs_root *root,
goto again;
}
found:
return found_group;
}
struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
struct btrfs_block_group_cache
*hint, u64 search_start,
int data, int owner)
{
struct btrfs_block_group_cache *ret;
ret = __btrfs_find_block_group(root, hint, search_start, data, owner);
return ret;
return group_start;
}
/* simple helper to search for an existing extent at a given offset */
......@@ -1809,6 +1766,19 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
return werr;
}
/*
 * Report whether the block group containing @bytenr should be treated as
 * read-only.
 *
 * Returns 1 when no block group is found for @bytenr or when the group has
 * its ro flag set, and 0 otherwise.  The reference obtained from the lookup
 * is dropped before returning.
 */
int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
{
	struct btrfs_block_group_cache *cache;
	int ro;

	cache = btrfs_lookup_block_group(root->fs_info, bytenr);
	/* a missing block group is reported as read-only */
	if (!cache)
		return 1;

	ro = cache->ro ? 1 : 0;
	put_block_group(cache);
	return ro;
}
static int update_space_info(struct btrfs_fs_info *info, u64 flags,
u64 total_bytes, u64 bytes_used,
struct btrfs_space_info **space_info)
......@@ -1995,10 +1965,10 @@ static int update_block_group(struct btrfs_trans_handle *trans,
int ret;
ret = btrfs_add_free_space(cache, bytenr,
num_bytes);
if (ret)
return -1;
WARN_ON(ret);
}
}
put_block_group(cache);
total -= num_bytes;
bytenr += num_bytes;
}
......@@ -2008,12 +1978,16 @@ static int update_block_group(struct btrfs_trans_handle *trans,
/*
 * Return the starting byte offset (key.objectid) of the block group that
 * btrfs_lookup_first_block_group() finds for @search_start, or 0 when no
 * block group is found.
 */
static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
{
	struct btrfs_block_group_cache *cache;
	u64 bytenr;

	cache = btrfs_lookup_first_block_group(root->fs_info, search_start);
	if (!cache)
		return 0;

	/*
	 * Copy the offset out first, then release the group.  Returning
	 * cache->key.objectid directly would skip put_block_group() and
	 * leave the usage count elevated (the lookup is expected to hand
	 * back a counted reference, as the other lookup callers assume).
	 */
	bytenr = cache->key.objectid;
	put_block_group(cache);
	return bytenr;
}
int btrfs_update_pinned_extents(struct btrfs_root *root,
......@@ -2055,6 +2029,7 @@ int btrfs_update_pinned_extents(struct btrfs_root *root,
if (cache->cached)
btrfs_add_free_space(cache, bytenr, len);
}
put_block_group(cache);
bytenr += len;
num -= len;
}
......@@ -2085,6 +2060,7 @@ static int update_reserved_extents(struct btrfs_root *root,
}
spin_unlock(&cache->lock);
spin_unlock(&cache->space_info->lock);
put_block_group(cache);
bytenr += len;
num -= len;
}
......@@ -2724,6 +2700,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
cache = btrfs_lookup_block_group(root->fs_info, bytenr);
BUG_ON(!cache);
btrfs_add_free_space(cache, bytenr, num_bytes);
put_block_group(cache);
update_reserved_extents(root, bytenr, num_bytes, 0);
return 0;
}
......@@ -2928,6 +2905,8 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans,
}
new_group:
mutex_unlock(&block_group->alloc_mutex);
put_block_group(block_group);
block_group = NULL;
new_group_no_lock:
/* don't try to compare new allocations against the
* last allocation any more
......@@ -2997,6 +2976,8 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans,
block_group = list_entry(cur, struct btrfs_block_group_cache,
list);
atomic_inc(&block_group->count);
search_start = block_group->key.objectid;
cur = cur->next;
}
......@@ -3004,7 +2985,7 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans,
/* we found what we needed */
if (ins->objectid) {
if (!(data & BTRFS_BLOCK_GROUP_DATA))
trans->block_group = block_group;
trans->block_group = block_group->key.objectid;
if (last_ptr)
*last_ptr = ins->objectid + ins->offset;
......@@ -3015,6 +2996,8 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans,
loop, allowed_chunk_alloc);
ret = -ENOSPC;
}
if (block_group)
put_block_group(block_group);
up_read(&space_info->groups_sem);
return ret;
......@@ -3124,6 +3107,7 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
return -ENOSPC;
}
btrfs_add_free_space(cache, start, len);
put_block_group(cache);
update_reserved_extents(root, start, len, 0);
return 0;
}
......@@ -3288,6 +3272,7 @@ int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans,
ret = btrfs_remove_free_space(block_group, ins->objectid,
ins->offset);
BUG_ON(ret);
put_block_group(block_group);
ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid,
ref_generation, owner, ins);
return ret;
......@@ -5703,6 +5688,7 @@ int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start)
WARN_ON(block_group->reserved > 0);
WARN_ON(btrfs_block_group_used(&block_group->item) > 0);
spin_unlock(&block_group->lock);
put_block_group(block_group);
ret = 0;
out:
btrfs_free_path(path);
......@@ -5763,6 +5749,8 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
down_write(&block_group->space_info->groups_sem);
list_del(&block_group->list);
up_write(&block_group->space_info->groups_sem);
WARN_ON(atomic_read(&block_group->count) != 1);
kfree(block_group);
spin_lock(&info->block_group_cache_lock);
......@@ -5807,6 +5795,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
break;
}
atomic_set(&cache->count, 1);
spin_lock_init(&cache->lock);
mutex_init(&cache->alloc_mutex);
mutex_init(&cache->cache_mutex);
......@@ -5861,11 +5850,12 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
cache->key.objectid = chunk_offset;
cache->key.offset = size;
cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
atomic_set(&cache->count, 1);
spin_lock_init(&cache->lock);
mutex_init(&cache->alloc_mutex);
mutex_init(&cache->cache_mutex);
INIT_LIST_HEAD(&cache->list);
btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY);
btrfs_set_block_group_used(&cache->item, bytes_used);
btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
......@@ -5926,10 +5916,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
spin_unlock(&block_group->space_info->lock);
block_group->space_info->full = 0;
/*
memset(shrink_block_group, 0, sizeof(*shrink_block_group));
kfree(shrink_block_group);
*/
put_block_group(block_group);
put_block_group(block_group);
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
if (ret > 0)
......
......@@ -989,7 +989,6 @@ static int run_delalloc_nocow(struct inode *inode, struct page *locked_page,
if (extent_type == BTRFS_FILE_EXTENT_REG ||
extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
struct btrfs_block_group_cache *block_group;
disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
extent_end = found_key.offset +
btrfs_file_extent_num_bytes(leaf, fi);
......@@ -1007,9 +1006,7 @@ static int run_delalloc_nocow(struct inode *inode, struct page *locked_page,
goto out_check;
if (btrfs_cross_ref_exist(trans, root, disk_bytenr))
goto out_check;
block_group = btrfs_lookup_block_group(root->fs_info,
disk_bytenr);
if (!block_group || block_group->ro)
if (btrfs_extent_readonly(root, disk_bytenr))
goto out_check;
disk_bytenr += btrfs_file_extent_offset(leaf, fi);
nocow = 1;
......@@ -1969,16 +1966,11 @@ void btrfs_read_locked_inode(struct inode *inode)
rdev = btrfs_inode_rdev(leaf, inode_item);
BTRFS_I(inode)->index_cnt = (u64)-1;
BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
alloc_group_block = btrfs_inode_block_group(leaf, inode_item);
BTRFS_I(inode)->block_group = btrfs_lookup_block_group(root->fs_info,
alloc_group_block);
BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
if (!BTRFS_I(inode)->block_group) {
BTRFS_I(inode)->block_group = btrfs_find_block_group(root,
NULL, 0,
BTRFS_BLOCK_GROUP_METADATA, 0);
}
BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0,
alloc_group_block, 0);
btrfs_free_path(path);
inode_item = NULL;
......@@ -2048,8 +2040,7 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
btrfs_set_inode_transid(leaf, item, trans->transid);
btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
btrfs_set_inode_block_group(leaf, item,
BTRFS_I(inode)->block_group->key.objectid);
btrfs_set_inode_block_group(leaf, item, BTRFS_I(inode)->block_group);
}
/*
......@@ -3358,14 +3349,11 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct inode *dir,
const char *name, int name_len,
u64 ref_objectid,
u64 objectid,
struct btrfs_block_group_cache *group,
int mode, u64 *index)
u64 ref_objectid, u64 objectid,
u64 alloc_hint, int mode, u64 *index)
{
struct inode *inode;
struct btrfs_inode_item *inode_item;
struct btrfs_block_group_cache *new_inode_group;
struct btrfs_key *location;
struct btrfs_path *path;
struct btrfs_inode_ref *ref;
......@@ -3401,13 +3389,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
owner = 0;
else
owner = 1;
new_inode_group = btrfs_find_block_group(root, group, 0,
BTRFS_BLOCK_GROUP_METADATA, owner);
if (!new_inode_group) {
printk("find_block group failed\n");
new_inode_group = group;
}
BTRFS_I(inode)->block_group = new_inode_group;
BTRFS_I(inode)->block_group =
btrfs_find_block_group(root, 0, alloc_hint, owner);
key[0].objectid = objectid;
btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY);
......@@ -4366,16 +4349,16 @@ static void btrfs_truncate(struct inode *inode)
/*
* create a new subvolume directory/inode (helper for the ioctl).
*/
int btrfs_create_subvol_root(struct btrfs_root *new_root, struct dentry *dentry,
struct btrfs_trans_handle *trans, u64 new_dirid,
struct btrfs_block_group_cache *block_group)
int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
struct btrfs_root *new_root, struct dentry *dentry,
u64 new_dirid, u64 alloc_hint)
{
struct inode *inode;
int error;
u64 index = 0;
inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid,
new_dirid, block_group, S_IFDIR | 0700, &index);
new_dirid, alloc_hint, S_IFDIR | 0700, &index);
if (IS_ERR(inode))
return PTR_ERR(inode);
inode->i_op = &btrfs_dir_inode_operations;
......
......@@ -173,7 +173,7 @@ static noinline int create_subvol(struct btrfs_root *root,
trans = btrfs_start_transaction(new_root, 1);
BUG_ON(!trans);
ret = btrfs_create_subvol_root(new_root, dentry, trans, new_dirid,
ret = btrfs_create_subvol_root(trans, new_root, dentry, new_dirid,
BTRFS_I(dir)->block_group);
if (ret)
goto fail;
......
......@@ -182,7 +182,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
h->transaction = root->fs_info->running_transaction;
h->blocks_reserved = num_blocks;
h->blocks_used = 0;
h->block_group = NULL;
h->block_group = 0;
h->alloc_exclude_nr = 0;
h->alloc_exclude_start = 0;
root->fs_info->running_transaction->use_count++;
......
......@@ -41,7 +41,7 @@ struct btrfs_trans_handle {
unsigned long blocks_reserved;
unsigned long blocks_used;
struct btrfs_transaction *transaction;
struct btrfs_block_group_cache *block_group;
u64 block_group;
u64 alloc_exclude_start;
u64 alloc_exclude_nr;
};
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment