Commit e2d1f923 authored by Dongsheng Yang, committed by Chris Mason

btrfs: qgroup: do a reservation in a higher level.

There are two problems in qgroup:

a). PAGE_CACHE_SIZE is 4K, so even when we are writing only 1K of data,
qgroup reserves 4K. As a result, the last 3K in a qgroup
is not available to the user.

b). When the user is writing inline data, qgroup does not reserve space for
it, which leaves a window in which we can exceed the limit of a qgroup.

The main idea of this patch is to reserve write_bytes rather than
reserve_bytes. This means qgroup no longer cares about the amount of
data btrfs reserves for the user, only about the amount of data the
user is going to write. We reserve it when the user wants to write and
release it when the transaction is committed.

In this way, qgroup is decoupled from the complex procedure in
btrfs: it only reserves when the user wants to write, and accounts
when the data is written in commit_transaction().
Signed-off-by: Dongsheng Yang <yangds.fnst@cn.fujitsu.com>
Signed-off-by: Chris Mason <clm@fb.com>
parent 237c0e9f
...@@ -3448,7 +3448,7 @@ enum btrfs_reserve_flush_enum { ...@@ -3448,7 +3448,7 @@ enum btrfs_reserve_flush_enum {
BTRFS_RESERVE_FLUSH_ALL, BTRFS_RESERVE_FLUSH_ALL,
}; };
int btrfs_check_data_free_space(struct inode *inode, u64 bytes); int btrfs_check_data_free_space(struct inode *inode, u64 bytes, u64 write_bytes);
void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes); void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes);
void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
struct btrfs_root *root); struct btrfs_root *root);
......
...@@ -3331,7 +3331,7 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group, ...@@ -3331,7 +3331,7 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group,
num_pages *= 16; num_pages *= 16;
num_pages *= PAGE_CACHE_SIZE; num_pages *= PAGE_CACHE_SIZE;
ret = btrfs_check_data_free_space(inode, num_pages); ret = btrfs_check_data_free_space(inode, num_pages, num_pages);
if (ret) if (ret)
goto out_put; goto out_put;
...@@ -3851,7 +3851,7 @@ u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data) ...@@ -3851,7 +3851,7 @@ u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data)
* This will check the space that the inode allocates from to make sure we have * This will check the space that the inode allocates from to make sure we have
* enough space for bytes. * enough space for bytes.
*/ */
int btrfs_check_data_free_space(struct inode *inode, u64 bytes) int btrfs_check_data_free_space(struct inode *inode, u64 bytes, u64 write_bytes)
{ {
struct btrfs_space_info *data_sinfo; struct btrfs_space_info *data_sinfo;
struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_root *root = BTRFS_I(inode)->root;
...@@ -3969,7 +3969,7 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes) ...@@ -3969,7 +3969,7 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
data_sinfo->flags, bytes, 1); data_sinfo->flags, bytes, 1);
return -ENOSPC; return -ENOSPC;
} }
ret = btrfs_qgroup_reserve(root, bytes); ret = btrfs_qgroup_reserve(root, write_bytes);
if (ret) if (ret)
goto out; goto out;
data_sinfo->bytes_may_use += bytes; data_sinfo->bytes_may_use += bytes;
...@@ -3995,7 +3995,6 @@ void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes) ...@@ -3995,7 +3995,6 @@ void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes)
data_sinfo = root->fs_info->data_sinfo; data_sinfo = root->fs_info->data_sinfo;
spin_lock(&data_sinfo->lock); spin_lock(&data_sinfo->lock);
WARN_ON(data_sinfo->bytes_may_use < bytes); WARN_ON(data_sinfo->bytes_may_use < bytes);
btrfs_qgroup_free(root, bytes);
data_sinfo->bytes_may_use -= bytes; data_sinfo->bytes_may_use -= bytes;
trace_btrfs_space_reservation(root->fs_info, "space_info", trace_btrfs_space_reservation(root->fs_info, "space_info",
data_sinfo->flags, bytes, 0); data_sinfo->flags, bytes, 0);
...@@ -5243,8 +5242,6 @@ void btrfs_subvolume_release_metadata(struct btrfs_root *root, ...@@ -5243,8 +5242,6 @@ void btrfs_subvolume_release_metadata(struct btrfs_root *root,
u64 qgroup_reserved) u64 qgroup_reserved)
{ {
btrfs_block_rsv_release(root, rsv, (u64)-1); btrfs_block_rsv_release(root, rsv, (u64)-1);
if (qgroup_reserved)
btrfs_qgroup_free(root, qgroup_reserved);
} }
/** /**
...@@ -5478,11 +5475,8 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) ...@@ -5478,11 +5475,8 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
to_free = 0; to_free = 0;
} }
spin_unlock(&BTRFS_I(inode)->lock); spin_unlock(&BTRFS_I(inode)->lock);
if (dropped) { if (dropped)
if (root->fs_info->quota_enabled)
btrfs_qgroup_free(root, dropped * root->nodesize);
to_free += btrfs_calc_trans_metadata_size(root, dropped); to_free += btrfs_calc_trans_metadata_size(root, dropped);
}
if (to_free) { if (to_free) {
btrfs_block_rsv_release(root, block_rsv, to_free); btrfs_block_rsv_release(root, block_rsv, to_free);
...@@ -5524,9 +5518,6 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) ...@@ -5524,9 +5518,6 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
trace_btrfs_space_reservation(root->fs_info, "delalloc", trace_btrfs_space_reservation(root->fs_info, "delalloc",
btrfs_ino(inode), to_free, 0); btrfs_ino(inode), to_free, 0);
if (root->fs_info->quota_enabled) {
btrfs_qgroup_free(root, dropped * root->nodesize);
}
btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv,
to_free); to_free);
...@@ -5551,7 +5542,7 @@ int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes) ...@@ -5551,7 +5542,7 @@ int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes)
{ {
int ret; int ret;
ret = btrfs_check_data_free_space(inode, num_bytes); ret = btrfs_check_data_free_space(inode, num_bytes, num_bytes);
if (ret) if (ret)
return ret; return ret;
...@@ -5727,12 +5718,8 @@ static int pin_down_extent(struct btrfs_root *root, ...@@ -5727,12 +5718,8 @@ static int pin_down_extent(struct btrfs_root *root,
set_extent_dirty(root->fs_info->pinned_extents, bytenr, set_extent_dirty(root->fs_info->pinned_extents, bytenr,
bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL); bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
if (reserved) { if (reserved)
btrfs_qgroup_update_reserved_bytes(root->fs_info,
root->root_key.objectid,
num_bytes, -1);
trace_btrfs_reserved_extent_free(root, bytenr, num_bytes); trace_btrfs_reserved_extent_free(root, bytenr, num_bytes);
}
return 0; return 0;
} }
...@@ -6470,9 +6457,6 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, ...@@ -6470,9 +6457,6 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
btrfs_put_block_group(cache); btrfs_put_block_group(cache);
trace_btrfs_reserved_extent_free(root, buf->start, buf->len); trace_btrfs_reserved_extent_free(root, buf->start, buf->len);
pin = 0; pin = 0;
btrfs_qgroup_update_reserved_bytes(root->fs_info,
root->root_key.objectid,
buf->len, -1);
} }
out: out:
if (pin) if (pin)
...@@ -7205,9 +7189,6 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root, ...@@ -7205,9 +7189,6 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root,
ret = btrfs_discard_extent(root, start, len, NULL); ret = btrfs_discard_extent(root, start, len, NULL);
btrfs_add_free_space(cache, start, len); btrfs_add_free_space(cache, start, len);
btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc); btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc);
btrfs_qgroup_update_reserved_bytes(root->fs_info,
root->root_key.objectid,
len, -1);
} }
btrfs_put_block_group(cache); btrfs_put_block_group(cache);
...@@ -7446,9 +7427,6 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, ...@@ -7446,9 +7427,6 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
BUG_ON(ret); /* logic error */ BUG_ON(ret); /* logic error */
ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
0, owner, offset, ins, 1); 0, owner, offset, ins, 1);
btrfs_qgroup_update_reserved_bytes(root->fs_info,
root->root_key.objectid,
ins->offset, 1);
btrfs_put_block_group(block_group); btrfs_put_block_group(block_group);
return ret; return ret;
} }
...@@ -7595,10 +7573,6 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, ...@@ -7595,10 +7573,6 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
return ERR_PTR(ret); return ERR_PTR(ret);
} }
btrfs_qgroup_update_reserved_bytes(root->fs_info,
root_objectid,
ins.offset, 1);
buf = btrfs_init_new_buffer(trans, root, ins.objectid, level); buf = btrfs_init_new_buffer(trans, root, ins.objectid, level);
BUG_ON(IS_ERR(buf)); /* -ENOMEM */ BUG_ON(IS_ERR(buf)); /* -ENOMEM */
......
...@@ -1510,7 +1510,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, ...@@ -1510,7 +1510,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
} }
reserve_bytes = num_pages << PAGE_CACHE_SHIFT; reserve_bytes = num_pages << PAGE_CACHE_SHIFT;
ret = btrfs_check_data_free_space(inode, reserve_bytes); ret = btrfs_check_data_free_space(inode, reserve_bytes, write_bytes);
if (ret == -ENOSPC && if (ret == -ENOSPC &&
(BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | (BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
BTRFS_INODE_PREALLOC))) { BTRFS_INODE_PREALLOC))) {
...@@ -2573,7 +2573,7 @@ static long btrfs_fallocate(struct file *file, int mode, ...@@ -2573,7 +2573,7 @@ static long btrfs_fallocate(struct file *file, int mode,
* Make sure we have enough space before we do the * Make sure we have enough space before we do the
* allocation. * allocation.
*/ */
ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start); ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start, alloc_end - alloc_start);
if (ret) if (ret)
return ret; return ret;
......
...@@ -754,9 +754,6 @@ static noinline void submit_compressed_extents(struct inode *inode, ...@@ -754,9 +754,6 @@ static noinline void submit_compressed_extents(struct inode *inode,
} }
goto out_free; goto out_free;
} }
btrfs_qgroup_update_reserved_bytes(root->fs_info,
root->root_key.objectid,
ins.offset, 1);
/* /*
* here we're doing allocation and writeback of the * here we're doing allocation and writeback of the
* compressed pages * compressed pages
...@@ -981,10 +978,6 @@ static noinline int cow_file_range(struct inode *inode, ...@@ -981,10 +978,6 @@ static noinline int cow_file_range(struct inode *inode,
if (ret < 0) if (ret < 0)
goto out_unlock; goto out_unlock;
btrfs_qgroup_update_reserved_bytes(root->fs_info,
root->root_key.objectid,
ins.offset, 1);
em = alloc_extent_map(); em = alloc_extent_map();
if (!em) { if (!em) {
ret = -ENOMEM; ret = -ENOMEM;
...@@ -7037,10 +7030,6 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode, ...@@ -7037,10 +7030,6 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
return ERR_PTR(ret); return ERR_PTR(ret);
} }
btrfs_qgroup_update_reserved_bytes(root->fs_info,
root->root_key.objectid,
ins.offset, 1);
return em; return em;
} }
...@@ -9595,10 +9584,6 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, ...@@ -9595,10 +9584,6 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
break; break;
} }
btrfs_qgroup_update_reserved_bytes(root->fs_info,
root->root_key.objectid,
ins.offset, 1);
btrfs_drop_extent_cache(inode, cur_offset, btrfs_drop_extent_cache(inode, cur_offset,
cur_offset + ins.offset -1, 0); cur_offset + ins.offset -1, 0);
......
...@@ -72,7 +72,6 @@ struct btrfs_qgroup { ...@@ -72,7 +72,6 @@ struct btrfs_qgroup {
/* /*
* reservation tracking * reservation tracking
*/ */
u64 may_use;
u64 reserved; u64 reserved;
/* /*
...@@ -2383,67 +2382,6 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, ...@@ -2383,67 +2382,6 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
return ret; return ret;
} }
/*
 * Adjust the 'reserved' counter of the qgroup found for @ref_root, and of
 * every qgroup reachable from it through the ->groups lists, by
 * sign * num_bytes.
 *
 * @fs_info:   supplies quota_enabled, quota_root, qgroup_lock and the
 *             qgroup_ulist that is reinitialised and used for the walk
 * @ref_root:  id checked with is_fstree() and looked up with find_qgroup_rb()
 *             to pick the starting qgroup
 * @num_bytes: magnitude of the adjustment; 0 is a no-op
 * @sign:      multiplier applied to num_bytes (call sites pass 1 or -1)
 *
 * Returns 0 when nothing is done (ref_root fails is_fstree(), quotas are
 * disabled, num_bytes is 0, there is no quota root, or no qgroup is found).
 * Otherwise returns the value of the last ulist_add() call; a negative value
 * means the walk stopped early, with the qgroups visited so far already
 * adjusted.
 */
int btrfs_qgroup_update_reserved_bytes(struct btrfs_fs_info *fs_info,
u64 ref_root,
u64 num_bytes,
int sign)
{
struct btrfs_root *quota_root;
struct btrfs_qgroup *qgroup;
int ret = 0;
struct ulist_node *unode;
struct ulist_iterator uiter;
/* Cheap exits that need no locking. */
if (!is_fstree(ref_root) || !fs_info->quota_enabled)
return 0;
if (num_bytes == 0)
return 0;
/* qgroup_lock is held for the lookup and the whole walk below. */
spin_lock(&fs_info->qgroup_lock);
quota_root = fs_info->quota_root;
if (!quota_root)
goto out;
qgroup = find_qgroup_rb(fs_info, ref_root);
if (!qgroup)
goto out;
/* Seed the work list with the starting qgroup; aux carries its pointer. */
ulist_reinit(fs_info->qgroup_ulist);
ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
(uintptr_t)qgroup, GFP_ATOMIC);
if (ret < 0)
goto out;
ULIST_ITER_INIT(&uiter);
/*
 * Entries are appended to the list while it is being iterated.
 * NOTE(review): presumably ulist_next() also yields those appended
 * entries, so that groups of groups are reached - confirm against ulist.
 */
while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
struct btrfs_qgroup *qg;
struct btrfs_qgroup_list *glist;
qg = u64_to_ptr(unode->aux);
/*
 * sign is converted to u64 for the multiply, so a sign of -1
 * subtracts num_bytes through unsigned wraparound.
 */
qg->reserved += sign * num_bytes;
/* Queue every group this qgroup is listed under. */
list_for_each_entry(glist, &qg->groups, next_group) {
ret = ulist_add(fs_info->qgroup_ulist,
glist->group->qgroupid,
(uintptr_t)glist->group, GFP_ATOMIC);
if (ret < 0)
goto out;
}
}
out:
spin_unlock(&fs_info->qgroup_lock);
return ret;
}
/*
* reserve some space for a qgroup and all its parents. The reservation takes
* place with start_transaction or dealloc_reserve, similar to ENOSPC
* accounting. If not enough space is available, EDQUOT is returned.
* We assume that the requested space is new for all qgroups.
*/
int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes) int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
{ {
struct btrfs_root *quota_root; struct btrfs_root *quota_root;
...@@ -2486,14 +2424,14 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes) ...@@ -2486,14 +2424,14 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
qg = u64_to_ptr(unode->aux); qg = u64_to_ptr(unode->aux);
if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) && if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
qg->reserved + qg->may_use + (s64)qg->rfer + num_bytes > qg->reserved + (s64)qg->rfer + num_bytes >
qg->max_rfer) { qg->max_rfer) {
ret = -EDQUOT; ret = -EDQUOT;
goto out; goto out;
} }
if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) && if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
qg->reserved + qg->may_use + (s64)qg->excl + num_bytes > qg->reserved + (s64)qg->excl + num_bytes >
qg->max_excl) { qg->max_excl) {
ret = -EDQUOT; ret = -EDQUOT;
goto out; goto out;
...@@ -2517,7 +2455,7 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes) ...@@ -2517,7 +2455,7 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
qg = u64_to_ptr(unode->aux); qg = u64_to_ptr(unode->aux);
qg->may_use += num_bytes; qg->reserved += num_bytes;
} }
out: out:
...@@ -2563,7 +2501,7 @@ void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes) ...@@ -2563,7 +2501,7 @@ void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes)
qg = u64_to_ptr(unode->aux); qg = u64_to_ptr(unode->aux);
qg->may_use -= num_bytes; qg->reserved -= num_bytes;
list_for_each_entry(glist, &qg->groups, next_group) { list_for_each_entry(glist, &qg->groups, next_group) {
ret = ulist_add(fs_info->qgroup_ulist, ret = ulist_add(fs_info->qgroup_ulist,
......
...@@ -94,10 +94,6 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans, ...@@ -94,10 +94,6 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid, struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
struct btrfs_qgroup_inherit *inherit); struct btrfs_qgroup_inherit *inherit);
int btrfs_qgroup_update_reserved_bytes(struct btrfs_fs_info *fs_info,
u64 ref_root,
u64 num_bytes,
int sign);
int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes); int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes);
void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes); void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes);
......
...@@ -3027,7 +3027,7 @@ int prealloc_file_extent_cluster(struct inode *inode, ...@@ -3027,7 +3027,7 @@ int prealloc_file_extent_cluster(struct inode *inode,
mutex_lock(&inode->i_mutex); mutex_lock(&inode->i_mutex);
ret = btrfs_check_data_free_space(inode, cluster->end + ret = btrfs_check_data_free_space(inode, cluster->end +
1 - cluster->start); 1 - cluster->start, 0);
if (ret) if (ret)
goto out; goto out;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment