Commit d54b5c13 authored by Linus Torvalds

Merge tag 'for-4.17-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:
 "This contains a few fixups to the qgroup patches that were merged this
  dev cycle, unaligned access fix, blockgroup removal corner case fix
  and a small debugging output tweak"

* tag 'for-4.17-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: print-tree: debugging output enhancement
  btrfs: Fix race condition between delayed refs and blockgroup removal
  btrfs: fix unaligned access in readdir
  btrfs: Fix wrong btrfs_delalloc_release_extents parameter
  btrfs: delayed-inode: Remove wrong qgroup meta reservation calls
  btrfs: qgroup: Use independent and accurate per inode qgroup rsv
  btrfs: qgroup: Commit transaction in advance to reduce early EDQUOT
parents 37a535ed c0872323
@@ -459,6 +459,25 @@ struct btrfs_block_rsv {
 	unsigned short full;
 	unsigned short type;
 	unsigned short failfast;
+
+	/*
+	 * Qgroup equivalent for @size @reserved
+	 *
+	 * Unlike normal @size/@reserved for inode rsv, qgroup doesn't care
+	 * about things like csum size nor how many tree blocks it will need to
+	 * reserve.
+	 *
+	 * Qgroup cares more about net change of the extent usage.
+	 *
+	 * So for one newly inserted file extent, in worst case it will cause
+	 * leaf split and level increase, nodesize for each file extent is
+	 * already too much.
+	 *
+	 * In short, qgroup_size/reserved is the upper limit of possible needed
+	 * qgroup metadata reservation.
+	 */
+	u64 qgroup_rsv_size;
+	u64 qgroup_rsv_reserved;
 };
 
 /*
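
Taken together, the two new fields give each inode reservation a qgroup-side counterpart: qgroup_rsv_size is the worst-case demand implied by the current outstanding extents, while qgroup_rsv_reserved is what has actually been charged against the qgroup. A minimal userspace sketch of that invariant, with illustrative types and names rather than the kernel's (the excess is what later gets released or converted):

```c
/* Toy model of the new qgroup rsv accounting, not kernel code. */
#include <stdint.h>
#include <stdio.h>

struct rsv_model {
	uint64_t qgroup_rsv_size;	/* upper-bound demand */
	uint64_t qgroup_rsv_reserved;	/* actually charged to the qgroup */
};

/* Returns how many bytes can be handed back to the qgroup. */
static uint64_t qgroup_excess(struct rsv_model *r)
{
	uint64_t to_release = 0;

	if (r->qgroup_rsv_reserved >= r->qgroup_rsv_size) {
		to_release = r->qgroup_rsv_reserved - r->qgroup_rsv_size;
		r->qgroup_rsv_reserved = r->qgroup_rsv_size;
	}
	return to_release;
}

int main(void)
{
	/* 4 outstanding extents on a 16K-nodesize fs: size = 64K,
	 * but 6 extents' worth was charged earlier. */
	struct rsv_model r = { .qgroup_rsv_size = 4 * 16384,
			       .qgroup_rsv_reserved = 6 * 16384 };

	printf("release %llu bytes\n",
	       (unsigned long long)qgroup_excess(&r));	/* 32768 */
	return 0;
}
```
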
@@ -714,6 +733,12 @@ struct btrfs_delayed_root;
  */
 #define BTRFS_FS_EXCL_OP			16
 
+/*
+ * To info transaction_kthread we need an immediate commit so it doesn't
+ * need to wait for commit_interval
+ */
+#define BTRFS_FS_NEED_ASYNC_COMMIT		17
+
 struct btrfs_fs_info {
 	u8 fsid[BTRFS_FSID_SIZE];
 	u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
......
@@ -556,6 +556,12 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
 	dst_rsv = &fs_info->delayed_block_rsv;
 
 	num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
+
+	/*
+	 * Here we migrate space rsv from transaction rsv, since have already
+	 * reserved space when starting a transaction. So no need to reserve
+	 * qgroup space here.
+	 */
 	ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
 	if (!ret) {
 		trace_btrfs_space_reservation(fs_info, "delayed_item",
@@ -577,7 +583,10 @@ static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
 		return;
 
 	rsv = &fs_info->delayed_block_rsv;
-	btrfs_qgroup_convert_reserved_meta(root, item->bytes_reserved);
+	/*
+	 * Check btrfs_delayed_item_reserve_metadata() to see why we don't need
+	 * to release/reserve qgroup space.
+	 */
 	trace_btrfs_space_reservation(fs_info, "delayed_item",
 				      item->key.objectid, item->bytes_reserved,
 				      0);
@@ -602,9 +611,6 @@ static int btrfs_delayed_inode_reserve_metadata(
 	num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
 
-	ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true);
-	if (ret < 0)
-		return ret;
 	/*
 	 * btrfs_dirty_inode will update the inode under btrfs_join_transaction
 	 * which doesn't reserve space for speed. This is a problem since we
@@ -616,6 +622,10 @@ static int btrfs_delayed_inode_reserve_metadata(
 	 */
 	if (!src_rsv || (!trans->bytes_reserved &&
 			 src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) {
+		ret = btrfs_qgroup_reserve_meta_prealloc(root,
+				fs_info->nodesize, true);
+		if (ret < 0)
+			return ret;
 		ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes,
 					  BTRFS_RESERVE_NO_FLUSH);
 		/*
@@ -634,6 +644,8 @@ static int btrfs_delayed_inode_reserve_metadata(
 						      "delayed_inode",
 						      btrfs_ino(inode),
 						      num_bytes, 1);
+		} else {
+			btrfs_qgroup_free_meta_prealloc(root, fs_info->nodesize);
 		}
 		return ret;
 	}
......
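
The net effect of the delayed-inode hunks: the blanket per-item qgroup reservation is gone, a one-nodesize prealloc is taken only on the path that makes a fresh block reservation, and it is handed back if that reservation fails. A userspace sketch of the pairing, using stand-in helper names rather than the kernel API (the stubbed reservation fails on purpose so the undo path runs):

```c
#include <stdio.h>

/* stand-ins for the kernel helpers, not the real API */
static int qgroup_reserve(long n)  { printf("qgroup +%ld\n", n); return 0; }
static void qgroup_free(long n)    { printf("qgroup -%ld\n", n); }
static int block_rsv_add(long n)   { printf("rsv add %ld\n", n); return -1; }

static int reserve_delayed_inode(int need_fresh_rsv, long nodesize,
				 long num_bytes)
{
	int ret;

	if (!need_fresh_rsv)
		return 0;	/* migrate path: qgroup already accounted
				 * when the transaction was started */

	ret = qgroup_reserve(nodesize);
	if (ret < 0)
		return ret;
	ret = block_rsv_add(num_bytes);
	if (ret)
		qgroup_free(nodesize);	/* undo the prealloc on failure */
	return ret;
}

int main(void)
{
	return reserve_delayed_inode(1, 16384, 16384) ? 1 : 0;
}
```
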
@@ -540,8 +540,10 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
 		     struct btrfs_delayed_ref_head *head_ref,
 		     struct btrfs_qgroup_extent_record *qrecord,
 		     u64 bytenr, u64 num_bytes, u64 ref_root, u64 reserved,
-		     int action, int is_data, int *qrecord_inserted_ret,
+		     int action, int is_data, int is_system,
+		     int *qrecord_inserted_ret,
 		     int *old_ref_mod, int *new_ref_mod)
 {
 	struct btrfs_delayed_ref_head *existing;
 	struct btrfs_delayed_ref_root *delayed_refs;
@@ -585,6 +587,7 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
 	head_ref->ref_mod = count_mod;
 	head_ref->must_insert_reserved = must_insert_reserved;
 	head_ref->is_data = is_data;
+	head_ref->is_system = is_system;
 	head_ref->ref_tree = RB_ROOT;
 	INIT_LIST_HEAD(&head_ref->ref_add_list);
 	RB_CLEAR_NODE(&head_ref->href_node);
@@ -772,6 +775,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
 	struct btrfs_delayed_ref_root *delayed_refs;
 	struct btrfs_qgroup_extent_record *record = NULL;
 	int qrecord_inserted;
+	int is_system = (ref_root == BTRFS_CHUNK_TREE_OBJECTID);
 
 	BUG_ON(extent_op && extent_op->is_data);
 	ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
@@ -800,8 +804,8 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
 	 */
 	head_ref = add_delayed_ref_head(fs_info, trans, head_ref, record,
 					bytenr, num_bytes, 0, 0, action, 0,
-					&qrecord_inserted, old_ref_mod,
-					new_ref_mod);
+					is_system, &qrecord_inserted,
+					old_ref_mod, new_ref_mod);
 
 	add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr,
 			     num_bytes, parent, ref_root, level, action);
@@ -868,7 +872,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
 	 */
 	head_ref = add_delayed_ref_head(fs_info, trans, head_ref, record,
 					bytenr, num_bytes, ref_root, reserved,
-					action, 1, &qrecord_inserted,
+					action, 1, 0, &qrecord_inserted,
 					old_ref_mod, new_ref_mod);
 
 	add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr,
@@ -898,9 +902,14 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
 	delayed_refs = &trans->transaction->delayed_refs;
 	spin_lock(&delayed_refs->lock);
 
+	/*
+	 * extent_ops just modify the flags of an extent and they don't result
+	 * in ref count changes, hence it's safe to pass false/0 for is_system
+	 * argument
+	 */
 	add_delayed_ref_head(fs_info, trans, head_ref, NULL, bytenr,
 			     num_bytes, 0, 0, BTRFS_UPDATE_DELAYED_HEAD,
-			     extent_op->is_data, NULL, NULL, NULL);
+			     extent_op->is_data, 0, NULL, NULL, NULL);
 
 	spin_unlock(&delayed_refs->lock);
 	return 0;
......
@@ -127,6 +127,7 @@ struct btrfs_delayed_ref_head {
 	 */
 	unsigned int must_insert_reserved:1;
 	unsigned int is_data:1;
+	unsigned int is_system:1;
 	unsigned int processing:1;
 };
......
@@ -1824,6 +1824,7 @@ static int transaction_kthread(void *arg)
 		now = get_seconds();
 		if (cur->state < TRANS_STATE_BLOCKED &&
+		    !test_bit(BTRFS_FS_NEED_ASYNC_COMMIT, &fs_info->flags) &&
 		    (now < cur->start_time ||
 		     now - cur->start_time < fs_info->commit_interval)) {
 			spin_unlock(&fs_info->trans_lock);
......
@@ -2601,13 +2601,19 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans,
 	trace_run_delayed_ref_head(fs_info, head, 0);
 
 	if (head->total_ref_mod < 0) {
-		struct btrfs_block_group_cache *cache;
+		struct btrfs_space_info *space_info;
+		u64 flags;
 
-		cache = btrfs_lookup_block_group(fs_info, head->bytenr);
-		ASSERT(cache);
-		percpu_counter_add(&cache->space_info->total_bytes_pinned,
+		if (head->is_data)
+			flags = BTRFS_BLOCK_GROUP_DATA;
+		else if (head->is_system)
+			flags = BTRFS_BLOCK_GROUP_SYSTEM;
+		else
+			flags = BTRFS_BLOCK_GROUP_METADATA;
+		space_info = __find_space_info(fs_info, flags);
+		ASSERT(space_info);
+		percpu_counter_add(&space_info->total_bytes_pinned,
 				   -head->num_bytes);
-		btrfs_put_block_group(cache);
 
 		if (head->is_data) {
 			spin_lock(&delayed_refs->lock);
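
The race fix stops dereferencing the block group, which a concurrently processed delayed ref may already have removed, and instead derives the space_info from bits carried on the head itself. A standalone sketch of that mapping, with illustrative enum values rather than the kernel's flag bits:

```c
#include <stdio.h>

enum space_type { SPACE_DATA, SPACE_SYSTEM, SPACE_METADATA };

/* mirrors the if/else-if chain above */
static enum space_type head_space_type(int is_data, int is_system)
{
	if (is_data)
		return SPACE_DATA;
	else if (is_system)
		return SPACE_SYSTEM;
	return SPACE_METADATA;
}

int main(void)
{
	/* a chunk-tree ref (is_system=1) no longer needs its possibly
	 * already-removed block group to adjust total_bytes_pinned */
	printf("%d\n", head_space_type(0, 1));	/* SPACE_SYSTEM */
	return 0;
}
```
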
@@ -5559,14 +5565,18 @@ static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info,
 static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
 				    struct btrfs_block_rsv *block_rsv,
-				    struct btrfs_block_rsv *dest, u64 num_bytes)
+				    struct btrfs_block_rsv *dest, u64 num_bytes,
+				    u64 *qgroup_to_release_ret)
 {
 	struct btrfs_space_info *space_info = block_rsv->space_info;
+	u64 qgroup_to_release = 0;
 	u64 ret;
 
 	spin_lock(&block_rsv->lock);
-	if (num_bytes == (u64)-1)
+	if (num_bytes == (u64)-1) {
 		num_bytes = block_rsv->size;
+		qgroup_to_release = block_rsv->qgroup_rsv_size;
+	}
 	block_rsv->size -= num_bytes;
 	if (block_rsv->reserved >= block_rsv->size) {
 		num_bytes = block_rsv->reserved - block_rsv->size;
@@ -5575,6 +5585,13 @@ static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
 	} else {
 		num_bytes = 0;
 	}
+	if (block_rsv->qgroup_rsv_reserved >= block_rsv->qgroup_rsv_size) {
+		qgroup_to_release = block_rsv->qgroup_rsv_reserved -
+				    block_rsv->qgroup_rsv_size;
+		block_rsv->qgroup_rsv_reserved = block_rsv->qgroup_rsv_size;
+	} else {
+		qgroup_to_release = 0;
+	}
 	spin_unlock(&block_rsv->lock);
 
 	ret = num_bytes;
@@ -5597,6 +5614,8 @@ static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
 			space_info_add_old_bytes(fs_info, space_info,
 						 num_bytes);
 	}
+	if (qgroup_to_release_ret)
+		*qgroup_to_release_ret = qgroup_to_release;
 	return ret;
 }
@@ -5738,17 +5757,21 @@ static int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
 	struct btrfs_root *root = inode->root;
 	struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
 	u64 num_bytes = 0;
+	u64 qgroup_num_bytes = 0;
 	int ret = -ENOSPC;
 
 	spin_lock(&block_rsv->lock);
 	if (block_rsv->reserved < block_rsv->size)
 		num_bytes = block_rsv->size - block_rsv->reserved;
+	if (block_rsv->qgroup_rsv_reserved < block_rsv->qgroup_rsv_size)
+		qgroup_num_bytes = block_rsv->qgroup_rsv_size -
+				   block_rsv->qgroup_rsv_reserved;
 	spin_unlock(&block_rsv->lock);
 
 	if (num_bytes == 0)
 		return 0;
 
-	ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true);
+	ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_num_bytes, true);
 	if (ret)
 		return ret;
 	ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
@@ -5756,7 +5779,13 @@ static int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
 		block_rsv_add_bytes(block_rsv, num_bytes, 0);
 		trace_btrfs_space_reservation(root->fs_info, "delalloc",
 					      btrfs_ino(inode), num_bytes, 1);
-	}
+
+		/* Don't forget to increase qgroup_rsv_reserved */
+		spin_lock(&block_rsv->lock);
+		block_rsv->qgroup_rsv_reserved += qgroup_num_bytes;
+		spin_unlock(&block_rsv->lock);
+	} else
+		btrfs_qgroup_free_meta_prealloc(root, qgroup_num_bytes);
 	return ret;
 }
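
The refill now sizes the two reservations independently: both deficits are read under one lock acquisition, the qgroup side is charged first, and it is rolled back if the metadata reservation fails. A simplified userspace model of that ordering, using assumed helper names:

```c
#include <stdint.h>
#include <stdio.h>

/* toy stand-ins for the two reservation systems */
static int qgroup_reserve(uint64_t n)   { printf("qgroup +%llu\n", (unsigned long long)n); return 0; }
static void qgroup_free(uint64_t n)     { printf("qgroup -%llu\n", (unsigned long long)n); }
static int metadata_reserve(uint64_t n) { printf("meta   +%llu\n", (unsigned long long)n); return 0; }

struct rsv {
	uint64_t size, reserved;
	uint64_t qgroup_rsv_size, qgroup_rsv_reserved;
};

static int inode_rsv_refill(struct rsv *r)
{
	/* both deficits read together; in the kernel this sits under
	 * block_rsv->lock */
	uint64_t num_bytes = r->size > r->reserved ?
			     r->size - r->reserved : 0;
	uint64_t qgroup_num = r->qgroup_rsv_size > r->qgroup_rsv_reserved ?
			      r->qgroup_rsv_size - r->qgroup_rsv_reserved : 0;
	int ret;

	if (num_bytes == 0)
		return 0;
	ret = qgroup_reserve(qgroup_num);
	if (ret)
		return ret;
	ret = metadata_reserve(num_bytes);
	if (ret) {
		qgroup_free(qgroup_num);	/* roll back cheaper side */
		return ret;
	}
	r->reserved += num_bytes;
	r->qgroup_rsv_reserved += qgroup_num;
	return 0;
}

int main(void)
{
	struct rsv r = { .size = 3 << 20, .reserved = 1 << 20,
			 .qgroup_rsv_size = 2 * 16384 };
	return inode_rsv_refill(&r);
}
```
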
@@ -5777,20 +5806,23 @@ static void btrfs_inode_rsv_release(struct btrfs_inode *inode, bool qgroup_free)
 	struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
 	struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
 	u64 released = 0;
+	u64 qgroup_to_release = 0;
 
 	/*
 	 * Since we statically set the block_rsv->size we just want to say we
 	 * are releasing 0 bytes, and then we'll just get the reservation over
 	 * the size free'd.
 	 */
-	released = block_rsv_release_bytes(fs_info, block_rsv, global_rsv, 0);
+	released = block_rsv_release_bytes(fs_info, block_rsv, global_rsv, 0,
+					   &qgroup_to_release);
 	if (released > 0)
 		trace_btrfs_space_reservation(fs_info, "delalloc",
 					      btrfs_ino(inode), released, 0);
 	if (qgroup_free)
-		btrfs_qgroup_free_meta_prealloc(inode->root, released);
+		btrfs_qgroup_free_meta_prealloc(inode->root, qgroup_to_release);
 	else
-		btrfs_qgroup_convert_reserved_meta(inode->root, released);
+		btrfs_qgroup_convert_reserved_meta(inode->root,
+						   qgroup_to_release);
 }
 
 void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
@@ -5802,7 +5834,7 @@ void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
 	if (global_rsv == block_rsv ||
 	    block_rsv->space_info != global_rsv->space_info)
 		global_rsv = NULL;
-	block_rsv_release_bytes(fs_info, block_rsv, global_rsv, num_bytes);
+	block_rsv_release_bytes(fs_info, block_rsv, global_rsv, num_bytes, NULL);
 }
 
 static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
@@ -5882,7 +5914,7 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
 static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
 {
 	block_rsv_release_bytes(fs_info, &fs_info->global_block_rsv, NULL,
-				(u64)-1);
+				(u64)-1, NULL);
 	WARN_ON(fs_info->trans_block_rsv.size > 0);
 	WARN_ON(fs_info->trans_block_rsv.reserved > 0);
 	WARN_ON(fs_info->chunk_block_rsv.size > 0);
@@ -5906,7 +5938,7 @@ void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans)
 	WARN_ON_ONCE(!list_empty(&trans->new_bgs));
 	block_rsv_release_bytes(fs_info, &fs_info->chunk_block_rsv, NULL,
-				trans->chunk_bytes_reserved);
+				trans->chunk_bytes_reserved, NULL);
 	trans->chunk_bytes_reserved = 0;
 }
@@ -6011,6 +6043,7 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
 {
 	struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
 	u64 reserve_size = 0;
+	u64 qgroup_rsv_size = 0;
 	u64 csum_leaves;
 	unsigned outstanding_extents;
 
@@ -6023,9 +6056,17 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
 					 inode->csum_bytes);
 	reserve_size += btrfs_calc_trans_metadata_size(fs_info,
 						       csum_leaves);
+	/*
+	 * For qgroup rsv, the calculation is very simple:
+	 * account one nodesize for each outstanding extent
+	 *
+	 * This is overestimating in most cases.
+	 */
+	qgroup_rsv_size = outstanding_extents * fs_info->nodesize;
 
 	spin_lock(&block_rsv->lock);
 	block_rsv->size = reserve_size;
+	block_rsv->qgroup_rsv_size = qgroup_rsv_size;
 	spin_unlock(&block_rsv->lock);
 }
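
To see why the comment calls this an overestimate that is still far below the metadata side, compare the two formulas for sample numbers. The metadata term below mirrors the shape of btrfs_calc_trans_metadata_size() in this era (nodesize * BTRFS_MAX_LEVEL * 2 per item); treat the constants as illustrative:

```c
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const uint64_t nodesize = 16384;	/* default 16K nodesize */
	const unsigned outstanding_extents = 8;
	const unsigned max_level = 8;		/* BTRFS_MAX_LEVEL */

	/* qgroup only tracks net extent usage: one node per extent */
	uint64_t qgroup_rsv = (uint64_t)outstanding_extents * nodesize;

	/* metadata rsv covers CoW of a full path, both directions */
	uint64_t meta_rsv = nodesize * max_level * 2 * outstanding_extents;

	printf("qgroup rsv: %llu, metadata rsv: %llu\n",
	       (unsigned long long)qgroup_rsv,		/* 128K */
	       (unsigned long long)meta_rsv);		/* 2M   */
	return 0;
}
```
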
@@ -8403,7 +8444,7 @@ static void unuse_block_rsv(struct btrfs_fs_info *fs_info,
 			    struct btrfs_block_rsv *block_rsv, u32 blocksize)
 {
 	block_rsv_add_bytes(block_rsv, blocksize, 0);
-	block_rsv_release_bytes(fs_info, block_rsv, NULL, 0);
+	block_rsv_release_bytes(fs_info, block_rsv, NULL, 0, NULL);
 }
 
 /*
......
@@ -1748,7 +1748,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
 		unlock_extent_cached(&BTRFS_I(inode)->io_tree,
 				     lockstart, lockend, &cached_state);
 		btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes,
-					       (ret != 0));
+					       true);
 		if (ret) {
 			btrfs_drop_pages(pages, num_pages);
 			break;
......
@@ -31,6 +31,7 @@
 #include <linux/uio.h>
 #include <linux/magic.h>
 #include <linux/iversion.h>
+#include <asm/unaligned.h>
 #include "ctree.h"
 #include "disk-io.h"
 #include "transaction.h"
@@ -5905,11 +5906,13 @@ static int btrfs_filldir(void *addr, int entries, struct dir_context *ctx)
 		struct dir_entry *entry = addr;
 		char *name = (char *)(entry + 1);
 
-		ctx->pos = entry->offset;
-		if (!dir_emit(ctx, name, entry->name_len, entry->ino,
-			      entry->type))
+		ctx->pos = get_unaligned(&entry->offset);
+		if (!dir_emit(ctx, name, get_unaligned(&entry->name_len),
+			      get_unaligned(&entry->ino),
+			      get_unaligned(&entry->type)))
 			return 1;
-		addr += sizeof(struct dir_entry) + entry->name_len;
+		addr += sizeof(struct dir_entry) +
+			get_unaligned(&entry->name_len);
 		ctx->pos++;
 	}
 	return 0;
@@ -5999,14 +6002,15 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
 		}
 
 		entry = addr;
-		entry->name_len = name_len;
+		put_unaligned(name_len, &entry->name_len);
 		name_ptr = (char *)(entry + 1);
 		read_extent_buffer(leaf, name_ptr, (unsigned long)(di + 1),
 				   name_len);
-		entry->type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];
+		put_unaligned(btrfs_filetype_table[btrfs_dir_type(leaf, di)],
+			      &entry->type);
 		btrfs_dir_item_key_to_cpu(leaf, di, &location);
-		entry->ino = location.objectid;
-		entry->offset = found_key.offset;
+		put_unaligned(location.objectid, &entry->ino);
+		put_unaligned(found_key.offset, &entry->offset);
 		entries++;
 		addr += sizeof(struct dir_entry) + name_len;
 		total_len += sizeof(struct dir_entry) + name_len;
......
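
The underlying problem is that dir_entry records are packed back to back in a page buffer, so a record following an odd-length name leaves its multi-byte fields misaligned; get_unaligned()/put_unaligned() make those accesses byte-safe on strict-alignment architectures. A userspace equivalent built on memcpy, which compilers lower to whatever unaligned access the target allows:

```c
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* userspace stand-ins for the kernel's get/put_unaligned helpers */
static uint64_t get_unaligned_u64(const void *p)
{
	uint64_t v;
	memcpy(&v, p, sizeof(v));
	return v;
}

static void put_unaligned_u64(uint64_t v, void *p)
{
	memcpy(p, &v, sizeof(v));
}

int main(void)
{
	/* buf + 1 simulates an entry that starts at an odd offset,
	 * like the packed readdir buffer after a 3-byte name */
	unsigned char buf[32];
	void *misaligned = buf + 1;

	put_unaligned_u64(0x1234, misaligned);
	printf("%llx\n", (unsigned long long)get_unaligned_u64(misaligned));
	return 0;
}
```
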
@@ -189,9 +189,10 @@ void btrfs_print_leaf(struct extent_buffer *l)
 	fs_info = l->fs_info;
 	nr = btrfs_header_nritems(l);
-	btrfs_info(fs_info, "leaf %llu total ptrs %d free space %d",
-		   btrfs_header_bytenr(l), nr,
-		   btrfs_leaf_free_space(fs_info, l));
+	btrfs_info(fs_info,
+		   "leaf %llu gen %llu total ptrs %d free space %d owner %llu",
+		   btrfs_header_bytenr(l), btrfs_header_generation(l), nr,
+		   btrfs_leaf_free_space(fs_info, l), btrfs_header_owner(l));
 	for (i = 0 ; i < nr ; i++) {
 		item = btrfs_item_nr(i);
 		btrfs_item_key_to_cpu(l, &key, i);
@@ -325,7 +326,7 @@ void btrfs_print_leaf(struct extent_buffer *l)
 	}
 }
 
-void btrfs_print_tree(struct extent_buffer *c)
+void btrfs_print_tree(struct extent_buffer *c, bool follow)
 {
 	struct btrfs_fs_info *fs_info;
 	int i; u32 nr;
@@ -342,15 +343,19 @@ void btrfs_print_tree(struct extent_buffer *c)
 		return;
 	}
 	btrfs_info(fs_info,
-		   "node %llu level %d total ptrs %d free spc %u",
-		   btrfs_header_bytenr(c), level, nr,
-		   (u32)BTRFS_NODEPTRS_PER_BLOCK(fs_info) - nr);
+		   "node %llu level %d gen %llu total ptrs %d free spc %u owner %llu",
+		   btrfs_header_bytenr(c), level, btrfs_header_generation(c),
+		   nr, (u32)BTRFS_NODEPTRS_PER_BLOCK(fs_info) - nr,
+		   btrfs_header_owner(c));
 	for (i = 0; i < nr; i++) {
 		btrfs_node_key_to_cpu(c, &key, i);
-		pr_info("\tkey %d (%llu %u %llu) block %llu\n",
+		pr_info("\tkey %d (%llu %u %llu) block %llu gen %llu\n",
 		       i, key.objectid, key.type, key.offset,
-		       btrfs_node_blockptr(c, i));
+		       btrfs_node_blockptr(c, i),
+		       btrfs_node_ptr_generation(c, i));
 	}
+	if (!follow)
+		return;
 	for (i = 0; i < nr; i++) {
 		struct btrfs_key first_key;
 		struct extent_buffer *next;
@@ -372,7 +377,7 @@ void btrfs_print_tree(struct extent_buffer *c)
 		if (btrfs_header_level(next) !=
 		    level - 1)
 			BUG();
-		btrfs_print_tree(next);
+		btrfs_print_tree(next, follow);
 		free_extent_buffer(next);
 	}
 }
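
The new bool simply gates the recursive descent: a node's own keys are always printed, children are walked only when follow is set. The control-flow shape in a self-contained toy (the node type is illustrative, not the kernel's extent_buffer):

```c
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct node {
	int level;
	struct node *children[2];
};

static void print_tree(const struct node *n, bool follow)
{
	printf("node level %d\n", n->level);
	if (!follow)
		return;		/* one level of output by default */
	for (int i = 0; i < 2; i++)
		if (n->children[i])
			print_tree(n->children[i], follow);
}

int main(void)
{
	struct node leaf0 = { 0, { NULL, NULL } };
	struct node leaf1 = { 0, { NULL, NULL } };
	struct node root = { 1, { &leaf0, &leaf1 } };

	print_tree(&root, false);	/* just the root */
	print_tree(&root, true);	/* whole tree */
	return 0;
}
```
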
@@ -7,6 +7,6 @@
 #define BTRFS_PRINT_TREE_H
 
 void btrfs_print_leaf(struct extent_buffer *l);
-void btrfs_print_tree(struct extent_buffer *c);
+void btrfs_print_tree(struct extent_buffer *c, bool follow);
 
 #endif
@@ -11,6 +11,7 @@
 #include <linux/slab.h>
 #include <linux/workqueue.h>
 #include <linux/btrfs.h>
+#include <linux/sizes.h>
 
 #include "ctree.h"
 #include "transaction.h"
@@ -2375,8 +2376,21 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
 	return ret;
 }
 
-static bool qgroup_check_limits(const struct btrfs_qgroup *qg, u64 num_bytes)
+/*
+ * Two limits to commit transaction in advance.
+ *
+ * For RATIO, it will be 1/RATIO of the remaining limit
+ * (excluding data and prealloc meta) as threshold.
+ * For SIZE, it will be in byte unit as threshold.
+ */
+#define QGROUP_PERTRANS_RATIO		32
+#define QGROUP_PERTRANS_SIZE		SZ_32M
+static bool qgroup_check_limits(struct btrfs_fs_info *fs_info,
+				const struct btrfs_qgroup *qg, u64 num_bytes)
 {
+	u64 limit;
+	u64 threshold;
+
 	if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
 	    qgroup_rsv_total(qg) + (s64)qg->rfer + num_bytes > qg->max_rfer)
 		return false;
@@ -2385,6 +2399,31 @@ static bool qgroup_check_limits(const struct btrfs_qgroup *qg, u64 num_bytes)
 	    qgroup_rsv_total(qg) + (s64)qg->excl + num_bytes > qg->max_excl)
 		return false;
 
+	/*
+	 * Even if we passed the check, it's better to check if reservation
+	 * for meta_pertrans is pushing us near limit.
+	 * If there is too much pertrans reservation or it's near the limit,
+	 * let's try commit transaction to free some, using transaction_kthread
+	 */
+	if ((qg->lim_flags & (BTRFS_QGROUP_LIMIT_MAX_RFER |
+			      BTRFS_QGROUP_LIMIT_MAX_EXCL))) {
+		if (qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL)
+			limit = qg->max_excl;
+		else
+			limit = qg->max_rfer;
+		threshold = (limit - qg->rsv.values[BTRFS_QGROUP_RSV_DATA] -
+			     qg->rsv.values[BTRFS_QGROUP_RSV_META_PREALLOC]) /
+			     QGROUP_PERTRANS_RATIO;
+		threshold = min_t(u64, threshold, QGROUP_PERTRANS_SIZE);
+
+		/*
+		 * Use transaction_kthread to commit transaction, so we no
+		 * longer need to bother nested transaction nor lock context.
+		 */
+		if (qg->rsv.values[BTRFS_QGROUP_RSV_META_PERTRANS] > threshold)
+			btrfs_commit_transaction_locksafe(fs_info);
+	}
+
 	return true;
 }
 
@@ -2434,7 +2473,7 @@ static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce,
 		qg = unode_aux_to_qgroup(unode);
 
-		if (enforce && !qgroup_check_limits(qg, num_bytes)) {
+		if (enforce && !qgroup_check_limits(fs_info, qg, num_bytes)) {
 			ret = -EDQUOT;
 			goto out;
 		}
......
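
Worked example of the early-commit heuristic: the threshold is 1/32 of the limit that remains after data and prealloc-meta reservations, capped at 32M, and a pertrans reservation above it kicks the async commit. Plain userspace arithmetic with illustrative numbers:

```c
#include <stdint.h>
#include <stdio.h>

#define QGROUP_PERTRANS_RATIO	32
#define QGROUP_PERTRANS_SIZE	(32ULL * 1024 * 1024)	/* SZ_32M */

static uint64_t min_u64(uint64_t a, uint64_t b) { return a < b ? a : b; }

int main(void)
{
	uint64_t limit = 1ULL << 30;		/* 1G max_excl */
	uint64_t data_rsv = 256ULL << 20;	/* 256M data rsv */
	uint64_t prealloc_rsv = 64ULL << 20;	/* 64M prealloc meta */
	uint64_t pertrans_rsv = 40ULL << 20;	/* 40M pertrans meta */

	uint64_t threshold = min_u64((limit - data_rsv - prealloc_rsv) /
				     QGROUP_PERTRANS_RATIO,
				     QGROUP_PERTRANS_SIZE);

	/* (1G - 320M) / 32 = ~22M, under the 32M cap; 40M of pertrans
	 * rsv exceeds it, so an async commit would be requested. */
	printf("threshold=%llu commit=%d\n",
	       (unsigned long long)threshold,
	       pertrans_rsv > threshold);
	return 0;
}
```
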
@@ -2267,6 +2267,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
 	 */
 	cur_trans->state = TRANS_STATE_COMPLETED;
 	wake_up(&cur_trans->commit_wait);
+	clear_bit(BTRFS_FS_NEED_ASYNC_COMMIT, &fs_info->flags);
 
 	spin_lock(&fs_info->trans_lock);
 	list_del_init(&cur_trans->list);
......
@@ -199,6 +199,20 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root);
 int btrfs_commit_transaction(struct btrfs_trans_handle *trans);
 int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
 				   int wait_for_unblock);
+
+/*
+ * Try to commit transaction asynchronously, so this is safe to call
+ * even holding a spinlock.
+ *
+ * It's done by informing transaction_kthread to commit transaction without
+ * waiting for commit interval.
+ */
+static inline void btrfs_commit_transaction_locksafe(
+		struct btrfs_fs_info *fs_info)
+{
+	set_bit(BTRFS_FS_NEED_ASYNC_COMMIT, &fs_info->flags);
+	wake_up_process(fs_info->transaction_kthread);
+}
 int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans);
 int btrfs_should_end_transaction(struct btrfs_trans_handle *trans);
 void btrfs_throttle(struct btrfs_fs_info *fs_info);
......
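
Usage-wise the pattern has two halves: any context, even one holding a spinlock, may set the flag and wake the kthread; the kthread then skips its commit_interval wait. A compressed userspace model, noting that the real code clears the bit only after the commit completes (the exchange below folds that into the check for brevity):

```c
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool need_async_commit;

/* safe under any lock: just a flag store plus a wakeup */
static void commit_transaction_locksafe(void)
{
	atomic_store(&need_async_commit, true);
	/* in the kernel: wake_up_process(fs_info->transaction_kthread) */
}

/* mirror of the transaction_kthread test: commit early when the flag
 * is set, otherwise honour the commit interval */
static bool kthread_should_commit(long age, long commit_interval)
{
	if (atomic_exchange(&need_async_commit, false))
		return true;
	return age >= commit_interval;
}

int main(void)
{
	commit_transaction_locksafe();
	printf("%d\n", kthread_should_commit(1, 30));	/* 1: early commit */
	printf("%d\n", kthread_should_commit(1, 30));	/* 0: keep waiting */
	return 0;
}
```
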