Commit a79b7d4b authored by Chris Mason

Btrfs: async delayed refs

Delayed extent operations are triggered during transaction commits.
The goal is to queue up a healthy batch of changes to the extent
allocation tree and run through them in bulk.

This farms them off to async helper threads.  The goal is to have the
bulk of the delayed operations done in the background, but this is
also important to limit our stack footprint.
Signed-off-by: Chris Mason <clm@fb.com>
parent 40f76580
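Before the diff itself: the core of the patch is an ownership handoff. The caller allocates a small request, queues it on a workqueue, and either blocks on a completion (sync) or returns immediately, in which case the worker frees the request when it finishes. Below is a minimal userspace analogue of that pattern using POSIX threads; every name in it is illustrative, not kernel API.

```c
/* Userspace analogue of the async delayed-ref pattern (illustrative
 * names only; the kernel version uses btrfs_work plus a completion).
 * Build with: cc -pthread sketch.c
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct async_refs {
	unsigned long count;	/* size of the batch to run */
	int error;		/* reported back to a waiting caller */
	int sync;		/* caller is blocked on `wait` */
	int done;
	pthread_mutex_t lock;
	pthread_cond_t wait;
};

static void *refs_worker(void *arg)
{
	struct async_refs *async = arg;

	/* stand-in for btrfs_run_delayed_refs() */
	printf("running %lu delayed refs\n", async->count);
	async->error = 0;

	if (async->sync) {		/* wake the waiting caller */
		pthread_mutex_lock(&async->lock);
		async->done = 1;
		pthread_cond_signal(&async->wait);
		pthread_mutex_unlock(&async->lock);
	} else {			/* nobody waits: free it ourselves */
		free(async);
	}
	return NULL;
}

static int async_run_refs(unsigned long count, int wait)
{
	struct async_refs *async = malloc(sizeof(*async));
	pthread_t t;
	int ret;

	if (!async)
		return -1;
	async->count = count;
	async->error = 0;
	async->sync = wait;
	async->done = 0;
	pthread_mutex_init(&async->lock, NULL);
	pthread_cond_init(&async->wait, NULL);

	pthread_create(&t, NULL, refs_worker, async);
	pthread_detach(t);

	if (!wait)
		return 0;	/* fire and forget */

	pthread_mutex_lock(&async->lock);
	while (!async->done)
		pthread_cond_wait(&async->wait, &async->lock);
	pthread_mutex_unlock(&async->lock);
	ret = async->error;
	free(async);
	return ret;
}

int main(void)
{
	async_run_refs(32, 1);	/* sync: block until the batch is done */
	async_run_refs(64, 0);	/* async: worker cleans up after itself */
	pthread_exit(NULL);	/* let the detached worker finish */
}
```

The same two modes appear in the extent-tree.c hunk below: btrfs_async_run_delayed_refs() waits on the completion when `wait` is set, and otherwise lets delayed_ref_async_start() kfree() the request itself.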
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1546,6 +1546,9 @@ struct btrfs_fs_info {
 	 */
 	struct btrfs_workqueue *fixup_workers;
 	struct btrfs_workqueue *delayed_workers;
+
+	/* the extent workers do delayed refs on the extent allocation tree */
+	struct btrfs_workqueue *extent_workers;
 	struct task_struct *transaction_kthread;
 	struct task_struct *cleaner_kthread;
 	int thread_pool_size;
@@ -3268,6 +3271,8 @@ int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
 void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
 int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 			   struct btrfs_root *root, unsigned long count);
+int btrfs_async_run_delayed_refs(struct btrfs_root *root,
+				 unsigned long count, int wait);
 int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len);
 int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
 			     struct btrfs_root *root, u64 bytenr,
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2069,6 +2069,7 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
 	btrfs_destroy_workqueue(fs_info->readahead_workers);
 	btrfs_destroy_workqueue(fs_info->flush_workers);
 	btrfs_destroy_workqueue(fs_info->qgroup_rescan_workers);
+	btrfs_destroy_workqueue(fs_info->extent_workers);
 }
 
 static void free_root_extent_buffers(struct btrfs_root *root)
@@ -2586,6 +2587,10 @@ int open_ctree(struct super_block *sb,
 		btrfs_alloc_workqueue("readahead", flags, max_active, 2);
 	fs_info->qgroup_rescan_workers =
 		btrfs_alloc_workqueue("qgroup-rescan", flags, 1, 0);
+	fs_info->extent_workers =
+		btrfs_alloc_workqueue("extent-refs", flags,
+				      min_t(u64, fs_devices->num_devices,
+					    max_active), 8);
 
 	if (!(fs_info->workers && fs_info->delalloc_workers &&
 	      fs_info->submit_workers && fs_info->flush_workers &&
@@ -2595,6 +2600,7 @@ int open_ctree(struct super_block *sb,
 	      fs_info->endio_freespace_worker && fs_info->rmw_workers &&
 	      fs_info->caching_workers && fs_info->readahead_workers &&
 	      fs_info->fixup_workers && fs_info->delayed_workers &&
+	      fs_info->fixup_workers && fs_info->extent_workers &&
 	      fs_info->qgroup_rescan_workers)) {
 		err = -ENOMEM;
 		goto fail_sb_buffer;
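One detail of the allocation above: the "extent-refs" queue's active-worker count is `min_t(u64, fs_devices->num_devices, max_active)`, so a small filesystem does not spawn the whole pool for background ref work. A trivial illustration of the clamp, where min_u64() is a stand-in for the kernel's min_t() and the pool size is an assumed value:

```c
#include <stdio.h>

static unsigned long long min_u64(unsigned long long a, unsigned long long b)
{
	return a < b ? a : b;	/* stand-in for min_t(u64, a, b) */
}

int main(void)
{
	unsigned long long max_active = 8;	/* assumed pool size */

	printf("%llu\n", min_u64(1, max_active));	/* single device -> 1 */
	printf("%llu\n", min_u64(24, max_active));	/* many devices -> 8 */
	return 0;
}
```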
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2674,15 +2674,94 @@ int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
 	u64 num_entries =
 		atomic_read(&trans->transaction->delayed_refs.num_entries);
 	u64 avg_runtime;
+	u64 val;
 
 	smp_mb();
 	avg_runtime = fs_info->avg_delayed_ref_runtime;
+	val = num_entries * avg_runtime;
 	if (num_entries * avg_runtime >= NSEC_PER_SEC)
 		return 1;
+	if (val >= NSEC_PER_SEC / 2)
+		return 2;
 
 	return btrfs_check_space_for_delayed_refs(trans, root);
 }
 
+struct async_delayed_refs {
+	struct btrfs_root *root;
+	int count;
+	int error;
+	int sync;
+	struct completion wait;
+	struct btrfs_work work;
+};
+
+static void delayed_ref_async_start(struct btrfs_work *work)
+{
+	struct async_delayed_refs *async;
+	struct btrfs_trans_handle *trans;
+	int ret;
+
+	async = container_of(work, struct async_delayed_refs, work);
+	trans = btrfs_join_transaction(async->root);
+	if (IS_ERR(trans)) {
+		async->error = PTR_ERR(trans);
+		goto done;
+	}
+
+	/*
+	 * trans->sync means that when we call end_transaction, we won't
+	 * wait on delayed refs
+	 */
+	trans->sync = true;
+	ret = btrfs_run_delayed_refs(trans, async->root, async->count);
+	if (ret)
+		async->error = ret;
+
+	ret = btrfs_end_transaction(trans, async->root);
+	if (ret && !async->error)
+		async->error = ret;
+done:
+	if (async->sync)
+		complete(&async->wait);
+	else
+		kfree(async);
+}
+
+int btrfs_async_run_delayed_refs(struct btrfs_root *root,
+				 unsigned long count, int wait)
+{
+	struct async_delayed_refs *async;
+	int ret;
+
+	async = kmalloc(sizeof(*async), GFP_NOFS);
+	if (!async)
+		return -ENOMEM;
+
+	async->root = root->fs_info->tree_root;
+	async->count = count;
+	async->error = 0;
+	if (wait)
+		async->sync = 1;
+	else
+		async->sync = 0;
+	init_completion(&async->wait);
+
+	btrfs_init_work(&async->work, delayed_ref_async_start,
+			NULL, NULL);
+
+	btrfs_queue_work(root->fs_info->extent_workers, &async->work);
+
+	if (wait) {
+		wait_for_completion(&async->wait);
+		ret = async->error;
+		kfree(async);
+		return ret;
+	}
+	return 0;
+}
+
 /*
  * this starts processing the delayed reference count updates and
  * extent insertions we have queued up so far.  count can be
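A note on the two thresholds added above: with `val = num_entries * avg_runtime`, a full second of estimated pending ref work tells the caller to throttle itself (return 1), while half a second asks for background processing only (return 2); below that, the real function falls back to a space check instead of returning 0. A standalone sketch of the arithmetic with made-up numbers:

```c
#include <stdio.h>

#define NSEC_PER_SEC 1000000000ULL

/* mirror of the threshold logic above, minus the space-check fallback */
static int throttle_level(unsigned long long num_entries,
			  unsigned long long avg_runtime_ns)
{
	unsigned long long val = num_entries * avg_runtime_ns;

	if (val >= NSEC_PER_SEC)
		return 1;	/* ~1s of pending refs: caller should wait */
	if (val >= NSEC_PER_SEC / 2)
		return 2;	/* ~0.5s: run them async, don't wait */
	return 0;
}

int main(void)
{
	/* assume the measured average is 25us per ref update */
	printf("%d\n", throttle_level(50000, 25000));	/* 1.25s -> 1 */
	printf("%d\n", throttle_level(25000, 25000));	/* 0.625s -> 2 */
	printf("%d\n", throttle_level(10000, 25000));	/* 0.25s -> 0 */
	return 0;
}
```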
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2678,6 +2678,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
 		trans = NULL;
 		goto out_unlock;
 	}
+
 	trans->block_rsv = &root->fs_info->delalloc_block_rsv;
 
 	if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -697,6 +697,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
 	unsigned long cur = trans->delayed_ref_updates;
 	int lock = (trans->type != TRANS_JOIN_NOLOCK);
 	int err = 0;
+	int must_run_delayed_refs = 0;
 
 	if (trans->use_count > 1) {
 		trans->use_count--;
@@ -711,10 +712,18 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
 	btrfs_create_pending_block_groups(trans, root);
 
 	trans->delayed_ref_updates = 0;
-	if (!trans->sync && btrfs_should_throttle_delayed_refs(trans, root)) {
+	if (!trans->sync) {
+		must_run_delayed_refs =
+			btrfs_should_throttle_delayed_refs(trans, root);
 		cur = max_t(unsigned long, cur, 32);
-		trans->delayed_ref_updates = 0;
-		btrfs_run_delayed_refs(trans, root, cur);
+
+		/*
+		 * don't make the caller wait if they are from a NOLOCK
+		 * or ATTACH transaction, it will deadlock with commit
+		 */
+		if (must_run_delayed_refs == 1 &&
+		    (trans->type & (__TRANS_JOIN_NOLOCK | __TRANS_ATTACH)))
+			must_run_delayed_refs = 2;
 	}
 
 	if (trans->qgroup_reserved) {
@@ -775,6 +784,10 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
 	assert_qgroups_uptodate(trans);
 
 	kmem_cache_free(btrfs_trans_handle_cachep, trans);
+	if (must_run_delayed_refs) {
+		btrfs_async_run_delayed_refs(root, cur,
+					     must_run_delayed_refs == 1);
+	}
 	return err;
 }
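The `must_run_delayed_refs` dance above is the patch's deadlock-avoidance rule: a handle obtained via join-nolock or attach must not block on the async batch, because the commit it would wait behind can itself be waiting on that handle. Pulled out as a sketch (the flag bits are illustrative placeholders, not the values from transaction.h):

```c
#include <stdio.h>

#define __TRANS_JOIN_NOLOCK	(1U << 1)	/* illustrative bit values */
#define __TRANS_ATTACH		(1U << 2)

/* demote "wait for the batch" (1) to "queue it and move on" (2)
 * for transaction types that could deadlock against a commit */
static int demote_if_deadlock_prone(int level, unsigned int trans_type)
{
	if (level == 1 &&
	    (trans_type & (__TRANS_JOIN_NOLOCK | __TRANS_ATTACH)))
		return 2;
	return level;
}

int main(void)
{
	int level = demote_if_deadlock_prone(1, __TRANS_ATTACH);

	printf("level=%d wait=%d\n", level, level == 1);	/* 2, wait=0 */
	level = demote_if_deadlock_prone(1, 0);
	printf("level=%d wait=%d\n", level, level == 1);	/* 1, wait=1 */
	return 0;
}
```

This matches the final call in the hunk: `btrfs_async_run_delayed_refs(root, cur, must_run_delayed_refs == 1)` only waits on the completion when the level is still 1.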