Commit 291c7d2f authored by Josef Bacik, committed by Chris Mason

Btrfs: wait on caching if we're loading the free space cache

We've been hitting panics when running xfstest 13 in a loop for long periods of
time.  And actually this problem has always existed so we've been hitting these
things randomly for a while.  Basically what happens is we get a thread coming
into the allocator and reading the space cache off of disk and adding the
entries to the free space cache as we go.  Then we get another thread that comes
in and tries to allocate from that block group.  Since block_group->cached !=
BTRFS_CACHE_NO it goes ahead and tries to do the allocation.  We do this because
if we're doing the old slow way of caching we don't want to hold people up and
wait for everything to finish.  The problem with this is we could end up
discarding the space cache at some arbitrary point in the future, which means we
could very well end up allocating space that is either bad, or when the real
caching happens it could end up thinking the space isn't in use when it really
is and cause all sorts of other problems.

The solution is to add a new flag to indicate we are loading the free space
cache from disk, and always try to cache the block group if cache->cached !=
BTRFS_CACHE_FINISHED.  That way if we are loading the space cache anybody else
who tries to allocate from the block group will have to wait until it's finished
to make sure it completes successfully.  Thanks,
Signed-off-by: Josef Bacik <josef@redhat.com>
parent 5bb14682
...@@ -848,7 +848,8 @@ struct btrfs_free_cluster { ...@@ -848,7 +848,8 @@ struct btrfs_free_cluster {
enum btrfs_caching_type { enum btrfs_caching_type {
BTRFS_CACHE_NO = 0, BTRFS_CACHE_NO = 0,
BTRFS_CACHE_STARTED = 1, BTRFS_CACHE_STARTED = 1,
BTRFS_CACHE_FINISHED = 2, BTRFS_CACHE_FAST = 2,
BTRFS_CACHE_FINISHED = 3,
}; };
enum btrfs_disk_cache_state { enum btrfs_disk_cache_state {
......
...@@ -467,13 +467,59 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, ...@@ -467,13 +467,59 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
struct btrfs_root *root, struct btrfs_root *root,
int load_cache_only) int load_cache_only)
{ {
DEFINE_WAIT(wait);
struct btrfs_fs_info *fs_info = cache->fs_info; struct btrfs_fs_info *fs_info = cache->fs_info;
struct btrfs_caching_control *caching_ctl; struct btrfs_caching_control *caching_ctl;
int ret = 0; int ret = 0;
smp_mb(); caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS);
if (cache->cached != BTRFS_CACHE_NO) BUG_ON(!caching_ctl);
INIT_LIST_HEAD(&caching_ctl->list);
mutex_init(&caching_ctl->mutex);
init_waitqueue_head(&caching_ctl->wait);
caching_ctl->block_group = cache;
caching_ctl->progress = cache->key.objectid;
atomic_set(&caching_ctl->count, 1);
caching_ctl->work.func = caching_thread;
spin_lock(&cache->lock);
/*
* This should be a rare occasion, but this could happen I think in the
* case where one thread starts to load the space cache info, and then
* some other thread starts a transaction commit which tries to do an
* allocation while the other thread is still loading the space cache
* info. The previous loop should have kept us from choosing this block
* group, but if we've moved to the state where we will wait on caching
* block groups we need to first check if we're doing a fast load here,
* so we can wait for it to finish, otherwise we could end up allocating
* from a block group who's cache gets evicted for one reason or
* another.
*/
while (cache->cached == BTRFS_CACHE_FAST) {
struct btrfs_caching_control *ctl;
ctl = cache->caching_ctl;
atomic_inc(&ctl->count);
prepare_to_wait(&ctl->wait, &wait, TASK_UNINTERRUPTIBLE);
spin_unlock(&cache->lock);
schedule();
finish_wait(&ctl->wait, &wait);
put_caching_control(ctl);
spin_lock(&cache->lock);
}
if (cache->cached != BTRFS_CACHE_NO) {
spin_unlock(&cache->lock);
kfree(caching_ctl);
return 0; return 0;
}
WARN_ON(cache->caching_ctl);
cache->caching_ctl = caching_ctl;
cache->cached = BTRFS_CACHE_FAST;
spin_unlock(&cache->lock);
/* /*
* We can't do the read from on-disk cache during a commit since we need * We can't do the read from on-disk cache during a commit since we need
...@@ -484,56 +530,51 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, ...@@ -484,56 +530,51 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
if (trans && (!trans->transaction->in_commit) && if (trans && (!trans->transaction->in_commit) &&
(root && root != root->fs_info->tree_root) && (root && root != root->fs_info->tree_root) &&
btrfs_test_opt(root, SPACE_CACHE)) { btrfs_test_opt(root, SPACE_CACHE)) {
spin_lock(&cache->lock);
if (cache->cached != BTRFS_CACHE_NO) {
spin_unlock(&cache->lock);
return 0;
}
cache->cached = BTRFS_CACHE_STARTED;
spin_unlock(&cache->lock);
ret = load_free_space_cache(fs_info, cache); ret = load_free_space_cache(fs_info, cache);
spin_lock(&cache->lock); spin_lock(&cache->lock);
if (ret == 1) { if (ret == 1) {
cache->caching_ctl = NULL;
cache->cached = BTRFS_CACHE_FINISHED; cache->cached = BTRFS_CACHE_FINISHED;
cache->last_byte_to_unpin = (u64)-1; cache->last_byte_to_unpin = (u64)-1;
} else { } else {
if (load_cache_only) {
cache->caching_ctl = NULL;
cache->cached = BTRFS_CACHE_NO; cache->cached = BTRFS_CACHE_NO;
} else {
cache->cached = BTRFS_CACHE_STARTED;
}
} }
spin_unlock(&cache->lock); spin_unlock(&cache->lock);
wake_up(&caching_ctl->wait);
if (ret == 1) { if (ret == 1) {
put_caching_control(caching_ctl);
free_excluded_extents(fs_info->extent_root, cache); free_excluded_extents(fs_info->extent_root, cache);
return 0; return 0;
} }
} } else {
/*
if (load_cache_only) * We are not going to do the fast caching, set cached to the
return 0; * appropriate value and wakeup any waiters.
*/
caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS);
BUG_ON(!caching_ctl);
INIT_LIST_HEAD(&caching_ctl->list);
mutex_init(&caching_ctl->mutex);
init_waitqueue_head(&caching_ctl->wait);
caching_ctl->block_group = cache;
caching_ctl->progress = cache->key.objectid;
/* one for caching kthread, one for caching block group list */
atomic_set(&caching_ctl->count, 2);
caching_ctl->work.func = caching_thread;
spin_lock(&cache->lock); spin_lock(&cache->lock);
if (cache->cached != BTRFS_CACHE_NO) { if (load_cache_only) {
cache->caching_ctl = NULL;
cache->cached = BTRFS_CACHE_NO;
} else {
cache->cached = BTRFS_CACHE_STARTED;
}
spin_unlock(&cache->lock); spin_unlock(&cache->lock);
kfree(caching_ctl); wake_up(&caching_ctl->wait);
}
if (load_cache_only) {
put_caching_control(caching_ctl);
return 0; return 0;
} }
cache->caching_ctl = caching_ctl;
cache->cached = BTRFS_CACHE_STARTED;
spin_unlock(&cache->lock);
down_write(&fs_info->extent_commit_sem); down_write(&fs_info->extent_commit_sem);
atomic_inc(&caching_ctl->count);
list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups); list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
up_write(&fs_info->extent_commit_sem); up_write(&fs_info->extent_commit_sem);
...@@ -5177,13 +5218,15 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, ...@@ -5177,13 +5218,15 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
} }
have_block_group: have_block_group:
if (unlikely(block_group->cached == BTRFS_CACHE_NO)) { cached = block_group_cache_done(block_group);
if (unlikely(!cached)) {
u64 free_percent; u64 free_percent;
found_uncached_bg = true;
ret = cache_block_group(block_group, trans, ret = cache_block_group(block_group, trans,
orig_root, 1); orig_root, 1);
if (block_group->cached == BTRFS_CACHE_FINISHED) if (block_group->cached == BTRFS_CACHE_FINISHED)
goto have_block_group; goto alloc;
free_percent = btrfs_block_group_used(&block_group->item); free_percent = btrfs_block_group_used(&block_group->item);
free_percent *= 100; free_percent *= 100;
...@@ -5205,7 +5248,6 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, ...@@ -5205,7 +5248,6 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
orig_root, 0); orig_root, 0);
BUG_ON(ret); BUG_ON(ret);
} }
found_uncached_bg = true;
/* /*
* If loop is set for cached only, try the next block * If loop is set for cached only, try the next block
...@@ -5215,10 +5257,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, ...@@ -5215,10 +5257,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
goto loop; goto loop;
} }
cached = block_group_cache_done(block_group); alloc:
if (unlikely(!cached))
found_uncached_bg = true;
if (unlikely(block_group->ro)) if (unlikely(block_group->ro))
goto loop; goto loop;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment