Commit 5b0e95bf authored by Josef Bacik

Btrfs: inline checksums into the disk free space cache

Yeah yeah I know this is how we used to do it and then I changed it, but damnit
I'm changing it back.  The fact is that writing out checksums will modify
metadata, which could cause us to dirty a block group we've already written out,
so we have to truncate it and all of its checksums and re-write it, which will
write new checksums, which could dirty a block group that has already been
written, and you see where I'm going with this?  This can cause unmount, or
really anything that depends on a transaction commit, to take its sweet damned
time to happen.  So go back to the way it was, only this time we're specifically
setting NODATACOW because we can't go through the COW pathway anyway and we're
doing our own built-in cow'ing by truncating the free space cache.  The other
new thing is that once we truncate the old cache and preallocate the new space,
we don't need to do that song and dance at all for the rest of the transaction;
we can just overwrite the existing space with the new cache if the block group
changes for whatever reason, and the NODATACOW will let us do this fine.  So
keep track of which transaction we last cleared our cache in, and if we cleared
it in this transaction just say we're all set up and carry on.  This survives
xfstests and stress.sh.

The inode cache will continue to use the normal csum infrastructure since it
only gets written once and there will be no more modifications to the fs tree in
a transaction commit.
Signed-off-by: Josef Bacik <josef@redhat.com>
parent 9a82ca65
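The core trick is gating the truncate-and-preallocate dance on a per-block-group
cache_generation, which is exactly what the hunks below add.  Here is a minimal
userspace sketch of that gating; the structs and the setup_cache() helper are
simplified stand-ins for illustration, not the kernel types or API:

/*
 * Minimal sketch of the generation gating added by this commit.  The
 * fields mirror the patch, but everything here is a stand-in.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct transaction {
	uint64_t transid;		/* current transaction id */
};

struct block_group {
	uint64_t cache_generation;	/* transid we last set up the cache in */
	bool dirty;
};

/*
 * Truncate + preallocate only once per transaction; after that the
 * NODATACOW cache file can simply be overwritten in place.
 */
static void setup_cache(struct block_group *bg, struct transaction *trans)
{
	if (bg->cache_generation == trans->transid) {
		printf("already set up in transid %llu, overwrite in place\n",
		       (unsigned long long)trans->transid);
		return;
	}
	printf("truncating old cache, preallocating for transid %llu\n",
	       (unsigned long long)trans->transid);
	bg->cache_generation = trans->transid;
}

int main(void)
{
	struct transaction trans = { .transid = 100 };
	struct block_group bg = { .cache_generation = 0 };

	setup_cache(&bg, &trans);	/* first touch: truncate + prealloc */
	setup_cache(&bg, &trans);	/* re-dirtied later: just overwrite */
	return 0;
}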
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -838,6 +838,7 @@ struct btrfs_block_group_cache {
 	u64 bytes_super;
 	u64 flags;
 	u64 sectorsize;
+	u64 cache_generation;
 	unsigned int ro:1;
 	unsigned int dirty:1;
 	unsigned int iref:1;
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2717,6 +2717,13 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group,
 		goto again;
 	}
 
+	/* We've already setup this transaction, go ahead and exit */
+	if (block_group->cache_generation == trans->transid &&
+	    i_size_read(inode)) {
+		dcs = BTRFS_DC_SETUP;
+		goto out_put;
+	}
+
 	/*
 	 * We want to set the generation to 0, that way if anything goes wrong
 	 * from here on out we know not to trust this cache when we load up next
@@ -2756,19 +2763,16 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group,
 	num_pages *= 16;
 	num_pages *= PAGE_CACHE_SIZE;
 
-	ret = btrfs_delalloc_reserve_space(inode, num_pages);
+	ret = btrfs_check_data_free_space(inode, num_pages);
 	if (ret)
 		goto out_put;
 
 	ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages,
 					      num_pages, num_pages,
 					      &alloc_hint);
-	if (!ret) {
+	if (!ret)
 		dcs = BTRFS_DC_SETUP;
-		btrfs_free_reserved_data_space(inode, num_pages);
-	} else {
-		btrfs_delalloc_release_space(inode, num_pages);
-	}
+	btrfs_free_reserved_data_space(inode, num_pages);
 
 out_put:
 	iput(inode);
@@ -2776,6 +2780,8 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group,
 	btrfs_release_path(path);
 out:
 	spin_lock(&block_group->lock);
+	if (!ret)
+		block_group->cache_generation = trans->transid;
 	block_group->disk_cache_state = dcs;
 	spin_unlock(&block_group->lock);
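The reworked reservation flow above boils down to check, preallocate, then
release the reservation unconditionally: once the preallocation lands, the
allocated extent accounts for the space itself.  A compacted sketch of that
shape, using illustrative stand-in helpers rather than the btrfs functions:

/*
 * Compacted sketch of the check -> prealloc -> release shape above.
 * These helpers are stand-ins, not the btrfs functions.
 */
#include <stdio.h>

enum dc_state { DC_ERROR, DC_SETUP };

static int check_data_free_space(long bytes) { (void)bytes; return 0; }
static int prealloc_range(long bytes)        { (void)bytes; return 0; }
static void free_reserved_data_space(long bytes) { (void)bytes; }

static enum dc_state cache_setup(long bytes)
{
	enum dc_state dcs = DC_ERROR;

	if (check_data_free_space(bytes))
		return dcs;	/* no space: bail before touching the inode */

	if (!prealloc_range(bytes))
		dcs = DC_SETUP;
	/*
	 * Released on success and failure alike: on success the
	 * preallocated extent now accounts for the space.
	 */
	free_reserved_data_space(bytes);
	return dcs;
}

int main(void)
{
	printf("dcs = %d\n", cache_setup(16 * 4096L));	/* arbitrary size */
	return 0;
}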
(diff for fs/btrfs/free-space-cache.c is collapsed)
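The cache inode's NODATACOW setup itself lives in that collapsed
free-space-cache.c diff.  As a loosely related userspace illustration only:
on kernels that support it, the same no-COW behavior can be requested on an
empty btrfs file via the FS_NOCOW_FL inode flag.  The path is made up and
error handling is minimal:

/*
 * Userspace sketch, not part of this patch: mark an empty btrfs file
 * no-COW via FS_IOC_SETFLAGS.  Assumes a btrfs mount at /mnt/btrfs.
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/fs.h>

int main(void)
{
	int fd = open("/mnt/btrfs/nocow-file", O_CREAT | O_RDWR, 0644);
	int flags = 0;

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* On btrfs, NOCOW must be set while the file is still empty */
	if (ioctl(fd, FS_IOC_GETFLAGS, &flags) == 0) {
		flags |= FS_NOCOW_FL;
		if (ioctl(fd, FS_IOC_SETFLAGS, &flags) != 0)
			perror("FS_IOC_SETFLAGS");
	}
	close(fd);
	return 0;
}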
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1792,12 +1792,12 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
 	}
 	ret = 0;
 out:
-	btrfs_delalloc_release_metadata(inode, ordered_extent->len);
-	if (nolock) {
-		if (trans)
+	if (root != root->fs_info->tree_root)
+		btrfs_delalloc_release_metadata(inode, ordered_extent->len);
+	if (trans) {
+		if (nolock)
 			btrfs_end_transaction_nolock(trans, root);
-	} else {
-		if (trans)
+		else
 			btrfs_end_transaction(trans, root);
 	}