Commit 0019f10d authored by Josef Bacik's avatar Josef Bacik

Btrfs: re-work delalloc flushing

Currently we try and flush delalloc, but we only do that in a sort of weak way,
which works fine in most cases but if we're under heavy pressure we need to be
able to wait for flushing to happen.  Also instead of checking the bytes
reserved in the block_rsv, check the space info since it is more accurate.  The
sync option will be used in a future patch.
Signed-off-by: default avatarJosef Bacik <josef@redhat.com>
parent 6d48755d
......@@ -2376,7 +2376,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
u32 min_type);
int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput);
int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput);
int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput,
int sync);
int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
struct extent_state **cached_state);
int btrfs_writepages(struct address_space *mapping,
......
......@@ -3111,9 +3111,10 @@ static int maybe_allocate_chunk(struct btrfs_trans_handle *trans,
* shrink metadata reservation for delalloc
*/
static int shrink_delalloc(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 to_reclaim)
struct btrfs_root *root, u64 to_reclaim, int sync)
{
struct btrfs_block_rsv *block_rsv;
struct btrfs_space_info *space_info;
u64 reserved;
u64 max_reclaim;
u64 reclaimed = 0;
......@@ -3122,9 +3123,10 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
int ret;
block_rsv = &root->fs_info->delalloc_block_rsv;
spin_lock(&block_rsv->lock);
reserved = block_rsv->reserved;
spin_unlock(&block_rsv->lock);
space_info = block_rsv->space_info;
spin_lock(&space_info->lock);
reserved = space_info->bytes_reserved;
spin_unlock(&space_info->lock);
if (reserved == 0)
return 0;
......@@ -3132,7 +3134,7 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
max_reclaim = min(reserved, to_reclaim);
while (1) {
ret = btrfs_start_one_delalloc_inode(root, trans ? 1 : 0);
ret = btrfs_start_one_delalloc_inode(root, trans ? 1 : 0, sync);
if (!ret) {
if (no_reclaim > 2)
break;
......@@ -3147,11 +3149,11 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
pause = 1;
}
spin_lock(&block_rsv->lock);
if (reserved > block_rsv->reserved)
reclaimed = reserved - block_rsv->reserved;
reserved = block_rsv->reserved;
spin_unlock(&block_rsv->lock);
spin_lock(&space_info->lock);
if (reserved > space_info->bytes_reserved)
reclaimed += reserved - space_info->bytes_reserved;
reserved = space_info->bytes_reserved;
spin_unlock(&space_info->lock);
if (reserved == 0 || reclaimed >= max_reclaim)
break;
......@@ -3180,7 +3182,7 @@ static int should_retry_reserve(struct btrfs_trans_handle *trans,
if (trans && trans->transaction->in_commit)
return -ENOSPC;
ret = shrink_delalloc(trans, root, num_bytes);
ret = shrink_delalloc(trans, root, num_bytes, 0);
if (ret)
return ret;
......@@ -3729,7 +3731,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
block_rsv_add_bytes(block_rsv, to_reserve, 1);
if (block_rsv->size > 512 * 1024 * 1024)
shrink_delalloc(NULL, root, to_reserve);
shrink_delalloc(NULL, root, to_reserve, 0);
return 0;
}
......
......@@ -6603,7 +6603,8 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
return 0;
}
int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput)
int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput,
int sync)
{
struct btrfs_inode *binode;
struct inode *inode = NULL;
......@@ -6625,7 +6626,26 @@ int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput)
spin_unlock(&root->fs_info->delalloc_lock);
if (inode) {
write_inode_now(inode, 0);
if (sync) {
filemap_write_and_wait(inode->i_mapping);
/*
* We have to do this because compression doesn't
* actually set PG_writeback until it submits the pages
* for IO, which happens in an async thread, so we could
* race and not actually wait for any writeback pages
* because they've not been submitted yet. Technically
* this could still be the case for the ordered stuff
* since the async thread may not have started to do its
* work yet. If this becomes the case then we need to
* figure out a way to make sure that in writepage we
* wait for any async pages to be submitted before
* returning so that fdatawait does what its supposed to
* do.
*/
btrfs_wait_ordered_range(inode, 0, (u64)-1);
} else {
filemap_flush(inode->i_mapping);
}
if (delay_iput)
btrfs_add_delayed_iput(inode);
else
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment