Commit 0ca1f7ce, authored by Yan, Zheng; committed by Chris Mason

Btrfs: Update metadata reservation for delayed allocation

Introduce metadata reservation context for delayed allocation
and update various related functions.

This patch also introduces the EXTENT_FIRST_DELALLOC control bit for
set/clear_extent_bit. It tells set_bit_hook/clear_bit_hook whether they
are processing the first extent_state that has the EXTENT_DELALLOC bit
set. This matters when a single set/clear_extent_bit call involves
multiple extent_state structures.
Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
parent a22285a6
......@@ -137,8 +137,8 @@ struct btrfs_inode {
* of extent items we've reserved metadata for.
*/
spinlock_t accounting_lock;
atomic_t outstanding_extents;
int reserved_extents;
int outstanding_extents;
/*
* ordered_data_close is set by truncate when a file that used
......
......@@ -2079,19 +2079,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags);
void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde);
void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root,
struct inode *inode, int num_items);
int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root,
struct inode *inode, int num_items);
int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
u64 bytes);
void btrfs_free_reserved_data_space(struct btrfs_root *root,
struct inode *inode, u64 bytes);
void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
u64 bytes);
void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
u64 bytes);
int btrfs_check_data_free_space(struct inode *inode, u64 bytes);
void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes);
int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
int num_items, int *retries);
......@@ -2099,6 +2088,10 @@ void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans,
struct btrfs_pending_snapshot *pending);
int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes);
void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes);
int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes);
void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes);
void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv);
struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root);
void btrfs_free_block_rsv(struct btrfs_root *root,
......
This diff is collapsed.
......@@ -335,21 +335,18 @@ static int merge_state(struct extent_io_tree *tree,
}
static int set_state_cb(struct extent_io_tree *tree,
struct extent_state *state,
unsigned long bits)
struct extent_state *state, int *bits)
{
if (tree->ops && tree->ops->set_bit_hook) {
return tree->ops->set_bit_hook(tree->mapping->host,
state->start, state->end,
state->state, bits);
state, bits);
}
return 0;
}
static void clear_state_cb(struct extent_io_tree *tree,
struct extent_state *state,
unsigned long bits)
struct extent_state *state, int *bits)
{
if (tree->ops && tree->ops->clear_bit_hook)
tree->ops->clear_bit_hook(tree->mapping->host, state, bits);
......@@ -367,9 +364,10 @@ static void clear_state_cb(struct extent_io_tree *tree,
*/
static int insert_state(struct extent_io_tree *tree,
struct extent_state *state, u64 start, u64 end,
int bits)
int *bits)
{
struct rb_node *node;
int bits_to_set = *bits & ~EXTENT_CTLBITS;
int ret;
if (end < start) {
......@@ -384,9 +382,9 @@ static int insert_state(struct extent_io_tree *tree,
if (ret)
return ret;
if (bits & EXTENT_DIRTY)
if (bits_to_set & EXTENT_DIRTY)
tree->dirty_bytes += end - start + 1;
state->state |= bits;
state->state |= bits_to_set;
node = tree_insert(&tree->state, end, &state->rb_node);
if (node) {
struct extent_state *found;
......@@ -456,13 +454,13 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
* struct is freed and removed from the tree
*/
static int clear_state_bit(struct extent_io_tree *tree,
struct extent_state *state, int bits, int wake,
int delete)
struct extent_state *state,
int *bits, int wake)
{
int bits_to_clear = bits & ~EXTENT_DO_ACCOUNTING;
int bits_to_clear = *bits & ~EXTENT_CTLBITS;
int ret = state->state & bits_to_clear;
if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
u64 range = state->end - state->start + 1;
WARN_ON(range > tree->dirty_bytes);
tree->dirty_bytes -= range;
......@@ -471,9 +469,8 @@ static int clear_state_bit(struct extent_io_tree *tree,
state->state &= ~bits_to_clear;
if (wake)
wake_up(&state->wq);
if (delete || state->state == 0) {
if (state->state == 0) {
if (state->tree) {
clear_state_cb(tree, state, state->state);
rb_erase(&state->rb_node, &tree->state);
state->tree = NULL;
free_extent_state(state);
......@@ -514,6 +511,10 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
int set = 0;
int clear = 0;
if (delete)
bits |= ~EXTENT_CTLBITS;
bits |= EXTENT_FIRST_DELALLOC;
if (bits & (EXTENT_IOBITS | EXTENT_BOUNDARY))
clear = 1;
again:
......@@ -580,8 +581,7 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
if (err)
goto out;
if (state->end <= end) {
set |= clear_state_bit(tree, state, bits, wake,
delete);
set |= clear_state_bit(tree, state, &bits, wake);
if (last_end == (u64)-1)
goto out;
start = last_end + 1;
......@@ -602,7 +602,7 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
if (wake)
wake_up(&state->wq);
set |= clear_state_bit(tree, prealloc, bits, wake, delete);
set |= clear_state_bit(tree, prealloc, &bits, wake);
prealloc = NULL;
goto out;
......@@ -613,7 +613,7 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
else
next_node = NULL;
set |= clear_state_bit(tree, state, bits, wake, delete);
set |= clear_state_bit(tree, state, &bits, wake);
if (last_end == (u64)-1)
goto out;
start = last_end + 1;
......@@ -706,19 +706,19 @@ int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits)
static int set_state_bits(struct extent_io_tree *tree,
struct extent_state *state,
int bits)
int *bits)
{
int ret;
int bits_to_set = *bits & ~EXTENT_CTLBITS;
ret = set_state_cb(tree, state, bits);
if (ret)
return ret;
if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
u64 range = state->end - state->start + 1;
tree->dirty_bytes += range;
}
state->state |= bits;
state->state |= bits_to_set;
return 0;
}
......@@ -757,6 +757,7 @@ static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
u64 last_start;
u64 last_end;
bits |= EXTENT_FIRST_DELALLOC;
again:
if (!prealloc && (mask & __GFP_WAIT)) {
prealloc = alloc_extent_state(mask);
......@@ -778,7 +779,7 @@ static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
*/
node = tree_search(tree, start);
if (!node) {
err = insert_state(tree, prealloc, start, end, bits);
err = insert_state(tree, prealloc, start, end, &bits);
prealloc = NULL;
BUG_ON(err == -EEXIST);
goto out;
......@@ -802,7 +803,7 @@ static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
goto out;
}
err = set_state_bits(tree, state, bits);
err = set_state_bits(tree, state, &bits);
if (err)
goto out;
......@@ -852,7 +853,7 @@ static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
if (err)
goto out;
if (state->end <= end) {
err = set_state_bits(tree, state, bits);
err = set_state_bits(tree, state, &bits);
if (err)
goto out;
cache_state(state, cached_state);
......@@ -877,7 +878,7 @@ static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
else
this_end = last_start - 1;
err = insert_state(tree, prealloc, start, this_end,
bits);
&bits);
BUG_ON(err == -EEXIST);
if (err) {
prealloc = NULL;
......@@ -903,7 +904,7 @@ static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
err = split_state(tree, state, prealloc, end + 1);
BUG_ON(err == -EEXIST);
err = set_state_bits(tree, prealloc, bits);
err = set_state_bits(tree, prealloc, &bits);
if (err) {
prealloc = NULL;
goto out;
......@@ -966,8 +967,7 @@ int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
{
return clear_extent_bit(tree, start, end,
EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING, 0, 0,
NULL, mask);
EXTENT_DO_ACCOUNTING, 0, 0, NULL, mask);
}
int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
......@@ -1435,9 +1435,6 @@ int extent_clear_unlock_delalloc(struct inode *inode,
if (op & EXTENT_CLEAR_DELALLOC)
clear_bits |= EXTENT_DELALLOC;
if (op & EXTENT_CLEAR_ACCOUNTING)
clear_bits |= EXTENT_DO_ACCOUNTING;
clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS);
if (!(op & (EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY |
EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK |
......
......@@ -16,7 +16,9 @@
#define EXTENT_BOUNDARY (1 << 9)
#define EXTENT_NODATASUM (1 << 10)
#define EXTENT_DO_ACCOUNTING (1 << 11)
#define EXTENT_FIRST_DELALLOC (1 << 12)
#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC)
/* flags for bio submission */
#define EXTENT_BIO_COMPRESSED 1
......@@ -69,10 +71,10 @@ struct extent_io_ops {
struct extent_state *state);
int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end,
struct extent_state *state, int uptodate);
int (*set_bit_hook)(struct inode *inode, u64 start, u64 end,
unsigned long old, unsigned long bits);
int (*set_bit_hook)(struct inode *inode, struct extent_state *state,
int *bits);
int (*clear_bit_hook)(struct inode *inode, struct extent_state *state,
unsigned long bits);
int *bits);
int (*merge_extent_hook)(struct inode *inode,
struct extent_state *new,
struct extent_state *other);
......
......@@ -852,13 +852,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
/* do the reserve before the mutex lock in case we have to do some
* flushing. We wouldn't deadlock, but this is more polite.
*/
err = btrfs_reserve_metadata_for_delalloc(root, inode, 1);
if (err)
goto out_nolock;
mutex_lock(&inode->i_mutex);
current->backing_dev_info = inode->i_mapping->backing_dev_info;
......@@ -921,7 +914,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
WARN_ON(num_pages > nrptrs);
memset(pages, 0, sizeof(struct page *) * nrptrs);
ret = btrfs_check_data_free_space(root, inode, write_bytes);
ret = btrfs_delalloc_reserve_space(inode, write_bytes);
if (ret)
goto out;
......@@ -929,26 +922,20 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
pos, first_index, last_index,
write_bytes);
if (ret) {
btrfs_free_reserved_data_space(root, inode,
write_bytes);
btrfs_delalloc_release_space(inode, write_bytes);
goto out;
}
ret = btrfs_copy_from_user(pos, num_pages,
write_bytes, pages, buf);
if (ret) {
btrfs_free_reserved_data_space(root, inode,
write_bytes);
btrfs_drop_pages(pages, num_pages);
goto out;
if (ret == 0) {
dirty_and_release_pages(NULL, root, file, pages,
num_pages, pos, write_bytes);
}
ret = dirty_and_release_pages(NULL, root, file, pages,
num_pages, pos, write_bytes);
btrfs_drop_pages(pages, num_pages);
if (ret) {
btrfs_free_reserved_data_space(root, inode,
write_bytes);
btrfs_delalloc_release_space(inode, write_bytes);
goto out;
}
......@@ -975,9 +962,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
mutex_unlock(&inode->i_mutex);
if (ret)
err = ret;
btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
out_nolock:
kfree(pages);
if (pinned[0])
page_cache_release(pinned[0]);
......
This diff is collapsed.
......@@ -587,19 +587,9 @@ static int btrfs_defrag_file(struct file *file,
if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)
BTRFS_I(inode)->force_compress = 1;
ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE);
if (ret) {
ret = -ENOSPC;
break;
}
ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1);
if (ret) {
btrfs_free_reserved_data_space(root, inode,
PAGE_CACHE_SIZE);
ret = -ENOSPC;
break;
}
ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
if (ret)
goto err_unlock;
again:
if (inode->i_size == 0 ||
i > ((inode->i_size - 1) >> PAGE_CACHE_SHIFT)) {
......@@ -608,8 +598,10 @@ static int btrfs_defrag_file(struct file *file,
}
page = grab_cache_page(inode->i_mapping, i);
if (!page)
if (!page) {
ret = -ENOMEM;
goto err_reservations;
}
if (!PageUptodate(page)) {
btrfs_readpage(NULL, page);
......@@ -617,6 +609,7 @@ static int btrfs_defrag_file(struct file *file,
if (!PageUptodate(page)) {
unlock_page(page);
page_cache_release(page);
ret = -EIO;
goto err_reservations;
}
}
......@@ -630,8 +623,7 @@ static int btrfs_defrag_file(struct file *file,
wait_on_page_writeback(page);
if (PageDirty(page)) {
btrfs_free_reserved_data_space(root, inode,
PAGE_CACHE_SIZE);
btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
goto loop_unlock;
}
......@@ -669,7 +661,6 @@ static int btrfs_defrag_file(struct file *file,
page_cache_release(page);
mutex_unlock(&inode->i_mutex);
btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1);
i++;
}
......@@ -699,9 +690,9 @@ static int btrfs_defrag_file(struct file *file,
return 0;
err_reservations:
btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
err_unlock:
mutex_unlock(&inode->i_mutex);
btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
return ret;
}
......
......@@ -311,13 +311,6 @@ static int __btrfs_remove_ordered_extent(struct inode *inode,
tree->last = NULL;
set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
spin_lock(&BTRFS_I(inode)->accounting_lock);
WARN_ON(!BTRFS_I(inode)->outstanding_extents);
BTRFS_I(inode)->outstanding_extents--;
spin_unlock(&BTRFS_I(inode)->accounting_lock);
btrfs_unreserve_metadata_for_delalloc(BTRFS_I(inode)->root,
inode, 1);
spin_lock(&root->fs_info->ordered_extent_lock);
list_del_init(&entry->root_extent_list);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment