Commit efa56464, authored by Yan, Zheng; committed by Chris Mason

Btrfs: Pre-allocate space for data relocation

Pre-allocate space for data relocation. This can detect ENOSPC
conditions caused by fragmentation of free space.
Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
parent 4a500fd1
...@@ -2420,6 +2420,9 @@ int btrfs_cont_expand(struct inode *inode, loff_t size); ...@@ -2420,6 +2420,9 @@ int btrfs_cont_expand(struct inode *inode, loff_t size);
int btrfs_invalidate_inodes(struct btrfs_root *root); int btrfs_invalidate_inodes(struct btrfs_root *root);
void btrfs_add_delayed_iput(struct inode *inode); void btrfs_add_delayed_iput(struct inode *inode);
void btrfs_run_delayed_iputs(struct btrfs_root *root); void btrfs_run_delayed_iputs(struct btrfs_root *root);
/*
 * Preallocate extents for a byte range of @inode.
 *
 * @mode:        fallocate mode flags; when FALLOC_FL_KEEP_SIZE is set the
 *               inode size is left untouched.
 * @start:       file offset at which preallocation begins.
 * @num_bytes:   number of bytes to preallocate; the definition loops until
 *               this many bytes have been reserved or a step fails.
 * @min_size:    passed through to btrfs_reserve_extent() as the minimum
 *               size of each reservation.
 * @actual_len:  compared against i_size when deciding whether the inode
 *               size should grow.
 * @alloc_hint:  in/out; read as the hint for each reservation and updated
 *               to the end of the extent that was just reserved.
 *
 * Returns 0 on success, otherwise the error from the failing step.
 */
int btrfs_prealloc_file_range(struct inode *inode, int mode,
u64 start, u64 num_bytes, u64 min_size,
loff_t actual_len, u64 *alloc_hint);
extern const struct dentry_operations btrfs_dentry_operations; extern const struct dentry_operations btrfs_dentry_operations;
/* ioctl.c */ /* ioctl.c */
......
...@@ -1175,6 +1175,13 @@ static noinline int run_delalloc_nocow(struct inode *inode, ...@@ -1175,6 +1175,13 @@ static noinline int run_delalloc_nocow(struct inode *inode,
num_bytes, num_bytes, type); num_bytes, num_bytes, type);
BUG_ON(ret); BUG_ON(ret);
if (root->root_key.objectid ==
BTRFS_DATA_RELOC_TREE_OBJECTID) {
ret = btrfs_reloc_clone_csums(inode, cur_offset,
num_bytes);
BUG_ON(ret);
}
extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
cur_offset, cur_offset + num_bytes - 1, cur_offset, cur_offset + num_bytes - 1,
locked_page, EXTENT_CLEAR_UNLOCK_PAGE | locked_page, EXTENT_CLEAR_UNLOCK_PAGE |
...@@ -6080,16 +6087,15 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, ...@@ -6080,16 +6087,15 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
return err; return err;
} }
static int prealloc_file_range(struct inode *inode, u64 start, u64 end, int btrfs_prealloc_file_range(struct inode *inode, int mode,
u64 alloc_hint, int mode, loff_t actual_len) u64 start, u64 num_bytes, u64 min_size,
loff_t actual_len, u64 *alloc_hint)
{ {
struct btrfs_trans_handle *trans; struct btrfs_trans_handle *trans;
struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_key ins; struct btrfs_key ins;
u64 cur_offset = start; u64 cur_offset = start;
u64 num_bytes = end - start;
int ret = 0; int ret = 0;
u64 i_size;
while (num_bytes > 0) { while (num_bytes > 0) {
trans = btrfs_start_transaction(root, 3); trans = btrfs_start_transaction(root, 3);
...@@ -6098,9 +6104,8 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end, ...@@ -6098,9 +6104,8 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
break; break;
} }
ret = btrfs_reserve_extent(trans, root, num_bytes, ret = btrfs_reserve_extent(trans, root, num_bytes, min_size,
root->sectorsize, 0, alloc_hint, 0, *alloc_hint, (u64)-1, &ins, 1);
(u64)-1, &ins, 1);
if (ret) { if (ret) {
btrfs_end_transaction(trans, root); btrfs_end_transaction(trans, root);
break; break;
...@@ -6117,20 +6122,19 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end, ...@@ -6117,20 +6122,19 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
num_bytes -= ins.offset; num_bytes -= ins.offset;
cur_offset += ins.offset; cur_offset += ins.offset;
alloc_hint = ins.objectid + ins.offset; *alloc_hint = ins.objectid + ins.offset;
inode->i_ctime = CURRENT_TIME; inode->i_ctime = CURRENT_TIME;
BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC;
if (!(mode & FALLOC_FL_KEEP_SIZE) && if (!(mode & FALLOC_FL_KEEP_SIZE) &&
(actual_len > inode->i_size) && (actual_len > inode->i_size) &&
(cur_offset > inode->i_size)) { (cur_offset > inode->i_size)) {
if (cur_offset > actual_len) if (cur_offset > actual_len)
i_size = actual_len; i_size_write(inode, actual_len);
else else
i_size = cur_offset; i_size_write(inode, cur_offset);
i_size_write(inode, i_size); i_size_write(inode, cur_offset);
btrfs_ordered_update_i_size(inode, i_size, NULL); btrfs_ordered_update_i_size(inode, cur_offset, NULL);
} }
ret = btrfs_update_inode(trans, root, inode); ret = btrfs_update_inode(trans, root, inode);
...@@ -6216,16 +6220,16 @@ static long btrfs_fallocate(struct inode *inode, int mode, ...@@ -6216,16 +6220,16 @@ static long btrfs_fallocate(struct inode *inode, int mode,
if (em->block_start == EXTENT_MAP_HOLE || if (em->block_start == EXTENT_MAP_HOLE ||
(cur_offset >= inode->i_size && (cur_offset >= inode->i_size &&
!test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
ret = prealloc_file_range(inode, ret = btrfs_prealloc_file_range(inode, 0, cur_offset,
cur_offset, last_byte, last_byte - cur_offset,
alloc_hint, mode, offset+len); 1 << inode->i_blkbits,
offset + len,
&alloc_hint);
if (ret < 0) { if (ret < 0) {
free_extent_map(em); free_extent_map(em);
break; break;
} }
} }
if (em->block_start <= EXTENT_MAP_LAST_BYTE)
alloc_hint = em->block_start;
free_extent_map(em); free_extent_map(em);
cur_offset = last_byte; cur_offset = last_byte;
......
...@@ -2545,6 +2545,50 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans, ...@@ -2545,6 +2545,50 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans,
return err; return err;
} }
/*
 * Pre-allocate file extents in @inode for every extent described by
 * @cluster.
 *
 * Cluster boundaries are converted to file offsets by subtracting the
 * inode's index_cnt.  Each extent range is locked in the inode's io_tree
 * only around its btrfs_prealloc_file_range() call.  The extent length is
 * passed as both num_bytes and min_size -- presumably so that every extent
 * has to be satisfied by one allocation of the full length (confirm against
 * btrfs_reserve_extent()).
 *
 * Data space for the whole cluster is checked before the loop and released
 * again afterwards.  The whole operation runs with inode->i_mutex held.
 *
 * Returns 0 on success, or the first non-zero value returned by
 * btrfs_check_data_free_space() or btrfs_prealloc_file_range().
 */
static noinline_for_stack
int prealloc_file_extent_cluster(struct inode *inode,
				 struct file_extent_cluster *cluster)
{
	u64 hint = 0;
	u64 first;
	u64 last;
	u64 base = BTRFS_I(inode)->index_cnt;
	u64 len;
	u64 total;
	int i;
	int ret = 0;

	BUG_ON(cluster->start != cluster->boundary[0]);
	mutex_lock(&inode->i_mutex);

	/* Byte count covering the whole cluster, used for check and release. */
	total = cluster->end + 1 - cluster->start;

	ret = btrfs_check_data_free_space(inode, total);
	if (ret) {
		mutex_unlock(&inode->i_mutex);
		return ret;
	}

	for (i = 0; i < cluster->nr; i++) {
		first = cluster->boundary[i] - base;
		/* The last extent runs to the end of the cluster. */
		last = (i + 1 < cluster->nr) ?
			cluster->boundary[i + 1] - 1 - base :
			cluster->end - base;
		len = last + 1 - first;

		lock_extent(&BTRFS_I(inode)->io_tree, first, last, GFP_NOFS);
		ret = btrfs_prealloc_file_range(inode, 0, first,
						len, len,
						last + 1, &hint);
		unlock_extent(&BTRFS_I(inode)->io_tree, first, last, GFP_NOFS);
		if (ret)
			break;
	}

	/* Reached on both success and mid-loop failure, as in the original. */
	btrfs_free_reserved_data_space(inode, total);
	mutex_unlock(&inode->i_mutex);
	return ret;
}
static noinline_for_stack static noinline_for_stack
int setup_extent_mapping(struct inode *inode, u64 start, u64 end, int setup_extent_mapping(struct inode *inode, u64 start, u64 end,
u64 block_start) u64 block_start)
...@@ -2588,7 +2632,6 @@ static int relocate_file_extent_cluster(struct inode *inode, ...@@ -2588,7 +2632,6 @@ static int relocate_file_extent_cluster(struct inode *inode,
u64 offset = BTRFS_I(inode)->index_cnt; u64 offset = BTRFS_I(inode)->index_cnt;
unsigned long index; unsigned long index;
unsigned long last_index; unsigned long last_index;
unsigned int dirty_page = 0;
struct page *page; struct page *page;
struct file_ra_state *ra; struct file_ra_state *ra;
int nr = 0; int nr = 0;
...@@ -2601,21 +2644,24 @@ static int relocate_file_extent_cluster(struct inode *inode, ...@@ -2601,21 +2644,24 @@ static int relocate_file_extent_cluster(struct inode *inode,
if (!ra) if (!ra)
return -ENOMEM; return -ENOMEM;
index = (cluster->start - offset) >> PAGE_CACHE_SHIFT; ret = prealloc_file_extent_cluster(inode, cluster);
last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT; if (ret)
goto out;
mutex_lock(&inode->i_mutex); file_ra_state_init(ra, inode->i_mapping);
i_size_write(inode, cluster->end + 1 - offset);
ret = setup_extent_mapping(inode, cluster->start - offset, ret = setup_extent_mapping(inode, cluster->start - offset,
cluster->end - offset, cluster->start); cluster->end - offset, cluster->start);
if (ret) if (ret)
goto out_unlock; goto out;
file_ra_state_init(ra, inode->i_mapping);
WARN_ON(cluster->start != cluster->boundary[0]); index = (cluster->start - offset) >> PAGE_CACHE_SHIFT;
last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT;
while (index <= last_index) { while (index <= last_index) {
ret = btrfs_delalloc_reserve_metadata(inode, PAGE_CACHE_SIZE);
if (ret)
goto out;
page = find_lock_page(inode->i_mapping, index); page = find_lock_page(inode->i_mapping, index);
if (!page) { if (!page) {
page_cache_sync_readahead(inode->i_mapping, page_cache_sync_readahead(inode->i_mapping,
...@@ -2623,8 +2669,10 @@ static int relocate_file_extent_cluster(struct inode *inode, ...@@ -2623,8 +2669,10 @@ static int relocate_file_extent_cluster(struct inode *inode,
last_index + 1 - index); last_index + 1 - index);
page = grab_cache_page(inode->i_mapping, index); page = grab_cache_page(inode->i_mapping, index);
if (!page) { if (!page) {
btrfs_delalloc_release_metadata(inode,
PAGE_CACHE_SIZE);
ret = -ENOMEM; ret = -ENOMEM;
goto out_unlock; goto out;
} }
} }
...@@ -2640,8 +2688,10 @@ static int relocate_file_extent_cluster(struct inode *inode, ...@@ -2640,8 +2688,10 @@ static int relocate_file_extent_cluster(struct inode *inode,
if (!PageUptodate(page)) { if (!PageUptodate(page)) {
unlock_page(page); unlock_page(page);
page_cache_release(page); page_cache_release(page);
btrfs_delalloc_release_metadata(inode,
PAGE_CACHE_SIZE);
ret = -EIO; ret = -EIO;
goto out_unlock; goto out;
} }
} }
...@@ -2660,10 +2710,9 @@ static int relocate_file_extent_cluster(struct inode *inode, ...@@ -2660,10 +2710,9 @@ static int relocate_file_extent_cluster(struct inode *inode,
EXTENT_BOUNDARY, GFP_NOFS); EXTENT_BOUNDARY, GFP_NOFS);
nr++; nr++;
} }
btrfs_set_extent_delalloc(inode, page_start, page_end, NULL);
btrfs_set_extent_delalloc(inode, page_start, page_end, NULL);
set_page_dirty(page); set_page_dirty(page);
dirty_page++;
unlock_extent(&BTRFS_I(inode)->io_tree, unlock_extent(&BTRFS_I(inode)->io_tree,
page_start, page_end, GFP_NOFS); page_start, page_end, GFP_NOFS);
...@@ -2671,20 +2720,11 @@ static int relocate_file_extent_cluster(struct inode *inode, ...@@ -2671,20 +2720,11 @@ static int relocate_file_extent_cluster(struct inode *inode,
page_cache_release(page); page_cache_release(page);
index++; index++;
if (nr < cluster->nr && balance_dirty_pages_ratelimited(inode->i_mapping);
page_end + 1 + offset == cluster->boundary[nr]) { btrfs_throttle(BTRFS_I(inode)->root);
balance_dirty_pages_ratelimited_nr(inode->i_mapping,
dirty_page);
dirty_page = 0;
}
}
if (dirty_page) {
balance_dirty_pages_ratelimited_nr(inode->i_mapping,
dirty_page);
} }
WARN_ON(nr != cluster->nr); WARN_ON(nr != cluster->nr);
out_unlock: out:
mutex_unlock(&inode->i_mutex);
kfree(ra); kfree(ra);
return ret; return ret;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment