Commit 7b508037, authored by Qu Wenruo, committed by David Sterba

btrfs: defrag: use defrag_one_cluster() to implement btrfs_defrag_file()

The function defrag_one_cluster() is able to defrag one range well
enough; we only need to do the preparation for it, including:

- Clamp and align the defrag range
- Exclude invalid cases
- Proper inode locking

The old infrastructure will not be removed in this patch, as doing so
would make the patch too noisy to review.
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
parent b18c3ab2
...@@ -1760,25 +1760,15 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra, ...@@ -1760,25 +1760,15 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
u64 newer_than, unsigned long max_to_defrag) u64 newer_than, unsigned long max_to_defrag)
{ {
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root; unsigned long sectors_defragged = 0;
unsigned long last_index;
u64 isize = i_size_read(inode); u64 isize = i_size_read(inode);
u64 last_len = 0; u64 cur;
u64 skip = 0; u64 last_byte;
u64 defrag_end = 0;
u64 newer_off = range->start;
unsigned long i;
unsigned long ra_index = 0;
int ret;
int defrag_count = 0;
int compress_type = BTRFS_COMPRESS_ZLIB;
u32 extent_thresh = range->extent_thresh;
unsigned long max_cluster = SZ_256K >> PAGE_SHIFT;
unsigned long cluster = max_cluster;
u64 new_align = ~((u64)SZ_128K - 1);
struct page **pages = NULL;
bool do_compress = range->flags & BTRFS_DEFRAG_RANGE_COMPRESS; bool do_compress = range->flags & BTRFS_DEFRAG_RANGE_COMPRESS;
bool ra_allocated = false; bool ra_allocated = false;
int compress_type = BTRFS_COMPRESS_ZLIB;
int ret = 0;
u32 extent_thresh = range->extent_thresh;
if (isize == 0) if (isize == 0)
return 0; return 0;
...@@ -1796,6 +1786,14 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra, ...@@ -1796,6 +1786,14 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
if (extent_thresh == 0) if (extent_thresh == 0)
extent_thresh = SZ_256K; extent_thresh = SZ_256K;
if (range->start + range->len > range->start) {
/* Got a specific range */
last_byte = min(isize, range->start + range->len) - 1;
} else {
/* Defrag until file end */
last_byte = isize - 1;
}
/* /*
* If we were not given a ra, allocate a readahead context. As * If we were not given a ra, allocate a readahead context. As
* readahead is just an optimization, defrag will work without it so * readahead is just an optimization, defrag will work without it so
...@@ -1808,159 +1806,67 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra, ...@@ -1808,159 +1806,67 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
file_ra_state_init(ra, inode->i_mapping); file_ra_state_init(ra, inode->i_mapping);
} }
pages = kmalloc_array(max_cluster, sizeof(struct page *), GFP_KERNEL); /* Align the range */
if (!pages) { cur = round_down(range->start, fs_info->sectorsize);
ret = -ENOMEM; last_byte = round_up(last_byte, fs_info->sectorsize) - 1;
goto out_ra;
}
/* find the last page to defrag */
if (range->start + range->len > range->start) {
last_index = min_t(u64, isize - 1,
range->start + range->len - 1) >> PAGE_SHIFT;
} else {
last_index = (isize - 1) >> PAGE_SHIFT;
}
if (newer_than) {
ret = find_new_extents(root, inode, newer_than,
&newer_off, SZ_64K);
if (!ret) {
range->start = newer_off;
/*
* we always align our defrag to help keep
* the extents in the file evenly spaced
*/
i = (newer_off & new_align) >> PAGE_SHIFT;
} else
goto out_ra;
} else {
i = range->start >> PAGE_SHIFT;
}
if (!max_to_defrag)
max_to_defrag = last_index - i + 1;
/*
* make writeback starts from i, so the defrag range can be
* written sequentially.
*/
if (i < inode->i_mapping->writeback_index)
inode->i_mapping->writeback_index = i;
while (i <= last_index && defrag_count < max_to_defrag && while (cur < last_byte) {
(i < DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE))) { u64 cluster_end;
/*
* make sure we stop running if someone unmounts
* the FS
*/
if (!(inode->i_sb->s_flags & SB_ACTIVE))
break;
if (btrfs_defrag_cancelled(fs_info)) {
btrfs_debug(fs_info, "defrag_file cancelled");
ret = -EAGAIN;
goto error;
}
if (!should_defrag_range(inode, (u64)i << PAGE_SHIFT,
extent_thresh, &last_len, &skip,
&defrag_end, do_compress)){
unsigned long next;
/*
* the should_defrag function tells us how much to skip
* bump our counter by the suggested amount
*/
next = DIV_ROUND_UP(skip, PAGE_SIZE);
i = max(i + 1, next);
continue;
}
if (!newer_than) { /* The cluster size 256K should always be page aligned */
cluster = (PAGE_ALIGN(defrag_end) >> BUILD_BUG_ON(!IS_ALIGNED(CLUSTER_SIZE, PAGE_SIZE));
PAGE_SHIFT) - i;
cluster = min(cluster, max_cluster);
} else {
cluster = max_cluster;
}
if (i + cluster > ra_index) { /* We want the cluster end at page boundary when possible */
ra_index = max(i, ra_index); cluster_end = (((cur >> PAGE_SHIFT) +
if (ra) (SZ_256K >> PAGE_SHIFT)) << PAGE_SHIFT) - 1;
page_cache_sync_readahead(inode->i_mapping, ra, cluster_end = min(cluster_end, last_byte);
NULL, ra_index, cluster);
ra_index += cluster;
}
btrfs_inode_lock(inode, 0); btrfs_inode_lock(inode, 0);
if (IS_SWAPFILE(inode)) { if (IS_SWAPFILE(inode)) {
ret = -ETXTBSY; ret = -ETXTBSY;
} else {
if (do_compress)
BTRFS_I(inode)->defrag_compress = compress_type;
ret = cluster_pages_for_defrag(inode, pages, i, cluster);
}
if (ret < 0) {
btrfs_inode_unlock(inode, 0); btrfs_inode_unlock(inode, 0);
goto out_ra; break;
} }
if (!(inode->i_sb->s_flags & SB_ACTIVE)) {
defrag_count += ret;
balance_dirty_pages_ratelimited(inode->i_mapping);
btrfs_inode_unlock(inode, 0); btrfs_inode_unlock(inode, 0);
if (newer_than) {
if (newer_off == (u64)-1)
break;
if (ret > 0)
i += ret;
newer_off = max(newer_off + 1,
(u64)i << PAGE_SHIFT);
ret = find_new_extents(root, inode, newer_than,
&newer_off, SZ_64K);
if (!ret) {
range->start = newer_off;
i = (newer_off & new_align) >> PAGE_SHIFT;
} else {
break; break;
} }
} else { if (do_compress)
if (ret > 0) { BTRFS_I(inode)->defrag_compress = compress_type;
i += ret; ret = defrag_one_cluster(BTRFS_I(inode), ra, cur,
last_len += ret << PAGE_SHIFT; cluster_end + 1 - cur, extent_thresh,
} else { newer_than, do_compress,
i++; &sectors_defragged, max_to_defrag);
last_len = 0; btrfs_inode_unlock(inode, 0);
} if (ret < 0)
} break;
cur = cluster_end + 1;
} }
ret = defrag_count; if (ra_allocated)
error: kfree(ra);
if ((range->flags & BTRFS_DEFRAG_RANGE_START_IO)) { if (sectors_defragged) {
/*
* We have defragged some sectors, for compression case they
* need to be written back immediately.
*/
if (range->flags & BTRFS_DEFRAG_RANGE_START_IO) {
filemap_flush(inode->i_mapping); filemap_flush(inode->i_mapping);
if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
&BTRFS_I(inode)->runtime_flags)) &BTRFS_I(inode)->runtime_flags))
filemap_flush(inode->i_mapping); filemap_flush(inode->i_mapping);
} }
if (range->compress_type == BTRFS_COMPRESS_LZO)
if (range->compress_type == BTRFS_COMPRESS_LZO) {
btrfs_set_fs_incompat(fs_info, COMPRESS_LZO); btrfs_set_fs_incompat(fs_info, COMPRESS_LZO);
} else if (range->compress_type == BTRFS_COMPRESS_ZSTD) { else if (range->compress_type == BTRFS_COMPRESS_ZSTD)
btrfs_set_fs_incompat(fs_info, COMPRESS_ZSTD); btrfs_set_fs_incompat(fs_info, COMPRESS_ZSTD);
ret = sectors_defragged;
} }
out_ra:
if (do_compress) { if (do_compress) {
btrfs_inode_lock(inode, 0); btrfs_inode_lock(inode, 0);
BTRFS_I(inode)->defrag_compress = BTRFS_COMPRESS_NONE; BTRFS_I(inode)->defrag_compress = BTRFS_COMPRESS_NONE;
btrfs_inode_unlock(inode, 0); btrfs_inode_unlock(inode, 0);
} }
if (ra_allocated)
kfree(ra);
kfree(pages);
return ret; return ret;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment