Commit 5da9d01b authored by Yan, Zheng; committed by Chris Mason

Btrfs: Shrink delayed allocation space in a synchronized manner

Shrinking delayed allocation space in a synchronized manner is more
controllable than flushing all delayed allocation space in an async
thread.
Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
parent 424499db
...@@ -700,10 +700,6 @@ struct btrfs_space_info { ...@@ -700,10 +700,6 @@ struct btrfs_space_info {
struct list_head list; struct list_head list;
/* for controlling how we free up space for allocations */
wait_queue_head_t flush_wait;
int flushing;
/* for block groups in our same type */ /* for block groups in our same type */
struct list_head block_groups[BTRFS_NR_RAID_TYPES]; struct list_head block_groups[BTRFS_NR_RAID_TYPES];
spinlock_t lock; spinlock_t lock;
...@@ -928,7 +924,6 @@ struct btrfs_fs_info { ...@@ -928,7 +924,6 @@ struct btrfs_fs_info {
struct btrfs_workers endio_meta_write_workers; struct btrfs_workers endio_meta_write_workers;
struct btrfs_workers endio_write_workers; struct btrfs_workers endio_write_workers;
struct btrfs_workers submit_workers; struct btrfs_workers submit_workers;
struct btrfs_workers enospc_workers;
/* /*
* fixup workers take dirty pages that didn't properly go through * fixup workers take dirty pages that didn't properly go through
* the cow mechanism and make them safe to write. It happens * the cow mechanism and make them safe to write. It happens
...@@ -2312,6 +2307,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, ...@@ -2312,6 +2307,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
u32 min_type); u32 min_type);
int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput); int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput);
int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput);
int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
struct extent_state **cached_state); struct extent_state **cached_state);
int btrfs_writepages(struct address_space *mapping, int btrfs_writepages(struct address_space *mapping,
......
...@@ -1759,9 +1759,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, ...@@ -1759,9 +1759,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
min_t(u64, fs_devices->num_devices, min_t(u64, fs_devices->num_devices,
fs_info->thread_pool_size), fs_info->thread_pool_size),
&fs_info->generic_worker); &fs_info->generic_worker);
btrfs_init_workers(&fs_info->enospc_workers, "enospc",
fs_info->thread_pool_size,
&fs_info->generic_worker);
/* a higher idle thresh on the submit workers makes it much more /* a higher idle thresh on the submit workers makes it much more
* likely that bios will be send down in a sane order to the * likely that bios will be send down in a sane order to the
...@@ -1809,7 +1806,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, ...@@ -1809,7 +1806,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
btrfs_start_workers(&fs_info->endio_meta_workers, 1); btrfs_start_workers(&fs_info->endio_meta_workers, 1);
btrfs_start_workers(&fs_info->endio_meta_write_workers, 1); btrfs_start_workers(&fs_info->endio_meta_write_workers, 1);
btrfs_start_workers(&fs_info->endio_write_workers, 1); btrfs_start_workers(&fs_info->endio_write_workers, 1);
btrfs_start_workers(&fs_info->enospc_workers, 1);
fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
...@@ -2040,7 +2036,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, ...@@ -2040,7 +2036,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
btrfs_stop_workers(&fs_info->endio_meta_write_workers); btrfs_stop_workers(&fs_info->endio_meta_write_workers);
btrfs_stop_workers(&fs_info->endio_write_workers); btrfs_stop_workers(&fs_info->endio_write_workers);
btrfs_stop_workers(&fs_info->submit_workers); btrfs_stop_workers(&fs_info->submit_workers);
btrfs_stop_workers(&fs_info->enospc_workers);
fail_iput: fail_iput:
invalidate_inode_pages2(fs_info->btree_inode->i_mapping); invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
iput(fs_info->btree_inode); iput(fs_info->btree_inode);
...@@ -2473,7 +2468,6 @@ int close_ctree(struct btrfs_root *root) ...@@ -2473,7 +2468,6 @@ int close_ctree(struct btrfs_root *root)
btrfs_stop_workers(&fs_info->endio_meta_write_workers); btrfs_stop_workers(&fs_info->endio_meta_write_workers);
btrfs_stop_workers(&fs_info->endio_write_workers); btrfs_stop_workers(&fs_info->endio_write_workers);
btrfs_stop_workers(&fs_info->submit_workers); btrfs_stop_workers(&fs_info->submit_workers);
btrfs_stop_workers(&fs_info->enospc_workers);
btrfs_close_devices(fs_info->fs_devices); btrfs_close_devices(fs_info->fs_devices);
btrfs_mapping_tree_free(&fs_info->mapping_tree); btrfs_mapping_tree_free(&fs_info->mapping_tree);
......
...@@ -74,6 +74,9 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes, ...@@ -74,6 +74,9 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
static int maybe_allocate_chunk(struct btrfs_trans_handle *trans, static int maybe_allocate_chunk(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_root *root,
struct btrfs_space_info *sinfo, u64 num_bytes); struct btrfs_space_info *sinfo, u64 num_bytes);
static int shrink_delalloc(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_space_info *sinfo, u64 to_reclaim);
static noinline int static noinline int
block_group_cache_done(struct btrfs_block_group_cache *cache) block_group_cache_done(struct btrfs_block_group_cache *cache)
...@@ -2693,7 +2696,6 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, ...@@ -2693,7 +2696,6 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
INIT_LIST_HEAD(&found->block_groups[i]); INIT_LIST_HEAD(&found->block_groups[i]);
init_rwsem(&found->groups_sem); init_rwsem(&found->groups_sem);
init_waitqueue_head(&found->flush_wait);
spin_lock_init(&found->lock); spin_lock_init(&found->lock);
found->flags = flags & (BTRFS_BLOCK_GROUP_DATA | found->flags = flags & (BTRFS_BLOCK_GROUP_DATA |
BTRFS_BLOCK_GROUP_SYSTEM | BTRFS_BLOCK_GROUP_SYSTEM |
...@@ -2907,105 +2909,6 @@ static void check_force_delalloc(struct btrfs_space_info *meta_sinfo) ...@@ -2907,105 +2909,6 @@ static void check_force_delalloc(struct btrfs_space_info *meta_sinfo)
meta_sinfo->force_delalloc = 0; meta_sinfo->force_delalloc = 0;
} }
/*
 * Request handed to the "enospc" worker pool by flush_delalloc() and
 * consumed (and freed) by flush_delalloc_async().
 */
struct async_flush {
/* root whose delalloc inodes are started and whose ordered extents are waited on */
struct btrfs_root *root;
/* space info whose ->flushing flag is cleared and whose ->flush_wait is woken */
struct btrfs_space_info *info;
/* embedded work item; the worker recovers the request from it via container_of() */
struct btrfs_work work;
};
/*
 * Worker callback for an async_flush request.
 *
 * Starts delalloc writeback on the request's root, wakes anyone sleeping
 * on the space info's flush_wait queue, waits for ordered extents, then
 * clears ->flushing under the space info lock and wakes the sleepers a
 * second time.  The request itself is freed before returning.
 */
static noinline void flush_delalloc_async(struct btrfs_work *work)
{
	struct async_flush *req = container_of(work, struct async_flush, work);
	struct btrfs_space_info *sinfo = req->info;
	struct btrfs_root *flush_root = req->root;

	btrfs_start_delalloc_inodes(flush_root, 0);

	/*
	 * Early wake-up: wait_on_flush() re-checks the space counters each
	 * time it is woken, so waiters may leave before the flush finishes.
	 */
	wake_up(&sinfo->flush_wait);

	btrfs_wait_ordered_extents(flush_root, 0, 0);

	/* Mark the flush as finished, then release the remaining waiters. */
	spin_lock(&sinfo->lock);
	sinfo->flushing = 0;
	spin_unlock(&sinfo->lock);
	wake_up(&sinfo->flush_wait);

	kfree(req);
}
static void wait_on_flush(struct btrfs_space_info *info)
{
DEFINE_WAIT(wait);
u64 used;
while (1) {
prepare_to_wait(&info->flush_wait, &wait,
TASK_UNINTERRUPTIBLE);
spin_lock(&info->lock);
if (!info->flushing) {
spin_unlock(&info->lock);
break;
}
used = info->bytes_used + info->bytes_reserved +
info->bytes_pinned + info->bytes_readonly +
info->bytes_super + info->bytes_root +
info->bytes_may_use + info->bytes_delalloc;
if (used < info->total_bytes) {
spin_unlock(&info->lock);
break;
}
spin_unlock(&info->lock);
schedule();
}
finish_wait(&info->flush_wait, &wait);
}
/*
 * Flush delalloc for @root on behalf of @info.
 *
 * Only one flush per space info runs at a time: the caller that flips
 * info->flushing from 0 to 1 owns the flush, everybody else just waits
 * in wait_on_flush().  The owner queues the work on the enospc workers
 * and then waits as well; if the request cannot be allocated it performs
 * the flush itself, clears ->flushing and wakes the waiters.
 */
static void flush_delalloc(struct btrfs_root *root,
			   struct btrfs_space_info *info)
{
	struct async_flush *req;
	int in_progress;

	/* Claim the flush unless somebody else already did. */
	spin_lock(&info->lock);
	in_progress = info->flushing;
	if (!in_progress)
		info->flushing = 1;
	spin_unlock(&info->lock);

	if (in_progress) {
		wait_on_flush(info);
		return;
	}

	req = kzalloc(sizeof(*req), GFP_NOFS);
	if (req) {
		req->root = root;
		req->info = info;
		req->work.func = flush_delalloc_async;

		btrfs_queue_worker(&root->fs_info->enospc_workers,
				   &req->work);
		wait_on_flush(info);
		return;
	}

	/* Allocation failed: do the flush synchronously in this context. */
	btrfs_start_delalloc_inodes(root, 0);
	btrfs_wait_ordered_extents(root, 0, 0);

	spin_lock(&info->lock);
	info->flushing = 0;
	spin_unlock(&info->lock);
	wake_up(&info->flush_wait);
}
/* /*
* Reserve metadata space for delalloc. * Reserve metadata space for delalloc.
*/ */
...@@ -3058,7 +2961,7 @@ int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root, ...@@ -3058,7 +2961,7 @@ int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root,
filemap_flush(inode->i_mapping); filemap_flush(inode->i_mapping);
goto again; goto again;
} else if (flushed == 3) { } else if (flushed == 3) {
flush_delalloc(root, meta_sinfo); shrink_delalloc(NULL, root, meta_sinfo, num_bytes);
goto again; goto again;
} }
spin_lock(&meta_sinfo->lock); spin_lock(&meta_sinfo->lock);
...@@ -3171,7 +3074,7 @@ int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items) ...@@ -3171,7 +3074,7 @@ int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items)
} }
if (retries == 2) { if (retries == 2) {
flush_delalloc(root, meta_sinfo); shrink_delalloc(NULL, root, meta_sinfo, num_bytes);
goto again; goto again;
} }
spin_lock(&meta_sinfo->lock); spin_lock(&meta_sinfo->lock);
...@@ -3197,7 +3100,7 @@ int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, ...@@ -3197,7 +3100,7 @@ int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
{ {
struct btrfs_space_info *data_sinfo; struct btrfs_space_info *data_sinfo;
u64 used; u64 used;
int ret = 0, committed = 0, flushed = 0; int ret = 0, committed = 0;
/* make sure bytes are sectorsize aligned */ /* make sure bytes are sectorsize aligned */
bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
...@@ -3217,13 +3120,6 @@ int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, ...@@ -3217,13 +3120,6 @@ int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
if (used + bytes > data_sinfo->total_bytes) { if (used + bytes > data_sinfo->total_bytes) {
struct btrfs_trans_handle *trans; struct btrfs_trans_handle *trans;
if (!flushed) {
spin_unlock(&data_sinfo->lock);
flush_delalloc(root, data_sinfo);
flushed = 1;
goto again;
}
/* /*
* if we don't have enough free bytes in this space then we need * if we don't have enough free bytes in this space then we need
* to alloc a new chunk. * to alloc a new chunk.
...@@ -3467,6 +3363,55 @@ static int maybe_allocate_chunk(struct btrfs_trans_handle *trans, ...@@ -3467,6 +3363,55 @@ static int maybe_allocate_chunk(struct btrfs_trans_handle *trans,
return ret == 1 ? 1 : 0; return ret == 1 ? 1 : 0;
} }
/*
 * shrink metadata reservation for delalloc
 *
 * Repeatedly kicks writeback of one delalloc inode at a time and watches
 * sinfo->bytes_delalloc shrink, until either nothing is left reserved or
 * min(initial bytes_delalloc, to_reclaim) bytes have been reclaimed.
 *
 * @trans:      optional transaction handle; when non-NULL, inode puts are
 *              delayed and the loop bails out once the transaction is
 *              blocked
 * @root:       root used to reach the delalloc inode list
 * @sinfo:      space info whose bytes_delalloc counter is monitored
 * @to_reclaim: number of bytes the caller would like to get back
 *
 * Returns 1 if at least @to_reclaim bytes were reclaimed, 0 if not (or if
 * there was nothing reserved on entry), and -EAGAIN if @trans is set and
 * its transaction became blocked.
 */
static int shrink_delalloc(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root,
			   struct btrfs_space_info *sinfo, u64 to_reclaim)
{
	u64 reserved;
	u64 max_reclaim;
	u64 reclaimed = 0;
	int pause = 1;
	int ret;

	spin_lock(&sinfo->lock);
	reserved = sinfo->bytes_delalloc;
	spin_unlock(&sinfo->lock);

	if (reserved == 0)
		return 0;

	/* We can never reclaim more than what is currently reserved. */
	max_reclaim = min(reserved, to_reclaim);

	while (1) {
		ret = btrfs_start_one_delalloc_inode(root, trans ? 1 : 0);
		if (!ret) {
			/*
			 * No inode could be started: back off, doubling the
			 * sleep each time up to HZ / 10 jiffies.
			 */
			__set_current_state(TASK_INTERRUPTIBLE);
			schedule_timeout(pause);
			pause <<= 1;
			if (pause > HZ / 10)
				pause = HZ / 10;
		} else {
			pause = 1;
		}

		spin_lock(&sinfo->lock);
		/*
		 * Accumulate the progress made since the previous sample.
		 * 'reserved' is refreshed on every pass, so a plain
		 * assignment here would only remember the last delta and
		 * lose everything reclaimed in earlier iterations.
		 */
		if (reserved > sinfo->bytes_delalloc)
			reclaimed += reserved - sinfo->bytes_delalloc;
		reserved = sinfo->bytes_delalloc;
		spin_unlock(&sinfo->lock);

		if (reserved == 0 || reclaimed >= max_reclaim)
			break;

		if (trans && trans->transaction->blocked)
			return -EAGAIN;
	}
	return reclaimed >= to_reclaim;
}
static int update_block_group(struct btrfs_trans_handle *trans, static int update_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_root *root,
u64 bytenr, u64 num_bytes, int alloc, u64 bytenr, u64 num_bytes, int alloc,
......
...@@ -5611,6 +5611,38 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) ...@@ -5611,6 +5611,38 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
return 0; return 0;
} }
/*
 * Start writeback on a single inode from the fs-wide delalloc list.
 *
 * Walks the head of fs_info->delalloc_inodes under delalloc_lock until an
 * inode can be grabbed with igrab().  Entries that cannot be grabbed are
 * unlinked from the list; the grabbed one is rotated to the tail.  The
 * inode is then written with write_inode_now(inode, 0) and released,
 * either through the delayed-iput mechanism (@delay_iput non-zero) or
 * with a plain iput().
 *
 * Returns 1 if an inode was written, 0 if no inode could be grabbed.
 */
int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput)
{
	struct inode *victim = NULL;
	struct btrfs_inode *entry;

	spin_lock(&root->fs_info->delalloc_lock);
	for (;;) {
		if (list_empty(&root->fs_info->delalloc_inodes))
			break;

		entry = list_entry(root->fs_info->delalloc_inodes.next,
				   struct btrfs_inode, delalloc_inodes);
		victim = igrab(&entry->vfs_inode);
		if (victim != NULL) {
			/* Rotate to the tail so the next call picks another inode. */
			list_move_tail(&entry->delalloc_inodes,
				       &root->fs_info->delalloc_inodes);
			break;
		}

		/* Could not take a reference: drop the entry and try the next. */
		list_del_init(&entry->delalloc_inodes);
		cond_resched_lock(&root->fs_info->delalloc_lock);
	}
	spin_unlock(&root->fs_info->delalloc_lock);

	if (victim == NULL)
		return 0;

	write_inode_now(victim, 0);
	if (delay_iput)
		btrfs_add_delayed_iput(victim);
	else
		iput(victim);

	return 1;
}
static int btrfs_symlink(struct inode *dir, struct dentry *dentry, static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
const char *symname) const char *symname)
{ {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment