Commit 82d5902d authored by Li Zefan's avatar Li Zefan

Btrfs: Support reading/writing on disk free ino cache

This is similar to block group caching.

We dedicate a special inode in fs tree to save free ino cache.

At the very first time we create/delete a file after mount, the free ino
cache will be loaded from disk into memory. When the fs tree is committed,
the cache will be written back to disk.

To keep compatibility, we check the root generation against the generation
of the special inode when loading the cache, so the loading will fail
if the btrfs filesystem was mounted in an older kernel before.
Signed-off-by: default avatarLi Zefan <lizf@cn.fujitsu.com>
parent 33345d01
...@@ -105,6 +105,12 @@ struct btrfs_ordered_sum; ...@@ -105,6 +105,12 @@ struct btrfs_ordered_sum;
/* For storing free space cache */ /* For storing free space cache */
#define BTRFS_FREE_SPACE_OBJECTID -11ULL #define BTRFS_FREE_SPACE_OBJECTID -11ULL
/*
* The inode number assigned to the special inode for storing
* free ino cache
*/
#define BTRFS_FREE_INO_OBJECTID -12ULL
/* dummy objectid represents multiple objectids */ /* dummy objectid represents multiple objectids */
#define BTRFS_MULTIPLE_OBJECTIDS -255ULL #define BTRFS_MULTIPLE_OBJECTIDS -255ULL
...@@ -1110,6 +1116,7 @@ struct btrfs_root { ...@@ -1110,6 +1116,7 @@ struct btrfs_root {
wait_queue_head_t cache_wait; wait_queue_head_t cache_wait;
struct btrfs_free_space_ctl *free_ino_pinned; struct btrfs_free_space_ctl *free_ino_pinned;
u64 cache_progress; u64 cache_progress;
struct inode *cache_inode;
struct mutex log_mutex; struct mutex log_mutex;
wait_queue_head_t log_writer_wait; wait_queue_head_t log_writer_wait;
......
...@@ -2505,6 +2505,7 @@ int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) ...@@ -2505,6 +2505,7 @@ int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
static void free_fs_root(struct btrfs_root *root) static void free_fs_root(struct btrfs_root *root)
{ {
iput(root->cache_inode);
WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
if (root->anon_super.s_dev) { if (root->anon_super.s_dev) {
down_write(&root->anon_super.s_umount); down_write(&root->anon_super.s_umount);
......
...@@ -3145,7 +3145,8 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes) ...@@ -3145,7 +3145,8 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
/* make sure bytes are sectorsize aligned */ /* make sure bytes are sectorsize aligned */
bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
if (root == root->fs_info->tree_root) { if (root == root->fs_info->tree_root ||
BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID) {
alloc_chunk = 0; alloc_chunk = 0;
committed = 1; committed = 1;
} }
......
...@@ -209,7 +209,8 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root, ...@@ -209,7 +209,8 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root,
return ret; return ret;
} }
return btrfs_update_inode(trans, root, inode); ret = btrfs_update_inode(trans, root, inode);
return ret;
} }
static int readahead_cache(struct inode *inode) static int readahead_cache(struct inode *inode)
...@@ -525,6 +526,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info, ...@@ -525,6 +526,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
spin_lock(&block_group->lock); spin_lock(&block_group->lock);
block_group->disk_cache_state = BTRFS_DC_CLEAR; block_group->disk_cache_state = BTRFS_DC_CLEAR;
spin_unlock(&block_group->lock); spin_unlock(&block_group->lock);
ret = 0;
printk(KERN_ERR "btrfs: failed to load free space cache " printk(KERN_ERR "btrfs: failed to load free space cache "
"for block group %llu\n", block_group->key.objectid); "for block group %llu\n", block_group->key.objectid);
...@@ -893,6 +895,7 @@ int btrfs_write_out_cache(struct btrfs_root *root, ...@@ -893,6 +895,7 @@ int btrfs_write_out_cache(struct btrfs_root *root,
spin_lock(&block_group->lock); spin_lock(&block_group->lock);
block_group->disk_cache_state = BTRFS_DC_ERROR; block_group->disk_cache_state = BTRFS_DC_ERROR;
spin_unlock(&block_group->lock); spin_unlock(&block_group->lock);
ret = 0;
printk(KERN_ERR "btrfs: failed to write free space cache " printk(KERN_ERR "btrfs: failed to write free space cache "
"for block group %llu\n", block_group->key.objectid); "for block group %llu\n", block_group->key.objectid);
...@@ -2458,3 +2461,95 @@ u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root) ...@@ -2458,3 +2461,95 @@ u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root)
return ino; return ino;
} }
/*
 * Return the free-ino cache inode of @root, holding a reference.
 *
 * Fast path: if root->cache_inode is already populated, take a
 * reference on it under cache_lock and return it.  Otherwise look the
 * inode up on disk via __lookup_free_space_inode() and, unless the
 * filesystem is closing, stash it in root->cache_inode so later
 * callers hit the fast path.
 *
 * Returns the inode (caller must iput() it) or an ERR_PTR propagated
 * from the on-disk lookup.
 */
struct inode *lookup_free_ino_inode(struct btrfs_root *root,
				    struct btrfs_path *path)
{
	struct inode *inode = NULL;

	/* igrab under cache_lock so the cached pointer can't be freed
	 * between the check and the reference grab */
	spin_lock(&root->cache_lock);
	if (root->cache_inode)
		inode = igrab(root->cache_inode);
	spin_unlock(&root->cache_lock);
	if (inode)
		return inode;

	inode = __lookup_free_space_inode(root, path, 0);
	if (IS_ERR(inode))
		return inode;

	spin_lock(&root->cache_lock);
	/*
	 * NOTE(review): presumably we skip caching during unmount so the
	 * extra reference doesn't pin the inode past umount — confirm
	 * against the fs_info->closing protocol.
	 */
	if (!root->fs_info->closing)
		root->cache_inode = igrab(inode);
	spin_unlock(&root->cache_lock);

	return inode;
}
/*
 * Create the special inode (objectid BTRFS_FREE_INO_OBJECTID) in the
 * fs tree that backs the free-ino cache on disk.  Thin wrapper; the
 * result of __create_free_space_inode() is returned as-is.
 */
int create_free_ino_inode(struct btrfs_root *root,
			  struct btrfs_trans_handle *trans,
			  struct btrfs_path *path)
{
	return __create_free_space_inode(root, trans, path,
					 BTRFS_FREE_INO_OBJECTID, 0);
}
/*
 * Load the free-ino cache of @root from its special cache inode.
 *
 * Returns 0 when there is no usable cache (unmounting, no path memory,
 * lookup failure, or a stale generation) so the caller falls back to
 * scanning the tree; otherwise returns the result of
 * __load_free_space_cache() — the caller treats a return of 1 as
 * "cache fully loaded" (see start_caching()).
 */
int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
{
	struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
	struct btrfs_path *path;
	struct inode *inode;
	int ret = 0;
	u64 root_gen = btrfs_root_generation(&root->root_item);

	/*
	 * If we're unmounting then just return, since this does a search on the
	 * normal root and not the commit root and we could deadlock.
	 */
	smp_mb();
	if (fs_info->closing)
		return 0;

	path = btrfs_alloc_path();
	if (!path)
		return 0;

	inode = lookup_free_ino_inode(root, path);
	if (IS_ERR(inode))
		goto out;

	/*
	 * Compatibility check: if the cache inode's generation does not
	 * match the root generation, the fs was last mounted by a kernel
	 * that didn't maintain this cache, so its contents are stale —
	 * ignore it (ret stays 0).
	 */
	if (root_gen != BTRFS_I(inode)->generation)
		goto out_put;

	ret = __load_free_space_cache(root, inode, ctl, path, 0);

	if (ret < 0)
		printk(KERN_ERR "btrfs: failed to load free ino cache for "
		       "root %llu\n", root->root_key.objectid);
out_put:
	iput(inode);
out:
	btrfs_free_path(path);
	return ret;
}
/*
 * Write the in-memory free-ino cache of @root back to its special
 * cache inode on disk.
 *
 * If the cache inode cannot be looked up, return 0 (nothing to
 * write); otherwise return the result of __btrfs_write_out_cache(),
 * logging an error message on failure.
 */
int btrfs_write_out_ino_cache(struct btrfs_root *root,
			      struct btrfs_trans_handle *trans,
			      struct btrfs_path *path)
{
	struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
	struct inode *inode;
	int ret;

	inode = lookup_free_ino_inode(root, path);
	if (IS_ERR(inode))
		return 0;

	/* NULL block group: this is the per-root ino cache, not a block
	 * group free space cache */
	ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0);
	if (ret < 0)
		printk(KERN_ERR "btrfs: failed to write free ino cache "
		       "for root %llu\n", root->root_key.objectid);

	iput(inode);
	return ret;
}
...@@ -65,6 +65,17 @@ int btrfs_write_out_cache(struct btrfs_root *root, ...@@ -65,6 +65,17 @@ int btrfs_write_out_cache(struct btrfs_root *root,
struct btrfs_block_group_cache *block_group, struct btrfs_block_group_cache *block_group,
struct btrfs_path *path); struct btrfs_path *path);
struct inode *lookup_free_ino_inode(struct btrfs_root *root,
struct btrfs_path *path);
int create_free_ino_inode(struct btrfs_root *root,
struct btrfs_trans_handle *trans,
struct btrfs_path *path);
int load_free_ino_cache(struct btrfs_fs_info *fs_info,
struct btrfs_root *root);
int btrfs_write_out_ino_cache(struct btrfs_root *root,
struct btrfs_trans_handle *trans,
struct btrfs_path *path);
void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group); void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group);
int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl, int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl,
u64 bytenr, u64 size); u64 bytenr, u64 size);
......
...@@ -137,6 +137,7 @@ static int caching_kthread(void *data) ...@@ -137,6 +137,7 @@ static int caching_kthread(void *data)
static void start_caching(struct btrfs_root *root) static void start_caching(struct btrfs_root *root)
{ {
struct task_struct *tsk; struct task_struct *tsk;
int ret;
spin_lock(&root->cache_lock); spin_lock(&root->cache_lock);
if (root->cached != BTRFS_CACHE_NO) { if (root->cached != BTRFS_CACHE_NO) {
...@@ -147,6 +148,14 @@ static void start_caching(struct btrfs_root *root) ...@@ -147,6 +148,14 @@ static void start_caching(struct btrfs_root *root)
root->cached = BTRFS_CACHE_STARTED; root->cached = BTRFS_CACHE_STARTED;
spin_unlock(&root->cache_lock); spin_unlock(&root->cache_lock);
ret = load_free_ino_cache(root->fs_info, root);
if (ret == 1) {
spin_lock(&root->cache_lock);
root->cached = BTRFS_CACHE_FINISHED;
spin_unlock(&root->cache_lock);
return;
}
tsk = kthread_run(caching_kthread, root, "btrfs-ino-cache-%llu\n", tsk = kthread_run(caching_kthread, root, "btrfs-ino-cache-%llu\n",
root->root_key.objectid); root->root_key.objectid);
BUG_ON(IS_ERR(tsk)); BUG_ON(IS_ERR(tsk));
...@@ -352,6 +361,84 @@ void btrfs_init_free_ino_ctl(struct btrfs_root *root) ...@@ -352,6 +361,84 @@ void btrfs_init_free_ino_ctl(struct btrfs_root *root)
pinned->op = &pinned_free_ino_op; pinned->op = &pinned_free_ino_op;
} }
/*
 * Persist the free-ino cache of @root to disk at fs-root commit time
 * (called from commit_fs_roots()).
 *
 * Steps:
 *  1. Look up the special cache inode; on -ENOENT (first commit after
 *     the cache was introduced) create it and retry exactly once.
 *  2. Zero the inode's generation and truncate any previous cache
 *     contents before rewriting.
 *  3. Refuse to write (ret = -1) unless the in-memory cache is fully
 *     built (root->cached == BTRFS_CACHE_FINISHED) — a partial cache
 *     must never reach disk.
 *  4. Preallocate space sized from the current extent/bitmap counts
 *     plus slack, then write the cache via btrfs_write_out_ino_cache().
 *
 * Returns 0 on success, -ENOMEM/-1 or a negative errno on failure.
 */
int btrfs_save_ino_cache(struct btrfs_root *root,
			 struct btrfs_trans_handle *trans)
{
	struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
	struct btrfs_path *path;
	struct inode *inode;
	u64 alloc_hint = 0;
	int ret;
	int prealloc;
	bool retry = false;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
again:
	inode = lookup_free_ino_inode(root, path);
	if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
		ret = PTR_ERR(inode);
		goto out;
	}

	if (IS_ERR(inode)) {
		/* -ENOENT: the cache inode doesn't exist yet — create it
		 * and retry the lookup once (BUG_ON guards against a
		 * create that "succeeds" yet still can't be found) */
		BUG_ON(retry);
		retry = true;

		ret = create_free_ino_inode(root, trans, path);
		if (ret)
			goto out;
		goto again;
	}

	/* NOTE(review): generation is zeroed before rewriting, presumably
	 * so an interrupted write fails the generation check in
	 * load_free_ino_cache() instead of loading stale data — confirm */
	BTRFS_I(inode)->generation = 0;
	ret = btrfs_update_inode(trans, root, inode);
	WARN_ON(ret);

	if (i_size_read(inode) > 0) {
		ret = btrfs_truncate_free_space_cache(root, trans, path, inode);
		if (ret)
			goto out_put;
	}

	spin_lock(&root->cache_lock);
	if (root->cached != BTRFS_CACHE_FINISHED) {
		ret = -1;
		spin_unlock(&root->cache_lock);
		goto out_put;
	}
	spin_unlock(&root->cache_lock);

	/* size the preallocation from the current in-memory cache:
	 * one btrfs_free_space per extent plus one page per bitmap */
	spin_lock(&ctl->tree_lock);
	prealloc = sizeof(struct btrfs_free_space) * ctl->free_extents;
	prealloc = ALIGN(prealloc, PAGE_CACHE_SIZE);
	prealloc += ctl->total_bitmaps * PAGE_CACHE_SIZE;
	spin_unlock(&ctl->tree_lock);

	/* Just to make sure we have enough space */
	prealloc += 8 * PAGE_CACHE_SIZE;

	/* reserve data space, preallocate the file range, then release
	 * the reservation — the prealloc itself now holds the space */
	ret = btrfs_check_data_free_space(inode, prealloc);
	if (ret)
		goto out_put;

	ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc,
					      prealloc, prealloc, &alloc_hint);
	if (ret)
		goto out_put;
	btrfs_free_reserved_data_space(inode, prealloc);

out_put:
	iput(inode);
out:
	/* only write the cache out if every step above succeeded */
	if (ret == 0)
		ret = btrfs_write_out_ino_cache(root, trans, path);

	btrfs_free_path(path);
	return ret;
}
static int btrfs_find_highest_objectid(struct btrfs_root *root, u64 *objectid) static int btrfs_find_highest_objectid(struct btrfs_root *root, u64 *objectid)
{ {
struct btrfs_path *path; struct btrfs_path *path;
......
...@@ -5,6 +5,8 @@ void btrfs_init_free_ino_ctl(struct btrfs_root *root); ...@@ -5,6 +5,8 @@ void btrfs_init_free_ino_ctl(struct btrfs_root *root);
void btrfs_unpin_free_ino(struct btrfs_root *root); void btrfs_unpin_free_ino(struct btrfs_root *root);
void btrfs_return_ino(struct btrfs_root *root, u64 objectid); void btrfs_return_ino(struct btrfs_root *root, u64 objectid);
int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid); int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid);
int btrfs_save_ino_cache(struct btrfs_root *root,
struct btrfs_trans_handle *trans);
int btrfs_find_free_objectid(struct btrfs_root *root, u64 *objectid); int btrfs_find_free_objectid(struct btrfs_root *root, u64 *objectid);
......
...@@ -745,6 +745,15 @@ static u64 get_extent_allocation_hint(struct inode *inode, u64 start, ...@@ -745,6 +745,15 @@ static u64 get_extent_allocation_hint(struct inode *inode, u64 start,
return alloc_hint; return alloc_hint;
} }
/*
 * Tell whether @inode is one of the special free-space cache inodes:
 * either any inode of the tree root (block group free space caches
 * live there) or the dedicated free-ino cache inode of an fs tree.
 * Such inodes get the nolock/no-chunk-alloc treatment elsewhere.
 */
static inline bool is_free_space_inode(struct btrfs_root *root,
				       struct inode *inode)
{
	return root == root->fs_info->tree_root ||
	       BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID;
}
/* /*
* when extent_io.c finds a delayed allocation range in the file, * when extent_io.c finds a delayed allocation range in the file,
* the call backs end up in this code. The basic idea is to * the call backs end up in this code. The basic idea is to
...@@ -777,7 +786,7 @@ static noinline int cow_file_range(struct inode *inode, ...@@ -777,7 +786,7 @@ static noinline int cow_file_range(struct inode *inode,
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
int ret = 0; int ret = 0;
BUG_ON(root == root->fs_info->tree_root); BUG_ON(is_free_space_inode(root, inode));
trans = btrfs_join_transaction(root, 1); trans = btrfs_join_transaction(root, 1);
BUG_ON(IS_ERR(trans)); BUG_ON(IS_ERR(trans));
btrfs_set_trans_block_group(trans, inode); btrfs_set_trans_block_group(trans, inode);
...@@ -1048,17 +1057,18 @@ static noinline int run_delalloc_nocow(struct inode *inode, ...@@ -1048,17 +1057,18 @@ static noinline int run_delalloc_nocow(struct inode *inode,
int type; int type;
int nocow; int nocow;
int check_prev = 1; int check_prev = 1;
bool nolock = false; bool nolock;
u64 ino = btrfs_ino(inode); u64 ino = btrfs_ino(inode);
path = btrfs_alloc_path(); path = btrfs_alloc_path();
BUG_ON(!path); BUG_ON(!path);
if (root == root->fs_info->tree_root) {
nolock = true; nolock = is_free_space_inode(root, inode);
if (nolock)
trans = btrfs_join_transaction_nolock(root, 1); trans = btrfs_join_transaction_nolock(root, 1);
} else { else
trans = btrfs_join_transaction(root, 1); trans = btrfs_join_transaction(root, 1);
}
BUG_ON(IS_ERR(trans)); BUG_ON(IS_ERR(trans));
cow_start = (u64)-1; cow_start = (u64)-1;
...@@ -1316,8 +1326,7 @@ static int btrfs_set_bit_hook(struct inode *inode, ...@@ -1316,8 +1326,7 @@ static int btrfs_set_bit_hook(struct inode *inode,
if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_root *root = BTRFS_I(inode)->root;
u64 len = state->end + 1 - state->start; u64 len = state->end + 1 - state->start;
int do_list = (root->root_key.objectid != bool do_list = !is_free_space_inode(root, inode);
BTRFS_ROOT_TREE_OBJECTID);
if (*bits & EXTENT_FIRST_DELALLOC) if (*bits & EXTENT_FIRST_DELALLOC)
*bits &= ~EXTENT_FIRST_DELALLOC; *bits &= ~EXTENT_FIRST_DELALLOC;
...@@ -1350,8 +1359,7 @@ static int btrfs_clear_bit_hook(struct inode *inode, ...@@ -1350,8 +1359,7 @@ static int btrfs_clear_bit_hook(struct inode *inode,
if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_root *root = BTRFS_I(inode)->root;
u64 len = state->end + 1 - state->start; u64 len = state->end + 1 - state->start;
int do_list = (root->root_key.objectid != bool do_list = !is_free_space_inode(root, inode);
BTRFS_ROOT_TREE_OBJECTID);
if (*bits & EXTENT_FIRST_DELALLOC) if (*bits & EXTENT_FIRST_DELALLOC)
*bits &= ~EXTENT_FIRST_DELALLOC; *bits &= ~EXTENT_FIRST_DELALLOC;
...@@ -1458,7 +1466,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, ...@@ -1458,7 +1466,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
if (root == root->fs_info->tree_root) if (is_free_space_inode(root, inode))
ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2); ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2);
else else
ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
...@@ -1701,7 +1709,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) ...@@ -1701,7 +1709,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
struct extent_state *cached_state = NULL; struct extent_state *cached_state = NULL;
int compress_type = 0; int compress_type = 0;
int ret; int ret;
bool nolock = false; bool nolock;
ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start, ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start,
end - start + 1); end - start + 1);
...@@ -1709,7 +1717,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) ...@@ -1709,7 +1717,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
return 0; return 0;
BUG_ON(!ordered_extent); BUG_ON(!ordered_extent);
nolock = (root == root->fs_info->tree_root); nolock = is_free_space_inode(root, inode);
if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
BUG_ON(!list_empty(&ordered_extent->list)); BUG_ON(!list_empty(&ordered_extent->list));
...@@ -3473,7 +3481,9 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, ...@@ -3473,7 +3481,9 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
if (path->slots[0] == 0 || if (path->slots[0] == 0 ||
path->slots[0] != pending_del_slot) { path->slots[0] != pending_del_slot) {
if (root->ref_cows) { if (root->ref_cows &&
BTRFS_I(inode)->location.objectid !=
BTRFS_FREE_INO_OBJECTID) {
err = -EAGAIN; err = -EAGAIN;
goto out; goto out;
} }
...@@ -3765,7 +3775,7 @@ void btrfs_evict_inode(struct inode *inode) ...@@ -3765,7 +3775,7 @@ void btrfs_evict_inode(struct inode *inode)
truncate_inode_pages(&inode->i_data, 0); truncate_inode_pages(&inode->i_data, 0);
if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 || if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 ||
root == root->fs_info->tree_root)) is_free_space_inode(root, inode)))
goto no_delete; goto no_delete;
if (is_bad_inode(inode)) { if (is_bad_inode(inode)) {
...@@ -4382,7 +4392,8 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) ...@@ -4382,7 +4392,8 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
return 0; return 0;
smp_mb(); smp_mb();
nolock = (root->fs_info->closing && root == root->fs_info->tree_root); if (root->fs_info->closing && is_free_space_inode(root, inode))
nolock = true;
if (wbc->sync_mode == WB_SYNC_ALL) { if (wbc->sync_mode == WB_SYNC_ALL) {
if (nolock) if (nolock)
...@@ -6900,7 +6911,7 @@ int btrfs_drop_inode(struct inode *inode) ...@@ -6900,7 +6911,7 @@ int btrfs_drop_inode(struct inode *inode)
struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_root *root = BTRFS_I(inode)->root;
if (btrfs_root_refs(&root->root_item) == 0 && if (btrfs_root_refs(&root->root_item) == 0 &&
root != root->fs_info->tree_root) !is_free_space_inode(root, inode))
return 1; return 1;
else else
return generic_drop_inode(inode); return generic_drop_inode(inode);
......
...@@ -761,6 +761,8 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans, ...@@ -761,6 +761,8 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
btrfs_update_reloc_root(trans, root); btrfs_update_reloc_root(trans, root);
btrfs_orphan_commit_root(trans, root); btrfs_orphan_commit_root(trans, root);
btrfs_save_ino_cache(root, trans);
if (root->commit_root != root->node) { if (root->commit_root != root->node) {
mutex_lock(&root->fs_commit_mutex); mutex_lock(&root->fs_commit_mutex);
switch_commit_root(root); switch_commit_root(root);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment