Commit d52be818 authored by Josef Bacik

Btrfs: simplify unlink reservations

Dave pointed out a problem where if you filled up a file system as much as
possible you couldn't remove any files.  The whole unlink reservation thing is
convoluted because it tries to guess if it's going to add space to unlink
something or not, and has all these odd uncommented cases where it simply does
not try.  So to fix this I've added a way to conditionally steal from the global
reserve if we can't make our normal reservation.  If we have more than half the
space in the global reserve free we will go ahead and steal from the global
reserve.  With this patch Dave's reproducer now works and I can rm all the files
on the file system.  Thanks,
Reported-by: David Sterba <dsterba@suse.cz>
Signed-off-by: Josef Bacik <jbacik@fusionio.com>
parent c6adc9cc
...@@ -1495,7 +1495,6 @@ struct btrfs_fs_info { ...@@ -1495,7 +1495,6 @@ struct btrfs_fs_info {
int do_barriers; int do_barriers;
int closing; int closing;
int log_root_recovering; int log_root_recovering;
int enospc_unlink;
u64 total_pinned; u64 total_pinned;
...@@ -3183,6 +3182,9 @@ int btrfs_block_rsv_refill(struct btrfs_root *root, ...@@ -3183,6 +3182,9 @@ int btrfs_block_rsv_refill(struct btrfs_root *root,
int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
struct btrfs_block_rsv *dst_rsv, struct btrfs_block_rsv *dst_rsv,
u64 num_bytes); u64 num_bytes);
int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *dest, u64 num_bytes,
int min_factor);
void btrfs_block_rsv_release(struct btrfs_root *root, void btrfs_block_rsv_release(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv, struct btrfs_block_rsv *block_rsv,
u64 num_bytes); u64 num_bytes);
......
...@@ -4297,6 +4297,31 @@ static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv, ...@@ -4297,6 +4297,31 @@ static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv,
spin_unlock(&block_rsv->lock); spin_unlock(&block_rsv->lock);
} }
/*
 * Conditionally move num_bytes out of the global block reserve into dest.
 *
 * The move is refused with -ENOSPC when dest does not use the same
 * space_info as the global reserve, or when taking num_bytes would leave
 * the global reserve with fewer than
 * div_factor(global_rsv->size, min_factor) bytes reserved.
 *
 * On success num_bytes is subtracted from the global reserve, handed to
 * dest through block_rsv_add_bytes() and 0 is returned.
 *
 * NOTE(review): div_factor() is defined elsewhere; min_factor is presumably
 * expressed in tenths of the reserve size (the unlink caller passes 5,
 * matching the "more than half" wording of the commit message) -- confirm
 * against its definition.
 */
int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *dest, u64 num_bytes,
int min_factor)
{
struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
u64 min_bytes;
/* bytes can only be moved between reserves backed by the same space_info */
if (global_rsv->space_info != dest->space_info)
return -ENOSPC;
/* size and reserved are read and modified with global_rsv->lock held */
spin_lock(&global_rsv->lock);
/* min_bytes is the amount that must stay reserved after the steal */
min_bytes = div_factor(global_rsv->size, min_factor);
if (global_rsv->reserved < min_bytes + num_bytes) {
spin_unlock(&global_rsv->lock);
return -ENOSPC;
}
global_rsv->reserved -= num_bytes;
/* the reserve no longer counts as full once reserved drops below size */
if (global_rsv->reserved < global_rsv->size)
global_rsv->full = 0;
spin_unlock(&global_rsv->lock);
/*
 * dest is credited only after the global lock has been dropped.
 * NOTE(review): the meaning of the trailing 1 is defined by
 * block_rsv_add_bytes(), whose signature is not fully visible here --
 * presumably it asks for dest's size to be grown as well; confirm.
 */
block_rsv_add_bytes(dest, num_bytes, 1);
return 0;
}
static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info, static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *block_rsv, struct btrfs_block_rsv *block_rsv,
struct btrfs_block_rsv *dest, u64 num_bytes) struct btrfs_block_rsv *dest, u64 num_bytes)
......
...@@ -3680,52 +3680,19 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans, ...@@ -3680,52 +3680,19 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
return ret; return ret;
} }
/*
 * helper to check if there is any shared block in the path
 *
 * Walks path->nodes[] from level 0 up to BTRFS_MAX_LEVEL - 1 and stops at
 * the first empty slot.  Blocks for which btrfs_block_can_be_shared() is
 * false are skipped; for the others the reference count is looked up with
 * btrfs_lookup_extent_info().
 *
 * Returns 1 as soon as a block with more than one reference is found,
 * 0 otherwise.
 */
static int check_path_shared(struct btrfs_root *root,
struct btrfs_path *path)
{
struct extent_buffer *eb;
int level;
u64 refs = 1;
for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
int ret;
/* an empty slot ends the walk */
if (!path->nodes[level])
break;
eb = path->nodes[level];
if (!btrfs_block_can_be_shared(root, eb))
continue;
/*
 * NOTE(review): ret is assigned but never checked.  If the lookup
 * fails without writing refs, the previous value (at most 1 at this
 * point) is reused and the block is treated as not shared -- confirm
 * this is intended.
 */
ret = btrfs_lookup_extent_info(NULL, root, eb->start, level, 1,
&refs, NULL);
if (refs > 1)
return 1;
}
return 0;
}
/* /*
* helper to start transaction for unlink and rmdir. * helper to start transaction for unlink and rmdir.
* *
* unlink and rmdir are special in btrfs, they do not always free space. * unlink and rmdir are special in btrfs, they do not always free space, so
* so in enospc case, we should make sure they will free space before * if we cannot make our reservations the normal way try and see if there is
* allowing them to use the global metadata reservation. * plenty of slack room in the global reserve to migrate, otherwise we cannot
* allow the unlink to occur.
*/ */
static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir)
struct dentry *dentry)
{ {
struct btrfs_trans_handle *trans; struct btrfs_trans_handle *trans;
struct btrfs_root *root = BTRFS_I(dir)->root; struct btrfs_root *root = BTRFS_I(dir)->root;
struct btrfs_path *path;
struct btrfs_dir_item *di;
struct inode *inode = dentry->d_inode;
u64 index;
int check_link = 1;
int err = -ENOSPC;
int ret; int ret;
u64 ino = btrfs_ino(inode);
u64 dir_ino = btrfs_ino(dir);
/* /*
* 1 for the possible orphan item * 1 for the possible orphan item
...@@ -3738,158 +3705,23 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, ...@@ -3738,158 +3705,23 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC)
return trans; return trans;
if (ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) if (PTR_ERR(trans) == -ENOSPC) {
return ERR_PTR(-ENOSPC); u64 num_bytes = btrfs_calc_trans_metadata_size(root, 5);
/* check if there is someone else holds reference */
if (S_ISDIR(inode->i_mode) && atomic_read(&inode->i_count) > 1)
return ERR_PTR(-ENOSPC);
if (atomic_read(&inode->i_count) > 2)
return ERR_PTR(-ENOSPC);
if (xchg(&root->fs_info->enospc_unlink, 1)) trans = btrfs_start_transaction(root, 0);
return ERR_PTR(-ENOSPC); if (IS_ERR(trans))
path = btrfs_alloc_path();
if (!path) {
root->fs_info->enospc_unlink = 0;
return ERR_PTR(-ENOMEM);
}
/* 1 for the orphan item */
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
btrfs_free_path(path);
root->fs_info->enospc_unlink = 0;
return trans; return trans;
} ret = btrfs_cond_migrate_bytes(root->fs_info,
&root->fs_info->trans_block_rsv,
path->skip_locking = 1; num_bytes, 5);
path->search_commit_root = 1;
ret = btrfs_lookup_inode(trans, root, path,
&BTRFS_I(dir)->location, 0);
if (ret < 0) {
err = ret;
goto out;
}
if (ret == 0) {
if (check_path_shared(root, path))
goto out;
} else {
check_link = 0;
}
btrfs_release_path(path);
ret = btrfs_lookup_inode(trans, root, path,
&BTRFS_I(inode)->location, 0);
if (ret < 0) {
err = ret;
goto out;
}
if (ret == 0) {
if (check_path_shared(root, path))
goto out;
} else {
check_link = 0;
}
btrfs_release_path(path);
if (ret == 0 && S_ISREG(inode->i_mode)) {
ret = btrfs_lookup_file_extent(trans, root, path,
ino, (u64)-1, 0);
if (ret < 0) {
err = ret;
goto out;
}
BUG_ON(ret == 0); /* Corruption */
if (check_path_shared(root, path))
goto out;
btrfs_release_path(path);
}
if (!check_link) {
err = 0;
goto out;
}
di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
dentry->d_name.name, dentry->d_name.len, 0);
if (IS_ERR(di)) {
err = PTR_ERR(di);
goto out;
}
if (di) {
if (check_path_shared(root, path))
goto out;
} else {
err = 0;
goto out;
}
btrfs_release_path(path);
ret = btrfs_get_inode_ref_index(trans, root, path, dentry->d_name.name,
dentry->d_name.len, ino, dir_ino, 0,
&index);
if (ret) { if (ret) {
err = ret;
goto out;
}
if (check_path_shared(root, path))
goto out;
btrfs_release_path(path);
/*
* This is a commit root search, if we can lookup inode item and other
* relative items in the commit root, it means the transaction of
* dir/file creation has been committed, and the dir index item that we
* delay to insert has also been inserted into the commit root. So
* we needn't worry about the delayed insertion of the dir index item
* here.
*/
di = btrfs_lookup_dir_index_item(trans, root, path, dir_ino, index,
dentry->d_name.name, dentry->d_name.len, 0);
if (IS_ERR(di)) {
err = PTR_ERR(di);
goto out;
}
BUG_ON(ret == -ENOENT);
if (check_path_shared(root, path))
goto out;
err = 0;
out:
btrfs_free_path(path);
/* Migrate the orphan reservation over */
if (!err)
err = btrfs_block_rsv_migrate(trans->block_rsv,
&root->fs_info->global_block_rsv,
trans->bytes_reserved);
if (err) {
btrfs_end_transaction(trans, root); btrfs_end_transaction(trans, root);
root->fs_info->enospc_unlink = 0; return ERR_PTR(ret);
return ERR_PTR(err);
} }
trans->block_rsv = &root->fs_info->global_block_rsv;
return trans;
}
static void __unlink_end_trans(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
if (trans->block_rsv->type == BTRFS_BLOCK_RSV_GLOBAL) {
btrfs_block_rsv_release(root, trans->block_rsv,
trans->bytes_reserved);
trans->block_rsv = &root->fs_info->trans_block_rsv; trans->block_rsv = &root->fs_info->trans_block_rsv;
BUG_ON(!root->fs_info->enospc_unlink); trans->bytes_reserved = num_bytes;
root->fs_info->enospc_unlink = 0;
} }
btrfs_end_transaction(trans, root); return trans;
} }
static int btrfs_unlink(struct inode *dir, struct dentry *dentry) static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
...@@ -3899,7 +3731,7 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) ...@@ -3899,7 +3731,7 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
struct inode *inode = dentry->d_inode; struct inode *inode = dentry->d_inode;
int ret; int ret;
trans = __unlink_start_trans(dir, dentry); trans = __unlink_start_trans(dir);
if (IS_ERR(trans)) if (IS_ERR(trans))
return PTR_ERR(trans); return PTR_ERR(trans);
...@@ -3917,7 +3749,7 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) ...@@ -3917,7 +3749,7 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
} }
out: out:
__unlink_end_trans(trans, root); btrfs_end_transaction(trans, root);
btrfs_btree_balance_dirty(root); btrfs_btree_balance_dirty(root);
return ret; return ret;
} }
...@@ -4014,7 +3846,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) ...@@ -4014,7 +3846,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
if (btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID) if (btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID)
return -EPERM; return -EPERM;
trans = __unlink_start_trans(dir, dentry); trans = __unlink_start_trans(dir);
if (IS_ERR(trans)) if (IS_ERR(trans))
return PTR_ERR(trans); return PTR_ERR(trans);
...@@ -4036,7 +3868,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) ...@@ -4036,7 +3868,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
if (!err) if (!err)
btrfs_i_size_write(inode, 0); btrfs_i_size_write(inode, 0);
out: out:
__unlink_end_trans(trans, root); btrfs_end_transaction(trans, root);
btrfs_btree_balance_dirty(root); btrfs_btree_balance_dirty(root);
return err; return err;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment