Commit 54f77024 authored by Kent Overstreet's avatar Kent Overstreet

bcachefs: Fix deadlock in __wait_on_freeing_inode()

We can't call __wait_on_freeing_inode() with btree locks held; we're
waiting on another thread that's in evict(), and before it clears that
bit it needs to write that inode to flush timestamps - deadlock.

Fixing this involves a fair amount of re-jiggering to plumb a new
transaction restart.
Signed-off-by: default avatarKent Overstreet <kent.overstreet@linux.dev>
parent 112d21fd
...@@ -190,7 +190,8 @@ struct bch_inode_info *__bch2_inode_hash_find(struct bch_fs *c, subvol_inum inum ...@@ -190,7 +190,8 @@ struct bch_inode_info *__bch2_inode_hash_find(struct bch_fs *c, subvol_inum inum
return rhashtable_lookup_fast(&c->vfs_inodes_table, &inum, bch2_vfs_inodes_params); return rhashtable_lookup_fast(&c->vfs_inodes_table, &inum, bch2_vfs_inodes_params);
} }
static struct bch_inode_info *bch2_inode_hash_find(struct bch_fs *c, subvol_inum inum) static struct bch_inode_info *bch2_inode_hash_find(struct bch_fs *c, struct btree_trans *trans,
subvol_inum inum)
{ {
struct bch_inode_info *inode; struct bch_inode_info *inode;
repeat: repeat:
...@@ -202,7 +203,15 @@ static struct bch_inode_info *bch2_inode_hash_find(struct bch_fs *c, subvol_inum ...@@ -202,7 +203,15 @@ static struct bch_inode_info *bch2_inode_hash_find(struct bch_fs *c, subvol_inum
return NULL; return NULL;
} }
if ((inode->v.i_state & (I_FREEING|I_WILL_FREE))) { if ((inode->v.i_state & (I_FREEING|I_WILL_FREE))) {
if (!trans) {
__wait_on_freeing_inode(&inode->v); __wait_on_freeing_inode(&inode->v);
} else {
bch2_trans_unlock(trans);
__wait_on_freeing_inode(&inode->v);
int ret = bch2_trans_relock(trans);
if (ret)
return ERR_PTR(ret);
}
goto repeat; goto repeat;
} }
__iget(&inode->v); __iget(&inode->v);
...@@ -226,7 +235,9 @@ static void bch2_inode_hash_remove(struct bch_fs *c, struct bch_inode_info *inod ...@@ -226,7 +235,9 @@ static void bch2_inode_hash_remove(struct bch_fs *c, struct bch_inode_info *inod
} }
} }
static struct bch_inode_info *bch2_inode_hash_insert(struct bch_fs *c, struct bch_inode_info *inode) static struct bch_inode_info *bch2_inode_hash_insert(struct bch_fs *c,
struct btree_trans *trans,
struct bch_inode_info *inode)
{ {
struct bch_inode_info *old = inode; struct bch_inode_info *old = inode;
...@@ -235,7 +246,7 @@ static struct bch_inode_info *bch2_inode_hash_insert(struct bch_fs *c, struct bc ...@@ -235,7 +246,7 @@ static struct bch_inode_info *bch2_inode_hash_insert(struct bch_fs *c, struct bc
if (unlikely(rhashtable_lookup_insert_fast(&c->vfs_inodes_table, if (unlikely(rhashtable_lookup_insert_fast(&c->vfs_inodes_table,
&inode->hash, &inode->hash,
bch2_vfs_inodes_params))) { bch2_vfs_inodes_params))) {
old = bch2_inode_hash_find(c, inode->ei_inum); old = bch2_inode_hash_find(c, trans, inode->ei_inum);
if (!old) if (!old)
goto retry; goto retry;
...@@ -254,7 +265,7 @@ static struct bch_inode_info *bch2_inode_hash_insert(struct bch_fs *c, struct bc ...@@ -254,7 +265,7 @@ static struct bch_inode_info *bch2_inode_hash_insert(struct bch_fs *c, struct bc
*/ */
set_nlink(&inode->v, 1); set_nlink(&inode->v, 1);
discard_new_inode(&inode->v); discard_new_inode(&inode->v);
inode = old; return old;
} else { } else {
inode_fake_hash(&inode->v); inode_fake_hash(&inode->v);
...@@ -263,9 +274,8 @@ static struct bch_inode_info *bch2_inode_hash_insert(struct bch_fs *c, struct bc ...@@ -263,9 +274,8 @@ static struct bch_inode_info *bch2_inode_hash_insert(struct bch_fs *c, struct bc
mutex_lock(&c->vfs_inodes_lock); mutex_lock(&c->vfs_inodes_lock);
list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list); list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list);
mutex_unlock(&c->vfs_inodes_lock); mutex_unlock(&c->vfs_inodes_lock);
}
return inode; return inode;
}
} }
#define memalloc_flags_do(_flags, _do) \ #define memalloc_flags_do(_flags, _do) \
...@@ -325,9 +335,24 @@ static struct bch_inode_info *bch2_new_inode(struct btree_trans *trans) ...@@ -325,9 +335,24 @@ static struct bch_inode_info *bch2_new_inode(struct btree_trans *trans)
return inode; return inode;
} }
static struct bch_inode_info *bch2_inode_hash_init_insert(struct btree_trans *trans,
subvol_inum inum,
struct bch_inode_unpacked *bi,
struct bch_subvolume *subvol)
{
struct bch_inode_info *inode = bch2_new_inode(trans);
if (IS_ERR(inode))
return inode;
bch2_vfs_inode_init(trans, inum, inode, bi, subvol);
return bch2_inode_hash_insert(trans->c, trans, inode);
}
struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum) struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum)
{ {
struct bch_inode_info *inode = bch2_inode_hash_find(c, inum); struct bch_inode_info *inode = bch2_inode_hash_find(c, NULL, inum);
if (inode) if (inode)
return &inode->v; return &inode->v;
...@@ -338,11 +363,7 @@ struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum) ...@@ -338,11 +363,7 @@ struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum)
int ret = lockrestart_do(trans, int ret = lockrestart_do(trans,
bch2_subvolume_get(trans, inum.subvol, true, 0, &subvol) ?: bch2_subvolume_get(trans, inum.subvol, true, 0, &subvol) ?:
bch2_inode_find_by_inum_trans(trans, inum, &inode_u)) ?: bch2_inode_find_by_inum_trans(trans, inum, &inode_u)) ?:
PTR_ERR_OR_ZERO(inode = bch2_new_inode(trans)); PTR_ERR_OR_ZERO(inode = bch2_inode_hash_init_insert(trans, inum, &inode_u, &subvol));
if (!ret) {
bch2_vfs_inode_init(trans, inum, inode, &inode_u, &subvol);
inode = bch2_inode_hash_insert(c, inode);
}
bch2_trans_put(trans); bch2_trans_put(trans);
return ret ? ERR_PTR(ret) : &inode->v; return ret ? ERR_PTR(ret) : &inode->v;
...@@ -433,8 +454,16 @@ __bch2_create(struct mnt_idmap *idmap, ...@@ -433,8 +454,16 @@ __bch2_create(struct mnt_idmap *idmap,
* we must insert the new inode into the inode cache before calling * we must insert the new inode into the inode cache before calling
* bch2_trans_exit() and dropping locks, else we could race with another * bch2_trans_exit() and dropping locks, else we could race with another
* thread pulling the inode in and modifying it: * thread pulling the inode in and modifying it:
*
* also, calling bch2_inode_hash_insert() without passing in the
* transaction object is sketchy - if we could ever end up in
* __wait_on_freeing_inode(), we'd risk deadlock.
*
* But that shouldn't be possible, since we still have the inode locked
* that we just created, and we _really_ can't take a transaction
* restart here.
*/ */
inode = bch2_inode_hash_insert(c, inode); inode = bch2_inode_hash_insert(c, NULL, inode);
bch2_trans_put(trans); bch2_trans_put(trans);
err: err:
posix_acl_release(default_acl); posix_acl_release(default_acl);
...@@ -474,7 +503,7 @@ static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans, ...@@ -474,7 +503,7 @@ static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans,
if (ret) if (ret)
goto err; goto err;
struct bch_inode_info *inode = bch2_inode_hash_find(c, inum); struct bch_inode_info *inode = bch2_inode_hash_find(c, trans, inum);
if (inode) if (inode)
goto out; goto out;
...@@ -482,7 +511,7 @@ static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans, ...@@ -482,7 +511,7 @@ static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans,
struct bch_inode_unpacked inode_u; struct bch_inode_unpacked inode_u;
ret = bch2_subvolume_get(trans, inum.subvol, true, 0, &subvol) ?: ret = bch2_subvolume_get(trans, inum.subvol, true, 0, &subvol) ?:
bch2_inode_find_by_inum_nowarn_trans(trans, inum, &inode_u) ?: bch2_inode_find_by_inum_nowarn_trans(trans, inum, &inode_u) ?:
PTR_ERR_OR_ZERO(inode = bch2_new_inode(trans)); PTR_ERR_OR_ZERO(inode = bch2_inode_hash_init_insert(trans, inum, &inode_u, &subvol));
bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT),
c, "dirent to missing inode:\n %s", c, "dirent to missing inode:\n %s",
...@@ -502,9 +531,6 @@ static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans, ...@@ -502,9 +531,6 @@ static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans,
ret = -ENOENT; ret = -ENOENT;
goto err; goto err;
} }
bch2_vfs_inode_init(trans, inum, inode, &inode_u, &subvol);
inode = bch2_inode_hash_insert(c, inode);
out: out:
bch2_trans_iter_exit(trans, &dirent_iter); bch2_trans_iter_exit(trans, &dirent_iter);
printbuf_exit(&buf); printbuf_exit(&buf);
...@@ -1545,7 +1571,8 @@ static const struct export_operations bch_export_ops = { ...@@ -1545,7 +1571,8 @@ static const struct export_operations bch_export_ops = {
.get_name = bch2_get_name, .get_name = bch2_get_name,
}; };
static void bch2_vfs_inode_init(struct btree_trans *trans, subvol_inum inum, static void bch2_vfs_inode_init(struct btree_trans *trans,
subvol_inum inum,
struct bch_inode_info *inode, struct bch_inode_info *inode,
struct bch_inode_unpacked *bi, struct bch_inode_unpacked *bi,
struct bch_subvolume *subvol) struct bch_subvolume *subvol)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment