Commit 02dbfc99 authored by Omar Sandoval's avatar Omar Sandoval Committed by Chris Mason

Btrfs: fix ->iterate_shared() by upgrading i_rwsem for delayed nodes

Commit fe742fd4 ("Revert "btrfs: switch to ->iterate_shared()"")
backed out the conversion to ->iterate_shared() for Btrfs because the
delayed inode handling in btrfs_real_readdir() is racy. However, we can
still do readdir in parallel if there are no delayed nodes.

This is a temporary fix which upgrades the shared inode lock to an
exclusive lock only when we have delayed items until we come up with a
more complete solution. While we're here, rename the
btrfs_{get,put}_delayed_items functions to make it very clear that
they're just for readdir.

Tested with xfstests and by doing a parallel kernel build:

	while make tinyconfig && make -j4 && git clean dqfx; do
		:
	done

along with a bunch of parallel finds in another shell:

	while true; do
		for ((i=0; i<4; i++)); do
			find . >/dev/null &
		done
		wait
	done
Signed-off-by: default avatarOmar Sandoval <osandov@fb.com>
Signed-off-by: default avatarDavid Sterba <dsterba@suse.com>
Signed-off-by: default avatarChris Mason <clm@fb.com>
parent 33688abb
...@@ -1606,15 +1606,23 @@ int btrfs_inode_delayed_dir_index_count(struct inode *inode) ...@@ -1606,15 +1606,23 @@ int btrfs_inode_delayed_dir_index_count(struct inode *inode)
return 0; return 0;
} }
void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list, bool btrfs_readdir_get_delayed_items(struct inode *inode,
struct list_head *del_list) struct list_head *ins_list,
struct list_head *del_list)
{ {
struct btrfs_delayed_node *delayed_node; struct btrfs_delayed_node *delayed_node;
struct btrfs_delayed_item *item; struct btrfs_delayed_item *item;
delayed_node = btrfs_get_delayed_node(inode); delayed_node = btrfs_get_delayed_node(inode);
if (!delayed_node) if (!delayed_node)
return; return false;
/*
* We can only do one readdir with delayed items at a time because of
* item->readdir_list.
*/
inode_unlock_shared(inode);
inode_lock(inode);
mutex_lock(&delayed_node->mutex); mutex_lock(&delayed_node->mutex);
item = __btrfs_first_delayed_insertion_item(delayed_node); item = __btrfs_first_delayed_insertion_item(delayed_node);
...@@ -1641,10 +1649,13 @@ void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list, ...@@ -1641,10 +1649,13 @@ void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,
* requeue or dequeue this delayed node. * requeue or dequeue this delayed node.
*/ */
atomic_dec(&delayed_node->refs); atomic_dec(&delayed_node->refs);
return true;
} }
void btrfs_put_delayed_items(struct list_head *ins_list, void btrfs_readdir_put_delayed_items(struct inode *inode,
struct list_head *del_list) struct list_head *ins_list,
struct list_head *del_list)
{ {
struct btrfs_delayed_item *curr, *next; struct btrfs_delayed_item *curr, *next;
...@@ -1659,6 +1670,12 @@ void btrfs_put_delayed_items(struct list_head *ins_list, ...@@ -1659,6 +1670,12 @@ void btrfs_put_delayed_items(struct list_head *ins_list,
if (atomic_dec_and_test(&curr->refs)) if (atomic_dec_and_test(&curr->refs))
kfree(curr); kfree(curr);
} }
/*
* The VFS is going to do up_read(), so we need to downgrade back to a
* read lock.
*/
downgrade_write(&inode->i_rwsem);
} }
int btrfs_should_delete_dir_index(struct list_head *del_list, int btrfs_should_delete_dir_index(struct list_head *del_list,
......
...@@ -137,10 +137,12 @@ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root); ...@@ -137,10 +137,12 @@ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root);
void btrfs_destroy_delayed_inodes(struct btrfs_root *root); void btrfs_destroy_delayed_inodes(struct btrfs_root *root);
/* Used for readdir() */ /* Used for readdir() */
void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list, bool btrfs_readdir_get_delayed_items(struct inode *inode,
struct list_head *del_list); struct list_head *ins_list,
void btrfs_put_delayed_items(struct list_head *ins_list, struct list_head *del_list);
struct list_head *del_list); void btrfs_readdir_put_delayed_items(struct inode *inode,
struct list_head *ins_list,
struct list_head *del_list);
int btrfs_should_delete_dir_index(struct list_head *del_list, int btrfs_should_delete_dir_index(struct list_head *del_list,
u64 index); u64 index);
int btrfs_readdir_delayed_dir_index(struct dir_context *ctx, int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
......
...@@ -5757,6 +5757,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) ...@@ -5757,6 +5757,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
int name_len; int name_len;
int is_curr = 0; /* ctx->pos points to the current index? */ int is_curr = 0; /* ctx->pos points to the current index? */
bool emitted; bool emitted;
bool put = false;
/* FIXME, use a real flag for deciding about the key type */ /* FIXME, use a real flag for deciding about the key type */
if (root->fs_info->tree_root == root) if (root->fs_info->tree_root == root)
...@@ -5774,7 +5775,8 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) ...@@ -5774,7 +5775,8 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
if (key_type == BTRFS_DIR_INDEX_KEY) { if (key_type == BTRFS_DIR_INDEX_KEY) {
INIT_LIST_HEAD(&ins_list); INIT_LIST_HEAD(&ins_list);
INIT_LIST_HEAD(&del_list); INIT_LIST_HEAD(&del_list);
btrfs_get_delayed_items(inode, &ins_list, &del_list); put = btrfs_readdir_get_delayed_items(inode, &ins_list,
&del_list);
} }
key.type = key_type; key.type = key_type;
...@@ -5921,8 +5923,8 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) ...@@ -5921,8 +5923,8 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
nopos: nopos:
ret = 0; ret = 0;
err: err:
if (key_type == BTRFS_DIR_INDEX_KEY) if (put)
btrfs_put_delayed_items(&ins_list, &del_list); btrfs_readdir_put_delayed_items(inode, &ins_list, &del_list);
btrfs_free_path(path); btrfs_free_path(path);
return ret; return ret;
} }
...@@ -10534,7 +10536,7 @@ static const struct inode_operations btrfs_dir_ro_inode_operations = { ...@@ -10534,7 +10536,7 @@ static const struct inode_operations btrfs_dir_ro_inode_operations = {
static const struct file_operations btrfs_dir_file_operations = { static const struct file_operations btrfs_dir_file_operations = {
.llseek = generic_file_llseek, .llseek = generic_file_llseek,
.read = generic_read_dir, .read = generic_read_dir,
.iterate = btrfs_real_readdir, .iterate_shared = btrfs_real_readdir,
.unlocked_ioctl = btrfs_ioctl, .unlocked_ioctl = btrfs_ioctl,
#ifdef CONFIG_COMPAT #ifdef CONFIG_COMPAT
.compat_ioctl = btrfs_compat_ioctl, .compat_ioctl = btrfs_compat_ioctl,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment