Commit 16ad3be1 authored by Filipe Manana's avatar Filipe Manana Committed by David Sterba

Btrfs: remove unnecessary delalloc mutex for inodes

The inode delalloc mutex was added a long time ago by commit f248679e
("Btrfs: add a delalloc mutex to inodes for delalloc reservations"), and
the reason for its introduction is not very clear from the change log. It
claims it solves bogus warnings from lockdep, however it lacks an example
report/warning from lockdep, or any explanation.

Since we have enough concurrentcy protection from the locks of the space
info and block reserve objects, and such lockdep warnings don't seem to
exist anymore (at least on a 5.3 kernel I couldn't get them with fstests,
ltp, fs_mark, etc), remove it, simplifying things a bit and decreasing
the size of the btrfs_inode structure. With some quick fio tests doing
direct IO and mmap writes I couldn't observe any significant performance
increase either (direct IO writes that don't increase the file's size
don't hold the inode's lock for their entire duration and mmap writes
don't hold the inode's lock at all), which are the only type of writes
that could see any performance gain due to less serialization.

Review feedback from Josef:

The problem was taking the i_mutex in mmap, which is how I was
protecting delalloc reservations originally.  The delalloc mutex didn't
come with all of the other dependencies.  That's what the lockdep
messages were about, removing the lock isn't going to make them appear
again.

We _had_ to lock around this because we used to do tricks to keep from
over-reserving, and if we didn't serialize delalloc reservations we'd
end up with ugly accounting problems when we tried to clean things up.

However with my recentish changes this isn't the case anymore.  Every
operation is responsible for reserving its space, and then adding it to
the inode.  Then cleaning up is straightforward and can't be mucked up
by other users.  So we no longer need the delalloc mutex to safe us from
ourselves.
Reviewed-by: default avatarJosef Bacik <josef@toxicpanda.com>
Signed-off-by: default avatarFilipe Manana <fdmanana@suse.com>
Signed-off-by: default avatarDavid Sterba <dsterba@suse.com>
parent bf2df5ae
...@@ -63,9 +63,6 @@ struct btrfs_inode { ...@@ -63,9 +63,6 @@ struct btrfs_inode {
/* held while logging the inode in tree-log.c */ /* held while logging the inode in tree-log.c */
struct mutex log_mutex; struct mutex log_mutex;
/* held while doing delalloc reservations */
struct mutex delalloc_mutex;
/* used to order data wrt metadata */ /* used to order data wrt metadata */
struct btrfs_ordered_inode_tree ordered_tree; struct btrfs_ordered_inode_tree ordered_tree;
......
...@@ -307,7 +307,6 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes) ...@@ -307,7 +307,6 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
unsigned nr_extents; unsigned nr_extents;
enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL; enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
int ret = 0; int ret = 0;
bool delalloc_lock = true;
/* /*
* If we are a free space inode we need to not flush since we will be in * If we are a free space inode we need to not flush since we will be in
...@@ -320,7 +319,6 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes) ...@@ -320,7 +319,6 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
*/ */
if (btrfs_is_free_space_inode(inode)) { if (btrfs_is_free_space_inode(inode)) {
flush = BTRFS_RESERVE_NO_FLUSH; flush = BTRFS_RESERVE_NO_FLUSH;
delalloc_lock = false;
} else { } else {
if (current->journal_info) if (current->journal_info)
flush = BTRFS_RESERVE_FLUSH_LIMIT; flush = BTRFS_RESERVE_FLUSH_LIMIT;
...@@ -329,9 +327,6 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes) ...@@ -329,9 +327,6 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
schedule_timeout(1); schedule_timeout(1);
} }
if (delalloc_lock)
mutex_lock(&inode->delalloc_mutex);
num_bytes = ALIGN(num_bytes, fs_info->sectorsize); num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
/* /*
...@@ -348,10 +343,12 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes) ...@@ -348,10 +343,12 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
&qgroup_reserve); &qgroup_reserve);
ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_reserve, true); ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_reserve, true);
if (ret) if (ret)
goto out_fail; return ret;
ret = btrfs_reserve_metadata_bytes(root, block_rsv, meta_reserve, flush); ret = btrfs_reserve_metadata_bytes(root, block_rsv, meta_reserve, flush);
if (ret) if (ret) {
goto out_qgroup; btrfs_qgroup_free_meta_prealloc(root, qgroup_reserve);
return ret;
}
/* /*
* Now we need to update our outstanding extents and csum bytes _first_ * Now we need to update our outstanding extents and csum bytes _first_
...@@ -375,15 +372,7 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes) ...@@ -375,15 +372,7 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
block_rsv->qgroup_rsv_reserved += qgroup_reserve; block_rsv->qgroup_rsv_reserved += qgroup_reserve;
spin_unlock(&block_rsv->lock); spin_unlock(&block_rsv->lock);
if (delalloc_lock)
mutex_unlock(&inode->delalloc_mutex);
return 0; return 0;
out_qgroup:
btrfs_qgroup_free_meta_prealloc(root, qgroup_reserve);
out_fail:
if (delalloc_lock)
mutex_unlock(&inode->delalloc_mutex);
return ret;
} }
/** /**
......
...@@ -9356,7 +9356,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) ...@@ -9356,7 +9356,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
ei->io_failure_tree.track_uptodate = true; ei->io_failure_tree.track_uptodate = true;
atomic_set(&ei->sync_writers, 0); atomic_set(&ei->sync_writers, 0);
mutex_init(&ei->log_mutex); mutex_init(&ei->log_mutex);
mutex_init(&ei->delalloc_mutex);
btrfs_ordered_inode_tree_init(&ei->ordered_tree); btrfs_ordered_inode_tree_init(&ei->ordered_tree);
INIT_LIST_HEAD(&ei->delalloc_inodes); INIT_LIST_HEAD(&ei->delalloc_inodes);
INIT_LIST_HEAD(&ei->delayed_iput); INIT_LIST_HEAD(&ei->delayed_iput);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment