Commit 0f2b8098 authored by Josef Bacik's avatar Josef Bacik Committed by David Sterba

btrfs: take the cleaner_mutex earlier in qgroup disable

One of my CI runs popped the following lockdep splat

======================================================
WARNING: possible circular locking dependency detected
6.9.0-rc4+ #1 Not tainted
------------------------------------------------------
btrfs/471533 is trying to acquire lock:
ffff92ba46980850 (&fs_info->cleaner_mutex){+.+.}-{3:3}, at: btrfs_quota_disable+0x54/0x4c0

but task is already holding lock:
ffff92ba46980bd0 (&fs_info->subvol_sem){++++}-{3:3}, at: btrfs_ioctl+0x1c8f/0x2600

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #2 (&fs_info->subvol_sem){++++}-{3:3}:
       down_read+0x42/0x170
       btrfs_rename+0x607/0xb00
       btrfs_rename2+0x2e/0x70
       vfs_rename+0xaf8/0xfc0
       do_renameat2+0x586/0x600
       __x64_sys_rename+0x43/0x50
       do_syscall_64+0x95/0x180
       entry_SYSCALL_64_after_hwframe+0x76/0x7e

-> #1 (&sb->s_type->i_mutex_key#16){++++}-{3:3}:
       down_write+0x3f/0xc0
       btrfs_inode_lock+0x40/0x70
       prealloc_file_extent_cluster+0x1b0/0x370
       relocate_file_extent_cluster+0xb2/0x720
       relocate_data_extent+0x107/0x160
       relocate_block_group+0x442/0x550
       btrfs_relocate_block_group+0x2cb/0x4b0
       btrfs_relocate_chunk+0x50/0x1b0
       btrfs_balance+0x92f/0x13d0
       btrfs_ioctl+0x1abf/0x2600
       __x64_sys_ioctl+0x97/0xd0
       do_syscall_64+0x95/0x180
       entry_SYSCALL_64_after_hwframe+0x76/0x7e

-> #0 (&fs_info->cleaner_mutex){+.+.}-{3:3}:
       __lock_acquire+0x13e7/0x2180
       lock_acquire+0xcb/0x2e0
       __mutex_lock+0xbe/0xc00
       btrfs_quota_disable+0x54/0x4c0
       btrfs_ioctl+0x206b/0x2600
       __x64_sys_ioctl+0x97/0xd0
       do_syscall_64+0x95/0x180
       entry_SYSCALL_64_after_hwframe+0x76/0x7e

other info that might help us debug this:

Chain exists of:
  &fs_info->cleaner_mutex --> &sb->s_type->i_mutex_key#16 --> &fs_info->subvol_sem

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(&fs_info->subvol_sem);
                               lock(&sb->s_type->i_mutex_key#16);
                               lock(&fs_info->subvol_sem);
  lock(&fs_info->cleaner_mutex);

 *** DEADLOCK ***

2 locks held by btrfs/471533:
 #0: ffff92ba4319e420 (sb_writers#14){.+.+}-{0:0}, at: btrfs_ioctl+0x3b5/0x2600
 #1: ffff92ba46980bd0 (&fs_info->subvol_sem){++++}-{3:3}, at: btrfs_ioctl+0x1c8f/0x2600

stack backtrace:
CPU: 1 PID: 471533 Comm: btrfs Kdump: loaded Not tainted 6.9.0-rc4+ #1
Call Trace:
 <TASK>
 dump_stack_lvl+0x77/0xb0
 check_noncircular+0x148/0x160
 ? lock_acquire+0xcb/0x2e0
 __lock_acquire+0x13e7/0x2180
 lock_acquire+0xcb/0x2e0
 ? btrfs_quota_disable+0x54/0x4c0
 ? lock_is_held_type+0x9a/0x110
 __mutex_lock+0xbe/0xc00
 ? btrfs_quota_disable+0x54/0x4c0
 ? srso_return_thunk+0x5/0x5f
 ? lock_acquire+0xcb/0x2e0
 ? btrfs_quota_disable+0x54/0x4c0
 ? btrfs_quota_disable+0x54/0x4c0
 btrfs_quota_disable+0x54/0x4c0
 btrfs_ioctl+0x206b/0x2600
 ? srso_return_thunk+0x5/0x5f
 ? __do_sys_statfs+0x61/0x70
 __x64_sys_ioctl+0x97/0xd0
 do_syscall_64+0x95/0x180
 ? srso_return_thunk+0x5/0x5f
 ? reacquire_held_locks+0xd1/0x1f0
 ? do_user_addr_fault+0x307/0x8a0
 ? srso_return_thunk+0x5/0x5f
 ? lock_acquire+0xcb/0x2e0
 ? srso_return_thunk+0x5/0x5f
 ? srso_return_thunk+0x5/0x5f
 ? find_held_lock+0x2b/0x80
 ? srso_return_thunk+0x5/0x5f
 ? lock_release+0xca/0x2a0
 ? srso_return_thunk+0x5/0x5f
 ? do_user_addr_fault+0x35c/0x8a0
 ? srso_return_thunk+0x5/0x5f
 ? trace_hardirqs_off+0x4b/0xc0
 ? srso_return_thunk+0x5/0x5f
 ? lockdep_hardirqs_on_prepare+0xde/0x190
 ? srso_return_thunk+0x5/0x5f

This happens because when we call rename we already have the inode mutex
held, and then we acquire the subvol_sem if we are a subvolume.  This
makes the dependency

inode lock -> subvol sem

When we're running data relocation we will preallocate space for the
data relocation inode, and we always run the relocation under the
->cleaner_mutex.  This now creates the dependency of

cleaner_mutex -> inode lock (from the prealloc) -> subvol_sem

Qgroup delete is doing this in the opposite order, it is acquiring the
subvol_sem and then it is acquiring the cleaner_mutex, which results in
this lockdep splat.  This deadlock can't happen in reality, because we
won't ever rename the data reloc inode, nor is the data reloc inode a
subvolume.

However this is fairly easy to fix, simply take the cleaner mutex in the
case where we are disabling qgroups before we take the subvol_sem.  This
resolves the lockdep splat.
Reviewed-by: default avatarFilipe Manana <fdmanana@suse.com>
Signed-off-by: default avatarJosef Bacik <josef@toxicpanda.com>
Reviewed-by: default avatarDavid Sterba <dsterba@suse.com>
Signed-off-by: default avatarDavid Sterba <dsterba@suse.com>
parent 9af503d9
......@@ -3758,15 +3758,43 @@ static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg)
goto drop_write;
}
down_write(&fs_info->subvol_sem);
switch (sa->cmd) {
case BTRFS_QUOTA_CTL_ENABLE:
case BTRFS_QUOTA_CTL_ENABLE_SIMPLE_QUOTA:
down_write(&fs_info->subvol_sem);
ret = btrfs_quota_enable(fs_info, sa);
up_write(&fs_info->subvol_sem);
break;
case BTRFS_QUOTA_CTL_DISABLE:
/*
* Lock the cleaner mutex to prevent races with concurrent
* relocation, because relocation may be building backrefs for
* blocks of the quota root while we are deleting the root. This
* is like dropping fs roots of deleted snapshots/subvolumes, we
* need the same protection.
*
* This also prevents races between concurrent tasks trying to
* disable quotas, because we will unlock and relock
* qgroup_ioctl_lock across BTRFS_FS_QUOTA_ENABLED changes.
*
* We take this here because we have the dependency of
*
* inode_lock -> subvol_sem
*
* because of rename. With relocation we can prealloc extents,
* so that makes the dependency chain
*
* cleaner_mutex -> inode_lock -> subvol_sem
*
* so we must take the cleaner_mutex here before we take the
* subvol_sem. The deadlock can't actually happen, but this
* quiets lockdep.
*/
mutex_lock(&fs_info->cleaner_mutex);
down_write(&fs_info->subvol_sem);
ret = btrfs_quota_disable(fs_info);
up_write(&fs_info->subvol_sem);
mutex_unlock(&fs_info->cleaner_mutex);
break;
default:
ret = -EINVAL;
......@@ -3774,7 +3802,6 @@ static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg)
}
kfree(sa);
up_write(&fs_info->subvol_sem);
drop_write:
mnt_drop_write_file(file);
return ret;
......
......@@ -1342,16 +1342,10 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
lockdep_assert_held_write(&fs_info->subvol_sem);
/*
* Lock the cleaner mutex to prevent races with concurrent relocation,
* because relocation may be building backrefs for blocks of the quota
* root while we are deleting the root. This is like dropping fs roots
* of deleted snapshots/subvolumes, we need the same protection.
*
* This also prevents races between concurrent tasks trying to disable
* quotas, because we will unlock and relock qgroup_ioctl_lock across
* BTRFS_FS_QUOTA_ENABLED changes.
* Relocation will mess with backrefs, so make sure we have the
* cleaner_mutex held to protect us from relocate.
*/
mutex_lock(&fs_info->cleaner_mutex);
lockdep_assert_held(&fs_info->cleaner_mutex);
mutex_lock(&fs_info->qgroup_ioctl_lock);
if (!fs_info->quota_root)
......@@ -1373,9 +1367,13 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
btrfs_qgroup_wait_for_completion(fs_info, false);
/*
* We have nothing held here and no trans handle, just return the error
* if there is one.
*/
ret = flush_reservations(fs_info);
if (ret)
goto out_unlock_cleaner;
return ret;
/*
* 1 For the root item
......@@ -1439,9 +1437,6 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
btrfs_end_transaction(trans);
else if (trans)
ret = btrfs_commit_transaction(trans);
out_unlock_cleaner:
mutex_unlock(&fs_info->cleaner_mutex);
return ret;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment