Commit 0916056e authored by Darrick J. Wong's avatar Darrick J. Wong

xfs: fix parent pointer scrub racing with subdirectory reparenting

Jan Kara pointed out that rename() doesn't lock a subdirectory that is
being moved from one parent to another, even though the move requires an
update to the subdirectory's dotdot entry.  This means that it's *not*
sufficient to hold a directory's IOLOCK to stabilize the dotdot entry.
We must hold the ILOCK of both the child and the alleged parent, and
there's no use in holding the parent's IOLOCK.

With that in mind, we can get rid of all the messy code that tries to
grab the parent's IOLOCK, which means we don't need to let go of the
ILOCK of the directory whose parent we are checking.  We still have to
use nonblocking mode to take the ILOCK of the alleged parent, so the
revalidation loop has to stay.

However, we can remove the retry counter, since threads aren't supposed
to hold the ILOCK for long periods of time.  Remove the inverted ilock
helper from the common code since nobody uses it.  Remove the entire
source of -EDEADLOCK-based "retry harder" scrub executions.

Link: https://lore.kernel.org/linux-xfs/20230117123735.un7wbamlbdihninm@quack3/Signed-off-by: default avatarDarrick J. Wong <djwong@kernel.org>
Reviewed-by: default avatarDave Chinner <dchinner@redhat.com>
parent b049962c
...@@ -962,28 +962,6 @@ xchk_metadata_inode_forks( ...@@ -962,28 +962,6 @@ xchk_metadata_inode_forks(
return 0; return 0;
} }
/*
* Try to lock an inode in violation of the usual locking order rules. For
* example, trying to get the IOLOCK while in transaction context, or just
* plain breaking AG-order or inode-order inode locking rules. Either way,
* the only way to avoid an ABBA deadlock is to use trylock and back off if
* we can't.
*/
int
xchk_ilock_inverted(
struct xfs_inode *ip,
uint lock_mode)
{
int i;
for (i = 0; i < 20; i++) {
if (xfs_ilock_nowait(ip, lock_mode))
return 0;
delay(1);
}
return -EDEADLOCK;
}
/* Pause background reaping of resources. */ /* Pause background reaping of resources. */
void void
xchk_stop_reaping( xchk_stop_reaping(
......
...@@ -148,7 +148,6 @@ static inline bool xchk_skip_xref(struct xfs_scrub_metadata *sm) ...@@ -148,7 +148,6 @@ static inline bool xchk_skip_xref(struct xfs_scrub_metadata *sm)
} }
int xchk_metadata_inode_forks(struct xfs_scrub *sc); int xchk_metadata_inode_forks(struct xfs_scrub *sc);
int xchk_ilock_inverted(struct xfs_inode *ip, uint lock_mode);
void xchk_stop_reaping(struct xfs_scrub *sc); void xchk_stop_reaping(struct xfs_scrub *sc);
void xchk_start_reaping(struct xfs_scrub *sc); void xchk_start_reaping(struct xfs_scrub *sc);
......
...@@ -64,15 +64,37 @@ xchk_parent_actor( ...@@ -64,15 +64,37 @@ xchk_parent_actor(
} }
/* /*
* Given the inode number of the alleged parent of the inode being * Try to lock a parent directory for checking dirents. Returns the inode
* scrubbed, try to validate that the parent has exactly one directory * flags for the locks we now hold, or zero if we failed.
* entry pointing back to the inode being scrubbed. */
STATIC unsigned int
xchk_parent_ilock_dir(
struct xfs_inode *dp)
{
if (!xfs_ilock_nowait(dp, XFS_ILOCK_SHARED))
return 0;
if (!xfs_need_iread_extents(&dp->i_df))
return XFS_ILOCK_SHARED;
xfs_iunlock(dp, XFS_ILOCK_SHARED);
if (!xfs_ilock_nowait(dp, XFS_ILOCK_EXCL))
return 0;
return XFS_ILOCK_EXCL;
}
/*
* Given the inode number of the alleged parent of the inode being scrubbed,
* try to validate that the parent has exactly one directory entry pointing
* back to the inode being scrubbed. Returns -EAGAIN if we need to revalidate
* the dotdot entry.
*/ */
STATIC int STATIC int
xchk_parent_validate( xchk_parent_validate(
struct xfs_scrub *sc, struct xfs_scrub *sc,
xfs_ino_t parent_ino, xfs_ino_t parent_ino)
bool *try_again)
{ {
struct xchk_parent_ctx spc = { struct xchk_parent_ctx spc = {
.sc = sc, .sc = sc,
...@@ -81,23 +103,21 @@ xchk_parent_validate( ...@@ -81,23 +103,21 @@ xchk_parent_validate(
struct xfs_mount *mp = sc->mp; struct xfs_mount *mp = sc->mp;
struct xfs_inode *dp = NULL; struct xfs_inode *dp = NULL;
xfs_nlink_t expected_nlink; xfs_nlink_t expected_nlink;
uint lock_mode; unsigned int lock_mode;
int error = 0; int error = 0;
*try_again = false;
/* Is this the root dir? Then '..' must point to itself. */ /* Is this the root dir? Then '..' must point to itself. */
if (sc->ip == mp->m_rootip) { if (sc->ip == mp->m_rootip) {
if (sc->ip->i_ino != mp->m_sb.sb_rootino || if (sc->ip->i_ino != mp->m_sb.sb_rootino ||
sc->ip->i_ino != parent_ino) sc->ip->i_ino != parent_ino)
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
goto out; return 0;
} }
/* '..' must not point to ourselves. */ /* '..' must not point to ourselves. */
if (sc->ip->i_ino == parent_ino) { if (sc->ip->i_ino == parent_ino) {
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
goto out; return 0;
} }
/* /*
...@@ -124,41 +144,39 @@ xchk_parent_validate( ...@@ -124,41 +144,39 @@ xchk_parent_validate(
if (error == -EINVAL || error == -ENOENT) { if (error == -EINVAL || error == -ENOENT) {
error = -EFSCORRUPTED; error = -EFSCORRUPTED;
xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error); xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error);
goto out; return error;
} }
if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error)) if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error))
goto out; return error;
if (dp == sc->ip || !S_ISDIR(VFS_I(dp)->i_mode)) { if (dp == sc->ip || !S_ISDIR(VFS_I(dp)->i_mode)) {
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
goto out_rele; goto out_rele;
} }
/* lock_mode = xchk_parent_ilock_dir(dp);
* We prefer to keep the inode locked while we lock and search if (!lock_mode) {
* its alleged parent for a forward reference. If we can grab xfs_iunlock(sc->ip, XFS_ILOCK_EXCL);
* the iolock, validate the pointers and we're done. We must xfs_ilock(sc->ip, XFS_ILOCK_EXCL);
* use nowait here to avoid an ABBA deadlock on the parent and error = -EAGAIN;
* the child inodes.
*/
if (!xfs_ilock_nowait(dp, XFS_IOLOCK_SHARED)) {
*try_again = true;
goto out_rele; goto out_rele;
} }
lock_mode = xfs_ilock_data_map_shared(dp); /* Look for a directory entry in the parent pointing to the child. */
error = xchk_dir_walk(sc, dp, xchk_parent_actor, &spc); error = xchk_dir_walk(sc, dp, xchk_parent_actor, &spc);
xfs_iunlock(dp, lock_mode);
if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error)) if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error))
goto out_unlock; goto out_unlock;
/*
* Ensure that the parent has as many links to the child as the child
* thinks it has to the parent.
*/
if (spc.nlink != expected_nlink) if (spc.nlink != expected_nlink)
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
out_unlock: out_unlock:
xfs_iunlock(dp, XFS_IOLOCK_SHARED); xfs_iunlock(dp, lock_mode);
out_rele: out_rele:
xfs_irele(dp); xfs_irele(dp);
out:
return error; return error;
} }
...@@ -169,8 +187,6 @@ xchk_parent( ...@@ -169,8 +187,6 @@ xchk_parent(
{ {
struct xfs_mount *mp = sc->mp; struct xfs_mount *mp = sc->mp;
xfs_ino_t parent_ino; xfs_ino_t parent_ino;
bool try_again;
int tries = 0;
int error = 0; int error = 0;
/* /*
...@@ -183,49 +199,29 @@ xchk_parent( ...@@ -183,49 +199,29 @@ xchk_parent(
/* We're not a special inode, are we? */ /* We're not a special inode, are we? */
if (!xfs_verify_dir_ino(mp, sc->ip->i_ino)) { if (!xfs_verify_dir_ino(mp, sc->ip->i_ino)) {
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
goto out; return 0;
} }
/*
* The VFS grabs a read or write lock via i_rwsem before it reads
* or writes to a directory. If we've gotten this far we've
* already obtained IOLOCK_EXCL, which (since 4.10) is the same as
* getting a write lock on i_rwsem. Therefore, it is safe for us
* to drop the ILOCK here in order to do directory lookups.
*/
sc->ilock_flags &= ~(XFS_ILOCK_EXCL | XFS_MMAPLOCK_EXCL);
xfs_iunlock(sc->ip, XFS_ILOCK_EXCL | XFS_MMAPLOCK_EXCL);
do { do {
if (xchk_should_terminate(sc, &error))
break;
/* Look up '..' */ /* Look up '..' */
error = xfs_dir_lookup(sc->tp, sc->ip, &xfs_name_dotdot, error = xchk_dir_lookup(sc, sc->ip, &xfs_name_dotdot,
&parent_ino, NULL); &parent_ino);
if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error)) if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error))
goto out; return error;
if (!xfs_verify_dir_ino(mp, parent_ino)) { if (!xfs_verify_dir_ino(mp, parent_ino)) {
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
goto out; return 0;
} }
error = xchk_parent_validate(sc, parent_ino, &try_again);
if (error)
goto out;
} while (try_again && ++tries < 20);
/* /*
* We gave it our best shot but failed, so mark this scrub * Check that the dotdot entry points to a parent directory
* incomplete. Userspace can decide if it wants to try again. * containing a dirent pointing to this subdirectory.
*/ */
if (try_again && tries == 20) error = xchk_parent_validate(sc, parent_ino);
xchk_set_incomplete(sc); } while (error == -EAGAIN);
out:
/*
* If we failed to lock the parent inode even after a retry, just mark
* this scrub incomplete and return.
*/
if ((sc->flags & XCHK_TRY_HARDER) && error == -EDEADLOCK) {
error = 0;
xchk_set_incomplete(sc);
}
return error; return error;
} }
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment