Commit e1cccd91 authored by Christoph Hellwig, committed by Lachlan McIlroy

[XFS] kill xfs_lock_dir_and_entry

When multiple inodes are locked in XFS it happens in order of the inode
number, with everything but the first inode trylocked if any of the
previous inodes is in the AIL.

Except for the sorting of the inodes this logic is implemented in
xfs_lock_inodes, but it is also partially duplicated in
xfs_lock_dir_and_entry, which in a particularly stupid way adds a lock
roundtrip if the inode ordering is not optimal.

This patch adds a new helper xfs_lock_two_inodes that takes two inodes and
locks them in the most optimal way according to the above locking protocol
and uses it for all places that want to lock two inodes.

The only caller of xfs_lock_inodes is xfs_rename which might lock up to
four inodes.

SGI-PV: 981498

SGI-Modid: xfs-linux-melb:xfs-kern:31772a
Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Donald Douwsma <donaldd@sgi.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
parent 1550d0b0
......@@ -128,7 +128,6 @@ xfs_swap_extents(
xfs_swapext_t *sxp)
{
xfs_mount_t *mp;
xfs_inode_t *ips[2];
xfs_trans_t *tp;
xfs_bstat_t *sbp = &sxp->sx_stat;
bhv_vnode_t *vp, *tvp;
......@@ -153,16 +152,7 @@ xfs_swap_extents(
vp = VFS_I(ip);
tvp = VFS_I(tip);
/* Lock in i_ino order */
if (ip->i_ino < tip->i_ino) {
ips[0] = ip;
ips[1] = tip;
} else {
ips[0] = tip;
ips[1] = ip;
}
xfs_lock_inodes(ips, 2, lock_flags);
xfs_lock_two_inodes(ip, tip, lock_flags);
locked = 1;
/* Verify that both files have the same format */
......@@ -265,7 +255,7 @@ xfs_swap_extents(
locked = 0;
goto error0;
}
xfs_lock_inodes(ips, 2, XFS_ILOCK_EXCL);
xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
/*
* Count the number of extended attribute blocks
......
......@@ -531,6 +531,7 @@ void xfs_iflush_all(struct xfs_mount *);
void xfs_ichgtime(xfs_inode_t *, int);
xfs_fsize_t xfs_file_last_byte(xfs_inode_t *);
void xfs_lock_inodes(xfs_inode_t **, int, uint);
void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint);
void xfs_synchronize_atime(xfs_inode_t *);
void xfs_mark_inode_dirty_sync(xfs_inode_t *);
......
......@@ -1707,111 +1707,6 @@ xfs_create(
goto std_return;
}
#ifdef DEBUG
/*
 * Debug-only counters to see if (and how often) we are hitting the
 * deadlock prevention code paths in xfs_lock_dir_and_entry().
 */
int xfs_rm_locks;	/* calls to xfs_lock_dir_and_entry() */
int xfs_rm_lock_delays;	/* times the retry path called delay() */
int xfs_rm_attempts;	/* failed trylocks of the entry inode */
#endif
/*
 * Lock a directory inode and the inode of one of its entries, taking the
 * locks in increasing inode number order.
 *
 * When the entry has the same inode number as the directory (e.g. the
 * entry is ".."), only the directory is locked; the vnode reference count
 * still includes the reference from the entry in that case.
 *
 * Deadlock avoidance: if the directory we already hold is in the AIL it
 * may be blocking up the log, while the entry inode may be held by another
 * thread that is waiting for log space (e.g. a permanent log reservation
 * with a long running transaction, see xfs_itruncate_finish).  So when
 * the directory is in the AIL the entry is only trylocked; on failure the
 * directory lock is dropped (xfs_iunlock will potentially push the tail
 * if we were holding up the log) and the whole sequence is retried.
 *
 * Always returns 0.
 */
STATIC int
xfs_lock_dir_and_entry(
	xfs_inode_t	*dp,
	xfs_inode_t	*ip)	/* inode of entry 'name' */
{
	xfs_inode_t	*ordered[2];
	xfs_log_item_t	*dir_item;
	xfs_ino_t	entry_ino;
	int		tries = 0;

#ifdef DEBUG
	xfs_rm_locks++;
#endif

	for (;;) {
		xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);

		entry_ino = ip->i_ino;
		xfs_itrace_ref(ip);

		/*
		 * Same inode number: we were asked to lock /x/.. and the
		 * entry is the parent directory itself.  Nothing more to
		 * take.
		 */
		if (entry_ino == dp->i_ino)
			return 0;

		/*
		 * The entry sorts before the directory, so the directory
		 * lock we already hold is out of order.  Drop it and let
		 * xfs_lock_inodes take both in the right order.
		 */
		if (entry_ino < dp->i_ino) {
			xfs_iunlock(dp, XFS_ILOCK_EXCL);
			ordered[0] = ip;
			ordered[1] = dp;
			xfs_lock_inodes(ordered, 2, XFS_ILOCK_EXCL);
			return 0;
		}

		/*
		 * Already in the right order.  A blocking lock on the entry
		 * is only safe when the directory is not in the AIL.
		 */
		dir_item = (xfs_log_item_t *)dp->i_itemp;
		if (!dir_item || !(dir_item->li_flags & XFS_LI_IN_AIL)) {
			xfs_ilock(ip, XFS_ILOCK_EXCL);
			return 0;
		}

		if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
			return 0;

		/*
		 * Trylock failed: unlock dp and start over.  xfs_iunlock
		 * will try to push the tail if the inode is in the AIL.
		 */
		tries++;
#ifdef DEBUG
		xfs_rm_attempts++;
#endif
		xfs_iunlock(dp, XFS_ILOCK_EXCL);
		if ((tries % 5) == 0) {
			delay(1); /* Don't just spin the CPU */
#ifdef DEBUG
			xfs_rm_lock_delays++;
#endif
		}
	}
}
#ifdef DEBUG
int xfs_locked_n;
int xfs_small_retries;
......@@ -1946,6 +1841,45 @@ xfs_lock_inodes(
#endif
}
/*
 * Lock two distinct inodes with the given lock mode, lower inode number
 * first.
 *
 * After the first inode is locked, the second one is taken with a
 * blocking lock unless the first inode is in the AIL.  In that case the
 * second inode is only trylocked; if the trylock fails the first lock is
 * released and the sequence restarts, calling delay(1) on every fifth
 * failed attempt so we do not just spin the CPU.
 *
 * The two inodes must have different inode numbers (asserted).
 */
void
xfs_lock_two_inodes(
	xfs_inode_t		*ip0,
	xfs_inode_t		*ip1,
	uint			lock_mode)
{
	xfs_inode_t		*first = ip0;
	xfs_inode_t		*second = ip1;
	xfs_log_item_t		*item;
	int			retries = 0;

	ASSERT(ip0->i_ino != ip1->i_ino);

	/* Order the pair by inode number. */
	if (ip0->i_ino > ip1->i_ino) {
		first = ip1;
		second = ip0;
	}

	for (;;) {
		xfs_ilock(first, xfs_lock_inumorder(lock_mode, 0));

		/* First inode not in the AIL: a blocking lock is fine. */
		item = (xfs_log_item_t *)first->i_itemp;
		if (!item || !(item->li_flags & XFS_LI_IN_AIL)) {
			xfs_ilock(second, xfs_lock_inumorder(lock_mode, 1));
			return;
		}

		/*
		 * The first inode is in the AIL, so we must only TRY to get
		 * the second lock.
		 */
		if (xfs_ilock_nowait(second, xfs_lock_inumorder(lock_mode, 1)))
			return;

		/* Could not get it: release the first lock and try again. */
		xfs_iunlock(first, lock_mode);
		if ((++retries % 5) == 0)
			delay(1); /* Don't just spin the CPU */
	}
}
int
xfs_remove(
xfs_inode_t *dp,
......@@ -2018,9 +1952,7 @@ xfs_remove(
goto out_trans_cancel;
}
error = xfs_lock_dir_and_entry(dp, ip);
if (error)
goto out_trans_cancel;
xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL);
/*
* At this point, we've gotten both the directory and the entry
......@@ -2047,9 +1979,6 @@ xfs_remove(
}
}
/*
* Entry must exist since we did a lookup in xfs_lock_dir_and_entry.
*/
XFS_BMAP_INIT(&free_list, &first_block);
error = xfs_dir_removename(tp, dp, name, ip->i_ino,
&first_block, &free_list, resblks);
......@@ -2155,7 +2084,6 @@ xfs_link(
{
xfs_mount_t *mp = tdp->i_mount;
xfs_trans_t *tp;
xfs_inode_t *ips[2];
int error;
xfs_bmap_free_t free_list;
xfs_fsblock_t first_block;
......@@ -2203,15 +2131,7 @@ xfs_link(
goto error_return;
}
if (sip->i_ino < tdp->i_ino) {
ips[0] = sip;
ips[1] = tdp;
} else {
ips[0] = tdp;
ips[1] = sip;
}
xfs_lock_inodes(ips, 2, XFS_ILOCK_EXCL);
xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL);
/*
* Increment vnode ref counts since xfs_trans_commit &
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment