Commit 4c46819a authored by Christoph Hellwig, committed by Ben Myers

xfs: do not write the buffer from xfs_iflush

Instead of writing the buffer directly from inside xfs_iflush, return it to
the caller and let the caller decide what to do with the buffer.  Also
remove the pincount check in xfs_iflush that all non-blocking callers already
implement, along with the now-unused flags parameter.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
parent 8a48088f
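
Taken together, the change moves the write-policy decision out of xfs_iflush()
and into its two callers.  A rough sketch of the resulting calling convention,
condensed from the hunks below (the "sync" flag is illustrative only and is
not part of the patch):

	struct xfs_buf	*bp = NULL;
	int		error;

	/* ILOCK and inode flush lock are held by the caller */
	error = xfs_iflush(ip, &bp);
	if (!error) {
		if (sync)			/* illustrative policy flag */
			error = xfs_bwrite(bp);		/* reclaim path: write now */
		else
			xfs_buf_delwri_queue(bp);	/* AIL push: delayed write */
		xfs_buf_relse(bp);
	}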
@@ -2384,22 +2384,22 @@ xfs_iflush_cluster(
 }
 
 /*
- * xfs_iflush() will write a modified inode's changes out to the
- * inode's on disk home. The caller must have the inode lock held
- * in at least shared mode and the inode flush completion must be
- * active as well. The inode lock will still be held upon return from
- * the call and the caller is free to unlock it.
- * The inode flush will be completed when the inode reaches the disk.
- * The flags indicate how the inode's buffer should be written out.
+ * Flush dirty inode metadata into the backing buffer.
+ *
+ * The caller must have the inode lock and the inode flush lock held. The
+ * inode lock will still be held upon return to the caller, and the inode
+ * flush lock will be released after the inode has reached the disk.
+ *
+ * The caller must write out the buffer returned in *bpp and release it.
  */
 int
 xfs_iflush(
-	xfs_inode_t		*ip,
-	uint			flags)
+	struct xfs_inode	*ip,
+	struct xfs_buf		**bpp)
 {
-	xfs_buf_t		*bp;
-	xfs_dinode_t		*dip;
-	xfs_mount_t		*mp;
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_buf		*bp;
+	struct xfs_dinode	*dip;
 	int			error;
 
 	XFS_STATS_INC(xs_iflush_count);
@@ -2409,24 +2409,8 @@ xfs_iflush(
 	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
 	       ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
 
-	mp = ip->i_mount;
-
-	/*
-	 * We can't flush the inode until it is unpinned, so wait for it if we
-	 * are allowed to block. We know no one new can pin it, because we are
-	 * holding the inode lock shared and you need to hold it exclusively to
-	 * pin the inode.
-	 *
-	 * If we are not allowed to block, force the log out asynchronously so
-	 * that when we come back the inode will be unpinned. If other inodes
-	 * in the same cluster are dirty, they will probably write the inode
-	 * out for us if they occur after the log force completes.
-	 */
-	if (!(flags & SYNC_WAIT) && xfs_ipincount(ip)) {
-		xfs_iunpin(ip);
-		xfs_ifunlock(ip);
-		return EAGAIN;
-	}
+	*bpp = NULL;
+
 	xfs_iunpin_wait(ip);
 
 	/*
@@ -2458,8 +2442,7 @@ xfs_iflush(
 	/*
 	 * Get the buffer containing the on-disk inode.
 	 */
-	error = xfs_itobp(mp, NULL, ip, &dip, &bp,
-				(flags & SYNC_TRYLOCK) ? XBF_TRYLOCK : XBF_LOCK);
+	error = xfs_itobp(mp, NULL, ip, &dip, &bp, XBF_TRYLOCK);
 	if (error || !bp) {
 		xfs_ifunlock(ip);
 		return error;
@@ -2487,13 +2470,8 @@ xfs_iflush(
 	if (error)
 		goto cluster_corrupt_out;
 
-	if (flags & SYNC_WAIT)
-		error = xfs_bwrite(bp);
-	else
-		xfs_buf_delwri_queue(bp);
-
-	xfs_buf_relse(bp);
-	return error;
+	*bpp = bp;
+	return 0;
 
 corrupt_out:
 	xfs_buf_relse(bp);
...
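
A consequence of these hunks worth spelling out: *bpp is cleared on entry, set
only on the success path, and the corrupt_out path releases the buffer
internally, so the caller owns a buffer exactly when xfs_iflush() returns 0.
A minimal caller-side sketch of that rule (illustrative, not code from the
patch):

	error = xfs_iflush(ip, &bp);
	if (error)
		return error;		/* bp is still NULL; nothing to release */
	error = xfs_bwrite(bp);		/* the caller writes the buffer ... */
	xfs_buf_relse(bp);		/* ... and releases it */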
@@ -529,7 +529,7 @@ int xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
 void		xfs_iext_realloc(xfs_inode_t *, int, int);
 void		xfs_iunpin_wait(xfs_inode_t *);
-int		xfs_iflush(xfs_inode_t *, uint);
+int		xfs_iflush(struct xfs_inode *, struct xfs_buf **);
 void		xfs_promote_inode(struct xfs_inode *);
 void		xfs_lock_inodes(xfs_inode_t **, int, uint);
 void		xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint);
...
@@ -506,6 +506,15 @@ xfs_inode_item_trylock(
 	if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
 		return XFS_ITEM_LOCKED;
 
+	/*
+	 * Re-check the pincount now that we stabilized the value by
+	 * taking the ilock.
+	 */
+	if (xfs_ipincount(ip) > 0) {
+		xfs_iunlock(ip, XFS_ILOCK_SHARED);
+		return XFS_ITEM_PINNED;
+	}
+
 	if (!xfs_iflock_nowait(ip)) {
 		/*
 		 * inode has already been flushed to the backing buffer,
@@ -666,6 +675,8 @@ xfs_inode_item_push(
 {
 	struct xfs_inode_log_item *iip = INODE_ITEM(lip);
 	struct xfs_inode	*ip = iip->ili_inode;
+	struct xfs_buf		*bp = NULL;
+	int			error;
 
 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));
 	ASSERT(xfs_isiflocked(ip));
@@ -689,7 +700,11 @@ xfs_inode_item_push(
 	 * will pull the inode from the AIL, mark it clean and unlock the flush
 	 * lock.
 	 */
-	(void) xfs_iflush(ip, SYNC_TRYLOCK);
+	error = xfs_iflush(ip, &bp);
+	if (!error) {
+		xfs_buf_delwri_queue(bp);
+		xfs_buf_relse(bp);
+	}
 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
 }
...
@@ -648,10 +648,6 @@ xfs_reclaim_inode_grab(
  * (*) dgc: I don't think the clean, pinned state is possible but it gets
  * handled anyway given the order of checks implemented.
  *
- * As can be seen from the table, the return value of xfs_iflush() is not
- * sufficient to correctly decide the reclaim action here. The checks in
- * xfs_iflush() might look like duplicates, but they are not.
- *
  * Also, because we get the flush lock first, we know that any inode that has
  * been flushed delwri has had the flush completed by the time we check that
  * the inode is clean.
@@ -679,7 +675,8 @@ xfs_reclaim_inode(
 	struct xfs_perag	*pag,
 	int			sync_mode)
 {
-	int			error;
+	struct xfs_buf		*bp = NULL;
+	int			error;
 
 restart:
 	error = 0;
@@ -728,29 +725,33 @@ xfs_reclaim_inode(
 	/*
 	 * Now we have an inode that needs flushing.
 	 *
-	 * We do a nonblocking flush here even if we are doing a SYNC_WAIT
-	 * reclaim as we can deadlock with inode cluster removal.
+	 * Note that xfs_iflush will never block on the inode buffer lock, as
 	 * xfs_ifree_cluster() can lock the inode buffer before it locks the
 	 * ip->i_lock, and we are doing the exact opposite here. As a result,
-	 * doing a blocking xfs_itobp() to get the cluster buffer will result
+	 * doing a blocking xfs_itobp() to get the cluster buffer would result
	 * in an ABBA deadlock with xfs_ifree_cluster().
 	 *
 	 * As xfs_ifree_cluser() must gather all inodes that are active in the
 	 * cache to mark them stale, if we hit this case we don't actually want
 	 * to do IO here - we want the inode marked stale so we can simply
-	 * reclaim it. Hence if we get an EAGAIN error on a SYNC_WAIT flush,
-	 * just unlock the inode, back off and try again. Hopefully the next
-	 * pass through will see the stale flag set on the inode.
+	 * reclaim it. Hence if we get an EAGAIN error here, just unlock the
+	 * inode, back off and try again. Hopefully the next pass through will
+	 * see the stale flag set on the inode.
 	 */
-	error = xfs_iflush(ip, SYNC_TRYLOCK | sync_mode);
+	error = xfs_iflush(ip, &bp);
 	if (error == EAGAIN) {
 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
 		/* backoff longer than in xfs_ifree_cluster */
 		delay(2);
 		goto restart;
 	}
-	xfs_iflock(ip);
 
+	if (!error) {
+		error = xfs_bwrite(bp);
+		xfs_buf_relse(bp);
+	}
+
+	xfs_iflock(ip);
 reclaim:
 	xfs_ifunlock(ip);
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
...
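
The ABBA deadlock described in the comment above comes from the two paths
taking the same pair of locks in opposite orders: xfs_ifree_cluster() takes
the cluster buffer lock (A) and then the inode's ilock (B), while reclaim
already holds B when it goes looking for A.  The trylock-and-back-off loop is
the escape hatch; a condensed sketch of that pattern (illustrative, mirroring
the reclaim hunk above):

restart:
	xfs_ilock(ip, XFS_ILOCK_EXCL);		/* take B first ... */
	error = xfs_iflush(ip, &bp);		/* ... then trylock A inside */
	if (error == EAGAIN) {
		/* A is held by xfs_ifree_cluster(); drop B and retry */
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		delay(2);
		goto restart;
	}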