Commit 51c91ed5 authored by Christoph Hellwig, committed by Nathan Scott

[XFS] add infrastructure for waiting on I/O completion at inode reclaim time

SGI-PV: 934766
SGI-Modid: xfs-linux:xfs-kern:196854a
Signed-off-by: Christoph Hellwig <hch@sgi.com>
Signed-off-by: Nathan Scott <nathans@sgi.com>
parent 592cb26b
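The change in a nutshell: each vnode gains an atomic v_iocount of in-flight I/Os. Submission increments it, completion calls vn_iowake() to decrement it and wake any waiter, and xfs_reclaim() calls vn_iowait() to sleep until the count drains, using a small hashed table of shared wait queues rather than a queue per vnode. Below is a minimal userspace sketch of that scheme, with pthread mutex/condvar pairs standing in for kernel wait_queue_head_t. The vn_iostart() helper is hypothetical (the patch open-codes the atomic_inc at the submission site), and the sketch mirrors the patch only in shape, not in any kernel API.

	/*
	 * Userspace sketch of the v_iocount / vn_iowait / vn_iowake scheme
	 * introduced by this commit.  Build with: cc -pthread sketch.c
	 */
	#include <pthread.h>
	#include <stdatomic.h>

	#define NVSYNC	37		/* small shared table, as in the patch */

	struct waitq {
		pthread_mutex_t	lock;
		pthread_cond_t	cond;
	};

	static struct waitq vsync[NVSYNC];

	struct vnode {
		atomic_int	v_iocount;	/* outstanding I/O count */
	};

	/* Hash a vnode pointer onto one of the shared wait queues. */
	static struct waitq *vptosync(struct vnode *vp)
	{
		return &vsync[(unsigned long)vp % NVSYNC];
	}

	static void vn_init(void)
	{
		for (int i = 0; i < NVSYNC; i++) {
			pthread_mutex_init(&vsync[i].lock, NULL);
			pthread_cond_init(&vsync[i].cond, NULL);
		}
	}

	/* Hypothetical helper: pin the vnode before issuing I/O.  The
	 * patch open-codes this as atomic_inc(&...->v_iocount). */
	static void vn_iostart(struct vnode *vp)
	{
		atomic_fetch_add(&vp->v_iocount, 1);
	}

	/* Completion: drop the count and, on the last I/O, wake waiters. */
	static void vn_iowake(struct vnode *vp)
	{
		struct waitq *wq = vptosync(vp);

		pthread_mutex_lock(&wq->lock);
		if (atomic_fetch_sub(&vp->v_iocount, 1) == 1)
			pthread_cond_broadcast(&wq->cond);
		pthread_mutex_unlock(&wq->lock);
	}

	/* Reclaim side: block until every outstanding I/O has completed. */
	static void vn_iowait(struct vnode *vp)
	{
		struct waitq *wq = vptosync(vp);

		pthread_mutex_lock(&wq->lock);
		/* Queues are shared across vnodes, so re-check the
		 * predicate after each wakeup (wait_event does the same). */
		while (atomic_load(&vp->v_iocount) != 0)
			pthread_cond_wait(&wq->cond, &wq->lock);
		pthread_mutex_unlock(&wq->lock);
	}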
fs/xfs/linux-2.6/xfs_aops.c
@@ -139,7 +139,7 @@ linvfs_unwritten_convert(
 	XFS_BUF_SET_FSPRIVATE(bp, NULL);
 	XFS_BUF_CLR_IODONE_FUNC(bp);
 	XFS_BUF_UNDATAIO(bp);
-	iput(LINVFS_GET_IP(vp));
+	vn_iowake(vp);
 	pagebuf_iodone(bp, 0, 0);
 }
@@ -448,14 +448,7 @@ xfs_map_unwritten(
 	if (!pb)
 		return -EAGAIN;
 
-	/* Take a reference to the inode to prevent it from
-	 * being reclaimed while we have outstanding unwritten
-	 * extent IO on it.
-	 */
-	if ((igrab(inode)) != inode) {
-		pagebuf_free(pb);
-		return -EAGAIN;
-	}
+	atomic_inc(&LINVFS_GET_VP(inode)->v_iocount);
 
 	/* Set the count to 1 initially, this will stop an I/O
 	 * completion callout which happens before we have started
...
fs/xfs/linux-2.6/xfs_vnode.c
@@ -42,17 +42,33 @@ DEFINE_SPINLOCK(vnumber_lock);
  */
 #define NVSYNC		37
 #define vptosync(v)	(&vsync[((unsigned long)v) % NVSYNC])
-sv_t vsync[NVSYNC];
+STATIC wait_queue_head_t vsync[NVSYNC];
 
 void
 vn_init(void)
 {
-	register sv_t *svp;
-	register int i;
+	int i;
 
-	for (svp = vsync, i = 0; i < NVSYNC; i++, svp++)
-		init_sv(svp, SV_DEFAULT, "vsy", i);
+	for (i = 0; i < NVSYNC; i++)
+		init_waitqueue_head(&vsync[i]);
+}
+
+void
+vn_iowait(
+	struct vnode	*vp)
+{
+	wait_queue_head_t *wq = vptosync(vp);
+
+	wait_event(*wq, (atomic_read(&vp->v_iocount) == 0));
+}
+
+void
+vn_iowake(
+	struct vnode	*vp)
+{
+	if (atomic_dec_and_test(&vp->v_iocount))
+		wake_up(vptosync(vp));
 }
 
 /*
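A note on the vsync table above: rather than embedding a wait_queue_head_t in every vnode, the patch hashes vnode addresses onto 37 shared queues via vptosync(). Two vnodes can collide on one queue, so a wake_up() for one may rouse a waiter for the other; that is harmless because wait_event() re-evaluates its condition (the waiter's own v_iocount reaching zero) after every wakeup and sleeps again if it does not hold. The trade, presumably, is a small fixed static footprint in exchange for occasional spurious wakeups on a path (inode reclaim) that is not hot.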
@@ -111,6 +127,8 @@ vn_initialize(
 	/* Initialize the first behavior and the behavior chain head. */
 	vn_bhv_head_init(VN_BHV_HEAD(vp), "vnode");
 
+	atomic_set(&vp->v_iocount, 0);
+
 #ifdef XFS_VNODE_TRACE
 	vp->v_trace = ktrace_alloc(VNODE_TRACE_SIZE, KM_SLEEP);
 #endif	/* XFS_VNODE_TRACE */
...
fs/xfs/linux-2.6/xfs_vnode.h
@@ -80,6 +80,7 @@ typedef struct vnode {
 	vnumber_t	v_number;	/* in-core vnode number */
 	vn_bhv_head_t	v_bh;		/* behavior head */
 	spinlock_t	v_lock;		/* VN_LOCK/VN_UNLOCK */
+	atomic_t	v_iocount;	/* outstanding I/O count */
 #ifdef XFS_VNODE_TRACE
 	struct ktrace	*v_trace;	/* trace header structure */
 #endif
@@ -506,6 +507,9 @@ extern int vn_revalidate(struct vnode *);
 extern void	vn_revalidate_core(struct vnode *, vattr_t *);
 extern void	vn_remove(struct vnode *);
 
+extern void	vn_iowait(struct vnode *vp);
+extern void	vn_iowake(struct vnode *vp);
+
 static inline int vn_count(struct vnode *vp)
 {
 	return atomic_read(&LINVFS_GET_IP(vp)->i_count);
...
fs/xfs/xfs_vnodeops.c
@@ -3846,51 +3846,10 @@ xfs_reclaim(
 		return 0;
 	}
 
-	if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) {
-		if (ip->i_d.di_size > 0) {
-			/*
-			 * Flush and invalidate any data left around that is
-			 * a part of this file.
-			 *
-			 * Get the inode's i/o lock so that buffers are pushed
-			 * out while holding the proper lock.  We can't hold
-			 * the inode lock here since flushing out buffers may
-			 * cause us to try to get the lock in xfs_strategy().
-			 *
-			 * We don't have to call remapf() here, because there
-			 * cannot be any mapped file references to this vnode
-			 * since it is being reclaimed.
-			 */
-			xfs_ilock(ip, XFS_IOLOCK_EXCL);
-
-			/*
-			 * If we hit an IO error, we need to make sure that the
-			 * buffer and page caches of file data for
-			 * the file are tossed away. We don't want to use
-			 * VOP_FLUSHINVAL_PAGES here because we don't want dirty
-			 * pages to stay attached to the vnode, but be
-			 * marked P_BAD. pdflush/vnode_pagebad
-			 * hates that.
-			 */
-			if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-				VOP_FLUSHINVAL_PAGES(vp, 0, -1, FI_NONE);
-			} else {
-				VOP_TOSS_PAGES(vp, 0, -1, FI_NONE);
-			}
-
-			ASSERT(VN_CACHED(vp) == 0);
-			ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) ||
-			       ip->i_delayed_blks == 0);
-			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-		} else if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-			/*
-			 * di_size field may not be quite accurate if we're
-			 * shutting down.
-			 */
-			VOP_TOSS_PAGES(vp, 0, -1, FI_NONE);
-			ASSERT(VN_CACHED(vp) == 0);
-		}
-	}
+	vn_iowait(vp);
+
+	ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
+	ASSERT(VN_CACHED(vp) == 0);
 
 	/* If we have nothing to flush with this inode then complete the
 	 * teardown now, otherwise break the link between the xfs inode
...
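Tying the hunks together: v_iocount starts at zero in vn_initialize(), is incremented at I/O submission in xfs_map_unwritten(), dropped by vn_iowake() at completion, and drained by vn_iowait() here in xfs_reclaim(). The driver below exercises that lifecycle against the userspace sketch given after the commit header; it assumes that sketch's definitions, including its hypothetical vn_iostart() helper, and lives in the same source file.

	/* Driver for the sketch above: submit -> complete -> reclaim. */
	#include <stdio.h>
	#include <unistd.h>

	static void *io_thread(void *arg)
	{
		struct vnode *vp = arg;

		usleep(10000);	/* pretend the unwritten-extent I/O takes a while */
		vn_iowake(vp);	/* completion, as in linvfs_unwritten_convert() */
		return NULL;
	}

	int main(void)
	{
		struct vnode vp = { .v_iocount = 0 };	/* vn_initialize() analogue */
		pthread_t t;

		vn_init();
		vn_iostart(&vp);	/* xfs_map_unwritten() analogue: pin before I/O */
		pthread_create(&t, NULL, io_thread, &vp);
		vn_iowait(&vp);		/* xfs_reclaim() analogue: wait for I/O to drain */
		printf("all I/O drained, safe to reclaim\n");
		pthread_join(&t, NULL);
		return 0;
	}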