Commit 50964d31 authored by Christoph Hellwig, committed by Nathan Scott

[XFS] Rework parts of the write path so that when a direct write needs to
fall back to buffered I/O in the generic code, we are able to relock the
XFS inode correctly.
Signed-off-by: Nathan Scott <nathans@sgi.com>
parent 31316be2
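For orientation before the diff: the patch teaches xfs_write() to start a direct write under a shared iolock and, when the generic code can only complete part of it (a direct write into a hole), to re-take i_sem and the iolock exclusively and finish the remainder as buffered I/O. A runnable user-space model of that control flow (an editorial sketch, not part of the commit; every function below is a stand-in for the kernel primitives in the diff):

#include <stdio.h>

/* Stand-ins for the locking and I/O primitives used in the patch. */
static void ilock_shared(void)  { puts("xfs_ilock (shared iolock)"); }
static void ilock_excl(void)    { puts("xfs_ilock (excl iolock)");   }
static void iunlock(void)       { puts("xfs_iunlock");               }
static void down_i_sem(void)    { puts("down(&inode->i_sem)");       }
static void up_i_sem(void)      { puts("up(&inode->i_sem)");         }

/* Pretend the direct path hits a hole and completes only half. */
static long direct_write(long count)   { return count / 2; }
static long buffered_write(long count) { return count;     }

static long do_write(long count, int direct)
{
	int need_isem = !direct;  /* buffered I/O serializes on i_sem */
	long total = 0, ret;

relock:
	if (need_isem) {
		down_i_sem();
		ilock_excl();
	} else {
		ilock_shared();
	}

	if (direct) {
		ret = direct_write(count);
		total += ret;
		if (ret != count) {
			/*
			 * Short direct write: retry the remainder as
			 * buffered I/O, re-taking i_sem and the iolock
			 * exclusively.  This models the "goto relock"
			 * path the commit adds to xfs_write().
			 */
			count -= ret;
			direct = 0;
			need_isem = 1;
			iunlock();
			goto relock;
		}
	} else {
		total += buffered_write(count);
	}

	iunlock();
	if (need_isem)
		up_i_sem();
	return total;
}

int main(void)
{
	printf("wrote %ld bytes\n", do_write(4096, 1));
	return 0;
}

The real function also takes i_sem up front for direct writes that touch cached pages or extend the file; the sketch keeps only the fallback loop.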
@@ -116,17 +116,10 @@ __linvfs_write(
 	ssize_t			rval;
 
 	BUG_ON(iocb->ki_pos != pos);
 
-	if (unlikely(file->f_flags & O_DIRECT)) {
+	if (unlikely(file->f_flags & O_DIRECT))
 		ioflags |= IO_ISDIRECT;
-		VOP_WRITE(vp, iocb, &iov, 1, &iocb->ki_pos,
-				ioflags, NULL, rval);
-	} else {
-		down(&inode->i_sem);
-		VOP_WRITE(vp, iocb, &iov, 1, &iocb->ki_pos,
-				ioflags, NULL, rval);
-		up(&inode->i_sem);
-	}
+	VOP_WRITE(vp, iocb, &iov, 1, &iocb->ki_pos, ioflags, NULL, rval);
 
 	return rval;
 }
@@ -214,17 +207,10 @@ __linvfs_writev(
 	init_sync_kiocb(&kiocb, file);
 	kiocb.ki_pos = *ppos;
 
-	if (unlikely(file->f_flags & O_DIRECT)) {
+	if (unlikely(file->f_flags & O_DIRECT))
 		ioflags |= IO_ISDIRECT;
-		VOP_WRITE(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos,
-				ioflags, NULL, rval);
-	} else {
-		down(&inode->i_sem);
-		VOP_WRITE(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos,
-				ioflags, NULL, rval);
-		up(&inode->i_sem);
-	}
+	VOP_WRITE(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos, ioflags, NULL, rval);
 
 	if (rval == -EIOCBQUEUED)
 		rval = wait_on_sync_kiocb(&kiocb);
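A note on the -EIOCBQUEUED test kept above: VOP_WRITE may hand the request to an asynchronous context and return before completion, so the synchronous wrapper blocks via wait_on_sync_kiocb(). A user-space model of the convention (editorial sketch; the mock_* names are invented, and 529 is the kernel-internal EIOCBQUEUED value of this era):

#include <stdio.h>

#define EIOCBQUEUED 529		/* kernel-internal value, 2.6 era */

struct mock_kiocb { long result; int queued; };

/* Pretend the write was handed off to an async context. */
static long submit_write(struct mock_kiocb *kiocb, long count)
{
	kiocb->queued = 1;
	kiocb->result = count;	/* completion will report this later */
	return -EIOCBQUEUED;
}

/* Stand-in for wait_on_sync_kiocb(): block until the op finishes. */
static long wait_for_kiocb(struct mock_kiocb *kiocb)
{
	return kiocb->result;
}

int main(void)
{
	struct mock_kiocb kiocb = { 0, 0 };
	long rval = submit_write(&kiocb, 4096);

	if (rval == -EIOCBQUEUED)	/* same test as __linvfs_writev */
		rval = wait_for_kiocb(&kiocb);
	printf("wrote %ld bytes\n", rval);
	return 0;
}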
@@ -225,40 +225,11 @@ xfs_inval_cached_pages(
 	int		write,
 	int		relock)
 {
-	xfs_mount_t	*mp;
-
-	if (!VN_CACHED(vp)) {
-		return;
-	}
-
-	mp = io->io_mount;
-
-	/*
-	 * We need to get the I/O lock exclusively in order
-	 * to safely invalidate pages and mappings.
-	 */
-	if (relock) {
-		XFS_IUNLOCK(mp, io, XFS_IOLOCK_SHARED);
-		XFS_ILOCK(mp, io, XFS_IOLOCK_EXCL);
+	if (VN_CACHED(vp)) {
+		xfs_inval_cached_trace(io, offset, -1, ctooff(offtoct(offset)), -1);
+		VOP_FLUSHINVAL_PAGES(vp, ctooff(offtoct(offset)), -1, FI_REMAPF_LOCKED);
 	}
-
-	/* Writing beyond EOF creates a hole that must be zeroed */
-	if (write && (offset > XFS_SIZE(mp, io))) {
-		xfs_fsize_t	isize;
-
-		XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
-		isize = XFS_SIZE(mp, io);
-		if (offset > isize) {
-			xfs_zero_eof(vp, io, offset, isize, offset);
-		}
-		XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
-	}
-
-	xfs_inval_cached_trace(io, offset, -1, ctooff(offtoct(offset)), -1);
-	VOP_FLUSHINVAL_PAGES(vp, ctooff(offtoct(offset)), -1, FI_REMAPF_LOCKED);
-	if (relock) {
-		XFS_ILOCK_DEMOTE(mp, io, XFS_IOLOCK_EXCL);
-	}
 }
 
 ssize_t			/* bytes read, or (-) error */
@@ -637,32 +608,34 @@ xfs_write(
 	bhv_desc_t		*bdp,
 	struct kiocb		*iocb,
 	const struct iovec	*iovp,
-	unsigned int		segs,
+	unsigned int		nsegs,
 	loff_t			*offset,
 	int			ioflags,
 	cred_t			*credp)
 {
 	struct file		*file = iocb->ki_filp;
-	size_t			size = 0;
+	struct address_space	*mapping = file->f_mapping;
+	struct inode		*inode = mapping->host;
+	unsigned long		segs = nsegs;
 	xfs_inode_t		*xip;
 	xfs_mount_t		*mp;
-	ssize_t			ret;
-	int			error = 0;
+	ssize_t			ret = 0, error = 0;
 	xfs_fsize_t		isize, new_size;
-	xfs_fsize_t		n, limit;
 	xfs_iocore_t		*io;
 	vnode_t			*vp;
 	unsigned long		seg;
 	int			iolock;
 	int			eventsent = 0;
 	vrwlock_t		locktype;
+	size_t			ocount = 0, count;
+	loff_t			pos;
+	int			need_isem = 1, need_flush = 0;
 
 	XFS_STATS_INC(xs_write_calls);
 
 	vp = BHV_TO_VNODE(bdp);
 	xip = XFS_BHVTOI(bdp);
 
 	/* START copy & waste from filemap.c */
 	for (seg = 0; seg < segs; seg++) {
 		const struct iovec *iv = &iovp[seg];
@@ -670,73 +643,90 @@ xfs_write(
 		 * If any segment has a negative length, or the cumulative
 		 * length ever wraps negative then return -EINVAL.
 		 */
-		size += iv->iov_len;
-		if (unlikely((ssize_t)(size|iv->iov_len) < 0))
-			return XFS_ERROR(-EINVAL);
+		ocount += iv->iov_len;
+		if (unlikely((ssize_t)(ocount|iv->iov_len) < 0))
+			return -EINVAL;
+		if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
+			continue;
+		if (seg == 0)
+			return -EFAULT;
+		segs = seg;
+		ocount -= iv->iov_len;	/* This segment is no good */
+		break;
 	}
 	/* END copy & waste from filemap.c */
 
-	if (size == 0)
+	count = ocount;
+	pos = *offset;
+
+	if (count == 0)
 		return 0;
 
 	io = &xip->i_iocore;
 	mp = io->io_mount;
 
-	if (XFS_FORCED_SHUTDOWN(mp)) {
+	if (XFS_FORCED_SHUTDOWN(mp))
 		return -EIO;
-	}
 
 	if (ioflags & IO_ISDIRECT) {
 		xfs_buftarg_t	*target =
 			(xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
 				mp->m_rtdev_targp : mp->m_ddev_targp;
 
-		if ((*offset & target->pbr_smask) ||
-		    (size & target->pbr_smask)) {
+		if ((pos & target->pbr_smask) || (count & target->pbr_smask))
 			return XFS_ERROR(-EINVAL);
-		}
-		iolock = XFS_IOLOCK_SHARED;
-		locktype = VRWLOCK_WRITE_DIRECT;
-	} else {
+
+		if (!VN_CACHED(vp) && pos < i_size_read(inode))
+			need_isem = 0;
+
+		if (VN_CACHED(vp))
+			need_flush = 1;
+	}
+
+relock:
+	if (need_isem) {
 		iolock = XFS_IOLOCK_EXCL;
 		locktype = VRWLOCK_WRITE;
+
+		down(&inode->i_sem);
+	} else {
+		iolock = XFS_IOLOCK_SHARED;
+		locktype = VRWLOCK_WRITE_DIRECT;
 	}
 
 	xfs_ilock(xip, XFS_ILOCK_EXCL|iolock);
 
-	isize = xip->i_d.di_size;
-	limit = XFS_MAXIOFFSET(mp);
+	isize = i_size_read(inode);
 
 	if (file->f_flags & O_APPEND)
 		*offset = isize;
 
 start:
-	n = limit - *offset;
-	if (n <= 0) {
+	error = -generic_write_checks(file, &pos, &count,
+					S_ISBLK(inode->i_mode));
+	if (error) {
 		xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
-		return -EFBIG;
+		goto out_unlock_isem;
 	}
 
-	if (n < size)
-		size = n;
-
-	new_size = *offset + size;
-	if (new_size > isize) {
+	new_size = pos + count;
+	if (new_size > isize)
 		io->io_new_size = new_size;
-	}
 
 	if ((DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_WRITE) &&
 	    !(ioflags & IO_INVIS) && !eventsent)) {
-		loff_t		savedsize = *offset;
-		int		dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags);
+		loff_t		savedsize = pos;
+		int		dmflags = FILP_DELAY_FLAG(file);
+
+		if (need_isem)
+			dmflags |= DM_FLAGS_ISEM;
 
 		xfs_iunlock(xip, XFS_ILOCK_EXCL);
 		error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, vp,
-				      *offset, size,
+				      pos, count,
 				      dmflags, &locktype);
 		if (error) {
 			xfs_iunlock(xip, iolock);
-			return -error;
+			goto out_unlock_isem;
 		}
 		xfs_ilock(xip, XFS_ILOCK_EXCL);
 		eventsent = 1;
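The validation loop rewritten at the top of this hunk follows the filemap.c logic it was copied from: an unreadable buffer in the first segment fails the whole write with -EFAULT, while an unreadable buffer in a later segment merely trims the request to the usable prefix, shortening both segs and ocount. A runnable user-space model (editorial sketch; mock_access_ok() stands in for the kernel's access_ok()):

#include <stdio.h>
#include <sys/types.h>
#include <sys/uio.h>

static int mock_access_ok(const void *base, size_t len)
{
	(void)len;
	return base != NULL;	/* stand-in for the real check */
}

static ssize_t validate_segments(struct iovec *iov, unsigned long *segs)
{
	size_t ocount = 0;
	unsigned long seg;

	for (seg = 0; seg < *segs; seg++) {
		ocount += iov[seg].iov_len;
		if ((ssize_t)(ocount | iov[seg].iov_len) < 0)
			return -1;		/* -EINVAL in the kernel */
		if (mock_access_ok(iov[seg].iov_base, iov[seg].iov_len))
			continue;
		if (seg == 0)
			return -2;		/* -EFAULT in the kernel */
		*segs = seg;			/* keep only the good prefix */
		ocount -= iov[seg].iov_len;	/* this segment is no good */
		break;
	}
	return ocount;
}

int main(void)
{
	char buf[8];
	struct iovec iov[3] = {
		{ buf, sizeof(buf) }, { buf, sizeof(buf) }, { NULL, 4 },
	};
	unsigned long segs = 3;

	/* Prints "usable bytes: 16 over 2 segs": the bad third segment
	 * is dropped instead of failing the whole request. */
	printf("usable bytes: %zd over %lu segs\n",
	       validate_segments(iov, &segs), segs);
	return 0;
}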
@@ -748,9 +738,8 @@ xfs_write(
 		 * event prevents another call to XFS_SEND_DATA, which is
 		 * what allows the size to change in the first place.
 		 */
-		if ((file->f_flags & O_APPEND) &&
-		    savedsize != xip->i_d.di_size) {
-			*offset = isize = xip->i_d.di_size;
+		if ((file->f_flags & O_APPEND) && savedsize != isize) {
+			pos = isize = xip->i_d.di_size;
 			goto start;
 		}
 	}
@@ -761,8 +750,10 @@ xfs_write(
 	 *
 	 * We must update xfs' times since revalidate will overcopy xfs.
 	 */
-	if (size && !(ioflags & IO_INVIS))
+	if (!(ioflags & IO_INVIS)) {
 		xfs_ichgtime(xip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+		inode_update_time(inode, 1);
+	}
 
 	/*
 	 * If the offset is beyond the size of the file, we have a couple
@@ -773,12 +764,12 @@
 	 * to zero it out up to the new size.
 	 */
-	if (!(ioflags & IO_ISDIRECT) && (*offset > isize && isize)) {
-		error = xfs_zero_eof(BHV_TO_VNODE(bdp), io, *offset,
-			isize, *offset + size);
+	if (pos > isize) {
+		error = xfs_zero_eof(BHV_TO_VNODE(bdp), io, pos,
+					isize, pos + count);
 		if (error) {
 			xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
-			return(-error);
+			goto out_unlock_isem;
 		}
 	}
 
 	xfs_iunlock(xip, XFS_ILOCK_EXCL);
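Context for the simplified test above (the old buffered-only qualifiers collapse to a plain pos > isize check now that the direct path shares this code): a write starting past EOF creates a hole, and xfs_zero_eof() must zero the range between the old size and the new write position so stale contents cannot surface there. A minimal illustration of the invariant (editorial sketch):

#include <stdio.h>

/* If a write starts past the current EOF, the gap [isize, pos) must
 * be zeroed first; otherwise previously written data in partially
 * reused blocks could leak into the hole. */
static void zero_eof_range(long isize, long pos)
{
	if (pos > isize)
		printf("zero [%ld, %ld) before writing\n", isize, pos);
	else
		printf("write starts inside the file, nothing to zero\n");
}

int main(void)
{
	zero_eof_range(1000, 4096);	/* extending write */
	zero_eof_range(1000, 512);	/* in-file write */
	return 0;
}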
@@ -795,22 +786,64 @@ xfs_write(
 		     (S_ISGID | S_IXGRP))) &&
 	     !capable(CAP_FSETID)) {
 		error = xfs_write_clear_setuid(xip);
-		if (error) {
+		if (likely(!error))
+			error = -remove_suid(file->f_dentry);
+		if (unlikely(error)) {
 			xfs_iunlock(xip, iolock);
-			return -error;
+			goto out_unlock_isem;
 		}
 	}
 
 retry:
-	if (ioflags & IO_ISDIRECT) {
-		xfs_inval_cached_pages(vp, io, *offset, 1, 1);
-		xfs_rw_enter_trace(XFS_DIOWR_ENTER,
-				io, (void *)iovp, segs, *offset, ioflags);
+	/* We can write back this queue in page reclaim */
+	current->backing_dev_info = mapping->backing_dev_info;
+
+	if ((ioflags & IO_ISDIRECT)) {
+		if (need_flush) {
+			xfs_inval_cached_trace(io, pos, -1,
+					ctooff(offtoct(pos)), -1);
+			VOP_FLUSHINVAL_PAGES(vp, ctooff(offtoct(pos)),
+					-1, FI_REMAPF_LOCKED);
+		}
+
+		if (need_isem) {
+			/* demote the lock now the cached pages are gone */
+			XFS_ILOCK_DEMOTE(mp, io, XFS_IOLOCK_EXCL);
+			up(&inode->i_sem);
+
+			iolock = XFS_IOLOCK_SHARED;
+			locktype = VRWLOCK_WRITE_DIRECT;
+			need_isem = 0;
+		}
+
+		xfs_rw_enter_trace(XFS_DIOWR_ENTER, io, (void *)iovp, segs,
+				*offset, ioflags);
+		ret = generic_file_direct_write(iocb, iovp,
+				&segs, pos, offset, count, ocount);
+
+		/*
+		 * direct-io write to a hole: fall through to buffered I/O
+		 * for completing the rest of the request.
+		 */
+		if (ret >= 0 && ret != count) {
+			XFS_STATS_ADD(xs_write_bytes, ret);
+
+			pos += ret;
+			count -= ret;
+
+			need_isem = 1;
+			ioflags &= ~IO_ISDIRECT;
+			xfs_iunlock(xip, iolock);
+			goto relock;
+		}
 	} else {
-		xfs_rw_enter_trace(XFS_WRITE_ENTER,
-				io, (void *)iovp, segs, *offset, ioflags);
+		xfs_rw_enter_trace(XFS_WRITE_ENTER, io, (void *)iovp, segs,
+				*offset, ioflags);
+		ret = generic_file_buffered_write(iocb, iovp, segs,
+				pos, offset, count, ret);
 	}
 
-	ret = generic_file_aio_write_nolock(iocb, iovp, segs, offset);
+	current->backing_dev_info = NULL;
 
 	if ((ret == -ENOSPC) &&
 	    DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_NOSPACE) &&
@@ -821,17 +854,15 @@ xfs_write(
 				DM_RIGHT_NULL, vp, DM_RIGHT_NULL, NULL, NULL,
 				0, 0, 0); /* Delay flag intentionally unused */
 		if (error)
-			return -error;
+			goto out_unlock_isem;
 		xfs_rwlock(bdp, locktype);
-		*offset = xip->i_d.di_size;
+		pos = xip->i_d.di_size;
 		goto retry;
 	}
 
 	if (*offset > xip->i_d.di_size) {
 		xfs_ilock(xip, XFS_ILOCK_EXCL);
 		if (*offset > xip->i_d.di_size) {
-			struct inode	*inode = LINVFS_GET_IP(vp);
-
 			xip->i_d.di_size = *offset;
 			i_size_write(inode, *offset);
 			xip->i_update_core = 1;
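The size update above is a check/lock/re-check pattern: the unlocked comparison avoids taking XFS_ILOCK_EXCL on the common path, and the comparison is repeated under the lock because another writer may have extended the file in between. A user-space analogue (editorial sketch; a pthread mutex stands in for the ilock, and the kernel relies on its own memory-ordering guarantees for the unlocked read, which plain C does not give you):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t ilock = PTHREAD_MUTEX_INITIALIZER;
static long di_size;		/* models xip->i_d.di_size */

static void update_size(long offset)
{
	if (offset > di_size) {			/* cheap unlocked test */
		pthread_mutex_lock(&ilock);
		if (offset > di_size)		/* re-check under the lock */
			di_size = offset;
		pthread_mutex_unlock(&ilock);
	}
}

int main(void)
{
	update_size(4096);
	printf("di_size = %ld\n", di_size);
	return 0;
}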
@@ -842,7 +873,8 @@ xfs_write(
 	if (ret <= 0) {
 		xfs_rwunlock(bdp, locktype);
-		return ret;
+		error = -ret;
+		goto out_unlock_isem;
 	}
 
 	XFS_STATS_ADD(xs_write_bytes, ret);
@@ -929,7 +961,12 @@ xfs_write(
 	} /* (ioflags & O_SYNC) */
 
 	xfs_rwunlock(bdp, locktype);
-	return(ret);
+	error = -ret;
+
+out_unlock_isem:
+	if (need_isem)
+		up(&inode->i_sem);
+	return -error;
 }
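The error = -ret assignment on the success path looks odd but is what makes the single exit label work: XFS keeps errnos positive internally and negates once on return, so storing the negated byte count lets "return -error" yield either -errno or the bytes written. Minimal illustration (editorial sketch; xfs_style_exit is an invented name):

#include <stdio.h>
#include <errno.h>

static long xfs_style_exit(long ret, int failed)
{
	long error;

	if (failed)
		error = EIO;	/* positive errno, XFS-internal style */
	else
		error = -ret;	/* success: negative of the byte count */
	return -error;		/* single exit: -EIO or the byte count */
}

int main(void)
{
	/* Prints "4096 -5": bytes written on success, -errno on failure. */
	printf("%ld %ld\n", xfs_style_exit(4096, 0), xfs_style_exit(0, 1));
	return 0;
}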