Merge branch 'fallocate-insert-range' into for-next

a448f8f1 · Dave Chinner · 2b93681f · a904b1ca · a448f8f1 · a448f8f1
Commit a448f8f1 authored Mar 25, 2015 by Dave Chinner
9 changed files
--- a/fs/open.c
+++ b/fs/open.c
@@ -231,8 +231,7 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 		return -EINVAL;
 	/* Return error if mode is not supported */
-	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
+	if (mode & ~FALLOC_FL_SUPPORTED_MASK)
-		     FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
 		return -EOPNOTSUPP;
 	/* Punch hole and zero range are mutually exclusive */
@@ -250,6 +249,11 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 	    (mode & ~FALLOC_FL_COLLAPSE_RANGE))
 		return -EINVAL;
+	/* Insert range should only be used exclusively. */
+	if ((mode & FALLOC_FL_INSERT_RANGE) &&
+	    (mode & ~FALLOC_FL_INSERT_RANGE))
+		return -EINVAL;
 	if (!(file->f_mode & FMODE_WRITE))
 		return -EBADF;

--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -166,6 +166,11 @@ static inline void xfs_bmap_init(xfs_bmap_free_t *flp, xfs_fsblock_t *fbp)
 */
 #define XFS_BMAP_MAX_SHIFT_EXTENTS	1
+enum shift_direction {
+	SHIFT_LEFT = 0,
+	SHIFT_RIGHT,
+};
 #ifdef DEBUG
 void	xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt,
 		int whichfork, unsigned long caller_ip);
@@ -211,8 +216,10 @@ int	xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx,
 		xfs_extnum_t num);
 uint	xfs_default_attroffset(struct xfs_inode *ip);
 int	xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip,
-		xfs_fileoff_t start_fsb, xfs_fileoff_t offset_shift_fsb,
+		xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb,
-		int *done, xfs_fileoff_t *next_fsb, xfs_fsblock_t *firstblock,
+		int *done, xfs_fileoff_t stop_fsb, xfs_fsblock_t *firstblock,
-		struct xfs_bmap_free *flist, int num_exts);
+		struct xfs_bmap_free *flist, enum shift_direction direction,
+		int num_exts);
+int	xfs_bmap_split_extent(struct xfs_inode *ip, xfs_fileoff_t split_offset);
 #endif	/* __XFS_BMAP_H__ */
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1376,22 +1376,19 @@ xfs_zero_file_space(
 }
 /*
- * xfs_collapse_file_space()
+ * @next_fsb will keep track of the extent currently undergoing shift.
- *	This routine frees disk space and shift extent for the given file.
+ * @stop_fsb will keep track of the extent at which we have to stop.
- *	The first thing we do is to free data blocks in the specified range
+ * If we are shifting left, we will start with block (offset + len) and
- *	by calling xfs_free_file_space(). It would also sync dirty data
+ * shift each extent till last extent.
- *	and invalidate page cache over the region on which collapse range
+ * If we are shifting right, we will start with last extent inside file space
- *	is working. And Shift extent records to the left to cover a hole.
+ * and continue until we reach the block corresponding to offset.
- * RETURNS:
- *	0 on success
- *	errno on error
- *
 */
 int
-xfs_collapse_file_space(
+xfs_shift_file_space(
 	struct xfs_inode        *ip,
 	xfs_off_t               offset,
-	xfs_off_t		len)
+	xfs_off_t               len,
+	enum shift_direction	direction)
 {
 	int			done = 0;
 	struct xfs_mount	*mp = ip->i_mount;
@@ -1400,20 +1397,25 @@ xfs_collapse_file_space(
 	struct xfs_bmap_free	free_list;
 	xfs_fsblock_t		first_block;
 	int			committed;
-	xfs_fileoff_t		start_fsb;
+	xfs_fileoff_t		stop_fsb;
 	xfs_fileoff_t		next_fsb;
 	xfs_fileoff_t		shift_fsb;
-	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
+	ASSERT(direction == SHIFT_LEFT || direction == SHIFT_RIGHT);
-	trace_xfs_collapse_file_space(ip);
+	if (direction == SHIFT_LEFT) {
 		next_fsb = XFS_B_TO_FSB(mp, offset + len);
-	shift_fsb = XFS_B_TO_FSB(mp, len);
+		stop_fsb = XFS_B_TO_FSB(mp, VFS_I(ip)->i_size);
+	} else {
+		/*
+		 * If right shift, delegate the work of initialization of
+		 * next_fsb to xfs_bmap_shift_extent as it has ilock held.
+		 */
+		next_fsb = NULLFSBLOCK;
+		stop_fsb = XFS_B_TO_FSB(mp, offset);
+	}
-	error = xfs_free_file_space(ip, offset, len);
+	shift_fsb = XFS_B_TO_FSB(mp, len);
-	if (error)
-		return error;
 	/*
 	 * Trim eofblocks to avoid shifting uninitialized post-eof preallocation
@@ -1427,20 +1429,28 @@ xfs_collapse_file_space(
 	/*
 	 * Writeback and invalidate cache for the remainder of the file as we're
-	 * about to shift down every extent from the collapse range to EOF. The
+	 * about to shift down every extent from offset to EOF.
-	 * free of the collapse range above might have already done some of
-	 * this, but we shouldn't rely on it to do anything outside of the range
-	 * that was freed.
 	 */
 	error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
-					     offset + len, -1);
+					     offset, -1);
 	if (error)
 		return error;
 	error = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
-					(offset + len) >> PAGE_CACHE_SHIFT, -1);
+					offset >> PAGE_CACHE_SHIFT, -1);
 	if (error)
 		return error;
+	/*
+	 * The extent shiting code works on extent granularity. So, if
+	 * stop_fsb is not the starting block of extent, we need to split
+	 * the extent at stop_fsb.
+	 */
+	if (direction == SHIFT_RIGHT) {
+		error = xfs_bmap_split_extent(ip, stop_fsb);
+		if (error)
+			return error;
+	}
 	while (!error && !done) {
 		tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
 		/*
@@ -1464,7 +1474,7 @@ xfs_collapse_file_space(
 		if (error)
 			goto out;
-		xfs_trans_ijoin(tp, ip, 0);
+		xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
 		xfs_bmap_init(&free_list, &first_block);
@@ -1472,10 +1482,9 @@ xfs_collapse_file_space(
 		 * We are using the write transaction in which max 2 bmbt
 		 * updates are allowed
 		 */
-		start_fsb = next_fsb;
+		error = xfs_bmap_shift_extents(tp, ip, &next_fsb, shift_fsb,
-		error = xfs_bmap_shift_extents(tp, ip, start_fsb, shift_fsb,
+				&done, stop_fsb, &first_block, &free_list,
-				&done, &next_fsb, &first_block, &free_list,
+				direction, XFS_BMAP_MAX_SHIFT_EXTENTS);
-				XFS_BMAP_MAX_SHIFT_EXTENTS);
 		if (error)
 			goto out;
@@ -1484,17 +1493,69 @@ xfs_collapse_file_space(
 			goto out;
 		error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	}
 	return error;
 out:
 	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	return error;
 }
+/*
+ * xfs_collapse_file_space()
+ *	This routine frees disk space and shift extent for the given file.
+ *	The first thing we do is to free data blocks in the specified range
+ *	by calling xfs_free_file_space(). It would also sync dirty data
+ *	and invalidate page cache over the region on which collapse range
+ *	is working. And Shift extent records to the left to cover a hole.
+ * RETURNS:
+ *	0 on success
+ *	errno on error
+ *
+ */
+int
+xfs_collapse_file_space(
+	struct xfs_inode	*ip,
+	xfs_off_t		offset,
+	xfs_off_t		len)
+{
+	int error;
+	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
+	trace_xfs_collapse_file_space(ip);
+	error = xfs_free_file_space(ip, offset, len);
+	if (error)
+		return error;
+	return xfs_shift_file_space(ip, offset, len, SHIFT_LEFT);
+}
+/*
+ * xfs_insert_file_space()
+ *	This routine create hole space by shifting extents for the given file.
+ *	The first thing we do is to sync dirty data and invalidate page cache
+ *	over the region on which insert range is working. And split an extent
+ *	to two extents at given offset by calling xfs_bmap_split_extent.
+ *	And shift all extent records which are laying between [offset,
+ *	last allocated extent] to the right to reserve hole range.
+ * RETURNS:
+ *	0 on success
+ *	errno on error
+ */
+int
+xfs_insert_file_space(
+	struct xfs_inode	*ip,
+	loff_t			offset,
+	loff_t			len)
+{
+	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
+	trace_xfs_insert_file_space(ip);
+	return xfs_shift_file_space(ip, offset, len, SHIFT_RIGHT);
+}
 /*
 * We need to check that the format of the data fork in the temporary inode is
 * valid for the target inode before doing the swap. This is not a problem with

--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -63,6 +63,8 @@ int	xfs_zero_file_space(struct xfs_inode *ip, xfs_off_t offset,
 			    xfs_off_t len);
 int	xfs_collapse_file_space(struct xfs_inode *, xfs_off_t offset,
 				xfs_off_t len);
+int	xfs_insert_file_space(struct xfs_inode *, xfs_off_t offset,
+				xfs_off_t len);
 /* EOF block manipulation functions */
 bool	xfs_can_free_eofblocks(struct xfs_inode *ip, bool force);

--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -822,6 +822,11 @@ xfs_file_write_iter(
 	return ret;
 }
+#define	XFS_FALLOC_FL_SUPPORTED						\
+		(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |		\
+		 FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |	\
+		 FALLOC_FL_INSERT_RANGE)
 STATIC long
 xfs_file_fallocate(
 	struct file		*file,
@@ -835,11 +840,11 @@ xfs_file_fallocate(
 	enum xfs_prealloc_flags	flags = 0;
 	uint			iolock = XFS_IOLOCK_EXCL;
 	loff_t			new_size = 0;
+	bool			do_file_insert = 0;
 	if (!S_ISREG(inode->i_mode))
 		return -EINVAL;
-	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
+	if (mode & ~XFS_FALLOC_FL_SUPPORTED)
-		     FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
 		return -EOPNOTSUPP;
 	xfs_ilock(ip, iolock);
@@ -876,6 +881,27 @@ xfs_file_fallocate(
 		error = xfs_collapse_file_space(ip, offset, len);
 		if (error)
 			goto out_unlock;
+	} else if (mode & FALLOC_FL_INSERT_RANGE) {
+		unsigned blksize_mask = (1 << inode->i_blkbits) - 1;
+		new_size = i_size_read(inode) + len;
+		if (offset & blksize_mask || len & blksize_mask) {
+			error = -EINVAL;
+			goto out_unlock;
+		}
+		/* check the new inode size does not wrap through zero */
+		if (new_size > inode->i_sb->s_maxbytes) {
+			error = -EFBIG;
+			goto out_unlock;
+		}
+		/* Offset should be less than i_size */
+		if (offset >= i_size_read(inode)) {
+			error = -EINVAL;
+			goto out_unlock;
+		}
+		do_file_insert = 1;
 	} else {
 		flags |= XFS_PREALLOC_SET;
@@ -910,8 +936,19 @@ xfs_file_fallocate(
 		iattr.ia_valid = ATTR_SIZE;
 		iattr.ia_size = new_size;
 		error = xfs_setattr_size(ip, &iattr);
+		if (error)
+			goto out_unlock;
 	}
+	/*
+	 * Perform hole insertion now that the file size has been
+	 * updated so that if we crash during the operation we don't
+	 * leave shifted extents past EOF and hence losing access to
+	 * the data that is contained within them.
+	 */
+	if (do_file_insert)
+		error = xfs_insert_file_space(ip, offset, len);
 out_unlock:
 	xfs_iunlock(ip, iolock);
 	return error;

--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -664,6 +664,7 @@ DEFINE_INODE_EVENT(xfs_alloc_file_space);
 DEFINE_INODE_EVENT(xfs_free_file_space);
 DEFINE_INODE_EVENT(xfs_zero_file_space);
 DEFINE_INODE_EVENT(xfs_collapse_file_space);
+DEFINE_INODE_EVENT(xfs_insert_file_space);
 DEFINE_INODE_EVENT(xfs_readdir);
 #ifdef CONFIG_XFS_POSIX_ACL
 DEFINE_INODE_EVENT(xfs_get_acl);

--- a/include/linux/falloc.h
+++ b/include/linux/falloc.h
@@ -21,4 +21,10 @@ struct space_resv {
 #define FS_IOC_RESVSP		_IOW('X', 40, struct space_resv)
 #define FS_IOC_RESVSP64		_IOW('X', 42, struct space_resv)
+#define	FALLOC_FL_SUPPORTED_MASK	(FALLOC_FL_KEEP_SIZE |		\
+					 FALLOC_FL_PUNCH_HOLE |		\
+					 FALLOC_FL_COLLAPSE_RANGE |	\
+					 FALLOC_FL_ZERO_RANGE |		\
+					 FALLOC_FL_INSERT_RANGE)
 #endif /* _FALLOC_H_ */
--- a/include/uapi/linux/falloc.h
+++ b/include/uapi/linux/falloc.h
@@ -41,4 +41,21 @@
 */
 #define FALLOC_FL_ZERO_RANGE		0x10
+/*
+ * FALLOC_FL_INSERT_RANGE is use to insert space within the file size without
+ * overwriting any existing data. The contents of the file beyond offset are
+ * shifted towards right by len bytes to create a hole.  As such, this
+ * operation will increase the size of the file by len bytes.
+ *
+ * Different filesystems may implement different limitations on the granularity
+ * of the operation. Most will limit operations to filesystem block size
+ * boundaries, but this boundary may be larger or smaller depending on
+ * the filesystem and/or the configuration of the filesystem or file.
+ *
+ * Attempting to insert space using this flag at OR beyond the end of
+ * the file is considered an illegal operation - just use ftruncate(2) or
+ * fallocate(2) with mode 0 for such type of operations.
+ */
+#define FALLOC_FL_INSERT_RANGE		0x20
 #endif /* _UAPI_FALLOC_H_ */