Commit 7fbabbb4 authored by Christian Brauner's avatar Christian Brauner

Merge patch series "Subject: sort out the fallocate mode mess"

Christoph Hellwig <hch@lst.de> says:

I've recently been looking at the XFS fallocate implementation and got
upset about the messing parsing of the mode argument, which mixes modes
and an optional flag in a really confusing way.

This series tries to clean this up by better defining what is the
operation mode and what is an optional flag, so that both the core
code and file systems can use switch statements to switch on the mode.

* patches from https://lore.kernel.org/r/20240827065123.1762168-1-hch@lst.de:
  xfs: refactor xfs_file_fallocate
  xfs: move the xfs_is_always_cow_inode check into xfs_alloc_file_space
  xfs: call xfs_flush_unmap_range from xfs_free_file_space
  fs: sort out the fallocate mode vs flag mess
  ext4: remove tracing for FALLOC_FL_NO_HIDE_STALE
  block: remove checks for FALLOC_FL_NO_HIDE_STALE

Link: https://lore.kernel.org/r/20240827065123.1762168-1-hch@lst.deSigned-off-by: default avatarChristian Brauner <brauner@kernel.org>
parents 47ac09b9 4acaddf5
......@@ -771,7 +771,7 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
#define BLKDEV_FALLOC_FL_SUPPORTED \
(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \
FALLOC_FL_ZERO_RANGE | FALLOC_FL_NO_HIDE_STALE)
FALLOC_FL_ZERO_RANGE)
static long blkdev_fallocate(struct file *file, int mode, loff_t start,
loff_t len)
......@@ -830,14 +830,6 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start,
len >> SECTOR_SHIFT, GFP_KERNEL,
BLKDEV_ZERO_NOFALLBACK);
break;
case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE:
error = truncate_bdev_range(bdev, file_to_blk_mode(file), start, end);
if (error)
goto fail;
error = blkdev_issue_discard(bdev, start >> SECTOR_SHIFT,
len >> SECTOR_SHIFT, GFP_KERNEL);
break;
default:
error = -EOPNOTSUPP;
}
......
......@@ -252,40 +252,39 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
if (offset < 0 || len <= 0)
return -EINVAL;
/* Return error if mode is not supported */
if (mode & ~FALLOC_FL_SUPPORTED_MASK)
if (mode & ~(FALLOC_FL_MODE_MASK | FALLOC_FL_KEEP_SIZE))
return -EOPNOTSUPP;
/* Punch hole and zero range are mutually exclusive */
if ((mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) ==
(FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE))
return -EOPNOTSUPP;
/* Punch hole must have keep size set */
if ((mode & FALLOC_FL_PUNCH_HOLE) &&
!(mode & FALLOC_FL_KEEP_SIZE))
/*
* Modes are exclusive, even if that is not obvious from the encoding
* as bit masks and the mix with the flag in the same namespace.
*
* To make things even more complicated, FALLOC_FL_ALLOCATE_RANGE is
* encoded as no bit set.
*/
switch (mode & FALLOC_FL_MODE_MASK) {
case FALLOC_FL_ALLOCATE_RANGE:
case FALLOC_FL_UNSHARE_RANGE:
case FALLOC_FL_ZERO_RANGE:
break;
case FALLOC_FL_PUNCH_HOLE:
if (!(mode & FALLOC_FL_KEEP_SIZE))
return -EOPNOTSUPP;
break;
case FALLOC_FL_COLLAPSE_RANGE:
case FALLOC_FL_INSERT_RANGE:
if (mode & FALLOC_FL_KEEP_SIZE)
return -EOPNOTSUPP;
break;
default:
return -EOPNOTSUPP;
/* Collapse range should only be used exclusively. */
if ((mode & FALLOC_FL_COLLAPSE_RANGE) &&
(mode & ~FALLOC_FL_COLLAPSE_RANGE))
return -EINVAL;
/* Insert range should only be used exclusively. */
if ((mode & FALLOC_FL_INSERT_RANGE) &&
(mode & ~FALLOC_FL_INSERT_RANGE))
return -EINVAL;
/* Unshare range should only be used with allocate mode. */
if ((mode & FALLOC_FL_UNSHARE_RANGE) &&
(mode & ~(FALLOC_FL_UNSHARE_RANGE | FALLOC_FL_KEEP_SIZE)))
return -EINVAL;
}
if (!(file->f_mode & FMODE_WRITE))
return -EBADF;
/*
* We can only allow pure fallocate on append only files
* On append-only files only space preallocation is supported.
*/
if ((mode & ~FALLOC_FL_KEEP_SIZE) && IS_APPEND(inode))
return -EPERM;
......
......@@ -653,6 +653,9 @@ xfs_alloc_file_space(
xfs_bmbt_irec_t imaps[1], *imapp;
int error;
if (xfs_is_always_cow_inode(ip))
return 0;
trace_xfs_alloc_file_space(ip);
if (xfs_is_shutdown(mp))
......@@ -848,6 +851,14 @@ xfs_free_file_space(
if (len <= 0) /* if nothing being freed */
return 0;
/*
* Now AIO and DIO has drained we flush and (if necessary) invalidate
* the cached range over the first operation we are about to run.
*/
error = xfs_flush_unmap_range(ip, offset, len);
if (error)
return error;
startoffset_fsb = XFS_B_TO_FSB(mp, offset);
endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len);
......
......@@ -852,6 +852,192 @@ static inline bool xfs_file_sync_writes(struct file *filp)
return false;
}
static int
xfs_falloc_newsize(
struct file *file,
int mode,
loff_t offset,
loff_t len,
loff_t *new_size)
{
struct inode *inode = file_inode(file);
if ((mode & FALLOC_FL_KEEP_SIZE) || offset + len <= i_size_read(inode))
return 0;
*new_size = offset + len;
return inode_newsize_ok(inode, *new_size);
}
static int
xfs_falloc_setsize(
struct file *file,
loff_t new_size)
{
struct iattr iattr = {
.ia_valid = ATTR_SIZE,
.ia_size = new_size,
};
if (!new_size)
return 0;
return xfs_vn_setattr_size(file_mnt_idmap(file), file_dentry(file),
&iattr);
}
static int
xfs_falloc_collapse_range(
struct file *file,
loff_t offset,
loff_t len)
{
struct inode *inode = file_inode(file);
loff_t new_size = i_size_read(inode) - len;
int error;
if (!xfs_is_falloc_aligned(XFS_I(inode), offset, len))
return -EINVAL;
/*
* There is no need to overlap collapse range with EOF, in which case it
* is effectively a truncate operation
*/
if (offset + len >= i_size_read(inode))
return -EINVAL;
error = xfs_collapse_file_space(XFS_I(inode), offset, len);
if (error)
return error;
return xfs_falloc_setsize(file, new_size);
}
static int
xfs_falloc_insert_range(
struct file *file,
loff_t offset,
loff_t len)
{
struct inode *inode = file_inode(file);
loff_t isize = i_size_read(inode);
int error;
if (!xfs_is_falloc_aligned(XFS_I(inode), offset, len))
return -EINVAL;
/*
* New inode size must not exceed ->s_maxbytes, accounting for
* possible signed overflow.
*/
if (inode->i_sb->s_maxbytes - isize < len)
return -EFBIG;
/* Offset should be less than i_size */
if (offset >= isize)
return -EINVAL;
error = xfs_falloc_setsize(file, isize + len);
if (error)
return error;
/*
* Perform hole insertion now that the file size has been updated so
* that if we crash during the operation we don't leave shifted extents
* past EOF and hence losing access to the data that is contained within
* them.
*/
return xfs_insert_file_space(XFS_I(inode), offset, len);
}
/*
* Punch a hole and prealloc the range. We use a hole punch rather than
* unwritten extent conversion for two reasons:
*
* 1.) Hole punch handles partial block zeroing for us.
* 2.) If prealloc returns ENOSPC, the file range is still zero-valued by
* virtue of the hole punch.
*/
static int
xfs_falloc_zero_range(
struct file *file,
int mode,
loff_t offset,
loff_t len)
{
struct inode *inode = file_inode(file);
unsigned int blksize = i_blocksize(inode);
loff_t new_size = 0;
int error;
trace_xfs_zero_file_space(XFS_I(inode));
error = xfs_falloc_newsize(file, mode, offset, len, &new_size);
if (error)
return error;
error = xfs_free_file_space(XFS_I(inode), offset, len);
if (error)
return error;
len = round_up(offset + len, blksize) - round_down(offset, blksize);
offset = round_down(offset, blksize);
error = xfs_alloc_file_space(XFS_I(inode), offset, len);
if (error)
return error;
return xfs_falloc_setsize(file, new_size);
}
static int
xfs_falloc_unshare_range(
struct file *file,
int mode,
loff_t offset,
loff_t len)
{
struct inode *inode = file_inode(file);
loff_t new_size = 0;
int error;
error = xfs_falloc_newsize(file, mode, offset, len, &new_size);
if (error)
return error;
error = xfs_reflink_unshare(XFS_I(inode), offset, len);
if (error)
return error;
error = xfs_alloc_file_space(XFS_I(inode), offset, len);
if (error)
return error;
return xfs_falloc_setsize(file, new_size);
}
static int
xfs_falloc_allocate_range(
struct file *file,
int mode,
loff_t offset,
loff_t len)
{
struct inode *inode = file_inode(file);
loff_t new_size = 0;
int error;
/*
* If always_cow mode we can't use preallocations and thus should not
* create them.
*/
if (xfs_is_always_cow_inode(XFS_I(inode)))
return -EOPNOTSUPP;
error = xfs_falloc_newsize(file, mode, offset, len, &new_size);
if (error)
return error;
error = xfs_alloc_file_space(XFS_I(inode), offset, len);
if (error)
return error;
return xfs_falloc_setsize(file, new_size);
}
#define XFS_FALLOC_FL_SUPPORTED \
(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \
FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | \
......@@ -868,8 +1054,6 @@ xfs_file_fallocate(
struct xfs_inode *ip = XFS_I(inode);
long error;
uint iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
loff_t new_size = 0;
bool do_file_insert = false;
if (!S_ISREG(inode->i_mode))
return -EINVAL;
......@@ -890,156 +1074,35 @@ xfs_file_fallocate(
*/
inode_dio_wait(inode);
/*
* Now AIO and DIO has drained we flush and (if necessary) invalidate
* the cached range over the first operation we are about to run.
*
* We care about zero and collapse here because they both run a hole
* punch over the range first. Because that can zero data, and the range
* of invalidation for the shift operations is much larger, we still do
* the required flush for collapse in xfs_prepare_shift().
*
* Insert has the same range requirements as collapse, and we extend the
* file first which can zero data. Hence insert has the same
* flush/invalidate requirements as collapse and so they are both
* handled at the right time by xfs_prepare_shift().
*/
if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE |
FALLOC_FL_COLLAPSE_RANGE)) {
error = xfs_flush_unmap_range(ip, offset, len);
if (error)
goto out_unlock;
}
error = file_modified(file);
if (error)
goto out_unlock;
if (mode & FALLOC_FL_PUNCH_HOLE) {
switch (mode & FALLOC_FL_MODE_MASK) {
case FALLOC_FL_PUNCH_HOLE:
error = xfs_free_file_space(ip, offset, len);
if (error)
goto out_unlock;
} else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
if (!xfs_is_falloc_aligned(ip, offset, len)) {
error = -EINVAL;
goto out_unlock;
}
/*
* There is no need to overlap collapse range with EOF,
* in which case it is effectively a truncate operation
*/
if (offset + len >= i_size_read(inode)) {
error = -EINVAL;
goto out_unlock;
}
new_size = i_size_read(inode) - len;
error = xfs_collapse_file_space(ip, offset, len);
if (error)
goto out_unlock;
} else if (mode & FALLOC_FL_INSERT_RANGE) {
loff_t isize = i_size_read(inode);
if (!xfs_is_falloc_aligned(ip, offset, len)) {
error = -EINVAL;
goto out_unlock;
}
/*
* New inode size must not exceed ->s_maxbytes, accounting for
* possible signed overflow.
*/
if (inode->i_sb->s_maxbytes - isize < len) {
error = -EFBIG;
goto out_unlock;
}
new_size = isize + len;
/* Offset should be less than i_size */
if (offset >= isize) {
error = -EINVAL;
goto out_unlock;
}
do_file_insert = true;
} else {
if (!(mode & FALLOC_FL_KEEP_SIZE) &&
offset + len > i_size_read(inode)) {
new_size = offset + len;
error = inode_newsize_ok(inode, new_size);
if (error)
goto out_unlock;
}
if (mode & FALLOC_FL_ZERO_RANGE) {
/*
* Punch a hole and prealloc the range. We use a hole
* punch rather than unwritten extent conversion for two
* reasons:
*
* 1.) Hole punch handles partial block zeroing for us.
* 2.) If prealloc returns ENOSPC, the file range is
* still zero-valued by virtue of the hole punch.
*/
unsigned int blksize = i_blocksize(inode);
trace_xfs_zero_file_space(ip);
error = xfs_free_file_space(ip, offset, len);
if (error)
goto out_unlock;
len = round_up(offset + len, blksize) -
round_down(offset, blksize);
offset = round_down(offset, blksize);
} else if (mode & FALLOC_FL_UNSHARE_RANGE) {
error = xfs_reflink_unshare(ip, offset, len);
if (error)
goto out_unlock;
} else {
/*
* If always_cow mode we can't use preallocations and
* thus should not create them.
*/
if (xfs_is_always_cow_inode(ip)) {
error = -EOPNOTSUPP;
goto out_unlock;
}
}
if (!xfs_is_always_cow_inode(ip)) {
error = xfs_alloc_file_space(ip, offset, len);
if (error)
goto out_unlock;
}
}
/* Change file size if needed */
if (new_size) {
struct iattr iattr;
iattr.ia_valid = ATTR_SIZE;
iattr.ia_size = new_size;
error = xfs_vn_setattr_size(file_mnt_idmap(file),
file_dentry(file), &iattr);
if (error)
goto out_unlock;
}
/*
* Perform hole insertion now that the file size has been
* updated so that if we crash during the operation we don't
* leave shifted extents past EOF and hence losing access to
* the data that is contained within them.
*/
if (do_file_insert) {
error = xfs_insert_file_space(ip, offset, len);
if (error)
goto out_unlock;
break;
case FALLOC_FL_COLLAPSE_RANGE:
error = xfs_falloc_collapse_range(file, offset, len);
break;
case FALLOC_FL_INSERT_RANGE:
error = xfs_falloc_insert_range(file, offset, len);
break;
case FALLOC_FL_ZERO_RANGE:
error = xfs_falloc_zero_range(file, mode, offset, len);
break;
case FALLOC_FL_UNSHARE_RANGE:
error = xfs_falloc_unshare_range(file, mode, offset, len);
break;
case FALLOC_FL_ALLOCATE_RANGE:
error = xfs_falloc_allocate_range(file, mode, offset, len);
break;
default:
error = -EOPNOTSUPP;
break;
}
if (xfs_file_sync_writes(file))
if (!error && xfs_file_sync_writes(file))
error = xfs_log_force_inode(ip);
out_unlock:
......
......@@ -25,12 +25,18 @@ struct space_resv {
#define FS_IOC_UNRESVSP64 _IOW('X', 43, struct space_resv)
#define FS_IOC_ZERO_RANGE _IOW('X', 57, struct space_resv)
#define FALLOC_FL_SUPPORTED_MASK (FALLOC_FL_KEEP_SIZE | \
FALLOC_FL_PUNCH_HOLE | \
FALLOC_FL_COLLAPSE_RANGE | \
FALLOC_FL_ZERO_RANGE | \
FALLOC_FL_INSERT_RANGE | \
FALLOC_FL_UNSHARE_RANGE)
/*
* Mask of all supported fallocate modes. Only one can be set at a time.
*
* In addition to the mode bit, the mode argument can also encode flags.
* FALLOC_FL_KEEP_SIZE is the only supported flag so far.
*/
#define FALLOC_FL_MODE_MASK (FALLOC_FL_ALLOCATE_RANGE | \
FALLOC_FL_PUNCH_HOLE | \
FALLOC_FL_COLLAPSE_RANGE | \
FALLOC_FL_ZERO_RANGE | \
FALLOC_FL_INSERT_RANGE | \
FALLOC_FL_UNSHARE_RANGE)
/* on ia32 l_start is on a 32-bit boundary */
#if defined(CONFIG_X86_64)
......
......@@ -91,7 +91,6 @@ TRACE_DEFINE_ENUM(ES_REFERENCED_B);
#define show_falloc_mode(mode) __print_flags(mode, "|", \
{ FALLOC_FL_KEEP_SIZE, "KEEP_SIZE"}, \
{ FALLOC_FL_PUNCH_HOLE, "PUNCH_HOLE"}, \
{ FALLOC_FL_NO_HIDE_STALE, "NO_HIDE_STALE"}, \
{ FALLOC_FL_COLLAPSE_RANGE, "COLLAPSE_RANGE"}, \
{ FALLOC_FL_ZERO_RANGE, "ZERO_RANGE"})
......
......@@ -2,6 +2,7 @@
#ifndef _UAPI_FALLOC_H_
#define _UAPI_FALLOC_H_
#define FALLOC_FL_ALLOCATE_RANGE 0x00 /* allocate range */
#define FALLOC_FL_KEEP_SIZE 0x01 /* default is extend size */
#define FALLOC_FL_PUNCH_HOLE 0x02 /* de-allocates range */
#define FALLOC_FL_NO_HIDE_STALE 0x04 /* reserved codepoint */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment