Commit acdda3aa authored by Christoph Hellwig, committed by Dave Chinner

xfs: use iomap_dio_rw

Straight switch over to using iomap for direct I/O - we already have the
non-COW dio path in write_begin for DAX and files with extent size hints,
so nothing to add there.  The COW path is ported over from the old
get_blocks version and is a bit of a mess, but I have some work in
progress to make it look more like the buffered I/O COW path.

This gets rid of xfs_get_blocks_direct and the last caller of
xfs_get_blocks with the create flag set, so all that code can be removed.

Last but not least, I've removed the comment in xfs_filemap_fault that
refers to xfs_get_blocks entirely instead of updating it - while the
reference was correct, the whole DAX fault path looks so different from
the non-DAX one that keeping it seemed rather pointless.
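
For reference, the iomap direct I/O entry point added in the parent commit has roughly this shape (a sketch paraphrased from include/linux/iomap.h as of this series; check the in-tree header for the authoritative version):

    /* completion callback; flags carries IOMAP_DIO_UNWRITTEN / IOMAP_DIO_COW */
    typedef int (iomap_dio_end_io_t)(struct kiocb *iocb, ssize_t ret,
    		unsigned flags);

    ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
    		struct iomap_ops *ops, iomap_dio_end_io_t end_io);

The read path below passes a NULL end_io; the write path passes the new xfs_dio_write_end_io.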
Signed-off-by: Christoph Hellwig <hch@lst.de>
Tested-by: Jens Axboe <axboe@fb.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
parent ff6a9292
@@ -55,12 +55,6 @@ struct xfs_ioend {
 extern const struct address_space_operations xfs_address_space_operations;
-int	xfs_get_blocks(struct inode *inode, sector_t offset,
-		struct buffer_head *map_bh, int create);
-int	xfs_get_blocks_direct(struct inode *inode, sector_t offset,
-		struct buffer_head *map_bh, int create);
-int	xfs_end_io_direct_write(struct kiocb *iocb, loff_t offset,
-		ssize_t size, void *private);
 int	xfs_setfilesize(struct xfs_inode *ip, xfs_off_t offset, size_t size);
 extern void	xfs_count_page_state(struct page *, int *, int *);
...
@@ -210,62 +210,21 @@ xfs_file_dio_aio_read(
 	struct kiocb		*iocb,
 	struct iov_iter		*to)
 {
-	struct address_space	*mapping = iocb->ki_filp->f_mapping;
-	struct inode		*inode = mapping->host;
-	struct xfs_inode	*ip = XFS_I(inode);
-	loff_t			isize = i_size_read(inode);
+	struct xfs_inode	*ip = XFS_I(file_inode(iocb->ki_filp));
 	size_t			count = iov_iter_count(to);
-	loff_t			end = iocb->ki_pos + count - 1;
-	struct iov_iter		data;
-	struct xfs_buftarg	*target;
-	ssize_t			ret = 0;
+	ssize_t			ret;
 	trace_xfs_file_direct_read(ip, count, iocb->ki_pos);
 	if (!count)
 		return 0; /* skip atime */
-	if (XFS_IS_REALTIME_INODE(ip))
-		target = ip->i_mount->m_rtdev_targp;
-	else
-		target = ip->i_mount->m_ddev_targp;
-	/* DIO must be aligned to device logical sector size */
-	if ((iocb->ki_pos | count) & target->bt_logical_sectormask) {
-		if (iocb->ki_pos == isize)
-			return 0;
-		return -EINVAL;
-	}
 	file_accessed(iocb->ki_filp);
 	xfs_ilock(ip, XFS_IOLOCK_SHARED);
-	if (mapping->nrpages) {
-		ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end);
-		if (ret)
-			goto out_unlock;
-		/*
-		 * Invalidate whole pages. This can return an error if we fail
-		 * to invalidate a page, but this should never happen on XFS.
-		 * Warn if it does fail.
-		 */
-		ret = invalidate_inode_pages2_range(mapping,
-				iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
-		WARN_ON_ONCE(ret);
-		ret = 0;
-	}
-	data = *to;
-	ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
-			xfs_get_blocks_direct, NULL, NULL, 0);
-	if (ret >= 0) {
-		iocb->ki_pos += ret;
-		iov_iter_advance(to, ret);
-	}
-out_unlock:
+	ret = iomap_dio_rw(iocb, to, &xfs_iomap_ops, NULL);
 	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 	return ret;
 }
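
The manual cache writeback and invalidation deleted above is not lost: iomap_dio_rw performs it internally before issuing the I/O, and also advances iocb->ki_pos on success. A simplified sketch of that logic, paraphrased from the parent commit's fs/iomap.c (assume the in-tree version is authoritative):

    if (mapping->nrpages) {
    	ret = filemap_write_and_wait_range(mapping, start, end);
    	if (ret)
    		goto out_free_dio;

    	/* failure to invalidate is warned about, not fatal */
    	ret = invalidate_inode_pages2_range(mapping,
    			start >> PAGE_SHIFT, end >> PAGE_SHIFT);
    	WARN_ON_ONCE(ret);
    	ret = 0;
    }

This is why each filesystem's private copy of this block can simply be deleted.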
@@ -465,6 +424,58 @@ xfs_file_aio_write_checks(
 	return 0;
 }
+static int
+xfs_dio_write_end_io(
+	struct kiocb		*iocb,
+	ssize_t			size,
+	unsigned		flags)
+{
+	struct inode		*inode = file_inode(iocb->ki_filp);
+	struct xfs_inode	*ip = XFS_I(inode);
+	loff_t			offset = iocb->ki_pos;
+	bool			update_size = false;
+	int			error = 0;
+	trace_xfs_end_io_direct_write(ip, offset, size);
+	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+		return -EIO;
+	if (size <= 0)
+		return size;
+	/*
+	 * We need to update the in-core inode size here so that we don't end up
+	 * with the on-disk inode size being outside the in-core inode size. We
+	 * have no other method of updating EOF for AIO, so always do it here
+	 * if necessary.
+	 *
+	 * We need to lock the test/set EOF update as we can be racing with
+	 * other IO completions here to update the EOF. Failing to serialise
+	 * here can result in EOF moving backwards and Bad Things Happen when
+	 * that occurs.
+	 */
+	spin_lock(&ip->i_flags_lock);
+	if (offset + size > i_size_read(inode)) {
+		i_size_write(inode, offset + size);
+		update_size = true;
+	}
+	spin_unlock(&ip->i_flags_lock);
+	if (flags & IOMAP_DIO_COW) {
+		error = xfs_reflink_end_cow(ip, offset, size);
+		if (error)
+			return error;
+	}
+	if (flags & IOMAP_DIO_UNWRITTEN)
+		error = xfs_iomap_write_unwritten(ip, offset, size);
+	else if (update_size)
+		error = xfs_setfilesize(ip, offset, size);
+	return error;
+}
 /*
  * xfs_file_dio_aio_write - handle direct IO writes
  *
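
xfs_dio_write_end_io above implements the iomap_dio_end_io_t contract: it runs once when the direct I/O completes (possibly from AIO completion context), before iomap advances iocb->ki_pos, with flags indicating whether the write went through unwritten extents or the COW fork. A minimal skeleton for a hypothetical iomap-based filesystem, to show the contract in isolation (example_convert_unwritten is a made-up helper, not a real kernel function):

    static int
    example_dio_write_end_io(
    	struct kiocb	*iocb,
    	ssize_t		size,
    	unsigned	flags)
    {
    	struct inode	*inode = file_inode(iocb->ki_filp);

    	/* negative size is the I/O error, zero means nothing was written */
    	if (size <= 0)
    		return size;

    	/* convert extents that were still marked unwritten when written */
    	if (flags & IOMAP_DIO_UNWRITTEN)
    		return example_convert_unwritten(inode, iocb->ki_pos, size);
    	return 0;
    }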
@@ -504,9 +515,7 @@ xfs_file_dio_aio_write(
 	int			unaligned_io = 0;
 	int			iolock;
 	size_t			count = iov_iter_count(from);
-	loff_t			end;
-	struct iov_iter		data;
 	struct xfs_buftarg	*target = XFS_IS_REALTIME_INODE(ip) ?
 					mp->m_rtdev_targp : mp->m_ddev_targp;
 	/* DIO must be aligned to device logical sector size */
@@ -534,23 +543,6 @@ xfs_file_dio_aio_write(
 	if (ret)
 		goto out;
 	count = iov_iter_count(from);
-	end = iocb->ki_pos + count - 1;
-	if (mapping->nrpages) {
-		ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end);
-		if (ret)
-			goto out;
-		/*
-		 * Invalidate whole pages. This can return an error if we fail
-		 * to invalidate a page, but this should never happen on XFS.
-		 * Warn if it does fail.
-		 */
-		ret = invalidate_inode_pages2_range(mapping,
-				iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
-		WARN_ON_ONCE(ret);
-		ret = 0;
-	}
 	/*
 	 * If we are doing unaligned IO, wait for all other IO to drain,
@@ -573,22 +565,7 @@ xfs_file_dio_aio_write(
 		goto out;
 	}
-	data = *from;
-	ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
-			xfs_get_blocks_direct, xfs_end_io_direct_write,
-			NULL, DIO_ASYNC_EXTEND);
-	/* see generic_file_direct_write() for why this is necessary */
-	if (mapping->nrpages) {
-		invalidate_inode_pages2_range(mapping,
-			iocb->ki_pos >> PAGE_SHIFT,
-			end >> PAGE_SHIFT);
-	}
-	if (ret > 0) {
-		iocb->ki_pos += ret;
-		iov_iter_advance(from, ret);
-	}
+	ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, xfs_dio_write_end_io);
 out:
 	xfs_iunlock(ip, iolock);
@@ -1468,15 +1445,9 @@ xfs_filemap_fault(
 		return xfs_filemap_page_mkwrite(vma, vmf);
 	xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
-	if (IS_DAX(inode)) {
-		/*
-		 * we do not want to trigger unwritten extent conversion on read
-		 * faults - that is unnecessary overhead and would also require
-		 * changes to xfs_get_blocks_direct() to map unwritten extent
-		 * ioend for conversion on read-only mappings.
-		 */
+	if (IS_DAX(inode))
 		ret = dax_iomap_fault(vma, vmf, &xfs_iomap_ops);
-	} else
+	else
 		ret = filemap_fault(vma, vmf);
 	xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
...
@@ -950,6 +950,19 @@ static inline bool imap_needs_alloc(struct inode *inode,
 		(IS_DAX(inode) && ISUNWRITTEN(imap));
 }
+static inline bool need_excl_ilock(struct xfs_inode *ip, unsigned flags)
+{
+	/*
+	 * COW writes will allocate delalloc space, so we need to make sure
+	 * to take the lock exclusively here.
+	 */
+	if (xfs_is_reflink_inode(ip) && (flags & (IOMAP_WRITE | IOMAP_ZERO)))
+		return true;
+	if ((flags & IOMAP_DIRECT) && (flags & IOMAP_WRITE))
+		return true;
+	return false;
+}
 static int
 xfs_file_iomap_begin(
 	struct inode		*inode,
@@ -969,18 +982,14 @@ xfs_file_iomap_begin(
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return -EIO;
-	if ((flags & IOMAP_WRITE) && !IS_DAX(inode) &&
-	    !xfs_get_extsz_hint(ip)) {
+	if (((flags & (IOMAP_WRITE | IOMAP_DIRECT)) == IOMAP_WRITE) &&
+	    !IS_DAX(inode) && !xfs_get_extsz_hint(ip)) {
 		/* Reserve delalloc blocks for regular writeback. */
 		return xfs_file_iomap_begin_delay(inode, offset, length, flags,
 				iomap);
 	}
-	/*
-	 * COW writes will allocate delalloc space, so we need to make sure
-	 * to take the lock exclusively here.
-	 */
-	if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
+	if (need_excl_ilock(ip, flags)) {
 		lockmode = XFS_ILOCK_EXCL;
 		xfs_ilock(ip, XFS_ILOCK_EXCL);
 	} else {
@@ -993,17 +1002,41 @@ xfs_file_iomap_begin(
 	offset_fsb = XFS_B_TO_FSBT(mp, offset);
 	end_fsb = XFS_B_TO_FSB(mp, offset + length);
+	if (xfs_is_reflink_inode(ip) &&
+	    (flags & IOMAP_WRITE) && (flags & IOMAP_DIRECT)) {
+		shared = xfs_reflink_find_cow_mapping(ip, offset, &imap);
+		if (shared) {
+			xfs_iunlock(ip, lockmode);
+			goto alloc_done;
+		}
+		ASSERT(!isnullstartblock(imap.br_startblock));
+	}
 	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
 			       &nimaps, 0);
 	if (error)
 		goto out_unlock;
-	if (flags & IOMAP_REPORT) {
+	if ((flags & IOMAP_REPORT) ||
+	    (xfs_is_reflink_inode(ip) &&
+	    (flags & IOMAP_WRITE) && (flags & IOMAP_DIRECT))) {
 		/* Trim the mapping to the nearest shared extent boundary. */
 		error = xfs_reflink_trim_around_shared(ip, &imap, &shared,
 				&trimmed);
 		if (error)
 			goto out_unlock;
+		/*
+		 * We're here because we're trying to do a directio write to a
+		 * region that isn't aligned to a filesystem block. If the
+		 * extent is shared, fall back to buffered mode to handle the
+		 * RMW.
+		 */
+		if (!(flags & IOMAP_REPORT) && shared) {
+			trace_xfs_reflink_bounce_dio_write(ip, &imap);
+			error = -EREMCHG;
+			goto out_unlock;
+		}
 	}
 	if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
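
The -EREMCHG here never reaches userspace: xfs_file_dio_aio_write returns it to xfs_file_write_iter, which retries through the page cache only for this reflink CoW case. A sketch of the caller-side handling, as it appears in xfs_file_write_iter in the tree this applies to (quoted from the reflink series, lightly trimmed; verify against the actual source):

    	if (IS_DAX(inode))
    		ret = xfs_file_dax_write(iocb, from);
    	else if (iocb->ki_flags & IOCB_DIRECT) {
    		/*
    		 * Allow a directio write to fall back to a buffered
    		 * write *only* in the case that we're doing a reflink
    		 * CoW.  In all other directio scenarios we do not
    		 * allow an operation to fall back to buffered mode.
    		 */
    		ret = xfs_file_dio_aio_write(iocb, from);
    		if (ret == -EREMCHG)
    			goto buffered;
    	} else {
    buffered:
    		ret = xfs_file_buffered_aio_write(iocb, from);
    	}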
@@ -1038,6 +1071,7 @@ xfs_file_iomap_begin(
 		if (error)
 			return error;
+alloc_done:
 		iomap->flags = IOMAP_F_NEW;
 		trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
 	} else {
...