Commit a06c277a authored by Dave Chinner, committed by Dave Chinner

xfs: DIO writes within EOF don't need an ioend

DIO writes that lie entirely within EOF have nothing to do in IO
completion. In this case, we don't need no steekin' ioend, and so we
can avoid allocating an ioend until we have a mapping that spans
EOF.

This means that IO completion has two contexts - deferred completion
to the dio workqueue that uses an ioend, and interrupt completion
that does nothing because there is nothing that can be done in this
context.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
parent 6dfa1b67
...@@ -1234,15 +1234,19 @@ xfs_vm_releasepage( ...@@ -1234,15 +1234,19 @@ xfs_vm_releasepage(
} }
/* /*
* When we map a DIO buffer, we need to attach an ioend that describes the type * When we map a DIO buffer, we may need to attach an ioend that describes the
* of write IO we are doing. This passes to the completion function the * type of write IO we are doing. This passes to the completion function the
* operations it needs to perform. * operations it needs to perform. If the mapping is for an overwrite wholly
* within the EOF then we don't need an ioend and so we don't allocate one.
* This avoids the unnecessary overhead of allocating and freeing ioends for
* workloads that don't require transactions on IO completion.
* *
* If we get multiple mappings in a single IO, we might be mapping different * If we get multiple mappings in a single IO, we might be mapping different
* types. But because the direct IO can only have a single private pointer, we * types. But because the direct IO can only have a single private pointer, we
* need to ensure that: * need to ensure that:
* *
* a) the ioend spans the entire region of the IO; and * a) i) the ioend spans the entire region of unwritten mappings; or
* ii) the ioend spans all the mappings that cross or are beyond EOF; and
* b) if it contains unwritten extents, it is *permanently* marked as such * b) if it contains unwritten extents, it is *permanently* marked as such
* *
* We could do this by chaining ioends like buffered IO does, but we only * We could do this by chaining ioends like buffered IO does, but we only
...@@ -1283,21 +1287,23 @@ xfs_map_direct( ...@@ -1283,21 +1287,23 @@ xfs_map_direct(
trace_xfs_gbmap_direct_update(XFS_I(inode), ioend->io_offset, trace_xfs_gbmap_direct_update(XFS_I(inode), ioend->io_offset,
ioend->io_size, ioend->io_type, ioend->io_size, ioend->io_type,
imap); imap);
} else { } else if (type == XFS_IO_UNWRITTEN ||
offset + size > i_size_read(inode)) {
ioend = xfs_alloc_ioend(inode, type); ioend = xfs_alloc_ioend(inode, type);
ioend->io_offset = offset; ioend->io_offset = offset;
ioend->io_size = size; ioend->io_size = size;
bh_result->b_private = ioend; bh_result->b_private = ioend;
set_buffer_defer_completion(bh_result);
trace_xfs_gbmap_direct_new(XFS_I(inode), offset, size, type, trace_xfs_gbmap_direct_new(XFS_I(inode), offset, size, type,
imap); imap);
} else {
trace_xfs_gbmap_direct_none(XFS_I(inode), offset, size, type,
imap);
} }
if (ioend->io_type == XFS_IO_UNWRITTEN || xfs_ioend_is_append(ioend))
set_buffer_defer_completion(bh_result);
} }
/* /*
* If this is O_DIRECT or the mpage code calling tell them how large the mapping * If this is O_DIRECT or the mpage code calling tell them how large the mapping
* is, so that we can avoid repeated get_blocks calls. * is, so that we can avoid repeated get_blocks calls.
...@@ -1519,9 +1525,11 @@ xfs_get_blocks_direct( ...@@ -1519,9 +1525,11 @@ xfs_get_blocks_direct(
/* /*
* Complete a direct I/O write request. * Complete a direct I/O write request.
* *
* If the private argument is non-NULL __xfs_get_blocks signals us that we * The ioend structure is passed from __xfs_get_blocks() to tell us what to do.
* need to issue a transaction to convert the range from unwritten to written * If no ioend exists (i.e. @private == NULL) then the write IO is an overwrite
* extents. * wholly within the EOF and so there is nothing for us to do. Note that in this
* case the completion can be called in interrupt context, whereas if we have an
* ioend we will always be called in task context (i.e. from a workqueue).
*/ */
STATIC void STATIC void
xfs_end_io_direct_write( xfs_end_io_direct_write(
...@@ -1535,7 +1543,13 @@ xfs_end_io_direct_write( ...@@ -1535,7 +1543,13 @@ xfs_end_io_direct_write(
struct xfs_mount *mp = ip->i_mount; struct xfs_mount *mp = ip->i_mount;
struct xfs_ioend *ioend = private; struct xfs_ioend *ioend = private;
trace_xfs_gbmap_direct_endio(ip, offset, size, ioend->io_type, NULL); trace_xfs_gbmap_direct_endio(ip, offset, size,
ioend ? ioend->io_type : 0, NULL);
if (!ioend) {
ASSERT(offset + size <= i_size_read(inode));
return;
}
if (XFS_FORCED_SHUTDOWN(mp)) if (XFS_FORCED_SHUTDOWN(mp))
goto out_end_io; goto out_end_io;
...@@ -1548,12 +1562,12 @@ xfs_end_io_direct_write( ...@@ -1548,12 +1562,12 @@ xfs_end_io_direct_write(
/* /*
* The ioend only maps whole blocks, while the IO may be sector aligned. * The ioend only maps whole blocks, while the IO may be sector aligned.
* Hence the ioend offset/size may not match the IO offset/size exactly, * Hence the ioend offset/size may not match the IO offset/size exactly.
* but should span it completely. Write the IO sizes into the ioend so * Because we don't map overwrites within EOF into the ioend, the offset
* that completion processing does the right thing. * may not match, but only if the endio spans EOF. Either way, write
* the IO sizes into the ioend so that completion processing does the
* right thing.
*/ */
ASSERT(size <= ioend->io_size);
ASSERT(offset >= ioend->io_offset);
ASSERT(offset + size <= ioend->io_offset + ioend->io_size); ASSERT(offset + size <= ioend->io_offset + ioend->io_size);
ioend->io_size = size; ioend->io_size = size;
ioend->io_offset = offset; ioend->io_offset = offset;
...@@ -1562,20 +1576,15 @@ xfs_end_io_direct_write( ...@@ -1562,20 +1576,15 @@ xfs_end_io_direct_write(
* The ioend tells us whether we are doing unwritten extent conversion * The ioend tells us whether we are doing unwritten extent conversion
* or an append transaction that updates the on-disk file size. These * or an append transaction that updates the on-disk file size. These
* cases are the only cases where we should *potentially* be needing * cases are the only cases where we should *potentially* be needing
* to update the VFS inode size. When the ioend indicates this, we * to update the VFS inode size.
* are *guaranteed* to be running in non-interrupt context.
* *
* We need to update the in-core inode size here so that we don't end up * We need to update the in-core inode size here so that we don't end up
* with the on-disk inode size being outside the in-core inode size. * with the on-disk inode size being outside the in-core inode size. We
* While we can do this in the process context after the IO has * have no other method of updating EOF for AIO, so always do it here
* completed, this does not work for AIO and hence we always update * if necessary.
* the in-core inode size here if necessary.
*/ */
if (ioend->io_type == XFS_IO_UNWRITTEN || xfs_ioend_is_append(ioend)) { if (offset + size > i_size_read(inode))
if (offset + size > i_size_read(inode)) i_size_write(inode, offset + size);
i_size_write(inode, offset + size);
} else
ASSERT(offset + size <= i_size_read(inode));
/* /*
* If we are doing an append IO that needs to update the EOF on disk, * If we are doing an append IO that needs to update the EOF on disk,
...@@ -1584,7 +1593,7 @@ xfs_end_io_direct_write( ...@@ -1584,7 +1593,7 @@ xfs_end_io_direct_write(
* result in the ioend processing passing on the error if it is * result in the ioend processing passing on the error if it is
* possible as we can't return it from here. * possible as we can't return it from here.
*/ */
if (ioend->io_type == XFS_IO_OVERWRITE && xfs_ioend_is_append(ioend)) if (ioend->io_type == XFS_IO_OVERWRITE)
ioend->io_error = xfs_setfilesize_trans_alloc(ioend); ioend->io_error = xfs_setfilesize_trans_alloc(ioend);
out_end_io: out_end_io:
......
...@@ -1220,6 +1220,7 @@ DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc); ...@@ -1220,6 +1220,7 @@ DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc);
DEFINE_IOMAP_EVENT(xfs_gbmap_direct); DEFINE_IOMAP_EVENT(xfs_gbmap_direct);
DEFINE_IOMAP_EVENT(xfs_gbmap_direct_new); DEFINE_IOMAP_EVENT(xfs_gbmap_direct_new);
DEFINE_IOMAP_EVENT(xfs_gbmap_direct_update); DEFINE_IOMAP_EVENT(xfs_gbmap_direct_update);
DEFINE_IOMAP_EVENT(xfs_gbmap_direct_none);
DEFINE_IOMAP_EVENT(xfs_gbmap_direct_endio); DEFINE_IOMAP_EVENT(xfs_gbmap_direct_endio);
DECLARE_EVENT_CLASS(xfs_simple_io_class, DECLARE_EVENT_CLASS(xfs_simple_io_class,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment