Commit 60271ab7 authored by Christoph Hellwig's avatar Christoph Hellwig Committed by Darrick J. Wong

xfs: fix SEEK_DATA for speculative COW fork preallocation

We speculatively allocate extents in the COW fork to reduce
fragmentation.  But when we write data into such COW fork blocks that
do now shadow an allocation in the data fork SEEK_DATA will not
correctly report it, as it only looks at the data fork extents.
The only reason why that hasn't been an issue so far is because
we even use these speculative COW fork preallocations over holes in
the data fork at all for buffered writes, and blocks in the COW
fork that are written by direct writes are moved into the data
fork immediately at I/O completion time.

Add a new set of iomap_ops for SEEK_HOLE/SEEK_DATA which looks into
both the COW and data fork, and reports all COW extents as unwritten
to the iomap layer.  While this isn't strictly true for COW fork
extents that were already converted to real extents, the practical
semantics that you can't read data from them until they are moved
into the data fork are very similar, and this will force the iomap
layer into probing the extents for actually present data.
Signed-off-by: default avatarChristoph Hellwig <hch@lst.de>
Reviewed-by: default avatarDarrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: default avatarDarrick J. Wong <darrick.wong@oracle.com>
parent 16be1433
...@@ -1068,10 +1068,10 @@ xfs_file_llseek( ...@@ -1068,10 +1068,10 @@ xfs_file_llseek(
default: default:
return generic_file_llseek(file, offset, whence); return generic_file_llseek(file, offset, whence);
case SEEK_HOLE: case SEEK_HOLE:
offset = iomap_seek_hole(inode, offset, &xfs_iomap_ops); offset = iomap_seek_hole(inode, offset, &xfs_seek_iomap_ops);
break; break;
case SEEK_DATA: case SEEK_DATA:
offset = iomap_seek_data(inode, offset, &xfs_iomap_ops); offset = iomap_seek_data(inode, offset, &xfs_seek_iomap_ops);
break; break;
} }
......
...@@ -1068,6 +1068,92 @@ const struct iomap_ops xfs_iomap_ops = { ...@@ -1068,6 +1068,92 @@ const struct iomap_ops xfs_iomap_ops = {
.iomap_end = xfs_file_iomap_end, .iomap_end = xfs_file_iomap_end,
}; };
static int
xfs_seek_iomap_begin(
struct inode *inode,
loff_t offset,
loff_t length,
unsigned flags,
struct iomap *iomap)
{
struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + length);
xfs_fileoff_t cow_fsb = NULLFILEOFF, data_fsb = NULLFILEOFF;
struct xfs_iext_cursor icur;
struct xfs_bmbt_irec imap, cmap;
int error = 0;
unsigned lockmode;
if (XFS_FORCED_SHUTDOWN(mp))
return -EIO;
lockmode = xfs_ilock_data_map_shared(ip);
if (!(ip->i_df.if_flags & XFS_IFEXTENTS)) {
error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
if (error)
goto out_unlock;
}
if (xfs_iext_lookup_extent(ip, &ip->i_df, offset_fsb, &icur, &imap)) {
/*
* If we found a data extent we are done.
*/
if (imap.br_startoff <= offset_fsb)
goto done;
data_fsb = imap.br_startoff;
} else {
/*
* Fake a hole until the end of the file.
*/
data_fsb = min(XFS_B_TO_FSB(mp, offset + length),
XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes));
}
/*
* If a COW fork extent covers the hole, report it - capped to the next
* data fork extent:
*/
if (xfs_inode_has_cow_data(ip) &&
xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &cmap))
cow_fsb = cmap.br_startoff;
if (cow_fsb != NULLFILEOFF && cow_fsb <= offset_fsb) {
if (data_fsb < cow_fsb + cmap.br_blockcount)
end_fsb = min(end_fsb, data_fsb);
xfs_trim_extent(&cmap, offset_fsb, end_fsb);
error = xfs_bmbt_to_iomap(ip, iomap, &cmap, true);
/*
* This is a COW extent, so we must probe the page cache
* because there could be dirty page cache being backed
* by this extent.
*/
iomap->type = IOMAP_UNWRITTEN;
goto out_unlock;
}
/*
* Else report a hole, capped to the next found data or COW extent.
*/
if (cow_fsb != NULLFILEOFF && cow_fsb < data_fsb)
imap.br_blockcount = cow_fsb - offset_fsb;
else
imap.br_blockcount = data_fsb - offset_fsb;
imap.br_startoff = offset_fsb;
imap.br_startblock = HOLESTARTBLOCK;
imap.br_state = XFS_EXT_NORM;
done:
xfs_trim_extent(&imap, offset_fsb, end_fsb);
error = xfs_bmbt_to_iomap(ip, iomap, &imap, false);
out_unlock:
xfs_iunlock(ip, lockmode);
return error;
}
const struct iomap_ops xfs_seek_iomap_ops = {
.iomap_begin = xfs_seek_iomap_begin,
};
static int static int
xfs_xattr_iomap_begin( xfs_xattr_iomap_begin(
struct inode *inode, struct inode *inode,
......
...@@ -40,6 +40,7 @@ xfs_aligned_fsb_count( ...@@ -40,6 +40,7 @@ xfs_aligned_fsb_count(
} }
extern const struct iomap_ops xfs_iomap_ops; extern const struct iomap_ops xfs_iomap_ops;
extern const struct iomap_ops xfs_seek_iomap_ops;
extern const struct iomap_ops xfs_xattr_iomap_ops; extern const struct iomap_ops xfs_xattr_iomap_ops;
#endif /* __XFS_IOMAP_H__*/ #endif /* __XFS_IOMAP_H__*/
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment