Commit 51446f5b authored by Christoph Hellwig's avatar Christoph Hellwig Committed by Dave Chinner

xfs: rewrite and optimize the delalloc write path

Currently xfs_iomap_write_delay does up to lookups in the inode
extent tree, which is rather costly especially with the new iomap
based write path and small write sizes.

But it turns out that the low-level xfs_bmap_search_extents gives us
all the information we need in the regular delalloc buffered write
path:

 - it will return us an extent covering the block we are looking up
   if it exists.  In that case we can simply return that extent to
   the caller and are done
 - it will tell us if we are beyoned the last current allocated
   block with an eof return parameter.  In that case we can create a
   delalloc reservation and use the also returned information about
   the last extent in the file as the hint to size our delalloc
   reservation.
 - it can tell us that we are writing into a hole, but that there is
   an extent beyoned this hole.  In this case we can create a
   delalloc reservation that covers the requested size (possible
   capped to the next existing allocation).

All that can be done in one single routine instead of bouncing up
and down a few layers.  This reduced the CPU overhead of the block
mapping routines and also simplified the code a lot.
Signed-off-by: default avatarChristoph Hellwig <hch@lst.de>
Reviewed-by: default avatarDave Chinner <dchinner@redhat.com>
Signed-off-by: default avatarDave Chinner <david@fromorbit.com>
parent 85a6e764
......@@ -1388,7 +1388,7 @@ xfs_bmap_search_multi_extents(
* Else, *lastxp will be set to the index of the found
* entry; *gotp will contain the entry.
*/
STATIC xfs_bmbt_rec_host_t * /* pointer to found extent entry */
xfs_bmbt_rec_host_t * /* pointer to found extent entry */
xfs_bmap_search_extents(
xfs_inode_t *ip, /* incore inode pointer */
xfs_fileoff_t bno, /* block number searched for */
......@@ -4074,7 +4074,7 @@ xfs_bmapi_read(
return 0;
}
STATIC int
int
xfs_bmapi_reserve_delalloc(
struct xfs_inode *ip,
xfs_fileoff_t aoff,
......@@ -4170,91 +4170,6 @@ xfs_bmapi_reserve_delalloc(
return error;
}
/*
* Map file blocks to filesystem blocks, adding delayed allocations as needed.
*/
int
xfs_bmapi_delay(
struct xfs_inode *ip, /* incore inode */
xfs_fileoff_t bno, /* starting file offs. mapped */
xfs_filblks_t len, /* length to map in file */
struct xfs_bmbt_irec *mval, /* output: map values */
int *nmap, /* i/o: mval size/count */
int flags) /* XFS_BMAPI_... */
{
struct xfs_mount *mp = ip->i_mount;
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
struct xfs_bmbt_irec got; /* current file extent record */
struct xfs_bmbt_irec prev; /* previous file extent record */
xfs_fileoff_t obno; /* old block number (offset) */
xfs_fileoff_t end; /* end of mapped file region */
xfs_extnum_t lastx; /* last useful extent number */
int eof; /* we've hit the end of extents */
int n = 0; /* current extent index */
int error = 0;
ASSERT(*nmap >= 1);
ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
ASSERT(!(flags & ~XFS_BMAPI_ENTIRE));
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
if (unlikely(XFS_TEST_ERROR(
(XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_EXTENTS &&
XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_BTREE),
mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
XFS_ERROR_REPORT("xfs_bmapi_delay", XFS_ERRLEVEL_LOW, mp);
return -EFSCORRUPTED;
}
if (XFS_FORCED_SHUTDOWN(mp))
return -EIO;
XFS_STATS_INC(mp, xs_blk_mapw);
if (!(ifp->if_flags & XFS_IFEXTENTS)) {
error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
if (error)
return error;
}
xfs_bmap_search_extents(ip, bno, XFS_DATA_FORK, &eof, &lastx, &got, &prev);
end = bno + len;
obno = bno;
while (bno < end && n < *nmap) {
if (eof || got.br_startoff > bno) {
error = xfs_bmapi_reserve_delalloc(ip, bno, len, &got,
&prev, &lastx, eof);
if (error) {
if (n == 0) {
*nmap = 0;
return error;
}
break;
}
}
/* set up the extent map to return. */
xfs_bmapi_trim_map(mval, &got, &bno, len, obno, end, n, flags);
xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
/* If we're done, stop now. */
if (bno >= end || n >= *nmap)
break;
/* Else go on to the next record. */
prev = got;
if (++lastx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t))
xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx), &got);
else
eof = 1;
}
*nmap = n;
return 0;
}
static int
xfs_bmapi_allocate(
struct xfs_bmalloca *bma)
......
......@@ -181,9 +181,6 @@ int xfs_bmap_read_extents(struct xfs_trans *tp, struct xfs_inode *ip,
int xfs_bmapi_read(struct xfs_inode *ip, xfs_fileoff_t bno,
xfs_filblks_t len, struct xfs_bmbt_irec *mval,
int *nmap, int flags);
int xfs_bmapi_delay(struct xfs_inode *ip, xfs_fileoff_t bno,
xfs_filblks_t len, struct xfs_bmbt_irec *mval,
int *nmap, int flags);
int xfs_bmapi_write(struct xfs_trans *tp, struct xfs_inode *ip,
xfs_fileoff_t bno, xfs_filblks_t len, int flags,
xfs_fsblock_t *firstblock, xfs_extlen_t total,
......@@ -202,5 +199,12 @@ int xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip,
struct xfs_defer_ops *dfops, enum shift_direction direction,
int num_exts);
int xfs_bmap_split_extent(struct xfs_inode *ip, xfs_fileoff_t split_offset);
struct xfs_bmbt_rec_host *
xfs_bmap_search_extents(struct xfs_inode *ip, xfs_fileoff_t bno,
int fork, int *eofp, xfs_extnum_t *lastxp,
struct xfs_bmbt_irec *gotp, struct xfs_bmbt_irec *prevp);
int xfs_bmapi_reserve_delalloc(struct xfs_inode *ip, xfs_fileoff_t aoff,
xfs_filblks_t len, struct xfs_bmbt_irec *got,
struct xfs_bmbt_irec *prev, xfs_extnum_t *lastx, int eof);
#endif /* __XFS_BMAP_H__ */
This diff is collapsed.
......@@ -25,8 +25,6 @@ struct xfs_bmbt_irec;
int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t,
struct xfs_bmbt_irec *, int);
int xfs_iomap_write_delay(struct xfs_inode *, xfs_off_t, size_t,
struct xfs_bmbt_irec *);
int xfs_iomap_write_allocate(struct xfs_inode *, xfs_off_t,
struct xfs_bmbt_irec *);
int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment