Commit 956f1b8f authored by Dave Chinner

Merge tag 'rmap-speedups-5.19_2022-04-28' of git://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux into xfs-5.19-for-next

xfs: fix rmap inefficiencies

Reduce the performance impact of the reverse mapping btree when
reflink is enabled by using the much faster non-overlapped btree
lookup functions when we're searching the rmap index with a fully
specified key.  If we find the exact record we're looking for,
great!  We don't have to perform the full overlapped scan.  For
filesystems with high sharing factors this reduces the xfs_scrub
runtime by a good 15%.

This has been shown to reduce the fstests runtime for realtime rmap
configurations by 30%, since the lack of AGs severely limits
scalability.

Signed-off-by: Dave Chinner <david@fromorbit.com>
parents 5e116e99 1edf8056
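
The structure of the optimization is easy to see in miniature. The
following is a minimal, self-contained userspace sketch, not kernel
code: the "btree" is just a sorted array, and rmap_rec, fast_lookup_le(),
overlapped_query() and find_mapping() are hypothetical stand-ins for the
real xfs_btree machinery. fast_lookup_le() plays the role of the
non-overlapped XFS_LOOKUP_LE search; overlapped_query() plays the role
of the overlapped range query that the rmap code falls back to when
reflink lets records overlap.

#include <stdio.h>

struct rmap_rec {
	unsigned long	startblock;
	unsigned long	blockcount;
	unsigned long	owner;
};

/* Records sorted by startblock; reflink allows them to overlap. */
static const struct rmap_rec index_recs[] = {
	{  0,  4, 1 },
	{ 10,  8, 2 },
	{ 12,  2, 3 },	/* shares blocks 12-13 with the record above */
	{ 30, 16, 2 },
};
#define NRECS	(sizeof(index_recs) / sizeof(index_recs[0]))

/* Does @rec map @bno for @owner? */
static int rec_matches(const struct rmap_rec *rec, unsigned long bno,
		unsigned long owner)
{
	return rec->owner == owner && rec->startblock <= bno &&
	       bno < rec->startblock + rec->blockcount;
}

/* Fast path: binary search for the last record with startblock <= bno,
 * the moral equivalent of a non-overlapped XFS_LOOKUP_LE lookup. */
static int fast_lookup_le(unsigned long bno, struct rmap_rec *rec)
{
	int	lo = 0, hi = NRECS - 1, found = -1;

	while (lo <= hi) {
		int	mid = (lo + hi) / 2;

		if (index_recs[mid].startblock <= bno) {
			found = mid;
			lo = mid + 1;
		} else {
			hi = mid - 1;
		}
	}
	if (found < 0)
		return 0;
	*rec = index_recs[found];
	return 1;
}

/* Slow path: walk every candidate record, like the overlapped query. */
static int overlapped_query(unsigned long bno, unsigned long owner,
		struct rmap_rec *rec)
{
	unsigned int	i;

	for (i = 0; i < NRECS; i++) {
		if (rec_matches(&index_recs[i], bno, owner)) {
			*rec = index_recs[i];
			return 1;
		}
	}
	return 0;
}

/* Try the cheap lookup first; fall back to the full scan on a miss. */
static int find_mapping(unsigned long bno, unsigned long owner,
		struct rmap_rec *rec)
{
	if (fast_lookup_le(bno, rec) && rec_matches(rec, bno, owner))
		return 1;
	return overlapped_query(bno, owner, rec);
}

int main(void)
{
	struct rmap_rec	rec;

	/* Unshared case: the fast path alone finds owner 3 at block 13. */
	if (find_mapping(13, 3, &rec))
		printf("owner 3 -> [%lu, +%lu)\n", rec.startblock,
				rec.blockcount);

	/* Shared case: the LE lookup lands on owner 3's record, so the
	 * overlapped fallback must find owner 2's mapping of block 13. */
	if (find_mapping(13, 2, &rec))
		printf("owner 2 -> [%lu, +%lu)\n", rec.startblock,
				rec.blockcount);
	return 0;
}

The shared case is exactly why the fast path cannot be used
unconditionally: with overlapping records, the rightmost record whose
key is <= the search key is not necessarily the one that matches.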
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
@@ -34,18 +34,32 @@ int
 xfs_rmap_lookup_le(
 	struct xfs_btree_cur	*cur,
 	xfs_agblock_t		bno,
-	xfs_extlen_t		len,
 	uint64_t		owner,
 	uint64_t		offset,
 	unsigned int		flags,
+	struct xfs_rmap_irec	*irec,
 	int			*stat)
 {
+	int			get_stat = 0;
+	int			error;
+
 	cur->bc_rec.r.rm_startblock = bno;
-	cur->bc_rec.r.rm_blockcount = len;
+	cur->bc_rec.r.rm_blockcount = 0;
 	cur->bc_rec.r.rm_owner = owner;
 	cur->bc_rec.r.rm_offset = offset;
 	cur->bc_rec.r.rm_flags = flags;
-	return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
+
+	error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
+	if (error || !(*stat) || !irec)
+		return error;
+
+	error = xfs_rmap_get_rec(cur, irec, &get_stat);
+	if (error)
+		return error;
+	if (!get_stat)
+		return -EFSCORRUPTED;
+
+	return 0;
 }
 
 /*
@@ -251,7 +265,6 @@ xfs_rmap_get_rec(
 struct xfs_find_left_neighbor_info {
 	struct xfs_rmap_irec	high;
 	struct xfs_rmap_irec	*irec;
-	int			*stat;
 };
 
 /* For each rmap given, figure out if it matches the key we want. */
@@ -276,7 +289,6 @@ xfs_rmap_find_left_neighbor_helper(
 		return 0;
 
 	*info->irec = *rec;
-	*info->stat = 1;
 	return -ECANCELED;
 }
 
@@ -285,7 +297,7 @@ xfs_rmap_find_left_neighbor_helper(
  * return a match with the same owner and adjacent physical and logical
  * block ranges.
  */
-int
+STATIC int
 xfs_rmap_find_left_neighbor(
 	struct xfs_btree_cur	*cur,
 	xfs_agblock_t		bno,
@@ -296,6 +308,7 @@ xfs_rmap_find_left_neighbor(
 	int			*stat)
 {
 	struct xfs_find_left_neighbor_info info;
+	int			found = 0;
 	int			error;
 
 	*stat = 0;
@@ -313,21 +326,44 @@ xfs_rmap_find_left_neighbor(
 	info.high.rm_flags = flags;
 	info.high.rm_blockcount = 0;
 	info.irec = irec;
-	info.stat = stat;
 
 	trace_xfs_rmap_find_left_neighbor_query(cur->bc_mp,
 			cur->bc_ag.pag->pag_agno, bno, 0, owner, offset, flags);
 
-	error = xfs_rmap_query_range(cur, &info.high, &info.high,
-			xfs_rmap_find_left_neighbor_helper, &info);
-	if (error == -ECANCELED)
-		error = 0;
-	if (*stat)
-		trace_xfs_rmap_find_left_neighbor_result(cur->bc_mp,
-				cur->bc_ag.pag->pag_agno, irec->rm_startblock,
-				irec->rm_blockcount, irec->rm_owner,
-				irec->rm_offset, irec->rm_flags);
-	return error;
+	/*
+	 * Historically, we always used the range query to walk every reverse
+	 * mapping that could possibly overlap the key that the caller asked
+	 * for, and filter out the ones that don't.  That is very slow when
+	 * there are a lot of records.
+	 *
+	 * However, there are two scenarios where the classic btree search can
+	 * produce correct results -- if the index contains a record that is an
+	 * exact match for the lookup key; and if there are no other records
+	 * between the record we want and the key we supplied.
+	 *
+	 * As an optimization, try a non-overlapped lookup first.  This makes
+	 * extent conversion and remap operations run a bit faster if the
+	 * physical extents aren't being shared.  If we don't find what we
+	 * want, we fall back to the overlapped query.
+	 */
+	error = xfs_rmap_lookup_le(cur, bno, owner, offset, flags, irec,
+			&found);
+	if (error)
+		return error;
+	if (found)
+		error = xfs_rmap_find_left_neighbor_helper(cur, irec, &info);
+	if (!error)
+		error = xfs_rmap_query_range(cur, &info.high, &info.high,
+				xfs_rmap_find_left_neighbor_helper, &info);
+	if (error != -ECANCELED)
+		return error;
+
+	*stat = 1;
+	trace_xfs_rmap_find_left_neighbor_result(cur->bc_mp,
+			cur->bc_ag.pag->pag_agno, irec->rm_startblock,
+			irec->rm_blockcount, irec->rm_owner, irec->rm_offset,
+			irec->rm_flags);
+	return 0;
 }
 
 /* For each rmap given, figure out if it matches the key we want. */
@@ -353,7 +389,6 @@ xfs_rmap_lookup_le_range_helper(
 		return 0;
 
 	*info->irec = *rec;
-	*info->stat = 1;
 	return -ECANCELED;
 }
 
@@ -374,6 +409,7 @@ xfs_rmap_lookup_le_range(
 	int			*stat)
 {
 	struct xfs_find_left_neighbor_info info;
+	int			found = 0;
 	int			error;
 
 	info.high.rm_startblock = bno;
@@ -386,20 +422,44 @@ xfs_rmap_lookup_le_range(
 	info.high.rm_blockcount = 0;
 	*stat = 0;
 	info.irec = irec;
-	info.stat = stat;
 
-	trace_xfs_rmap_lookup_le_range(cur->bc_mp,
-			cur->bc_ag.pag->pag_agno, bno, 0, owner, offset, flags);
+	trace_xfs_rmap_lookup_le_range(cur->bc_mp, cur->bc_ag.pag->pag_agno,
+			bno, 0, owner, offset, flags);
 
-	error = xfs_rmap_query_range(cur, &info.high, &info.high,
-			xfs_rmap_lookup_le_range_helper, &info);
-	if (error == -ECANCELED)
-		error = 0;
-	if (*stat)
-		trace_xfs_rmap_lookup_le_range_result(cur->bc_mp,
-				cur->bc_ag.pag->pag_agno, irec->rm_startblock,
-				irec->rm_blockcount, irec->rm_owner,
-				irec->rm_offset, irec->rm_flags);
-	return error;
+	/*
+	 * Historically, we always used the range query to walk every reverse
+	 * mapping that could possibly overlap the key that the caller asked
+	 * for, and filter out the ones that don't.  That is very slow when
+	 * there are a lot of records.
+	 *
+	 * However, there are two scenarios where the classic btree search can
+	 * produce correct results -- if the index contains a record that is an
+	 * exact match for the lookup key; and if there are no other records
+	 * between the record we want and the key we supplied.
+	 *
+	 * As an optimization, try a non-overlapped lookup first.  This makes
+	 * scrub run much faster on most filesystems because bmbt records are
+	 * usually an exact match for rmap records.  If we don't find what we
+	 * want, we fall back to the overlapped query.
+	 */
+	error = xfs_rmap_lookup_le(cur, bno, owner, offset, flags, irec,
+			&found);
+	if (error)
+		return error;
+	if (found)
+		error = xfs_rmap_lookup_le_range_helper(cur, irec, &info);
+	if (!error)
+		error = xfs_rmap_query_range(cur, &info.high, &info.high,
+				xfs_rmap_lookup_le_range_helper, &info);
+	if (error != -ECANCELED)
+		return error;
+
+	*stat = 1;
+	trace_xfs_rmap_lookup_le_range_result(cur->bc_mp,
+			cur->bc_ag.pag->pag_agno, irec->rm_startblock,
+			irec->rm_blockcount, irec->rm_owner, irec->rm_offset,
+			irec->rm_flags);
+	return 0;
 }
 
 /*
@@ -510,7 +570,7 @@ xfs_rmap_unmap(
 	 * for the AG headers at rm_startblock == 0 created by mkfs/growfs that
 	 * will not ever be removed from the tree.
 	 */
-	error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, flags, &i);
+	error = xfs_rmap_lookup_le(cur, bno, owner, offset, flags, &ltrec, &i);
 	if (error)
 		goto out_error;
 	if (XFS_IS_CORRUPT(mp, i != 1)) {
@@ -518,13 +578,6 @@ xfs_rmap_unmap(
 		goto out_error;
 	}
 
-	error = xfs_rmap_get_rec(cur, &ltrec, &i);
-	if (error)
-		goto out_error;
-	if (XFS_IS_CORRUPT(mp, i != 1)) {
-		error = -EFSCORRUPTED;
-		goto out_error;
-	}
 	trace_xfs_rmap_lookup_le_range_result(cur->bc_mp,
 			cur->bc_ag.pag->pag_agno, ltrec.rm_startblock,
 			ltrec.rm_blockcount, ltrec.rm_owner,
@@ -786,18 +839,11 @@ xfs_rmap_map(
 	 * record for our insertion point. This will also give us the record for
 	 * start block contiguity tests.
 	 */
-	error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, flags,
-			&have_lt);
+	error = xfs_rmap_lookup_le(cur, bno, owner, offset, flags, &ltrec,
+			&have_lt);
 	if (error)
 		goto out_error;
 	if (have_lt) {
-		error = xfs_rmap_get_rec(cur, &ltrec, &have_lt);
-		if (error)
-			goto out_error;
-		if (XFS_IS_CORRUPT(mp, have_lt != 1)) {
-			error = -EFSCORRUPTED;
-			goto out_error;
-		}
 		trace_xfs_rmap_lookup_le_range_result(cur->bc_mp,
 				cur->bc_ag.pag->pag_agno, ltrec.rm_startblock,
 				ltrec.rm_blockcount, ltrec.rm_owner,
@@ -1022,7 +1068,7 @@ xfs_rmap_convert(
 	 * record for our insertion point. This will also give us the record for
 	 * start block contiguity tests.
 	 */
-	error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, oldext, &i);
+	error = xfs_rmap_lookup_le(cur, bno, owner, offset, oldext, &PREV, &i);
 	if (error)
 		goto done;
 	if (XFS_IS_CORRUPT(mp, i != 1)) {
@@ -1030,13 +1076,6 @@ xfs_rmap_convert(
 		goto done;
 	}
 
-	error = xfs_rmap_get_rec(cur, &PREV, &i);
-	if (error)
-		goto done;
-	if (XFS_IS_CORRUPT(mp, i != 1)) {
-		error = -EFSCORRUPTED;
-		goto done;
-	}
 	trace_xfs_rmap_lookup_le_range_result(cur->bc_mp,
 			cur->bc_ag.pag->pag_agno, PREV.rm_startblock,
 			PREV.rm_blockcount, PREV.rm_owner,
@@ -1140,7 +1179,7 @@ xfs_rmap_convert(
 			_RET_IP_);
 
 	/* reset the cursor back to PREV */
-	error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, oldext, &i);
+	error = xfs_rmap_lookup_le(cur, bno, owner, offset, oldext, NULL, &i);
 	if (error)
 		goto done;
 	if (XFS_IS_CORRUPT(mp, i != 1)) {
@@ -2677,7 +2716,7 @@ xfs_rmap_record_exists(
 	ASSERT(XFS_RMAP_NON_INODE_OWNER(owner) ||
 	       (flags & XFS_RMAP_BMBT_BLOCK));
 
-	error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, flags,
-			&has_record);
+	error = xfs_rmap_lookup_le(cur, bno, owner, offset, flags, &irec,
+			&has_record);
 	if (error)
 		return error;
@@ -2686,14 +2725,6 @@ xfs_rmap_record_exists(
 		return 0;
 	}
 
-	error = xfs_rmap_get_rec(cur, &irec, &has_record);
-	if (error)
-		return error;
-	if (!has_record) {
-		*has_rmap = false;
-		return 0;
-	}
-
 	*has_rmap = (irec.rm_owner == owner && irec.rm_startblock <= bno &&
 		     irec.rm_startblock + irec.rm_blockcount >= bno + len);
 	return 0;
diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h
@@ -122,8 +122,8 @@ int xfs_rmap_free(struct xfs_trans *tp, struct xfs_buf *agbp,
 		const struct xfs_owner_info *oinfo);
 
 int xfs_rmap_lookup_le(struct xfs_btree_cur *cur, xfs_agblock_t bno,
-		xfs_extlen_t len, uint64_t owner, uint64_t offset,
-		unsigned int flags, int *stat);
+		uint64_t owner, uint64_t offset, unsigned int flags,
+		struct xfs_rmap_irec *irec, int *stat);
 int xfs_rmap_lookup_eq(struct xfs_btree_cur *cur, xfs_agblock_t bno,
 		xfs_extlen_t len, uint64_t owner, uint64_t offset,
 		unsigned int flags, int *stat);
@@ -184,9 +184,6 @@ int xfs_rmap_finish_one(struct xfs_trans *tp, enum xfs_rmap_intent_type type,
 		xfs_fsblock_t startblock, xfs_filblks_t blockcount,
 		xfs_exntst_t state, struct xfs_btree_cur **pcur);
 
-int xfs_rmap_find_left_neighbor(struct xfs_btree_cur *cur, xfs_agblock_t bno,
-		uint64_t owner, uint64_t offset, unsigned int flags,
-		struct xfs_rmap_irec *irec, int *stat);
 int xfs_rmap_lookup_le_range(struct xfs_btree_cur *cur, xfs_agblock_t bno,
 		uint64_t owner, uint64_t offset, unsigned int flags,
 		struct xfs_rmap_irec *irec, int *stat);
diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c
@@ -133,29 +133,13 @@ xchk_bmap_get_rmap(
 	if (info->is_shared) {
 		error = xfs_rmap_lookup_le_range(info->sc->sa.rmap_cur, agbno,
 				owner, offset, rflags, rmap, &has_rmap);
-		if (!xchk_should_check_xref(info->sc, &error,
-				&info->sc->sa.rmap_cur))
-			return false;
-		goto out;
+	} else {
+		error = xfs_rmap_lookup_le(info->sc->sa.rmap_cur, agbno,
+				owner, offset, rflags, rmap, &has_rmap);
 	}
-
-	/*
-	 * Otherwise, use the (faster) regular lookup.
-	 */
-	error = xfs_rmap_lookup_le(info->sc->sa.rmap_cur, agbno, 0, owner,
-			offset, rflags, &has_rmap);
-	if (!xchk_should_check_xref(info->sc, &error,
-			&info->sc->sa.rmap_cur))
+	if (!xchk_should_check_xref(info->sc, &error, &info->sc->sa.rmap_cur))
 		return false;
-	if (!has_rmap)
-		goto out;
 
-	error = xfs_rmap_get_rec(info->sc->sa.rmap_cur, rmap, &has_rmap);
-	if (!xchk_should_check_xref(info->sc, &error,
-			&info->sc->sa.rmap_cur))
-		return false;
-
-out:
 	if (!has_rmap)
 		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
 			irec->br_startoff);
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
@@ -418,6 +418,7 @@ DECLARE_EVENT_CLASS(xfs_buf_class,
 		__field(unsigned, lockval)
 		__field(unsigned, flags)
 		__field(unsigned long, caller_ip)
+		__field(const void *, buf_ops)
 	),
 	TP_fast_assign(
 		__entry->dev = bp->b_target->bt_dev;
@@ -428,9 +429,10 @@ DECLARE_EVENT_CLASS(xfs_buf_class,
 		__entry->lockval = bp->b_sema.count;
 		__entry->flags = bp->b_flags;
 		__entry->caller_ip = caller_ip;
+		__entry->buf_ops = bp->b_ops;
 	),
 	TP_printk("dev %d:%d daddr 0x%llx bbcount 0x%x hold %d pincount %d "
-		  "lock %d flags %s caller %pS",
+		  "lock %d flags %s bufops %pS caller %pS",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long long)__entry->bno,
 		  __entry->nblks,
@@ -438,6 +440,7 @@ DECLARE_EVENT_CLASS(xfs_buf_class,
 		  __entry->pincount,
 		  __entry->lockval,
 		  __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
+		  __entry->buf_ops,
 		  (void *)__entry->caller_ip)
 )
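
For callers, the net effect of the xfs_rmap_lookup_le() signature change
is that the cursor positioning and the record fetch collapse into one
call. A hypothetical before/after fragment, assuming the usual cursor,
key, and record variables are already declared:

	/* Before: position the cursor, then fetch the record separately. */
	error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, flags, &stat);
	if (!error && stat)
		error = xfs_rmap_get_rec(cur, &irec, &stat);

	/* After: drop the unused len argument and pass an irec (or NULL to
	 * skip the fetch); a record that goes missing after a successful
	 * lookup is now reported as -EFSCORRUPTED by the helper itself. */
	error = xfs_rmap_lookup_le(cur, bno, owner, offset, flags, &irec, &stat);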