Commit c473a332 authored by Darrick J. Wong

xfs: stagger the starting AG of scrub iscans to reduce contention

Online directory and parent repairs on parent-pointer equipped
filesystems have shown that starting a large number of parallel iscans
causes a lot of AGI buffer contention.  Try to reduce this by making it
so that iscans wrap around the end of the filesystem, and using a
rotor to stagger where each scanner begins.  Surprisingly, this boosts
CPU utilization (on the author's test machines) from effectively
single-threaded to 160%.  Not great, but see the next patch.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
parent 4e98cc90
...@@ -170,10 +170,24 @@ xchk_iscan_move_cursor( ...@@ -170,10 +170,24 @@ xchk_iscan_move_cursor(
{ {
struct xfs_scrub *sc = iscan->sc; struct xfs_scrub *sc = iscan->sc;
struct xfs_mount *mp = sc->mp; struct xfs_mount *mp = sc->mp;
xfs_ino_t cursor, visited;
BUILD_BUG_ON(XFS_MAXINUMBER == NULLFSINO);
/*
* Special-case ino == 0 here so that we never set visited_ino to
* NULLFSINO when wrapping around EOFS, for that will let through all
* live updates.
*/
cursor = XFS_AGINO_TO_INO(mp, agno, agino);
if (cursor == 0)
visited = XFS_MAXINUMBER;
else
visited = cursor - 1;
mutex_lock(&iscan->lock); mutex_lock(&iscan->lock);
iscan->cursor_ino = XFS_AGINO_TO_INO(mp, agno, agino); iscan->cursor_ino = cursor;
iscan->__visited_ino = iscan->cursor_ino - 1; iscan->__visited_ino = visited;
trace_xchk_iscan_move_cursor(iscan); trace_xchk_iscan_move_cursor(iscan);
mutex_unlock(&iscan->lock); mutex_unlock(&iscan->lock);
} }
...@@ -257,12 +271,13 @@ xchk_iscan_advance( ...@@ -257,12 +271,13 @@ xchk_iscan_advance(
* Did not find any more inodes in this AG, move on to the next * Did not find any more inodes in this AG, move on to the next
* AG. * AG.
*/ */
xchk_iscan_move_cursor(iscan, ++agno, 0); agno = (agno + 1) % mp->m_sb.sb_agcount;
xchk_iscan_move_cursor(iscan, agno, 0);
xfs_trans_brelse(sc->tp, agi_bp); xfs_trans_brelse(sc->tp, agi_bp);
xfs_perag_put(pag); xfs_perag_put(pag);
trace_xchk_iscan_advance_ag(iscan); trace_xchk_iscan_advance_ag(iscan);
} while (agno < mp->m_sb.sb_agcount); } while (iscan->cursor_ino != iscan->scan_start_ino);
xchk_iscan_finish(iscan); xchk_iscan_finish(iscan);
return 0; return 0;
...@@ -420,6 +435,23 @@ xchk_iscan_teardown( ...@@ -420,6 +435,23 @@ xchk_iscan_teardown(
mutex_destroy(&iscan->lock); mutex_destroy(&iscan->lock);
} }
/*
* Pick an AG from which to start a scan.
*
* Every call bumps a function-local static counter, so the counter is
* shared by all callers and consecutive calls step through the AG numbers.
* The return value is the inode number that XFS_AGINO_TO_INO() builds from
* the chosen AG number and agino 0.
*/
static inline xfs_ino_t
xchk_iscan_rotor(
struct xfs_mount *mp)
{
static atomic_t agi_rotor;
unsigned int r = atomic_inc_return(&agi_rotor) - 1;
/*
* Rotoring *backwards* through the AGs, so we add one here before
* subtracting from the agcount to arrive at an AG number.
*
* After this statement r is in the range [1, sb_agcount], so
* sb_agcount - r below is in the range [0, sb_agcount - 1].
*/
r = (r % mp->m_sb.sb_agcount) + 1;
return XFS_AGINO_TO_INO(mp, mp->m_sb.sb_agcount - r, 0);
}
/* /*
* Set ourselves up to start an inode scan. If the @iget_timeout and * Set ourselves up to start an inode scan. If the @iget_timeout and
* @iget_retry_delay parameters are set, the scan will try to iget each inode * @iget_retry_delay parameters are set, the scan will try to iget each inode
...@@ -434,15 +466,20 @@ xchk_iscan_start( ...@@ -434,15 +466,20 @@ xchk_iscan_start(
unsigned int iget_retry_delay, unsigned int iget_retry_delay,
struct xchk_iscan *iscan) struct xchk_iscan *iscan)
{ {
xfs_ino_t start_ino;
start_ino = xchk_iscan_rotor(sc->mp);
iscan->sc = sc; iscan->sc = sc;
clear_bit(XCHK_ISCAN_OPSTATE_ABORTED, &iscan->__opstate); clear_bit(XCHK_ISCAN_OPSTATE_ABORTED, &iscan->__opstate);
iscan->iget_timeout = iget_timeout; iscan->iget_timeout = iget_timeout;
iscan->iget_retry_delay = iget_retry_delay; iscan->iget_retry_delay = iget_retry_delay;
iscan->__visited_ino = 0; iscan->__visited_ino = start_ino;
iscan->cursor_ino = 0; iscan->cursor_ino = start_ino;
iscan->scan_start_ino = start_ino;
mutex_init(&iscan->lock); mutex_init(&iscan->lock);
trace_xchk_iscan_start(iscan); trace_xchk_iscan_start(iscan, start_ino);
} }
/* /*
...@@ -471,15 +508,45 @@ xchk_iscan_want_live_update( ...@@ -471,15 +508,45 @@ xchk_iscan_want_live_update(
struct xchk_iscan *iscan, struct xchk_iscan *iscan,
xfs_ino_t ino) xfs_ino_t ino)
{ {
bool ret; bool ret = false;
if (xchk_iscan_aborted(iscan)) if (xchk_iscan_aborted(iscan))
return false; return false;
mutex_lock(&iscan->lock); mutex_lock(&iscan->lock);
trace_xchk_iscan_want_live_update(iscan, ino); trace_xchk_iscan_want_live_update(iscan, ino);
ret = iscan->__visited_ino >= ino;
mutex_unlock(&iscan->lock);
/* Scan is finished, caller should receive all updates. */
if (iscan->__visited_ino == NULLFSINO) {
ret = true;
goto unlock;
}
/*
* The visited cursor hasn't yet wrapped around the end of the FS. If
* @ino is inside the starred range, the caller should receive updates:
*
* 0 ------------ S ************ V ------------ EOFS
*/
if (iscan->scan_start_ino <= iscan->__visited_ino) {
if (ino >= iscan->scan_start_ino &&
ino <= iscan->__visited_ino)
ret = true;
goto unlock;
}
/*
* The visited cursor wrapped around the end of the FS. If @ino is
* inside the starred range, the caller should receive updates:
*
* 0 ************ V ------------ S ************ EOFS
*/
if (ino >= iscan->scan_start_ino || ino <= iscan->__visited_ino)
ret = true;
unlock:
mutex_unlock(&iscan->lock);
return ret; return ret;
} }
...@@ -12,6 +12,13 @@ struct xchk_iscan { ...@@ -12,6 +12,13 @@ struct xchk_iscan {
/* Lock to protect the scan cursor. */ /* Lock to protect the scan cursor. */
struct mutex lock; struct mutex lock;
/*
* This is the first inode in the inumber address space that we
* examined. When the scan wraps around back to here, the scan is
* finished.
*/
xfs_ino_t scan_start_ino;
/* This is the inode that will be examined next. */ /* This is the inode that will be examined next. */
xfs_ino_t cursor_ino; xfs_ino_t cursor_ino;
......
...@@ -1173,25 +1173,27 @@ DEFINE_EVENT(xchk_iscan_class, name, \ ...@@ -1173,25 +1173,27 @@ DEFINE_EVENT(xchk_iscan_class, name, \
DEFINE_ISCAN_EVENT(xchk_iscan_move_cursor); DEFINE_ISCAN_EVENT(xchk_iscan_move_cursor);
DEFINE_ISCAN_EVENT(xchk_iscan_visit); DEFINE_ISCAN_EVENT(xchk_iscan_visit);
DEFINE_ISCAN_EVENT(xchk_iscan_advance_ag); DEFINE_ISCAN_EVENT(xchk_iscan_advance_ag);
DEFINE_ISCAN_EVENT(xchk_iscan_start);
DECLARE_EVENT_CLASS(xchk_iscan_ino_class, DECLARE_EVENT_CLASS(xchk_iscan_ino_class,
TP_PROTO(struct xchk_iscan *iscan, xfs_ino_t ino), TP_PROTO(struct xchk_iscan *iscan, xfs_ino_t ino),
TP_ARGS(iscan, ino), TP_ARGS(iscan, ino),
TP_STRUCT__entry( TP_STRUCT__entry(
__field(dev_t, dev) __field(dev_t, dev)
__field(xfs_ino_t, startino)
__field(xfs_ino_t, cursor) __field(xfs_ino_t, cursor)
__field(xfs_ino_t, visited) __field(xfs_ino_t, visited)
__field(xfs_ino_t, ino) __field(xfs_ino_t, ino)
), ),
TP_fast_assign( TP_fast_assign(
__entry->dev = iscan->sc->mp->m_super->s_dev; __entry->dev = iscan->sc->mp->m_super->s_dev;
__entry->startino = iscan->scan_start_ino;
__entry->cursor = iscan->cursor_ino; __entry->cursor = iscan->cursor_ino;
__entry->visited = iscan->__visited_ino; __entry->visited = iscan->__visited_ino;
__entry->ino = ino; __entry->ino = ino;
), ),
TP_printk("dev %d:%d iscan cursor 0x%llx visited 0x%llx ino 0x%llx", TP_printk("dev %d:%d iscan start 0x%llx cursor 0x%llx visited 0x%llx ino 0x%llx",
MAJOR(__entry->dev), MINOR(__entry->dev), MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->startino,
__entry->cursor, __entry->cursor,
__entry->visited, __entry->visited,
__entry->ino) __entry->ino)
...@@ -1201,6 +1203,7 @@ DEFINE_EVENT(xchk_iscan_ino_class, name, \ ...@@ -1201,6 +1203,7 @@ DEFINE_EVENT(xchk_iscan_ino_class, name, \
TP_PROTO(struct xchk_iscan *iscan, xfs_ino_t ino), \ TP_PROTO(struct xchk_iscan *iscan, xfs_ino_t ino), \
TP_ARGS(iscan, ino)) TP_ARGS(iscan, ino))
DEFINE_ISCAN_INO_EVENT(xchk_iscan_want_live_update); DEFINE_ISCAN_INO_EVENT(xchk_iscan_want_live_update);
DEFINE_ISCAN_INO_EVENT(xchk_iscan_start);
TRACE_EVENT(xchk_iscan_iget, TRACE_EVENT(xchk_iscan_iget,
TP_PROTO(struct xchk_iscan *iscan, int error), TP_PROTO(struct xchk_iscan *iscan, int error),
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment