Commit 9b247179 authored by Darrick J. Wong

xfs: cache unlinked pointers in an rhashtable

Use a rhashtable to cache the unlinked list incore.  This should speed
up unlinked processing considerably when there are a lot of inodes on
the unlinked list because iunlink_remove no longer has to traverse an
entire bucket list to find which inode points to the one being removed.

The incore list structure records "X.next_unlinked = Y" relations, with
the rhashtable using Y to index the records.  This makes finding the
inode X that points to an inode Y very quick.  If our cache fails to find
anything we can always fall back on the old method.

FWIW this drastically reduces the amount of time it takes to remove
inodes from the unlinked list.  I wrote a program to open a lot of
O_TMPFILE files and then close them in the same order, which takes
a very long time if we have to traverse the unlinked lists.  With the
patch, I see:

+ /d/t/tmpfile/tmpfile
Opened 193531 files in 6.33s.
Closed 193531 files in 5.86s

real    0m12.192s
user    0m0.064s
sys     0m11.619s
+ cd /
+ umount /mnt

real    0m0.050s
user    0m0.004s
sys     0m0.030s

And without the patch:

+ /d/t/tmpfile/tmpfile
Opened 193588 files in 6.35s.
Closed 193588 files in 751.61s

real    12m38.853s
user    0m0.084s
sys     12m34.470s
+ cd /
+ umount /mnt

real    0m0.086s
user    0m0.000s
sys     0m0.060s
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
parent 4664c66c
...@@ -54,7 +54,8 @@ ...@@ -54,7 +54,8 @@
#define XFS_ERRTAG_BUF_LRU_REF 31 #define XFS_ERRTAG_BUF_LRU_REF 31
#define XFS_ERRTAG_FORCE_SCRUB_REPAIR 32 #define XFS_ERRTAG_FORCE_SCRUB_REPAIR 32
#define XFS_ERRTAG_FORCE_SUMMARY_RECALC 33 #define XFS_ERRTAG_FORCE_SUMMARY_RECALC 33
#define XFS_ERRTAG_MAX 34 #define XFS_ERRTAG_IUNLINK_FALLBACK 34
#define XFS_ERRTAG_MAX 35
/* /*
* Random factors for above tags, 1 means always, 2 means 1/2 time, etc. * Random factors for above tags, 1 means always, 2 means 1/2 time, etc.
...@@ -93,5 +94,6 @@ ...@@ -93,5 +94,6 @@
#define XFS_RANDOM_BUF_LRU_REF 2 #define XFS_RANDOM_BUF_LRU_REF 2
#define XFS_RANDOM_FORCE_SCRUB_REPAIR 1 #define XFS_RANDOM_FORCE_SCRUB_REPAIR 1
#define XFS_RANDOM_FORCE_SUMMARY_RECALC 1 #define XFS_RANDOM_FORCE_SUMMARY_RECALC 1
#define XFS_RANDOM_IUNLINK_FALLBACK (XFS_RANDOM_DEFAULT/10)
#endif /* __XFS_ERRORTAG_H_ */ #endif /* __XFS_ERRORTAG_H_ */
...@@ -51,6 +51,7 @@ static unsigned int xfs_errortag_random_default[] = { ...@@ -51,6 +51,7 @@ static unsigned int xfs_errortag_random_default[] = {
XFS_RANDOM_BUF_LRU_REF, XFS_RANDOM_BUF_LRU_REF,
XFS_RANDOM_FORCE_SCRUB_REPAIR, XFS_RANDOM_FORCE_SCRUB_REPAIR,
XFS_RANDOM_FORCE_SUMMARY_RECALC, XFS_RANDOM_FORCE_SUMMARY_RECALC,
XFS_RANDOM_IUNLINK_FALLBACK,
}; };
struct xfs_errortag_attr { struct xfs_errortag_attr {
...@@ -159,6 +160,7 @@ XFS_ERRORTAG_ATTR_RW(log_item_pin, XFS_ERRTAG_LOG_ITEM_PIN); ...@@ -159,6 +160,7 @@ XFS_ERRORTAG_ATTR_RW(log_item_pin, XFS_ERRTAG_LOG_ITEM_PIN);
XFS_ERRORTAG_ATTR_RW(buf_lru_ref, XFS_ERRTAG_BUF_LRU_REF); XFS_ERRORTAG_ATTR_RW(buf_lru_ref, XFS_ERRTAG_BUF_LRU_REF);
XFS_ERRORTAG_ATTR_RW(force_repair, XFS_ERRTAG_FORCE_SCRUB_REPAIR); XFS_ERRORTAG_ATTR_RW(force_repair, XFS_ERRTAG_FORCE_SCRUB_REPAIR);
XFS_ERRORTAG_ATTR_RW(bad_summary, XFS_ERRTAG_FORCE_SUMMARY_RECALC); XFS_ERRORTAG_ATTR_RW(bad_summary, XFS_ERRTAG_FORCE_SUMMARY_RECALC);
XFS_ERRORTAG_ATTR_RW(iunlink_fallback, XFS_ERRTAG_IUNLINK_FALLBACK);
static struct attribute *xfs_errortag_attrs[] = { static struct attribute *xfs_errortag_attrs[] = {
XFS_ERRORTAG_ATTR_LIST(noerror), XFS_ERRORTAG_ATTR_LIST(noerror),
...@@ -195,6 +197,7 @@ static struct attribute *xfs_errortag_attrs[] = { ...@@ -195,6 +197,7 @@ static struct attribute *xfs_errortag_attrs[] = {
XFS_ERRORTAG_ATTR_LIST(buf_lru_ref), XFS_ERRORTAG_ATTR_LIST(buf_lru_ref),
XFS_ERRORTAG_ATTR_LIST(force_repair), XFS_ERRORTAG_ATTR_LIST(force_repair),
XFS_ERRORTAG_ATTR_LIST(bad_summary), XFS_ERRORTAG_ATTR_LIST(bad_summary),
XFS_ERRORTAG_ATTR_LIST(iunlink_fallback),
NULL, NULL,
}; };
......
This diff is collapsed.
...@@ -500,4 +500,7 @@ extern struct kmem_zone *xfs_inode_zone; ...@@ -500,4 +500,7 @@ extern struct kmem_zone *xfs_inode_zone;
bool xfs_inode_verify_forks(struct xfs_inode *ip); bool xfs_inode_verify_forks(struct xfs_inode *ip);
int xfs_iunlink_init(struct xfs_perag *pag);
void xfs_iunlink_destroy(struct xfs_perag *pag);
#endif /* __XFS_INODE_H__ */ #endif /* __XFS_INODE_H__ */
...@@ -149,6 +149,7 @@ xfs_free_perag( ...@@ -149,6 +149,7 @@ xfs_free_perag(
spin_unlock(&mp->m_perag_lock); spin_unlock(&mp->m_perag_lock);
ASSERT(pag); ASSERT(pag);
ASSERT(atomic_read(&pag->pag_ref) == 0); ASSERT(atomic_read(&pag->pag_ref) == 0);
xfs_iunlink_destroy(pag);
xfs_buf_hash_destroy(pag); xfs_buf_hash_destroy(pag);
mutex_destroy(&pag->pag_ici_reclaim_lock); mutex_destroy(&pag->pag_ici_reclaim_lock);
call_rcu(&pag->rcu_head, __xfs_free_perag); call_rcu(&pag->rcu_head, __xfs_free_perag);
...@@ -227,6 +228,9 @@ xfs_initialize_perag( ...@@ -227,6 +228,9 @@ xfs_initialize_perag(
/* first new pag is fully initialized */ /* first new pag is fully initialized */
if (first_initialised == NULLAGNUMBER) if (first_initialised == NULLAGNUMBER)
first_initialised = index; first_initialised = index;
error = xfs_iunlink_init(pag);
if (error)
goto out_hash_destroy;
} }
index = xfs_set_inode_alloc(mp, agcount); index = xfs_set_inode_alloc(mp, agcount);
...@@ -249,6 +253,7 @@ xfs_initialize_perag( ...@@ -249,6 +253,7 @@ xfs_initialize_perag(
if (!pag) if (!pag)
break; break;
xfs_buf_hash_destroy(pag); xfs_buf_hash_destroy(pag);
xfs_iunlink_destroy(pag);
mutex_destroy(&pag->pag_ici_reclaim_lock); mutex_destroy(&pag->pag_ici_reclaim_lock);
kmem_free(pag); kmem_free(pag);
} }
......
...@@ -396,6 +396,13 @@ typedef struct xfs_perag { ...@@ -396,6 +396,13 @@ typedef struct xfs_perag {
/* reference count */ /* reference count */
uint8_t pagf_refcount_level; uint8_t pagf_refcount_level;
/*
* Unlinked inode information. This incore information reflects
* data stored in the AGI, so callers must hold the AGI buffer lock
* or have some other means to control concurrency.
*/
struct rhashtable pagi_unlinked_hash;
} xfs_perag_t; } xfs_perag_t;
static inline struct xfs_ag_resv * static inline struct xfs_ag_resv *
......
...@@ -3447,6 +3447,7 @@ DEFINE_EVENT(xfs_ag_inode_class, name, \ ...@@ -3447,6 +3447,7 @@ DEFINE_EVENT(xfs_ag_inode_class, name, \
TP_ARGS(ip)) TP_ARGS(ip))
DEFINE_AGINODE_EVENT(xfs_iunlink); DEFINE_AGINODE_EVENT(xfs_iunlink);
DEFINE_AGINODE_EVENT(xfs_iunlink_remove); DEFINE_AGINODE_EVENT(xfs_iunlink_remove);
DEFINE_AG_EVENT(xfs_iunlink_map_prev_fallback);
#endif /* _TRACE_XFS_H */ #endif /* _TRACE_XFS_H */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment