Commit e7b58f7c authored by Darrick J. Wong

xfs: teach buftargs to maintain their own buffer hashtable

Currently, cached buffers are indexed by per-AG hashtables.  This works
great for the data device, but won't work for in-memory btrees.  To
handle that use case, buftargs will need to be able to index buffers
independently of other data structures.

We accomplish this by hoisting the rhashtable and its lock into a
separate xfs_buf_cache structure, making the buftarg point to the
xfs_buf_cache structure, and reworking the various functions to use it.
This will enable the in-memory buftarg to come up with its own
xfs_buf_cache.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
parent 1c51ac09
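
For reference, the shape of the new API, consolidated from the hunks below
(xfs_buf_hash_params is the rhashtable parameter block already defined in
fs/xfs/xfs_buf.c):

	struct xfs_buf_cache {
		spinlock_t		bc_lock;	/* serializes hash insert/remove */
		struct rhashtable	bc_hash;	/* buffers indexed by disk address */
	};

	int xfs_buf_cache_init(struct xfs_buf_cache *bch);	/* spin_lock_init() + rhashtable_init() */
	void xfs_buf_cache_destroy(struct xfs_buf_cache *bch);	/* rhashtable_destroy() */

Each xfs_perag now embeds one of these as pag_bcache, and the buffer cache
code reaches it through new buftarg helpers rather than touching the perag
fields directly.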
@@ -264,7 +264,7 @@ xfs_free_perag(
 	xfs_defer_drain_free(&pag->pag_intents_drain);
 	cancel_delayed_work_sync(&pag->pag_blockgc_work);
-	xfs_buf_hash_destroy(pag);
+	xfs_buf_cache_destroy(&pag->pag_bcache);
 
 	/* drop the mount's active reference */
 	xfs_perag_rele(pag);
@@ -352,7 +352,7 @@ xfs_free_unused_perag_range(
 		spin_unlock(&mp->m_perag_lock);
 		if (!pag)
 			break;
-		xfs_buf_hash_destroy(pag);
+		xfs_buf_cache_destroy(&pag->pag_bcache);
 		xfs_defer_drain_free(&pag->pag_intents_drain);
 		kfree(pag);
 	}
@@ -419,7 +419,7 @@ xfs_initialize_perag(
 		pag->pagb_tree = RB_ROOT;
 #endif /* __KERNEL__ */
 
-		error = xfs_buf_hash_init(pag);
+		error = xfs_buf_cache_init(&pag->pag_bcache);
 		if (error)
 			goto out_remove_pag;
@@ -106,9 +106,7 @@ struct xfs_perag {
 	int		pag_ici_reclaimable;	/* reclaimable inodes */
 	unsigned long	pag_ici_reclaim_cursor;	/* reclaim restart point */
 
-	/* buffer cache index */
-	spinlock_t	pag_buf_lock;	/* lock for pag_buf_hash */
-	struct rhashtable pag_buf_hash;
+	struct xfs_buf_cache	pag_bcache;
 
 	/* background prealloc block trimming */
 	struct delayed_work	pag_blockgc_work;
@@ -510,18 +510,18 @@ static const struct rhashtable_params xfs_buf_hash_params = {
 };
 
 int
-xfs_buf_hash_init(
-	struct xfs_perag	*pag)
+xfs_buf_cache_init(
+	struct xfs_buf_cache	*bch)
 {
-	spin_lock_init(&pag->pag_buf_lock);
-	return rhashtable_init(&pag->pag_buf_hash, &xfs_buf_hash_params);
+	spin_lock_init(&bch->bc_lock);
+	return rhashtable_init(&bch->bc_hash, &xfs_buf_hash_params);
 }
 
 void
-xfs_buf_hash_destroy(
-	struct xfs_perag	*pag)
+xfs_buf_cache_destroy(
+	struct xfs_buf_cache	*bch)
 {
-	rhashtable_destroy(&pag->pag_buf_hash);
+	rhashtable_destroy(&bch->bc_hash);
 }
 
 static int
@@ -584,7 +584,7 @@ xfs_buf_find_lock(
 
 static inline int
 xfs_buf_lookup(
-	struct xfs_perag	*pag,
+	struct xfs_buf_cache	*bch,
 	struct xfs_buf_map	*map,
 	xfs_buf_flags_t		flags,
 	struct xfs_buf		**bpp)
@@ -593,7 +593,7 @@ xfs_buf_lookup(
 	int			error;
 
 	rcu_read_lock();
-	bp = rhashtable_lookup(&pag->pag_buf_hash, map, xfs_buf_hash_params);
+	bp = rhashtable_lookup(&bch->bc_hash, map, xfs_buf_hash_params);
 	if (!bp || !atomic_inc_not_zero(&bp->b_hold)) {
 		rcu_read_unlock();
 		return -ENOENT;
@@ -618,6 +618,7 @@ xfs_buf_lookup(
 static int
 xfs_buf_find_insert(
 	struct xfs_buftarg	*btp,
+	struct xfs_buf_cache	*bch,
 	struct xfs_perag	*pag,
 	struct xfs_buf_map	*cmap,
 	struct xfs_buf_map	*map,
@@ -646,18 +647,18 @@ xfs_buf_find_insert(
 		goto out_free_buf;
 	}
 
-	spin_lock(&pag->pag_buf_lock);
-	bp = rhashtable_lookup_get_insert_fast(&pag->pag_buf_hash,
+	spin_lock(&bch->bc_lock);
+	bp = rhashtable_lookup_get_insert_fast(&bch->bc_hash,
 			&new_bp->b_rhash_head, xfs_buf_hash_params);
 	if (IS_ERR(bp)) {
 		error = PTR_ERR(bp);
-		spin_unlock(&pag->pag_buf_lock);
+		spin_unlock(&bch->bc_lock);
 		goto out_free_buf;
 	}
 
 	if (bp) {
 		/* found an existing buffer */
 		atomic_inc(&bp->b_hold);
-		spin_unlock(&pag->pag_buf_lock);
+		spin_unlock(&bch->bc_lock);
 		error = xfs_buf_find_lock(bp, flags);
 		if (error)
 			xfs_buf_rele(bp);
@@ -668,17 +669,36 @@ xfs_buf_find_insert(
 
 	/* The new buffer keeps the perag reference until it is freed. */
 	new_bp->b_pag = pag;
-	spin_unlock(&pag->pag_buf_lock);
+	spin_unlock(&bch->bc_lock);
 	*bpp = new_bp;
 	return 0;
 
 out_free_buf:
 	xfs_buf_free(new_bp);
 out_drop_pag:
-	xfs_perag_put(pag);
+	if (pag)
+		xfs_perag_put(pag);
 	return error;
 }
 
+static inline struct xfs_perag *
+xfs_buftarg_get_pag(
+	struct xfs_buftarg	*btp,
+	const struct xfs_buf_map *map)
+{
+	struct xfs_mount	*mp = btp->bt_mount;
+
+	return xfs_perag_get(mp, xfs_daddr_to_agno(mp, map->bm_bn));
+}
+
+static inline struct xfs_buf_cache *
+xfs_buftarg_buf_cache(
+	struct xfs_buftarg	*btp,
+	struct xfs_perag	*pag)
+{
+	return &pag->pag_bcache;
+}
+
 /*
  * Assembles a buffer covering the specified range. The code is optimised for
  * cache hits, as metadata intensive workloads will see 3 orders of magnitude
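
Note that xfs_buftarg_buf_cache() takes the buftarg even though this patch
only ever dereferences the perag: it is the seam where an in-memory buftarg
can later supply its own cache. A sketch of that follow-up, assuming a
hypothetical per-target bt_cache pointer that this patch does not add:

	static inline struct xfs_buf_cache *
	xfs_buftarg_buf_cache(
		struct xfs_buftarg	*btp,
		struct xfs_perag	*pag)
	{
		/* Data-device buffers remain indexed by their AG's cache. */
		if (pag)
			return &pag->pag_bcache;
		/* Assumption: an in-memory buftarg would embed its own cache. */
		return btp->bt_cache;
	}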
@@ -692,6 +712,7 @@ xfs_buf_get_map(
 	xfs_buf_flags_t		flags,
 	struct xfs_buf		**bpp)
 {
+	struct xfs_buf_cache	*bch;
 	struct xfs_perag	*pag;
 	struct xfs_buf		*bp = NULL;
 	struct xfs_buf_map	cmap = { .bm_bn = map[0].bm_bn };
@@ -707,10 +728,10 @@ xfs_buf_get_map(
 	if (error)
 		return error;
 
-	pag = xfs_perag_get(btp->bt_mount,
-			xfs_daddr_to_agno(btp->bt_mount, cmap.bm_bn));
+	pag = xfs_buftarg_get_pag(btp, &cmap);
+	bch = xfs_buftarg_buf_cache(btp, pag);
 
-	error = xfs_buf_lookup(pag, &cmap, flags, &bp);
+	error = xfs_buf_lookup(bch, &cmap, flags, &bp);
 	if (error && error != -ENOENT)
 		goto out_put_perag;
@@ -722,13 +743,14 @@ xfs_buf_get_map(
 		goto out_put_perag;
 
 	if (!bp) {
 		/* xfs_buf_find_insert() consumes the perag reference. */
-		error = xfs_buf_find_insert(btp, pag, &cmap, map, nmaps,
+		error = xfs_buf_find_insert(btp, bch, pag, &cmap, map, nmaps,
 				flags, &bp);
 		if (error)
 			return error;
 	} else {
 		XFS_STATS_INC(btp->bt_mount, xb_get_locked);
-		xfs_perag_put(pag);
+		if (pag)
+			xfs_perag_put(pag);
 	}
@@ -756,7 +778,8 @@ xfs_buf_get_map(
 	return 0;
 
 out_put_perag:
-	xfs_perag_put(pag);
+	if (pag)
+		xfs_perag_put(pag);
 	return error;
 }
@@ -1016,7 +1039,9 @@ static void
 xfs_buf_rele_cached(
 	struct xfs_buf		*bp)
 {
+	struct xfs_buftarg	*btp = bp->b_target;
 	struct xfs_perag	*pag = bp->b_pag;
+	struct xfs_buf_cache	*bch = xfs_buftarg_buf_cache(btp, pag);
 	bool			release;
 	bool			freebuf = false;
@@ -1035,7 +1060,7 @@ xfs_buf_rele_cached(
 	 * leading to a use-after-free scenario.
 	 */
 	spin_lock(&bp->b_lock);
-	release = atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock);
+	release = atomic_dec_and_lock(&bp->b_hold, &bch->bc_lock);
 	if (!release) {
 		/*
 		 * Drop the in-flight state if the buffer is already on the LRU
@@ -1056,11 +1081,11 @@ xfs_buf_rele_cached(
 		 * buffer for the LRU and clear the (now stale) dispose list
 		 * state flag
 		 */
-		if (list_lru_add_obj(&bp->b_target->bt_lru, &bp->b_lru)) {
+		if (list_lru_add_obj(&btp->bt_lru, &bp->b_lru)) {
 			bp->b_state &= ~XFS_BSTATE_DISPOSE;
 			atomic_inc(&bp->b_hold);
 		}
-		spin_unlock(&pag->pag_buf_lock);
+		spin_unlock(&bch->bc_lock);
 	} else {
 		/*
 		 * most of the time buffers will already be removed from the
@@ -1069,16 +1094,17 @@ xfs_buf_rele_cached(
 		 * was on was the disposal list
 		 */
 		if (!(bp->b_state & XFS_BSTATE_DISPOSE)) {
-			list_lru_del_obj(&bp->b_target->bt_lru, &bp->b_lru);
+			list_lru_del_obj(&btp->bt_lru, &bp->b_lru);
 		} else {
 			ASSERT(list_empty(&bp->b_lru));
 		}
 
 		ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
-		rhashtable_remove_fast(&pag->pag_buf_hash, &bp->b_rhash_head,
-				xfs_buf_hash_params);
-		spin_unlock(&pag->pag_buf_lock);
-		xfs_perag_put(pag);
+		rhashtable_remove_fast(&bch->bc_hash, &bp->b_rhash_head,
+				xfs_buf_hash_params);
+		spin_unlock(&bch->bc_lock);
+		if (pag)
+			xfs_perag_put(pag);
 		freebuf = true;
 	}
@@ -83,6 +83,14 @@ typedef unsigned int xfs_buf_flags_t;
 #define XFS_BSTATE_DISPOSE	 (1 << 0)	/* buffer being discarded */
 #define XFS_BSTATE_IN_FLIGHT	 (1 << 1)	/* I/O in flight */
 
+struct xfs_buf_cache {
+	spinlock_t		bc_lock;
+	struct rhashtable	bc_hash;
+};
+
+int xfs_buf_cache_init(struct xfs_buf_cache *bch);
+void xfs_buf_cache_destroy(struct xfs_buf_cache *bch);
+
 /*
  * The xfs_buftarg contains 2 notions of "sector size" -
 *
@@ -505,9 +505,6 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
 	return (xfs_agblock_t) do_div(ld, mp->m_sb.sb_agblocks);
 }
 
-int xfs_buf_hash_init(struct xfs_perag *pag);
-void xfs_buf_hash_destroy(struct xfs_perag *pag);
-
 extern void xfs_uuid_table_free(void);
 extern uint64_t xfs_default_resblks(xfs_mount_t *mp);
 extern int xfs_mountfs(xfs_mount_t *mp);