Commit 74acb705 authored by Chandan Babu R's avatar Chandan Babu R

Merge tag 'repair-refcount-scalability-6.9_2024-02-23' of...

Merge tag 'repair-refcount-scalability-6.9_2024-02-23' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux into xfs-6.9-mergeC

xfs: reduce refcount repair memory usage

The refcountbt repair code has serious memory usage problems when the
block sharing factor of the filesystem is very high.  This can happen if
a deduplication tool has been run against the filesystem, or if the fs
stores reflinked VM images that have been aging for a long time.

Recall that the original reference counting algorithm walks the reverse
mapping records of the filesystem to generate reference counts.  For any
given block in the AG, the rmap bag structure contains all the rmap
records that cover that block; the refcount is the size of that bag.

For online repair, the bag doesn't need the owner, offset, or state flag
information, so it discards those.  This halves the record size, but the
bag structure still stores one excerpted record for each reverse
mapping.  If the sharing count is high, this will use a LOT of memory
storing redundant records.  In the extreme case, 100k mappings to the
same piece of space will consume 100k*16 bytes = 1.6M of memory.

For offline repair, the bag stores the owner values so that we know
which inodes need to be marked as being reflink inodes.  If a
deduplication tool has been run and there are many blocks within a file
pointing to the same physical space, this will still use a lot of memory
to store redundant records.

The solution to this problem is to deduplicate the bag records when
possible by adding a reference count to the bag record, and changing the
bag add function to detect an existing record to bump the refcount.  In
the above example, the 100k mappings will now use 24 bytes of memory.
These lookups can be done efficiently with a btree, so we create a new
refcount bag btree type (inside of online repair).  This is why we
refactored the btree code in the previous patchset.

The btree conversion also dramatically reduces the runtime of the
refcount generation algorithm, because the code to delete all bag
records that end at a given agblock now only has to delete one record
instead of (using the example above) 100k records.  As an added benefit,
record deletion now gives back the unused xfile space, which it did not
do previously.
Signed-off-by: default avatarDarrick J. Wong <djwong@kernel.org>
Signed-off-by: default avatarChandan Babu R <chandanbabu@kernel.org>

* tag 'repair-refcount-scalability-6.9_2024-02-23' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux:
  xfs: port refcount repair to the new refcount bag structure
  xfs: create refcount bag structure for btree repairs
  xfs: define an in-memory btree for storing refcount bag info during repairs
parents fd43925c 7fbaab57
...@@ -198,6 +198,8 @@ xfs-y += $(addprefix scrub/, \ ...@@ -198,6 +198,8 @@ xfs-y += $(addprefix scrub/, \
inode_repair.o \ inode_repair.o \
newbt.o \ newbt.o \
nlinks_repair.o \ nlinks_repair.o \
rcbag_btree.o \
rcbag.o \
reap.o \ reap.o \
refcount_repair.o \ refcount_repair.o \
repair.o \ repair.o \
......
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (c) 2022-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_buf_mem.h"
#include "xfs_btree_mem.h"
#include "xfs_error.h"
#include "scrub/scrub.h"
#include "scrub/rcbag_btree.h"
#include "scrub/rcbag.h"
#include "scrub/trace.h"
/*
 * In-memory bag of reverse mappings, used by refcount repair to count how
 * many rmaps cover each block.  Backed by an in-memory (xfile) btree.
 */
struct rcbag {
	struct xfs_mount	*mp;		/* mount, needed for btree cursors */
	struct xfbtree		xfbtree;	/* in-memory btree holding bag records */
	uint64_t		nr_items;	/* rmaps tracked, counting duplicates */
};
int
rcbag_init(
struct xfs_mount *mp,
struct xfs_buftarg *btp,
struct rcbag **bagp)
{
struct rcbag *bag;
int error;
bag = kzalloc(sizeof(struct rcbag), XCHK_GFP_FLAGS);
if (!bag)
return -ENOMEM;
bag->nr_items = 0;
bag->mp = mp;
error = rcbagbt_mem_init(mp, &bag->xfbtree, btp);
if (error)
goto out_bag;
*bagp = bag;
return 0;
out_bag:
kfree(bag);
return error;
}
/* Tear down the bag's in-memory btree and free the bag; clears *bagp. */
void
rcbag_free(
	struct rcbag		**bagp)
{
	struct rcbag		*doomed = *bagp;

	*bagp = NULL;
	xfbtree_destroy(&doomed->xfbtree);
	kfree(doomed);
}
/*
 * Track an rmap in the refcount bag.
 *
 * If a bag record already covers the same (startblock, blockcount) range,
 * bump that record's refcount instead of storing a duplicate; otherwise
 * insert a fresh record with refcount 1.  The btree changes are committed
 * to the in-memory buffer target on success and cancelled on failure.
 * Returns 0 or a negative errno.
 */
int
rcbag_add(
	struct rcbag		*bag,
	struct xfs_trans	*tp,
	const struct xfs_rmap_irec *rmap)
{
	struct rcbag_rec	bagrec;
	struct xfs_mount	*mp = bag->mp;
	struct xfs_btree_cur	*cur;
	int			has;
	int			error;

	cur = rcbagbt_mem_cursor(mp, tp, &bag->xfbtree);
	error = rcbagbt_lookup_eq(cur, rmap, &has);
	if (error)
		goto out_cur;

	if (has) {
		/* Existing record for this range; bump its sharing count. */
		error = rcbagbt_get_rec(cur, &bagrec, &has);
		if (error)
			goto out_cur;
		if (!has) {
			/* Lookup said EQ but no record is here: tree corrupt. */
			error = -EFSCORRUPTED;
			goto out_cur;
		}

		bagrec.rbg_refcount++;
		error = rcbagbt_update(cur, &bagrec);
		if (error)
			goto out_cur;
	} else {
		/* First rmap for this range; start the count at 1. */
		bagrec.rbg_startblock = rmap->rm_startblock;
		bagrec.rbg_blockcount = rmap->rm_blockcount;
		bagrec.rbg_refcount = 1;

		error = rcbagbt_insert(cur, &bagrec, &has);
		if (error)
			goto out_cur;
		if (!has) {
			/* Insert reported no record created: tree corrupt. */
			error = -EFSCORRUPTED;
			goto out_cur;
		}
	}

	/* Release the cursor before committing the btree updates. */
	xfs_btree_del_cursor(cur, 0);

	error = xfbtree_trans_commit(&bag->xfbtree, tp);
	if (error)
		return error;

	bag->nr_items++;
	return 0;

out_cur:
	xfs_btree_del_cursor(cur, error);
	xfbtree_trans_cancel(&bag->xfbtree, tp);
	return error;
}
/* Return the number of records in the bag. */
uint64_t
rcbag_count(
	const struct rcbag	*bag)
{
	return bag->nr_items;
}
/* Compute the first agblock beyond the end of this bag record. */
static inline uint32_t rcbag_rec_next_bno(const struct rcbag_rec *rec)
{
	return rec->rbg_blockcount + rec->rbg_startblock;
}
/*
 * Find the next block where the refcount changes, given the next rmap we
 * looked at and the ones we're already tracking.
 *
 * @next_rmap/@next_valid describe the next interesting rmap (if any); the
 * candidate edges are that rmap's start block and the end blocks of every
 * record currently in the bag.  The smallest of those is returned in
 * *next_bnop.  Returns 0 or a negative errno.
 */
int
rcbag_next_edge(
	struct rcbag		*bag,
	struct xfs_trans	*tp,
	const struct xfs_rmap_irec *next_rmap,
	bool			next_valid,
	uint32_t		*next_bnop)
{
	struct rcbag_rec	bagrec;
	struct xfs_mount	*mp = bag->mp;
	struct xfs_btree_cur	*cur;
	uint32_t		next_bno = NULLAGBLOCK;
	int			has;
	int			error;

	if (next_valid)
		next_bno = next_rmap->rm_startblock;

	/* Walk every bag record, left to right, tracking the smallest edge. */
	cur = rcbagbt_mem_cursor(mp, tp, &bag->xfbtree);
	error = xfs_btree_goto_left_edge(cur);
	if (error)
		goto out_cur;

	while (true) {
		error = xfs_btree_increment(cur, 0, &has);
		if (error)
			goto out_cur;
		if (!has)
			break;

		error = rcbagbt_get_rec(cur, &bagrec, &has);
		if (error)
			goto out_cur;
		if (!has) {
			error = -EFSCORRUPTED;
			goto out_cur;
		}

		next_bno = min(next_bno, rcbag_rec_next_bno(&bagrec));
	}

	/*
	 * We should have found /something/ because either next_rmap is the
	 * next interesting rmap to look at after emitting this refcount
	 * extent, or there are other records in the bag contributing to the
	 * current sharing count.  But if something is seriously wrong, bail
	 * out.
	 */
	if (next_bno == NULLAGBLOCK) {
		error = -EFSCORRUPTED;
		goto out_cur;
	}

	xfs_btree_del_cursor(cur, 0);
	*next_bnop = next_bno;
	return 0;

out_cur:
	xfs_btree_del_cursor(cur, error);
	return error;
}
/*
 * Pop all refcount bag records that end at next_bno.
 *
 * Walks the bag btree from the right edge towards the left, deleting every
 * record whose end block equals @next_bno and subtracting that record's
 * refcount from the bag's item count.  Commits the btree changes on
 * success, cancels them on failure.  Returns 0 or a negative errno.
 */
int
rcbag_remove_ending_at(
	struct rcbag		*bag,
	struct xfs_trans	*tp,
	uint32_t		next_bno)
{
	struct rcbag_rec	bagrec;
	struct xfs_mount	*mp = bag->mp;
	struct xfs_btree_cur	*cur;
	int			has;
	int			error;

	/* go to the right edge of the tree */
	cur = rcbagbt_mem_cursor(mp, tp, &bag->xfbtree);
	/* All-ones key compares greater than any real record. */
	memset(&cur->bc_rec, 0xFF, sizeof(cur->bc_rec));
	error = xfs_btree_lookup(cur, XFS_LOOKUP_GE, &has);
	if (error)
		goto out_cur;

	/* Walk right-to-left so deletions don't skip records. */
	while (true) {
		error = xfs_btree_decrement(cur, 0, &has);
		if (error)
			goto out_cur;
		if (!has)
			break;

		error = rcbagbt_get_rec(cur, &bagrec, &has);
		if (error)
			goto out_cur;
		if (!has) {
			error = -EFSCORRUPTED;
			goto out_cur;
		}

		if (rcbag_rec_next_bno(&bagrec) != next_bno)
			continue;

		error = xfs_btree_delete(cur, &has);
		if (error)
			goto out_cur;
		if (!has) {
			error = -EFSCORRUPTED;
			goto out_cur;
		}

		/* One bag record stands for rbg_refcount rmaps. */
		bag->nr_items -= bagrec.rbg_refcount;
	}

	xfs_btree_del_cursor(cur, 0);
	return xfbtree_trans_commit(&bag->xfbtree, tp);
out_cur:
	xfs_btree_del_cursor(cur, error);
	xfbtree_trans_cancel(&bag->xfbtree, tp);
	return error;
}
/*
 * Dump the rcbag.  Debugging aid: walks every bag record from the left
 * edge and logs it via xfs_err.  Errors from the walk are ignored beyond
 * stopping the dump.
 */
void
rcbag_dump(
	struct rcbag		*bag,
	struct xfs_trans	*tp)
{
	struct rcbag_rec	bagrec;
	struct xfs_mount	*mp = bag->mp;
	struct xfs_btree_cur	*cur;
	unsigned long long	nr = 0;
	int			has;
	int			error;

	cur = rcbagbt_mem_cursor(mp, tp, &bag->xfbtree);
	error = xfs_btree_goto_left_edge(cur);
	if (error)
		goto out_cur;

	while (true) {
		error = xfs_btree_increment(cur, 0, &has);
		if (error)
			goto out_cur;
		if (!has)
			break;

		error = rcbagbt_get_rec(cur, &bagrec, &has);
		if (error)
			goto out_cur;
		if (!has) {
			error = -EFSCORRUPTED;
			goto out_cur;
		}

		xfs_err(bag->mp, "[%llu]: bno 0x%x fsbcount 0x%x refcount 0x%llx\n",
				nr++,
				(unsigned int)bagrec.rbg_startblock,
				(unsigned int)bagrec.rbg_blockcount,
				(unsigned long long)bagrec.rbg_refcount);
	}

out_cur:
	xfs_btree_del_cursor(cur, error);
}
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) 2022-2024 Oracle. All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#ifndef __XFS_SCRUB_RCBAG_H__
#define __XFS_SCRUB_RCBAG_H__

struct xfs_mount;
struct rcbag;
struct xfs_buftarg;

/* Allocate a refcount bag backed by an in-memory btree on @btp. */
int rcbag_init(struct xfs_mount *mp, struct xfs_buftarg *btp,
		struct rcbag **bagp);
/* Free the bag and clear *bagp. */
void rcbag_free(struct rcbag **bagp);
/* Track an rmap in the bag, deduplicating identical ranges. */
int rcbag_add(struct rcbag *bag, struct xfs_trans *tp,
		const struct xfs_rmap_irec *rmap);
/* Number of rmaps tracked, counting duplicates. */
uint64_t rcbag_count(const struct rcbag *bag);

/* Find the next agblock where the refcount changes. */
int rcbag_next_edge(struct rcbag *bag, struct xfs_trans *tp,
		const struct xfs_rmap_irec *next_rmap, bool next_valid,
		uint32_t *next_bnop);
/* Remove all bag records ending at next_bno. */
int rcbag_remove_ending_at(struct rcbag *bag, struct xfs_trans *tp,
		uint32_t next_bno);

/* Log every bag record (debugging aid). */
void rcbag_dump(struct rcbag *bag, struct xfs_trans *tp);

#endif /* __XFS_SCRUB_RCBAG_H__ */
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (c) 2022-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_buf_mem.h"
#include "xfs_btree_mem.h"
#include "xfs_error.h"
#include "scrub/rcbag_btree.h"
#include "scrub/trace.h"
/* Slab cache for rcbag btree cursors; created by rcbagbt_init_cur_cache. */
static struct kmem_cache	*rcbagbt_cur_cache;
/* Derive a btree key from a bag record: the (startblock, blockcount) pair. */
STATIC void
rcbagbt_init_key_from_rec(
	union xfs_btree_key		*key,
	const union xfs_btree_rec	*rec)
{
	struct rcbag_key	*bag_key = (struct rcbag_key *)key;
	const struct rcbag_rec	*bag_rec = (const struct rcbag_rec *)rec;

	/* The in-memory layouts must fit inside the generic btree unions. */
	BUILD_BUG_ON(sizeof(struct rcbag_key) > sizeof(union xfs_btree_key));
	BUILD_BUG_ON(sizeof(struct rcbag_rec) > sizeof(union xfs_btree_rec));

	bag_key->rbg_startblock = bag_rec->rbg_startblock;
	bag_key->rbg_blockcount = bag_rec->rbg_blockcount;
}
STATIC void
rcbagbt_init_rec_from_cur(
struct xfs_btree_cur *cur,
union xfs_btree_rec *rec)
{
struct rcbag_rec *bag_rec = (struct rcbag_rec *)rec;
struct rcbag_rec *bag_irec = (struct rcbag_rec *)&cur->bc_rec;
bag_rec->rbg_startblock = bag_irec->rbg_startblock;
bag_rec->rbg_blockcount = bag_irec->rbg_blockcount;
bag_rec->rbg_refcount = bag_irec->rbg_refcount;
}
/*
 * Compare the search key against the cursor's staged record.
 * Returns >0 if the key sorts after the record, <0 if before, 0 if equal.
 */
STATIC int64_t
rcbagbt_key_diff(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*key)
{
	const struct rcbag_rec		*rec = (struct rcbag_rec *)&cur->bc_rec;
	const struct rcbag_key		*kp = (const struct rcbag_key *)key;

	/* Order by startblock first, then blockcount. */
	if (kp->rbg_startblock != rec->rbg_startblock)
		return kp->rbg_startblock > rec->rbg_startblock ? 1 : -1;

	if (kp->rbg_blockcount != rec->rbg_blockcount)
		return kp->rbg_blockcount > rec->rbg_blockcount ? 1 : -1;

	return 0;
}
/*
 * Compare two btree keys.  Returns >0 if k1 sorts after k2, <0 if before,
 * 0 if equal.  Key masking is not supported for this btree type.
 */
STATIC int64_t
rcbagbt_diff_two_keys(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*k1,
	const union xfs_btree_key	*k2,
	const union xfs_btree_key	*mask)
{
	const struct rcbag_key		*kp1 = (const struct rcbag_key *)k1;
	const struct rcbag_key		*kp2 = (const struct rcbag_key *)k2;

	ASSERT(mask == NULL);

	/* Order by startblock first, then blockcount. */
	if (kp1->rbg_startblock != kp2->rbg_startblock)
		return kp1->rbg_startblock > kp2->rbg_startblock ? 1 : -1;

	if (kp1->rbg_blockcount != kp2->rbg_blockcount)
		return kp1->rbg_blockcount > kp2->rbg_blockcount ? 1 : -1;

	return 0;
}
/* Return nonzero if k1 sorts strictly before k2. */
STATIC int
rcbagbt_keys_inorder(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*k1,
	const union xfs_btree_key	*k2)
{
	const struct rcbag_key		*kp1 = (const struct rcbag_key *)k1;
	const struct rcbag_key		*kp2 = (const struct rcbag_key *)k2;

	/* Order by startblock first, then blockcount; equal keys fail. */
	if (kp1->rbg_startblock != kp2->rbg_startblock)
		return kp1->rbg_startblock < kp2->rbg_startblock;

	return kp1->rbg_blockcount < kp2->rbg_blockcount;
}
/* Return nonzero if record r1 sorts strictly before r2. */
STATIC int
rcbagbt_recs_inorder(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_rec	*r1,
	const union xfs_btree_rec	*r2)
{
	const struct rcbag_rec		*rp1 = (const struct rcbag_rec *)r1;
	const struct rcbag_rec		*rp2 = (const struct rcbag_rec *)r2;

	/* Order by startblock first, then blockcount; equal records fail. */
	if (rp1->rbg_startblock != rp2->rbg_startblock)
		return rp1->rbg_startblock < rp2->rbg_startblock;

	return rp1->rbg_blockcount < rp2->rbg_blockcount;
}
/*
 * Structural verifier for rcbag btree blocks: checks the magic, the v5
 * block header, the level bound, and the record count.  Returns the
 * failure address, or NULL if the block looks fine.
 */
static xfs_failaddr_t
rcbagbt_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_mount;
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	xfs_failaddr_t		fa;
	unsigned int		level;
	unsigned int		maxrecs;

	if (!xfs_verify_magic(bp, block->bb_magic))
		return __this_address;

	fa = xfs_btree_fsblock_v5hdr_verify(bp, XFS_RMAP_OWN_UNKNOWN);
	if (fa)
		return fa;

	level = be16_to_cpu(block->bb_level);
	if (level >= rcbagbt_maxlevels_possible())
		return __this_address;

	maxrecs = rcbagbt_maxrecs(mp, XFBNO_BLOCKSIZE, level == 0);
	return xfs_btree_memblock_verify(bp, maxrecs);
}
static void
rcbagbt_rw_verify(
struct xfs_buf *bp)
{
xfs_failaddr_t fa = rcbagbt_verify(bp);
if (fa)
xfs_verifier_error(bp, -EFSCORRUPTED, fa);
}
/* skip crc checks on in-memory btrees to save time */
static const struct xfs_buf_ops rcbagbt_mem_buf_ops = {
	.name			= "rcbagbt_mem",
	.magic			= { 0, cpu_to_be32(RCBAG_MAGIC) },
	.verify_read		= rcbagbt_rw_verify,
	.verify_write		= rcbagbt_rw_verify,
	.verify_struct		= rcbagbt_verify,
};
/* Btree operations table for the in-memory refcount bag btree. */
static const struct xfs_btree_ops rcbagbt_mem_ops = {
	.name			= "rcbag",
	.type			= XFS_BTREE_TYPE_MEM,

	.rec_len		= sizeof(struct rcbag_rec),
	.key_len		= sizeof(struct rcbag_key),
	.ptr_len		= XFS_BTREE_LONG_PTR_LEN,

	.lru_refs		= 1,
	.statoff		= XFS_STATS_CALC_INDEX(xs_rcbag_2),

	/* Generic xfbtree helpers handle block management and roots. */
	.dup_cursor		= xfbtree_dup_cursor,
	.set_root		= xfbtree_set_root,
	.alloc_block		= xfbtree_alloc_block,
	.free_block		= xfbtree_free_block,
	.get_minrecs		= xfbtree_get_minrecs,
	.get_maxrecs		= xfbtree_get_maxrecs,
	.init_key_from_rec	= rcbagbt_init_key_from_rec,
	.init_rec_from_cur	= rcbagbt_init_rec_from_cur,
	.init_ptr_from_cur	= xfbtree_init_ptr_from_cur,
	.key_diff		= rcbagbt_key_diff,
	.buf_ops		= &rcbagbt_mem_buf_ops,
	.diff_two_keys		= rcbagbt_diff_two_keys,
	.keys_inorder		= rcbagbt_keys_inorder,
	.recs_inorder		= rcbagbt_recs_inorder,
};
/*
 * Create a cursor for an in-memory btree.  The cursor is allocated from
 * rcbagbt_cur_cache and must be released with xfs_btree_del_cursor.
 */
struct xfs_btree_cur *
rcbagbt_mem_cursor(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	struct xfbtree		*xfbtree)
{
	struct xfs_btree_cur	*cur;

	cur = xfs_btree_alloc_cursor(mp, tp, &rcbagbt_mem_ops,
			rcbagbt_maxlevels_possible(), rcbagbt_cur_cache);

	cur->bc_mem.xfbtree = xfbtree;
	cur->bc_nlevels = xfbtree->nlevels;
	return cur;
}
/*
 * Create an in-memory refcount bag btree backed by @btp.  Bag btrees have
 * no meaningful owner, so use zero.  Returns 0 or a negative errno.
 */
int
rcbagbt_mem_init(
	struct xfs_mount	*mp,
	struct xfbtree		*xfbt,
	struct xfs_buftarg	*btp)
{
	xfbt->owner = 0;
	return xfbtree_init(mp, xfbt, btp, &rcbagbt_mem_ops);
}
/*
 * Calculate the number of records that fit in @blocklen bytes of a
 * refcount bag btree block.  Leaf blocks hold whole records; node blocks
 * hold key/pointer pairs.
 */
static inline unsigned int
rcbagbt_block_maxrecs(
	unsigned int		blocklen,
	bool			leaf)
{
	unsigned int		entry_size;

	if (leaf)
		entry_size = sizeof(struct rcbag_rec);
	else
		entry_size = sizeof(struct rcbag_key) + sizeof(rcbag_ptr_t);

	return blocklen / entry_size;
}
/*
 * Calculate the number of records in a refcount bag btree block, after
 * subtracting the block header from the usable space.
 */
unsigned int
rcbagbt_maxrecs(
	struct xfs_mount	*mp,
	unsigned int		blocklen,
	bool			leaf)
{
	return rcbagbt_block_maxrecs(blocklen - RCBAG_BLOCK_LEN, leaf);
}
/* Compute the max possible height for refcount bag btrees. */
unsigned int
rcbagbt_maxlevels_possible(void)
{
	const unsigned int	blocklen = XFBNO_BLOCKSIZE -
						XFS_BTREE_LBLOCK_CRC_LEN;
	unsigned int		minrecs[2];

	/* Minimum fanout is half of the maximum per level. */
	minrecs[0] = rcbagbt_block_maxrecs(blocklen, true) / 2;
	minrecs[1] = rcbagbt_block_maxrecs(blocklen, false) / 2;

	/* Tallest tree that could index every possible record. */
	return xfs_btree_space_to_height(minrecs, ULLONG_MAX);
}
/* Calculate the refcount bag btree size for some records. */
unsigned long long
rcbagbt_calc_size(
	unsigned long long	nr_records)
{
	const unsigned int	blocklen = XFBNO_BLOCKSIZE -
						XFS_BTREE_LBLOCK_CRC_LEN;
	unsigned int		minrecs[2];

	/* Minimum fanout is half of the maximum per level. */
	minrecs[0] = rcbagbt_block_maxrecs(blocklen, true) / 2;
	minrecs[1] = rcbagbt_block_maxrecs(blocklen, false) / 2;

	return xfs_btree_calc_size(minrecs, nr_records);
}
/* Create the slab cache for rcbag btree cursors.  Returns 0 or -ENOMEM. */
int __init
rcbagbt_init_cur_cache(void)
{
	rcbagbt_cur_cache = kmem_cache_create("xfs_rcbagbt_cur",
			xfs_btree_cur_sizeof(rcbagbt_maxlevels_possible()),
			0, 0, NULL);
	return rcbagbt_cur_cache == NULL ? -ENOMEM : 0;
}
/* Destroy the cursor slab cache; safe to call even if creation failed. */
void
rcbagbt_destroy_cur_cache(void)
{
	kmem_cache_destroy(rcbagbt_cur_cache);
	rcbagbt_cur_cache = NULL;
}
/*
 * Look up the refcount bag record corresponding to this reverse mapping.
 * Stages the rmap's range as the search key, then does an exact-match
 * lookup; *success reports whether a record was found.
 */
int
rcbagbt_lookup_eq(
	struct xfs_btree_cur		*cur,
	const struct xfs_rmap_irec	*rmap,
	int				*success)
{
	struct rcbag_rec		*staged =
				(struct rcbag_rec *)&cur->bc_rec;

	staged->rbg_startblock = rmap->rm_startblock;
	staged->rbg_blockcount = rmap->rm_blockcount;

	return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, success);
}
/* Get the data from the pointed-to record. */
int
rcbagbt_get_rec(
struct xfs_btree_cur *cur,
struct rcbag_rec *rec,
int *has)
{
union xfs_btree_rec *btrec;
int error;
error = xfs_btree_get_rec(cur, &btrec, has);
if (error || !(*has))
return error;
memcpy(rec, btrec, sizeof(struct rcbag_rec));
return 0;
}
/* Update the record referred to by cur to the value given. */
int
rcbagbt_update(
struct xfs_btree_cur *cur,
const struct rcbag_rec *rec)
{
union xfs_btree_rec btrec;
memcpy(&btrec, rec, sizeof(struct rcbag_rec));
return xfs_btree_update(cur, &btrec);
}
/*
 * Insert the given record at the cursor position.  (The previous comment
 * was a copy-paste of rcbagbt_update's.)  The record is staged in the
 * cursor before calling the generic insert; *success reports whether a
 * record was inserted.
 */
int
rcbagbt_insert(
	struct xfs_btree_cur	*cur,
	const struct rcbag_rec	*rec,
	int			*success)
{
	struct rcbag_rec	*btrec = (struct rcbag_rec *)&cur->bc_rec;

	memcpy(btrec, rec, sizeof(struct rcbag_rec));
	return xfs_btree_insert(cur, success);
}
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) 2022-2024 Oracle. All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#ifndef __XFS_SCRUB_RCBAG_BTREE_H__
#define __XFS_SCRUB_RCBAG_BTREE_H__

#ifdef CONFIG_XFS_BTREE_IN_MEM

struct xfs_buf;
struct xfs_btree_cur;
struct xfs_mount;

#define RCBAG_MAGIC	0x74826671	/* 'JRBG' */

/* Bag btree key: the extent being counted. */
struct rcbag_key {
	uint32_t	rbg_startblock;
	uint32_t	rbg_blockcount;
};

/* Bag btree record: an extent and the number of rmaps covering it. */
struct rcbag_rec {
	uint32_t	rbg_startblock;
	uint32_t	rbg_blockcount;
	uint64_t	rbg_refcount;
};

typedef __be64 rcbag_ptr_t;

/* reflinks only exist on crc enabled filesystems */
#define RCBAG_BLOCK_LEN	XFS_BTREE_LBLOCK_CRC_LEN

/*
 * Record, key, and pointer address macros for btree blocks.
 *
 * (note that some of these may appear unused, but they are used in userspace)
 */
#define RCBAG_REC_ADDR(block, index) \
	((struct rcbag_rec *) \
		((char *)(block) + RCBAG_BLOCK_LEN + \
		 (((index) - 1) * sizeof(struct rcbag_rec))))

#define RCBAG_KEY_ADDR(block, index) \
	((struct rcbag_key *) \
		((char *)(block) + RCBAG_BLOCK_LEN + \
		 ((index) - 1) * sizeof(struct rcbag_key)))

#define RCBAG_PTR_ADDR(block, index, maxrecs) \
	((rcbag_ptr_t *) \
		((char *)(block) + RCBAG_BLOCK_LEN + \
		 (maxrecs) * sizeof(struct rcbag_key) + \
		 ((index) - 1) * sizeof(rcbag_ptr_t)))

unsigned int rcbagbt_maxrecs(struct xfs_mount *mp, unsigned int blocklen,
		bool leaf);
unsigned long long rcbagbt_calc_size(unsigned long long nr_records);
unsigned int rcbagbt_maxlevels_possible(void);
int __init rcbagbt_init_cur_cache(void);
void rcbagbt_destroy_cur_cache(void);

struct xfs_btree_cur *rcbagbt_mem_cursor(struct xfs_mount *mp,
		struct xfs_trans *tp, struct xfbtree *xfbtree);
int rcbagbt_mem_init(struct xfs_mount *mp, struct xfbtree *xfbtree,
		struct xfs_buftarg *btp);

int rcbagbt_lookup_eq(struct xfs_btree_cur *cur,
		const struct xfs_rmap_irec *rmap, int *success);
int rcbagbt_get_rec(struct xfs_btree_cur *cur, struct rcbag_rec *rec, int *has);
int rcbagbt_update(struct xfs_btree_cur *cur, const struct rcbag_rec *rec);
int rcbagbt_insert(struct xfs_btree_cur *cur, const struct rcbag_rec *rec,
		int *success);

#else
/* Without in-memory btree support, cache setup is a no-op. */
# define rcbagbt_init_cur_cache()		0
# define rcbagbt_destroy_cur_cache()		((void)0)
#endif /* CONFIG_XFS_BTREE_IN_MEM */

#endif /* __XFS_SCRUB_RCBAG_BTREE_H__ */
...@@ -7,8 +7,10 @@ ...@@ -7,8 +7,10 @@
#include "xfs_fs.h" #include "xfs_fs.h"
#include "xfs_shared.h" #include "xfs_shared.h"
#include "xfs_format.h" #include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h" #include "xfs_trans_resv.h"
#include "xfs_mount.h" #include "xfs_mount.h"
#include "xfs_trans.h"
#include "xfs_ag.h" #include "xfs_ag.h"
#include "xfs_btree.h" #include "xfs_btree.h"
#include "xfs_rmap.h" #include "xfs_rmap.h"
...@@ -17,6 +19,7 @@ ...@@ -17,6 +19,7 @@
#include "scrub/common.h" #include "scrub/common.h"
#include "scrub/btree.h" #include "scrub/btree.h"
#include "scrub/trace.h" #include "scrub/trace.h"
#include "scrub/repair.h"
/* /*
* Set us up to scrub reference count btrees. * Set us up to scrub reference count btrees.
...@@ -27,6 +30,15 @@ xchk_setup_ag_refcountbt( ...@@ -27,6 +30,15 @@ xchk_setup_ag_refcountbt(
{ {
if (xchk_need_intent_drain(sc)) if (xchk_need_intent_drain(sc))
xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN); xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN);
if (xchk_could_repair(sc)) {
int error;
error = xrep_setup_ag_refcountbt(sc);
if (error)
return error;
}
return xchk_setup_ag_btree(sc, false); return xchk_setup_ag_btree(sc, false);
} }
......
...@@ -38,6 +38,7 @@ ...@@ -38,6 +38,7 @@
#include "scrub/xfarray.h" #include "scrub/xfarray.h"
#include "scrub/newbt.h" #include "scrub/newbt.h"
#include "scrub/reap.h" #include "scrub/reap.h"
#include "scrub/rcbag.h"
/* /*
* Rebuilding the Reference Count Btree * Rebuilding the Reference Count Btree
...@@ -98,12 +99,6 @@ ...@@ -98,12 +99,6 @@
* insert all the records. * insert all the records.
*/ */
/* The only parts of the rmap that we care about for computing refcounts. */
struct xrep_refc_rmap {
xfs_agblock_t startblock;
xfs_extlen_t blockcount;
} __packed;
struct xrep_refc { struct xrep_refc {
/* refcount extents */ /* refcount extents */
struct xfarray *refcount_records; struct xfarray *refcount_records;
...@@ -123,6 +118,20 @@ struct xrep_refc { ...@@ -123,6 +118,20 @@ struct xrep_refc {
xfs_extlen_t btblocks; xfs_extlen_t btblocks;
}; };
/* Set us up to repair refcount btrees. */
int
xrep_setup_ag_refcountbt(
struct xfs_scrub *sc)
{
char *descr;
int error;
descr = xchk_xfile_ag_descr(sc, "rmap record bag");
error = xrep_setup_xfbtree(sc, descr);
kfree(descr);
return error;
}
/* Check for any obvious conflicts with this shared/CoW staging extent. */ /* Check for any obvious conflicts with this shared/CoW staging extent. */
STATIC int STATIC int
xrep_refc_check_ext( xrep_refc_check_ext(
...@@ -224,10 +233,9 @@ xrep_refc_rmap_shareable( ...@@ -224,10 +233,9 @@ xrep_refc_rmap_shareable(
STATIC int STATIC int
xrep_refc_walk_rmaps( xrep_refc_walk_rmaps(
struct xrep_refc *rr, struct xrep_refc *rr,
struct xrep_refc_rmap *rrm, struct xfs_rmap_irec *rmap,
bool *have_rec) bool *have_rec)
{ {
struct xfs_rmap_irec rmap;
struct xfs_btree_cur *cur = rr->sc->sa.rmap_cur; struct xfs_btree_cur *cur = rr->sc->sa.rmap_cur;
struct xfs_mount *mp = cur->bc_mp; struct xfs_mount *mp = cur->bc_mp;
int have_gt; int have_gt;
...@@ -251,7 +259,7 @@ xrep_refc_walk_rmaps( ...@@ -251,7 +259,7 @@ xrep_refc_walk_rmaps(
if (!have_gt) if (!have_gt)
return 0; return 0;
error = xfs_rmap_get_rec(cur, &rmap, &have_gt); error = xfs_rmap_get_rec(cur, rmap, &have_gt);
if (error) if (error)
return error; return error;
if (XFS_IS_CORRUPT(mp, !have_gt)) { if (XFS_IS_CORRUPT(mp, !have_gt)) {
...@@ -259,23 +267,22 @@ xrep_refc_walk_rmaps( ...@@ -259,23 +267,22 @@ xrep_refc_walk_rmaps(
return -EFSCORRUPTED; return -EFSCORRUPTED;
} }
if (rmap.rm_owner == XFS_RMAP_OWN_COW) { if (rmap->rm_owner == XFS_RMAP_OWN_COW) {
error = xrep_refc_stash_cow(rr, rmap.rm_startblock, error = xrep_refc_stash_cow(rr, rmap->rm_startblock,
rmap.rm_blockcount); rmap->rm_blockcount);
if (error) if (error)
return error; return error;
} else if (rmap.rm_owner == XFS_RMAP_OWN_REFC) { } else if (rmap->rm_owner == XFS_RMAP_OWN_REFC) {
/* refcountbt block, dump it when we're done. */ /* refcountbt block, dump it when we're done. */
rr->btblocks += rmap.rm_blockcount; rr->btblocks += rmap->rm_blockcount;
error = xagb_bitmap_set(&rr->old_refcountbt_blocks, error = xagb_bitmap_set(&rr->old_refcountbt_blocks,
rmap.rm_startblock, rmap.rm_blockcount); rmap->rm_startblock,
rmap->rm_blockcount);
if (error) if (error)
return error; return error;
} }
} while (!xrep_refc_rmap_shareable(mp, &rmap)); } while (!xrep_refc_rmap_shareable(mp, rmap));
rrm->startblock = rmap.rm_startblock;
rrm->blockcount = rmap.rm_blockcount;
*have_rec = true; *have_rec = true;
return 0; return 0;
} }
...@@ -357,45 +364,6 @@ xrep_refc_sort_records( ...@@ -357,45 +364,6 @@ xrep_refc_sort_records(
return error; return error;
} }
#define RRM_NEXT(r) ((r).startblock + (r).blockcount)
/*
* Find the next block where the refcount changes, given the next rmap we
* looked at and the ones we're already tracking.
*/
static inline int
xrep_refc_next_edge(
struct xfarray *rmap_bag,
struct xrep_refc_rmap *next_rrm,
bool next_valid,
xfs_agblock_t *nbnop)
{
struct xrep_refc_rmap rrm;
xfarray_idx_t array_cur = XFARRAY_CURSOR_INIT;
xfs_agblock_t nbno = NULLAGBLOCK;
int error;
if (next_valid)
nbno = next_rrm->startblock;
while ((error = xfarray_iter(rmap_bag, &array_cur, &rrm)) == 1)
nbno = min_t(xfs_agblock_t, nbno, RRM_NEXT(rrm));
if (error)
return error;
/*
* We should have found /something/ because either next_rrm is the next
* interesting rmap to look at after emitting this refcount extent, or
* there are other rmaps in rmap_bag contributing to the current
* sharing count. But if something is seriously wrong, bail out.
*/
if (nbno == NULLAGBLOCK)
return -EFSCORRUPTED;
*nbnop = nbno;
return 0;
}
/* /*
* Walk forward through the rmap btree to collect all rmaps starting at * Walk forward through the rmap btree to collect all rmaps starting at
* @bno in @rmap_bag. These represent the file(s) that share ownership of * @bno in @rmap_bag. These represent the file(s) that share ownership of
...@@ -405,22 +373,21 @@ xrep_refc_next_edge( ...@@ -405,22 +373,21 @@ xrep_refc_next_edge(
static int static int
xrep_refc_push_rmaps_at( xrep_refc_push_rmaps_at(
struct xrep_refc *rr, struct xrep_refc *rr,
struct xfarray *rmap_bag, struct rcbag *rcstack,
xfs_agblock_t bno, xfs_agblock_t bno,
struct xrep_refc_rmap *rrm, struct xfs_rmap_irec *rmap,
bool *have, bool *have)
uint64_t *stack_sz)
{ {
struct xfs_scrub *sc = rr->sc; struct xfs_scrub *sc = rr->sc;
int have_gt; int have_gt;
int error; int error;
while (*have && rrm->startblock == bno) { while (*have && rmap->rm_startblock == bno) {
error = xfarray_store_anywhere(rmap_bag, rrm); error = rcbag_add(rcstack, rr->sc->tp, rmap);
if (error) if (error)
return error; return error;
(*stack_sz)++;
error = xrep_refc_walk_rmaps(rr, rrm, have); error = xrep_refc_walk_rmaps(rr, rmap, have);
if (error) if (error)
return error; return error;
} }
...@@ -441,12 +408,9 @@ STATIC int ...@@ -441,12 +408,9 @@ STATIC int
xrep_refc_find_refcounts( xrep_refc_find_refcounts(
struct xrep_refc *rr) struct xrep_refc *rr)
{ {
struct xrep_refc_rmap rrm;
struct xfs_scrub *sc = rr->sc; struct xfs_scrub *sc = rr->sc;
struct xfarray *rmap_bag; struct rcbag *rcstack;
char *descr; uint64_t old_stack_height;
uint64_t old_stack_sz;
uint64_t stack_sz = 0;
xfs_agblock_t sbno; xfs_agblock_t sbno;
xfs_agblock_t cbno; xfs_agblock_t cbno;
xfs_agblock_t nbno; xfs_agblock_t nbno;
...@@ -456,14 +420,11 @@ xrep_refc_find_refcounts( ...@@ -456,14 +420,11 @@ xrep_refc_find_refcounts(
xrep_ag_btcur_init(sc, &sc->sa); xrep_ag_btcur_init(sc, &sc->sa);
/* /*
* Set up a sparse array to store all the rmap records that we're * Set up a bag to store all the rmap records that we're tracking to
* tracking to generate a reference count record. If this exceeds * generate a reference count record. If the size of the bag exceeds
* MAXREFCOUNT, we clamp rc_refcount. * MAXREFCOUNT, we clamp rc_refcount.
*/ */
descr = xchk_xfile_ag_descr(sc, "rmap record bag"); error = rcbag_init(sc->mp, sc->xmbtp, &rcstack);
error = xfarray_create(descr, 0, sizeof(struct xrep_refc_rmap),
&rmap_bag);
kfree(descr);
if (error) if (error)
goto out_cur; goto out_cur;
...@@ -474,62 +435,54 @@ xrep_refc_find_refcounts( ...@@ -474,62 +435,54 @@ xrep_refc_find_refcounts(
/* Process reverse mappings into refcount data. */ /* Process reverse mappings into refcount data. */
while (xfs_btree_has_more_records(sc->sa.rmap_cur)) { while (xfs_btree_has_more_records(sc->sa.rmap_cur)) {
struct xfs_rmap_irec rmap;
/* Push all rmaps with pblk == sbno onto the stack */ /* Push all rmaps with pblk == sbno onto the stack */
error = xrep_refc_walk_rmaps(rr, &rrm, &have); error = xrep_refc_walk_rmaps(rr, &rmap, &have);
if (error) if (error)
goto out_bag; goto out_bag;
if (!have) if (!have)
break; break;
sbno = cbno = rrm.startblock; sbno = cbno = rmap.rm_startblock;
error = xrep_refc_push_rmaps_at(rr, rmap_bag, sbno, error = xrep_refc_push_rmaps_at(rr, rcstack, sbno, &rmap,
&rrm, &have, &stack_sz); &have);
if (error) if (error)
goto out_bag; goto out_bag;
/* Set nbno to the bno of the next refcount change */ /* Set nbno to the bno of the next refcount change */
error = xrep_refc_next_edge(rmap_bag, &rrm, have, &nbno); error = rcbag_next_edge(rcstack, sc->tp, &rmap, have, &nbno);
if (error) if (error)
goto out_bag; goto out_bag;
ASSERT(nbno > sbno); ASSERT(nbno > sbno);
old_stack_sz = stack_sz; old_stack_height = rcbag_count(rcstack);
/* While stack isn't empty... */ /* While stack isn't empty... */
while (stack_sz) { while (rcbag_count(rcstack) > 0) {
xfarray_idx_t array_cur = XFARRAY_CURSOR_INIT;
/* Pop all rmaps that end at nbno */ /* Pop all rmaps that end at nbno */
while ((error = xfarray_iter(rmap_bag, &array_cur, error = rcbag_remove_ending_at(rcstack, sc->tp, nbno);
&rrm)) == 1) {
if (RRM_NEXT(rrm) != nbno)
continue;
error = xfarray_unset(rmap_bag, array_cur - 1);
if (error)
goto out_bag;
stack_sz--;
}
if (error) if (error)
goto out_bag; goto out_bag;
/* Push array items that start at nbno */ /* Push array items that start at nbno */
error = xrep_refc_walk_rmaps(rr, &rrm, &have); error = xrep_refc_walk_rmaps(rr, &rmap, &have);
if (error) if (error)
goto out_bag; goto out_bag;
if (have) { if (have) {
error = xrep_refc_push_rmaps_at(rr, rmap_bag, error = xrep_refc_push_rmaps_at(rr, rcstack,
nbno, &rrm, &have, &stack_sz); nbno, &rmap, &have);
if (error) if (error)
goto out_bag; goto out_bag;
} }
/* Emit refcount if necessary */ /* Emit refcount if necessary */
ASSERT(nbno > cbno); ASSERT(nbno > cbno);
if (stack_sz != old_stack_sz) { if (rcbag_count(rcstack) != old_stack_height) {
if (old_stack_sz > 1) { if (old_stack_height > 1) {
error = xrep_refc_stash(rr, error = xrep_refc_stash(rr,
XFS_REFC_DOMAIN_SHARED, XFS_REFC_DOMAIN_SHARED,
cbno, nbno - cbno, cbno, nbno - cbno,
old_stack_sz); old_stack_height);
if (error) if (error)
goto out_bag; goto out_bag;
} }
...@@ -537,13 +490,13 @@ xrep_refc_find_refcounts( ...@@ -537,13 +490,13 @@ xrep_refc_find_refcounts(
} }
/* Stack empty, go find the next rmap */ /* Stack empty, go find the next rmap */
if (stack_sz == 0) if (rcbag_count(rcstack) == 0)
break; break;
old_stack_sz = stack_sz; old_stack_height = rcbag_count(rcstack);
sbno = nbno; sbno = nbno;
/* Set nbno to the bno of the next refcount change */ /* Set nbno to the bno of the next refcount change */
error = xrep_refc_next_edge(rmap_bag, &rrm, have, error = rcbag_next_edge(rcstack, sc->tp, &rmap, have,
&nbno); &nbno);
if (error) if (error)
goto out_bag; goto out_bag;
...@@ -552,14 +505,13 @@ xrep_refc_find_refcounts( ...@@ -552,14 +505,13 @@ xrep_refc_find_refcounts(
} }
} }
ASSERT(stack_sz == 0); ASSERT(rcbag_count(rcstack) == 0);
out_bag: out_bag:
xfarray_destroy(rmap_bag); rcbag_free(&rcstack);
out_cur: out_cur:
xchk_ag_btcur_free(&sc->sa); xchk_ag_btcur_free(&sc->sa);
return error; return error;
} }
#undef RRM_NEXT
/* Retrieve refcountbt data for bulk load. */ /* Retrieve refcountbt data for bulk load. */
STATIC int STATIC int
......
...@@ -89,6 +89,7 @@ int xrep_reset_perag_resv(struct xfs_scrub *sc); ...@@ -89,6 +89,7 @@ int xrep_reset_perag_resv(struct xfs_scrub *sc);
int xrep_bmap(struct xfs_scrub *sc, int whichfork, bool allow_unwritten); int xrep_bmap(struct xfs_scrub *sc, int whichfork, bool allow_unwritten);
int xrep_metadata_inode_forks(struct xfs_scrub *sc); int xrep_metadata_inode_forks(struct xfs_scrub *sc);
int xrep_setup_ag_rmapbt(struct xfs_scrub *sc); int xrep_setup_ag_rmapbt(struct xfs_scrub *sc);
int xrep_setup_ag_refcountbt(struct xfs_scrub *sc);
/* Repair setup functions */ /* Repair setup functions */
int xrep_setup_ag_allocbt(struct xfs_scrub *sc); int xrep_setup_ag_allocbt(struct xfs_scrub *sc);
...@@ -186,6 +187,7 @@ xrep_setup_nothing( ...@@ -186,6 +187,7 @@ xrep_setup_nothing(
} }
#define xrep_setup_ag_allocbt xrep_setup_nothing #define xrep_setup_ag_allocbt xrep_setup_nothing
#define xrep_setup_ag_rmapbt xrep_setup_nothing #define xrep_setup_ag_rmapbt xrep_setup_nothing
#define xrep_setup_ag_refcountbt xrep_setup_nothing
#define xrep_setup_inode(sc, imap) ((void)0) #define xrep_setup_inode(sc, imap) ((void)0)
......
...@@ -51,7 +51,8 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf) ...@@ -51,7 +51,8 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf)
{ "fibt2", xfsstats_offset(xs_rmap_2) }, { "fibt2", xfsstats_offset(xs_rmap_2) },
{ "rmapbt", xfsstats_offset(xs_refcbt_2) }, { "rmapbt", xfsstats_offset(xs_refcbt_2) },
{ "refcntbt", xfsstats_offset(xs_rmap_mem_2) }, { "refcntbt", xfsstats_offset(xs_rmap_mem_2) },
{ "rmapbt_mem", xfsstats_offset(xs_qm_dqreclaims)}, { "rmapbt_mem", xfsstats_offset(xs_rcbag_2) },
{ "rcbagbt", xfsstats_offset(xs_qm_dqreclaims)},
/* we print both series of quota information together */ /* we print both series of quota information together */
{ "qm", xfsstats_offset(xs_xstrat_bytes)}, { "qm", xfsstats_offset(xs_xstrat_bytes)},
}; };
......
...@@ -126,6 +126,7 @@ struct __xfsstats { ...@@ -126,6 +126,7 @@ struct __xfsstats {
uint32_t xs_rmap_2[__XBTS_MAX]; uint32_t xs_rmap_2[__XBTS_MAX];
uint32_t xs_refcbt_2[__XBTS_MAX]; uint32_t xs_refcbt_2[__XBTS_MAX];
uint32_t xs_rmap_mem_2[__XBTS_MAX]; uint32_t xs_rmap_mem_2[__XBTS_MAX];
uint32_t xs_rcbag_2[__XBTS_MAX];
uint32_t xs_qm_dqreclaims; uint32_t xs_qm_dqreclaims;
uint32_t xs_qm_dqreclaim_misses; uint32_t xs_qm_dqreclaim_misses;
uint32_t xs_qm_dquot_dups; uint32_t xs_qm_dquot_dups;
......
...@@ -44,6 +44,7 @@ ...@@ -44,6 +44,7 @@
#include "xfs_dahash_test.h" #include "xfs_dahash_test.h"
#include "xfs_rtbitmap.h" #include "xfs_rtbitmap.h"
#include "scrub/stats.h" #include "scrub/stats.h"
#include "scrub/rcbag_btree.h"
#include <linux/magic.h> #include <linux/magic.h>
#include <linux/fs_context.h> #include <linux/fs_context.h>
...@@ -2060,10 +2061,14 @@ xfs_init_caches(void) ...@@ -2060,10 +2061,14 @@ xfs_init_caches(void)
if (error) if (error)
goto out_destroy_log_ticket_cache; goto out_destroy_log_ticket_cache;
error = xfs_defer_init_item_caches(); error = rcbagbt_init_cur_cache();
if (error) if (error)
goto out_destroy_btree_cur_cache; goto out_destroy_btree_cur_cache;
error = xfs_defer_init_item_caches();
if (error)
goto out_destroy_rcbagbt_cur_cache;
xfs_da_state_cache = kmem_cache_create("xfs_da_state", xfs_da_state_cache = kmem_cache_create("xfs_da_state",
sizeof(struct xfs_da_state), sizeof(struct xfs_da_state),
0, 0, NULL); 0, 0, NULL);
...@@ -2220,6 +2225,8 @@ xfs_init_caches(void) ...@@ -2220,6 +2225,8 @@ xfs_init_caches(void)
kmem_cache_destroy(xfs_da_state_cache); kmem_cache_destroy(xfs_da_state_cache);
out_destroy_defer_item_cache: out_destroy_defer_item_cache:
xfs_defer_destroy_item_caches(); xfs_defer_destroy_item_caches();
out_destroy_rcbagbt_cur_cache:
rcbagbt_destroy_cur_cache();
out_destroy_btree_cur_cache: out_destroy_btree_cur_cache:
xfs_btree_destroy_cur_caches(); xfs_btree_destroy_cur_caches();
out_destroy_log_ticket_cache: out_destroy_log_ticket_cache:
...@@ -2257,6 +2264,7 @@ xfs_destroy_caches(void) ...@@ -2257,6 +2264,7 @@ xfs_destroy_caches(void)
kmem_cache_destroy(xfs_ifork_cache); kmem_cache_destroy(xfs_ifork_cache);
kmem_cache_destroy(xfs_da_state_cache); kmem_cache_destroy(xfs_da_state_cache);
xfs_defer_destroy_item_caches(); xfs_defer_destroy_item_caches();
rcbagbt_destroy_cur_cache();
xfs_btree_destroy_cur_caches(); xfs_btree_destroy_cur_caches();
kmem_cache_destroy(xfs_log_ticket_cache); kmem_cache_destroy(xfs_log_ticket_cache);
kmem_cache_destroy(xfs_buf_cache); kmem_cache_destroy(xfs_buf_cache);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment