Commit 27b2df98 authored by Kent Overstreet

bcachefs: Kill for_each_btree_key()

for_each_btree_key() handles transaction restarts, like
for_each_btree_key2(), but only calls bch2_trans_begin() after a
transaction restart - for_each_btree_key2() wraps every loop iteration
in a transaction.

The for_each_btree_key() behaviour is problematic because it can lead to
holding the SRCU lock that prevents key cache reclaim for an unbounded
amount of time - there's no real need to keep it around.
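To illustrate the pattern this patch converts to (a minimal sketch, not code
from this commit - the function below is hypothetical, while the macro,
btree ID and flags are taken from the diff): the body passed to
for_each_btree_key2() is a statement expression whose value is the
iteration's error code, 0 meaning "keep going", and the macro handles
iterator exit and wraps each iteration in its own transaction:

        /* Hypothetical example of the for_each_btree_key2() calling convention: */
        static int example_count_alloc_keys(struct bch_fs *c, u64 *nr)
        {
                struct btree_trans *trans = bch2_trans_get(c);
                struct btree_iter iter;
                struct bkey_s_c k;
                int ret;

                *nr = 0;

                ret = for_each_btree_key2(trans, iter, BTREE_ID_alloc, POS_MIN,
                                          BTREE_ITER_PREFETCH, k, ({
                        /*
                         * Each iteration runs in its own transaction, so a
                         * restart here doesn't pin the SRCU lock across the
                         * whole walk:
                         */
                        (*nr)++;
                        0;      /* statement expression value: 0 = continue */
                }));

                bch2_trans_put(trans);
                return ret;
        }

Note that break and continue still work inside the body, since the macro
expands to an ordinary for loop.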
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent 8c066ede
@@ -544,8 +544,8 @@ int bch2_bucket_gens_init(struct bch_fs *c)
 	u8 gen;
 	int ret;
 
-	for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN,
-			   BTREE_ITER_PREFETCH, k, ret) {
+	ret = for_each_btree_key2(trans, iter, BTREE_ID_alloc, POS_MIN,
+				  BTREE_ITER_PREFETCH, k, ({
 		/*
 		 * Not a fsck error because this is checked/repaired by
 		 * bch2_check_alloc_key() which runs later:
@@ -572,8 +572,8 @@ int bch2_bucket_gens_init(struct bch_fs *c)
 		}
 		g.v.gens[offset] = gen;
-	}
-	bch2_trans_iter_exit(trans, &iter);
+		0;
+	}));
 
 	if (have_bucket_gens_key && !ret)
 		ret = commit_do(trans, NULL, NULL,
@@ -582,8 +582,7 @@ int bch2_bucket_gens_init(struct bch_fs *c)
 	bch2_trans_put(trans);
 
-	if (ret)
-		bch_err_fn(c, ret);
+	bch_err_fn(c, ret);
 	return ret;
 }
@@ -601,8 +600,8 @@ int bch2_alloc_read(struct bch_fs *c)
 		const struct bch_bucket_gens *g;
 		u64 b;
 
-		for_each_btree_key(trans, iter, BTREE_ID_bucket_gens, POS_MIN,
-				   BTREE_ITER_PREFETCH, k, ret) {
+		ret = for_each_btree_key2(trans, iter, BTREE_ID_bucket_gens, POS_MIN,
+					  BTREE_ITER_PREFETCH, k, ({
 			u64 start = bucket_gens_pos_to_alloc(k.k->p, 0).offset;
 			u64 end = bucket_gens_pos_to_alloc(bpos_nosnap_successor(k.k->p), 0).offset;
@@ -624,13 +623,13 @@ int bch2_alloc_read(struct bch_fs *c)
 			     b < min_t(u64, ca->mi.nbuckets, end);
 			     b++)
 				*bucket_gen(ca, b) = g->gens[b & KEY_TYPE_BUCKET_GENS_MASK];
-		}
-		bch2_trans_iter_exit(trans, &iter);
+			0;
+		}));
 	} else {
 		struct bch_alloc_v4 a;
 
-		for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN,
-				   BTREE_ITER_PREFETCH, k, ret) {
+		ret = for_each_btree_key2(trans, iter, BTREE_ID_alloc, POS_MIN,
+					  BTREE_ITER_PREFETCH, k, ({
 			/*
 			 * Not a fsck error because this is checked/repaired by
 			 * bch2_check_alloc_key() which runs later:
@@ -641,16 +640,14 @@ int bch2_alloc_read(struct bch_fs *c)
 			ca = bch_dev_bkey_exists(c, k.k->p.inode);
 
 			*bucket_gen(ca, k.k->p.offset) = bch2_alloc_to_v4(k, &a)->gen;
-		}
-		bch2_trans_iter_exit(trans, &iter);
+			0;
+		}));
 	}
 
 	bch2_trans_put(trans);
 	up_read(&c->gc_lock);
 
-	if (ret)
-		bch_err_fn(c, ret);
+	bch_err_fn(c, ret);
 
 	return ret;
 }
......
@@ -1665,7 +1665,6 @@ static int bch2_gc_reflink_done(struct bch_fs *c, bool metadata_only)
 static int bch2_gc_reflink_start(struct bch_fs *c,
 				 bool metadata_only)
 {
-	struct btree_trans *trans;
 	struct btree_iter iter;
 	struct bkey_s_c k;
 	struct reflink_gc *r;
@@ -1674,30 +1673,30 @@ static int bch2_gc_reflink_start(struct bch_fs *c,
 	if (metadata_only)
 		return 0;
 
-	trans = bch2_trans_get(c);
 	c->reflink_gc_nr = 0;
 
-	for_each_btree_key(trans, iter, BTREE_ID_reflink, POS_MIN,
-			   BTREE_ITER_PREFETCH, k, ret) {
-		const __le64 *refcount = bkey_refcount_c(k);
+	ret = bch2_trans_run(c,
+		for_each_btree_key2(trans, iter, BTREE_ID_reflink, POS_MIN,
+				    BTREE_ITER_PREFETCH, k, ({
+			const __le64 *refcount = bkey_refcount_c(k);
 
-		if (!refcount)
-			continue;
+			if (!refcount)
+				continue;
 
-		r = genradix_ptr_alloc(&c->reflink_gc_table, c->reflink_gc_nr++,
-				       GFP_KERNEL);
-		if (!r) {
-			ret = -BCH_ERR_ENOMEM_gc_reflink_start;
-			break;
-		}
+			r = genradix_ptr_alloc(&c->reflink_gc_table, c->reflink_gc_nr++,
+					       GFP_KERNEL);
+			if (!r) {
+				ret = -BCH_ERR_ENOMEM_gc_reflink_start;
+				break;
+			}
 
-		r->offset	= k.k->p.offset;
-		r->size		= k.k->size;
-		r->refcount	= 0;
-	}
-	bch2_trans_iter_exit(trans, &iter);
+			r->offset	= k.k->p.offset;
+			r->size		= k.k->size;
+			r->refcount	= 0;
+			0;
+		})));
 
-	bch2_trans_put(trans);
+	bch_err_fn(c, ret);
 	return ret;
 }
......
@@ -777,7 +777,7 @@ transaction_restart:					\
 			  (_do) ?: bch2_trans_commit(_trans, (_disk_res),\
 					(_journal_seq), (_commit_flags)))
 
-#define for_each_btree_key(_trans, _iter, _btree_id,			\
+#define for_each_btree_key_old(_trans, _iter, _btree_id,		\
 			   _start, _flags, _k, _ret)			\
 	for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id),	\
 				  (_start), (_flags));			\
......
@@ -1833,7 +1833,6 @@ void bch2_fs_ec_flush(struct bch_fs *c)
 
 int bch2_stripes_read(struct bch_fs *c)
 {
-	struct btree_trans *trans = bch2_trans_get(c);
 	struct btree_iter iter;
 	struct bkey_s_c k;
 	const struct bch_stripe *s;
@@ -1841,36 +1840,33 @@ int bch2_stripes_read(struct bch_fs *c)
 	unsigned i;
 	int ret;
 
-	for_each_btree_key(trans, iter, BTREE_ID_stripes, POS_MIN,
-			   BTREE_ITER_PREFETCH, k, ret) {
-		if (k.k->type != KEY_TYPE_stripe)
-			continue;
-
-		ret = __ec_stripe_mem_alloc(c, k.k->p.offset, GFP_KERNEL);
-		if (ret)
-			break;
-
-		s = bkey_s_c_to_stripe(k).v;
+	ret = bch2_trans_run(c,
+		for_each_btree_key2(trans, iter, BTREE_ID_stripes, POS_MIN,
+				    BTREE_ITER_PREFETCH, k, ({
+			if (k.k->type != KEY_TYPE_stripe)
+				continue;
 
-		m = genradix_ptr(&c->stripes, k.k->p.offset);
-		m->sectors	= le16_to_cpu(s->sectors);
-		m->algorithm	= s->algorithm;
-		m->nr_blocks	= s->nr_blocks;
-		m->nr_redundant	= s->nr_redundant;
-		m->blocks_nonempty = 0;
+			ret = __ec_stripe_mem_alloc(c, k.k->p.offset, GFP_KERNEL);
+			if (ret)
+				break;
 
-		for (i = 0; i < s->nr_blocks; i++)
-			m->blocks_nonempty += !!stripe_blockcount_get(s, i);
+			s = bkey_s_c_to_stripe(k).v;
 
-		bch2_stripes_heap_insert(c, m, k.k->p.offset);
-	}
-	bch2_trans_iter_exit(trans, &iter);
+			m = genradix_ptr(&c->stripes, k.k->p.offset);
+			m->sectors	= le16_to_cpu(s->sectors);
+			m->algorithm	= s->algorithm;
+			m->nr_blocks	= s->nr_blocks;
+			m->nr_redundant	= s->nr_redundant;
+			m->blocks_nonempty = 0;
 
-	bch2_trans_put(trans);
+			for (i = 0; i < s->nr_blocks; i++)
+				m->blocks_nonempty += !!stripe_blockcount_get(s, i);
 
-	if (ret)
-		bch_err_fn(c, ret);
+			bch2_stripes_heap_insert(c, m, k.k->p.offset);
+			0;
+		})));
 
+	bch_err_fn(c, ret);
 	return ret;
 }
......
@@ -589,14 +589,13 @@ static int get_inodes_all_snapshots(struct btree_trans *trans,
 	struct bch_fs *c = trans->c;
 	struct btree_iter iter;
 	struct bkey_s_c k;
-	u32 restart_count = trans->restart_count;
 	int ret;
 
 	w->recalculate_sums = false;
 	w->inodes.nr = 0;
 
-	for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, inum),
-			   BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
+	for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, POS(0, inum),
+				     BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
 		if (k.k->p.offset != inum)
 			break;
@@ -609,8 +608,7 @@ static int get_inodes_all_snapshots(struct btree_trans *trans,
 		return ret;
 
 	w->first_this_inode = true;
-
-	return trans_was_restarted(trans, restart_count);
+	return 0;
 }
 
 static struct inode_walker_entry *
@@ -2146,19 +2144,14 @@ int bch2_check_directory_structure(struct bch_fs *c)
 	pathbuf path = { 0, };
 	int ret;
 
-	for_each_btree_key(trans, iter, BTREE_ID_inodes, POS_MIN,
-			   BTREE_ITER_INTENT|
-			   BTREE_ITER_PREFETCH|
-			   BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
+	for_each_btree_key_old(trans, iter, BTREE_ID_inodes, POS_MIN,
+			       BTREE_ITER_INTENT|
+			       BTREE_ITER_PREFETCH|
+			       BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
 		if (!bkey_is_inode(k.k))
 			continue;
 
-		ret = bch2_inode_unpack(k, &u);
-		if (ret) {
-			/* Should have been caught earlier in fsck: */
-			bch_err(c, "error unpacking inode %llu: %i", k.k->p.offset, ret);
-			break;
-		}
+		BUG_ON(bch2_inode_unpack(k, &u));
 
 		if (u.bi_flags & BCH_INODE_unlinked)
 			continue;
@@ -2170,6 +2163,7 @@ int bch2_check_directory_structure(struct bch_fs *c)
 	bch2_trans_iter_exit(trans, &iter);
 	bch2_trans_put(trans);
 	darray_exit(&path);
+	bch_err_fn(c, ret);
 	return ret;
 }
@@ -2255,47 +2249,42 @@ static int check_nlinks_find_hardlinks(struct bch_fs *c,
 				       struct nlink_table *t,
 				       u64 start, u64 *end)
 {
-	struct btree_trans *trans = bch2_trans_get(c);
 	struct btree_iter iter;
 	struct bkey_s_c k;
 	struct bch_inode_unpacked u;
-	int ret = 0;
 
-	for_each_btree_key(trans, iter, BTREE_ID_inodes,
-			   POS(0, start),
-			   BTREE_ITER_INTENT|
-			   BTREE_ITER_PREFETCH|
-			   BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
-		if (!bkey_is_inode(k.k))
-			continue;
-
-		/* Should never fail, checked by bch2_inode_invalid: */
-		BUG_ON(bch2_inode_unpack(k, &u));
-
-		/*
-		 * Backpointer and directory structure checks are sufficient for
-		 * directories, since they can't have hardlinks:
-		 */
-		if (S_ISDIR(u.bi_mode))
-			continue;
+	int ret = bch2_trans_run(c,
+		for_each_btree_key2(trans, iter, BTREE_ID_inodes,
+				    POS(0, start),
+				    BTREE_ITER_INTENT|
+				    BTREE_ITER_PREFETCH|
+				    BTREE_ITER_ALL_SNAPSHOTS, k, ({
+			if (!bkey_is_inode(k.k))
+				continue;
 
-		if (!u.bi_nlink)
-			continue;
+			/* Should never fail, checked by bch2_inode_invalid: */
+			BUG_ON(bch2_inode_unpack(k, &u));
 
-		ret = add_nlink(c, t, k.k->p.offset, k.k->p.snapshot);
-		if (ret) {
-			*end = k.k->p.offset;
-			ret = 0;
-			break;
-		}
-	}
-	bch2_trans_iter_exit(trans, &iter);
-	bch2_trans_put(trans);
+			/*
+			 * Backpointer and directory structure checks are sufficient for
+			 * directories, since they can't have hardlinks:
+			 */
+			if (S_ISDIR(u.bi_mode))
+				continue;
 
-	if (ret)
-		bch_err(c, "error in fsck: btree error %i while walking inodes", ret);
+			if (!u.bi_nlink)
+				continue;
 
+			ret = add_nlink(c, t, k.k->p.offset, k.k->p.snapshot);
+			if (ret) {
+				*end = k.k->p.offset;
+				ret = 0;
+				break;
+			}
+			0;
+		})));
+
+	bch_err_fn(c, ret);
 	return ret;
 }
@@ -2303,42 +2292,39 @@ noinline_for_stack
 static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links,
 				     u64 range_start, u64 range_end)
 {
-	struct btree_trans *trans = bch2_trans_get(c);
 	struct snapshots_seen s;
 	struct btree_iter iter;
 	struct bkey_s_c k;
 	struct bkey_s_c_dirent d;
-	int ret;
 
 	snapshots_seen_init(&s);
 
-	for_each_btree_key(trans, iter, BTREE_ID_dirents, POS_MIN,
-			   BTREE_ITER_INTENT|
-			   BTREE_ITER_PREFETCH|
-			   BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
-		ret = snapshots_seen_update(c, &s, iter.btree_id, k.k->p);
-		if (ret)
-			break;
-
-		switch (k.k->type) {
-		case KEY_TYPE_dirent:
-			d = bkey_s_c_to_dirent(k);
+	int ret = bch2_trans_run(c,
+		for_each_btree_key2(trans, iter, BTREE_ID_dirents, POS_MIN,
+				    BTREE_ITER_INTENT|
+				    BTREE_ITER_PREFETCH|
+				    BTREE_ITER_ALL_SNAPSHOTS, k, ({
+			ret = snapshots_seen_update(c, &s, iter.btree_id, k.k->p);
+			if (ret)
+				break;
 
-			if (d.v->d_type != DT_DIR &&
-			    d.v->d_type != DT_SUBVOL)
-				inc_link(c, &s, links, range_start, range_end,
-					 le64_to_cpu(d.v->d_inum),
-					 bch2_snapshot_equiv(c, d.k->p.snapshot));
-			break;
-		}
-	}
-	bch2_trans_iter_exit(trans, &iter);
+			switch (k.k->type) {
+			case KEY_TYPE_dirent:
+				d = bkey_s_c_to_dirent(k);
 
-	if (ret)
-		bch_err(c, "error in fsck: btree error %i while walking dirents", ret);
+				if (d.v->d_type != DT_DIR &&
+				    d.v->d_type != DT_SUBVOL)
+					inc_link(c, &s, links, range_start, range_end,
+						 le64_to_cpu(d.v->d_inum),
+						 bch2_snapshot_equiv(c, d.k->p.snapshot));
+				break;
+			}
+			0;
+		})));
 
-	bch2_trans_put(trans);
 	snapshots_seen_exit(&s);
+	bch_err_fn(c, ret);
 	return ret;
 }
......
@@ -1168,29 +1168,29 @@ int bch2_delete_dead_inodes(struct bch_fs *c)
 	 * but we can't retry because the btree write buffer won't have been
 	 * flushed and we'd spin:
 	 */
-	for_each_btree_key(trans, iter, BTREE_ID_deleted_inodes, POS_MIN,
-			   BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
-		ret = commit_do(trans, NULL, NULL,
-				BCH_TRANS_COMMIT_no_enospc|
-				BCH_TRANS_COMMIT_lazy_rw,
-				may_delete_deleted_inode(trans, &iter, k.k->p, &need_another_pass));
-		if (ret < 0)
-			break;
-
-		if (ret) {
-			if (!test_bit(BCH_FS_rw, &c->flags)) {
-				bch2_trans_unlock(trans);
-				bch2_fs_lazy_rw(c);
-			}
+	ret = for_each_btree_key_commit(trans, iter, BTREE_ID_deleted_inodes, POS_MIN,
+			BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
+			NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({
+		ret = may_delete_deleted_inode(trans, &iter, k.k->p, &need_another_pass);
+		if (ret > 0) {
 			bch_verbose(c, "deleting unlinked inode %llu:%u", k.k->p.offset, k.k->p.snapshot);
 
 			ret = bch2_inode_rm_snapshot(trans, k.k->p.offset, k.k->p.snapshot);
-			if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
-				break;
+			/*
+			 * We don't want to loop here: a transaction restart
+			 * error here means we handled a transaction restart and
+			 * we're actually done, but if we loop we'll retry the
+			 * same key because the write buffer hasn't been flushed
+			 * yet
+			 */
+			if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
+				ret = 0;
+				continue;
+			}
 		}
-	}
-	bch2_trans_iter_exit(trans, &iter);
+
+		ret;
+	}));
 
 	if (!ret && need_another_pass) {
 		ret = bch2_btree_write_buffer_flush_sync(trans);
......
@@ -377,8 +377,8 @@ struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans,
 
 		io_opts->d.nr = 0;
 
-		for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, extent_k.k->p.inode),
-				   BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
+		ret = for_each_btree_key2(trans, iter, BTREE_ID_inodes, POS(0, extent_k.k->p.inode),
+					  BTREE_ITER_ALL_SNAPSHOTS, k, ({
 			if (k.k->p.offset != extent_k.k->p.inode)
 				break;
@@ -391,11 +391,8 @@ struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans,
 			struct snapshot_io_opts_entry e = { .snapshot = k.k->p.snapshot };
 			bch2_inode_opts_get(&e.io_opts, trans->c, &inode);
 
-			ret = darray_push(&io_opts->d, e);
-			if (ret)
-				break;
-		}
-		bch2_trans_iter_exit(trans, &iter);
+			darray_push(&io_opts->d, e);
+		}));
 
 		io_opts->cur_inum = extent_k.k->p.inode;
 	}
......
@@ -534,7 +534,8 @@ static int bch2_set_may_go_rw(struct bch_fs *c)
 	keys->gap = keys->nr;
 
 	set_bit(BCH_FS_may_go_rw, &c->flags);
-	if (keys->nr || c->opts.fsck)
+
+	if (keys->nr || c->opts.fsck || !c->sb.clean)
 		return bch2_fs_read_write_early(c);
 	return 0;
 }
......
@@ -1410,19 +1410,16 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
 		goto err;
 	}
 
-	for_each_btree_key(trans, iter, BTREE_ID_snapshots,
-			   POS_MIN, 0, k, ret) {
+	ret = for_each_btree_key2(trans, iter, BTREE_ID_snapshots,
+				  POS_MIN, 0, k, ({
 		if (k.k->type != KEY_TYPE_snapshot)
 			continue;
 
 		snap = bkey_s_c_to_snapshot(k);
-		if (BCH_SNAPSHOT_DELETED(snap.v)) {
-			ret = snapshot_list_add(c, &deleted, k.k->p.offset);
-			if (ret)
-				break;
-		}
-	}
-	bch2_trans_iter_exit(trans, &iter);
+		BCH_SNAPSHOT_DELETED(snap.v)
+			? snapshot_list_add(c, &deleted, k.k->p.offset)
+			: 0;
+	}));
 
 	if (ret) {
 		bch_err_msg(c, ret, "walking snapshots");
@@ -1469,18 +1466,20 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
 	bch2_trans_unlock(trans);
 	down_write(&c->snapshot_create_lock);
 
-	for_each_btree_key(trans, iter, BTREE_ID_snapshots,
-			   POS_MIN, 0, k, ret) {
+	ret = for_each_btree_key2(trans, iter, BTREE_ID_snapshots,
+				  POS_MIN, 0, k, ({
 		u32 snapshot = k.k->p.offset;
 		u32 equiv = bch2_snapshot_equiv(c, snapshot);
 
-		if (equiv != snapshot)
-			snapshot_list_add(c, &deleted_interior, snapshot);
-	}
-	bch2_trans_iter_exit(trans, &iter);
+		equiv != snapshot
+			? snapshot_list_add(c, &deleted_interior, snapshot)
+			: 0;
+	}));
 
-	if (ret)
+	if (ret) {
+		bch_err_msg(c, ret, "walking snapshots");
 		goto err_create_lock;
+	}
 
 	/*
 	 * Fixing children of deleted snapshots can't be done completely
......