Commit 27b2df98 authored by Kent Overstreet

bcachefs: Kill for_each_btree_key()

for_each_btree_key() handles transaction restarts, like
for_each_btree_key2(), but only calls bch2_trans_begin() after a
transaction restart - for_each_btree_key2() wraps every loop iteration
in a transaction.

The for_each_btree_key() behaviour is problematic when it leads to
holding the SRCU lock that prevents key cache reclaim for an unbounded
amount of time - there's no real need to keep it around.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent 8c066ede
...@@ -544,8 +544,8 @@ int bch2_bucket_gens_init(struct bch_fs *c) ...@@ -544,8 +544,8 @@ int bch2_bucket_gens_init(struct bch_fs *c)
u8 gen; u8 gen;
int ret; int ret;
for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN, ret = for_each_btree_key2(trans, iter, BTREE_ID_alloc, POS_MIN,
BTREE_ITER_PREFETCH, k, ret) { BTREE_ITER_PREFETCH, k, ({
/* /*
* Not a fsck error because this is checked/repaired by * Not a fsck error because this is checked/repaired by
* bch2_check_alloc_key() which runs later: * bch2_check_alloc_key() which runs later:
...@@ -572,8 +572,8 @@ int bch2_bucket_gens_init(struct bch_fs *c) ...@@ -572,8 +572,8 @@ int bch2_bucket_gens_init(struct bch_fs *c)
} }
g.v.gens[offset] = gen; g.v.gens[offset] = gen;
} 0;
bch2_trans_iter_exit(trans, &iter); }));
if (have_bucket_gens_key && !ret) if (have_bucket_gens_key && !ret)
ret = commit_do(trans, NULL, NULL, ret = commit_do(trans, NULL, NULL,
...@@ -582,7 +582,6 @@ int bch2_bucket_gens_init(struct bch_fs *c) ...@@ -582,7 +582,6 @@ int bch2_bucket_gens_init(struct bch_fs *c)
bch2_trans_put(trans); bch2_trans_put(trans);
if (ret)
bch_err_fn(c, ret); bch_err_fn(c, ret);
return ret; return ret;
} }
...@@ -601,8 +600,8 @@ int bch2_alloc_read(struct bch_fs *c) ...@@ -601,8 +600,8 @@ int bch2_alloc_read(struct bch_fs *c)
const struct bch_bucket_gens *g; const struct bch_bucket_gens *g;
u64 b; u64 b;
for_each_btree_key(trans, iter, BTREE_ID_bucket_gens, POS_MIN, ret = for_each_btree_key2(trans, iter, BTREE_ID_bucket_gens, POS_MIN,
BTREE_ITER_PREFETCH, k, ret) { BTREE_ITER_PREFETCH, k, ({
u64 start = bucket_gens_pos_to_alloc(k.k->p, 0).offset; u64 start = bucket_gens_pos_to_alloc(k.k->p, 0).offset;
u64 end = bucket_gens_pos_to_alloc(bpos_nosnap_successor(k.k->p), 0).offset; u64 end = bucket_gens_pos_to_alloc(bpos_nosnap_successor(k.k->p), 0).offset;
...@@ -624,13 +623,13 @@ int bch2_alloc_read(struct bch_fs *c) ...@@ -624,13 +623,13 @@ int bch2_alloc_read(struct bch_fs *c)
b < min_t(u64, ca->mi.nbuckets, end); b < min_t(u64, ca->mi.nbuckets, end);
b++) b++)
*bucket_gen(ca, b) = g->gens[b & KEY_TYPE_BUCKET_GENS_MASK]; *bucket_gen(ca, b) = g->gens[b & KEY_TYPE_BUCKET_GENS_MASK];
} 0;
bch2_trans_iter_exit(trans, &iter); }));
} else { } else {
struct bch_alloc_v4 a; struct bch_alloc_v4 a;
for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN, ret = for_each_btree_key2(trans, iter, BTREE_ID_alloc, POS_MIN,
BTREE_ITER_PREFETCH, k, ret) { BTREE_ITER_PREFETCH, k, ({
/* /*
* Not a fsck error because this is checked/repaired by * Not a fsck error because this is checked/repaired by
* bch2_check_alloc_key() which runs later: * bch2_check_alloc_key() which runs later:
...@@ -641,16 +640,14 @@ int bch2_alloc_read(struct bch_fs *c) ...@@ -641,16 +640,14 @@ int bch2_alloc_read(struct bch_fs *c)
ca = bch_dev_bkey_exists(c, k.k->p.inode); ca = bch_dev_bkey_exists(c, k.k->p.inode);
*bucket_gen(ca, k.k->p.offset) = bch2_alloc_to_v4(k, &a)->gen; *bucket_gen(ca, k.k->p.offset) = bch2_alloc_to_v4(k, &a)->gen;
} 0;
bch2_trans_iter_exit(trans, &iter); }));
} }
bch2_trans_put(trans); bch2_trans_put(trans);
up_read(&c->gc_lock); up_read(&c->gc_lock);
if (ret)
bch_err_fn(c, ret); bch_err_fn(c, ret);
return ret; return ret;
} }
......
...@@ -1665,7 +1665,6 @@ static int bch2_gc_reflink_done(struct bch_fs *c, bool metadata_only) ...@@ -1665,7 +1665,6 @@ static int bch2_gc_reflink_done(struct bch_fs *c, bool metadata_only)
static int bch2_gc_reflink_start(struct bch_fs *c, static int bch2_gc_reflink_start(struct bch_fs *c,
bool metadata_only) bool metadata_only)
{ {
struct btree_trans *trans;
struct btree_iter iter; struct btree_iter iter;
struct bkey_s_c k; struct bkey_s_c k;
struct reflink_gc *r; struct reflink_gc *r;
...@@ -1674,11 +1673,11 @@ static int bch2_gc_reflink_start(struct bch_fs *c, ...@@ -1674,11 +1673,11 @@ static int bch2_gc_reflink_start(struct bch_fs *c,
if (metadata_only) if (metadata_only)
return 0; return 0;
trans = bch2_trans_get(c);
c->reflink_gc_nr = 0; c->reflink_gc_nr = 0;
for_each_btree_key(trans, iter, BTREE_ID_reflink, POS_MIN, ret = bch2_trans_run(c,
BTREE_ITER_PREFETCH, k, ret) { for_each_btree_key2(trans, iter, BTREE_ID_reflink, POS_MIN,
BTREE_ITER_PREFETCH, k, ({
const __le64 *refcount = bkey_refcount_c(k); const __le64 *refcount = bkey_refcount_c(k);
if (!refcount) if (!refcount)
...@@ -1694,10 +1693,10 @@ static int bch2_gc_reflink_start(struct bch_fs *c, ...@@ -1694,10 +1693,10 @@ static int bch2_gc_reflink_start(struct bch_fs *c,
r->offset = k.k->p.offset; r->offset = k.k->p.offset;
r->size = k.k->size; r->size = k.k->size;
r->refcount = 0; r->refcount = 0;
} 0;
bch2_trans_iter_exit(trans, &iter); })));
bch2_trans_put(trans); bch_err_fn(c, ret);
return ret; return ret;
} }
......
...@@ -777,7 +777,7 @@ transaction_restart: \ ...@@ -777,7 +777,7 @@ transaction_restart: \
(_do) ?: bch2_trans_commit(_trans, (_disk_res),\ (_do) ?: bch2_trans_commit(_trans, (_disk_res),\
(_journal_seq), (_commit_flags))) (_journal_seq), (_commit_flags)))
#define for_each_btree_key(_trans, _iter, _btree_id, \ #define for_each_btree_key_old(_trans, _iter, _btree_id, \
_start, _flags, _k, _ret) \ _start, _flags, _k, _ret) \
for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \ for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \
(_start), (_flags)); \ (_start), (_flags)); \
......
...@@ -1833,7 +1833,6 @@ void bch2_fs_ec_flush(struct bch_fs *c) ...@@ -1833,7 +1833,6 @@ void bch2_fs_ec_flush(struct bch_fs *c)
int bch2_stripes_read(struct bch_fs *c) int bch2_stripes_read(struct bch_fs *c)
{ {
struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter; struct btree_iter iter;
struct bkey_s_c k; struct bkey_s_c k;
const struct bch_stripe *s; const struct bch_stripe *s;
...@@ -1841,8 +1840,9 @@ int bch2_stripes_read(struct bch_fs *c) ...@@ -1841,8 +1840,9 @@ int bch2_stripes_read(struct bch_fs *c)
unsigned i; unsigned i;
int ret; int ret;
for_each_btree_key(trans, iter, BTREE_ID_stripes, POS_MIN, ret = bch2_trans_run(c,
BTREE_ITER_PREFETCH, k, ret) { for_each_btree_key2(trans, iter, BTREE_ID_stripes, POS_MIN,
BTREE_ITER_PREFETCH, k, ({
if (k.k->type != KEY_TYPE_stripe) if (k.k->type != KEY_TYPE_stripe)
continue; continue;
...@@ -1863,14 +1863,10 @@ int bch2_stripes_read(struct bch_fs *c) ...@@ -1863,14 +1863,10 @@ int bch2_stripes_read(struct bch_fs *c)
m->blocks_nonempty += !!stripe_blockcount_get(s, i); m->blocks_nonempty += !!stripe_blockcount_get(s, i);
bch2_stripes_heap_insert(c, m, k.k->p.offset); bch2_stripes_heap_insert(c, m, k.k->p.offset);
} 0;
bch2_trans_iter_exit(trans, &iter); })));
bch2_trans_put(trans);
if (ret)
bch_err_fn(c, ret); bch_err_fn(c, ret);
return ret; return ret;
} }
......
...@@ -589,13 +589,12 @@ static int get_inodes_all_snapshots(struct btree_trans *trans, ...@@ -589,13 +589,12 @@ static int get_inodes_all_snapshots(struct btree_trans *trans,
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
struct btree_iter iter; struct btree_iter iter;
struct bkey_s_c k; struct bkey_s_c k;
u32 restart_count = trans->restart_count;
int ret; int ret;
w->recalculate_sums = false; w->recalculate_sums = false;
w->inodes.nr = 0; w->inodes.nr = 0;
for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, inum), for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, POS(0, inum),
BTREE_ITER_ALL_SNAPSHOTS, k, ret) { BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
if (k.k->p.offset != inum) if (k.k->p.offset != inum)
break; break;
...@@ -609,8 +608,7 @@ static int get_inodes_all_snapshots(struct btree_trans *trans, ...@@ -609,8 +608,7 @@ static int get_inodes_all_snapshots(struct btree_trans *trans,
return ret; return ret;
w->first_this_inode = true; w->first_this_inode = true;
return 0;
return trans_was_restarted(trans, restart_count);
} }
static struct inode_walker_entry * static struct inode_walker_entry *
...@@ -2146,19 +2144,14 @@ int bch2_check_directory_structure(struct bch_fs *c) ...@@ -2146,19 +2144,14 @@ int bch2_check_directory_structure(struct bch_fs *c)
pathbuf path = { 0, }; pathbuf path = { 0, };
int ret; int ret;
for_each_btree_key(trans, iter, BTREE_ID_inodes, POS_MIN, for_each_btree_key_old(trans, iter, BTREE_ID_inodes, POS_MIN,
BTREE_ITER_INTENT| BTREE_ITER_INTENT|
BTREE_ITER_PREFETCH| BTREE_ITER_PREFETCH|
BTREE_ITER_ALL_SNAPSHOTS, k, ret) { BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
if (!bkey_is_inode(k.k)) if (!bkey_is_inode(k.k))
continue; continue;
ret = bch2_inode_unpack(k, &u); BUG_ON(bch2_inode_unpack(k, &u));
if (ret) {
/* Should have been caught earlier in fsck: */
bch_err(c, "error unpacking inode %llu: %i", k.k->p.offset, ret);
break;
}
if (u.bi_flags & BCH_INODE_unlinked) if (u.bi_flags & BCH_INODE_unlinked)
continue; continue;
...@@ -2170,6 +2163,7 @@ int bch2_check_directory_structure(struct bch_fs *c) ...@@ -2170,6 +2163,7 @@ int bch2_check_directory_structure(struct bch_fs *c)
bch2_trans_iter_exit(trans, &iter); bch2_trans_iter_exit(trans, &iter);
bch2_trans_put(trans); bch2_trans_put(trans);
darray_exit(&path); darray_exit(&path);
bch_err_fn(c, ret); bch_err_fn(c, ret);
return ret; return ret;
} }
...@@ -2255,17 +2249,16 @@ static int check_nlinks_find_hardlinks(struct bch_fs *c, ...@@ -2255,17 +2249,16 @@ static int check_nlinks_find_hardlinks(struct bch_fs *c,
struct nlink_table *t, struct nlink_table *t,
u64 start, u64 *end) u64 start, u64 *end)
{ {
struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter; struct btree_iter iter;
struct bkey_s_c k; struct bkey_s_c k;
struct bch_inode_unpacked u; struct bch_inode_unpacked u;
int ret = 0;
for_each_btree_key(trans, iter, BTREE_ID_inodes, int ret = bch2_trans_run(c,
for_each_btree_key2(trans, iter, BTREE_ID_inodes,
POS(0, start), POS(0, start),
BTREE_ITER_INTENT| BTREE_ITER_INTENT|
BTREE_ITER_PREFETCH| BTREE_ITER_PREFETCH|
BTREE_ITER_ALL_SNAPSHOTS, k, ret) { BTREE_ITER_ALL_SNAPSHOTS, k, ({
if (!bkey_is_inode(k.k)) if (!bkey_is_inode(k.k))
continue; continue;
...@@ -2288,14 +2281,10 @@ static int check_nlinks_find_hardlinks(struct bch_fs *c, ...@@ -2288,14 +2281,10 @@ static int check_nlinks_find_hardlinks(struct bch_fs *c,
ret = 0; ret = 0;
break; break;
} }
0;
})));
} bch_err_fn(c, ret);
bch2_trans_iter_exit(trans, &iter);
bch2_trans_put(trans);
if (ret)
bch_err(c, "error in fsck: btree error %i while walking inodes", ret);
return ret; return ret;
} }
...@@ -2303,19 +2292,18 @@ noinline_for_stack ...@@ -2303,19 +2292,18 @@ noinline_for_stack
static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links, static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links,
u64 range_start, u64 range_end) u64 range_start, u64 range_end)
{ {
struct btree_trans *trans = bch2_trans_get(c);
struct snapshots_seen s; struct snapshots_seen s;
struct btree_iter iter; struct btree_iter iter;
struct bkey_s_c k; struct bkey_s_c k;
struct bkey_s_c_dirent d; struct bkey_s_c_dirent d;
int ret;
snapshots_seen_init(&s); snapshots_seen_init(&s);
for_each_btree_key(trans, iter, BTREE_ID_dirents, POS_MIN, int ret = bch2_trans_run(c,
for_each_btree_key2(trans, iter, BTREE_ID_dirents, POS_MIN,
BTREE_ITER_INTENT| BTREE_ITER_INTENT|
BTREE_ITER_PREFETCH| BTREE_ITER_PREFETCH|
BTREE_ITER_ALL_SNAPSHOTS, k, ret) { BTREE_ITER_ALL_SNAPSHOTS, k, ({
ret = snapshots_seen_update(c, &s, iter.btree_id, k.k->p); ret = snapshots_seen_update(c, &s, iter.btree_id, k.k->p);
if (ret) if (ret)
break; break;
...@@ -2331,14 +2319,12 @@ static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links ...@@ -2331,14 +2319,12 @@ static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links
bch2_snapshot_equiv(c, d.k->p.snapshot)); bch2_snapshot_equiv(c, d.k->p.snapshot));
break; break;
} }
} 0;
bch2_trans_iter_exit(trans, &iter); })));
if (ret)
bch_err(c, "error in fsck: btree error %i while walking dirents", ret);
bch2_trans_put(trans);
snapshots_seen_exit(&s); snapshots_seen_exit(&s);
bch_err_fn(c, ret);
return ret; return ret;
} }
......
...@@ -1168,29 +1168,29 @@ int bch2_delete_dead_inodes(struct bch_fs *c) ...@@ -1168,29 +1168,29 @@ int bch2_delete_dead_inodes(struct bch_fs *c)
* but we can't retry because the btree write buffer won't have been * but we can't retry because the btree write buffer won't have been
* flushed and we'd spin: * flushed and we'd spin:
*/ */
for_each_btree_key(trans, iter, BTREE_ID_deleted_inodes, POS_MIN, ret = for_each_btree_key_commit(trans, iter, BTREE_ID_deleted_inodes, POS_MIN,
BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, ret) { BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
ret = commit_do(trans, NULL, NULL, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({
BCH_TRANS_COMMIT_no_enospc| ret = may_delete_deleted_inode(trans, &iter, k.k->p, &need_another_pass);
BCH_TRANS_COMMIT_lazy_rw, if (ret > 0) {
may_delete_deleted_inode(trans, &iter, k.k->p, &need_another_pass));
if (ret < 0)
break;
if (ret) {
if (!test_bit(BCH_FS_rw, &c->flags)) {
bch2_trans_unlock(trans);
bch2_fs_lazy_rw(c);
}
bch_verbose(c, "deleting unlinked inode %llu:%u", k.k->p.offset, k.k->p.snapshot); bch_verbose(c, "deleting unlinked inode %llu:%u", k.k->p.offset, k.k->p.snapshot);
ret = bch2_inode_rm_snapshot(trans, k.k->p.offset, k.k->p.snapshot); ret = bch2_inode_rm_snapshot(trans, k.k->p.offset, k.k->p.snapshot);
if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) /*
break; * We don't want to loop here: a transaction restart
* error here means we handled a transaction restart and
* we're actually done, but if we loop we'll retry the
* same key because the write buffer hasn't been flushed
* yet
*/
if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
ret = 0;
continue;
} }
} }
bch2_trans_iter_exit(trans, &iter);
ret;
}));
if (!ret && need_another_pass) { if (!ret && need_another_pass) {
ret = bch2_btree_write_buffer_flush_sync(trans); ret = bch2_btree_write_buffer_flush_sync(trans);
......
...@@ -377,8 +377,8 @@ struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans, ...@@ -377,8 +377,8 @@ struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans,
io_opts->d.nr = 0; io_opts->d.nr = 0;
for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, extent_k.k->p.inode), ret = for_each_btree_key2(trans, iter, BTREE_ID_inodes, POS(0, extent_k.k->p.inode),
BTREE_ITER_ALL_SNAPSHOTS, k, ret) { BTREE_ITER_ALL_SNAPSHOTS, k, ({
if (k.k->p.offset != extent_k.k->p.inode) if (k.k->p.offset != extent_k.k->p.inode)
break; break;
...@@ -391,11 +391,8 @@ struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans, ...@@ -391,11 +391,8 @@ struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans,
struct snapshot_io_opts_entry e = { .snapshot = k.k->p.snapshot }; struct snapshot_io_opts_entry e = { .snapshot = k.k->p.snapshot };
bch2_inode_opts_get(&e.io_opts, trans->c, &inode); bch2_inode_opts_get(&e.io_opts, trans->c, &inode);
ret = darray_push(&io_opts->d, e); darray_push(&io_opts->d, e);
if (ret) }));
break;
}
bch2_trans_iter_exit(trans, &iter);
io_opts->cur_inum = extent_k.k->p.inode; io_opts->cur_inum = extent_k.k->p.inode;
} }
......
...@@ -534,7 +534,8 @@ static int bch2_set_may_go_rw(struct bch_fs *c) ...@@ -534,7 +534,8 @@ static int bch2_set_may_go_rw(struct bch_fs *c)
keys->gap = keys->nr; keys->gap = keys->nr;
set_bit(BCH_FS_may_go_rw, &c->flags); set_bit(BCH_FS_may_go_rw, &c->flags);
if (keys->nr || c->opts.fsck)
if (keys->nr || c->opts.fsck || !c->sb.clean)
return bch2_fs_read_write_early(c); return bch2_fs_read_write_early(c);
return 0; return 0;
} }
......
...@@ -1410,19 +1410,16 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) ...@@ -1410,19 +1410,16 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
goto err; goto err;
} }
for_each_btree_key(trans, iter, BTREE_ID_snapshots, ret = for_each_btree_key2(trans, iter, BTREE_ID_snapshots,
POS_MIN, 0, k, ret) { POS_MIN, 0, k, ({
if (k.k->type != KEY_TYPE_snapshot) if (k.k->type != KEY_TYPE_snapshot)
continue; continue;
snap = bkey_s_c_to_snapshot(k); snap = bkey_s_c_to_snapshot(k);
if (BCH_SNAPSHOT_DELETED(snap.v)) { BCH_SNAPSHOT_DELETED(snap.v)
ret = snapshot_list_add(c, &deleted, k.k->p.offset); ? snapshot_list_add(c, &deleted, k.k->p.offset)
if (ret) : 0;
break; }));
}
}
bch2_trans_iter_exit(trans, &iter);
if (ret) { if (ret) {
bch_err_msg(c, ret, "walking snapshots"); bch_err_msg(c, ret, "walking snapshots");
...@@ -1469,18 +1466,20 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) ...@@ -1469,18 +1466,20 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
bch2_trans_unlock(trans); bch2_trans_unlock(trans);
down_write(&c->snapshot_create_lock); down_write(&c->snapshot_create_lock);
for_each_btree_key(trans, iter, BTREE_ID_snapshots, ret = for_each_btree_key2(trans, iter, BTREE_ID_snapshots,
POS_MIN, 0, k, ret) { POS_MIN, 0, k, ({
u32 snapshot = k.k->p.offset; u32 snapshot = k.k->p.offset;
u32 equiv = bch2_snapshot_equiv(c, snapshot); u32 equiv = bch2_snapshot_equiv(c, snapshot);
if (equiv != snapshot) equiv != snapshot
snapshot_list_add(c, &deleted_interior, snapshot); ? snapshot_list_add(c, &deleted_interior, snapshot)
} : 0;
bch2_trans_iter_exit(trans, &iter); }));
if (ret) if (ret) {
bch_err_msg(c, ret, "walking snapshots");
goto err_create_lock; goto err_create_lock;
}
/* /*
* Fixing children of deleted snapshots can't be done completely * Fixing children of deleted snapshots can't be done completely
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment