Commit edfbba58, authored and committed by Kent Overstreet

bcachefs: Add btree node prefetching to bch2_btree_and_journal_walk()

bch2_btree_and_journal_walk() walks the btree overlaying keys from the
journal; it was introduced so that we could read in the alloc btree
prior to journal replay being done, when journalling of updates to
interior btree nodes was introduced.

But it didn't have btree node prefetching, which introduced a severe
regression with mount times, particularly on spinning rust. This patch
implements btree node prefetching for the btree + journal walk,
hopefully fixing that.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent 2a3731e3
...@@ -1008,20 +1008,20 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c, ...@@ -1008,20 +1008,20 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
} }
void bch2_btree_node_prefetch(struct bch_fs *c, struct btree_iter *iter, void bch2_btree_node_prefetch(struct bch_fs *c, struct btree_iter *iter,
const struct bkey_i *k, unsigned level) const struct bkey_i *k,
enum btree_id btree_id, unsigned level)
{ {
struct btree_cache *bc = &c->btree_cache; struct btree_cache *bc = &c->btree_cache;
struct btree *b; struct btree *b;
BUG_ON(!btree_node_locked(iter, level + 1)); BUG_ON(iter && !btree_node_locked(iter, level + 1));
BUG_ON(level >= BTREE_MAX_DEPTH); BUG_ON(level >= BTREE_MAX_DEPTH);
b = btree_cache_find(bc, k); b = btree_cache_find(bc, k);
if (b) if (b)
return; return;
bch2_btree_node_fill(c, iter, k, iter->btree_id, bch2_btree_node_fill(c, iter, k, btree_id, level, SIX_LOCK_read, false);
level, SIX_LOCK_read, false);
} }
void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c, void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c,
......
...@@ -32,7 +32,7 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *, struct btree_iter *, ...@@ -32,7 +32,7 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *, struct btree_iter *,
struct btree *, enum btree_node_sibling); struct btree *, enum btree_node_sibling);
void bch2_btree_node_prefetch(struct bch_fs *, struct btree_iter *, void bch2_btree_node_prefetch(struct bch_fs *, struct btree_iter *,
const struct bkey_i *, unsigned); const struct bkey_i *, enum btree_id, unsigned);
void bch2_fs_btree_cache_exit(struct bch_fs *); void bch2_fs_btree_cache_exit(struct bch_fs *);
int bch2_fs_btree_cache_init(struct bch_fs *); int bch2_fs_btree_cache_init(struct bch_fs *);
......
...@@ -1067,7 +1067,8 @@ static void btree_iter_prefetch(struct btree_iter *iter) ...@@ -1067,7 +1067,8 @@ static void btree_iter_prefetch(struct btree_iter *iter)
break; break;
bch2_bkey_buf_unpack(&tmp, c, l->b, k); bch2_bkey_buf_unpack(&tmp, c, l->b, k);
bch2_btree_node_prefetch(c, iter, tmp.k, iter->level - 1); bch2_btree_node_prefetch(c, iter, tmp.k, iter->btree_id,
iter->level - 1);
} }
if (!was_locked) if (!was_locked)
......
...@@ -206,6 +206,31 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *i ...@@ -206,6 +206,31 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *i
/* Walk btree, overlaying keys from the journal: */ /* Walk btree, overlaying keys from the journal: */
static void btree_and_journal_iter_prefetch(struct bch_fs *c, struct btree *b,
struct btree_and_journal_iter iter)
{
unsigned i = 0, nr = b->c.level > 1 ? 2 : 16;
struct bkey_s_c k;
struct bkey_buf tmp;
BUG_ON(!b->c.level);
bch2_bkey_buf_init(&tmp);
while (i < nr &&
(k = bch2_btree_and_journal_iter_peek(&iter)).k) {
bch2_bkey_buf_reassemble(&tmp, c, k);
bch2_btree_node_prefetch(c, NULL, tmp.k,
b->c.btree_id, b->c.level - 1);
bch2_btree_and_journal_iter_advance(&iter);
i++;
}
bch2_bkey_buf_exit(&tmp, c);
}
static int bch2_btree_and_journal_walk_recurse(struct bch_fs *c, struct btree *b, static int bch2_btree_and_journal_walk_recurse(struct bch_fs *c, struct btree *b,
struct journal_keys *journal_keys, struct journal_keys *journal_keys,
enum btree_id btree_id, enum btree_id btree_id,
...@@ -214,8 +239,11 @@ static int bch2_btree_and_journal_walk_recurse(struct bch_fs *c, struct btree *b ...@@ -214,8 +239,11 @@ static int bch2_btree_and_journal_walk_recurse(struct bch_fs *c, struct btree *b
{ {
struct btree_and_journal_iter iter; struct btree_and_journal_iter iter;
struct bkey_s_c k; struct bkey_s_c k;
struct bkey_buf tmp;
struct btree *child;
int ret = 0; int ret = 0;
bch2_bkey_buf_init(&tmp);
bch2_btree_and_journal_iter_init_node_iter(&iter, journal_keys, b); bch2_btree_and_journal_iter_init_node_iter(&iter, journal_keys, b);
while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) { while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
...@@ -224,23 +252,19 @@ static int bch2_btree_and_journal_walk_recurse(struct bch_fs *c, struct btree *b ...@@ -224,23 +252,19 @@ static int bch2_btree_and_journal_walk_recurse(struct bch_fs *c, struct btree *b
break; break;
if (b->c.level) { if (b->c.level) {
struct btree *child;
struct bkey_buf tmp;
bch2_bkey_buf_init(&tmp);
bch2_bkey_buf_reassemble(&tmp, c, k); bch2_bkey_buf_reassemble(&tmp, c, k);
k = bkey_i_to_s_c(tmp.k);
bch2_btree_and_journal_iter_advance(&iter); bch2_btree_and_journal_iter_advance(&iter);
child = bch2_btree_node_get_noiter(c, tmp.k, child = bch2_btree_node_get_noiter(c, tmp.k,
b->c.btree_id, b->c.level - 1); b->c.btree_id, b->c.level - 1);
bch2_bkey_buf_exit(&tmp, c);
ret = PTR_ERR_OR_ZERO(child); ret = PTR_ERR_OR_ZERO(child);
if (ret) if (ret)
break; break;
btree_and_journal_iter_prefetch(c, b, iter);
ret = (node_fn ? node_fn(c, b) : 0) ?: ret = (node_fn ? node_fn(c, b) : 0) ?:
bch2_btree_and_journal_walk_recurse(c, child, bch2_btree_and_journal_walk_recurse(c, child,
journal_keys, btree_id, node_fn, key_fn); journal_keys, btree_id, node_fn, key_fn);
...@@ -253,6 +277,7 @@ static int bch2_btree_and_journal_walk_recurse(struct bch_fs *c, struct btree *b ...@@ -253,6 +277,7 @@ static int bch2_btree_and_journal_walk_recurse(struct bch_fs *c, struct btree *b
} }
} }
bch2_bkey_buf_exit(&tmp, c);
return ret; return ret;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment