Commit ec7ccbde authored and committed by Kent Overstreet

bcachefs: Fix CPU usage in journal read path

In journal_entry_add(), we were repeatedly scanning the journal entries
radix tree for old entries that can be freed, giving O(n^2) behaviour
over the whole journal read. This patch remembers the previous last_seq,
so each scan for entries to free resumes where the last one stopped
instead of starting from the beginning.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
parent 6e811bbb
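
To make the shape of the fix concrete before the diff: a minimal standalone C sketch of the pattern, not bcachefs code. entries[], scan_from and drop_old_entries() are made-up names standing in for the journal entries radix tree, jlist->last_seq and the free loop in journal_entry_add(); the toy assumes the entry at index i has sequence number i, whereas the real code maps a seq to a radix index via journal_entry_radix_idx().

#include <stdio.h>

#define NR_ENTRIES 16

static unsigned long entries[NR_ENTRIES];	/* stand-in for the radix tree */
static unsigned long scan_from;			/* plays the role of jlist->last_seq */

/* Free every entry with seq < last_seq. Instead of always starting at
 * index 0 (O(n^2) across the whole read), resume from where the
 * previous pass stopped: */
static void drop_old_entries(unsigned long last_seq)
{
	unsigned long idx;

	for (idx = scan_from; idx < NR_ENTRIES; idx++) {
		if (!entries[idx])
			continue;		/* hole: already freed */
		if (entries[idx] >= last_seq)
			break;			/* everything past here is still needed */
		entries[idx] = 0;		/* "journal_replay_free()" */
	}

	if (last_seq > scan_from)
		scan_from = last_seq;		/* remember for the next pass */
}

int main(void)
{
	unsigned long i;

	for (i = 1; i < NR_ENTRIES; i++)
		entries[i] = i;

	drop_old_entries(5);	/* frees seqs 1-4, next pass starts at 5 */
	drop_old_entries(9);	/* resumes at 5, frees 5-8 */

	for (i = 0; i < NR_ENTRIES; i++)
		printf("%lu ", entries[i]);
	printf("\n");
	return 0;
}

Because scan_from only ever moves forward, each slot is visited a bounded number of times across all calls, which is what turns the quadratic rescan into a single linear pass.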
@@ -16,17 +16,17 @@
 #include "replicas.h"
 #include "trace.h"
 
-static inline u32 journal_entry_radix_idx(struct bch_fs *c,
-					  struct jset *j)
+static inline u32 journal_entry_radix_idx(struct bch_fs *c, u64 seq)
 {
-	return (le64_to_cpu(j->seq) - c->journal_entries_base_seq) & (~0U >> 1);
+	return (seq - c->journal_entries_base_seq) & (~0U >> 1);
 }
 
 static void __journal_replay_free(struct bch_fs *c,
 				  struct journal_replay *i)
 {
 	struct journal_replay **p =
-		genradix_ptr(&c->journal_entries, journal_entry_radix_idx(c, &i->j));
+		genradix_ptr(&c->journal_entries,
+			     journal_entry_radix_idx(c, le64_to_cpu(i->j.seq)));
 
 	BUG_ON(*p != i);
 	*p = NULL;
@@ -44,6 +44,7 @@ static void journal_replay_free(struct bch_fs *c, struct journal_replay *i)
 
 struct journal_list {
 	struct closure		cl;
+	u64			last_seq;
 	struct mutex		lock;
 	int			ret;
 };
@@ -64,55 +65,50 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca,
 	struct journal_replay **_i, *i, *dup;
 	struct journal_ptr *ptr;
 	size_t bytes = vstruct_bytes(j);
-	u64 last_seq = 0;
+	u64 last_seq = !JSET_NO_FLUSH(j) ? le64_to_cpu(j->last_seq) : 0;
 	int ret = JOURNAL_ENTRY_ADD_OK;
 
+	/* Is this entry older than the range we need? */
+	if (!c->opts.read_entire_journal &&
+	    le64_to_cpu(j->seq) < jlist->last_seq)
+		return JOURNAL_ENTRY_ADD_OUT_OF_RANGE;
+
 	/*
-	 * Xarrays are indexed by a ulong, not a u64, so we can't index them by
-	 * sequence number directly:
-	 * Assume instead that they will all fall within the range of +-2billion
-	 * of the filrst one we find.
+	 * genradixes are indexed by a ulong, not a u64, so we can't index them
+	 * by sequence number directly: Assume instead that they will all fall
+	 * within the range of +-2billion of the first one we find.
 	 */
 	if (!c->journal_entries_base_seq)
 		c->journal_entries_base_seq = max_t(s64, 1, le64_to_cpu(j->seq) - S32_MAX);
 
-#if 0
-	list_for_each_entry_reverse(i, jlist->head, list) {
-		if (!JSET_NO_FLUSH(&i->j)) {
-			last_seq = le64_to_cpu(i->j.last_seq);
-			break;
-		}
-	}
-#endif
-
-	/* Is this entry older than the range we need? */
-	if (!c->opts.read_entire_journal &&
-	    le64_to_cpu(j->seq) < last_seq) {
-		ret = JOURNAL_ENTRY_ADD_OUT_OF_RANGE;
-		goto out;
-	}
-
 	/* Drop entries we don't need anymore */
-	if (!JSET_NO_FLUSH(j) && !c->opts.read_entire_journal) {
-		genradix_for_each(&c->journal_entries, iter, _i) {
+	if (last_seq > jlist->last_seq && !c->opts.read_entire_journal) {
+		genradix_for_each_from(&c->journal_entries, iter, _i,
+				       journal_entry_radix_idx(c, jlist->last_seq)) {
 			i = *_i;
 
-			if (!i)
+			if (!i || i->ignore)
				continue;
 
-			if (le64_to_cpu(i->j.seq) >= le64_to_cpu(j->last_seq))
+			if (le64_to_cpu(i->j.seq) >= last_seq)
				break;
 			journal_replay_free(c, i);
 		}
 	}
 
-	_i = genradix_ptr(&c->journal_entries, journal_entry_radix_idx(c, j));
-	dup = _i ? *_i : NULL;
+	jlist->last_seq = max(jlist->last_seq, last_seq);
+
+	_i = genradix_ptr_alloc(&c->journal_entries,
+				journal_entry_radix_idx(c, le64_to_cpu(j->seq)),
+				GFP_KERNEL);
+	if (!_i)
+		return -ENOMEM;
 
 	/*
 	 * Duplicate journal entries? If so we want the one that didn't have a
 	 * checksum error:
 	 */
+	dup = *_i;
 	if (dup) {
 		if (dup->bad) {
 			/* we'll replace @dup: */
@@ -130,10 +126,8 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca,
 	}
 
 	i = kvpmalloc(offsetof(struct journal_replay, j) + bytes, GFP_KERNEL);
-	if (!i) {
-		ret = -ENOMEM;
-		goto out;
-	}
+	if (!i)
+		return -ENOMEM;
 
 	i->nr_ptrs = 0;
 	i->bad = bad;
@@ -146,14 +140,6 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca,
 		__journal_replay_free(c, dup);
 	}
 
-	_i = genradix_ptr_alloc(&c->journal_entries,
-				journal_entry_radix_idx(c, &i->j),
-				GFP_KERNEL);
-	if (!_i) {
-		bch_err(c, "failed to allocate c->journal_entries entry");
-		ret = -ENOMEM;
-		goto out;
-	}
-
 	*_i = i;
 found:
@@ -1064,6 +1050,7 @@ int bch2_journal_read(struct bch_fs *c, u64 *blacklist_seq, u64 *start_seq)
 
 	closure_init_stack(&jlist.cl);
 	mutex_init(&jlist.lock);
+	jlist.last_seq = 0;
 	jlist.ret = 0;
 
 	for_each_member_device(ca, c, iter) {
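
A side note on the index mapping touched by the first hunk, for readers unfamiliar with genradixes: sequence numbers are u64 but a genradix is indexed by ulong, so journal_entry_radix_idx() stores each entry at its 31-bit distance from a base anchored just below the first sequence number seen. A standalone sketch of that computation, with made-up names (radix_idx(), base_seq) and values chosen only for illustration:

#include <stdio.h>
#include <stdint.h>

static uint64_t base_seq;	/* plays the role of c->journal_entries_base_seq */

/* Same computation as journal_entry_radix_idx(): distance from the
 * base, truncated to 31 bits so it fits in a ulong index: */
static uint32_t radix_idx(uint64_t seq)
{
	return (uint32_t)((seq - base_seq) & (~0U >> 1));
}

int main(void)
{
	uint64_t first_seq = 10000000000ULL;	/* pretend first journal seq read */

	/* Anchor the base S32_MAX below the first seq, clamped to 1, as
	 * the patch does: */
	base_seq = first_seq > INT32_MAX ? first_seq - INT32_MAX : 1;

	printf("%u\n", (unsigned)radix_idx(base_seq));		/* 0 */
	printf("%u\n", (unsigned)radix_idx(base_seq + 100));	/* 100 */
	printf("%u\n", (unsigned)radix_idx(first_seq));		/* INT32_MAX */
	return 0;
}

Clamping the base to at least 1 and anchoring it S32_MAX below the first seq keeps sequence numbers within the assumed +-2billion window mapped to usable indices.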