Commit e6fab655 authored by Kent Overstreet's avatar Kent Overstreet

bcachefs: Avoid taking journal lock unnecessarily

Previously, any time we failed to get a journal reservation we'd retry,
with the journal lock held; but this isn't necessary given
wait_event()/wake_up() ordering.

This avoids performance cliffs when the journal starts to get backed up
and lock contention shoots up.
Signed-off-by: default avatarKent Overstreet <kent.overstreet@linux.dev>
parent bdec47f5
...@@ -27,6 +27,26 @@ static const char * const bch2_journal_errors[] = { ...@@ -27,6 +27,26 @@ static const char * const bch2_journal_errors[] = {
NULL NULL
}; };
static inline bool journal_seq_unwritten(struct journal *j, u64 seq)
{
return seq > j->seq_ondisk;
}
static bool __journal_entry_is_open(union journal_res_state state)
{
return state.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL;
}
static inline unsigned nr_unwritten_journal_entries(struct journal *j)
{
return atomic64_read(&j->seq) - j->seq_ondisk;
}
static bool journal_entry_is_open(struct journal *j)
{
return __journal_entry_is_open(j->reservations);
}
static void bch2_journal_buf_to_text(struct printbuf *out, struct journal *j, u64 seq) static void bch2_journal_buf_to_text(struct printbuf *out, struct journal *j, u64 seq)
{ {
union journal_res_state s = READ_ONCE(j->reservations); union journal_res_state s = READ_ONCE(j->reservations);
...@@ -66,26 +86,7 @@ static void bch2_journal_bufs_to_text(struct printbuf *out, struct journal *j) ...@@ -66,26 +86,7 @@ static void bch2_journal_bufs_to_text(struct printbuf *out, struct journal *j)
seq <= journal_cur_seq(j); seq <= journal_cur_seq(j);
seq++) seq++)
bch2_journal_buf_to_text(out, j, seq); bch2_journal_buf_to_text(out, j, seq);
} prt_printf(out, "last buf %s\n", journal_entry_is_open(j) ? "open" : "closed");
static inline bool journal_seq_unwritten(struct journal *j, u64 seq)
{
return seq > j->seq_ondisk;
}
static bool __journal_entry_is_open(union journal_res_state state)
{
return state.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL;
}
static inline unsigned nr_unwritten_journal_entries(struct journal *j)
{
return atomic64_read(&j->seq) - j->seq_ondisk;
}
static bool journal_entry_is_open(struct journal *j)
{
return __journal_entry_is_open(j->reservations);
} }
static inline struct journal_buf * static inline struct journal_buf *
...@@ -468,33 +469,32 @@ static int __journal_res_get(struct journal *j, struct journal_res *res, ...@@ -468,33 +469,32 @@ static int __journal_res_get(struct journal *j, struct journal_res *res,
if (journal_res_get_fast(j, res, flags)) if (journal_res_get_fast(j, res, flags))
return 0; return 0;
if (bch2_journal_error(j)) if ((flags & BCH_WATERMARK_MASK) < j->watermark) {
return -BCH_ERR_erofs_journal_err; ret = JOURNAL_ERR_journal_full;
can_discard = j->can_discard;
goto out;
}
spin_lock(&j->lock); if (j->blocked)
return -BCH_ERR_journal_res_get_blocked;
/* check once more in case somebody else shut things down... */ if (bch2_journal_error(j))
if (bch2_journal_error(j)) {
spin_unlock(&j->lock);
return -BCH_ERR_erofs_journal_err; return -BCH_ERR_erofs_journal_err;
if (nr_unwritten_journal_entries(j) == ARRAY_SIZE(j->buf) && !journal_entry_is_open(j)) {
ret = JOURNAL_ERR_max_in_flight;
goto out;
} }
spin_lock(&j->lock);
/* /*
* Recheck after taking the lock, so we don't race with another thread * Recheck after taking the lock, so we don't race with another thread
* that just did journal_entry_open() and call bch2_journal_entry_close() * that just did journal_entry_open() and call bch2_journal_entry_close()
* unnecessarily * unnecessarily
*/ */
if (journal_res_get_fast(j, res, flags)) { if (journal_res_get_fast(j, res, flags)) {
spin_unlock(&j->lock); ret = 0;
return 0;
}
if ((flags & BCH_WATERMARK_MASK) < j->watermark) {
/*
* Don't want to close current journal entry, just need to
* invoke reclaim:
*/
ret = JOURNAL_ERR_journal_full;
goto unlock; goto unlock;
} }
...@@ -510,30 +510,31 @@ static int __journal_res_get(struct journal *j, struct journal_res *res, ...@@ -510,30 +510,31 @@ static int __journal_res_get(struct journal *j, struct journal_res *res,
j->buf_size_want = max(j->buf_size_want, buf->buf_size << 1); j->buf_size_want = max(j->buf_size_want, buf->buf_size << 1);
__journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL, false); __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL, false);
ret = journal_entry_open(j); ret = journal_entry_open(j) ?: JOURNAL_ERR_retry;
if (ret == JOURNAL_ERR_max_in_flight) {
track_event_change(&c->times[BCH_TIME_blocked_journal_max_in_flight],
&j->max_in_flight_start, true);
if (trace_journal_entry_full_enabled()) {
struct printbuf buf = PRINTBUF;
buf.atomic++;
bch2_journal_bufs_to_text(&buf, j);
trace_journal_entry_full(c, buf.buf);
printbuf_exit(&buf);
}
count_event(c, journal_entry_full);
}
unlock: unlock:
can_discard = j->can_discard; can_discard = j->can_discard;
spin_unlock(&j->lock); spin_unlock(&j->lock);
out:
if (!ret) if (ret == JOURNAL_ERR_retry)
goto retry; goto retry;
if (!ret)
return 0;
if (journal_error_check_stuck(j, ret, flags)) if (journal_error_check_stuck(j, ret, flags))
ret = -BCH_ERR_journal_res_get_blocked; ret = -BCH_ERR_journal_res_get_blocked;
if (ret == JOURNAL_ERR_max_in_flight &&
track_event_change(&c->times[BCH_TIME_blocked_journal_max_in_flight],
&j->max_in_flight_start, true)) {
struct printbuf buf = PRINTBUF;
prt_printf(&buf, "seq %llu\n", journal_cur_seq(j));
bch2_journal_bufs_to_text(&buf, j);
trace_journal_entry_full(c, buf.buf);
printbuf_exit(&buf);
count_event(c, journal_entry_full);
}
/* /*
* Journal is full - can't rely on reclaim from work item due to * Journal is full - can't rely on reclaim from work item due to
* freezing: * freezing:
......
...@@ -134,6 +134,7 @@ enum journal_flags { ...@@ -134,6 +134,7 @@ enum journal_flags {
/* Reasons we may fail to get a journal reservation: */ /* Reasons we may fail to get a journal reservation: */
#define JOURNAL_ERRORS() \ #define JOURNAL_ERRORS() \
x(ok) \ x(ok) \
x(retry) \
x(blocked) \ x(blocked) \
x(max_in_flight) \ x(max_in_flight) \
x(journal_full) \ x(journal_full) \
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment