Commit 6088234c authored by Kent Overstreet

bcachefs: JOURNAL_SPACE_LOW

"bcachefs; Fix deadlock in bch2_btree_update_start()" was a significant
performance regression (nearly 50%) on multithreaded random writes with
fio.

The reason is that the journal watermark checks multiple things,
including the state of the btree write buffer, and on multithreaded
update heavy workloads we're bottlenecked on write buffer flushing - we
don't want kicking off btree updates to depend on the state of the write
buffer.

This isn't strictly correct; the interior btree update path does do
write buffer updates, but it's a tiny fraction of total accounting
updates and we're more concerned with space in the journal itself.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent 05801b65
...@@ -659,7 +659,7 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans, ...@@ -659,7 +659,7 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
commit_flags |= BCH_WATERMARK_reclaim; commit_flags |= BCH_WATERMARK_reclaim;
if (ck->journal.seq != journal_last_seq(j) || if (ck->journal.seq != journal_last_seq(j) ||
j->watermark == BCH_WATERMARK_stripe) !test_bit(JOURNAL_SPACE_LOW, &c->journal.flags))
commit_flags |= BCH_TRANS_COMMIT_no_journal_res; commit_flags |= BCH_TRANS_COMMIT_no_journal_res;
ret = bch2_btree_iter_traverse(&b_iter) ?: ret = bch2_btree_iter_traverse(&b_iter) ?:
......
...@@ -1125,18 +1125,14 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, ...@@ -1125,18 +1125,14 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
flags &= ~BCH_WATERMARK_MASK; flags &= ~BCH_WATERMARK_MASK;
flags |= watermark; flags |= watermark;
if (watermark < c->journal.watermark) { if (watermark < BCH_WATERMARK_reclaim &&
struct journal_res res = { 0 }; test_bit(JOURNAL_SPACE_LOW, &c->journal.flags)) {
unsigned journal_flags = watermark|JOURNAL_RES_GET_CHECK; if (flags & BCH_TRANS_COMMIT_journal_reclaim)
return ERR_PTR(-BCH_ERR_journal_reclaim_would_deadlock);
if ((flags & BCH_TRANS_COMMIT_journal_reclaim) && bch2_trans_unlock(trans);
watermark < BCH_WATERMARK_reclaim) wait_event(c->journal.wait, !test_bit(JOURNAL_SPACE_LOW, &c->journal.flags));
journal_flags |= JOURNAL_RES_GET_NONBLOCK; ret = bch2_trans_relock(trans);
ret = drop_locks_do(trans,
bch2_journal_res_get(&c->journal, &res, 1, journal_flags));
if (bch2_err_matches(ret, BCH_ERR_operation_blocked))
ret = -BCH_ERR_journal_reclaim_would_deadlock;
if (ret) if (ret)
return ERR_PTR(ret); return ERR_PTR(ret);
} }
......
...@@ -67,6 +67,8 @@ void bch2_journal_set_watermark(struct journal *j) ...@@ -67,6 +67,8 @@ void bch2_journal_set_watermark(struct journal *j)
track_event_change(&c->times[BCH_TIME_blocked_write_buffer_full], low_on_wb)) track_event_change(&c->times[BCH_TIME_blocked_write_buffer_full], low_on_wb))
trace_and_count(c, journal_full, c); trace_and_count(c, journal_full, c);
mod_bit(JOURNAL_SPACE_LOW, &j->flags, low_on_space || low_on_pin);
swap(watermark, j->watermark); swap(watermark, j->watermark);
if (watermark > j->watermark) if (watermark > j->watermark)
journal_wake(j); journal_wake(j);
......
...@@ -134,6 +134,7 @@ enum journal_flags { ...@@ -134,6 +134,7 @@ enum journal_flags {
JOURNAL_STARTED, JOURNAL_STARTED,
JOURNAL_MAY_SKIP_FLUSH, JOURNAL_MAY_SKIP_FLUSH,
JOURNAL_NEED_FLUSH_WRITE, JOURNAL_NEED_FLUSH_WRITE,
JOURNAL_SPACE_LOW,
}; };
/* Reasons we may fail to get a journal reservation: */ /* Reasons we may fail to get a journal reservation: */
......
...@@ -788,6 +788,14 @@ static inline int copy_from_user_errcode(void *to, const void __user *from, unsi ...@@ -788,6 +788,14 @@ static inline int copy_from_user_errcode(void *to, const void __user *from, unsi
#endif #endif
/*
 * mod_bit - force bit @nr of the bitmap at @addr to the value @v
 *
 * Calls set_bit() when @v is true and clear_bit() when it is false, so a
 * caller can publish a computed boolean condition as a flag bit in one call.
 */
static inline void mod_bit(long nr, volatile unsigned long *addr, bool v)
{
	if (!v) {
		clear_bit(nr, addr);
		return;
	}

	set_bit(nr, addr);
}
static inline void __set_bit_le64(size_t bit, __le64 *addr) static inline void __set_bit_le64(size_t bit, __le64 *addr)
{ {
addr[bit / 64] |= cpu_to_le64(BIT_ULL(bit % 64)); addr[bit / 64] |= cpu_to_le64(BIT_ULL(bit % 64));
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment