Commit 31f63fd1, authored and committed by Kent Overstreet

bcachefs: Introduce a separate journal watermark for copygc

Since journal reclaim -> btree key cache flushing may require the
allocation of new btree nodes, it has an implicit dependency on copygc
in order to make forward progress - so we should avoid blocking copygc
unless the journal is really close to full.

This introduces watermarks to replace our single MAY_GET_UNRESERVED bit
in the journal, and adds a watermark for copygc and plumbs it through.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent d905f67e
......@@ -670,7 +670,6 @@ static int bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca,
ret = bch2_trans_do(c, NULL, &commit_seq,
BTREE_INSERT_NOCHECK_RW|
BTREE_INSERT_NOFAIL|
BTREE_INSERT_JOURNAL_RESERVED|
flags,
bucket_invalidate_btree(&trans, ca, b, &u));
......
......@@ -421,7 +421,7 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_USE_RESERVE|
(ck->journal.seq == journal_last_seq(j)
? BTREE_INSERT_JOURNAL_RESERVED
? JOURNAL_WATERMARK_reserved
: 0)|
commit_flags);
if (ret) {
......
......@@ -16,12 +16,12 @@ bool bch2_btree_bset_insert_key(struct btree_trans *, struct btree_path *,
void bch2_btree_add_journal_pin(struct bch_fs *, struct btree *, u64);
enum btree_insert_flags {
__BTREE_INSERT_NOFAIL,
/* First two bits for journal watermark: */
__BTREE_INSERT_NOFAIL = 2,
__BTREE_INSERT_NOCHECK_RW,
__BTREE_INSERT_LAZY_RW,
__BTREE_INSERT_USE_RESERVE,
__BTREE_INSERT_JOURNAL_REPLAY,
__BTREE_INSERT_JOURNAL_RESERVED,
__BTREE_INSERT_JOURNAL_RECLAIM,
__BTREE_INSERT_NOWAIT,
__BTREE_INSERT_GC_LOCK_HELD,
......@@ -41,9 +41,6 @@ enum btree_insert_flags {
/* Insert is for journal replay - don't get journal reservations: */
#define BTREE_INSERT_JOURNAL_REPLAY (1 << __BTREE_INSERT_JOURNAL_REPLAY)
/* Indicates that we have pre-reserved space in the journal: */
#define BTREE_INSERT_JOURNAL_RESERVED (1 << __BTREE_INSERT_JOURNAL_RESERVED)
/* Insert is being called from journal reclaim path: */
#define BTREE_INSERT_JOURNAL_RECLAIM (1 << __BTREE_INSERT_JOURNAL_RECLAIM)
......
......@@ -599,7 +599,7 @@ static void btree_update_nodes_written(struct btree_update *as)
BTREE_INSERT_NOFAIL|
BTREE_INSERT_NOCHECK_RW|
BTREE_INSERT_JOURNAL_RECLAIM|
BTREE_INSERT_JOURNAL_RESERVED,
JOURNAL_WATERMARK_reserved,
btree_update_nodes_written_trans(&trans, as));
bch2_trans_exit(&trans);
......@@ -964,14 +964,11 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
? BCH_DISK_RESERVATION_NOFAIL : 0;
unsigned nr_nodes[2];
unsigned update_level = level;
int journal_flags = 0;
int journal_flags = flags & JOURNAL_WATERMARK_MASK;
int ret = 0;
BUG_ON(!path->should_be_locked);
if (flags & BTREE_INSERT_JOURNAL_RESERVED)
journal_flags |= JOURNAL_RES_GET_RESERVED;
closure_init_stack(&cl);
retry:
nr_nodes[0] = nr_nodes[1] = 0;
......@@ -1972,7 +1969,7 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans,
BTREE_INSERT_NOCHECK_RW|
BTREE_INSERT_USE_RESERVE|
BTREE_INSERT_JOURNAL_RECLAIM|
BTREE_INSERT_JOURNAL_RESERVED);
JOURNAL_WATERMARK_reserved);
if (ret)
goto err;
......
......@@ -296,11 +296,10 @@ static inline int bch2_trans_journal_res_get(struct btree_trans *trans,
struct bch_fs *c = trans->c;
int ret;
if (trans->flags & BTREE_INSERT_JOURNAL_RESERVED)
flags |= JOURNAL_RES_GET_RESERVED;
ret = bch2_journal_res_get(&c->journal, &trans->journal_res,
trans->journal_u64s, flags);
trans->journal_u64s,
flags|
(trans->flags & JOURNAL_WATERMARK_MASK));
return ret == -EAGAIN ? BTREE_INSERT_NEED_JOURNAL_RES : ret;
}
......@@ -902,8 +901,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
ret = bch2_journal_preres_get(&c->journal,
&trans->journal_preres, trans->journal_preres_u64s,
JOURNAL_RES_GET_NONBLOCK|
((trans->flags & BTREE_INSERT_JOURNAL_RESERVED)
? JOURNAL_RES_GET_RESERVED : 0));
(trans->flags & JOURNAL_WATERMARK_MASK));
if (unlikely(ret == -EAGAIN))
ret = bch2_trans_journal_preres_get_cold(trans,
trans->journal_preres_u64s, trace_ip);
......@@ -988,7 +986,7 @@ int bch2_trans_commit_error(struct btree_trans *trans,
bch2_trans_unlock(trans);
if ((trans->flags & BTREE_INSERT_JOURNAL_RECLAIM) &&
!(trans->flags & BTREE_INSERT_JOURNAL_RESERVED)) {
!(trans->flags & JOURNAL_WATERMARK_reserved)) {
trans->restarted = true;
ret = -EAGAIN;
break;
......
......@@ -19,6 +19,18 @@
#include "super-io.h"
#include "trace.h"
/*
 * Human-readable names for the journal watermark and journal error enums.
 *
 * x(n) is temporarily defined to stringize each entry of the
 * JOURNAL_WATERMARKS() / JOURNAL_ERRORS() lists, so each table holds one
 * string per list entry, in list order, followed by a terminating NULL.
 * The tables are indexed directly by the corresponding value (e.g.
 * bch2_journal_watermarks[j->watermark], bch2_journal_errors[ret]) when
 * printing debug output.
 */
#define x(n) #n,
static const char * const bch2_journal_watermarks[] = {
JOURNAL_WATERMARKS()
NULL
};
static const char * const bch2_journal_errors[] = {
JOURNAL_ERRORS()
NULL
};
#undef x
static inline bool journal_seq_unwritten(struct journal *j, u64 seq)
{
return seq > j->seq_ondisk;
......@@ -207,19 +219,19 @@ static int journal_entry_open(struct journal *j)
BUG_ON(BCH_SB_CLEAN(c->disk_sb.sb));
if (j->blocked)
return cur_entry_blocked;
return JOURNAL_ERR_blocked;
if (j->cur_entry_error)
return j->cur_entry_error;
if (bch2_journal_error(j))
return cur_entry_insufficient_devices; /* -EROFS */
return JOURNAL_ERR_insufficient_devices; /* -EROFS */
if (!fifo_free(&j->pin))
return cur_entry_journal_pin_full;
return JOURNAL_ERR_journal_pin_full;
if (nr_unwritten_journal_entries(j) == ARRAY_SIZE(j->buf))
return cur_entry_max_in_flight;
return JOURNAL_ERR_max_in_flight;
BUG_ON(!j->cur_entry_sectors);
......@@ -238,7 +250,7 @@ static int journal_entry_open(struct journal *j)
u64s = clamp_t(int, u64s, 0, JOURNAL_ENTRY_CLOSED_VAL - 1);
if (u64s <= 0)
return cur_entry_journal_full;
return JOURNAL_ERR_journal_full;
if (fifo_empty(&j->pin) && j->reclaim_thread)
wake_up_process(j->reclaim_thread);
......@@ -354,13 +366,12 @@ static int __journal_res_get(struct journal *j, struct journal_res *res,
return 0;
}
if (!(flags & JOURNAL_RES_GET_RESERVED) &&
!test_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags)) {
if ((flags & JOURNAL_WATERMARK_MASK) < j->watermark) {
/*
* Don't want to close current journal entry, just need to
* invoke reclaim:
*/
ret = cur_entry_journal_full;
ret = JOURNAL_ERR_journal_full;
goto unlock;
}
......@@ -378,10 +389,10 @@ static int __journal_res_get(struct journal *j, struct journal_res *res,
__journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL);
ret = journal_entry_open(j);
if (ret == cur_entry_max_in_flight)
if (ret == JOURNAL_ERR_max_in_flight)
trace_journal_entry_full(c);
unlock:
if ((ret && ret != cur_entry_insufficient_devices) &&
if ((ret && ret != JOURNAL_ERR_insufficient_devices) &&
!j->res_get_blocked_start) {
j->res_get_blocked_start = local_clock() ?: 1;
trace_journal_full(c);
......@@ -393,14 +404,15 @@ static int __journal_res_get(struct journal *j, struct journal_res *res,
if (!ret)
goto retry;
if ((ret == cur_entry_journal_full ||
ret == cur_entry_journal_pin_full) &&
if ((ret == JOURNAL_ERR_journal_full ||
ret == JOURNAL_ERR_journal_pin_full) &&
!can_discard &&
!nr_unwritten_journal_entries(j) &&
(flags & JOURNAL_RES_GET_RESERVED)) {
(flags & JOURNAL_WATERMARK_MASK) == JOURNAL_WATERMARK_reserved) {
struct printbuf buf = PRINTBUF;
bch_err(c, "Journal stuck! Hava a pre-reservation but journal full");
bch_err(c, "Journal stuck! Hava a pre-reservation but journal full (ret %s)",
bch2_journal_errors[ret]);
bch2_journal_debug_to_text(&buf, j);
bch_err(c, "%s", buf.buf);
......@@ -418,8 +430,8 @@ static int __journal_res_get(struct journal *j, struct journal_res *res,
* Journal is full - can't rely on reclaim from work item due to
* freezing:
*/
if ((ret == cur_entry_journal_full ||
ret == cur_entry_journal_pin_full) &&
if ((ret == JOURNAL_ERR_journal_full ||
ret == JOURNAL_ERR_journal_pin_full) &&
!(flags & JOURNAL_RES_GET_NONBLOCK)) {
if (can_discard) {
bch2_journal_do_discards(j);
......@@ -432,7 +444,7 @@ static int __journal_res_get(struct journal *j, struct journal_res *res,
}
}
return ret == cur_entry_insufficient_devices ? -EROFS : -EAGAIN;
return ret == JOURNAL_ERR_insufficient_devices ? -EROFS : -EAGAIN;
}
/*
......@@ -1187,13 +1199,14 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
rcu_read_lock();
s = READ_ONCE(j->reservations);
pr_buf(out, "dirty journal entries:\t%llu\n", fifo_used(&j->pin));
pr_buf(out, "dirty journal entries:\t%llu/%llu\n",fifo_used(&j->pin), j->pin.size);
pr_buf(out, "seq:\t\t\t%llu\n", journal_cur_seq(j));
pr_buf(out, "seq_ondisk:\t\t%llu\n", j->seq_ondisk);
pr_buf(out, "last_seq:\t\t%llu\n", journal_last_seq(j));
pr_buf(out, "last_seq_ondisk:\t%llu\n", j->last_seq_ondisk);
pr_buf(out, "flushed_seq_ondisk:\t%llu\n", j->flushed_seq_ondisk);
pr_buf(out, "prereserved:\t\t%u/%u\n", j->prereserved.reserved, j->prereserved.remaining);
pr_buf(out, "watermark:\t\t%s\n", bch2_journal_watermarks[j->watermark]);
pr_buf(out, "each entry reserved:\t%u\n", j->entry_u64s_reserved);
pr_buf(out, "nr flush writes:\t%llu\n", j->nr_flush_writes);
pr_buf(out, "nr noflush writes:\t%llu\n", j->nr_noflush_writes);
......@@ -1203,7 +1216,7 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
pr_buf(out, "reclaim runs in:\t%u ms\n", time_after(j->next_reclaim, now)
? jiffies_to_msecs(j->next_reclaim - jiffies) : 0);
pr_buf(out, "current entry sectors:\t%u\n", j->cur_entry_sectors);
pr_buf(out, "current entry error:\t%u\n", j->cur_entry_error);
pr_buf(out, "current entry error:\t%s\n", bch2_journal_errors[j->cur_entry_error]);
pr_buf(out, "current entry:\t\t");
switch (s.cur_entry_offset) {
......
......@@ -293,9 +293,9 @@ static inline void bch2_journal_res_put(struct journal *j,
int bch2_journal_res_get_slowpath(struct journal *, struct journal_res *,
unsigned);
#define JOURNAL_RES_GET_NONBLOCK (1 << 0)
#define JOURNAL_RES_GET_CHECK (1 << 1)
#define JOURNAL_RES_GET_RESERVED (1 << 2)
/* First two bits for JOURNAL_WATERMARK: */
#define JOURNAL_RES_GET_NONBLOCK (1 << 2)
#define JOURNAL_RES_GET_CHECK (1 << 3)
static inline int journal_res_get_fast(struct journal *j,
struct journal_res *res,
......@@ -316,8 +316,7 @@ static inline int journal_res_get_fast(struct journal *j,
EBUG_ON(!journal_state_count(new, new.idx));
if (!(flags & JOURNAL_RES_GET_RESERVED) &&
!test_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags))
if ((flags & JOURNAL_WATERMARK_MASK) < j->watermark)
return 0;
new.cur_entry_offset += res->u64s;
......@@ -370,23 +369,27 @@ static inline int bch2_journal_res_get(struct journal *j, struct journal_res *re
/* journal_preres: */
static inline bool journal_check_may_get_unreserved(struct journal *j)
static inline void journal_set_watermark(struct journal *j)
{
union journal_preres_state s = READ_ONCE(j->prereserved);
bool ret = s.reserved < s.remaining &&
fifo_free(&j->pin) > j->pin.size / 4;
lockdep_assert_held(&j->lock);
if (ret != test_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags)) {
if (ret) {
set_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags);
journal_wake(j);
} else {
clear_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags);
}
}
return ret;
unsigned watermark = JOURNAL_WATERMARK_any;
if (fifo_free(&j->pin) < j->pin.size / 4)
watermark = max_t(unsigned, watermark, JOURNAL_WATERMARK_copygc);
if (fifo_free(&j->pin) < j->pin.size / 8)
watermark = max_t(unsigned, watermark, JOURNAL_WATERMARK_reserved);
if (s.reserved > s.remaining)
watermark = max_t(unsigned, watermark, JOURNAL_WATERMARK_copygc);
if (!s.remaining)
watermark = max_t(unsigned, watermark, JOURNAL_WATERMARK_reserved);
if (watermark == j->watermark)
return;
swap(watermark, j->watermark);
if (watermark > j->watermark)
journal_wake(j);
}
static inline void bch2_journal_preres_put(struct journal *j,
......@@ -406,12 +409,8 @@ static inline void bch2_journal_preres_put(struct journal *j,
closure_wake_up(&j->preres_wait);
}
if (s.reserved <= s.remaining &&
!test_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags)) {
spin_lock(&j->lock);
journal_check_may_get_unreserved(j);
spin_unlock(&j->lock);
}
if (s.reserved <= s.remaining && j->watermark)
journal_set_watermark(j);
}
int __bch2_journal_preres_get(struct journal *,
......@@ -432,7 +431,7 @@ static inline int bch2_journal_preres_get_fast(struct journal *j,
old.v = new.v = v;
ret = 0;
if ((flags & JOURNAL_RES_GET_RESERVED) ||
if ((flags & JOURNAL_WATERMARK_reserved) ||
new.reserved + d < new.remaining) {
new.reserved += d;
ret = 1;
......
......@@ -195,7 +195,7 @@ void bch2_journal_space_available(struct journal *j)
j->can_discard = can_discard;
if (nr_online < c->opts.metadata_replicas_required) {
ret = cur_entry_insufficient_devices;
ret = JOURNAL_ERR_insufficient_devices;
goto out;
}
......@@ -224,9 +224,9 @@ void bch2_journal_space_available(struct journal *j)
bch2_fatal_error(c);
spin_lock(&j->lock);
ret = cur_entry_journal_stuck;
ret = JOURNAL_ERR_journal_stuck;
} else if (!j->space[journal_space_discarded].next_entry)
ret = cur_entry_journal_full;
ret = JOURNAL_ERR_journal_full;
if ((j->space[journal_space_clean_ondisk].next_entry <
j->space[journal_space_clean_ondisk].total) &&
......@@ -245,7 +245,7 @@ void bch2_journal_space_available(struct journal *j)
j->cur_entry_sectors = !ret ? j->space[journal_space_discarded].next_entry : 0;
j->cur_entry_error = ret;
journal_set_remaining(j, u64s_remaining);
journal_check_may_get_unreserved(j);
journal_set_watermark(j);
if (!ret)
journal_wake(j);
......
......@@ -144,16 +144,45 @@ enum journal_space_from {
enum {
JOURNAL_REPLAY_DONE,
JOURNAL_STARTED,
JOURNAL_MAY_GET_UNRESERVED,
JOURNAL_MAY_SKIP_FLUSH,
};
/*
 * List of journal watermark levels, in ascending order. Expanded with a
 * caller-supplied x() to generate both the enum below and the matching
 * table of names.
 */
#define JOURNAL_WATERMARKS() \
x(any) \
x(copygc) \
x(reserved)
/*
 * JOURNAL_WATERMARK_any (0) < JOURNAL_WATERMARK_copygc (1) <
 * JOURNAL_WATERMARK_reserved (2).
 *
 * The ordering matters: a journal reservation is refused when the watermark
 * carried in its flags compares lower than the journal's current watermark
 * ((flags & JOURNAL_WATERMARK_MASK) < j->watermark).
 */
enum journal_watermark {
#define x(n) JOURNAL_WATERMARK_##n,
JOURNAL_WATERMARKS()
#undef x
};
/*
 * Extracts the watermark from a flags word: the watermark occupies the low
 * two bits, which is why the other journal/btree insert flag bits start at
 * bit 2.
 */
#define JOURNAL_WATERMARK_MASK 3
/*
 * Reasons we may fail to get a journal reservation.
 *
 * Expanded with a caller-supplied x() to generate both the enum below and the
 * matching table of names, so the two always stay in the same order.
 */
#define JOURNAL_ERRORS() \
x(ok) \
x(blocked) \
x(max_in_flight) \
x(journal_full) \
x(journal_pin_full) \
x(journal_stuck) \
x(insufficient_devices)
/*
 * JOURNAL_ERR_ok is the first entry and therefore 0, so "no error" can be
 * tested with !ret. These are internal codes, not errnos: the reservation
 * slowpath returns -EROFS for JOURNAL_ERR_insufficient_devices and -EAGAIN
 * for every other nonzero value.
 */
enum journal_errors {
#define x(n) JOURNAL_ERR_##n,
JOURNAL_ERRORS()
#undef x
};
/* Embedded in struct bch_fs */
struct journal {
/* Fastpath stuff up front: */
struct {
union journal_res_state reservations;
enum journal_watermark watermark;
union journal_preres_state prereserved;
......@@ -173,15 +202,7 @@ struct journal {
* 0, or -ENOSPC if waiting on journal reclaim, or -EROFS if
* insufficient devices:
*/
enum {
cur_entry_ok,
cur_entry_blocked,
cur_entry_max_in_flight,
cur_entry_journal_full,
cur_entry_journal_pin_full,
cur_entry_journal_stuck,
cur_entry_insufficient_devices,
} cur_entry_error;
enum journal_errors cur_entry_error;
unsigned buf_size_want;
/*
......
......@@ -91,7 +91,7 @@ static enum data_cmd copygc_pred(struct bch_fs *c, void *arg,
data_opts->target = io_opts->background_target;
data_opts->nr_replicas = 1;
data_opts->btree_insert_flags = BTREE_INSERT_USE_RESERVE|
BTREE_INSERT_JOURNAL_RESERVED;
JOURNAL_WATERMARK_copygc;
data_opts->rewrite_dev = p.ptr.dev;
if (p.has_ec)
......
......@@ -562,8 +562,9 @@ static int bch2_journal_replay(struct bch_fs *c)
ret = bch2_trans_do(c, NULL, NULL,
BTREE_INSERT_LAZY_RW|
BTREE_INSERT_NOFAIL|
BTREE_INSERT_JOURNAL_RESERVED|
(!k->allocated ? BTREE_INSERT_JOURNAL_REPLAY : 0),
(!k->allocated
? BTREE_INSERT_JOURNAL_REPLAY|JOURNAL_WATERMARK_reserved
: 0),
bch2_journal_replay_key(&trans, k));
if (ret) {
bch_err(c, "journal replay: error %d while replaying key at btree %s level %u",
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment