Commit 83ec519a authored by Kent Overstreet's avatar Kent Overstreet

bcachefs: When shutting down, flush btree node writes last

Signed-off-by: default avatarKent Overstreet <kent.overstreet@linux.dev>
parent adac06fa
......@@ -13,6 +13,9 @@ void bch2_btree_node_prep_for_write(struct btree_trans *,
bool bch2_btree_bset_insert_key(struct btree_trans *, struct btree_path *,
struct btree *, struct btree_node_iter *,
struct bkey_i *);
int bch2_btree_node_flush0(struct journal *, struct journal_entry_pin *, u64);
int bch2_btree_node_flush1(struct journal *, struct journal_entry_pin *, u64);
void bch2_btree_add_journal_pin(struct bch_fs *, struct btree *, u64);
void bch2_btree_insert_key_leaf(struct btree_trans *, struct btree_path *,
......
......@@ -227,12 +227,12 @@ static int __btree_node_flush(struct journal *j, struct journal_entry_pin *pin,
return 0;
}
static int btree_node_flush0(struct journal *j, struct journal_entry_pin *pin, u64 seq)
int bch2_btree_node_flush0(struct journal *j, struct journal_entry_pin *pin, u64 seq)
{
return __btree_node_flush(j, pin, 0, seq);
}
static int btree_node_flush1(struct journal *j, struct journal_entry_pin *pin, u64 seq)
int bch2_btree_node_flush1(struct journal *j, struct journal_entry_pin *pin, u64 seq)
{
return __btree_node_flush(j, pin, 1, seq);
}
......@@ -244,8 +244,8 @@ inline void bch2_btree_add_journal_pin(struct bch_fs *c,
bch2_journal_pin_add(&c->journal, seq, &w->journal,
btree_node_write_idx(b) == 0
? btree_node_flush0
: btree_node_flush1);
? bch2_btree_node_flush0
: bch2_btree_node_flush1);
}
/**
......
......@@ -67,8 +67,9 @@ journal_seq_to_buf(struct journal *j, u64 seq)
static void journal_pin_list_init(struct journal_entry_pin_list *p, int count)
{
INIT_LIST_HEAD(&p->list);
INIT_LIST_HEAD(&p->key_cache_list);
unsigned i;
for (i = 0; i < ARRAY_SIZE(p->list); i++)
INIT_LIST_HEAD(&p->list[i]);
INIT_LIST_HEAD(&p->flushed);
atomic_set(&p->count, count);
p->devs.nr = 0;
......@@ -1347,6 +1348,7 @@ bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64
{
struct journal_entry_pin_list *pin_list;
struct journal_entry_pin *pin;
unsigned i;
spin_lock(&j->lock);
*seq = max(*seq, j->pin.front);
......@@ -1364,15 +1366,11 @@ bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64
prt_newline(out);
printbuf_indent_add(out, 2);
list_for_each_entry(pin, &pin_list->list, list) {
prt_printf(out, "\t%px %ps", pin, pin->flush);
prt_newline(out);
}
list_for_each_entry(pin, &pin_list->key_cache_list, list) {
prt_printf(out, "\t%px %ps", pin, pin->flush);
prt_newline(out);
}
for (i = 0; i < ARRAY_SIZE(pin_list->list); i++)
list_for_each_entry(pin, &pin_list->list[i], list) {
prt_printf(out, "\t%px %ps", pin, pin->flush);
prt_newline(out);
}
if (!list_empty(&pin_list->flushed)) {
prt_printf(out, "flushed:");
......
......@@ -2,6 +2,7 @@
#include "bcachefs.h"
#include "btree_key_cache.h"
#include "btree_update.h"
#include "errcode.h"
#include "error.h"
#include "journal.h"
......@@ -318,9 +319,7 @@ static void bch2_journal_reclaim_fast(struct journal *j)
*/
while (!fifo_empty(&j->pin) &&
!atomic_read(&fifo_peek_front(&j->pin).count)) {
BUG_ON(!list_empty(&fifo_peek_front(&j->pin).list));
BUG_ON(!list_empty(&fifo_peek_front(&j->pin).flushed));
BUG_ON(!fifo_pop(&j->pin, temp));
fifo_pop(&j->pin, temp);
popped = true;
}
......@@ -379,6 +378,17 @@ void bch2_journal_pin_drop(struct journal *j,
spin_unlock(&j->lock);
}
enum journal_pin_type journal_pin_type(journal_pin_flush_fn fn)
{
if (fn == bch2_btree_node_flush0 ||
fn == bch2_btree_node_flush1)
return JOURNAL_PIN_btree;
else if (fn == bch2_btree_key_cache_journal_flush)
return JOURNAL_PIN_key_cache;
else
return JOURNAL_PIN_other;
}
void bch2_journal_pin_set(struct journal *j, u64 seq,
struct journal_entry_pin *pin,
journal_pin_flush_fn flush_fn)
......@@ -407,10 +417,8 @@ void bch2_journal_pin_set(struct journal *j, u64 seq,
pin->seq = seq;
pin->flush = flush_fn;
if (flush_fn == bch2_btree_key_cache_journal_flush)
list_add(&pin->list, &pin_list->key_cache_list);
else if (flush_fn)
list_add(&pin->list, &pin_list->list);
if (flush_fn)
list_add(&pin->list, &pin_list->list[journal_pin_type(flush_fn)]);
else
list_add(&pin->list, &pin_list->flushed);
......@@ -446,37 +454,37 @@ void bch2_journal_pin_flush(struct journal *j, struct journal_entry_pin *pin)
static struct journal_entry_pin *
journal_get_next_pin(struct journal *j,
bool get_any,
bool get_key_cache,
u64 max_seq, u64 *seq)
u64 seq_to_flush,
unsigned allowed_below_seq,
unsigned allowed_above_seq,
u64 *seq)
{
struct journal_entry_pin_list *pin_list;
struct journal_entry_pin *ret = NULL;
unsigned i;
fifo_for_each_entry_ptr(pin_list, &j->pin, *seq) {
if (*seq > max_seq && !get_any && !get_key_cache)
if (*seq > seq_to_flush && !allowed_above_seq)
break;
if (*seq <= max_seq || get_any) {
ret = list_first_entry_or_null(&pin_list->list,
struct journal_entry_pin, list);
if (ret)
return ret;
}
if (*seq <= max_seq || get_any || get_key_cache) {
ret = list_first_entry_or_null(&pin_list->key_cache_list,
struct journal_entry_pin, list);
if (ret)
return ret;
}
for (i = 0; i < JOURNAL_PIN_NR; i++)
if ((((1U << i) & allowed_below_seq) && *seq <= seq_to_flush) ||
((1U << i) & allowed_above_seq)) {
ret = list_first_entry_or_null(&pin_list->list[i],
struct journal_entry_pin, list);
if (ret)
return ret;
}
}
return NULL;
}
/* returns true if we did work */
static size_t journal_flush_pins(struct journal *j, u64 seq_to_flush,
static size_t journal_flush_pins(struct journal *j,
u64 seq_to_flush,
unsigned allowed_below_seq,
unsigned allowed_above_seq,
unsigned min_any,
unsigned min_key_cache)
{
......@@ -489,15 +497,25 @@ static size_t journal_flush_pins(struct journal *j, u64 seq_to_flush,
lockdep_assert_held(&j->reclaim_lock);
while (1) {
unsigned allowed_above = allowed_above_seq;
unsigned allowed_below = allowed_below_seq;
if (min_any) {
allowed_above |= ~0;
allowed_below |= ~0;
}
if (min_key_cache) {
allowed_above |= 1U << JOURNAL_PIN_key_cache;
allowed_below |= 1U << JOURNAL_PIN_key_cache;
}
cond_resched();
j->last_flushed = jiffies;
spin_lock(&j->lock);
pin = journal_get_next_pin(j,
min_any != 0,
min_key_cache != 0,
seq_to_flush, &seq);
pin = journal_get_next_pin(j, seq_to_flush, allowed_below, allowed_above, &seq);
if (pin) {
BUG_ON(j->flush_in_progress);
j->flush_in_progress = pin;
......@@ -656,6 +674,7 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct, bool kicked)
atomic_long_read(&c->btree_key_cache.nr_keys));
nr_flushed = journal_flush_pins(j, seq_to_flush,
~0, 0,
min_nr, min_key_cache);
if (direct)
......@@ -776,7 +795,11 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush,
mutex_lock(&j->reclaim_lock);
if (journal_flush_pins(j, seq_to_flush, 0, 0))
if (journal_flush_pins(j, seq_to_flush,
(1U << JOURNAL_PIN_key_cache)|
(1U << JOURNAL_PIN_other), 0, 0, 0) ||
journal_flush_pins(j, seq_to_flush,
(1U << JOURNAL_PIN_btree), 0, 0, 0))
*did_work = true;
spin_lock(&j->lock);
......
......@@ -43,9 +43,15 @@ struct journal_buf {
* flushed:
*/
enum journal_pin_type {
JOURNAL_PIN_btree,
JOURNAL_PIN_key_cache,
JOURNAL_PIN_other,
JOURNAL_PIN_NR,
};
struct journal_entry_pin_list {
struct list_head list;
struct list_head key_cache_list;
struct list_head list[JOURNAL_PIN_NR];
struct list_head flushed;
atomic_t count;
struct bch_devs_list devs;
......
......@@ -209,7 +209,8 @@ static void __bch2_fs_read_only(struct bch_fs *c)
bch2_copygc_stop(c);
bch2_gc_thread_stop(c);
bch_verbose(c, "flushing journal and stopping allocators");
bch_verbose(c, "flushing journal and stopping allocators, journal seq %llu",
journal_cur_seq(&c->journal));
do {
clean_passes++;
......@@ -223,7 +224,8 @@ static void __bch2_fs_read_only(struct bch_fs *c)
}
} while (clean_passes < 2);
bch_verbose(c, "flushing journal and stopping allocators complete");
bch_verbose(c, "flushing journal and stopping allocators complete, journal seq %llu",
journal_cur_seq(&c->journal));
if (test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags) &&
!test_bit(BCH_FS_EMERGENCY_RO, &c->flags))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment