Commit 039fc4c5, authored by Kent Overstreet; committed by Kent Overstreet

bcachefs: Fixes for going RO

Now that interior btree updates are fully transactional, we don't need
to write out alloc info in a loop. However, interior btree updates do
put more things in the journal, so we still need a loop in the RO
sequence.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent baeed3c3
...@@ -869,6 +869,15 @@ static int bch2_invalidate_one_bucket2(struct btree_trans *trans, ...@@ -869,6 +869,15 @@ static int bch2_invalidate_one_bucket2(struct btree_trans *trans,
if (!invalidating_cached_data) if (!invalidating_cached_data)
goto out; goto out;
/*
* If the read-only path is trying to shut down, we can't be generating
* new btree updates:
*/
if (test_bit(BCH_FS_ALLOCATOR_STOPPING, &c->flags)) {
ret = 1;
goto out;
}
BUG_ON(BKEY_ALLOC_VAL_U64s_MAX > 8); BUG_ON(BKEY_ALLOC_VAL_U64s_MAX > 8);
bch2_btree_iter_set_pos(iter, POS(ca->dev_idx, b)); bch2_btree_iter_set_pos(iter, POS(ca->dev_idx, b));
...@@ -956,7 +965,7 @@ static int bch2_invalidate_one_bucket2(struct btree_trans *trans, ...@@ -956,7 +965,7 @@ static int bch2_invalidate_one_bucket2(struct btree_trans *trans,
percpu_up_read(&c->mark_lock); percpu_up_read(&c->mark_lock);
} }
return ret; return ret < 0 ? ret : 0;
} }
static bool bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca, static bool bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca,
......
...@@ -482,6 +482,7 @@ enum { ...@@ -482,6 +482,7 @@ enum {
BCH_FS_ALLOC_CLEAN, BCH_FS_ALLOC_CLEAN,
BCH_FS_ALLOCATOR_STARTED, BCH_FS_ALLOCATOR_STARTED,
BCH_FS_ALLOCATOR_RUNNING, BCH_FS_ALLOCATOR_RUNNING,
BCH_FS_ALLOCATOR_STOPPING,
BCH_FS_INITIAL_GC_DONE, BCH_FS_INITIAL_GC_DONE,
BCH_FS_FSCK_DONE, BCH_FS_FSCK_DONE,
BCH_FS_STARTED, BCH_FS_STARTED,
......
...@@ -413,10 +413,12 @@ journal_get_next_pin(struct journal *j, u64 max_seq, u64 *seq) ...@@ -413,10 +413,12 @@ journal_get_next_pin(struct journal *j, u64 max_seq, u64 *seq)
return ret; return ret;
} }
static void journal_flush_pins(struct journal *j, u64 seq_to_flush, /* returns true if we did work */
static bool journal_flush_pins(struct journal *j, u64 seq_to_flush,
unsigned min_nr) unsigned min_nr)
{ {
struct journal_entry_pin *pin; struct journal_entry_pin *pin;
bool ret = false;
u64 seq; u64 seq;
lockdep_assert_held(&j->reclaim_lock); lockdep_assert_held(&j->reclaim_lock);
...@@ -431,7 +433,10 @@ static void journal_flush_pins(struct journal *j, u64 seq_to_flush, ...@@ -431,7 +433,10 @@ static void journal_flush_pins(struct journal *j, u64 seq_to_flush,
BUG_ON(j->flush_in_progress != pin); BUG_ON(j->flush_in_progress != pin);
j->flush_in_progress = NULL; j->flush_in_progress = NULL;
wake_up(&j->pin_flush_wait); wake_up(&j->pin_flush_wait);
ret = true;
} }
return ret;
} }
/** /**
...@@ -523,7 +528,8 @@ void bch2_journal_reclaim_work(struct work_struct *work) ...@@ -523,7 +528,8 @@ void bch2_journal_reclaim_work(struct work_struct *work)
mutex_unlock(&j->reclaim_lock); mutex_unlock(&j->reclaim_lock);
} }
static int journal_flush_done(struct journal *j, u64 seq_to_flush) static int journal_flush_done(struct journal *j, u64 seq_to_flush,
bool *did_work)
{ {
int ret; int ret;
...@@ -533,7 +539,7 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush) ...@@ -533,7 +539,7 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush)
mutex_lock(&j->reclaim_lock); mutex_lock(&j->reclaim_lock);
journal_flush_pins(j, seq_to_flush, 0); *did_work = journal_flush_pins(j, seq_to_flush, 0);
spin_lock(&j->lock); spin_lock(&j->lock);
/* /*
...@@ -551,12 +557,17 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush) ...@@ -551,12 +557,17 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush)
return ret; return ret;
} }
void bch2_journal_flush_pins(struct journal *j, u64 seq_to_flush) bool bch2_journal_flush_pins(struct journal *j, u64 seq_to_flush)
{ {
bool did_work = false;
if (!test_bit(JOURNAL_STARTED, &j->flags)) if (!test_bit(JOURNAL_STARTED, &j->flags))
return; return false;
closure_wait_event(&j->async_wait,
journal_flush_done(j, seq_to_flush, &did_work));
closure_wait_event(&j->async_wait, journal_flush_done(j, seq_to_flush)); return did_work;
} }
int bch2_journal_flush_device_pins(struct journal *j, int dev_idx) int bch2_journal_flush_device_pins(struct journal *j, int dev_idx)
......
...@@ -53,11 +53,11 @@ void bch2_journal_do_discards(struct journal *); ...@@ -53,11 +53,11 @@ void bch2_journal_do_discards(struct journal *);
void bch2_journal_reclaim(struct journal *); void bch2_journal_reclaim(struct journal *);
void bch2_journal_reclaim_work(struct work_struct *); void bch2_journal_reclaim_work(struct work_struct *);
void bch2_journal_flush_pins(struct journal *, u64); bool bch2_journal_flush_pins(struct journal *, u64);
static inline void bch2_journal_flush_all_pins(struct journal *j) static inline bool bch2_journal_flush_all_pins(struct journal *j)
{ {
bch2_journal_flush_pins(j, U64_MAX); return bch2_journal_flush_pins(j, U64_MAX);
} }
int bch2_journal_flush_device_pins(struct journal *, int); int bch2_journal_flush_device_pins(struct journal *, int);
......
...@@ -175,7 +175,7 @@ struct bch_fs *bch2_uuid_to_fs(__uuid_t uuid) ...@@ -175,7 +175,7 @@ struct bch_fs *bch2_uuid_to_fs(__uuid_t uuid)
static void __bch2_fs_read_only(struct bch_fs *c) static void __bch2_fs_read_only(struct bch_fs *c)
{ {
struct bch_dev *ca; struct bch_dev *ca;
bool wrote; bool wrote = false;
unsigned i, clean_passes = 0; unsigned i, clean_passes = 0;
int ret; int ret;
...@@ -200,12 +200,12 @@ static void __bch2_fs_read_only(struct bch_fs *c) ...@@ -200,12 +200,12 @@ static void __bch2_fs_read_only(struct bch_fs *c)
goto nowrote_alloc; goto nowrote_alloc;
bch_verbose(c, "writing alloc info"); bch_verbose(c, "writing alloc info");
/*
do { * This should normally just be writing the bucket read/write clocks:
wrote = false; */
ret = bch2_stripes_write(c, BTREE_INSERT_NOCHECK_RW, &wrote) ?: ret = bch2_stripes_write(c, BTREE_INSERT_NOCHECK_RW, &wrote) ?:
bch2_alloc_write(c, BTREE_INSERT_NOCHECK_RW, &wrote); bch2_alloc_write(c, BTREE_INSERT_NOCHECK_RW, &wrote);
bch_verbose(c, "writing alloc info complete");
if (ret && !test_bit(BCH_FS_EMERGENCY_RO, &c->flags)) if (ret && !test_bit(BCH_FS_EMERGENCY_RO, &c->flags))
bch2_fs_inconsistent(c, "error writing out alloc info %i", ret); bch2_fs_inconsistent(c, "error writing out alloc info %i", ret);
...@@ -213,26 +213,33 @@ static void __bch2_fs_read_only(struct bch_fs *c) ...@@ -213,26 +213,33 @@ static void __bch2_fs_read_only(struct bch_fs *c)
if (ret) if (ret)
goto nowrote_alloc; goto nowrote_alloc;
for_each_member_device(ca, c, i) bch_verbose(c, "flushing journal and stopping allocators");
bch2_dev_allocator_quiesce(c, ca);
bch2_journal_flush_all_pins(&c->journal); bch2_journal_flush_all_pins(&c->journal);
set_bit(BCH_FS_ALLOCATOR_STOPPING, &c->flags);
do {
clean_passes++;
if (bch2_journal_flush_all_pins(&c->journal))
clean_passes = 0;
/* /*
* We need to explicitly wait on btree interior updates to complete * In flight interior btree updates will generate more journal
* before stopping the journal, flushing all journal pins isn't * updates and btree updates (alloc btree):
* sufficient, because in the BTREE_INTERIOR_UPDATING_ROOT case btree
* interior updates have to drop their journal pin before they're
* fully complete:
*/ */
if (bch2_btree_interior_updates_nr_pending(c)) {
closure_wait_event(&c->btree_interior_update_wait, closure_wait_event(&c->btree_interior_update_wait,
!bch2_btree_interior_updates_nr_pending(c)); !bch2_btree_interior_updates_nr_pending(c));
clean_passes = 0;
}
flush_work(&c->btree_interior_update_work); flush_work(&c->btree_interior_update_work);
clean_passes = wrote ? 0 : clean_passes + 1; if (bch2_journal_flush_all_pins(&c->journal))
clean_passes = 0;
} while (clean_passes < 2); } while (clean_passes < 2);
bch_verbose(c, "flushing journal and stopping allocators complete");
bch_verbose(c, "writing alloc info complete");
set_bit(BCH_FS_ALLOC_CLEAN, &c->flags); set_bit(BCH_FS_ALLOC_CLEAN, &c->flags);
nowrote_alloc: nowrote_alloc:
closure_wait_event(&c->btree_interior_update_wait, closure_wait_event(&c->btree_interior_update_wait,
...@@ -243,11 +250,10 @@ static void __bch2_fs_read_only(struct bch_fs *c) ...@@ -243,11 +250,10 @@ static void __bch2_fs_read_only(struct bch_fs *c)
bch2_dev_allocator_stop(ca); bch2_dev_allocator_stop(ca);
clear_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags); clear_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags);
clear_bit(BCH_FS_ALLOCATOR_STOPPING, &c->flags);
bch2_fs_journal_stop(&c->journal); bch2_fs_journal_stop(&c->journal);
/* XXX: mark super that alloc info is persistent */
/* /*
* the journal kicks off btree writes via reclaim - wait for in flight * the journal kicks off btree writes via reclaim - wait for in flight
* writes after stopping journal: * writes after stopping journal:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment