Commit 26aff849 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'bcachefs-2023-12-10' of https://evilpiepirate.org/git/bcachefs

Pull more bcachefs bugfixes from Kent Overstreet:

 - Fix a rare emergency shutdown path bug: dropping journal pins after
   the filesystem has mostly been torn down is not what we want.

 - Fix some concurrency issues with the btree write buffer and journal
   replay by not using the btree write buffer until journal replay is
   finished

 - A fixup from the prior patch to kill journal pre-reservations: at the
   start of the btree update path, where previously we took a
   pre-reservation, we do at least want to check the journal watermark.

 - Fix a race between dropping device metadata and btree node writes,
   which would re-add a pointer to a device that had just been dropped

 - Fix one of the SRCU lock warnings, in
   bch2_compression_stats_to_text().

 - Partial fix for a rare transaction paths overflow, when indirect
   extents had been split by background tasks, by not running certain
   triggers when they're not needed.

 - Fix for creating a snapshot with implicit source in a subdirectory of
   the containing subvolume

 - Don't unfreeze when we're emergency read-only

 - Fix for rebalance spinning trying to compress unwritten extents

 - Another deleted_inodes fix, for directories

 - Fix a rare deadlock (usually just an unnecessary wait) when flushing
   the journal with an open journal entry.

* tag 'bcachefs-2023-12-10' of https://evilpiepirate.org/git/bcachefs:
  bcachefs: Close journal entry if necessary when flushing all pins
  bcachefs: Fix uninitialized var in bch2_journal_replay()
  bcachefs: Fix deleted inode check for dirs
  bcachefs: rebalance shouldn't attempt to compress unwritten extents
  bcachefs: don't attempt rw on unfreeze when shutdown
  bcachefs: Fix creating snapshot with implict source
  bcachefs: Don't run indirect extent trigger unless inserting/deleting
  bcachefs: Convert compression_stats to for_each_btree_key2
  bcachefs: Fix bch2_extent_drop_ptrs() call
  bcachefs: Fix a journal deadlock in replay
  bcachefs; Don't use btree write buffer until journal replay is finished
  bcachefs: Don't drop journal pins in exit path
parents 52bf9f6c a66ff26b
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
#include "debug.h" #include "debug.h"
#include "errcode.h" #include "errcode.h"
#include "error.h" #include "error.h"
#include "journal.h"
#include "trace.h" #include "trace.h"
#include <linux/prefetch.h> #include <linux/prefetch.h>
...@@ -424,14 +425,11 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c) ...@@ -424,14 +425,11 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c)
BUG_ON(btree_node_read_in_flight(b) || BUG_ON(btree_node_read_in_flight(b) ||
btree_node_write_in_flight(b)); btree_node_write_in_flight(b));
if (btree_node_dirty(b))
bch2_btree_complete_write(c, b, btree_current_write(b));
clear_btree_node_dirty_acct(c, b);
btree_node_data_free(c, b); btree_node_data_free(c, b);
} }
BUG_ON(atomic_read(&c->btree_cache.dirty)); BUG_ON(!bch2_journal_error(&c->journal) &&
atomic_read(&c->btree_cache.dirty));
list_splice(&bc->freed_pcpu, &bc->freed_nonpcpu); list_splice(&bc->freed_pcpu, &bc->freed_nonpcpu);
......
...@@ -1704,8 +1704,8 @@ int bch2_btree_root_read(struct bch_fs *c, enum btree_id id, ...@@ -1704,8 +1704,8 @@ int bch2_btree_root_read(struct bch_fs *c, enum btree_id id,
return bch2_trans_run(c, __bch2_btree_root_read(trans, id, k, level)); return bch2_trans_run(c, __bch2_btree_root_read(trans, id, k, level));
} }
void bch2_btree_complete_write(struct bch_fs *c, struct btree *b, static void bch2_btree_complete_write(struct bch_fs *c, struct btree *b,
struct btree_write *w) struct btree_write *w)
{ {
unsigned long old, new, v = READ_ONCE(b->will_make_reachable); unsigned long old, new, v = READ_ONCE(b->will_make_reachable);
......
...@@ -134,9 +134,6 @@ void bch2_btree_node_read(struct bch_fs *, struct btree *, bool); ...@@ -134,9 +134,6 @@ void bch2_btree_node_read(struct bch_fs *, struct btree *, bool);
int bch2_btree_root_read(struct bch_fs *, enum btree_id, int bch2_btree_root_read(struct bch_fs *, enum btree_id,
const struct bkey_i *, unsigned); const struct bkey_i *, unsigned);
void bch2_btree_complete_write(struct bch_fs *, struct btree *,
struct btree_write *);
bool bch2_btree_post_write_cleanup(struct bch_fs *, struct btree *); bool bch2_btree_post_write_cleanup(struct bch_fs *, struct btree *);
enum btree_write_flags { enum btree_write_flags {
......
...@@ -992,8 +992,6 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc) ...@@ -992,8 +992,6 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
list_for_each_entry_safe(ck, n, &items, list) { list_for_each_entry_safe(ck, n, &items, list) {
cond_resched(); cond_resched();
bch2_journal_pin_drop(&c->journal, &ck->journal);
list_del(&ck->list); list_del(&ck->list);
kfree(ck->k); kfree(ck->k);
six_lock_exit(&ck->c.lock); six_lock_exit(&ck->c.lock);
......
...@@ -554,6 +554,19 @@ int __must_check bch2_trans_update_seq(struct btree_trans *trans, u64 seq, ...@@ -554,6 +554,19 @@ int __must_check bch2_trans_update_seq(struct btree_trans *trans, u64 seq,
BTREE_UPDATE_PREJOURNAL); BTREE_UPDATE_PREJOURNAL);
} }
/*
 * NOTE(review): diff-hunk context. Clone @k into transaction-owned memory
 * and insert it via the regular btree insert path instead of the btree
 * write buffer; per the hunk below, this is taken while
 * trans->journal_replay_not_finished is set, i.e. the write buffer must
 * not be used until journal replay completes.
 */
static noinline int bch2_btree_insert_clone_trans(struct btree_trans *trans,
enum btree_id btree,
struct bkey_i *k)
{
/* Allocation is tied to the transaction; errors surface via ERR_PTR. */
struct bkey_i *n = bch2_trans_kmalloc(trans, bkey_bytes(&k->k));
int ret = PTR_ERR_OR_ZERO(n);
if (ret)
return ret;
bkey_copy(n, k);
return bch2_btree_insert_trans(trans, btree, n, 0);
}
int __must_check bch2_trans_update_buffered(struct btree_trans *trans, int __must_check bch2_trans_update_buffered(struct btree_trans *trans,
enum btree_id btree, enum btree_id btree,
struct bkey_i *k) struct bkey_i *k)
...@@ -564,6 +577,9 @@ int __must_check bch2_trans_update_buffered(struct btree_trans *trans, ...@@ -564,6 +577,9 @@ int __must_check bch2_trans_update_buffered(struct btree_trans *trans,
EBUG_ON(trans->nr_wb_updates > trans->wb_updates_size); EBUG_ON(trans->nr_wb_updates > trans->wb_updates_size);
EBUG_ON(k->k.u64s > BTREE_WRITE_BUFERED_U64s_MAX); EBUG_ON(k->k.u64s > BTREE_WRITE_BUFERED_U64s_MAX);
if (unlikely(trans->journal_replay_not_finished))
return bch2_btree_insert_clone_trans(trans, btree, k);
trans_for_each_wb_update(trans, i) { trans_for_each_wb_update(trans, i) {
if (i->btree == btree && bpos_eq(i->k.k.p, k->k.p)) { if (i->btree == btree && bpos_eq(i->k.k.p, k->k.p)) {
bkey_copy(&i->k, k); bkey_copy(&i->k, k);
......
...@@ -1056,6 +1056,17 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, ...@@ -1056,6 +1056,17 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
flags &= ~BCH_WATERMARK_MASK; flags &= ~BCH_WATERMARK_MASK;
flags |= watermark; flags |= watermark;
if (!(flags & BTREE_INSERT_JOURNAL_RECLAIM) &&
watermark < c->journal.watermark) {
struct journal_res res = { 0 };
ret = drop_locks_do(trans,
bch2_journal_res_get(&c->journal, &res, 1,
watermark|JOURNAL_RES_GET_CHECK));
if (ret)
return ERR_PTR(ret);
}
while (1) { while (1) {
nr_nodes[!!update_level] += 1 + split; nr_nodes[!!update_level] += 1 + split;
update_level++; update_level++;
......
...@@ -471,7 +471,7 @@ int bch2_extent_drop_ptrs(struct btree_trans *trans, ...@@ -471,7 +471,7 @@ int bch2_extent_drop_ptrs(struct btree_trans *trans,
* we aren't using the extent overwrite path to delete, we're * we aren't using the extent overwrite path to delete, we're
* just using the normal key deletion path: * just using the normal key deletion path:
*/ */
if (bkey_deleted(&n->k)) if (bkey_deleted(&n->k) && !(iter->flags & BTREE_ITER_IS_EXTENTS))
n->k.size = 0; n->k.size = 0;
return bch2_trans_relock(trans) ?: return bch2_trans_relock(trans) ?:
...@@ -591,7 +591,7 @@ int bch2_data_update_init(struct btree_trans *trans, ...@@ -591,7 +591,7 @@ int bch2_data_update_init(struct btree_trans *trans,
m->data_opts.rewrite_ptrs = 0; m->data_opts.rewrite_ptrs = 0;
/* if iter == NULL, it's just a promote */ /* if iter == NULL, it's just a promote */
if (iter) if (iter)
ret = bch2_extent_drop_ptrs(trans, iter, k, data_opts); ret = bch2_extent_drop_ptrs(trans, iter, k, m->data_opts);
goto done; goto done;
} }
......
...@@ -485,20 +485,15 @@ u64 bch2_dirent_lookup(struct bch_fs *c, subvol_inum dir, ...@@ -485,20 +485,15 @@ u64 bch2_dirent_lookup(struct bch_fs *c, subvol_inum dir,
return ret; return ret;
} }
int bch2_empty_dir_trans(struct btree_trans *trans, subvol_inum dir) int bch2_empty_dir_snapshot(struct btree_trans *trans, u64 dir, u32 snapshot)
{ {
struct btree_iter iter; struct btree_iter iter;
struct bkey_s_c k; struct bkey_s_c k;
u32 snapshot;
int ret; int ret;
ret = bch2_subvolume_get_snapshot(trans, dir.subvol, &snapshot);
if (ret)
return ret;
for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_dirents, for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_dirents,
SPOS(dir.inum, 0, snapshot), SPOS(dir, 0, snapshot),
POS(dir.inum, U64_MAX), 0, k, ret) POS(dir, U64_MAX), 0, k, ret)
if (k.k->type == KEY_TYPE_dirent) { if (k.k->type == KEY_TYPE_dirent) {
ret = -ENOTEMPTY; ret = -ENOTEMPTY;
break; break;
...@@ -508,6 +503,14 @@ int bch2_empty_dir_trans(struct btree_trans *trans, subvol_inum dir) ...@@ -508,6 +503,14 @@ int bch2_empty_dir_trans(struct btree_trans *trans, subvol_inum dir)
return ret; return ret;
} }
/*
 * NOTE(review): diff-hunk context. Subvolume-aware wrapper: resolve @dir's
 * subvolume to a snapshot ID, then delegate the emptiness check to
 * bch2_empty_dir_snapshot() (which, per the hunk above, returns -ENOTEMPTY
 * when a dirent exists in the directory).
 */
int bch2_empty_dir_trans(struct btree_trans *trans, subvol_inum dir)
{
u32 snapshot;
return bch2_subvolume_get_snapshot(trans, dir.subvol, &snapshot) ?:
bch2_empty_dir_snapshot(trans, dir.inum, snapshot);
}
int bch2_readdir(struct bch_fs *c, subvol_inum inum, struct dir_context *ctx) int bch2_readdir(struct bch_fs *c, subvol_inum inum, struct dir_context *ctx)
{ {
struct btree_trans *trans = bch2_trans_get(c); struct btree_trans *trans = bch2_trans_get(c);
......
...@@ -64,6 +64,7 @@ u64 bch2_dirent_lookup(struct bch_fs *, subvol_inum, ...@@ -64,6 +64,7 @@ u64 bch2_dirent_lookup(struct bch_fs *, subvol_inum,
const struct bch_hash_info *, const struct bch_hash_info *,
const struct qstr *, subvol_inum *); const struct qstr *, subvol_inum *);
int bch2_empty_dir_snapshot(struct btree_trans *, u64, u32);
int bch2_empty_dir_trans(struct btree_trans *, subvol_inum); int bch2_empty_dir_trans(struct btree_trans *, subvol_inum);
int bch2_readdir(struct bch_fs *, subvol_inum, struct dir_context *); int bch2_readdir(struct bch_fs *, subvol_inum, struct dir_context *);
......
...@@ -1294,7 +1294,8 @@ unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *c, struct bkey_s_c k, ...@@ -1294,7 +1294,8 @@ unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *c, struct bkey_s_c k,
unsigned i = 0; unsigned i = 0;
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible) { if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible ||
p.ptr.unwritten) {
rewrite_ptrs = 0; rewrite_ptrs = 0;
goto incompressible; goto incompressible;
} }
......
...@@ -413,7 +413,7 @@ static long __bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp, ...@@ -413,7 +413,7 @@ static long __bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp,
if ((arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE) && if ((arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE) &&
!arg.src_ptr) !arg.src_ptr)
snapshot_src.subvol = to_bch_ei(dir)->ei_inode.bi_subvol; snapshot_src.subvol = inode_inum(to_bch_ei(dir)).subvol;
inode = __bch2_create(file_mnt_idmap(filp), to_bch_ei(dir), inode = __bch2_create(file_mnt_idmap(filp), to_bch_ei(dir),
dst_dentry, arg.mode|S_IFDIR, dst_dentry, arg.mode|S_IFDIR,
......
...@@ -1733,6 +1733,9 @@ static int bch2_unfreeze(struct super_block *sb) ...@@ -1733,6 +1733,9 @@ static int bch2_unfreeze(struct super_block *sb)
struct bch_fs *c = sb->s_fs_info; struct bch_fs *c = sb->s_fs_info;
int ret; int ret;
if (test_bit(BCH_FS_EMERGENCY_RO, &c->flags))
return 0;
down_write(&c->state_lock); down_write(&c->state_lock);
ret = bch2_fs_read_write(c); ret = bch2_fs_read_write(c);
up_write(&c->state_lock); up_write(&c->state_lock);
......
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
#include "btree_update.h" #include "btree_update.h"
#include "buckets.h" #include "buckets.h"
#include "compress.h" #include "compress.h"
#include "dirent.h"
#include "error.h" #include "error.h"
#include "extents.h" #include "extents.h"
#include "extent_update.h" #include "extent_update.h"
...@@ -1093,11 +1094,15 @@ static int may_delete_deleted_inode(struct btree_trans *trans, ...@@ -1093,11 +1094,15 @@ static int may_delete_deleted_inode(struct btree_trans *trans,
if (ret) if (ret)
goto out; goto out;
if (fsck_err_on(S_ISDIR(inode.bi_mode), c, if (S_ISDIR(inode.bi_mode)) {
deleted_inode_is_dir, ret = bch2_empty_dir_snapshot(trans, pos.offset, pos.snapshot);
"directory %llu:%u in deleted_inodes btree", if (fsck_err_on(ret == -ENOTEMPTY, c, deleted_inode_is_dir,
pos.offset, pos.snapshot)) "non empty directory %llu:%u in deleted_inodes btree",
goto delete; pos.offset, pos.snapshot))
goto delete;
if (ret)
goto out;
}
if (fsck_err_on(!(inode.bi_flags & BCH_INODE_unlinked), c, if (fsck_err_on(!(inode.bi_flags & BCH_INODE_unlinked), c,
deleted_inode_not_unlinked, deleted_inode_not_unlinked,
......
...@@ -249,7 +249,7 @@ static bool journal_entry_want_write(struct journal *j) ...@@ -249,7 +249,7 @@ static bool journal_entry_want_write(struct journal *j)
return ret; return ret;
} }
static bool journal_entry_close(struct journal *j) bool bch2_journal_entry_close(struct journal *j)
{ {
bool ret; bool ret;
...@@ -383,7 +383,7 @@ static bool journal_quiesced(struct journal *j) ...@@ -383,7 +383,7 @@ static bool journal_quiesced(struct journal *j)
bool ret = atomic64_read(&j->seq) == j->seq_ondisk; bool ret = atomic64_read(&j->seq) == j->seq_ondisk;
if (!ret) if (!ret)
journal_entry_close(j); bch2_journal_entry_close(j);
return ret; return ret;
} }
...@@ -436,7 +436,7 @@ static int __journal_res_get(struct journal *j, struct journal_res *res, ...@@ -436,7 +436,7 @@ static int __journal_res_get(struct journal *j, struct journal_res *res,
/* /*
* Recheck after taking the lock, so we don't race with another thread * Recheck after taking the lock, so we don't race with another thread
* that just did journal_entry_open() and call journal_entry_close() * that just did journal_entry_open() and call bch2_journal_entry_close()
* unnecessarily * unnecessarily
*/ */
if (journal_res_get_fast(j, res, flags)) { if (journal_res_get_fast(j, res, flags)) {
...@@ -1041,7 +1041,7 @@ void bch2_fs_journal_stop(struct journal *j) ...@@ -1041,7 +1041,7 @@ void bch2_fs_journal_stop(struct journal *j)
bch2_journal_reclaim_stop(j); bch2_journal_reclaim_stop(j);
bch2_journal_flush_all_pins(j); bch2_journal_flush_all_pins(j);
wait_event(j->wait, journal_entry_close(j)); wait_event(j->wait, bch2_journal_entry_close(j));
/* /*
* Always write a new journal entry, to make sure the clock hands are up * Always write a new journal entry, to make sure the clock hands are up
......
...@@ -266,6 +266,7 @@ static inline union journal_res_state journal_state_buf_put(struct journal *j, u ...@@ -266,6 +266,7 @@ static inline union journal_res_state journal_state_buf_put(struct journal *j, u
return s; return s;
} }
bool bch2_journal_entry_close(struct journal *);
void bch2_journal_buf_put_final(struct journal *, u64, bool); void bch2_journal_buf_put_final(struct journal *, u64, bool);
static inline void __bch2_journal_buf_put(struct journal *j, unsigned idx, u64 seq) static inline void __bch2_journal_buf_put(struct journal *j, unsigned idx, u64 seq)
......
...@@ -1599,6 +1599,7 @@ static CLOSURE_CALLBACK(journal_write_done) ...@@ -1599,6 +1599,7 @@ static CLOSURE_CALLBACK(journal_write_done)
} while ((v = atomic64_cmpxchg(&j->reservations.counter, } while ((v = atomic64_cmpxchg(&j->reservations.counter,
old.v, new.v)) != old.v); old.v, new.v)) != old.v);
bch2_journal_reclaim_fast(j);
bch2_journal_space_available(j); bch2_journal_space_available(j);
closure_wake_up(&w->wait); closure_wake_up(&w->wait);
......
...@@ -776,6 +776,9 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush, ...@@ -776,6 +776,9 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush,
(1U << JOURNAL_PIN_btree), 0, 0, 0)) (1U << JOURNAL_PIN_btree), 0, 0, 0))
*did_work = true; *did_work = true;
if (seq_to_flush > journal_cur_seq(j))
bch2_journal_entry_close(j);
spin_lock(&j->lock); spin_lock(&j->lock);
/* /*
* If journal replay hasn't completed, the unreplayed journal entries * If journal replay hasn't completed, the unreplayed journal entries
......
...@@ -144,7 +144,7 @@ static int bch2_journal_replay(struct bch_fs *c) ...@@ -144,7 +144,7 @@ static int bch2_journal_replay(struct bch_fs *c)
u64 start_seq = c->journal_replay_seq_start; u64 start_seq = c->journal_replay_seq_start;
u64 end_seq = c->journal_replay_seq_start; u64 end_seq = c->journal_replay_seq_start;
size_t i; size_t i;
int ret; int ret = 0;
move_gap(keys->d, keys->nr, keys->size, keys->gap, keys->nr); move_gap(keys->d, keys->nr, keys->size, keys->gap, keys->nr);
keys->gap = keys->nr; keys->gap = keys->nr;
......
...@@ -121,6 +121,14 @@ int bch2_trans_mark_reflink_v(struct btree_trans *trans, ...@@ -121,6 +121,14 @@ int bch2_trans_mark_reflink_v(struct btree_trans *trans,
{ {
check_indirect_extent_deleting(new, &flags); check_indirect_extent_deleting(new, &flags);
if (old.k->type == KEY_TYPE_reflink_v &&
new->k.type == KEY_TYPE_reflink_v &&
old.k->u64s == new->k.u64s &&
!memcmp(bkey_s_c_to_reflink_v(old).v->start,
bkey_i_to_reflink_v(new)->v.start,
bkey_val_bytes(&new->k) - 8))
return 0;
return bch2_trans_mark_extent(trans, btree_id, level, old, new, flags); return bch2_trans_mark_extent(trans, btree_id, level, old, new, flags);
} }
......
...@@ -276,8 +276,8 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c ...@@ -276,8 +276,8 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c
if (!btree_type_has_ptrs(id)) if (!btree_type_has_ptrs(id))
continue; continue;
for_each_btree_key(trans, iter, id, POS_MIN, ret = for_each_btree_key2(trans, iter, id, POS_MIN,
BTREE_ITER_ALL_SNAPSHOTS, k, ret) { BTREE_ITER_ALL_SNAPSHOTS, k, ({
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const union bch_extent_entry *entry; const union bch_extent_entry *entry;
struct extent_ptr_decoded p; struct extent_ptr_decoded p;
...@@ -309,8 +309,8 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c ...@@ -309,8 +309,8 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c
nr_uncompressed_extents++; nr_uncompressed_extents++;
else if (compressed) else if (compressed)
nr_compressed_extents++; nr_compressed_extents++;
} 0;
bch2_trans_iter_exit(trans, &iter); }));
} }
bch2_trans_put(trans); bch2_trans_put(trans);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment