Commit c40b1994 authored by Linus Torvalds

Merge tag 'bcachefs-2024-05-24' of https://evilpiepirate.org/git/bcachefs

Pull bcachefs fixes from Kent Overstreet:
 "Nothing exciting, just syzbot fixes (except for the one
  FMODE_CAN_ODIRECT patch).

  Looks like syzbot reports have slowed down; this is all catch up from
  two weeks of conferences.

  Next hardening project is using Thomas's error injection tooling to
  torture test repair"

* tag 'bcachefs-2024-05-24' of https://evilpiepirate.org/git/bcachefs:
  bcachefs: Fix race path in bch2_inode_insert()
  bcachefs: Ensure we're RW before journalling
  bcachefs: Fix shutdown ordering
  bcachefs: Fix unsafety in bch2_dirent_name_bytes()
  bcachefs: Fix stack oob in __bch2_encrypt_bio()
  bcachefs: Fix btree_trans leak in bch2_readahead()
  bcachefs: Fix bogus verify_replicas_entry() assert
  bcachefs: Check for subvolues with bogus snapshot/inode fields
  bcachefs: bch2_checksum() returns 0 for unknown checksum type
  bcachefs: Fix bch2_alloc_ciphers()
  bcachefs: Add missing guard in bch2_snapshot_has_children()
  bcachefs: Fix missing parens in drop_locks_do()
  bcachefs: Improve bch2_assert_pos_locked()
  bcachefs: Fix shift overflows in replicas.c
  bcachefs: Fix shift overflow in btree_lost_data()
  bcachefs: Fix ref in trans_mark_dev_sbs() error path
  bcachefs: set FMODE_CAN_ODIRECT instead of a dummy direct_IO method
  bcachefs: Fix rcu splat in check_fix_ptrs()
parents 9ea370f3 d93ff5fa
......@@ -1555,6 +1555,12 @@ enum btree_id {
BTREE_ID_NR
};
/*
* Maximum number of btrees that we will _ever_ have under the current scheme,
* where we refer to them with bitfields
*/
#define BTREE_ID_NR_MAX 64
static inline bool btree_id_is_alloc(enum btree_id id)
{
switch (id) {
......
......@@ -332,6 +332,8 @@ static int bch2_btree_iter_verify_ret(struct btree_iter *iter, struct bkey_s_c k
void bch2_assert_pos_locked(struct btree_trans *trans, enum btree_id id,
struct bpos pos, bool key_cache)
{
bch2_trans_verify_not_unlocked(trans);
struct btree_path *path;
struct trans_for_each_path_inorder_iter iter;
struct printbuf buf = PRINTBUF;
......
......@@ -838,7 +838,7 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans,
#define drop_locks_do(_trans, _do) \
({ \
bch2_trans_unlock(_trans); \
_do ?: bch2_trans_relock(_trans); \
(_do) ?: bch2_trans_relock(_trans); \
})
#define allocate_dropping_locks_errcode(_trans, _do) \
......
......@@ -479,9 +479,8 @@ int bch2_check_fix_ptrs(struct btree_trans *trans,
percpu_down_read(&c->mark_lock);
rcu_read_lock();
bkey_for_each_ptr_decode(k.k, ptrs_c, p, entry_c) {
struct bch_dev *ca = bch2_dev_rcu(c, p.ptr.dev);
struct bch_dev *ca = bch2_dev_tryget(c, p.ptr.dev);
if (!ca) {
if (fsck_err(c, ptr_to_invalid_device,
"pointer to missing device %u\n"
......@@ -558,7 +557,7 @@ int bch2_check_fix_ptrs(struct btree_trans *trans,
do_update = true;
if (data_type != BCH_DATA_btree && p.ptr.gen != g->gen)
continue;
goto next;
if (fsck_err_on(bucket_data_type_mismatch(g->data_type, data_type),
c, ptr_bucket_data_type_mismatch,
......@@ -601,8 +600,9 @@ int bch2_check_fix_ptrs(struct btree_trans *trans,
bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
do_update = true;
}
next:
bch2_dev_put(ca);
}
rcu_read_unlock();
if (do_update) {
if (flags & BTREE_TRIGGER_is_root) {
......@@ -638,9 +638,10 @@ int bch2_check_fix_ptrs(struct btree_trans *trans,
} else {
struct bkey_ptrs ptrs;
union bch_extent_entry *entry;
rcu_read_lock();
restart_drop_ptrs:
ptrs = bch2_bkey_ptrs(bkey_i_to_s(new));
rcu_read_lock();
bkey_for_each_ptr_decode(bkey_i_to_s(new).k, ptrs, p, entry) {
struct bch_dev *ca = bch2_dev_rcu(c, p.ptr.dev);
struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr);
......@@ -1464,7 +1465,7 @@ int bch2_trans_mark_dev_sbs_flags(struct bch_fs *c,
for_each_online_member(c, ca) {
int ret = bch2_trans_mark_dev_sb(c, ca, flags);
if (ret) {
bch2_dev_put(ca);
percpu_ref_put(&ca->io_ref);
return ret;
}
}
......
......@@ -233,7 +233,7 @@ struct bch_csum bch2_checksum(struct bch_fs *c, unsigned type,
return ret;
}
default:
BUG();
return (struct bch_csum) {};
}
}
......@@ -307,7 +307,7 @@ static struct bch_csum __bch2_checksum_bio(struct bch_fs *c, unsigned type,
return ret;
}
default:
BUG();
return (struct bch_csum) {};
}
}
......@@ -352,8 +352,12 @@ int __bch2_encrypt_bio(struct bch_fs *c, unsigned type,
bytes += bv.bv_len;
}
sg_mark_end(sg - 1);
return do_encrypt_sg(c->chacha20, nonce, sgl, bytes);
if (sg != sgl) {
sg_mark_end(sg - 1);
return do_encrypt_sg(c->chacha20, nonce, sgl, bytes);
}
return ret;
}
struct bch_csum bch2_checksum_merge(unsigned type, struct bch_csum a,
......@@ -648,26 +652,26 @@ int bch2_decrypt_sb_key(struct bch_fs *c,
static int bch2_alloc_ciphers(struct bch_fs *c)
{
int ret;
if (!c->chacha20)
c->chacha20 = crypto_alloc_sync_skcipher("chacha20", 0, 0);
ret = PTR_ERR_OR_ZERO(c->chacha20);
if (c->chacha20)
return 0;
struct crypto_sync_skcipher *chacha20 = crypto_alloc_sync_skcipher("chacha20", 0, 0);
int ret = PTR_ERR_OR_ZERO(chacha20);
if (ret) {
bch_err(c, "error requesting chacha20 module: %s", bch2_err_str(ret));
return ret;
}
if (!c->poly1305)
c->poly1305 = crypto_alloc_shash("poly1305", 0, 0);
ret = PTR_ERR_OR_ZERO(c->poly1305);
struct crypto_shash *poly1305 = crypto_alloc_shash("poly1305", 0, 0);
ret = PTR_ERR_OR_ZERO(poly1305);
if (ret) {
bch_err(c, "error requesting poly1305 module: %s", bch2_err_str(ret));
crypto_free_sync_skcipher(chacha20);
return ret;
}
c->chacha20 = chacha20;
c->poly1305 = poly1305;
return 0;
}
......@@ -762,11 +766,11 @@ int bch2_enable_encryption(struct bch_fs *c, bool keyed)
void bch2_fs_encryption_exit(struct bch_fs *c)
{
if (!IS_ERR_OR_NULL(c->poly1305))
if (c->poly1305)
crypto_free_shash(c->poly1305);
if (!IS_ERR_OR_NULL(c->chacha20))
if (c->chacha20)
crypto_free_sync_skcipher(c->chacha20);
if (!IS_ERR_OR_NULL(c->sha256))
if (c->sha256)
crypto_free_shash(c->sha256);
}
......@@ -779,6 +783,7 @@ int bch2_fs_encryption_init(struct bch_fs *c)
c->sha256 = crypto_alloc_shash("sha256", 0, 0);
ret = PTR_ERR_OR_ZERO(c->sha256);
if (ret) {
c->sha256 = NULL;
bch_err(c, "error requesting sha256 module: %s", bch2_err_str(ret));
goto out;
}
......
......@@ -15,6 +15,9 @@
static unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d)
{
if (bkey_val_bytes(d.k) < offsetof(struct bch_dirent, d_name))
return 0;
unsigned bkey_u64s = bkey_val_u64s(d.k);
unsigned bkey_bytes = bkey_u64s * sizeof(u64);
u64 last_u64 = ((u64*)d.v)[bkey_u64s - 1];
......
......@@ -257,7 +257,6 @@ void bch2_readahead(struct readahead_control *ractl)
struct bch_inode_info *inode = to_bch_ei(ractl->mapping->host);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bch_io_opts opts;
struct btree_trans *trans = bch2_trans_get(c);
struct folio *folio;
struct readpages_iter readpages_iter;
......@@ -269,6 +268,7 @@ void bch2_readahead(struct readahead_control *ractl)
bch2_pagecache_add_get(inode);
struct btree_trans *trans = bch2_trans_get(c);
while ((folio = readpage_iter_peek(&readpages_iter))) {
unsigned n = min_t(unsigned,
readpages_iter.folios.nr -
......@@ -289,10 +289,10 @@ void bch2_readahead(struct readahead_control *ractl)
&readpages_iter);
bch2_trans_unlock(trans);
}
bch2_trans_put(trans);
bch2_pagecache_add_put(inode);
bch2_trans_put(trans);
darray_exit(&readpages_iter.folios);
}
......
......@@ -188,8 +188,7 @@ static struct bch_inode_info *bch2_inode_insert(struct bch_fs *c, struct bch_ino
BUG_ON(!old);
if (unlikely(old != inode)) {
__destroy_inode(&inode->v);
kmem_cache_free(bch2_inode_cache, inode);
discard_new_inode(&inode->v);
inode = old;
} else {
mutex_lock(&c->vfs_inodes_lock);
......@@ -1145,6 +1144,8 @@ static int bch2_open(struct inode *vinode, struct file *file)
return ret;
}
file->f_mode |= FMODE_CAN_ODIRECT;
return generic_file_open(vinode, file);
}
......@@ -1237,7 +1238,6 @@ static const struct address_space_operations bch_address_space_operations = {
.write_end = bch2_write_end,
.invalidate_folio = bch2_invalidate_folio,
.release_folio = bch2_release_folio,
.direct_IO = noop_direct_IO,
#ifdef CONFIG_MIGRATION
.migrate_folio = filemap_migrate_folio,
#endif
......
......@@ -45,6 +45,13 @@ int bch2_printbuf_make_room(struct printbuf *out, unsigned extra)
unsigned new_size = roundup_pow_of_two(out->size + extra);
/* Sanity check... */
if (new_size > PAGE_SIZE << MAX_PAGE_ORDER) {
out->allocation_failure = true;
out->overflow = true;
return -ENOMEM;
}
/*
* Note: output buffer must be freeable with kfree(), it's not required
* that the user use printbuf_exit().
......
......@@ -35,6 +35,9 @@
void bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree)
{
if (btree >= BTREE_ID_NR_MAX)
return;
u64 b = BIT_ULL(btree);
if (!(c->sb.btrees_lost_data & b)) {
......@@ -808,9 +811,11 @@ int bch2_fs_recovery(struct bch_fs *c)
clear_bit(BCH_FS_fsck_running, &c->flags);
/* fsync if we fixed errors */
if (test_bit(BCH_FS_errors_fixed, &c->flags)) {
if (test_bit(BCH_FS_errors_fixed, &c->flags) &&
bch2_write_ref_tryget(c, BCH_WRITE_REF_fsync)) {
bch2_journal_flush_all_pins(&c->journal);
bch2_journal_meta(&c->journal);
bch2_write_ref_put(c, BCH_WRITE_REF_fsync);
}
/* If we fixed errors, verify that fs is actually clean now: */
......
......@@ -23,14 +23,12 @@ static int bch2_memcmp(const void *l, const void *r, const void *priv)
static void verify_replicas_entry(struct bch_replicas_entry_v1 *e)
{
#ifdef CONFIG_BCACHEFS_DEBUG
unsigned i;
BUG_ON(e->data_type >= BCH_DATA_NR);
BUG_ON(!e->nr_devs);
BUG_ON(e->nr_required > 1 &&
e->nr_required >= e->nr_devs);
for (i = 0; i + 1 < e->nr_devs; i++)
for (unsigned i = 0; i + 1 < e->nr_devs; i++)
BUG_ON(e->devs[i] >= e->devs[i + 1]);
#endif
}
......@@ -192,24 +190,17 @@ cpu_replicas_add_entry(struct bch_fs *c,
struct bch_replicas_cpu *old,
struct bch_replicas_entry_v1 *new_entry)
{
unsigned i;
struct bch_replicas_cpu new = {
.nr = old->nr + 1,
.entry_size = max_t(unsigned, old->entry_size,
replicas_entry_bytes(new_entry)),
};
for (i = 0; i < new_entry->nr_devs; i++)
BUG_ON(!bch2_dev_exists(c, new_entry->devs[i]));
BUG_ON(!new_entry->data_type);
verify_replicas_entry(new_entry);
new.entries = kcalloc(new.nr, new.entry_size, GFP_KERNEL);
if (!new.entries)
return new;
for (i = 0; i < old->nr; i++)
for (unsigned i = 0; i < old->nr; i++)
memcpy(cpu_replicas_entry(&new, i),
cpu_replicas_entry(old, i),
old->entry_size);
......@@ -230,8 +221,6 @@ static inline int __replicas_entry_idx(struct bch_replicas_cpu *r,
if (unlikely(entry_size > r->entry_size))
return -1;
verify_replicas_entry(search);
#define entry_cmp(_l, _r) memcmp(_l, _r, entry_size)
idx = eytzinger0_find(r->entries, r->nr, r->entry_size,
entry_cmp, search);
......@@ -524,13 +513,16 @@ int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask)
c->replicas_gc.nr = 0;
c->replicas_gc.entry_size = 0;
for_each_cpu_replicas_entry(&c->replicas, e)
if (!((1 << e->data_type) & typemask)) {
for_each_cpu_replicas_entry(&c->replicas, e) {
/* Preserve unknown data types */
if (e->data_type >= BCH_DATA_NR ||
!((1 << e->data_type) & typemask)) {
c->replicas_gc.nr++;
c->replicas_gc.entry_size =
max_t(unsigned, c->replicas_gc.entry_size,
replicas_entry_bytes(e));
}
}
c->replicas_gc.entries = kcalloc(c->replicas_gc.nr,
c->replicas_gc.entry_size,
......@@ -542,7 +534,8 @@ int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask)
}
for_each_cpu_replicas_entry(&c->replicas, e)
if (!((1 << e->data_type) & typemask))
if (e->data_type >= BCH_DATA_NR ||
!((1 << e->data_type) & typemask))
memcpy(cpu_replicas_entry(&c->replicas_gc, i++),
e, c->replicas_gc.entry_size);
......@@ -998,7 +991,7 @@ unsigned bch2_sb_dev_has_data(struct bch_sb *sb, unsigned dev)
{
struct bch_sb_field_replicas *replicas;
struct bch_sb_field_replicas_v0 *replicas_v0;
unsigned i, data_has = 0;
unsigned data_has = 0;
replicas = bch2_sb_field_get(sb, replicas);
replicas_v0 = bch2_sb_field_get(sb, replicas_v0);
......@@ -1006,17 +999,26 @@ unsigned bch2_sb_dev_has_data(struct bch_sb *sb, unsigned dev)
if (replicas) {
struct bch_replicas_entry_v1 *r;
for_each_replicas_entry(replicas, r)
for (i = 0; i < r->nr_devs; i++)
for_each_replicas_entry(replicas, r) {
if (r->data_type >= sizeof(data_has) * 8)
continue;
for (unsigned i = 0; i < r->nr_devs; i++)
if (r->devs[i] == dev)
data_has |= 1 << r->data_type;
}
} else if (replicas_v0) {
struct bch_replicas_entry_v0 *r;
for_each_replicas_entry_v0(replicas_v0, r)
for (i = 0; i < r->nr_devs; i++)
for_each_replicas_entry_v0(replicas_v0, r) {
if (r->data_type >= sizeof(data_has) * 8)
continue;
for (unsigned i = 0; i < r->nr_devs; i++)
if (r->devs[i] == dev)
data_has |= 1 << r->data_type;
}
}
......
......@@ -273,7 +273,9 @@
x(dup_backpointer_to_bad_csum_extent, 265) \
x(btree_bitmap_not_marked, 266) \
x(sb_clean_entry_overrun, 267) \
x(btree_ptr_v2_written_0, 268)
x(btree_ptr_v2_written_0, 268) \
x(subvol_snapshot_bad, 269) \
x(subvol_inode_bad, 270)
enum bch_sb_error_id {
#define x(t, n) BCH_FSCK_ERR_##t = n,
......
......@@ -176,12 +176,9 @@ static inline bool bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ances
static inline bool bch2_snapshot_has_children(struct bch_fs *c, u32 id)
{
const struct snapshot_t *t;
bool ret;
rcu_read_lock();
t = snapshot_t(c, id);
ret = (t->children[0]|t->children[1]) != 0;
const struct snapshot_t *t = snapshot_t(c, id);
bool ret = t && (t->children[0]|t->children[1]) != 0;
rcu_read_unlock();
return ret;
......
......@@ -210,12 +210,21 @@ int bch2_check_subvol_children(struct bch_fs *c)
/*
 * Validate a subvolume key.
 *
 * Three conditions are checked, each reported through bkey_fsck_err_on()
 * with its own error id and message:
 *  - subvol_pos_bad:      the key position is below SUBVOL_POS_MIN or
 *                         above SUBVOL_POS_MAX
 *  - subvol_snapshot_bad: the subvolume's snapshot field is zero
 *  - subvol_inode_bad:    the subvolume's inode field is zero
 *
 * Returns ret, which is initialised to 0.
 *
 * NOTE(review): bkey_fsck_err_on() is defined outside this view; presumably
 * it sets ret and jumps to the fsck_err label when its condition holds, and
 * it may reference the flags parameter implicitly -- confirm against the
 * macro definition.
 */
int bch2_subvolume_invalid(struct bch_fs *c, struct bkey_s_c k,
enum bch_validate_flags flags, struct printbuf *err)
{
struct bkey_s_c_subvolume subvol = bkey_s_c_to_subvolume(k);
int ret = 0;
/* Position must lie within the valid subvolume key range. */
bkey_fsck_err_on(bkey_lt(k.k->p, SUBVOL_POS_MIN) ||
bkey_gt(k.k->p, SUBVOL_POS_MAX), c, err,
subvol_pos_bad,
"invalid pos");
/* A zero snapshot field is treated as invalid. */
bkey_fsck_err_on(!subvol.v->snapshot, c, err,
subvol_snapshot_bad,
"invalid snapshot");
/* A zero inode field is treated as invalid. */
bkey_fsck_err_on(!subvol.v->inode, c, err,
subvol_inode_bad,
"invalid inode");
/* Label referenced by name only inside the fsck error macros (not visible here). */
fsck_err:
return ret;
}
......
......@@ -551,9 +551,9 @@ static void __bch2_fs_free(struct bch_fs *c)
bch2_fs_io_read_exit(c);
bch2_fs_buckets_waiting_for_journal_exit(c);
bch2_fs_btree_interior_update_exit(c);
bch2_fs_btree_iter_exit(c);
bch2_fs_btree_key_cache_exit(&c->btree_key_cache);
bch2_fs_btree_cache_exit(c);
bch2_fs_btree_iter_exit(c);
bch2_fs_replicas_exit(c);
bch2_fs_journal_exit(&c->journal);
bch2_io_clock_exit(&c->io_clock[WRITE]);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment