Commit e1effd42 authored by Kent Overstreet's avatar Kent Overstreet Committed by Kent Overstreet

bcachefs: More improvements for alloc info checks

 - Move checks for whether the device & bucket are valid from the
   .key_invalid method to bch2_check_alloc_key(). This is because
   .key_invalid() is called on keys that may no longer exist (post
   journal replay), which is a problem when removing/resizing devices.

 - We weren't checking the need_discard btree to ensure that every set
   bucket has a corresponding alloc key. This refactors the code for
   checking the freespace btree, so that it now checks both.
Signed-off-by: default avatarKent Overstreet <kent.overstreet@gmail.com>
parent afb6f7f6
...@@ -306,11 +306,6 @@ int bch2_alloc_v1_invalid(const struct bch_fs *c, struct bkey_s_c k, struct prin ...@@ -306,11 +306,6 @@ int bch2_alloc_v1_invalid(const struct bch_fs *c, struct bkey_s_c k, struct prin
{ {
struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k); struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k);
if (!bch2_dev_exists2(c, k.k->p.inode)) {
pr_buf(err, "invalid device (%llu)", k.k->p.inode);
return -EINVAL;
}
/* allow for unknown fields */ /* allow for unknown fields */
if (bkey_val_u64s(a.k) < bch_alloc_v1_val_u64s(a.v)) { if (bkey_val_u64s(a.k) < bch_alloc_v1_val_u64s(a.v)) {
pr_buf(err, "incorrect value size (%zu < %u)", pr_buf(err, "incorrect value size (%zu < %u)",
...@@ -325,11 +320,6 @@ int bch2_alloc_v2_invalid(const struct bch_fs *c, struct bkey_s_c k, struct prin ...@@ -325,11 +320,6 @@ int bch2_alloc_v2_invalid(const struct bch_fs *c, struct bkey_s_c k, struct prin
{ {
struct bkey_alloc_unpacked u; struct bkey_alloc_unpacked u;
if (!bch2_dev_exists2(c, k.k->p.inode)) {
pr_buf(err, "invalid device (%llu)", k.k->p.inode);
return -EINVAL;
}
if (bch2_alloc_unpack_v2(&u, k)) { if (bch2_alloc_unpack_v2(&u, k)) {
pr_buf(err, "unpack error"); pr_buf(err, "unpack error");
return -EINVAL; return -EINVAL;
...@@ -341,20 +331,6 @@ int bch2_alloc_v2_invalid(const struct bch_fs *c, struct bkey_s_c k, struct prin ...@@ -341,20 +331,6 @@ int bch2_alloc_v2_invalid(const struct bch_fs *c, struct bkey_s_c k, struct prin
int bch2_alloc_v3_invalid(const struct bch_fs *c, struct bkey_s_c k, struct printbuf *err) int bch2_alloc_v3_invalid(const struct bch_fs *c, struct bkey_s_c k, struct printbuf *err)
{ {
struct bkey_alloc_unpacked u; struct bkey_alloc_unpacked u;
struct bch_dev *ca;
if (!bch2_dev_exists2(c, k.k->p.inode)) {
pr_buf(err, "invalid device (%llu)", k.k->p.inode);
return -EINVAL;
}
ca = bch_dev_bkey_exists(c, k.k->p.inode);
if (k.k->p.offset < ca->mi.first_bucket ||
k.k->p.offset >= ca->mi.nbuckets) {
pr_buf(err, "invalid bucket");
return -EINVAL;
}
if (bch2_alloc_unpack_v3(&u, k)) { if (bch2_alloc_unpack_v3(&u, k)) {
pr_buf(err, "unpack error"); pr_buf(err, "unpack error");
...@@ -366,18 +342,9 @@ int bch2_alloc_v3_invalid(const struct bch_fs *c, struct bkey_s_c k, struct prin ...@@ -366,18 +342,9 @@ int bch2_alloc_v3_invalid(const struct bch_fs *c, struct bkey_s_c k, struct prin
int bch2_alloc_v4_invalid(const struct bch_fs *c, struct bkey_s_c k, struct printbuf *err) int bch2_alloc_v4_invalid(const struct bch_fs *c, struct bkey_s_c k, struct printbuf *err)
{ {
struct bch_dev *ca; if (bkey_val_bytes(k.k) != sizeof(struct bch_alloc_v4)) {
pr_buf(err, "bad val size (%zu != %zu)",
if (!bch2_dev_exists2(c, k.k->p.inode)) { bkey_val_bytes(k.k), sizeof(struct bch_alloc_v4));
pr_buf(err, "invalid device (%llu)", k.k->p.inode);
return -EINVAL;
}
ca = bch_dev_bkey_exists(c, k.k->p.inode);
if (k.k->p.offset < ca->mi.first_bucket ||
k.k->p.offset >= ca->mi.nbuckets) {
pr_buf(err, "invalid bucket");
return -EINVAL; return -EINVAL;
} }
...@@ -577,6 +544,7 @@ static int bch2_check_alloc_key(struct btree_trans *trans, ...@@ -577,6 +544,7 @@ static int bch2_check_alloc_key(struct btree_trans *trans,
struct btree_iter *alloc_iter) struct btree_iter *alloc_iter)
{ {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
struct bch_dev *ca;
struct btree_iter discard_iter, freespace_iter; struct btree_iter discard_iter, freespace_iter;
struct bch_alloc_v4 a; struct bch_alloc_v4 a;
unsigned discard_key_type, freespace_key_type; unsigned discard_key_type, freespace_key_type;
...@@ -593,7 +561,16 @@ static int bch2_check_alloc_key(struct btree_trans *trans, ...@@ -593,7 +561,16 @@ static int bch2_check_alloc_key(struct btree_trans *trans,
if (ret) if (ret)
return ret; return ret;
if (fsck_err_on(!bch2_dev_bucket_exists(c, alloc_k.k->p), c,
"alloc key for invalid device or bucket"))
return bch2_btree_delete_at(trans, alloc_iter, 0);
ca = bch_dev_bkey_exists(c, alloc_k.k->p.inode);
if (!ca->mi.freespace_initialized)
return 0;
bch2_alloc_to_v4(alloc_k, &a); bch2_alloc_to_v4(alloc_k, &a);
discard_key_type = bucket_state(a) == BUCKET_need_discard discard_key_type = bucket_state(a) == BUCKET_need_discard
? KEY_TYPE_set : 0; ? KEY_TYPE_set : 0;
freespace_key_type = bucket_state(a) == BUCKET_free freespace_key_type = bucket_state(a) == BUCKET_free
...@@ -668,21 +645,8 @@ static int bch2_check_alloc_key(struct btree_trans *trans, ...@@ -668,21 +645,8 @@ static int bch2_check_alloc_key(struct btree_trans *trans,
return ret; return ret;
} }
static inline bool bch2_dev_bucket_exists(struct bch_fs *c, struct bpos pos) static int bch2_check_discard_freespace_key(struct btree_trans *trans,
{ struct btree_iter *iter)
struct bch_dev *ca;
if (pos.inode >= c->sb.nr_devices || !c->devs[pos.inode])
return false;
ca = bch_dev_bkey_exists(c, pos.inode);
return pos.offset >= ca->mi.first_bucket &&
pos.offset < ca->mi.nbuckets;
}
static int bch2_check_freespace_key(struct btree_trans *trans,
struct btree_iter *freespace_iter,
bool initial)
{ {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
struct btree_iter alloc_iter; struct btree_iter alloc_iter;
...@@ -691,10 +655,13 @@ static int bch2_check_freespace_key(struct btree_trans *trans, ...@@ -691,10 +655,13 @@ static int bch2_check_freespace_key(struct btree_trans *trans,
u64 genbits; u64 genbits;
struct bpos pos; struct bpos pos;
struct bkey_i *update; struct bkey_i *update;
enum bucket_state state = iter->btree_id == BTREE_ID_need_discard
? BUCKET_need_discard
: BUCKET_free;
struct printbuf buf = PRINTBUF; struct printbuf buf = PRINTBUF;
int ret; int ret;
freespace_k = bch2_btree_iter_peek(freespace_iter); freespace_k = bch2_btree_iter_peek(iter);
if (!freespace_k.k) if (!freespace_k.k)
return 1; return 1;
...@@ -702,15 +669,16 @@ static int bch2_check_freespace_key(struct btree_trans *trans, ...@@ -702,15 +669,16 @@ static int bch2_check_freespace_key(struct btree_trans *trans,
if (ret) if (ret)
return ret; return ret;
pos = freespace_iter->pos; pos = iter->pos;
pos.offset &= ~(~0ULL << 56); pos.offset &= ~(~0ULL << 56);
genbits = freespace_iter->pos.offset & (~0ULL << 56); genbits = iter->pos.offset & (~0ULL << 56);
bch2_trans_iter_init(trans, &alloc_iter, BTREE_ID_alloc, pos, 0); bch2_trans_iter_init(trans, &alloc_iter, BTREE_ID_alloc, pos, 0);
if (fsck_err_on(!bch2_dev_bucket_exists(c, pos), c, if (fsck_err_on(!bch2_dev_bucket_exists(c, pos), c,
"%llu:%llu set in freespace btree but device or bucket does not exist", "%llu:%llu set in %s btree but device or bucket does not exist",
pos.inode, pos.offset)) pos.inode, pos.offset,
bch2_btree_ids[iter->btree_id]))
goto delete; goto delete;
k = bch2_btree_iter_peek_slot(&alloc_iter); k = bch2_btree_iter_peek_slot(&alloc_iter);
...@@ -720,11 +688,13 @@ static int bch2_check_freespace_key(struct btree_trans *trans, ...@@ -720,11 +688,13 @@ static int bch2_check_freespace_key(struct btree_trans *trans,
bch2_alloc_to_v4(k, &a); bch2_alloc_to_v4(k, &a);
if (fsck_err_on(bucket_state(a) != BUCKET_free || if (fsck_err_on(bucket_state(a) != state ||
genbits != alloc_freespace_genbits(a), c, (state == BUCKET_free &&
"%s\n incorrectly set in freespace index (free %u, genbits %llu should be %llu)", genbits != alloc_freespace_genbits(a)), c,
"%s\n incorrectly set in %s index (free %u, genbits %llu should be %llu)",
(bch2_bkey_val_to_text(&buf, c, k), buf.buf), (bch2_bkey_val_to_text(&buf, c, k), buf.buf),
bucket_state(a) == BUCKET_free, bch2_btree_ids[iter->btree_id],
bucket_state(a) == state,
genbits >> 56, alloc_freespace_genbits(a) >> 56)) genbits >> 56, alloc_freespace_genbits(a) >> 56))
goto delete; goto delete;
out: out:
...@@ -734,46 +704,54 @@ static int bch2_check_freespace_key(struct btree_trans *trans, ...@@ -734,46 +704,54 @@ static int bch2_check_freespace_key(struct btree_trans *trans,
printbuf_exit(&buf); printbuf_exit(&buf);
return ret; return ret;
delete: delete:
update = bch2_trans_kmalloc(trans, sizeof(*update)); if (iter->btree_id == BTREE_ID_freespace) {
ret = PTR_ERR_OR_ZERO(update); /* should probably add a helper for deleting extents */
if (ret) update = bch2_trans_kmalloc(trans, sizeof(*update));
goto err; ret = PTR_ERR_OR_ZERO(update);
if (ret)
goto err;
bkey_init(&update->k); bkey_init(&update->k);
update->k.p = freespace_iter->pos; update->k.p = iter->pos;
bch2_key_resize(&update->k, 1); bch2_key_resize(&update->k, 1);
ret = bch2_trans_update(trans, freespace_iter, update, 0) ?: ret = bch2_trans_update(trans, iter, update, 0);
bch2_trans_commit(trans, NULL, NULL, 0); } else {
ret = bch2_btree_delete_at(trans, iter, 0);
}
goto out; goto out;
} }
int bch2_check_alloc_info(struct bch_fs *c, bool initial) int bch2_check_alloc_info(struct bch_fs *c)
{ {
struct btree_trans trans; struct btree_trans trans;
struct btree_iter iter; struct btree_iter iter;
struct bkey_s_c k; struct bkey_s_c k;
int ret = 0, last_dev = -1; int ret = 0;
bch2_trans_init(&trans, c, 0, 0); bch2_trans_init(&trans, c, 0, 0);
for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN, for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN,
BTREE_ITER_PREFETCH, k, ret) { BTREE_ITER_PREFETCH, k, ret) {
if (k.k->p.inode != last_dev) { ret = __bch2_trans_do(&trans, NULL, NULL, 0,
struct bch_dev *ca = bch_dev_bkey_exists(c, k.k->p.inode); bch2_check_alloc_key(&trans, &iter));
if (ret)
if (!ca->mi.freespace_initialized) { break;
bch2_btree_iter_set_pos(&iter, POS(k.k->p.inode + 1, 0)); }
continue; bch2_trans_iter_exit(&trans, &iter);
}
last_dev = k.k->p.inode; if (ret)
} goto err;
bch2_trans_iter_init(&trans, &iter, BTREE_ID_need_discard, POS_MIN,
BTREE_ITER_PREFETCH);
while (1) {
ret = __bch2_trans_do(&trans, NULL, NULL, 0, ret = __bch2_trans_do(&trans, NULL, NULL, 0,
bch2_check_alloc_key(&trans, &iter)); bch2_check_discard_freespace_key(&trans, &iter));
if (ret) if (ret)
break; break;
bch2_btree_iter_set_pos(&iter, bpos_nosnap_successor(iter.pos));
} }
bch2_trans_iter_exit(&trans, &iter); bch2_trans_iter_exit(&trans, &iter);
...@@ -784,7 +762,7 @@ int bch2_check_alloc_info(struct bch_fs *c, bool initial) ...@@ -784,7 +762,7 @@ int bch2_check_alloc_info(struct bch_fs *c, bool initial)
BTREE_ITER_PREFETCH); BTREE_ITER_PREFETCH);
while (1) { while (1) {
ret = __bch2_trans_do(&trans, NULL, NULL, 0, ret = __bch2_trans_do(&trans, NULL, NULL, 0,
bch2_check_freespace_key(&trans, &iter, initial)); bch2_check_discard_freespace_key(&trans, &iter));
if (ret) if (ret)
break; break;
......
...@@ -11,6 +11,18 @@ ...@@ -11,6 +11,18 @@
/* How out of date a pointer gen is allowed to be: */ /* How out of date a pointer gen is allowed to be: */
#define BUCKET_GC_GEN_MAX 96U #define BUCKET_GC_GEN_MAX 96U
static inline bool bch2_dev_bucket_exists(struct bch_fs *c, struct bpos pos)
{
struct bch_dev *ca;
if (!bch2_dev_exists2(c, pos.inode))
return false;
ca = bch_dev_bkey_exists(c, pos.inode);
return pos.offset >= ca->mi.first_bucket &&
pos.offset < ca->mi.nbuckets;
}
static inline u8 alloc_gc_gen(struct bch_alloc_v4 a) static inline u8 alloc_gc_gen(struct bch_alloc_v4 a)
{ {
return a.gen - a.oldest_gen; return a.gen - a.oldest_gen;
...@@ -113,7 +125,7 @@ int bch2_alloc_read(struct bch_fs *); ...@@ -113,7 +125,7 @@ int bch2_alloc_read(struct bch_fs *);
int bch2_trans_mark_alloc(struct btree_trans *, struct bkey_s_c, int bch2_trans_mark_alloc(struct btree_trans *, struct bkey_s_c,
struct bkey_i *, unsigned); struct bkey_i *, unsigned);
int bch2_check_alloc_info(struct bch_fs *, bool); int bch2_check_alloc_info(struct bch_fs *);
int bch2_check_alloc_to_lru_refs(struct bch_fs *); int bch2_check_alloc_to_lru_refs(struct bch_fs *);
void bch2_do_discards(struct bch_fs *); void bch2_do_discards(struct bch_fs *);
......
...@@ -511,14 +511,9 @@ int bch2_mark_alloc(struct btree_trans *trans, ...@@ -511,14 +511,9 @@ int bch2_mark_alloc(struct btree_trans *trans,
u64 journal_seq = trans->journal_res.seq; u64 journal_seq = trans->journal_res.seq;
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
struct bch_alloc_v4 old_a, new_a; struct bch_alloc_v4 old_a, new_a;
struct bch_dev *ca = bch_dev_bkey_exists(c, new.k->p.inode); struct bch_dev *ca;
int ret = 0; int ret = 0;
if (bch2_trans_inconsistent_on(new.k->p.offset < ca->mi.first_bucket ||
new.k->p.offset >= ca->mi.nbuckets, trans,
"alloc key outside range of device's buckets"))
return -EIO;
/* /*
* alloc btree is read in by bch2_alloc_read, not gc: * alloc btree is read in by bch2_alloc_read, not gc:
*/ */
...@@ -526,6 +521,12 @@ int bch2_mark_alloc(struct btree_trans *trans, ...@@ -526,6 +521,12 @@ int bch2_mark_alloc(struct btree_trans *trans,
!(flags & BTREE_TRIGGER_BUCKET_INVALIDATE)) !(flags & BTREE_TRIGGER_BUCKET_INVALIDATE))
return 0; return 0;
if (bch2_trans_inconsistent_on(!bch2_dev_bucket_exists(c, new.k->p), trans,
"alloc key for invalid device or bucket"))
return -EIO;
ca = bch_dev_bkey_exists(c, new.k->p.inode);
bch2_alloc_to_v4(old, &old_a); bch2_alloc_to_v4(old, &old_a);
bch2_alloc_to_v4(new, &new_a); bch2_alloc_to_v4(new, &new_a);
......
...@@ -1237,7 +1237,7 @@ int bch2_fs_recovery(struct bch_fs *c) ...@@ -1237,7 +1237,7 @@ int bch2_fs_recovery(struct bch_fs *c)
if (c->opts.fsck) { if (c->opts.fsck) {
bch_info(c, "checking need_discard and freespace btrees"); bch_info(c, "checking need_discard and freespace btrees");
err = "error checking need_discard and freespace btrees"; err = "error checking need_discard and freespace btrees";
ret = bch2_check_alloc_info(c, true); ret = bch2_check_alloc_info(c);
if (ret) if (ret)
goto err; goto err;
......
...@@ -1474,15 +1474,6 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) ...@@ -1474,15 +1474,6 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
goto err; goto err;
} }
/*
* must flush all existing journal entries, they might have
* (overwritten) keys that point to the device we're removing:
*/
bch2_journal_flush_all_pins(&c->journal);
/*
* hack to ensure bch2_replicas_gc2() clears out entries to this device
*/
bch2_journal_meta(&c->journal);
ret = bch2_journal_error(&c->journal); ret = bch2_journal_error(&c->journal);
if (ret) { if (ret) {
bch_err(ca, "Remove failed, journal error"); bch_err(ca, "Remove failed, journal error");
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment