Commit eb331fe5 authored by Kent Overstreet, committed by Kent Overstreet

bcachefs: Check for stale dirty pointer before reads

Since we retry reads when we discover we read from a pointer that went
stale, if a dirty pointer is erroneously stale it would cause us to loop
retrying that read forever - unless we check before issuing the read,
while the btree is still locked, when we know that a dirty pointer
should never be stale.

This patch adds that check, along with printing some helpful debug info.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
parent fcf01959
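
Background for readers outside bcachefs: every extent pointer records the generation number of the bucket it points into, and reusing a bucket bumps the bucket's generation, so a pointer whose recorded generation no longer matches is "stale". A stale cached pointer is normal (the cached copy was simply evicted), but a dirty pointer going stale means the only copy of the data was overwritten. A minimal conceptual sketch of that distinction, using hypothetical names (sketch_ptr, sketch_ptr_stale) rather than the kernel's actual types:

#include <stdbool.h>
#include <stdint.h>

/*
 * Illustrative sketch only - hypothetical names, not the bcachefs API.
 */
struct sketch_ptr {
	uint64_t	bucket;		/* bucket this pointer points into */
	uint8_t		gen;		/* bucket generation when written */
	bool		cached;		/* cached copy, vs. dirty (only copy) */
};

/* Stale: the bucket has been reused since this pointer was written. */
static bool sketch_ptr_stale(const uint8_t *bucket_gens, struct sketch_ptr p)
{
	return bucket_gens[p.bucket] != p.gen;
}

/* The inconsistency this commit reports: stale AND dirty. */
static bool sketch_stale_dirty(const uint8_t *bucket_gens, struct sketch_ptr p)
{
	return !p.cached && sketch_ptr_stale(bucket_gens, p);
}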
fs/bcachefs/fs-io.c
@@ -1043,8 +1043,6 @@ static void bchfs_read(struct btree_trans *trans,
 
 		sectors = min(sectors, k.k->size - offset_into_extent);
 
-		bch2_trans_unlock(trans);
-
 		if (readpages_iter)
 			readpage_bio_extend(readpages_iter, &rbio->bio, sectors,
 					    extent_partial_reads_expensive(k));
fs/bcachefs/io.c
@@ -2032,6 +2032,33 @@ int __bch2_read_indirect_extent(struct btree_trans *trans,
 	return ret;
 }
 
+static noinline void read_from_stale_dirty_pointer(struct btree_trans *trans,
+						   struct bkey_s_c k,
+						   struct bch_extent_ptr ptr)
+{
+	struct bch_fs *c = trans->c;
+	struct bch_dev *ca = bch_dev_bkey_exists(c, ptr.dev);
+	struct btree_iter iter;
+	char buf[200];
+	int ret;
+
+	bch2_bkey_val_to_text(&PBUF(buf), c, k);
+	bch2_fs_inconsistent(c, "Attempting to read from stale dirty pointer: %s", buf);
+
+	bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
+			     POS(ptr.dev, PTR_BUCKET_NR(ca, &ptr)),
+			     BTREE_ITER_CACHED);
+
+	ret = lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_slot(&iter)));
+	if (ret)
+		return;
+
+	bch2_bkey_val_to_text(&PBUF(buf), c, k);
+	bch_err(c, "%s", buf);
+	bch_err(c, "memory gen: %u", *bucket_gen(ca, iter.pos.offset));
+	bch2_trans_iter_exit(trans, &iter);
+}
+
 int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
 		       struct bvec_iter iter, struct bpos read_pos,
 		       enum btree_id data_btree, struct bkey_s_c k,
@@ -2041,7 +2068,7 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
 	struct bch_fs *c = trans->c;
 	struct extent_ptr_decoded pick;
 	struct bch_read_bio *rbio = NULL;
-	struct bch_dev *ca;
+	struct bch_dev *ca = NULL;
 	struct promote_op *promote = NULL;
 	bool bounce = false, read_full = false, narrow_crcs = false;
 	struct bpos data_pos = bkey_start_pos(k.k);
@@ -2058,7 +2085,7 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
 		zero_fill_bio_iter(&orig->bio, iter);
 		goto out_read_done;
 	}
-
+retry_pick:
 	pick_ret = bch2_bkey_pick_read_device(c, k, failed, &pick);
 
 	/* hole or reservation - just zero fill: */
@@ -2071,8 +2098,27 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
 		goto err;
 	}
 
-	if (pick_ret > 0)
-		ca = bch_dev_bkey_exists(c, pick.ptr.dev);
+	ca = bch_dev_bkey_exists(c, pick.ptr.dev);
+
+	/*
+	 * Stale dirty pointers are treated as IO errors, but @failed isn't
+	 * allocated unless we're in the retry path - so if we're not in the
+	 * retry path, don't check here, it'll be caught in bch2_read_endio()
+	 * and we'll end up in the retry path:
+	 */
+	if ((flags & BCH_READ_IN_RETRY) &&
+	    !pick.ptr.cached &&
+	    unlikely(ptr_stale(ca, &pick.ptr))) {
+		read_from_stale_dirty_pointer(trans, k, pick.ptr);
+		bch2_mark_io_failure(failed, &pick);
+		goto retry_pick;
+	}
+
+	/*
+	 * Unlock the iterator while the btree node's lock is still in
+	 * cache, before doing the IO:
+	 */
+	bch2_trans_unlock(trans);
 
 	if (flags & BCH_READ_NODECODE) {
 		/*
@@ -2367,12 +2413,6 @@ void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
 		 */
 		sectors = min(sectors, k.k->size - offset_into_extent);
 
-		/*
-		 * Unlock the iterator while the btree node's lock is still in
-		 * cache, before doing the IO:
-		 */
-		bch2_trans_unlock(&trans);
-
 		bytes = min(sectors, bvec_iter_sectors(bvec_iter)) << 9;
 		swap(bvec_iter.bi_size, bytes);
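A note on why the new check terminates rather than looping: bch2_mark_io_failure() records the failed replica in @failed, and bch2_bkey_pick_read_device() won't pick a replica already marked failed, so each pass through retry_pick removes one candidate until a good replica is found or the read fails outright. A standalone sketch of that termination argument, with hypothetical names rather than the bcachefs functions:

#include <errno.h>
#include <stdbool.h>

/*
 * Hypothetical sketch, not the bcachefs implementation: once a replica
 * is marked failed it is never picked again, so the retry loop runs at
 * most nr_replicas times.
 */
static int sketch_pick_replica(const bool *failed, int nr_replicas)
{
	for (int i = 0; i < nr_replicas; i++)
		if (!failed[i])
			return i;	/* first replica not yet marked bad */
	return -EIO;			/* every replica has failed */
}

static int sketch_read(const bool *stale, bool *failed, int nr_replicas)
{
	int i;
retry_pick:
	i = sketch_pick_replica(failed, nr_replicas);
	if (i < 0)
		return i;		/* no replicas left: stop, don't spin */
	if (stale[i]) {
		failed[i] = true;	/* bch2_mark_io_failure() analogue */
		goto retry_pick;
	}
	return i;			/* issue the read from replica i */
}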
fs/bcachefs/move.c
@@ -752,10 +752,12 @@ static int __bch2_move_data(struct bch_fs *c,
 			BUG();
 		}
 
-		/* unlock before doing IO: */
+		/*
+		 * The iterator gets unlocked by __bch2_read_extent - need to
+		 * save a copy of @k elsewhere:
+		 */
 		bch2_bkey_buf_reassemble(&sk, c, k);
 		k = bkey_i_to_s_c(sk.k);
-		bch2_trans_unlock(&trans);
 
 		ret2 = bch2_move_extent(&trans, ctxt, wp, io_opts, btree_id, k,
 					data_cmd, data_opts);
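The move.c hunk above encodes a lifetime rule rather than new behavior: @k points into iterator-owned memory, and because __bch2_read_extent now unlocks the iterator itself, the caller must copy the key into its own buffer (bch2_bkey_buf_reassemble) before the call. A generic sketch of that copy-before-unlock pattern, with hypothetical names:

#include <stddef.h>
#include <string.h>

/*
 * Hypothetical sketch, not the bcachefs API: if a callee may unlock or
 * invalidate the iterator that a key points into, copy the key into
 * caller-owned storage before calling it.
 */
struct sketch_key_buf {
	char	data[64];		/* caller-owned copy of the key */
	size_t	len;
};

static const void *sketch_save_key(struct sketch_key_buf *buf,
				   const void *iter_owned_key, size_t len)
{
	buf->len = len < sizeof(buf->data) ? len : sizeof(buf->data);
	memcpy(buf->data, iter_owned_key, buf->len);
	return buf->data;		/* valid even after the iterator unlocks */
}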