Commit 79203111 authored by Kent Overstreet

bcachefs: Unwritten extents support

 - bch2_extent_merge checks unwritten bit
 - read path returns 0s for unwritten extents without actually reading
 - reflink path skips over unwritten extents
 - bch2_bkey_ptrs_invalid() checks for extents with both written and
   unwritten ptrs, and for non-extent keys (stripes, btree ptrs) with
   unwritten ptrs
 - fiemap checks for unwritten extents and returns
   FIEMAP_EXTENT_UNWRITTEN
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent 2f1f7fe9
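
Before the diff, a brief orientation: the patch repurposes the old "reservation" bit in struct bch_extent_ptr as an "unwritten" bit, meaning the pointer's space is allocated on disk but does not yet hold valid data, so reads must return zeroes. The sketch below is a minimal standalone model of that classification logic (plain userspace C, not the kernel code; the struct layout and names are simplified stand-ins, and only the mirrored helper names bkey_extent_is_unwritten()/bkey_extent_is_reservation() come from the patch itself):

#include <stdbool.h>
#include <stdio.h>

enum key_type { KEY_TYPE_extent, KEY_TYPE_reservation };

struct ptr {
	unsigned	dev;
	bool		unwritten;	/* space allocated, data not written yet */
};

struct extent {
	enum key_type	type;
	unsigned	nr_ptrs;
	struct ptr	ptrs[4];
};

/* Any pointer with the bit set makes the whole extent unwritten. */
static bool extent_is_unwritten(const struct extent *e)
{
	for (unsigned i = 0; i < e->nr_ptrs; i++)
		if (e->ptrs[i].unwritten)
			return true;
	return false;
}

/* Reservation-like: an explicit reservation key, or an unwritten extent. */
static bool extent_is_reservation(const struct extent *e)
{
	return e->type == KEY_TYPE_reservation || extent_is_unwritten(e);
}

int main(void)
{
	struct extent written   = { KEY_TYPE_extent, 1, { { .dev = 0 } } };
	struct extent unwritten = { KEY_TYPE_extent, 1, { { .dev = 0, .unwritten = true } } };

	/* The read path returns zeroes for the unwritten one instead of reading. */
	printf("written extent:   reservation-like = %d\n", extent_is_reservation(&written));
	printf("unwritten extent: reservation-like = %d\n", extent_is_reservation(&unwritten));
	return 0;
}
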
@@ -582,7 +582,7 @@ struct bch_extent_ptr {
 	__u64			type:1,
 				cached:1,
 				unused:1,
-				reservation:1,
+				unwritten:1,
 				offset:44, /* 8 petabytes */
 				dev:8,
 				gen:8;
@@ -590,7 +590,7 @@ struct bch_extent_ptr {
 	__u64			gen:8,
 				dev:8,
 				offset:44,
-				reservation:1,
+				unwritten:1,
 				unused:1,
 				cached:1,
 				type:1;
@@ -116,6 +116,13 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k,
 		return -EIO;
 
 	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+		/*
+		 * Unwritten extent: no need to actually read, treat it as a
+		 * hole and return 0s:
+		 */
+		if (p.ptr.unwritten)
+			return 0;
+
 		ca = bch_dev_bkey_exists(c, p.ptr.dev);
 
 		/*
@@ -269,6 +276,7 @@ bool bch2_extent_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r)
 		    rp.ptr.offset + rp.crc.offset ||
 		    lp.ptr.dev != rp.ptr.dev ||
 		    lp.ptr.gen != rp.ptr.gen ||
+		    lp.ptr.unwritten != rp.ptr.unwritten ||
 		    lp.has_ec != rp.has_ec)
			return false;
@@ -904,6 +912,9 @@ bool bch2_extents_match(struct bkey_s_c k1, struct bkey_s_c k2)
 		const union bch_extent_entry *entry1, *entry2;
 		struct extent_ptr_decoded p1, p2;
 
+		if (bkey_extent_is_unwritten(k1) != bkey_extent_is_unwritten(k2))
+			return false;
+
 		bkey_for_each_ptr_decode(k1.k, ptrs1, p1, entry1)
 			bkey_for_each_ptr_decode(k2.k, ptrs2, p2, entry2)
 				if (p1.ptr.dev == p2.ptr.dev &&
@@ -981,10 +992,12 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
 			u32 offset;
 			u64 b = sector_to_bucket_and_offset(ca, ptr->offset, &offset);
 
-			prt_printf(out, "ptr: %u:%llu:%u gen %u%s", ptr->dev,
-				   b, offset, ptr->gen,
-				   ptr->cached ? " cached" : "");
+			prt_printf(out, "ptr: %u:%llu:%u gen %u",
+				   ptr->dev, b, offset, ptr->gen);
+			if (ptr->cached)
+				prt_str(out, " cached");
+			if (ptr->unwritten)
+				prt_str(out, " unwritten");
 			if (ca && ptr_stale(ca, ptr))
 				prt_printf(out, " stale");
 		}
@@ -1073,6 +1086,7 @@ int bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k,
 	unsigned size_ondisk = k.k->size;
 	unsigned nonce = UINT_MAX;
 	unsigned nr_ptrs = 0;
+	bool unwritten = false;
 	int ret;
 
 	if (bkey_is_btree_ptr(k.k))
@@ -1097,6 +1111,18 @@ int bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k,
 						 false, err);
 			if (ret)
 				return ret;
+
+			if (nr_ptrs && unwritten != entry->ptr.unwritten) {
+				prt_printf(err, "extent with unwritten and written ptrs");
+				return -BCH_ERR_invalid_bkey;
+			}
+
+			if (k.k->type != KEY_TYPE_extent && entry->ptr.unwritten) {
+				prt_printf(err, "has unwritten ptrs");
+				return -BCH_ERR_invalid_bkey;
+			}
+
+			unwritten = entry->ptr.unwritten;
 			nr_ptrs++;
 			break;
 		case BCH_EXTENT_ENTRY_crc32:
@@ -510,6 +510,23 @@ static inline bool bkey_extent_is_allocation(const struct bkey *k)
 	}
 }
 
+static inline bool bkey_extent_is_unwritten(struct bkey_s_c k)
+{
+	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+	const struct bch_extent_ptr *ptr;
+
+	bkey_for_each_ptr(ptrs, ptr)
+		if (ptr->unwritten)
+			return true;
+	return false;
+}
+
+static inline bool bkey_extent_is_reservation(struct bkey_s_c k)
+{
+	return k.k->type == KEY_TYPE_reservation ||
+		bkey_extent_is_unwritten(k);
+}
+
 static inline struct bch_devs_list bch2_bkey_devs(struct bkey_s_c k)
 {
 	struct bch_devs_list ret = (struct bch_devs_list) { 0 };
@@ -341,11 +341,11 @@ static struct bch_page_state *bch2_page_state_create(struct page *page,
 	return bch2_page_state(page) ?: __bch2_page_state_create(page, gfp);
 }
 
-static unsigned bkey_to_sector_state(const struct bkey *k)
+static unsigned bkey_to_sector_state(struct bkey_s_c k)
 {
-	if (k->type == KEY_TYPE_reservation)
+	if (bkey_extent_is_reservation(k))
 		return SECTOR_RESERVED;
-	if (bkey_extent_is_allocation(k))
+	if (bkey_extent_is_allocation(k.k))
 		return SECTOR_ALLOCATED;
 	return SECTOR_UNALLOCATED;
 }
@@ -396,7 +396,7 @@ static int bch2_page_state_set(struct bch_fs *c, subvol_inum inum,
 			   SPOS(inum.inum, offset, snapshot),
 			   BTREE_ITER_SLOTS, k, ret) {
 		unsigned nr_ptrs = bch2_bkey_nr_ptrs_fully_allocated(k);
-		unsigned state = bkey_to_sector_state(k.k);
+		unsigned state = bkey_to_sector_state(k);
 
 		while (pg_idx < nr_pages) {
 			struct page *page = pages[pg_idx];
@@ -436,7 +436,7 @@ static void bch2_bio_page_state_set(struct bio *bio, struct bkey_s_c k)
 	struct bio_vec bv;
 	unsigned nr_ptrs = k.k->type == KEY_TYPE_reflink_v
 		? 0 : bch2_bkey_nr_ptrs_fully_allocated(k);
-	unsigned state = bkey_to_sector_state(k.k);
+	unsigned state = bkey_to_sector_state(k);
 
 	bio_for_each_segment(bv, bio, iter)
 		__bch2_page_state_set(bv.bv_page, bv.bv_offset >> 9,
@@ -3093,8 +3093,8 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
 			goto bkey_err;
 
 		/* already reserved */
-		if (k.k->type == KEY_TYPE_reservation &&
-		    bkey_s_c_to_reservation(k).v->nr_replicas >= opts.data_replicas) {
+		if (bkey_extent_is_reservation(k) &&
+		    bch2_bkey_nr_ptrs_fully_allocated(k) >= opts.data_replicas) {
 			bch2_btree_iter_advance(&iter);
 			continue;
 		}
@@ -811,6 +811,9 @@ static int bch2_fill_extent(struct bch_fs *c,
 			int flags2 = 0;
 			u64 offset = p.ptr.offset;
 
+			if (p.ptr.unwritten)
+				flags2 |= FIEMAP_EXTENT_UNWRITTEN;
+
 			if (p.crc.compression_type)
 				flags2 |= FIEMAP_EXTENT_ENCODED;
 			else
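
The bch2_fill_extent() hunk above is the user-visible side of the change: an unwritten extent should now be reported through fiemap with FIEMAP_EXTENT_UNWRITTEN set. As a rough illustration of how that flag is observed from userspace (generic FS_IOC_FIEMAP ioctl usage, not bcachefs-specific; the extent-count limit of 32 below is an arbitrary choice for the sketch):

#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>
#include <linux/fiemap.h>

int main(int argc, char **argv)
{
	if (argc != 2) {
		fprintf(stderr, "usage: %s <file>\n", argv[0]);
		return 1;
	}

	int fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	unsigned nr = 32;	/* arbitrary upper bound for this sketch */
	struct fiemap *fm = calloc(1, sizeof(*fm) + nr * sizeof(struct fiemap_extent));
	if (!fm)
		return 1;

	fm->fm_start		= 0;
	fm->fm_length		= FIEMAP_MAX_OFFSET;
	fm->fm_flags		= FIEMAP_FLAG_SYNC;
	fm->fm_extent_count	= nr;

	if (ioctl(fd, FS_IOC_FIEMAP, fm) < 0) {
		perror("FS_IOC_FIEMAP");
		return 1;
	}

	/* Print each mapped extent and whether it is flagged unwritten. */
	for (unsigned i = 0; i < fm->fm_mapped_extents; i++) {
		struct fiemap_extent *fe = &fm->fm_extents[i];

		printf("extent %u: logical %llu len %llu%s\n", i,
		       (unsigned long long) fe->fe_logical,
		       (unsigned long long) fe->fe_length,
		       (fe->fe_flags & FIEMAP_EXTENT_UNWRITTEN) ? " unwritten" : "");
	}

	free(fm);
	close(fd);
	return 0;
}

Run against a file with preallocated but not-yet-written ranges (e.g. created with fallocate), the unwritten extents should show up flagged accordingly.
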
@@ -1251,8 +1251,8 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
 			continue;
 
 		if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) &&
-				k.k->type != KEY_TYPE_reservation &&
-				k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9, c,
+				k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9 &&
+				!bkey_extent_is_reservation(k), c,
 				"extent type past end of inode %llu:%u, i_size %llu\n  %s",
 				i->inode.bi_inum, i->snapshot, i->inode.bi_size,
 				(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
@@ -1481,6 +1481,9 @@ static inline bool should_promote(struct bch_fs *c, struct bkey_s_c k,
 	if (bch2_bkey_has_target(c, k, opts.promote_target))
 		return false;
 
+	if (bkey_extent_is_unwritten(k))
+		return false;
+
 	if (bch2_target_congested(c, opts.promote_target)) {
 		/* XXX trace this */
 		return false;
@@ -251,9 +251,13 @@ static struct bkey_s_c get_next_src(struct btree_iter *iter, struct bpos end)
 	struct bkey_s_c k;
 	int ret;
 
-	for_each_btree_key_upto_continue_norestart(*iter, end, 0, k, ret)
+	for_each_btree_key_upto_continue_norestart(*iter, end, 0, k, ret) {
+		if (bkey_extent_is_unwritten(k))
+			continue;
+
 		if (bkey_extent_is_data(k.k))
 			return k;
+	}
 
 	if (bkey_ge(iter->pos, end))
 		bch2_btree_iter_set_pos(iter, end);