Commit 271a3d3a authored by Kent Overstreet, committed by Kent Overstreet

bcachefs: lift ordering restriction on 0 size extents

This lifts the restriction that 0 size extents must not overlap with
other extents, which means we can now sort extents and non-extents the
same way; it will also let us simplify a bunch of other things.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent 0fdf1804
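
For readers skimming the diff, here is a minimal standalone sketch (toy types, not the kernel's struct bkey/bkey_packed) of the unified sort order this commit switches to: keys are ordered by position, and when positions compare equal, deleted (0 size) keys now sort first for extents and non-extents alike, matching the new __btree_node_iter_cmp() below.

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>

struct toy_key {
	unsigned long long pos;	/* stand-in for a bkey's end position */
	bool deleted;		/* stand-in for bkey_deleted() / size == 0 */
};

static int toy_key_cmp(const void *_l, const void *_r)
{
	const struct toy_key *l = _l, *r = _r;

	if (l->pos != r->pos)
		return l->pos < r->pos ? -1 : 1;

	/* When keys compare equal, deleted keys come first: */
	return (int) r->deleted - (int) l->deleted;
}

int main(void)
{
	struct toy_key keys[] = {
		{ 16, false }, { 8, true }, { 8, false }, { 4, false },
	};
	size_t i;

	qsort(keys, sizeof(keys) / sizeof(keys[0]), sizeof(keys[0]), toy_key_cmp);

	for (i = 0; i < sizeof(keys) / sizeof(keys[0]); i++)
		printf("pos %llu%s\n", keys[i].pos,
		       keys[i].deleted ? " (deleted)" : "");
	return 0;
}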
......@@ -18,6 +18,9 @@
#include <linux/random.h>
#include <linux/prefetch.h>
static inline void __bch2_btree_node_iter_advance(struct btree_node_iter *,
struct btree *);
struct bset_tree *bch2_bkey_to_bset(struct btree *b, struct bkey_packed *k)
{
unsigned offset = __btree_node_key_to_offset(b, k);
......@@ -63,8 +66,8 @@ void bch2_dump_bset(struct btree *b, struct bset *i, unsigned set)
_n = bkey_next(_k);
bch2_bkey_to_text(buf, sizeof(buf), &k);
printk(KERN_ERR "block %u key %zi/%u: %s\n", set,
_k->_data - i->_data, i->u64s, buf);
printk(KERN_ERR "block %u key %5u: %s\n", set,
__btree_node_key_to_offset(b, _k), buf);
if (_n == vstruct_last(i))
continue;
......@@ -120,20 +123,6 @@ void bch2_dump_btree_node_iter(struct btree *b,
#ifdef CONFIG_BCACHEFS_DEBUG
static bool keys_out_of_order(struct btree *b,
const struct bkey_packed *prev,
const struct bkey_packed *next,
bool is_extents)
{
struct bkey nextu = bkey_unpack_key(b, next);
return bkey_cmp_left_packed_byval(b, prev, bkey_start_pos(&nextu)) > 0 ||
((is_extents
? !bkey_deleted(next)
: !bkey_deleted(prev)) &&
!bkey_cmp_packed(b, prev, next));
}
void __bch2_verify_btree_nr_keys(struct btree *b)
{
struct bset_tree *t;
......@@ -150,16 +139,21 @@ void __bch2_verify_btree_nr_keys(struct btree *b)
BUG_ON(memcmp(&nr, &b->nr, sizeof(nr)));
}
static void bch2_btree_node_iter_next_check(struct btree_node_iter *iter,
struct btree *b,
struct bkey_packed *k)
static void bch2_btree_node_iter_next_check(struct btree_node_iter *_iter,
struct btree *b)
{
const struct bkey_packed *n = bch2_btree_node_iter_peek_all(iter, b);
struct btree_node_iter iter = *_iter;
const struct bkey_packed *k, *n;
k = bch2_btree_node_iter_peek_all(&iter, b);
__bch2_btree_node_iter_advance(&iter, b);
n = bch2_btree_node_iter_peek_all(&iter, b);
bkey_unpack_key(b, k);
if (n &&
keys_out_of_order(b, k, n, iter->is_extents)) {
__btree_node_iter_cmp(b, k, n) > 0) {
struct btree_node_iter_set *set;
struct bkey ku = bkey_unpack_key(b, k);
struct bkey nu = bkey_unpack_key(b, n);
char buf1[80], buf2[80];
......@@ -167,12 +161,22 @@ static void bch2_btree_node_iter_next_check(struct btree_node_iter *iter,
bch2_dump_btree_node(b);
bch2_bkey_to_text(buf1, sizeof(buf1), &ku);
bch2_bkey_to_text(buf2, sizeof(buf2), &nu);
panic("out of order/overlapping:\n%s\n%s\n", buf1, buf2);
printk(KERN_ERR "out of order/overlapping:\n%s\n%s\n",
buf1, buf2);
printk(KERN_ERR "iter was:");
btree_node_iter_for_each(_iter, set) {
struct bkey_packed *k = __btree_node_offset_to_key(b, set->k);
struct bset_tree *t = bch2_bkey_to_bset(b, k);
printk(" [%zi %zi]", t - b->set,
k->_data - bset(b, t)->_data);
}
panic("\n");
}
}
void bch2_btree_node_iter_verify(struct btree_node_iter *iter,
struct btree *b)
struct btree *b)
{
struct btree_node_iter_set *set, *s2;
struct bset_tree *t;
......@@ -196,72 +200,72 @@ void bch2_btree_node_iter_verify(struct btree_node_iter *iter,
/* Verify iterator is sorted: */
btree_node_iter_for_each(iter, set)
BUG_ON(set != iter->data &&
btree_node_iter_cmp(iter, b, set[-1], set[0]) > 0);
btree_node_iter_cmp(b, set[-1], set[0]) > 0);
}
void bch2_verify_key_order(struct btree *b,
struct btree_node_iter *iter,
struct bkey_packed *where)
void bch2_verify_insert_pos(struct btree *b, struct bkey_packed *where,
struct bkey_packed *insert, unsigned clobber_u64s)
{
struct bset_tree *t = bch2_bkey_to_bset(b, where);
struct bkey_packed *k, *prev;
struct bkey uk, uw = bkey_unpack_key(b, where);
k = bch2_bkey_prev_all(b, t, where);
if (k &&
keys_out_of_order(b, k, where, iter->is_extents)) {
char buf1[100], buf2[100];
struct bkey_packed *prev = bch2_bkey_prev_all(b, t, where);
struct bkey_packed *next = (void *) (where->_data + clobber_u64s);
#if 0
BUG_ON(prev &&
__btree_node_iter_cmp(b, prev, insert) > 0);
#else
if (prev &&
__btree_node_iter_cmp(b, prev, insert) > 0) {
struct bkey k1 = bkey_unpack_key(b, prev);
struct bkey k2 = bkey_unpack_key(b, insert);
char buf1[100];
char buf2[100];
bch2_dump_btree_node(b);
uk = bkey_unpack_key(b, k);
bch2_bkey_to_text(buf1, sizeof(buf1), &uk);
bch2_bkey_to_text(buf2, sizeof(buf2), &uw);
panic("out of order with prev:\n%s\n%s\n",
buf1, buf2);
bch2_bkey_to_text(buf1, sizeof(buf1), &k1);
bch2_bkey_to_text(buf2, sizeof(buf2), &k2);
panic("prev > insert:\n"
"prev key %5u %s\n"
"insert key %5u %s\n",
__btree_node_key_to_offset(b, prev), buf1,
__btree_node_key_to_offset(b, insert), buf2);
}
#endif
#if 0
BUG_ON(next != btree_bkey_last(b, t) &&
__btree_node_iter_cmp(b, insert, next) > 0);
#else
if (next != btree_bkey_last(b, t) &&
__btree_node_iter_cmp(b, insert, next) > 0) {
struct bkey k1 = bkey_unpack_key(b, insert);
struct bkey k2 = bkey_unpack_key(b, next);
char buf1[100];
char buf2[100];
k = bkey_next(where);
BUG_ON(k != btree_bkey_last(b, t) &&
keys_out_of_order(b, where, k, iter->is_extents));
for_each_bset(b, t) {
if (where >= btree_bkey_first(b, t) ||
where < btree_bkey_last(b, t))
continue;
k = bch2_btree_node_iter_bset_pos(iter, b, t);
if (k == btree_bkey_last(b, t))
k = bch2_bkey_prev_all(b, t, k);
while (bkey_cmp_left_packed_byval(b, k, bkey_start_pos(&uw)) > 0 &&
(prev = bch2_bkey_prev_all(b, t, k)))
k = prev;
for (;
k != btree_bkey_last(b, t);
k = bkey_next(k)) {
uk = bkey_unpack_key(b, k);
if (iter->is_extents) {
BUG_ON(!(bkey_cmp(uw.p, bkey_start_pos(&uk)) <= 0 ||
bkey_cmp(uk.p, bkey_start_pos(&uw)) <= 0));
} else {
BUG_ON(!bkey_cmp(uw.p, uk.p) &&
!bkey_deleted(&uk));
}
if (bkey_cmp(uw.p, bkey_start_pos(&uk)) <= 0)
break;
}
bch2_dump_btree_node(b);
bch2_bkey_to_text(buf1, sizeof(buf1), &k1);
bch2_bkey_to_text(buf2, sizeof(buf2), &k2);
panic("insert > next:\n"
"insert key %5u %s\n"
"next key %5u %s\n",
__btree_node_key_to_offset(b, insert), buf1,
__btree_node_key_to_offset(b, next), buf2);
}
#endif
}
void bch2_verify_key_order(struct btree *b,
struct btree_node_iter *_iter,
struct bkey_packed *where)
{
bch2_verify_insert_pos(b, where, where, where->u64s);
}
#else
static inline void bch2_btree_node_iter_next_check(struct btree_node_iter *iter,
struct btree *b,
struct bkey_packed *k) {}
struct btree *b) {}
#endif
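
The new bch2_verify_insert_pos() above boils down to one invariant: under the unified ordering, the key being inserted must compare >= the key just before the insert position and <= the key just after it. A minimal sketch of that check on a plain sorted array of ints (toy helper, not the kernel API):

#include <assert.h>
#include <stddef.h>

/*
 * Toy version of the invariant bch2_verify_insert_pos() enforces:
 * prev <= insert and insert <= next, in iterator sort order.
 * Here the "keys" are ints and the sort order is numeric.
 */
static void verify_insert_pos(const int *keys, size_t nr, size_t where, int insert)
{
	if (where > 0)
		assert(keys[where - 1] <= insert);	/* prev > insert would be a bug */
	if (where < nr)
		assert(insert <= keys[where]);		/* insert > next would be a bug */
}

int main(void)
{
	int keys[] = { 1, 3, 7, 9 };

	verify_insert_pos(keys, 4, 2, 5);	/* inserting 5 between 3 and 7: ok */
	return 0;
}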
......@@ -1229,6 +1233,7 @@ void bch2_bset_insert(struct btree *b,
struct bkey_packed packed, *src = bkey_to_packed(insert);
bch2_bset_verify_rw_aux_tree(b, t);
bch2_verify_insert_pos(b, where, bkey_to_packed(insert), clobber_u64s);
if (bch2_bkey_pack_key(&packed, &insert->k, f))
src = &packed;
......@@ -1255,7 +1260,6 @@ void bch2_bset_insert(struct btree *b,
bch2_bset_fix_lookup_table(b, t, where, clobber_u64s, src->u64s);
bch2_verify_key_order(b, iter, where);
bch2_verify_btree_nr_keys(b);
}
......@@ -1461,7 +1465,7 @@ void bch2_btree_node_iter_push(struct btree_node_iter *iter,
noinline __flatten __attribute__((cold))
static void btree_node_iter_init_pack_failed(struct btree_node_iter *iter,
struct btree *b, struct bpos search,
bool strictly_greater, bool is_extents)
bool strictly_greater)
{
struct bset_tree *t;
......@@ -1518,7 +1522,7 @@ static void btree_node_iter_init_pack_failed(struct btree_node_iter *iter,
*/
void bch2_btree_node_iter_init(struct btree_node_iter *iter,
struct btree *b, struct bpos search,
bool strictly_greater, bool is_extents)
bool strictly_greater)
{
struct bset_tree *t;
struct bkey_packed p, *packed_search = NULL;
......@@ -1526,7 +1530,7 @@ void bch2_btree_node_iter_init(struct btree_node_iter *iter,
EBUG_ON(bkey_cmp(search, b->data->min_key) < 0);
bset_aux_tree_verify(b);
__bch2_btree_node_iter_init(iter, is_extents);
memset(iter, 0, sizeof(*iter));
switch (bch2_bkey_pack_pos_lossy(&p, search, b)) {
case BKEY_PACK_POS_EXACT:
......@@ -1537,7 +1541,7 @@ void bch2_btree_node_iter_init(struct btree_node_iter *iter,
break;
case BKEY_PACK_POS_FAIL:
btree_node_iter_init_pack_failed(iter, b, search,
strictly_greater, is_extents);
strictly_greater);
return;
}
......@@ -1552,12 +1556,11 @@ void bch2_btree_node_iter_init(struct btree_node_iter *iter,
}
void bch2_btree_node_iter_init_from_start(struct btree_node_iter *iter,
struct btree *b,
bool is_extents)
struct btree *b)
{
struct bset_tree *t;
__bch2_btree_node_iter_init(iter, is_extents);
memset(iter, 0, sizeof(*iter));
for_each_bset(b, t)
__bch2_btree_node_iter_push(iter, b,
......@@ -1585,7 +1588,7 @@ static inline bool btree_node_iter_sort_two(struct btree_node_iter *iter,
{
bool ret;
if ((ret = (btree_node_iter_cmp(iter, b,
if ((ret = (btree_node_iter_cmp(b,
iter->data[first],
iter->data[first + 1]) > 0)))
swap(iter->data[first], iter->data[first + 1]);
......@@ -1640,23 +1643,14 @@ static inline void __bch2_btree_node_iter_advance(struct btree_node_iter *iter,
btree_node_iter_sort_two(iter, b, 1);
}
/**
* bch_btree_node_iter_advance - advance @iter by one key
*
* Doesn't do debugchecks - for cases where (insert_fixup_extent()) a bset might
* momentarily have out of order extents.
*/
void bch2_btree_node_iter_advance(struct btree_node_iter *iter,
struct btree *b)
{
#ifdef CONFIG_BCACHEFS_DEBUG
struct bkey_packed *k = bch2_btree_node_iter_peek_all(iter, b);
__bch2_btree_node_iter_advance(iter, b);
bch2_btree_node_iter_next_check(iter, b, k);
#else
__bch2_btree_node_iter_advance(iter, b);
bch2_btree_node_iter_verify(iter, b);
bch2_btree_node_iter_next_check(iter, b);
#endif
__bch2_btree_node_iter_advance(iter, b);
}
static inline unsigned __btree_node_iter_used(struct btree_node_iter *iter)
......@@ -1689,8 +1683,7 @@ struct bkey_packed *bch2_btree_node_iter_prev_filter(struct btree_node_iter *ite
bch2_btree_node_iter_bset_pos(iter, b, t),
min_key_type);
if (k &&
(!prev || __btree_node_iter_cmp(iter->is_extents, b,
k, prev) > 0)) {
(!prev || __btree_node_iter_cmp(b, k, prev) > 0)) {
prev = k;
end = t->end_offset;
}
......@@ -1723,11 +1716,11 @@ struct bkey_packed *bch2_btree_node_iter_prev_filter(struct btree_node_iter *ite
struct btree_node_iter iter2 = *iter;
if (prev)
bch2_btree_node_iter_advance(&iter2, b);
__bch2_btree_node_iter_advance(&iter2, b);
while ((k = bch2_btree_node_iter_peek_all(&iter2, b)) != orig_pos) {
BUG_ON(k->type >= min_key_type);
bch2_btree_node_iter_advance(&iter2, b);
__bch2_btree_node_iter_advance(&iter2, b);
}
}
......
......@@ -369,6 +369,17 @@ static inline int bkey_cmp_p_or_unp(const struct btree *b,
return __bch2_bkey_cmp_left_packed_format_checked(b, l, r);
}
/* Returns true if @k is after iterator position @pos */
static inline bool btree_iter_pos_cmp(struct btree_iter *iter,
const struct bkey *k)
{
int cmp = bkey_cmp(k->p, iter->pos);
return cmp > 0 ||
(cmp == 0 &&
!(iter->flags & BTREE_ITER_IS_EXTENTS) && !bkey_deleted(k));
}
/* Returns true if @k is after iterator position @pos */
static inline bool btree_iter_pos_cmp_packed(const struct btree *b,
struct bpos *pos,
......@@ -419,7 +430,7 @@ enum bch_extent_overlap {
/* Returns how k overlaps with m */
static inline enum bch_extent_overlap bch2_extent_overlap(const struct bkey *k,
const struct bkey *m)
const struct bkey *m)
{
int cmp1 = bkey_cmp(k->p, m->p) < 0;
int cmp2 = bkey_cmp(bkey_start_pos(k),
......@@ -430,20 +441,13 @@ static inline enum bch_extent_overlap bch2_extent_overlap(const struct bkey *k,
/* Btree key iteration */
static inline void __bch2_btree_node_iter_init(struct btree_node_iter *iter,
bool is_extents)
{
iter->is_extents = is_extents;
memset(iter->data, 0, sizeof(iter->data));
}
void bch2_btree_node_iter_push(struct btree_node_iter *, struct btree *,
const struct bkey_packed *,
const struct bkey_packed *);
void bch2_btree_node_iter_init(struct btree_node_iter *, struct btree *,
struct bpos, bool, bool);
struct bpos, bool);
void bch2_btree_node_iter_init_from_start(struct btree_node_iter *,
struct btree *, bool);
struct btree *);
struct bkey_packed *bch2_btree_node_iter_bset_pos(struct btree_node_iter *,
struct btree *,
struct bset_tree *);
......@@ -470,32 +474,21 @@ static inline bool bch2_btree_node_iter_end(struct btree_node_iter *iter)
return __btree_node_iter_set_end(iter, 0);
}
static inline int __btree_node_iter_cmp(bool is_extents,
struct btree *b,
struct bkey_packed *l,
struct bkey_packed *r)
static inline int __btree_node_iter_cmp(struct btree *b,
const struct bkey_packed *l,
const struct bkey_packed *r)
{
/*
* For non extents, when keys compare equal the deleted keys have to
* come first - so that bch2_btree_node_iter_next_check() can detect
* duplicate nondeleted keys (and possibly other reasons?)
*
* For extents, bkey_deleted() is used as a proxy for k->size == 0, so
* deleted keys have to sort last.
*/
/* When keys compare equal deleted keys come first */
return bkey_cmp_packed(b, l, r)
?: (is_extents
? (int) bkey_deleted(l) - (int) bkey_deleted(r)
: (int) bkey_deleted(r) - (int) bkey_deleted(l))
?: (int) bkey_deleted(r) - (int) bkey_deleted(l)
?: (l > r) - (l < r);
}
static inline int btree_node_iter_cmp(struct btree_node_iter *iter,
struct btree *b,
static inline int btree_node_iter_cmp(struct btree *b,
struct btree_node_iter_set l,
struct btree_node_iter_set r)
{
return __btree_node_iter_cmp(iter->is_extents, b,
return __btree_node_iter_cmp(b,
__btree_node_offset_to_key(b, l.k),
__btree_node_offset_to_key(b, r.k));
}
......@@ -582,21 +575,12 @@ bch2_btree_node_iter_prev(struct btree_node_iter *iter, struct btree *b)
return bch2_btree_node_iter_prev_filter(iter, b, KEY_TYPE_DISCARD + 1);
}
/*
* Iterates over all _live_ keys - skipping deleted (and potentially
* overlapping) keys
*/
#define for_each_btree_node_key(b, k, iter, _is_extents) \
for (bch2_btree_node_iter_init_from_start((iter), (b), (_is_extents));\
((k) = bch2_btree_node_iter_peek(iter, b)); \
bch2_btree_node_iter_advance(iter, b))
struct bkey_s_c bch2_btree_node_iter_peek_unpack(struct btree_node_iter *,
struct btree *,
struct bkey *);
#define for_each_btree_node_key_unpack(b, k, iter, _is_extents, unpacked)\
for (bch2_btree_node_iter_init_from_start((iter), (b), (_is_extents));\
#define for_each_btree_node_key_unpack(b, k, iter, unpacked) \
for (bch2_btree_node_iter_init_from_start((iter), (b)); \
(k = bch2_btree_node_iter_peek_unpack((iter), (b), (unpacked))).k;\
bch2_btree_node_iter_advance(iter, b))
......@@ -646,6 +630,8 @@ void bch2_dump_btree_node_iter(struct btree *, struct btree_node_iter *);
void __bch2_verify_btree_nr_keys(struct btree *);
void bch2_btree_node_iter_verify(struct btree_node_iter *, struct btree *);
void bch2_verify_insert_pos(struct btree *, struct bkey_packed *,
struct bkey_packed *, unsigned);
void bch2_verify_key_order(struct btree *, struct btree_node_iter *,
struct bkey_packed *);
......@@ -654,6 +640,10 @@ void bch2_verify_key_order(struct btree *, struct btree_node_iter *,
static inline void __bch2_verify_btree_nr_keys(struct btree *b) {}
static inline void bch2_btree_node_iter_verify(struct btree_node_iter *iter,
struct btree *b) {}
static inline void bch2_verify_insert_pos(struct btree *b,
struct bkey_packed *where,
struct bkey_packed *insert,
unsigned clobber_u64s) {}
static inline void bch2_verify_key_order(struct btree *b,
struct btree_node_iter *iter,
struct bkey_packed *where) {}
......
......@@ -217,7 +217,6 @@ static unsigned btree_gc_mark_node(struct bch_fs *c, struct btree *b)
if (btree_node_has_ptrs(b))
for_each_btree_node_key_unpack(b, k, &iter,
btree_node_is_extents(b),
&unpacked) {
bch2_bkey_debugcheck(c, b, k);
stale = max(stale, bch2_gc_mark_key(c, type, k, 0));
......@@ -1044,7 +1043,6 @@ static int bch2_initial_gc_btree(struct bch_fs *c, enum btree_id id)
struct bkey_s_c k;
for_each_btree_node_key_unpack(b, k, &node_iter,
btree_node_is_extents(b),
&unpacked) {
ret = bch2_btree_mark_key_initial(c,
btree_node_type(b), k);
......
......@@ -22,7 +22,7 @@
/* btree_node_iter_large: */
#define btree_node_iter_cmp_heap(h, _l, _r) \
__btree_node_iter_cmp((iter)->is_extents, b, \
__btree_node_iter_cmp(b, \
__btree_node_offset_to_key(b, (_l).k), \
__btree_node_offset_to_key(b, (_r).k))
......@@ -248,6 +248,9 @@ static unsigned sort_extent_whiteouts(struct bkey_packed *dst,
sort_iter_sort(iter, sort_extent_whiteouts_cmp);
while ((in = sort_iter_next(iter, sort_extent_whiteouts_cmp))) {
if (bkey_deleted(in))
continue;
EBUG_ON(bkeyp_val_u64s(f, in));
EBUG_ON(in->type != KEY_TYPE_DISCARD);
......@@ -785,8 +788,7 @@ void bch2_btree_sort_into(struct bch_fs *c,
bch2_bset_set_no_aux_tree(dst, dst->set);
bch2_btree_node_iter_init_from_start(&src_iter, src,
btree_node_is_extents(src));
bch2_btree_node_iter_init_from_start(&src_iter, src);
if (btree_node_ops(src)->key_normalize ||
btree_node_ops(src)->key_merge)
......@@ -1171,7 +1173,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
int ret, retry_read = 0, write = READ;
iter = mempool_alloc(&c->fill_iter, GFP_NOIO);
__bch2_btree_node_iter_large_init(iter, btree_node_is_extents(b));
iter->used = 0;
if (bch2_meta_read_fault("btree"))
btree_err(BTREE_ERR_MUST_RETRY, c, b, NULL,
......
......@@ -146,20 +146,11 @@ ssize_t bch2_dirty_btree_nodes_print(struct bch_fs *, char *);
/* Sorting */
struct btree_node_iter_large {
u8 is_extents;
u16 used;
struct btree_node_iter_set data[MAX_BSETS];
};
static inline void
__bch2_btree_node_iter_large_init(struct btree_node_iter_large *iter,
bool is_extents)
{
iter->used = 0;
iter->is_extents = is_extents;
}
void bch2_btree_node_iter_large_advance(struct btree_node_iter_large *,
struct btree *);
......
......@@ -375,14 +375,20 @@ static void __bch2_btree_iter_verify(struct btree_iter *iter,
struct btree_node_iter tmp = l->iter;
struct bkey_packed *k;
if (iter->uptodate > BTREE_ITER_NEED_PEEK)
return;
bch2_btree_node_iter_verify(&l->iter, b);
/*
* For interior nodes, the iterator will have skipped past
* deleted keys:
*
* For extents, the iterator may have skipped past deleted keys (but not
* whiteouts)
*/
k = b->level
? bch2_btree_node_iter_prev(&tmp, b)
k = b->level || iter->flags & BTREE_ITER_IS_EXTENTS
? bch2_btree_node_iter_prev_filter(&tmp, b, KEY_TYPE_DISCARD)
: bch2_btree_node_iter_prev_all(&tmp, b);
if (k && btree_iter_pos_cmp_packed(b, &iter->pos, k,
iter->flags & BTREE_ITER_IS_EXTENTS)) {
......@@ -390,7 +396,7 @@ static void __bch2_btree_iter_verify(struct btree_iter *iter,
struct bkey uk = bkey_unpack_key(b, k);
bch2_bkey_to_text(buf, sizeof(buf), &uk);
panic("prev key should be before after pos:\n%s\n%llu:%llu\n",
panic("prev key should be before iter pos:\n%s\n%llu:%llu\n",
buf, iter->pos.inode, iter->pos.offset);
}
......@@ -401,15 +407,16 @@ static void __bch2_btree_iter_verify(struct btree_iter *iter,
struct bkey uk = bkey_unpack_key(b, k);
bch2_bkey_to_text(buf, sizeof(buf), &uk);
panic("next key should be before iter pos:\n%llu:%llu\n%s\n",
panic("iter should be after current key:\n"
"iter pos %llu:%llu\n"
"cur key %s\n",
iter->pos.inode, iter->pos.offset, buf);
}
if (iter->uptodate == BTREE_ITER_UPTODATE &&
(iter->flags & BTREE_ITER_TYPE) != BTREE_ITER_NODES) {
BUG_ON(!bkey_whiteout(&iter->k) &&
bch2_btree_node_iter_end(&l->iter));
}
BUG_ON(iter->uptodate == BTREE_ITER_UPTODATE &&
(iter->flags & BTREE_ITER_TYPE) == BTREE_ITER_KEYS &&
!bkey_whiteout(&iter->k) &&
bch2_btree_node_iter_end(&l->iter));
}
void bch2_btree_iter_verify(struct btree_iter *iter, struct btree *b)
......@@ -420,6 +427,11 @@ void bch2_btree_iter_verify(struct btree_iter *iter, struct btree *b)
__bch2_btree_iter_verify(linked, b);
}
#else
static inline void __bch2_btree_iter_verify(struct btree_iter *iter,
struct btree *b) {}
#endif
static void __bch2_btree_node_iter_fix(struct btree_iter *iter,
......@@ -434,7 +446,7 @@ static void __bch2_btree_node_iter_fix(struct btree_iter *iter,
struct btree_node_iter_set *set;
unsigned offset = __btree_node_key_to_offset(b, where);
int shift = new_u64s - clobber_u64s;
unsigned old_end = (int) __btree_node_key_to_offset(b, end) - shift;
unsigned old_end = t->end_offset - shift;
btree_node_iter_for_each(node_iter, set)
if (set->end == old_end)
......@@ -456,7 +468,7 @@ static void __bch2_btree_node_iter_fix(struct btree_iter *iter,
}
return;
found:
set->end = (int) set->end + shift;
set->end = t->end_offset;
/* Iterator hasn't gotten to the key that changed yet: */
if (set->k < offset)
......@@ -517,8 +529,7 @@ static void __bch2_btree_node_iter_fix(struct btree_iter *iter,
k = bch2_bkey_prev_all(b, t,
bch2_btree_node_iter_bset_pos(node_iter, b, t));
if (k &&
__btree_node_iter_cmp(node_iter, b,
k, where) > 0) {
__btree_node_iter_cmp(b, k, where) > 0) {
struct btree_node_iter_set *set;
unsigned offset =
__btree_node_key_to_offset(b, bkey_next(k));
......@@ -557,10 +568,6 @@ void bch2_btree_node_iter_fix(struct btree_iter *iter,
__bch2_btree_node_iter_fix(linked, b,
&linked->l[b->level].iter, t,
where, clobber_u64s, new_u64s);
/* interior node iterators are... special... */
if (!b->level)
bch2_btree_iter_verify(iter, b);
}
static inline struct bkey_s_c __btree_iter_unpack(struct btree_iter *iter,
......@@ -647,17 +654,6 @@ static void btree_iter_verify_new_node(struct btree_iter *iter, struct btree *b)
btree_node_unlock(iter, b->level + 1);
}
/* Returns true if @k is after iterator position @pos */
static inline bool btree_iter_pos_cmp(struct btree_iter *iter,
const struct bkey *k)
{
int cmp = bkey_cmp(k->p, iter->pos);
return cmp > 0 ||
(cmp == 0 &&
!(iter->flags & BTREE_ITER_IS_EXTENTS) && !bkey_deleted(k));
}
static inline bool btree_iter_pos_after_node(struct btree_iter *iter,
struct btree *b)
{
......@@ -679,8 +675,7 @@ static inline void __btree_iter_init(struct btree_iter *iter,
struct btree_iter_level *l = &iter->l[b->level];
bch2_btree_node_iter_init(&l->iter, b, iter->pos,
iter->flags & BTREE_ITER_IS_EXTENTS,
btree_node_is_extents(b));
iter->flags & BTREE_ITER_IS_EXTENTS);
/* Skip to first non whiteout: */
if (b->level)
......@@ -1022,7 +1017,9 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
}
iter->uptodate = BTREE_ITER_NEED_PEEK;
bch2_btree_iter_verify_locks(iter);
__bch2_btree_iter_verify(iter, iter->l[iter->level].b);
return 0;
}
......@@ -1363,9 +1360,10 @@ struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *iter)
}
static inline struct bkey_s_c
__bch2_btree_iter_peek_slot(struct btree_iter *iter)
__bch2_btree_iter_peek_slot_extents(struct btree_iter *iter)
{
struct btree_iter_level *l = &iter->l[0];
struct btree_node_iter node_iter;
struct bkey_s_c k;
struct bkey n;
int ret;
......@@ -1376,6 +1374,17 @@ __bch2_btree_iter_peek_slot(struct btree_iter *iter)
bkey_cmp(bkey_start_pos(k.k), iter->pos) == 0)
__btree_iter_advance(l);
/*
* iterator is now at the correct position for inserting at iter->pos,
* but we need to keep iterating until we find the first non whiteout so
* we know how big a hole we have, if any:
*/
node_iter = l->iter;
if (k.k && bkey_whiteout(k.k))
k = __btree_iter_unpack(iter, l, &iter->k,
bch2_btree_node_iter_peek(&node_iter, l->b));
/*
* If we got to the end of the node, check if we need to traverse to the
* next node:
......@@ -1392,6 +1401,13 @@ __bch2_btree_iter_peek_slot(struct btree_iter *iter)
if (k.k &&
!bkey_whiteout(k.k) &&
bkey_cmp(bkey_start_pos(k.k), iter->pos) <= 0) {
/*
* if we skipped forward to find the first non whiteout and
* there _wasn't_ actually a hole, we want the iterator to be
* pointed at the key we found:
*/
l->iter = node_iter;
EBUG_ON(bkey_cmp(k.k->p, iter->pos) < 0);
EBUG_ON(bkey_deleted(k.k));
iter->uptodate = BTREE_ITER_UPTODATE;
......@@ -1399,41 +1415,88 @@ __bch2_btree_iter_peek_slot(struct btree_iter *iter)
}
/* hole */
/* holes can't span inode numbers: */
if (iter->pos.offset == KEY_OFFSET_MAX) {
if (iter->pos.inode == KEY_INODE_MAX)
return bkey_s_c_null;
iter->pos = bkey_successor(iter->pos);
goto recheck;
}
if (!k.k)
k.k = &l->b->key.k;
bkey_init(&n);
n.p = iter->pos;
bch2_key_resize(&n,
min_t(u64, KEY_SIZE_MAX,
(k.k->p.inode == n.p.inode
? bkey_start_offset(k.k)
: KEY_OFFSET_MAX) -
n.p.offset));
//EBUG_ON(!n.size);
if (!n.size) {
char buf[100];
bch2_dump_btree_node(iter->l[0].b);
bch2_bkey_to_text(buf, sizeof(buf), k.k);
panic("iter at %llu:%llu\n"
"next key %s\n",
iter->pos.inode,
iter->pos.offset,
buf);
}
if (iter->flags & BTREE_ITER_IS_EXTENTS) {
if (n.p.offset == KEY_OFFSET_MAX) {
if (n.p.inode == KEY_INODE_MAX)
return bkey_s_c_null;
iter->pos = bkey_successor(iter->pos);
goto recheck;
}
iter->k = n;
iter->uptodate = BTREE_ITER_UPTODATE;
return (struct bkey_s_c) { &iter->k, NULL };
}
if (k.k && bkey_whiteout(k.k)) {
struct btree_node_iter node_iter = l->iter;
static inline struct bkey_s_c
__bch2_btree_iter_peek_slot(struct btree_iter *iter)
{
struct btree_iter_level *l = &iter->l[0];
struct bkey_s_c k;
int ret;
k = __btree_iter_unpack(iter, l, &iter->k,
bch2_btree_node_iter_peek(&node_iter, l->b));
}
if (iter->flags & BTREE_ITER_IS_EXTENTS)
return __bch2_btree_iter_peek_slot_extents(iter);
if (!k.k)
k.k = &l->b->key.k;
recheck:
while ((k = __btree_iter_peek_all(iter, l, &iter->k)).k &&
bkey_deleted(k.k) &&
bkey_cmp(k.k->p, iter->pos) == 0)
__btree_iter_advance(l);
bch2_key_resize(&n,
min_t(u64, KEY_SIZE_MAX,
(k.k->p.inode == n.p.inode
? bkey_start_offset(k.k)
: KEY_OFFSET_MAX) -
n.p.offset));
/*
* If we got to the end of the node, check if we need to traverse to the
* next node:
*/
if (unlikely(!k.k && btree_iter_pos_after_node(iter, l->b))) {
btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
ret = bch2_btree_iter_traverse(iter);
if (unlikely(ret))
return bkey_s_c_err(ret);
EBUG_ON(!n.size);
goto recheck;
}
iter->k = n;
iter->uptodate = BTREE_ITER_UPTODATE;
return (struct bkey_s_c) { &iter->k, NULL };
if (k.k &&
!bkey_deleted(k.k) &&
!bkey_cmp(iter->pos, k.k->p)) {
iter->uptodate = BTREE_ITER_UPTODATE;
return k;
} else {
/* hole */
bkey_init(&iter->k);
iter->k.p = iter->pos;
iter->uptodate = BTREE_ITER_UPTODATE;
return (struct bkey_s_c) { &iter->k, NULL };
}
}
struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
......
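
When __bch2_btree_iter_peek_slot_extents() above finds no live key covering iter->pos, it synthesizes a hole key at iter->pos sized up to the start of the next key (or the end of the inode), clamped to the maximum extent size. A rough standalone sketch of that arithmetic, with illustrative constants standing in for the real KEY_SIZE_MAX/KEY_OFFSET_MAX:

#include <stdint.h>
#include <stdio.h>

/* Illustrative limits; the real values come from the bcachefs on-disk format. */
#define TOY_KEY_SIZE_MAX   UINT32_MAX
#define TOY_KEY_OFFSET_MAX UINT64_MAX

struct toy_pos { uint64_t inode, offset; };

/*
 * Size of the hole starting at @pos: it extends to the start of @next_start
 * if that key is in the same inode, otherwise to the end of the inode, and
 * is clamped to the maximum extent size.
 */
static uint64_t hole_size(struct toy_pos pos, struct toy_pos next_start)
{
	uint64_t end = next_start.inode == pos.inode
		? next_start.offset
		: TOY_KEY_OFFSET_MAX;
	uint64_t size = end - pos.offset;

	return size < TOY_KEY_SIZE_MAX ? size : TOY_KEY_SIZE_MAX;
}

int main(void)
{
	struct toy_pos pos  = { .inode = 1, .offset = 100 };
	struct toy_pos next = { .inode = 1, .offset = 356 };

	printf("hole size: %llu\n", (unsigned long long) hole_size(pos, next));
	return 0;
}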
......@@ -176,8 +176,6 @@ struct btree_cache {
};
struct btree_node_iter {
u8 is_extents;
struct btree_node_iter_set {
u16 k, end;
} data[MAX_BSETS];
......@@ -459,9 +457,6 @@ struct btree_root {
* we're holding the write lock and we know what key is about to be overwritten:
*/
struct btree_iter;
struct btree_node_iter;
enum btree_insert_ret {
BTREE_INSERT_OK,
/* extent spanned multiple leaf nodes: have to traverse to next node: */
......
......@@ -35,7 +35,7 @@ static void btree_node_interior_verify(struct btree *b)
BUG_ON(!b->level);
bch2_btree_node_iter_init(&iter, b, b->key.k.p, false, false);
bch2_btree_node_iter_init(&iter, b, b->key.k.p, false);
#if 1
BUG_ON(!(k = bch2_btree_node_iter_peek(&iter, b)) ||
bkey_cmp_left_packed(b, k, &b->key.k.p));
......@@ -1322,7 +1322,7 @@ static void btree_split_insert_keys(struct btree_update *as, struct btree *b,
BUG_ON(btree_node_type(b) != BKEY_TYPE_BTREE);
bch2_btree_node_iter_init(&node_iter, b, k->k.p, false, false);
bch2_btree_node_iter_init(&node_iter, b, k->k.p, false);
while (!bch2_keylist_empty(keys)) {
k = bch2_keylist_front(keys);
......
......@@ -64,7 +64,8 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter,
if (bkey_whiteout(&insert->k) && !k->needs_whiteout) {
bch2_bset_delete(b, k, clobber_u64s);
bch2_btree_node_iter_fix(iter, b, node_iter, t,
k, clobber_u64s, 0);
k, clobber_u64s, 0);
bch2_btree_iter_verify(iter, b);
return true;
}
......@@ -73,7 +74,8 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter,
k->type = KEY_TYPE_DELETED;
bch2_btree_node_iter_fix(iter, b, node_iter, t, k,
k->u64s, k->u64s);
k->u64s, k->u64s);
bch2_btree_iter_verify(iter, b);
if (bkey_whiteout(&insert->k)) {
reserve_whiteout(b, k);
......@@ -98,7 +100,8 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter,
bch2_bset_insert(b, node_iter, k, insert, clobber_u64s);
if (k->u64s != clobber_u64s || bkey_whiteout(&insert->k))
bch2_btree_node_iter_fix(iter, b, node_iter, t, k,
clobber_u64s, k->u64s);
clobber_u64s, k->u64s);
bch2_btree_iter_verify(iter, b);
return true;
}
......
......@@ -858,30 +858,34 @@ void bch2_key_resize(struct bkey *k,
* that we have to unpack the key, modify the unpacked key - then this
* copies/repacks the unpacked to the original as necessary.
*/
static bool __extent_save(struct btree *b, struct btree_node_iter *iter,
struct bkey_packed *dst, struct bkey *src)
static void extent_save(struct btree *b, struct bkey_packed *dst,
struct bkey *src)
{
struct bkey_format *f = &b->format;
struct bkey_i *dst_unpacked;
bool ret;
if ((dst_unpacked = packed_to_bkey(dst))) {
if ((dst_unpacked = packed_to_bkey(dst)))
dst_unpacked->k = *src;
ret = true;
} else {
ret = bch2_bkey_pack_key(dst, src, f);
}
if (ret && iter)
bch2_verify_key_order(b, iter, dst);
return ret;
else
BUG_ON(!bch2_bkey_pack_key(dst, src, f));
}
static void extent_save(struct btree *b, struct btree_node_iter *iter,
struct bkey_packed *dst, struct bkey *src)
static bool extent_i_save(struct btree *b, struct bkey_packed *dst,
struct bkey_i *src)
{
BUG_ON(!__extent_save(b, iter, dst, src));
struct bkey_format *f = &b->format;
struct bkey_i *dst_unpacked;
struct bkey_packed tmp;
if ((dst_unpacked = packed_to_bkey(dst)))
dst_unpacked->k = src->k;
else if (bch2_bkey_pack_key(&tmp, &src->k, f))
memcpy_u64s(dst, &tmp, f->key_u64s);
else
return false;
memcpy_u64s(bkeyp_val(f, dst), &src->v, bkey_val_u64s(&src->k));
return true;
}
/*
......@@ -1010,7 +1014,7 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
sort_key_next(iter, b, _r);
} else {
__bch2_cut_front(l.k->p, r);
extent_save(b, NULL, rk, r.k);
extent_save(b, rk, r.k);
}
extent_sort_sift(iter, b, _r - iter->data);
......@@ -1024,7 +1028,7 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
bch2_cut_back(bkey_start_pos(r.k), &tmp.k.k);
__bch2_cut_front(r.k->p, l);
extent_save(b, NULL, lk, l.k);
extent_save(b, lk, l.k);
extent_sort_sift(iter, b, 0);
......@@ -1032,7 +1036,7 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
bkey_to_packed(&tmp.k));
} else {
bch2_cut_back(bkey_start_pos(r.k), l.k);
extent_save(b, NULL, lk, l.k);
extent_save(b, lk, l.k);
}
}
......@@ -1135,6 +1139,55 @@ extent_insert_should_stop(struct extent_insert_state *s)
return BTREE_INSERT_OK;
}
static void verify_extent_nonoverlapping(struct btree *b,
struct btree_node_iter *_iter,
struct bkey_i *insert)
{
#ifdef CONFIG_BCACHEFS_DEBUG
struct btree_node_iter iter;
struct bkey_packed *k;
struct bkey uk;
iter = *_iter;
k = bch2_btree_node_iter_prev_filter(&iter, b, KEY_TYPE_DISCARD);
BUG_ON(k &&
(uk = bkey_unpack_key(b, k),
bkey_cmp(uk.p, bkey_start_pos(&insert->k)) > 0));
iter = *_iter;
k = bch2_btree_node_iter_peek_filter(&iter, b, KEY_TYPE_DISCARD);
#if 0
BUG_ON(k &&
(uk = bkey_unpack_key(b, k),
bkey_cmp(insert->k.p, bkey_start_pos(&uk))) > 0);
#else
if (k &&
(uk = bkey_unpack_key(b, k),
bkey_cmp(insert->k.p, bkey_start_pos(&uk))) > 0) {
char buf1[100];
char buf2[100];
bch2_bkey_to_text(buf1, sizeof(buf1), &insert->k);
bch2_bkey_to_text(buf2, sizeof(buf2), &uk);
bch2_dump_btree_node(b);
panic("insert > next :\n"
"insert %s\n"
"next %s\n",
buf1, buf2);
}
#endif
#endif
}
static void verify_modified_extent(struct btree_iter *iter,
struct bkey_packed *k)
{
bch2_btree_iter_verify(iter, iter->l[0].b);
bch2_verify_insert_pos(iter->l[0].b, k, k, k->u64s);
}
static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter,
struct bkey_i *insert)
{
......@@ -1148,6 +1201,14 @@ static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter,
unsigned clobber_u64s;
EBUG_ON(bkey_deleted(&insert->k) || !insert->k.size);
verify_extent_nonoverlapping(l->b, &l->iter, insert);
if (!prev) {
while ((prev = bch2_bkey_prev_all(l->b, t, where)) &&
(bkey_cmp_left_packed(l->b, prev, &insert->k.p) ?:
((int) bkey_deleted(&insert->k) - (int) bkey_deleted(prev))) > 0)
where = prev;
}
if (prev)
where = bkey_next(prev);
......@@ -1173,12 +1234,15 @@ static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter,
bch2_bset_insert(l->b, &l->iter, where, insert, clobber_u64s);
bch2_btree_node_iter_fix(iter, l->b, &l->iter, t, where,
clobber_u64s, where->u64s);
clobber_u64s, where->u64s);
bch2_verify_key_order(l->b, &l->iter, where);
bch2_btree_iter_verify(iter, l->b);
return;
drop_deleted_keys:
bch2_bset_delete(l->b, where, clobber_u64s);
bch2_btree_node_iter_fix(iter, l->b, &l->iter, t,
where, clobber_u64s, 0);
bch2_btree_iter_verify(iter, l->b);
}
static void extent_insert_committed(struct extent_insert_state *s)
......@@ -1226,8 +1290,10 @@ static void extent_insert_committed(struct extent_insert_state *s)
bch2_btree_journal_key(s->trans, iter, &split.k);
if (!s->deleting)
if (!s->deleting) {
bch2_btree_iter_set_pos_same_leaf(iter, s->committed);
extent_bset_insert(c, iter, &split.k);
}
done:
bch2_btree_iter_set_pos_same_leaf(iter, s->committed);
......@@ -1345,22 +1411,21 @@ extent_squash(struct extent_insert_state *s, struct bkey_i *insert,
struct btree_iter *iter = s->insert->iter;
struct btree_iter_level *l = &iter->l[0];
struct btree *b = l->b;
struct btree_node_iter *node_iter = &l->iter;
enum btree_insert_ret ret;
switch (overlap) {
case BCH_EXTENT_OVERLAP_FRONT:
/* insert overlaps with start of k: */
bch2_cut_subtract_front(s, insert->k.p, k);
BUG_ON(bkey_deleted(k.k));
extent_save(b, node_iter, _k, k.k);
extent_save(b, _k, k.k);
bch2_verify_key_order(b, &l->iter, _k);
break;
case BCH_EXTENT_OVERLAP_BACK:
/* insert overlaps with end of k: */
bch2_cut_subtract_back(s, bkey_start_pos(&insert->k), k);
BUG_ON(bkey_deleted(k.k));
extent_save(b, node_iter, _k, k.k);
extent_save(b, _k, k.k);
/*
* As the auxiliary tree is indexed by the end of the
......@@ -1368,46 +1433,31 @@ extent_squash(struct extent_insert_state *s, struct bkey_i *insert,
* auxiliary tree.
*/
bch2_bset_fix_invalidated_key(b, t, _k);
bch2_btree_node_iter_fix(iter, b, node_iter, t,
_k, _k->u64s, _k->u64s);
bch2_btree_node_iter_fix(iter, b, &l->iter, t,
_k, _k->u64s, _k->u64s);
bch2_verify_key_order(b, &l->iter, _k);
break;
case BCH_EXTENT_OVERLAP_ALL: {
struct bpos orig_pos = k.k->p;
/* The insert key completely covers k, invalidate k */
if (!bkey_whiteout(k.k))
btree_keys_account_key_drop(&b->nr,
t - b->set, _k);
bch2_drop_subtract(s, k);
k.k->p = bkey_start_pos(&insert->k);
if (!__extent_save(b, node_iter, _k, k.k)) {
/*
* Couldn't repack: we aren't necessarily able
* to repack if the new key is outside the range
* of the old extent, so we have to split
* @insert:
*/
k.k->p = orig_pos;
extent_save(b, node_iter, _k, k.k);
ret = extent_insert_advance_pos(s, k.s_c);
if (ret != BTREE_INSERT_OK)
return ret;
if (t == bset_tree_last(l->b)) {
unsigned u64s = _k->u64s;
extent_insert_committed(s);
/*
* We split and inserted upto at k.k->p - that
* has to coincide with iter->pos, so that we
* don't have anything more we have to insert
* until we recheck our journal reservation:
*/
EBUG_ON(bkey_cmp(s->committed, k.k->p));
bch2_bset_delete(l->b, _k, _k->u64s);
bch2_btree_node_iter_fix(iter, b, &l->iter, t,
_k, u64s, 0);
bch2_btree_iter_verify(iter, b);
} else {
bch2_bset_fix_invalidated_key(b, t, _k);
bch2_btree_node_iter_fix(iter, b, node_iter, t,
_k, _k->u64s, _k->u64s);
extent_save(b, _k, k.k);
bch2_btree_node_iter_fix(iter, b, &l->iter, t,
_k, _k->u64s, _k->u64s);
bch2_verify_key_order(b, &l->iter, _k);
}
break;
......@@ -1436,7 +1486,8 @@ extent_squash(struct extent_insert_state *s, struct bkey_i *insert,
bch2_cut_subtract_front(s, insert->k.p, k);
BUG_ON(bkey_deleted(k.k));
extent_save(b, node_iter, _k, k.k);
extent_save(b, _k, k.k);
bch2_verify_key_order(b, &l->iter, _k);
bch2_add_sectors(s, bkey_i_to_s_c(&split.k),
bkey_start_offset(&split.k.k),
......@@ -1450,26 +1501,20 @@ extent_squash(struct extent_insert_state *s, struct bkey_i *insert,
}
static enum btree_insert_ret
__bch2_delete_fixup_extent(struct extent_insert_state *s)
__bch2_insert_fixup_extent(struct extent_insert_state *s)
{
struct bch_fs *c = s->trans->c;
struct btree_iter *iter = s->insert->iter;
struct btree_iter_level *l = &iter->l[0];
struct btree *b = l->b;
struct btree_node_iter *node_iter = &l->iter;
struct bkey_packed *_k;
struct bkey unpacked;
struct bkey_i *insert = s->insert->k;
enum btree_insert_ret ret = BTREE_INSERT_OK;
EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&insert->k)));
s->whiteout = *insert;
s->whiteout.k.type = KEY_TYPE_DISCARD;
while (bkey_cmp(s->committed, insert->k.p) < 0 &&
(ret = extent_insert_should_stop(s)) == BTREE_INSERT_OK &&
(_k = bch2_btree_node_iter_peek_all(node_iter, b))) {
(_k = bch2_btree_node_iter_peek_filter(&l->iter, b, KEY_TYPE_DISCARD))) {
struct bset_tree *t = bch2_bkey_to_bset(b, _k);
struct bkey_s k = __bkey_disassemble(b, _k, &unpacked);
enum bch_extent_overlap overlap;
......@@ -1480,112 +1525,92 @@ __bch2_delete_fixup_extent(struct extent_insert_state *s)
if (bkey_cmp(bkey_start_pos(k.k), insert->k.p) >= 0)
break;
if (bkey_whiteout(k.k)) {
s->committed = bpos_min(insert->k.p, k.k->p);
goto next;
}
overlap = bch2_extent_overlap(&insert->k, k.k);
ret = extent_insert_advance_pos(s, k.s_c);
if (ret)
break;
s->do_journal = true;
overlap = bch2_extent_overlap(&insert->k, k.k);
if (overlap == BCH_EXTENT_OVERLAP_ALL) {
btree_keys_account_key_drop(&b->nr,
t - b->set, _k);
bch2_subtract_sectors(s, k.s_c,
bkey_start_offset(k.k), k.k->size);
_k->type = KEY_TYPE_DISCARD;
reserve_whiteout(b, _k);
} else if (k.k->needs_whiteout ||
bkey_written(b, _k)) {
struct bkey_i discard = *insert;
discard.k.type = KEY_TYPE_DISCARD;
switch (overlap) {
case BCH_EXTENT_OVERLAP_FRONT:
bch2_cut_front(bkey_start_pos(k.k), &discard);
break;
case BCH_EXTENT_OVERLAP_BACK:
bch2_cut_back(k.k->p, &discard.k);
break;
default:
break;
}
if (!s->deleting) {
if (k.k->needs_whiteout || bkey_written(b, _k))
insert->k.needs_whiteout = true;
discard.k.needs_whiteout = true;
if (overlap == BCH_EXTENT_OVERLAP_ALL &&
bkey_whiteout(k.k) &&
k.k->needs_whiteout) {
unreserve_whiteout(b, _k);
_k->needs_whiteout = false;
}
ret = extent_squash(s, insert, t, _k, k, overlap);
BUG_ON(ret != BTREE_INSERT_OK);
extent_bset_insert(c, iter, &discard);
} else {
ret = extent_squash(s, insert, t, _k, k, overlap);
BUG_ON(ret != BTREE_INSERT_OK);
}
next:
bch2_cut_front(s->committed, insert);
bch2_btree_iter_set_pos_same_leaf(iter, s->committed);
}
return ret;
}
static enum btree_insert_ret
__bch2_insert_fixup_extent(struct extent_insert_state *s)
{
struct btree_iter *iter = s->insert->iter;
struct btree_iter_level *l = &iter->l[0];
struct btree *b = l->b;
struct btree_node_iter *node_iter = &l->iter;
struct bkey_packed *_k;
struct bkey unpacked;
struct bkey_i *insert = s->insert->k;
enum btree_insert_ret ret = BTREE_INSERT_OK;
while (bkey_cmp(s->committed, insert->k.p) < 0 &&
(ret = extent_insert_should_stop(s)) == BTREE_INSERT_OK &&
(_k = bch2_btree_node_iter_peek_all(node_iter, b))) {
struct bset_tree *t = bch2_bkey_to_bset(b, _k);
struct bkey_s k = __bkey_disassemble(b, _k, &unpacked);
enum bch_extent_overlap overlap;
EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&insert->k)));
EBUG_ON(bkey_cmp(iter->pos, k.k->p) >= 0);
if (bkey_whiteout(k.k))
goto next;
s->do_journal = true;
if (overlap == BCH_EXTENT_OVERLAP_ALL) {
btree_keys_account_key_drop(&b->nr,
t - b->set, _k);
bch2_subtract_sectors(s, k.s_c,
bkey_start_offset(k.k), k.k->size);
_k->type = KEY_TYPE_DISCARD;
reserve_whiteout(b, _k);
} else if (k.k->needs_whiteout ||
bkey_written(b, _k)) {
struct bkey_i discard = *insert;
discard.k.type = KEY_TYPE_DISCARD;
switch (overlap) {
case BCH_EXTENT_OVERLAP_FRONT:
bch2_cut_front(bkey_start_pos(k.k), &discard);
break;
case BCH_EXTENT_OVERLAP_BACK:
bch2_cut_back(k.k->p, &discard.k);
break;
default:
break;
}
if (bkey_cmp(bkey_start_pos(k.k), insert->k.p) >= 0)
break;
discard.k.needs_whiteout = true;
overlap = bch2_extent_overlap(&insert->k, k.k);
ret = extent_squash(s, insert, t, _k, k, overlap);
BUG_ON(ret != BTREE_INSERT_OK);
if (!k.k->size)
goto squash;
extent_bset_insert(c, iter, &discard);
} else {
ret = extent_squash(s, insert, t, _k, k, overlap);
BUG_ON(ret != BTREE_INSERT_OK);
}
next:
bch2_cut_front(s->committed, insert);
bch2_btree_iter_set_pos_same_leaf(iter, s->committed);
}
/*
* Only call advance pos & call hook for nonzero size extents:
*/
ret = extent_insert_advance_pos(s, k.s_c);
if (ret)
if (ret != BTREE_INSERT_OK ||
overlap == BCH_EXTENT_OVERLAP_FRONT ||
overlap == BCH_EXTENT_OVERLAP_MIDDLE)
break;
}
if (k.k->size &&
(k.k->needs_whiteout || bkey_written(b, _k)))
insert->k.needs_whiteout = true;
if (ret == BTREE_INSERT_OK &&
bkey_cmp(s->committed, insert->k.p) < 0)
ret = extent_insert_advance_pos(s, bkey_s_c_null);
if (overlap == BCH_EXTENT_OVERLAP_ALL &&
bkey_whiteout(k.k) &&
k.k->needs_whiteout) {
unreserve_whiteout(b, _k);
_k->needs_whiteout = false;
}
squash:
ret = extent_squash(s, insert, t, _k, k, overlap);
if (ret != BTREE_INSERT_OK)
break;
/*
* may have skipped past some deleted extents greater than the insert
* key, before we got to a non deleted extent and knew we could bail out
* rewind the iterator a bit if necessary:
*/
{
struct btree_node_iter node_iter = l->iter;
struct bkey uk;
while ((_k = bch2_btree_node_iter_prev_all(&node_iter, l->b)) &&
(uk = bkey_unpack_key(l->b, _k),
bkey_cmp(uk.p, s->committed) > 0))
l->iter = node_iter;
}
return ret;
......@@ -1647,6 +1672,11 @@ bch2_insert_fixup_extent(struct btree_insert *trans,
.deleting = bkey_whiteout(&insert->k->k),
};
if (s.deleting) {
s.whiteout = *insert->k;
s.whiteout.k.type = KEY_TYPE_DISCARD;
}
EBUG_ON(iter->level);
EBUG_ON(!insert->k->k.size);
......@@ -1657,6 +1687,7 @@ bch2_insert_fixup_extent(struct btree_insert *trans,
* @insert->k and the node iterator that we're advancing:
*/
EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&insert->k->k)));
bch2_btree_iter_verify(iter, b);
if (!s.deleting &&
!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))
......@@ -1664,13 +1695,7 @@ bch2_insert_fixup_extent(struct btree_insert *trans,
bkey_start_offset(&insert->k->k),
insert->k->k.size);
ret = !s.deleting
? __bch2_insert_fixup_extent(&s)
: __bch2_delete_fixup_extent(&s);
if (ret == BTREE_INSERT_OK &&
bkey_cmp(s.committed, insert->k->k.p) < 0)
ret = extent_insert_advance_pos(&s, bkey_s_c_null);
ret = __bch2_insert_fixup_extent(&s);
extent_insert_committed(&s);
......@@ -2172,130 +2197,6 @@ enum merge_result bch2_extent_merge(struct bch_fs *c, struct btree *b,
return BCH_MERGE_MERGE;
}
static void extent_i_save(struct btree *b, struct bkey_packed *dst,
struct bkey_i *src)
{
struct bkey_format *f = &b->format;
struct bkey_i *dst_unpacked;
BUG_ON(bkeyp_val_u64s(f, dst) != bkey_val_u64s(&src->k));
/*
* We don't want the bch2_verify_key_order() call in extent_save(),
* because we may be out of order with deleted keys that are about to be
* removed by extent_bset_insert()
*/
if ((dst_unpacked = packed_to_bkey(dst)))
bkey_copy(dst_unpacked, src);
else
BUG_ON(!bch2_bkey_pack(dst, src, f));
}
static bool extent_merge_one_overlapping(struct btree_iter *iter,
struct bpos new_pos,
struct bset_tree *t,
struct bkey_packed *k, struct bkey uk,
bool check, bool could_pack)
{
struct btree_iter_level *l = &iter->l[0];
BUG_ON(!bkey_deleted(k));
if (check) {
return !bkey_packed(k) || could_pack;
} else {
uk.p = new_pos;
extent_save(l->b, &l->iter, k, &uk);
bch2_bset_fix_invalidated_key(l->b, t, k);
bch2_btree_node_iter_fix(iter, l->b, &l->iter, t,
k, k->u64s, k->u64s);
return true;
}
}
static bool extent_merge_do_overlapping(struct btree_iter *iter,
struct bkey *m, bool back_merge)
{
struct btree_iter_level *l = &iter->l[0];
struct btree *b = l->b;
struct btree_node_iter *node_iter = &l->iter;
struct bset_tree *t;
struct bkey_packed *k;
struct bkey uk;
struct bpos new_pos = back_merge ? m->p : bkey_start_pos(m);
bool could_pack = bkey_pack_pos((void *) &uk, new_pos, b);
bool check = true;
/*
* @m is the new merged extent:
*
* The merge took place in the last bset; we know there can't be any 0
* size extents overlapping with m there because if so they would have
* been between the two extents we merged.
*
* But in the other bsets, we have to check for and fix such extents:
*/
do_fixup:
for_each_bset(b, t) {
if (t == bset_tree_last(b))
break;
/*
* if we don't find this bset in the iterator we already got to
* the end of that bset, so start searching from the end.
*/
k = bch2_btree_node_iter_bset_pos(node_iter, b, t);
if (k == btree_bkey_last(b, t))
k = bch2_bkey_prev_all(b, t, k);
if (!k)
continue;
if (back_merge) {
/*
* Back merge: 0 size extents will be before the key
* that was just inserted (and thus the iterator
* position) - walk backwards to find them
*/
for (;
k &&
(uk = bkey_unpack_key(b, k),
bkey_cmp(uk.p, bkey_start_pos(m)) > 0);
k = bch2_bkey_prev_all(b, t, k)) {
if (bkey_cmp(uk.p, m->p) >= 0)
continue;
if (!extent_merge_one_overlapping(iter, new_pos,
t, k, uk, check, could_pack))
return false;
}
} else {
/* Front merge - walk forwards */
for (;
k != btree_bkey_last(b, t) &&
(uk = bkey_unpack_key(b, k),
bkey_cmp(uk.p, m->p) < 0);
k = bkey_next(k)) {
if (bkey_cmp(uk.p,
bkey_start_pos(m)) <= 0)
continue;
if (!extent_merge_one_overlapping(iter, new_pos,
t, k, uk, check, could_pack))
return false;
}
}
}
if (check) {
check = false;
goto do_fixup;
}
return true;
}
/*
* When merging an extent that we're inserting into a btree node, the new merged
* extent could overlap with an existing 0 size extent - if we don't fix that,
......@@ -2312,13 +2213,13 @@ static bool bch2_extent_merge_inline(struct bch_fs *c,
{
struct btree *b = iter->l[0].b;
struct btree_node_iter *node_iter = &iter->l[0].iter;
const struct bkey_format *f = &b->format;
struct bset_tree *t = bset_tree_last(b);
struct bkey_packed *m;
BKEY_PADDED(k) li;
BKEY_PADDED(k) ri;
struct bkey_i *mi;
struct bkey tmp;
BKEY_PADDED(k) li, ri;
struct bkey_packed *m = back_merge ? l : r;
struct bkey_i *mi = back_merge ? &li.k : &ri.k;
struct bset_tree *t = bch2_bkey_to_bset(b, m);
enum merge_result ret;
EBUG_ON(bkey_written(b, m));
/*
* We need to save copies of both l and r, because we might get a
......@@ -2327,57 +2228,49 @@ static bool bch2_extent_merge_inline(struct bch_fs *c,
bch2_bkey_unpack(b, &li.k, l);
bch2_bkey_unpack(b, &ri.k, r);
m = back_merge ? l : r;
mi = back_merge ? &li.k : &ri.k;
ret = bch2_extent_merge(c, b, &li.k, &ri.k);
if (ret == BCH_MERGE_NOMERGE)
return false;
/* l & r should be in last bset: */
EBUG_ON(bch2_bkey_to_bset(b, m) != t);
/*
* check if we overlap with deleted extents - would break the sort
* order:
*/
if (back_merge) {
struct bkey_packed *n = bkey_next(m);
switch (bch2_extent_merge(c, b, &li.k, &ri.k)) {
case BCH_MERGE_NOMERGE:
return false;
case BCH_MERGE_PARTIAL:
if (bkey_packed(m) && !bch2_bkey_pack_key((void *) &tmp, &mi->k, f))
if (n != btree_bkey_last(b, t) &&
bkey_cmp_left_packed(b, n, &li.k.k.p) <= 0 &&
bkey_deleted(n))
return false;
} else if (ret == BCH_MERGE_MERGE) {
struct bkey_packed *prev = bch2_bkey_prev_all(b, t, m);
if (!extent_merge_do_overlapping(iter, &li.k.k, back_merge))
if (prev &&
bkey_cmp_left_packed_byval(b, prev,
bkey_start_pos(&li.k.k)) > 0)
return false;
}
extent_i_save(b, m, mi);
bch2_bset_fix_invalidated_key(b, t, m);
/*
* Update iterator to reflect what we just inserted - otherwise,
* the iter_fix() call is going to put us _before_ the key we
* just partially merged with:
*/
if (back_merge)
bch2_btree_iter_set_pos_same_leaf(iter, li.k.k.p);
bch2_btree_node_iter_fix(iter, b, node_iter,
t, m, m->u64s, m->u64s);
if (ret == BCH_MERGE_PARTIAL) {
if (!extent_i_save(b, m, mi))
return false;
if (!back_merge)
bkey_copy(packed_to_bkey(l), &li.k);
else
bkey_copy(packed_to_bkey(r), &ri.k);
return false;
case BCH_MERGE_MERGE:
if (bkey_packed(m) && !bch2_bkey_pack_key((void *) &tmp, &li.k.k, f))
return false;
if (!extent_merge_do_overlapping(iter, &li.k.k, back_merge))
} else {
if (!extent_i_save(b, m, &li.k))
return false;
}
extent_i_save(b, m, &li.k);
bch2_bset_fix_invalidated_key(b, t, m);
bch2_bset_fix_invalidated_key(b, t, m);
bch2_btree_node_iter_fix(iter, b, node_iter,
t, m, m->u64s, m->u64s);
verify_modified_extent(iter, m);
bch2_btree_node_iter_fix(iter, b, node_iter,
t, m, m->u64s, m->u64s);
return true;
default:
BUG();
}
return ret == BCH_MERGE_MERGE;
}
int bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size)
......