Commit 271a3d3a authored by Kent Overstreet, committed by Kent Overstreet

bcachefs: lift ordering restriction on 0 size extents

This lifts the restriction that 0 size extents must not overlap with
other extents, which means we can now sort extents and non-extents the
same way, and will let us simplify a bunch of other stuff as well.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent 0fdf1804
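
To make the new sort order concrete, here is a small standalone sketch; it is not bcachefs code, and struct demo_key and demo_key_cmp() are invented for the example. It mirrors the tie-break that __btree_node_iter_cmp() settles on below: when two keys compare equal by position, deleted keys (whiteouts, and zero-size extents, which reuse the deleted flag) sort first for every btree type, so extent and non-extent nodes no longer need different comparators.

/*
 * Minimal sketch, not bcachefs code: demo_key and demo_key_cmp() are
 * invented here.  In the patch the real comparison is bkey_cmp_packed()
 * followed by the bkey_deleted() tie-break in __btree_node_iter_cmp().
 */
#include <stdio.h>
#include <stdlib.h>

struct demo_key {
	unsigned long long	offset;		/* stand-in for struct bpos */
	int			deleted;	/* stand-in for bkey_deleted() */
};

static int demo_key_cmp(const void *_l, const void *_r)
{
	const struct demo_key *l = _l, *r = _r;

	if (l->offset != r->offset)
		return l->offset < r->offset ? -1 : 1;

	/* On equal positions, deleted keys sort first for every btree type: */
	return (int) r->deleted - (int) l->deleted;
}

int main(void)
{
	struct demo_key keys[] = { { 8, 0 }, { 8, 1 }, { 4, 0 } };
	unsigned i;

	qsort(keys, 3, sizeof(keys[0]), demo_key_cmp);

	for (i = 0; i < 3; i++)
		printf("offset %llu deleted %d\n",
		       keys[i].offset, keys[i].deleted);
	return 0;
}

The sorted output is 4/0, then 8/1, then 8/0: the whiteout at offset 8 lands ahead of the live key at the same position, which is exactly the rule the old is_extents flag used to invert for extent nodes.
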
@@ -369,6 +369,17 @@ static inline int bkey_cmp_p_or_unp(const struct btree *b,
return __bch2_bkey_cmp_left_packed_format_checked(b, l, r);
}
/* Returns true if @k is after iterator position @pos */
static inline bool btree_iter_pos_cmp(struct btree_iter *iter,
const struct bkey *k)
{
int cmp = bkey_cmp(k->p, iter->pos);
return cmp > 0 ||
(cmp == 0 &&
!(iter->flags & BTREE_ITER_IS_EXTENTS) && !bkey_deleted(k));
}
/* Returns true if @k is after iterator position @pos */
static inline bool btree_iter_pos_cmp_packed(const struct btree *b,
struct bpos *pos,
@@ -419,7 +430,7 @@ enum bch_extent_overlap {
/* Returns how k overlaps with m */
static inline enum bch_extent_overlap bch2_extent_overlap(const struct bkey *k,
const struct bkey *m)
const struct bkey *m)
{
int cmp1 = bkey_cmp(k->p, m->p) < 0;
int cmp2 = bkey_cmp(bkey_start_pos(k),
@@ -430,20 +441,13 @@ static inline enum bch_extent_overlap bch2_extent_overlap(const struct bkey *k,
/* Btree key iteration */
static inline void __bch2_btree_node_iter_init(struct btree_node_iter *iter,
bool is_extents)
{
iter->is_extents = is_extents;
memset(iter->data, 0, sizeof(iter->data));
}
void bch2_btree_node_iter_push(struct btree_node_iter *, struct btree *,
const struct bkey_packed *,
const struct bkey_packed *);
void bch2_btree_node_iter_init(struct btree_node_iter *, struct btree *,
struct bpos, bool, bool);
struct bpos, bool);
void bch2_btree_node_iter_init_from_start(struct btree_node_iter *,
struct btree *, bool);
struct btree *);
struct bkey_packed *bch2_btree_node_iter_bset_pos(struct btree_node_iter *,
struct btree *,
struct bset_tree *);
@@ -470,32 +474,21 @@ static inline bool bch2_btree_node_iter_end(struct btree_node_iter *iter)
return __btree_node_iter_set_end(iter, 0);
}
static inline int __btree_node_iter_cmp(bool is_extents,
struct btree *b,
struct bkey_packed *l,
struct bkey_packed *r)
static inline int __btree_node_iter_cmp(struct btree *b,
const struct bkey_packed *l,
const struct bkey_packed *r)
{
/*
* For non extents, when keys compare equal the deleted keys have to
* come first - so that bch2_btree_node_iter_next_check() can detect
* duplicate nondeleted keys (and possibly other reasons?)
*
* For extents, bkey_deleted() is used as a proxy for k->size == 0, so
* deleted keys have to sort last.
*/
/* When keys compare equal deleted keys come first */
return bkey_cmp_packed(b, l, r)
?: (is_extents
? (int) bkey_deleted(l) - (int) bkey_deleted(r)
: (int) bkey_deleted(r) - (int) bkey_deleted(l))
?: (int) bkey_deleted(r) - (int) bkey_deleted(l)
?: (l > r) - (l < r);
}
static inline int btree_node_iter_cmp(struct btree_node_iter *iter,
struct btree *b,
static inline int btree_node_iter_cmp(struct btree *b,
struct btree_node_iter_set l,
struct btree_node_iter_set r)
{
return __btree_node_iter_cmp(iter->is_extents, b,
return __btree_node_iter_cmp(b,
__btree_node_offset_to_key(b, l.k),
__btree_node_offset_to_key(b, r.k));
}
@@ -582,21 +575,12 @@ bch2_btree_node_iter_prev(struct btree_node_iter *iter, struct btree *b)
return bch2_btree_node_iter_prev_filter(iter, b, KEY_TYPE_DISCARD + 1);
}
/*
* Iterates over all _live_ keys - skipping deleted (and potentially
* overlapping) keys
*/
#define for_each_btree_node_key(b, k, iter, _is_extents) \
for (bch2_btree_node_iter_init_from_start((iter), (b), (_is_extents));\
((k) = bch2_btree_node_iter_peek(iter, b)); \
bch2_btree_node_iter_advance(iter, b))
struct bkey_s_c bch2_btree_node_iter_peek_unpack(struct btree_node_iter *,
struct btree *,
struct bkey *);
#define for_each_btree_node_key_unpack(b, k, iter, _is_extents, unpacked)\
for (bch2_btree_node_iter_init_from_start((iter), (b), (_is_extents));\
#define for_each_btree_node_key_unpack(b, k, iter, unpacked) \
for (bch2_btree_node_iter_init_from_start((iter), (b)); \
(k = bch2_btree_node_iter_peek_unpack((iter), (b), (unpacked))).k;\
bch2_btree_node_iter_advance(iter, b))
@@ -646,6 +630,8 @@ void bch2_dump_btree_node_iter(struct btree *, struct btree_node_iter *);
void __bch2_verify_btree_nr_keys(struct btree *);
void bch2_btree_node_iter_verify(struct btree_node_iter *, struct btree *);
void bch2_verify_insert_pos(struct btree *, struct bkey_packed *,
struct bkey_packed *, unsigned);
void bch2_verify_key_order(struct btree *, struct btree_node_iter *,
struct bkey_packed *);
@@ -654,6 +640,10 @@ void bch2_verify_key_order(struct btree *, struct btree_node_iter *,
static inline void __bch2_verify_btree_nr_keys(struct btree *b) {}
static inline void bch2_btree_node_iter_verify(struct btree_node_iter *iter,
struct btree *b) {}
static inline void bch2_verify_insert_pos(struct btree *b,
struct bkey_packed *where,
struct bkey_packed *insert,
unsigned clobber_u64s) {}
static inline void bch2_verify_key_order(struct btree *b,
struct btree_node_iter *iter,
struct bkey_packed *where) {}
@@ -217,7 +217,6 @@ static unsigned btree_gc_mark_node(struct bch_fs *c, struct btree *b)
if (btree_node_has_ptrs(b))
for_each_btree_node_key_unpack(b, k, &iter,
btree_node_is_extents(b),
&unpacked) {
bch2_bkey_debugcheck(c, b, k);
stale = max(stale, bch2_gc_mark_key(c, type, k, 0));
@@ -1044,7 +1043,6 @@ static int bch2_initial_gc_btree(struct bch_fs *c, enum btree_id id)
struct bkey_s_c k;
for_each_btree_node_key_unpack(b, k, &node_iter,
btree_node_is_extents(b),
&unpacked) {
ret = bch2_btree_mark_key_initial(c,
btree_node_type(b), k);
@@ -22,7 +22,7 @@
/* btree_node_iter_large: */
#define btree_node_iter_cmp_heap(h, _l, _r) \
__btree_node_iter_cmp((iter)->is_extents, b, \
__btree_node_iter_cmp(b, \
__btree_node_offset_to_key(b, (_l).k), \
__btree_node_offset_to_key(b, (_r).k))
@@ -248,6 +248,9 @@ static unsigned sort_extent_whiteouts(struct bkey_packed *dst,
sort_iter_sort(iter, sort_extent_whiteouts_cmp);
while ((in = sort_iter_next(iter, sort_extent_whiteouts_cmp))) {
if (bkey_deleted(in))
continue;
EBUG_ON(bkeyp_val_u64s(f, in));
EBUG_ON(in->type != KEY_TYPE_DISCARD);
@@ -785,8 +788,7 @@ void bch2_btree_sort_into(struct bch_fs *c,
bch2_bset_set_no_aux_tree(dst, dst->set);
bch2_btree_node_iter_init_from_start(&src_iter, src,
btree_node_is_extents(src));
bch2_btree_node_iter_init_from_start(&src_iter, src);
if (btree_node_ops(src)->key_normalize ||
btree_node_ops(src)->key_merge)
@@ -1171,7 +1173,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
int ret, retry_read = 0, write = READ;
iter = mempool_alloc(&c->fill_iter, GFP_NOIO);
__bch2_btree_node_iter_large_init(iter, btree_node_is_extents(b));
iter->used = 0;
if (bch2_meta_read_fault("btree"))
btree_err(BTREE_ERR_MUST_RETRY, c, b, NULL,
@@ -146,20 +146,11 @@ ssize_t bch2_dirty_btree_nodes_print(struct bch_fs *, char *);
/* Sorting */
struct btree_node_iter_large {
u8 is_extents;
u16 used;
struct btree_node_iter_set data[MAX_BSETS];
};
static inline void
__bch2_btree_node_iter_large_init(struct btree_node_iter_large *iter,
bool is_extents)
{
iter->used = 0;
iter->is_extents = is_extents;
}
void bch2_btree_node_iter_large_advance(struct btree_node_iter_large *,
struct btree *);
@@ -375,14 +375,20 @@ static void __bch2_btree_iter_verify(struct btree_iter *iter,
struct btree_node_iter tmp = l->iter;
struct bkey_packed *k;
if (iter->uptodate > BTREE_ITER_NEED_PEEK)
return;
bch2_btree_node_iter_verify(&l->iter, b);
/*
* For interior nodes, the iterator will have skipped past
* deleted keys:
*
* For extents, the iterator may have skipped past deleted keys (but not
* whiteouts)
*/
k = b->level
? bch2_btree_node_iter_prev(&tmp, b)
k = b->level || iter->flags & BTREE_ITER_IS_EXTENTS
? bch2_btree_node_iter_prev_filter(&tmp, b, KEY_TYPE_DISCARD)
: bch2_btree_node_iter_prev_all(&tmp, b);
if (k && btree_iter_pos_cmp_packed(b, &iter->pos, k,
iter->flags & BTREE_ITER_IS_EXTENTS)) {
@@ -390,7 +396,7 @@ static void __bch2_btree_iter_verify(struct btree_iter *iter,
struct bkey uk = bkey_unpack_key(b, k);
bch2_bkey_to_text(buf, sizeof(buf), &uk);
panic("prev key should be before after pos:\n%s\n%llu:%llu\n",
panic("prev key should be before iter pos:\n%s\n%llu:%llu\n",
buf, iter->pos.inode, iter->pos.offset);
}
@@ -401,15 +407,16 @@ static void __bch2_btree_iter_verify(struct btree_iter *iter,
struct bkey uk = bkey_unpack_key(b, k);
bch2_bkey_to_text(buf, sizeof(buf), &uk);
panic("next key should be before iter pos:\n%llu:%llu\n%s\n",
panic("iter should be after current key:\n"
"iter pos %llu:%llu\n"
"cur key %s\n",
iter->pos.inode, iter->pos.offset, buf);
}
if (iter->uptodate == BTREE_ITER_UPTODATE &&
(iter->flags & BTREE_ITER_TYPE) != BTREE_ITER_NODES) {
BUG_ON(!bkey_whiteout(&iter->k) &&
bch2_btree_node_iter_end(&l->iter));
}
BUG_ON(iter->uptodate == BTREE_ITER_UPTODATE &&
(iter->flags & BTREE_ITER_TYPE) == BTREE_ITER_KEYS &&
!bkey_whiteout(&iter->k) &&
bch2_btree_node_iter_end(&l->iter));
}
void bch2_btree_iter_verify(struct btree_iter *iter, struct btree *b)
@@ -420,6 +427,11 @@ void bch2_btree_iter_verify(struct btree_iter *iter, struct btree *b)
__bch2_btree_iter_verify(linked, b);
}
#else
static inline void __bch2_btree_iter_verify(struct btree_iter *iter,
struct btree *b) {}
#endif
static void __bch2_btree_node_iter_fix(struct btree_iter *iter,
@@ -434,7 +446,7 @@ static void __bch2_btree_node_iter_fix(struct btree_iter *iter,
struct btree_node_iter_set *set;
unsigned offset = __btree_node_key_to_offset(b, where);
int shift = new_u64s - clobber_u64s;
unsigned old_end = (int) __btree_node_key_to_offset(b, end) - shift;
unsigned old_end = t->end_offset - shift;
btree_node_iter_for_each(node_iter, set)
if (set->end == old_end)
@@ -456,7 +468,7 @@ static void __bch2_btree_node_iter_fix(struct btree_iter *iter,
}
return;
found:
set->end = (int) set->end + shift;
set->end = t->end_offset;
/* Iterator hasn't gotten to the key that changed yet: */
if (set->k < offset)
@@ -517,8 +529,7 @@ static void __bch2_btree_node_iter_fix(struct btree_iter *iter,
k = bch2_bkey_prev_all(b, t,
bch2_btree_node_iter_bset_pos(node_iter, b, t));
if (k &&
__btree_node_iter_cmp(node_iter, b,
k, where) > 0) {
__btree_node_iter_cmp(b, k, where) > 0) {
struct btree_node_iter_set *set;
unsigned offset =
__btree_node_key_to_offset(b, bkey_next(k));
@@ -557,10 +568,6 @@ void bch2_btree_node_iter_fix(struct btree_iter *iter,
__bch2_btree_node_iter_fix(linked, b,
&linked->l[b->level].iter, t,
where, clobber_u64s, new_u64s);
/* interior node iterators are... special... */
if (!b->level)
bch2_btree_iter_verify(iter, b);
}
static inline struct bkey_s_c __btree_iter_unpack(struct btree_iter *iter,
@@ -647,17 +654,6 @@ static void btree_iter_verify_new_node(struct btree_iter *iter, struct btree *b)
btree_node_unlock(iter, b->level + 1);
}
/* Returns true if @k is after iterator position @pos */
static inline bool btree_iter_pos_cmp(struct btree_iter *iter,
const struct bkey *k)
{
int cmp = bkey_cmp(k->p, iter->pos);
return cmp > 0 ||
(cmp == 0 &&
!(iter->flags & BTREE_ITER_IS_EXTENTS) && !bkey_deleted(k));
}
static inline bool btree_iter_pos_after_node(struct btree_iter *iter,
struct btree *b)
{
@@ -679,8 +675,7 @@ static inline void __btree_iter_init(struct btree_iter *iter,
struct btree_iter_level *l = &iter->l[b->level];
bch2_btree_node_iter_init(&l->iter, b, iter->pos,
iter->flags & BTREE_ITER_IS_EXTENTS,
btree_node_is_extents(b));
iter->flags & BTREE_ITER_IS_EXTENTS);
/* Skip to first non whiteout: */
if (b->level)
@@ -1022,7 +1017,9 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
}
iter->uptodate = BTREE_ITER_NEED_PEEK;
bch2_btree_iter_verify_locks(iter);
__bch2_btree_iter_verify(iter, iter->l[iter->level].b);
return 0;
}
@@ -1363,9 +1360,10 @@ struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *iter)
}
static inline struct bkey_s_c
__bch2_btree_iter_peek_slot(struct btree_iter *iter)
__bch2_btree_iter_peek_slot_extents(struct btree_iter *iter)
{
struct btree_iter_level *l = &iter->l[0];
struct btree_node_iter node_iter;
struct bkey_s_c k;
struct bkey n;
int ret;
@@ -1376,6 +1374,17 @@ __bch2_btree_iter_peek_slot(struct btree_iter *iter)
bkey_cmp(bkey_start_pos(k.k), iter->pos) == 0)
__btree_iter_advance(l);
/*
* iterator is now at the correct position for inserting at iter->pos,
* but we need to keep iterating until we find the first non whiteout so
* we know how big a hole we have, if any:
*/
node_iter = l->iter;
if (k.k && bkey_whiteout(k.k))
k = __btree_iter_unpack(iter, l, &iter->k,
bch2_btree_node_iter_peek(&node_iter, l->b));
/*
* If we got to the end of the node, check if we need to traverse to the
* next node:
@@ -1392,6 +1401,13 @@ __bch2_btree_iter_peek_slot(struct btree_iter *iter)
if (k.k &&
!bkey_whiteout(k.k) &&
bkey_cmp(bkey_start_pos(k.k), iter->pos) <= 0) {
/*
* if we skipped forward to find the first non whiteout and
* there _wasn't_ actually a hole, we want the iterator to be
* pointed at the key we found:
*/
l->iter = node_iter;
EBUG_ON(bkey_cmp(k.k->p, iter->pos) < 0);
EBUG_ON(bkey_deleted(k.k));
iter->uptodate = BTREE_ITER_UPTODATE;
@@ -1399,41 +1415,88 @@ __bch2_btree_iter_peek_slot(struct btree_iter *iter)
}
/* hole */
/* holes can't span inode numbers: */
if (iter->pos.offset == KEY_OFFSET_MAX) {
if (iter->pos.inode == KEY_INODE_MAX)
return bkey_s_c_null;
iter->pos = bkey_successor(iter->pos);
goto recheck;
}
if (!k.k)
k.k = &l->b->key.k;
bkey_init(&n);
n.p = iter->pos;
bch2_key_resize(&n,
min_t(u64, KEY_SIZE_MAX,
(k.k->p.inode == n.p.inode
? bkey_start_offset(k.k)
: KEY_OFFSET_MAX) -
n.p.offset));
//EBUG_ON(!n.size);
if (!n.size) {
char buf[100];
bch2_dump_btree_node(iter->l[0].b);
bch2_bkey_to_text(buf, sizeof(buf), k.k);
panic("iter at %llu:%llu\n"
"next key %s\n",
iter->pos.inode,
iter->pos.offset,
buf);
}
if (iter->flags & BTREE_ITER_IS_EXTENTS) {
if (n.p.offset == KEY_OFFSET_MAX) {
if (n.p.inode == KEY_INODE_MAX)
return bkey_s_c_null;
iter->pos = bkey_successor(iter->pos);
goto recheck;
}
iter->k = n;
iter->uptodate = BTREE_ITER_UPTODATE;
return (struct bkey_s_c) { &iter->k, NULL };
}
if (k.k && bkey_whiteout(k.k)) {
struct btree_node_iter node_iter = l->iter;
static inline struct bkey_s_c
__bch2_btree_iter_peek_slot(struct btree_iter *iter)
{
struct btree_iter_level *l = &iter->l[0];
struct bkey_s_c k;
int ret;
k = __btree_iter_unpack(iter, l, &iter->k,
bch2_btree_node_iter_peek(&node_iter, l->b));
}
if (iter->flags & BTREE_ITER_IS_EXTENTS)
return __bch2_btree_iter_peek_slot_extents(iter);
if (!k.k)
k.k = &l->b->key.k;
recheck:
while ((k = __btree_iter_peek_all(iter, l, &iter->k)).k &&
bkey_deleted(k.k) &&
bkey_cmp(k.k->p, iter->pos) == 0)
__btree_iter_advance(l);
bch2_key_resize(&n,
min_t(u64, KEY_SIZE_MAX,
(k.k->p.inode == n.p.inode
? bkey_start_offset(k.k)
: KEY_OFFSET_MAX) -
n.p.offset));
/*
* If we got to the end of the node, check if we need to traverse to the
* next node:
*/
if (unlikely(!k.k && btree_iter_pos_after_node(iter, l->b))) {
btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
ret = bch2_btree_iter_traverse(iter);
if (unlikely(ret))
return bkey_s_c_err(ret);
EBUG_ON(!n.size);
goto recheck;
}
iter->k = n;
iter->uptodate = BTREE_ITER_UPTODATE;
return (struct bkey_s_c) { &iter->k, NULL };
if (k.k &&
!bkey_deleted(k.k) &&
!bkey_cmp(iter->pos, k.k->p)) {
iter->uptodate = BTREE_ITER_UPTODATE;
return k;
} else {
/* hole */
bkey_init(&iter->k);
iter->k.p = iter->pos;
iter->uptodate = BTREE_ITER_UPTODATE;
return (struct bkey_s_c) { &iter->k, NULL };
}
}
struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
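
The reworked hole detection in __bch2_btree_iter_peek_slot_extents() above is easier to follow in isolation. The following toy model is not bcachefs code: it assumes a flat sorted array instead of a real btree node, and every name in it (toy_key, toy_iter, lookup()) is invented for the sketch. It shows the pattern the new comments describe: leave the live iterator at the insert position, advance a copy past whiteouts to find the first live key, and only commit the copy back if that key actually covers the search position; otherwise the gap up to that key's start is the hole to report.

/*
 * Toy model only -- not bcachefs code; toy_key, toy_iter and lookup()
 * are invented for this illustration.
 */
#include <stdbool.h>
#include <stdio.h>

struct toy_key {
	unsigned	start, end;	/* half-open extent [start, end) */
	bool		whiteout;	/* deleted placeholder, cf. KEY_TYPE_DISCARD */
};

struct toy_iter { unsigned idx; };

static const struct toy_key keys[] = {
	{ 0, 4, true },			/* whiteout */
	{ 8, 12, false },		/* live extent after a gap */
};
#define NR_KEYS (sizeof(keys) / sizeof(keys[0]))

static void lookup(unsigned pos)
{
	struct toy_iter iter = { 0 }, copy;
	unsigned hole_end = 16;		/* pretend this is the end of the node */

	/* Position iter where an insert at @pos would go: */
	while (iter.idx < NR_KEYS && keys[iter.idx].end <= pos)
		iter.idx++;

	/* Advance only a copy past whiteouts, so iter stays an insert position: */
	copy = iter;
	while (copy.idx < NR_KEYS && keys[copy.idx].whiteout)
		copy.idx++;

	if (copy.idx < NR_KEYS && keys[copy.idx].start <= pos) {
		iter = copy;		/* no hole: point at the key we found */
		printf("pos %u: extent [%u,%u)\n", pos,
		       keys[iter.idx].start, keys[iter.idx].end);
		return;
	}

	if (copy.idx < NR_KEYS)
		hole_end = keys[copy.idx].start;

	printf("pos %u: hole [%u,%u)\n", pos, pos, hole_end);
}

int main(void)
{
	lookup(2);	/* inside the whiteout: reports the hole [2,8) */
	lookup(9);	/* covered by the live extent [8,12) */
	return 0;
}

Committing the copy back only in the covered case is what keeps the iterator usable as an insert position whenever a hole is reported.
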
@@ -176,8 +176,6 @@ struct btree_cache {
};
struct btree_node_iter {
u8 is_extents;
struct btree_node_iter_set {
u16 k, end;
} data[MAX_BSETS];
@@ -459,9 +457,6 @@ struct btree_root {
* we're holding the write lock and we know what key is about to be overwritten:
*/
struct btree_iter;
struct btree_node_iter;
enum btree_insert_ret {
BTREE_INSERT_OK,
/* extent spanned multiple leaf nodes: have to traverse to next node: */
@@ -35,7 +35,7 @@ static void btree_node_interior_verify(struct btree *b)
BUG_ON(!b->level);
bch2_btree_node_iter_init(&iter, b, b->key.k.p, false, false);
bch2_btree_node_iter_init(&iter, b, b->key.k.p, false);
#if 1
BUG_ON(!(k = bch2_btree_node_iter_peek(&iter, b)) ||
bkey_cmp_left_packed(b, k, &b->key.k.p));
@@ -1322,7 +1322,7 @@ static void btree_split_insert_keys(struct btree_update *as, struct btree *b,
BUG_ON(btree_node_type(b) != BKEY_TYPE_BTREE);
bch2_btree_node_iter_init(&node_iter, b, k->k.p, false, false);
bch2_btree_node_iter_init(&node_iter, b, k->k.p, false);
while (!bch2_keylist_empty(keys)) {
k = bch2_keylist_front(keys);
@@ -64,7 +64,8 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter,
if (bkey_whiteout(&insert->k) && !k->needs_whiteout) {
bch2_bset_delete(b, k, clobber_u64s);
bch2_btree_node_iter_fix(iter, b, node_iter, t,
k, clobber_u64s, 0);
k, clobber_u64s, 0);
bch2_btree_iter_verify(iter, b);
return true;
}
@@ -73,7 +74,8 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter,
k->type = KEY_TYPE_DELETED;
bch2_btree_node_iter_fix(iter, b, node_iter, t, k,
k->u64s, k->u64s);
k->u64s, k->u64s);
bch2_btree_iter_verify(iter, b);
if (bkey_whiteout(&insert->k)) {
reserve_whiteout(b, k);
@@ -98,7 +100,8 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter,
bch2_bset_insert(b, node_iter, k, insert, clobber_u64s);
if (k->u64s != clobber_u64s || bkey_whiteout(&insert->k))
bch2_btree_node_iter_fix(iter, b, node_iter, t, k,
clobber_u64s, k->u64s);
clobber_u64s, k->u64s);
bch2_btree_iter_verify(iter, b);
return true;
}