Commit 7a7d17b2, authored by Kent Overstreet, committed by Kent Overstreet

bcachefs: Whiteouts for snapshots

This patch adds KEY_TYPE_whiteout, a new type of whiteout for snapshots.
It is used when we're deleting and the key being deleted is in an ancestor
snapshot. The patch also updates the transaction update/commit path to use it.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
parent 8c6d298a
...@@ -327,7 +327,7 @@ static inline void bkey_init(struct bkey *k) ...@@ -327,7 +327,7 @@ static inline void bkey_init(struct bkey *k)
*/ */
#define BCH_BKEY_TYPES() \ #define BCH_BKEY_TYPES() \
x(deleted, 0) \ x(deleted, 0) \
x(discard, 1) \ x(whiteout, 1) \
x(error, 2) \ x(error, 2) \
x(cookie, 3) \ x(cookie, 3) \
x(hash_whiteout, 4) \ x(hash_whiteout, 4) \
...@@ -361,7 +361,7 @@ struct bch_deleted { ...@@ -361,7 +361,7 @@ struct bch_deleted {
struct bch_val v; struct bch_val v;
}; };
struct bch_discard { struct bch_whiteout {
struct bch_val v; struct bch_val v;
}; };
......
...@@ -63,7 +63,7 @@ static inline void set_bkey_val_bytes(struct bkey *k, unsigned bytes) ...@@ -63,7 +63,7 @@ static inline void set_bkey_val_bytes(struct bkey *k, unsigned bytes)
#define bkey_deleted(_k) ((_k)->type == KEY_TYPE_deleted) #define bkey_deleted(_k) ((_k)->type == KEY_TYPE_deleted)
#define bkey_whiteout(_k) \ #define bkey_whiteout(_k) \
((_k)->type == KEY_TYPE_deleted || (_k)->type == KEY_TYPE_discard) ((_k)->type == KEY_TYPE_deleted || (_k)->type == KEY_TYPE_whiteout)
enum bkey_lr_packed { enum bkey_lr_packed {
BKEY_PACKED_BOTH, BKEY_PACKED_BOTH,
......
...@@ -31,7 +31,7 @@ static const char *deleted_key_invalid(const struct bch_fs *c, ...@@ -31,7 +31,7 @@ static const char *deleted_key_invalid(const struct bch_fs *c,
.key_invalid = deleted_key_invalid, \ .key_invalid = deleted_key_invalid, \
} }
#define bch2_bkey_ops_discard (struct bkey_ops) { \ #define bch2_bkey_ops_whiteout (struct bkey_ops) { \
.key_invalid = deleted_key_invalid, \ .key_invalid = deleted_key_invalid, \
} }
...@@ -101,6 +101,8 @@ const char *bch2_bkey_val_invalid(struct bch_fs *c, struct bkey_s_c k) ...@@ -101,6 +101,8 @@ const char *bch2_bkey_val_invalid(struct bch_fs *c, struct bkey_s_c k)
static unsigned bch2_key_types_allowed[] = { static unsigned bch2_key_types_allowed[] = {
[BKEY_TYPE_extents] = [BKEY_TYPE_extents] =
(1U << KEY_TYPE_deleted)|
(1U << KEY_TYPE_whiteout)|
(1U << KEY_TYPE_error)| (1U << KEY_TYPE_error)|
(1U << KEY_TYPE_cookie)| (1U << KEY_TYPE_cookie)|
(1U << KEY_TYPE_extent)| (1U << KEY_TYPE_extent)|
...@@ -108,30 +110,43 @@ static unsigned bch2_key_types_allowed[] = { ...@@ -108,30 +110,43 @@ static unsigned bch2_key_types_allowed[] = {
(1U << KEY_TYPE_reflink_p)| (1U << KEY_TYPE_reflink_p)|
(1U << KEY_TYPE_inline_data), (1U << KEY_TYPE_inline_data),
[BKEY_TYPE_inodes] = [BKEY_TYPE_inodes] =
(1U << KEY_TYPE_deleted)|
(1U << KEY_TYPE_whiteout)|
(1U << KEY_TYPE_inode)| (1U << KEY_TYPE_inode)|
(1U << KEY_TYPE_inode_generation), (1U << KEY_TYPE_inode_generation),
[BKEY_TYPE_dirents] = [BKEY_TYPE_dirents] =
(1U << KEY_TYPE_deleted)|
(1U << KEY_TYPE_whiteout)|
(1U << KEY_TYPE_hash_whiteout)| (1U << KEY_TYPE_hash_whiteout)|
(1U << KEY_TYPE_dirent), (1U << KEY_TYPE_dirent),
[BKEY_TYPE_xattrs] = [BKEY_TYPE_xattrs] =
(1U << KEY_TYPE_deleted)|
(1U << KEY_TYPE_whiteout)|
(1U << KEY_TYPE_cookie)| (1U << KEY_TYPE_cookie)|
(1U << KEY_TYPE_hash_whiteout)| (1U << KEY_TYPE_hash_whiteout)|
(1U << KEY_TYPE_xattr), (1U << KEY_TYPE_xattr),
[BKEY_TYPE_alloc] = [BKEY_TYPE_alloc] =
(1U << KEY_TYPE_deleted)|
(1U << KEY_TYPE_alloc)| (1U << KEY_TYPE_alloc)|
(1U << KEY_TYPE_alloc_v2), (1U << KEY_TYPE_alloc_v2),
[BKEY_TYPE_quotas] = [BKEY_TYPE_quotas] =
(1U << KEY_TYPE_deleted)|
(1U << KEY_TYPE_quota), (1U << KEY_TYPE_quota),
[BKEY_TYPE_stripes] = [BKEY_TYPE_stripes] =
(1U << KEY_TYPE_deleted)|
(1U << KEY_TYPE_stripe), (1U << KEY_TYPE_stripe),
[BKEY_TYPE_reflink] = [BKEY_TYPE_reflink] =
(1U << KEY_TYPE_deleted)|
(1U << KEY_TYPE_reflink_v)| (1U << KEY_TYPE_reflink_v)|
(1U << KEY_TYPE_indirect_inline_data), (1U << KEY_TYPE_indirect_inline_data),
[BKEY_TYPE_subvolumes] = [BKEY_TYPE_subvolumes] =
(1U << KEY_TYPE_deleted)|
(1U << KEY_TYPE_subvolume), (1U << KEY_TYPE_subvolume),
[BKEY_TYPE_snapshots] = [BKEY_TYPE_snapshots] =
(1U << KEY_TYPE_deleted)|
(1U << KEY_TYPE_snapshot), (1U << KEY_TYPE_snapshot),
[BKEY_TYPE_btree] = [BKEY_TYPE_btree] =
(1U << KEY_TYPE_deleted)|
(1U << KEY_TYPE_btree_ptr)| (1U << KEY_TYPE_btree_ptr)|
(1U << KEY_TYPE_btree_ptr_v2), (1U << KEY_TYPE_btree_ptr_v2),
}; };
...@@ -139,21 +154,18 @@ static unsigned bch2_key_types_allowed[] = { ...@@ -139,21 +154,18 @@ static unsigned bch2_key_types_allowed[] = {
const char *__bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, const char *__bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k,
enum btree_node_type type) enum btree_node_type type)
{ {
unsigned key_types_allowed = (1U << KEY_TYPE_deleted)|
bch2_key_types_allowed[type] ;
if (k.k->u64s < BKEY_U64s) if (k.k->u64s < BKEY_U64s)
return "u64s too small"; return "u64s too small";
if (!(key_types_allowed & (1U << k.k->type))) if (!(bch2_key_types_allowed[type] & (1U << k.k->type)))
return "invalid key type for this btree"; return "invalid key type for this btree";
if (type == BKEY_TYPE_btree && if (type == BKEY_TYPE_btree &&
bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX) bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX)
return "value too big"; return "value too big";
if (btree_node_type_is_extents(type)) { if (btree_node_type_is_extents(type) && !bkey_whiteout(k.k)) {
if ((k.k->size == 0) != bkey_deleted(k.k)) if (k.k->size == 0)
return "bad size field"; return "bad size field";
if (k.k->size > k.k->p.offset) if (k.k->size > k.k->p.offset)
......
...@@ -1002,21 +1002,24 @@ static int bch2_trans_update_extent(struct btree_trans *trans, ...@@ -1002,21 +1002,24 @@ static int bch2_trans_update_extent(struct btree_trans *trans,
goto next; goto next;
} }
if (!bkey_cmp(k.k->p, bkey_start_pos(&insert->k))) if (!bkey_cmp(k.k->p, start))
goto next; goto next;
while (bkey_cmp(insert->k.p, bkey_start_pos(k.k)) > 0) { while (bkey_cmp(insert->k.p, bkey_start_pos(k.k)) > 0) {
bool front_split = bkey_cmp(bkey_start_pos(k.k), start) < 0;
bool back_split = bkey_cmp(k.k->p, insert->k.p) > 0;
/* /*
* If we're going to be splitting a compressed extent, note it * If we're going to be splitting a compressed extent, note it
* so that __bch2_trans_commit() can increase our disk * so that __bch2_trans_commit() can increase our disk
* reservation: * reservation:
*/ */
if (bkey_cmp(bkey_start_pos(k.k), start) < 0 && if (((front_split && back_split) ||
bkey_cmp(k.k->p, insert->k.p) > 0 && ((front_split || back_split) && k.k->p.snapshot != insert->k.p.snapshot)) &&
(compressed_sectors = bch2_bkey_sectors_compressed(k))) (compressed_sectors = bch2_bkey_sectors_compressed(k)))
trans->extra_journal_res += compressed_sectors; trans->extra_journal_res += compressed_sectors;
if (bkey_cmp(bkey_start_pos(k.k), start) < 0) { if (front_split) {
update = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); update = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
if ((ret = PTR_ERR_OR_ZERO(update))) if ((ret = PTR_ERR_OR_ZERO(update)))
goto err; goto err;
...@@ -1027,6 +1030,32 @@ static int bch2_trans_update_extent(struct btree_trans *trans, ...@@ -1027,6 +1030,32 @@ static int bch2_trans_update_extent(struct btree_trans *trans,
bch2_trans_iter_init(trans, &update_iter, btree_id, update->k.p, bch2_trans_iter_init(trans, &update_iter, btree_id, update->k.p,
BTREE_ITER_NOT_EXTENTS| BTREE_ITER_NOT_EXTENTS|
BTREE_ITER_ALL_SNAPSHOTS|
BTREE_ITER_INTENT);
ret = bch2_btree_iter_traverse(&update_iter) ?:
bch2_trans_update(trans, &update_iter, update,
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|
flags);
bch2_trans_iter_exit(trans, &update_iter);
if (ret)
goto err;
}
if (k.k->p.snapshot != insert->k.p.snapshot &&
(front_split || back_split)) {
update = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
if ((ret = PTR_ERR_OR_ZERO(update)))
goto err;
bkey_reassemble(update, k);
bch2_cut_front(start, update);
bch2_cut_back(insert->k.p, update);
bch2_trans_iter_init(trans, &update_iter, btree_id, update->k.p,
BTREE_ITER_NOT_EXTENTS|
BTREE_ITER_ALL_SNAPSHOTS|
BTREE_ITER_INTENT); BTREE_ITER_INTENT);
ret = bch2_btree_iter_traverse(&update_iter) ?: ret = bch2_btree_iter_traverse(&update_iter) ?:
bch2_trans_update(trans, &update_iter, update, bch2_trans_update(trans, &update_iter, update,
...@@ -1038,12 +1067,32 @@ static int bch2_trans_update_extent(struct btree_trans *trans, ...@@ -1038,12 +1067,32 @@ static int bch2_trans_update_extent(struct btree_trans *trans,
} }
if (bkey_cmp(k.k->p, insert->k.p) <= 0) { if (bkey_cmp(k.k->p, insert->k.p) <= 0) {
ret = bch2_btree_delete_at(trans, &iter, flags); update = bch2_trans_kmalloc(trans, sizeof(*update));
if ((ret = PTR_ERR_OR_ZERO(update)))
goto err;
bkey_init(&update->k);
update->k.p = k.k->p;
if (insert->k.p.snapshot != k.k->p.snapshot) {
update->k.p.snapshot = insert->k.p.snapshot;
update->k.type = KEY_TYPE_whiteout;
}
bch2_trans_iter_init(trans, &update_iter, btree_id, update->k.p,
BTREE_ITER_NOT_EXTENTS|
BTREE_ITER_INTENT);
ret = bch2_btree_iter_traverse(&update_iter) ?:
bch2_trans_update(trans, &update_iter, update,
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|
flags);
bch2_trans_iter_exit(trans, &update_iter);
if (ret) if (ret)
goto err; goto err;
} }
if (bkey_cmp(k.k->p, insert->k.p) > 0) { if (back_split) {
update = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); update = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
if ((ret = PTR_ERR_OR_ZERO(update))) if ((ret = PTR_ERR_OR_ZERO(update)))
goto err; goto err;
...@@ -1051,10 +1100,15 @@ static int bch2_trans_update_extent(struct btree_trans *trans, ...@@ -1051,10 +1100,15 @@ static int bch2_trans_update_extent(struct btree_trans *trans,
bkey_reassemble(update, k); bkey_reassemble(update, k);
bch2_cut_front(insert->k.p, update); bch2_cut_front(insert->k.p, update);
ret = bch2_trans_update(trans, &iter, update, flags); bch2_trans_copy_iter(&update_iter, &iter);
update_iter.pos = update->k.p;
ret = bch2_trans_update(trans, &update_iter, update,
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|
flags);
bch2_trans_iter_exit(trans, &update_iter);
if (ret) if (ret)
goto err; goto err;
goto out; goto out;
} }
next: next:
...@@ -1086,6 +1140,39 @@ static int bch2_trans_update_extent(struct btree_trans *trans, ...@@ -1086,6 +1140,39 @@ static int bch2_trans_update_extent(struct btree_trans *trans,
return ret; return ret;
} }
/*
* When deleting, check if we need to emit a whiteout (because we're overwriting
* something in an ancestor snapshot)
*
* @trans:    transaction used for the lookup
* @btree_id: btree to search
* @pos:      position of the key being deleted, including its snapshot field
*            (passed by value; the local copy is modified below)
*
* Returns 0 when bch2_snapshot_parent() reports no parent for pos.snapshot, or
* when the scan ends without finding a non-whiteout key in an ancestor
* snapshot. Returns 1 when the first key found in an ancestor snapshot is not a
* whiteout. The caller in bch2_trans_update() also treats a negative return as
* an error; presumably that value comes from for_each_btree_key() via @ret -
* TODO confirm against the macro definition.
*/
static int need_whiteout_for_snapshot(struct btree_trans *trans,
enum btree_id btree_id, struct bpos pos)
{
struct btree_iter iter;
struct bkey_s_c k;
/* Remember the snapshot we are deleting in; pos.snapshot is changed below. */
u32 snapshot = pos.snapshot;
/*
* NOTE(review): ret is not initialized here; it is handed to
* for_each_btree_key(), which is presumably responsible for assigning it
* before the loop exits - confirm.
*/
int ret;
/* Nothing to check if this snapshot has no parent. */
if (!bch2_snapshot_parent(trans->c, pos.snapshot))
return 0;
/*
* Start the scan one past our own snapshot field, presumably so that keys
* in the snapshot being deleted from are skipped - TODO confirm ordering
* of the snapshot field within the btree.
*/
pos.snapshot++;
for_each_btree_key(trans, iter, btree_id, pos,
BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
/* Stop as soon as bkey_cmp() says the key is no longer at pos. */
if (bkey_cmp(k.k->p, pos))
break;
if (bch2_snapshot_is_ancestor(trans->c, snapshot,
k.k->p.snapshot)) {
/*
* bkey_whiteout() is true for KEY_TYPE_deleted and
* KEY_TYPE_whiteout, so a whiteout is only needed when
* the ancestor's key is a live (non-whiteout) key.
*/
ret = !bkey_whiteout(k.k);
break;
}
}
bch2_trans_iter_exit(trans, &iter);
return ret;
}
int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter, int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
struct bkey_i *k, enum btree_update_flags flags) struct bkey_i *k, enum btree_update_flags flags)
{ {
...@@ -1118,6 +1205,16 @@ int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter, ...@@ -1118,6 +1205,16 @@ int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
btree_insert_entry_cmp(i - 1, i) >= 0); btree_insert_entry_cmp(i - 1, i) >= 0);
#endif #endif
if (bkey_deleted(&n.k->k) &&
(iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)) {
int ret = need_whiteout_for_snapshot(trans, n.btree_id, n.k->k.p);
if (unlikely(ret < 0))
return ret;
if (ret)
n.k->k.type = KEY_TYPE_whiteout;
}
/* /*
* Pending updates are kept sorted: first, find position of new update, * Pending updates are kept sorted: first, find position of new update,
* then delete/trim any updates the new update overwrites: * then delete/trim any updates the new update overwrites:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment