Commit 3d48a7f8 authored by Kent Overstreet, committed by Kent Overstreet

bcachefs: KEY_TYPE_alloc_v4

This introduces a new alloc key which doesn't use varints. Soon we'll be
adding backpointers and storing them in alloc keys, which means our
pack/unpack workflow for alloc keys won't really work - we'll need to be
mutating alloc keys in place.

Instead of bch2_alloc_unpack(), we now have bch2_alloc_to_v4() that
converts older types of alloc keys to v4 if needed.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent d326ab2f
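
For review context, here is a minimal sketch (not part of the patch) contrasting the old and new update paths. The helper names come from the diff below; the surrounding variables (trans, iter, k, sectors) are assumed to be set up by the caller:

	/* Before: every update went through a full unpack/repack cycle */
	struct bkey_alloc_unpacked u = bch2_alloc_unpack(k);

	u.dirty_sectors += sectors;
	ret = bch2_alloc_write(trans, &iter, &u, 0);

	/* After: convert to v4 once, then mutate the key in place */
	struct bkey_i_alloc_v4 *a = bch2_alloc_to_v4_mut(trans, k);

	ret = PTR_ERR_OR_ZERO(a);
	if (!ret) {
		a->v.dirty_sectors += sectors;
		ret = bch2_trans_update(trans, &iter, &a->k_i, 0);
	}
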
@@ -33,13 +33,27 @@ const char * const bch2_allocator_states[] = {
NULL
};
/* Persistent alloc info: */
static const unsigned BCH_ALLOC_V1_FIELD_BYTES[] = {
#define x(name, bits) [BCH_ALLOC_FIELD_V1_##name] = bits / 8,
BCH_ALLOC_FIELDS_V1()
#undef x
};
/* Persistent alloc info: */
struct bkey_alloc_unpacked {
u64 journal_seq;
u64 bucket;
u8 dev;
u8 gen;
u8 oldest_gen;
u8 data_type;
bool need_discard:1;
bool need_inc_gen:1;
#define x(_name, _bits) u##_bits _name;
BCH_ALLOC_FIELDS_V2()
#undef x
};
static inline u64 alloc_field_v1_get(const struct bch_alloc *a,
const void **p, unsigned field)
@@ -161,6 +175,8 @@ static int bch2_alloc_unpack_v3(struct bkey_alloc_unpacked *out,
out->gen = a.v->gen;
out->oldest_gen = a.v->oldest_gen;
out->data_type = a.v->data_type;
out->need_discard = BCH_ALLOC_V3_NEED_DISCARD(a.v);
out->need_inc_gen = BCH_ALLOC_V3_NEED_INC_GEN(a.v);
out->journal_seq = le64_to_cpu(a.v->journal_seq);
#define x(_name, _bits) \
@@ -182,47 +198,7 @@ static int bch2_alloc_unpack_v3(struct bkey_alloc_unpacked *out,
return 0;
}
static void bch2_alloc_pack_v3(struct bkey_alloc_buf *dst,
const struct bkey_alloc_unpacked src)
{
struct bkey_i_alloc_v3 *a = bkey_alloc_v3_init(&dst->k);
unsigned nr_fields = 0, last_nonzero_fieldnr = 0;
u8 *out = a->v.data;
u8 *end = (void *) &dst[1];
u8 *last_nonzero_field = out;
unsigned bytes;
a->k.p = POS(src.dev, src.bucket);
a->v.gen = src.gen;
a->v.oldest_gen = src.oldest_gen;
a->v.data_type = src.data_type;
a->v.journal_seq = cpu_to_le64(src.journal_seq);
#define x(_name, _bits) \
nr_fields++; \
\
if (src._name) { \
out += bch2_varint_encode_fast(out, src._name); \
\
last_nonzero_field = out; \
last_nonzero_fieldnr = nr_fields; \
} else { \
*out++ = 0; \
}
BCH_ALLOC_FIELDS_V2()
#undef x
BUG_ON(out > end);
out = last_nonzero_field;
a->v.nr_fields = last_nonzero_fieldnr;
bytes = (u8 *) out - (u8 *) &a->v;
set_bkey_val_bytes(&a->k, bytes);
memset_u64s_tail(&a->v, 0, bytes);
}
struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c k)
static struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c k)
{
struct bkey_alloc_unpacked ret = {
.dev = k.k->p.inode,
@@ -245,32 +221,71 @@ struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c k)
return ret;
}
struct bkey_alloc_buf *bch2_alloc_pack(struct btree_trans *trans,
const struct bkey_alloc_unpacked src)
void bch2_alloc_to_v4(struct bkey_s_c k, struct bch_alloc_v4 *out)
{
struct bkey_alloc_buf *dst;
if (k.k->type == KEY_TYPE_alloc_v4) {
*out = *bkey_s_c_to_alloc_v4(k).v;
} else {
struct bkey_alloc_unpacked u = bch2_alloc_unpack(k);
*out = (struct bch_alloc_v4) {
.journal_seq = u.journal_seq,
.flags = u.need_discard,
.gen = u.gen,
.oldest_gen = u.oldest_gen,
.data_type = u.data_type,
.stripe_redundancy = u.stripe_redundancy,
.dirty_sectors = u.dirty_sectors,
.cached_sectors = u.cached_sectors,
.io_time[READ] = u.read_time,
.io_time[WRITE] = u.write_time,
.stripe = u.stripe,
};
}
}
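
Read-only callers can convert any alloc key onto the stack with no transaction or allocation involved; a minimal sketch of that pattern (the bch2_alloc_read() and copygc changes later in this patch do the same), with the inspected fields chosen purely for illustration:

	struct bch_alloc_v4 a;

	bch2_alloc_to_v4(k, &a);	/* accepts alloc, alloc_v2, alloc_v3 or alloc_v4 */

	if (a.data_type == BCH_DATA_user && a.dirty_sectors) {
		/* inspect a.gen, a.io_time[READ], etc. */
	}
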
dst = bch2_trans_kmalloc(trans, sizeof(struct bkey_alloc_buf));
if (!IS_ERR(dst))
bch2_alloc_pack_v3(dst, src);
struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *trans, struct bkey_s_c k)
{
struct bkey_i_alloc_v4 *ret;
return dst;
if (k.k->type == KEY_TYPE_alloc_v4) {
ret = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
if (!IS_ERR(ret))
bkey_reassemble(&ret->k_i, k);
} else {
ret = bch2_trans_kmalloc(trans, sizeof(*ret));
if (!IS_ERR(ret)) {
bkey_alloc_v4_init(&ret->k_i);
ret->k.p = k.k->p;
bch2_alloc_to_v4(k, &ret->v);
}
}
return ret;
}
int bch2_alloc_write(struct btree_trans *trans, struct btree_iter *iter,
struct bkey_alloc_unpacked *u, unsigned trigger_flags)
struct bkey_i_alloc_v4 *
bch2_trans_start_alloc_update(struct btree_trans *trans, struct btree_iter *iter,
struct bpos pos)
{
struct bkey_alloc_buf *a = bch2_alloc_pack(trans, *u);
struct bkey_s_c k;
struct bkey_i_alloc_v4 *a;
int ret;
/*
* Without BTREE_UPDATE_NO_KEY_CACHE_COHERENCY, we may end up updating
* the btree instead of the key cache - this can cause the allocator to
* self-deadlock, since updating the btree may require allocating new
* btree nodes:
*/
return PTR_ERR_OR_ZERO(a) ?:
bch2_trans_update(trans, iter, &a->k, trigger_flags|
BTREE_UPDATE_NO_KEY_CACHE_COHERENCY);
bch2_trans_iter_init(trans, iter, BTREE_ID_alloc, pos,
BTREE_ITER_WITH_UPDATES|
BTREE_ITER_CACHED|
BTREE_ITER_INTENT);
k = bch2_btree_iter_peek_slot(iter);
ret = bkey_err(k);
if (ret) {
bch2_trans_iter_exit(trans, iter);
return ERR_PTR(ret);
}
a = bch2_alloc_to_v4_mut(trans, k);
if (IS_ERR(a))
bch2_trans_iter_exit(trans, iter);
return a;
}
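
A typical write-side caller is then expected to look roughly like the sketch below (the bch2_bucket_io_time_reset() conversion further down in this patch follows the same shape); the dev, bucket_nr and new_time values and the error handling are illustrative only:

	struct btree_iter iter;
	struct bkey_i_alloc_v4 *a;
	int ret;

	a = bch2_trans_start_alloc_update(trans, &iter, POS(dev, bucket_nr));
	ret = PTR_ERR_OR_ZERO(a);
	if (ret)
		return ret;

	a->v.io_time[READ] = new_time;	/* mutate the v4 value in place */

	ret = bch2_trans_update(trans, &iter, &a->k_i, 0) ?:
	      bch2_trans_commit(trans, NULL, NULL, 0);
	bch2_trans_iter_exit(trans, &iter);
	return ret;
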
static unsigned bch_alloc_v1_val_u64s(const struct bch_alloc *a)
@@ -316,28 +331,70 @@ const char *bch2_alloc_v2_invalid(const struct bch_fs *c, struct bkey_s_c k)
const char *bch2_alloc_v3_invalid(const struct bch_fs *c, struct bkey_s_c k)
{
struct bkey_alloc_unpacked u;
struct bch_dev *ca;
if (k.k->p.inode >= c->sb.nr_devices ||
!c->devs[k.k->p.inode])
return "invalid device";
ca = bch_dev_bkey_exists(c, k.k->p.inode);
if (k.k->p.offset < ca->mi.first_bucket ||
k.k->p.offset >= ca->mi.nbuckets)
return "invalid bucket";
if (bch2_alloc_unpack_v3(&u, k))
return "unpack error";
return NULL;
}
void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c,
struct bkey_s_c k)
const char *bch2_alloc_v4_invalid(const struct bch_fs *c, struct bkey_s_c k)
{
struct bkey_alloc_unpacked u = bch2_alloc_unpack(k);
struct bch_dev *ca;
pr_buf(out, "gen %u oldest_gen %u data_type %s journal_seq %llu",
u.gen, u.oldest_gen, bch2_data_types[u.data_type],
u.journal_seq);
#define x(_name, ...) pr_buf(out, " " #_name " %llu", (u64) u._name);
BCH_ALLOC_FIELDS_V2()
#undef x
if (k.k->p.inode >= c->sb.nr_devices ||
!c->devs[k.k->p.inode])
return "invalid device";
ca = bch_dev_bkey_exists(c, k.k->p.inode);
if (k.k->p.offset < ca->mi.first_bucket ||
k.k->p.offset >= ca->mi.nbuckets)
return "invalid bucket";
return NULL;
}
void bch2_alloc_v4_swab(struct bkey_s k)
{
struct bch_alloc_v4 *a = bkey_s_to_alloc_v4(k).v;
a->journal_seq = swab64(a->journal_seq);
a->flags = swab32(a->flags);
a->dirty_sectors = swab32(a->dirty_sectors);
a->cached_sectors = swab32(a->cached_sectors);
a->io_time[0] = swab64(a->io_time[0]);
a->io_time[1] = swab64(a->io_time[1]);
a->stripe = swab32(a->stripe);
a->nr_external_backpointers = swab32(a->nr_external_backpointers);
}
void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
{
struct bch_alloc_v4 a;
bch2_alloc_to_v4(k, &a);
pr_buf(out, "gen %u oldest_gen %u data_type %s journal_seq %llu need_discard %llu",
a.gen, a.oldest_gen, bch2_data_types[a.data_type],
a.journal_seq, BCH_ALLOC_V4_NEED_DISCARD(&a));
pr_buf(out, " dirty_sectors %u", a.dirty_sectors);
pr_buf(out, " cached_sectors %u", a.cached_sectors);
pr_buf(out, " stripe %u", a.stripe);
pr_buf(out, " stripe_redundancy %u", a.stripe_redundancy);
pr_buf(out, " read_time %llu", a.io_time[READ]);
pr_buf(out, " write_time %llu", a.io_time[WRITE]);
}
int bch2_alloc_read(struct bch_fs *c, bool gc, bool metadata_only)
@@ -345,9 +402,9 @@ int bch2_alloc_read(struct bch_fs *c, bool gc, bool metadata_only)
struct btree_trans trans;
struct btree_iter iter;
struct bkey_s_c k;
struct bch_alloc_v4 a;
struct bch_dev *ca;
struct bucket *g;
struct bkey_alloc_unpacked u;
int ret;
bch2_trans_init(&trans, c, 0, 0);
@@ -356,28 +413,28 @@ int bch2_alloc_read(struct bch_fs *c, bool gc, bool metadata_only)
BTREE_ITER_PREFETCH, k, ret) {
ca = bch_dev_bkey_exists(c, k.k->p.inode);
g = __bucket(ca, k.k->p.offset, gc);
u = bch2_alloc_unpack(k);
bch2_alloc_to_v4(k, &a);
if (!gc)
*bucket_gen(ca, k.k->p.offset) = u.gen;
*bucket_gen(ca, k.k->p.offset) = a.gen;
g->_mark.gen = u.gen;
g->io_time[READ] = u.read_time;
g->io_time[WRITE] = u.write_time;
g->oldest_gen = !gc ? u.oldest_gen : u.gen;
g->_mark.gen = a.gen;
g->io_time[READ] = a.io_time[READ];
g->io_time[WRITE] = a.io_time[WRITE];
g->oldest_gen = !gc ? a.oldest_gen : a.gen;
g->gen_valid = 1;
if (!gc ||
(metadata_only &&
(u.data_type == BCH_DATA_user ||
u.data_type == BCH_DATA_cached ||
u.data_type == BCH_DATA_parity))) {
g->_mark.data_type = u.data_type;
g->_mark.dirty_sectors = u.dirty_sectors;
g->_mark.cached_sectors = u.cached_sectors;
g->_mark.stripe = u.stripe != 0;
g->stripe = u.stripe;
g->stripe_redundancy = u.stripe_redundancy;
(a.data_type == BCH_DATA_user ||
a.data_type == BCH_DATA_cached ||
a.data_type == BCH_DATA_parity))) {
g->_mark.data_type = a.data_type;
g->_mark.dirty_sectors = a.dirty_sectors;
g->_mark.cached_sectors = a.cached_sectors;
g->_mark.stripe = a.stripe != 0;
g->stripe = a.stripe;
g->stripe_redundancy = a.stripe_redundancy;
}
}
@@ -398,29 +455,22 @@ int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev,
{
struct bch_fs *c = trans->c;
struct btree_iter iter;
struct bkey_s_c k;
struct bkey_alloc_unpacked u;
u64 *time, now;
struct bkey_i_alloc_v4 *a;
u64 now;
int ret = 0;
bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, POS(dev, bucket_nr),
BTREE_ITER_CACHED|
BTREE_ITER_INTENT);
k = bch2_btree_iter_peek_slot(&iter);
ret = bkey_err(k);
a = bch2_trans_start_alloc_update(trans, &iter, POS(dev, bucket_nr));
ret = PTR_ERR_OR_ZERO(a);
if (ret)
goto out;
u = bch2_alloc_unpack(k);
return ret;
time = rw == READ ? &u.read_time : &u.write_time;
now = atomic64_read(&c->io_clock[rw].now);
if (*time == now)
if (a->v.io_time[rw] == now)
goto out;
*time = now;
a->v.io_time[rw] = now;
ret = bch2_alloc_write(trans, &iter, &u, 0) ?:
ret = bch2_trans_update(trans, &iter, &a->k_i, 0) ?:
bch2_trans_commit(trans, NULL, NULL, 0);
out:
bch2_trans_iter_exit(trans, &iter);
@@ -604,7 +654,7 @@ static size_t find_reclaimable_buckets(struct bch_fs *c, struct bch_dev *ca)
static int bucket_invalidate_btree(struct btree_trans *trans,
struct bch_dev *ca, u64 b,
struct bkey_alloc_unpacked *u)
struct bkey_i_alloc_v4 *a)
{
struct bch_fs *c = trans->c;
struct btree_iter iter;
@@ -621,16 +671,19 @@ static int bucket_invalidate_btree(struct btree_trans *trans,
if (ret)
goto err;
*u = bch2_alloc_unpack(k);
u->gen++;
u->data_type = 0;
u->dirty_sectors = 0;
u->cached_sectors = 0;
u->read_time = atomic64_read(&c->io_clock[READ].now);
u->write_time = atomic64_read(&c->io_clock[WRITE].now);
ret = bch2_alloc_write(trans, &iter, u,
BTREE_TRIGGER_BUCKET_INVALIDATE);
bkey_alloc_v4_init(&a->k_i);
a->k.p = iter.pos;
bch2_alloc_to_v4(k, &a->v);
a->v.gen++;
a->v.data_type = 0;
a->v.dirty_sectors = 0;
a->v.cached_sectors = 0;
a->v.io_time[READ] = atomic64_read(&c->io_clock[READ].now);
a->v.io_time[WRITE] = atomic64_read(&c->io_clock[WRITE].now);
ret = bch2_trans_update(trans, &iter, &a->k_i,
BTREE_TRIGGER_BUCKET_INVALIDATE|
BTREE_UPDATE_NO_KEY_CACHE_COHERENCY);
err:
bch2_trans_iter_exit(trans, &iter);
return ret;
@@ -639,7 +692,7 @@ static int bucket_invalidate_btree(struct btree_trans *trans,
static int bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca,
u64 *journal_seq, unsigned flags)
{
struct bkey_alloc_unpacked u;
struct bkey_i_alloc_v4 a;
size_t b;
u64 commit_seq = 0;
int ret = 0;
@@ -671,7 +724,7 @@ static int bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca,
BTREE_INSERT_NOCHECK_RW|
BTREE_INSERT_NOFAIL|
flags,
bucket_invalidate_btree(&trans, ca, b, &u));
bucket_invalidate_btree(&trans, ca, b, &a));
if (!ret) {
/* remove from alloc_heap: */
@@ -687,14 +740,14 @@ static int bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca,
* If we're invalidating cached data then we need to wait on the
* journal commit:
*/
if (u.data_type)
if (a.v.data_type)
*journal_seq = max(*journal_seq, commit_seq);
/*
* We were already waiting on u.alloc_seq when we filtered out
* buckets that need journal commit:
*/
BUG_ON(*journal_seq > u.journal_seq);
BUG_ON(*journal_seq > a.v.journal_seq);
} else {
size_t b2;
@@ -10,48 +10,14 @@
extern const char * const bch2_allocator_states[];
struct bkey_alloc_unpacked {
u64 journal_seq;
u64 bucket;
u8 dev;
u8 gen;
u8 oldest_gen;
u8 data_type;
#define x(_name, _bits) u##_bits _name;
BCH_ALLOC_FIELDS_V2()
#undef x
};
/* How out of date a pointer gen is allowed to be: */
#define BUCKET_GC_GEN_MAX 96U
/* returns true if not equal */
static inline bool bkey_alloc_unpacked_cmp(struct bkey_alloc_unpacked l,
struct bkey_alloc_unpacked r)
{
return l.gen != r.gen ||
l.oldest_gen != r.oldest_gen ||
l.data_type != r.data_type
#define x(_name, ...) || l._name != r._name
BCH_ALLOC_FIELDS_V2()
#undef x
;
}
struct bkey_alloc_buf {
struct bkey_i k;
struct bch_alloc_v3 v;
#define x(_name, _bits) + _bits / 8
u8 _pad[0 + BCH_ALLOC_FIELDS_V2()];
#undef x
} __attribute__((packed, aligned(8)));
struct bkey_i_alloc_v4 *
bch2_trans_start_alloc_update(struct btree_trans *, struct btree_iter *, struct bpos);
struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c);
struct bkey_alloc_buf *bch2_alloc_pack(struct btree_trans *,
const struct bkey_alloc_unpacked);
int bch2_alloc_write(struct btree_trans *, struct btree_iter *,
struct bkey_alloc_unpacked *, unsigned);
void bch2_alloc_to_v4(struct bkey_s_c, struct bch_alloc_v4 *);
struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *, struct bkey_s_c);
int bch2_bucket_io_time_reset(struct btree_trans *, unsigned, size_t, int);
@@ -60,6 +26,8 @@ int bch2_bucket_io_time_reset(struct btree_trans *, unsigned, size_t, int);
const char *bch2_alloc_v1_invalid(const struct bch_fs *, struct bkey_s_c);
const char *bch2_alloc_v2_invalid(const struct bch_fs *, struct bkey_s_c);
const char *bch2_alloc_v3_invalid(const struct bch_fs *, struct bkey_s_c);
const char *bch2_alloc_v4_invalid(const struct bch_fs *, struct bkey_s_c k);
void bch2_alloc_v4_swab(struct bkey_s);
void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
#define bch2_bkey_ops_alloc (struct bkey_ops) { \
@@ -80,6 +48,13 @@ void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
.atomic_trigger = bch2_mark_alloc, \
}
#define bch2_bkey_ops_alloc_v4 (struct bkey_ops) { \
.key_invalid = bch2_alloc_v4_invalid, \
.val_to_text = bch2_alloc_to_text, \
.swab = bch2_alloc_v4_swab, \
.atomic_trigger = bch2_mark_alloc, \
}
static inline bool bkey_is_alloc(const struct bkey *k)
{
return k->type == KEY_TYPE_alloc ||
@@ -82,6 +82,21 @@
typedef uuid_t __uuid_t;
#endif
#define BITMASK(name, type, field, offset, end) \
static const unsigned name##_OFFSET = offset; \
static const unsigned name##_BITS = (end - offset); \
\
static inline __u64 name(const type *k) \
{ \
return (k->field >> offset) & ~(~0ULL << (end - offset)); \
} \
\
static inline void SET_##name(type *k, __u64 v) \
{ \
k->field &= ~(~(~0ULL << (end - offset)) << offset); \
k->field |= (v & ~(~0ULL << (end - offset))) << offset; \
}
#define LE_BITMASK(_bits, name, type, field, offset, end) \
static const unsigned name##_OFFSET = offset; \
static const unsigned name##_BITS = (end - offset); \
@@ -353,7 +368,8 @@ static inline void bkey_init(struct bkey *k)
x(inode_v2, 23) \
x(alloc_v3, 24) \
x(set, 25) \
x(lru, 26)
x(lru, 26) \
x(alloc_v4, 27)
enum bch_bkey_type {
#define x(name, nr) KEY_TYPE_##name = nr,
@@ -903,6 +919,30 @@ struct bch_alloc_v3 {
__u8 data[];
} __attribute__((packed, aligned(8)));
struct bch_alloc_v4 {
struct bch_val v;
__u64 journal_seq;
__u32 flags;
__u8 gen;
__u8 oldest_gen;
__u8 data_type;
__u8 stripe_redundancy;
__u32 dirty_sectors;
__u32 cached_sectors;
__u64 io_time[2];
__u32 stripe;
__u32 nr_external_backpointers;
struct bpos backpointers[0];
} __attribute__((packed, aligned(8)));
LE32_BITMASK(BCH_ALLOC_V3_NEED_DISCARD,struct bch_alloc_v3, flags, 0, 1)
LE32_BITMASK(BCH_ALLOC_V3_NEED_INC_GEN,struct bch_alloc_v3, flags, 1, 2)
BITMASK(BCH_ALLOC_V4_NEED_DISCARD, struct bch_alloc_v4, flags, 0, 1)
BITMASK(BCH_ALLOC_V4_NEED_INC_GEN, struct bch_alloc_v4, flags, 1, 2)
BITMASK(BCH_ALLOC_V4_BACKPOINTERS_START,struct bch_alloc_v4, flags, 2, 8)
BITMASK(BCH_ALLOC_V4_NR_BACKPOINTERS, struct bch_alloc_v4, flags, 8, 14)
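
For reference, the new BITMASK() macro added above operates on the flags field as stored, with no per-field endian conversion (bch_alloc_v4 values are byte swapped wholesale by the new bch2_alloc_v4_swab()), so BCH_ALLOC_V4_NEED_DISCARD expands to roughly the following:

	static const unsigned BCH_ALLOC_V4_NEED_DISCARD_OFFSET = 0;
	static const unsigned BCH_ALLOC_V4_NEED_DISCARD_BITS   = 1;

	static inline __u64 BCH_ALLOC_V4_NEED_DISCARD(const struct bch_alloc_v4 *k)
	{
		return (k->flags >> 0) & ~(~0ULL << 1);
	}

	static inline void SET_BCH_ALLOC_V4_NEED_DISCARD(struct bch_alloc_v4 *k, __u64 v)
	{
		k->flags &= ~(~(~0ULL << 1) << 0);
		k->flags |= (v & ~(~0ULL << 1)) << 0;
	}
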
enum {
#define x(name, _bits) BCH_ALLOC_FIELD_V1_##name,
BCH_ALLOC_FIELDS_V1()
@@ -149,7 +149,8 @@ static unsigned bch2_key_types_allowed[] = {
(1U << KEY_TYPE_deleted)|
(1U << KEY_TYPE_alloc)|
(1U << KEY_TYPE_alloc_v2)|
(1U << KEY_TYPE_alloc_v3),
(1U << KEY_TYPE_alloc_v3)|
(1U << KEY_TYPE_alloc_v4),
[BKEY_TYPE_quotas] =
(1U << KEY_TYPE_deleted)|
(1U << KEY_TYPE_quota),
@@ -1306,6 +1306,19 @@ static int bch2_gc_start(struct bch_fs *c,
return 0;
}
/* returns true if not equal */
static inline bool bch2_alloc_v4_cmp(struct bch_alloc_v4 l,
struct bch_alloc_v4 r)
{
return l.gen != r.gen ||
l.oldest_gen != r.oldest_gen ||
l.data_type != r.data_type ||
l.dirty_sectors != r.dirty_sectors ||
l.cached_sectors != r.cached_sectors ||
l.stripe_redundancy != r.stripe_redundancy ||
l.stripe != r.stripe;
}
static int bch2_alloc_write_key(struct btree_trans *trans,
struct btree_iter *iter,
bool metadata_only)
@@ -1314,8 +1327,8 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
struct bch_dev *ca = bch_dev_bkey_exists(c, iter->pos.inode);
struct bucket *g;
struct bkey_s_c k;
struct bkey_alloc_unpacked old_u, new_u, gc_u;
struct bkey_alloc_buf *a;
struct bkey_i_alloc_v4 *a;
struct bch_alloc_v4 old, new, gc;
int ret;
k = bch2_btree_iter_peek_slot(iter);
@@ -1323,60 +1336,61 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
if (ret)
return ret;
old_u = new_u = bch2_alloc_unpack(k);
bch2_alloc_to_v4(k, &old);
new = old;
percpu_down_read(&c->mark_lock);
g = gc_bucket(ca, iter->pos.offset);
gc_u = (struct bkey_alloc_unpacked) {
.dev = iter->pos.inode,
.bucket = iter->pos.offset,
gc = (struct bch_alloc_v4) {
.gen = g->mark.gen,
.data_type = g->mark.data_type,
.dirty_sectors = g->mark.dirty_sectors,
.cached_sectors = g->mark.cached_sectors,
.read_time = g->io_time[READ],
.write_time = g->io_time[WRITE],
.io_time[READ] = g->io_time[READ],
.io_time[WRITE] = g->io_time[WRITE],
.stripe = g->stripe,
.stripe_redundancy = g->stripe_redundancy,
};
percpu_up_read(&c->mark_lock);
if (metadata_only &&
gc_u.data_type != BCH_DATA_sb &&
gc_u.data_type != BCH_DATA_journal &&
gc_u.data_type != BCH_DATA_btree)
gc.data_type != BCH_DATA_sb &&
gc.data_type != BCH_DATA_journal &&
gc.data_type != BCH_DATA_btree)
return 0;
if (gen_after(old_u.gen, gc_u.gen))
if (gen_after(old.gen, gc.gen))
return 0;
#define copy_bucket_field(_f) \
if (fsck_err_on(new_u._f != gc_u._f, c, \
if (fsck_err_on(new._f != gc._f, c, \
"bucket %llu:%llu gen %u data type %s has wrong " #_f \
": got %u, should be %u", \
iter->pos.inode, iter->pos.offset, \
new_u.gen, \
bch2_data_types[new_u.data_type], \
new_u._f, gc_u._f)) \
new_u._f = gc_u._f; \
new.gen, \
bch2_data_types[new.data_type], \
new._f, gc._f)) \
new._f = gc._f; \
copy_bucket_field(gen);
copy_bucket_field(data_type);
copy_bucket_field(stripe);
copy_bucket_field(dirty_sectors);
copy_bucket_field(cached_sectors);
copy_bucket_field(stripe_redundancy);
copy_bucket_field(stripe);
#undef copy_bucket_field
if (!bkey_alloc_unpacked_cmp(old_u, new_u))
if (!bch2_alloc_v4_cmp(old, new))
return 0;
a = bch2_alloc_pack(trans, new_u);
if (IS_ERR(a))
return PTR_ERR(a);
a = bch2_alloc_to_v4_mut(trans, k);
ret = PTR_ERR_OR_ZERO(a);
if (ret)
return ret;
a->v = new;
ret = bch2_trans_update(trans, iter, &a->k, BTREE_TRIGGER_NORUN);
ret = bch2_trans_update(trans, iter, &a->k_i, BTREE_TRIGGER_NORUN);
fsck_err:
return ret;
}
@@ -1873,7 +1887,8 @@ static int bch2_alloc_write_oldest_gen(struct btree_trans *trans, struct btree_i
{
struct bch_dev *ca = bch_dev_bkey_exists(trans->c, iter->pos.inode);
struct bkey_s_c k;
struct bkey_alloc_unpacked u;
struct bch_alloc_v4 a;
struct bkey_i_alloc_v4 *a_mut;
int ret;
k = bch2_btree_iter_peek_slot(iter);
@@ -1881,14 +1896,19 @@ static int bch2_alloc_write_oldest_gen(struct btree_trans *trans, struct btree_i
if (ret)
return ret;
u = bch2_alloc_unpack(k);
bch2_alloc_to_v4(k, &a);
if (u.oldest_gen == ca->oldest_gen[iter->pos.offset])
if (a.oldest_gen == ca->oldest_gen[iter->pos.offset])
return 0;
u.oldest_gen = ca->oldest_gen[iter->pos.offset];
a_mut = bch2_alloc_to_v4_mut(trans, k);
ret = PTR_ERR_OR_ZERO(a_mut);
if (ret)
return ret;
a_mut->v.oldest_gen = ca->oldest_gen[iter->pos.offset];
return bch2_alloc_write(trans, iter, &u, BTREE_TRIGGER_NORUN);
return bch2_trans_update(trans, iter, &a_mut->k_i, 0);
}
int bch2_gc_gens(struct bch_fs *c)
@@ -678,6 +678,7 @@ enum btree_update_flags {
((1U << KEY_TYPE_alloc)| \
(1U << KEY_TYPE_alloc_v2)| \
(1U << KEY_TYPE_alloc_v3)| \
(1U << KEY_TYPE_alloc_v4)| \
(1U << KEY_TYPE_stripe)| \
(1U << KEY_TYPE_inode)| \
(1U << KEY_TYPE_inode_v2)| \
@@ -508,15 +508,14 @@ int bch2_mark_alloc(struct btree_trans *trans,
bool gc = flags & BTREE_TRIGGER_GC;
u64 journal_seq = trans->journal_res.seq;
struct bch_fs *c = trans->c;
struct bkey_alloc_unpacked old_u = bch2_alloc_unpack(old);
struct bkey_alloc_unpacked new_u = bch2_alloc_unpack(new);
struct bch_dev *ca = bch_dev_bkey_exists(c, new_u.dev);
struct bch_alloc_v4 old_a, new_a;
struct bch_dev *ca = bch_dev_bkey_exists(c, new.k->p.inode);
struct bucket *g;
struct bucket_mark old_m, m;
int ret = 0;
if (bch2_trans_inconsistent_on(new_u.bucket < ca->mi.first_bucket ||
new_u.bucket >= ca->mi.nbuckets, trans,
if (bch2_trans_inconsistent_on(new.k->p.offset < ca->mi.first_bucket ||
new.k->p.offset >= ca->mi.nbuckets, trans,
"alloc key outside range of device's buckets"))
return -EIO;
@@ -527,11 +526,13 @@ int bch2_mark_alloc(struct btree_trans *trans,
!(flags & BTREE_TRIGGER_BUCKET_INVALIDATE))
return 0;
bch2_alloc_to_v4(old, &old_a);
bch2_alloc_to_v4(new, &new_a);
if ((flags & BTREE_TRIGGER_INSERT) &&
!old_u.data_type != !new_u.data_type &&
new.k->type == KEY_TYPE_alloc_v3) {
struct bch_alloc_v3 *v = (struct bch_alloc_v3 *) new.v;
u64 old_journal_seq = le64_to_cpu(v->journal_seq);
!old_a.data_type != !new_a.data_type &&
new.k->type == KEY_TYPE_alloc_v4) {
struct bch_alloc_v4 *v = (struct bch_alloc_v4 *) new.v;
BUG_ON(!journal_seq);
@@ -540,18 +541,18 @@ int bch2_mark_alloc(struct btree_trans *trans,
* before the bucket became empty again, then we don't have
* to wait on a journal flush before we can reuse the bucket:
*/
new_u.journal_seq = !new_u.data_type &&
(journal_seq == old_journal_seq ||
bch2_journal_noflush_seq(&c->journal, old_journal_seq))
new_a.journal_seq = !new_a.data_type &&
(journal_seq == v->journal_seq ||
bch2_journal_noflush_seq(&c->journal, v->journal_seq))
? 0 : journal_seq;
v->journal_seq = cpu_to_le64(new_u.journal_seq);
v->journal_seq = new_a.journal_seq;
}
if (old_u.data_type && !new_u.data_type && new_u.journal_seq) {
if (old_a.data_type && !new_a.data_type && new_a.journal_seq) {
ret = bch2_set_bucket_needs_journal_commit(&c->buckets_waiting_for_journal,
c->journal.flushed_seq_ondisk,
new_u.dev, new_u.bucket,
new_u.journal_seq);
new.k->p.inode, new.k->p.offset,
new_a.journal_seq);
if (ret) {
bch2_fs_fatal_error(c,
"error setting bucket_needs_journal_commit: %i", ret);
......@@ -560,27 +561,27 @@ int bch2_mark_alloc(struct btree_trans *trans,
}
percpu_down_read(&c->mark_lock);
if (!gc && new_u.gen != old_u.gen)
*bucket_gen(ca, new_u.bucket) = new_u.gen;
if (!gc && new_a.gen != old_a.gen)
*bucket_gen(ca, new.k->p.offset) = new_a.gen;
g = __bucket(ca, new_u.bucket, gc);
g = __bucket(ca, new.k->p.offset, gc);
old_m = bucket_cmpxchg(g, m, ({
m.gen = new_u.gen;
m.data_type = new_u.data_type;
m.dirty_sectors = new_u.dirty_sectors;
m.cached_sectors = new_u.cached_sectors;
m.stripe = new_u.stripe != 0;
m.gen = new_a.gen;
m.data_type = new_a.data_type;
m.dirty_sectors = new_a.dirty_sectors;
m.cached_sectors = new_a.cached_sectors;
m.stripe = new_a.stripe != 0;
}));
bch2_dev_usage_update(c, ca, old_m, m, journal_seq, gc);
g->io_time[READ] = new_u.read_time;
g->io_time[WRITE] = new_u.write_time;
g->oldest_gen = new_u.oldest_gen;
g->io_time[READ] = new_a.io_time[READ];
g->io_time[WRITE] = new_a.io_time[WRITE];
g->oldest_gen = new_a.oldest_gen;
g->gen_valid = 1;
g->stripe = new_u.stripe;
g->stripe_redundancy = new_u.stripe_redundancy;
g->stripe = new_a.stripe;
g->stripe_redundancy = new_a.stripe_redundancy;
percpu_up_read(&c->mark_lock);
/*
@@ -598,7 +599,7 @@ int bch2_mark_alloc(struct btree_trans *trans,
return ret;
}
trace_invalidate(ca, bucket_to_sector(ca, new_u.bucket),
trace_invalidate(ca, bucket_to_sector(ca, new.k->p.offset),
old_m.cached_sectors);
}
@@ -1378,50 +1379,32 @@ int bch2_trans_fs_usage_apply(struct btree_trans *trans,
/* trans_mark: */
static int bch2_trans_start_alloc_update(struct btree_trans *trans, struct btree_iter *iter,
const struct bch_extent_ptr *ptr,
struct bkey_alloc_unpacked *u)
{
struct bch_fs *c = trans->c;
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
struct bkey_s_c k;
int ret;
bch2_trans_iter_init(trans, iter, BTREE_ID_alloc,
POS(ptr->dev, PTR_BUCKET_NR(ca, ptr)),
BTREE_ITER_WITH_UPDATES|
BTREE_ITER_CACHED|
BTREE_ITER_INTENT);
k = bch2_btree_iter_peek_slot(iter);
ret = bkey_err(k);
if (ret) {
bch2_trans_iter_exit(trans, iter);
return ret;
}
*u = bch2_alloc_unpack(k);
return 0;
}
static int bch2_trans_mark_pointer(struct btree_trans *trans,
struct bkey_s_c k, struct extent_ptr_decoded p,
s64 sectors, enum bch_data_type data_type)
{
struct btree_iter iter;
struct bkey_alloc_unpacked u;
struct bkey_i_alloc_v4 *a;
u16 dirty_sectors, cached_sectors;
int ret;
ret = bch2_trans_start_alloc_update(trans, &iter, &p.ptr, &u);
if (ret)
return ret;
a = bch2_trans_start_alloc_update(trans, &iter, PTR_BUCKET_POS(trans->c, &p.ptr));
if (IS_ERR(a))
return PTR_ERR(a);
dirty_sectors = a->v.dirty_sectors;
cached_sectors = a->v.cached_sectors;
ret = __mark_pointer(trans, k, &p.ptr, sectors, data_type,
u.gen, &u.data_type,
&u.dirty_sectors, &u.cached_sectors);
a->v.gen, &a->v.data_type,
&dirty_sectors, &cached_sectors);
if (ret)
goto out;
ret = bch2_alloc_write(trans, &iter, &u, 0);
a->v.dirty_sectors = dirty_sectors;
a->v.cached_sectors = cached_sectors;
ret = bch2_trans_update(trans, &iter, &a->k_i, 0);
if (ret)
goto out;
out:
@@ -1554,7 +1537,7 @@ static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans,
struct bch_fs *c = trans->c;
const struct bch_extent_ptr *ptr = &s.v->ptrs[idx];
struct btree_iter iter;
struct bkey_alloc_unpacked u;
struct bkey_i_alloc_v4 *a;
enum bch_data_type data_type = idx >= s.v->nr_blocks - s.v->nr_redundant
? BCH_DATA_parity : 0;
s64 sectors = data_type ? le16_to_cpu(s.v->sectors) : 0;
@@ -1563,59 +1546,59 @@ static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans,
if (deleting)
sectors = -sectors;
ret = bch2_trans_start_alloc_update(trans, &iter, ptr, &u);
if (ret)
return ret;
a = bch2_trans_start_alloc_update(trans, &iter, PTR_BUCKET_POS(c, ptr));
if (IS_ERR(a))
return PTR_ERR(a);
ret = check_bucket_ref(c, s.s_c, ptr, sectors, data_type,
u.gen, u.data_type,
u.dirty_sectors, u.cached_sectors);
a->v.gen, a->v.data_type,
a->v.dirty_sectors, a->v.cached_sectors);
if (ret)
goto err;
if (!deleting) {
if (bch2_trans_inconsistent_on(u.stripe ||
u.stripe_redundancy, trans,
if (bch2_trans_inconsistent_on(a->v.stripe ||
a->v.stripe_redundancy, trans,
"bucket %llu:%llu gen %u data type %s dirty_sectors %u: multiple stripes using same bucket (%u, %llu)",
iter.pos.inode, iter.pos.offset, u.gen,
bch2_data_types[u.data_type],
u.dirty_sectors,
u.stripe, s.k->p.offset)) {
iter.pos.inode, iter.pos.offset, a->v.gen,
bch2_data_types[a->v.data_type],
a->v.dirty_sectors,
a->v.stripe, s.k->p.offset)) {
ret = -EIO;
goto err;
}
if (bch2_trans_inconsistent_on(data_type && u.dirty_sectors, trans,
if (bch2_trans_inconsistent_on(data_type && a->v.dirty_sectors, trans,
"bucket %llu:%llu gen %u data type %s dirty_sectors %u: data already in stripe bucket %llu",
iter.pos.inode, iter.pos.offset, u.gen,
bch2_data_types[u.data_type],
u.dirty_sectors,
iter.pos.inode, iter.pos.offset, a->v.gen,
bch2_data_types[a->v.data_type],
a->v.dirty_sectors,
s.k->p.offset)) {
ret = -EIO;
goto err;
}
u.stripe = s.k->p.offset;
u.stripe_redundancy = s.v->nr_redundant;
a->v.stripe = s.k->p.offset;
a->v.stripe_redundancy = s.v->nr_redundant;
} else {
if (bch2_trans_inconsistent_on(u.stripe != s.k->p.offset ||
u.stripe_redundancy != s.v->nr_redundant, trans,
if (bch2_trans_inconsistent_on(a->v.stripe != s.k->p.offset ||
a->v.stripe_redundancy != s.v->nr_redundant, trans,
"bucket %llu:%llu gen %u: not marked as stripe when deleting stripe %llu (got %u)",
iter.pos.inode, iter.pos.offset, u.gen,
s.k->p.offset, u.stripe)) {
iter.pos.inode, iter.pos.offset, a->v.gen,
s.k->p.offset, a->v.stripe)) {
ret = -EIO;
goto err;
}
u.stripe = 0;
u.stripe_redundancy = 0;
a->v.stripe = 0;
a->v.stripe_redundancy = 0;
}
u.dirty_sectors += sectors;
a->v.dirty_sectors += sectors;
if (data_type)
u.data_type = !deleting ? data_type : 0;
a->v.data_type = !deleting ? data_type : 0;
ret = bch2_alloc_write(trans, &iter, &u, 0);
ret = bch2_trans_update(trans, &iter, &a->k_i, 0);
if (ret)
goto err;
err:
@@ -1845,11 +1828,7 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
struct btree_iter iter;
struct bkey_alloc_unpacked u;
struct bch_extent_ptr ptr = {
.dev = ca->dev_idx,
.offset = bucket_to_sector(ca, b),
};
struct bkey_i_alloc_v4 *a;
int ret = 0;
/*
@@ -1858,26 +1837,27 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
if (b >= ca->mi.nbuckets)
return 0;
ret = bch2_trans_start_alloc_update(trans, &iter, &ptr, &u);
if (ret)
return ret;
a = bch2_trans_start_alloc_update(trans, &iter, POS(ca->dev_idx, b));
if (IS_ERR(a))
return PTR_ERR(a);
if (u.data_type && u.data_type != type) {
if (a->v.data_type && a->v.data_type != type) {
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
"bucket %llu:%llu gen %u different types of data in same bucket: %s, %s\n"
"while marking %s",
iter.pos.inode, iter.pos.offset, u.gen,
bch2_data_types[u.data_type],
iter.pos.inode, iter.pos.offset, a->v.gen,
bch2_data_types[a->v.data_type],
bch2_data_types[type],
bch2_data_types[type]);
ret = -EIO;
goto out;
}
u.data_type = type;
u.dirty_sectors = sectors;
a->v.data_type = type;
a->v.dirty_sectors = sectors;
ret = bch2_alloc_write(trans, &iter, &u, 0);
ret = bch2_trans_update(trans, &iter, &a->k_i,
BTREE_UPDATE_NO_KEY_CACHE_COHERENCY);
if (ret)
goto out;
out:
@@ -97,6 +97,14 @@ static inline size_t PTR_BUCKET_NR(const struct bch_dev *ca,
return sector_to_bucket(ca, ptr->offset);
}
static inline struct bpos PTR_BUCKET_POS(const struct bch_fs *c,
const struct bch_extent_ptr *ptr)
{
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
return POS(ptr->dev, PTR_BUCKET_NR(ca, ptr));
}
static inline struct bucket *PTR_GC_BUCKET(struct bch_dev *ca,
const struct bch_extent_ptr *ptr)
{
@@ -111,7 +111,7 @@ struct copygc_heap_entry {
u8 dev;
u8 gen;
u8 replicas;
u16 fragmentation;
u32 fragmentation;
u32 sectors;
u64 offset;
};
@@ -129,7 +129,7 @@ static int walk_buckets_to_copygc(struct bch_fs *c)
struct btree_trans trans;
struct btree_iter iter;
struct bkey_s_c k;
struct bkey_alloc_unpacked u;
struct bch_alloc_v4 a;
int ret;
bch2_trans_init(&trans, c, 0, 0);
@@ -139,20 +139,20 @@ static int walk_buckets_to_copygc(struct bch_fs *c)
struct bch_dev *ca = bch_dev_bkey_exists(c, iter.pos.inode);
struct copygc_heap_entry e;
u = bch2_alloc_unpack(k);
bch2_alloc_to_v4(k, &a);
if (u.data_type != BCH_DATA_user ||
u.dirty_sectors >= ca->mi.bucket_size ||
if (a.data_type != BCH_DATA_user ||
a.dirty_sectors >= ca->mi.bucket_size ||
bch2_bucket_is_open(c, iter.pos.inode, iter.pos.offset))
continue;
e = (struct copygc_heap_entry) {
.dev = iter.pos.inode,
.gen = u.gen,
.replicas = 1 + u.stripe_redundancy,
.fragmentation = u.dirty_sectors * (1U << 15)
/ ca->mi.bucket_size,
.sectors = u.dirty_sectors,
.gen = a.gen,
.replicas = 1 + a.stripe_redundancy,
.fragmentation = div_u64((u64) a.dirty_sectors * (1ULL << 31),
ca->mi.bucket_size),
.sectors = a.dirty_sectors,
.offset = bucket_to_sector(ca, iter.pos.offset),
};
heap_add_or_replace(h, e, -fragmentation_cmp, NULL);
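
Side note on the copygc change above: fragmentation is widened from u16 to u32 and rescaled from a 1<<15 to a 1<<31 fixed-point factor; the u64 cast and div_u64() keep the intermediate product from overflowing 32 bits. A quick worked example with assumed values:

	/* bucket_size = 2048 sectors, dirty_sectors = 512 (bucket 1/4 dirty):
	 *   old: 512 * (1U  << 15) / 2048 = 8192        (out of 32768)
	 *   new: 512 * (1ULL << 31) / 2048 = 536870912  (out of ~2^31)
	 * Same ratio, much finer resolution, and the intermediate
	 * 512 * 2^31 no longer needs to fit in 32 bits. */
	e.fragmentation = div_u64((u64) a.dirty_sectors * (1ULL << 31),
				  ca->mi.bucket_size);
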
@@ -180,7 +180,7 @@ static int check_copygc_was_done(struct bch_fs *c,
struct btree_trans trans;
struct btree_iter iter;
struct bkey_s_c k;
struct bkey_alloc_unpacked u;
struct bch_alloc_v4 a;
struct copygc_heap_entry *i;
int ret = 0;
@@ -199,10 +199,10 @@ static int check_copygc_was_done(struct bch_fs *c,
if (ret)
break;
u = bch2_alloc_unpack(k);
bch2_alloc_to_v4(k, &a);
if (u.gen == i->gen && u.dirty_sectors) {
*sectors_not_moved += u.dirty_sectors;
if (a.gen == i->gen && a.dirty_sectors) {
*sectors_not_moved += a.dirty_sectors;
*buckets_not_moved += 1;
}
}