Commit 3d48a7f8 authored by Kent Overstreet, committed by Kent Overstreet

bcachefs: KEY_TYPE_alloc_v4

This introduces a new alloc key which doesn't use varints. Soon we'll be
adding backpointers and storing them in alloc keys, which means our
pack/unpack workflow for alloc keys won't really work - we'll need to
mutate alloc keys in place.
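For reference, this is the fixed-layout value the patch introduces (copied from
the bcachefs_format.h hunk further down); because every field lives at a fixed
offset, callers can read and write it directly instead of going through a
varint pack/unpack step:

    struct bch_alloc_v4 {
    	struct bch_val		v;
    	__u64			journal_seq;
    	__u32			flags;
    	__u8			gen;
    	__u8			oldest_gen;
    	__u8			data_type;
    	__u8			stripe_redundancy;
    	__u32			dirty_sectors;
    	__u32			cached_sectors;
    	__u64			io_time[2];
    	__u32			stripe;
    	__u32			nr_external_backpointers;
    	struct bpos		backpointers[0];
    } __attribute__((packed, aligned(8)));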

Instead of bch2_alloc_unpack(), we now have bch2_alloc_to_v4() that
converts older types of alloc keys to v4 if needed.
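A minimal sketch of the new update pattern (the function name below is made up
for illustration; it's modeled on bch2_bucket_io_time_reset() as converted in
this patch). Any alloc key - v1, v2, v3 or v4 - is turned into a mutable v4 key,
modified in place, and written back with a plain bch2_trans_update():

    static int example_touch_bucket(struct btree_trans *trans,
    				unsigned dev, u64 bucket_nr)
    {
    	struct btree_iter iter;
    	struct bkey_i_alloc_v4 *a;
    	int ret;

    	/* Look up the alloc key and get it back as a mutable v4 key: */
    	a = bch2_trans_start_alloc_update(trans, &iter, POS(dev, bucket_nr));
    	ret = PTR_ERR_OR_ZERO(a);
    	if (ret)
    		return ret;

    	/* Mutate the value directly - no pack/unpack step: */
    	a->v.io_time[READ] = atomic64_read(&trans->c->io_clock[READ].now);

    	ret = bch2_trans_update(trans, &iter, &a->k_i, 0);
    	bch2_trans_iter_exit(trans, &iter);
    	return ret;
    }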
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent d326ab2f
...@@ -33,13 +33,27 @@ const char * const bch2_allocator_states[] = { ...@@ -33,13 +33,27 @@ const char * const bch2_allocator_states[] = {
NULL NULL
}; };
/* Persistent alloc info: */
static const unsigned BCH_ALLOC_V1_FIELD_BYTES[] = { static const unsigned BCH_ALLOC_V1_FIELD_BYTES[] = {
#define x(name, bits) [BCH_ALLOC_FIELD_V1_##name] = bits / 8, #define x(name, bits) [BCH_ALLOC_FIELD_V1_##name] = bits / 8,
BCH_ALLOC_FIELDS_V1() BCH_ALLOC_FIELDS_V1()
#undef x #undef x
}; };
/* Persistent alloc info: */ struct bkey_alloc_unpacked {
u64 journal_seq;
u64 bucket;
u8 dev;
u8 gen;
u8 oldest_gen;
u8 data_type;
bool need_discard:1;
bool need_inc_gen:1;
#define x(_name, _bits) u##_bits _name;
BCH_ALLOC_FIELDS_V2()
#undef x
};
static inline u64 alloc_field_v1_get(const struct bch_alloc *a, static inline u64 alloc_field_v1_get(const struct bch_alloc *a,
const void **p, unsigned field) const void **p, unsigned field)
...@@ -161,6 +175,8 @@ static int bch2_alloc_unpack_v3(struct bkey_alloc_unpacked *out, ...@@ -161,6 +175,8 @@ static int bch2_alloc_unpack_v3(struct bkey_alloc_unpacked *out,
out->gen = a.v->gen; out->gen = a.v->gen;
out->oldest_gen = a.v->oldest_gen; out->oldest_gen = a.v->oldest_gen;
out->data_type = a.v->data_type; out->data_type = a.v->data_type;
out->need_discard = BCH_ALLOC_V3_NEED_DISCARD(a.v);
out->need_inc_gen = BCH_ALLOC_V3_NEED_INC_GEN(a.v);
out->journal_seq = le64_to_cpu(a.v->journal_seq); out->journal_seq = le64_to_cpu(a.v->journal_seq);
#define x(_name, _bits) \ #define x(_name, _bits) \
...@@ -182,47 +198,7 @@ static int bch2_alloc_unpack_v3(struct bkey_alloc_unpacked *out, ...@@ -182,47 +198,7 @@ static int bch2_alloc_unpack_v3(struct bkey_alloc_unpacked *out,
return 0; return 0;
} }
static void bch2_alloc_pack_v3(struct bkey_alloc_buf *dst, static struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c k)
const struct bkey_alloc_unpacked src)
{
struct bkey_i_alloc_v3 *a = bkey_alloc_v3_init(&dst->k);
unsigned nr_fields = 0, last_nonzero_fieldnr = 0;
u8 *out = a->v.data;
u8 *end = (void *) &dst[1];
u8 *last_nonzero_field = out;
unsigned bytes;
a->k.p = POS(src.dev, src.bucket);
a->v.gen = src.gen;
a->v.oldest_gen = src.oldest_gen;
a->v.data_type = src.data_type;
a->v.journal_seq = cpu_to_le64(src.journal_seq);
#define x(_name, _bits) \
nr_fields++; \
\
if (src._name) { \
out += bch2_varint_encode_fast(out, src._name); \
\
last_nonzero_field = out; \
last_nonzero_fieldnr = nr_fields; \
} else { \
*out++ = 0; \
}
BCH_ALLOC_FIELDS_V2()
#undef x
BUG_ON(out > end);
out = last_nonzero_field;
a->v.nr_fields = last_nonzero_fieldnr;
bytes = (u8 *) out - (u8 *) &a->v;
set_bkey_val_bytes(&a->k, bytes);
memset_u64s_tail(&a->v, 0, bytes);
}
struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c k)
{ {
struct bkey_alloc_unpacked ret = { struct bkey_alloc_unpacked ret = {
.dev = k.k->p.inode, .dev = k.k->p.inode,
...@@ -245,32 +221,71 @@ struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c k) ...@@ -245,32 +221,71 @@ struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c k)
return ret; return ret;
} }
struct bkey_alloc_buf *bch2_alloc_pack(struct btree_trans *trans, void bch2_alloc_to_v4(struct bkey_s_c k, struct bch_alloc_v4 *out)
const struct bkey_alloc_unpacked src)
{ {
struct bkey_alloc_buf *dst; if (k.k->type == KEY_TYPE_alloc_v4) {
*out = *bkey_s_c_to_alloc_v4(k).v;
} else {
struct bkey_alloc_unpacked u = bch2_alloc_unpack(k);
*out = (struct bch_alloc_v4) {
.journal_seq = u.journal_seq,
.flags = u.need_discard,
.gen = u.gen,
.oldest_gen = u.oldest_gen,
.data_type = u.data_type,
.stripe_redundancy = u.stripe_redundancy,
.dirty_sectors = u.dirty_sectors,
.cached_sectors = u.cached_sectors,
.io_time[READ] = u.read_time,
.io_time[WRITE] = u.write_time,
.stripe = u.stripe,
};
}
}
dst = bch2_trans_kmalloc(trans, sizeof(struct bkey_alloc_buf)); struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *trans, struct bkey_s_c k)
if (!IS_ERR(dst)) {
bch2_alloc_pack_v3(dst, src); struct bkey_i_alloc_v4 *ret;
return dst; if (k.k->type == KEY_TYPE_alloc_v4) {
ret = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
if (!IS_ERR(ret))
bkey_reassemble(&ret->k_i, k);
} else {
ret = bch2_trans_kmalloc(trans, sizeof(*ret));
if (!IS_ERR(ret)) {
bkey_alloc_v4_init(&ret->k_i);
ret->k.p = k.k->p;
bch2_alloc_to_v4(k, &ret->v);
}
}
return ret;
} }
int bch2_alloc_write(struct btree_trans *trans, struct btree_iter *iter, struct bkey_i_alloc_v4 *
struct bkey_alloc_unpacked *u, unsigned trigger_flags) bch2_trans_start_alloc_update(struct btree_trans *trans, struct btree_iter *iter,
struct bpos pos)
{ {
struct bkey_alloc_buf *a = bch2_alloc_pack(trans, *u); struct bkey_s_c k;
struct bkey_i_alloc_v4 *a;
int ret;
/* bch2_trans_iter_init(trans, iter, BTREE_ID_alloc, pos,
* Without BTREE_UPDATE_NO_KEY_CACHE_COHERENCY, we may end up updating BTREE_ITER_WITH_UPDATES|
* the btree instead of the key cache - this can casue the allocator to BTREE_ITER_CACHED|
* self-deadlock, since updating the btree may require allocating new BTREE_ITER_INTENT);
* btree nodes: k = bch2_btree_iter_peek_slot(iter);
*/ ret = bkey_err(k);
return PTR_ERR_OR_ZERO(a) ?: if (ret) {
bch2_trans_update(trans, iter, &a->k, trigger_flags| bch2_trans_iter_exit(trans, iter);
BTREE_UPDATE_NO_KEY_CACHE_COHERENCY); return ERR_PTR(ret);
}
a = bch2_alloc_to_v4_mut(trans, k);
if (IS_ERR(a))
bch2_trans_iter_exit(trans, iter);
return a;
} }
static unsigned bch_alloc_v1_val_u64s(const struct bch_alloc *a) static unsigned bch_alloc_v1_val_u64s(const struct bch_alloc *a)
...@@ -316,28 +331,70 @@ const char *bch2_alloc_v2_invalid(const struct bch_fs *c, struct bkey_s_c k) ...@@ -316,28 +331,70 @@ const char *bch2_alloc_v2_invalid(const struct bch_fs *c, struct bkey_s_c k)
const char *bch2_alloc_v3_invalid(const struct bch_fs *c, struct bkey_s_c k) const char *bch2_alloc_v3_invalid(const struct bch_fs *c, struct bkey_s_c k)
{ {
struct bkey_alloc_unpacked u; struct bkey_alloc_unpacked u;
struct bch_dev *ca;
if (k.k->p.inode >= c->sb.nr_devices || if (k.k->p.inode >= c->sb.nr_devices ||
!c->devs[k.k->p.inode]) !c->devs[k.k->p.inode])
return "invalid device"; return "invalid device";
ca = bch_dev_bkey_exists(c, k.k->p.inode);
if (k.k->p.offset < ca->mi.first_bucket ||
k.k->p.offset >= ca->mi.nbuckets)
return "invalid bucket";
if (bch2_alloc_unpack_v3(&u, k)) if (bch2_alloc_unpack_v3(&u, k))
return "unpack error"; return "unpack error";
return NULL; return NULL;
} }
void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, const char *bch2_alloc_v4_invalid(const struct bch_fs *c, struct bkey_s_c k)
struct bkey_s_c k)
{ {
struct bkey_alloc_unpacked u = bch2_alloc_unpack(k); struct bch_dev *ca;
pr_buf(out, "gen %u oldest_gen %u data_type %s journal_seq %llu", if (k.k->p.inode >= c->sb.nr_devices ||
u.gen, u.oldest_gen, bch2_data_types[u.data_type], !c->devs[k.k->p.inode])
u.journal_seq); return "invalid device";
#define x(_name, ...) pr_buf(out, " " #_name " %llu", (u64) u._name);
BCH_ALLOC_FIELDS_V2() ca = bch_dev_bkey_exists(c, k.k->p.inode);
#undef x
if (k.k->p.offset < ca->mi.first_bucket ||
k.k->p.offset >= ca->mi.nbuckets)
return "invalid bucket";
return NULL;
}
void bch2_alloc_v4_swab(struct bkey_s k)
{
struct bch_alloc_v4 *a = bkey_s_to_alloc_v4(k).v;
a->journal_seq = swab64(a->journal_seq);
a->flags = swab32(a->flags);
a->dirty_sectors = swab32(a->dirty_sectors);
a->cached_sectors = swab32(a->cached_sectors);
a->io_time[0] = swab64(a->io_time[0]);
a->io_time[1] = swab64(a->io_time[1]);
a->stripe = swab32(a->stripe);
a->nr_external_backpointers = swab32(a->nr_external_backpointers);
}
void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
{
struct bch_alloc_v4 a;
bch2_alloc_to_v4(k, &a);
pr_buf(out, "gen %u oldest_gen %u data_type %s journal_seq %llu need_discard %llu",
a.gen, a.oldest_gen, bch2_data_types[a.data_type],
a.journal_seq, BCH_ALLOC_V4_NEED_DISCARD(&a));
pr_buf(out, " dirty_sectors %u", a.dirty_sectors);
pr_buf(out, " cached_sectors %u", a.cached_sectors);
pr_buf(out, " stripe %u", a.stripe);
pr_buf(out, " stripe_redundancy %u", a.stripe_redundancy);
pr_buf(out, " read_time %llu", a.io_time[READ]);
pr_buf(out, " write_time %llu", a.io_time[WRITE]);
} }
int bch2_alloc_read(struct bch_fs *c, bool gc, bool metadata_only) int bch2_alloc_read(struct bch_fs *c, bool gc, bool metadata_only)
...@@ -345,9 +402,9 @@ int bch2_alloc_read(struct bch_fs *c, bool gc, bool metadata_only) ...@@ -345,9 +402,9 @@ int bch2_alloc_read(struct bch_fs *c, bool gc, bool metadata_only)
struct btree_trans trans; struct btree_trans trans;
struct btree_iter iter; struct btree_iter iter;
struct bkey_s_c k; struct bkey_s_c k;
struct bch_alloc_v4 a;
struct bch_dev *ca; struct bch_dev *ca;
struct bucket *g; struct bucket *g;
struct bkey_alloc_unpacked u;
int ret; int ret;
bch2_trans_init(&trans, c, 0, 0); bch2_trans_init(&trans, c, 0, 0);
...@@ -356,28 +413,28 @@ int bch2_alloc_read(struct bch_fs *c, bool gc, bool metadata_only) ...@@ -356,28 +413,28 @@ int bch2_alloc_read(struct bch_fs *c, bool gc, bool metadata_only)
BTREE_ITER_PREFETCH, k, ret) { BTREE_ITER_PREFETCH, k, ret) {
ca = bch_dev_bkey_exists(c, k.k->p.inode); ca = bch_dev_bkey_exists(c, k.k->p.inode);
g = __bucket(ca, k.k->p.offset, gc); g = __bucket(ca, k.k->p.offset, gc);
u = bch2_alloc_unpack(k); bch2_alloc_to_v4(k, &a);
if (!gc) if (!gc)
*bucket_gen(ca, k.k->p.offset) = u.gen; *bucket_gen(ca, k.k->p.offset) = a.gen;
g->_mark.gen = u.gen; g->_mark.gen = a.gen;
g->io_time[READ] = u.read_time; g->io_time[READ] = a.io_time[READ];
g->io_time[WRITE] = u.write_time; g->io_time[WRITE] = a.io_time[WRITE];
g->oldest_gen = !gc ? u.oldest_gen : u.gen; g->oldest_gen = !gc ? a.oldest_gen : a.gen;
g->gen_valid = 1; g->gen_valid = 1;
if (!gc || if (!gc ||
(metadata_only && (metadata_only &&
(u.data_type == BCH_DATA_user || (a.data_type == BCH_DATA_user ||
u.data_type == BCH_DATA_cached || a.data_type == BCH_DATA_cached ||
u.data_type == BCH_DATA_parity))) { a.data_type == BCH_DATA_parity))) {
g->_mark.data_type = u.data_type; g->_mark.data_type = a.data_type;
g->_mark.dirty_sectors = u.dirty_sectors; g->_mark.dirty_sectors = a.dirty_sectors;
g->_mark.cached_sectors = u.cached_sectors; g->_mark.cached_sectors = a.cached_sectors;
g->_mark.stripe = u.stripe != 0; g->_mark.stripe = a.stripe != 0;
g->stripe = u.stripe; g->stripe = a.stripe;
g->stripe_redundancy = u.stripe_redundancy; g->stripe_redundancy = a.stripe_redundancy;
} }
} }
...@@ -398,29 +455,22 @@ int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev, ...@@ -398,29 +455,22 @@ int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev,
{ {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
struct btree_iter iter; struct btree_iter iter;
struct bkey_s_c k; struct bkey_i_alloc_v4 *a;
struct bkey_alloc_unpacked u; u64 now;
u64 *time, now;
int ret = 0; int ret = 0;
bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, POS(dev, bucket_nr), a = bch2_trans_start_alloc_update(trans, &iter, POS(dev, bucket_nr));
BTREE_ITER_CACHED| ret = PTR_ERR_OR_ZERO(a);
BTREE_ITER_INTENT);
k = bch2_btree_iter_peek_slot(&iter);
ret = bkey_err(k);
if (ret) if (ret)
goto out; return ret;
u = bch2_alloc_unpack(k);
time = rw == READ ? &u.read_time : &u.write_time;
now = atomic64_read(&c->io_clock[rw].now); now = atomic64_read(&c->io_clock[rw].now);
if (*time == now) if (a->v.io_time[rw] == now)
goto out; goto out;
*time = now; a->v.io_time[rw] = now;
ret = bch2_alloc_write(trans, &iter, &u, 0) ?: ret = bch2_trans_update(trans, &iter, &a->k_i, 0) ?:
bch2_trans_commit(trans, NULL, NULL, 0); bch2_trans_commit(trans, NULL, NULL, 0);
out: out:
bch2_trans_iter_exit(trans, &iter); bch2_trans_iter_exit(trans, &iter);
...@@ -604,7 +654,7 @@ static size_t find_reclaimable_buckets(struct bch_fs *c, struct bch_dev *ca) ...@@ -604,7 +654,7 @@ static size_t find_reclaimable_buckets(struct bch_fs *c, struct bch_dev *ca)
static int bucket_invalidate_btree(struct btree_trans *trans, static int bucket_invalidate_btree(struct btree_trans *trans,
struct bch_dev *ca, u64 b, struct bch_dev *ca, u64 b,
struct bkey_alloc_unpacked *u) struct bkey_i_alloc_v4 *a)
{ {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
struct btree_iter iter; struct btree_iter iter;
...@@ -621,16 +671,19 @@ static int bucket_invalidate_btree(struct btree_trans *trans, ...@@ -621,16 +671,19 @@ static int bucket_invalidate_btree(struct btree_trans *trans,
if (ret) if (ret)
goto err; goto err;
*u = bch2_alloc_unpack(k); bkey_alloc_v4_init(&a->k_i);
u->gen++; a->k.p = iter.pos;
u->data_type = 0; bch2_alloc_to_v4(k, &a->v);
u->dirty_sectors = 0; a->v.gen++;
u->cached_sectors = 0; a->v.data_type = 0;
u->read_time = atomic64_read(&c->io_clock[READ].now); a->v.dirty_sectors = 0;
u->write_time = atomic64_read(&c->io_clock[WRITE].now); a->v.cached_sectors = 0;
a->v.io_time[READ] = atomic64_read(&c->io_clock[READ].now);
ret = bch2_alloc_write(trans, &iter, u, a->v.io_time[WRITE] = atomic64_read(&c->io_clock[WRITE].now);
BTREE_TRIGGER_BUCKET_INVALIDATE);
ret = bch2_trans_update(trans, &iter, &a->k_i,
BTREE_TRIGGER_BUCKET_INVALIDATE|
BTREE_UPDATE_NO_KEY_CACHE_COHERENCY);
err: err:
bch2_trans_iter_exit(trans, &iter); bch2_trans_iter_exit(trans, &iter);
return ret; return ret;
...@@ -639,7 +692,7 @@ static int bucket_invalidate_btree(struct btree_trans *trans, ...@@ -639,7 +692,7 @@ static int bucket_invalidate_btree(struct btree_trans *trans,
static int bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca, static int bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca,
u64 *journal_seq, unsigned flags) u64 *journal_seq, unsigned flags)
{ {
struct bkey_alloc_unpacked u; struct bkey_i_alloc_v4 a;
size_t b; size_t b;
u64 commit_seq = 0; u64 commit_seq = 0;
int ret = 0; int ret = 0;
...@@ -671,7 +724,7 @@ static int bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca, ...@@ -671,7 +724,7 @@ static int bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca,
BTREE_INSERT_NOCHECK_RW| BTREE_INSERT_NOCHECK_RW|
BTREE_INSERT_NOFAIL| BTREE_INSERT_NOFAIL|
flags, flags,
bucket_invalidate_btree(&trans, ca, b, &u)); bucket_invalidate_btree(&trans, ca, b, &a));
if (!ret) { if (!ret) {
/* remove from alloc_heap: */ /* remove from alloc_heap: */
...@@ -687,14 +740,14 @@ static int bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca, ...@@ -687,14 +740,14 @@ static int bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca,
* If we invalidating cached data then we need to wait on the * If we invalidating cached data then we need to wait on the
* journal commit: * journal commit:
*/ */
if (u.data_type) if (a.v.data_type)
*journal_seq = max(*journal_seq, commit_seq); *journal_seq = max(*journal_seq, commit_seq);
/* /*
* We already waiting on u.alloc_seq when we filtered out * We already waiting on u.alloc_seq when we filtered out
* buckets that need journal commit: * buckets that need journal commit:
*/ */
BUG_ON(*journal_seq > u.journal_seq); BUG_ON(*journal_seq > a.v.journal_seq);
} else { } else {
size_t b2; size_t b2;
......
...@@ -10,48 +10,14 @@ ...@@ -10,48 +10,14 @@
extern const char * const bch2_allocator_states[]; extern const char * const bch2_allocator_states[];
struct bkey_alloc_unpacked {
u64 journal_seq;
u64 bucket;
u8 dev;
u8 gen;
u8 oldest_gen;
u8 data_type;
#define x(_name, _bits) u##_bits _name;
BCH_ALLOC_FIELDS_V2()
#undef x
};
/* How out of date a pointer gen is allowed to be: */ /* How out of date a pointer gen is allowed to be: */
#define BUCKET_GC_GEN_MAX 96U #define BUCKET_GC_GEN_MAX 96U
/* returns true if not equal */ struct bkey_i_alloc_v4 *
static inline bool bkey_alloc_unpacked_cmp(struct bkey_alloc_unpacked l, bch2_trans_start_alloc_update(struct btree_trans *, struct btree_iter *, struct bpos);
struct bkey_alloc_unpacked r)
{
return l.gen != r.gen ||
l.oldest_gen != r.oldest_gen ||
l.data_type != r.data_type
#define x(_name, ...) || l._name != r._name
BCH_ALLOC_FIELDS_V2()
#undef x
;
}
struct bkey_alloc_buf {
struct bkey_i k;
struct bch_alloc_v3 v;
#define x(_name, _bits) + _bits / 8
u8 _pad[0 + BCH_ALLOC_FIELDS_V2()];
#undef x
} __attribute__((packed, aligned(8)));
struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c); void bch2_alloc_to_v4(struct bkey_s_c, struct bch_alloc_v4 *);
struct bkey_alloc_buf *bch2_alloc_pack(struct btree_trans *, struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *, struct bkey_s_c);
const struct bkey_alloc_unpacked);
int bch2_alloc_write(struct btree_trans *, struct btree_iter *,
struct bkey_alloc_unpacked *, unsigned);
int bch2_bucket_io_time_reset(struct btree_trans *, unsigned, size_t, int); int bch2_bucket_io_time_reset(struct btree_trans *, unsigned, size_t, int);
...@@ -60,6 +26,8 @@ int bch2_bucket_io_time_reset(struct btree_trans *, unsigned, size_t, int); ...@@ -60,6 +26,8 @@ int bch2_bucket_io_time_reset(struct btree_trans *, unsigned, size_t, int);
const char *bch2_alloc_v1_invalid(const struct bch_fs *, struct bkey_s_c); const char *bch2_alloc_v1_invalid(const struct bch_fs *, struct bkey_s_c);
const char *bch2_alloc_v2_invalid(const struct bch_fs *, struct bkey_s_c); const char *bch2_alloc_v2_invalid(const struct bch_fs *, struct bkey_s_c);
const char *bch2_alloc_v3_invalid(const struct bch_fs *, struct bkey_s_c); const char *bch2_alloc_v3_invalid(const struct bch_fs *, struct bkey_s_c);
const char *bch2_alloc_v4_invalid(const struct bch_fs *, struct bkey_s_c k);
void bch2_alloc_v4_swab(struct bkey_s);
void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
#define bch2_bkey_ops_alloc (struct bkey_ops) { \ #define bch2_bkey_ops_alloc (struct bkey_ops) { \
...@@ -80,6 +48,13 @@ void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); ...@@ -80,6 +48,13 @@ void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
.atomic_trigger = bch2_mark_alloc, \ .atomic_trigger = bch2_mark_alloc, \
} }
#define bch2_bkey_ops_alloc_v4 (struct bkey_ops) { \
.key_invalid = bch2_alloc_v4_invalid, \
.val_to_text = bch2_alloc_to_text, \
.swab = bch2_alloc_v4_swab, \
.atomic_trigger = bch2_mark_alloc, \
}
static inline bool bkey_is_alloc(const struct bkey *k) static inline bool bkey_is_alloc(const struct bkey *k)
{ {
return k->type == KEY_TYPE_alloc || return k->type == KEY_TYPE_alloc ||
......
...@@ -82,6 +82,21 @@ ...@@ -82,6 +82,21 @@
typedef uuid_t __uuid_t; typedef uuid_t __uuid_t;
#endif #endif
#define BITMASK(name, type, field, offset, end) \
static const unsigned name##_OFFSET = offset; \
static const unsigned name##_BITS = (end - offset); \
\
static inline __u64 name(const type *k) \
{ \
return (k->field >> offset) & ~(~0ULL << (end - offset)); \
} \
\
static inline void SET_##name(type *k, __u64 v) \
{ \
k->field &= ~(~(~0ULL << (end - offset)) << offset); \
k->field |= (v & ~(~0ULL << (end - offset))) << offset; \
}
#define LE_BITMASK(_bits, name, type, field, offset, end) \ #define LE_BITMASK(_bits, name, type, field, offset, end) \
static const unsigned name##_OFFSET = offset; \ static const unsigned name##_OFFSET = offset; \
static const unsigned name##_BITS = (end - offset); \ static const unsigned name##_BITS = (end - offset); \
...@@ -353,7 +368,8 @@ static inline void bkey_init(struct bkey *k) ...@@ -353,7 +368,8 @@ static inline void bkey_init(struct bkey *k)
x(inode_v2, 23) \ x(inode_v2, 23) \
x(alloc_v3, 24) \ x(alloc_v3, 24) \
x(set, 25) \ x(set, 25) \
x(lru, 26) x(lru, 26) \
x(alloc_v4, 27)
enum bch_bkey_type { enum bch_bkey_type {
#define x(name, nr) KEY_TYPE_##name = nr, #define x(name, nr) KEY_TYPE_##name = nr,
...@@ -903,6 +919,30 @@ struct bch_alloc_v3 { ...@@ -903,6 +919,30 @@ struct bch_alloc_v3 {
__u8 data[]; __u8 data[];
} __attribute__((packed, aligned(8))); } __attribute__((packed, aligned(8)));
struct bch_alloc_v4 {
struct bch_val v;
__u64 journal_seq;
__u32 flags;
__u8 gen;
__u8 oldest_gen;
__u8 data_type;
__u8 stripe_redundancy;
__u32 dirty_sectors;
__u32 cached_sectors;
__u64 io_time[2];
__u32 stripe;
__u32 nr_external_backpointers;
struct bpos backpointers[0];
} __attribute__((packed, aligned(8)));
LE32_BITMASK(BCH_ALLOC_V3_NEED_DISCARD,struct bch_alloc_v3, flags, 0, 1)
LE32_BITMASK(BCH_ALLOC_V3_NEED_INC_GEN,struct bch_alloc_v3, flags, 1, 2)
BITMASK(BCH_ALLOC_V4_NEED_DISCARD, struct bch_alloc_v4, flags, 0, 1)
BITMASK(BCH_ALLOC_V4_NEED_INC_GEN, struct bch_alloc_v4, flags, 1, 2)
BITMASK(BCH_ALLOC_V4_BACKPOINTERS_START,struct bch_alloc_v4, flags, 2, 8)
BITMASK(BCH_ALLOC_V4_NR_BACKPOINTERS, struct bch_alloc_v4, flags, 8, 14)
enum { enum {
#define x(name, _bits) BCH_ALLOC_FIELD_V1_##name, #define x(name, _bits) BCH_ALLOC_FIELD_V1_##name,
BCH_ALLOC_FIELDS_V1() BCH_ALLOC_FIELDS_V1()
......
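For clarity (not part of the patch): given the BITMASK() helper added above, a
field definition like BCH_ALLOC_V4_NEED_DISCARD expands to roughly the
following accessors, which is why bch2_alloc_to_text() above can print the flag
with BCH_ALLOC_V4_NEED_DISCARD(&a):

    static inline __u64 BCH_ALLOC_V4_NEED_DISCARD(const struct bch_alloc_v4 *k)
    {
    	return (k->flags >> 0) & ~(~0ULL << (1 - 0));
    }

    static inline void SET_BCH_ALLOC_V4_NEED_DISCARD(struct bch_alloc_v4 *k, __u64 v)
    {
    	k->flags &= ~(~(~0ULL << (1 - 0)) << 0);
    	k->flags |= (v & ~(~0ULL << (1 - 0))) << 0;
    }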
...@@ -149,7 +149,8 @@ static unsigned bch2_key_types_allowed[] = { ...@@ -149,7 +149,8 @@ static unsigned bch2_key_types_allowed[] = {
(1U << KEY_TYPE_deleted)| (1U << KEY_TYPE_deleted)|
(1U << KEY_TYPE_alloc)| (1U << KEY_TYPE_alloc)|
(1U << KEY_TYPE_alloc_v2)| (1U << KEY_TYPE_alloc_v2)|
(1U << KEY_TYPE_alloc_v3), (1U << KEY_TYPE_alloc_v3)|
(1U << KEY_TYPE_alloc_v4),
[BKEY_TYPE_quotas] = [BKEY_TYPE_quotas] =
(1U << KEY_TYPE_deleted)| (1U << KEY_TYPE_deleted)|
(1U << KEY_TYPE_quota), (1U << KEY_TYPE_quota),
......
...@@ -1306,6 +1306,19 @@ static int bch2_gc_start(struct bch_fs *c, ...@@ -1306,6 +1306,19 @@ static int bch2_gc_start(struct bch_fs *c,
return 0; return 0;
} }
/* returns true if not equal */
static inline bool bch2_alloc_v4_cmp(struct bch_alloc_v4 l,
struct bch_alloc_v4 r)
{
return l.gen != r.gen ||
l.oldest_gen != r.oldest_gen ||
l.data_type != r.data_type ||
l.dirty_sectors != r.dirty_sectors ||
l.cached_sectors != r.cached_sectors ||
l.stripe_redundancy != r.stripe_redundancy ||
l.stripe != r.stripe;
}
static int bch2_alloc_write_key(struct btree_trans *trans, static int bch2_alloc_write_key(struct btree_trans *trans,
struct btree_iter *iter, struct btree_iter *iter,
bool metadata_only) bool metadata_only)
...@@ -1314,8 +1327,8 @@ static int bch2_alloc_write_key(struct btree_trans *trans, ...@@ -1314,8 +1327,8 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
struct bch_dev *ca = bch_dev_bkey_exists(c, iter->pos.inode); struct bch_dev *ca = bch_dev_bkey_exists(c, iter->pos.inode);
struct bucket *g; struct bucket *g;
struct bkey_s_c k; struct bkey_s_c k;
struct bkey_alloc_unpacked old_u, new_u, gc_u; struct bkey_i_alloc_v4 *a;
struct bkey_alloc_buf *a; struct bch_alloc_v4 old, new, gc;
int ret; int ret;
k = bch2_btree_iter_peek_slot(iter); k = bch2_btree_iter_peek_slot(iter);
...@@ -1323,60 +1336,61 @@ static int bch2_alloc_write_key(struct btree_trans *trans, ...@@ -1323,60 +1336,61 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
if (ret) if (ret)
return ret; return ret;
old_u = new_u = bch2_alloc_unpack(k); bch2_alloc_to_v4(k, &old);
new = old;
percpu_down_read(&c->mark_lock); percpu_down_read(&c->mark_lock);
g = gc_bucket(ca, iter->pos.offset); g = gc_bucket(ca, iter->pos.offset);
gc_u = (struct bkey_alloc_unpacked) { gc = (struct bch_alloc_v4) {
.dev = iter->pos.inode,
.bucket = iter->pos.offset,
.gen = g->mark.gen, .gen = g->mark.gen,
.data_type = g->mark.data_type, .data_type = g->mark.data_type,
.dirty_sectors = g->mark.dirty_sectors, .dirty_sectors = g->mark.dirty_sectors,
.cached_sectors = g->mark.cached_sectors, .cached_sectors = g->mark.cached_sectors,
.read_time = g->io_time[READ], .io_time[READ] = g->io_time[READ],
.write_time = g->io_time[WRITE], .io_time[WRITE] = g->io_time[WRITE],
.stripe = g->stripe, .stripe = g->stripe,
.stripe_redundancy = g->stripe_redundancy, .stripe_redundancy = g->stripe_redundancy,
}; };
percpu_up_read(&c->mark_lock); percpu_up_read(&c->mark_lock);
if (metadata_only && if (metadata_only &&
gc_u.data_type != BCH_DATA_sb && gc.data_type != BCH_DATA_sb &&
gc_u.data_type != BCH_DATA_journal && gc.data_type != BCH_DATA_journal &&
gc_u.data_type != BCH_DATA_btree) gc.data_type != BCH_DATA_btree)
return 0; return 0;
if (gen_after(old_u.gen, gc_u.gen)) if (gen_after(old.gen, gc.gen))
return 0; return 0;
#define copy_bucket_field(_f) \ #define copy_bucket_field(_f) \
if (fsck_err_on(new_u._f != gc_u._f, c, \ if (fsck_err_on(new._f != gc._f, c, \
"bucket %llu:%llu gen %u data type %s has wrong " #_f \ "bucket %llu:%llu gen %u data type %s has wrong " #_f \
": got %u, should be %u", \ ": got %u, should be %u", \
iter->pos.inode, iter->pos.offset, \ iter->pos.inode, iter->pos.offset, \
new_u.gen, \ new.gen, \
bch2_data_types[new_u.data_type], \ bch2_data_types[new.data_type], \
new_u._f, gc_u._f)) \ new._f, gc._f)) \
new_u._f = gc_u._f; \ new._f = gc._f; \
copy_bucket_field(gen); copy_bucket_field(gen);
copy_bucket_field(data_type); copy_bucket_field(data_type);
copy_bucket_field(stripe);
copy_bucket_field(dirty_sectors); copy_bucket_field(dirty_sectors);
copy_bucket_field(cached_sectors); copy_bucket_field(cached_sectors);
copy_bucket_field(stripe_redundancy); copy_bucket_field(stripe_redundancy);
copy_bucket_field(stripe); copy_bucket_field(stripe);
#undef copy_bucket_field #undef copy_bucket_field
if (!bkey_alloc_unpacked_cmp(old_u, new_u)) if (!bch2_alloc_v4_cmp(old, new))
return 0; return 0;
a = bch2_alloc_pack(trans, new_u); a = bch2_alloc_to_v4_mut(trans, k);
if (IS_ERR(a)) ret = PTR_ERR_OR_ZERO(a);
return PTR_ERR(a); if (ret)
return ret;
a->v = new;
ret = bch2_trans_update(trans, iter, &a->k, BTREE_TRIGGER_NORUN); ret = bch2_trans_update(trans, iter, &a->k_i, BTREE_TRIGGER_NORUN);
fsck_err: fsck_err:
return ret; return ret;
} }
...@@ -1873,7 +1887,8 @@ static int bch2_alloc_write_oldest_gen(struct btree_trans *trans, struct btree_i ...@@ -1873,7 +1887,8 @@ static int bch2_alloc_write_oldest_gen(struct btree_trans *trans, struct btree_i
{ {
struct bch_dev *ca = bch_dev_bkey_exists(trans->c, iter->pos.inode); struct bch_dev *ca = bch_dev_bkey_exists(trans->c, iter->pos.inode);
struct bkey_s_c k; struct bkey_s_c k;
struct bkey_alloc_unpacked u; struct bch_alloc_v4 a;
struct bkey_i_alloc_v4 *a_mut;
int ret; int ret;
k = bch2_btree_iter_peek_slot(iter); k = bch2_btree_iter_peek_slot(iter);
...@@ -1881,14 +1896,19 @@ static int bch2_alloc_write_oldest_gen(struct btree_trans *trans, struct btree_i ...@@ -1881,14 +1896,19 @@ static int bch2_alloc_write_oldest_gen(struct btree_trans *trans, struct btree_i
if (ret) if (ret)
return ret; return ret;
u = bch2_alloc_unpack(k); bch2_alloc_to_v4(k, &a);
if (u.oldest_gen == ca->oldest_gen[iter->pos.offset]) if (a.oldest_gen == ca->oldest_gen[iter->pos.offset])
return 0; return 0;
u.oldest_gen = ca->oldest_gen[iter->pos.offset]; a_mut = bch2_alloc_to_v4_mut(trans, k);
ret = PTR_ERR_OR_ZERO(a_mut);
if (ret)
return ret;
a_mut->v.oldest_gen = ca->oldest_gen[iter->pos.offset];
return bch2_alloc_write(trans, iter, &u, BTREE_TRIGGER_NORUN); return bch2_trans_update(trans, iter, &a_mut->k_i, 0);
} }
int bch2_gc_gens(struct bch_fs *c) int bch2_gc_gens(struct bch_fs *c)
......
...@@ -678,6 +678,7 @@ enum btree_update_flags { ...@@ -678,6 +678,7 @@ enum btree_update_flags {
((1U << KEY_TYPE_alloc)| \ ((1U << KEY_TYPE_alloc)| \
(1U << KEY_TYPE_alloc_v2)| \ (1U << KEY_TYPE_alloc_v2)| \
(1U << KEY_TYPE_alloc_v3)| \ (1U << KEY_TYPE_alloc_v3)| \
(1U << KEY_TYPE_alloc_v4)| \
(1U << KEY_TYPE_stripe)| \ (1U << KEY_TYPE_stripe)| \
(1U << KEY_TYPE_inode)| \ (1U << KEY_TYPE_inode)| \
(1U << KEY_TYPE_inode_v2)| \ (1U << KEY_TYPE_inode_v2)| \
......
...@@ -508,15 +508,14 @@ int bch2_mark_alloc(struct btree_trans *trans, ...@@ -508,15 +508,14 @@ int bch2_mark_alloc(struct btree_trans *trans,
bool gc = flags & BTREE_TRIGGER_GC; bool gc = flags & BTREE_TRIGGER_GC;
u64 journal_seq = trans->journal_res.seq; u64 journal_seq = trans->journal_res.seq;
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
struct bkey_alloc_unpacked old_u = bch2_alloc_unpack(old); struct bch_alloc_v4 old_a, new_a;
struct bkey_alloc_unpacked new_u = bch2_alloc_unpack(new); struct bch_dev *ca = bch_dev_bkey_exists(c, new.k->p.inode);
struct bch_dev *ca = bch_dev_bkey_exists(c, new_u.dev);
struct bucket *g; struct bucket *g;
struct bucket_mark old_m, m; struct bucket_mark old_m, m;
int ret = 0; int ret = 0;
if (bch2_trans_inconsistent_on(new_u.bucket < ca->mi.first_bucket || if (bch2_trans_inconsistent_on(new.k->p.offset < ca->mi.first_bucket ||
new_u.bucket >= ca->mi.nbuckets, trans, new.k->p.offset >= ca->mi.nbuckets, trans,
"alloc key outside range of device's buckets")) "alloc key outside range of device's buckets"))
return -EIO; return -EIO;
...@@ -527,11 +526,13 @@ int bch2_mark_alloc(struct btree_trans *trans, ...@@ -527,11 +526,13 @@ int bch2_mark_alloc(struct btree_trans *trans,
!(flags & BTREE_TRIGGER_BUCKET_INVALIDATE)) !(flags & BTREE_TRIGGER_BUCKET_INVALIDATE))
return 0; return 0;
bch2_alloc_to_v4(old, &old_a);
bch2_alloc_to_v4(new, &new_a);
if ((flags & BTREE_TRIGGER_INSERT) && if ((flags & BTREE_TRIGGER_INSERT) &&
!old_u.data_type != !new_u.data_type && !old_a.data_type != !new_a.data_type &&
new.k->type == KEY_TYPE_alloc_v3) { new.k->type == KEY_TYPE_alloc_v4) {
struct bch_alloc_v3 *v = (struct bch_alloc_v3 *) new.v; struct bch_alloc_v4 *v = (struct bch_alloc_v4 *) new.v;
u64 old_journal_seq = le64_to_cpu(v->journal_seq);
BUG_ON(!journal_seq); BUG_ON(!journal_seq);
...@@ -540,18 +541,18 @@ int bch2_mark_alloc(struct btree_trans *trans, ...@@ -540,18 +541,18 @@ int bch2_mark_alloc(struct btree_trans *trans,
* before the bucket became empty again, then the we don't have * before the bucket became empty again, then the we don't have
* to wait on a journal flush before we can reuse the bucket: * to wait on a journal flush before we can reuse the bucket:
*/ */
new_u.journal_seq = !new_u.data_type && new_a.journal_seq = !new_a.data_type &&
(journal_seq == old_journal_seq || (journal_seq == v->journal_seq ||
bch2_journal_noflush_seq(&c->journal, old_journal_seq)) bch2_journal_noflush_seq(&c->journal, v->journal_seq))
? 0 : journal_seq; ? 0 : journal_seq;
v->journal_seq = cpu_to_le64(new_u.journal_seq); v->journal_seq = new_a.journal_seq;
} }
if (old_u.data_type && !new_u.data_type && new_u.journal_seq) { if (old_a.data_type && !new_a.data_type && new_a.journal_seq) {
ret = bch2_set_bucket_needs_journal_commit(&c->buckets_waiting_for_journal, ret = bch2_set_bucket_needs_journal_commit(&c->buckets_waiting_for_journal,
c->journal.flushed_seq_ondisk, c->journal.flushed_seq_ondisk,
new_u.dev, new_u.bucket, new.k->p.inode, new.k->p.offset,
new_u.journal_seq); new_a.journal_seq);
if (ret) { if (ret) {
bch2_fs_fatal_error(c, bch2_fs_fatal_error(c,
"error setting bucket_needs_journal_commit: %i", ret); "error setting bucket_needs_journal_commit: %i", ret);
...@@ -560,27 +561,27 @@ int bch2_mark_alloc(struct btree_trans *trans, ...@@ -560,27 +561,27 @@ int bch2_mark_alloc(struct btree_trans *trans,
} }
percpu_down_read(&c->mark_lock); percpu_down_read(&c->mark_lock);
if (!gc && new_u.gen != old_u.gen) if (!gc && new_a.gen != old_a.gen)
*bucket_gen(ca, new_u.bucket) = new_u.gen; *bucket_gen(ca, new.k->p.offset) = new_a.gen;
g = __bucket(ca, new_u.bucket, gc); g = __bucket(ca, new.k->p.offset, gc);
old_m = bucket_cmpxchg(g, m, ({ old_m = bucket_cmpxchg(g, m, ({
m.gen = new_u.gen; m.gen = new_a.gen;
m.data_type = new_u.data_type; m.data_type = new_a.data_type;
m.dirty_sectors = new_u.dirty_sectors; m.dirty_sectors = new_a.dirty_sectors;
m.cached_sectors = new_u.cached_sectors; m.cached_sectors = new_a.cached_sectors;
m.stripe = new_u.stripe != 0; m.stripe = new_a.stripe != 0;
})); }));
bch2_dev_usage_update(c, ca, old_m, m, journal_seq, gc); bch2_dev_usage_update(c, ca, old_m, m, journal_seq, gc);
g->io_time[READ] = new_u.read_time; g->io_time[READ] = new_a.io_time[READ];
g->io_time[WRITE] = new_u.write_time; g->io_time[WRITE] = new_a.io_time[WRITE];
g->oldest_gen = new_u.oldest_gen; g->oldest_gen = new_a.oldest_gen;
g->gen_valid = 1; g->gen_valid = 1;
g->stripe = new_u.stripe; g->stripe = new_a.stripe;
g->stripe_redundancy = new_u.stripe_redundancy; g->stripe_redundancy = new_a.stripe_redundancy;
percpu_up_read(&c->mark_lock); percpu_up_read(&c->mark_lock);
/* /*
...@@ -598,7 +599,7 @@ int bch2_mark_alloc(struct btree_trans *trans, ...@@ -598,7 +599,7 @@ int bch2_mark_alloc(struct btree_trans *trans,
return ret; return ret;
} }
trace_invalidate(ca, bucket_to_sector(ca, new_u.bucket), trace_invalidate(ca, bucket_to_sector(ca, new.k->p.offset),
old_m.cached_sectors); old_m.cached_sectors);
} }
...@@ -1378,50 +1379,32 @@ int bch2_trans_fs_usage_apply(struct btree_trans *trans, ...@@ -1378,50 +1379,32 @@ int bch2_trans_fs_usage_apply(struct btree_trans *trans,
/* trans_mark: */ /* trans_mark: */
static int bch2_trans_start_alloc_update(struct btree_trans *trans, struct btree_iter *iter,
const struct bch_extent_ptr *ptr,
struct bkey_alloc_unpacked *u)
{
struct bch_fs *c = trans->c;
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
struct bkey_s_c k;
int ret;
bch2_trans_iter_init(trans, iter, BTREE_ID_alloc,
POS(ptr->dev, PTR_BUCKET_NR(ca, ptr)),
BTREE_ITER_WITH_UPDATES|
BTREE_ITER_CACHED|
BTREE_ITER_INTENT);
k = bch2_btree_iter_peek_slot(iter);
ret = bkey_err(k);
if (ret) {
bch2_trans_iter_exit(trans, iter);
return ret;
}
*u = bch2_alloc_unpack(k);
return 0;
}
static int bch2_trans_mark_pointer(struct btree_trans *trans, static int bch2_trans_mark_pointer(struct btree_trans *trans,
struct bkey_s_c k, struct extent_ptr_decoded p, struct bkey_s_c k, struct extent_ptr_decoded p,
s64 sectors, enum bch_data_type data_type) s64 sectors, enum bch_data_type data_type)
{ {
struct btree_iter iter; struct btree_iter iter;
struct bkey_alloc_unpacked u; struct bkey_i_alloc_v4 *a;
u16 dirty_sectors, cached_sectors;
int ret; int ret;
ret = bch2_trans_start_alloc_update(trans, &iter, &p.ptr, &u); a = bch2_trans_start_alloc_update(trans, &iter, PTR_BUCKET_POS(trans->c, &p.ptr));
if (ret) if (IS_ERR(a))
return ret; return PTR_ERR(a);
dirty_sectors = a->v.dirty_sectors;
cached_sectors = a->v.cached_sectors;
ret = __mark_pointer(trans, k, &p.ptr, sectors, data_type, ret = __mark_pointer(trans, k, &p.ptr, sectors, data_type,
u.gen, &u.data_type, a->v.gen, &a->v.data_type,
&u.dirty_sectors, &u.cached_sectors); &dirty_sectors, &cached_sectors);
if (ret) if (ret)
goto out; goto out;
ret = bch2_alloc_write(trans, &iter, &u, 0); a->v.dirty_sectors = dirty_sectors;
a->v.cached_sectors = cached_sectors;
ret = bch2_trans_update(trans, &iter, &a->k_i, 0);
if (ret) if (ret)
goto out; goto out;
out: out:
...@@ -1554,7 +1537,7 @@ static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans, ...@@ -1554,7 +1537,7 @@ static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans,
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
const struct bch_extent_ptr *ptr = &s.v->ptrs[idx]; const struct bch_extent_ptr *ptr = &s.v->ptrs[idx];
struct btree_iter iter; struct btree_iter iter;
struct bkey_alloc_unpacked u; struct bkey_i_alloc_v4 *a;
enum bch_data_type data_type = idx >= s.v->nr_blocks - s.v->nr_redundant enum bch_data_type data_type = idx >= s.v->nr_blocks - s.v->nr_redundant
? BCH_DATA_parity : 0; ? BCH_DATA_parity : 0;
s64 sectors = data_type ? le16_to_cpu(s.v->sectors) : 0; s64 sectors = data_type ? le16_to_cpu(s.v->sectors) : 0;
...@@ -1563,59 +1546,59 @@ static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans, ...@@ -1563,59 +1546,59 @@ static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans,
if (deleting) if (deleting)
sectors = -sectors; sectors = -sectors;
ret = bch2_trans_start_alloc_update(trans, &iter, ptr, &u); a = bch2_trans_start_alloc_update(trans, &iter, PTR_BUCKET_POS(c, ptr));
if (ret) if (IS_ERR(a))
return ret; return PTR_ERR(a);
ret = check_bucket_ref(c, s.s_c, ptr, sectors, data_type, ret = check_bucket_ref(c, s.s_c, ptr, sectors, data_type,
u.gen, u.data_type, a->v.gen, a->v.data_type,
u.dirty_sectors, u.cached_sectors); a->v.dirty_sectors, a->v.cached_sectors);
if (ret) if (ret)
goto err; goto err;
if (!deleting) { if (!deleting) {
if (bch2_trans_inconsistent_on(u.stripe || if (bch2_trans_inconsistent_on(a->v.stripe ||
u.stripe_redundancy, trans, a->v.stripe_redundancy, trans,
"bucket %llu:%llu gen %u data type %s dirty_sectors %u: multiple stripes using same bucket (%u, %llu)", "bucket %llu:%llu gen %u data type %s dirty_sectors %u: multiple stripes using same bucket (%u, %llu)",
iter.pos.inode, iter.pos.offset, u.gen, iter.pos.inode, iter.pos.offset, a->v.gen,
bch2_data_types[u.data_type], bch2_data_types[a->v.data_type],
u.dirty_sectors, a->v.dirty_sectors,
u.stripe, s.k->p.offset)) { a->v.stripe, s.k->p.offset)) {
ret = -EIO; ret = -EIO;
goto err; goto err;
} }
if (bch2_trans_inconsistent_on(data_type && u.dirty_sectors, trans, if (bch2_trans_inconsistent_on(data_type && a->v.dirty_sectors, trans,
"bucket %llu:%llu gen %u data type %s dirty_sectors %u: data already in stripe bucket %llu", "bucket %llu:%llu gen %u data type %s dirty_sectors %u: data already in stripe bucket %llu",
iter.pos.inode, iter.pos.offset, u.gen, iter.pos.inode, iter.pos.offset, a->v.gen,
bch2_data_types[u.data_type], bch2_data_types[a->v.data_type],
u.dirty_sectors, a->v.dirty_sectors,
s.k->p.offset)) { s.k->p.offset)) {
ret = -EIO; ret = -EIO;
goto err; goto err;
} }
u.stripe = s.k->p.offset; a->v.stripe = s.k->p.offset;
u.stripe_redundancy = s.v->nr_redundant; a->v.stripe_redundancy = s.v->nr_redundant;
} else { } else {
if (bch2_trans_inconsistent_on(u.stripe != s.k->p.offset || if (bch2_trans_inconsistent_on(a->v.stripe != s.k->p.offset ||
u.stripe_redundancy != s.v->nr_redundant, trans, a->v.stripe_redundancy != s.v->nr_redundant, trans,
"bucket %llu:%llu gen %u: not marked as stripe when deleting stripe %llu (got %u)", "bucket %llu:%llu gen %u: not marked as stripe when deleting stripe %llu (got %u)",
iter.pos.inode, iter.pos.offset, u.gen, iter.pos.inode, iter.pos.offset, a->v.gen,
s.k->p.offset, u.stripe)) { s.k->p.offset, a->v.stripe)) {
ret = -EIO; ret = -EIO;
goto err; goto err;
} }
u.stripe = 0; a->v.stripe = 0;
u.stripe_redundancy = 0; a->v.stripe_redundancy = 0;
} }
u.dirty_sectors += sectors; a->v.dirty_sectors += sectors;
if (data_type) if (data_type)
u.data_type = !deleting ? data_type : 0; a->v.data_type = !deleting ? data_type : 0;
ret = bch2_alloc_write(trans, &iter, &u, 0); ret = bch2_trans_update(trans, &iter, &a->k_i, 0);
if (ret) if (ret)
goto err; goto err;
err: err:
...@@ -1845,11 +1828,7 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, ...@@ -1845,11 +1828,7 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
{ {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
struct btree_iter iter; struct btree_iter iter;
struct bkey_alloc_unpacked u; struct bkey_i_alloc_v4 *a;
struct bch_extent_ptr ptr = {
.dev = ca->dev_idx,
.offset = bucket_to_sector(ca, b),
};
int ret = 0; int ret = 0;
/* /*
...@@ -1858,26 +1837,27 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, ...@@ -1858,26 +1837,27 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
if (b >= ca->mi.nbuckets) if (b >= ca->mi.nbuckets)
return 0; return 0;
ret = bch2_trans_start_alloc_update(trans, &iter, &ptr, &u); a = bch2_trans_start_alloc_update(trans, &iter, POS(ca->dev_idx, b));
if (ret) if (IS_ERR(a))
return ret; return PTR_ERR(a);
if (u.data_type && u.data_type != type) { if (a->v.data_type && a->v.data_type != type) {
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
"bucket %llu:%llu gen %u different types of data in same bucket: %s, %s\n" "bucket %llu:%llu gen %u different types of data in same bucket: %s, %s\n"
"while marking %s", "while marking %s",
iter.pos.inode, iter.pos.offset, u.gen, iter.pos.inode, iter.pos.offset, a->v.gen,
bch2_data_types[u.data_type], bch2_data_types[a->v.data_type],
bch2_data_types[type], bch2_data_types[type],
bch2_data_types[type]); bch2_data_types[type]);
ret = -EIO; ret = -EIO;
goto out; goto out;
} }
u.data_type = type; a->v.data_type = type;
u.dirty_sectors = sectors; a->v.dirty_sectors = sectors;
ret = bch2_alloc_write(trans, &iter, &u, 0); ret = bch2_trans_update(trans, &iter, &a->k_i,
BTREE_UPDATE_NO_KEY_CACHE_COHERENCY);
if (ret) if (ret)
goto out; goto out;
out: out:
......
...@@ -97,6 +97,14 @@ static inline size_t PTR_BUCKET_NR(const struct bch_dev *ca, ...@@ -97,6 +97,14 @@ static inline size_t PTR_BUCKET_NR(const struct bch_dev *ca,
return sector_to_bucket(ca, ptr->offset); return sector_to_bucket(ca, ptr->offset);
} }
static inline struct bpos PTR_BUCKET_POS(const struct bch_fs *c,
const struct bch_extent_ptr *ptr)
{
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
return POS(ptr->dev, PTR_BUCKET_NR(ca, ptr));
}
static inline struct bucket *PTR_GC_BUCKET(struct bch_dev *ca, static inline struct bucket *PTR_GC_BUCKET(struct bch_dev *ca,
const struct bch_extent_ptr *ptr) const struct bch_extent_ptr *ptr)
{ {
......
...@@ -111,7 +111,7 @@ struct copygc_heap_entry { ...@@ -111,7 +111,7 @@ struct copygc_heap_entry {
u8 dev; u8 dev;
u8 gen; u8 gen;
u8 replicas; u8 replicas;
u16 fragmentation; u32 fragmentation;
u32 sectors; u32 sectors;
u64 offset; u64 offset;
}; };
......
...@@ -129,7 +129,7 @@ static int walk_buckets_to_copygc(struct bch_fs *c) ...@@ -129,7 +129,7 @@ static int walk_buckets_to_copygc(struct bch_fs *c)
struct btree_trans trans; struct btree_trans trans;
struct btree_iter iter; struct btree_iter iter;
struct bkey_s_c k; struct bkey_s_c k;
struct bkey_alloc_unpacked u; struct bch_alloc_v4 a;
int ret; int ret;
bch2_trans_init(&trans, c, 0, 0); bch2_trans_init(&trans, c, 0, 0);
...@@ -139,20 +139,20 @@ static int walk_buckets_to_copygc(struct bch_fs *c) ...@@ -139,20 +139,20 @@ static int walk_buckets_to_copygc(struct bch_fs *c)
struct bch_dev *ca = bch_dev_bkey_exists(c, iter.pos.inode); struct bch_dev *ca = bch_dev_bkey_exists(c, iter.pos.inode);
struct copygc_heap_entry e; struct copygc_heap_entry e;
u = bch2_alloc_unpack(k); bch2_alloc_to_v4(k, &a);
if (u.data_type != BCH_DATA_user || if (a.data_type != BCH_DATA_user ||
u.dirty_sectors >= ca->mi.bucket_size || a.dirty_sectors >= ca->mi.bucket_size ||
bch2_bucket_is_open(c, iter.pos.inode, iter.pos.offset)) bch2_bucket_is_open(c, iter.pos.inode, iter.pos.offset))
continue; continue;
e = (struct copygc_heap_entry) { e = (struct copygc_heap_entry) {
.dev = iter.pos.inode, .dev = iter.pos.inode,
.gen = u.gen, .gen = a.gen,
.replicas = 1 + u.stripe_redundancy, .replicas = 1 + a.stripe_redundancy,
.fragmentation = u.dirty_sectors * (1U << 15) .fragmentation = div_u64((u64) a.dirty_sectors * (1ULL << 31),
/ ca->mi.bucket_size, ca->mi.bucket_size),
.sectors = u.dirty_sectors, .sectors = a.dirty_sectors,
.offset = bucket_to_sector(ca, iter.pos.offset), .offset = bucket_to_sector(ca, iter.pos.offset),
}; };
heap_add_or_replace(h, e, -fragmentation_cmp, NULL); heap_add_or_replace(h, e, -fragmentation_cmp, NULL);
...@@ -180,7 +180,7 @@ static int check_copygc_was_done(struct bch_fs *c, ...@@ -180,7 +180,7 @@ static int check_copygc_was_done(struct bch_fs *c,
struct btree_trans trans; struct btree_trans trans;
struct btree_iter iter; struct btree_iter iter;
struct bkey_s_c k; struct bkey_s_c k;
struct bkey_alloc_unpacked u; struct bch_alloc_v4 a;
struct copygc_heap_entry *i; struct copygc_heap_entry *i;
int ret = 0; int ret = 0;
...@@ -199,10 +199,10 @@ static int check_copygc_was_done(struct bch_fs *c, ...@@ -199,10 +199,10 @@ static int check_copygc_was_done(struct bch_fs *c,
if (ret) if (ret)
break; break;
u = bch2_alloc_unpack(k); bch2_alloc_to_v4(k, &a);
if (u.gen == i->gen && u.dirty_sectors) { if (a.gen == i->gen && a.dirty_sectors) {
*sectors_not_moved += u.dirty_sectors; *sectors_not_moved += a.dirty_sectors;
*buckets_not_moved += 1; *buckets_not_moved += 1;
} }
} }
......