Commit 25be2e5d authored by Kent Overstreet, committed by Kent Overstreet

bcachefs: bch_sb_field_journal_v2

Add a new superblock field which represents journal buckets as ranges;
also move the code for the superblock journal fields to journal_sb.c.

This also reworks the code for resizing the journal so that the new
superblock is written before the new journal buckets are used, which
makes resizing a bit safer.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
parent b17d3cec
......@@ -38,6 +38,7 @@ bcachefs-y := \
journal.o \
journal_io.o \
journal_reclaim.o \
journal_sb.o \
journal_seq_blacklist.o \
keylist.o \
migrate.o \
......
......@@ -1027,16 +1027,17 @@ struct bch_sb_field {
__le32 type;
};
#define BCH_SB_FIELDS() \
x(journal, 0) \
x(members, 1) \
x(crypt, 2) \
x(replicas_v0, 3) \
x(quota, 4) \
x(disk_groups, 5) \
x(clean, 6) \
x(replicas, 7) \
x(journal_seq_blacklist, 8)
#define BCH_SB_FIELDS() \
x(journal, 0) \
x(members, 1) \
x(crypt, 2) \
x(replicas_v0, 3) \
x(quota, 4) \
x(disk_groups, 5) \
x(clean, 6) \
x(replicas, 7) \
x(journal_seq_blacklist, 8) \
x(journal_v2, 9)
enum bch_sb_field_type {
#define x(f, nr) BCH_SB_FIELD_##f = nr,
......@@ -1045,6 +1046,14 @@ enum bch_sb_field_type {
BCH_SB_FIELD_NR
};
/*
 * Most superblock fields are replicated in every device's superblock; a few
 * are per-device and are not.
 *
 * This is a bitmask of those per-device fields, with bit positions taken from
 * enum bch_sb_field_type; test a field type with
 * (1U << type) & BCH_SINGLE_DEVICE_SB_FIELDS.
 */
#define BCH_SINGLE_DEVICE_SB_FIELDS \
((1U << BCH_SB_FIELD_journal)| \
(1U << BCH_SB_FIELD_journal_v2))
/* BCH_SB_FIELD_journal: */
struct bch_sb_field_journal {
......@@ -1052,6 +1061,15 @@ struct bch_sb_field_journal {
__le64 buckets[0];
};
/*
 * BCH_SB_FIELD_journal_v2:
 *
 * Journal buckets stored as ranges rather than as one entry per bucket.
 * The number of entries in d[] is implied by the size of the field (see
 * bch2_sb_field_journal_v2_nr_entries()).
 */
struct bch_sb_field_journal_v2 {
struct bch_sb_field field;
/* One run of consecutive journal buckets; both members are little endian: */
struct bch_sb_field_journal_v2_entry {
/* first bucket of the run */
__le64 start;
/* number of buckets in the run, i.e. buckets start .. start + nr - 1 */
__le64 nr;
} d[0];
};
/* BCH_SB_FIELD_members: */
#define BCH_MIN_NR_NBUCKETS (1 << 6)
......
......@@ -15,8 +15,8 @@
#include "journal.h"
#include "journal_io.h"
#include "journal_reclaim.h"
#include "journal_sb.h"
#include "journal_seq_blacklist.h"
#include "super-io.h"
#include "trace.h"
#define x(n) #n,
......@@ -779,28 +779,55 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
{
struct bch_fs *c = ca->fs;
struct journal_device *ja = &ca->journal;
struct bch_sb_field_journal *journal_buckets;
u64 *new_bucket_seq = NULL, *new_buckets = NULL;
struct open_bucket **ob = NULL;
long *bu = NULL;
unsigned i, nr_got = 0, nr_want = nr - ja->nr;
unsigned old_nr = ja->nr;
unsigned old_discard_idx = ja->discard_idx;
unsigned old_dirty_idx_ondisk = ja->dirty_idx_ondisk;
unsigned old_dirty_idx = ja->dirty_idx;
unsigned old_cur_idx = ja->cur_idx;
int ret = 0;
/* don't handle reducing nr of buckets yet: */
if (nr <= ja->nr)
return 0;
if (c) {
bch2_journal_flush_all_pins(&c->journal);
bch2_journal_block(&c->journal);
}
bu = kzalloc(nr_want * sizeof(*bu), GFP_KERNEL);
ob = kzalloc(nr_want * sizeof(*ob), GFP_KERNEL);
new_buckets = kzalloc(nr * sizeof(u64), GFP_KERNEL);
new_bucket_seq = kzalloc(nr * sizeof(u64), GFP_KERNEL);
if (!new_buckets || !new_bucket_seq) {
if (!bu || !ob || !new_buckets || !new_bucket_seq) {
ret = -ENOMEM;
goto err;
goto err_unblock;
}
journal_buckets = bch2_sb_resize_journal(&ca->disk_sb,
nr + sizeof(*journal_buckets) / sizeof(u64));
if (!journal_buckets) {
ret = -ENOSPC;
goto err;
for (nr_got = 0; nr_got < nr_want; nr_got++) {
if (new_fs) {
bu[nr_got] = bch2_bucket_alloc_new_fs(ca);
if (bu[nr_got] < 0) {
ret = -ENOSPC;
break;
}
} else {
rcu_read_lock();
ob[nr_got] = bch2_bucket_alloc(c, ca, RESERVE_none,
false, cl);
rcu_read_unlock();
if (IS_ERR(ob[nr_got])) {
ret = cl ? -EAGAIN : -ENOSPC;
break;
}
bu[nr_got] = ob[nr_got]->bucket;
}
}
if (!nr_got)
goto err_unblock;
/*
* We may be called from the device add path, before the new device has
* actually been added to the running filesystem:
......@@ -813,51 +840,16 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
swap(new_buckets, ja->buckets);
swap(new_bucket_seq, ja->bucket_seq);
if (!new_fs)
spin_unlock(&c->journal.lock);
while (ja->nr < nr) {
struct open_bucket *ob = NULL;
unsigned pos;
long b;
if (new_fs) {
b = bch2_bucket_alloc_new_fs(ca);
if (b < 0) {
ret = -ENOSPC;
goto err;
}
} else {
rcu_read_lock();
ob = bch2_bucket_alloc(c, ca, RESERVE_none,
false, cl);
rcu_read_unlock();
if (IS_ERR(ob)) {
ret = cl ? -EAGAIN : -ENOSPC;
goto err;
}
b = ob->bucket;
}
for (i = 0; i < nr_got; i++) {
unsigned pos = ja->discard_idx ?: ja->nr;
long b = bu[i];
if (c)
spin_lock(&c->journal.lock);
/*
* XXX
* For resize at runtime, we should be writing the new
* superblock before inserting into the journal array
*/
pos = ja->discard_idx ?: ja->nr;
__array_insert_item(ja->buckets, ja->nr, pos);
__array_insert_item(ja->bucket_seq, ja->nr, pos);
__array_insert_item(journal_buckets->buckets, ja->nr, pos);
ja->nr++;
ja->buckets[pos] = b;
ja->bucket_seq[pos] = 0;
journal_buckets->buckets[pos] = cpu_to_le64(b);
if (pos <= ja->discard_idx)
ja->discard_idx = (ja->discard_idx + 1) % ja->nr;
......@@ -867,29 +859,56 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
ja->dirty_idx = (ja->dirty_idx + 1) % ja->nr;
if (pos <= ja->cur_idx)
ja->cur_idx = (ja->cur_idx + 1) % ja->nr;
}
if (c)
spin_unlock(&c->journal.lock);
ret = bch2_journal_buckets_to_sb(c, ca);
if (ret) {
/* Revert: */
swap(new_buckets, ja->buckets);
swap(new_bucket_seq, ja->bucket_seq);
ja->nr = old_nr;
ja->discard_idx = old_discard_idx;
ja->dirty_idx_ondisk = old_dirty_idx_ondisk;
ja->dirty_idx = old_dirty_idx;
ja->cur_idx = old_cur_idx;
}
if (!new_fs)
spin_unlock(&c->journal.lock);
if (!new_fs) {
if (c)
bch2_journal_unblock(&c->journal);
if (ret)
goto err;
if (!new_fs) {
for (i = 0; i < nr_got; i++) {
ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_NOFAIL,
bch2_trans_mark_metadata_bucket(&trans, ca,
b, BCH_DATA_journal,
bu[i], BCH_DATA_journal,
ca->mi.bucket_size));
bch2_open_bucket_put(c, ob);
if (ret)
if (ret) {
bch2_fs_inconsistent(c, "error marking new journal buckets: %i", ret);
goto err;
}
}
}
err:
bch2_sb_resize_journal(&ca->disk_sb,
ja->nr + sizeof(*journal_buckets) / sizeof(u64));
if (ob && !new_fs)
for (i = 0; i < nr_got; i++)
bch2_open_bucket_put(c, ob[i]);
kfree(new_bucket_seq);
kfree(new_buckets);
kfree(ob);
kfree(bu);
return ret;
err_unblock:
if (c)
bch2_journal_unblock(&c->journal);
goto err;
}
/*
......@@ -902,11 +921,15 @@ int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca,
struct journal_device *ja = &ca->journal;
struct closure cl;
unsigned current_nr;
int ret;
int ret = 0;
/* don't handle reducing nr of buckets yet: */
if (nr < ja->nr)
return 0;
closure_init_stack(&cl);
do {
while (ja->nr != nr && (ret == 0 || ret == -EAGAIN)) {
struct disk_reservation disk_res = { 0, 0 };
closure_sync(&cl);
......@@ -934,7 +957,7 @@ int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca,
if (ja->nr != current_nr)
bch2_write_super(c);
mutex_unlock(&c->sb_lock);
} while (ret == -EAGAIN);
}
return ret;
}
......@@ -942,6 +965,7 @@ int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca,
int bch2_dev_journal_alloc(struct bch_dev *ca)
{
unsigned nr;
int ret;
if (dynamic_fault("bcachefs:add:journal_alloc"))
return -ENOMEM;
......@@ -958,7 +982,15 @@ int bch2_dev_journal_alloc(struct bch_dev *ca)
min(1 << 13,
(1 << 24) / ca->mi.bucket_size));
return __bch2_set_nr_journal_buckets(ca, nr, true, NULL);
if (ca->fs)
mutex_lock(&ca->fs->sb_lock);
ret = __bch2_set_nr_journal_buckets(ca, nr, true, NULL);
if (ca->fs)
mutex_unlock(&ca->fs->sb_lock);
return ret;
}
/* startup/shutdown: */
......@@ -1103,9 +1135,20 @@ int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb)
struct journal_device *ja = &ca->journal;
struct bch_sb_field_journal *journal_buckets =
bch2_sb_get_journal(sb);
struct bch_sb_field_journal_v2 *journal_buckets_v2 =
bch2_sb_get_journal_v2(sb);
unsigned i, nr_bvecs;
ja->nr = bch2_nr_journal_buckets(journal_buckets);
ja->nr = 0;
if (journal_buckets_v2) {
unsigned nr = bch2_sb_field_journal_v2_nr_entries(journal_buckets_v2);
for (i = 0; i < nr; i++)
ja->nr += le64_to_cpu(journal_buckets_v2->d[i].nr);
} else if (journal_buckets) {
ja->nr = bch2_nr_journal_buckets(journal_buckets);
}
ja->bucket_seq = kcalloc(ja->nr, sizeof(u64), GFP_KERNEL);
if (!ja->bucket_seq)
......@@ -1123,8 +1166,18 @@ int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb)
if (!ja->buckets)
return -ENOMEM;
for (i = 0; i < ja->nr; i++)
ja->buckets[i] = le64_to_cpu(journal_buckets->buckets[i]);
if (journal_buckets_v2) {
unsigned nr = bch2_sb_field_journal_v2_nr_entries(journal_buckets_v2);
unsigned j, dst = 0;
for (i = 0; i < nr; i++)
for (j = 0; j < le64_to_cpu(journal_buckets_v2->d[i].nr); j++)
ja->buckets[dst++] =
le64_to_cpu(journal_buckets_v2->d[i].start) + j;
} else if (journal_buckets) {
for (i = 0; i < ja->nr; i++)
ja->buckets[i] = le64_to_cpu(journal_buckets->buckets[i]);
}
return 0;
}
......
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "journal_sb.h"
#include "darray.h"
#include <linux/sort.h>
/* BCH_SB_FIELD_journal: */
/* sort() comparison callback: orders two u64 values ascending. */
static int u64_cmp(const void *_l, const void *_r)
{
	u64 lhs = *(const u64 *) _l;
	u64 rhs = *(const u64 *) _r;

	return cmp_int(lhs, rhs);
}
/*
 * Validate a BCH_SB_FIELD_journal superblock field.
 *
 * The bucket list is copied into a temporary array and sorted so that range
 * and duplicate checks only need to look at the ends and at neighbours.
 *
 * @sb:  superblock the field belongs to; its member entry for sb->dev_idx
 *       supplies first_bucket and nbuckets
 * @f:   the field to validate
 * @err: printbuf that receives a description of the first problem found
 *
 * Returns 0 if the field is valid (or lists no buckets), -ENOMEM if the
 * temporary array cannot be allocated, -EINVAL otherwise.
 */
static int bch2_sb_journal_validate(struct bch_sb *sb,
				    struct bch_sb_field *f,
				    struct printbuf *err)
{
	struct bch_sb_field_journal *journal = field_to_type(f, journal);
	struct bch_member *m = bch2_sb_get_members(sb)->members + sb->dev_idx;
	int ret = -EINVAL;
	unsigned nr;
	unsigned i;
	u64 *b;

	nr = bch2_nr_journal_buckets(journal);
	if (!nr)
		return 0;

	/* kmalloc_array() takes the element count first, then the element size */
	b = kmalloc_array(nr, sizeof(*b), GFP_KERNEL);
	if (!b)
		return -ENOMEM;

	for (i = 0; i < nr; i++)
		b[i] = le64_to_cpu(journal->buckets[i]);

	sort(b, nr, sizeof(u64), u64_cmp, NULL);

	/* b[] is sorted: b[0] is the lowest bucket, b[nr - 1] the highest */
	if (!b[0]) {
		pr_buf(err, "journal bucket at sector 0");
		goto err;
	}

	if (b[0] < le16_to_cpu(m->first_bucket)) {
		pr_buf(err, "journal bucket %llu before first bucket %u",
		       b[0], le16_to_cpu(m->first_bucket));
		goto err;
	}

	if (b[nr - 1] >= le64_to_cpu(m->nbuckets)) {
		pr_buf(err, "journal bucket %llu past end of device (nbuckets %llu)",
		       b[nr - 1], le64_to_cpu(m->nbuckets));
		goto err;
	}

	/* After sorting, duplicates are adjacent */
	for (i = 0; i + 1 < nr; i++)
		if (b[i] == b[i + 1]) {
			pr_buf(err, "duplicate journal buckets %llu", b[i]);
			goto err;
		}

	ret = 0;
err:
	kfree(b);
	return ret;
}
/*
 * Print a BCH_SB_FIELD_journal field to @out: the label "Buckets: " followed
 * by every bucket number, each preceded by a space, then a newline.
 */
static void bch2_sb_journal_to_text(struct printbuf *out, struct bch_sb *sb,
				    struct bch_sb_field *f)
{
	struct bch_sb_field_journal *journal = field_to_type(f, journal);
	unsigned nr_buckets = bch2_nr_journal_buckets(journal);
	unsigned idx = 0;

	pr_buf(out, "Buckets: ");

	while (idx < nr_buckets) {
		pr_buf(out, " %llu", le64_to_cpu(journal->buckets[idx]));
		idx++;
	}

	pr_newline(out);
}
/* Validate and to_text callbacks for the BCH_SB_FIELD_journal field: */
const struct bch_sb_field_ops bch_sb_field_ops_journal = {
.validate = bch2_sb_journal_validate,
.to_text = bch2_sb_journal_to_text,
};
/*
 * A range of bucket numbers in CPU byte order, used by
 * bch2_sb_journal_v2_validate(). The range is half open: @start is the first
 * bucket in the range and @end is one past the last.
 */
struct u64_range {
u64 start;
u64 end;
};
static int u64_range_cmp(const void *_l, const void *_r)
{
const struct u64_range *l = _l;
const struct u64_range *r = _r;
return cmp_int(l->start, r->start);
}
static int bch2_sb_journal_v2_validate(struct bch_sb *sb,
struct bch_sb_field *f,
struct printbuf *err)
{
struct bch_sb_field_journal_v2 *journal = field_to_type(f, journal_v2);
struct bch_member *m = bch2_sb_get_members(sb)->members + sb->dev_idx;
int ret = -EINVAL;
unsigned nr;
unsigned i;
struct u64_range *b;
nr = bch2_sb_field_journal_v2_nr_entries(journal);
if (!nr)
return 0;
b = kmalloc_array(sizeof(*b), nr, GFP_KERNEL);
if (!b)
return -ENOMEM;
for (i = 0; i < nr; i++) {
b[i].start = le64_to_cpu(journal->d[i].start);
b[i].end = b[i].start + le64_to_cpu(journal->d[i].nr);
}
sort(b, nr, sizeof(*b), u64_range_cmp, NULL);
if (!b[0].start) {
pr_buf(err, "journal bucket at sector 0");
goto err;
}
if (b[0].start < le16_to_cpu(m->first_bucket)) {
pr_buf(err, "journal bucket %llu before first bucket %u",
b[0].start, le16_to_cpu(m->first_bucket));
goto err;
}
if (b[nr - 1].end > le64_to_cpu(m->nbuckets)) {
pr_buf(err, "journal bucket %llu past end of device (nbuckets %llu)",
b[nr - 1].end - 1, le64_to_cpu(m->nbuckets));
goto err;
}
for (i = 0; i + 1 < nr; i++) {
if (b[i].end > b[i + 1].start) {
pr_buf(err, "duplicate journal buckets in ranges %llu-%llu, %llu-%llu",
b[i].start, b[i].end, b[i + 1].start, b[i + 1].end);
goto err;
}
}
ret = 0;
err:
kfree(b);
return ret;
}
static void bch2_sb_journal_v2_to_text(struct printbuf *out, struct bch_sb *sb,
struct bch_sb_field *f)
{
struct bch_sb_field_journal_v2 *journal = field_to_type(f, journal_v2);
unsigned i, nr = bch2_sb_field_journal_v2_nr_entries(journal);
pr_buf(out, "Buckets: ");
for (i = 0; i < nr; i++)
pr_buf(out, " %llu-%llu",
le64_to_cpu(journal->d[i].start),
le64_to_cpu(journal->d[i].start) + le64_to_cpu(journal->d[i].nr));
pr_newline(out);
}
/* Validate and to_text callbacks for the BCH_SB_FIELD_journal_v2 field: */
const struct bch_sb_field_ops bch_sb_field_ops_journal_v2 = {
.validate = bch2_sb_journal_v2_validate,
.to_text = bch2_sb_journal_v2_to_text,
};
/*
 * Write a device's in-memory journal bucket list (ca->journal) into its
 * superblock as a BCH_SB_FIELD_journal_v2 field, collapsing runs of
 * consecutive bucket numbers into (start, nr) ranges. Any old-style
 * BCH_SB_FIELD_journal field is deleted.
 *
 * @c:  filesystem, or NULL; when non-NULL, c->sb_lock must be held
 * @ca: device whose journal buckets are written to ca->disk_sb
 *
 * If the device has no journal buckets, both journal fields are deleted.
 *
 * Returns 0 on success, -ENOSPC if the superblock field could not be resized.
 */
int bch2_journal_buckets_to_sb(struct bch_fs *c, struct bch_dev *ca)
{
	struct journal_device *ja = &ca->journal;
	struct bch_sb_field_journal_v2 *j;
	unsigned i, dst = 0, nr = 1;

	if (c)
		lockdep_assert_held(&c->sb_lock);

	if (!ja->nr) {
		bch2_sb_field_delete(&ca->disk_sb, BCH_SB_FIELD_journal);
		bch2_sb_field_delete(&ca->disk_sb, BCH_SB_FIELD_journal_v2);
		return 0;
	}

	/* Count the ranges needed: one, plus one per break in consecutiveness */
	for (i = 0; i + 1 < ja->nr; i++)
		if (ja->buckets[i] + 1 != ja->buckets[i + 1])
			nr++;

	/* Superblock field sizes are given in units of u64s */
	j = bch2_sb_resize_journal_v2(&ca->disk_sb,
			(sizeof(*j) + sizeof(j->d[0]) * nr) / sizeof(u64));
	if (!j)
		return -ENOSPC;

	/*
	 * NOTE(review): if deleting a field can relocate other fields within
	 * the superblock buffer, j would need to be looked up again after this
	 * call - confirm against bch2_sb_field_delete().
	 */
	bch2_sb_field_delete(&ca->disk_sb, BCH_SB_FIELD_journal);

	/*
	 * ja->buckets[] is in CPU byte order and the on-disk entries are
	 * __le64, so the conversion is cpu_to_le64(), not le64_to_cpu().
	 */
	j->d[dst].start = cpu_to_le64(ja->buckets[0]);
	j->d[dst].nr	= cpu_to_le64(1);

	for (i = 1; i < ja->nr; i++) {
		if (ja->buckets[i] == ja->buckets[i - 1] + 1) {
			/* Extends the current range */
			le64_add_cpu(&j->d[dst].nr, 1);
		} else {
			/* Starts a new range */
			dst++;
			j->d[dst].start = cpu_to_le64(ja->buckets[i]);
			j->d[dst].nr	= cpu_to_le64(1);
		}
	}

	/* The ranges written must match the count used to size the field */
	BUG_ON(dst + 1 != nr);

	return 0;
}
/* SPDX-License-Identifier: GPL-2.0 */
#include "super-io.h"
#include "vstructs.h"
/*
 * Number of bucket entries in a BCH_SB_FIELD_journal field, derived from
 * where the field ends; 0 if @j is NULL.
 */
static inline unsigned bch2_nr_journal_buckets(struct bch_sb_field_journal *j)
{
	if (!j)
		return 0;

	return (__le64 *) vstruct_end(&j->field) - j->buckets;
}
/*
 * Number of range entries in a BCH_SB_FIELD_journal_v2 field, derived from
 * where the field ends; 0 if @j is NULL.
 */
static inline unsigned bch2_sb_field_journal_v2_nr_entries(struct bch_sb_field_journal_v2 *j)
{
	return j
		? (struct bch_sb_field_journal_v2_entry *) vstruct_end(&j->field) - &j->d[0]
		: 0;
}
extern const struct bch_sb_field_ops bch_sb_field_ops_journal;
extern const struct bch_sb_field_ops bch_sb_field_ops_journal_v2;
int bch2_journal_buckets_to_sb(struct bch_fs *, struct bch_dev *);
......@@ -10,6 +10,7 @@
#include "io.h"
#include "journal.h"
#include "journal_io.h"
#include "journal_sb.h"
#include "journal_seq_blacklist.h"
#include "replicas.h"
#include "quota.h"
......@@ -459,7 +460,7 @@ static void __copy_super(struct bch_sb_handle *dst_handle, struct bch_sb *src)
memcpy(dst->compat, src->compat, sizeof(dst->compat));
for (i = 0; i < BCH_SB_FIELD_NR; i++) {
if (i == BCH_SB_FIELD_journal)
if ((1U << i) & BCH_SINGLE_DEVICE_SB_FIELDS)
continue;
src_f = bch2_sb_field_get(src, i);
......@@ -929,85 +930,6 @@ void __bch2_check_set_feature(struct bch_fs *c, unsigned feat)
mutex_unlock(&c->sb_lock);
}
/* BCH_SB_FIELD_journal: */
/* sort() comparison callback: orders two u64 values ascending. */
static int u64_cmp(const void *_l, const void *_r)
{
	const u64 *lhs = _l;
	const u64 *rhs = _r;

	if (*lhs < *rhs)
		return -1;
	if (*lhs > *rhs)
		return 1;
	return 0;
}
/*
 * Validate a BCH_SB_FIELD_journal superblock field.
 *
 * The bucket list is copied into a temporary array and sorted so that range
 * and duplicate checks only need to look at the ends and at neighbours.
 *
 * @sb:  superblock the field belongs to; its member entry for sb->dev_idx
 *       supplies first_bucket and nbuckets
 * @f:   the field to validate
 * @err: printbuf that receives a description of the first problem found
 *
 * Returns 0 if the field is valid (or lists no buckets), -ENOMEM if the
 * temporary array cannot be allocated, -EINVAL otherwise.
 */
static int bch2_sb_journal_validate(struct bch_sb *sb,
				    struct bch_sb_field *f,
				    struct printbuf *err)
{
	struct bch_sb_field_journal *journal = field_to_type(f, journal);
	struct bch_member *m = bch2_sb_get_members(sb)->members + sb->dev_idx;
	int ret = -EINVAL;
	unsigned nr;
	unsigned i;
	u64 *b;

	nr = bch2_nr_journal_buckets(journal);
	if (!nr)
		return 0;

	/* kmalloc_array() takes the element count first, then the element size */
	b = kmalloc_array(nr, sizeof(*b), GFP_KERNEL);
	if (!b)
		return -ENOMEM;

	for (i = 0; i < nr; i++)
		b[i] = le64_to_cpu(journal->buckets[i]);

	sort(b, nr, sizeof(u64), u64_cmp, NULL);

	/* b[] is sorted: b[0] is the lowest bucket, b[nr - 1] the highest */
	if (!b[0]) {
		pr_buf(err, "journal bucket at sector 0");
		goto err;
	}

	if (b[0] < le16_to_cpu(m->first_bucket)) {
		pr_buf(err, "journal bucket %llu before first bucket %u",
		       b[0], le16_to_cpu(m->first_bucket));
		goto err;
	}

	if (b[nr - 1] >= le64_to_cpu(m->nbuckets)) {
		pr_buf(err, "journal bucket %llu past end of device (nbuckets %llu)",
		       b[nr - 1], le64_to_cpu(m->nbuckets));
		goto err;
	}

	/* After sorting, duplicates are adjacent */
	for (i = 0; i + 1 < nr; i++)
		if (b[i] == b[i + 1]) {
			pr_buf(err, "duplicate journal buckets %llu", b[i]);
			goto err;
		}

	ret = 0;
err:
	kfree(b);
	return ret;
}
/*
 * Print a BCH_SB_FIELD_journal field to @out: the label "Buckets: " followed
 * by every bucket number, each preceded by a space, then a newline.
 */
static void bch2_sb_journal_to_text(struct printbuf *out, struct bch_sb *sb,
				    struct bch_sb_field *f)
{
	struct bch_sb_field_journal *journal = field_to_type(f, journal);
	unsigned nr_buckets = bch2_nr_journal_buckets(journal);
	unsigned idx = 0;

	pr_buf(out, "Buckets: ");

	while (idx < nr_buckets) {
		pr_buf(out, " %llu", le64_to_cpu(journal->buckets[idx]));
		idx++;
	}

	pr_newline(out);
}
/* Validate and to_text callbacks for the BCH_SB_FIELD_journal field: */
static const struct bch_sb_field_ops bch_sb_field_ops_journal = {
.validate = bch2_sb_journal_validate,
.to_text = bch2_sb_journal_to_text,
};
/* BCH_SB_FIELD_members: */
static int bch2_sb_members_validate(struct bch_sb *sb,
......
......@@ -75,15 +75,6 @@ static inline void bch2_check_set_feature(struct bch_fs *c, unsigned feat)
__bch2_check_set_feature(c, feat);
}
/* BCH_SB_FIELD_journal: */
static inline unsigned bch2_nr_journal_buckets(struct bch_sb_field_journal *j)
{
return j
? (__le64 *) vstruct_end(&j->field) - j->buckets
: 0;
}
/* BCH_SB_FIELD_members: */
static inline bool bch2_member_exists(struct bch_member *m)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment