Commit 03e183cb, authored and committed by Kent Overstreet

bcachefs: Verify fs hasn't been modified before going rw

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent 134915f3
...@@ -390,6 +390,7 @@ struct bch_dev { ...@@ -390,6 +390,7 @@ struct bch_dev {
char name[BDEVNAME_SIZE]; char name[BDEVNAME_SIZE];
struct bch_sb_handle disk_sb; struct bch_sb_handle disk_sb;
struct bch_sb *sb_read_scratch;
int sb_write_error; int sb_write_error;
struct bch_devs_mask self; struct bch_devs_mask self;
......
...@@ -1682,6 +1682,7 @@ static int bch2_remount(struct super_block *sb, int *flags, char *data) ...@@ -1682,6 +1682,7 @@ static int bch2_remount(struct super_block *sb, int *flags, char *data)
ret = bch2_fs_read_write(c); ret = bch2_fs_read_write(c);
if (ret) { if (ret) {
bch_err(c, "error going rw: %i", ret); bch_err(c, "error going rw: %i", ret);
mutex_unlock(&c->state_lock);
return -EINVAL; return -EINVAL;
} }
......
...@@ -107,10 +107,11 @@ static int journal_replay_entry_early(struct bch_fs *c, ...@@ -107,10 +107,11 @@ static int journal_replay_entry_early(struct bch_fs *c,
} }
static int verify_superblock_clean(struct bch_fs *c, static int verify_superblock_clean(struct bch_fs *c,
struct bch_sb_field_clean *clean, struct bch_sb_field_clean **cleanp,
struct jset *j) struct jset *j)
{ {
unsigned i; unsigned i;
struct bch_sb_field_clean *clean = *cleanp;
int ret = 0; int ret = 0;
if (!clean || !j) if (!clean || !j)
...@@ -120,11 +121,9 @@ static int verify_superblock_clean(struct bch_fs *c, ...@@ -120,11 +121,9 @@ static int verify_superblock_clean(struct bch_fs *c,
"superblock journal seq (%llu) doesn't match journal (%llu) after clean shutdown", "superblock journal seq (%llu) doesn't match journal (%llu) after clean shutdown",
le64_to_cpu(clean->journal_seq), le64_to_cpu(clean->journal_seq),
le64_to_cpu(j->seq))) { le64_to_cpu(j->seq))) {
ret = bch2_fs_mark_dirty(c); kfree(clean);
if (ret) { *cleanp = NULL;
bch_err(c, "error going rw"); return 0;
return ret;
}
} }
mustfix_fsck_err_on(j->read_clock != clean->read_clock, c, mustfix_fsck_err_on(j->read_clock != clean->read_clock, c,
...@@ -236,7 +235,7 @@ int bch2_fs_recovery(struct bch_fs *c) ...@@ -236,7 +235,7 @@ int bch2_fs_recovery(struct bch_fs *c)
BUG_ON(ret); BUG_ON(ret);
} }
ret = verify_superblock_clean(c, clean, j); ret = verify_superblock_clean(c, &clean, j);
if (ret) if (ret)
goto err; goto err;
...@@ -430,7 +429,7 @@ int bch2_fs_initialize(struct bch_fs *c) ...@@ -430,7 +429,7 @@ int bch2_fs_initialize(struct bch_fs *c)
bch2_journal_set_replay_done(&c->journal); bch2_journal_set_replay_done(&c->journal);
err = "error going read write"; err = "error going read write";
ret = bch2_fs_read_write_early(c); ret = __bch2_fs_read_write(c, true);
if (ret) if (ret)
goto err; goto err;
......
...@@ -509,6 +509,8 @@ static const char *read_one_super(struct bch_sb_handle *sb, u64 offset) ...@@ -509,6 +509,8 @@ static const char *read_one_super(struct bch_sb_handle *sb, u64 offset)
if (bch2_crc_cmp(csum, sb->sb->csum)) if (bch2_crc_cmp(csum, sb->sb->csum))
return "bad checksum reading superblock"; return "bad checksum reading superblock";
sb->seq = le64_to_cpu(sb->sb->seq);
return NULL; return NULL;
} }
...@@ -642,6 +644,25 @@ static void write_super_endio(struct bio *bio) ...@@ -642,6 +644,25 @@ static void write_super_endio(struct bio *bio)
percpu_ref_put(&ca->io_ref); percpu_ref_put(&ca->io_ref);
} }
/*
 * Queue a read of the first superblock location on @ca
 * (layout.sb_offset[0]) into the per-device buffer ca->sb_read_scratch.
 *
 * The device's own superblock bio (ca->disk_sb.bio) is reset and reused for
 * the request, which is sized at 4096 bytes. The bio is submitted against
 * the c->sb_write closure; this function does not wait for it to finish.
 * Completion is delivered to write_super_endio with @ca as bi_private.
 */
static void read_back_super(struct bch_fs *c, struct bch_dev *ca)
{
	struct bio *rbio = ca->disk_sb.bio;

	bio_reset(rbio, ca->disk_sb.bdev, REQ_OP_READ|REQ_SYNC|REQ_META);

	/* Completion hookup */
	rbio->bi_private	= ca;
	rbio->bi_end_io		= write_super_endio;

	/* Where to read from, and how much */
	rbio->bi_iter.bi_sector	=
		le64_to_cpu(ca->disk_sb.sb->layout.sb_offset[0]);
	rbio->bi_iter.bi_size	= 4096;

	/* Read lands in the scratch buffer, not in the in-memory superblock */
	bch2_bio_map(rbio, ca->sb_read_scratch);

	/* Count this I/O in the device's superblock read statistics */
	this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_SB],
		     bio_sectors(rbio));

	/*
	 * Hold an io_ref across the in-flight bio.
	 * NOTE(review): presumably dropped by write_super_endio on
	 * completion - confirm against that handler.
	 */
	percpu_ref_get(&ca->io_ref);
	closure_bio_submit(rbio, &c->sb_write);
}
static void write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx) static void write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx)
{ {
struct bch_sb *sb = ca->disk_sb.sb; struct bch_sb *sb = ca->disk_sb.sb;
...@@ -669,7 +690,7 @@ static void write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx) ...@@ -669,7 +690,7 @@ static void write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx)
closure_bio_submit(bio, &c->sb_write); closure_bio_submit(bio, &c->sb_write);
} }
void bch2_write_super(struct bch_fs *c) int bch2_write_super(struct bch_fs *c)
{ {
struct closure *cl = &c->sb_write; struct closure *cl = &c->sb_write;
struct bch_dev *ca; struct bch_dev *ca;
...@@ -677,6 +698,7 @@ void bch2_write_super(struct bch_fs *c) ...@@ -677,6 +698,7 @@ void bch2_write_super(struct bch_fs *c)
const char *err; const char *err;
struct bch_devs_mask sb_written; struct bch_devs_mask sb_written;
bool wrote, can_mount_without_written, can_mount_with_written; bool wrote, can_mount_without_written, can_mount_with_written;
int ret = 0;
lockdep_assert_held(&c->sb_lock); lockdep_assert_held(&c->sb_lock);
...@@ -692,6 +714,7 @@ void bch2_write_super(struct bch_fs *c) ...@@ -692,6 +714,7 @@ void bch2_write_super(struct bch_fs *c)
err = bch2_sb_validate(&ca->disk_sb); err = bch2_sb_validate(&ca->disk_sb);
if (err) { if (err) {
bch2_fs_inconsistent(c, "sb invalid before write: %s", err); bch2_fs_inconsistent(c, "sb invalid before write: %s", err);
ret = -1;
goto out; goto out;
} }
} }
...@@ -705,10 +728,27 @@ void bch2_write_super(struct bch_fs *c) ...@@ -705,10 +728,27 @@ void bch2_write_super(struct bch_fs *c)
ca->sb_write_error = 0; ca->sb_write_error = 0;
} }
for_each_online_member(ca, c, i)
read_back_super(c, ca);
closure_sync(cl);
for_each_online_member(ca, c, i) {
if (!ca->sb_write_error &&
ca->disk_sb.seq !=
le64_to_cpu(ca->sb_read_scratch->seq)) {
bch2_fs_fatal_error(c,
"Superblock modified by another process");
percpu_ref_put(&ca->io_ref);
ret = -EROFS;
goto out;
}
}
do { do {
wrote = false; wrote = false;
for_each_online_member(ca, c, i) for_each_online_member(ca, c, i)
if (sb < ca->disk_sb.sb->layout.nr_superblocks) { if (!ca->sb_write_error &&
sb < ca->disk_sb.sb->layout.nr_superblocks) {
write_one_super(c, ca, sb); write_one_super(c, ca, sb);
wrote = true; wrote = true;
} }
...@@ -716,9 +756,12 @@ void bch2_write_super(struct bch_fs *c) ...@@ -716,9 +756,12 @@ void bch2_write_super(struct bch_fs *c)
sb++; sb++;
} while (wrote); } while (wrote);
for_each_online_member(ca, c, i) for_each_online_member(ca, c, i) {
if (ca->sb_write_error) if (ca->sb_write_error)
__clear_bit(ca->dev_idx, sb_written.d); __clear_bit(ca->dev_idx, sb_written.d);
else
ca->disk_sb.seq = le64_to_cpu(ca->disk_sb.sb->seq);
}
nr_wrote = dev_mask_nr(&sb_written); nr_wrote = dev_mask_nr(&sb_written);
...@@ -741,13 +784,15 @@ void bch2_write_super(struct bch_fs *c) ...@@ -741,13 +784,15 @@ void bch2_write_super(struct bch_fs *c)
* written anything (new filesystem), we continue if we'd be able to * written anything (new filesystem), we continue if we'd be able to
* mount with the devices we did successfully write to: * mount with the devices we did successfully write to:
*/ */
bch2_fs_fatal_err_on(!nr_wrote || if (bch2_fs_fatal_err_on(!nr_wrote ||
(can_mount_without_written && (can_mount_without_written &&
!can_mount_with_written), c, !can_mount_with_written), c,
"Unable to write superblock to sufficient devices"); "Unable to write superblock to sufficient devices"))
ret = -1;
out: out:
/* Make new options visible after they're persistent: */ /* Make new options visible after they're persistent: */
bch2_sb_update(c); bch2_sb_update(c);
return ret;
} }
/* BCH_SB_FIELD_journal: */ /* BCH_SB_FIELD_journal: */
...@@ -888,16 +933,20 @@ void bch2_sb_clean_renumber(struct bch_sb_field_clean *clean, int write) ...@@ -888,16 +933,20 @@ void bch2_sb_clean_renumber(struct bch_sb_field_clean *clean, int write)
int bch2_fs_mark_dirty(struct bch_fs *c) int bch2_fs_mark_dirty(struct bch_fs *c)
{ {
int ret;
/*
* Unconditionally write superblock, to verify it hasn't changed before
* we go rw:
*/
mutex_lock(&c->sb_lock); mutex_lock(&c->sb_lock);
if (BCH_SB_CLEAN(c->disk_sb.sb) || SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
(c->disk_sb.sb->compat[0] & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO))) { c->disk_sb.sb->compat[0] &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO);
SET_BCH_SB_CLEAN(c->disk_sb.sb, false); ret = bch2_write_super(c);
c->disk_sb.sb->compat[0] &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO);
bch2_write_super(c);
}
mutex_unlock(&c->sb_lock); mutex_unlock(&c->sb_lock);
return 0; return ret;
} }
struct jset_entry * struct jset_entry *
......
...@@ -89,7 +89,7 @@ int bch2_sb_realloc(struct bch_sb_handle *, unsigned); ...@@ -89,7 +89,7 @@ int bch2_sb_realloc(struct bch_sb_handle *, unsigned);
const char *bch2_sb_validate(struct bch_sb_handle *); const char *bch2_sb_validate(struct bch_sb_handle *);
int bch2_read_super(const char *, struct bch_opts *, struct bch_sb_handle *); int bch2_read_super(const char *, struct bch_opts *, struct bch_sb_handle *);
void bch2_write_super(struct bch_fs *); int bch2_write_super(struct bch_fs *);
/* BCH_SB_FIELD_journal: */ /* BCH_SB_FIELD_journal: */
......
...@@ -366,7 +366,7 @@ static int bch2_fs_read_write_late(struct bch_fs *c) ...@@ -366,7 +366,7 @@ static int bch2_fs_read_write_late(struct bch_fs *c)
return 0; return 0;
} }
static int __bch2_fs_read_write(struct bch_fs *c, bool early) int __bch2_fs_read_write(struct bch_fs *c, bool early)
{ {
struct bch_dev *ca; struct bch_dev *ca;
unsigned i; unsigned i;
...@@ -907,6 +907,7 @@ static void bch2_dev_free(struct bch_dev *ca) ...@@ -907,6 +907,7 @@ static void bch2_dev_free(struct bch_dev *ca)
free_percpu(ca->io_done); free_percpu(ca->io_done);
bioset_exit(&ca->replica_set); bioset_exit(&ca->replica_set);
bch2_dev_buckets_free(ca); bch2_dev_buckets_free(ca);
kfree(ca->sb_read_scratch);
bch2_time_stats_exit(&ca->io_latency[WRITE]); bch2_time_stats_exit(&ca->io_latency[WRITE]);
bch2_time_stats_exit(&ca->io_latency[READ]); bch2_time_stats_exit(&ca->io_latency[READ]);
...@@ -1017,6 +1018,7 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c, ...@@ -1017,6 +1018,7 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c,
0, GFP_KERNEL) || 0, GFP_KERNEL) ||
percpu_ref_init(&ca->io_ref, bch2_dev_io_ref_complete, percpu_ref_init(&ca->io_ref, bch2_dev_io_ref_complete,
PERCPU_REF_INIT_DEAD, GFP_KERNEL) || PERCPU_REF_INIT_DEAD, GFP_KERNEL) ||
!(ca->sb_read_scratch = kmalloc(4096, GFP_KERNEL)) ||
bch2_dev_buckets_alloc(c, ca) || bch2_dev_buckets_alloc(c, ca) ||
bioset_init(&ca->replica_set, 4, bioset_init(&ca->replica_set, 4,
offsetof(struct bch_write_bio, bio), 0) || offsetof(struct bch_write_bio, bio), 0) ||
......
...@@ -218,6 +218,7 @@ struct bch_dev *bch2_dev_lookup(struct bch_fs *, const char *); ...@@ -218,6 +218,7 @@ struct bch_dev *bch2_dev_lookup(struct bch_fs *, const char *);
bool bch2_fs_emergency_read_only(struct bch_fs *); bool bch2_fs_emergency_read_only(struct bch_fs *);
void bch2_fs_read_only(struct bch_fs *); void bch2_fs_read_only(struct bch_fs *);
int __bch2_fs_read_write(struct bch_fs *, bool);
int bch2_fs_read_write(struct bch_fs *); int bch2_fs_read_write(struct bch_fs *);
int bch2_fs_read_write_early(struct bch_fs *); int bch2_fs_read_write_early(struct bch_fs *);
......
...@@ -12,6 +12,7 @@ struct bch_sb_handle { ...@@ -12,6 +12,7 @@ struct bch_sb_handle {
unsigned have_layout:1; unsigned have_layout:1;
unsigned have_bio:1; unsigned have_bio:1;
unsigned fs_sb:1; unsigned fs_sb:1;
u64 seq;
}; };
struct bch_devs_mask { struct bch_devs_mask {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment