Commit c04ccaa6 authored by Lars Ellenberg, committed by Jens Axboe

drbd: read meta data early, base on-disk offsets on super block

We used to calculate all on-disk meta data offsets, and then compare them
against the stored offsets, basically treating the stored values as magic numbers.

Now with the activity log striping, the activity log size is no longer
fixed.  We need to first read the super block, then base the activity
log and bitmap offsets on the stored offsets/al stripe settings.
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent cccac985
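
For orientation: the activity log (AL) size is now taken from the al_stripes and
al_stripe_size_4k values stored in the meta data super block, and the AL and bitmap
offsets are derived from it, which is why the super block must be read before the
other offsets can be set. The standalone sketch below illustrates that arithmetic for
internal meta data (negative offsets). It is not part of the patch; the constant and
the al_size_4k = al_stripes * al_stripe_size_4k relation are assumptions inferred
from the checks in check_offsets_and_sizes() further down.

/* Standalone sketch, not DRBD code: how a variable-size activity log
 * determines the internal meta data layout. */
#include <stdint.h>
#include <stdio.h>

#define SECT_4kB 8	/* one 4 kB block in 512-byte sectors (cf. MD_4kB_SECT) */

int main(void)
{
	/* example values as they would be read from the super block;
	 * 1 stripe of 32 kB corresponds to the old fixed layout */
	uint32_t al_stripes        = 3;
	uint32_t al_stripe_size_4k = 8;		/* 8 * 4 kB = 32 kB per stripe */

	/* assumed relation: total AL size in 4 kB units, then in sectors */
	uint32_t al_size_4k   = al_stripes * al_stripe_size_4k;
	uint32_t al_size_sect = al_size_4k * SECT_4kB;

	/* internal meta data: offsets are negative, relative to the super block,
	 * with the AL directly below it and the bitmap below the AL */
	int32_t al_offset = -(int32_t)al_size_sect;
	int32_t bm_offset = al_offset - 4096;	/* example: 2 MB of on-disk bitmap */

	printf("al_size_sect=%u al_offset=%d bm_offset=%d\n",
	       al_size_sect, al_offset, bm_offset);
	/* these reproduce on_disk_al_sect / on_disk_bm_sect for the
	 * al_offset < 0 case in check_offsets_and_sizes() */
	printf("on_disk_al_sect=%d on_disk_bm_sect=%d\n",
	       -al_offset, al_offset - bm_offset);
	return 0;
}
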
@@ -168,7 +168,11 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev,
 	bio->bi_end_io = drbd_md_io_complete;
 	bio->bi_rw = rw;
 
-	if (!get_ldev_if_state(mdev, D_ATTACHING)) {  /* Corresponding put_ldev in drbd_md_io_complete() */
+	if (!(rw & WRITE) && mdev->state.disk == D_DISKLESS && mdev->ldev == NULL)
+		/* special case, drbd_md_read() during drbd_adm_attach(): no get_ldev */
+		;
+	else if (!get_ldev_if_state(mdev, D_ATTACHING)) {
+		/* Corresponding put_ldev in drbd_md_io_complete() */
 		dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in _drbd_md_sync_page_io()\n");
 		err = -ENODEV;
 		goto out;
@@ -199,9 +203,10 @@ int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev,
 
 	BUG_ON(!bdev->md_bdev);
 
-	dev_dbg(DEV, "meta_data io: %s [%d]:%s(,%llus,%s)\n",
+	dev_dbg(DEV, "meta_data io: %s [%d]:%s(,%llus,%s) %pS\n",
 	     current->comm, current->pid, __func__,
-	     (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ");
+	     (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ",
+	     (void*)_RET_IP_ );
 
 	if (sector < drbd_md_first_sector(bdev) ||
 	    sector + 7 > drbd_md_last_sector(bdev))
......
@@ -2968,6 +2968,86 @@ static int check_activity_log_stripe_size(struct drbd_conf *mdev,
 	return -EINVAL;
 }
 
+static int check_offsets_and_sizes(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
+{
+	sector_t capacity = drbd_get_capacity(bdev->md_bdev);
+	struct drbd_md *in_core = &bdev->md;
+	s32 on_disk_al_sect;
+	s32 on_disk_bm_sect;
+
+	/* The on-disk size of the activity log, calculated from offsets, and
+	 * the size of the activity log calculated from the stripe settings,
+	 * should match.
+	 * Though we could relax this a bit: it is ok, if the striped activity log
+	 * fits in the available on-disk activity log size.
+	 * Right now, that would break how resize is implemented.
+	 * TODO: make drbd_determine_dev_size() (and the drbdmeta tool) aware
+	 * of possible unused padding space in the on disk layout. */
+	if (in_core->al_offset < 0) {
+		if (in_core->bm_offset > in_core->al_offset)
+			goto err;
+		on_disk_al_sect = -in_core->al_offset;
+		on_disk_bm_sect = in_core->al_offset - in_core->bm_offset;
+	} else {
+		if (in_core->al_offset != MD_4kB_SECT)
+			goto err;
+		if (in_core->bm_offset < in_core->al_offset + in_core->al_size_4k * MD_4kB_SECT)
+			goto err;
+
+		on_disk_al_sect = in_core->bm_offset - MD_4kB_SECT;
+		on_disk_bm_sect = in_core->md_size_sect - in_core->bm_offset;
+	}
+
+	/* old fixed size meta data is exactly that: fixed. */
+	if (in_core->meta_dev_idx >= 0) {
+		if (in_core->md_size_sect != MD_128MB_SECT
+		||  in_core->al_offset != MD_4kB_SECT
+		||  in_core->bm_offset != MD_4kB_SECT + MD_32kB_SECT
+		||  in_core->al_stripes != 1
+		||  in_core->al_stripe_size_4k != MD_32kB_SECT/8)
+			goto err;
+	}
+
+	if (capacity < in_core->md_size_sect)
+		goto err;
+	if (capacity - in_core->md_size_sect < drbd_md_first_sector(bdev))
+		goto err;
+
+	/* should be aligned, and at least 32k */
+	if ((on_disk_al_sect & 7) || (on_disk_al_sect < MD_32kB_SECT))
+		goto err;
+
+	/* should fit (for now: exactly) into the available on-disk space;
+	 * overflow prevention is in check_activity_log_stripe_size() above. */
+	if (on_disk_al_sect != in_core->al_size_4k * MD_4kB_SECT)
+		goto err;
+
+	/* again, should be aligned */
+	if (in_core->bm_offset & 7)
+		goto err;
+
+	/* FIXME check for device grow with flex external meta data? */
+
+	/* can the available bitmap space cover the last agreed device size? */
+	if (on_disk_bm_sect < (in_core->la_size_sect+7)/MD_4kB_SECT/8/512)
+		goto err;
+
+	return 0;
+
+err:
+	dev_err(DEV, "meta data offsets don't make sense: idx=%d "
+		"al_s=%u, al_sz4k=%u, al_offset=%d, bm_offset=%d, "
+		"md_size_sect=%u, la_size=%llu, md_capacity=%llu\n",
+		in_core->meta_dev_idx,
+		in_core->al_stripes, in_core->al_stripe_size_4k,
+		in_core->al_offset, in_core->bm_offset, in_core->md_size_sect,
+		(unsigned long long)in_core->la_size_sect,
+		(unsigned long long)capacity);
+
+	return -EINVAL;
+}
+
+
 /**
  * drbd_md_read() - Reads in the meta data super block
  * @mdev: DRBD device.
@@ -2976,7 +3056,8 @@ static int check_activity_log_stripe_size(struct drbd_conf *mdev,
  * Return NO_ERROR on success, and an enum drbd_ret_code in case
  * something goes wrong.
  *
- * Called exactly once during drbd_adm_attach()
+ * Called exactly once during drbd_adm_attach(), while still being D_DISKLESS,
+ * even before @bdev is assigned to @mdev->ldev.
  */
 int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 {
@@ -2984,14 +3065,15 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 	u32 magic, flags;
 	int i, rv = NO_ERROR;
 
-	if (!get_ldev_if_state(mdev, D_ATTACHING))
-		return ERR_IO_MD_DISK;
+	if (mdev->state.disk != D_DISKLESS)
+		return ERR_DISK_CONFIGURED;
 
 	buffer = drbd_md_get_buffer(mdev);
 	if (!buffer)
-		goto out;
+		return ERR_NOMEM;
 
-	/* First, figure out where our meta data superblock is located. */
+	/* First, figure out where our meta data superblock is located,
+	 * and read it. */
 	bdev->md.meta_dev_idx = bdev->disk_conf->meta_dev_idx;
 	bdev->md.md_offset = drbd_md_ss(bdev);
@@ -3022,14 +3104,29 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 		goto err;
 	}
 
-	if (check_activity_log_stripe_size(mdev, buffer, &bdev->md))
+	if (be32_to_cpu(buffer->bm_bytes_per_bit) != BM_BLOCK_SIZE) {
+		dev_err(DEV, "unexpected bm_bytes_per_bit: %u (expected %u)\n",
+			be32_to_cpu(buffer->bm_bytes_per_bit), BM_BLOCK_SIZE);
 		goto err;
+	}
 
-	if (be32_to_cpu(buffer->al_offset) != bdev->md.al_offset) {
-		dev_err(DEV, "unexpected al_offset: %d (expected %d)\n",
-			be32_to_cpu(buffer->al_offset), bdev->md.al_offset);
+	/* convert to in_core endian */
+	bdev->md.la_size_sect = be64_to_cpu(buffer->la_size_sect);
+	for (i = UI_CURRENT; i < UI_SIZE; i++)
+		bdev->md.uuid[i] = be64_to_cpu(buffer->uuid[i]);
+	bdev->md.flags = be32_to_cpu(buffer->flags);
+	bdev->md.device_uuid = be64_to_cpu(buffer->device_uuid);
+
+	bdev->md.md_size_sect = be32_to_cpu(buffer->md_size_sect);
+	bdev->md.al_offset = be32_to_cpu(buffer->al_offset);
+	bdev->md.bm_offset = be32_to_cpu(buffer->bm_offset);
+
+	if (check_activity_log_stripe_size(mdev, buffer, &bdev->md))
 		goto err;
-	}
+	if (check_offsets_and_sizes(mdev, bdev))
+		goto err;
+
 	if (be32_to_cpu(buffer->bm_offset) != bdev->md.bm_offset) {
 		dev_err(DEV, "unexpected bm_offset: %d (expected %d)\n",
 			be32_to_cpu(buffer->bm_offset), bdev->md.bm_offset);
@@ -3041,20 +3138,8 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 		goto err;
 	}
 
-	if (be32_to_cpu(buffer->bm_bytes_per_bit) != BM_BLOCK_SIZE) {
-		dev_err(DEV, "unexpected bm_bytes_per_bit: %u (expected %u)\n",
-			be32_to_cpu(buffer->bm_bytes_per_bit), BM_BLOCK_SIZE);
-		goto err;
-	}
-
 	rv = NO_ERROR;
 
-	bdev->md.la_size_sect = be64_to_cpu(buffer->la_size_sect);
-	for (i = UI_CURRENT; i < UI_SIZE; i++)
-		bdev->md.uuid[i] = be64_to_cpu(buffer->uuid[i]);
-	bdev->md.flags = be32_to_cpu(buffer->flags);
-	bdev->md.device_uuid = be64_to_cpu(buffer->device_uuid);
-
 	spin_lock_irq(&mdev->tconn->req_lock);
 	if (mdev->state.conn < C_CONNECTED) {
 		unsigned int peer;
@@ -3066,8 +3151,6 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 
  err:
 	drbd_md_put_buffer(mdev);
- out:
-	put_ldev(mdev);
 
 	return rv;
 }
......
@@ -721,7 +721,7 @@ static void drbd_md_set_sector_offsets(struct drbd_conf *mdev,
 				       struct drbd_backing_dev *bdev)
 {
 	sector_t md_size_sect = 0;
-	unsigned int al_size_sect = MD_32kB_SECT;
+	unsigned int al_size_sect = bdev->md.al_size_4k * 8;
 
 	bdev->md.md_offset = drbd_md_ss(bdev);
 
@@ -1413,8 +1413,11 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 		goto fail;
 	}
 
-	/* RT - for drbd_get_max_capacity() DRBD_MD_INDEX_FLEX_INT */
-	drbd_md_set_sector_offsets(mdev, nbc);
+	/* Read our meta data super block early.
+	 * This also sets other on-disk offsets. */
+	retcode = drbd_md_read(mdev, nbc);
+	if (retcode != NO_ERROR)
+		goto fail;
 
 	if (drbd_get_max_capacity(nbc) < new_disk_conf->disk_size) {
 		dev_err(DEV, "max capacity %llu smaller than disk size %llu\n",
@@ -1481,8 +1484,6 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	if (!get_ldev_if_state(mdev, D_ATTACHING))
 		goto force_diskless;
 
-	drbd_md_set_sector_offsets(mdev, nbc);
-
 	if (!mdev->bitmap) {
 		if (drbd_bm_init(mdev)) {
 			retcode = ERR_NOMEM;
@@ -1490,10 +1491,6 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 		}
 	}
 
-	retcode = drbd_md_read(mdev, nbc);
-	if (retcode != NO_ERROR)
-		goto force_diskless_dec;
-
 	if (mdev->state.conn < C_CONNECTED &&
 	    mdev->state.role == R_PRIMARY &&
 	    (mdev->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) {
......
@@ -89,7 +89,8 @@ void drbd_md_io_complete(struct bio *bio, int error)
 	md_io->done = 1;
 	wake_up(&mdev->misc_wait);
 	bio_put(bio);
-	put_ldev(mdev);
+	if (mdev->ldev) /* special case: drbd_md_read() during drbd_adm_attach() */
+		put_ldev(mdev);
 }
 
 /* reads on behalf of the partner,
......
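
A worked example for the bitmap-coverage check in check_offsets_and_sizes(),
on_disk_bm_sect < (la_size_sect+7)/MD_4kB_SECT/8/512: one bitmap bit covers one
4 kB block (BM_BLOCK_SIZE, as checked in drbd_md_read() above), so one 512-byte
bitmap sector covers 512 * 8 * 4 kB = 16 MB of data. The sketch below only
illustrates that arithmetic under those assumptions; it is not DRBD code.

/* Standalone sketch: bitmap sectors needed for a given device size. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const uint64_t SECTOR   = 512;		/* bytes per on-disk sector */
	const uint64_t BM_BLOCK = 4096;		/* bytes of data tracked per bitmap bit */

	uint64_t la_size_sect = 2ULL << 30;	/* example: 1 TiB device in 512-byte sectors */

	/* one bit per 4 kB block, rounded up, as in the kernel check */
	uint64_t bits    = (la_size_sect + 7) / (BM_BLOCK / SECTOR);
	uint64_t bytes   = bits / 8;
	uint64_t bm_sect = bytes / SECTOR;	/* == (la_size_sect+7)/8/8/512 */

	/* prints: 1 TiB device -> 65536 bitmap sectors (32 MiB of bitmap) */
	printf("1 TiB device -> %llu bitmap sectors (%llu MiB of bitmap)\n",
	       (unsigned long long)bm_sect,
	       (unsigned long long)(bm_sect * SECTOR >> 20));
	return 0;
}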