Commit 0e70613b authored by Linus Torvalds

Merge branch 'for-linus' of git://neil.brown.name/md

* 'for-linus' of git://neil.brown.name/md:
  md/raid5: Allow dirty-degraded arrays to be assembled when only parity is degraded.
  Don't unconditionally set in_sync on newly added device in raid5_reshape
  md: allow v0.91 metadata to record devices as being active but not in-sync.
  md: factor out updating of 'recovery_offset'.
parents e0a2af1e c148ffdc
...@@ -944,6 +944,14 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) ...@@ -944,6 +944,14 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
desc->raid_disk < mddev->raid_disks */) { desc->raid_disk < mddev->raid_disks */) {
set_bit(In_sync, &rdev->flags); set_bit(In_sync, &rdev->flags);
rdev->raid_disk = desc->raid_disk; rdev->raid_disk = desc->raid_disk;
} else if (desc->state & (1<<MD_DISK_ACTIVE)) {
/* active but not in sync implies recovery up to
* reshape position. We don't know exactly where
* that is, so set to zero for now */
if (mddev->minor_version >= 91) {
rdev->recovery_offset = 0;
rdev->raid_disk = desc->raid_disk;
}
} }
if (desc->state & (1<<MD_DISK_WRITEMOSTLY)) if (desc->state & (1<<MD_DISK_WRITEMOSTLY))
set_bit(WriteMostly, &rdev->flags); set_bit(WriteMostly, &rdev->flags);
...@@ -1032,8 +1040,19 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) ...@@ -1032,8 +1040,19 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
list_for_each_entry(rdev2, &mddev->disks, same_set) { list_for_each_entry(rdev2, &mddev->disks, same_set) {
mdp_disk_t *d; mdp_disk_t *d;
int desc_nr; int desc_nr;
if (rdev2->raid_disk >= 0 && test_bit(In_sync, &rdev2->flags) int is_active = test_bit(In_sync, &rdev2->flags);
&& !test_bit(Faulty, &rdev2->flags))
if (rdev2->raid_disk >= 0 &&
sb->minor_version >= 91)
/* we have nowhere to store the recovery_offset,
* but if it is not below the reshape_position,
* we can piggy-back on that.
*/
is_active = 1;
if (rdev2->raid_disk < 0 ||
test_bit(Faulty, &rdev2->flags))
is_active = 0;
if (is_active)
desc_nr = rdev2->raid_disk; desc_nr = rdev2->raid_disk;
else else
desc_nr = next_spare++; desc_nr = next_spare++;
...@@ -1043,16 +1062,16 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) ...@@ -1043,16 +1062,16 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
d->number = rdev2->desc_nr; d->number = rdev2->desc_nr;
d->major = MAJOR(rdev2->bdev->bd_dev); d->major = MAJOR(rdev2->bdev->bd_dev);
d->minor = MINOR(rdev2->bdev->bd_dev); d->minor = MINOR(rdev2->bdev->bd_dev);
if (rdev2->raid_disk >= 0 && test_bit(In_sync, &rdev2->flags) if (is_active)
&& !test_bit(Faulty, &rdev2->flags))
d->raid_disk = rdev2->raid_disk; d->raid_disk = rdev2->raid_disk;
else else
d->raid_disk = rdev2->desc_nr; /* compatibility */ d->raid_disk = rdev2->desc_nr; /* compatibility */
if (test_bit(Faulty, &rdev2->flags)) if (test_bit(Faulty, &rdev2->flags))
d->state = (1<<MD_DISK_FAULTY); d->state = (1<<MD_DISK_FAULTY);
else if (test_bit(In_sync, &rdev2->flags)) { else if (is_active) {
d->state = (1<<MD_DISK_ACTIVE); d->state = (1<<MD_DISK_ACTIVE);
d->state |= (1<<MD_DISK_SYNC); if (test_bit(In_sync, &rdev2->flags))
d->state |= (1<<MD_DISK_SYNC);
active++; active++;
working++; working++;
} else { } else {
...@@ -1382,8 +1401,6 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) ...@@ -1382,8 +1401,6 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
if (rdev->raid_disk >= 0 && if (rdev->raid_disk >= 0 &&
!test_bit(In_sync, &rdev->flags)) { !test_bit(In_sync, &rdev->flags)) {
if (mddev->curr_resync_completed > rdev->recovery_offset)
rdev->recovery_offset = mddev->curr_resync_completed;
if (rdev->recovery_offset > 0) { if (rdev->recovery_offset > 0) {
sb->feature_map |= sb->feature_map |=
cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET); cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
...@@ -1917,6 +1934,14 @@ static void sync_sbs(mddev_t * mddev, int nospares) ...@@ -1917,6 +1934,14 @@ static void sync_sbs(mddev_t * mddev, int nospares)
*/ */
mdk_rdev_t *rdev; mdk_rdev_t *rdev;
/* First make sure individual recovery_offsets are correct */
list_for_each_entry(rdev, &mddev->disks, same_set) {
if (rdev->raid_disk >= 0 &&
!test_bit(In_sync, &rdev->flags) &&
mddev->curr_resync_completed > rdev->recovery_offset)
rdev->recovery_offset = mddev->curr_resync_completed;
}
list_for_each_entry(rdev, &mddev->disks, same_set) { list_for_each_entry(rdev, &mddev->disks, same_set) {
if (rdev->sb_events == mddev->events || if (rdev->sb_events == mddev->events ||
(nospares && (nospares &&
......
...@@ -4823,11 +4823,40 @@ static raid5_conf_t *setup_conf(mddev_t *mddev) ...@@ -4823,11 +4823,40 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
} }
/*
 * only_parity - test whether a device slot is one that this layout
 * reserves for parity.
 *
 * @raid_disk:    slot index of the device being examined
 * @algo:         layout algorithm (one of the ALGORITHM_* constants)
 * @raid_disks:   number of device slots in the array for this layout
 * @max_degraded: number of slots at the front (PARITY_0) or at the
 *                back (PARITY_N) that count as parity
 *
 * Returns 1 when @raid_disk sits in a parity-only position for @algo,
 * and 0 otherwise.  Any layout not listed below yields 0.
 */
static int only_parity(int raid_disk, int algo, int raid_disks, int max_degraded)
{
	switch (algo) {
	case ALGORITHM_PARITY_0:
		/* The leading max_degraded slots. */
		return raid_disk < max_degraded;

	case ALGORITHM_PARITY_N:
		/* The trailing max_degraded slots. */
		return raid_disk >= raid_disks - max_degraded;

	case ALGORITHM_PARITY_0_6:
		/* Slot zero and the final slot. */
		return raid_disk == 0 || raid_disk == raid_disks - 1;

	case ALGORITHM_LEFT_ASYMMETRIC_6:
	case ALGORITHM_RIGHT_ASYMMETRIC_6:
	case ALGORITHM_LEFT_SYMMETRIC_6:
	case ALGORITHM_RIGHT_SYMMETRIC_6:
		/* Only the final slot. */
		return raid_disk == raid_disks - 1;

	default:
		return 0;
	}
}
static int run(mddev_t *mddev) static int run(mddev_t *mddev)
{ {
raid5_conf_t *conf; raid5_conf_t *conf;
int working_disks = 0, chunk_size; int working_disks = 0, chunk_size;
int dirty_parity_disks = 0;
mdk_rdev_t *rdev; mdk_rdev_t *rdev;
sector_t reshape_offset = 0;
if (mddev->recovery_cp != MaxSector) if (mddev->recovery_cp != MaxSector)
printk(KERN_NOTICE "raid5: %s is not clean" printk(KERN_NOTICE "raid5: %s is not clean"
...@@ -4861,6 +4890,7 @@ static int run(mddev_t *mddev) ...@@ -4861,6 +4890,7 @@ static int run(mddev_t *mddev)
"on a stripe boundary\n"); "on a stripe boundary\n");
return -EINVAL; return -EINVAL;
} }
reshape_offset = here_new * mddev->new_chunk_sectors;
/* here_new is the stripe we will write to */ /* here_new is the stripe we will write to */
here_old = mddev->reshape_position; here_old = mddev->reshape_position;
sector_div(here_old, mddev->chunk_sectors * sector_div(here_old, mddev->chunk_sectors *
...@@ -4916,10 +4946,51 @@ static int run(mddev_t *mddev) ...@@ -4916,10 +4946,51 @@ static int run(mddev_t *mddev)
/* /*
* 0 for a fully functional array, 1 or 2 for a degraded array. * 0 for a fully functional array, 1 or 2 for a degraded array.
*/ */
list_for_each_entry(rdev, &mddev->disks, same_set) list_for_each_entry(rdev, &mddev->disks, same_set) {
if (rdev->raid_disk >= 0 && if (rdev->raid_disk < 0)
test_bit(In_sync, &rdev->flags)) continue;
if (test_bit(In_sync, &rdev->flags))
working_disks++; working_disks++;
/* This disc is not fully in-sync. However if it
* just stored parity (beyond the recovery_offset),
* then we don't need to be concerned about the
* array being dirty.
* When reshape goes 'backwards', we never have
* partially completed devices, so we only need
* to worry about reshape going forwards.
*/
/* Hack because v0.91 doesn't store recovery_offset properly. */
if (mddev->major_version == 0 &&
mddev->minor_version > 90)
rdev->recovery_offset = reshape_offset;
printk("%d: w=%d pa=%d pr=%d m=%d a=%d r=%d op1=%d op2=%d\n",
rdev->raid_disk, working_disks, conf->prev_algo,
conf->previous_raid_disks, conf->max_degraded,
conf->algorithm, conf->raid_disks,
only_parity(rdev->raid_disk,
conf->prev_algo,
conf->previous_raid_disks,
conf->max_degraded),
only_parity(rdev->raid_disk,
conf->algorithm,
conf->raid_disks,
conf->max_degraded));
if (rdev->recovery_offset < reshape_offset) {
/* We need to check old and new layout */
if (!only_parity(rdev->raid_disk,
conf->algorithm,
conf->raid_disks,
conf->max_degraded))
continue;
}
if (!only_parity(rdev->raid_disk,
conf->prev_algo,
conf->previous_raid_disks,
conf->max_degraded))
continue;
dirty_parity_disks++;
}
mddev->degraded = (max(conf->raid_disks, conf->previous_raid_disks) mddev->degraded = (max(conf->raid_disks, conf->previous_raid_disks)
- working_disks); - working_disks);
...@@ -4935,7 +5006,7 @@ static int run(mddev_t *mddev) ...@@ -4935,7 +5006,7 @@ static int run(mddev_t *mddev)
mddev->dev_sectors &= ~(mddev->chunk_sectors - 1); mddev->dev_sectors &= ~(mddev->chunk_sectors - 1);
mddev->resync_max_sectors = mddev->dev_sectors; mddev->resync_max_sectors = mddev->dev_sectors;
if (mddev->degraded > 0 && if (mddev->degraded > dirty_parity_disks &&
mddev->recovery_cp != MaxSector) { mddev->recovery_cp != MaxSector) {
if (mddev->ok_start_degraded) if (mddev->ok_start_degraded)
printk(KERN_WARNING printk(KERN_WARNING
...@@ -5361,9 +5432,11 @@ static int raid5_start_reshape(mddev_t *mddev) ...@@ -5361,9 +5432,11 @@ static int raid5_start_reshape(mddev_t *mddev)
!test_bit(Faulty, &rdev->flags)) { !test_bit(Faulty, &rdev->flags)) {
if (raid5_add_disk(mddev, rdev) == 0) { if (raid5_add_disk(mddev, rdev) == 0) {
char nm[20]; char nm[20];
set_bit(In_sync, &rdev->flags); if (rdev->raid_disk >= conf->previous_raid_disks)
set_bit(In_sync, &rdev->flags);
else
rdev->recovery_offset = 0;
added_devices++; added_devices++;
rdev->recovery_offset = 0;
sprintf(nm, "rd%d", rdev->raid_disk); sprintf(nm, "rd%d", rdev->raid_disk);
if (sysfs_create_link(&mddev->kobj, if (sysfs_create_link(&mddev->kobj,
&rdev->kobj, nm)) &rdev->kobj, nm))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment