Commit ae8eed2d authored by Linus Torvalds

Merge branch 'for-linus' of git://neil.brown.name/md

* 'for-linus' of git://neil.brown.name/md:
  FIX: md: process hangs at wait_barrier after 0->10 takeover
  md_make_request: don't touch the bio after calling make_request
  md: Don't allow slot_store while resync/recovery is happening.
  md: don't clear curr_resync_completed at end of resync.
  md: Don't use remove_and_add_spares to remove failed devices from a read-only array
  Add raid1->raid0 takeover support
  md: Remove the AllReserved flag for component devices.
  md: don't abort checking spares as soon as one cannot be added.
  md: fix the test for finding spares in raid5_start_reshape.
  md: simplify some 'if' conditionals in raid5_start_reshape.
  md: revert change to raid_disks on failure.
parents 100b33c8 02214dc5
...@@ -287,6 +287,7 @@ static int md_make_request(struct request_queue *q, struct bio *bio) ...@@ -287,6 +287,7 @@ static int md_make_request(struct request_queue *q, struct bio *bio)
mddev_t *mddev = q->queuedata; mddev_t *mddev = q->queuedata;
int rv; int rv;
int cpu; int cpu;
unsigned int sectors;
if (mddev == NULL || mddev->pers == NULL if (mddev == NULL || mddev->pers == NULL
|| !mddev->ready) { || !mddev->ready) {
...@@ -311,12 +312,16 @@ static int md_make_request(struct request_queue *q, struct bio *bio) ...@@ -311,12 +312,16 @@ static int md_make_request(struct request_queue *q, struct bio *bio)
atomic_inc(&mddev->active_io); atomic_inc(&mddev->active_io);
rcu_read_unlock(); rcu_read_unlock();
/*
* save the sectors now since our bio can
* go away inside make_request
*/
sectors = bio_sectors(bio);
rv = mddev->pers->make_request(mddev, bio); rv = mddev->pers->make_request(mddev, bio);
cpu = part_stat_lock(); cpu = part_stat_lock();
part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], sectors);
bio_sectors(bio));
part_stat_unlock(); part_stat_unlock();
if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended) if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
...@@ -1947,8 +1952,6 @@ static int lock_rdev(mdk_rdev_t *rdev, dev_t dev, int shared) ...@@ -1947,8 +1952,6 @@ static int lock_rdev(mdk_rdev_t *rdev, dev_t dev, int shared)
__bdevname(dev, b)); __bdevname(dev, b));
return PTR_ERR(bdev); return PTR_ERR(bdev);
} }
if (!shared)
set_bit(AllReserved, &rdev->flags);
rdev->bdev = bdev; rdev->bdev = bdev;
return err; return err;
} }
...@@ -2465,6 +2468,9 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len) ...@@ -2465,6 +2468,9 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len)
if (rdev->raid_disk != -1) if (rdev->raid_disk != -1)
return -EBUSY; return -EBUSY;
if (test_bit(MD_RECOVERY_RUNNING, &rdev->mddev->recovery))
return -EBUSY;
if (rdev->mddev->pers->hot_add_disk == NULL) if (rdev->mddev->pers->hot_add_disk == NULL)
return -EINVAL; return -EINVAL;
...@@ -2610,12 +2616,11 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len) ...@@ -2610,12 +2616,11 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
mddev_lock(mddev); mddev_lock(mddev);
list_for_each_entry(rdev2, &mddev->disks, same_set) list_for_each_entry(rdev2, &mddev->disks, same_set)
if (test_bit(AllReserved, &rdev2->flags) || if (rdev->bdev == rdev2->bdev &&
(rdev->bdev == rdev2->bdev && rdev != rdev2 &&
rdev != rdev2 && overlaps(rdev->data_offset, rdev->sectors,
overlaps(rdev->data_offset, rdev->sectors, rdev2->data_offset,
rdev2->data_offset, rdev2->sectors)) {
rdev2->sectors))) {
overlap = 1; overlap = 1;
break; break;
} }
...@@ -5578,6 +5583,8 @@ static int update_raid_disks(mddev_t *mddev, int raid_disks) ...@@ -5578,6 +5583,8 @@ static int update_raid_disks(mddev_t *mddev, int raid_disks)
mddev->delta_disks = raid_disks - mddev->raid_disks; mddev->delta_disks = raid_disks - mddev->raid_disks;
rv = mddev->pers->check_reshape(mddev); rv = mddev->pers->check_reshape(mddev);
if (rv < 0)
mddev->delta_disks = 0;
return rv; return rv;
} }
...@@ -6985,9 +6992,6 @@ void md_do_sync(mddev_t *mddev) ...@@ -6985,9 +6992,6 @@ void md_do_sync(mddev_t *mddev)
} else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
mddev->resync_min = mddev->curr_resync_completed; mddev->resync_min = mddev->curr_resync_completed;
mddev->curr_resync = 0; mddev->curr_resync = 0;
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
mddev->curr_resync_completed = 0;
sysfs_notify(&mddev->kobj, NULL, "sync_completed");
wake_up(&resync_wait); wake_up(&resync_wait);
set_bit(MD_RECOVERY_DONE, &mddev->recovery); set_bit(MD_RECOVERY_DONE, &mddev->recovery);
md_wakeup_thread(mddev->thread); md_wakeup_thread(mddev->thread);
...@@ -7028,7 +7032,7 @@ static int remove_and_add_spares(mddev_t *mddev) ...@@ -7028,7 +7032,7 @@ static int remove_and_add_spares(mddev_t *mddev)
} }
} }
if (mddev->degraded && ! mddev->ro && !mddev->recovery_disabled) { if (mddev->degraded && !mddev->recovery_disabled) {
list_for_each_entry(rdev, &mddev->disks, same_set) { list_for_each_entry(rdev, &mddev->disks, same_set) {
if (rdev->raid_disk >= 0 && if (rdev->raid_disk >= 0 &&
!test_bit(In_sync, &rdev->flags) && !test_bit(In_sync, &rdev->flags) &&
...@@ -7151,7 +7155,20 @@ void md_check_recovery(mddev_t *mddev) ...@@ -7151,7 +7155,20 @@ void md_check_recovery(mddev_t *mddev)
/* Only thing we do on a ro array is remove /* Only thing we do on a ro array is remove
* failed devices. * failed devices.
*/ */
remove_and_add_spares(mddev); mdk_rdev_t *rdev;
list_for_each_entry(rdev, &mddev->disks, same_set)
if (rdev->raid_disk >= 0 &&
!test_bit(Blocked, &rdev->flags) &&
test_bit(Faulty, &rdev->flags) &&
atomic_read(&rdev->nr_pending)==0) {
if (mddev->pers->hot_remove_disk(
mddev, rdev->raid_disk)==0) {
char nm[20];
sprintf(nm,"rd%d", rdev->raid_disk);
sysfs_remove_link(&mddev->kobj, nm);
rdev->raid_disk = -1;
}
}
clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery); clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
goto unlock; goto unlock;
} }
......
...@@ -93,8 +93,6 @@ struct mdk_rdev_s ...@@ -93,8 +93,6 @@ struct mdk_rdev_s
#define Faulty 1 /* device is known to have a fault */ #define Faulty 1 /* device is known to have a fault */
#define In_sync 2 /* device is in_sync with rest of array */ #define In_sync 2 /* device is in_sync with rest of array */
#define WriteMostly 4 /* Avoid reading if at all possible */ #define WriteMostly 4 /* Avoid reading if at all possible */
#define AllReserved 6 /* If whole device is reserved for
* one array */
#define AutoDetected 7 /* added by auto-detect */ #define AutoDetected 7 /* added by auto-detect */
#define Blocked 8 /* An error occured on an externally #define Blocked 8 /* An error occured on an externally
* managed array, don't allow writes * managed array, don't allow writes
......
...@@ -179,6 +179,14 @@ static int create_strip_zones(mddev_t *mddev, raid0_conf_t **private_conf) ...@@ -179,6 +179,14 @@ static int create_strip_zones(mddev_t *mddev, raid0_conf_t **private_conf)
rdev1->new_raid_disk = j; rdev1->new_raid_disk = j;
} }
if (mddev->level == 1) {
/* taking over a raid1 array -
* we have only one active disk
*/
j = 0;
rdev1->new_raid_disk = j;
}
if (j < 0 || j >= mddev->raid_disks) { if (j < 0 || j >= mddev->raid_disks) {
printk(KERN_ERR "md/raid0:%s: bad disk number %d - " printk(KERN_ERR "md/raid0:%s: bad disk number %d - "
"aborting!\n", mdname(mddev), j); "aborting!\n", mdname(mddev), j);
...@@ -644,12 +652,38 @@ static void *raid0_takeover_raid10(mddev_t *mddev) ...@@ -644,12 +652,38 @@ static void *raid0_takeover_raid10(mddev_t *mddev)
return priv_conf; return priv_conf;
} }
/*
 * raid0_takeover_raid1 - convert a degraded raid1 array into a raid0.
 *
 * A raid1 set can only be taken over by raid0 when exactly one mirror
 * leg is still active, i.e. (N - 1) of the N mirror drives are already
 * faulty.  On success the mddev geometry fields are rewritten for a
 * single-disk raid0 and the newly built strip-zone configuration is
 * returned; on failure an ERR_PTR()-encoded errno is returned.
 */
static void *raid0_takeover_raid1(mddev_t *mddev)
{
	raid0_conf_t *priv_conf;
	int ret;

	/* Check layout:
	 * - (N - 1) mirror drives must be already faulty
	 */
	if ((mddev->raid_disks - 1) != mddev->degraded) {
		printk(KERN_ERR "md/raid0:%s: (N - 1) mirrors drives must be already faulty!\n",
		       mdname(mddev));
		return ERR_PTR(-EINVAL);
	}

	/* Set new parameters */
	mddev->new_level = 0;
	mddev->new_layout = 0;
	mddev->new_chunk_sectors = 128; /* by default set chunk size to 64k */
	mddev->delta_disks = 1 - mddev->raid_disks;
	/* make sure it will be not marked as dirty */
	mddev->recovery_cp = MaxSector;

	/*
	 * Fix: the original ignored create_strip_zones()'s return value and
	 * returned priv_conf unconditionally, silently passing a failed zone
	 * setup to the caller.  Propagate the error explicitly instead.
	 */
	ret = create_strip_zones(mddev, &priv_conf);
	if (ret < 0)
		return ERR_PTR(ret);
	return priv_conf;
}
static void *raid0_takeover(mddev_t *mddev) static void *raid0_takeover(mddev_t *mddev)
{ {
/* raid0 can take over: /* raid0 can take over:
* raid4 - if all data disks are active. * raid4 - if all data disks are active.
* raid5 - providing it is Raid4 layout and one disk is faulty * raid5 - providing it is Raid4 layout and one disk is faulty
* raid10 - assuming we have all necessary active disks * raid10 - assuming we have all necessary active disks
* raid1 - with (N -1) mirror drives faulty
*/ */
if (mddev->level == 4) if (mddev->level == 4)
return raid0_takeover_raid45(mddev); return raid0_takeover_raid45(mddev);
...@@ -665,6 +699,12 @@ static void *raid0_takeover(mddev_t *mddev) ...@@ -665,6 +699,12 @@ static void *raid0_takeover(mddev_t *mddev)
if (mddev->level == 10) if (mddev->level == 10)
return raid0_takeover_raid10(mddev); return raid0_takeover_raid10(mddev);
if (mddev->level == 1)
return raid0_takeover_raid1(mddev);
printk(KERN_ERR "Takeover from raid%i to raid0 not supported\n",
mddev->level);
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
} }
......
...@@ -2463,11 +2463,13 @@ static void *raid10_takeover_raid0(mddev_t *mddev) ...@@ -2463,11 +2463,13 @@ static void *raid10_takeover_raid0(mddev_t *mddev)
mddev->recovery_cp = MaxSector; mddev->recovery_cp = MaxSector;
conf = setup_conf(mddev); conf = setup_conf(mddev);
if (!IS_ERR(conf)) if (!IS_ERR(conf)) {
list_for_each_entry(rdev, &mddev->disks, same_set) list_for_each_entry(rdev, &mddev->disks, same_set)
if (rdev->raid_disk >= 0) if (rdev->raid_disk >= 0)
rdev->new_raid_disk = rdev->raid_disk * 2; rdev->new_raid_disk = rdev->raid_disk * 2;
conf->barrier = 1;
}
return conf; return conf;
} }
......
...@@ -5517,7 +5517,6 @@ static int raid5_start_reshape(mddev_t *mddev) ...@@ -5517,7 +5517,6 @@ static int raid5_start_reshape(mddev_t *mddev)
raid5_conf_t *conf = mddev->private; raid5_conf_t *conf = mddev->private;
mdk_rdev_t *rdev; mdk_rdev_t *rdev;
int spares = 0; int spares = 0;
int added_devices = 0;
unsigned long flags; unsigned long flags;
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
...@@ -5527,8 +5526,8 @@ static int raid5_start_reshape(mddev_t *mddev) ...@@ -5527,8 +5526,8 @@ static int raid5_start_reshape(mddev_t *mddev)
return -ENOSPC; return -ENOSPC;
list_for_each_entry(rdev, &mddev->disks, same_set) list_for_each_entry(rdev, &mddev->disks, same_set)
if ((rdev->raid_disk < 0 || rdev->raid_disk >= conf->raid_disks) if (!test_bit(In_sync, &rdev->flags)
&& !test_bit(Faulty, &rdev->flags)) && !test_bit(Faulty, &rdev->flags))
spares++; spares++;
if (spares - mddev->degraded < mddev->delta_disks - conf->max_degraded) if (spares - mddev->degraded < mddev->delta_disks - conf->max_degraded)
...@@ -5571,34 +5570,35 @@ static int raid5_start_reshape(mddev_t *mddev) ...@@ -5571,34 +5570,35 @@ static int raid5_start_reshape(mddev_t *mddev)
* to correctly record the "partially reconstructed" state of * to correctly record the "partially reconstructed" state of
* such devices during the reshape and confusion could result. * such devices during the reshape and confusion could result.
*/ */
if (mddev->delta_disks >= 0) if (mddev->delta_disks >= 0) {
list_for_each_entry(rdev, &mddev->disks, same_set) int added_devices = 0;
if (rdev->raid_disk < 0 && list_for_each_entry(rdev, &mddev->disks, same_set)
!test_bit(Faulty, &rdev->flags)) { if (rdev->raid_disk < 0 &&
if (raid5_add_disk(mddev, rdev) == 0) { !test_bit(Faulty, &rdev->flags)) {
char nm[20]; if (raid5_add_disk(mddev, rdev) == 0) {
if (rdev->raid_disk >= conf->previous_raid_disks) { char nm[20];
set_bit(In_sync, &rdev->flags); if (rdev->raid_disk
added_devices++; >= conf->previous_raid_disks) {
} else set_bit(In_sync, &rdev->flags);
rdev->recovery_offset = 0; added_devices++;
sprintf(nm, "rd%d", rdev->raid_disk); } else
if (sysfs_create_link(&mddev->kobj, rdev->recovery_offset = 0;
&rdev->kobj, nm)) sprintf(nm, "rd%d", rdev->raid_disk);
/* Failure here is OK */; if (sysfs_create_link(&mddev->kobj,
} else &rdev->kobj, nm))
break; /* Failure here is OK */;
} else if (rdev->raid_disk >= conf->previous_raid_disks }
&& !test_bit(Faulty, &rdev->flags)) { } else if (rdev->raid_disk >= conf->previous_raid_disks
/* This is a spare that was manually added */ && !test_bit(Faulty, &rdev->flags)) {
set_bit(In_sync, &rdev->flags); /* This is a spare that was manually added */
added_devices++; set_bit(In_sync, &rdev->flags);
} added_devices++;
}
/* When a reshape changes the number of devices, ->degraded /* When a reshape changes the number of devices,
* is measured against the larger of the pre and post number of * ->degraded is measured against the larger of the
* devices.*/ * pre and post number of devices.
if (mddev->delta_disks > 0) { */
spin_lock_irqsave(&conf->device_lock, flags); spin_lock_irqsave(&conf->device_lock, flags);
mddev->degraded += (conf->raid_disks - conf->previous_raid_disks) mddev->degraded += (conf->raid_disks - conf->previous_raid_disks)
- added_devices; - added_devices;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment