Commit c04be0aa authored by NeilBrown's avatar NeilBrown Committed by Linus Torvalds

[PATCH] md: Improve locking around error handling

The error handling routines don't use proper locking, and so two concurrent
errors could trigger a problem.

So:
  - use test-and-set and test-and-clear to synchonise
    the In_sync bits with the ->degraded count
  - use the spinlock to protect updates to the
    degraded count (could use an atomic_t but that
    would be a bigger change in code, and isn't
    really justified)
  - remove un-necessary locking in raid5
Signed-off-by: default avatarNeil Brown <neilb@suse.de>
Signed-off-by: default avatarAndrew Morton <akpm@osdl.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@osdl.org>
parent 11ce99e6
...@@ -959,14 +959,16 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev) ...@@ -959,14 +959,16 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
* normal single drive * normal single drive
*/ */
return; return;
if (test_bit(In_sync, &rdev->flags)) { if (test_and_clear_bit(In_sync, &rdev->flags)) {
unsigned long flags;
spin_lock_irqsave(&conf->device_lock, flags);
mddev->degraded++; mddev->degraded++;
spin_unlock_irqrestore(&conf->device_lock, flags);
/* /*
* if recovery is running, make sure it aborts. * if recovery is running, make sure it aborts.
*/ */
set_bit(MD_RECOVERY_ERR, &mddev->recovery); set_bit(MD_RECOVERY_ERR, &mddev->recovery);
} }
clear_bit(In_sync, &rdev->flags);
set_bit(Faulty, &rdev->flags); set_bit(Faulty, &rdev->flags);
set_bit(MD_CHANGE_DEVS, &mddev->flags); set_bit(MD_CHANGE_DEVS, &mddev->flags);
printk(KERN_ALERT "raid1: Disk failure on %s, disabling device. \n" printk(KERN_ALERT "raid1: Disk failure on %s, disabling device. \n"
...@@ -1022,9 +1024,11 @@ static int raid1_spare_active(mddev_t *mddev) ...@@ -1022,9 +1024,11 @@ static int raid1_spare_active(mddev_t *mddev)
mdk_rdev_t *rdev = conf->mirrors[i].rdev; mdk_rdev_t *rdev = conf->mirrors[i].rdev;
if (rdev if (rdev
&& !test_bit(Faulty, &rdev->flags) && !test_bit(Faulty, &rdev->flags)
&& !test_bit(In_sync, &rdev->flags)) { && !test_and_set_bit(In_sync, &rdev->flags)) {
unsigned long flags;
spin_lock_irqsave(&conf->device_lock, flags);
mddev->degraded--; mddev->degraded--;
set_bit(In_sync, &rdev->flags); spin_unlock_irqrestore(&conf->device_lock, flags);
} }
} }
...@@ -2048,7 +2052,7 @@ static int raid1_reshape(mddev_t *mddev) ...@@ -2048,7 +2052,7 @@ static int raid1_reshape(mddev_t *mddev)
mirror_info_t *newmirrors; mirror_info_t *newmirrors;
conf_t *conf = mddev_to_conf(mddev); conf_t *conf = mddev_to_conf(mddev);
int cnt, raid_disks; int cnt, raid_disks;
unsigned long flags;
int d, d2; int d, d2;
/* Cannot change chunk_size, layout, or level */ /* Cannot change chunk_size, layout, or level */
...@@ -2107,7 +2111,9 @@ static int raid1_reshape(mddev_t *mddev) ...@@ -2107,7 +2111,9 @@ static int raid1_reshape(mddev_t *mddev)
kfree(conf->poolinfo); kfree(conf->poolinfo);
conf->poolinfo = newpoolinfo; conf->poolinfo = newpoolinfo;
spin_lock_irqsave(&conf->device_lock, flags);
mddev->degraded += (raid_disks - conf->raid_disks); mddev->degraded += (raid_disks - conf->raid_disks);
spin_unlock_irqrestore(&conf->device_lock, flags);
conf->raid_disks = mddev->raid_disks = raid_disks; conf->raid_disks = mddev->raid_disks = raid_disks;
mddev->delta_disks = 0; mddev->delta_disks = 0;
......
...@@ -950,14 +950,16 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev) ...@@ -950,14 +950,16 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
* really dead" tests... * really dead" tests...
*/ */
return; return;
if (test_bit(In_sync, &rdev->flags)) { if (test_and_clear_bit(In_sync, &rdev->flags)) {
unsigned long flags;
spin_lock_irqsave(&conf->device_lock, flags);
mddev->degraded++; mddev->degraded++;
spin_unlock_irqrestore(&conf->device_lock, flags);
/* /*
* if recovery is running, make sure it aborts. * if recovery is running, make sure it aborts.
*/ */
set_bit(MD_RECOVERY_ERR, &mddev->recovery); set_bit(MD_RECOVERY_ERR, &mddev->recovery);
} }
clear_bit(In_sync, &rdev->flags);
set_bit(Faulty, &rdev->flags); set_bit(Faulty, &rdev->flags);
set_bit(MD_CHANGE_DEVS, &mddev->flags); set_bit(MD_CHANGE_DEVS, &mddev->flags);
printk(KERN_ALERT "raid10: Disk failure on %s, disabling device. \n" printk(KERN_ALERT "raid10: Disk failure on %s, disabling device. \n"
...@@ -1033,9 +1035,11 @@ static int raid10_spare_active(mddev_t *mddev) ...@@ -1033,9 +1035,11 @@ static int raid10_spare_active(mddev_t *mddev)
tmp = conf->mirrors + i; tmp = conf->mirrors + i;
if (tmp->rdev if (tmp->rdev
&& !test_bit(Faulty, &tmp->rdev->flags) && !test_bit(Faulty, &tmp->rdev->flags)
&& !test_bit(In_sync, &tmp->rdev->flags)) { && !test_and_set_bit(In_sync, &tmp->rdev->flags)) {
unsigned long flags;
spin_lock_irqsave(&conf->device_lock, flags);
mddev->degraded--; mddev->degraded--;
set_bit(In_sync, &tmp->rdev->flags); spin_unlock_irqrestore(&conf->device_lock, flags);
} }
} }
......
...@@ -636,7 +636,6 @@ static int raid5_end_write_request (struct bio *bi, unsigned int bytes_done, ...@@ -636,7 +636,6 @@ static int raid5_end_write_request (struct bio *bi, unsigned int bytes_done,
struct stripe_head *sh = bi->bi_private; struct stripe_head *sh = bi->bi_private;
raid5_conf_t *conf = sh->raid_conf; raid5_conf_t *conf = sh->raid_conf;
int disks = sh->disks, i; int disks = sh->disks, i;
unsigned long flags;
int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags); int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
if (bi->bi_size) if (bi->bi_size)
...@@ -654,7 +653,6 @@ static int raid5_end_write_request (struct bio *bi, unsigned int bytes_done, ...@@ -654,7 +653,6 @@ static int raid5_end_write_request (struct bio *bi, unsigned int bytes_done,
return 0; return 0;
} }
spin_lock_irqsave(&conf->device_lock, flags);
if (!uptodate) if (!uptodate)
md_error(conf->mddev, conf->disks[i].rdev); md_error(conf->mddev, conf->disks[i].rdev);
...@@ -662,8 +660,7 @@ static int raid5_end_write_request (struct bio *bi, unsigned int bytes_done, ...@@ -662,8 +660,7 @@ static int raid5_end_write_request (struct bio *bi, unsigned int bytes_done,
clear_bit(R5_LOCKED, &sh->dev[i].flags); clear_bit(R5_LOCKED, &sh->dev[i].flags);
set_bit(STRIPE_HANDLE, &sh->state); set_bit(STRIPE_HANDLE, &sh->state);
__release_stripe(conf, sh); release_stripe(sh);
spin_unlock_irqrestore(&conf->device_lock, flags);
return 0; return 0;
} }
...@@ -697,9 +694,11 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev) ...@@ -697,9 +694,11 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
if (!test_bit(Faulty, &rdev->flags)) { if (!test_bit(Faulty, &rdev->flags)) {
set_bit(MD_CHANGE_DEVS, &mddev->flags); set_bit(MD_CHANGE_DEVS, &mddev->flags);
if (test_bit(In_sync, &rdev->flags)) { if (test_and_clear_bit(In_sync, &rdev->flags)) {
unsigned long flags;
spin_lock_irqsave(&conf->device_lock, flags);
mddev->degraded++; mddev->degraded++;
clear_bit(In_sync, &rdev->flags); spin_unlock_irqrestore(&conf->device_lock, flags);
/* /*
* if recovery was running, make sure it aborts. * if recovery was running, make sure it aborts.
*/ */
...@@ -3418,9 +3417,11 @@ static int raid5_spare_active(mddev_t *mddev) ...@@ -3418,9 +3417,11 @@ static int raid5_spare_active(mddev_t *mddev)
tmp = conf->disks + i; tmp = conf->disks + i;
if (tmp->rdev if (tmp->rdev
&& !test_bit(Faulty, &tmp->rdev->flags) && !test_bit(Faulty, &tmp->rdev->flags)
&& !test_bit(In_sync, &tmp->rdev->flags)) { && !test_and_set_bit(In_sync, &tmp->rdev->flags)) {
unsigned long flags;
spin_lock_irqsave(&conf->device_lock, flags);
mddev->degraded--; mddev->degraded--;
set_bit(In_sync, &tmp->rdev->flags); spin_unlock_irqrestore(&conf->device_lock, flags);
} }
} }
print_raid5_conf(conf); print_raid5_conf(conf);
...@@ -3556,6 +3557,7 @@ static int raid5_start_reshape(mddev_t *mddev) ...@@ -3556,6 +3557,7 @@ static int raid5_start_reshape(mddev_t *mddev)
struct list_head *rtmp; struct list_head *rtmp;
int spares = 0; int spares = 0;
int added_devices = 0; int added_devices = 0;
unsigned long flags;
if (mddev->degraded || if (mddev->degraded ||
test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
...@@ -3597,7 +3599,9 @@ static int raid5_start_reshape(mddev_t *mddev) ...@@ -3597,7 +3599,9 @@ static int raid5_start_reshape(mddev_t *mddev)
break; break;
} }
spin_lock_irqsave(&conf->device_lock, flags);
mddev->degraded = (conf->raid_disks - conf->previous_raid_disks) - added_devices; mddev->degraded = (conf->raid_disks - conf->previous_raid_disks) - added_devices;
spin_unlock_irqrestore(&conf->device_lock, flags);
mddev->raid_disks = conf->raid_disks; mddev->raid_disks = conf->raid_disks;
mddev->reshape_position = 0; mddev->reshape_position = 0;
set_bit(MD_CHANGE_DEVS, &mddev->flags); set_bit(MD_CHANGE_DEVS, &mddev->flags);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment