Commit 3e98f2fb authored by Neil Brown, committed by Linus Torvalds

[PATCH] md: Keep track of number of pending requests on each component device on an MD array

This will allow us to know, in the event of a device failure, when the
device is completely unused and so can be disconnected from the
array.  Currently this isn't a problem as drives aren't normally disconnected
until after a replacement has been rebuilt, which is a LONG TIME, but that
will change shortly...

We always increment the count under a spinlock after checking that
the device hasn't already been disconnected (rdev != NULL).
We disconnect under the same spinlock after checking that the
count is zero.
parent fff3986f
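
The pattern the patch repeats in each personality is: take device_lock, check that the slot still points at an rdev, bump nr_pending before issuing the request, drop it on completion, and refuse hot removal while the count is non-zero. A minimal user-space sketch of that scheme follows; the types and helpers here (struct slot, get_device(), put_device(), remove_device()) are invented for illustration and use pthreads and C11 atomics rather than the kernel's primitives — they are not the md code or a kernel API.

/*
 * Illustrative sketch only: a user-space model of the nr_pending scheme.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stddef.h>

struct rdev {
	atomic_int nr_pending;		/* requests in flight on this device */
};

struct slot {
	struct rdev *rdev;		/* NULL once disconnected from the array */
	pthread_mutex_t lock;		/* stands in for conf->device_lock */
};

/* Before issuing I/O: take a reference, unless already disconnected. */
static struct rdev *get_device(struct slot *s)
{
	struct rdev *rdev = NULL;

	pthread_mutex_lock(&s->lock);
	if (s->rdev) {			/* the rdev != NULL check */
		rdev = s->rdev;
		atomic_fetch_add(&rdev->nr_pending, 1);
	}
	pthread_mutex_unlock(&s->lock);
	return rdev;
}

/* When the request completes: drop the reference. */
static void put_device(struct rdev *rdev)
{
	atomic_fetch_sub(&rdev->nr_pending, 1);
}

/* Hot removal: disconnect only when nothing is pending, under the same lock. */
static int remove_device(struct slot *s)
{
	int busy;

	pthread_mutex_lock(&s->lock);
	busy = s->rdev && atomic_load(&s->rdev->nr_pending) != 0;
	if (!busy)
		s->rdev = NULL;		/* safe: no request can still reach it */
	pthread_mutex_unlock(&s->lock);
	return busy ? -1 : 0;		/* -1 plays the role of -EBUSY */
}

Because the NULL check and the increment happen under the same lock that remove_device() takes, no new reference can appear once the device has been disconnected, and the count can only fall to zero after every in-flight request has completed.
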
@@ -998,6 +998,7 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int on_disk)
 	}
 	rdev->desc_nr = -1;
 	rdev->faulty = 0;
+	atomic_set(&rdev->nr_pending, 0);
 	size = rdev->bdev->bd_inode->i_size >> BLOCK_SIZE_BITS;
 	if (!size) {
...
@@ -70,7 +70,7 @@ static void mp_pool_free(void *mpb, void *data)
 	kfree(mpb);
 }
 
-static int multipath_map (mddev_t *mddev, struct block_device **bdev)
+static int multipath_map (mddev_t *mddev, mdk_rdev_t **rdev)
 {
 	multipath_conf_t *conf = mddev_to_conf(mddev);
 	int i, disks = MD_SB_DISKS;
@@ -80,12 +80,17 @@ static int multipath_map (mddev_t *mddev, struct block_device **bdev)
 	 * now we use the first available disk.
 	 */
 
+	spin_lock_irq(&conf->device_lock);
 	for (i = 0; i < disks; i++) {
-		if (conf->multipaths[i].operational) {
-			*bdev = conf->multipaths[i].rdev->bdev;
-			return (0);
+		if (conf->multipaths[i].operational &&
+		    conf->multipaths[i].rdev) {
+			*rdev = conf->multipaths[i].rdev;
+			atomic_inc(&(*rdev)->nr_pending);
+			spin_unlock_irq(&conf->device_lock);
+			return 0;
 		}
 	}
+	spin_unlock_irq(&conf->device_lock);
 
 	printk (KERN_ERR "multipath_map(): no more operational IO paths?\n");
 	return (-1);
@@ -126,21 +131,21 @@ void multipath_end_request(struct bio *bio)
 {
 	int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
 	struct multipath_bh * mp_bh = (struct multipath_bh *)(bio->bi_private);
-	multipath_conf_t *conf;
-	mdk_rdev_t *rdev;
+	multipath_conf_t *conf = mddev_to_conf(mp_bh->mddev);
+	mdk_rdev_t *rdev = conf->multipaths[mp_bh->path].rdev;
 
-	if (uptodate) {
+	if (uptodate)
 		multipath_end_bh_io(mp_bh, uptodate);
-		return;
+	else {
+		/*
+		 * oops, IO error:
+		 */
+		md_error (mp_bh->mddev, rdev);
+		printk(KERN_ERR "multipath: %s: rescheduling sector %lu\n",
+		       bdev_partition_name(rdev->bdev), bio->bi_sector);
+		multipath_reschedule_retry(mp_bh);
 	}
-	/*
-	 * oops, IO error:
-	 */
-	conf = mddev_to_conf(mp_bh->mddev);
-	rdev = conf->multipaths[mp_bh->path].rdev;
-	md_error (mp_bh->mddev, rdev);
-	printk(KERN_ERR "multipath: %s: rescheduling sector %lu\n",
-	       bdev_partition_name(rdev->bdev), bio->bi_sector);
-	multipath_reschedule_retry(mp_bh);
+	atomic_dec(&rdev->nr_pending);
 	return;
 }
@@ -154,7 +159,8 @@ static int multipath_read_balance (multipath_conf_t *conf)
 	int disk;
 
 	for (disk = 0; disk < MD_SB_DISKS; disk++)
-		if (conf->multipaths[disk].operational)
+		if (conf->multipaths[disk].operational &&
+		    conf->multipaths[disk].rdev)
 			return disk;
 	BUG();
 	return 0;
@@ -175,8 +181,11 @@ static int multipath_make_request (request_queue_t *q, struct bio * bio)
 	/*
 	 * read balancing logic:
 	 */
+	spin_lock_irq(&conf->device_lock);
 	mp_bh->path = multipath_read_balance(conf);
 	multipath = conf->multipaths + mp_bh->path;
+	atomic_inc(&multipath->rdev->nr_pending);
+	spin_unlock_irq(&conf->device_lock);
 
 	mp_bh->bio = *bio;
 	mp_bh->bio.bi_bdev = multipath->rdev->bdev;
@@ -321,7 +330,8 @@ static int multipath_remove_disk(mddev_t *mddev, int number)
 	spin_lock_irq(&conf->device_lock);
 	if (p->used_slot) {
-		if (p->operational) {
+		if (p->operational ||
+		    (p->rdev && atomic_read(&p->rdev->nr_pending))) {
 			printk(KERN_ERR "hot-remove-disk, slot %d is identified but is still operational!\n", number);
 			err = -EBUSY;
 			goto abort;
@@ -359,7 +369,7 @@ static void multipathd (void *data)
 	struct bio *bio;
 	unsigned long flags;
 	mddev_t *mddev;
-	struct block_device *bdev;
+	mdk_rdev_t *rdev;
 
 	for (;;) {
 		spin_lock_irqsave(&retry_list_lock, flags);
@@ -372,16 +382,16 @@ static void multipathd (void *data)
 		mddev = mp_bh->mddev;
 		bio = &mp_bh->bio;
 		bio->bi_sector = mp_bh->master_bio->bi_sector;
-		bdev = bio->bi_bdev;
-		multipath_map (mddev, &bio->bi_bdev);
-		if (bio->bi_bdev == bdev) {
+		rdev = NULL;
+		if (multipath_map (mddev, &rdev)<0) {
 			printk(IO_ERROR,
 			       bdev_partition_name(bio->bi_bdev), bio->bi_sector);
 			multipath_end_bh_io(mp_bh, 0);
 		} else {
 			printk(REDIRECT_SECTOR,
 			       bdev_partition_name(bio->bi_bdev), bio->bi_sector);
+			bio->bi_bdev = rdev->bdev;
 			generic_make_request(bio);
 		}
 	}
...
@@ -188,7 +188,7 @@ static inline void put_buf(r1bio_t *r1_bio)
 	mempool_free(r1_bio, conf->r1buf_pool);
 }
 
-static int map(mddev_t *mddev, struct block_device **bdev)
+static int map(mddev_t *mddev, mdk_rdev_t **rdev)
 {
 	conf_t *conf = mddev_to_conf(mddev);
 	int i, disks = MD_SB_DISKS;
@@ -198,12 +198,17 @@ static int map(mddev_t *mddev, struct block_device **bdev)
 	 * now we use the first available disk.
 	 */
 
+	spin_lock_irq(&conf->device_lock);
 	for (i = 0; i < disks; i++) {
-		if (conf->mirrors[i].operational) {
-			*bdev = conf->mirrors[i].rdev->bdev;
+		if (conf->mirrors[i].operational &&
+		    conf->mirrors[i].rdev) {
+			*rdev = conf->mirrors[i].rdev;
+			atomic_inc(&(*rdev)->nr_pending);
+			spin_unlock_irq(&conf->device_lock);
 			return 0;
 		}
 	}
+	spin_unlock_irq(&conf->device_lock);
 
 	printk (KERN_ERR "raid1_map(): huh, no more operational devices?\n");
 	return -1;
@@ -244,7 +249,6 @@ static void inline update_head_pos(int disk, r1bio_t *r1_bio)
 	conf->mirrors[disk].head_position =
 		r1_bio->sector + (r1_bio->master_bio->bi_size >> 9);
-	atomic_dec(&conf->mirrors[disk].nr_pending);
 }
 
 static void end_request(struct bio *bio)
@@ -285,29 +289,30 @@ static void end_request(struct bio *bio)
 		/*
 		 * we have only one bio on the read side
 		 */
-		if (uptodate) {
+		if (uptodate)
 			raid_end_bio_io(r1_bio, uptodate);
-			return;
+		else {
+			/*
+			 * oops, read error:
+			 */
+			printk(KERN_ERR "raid1: %s: rescheduling sector %lu\n",
+			       bdev_partition_name(conf->mirrors[mirror].rdev->bdev), r1_bio->sector);
+			reschedule_retry(r1_bio);
 		}
+	} else {
+		if (r1_bio->read_bio)
+			BUG();
 		/*
-		 * oops, read error:
+		 * WRITE:
+		 *
+		 * Let's see if all mirrored write operations have finished
+		 * already.
 		 */
-		printk(KERN_ERR "raid1: %s: rescheduling sector %lu\n",
-		       bdev_partition_name(conf->mirrors[mirror].rdev->bdev), r1_bio->sector);
-		reschedule_retry(r1_bio);
-		return;
+		if (atomic_dec_and_test(&r1_bio->remaining))
+			raid_end_bio_io(r1_bio, uptodate);
 	}
-	if (r1_bio->read_bio)
-		BUG();
-	/*
-	 * WRITE:
-	 *
-	 * Let's see if all mirrored write operations have finished
-	 * already.
-	 */
-	if (atomic_dec_and_test(&r1_bio->remaining))
-		raid_end_bio_io(r1_bio, uptodate);
+	atomic_dec(&conf->mirrors[mirror].rdev->nr_pending);
 }
 
 /*
@@ -321,6 +326,8 @@ static void end_request(struct bio *bio)
  *
  * If there are 2 mirrors in the same 2 devices, performance degrades
  * because position is mirror, not device based.
+ *
+ * The rdev for the device selected will have nr_pending incremented.
  */
 static int read_balance(conf_t *conf, struct bio *bio, r1bio_t *r1_bio)
 {
@@ -329,6 +336,7 @@ static int read_balance(conf_t *conf, struct bio *bio, r1bio_t *r1_bio)
 	const int sectors = bio->bi_size >> 9;
 	sector_t new_distance, current_distance;
 
+	spin_lock_irq(&conf->device_lock);
 	/*
 	 * Check if it if we can balance. We can balance on the whole
 	 * device if no resync is going on, or below the resync window.
@@ -382,7 +390,7 @@ static int read_balance(conf_t *conf, struct bio *bio, r1bio_t *r1_bio)
 		    (!conf->mirrors[disk].operational))
 			continue;
 
-		if (!atomic_read(&conf->mirrors[disk].nr_pending)) {
+		if (!atomic_read(&conf->mirrors[disk].rdev->nr_pending)) {
 			new_disk = disk;
 			break;
 		}
@@ -399,6 +407,10 @@ static int read_balance(conf_t *conf, struct bio *bio, r1bio_t *r1_bio)
 	conf->last_used = new_disk;
 
+	if (conf->mirrors[new_disk].rdev)
+		atomic_inc(&conf->mirrors[new_disk].rdev->nr_pending);
+	spin_unlock_irq(&conf->device_lock);
+
 	return new_disk;
 }
@@ -484,21 +496,32 @@ static int make_request(request_queue_t *q, struct bio * bio)
 		read_bio->bi_private = r1_bio;
 
 		generic_make_request(read_bio);
-		atomic_inc(&conf->mirrors[r1_bio->read_disk].nr_pending);
 		return 0;
 	}
 
 	/*
 	 * WRITE:
 	 */
+	/* first select target devices under spinlock and
+	 * inc refcount on their rdev.  Record them by setting
+	 * write_bios[x] to bio
+	 */
+	spin_lock_irq(&conf->device_lock);
+	for (i = 0; i < disks; i++) {
+		if (conf->mirrors[i].operational &&
+		    conf->mirrors[i].rdev) {
+			atomic_inc(&conf->mirrors[i].rdev->nr_pending);
+			r1_bio->write_bios[i] = bio;
+		} else
+			r1_bio->write_bios[i] = NULL;
+	}
+	spin_unlock_irq(&conf->device_lock);
+
 	for (i = 0; i < disks; i++) {
 		struct bio *mbio;
-		if (!conf->mirrors[i].operational)
+		if (!r1_bio->write_bios[i])
 			continue;
 
 		mbio = bio_clone(bio, GFP_NOIO);
-		if (r1_bio->write_bios[i])
-			BUG();
 		r1_bio->write_bios[i] = mbio;
 		mbio->bi_sector = r1_bio->sector;
@@ -536,7 +559,6 @@ static int make_request(request_queue_t *q, struct bio * bio)
 			continue;
 
 		generic_make_request(mbio);
-		atomic_inc(&conf->mirrors[i].nr_pending);
 	}
 	return 0;
 }
@@ -817,7 +839,8 @@ static int raid1_remove_disk(mddev_t *mddev, int number)
 	print_conf(conf);
 	spin_lock_irq(&conf->device_lock);
 	if (p->used_slot) {
-		if (p->operational) {
+		if (p->operational ||
+		    (p->rdev && atomic_read(&p->rdev->nr_pending))) {
 			err = -EBUSY;
 			goto abort;
 		}
@@ -859,6 +882,7 @@ static void end_sync_read(struct bio *bio)
 			 conf->mirrors[r1_bio->read_disk].rdev);
 	else
 		set_bit(R1BIO_Uptodate, &r1_bio->state);
+	atomic_dec(&conf->mirrors[r1_bio->read_disk].rdev->nr_pending);
 	reschedule_retry(r1_bio);
 }
@@ -885,6 +909,7 @@ static void end_sync_write(struct bio *bio)
 		resume_device(conf);
 		put_buf(r1_bio);
 	}
+	atomic_dec(&conf->mirrors[mirror].rdev->nr_pending);
 }
 
 static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
@@ -912,7 +937,9 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
 		return;
 	}
 
+	spin_lock_irq(&conf->device_lock);
 	for (i = 0; i < disks ; i++) {
+		r1_bio->write_bios[i] = NULL;
 		if (!conf->mirrors[i].operational)
 			continue;
 		if (i == conf->last_used)
@@ -925,10 +952,17 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
 			 * don't need to write this we are just rebuilding
 			 */
 			continue;
+		if (!conf->mirrors[i].rdev)
+			continue;
+		atomic_inc(&conf->mirrors[i].rdev->nr_pending);
+		r1_bio->write_bios[i] = bio;
+	}
+	spin_unlock_irq(&conf->device_lock);
+
+	for (i = 0; i < disks ; i++) {
+		if (!r1_bio->write_bios[i])
+			continue;
 
 		mbio = bio_clone(bio, GFP_NOIO);
-		if (r1_bio->write_bios[i])
-			BUG();
 		r1_bio->write_bios[i] = mbio;
 		mbio->bi_bdev = conf->mirrors[i].rdev->bdev;
 		mbio->bi_sector = r1_bio->sector;
@@ -961,7 +995,6 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
 		md_sync_acct(conf->mirrors[i].rdev, mbio->bi_size >> 9);
 
 		generic_make_request(mbio);
-		atomic_inc(&conf->mirrors[i].nr_pending);
 	}
 }
@@ -981,7 +1014,7 @@ static void raid1d(void *data)
 	unsigned long flags;
 	mddev_t *mddev;
 	conf_t *conf;
-	struct block_device *bdev;
+	mdk_rdev_t *rdev;
 
 	for (;;) {
@@ -1001,20 +1034,18 @@ static void raid1d(void *data)
 			break;
 		case READ:
 		case READA:
-			bdev = bio->bi_bdev;
-			map(mddev, &bio->bi_bdev);
-			if (bio->bi_bdev == bdev) {
+			if (map(mddev, &rdev) == -1) {
 				printk(IO_ERROR, bdev_partition_name(bio->bi_bdev), r1_bio->sector);
 				raid_end_bio_io(r1_bio, 0);
 				break;
 			}
 			printk(REDIRECT_SECTOR,
-			       bdev_partition_name(bio->bi_bdev), r1_bio->sector);
+			       bdev_partition_name(rdev->bdev), r1_bio->sector);
+			bio->bi_bdev = rdev->bdev;
 			bio->bi_sector = r1_bio->sector;
 			bio->bi_rw = r1_bio->cmd;
 			generic_make_request(bio);
-			atomic_inc(&conf->mirrors[r1_bio->read_disk].nr_pending);
 			break;
 		}
 	}
@@ -1080,7 +1111,9 @@ static int sync_request(mddev_t *mddev, sector_t sector_nr, int go_faster)
 	 */
 	disk = conf->last_used;
 	/* make sure disk is operational */
-	while (!conf->mirrors[disk].operational) {
+	spin_lock_irq(&conf->device_lock);
+	while (!conf->mirrors[disk].operational ||
+	       !conf->mirrors[disk].rdev) {
 		if (disk <= 0)
 			disk = conf->raid_disks;
 		disk--;
@@ -1088,6 +1121,8 @@ static int sync_request(mddev_t *mddev, sector_t sector_nr, int go_faster)
 			break;
 	}
 	conf->last_used = disk;
+	atomic_inc(&conf->mirrors[disk].rdev->nr_pending);
+	spin_unlock_irq(&conf->device_lock);
 
 	mirror = conf->mirrors + conf->last_used;
@@ -1130,7 +1165,6 @@ static int sync_request(mddev_t *mddev, sector_t sector_nr, int go_faster)
 	md_sync_acct(mirror->rdev, nr_sectors);
 
 	generic_make_request(read_bio);
-	atomic_inc(&conf->mirrors[conf->last_used].nr_pending);
 
 	return nr_sectors;
 }
...
@@ -374,6 +374,7 @@ static void raid5_end_read_request (struct bio * bi)
 			md_error(conf->mddev, conf->disks[i].rdev);
 		clear_bit(R5_UPTODATE, &sh->dev[i].flags);
 	}
+	atomic_dec(&conf->disks[i].rdev->nr_pending);
 #if 0
 	/* must restore b_page before unlocking buffer... */
 	if (sh->bh_page[i] != bh->b_page) {
@@ -408,6 +409,8 @@ static void raid5_end_write_request (struct bio *bi)
 	spin_lock_irqsave(&conf->device_lock, flags);
 	if (!uptodate)
 		md_error(conf->mddev, conf->disks[i].rdev);
+	atomic_dec(&conf->disks[i].rdev->nr_pending);
+
 	clear_bit(R5_LOCKED, &sh->dev[i].flags);
 	set_bit(STRIPE_HANDLE, &sh->state);
@@ -1161,18 +1164,26 @@ static void handle_stripe(struct stripe_head *sh)
 	for (i=disks; i-- ;)
 		if (action[i]) {
 			struct bio *bi = &sh->dev[i].req;
-			struct disk_info *spare = conf->spare;
 			int skip = 0;
+			mdk_rdev_t *rdev = NULL;
 			if (action[i] == READ+1)
 				bi->bi_end_io = raid5_end_read_request;
 			else
 				bi->bi_end_io = raid5_end_write_request;
+			spin_lock_irq(&conf->device_lock);
 			if (conf->disks[i].operational)
-				bi->bi_bdev = conf->disks[i].rdev->bdev;
-			else if (spare && action[i] == WRITE+1)
-				bi->bi_bdev = spare->rdev->bdev;
+				rdev = conf->disks[i].rdev;
+			else if (conf->spare && action[i] == WRITE+1)
+				rdev = conf->spare->rdev;
 			else skip=1;
+			if (rdev)
+				atomic_inc(&rdev->nr_pending);
+			else skip=1;
+			spin_unlock_irq(&conf->device_lock);
 			if (!skip) {
+				bi->bi_bdev = rdev->bdev;
 				PRINTK("for %ld schedule op %d on disc %d\n", sh->sector, action[i]-1, i);
 				atomic_inc(&sh->count);
 				bi->bi_sector = sh->sector;
@@ -1772,7 +1783,8 @@ static int raid5_remove_disk(mddev_t *mddev, int number)
 	spin_lock_irq(&conf->device_lock);
 	if (p->used_slot) {
-		if (p->operational) {
+		if (p->operational ||
+		    atomic_read(&p->rdev->nr_pending)) {
 			err = -EBUSY;
 			goto abort;
 		}
...
@@ -160,6 +160,11 @@ struct mdk_rdev_s
 	int desc_nr;			/* descriptor index in the superblock */
 	int raid_disk;			/* role of device in array */
 
+	atomic_t nr_pending;		/* number of pending requests.
+					 * only maintained for arrays that
+					 * support hot removal
+					 */
 };
 
 typedef struct mdk_personality_s mdk_personality_t;
...
@@ -8,7 +8,6 @@ typedef struct mirror_info mirror_info_t;
 struct mirror_info {
 	mdk_rdev_t *rdev;
 	sector_t head_position;
-	atomic_t nr_pending;
 
 	/*
 	 * State bits:
...