lab.nexedi.com will be down from Thursday, 20 March 2025, 07:30:00 UTC for a duration of approximately 2 hours

Commit 3e198f78 authored by NeilBrown, committed by Linus Torvalds

[PATCH] md: tidyup some issues with raid1 resync and prepare for catching read errors

We are dereferencing ->rdev without an rcu lock!
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent ddaf22ab
...@@ -177,6 +177,13 @@ static inline void free_r1bio(r1bio_t *r1_bio) ...@@ -177,6 +177,13 @@ static inline void free_r1bio(r1bio_t *r1_bio)
static inline void put_buf(r1bio_t *r1_bio) static inline void put_buf(r1bio_t *r1_bio)
{ {
conf_t *conf = mddev_to_conf(r1_bio->mddev); conf_t *conf = mddev_to_conf(r1_bio->mddev);
int i;
for (i=0; i<conf->raid_disks; i++) {
struct bio *bio = r1_bio->bios[i];
if (bio->bi_end_io)
rdev_dec_pending(conf->mirrors[i].rdev, r1_bio->mddev);
}
mempool_free(r1_bio, conf->r1buf_pool); mempool_free(r1_bio, conf->r1buf_pool);
...@@ -1085,7 +1092,6 @@ static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error) ...@@ -1085,7 +1092,6 @@ static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error)
conf->mirrors[r1_bio->read_disk].rdev); conf->mirrors[r1_bio->read_disk].rdev);
} else } else
set_bit(R1BIO_Uptodate, &r1_bio->state); set_bit(R1BIO_Uptodate, &r1_bio->state);
rdev_dec_pending(conf->mirrors[r1_bio->read_disk].rdev, conf->mddev);
reschedule_retry(r1_bio); reschedule_retry(r1_bio);
return 0; return 0;
} }
...@@ -1116,7 +1122,6 @@ static int end_sync_write(struct bio *bio, unsigned int bytes_done, int error) ...@@ -1116,7 +1122,6 @@ static int end_sync_write(struct bio *bio, unsigned int bytes_done, int error)
md_done_sync(mddev, r1_bio->sectors, uptodate); md_done_sync(mddev, r1_bio->sectors, uptodate);
put_buf(r1_bio); put_buf(r1_bio);
} }
rdev_dec_pending(conf->mirrors[mirror].rdev, mddev);
return 0; return 0;
} }
...@@ -1153,10 +1158,14 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio) ...@@ -1153,10 +1158,14 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
atomic_set(&r1_bio->remaining, 1); atomic_set(&r1_bio->remaining, 1);
for (i = 0; i < disks ; i++) { for (i = 0; i < disks ; i++) {
wbio = r1_bio->bios[i]; wbio = r1_bio->bios[i];
if (wbio->bi_end_io != end_sync_write) if (wbio->bi_end_io == NULL ||
(wbio->bi_end_io == end_sync_read &&
(i == r1_bio->read_disk ||
!test_bit(MD_RECOVERY_SYNC, &mddev->recovery))))
continue; continue;
atomic_inc(&conf->mirrors[i].rdev->nr_pending); wbio->bi_rw = WRITE;
wbio->bi_end_io = end_sync_write;
atomic_inc(&r1_bio->remaining); atomic_inc(&r1_bio->remaining);
md_sync_acct(conf->mirrors[i].rdev->bdev, wbio->bi_size >> 9); md_sync_acct(conf->mirrors[i].rdev->bdev, wbio->bi_size >> 9);
...@@ -1388,14 +1397,13 @@ static int init_resync(conf_t *conf) ...@@ -1388,14 +1397,13 @@ static int init_resync(conf_t *conf)
static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster) static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster)
{ {
conf_t *conf = mddev_to_conf(mddev); conf_t *conf = mddev_to_conf(mddev);
mirror_info_t *mirror;
r1bio_t *r1_bio; r1bio_t *r1_bio;
struct bio *bio; struct bio *bio;
sector_t max_sector, nr_sectors; sector_t max_sector, nr_sectors;
int disk; int disk = -1;
int i; int i;
int wonly; int wonly = -1;
int write_targets = 0; int write_targets = 0, read_targets = 0;
int sync_blocks; int sync_blocks;
int still_degraded = 0; int still_degraded = 0;
...@@ -1447,44 +1455,24 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i ...@@ -1447,44 +1455,24 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
conf->next_resync = sector_nr; conf->next_resync = sector_nr;
r1_bio = mempool_alloc(conf->r1buf_pool, GFP_NOIO);
rcu_read_lock();
/* /*
* If reconstructing, and >1 working disc, * If we get a correctable read error during resync or recovery,
* could dedicate one to rebuild and others to * we might want to read from a different device. So we
* service read requests .. * flag all drives that could conceivably be read from for READ,
* and any others (which will be non-In_sync devices) for WRITE.
* If a read fails, we try reading from something else for which READ
* is OK.
*/ */
disk = conf->last_used;
/* make sure disk is operational */
wonly = disk;
while (conf->mirrors[disk].rdev == NULL ||
!test_bit(In_sync, &conf->mirrors[disk].rdev->flags) ||
test_bit(WriteMostly, &conf->mirrors[disk].rdev->flags)
) {
if (conf->mirrors[disk].rdev &&
test_bit(In_sync, &conf->mirrors[disk].rdev->flags))
wonly = disk;
if (disk <= 0)
disk = conf->raid_disks;
disk--;
if (disk == conf->last_used) {
disk = wonly;
break;
}
}
conf->last_used = disk;
atomic_inc(&conf->mirrors[disk].rdev->nr_pending);
mirror = conf->mirrors + disk;
r1_bio = mempool_alloc(conf->r1buf_pool, GFP_NOIO);
r1_bio->mddev = mddev; r1_bio->mddev = mddev;
r1_bio->sector = sector_nr; r1_bio->sector = sector_nr;
r1_bio->state = 0; r1_bio->state = 0;
set_bit(R1BIO_IsSync, &r1_bio->state); set_bit(R1BIO_IsSync, &r1_bio->state);
r1_bio->read_disk = disk;
for (i=0; i < conf->raid_disks; i++) { for (i=0; i < conf->raid_disks; i++) {
mdk_rdev_t *rdev;
bio = r1_bio->bios[i]; bio = r1_bio->bios[i];
/* take from bio_init */ /* take from bio_init */
...@@ -1499,35 +1487,49 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i ...@@ -1499,35 +1487,49 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
bio->bi_end_io = NULL; bio->bi_end_io = NULL;
bio->bi_private = NULL; bio->bi_private = NULL;
if (i == disk) { rdev = rcu_dereference(conf->mirrors[i].rdev);
bio->bi_rw = READ; if (rdev == NULL ||
bio->bi_end_io = end_sync_read; test_bit(Faulty, &rdev->flags)) {
} else if (conf->mirrors[i].rdev == NULL ||
test_bit(Faulty, &conf->mirrors[i].rdev->flags)) {
still_degraded = 1; still_degraded = 1;
continue; continue;
} else if (!test_bit(In_sync, &conf->mirrors[i].rdev->flags) || } else if (!test_bit(In_sync, &rdev->flags)) {
sector_nr + RESYNC_SECTORS > mddev->recovery_cp ||
test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
bio->bi_rw = WRITE; bio->bi_rw = WRITE;
bio->bi_end_io = end_sync_write; bio->bi_end_io = end_sync_write;
write_targets ++; write_targets ++;
} else } else {
/* no need to read or write here */ /* may need to read from here */
continue; bio->bi_rw = READ;
bio->bi_sector = sector_nr + conf->mirrors[i].rdev->data_offset; bio->bi_end_io = end_sync_read;
bio->bi_bdev = conf->mirrors[i].rdev->bdev; if (test_bit(WriteMostly, &rdev->flags)) {
if (wonly < 0)
wonly = i;
} else {
if (disk < 0)
disk = i;
}
read_targets++;
}
atomic_inc(&rdev->nr_pending);
bio->bi_sector = sector_nr + rdev->data_offset;
bio->bi_bdev = rdev->bdev;
bio->bi_private = r1_bio; bio->bi_private = r1_bio;
} }
rcu_read_unlock();
if (disk < 0)
disk = wonly;
r1_bio->read_disk = disk;
if (write_targets == 0) { if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && read_targets > 0)
/* extra read targets are also write targets */
write_targets += read_targets-1;
if (write_targets == 0 || read_targets == 0) {
/* There is nowhere to write, so all non-sync /* There is nowhere to write, so all non-sync
* drives must be failed - so we are finished * drives must be failed - so we are finished
*/ */
sector_t rv = max_sector - sector_nr; sector_t rv = max_sector - sector_nr;
*skipped = 1; *skipped = 1;
put_buf(r1_bio); put_buf(r1_bio);
rdev_dec_pending(conf->mirrors[disk].rdev, mddev);
return rv; return rv;
} }
...@@ -1578,10 +1580,10 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i ...@@ -1578,10 +1580,10 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
sync_blocks -= (len>>9); sync_blocks -= (len>>9);
} while (r1_bio->bios[disk]->bi_vcnt < RESYNC_PAGES); } while (r1_bio->bios[disk]->bi_vcnt < RESYNC_PAGES);
bio_full: bio_full:
bio = r1_bio->bios[disk]; bio = r1_bio->bios[r1_bio->read_disk];
r1_bio->sectors = nr_sectors; r1_bio->sectors = nr_sectors;
md_sync_acct(mirror->rdev->bdev, nr_sectors); md_sync_acct(conf->mirrors[r1_bio->read_disk].rdev->bdev, nr_sectors);
generic_make_request(bio); generic_make_request(bio);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment