Commit e0a33270 authored by NeilBrown's avatar NeilBrown Committed by Linus Torvalds

[PATCH] md: Fixed refcounting/locking when attempting read error correction in raid10

We need to hold a reference to rdevs while reading and writing to attempt to
correct read errors.  This reference must be taken under an rcu lock.
Signed-off-by: default avatarNeil Brown <neilb@suse.de>
Cc: "Paul E. McKenney" <paulmck@us.ibm.com>
Signed-off-by: default avatarAndrew Morton <akpm@osdl.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@osdl.org>
parent df30d0f4
......@@ -1407,36 +1407,45 @@ static void raid10d(mddev_t *mddev)
if (s > (PAGE_SIZE>>9))
s = PAGE_SIZE >> 9;
rcu_read_lock();
do {
int d = r10_bio->devs[sl].devnum;
rdev = conf->mirrors[d].rdev;
rdev = rcu_dereference(conf->mirrors[d].rdev);
if (rdev &&
test_bit(In_sync, &rdev->flags) &&
sync_page_io(rdev->bdev,
r10_bio->devs[sl].addr +
sect + rdev->data_offset,
s<<9,
conf->tmppage, READ))
success = 1;
else {
sl++;
if (sl == conf->copies)
sl = 0;
test_bit(In_sync, &rdev->flags)) {
atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
success = sync_page_io(rdev->bdev,
r10_bio->devs[sl].addr +
sect + rdev->data_offset,
s<<9,
conf->tmppage, READ);
rdev_dec_pending(rdev, mddev);
rcu_read_lock();
if (success)
break;
}
sl++;
if (sl == conf->copies)
sl = 0;
} while (!success && sl != r10_bio->read_slot);
rcu_read_unlock();
if (success) {
int start = sl;
/* write it back and re-read */
rcu_read_lock();
while (sl != r10_bio->read_slot) {
int d;
if (sl==0)
sl = conf->copies;
sl--;
d = r10_bio->devs[sl].devnum;
rdev = conf->mirrors[d].rdev;
rdev = rcu_dereference(conf->mirrors[d].rdev);
if (rdev &&
test_bit(In_sync, &rdev->flags)) {
atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
atomic_add(s, &rdev->corrected_errors);
if (sync_page_io(rdev->bdev,
r10_bio->devs[sl].addr +
......@@ -1444,6 +1453,8 @@ static void raid10d(mddev_t *mddev)
s<<9, conf->tmppage, WRITE) == 0)
/* Well, this device is dead */
md_error(mddev, rdev);
rdev_dec_pending(rdev, mddev);
rcu_read_lock();
}
}
sl = start;
......@@ -1453,17 +1464,22 @@ static void raid10d(mddev_t *mddev)
sl = conf->copies;
sl--;
d = r10_bio->devs[sl].devnum;
rdev = conf->mirrors[d].rdev;
rdev = rcu_dereference(conf->mirrors[d].rdev);
if (rdev &&
test_bit(In_sync, &rdev->flags)) {
atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
if (sync_page_io(rdev->bdev,
r10_bio->devs[sl].addr +
sect + rdev->data_offset,
s<<9, conf->tmppage, READ) == 0)
/* Well, this device is dead */
md_error(mddev, rdev);
rdev_dec_pending(rdev, mddev);
rcu_read_lock();
}
}
rcu_read_unlock();
} else {
/* Cannot read from anywhere -- bye bye array */
md_error(mddev, conf->mirrors[r10_bio->devs[r10_bio->read_slot].devnum].rdev);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment