Commit a448af25 authored by Yu Kuai's avatar Yu Kuai Committed by Song Liu

md/raid10: remove rcu protection to access rdev from conf

Because it's safe to access rdev from conf:
 - If any spinlock is held, because synchronize_rcu() from
   md_kick_rdev_from_array() will prevent 'rdev' from being freed until
   the spinlock is released;
 - If 'reconfig_lock' is held, because rdev can't be added or removed from
   array;
 - If there is normal IO inflight, because mddev_suspend() will prevent
   rdev from being added to or removed from the array;
 - If there is sync IO inflight, because 'MD_RECOVERY_RUNNING' is
   checked in remove_and_add_spares().

And these will cover all the scenarios in raid10.

This patch also cleans up the code that handles the case where the
replacement replaces rdev while IO is still inflight.
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/r/20231125081604.3939938-3-yukuai1@huaweicloud.com
parent c891f1fd
...@@ -743,7 +743,6 @@ static struct md_rdev *read_balance(struct r10conf *conf, ...@@ -743,7 +743,6 @@ static struct md_rdev *read_balance(struct r10conf *conf,
struct geom *geo = &conf->geo; struct geom *geo = &conf->geo;
raid10_find_phys(conf, r10_bio); raid10_find_phys(conf, r10_bio);
rcu_read_lock();
best_dist_slot = -1; best_dist_slot = -1;
min_pending = UINT_MAX; min_pending = UINT_MAX;
best_dist_rdev = NULL; best_dist_rdev = NULL;
...@@ -775,18 +774,11 @@ static struct md_rdev *read_balance(struct r10conf *conf, ...@@ -775,18 +774,11 @@ static struct md_rdev *read_balance(struct r10conf *conf,
if (r10_bio->devs[slot].bio == IO_BLOCKED) if (r10_bio->devs[slot].bio == IO_BLOCKED)
continue; continue;
disk = r10_bio->devs[slot].devnum; disk = r10_bio->devs[slot].devnum;
rdev = rcu_dereference(conf->mirrors[disk].replacement); rdev = conf->mirrors[disk].replacement;
if (rdev == NULL || test_bit(Faulty, &rdev->flags) || if (rdev == NULL || test_bit(Faulty, &rdev->flags) ||
r10_bio->devs[slot].addr + sectors > r10_bio->devs[slot].addr + sectors >
rdev->recovery_offset) { rdev->recovery_offset)
/* rdev = conf->mirrors[disk].rdev;
* Read replacement first to prevent reading both rdev
* and replacement as NULL during replacement replace
* rdev.
*/
smp_mb();
rdev = rcu_dereference(conf->mirrors[disk].rdev);
}
if (rdev == NULL || if (rdev == NULL ||
test_bit(Faulty, &rdev->flags)) test_bit(Faulty, &rdev->flags))
continue; continue;
...@@ -876,7 +868,6 @@ static struct md_rdev *read_balance(struct r10conf *conf, ...@@ -876,7 +868,6 @@ static struct md_rdev *read_balance(struct r10conf *conf,
r10_bio->read_slot = slot; r10_bio->read_slot = slot;
} else } else
rdev = NULL; rdev = NULL;
rcu_read_unlock();
*max_sectors = best_good_sectors; *max_sectors = best_good_sectors;
return rdev; return rdev;
...@@ -1198,9 +1189,8 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio, ...@@ -1198,9 +1189,8 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
*/ */
gfp = GFP_NOIO | __GFP_HIGH; gfp = GFP_NOIO | __GFP_HIGH;
rcu_read_lock();
disk = r10_bio->devs[slot].devnum; disk = r10_bio->devs[slot].devnum;
err_rdev = rcu_dereference(conf->mirrors[disk].rdev); err_rdev = conf->mirrors[disk].rdev;
if (err_rdev) if (err_rdev)
snprintf(b, sizeof(b), "%pg", err_rdev->bdev); snprintf(b, sizeof(b), "%pg", err_rdev->bdev);
else { else {
...@@ -1208,7 +1198,6 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio, ...@@ -1208,7 +1198,6 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
/* This never gets dereferenced */ /* This never gets dereferenced */
err_rdev = r10_bio->devs[slot].rdev; err_rdev = r10_bio->devs[slot].rdev;
} }
rcu_read_unlock();
} }
if (!regular_request_wait(mddev, conf, bio, r10_bio->sectors)) if (!regular_request_wait(mddev, conf, bio, r10_bio->sectors))
...@@ -1279,15 +1268,8 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio, ...@@ -1279,15 +1268,8 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
int devnum = r10_bio->devs[n_copy].devnum; int devnum = r10_bio->devs[n_copy].devnum;
struct bio *mbio; struct bio *mbio;
if (replacement) { rdev = replacement ? conf->mirrors[devnum].replacement :
rdev = conf->mirrors[devnum].replacement; conf->mirrors[devnum].rdev;
if (rdev == NULL) {
/* Replacement just got moved to main 'rdev' */
smp_mb();
rdev = conf->mirrors[devnum].rdev;
}
} else
rdev = conf->mirrors[devnum].rdev;
mbio = bio_alloc_clone(rdev->bdev, bio, GFP_NOIO, &mddev->bio_set); mbio = bio_alloc_clone(rdev->bdev, bio, GFP_NOIO, &mddev->bio_set);
if (replacement) if (replacement)
...@@ -1321,25 +1303,6 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio, ...@@ -1321,25 +1303,6 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
} }
} }
static struct md_rdev *dereference_rdev_and_rrdev(struct raid10_info *mirror,
struct md_rdev **prrdev)
{
struct md_rdev *rdev, *rrdev;
rrdev = rcu_dereference(mirror->replacement);
/*
* Read replacement first to prevent reading both rdev and
* replacement as NULL during replacement replace rdev.
*/
smp_mb();
rdev = rcu_dereference(mirror->rdev);
if (rdev == rrdev)
rrdev = NULL;
*prrdev = rrdev;
return rdev;
}
static void wait_blocked_dev(struct mddev *mddev, struct r10bio *r10_bio) static void wait_blocked_dev(struct mddev *mddev, struct r10bio *r10_bio)
{ {
int i; int i;
...@@ -1348,11 +1311,11 @@ static void wait_blocked_dev(struct mddev *mddev, struct r10bio *r10_bio) ...@@ -1348,11 +1311,11 @@ static void wait_blocked_dev(struct mddev *mddev, struct r10bio *r10_bio)
retry_wait: retry_wait:
blocked_rdev = NULL; blocked_rdev = NULL;
rcu_read_lock();
for (i = 0; i < conf->copies; i++) { for (i = 0; i < conf->copies; i++) {
struct md_rdev *rdev, *rrdev; struct md_rdev *rdev, *rrdev;
rdev = dereference_rdev_and_rrdev(&conf->mirrors[i], &rrdev); rdev = conf->mirrors[i].rdev;
rrdev = conf->mirrors[i].replacement;
if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) { if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
atomic_inc(&rdev->nr_pending); atomic_inc(&rdev->nr_pending);
blocked_rdev = rdev; blocked_rdev = rdev;
...@@ -1391,7 +1354,6 @@ static void wait_blocked_dev(struct mddev *mddev, struct r10bio *r10_bio) ...@@ -1391,7 +1354,6 @@ static void wait_blocked_dev(struct mddev *mddev, struct r10bio *r10_bio)
} }
} }
} }
rcu_read_unlock();
if (unlikely(blocked_rdev)) { if (unlikely(blocked_rdev)) {
/* Have to wait for this device to get unblocked, then retry */ /* Have to wait for this device to get unblocked, then retry */
...@@ -1474,14 +1436,14 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio, ...@@ -1474,14 +1436,14 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
wait_blocked_dev(mddev, r10_bio); wait_blocked_dev(mddev, r10_bio);
rcu_read_lock();
max_sectors = r10_bio->sectors; max_sectors = r10_bio->sectors;
for (i = 0; i < conf->copies; i++) { for (i = 0; i < conf->copies; i++) {
int d = r10_bio->devs[i].devnum; int d = r10_bio->devs[i].devnum;
struct md_rdev *rdev, *rrdev; struct md_rdev *rdev, *rrdev;
rdev = dereference_rdev_and_rrdev(&conf->mirrors[d], &rrdev); rdev = conf->mirrors[d].rdev;
rrdev = conf->mirrors[d].replacement;
if (rdev && (test_bit(Faulty, &rdev->flags))) if (rdev && (test_bit(Faulty, &rdev->flags)))
rdev = NULL; rdev = NULL;
if (rrdev && (test_bit(Faulty, &rrdev->flags))) if (rrdev && (test_bit(Faulty, &rrdev->flags)))
...@@ -1535,7 +1497,6 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio, ...@@ -1535,7 +1497,6 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
atomic_inc(&rrdev->nr_pending); atomic_inc(&rrdev->nr_pending);
} }
} }
rcu_read_unlock();
if (max_sectors < r10_bio->sectors) if (max_sectors < r10_bio->sectors)
r10_bio->sectors = max_sectors; r10_bio->sectors = max_sectors;
...@@ -1625,17 +1586,8 @@ static void raid10_end_discard_request(struct bio *bio) ...@@ -1625,17 +1586,8 @@ static void raid10_end_discard_request(struct bio *bio)
set_bit(R10BIO_Uptodate, &r10_bio->state); set_bit(R10BIO_Uptodate, &r10_bio->state);
dev = find_bio_disk(conf, r10_bio, bio, &slot, &repl); dev = find_bio_disk(conf, r10_bio, bio, &slot, &repl);
if (repl) rdev = repl ? conf->mirrors[dev].replacement :
rdev = conf->mirrors[dev].replacement; conf->mirrors[dev].rdev;
if (!rdev) {
/*
* raid10_remove_disk uses smp_mb to make sure rdev is set to
* replacement before setting replacement to NULL. It can read
* rdev first without barrier protect even replacement is NULL
*/
smp_rmb();
rdev = conf->mirrors[dev].rdev;
}
raid_end_discard_bio(r10_bio); raid_end_discard_bio(r10_bio);
rdev_dec_pending(rdev, conf->mddev); rdev_dec_pending(rdev, conf->mddev);
...@@ -1785,11 +1737,11 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio) ...@@ -1785,11 +1737,11 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio)
* inc refcount on their rdev. Record them by setting * inc refcount on their rdev. Record them by setting
* bios[x] to bio * bios[x] to bio
*/ */
rcu_read_lock();
for (disk = 0; disk < geo->raid_disks; disk++) { for (disk = 0; disk < geo->raid_disks; disk++) {
struct md_rdev *rdev, *rrdev; struct md_rdev *rdev, *rrdev;
rdev = dereference_rdev_and_rrdev(&conf->mirrors[disk], &rrdev); rdev = conf->mirrors[disk].rdev;
rrdev = conf->mirrors[disk].replacement;
r10_bio->devs[disk].bio = NULL; r10_bio->devs[disk].bio = NULL;
r10_bio->devs[disk].repl_bio = NULL; r10_bio->devs[disk].repl_bio = NULL;
...@@ -1809,7 +1761,6 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio) ...@@ -1809,7 +1761,6 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio)
atomic_inc(&rrdev->nr_pending); atomic_inc(&rrdev->nr_pending);
} }
} }
rcu_read_unlock();
atomic_set(&r10_bio->remaining, 1); atomic_set(&r10_bio->remaining, 1);
for (disk = 0; disk < geo->raid_disks; disk++) { for (disk = 0; disk < geo->raid_disks; disk++) {
...@@ -1939,6 +1890,8 @@ static void raid10_status(struct seq_file *seq, struct mddev *mddev) ...@@ -1939,6 +1890,8 @@ static void raid10_status(struct seq_file *seq, struct mddev *mddev)
struct r10conf *conf = mddev->private; struct r10conf *conf = mddev->private;
int i; int i;
lockdep_assert_held(&mddev->lock);
if (conf->geo.near_copies < conf->geo.raid_disks) if (conf->geo.near_copies < conf->geo.raid_disks)
seq_printf(seq, " %dK chunks", mddev->chunk_sectors / 2); seq_printf(seq, " %dK chunks", mddev->chunk_sectors / 2);
if (conf->geo.near_copies > 1) if (conf->geo.near_copies > 1)
...@@ -1953,12 +1906,11 @@ static void raid10_status(struct seq_file *seq, struct mddev *mddev) ...@@ -1953,12 +1906,11 @@ static void raid10_status(struct seq_file *seq, struct mddev *mddev)
} }
seq_printf(seq, " [%d/%d] [", conf->geo.raid_disks, seq_printf(seq, " [%d/%d] [", conf->geo.raid_disks,
conf->geo.raid_disks - mddev->degraded); conf->geo.raid_disks - mddev->degraded);
rcu_read_lock();
for (i = 0; i < conf->geo.raid_disks; i++) { for (i = 0; i < conf->geo.raid_disks; i++) {
struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev); struct md_rdev *rdev = READ_ONCE(conf->mirrors[i].rdev);
seq_printf(seq, "%s", rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_"); seq_printf(seq, "%s", rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_");
} }
rcu_read_unlock();
seq_printf(seq, "]"); seq_printf(seq, "]");
} }
...@@ -1980,7 +1932,6 @@ static int _enough(struct r10conf *conf, int previous, int ignore) ...@@ -1980,7 +1932,6 @@ static int _enough(struct r10conf *conf, int previous, int ignore)
ncopies = conf->geo.near_copies; ncopies = conf->geo.near_copies;
} }
rcu_read_lock();
do { do {
int n = conf->copies; int n = conf->copies;
int cnt = 0; int cnt = 0;
...@@ -1988,7 +1939,7 @@ static int _enough(struct r10conf *conf, int previous, int ignore) ...@@ -1988,7 +1939,7 @@ static int _enough(struct r10conf *conf, int previous, int ignore)
while (n--) { while (n--) {
struct md_rdev *rdev; struct md_rdev *rdev;
if (this != ignore && if (this != ignore &&
(rdev = rcu_dereference(conf->mirrors[this].rdev)) && (rdev = conf->mirrors[this].rdev) &&
test_bit(In_sync, &rdev->flags)) test_bit(In_sync, &rdev->flags))
cnt++; cnt++;
this = (this+1) % disks; this = (this+1) % disks;
...@@ -1999,7 +1950,6 @@ static int _enough(struct r10conf *conf, int previous, int ignore) ...@@ -1999,7 +1950,6 @@ static int _enough(struct r10conf *conf, int previous, int ignore)
} while (first != 0); } while (first != 0);
has_enough = 1; has_enough = 1;
out: out:
rcu_read_unlock();
return has_enough; return has_enough;
} }
...@@ -2072,8 +2022,7 @@ static void print_conf(struct r10conf *conf) ...@@ -2072,8 +2022,7 @@ static void print_conf(struct r10conf *conf)
pr_debug(" --- wd:%d rd:%d\n", conf->geo.raid_disks - conf->mddev->degraded, pr_debug(" --- wd:%d rd:%d\n", conf->geo.raid_disks - conf->mddev->degraded,
conf->geo.raid_disks); conf->geo.raid_disks);
/* This is only called with ->reconfix_mutex held, so lockdep_assert_held(&conf->mddev->reconfig_mutex);
* rcu protection of rdev is not needed */
for (i = 0; i < conf->geo.raid_disks; i++) { for (i = 0; i < conf->geo.raid_disks; i++) {
rdev = conf->mirrors[i].rdev; rdev = conf->mirrors[i].rdev;
if (rdev) if (rdev)
...@@ -2190,7 +2139,7 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev) ...@@ -2190,7 +2139,7 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
err = 0; err = 0;
if (rdev->saved_raid_disk != mirror) if (rdev->saved_raid_disk != mirror)
conf->fullsync = 1; conf->fullsync = 1;
rcu_assign_pointer(p->rdev, rdev); WRITE_ONCE(p->rdev, rdev);
break; break;
} }
...@@ -2204,7 +2153,7 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev) ...@@ -2204,7 +2153,7 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
disk_stack_limits(mddev->gendisk, rdev->bdev, disk_stack_limits(mddev->gendisk, rdev->bdev,
rdev->data_offset << 9); rdev->data_offset << 9);
conf->fullsync = 1; conf->fullsync = 1;
rcu_assign_pointer(p->replacement, rdev); WRITE_ONCE(p->replacement, rdev);
} }
print_conf(conf); print_conf(conf);
...@@ -2246,15 +2195,12 @@ static int raid10_remove_disk(struct mddev *mddev, struct md_rdev *rdev) ...@@ -2246,15 +2195,12 @@ static int raid10_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
err = -EBUSY; err = -EBUSY;
goto abort; goto abort;
} }
*rdevp = NULL; WRITE_ONCE(*rdevp, NULL);
if (p->replacement) { if (p->replacement) {
/* We must have just cleared 'rdev' */ /* We must have just cleared 'rdev' */
p->rdev = p->replacement; WRITE_ONCE(p->rdev, p->replacement);
clear_bit(Replacement, &p->replacement->flags); clear_bit(Replacement, &p->replacement->flags);
smp_mb(); /* Make sure other CPUs may see both as identical WRITE_ONCE(p->replacement, NULL);
* but will never see neither -- if they are careful.
*/
p->replacement = NULL;
} }
clear_bit(WantReplacement, &rdev->flags); clear_bit(WantReplacement, &rdev->flags);
...@@ -2754,20 +2700,18 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 ...@@ -2754,20 +2700,18 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
if (s > (PAGE_SIZE>>9)) if (s > (PAGE_SIZE>>9))
s = PAGE_SIZE >> 9; s = PAGE_SIZE >> 9;
rcu_read_lock();
do { do {
sector_t first_bad; sector_t first_bad;
int bad_sectors; int bad_sectors;
d = r10_bio->devs[sl].devnum; d = r10_bio->devs[sl].devnum;
rdev = rcu_dereference(conf->mirrors[d].rdev); rdev = conf->mirrors[d].rdev;
if (rdev && if (rdev &&
test_bit(In_sync, &rdev->flags) && test_bit(In_sync, &rdev->flags) &&
!test_bit(Faulty, &rdev->flags) && !test_bit(Faulty, &rdev->flags) &&
is_badblock(rdev, r10_bio->devs[sl].addr + sect, s, is_badblock(rdev, r10_bio->devs[sl].addr + sect, s,
&first_bad, &bad_sectors) == 0) { &first_bad, &bad_sectors) == 0) {
atomic_inc(&rdev->nr_pending); atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
success = sync_page_io(rdev, success = sync_page_io(rdev,
r10_bio->devs[sl].addr + r10_bio->devs[sl].addr +
sect, sect,
...@@ -2775,7 +2719,6 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 ...@@ -2775,7 +2719,6 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
conf->tmppage, conf->tmppage,
REQ_OP_READ, false); REQ_OP_READ, false);
rdev_dec_pending(rdev, mddev); rdev_dec_pending(rdev, mddev);
rcu_read_lock();
if (success) if (success)
break; break;
} }
...@@ -2783,7 +2726,6 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 ...@@ -2783,7 +2726,6 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
if (sl == conf->copies) if (sl == conf->copies)
sl = 0; sl = 0;
} while (sl != slot); } while (sl != slot);
rcu_read_unlock();
if (!success) { if (!success) {
/* Cannot read from anywhere, just mark the block /* Cannot read from anywhere, just mark the block
...@@ -2807,20 +2749,18 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 ...@@ -2807,20 +2749,18 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
start = sl; start = sl;
/* write it back and re-read */ /* write it back and re-read */
rcu_read_lock();
while (sl != slot) { while (sl != slot) {
if (sl==0) if (sl==0)
sl = conf->copies; sl = conf->copies;
sl--; sl--;
d = r10_bio->devs[sl].devnum; d = r10_bio->devs[sl].devnum;
rdev = rcu_dereference(conf->mirrors[d].rdev); rdev = conf->mirrors[d].rdev;
if (!rdev || if (!rdev ||
test_bit(Faulty, &rdev->flags) || test_bit(Faulty, &rdev->flags) ||
!test_bit(In_sync, &rdev->flags)) !test_bit(In_sync, &rdev->flags))
continue; continue;
atomic_inc(&rdev->nr_pending); atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
if (r10_sync_page_io(rdev, if (r10_sync_page_io(rdev,
r10_bio->devs[sl].addr + r10_bio->devs[sl].addr +
sect, sect,
...@@ -2839,7 +2779,6 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 ...@@ -2839,7 +2779,6 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
rdev->bdev); rdev->bdev);
} }
rdev_dec_pending(rdev, mddev); rdev_dec_pending(rdev, mddev);
rcu_read_lock();
} }
sl = start; sl = start;
while (sl != slot) { while (sl != slot) {
...@@ -2847,14 +2786,13 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 ...@@ -2847,14 +2786,13 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
sl = conf->copies; sl = conf->copies;
sl--; sl--;
d = r10_bio->devs[sl].devnum; d = r10_bio->devs[sl].devnum;
rdev = rcu_dereference(conf->mirrors[d].rdev); rdev = conf->mirrors[d].rdev;
if (!rdev || if (!rdev ||
test_bit(Faulty, &rdev->flags) || test_bit(Faulty, &rdev->flags) ||
!test_bit(In_sync, &rdev->flags)) !test_bit(In_sync, &rdev->flags))
continue; continue;
atomic_inc(&rdev->nr_pending); atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
switch (r10_sync_page_io(rdev, switch (r10_sync_page_io(rdev,
r10_bio->devs[sl].addr + r10_bio->devs[sl].addr +
sect, sect,
...@@ -2882,9 +2820,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 ...@@ -2882,9 +2820,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
} }
rdev_dec_pending(rdev, mddev); rdev_dec_pending(rdev, mddev);
rcu_read_lock();
} }
rcu_read_unlock();
sectors -= s; sectors -= s;
sect += s; sect += s;
...@@ -3358,14 +3294,13 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, ...@@ -3358,14 +3294,13 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
/* Completed a full sync so the replacements /* Completed a full sync so the replacements
* are now fully recovered. * are now fully recovered.
*/ */
rcu_read_lock();
for (i = 0; i < conf->geo.raid_disks; i++) { for (i = 0; i < conf->geo.raid_disks; i++) {
struct md_rdev *rdev = struct md_rdev *rdev =
rcu_dereference(conf->mirrors[i].replacement); conf->mirrors[i].replacement;
if (rdev) if (rdev)
rdev->recovery_offset = MaxSector; rdev->recovery_offset = MaxSector;
} }
rcu_read_unlock();
} }
conf->fullsync = 0; conf->fullsync = 0;
} }
...@@ -3446,9 +3381,8 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, ...@@ -3446,9 +3381,8 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
struct raid10_info *mirror = &conf->mirrors[i]; struct raid10_info *mirror = &conf->mirrors[i];
struct md_rdev *mrdev, *mreplace; struct md_rdev *mrdev, *mreplace;
rcu_read_lock(); mrdev = mirror->rdev;
mrdev = rcu_dereference(mirror->rdev); mreplace = mirror->replacement;
mreplace = rcu_dereference(mirror->replacement);
if (mrdev && (test_bit(Faulty, &mrdev->flags) || if (mrdev && (test_bit(Faulty, &mrdev->flags) ||
test_bit(In_sync, &mrdev->flags))) test_bit(In_sync, &mrdev->flags)))
...@@ -3456,22 +3390,18 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, ...@@ -3456,22 +3390,18 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
if (mreplace && test_bit(Faulty, &mreplace->flags)) if (mreplace && test_bit(Faulty, &mreplace->flags))
mreplace = NULL; mreplace = NULL;
if (!mrdev && !mreplace) { if (!mrdev && !mreplace)
rcu_read_unlock();
continue; continue;
}
still_degraded = 0; still_degraded = 0;
/* want to reconstruct this device */ /* want to reconstruct this device */
rb2 = r10_bio; rb2 = r10_bio;
sect = raid10_find_virt(conf, sector_nr, i); sect = raid10_find_virt(conf, sector_nr, i);
if (sect >= mddev->resync_max_sectors) { if (sect >= mddev->resync_max_sectors)
/* last stripe is not complete - don't /* last stripe is not complete - don't
* try to recover this sector. * try to recover this sector.
*/ */
rcu_read_unlock();
continue; continue;
}
/* Unless we are doing a full sync, or a replacement /* Unless we are doing a full sync, or a replacement
* we only need to recover the block if it is set in * we only need to recover the block if it is set in
* the bitmap * the bitmap
...@@ -3487,14 +3417,12 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, ...@@ -3487,14 +3417,12 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
* that there will never be anything to do here * that there will never be anything to do here
*/ */
chunks_skipped = -1; chunks_skipped = -1;
rcu_read_unlock();
continue; continue;
} }
if (mrdev) if (mrdev)
atomic_inc(&mrdev->nr_pending); atomic_inc(&mrdev->nr_pending);
if (mreplace) if (mreplace)
atomic_inc(&mreplace->nr_pending); atomic_inc(&mreplace->nr_pending);
rcu_read_unlock();
r10_bio = raid10_alloc_init_r10buf(conf); r10_bio = raid10_alloc_init_r10buf(conf);
r10_bio->state = 0; r10_bio->state = 0;
...@@ -3513,10 +3441,9 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, ...@@ -3513,10 +3441,9 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
/* Need to check if the array will still be /* Need to check if the array will still be
* degraded * degraded
*/ */
rcu_read_lock();
for (j = 0; j < conf->geo.raid_disks; j++) { for (j = 0; j < conf->geo.raid_disks; j++) {
struct md_rdev *rdev = rcu_dereference( struct md_rdev *rdev = conf->mirrors[j].rdev;
conf->mirrors[j].rdev);
if (rdev == NULL || test_bit(Faulty, &rdev->flags)) { if (rdev == NULL || test_bit(Faulty, &rdev->flags)) {
still_degraded = 1; still_degraded = 1;
break; break;
...@@ -3531,8 +3458,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, ...@@ -3531,8 +3458,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
int k; int k;
int d = r10_bio->devs[j].devnum; int d = r10_bio->devs[j].devnum;
sector_t from_addr, to_addr; sector_t from_addr, to_addr;
struct md_rdev *rdev = struct md_rdev *rdev = conf->mirrors[d].rdev;
rcu_dereference(conf->mirrors[d].rdev);
sector_t sector, first_bad; sector_t sector, first_bad;
int bad_sectors; int bad_sectors;
if (!rdev || if (!rdev ||
...@@ -3611,7 +3537,6 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, ...@@ -3611,7 +3537,6 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
atomic_inc(&r10_bio->remaining); atomic_inc(&r10_bio->remaining);
break; break;
} }
rcu_read_unlock();
if (j == conf->copies) { if (j == conf->copies) {
/* Cannot recover, so abort the recovery or /* Cannot recover, so abort the recovery or
* record a bad block */ * record a bad block */
...@@ -3738,12 +3663,10 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, ...@@ -3738,12 +3663,10 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
bio = r10_bio->devs[i].bio; bio = r10_bio->devs[i].bio;
bio->bi_status = BLK_STS_IOERR; bio->bi_status = BLK_STS_IOERR;
rcu_read_lock(); rdev = conf->mirrors[d].rdev;
rdev = rcu_dereference(conf->mirrors[d].rdev); if (rdev == NULL || test_bit(Faulty, &rdev->flags))
if (rdev == NULL || test_bit(Faulty, &rdev->flags)) {
rcu_read_unlock();
continue; continue;
}
sector = r10_bio->devs[i].addr; sector = r10_bio->devs[i].addr;
if (is_badblock(rdev, sector, max_sync, if (is_badblock(rdev, sector, max_sync,
&first_bad, &bad_sectors)) { &first_bad, &bad_sectors)) {
...@@ -3753,7 +3676,6 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, ...@@ -3753,7 +3676,6 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
bad_sectors -= (sector - first_bad); bad_sectors -= (sector - first_bad);
if (max_sync > bad_sectors) if (max_sync > bad_sectors)
max_sync = bad_sectors; max_sync = bad_sectors;
rcu_read_unlock();
continue; continue;
} }
} }
...@@ -3769,11 +3691,10 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, ...@@ -3769,11 +3691,10 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
bio_set_dev(bio, rdev->bdev); bio_set_dev(bio, rdev->bdev);
count++; count++;
rdev = rcu_dereference(conf->mirrors[d].replacement); rdev = conf->mirrors[d].replacement;
if (rdev == NULL || test_bit(Faulty, &rdev->flags)) { if (rdev == NULL || test_bit(Faulty, &rdev->flags))
rcu_read_unlock();
continue; continue;
}
atomic_inc(&rdev->nr_pending); atomic_inc(&rdev->nr_pending);
/* Need to set up for writing to the replacement */ /* Need to set up for writing to the replacement */
...@@ -3790,7 +3711,6 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, ...@@ -3790,7 +3711,6 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
bio->bi_iter.bi_sector = sector + rdev->data_offset; bio->bi_iter.bi_sector = sector + rdev->data_offset;
bio_set_dev(bio, rdev->bdev); bio_set_dev(bio, rdev->bdev);
count++; count++;
rcu_read_unlock();
} }
if (count < 2) { if (count < 2) {
...@@ -4500,11 +4420,11 @@ static int calc_degraded(struct r10conf *conf) ...@@ -4500,11 +4420,11 @@ static int calc_degraded(struct r10conf *conf)
int degraded, degraded2; int degraded, degraded2;
int i; int i;
rcu_read_lock();
degraded = 0; degraded = 0;
/* 'prev' section first */ /* 'prev' section first */
for (i = 0; i < conf->prev.raid_disks; i++) { for (i = 0; i < conf->prev.raid_disks; i++) {
struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev); struct md_rdev *rdev = conf->mirrors[i].rdev;
if (!rdev || test_bit(Faulty, &rdev->flags)) if (!rdev || test_bit(Faulty, &rdev->flags))
degraded++; degraded++;
else if (!test_bit(In_sync, &rdev->flags)) else if (!test_bit(In_sync, &rdev->flags))
...@@ -4514,13 +4434,12 @@ static int calc_degraded(struct r10conf *conf) ...@@ -4514,13 +4434,12 @@ static int calc_degraded(struct r10conf *conf)
*/ */
degraded++; degraded++;
} }
rcu_read_unlock();
if (conf->geo.raid_disks == conf->prev.raid_disks) if (conf->geo.raid_disks == conf->prev.raid_disks)
return degraded; return degraded;
rcu_read_lock();
degraded2 = 0; degraded2 = 0;
for (i = 0; i < conf->geo.raid_disks; i++) { for (i = 0; i < conf->geo.raid_disks; i++) {
struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev); struct md_rdev *rdev = conf->mirrors[i].rdev;
if (!rdev || test_bit(Faulty, &rdev->flags)) if (!rdev || test_bit(Faulty, &rdev->flags))
degraded2++; degraded2++;
else if (!test_bit(In_sync, &rdev->flags)) { else if (!test_bit(In_sync, &rdev->flags)) {
...@@ -4533,7 +4452,6 @@ static int calc_degraded(struct r10conf *conf) ...@@ -4533,7 +4452,6 @@ static int calc_degraded(struct r10conf *conf)
degraded2++; degraded2++;
} }
} }
rcu_read_unlock();
if (degraded2 > degraded) if (degraded2 > degraded)
return degraded2; return degraded2;
return degraded; return degraded;
...@@ -4965,16 +4883,15 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, ...@@ -4965,16 +4883,15 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
blist = read_bio; blist = read_bio;
read_bio->bi_next = NULL; read_bio->bi_next = NULL;
rcu_read_lock();
for (s = 0; s < conf->copies*2; s++) { for (s = 0; s < conf->copies*2; s++) {
struct bio *b; struct bio *b;
int d = r10_bio->devs[s/2].devnum; int d = r10_bio->devs[s/2].devnum;
struct md_rdev *rdev2; struct md_rdev *rdev2;
if (s&1) { if (s&1) {
rdev2 = rcu_dereference(conf->mirrors[d].replacement); rdev2 = conf->mirrors[d].replacement;
b = r10_bio->devs[s/2].repl_bio; b = r10_bio->devs[s/2].repl_bio;
} else { } else {
rdev2 = rcu_dereference(conf->mirrors[d].rdev); rdev2 = conf->mirrors[d].rdev;
b = r10_bio->devs[s/2].bio; b = r10_bio->devs[s/2].bio;
} }
if (!rdev2 || test_bit(Faulty, &rdev2->flags)) if (!rdev2 || test_bit(Faulty, &rdev2->flags))
...@@ -5008,7 +4925,6 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, ...@@ -5008,7 +4925,6 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
sector_nr += len >> 9; sector_nr += len >> 9;
nr_sectors += len >> 9; nr_sectors += len >> 9;
} }
rcu_read_unlock();
r10_bio->sectors = nr_sectors; r10_bio->sectors = nr_sectors;
/* Now submit the read */ /* Now submit the read */
...@@ -5061,20 +4977,17 @@ static void reshape_request_write(struct mddev *mddev, struct r10bio *r10_bio) ...@@ -5061,20 +4977,17 @@ static void reshape_request_write(struct mddev *mddev, struct r10bio *r10_bio)
struct bio *b; struct bio *b;
int d = r10_bio->devs[s/2].devnum; int d = r10_bio->devs[s/2].devnum;
struct md_rdev *rdev; struct md_rdev *rdev;
rcu_read_lock();
if (s&1) { if (s&1) {
rdev = rcu_dereference(conf->mirrors[d].replacement); rdev = conf->mirrors[d].replacement;
b = r10_bio->devs[s/2].repl_bio; b = r10_bio->devs[s/2].repl_bio;
} else { } else {
rdev = rcu_dereference(conf->mirrors[d].rdev); rdev = conf->mirrors[d].rdev;
b = r10_bio->devs[s/2].bio; b = r10_bio->devs[s/2].bio;
} }
if (!rdev || test_bit(Faulty, &rdev->flags)) { if (!rdev || test_bit(Faulty, &rdev->flags))
rcu_read_unlock();
continue; continue;
}
atomic_inc(&rdev->nr_pending); atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
md_sync_acct_bio(b, r10_bio->sectors); md_sync_acct_bio(b, r10_bio->sectors);
atomic_inc(&r10_bio->remaining); atomic_inc(&r10_bio->remaining);
b->bi_next = NULL; b->bi_next = NULL;
...@@ -5145,10 +5058,9 @@ static int handle_reshape_read_error(struct mddev *mddev, ...@@ -5145,10 +5058,9 @@ static int handle_reshape_read_error(struct mddev *mddev,
if (s > (PAGE_SIZE >> 9)) if (s > (PAGE_SIZE >> 9))
s = PAGE_SIZE >> 9; s = PAGE_SIZE >> 9;
rcu_read_lock();
while (!success) { while (!success) {
int d = r10b->devs[slot].devnum; int d = r10b->devs[slot].devnum;
struct md_rdev *rdev = rcu_dereference(conf->mirrors[d].rdev); struct md_rdev *rdev = conf->mirrors[d].rdev;
sector_t addr; sector_t addr;
if (rdev == NULL || if (rdev == NULL ||
test_bit(Faulty, &rdev->flags) || test_bit(Faulty, &rdev->flags) ||
...@@ -5157,14 +5069,12 @@ static int handle_reshape_read_error(struct mddev *mddev, ...@@ -5157,14 +5069,12 @@ static int handle_reshape_read_error(struct mddev *mddev,
addr = r10b->devs[slot].addr + idx * PAGE_SIZE; addr = r10b->devs[slot].addr + idx * PAGE_SIZE;
atomic_inc(&rdev->nr_pending); atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
success = sync_page_io(rdev, success = sync_page_io(rdev,
addr, addr,
s << 9, s << 9,
pages[idx], pages[idx],
REQ_OP_READ, false); REQ_OP_READ, false);
rdev_dec_pending(rdev, mddev); rdev_dec_pending(rdev, mddev);
rcu_read_lock();
if (success) if (success)
break; break;
failed: failed:
...@@ -5174,7 +5084,6 @@ static int handle_reshape_read_error(struct mddev *mddev, ...@@ -5174,7 +5084,6 @@ static int handle_reshape_read_error(struct mddev *mddev,
if (slot == first_slot) if (slot == first_slot)
break; break;
} }
rcu_read_unlock();
if (!success) { if (!success) {
/* couldn't read this block, must give up */ /* couldn't read this block, must give up */
set_bit(MD_RECOVERY_INTR, set_bit(MD_RECOVERY_INTR,
...@@ -5200,12 +5109,8 @@ static void end_reshape_write(struct bio *bio) ...@@ -5200,12 +5109,8 @@ static void end_reshape_write(struct bio *bio)
struct md_rdev *rdev = NULL; struct md_rdev *rdev = NULL;
d = find_bio_disk(conf, r10_bio, bio, &slot, &repl); d = find_bio_disk(conf, r10_bio, bio, &slot, &repl);
if (repl) rdev = repl ? conf->mirrors[d].replacement :
rdev = conf->mirrors[d].replacement; conf->mirrors[d].rdev;
if (!rdev) {
smp_mb();
rdev = conf->mirrors[d].rdev;
}
if (bio->bi_status) { if (bio->bi_status) {
/* FIXME should record badblock */ /* FIXME should record badblock */
...@@ -5240,18 +5145,16 @@ static void raid10_finish_reshape(struct mddev *mddev) ...@@ -5240,18 +5145,16 @@ static void raid10_finish_reshape(struct mddev *mddev)
mddev->resync_max_sectors = mddev->array_sectors; mddev->resync_max_sectors = mddev->array_sectors;
} else { } else {
int d; int d;
rcu_read_lock();
for (d = conf->geo.raid_disks ; for (d = conf->geo.raid_disks ;
d < conf->geo.raid_disks - mddev->delta_disks; d < conf->geo.raid_disks - mddev->delta_disks;
d++) { d++) {
struct md_rdev *rdev = rcu_dereference(conf->mirrors[d].rdev); struct md_rdev *rdev = conf->mirrors[d].rdev;
if (rdev) if (rdev)
clear_bit(In_sync, &rdev->flags); clear_bit(In_sync, &rdev->flags);
rdev = rcu_dereference(conf->mirrors[d].replacement); rdev = conf->mirrors[d].replacement;
if (rdev) if (rdev)
clear_bit(In_sync, &rdev->flags); clear_bit(In_sync, &rdev->flags);
} }
rcu_read_unlock();
} }
mddev->layout = mddev->new_layout; mddev->layout = mddev->new_layout;
mddev->chunk_sectors = 1 << conf->geo.chunk_shift; mddev->chunk_sectors = 1 << conf->geo.chunk_shift;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment