Commit 8d355a46 authored by Li Nan, committed by Song Liu

md/raid10: Do not add spare disk when recovery fails

In raid10_sync_request(), if data cannot be read from any disk for
recovery, it goes to 'giveup' and increments 'chunks_skipped'. After
repeated 'giveup's, once 'chunks_skipped >= geo.raid_disks', it
returns 'max_sector', indicating that the recovery has been completed.
However, the recovery was actually aborted and the data remains inconsistent.

Fix it by setting mirror->recovery_disabled, which will prevent the spare
disk from being added to this mirror. The same issue also exists during
resync; it will be fixed afterwards.
Signed-off-by: Li Nan <linan122@huawei.com>
Signed-off-by: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/r/20230602091839.743798-2-linan666@huaweicloud.com
parent 4d8a5754
...@@ -3311,6 +3311,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, ...@@ -3311,6 +3311,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
int chunks_skipped = 0; int chunks_skipped = 0;
sector_t chunk_mask = conf->geo.chunk_mask; sector_t chunk_mask = conf->geo.chunk_mask;
int page_idx = 0; int page_idx = 0;
int error_disk = -1;
/* /*
* Allow skipping a full rebuild for incremental assembly * Allow skipping a full rebuild for incremental assembly
...@@ -3394,8 +3395,21 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, ...@@ -3394,8 +3395,21 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
return reshape_request(mddev, sector_nr, skipped); return reshape_request(mddev, sector_nr, skipped);
if (chunks_skipped >= conf->geo.raid_disks) { if (chunks_skipped >= conf->geo.raid_disks) {
/* if there has been nothing to do on any drive, pr_err("md/raid10:%s: %s fails\n", mdname(mddev),
* then there is nothing to do at all.. test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ? "resync" : "recovery");
if (error_disk >= 0 &&
!test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
/*
* recovery fails, set mirrors.recovery_disabled,
* device shouldn't be added to there.
*/
conf->mirrors[error_disk].recovery_disabled =
mddev->recovery_disabled;
return 0;
}
/*
* if there has been nothing to do on any drive,
* then there is nothing to do at all.
*/ */
*skipped = 1; *skipped = 1;
return (max_sector - sector_nr) + sectors_skipped; return (max_sector - sector_nr) + sectors_skipped;
...@@ -3646,6 +3660,8 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, ...@@ -3646,6 +3660,8 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
mdname(mddev)); mdname(mddev));
mirror->recovery_disabled mirror->recovery_disabled
= mddev->recovery_disabled; = mddev->recovery_disabled;
} else {
error_disk = i;
} }
put_buf(r10_bio); put_buf(r10_bio);
if (rb2) if (rb2)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment