Commit 36a240a7 authored by Heinz Mauelshagen's avatar Heinz Mauelshagen Committed by Mike Snitzer

dm raid: fix RAID leg rebuild errors

On fast devices such as NVMe, a flaw in rs_get_progress() results in
false target status output when userspace lvm2 requests leg rebuilds
(symptom of the failure is device health chars 'aaaaaaaa' instead of
expected 'aAaAAAAA' causing lvm2 to fail).

The correct sync action state definitions already exist in
decipher_sync_action() so fix rs_get_progress() to use it.

Change decipher_sync_action() to return an enum rather than a string for
the sync states and call it from rs_get_progress().  Introduce
sync_str() to translate from enum to the string that is needed by
raid_status().
Signed-off-by: default avatarHeinz Mauelshagen <heinzm@redhat.com>
Signed-off-by: default avatarMike Snitzer <snitzer@redhat.com>
parent c44a5ee8
...@@ -3332,32 +3332,53 @@ static int raid_map(struct dm_target *ti, struct bio *bio) ...@@ -3332,32 +3332,53 @@ static int raid_map(struct dm_target *ti, struct bio *bio)
return DM_MAPIO_SUBMITTED; return DM_MAPIO_SUBMITTED;
} }
/* Return string describing the current sync action of @mddev */ /* Return sync state string for @state */
static const char *decipher_sync_action(struct mddev *mddev, unsigned long recovery) enum sync_state { st_frozen, st_reshape, st_resync, st_check, st_repair, st_recover, st_idle };
static const char *sync_str(enum sync_state state)
{
/* Has to be in above sync_state order! */
static const char *sync_strs[] = {
"frozen",
"reshape",
"resync",
"check",
"repair",
"recover",
"idle"
};
return __within_range(state, 0, ARRAY_SIZE(sync_strs) - 1) ? sync_strs[state] : "undef";
};
/* Return enum sync_state for @mddev derived from @recovery flags */
static const enum sync_state decipher_sync_action(struct mddev *mddev, unsigned long recovery)
{ {
if (test_bit(MD_RECOVERY_FROZEN, &recovery)) if (test_bit(MD_RECOVERY_FROZEN, &recovery))
return "frozen"; return st_frozen;
/* The MD sync thread can be done with io but still be running */ /* The MD sync thread can be done with io or be interrupted but still be running */
if (!test_bit(MD_RECOVERY_DONE, &recovery) && if (!test_bit(MD_RECOVERY_DONE, &recovery) &&
(test_bit(MD_RECOVERY_RUNNING, &recovery) || (test_bit(MD_RECOVERY_RUNNING, &recovery) ||
(!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &recovery)))) { (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &recovery)))) {
if (test_bit(MD_RECOVERY_RESHAPE, &recovery)) if (test_bit(MD_RECOVERY_RESHAPE, &recovery))
return "reshape"; return st_reshape;
if (test_bit(MD_RECOVERY_SYNC, &recovery)) { if (test_bit(MD_RECOVERY_SYNC, &recovery)) {
if (!test_bit(MD_RECOVERY_REQUESTED, &recovery)) if (!test_bit(MD_RECOVERY_REQUESTED, &recovery))
return "resync"; return st_resync;
else if (test_bit(MD_RECOVERY_CHECK, &recovery)) if (test_bit(MD_RECOVERY_CHECK, &recovery))
return "check"; return st_check;
return "repair"; return st_repair;
} }
if (test_bit(MD_RECOVERY_RECOVER, &recovery)) if (test_bit(MD_RECOVERY_RECOVER, &recovery))
return "recover"; return st_recover;
if (mddev->reshape_position != MaxSector)
return st_reshape;
} }
return "idle"; return st_idle;
} }
/* /*
...@@ -3391,6 +3412,7 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery, ...@@ -3391,6 +3412,7 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery,
sector_t resync_max_sectors) sector_t resync_max_sectors)
{ {
sector_t r; sector_t r;
enum sync_state state;
struct mddev *mddev = &rs->md; struct mddev *mddev = &rs->md;
clear_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags); clear_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
...@@ -3401,20 +3423,14 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery, ...@@ -3401,20 +3423,14 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery,
set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags); set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
} else { } else {
if (!test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags) && state = decipher_sync_action(mddev, recovery);
!test_bit(MD_RECOVERY_INTR, &recovery) &&
(test_bit(MD_RECOVERY_NEEDED, &recovery) || if (state == st_idle && !test_bit(MD_RECOVERY_INTR, &recovery))
test_bit(MD_RECOVERY_RESHAPE, &recovery) ||
test_bit(MD_RECOVERY_RUNNING, &recovery)))
r = mddev->curr_resync_completed;
else
r = mddev->recovery_cp; r = mddev->recovery_cp;
else
r = mddev->curr_resync_completed;
if (r >= resync_max_sectors && if (state == st_idle && r >= resync_max_sectors) {
(!test_bit(MD_RECOVERY_REQUESTED, &recovery) ||
(!test_bit(MD_RECOVERY_FROZEN, &recovery) &&
!test_bit(MD_RECOVERY_NEEDED, &recovery) &&
!test_bit(MD_RECOVERY_RUNNING, &recovery)))) {
/* /*
* Sync complete. * Sync complete.
*/ */
...@@ -3422,24 +3438,20 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery, ...@@ -3422,24 +3438,20 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery,
if (test_bit(MD_RECOVERY_RECOVER, &recovery)) if (test_bit(MD_RECOVERY_RECOVER, &recovery))
set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags); set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
} else if (test_bit(MD_RECOVERY_RECOVER, &recovery)) { } else if (state == st_recover)
/* /*
* In case we are recovering, the array is not in sync * In case we are recovering, the array is not in sync
* and health chars should show the recovering legs. * and health chars should show the recovering legs.
*/ */
; ;
else if (state == st_resync)
} else if (test_bit(MD_RECOVERY_SYNC, &recovery) &&
!test_bit(MD_RECOVERY_REQUESTED, &recovery)) {
/* /*
* If "resync" is occurring, the raid set * If "resync" is occurring, the raid set
* is or may be out of sync hence the health * is or may be out of sync hence the health
* characters shall be 'a'. * characters shall be 'a'.
*/ */
set_bit(RT_FLAG_RS_RESYNCING, &rs->runtime_flags); set_bit(RT_FLAG_RS_RESYNCING, &rs->runtime_flags);
else if (state == st_reshape)
} else if (test_bit(MD_RECOVERY_RESHAPE, &recovery) &&
!test_bit(MD_RECOVERY_REQUESTED, &recovery)) {
/* /*
* If "reshape" is occurring, the raid set * If "reshape" is occurring, the raid set
* is or may be out of sync hence the health * is or may be out of sync hence the health
...@@ -3447,7 +3459,7 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery, ...@@ -3447,7 +3459,7 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery,
*/ */
set_bit(RT_FLAG_RS_RESYNCING, &rs->runtime_flags); set_bit(RT_FLAG_RS_RESYNCING, &rs->runtime_flags);
} else if (test_bit(MD_RECOVERY_REQUESTED, &recovery)) { else if (state == st_check || state == st_repair)
/* /*
* If "check" or "repair" is occurring, the raid set has * If "check" or "repair" is occurring, the raid set has
* undergone an initial sync and the health characters * undergone an initial sync and the health characters
...@@ -3455,12 +3467,12 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery, ...@@ -3455,12 +3467,12 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery,
*/ */
set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags); set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
} else { else {
struct md_rdev *rdev; struct md_rdev *rdev;
/* /*
* We are idle and recovery is needed, prevent 'A' chars race * We are idle and recovery is needed, prevent 'A' chars race
* caused by components still set to in-sync by constrcuctor. * caused by components still set to in-sync by constructor.
*/ */
if (test_bit(MD_RECOVERY_NEEDED, &recovery)) if (test_bit(MD_RECOVERY_NEEDED, &recovery))
set_bit(RT_FLAG_RS_RESYNCING, &rs->runtime_flags); set_bit(RT_FLAG_RS_RESYNCING, &rs->runtime_flags);
...@@ -3524,7 +3536,7 @@ static void raid_status(struct dm_target *ti, status_type_t type, ...@@ -3524,7 +3536,7 @@ static void raid_status(struct dm_target *ti, status_type_t type,
progress = rs_get_progress(rs, recovery, resync_max_sectors); progress = rs_get_progress(rs, recovery, resync_max_sectors);
resync_mismatches = (mddev->last_sync_action && !strcasecmp(mddev->last_sync_action, "check")) ? resync_mismatches = (mddev->last_sync_action && !strcasecmp(mddev->last_sync_action, "check")) ?
atomic64_read(&mddev->resync_mismatches) : 0; atomic64_read(&mddev->resync_mismatches) : 0;
sync_action = decipher_sync_action(&rs->md, recovery); sync_action = sync_str(decipher_sync_action(&rs->md, recovery));
/* HM FIXME: do we want another state char for raid0? It shows 'D'/'A'/'-' now */ /* HM FIXME: do we want another state char for raid0? It shows 'D'/'A'/'-' now */
for (i = 0; i < rs->raid_disks; i++) for (i = 0; i < rs->raid_disks; i++)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment