dm raid1: hold write bios when errors are handled

Hold all write bios when errors are handled. Previously the failures list was used only when handling errors with a userspace daemon such as dmeventd. Now, it is always used for all bios. The regions where some writes failed must be marked as nosync. This can only be done in process context (i.e. in raid1 workqueue), not in the write_callback function. Previously the write would succeed if writing to at least one leg succeeded. This is wrong because data from the failed leg may be replicated to the correct leg. Now, if using a userspace daemon, the write with some failures will be held until the daemon has done its job and reconfigured the array. If not using a daemon, the write still succeeds if at least one leg succeeds. This is bad, but it is consistent with current behavior. Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> Reviewed-by: Takahiro Yasui <tyasui@redhat.com> Tested-by: Takahiro Yasui <tyasui@redhat.com> Signed-off-by: Alasdair G Kergon <agk@redhat.com>

dm raid1: hold write bios when errors are handled
Hold all write bios when errors are handled. Previously the failures list was used only when handling errors with a userspace daemon such as dmeventd. Now, it is always used for all bios. The regions where some writes failed must be marked as nosync. This can only be done in process context (i.e. in raid1 workqueue), not in the write_callback function. Previously the write would succeed if writing to at least one leg succeeded. This is wrong because data from the failed leg may be replicated to the correct leg. Now, if using a userspace daemon, the write with some failures will be held until the daemon has done its job and reconfigured the array. If not using a daemon, the write still succeeds if at least one leg succeeds. This is bad, but it is consistent with current behavior. Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> Reviewed-by: Takahiro Yasui <tyasui@redhat.com> Tested-by: Takahiro Yasui <tyasui@redhat.com> Signed-off-by: Alasdair G Kergon <agk@redhat.com>
60f355ea · Mikulas Patocka · Alasdair G Kergon · c58098be · 60f355ea
Commit 60f355ea authored Dec 10, 2009 by Mikulas Patocka Committed by Alasdair G Kergon Dec 10, 2009
Show whitespace changes
Inline Side-by-side

Showing with 32 additions and 31 deletions

drivers/md/dm-raid1.c drivers/md/dm-raid1.c +32 -31

No files found.
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -578,7 +578,6 @@ static void write_callback(unsigned long error, void *context)
 	unsigned i, ret = 0;
 	struct bio *bio = (struct bio *) context;
 	struct mirror_set *ms;
-	int uptodate = 0;
 	int should_wake = 0;
 	unsigned long flags;

@@ -591,20 +590,15 @@ static void write_callback(unsigned long error, void *context)
 	 * This way we handle both writes to SYNC and NOSYNC
 	 * regions with the same code.
 	 */
-	if (likely(!error))
-		goto out;
+	if (likely(!error)) {
+		bio_endio(bio, ret);
+		return;
+	}

 	for (i = 0; i < ms->nr_mirrors; i++)
 		if (test_bit(i, &error))
 			fail_mirror(ms->mirror + i, DM_RAID1_WRITE_ERROR);
-		else
-			uptodate = 1;

-	if (unlikely(!uptodate)) {
-		DMERR("All replicated volumes dead, failing I/O");
-		/* None of the writes succeeded, fail the I/O. */
-		ret = -EIO;
-	} else if (errors_handled(ms)) {
 	/*
 	 * Need to raise event.  Since raising
 	 * events can block, we need to do it in
@@ -617,10 +611,6 @@ static void write_callback(unsigned long error, void *context)
 	spin_unlock_irqrestore(&ms->lock, flags);
 	if (should_wake)
 		wakeup_mirrord(ms);
-		return;
-	}
-out:
-	bio_endio(bio, ret);
 }

 static void do_write(struct mirror_set *ms, struct bio *bio)
@@ -773,15 +763,26 @@ static void do_failures(struct mirror_set *ms, struct bio_list *failures)
 	 * for us to treat them the same and requeue them
 	 * as well.
 	 */
-
 	while ((bio = bio_list_pop(failures))) {
-		if (ms->log_failure)
-			hold_bio(ms, bio);
-		else {
+		if (!ms->log_failure) {
 			ms->in_sync = 0;
 			dm_rh_mark_nosync(ms->rh, bio);
-			bio_endio(bio, 0);
 		}
+
+		/*
+		 * If all the legs are dead, fail the I/O.
+		 * If we have been told to handle errors, hold the bio
+		 * and wait for userspace to deal with the problem.
+		 * Otherwise pretend that the I/O succeeded. (This would
+		 * be wrong if the failed leg returned after reboot and
+		 * got replicated back to the good legs.)
+		 */
+		if (!get_valid_mirror(ms))
+			bio_endio(bio, -EIO);
+		else if (errors_handled(ms))
+			hold_bio(ms, bio);
+		else
+			bio_endio(bio, 0);
 	}
 }