Commit f4aec6a0 authored by Logan Gunthorpe, committed by Jens Axboe

md/raid5: Factor out helper from raid5_make_request() loop

Factor out the inner loop of raid5_make_request() into its own helper
called make_stripe_request().

The helper returns one of four statuses: SUCCESS, RETRY,
SCHEDULE_AND_RETRY and FAIL. This makes the code a bit easier to
understand and allows the SCHEDULE_AND_RETRY path to be made common.
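
In the rewritten caller, the loop body then reduces to a simple dispatch
on that status (condensed from the hunks below):

  res = make_stripe_request(mddev, conf, &ctx, logical_sector, bi, seq);
  if (res == STRIPE_FAIL)
          break;                  /* helper already set bi->bi_status */
  if (res == STRIPE_RETRY)
          goto retry;             /* gen_lock seqcount changed under us */
  if (res == STRIPE_SCHEDULE_AND_RETRY) {
          schedule();             /* wait for reshape/overlap to settle */
          do_prepare = true;
          goto retry;
  }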

A context structure is added to contain do_flush. It will be used
more in subsequent patches for state that needs to be kept
outside the loop.
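
Concretely, the context is a plain struct passed by pointer into the
helper, and the flush flag is consumed by the first stripe_head (both
lifted from the hunks below):

  struct stripe_request_ctx {
          /* the request had REQ_PREFLUSH, cleared after the first stripe_head */
          bool do_flush;
  };

  /* in make_stripe_request(): */
  if (ctx->do_flush) {
          set_bit(STRIPE_R5C_PREFLUSH, &sh->state);
          /* we only need flush for one stripe */
          ctx->do_flush = false;
  }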

No functional changes intended. Subsequent patches will clean this up
further, untangling the gen_lock and do_prepare logic.
Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Signed-off-by: Song Liu <song@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 1baa1126
@@ -5787,84 +5787,32 @@ static bool ahead_of_reshape(struct mddev *mddev, sector_t sector,
sector >= reshape_sector;
}
static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
{
struct r5conf *conf = mddev->private;
int dd_idx;
sector_t new_sector;
sector_t logical_sector, last_sector;
struct stripe_head *sh;
const int rw = bio_data_dir(bi);
DEFINE_WAIT(w);
bool do_prepare;
bool do_flush = false;
if (unlikely(bi->bi_opf & REQ_PREFLUSH)) {
int ret = log_handle_flush_request(conf, bi);
if (ret == 0)
return true;
if (ret == -ENODEV) {
if (md_flush_request(mddev, bi))
return true;
}
/* ret == -EAGAIN, fallback */
/*
* if r5l_handle_flush_request() didn't clear REQ_PREFLUSH,
* we need to flush journal device
*/
do_flush = bi->bi_opf & REQ_PREFLUSH;
}
if (!md_write_start(mddev, bi))
return false;
/*
* If array is degraded, better not do chunk aligned read because
* later we might have to read it again in order to reconstruct
* data on failed drives.
*/
if (rw == READ && mddev->degraded == 0 &&
mddev->reshape_position == MaxSector) {
bi = chunk_aligned_read(mddev, bi);
if (!bi)
return true;
}
if (unlikely(bio_op(bi) == REQ_OP_DISCARD)) {
make_discard_request(mddev, bi);
md_write_end(mddev);
return true;
}
enum stripe_result {
STRIPE_SUCCESS = 0,
STRIPE_RETRY,
STRIPE_SCHEDULE_AND_RETRY,
STRIPE_FAIL,
};
logical_sector = bi->bi_iter.bi_sector & ~((sector_t)RAID5_STRIPE_SECTORS(conf)-1);
last_sector = bio_end_sector(bi);
bi->bi_next = NULL;
struct stripe_request_ctx {
/* the request had REQ_PREFLUSH, cleared after the first stripe_head */
bool do_flush;
};
/* Bail out if conflicts with reshape and REQ_NOWAIT is set */
if ((bi->bi_opf & REQ_NOWAIT) &&
(conf->reshape_progress != MaxSector) &&
!ahead_of_reshape(mddev, logical_sector, conf->reshape_progress) &&
ahead_of_reshape(mddev, logical_sector, conf->reshape_safe)) {
bio_wouldblock_error(bi);
if (rw == WRITE)
md_write_end(mddev);
return true;
}
md_account_bio(mddev, &bi);
prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE);
for (; logical_sector < last_sector; logical_sector += RAID5_STRIPE_SECTORS(conf)) {
int previous;
int seq;
static enum stripe_result make_stripe_request(struct mddev *mddev,
struct r5conf *conf, struct stripe_request_ctx *ctx,
sector_t logical_sector, struct bio *bi, int seq)
{
const int rw = bio_data_dir(bi);
enum stripe_result ret;
struct stripe_head *sh;
sector_t new_sector;
int previous = 0;
int dd_idx;
do_prepare = false;
retry:
seq = read_seqcount_begin(&conf->gen_lock);
previous = 0;
if (do_prepare)
prepare_to_wait(&conf->wait_for_overlap, &w,
TASK_UNINTERRUPTIBLE);
if (unlikely(conf->reshape_progress != MaxSector)) {
/* spinlock is needed as reshape_progress may be
/*
* Spinlock is needed as reshape_progress may be
* 64bit on a 32bit platform, and so it might be
* possible to see a half-updated value
* Of course reshape_progress could change after
@@ -5880,31 +5828,28 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
if (ahead_of_reshape(mddev, logical_sector,
conf->reshape_safe)) {
spin_unlock_irq(&conf->device_lock);
schedule();
do_prepare = true;
goto retry;
return STRIPE_SCHEDULE_AND_RETRY;
}
}
spin_unlock_irq(&conf->device_lock);
}
new_sector = raid5_compute_sector(conf, logical_sector,
previous,
new_sector = raid5_compute_sector(conf, logical_sector, previous,
&dd_idx, NULL);
pr_debug("raid456: raid5_make_request, sector %llu logical %llu\n",
(unsigned long long)new_sector,
(unsigned long long)logical_sector);
pr_debug("raid456: %s, sector %llu logical %llu\n", __func__,
new_sector, logical_sector);
sh = raid5_get_active_stripe(conf, new_sector, previous,
(bi->bi_opf & REQ_RAHEAD), 0);
if (unlikely(!sh)) {
/* cannot get stripe, just give-up */
bi->bi_status = BLK_STS_IOERR;
break;
return STRIPE_FAIL;
}
if (unlikely(previous)) {
/* expansion might have moved on while waiting for a
/*
* Expansion might have moved on while waiting for a
* stripe, so we must do the range check again.
* Expansion could still move past after this
* test, but as we are holding a reference to
@@ -5920,17 +5865,15 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
must_retry = 1;
spin_unlock_irq(&conf->device_lock);
if (must_retry) {
raid5_release_stripe(sh);
schedule();
do_prepare = true;
goto retry;
ret = STRIPE_SCHEDULE_AND_RETRY;
goto out_release;
}
}
if (read_seqcount_retry(&conf->gen_lock, seq)) {
/* Might have got the wrong stripe_head by accident */
raid5_release_stripe(sh);
goto retry;
ret = STRIPE_RETRY;
goto out_release;
}
if (test_bit(STRIPE_EXPANDING, &sh->state) ||
@@ -5940,19 +5883,17 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
* overlap. Flush everything and wait a while.
*/
md_wakeup_thread(mddev->thread);
raid5_release_stripe(sh);
schedule();
do_prepare = true;
goto retry;
ret = STRIPE_SCHEDULE_AND_RETRY;
goto out_release;
}
if (stripe_can_batch(sh))
stripe_add_to_batch_list(conf, sh);
if (do_flush) {
if (ctx->do_flush) {
set_bit(STRIPE_R5C_PREFLUSH, &sh->state);
/* we only need flush for one stripe */
do_flush = false;
ctx->do_flush = false;
}
set_bit(STRIPE_HANDLE, &sh->state);
@@ -5963,7 +5904,101 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
atomic_inc(&conf->preread_active_stripes);
release_stripe_plug(mddev, sh);
return STRIPE_SUCCESS;
out_release:
raid5_release_stripe(sh);
return ret;
}
static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
{
struct r5conf *conf = mddev->private;
sector_t logical_sector, last_sector;
struct stripe_request_ctx ctx = {};
const int rw = bio_data_dir(bi);
enum stripe_result res;
DEFINE_WAIT(w);
bool do_prepare;
if (unlikely(bi->bi_opf & REQ_PREFLUSH)) {
int ret = log_handle_flush_request(conf, bi);
if (ret == 0)
return true;
if (ret == -ENODEV) {
if (md_flush_request(mddev, bi))
return true;
}
/* ret == -EAGAIN, fallback */
/*
* if r5l_handle_flush_request() didn't clear REQ_PREFLUSH,
* we need to flush journal device
*/
ctx.do_flush = bi->bi_opf & REQ_PREFLUSH;
}
if (!md_write_start(mddev, bi))
return false;
/*
* If array is degraded, better not do chunk aligned read because
* later we might have to read it again in order to reconstruct
* data on failed drives.
*/
if (rw == READ && mddev->degraded == 0 &&
mddev->reshape_position == MaxSector) {
bi = chunk_aligned_read(mddev, bi);
if (!bi)
return true;
}
if (unlikely(bio_op(bi) == REQ_OP_DISCARD)) {
make_discard_request(mddev, bi);
md_write_end(mddev);
return true;
}
logical_sector = bi->bi_iter.bi_sector & ~((sector_t)RAID5_STRIPE_SECTORS(conf)-1);
last_sector = bio_end_sector(bi);
bi->bi_next = NULL;
/* Bail out if conflicts with reshape and REQ_NOWAIT is set */
if ((bi->bi_opf & REQ_NOWAIT) &&
(conf->reshape_progress != MaxSector) &&
!ahead_of_reshape(mddev, logical_sector, conf->reshape_progress) &&
ahead_of_reshape(mddev, logical_sector, conf->reshape_safe)) {
bio_wouldblock_error(bi);
if (rw == WRITE)
md_write_end(mddev);
return true;
}
md_account_bio(mddev, &bi);
prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE);
for (; logical_sector < last_sector; logical_sector += RAID5_STRIPE_SECTORS(conf)) {
int seq;
do_prepare = false;
retry:
seq = read_seqcount_begin(&conf->gen_lock);
if (do_prepare)
prepare_to_wait(&conf->wait_for_overlap, &w,
TASK_UNINTERRUPTIBLE);
res = make_stripe_request(mddev, conf, &ctx, logical_sector,
bi, seq);
if (res == STRIPE_FAIL)
break;
if (res == STRIPE_RETRY)
goto retry;
if (res == STRIPE_SCHEDULE_AND_RETRY) {
schedule();
do_prepare = true;
goto retry;
}
}
finish_wait(&conf->wait_for_overlap, &w);
if (rw == WRITE)
......