nvme-rdma: teardown admin/io queues once on error recovery

Relying on the queue state while tearing down on every reconnect attempt is not a good design. We should do it once in err_work and simply try to establish the queues for each reconnect attempt. Signed-off-by: Sagi Grimberg <sagi@grimberg.me> Signed-off-by: Christoph Hellwig <hch@lst.de>

nvme-rdma: teardown admin/io queues once on error recovery
Relying on the queue state while tearing down on every reconnect attempt is not a good design. We should do it once in err_work and simply try to establish the queues for each reconnect attempt. Signed-off-by: Sagi Grimberg <sagi@grimberg.me> Signed-off-by: Christoph Hellwig <hch@lst.de>
5e1fe61d · Sagi Grimberg · Christoph Hellwig · 0fc176df · 5e1fe61d
Commit 5e1fe61d authored Oct 11, 2017 by Sagi Grimberg Committed by Christoph Hellwig Oct 18, 2017
Hide whitespace changes
Inline Side-by-side

Showing with 12 additions and 15 deletions

drivers/nvme/host/rdma.c drivers/nvme/host/rdma.c +12 -15

No files found.
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -925,10 +925,6 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 	++ctrl->ctrl.nr_reconnects;
-	if (ctrl->ctrl.queue_count > 1)
-		nvme_rdma_destroy_io_queues(ctrl, false);
-	nvme_rdma_destroy_admin_queue(ctrl, false);
 	ret = nvme_rdma_configure_admin_queue(ctrl, false);
 	if (ret)
 		goto requeue;
@@ -936,7 +932,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 	if (ctrl->ctrl.queue_count > 1) {
 		ret = nvme_rdma_configure_io_queues(ctrl, false);
 		if (ret)
-			goto requeue;
+			goto destroy_admin;
 	}
 	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
@@ -946,14 +942,17 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 		return;
 	}
-	ctrl->ctrl.nr_reconnects = 0;
 	nvme_start_ctrl(&ctrl->ctrl);
-	dev_info(ctrl->ctrl.device, "Successfully reconnected\n");
+	dev_info(ctrl->ctrl.device, "Successfully reconnected (%d attempts)\n",
+			ctrl->ctrl.nr_reconnects);
+	ctrl->ctrl.nr_reconnects = 0;
 	return;
+destroy_admin:
+	nvme_rdma_destroy_admin_queue(ctrl, false);
 requeue:
 	dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
 			ctrl->ctrl.nr_reconnects);
@@ -969,17 +968,15 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
 	if (ctrl->ctrl.queue_count > 1) {
 		nvme_stop_queues(&ctrl->ctrl);
-		nvme_rdma_stop_io_queues(ctrl);
-	}
-	blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
-	nvme_rdma_stop_queue(&ctrl->queues[0]);
-	/* We must take care of fastfail/requeue all our inflight requests */
-	if (ctrl->ctrl.queue_count > 1)
 		blk_mq_tagset_busy_iter(&ctrl->tag_set,
 					nvme_cancel_request, &ctrl->ctrl);
+		nvme_rdma_destroy_io_queues(ctrl, false);
+	}
+	blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
 	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
 				nvme_cancel_request, &ctrl->ctrl);
+	nvme_rdma_destroy_admin_queue(ctrl, false);
 	/*
 	 * queues are not a live anymore, so restart the queues to fail fast