Commit 5fd750e8, authored by Israel Rukshin, committed by Greg Kroah-Hartman

nvmet-rdma: fix double free of rdma queue

[ Upstream commit 21f90243 ]

If rdma accept fails in nvmet_rdma_queue_connect(), release work is
scheduled. Because the cm_id is not destroyed at that point, a new RDMA CM
event may arrive later and call nvmet_rdma_queue_connect_fail(), which
schedules another release work. As a result, nvmet_rdma_free_queue() is
called twice. To fix this, we implicitly destroy the cm_id by returning a
non-zero ret code, which guarantees that no new rdma_cm events will arrive
afterwards. Also add a qp pointer to the nvmet_rdma_queue structure, so we
can use it when the cm_id pointer is NULL or has been destroyed.

Signed-off-by: Israel Rukshin <israelr@mellanox.com>
Suggested-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Max Gurtovoy <maxg@mellanox.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sasha Levin <sashal@kernel.org>
parent b73c7440
...@@ -83,6 +83,7 @@ enum nvmet_rdma_queue_state { ...@@ -83,6 +83,7 @@ enum nvmet_rdma_queue_state {
struct nvmet_rdma_queue { struct nvmet_rdma_queue {
struct rdma_cm_id *cm_id; struct rdma_cm_id *cm_id;
struct ib_qp *qp;
struct nvmet_port *port; struct nvmet_port *port;
struct ib_cq *cq; struct ib_cq *cq;
atomic_t sq_wr_avail; atomic_t sq_wr_avail;
...@@ -471,7 +472,7 @@ static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev, ...@@ -471,7 +472,7 @@ static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev,
if (ndev->srq) if (ndev->srq)
ret = ib_post_srq_recv(ndev->srq, &cmd->wr, NULL); ret = ib_post_srq_recv(ndev->srq, &cmd->wr, NULL);
else else
ret = ib_post_recv(cmd->queue->cm_id->qp, &cmd->wr, NULL); ret = ib_post_recv(cmd->queue->qp, &cmd->wr, NULL);
if (unlikely(ret)) if (unlikely(ret))
pr_err("post_recv cmd failed\n"); pr_err("post_recv cmd failed\n");
...@@ -510,7 +511,7 @@ static void nvmet_rdma_release_rsp(struct nvmet_rdma_rsp *rsp) ...@@ -510,7 +511,7 @@ static void nvmet_rdma_release_rsp(struct nvmet_rdma_rsp *rsp)
atomic_add(1 + rsp->n_rdma, &queue->sq_wr_avail); atomic_add(1 + rsp->n_rdma, &queue->sq_wr_avail);
if (rsp->n_rdma) { if (rsp->n_rdma) {
rdma_rw_ctx_destroy(&rsp->rw, queue->cm_id->qp, rdma_rw_ctx_destroy(&rsp->rw, queue->qp,
queue->cm_id->port_num, rsp->req.sg, queue->cm_id->port_num, rsp->req.sg,
rsp->req.sg_cnt, nvmet_data_dir(&rsp->req)); rsp->req.sg_cnt, nvmet_data_dir(&rsp->req));
} }
...@@ -594,7 +595,7 @@ static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc) ...@@ -594,7 +595,7 @@ static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc)
WARN_ON(rsp->n_rdma <= 0); WARN_ON(rsp->n_rdma <= 0);
atomic_add(rsp->n_rdma, &queue->sq_wr_avail); atomic_add(rsp->n_rdma, &queue->sq_wr_avail);
rdma_rw_ctx_destroy(&rsp->rw, queue->cm_id->qp, rdma_rw_ctx_destroy(&rsp->rw, queue->qp,
queue->cm_id->port_num, rsp->req.sg, queue->cm_id->port_num, rsp->req.sg,
rsp->req.sg_cnt, nvmet_data_dir(&rsp->req)); rsp->req.sg_cnt, nvmet_data_dir(&rsp->req));
rsp->n_rdma = 0; rsp->n_rdma = 0;
...@@ -737,7 +738,7 @@ static bool nvmet_rdma_execute_command(struct nvmet_rdma_rsp *rsp) ...@@ -737,7 +738,7 @@ static bool nvmet_rdma_execute_command(struct nvmet_rdma_rsp *rsp)
} }
if (nvmet_rdma_need_data_in(rsp)) { if (nvmet_rdma_need_data_in(rsp)) {
if (rdma_rw_ctx_post(&rsp->rw, queue->cm_id->qp, if (rdma_rw_ctx_post(&rsp->rw, queue->qp,
queue->cm_id->port_num, &rsp->read_cqe, NULL)) queue->cm_id->port_num, &rsp->read_cqe, NULL))
nvmet_req_complete(&rsp->req, NVME_SC_DATA_XFER_ERROR); nvmet_req_complete(&rsp->req, NVME_SC_DATA_XFER_ERROR);
} else { } else {
...@@ -1020,6 +1021,7 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue) ...@@ -1020,6 +1021,7 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue)
pr_err("failed to create_qp ret= %d\n", ret); pr_err("failed to create_qp ret= %d\n", ret);
goto err_destroy_cq; goto err_destroy_cq;
} }
queue->qp = queue->cm_id->qp;
atomic_set(&queue->sq_wr_avail, qp_attr.cap.max_send_wr); atomic_set(&queue->sq_wr_avail, qp_attr.cap.max_send_wr);
...@@ -1048,11 +1050,10 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue) ...@@ -1048,11 +1050,10 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue)
static void nvmet_rdma_destroy_queue_ib(struct nvmet_rdma_queue *queue) static void nvmet_rdma_destroy_queue_ib(struct nvmet_rdma_queue *queue)
{ {
struct ib_qp *qp = queue->cm_id->qp; ib_drain_qp(queue->qp);
if (queue->cm_id)
ib_drain_qp(qp); rdma_destroy_id(queue->cm_id);
rdma_destroy_id(queue->cm_id); ib_destroy_qp(queue->qp);
ib_destroy_qp(qp);
ib_free_cq(queue->cq); ib_free_cq(queue->cq);
} }
...@@ -1286,9 +1287,12 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id, ...@@ -1286,9 +1287,12 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
ret = nvmet_rdma_cm_accept(cm_id, queue, &event->param.conn); ret = nvmet_rdma_cm_accept(cm_id, queue, &event->param.conn);
if (ret) { if (ret) {
schedule_work(&queue->release_work); /*
/* Destroying rdma_cm id is not needed here */ * Don't destroy the cm_id in free path, as we implicitly
return 0; * destroy the cm_id here with non-zero ret code.
*/
queue->cm_id = NULL;
goto free_queue;
} }
mutex_lock(&nvmet_rdma_queue_mutex); mutex_lock(&nvmet_rdma_queue_mutex);
...@@ -1297,6 +1301,8 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id, ...@@ -1297,6 +1301,8 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
return 0; return 0;
free_queue:
nvmet_rdma_free_queue(queue);
put_device: put_device:
kref_put(&ndev->ref, nvmet_rdma_free_dev); kref_put(&ndev->ref, nvmet_rdma_free_dev);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment