Commit c6c23117 authored by Kaike Wan's avatar Kaike Wan Committed by Doug Ledford

IB/hfi1: Add interlock between TID RDMA WRITE and other requests

This locking mechanism is designed to provent vavious memory corruption
scenarios from occurring when requests are pipelined, especially when
RDMA WRITE requests are interleaved with TID RDMA READ requests:
1. READ-AFTER-READ;
2. READ-AFTER-WRITE;
3. WRITE-AFTER-READ;
4. WRITE-AFTER-WRITE.
When memory corruption is likely, a request will be held back until
previous requests have been completed.
Reviewed-by: default avatarMike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: default avatarMitko Haralanov <mitko.haralanov@intel.com>
Signed-off-by: default avatarKaike Wan <kaike.wan@intel.com>
Signed-off-by: default avatarDennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: default avatarDoug Ledford <dledford@redhat.com>
parent 3c6cb20a
......@@ -173,6 +173,12 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
}
e = &qp->s_ack_queue[qp->s_tail_ack_queue];
/* Check for tid write fence */
if ((qpriv->s_flags & HFI1_R_TID_WAIT_INTERLCK) ||
hfi1_tid_rdma_ack_interlock(qp, e)) {
iowait_set_flag(&qpriv->s_iowait, IOWAIT_PENDING_IB);
goto bail;
}
if (e->opcode == OP(RDMA_READ_REQUEST)) {
/*
* If a RDMA read response is being resent and
......
......@@ -2179,6 +2179,7 @@ static int tid_rdma_rcv_error(struct hfi1_packet *packet,
req->state = TID_REQUEST_RESEND;
req->cur_seg = req->comp_seg;
}
qpriv->s_flags &= ~HFI1_R_TID_WAIT_INTERLCK;
}
/* Re-process old requests.*/
if (qp->s_acked_ack_queue == qp->s_tail_ack_queue)
......@@ -3229,6 +3230,7 @@ bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe)
struct rvt_swqe *prev;
struct hfi1_qp_priv *priv = qp->priv;
u32 s_prev;
struct tid_rdma_request *req;
s_prev = (qp->s_cur == 0 ? qp->s_size : qp->s_cur) - 1;
prev = rvt_get_swqe_ptr(qp, s_prev);
......@@ -3240,14 +3242,28 @@ bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe)
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
case IB_WR_RDMA_WRITE:
switch (prev->wr.opcode) {
case IB_WR_TID_RDMA_WRITE:
req = wqe_to_tid_req(prev);
if (req->ack_seg != req->total_segs)
goto interlock;
default:
break;
}
case IB_WR_RDMA_READ:
if (prev->wr.opcode != IB_WR_TID_RDMA_WRITE)
break;
/* fall through */
case IB_WR_TID_RDMA_READ:
switch (prev->wr.opcode) {
case IB_WR_RDMA_READ:
if (qp->s_acked != qp->s_cur)
goto interlock;
break;
case IB_WR_TID_RDMA_WRITE:
req = wqe_to_tid_req(prev);
if (req->ack_seg != req->total_segs)
goto interlock;
default:
break;
}
......@@ -5157,7 +5173,9 @@ static int make_tid_rdma_ack(struct rvt_qp *qp,
e = &qp->s_ack_queue[qpriv->r_tid_ack];
req = ack_to_tid_req(e);
flow = req->acked_tail;
}
} else if (req->ack_seg == req->total_segs &&
qpriv->s_flags & HFI1_R_TID_WAIT_INTERLCK)
qpriv->s_flags &= ~HFI1_R_TID_WAIT_INTERLCK;
hwords += hfi1_build_tid_rdma_write_ack(qp, e, ohdr, flow, &bth1,
&bth2);
......@@ -5310,3 +5328,27 @@ bool hfi1_schedule_tid_send(struct rvt_qp *qp)
IOWAIT_PENDING_TID);
return false;
}
bool hfi1_tid_rdma_ack_interlock(struct rvt_qp *qp, struct rvt_ack_entry *e)
{
struct rvt_ack_entry *prev;
struct tid_rdma_request *req;
struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
struct hfi1_qp_priv *priv = qp->priv;
u32 s_prev;
s_prev = qp->s_tail_ack_queue == 0 ? rvt_size_atomic(&dev->rdi) :
(qp->s_tail_ack_queue - 1);
prev = &qp->s_ack_queue[s_prev];
if ((e->opcode == TID_OP(READ_REQ) ||
e->opcode == OP(RDMA_READ_REQUEST)) &&
prev->opcode == TID_OP(WRITE_REQ)) {
req = ack_to_tid_req(prev);
if (req->ack_seg != req->total_segs) {
priv->s_flags |= HFI1_R_TID_WAIT_INTERLCK;
return true;
}
}
return false;
}
......@@ -25,6 +25,7 @@
* s_flags, there are no collisions.
*
* HFI1_S_TID_WAIT_INTERLCK - QP is waiting for requester interlock
* HFI1_R_TID_WAIT_INTERLCK - QP is waiting for responder interlock
*/
#define HFI1_S_TID_BUSY_SET BIT(0)
/* BIT(1) reserved for RVT_S_BUSY. */
......@@ -32,9 +33,15 @@
/* BIT(3) reserved for RVT_S_RESP_PENDING. */
/* BIT(4) reserved for RVT_S_ACK_PENDING. */
#define HFI1_S_TID_WAIT_INTERLCK BIT(5)
#define HFI1_R_TID_WAIT_INTERLCK BIT(6)
/* BIT(7) - BIT(15) reserved for RVT_S_WAIT_*. */
/* BIT(16) reserved for RVT_S_SEND_ONE */
#define HFI1_S_TID_RETRY_TIMER BIT(17)
/* BIT(18) reserved for RVT_S_ECN. */
#define HFI1_R_TID_SW_PSN BIT(19)
/* BIT(26) reserved for HFI1_S_WAIT_HALT */
/* BIT(27) reserved for HFI1_S_WAIT_TID_RESP */
/* BIT(28) reserved for HFI1_S_WAIT_TID_SPACE */
/*
* Unlike regular IB RDMA VERBS, which do not require an entry
......@@ -309,4 +316,6 @@ void _hfi1_do_tid_send(struct work_struct *work);
bool hfi1_schedule_tid_send(struct rvt_qp *qp);
bool hfi1_tid_rdma_ack_interlock(struct rvt_qp *qp, struct rvt_ack_entry *e);
#endif /* HFI1_TID_RDMA_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment