Commit 3c6cb20a authored by Kaike Wan's avatar Kaike Wan Committed by Doug Ledford

IB/hfi1: Add TID RDMA WRITE functionality into RDMA verbs

This patch integrates TID RDMA WRITE protocol into normal RDMA verbs
framework. The TID RDMA WRITE protocol is an end-to-end protocol
between the hfi1 drivers on two OPA nodes that converts a qualified
RDMA WRITE request into a TID RDMA WRITE request to avoid data copying
on the responder side.
Reviewed-by: default avatarMike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: default avatarMitko Haralanov <mitko.haralanov@intel.com>
Signed-off-by: default avatarKaike Wan <kaike.wan@intel.com>
Signed-off-by: default avatarDennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: default avatarDoug Ledford <dledford@redhat.com>
parent 572f0c33
...@@ -138,6 +138,12 @@ const struct rvt_operation_params hfi1_post_parms[RVT_OPERATION_MAX] = { ...@@ -138,6 +138,12 @@ const struct rvt_operation_params hfi1_post_parms[RVT_OPERATION_MAX] = {
.flags = RVT_OPERATION_USE_RESERVE, .flags = RVT_OPERATION_USE_RESERVE,
}, },
[IB_WR_TID_RDMA_WRITE] = {
.length = sizeof(struct ib_rdma_wr),
.qpt_support = BIT(IB_QPT_RC),
.flags = RVT_OPERATION_IGN_RNR_CNT,
},
}; };
static void flush_list_head(struct list_head *l) static void flush_list_head(struct list_head *l)
...@@ -780,6 +786,7 @@ void quiesce_qp(struct rvt_qp *qp) ...@@ -780,6 +786,7 @@ void quiesce_qp(struct rvt_qp *qp)
struct hfi1_qp_priv *priv = qp->priv; struct hfi1_qp_priv *priv = qp->priv;
hfi1_del_tid_reap_timer(qp); hfi1_del_tid_reap_timer(qp);
hfi1_del_tid_retry_timer(qp);
iowait_sdma_drain(&priv->s_iowait); iowait_sdma_drain(&priv->s_iowait);
qp_pio_drain(qp); qp_pio_drain(qp);
flush_tx_list(qp); flush_tx_list(qp);
......
This diff is collapsed.
...@@ -3205,6 +3205,20 @@ void hfi1_qp_kern_exp_rcv_clear_all(struct rvt_qp *qp) ...@@ -3205,6 +3205,20 @@ void hfi1_qp_kern_exp_rcv_clear_all(struct rvt_qp *qp)
do { do {
struct hfi1_swqe_priv *priv = wqe->priv; struct hfi1_swqe_priv *priv = wqe->priv;
ret = hfi1_kern_exp_rcv_clear(&priv->tid_req);
} while (!ret);
}
for (i = qp->s_acked_ack_queue; i != qp->r_head_ack_queue;) {
struct rvt_ack_entry *e = &qp->s_ack_queue[i];
if (++i == rvt_max_atomic(ib_to_rvt(qp->ibqp.device)))
i = 0;
/* Free only locally allocated TID entries */
if (e->opcode != TID_OP(WRITE_REQ))
continue;
do {
struct hfi1_ack_priv *priv = e->priv;
ret = hfi1_kern_exp_rcv_clear(&priv->tid_req); ret = hfi1_kern_exp_rcv_clear(&priv->tid_req);
} while (!ret); } while (!ret);
} }
......
...@@ -1126,7 +1126,8 @@ static inline u32 set_pkt_bth_psn(__be32 bthpsn, u8 expct, u32 frags) ...@@ -1126,7 +1126,8 @@ static inline u32 set_pkt_bth_psn(__be32 bthpsn, u8 expct, u32 frags)
0xffffffull), 0xffffffull),
psn = val & mask; psn = val & mask;
if (expct) if (expct)
psn = (psn & ~BTH_SEQ_MASK) | ((psn + frags) & BTH_SEQ_MASK); psn = (psn & ~HFI1_KDETH_BTH_SEQ_MASK) |
((psn + frags) & HFI1_KDETH_BTH_SEQ_MASK);
else else
psn = psn + frags; psn = psn + frags;
return psn & mask; return psn & mask;
......
...@@ -161,6 +161,7 @@ MODULE_PARM_DESC(wss_clean_period, "Count of verbs copies before an entry in the ...@@ -161,6 +161,7 @@ MODULE_PARM_DESC(wss_clean_period, "Count of verbs copies before an entry in the
*/ */
const enum ib_wc_opcode ib_hfi1_wc_opcode[] = { const enum ib_wc_opcode ib_hfi1_wc_opcode[] = {
[IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE, [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
[IB_WR_TID_RDMA_WRITE] = IB_WC_RDMA_WRITE,
[IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE, [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
[IB_WR_SEND] = IB_WC_SEND, [IB_WR_SEND] = IB_WC_SEND,
[IB_WR_SEND_WITH_IMM] = IB_WC_SEND, [IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
...@@ -203,6 +204,12 @@ const u8 hdr_len_by_opcode[256] = { ...@@ -203,6 +204,12 @@ const u8 hdr_len_by_opcode[256] = {
[IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE] = 12 + 8 + 4, [IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE] = 12 + 8 + 4,
[IB_OPCODE_TID_RDMA_READ_REQ] = 12 + 8 + 36, [IB_OPCODE_TID_RDMA_READ_REQ] = 12 + 8 + 36,
[IB_OPCODE_TID_RDMA_READ_RESP] = 12 + 8 + 36, [IB_OPCODE_TID_RDMA_READ_RESP] = 12 + 8 + 36,
[IB_OPCODE_TID_RDMA_WRITE_REQ] = 12 + 8 + 36,
[IB_OPCODE_TID_RDMA_WRITE_RESP] = 12 + 8 + 36,
[IB_OPCODE_TID_RDMA_WRITE_DATA] = 12 + 8 + 36,
[IB_OPCODE_TID_RDMA_WRITE_DATA_LAST] = 12 + 8 + 36,
[IB_OPCODE_TID_RDMA_ACK] = 12 + 8 + 36,
[IB_OPCODE_TID_RDMA_RESYNC] = 12 + 8 + 36,
/* UC */ /* UC */
[IB_OPCODE_UC_SEND_FIRST] = 12 + 8, [IB_OPCODE_UC_SEND_FIRST] = 12 + 8,
[IB_OPCODE_UC_SEND_MIDDLE] = 12 + 8, [IB_OPCODE_UC_SEND_MIDDLE] = 12 + 8,
...@@ -248,8 +255,14 @@ static const opcode_handler opcode_handler_tbl[256] = { ...@@ -248,8 +255,14 @@ static const opcode_handler opcode_handler_tbl[256] = {
[IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE] = &hfi1_rc_rcv, [IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE] = &hfi1_rc_rcv,
/* TID RDMA has separate handlers for different opcodes.*/ /* TID RDMA has separate handlers for different opcodes.*/
[IB_OPCODE_TID_RDMA_WRITE_REQ] = &hfi1_rc_rcv_tid_rdma_write_req,
[IB_OPCODE_TID_RDMA_WRITE_RESP] = &hfi1_rc_rcv_tid_rdma_write_resp,
[IB_OPCODE_TID_RDMA_WRITE_DATA] = &hfi1_rc_rcv_tid_rdma_write_data,
[IB_OPCODE_TID_RDMA_WRITE_DATA_LAST] = &hfi1_rc_rcv_tid_rdma_write_data,
[IB_OPCODE_TID_RDMA_READ_REQ] = &hfi1_rc_rcv_tid_rdma_read_req, [IB_OPCODE_TID_RDMA_READ_REQ] = &hfi1_rc_rcv_tid_rdma_read_req,
[IB_OPCODE_TID_RDMA_READ_RESP] = &hfi1_rc_rcv_tid_rdma_read_resp, [IB_OPCODE_TID_RDMA_READ_RESP] = &hfi1_rc_rcv_tid_rdma_read_resp,
[IB_OPCODE_TID_RDMA_RESYNC] = &hfi1_rc_rcv_tid_rdma_resync,
[IB_OPCODE_TID_RDMA_ACK] = &hfi1_rc_rcv_tid_rdma_ack,
/* UC */ /* UC */
[IB_OPCODE_UC_SEND_FIRST] = &hfi1_uc_rcv, [IB_OPCODE_UC_SEND_FIRST] = &hfi1_uc_rcv,
...@@ -1332,7 +1345,9 @@ static void hfi1_fill_device_attr(struct hfi1_devdata *dd) ...@@ -1332,7 +1345,9 @@ static void hfi1_fill_device_attr(struct hfi1_devdata *dd)
rdi->dparms.props.max_mr_size = U64_MAX; rdi->dparms.props.max_mr_size = U64_MAX;
rdi->dparms.props.max_fast_reg_page_list_len = UINT_MAX; rdi->dparms.props.max_fast_reg_page_list_len = UINT_MAX;
rdi->dparms.props.max_qp = hfi1_max_qps; rdi->dparms.props.max_qp = hfi1_max_qps;
rdi->dparms.props.max_qp_wr = hfi1_max_qp_wrs; rdi->dparms.props.max_qp_wr =
(hfi1_max_qp_wrs >= HFI1_QP_WQE_INVALID ?
HFI1_QP_WQE_INVALID - 1 : hfi1_max_qp_wrs);
rdi->dparms.props.max_send_sge = hfi1_max_sges; rdi->dparms.props.max_send_sge = hfi1_max_sges;
rdi->dparms.props.max_recv_sge = hfi1_max_sges; rdi->dparms.props.max_recv_sge = hfi1_max_sges;
rdi->dparms.props.max_sge_rd = hfi1_max_sges; rdi->dparms.props.max_sge_rd = hfi1_max_sges;
......
...@@ -193,6 +193,7 @@ struct hfi1_qp_priv { ...@@ -193,6 +193,7 @@ struct hfi1_qp_priv {
u32 r_tid_ack; /* the TID RDMA request to be ACK'ed */ u32 r_tid_ack; /* the TID RDMA request to be ACK'ed */
u32 r_tid_alloc; /* Request for which we are allocating resources */ u32 r_tid_alloc; /* Request for which we are allocating resources */
u32 pending_tid_w_segs; /* Num of pending tid write segments */ u32 pending_tid_w_segs; /* Num of pending tid write segments */
u32 pending_tid_w_resp; /* Num of pending tid write responses */
u32 alloc_w_segs; /* Number of segments for which write */ u32 alloc_w_segs; /* Number of segments for which write */
/* resources have been allocated for this QP */ /* resources have been allocated for this QP */
......
...@@ -246,6 +246,7 @@ struct rvt_ack_entry { ...@@ -246,6 +246,7 @@ struct rvt_ack_entry {
#define RVT_OPERATION_ATOMIC_SGE 0x00000004 #define RVT_OPERATION_ATOMIC_SGE 0x00000004
#define RVT_OPERATION_LOCAL 0x00000008 #define RVT_OPERATION_LOCAL 0x00000008
#define RVT_OPERATION_USE_RESERVE 0x00000010 #define RVT_OPERATION_USE_RESERVE 0x00000010
#define RVT_OPERATION_IGN_RNR_CNT 0x00000020
#define RVT_OPERATION_MAX (IB_WR_RESERVED10 + 1) #define RVT_OPERATION_MAX (IB_WR_RESERVED10 + 1)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment