Commit 48a615dc authored by Kaike Wan's avatar Kaike Wan Committed by Doug Ledford

IB/hfi1: Integrate OPFN into RC transactions

OPFN parameter negotiation allows a pair of connected RC QPs to exchange
a set of parameters in succession. This negotiation does not commence
till the first ULP request. Because OPFN operations are operations
private to the driver, they do not generate user completions or put the
QP into error when they run out of retries. This patch integrates the
OPFN protocol into the transactions of an RC QP.
Reviewed-by: default avatarMike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: default avatarAshutosh Dixit <ashutosh.dixit@intel.com>
Signed-off-by: default avatarMitko Haralanov <mitko.haralanov@intel.com>
Signed-off-by: default avatarKaike Wan <kaike.wan@intel.com>
Signed-off-by: default avatarDennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: default avatarDoug Ledford <dledford@redhat.com>
parent ddf922c3
......@@ -1498,6 +1498,12 @@ static int __init hfi1_mod_init(void)
/* sanitize link CRC options */
link_crc_mask &= SUPPORTED_CRCS;
ret = opfn_init();
if (ret < 0) {
pr_err("Failed to allocate opfn_wq");
goto bail_dev;
}
/*
* These must be called before the driver is registered with
* the PCI subsystem.
......@@ -1528,6 +1534,7 @@ module_init(hfi1_mod_init);
static void __exit hfi1_mod_cleanup(void)
{
pci_unregister_driver(&hfi1_pci_driver);
opfn_exit();
node_affinity_destroy_all();
hfi1_dbg_exit();
......
......@@ -132,6 +132,12 @@ const struct rvt_operation_params hfi1_post_parms[RVT_OPERATION_MAX] = {
.qpt_support = BIT(IB_QPT_RC),
},
[IB_WR_OPFN] = {
.length = sizeof(struct ib_atomic_wr),
.qpt_support = BIT(IB_QPT_RC),
.flags = RVT_OPERATION_USE_RESERVE,
},
};
static void flush_list_head(struct list_head *l)
......@@ -285,6 +291,8 @@ void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc);
qp_set_16b(qp);
}
opfn_qp_init(qp, attr, attr_mask);
}
/**
......@@ -696,6 +704,7 @@ void qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
hfi1_qp_priv_tid_free(rdi, qp);
kfree(priv->s_ahg);
kfree(priv);
}
......@@ -751,6 +760,10 @@ void notify_qp_reset(struct rvt_qp *qp)
{
qp->r_adefered = 0;
clear_ahg(qp);
/* Clear any OPFN state */
if (qp->ibqp.qp_type == IB_QPT_RC)
opfn_conn_error(qp);
}
/*
......
......@@ -57,6 +57,10 @@
/* cut down ridiculously long IB macro names */
#define OP(x) RC_OP(x)
static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
struct rvt_swqe *wqe,
struct hfi1_ibport *ibp);
static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe,
u32 psn, u32 pmtu)
{
......@@ -517,10 +521,14 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
goto bail;
}
qp->s_num_rd_atomic++;
if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
qp->s_lsn++;
}
if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
/* FALLTHROUGH */
case IB_WR_OPFN:
if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
qp->s_lsn++;
if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
wqe->wr.opcode == IB_WR_OPFN) {
qp->s_state = OP(COMPARE_SWAP);
put_ib_ateth_swap(wqe->atomic_wr.swap,
&ohdr->u.atomic_eth);
......@@ -1040,6 +1048,7 @@ static void reset_psn(struct rvt_qp *qp, u32 psn)
*/
void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait)
{
struct hfi1_qp_priv *priv = qp->priv;
struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
struct hfi1_ibport *ibp;
......@@ -1050,8 +1059,26 @@ void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait)
hfi1_migrate_qp(qp);
qp->s_retry = qp->s_retry_cnt;
} else if (qp->s_last == qp->s_acked) {
rvt_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
/*
* We need special handling for the OPFN request WQEs as
* they are not allowed to generate real user errors
*/
if (wqe->wr.opcode == IB_WR_OPFN) {
struct hfi1_ibport *ibp =
to_iport(qp->ibqp.device, qp->port_num);
/*
* Call opfn_conn_reply() with capcode and
* remaining data as 0 to close out the
* current request
*/
opfn_conn_reply(qp, priv->opfn.curr);
wqe = do_rc_completion(qp, wqe, ibp);
qp->s_flags &= ~RVT_S_WAIT_ACK;
} else {
rvt_send_complete(qp, wqe,
IB_WC_RETRY_EXC_ERR);
rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
}
return;
} else { /* need to handle delayed completion */
return;
......@@ -1363,6 +1390,9 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
u64 *vaddr = wqe->sg_list[0].vaddr;
*vaddr = val;
}
if (wqe->wr.opcode == IB_WR_OPFN)
opfn_conn_reply(qp, val);
if (qp->s_num_rd_atomic &&
(wqe->wr.opcode == IB_WR_RDMA_READ ||
wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
......@@ -2068,6 +2098,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
return;
fecn = process_ecn(qp, packet);
opfn_trigger_conn_request(qp, be32_to_cpu(ohdr->bth[1]));
/*
* Process responses (ACKs) before anything else. Note that the
......@@ -2363,15 +2394,18 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
case OP(COMPARE_SWAP):
case OP(FETCH_ADD): {
struct ib_atomic_eth *ateth;
struct ib_atomic_eth *ateth = &ohdr->u.atomic_eth;
u64 vaddr = get_ib_ateth_vaddr(ateth);
bool opfn = opcode == OP(COMPARE_SWAP) &&
vaddr == HFI1_VERBS_E_ATOMIC_VADDR;
struct rvt_ack_entry *e;
u64 vaddr;
atomic64_t *maddr;
u64 sdata;
u32 rkey;
u8 next;
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
!opfn))
goto nack_inv;
next = qp->r_head_ack_queue + 1;
if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
......@@ -2387,8 +2421,11 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
rvt_put_mr(e->rdma_sge.mr);
e->rdma_sge.mr = NULL;
}
ateth = &ohdr->u.atomic_eth;
vaddr = get_ib_ateth_vaddr(ateth);
/* Process OPFN special virtual address */
if (opfn) {
opfn_conn_response(qp, e, ateth);
goto ack;
}
if (unlikely(vaddr & (sizeof(u64) - 1)))
goto nack_inv_unlck;
rkey = be32_to_cpu(ateth->rkey);
......@@ -2407,6 +2444,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
sdata);
rvt_put_mr(qp->r_sge.sge.mr);
qp->r_sge.num_sge = 0;
ack:
e->opcode = opcode;
e->sent = 0;
e->psn = psn;
......
......@@ -246,5 +246,16 @@ int hfi1_qp_priv_init(struct rvt_dev_info *rdi, struct rvt_qp *qp,
qpriv->rcd = qp_to_rcd(rdi, qp);
spin_lock_init(&qpriv->opfn.lock);
INIT_WORK(&qpriv->opfn.opfn_work, opfn_send_conn_request);
return 0;
}
void hfi1_qp_priv_tid_free(struct rvt_dev_info *rdi, struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
if (qp->ibqp.qp_type == IB_QPT_RC && HFI1_CAP_IS_KSET(TID_RDMA))
cancel_work_sync(&priv->opfn.opfn_work);
}
......@@ -35,5 +35,6 @@ int hfi1_kern_exp_rcv_init(struct hfi1_ctxtdata *rcd, int reinit);
int hfi1_qp_priv_init(struct rvt_dev_info *rdi, struct rvt_qp *qp,
struct ib_qp_init_attr *init_attr);
void hfi1_qp_priv_tid_free(struct rvt_dev_info *rdi, struct rvt_qp *qp);
#endif /* HFI1_TID_RDMA_H */
......@@ -1735,6 +1735,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
dd->verbs_dev.rdi.dparms.sge_copy_mode = sge_copy_mode;
dd->verbs_dev.rdi.dparms.wss_threshold = wss_threshold;
dd->verbs_dev.rdi.dparms.wss_clean_period = wss_clean_period;
dd->verbs_dev.rdi.dparms.reserved_operations = 1;
dd->verbs_dev.rdi.dparms.extra_rdma_atomic = 1;
/* post send table */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment