Commit d2fe99e8 authored by Kumar Sanghvi, committed by Roland Dreier

RDMA/cxgb4: Add support for MPAv2 Enhanced RDMA Negotiation

This patch adds support for Enhanced RDMA Connection Establishment
(draft-ietf-storm-mpa-peer-connect-06), aka MPAv2.  Details of the draft
can be obtained from:
<http://www.ietf.org/id/draft-ietf-storm-mpa-peer-connect-06.txt>
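
For reference, the MPAv2 enhanced-connection private data carries IRD and ORD
in two 16-bit fields; the peer-to-peer model and RTR preferences live in the
top bits and the actual value in the low 14 bits (MPA_V2_IRD_ORD_MASK).  The
following is a minimal standalone sketch of that packing, not driver code:
the values mirror the defines this patch adds to iw_cxgb4.h, and
build_initiator_params() is a hypothetical helper used only for illustration.

/* Standalone user-space sketch of the MPAv2 IRD/ORD field encoding. */
#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>

#define MPA_V2_PEER2PEER_MODEL  0x8000  /* flag bit in the IRD field */
#define MPA_V2_RDMA_WRITE_RTR   0x8000  /* flag bit in the ORD field */
#define MPA_V2_RDMA_READ_RTR    0x4000  /* flag bit in the ORD field */
#define MPA_V2_IRD_ORD_MASK     0x3FFF  /* low 14 bits hold the value */

struct mpa_v2_conn_params {
	uint16_t ird;	/* big endian on the wire */
	uint16_t ord;
};

/* Pack local IRD/ORD plus an optional RTR preference, as send_mpa_req() does. */
static struct mpa_v2_conn_params
build_initiator_params(uint16_t ird, uint16_t ord, int peer2peer, int want_write_rtr)
{
	struct mpa_v2_conn_params p;

	p.ird = htons(ird & MPA_V2_IRD_ORD_MASK);
	p.ord = htons(ord & MPA_V2_IRD_ORD_MASK);
	if (peer2peer) {
		p.ird |= htons(MPA_V2_PEER2PEER_MODEL);
		p.ord |= htons(want_write_rtr ? MPA_V2_RDMA_WRITE_RTR :
						MPA_V2_RDMA_READ_RTR);
	}
	return p;
}

int main(void)
{
	struct mpa_v2_conn_params p = build_initiator_params(8, 8, 1, 1);

	/* The receiver masks the flag bits off before using the values. */
	printf("ird=0x%04x ord=0x%04x (ird value %u, ord value %u)\n",
	       ntohs(p.ird), ntohs(p.ord),
	       ntohs(p.ird) & MPA_V2_IRD_ORD_MASK,
	       ntohs(p.ord) & MPA_V2_IRD_ORD_MASK);
	return 0;
}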

The patch updates the following functions from the initiator's perspective:
 - send_mpa_request
 - process_mpa_reply
 - post_terminate for TERM error codes
 - destroy_qp for TERM-related changes
 - adds layer/etype/ecode to c4iw_qp_attrs for sending with TERM
 - peer_abort for retrying the connection attempt with an MPA_v1 message
 - adds the c4iw_reconnect function

The patch updates the following functions from the responder's perspective:
 - process_mpa_request
 - send_mpa_reply
 - c4iw_accept_cr
 - passes ird/ord to upper layers (a short validation sketch follows this list)
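
The ird/ord handling includes a sanity rule: the peer's ORD must not exceed
the local IRD, and the local ORD must not exceed the peer's IRD; otherwise the
connection is terminated with MPA_INSUFF_IRD.  The sketch below restates that
rule as a standalone user-space function; check_ird_ord() is illustrative only
and is not part of the driver, which performs this check in c4iw_accept_cr and
again in process_mpa_reply.

#include <stdio.h>
#include <stdint.h>

/* Returns 0 when the peer's ORD fits our IRD and our ORD fits the peer's IRD. */
static int check_ird_ord(uint16_t local_ird, uint16_t local_ord,
			 uint16_t resp_ird, uint16_t resp_ord)
{
	if (local_ird < resp_ord || local_ord > resp_ird)
		return -1;	/* driver would send a TERM with MPA_INSUFF_IRD */
	return 0;
}

int main(void)
{
	printf("ok=%d\n", check_ird_ord(8, 8, 8, 8));	/* fits: ok=0 */
	printf("ok=%d\n", check_ird_ord(4, 8, 8, 8));	/* peer ORD 8 > local IRD 4: ok=-1 */
	return 0;
}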
Signed-off-by: Kumar Sanghvi <kumaras@chelsio.com>
Reviewed-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
parent 56da00fc
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -103,7 +103,8 @@ MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout "
 static int mpa_rev = 1;
 module_param(mpa_rev, int, 0644);
 MODULE_PARM_DESC(mpa_rev, "MPA Revision, 0 supports amso1100, "
-		"1 is spec compliant. (default=1)");
+		"1 is RFC0544 spec compliant, 2 is IETF MPA Peer Connect Draft"
+		" compliant (default=1)");
 
 static int markers_enabled;
 module_param(markers_enabled, int, 0644);
@@ -497,17 +498,21 @@ static int send_connect(struct c4iw_ep *ep)
 	return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
 }
 
-static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb)
+static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb,
+		       u8 mpa_rev_to_use)
 {
 	int mpalen, wrlen;
 	struct fw_ofld_tx_data_wr *req;
 	struct mpa_message *mpa;
+	struct mpa_v2_conn_params mpa_v2_params;
 
 	PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen);
 
 	BUG_ON(skb_cloned(skb));
 
 	mpalen = sizeof(*mpa) + ep->plen;
+	if (mpa_rev_to_use == 2)
+		mpalen += sizeof(struct mpa_v2_conn_params);
 	wrlen = roundup(mpalen + sizeof *req, 16);
 	skb = get_skb(skb, wrlen, GFP_KERNEL);
 	if (!skb) {
@@ -533,12 +538,39 @@ static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb)
 	mpa = (struct mpa_message *)(req + 1);
 	memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
 	mpa->flags = (crc_enabled ? MPA_CRC : 0) |
-		     (markers_enabled ? MPA_MARKERS : 0);
+		     (markers_enabled ? MPA_MARKERS : 0) |
+		     (mpa_rev_to_use == 2 ? MPA_ENHANCED_RDMA_CONN : 0);
 	mpa->private_data_size = htons(ep->plen);
-	mpa->revision = mpa_rev;
+	mpa->revision = mpa_rev_to_use;
+	if (mpa_rev_to_use == 1)
+		ep->tried_with_mpa_v1 = 1;
+
+	if (mpa_rev_to_use == 2) {
+		mpa->private_data_size +=
+			htons(sizeof(struct mpa_v2_conn_params));
+		mpa_v2_params.ird = htons((u16)ep->ird);
+		mpa_v2_params.ord = htons((u16)ep->ord);
+
+		if (peer2peer) {
+			mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL);
+			if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE)
+				mpa_v2_params.ord |=
+					htons(MPA_V2_RDMA_WRITE_RTR);
+			else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ)
+				mpa_v2_params.ord |=
+					htons(MPA_V2_RDMA_READ_RTR);
+		}
+		memcpy(mpa->private_data, &mpa_v2_params,
+		       sizeof(struct mpa_v2_conn_params));
 
-	if (ep->plen)
-		memcpy(mpa->private_data, ep->mpa_pkt + sizeof(*mpa), ep->plen);
+		if (ep->plen)
+			memcpy(mpa->private_data +
+			       sizeof(struct mpa_v2_conn_params),
+			       ep->mpa_pkt + sizeof(*mpa), ep->plen);
+	} else
+		if (ep->plen)
+			memcpy(mpa->private_data,
+					ep->mpa_pkt + sizeof(*mpa), ep->plen);
 
 	/*
 	 * Reference the mpa skb.  This ensures the data area
@@ -562,10 +594,13 @@ static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen)
 	struct fw_ofld_tx_data_wr *req;
 	struct mpa_message *mpa;
 	struct sk_buff *skb;
+	struct mpa_v2_conn_params mpa_v2_params;
 
 	PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen);
 
 	mpalen = sizeof(*mpa) + plen;
+	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn)
+		mpalen += sizeof(struct mpa_v2_conn_params);
 	wrlen = roundup(mpalen + sizeof *req, 16);
 	skb = get_skb(NULL, wrlen, GFP_KERNEL);
@@ -595,8 +630,29 @@ static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen)
 	mpa->flags = MPA_REJECT;
 	mpa->revision = mpa_rev;
 	mpa->private_data_size = htons(plen);
-	if (plen)
-		memcpy(mpa->private_data, pdata, plen);
+
+	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
+		mpa->flags |= MPA_ENHANCED_RDMA_CONN;
+		mpa->private_data_size +=
+			htons(sizeof(struct mpa_v2_conn_params));
+		mpa_v2_params.ird = htons(((u16)ep->ird) |
+					  (peer2peer ? MPA_V2_PEER2PEER_MODEL :
+					   0));
+		mpa_v2_params.ord = htons(((u16)ep->ord) | (peer2peer ?
+					  (p2p_type ==
+					   FW_RI_INIT_P2PTYPE_RDMA_WRITE ?
+					   MPA_V2_RDMA_WRITE_RTR : p2p_type ==
+					   FW_RI_INIT_P2PTYPE_READ_REQ ?
+					   MPA_V2_RDMA_READ_RTR : 0) : 0));
+		memcpy(mpa->private_data, &mpa_v2_params,
+		       sizeof(struct mpa_v2_conn_params));
+
+		if (ep->plen)
+			memcpy(mpa->private_data +
+			       sizeof(struct mpa_v2_conn_params), pdata, plen);
+	} else
+		if (plen)
+			memcpy(mpa->private_data, pdata, plen);
 
 	/*
 	 * Reference the mpa skb again.  This ensures the data area
@@ -617,10 +673,13 @@ static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen)
 	struct fw_ofld_tx_data_wr *req;
 	struct mpa_message *mpa;
 	struct sk_buff *skb;
+	struct mpa_v2_conn_params mpa_v2_params;
 
 	PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen);
 
 	mpalen = sizeof(*mpa) + plen;
+	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn)
+		mpalen += sizeof(struct mpa_v2_conn_params);
 	wrlen = roundup(mpalen + sizeof *req, 16);
 	skb = get_skb(NULL, wrlen, GFP_KERNEL);
@@ -649,10 +708,36 @@ static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen)
 	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
 	mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) |
 		     (markers_enabled ? MPA_MARKERS : 0);
-	mpa->revision = mpa_rev;
+	mpa->revision = ep->mpa_attr.version;
 	mpa->private_data_size = htons(plen);
-	if (plen)
-		memcpy(mpa->private_data, pdata, plen);
+
+	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
+		mpa->flags |= MPA_ENHANCED_RDMA_CONN;
+		mpa->private_data_size +=
+			htons(sizeof(struct mpa_v2_conn_params));
+		mpa_v2_params.ird = htons((u16)ep->ird);
+		mpa_v2_params.ord = htons((u16)ep->ord);
+		if (peer2peer && (ep->mpa_attr.p2p_type !=
+					FW_RI_INIT_P2PTYPE_DISABLED)) {
+			mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL);
+			if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE)
+				mpa_v2_params.ord |=
+					htons(MPA_V2_RDMA_WRITE_RTR);
+			else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ)
+				mpa_v2_params.ord |=
+					htons(MPA_V2_RDMA_READ_RTR);
+		}
+
+		memcpy(mpa->private_data, &mpa_v2_params,
+		       sizeof(struct mpa_v2_conn_params));
+
+		if (ep->plen)
+			memcpy(mpa->private_data +
+			       sizeof(struct mpa_v2_conn_params), pdata, plen);
+	} else
+		if (plen)
+			memcpy(mpa->private_data, pdata, plen);
 
 	/*
 	 * Reference the mpa skb.  This ensures the data area
@@ -695,7 +780,10 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb)
 
 	/* start MPA negotiation */
 	send_flowc(ep, NULL);
-	send_mpa_req(ep, skb);
+	if (ep->retry_with_mpa_v1)
+		send_mpa_req(ep, skb, 1);
+	else
+		send_mpa_req(ep, skb, mpa_rev);
 
 	return 0;
 }
@@ -769,8 +857,19 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status)
 	event.remote_addr = ep->com.remote_addr;
 
 	if ((status == 0) || (status == -ECONNREFUSED)) {
-		event.private_data_len = ep->plen;
-		event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
+		if (!ep->tried_with_mpa_v1) {
+			/* this means MPA_v2 is used */
+			event.private_data_len = ep->plen -
+				sizeof(struct mpa_v2_conn_params);
+			event.private_data = ep->mpa_pkt +
+				sizeof(struct mpa_message) +
+				sizeof(struct mpa_v2_conn_params);
+		} else {
+			/* this means MPA_v1 is used */
+			event.private_data_len = ep->plen;
+			event.private_data = ep->mpa_pkt +
+				sizeof(struct mpa_message);
+		}
 	}
 
 	PDBG("%s ep %p tid %u status %d\n", __func__, ep,
@@ -793,9 +892,22 @@ static void connect_request_upcall(struct c4iw_ep *ep)
 	event.event = IW_CM_EVENT_CONNECT_REQUEST;
 	event.local_addr = ep->com.local_addr;
 	event.remote_addr = ep->com.remote_addr;
-	event.private_data_len = ep->plen;
-	event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
 	event.provider_data = ep;
+	if (!ep->tried_with_mpa_v1) {
+		/* this means MPA_v2 is used */
+		event.ord = ep->ord;
+		event.ird = ep->ird;
+		event.private_data_len = ep->plen -
+			sizeof(struct mpa_v2_conn_params);
+		event.private_data = ep->mpa_pkt + sizeof(struct mpa_message) +
+			sizeof(struct mpa_v2_conn_params);
+	} else {
+		/* this means MPA_v1 is used. Send max supported */
+		event.ord = c4iw_max_read_depth;
+		event.ird = c4iw_max_read_depth;
+		event.private_data_len = ep->plen;
+		event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
+	}
 	if (state_read(&ep->parent_ep->com) != DEAD) {
 		c4iw_get_ep(&ep->com);
 		ep->parent_ep->com.cm_id->event_handler(
@@ -813,6 +925,8 @@ static void established_upcall(struct c4iw_ep *ep)
 	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
 	memset(&event, 0, sizeof(event));
 	event.event = IW_CM_EVENT_ESTABLISHED;
+	event.ird = ep->ird;
+	event.ord = ep->ord;
 	if (ep->com.cm_id) {
 		PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
 		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
@@ -848,7 +962,10 @@ static int update_rx_credits(struct c4iw_ep *ep, u32 credits)
 static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
 {
 	struct mpa_message *mpa;
+	struct mpa_v2_conn_params *mpa_v2_params;
 	u16 plen;
+	u16 resp_ird, resp_ord;
+	u8 rtr_mismatch = 0, insuff_ird = 0;
 	struct c4iw_qp_attributes attrs;
 	enum c4iw_qp_attr_mask mask;
 	int err;
@@ -888,7 +1005,9 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
 	mpa = (struct mpa_message *) ep->mpa_pkt;
 
 	/* Validate MPA header. */
-	if (mpa->revision != mpa_rev) {
+	if (mpa->revision > mpa_rev) {
+		printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d,"
+		       " Received = %d\n", __func__, mpa_rev, mpa->revision);
 		err = -EPROTO;
 		goto err;
 	}
@@ -938,13 +1057,66 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
 	ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
 	ep->mpa_attr.recv_marker_enabled = markers_enabled;
 	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
-	ep->mpa_attr.version = mpa_rev;
-	ep->mpa_attr.p2p_type = peer2peer ? p2p_type :
-				FW_RI_INIT_P2PTYPE_DISABLED;
+	ep->mpa_attr.version = mpa->revision;
+	ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
+
+	if (mpa->revision == 2) {
+		ep->mpa_attr.enhanced_rdma_conn =
+			mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0;
+		if (ep->mpa_attr.enhanced_rdma_conn) {
+			mpa_v2_params = (struct mpa_v2_conn_params *)
+				(ep->mpa_pkt + sizeof(*mpa));
+			resp_ird = ntohs(mpa_v2_params->ird) &
+				MPA_V2_IRD_ORD_MASK;
+			resp_ord = ntohs(mpa_v2_params->ord) &
+				MPA_V2_IRD_ORD_MASK;
+
+			/*
+			 * This is a double-check. Ideally, below checks are
+			 * not required since ird/ord stuff has been taken
+			 * care of in c4iw_accept_cr
+			 */
+			if ((ep->ird < resp_ord) || (ep->ord > resp_ird)) {
+				err = -ENOMEM;
+				ep->ird = resp_ord;
+				ep->ord = resp_ird;
+				insuff_ird = 1;
+			}
+
+			if (ntohs(mpa_v2_params->ird) &
+					MPA_V2_PEER2PEER_MODEL) {
+				if (ntohs(mpa_v2_params->ord) &
+						MPA_V2_RDMA_WRITE_RTR)
+					ep->mpa_attr.p2p_type =
+						FW_RI_INIT_P2PTYPE_RDMA_WRITE;
+				else if (ntohs(mpa_v2_params->ord) &
+						MPA_V2_RDMA_READ_RTR)
+					ep->mpa_attr.p2p_type =
+						FW_RI_INIT_P2PTYPE_READ_REQ;
+			}
+		}
+	} else if (mpa->revision == 1)
+		if (peer2peer)
+			ep->mpa_attr.p2p_type = p2p_type;
+
 	PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, "
-	     "xmit_marker_enabled=%d, version=%d\n", __func__,
-	     ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
-	     ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);
+	     "xmit_marker_enabled=%d, version=%d p2p_type=%d local-p2p_type = "
+	     "%d\n", __func__, ep->mpa_attr.crc_enabled,
+	     ep->mpa_attr.recv_marker_enabled,
+	     ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
+	     ep->mpa_attr.p2p_type, p2p_type);
+
+	/*
+	 * If responder's RTR does not match with that of initiator, assign
+	 * FW_RI_INIT_P2PTYPE_DISABLED in mpa attributes so that RTR is not
+	 * generated when moving QP to RTS state.
+	 * A TERM message will be sent after QP has moved to RTS state
+	 */
+	if ((ep->mpa_attr.version == 2) &&
+			(ep->mpa_attr.p2p_type != p2p_type)) {
+		ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
+		rtr_mismatch = 1;
+	}
 
 	attrs.mpa_attr = ep->mpa_attr;
 	attrs.max_ird = ep->ird;
@@ -961,6 +1133,39 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
 			     ep->com.qp, mask, &attrs, 1);
 	if (err)
 		goto err;
+
+	/*
+	 * If responder's RTR requirement did not match with what initiator
+	 * supports, generate TERM message
+	 */
+	if (rtr_mismatch) {
+		printk(KERN_ERR "%s: RTR mismatch, sending TERM\n", __func__);
+		attrs.layer_etype = LAYER_MPA | DDP_LLP;
+		attrs.ecode = MPA_NOMATCH_RTR;
+		attrs.next_state = C4IW_QP_STATE_TERMINATE;
+		err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
+				C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
+		err = -ENOMEM;
+		goto out;
+	}
+
+	/*
+	 * Generate TERM if initiator IRD is not sufficient for responder
+	 * provided ORD. Currently, we do the same behaviour even when
+	 * responder provided IRD is also not sufficient as regards to
+	 * initiator ORD.
+	 */
+	if (insuff_ird) {
+		printk(KERN_ERR "%s: Insufficient IRD, sending TERM\n",
+				__func__);
+		attrs.layer_etype = LAYER_MPA | DDP_LLP;
+		attrs.ecode = MPA_INSUFF_IRD;
+		attrs.next_state = C4IW_QP_STATE_TERMINATE;
+		err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
+				C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
+		err = -ENOMEM;
+		goto out;
+	}
 	goto out;
 err:
 	state_set(&ep->com, ABORTING);
@@ -973,6 +1178,7 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
 static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
 {
 	struct mpa_message *mpa;
+	struct mpa_v2_conn_params *mpa_v2_params;
 	u16 plen;
 
 	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
@@ -1013,7 +1219,9 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
 	/*
 	 * Validate MPA Header.
 	 */
-	if (mpa->revision != mpa_rev) {
+	if (mpa->revision > mpa_rev) {
+		printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d,"
+		       " Received = %d\n", __func__, mpa_rev, mpa->revision);
 		abort_connection(ep, skb, GFP_KERNEL);
 		return;
 	}
@@ -1056,9 +1264,37 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
 	ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
 	ep->mpa_attr.recv_marker_enabled = markers_enabled;
 	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
-	ep->mpa_attr.version = mpa_rev;
-	ep->mpa_attr.p2p_type = peer2peer ? p2p_type :
-				FW_RI_INIT_P2PTYPE_DISABLED;
+	ep->mpa_attr.version = mpa->revision;
+	if (mpa->revision == 1)
+		ep->tried_with_mpa_v1 = 1;
+	ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
+
+	if (mpa->revision == 2) {
+		ep->mpa_attr.enhanced_rdma_conn =
+			mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0;
+		if (ep->mpa_attr.enhanced_rdma_conn) {
+			mpa_v2_params = (struct mpa_v2_conn_params *)
+				(ep->mpa_pkt + sizeof(*mpa));
+			ep->ird = ntohs(mpa_v2_params->ird) &
+				MPA_V2_IRD_ORD_MASK;
+			ep->ord = ntohs(mpa_v2_params->ord) &
+				MPA_V2_IRD_ORD_MASK;
+			if (ntohs(mpa_v2_params->ird) & MPA_V2_PEER2PEER_MODEL)
+				if (peer2peer) {
+					if (ntohs(mpa_v2_params->ord) &
+							MPA_V2_RDMA_WRITE_RTR)
+						ep->mpa_attr.p2p_type =
+						FW_RI_INIT_P2PTYPE_RDMA_WRITE;
+					else if (ntohs(mpa_v2_params->ord) &
+							MPA_V2_RDMA_READ_RTR)
+						ep->mpa_attr.p2p_type =
+						FW_RI_INIT_P2PTYPE_READ_REQ;
+				}
+		}
+	} else if (mpa->revision == 1)
+		if (peer2peer)
+			ep->mpa_attr.p2p_type = p2p_type;
+
 	PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, "
 	     "xmit_marker_enabled=%d, version=%d p2p_type=%d\n", __func__,
 	     ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
@@ -1550,6 +1786,112 @@ static int is_neg_adv_abort(unsigned int status)
 	       status == CPL_ERR_PERSIST_NEG_ADVICE;
 }
 
+static int c4iw_reconnect(struct c4iw_ep *ep)
+{
+	int err = 0;
+	struct rtable *rt;
+	struct net_device *pdev;
+	struct neighbour *neigh;
+	int step;
+
+	PDBG("%s qp %p cm_id %p\n", __func__, ep->com.qp, ep->com.cm_id);
+	init_timer(&ep->timer);
+
+	/*
+	 * Allocate an active TID to initiate a TCP connection.
+	 */
+	ep->atid = cxgb4_alloc_atid(ep->com.dev->rdev.lldi.tids, ep);
+	if (ep->atid == -1) {
+		printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__);
+		err = -ENOMEM;
+		goto fail2;
+	}
+
+	/* find a route */
+	rt = find_route(ep->com.dev,
+			ep->com.cm_id->local_addr.sin_addr.s_addr,
+			ep->com.cm_id->remote_addr.sin_addr.s_addr,
+			ep->com.cm_id->local_addr.sin_port,
+			ep->com.cm_id->remote_addr.sin_port, 0);
+	if (!rt) {
+		printk(KERN_ERR MOD "%s - cannot find route.\n", __func__);
+		err = -EHOSTUNREACH;
+		goto fail3;
+	}
+	ep->dst = &rt->dst;
+
+	neigh = dst_get_neighbour(ep->dst);
+
+	/* get a l2t entry */
+	if (neigh->dev->flags & IFF_LOOPBACK) {
+		PDBG("%s LOOPBACK\n", __func__);
+		pdev = ip_dev_find(&init_net,
+				   ep->com.cm_id->remote_addr.sin_addr.s_addr);
+		ep->l2t = cxgb4_l2t_get(ep->com.dev->rdev.lldi.l2t,
+					neigh, pdev, 0);
+		ep->mtu = pdev->mtu;
+		ep->tx_chan = cxgb4_port_chan(pdev);
+		ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1;
+		step = ep->com.dev->rdev.lldi.ntxq /
+		       ep->com.dev->rdev.lldi.nchan;
+		ep->txq_idx = cxgb4_port_idx(pdev) * step;
+		step = ep->com.dev->rdev.lldi.nrxq /
+		       ep->com.dev->rdev.lldi.nchan;
+		ep->ctrlq_idx = cxgb4_port_idx(pdev);
+		ep->rss_qid = ep->com.dev->rdev.lldi.rxq_ids[
+			      cxgb4_port_idx(pdev) * step];
+		dev_put(pdev);
+	} else {
+		ep->l2t = cxgb4_l2t_get(ep->com.dev->rdev.lldi.l2t,
+					neigh, neigh->dev, 0);
+		ep->mtu = dst_mtu(ep->dst);
+		ep->tx_chan = cxgb4_port_chan(neigh->dev);
+		ep->smac_idx = (cxgb4_port_viid(neigh->dev) & 0x7F) << 1;
+		step = ep->com.dev->rdev.lldi.ntxq /
+		       ep->com.dev->rdev.lldi.nchan;
+		ep->txq_idx = cxgb4_port_idx(neigh->dev) * step;
+		ep->ctrlq_idx = cxgb4_port_idx(neigh->dev);
+		step = ep->com.dev->rdev.lldi.nrxq /
+		       ep->com.dev->rdev.lldi.nchan;
+		ep->rss_qid = ep->com.dev->rdev.lldi.rxq_ids[
+			      cxgb4_port_idx(neigh->dev) * step];
+	}
+	if (!ep->l2t) {
+		printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__);
+		err = -ENOMEM;
+		goto fail4;
+	}
+
+	PDBG("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n",
+	     __func__, ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid,
+	     ep->l2t->idx);
+
+	state_set(&ep->com, CONNECTING);
+	ep->tos = 0;
+
+	/* send connect request to rnic */
+	err = send_connect(ep);
+	if (!err)
+		goto out;
+
+	cxgb4_l2t_release(ep->l2t);
+fail4:
+	dst_release(ep->dst);
+fail3:
+	cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
+fail2:
+	/*
+	 * remember to send notification to upper layer.
+	 * We are in here so the upper layer is not aware that this is
+	 * re-connect attempt and so, upper layer is still waiting for
+	 * response of 1st connect request.
+	 */
+	connect_reply_upcall(ep, -ECONNRESET);
+	c4iw_put_ep(&ep->com);
+out:
+	return err;
+}
+
 static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
 {
 	struct cpl_abort_req_rss *req = cplhdr(skb);
@@ -1573,8 +1915,11 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
 
 	/*
 	 * Wake up any threads in rdma_init() or rdma_fini().
+	 * However, this is not needed if com state is just
+	 * MPA_REQ_SENT
 	 */
-	c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
+	if (ep->com.state != MPA_REQ_SENT)
+		c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
 
 	mutex_lock(&ep->com.mutex);
 	switch (ep->com.state) {
@@ -1585,7 +1930,21 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
 		break;
 	case MPA_REQ_SENT:
 		stop_ep_timer(ep);
-		connect_reply_upcall(ep, -ECONNRESET);
+		if (mpa_rev == 2 && ep->tried_with_mpa_v1)
+			connect_reply_upcall(ep, -ECONNRESET);
+		else {
+			/*
+			 * we just don't send notification upwards because we
+			 * want to retry with mpa_v1 without upper layers even
+			 * knowing it.
+			 *
+			 * do some housekeeping so as to re-initiate the
+			 * connection
+			 */
+			PDBG("%s: mpa_rev=%d. Retrying with mpav1\n", __func__,
+			     mpa_rev);
+			ep->retry_with_mpa_v1 = 1;
+		}
 		break;
 	case MPA_REP_SENT:
 		break;
@@ -1621,7 +1980,9 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
 	dst_confirm(ep->dst);
 	if (ep->com.state != ABORTING) {
 		__state_set(&ep->com, DEAD);
-		release = 1;
+		/* we don't release if we want to retry with mpa_v1 */
+		if (!ep->retry_with_mpa_v1)
+			release = 1;
 	}
 	mutex_unlock(&ep->com.mutex);
@@ -1641,6 +2002,15 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
 out:
 	if (release)
 		release_ep_resources(ep);
+
+	/* retry with mpa-v1 */
+	if (ep && ep->retry_with_mpa_v1) {
+		cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid);
+		dst_release(ep->dst);
+		cxgb4_l2t_release(ep->l2t);
+		c4iw_reconnect(ep);
+	}
+
 	return 0;
 }
@@ -1792,18 +2162,40 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 		goto err;
 	}
 
-	cm_id->add_ref(cm_id);
-	ep->com.cm_id = cm_id;
-	ep->com.qp = qp;
+	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
+		if (conn_param->ord > ep->ird) {
+			ep->ird = conn_param->ird;
+			ep->ord = conn_param->ord;
+			send_mpa_reject(ep, conn_param->private_data,
+					conn_param->private_data_len);
+			abort_connection(ep, NULL, GFP_KERNEL);
+			err = -ENOMEM;
+			goto err;
+		}
+		if (conn_param->ird > ep->ord) {
+			if (!ep->ord)
+				conn_param->ird = 1;
+			else {
+				abort_connection(ep, NULL, GFP_KERNEL);
+				err = -ENOMEM;
+				goto err;
+			}
+		}
+	}
 
 	ep->ird = conn_param->ird;
 	ep->ord = conn_param->ord;
 
-	if (peer2peer && ep->ird == 0)
-		ep->ird = 1;
+	if (ep->mpa_attr.version != 2)
+		if (peer2peer && ep->ird == 0)
+			ep->ird = 1;
 
 	PDBG("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord);
 
+	cm_id->add_ref(cm_id);
+	ep->com.cm_id = cm_id;
+	ep->com.qp = qp;
+
 	/* bind QP to EP and move to RTS */
 	attrs.mpa_attr = ep->mpa_attr;
 	attrs.max_ird = ep->ird;
@@ -1944,6 +2336,8 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 		       ep->com.dev->rdev.lldi.nchan;
 		ep->rss_qid = ep->com.dev->rdev.lldi.rxq_ids[
 			      cxgb4_port_idx(neigh->dev) * step];
+		ep->retry_with_mpa_v1 = 0;
+		ep->tried_with_mpa_v1 = 0;
 	}
 	if (!ep->l2t) {
 		printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__);
@@ -2323,8 +2717,11 @@ static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb)
 
 	/*
 	 * Wake up any threads in rdma_init() or rdma_fini().
+	 * However, this is not needed if com state is just
+	 * MPA_REQ_SENT
 	 */
-	c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
+	if (ep->com.state != MPA_REQ_SENT)
+		c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
 	sched(dev, skb);
 	return 0;
 }
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -323,6 +323,7 @@ struct c4iw_mpa_attributes {
 	u8 recv_marker_enabled;
 	u8 xmit_marker_enabled;
 	u8 crc_enabled;
+	u8 enhanced_rdma_conn;
 	u8 version;
 	u8 p2p_type;
 };
@@ -349,6 +350,8 @@ struct c4iw_qp_attributes {
 	u8 is_terminate_local;
 	struct c4iw_mpa_attributes mpa_attr;
 	struct c4iw_ep *llp_stream_handle;
+	u8 layer_etype;
+	u8 ecode;
 };
 
 struct c4iw_qp {
@@ -501,11 +504,18 @@ enum c4iw_mmid_state {
 #define MPA_KEY_REP "MPA ID Rep Frame"
 
 #define MPA_MAX_PRIVATE_DATA	256
+#define MPA_ENHANCED_RDMA_CONN	0x10
 #define MPA_REJECT		0x20
 #define MPA_CRC			0x40
 #define MPA_MARKERS		0x80
 #define MPA_FLAGS_MASK		0xE0
 
+#define MPA_V2_PEER2PEER_MODEL          0x8000
+#define MPA_V2_ZERO_LEN_FPDU_RTR        0x4000
+#define MPA_V2_RDMA_WRITE_RTR           0x8000
+#define MPA_V2_RDMA_READ_RTR            0x4000
+#define MPA_V2_IRD_ORD_MASK             0x3FFF
+
 #define c4iw_put_ep(ep) { \
 	PDBG("put_ep (via %s:%u) ep %p refcnt %d\n", __func__, __LINE__,  \
 	     ep, atomic_read(&((ep)->kref.refcount))); \
@@ -528,6 +538,11 @@ struct mpa_message {
 	u8 private_data[0];
 };
 
+struct mpa_v2_conn_params {
+	__be16 ird;
+	__be16 ord;
+};
+
 struct terminate_message {
 	u8 layer_etype;
 	u8 ecode;
@@ -580,7 +595,10 @@ enum c4iw_ddp_ecodes {
 
 enum c4iw_mpa_ecodes {
 	MPA_CRC_ERR	    = 0x02,
-	MPA_MARKER_ERR	    = 0x03
+	MPA_MARKER_ERR	    = 0x03,
+	MPA_LOCAL_CATA      = 0x05,
+	MPA_INSUFF_IRD      = 0x06,
+	MPA_NOMATCH_RTR     = 0x07,
 };
 
 enum c4iw_ep_state {
@@ -651,6 +669,8 @@ struct c4iw_ep {
 	u16 txq_idx;
 	u16 ctrlq_idx;
 	u8 tos;
+	u8 retry_with_mpa_v1;
+	u8 tried_with_mpa_v1;
 };
 
 static inline struct c4iw_ep *to_ep(struct iw_cm_id *cm_id)
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -917,7 +917,11 @@ static void post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe,
 	wqe->u.terminate.type = FW_RI_TYPE_TERMINATE;
 	wqe->u.terminate.immdlen = cpu_to_be32(sizeof *term);
 	term = (struct terminate_message *)wqe->u.terminate.termmsg;
-	build_term_codes(err_cqe, &term->layer_etype, &term->ecode);
+	if (qhp->attr.layer_etype == (LAYER_MPA|DDP_LLP)) {
+		term->layer_etype = qhp->attr.layer_etype;
+		term->ecode = qhp->attr.ecode;
+	} else
+		build_term_codes(err_cqe, &term->layer_etype, &term->ecode);
 	c4iw_ofld_send(&qhp->rhp->rdev, skb);
 }
@@ -1012,6 +1016,7 @@ static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
 
 static void build_rtr_msg(u8 p2p_type, struct fw_ri_init *init)
 {
+	PDBG("%s p2p_type = %d\n", __func__, p2p_type);
 	memset(&init->u, 0, sizeof init->u);
 	switch (p2p_type) {
 	case FW_RI_INIT_P2PTYPE_RDMA_WRITE:
@@ -1212,6 +1217,8 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
 			break;
 		case C4IW_QP_STATE_TERMINATE:
 			set_state(qhp, C4IW_QP_STATE_TERMINATE);
+			qhp->attr.layer_etype = attrs->layer_etype;
+			qhp->attr.ecode = attrs->ecode;
 			if (qhp->ibqp.uobject)
 				t4_set_wq_in_error(&qhp->wq);
 			ep = qhp->ep;
@@ -1334,7 +1341,10 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp)
 	rhp = qhp->rhp;
 
 	attrs.next_state = C4IW_QP_STATE_ERROR;
-	c4iw_modify_qp(rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
+	if (qhp->attr.state == C4IW_QP_STATE_TERMINATE)
+		c4iw_modify_qp(rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
+	else
+		c4iw_modify_qp(rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
 	wait_event(qhp->wait, !qhp->ep);
 	remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid);