Commit 2fb44f2b authored by Jeremy Filizetti, committed by Greg Kroah-Hartman

staging: lustre: Support different ko2iblnd configs between systems

This patch adds support for ko2iblnd to have different values for
peer_credits and map_on_demand between systems.
Signed-off-by: Jeremy Filizetti <jeremy.filizetti@gmail.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-3322
Reviewed-on: http://review.whamcloud.com/11794
Reviewed-by: Amir Shehata <amir.shehata@intel.com>
Reviewed-by: James Simmons <uja.ornl@yahoo.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
parent 27f9aea3
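In a nutshell, the patch turns the queue depth (peer_credits) and the RDMA fragment limit (map_on_demand) into per-connection attributes negotiated at connect time instead of module-wide constants: the active side advertises its own settings, the passive side adopts whatever arrives in the peer's connection parameters, and a rejection triggers a renegotiated retry. The standalone sketch below illustrates only that selection step; the struct fields and constants are simplified stand-ins for kib_conn_t, kib_connparams_t, IBLND_CFG_RDMA_FRAGS and the kib_peertxcredits tunable, not the driver's actual definitions.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins for the driver's tunables. */
#define CFG_RDMA_FRAGS   256  /* stand-in for IBLND_CFG_RDMA_FRAGS */
#define CFG_PEER_CREDITS 8    /* stand-in for *kib_peertxcredits */

struct connparams {           /* stand-in for kib_connparams_t */
	uint16_t queue_depth;
	uint16_t max_frags;
};

struct conn {                 /* stand-in for kib_conn_t */
	uint16_t queue_depth;
	uint16_t max_frags;
};

/* Mirrors the new kiblnd_create_conn() logic: with no peer parameters
 * (active connect) start from local defaults; with peer parameters
 * (passive accept) adopt the peer's values. */
static void init_conn(struct conn *c, const struct connparams *cp)
{
	if (!cp) {
		c->max_frags = CFG_RDMA_FRAGS;
		c->queue_depth = CFG_PEER_CREDITS;
	} else {
		c->max_frags = cp->max_frags;
		c->queue_depth = cp->queue_depth;
	}
}

int main(void)
{
	struct connparams peer = { .queue_depth = 4, .max_frags = 32 };
	struct conn active, passive;

	init_conn(&active, NULL);   /* active side: local defaults */
	init_conn(&passive, &peer); /* passive side: peer's values */
	printf("active:  depth=%u frags=%u\n",
	       (unsigned)active.queue_depth, (unsigned)active.max_frags);
	printf("passive: depth=%u frags=%u\n",
	       (unsigned)passive.queue_depth, (unsigned)passive.max_frags);
	return 0;
}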
@@ -631,7 +631,7 @@ static int kiblnd_get_completion_vector(kib_conn_t *conn, int cpt)
 }
 
 kib_conn_t *kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid,
-                               int state, int version)
+                               int state, int version, kib_connparams_t *cp)
 {
         /*
          * CAVEAT EMPTOR:
@@ -686,6 +686,14 @@ kib_conn_t *kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid,
         cmid->context = conn;   /* for future CM callbacks */
         conn->ibc_cmid = cmid;
 
+        if (!cp) {
+                conn->ibc_max_frags = IBLND_CFG_RDMA_FRAGS;
+                conn->ibc_queue_depth = *kiblnd_tunables.kib_peertxcredits;
+        } else {
+                conn->ibc_max_frags = cp->ibcp_max_frags;
+                conn->ibc_queue_depth = cp->ibcp_queue_depth;
+        }
+
         INIT_LIST_HEAD(&conn->ibc_early_rxs);
         INIT_LIST_HEAD(&conn->ibc_tx_noops);
         INIT_LIST_HEAD(&conn->ibc_tx_queue);
@@ -730,27 +738,27 @@ kib_conn_t *kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid,
         write_unlock_irqrestore(glock, flags);
 
         LIBCFS_CPT_ALLOC(conn->ibc_rxs, lnet_cpt_table(), cpt,
-                         IBLND_RX_MSGS(version) * sizeof(kib_rx_t));
+                         IBLND_RX_MSGS(conn) * sizeof(kib_rx_t));
         if (!conn->ibc_rxs) {
                 CERROR("Cannot allocate RX buffers\n");
                 goto failed_2;
         }
 
         rc = kiblnd_alloc_pages(&conn->ibc_rx_pages, cpt,
-                                IBLND_RX_MSG_PAGES(version));
+                                IBLND_RX_MSG_PAGES(conn));
         if (rc)
                 goto failed_2;
 
         kiblnd_map_rx_descs(conn);
 
-        cq_attr.cqe = IBLND_CQ_ENTRIES(version);
+        cq_attr.cqe = IBLND_CQ_ENTRIES(conn);
         cq_attr.comp_vector = kiblnd_get_completion_vector(conn, cpt);
         cq = ib_create_cq(cmid->device,
                           kiblnd_cq_completion, kiblnd_cq_event, conn,
                           &cq_attr);
         if (IS_ERR(cq)) {
-                CERROR("Can't create CQ: %ld, cqe: %d\n",
-                       PTR_ERR(cq), IBLND_CQ_ENTRIES(version));
+                CERROR("Failed to create CQ with %d CQEs: %ld\n",
+                       IBLND_CQ_ENTRIES(conn), PTR_ERR(cq));
                 goto failed_2;
         }
 
@@ -764,8 +772,8 @@ kib_conn_t *kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid,
         init_qp_attr->event_handler = kiblnd_qp_event;
         init_qp_attr->qp_context = conn;
-        init_qp_attr->cap.max_send_wr = IBLND_SEND_WRS(version);
-        init_qp_attr->cap.max_recv_wr = IBLND_RECV_WRS(version);
+        init_qp_attr->cap.max_send_wr = IBLND_SEND_WRS(conn);
+        init_qp_attr->cap.max_recv_wr = IBLND_RECV_WRS(conn);
         init_qp_attr->cap.max_send_sge = 1;
         init_qp_attr->cap.max_recv_sge = 1;
         init_qp_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
@@ -786,11 +794,11 @@ kib_conn_t *kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid,
         LIBCFS_FREE(init_qp_attr, sizeof(*init_qp_attr));
 
         /* 1 ref for caller and each rxmsg */
-        atomic_set(&conn->ibc_refcount, 1 + IBLND_RX_MSGS(version));
-        conn->ibc_nrx = IBLND_RX_MSGS(version);
+        atomic_set(&conn->ibc_refcount, 1 + IBLND_RX_MSGS(conn));
+        conn->ibc_nrx = IBLND_RX_MSGS(conn);
 
         /* post receives */
-        for (i = 0; i < IBLND_RX_MSGS(version); i++) {
+        for (i = 0; i < IBLND_RX_MSGS(conn); i++) {
                 rc = kiblnd_post_rx(&conn->ibc_rxs[i],
                                     IBLND_POSTRX_NO_CREDIT);
                 if (rc) {
@@ -804,7 +812,7 @@ kib_conn_t *kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid,
                          * NB locking needed now I'm racing with completion
                          */
                         spin_lock_irqsave(&sched->ibs_lock, flags);
-                        conn->ibc_nrx -= IBLND_RX_MSGS(version) - i;
+                        conn->ibc_nrx -= IBLND_RX_MSGS(conn) - i;
                         spin_unlock_irqrestore(&sched->ibs_lock, flags);
 
                         /*
@@ -816,7 +824,7 @@ kib_conn_t *kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid,
                         conn->ibc_cmid = NULL;
 
                         /* Drop my own and unused rxbuffer refcounts */
-                        while (i++ <= IBLND_RX_MSGS(version))
+                        while (i++ <= IBLND_RX_MSGS(conn))
                                 kiblnd_conn_decref(conn);
 
                         return NULL;
@@ -886,8 +894,7 @@ void kiblnd_destroy_conn(kib_conn_t *conn)
         if (conn->ibc_rxs) {
                 LIBCFS_FREE(conn->ibc_rxs,
-                            IBLND_RX_MSGS(conn->ibc_version)
-                              * sizeof(kib_rx_t));
+                            IBLND_RX_MSGS(conn) * sizeof(kib_rx_t));
         }
 
         if (conn->ibc_connvars)
@@ -1143,7 +1150,7 @@ void kiblnd_unmap_rx_descs(kib_conn_t *conn)
         LASSERT(conn->ibc_rxs);
         LASSERT(conn->ibc_hdev);
 
-        for (i = 0; i < IBLND_RX_MSGS(conn->ibc_version); i++) {
+        for (i = 0; i < IBLND_RX_MSGS(conn); i++) {
                 rx = &conn->ibc_rxs[i];
 
                 LASSERT(rx->rx_nob >= 0); /* not posted */
@@ -1167,7 +1174,7 @@ void kiblnd_map_rx_descs(kib_conn_t *conn)
         int ipg;
         int i;
 
-        for (pg_off = ipg = i = 0; i < IBLND_RX_MSGS(conn->ibc_version); i++) {
+        for (pg_off = ipg = i = 0; i < IBLND_RX_MSGS(conn); i++) {
                 pg = conn->ibc_rx_pages->ibp_pages[ipg];
                 rx = &conn->ibc_rxs[i];
 
@@ -1192,7 +1199,7 @@ void kiblnd_map_rx_descs(kib_conn_t *conn)
                 if (pg_off == PAGE_SIZE) {
                         pg_off = 0;
                         ipg++;
-                        LASSERT(ipg <= IBLND_RX_MSG_PAGES(conn->ibc_version));
+                        LASSERT(ipg <= IBLND_RX_MSG_PAGES(conn));
                 }
         }
 }
@@ -1296,12 +1303,16 @@ static void kiblnd_map_tx_pool(kib_tx_pool_t *tpo)
         }
 }
 
-struct ib_mr *kiblnd_find_rd_dma_mr(kib_hca_dev_t *hdev, kib_rdma_desc_t *rd)
+struct ib_mr *kiblnd_find_rd_dma_mr(kib_hca_dev_t *hdev, kib_rdma_desc_t *rd,
+                                    int negotiated_nfrags)
 {
+        __u16 nfrags = (negotiated_nfrags != -1) ?
+                       negotiated_nfrags : *kiblnd_tunables.kib_map_on_demand;
+
         LASSERT(hdev->ibh_mrs);
 
         if (*kiblnd_tunables.kib_map_on_demand > 0 &&
-            *kiblnd_tunables.kib_map_on_demand <= rd->rd_nfrags)
+            nfrags <= rd->rd_nfrags)
                 return NULL;
 
         return hdev->ibh_mrs;
...
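A note on the kiblnd_find_rd_dma_mr() change above: the new negotiated_nfrags argument uses -1 as a "no connection yet" sentinel, in which case the local map_on_demand tunable still governs the decision. A minimal userspace sketch of just that selection; the constant and function names are illustrative stand-ins, not the driver's:

#include <stdint.h>
#include <stdio.h>

#define MAP_ON_DEMAND 32  /* hypothetical stand-in for *kib_map_on_demand */

/* Returns 1 if the global DMA MR may be used, 0 if the descriptor is too
 * fragmented and must be mapped on demand (mirrors the NULL return). */
static int can_use_global_mr(int negotiated_nfrags, int rd_nfrags)
{
	uint16_t nfrags = (negotiated_nfrags != -1) ?
			  (uint16_t)negotiated_nfrags : MAP_ON_DEMAND;

	if (MAP_ON_DEMAND > 0 && nfrags <= rd_nfrags)
		return 0;  /* too fragmented: map on demand */
	return 1;
}

int main(void)
{
	/* No connection yet: falls back to the tunable (32 > 16), MR ok. */
	printf("%d\n", can_use_global_mr(-1, 16));
	/* Negotiated limit of 8 frags wins: 8 <= 16, map on demand. */
	printf("%d\n", can_use_global_mr(8, 16));
	return 0;
}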
@@ -162,18 +162,17 @@ kiblnd_concurrent_sends_v1(void)
 #define IBLND_FMR_POOL          256
 #define IBLND_FMR_POOL_FLUSH    192
 
-/* TX messages (shared by all connections) */
-#define IBLND_TX_MSGS()         (*kiblnd_tunables.kib_ntx)
-
-/* RX messages (per connection) */
-#define IBLND_RX_MSGS(v)        (IBLND_MSG_QUEUE_SIZE(v) * 2 + IBLND_OOB_MSGS(v))
-#define IBLND_RX_MSG_BYTES(v)   (IBLND_RX_MSGS(v) * IBLND_MSG_SIZE)
-#define IBLND_RX_MSG_PAGES(v)   ((IBLND_RX_MSG_BYTES(v) + PAGE_SIZE - 1) / PAGE_SIZE)
+#define IBLND_RX_MSGS(c)        \
+        ((c->ibc_queue_depth) * 2 + IBLND_OOB_MSGS(c->ibc_version))
+#define IBLND_RX_MSG_BYTES(c)   (IBLND_RX_MSGS(c) * IBLND_MSG_SIZE)
+#define IBLND_RX_MSG_PAGES(c)   \
+        ((IBLND_RX_MSG_BYTES(c) + PAGE_SIZE - 1) / PAGE_SIZE)
 
 /* WRs and CQEs (per connection) */
-#define IBLND_RECV_WRS(v)       IBLND_RX_MSGS(v)
-#define IBLND_SEND_WRS(v)       ((IBLND_RDMA_FRAGS(v) + 1) * IBLND_CONCURRENT_SENDS(v))
-#define IBLND_CQ_ENTRIES(v)     (IBLND_RECV_WRS(v) + IBLND_SEND_WRS(v))
+#define IBLND_RECV_WRS(c)       IBLND_RX_MSGS(c)
+#define IBLND_SEND_WRS(c)       \
+        ((c->ibc_max_frags + 1) * IBLND_CONCURRENT_SENDS(c->ibc_version))
+#define IBLND_CQ_ENTRIES(c)     (IBLND_RECV_WRS(c) + IBLND_SEND_WRS(c))
 
 struct kib_hca_dev;
 
@@ -464,10 +463,10 @@ typedef struct {
 #define IBLND_REJECT_FATAL              3 /* Anything else */
 #define IBLND_REJECT_CONN_UNCOMPAT      4 /* incompatible version peer */
 #define IBLND_REJECT_CONN_STALE         5 /* stale peer */
-#define IBLND_REJECT_RDMA_FRAGS         6 /* Fatal: peer's rdma frags can't match */
-                                          /* mine */
-#define IBLND_REJECT_MSG_QUEUE_SIZE     7 /* Fatal: peer's msg queue size can't */
-                                          /* match mine */
+/* peer's rdma frags doesn't match mine */
+#define IBLND_REJECT_RDMA_FRAGS         6
+/* peer's msg queue size doesn't match mine */
+#define IBLND_REJECT_MSG_QUEUE_SIZE     7
 
 /***********************************************************************/
 
@@ -535,6 +534,10 @@ typedef struct kib_conn {
         int             ibc_outstanding_credits; /* # credits to return */
         int             ibc_reserved_credits;    /* # ACK/DONE msg credits */
         int             ibc_comms_error;         /* set on comms error */
+        /* connections queue depth */
+        __u16           ibc_queue_depth;
+        /* connections max frags */
+        __u16           ibc_max_frags;
         unsigned int    ibc_nrx:16;              /* receive buffers owned */
         unsigned int    ibc_scheduled:1;         /* scheduled for attention */
         unsigned int    ibc_ready:1;             /* CQ callback fired */
@@ -907,7 +910,8 @@ static inline unsigned int kiblnd_sg_dma_len(struct ib_device *dev,
 #define KIBLND_CONN_PARAM_LEN(e) ((e)->param.conn.private_data_len)
 
 struct ib_mr *kiblnd_find_rd_dma_mr(kib_hca_dev_t *hdev,
-                                    kib_rdma_desc_t *rd);
+                                    kib_rdma_desc_t *rd,
+                                    int negotiated_nfrags);
 void kiblnd_map_rx_descs(kib_conn_t *conn);
 void kiblnd_unmap_rx_descs(kib_conn_t *conn);
 void kiblnd_pool_free_node(kib_pool_t *pool, struct list_head *node);
@@ -942,7 +946,7 @@ int kiblnd_close_stale_conns_locked(kib_peer_t *peer,
 int kiblnd_close_peer_conns_locked(kib_peer_t *peer, int why);
 kib_conn_t *kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid,
-                               int state, int version);
+                               int state, int version, kib_connparams_t *cp);
 void kiblnd_destroy_conn(kib_conn_t *conn);
 void kiblnd_close_conn(kib_conn_t *conn, int error);
 void kiblnd_close_conn_locked(kib_conn_t *conn, int error);
...
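To see what the reworked per-connection macros compute, here is a standalone sketch of the buffer arithmetic under assumed values (queue depth 8, 2 OOB messages, 4 KiB messages and pages, 32 frags, 8 concurrent sends). The names mirror the macros above, but every constant is illustrative rather than taken from the driver:

#include <stdio.h>

/* Illustrative stand-ins; the real values come from tunables/protocol. */
#define QUEUE_DEPTH 8     /* conn->ibc_queue_depth */
#define OOB_MSGS    2     /* IBLND_OOB_MSGS(version) */
#define MSG_SIZE    4096  /* IBLND_MSG_SIZE */
#define PAGE_SZ     4096  /* PAGE_SIZE */
#define MAX_FRAGS   32    /* conn->ibc_max_frags */
#define CONC_SENDS  8     /* IBLND_CONCURRENT_SENDS(version) */

int main(void)
{
	int rx_msgs = QUEUE_DEPTH * 2 + OOB_MSGS;          /* IBLND_RX_MSGS */
	int rx_bytes = rx_msgs * MSG_SIZE;                 /* IBLND_RX_MSG_BYTES */
	int rx_pages = (rx_bytes + PAGE_SZ - 1) / PAGE_SZ; /* IBLND_RX_MSG_PAGES */
	int recv_wrs = rx_msgs;                            /* IBLND_RECV_WRS */
	int send_wrs = (MAX_FRAGS + 1) * CONC_SENDS;       /* IBLND_SEND_WRS */
	int cq_entries = recv_wrs + send_wrs;              /* IBLND_CQ_ENTRIES */

	/* With these inputs: 18 RX messages, 18 pages, 282 CQ entries. */
	printf("rx_msgs=%d rx_pages=%d cqes=%d\n", rx_msgs, rx_pages, cq_entries);
	return 0;
}

Because the macros now take the connection rather than the protocol version, two peers with different peer_credits sizes get buffers, WRs and CQEs sized for the depth they actually negotiated.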
@@ -328,14 +328,13 @@ kiblnd_handle_rx(kib_rx_t *rx)
                 spin_lock(&conn->ibc_lock);
 
                 if (conn->ibc_credits + credits >
-                    IBLND_MSG_QUEUE_SIZE(conn->ibc_version)) {
+                    conn->ibc_queue_depth) {
                         rc2 = conn->ibc_credits;
                         spin_unlock(&conn->ibc_lock);
 
                         CERROR("Bad credits from %s: %d + %d > %d\n",
                                libcfs_nid2str(conn->ibc_peer->ibp_nid),
-                               rc2, credits,
-                               IBLND_MSG_QUEUE_SIZE(conn->ibc_version));
+                               rc2, credits, conn->ibc_queue_depth);
 
                         kiblnd_close_conn(conn, -EPROTO);
                         kiblnd_post_rx(rx, IBLND_POSTRX_NO_CREDIT);
@@ -653,8 +652,8 @@ static int kiblnd_map_tx(lnet_ni_t *ni, kib_tx_t *tx, kib_rdma_desc_t *rd,
                 nob += rd->rd_frags[i].rf_nob;
         }
 
-        /* looking for pre-mapping MR */
-        mr = kiblnd_find_rd_dma_mr(hdev, rd);
+        mr = kiblnd_find_rd_dma_mr(hdev, rd, tx->tx_conn ?
+                                   tx->tx_conn->ibc_max_frags : -1);
         if (mr) {
                 /* found pre-mapping MR */
                 rd->rd_key = (rd != tx->tx_rd) ? mr->rkey : mr->lkey;
@@ -774,13 +773,13 @@ kiblnd_post_tx_locked(kib_conn_t *conn, kib_tx_t *tx, int credit)
         LASSERT(tx->tx_queued);
         /* We rely on this for QP sizing */
         LASSERT(tx->tx_nwrq > 0);
-        LASSERT(tx->tx_nwrq <= 1 + IBLND_RDMA_FRAGS(ver));
+        LASSERT(tx->tx_nwrq <= 1 + conn->ibc_max_frags);
 
         LASSERT(!credit || credit == 1);
         LASSERT(conn->ibc_outstanding_credits >= 0);
-        LASSERT(conn->ibc_outstanding_credits <= IBLND_MSG_QUEUE_SIZE(ver));
+        LASSERT(conn->ibc_outstanding_credits <= conn->ibc_queue_depth);
         LASSERT(conn->ibc_credits >= 0);
-        LASSERT(conn->ibc_credits <= IBLND_MSG_QUEUE_SIZE(ver));
+        LASSERT(conn->ibc_credits <= conn->ibc_queue_depth);
 
         if (conn->ibc_nsends_posted == IBLND_CONCURRENT_SENDS(ver)) {
                 /* tx completions outstanding... */
@@ -1089,10 +1088,10 @@ kiblnd_init_rdma(kib_conn_t *conn, kib_tx_t *tx, int type,
                         break;
                 }
 
-                if (tx->tx_nwrq == IBLND_RDMA_FRAGS(conn->ibc_version)) {
-                        CERROR("RDMA too fragmented for %s (%d): %d/%d src %d/%d dst frags\n",
+                if (tx->tx_nwrq >= conn->ibc_max_frags) {
+                        CERROR("RDMA has too many fragments for peer %s (%d), src idx/frags: %d/%d dst idx/frags: %d/%d\n",
                                libcfs_nid2str(conn->ibc_peer->ibp_nid),
-                               IBLND_RDMA_FRAGS(conn->ibc_version),
+                               conn->ibc_max_frags,
                                srcidx, srcrd->rd_nfrags,
                                dstidx, dstrd->rd_nfrags);
                         rc = -EMSGSIZE;
@@ -2243,7 +2242,7 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
         if (!ni ||                              /* no matching net */
             ni->ni_nid != reqmsg->ibm_dstnid || /* right NET, wrong NID! */
             net->ibn_dev != ibdev) {            /* wrong device */
-                CERROR("Can't accept %s on %s (%s:%d:%pI4h): bad dst nid %s\n",
+                CERROR("Can't accept conn from %s on %s (%s:%d:%pI4h): bad dst nid %s\n",
                        libcfs_nid2str(nid),
                        !ni ? "NA" : libcfs_nid2str(ni->ni_nid),
                        ibdev->ibd_ifname, ibdev->ibd_nnets,
@@ -2270,10 +2269,11 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
                 goto failed;
         }
 
-        if (reqmsg->ibm_u.connparams.ibcp_queue_depth !=
+        if (reqmsg->ibm_u.connparams.ibcp_queue_depth >
             IBLND_MSG_QUEUE_SIZE(version)) {
-                CERROR("Can't accept %s: incompatible queue depth %d (%d wanted)\n",
-                       libcfs_nid2str(nid), reqmsg->ibm_u.connparams.ibcp_queue_depth,
+                CERROR("Can't accept conn from %s, queue depth too large: %d (<=%d wanted)\n",
+                       libcfs_nid2str(nid),
+                       reqmsg->ibm_u.connparams.ibcp_queue_depth,
                        IBLND_MSG_QUEUE_SIZE(version));
 
                 if (version == IBLND_MSG_VERSION)
@@ -2282,14 +2282,25 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
                 goto failed;
         }
 
-        if (reqmsg->ibm_u.connparams.ibcp_max_frags !=
+        if (reqmsg->ibm_u.connparams.ibcp_max_frags >
             IBLND_RDMA_FRAGS(version)) {
-                CERROR("Can't accept %s(version %x): incompatible max_frags %d (%d wanted)\n",
-                       libcfs_nid2str(nid), version,
-                       reqmsg->ibm_u.connparams.ibcp_max_frags,
-                       IBLND_RDMA_FRAGS(version));
-
-                if (version == IBLND_MSG_VERSION)
+                CWARN("Can't accept conn from %s (version %x): max_frags %d too large (%d wanted)\n",
+                      libcfs_nid2str(nid), version,
+                      reqmsg->ibm_u.connparams.ibcp_max_frags,
+                      IBLND_RDMA_FRAGS(version));
+
+                if (version >= IBLND_MSG_VERSION)
+                        rej.ibr_why = IBLND_REJECT_RDMA_FRAGS;
+
+                goto failed;
+        } else if (reqmsg->ibm_u.connparams.ibcp_max_frags <
+                   IBLND_RDMA_FRAGS(version) && !net->ibn_fmr_ps) {
+                CWARN("Can't accept conn from %s (version %x): max_frags %d incompatible without FMR pool (%d wanted)\n",
+                      libcfs_nid2str(nid), version,
+                      reqmsg->ibm_u.connparams.ibcp_max_frags,
+                      IBLND_RDMA_FRAGS(version));
+
+                if (version >= IBLND_MSG_VERSION)
                         rej.ibr_why = IBLND_REJECT_RDMA_FRAGS;
 
                 goto failed;
@@ -2371,7 +2382,8 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
                 write_unlock_irqrestore(g_lock, flags);
         }
 
-        conn = kiblnd_create_conn(peer, cmid, IBLND_CONN_PASSIVE_WAIT, version);
+        conn = kiblnd_create_conn(peer, cmid, IBLND_CONN_PASSIVE_WAIT, version,
+                                  &reqmsg->ibm_u.connparams);
         if (!conn) {
                 kiblnd_peer_connect_failed(peer, 0, -ENOMEM);
                 kiblnd_peer_decref(peer);
@@ -2384,19 +2396,21 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
          * CM callback doesn't destroy cmid.
          */
         conn->ibc_incarnation = reqmsg->ibm_srcstamp;
-        conn->ibc_credits = IBLND_MSG_QUEUE_SIZE(version);
-        conn->ibc_reserved_credits = IBLND_MSG_QUEUE_SIZE(version);
-        LASSERT(conn->ibc_credits + conn->ibc_reserved_credits + IBLND_OOB_MSGS(version)
-                <= IBLND_RX_MSGS(version));
+        conn->ibc_credits = reqmsg->ibm_u.connparams.ibcp_queue_depth;
+        conn->ibc_reserved_credits = reqmsg->ibm_u.connparams.ibcp_queue_depth;
+        LASSERT(conn->ibc_credits + conn->ibc_reserved_credits +
+                IBLND_OOB_MSGS(version) <= IBLND_RX_MSGS(conn));
 
         ackmsg = &conn->ibc_connvars->cv_msg;
         memset(ackmsg, 0, sizeof(*ackmsg));
 
         kiblnd_init_msg(ackmsg, IBLND_MSG_CONNACK,
                         sizeof(ackmsg->ibm_u.connparams));
-        ackmsg->ibm_u.connparams.ibcp_queue_depth = IBLND_MSG_QUEUE_SIZE(version);
+        ackmsg->ibm_u.connparams.ibcp_queue_depth =
+                reqmsg->ibm_u.connparams.ibcp_queue_depth;
+        ackmsg->ibm_u.connparams.ibcp_max_frags =
+                reqmsg->ibm_u.connparams.ibcp_max_frags;
         ackmsg->ibm_u.connparams.ibcp_max_msg_size = IBLND_MSG_SIZE;
-        ackmsg->ibm_u.connparams.ibcp_max_frags = IBLND_RDMA_FRAGS(version);
 
         kiblnd_pack_msg(ni, ackmsg, version, 0, nid, reqmsg->ibm_srcstamp);
@@ -2479,6 +2493,31 @@ kiblnd_reconnect(kib_conn_t *conn, int version,
                 reason = "Unknown";
                 break;
 
+        case IBLND_REJECT_RDMA_FRAGS:
+                if (conn->ibc_max_frags <= cp->ibcp_max_frags) {
+                        CNETERR("Unsupported max frags, peer supports %d\n",
+                                cp->ibcp_max_frags);
+                        goto failed;
+                } else if (!*kiblnd_tunables.kib_map_on_demand) {
+                        CNETERR("map_on_demand must be enabled to support map_on_demand peers\n");
+                        goto failed;
+                }
+
+                conn->ibc_max_frags = cp->ibcp_max_frags;
+                reason = "rdma fragments";
+                break;
+
+        case IBLND_REJECT_MSG_QUEUE_SIZE:
+                if (conn->ibc_queue_depth <= cp->ibcp_queue_depth) {
+                        CNETERR("Unsupported queue depth, peer supports %d\n",
+                                cp->ibcp_queue_depth);
+                        goto failed;
+                }
+
+                conn->ibc_queue_depth = cp->ibcp_queue_depth;
+                reason = "queue depth";
+                break;
+
         case IBLND_REJECT_CONN_STALE:
                 reason = "stale";
                 break;
@@ -2495,11 +2534,17 @@ kiblnd_reconnect(kib_conn_t *conn, int version,
         CNETERR("%s: retrying (%s), %x, %x, queue_dep: %d, max_frag: %d, msg_size: %d\n",
                 libcfs_nid2str(peer->ibp_nid),
                 reason, IBLND_MSG_VERSION, version,
-                cp ? cp->ibcp_queue_depth : IBLND_MSG_QUEUE_SIZE(version),
-                cp ? cp->ibcp_max_frags : IBLND_RDMA_FRAGS(version),
+                conn->ibc_queue_depth, conn->ibc_max_frags,
                 cp ? cp->ibcp_max_msg_size : IBLND_MSG_SIZE);
 
         kiblnd_connect_peer(peer);
+        return;
+
+ failed:
+        write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+        peer->ibp_connecting--;
+        write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+        return;
 }
 
 static void
@@ -2595,24 +2640,10 @@ kiblnd_rejected(kib_conn_t *conn, int reason, void *priv, int priv_nob)
         case IBLND_REJECT_CONN_RACE:
         case IBLND_REJECT_CONN_STALE:
         case IBLND_REJECT_CONN_UNCOMPAT:
-                kiblnd_reconnect(conn, rej->ibr_version,
-                                 incarnation, rej->ibr_why, cp);
-                break;
-
         case IBLND_REJECT_MSG_QUEUE_SIZE:
-                CERROR("%s rejected: incompatible message queue depth %d, %d\n",
-                       libcfs_nid2str(peer->ibp_nid),
-                       cp ? cp->ibcp_queue_depth :
-                       IBLND_MSG_QUEUE_SIZE(rej->ibr_version),
-                       IBLND_MSG_QUEUE_SIZE(conn->ibc_version));
-                break;
-
         case IBLND_REJECT_RDMA_FRAGS:
-                CERROR("%s rejected: incompatible # of RDMA fragments %d, %d\n",
-                       libcfs_nid2str(peer->ibp_nid),
-                       cp ? cp->ibcp_max_frags :
-                       IBLND_RDMA_FRAGS(rej->ibr_version),
-                       IBLND_RDMA_FRAGS(conn->ibc_version));
+                kiblnd_reconnect(conn, rej->ibr_version,
+                                 incarnation, rej->ibr_why, cp);
                 break;
 
         case IBLND_REJECT_NO_RESOURCES:
@@ -2676,22 +2707,22 @@ kiblnd_check_connreply(kib_conn_t *conn, void *priv, int priv_nob)
                 goto failed;
         }
 
-        if (msg->ibm_u.connparams.ibcp_queue_depth !=
-            IBLND_MSG_QUEUE_SIZE(ver)) {
-                CERROR("%s has incompatible queue depth %d(%d wanted)\n",
+        if (msg->ibm_u.connparams.ibcp_queue_depth >
+            conn->ibc_queue_depth) {
+                CERROR("%s has incompatible queue depth %d (<=%d wanted)\n",
                        libcfs_nid2str(peer->ibp_nid),
                        msg->ibm_u.connparams.ibcp_queue_depth,
-                       IBLND_MSG_QUEUE_SIZE(ver));
+                       conn->ibc_queue_depth);
                 rc = -EPROTO;
                 goto failed;
         }
 
-        if (msg->ibm_u.connparams.ibcp_max_frags !=
-            IBLND_RDMA_FRAGS(ver)) {
-                CERROR("%s has incompatible max_frags %d (%d wanted)\n",
+        if (msg->ibm_u.connparams.ibcp_max_frags >
+            conn->ibc_max_frags) {
+                CERROR("%s has incompatible max_frags %d (<=%d wanted)\n",
                        libcfs_nid2str(peer->ibp_nid),
                        msg->ibm_u.connparams.ibcp_max_frags,
-                       IBLND_RDMA_FRAGS(ver));
+                       conn->ibc_max_frags);
                 rc = -EPROTO;
                 goto failed;
         }
@@ -2721,10 +2752,12 @@ kiblnd_check_connreply(kib_conn_t *conn, void *priv, int priv_nob)
         }
 
         conn->ibc_incarnation = msg->ibm_srcstamp;
-        conn->ibc_credits =
-        conn->ibc_reserved_credits = IBLND_MSG_QUEUE_SIZE(ver);
-        LASSERT(conn->ibc_credits + conn->ibc_reserved_credits + IBLND_OOB_MSGS(ver)
-                <= IBLND_RX_MSGS(ver));
+        conn->ibc_credits = msg->ibm_u.connparams.ibcp_queue_depth;
+        conn->ibc_reserved_credits = msg->ibm_u.connparams.ibcp_queue_depth;
+        conn->ibc_queue_depth = msg->ibm_u.connparams.ibcp_queue_depth;
+        conn->ibc_max_frags = msg->ibm_u.connparams.ibcp_max_frags;
+        LASSERT(conn->ibc_credits + conn->ibc_reserved_credits +
+                IBLND_OOB_MSGS(ver) <= IBLND_RX_MSGS(conn));
 
         kiblnd_connreq_done(conn, 0);
         return;
@@ -2761,7 +2794,8 @@ kiblnd_active_connect(struct rdma_cm_id *cmid)
         read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
 
-        conn = kiblnd_create_conn(peer, cmid, IBLND_CONN_ACTIVE_CONNECT, version);
+        conn = kiblnd_create_conn(peer, cmid, IBLND_CONN_ACTIVE_CONNECT,
+                                  version, NULL);
         if (!conn) {
                 kiblnd_peer_connect_failed(peer, 1, -ENOMEM);
                 kiblnd_peer_decref(peer); /* lose cmid's ref */
@@ -2777,8 +2811,8 @@ kiblnd_active_connect(struct rdma_cm_id *cmid)
         memset(msg, 0, sizeof(*msg));
         kiblnd_init_msg(msg, IBLND_MSG_CONNREQ, sizeof(msg->ibm_u.connparams));
-        msg->ibm_u.connparams.ibcp_queue_depth = IBLND_MSG_QUEUE_SIZE(version);
-        msg->ibm_u.connparams.ibcp_max_frags = IBLND_RDMA_FRAGS(version);
+        msg->ibm_u.connparams.ibcp_queue_depth = conn->ibc_queue_depth;
+        msg->ibm_u.connparams.ibcp_max_frags = conn->ibc_max_frags;
         msg->ibm_u.connparams.ibcp_max_msg_size = IBLND_MSG_SIZE;
 
         kiblnd_pack_msg(peer->ibp_ni, msg, version,
...
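Finally, the retry logic added to kiblnd_reconnect() boils down to: on a queue-size or frags rejection, adopt the peer's advertised value if it is strictly smaller than ours, then dial again; otherwise fail the connect. A standalone sketch of that negotiation step under hypothetical starting values (the helper name and numbers are illustrative, not the driver's):

#include <stdint.h>
#include <stdio.h>

struct conn { uint16_t queue_depth, max_frags; };

/* Mirrors the new reject handling: a retry only helps if the peer's
 * advertised limit is strictly below ours; otherwise it is fatal. */
static int shrink_on_reject(uint16_t *mine, uint16_t theirs)
{
	if (*mine <= theirs)
		return -1;  /* peer wants more than we support: fail */
	*mine = theirs;     /* adopt the peer's limit and reconnect */
	return 0;
}

int main(void)
{
	struct conn c = { .queue_depth = 8, .max_frags = 256 };

	/* Peer rejected with IBLND_REJECT_MSG_QUEUE_SIZE, supports 4. */
	if (!shrink_on_reject(&c.queue_depth, 4))
		printf("retry with queue_depth=%u\n", (unsigned)c.queue_depth);
	/* Peer rejected with IBLND_REJECT_RDMA_FRAGS, supports 32. */
	if (!shrink_on_reject(&c.max_frags, 32))
		printf("retry with max_frags=%u\n", (unsigned)c.max_frags);
	return 0;
}

This is what lets two systems run ko2iblnd with different peer_credits and map_on_demand settings: the side with the larger configuration steps down to the smaller peer's values instead of refusing the connection outright.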