Commit 76b06402 authored by Doug Ledford

Merge branches 'ib_core', 'ib_ipoib', 'srpt', 'drain-cq-v4' and 'net/9p' into k.o/for-4.6

@@ -1657,3 +1657,167 @@ int ib_sg_to_pages(struct ib_mr *mr,
return i;
}
EXPORT_SYMBOL(ib_sg_to_pages);
struct ib_drain_cqe {
struct ib_cqe cqe;
struct completion done;
};
static void ib_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct ib_drain_cqe *cqe = container_of(wc->wr_cqe, struct ib_drain_cqe,
cqe);
complete(&cqe->done);
}
/*
* Post a WR and block until its completion is reaped for the SQ.
*/
static void __ib_drain_sq(struct ib_qp *qp)
{
struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
struct ib_drain_cqe sdrain;
struct ib_send_wr swr = {}, *bad_swr;
int ret;
if (qp->send_cq->poll_ctx == IB_POLL_DIRECT) {
WARN_ONCE(qp->send_cq->poll_ctx == IB_POLL_DIRECT,
"IB_POLL_DIRECT poll_ctx not supported for drain\n");
return;
}
swr.wr_cqe = &sdrain.cqe;
sdrain.cqe.done = ib_drain_qp_done;
init_completion(&sdrain.done);
ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
if (ret) {
WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
return;
}
ret = ib_post_send(qp, &swr, &bad_swr);
if (ret) {
WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
return;
}
wait_for_completion(&sdrain.done);
}
/*
* Post a WR and block until its completion is reaped for the RQ.
*/
static void __ib_drain_rq(struct ib_qp *qp)
{
struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
struct ib_drain_cqe rdrain;
struct ib_recv_wr rwr = {}, *bad_rwr;
int ret;
if (qp->recv_cq->poll_ctx == IB_POLL_DIRECT) {
WARN_ONCE(qp->recv_cq->poll_ctx == IB_POLL_DIRECT,
"IB_POLL_DIRECT poll_ctx not supported for drain\n");
return;
}
rwr.wr_cqe = &rdrain.cqe;
rdrain.cqe.done = ib_drain_qp_done;
init_completion(&rdrain.done);
ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
if (ret) {
WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
return;
}
ret = ib_post_recv(qp, &rwr, &bad_rwr);
if (ret) {
WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
return;
}
wait_for_completion(&rdrain.done);
}
/**
* ib_drain_sq() - Block until all SQ CQEs have been consumed by the
* application.
* @qp: queue pair to drain
*
* If the device has a provider-specific drain function, then
* call that. Otherwise call the generic drain function
* __ib_drain_sq().
*
* The caller must:
*
* ensure there is room in the CQ and SQ for the drain work request and
* completion.
*
* allocate the CQ using ib_alloc_cq() and the CQ poll context cannot be
* IB_POLL_DIRECT.
*
* ensure that there are no other contexts that are posting WRs concurrently.
* Otherwise the drain is not guaranteed.
*/
void ib_drain_sq(struct ib_qp *qp)
{
if (qp->device->drain_sq)
qp->device->drain_sq(qp);
else
__ib_drain_sq(qp);
}
EXPORT_SYMBOL(ib_drain_sq);
/**
* ib_drain_rq() - Block until all RQ CQEs have been consumed by the
* application.
* @qp: queue pair to drain
*
* If the device has a provider-specific drain function, then
* call that. Otherwise call the generic drain function
* __ib_drain_rq().
*
* The caller must:
*
* ensure there is room in the CQ and RQ for the drain work request and
* completion.
*
* allocate the CQ using ib_alloc_cq() and the CQ poll context cannot be
* IB_POLL_DIRECT.
*
* ensure that there are no other contexts that are posting WRs concurrently.
* Otherwise the drain is not guaranteed.
*/
void ib_drain_rq(struct ib_qp *qp)
{
if (qp->device->drain_rq)
qp->device->drain_rq(qp);
else
__ib_drain_rq(qp);
}
EXPORT_SYMBOL(ib_drain_rq);
/**
* ib_drain_qp() - Block until all CQEs have been consumed by the
* application on both the RQ and SQ.
* @qp: queue pair to drain
*
* The caller must:
*
* ensure there is room in the CQ(s), SQ, and RQ for drain work requests
* and completions.
*
* allocate the CQs using ib_alloc_cq() and the CQ poll context cannot be
* IB_POLL_DIRECT.
*
* ensure that there are no other contexts that are posting WRs concurrently.
* Otherwise the drain is not guaranteed.
*/
void ib_drain_qp(struct ib_qp *qp)
{
ib_drain_sq(qp);
ib_drain_rq(qp);
}
EXPORT_SYMBOL(ib_drain_qp);
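For context, a ULP is expected to call these helpers right before it tears down a queue pair. The sketch below is a minimal, hypothetical teardown path (my_conn and my_destroy_conn are illustrative names, not part of this commit); it assumes the CQ was allocated with ib_alloc_cq() using a poll context other than IB_POLL_DIRECT and sized with room for the drain work requests, as the kernel-doc above requires.

/* Illustrative only: a typical caller of the new drain API. */
struct my_conn {
	struct ib_qp *qp;
	struct ib_cq *cq;
};

static void my_destroy_conn(struct my_conn *conn)
{
	ib_drain_qp(conn->qp);		/* flush all SQ and RQ completions */
	ib_destroy_qp(conn->qp);
	ib_free_cq(conn->cq);
}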
@@ -815,8 +815,15 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
}
}
out:
if (wq) {
if (unlikely(qhp->attr.state != C4IW_QP_STATE_RTS)) {
if (t4_sq_empty(wq))
complete(&qhp->sq_drained);
if (t4_rq_empty(wq))
complete(&qhp->rq_drained);
}
spin_unlock(&qhp->lock);
}
return ret;
}
...
@@ -476,6 +476,8 @@ struct c4iw_qp {
wait_queue_head_t wait;
struct timer_list timer;
int sq_sig_all;
struct completion rq_drained;
struct completion sq_drained;
};
static inline struct c4iw_qp *to_c4iw_qp(struct ib_qp *ibqp)
@@ -1016,6 +1018,8 @@ extern int c4iw_wr_log;
extern int db_fc_threshold;
extern int db_coalescing_threshold;
extern int use_dsgl;
void c4iw_drain_rq(struct ib_qp *qp);
void c4iw_drain_sq(struct ib_qp *qp);
#endif
@@ -564,6 +564,8 @@ int c4iw_register_device(struct c4iw_dev *dev)
dev->ibdev.get_protocol_stats = c4iw_get_mib;
dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION;
dev->ibdev.get_port_immutable = c4iw_port_immutable;
dev->ibdev.drain_sq = c4iw_drain_sq;
dev->ibdev.drain_rq = c4iw_drain_rq;
dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL);
if (!dev->ibdev.iwcm)
...
@@ -1697,6 +1697,8 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
qhp->attr.max_ird = 0;
qhp->sq_sig_all = attrs->sq_sig_type == IB_SIGNAL_ALL_WR;
spin_lock_init(&qhp->lock);
init_completion(&qhp->sq_drained);
init_completion(&qhp->rq_drained);
mutex_init(&qhp->mutex);
init_waitqueue_head(&qhp->wait);
atomic_set(&qhp->refcnt, 1);
@@ -1888,3 +1890,17 @@ int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
init_attr->sq_sig_type = qhp->sq_sig_all ? IB_SIGNAL_ALL_WR : 0;
return 0;
}
void c4iw_drain_sq(struct ib_qp *ibqp)
{
struct c4iw_qp *qp = to_c4iw_qp(ibqp);
wait_for_completion(&qp->sq_drained);
}
void c4iw_drain_rq(struct ib_qp *ibqp)
{
struct c4iw_qp *qp = to_c4iw_qp(ibqp);
wait_for_completion(&qp->rq_drained);
}
@@ -244,6 +244,7 @@ struct ipoib_cm_tx {
unsigned tx_tail;
unsigned long flags;
u32 mtu;
unsigned max_send_sge;
};
struct ipoib_cm_rx_buf {
@@ -390,6 +391,7 @@ struct ipoib_dev_priv {
int hca_caps;
struct ipoib_ethtool_st ethtool;
struct timer_list poll_timer;
unsigned max_send_sge;
};
struct ipoib_ah {
...
@@ -710,6 +710,7 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_tx_buf *tx_req;
int rc;
unsigned usable_sge = tx->max_send_sge - !!skb_headlen(skb);
if (unlikely(skb->len > tx->mtu)) {
ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
@@ -719,7 +720,23 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
ipoib_cm_skb_too_long(dev, skb, tx->mtu - IPOIB_ENCAP_LEN);
return;
}
if (skb_shinfo(skb)->nr_frags > usable_sge) {
if (skb_linearize(skb) < 0) {
ipoib_warn(priv, "skb could not be linearized\n");
++dev->stats.tx_dropped;
++dev->stats.tx_errors;
dev_kfree_skb_any(skb);
return;
}
/* Does skb_linearize return ok without reducing nr_frags? */
if (skb_shinfo(skb)->nr_frags > usable_sge) {
ipoib_warn(priv, "too many frags after skb linearize\n");
++dev->stats.tx_dropped;
++dev->stats.tx_errors;
dev_kfree_skb_any(skb);
return;
}
}
ipoib_dbg_data(priv, "sending packet: head 0x%x length %d connection 0x%x\n", ipoib_dbg_data(priv, "sending packet: head 0x%x length %d connection 0x%x\n",
tx->tx_head, skb->len, tx->qp->qp_num); tx->tx_head, skb->len, tx->qp->qp_num);
...@@ -1031,7 +1048,8 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_ ...@@ -1031,7 +1048,8 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_
struct ib_qp *tx_qp; struct ib_qp *tx_qp;
if (dev->features & NETIF_F_SG) if (dev->features & NETIF_F_SG)
attr.cap.max_send_sge = MAX_SKB_FRAGS + 1; attr.cap.max_send_sge =
min_t(u32, priv->ca->attrs.max_sge, MAX_SKB_FRAGS + 1);
tx_qp = ib_create_qp(priv->pd, &attr); tx_qp = ib_create_qp(priv->pd, &attr);
if (PTR_ERR(tx_qp) == -EINVAL) { if (PTR_ERR(tx_qp) == -EINVAL) {
...@@ -1040,6 +1058,7 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_ ...@@ -1040,6 +1058,7 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_
attr.create_flags &= ~IB_QP_CREATE_USE_GFP_NOIO; attr.create_flags &= ~IB_QP_CREATE_USE_GFP_NOIO;
tx_qp = ib_create_qp(priv->pd, &attr); tx_qp = ib_create_qp(priv->pd, &attr);
} }
tx->max_send_sge = attr.cap.max_send_sge;
return tx_qp;
}
...
@@ -538,6 +538,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
struct ipoib_tx_buf *tx_req;
int hlen, rc;
void *phead;
unsigned usable_sge = priv->max_send_sge - !!skb_headlen(skb);
if (skb_is_gso(skb)) {
hlen = skb_transport_offset(skb) + tcp_hdrlen(skb);
@@ -561,6 +562,23 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
phead = NULL;
hlen = 0;
}
if (skb_shinfo(skb)->nr_frags > usable_sge) {
if (skb_linearize(skb) < 0) {
ipoib_warn(priv, "skb could not be linearized\n");
++dev->stats.tx_dropped;
++dev->stats.tx_errors;
dev_kfree_skb_any(skb);
return;
}
/* Does skb_linearize return ok without reducing nr_frags? */
if (skb_shinfo(skb)->nr_frags > usable_sge) {
ipoib_warn(priv, "too many frags after skb linearize\n");
++dev->stats.tx_dropped;
++dev->stats.tx_errors;
dev_kfree_skb_any(skb);
return;
}
}
ipoib_dbg_data(priv, "sending packet, length=%d address=%p qpn=0x%06x\n", ipoib_dbg_data(priv, "sending packet, length=%d address=%p qpn=0x%06x\n",
skb->len, address, qpn); skb->len, address, qpn);
......
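A note on the usable_sge arithmetic above (not part of the patch): one scatter/gather element is reserved for the skb's linear header whenever skb_headlen() is non-zero, so a QP reporting max_send_sge of N can carry at most N - 1 page fragments for such a packet; when nr_frags exceeds that budget the skb is linearized as a fallback. A hypothetical helper expressing the same check:

/* Illustrative helper only (not in the patch): does this skb fit the
 * QP's scatter/gather budget?
 */
static bool skb_fits_sge_budget(struct sk_buff *skb, unsigned int max_send_sge)
{
	unsigned int usable_sge = max_send_sge - !!skb_headlen(skb);

	return skb_shinfo(skb)->nr_frags <= usable_sge;
}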
@@ -206,7 +206,8 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
init_attr.create_flags |= IB_QP_CREATE_NETIF_QP;
if (dev->features & NETIF_F_SG)
init_attr.cap.max_send_sge =
min_t(u32, priv->ca->attrs.max_sge, MAX_SKB_FRAGS + 1);
priv->qp = ib_create_qp(priv->pd, &init_attr);
if (IS_ERR(priv->qp)) {
@@ -233,6 +234,8 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
priv->rx_wr.next = NULL;
priv->rx_wr.sg_list = priv->rx_sge;
priv->max_send_sge = init_attr.cap.max_send_sge;
return 0;
out_free_send_cq:
...
@@ -458,9 +458,6 @@ struct iser_fr_pool {
* @comp: iser completion context
* @fr_pool: connection fast registration poool
* @pi_support: Indicate device T10-PI support
* @last: last send wr to signal all flush errors were drained
* @last_cqe: cqe handler for last wr
* @last_comp: completes when all connection completions consumed
*/
struct ib_conn {
struct rdma_cm_id *cma_id;
@@ -472,10 +469,7 @@ struct ib_conn {
struct iser_comp *comp;
struct iser_fr_pool fr_pool;
bool pi_support;
struct ib_send_wr last;
struct ib_cqe last_cqe;
struct ib_cqe reg_cqe;
struct completion last_comp;
};
/**
@@ -617,7 +611,6 @@ void iser_cmd_comp(struct ib_cq *cq, struct ib_wc *wc);
void iser_ctrl_comp(struct ib_cq *cq, struct ib_wc *wc);
void iser_dataout_comp(struct ib_cq *cq, struct ib_wc *wc);
void iser_reg_comp(struct ib_cq *cq, struct ib_wc *wc);
void iser_last_comp(struct ib_cq *cq, struct ib_wc *wc);
void iser_task_rdma_init(struct iscsi_iser_task *task);
...
@@ -729,13 +729,6 @@ void iser_dataout_comp(struct ib_cq *cq, struct ib_wc *wc)
kmem_cache_free(ig.desc_cache, desc);
}
void iser_last_comp(struct ib_cq *cq, struct ib_wc *wc)
{
struct ib_conn *ib_conn = wc->qp->qp_context;
complete(&ib_conn->last_comp);
}
void iser_task_rdma_init(struct iscsi_iser_task *iser_task)
{
...
@@ -663,7 +663,6 @@ void iser_conn_release(struct iser_conn *iser_conn)
int iser_conn_terminate(struct iser_conn *iser_conn)
{
struct ib_conn *ib_conn = &iser_conn->ib_conn;
struct ib_send_wr *bad_wr;
int err = 0;
/* terminate the iser conn only if the conn state is UP */
@@ -688,14 +687,8 @@ int iser_conn_terminate(struct iser_conn *iser_conn)
iser_err("Failed to disconnect, conn: 0x%p err %d\n",
iser_conn, err);
/* post an indication that all flush errors were consumed */
err = ib_post_send(ib_conn->qp, &ib_conn->last, &bad_wr);
if (err) {
iser_err("conn %p failed to post last wr", ib_conn);
return 1;
}
wait_for_completion(&ib_conn->last_comp);
/* block until all flush errors are consumed */
ib_drain_sq(ib_conn->qp);
}
return 1;
@@ -954,10 +947,6 @@ void iser_conn_init(struct iser_conn *iser_conn)
ib_conn->post_recv_buf_count = 0;
ib_conn->reg_cqe.done = iser_reg_comp;
ib_conn->last_cqe.done = iser_last_comp;
ib_conn->last.wr_cqe = &ib_conn->last_cqe;
ib_conn->last.opcode = IB_WR_SEND;
init_completion(&ib_conn->last_comp);
}
/**
...
@@ -446,49 +446,17 @@ static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
dev->max_pages_per_mr);
}
static void srp_drain_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct srp_rdma_ch *ch = cq->cq_context;
complete(&ch->done);
}
static struct ib_cqe srp_drain_cqe = {
.done = srp_drain_done,
};
/**
* srp_destroy_qp() - destroy an RDMA queue pair
* @ch: SRP RDMA channel.
*
* Drain the qp before destroying it. This avoids that the receive
* completion handler can access the queue pair while it is
* being destroyed.
*/
static void srp_destroy_qp(struct srp_rdma_ch *ch)
{
static struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
static struct ib_recv_wr wr = { 0 };
struct ib_recv_wr *bad_wr;
int ret;
wr.wr_cqe = &srp_drain_cqe;
/* Destroying a QP and reusing ch->done is only safe if not connected */
WARN_ON_ONCE(ch->connected);
ret = ib_modify_qp(ch->qp, &attr, IB_QP_STATE);
WARN_ONCE(ret, "ib_cm_init_qp_attr() returned %d\n", ret);
if (ret)
goto out;
init_completion(&ch->done);
ret = ib_post_recv(ch->qp, &wr, &bad_wr);
WARN_ONCE(ret, "ib_post_recv() returned %d\n", ret);
if (ret == 0)
wait_for_completion(&ch->done);
out:
ib_drain_rq(ch->qp);
ib_destroy_qp(ch->qp);
}
@@ -508,7 +476,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
if (!init_attr)
return -ENOMEM;
/* queue_size + 1 for ib_drain_rq() */
recv_cq = ib_alloc_cq(dev->dev, ch, target->queue_size + 1,
ch->comp_vector, IB_POLL_SOFTIRQ);
if (IS_ERR(recv_cq)) {
...
@@ -218,20 +218,20 @@ struct srpt_send_ioctx {
/**
* enum rdma_ch_state - SRP channel state.
* @CH_CONNECTING: QP is in RTR state; waiting for RTU.
* @CH_LIVE: QP is in RTS state.
* @CH_DISCONNECTING: DREQ has been sent and waiting for DREP or DREQ has
* been received.
* @CH_DRAINING: DREP has been received or waiting for DREP timed out
* and last work request has been queued.
* @CH_DISCONNECTED: Last completion has been received.
*/
enum rdma_ch_state {
CH_CONNECTING,
CH_LIVE,
CH_DISCONNECTING,
CH_DRAINING,
CH_DISCONNECTED,
};
/**
@@ -267,6 +267,8 @@ struct srpt_rdma_ch {
struct ib_cm_id *cm_id;
struct ib_qp *qp;
struct ib_cq *cq;
struct ib_cqe zw_cqe;
struct kref kref;
int rq_size;
u32 rsp_size;
atomic_t sq_wr_avail;
@@ -286,7 +288,6 @@ struct srpt_rdma_ch {
u8 sess_name[36];
struct work_struct release_work;
struct completion *release_done;
bool in_shutdown;
};
/**
@@ -343,7 +344,7 @@ struct srpt_port {
* @ioctx_ring: Per-HCA SRQ.
* @rch_list: Per-device channel list -- see also srpt_rdma_ch.list.
* @ch_releaseQ: Enables waiting for removal from rch_list.
* @mutex: Protects rch_list.
* @port: Information about the ports owned by this HCA.
* @event_handler: Per-HCA asynchronous IB event handler.
* @list: Node in srpt_dev_list.
@@ -357,18 +358,10 @@ struct srpt_device {
struct srpt_recv_ioctx **ioctx_ring;
struct list_head rch_list;
wait_queue_head_t ch_releaseQ;
struct mutex mutex;
struct srpt_port port[2];
struct ib_event_handler event_handler;
struct list_head list;
};
/**
* struct srpt_node_acl - Per-initiator ACL data (managed via configfs).
* @nacl: Target core node ACL information.
*/
struct srpt_node_acl {
struct se_node_acl nacl;
};
#endif /* IB_SRPT_H */
@@ -1846,6 +1846,8 @@ struct ib_device {
int (*check_mr_status)(struct ib_mr *mr, u32 check_mask,
struct ib_mr_status *mr_status);
void (*disassociate_ucontext)(struct ib_ucontext *ibcontext);
void (*drain_rq)(struct ib_qp *qp);
void (*drain_sq)(struct ib_qp *qp);
struct ib_dma_mapping_ops *dma_ops;
@@ -3094,4 +3096,7 @@ int ib_sg_to_pages(struct ib_mr *mr,
int sg_nents,
int (*set_page)(struct ib_mr *, u64));
void ib_drain_rq(struct ib_qp *qp);
void ib_drain_sq(struct ib_qp *qp);
void ib_drain_qp(struct ib_qp *qp);
#endif /* IB_VERBS_H */
@@ -109,14 +109,13 @@ struct p9_trans_rdma {
/**
* p9_rdma_context - Keeps track of in-process WR
*
* @wc_op: The original WR op for when the CQE completes in error.
* @busa: Bus address to unmap when the WR completes
* @req: Keeps track of requests (send)
* @rc: Keepts track of replies (receive)
*/
struct p9_rdma_req;
struct p9_rdma_context {
struct ib_cqe cqe;
dma_addr_t busa;
union {
struct p9_req_t *req;
@@ -284,9 +283,12 @@ p9_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
}
static void
handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma,
struct p9_rdma_context *c, enum ib_wc_status status, u32 byte_len)
recv_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct p9_client *client = cq->cq_context;
struct p9_trans_rdma *rdma = client->trans;
struct p9_rdma_context *c =
container_of(wc->wr_cqe, struct p9_rdma_context, cqe);
struct p9_req_t *req;
int err = 0;
int16_t tag;
@@ -295,7 +297,7 @@ handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma,
ib_dma_unmap_single(rdma->cm_id->device, c->busa, client->msize,
DMA_FROM_DEVICE);
if (wc->status != IB_WC_SUCCESS)
goto err_out;
err = p9_parse_header(c->rc, NULL, NULL, &tag, 1);
@@ -316,21 +318,32 @@ handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma,
req->rc = c->rc;
p9_client_cb(client, req, REQ_STATUS_RCVD);
out:
up(&rdma->rq_sem);
kfree(c);
return;
err_out:
p9_debug(P9_DEBUG_ERROR, "req %p err %d status %d\n",
req, err, wc->status);
rdma->state = P9_RDMA_FLUSHING;
client->status = Disconnected;
goto out;
}
static void
handle_send(struct p9_client *client, struct p9_trans_rdma *rdma,
struct p9_rdma_context *c, enum ib_wc_status status, u32 byte_len)
send_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct p9_client *client = cq->cq_context;
struct p9_trans_rdma *rdma = client->trans;
struct p9_rdma_context *c =
container_of(wc->wr_cqe, struct p9_rdma_context, cqe);
ib_dma_unmap_single(rdma->cm_id->device,
c->busa, c->req->tc->size,
DMA_TO_DEVICE);
up(&rdma->sq_sem);
kfree(c);
}
static void qp_event_handler(struct ib_event *event, void *context)
@@ -339,42 +352,6 @@ static void qp_event_handler(struct ib_event *event, void *context)
event->event, context);
}
static void cq_comp_handler(struct ib_cq *cq, void *cq_context)
{
struct p9_client *client = cq_context;
struct p9_trans_rdma *rdma = client->trans;
int ret;
struct ib_wc wc;
ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP);
while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) {
struct p9_rdma_context *c = (void *) (unsigned long) wc.wr_id;
switch (c->wc_op) {
case IB_WC_RECV:
handle_recv(client, rdma, c, wc.status, wc.byte_len);
up(&rdma->rq_sem);
break;
case IB_WC_SEND:
handle_send(client, rdma, c, wc.status, wc.byte_len);
up(&rdma->sq_sem);
break;
default:
pr_err("unexpected completion type, c->wc_op=%d, wc.opcode=%d, status=%d\n",
c->wc_op, wc.opcode, wc.status);
break;
}
kfree(c);
}
}
static void cq_event_handler(struct ib_event *e, void *v)
{
p9_debug(P9_DEBUG_ERROR, "CQ event %d context %p\n", e->event, v);
}
static void rdma_destroy_trans(struct p9_trans_rdma *rdma)
{
if (!rdma)
@@ -387,7 +364,7 @@ static void rdma_destroy_trans(struct p9_trans_rdma *rdma)
ib_dealloc_pd(rdma->pd);
if (rdma->cq && !IS_ERR(rdma->cq))
ib_free_cq(rdma->cq);
if (rdma->cm_id && !IS_ERR(rdma->cm_id))
rdma_destroy_id(rdma->cm_id);
@@ -408,13 +385,14 @@ post_recv(struct p9_client *client, struct p9_rdma_context *c)
if (ib_dma_mapping_error(rdma->cm_id->device, c->busa))
goto error;
c->cqe.done = recv_done;
sge.addr = c->busa;
sge.length = client->msize;
sge.lkey = rdma->pd->local_dma_lkey;
wr.next = NULL;
c->wc_op = IB_WC_RECV;
wr.wr_id = (unsigned long) c;
wr.wr_cqe = &c->cqe;
wr.sg_list = &sge;
wr.num_sge = 1;
return ib_post_recv(rdma->qp, &wr, &bad_wr);
@@ -499,13 +477,14 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
goto send_error;
}
c->cqe.done = send_done;
sge.addr = c->busa;
sge.length = c->req->tc->size;
sge.lkey = rdma->pd->local_dma_lkey;
wr.next = NULL;
c->wc_op = IB_WC_SEND;
wr.wr_id = (unsigned long) c;
wr.wr_cqe = &c->cqe;
wr.opcode = IB_WR_SEND;
wr.send_flags = IB_SEND_SIGNALED;
wr.sg_list = &sge;
@@ -642,7 +621,6 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args)
struct p9_trans_rdma *rdma;
struct rdma_conn_param conn_param;
struct ib_qp_init_attr qp_attr;
struct ib_cq_init_attr cq_attr = {};
/* Parse the transport specific mount options */
err = parse_opts(args, &opts);
@@ -695,13 +673,11 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args)
goto error;
/* Create the Completion Queue */
cq_attr.cqe = opts.sq_depth + opts.rq_depth + 1;
rdma->cq = ib_create_cq(rdma->cm_id->device, cq_comp_handler,
cq_event_handler, client, 0,
&cq_attr);
rdma->cq = ib_alloc_cq(rdma->cm_id->device, client,
opts.sq_depth + opts.rq_depth + 1,
IB_POLL_SOFTIRQ);
if (IS_ERR(rdma->cq))
goto error;
ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP);
/* Create the Protection Domain */
rdma->pd = ib_alloc_pd(rdma->cm_id->device);
...
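For readers unfamiliar with the new completion-queue API used above, the conversion follows one pattern throughout this series: embed a struct ib_cqe in the per-work-request context, point wr_cqe at it when posting, and recover the context with container_of() in the .done callback. A minimal sketch under those assumptions (the names here are illustrative, not taken from net/9p):

struct my_ctx {
	struct ib_cqe cqe;		/* embedded completion entry */
	void *private;			/* per-WR state */
};

static void my_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct my_ctx *ctx = container_of(wc->wr_cqe, struct my_ctx, cqe);

	if (wc->status != IB_WC_SUCCESS)
		pr_err("WR failed: %s\n", ib_wc_status_msg(wc->status));
	kfree(ctx);
}

/* when posting:
 *	ctx->cqe.done = my_done;
 *	wr.wr_cqe = &ctx->cqe;
 *	ret = ib_post_send(qp, &wr, &bad_wr);
 */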