Commit f717221b authored by Linus Torvalds

Merge branch 'for-linus' of master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband

* 'for-linus' of master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband:
  IB/mlx4: Fix last allocated object tracking in bitmap allocator
  IB/cm: Fix stale connection detection
  IPoIB/cm: Fix performance regression on Mellanox
  IB/mthca: Fix handling of send CQE with error for QPs connected to SRQ
parents eaad084b a2cb4a98
...@@ -1297,26 +1297,29 @@ static struct cm_id_private * cm_match_req(struct cm_work *work, ...@@ -1297,26 +1297,29 @@ static struct cm_id_private * cm_match_req(struct cm_work *work,
req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad; req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
/* Check for duplicate REQ and stale connections. */ /* Check for possible duplicate REQ. */
spin_lock_irqsave(&cm.lock, flags); spin_lock_irqsave(&cm.lock, flags);
timewait_info = cm_insert_remote_id(cm_id_priv->timewait_info); timewait_info = cm_insert_remote_id(cm_id_priv->timewait_info);
if (!timewait_info)
timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
if (timewait_info) { if (timewait_info) {
cur_cm_id_priv = cm_get_id(timewait_info->work.local_id, cur_cm_id_priv = cm_get_id(timewait_info->work.local_id,
timewait_info->work.remote_id); timewait_info->work.remote_id);
cm_cleanup_timewait(cm_id_priv->timewait_info);
spin_unlock_irqrestore(&cm.lock, flags); spin_unlock_irqrestore(&cm.lock, flags);
if (cur_cm_id_priv) { if (cur_cm_id_priv) {
cm_dup_req_handler(work, cur_cm_id_priv); cm_dup_req_handler(work, cur_cm_id_priv);
cm_deref_id(cur_cm_id_priv); cm_deref_id(cur_cm_id_priv);
} else }
cm_issue_rej(work->port, work->mad_recv_wc, return NULL;
IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ, }
NULL, 0);
listen_cm_id_priv = NULL; /* Check for stale connections. */
goto out; timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
if (timewait_info) {
cm_cleanup_timewait(cm_id_priv->timewait_info);
spin_unlock_irqrestore(&cm.lock, flags);
cm_issue_rej(work->port, work->mad_recv_wc,
IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ,
NULL, 0);
return NULL;
} }
/* Find matching listen request. */ /* Find matching listen request. */
......
...@@ -2284,10 +2284,10 @@ void mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send, ...@@ -2284,10 +2284,10 @@ void mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send,
struct mthca_next_seg *next; struct mthca_next_seg *next;
/* /*
* For SRQs, all WQEs generate a CQE, so we're always at the * For SRQs, all receive WQEs generate a CQE, so we're always
* end of the doorbell chain. * at the end of the doorbell chain.
*/ */
if (qp->ibqp.srq) { if (qp->ibqp.srq && !is_send) {
*new_wqe = 0; *new_wqe = 0;
return; return;
} }
......
...@@ -156,7 +156,7 @@ struct ipoib_cm_data { ...@@ -156,7 +156,7 @@ struct ipoib_cm_data {
* - and then invoke a Destroy QP or Reset QP. * - and then invoke a Destroy QP or Reset QP.
* *
* We use the second option and wait for a completion on the * We use the second option and wait for a completion on the
* rx_drain_qp before destroying QPs attached to our SRQ. * same CQ before destroying QPs attached to our SRQ.
*/ */
enum ipoib_cm_state { enum ipoib_cm_state {
...@@ -199,7 +199,6 @@ struct ipoib_cm_dev_priv { ...@@ -199,7 +199,6 @@ struct ipoib_cm_dev_priv {
struct ib_srq *srq; struct ib_srq *srq;
struct ipoib_cm_rx_buf *srq_ring; struct ipoib_cm_rx_buf *srq_ring;
struct ib_cm_id *id; struct ib_cm_id *id;
struct ib_qp *rx_drain_qp; /* generates WR described in 10.3.1 */
struct list_head passive_ids; /* state: LIVE */ struct list_head passive_ids; /* state: LIVE */
struct list_head rx_error_list; /* state: ERROR */ struct list_head rx_error_list; /* state: ERROR */
struct list_head rx_flush_list; /* state: FLUSH, drain not started */ struct list_head rx_flush_list; /* state: FLUSH, drain not started */
......
...@@ -69,8 +69,9 @@ static struct ib_qp_attr ipoib_cm_err_attr = { ...@@ -69,8 +69,9 @@ static struct ib_qp_attr ipoib_cm_err_attr = {
#define IPOIB_CM_RX_DRAIN_WRID 0x7fffffff #define IPOIB_CM_RX_DRAIN_WRID 0x7fffffff
static struct ib_recv_wr ipoib_cm_rx_drain_wr = { static struct ib_send_wr ipoib_cm_rx_drain_wr = {
.wr_id = IPOIB_CM_RX_DRAIN_WRID .wr_id = IPOIB_CM_RX_DRAIN_WRID,
.opcode = IB_WR_SEND,
}; };
static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id, static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
...@@ -163,16 +164,22 @@ static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev, int id, int ...@@ -163,16 +164,22 @@ static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev, int id, int
static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv* priv) static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv* priv)
{ {
struct ib_recv_wr *bad_wr; struct ib_send_wr *bad_wr;
struct ipoib_cm_rx *p;
/* rx_drain_qp send queue depth is 1, so /* We only reserved 1 extra slot in CQ for drain WRs, so
* make sure we have at most 1 outstanding WR. */ * make sure we have at most 1 outstanding WR. */
if (list_empty(&priv->cm.rx_flush_list) || if (list_empty(&priv->cm.rx_flush_list) ||
!list_empty(&priv->cm.rx_drain_list)) !list_empty(&priv->cm.rx_drain_list))
return; return;
if (ib_post_recv(priv->cm.rx_drain_qp, &ipoib_cm_rx_drain_wr, &bad_wr)) /*
ipoib_warn(priv, "failed to post rx_drain wr\n"); * QPs on flush list are error state. This way, a "flush
* error" WC will be immediately generated for each WR we post.
*/
p = list_entry(priv->cm.rx_flush_list.next, typeof(*p), list);
if (ib_post_send(p->qp, &ipoib_cm_rx_drain_wr, &bad_wr))
ipoib_warn(priv, "failed to post drain wr\n");
list_splice_init(&priv->cm.rx_flush_list, &priv->cm.rx_drain_list); list_splice_init(&priv->cm.rx_flush_list, &priv->cm.rx_drain_list);
} }
...@@ -199,10 +206,10 @@ static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev, ...@@ -199,10 +206,10 @@ static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev,
struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ib_qp_init_attr attr = { struct ib_qp_init_attr attr = {
.event_handler = ipoib_cm_rx_event_handler, .event_handler = ipoib_cm_rx_event_handler,
.send_cq = priv->cq, /* does not matter, we never send anything */ .send_cq = priv->cq, /* For drain WR */
.recv_cq = priv->cq, .recv_cq = priv->cq,
.srq = priv->cm.srq, .srq = priv->cm.srq,
.cap.max_send_wr = 1, /* FIXME: 0 Seems not to work */ .cap.max_send_wr = 1, /* For drain WR */
.cap.max_send_sge = 1, /* FIXME: 0 Seems not to work */ .cap.max_send_sge = 1, /* FIXME: 0 Seems not to work */
.sq_sig_type = IB_SIGNAL_ALL_WR, .sq_sig_type = IB_SIGNAL_ALL_WR,
.qp_type = IB_QPT_RC, .qp_type = IB_QPT_RC,
...@@ -242,6 +249,27 @@ static int ipoib_cm_modify_rx_qp(struct net_device *dev, ...@@ -242,6 +249,27 @@ static int ipoib_cm_modify_rx_qp(struct net_device *dev,
ipoib_warn(priv, "failed to modify QP to RTR: %d\n", ret); ipoib_warn(priv, "failed to modify QP to RTR: %d\n", ret);
return ret; return ret;
} }
/*
* Current Mellanox HCA firmware won't generate completions
* with error for drain WRs unless the QP has been moved to
* RTS first. This work-around leaves a window where a QP has
* moved to error asynchronously, but this will eventually get
* fixed in firmware, so let's not error out if modify QP
* fails.
*/
qp_attr.qp_state = IB_QPS_RTS;
ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
if (ret) {
ipoib_warn(priv, "failed to init QP attr for RTS: %d\n", ret);
return 0;
}
ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
if (ret) {
ipoib_warn(priv, "failed to modify QP to RTS: %d\n", ret);
return 0;
}
return 0; return 0;
} }
...@@ -623,38 +651,11 @@ static void ipoib_cm_tx_completion(struct ib_cq *cq, void *tx_ptr) ...@@ -623,38 +651,11 @@ static void ipoib_cm_tx_completion(struct ib_cq *cq, void *tx_ptr)
int ipoib_cm_dev_open(struct net_device *dev) int ipoib_cm_dev_open(struct net_device *dev)
{ {
struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ib_qp_init_attr qp_init_attr = {
.send_cq = priv->cq, /* does not matter, we never send anything */
.recv_cq = priv->cq,
.cap.max_send_wr = 1, /* FIXME: 0 Seems not to work */
.cap.max_send_sge = 1, /* FIXME: 0 Seems not to work */
.cap.max_recv_wr = 1,
.cap.max_recv_sge = 1, /* FIXME: 0 Seems not to work */
.sq_sig_type = IB_SIGNAL_ALL_WR,
.qp_type = IB_QPT_UC,
};
int ret; int ret;
if (!IPOIB_CM_SUPPORTED(dev->dev_addr)) if (!IPOIB_CM_SUPPORTED(dev->dev_addr))
return 0; return 0;
priv->cm.rx_drain_qp = ib_create_qp(priv->pd, &qp_init_attr);
if (IS_ERR(priv->cm.rx_drain_qp)) {
printk(KERN_WARNING "%s: failed to create CM ID\n", priv->ca->name);
ret = PTR_ERR(priv->cm.rx_drain_qp);
return ret;
}
/*
* We put the QP in error state directly. This way, a "flush
* error" WC will be immediately generated for each WR we post.
*/
ret = ib_modify_qp(priv->cm.rx_drain_qp, &ipoib_cm_err_attr, IB_QP_STATE);
if (ret) {
ipoib_warn(priv, "failed to modify drain QP to error: %d\n", ret);
goto err_qp;
}
priv->cm.id = ib_create_cm_id(priv->ca, ipoib_cm_rx_handler, dev); priv->cm.id = ib_create_cm_id(priv->ca, ipoib_cm_rx_handler, dev);
if (IS_ERR(priv->cm.id)) { if (IS_ERR(priv->cm.id)) {
printk(KERN_WARNING "%s: failed to create CM ID\n", priv->ca->name); printk(KERN_WARNING "%s: failed to create CM ID\n", priv->ca->name);
...@@ -676,8 +677,6 @@ int ipoib_cm_dev_open(struct net_device *dev) ...@@ -676,8 +677,6 @@ int ipoib_cm_dev_open(struct net_device *dev)
ib_destroy_cm_id(priv->cm.id); ib_destroy_cm_id(priv->cm.id);
err_cm: err_cm:
priv->cm.id = NULL; priv->cm.id = NULL;
err_qp:
ib_destroy_qp(priv->cm.rx_drain_qp);
return ret; return ret;
} }
...@@ -740,7 +739,6 @@ void ipoib_cm_dev_stop(struct net_device *dev) ...@@ -740,7 +739,6 @@ void ipoib_cm_dev_stop(struct net_device *dev)
kfree(p); kfree(p);
} }
ib_destroy_qp(priv->cm.rx_drain_qp);
cancel_delayed_work(&priv->cm.stale_task); cancel_delayed_work(&priv->cm.stale_task);
} }
......
...@@ -51,8 +51,8 @@ u32 mlx4_bitmap_alloc(struct mlx4_bitmap *bitmap) ...@@ -51,8 +51,8 @@ u32 mlx4_bitmap_alloc(struct mlx4_bitmap *bitmap)
if (obj < bitmap->max) { if (obj < bitmap->max) {
set_bit(obj, bitmap->table); set_bit(obj, bitmap->table);
bitmap->last = (obj + 1) & (bitmap->max - 1);
obj |= bitmap->top; obj |= bitmap->top;
bitmap->last = obj + 1;
} else } else
obj = -1; obj = -1;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment