Commit 57ce41d1 authored by Eli Cohen's avatar Eli Cohen Committed by Roland Dreier

IB/ipoib: Fix transmit queue stalling forever

Commit f56bcd80 ("IPoIB: Use separate CQ for UD send completions")
introduced a bug where the transmit queue could get stopped and never
woken up.  The problem is that send completions are only polled at the
end of the xmit function, so if the send queue fills up and the xmit
path stops the queue, then there is no way for send completions to
ever get polled, and so the transmit queue stays stopped forever.

Fix this by arming the send CQ just before posting the last send
request that fills the send queue.  Then, when the completion event
handler is called, drain the send CQ.  Since it is possible that not
enough send completions are in the CQ, verify that the the net queue
has been woken up after draining the send CQ, and if not arm a timer
and drain again at the timer function.
Signed-off-by: default avatarRoland Dreier <rolandd@cisco.com>
parent 3ae15e16
...@@ -334,6 +334,7 @@ struct ipoib_dev_priv { ...@@ -334,6 +334,7 @@ struct ipoib_dev_priv {
#endif #endif
int hca_caps; int hca_caps;
struct ipoib_ethtool_st ethtool; struct ipoib_ethtool_st ethtool;
struct timer_list poll_timer;
}; };
struct ipoib_ah { struct ipoib_ah {
...@@ -404,6 +405,7 @@ extern struct workqueue_struct *ipoib_workqueue; ...@@ -404,6 +405,7 @@ extern struct workqueue_struct *ipoib_workqueue;
int ipoib_poll(struct napi_struct *napi, int budget); int ipoib_poll(struct napi_struct *napi, int budget);
void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr); void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr);
void ipoib_send_comp_handler(struct ib_cq *cq, void *dev_ptr);
struct ipoib_ah *ipoib_create_ah(struct net_device *dev, struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
struct ib_pd *pd, struct ib_ah_attr *attr); struct ib_pd *pd, struct ib_ah_attr *attr);
......
...@@ -461,6 +461,26 @@ void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr) ...@@ -461,6 +461,26 @@ void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr)
netif_rx_schedule(dev, &priv->napi); netif_rx_schedule(dev, &priv->napi);
} }
static void drain_tx_cq(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
unsigned long flags;
spin_lock_irqsave(&priv->tx_lock, flags);
while (poll_tx(priv))
; /* nothing */
if (netif_queue_stopped(dev))
mod_timer(&priv->poll_timer, jiffies + 1);
spin_unlock_irqrestore(&priv->tx_lock, flags);
}
void ipoib_send_comp_handler(struct ib_cq *cq, void *dev_ptr)
{
drain_tx_cq((struct net_device *)dev_ptr);
}
static inline int post_send(struct ipoib_dev_priv *priv, static inline int post_send(struct ipoib_dev_priv *priv,
unsigned int wr_id, unsigned int wr_id,
struct ib_ah *address, u32 qpn, struct ib_ah *address, u32 qpn,
...@@ -555,12 +575,22 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, ...@@ -555,12 +575,22 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
else else
priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM; priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM;
if (++priv->tx_outstanding == ipoib_sendq_size) {
ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n");
if (ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP))
ipoib_warn(priv, "request notify on send CQ failed\n");
netif_stop_queue(dev);
}
if (unlikely(post_send(priv, priv->tx_head & (ipoib_sendq_size - 1), if (unlikely(post_send(priv, priv->tx_head & (ipoib_sendq_size - 1),
address->ah, qpn, tx_req, phead, hlen))) { address->ah, qpn, tx_req, phead, hlen))) {
ipoib_warn(priv, "post_send failed\n"); ipoib_warn(priv, "post_send failed\n");
++dev->stats.tx_errors; ++dev->stats.tx_errors;
--priv->tx_outstanding;
ipoib_dma_unmap_tx(priv->ca, tx_req); ipoib_dma_unmap_tx(priv->ca, tx_req);
dev_kfree_skb_any(skb); dev_kfree_skb_any(skb);
if (netif_queue_stopped(dev))
netif_wake_queue(dev);
} else { } else {
dev->trans_start = jiffies; dev->trans_start = jiffies;
...@@ -568,14 +598,11 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, ...@@ -568,14 +598,11 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
++priv->tx_head; ++priv->tx_head;
skb_orphan(skb); skb_orphan(skb);
if (++priv->tx_outstanding == ipoib_sendq_size) {
ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n");
netif_stop_queue(dev);
}
} }
if (unlikely(priv->tx_outstanding > MAX_SEND_CQE)) if (unlikely(priv->tx_outstanding > MAX_SEND_CQE))
poll_tx(priv); while (poll_tx(priv))
; /* nothing */
} }
static void __ipoib_reap_ah(struct net_device *dev) static void __ipoib_reap_ah(struct net_device *dev)
...@@ -609,6 +636,11 @@ void ipoib_reap_ah(struct work_struct *work) ...@@ -609,6 +636,11 @@ void ipoib_reap_ah(struct work_struct *work)
round_jiffies_relative(HZ)); round_jiffies_relative(HZ));
} }
static void ipoib_ib_tx_timer_func(unsigned long ctx)
{
drain_tx_cq((struct net_device *)ctx);
}
int ipoib_ib_dev_open(struct net_device *dev) int ipoib_ib_dev_open(struct net_device *dev)
{ {
struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_dev_priv *priv = netdev_priv(dev);
...@@ -645,6 +677,10 @@ int ipoib_ib_dev_open(struct net_device *dev) ...@@ -645,6 +677,10 @@ int ipoib_ib_dev_open(struct net_device *dev)
queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task, queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task,
round_jiffies_relative(HZ)); round_jiffies_relative(HZ));
init_timer(&priv->poll_timer);
priv->poll_timer.function = ipoib_ib_tx_timer_func;
priv->poll_timer.data = (unsigned long)dev;
set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags); set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
return 0; return 0;
...@@ -810,6 +846,7 @@ int ipoib_ib_dev_stop(struct net_device *dev, int flush) ...@@ -810,6 +846,7 @@ int ipoib_ib_dev_stop(struct net_device *dev, int flush)
ipoib_dbg(priv, "All sends and receives done.\n"); ipoib_dbg(priv, "All sends and receives done.\n");
timeout: timeout:
del_timer_sync(&priv->poll_timer);
qp_attr.qp_state = IB_QPS_RESET; qp_attr.qp_state = IB_QPS_RESET;
if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE)) if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
ipoib_warn(priv, "Failed to modify QP to RESET state\n"); ipoib_warn(priv, "Failed to modify QP to RESET state\n");
......
...@@ -187,7 +187,8 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) ...@@ -187,7 +187,8 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
goto out_free_mr; goto out_free_mr;
} }
priv->send_cq = ib_create_cq(priv->ca, NULL, NULL, dev, ipoib_sendq_size, 0); priv->send_cq = ib_create_cq(priv->ca, ipoib_send_comp_handler, NULL,
dev, ipoib_sendq_size, 0);
if (IS_ERR(priv->send_cq)) { if (IS_ERR(priv->send_cq)) {
printk(KERN_WARNING "%s: failed to create send CQ\n", ca->name); printk(KERN_WARNING "%s: failed to create send CQ\n", ca->name);
goto out_free_recv_cq; goto out_free_recv_cq;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment