Commit 586a6934 authored by Pradeep Satyanarayana, committed by Roland Dreier

IPoIB/CM: Enable SRQ support on HCAs that support fewer than 16 SG entries

Some HCAs (such as ehca2) support SRQs but allow fewer than 16 SG entries per
SRQ.  IPoIB/CM currently assumes implicitly that every HCA supports 16 SG
entries for SRQs (enough to handle a 64K MTU with 4K pages).  This patch
removes that assumption by capping the connected-mode MTU at what the maximum
number of SRQ SG entries allows.

This patch addresses <https://bugs.openfabrics.org/show_bug.cgi?id=728>
Signed-off-by: Pradeep Satyanarayana <pradeeps@linux.vnet.ibm.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
parent fff09a8e
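
For reference, a minimal standalone sketch of the MTU cap this patch computes.
It is illustrative only: the max_srq_sge value of 4 is a hypothetical device
limit (the real value comes from ib_query_device(), as in the diff below),
PAGE_SIZE is assumed to be 4K as in the commit message, and the 0x10
subtraction mirrors the headroom the patch reserves (the same reservation the
IPOIB_CM_MTU fallback uses).

	#include <stdio.h>

	#define PAGE_SIZE	4096	/* assumed 4K pages, as in the commit message */
	#define IPOIB_CM_RX_SG	16	/* SG entries needed for a 64K MTU with 4K pages */

	int main(void)
	{
		int max_srq_sge = 4;	/* hypothetical limit an HCA might report */

		/* never use more SG entries than IPoIB/CM needs for 64K */
		int num_frags = max_srq_sge < IPOIB_CM_RX_SG ?
				max_srq_sge : IPOIB_CM_RX_SG;

		/* one page per SG entry, minus 0x10 bytes of reserved headroom */
		int max_cm_mtu = num_frags * PAGE_SIZE - 0x10;

		printf("num_frags = %d, max_cm_mtu = 0x%x (%d bytes)\n",
		       num_frags, max_cm_mtu, max_cm_mtu);
		return 0;
	}

With four SG entries this prints num_frags = 4, max_cm_mtu = 0x3ff0 (16368
bytes): such a device stays usable in connected mode with a smaller MTU
instead of being excluded by the old 16-entry assumption.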
@@ -238,6 +238,8 @@ struct ipoib_cm_dev_priv {
 	struct ib_sge		rx_sge[IPOIB_CM_RX_SG];
 	struct ib_recv_wr	rx_wr;
 	int			nonsrq_conn_qp;
+	int			max_cm_mtu;
+	int			num_frags;
 };

 /*
@@ -503,6 +505,12 @@ static inline int ipoib_cm_has_srq(struct net_device *dev)
 	return !!priv->cm.srq;
 }

+static inline unsigned int ipoib_cm_max_mtu(struct net_device *dev)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	return priv->cm.max_cm_mtu;
+}
+
 void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx);
 int ipoib_cm_dev_open(struct net_device *dev);
 void ipoib_cm_dev_stop(struct net_device *dev);
@@ -552,6 +560,11 @@ static inline int ipoib_cm_has_srq(struct net_device *dev)
 	return 0;
 }

+static inline unsigned int ipoib_cm_max_mtu(struct net_device *dev)
+{
+	return 0;
+}
+
 static inline
 void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx)
 {
...
@@ -96,13 +96,13 @@ static int ipoib_cm_post_receive_srq(struct net_device *dev, int id)
 	priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;

-	for (i = 0; i < IPOIB_CM_RX_SG; ++i)
+	for (i = 0; i < priv->cm.num_frags; ++i)
 		priv->cm.rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i];

 	ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, &bad_wr);
 	if (unlikely(ret)) {
 		ipoib_warn(priv, "post srq failed for buf %d (%d)\n", id, ret);
-		ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1,
+		ipoib_cm_dma_unmap_rx(priv, priv->cm.num_frags - 1,
 				      priv->cm.srq_ring[id].mapping);
 		dev_kfree_skb_any(priv->cm.srq_ring[id].skb);
 		priv->cm.srq_ring[id].skb = NULL;
@@ -1399,13 +1399,13 @@ int ipoib_cm_add_mode_attr(struct net_device *dev)
 	return device_create_file(&dev->dev, &dev_attr_mode);
 }

-static void ipoib_cm_create_srq(struct net_device *dev)
+static void ipoib_cm_create_srq(struct net_device *dev, int max_sge)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ib_srq_init_attr srq_init_attr = {
 		.attr = {
 			.max_wr  = ipoib_recvq_size,
-			.max_sge = IPOIB_CM_RX_SG
+			.max_sge = max_sge
 		}
 	};
@@ -1431,7 +1431,8 @@ static void ipoib_cm_create_srq(struct net_device *dev)
 int ipoib_cm_dev_init(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
-	int i;
+	int i, ret;
+	struct ib_device_attr attr;

 	INIT_LIST_HEAD(&priv->cm.passive_ids);
 	INIT_LIST_HEAD(&priv->cm.reap_list);
@@ -1448,22 +1449,40 @@ int ipoib_cm_dev_init(struct net_device *dev)
 	skb_queue_head_init(&priv->cm.skb_queue);

-	for (i = 0; i < IPOIB_CM_RX_SG; ++i)
+	ret = ib_query_device(priv->ca, &attr);
+	if (ret) {
+		printk(KERN_WARNING "ib_query_device() failed with %d\n", ret);
+		return ret;
+	}
+
+	ipoib_dbg(priv, "max_srq_sge=%d\n", attr.max_srq_sge);
+
+	attr.max_srq_sge = min_t(int, IPOIB_CM_RX_SG, attr.max_srq_sge);
+	ipoib_cm_create_srq(dev, attr.max_srq_sge);
+	if (ipoib_cm_has_srq(dev)) {
+		priv->cm.max_cm_mtu = attr.max_srq_sge * PAGE_SIZE - 0x10;
+		priv->cm.num_frags = attr.max_srq_sge;
+		ipoib_dbg(priv, "max_cm_mtu = 0x%x, num_frags=%d\n",
+			  priv->cm.max_cm_mtu, priv->cm.num_frags);
+	} else {
+		priv->cm.max_cm_mtu = IPOIB_CM_MTU;
+		priv->cm.num_frags = IPOIB_CM_RX_SG;
+	}
+
+	for (i = 0; i < priv->cm.num_frags; ++i)
 		priv->cm.rx_sge[i].lkey = priv->mr->lkey;

 	priv->cm.rx_sge[0].length = IPOIB_CM_HEAD_SIZE;
-	for (i = 1; i < IPOIB_CM_RX_SG; ++i)
+	for (i = 1; i < priv->cm.num_frags; ++i)
 		priv->cm.rx_sge[i].length = PAGE_SIZE;
 	priv->cm.rx_wr.next = NULL;
 	priv->cm.rx_wr.sg_list = priv->cm.rx_sge;
-	priv->cm.rx_wr.num_sge = IPOIB_CM_RX_SG;
+	priv->cm.rx_wr.num_sge = priv->cm.num_frags;

-	ipoib_cm_create_srq(dev);
 	if (ipoib_cm_has_srq(dev)) {
 		for (i = 0; i < ipoib_recvq_size; ++i) {
 			if (!ipoib_cm_alloc_rx_skb(dev, priv->cm.srq_ring, i,
-						   IPOIB_CM_RX_SG - 1,
+						   priv->cm.num_frags - 1,
 						   priv->cm.srq_ring[i].mapping)) {
 				ipoib_warn(priv, "failed to allocate "
 					   "receive buffer %d\n", i);
...
@@ -182,17 +182,20 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu)
 	struct ipoib_dev_priv *priv = netdev_priv(dev);

 	/* dev->mtu > 2K ==> connected mode */
-	if (ipoib_cm_admin_enabled(dev) && new_mtu <= IPOIB_CM_MTU) {
+	if (ipoib_cm_admin_enabled(dev)) {
+		if (new_mtu > ipoib_cm_max_mtu(dev))
+			return -EINVAL;
+
 		if (new_mtu > priv->mcast_mtu)
 			ipoib_warn(priv, "mtu > %d will cause multicast packet drops.\n",
 				   priv->mcast_mtu);
+
 		dev->mtu = new_mtu;
 		return 0;
 	}

-	if (new_mtu > IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN) {
+	if (new_mtu > IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN)
 		return -EINVAL;
-	}

 	priv->admin_mtu = new_mtu;
...