Commit 0bd0f1e6 authored by Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma

Pull rdma fixes from Doug Ledford:
 "Most are minor to important fixes.

  There is one performance enhancement that I took on the grounds that
  failing to check if other processes can run before running what's
  intended to be a background, idle-time task is a bug, even though the
  primary effect of the fix is to improve performance (and it was a very
  simple patch)"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma:
  IB/mlx5: Postpone remove_keys under knowledge of coming preemption
  IB/mlx4: Use vmalloc for WR buffers when needed
  IB/mlx4: Use correct order of variables in log message
  iser-target: Remove explicit mlx4 work-around
  mlx4: Expose correct max_sge_rd limit
  IB/mad: Require CM send method for everything except ClassPortInfo
  IB/cma: Add a missing rcu_read_unlock()
  IB core: Fix ib_sg_to_pages()
  IB/srp: Fix srp_map_sg_fr()
  IB/srp: Fix indirect data buffer rkey endianness
  IB/srp: Initialize dma_length in srp_map_idb
  IB/srp: Fix possible send queue overflow
  IB/srp: Fix a memory leak
  IB/sa: Put netlink request into the request list before sending
  IB/iser: use sector_div instead of do_div
  IB/core: use RCU for uverbs id lookup
  IB/qib: Minor fixes to qib per SFF 8636
  IB/core: Fix user mode post wr corruption
  IB/qib: Fix qib_mr structure
parents a80c47da ab5cdc31
...@@ -1126,10 +1126,7 @@ static bool validate_ipv4_net_dev(struct net_device *net_dev, ...@@ -1126,10 +1126,7 @@ static bool validate_ipv4_net_dev(struct net_device *net_dev,
rcu_read_lock(); rcu_read_lock();
err = fib_lookup(dev_net(net_dev), &fl4, &res, 0); err = fib_lookup(dev_net(net_dev), &fl4, &res, 0);
if (err) ret = err == 0 && FIB_RES_DEV(res) == net_dev;
return false;
ret = FIB_RES_DEV(res) == net_dev;
rcu_read_unlock(); rcu_read_unlock();
return ret; return ret;
......
...@@ -1811,6 +1811,11 @@ static int validate_mad(const struct ib_mad_hdr *mad_hdr, ...@@ -1811,6 +1811,11 @@ static int validate_mad(const struct ib_mad_hdr *mad_hdr,
if (qp_num == 0) if (qp_num == 0)
valid = 1; valid = 1;
} else { } else {
/* CM attributes other than ClassPortInfo only use Send method */
if ((mad_hdr->mgmt_class == IB_MGMT_CLASS_CM) &&
(mad_hdr->attr_id != IB_MGMT_CLASSPORTINFO_ATTR_ID) &&
(mad_hdr->method != IB_MGMT_METHOD_SEND))
goto out;
/* Filter GSI packets sent to QP0 */ /* Filter GSI packets sent to QP0 */
if (qp_num != 0) if (qp_num != 0)
valid = 1; valid = 1;
......
...@@ -512,7 +512,7 @@ static int ib_nl_get_path_rec_attrs_len(ib_sa_comp_mask comp_mask) ...@@ -512,7 +512,7 @@ static int ib_nl_get_path_rec_attrs_len(ib_sa_comp_mask comp_mask)
return len; return len;
} }
static int ib_nl_send_msg(struct ib_sa_query *query) static int ib_nl_send_msg(struct ib_sa_query *query, gfp_t gfp_mask)
{ {
struct sk_buff *skb = NULL; struct sk_buff *skb = NULL;
struct nlmsghdr *nlh; struct nlmsghdr *nlh;
...@@ -526,7 +526,7 @@ static int ib_nl_send_msg(struct ib_sa_query *query) ...@@ -526,7 +526,7 @@ static int ib_nl_send_msg(struct ib_sa_query *query)
if (len <= 0) if (len <= 0)
return -EMSGSIZE; return -EMSGSIZE;
skb = nlmsg_new(len, GFP_KERNEL); skb = nlmsg_new(len, gfp_mask);
if (!skb) if (!skb)
return -ENOMEM; return -ENOMEM;
...@@ -544,7 +544,7 @@ static int ib_nl_send_msg(struct ib_sa_query *query) ...@@ -544,7 +544,7 @@ static int ib_nl_send_msg(struct ib_sa_query *query)
/* Repair the nlmsg header length */ /* Repair the nlmsg header length */
nlmsg_end(skb, nlh); nlmsg_end(skb, nlh);
ret = ibnl_multicast(skb, nlh, RDMA_NL_GROUP_LS, GFP_KERNEL); ret = ibnl_multicast(skb, nlh, RDMA_NL_GROUP_LS, gfp_mask);
if (!ret) if (!ret)
ret = len; ret = len;
else else
...@@ -553,7 +553,7 @@ static int ib_nl_send_msg(struct ib_sa_query *query) ...@@ -553,7 +553,7 @@ static int ib_nl_send_msg(struct ib_sa_query *query)
return ret; return ret;
} }
static int ib_nl_make_request(struct ib_sa_query *query) static int ib_nl_make_request(struct ib_sa_query *query, gfp_t gfp_mask)
{ {
unsigned long flags; unsigned long flags;
unsigned long delay; unsigned long delay;
...@@ -562,24 +562,26 @@ static int ib_nl_make_request(struct ib_sa_query *query) ...@@ -562,24 +562,26 @@ static int ib_nl_make_request(struct ib_sa_query *query)
INIT_LIST_HEAD(&query->list); INIT_LIST_HEAD(&query->list);
query->seq = (u32)atomic_inc_return(&ib_nl_sa_request_seq); query->seq = (u32)atomic_inc_return(&ib_nl_sa_request_seq);
/* Put the request on the list first.*/
spin_lock_irqsave(&ib_nl_request_lock, flags); spin_lock_irqsave(&ib_nl_request_lock, flags);
ret = ib_nl_send_msg(query);
if (ret <= 0) {
ret = -EIO;
goto request_out;
} else {
ret = 0;
}
delay = msecs_to_jiffies(sa_local_svc_timeout_ms); delay = msecs_to_jiffies(sa_local_svc_timeout_ms);
query->timeout = delay + jiffies; query->timeout = delay + jiffies;
list_add_tail(&query->list, &ib_nl_request_list); list_add_tail(&query->list, &ib_nl_request_list);
/* Start the timeout if this is the only request */ /* Start the timeout if this is the only request */
if (ib_nl_request_list.next == &query->list) if (ib_nl_request_list.next == &query->list)
queue_delayed_work(ib_nl_wq, &ib_nl_timed_work, delay); queue_delayed_work(ib_nl_wq, &ib_nl_timed_work, delay);
spin_unlock_irqrestore(&ib_nl_request_lock, flags);
request_out: ret = ib_nl_send_msg(query, gfp_mask);
if (ret <= 0) {
ret = -EIO;
/* Remove the request */
spin_lock_irqsave(&ib_nl_request_lock, flags);
list_del(&query->list);
spin_unlock_irqrestore(&ib_nl_request_lock, flags); spin_unlock_irqrestore(&ib_nl_request_lock, flags);
} else {
ret = 0;
}
return ret; return ret;
} }
...@@ -1108,7 +1110,7 @@ static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask) ...@@ -1108,7 +1110,7 @@ static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask)
if (query->flags & IB_SA_ENABLE_LOCAL_SERVICE) { if (query->flags & IB_SA_ENABLE_LOCAL_SERVICE) {
if (!ibnl_chk_listeners(RDMA_NL_GROUP_LS)) { if (!ibnl_chk_listeners(RDMA_NL_GROUP_LS)) {
if (!ib_nl_make_request(query)) if (!ib_nl_make_request(query, gfp_mask))
return id; return id;
} }
ib_sa_disable_local_svc(query); ib_sa_disable_local_svc(query);
......
...@@ -62,9 +62,11 @@ static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" }; ...@@ -62,9 +62,11 @@ static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" };
* The ib_uobject locking scheme is as follows: * The ib_uobject locking scheme is as follows:
* *
* - ib_uverbs_idr_lock protects the uverbs idrs themselves, so it * - ib_uverbs_idr_lock protects the uverbs idrs themselves, so it
* needs to be held during all idr operations. When an object is * needs to be held during all idr write operations. When an object is
* looked up, a reference must be taken on the object's kref before * looked up, a reference must be taken on the object's kref before
* dropping this lock. * dropping this lock. For read operations, the rcu_read_lock()
* and rcu_read_unlock() pair is used instead, but similarly the kref
* reference is grabbed before the rcu_read_unlock().
* *
* - Each object also has an rwsem. This rwsem must be held for * - Each object also has an rwsem. This rwsem must be held for
* reading while an operation that uses the object is performed. * reading while an operation that uses the object is performed.
...@@ -96,7 +98,7 @@ static void init_uobj(struct ib_uobject *uobj, u64 user_handle, ...@@ -96,7 +98,7 @@ static void init_uobj(struct ib_uobject *uobj, u64 user_handle,
static void release_uobj(struct kref *kref) static void release_uobj(struct kref *kref)
{ {
kfree(container_of(kref, struct ib_uobject, ref)); kfree_rcu(container_of(kref, struct ib_uobject, ref), rcu);
} }
static void put_uobj(struct ib_uobject *uobj) static void put_uobj(struct ib_uobject *uobj)
...@@ -145,7 +147,7 @@ static struct ib_uobject *__idr_get_uobj(struct idr *idr, int id, ...@@ -145,7 +147,7 @@ static struct ib_uobject *__idr_get_uobj(struct idr *idr, int id,
{ {
struct ib_uobject *uobj; struct ib_uobject *uobj;
spin_lock(&ib_uverbs_idr_lock); rcu_read_lock();
uobj = idr_find(idr, id); uobj = idr_find(idr, id);
if (uobj) { if (uobj) {
if (uobj->context == context) if (uobj->context == context)
...@@ -153,7 +155,7 @@ static struct ib_uobject *__idr_get_uobj(struct idr *idr, int id, ...@@ -153,7 +155,7 @@ static struct ib_uobject *__idr_get_uobj(struct idr *idr, int id,
else else
uobj = NULL; uobj = NULL;
} }
spin_unlock(&ib_uverbs_idr_lock); rcu_read_unlock();
return uobj; return uobj;
} }
...@@ -2446,6 +2448,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, ...@@ -2446,6 +2448,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
int i, sg_ind; int i, sg_ind;
int is_ud; int is_ud;
ssize_t ret = -EINVAL; ssize_t ret = -EINVAL;
size_t next_size;
if (copy_from_user(&cmd, buf, sizeof cmd)) if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT; return -EFAULT;
...@@ -2490,7 +2493,8 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, ...@@ -2490,7 +2493,8 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
goto out_put; goto out_put;
} }
ud = alloc_wr(sizeof(*ud), user_wr->num_sge); next_size = sizeof(*ud);
ud = alloc_wr(next_size, user_wr->num_sge);
if (!ud) { if (!ud) {
ret = -ENOMEM; ret = -ENOMEM;
goto out_put; goto out_put;
...@@ -2511,7 +2515,8 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, ...@@ -2511,7 +2515,8 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
user_wr->opcode == IB_WR_RDMA_READ) { user_wr->opcode == IB_WR_RDMA_READ) {
struct ib_rdma_wr *rdma; struct ib_rdma_wr *rdma;
rdma = alloc_wr(sizeof(*rdma), user_wr->num_sge); next_size = sizeof(*rdma);
rdma = alloc_wr(next_size, user_wr->num_sge);
if (!rdma) { if (!rdma) {
ret = -ENOMEM; ret = -ENOMEM;
goto out_put; goto out_put;
...@@ -2525,7 +2530,8 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, ...@@ -2525,7 +2530,8 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
user_wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) { user_wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
struct ib_atomic_wr *atomic; struct ib_atomic_wr *atomic;
atomic = alloc_wr(sizeof(*atomic), user_wr->num_sge); next_size = sizeof(*atomic);
atomic = alloc_wr(next_size, user_wr->num_sge);
if (!atomic) { if (!atomic) {
ret = -ENOMEM; ret = -ENOMEM;
goto out_put; goto out_put;
...@@ -2540,7 +2546,8 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, ...@@ -2540,7 +2546,8 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
} else if (user_wr->opcode == IB_WR_SEND || } else if (user_wr->opcode == IB_WR_SEND ||
user_wr->opcode == IB_WR_SEND_WITH_IMM || user_wr->opcode == IB_WR_SEND_WITH_IMM ||
user_wr->opcode == IB_WR_SEND_WITH_INV) { user_wr->opcode == IB_WR_SEND_WITH_INV) {
next = alloc_wr(sizeof(*next), user_wr->num_sge); next_size = sizeof(*next);
next = alloc_wr(next_size, user_wr->num_sge);
if (!next) { if (!next) {
ret = -ENOMEM; ret = -ENOMEM;
goto out_put; goto out_put;
...@@ -2572,7 +2579,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, ...@@ -2572,7 +2579,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
if (next->num_sge) { if (next->num_sge) {
next->sg_list = (void *) next + next->sg_list = (void *) next +
ALIGN(sizeof *next, sizeof (struct ib_sge)); ALIGN(next_size, sizeof(struct ib_sge));
if (copy_from_user(next->sg_list, if (copy_from_user(next->sg_list,
buf + sizeof cmd + buf + sizeof cmd +
cmd.wr_count * cmd.wqe_size + cmd.wr_count * cmd.wqe_size +
......
...@@ -1516,7 +1516,7 @@ EXPORT_SYMBOL(ib_map_mr_sg); ...@@ -1516,7 +1516,7 @@ EXPORT_SYMBOL(ib_map_mr_sg);
* @sg_nents: number of entries in sg * @sg_nents: number of entries in sg
* @set_page: driver page assignment function pointer * @set_page: driver page assignment function pointer
* *
* Core service helper for drivers to covert the largest * Core service helper for drivers to convert the largest
* prefix of given sg list to a page vector. The sg list * prefix of given sg list to a page vector. The sg list
* prefix converted is the prefix that meet the requirements * prefix converted is the prefix that meet the requirements
* of ib_map_mr_sg. * of ib_map_mr_sg.
...@@ -1533,7 +1533,7 @@ int ib_sg_to_pages(struct ib_mr *mr, ...@@ -1533,7 +1533,7 @@ int ib_sg_to_pages(struct ib_mr *mr,
u64 last_end_dma_addr = 0, last_page_addr = 0; u64 last_end_dma_addr = 0, last_page_addr = 0;
unsigned int last_page_off = 0; unsigned int last_page_off = 0;
u64 page_mask = ~((u64)mr->page_size - 1); u64 page_mask = ~((u64)mr->page_size - 1);
int i; int i, ret;
mr->iova = sg_dma_address(&sgl[0]); mr->iova = sg_dma_address(&sgl[0]);
mr->length = 0; mr->length = 0;
...@@ -1544,27 +1544,29 @@ int ib_sg_to_pages(struct ib_mr *mr, ...@@ -1544,27 +1544,29 @@ int ib_sg_to_pages(struct ib_mr *mr,
u64 end_dma_addr = dma_addr + dma_len; u64 end_dma_addr = dma_addr + dma_len;
u64 page_addr = dma_addr & page_mask; u64 page_addr = dma_addr & page_mask;
if (i && page_addr != dma_addr) { /*
if (last_end_dma_addr != dma_addr) { * For the second and later elements, check whether either the
/* gap */ * end of element i-1 or the start of element i is not aligned
goto done; * on a page boundary.
*/
if (i && (last_page_off != 0 || page_addr != dma_addr)) {
/* Stop mapping if there is a gap. */
if (last_end_dma_addr != dma_addr)
break;
} else if (last_page_off + dma_len <= mr->page_size) { /*
/* chunk this fragment with the last */ * Coalesce this element with the last. If it is small
mr->length += dma_len; * enough just update mr->length. Otherwise start
last_end_dma_addr += dma_len; * mapping from the next page.
last_page_off += dma_len; */
continue; goto next_page;
} else {
/* map starting from the next page */
page_addr = last_page_addr + mr->page_size;
dma_len -= mr->page_size - last_page_off;
}
} }
do { do {
if (unlikely(set_page(mr, page_addr))) ret = set_page(mr, page_addr);
goto done; if (unlikely(ret < 0))
return i ? : ret;
next_page:
page_addr += mr->page_size; page_addr += mr->page_size;
} while (page_addr < end_dma_addr); } while (page_addr < end_dma_addr);
...@@ -1574,7 +1576,6 @@ int ib_sg_to_pages(struct ib_mr *mr, ...@@ -1574,7 +1576,6 @@ int ib_sg_to_pages(struct ib_mr *mr,
last_page_off = end_dma_addr & ~page_mask; last_page_off = end_dma_addr & ~page_mask;
} }
done:
return i; return i;
} }
EXPORT_SYMBOL(ib_sg_to_pages); EXPORT_SYMBOL(ib_sg_to_pages);
...@@ -456,7 +456,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, ...@@ -456,7 +456,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
props->max_qp_wr = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE; props->max_qp_wr = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE;
props->max_sge = min(dev->dev->caps.max_sq_sg, props->max_sge = min(dev->dev->caps.max_sq_sg,
dev->dev->caps.max_rq_sg); dev->dev->caps.max_rq_sg);
props->max_sge_rd = props->max_sge; props->max_sge_rd = MLX4_MAX_SGE_RD;
props->max_cq = dev->dev->quotas.cq; props->max_cq = dev->dev->quotas.cq;
props->max_cqe = dev->dev->caps.max_cqes; props->max_cqe = dev->dev->caps.max_cqes;
props->max_mr = dev->dev->quotas.mpt; props->max_mr = dev->dev->quotas.mpt;
......
...@@ -34,6 +34,7 @@ ...@@ -34,6 +34,7 @@
#include <linux/log2.h> #include <linux/log2.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/netdevice.h> #include <linux/netdevice.h>
#include <linux/vmalloc.h>
#include <rdma/ib_cache.h> #include <rdma/ib_cache.h>
#include <rdma/ib_pack.h> #include <rdma/ib_pack.h>
...@@ -795,8 +796,14 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, ...@@ -795,8 +796,14 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
if (err) if (err)
goto err_mtt; goto err_mtt;
qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof (u64), gfp); qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof(u64), gfp);
qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof (u64), gfp); if (!qp->sq.wrid)
qp->sq.wrid = __vmalloc(qp->sq.wqe_cnt * sizeof(u64),
gfp, PAGE_KERNEL);
qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof(u64), gfp);
if (!qp->rq.wrid)
qp->rq.wrid = __vmalloc(qp->rq.wqe_cnt * sizeof(u64),
gfp, PAGE_KERNEL);
if (!qp->sq.wrid || !qp->rq.wrid) { if (!qp->sq.wrid || !qp->rq.wrid) {
err = -ENOMEM; err = -ENOMEM;
goto err_wrid; goto err_wrid;
...@@ -886,8 +893,8 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, ...@@ -886,8 +893,8 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
if (qp_has_rq(init_attr)) if (qp_has_rq(init_attr))
mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db); mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db);
} else { } else {
kfree(qp->sq.wrid); kvfree(qp->sq.wrid);
kfree(qp->rq.wrid); kvfree(qp->rq.wrid);
} }
err_mtt: err_mtt:
...@@ -1062,8 +1069,8 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, ...@@ -1062,8 +1069,8 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
&qp->db); &qp->db);
ib_umem_release(qp->umem); ib_umem_release(qp->umem);
} else { } else {
kfree(qp->sq.wrid); kvfree(qp->sq.wrid);
kfree(qp->rq.wrid); kvfree(qp->rq.wrid);
if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER | if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER |
MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI))
free_proxy_bufs(&dev->ib_dev, qp); free_proxy_bufs(&dev->ib_dev, qp);
......
...@@ -34,6 +34,7 @@ ...@@ -34,6 +34,7 @@
#include <linux/mlx4/qp.h> #include <linux/mlx4/qp.h>
#include <linux/mlx4/srq.h> #include <linux/mlx4/srq.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/vmalloc.h>
#include "mlx4_ib.h" #include "mlx4_ib.h"
#include "user.h" #include "user.h"
...@@ -171,11 +172,15 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, ...@@ -171,11 +172,15 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
goto err_mtt; goto err_mtt;
srq->wrid = kmalloc(srq->msrq.max * sizeof (u64), GFP_KERNEL); srq->wrid = kmalloc(srq->msrq.max * sizeof (u64), GFP_KERNEL);
if (!srq->wrid) {
srq->wrid = __vmalloc(srq->msrq.max * sizeof(u64),
GFP_KERNEL, PAGE_KERNEL);
if (!srq->wrid) { if (!srq->wrid) {
err = -ENOMEM; err = -ENOMEM;
goto err_mtt; goto err_mtt;
} }
} }
}
cqn = (init_attr->srq_type == IB_SRQT_XRC) ? cqn = (init_attr->srq_type == IB_SRQT_XRC) ?
to_mcq(init_attr->ext.xrc.cq)->mcq.cqn : 0; to_mcq(init_attr->ext.xrc.cq)->mcq.cqn : 0;
...@@ -204,7 +209,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, ...@@ -204,7 +209,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
if (pd->uobject) if (pd->uobject)
mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &srq->db); mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &srq->db);
else else
kfree(srq->wrid); kvfree(srq->wrid);
err_mtt: err_mtt:
mlx4_mtt_cleanup(dev->dev, &srq->mtt); mlx4_mtt_cleanup(dev->dev, &srq->mtt);
......
...@@ -381,7 +381,19 @@ static void __cache_work_func(struct mlx5_cache_ent *ent) ...@@ -381,7 +381,19 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)
} }
} }
} else if (ent->cur > 2 * ent->limit) { } else if (ent->cur > 2 * ent->limit) {
if (!someone_adding(cache) && /*
* The remove_keys() logic is performed as a garbage collection
* task. Such a task is intended to run when no other active
* processes are running.
*
* need_resched() will return TRUE if there are user tasks
* to be activated in the near future.
*
* In that case, we don't execute remove_keys() and postpone
* the garbage collection work to the next cycle,
* in order to free CPU resources for other tasks.
*/
if (!need_resched() && !someone_adding(cache) &&
time_after(jiffies, cache->last_add + 300 * HZ)) { time_after(jiffies, cache->last_add + 300 * HZ)) {
remove_keys(dev, i, 1); remove_keys(dev, i, 1);
if (ent->cur > ent->limit) if (ent->cur > ent->limit)
......
...@@ -292,7 +292,7 @@ int qib_refresh_qsfp_cache(struct qib_pportdata *ppd, struct qib_qsfp_cache *cp) ...@@ -292,7 +292,7 @@ int qib_refresh_qsfp_cache(struct qib_pportdata *ppd, struct qib_qsfp_cache *cp)
qib_dev_porterr(ppd->dd, ppd->port, qib_dev_porterr(ppd->dd, ppd->port,
"QSFP byte0 is 0x%02X, S/B 0x0C/D\n", peek[0]); "QSFP byte0 is 0x%02X, S/B 0x0C/D\n", peek[0]);
if ((peek[2] & 2) == 0) { if ((peek[2] & 4) == 0) {
/* /*
* If cable is paged, rather than "flat memory", we need to * If cable is paged, rather than "flat memory", we need to
* set the page to zero, Even if it already appears to be zero. * set the page to zero, Even if it already appears to be zero.
...@@ -538,7 +538,7 @@ int qib_qsfp_dump(struct qib_pportdata *ppd, char *buf, int len) ...@@ -538,7 +538,7 @@ int qib_qsfp_dump(struct qib_pportdata *ppd, char *buf, int len)
sofar += scnprintf(buf + sofar, len - sofar, "Date:%.*s\n", sofar += scnprintf(buf + sofar, len - sofar, "Date:%.*s\n",
QSFP_DATE_LEN, cd.date); QSFP_DATE_LEN, cd.date);
sofar += scnprintf(buf + sofar, len - sofar, "Lot:%.*s\n", sofar += scnprintf(buf + sofar, len - sofar, "Lot:%.*s\n",
QSFP_LOT_LEN, cd.date); QSFP_LOT_LEN, cd.lot);
while (bidx < QSFP_DEFAULT_HDR_CNT) { while (bidx < QSFP_DEFAULT_HDR_CNT) {
int iidx; int iidx;
......
...@@ -329,9 +329,9 @@ struct qib_sge { ...@@ -329,9 +329,9 @@ struct qib_sge {
struct qib_mr { struct qib_mr {
struct ib_mr ibmr; struct ib_mr ibmr;
struct ib_umem *umem; struct ib_umem *umem;
struct qib_mregion mr; /* must be last */
u64 *pages; u64 *pages;
u32 npages; u32 npages;
struct qib_mregion mr; /* must be last */
}; };
/* /*
......
...@@ -1293,7 +1293,7 @@ u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task, ...@@ -1293,7 +1293,7 @@ u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS) { if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS) {
sector_t sector_off = mr_status.sig_err.sig_err_offset; sector_t sector_off = mr_status.sig_err.sig_err_offset;
do_div(sector_off, sector_size + 8); sector_div(sector_off, sector_size + 8);
*sector = scsi_get_lba(iser_task->sc) + sector_off; *sector = scsi_get_lba(iser_task->sc) + sector_off;
pr_err("PI error found type %d at sector %llx " pr_err("PI error found type %d at sector %llx "
......
...@@ -157,16 +157,9 @@ isert_create_qp(struct isert_conn *isert_conn, ...@@ -157,16 +157,9 @@ isert_create_qp(struct isert_conn *isert_conn,
attr.recv_cq = comp->cq; attr.recv_cq = comp->cq;
attr.cap.max_send_wr = ISERT_QP_MAX_REQ_DTOS; attr.cap.max_send_wr = ISERT_QP_MAX_REQ_DTOS;
attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS + 1; attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS + 1;
/* attr.cap.max_send_sge = device->dev_attr.max_sge;
* FIXME: Use devattr.max_sge - 2 for max_send_sge as isert_conn->max_sge = min(device->dev_attr.max_sge,
* work-around for RDMA_READs with ConnectX-2. device->dev_attr.max_sge_rd);
*
* Also, still make sure to have at least two SGEs for
* outgoing control PDU responses.
*/
attr.cap.max_send_sge = max(2, device->dev_attr.max_sge - 2);
isert_conn->max_sge = attr.cap.max_send_sge;
attr.cap.max_recv_sge = 1; attr.cap.max_recv_sge = 1;
attr.sq_sig_type = IB_SIGNAL_REQ_WR; attr.sq_sig_type = IB_SIGNAL_REQ_WR;
attr.qp_type = IB_QPT_RC; attr.qp_type = IB_QPT_RC;
......
...@@ -488,7 +488,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch) ...@@ -488,7 +488,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
struct ib_qp *qp; struct ib_qp *qp;
struct ib_fmr_pool *fmr_pool = NULL; struct ib_fmr_pool *fmr_pool = NULL;
struct srp_fr_pool *fr_pool = NULL; struct srp_fr_pool *fr_pool = NULL;
const int m = 1 + dev->use_fast_reg; const int m = dev->use_fast_reg ? 3 : 1;
struct ib_cq_init_attr cq_attr = {}; struct ib_cq_init_attr cq_attr = {};
int ret; int ret;
...@@ -994,16 +994,16 @@ static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich) ...@@ -994,16 +994,16 @@ static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich)
ret = srp_lookup_path(ch); ret = srp_lookup_path(ch);
if (ret) if (ret)
return ret; goto out;
while (1) { while (1) {
init_completion(&ch->done); init_completion(&ch->done);
ret = srp_send_req(ch, multich); ret = srp_send_req(ch, multich);
if (ret) if (ret)
return ret; goto out;
ret = wait_for_completion_interruptible(&ch->done); ret = wait_for_completion_interruptible(&ch->done);
if (ret < 0) if (ret < 0)
return ret; goto out;
/* /*
* The CM event handling code will set status to * The CM event handling code will set status to
...@@ -1011,15 +1011,16 @@ static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich) ...@@ -1011,15 +1011,16 @@ static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich)
* back, or SRP_DLID_REDIRECT if we get a lid/qp * back, or SRP_DLID_REDIRECT if we get a lid/qp
* redirect REJ back. * redirect REJ back.
*/ */
switch (ch->status) { ret = ch->status;
switch (ret) {
case 0: case 0:
ch->connected = true; ch->connected = true;
return 0; goto out;
case SRP_PORT_REDIRECT: case SRP_PORT_REDIRECT:
ret = srp_lookup_path(ch); ret = srp_lookup_path(ch);
if (ret) if (ret)
return ret; goto out;
break; break;
case SRP_DLID_REDIRECT: case SRP_DLID_REDIRECT:
...@@ -1028,13 +1029,16 @@ static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich) ...@@ -1028,13 +1029,16 @@ static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich)
case SRP_STALE_CONN: case SRP_STALE_CONN:
shost_printk(KERN_ERR, target->scsi_host, PFX shost_printk(KERN_ERR, target->scsi_host, PFX
"giving up on stale connection\n"); "giving up on stale connection\n");
ch->status = -ECONNRESET; ret = -ECONNRESET;
return ch->status; goto out;
default: default:
return ch->status; goto out;
} }
} }
out:
return ret <= 0 ? ret : -ENODEV;
} }
static int srp_inv_rkey(struct srp_rdma_ch *ch, u32 rkey) static int srp_inv_rkey(struct srp_rdma_ch *ch, u32 rkey)
...@@ -1309,7 +1313,7 @@ static int srp_map_finish_fmr(struct srp_map_state *state, ...@@ -1309,7 +1313,7 @@ static int srp_map_finish_fmr(struct srp_map_state *state,
} }
static int srp_map_finish_fr(struct srp_map_state *state, static int srp_map_finish_fr(struct srp_map_state *state,
struct srp_rdma_ch *ch) struct srp_rdma_ch *ch, int sg_nents)
{ {
struct srp_target_port *target = ch->target; struct srp_target_port *target = ch->target;
struct srp_device *dev = target->srp_host->srp_dev; struct srp_device *dev = target->srp_host->srp_dev;
...@@ -1324,10 +1328,10 @@ static int srp_map_finish_fr(struct srp_map_state *state, ...@@ -1324,10 +1328,10 @@ static int srp_map_finish_fr(struct srp_map_state *state,
WARN_ON_ONCE(!dev->use_fast_reg); WARN_ON_ONCE(!dev->use_fast_reg);
if (state->sg_nents == 0) if (sg_nents == 0)
return 0; return 0;
if (state->sg_nents == 1 && target->global_mr) { if (sg_nents == 1 && target->global_mr) {
srp_map_desc(state, sg_dma_address(state->sg), srp_map_desc(state, sg_dma_address(state->sg),
sg_dma_len(state->sg), sg_dma_len(state->sg),
target->global_mr->rkey); target->global_mr->rkey);
...@@ -1341,8 +1345,7 @@ static int srp_map_finish_fr(struct srp_map_state *state, ...@@ -1341,8 +1345,7 @@ static int srp_map_finish_fr(struct srp_map_state *state,
rkey = ib_inc_rkey(desc->mr->rkey); rkey = ib_inc_rkey(desc->mr->rkey);
ib_update_fast_reg_key(desc->mr, rkey); ib_update_fast_reg_key(desc->mr, rkey);
n = ib_map_mr_sg(desc->mr, state->sg, state->sg_nents, n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, dev->mr_page_size);
dev->mr_page_size);
if (unlikely(n < 0)) if (unlikely(n < 0))
return n; return n;
...@@ -1448,16 +1451,15 @@ static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch, ...@@ -1448,16 +1451,15 @@ static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
state->fr.next = req->fr_list; state->fr.next = req->fr_list;
state->fr.end = req->fr_list + ch->target->cmd_sg_cnt; state->fr.end = req->fr_list + ch->target->cmd_sg_cnt;
state->sg = scat; state->sg = scat;
state->sg_nents = scsi_sg_count(req->scmnd);
while (state->sg_nents) { while (count) {
int i, n; int i, n;
n = srp_map_finish_fr(state, ch); n = srp_map_finish_fr(state, ch, count);
if (unlikely(n < 0)) if (unlikely(n < 0))
return n; return n;
state->sg_nents -= n; count -= n;
for (i = 0; i < n; i++) for (i = 0; i < n; i++)
state->sg = sg_next(state->sg); state->sg = sg_next(state->sg);
} }
...@@ -1517,10 +1519,12 @@ static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req, ...@@ -1517,10 +1519,12 @@ static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
if (dev->use_fast_reg) { if (dev->use_fast_reg) {
state.sg = idb_sg; state.sg = idb_sg;
state.sg_nents = 1;
sg_set_buf(idb_sg, req->indirect_desc, idb_len); sg_set_buf(idb_sg, req->indirect_desc, idb_len);
idb_sg->dma_address = req->indirect_dma_addr; /* hack! */ idb_sg->dma_address = req->indirect_dma_addr; /* hack! */
ret = srp_map_finish_fr(&state, ch); #ifdef CONFIG_NEED_SG_DMA_LENGTH
idb_sg->dma_length = idb_sg->length; /* hack^2 */
#endif
ret = srp_map_finish_fr(&state, ch, 1);
if (ret < 0) if (ret < 0)
return ret; return ret;
} else if (dev->use_fmr) { } else if (dev->use_fmr) {
...@@ -1655,7 +1659,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, ...@@ -1655,7 +1659,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
return ret; return ret;
req->nmdesc++; req->nmdesc++;
} else { } else {
idb_rkey = target->global_mr->rkey; idb_rkey = cpu_to_be32(target->global_mr->rkey);
} }
indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr); indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr);
......
...@@ -300,10 +300,7 @@ struct srp_map_state { ...@@ -300,10 +300,7 @@ struct srp_map_state {
dma_addr_t base_dma_addr; dma_addr_t base_dma_addr;
u32 dma_len; u32 dma_len;
u32 total_len; u32 total_len;
union {
unsigned int npages; unsigned int npages;
int sg_nents;
};
unsigned int nmdesc; unsigned int nmdesc;
unsigned int ndesc; unsigned int ndesc;
}; };
......
...@@ -1010,7 +1010,7 @@ static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave, ...@@ -1010,7 +1010,7 @@ static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave,
if (!(smp->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED && if (!(smp->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED &&
smp->method == IB_MGMT_METHOD_GET) || network_view) { smp->method == IB_MGMT_METHOD_GET) || network_view) {
mlx4_err(dev, "Unprivileged slave %d is trying to execute a Subnet MGMT MAD, class 0x%x, method 0x%x, view=%s for attr 0x%x. Rejecting\n", mlx4_err(dev, "Unprivileged slave %d is trying to execute a Subnet MGMT MAD, class 0x%x, method 0x%x, view=%s for attr 0x%x. Rejecting\n",
slave, smp->method, smp->mgmt_class, slave, smp->mgmt_class, smp->method,
network_view ? "Network" : "Host", network_view ? "Network" : "Host",
be16_to_cpu(smp->attr_id)); be16_to_cpu(smp->attr_id));
return -EPERM; return -EPERM;
......
...@@ -426,6 +426,17 @@ enum { ...@@ -426,6 +426,17 @@ enum {
MLX4_MAX_FAST_REG_PAGES = 511, MLX4_MAX_FAST_REG_PAGES = 511,
}; };
enum {
	/*
	 * An RDMA read WQE is at most 512 bytes and has to hold:
	 *   - the ctrl segment (16 bytes)
	 *   - the rdma segment (16 bytes)
	 *   - the scatter elements (16 bytes each)
	 * so max_sge_rd is the number of scatter elements that fit in
	 * the space left after the two fixed segments.
	 */
	MLX4_MAX_SGE_RD = (512 - 2 * 16) / 16
};
enum { enum {
MLX4_DEV_PMC_SUBTYPE_GUID_INFO = 0x14, MLX4_DEV_PMC_SUBTYPE_GUID_INFO = 0x14,
MLX4_DEV_PMC_SUBTYPE_PORT_INFO = 0x15, MLX4_DEV_PMC_SUBTYPE_PORT_INFO = 0x15,
......
...@@ -237,6 +237,8 @@ struct ib_vendor_mad { ...@@ -237,6 +237,8 @@ struct ib_vendor_mad {
u8 data[IB_MGMT_VENDOR_DATA]; u8 data[IB_MGMT_VENDOR_DATA];
}; };
#define IB_MGMT_CLASSPORTINFO_ATTR_ID cpu_to_be16(0x0001)
struct ib_class_port_info { struct ib_class_port_info {
u8 base_version; u8 base_version;
u8 class_version; u8 class_version;
......
...@@ -1271,6 +1271,7 @@ struct ib_uobject { ...@@ -1271,6 +1271,7 @@ struct ib_uobject {
int id; /* index into kernel idr */ int id; /* index into kernel idr */
struct kref ref; struct kref ref;
struct rw_semaphore mutex; /* protects .live */ struct rw_semaphore mutex; /* protects .live */
struct rcu_head rcu; /* kfree_rcu() overhead */
int live; int live;
}; };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment