Commit 925d96a0 authored by Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma

Pull rdma fixes from Doug Ledford:
 "Final set of -rc fixes for 4.6.

  I've collected up a number of patches that are all pretty small with
  the exception of only a couple.  The hfi1 driver has a number of
  important patches, and it is what really drives the line count of this
  pull request up.  These are all small and I've got this kernel built
  and running in the test lab (I have most of the hardware, I think nes
  is the only thing in this patch set that I can't say I've personally
  tested and have up and running).

  Summary:

   - A number of collected fixes for oopses, memory corruptions,
     deadlocks, etc.  All of these fixes are small (many only 5-10
     lines), obvious, and tested.

   - Fix for the security issue related to the use of write for
     bi-directional communications"
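
The write()-interface hardening below follows one pattern: each legacy char-device write handler first checks that it is being invoked as a plain user-space write() before interpreting the buffer as a command. A minimal sketch of that pattern, for orientation only (example_write() is a hypothetical handler standing in for ib_ucm_write(), ucma_write(), ib_uverbs_write(), qib_write() and hfi1_file_write(); ib_safe_file_access() is the helper added by this series):

static ssize_t example_write(struct file *filp, const char __user *buf,
                             size_t count, loff_t *pos)
{
        /* Refuse writes issued with foreign credentials or from a kernel
         * address space (suid error-message writes, splice, etc.). */
        if (WARN_ON_ONCE(!ib_safe_file_access(filp)))
                return -EACCES;

        /* ... parse the command header from buf as the driver did before ... */
        return count;
}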

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma:
  RDMA/nes: don't leak skb if carrier down
  IB/security: Restrict use of the write() interface
  IB/hfi1: Use kernel default llseek for ui device
  IB/hfi1: Don't attempt to free resources if initialization failed
  IB/hfi1: Fix missing lock/unlock in verbs drain callback
  IB/rdmavt: Fix send scheduling
  IB/hfi1: Prevent unpinning of wrong pages
  IB/hfi1: Fix deadlock caused by locking with wrong scope
  IB/hfi1: Prevent NULL pointer deferences in caching code
  MAINTAINERS: Update iser/isert maintainer contact info
  IB/mlx5: Expose correct max_sge_rd limit
  RDMA/iw_cxgb4: Fix bar2 virt addr calculation for T4 chips
  iw_cxgb4: handle draining an idle qp
  iw_cxgb3: initialize ibdev.iwcm->ifname for port mapping
  iw_cxgb4: initialize ibdev.iwcm->ifname for port mapping
  IB/core: Don't drain non-existent rq queue-pair
  IB/core: Fix oops in ib_cache_gid_set_default_gid
parents 1d003af2 4c8bb959
@@ -6027,7 +6027,7 @@ F: include/scsi/*iscsi*
 ISCSI EXTENSIONS FOR RDMA (ISER) INITIATOR
 M: Or Gerlitz <ogerlitz@mellanox.com>
-M: Sagi Grimberg <sagig@mellanox.com>
+M: Sagi Grimberg <sagi@grimberg.me>
 M: Roi Dayan <roid@mellanox.com>
 L: linux-rdma@vger.kernel.org
 S: Supported
@@ -6037,7 +6037,7 @@ Q: http://patchwork.kernel.org/project/linux-rdma/list/
 F: drivers/infiniband/ulp/iser/
 ISCSI EXTENSIONS FOR RDMA (ISER) TARGET
-M: Sagi Grimberg <sagig@mellanox.com>
+M: Sagi Grimberg <sagi@grimberg.me>
 T: git git://git.kernel.org/pub/scm/linux/kernel/git/nab/target-pending.git master
 L: linux-rdma@vger.kernel.org
 L: target-devel@vger.kernel.org
......
@@ -691,7 +691,8 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
                             NULL);
        /* Coudn't find default GID location */
-       WARN_ON(ix < 0);
+       if (WARN_ON(ix < 0))
+               goto release;
 
        zattr_type.gid_type = gid_type;
......
@@ -48,6 +48,7 @@
 #include <asm/uaccess.h>
+#include <rdma/ib.h>
 #include <rdma/ib_cm.h>
 #include <rdma/ib_user_cm.h>
 #include <rdma/ib_marshall.h>
@@ -1103,6 +1104,9 @@ static ssize_t ib_ucm_write(struct file *filp, const char __user *buf,
        struct ib_ucm_cmd_hdr hdr;
        ssize_t result;
 
+       if (WARN_ON_ONCE(!ib_safe_file_access(filp)))
+               return -EACCES;
+
        if (len < sizeof(hdr))
                return -EINVAL;
......
@@ -1574,6 +1574,9 @@ static ssize_t ucma_write(struct file *filp, const char __user *buf,
        struct rdma_ucm_cmd_hdr hdr;
        ssize_t ret;
 
+       if (WARN_ON_ONCE(!ib_safe_file_access(filp)))
+               return -EACCES;
+
        if (len < sizeof(hdr))
                return -EINVAL;
......
@@ -48,6 +48,8 @@
 #include <asm/uaccess.h>
 
+#include <rdma/ib.h>
+
 #include "uverbs.h"
 
 MODULE_AUTHOR("Roland Dreier");
@@ -709,6 +711,9 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
        int srcu_key;
        ssize_t ret;
 
+       if (WARN_ON_ONCE(!ib_safe_file_access(filp)))
+               return -EACCES;
+
        if (count < sizeof hdr)
                return -EINVAL;
......
@@ -1860,6 +1860,7 @@ EXPORT_SYMBOL(ib_drain_rq);
 void ib_drain_qp(struct ib_qp *qp)
 {
        ib_drain_sq(qp);
-       ib_drain_rq(qp);
+       if (!qp->srq)
+               ib_drain_rq(qp);
 }
 EXPORT_SYMBOL(ib_drain_qp);
@@ -1390,6 +1390,8 @@ int iwch_register_device(struct iwch_dev *dev)
        dev->ibdev.iwcm->add_ref = iwch_qp_add_ref;
        dev->ibdev.iwcm->rem_ref = iwch_qp_rem_ref;
        dev->ibdev.iwcm->get_qp = iwch_get_qp;
+       memcpy(dev->ibdev.iwcm->ifname, dev->rdev.t3cdev_p->lldev->name,
+              sizeof(dev->ibdev.iwcm->ifname));
 
        ret = ib_register_device(&dev->ibdev, NULL);
        if (ret)
......
@@ -162,7 +162,7 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
        cq->bar2_va = c4iw_bar2_addrs(rdev, cq->cqid, T4_BAR2_QTYPE_INGRESS,
                                      &cq->bar2_qid,
                                      user ? &cq->bar2_pa : NULL);
-       if (user && !cq->bar2_va) {
+       if (user && !cq->bar2_pa) {
                pr_warn(MOD "%s: cqid %u not in BAR2 range.\n",
                        pci_name(rdev->lldi.pdev), cq->cqid);
                ret = -EINVAL;
......
@@ -580,6 +580,8 @@ int c4iw_register_device(struct c4iw_dev *dev)
        dev->ibdev.iwcm->add_ref = c4iw_qp_add_ref;
        dev->ibdev.iwcm->rem_ref = c4iw_qp_rem_ref;
        dev->ibdev.iwcm->get_qp = c4iw_get_qp;
+       memcpy(dev->ibdev.iwcm->ifname, dev->rdev.lldi.ports[0]->name,
+              sizeof(dev->ibdev.iwcm->ifname));
 
        ret = ib_register_device(&dev->ibdev, NULL);
        if (ret)
......
@@ -185,6 +185,10 @@ void __iomem *c4iw_bar2_addrs(struct c4iw_rdev *rdev, unsigned int qid,
        if (pbar2_pa)
                *pbar2_pa = (rdev->bar2_pa + bar2_qoffset) & PAGE_MASK;
+
+       if (is_t4(rdev->lldi.adapter_type))
+               return NULL;
+
        return rdev->bar2_kva + bar2_qoffset;
 }
@@ -270,7 +274,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
        /*
         * User mode must have bar2 access.
         */
-       if (user && (!wq->sq.bar2_va || !wq->rq.bar2_va)) {
+       if (user && (!wq->sq.bar2_pa || !wq->rq.bar2_pa)) {
                pr_warn(MOD "%s: sqid %u or rqid %u not in BAR2 range.\n",
                        pci_name(rdev->lldi.pdev), wq->sq.qid, wq->rq.qid);
                goto free_dma;
@@ -1895,13 +1899,27 @@ int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 void c4iw_drain_sq(struct ib_qp *ibqp)
 {
        struct c4iw_qp *qp = to_c4iw_qp(ibqp);
+       unsigned long flag;
+       bool need_to_wait;
 
-       wait_for_completion(&qp->sq_drained);
+       spin_lock_irqsave(&qp->lock, flag);
+       need_to_wait = !t4_sq_empty(&qp->wq);
+       spin_unlock_irqrestore(&qp->lock, flag);
+       if (need_to_wait)
+               wait_for_completion(&qp->sq_drained);
 }
 
 void c4iw_drain_rq(struct ib_qp *ibqp)
 {
        struct c4iw_qp *qp = to_c4iw_qp(ibqp);
+       unsigned long flag;
+       bool need_to_wait;
 
-       wait_for_completion(&qp->rq_drained);
+       spin_lock_irqsave(&qp->lock, flag);
+       need_to_wait = !t4_rq_empty(&qp->wq);
+       spin_unlock_irqrestore(&qp->lock, flag);
+       if (need_to_wait)
+               wait_for_completion(&qp->rq_drained);
 }
@@ -530,7 +530,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
                     sizeof(struct mlx5_wqe_ctrl_seg)) /
                     sizeof(struct mlx5_wqe_data_seg);
        props->max_sge = min(max_rq_sg, max_sq_sg);
-       props->max_sge_rd = props->max_sge;
+       props->max_sge_rd = MLX5_MAX_SGE_RD;
        props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq);
        props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1;
        props->max_mr = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
......
@@ -500,9 +500,6 @@ static int nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev)
         * skb_shinfo(skb)->nr_frags, skb_is_gso(skb));
         */
 
-       if (!netif_carrier_ok(netdev))
-               return NETDEV_TX_OK;
-
        if (netif_queue_stopped(netdev))
                return NETDEV_TX_BUSY;
......
@@ -45,6 +45,8 @@
 #include <linux/export.h>
 #include <linux/uio.h>
 
+#include <rdma/ib.h>
+
 #include "qib.h"
 #include "qib_common.h"
 #include "qib_user_sdma.h"
@@ -2067,6 +2069,9 @@ static ssize_t qib_write(struct file *fp, const char __user *data,
        ssize_t ret = 0;
        void *dest;
 
+       if (WARN_ON_ONCE(!ib_safe_file_access(fp)))
+               return -EACCES;
+
        if (count < sizeof(cmd.type)) {
                ret = -EINVAL;
                goto bail;
......
@@ -1637,9 +1637,9 @@ int rvt_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
        spin_unlock_irqrestore(&qp->s_hlock, flags);
        if (nreq) {
                if (call_send)
-                       rdi->driver_f.schedule_send_no_lock(qp);
-               else
                        rdi->driver_f.do_send(qp);
+               else
+                       rdi->driver_f.schedule_send_no_lock(qp);
        }
        return err;
 }
......
@@ -3,4 +3,4 @@ July, 2015
 - Remove unneeded file entries in sysfs
 - Remove software processing of IB protocol and place in library for use
   by qib, ipath (if still present), hfi1, and eventually soft-roce
+- Replace incorrect uAPI
@@ -49,6 +49,8 @@
 #include <linux/vmalloc.h>
 #include <linux/io.h>
 
+#include <rdma/ib.h>
+
 #include "hfi.h"
 #include "pio.h"
 #include "device.h"
@@ -190,6 +192,10 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data,
        int uctxt_required = 1;
        int must_be_root = 0;
 
+       /* FIXME: This interface cannot continue out of staging */
+       if (WARN_ON_ONCE(!ib_safe_file_access(fp)))
+               return -EACCES;
+
        if (count < sizeof(cmd)) {
                ret = -EINVAL;
                goto bail;
@@ -791,15 +797,16 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
        spin_unlock_irqrestore(&dd->uctxt_lock, flags);
        dd->rcd[uctxt->ctxt] = NULL;
+
+       hfi1_user_exp_rcv_free(fdata);
+       hfi1_clear_ctxt_pkey(dd, uctxt->ctxt);
+
        uctxt->rcvwait_to = 0;
        uctxt->piowait_to = 0;
        uctxt->rcvnowait = 0;
        uctxt->pionowait = 0;
        uctxt->event_flags = 0;
 
-       hfi1_user_exp_rcv_free(fdata);
-       hfi1_clear_ctxt_pkey(dd, uctxt->ctxt);
-
        hfi1_stats.sps_ctxts--;
        if (++dd->freectxts == dd->num_user_contexts)
                aspm_enable_all(dd);
@@ -1127,27 +1134,13 @@ static int setup_subctxt(struct hfi1_ctxtdata *uctxt)
 static int user_init(struct file *fp)
 {
-       int ret;
        unsigned int rcvctrl_ops = 0;
        struct hfi1_filedata *fd = fp->private_data;
        struct hfi1_ctxtdata *uctxt = fd->uctxt;
 
        /* make sure that the context has already been setup */
-       if (!test_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags)) {
-               ret = -EFAULT;
-               goto done;
-       }
-
-       /*
-        * Subctxts don't need to initialize anything since master
-        * has done it.
-        */
-       if (fd->subctxt) {
-               ret = wait_event_interruptible(uctxt->wait, !test_bit(
-                       HFI1_CTXT_MASTER_UNINIT,
-                       &uctxt->event_flags));
-               goto expected;
-       }
+       if (!test_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags))
+               return -EFAULT;
 
        /* initialize poll variables... */
        uctxt->urgent = 0;
@@ -1202,19 +1195,7 @@ static int user_init(struct file *fp)
                wake_up(&uctxt->wait);
        }
 
-expected:
-       /*
-        * Expected receive has to be setup for all processes (including
-        * shared contexts). However, it has to be done after the master
-        * context has been fully configured as it depends on the
-        * eager/expected split of the RcvArray entries.
-        * Setting it up here ensures that the subcontexts will be waiting
-        * (due to the above wait_event_interruptible() until the master
-        * is setup.
-        */
-       ret = hfi1_user_exp_rcv_init(fp);
-done:
-       return ret;
+       return 0;
 }
 
 static int get_ctxt_info(struct file *fp, void __user *ubase, __u32 len)
@@ -1261,7 +1242,7 @@ static int setup_ctxt(struct file *fp)
        int ret = 0;
 
        /*
-        * Context should be set up only once (including allocation and
+        * Context should be set up only once, including allocation and
         * programming of eager buffers. This is done if context sharing
         * is not requested or by the master process.
         */
@@ -1282,8 +1263,27 @@ static int setup_ctxt(struct file *fp)
                        if (ret)
                                goto done;
                }
+       } else {
+               ret = wait_event_interruptible(uctxt->wait, !test_bit(
+                                              HFI1_CTXT_MASTER_UNINIT,
+                                              &uctxt->event_flags));
+               if (ret)
+                       goto done;
        }
 
        ret = hfi1_user_sdma_alloc_queues(uctxt, fp);
+       if (ret)
+               goto done;
+
+       /*
+        * Expected receive has to be setup for all processes (including
+        * shared contexts). However, it has to be done after the master
+        * context has been fully configured as it depends on the
+        * eager/expected split of the RcvArray entries.
+        * Setting it up here ensures that the subcontexts will be waiting
+        * (due to the above wait_event_interruptible() until the master
+        * is setup.
+        */
+       ret = hfi1_user_exp_rcv_init(fp);
        if (ret)
                goto done;
@@ -1565,29 +1565,8 @@ static loff_t ui_lseek(struct file *filp, loff_t offset, int whence)
 {
        struct hfi1_devdata *dd = filp->private_data;
 
-       switch (whence) {
-       case SEEK_SET:
-               break;
-       case SEEK_CUR:
-               offset += filp->f_pos;
-               break;
-       case SEEK_END:
-               offset = ((dd->kregend - dd->kregbase) + DC8051_DATA_MEM_SIZE) -
-                       offset;
-               break;
-       default:
-               return -EINVAL;
-       }
-
-       if (offset < 0)
-               return -EINVAL;
-
-       if (offset >= (dd->kregend - dd->kregbase) + DC8051_DATA_MEM_SIZE)
-               return -EINVAL;
-
-       filp->f_pos = offset;
-
-       return filp->f_pos;
+       return fixed_size_llseek(filp, offset, whence,
+               (dd->kregend - dd->kregbase) + DC8051_DATA_MEM_SIZE);
 }
 
 /* NOTE: assumes unsigned long is 8 bytes */
......
@@ -71,6 +71,7 @@ static inline void mmu_notifier_range_start(struct mmu_notifier *,
                                            struct mm_struct *,
                                            unsigned long, unsigned long);
 static void mmu_notifier_mem_invalidate(struct mmu_notifier *,
+                                       struct mm_struct *,
                                        unsigned long, unsigned long);
 static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *,
                                           unsigned long, unsigned long);
@@ -137,7 +138,7 @@ void hfi1_mmu_rb_unregister(struct rb_root *root)
                rbnode = rb_entry(node, struct mmu_rb_node, node);
                rb_erase(node, root);
                if (handler->ops->remove)
-                       handler->ops->remove(root, rbnode, false);
+                       handler->ops->remove(root, rbnode, NULL);
        }
 }
@@ -176,7 +177,7 @@ int hfi1_mmu_rb_insert(struct rb_root *root, struct mmu_rb_node *mnode)
        return ret;
 }
 
-/* Caller must host handler lock */
+/* Caller must hold handler lock */
 static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler,
                                           unsigned long addr,
                                           unsigned long len)
@@ -200,15 +201,21 @@ static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler,
        return node;
 }
 
+/* Caller must *not* hold handler lock. */
 static void __mmu_rb_remove(struct mmu_rb_handler *handler,
-                           struct mmu_rb_node *node, bool arg)
+                           struct mmu_rb_node *node, struct mm_struct *mm)
 {
+       unsigned long flags;
+
        /* Validity of handler and node pointers has been checked by caller. */
        hfi1_cdbg(MMU, "Removing node addr 0x%llx, len %u", node->addr,
                  node->len);
+       spin_lock_irqsave(&handler->lock, flags);
        __mmu_int_rb_remove(node, handler->root);
+       spin_unlock_irqrestore(&handler->lock, flags);
+
        if (handler->ops->remove)
-               handler->ops->remove(handler->root, node, arg);
+               handler->ops->remove(handler->root, node, mm);
 }
 
 struct mmu_rb_node *hfi1_mmu_rb_search(struct rb_root *root, unsigned long addr,
@@ -231,14 +238,11 @@ struct mmu_rb_node *hfi1_mmu_rb_search(struct rb_root *root, unsigned long addr,
 void hfi1_mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node)
 {
        struct mmu_rb_handler *handler = find_mmu_handler(root);
-       unsigned long flags;
 
        if (!handler || !node)
                return;
 
-       spin_lock_irqsave(&handler->lock, flags);
-       __mmu_rb_remove(handler, node, false);
-       spin_unlock_irqrestore(&handler->lock, flags);
+       __mmu_rb_remove(handler, node, NULL);
 }
 
 static struct mmu_rb_handler *find_mmu_handler(struct rb_root *root)
@@ -260,7 +264,7 @@ static struct mmu_rb_handler *find_mmu_handler(struct rb_root *root)
 static inline void mmu_notifier_page(struct mmu_notifier *mn,
                                     struct mm_struct *mm, unsigned long addr)
 {
-       mmu_notifier_mem_invalidate(mn, addr, addr + PAGE_SIZE);
+       mmu_notifier_mem_invalidate(mn, mm, addr, addr + PAGE_SIZE);
 }
 
 static inline void mmu_notifier_range_start(struct mmu_notifier *mn,
@@ -268,25 +272,31 @@ static inline void mmu_notifier_range_start(struct mmu_notifier *mn,
                                            unsigned long start,
                                            unsigned long end)
 {
-       mmu_notifier_mem_invalidate(mn, start, end);
+       mmu_notifier_mem_invalidate(mn, mm, start, end);
 }
 
 static void mmu_notifier_mem_invalidate(struct mmu_notifier *mn,
+                                       struct mm_struct *mm,
                                        unsigned long start, unsigned long end)
 {
        struct mmu_rb_handler *handler =
                container_of(mn, struct mmu_rb_handler, mn);
        struct rb_root *root = handler->root;
-       struct mmu_rb_node *node;
+       struct mmu_rb_node *node, *ptr = NULL;
        unsigned long flags;
 
        spin_lock_irqsave(&handler->lock, flags);
-       for (node = __mmu_int_rb_iter_first(root, start, end - 1); node;
-            node = __mmu_int_rb_iter_next(node, start, end - 1)) {
+       for (node = __mmu_int_rb_iter_first(root, start, end - 1);
+            node; node = ptr) {
+               /* Guard against node removal. */
+               ptr = __mmu_int_rb_iter_next(node, start, end - 1);
                hfi1_cdbg(MMU, "Invalidating node addr 0x%llx, len %u",
                          node->addr, node->len);
-               if (handler->ops->invalidate(root, node))
-                       __mmu_rb_remove(handler, node, true);
+               if (handler->ops->invalidate(root, node)) {
+                       spin_unlock_irqrestore(&handler->lock, flags);
+                       __mmu_rb_remove(handler, node, mm);
+                       spin_lock_irqsave(&handler->lock, flags);
+               }
        }
        spin_unlock_irqrestore(&handler->lock, flags);
 }
@@ -59,7 +59,8 @@ struct mmu_rb_node {
 struct mmu_rb_ops {
        bool (*filter)(struct mmu_rb_node *, unsigned long, unsigned long);
        int (*insert)(struct rb_root *, struct mmu_rb_node *);
-       void (*remove)(struct rb_root *, struct mmu_rb_node *, bool);
+       void (*remove)(struct rb_root *, struct mmu_rb_node *,
+                      struct mm_struct *);
        int (*invalidate)(struct rb_root *, struct mmu_rb_node *);
 };
......
@@ -519,10 +519,12 @@ static void iowait_sdma_drained(struct iowait *wait)
         * do the flush work until that QP's
         * sdma work has finished.
         */
+       spin_lock(&qp->s_lock);
        if (qp->s_flags & RVT_S_WAIT_DMA) {
                qp->s_flags &= ~RVT_S_WAIT_DMA;
                hfi1_schedule_send(qp);
        }
+       spin_unlock(&qp->s_lock);
 }
 
 /**
......
@@ -87,7 +87,8 @@ static u32 find_phys_blocks(struct page **, unsigned, struct tid_pageset *);
 static int set_rcvarray_entry(struct file *, unsigned long, u32,
                              struct tid_group *, struct page **, unsigned);
 static int mmu_rb_insert(struct rb_root *, struct mmu_rb_node *);
-static void mmu_rb_remove(struct rb_root *, struct mmu_rb_node *, bool);
+static void mmu_rb_remove(struct rb_root *, struct mmu_rb_node *,
+                         struct mm_struct *);
 static int mmu_rb_invalidate(struct rb_root *, struct mmu_rb_node *);
 static int program_rcvarray(struct file *, unsigned long, struct tid_group *,
                            struct tid_pageset *, unsigned, u16, struct page **,
@@ -254,6 +255,8 @@ int hfi1_user_exp_rcv_free(struct hfi1_filedata *fd)
        struct hfi1_ctxtdata *uctxt = fd->uctxt;
        struct tid_group *grp, *gptr;
 
+       if (!test_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags))
+               return 0;
        /*
         * The notifier would have been removed when the process'es mm
         * was freed.
@@ -899,7 +902,7 @@ static int unprogram_rcvarray(struct file *fp, u32 tidinfo,
        if (!node || node->rcventry != (uctxt->expected_base + rcventry))
                return -EBADF;
        if (HFI1_CAP_IS_USET(TID_UNMAP))
-               mmu_rb_remove(&fd->tid_rb_root, &node->mmu, false);
+               mmu_rb_remove(&fd->tid_rb_root, &node->mmu, NULL);
        else
                hfi1_mmu_rb_remove(&fd->tid_rb_root, &node->mmu);
@@ -965,7 +968,7 @@ static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt,
                                continue;
                        if (HFI1_CAP_IS_USET(TID_UNMAP))
                                mmu_rb_remove(&fd->tid_rb_root,
-                                             &node->mmu, false);
+                                             &node->mmu, NULL);
                        else
                                hfi1_mmu_rb_remove(&fd->tid_rb_root,
                                                   &node->mmu);
@@ -1032,7 +1035,7 @@ static int mmu_rb_insert(struct rb_root *root, struct mmu_rb_node *node)
 }
 
 static void mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node,
-                         bool notifier)
+                         struct mm_struct *mm)
 {
        struct hfi1_filedata *fdata =
                container_of(root, struct hfi1_filedata, tid_rb_root);
......
@@ -278,7 +278,8 @@ static inline void pq_update(struct hfi1_user_sdma_pkt_q *);
 static void user_sdma_free_request(struct user_sdma_request *, bool);
 static int pin_vector_pages(struct user_sdma_request *,
                            struct user_sdma_iovec *);
-static void unpin_vector_pages(struct mm_struct *, struct page **, unsigned);
+static void unpin_vector_pages(struct mm_struct *, struct page **, unsigned,
+                              unsigned);
 static int check_header_template(struct user_sdma_request *,
                                 struct hfi1_pkt_header *, u32, u32);
 static int set_txreq_header(struct user_sdma_request *,
@@ -299,7 +300,8 @@ static int defer_packet_queue(
 static void activate_packet_queue(struct iowait *, int);
 static bool sdma_rb_filter(struct mmu_rb_node *, unsigned long, unsigned long);
 static int sdma_rb_insert(struct rb_root *, struct mmu_rb_node *);
-static void sdma_rb_remove(struct rb_root *, struct mmu_rb_node *, bool);
+static void sdma_rb_remove(struct rb_root *, struct mmu_rb_node *,
+                          struct mm_struct *);
 static int sdma_rb_invalidate(struct rb_root *, struct mmu_rb_node *);
 
 static struct mmu_rb_ops sdma_rb_ops = {
@@ -1063,8 +1065,10 @@ static int pin_vector_pages(struct user_sdma_request *req,
        rb_node = hfi1_mmu_rb_search(&pq->sdma_rb_root,
                                     (unsigned long)iovec->iov.iov_base,
                                     iovec->iov.iov_len);
-       if (rb_node)
+       if (rb_node && !IS_ERR(rb_node))
                node = container_of(rb_node, struct sdma_mmu_node, rb);
+       else
+               rb_node = NULL;
 
        if (!node) {
                node = kzalloc(sizeof(*node), GFP_KERNEL);
@@ -1107,7 +1111,8 @@ static int pin_vector_pages(struct user_sdma_request *req,
                        goto bail;
                }
                if (pinned != npages) {
-                       unpin_vector_pages(current->mm, pages, pinned);
+                       unpin_vector_pages(current->mm, pages, node->npages,
+                                          pinned);
                        ret = -EFAULT;
                        goto bail;
                }
@@ -1147,9 +1152,9 @@ static int pin_vector_pages(struct user_sdma_request *req,
 }
 
 static void unpin_vector_pages(struct mm_struct *mm, struct page **pages,
-                              unsigned npages)
+                              unsigned start, unsigned npages)
 {
-       hfi1_release_user_pages(mm, pages, npages, 0);
+       hfi1_release_user_pages(mm, pages + start, npages, 0);
 
        kfree(pages);
 }
@@ -1502,7 +1507,7 @@ static void user_sdma_free_request(struct user_sdma_request *req, bool unpin)
                                &req->pq->sdma_rb_root,
                                (unsigned long)req->iovs[i].iov.iov_base,
                                req->iovs[i].iov.iov_len);
-                       if (!mnode)
+                       if (!mnode || IS_ERR(mnode))
                                continue;
 
                        node = container_of(mnode, struct sdma_mmu_node, rb);
@@ -1547,7 +1552,7 @@ static int sdma_rb_insert(struct rb_root *root, struct mmu_rb_node *mnode)
 }
 
 static void sdma_rb_remove(struct rb_root *root, struct mmu_rb_node *mnode,
-                          bool notifier)
+                          struct mm_struct *mm)
 {
        struct sdma_mmu_node *node =
                container_of(mnode, struct sdma_mmu_node, rb);
@@ -1557,14 +1562,20 @@ static void sdma_rb_remove(struct rb_root *root, struct mmu_rb_node *mnode,
        node->pq->n_locked -= node->npages;
        spin_unlock(&node->pq->evict_lock);
 
-       unpin_vector_pages(notifier ? NULL : current->mm, node->pages,
+       /*
+        * If mm is set, we are being called by the MMU notifier and we
+        * should not pass a mm_struct to unpin_vector_page(). This is to
+        * prevent a deadlock when hfi1_release_user_pages() attempts to
+        * take the mmap_sem, which the MMU notifier has already taken.
+        */
+       unpin_vector_pages(mm ? NULL : current->mm, node->pages, 0,
                           node->npages);
        /*
         * If called by the MMU notifier, we have to adjust the pinned
         * page count ourselves.
         */
-       if (notifier)
-               current->mm->pinned_vm -= node->npages;
+       if (mm)
+               mm->pinned_vm -= node->npages;
 
        kfree(node);
 }
......
@@ -392,6 +392,17 @@ enum {
        MLX5_CAP_OFF_CMDIF_CSUM         = 46,
 };
 
+enum {
+       /*
+        * Max wqe size for rdma read is 512 bytes, so this
+        * limits our max_sge_rd as the wqe needs to fit:
+        * - ctrl segment (16 bytes)
+        * - rdma segment (16 bytes)
+        * - scatter elements (16 bytes each)
+        */
+       MLX5_MAX_SGE_RD = (512 - 16 - 16) / 16
+};
+
 struct mlx5_inbox_hdr {
        __be16          opcode;
        u8              rsvd[4];
......
@@ -34,6 +34,7 @@
 #define _RDMA_IB_H
 
 #include <linux/types.h>
+#include <linux/sched.h>
 
 struct ib_addr {
        union {
@@ -86,4 +87,19 @@ struct sockaddr_ib {
        __u64                   sib_scope_id;
 };
 
+/*
+ * The IB interfaces that use write() as bi-directional ioctl() are
+ * fundamentally unsafe, since there are lots of ways to trigger "write()"
+ * calls from various contexts with elevated privileges. That includes the
+ * traditional suid executable error message writes, but also various kernel
+ * interfaces that can write to file descriptors.
+ *
+ * This function provides protection for the legacy API by restricting the
+ * calling context.
+ */
+static inline bool ib_safe_file_access(struct file *filp)
+{
+       return filp->f_cred == current_cred() && segment_eq(get_fs(), USER_DS);
+}
+
 #endif /* _RDMA_IB_H */