Commit de560374 authored by Linus Torvalds

Merge branch 'for-linus' of master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband

* 'for-linus' of master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband:
  IB/mlx4: Add a driver for Mellanox ConnectX InfiniBand adapters
  IB: Put rlimit accounting struct in struct ib_umem
  IB/uverbs: Export ib_umem_get()/ib_umem_release() to modules
parents b5f0adbc 225c7b1f
......@@ -29,6 +29,11 @@ config INFINIBAND_USER_ACCESS
libibverbs, libibcm and a hardware driver library from
<http://www.openib.org>.
config INFINIBAND_USER_MEM
bool
depends on INFINIBAND_USER_ACCESS != n
default y
config INFINIBAND_ADDR_TRANS
bool
depends on INFINIBAND && INET
......@@ -40,6 +45,8 @@ source "drivers/infiniband/hw/ehca/Kconfig"
source "drivers/infiniband/hw/amso1100/Kconfig"
source "drivers/infiniband/hw/cxgb3/Kconfig"
source "drivers/infiniband/hw/mlx4/Kconfig"
source "drivers/infiniband/ulp/ipoib/Kconfig"
source "drivers/infiniband/ulp/srp/Kconfig"
......
......@@ -4,6 +4,7 @@ obj-$(CONFIG_INFINIBAND_IPATH) += hw/ipath/
obj-$(CONFIG_INFINIBAND_EHCA) += hw/ehca/
obj-$(CONFIG_INFINIBAND_AMSO1100) += hw/amso1100/
obj-$(CONFIG_INFINIBAND_CXGB3) += hw/cxgb3/
obj-$(CONFIG_MLX4_INFINIBAND) += hw/mlx4/
obj-$(CONFIG_INFINIBAND_IPOIB) += ulp/ipoib/
obj-$(CONFIG_INFINIBAND_SRP) += ulp/srp/
obj-$(CONFIG_INFINIBAND_ISER) += ulp/iser/
......@@ -9,6 +9,7 @@ obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \
ib_core-y := packer.o ud_header.o verbs.o sysfs.o \
device.o fmr_pool.o cache.o
ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
ib_mad-y := mad.o smi.o agent.o mad_rmpp.o
......@@ -28,5 +29,4 @@ ib_umad-y := user_mad.o
ib_ucm-y := ucm.o
ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o \
uverbs_marshall.o
ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o
......@@ -613,6 +613,8 @@ static void __exit ib_core_cleanup(void)
{
ib_cache_cleanup();
ib_sysfs_cleanup();
/* Make sure that any pending umem accounting work is done. */
flush_scheduled_work();
}
module_init(ib_core_init);
......
......@@ -39,13 +39,6 @@
#include "uverbs.h"
struct ib_umem_account_work {
struct work_struct work;
struct mm_struct *mm;
unsigned long diff;
};
static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
{
struct ib_umem_chunk *chunk, *tmp;
......@@ -64,35 +57,56 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
}
}
int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
void *addr, size_t size, int write)
/**
* ib_umem_get - Pin and DMA map userspace memory.
* @context: userspace context to pin memory for
* @addr: userspace virtual address to start at
* @size: length of region to pin
* @access: IB_ACCESS_xxx flags for memory being pinned
*/
struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
size_t size, int access)
{
struct ib_umem *umem;
struct page **page_list;
struct ib_umem_chunk *chunk;
unsigned long locked;
unsigned long lock_limit;
unsigned long cur_base;
unsigned long npages;
int ret = 0;
int ret;
int off;
int i;
if (!can_do_mlock())
return -EPERM;
return ERR_PTR(-EPERM);
page_list = (struct page **) __get_free_page(GFP_KERNEL);
if (!page_list)
return -ENOMEM;
umem = kmalloc(sizeof *umem, GFP_KERNEL);
if (!umem)
return ERR_PTR(-ENOMEM);
umem->context = context;
umem->length = size;
umem->offset = addr & ~PAGE_MASK;
umem->page_size = PAGE_SIZE;
/*
* We ask for writable memory if any access flags other than
* "remote read" are set. "Local write" and "remote write"
* obviously require write access. "Remote atomic" can do
* things like fetch and add, which will modify memory, and
* "MW bind" can change permissions by binding a window.
*/
umem->writable = !!(access & ~IB_ACCESS_REMOTE_READ);
mem->user_base = (unsigned long) addr;
mem->length = size;
mem->offset = (unsigned long) addr & ~PAGE_MASK;
mem->page_size = PAGE_SIZE;
mem->writable = write;
INIT_LIST_HEAD(&umem->chunk_list);
INIT_LIST_HEAD(&mem->chunk_list);
page_list = (struct page **) __get_free_page(GFP_KERNEL);
if (!page_list) {
kfree(umem);
return ERR_PTR(-ENOMEM);
}
npages = PAGE_ALIGN(size + mem->offset) >> PAGE_SHIFT;
npages = PAGE_ALIGN(size + umem->offset) >> PAGE_SHIFT;
down_write(&current->mm->mmap_sem);
......@@ -104,13 +118,13 @@ int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
goto out;
}
cur_base = (unsigned long) addr & PAGE_MASK;
cur_base = addr & PAGE_MASK;
while (npages) {
ret = get_user_pages(current, current->mm, cur_base,
min_t(int, npages,
PAGE_SIZE / sizeof (struct page *)),
1, !write, page_list, NULL);
1, !umem->writable, page_list, NULL);
if (ret < 0)
goto out;
......@@ -136,7 +150,7 @@ int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
chunk->page_list[i].length = PAGE_SIZE;
}
chunk->nmap = ib_dma_map_sg(dev,
chunk->nmap = ib_dma_map_sg(context->device,
&chunk->page_list[0],
chunk->nents,
DMA_BIDIRECTIONAL);
......@@ -151,75 +165,94 @@ int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
ret -= chunk->nents;
off += chunk->nents;
list_add_tail(&chunk->list, &mem->chunk_list);
list_add_tail(&chunk->list, &umem->chunk_list);
}
ret = 0;
}
out:
if (ret < 0)
__ib_umem_release(dev, mem, 0);
else
if (ret < 0) {
__ib_umem_release(context->device, umem, 0);
kfree(umem);
} else
current->mm->locked_vm = locked;
up_write(&current->mm->mmap_sem);
free_page((unsigned long) page_list);
return ret;
return ret < 0 ? ERR_PTR(ret) : umem;
}
EXPORT_SYMBOL(ib_umem_get);
void ib_umem_release(struct ib_device *dev, struct ib_umem *umem)
static void ib_umem_account(struct work_struct *work)
{
__ib_umem_release(dev, umem, 1);
down_write(&current->mm->mmap_sem);
current->mm->locked_vm -=
PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;
up_write(&current->mm->mmap_sem);
}
struct ib_umem *umem = container_of(work, struct ib_umem, work);
static void ib_umem_account(struct work_struct *_work)
{
struct ib_umem_account_work *work =
container_of(_work, struct ib_umem_account_work, work);
down_write(&work->mm->mmap_sem);
work->mm->locked_vm -= work->diff;
up_write(&work->mm->mmap_sem);
mmput(work->mm);
kfree(work);
down_write(&umem->mm->mmap_sem);
umem->mm->locked_vm -= umem->diff;
up_write(&umem->mm->mmap_sem);
mmput(umem->mm);
kfree(umem);
}
void ib_umem_release_on_close(struct ib_device *dev, struct ib_umem *umem)
/**
* ib_umem_release - release memory pinned with ib_umem_get
* @umem: umem struct to release
*/
void ib_umem_release(struct ib_umem *umem)
{
struct ib_umem_account_work *work;
struct ib_ucontext *context = umem->context;
struct mm_struct *mm;
unsigned long diff;
__ib_umem_release(dev, umem, 1);
__ib_umem_release(umem->context->device, umem, 1);
mm = get_task_mm(current);
if (!mm)
return;
diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;
/*
* We may be called with the mm's mmap_sem already held. This
* can happen when a userspace munmap() is the call that drops
* the last reference to our file and calls our release
* method. If there are memory regions to destroy, we'll end
* up here and not be able to take the mmap_sem. Therefore we
* defer the vm_locked accounting to the system workqueue.
* up here and not be able to take the mmap_sem. In that case
* we defer the locked_vm accounting to the system workqueue.
*/
if (context->closing && !down_write_trylock(&mm->mmap_sem)) {
INIT_WORK(&umem->work, ib_umem_account);
umem->mm = mm;
umem->diff = diff;
work = kmalloc(sizeof *work, GFP_KERNEL);
if (!work) {
mmput(mm);
schedule_work(&umem->work);
return;
}
} else
down_write(&mm->mmap_sem);
current->mm->locked_vm -= diff;
up_write(&mm->mmap_sem);
mmput(mm);
kfree(umem);
}
EXPORT_SYMBOL(ib_umem_release);
int ib_umem_page_count(struct ib_umem *umem)
{
struct ib_umem_chunk *chunk;
int shift;
int i;
int n;
shift = ilog2(umem->page_size);
INIT_WORK(&work->work, ib_umem_account);
work->mm = mm;
work->diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;
n = 0;
list_for_each_entry(chunk, &umem->chunk_list, list)
for (i = 0; i < chunk->nmap; ++i)
n += sg_dma_len(&chunk->page_list[i]) >> shift;
schedule_work(&work->work);
return n;
}
EXPORT_SYMBOL(ib_umem_page_count);
......@@ -45,6 +45,7 @@
#include <linux/completion.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
/*
......@@ -163,11 +164,6 @@ void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_event_handler(struct ib_event_handler *handler,
struct ib_event *event);
int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
void *addr, size_t size, int write);
void ib_umem_release(struct ib_device *dev, struct ib_umem *umem);
void ib_umem_release_on_close(struct ib_device *dev, struct ib_umem *umem);
#define IB_UVERBS_DECLARE_CMD(name) \
ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \
const char __user *buf, int in_len, \
......
/*
* Copyright (c) 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
* Copyright (c) 2005, 2006, 2007 Cisco Systems. All rights reserved.
* Copyright (c) 2005 PathScale, Inc. All rights reserved.
* Copyright (c) 2006 Mellanox Technologies. All rights reserved.
*
......@@ -295,6 +295,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
INIT_LIST_HEAD(&ucontext->qp_list);
INIT_LIST_HEAD(&ucontext->srq_list);
INIT_LIST_HEAD(&ucontext->ah_list);
ucontext->closing = 0;
resp.num_comp_vectors = file->device->num_comp_vectors;
......@@ -573,7 +574,7 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
struct ib_uverbs_reg_mr cmd;
struct ib_uverbs_reg_mr_resp resp;
struct ib_udata udata;
struct ib_umem_object *obj;
struct ib_uobject *uobj;
struct ib_pd *pd;
struct ib_mr *mr;
int ret;
......@@ -599,35 +600,21 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
!(cmd.access_flags & IB_ACCESS_LOCAL_WRITE))
return -EINVAL;
obj = kmalloc(sizeof *obj, GFP_KERNEL);
if (!obj)
uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
if (!uobj)
return -ENOMEM;
init_uobj(&obj->uobject, 0, file->ucontext, &mr_lock_key);
down_write(&obj->uobject.mutex);
/*
* We ask for writable memory if any access flags other than
* "remote read" are set. "Local write" and "remote write"
* obviously require write access. "Remote atomic" can do
* things like fetch and add, which will modify memory, and
* "MW bind" can change permissions by binding a window.
*/
ret = ib_umem_get(file->device->ib_dev, &obj->umem,
(void *) (unsigned long) cmd.start, cmd.length,
!!(cmd.access_flags & ~IB_ACCESS_REMOTE_READ));
if (ret)
goto err_free;
obj->umem.virt_base = cmd.hca_va;
init_uobj(uobj, 0, file->ucontext, &mr_lock_key);
down_write(&uobj->mutex);
pd = idr_read_pd(cmd.pd_handle, file->ucontext);
if (!pd) {
ret = -EINVAL;
goto err_release;
goto err_free;
}
mr = pd->device->reg_user_mr(pd, &obj->umem, cmd.access_flags, &udata);
mr = pd->device->reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va,
cmd.access_flags, &udata);
if (IS_ERR(mr)) {
ret = PTR_ERR(mr);
goto err_put;
......@@ -635,19 +622,19 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
mr->device = pd->device;
mr->pd = pd;
mr->uobject = &obj->uobject;
mr->uobject = uobj;
atomic_inc(&pd->usecnt);
atomic_set(&mr->usecnt, 0);
obj->uobject.object = mr;
ret = idr_add_uobj(&ib_uverbs_mr_idr, &obj->uobject);
uobj->object = mr;
ret = idr_add_uobj(&ib_uverbs_mr_idr, uobj);
if (ret)
goto err_unreg;
memset(&resp, 0, sizeof resp);
resp.lkey = mr->lkey;
resp.rkey = mr->rkey;
resp.mr_handle = obj->uobject.id;
resp.mr_handle = uobj->id;
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp)) {
......@@ -658,17 +645,17 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
put_pd_read(pd);
mutex_lock(&file->mutex);
list_add_tail(&obj->uobject.list, &file->ucontext->mr_list);
list_add_tail(&uobj->list, &file->ucontext->mr_list);
mutex_unlock(&file->mutex);
obj->uobject.live = 1;
uobj->live = 1;
up_write(&obj->uobject.mutex);
up_write(&uobj->mutex);
return in_len;
err_copy:
idr_remove_uobj(&ib_uverbs_mr_idr, &obj->uobject);
idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
err_unreg:
ib_dereg_mr(mr);
......@@ -676,11 +663,8 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
err_put:
put_pd_read(pd);
err_release:
ib_umem_release(file->device->ib_dev, &obj->umem);
err_free:
put_uobj_write(&obj->uobject);
put_uobj_write(uobj);
return ret;
}
......@@ -691,7 +675,6 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
struct ib_uverbs_dereg_mr cmd;
struct ib_mr *mr;
struct ib_uobject *uobj;
struct ib_umem_object *memobj;
int ret = -EINVAL;
if (copy_from_user(&cmd, buf, sizeof cmd))
......@@ -701,8 +684,7 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
if (!uobj)
return -EINVAL;
memobj = container_of(uobj, struct ib_umem_object, uobject);
mr = uobj->object;
mr = uobj->object;
ret = ib_dereg_mr(mr);
if (!ret)
......@@ -719,8 +701,6 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
list_del(&uobj->list);
mutex_unlock(&file->mutex);
ib_umem_release(file->device->ib_dev, &memobj->umem);
put_uobj(uobj);
return in_len;
......
......@@ -183,6 +183,8 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
if (!context)
return 0;
context->closing = 1;
list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) {
struct ib_ah *ah = uobj->object;
......@@ -230,16 +232,10 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) {
struct ib_mr *mr = uobj->object;
struct ib_device *mrdev = mr->device;
struct ib_umem_object *memobj;
idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
ib_dereg_mr(mr);
memobj = container_of(uobj, struct ib_umem_object, uobject);
ib_umem_release_on_close(mrdev, &memobj->umem);
kfree(memobj);
kfree(uobj);
}
list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) {
......@@ -906,7 +902,6 @@ static void __exit ib_uverbs_cleanup(void)
unregister_filesystem(&uverbs_event_fs);
class_destroy(uverbs_class);
unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
flush_scheduled_work();
idr_destroy(&ib_uverbs_pd_idr);
idr_destroy(&ib_uverbs_mr_idr);
idr_destroy(&ib_uverbs_mw_idr);
......
......@@ -56,6 +56,7 @@
#include <asm/byteorder.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
#include "c2.h"
#include "c2_provider.h"
......@@ -396,6 +397,7 @@ static struct ib_mr *c2_reg_phys_mr(struct ib_pd *ib_pd,
}
mr->pd = to_c2pd(ib_pd);
mr->umem = NULL;
pr_debug("%s - page shift %d, pbl_depth %d, total_len %u, "
"*iova_start %llx, first pa %llx, last pa %llx\n",
__FUNCTION__, page_shift, pbl_depth, total_len,
......@@ -428,8 +430,8 @@ static struct ib_mr *c2_get_dma_mr(struct ib_pd *pd, int acc)
return c2_reg_phys_mr(pd, &bl, 1, acc, &kva);
}
static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
int acc, struct ib_udata *udata)
static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt, int acc, struct ib_udata *udata)
{
u64 *pages;
u64 kva = 0;
......@@ -441,15 +443,23 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
struct c2_mr *c2mr;
pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
shift = ffs(region->page_size) - 1;
c2mr = kmalloc(sizeof(*c2mr), GFP_KERNEL);
if (!c2mr)
return ERR_PTR(-ENOMEM);
c2mr->pd = c2pd;
c2mr->umem = ib_umem_get(pd->uobject->context, start, length, acc);
if (IS_ERR(c2mr->umem)) {
err = PTR_ERR(c2mr->umem);
kfree(c2mr);
return ERR_PTR(err);
}
shift = ffs(c2mr->umem->page_size) - 1;
n = 0;
list_for_each_entry(chunk, &region->chunk_list, list)
list_for_each_entry(chunk, &c2mr->umem->chunk_list, list)
n += chunk->nents;
pages = kmalloc(n * sizeof(u64), GFP_KERNEL);
......@@ -459,35 +469,34 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
}
i = 0;
list_for_each_entry(chunk, &region->chunk_list, list) {
list_for_each_entry(chunk, &c2mr->umem->chunk_list, list) {
for (j = 0; j < chunk->nmap; ++j) {
len = sg_dma_len(&chunk->page_list[j]) >> shift;
for (k = 0; k < len; ++k) {
pages[i++] =
sg_dma_address(&chunk->page_list[j]) +
(region->page_size * k);
(c2mr->umem->page_size * k);
}
}
}
kva = (u64)region->virt_base;
kva = virt;
err = c2_nsmr_register_phys_kern(to_c2dev(pd->device),
pages,
region->page_size,
c2mr->umem->page_size,
i,
region->length,
region->offset,
length,
c2mr->umem->offset,
&kva,
c2_convert_access(acc),
c2mr);
kfree(pages);
if (err) {
kfree(c2mr);
return ERR_PTR(err);
}
if (err)
goto err;
return &c2mr->ibmr;
err:
ib_umem_release(c2mr->umem);
kfree(c2mr);
return ERR_PTR(err);
}
......@@ -502,8 +511,11 @@ static int c2_dereg_mr(struct ib_mr *ib_mr)
err = c2_stag_dealloc(to_c2dev(ib_mr->device), ib_mr->lkey);
if (err)
pr_debug("c2_stag_dealloc failed: %d\n", err);
else
else {
if (mr->umem)
ib_umem_release(mr->umem);
kfree(mr);
}
return err;
}
......
......@@ -73,6 +73,7 @@ struct c2_pd {
struct c2_mr {
struct ib_mr ibmr;
struct c2_pd *pd;
struct ib_umem *umem;
};
struct c2_av;
......
......@@ -47,6 +47,7 @@
#include <rdma/iw_cm.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
#include "cxio_hal.h"
......@@ -443,6 +444,8 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr)
remove_handle(rhp, &rhp->mmidr, mmid);
if (mhp->kva)
kfree((void *) (unsigned long) mhp->kva);
if (mhp->umem)
ib_umem_release(mhp->umem);
PDBG("%s mmid 0x%x ptr %p\n", __FUNCTION__, mmid, mhp);
kfree(mhp);
return 0;
......@@ -577,8 +580,8 @@ static int iwch_reregister_phys_mem(struct ib_mr *mr,
}
static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
int acc, struct ib_udata *udata)
static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt, int acc, struct ib_udata *udata)
{
__be64 *pages;
int shift, n, len;
......@@ -591,7 +594,6 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
struct iwch_reg_user_mr_resp uresp;
PDBG("%s ib_pd %p\n", __FUNCTION__, pd);
shift = ffs(region->page_size) - 1;
php = to_iwch_pd(pd);
rhp = php->rhp;
......@@ -599,8 +601,17 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
if (!mhp)
return ERR_PTR(-ENOMEM);
mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc);
if (IS_ERR(mhp->umem)) {
err = PTR_ERR(mhp->umem);
kfree(mhp);
return ERR_PTR(err);
}
shift = ffs(mhp->umem->page_size) - 1;
n = 0;
list_for_each_entry(chunk, &region->chunk_list, list)
list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
n += chunk->nents;
pages = kmalloc(n * sizeof(u64), GFP_KERNEL);
......@@ -611,13 +622,13 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
i = n = 0;
list_for_each_entry(chunk, &region->chunk_list, list)
list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
for (j = 0; j < chunk->nmap; ++j) {
len = sg_dma_len(&chunk->page_list[j]) >> shift;
for (k = 0; k < len; ++k) {
pages[i++] = cpu_to_be64(sg_dma_address(
&chunk->page_list[j]) +
region->page_size * k);
mhp->umem->page_size * k);
}
}
......@@ -625,9 +636,9 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
mhp->attr.pdid = php->pdid;
mhp->attr.zbva = 0;
mhp->attr.perms = iwch_ib_to_tpt_access(acc);
mhp->attr.va_fbo = region->virt_base;
mhp->attr.va_fbo = virt;
mhp->attr.page_size = shift - 12;
mhp->attr.len = (u32) region->length;
mhp->attr.len = (u32) length;
mhp->attr.pbl_size = i;
err = iwch_register_mem(rhp, php, mhp, shift, pages);
kfree(pages);
......@@ -650,6 +661,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
return &mhp->ibmr;
err:
ib_umem_release(mhp->umem);
kfree(mhp);
return ERR_PTR(err);
}
......
......@@ -73,6 +73,7 @@ struct tpt_attributes {
struct iwch_mr {
struct ib_mr ibmr;
struct ib_umem *umem;
struct iwch_dev *rhp;
u64 kva;
struct tpt_attributes attr;
......
......@@ -176,6 +176,7 @@ struct ehca_mr {
struct ib_mr ib_mr; /* must always be first in ehca_mr */
struct ib_fmr ib_fmr; /* must always be first in ehca_mr */
} ib;
struct ib_umem *umem;
spinlock_t mrlock;
enum ehca_mr_flag flags;
......
......@@ -78,8 +78,7 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
int num_phys_buf,
int mr_access_flags, u64 *iova_start);
struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
struct ib_umem *region,
struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt,
int mr_access_flags, struct ib_udata *udata);
int ehca_rereg_phys_mr(struct ib_mr *mr,
......
......@@ -39,6 +39,8 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <rdma/ib_umem.h>
#include <asm/current.h>
#include "ehca_iverbs.h"
......@@ -238,10 +240,8 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
/*----------------------------------------------------------------------*/
struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
struct ib_umem *region,
int mr_access_flags,
struct ib_udata *udata)
struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt,
int mr_access_flags, struct ib_udata *udata)
{
struct ib_mr *ib_mr;
struct ehca_mr *e_mr;
......@@ -257,11 +257,7 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
ehca_gen_err("bad pd=%p", pd);
return ERR_PTR(-EFAULT);
}
if (!region) {
ehca_err(pd->device, "bad input values: region=%p", region);
ib_mr = ERR_PTR(-EINVAL);
goto reg_user_mr_exit0;
}
if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
!(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
......@@ -275,17 +271,10 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
ib_mr = ERR_PTR(-EINVAL);
goto reg_user_mr_exit0;
}
if (region->page_size != PAGE_SIZE) {
ehca_err(pd->device, "page size not supported, "
"region->page_size=%x", region->page_size);
ib_mr = ERR_PTR(-EINVAL);
goto reg_user_mr_exit0;
}
if ((region->length == 0) ||
((region->virt_base + region->length) < region->virt_base)) {
if (length == 0 || virt + length < virt) {
ehca_err(pd->device, "bad input values: length=%lx "
"virt_base=%lx", region->length, region->virt_base);
"virt_base=%lx", length, virt);
ib_mr = ERR_PTR(-EINVAL);
goto reg_user_mr_exit0;
}
......@@ -297,40 +286,55 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
goto reg_user_mr_exit0;
}
e_mr->umem = ib_umem_get(pd->uobject->context, start, length,
mr_access_flags);
if (IS_ERR(e_mr->umem)) {
ib_mr = (void *) e_mr->umem;
goto reg_user_mr_exit1;
}
if (e_mr->umem->page_size != PAGE_SIZE) {
ehca_err(pd->device, "page size not supported, "
"e_mr->umem->page_size=%x", e_mr->umem->page_size);
ib_mr = ERR_PTR(-EINVAL);
goto reg_user_mr_exit2;
}
/* determine number of MR pages */
num_pages_mr = (((region->virt_base % PAGE_SIZE) + region->length +
PAGE_SIZE - 1) / PAGE_SIZE);
num_pages_4k = (((region->virt_base % EHCA_PAGESIZE) + region->length +
EHCA_PAGESIZE - 1) / EHCA_PAGESIZE);
num_pages_mr = (((virt % PAGE_SIZE) + length + PAGE_SIZE - 1) /
PAGE_SIZE);
num_pages_4k = (((virt % EHCA_PAGESIZE) + length + EHCA_PAGESIZE - 1) /
EHCA_PAGESIZE);
/* register MR on HCA */
pginfo.type = EHCA_MR_PGI_USER;
pginfo.num_pages = num_pages_mr;
pginfo.num_4k = num_pages_4k;
pginfo.region = region;
pginfo.next_4k = region->offset / EHCA_PAGESIZE;
pginfo.region = e_mr->umem;
pginfo.next_4k = e_mr->umem->offset / EHCA_PAGESIZE;
pginfo.next_chunk = list_prepare_entry(pginfo.next_chunk,
(&region->chunk_list),
(&e_mr->umem->chunk_list),
list);
ret = ehca_reg_mr(shca, e_mr, (u64*)region->virt_base,
region->length, mr_access_flags, e_pd, &pginfo,
&e_mr->ib.ib_mr.lkey, &e_mr->ib.ib_mr.rkey);
ret = ehca_reg_mr(shca, e_mr, (u64*) virt, length, mr_access_flags, e_pd,
&pginfo, &e_mr->ib.ib_mr.lkey, &e_mr->ib.ib_mr.rkey);
if (ret) {
ib_mr = ERR_PTR(ret);
goto reg_user_mr_exit1;
goto reg_user_mr_exit2;
}
/* successful registration of all pages */
return &e_mr->ib.ib_mr;
reg_user_mr_exit2:
ib_umem_release(e_mr->umem);
reg_user_mr_exit1:
ehca_mr_delete(e_mr);
reg_user_mr_exit0:
if (IS_ERR(ib_mr))
ehca_err(pd->device, "rc=%lx pd=%p region=%p mr_access_flags=%x"
ehca_err(pd->device, "rc=%lx pd=%p mr_access_flags=%x"
" udata=%p",
PTR_ERR(ib_mr), pd, region, mr_access_flags, udata);
PTR_ERR(ib_mr), pd, mr_access_flags, udata);
return ib_mr;
} /* end ehca_reg_user_mr() */
......@@ -596,6 +600,9 @@ int ehca_dereg_mr(struct ib_mr *mr)
goto dereg_mr_exit0;
}
if (e_mr->umem)
ib_umem_release(e_mr->umem);
/* successful deregistration */
ehca_mr_delete(e_mr);
......
......@@ -31,6 +31,7 @@
* SOFTWARE.
*/
#include <rdma/ib_umem.h>
#include <rdma/ib_pack.h>
#include <rdma/ib_smi.h>
......@@ -147,6 +148,7 @@ struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd,
mr->mr.offset = 0;
mr->mr.access_flags = acc;
mr->mr.max_segs = num_phys_buf;
mr->umem = NULL;
m = 0;
n = 0;
......@@ -170,46 +172,56 @@ struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd,
/**
* ipath_reg_user_mr - register a userspace memory region
* @pd: protection domain for this memory region
* @region: the user memory region
* @start: starting userspace address
* @length: length of region to register
* @virt_addr: virtual address to use (from HCA's point of view)
* @mr_access_flags: access flags for this memory region
* @udata: unused by the InfiniPath driver
*
* Returns the memory region on success, otherwise returns an errno.
*/
struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
int mr_access_flags, struct ib_udata *udata)
struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int mr_access_flags,
struct ib_udata *udata)
{
struct ipath_mr *mr;
struct ib_umem *umem;
struct ib_umem_chunk *chunk;
int n, m, i;
struct ib_mr *ret;
if (region->length == 0) {
if (length == 0) {
ret = ERR_PTR(-EINVAL);
goto bail;
}
umem = ib_umem_get(pd->uobject->context, start, length, mr_access_flags);
if (IS_ERR(umem))
return (void *) umem;
n = 0;
list_for_each_entry(chunk, &region->chunk_list, list)
list_for_each_entry(chunk, &umem->chunk_list, list)
n += chunk->nents;
mr = alloc_mr(n, &to_idev(pd->device)->lk_table);
if (!mr) {
ret = ERR_PTR(-ENOMEM);
ib_umem_release(umem);
goto bail;
}
mr->mr.pd = pd;
mr->mr.user_base = region->user_base;
mr->mr.iova = region->virt_base;
mr->mr.length = region->length;
mr->mr.offset = region->offset;
mr->mr.user_base = start;
mr->mr.iova = virt_addr;
mr->mr.length = length;
mr->mr.offset = umem->offset;
mr->mr.access_flags = mr_access_flags;
mr->mr.max_segs = n;
mr->umem = umem;
m = 0;
n = 0;
list_for_each_entry(chunk, &region->chunk_list, list) {
list_for_each_entry(chunk, &umem->chunk_list, list) {
for (i = 0; i < chunk->nents; i++) {
void *vaddr;
......@@ -219,7 +231,7 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
goto bail;
}
mr->mr.map[m]->segs[n].vaddr = vaddr;
mr->mr.map[m]->segs[n].length = region->page_size;
mr->mr.map[m]->segs[n].length = umem->page_size;
n++;
if (n == IPATH_SEGSZ) {
m++;
......@@ -253,6 +265,10 @@ int ipath_dereg_mr(struct ib_mr *ibmr)
i--;
kfree(mr->mr.map[i]);
}
if (mr->umem)
ib_umem_release(mr->umem);
kfree(mr);
return 0;
}
......
......@@ -251,6 +251,7 @@ struct ipath_sge {
/* Memory region */
struct ipath_mr {
struct ib_mr ibmr;
struct ib_umem *umem;
struct ipath_mregion mr; /* must be last */
};
......@@ -751,8 +752,8 @@ struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd,
struct ib_phys_buf *buffer_list,
int num_phys_buf, int acc, u64 *iova_start);
struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
int mr_access_flags,
struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int mr_access_flags,
struct ib_udata *udata);
int ipath_dereg_mr(struct ib_mr *ibmr);
......
config MLX4_INFINIBAND
tristate "Mellanox ConnectX HCA support"
depends on INFINIBAND
select MLX4_CORE
---help---
This driver provides low-level InfiniBand support for
Mellanox ConnectX PCI Express host channel adapters (HCAs).
This is required to use InfiniBand protocols such as
IP-over-IB or SRP with these devices.
obj-$(CONFIG_MLX4_INFINIBAND) += mlx4_ib.o
mlx4_ib-y := ah.o cq.o doorbell.o mad.o main.o mr.o qp.o srq.o
/*
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "mlx4_ib.h"
/**
 * mlx4_ib_create_ah - build an address handle from generic AH attributes
 * @pd: protection domain the handle is created on; supplies the PD number
 * @ah_attr: address attributes to encode into the handle's address vector
 *
 * Allocates a struct mlx4_ib_ah with GFP_ATOMIC, zeroes its address vector
 * (av) and fills it in from @ah_attr.  Multi-byte av fields are stored in
 * big-endian form.
 *
 * Returns the embedded struct ib_ah on success, or ERR_PTR(-ENOMEM) if the
 * allocation fails.
 */
struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
{
	struct mlx4_dev *dev = to_mdev(pd->device)->dev;
	struct mlx4_ib_ah *ah;

	ah = kmalloc(sizeof *ah, GFP_ATOMIC);
	if (!ah)
		return ERR_PTR(-ENOMEM);

	memset(&ah->av, 0, sizeof ah->av);

	/*
	 * NOTE(review): port_num and sl are presumably narrower than int and
	 * would be promoted to signed int before the shift -- confirm against
	 * struct ib_ah_attr.  Widen them to unsigned first so that moving a
	 * set bit into bit 31 is well defined instead of a signed overflow.
	 */
	ah->av.port_pd = cpu_to_be32(to_mpd(pd)->pdn |
				     ((unsigned int) ah_attr->port_num << 24));
	ah->av.g_slid  = ah_attr->src_path_bits;
	ah->av.dlid    = cpu_to_be16(ah_attr->dlid);

	if (ah_attr->static_rate) {
		ah->av.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET;
		/*
		 * Step the encoded rate down until it is one the device lists
		 * in caps.stat_rate_support, stopping at the encoding of
		 * IB_RATE_2_5_GBPS.
		 */
		while (ah->av.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
		       !(1 << ah->av.stat_rate & dev->caps.stat_rate_support))
			--ah->av.stat_rate;
	}

	/* SL occupies the top four bits of the combined word. */
	ah->av.sl_tclass_flowlabel =
		cpu_to_be32((unsigned int) ah_attr->sl << 28);

	if (ah_attr->ah_flags & IB_AH_GRH) {
		/* Bit 7 of g_slid is set for GRH; the low 7 bits stay src_path_bits. */
		ah->av.g_slid   |= 0x80;
		ah->av.gid_index = ah_attr->grh.sgid_index;
		ah->av.hop_limit = ah_attr->grh.hop_limit;
		ah->av.sl_tclass_flowlabel |=
			cpu_to_be32((ah_attr->grh.traffic_class << 20) |
				    ah_attr->grh.flow_label);
		memcpy(ah->av.dgid, ah_attr->grh.dgid.raw, 16);
	}

	return &ah->ibah;
}
/**
 * mlx4_ib_query_ah - decode an address handle into generic AH attributes
 * @ibah: address handle to read
 * @ah_attr: output; cleared first, then filled from the handle's address vector
 *
 * The GRH fields of @ah_attr are only filled in when
 * mlx4_ib_ah_grh_present() reports a GRH for the handle; otherwise they
 * stay zero.
 *
 * Always returns 0.
 */
int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
{
	struct mlx4_ib_ah *mah = to_mah(ibah);

	memset(ah_attr, 0, sizeof(*ah_attr));

	ah_attr->dlid	       = be16_to_cpu(mah->av.dlid);
	ah_attr->sl	       = be32_to_cpu(mah->av.sl_tclass_flowlabel) >> 28;
	ah_attr->port_num      = be32_to_cpu(mah->av.port_pd) >> 24;
	ah_attr->src_path_bits = mah->av.g_slid & 0x7F;

	/* A stored rate of zero is left as zero; otherwise undo the offset. */
	if (mah->av.stat_rate != 0)
		ah_attr->static_rate =
			mah->av.stat_rate - MLX4_STAT_RATE_OFFSET;

	if (!mlx4_ib_ah_grh_present(mah))
		return 0;

	ah_attr->ah_flags	   = IB_AH_GRH;
	ah_attr->grh.traffic_class =
		be32_to_cpu(mah->av.sl_tclass_flowlabel) >> 20;
	ah_attr->grh.flow_label    =
		be32_to_cpu(mah->av.sl_tclass_flowlabel) & 0xfffff;
	ah_attr->grh.hop_limit	   = mah->av.hop_limit;
	ah_attr->grh.sgid_index    = mah->av.gid_index;
	memcpy(ah_attr->grh.dgid.raw, mah->av.dgid, 16);

	return 0;
}
/**
 * mlx4_ib_destroy_ah - free an address handle
 * @ah: handle to free
 *
 * Frees the struct mlx4_ib_ah that to_mah() yields for @ah.
 * Always returns 0.
 */
int mlx4_ib_destroy_ah(struct ib_ah *ah)
{
	struct mlx4_ib_ah *mah = to_mah(ah);

	kfree(mah);

	return 0;
}
This diff is collapsed.
/*
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/slab.h>
#include "mlx4_ib.h"
/*
 * One page of kernel doorbell records together with the bitmaps that
 * track which records in the page are free.  A set bit means "free":
 * allocation clears a bit, freeing sets it again.
 */
struct mlx4_ib_db_pgdir {
/* Link in the owning device's pgdir_list. */
struct list_head list;
/* Free single (order-0) doorbell slots. */
DECLARE_BITMAP(order0, MLX4_IB_DB_PER_PAGE);
/* Free order-1 slots, i.e. aligned pairs of doorbell records. */
DECLARE_BITMAP(order1, MLX4_IB_DB_PER_PAGE / 2);
/* bits[o] points at the bitmap for order o (order0 / order1 above). */
unsigned long *bits[2];
/* CPU address of the coherent DMA page holding the doorbell records. */
__be32 *db_page;
/* DMA address of db_page. */
dma_addr_t db_dma;
};
/*
 * Allocate a doorbell page directory plus one PAGE_SIZE chunk of coherent
 * DMA memory for the doorbell records themselves.
 *
 * The directory starts with every order-1 slot marked free and every
 * order-0 bit clear (the structure is zero-allocated).
 *
 * Returns the new directory, or NULL if either allocation fails.
 */
static struct mlx4_ib_db_pgdir *mlx4_ib_alloc_db_pgdir(struct mlx4_ib_dev *dev)
{
	struct mlx4_ib_db_pgdir *dir = kzalloc(sizeof *dir, GFP_KERNEL);

	if (!dir)
		return NULL;

	bitmap_fill(dir->order1, MLX4_IB_DB_PER_PAGE / 2);
	dir->bits[0] = dir->order0;
	dir->bits[1] = dir->order1;

	dir->db_page = dma_alloc_coherent(dev->ib_dev.dma_device, PAGE_SIZE,
					  &dir->db_dma, GFP_KERNEL);
	if (dir->db_page)
		return dir;

	/* No backing page: the directory is useless, drop it. */
	kfree(dir);
	return NULL;
}
/*
 * Try to carve a doorbell of the requested @order out of @pgdir.
 *
 * The smallest order with a free slot (starting at @order) is used.  If a
 * larger slot had to be taken, its unused buddy is handed back to the
 * bitmap of the requested order.
 *
 * On success @db is filled in and 0 is returned; -ENOMEM means the page
 * has no suitable free slot.
 */
static int mlx4_ib_alloc_db_from_pgdir(struct mlx4_ib_db_pgdir *pgdir,
				       struct mlx4_ib_db *db, int order)
{
	int slot = 0;
	int lvl;

	for (lvl = order; lvl <= 1; ++lvl) {
		slot = find_first_bit(pgdir->bits[lvl],
				      MLX4_IB_DB_PER_PAGE >> lvl);
		if (slot < MLX4_IB_DB_PER_PAGE >> lvl)
			break;
	}

	/* Ran off the end of the orders without finding a free bit. */
	if (lvl > 1)
		return -ENOMEM;

	clear_bit(slot, pgdir->bits[lvl]);

	/* Convert the slot number at level 'lvl' into an order-0 index. */
	slot <<= lvl;

	/* Split a larger slot: the other half becomes free at @order. */
	if (lvl > order)
		set_bit(slot ^ 1, pgdir->bits[order]);

	db->u.pgdir = pgdir;
	db->index   = slot;
	db->db      = pgdir->db_page + db->index;
	db->dma     = pgdir->db_dma  + db->index * 4;
	db->order   = order;

	return 0;
}
int mlx4_ib_db_alloc(struct mlx4_ib_dev *dev, struct mlx4_ib_db *db, int order)
{
struct mlx4_ib_db_pgdir *pgdir;
int ret = 0;
mutex_lock(&dev->pgdir_mutex);
list_for_each_entry(pgdir, &dev->pgdir_list, list)
if (!mlx4_ib_alloc_db_from_pgdir(pgdir, db, order))
goto out;
pgdir = mlx4_ib_alloc_db_pgdir(dev);
if (!pgdir) {
ret = -ENOMEM;
goto out;
}
list_add(&pgdir->list, &dev->pgdir_list);
/* This should never fail -- we just allocated an empty page: */
WARN_ON(mlx4_ib_alloc_db_from_pgdir(pgdir, db, order));
out:
mutex_unlock(&dev->pgdir_mutex);
return ret;
}
void mlx4_ib_db_free(struct mlx4_ib_dev *dev, struct mlx4_ib_db *db)
{
int o;
int i;
mutex_lock(&dev->pgdir_mutex);
o = db->order;
i = db->index;
if (db->order == 0 && test_bit(i ^ 1, db->u.pgdir->order0)) {
clear_bit(i ^ 1, db->u.pgdir->order0);
++o;
}
i >>= o;
set_bit(i, db->u.pgdir->bits[o]);
if (bitmap_full(db->u.pgdir->order1, MLX4_IB_DB_PER_PAGE / 2)) {
dma_free_coherent(dev->ib_dev.dma_device, PAGE_SIZE,
db->u.pgdir->db_page, db->u.pgdir->db_dma);
list_del(&db->u.pgdir->list);
kfree(db->u.pgdir);
}
mutex_unlock(&dev->pgdir_mutex);
}
/*
 * Bookkeeping for one user-space page that holds doorbell records and
 * has been pinned through ib_umem_get().  Shared by every doorbell that
 * lives in the same page.
 */
struct mlx4_ib_user_db_page {
/* Link in the owning ucontext's db_page_list. */
struct list_head list;
/* Pinned user memory covering this page. */
struct ib_umem *umem;
/* Page-aligned user virtual address of the page. */
unsigned long user_virt;
/* Number of doorbells currently mapped from this page. */
int refcnt;
};
int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt,
struct mlx4_ib_db *db)
{
struct mlx4_ib_user_db_page *page;
struct ib_umem_chunk *chunk;
int err = 0;
mutex_lock(&context->db_page_mutex);
list_for_each_entry(page, &context->db_page_list, list)
if (page->user_virt == (virt & PAGE_MASK))
goto found;
page = kmalloc(sizeof *page, GFP_KERNEL);
if (!page) {
err = -ENOMEM;
goto out;
}
page->user_virt = (virt & PAGE_MASK);
page->refcnt = 0;
page->umem = ib_umem_get(&context->ibucontext, virt & PAGE_MASK,
PAGE_SIZE, 0);
if (IS_ERR(page->umem)) {
err = PTR_ERR(page->umem);
kfree(page);
goto out;
}
list_add(&page->list, &context->db_page_list);
found:
chunk = list_entry(page->umem->chunk_list.next, struct ib_umem_chunk, list);
db->dma = sg_dma_address(chunk->page_list) + (virt & ~PAGE_MASK);
db->u.user_page = page;
++page->refcnt;
out:
mutex_unlock(&context->db_page_mutex);
return err;
}
void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_ib_db *db)
{
mutex_lock(&context->db_page_mutex);
if (!--db->u.user_page->refcnt) {
list_del(&db->u.user_page->list);
ib_umem_release(db->u.user_page->umem);
kfree(db->u.user_page);
}
mutex_unlock(&context->db_page_mutex);
}
/*
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <rdma/ib_mad.h>
#include <rdma/ib_smi.h>
#include <linux/mlx4/cmd.h>
#include "mlx4_ib.h"
/*
 * Management class values that mlx4_ib_process_mad() hands to firmware
 * alongside the performance management class (GET/SET methods only).
 */
enum {
MLX4_IB_VENDOR_CLASS1 = 0x9,
MLX4_IB_VENDOR_CLASS2 = 0xa
};
/*
 * Hand a MAD to the device firmware through the MAD_IFC command and
 * collect the firmware's response.
 *
 * @dev:          device to run the command on
 * @ignore_mkey:  nonzero sets op_modifier bit 0x1 (also forced when there
 *                is no @in_wc)
 * @ignore_bkey:  nonzero sets op_modifier bit 0x2 (also forced when there
 *                is no @in_wc)
 * @port:         port number, placed in the low bits of the input modifier
 * @in_wc:        work completion of the received MAD, or NULL
 * @in_grh:       GRH of the received MAD, or NULL; only used with @in_wc
 * @in_mad:       256-byte MAD to pass in
 * @response_mad: 256-byte buffer for the response
 *
 * Returns 0 on success or a negative error code.  @response_mad is only
 * written when the command succeeds.
 */
int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey,
		 int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
		 void *in_mad, void *response_mad)
{
	struct mlx4_cmd_mailbox *inmailbox, *outmailbox;
	void *inbox;
	int err;
	u32 in_modifier = port;
	u8 op_modifier = 0;

	inmailbox = mlx4_alloc_cmd_mailbox(dev->dev);
	if (IS_ERR(inmailbox))
		return PTR_ERR(inmailbox);
	inbox = inmailbox->buf;

	outmailbox = mlx4_alloc_cmd_mailbox(dev->dev);
	if (IS_ERR(outmailbox)) {
		mlx4_free_cmd_mailbox(dev->dev, inmailbox);
		return PTR_ERR(outmailbox);
	}

	memcpy(inbox, in_mad, 256);

	/*
	 * Key check traps can't be generated unless we have in_wc to
	 * tell us where to send the trap.
	 */
	if (ignore_mkey || !in_wc)
		op_modifier |= 0x1;
	if (ignore_bkey || !in_wc)
		op_modifier |= 0x2;

	if (in_wc) {
		/* Extended info block placed after the MAD in the mailbox. */
		struct {
			__be32		my_qpn;
			u32		reserved1;
			__be32		rqpn;
			u8		sl;
			u8		g_path;
			u16		reserved2[2];
			__be16		pkey;
			u32		reserved3[11];
			u8		grh[40];
		} *ext_info;

		memset(inbox + 256, 0, 256);
		ext_info = inbox + 256;

		ext_info->my_qpn = cpu_to_be32(in_wc->qp->qp_num);
		ext_info->rqpn   = cpu_to_be32(in_wc->src_qp);
		ext_info->sl     = in_wc->sl << 4;
		ext_info->g_path = in_wc->dlid_path_bits |
			(in_wc->wc_flags & IB_WC_GRH ? 0x80 : 0);
		ext_info->pkey   = cpu_to_be16(in_wc->pkey_index);

		if (in_grh)
			memcpy(ext_info->grh, in_grh, 40);

		/* Tell firmware the extended info block is present. */
		op_modifier |= 0x4;

		in_modifier |= in_wc->slid << 16;
	}

	err = mlx4_cmd_box(dev->dev, inmailbox->dma, outmailbox->dma,
			   in_modifier, op_modifier,
			   MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C);

	/*
	 * Only hand back the response if the command succeeded.  (A stray
	 * ';' after the condition used to make this copy unconditional.)
	 */
	if (!err)
		memcpy(response_mad, outmailbox->buf, 256);

	mlx4_free_cmd_mailbox(dev->dev, inmailbox);
	mlx4_free_cmd_mailbox(dev->dev, outmailbox);

	return err;
}
/*
 * Replace the cached address handle used to reach the subnet manager on
 * @port_num with one that targets @lid / @sl.
 *
 * Does nothing if the port has no SMI send agent or if the new address
 * handle cannot be created.  The swap (and destruction of the old handle)
 * happens under dev->sm_lock.
 */
static void update_sm_ah(struct mlx4_ib_dev *dev, u8 port_num, u16 lid, u8 sl)
{
	int idx = port_num - 1;
	struct ib_ah_attr attr;
	struct ib_ah *ah;

	if (!dev->send_agent[idx][0])
		return;

	memset(&attr, 0, sizeof attr);
	attr.port_num = port_num;
	attr.dlid     = lid;
	attr.sl       = sl;

	ah = ib_create_ah(dev->send_agent[idx][0]->qp->pd, &attr);
	if (IS_ERR(ah))
		return;

	spin_lock(&dev->sm_lock);
	if (dev->sm_ah[idx])
		ib_destroy_ah(dev->sm_ah[idx]);
	dev->sm_ah[idx] = ah;
	spin_unlock(&dev->sm_lock);
}
/*
 * Inspect subnet management SET MADs so that the events they imply can be
 * synthesized:
 *
 *  - PortInfo:   refresh the cached SM address handle and dispatch either
 *                a client-reregister or a LID-change event;
 *  - P_Key table: dispatch a P_Key-change event.
 *
 * MADs of any other class or method are ignored.
 */
static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad)
{
	struct ib_event event;
	u8 mgmt_class = mad->mad_hdr.mgmt_class;

	if (mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED &&
	    mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
		return;

	if (mad->mad_hdr.method != IB_MGMT_METHOD_SET)
		return;

	if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO) {
		struct ib_port_info *pinfo =
			(struct ib_port_info *) ((struct ib_smp *) mad)->data;

		update_sm_ah(to_mdev(ibdev), port_num,
			     be16_to_cpu(pinfo->sm_lid),
			     pinfo->neighbormtu_mastersmsl & 0xf);

		event.device           = ibdev;
		event.element.port_num = port_num;
		event.event            = (pinfo->clientrereg_resv_subnetto & 0x80) ?
			IB_EVENT_CLIENT_REREGISTER : IB_EVENT_LID_CHANGE;

		ib_dispatch_event(&event);
	}

	if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PKEY_TABLE) {
		event.device           = ibdev;
		event.event            = IB_EVENT_PKEY_CHANGE;
		event.element.port_num = port_num;

		ib_dispatch_event(&event);
	}
}
static void node_desc_override(struct ib_device *dev,
struct ib_mad *mad)
{
if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP &&
mad->mad_hdr.attr_id == IB_SMP_ATTR_NODE_DESC) {
spin_lock(&to_mdev(dev)->sm_lock);
memcpy(((struct ib_smp *) mad)->data, dev->node_desc, 64);
spin_unlock(&to_mdev(dev)->sm_lock);
}
}
/*
 * Forward a locally generated trap MAD to the subnet manager.
 *
 * The MAD is sent through the port's SMI agent for LID-routed subnet
 * management traps and through the GSI agent for everything else, using
 * the cached SM address handle.  The trap is silently dropped if there is
 * no agent, no send buffer can be allocated, no SM address handle is
 * cached, or posting the send fails.
 */
static void forward_trap(struct mlx4_ib_dev *dev, u8 port_num, struct ib_mad *mad)
{
	int qpn = mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED;
	struct ib_mad_send_buf *send_buf;
	struct ib_mad_agent *agent = dev->send_agent[port_num - 1][qpn];
	int ret;

	if (!agent)
		return;

	send_buf = ib_create_send_mad(agent, qpn, 0, 0, IB_MGMT_MAD_HDR,
				      IB_MGMT_MAD_DATA, GFP_ATOMIC);
	/*
	 * ib_create_send_mad() reports failure with an ERR_PTR; it must
	 * not be dereferenced (this is a GFP_ATOMIC allocation, so failure
	 * is a realistic possibility).
	 */
	if (IS_ERR(send_buf))
		return;

	/*
	 * We rely here on the fact that MLX QPs don't use the
	 * address handle after the send is posted (this is
	 * wrong following the IB spec strictly, but we know
	 * it's OK for our devices).
	 */
	spin_lock(&dev->sm_lock);
	memcpy(send_buf->mad, mad, sizeof *mad);
	if ((send_buf->ah = dev->sm_ah[port_num - 1]))
		ret = ib_post_send_mad(send_buf, NULL);
	else
		ret = -EINVAL;
	spin_unlock(&dev->sm_lock);

	/* On success the buffer is freed from the send completion handler. */
	if (ret)
		ib_free_send_mad(send_buf);
}
/*
 * Process an incoming (or locally generated) MAD.
 *
 * Traps with a source LID of 0 are forwarded to the SM and consumed.
 * Subnet management MADs (GET/SET/TRAP_REPRESS, excluding SMInfo and
 * vendor-specific attributes) and performance/vendor class MADs (GET/SET)
 * are passed to firmware through mlx4_MAD_IFC(); everything else is
 * returned untouched with IB_MAD_RESULT_SUCCESS.
 *
 * Returns a combination of IB_MAD_RESULT_* flags:
 *   SUCCESS            - MAD not handled here
 *   SUCCESS | CONSUMED - handled, no reply to send
 *   SUCCESS | REPLY    - handled, @out_mad holds the reply
 *   FAILURE            - the firmware command failed
 */
int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
			struct ib_wc *in_wc, struct ib_grh *in_grh,
			struct ib_mad *in_mad, struct ib_mad *out_mad)
{
	u16 slid;
	int err;

	if (in_wc)
		slid = in_wc->slid;
	else
		slid = be16_to_cpu(IB_LID_PERMISSIVE);

	if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0) {
		forward_trap(to_mdev(ibdev), port_num, in_mad);
		return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
	}

	switch (in_mad->mad_hdr.mgmt_class) {
	case IB_MGMT_CLASS_SUBN_LID_ROUTED:
	case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
		if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET &&
		    in_mad->mad_hdr.method != IB_MGMT_METHOD_SET &&
		    in_mad->mad_hdr.method != IB_MGMT_METHOD_TRAP_REPRESS)
			return IB_MAD_RESULT_SUCCESS;

		/*
		 * Don't process SMInfo queries or vendor-specific
		 * MADs -- the SMA can't handle them.
		 */
		if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO ||
		    ((in_mad->mad_hdr.attr_id & IB_SMP_ATTR_VENDOR_MASK) ==
		     IB_SMP_ATTR_VENDOR_MASK))
			return IB_MAD_RESULT_SUCCESS;
		break;

	case IB_MGMT_CLASS_PERF_MGMT:
	case MLX4_IB_VENDOR_CLASS1:
	case MLX4_IB_VENDOR_CLASS2:
		if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET &&
		    in_mad->mad_hdr.method != IB_MGMT_METHOD_SET)
			return IB_MAD_RESULT_SUCCESS;
		break;

	default:
		return IB_MAD_RESULT_SUCCESS;
	}

	err = mlx4_MAD_IFC(to_mdev(ibdev),
			   mad_flags & IB_MAD_IGNORE_MKEY,
			   mad_flags & IB_MAD_IGNORE_BKEY,
			   port_num, in_wc, in_grh, in_mad, out_mad);
	if (err)
		return IB_MAD_RESULT_FAILURE;

	/* Only snoop / patch MADs that firmware accepted without error. */
	if (!out_mad->mad_hdr.status) {
		smp_snoop(ibdev, port_num, in_mad);
		node_desc_override(ibdev, out_mad);
	}

	/* set return bit in status of directed route responses */
	if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
		out_mad->mad_hdr.status |= cpu_to_be16(1 << 15);

	/* no response for trap repress */
	if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS)
		return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;

	return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
}
/*
 * Send completion callback registered for the MAD agents created in
 * mlx4_ib_mad_init(): releases the send buffer of the completed MAD.
 */
static void send_handler(struct ib_mad_agent *agent,
struct ib_mad_send_wc *mad_send_wc)
{
ib_free_send_mad(mad_send_wc->send_buf);
}
/*
 * Register one SMI and one GSI send-only MAD agent for every port of @dev
 * and store them in dev->send_agent[port][0 = SMI, 1 = GSI].
 *
 * Returns 0 on success.  If any registration fails, every agent recorded
 * in dev->send_agent is unregistered and the registration error is
 * returned.
 */
int mlx4_ib_mad_init(struct mlx4_ib_dev *dev)
{
	struct ib_mad_agent *agent;
	int port, qp;
	int err;

	for (port = 0; port < dev->dev->caps.num_ports; ++port) {
		for (qp = 0; qp <= 1; ++qp) {
			agent = ib_register_mad_agent(&dev->ib_dev, port + 1,
						      qp ? IB_QPT_GSI : IB_QPT_SMI,
						      NULL, 0, send_handler,
						      NULL, NULL);
			if (IS_ERR(agent)) {
				err = PTR_ERR(agent);
				goto unwind;
			}
			dev->send_agent[port][qp] = agent;
		}
	}

	return 0;

unwind:
	for (port = 0; port < dev->dev->caps.num_ports; ++port) {
		for (qp = 0; qp <= 1; ++qp) {
			if (dev->send_agent[port][qp])
				ib_unregister_mad_agent(dev->send_agent[port][qp]);
		}
	}

	return err;
}
/*
 * Undo mlx4_ib_mad_init(): for every port, clear and unregister both MAD
 * agents, then destroy the cached SM address handle if there is one.
 */
void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev)
{
	struct ib_mad_agent *agent;
	int port, qp;

	for (port = 0; port < dev->dev->caps.num_ports; ++port) {
		for (qp = 0; qp <= 1; ++qp) {
			/* Clear the slot before the agent goes away. */
			agent = dev->send_agent[port][qp];
			dev->send_agent[port][qp] = NULL;
			ib_unregister_mad_agent(agent);
		}

		if (dev->sm_ah[port])
			ib_destroy_ah(dev->sm_ah[port]);
	}
}
This diff is collapsed.
/*
* Copyright (c) 2006, 2007 Cisco Systems. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef MLX4_IB_H
#define MLX4_IB_H
#include <linux/compiler.h>
#include <linux/list.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_umem.h>
#include <linux/mlx4/device.h>
#include <linux/mlx4/doorbell.h>
/* Number of 4-byte doorbell records that fit in one page. */
enum {
MLX4_IB_DB_PER_PAGE = PAGE_SIZE / 4
};
struct mlx4_ib_db_pgdir;
struct mlx4_ib_user_db_page;
/*
 * A doorbell record.  Kernel doorbells are carved out of a
 * mlx4_ib_db_pgdir page; user doorbells live in a pinned user page.
 */
struct mlx4_ib_db {
/* CPU pointer to the record (set for kernel doorbells). */
__be32 *db;
union {
/* Owning page directory of a kernel doorbell. */
struct mlx4_ib_db_pgdir *pgdir;
/* Pinned user page of a user doorbell. */
struct mlx4_ib_user_db_page *user_page;
} u;
/* DMA address of the record. */
dma_addr_t dma;
/* Record index within the pgdir page (kernel doorbells). */
int index;
/* Allocation order requested for a kernel doorbell (0 or 1). */
int order;
};
/* Per-process user context; embeds the generic ib_ucontext. */
struct mlx4_ib_ucontext {
struct ib_ucontext ibucontext;
struct mlx4_uar uar;
/* List of mlx4_ib_user_db_page entries pinned for this context. */
struct list_head db_page_list;
/* Protects db_page_list and the refcounts of its entries. */
struct mutex db_page_mutex;
};
/* Protection domain; embeds the generic ib_pd. */
struct mlx4_ib_pd {
struct ib_pd ibpd;
/* PD number handed to mlx4_mr_alloc() / mlx4_srq_alloc(). */
u32 pdn;
};
/* A CQ buffer together with its MTT (users are outside this chunk). */
struct mlx4_ib_cq_buf {
struct mlx4_buf buf;
struct mlx4_mtt mtt;
};
/*
 * Completion queue; embeds the generic ib_cq and the low-level mlx4_cq
 * (see to_mcq() / to_mibcq()).
 */
struct mlx4_ib_cq {
struct ib_cq ibcq;
struct mlx4_cq mcq;
struct mlx4_ib_cq_buf buf;
/* Doorbell record of this CQ. */
struct mlx4_ib_db db;
spinlock_t lock;
/* NOTE(review): presumably the pinned user buffer for user CQs -- confirm in the CQ code. */
struct ib_umem *umem;
};
/* Memory region; embeds the generic ib_mr and the low-level mlx4_mr. */
struct mlx4_ib_mr {
struct ib_mr ibmr;
struct mlx4_mr mmr;
/* Pinned user memory for user MRs; NULL for DMA MRs. */
struct ib_umem *umem;
};
/*
 * State of one work queue of a QP (used for both rq and sq in
 * struct mlx4_ib_qp).  The field users are outside this chunk.
 */
struct mlx4_ib_wq {
u64 *wrid;
spinlock_t lock;
int max;
int max_gs;
int offset;
int wqe_shift;
unsigned head;
unsigned tail;
};
/*
 * Queue pair; embeds the generic ib_qp and the low-level mlx4_qp
 * (see to_mqp() / to_mibqp()).  The field users are outside this chunk.
 */
struct mlx4_ib_qp {
struct ib_qp ibqp;
struct mlx4_qp mqp;
struct mlx4_buf buf;
struct mlx4_ib_db db;
/* Receive queue state. */
struct mlx4_ib_wq rq;
u32 doorbell_qpn;
__be32 sq_signal_bits;
/* Send queue state. */
struct mlx4_ib_wq sq;
struct ib_umem *umem;
struct mlx4_mtt mtt;
int buf_size;
struct mutex mutex;
u8 port;
u8 alt_port;
u8 atomic_rd_en;
u8 resp_depth;
u8 state;
};
/*
 * Shared receive queue; embeds the generic ib_srq and the low-level
 * mlx4_srq (see to_msrq() / to_mibsrq()).
 */
struct mlx4_ib_srq {
struct ib_srq ibsrq;
struct mlx4_srq msrq;
/* WQE buffer of a kernel SRQ. */
struct mlx4_buf buf;
/* Doorbell record; written with wqe_ctr after posting receives. */
struct mlx4_ib_db db;
/* Work request IDs indexed by WQE index (kernel SRQs only). */
u64 *wrid;
/* Protects head, tail, wqe_ctr and the WQE free list. */
spinlock_t lock;
/* Index of the next free WQE to post. */
int head;
/* Index of the last WQE on the free list. */
int tail;
/* Running count of posted WQEs, published through the doorbell. */
u16 wqe_ctr;
/* Pinned user WQE buffer (user SRQs only). */
struct ib_umem *umem;
struct mlx4_mtt mtt;
/* Serializes mlx4_srq_arm() calls in mlx4_ib_modify_srq(). */
struct mutex mutex;
};
/* Address handle; embeds the generic ib_ah plus the hardware address vector. */
struct mlx4_ib_ah {
struct ib_ah ibah;
struct mlx4_av av;
};
/* Per-device state; embeds the generic ib_device (see to_mdev()). */
struct mlx4_ib_dev {
struct ib_device ib_dev;
/* Underlying low-level mlx4 device. */
struct mlx4_dev *dev;
void __iomem *uar_map;
/* List of mlx4_ib_db_pgdir pages used for kernel doorbells. */
struct list_head pgdir_list;
/* Protects pgdir_list and the bitmaps of its entries. */
struct mutex pgdir_mutex;
struct mlx4_uar priv_uar;
u32 priv_pdn;
MLX4_DECLARE_DOORBELL_LOCK(uar_lock);
/* MAD send agents per port: [port][0] is SMI, [port][1] is GSI. */
struct ib_mad_agent *send_agent[MLX4_MAX_PORTS][2];
/* Cached address handle for reaching the SM, per port. */
struct ib_ah *sm_ah[MLX4_MAX_PORTS];
/* Protects sm_ah; also held while copying the node description. */
spinlock_t sm_lock;
struct mutex cap_mask_mutex;
};
/* Get the mlx4_ib_dev that embeds @ibdev. */
static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev)
{
return container_of(ibdev, struct mlx4_ib_dev, ib_dev);
}
/* Get the mlx4_ib_ucontext that embeds @ibucontext. */
static inline struct mlx4_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
{
return container_of(ibucontext, struct mlx4_ib_ucontext, ibucontext);
}
/* Get the mlx4_ib_pd that embeds @ibpd. */
static inline struct mlx4_ib_pd *to_mpd(struct ib_pd *ibpd)
{
return container_of(ibpd, struct mlx4_ib_pd, ibpd);
}
/* Get the mlx4_ib_cq that embeds the generic CQ @ibcq. */
static inline struct mlx4_ib_cq *to_mcq(struct ib_cq *ibcq)
{
return container_of(ibcq, struct mlx4_ib_cq, ibcq);
}
/* Get the mlx4_ib_cq that embeds the low-level CQ @mcq. */
static inline struct mlx4_ib_cq *to_mibcq(struct mlx4_cq *mcq)
{
return container_of(mcq, struct mlx4_ib_cq, mcq);
}
/* Get the mlx4_ib_mr that embeds @ibmr. */
static inline struct mlx4_ib_mr *to_mmr(struct ib_mr *ibmr)
{
return container_of(ibmr, struct mlx4_ib_mr, ibmr);
}
/* Get the mlx4_ib_qp that embeds the generic QP @ibqp. */
static inline struct mlx4_ib_qp *to_mqp(struct ib_qp *ibqp)
{
return container_of(ibqp, struct mlx4_ib_qp, ibqp);
}
/* Get the mlx4_ib_qp that embeds the low-level QP @mqp. */
static inline struct mlx4_ib_qp *to_mibqp(struct mlx4_qp *mqp)
{
return container_of(mqp, struct mlx4_ib_qp, mqp);
}
/* Get the mlx4_ib_srq that embeds the generic SRQ @ibsrq. */
static inline struct mlx4_ib_srq *to_msrq(struct ib_srq *ibsrq)
{
return container_of(ibsrq, struct mlx4_ib_srq, ibsrq);
}
/* Get the mlx4_ib_srq that embeds the low-level SRQ @msrq. */
static inline struct mlx4_ib_srq *to_mibsrq(struct mlx4_srq *msrq)
{
return container_of(msrq, struct mlx4_ib_srq, msrq);
}
/* Get the mlx4_ib_ah that embeds @ibah. */
static inline struct mlx4_ib_ah *to_mah(struct ib_ah *ibah)
{
return container_of(ibah, struct mlx4_ib_ah, ibah);
}
/* Doorbell records: kernel allocation and user-page mapping. */
int mlx4_ib_db_alloc(struct mlx4_ib_dev *dev, struct mlx4_ib_db *db, int order);
void mlx4_ib_db_free(struct mlx4_ib_dev *dev, struct mlx4_ib_db *db);
int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt,
struct mlx4_ib_db *db);
void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_ib_db *db);
/* Memory regions. */
struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc);
int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
struct ib_umem *umem);
struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
struct ib_udata *udata);
int mlx4_ib_dereg_mr(struct ib_mr *mr);
/* Completion queues. */
struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector,
struct ib_ucontext *context,
struct ib_udata *udata);
int mlx4_ib_destroy_cq(struct ib_cq *cq);
int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
int mlx4_ib_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq);
void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq);
/* Address handles. */
struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
int mlx4_ib_destroy_ah(struct ib_ah *ah);
/* Shared receive queues. */
struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
struct ib_srq_init_attr *init_attr,
struct ib_udata *udata);
int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
int mlx4_ib_destroy_srq(struct ib_srq *srq);
void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index);
int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr);
/* Queue pairs. */
struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata);
int mlx4_ib_destroy_qp(struct ib_qp *qp);
int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata);
int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct ib_send_wr **bad_wr);
int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr);
/* MAD processing. */
int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey,
int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
void *in_mad, void *response_mad);
int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
struct ib_wc *in_wc, struct ib_grh *in_grh,
struct ib_mad *in_mad, struct ib_mad *out_mad);
int mlx4_ib_mad_init(struct mlx4_ib_dev *dev);
void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev);
/*
 * Return 1 if bit 0x80 of the address vector's g_slid field is set, which
 * callers treat as "this address handle carries a GRH", else 0.
 */
static inline int mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah)
{
return !!(ah->av.g_slid & 0x80);
}
#endif /* MLX4_IB_H */
/*
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "mlx4_ib.h"
/*
 * Translate IB_ACCESS_* flags into the MLX4_PERM_* bits used by the
 * low-level driver.  Local read permission is always granted.
 */
static u32 convert_access(int acc)
{
	u32 perm = MLX4_PERM_LOCAL_READ;

	if (acc & IB_ACCESS_REMOTE_ATOMIC)
		perm |= MLX4_PERM_ATOMIC;
	if (acc & IB_ACCESS_REMOTE_WRITE)
		perm |= MLX4_PERM_REMOTE_WRITE;
	if (acc & IB_ACCESS_REMOTE_READ)
		perm |= MLX4_PERM_REMOTE_READ;
	if (acc & IB_ACCESS_LOCAL_WRITE)
		perm |= MLX4_PERM_LOCAL_WRITE;

	return perm;
}
struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc)
{
struct mlx4_ib_mr *mr;
int err;
mr = kmalloc(sizeof *mr, GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
err = mlx4_mr_alloc(to_mdev(pd->device)->dev, to_mpd(pd)->pdn, 0,
~0ull, convert_access(acc), 0, 0, &mr->mmr);
if (err)
goto err_free;
err = mlx4_mr_enable(to_mdev(pd->device)->dev, &mr->mmr);
if (err)
goto err_mr;
mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
mr->umem = NULL;
return &mr->ibmr;
err_mr:
mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);
err_free:
kfree(mr);
return ERR_PTR(err);
}
int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
struct ib_umem *umem)
{
u64 *pages;
struct ib_umem_chunk *chunk;
int i, j, k;
int n;
int len;
int err = 0;
pages = (u64 *) __get_free_page(GFP_KERNEL);
if (!pages)
return -ENOMEM;
i = n = 0;
list_for_each_entry(chunk, &umem->chunk_list, list)
for (j = 0; j < chunk->nmap; ++j) {
len = sg_dma_len(&chunk->page_list[j]) >> mtt->page_shift;
for (k = 0; k < len; ++k) {
pages[i++] = sg_dma_address(&chunk->page_list[j]) +
umem->page_size * k;
/*
* Be friendly to WRITE_MTT firmware
* command, and pass it chunks of
* appropriate size.
*/
if (i == PAGE_SIZE / sizeof (u64) - 2) {
err = mlx4_write_mtt(dev->dev, mtt, n,
i, pages);
if (err)
goto out;
n += i;
i = 0;
}
}
}
if (i)
err = mlx4_write_mtt(dev->dev, mtt, n, i, pages);
out:
free_page((unsigned long) pages);
return err;
}
/*
 * Register a user memory region.
 *
 * The user range [@start, @start + @length) is pinned with ib_umem_get(),
 * a hardware MR covering @virt_addr / @length is allocated, the page
 * addresses are written to its MTT and the MR is enabled.
 *
 * Returns the new ib_mr or an ERR_PTR carrying the first failure; on
 * failure everything acquired so far is released.
 */
struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
				  u64 virt_addr, int access_flags,
				  struct ib_udata *udata)
{
	struct mlx4_ib_dev *dev = to_mdev(pd->device);
	struct mlx4_ib_mr *mr;
	int page_shift;
	int npages;
	int err;

	mr = kmalloc(sizeof *mr, GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	mr->umem = ib_umem_get(pd->uobject->context, start, length, access_flags);
	if (IS_ERR(mr->umem)) {
		err = PTR_ERR(mr->umem);
		goto free_mr;
	}

	npages     = ib_umem_page_count(mr->umem);
	page_shift = ilog2(mr->umem->page_size);

	err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length,
			    convert_access(access_flags), npages, page_shift,
			    &mr->mmr);
	if (err)
		goto release_umem;

	err = mlx4_ib_umem_write_mtt(dev, &mr->mmr.mtt, mr->umem);
	if (!err)
		err = mlx4_mr_enable(dev->dev, &mr->mmr);
	if (err)
		goto release_mmr;

	mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;

	return &mr->ibmr;

release_mmr:
	mlx4_mr_free(dev->dev, &mr->mmr);

release_umem:
	ib_umem_release(mr->umem);

free_mr:
	kfree(mr);

	return ERR_PTR(err);
}
int mlx4_ib_dereg_mr(struct ib_mr *ibmr)
{
struct mlx4_ib_mr *mr = to_mmr(ibmr);
mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr);
if (mr->umem)
ib_umem_release(mr->umem);
kfree(mr);
return 0;
}
This diff is collapsed.
/*
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/mlx4/qp.h>
#include <linux/mlx4/srq.h>
#include "mlx4_ib.h"
#include "user.h"
/*
 * Return the address of WQE number @n in the SRQ's kernel buffer, handling
 * both a single contiguous buffer and a list of page-sized buffers.
 */
static void *get_wqe(struct mlx4_ib_srq *srq, int n)
{
	int offset = n << srq->msrq.wqe_shift;

	if (srq->buf.nbufs != 1)
		return srq->buf.u.page_list[offset >> PAGE_SHIFT].buf +
			(offset & (PAGE_SIZE - 1));

	return srq->buf.u.direct.buf + offset;
}
/*
 * Low-level SRQ event callback: translate the mlx4 event into the matching
 * IB event and deliver it to the consumer's handler, if one is registered.
 * Unexpected event types are logged and dropped.
 */
static void mlx4_ib_srq_event(struct mlx4_srq *srq, enum mlx4_event type)
{
	struct ib_srq *ibsrq = &to_mibsrq(srq)->ibsrq;
	struct ib_event event;

	if (!ibsrq->event_handler)
		return;

	event.device      = ibsrq->device;
	event.element.srq = ibsrq;

	switch (type) {
	case MLX4_EVENT_TYPE_SRQ_LIMIT:
		event.event = IB_EVENT_SRQ_LIMIT_REACHED;
		break;
	case MLX4_EVENT_TYPE_SRQ_CATAS_ERROR:
		event.event = IB_EVENT_SRQ_ERR;
		break;
	default:
		printk(KERN_WARNING "mlx4_ib: Unexpected event type %d "
		       "on SRQ %06x\n", type, srq->srqn);
		return;
	}

	ibsrq->event_handler(&event, ibsrq->srq_context);
}
/*
 * Create a shared receive queue on @pd.
 *
 * @pd:        protection domain; pd->uobject set means a user-space SRQ
 * @init_attr: requested attributes; attr.max_wr is updated on success to
 *             the number of WRs actually usable
 * @udata:     user command/response buffer (user-space SRQs only)
 *
 * For user SRQs the WQE buffer and doorbell are supplied by user space and
 * pinned/mapped here; for kernel SRQs a doorbell, WQE buffer and wrid
 * array are allocated and the WQE free list is initialized.
 *
 * Returns the new ib_srq or an ERR_PTR.  On failure everything acquired so
 * far is released, including the hardware SRQ if it had already been
 * allocated when copying the SRQ number back to user space failed.
 */
struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
				  struct ib_srq_init_attr *init_attr,
				  struct ib_udata *udata)
{
	struct mlx4_ib_dev *dev = to_mdev(pd->device);
	struct mlx4_ib_srq *srq;
	struct mlx4_wqe_srq_next_seg *next;
	int desc_size;
	int buf_size;
	int err;
	int i;

	/* Sanity check SRQ size before proceeding */
	if (init_attr->attr.max_wr  >= dev->dev->caps.max_srq_wqes ||
	    init_attr->attr.max_sge >  dev->dev->caps.max_srq_sge)
		return ERR_PTR(-EINVAL);

	srq = kmalloc(sizeof *srq, GFP_KERNEL);
	if (!srq)
		return ERR_PTR(-ENOMEM);

	mutex_init(&srq->mutex);
	spin_lock_init(&srq->lock);
	/* One extra entry is reserved, hence max_wr + 1. */
	srq->msrq.max    = roundup_pow_of_two(init_attr->attr.max_wr + 1);
	srq->msrq.max_gs = init_attr->attr.max_sge;

	desc_size = max(32UL,
			roundup_pow_of_two(sizeof (struct mlx4_wqe_srq_next_seg) +
					   srq->msrq.max_gs *
					   sizeof (struct mlx4_wqe_data_seg)));
	srq->msrq.wqe_shift = ilog2(desc_size);

	buf_size = srq->msrq.max * desc_size;

	if (pd->uobject) {
		struct mlx4_ib_create_srq ucmd;

		if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
			err = -EFAULT;
			goto err_srq;
		}

		srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
					buf_size, 0);
		if (IS_ERR(srq->umem)) {
			err = PTR_ERR(srq->umem);
			goto err_srq;
		}

		err = mlx4_mtt_init(dev->dev, ib_umem_page_count(srq->umem),
				    ilog2(srq->umem->page_size), &srq->mtt);
		if (err)
			goto err_buf;

		err = mlx4_ib_umem_write_mtt(dev, &srq->mtt, srq->umem);
		if (err)
			goto err_mtt;

		err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context),
					  ucmd.db_addr, &srq->db);
		if (err)
			goto err_mtt;
	} else {
		err = mlx4_ib_db_alloc(dev, &srq->db, 0);
		if (err)
			goto err_srq;

		*srq->db.db = 0;

		if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &srq->buf)) {
			err = -ENOMEM;
			goto err_db;
		}

		srq->head    = 0;
		srq->tail    = srq->msrq.max - 1;
		srq->wqe_ctr = 0;

		/* Chain all WQEs into a circular free list. */
		for (i = 0; i < srq->msrq.max; ++i) {
			next = get_wqe(srq, i);
			next->next_wqe_index =
				cpu_to_be16((i + 1) & (srq->msrq.max - 1));
		}

		err = mlx4_mtt_init(dev->dev, srq->buf.npages, srq->buf.page_shift,
				    &srq->mtt);
		if (err)
			goto err_buf;

		err = mlx4_buf_write_mtt(dev->dev, &srq->mtt, &srq->buf);
		if (err)
			goto err_mtt;

		srq->wrid = kmalloc(srq->msrq.max * sizeof (u64), GFP_KERNEL);
		if (!srq->wrid) {
			err = -ENOMEM;
			goto err_mtt;
		}
	}

	err = mlx4_srq_alloc(dev->dev, to_mpd(pd)->pdn, &srq->mtt,
			     srq->db.dma, &srq->msrq);
	if (err)
		goto err_wrid;

	srq->msrq.event = mlx4_ib_srq_event;

	if (pd->uobject)
		if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof (__u32))) {
			err = -EFAULT;
			/* The hardware SRQ exists by now and must be freed too. */
			goto err_hw_srq;
		}

	init_attr->attr.max_wr = srq->msrq.max - 1;

	return &srq->ibsrq;

err_hw_srq:
	mlx4_srq_free(dev->dev, &srq->msrq);

err_wrid:
	if (pd->uobject)
		mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &srq->db);
	else
		kfree(srq->wrid);

err_mtt:
	mlx4_mtt_cleanup(dev->dev, &srq->mtt);

err_buf:
	if (pd->uobject)
		ib_umem_release(srq->umem);
	else
		mlx4_buf_free(dev->dev, buf_size, &srq->buf);

err_db:
	if (!pd->uobject)
		mlx4_ib_db_free(dev, &srq->db);

err_srq:
	kfree(srq);

	return ERR_PTR(err);
}
/*
 * Modify an SRQ.  Only arming the limit event (IB_SRQ_LIMIT) is supported;
 * resizing (IB_SRQ_MAX_WR) is rejected.
 *
 * Returns 0 on success, -EINVAL for a resize request or a limit that is
 * not below the queue size, or the error from mlx4_srq_arm().
 */
int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
		       enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
{
	struct mlx4_ib_dev *dev = to_mdev(ibsrq->device);
	struct mlx4_ib_srq *srq = to_msrq(ibsrq);
	int ret;

	/* We don't support resizing SRQs (yet?) */
	if (attr_mask & IB_SRQ_MAX_WR)
		return -EINVAL;

	if (!(attr_mask & IB_SRQ_LIMIT))
		return 0;

	if (attr->srq_limit >= srq->msrq.max)
		return -EINVAL;

	mutex_lock(&srq->mutex);
	ret = mlx4_srq_arm(dev->dev, &srq->msrq, attr->srq_limit);
	mutex_unlock(&srq->mutex);

	return ret;
}
/*
 * Destroy an SRQ: free the hardware SRQ and its MTT, then release either
 * the user-space resources (doorbell mapping, pinned buffer) or the kernel
 * ones (wrid array, WQE buffer, doorbell).  Always returns 0.
 */
int mlx4_ib_destroy_srq(struct ib_srq *srq)
{
	struct mlx4_ib_dev *dev = to_mdev(srq->device);
	struct mlx4_ib_srq *msrq = to_msrq(srq);

	mlx4_srq_free(dev->dev, &msrq->msrq);
	mlx4_mtt_cleanup(dev->dev, &msrq->mtt);

	if (!srq->uobject) {
		/* Kernel SRQ. */
		kfree(msrq->wrid);
		mlx4_buf_free(dev->dev, msrq->msrq.max << msrq->msrq.wqe_shift,
			      &msrq->buf);
		mlx4_ib_db_free(dev, &msrq->db);
	} else {
		/* User SRQ. */
		mlx4_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db);
		ib_umem_release(msrq->umem);
	}

	kfree(msrq);

	return 0;
}
/*
 * Put WQE @wqe_index back on the SRQ's free list by linking it after the
 * current tail and making it the new tail.
 */
void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index)
{
	struct mlx4_wqe_srq_next_seg *tail_seg;

	/* always called with interrupts disabled. */
	spin_lock(&srq->lock);

	tail_seg = get_wqe(srq, srq->tail);
	tail_seg->next_wqe_index = cpu_to_be16(wqe_index);
	srq->tail = wqe_index;

	spin_unlock(&srq->lock);
}
/*
 * mlx4_ib_post_srq_recv - post a chain of receive work requests to an SRQ.
 * @ibsrq:  SRQ to post to
 * @wr:     head of the linked list of receive work requests
 * @bad_wr: on failure, set to the first work request that was not posted
 *
 * Each work request consumes the WQE at srq->head; its scatter entries are
 * written after the WQE's next-segment header.  Work requests that precede
 * a failing one stay posted and are published through the doorbell record.
 *
 * Returns 0 on success, -EINVAL if a work request carries more scatter
 * entries than the SRQ supports, or -ENOMEM if the SRQ has no free WQE left.
 */
int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
			  struct ib_recv_wr **bad_wr)
{
	struct mlx4_ib_srq *srq = to_msrq(ibsrq);
	struct mlx4_wqe_srq_next_seg *next;
	struct mlx4_wqe_data_seg *scat;
	unsigned long flags;
	int err = 0;
	int nreq;
	int i;

	spin_lock_irqsave(&srq->lock, flags);

	for (nreq = 0; wr; ++nreq, wr = wr->next) {
		if (unlikely(wr->num_sge > srq->msrq.max_gs)) {
			err = -EINVAL;
			*bad_wr = wr;
			break;
		}

		/*
		 * head == tail means only the tail entry is left on the free
		 * list.  It must not be posted: mlx4_ib_free_srq_wqe() links
		 * returned WQEs through the tail entry, and its next index is
		 * stale until that happens, so consuming it would send head
		 * to a WQE that is still outstanding.
		 */
		if (unlikely(srq->head == srq->tail)) {
			err = -ENOMEM;
			*bad_wr = wr;
			break;
		}

		srq->wrid[srq->head] = wr->wr_id;

		/* Take the WQE at head and advance head along the free list. */
		next      = get_wqe(srq, srq->head);
		srq->head = be16_to_cpu(next->next_wqe_index);
		scat      = (struct mlx4_wqe_data_seg *) (next + 1);

		for (i = 0; i < wr->num_sge; ++i) {
			scat[i].byte_count = cpu_to_be32(wr->sg_list[i].length);
			scat[i].lkey       = cpu_to_be32(wr->sg_list[i].lkey);
			scat[i].addr       = cpu_to_be64(wr->sg_list[i].addr);
		}

		/* Terminate a short scatter list with an invalid-lkey entry. */
		if (i < srq->msrq.max_gs) {
			scat[i].byte_count = 0;
			scat[i].lkey       = cpu_to_be32(MLX4_INVALID_LKEY);
			scat[i].addr       = 0;
		}
	}

	if (likely(nreq)) {
		srq->wqe_ctr += nreq;

		/*
		 * Make sure that descriptors are written before
		 * doorbell record.
		 */
		wmb();

		*srq->db.db = cpu_to_be32(srq->wqe_ctr);
	}

	spin_unlock_irqrestore(&srq->lock, flags);

	return err;
}
/*
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef MLX4_IB_USER_H
#define MLX4_IB_USER_H
#include <linux/types.h>
/*
* Increment this value if any changes that break userspace ABI
* compatibility are made.
*/
#define MLX4_IB_UVERBS_ABI_VERSION 1
/*
* Make sure that all structs defined in this file remain laid out so
* that they pack the same way on 32-bit and 64-bit architectures (to
* avoid incompatibility between 32-bit userspace and 64-bit kernels).
* In particular do not use pointer types -- pass pointers in __u64
* instead.
*/
/*
 * Response structure for user context allocation (per its name; the code
 * that fills it in is not in this header).  One __u32 plus two __u16 give
 * 8 bytes with no implicit padding.
 * NOTE(review): bf_* presumably describe BlueFlame registers -- confirm
 * against the driver's alloc_ucontext implementation.
 */
struct mlx4_ib_alloc_ucontext_resp {
__u32 qp_tab_size;
__u16 bf_reg_size;
__u16 bf_regs_per_page;
};
/*
 * Response structure for protection domain allocation (per its name).
 * NOTE(review): pdn is presumably the PD number; reserved is presumably
 * padding that keeps the struct at 8 bytes -- confirm with the driver.
 */
struct mlx4_ib_alloc_pd_resp {
__u32 pdn;
__u32 reserved;
};
/*
 * Command structure for CQ creation (per its name).  Both members are
 * __u64, following this file's rule of passing pointers as __u64.
 * NOTE(review): presumably the userspace addresses of the CQ buffer and of
 * its doorbell record -- confirm against the create_cq implementation.
 */
struct mlx4_ib_create_cq {
__u64 buf_addr;
__u64 db_addr;
};
/*
 * Response structure for CQ creation (per its name).
 * NOTE(review): cqn is presumably the CQ number; reserved is presumably
 * padding that keeps the struct at 8 bytes -- confirm with the driver.
 */
struct mlx4_ib_create_cq_resp {
__u32 cqn;
__u32 reserved;
};
/*
 * Command structure for CQ resize (per its name).
 * NOTE(review): buf_addr is presumably the userspace address of the new CQ
 * buffer, passed as __u64 per this file's rule -- confirm with the driver.
 */
struct mlx4_ib_resize_cq {
__u64 buf_addr;
};
/*
 * Command structure for SRQ creation (per its name).  Both members are
 * __u64, following this file's rule of passing pointers as __u64.
 * NOTE(review): presumably the userspace addresses of the SRQ buffer and of
 * its doorbell record -- confirm against the create_srq implementation.
 */
struct mlx4_ib_create_srq {
__u64 buf_addr;
__u64 db_addr;
};
/*
 * Response structure for SRQ creation (per its name).
 * NOTE(review): srqn is presumably the SRQ number; reserved is presumably
 * padding that keeps the struct at 8 bytes -- confirm with the driver.
 */
struct mlx4_ib_create_srq_resp {
__u32 srqn;
__u32 reserved;
};
/*
 * Command structure for QP creation (per its name).  Both members are
 * __u64, following this file's rule of passing pointers as __u64.
 * NOTE(review): presumably the userspace addresses of the QP buffer and of
 * its doorbell record -- confirm against the create_qp implementation.
 */
struct mlx4_ib_create_qp {
__u64 buf_addr;
__u64 db_addr;
};
#endif /* MLX4_IB_USER_H */
......@@ -73,6 +73,7 @@ struct mthca_mtt;
/*
 * mthca memory region: embeds the generic struct ib_mr and carries
 * pointers to an ib_umem and to an mthca_mtt.
 * NOTE(review): umem is presumably set only for userspace-registered
 * regions -- confirm against the reg_user_mr path.
 */
struct mthca_mr {
struct ib_mr ibmr;
struct ib_umem *umem;
struct mthca_mtt *mtt;
};
......
......@@ -2493,6 +2493,20 @@ config PASEMI_MAC
This driver supports the on-chip 1/10Gbit Ethernet controller on
PA Semi's PWRficient line of chips.
config MLX4_CORE
tristate
depends on PCI
default n
config MLX4_DEBUG
bool "Verbose debugging output" if (MLX4_CORE && EMBEDDED)
default y
---help---
This option causes debugging code to be compiled into the
mlx4_core driver. The output can be turned on via the
debug_level module parameter (which can also be set after
the driver is loaded through sysfs).
endmenu
source "drivers/net/tokenring/Kconfig"
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment