Commit de560374 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-linus' of master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband

* 'for-linus' of master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband:
  IB/mlx4: Add a driver Mellanox ConnectX InfiniBand adapters
  IB: Put rlimit accounting struct in struct ib_umem
  IB/uverbs: Export ib_umem_get()/ib_umem_release() to modules
parents b5f0adbc 225c7b1f
...@@ -29,6 +29,11 @@ config INFINIBAND_USER_ACCESS ...@@ -29,6 +29,11 @@ config INFINIBAND_USER_ACCESS
libibverbs, libibcm and a hardware driver library from libibverbs, libibcm and a hardware driver library from
<http://www.openib.org>. <http://www.openib.org>.
config INFINIBAND_USER_MEM
bool
depends on INFINIBAND_USER_ACCESS != n
default y
config INFINIBAND_ADDR_TRANS config INFINIBAND_ADDR_TRANS
bool bool
depends on INFINIBAND && INET depends on INFINIBAND && INET
...@@ -40,6 +45,8 @@ source "drivers/infiniband/hw/ehca/Kconfig" ...@@ -40,6 +45,8 @@ source "drivers/infiniband/hw/ehca/Kconfig"
source "drivers/infiniband/hw/amso1100/Kconfig" source "drivers/infiniband/hw/amso1100/Kconfig"
source "drivers/infiniband/hw/cxgb3/Kconfig" source "drivers/infiniband/hw/cxgb3/Kconfig"
source "drivers/infiniband/hw/mlx4/Kconfig"
source "drivers/infiniband/ulp/ipoib/Kconfig" source "drivers/infiniband/ulp/ipoib/Kconfig"
source "drivers/infiniband/ulp/srp/Kconfig" source "drivers/infiniband/ulp/srp/Kconfig"
......
...@@ -4,6 +4,7 @@ obj-$(CONFIG_INFINIBAND_IPATH) += hw/ipath/ ...@@ -4,6 +4,7 @@ obj-$(CONFIG_INFINIBAND_IPATH) += hw/ipath/
obj-$(CONFIG_INFINIBAND_EHCA) += hw/ehca/ obj-$(CONFIG_INFINIBAND_EHCA) += hw/ehca/
obj-$(CONFIG_INFINIBAND_AMSO1100) += hw/amso1100/ obj-$(CONFIG_INFINIBAND_AMSO1100) += hw/amso1100/
obj-$(CONFIG_INFINIBAND_CXGB3) += hw/cxgb3/ obj-$(CONFIG_INFINIBAND_CXGB3) += hw/cxgb3/
obj-$(CONFIG_MLX4_INFINIBAND) += hw/mlx4/
obj-$(CONFIG_INFINIBAND_IPOIB) += ulp/ipoib/ obj-$(CONFIG_INFINIBAND_IPOIB) += ulp/ipoib/
obj-$(CONFIG_INFINIBAND_SRP) += ulp/srp/ obj-$(CONFIG_INFINIBAND_SRP) += ulp/srp/
obj-$(CONFIG_INFINIBAND_ISER) += ulp/iser/ obj-$(CONFIG_INFINIBAND_ISER) += ulp/iser/
...@@ -9,6 +9,7 @@ obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \ ...@@ -9,6 +9,7 @@ obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \
ib_core-y := packer.o ud_header.o verbs.o sysfs.o \ ib_core-y := packer.o ud_header.o verbs.o sysfs.o \
device.o fmr_pool.o cache.o device.o fmr_pool.o cache.o
ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
ib_mad-y := mad.o smi.o agent.o mad_rmpp.o ib_mad-y := mad.o smi.o agent.o mad_rmpp.o
...@@ -28,5 +29,4 @@ ib_umad-y := user_mad.o ...@@ -28,5 +29,4 @@ ib_umad-y := user_mad.o
ib_ucm-y := ucm.o ib_ucm-y := ucm.o
ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o \ ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o
uverbs_marshall.o
...@@ -613,6 +613,8 @@ static void __exit ib_core_cleanup(void) ...@@ -613,6 +613,8 @@ static void __exit ib_core_cleanup(void)
{ {
ib_cache_cleanup(); ib_cache_cleanup();
ib_sysfs_cleanup(); ib_sysfs_cleanup();
/* Make sure that any pending umem accounting work is done. */
flush_scheduled_work();
} }
module_init(ib_core_init); module_init(ib_core_init);
......
...@@ -39,13 +39,6 @@ ...@@ -39,13 +39,6 @@
#include "uverbs.h" #include "uverbs.h"
struct ib_umem_account_work {
struct work_struct work;
struct mm_struct *mm;
unsigned long diff;
};
static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty) static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
{ {
struct ib_umem_chunk *chunk, *tmp; struct ib_umem_chunk *chunk, *tmp;
...@@ -64,35 +57,56 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d ...@@ -64,35 +57,56 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
} }
} }
int ib_umem_get(struct ib_device *dev, struct ib_umem *mem, /**
void *addr, size_t size, int write) * ib_umem_get - Pin and DMA map userspace memory.
* @context: userspace context to pin memory for
* @addr: userspace virtual address to start at
* @size: length of region to pin
* @access: IB_ACCESS_xxx flags for memory being pinned
*/
struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
size_t size, int access)
{ {
struct ib_umem *umem;
struct page **page_list; struct page **page_list;
struct ib_umem_chunk *chunk; struct ib_umem_chunk *chunk;
unsigned long locked; unsigned long locked;
unsigned long lock_limit; unsigned long lock_limit;
unsigned long cur_base; unsigned long cur_base;
unsigned long npages; unsigned long npages;
int ret = 0; int ret;
int off; int off;
int i; int i;
if (!can_do_mlock()) if (!can_do_mlock())
return -EPERM; return ERR_PTR(-EPERM);
page_list = (struct page **) __get_free_page(GFP_KERNEL); umem = kmalloc(sizeof *umem, GFP_KERNEL);
if (!page_list) if (!umem)
return -ENOMEM; return ERR_PTR(-ENOMEM);
umem->context = context;
umem->length = size;
umem->offset = addr & ~PAGE_MASK;
umem->page_size = PAGE_SIZE;
/*
* We ask for writable memory if any access flags other than
* "remote read" are set. "Local write" and "remote write"
* obviously require write access. "Remote atomic" can do
* things like fetch and add, which will modify memory, and
* "MW bind" can change permissions by binding a window.
*/
umem->writable = !!(access & ~IB_ACCESS_REMOTE_READ);
mem->user_base = (unsigned long) addr; INIT_LIST_HEAD(&umem->chunk_list);
mem->length = size;
mem->offset = (unsigned long) addr & ~PAGE_MASK;
mem->page_size = PAGE_SIZE;
mem->writable = write;
INIT_LIST_HEAD(&mem->chunk_list); page_list = (struct page **) __get_free_page(GFP_KERNEL);
if (!page_list) {
kfree(umem);
return ERR_PTR(-ENOMEM);
}
npages = PAGE_ALIGN(size + mem->offset) >> PAGE_SHIFT; npages = PAGE_ALIGN(size + umem->offset) >> PAGE_SHIFT;
down_write(&current->mm->mmap_sem); down_write(&current->mm->mmap_sem);
...@@ -104,13 +118,13 @@ int ib_umem_get(struct ib_device *dev, struct ib_umem *mem, ...@@ -104,13 +118,13 @@ int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
goto out; goto out;
} }
cur_base = (unsigned long) addr & PAGE_MASK; cur_base = addr & PAGE_MASK;
while (npages) { while (npages) {
ret = get_user_pages(current, current->mm, cur_base, ret = get_user_pages(current, current->mm, cur_base,
min_t(int, npages, min_t(int, npages,
PAGE_SIZE / sizeof (struct page *)), PAGE_SIZE / sizeof (struct page *)),
1, !write, page_list, NULL); 1, !umem->writable, page_list, NULL);
if (ret < 0) if (ret < 0)
goto out; goto out;
...@@ -136,7 +150,7 @@ int ib_umem_get(struct ib_device *dev, struct ib_umem *mem, ...@@ -136,7 +150,7 @@ int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
chunk->page_list[i].length = PAGE_SIZE; chunk->page_list[i].length = PAGE_SIZE;
} }
chunk->nmap = ib_dma_map_sg(dev, chunk->nmap = ib_dma_map_sg(context->device,
&chunk->page_list[0], &chunk->page_list[0],
chunk->nents, chunk->nents,
DMA_BIDIRECTIONAL); DMA_BIDIRECTIONAL);
...@@ -151,75 +165,94 @@ int ib_umem_get(struct ib_device *dev, struct ib_umem *mem, ...@@ -151,75 +165,94 @@ int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
ret -= chunk->nents; ret -= chunk->nents;
off += chunk->nents; off += chunk->nents;
list_add_tail(&chunk->list, &mem->chunk_list); list_add_tail(&chunk->list, &umem->chunk_list);
} }
ret = 0; ret = 0;
} }
out: out:
if (ret < 0) if (ret < 0) {
__ib_umem_release(dev, mem, 0); __ib_umem_release(context->device, umem, 0);
else kfree(umem);
} else
current->mm->locked_vm = locked; current->mm->locked_vm = locked;
up_write(&current->mm->mmap_sem); up_write(&current->mm->mmap_sem);
free_page((unsigned long) page_list); free_page((unsigned long) page_list);
return ret; return ret < 0 ? ERR_PTR(ret) : umem;
} }
EXPORT_SYMBOL(ib_umem_get);
void ib_umem_release(struct ib_device *dev, struct ib_umem *umem) static void ib_umem_account(struct work_struct *work)
{ {
__ib_umem_release(dev, umem, 1); struct ib_umem *umem = container_of(work, struct ib_umem, work);
down_write(&current->mm->mmap_sem);
current->mm->locked_vm -=
PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;
up_write(&current->mm->mmap_sem);
}
static void ib_umem_account(struct work_struct *_work) down_write(&umem->mm->mmap_sem);
{ umem->mm->locked_vm -= umem->diff;
struct ib_umem_account_work *work = up_write(&umem->mm->mmap_sem);
container_of(_work, struct ib_umem_account_work, work); mmput(umem->mm);
kfree(umem);
down_write(&work->mm->mmap_sem);
work->mm->locked_vm -= work->diff;
up_write(&work->mm->mmap_sem);
mmput(work->mm);
kfree(work);
} }
void ib_umem_release_on_close(struct ib_device *dev, struct ib_umem *umem) /**
* ib_umem_release - release memory pinned with ib_umem_get
* @umem: umem struct to release
*/
void ib_umem_release(struct ib_umem *umem)
{ {
struct ib_umem_account_work *work; struct ib_ucontext *context = umem->context;
struct mm_struct *mm; struct mm_struct *mm;
unsigned long diff;
__ib_umem_release(dev, umem, 1); __ib_umem_release(umem->context->device, umem, 1);
mm = get_task_mm(current); mm = get_task_mm(current);
if (!mm) if (!mm)
return; return;
diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;
/* /*
* We may be called with the mm's mmap_sem already held. This * We may be called with the mm's mmap_sem already held. This
* can happen when a userspace munmap() is the call that drops * can happen when a userspace munmap() is the call that drops
* the last reference to our file and calls our release * the last reference to our file and calls our release
* method. If there are memory regions to destroy, we'll end * method. If there are memory regions to destroy, we'll end
* up here and not be able to take the mmap_sem. Therefore we * up here and not be able to take the mmap_sem. In that case
* defer the vm_locked accounting to the system workqueue. * we defer the vm_locked accounting to the system workqueue.
*/ */
if (context->closing && !down_write_trylock(&mm->mmap_sem)) {
INIT_WORK(&umem->work, ib_umem_account);
umem->mm = mm;
umem->diff = diff;
work = kmalloc(sizeof *work, GFP_KERNEL); schedule_work(&umem->work);
if (!work) {
mmput(mm);
return; return;
} } else
down_write(&mm->mmap_sem);
current->mm->locked_vm -= diff;
up_write(&mm->mmap_sem);
mmput(mm);
kfree(umem);
}
EXPORT_SYMBOL(ib_umem_release);
int ib_umem_page_count(struct ib_umem *umem)
{
struct ib_umem_chunk *chunk;
int shift;
int i;
int n;
shift = ilog2(umem->page_size);
INIT_WORK(&work->work, ib_umem_account); n = 0;
work->mm = mm; list_for_each_entry(chunk, &umem->chunk_list, list)
work->diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT; for (i = 0; i < chunk->nmap; ++i)
n += sg_dma_len(&chunk->page_list[i]) >> shift;
schedule_work(&work->work); return n;
} }
EXPORT_SYMBOL(ib_umem_page_count);
...@@ -45,6 +45,7 @@ ...@@ -45,6 +45,7 @@
#include <linux/completion.h> #include <linux/completion.h>
#include <rdma/ib_verbs.h> #include <rdma/ib_verbs.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h> #include <rdma/ib_user_verbs.h>
/* /*
...@@ -163,11 +164,6 @@ void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr); ...@@ -163,11 +164,6 @@ void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_event_handler(struct ib_event_handler *handler, void ib_uverbs_event_handler(struct ib_event_handler *handler,
struct ib_event *event); struct ib_event *event);
int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
void *addr, size_t size, int write);
void ib_umem_release(struct ib_device *dev, struct ib_umem *umem);
void ib_umem_release_on_close(struct ib_device *dev, struct ib_umem *umem);
#define IB_UVERBS_DECLARE_CMD(name) \ #define IB_UVERBS_DECLARE_CMD(name) \
ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \ ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \
const char __user *buf, int in_len, \ const char __user *buf, int in_len, \
......
/* /*
* Copyright (c) 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. * Copyright (c) 2005, 2006, 2007 Cisco Systems. All rights reserved.
* Copyright (c) 2005 PathScale, Inc. All rights reserved. * Copyright (c) 2005 PathScale, Inc. All rights reserved.
* Copyright (c) 2006 Mellanox Technologies. All rights reserved. * Copyright (c) 2006 Mellanox Technologies. All rights reserved.
* *
...@@ -295,6 +295,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, ...@@ -295,6 +295,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
INIT_LIST_HEAD(&ucontext->qp_list); INIT_LIST_HEAD(&ucontext->qp_list);
INIT_LIST_HEAD(&ucontext->srq_list); INIT_LIST_HEAD(&ucontext->srq_list);
INIT_LIST_HEAD(&ucontext->ah_list); INIT_LIST_HEAD(&ucontext->ah_list);
ucontext->closing = 0;
resp.num_comp_vectors = file->device->num_comp_vectors; resp.num_comp_vectors = file->device->num_comp_vectors;
...@@ -573,7 +574,7 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, ...@@ -573,7 +574,7 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
struct ib_uverbs_reg_mr cmd; struct ib_uverbs_reg_mr cmd;
struct ib_uverbs_reg_mr_resp resp; struct ib_uverbs_reg_mr_resp resp;
struct ib_udata udata; struct ib_udata udata;
struct ib_umem_object *obj; struct ib_uobject *uobj;
struct ib_pd *pd; struct ib_pd *pd;
struct ib_mr *mr; struct ib_mr *mr;
int ret; int ret;
...@@ -599,35 +600,21 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, ...@@ -599,35 +600,21 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
!(cmd.access_flags & IB_ACCESS_LOCAL_WRITE)) !(cmd.access_flags & IB_ACCESS_LOCAL_WRITE))
return -EINVAL; return -EINVAL;
obj = kmalloc(sizeof *obj, GFP_KERNEL); uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
if (!obj) if (!uobj)
return -ENOMEM; return -ENOMEM;
init_uobj(&obj->uobject, 0, file->ucontext, &mr_lock_key); init_uobj(uobj, 0, file->ucontext, &mr_lock_key);
down_write(&obj->uobject.mutex); down_write(&uobj->mutex);
/*
* We ask for writable memory if any access flags other than
* "remote read" are set. "Local write" and "remote write"
* obviously require write access. "Remote atomic" can do
* things like fetch and add, which will modify memory, and
* "MW bind" can change permissions by binding a window.
*/
ret = ib_umem_get(file->device->ib_dev, &obj->umem,
(void *) (unsigned long) cmd.start, cmd.length,
!!(cmd.access_flags & ~IB_ACCESS_REMOTE_READ));
if (ret)
goto err_free;
obj->umem.virt_base = cmd.hca_va;
pd = idr_read_pd(cmd.pd_handle, file->ucontext); pd = idr_read_pd(cmd.pd_handle, file->ucontext);
if (!pd) { if (!pd) {
ret = -EINVAL; ret = -EINVAL;
goto err_release; goto err_free;
} }
mr = pd->device->reg_user_mr(pd, &obj->umem, cmd.access_flags, &udata); mr = pd->device->reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va,
cmd.access_flags, &udata);
if (IS_ERR(mr)) { if (IS_ERR(mr)) {
ret = PTR_ERR(mr); ret = PTR_ERR(mr);
goto err_put; goto err_put;
...@@ -635,19 +622,19 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, ...@@ -635,19 +622,19 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
mr->device = pd->device; mr->device = pd->device;
mr->pd = pd; mr->pd = pd;
mr->uobject = &obj->uobject; mr->uobject = uobj;
atomic_inc(&pd->usecnt); atomic_inc(&pd->usecnt);
atomic_set(&mr->usecnt, 0); atomic_set(&mr->usecnt, 0);
obj->uobject.object = mr; uobj->object = mr;
ret = idr_add_uobj(&ib_uverbs_mr_idr, &obj->uobject); ret = idr_add_uobj(&ib_uverbs_mr_idr, uobj);
if (ret) if (ret)
goto err_unreg; goto err_unreg;
memset(&resp, 0, sizeof resp); memset(&resp, 0, sizeof resp);
resp.lkey = mr->lkey; resp.lkey = mr->lkey;
resp.rkey = mr->rkey; resp.rkey = mr->rkey;
resp.mr_handle = obj->uobject.id; resp.mr_handle = uobj->id;
if (copy_to_user((void __user *) (unsigned long) cmd.response, if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp)) { &resp, sizeof resp)) {
...@@ -658,17 +645,17 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, ...@@ -658,17 +645,17 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
put_pd_read(pd); put_pd_read(pd);
mutex_lock(&file->mutex); mutex_lock(&file->mutex);
list_add_tail(&obj->uobject.list, &file->ucontext->mr_list); list_add_tail(&uobj->list, &file->ucontext->mr_list);
mutex_unlock(&file->mutex); mutex_unlock(&file->mutex);
obj->uobject.live = 1; uobj->live = 1;
up_write(&obj->uobject.mutex); up_write(&uobj->mutex);
return in_len; return in_len;
err_copy: err_copy:
idr_remove_uobj(&ib_uverbs_mr_idr, &obj->uobject); idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
err_unreg: err_unreg:
ib_dereg_mr(mr); ib_dereg_mr(mr);
...@@ -676,11 +663,8 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, ...@@ -676,11 +663,8 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
err_put: err_put:
put_pd_read(pd); put_pd_read(pd);
err_release:
ib_umem_release(file->device->ib_dev, &obj->umem);
err_free: err_free:
put_uobj_write(&obj->uobject); put_uobj_write(uobj);
return ret; return ret;
} }
...@@ -691,7 +675,6 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file, ...@@ -691,7 +675,6 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
struct ib_uverbs_dereg_mr cmd; struct ib_uverbs_dereg_mr cmd;
struct ib_mr *mr; struct ib_mr *mr;
struct ib_uobject *uobj; struct ib_uobject *uobj;
struct ib_umem_object *memobj;
int ret = -EINVAL; int ret = -EINVAL;
if (copy_from_user(&cmd, buf, sizeof cmd)) if (copy_from_user(&cmd, buf, sizeof cmd))
...@@ -701,8 +684,7 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file, ...@@ -701,8 +684,7 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
if (!uobj) if (!uobj)
return -EINVAL; return -EINVAL;
memobj = container_of(uobj, struct ib_umem_object, uobject); mr = uobj->object;
mr = uobj->object;
ret = ib_dereg_mr(mr); ret = ib_dereg_mr(mr);
if (!ret) if (!ret)
...@@ -719,8 +701,6 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file, ...@@ -719,8 +701,6 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
list_del(&uobj->list); list_del(&uobj->list);
mutex_unlock(&file->mutex); mutex_unlock(&file->mutex);
ib_umem_release(file->device->ib_dev, &memobj->umem);
put_uobj(uobj); put_uobj(uobj);
return in_len; return in_len;
......
...@@ -183,6 +183,8 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, ...@@ -183,6 +183,8 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
if (!context) if (!context)
return 0; return 0;
context->closing = 1;
list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) { list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) {
struct ib_ah *ah = uobj->object; struct ib_ah *ah = uobj->object;
...@@ -230,16 +232,10 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, ...@@ -230,16 +232,10 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) { list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) {
struct ib_mr *mr = uobj->object; struct ib_mr *mr = uobj->object;
struct ib_device *mrdev = mr->device;
struct ib_umem_object *memobj;
idr_remove_uobj(&ib_uverbs_mr_idr, uobj); idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
ib_dereg_mr(mr); ib_dereg_mr(mr);
kfree(uobj);
memobj = container_of(uobj, struct ib_umem_object, uobject);
ib_umem_release_on_close(mrdev, &memobj->umem);
kfree(memobj);
} }
list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) { list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) {
...@@ -906,7 +902,6 @@ static void __exit ib_uverbs_cleanup(void) ...@@ -906,7 +902,6 @@ static void __exit ib_uverbs_cleanup(void)
unregister_filesystem(&uverbs_event_fs); unregister_filesystem(&uverbs_event_fs);
class_destroy(uverbs_class); class_destroy(uverbs_class);
unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES); unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
flush_scheduled_work();
idr_destroy(&ib_uverbs_pd_idr); idr_destroy(&ib_uverbs_pd_idr);
idr_destroy(&ib_uverbs_mr_idr); idr_destroy(&ib_uverbs_mr_idr);
idr_destroy(&ib_uverbs_mw_idr); idr_destroy(&ib_uverbs_mw_idr);
......
...@@ -56,6 +56,7 @@ ...@@ -56,6 +56,7 @@
#include <asm/byteorder.h> #include <asm/byteorder.h>
#include <rdma/ib_smi.h> #include <rdma/ib_smi.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h> #include <rdma/ib_user_verbs.h>
#include "c2.h" #include "c2.h"
#include "c2_provider.h" #include "c2_provider.h"
...@@ -396,6 +397,7 @@ static struct ib_mr *c2_reg_phys_mr(struct ib_pd *ib_pd, ...@@ -396,6 +397,7 @@ static struct ib_mr *c2_reg_phys_mr(struct ib_pd *ib_pd,
} }
mr->pd = to_c2pd(ib_pd); mr->pd = to_c2pd(ib_pd);
mr->umem = NULL;
pr_debug("%s - page shift %d, pbl_depth %d, total_len %u, " pr_debug("%s - page shift %d, pbl_depth %d, total_len %u, "
"*iova_start %llx, first pa %llx, last pa %llx\n", "*iova_start %llx, first pa %llx, last pa %llx\n",
__FUNCTION__, page_shift, pbl_depth, total_len, __FUNCTION__, page_shift, pbl_depth, total_len,
...@@ -428,8 +430,8 @@ static struct ib_mr *c2_get_dma_mr(struct ib_pd *pd, int acc) ...@@ -428,8 +430,8 @@ static struct ib_mr *c2_get_dma_mr(struct ib_pd *pd, int acc)
return c2_reg_phys_mr(pd, &bl, 1, acc, &kva); return c2_reg_phys_mr(pd, &bl, 1, acc, &kva);
} }
static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, struct ib_umem *region, static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
int acc, struct ib_udata *udata) u64 virt, int acc, struct ib_udata *udata)
{ {
u64 *pages; u64 *pages;
u64 kva = 0; u64 kva = 0;
...@@ -441,15 +443,23 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, struct ib_umem *region, ...@@ -441,15 +443,23 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
struct c2_mr *c2mr; struct c2_mr *c2mr;
pr_debug("%s:%u\n", __FUNCTION__, __LINE__); pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
shift = ffs(region->page_size) - 1;
c2mr = kmalloc(sizeof(*c2mr), GFP_KERNEL); c2mr = kmalloc(sizeof(*c2mr), GFP_KERNEL);
if (!c2mr) if (!c2mr)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
c2mr->pd = c2pd; c2mr->pd = c2pd;
c2mr->umem = ib_umem_get(pd->uobject->context, start, length, acc);
if (IS_ERR(c2mr->umem)) {
err = PTR_ERR(c2mr->umem);
kfree(c2mr);
return ERR_PTR(err);
}
shift = ffs(c2mr->umem->page_size) - 1;
n = 0; n = 0;
list_for_each_entry(chunk, &region->chunk_list, list) list_for_each_entry(chunk, &c2mr->umem->chunk_list, list)
n += chunk->nents; n += chunk->nents;
pages = kmalloc(n * sizeof(u64), GFP_KERNEL); pages = kmalloc(n * sizeof(u64), GFP_KERNEL);
...@@ -459,35 +469,34 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, struct ib_umem *region, ...@@ -459,35 +469,34 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
} }
i = 0; i = 0;
list_for_each_entry(chunk, &region->chunk_list, list) { list_for_each_entry(chunk, &c2mr->umem->chunk_list, list) {
for (j = 0; j < chunk->nmap; ++j) { for (j = 0; j < chunk->nmap; ++j) {
len = sg_dma_len(&chunk->page_list[j]) >> shift; len = sg_dma_len(&chunk->page_list[j]) >> shift;
for (k = 0; k < len; ++k) { for (k = 0; k < len; ++k) {
pages[i++] = pages[i++] =
sg_dma_address(&chunk->page_list[j]) + sg_dma_address(&chunk->page_list[j]) +
(region->page_size * k); (c2mr->umem->page_size * k);
} }
} }
} }
kva = (u64)region->virt_base; kva = virt;
err = c2_nsmr_register_phys_kern(to_c2dev(pd->device), err = c2_nsmr_register_phys_kern(to_c2dev(pd->device),
pages, pages,
region->page_size, c2mr->umem->page_size,
i, i,
region->length, length,
region->offset, c2mr->umem->offset,
&kva, &kva,
c2_convert_access(acc), c2_convert_access(acc),
c2mr); c2mr);
kfree(pages); kfree(pages);
if (err) { if (err)
kfree(c2mr); goto err;
return ERR_PTR(err);
}
return &c2mr->ibmr; return &c2mr->ibmr;
err: err:
ib_umem_release(c2mr->umem);
kfree(c2mr); kfree(c2mr);
return ERR_PTR(err); return ERR_PTR(err);
} }
...@@ -502,8 +511,11 @@ static int c2_dereg_mr(struct ib_mr *ib_mr) ...@@ -502,8 +511,11 @@ static int c2_dereg_mr(struct ib_mr *ib_mr)
err = c2_stag_dealloc(to_c2dev(ib_mr->device), ib_mr->lkey); err = c2_stag_dealloc(to_c2dev(ib_mr->device), ib_mr->lkey);
if (err) if (err)
pr_debug("c2_stag_dealloc failed: %d\n", err); pr_debug("c2_stag_dealloc failed: %d\n", err);
else else {
if (mr->umem)
ib_umem_release(mr->umem);
kfree(mr); kfree(mr);
}
return err; return err;
} }
......
...@@ -73,6 +73,7 @@ struct c2_pd { ...@@ -73,6 +73,7 @@ struct c2_pd {
struct c2_mr { struct c2_mr {
struct ib_mr ibmr; struct ib_mr ibmr;
struct c2_pd *pd; struct c2_pd *pd;
struct ib_umem *umem;
}; };
struct c2_av; struct c2_av;
......
...@@ -47,6 +47,7 @@ ...@@ -47,6 +47,7 @@
#include <rdma/iw_cm.h> #include <rdma/iw_cm.h>
#include <rdma/ib_verbs.h> #include <rdma/ib_verbs.h>
#include <rdma/ib_smi.h> #include <rdma/ib_smi.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h> #include <rdma/ib_user_verbs.h>
#include "cxio_hal.h" #include "cxio_hal.h"
...@@ -443,6 +444,8 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr) ...@@ -443,6 +444,8 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr)
remove_handle(rhp, &rhp->mmidr, mmid); remove_handle(rhp, &rhp->mmidr, mmid);
if (mhp->kva) if (mhp->kva)
kfree((void *) (unsigned long) mhp->kva); kfree((void *) (unsigned long) mhp->kva);
if (mhp->umem)
ib_umem_release(mhp->umem);
PDBG("%s mmid 0x%x ptr %p\n", __FUNCTION__, mmid, mhp); PDBG("%s mmid 0x%x ptr %p\n", __FUNCTION__, mmid, mhp);
kfree(mhp); kfree(mhp);
return 0; return 0;
...@@ -577,8 +580,8 @@ static int iwch_reregister_phys_mem(struct ib_mr *mr, ...@@ -577,8 +580,8 @@ static int iwch_reregister_phys_mem(struct ib_mr *mr,
} }
static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region, static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
int acc, struct ib_udata *udata) u64 virt, int acc, struct ib_udata *udata)
{ {
__be64 *pages; __be64 *pages;
int shift, n, len; int shift, n, len;
...@@ -591,7 +594,6 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region, ...@@ -591,7 +594,6 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
struct iwch_reg_user_mr_resp uresp; struct iwch_reg_user_mr_resp uresp;
PDBG("%s ib_pd %p\n", __FUNCTION__, pd); PDBG("%s ib_pd %p\n", __FUNCTION__, pd);
shift = ffs(region->page_size) - 1;
php = to_iwch_pd(pd); php = to_iwch_pd(pd);
rhp = php->rhp; rhp = php->rhp;
...@@ -599,8 +601,17 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region, ...@@ -599,8 +601,17 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
if (!mhp) if (!mhp)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc);
if (IS_ERR(mhp->umem)) {
err = PTR_ERR(mhp->umem);
kfree(mhp);
return ERR_PTR(err);
}
shift = ffs(mhp->umem->page_size) - 1;
n = 0; n = 0;
list_for_each_entry(chunk, &region->chunk_list, list) list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
n += chunk->nents; n += chunk->nents;
pages = kmalloc(n * sizeof(u64), GFP_KERNEL); pages = kmalloc(n * sizeof(u64), GFP_KERNEL);
...@@ -611,13 +622,13 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region, ...@@ -611,13 +622,13 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
i = n = 0; i = n = 0;
list_for_each_entry(chunk, &region->chunk_list, list) list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
for (j = 0; j < chunk->nmap; ++j) { for (j = 0; j < chunk->nmap; ++j) {
len = sg_dma_len(&chunk->page_list[j]) >> shift; len = sg_dma_len(&chunk->page_list[j]) >> shift;
for (k = 0; k < len; ++k) { for (k = 0; k < len; ++k) {
pages[i++] = cpu_to_be64(sg_dma_address( pages[i++] = cpu_to_be64(sg_dma_address(
&chunk->page_list[j]) + &chunk->page_list[j]) +
region->page_size * k); mhp->umem->page_size * k);
} }
} }
...@@ -625,9 +636,9 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region, ...@@ -625,9 +636,9 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
mhp->attr.pdid = php->pdid; mhp->attr.pdid = php->pdid;
mhp->attr.zbva = 0; mhp->attr.zbva = 0;
mhp->attr.perms = iwch_ib_to_tpt_access(acc); mhp->attr.perms = iwch_ib_to_tpt_access(acc);
mhp->attr.va_fbo = region->virt_base; mhp->attr.va_fbo = virt;
mhp->attr.page_size = shift - 12; mhp->attr.page_size = shift - 12;
mhp->attr.len = (u32) region->length; mhp->attr.len = (u32) length;
mhp->attr.pbl_size = i; mhp->attr.pbl_size = i;
err = iwch_register_mem(rhp, php, mhp, shift, pages); err = iwch_register_mem(rhp, php, mhp, shift, pages);
kfree(pages); kfree(pages);
...@@ -650,6 +661,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region, ...@@ -650,6 +661,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
return &mhp->ibmr; return &mhp->ibmr;
err: err:
ib_umem_release(mhp->umem);
kfree(mhp); kfree(mhp);
return ERR_PTR(err); return ERR_PTR(err);
} }
......
...@@ -73,6 +73,7 @@ struct tpt_attributes { ...@@ -73,6 +73,7 @@ struct tpt_attributes {
struct iwch_mr { struct iwch_mr {
struct ib_mr ibmr; struct ib_mr ibmr;
struct ib_umem *umem;
struct iwch_dev *rhp; struct iwch_dev *rhp;
u64 kva; u64 kva;
struct tpt_attributes attr; struct tpt_attributes attr;
......
...@@ -176,6 +176,7 @@ struct ehca_mr { ...@@ -176,6 +176,7 @@ struct ehca_mr {
struct ib_mr ib_mr; /* must always be first in ehca_mr */ struct ib_mr ib_mr; /* must always be first in ehca_mr */
struct ib_fmr ib_fmr; /* must always be first in ehca_mr */ struct ib_fmr ib_fmr; /* must always be first in ehca_mr */
} ib; } ib;
struct ib_umem *umem;
spinlock_t mrlock; spinlock_t mrlock;
enum ehca_mr_flag flags; enum ehca_mr_flag flags;
......
...@@ -78,8 +78,7 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd, ...@@ -78,8 +78,7 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
int num_phys_buf, int num_phys_buf,
int mr_access_flags, u64 *iova_start); int mr_access_flags, u64 *iova_start);
struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt,
struct ib_umem *region,
int mr_access_flags, struct ib_udata *udata); int mr_access_flags, struct ib_udata *udata);
int ehca_rereg_phys_mr(struct ib_mr *mr, int ehca_rereg_phys_mr(struct ib_mr *mr,
......
...@@ -39,6 +39,8 @@ ...@@ -39,6 +39,8 @@
* POSSIBILITY OF SUCH DAMAGE. * POSSIBILITY OF SUCH DAMAGE.
*/ */
#include <rdma/ib_umem.h>
#include <asm/current.h> #include <asm/current.h>
#include "ehca_iverbs.h" #include "ehca_iverbs.h"
...@@ -238,10 +240,8 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd, ...@@ -238,10 +240,8 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
/*----------------------------------------------------------------------*/ /*----------------------------------------------------------------------*/
struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt,
struct ib_umem *region, int mr_access_flags, struct ib_udata *udata)
int mr_access_flags,
struct ib_udata *udata)
{ {
struct ib_mr *ib_mr; struct ib_mr *ib_mr;
struct ehca_mr *e_mr; struct ehca_mr *e_mr;
...@@ -257,11 +257,7 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, ...@@ -257,11 +257,7 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
ehca_gen_err("bad pd=%p", pd); ehca_gen_err("bad pd=%p", pd);
return ERR_PTR(-EFAULT); return ERR_PTR(-EFAULT);
} }
if (!region) {
ehca_err(pd->device, "bad input values: region=%p", region);
ib_mr = ERR_PTR(-EINVAL);
goto reg_user_mr_exit0;
}
if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
!(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) || !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) && ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
...@@ -275,17 +271,10 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, ...@@ -275,17 +271,10 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
ib_mr = ERR_PTR(-EINVAL); ib_mr = ERR_PTR(-EINVAL);
goto reg_user_mr_exit0; goto reg_user_mr_exit0;
} }
if (region->page_size != PAGE_SIZE) {
ehca_err(pd->device, "page size not supported, "
"region->page_size=%x", region->page_size);
ib_mr = ERR_PTR(-EINVAL);
goto reg_user_mr_exit0;
}
if ((region->length == 0) || if (length == 0 || virt + length < virt) {
((region->virt_base + region->length) < region->virt_base)) {
ehca_err(pd->device, "bad input values: length=%lx " ehca_err(pd->device, "bad input values: length=%lx "
"virt_base=%lx", region->length, region->virt_base); "virt_base=%lx", length, virt);
ib_mr = ERR_PTR(-EINVAL); ib_mr = ERR_PTR(-EINVAL);
goto reg_user_mr_exit0; goto reg_user_mr_exit0;
} }
...@@ -297,40 +286,55 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, ...@@ -297,40 +286,55 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
goto reg_user_mr_exit0; goto reg_user_mr_exit0;
} }
e_mr->umem = ib_umem_get(pd->uobject->context, start, length,
mr_access_flags);
if (IS_ERR(e_mr->umem)) {
ib_mr = (void *) e_mr->umem;
goto reg_user_mr_exit1;
}
if (e_mr->umem->page_size != PAGE_SIZE) {
ehca_err(pd->device, "page size not supported, "
"e_mr->umem->page_size=%x", e_mr->umem->page_size);
ib_mr = ERR_PTR(-EINVAL);
goto reg_user_mr_exit2;
}
/* determine number of MR pages */ /* determine number of MR pages */
num_pages_mr = (((region->virt_base % PAGE_SIZE) + region->length + num_pages_mr = (((virt % PAGE_SIZE) + length + PAGE_SIZE - 1) /
PAGE_SIZE - 1) / PAGE_SIZE); PAGE_SIZE);
num_pages_4k = (((region->virt_base % EHCA_PAGESIZE) + region->length + num_pages_4k = (((virt % EHCA_PAGESIZE) + length + EHCA_PAGESIZE - 1) /
EHCA_PAGESIZE - 1) / EHCA_PAGESIZE); EHCA_PAGESIZE);
/* register MR on HCA */ /* register MR on HCA */
pginfo.type = EHCA_MR_PGI_USER; pginfo.type = EHCA_MR_PGI_USER;
pginfo.num_pages = num_pages_mr; pginfo.num_pages = num_pages_mr;
pginfo.num_4k = num_pages_4k; pginfo.num_4k = num_pages_4k;
pginfo.region = region; pginfo.region = e_mr->umem;
pginfo.next_4k = region->offset / EHCA_PAGESIZE; pginfo.next_4k = e_mr->umem->offset / EHCA_PAGESIZE;
pginfo.next_chunk = list_prepare_entry(pginfo.next_chunk, pginfo.next_chunk = list_prepare_entry(pginfo.next_chunk,
(&region->chunk_list), (&e_mr->umem->chunk_list),
list); list);
ret = ehca_reg_mr(shca, e_mr, (u64*)region->virt_base, ret = ehca_reg_mr(shca, e_mr, (u64*) virt, length, mr_access_flags, e_pd,
region->length, mr_access_flags, e_pd, &pginfo, &pginfo, &e_mr->ib.ib_mr.lkey, &e_mr->ib.ib_mr.rkey);
&e_mr->ib.ib_mr.lkey, &e_mr->ib.ib_mr.rkey);
if (ret) { if (ret) {
ib_mr = ERR_PTR(ret); ib_mr = ERR_PTR(ret);
goto reg_user_mr_exit1; goto reg_user_mr_exit2;
} }
/* successful registration of all pages */ /* successful registration of all pages */
return &e_mr->ib.ib_mr; return &e_mr->ib.ib_mr;
reg_user_mr_exit2:
ib_umem_release(e_mr->umem);
reg_user_mr_exit1: reg_user_mr_exit1:
ehca_mr_delete(e_mr); ehca_mr_delete(e_mr);
reg_user_mr_exit0: reg_user_mr_exit0:
if (IS_ERR(ib_mr)) if (IS_ERR(ib_mr))
ehca_err(pd->device, "rc=%lx pd=%p region=%p mr_access_flags=%x" ehca_err(pd->device, "rc=%lx pd=%p mr_access_flags=%x"
" udata=%p", " udata=%p",
PTR_ERR(ib_mr), pd, region, mr_access_flags, udata); PTR_ERR(ib_mr), pd, mr_access_flags, udata);
return ib_mr; return ib_mr;
} /* end ehca_reg_user_mr() */ } /* end ehca_reg_user_mr() */
...@@ -596,6 +600,9 @@ int ehca_dereg_mr(struct ib_mr *mr) ...@@ -596,6 +600,9 @@ int ehca_dereg_mr(struct ib_mr *mr)
goto dereg_mr_exit0; goto dereg_mr_exit0;
} }
if (e_mr->umem)
ib_umem_release(e_mr->umem);
/* successful deregistration */ /* successful deregistration */
ehca_mr_delete(e_mr); ehca_mr_delete(e_mr);
......
...@@ -31,6 +31,7 @@ ...@@ -31,6 +31,7 @@
* SOFTWARE. * SOFTWARE.
*/ */
#include <rdma/ib_umem.h>
#include <rdma/ib_pack.h> #include <rdma/ib_pack.h>
#include <rdma/ib_smi.h> #include <rdma/ib_smi.h>
...@@ -147,6 +148,7 @@ struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd, ...@@ -147,6 +148,7 @@ struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd,
mr->mr.offset = 0; mr->mr.offset = 0;
mr->mr.access_flags = acc; mr->mr.access_flags = acc;
mr->mr.max_segs = num_phys_buf; mr->mr.max_segs = num_phys_buf;
mr->umem = NULL;
m = 0; m = 0;
n = 0; n = 0;
...@@ -170,46 +172,56 @@ struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd, ...@@ -170,46 +172,56 @@ struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd,
/** /**
* ipath_reg_user_mr - register a userspace memory region * ipath_reg_user_mr - register a userspace memory region
* @pd: protection domain for this memory region * @pd: protection domain for this memory region
* @region: the user memory region * @start: starting userspace address
* @length: length of region to register
* @virt_addr: virtual address to use (from HCA's point of view)
* @mr_access_flags: access flags for this memory region * @mr_access_flags: access flags for this memory region
* @udata: unused by the InfiniPath driver * @udata: unused by the InfiniPath driver
* *
* Returns the memory region on success, otherwise returns an errno. * Returns the memory region on success, otherwise returns an errno.
*/ */
struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, struct ib_umem *region, struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
int mr_access_flags, struct ib_udata *udata) u64 virt_addr, int mr_access_flags,
struct ib_udata *udata)
{ {
struct ipath_mr *mr; struct ipath_mr *mr;
struct ib_umem *umem;
struct ib_umem_chunk *chunk; struct ib_umem_chunk *chunk;
int n, m, i; int n, m, i;
struct ib_mr *ret; struct ib_mr *ret;
if (region->length == 0) { if (length == 0) {
ret = ERR_PTR(-EINVAL); ret = ERR_PTR(-EINVAL);
goto bail; goto bail;
} }
umem = ib_umem_get(pd->uobject->context, start, length, mr_access_flags);
if (IS_ERR(umem))
return (void *) umem;
n = 0; n = 0;
list_for_each_entry(chunk, &region->chunk_list, list) list_for_each_entry(chunk, &umem->chunk_list, list)
n += chunk->nents; n += chunk->nents;
mr = alloc_mr(n, &to_idev(pd->device)->lk_table); mr = alloc_mr(n, &to_idev(pd->device)->lk_table);
if (!mr) { if (!mr) {
ret = ERR_PTR(-ENOMEM); ret = ERR_PTR(-ENOMEM);
ib_umem_release(umem);
goto bail; goto bail;
} }
mr->mr.pd = pd; mr->mr.pd = pd;
mr->mr.user_base = region->user_base; mr->mr.user_base = start;
mr->mr.iova = region->virt_base; mr->mr.iova = virt_addr;
mr->mr.length = region->length; mr->mr.length = length;
mr->mr.offset = region->offset; mr->mr.offset = umem->offset;
mr->mr.access_flags = mr_access_flags; mr->mr.access_flags = mr_access_flags;
mr->mr.max_segs = n; mr->mr.max_segs = n;
mr->umem = umem;
m = 0; m = 0;
n = 0; n = 0;
list_for_each_entry(chunk, &region->chunk_list, list) { list_for_each_entry(chunk, &umem->chunk_list, list) {
for (i = 0; i < chunk->nents; i++) { for (i = 0; i < chunk->nents; i++) {
void *vaddr; void *vaddr;
...@@ -219,7 +231,7 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, struct ib_umem *region, ...@@ -219,7 +231,7 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
goto bail; goto bail;
} }
mr->mr.map[m]->segs[n].vaddr = vaddr; mr->mr.map[m]->segs[n].vaddr = vaddr;
mr->mr.map[m]->segs[n].length = region->page_size; mr->mr.map[m]->segs[n].length = umem->page_size;
n++; n++;
if (n == IPATH_SEGSZ) { if (n == IPATH_SEGSZ) {
m++; m++;
...@@ -253,6 +265,10 @@ int ipath_dereg_mr(struct ib_mr *ibmr) ...@@ -253,6 +265,10 @@ int ipath_dereg_mr(struct ib_mr *ibmr)
i--; i--;
kfree(mr->mr.map[i]); kfree(mr->mr.map[i]);
} }
if (mr->umem)
ib_umem_release(mr->umem);
kfree(mr); kfree(mr);
return 0; return 0;
} }
......
...@@ -251,6 +251,7 @@ struct ipath_sge { ...@@ -251,6 +251,7 @@ struct ipath_sge {
/* Memory region */ /* Memory region */
struct ipath_mr { struct ipath_mr {
struct ib_mr ibmr; struct ib_mr ibmr;
struct ib_umem *umem;
struct ipath_mregion mr; /* must be last */ struct ipath_mregion mr; /* must be last */
}; };
...@@ -751,8 +752,8 @@ struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd, ...@@ -751,8 +752,8 @@ struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd,
struct ib_phys_buf *buffer_list, struct ib_phys_buf *buffer_list,
int num_phys_buf, int acc, u64 *iova_start); int num_phys_buf, int acc, u64 *iova_start);
struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, struct ib_umem *region, struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
int mr_access_flags, u64 virt_addr, int mr_access_flags,
struct ib_udata *udata); struct ib_udata *udata);
int ipath_dereg_mr(struct ib_mr *ibmr); int ipath_dereg_mr(struct ib_mr *ibmr);
......
config MLX4_INFINIBAND
tristate "Mellanox ConnectX HCA support"
depends on INFINIBAND
select MLX4_CORE
---help---
This driver provides low-level InfiniBand support for
Mellanox ConnectX PCI Express host channel adapters (HCAs).
This is required to use InfiniBand protocols such as
IP-over-IB or SRP with these devices.
obj-$(CONFIG_MLX4_INFINIBAND) += mlx4_ib.o
mlx4_ib-y := ah.o cq.o doorbell.o mad.o main.o mr.o qp.o srq.o
/*
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "mlx4_ib.h"
struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
{
struct mlx4_dev *dev = to_mdev(pd->device)->dev;
struct mlx4_ib_ah *ah;
ah = kmalloc(sizeof *ah, GFP_ATOMIC);
if (!ah)
return ERR_PTR(-ENOMEM);
memset(&ah->av, 0, sizeof ah->av);
ah->av.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
ah->av.g_slid = ah_attr->src_path_bits;
ah->av.dlid = cpu_to_be16(ah_attr->dlid);
if (ah_attr->static_rate) {
ah->av.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET;
while (ah->av.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
!(1 << ah->av.stat_rate & dev->caps.stat_rate_support))
--ah->av.stat_rate;
}
ah->av.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
if (ah_attr->ah_flags & IB_AH_GRH) {
ah->av.g_slid |= 0x80;
ah->av.gid_index = ah_attr->grh.sgid_index;
ah->av.hop_limit = ah_attr->grh.hop_limit;
ah->av.sl_tclass_flowlabel |=
cpu_to_be32((ah_attr->grh.traffic_class << 20) |
ah_attr->grh.flow_label);
memcpy(ah->av.dgid, ah_attr->grh.dgid.raw, 16);
}
return &ah->ibah;
}
int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
{
struct mlx4_ib_ah *ah = to_mah(ibah);
memset(ah_attr, 0, sizeof *ah_attr);
ah_attr->dlid = be16_to_cpu(ah->av.dlid);
ah_attr->sl = be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 28;
ah_attr->port_num = be32_to_cpu(ah->av.port_pd) >> 24;
if (ah->av.stat_rate)
ah_attr->static_rate = ah->av.stat_rate - MLX4_STAT_RATE_OFFSET;
ah_attr->src_path_bits = ah->av.g_slid & 0x7F;
if (mlx4_ib_ah_grh_present(ah)) {
ah_attr->ah_flags = IB_AH_GRH;
ah_attr->grh.traffic_class =
be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20;
ah_attr->grh.flow_label =
be32_to_cpu(ah->av.sl_tclass_flowlabel) & 0xfffff;
ah_attr->grh.hop_limit = ah->av.hop_limit;
ah_attr->grh.sgid_index = ah->av.gid_index;
memcpy(ah_attr->grh.dgid.raw, ah->av.dgid, 16);
}
return 0;
}
int mlx4_ib_destroy_ah(struct ib_ah *ah)
{
kfree(to_mah(ah));
return 0;
}
/*
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/mlx4/cq.h>
#include <linux/mlx4/qp.h>
#include "mlx4_ib.h"
#include "user.h"
static void mlx4_ib_cq_comp(struct mlx4_cq *cq)
{
struct ib_cq *ibcq = &to_mibcq(cq)->ibcq;
ibcq->comp_handler(ibcq, ibcq->cq_context);
}
static void mlx4_ib_cq_event(struct mlx4_cq *cq, enum mlx4_event type)
{
struct ib_event event;
struct ib_cq *ibcq;
if (type != MLX4_EVENT_TYPE_CQ_ERROR) {
printk(KERN_WARNING "mlx4_ib: Unexpected event type %d "
"on CQ %06x\n", type, cq->cqn);
return;
}
ibcq = &to_mibcq(cq)->ibcq;
if (ibcq->event_handler) {
event.device = ibcq->device;
event.event = IB_EVENT_CQ_ERR;
event.element.cq = ibcq;
ibcq->event_handler(&event, ibcq->cq_context);
}
}
static void *get_cqe_from_buf(struct mlx4_ib_cq_buf *buf, int n)
{
int offset = n * sizeof (struct mlx4_cqe);
if (buf->buf.nbufs == 1)
return buf->buf.u.direct.buf + offset;
else
return buf->buf.u.page_list[offset >> PAGE_SHIFT].buf +
(offset & (PAGE_SIZE - 1));
}
static void *get_cqe(struct mlx4_ib_cq *cq, int n)
{
return get_cqe_from_buf(&cq->buf, n);
}
static void *get_sw_cqe(struct mlx4_ib_cq *cq, int n)
{
struct mlx4_cqe *cqe = get_cqe(cq, n & cq->ibcq.cqe);
return (!!(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^
!!(n & (cq->ibcq.cqe + 1))) ? NULL : cqe;
}
static struct mlx4_cqe *next_cqe_sw(struct mlx4_ib_cq *cq)
{
return get_sw_cqe(cq, cq->mcq.cons_index);
}
struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector,
struct ib_ucontext *context,
struct ib_udata *udata)
{
struct mlx4_ib_dev *dev = to_mdev(ibdev);
struct mlx4_ib_cq *cq;
struct mlx4_uar *uar;
int buf_size;
int err;
if (entries < 1 || entries > dev->dev->caps.max_cqes)
return ERR_PTR(-EINVAL);
cq = kmalloc(sizeof *cq, GFP_KERNEL);
if (!cq)
return ERR_PTR(-ENOMEM);
entries = roundup_pow_of_two(entries + 1);
cq->ibcq.cqe = entries - 1;
buf_size = entries * sizeof (struct mlx4_cqe);
spin_lock_init(&cq->lock);
if (context) {
struct mlx4_ib_create_cq ucmd;
if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
err = -EFAULT;
goto err_cq;
}
cq->umem = ib_umem_get(context, ucmd.buf_addr, buf_size,
IB_ACCESS_LOCAL_WRITE);
if (IS_ERR(cq->umem)) {
err = PTR_ERR(cq->umem);
goto err_cq;
}
err = mlx4_mtt_init(dev->dev, ib_umem_page_count(cq->umem),
ilog2(cq->umem->page_size), &cq->buf.mtt);
if (err)
goto err_buf;
err = mlx4_ib_umem_write_mtt(dev, &cq->buf.mtt, cq->umem);
if (err)
goto err_mtt;
err = mlx4_ib_db_map_user(to_mucontext(context), ucmd.db_addr,
&cq->db);
if (err)
goto err_mtt;
uar = &to_mucontext(context)->uar;
} else {
err = mlx4_ib_db_alloc(dev, &cq->db, 1);
if (err)
goto err_cq;
cq->mcq.set_ci_db = cq->db.db;
cq->mcq.arm_db = cq->db.db + 1;
*cq->mcq.set_ci_db = 0;
*cq->mcq.arm_db = 0;
if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &cq->buf.buf)) {
err = -ENOMEM;
goto err_db;
}
err = mlx4_mtt_init(dev->dev, cq->buf.buf.npages, cq->buf.buf.page_shift,
&cq->buf.mtt);
if (err)
goto err_buf;
err = mlx4_buf_write_mtt(dev->dev, &cq->buf.mtt, &cq->buf.buf);
if (err)
goto err_mtt;
uar = &dev->priv_uar;
}
err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar,
cq->db.dma, &cq->mcq);
if (err)
goto err_dbmap;
cq->mcq.comp = mlx4_ib_cq_comp;
cq->mcq.event = mlx4_ib_cq_event;
if (context)
if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof (__u32))) {
err = -EFAULT;
goto err_dbmap;
}
return &cq->ibcq;
err_dbmap:
if (context)
mlx4_ib_db_unmap_user(to_mucontext(context), &cq->db);
err_mtt:
mlx4_mtt_cleanup(dev->dev, &cq->buf.mtt);
err_buf:
if (context)
ib_umem_release(cq->umem);
else
mlx4_buf_free(dev->dev, entries * sizeof (struct mlx4_cqe),
&cq->buf.buf);
err_db:
if (!context)
mlx4_ib_db_free(dev, &cq->db);
err_cq:
kfree(cq);
return ERR_PTR(err);
}
int mlx4_ib_destroy_cq(struct ib_cq *cq)
{
struct mlx4_ib_dev *dev = to_mdev(cq->device);
struct mlx4_ib_cq *mcq = to_mcq(cq);
mlx4_cq_free(dev->dev, &mcq->mcq);
mlx4_mtt_cleanup(dev->dev, &mcq->buf.mtt);
if (cq->uobject) {
mlx4_ib_db_unmap_user(to_mucontext(cq->uobject->context), &mcq->db);
ib_umem_release(mcq->umem);
} else {
mlx4_buf_free(dev->dev, (cq->cqe + 1) * sizeof (struct mlx4_cqe),
&mcq->buf.buf);
mlx4_ib_db_free(dev, &mcq->db);
}
kfree(mcq);
return 0;
}
static void dump_cqe(void *cqe)
{
__be32 *buf = cqe;
printk(KERN_DEBUG "CQE contents %08x %08x %08x %08x %08x %08x %08x %08x\n",
be32_to_cpu(buf[0]), be32_to_cpu(buf[1]), be32_to_cpu(buf[2]),
be32_to_cpu(buf[3]), be32_to_cpu(buf[4]), be32_to_cpu(buf[5]),
be32_to_cpu(buf[6]), be32_to_cpu(buf[7]));
}
static void mlx4_ib_handle_error_cqe(struct mlx4_err_cqe *cqe,
struct ib_wc *wc)
{
if (cqe->syndrome == MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR) {
printk(KERN_DEBUG "local QP operation err "
"(QPN %06x, WQE index %x, vendor syndrome %02x, "
"opcode = %02x)\n",
be32_to_cpu(cqe->my_qpn), be16_to_cpu(cqe->wqe_index),
cqe->vendor_err_syndrome,
cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK);
dump_cqe(cqe);
}
switch (cqe->syndrome) {
case MLX4_CQE_SYNDROME_LOCAL_LENGTH_ERR:
wc->status = IB_WC_LOC_LEN_ERR;
break;
case MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR:
wc->status = IB_WC_LOC_QP_OP_ERR;
break;
case MLX4_CQE_SYNDROME_LOCAL_PROT_ERR:
wc->status = IB_WC_LOC_PROT_ERR;
break;
case MLX4_CQE_SYNDROME_WR_FLUSH_ERR:
wc->status = IB_WC_WR_FLUSH_ERR;
break;
case MLX4_CQE_SYNDROME_MW_BIND_ERR:
wc->status = IB_WC_MW_BIND_ERR;
break;
case MLX4_CQE_SYNDROME_BAD_RESP_ERR:
wc->status = IB_WC_BAD_RESP_ERR;
break;
case MLX4_CQE_SYNDROME_LOCAL_ACCESS_ERR:
wc->status = IB_WC_LOC_ACCESS_ERR;
break;
case MLX4_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR:
wc->status = IB_WC_REM_INV_REQ_ERR;
break;
case MLX4_CQE_SYNDROME_REMOTE_ACCESS_ERR:
wc->status = IB_WC_REM_ACCESS_ERR;
break;
case MLX4_CQE_SYNDROME_REMOTE_OP_ERR:
wc->status = IB_WC_REM_OP_ERR;
break;
case MLX4_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR:
wc->status = IB_WC_RETRY_EXC_ERR;
break;
case MLX4_CQE_SYNDROME_RNR_RETRY_EXC_ERR:
wc->status = IB_WC_RNR_RETRY_EXC_ERR;
break;
case MLX4_CQE_SYNDROME_REMOTE_ABORTED_ERR:
wc->status = IB_WC_REM_ABORT_ERR;
break;
default:
wc->status = IB_WC_GENERAL_ERR;
break;
}
wc->vendor_err = cqe->vendor_err_syndrome;
}
static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
struct mlx4_ib_qp **cur_qp,
struct ib_wc *wc)
{
struct mlx4_cqe *cqe;
struct mlx4_qp *mqp;
struct mlx4_ib_wq *wq;
struct mlx4_ib_srq *srq;
int is_send;
int is_error;
u16 wqe_ctr;
cqe = next_cqe_sw(cq);
if (!cqe)
return -EAGAIN;
++cq->mcq.cons_index;
/*
* Make sure we read CQ entry contents after we've checked the
* ownership bit.
*/
rmb();
is_send = cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK;
is_error = (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
MLX4_CQE_OPCODE_ERROR;
if (!*cur_qp ||
(be32_to_cpu(cqe->my_qpn) & 0xffffff) != (*cur_qp)->mqp.qpn) {
/*
* We do not have to take the QP table lock here,
* because CQs will be locked while QPs are removed
* from the table.
*/
mqp = __mlx4_qp_lookup(to_mdev(cq->ibcq.device)->dev,
be32_to_cpu(cqe->my_qpn));
if (unlikely(!mqp)) {
printk(KERN_WARNING "CQ %06x with entry for unknown QPN %06x\n",
cq->mcq.cqn, be32_to_cpu(cqe->my_qpn) & 0xffffff);
return -EINVAL;
}
*cur_qp = to_mibqp(mqp);
}
wc->qp = &(*cur_qp)->ibqp;
if (is_send) {
wq = &(*cur_qp)->sq;
wqe_ctr = be16_to_cpu(cqe->wqe_index);
wq->tail += wqe_ctr - (u16) wq->tail;
wc->wr_id = wq->wrid[wq->tail & (wq->max - 1)];
++wq->tail;
} else if ((*cur_qp)->ibqp.srq) {
srq = to_msrq((*cur_qp)->ibqp.srq);
wqe_ctr = be16_to_cpu(cqe->wqe_index);
wc->wr_id = srq->wrid[wqe_ctr];
mlx4_ib_free_srq_wqe(srq, wqe_ctr);
} else {
wq = &(*cur_qp)->rq;
wc->wr_id = wq->wrid[wq->tail & (wq->max - 1)];
++wq->tail;
}
if (unlikely(is_error)) {
mlx4_ib_handle_error_cqe((struct mlx4_err_cqe *) cqe, wc);
return 0;
}
wc->status = IB_WC_SUCCESS;
if (is_send) {
wc->wc_flags = 0;
switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) {
case MLX4_OPCODE_RDMA_WRITE_IMM:
wc->wc_flags |= IB_WC_WITH_IMM;
case MLX4_OPCODE_RDMA_WRITE:
wc->opcode = IB_WC_RDMA_WRITE;
break;
case MLX4_OPCODE_SEND_IMM:
wc->wc_flags |= IB_WC_WITH_IMM;
case MLX4_OPCODE_SEND:
wc->opcode = IB_WC_SEND;
break;
case MLX4_OPCODE_RDMA_READ:
wc->opcode = IB_WC_SEND;
wc->byte_len = be32_to_cpu(cqe->byte_cnt);
break;
case MLX4_OPCODE_ATOMIC_CS:
wc->opcode = IB_WC_COMP_SWAP;
wc->byte_len = 8;
break;
case MLX4_OPCODE_ATOMIC_FA:
wc->opcode = IB_WC_FETCH_ADD;
wc->byte_len = 8;
break;
case MLX4_OPCODE_BIND_MW:
wc->opcode = IB_WC_BIND_MW;
break;
}
} else {
wc->byte_len = be32_to_cpu(cqe->byte_cnt);
switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) {
case MLX4_RECV_OPCODE_RDMA_WRITE_IMM:
wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
wc->wc_flags = IB_WC_WITH_IMM;
wc->imm_data = cqe->immed_rss_invalid;
break;
case MLX4_RECV_OPCODE_SEND:
wc->opcode = IB_WC_RECV;
wc->wc_flags = 0;
break;
case MLX4_RECV_OPCODE_SEND_IMM:
wc->opcode = IB_WC_RECV;
wc->wc_flags = IB_WC_WITH_IMM;
wc->imm_data = cqe->immed_rss_invalid;
break;
}
wc->slid = be16_to_cpu(cqe->rlid);
wc->sl = cqe->sl >> 4;
wc->src_qp = be32_to_cpu(cqe->g_mlpath_rqpn) & 0xffffff;
wc->dlid_path_bits = (be32_to_cpu(cqe->g_mlpath_rqpn) >> 24) & 0x7f;
wc->wc_flags |= be32_to_cpu(cqe->g_mlpath_rqpn) & 0x80000000 ?
IB_WC_GRH : 0;
wc->pkey_index = be32_to_cpu(cqe->immed_rss_invalid) >> 16;
}
return 0;
}
int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
struct mlx4_ib_cq *cq = to_mcq(ibcq);
struct mlx4_ib_qp *cur_qp = NULL;
unsigned long flags;
int npolled;
int err = 0;
spin_lock_irqsave(&cq->lock, flags);
for (npolled = 0; npolled < num_entries; ++npolled) {
err = mlx4_ib_poll_one(cq, &cur_qp, wc + npolled);
if (err)
break;
}
if (npolled)
mlx4_cq_set_ci(&cq->mcq);
spin_unlock_irqrestore(&cq->lock, flags);
if (err == 0 || err == -EAGAIN)
return npolled;
else
return err;
}
int mlx4_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
mlx4_cq_arm(&to_mcq(ibcq)->mcq,
(flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
MLX4_CQ_DB_REQ_NOT_SOL : MLX4_CQ_DB_REQ_NOT,
to_mdev(ibcq->device)->uar_map,
MLX4_GET_DOORBELL_LOCK(&to_mdev(ibcq->device)->uar_lock));
return 0;
}
void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq)
{
u32 prod_index;
int nfreed = 0;
struct mlx4_cqe *cqe;
/*
* First we need to find the current producer index, so we
* know where to start cleaning from. It doesn't matter if HW
* adds new entries after this loop -- the QP we're worried
* about is already in RESET, so the new entries won't come
* from our QP and therefore don't need to be checked.
*/
for (prod_index = cq->mcq.cons_index; get_sw_cqe(cq, prod_index); ++prod_index)
if (prod_index == cq->mcq.cons_index + cq->ibcq.cqe)
break;
/*
* Now sweep backwards through the CQ, removing CQ entries
* that match our QP by copying older entries on top of them.
*/
while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) {
cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
if ((be32_to_cpu(cqe->my_qpn) & 0xffffff) == qpn) {
if (srq && !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK))
mlx4_ib_free_srq_wqe(srq, be16_to_cpu(cqe->wqe_index));
++nfreed;
} else if (nfreed)
memcpy(get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe),
cqe, sizeof *cqe);
}
if (nfreed) {
cq->mcq.cons_index += nfreed;
/*
* Make sure update of buffer contents is done before
* updating consumer index.
*/
wmb();
mlx4_cq_set_ci(&cq->mcq);
}
}
void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq)
{
spin_lock_irq(&cq->lock);
__mlx4_ib_cq_clean(cq, qpn, srq);
spin_unlock_irq(&cq->lock);
}
/*
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/slab.h>
#include "mlx4_ib.h"
struct mlx4_ib_db_pgdir {
struct list_head list;
DECLARE_BITMAP(order0, MLX4_IB_DB_PER_PAGE);
DECLARE_BITMAP(order1, MLX4_IB_DB_PER_PAGE / 2);
unsigned long *bits[2];
__be32 *db_page;
dma_addr_t db_dma;
};
static struct mlx4_ib_db_pgdir *mlx4_ib_alloc_db_pgdir(struct mlx4_ib_dev *dev)
{
struct mlx4_ib_db_pgdir *pgdir;
pgdir = kzalloc(sizeof *pgdir, GFP_KERNEL);
if (!pgdir)
return NULL;
bitmap_fill(pgdir->order1, MLX4_IB_DB_PER_PAGE / 2);
pgdir->bits[0] = pgdir->order0;
pgdir->bits[1] = pgdir->order1;
pgdir->db_page = dma_alloc_coherent(dev->ib_dev.dma_device,
PAGE_SIZE, &pgdir->db_dma,
GFP_KERNEL);
if (!pgdir->db_page) {
kfree(pgdir);
return NULL;
}
return pgdir;
}
static int mlx4_ib_alloc_db_from_pgdir(struct mlx4_ib_db_pgdir *pgdir,
struct mlx4_ib_db *db, int order)
{
int o;
int i;
for (o = order; o <= 1; ++o) {
i = find_first_bit(pgdir->bits[o], MLX4_IB_DB_PER_PAGE >> o);
if (i < MLX4_IB_DB_PER_PAGE >> o)
goto found;
}
return -ENOMEM;
found:
clear_bit(i, pgdir->bits[o]);
i <<= o;
if (o > order)
set_bit(i ^ 1, pgdir->bits[order]);
db->u.pgdir = pgdir;
db->index = i;
db->db = pgdir->db_page + db->index;
db->dma = pgdir->db_dma + db->index * 4;
db->order = order;
return 0;
}
int mlx4_ib_db_alloc(struct mlx4_ib_dev *dev, struct mlx4_ib_db *db, int order)
{
struct mlx4_ib_db_pgdir *pgdir;
int ret = 0;
mutex_lock(&dev->pgdir_mutex);
list_for_each_entry(pgdir, &dev->pgdir_list, list)
if (!mlx4_ib_alloc_db_from_pgdir(pgdir, db, order))
goto out;
pgdir = mlx4_ib_alloc_db_pgdir(dev);
if (!pgdir) {
ret = -ENOMEM;
goto out;
}
list_add(&pgdir->list, &dev->pgdir_list);
/* This should never fail -- we just allocated an empty page: */
WARN_ON(mlx4_ib_alloc_db_from_pgdir(pgdir, db, order));
out:
mutex_unlock(&dev->pgdir_mutex);
return ret;
}
void mlx4_ib_db_free(struct mlx4_ib_dev *dev, struct mlx4_ib_db *db)
{
int o;
int i;
mutex_lock(&dev->pgdir_mutex);
o = db->order;
i = db->index;
if (db->order == 0 && test_bit(i ^ 1, db->u.pgdir->order0)) {
clear_bit(i ^ 1, db->u.pgdir->order0);
++o;
}
i >>= o;
set_bit(i, db->u.pgdir->bits[o]);
if (bitmap_full(db->u.pgdir->order1, MLX4_IB_DB_PER_PAGE / 2)) {
dma_free_coherent(dev->ib_dev.dma_device, PAGE_SIZE,
db->u.pgdir->db_page, db->u.pgdir->db_dma);
list_del(&db->u.pgdir->list);
kfree(db->u.pgdir);
}
mutex_unlock(&dev->pgdir_mutex);
}
struct mlx4_ib_user_db_page {
struct list_head list;
struct ib_umem *umem;
unsigned long user_virt;
int refcnt;
};
int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt,
struct mlx4_ib_db *db)
{
struct mlx4_ib_user_db_page *page;
struct ib_umem_chunk *chunk;
int err = 0;
mutex_lock(&context->db_page_mutex);
list_for_each_entry(page, &context->db_page_list, list)
if (page->user_virt == (virt & PAGE_MASK))
goto found;
page = kmalloc(sizeof *page, GFP_KERNEL);
if (!page) {
err = -ENOMEM;
goto out;
}
page->user_virt = (virt & PAGE_MASK);
page->refcnt = 0;
page->umem = ib_umem_get(&context->ibucontext, virt & PAGE_MASK,
PAGE_SIZE, 0);
if (IS_ERR(page->umem)) {
err = PTR_ERR(page->umem);
kfree(page);
goto out;
}
list_add(&page->list, &context->db_page_list);
found:
chunk = list_entry(page->umem->chunk_list.next, struct ib_umem_chunk, list);
db->dma = sg_dma_address(chunk->page_list) + (virt & ~PAGE_MASK);
db->u.user_page = page;
++page->refcnt;
out:
mutex_unlock(&context->db_page_mutex);
return err;
}
void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_ib_db *db)
{
mutex_lock(&context->db_page_mutex);
if (!--db->u.user_page->refcnt) {
list_del(&db->u.user_page->list);
ib_umem_release(db->u.user_page->umem);
kfree(db->u.user_page);
}
mutex_unlock(&context->db_page_mutex);
}
/*
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <rdma/ib_mad.h>
#include <rdma/ib_smi.h>
#include <linux/mlx4/cmd.h>
#include "mlx4_ib.h"
enum {
MLX4_IB_VENDOR_CLASS1 = 0x9,
MLX4_IB_VENDOR_CLASS2 = 0xa
};
int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey,
int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
void *in_mad, void *response_mad)
{
struct mlx4_cmd_mailbox *inmailbox, *outmailbox;
void *inbox;
int err;
u32 in_modifier = port;
u8 op_modifier = 0;
inmailbox = mlx4_alloc_cmd_mailbox(dev->dev);
if (IS_ERR(inmailbox))
return PTR_ERR(inmailbox);
inbox = inmailbox->buf;
outmailbox = mlx4_alloc_cmd_mailbox(dev->dev);
if (IS_ERR(outmailbox)) {
mlx4_free_cmd_mailbox(dev->dev, inmailbox);
return PTR_ERR(outmailbox);
}
memcpy(inbox, in_mad, 256);
/*
* Key check traps can't be generated unless we have in_wc to
* tell us where to send the trap.
*/
if (ignore_mkey || !in_wc)
op_modifier |= 0x1;
if (ignore_bkey || !in_wc)
op_modifier |= 0x2;
if (in_wc) {
struct {
__be32 my_qpn;
u32 reserved1;
__be32 rqpn;
u8 sl;
u8 g_path;
u16 reserved2[2];
__be16 pkey;
u32 reserved3[11];
u8 grh[40];
} *ext_info;
memset(inbox + 256, 0, 256);
ext_info = inbox + 256;
ext_info->my_qpn = cpu_to_be32(in_wc->qp->qp_num);
ext_info->rqpn = cpu_to_be32(in_wc->src_qp);
ext_info->sl = in_wc->sl << 4;
ext_info->g_path = in_wc->dlid_path_bits |
(in_wc->wc_flags & IB_WC_GRH ? 0x80 : 0);
ext_info->pkey = cpu_to_be16(in_wc->pkey_index);
if (in_grh)
memcpy(ext_info->grh, in_grh, 40);
op_modifier |= 0x4;
in_modifier |= in_wc->slid << 16;
}
err = mlx4_cmd_box(dev->dev, inmailbox->dma, outmailbox->dma,
in_modifier, op_modifier,
MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C);
if (!err);
memcpy(response_mad, outmailbox->buf, 256);
mlx4_free_cmd_mailbox(dev->dev, inmailbox);
mlx4_free_cmd_mailbox(dev->dev, outmailbox);
return err;
}
static void update_sm_ah(struct mlx4_ib_dev *dev, u8 port_num, u16 lid, u8 sl)
{
struct ib_ah *new_ah;
struct ib_ah_attr ah_attr;
if (!dev->send_agent[port_num - 1][0])
return;
memset(&ah_attr, 0, sizeof ah_attr);
ah_attr.dlid = lid;
ah_attr.sl = sl;
ah_attr.port_num = port_num;
new_ah = ib_create_ah(dev->send_agent[port_num - 1][0]->qp->pd,
&ah_attr);
if (IS_ERR(new_ah))
return;
spin_lock(&dev->sm_lock);
if (dev->sm_ah[port_num - 1])
ib_destroy_ah(dev->sm_ah[port_num - 1]);
dev->sm_ah[port_num - 1] = new_ah;
spin_unlock(&dev->sm_lock);
}
/*
* Snoop SM MADs for port info and P_Key table sets, so we can
* synthesize LID change and P_Key change events.
*/
static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad)
{
struct ib_event event;
if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
mad->mad_hdr.method == IB_MGMT_METHOD_SET) {
if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO) {
struct ib_port_info *pinfo =
(struct ib_port_info *) ((struct ib_smp *) mad)->data;
update_sm_ah(to_mdev(ibdev), port_num,
be16_to_cpu(pinfo->sm_lid),
pinfo->neighbormtu_mastersmsl & 0xf);
event.device = ibdev;
event.element.port_num = port_num;
if(pinfo->clientrereg_resv_subnetto & 0x80)
event.event = IB_EVENT_CLIENT_REREGISTER;
else
event.event = IB_EVENT_LID_CHANGE;
ib_dispatch_event(&event);
}
if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PKEY_TABLE) {
event.device = ibdev;
event.event = IB_EVENT_PKEY_CHANGE;
event.element.port_num = port_num;
ib_dispatch_event(&event);
}
}
}
static void node_desc_override(struct ib_device *dev,
struct ib_mad *mad)
{
if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP &&
mad->mad_hdr.attr_id == IB_SMP_ATTR_NODE_DESC) {
spin_lock(&to_mdev(dev)->sm_lock);
memcpy(((struct ib_smp *) mad)->data, dev->node_desc, 64);
spin_unlock(&to_mdev(dev)->sm_lock);
}
}
static void forward_trap(struct mlx4_ib_dev *dev, u8 port_num, struct ib_mad *mad)
{
int qpn = mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED;
struct ib_mad_send_buf *send_buf;
struct ib_mad_agent *agent = dev->send_agent[port_num - 1][qpn];
int ret;
if (agent) {
send_buf = ib_create_send_mad(agent, qpn, 0, 0, IB_MGMT_MAD_HDR,
IB_MGMT_MAD_DATA, GFP_ATOMIC);
/*
* We rely here on the fact that MLX QPs don't use the
* address handle after the send is posted (this is
* wrong following the IB spec strictly, but we know
* it's OK for our devices).
*/
spin_lock(&dev->sm_lock);
memcpy(send_buf->mad, mad, sizeof *mad);
if ((send_buf->ah = dev->sm_ah[port_num - 1]))
ret = ib_post_send_mad(send_buf, NULL);
else
ret = -EINVAL;
spin_unlock(&dev->sm_lock);
if (ret)
ib_free_send_mad(send_buf);
}
}
int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
struct ib_wc *in_wc, struct ib_grh *in_grh,
struct ib_mad *in_mad, struct ib_mad *out_mad)
{
u16 slid;
int err;
slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE);
if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0) {
forward_trap(to_mdev(ibdev), port_num, in_mad);
return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
}
if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET &&
in_mad->mad_hdr.method != IB_MGMT_METHOD_SET &&
in_mad->mad_hdr.method != IB_MGMT_METHOD_TRAP_REPRESS)
return IB_MAD_RESULT_SUCCESS;
/*
* Don't process SMInfo queries or vendor-specific
* MADs -- the SMA can't handle them.
*/
if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO ||
((in_mad->mad_hdr.attr_id & IB_SMP_ATTR_VENDOR_MASK) ==
IB_SMP_ATTR_VENDOR_MASK))
return IB_MAD_RESULT_SUCCESS;
} else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT ||
in_mad->mad_hdr.mgmt_class == MLX4_IB_VENDOR_CLASS1 ||
in_mad->mad_hdr.mgmt_class == MLX4_IB_VENDOR_CLASS2) {
if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET &&
in_mad->mad_hdr.method != IB_MGMT_METHOD_SET)
return IB_MAD_RESULT_SUCCESS;
} else
return IB_MAD_RESULT_SUCCESS;
err = mlx4_MAD_IFC(to_mdev(ibdev),
mad_flags & IB_MAD_IGNORE_MKEY,
mad_flags & IB_MAD_IGNORE_BKEY,
port_num, in_wc, in_grh, in_mad, out_mad);
if (err)
return IB_MAD_RESULT_FAILURE;
if (!out_mad->mad_hdr.status) {
smp_snoop(ibdev, port_num, in_mad);
node_desc_override(ibdev, out_mad);
}
/* set return bit in status of directed route responses */
if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
out_mad->mad_hdr.status |= cpu_to_be16(1 << 15);
if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS)
/* no response for trap repress */
return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
}
static void send_handler(struct ib_mad_agent *agent,
struct ib_mad_send_wc *mad_send_wc)
{
ib_free_send_mad(mad_send_wc->send_buf);
}
int mlx4_ib_mad_init(struct mlx4_ib_dev *dev)
{
struct ib_mad_agent *agent;
int p, q;
int ret;
for (p = 0; p < dev->dev->caps.num_ports; ++p)
for (q = 0; q <= 1; ++q) {
agent = ib_register_mad_agent(&dev->ib_dev, p + 1,
q ? IB_QPT_GSI : IB_QPT_SMI,
NULL, 0, send_handler,
NULL, NULL);
if (IS_ERR(agent)) {
ret = PTR_ERR(agent);
goto err;
}
dev->send_agent[p][q] = agent;
}
return 0;
err:
for (p = 0; p < dev->dev->caps.num_ports; ++p)
for (q = 0; q <= 1; ++q)
if (dev->send_agent[p][q])
ib_unregister_mad_agent(dev->send_agent[p][q]);
return ret;
}
void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev)
{
struct ib_mad_agent *agent;
int p, q;
for (p = 0; p < dev->dev->caps.num_ports; ++p) {
for (q = 0; q <= 1; ++q) {
agent = dev->send_agent[p][q];
dev->send_agent[p][q] = NULL;
ib_unregister_mad_agent(agent);
}
if (dev->sm_ah[p])
ib_destroy_ah(dev->sm_ah[p]);
}
}
/*
* Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_user_verbs.h>
#include <linux/mlx4/driver.h>
#include <linux/mlx4/cmd.h>
#include "mlx4_ib.h"
#include "user.h"
#define DRV_NAME "mlx4_ib"
#define DRV_VERSION "0.01"
#define DRV_RELDATE "May 1, 2006"
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);
static const char mlx4_ib_version[] __devinitdata =
DRV_NAME ": Mellanox ConnectX InfiniBand driver v"
DRV_VERSION " (" DRV_RELDATE ")\n";
static void init_query_mad(struct ib_smp *mad)
{
mad->base_version = 1;
mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
mad->class_version = 1;
mad->method = IB_MGMT_METHOD_GET;
}
static int mlx4_ib_query_device(struct ib_device *ibdev,
struct ib_device_attr *props)
{
struct mlx4_ib_dev *dev = to_mdev(ibdev);
struct ib_smp *in_mad = NULL;
struct ib_smp *out_mad = NULL;
int err = -ENOMEM;
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
if (!in_mad || !out_mad)
goto out;
init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, 1, NULL, NULL, in_mad, out_mad);
if (err)
goto out;
memset(props, 0, sizeof *props);
props->fw_ver = dev->dev->caps.fw_ver;
props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT |
IB_DEVICE_PORT_ACTIVE_EVENT |
IB_DEVICE_SYS_IMAGE_GUID |
IB_DEVICE_RC_RNR_NAK_GEN;
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR)
props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR)
props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_APM)
props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UD_AV_PORT)
props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE;
props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
0xffffff;
props->vendor_part_id = be16_to_cpup((__be16 *) (out_mad->data + 30));
props->hw_ver = be32_to_cpup((__be32 *) (out_mad->data + 32));
memcpy(&props->sys_image_guid, out_mad->data + 4, 8);
props->max_mr_size = ~0ull;
props->page_size_cap = dev->dev->caps.page_size_cap;
props->max_qp = dev->dev->caps.num_qps - dev->dev->caps.reserved_qps;
props->max_qp_wr = dev->dev->caps.max_wqes;
props->max_sge = min(dev->dev->caps.max_sq_sg,
dev->dev->caps.max_rq_sg);
props->max_cq = dev->dev->caps.num_cqs - dev->dev->caps.reserved_cqs;
props->max_cqe = dev->dev->caps.max_cqes;
props->max_mr = dev->dev->caps.num_mpts - dev->dev->caps.reserved_mrws;
props->max_pd = dev->dev->caps.num_pds - dev->dev->caps.reserved_pds;
props->max_qp_rd_atom = dev->dev->caps.max_qp_dest_rdma;
props->max_qp_init_rd_atom = dev->dev->caps.max_qp_init_rdma;
props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp;
props->max_srq = dev->dev->caps.num_srqs - dev->dev->caps.reserved_srqs;
props->max_srq_wr = dev->dev->caps.max_srq_wqes;
props->max_srq_sge = dev->dev->caps.max_srq_sge;
props->local_ca_ack_delay = dev->dev->caps.local_ca_ack_delay;
props->atomic_cap = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ?
IB_ATOMIC_HCA : IB_ATOMIC_NONE;
props->max_pkeys = dev->dev->caps.pkey_table_len;
props->max_mcast_grp = dev->dev->caps.num_mgms + dev->dev->caps.num_amgms;
props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm;
props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
props->max_mcast_grp;
props->max_map_per_fmr = (1 << (32 - ilog2(dev->dev->caps.num_mpts))) - 1;
out:
kfree(in_mad);
kfree(out_mad);
return err;
}
static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
struct ib_port_attr *props)
{
struct ib_smp *in_mad = NULL;
struct ib_smp *out_mad = NULL;
int err = -ENOMEM;
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
if (!in_mad || !out_mad)
goto out;
memset(props, 0, sizeof *props);
init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
in_mad->attr_mod = cpu_to_be32(port);
err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
if (err)
goto out;
props->lid = be16_to_cpup((__be16 *) (out_mad->data + 16));
props->lmc = out_mad->data[34] & 0x7;
props->sm_lid = be16_to_cpup((__be16 *) (out_mad->data + 18));
props->sm_sl = out_mad->data[36] & 0xf;
props->state = out_mad->data[32] & 0xf;
props->phys_state = out_mad->data[33] >> 4;
props->port_cap_flags = be32_to_cpup((__be32 *) (out_mad->data + 20));
props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len;
props->max_msg_sz = 0x80000000;
props->pkey_tbl_len = to_mdev(ibdev)->dev->caps.pkey_table_len;
props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46));
props->qkey_viol_cntr = be16_to_cpup((__be16 *) (out_mad->data + 48));
props->active_width = out_mad->data[31] & 0xf;
props->active_speed = out_mad->data[35] >> 4;
props->max_mtu = out_mad->data[41] & 0xf;
props->active_mtu = out_mad->data[36] >> 4;
props->subnet_timeout = out_mad->data[51] & 0x1f;
props->max_vl_num = out_mad->data[37] >> 4;
props->init_type_reply = out_mad->data[41] >> 4;
out:
kfree(in_mad);
kfree(out_mad);
return err;
}
static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
union ib_gid *gid)
{
struct ib_smp *in_mad = NULL;
struct ib_smp *out_mad = NULL;
int err = -ENOMEM;
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
if (!in_mad || !out_mad)
goto out;
init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
in_mad->attr_mod = cpu_to_be32(port);
err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
if (err)
goto out;
memcpy(gid->raw, out_mad->data + 8, 8);
init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_GUID_INFO;
in_mad->attr_mod = cpu_to_be32(index / 8);
err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
if (err)
goto out;
memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8);
out:
kfree(in_mad);
kfree(out_mad);
return err;
}
static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
u16 *pkey)
{
struct ib_smp *in_mad = NULL;
struct ib_smp *out_mad = NULL;
int err = -ENOMEM;
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
if (!in_mad || !out_mad)
goto out;
init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE;
in_mad->attr_mod = cpu_to_be32(index / 32);
err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
if (err)
goto out;
*pkey = be16_to_cpu(((__be16 *) out_mad->data)[index % 32]);
out:
kfree(in_mad);
kfree(out_mad);
return err;
}
static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask,
struct ib_device_modify *props)
{
if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
return -EOPNOTSUPP;
if (mask & IB_DEVICE_MODIFY_NODE_DESC) {
spin_lock(&to_mdev(ibdev)->sm_lock);
memcpy(ibdev->node_desc, props->node_desc, 64);
spin_unlock(&to_mdev(ibdev)->sm_lock);
}
return 0;
}
static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols,
u32 cap_mask)
{
struct mlx4_cmd_mailbox *mailbox;
int err;
mailbox = mlx4_alloc_cmd_mailbox(dev->dev);
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
memset(mailbox->buf, 0, 256);
*(u8 *) mailbox->buf = !!reset_qkey_viols << 6;
((__be32 *) mailbox->buf)[2] = cpu_to_be32(cap_mask);
err = mlx4_cmd(dev->dev, mailbox->dma, port, 0, MLX4_CMD_SET_PORT,
MLX4_CMD_TIME_CLASS_B);
mlx4_free_cmd_mailbox(dev->dev, mailbox);
return err;
}
static int mlx4_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
struct ib_port_modify *props)
{
struct ib_port_attr attr;
u32 cap_mask;
int err;
mutex_lock(&to_mdev(ibdev)->cap_mask_mutex);
err = mlx4_ib_query_port(ibdev, port, &attr);
if (err)
goto out;
cap_mask = (attr.port_cap_flags | props->set_port_cap_mask) &
~props->clr_port_cap_mask;
err = mlx4_SET_PORT(to_mdev(ibdev), port,
!!(mask & IB_PORT_RESET_QKEY_CNTR),
cap_mask);
out:
mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex);
return err;
}
static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev,
struct ib_udata *udata)
{
struct mlx4_ib_dev *dev = to_mdev(ibdev);
struct mlx4_ib_ucontext *context;
struct mlx4_ib_alloc_ucontext_resp resp;
int err;
resp.qp_tab_size = dev->dev->caps.num_qps;
resp.bf_reg_size = dev->dev->caps.bf_reg_size;
resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
context = kmalloc(sizeof *context, GFP_KERNEL);
if (!context)
return ERR_PTR(-ENOMEM);
err = mlx4_uar_alloc(to_mdev(ibdev)->dev, &context->uar);
if (err) {
kfree(context);
return ERR_PTR(err);
}
INIT_LIST_HEAD(&context->db_page_list);
mutex_init(&context->db_page_mutex);
err = ib_copy_to_udata(udata, &resp, sizeof resp);
if (err) {
mlx4_uar_free(to_mdev(ibdev)->dev, &context->uar);
kfree(context);
return ERR_PTR(-EFAULT);
}
return &context->ibucontext;
}
static int mlx4_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
{
struct mlx4_ib_ucontext *context = to_mucontext(ibcontext);
mlx4_uar_free(to_mdev(ibcontext->device)->dev, &context->uar);
kfree(context);
return 0;
}
static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
{
struct mlx4_ib_dev *dev = to_mdev(context->device);
if (vma->vm_end - vma->vm_start != PAGE_SIZE)
return -EINVAL;
if (vma->vm_pgoff == 0) {
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
if (io_remap_pfn_range(vma, vma->vm_start,
to_mucontext(context)->uar.pfn,
PAGE_SIZE, vma->vm_page_prot))
return -EAGAIN;
} else if (vma->vm_pgoff == 1 && dev->dev->caps.bf_reg_size != 0) {
/* FIXME want pgprot_writecombine() for BlueFlame pages */
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
if (io_remap_pfn_range(vma, vma->vm_start,
to_mucontext(context)->uar.pfn +
dev->dev->caps.num_uars,
PAGE_SIZE, vma->vm_page_prot))
return -EAGAIN;
} else
return -EINVAL;
return 0;
}
static struct ib_pd *mlx4_ib_alloc_pd(struct ib_device *ibdev,
struct ib_ucontext *context,
struct ib_udata *udata)
{
struct mlx4_ib_pd *pd;
int err;
pd = kmalloc(sizeof *pd, GFP_KERNEL);
if (!pd)
return ERR_PTR(-ENOMEM);
err = mlx4_pd_alloc(to_mdev(ibdev)->dev, &pd->pdn);
if (err) {
kfree(pd);
return ERR_PTR(err);
}
if (context)
if (ib_copy_to_udata(udata, &pd->pdn, sizeof (__u32))) {
mlx4_pd_free(to_mdev(ibdev)->dev, pd->pdn);
kfree(pd);
return ERR_PTR(-EFAULT);
}
return &pd->ibpd;
}
static int mlx4_ib_dealloc_pd(struct ib_pd *pd)
{
mlx4_pd_free(to_mdev(pd->device)->dev, to_mpd(pd)->pdn);
kfree(pd);
return 0;
}
static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
return mlx4_multicast_attach(to_mdev(ibqp->device)->dev,
&to_mqp(ibqp)->mqp, gid->raw);
}
static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
return mlx4_multicast_detach(to_mdev(ibqp->device)->dev,
&to_mqp(ibqp)->mqp, gid->raw);
}
static int init_node_data(struct mlx4_ib_dev *dev)
{
struct ib_smp *in_mad = NULL;
struct ib_smp *out_mad = NULL;
int err = -ENOMEM;
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
if (!in_mad || !out_mad)
goto out;
init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_NODE_DESC;
err = mlx4_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad);
if (err)
goto out;
memcpy(dev->ib_dev.node_desc, out_mad->data, 64);
in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
err = mlx4_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad);
if (err)
goto out;
memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8);
out:
kfree(in_mad);
kfree(out_mad);
return err;
}
static void *mlx4_ib_add(struct mlx4_dev *dev)
{
struct mlx4_ib_dev *ibdev;
ibdev = (struct mlx4_ib_dev *) ib_alloc_device(sizeof *ibdev);
if (!ibdev) {
dev_err(&dev->pdev->dev, "Device struct alloc failed\n");
return NULL;
}
if (mlx4_pd_alloc(dev, &ibdev->priv_pdn))
goto err_dealloc;
if (mlx4_uar_alloc(dev, &ibdev->priv_uar))
goto err_pd;
ibdev->uar_map = ioremap(ibdev->priv_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
if (!ibdev->uar_map)
goto err_uar;
INIT_LIST_HEAD(&ibdev->pgdir_list);
mutex_init(&ibdev->pgdir_mutex);
ibdev->dev = dev;
strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX);
ibdev->ib_dev.owner = THIS_MODULE;
ibdev->ib_dev.node_type = RDMA_NODE_IB_CA;
ibdev->ib_dev.phys_port_cnt = dev->caps.num_ports;
ibdev->ib_dev.num_comp_vectors = 1;
ibdev->ib_dev.dma_device = &dev->pdev->dev;
ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION;
ibdev->ib_dev.uverbs_cmd_mask =
(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
(1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
(1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
(1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
(1ull << IB_USER_VERBS_CMD_REG_MR) |
(1ull << IB_USER_VERBS_CMD_DEREG_MR) |
(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
(1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
(1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
(1ull << IB_USER_VERBS_CMD_CREATE_QP) |
(1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
(1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
(1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
(1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
ibdev->ib_dev.query_device = mlx4_ib_query_device;
ibdev->ib_dev.query_port = mlx4_ib_query_port;
ibdev->ib_dev.query_gid = mlx4_ib_query_gid;
ibdev->ib_dev.query_pkey = mlx4_ib_query_pkey;
ibdev->ib_dev.modify_device = mlx4_ib_modify_device;
ibdev->ib_dev.modify_port = mlx4_ib_modify_port;
ibdev->ib_dev.alloc_ucontext = mlx4_ib_alloc_ucontext;
ibdev->ib_dev.dealloc_ucontext = mlx4_ib_dealloc_ucontext;
ibdev->ib_dev.mmap = mlx4_ib_mmap;
ibdev->ib_dev.alloc_pd = mlx4_ib_alloc_pd;
ibdev->ib_dev.dealloc_pd = mlx4_ib_dealloc_pd;
ibdev->ib_dev.create_ah = mlx4_ib_create_ah;
ibdev->ib_dev.query_ah = mlx4_ib_query_ah;
ibdev->ib_dev.destroy_ah = mlx4_ib_destroy_ah;
ibdev->ib_dev.create_srq = mlx4_ib_create_srq;
ibdev->ib_dev.modify_srq = mlx4_ib_modify_srq;
ibdev->ib_dev.destroy_srq = mlx4_ib_destroy_srq;
ibdev->ib_dev.post_srq_recv = mlx4_ib_post_srq_recv;
ibdev->ib_dev.create_qp = mlx4_ib_create_qp;
ibdev->ib_dev.modify_qp = mlx4_ib_modify_qp;
ibdev->ib_dev.destroy_qp = mlx4_ib_destroy_qp;
ibdev->ib_dev.post_send = mlx4_ib_post_send;
ibdev->ib_dev.post_recv = mlx4_ib_post_recv;
ibdev->ib_dev.create_cq = mlx4_ib_create_cq;
ibdev->ib_dev.destroy_cq = mlx4_ib_destroy_cq;
ibdev->ib_dev.poll_cq = mlx4_ib_poll_cq;
ibdev->ib_dev.req_notify_cq = mlx4_ib_arm_cq;
ibdev->ib_dev.get_dma_mr = mlx4_ib_get_dma_mr;
ibdev->ib_dev.reg_user_mr = mlx4_ib_reg_user_mr;
ibdev->ib_dev.dereg_mr = mlx4_ib_dereg_mr;
ibdev->ib_dev.attach_mcast = mlx4_ib_mcg_attach;
ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach;
ibdev->ib_dev.process_mad = mlx4_ib_process_mad;
if (init_node_data(ibdev))
goto err_map;
spin_lock_init(&ibdev->sm_lock);
mutex_init(&ibdev->cap_mask_mutex);
if (ib_register_device(&ibdev->ib_dev))
goto err_map;
if (mlx4_ib_mad_init(ibdev))
goto err_reg;
return ibdev;
err_reg:
ib_unregister_device(&ibdev->ib_dev);
err_map:
iounmap(ibdev->uar_map);
err_uar:
mlx4_uar_free(dev, &ibdev->priv_uar);
err_pd:
mlx4_pd_free(dev, ibdev->priv_pdn);
err_dealloc:
ib_dealloc_device(&ibdev->ib_dev);
return NULL;
}
static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
{
struct mlx4_ib_dev *ibdev = ibdev_ptr;
int p;
for (p = 1; p <= dev->caps.num_ports; ++p)
mlx4_CLOSE_PORT(dev, p);
mlx4_ib_mad_cleanup(ibdev);
ib_unregister_device(&ibdev->ib_dev);
iounmap(ibdev->uar_map);
mlx4_uar_free(dev, &ibdev->priv_uar);
mlx4_pd_free(dev, ibdev->priv_pdn);
ib_dealloc_device(&ibdev->ib_dev);
}
static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
enum mlx4_dev_event event, int subtype,
int port)
{
struct ib_event ibev;
switch (event) {
case MLX4_EVENT_TYPE_PORT_CHANGE:
ibev.event = subtype == MLX4_PORT_CHANGE_SUBTYPE_ACTIVE ?
IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
break;
case MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR:
ibev.event = IB_EVENT_DEVICE_FATAL;
break;
default:
return;
}
ibev.device = ibdev_ptr;
ibev.element.port_num = port;
ib_dispatch_event(&ibev);
}
static struct mlx4_interface mlx4_ib_interface = {
.add = mlx4_ib_add,
.remove = mlx4_ib_remove,
.event = mlx4_ib_event
};
static int __init mlx4_ib_init(void)
{
return mlx4_register_interface(&mlx4_ib_interface);
}
static void __exit mlx4_ib_cleanup(void)
{
mlx4_unregister_interface(&mlx4_ib_interface);
}
module_init(mlx4_ib_init);
module_exit(mlx4_ib_cleanup);
/*
* Copyright (c) 2006, 2007 Cisco Systems. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef MLX4_IB_H
#define MLX4_IB_H
#include <linux/compiler.h>
#include <linux/list.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_umem.h>
#include <linux/mlx4/device.h>
#include <linux/mlx4/doorbell.h>
enum {
MLX4_IB_DB_PER_PAGE = PAGE_SIZE / 4
};
struct mlx4_ib_db_pgdir;
struct mlx4_ib_user_db_page;
struct mlx4_ib_db {
__be32 *db;
union {
struct mlx4_ib_db_pgdir *pgdir;
struct mlx4_ib_user_db_page *user_page;
} u;
dma_addr_t dma;
int index;
int order;
};
struct mlx4_ib_ucontext {
struct ib_ucontext ibucontext;
struct mlx4_uar uar;
struct list_head db_page_list;
struct mutex db_page_mutex;
};
struct mlx4_ib_pd {
struct ib_pd ibpd;
u32 pdn;
};
struct mlx4_ib_cq_buf {
struct mlx4_buf buf;
struct mlx4_mtt mtt;
};
struct mlx4_ib_cq {
struct ib_cq ibcq;
struct mlx4_cq mcq;
struct mlx4_ib_cq_buf buf;
struct mlx4_ib_db db;
spinlock_t lock;
struct ib_umem *umem;
};
struct mlx4_ib_mr {
struct ib_mr ibmr;
struct mlx4_mr mmr;
struct ib_umem *umem;
};
struct mlx4_ib_wq {
u64 *wrid;
spinlock_t lock;
int max;
int max_gs;
int offset;
int wqe_shift;
unsigned head;
unsigned tail;
};
struct mlx4_ib_qp {
struct ib_qp ibqp;
struct mlx4_qp mqp;
struct mlx4_buf buf;
struct mlx4_ib_db db;
struct mlx4_ib_wq rq;
u32 doorbell_qpn;
__be32 sq_signal_bits;
struct mlx4_ib_wq sq;
struct ib_umem *umem;
struct mlx4_mtt mtt;
int buf_size;
struct mutex mutex;
u8 port;
u8 alt_port;
u8 atomic_rd_en;
u8 resp_depth;
u8 state;
};
struct mlx4_ib_srq {
struct ib_srq ibsrq;
struct mlx4_srq msrq;
struct mlx4_buf buf;
struct mlx4_ib_db db;
u64 *wrid;
spinlock_t lock;
int head;
int tail;
u16 wqe_ctr;
struct ib_umem *umem;
struct mlx4_mtt mtt;
struct mutex mutex;
};
struct mlx4_ib_ah {
struct ib_ah ibah;
struct mlx4_av av;
};
struct mlx4_ib_dev {
struct ib_device ib_dev;
struct mlx4_dev *dev;
void __iomem *uar_map;
struct list_head pgdir_list;
struct mutex pgdir_mutex;
struct mlx4_uar priv_uar;
u32 priv_pdn;
MLX4_DECLARE_DOORBELL_LOCK(uar_lock);
struct ib_mad_agent *send_agent[MLX4_MAX_PORTS][2];
struct ib_ah *sm_ah[MLX4_MAX_PORTS];
spinlock_t sm_lock;
struct mutex cap_mask_mutex;
};
static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev)
{
return container_of(ibdev, struct mlx4_ib_dev, ib_dev);
}
static inline struct mlx4_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
{
return container_of(ibucontext, struct mlx4_ib_ucontext, ibucontext);
}
static inline struct mlx4_ib_pd *to_mpd(struct ib_pd *ibpd)
{
return container_of(ibpd, struct mlx4_ib_pd, ibpd);
}
static inline struct mlx4_ib_cq *to_mcq(struct ib_cq *ibcq)
{
return container_of(ibcq, struct mlx4_ib_cq, ibcq);
}
static inline struct mlx4_ib_cq *to_mibcq(struct mlx4_cq *mcq)
{
return container_of(mcq, struct mlx4_ib_cq, mcq);
}
static inline struct mlx4_ib_mr *to_mmr(struct ib_mr *ibmr)
{
return container_of(ibmr, struct mlx4_ib_mr, ibmr);
}
static inline struct mlx4_ib_qp *to_mqp(struct ib_qp *ibqp)
{
return container_of(ibqp, struct mlx4_ib_qp, ibqp);
}
static inline struct mlx4_ib_qp *to_mibqp(struct mlx4_qp *mqp)
{
return container_of(mqp, struct mlx4_ib_qp, mqp);
}
static inline struct mlx4_ib_srq *to_msrq(struct ib_srq *ibsrq)
{
return container_of(ibsrq, struct mlx4_ib_srq, ibsrq);
}
static inline struct mlx4_ib_srq *to_mibsrq(struct mlx4_srq *msrq)
{
return container_of(msrq, struct mlx4_ib_srq, msrq);
}
static inline struct mlx4_ib_ah *to_mah(struct ib_ah *ibah)
{
return container_of(ibah, struct mlx4_ib_ah, ibah);
}
int mlx4_ib_db_alloc(struct mlx4_ib_dev *dev, struct mlx4_ib_db *db, int order);
void mlx4_ib_db_free(struct mlx4_ib_dev *dev, struct mlx4_ib_db *db);
int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt,
struct mlx4_ib_db *db);
void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_ib_db *db);
struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc);
int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
struct ib_umem *umem);
struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
struct ib_udata *udata);
int mlx4_ib_dereg_mr(struct ib_mr *mr);
struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector,
struct ib_ucontext *context,
struct ib_udata *udata);
int mlx4_ib_destroy_cq(struct ib_cq *cq);
int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
int mlx4_ib_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq);
void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq);
struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
int mlx4_ib_destroy_ah(struct ib_ah *ah);
struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
struct ib_srq_init_attr *init_attr,
struct ib_udata *udata);
int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
int mlx4_ib_destroy_srq(struct ib_srq *srq);
void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index);
int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr);
struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata);
int mlx4_ib_destroy_qp(struct ib_qp *qp);
int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata);
int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct ib_send_wr **bad_wr);
int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr);
int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey,
int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
void *in_mad, void *response_mad);
int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
struct ib_wc *in_wc, struct ib_grh *in_grh,
struct ib_mad *in_mad, struct ib_mad *out_mad);
int mlx4_ib_mad_init(struct mlx4_ib_dev *dev);
void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev);
static inline int mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah)
{
return !!(ah->av.g_slid & 0x80);
}
#endif /* MLX4_IB_H */
/*
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "mlx4_ib.h"
static u32 convert_access(int acc)
{
return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX4_PERM_ATOMIC : 0) |
(acc & IB_ACCESS_REMOTE_WRITE ? MLX4_PERM_REMOTE_WRITE : 0) |
(acc & IB_ACCESS_REMOTE_READ ? MLX4_PERM_REMOTE_READ : 0) |
(acc & IB_ACCESS_LOCAL_WRITE ? MLX4_PERM_LOCAL_WRITE : 0) |
MLX4_PERM_LOCAL_READ;
}
struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc)
{
struct mlx4_ib_mr *mr;
int err;
mr = kmalloc(sizeof *mr, GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
err = mlx4_mr_alloc(to_mdev(pd->device)->dev, to_mpd(pd)->pdn, 0,
~0ull, convert_access(acc), 0, 0, &mr->mmr);
if (err)
goto err_free;
err = mlx4_mr_enable(to_mdev(pd->device)->dev, &mr->mmr);
if (err)
goto err_mr;
mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
mr->umem = NULL;
return &mr->ibmr;
err_mr:
mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);
err_free:
kfree(mr);
return ERR_PTR(err);
}
int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
struct ib_umem *umem)
{
u64 *pages;
struct ib_umem_chunk *chunk;
int i, j, k;
int n;
int len;
int err = 0;
pages = (u64 *) __get_free_page(GFP_KERNEL);
if (!pages)
return -ENOMEM;
i = n = 0;
list_for_each_entry(chunk, &umem->chunk_list, list)
for (j = 0; j < chunk->nmap; ++j) {
len = sg_dma_len(&chunk->page_list[j]) >> mtt->page_shift;
for (k = 0; k < len; ++k) {
pages[i++] = sg_dma_address(&chunk->page_list[j]) +
umem->page_size * k;
/*
* Be friendly to WRITE_MTT firmware
* command, and pass it chunks of
* appropriate size.
*/
if (i == PAGE_SIZE / sizeof (u64) - 2) {
err = mlx4_write_mtt(dev->dev, mtt, n,
i, pages);
if (err)
goto out;
n += i;
i = 0;
}
}
}
if (i)
err = mlx4_write_mtt(dev->dev, mtt, n, i, pages);
out:
free_page((unsigned long) pages);
return err;
}
struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
struct ib_udata *udata)
{
struct mlx4_ib_dev *dev = to_mdev(pd->device);
struct mlx4_ib_mr *mr;
int shift;
int err;
int n;
mr = kmalloc(sizeof *mr, GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
mr->umem = ib_umem_get(pd->uobject->context, start, length, access_flags);
if (IS_ERR(mr->umem)) {
err = PTR_ERR(mr->umem);
goto err_free;
}
n = ib_umem_page_count(mr->umem);
shift = ilog2(mr->umem->page_size);
err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length,
convert_access(access_flags), n, shift, &mr->mmr);
if (err)
goto err_umem;
err = mlx4_ib_umem_write_mtt(dev, &mr->mmr.mtt, mr->umem);
if (err)
goto err_mr;
err = mlx4_mr_enable(dev->dev, &mr->mmr);
if (err)
goto err_mr;
mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
return &mr->ibmr;
err_mr:
mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);
err_umem:
ib_umem_release(mr->umem);
err_free:
kfree(mr);
return ERR_PTR(err);
}
int mlx4_ib_dereg_mr(struct ib_mr *ibmr)
{
struct mlx4_ib_mr *mr = to_mmr(ibmr);
mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr);
if (mr->umem)
ib_umem_release(mr->umem);
kfree(mr);
return 0;
}
/*
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <rdma/ib_cache.h>
#include <rdma/ib_pack.h>
#include <linux/mlx4/qp.h>
#include "mlx4_ib.h"
#include "user.h"
enum {
MLX4_IB_ACK_REQ_FREQ = 8,
};
enum {
MLX4_IB_DEFAULT_SCHED_QUEUE = 0x83,
MLX4_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f
};
enum {
/*
* Largest possible UD header: send with GRH and immediate data.
*/
MLX4_IB_UD_HEADER_SIZE = 72
};
struct mlx4_ib_sqp {
struct mlx4_ib_qp qp;
int pkey_index;
u32 qkey;
u32 send_psn;
struct ib_ud_header ud_header;
u8 header_buf[MLX4_IB_UD_HEADER_SIZE];
};
static const __be32 mlx4_ib_opcode[] = {
[IB_WR_SEND] = __constant_cpu_to_be32(MLX4_OPCODE_SEND),
[IB_WR_SEND_WITH_IMM] = __constant_cpu_to_be32(MLX4_OPCODE_SEND_IMM),
[IB_WR_RDMA_WRITE] = __constant_cpu_to_be32(MLX4_OPCODE_RDMA_WRITE),
[IB_WR_RDMA_WRITE_WITH_IMM] = __constant_cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM),
[IB_WR_RDMA_READ] = __constant_cpu_to_be32(MLX4_OPCODE_RDMA_READ),
[IB_WR_ATOMIC_CMP_AND_SWP] = __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_CS),
[IB_WR_ATOMIC_FETCH_AND_ADD] = __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_FA),
};
static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
{
return container_of(mqp, struct mlx4_ib_sqp, qp);
}
static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
{
return qp->mqp.qpn >= dev->dev->caps.sqp_start &&
qp->mqp.qpn <= dev->dev->caps.sqp_start + 3;
}
static int is_qp0(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
{
return qp->mqp.qpn >= dev->dev->caps.sqp_start &&
qp->mqp.qpn <= dev->dev->caps.sqp_start + 1;
}
static void *get_wqe(struct mlx4_ib_qp *qp, int offset)
{
if (qp->buf.nbufs == 1)
return qp->buf.u.direct.buf + offset;
else
return qp->buf.u.page_list[offset >> PAGE_SHIFT].buf +
(offset & (PAGE_SIZE - 1));
}
static void *get_recv_wqe(struct mlx4_ib_qp *qp, int n)
{
return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift));
}
static void *get_send_wqe(struct mlx4_ib_qp *qp, int n)
{
return get_wqe(qp, qp->sq.offset + (n << qp->sq.wqe_shift));
}
static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type)
{
struct ib_event event;
struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;
if (type == MLX4_EVENT_TYPE_PATH_MIG)
to_mibqp(qp)->port = to_mibqp(qp)->alt_port;
if (ibqp->event_handler) {
event.device = ibqp->device;
event.element.qp = ibqp;
switch (type) {
case MLX4_EVENT_TYPE_PATH_MIG:
event.event = IB_EVENT_PATH_MIG;
break;
case MLX4_EVENT_TYPE_COMM_EST:
event.event = IB_EVENT_COMM_EST;
break;
case MLX4_EVENT_TYPE_SQ_DRAINED:
event.event = IB_EVENT_SQ_DRAINED;
break;
case MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE:
event.event = IB_EVENT_QP_LAST_WQE_REACHED;
break;
case MLX4_EVENT_TYPE_WQ_CATAS_ERROR:
event.event = IB_EVENT_QP_FATAL;
break;
case MLX4_EVENT_TYPE_PATH_MIG_FAILED:
event.event = IB_EVENT_PATH_MIG_ERR;
break;
case MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
event.event = IB_EVENT_QP_REQ_ERR;
break;
case MLX4_EVENT_TYPE_WQ_ACCESS_ERROR:
event.event = IB_EVENT_QP_ACCESS_ERR;
break;
default:
printk(KERN_WARNING "mlx4_ib: Unexpected event type %d "
"on QP %06x\n", type, qp->qpn);
return;
}
ibqp->event_handler(&event, ibqp->qp_context);
}
}
static int send_wqe_overhead(enum ib_qp_type type)
{
/*
* UD WQEs must have a datagram segment.
* RC and UC WQEs might have a remote address segment.
* MLX WQEs need two extra inline data segments (for the UD
* header and space for the ICRC).
*/
switch (type) {
case IB_QPT_UD:
return sizeof (struct mlx4_wqe_ctrl_seg) +
sizeof (struct mlx4_wqe_datagram_seg);
case IB_QPT_UC:
return sizeof (struct mlx4_wqe_ctrl_seg) +
sizeof (struct mlx4_wqe_raddr_seg);
case IB_QPT_RC:
return sizeof (struct mlx4_wqe_ctrl_seg) +
sizeof (struct mlx4_wqe_atomic_seg) +
sizeof (struct mlx4_wqe_raddr_seg);
case IB_QPT_SMI:
case IB_QPT_GSI:
return sizeof (struct mlx4_wqe_ctrl_seg) +
ALIGN(MLX4_IB_UD_HEADER_SIZE +
sizeof (struct mlx4_wqe_inline_seg),
sizeof (struct mlx4_wqe_data_seg)) +
ALIGN(4 +
sizeof (struct mlx4_wqe_inline_seg),
sizeof (struct mlx4_wqe_data_seg));
default:
return sizeof (struct mlx4_wqe_ctrl_seg);
}
}
static int set_qp_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
enum ib_qp_type type, struct mlx4_ib_qp *qp)
{
/* Sanity check QP size before proceeding */
if (cap->max_send_wr > dev->dev->caps.max_wqes ||
cap->max_recv_wr > dev->dev->caps.max_wqes ||
cap->max_send_sge > dev->dev->caps.max_sq_sg ||
cap->max_recv_sge > dev->dev->caps.max_rq_sg ||
cap->max_inline_data + send_wqe_overhead(type) +
sizeof (struct mlx4_wqe_inline_seg) > dev->dev->caps.max_sq_desc_sz)
return -EINVAL;
/*
* For MLX transport we need 2 extra S/G entries:
* one for the header and one for the checksum at the end
*/
if ((type == IB_QPT_SMI || type == IB_QPT_GSI) &&
cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg)
return -EINVAL;
qp->rq.max = cap->max_recv_wr ? roundup_pow_of_two(cap->max_recv_wr) : 0;
qp->sq.max = cap->max_send_wr ? roundup_pow_of_two(cap->max_send_wr) : 0;
qp->rq.wqe_shift = ilog2(roundup_pow_of_two(cap->max_recv_sge *
sizeof (struct mlx4_wqe_data_seg)));
qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof (struct mlx4_wqe_data_seg);
qp->sq.wqe_shift = ilog2(roundup_pow_of_two(max(cap->max_send_sge *
sizeof (struct mlx4_wqe_data_seg),
cap->max_inline_data +
sizeof (struct mlx4_wqe_inline_seg)) +
send_wqe_overhead(type)));
qp->sq.max_gs = ((1 << qp->sq.wqe_shift) - send_wqe_overhead(type)) /
sizeof (struct mlx4_wqe_data_seg);
qp->buf_size = (qp->rq.max << qp->rq.wqe_shift) +
(qp->sq.max << qp->sq.wqe_shift);
if (qp->rq.wqe_shift > qp->sq.wqe_shift) {
qp->rq.offset = 0;
qp->sq.offset = qp->rq.max << qp->rq.wqe_shift;
} else {
qp->rq.offset = qp->sq.max << qp->sq.wqe_shift;
qp->sq.offset = 0;
}
cap->max_send_wr = qp->sq.max;
cap->max_recv_wr = qp->rq.max;
cap->max_send_sge = qp->sq.max_gs;
cap->max_recv_sge = qp->rq.max_gs;
cap->max_inline_data = (1 << qp->sq.wqe_shift) - send_wqe_overhead(type) -
sizeof (struct mlx4_wqe_inline_seg);
return 0;
}
static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata, int sqpn, struct mlx4_ib_qp *qp)
{
struct mlx4_wqe_ctrl_seg *ctrl;
int err;
int i;
mutex_init(&qp->mutex);
spin_lock_init(&qp->sq.lock);
spin_lock_init(&qp->rq.lock);
qp->state = IB_QPS_RESET;
qp->atomic_rd_en = 0;
qp->resp_depth = 0;
qp->rq.head = 0;
qp->rq.tail = 0;
qp->sq.head = 0;
qp->sq.tail = 0;
err = set_qp_size(dev, &init_attr->cap, init_attr->qp_type, qp);
if (err)
goto err;
if (pd->uobject) {
struct mlx4_ib_create_qp ucmd;
if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
err = -EFAULT;
goto err;
}
qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
qp->buf_size, 0);
if (IS_ERR(qp->umem)) {
err = PTR_ERR(qp->umem);
goto err;
}
err = mlx4_mtt_init(dev->dev, ib_umem_page_count(qp->umem),
ilog2(qp->umem->page_size), &qp->mtt);
if (err)
goto err_buf;
err = mlx4_ib_umem_write_mtt(dev, &qp->mtt, qp->umem);
if (err)
goto err_mtt;
err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context),
ucmd.db_addr, &qp->db);
if (err)
goto err_mtt;
} else {
err = mlx4_ib_db_alloc(dev, &qp->db, 0);
if (err)
goto err;
*qp->db.db = 0;
if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf)) {
err = -ENOMEM;
goto err_db;
}
err = mlx4_mtt_init(dev->dev, qp->buf.npages, qp->buf.page_shift,
&qp->mtt);
if (err)
goto err_buf;
err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf);
if (err)
goto err_mtt;
for (i = 0; i < qp->sq.max; ++i) {
ctrl = get_send_wqe(qp, i);
ctrl->owner_opcode = cpu_to_be32(1 << 31);
}
qp->sq.wrid = kmalloc(qp->sq.max * sizeof (u64), GFP_KERNEL);
qp->rq.wrid = kmalloc(qp->rq.max * sizeof (u64), GFP_KERNEL);
if (!qp->sq.wrid || !qp->rq.wrid) {
err = -ENOMEM;
goto err_wrid;
}
/* We don't support inline sends for kernel QPs (yet) */
init_attr->cap.max_inline_data = 0;
}
err = mlx4_qp_alloc(dev->dev, sqpn, &qp->mqp);
if (err)
goto err_wrid;
/*
* Hardware wants QPN written in big-endian order (after
* shifting) for send doorbell. Precompute this value to save
* a little bit when posting sends.
*/
qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
else
qp->sq_signal_bits = 0;
qp->mqp.event = mlx4_ib_qp_event;
return 0;
err_wrid:
if (pd->uobject)
mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db);
else {
kfree(qp->sq.wrid);
kfree(qp->rq.wrid);
}
err_mtt:
mlx4_mtt_cleanup(dev->dev, &qp->mtt);
err_buf:
if (pd->uobject)
ib_umem_release(qp->umem);
else
mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);
err_db:
if (!pd->uobject)
mlx4_ib_db_free(dev, &qp->db);
err:
return err;
}
static enum mlx4_qp_state to_mlx4_state(enum ib_qp_state state)
{
switch (state) {
case IB_QPS_RESET: return MLX4_QP_STATE_RST;
case IB_QPS_INIT: return MLX4_QP_STATE_INIT;
case IB_QPS_RTR: return MLX4_QP_STATE_RTR;
case IB_QPS_RTS: return MLX4_QP_STATE_RTS;
case IB_QPS_SQD: return MLX4_QP_STATE_SQD;
case IB_QPS_SQE: return MLX4_QP_STATE_SQER;
case IB_QPS_ERR: return MLX4_QP_STATE_ERR;
default: return -1;
}
}
static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq)
{
if (send_cq == recv_cq)
spin_lock_irq(&send_cq->lock);
else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
spin_lock_irq(&send_cq->lock);
spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
} else {
spin_lock_irq(&recv_cq->lock);
spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING);
}
}
static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq)
{
if (send_cq == recv_cq)
spin_unlock_irq(&send_cq->lock);
else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
spin_unlock(&recv_cq->lock);
spin_unlock_irq(&send_cq->lock);
} else {
spin_unlock(&send_cq->lock);
spin_unlock_irq(&recv_cq->lock);
}
}
static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
int is_user)
{
struct mlx4_ib_cq *send_cq, *recv_cq;
if (qp->state != IB_QPS_RESET)
if (mlx4_qp_modify(dev->dev, NULL, to_mlx4_state(qp->state),
MLX4_QP_STATE_RST, NULL, 0, 0, &qp->mqp))
printk(KERN_WARNING "mlx4_ib: modify QP %06x to RESET failed.\n",
qp->mqp.qpn);
send_cq = to_mcq(qp->ibqp.send_cq);
recv_cq = to_mcq(qp->ibqp.recv_cq);
mlx4_ib_lock_cqs(send_cq, recv_cq);
if (!is_user) {
__mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn,
qp->ibqp.srq ? to_msrq(qp->ibqp.srq): NULL);
if (send_cq != recv_cq)
__mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
}
mlx4_qp_remove(dev->dev, &qp->mqp);
mlx4_ib_unlock_cqs(send_cq, recv_cq);
mlx4_qp_free(dev->dev, &qp->mqp);
mlx4_mtt_cleanup(dev->dev, &qp->mtt);
if (is_user) {
mlx4_ib_db_unmap_user(to_mucontext(qp->ibqp.uobject->context),
&qp->db);
ib_umem_release(qp->umem);
} else {
kfree(qp->sq.wrid);
kfree(qp->rq.wrid);
mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);
mlx4_ib_db_free(dev, &qp->db);
}
}
struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata)
{
struct mlx4_ib_dev *dev = to_mdev(pd->device);
struct mlx4_ib_sqp *sqp;
struct mlx4_ib_qp *qp;
int err;
switch (init_attr->qp_type) {
case IB_QPT_RC:
case IB_QPT_UC:
case IB_QPT_UD:
{
qp = kmalloc(sizeof *qp, GFP_KERNEL);
if (!qp)
return ERR_PTR(-ENOMEM);
err = create_qp_common(dev, pd, init_attr, udata, 0, qp);
if (err) {
kfree(qp);
return ERR_PTR(err);
}
qp->ibqp.qp_num = qp->mqp.qpn;
break;
}
case IB_QPT_SMI:
case IB_QPT_GSI:
{
/* Userspace is not allowed to create special QPs: */
if (pd->uobject)
return ERR_PTR(-EINVAL);
sqp = kmalloc(sizeof *sqp, GFP_KERNEL);
if (!sqp)
return ERR_PTR(-ENOMEM);
qp = &sqp->qp;
err = create_qp_common(dev, pd, init_attr, udata,
dev->dev->caps.sqp_start +
(init_attr->qp_type == IB_QPT_SMI ? 0 : 2) +
init_attr->port_num - 1,
qp);
if (err) {
kfree(sqp);
return ERR_PTR(err);
}
qp->port = init_attr->port_num;
qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1;
break;
}
default:
/* Don't support raw QPs */
return ERR_PTR(-EINVAL);
}
return &qp->ibqp;
}
int mlx4_ib_destroy_qp(struct ib_qp *qp)
{
struct mlx4_ib_dev *dev = to_mdev(qp->device);
struct mlx4_ib_qp *mqp = to_mqp(qp);
if (is_qp0(dev, mqp))
mlx4_CLOSE_PORT(dev->dev, mqp->port);
destroy_qp_common(dev, mqp, !!qp->pd->uobject);
if (is_sqp(dev, mqp))
kfree(to_msqp(mqp));
else
kfree(mqp);
return 0;
}
static void init_port(struct mlx4_ib_dev *dev, int port)
{
struct mlx4_init_port_param param;
int err;
memset(&param, 0, sizeof param);
param.port_width_cap = dev->dev->caps.port_width_cap;
param.vl_cap = dev->dev->caps.vl_cap;
param.mtu = ib_mtu_enum_to_int(dev->dev->caps.mtu_cap);
param.max_gid = dev->dev->caps.gid_table_len;
param.max_pkey = dev->dev->caps.pkey_table_len;
err = mlx4_INIT_PORT(dev->dev, &param, port);
if (err)
printk(KERN_WARNING "INIT_PORT failed, return code %d.\n", err);
}
static int to_mlx4_st(enum ib_qp_type type)
{
switch (type) {
case IB_QPT_RC: return MLX4_QP_ST_RC;
case IB_QPT_UC: return MLX4_QP_ST_UC;
case IB_QPT_UD: return MLX4_QP_ST_UD;
case IB_QPT_SMI:
case IB_QPT_GSI: return MLX4_QP_ST_MLX;
default: return -1;
}
}
static __be32 to_mlx4_access_flags(struct mlx4_ib_qp *qp, struct ib_qp_attr *attr,
int attr_mask)
{
u8 dest_rd_atomic;
u32 access_flags;
u32 hw_access_flags = 0;
if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
dest_rd_atomic = attr->max_dest_rd_atomic;
else
dest_rd_atomic = qp->resp_depth;
if (attr_mask & IB_QP_ACCESS_FLAGS)
access_flags = attr->qp_access_flags;
else
access_flags = qp->atomic_rd_en;
if (!dest_rd_atomic)
access_flags &= IB_ACCESS_REMOTE_WRITE;
if (access_flags & IB_ACCESS_REMOTE_READ)
hw_access_flags |= MLX4_QP_BIT_RRE;
if (access_flags & IB_ACCESS_REMOTE_ATOMIC)
hw_access_flags |= MLX4_QP_BIT_RAE;
if (access_flags & IB_ACCESS_REMOTE_WRITE)
hw_access_flags |= MLX4_QP_BIT_RWE;
return cpu_to_be32(hw_access_flags);
}
static void store_sqp_attrs(struct mlx4_ib_sqp *sqp, struct ib_qp_attr *attr,
int attr_mask)
{
if (attr_mask & IB_QP_PKEY_INDEX)
sqp->pkey_index = attr->pkey_index;
if (attr_mask & IB_QP_QKEY)
sqp->qkey = attr->qkey;
if (attr_mask & IB_QP_SQ_PSN)
sqp->send_psn = attr->sq_psn;
}
static void mlx4_set_sched(struct mlx4_qp_path *path, u8 port)
{
path->sched_queue = (path->sched_queue & 0xbf) | ((port - 1) << 6);
}
static int mlx4_set_path(struct mlx4_ib_dev *dev, struct ib_ah_attr *ah,
struct mlx4_qp_path *path, u8 port)
{
path->grh_mylmc = ah->src_path_bits & 0x7f;
path->rlid = cpu_to_be16(ah->dlid);
if (ah->static_rate) {
path->static_rate = ah->static_rate + MLX4_STAT_RATE_OFFSET;
while (path->static_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
!(1 << path->static_rate & dev->dev->caps.stat_rate_support))
--path->static_rate;
} else
path->static_rate = 0;
path->counter_index = 0xff;
if (ah->ah_flags & IB_AH_GRH) {
if (ah->grh.sgid_index >= dev->dev->caps.gid_table_len) {
printk(KERN_ERR "sgid_index (%u) too large. max is %d\n",
ah->grh.sgid_index, dev->dev->caps.gid_table_len - 1);
return -1;
}
path->grh_mylmc |= 1 << 7;
path->mgid_index = ah->grh.sgid_index;
path->hop_limit = ah->grh.hop_limit;
path->tclass_flowlabel =
cpu_to_be32((ah->grh.traffic_class << 20) |
(ah->grh.flow_label));
memcpy(path->rgid, ah->grh.dgid.raw, 16);
}
path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |
((port - 1) << 6) | ((ah->sl & 0xf) << 2);
return 0;
}
int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata)
{
struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
struct mlx4_ib_qp *qp = to_mqp(ibqp);
struct mlx4_qp_context *context;
enum mlx4_qp_optpar optpar = 0;
enum ib_qp_state cur_state, new_state;
int sqd_event;
int err = -EINVAL;
context = kzalloc(sizeof *context, GFP_KERNEL);
if (!context)
return -ENOMEM;
mutex_lock(&qp->mutex);
cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask))
goto out;
if ((attr_mask & IB_QP_PKEY_INDEX) &&
attr->pkey_index >= dev->dev->caps.pkey_table_len) {
goto out;
}
if ((attr_mask & IB_QP_PORT) &&
(attr->port_num == 0 || attr->port_num > dev->dev->caps.num_ports)) {
goto out;
}
if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
attr->max_rd_atomic > dev->dev->caps.max_qp_init_rdma) {
goto out;
}
if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
attr->max_dest_rd_atomic > 1 << dev->dev->caps.max_qp_dest_rdma) {
goto out;
}
context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) |
(to_mlx4_st(ibqp->qp_type) << 16));
context->flags |= cpu_to_be32(1 << 8); /* DE? */
if (!(attr_mask & IB_QP_PATH_MIG_STATE))
context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);
else {
optpar |= MLX4_QP_OPTPAR_PM_STATE;
switch (attr->path_mig_state) {
case IB_MIG_MIGRATED:
context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);
break;
case IB_MIG_REARM:
context->flags |= cpu_to_be32(MLX4_QP_PM_REARM << 11);
break;
case IB_MIG_ARMED:
context->flags |= cpu_to_be32(MLX4_QP_PM_ARMED << 11);
break;
}
}
if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI ||
ibqp->qp_type == IB_QPT_UD)
context->mtu_msgmax = (IB_MTU_4096 << 5) | 11;
else if (attr_mask & IB_QP_PATH_MTU) {
if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_4096) {
printk(KERN_ERR "path MTU (%u) is invalid\n",
attr->path_mtu);
return -EINVAL;
}
context->mtu_msgmax = (attr->path_mtu << 5) | 31;
}
if (qp->rq.max)
context->rq_size_stride = ilog2(qp->rq.max) << 3;
context->rq_size_stride |= qp->rq.wqe_shift - 4;
if (qp->sq.max)
context->sq_size_stride = ilog2(qp->sq.max) << 3;
context->sq_size_stride |= qp->sq.wqe_shift - 4;
if (qp->ibqp.uobject)
context->usr_page = cpu_to_be32(to_mucontext(ibqp->uobject->context)->uar.index);
else
context->usr_page = cpu_to_be32(dev->priv_uar.index);
if (attr_mask & IB_QP_DEST_QPN)
context->remote_qpn = cpu_to_be32(attr->dest_qp_num);
if (attr_mask & IB_QP_PORT) {
if (cur_state == IB_QPS_SQD && new_state == IB_QPS_SQD &&
!(attr_mask & IB_QP_AV)) {
mlx4_set_sched(&context->pri_path, attr->port_num);
optpar |= MLX4_QP_OPTPAR_SCHED_QUEUE;
}
}
if (attr_mask & IB_QP_PKEY_INDEX) {
context->pri_path.pkey_index = attr->pkey_index;
optpar |= MLX4_QP_OPTPAR_PKEY_INDEX;
}
if (attr_mask & IB_QP_RNR_RETRY) {
context->params1 |= cpu_to_be32(attr->rnr_retry << 13);
optpar |= MLX4_QP_OPTPAR_RNR_RETRY;
}
if (attr_mask & IB_QP_AV) {
if (mlx4_set_path(dev, &attr->ah_attr, &context->pri_path,
attr_mask & IB_QP_PORT ? attr->port_num : qp->port)) {
err = -EINVAL;
goto out;
}
optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH |
MLX4_QP_OPTPAR_SCHED_QUEUE);
}
if (attr_mask & IB_QP_TIMEOUT) {
context->pri_path.ackto = attr->timeout << 3;
optpar |= MLX4_QP_OPTPAR_ACK_TIMEOUT;
}
if (attr_mask & IB_QP_ALT_PATH) {
if (attr->alt_pkey_index >= dev->dev->caps.pkey_table_len)
return -EINVAL;
if (attr->alt_port_num == 0 ||
attr->alt_port_num > dev->dev->caps.num_ports)
return -EINVAL;
if (mlx4_set_path(dev, &attr->alt_ah_attr, &context->alt_path,
attr->alt_port_num))
return -EINVAL;
context->alt_path.pkey_index = attr->alt_pkey_index;
context->alt_path.ackto = attr->alt_timeout << 3;
optpar |= MLX4_QP_OPTPAR_ALT_ADDR_PATH;
}
context->pd = cpu_to_be32(to_mpd(ibqp->pd)->pdn);
context->params1 = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28);
if (attr_mask & IB_QP_RETRY_CNT) {
context->params1 |= cpu_to_be32(attr->retry_cnt << 16);
optpar |= MLX4_QP_OPTPAR_RETRY_COUNT;
}
if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
if (attr->max_rd_atomic)
context->params1 |=
cpu_to_be32(fls(attr->max_rd_atomic - 1) << 21);
optpar |= MLX4_QP_OPTPAR_SRA_MAX;
}
if (attr_mask & IB_QP_SQ_PSN)
context->next_send_psn = cpu_to_be32(attr->sq_psn);
context->cqn_send = cpu_to_be32(to_mcq(ibqp->send_cq)->mcq.cqn);
if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
if (attr->max_dest_rd_atomic)
context->params2 |=
cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21);
optpar |= MLX4_QP_OPTPAR_RRA_MAX;
}
if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) {
context->params2 |= to_mlx4_access_flags(qp, attr, attr_mask);
optpar |= MLX4_QP_OPTPAR_RWE | MLX4_QP_OPTPAR_RRE | MLX4_QP_OPTPAR_RAE;
}
if (ibqp->srq)
context->params2 |= cpu_to_be32(MLX4_QP_BIT_RIC);
if (attr_mask & IB_QP_MIN_RNR_TIMER) {
context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24);
optpar |= MLX4_QP_OPTPAR_RNR_TIMEOUT;
}
if (attr_mask & IB_QP_RQ_PSN)
context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn);
context->cqn_recv = cpu_to_be32(to_mcq(ibqp->recv_cq)->mcq.cqn);
if (attr_mask & IB_QP_QKEY) {
context->qkey = cpu_to_be32(attr->qkey);
optpar |= MLX4_QP_OPTPAR_Q_KEY;
}
if (ibqp->srq)
context->srqn = cpu_to_be32(1 << 24 | to_msrq(ibqp->srq)->msrq.srqn);
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
context->db_rec_addr = cpu_to_be64(qp->db.dma);
if (cur_state == IB_QPS_INIT &&
new_state == IB_QPS_RTR &&
(ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI ||
ibqp->qp_type == IB_QPT_UD)) {
context->pri_path.sched_queue = (qp->port - 1) << 6;
if (is_qp0(dev, qp))
context->pri_path.sched_queue |= MLX4_IB_DEFAULT_QP0_SCHED_QUEUE;
else
context->pri_path.sched_queue |= MLX4_IB_DEFAULT_SCHED_QUEUE;
}
if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD &&
attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify)
sqd_event = 1;
else
sqd_event = 0;
err = mlx4_qp_modify(dev->dev, &qp->mtt, to_mlx4_state(cur_state),
to_mlx4_state(new_state), context, optpar,
sqd_event, &qp->mqp);
if (err)
goto out;
qp->state = new_state;
if (attr_mask & IB_QP_ACCESS_FLAGS)
qp->atomic_rd_en = attr->qp_access_flags;
if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
qp->resp_depth = attr->max_dest_rd_atomic;
if (attr_mask & IB_QP_PORT)
qp->port = attr->port_num;
if (attr_mask & IB_QP_ALT_PATH)
qp->alt_port = attr->alt_port_num;
if (is_sqp(dev, qp))
store_sqp_attrs(to_msqp(qp), attr, attr_mask);
/*
* If we moved QP0 to RTR, bring the IB link up; if we moved
* QP0 to RESET or ERROR, bring the link back down.
*/
if (is_qp0(dev, qp)) {
if (cur_state != IB_QPS_RTR && new_state == IB_QPS_RTR)
init_port(dev, qp->port);
if (cur_state != IB_QPS_RESET && cur_state != IB_QPS_ERR &&
(new_state == IB_QPS_RESET || new_state == IB_QPS_ERR))
mlx4_CLOSE_PORT(dev->dev, qp->port);
}
/*
* If we moved a kernel QP to RESET, clean up all old CQ
* entries and reinitialize the QP.
*/
if (new_state == IB_QPS_RESET && !ibqp->uobject) {
mlx4_ib_cq_clean(to_mcq(ibqp->recv_cq), qp->mqp.qpn,
ibqp->srq ? to_msrq(ibqp->srq): NULL);
if (ibqp->send_cq != ibqp->recv_cq)
mlx4_ib_cq_clean(to_mcq(ibqp->send_cq), qp->mqp.qpn, NULL);
qp->rq.head = 0;
qp->rq.tail = 0;
qp->sq.head = 0;
qp->sq.tail = 0;
*qp->db.db = 0;
}
out:
mutex_unlock(&qp->mutex);
kfree(context);
return err;
}
static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
void *wqe)
{
struct ib_device *ib_dev = &to_mdev(sqp->qp.ibqp.device)->ib_dev;
struct mlx4_wqe_mlx_seg *mlx = wqe;
struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
u16 pkey;
int send_size;
int header_size;
int i;
send_size = 0;
for (i = 0; i < wr->num_sge; ++i)
send_size += wr->sg_list[i].length;
ib_ud_header_init(send_size, mlx4_ib_ah_grh_present(ah), &sqp->ud_header);
sqp->ud_header.lrh.service_level =
be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 28;
sqp->ud_header.lrh.destination_lid = ah->av.dlid;
sqp->ud_header.lrh.source_lid = cpu_to_be16(ah->av.g_slid & 0x7f);
if (mlx4_ib_ah_grh_present(ah)) {
sqp->ud_header.grh.traffic_class =
(be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20) & 0xff;
sqp->ud_header.grh.flow_label =
ah->av.sl_tclass_flowlabel & cpu_to_be32(0xfffff);
ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.port_pd) >> 24,
ah->av.gid_index, &sqp->ud_header.grh.source_gid);
memcpy(sqp->ud_header.grh.destination_gid.raw,
ah->av.dgid, 16);
}
mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) |
(sqp->ud_header.lrh.destination_lid ==
IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) |
(sqp->ud_header.lrh.service_level << 8));
mlx->rlid = sqp->ud_header.lrh.destination_lid;
switch (wr->opcode) {
case IB_WR_SEND:
sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY;
sqp->ud_header.immediate_present = 0;
break;
case IB_WR_SEND_WITH_IMM:
sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
sqp->ud_header.immediate_present = 1;
sqp->ud_header.immediate_data = wr->imm_data;
break;
default:
return -EINVAL;
}
sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0;
if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE)
sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED);
if (!sqp->qp.ibqp.qp_num)
ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, &pkey);
else
ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->wr.ud.pkey_index, &pkey);
sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
sqp->ud_header.deth.qkey = cpu_to_be32(wr->wr.ud.remote_qkey & 0x80000000 ?
sqp->qkey : wr->wr.ud.remote_qkey);
sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num);
header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf);
if (0) {
printk(KERN_ERR "built UD header of size %d:\n", header_size);
for (i = 0; i < header_size / 4; ++i) {
if (i % 8 == 0)
printk(" [%02x] ", i * 4);
printk(" %08x",
be32_to_cpu(((__be32 *) sqp->header_buf)[i]));
if ((i + 1) % 8 == 0)
printk("\n");
}
printk("\n");
}
inl->byte_count = cpu_to_be32(1 << 31 | header_size);
memcpy(inl + 1, sqp->header_buf, header_size);
return ALIGN(sizeof (struct mlx4_wqe_inline_seg) + header_size, 16);
}
static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq)
{
unsigned cur;
struct mlx4_ib_cq *cq;
cur = wq->head - wq->tail;
if (likely(cur + nreq < wq->max))
return 0;
cq = to_mcq(ib_cq);
spin_lock(&cq->lock);
cur = wq->head - wq->tail;
spin_unlock(&cq->lock);
return cur + nreq >= wq->max;
}
int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct ib_send_wr **bad_wr)
{
struct mlx4_ib_qp *qp = to_mqp(ibqp);
void *wqe;
struct mlx4_wqe_ctrl_seg *ctrl;
unsigned long flags;
int nreq;
int err = 0;
int ind;
int size;
int i;
spin_lock_irqsave(&qp->rq.lock, flags);
ind = qp->sq.head;
for (nreq = 0; wr; ++nreq, wr = wr->next) {
if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
err = -ENOMEM;
*bad_wr = wr;
goto out;
}
if (unlikely(wr->num_sge > qp->sq.max_gs)) {
err = -EINVAL;
*bad_wr = wr;
goto out;
}
ctrl = wqe = get_send_wqe(qp, ind & (qp->sq.max - 1));
qp->sq.wrid[ind & (qp->sq.max - 1)] = wr->wr_id;
ctrl->srcrb_flags =
(wr->send_flags & IB_SEND_SIGNALED ?
cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) : 0) |
(wr->send_flags & IB_SEND_SOLICITED ?
cpu_to_be32(MLX4_WQE_CTRL_SOLICITED) : 0) |
qp->sq_signal_bits;
if (wr->opcode == IB_WR_SEND_WITH_IMM ||
wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
ctrl->imm = wr->imm_data;
else
ctrl->imm = 0;
wqe += sizeof *ctrl;
size = sizeof *ctrl / 16;
switch (ibqp->qp_type) {
case IB_QPT_RC:
case IB_QPT_UC:
switch (wr->opcode) {
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
((struct mlx4_wqe_raddr_seg *) wqe)->raddr =
cpu_to_be64(wr->wr.atomic.remote_addr);
((struct mlx4_wqe_raddr_seg *) wqe)->rkey =
cpu_to_be32(wr->wr.atomic.rkey);
((struct mlx4_wqe_raddr_seg *) wqe)->reserved = 0;
wqe += sizeof (struct mlx4_wqe_raddr_seg);
if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
((struct mlx4_wqe_atomic_seg *) wqe)->swap_add =
cpu_to_be64(wr->wr.atomic.swap);
((struct mlx4_wqe_atomic_seg *) wqe)->compare =
cpu_to_be64(wr->wr.atomic.compare_add);
} else {
((struct mlx4_wqe_atomic_seg *) wqe)->swap_add =
cpu_to_be64(wr->wr.atomic.compare_add);
((struct mlx4_wqe_atomic_seg *) wqe)->compare = 0;
}
wqe += sizeof (struct mlx4_wqe_atomic_seg);
size += (sizeof (struct mlx4_wqe_raddr_seg) +
sizeof (struct mlx4_wqe_atomic_seg)) / 16;
break;
case IB_WR_RDMA_READ:
case IB_WR_RDMA_WRITE:
case IB_WR_RDMA_WRITE_WITH_IMM:
((struct mlx4_wqe_raddr_seg *) wqe)->raddr =
cpu_to_be64(wr->wr.rdma.remote_addr);
((struct mlx4_wqe_raddr_seg *) wqe)->rkey =
cpu_to_be32(wr->wr.rdma.rkey);
((struct mlx4_wqe_raddr_seg *) wqe)->reserved = 0;
wqe += sizeof (struct mlx4_wqe_raddr_seg);
size += sizeof (struct mlx4_wqe_raddr_seg) / 16;
break;
default:
/* No extra segments required for sends */
break;
}
break;
case IB_QPT_UD:
memcpy(((struct mlx4_wqe_datagram_seg *) wqe)->av,
&to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av));
((struct mlx4_wqe_datagram_seg *) wqe)->dqpn =
cpu_to_be32(wr->wr.ud.remote_qpn);
((struct mlx4_wqe_datagram_seg *) wqe)->qkey =
cpu_to_be32(wr->wr.ud.remote_qkey);
wqe += sizeof (struct mlx4_wqe_datagram_seg);
size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
break;
case IB_QPT_SMI:
case IB_QPT_GSI:
err = build_mlx_header(to_msqp(qp), wr, ctrl);
if (err < 0) {
*bad_wr = wr;
goto out;
}
wqe += err;
size += err / 16;
err = 0;
break;
default:
break;
}
for (i = 0; i < wr->num_sge; ++i) {
((struct mlx4_wqe_data_seg *) wqe)->byte_count =
cpu_to_be32(wr->sg_list[i].length);
((struct mlx4_wqe_data_seg *) wqe)->lkey =
cpu_to_be32(wr->sg_list[i].lkey);
((struct mlx4_wqe_data_seg *) wqe)->addr =
cpu_to_be64(wr->sg_list[i].addr);
wqe += sizeof (struct mlx4_wqe_data_seg);
size += sizeof (struct mlx4_wqe_data_seg) / 16;
}
/* Add one more inline data segment for ICRC for MLX sends */
if (qp->ibqp.qp_type == IB_QPT_SMI || qp->ibqp.qp_type == IB_QPT_GSI) {
((struct mlx4_wqe_inline_seg *) wqe)->byte_count =
cpu_to_be32((1 << 31) | 4);
((u32 *) wqe)[1] = 0;
wqe += sizeof (struct mlx4_wqe_data_seg);
size += sizeof (struct mlx4_wqe_data_seg) / 16;
}
ctrl->fence_size = (wr->send_flags & IB_SEND_FENCE ?
MLX4_WQE_CTRL_FENCE : 0) | size;
/*
* Make sure descriptor is fully written before
* setting ownership bit (because HW can start
* executing as soon as we do).
*/
wmb();
if (wr->opcode < 0 || wr->opcode > ARRAY_SIZE(mlx4_ib_opcode)) {
err = -EINVAL;
goto out;
}
ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] |
(ind & qp->sq.max ? cpu_to_be32(1 << 31) : 0);
++ind;
}
out:
if (likely(nreq)) {
qp->sq.head += nreq;
/*
* Make sure that descriptors are written before
* doorbell record.
*/
wmb();
writel(qp->doorbell_qpn,
to_mdev(ibqp->device)->uar_map + MLX4_SEND_DOORBELL);
/*
* Make sure doorbells don't leak out of SQ spinlock
* and reach the HCA out of order.
*/
mmiowb();
}
spin_unlock_irqrestore(&qp->rq.lock, flags);
return err;
}
int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr)
{
struct mlx4_ib_qp *qp = to_mqp(ibqp);
struct mlx4_wqe_data_seg *scat;
unsigned long flags;
int err = 0;
int nreq;
int ind;
int i;
spin_lock_irqsave(&qp->rq.lock, flags);
ind = qp->rq.head & (qp->rq.max - 1);
for (nreq = 0; wr; ++nreq, wr = wr->next) {
if (mlx4_wq_overflow(&qp->rq, nreq, qp->ibqp.send_cq)) {
err = -ENOMEM;
*bad_wr = wr;
goto out;
}
if (unlikely(wr->num_sge > qp->rq.max_gs)) {
err = -EINVAL;
*bad_wr = wr;
goto out;
}
scat = get_recv_wqe(qp, ind);
for (i = 0; i < wr->num_sge; ++i) {
scat[i].byte_count = cpu_to_be32(wr->sg_list[i].length);
scat[i].lkey = cpu_to_be32(wr->sg_list[i].lkey);
scat[i].addr = cpu_to_be64(wr->sg_list[i].addr);
}
if (i < qp->rq.max_gs) {
scat[i].byte_count = 0;
scat[i].lkey = cpu_to_be32(MLX4_INVALID_LKEY);
scat[i].addr = 0;
}
qp->rq.wrid[ind] = wr->wr_id;
ind = (ind + 1) & (qp->rq.max - 1);
}
out:
if (likely(nreq)) {
qp->rq.head += nreq;
/*
* Make sure that descriptors are written before
* doorbell record.
*/
wmb();
*qp->db.db = cpu_to_be32(qp->rq.head & 0xffff);
}
spin_unlock_irqrestore(&qp->rq.lock, flags);
return err;
}
/*
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/mlx4/qp.h>
#include <linux/mlx4/srq.h>
#include "mlx4_ib.h"
#include "user.h"
static void *get_wqe(struct mlx4_ib_srq *srq, int n)
{
int offset = n << srq->msrq.wqe_shift;
if (srq->buf.nbufs == 1)
return srq->buf.u.direct.buf + offset;
else
return srq->buf.u.page_list[offset >> PAGE_SHIFT].buf +
(offset & (PAGE_SIZE - 1));
}
static void mlx4_ib_srq_event(struct mlx4_srq *srq, enum mlx4_event type)
{
struct ib_event event;
struct ib_srq *ibsrq = &to_mibsrq(srq)->ibsrq;
if (ibsrq->event_handler) {
event.device = ibsrq->device;
event.element.srq = ibsrq;
switch (type) {
case MLX4_EVENT_TYPE_SRQ_LIMIT:
event.event = IB_EVENT_SRQ_LIMIT_REACHED;
break;
case MLX4_EVENT_TYPE_SRQ_CATAS_ERROR:
event.event = IB_EVENT_SRQ_ERR;
break;
default:
printk(KERN_WARNING "mlx4_ib: Unexpected event type %d "
"on SRQ %06x\n", type, srq->srqn);
return;
}
ibsrq->event_handler(&event, ibsrq->srq_context);
}
}
struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
struct ib_srq_init_attr *init_attr,
struct ib_udata *udata)
{
struct mlx4_ib_dev *dev = to_mdev(pd->device);
struct mlx4_ib_srq *srq;
struct mlx4_wqe_srq_next_seg *next;
int desc_size;
int buf_size;
int err;
int i;
/* Sanity check SRQ size before proceeding */
if (init_attr->attr.max_wr >= dev->dev->caps.max_srq_wqes ||
init_attr->attr.max_sge > dev->dev->caps.max_srq_sge)
return ERR_PTR(-EINVAL);
srq = kmalloc(sizeof *srq, GFP_KERNEL);
if (!srq)
return ERR_PTR(-ENOMEM);
mutex_init(&srq->mutex);
spin_lock_init(&srq->lock);
srq->msrq.max = roundup_pow_of_two(init_attr->attr.max_wr + 1);
srq->msrq.max_gs = init_attr->attr.max_sge;
desc_size = max(32UL,
roundup_pow_of_two(sizeof (struct mlx4_wqe_srq_next_seg) +
srq->msrq.max_gs *
sizeof (struct mlx4_wqe_data_seg)));
srq->msrq.wqe_shift = ilog2(desc_size);
buf_size = srq->msrq.max * desc_size;
if (pd->uobject) {
struct mlx4_ib_create_srq ucmd;
if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
err = -EFAULT;
goto err_srq;
}
srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
buf_size, 0);
if (IS_ERR(srq->umem)) {
err = PTR_ERR(srq->umem);
goto err_srq;
}
err = mlx4_mtt_init(dev->dev, ib_umem_page_count(srq->umem),
ilog2(srq->umem->page_size), &srq->mtt);
if (err)
goto err_buf;
err = mlx4_ib_umem_write_mtt(dev, &srq->mtt, srq->umem);
if (err)
goto err_mtt;
err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context),
ucmd.db_addr, &srq->db);
if (err)
goto err_mtt;
} else {
err = mlx4_ib_db_alloc(dev, &srq->db, 0);
if (err)
goto err_srq;
*srq->db.db = 0;
if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &srq->buf)) {
err = -ENOMEM;
goto err_db;
}
srq->head = 0;
srq->tail = srq->msrq.max - 1;
srq->wqe_ctr = 0;
for (i = 0; i < srq->msrq.max; ++i) {
next = get_wqe(srq, i);
next->next_wqe_index =
cpu_to_be16((i + 1) & (srq->msrq.max - 1));
}
err = mlx4_mtt_init(dev->dev, srq->buf.npages, srq->buf.page_shift,
&srq->mtt);
if (err)
goto err_buf;
err = mlx4_buf_write_mtt(dev->dev, &srq->mtt, &srq->buf);
if (err)
goto err_mtt;
srq->wrid = kmalloc(srq->msrq.max * sizeof (u64), GFP_KERNEL);
if (!srq->wrid) {
err = -ENOMEM;
goto err_mtt;
}
}
err = mlx4_srq_alloc(dev->dev, to_mpd(pd)->pdn, &srq->mtt,
srq->db.dma, &srq->msrq);
if (err)
goto err_wrid;
srq->msrq.event = mlx4_ib_srq_event;
if (pd->uobject)
if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof (__u32))) {
err = -EFAULT;
goto err_wrid;
}
init_attr->attr.max_wr = srq->msrq.max - 1;
return &srq->ibsrq;
err_wrid:
if (pd->uobject)
mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &srq->db);
else
kfree(srq->wrid);
err_mtt:
mlx4_mtt_cleanup(dev->dev, &srq->mtt);
err_buf:
if (pd->uobject)
ib_umem_release(srq->umem);
else
mlx4_buf_free(dev->dev, buf_size, &srq->buf);
err_db:
if (!pd->uobject)
mlx4_ib_db_free(dev, &srq->db);
err_srq:
kfree(srq);
return ERR_PTR(err);
}
int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
{
struct mlx4_ib_dev *dev = to_mdev(ibsrq->device);
struct mlx4_ib_srq *srq = to_msrq(ibsrq);
int ret;
/* We don't support resizing SRQs (yet?) */
if (attr_mask & IB_SRQ_MAX_WR)
return -EINVAL;
if (attr_mask & IB_SRQ_LIMIT) {
if (attr->srq_limit >= srq->msrq.max)
return -EINVAL;
mutex_lock(&srq->mutex);
ret = mlx4_srq_arm(dev->dev, &srq->msrq, attr->srq_limit);
mutex_unlock(&srq->mutex);
if (ret)
return ret;
}
return 0;
}
int mlx4_ib_destroy_srq(struct ib_srq *srq)
{
struct mlx4_ib_dev *dev = to_mdev(srq->device);
struct mlx4_ib_srq *msrq = to_msrq(srq);
mlx4_srq_free(dev->dev, &msrq->msrq);
mlx4_mtt_cleanup(dev->dev, &msrq->mtt);
if (srq->uobject) {
mlx4_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db);
ib_umem_release(msrq->umem);
} else {
kfree(msrq->wrid);
mlx4_buf_free(dev->dev, msrq->msrq.max << msrq->msrq.wqe_shift,
&msrq->buf);
mlx4_ib_db_free(dev, &msrq->db);
}
kfree(msrq);
return 0;
}
void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index)
{
struct mlx4_wqe_srq_next_seg *next;
/* always called with interrupts disabled. */
spin_lock(&srq->lock);
next = get_wqe(srq, srq->tail);
next->next_wqe_index = cpu_to_be16(wqe_index);
srq->tail = wqe_index;
spin_unlock(&srq->lock);
}
int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr)
{
struct mlx4_ib_srq *srq = to_msrq(ibsrq);
struct mlx4_wqe_srq_next_seg *next;
struct mlx4_wqe_data_seg *scat;
unsigned long flags;
int err = 0;
int nreq;
int i;
spin_lock_irqsave(&srq->lock, flags);
for (nreq = 0; wr; ++nreq, wr = wr->next) {
if (unlikely(wr->num_sge > srq->msrq.max_gs)) {
err = -EINVAL;
*bad_wr = wr;
break;
}
srq->wrid[srq->head] = wr->wr_id;
next = get_wqe(srq, srq->head);
srq->head = be16_to_cpu(next->next_wqe_index);
scat = (struct mlx4_wqe_data_seg *) (next + 1);
for (i = 0; i < wr->num_sge; ++i) {
scat[i].byte_count = cpu_to_be32(wr->sg_list[i].length);
scat[i].lkey = cpu_to_be32(wr->sg_list[i].lkey);
scat[i].addr = cpu_to_be64(wr->sg_list[i].addr);
}
if (i < srq->msrq.max_gs) {
scat[i].byte_count = 0;
scat[i].lkey = cpu_to_be32(MLX4_INVALID_LKEY);
scat[i].addr = 0;
}
}
if (likely(nreq)) {
srq->wqe_ctr += nreq;
/*
* Make sure that descriptors are written before
* doorbell record.
*/
wmb();
*srq->db.db = cpu_to_be32(srq->wqe_ctr);
}
spin_unlock_irqrestore(&srq->lock, flags);
return err;
}
/*
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef MLX4_IB_USER_H
#define MLX4_IB_USER_H
#include <linux/types.h>
/*
* Increment this value if any changes that break userspace ABI
* compatibility are made.
*/
#define MLX4_IB_UVERBS_ABI_VERSION 1
/*
* Make sure that all structs defined in this file remain laid out so
* that they pack the same way on 32-bit and 64-bit architectures (to
* avoid incompatibility between 32-bit userspace and 64-bit kernels).
* In particular do not use pointer types -- pass pointers in __u64
* instead.
*/
struct mlx4_ib_alloc_ucontext_resp {
__u32 qp_tab_size;
__u16 bf_reg_size;
__u16 bf_regs_per_page;
};
struct mlx4_ib_alloc_pd_resp {
__u32 pdn;
__u32 reserved;
};
struct mlx4_ib_create_cq {
__u64 buf_addr;
__u64 db_addr;
};
struct mlx4_ib_create_cq_resp {
__u32 cqn;
__u32 reserved;
};
struct mlx4_ib_resize_cq {
__u64 buf_addr;
};
struct mlx4_ib_create_srq {
__u64 buf_addr;
__u64 db_addr;
};
struct mlx4_ib_create_srq_resp {
__u32 srqn;
__u32 reserved;
};
struct mlx4_ib_create_qp {
__u64 buf_addr;
__u64 db_addr;
};
#endif /* MLX4_IB_USER_H */
...@@ -37,6 +37,7 @@ ...@@ -37,6 +37,7 @@
*/ */
#include <rdma/ib_smi.h> #include <rdma/ib_smi.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h> #include <rdma/ib_user_verbs.h>
#include <linux/mm.h> #include <linux/mm.h>
...@@ -908,6 +909,8 @@ static struct ib_mr *mthca_get_dma_mr(struct ib_pd *pd, int acc) ...@@ -908,6 +909,8 @@ static struct ib_mr *mthca_get_dma_mr(struct ib_pd *pd, int acc)
return ERR_PTR(err); return ERR_PTR(err);
} }
mr->umem = NULL;
return &mr->ibmr; return &mr->ibmr;
} }
...@@ -1003,11 +1006,13 @@ static struct ib_mr *mthca_reg_phys_mr(struct ib_pd *pd, ...@@ -1003,11 +1006,13 @@ static struct ib_mr *mthca_reg_phys_mr(struct ib_pd *pd,
} }
kfree(page_list); kfree(page_list);
mr->umem = NULL;
return &mr->ibmr; return &mr->ibmr;
} }
static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region, static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
int acc, struct ib_udata *udata) u64 virt, int acc, struct ib_udata *udata)
{ {
struct mthca_dev *dev = to_mdev(pd->device); struct mthca_dev *dev = to_mdev(pd->device);
struct ib_umem_chunk *chunk; struct ib_umem_chunk *chunk;
...@@ -1018,20 +1023,26 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region, ...@@ -1018,20 +1023,26 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
int err = 0; int err = 0;
int write_mtt_size; int write_mtt_size;
shift = ffs(region->page_size) - 1;
mr = kmalloc(sizeof *mr, GFP_KERNEL); mr = kmalloc(sizeof *mr, GFP_KERNEL);
if (!mr) if (!mr)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
mr->umem = ib_umem_get(pd->uobject->context, start, length, acc);
if (IS_ERR(mr->umem)) {
err = PTR_ERR(mr->umem);
goto err;
}
shift = ffs(mr->umem->page_size) - 1;
n = 0; n = 0;
list_for_each_entry(chunk, &region->chunk_list, list) list_for_each_entry(chunk, &mr->umem->chunk_list, list)
n += chunk->nents; n += chunk->nents;
mr->mtt = mthca_alloc_mtt(dev, n); mr->mtt = mthca_alloc_mtt(dev, n);
if (IS_ERR(mr->mtt)) { if (IS_ERR(mr->mtt)) {
err = PTR_ERR(mr->mtt); err = PTR_ERR(mr->mtt);
goto err; goto err_umem;
} }
pages = (u64 *) __get_free_page(GFP_KERNEL); pages = (u64 *) __get_free_page(GFP_KERNEL);
...@@ -1044,12 +1055,12 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region, ...@@ -1044,12 +1055,12 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
write_mtt_size = min(mthca_write_mtt_size(dev), (int) (PAGE_SIZE / sizeof *pages)); write_mtt_size = min(mthca_write_mtt_size(dev), (int) (PAGE_SIZE / sizeof *pages));
list_for_each_entry(chunk, &region->chunk_list, list) list_for_each_entry(chunk, &mr->umem->chunk_list, list)
for (j = 0; j < chunk->nmap; ++j) { for (j = 0; j < chunk->nmap; ++j) {
len = sg_dma_len(&chunk->page_list[j]) >> shift; len = sg_dma_len(&chunk->page_list[j]) >> shift;
for (k = 0; k < len; ++k) { for (k = 0; k < len; ++k) {
pages[i++] = sg_dma_address(&chunk->page_list[j]) + pages[i++] = sg_dma_address(&chunk->page_list[j]) +
region->page_size * k; mr->umem->page_size * k;
/* /*
* Be friendly to write_mtt and pass it chunks * Be friendly to write_mtt and pass it chunks
* of appropriate size. * of appropriate size.
...@@ -1071,8 +1082,8 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region, ...@@ -1071,8 +1082,8 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
if (err) if (err)
goto err_mtt; goto err_mtt;
err = mthca_mr_alloc(dev, to_mpd(pd)->pd_num, shift, region->virt_base, err = mthca_mr_alloc(dev, to_mpd(pd)->pd_num, shift, virt, length,
region->length, convert_access(acc), mr); convert_access(acc), mr);
if (err) if (err)
goto err_mtt; goto err_mtt;
...@@ -1082,6 +1093,9 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region, ...@@ -1082,6 +1093,9 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
err_mtt: err_mtt:
mthca_free_mtt(dev, mr->mtt); mthca_free_mtt(dev, mr->mtt);
err_umem:
ib_umem_release(mr->umem);
err: err:
kfree(mr); kfree(mr);
return ERR_PTR(err); return ERR_PTR(err);
...@@ -1090,8 +1104,12 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region, ...@@ -1090,8 +1104,12 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
static int mthca_dereg_mr(struct ib_mr *mr) static int mthca_dereg_mr(struct ib_mr *mr)
{ {
struct mthca_mr *mmr = to_mmr(mr); struct mthca_mr *mmr = to_mmr(mr);
mthca_free_mr(to_mdev(mr->device), mmr); mthca_free_mr(to_mdev(mr->device), mmr);
if (mmr->umem)
ib_umem_release(mmr->umem);
kfree(mmr); kfree(mmr);
return 0; return 0;
} }
......
...@@ -73,6 +73,7 @@ struct mthca_mtt; ...@@ -73,6 +73,7 @@ struct mthca_mtt;
struct mthca_mr { struct mthca_mr {
struct ib_mr ibmr; struct ib_mr ibmr;
struct ib_umem *umem;
struct mthca_mtt *mtt; struct mthca_mtt *mtt;
}; };
......
...@@ -2493,6 +2493,20 @@ config PASEMI_MAC ...@@ -2493,6 +2493,20 @@ config PASEMI_MAC
This driver supports the on-chip 1/10Gbit Ethernet controller on This driver supports the on-chip 1/10Gbit Ethernet controller on
PA Semi's PWRficient line of chips. PA Semi's PWRficient line of chips.
config MLX4_CORE
tristate
depends on PCI
default n
config MLX4_DEBUG
bool "Verbose debugging output" if (MLX4_CORE && EMBEDDED)
default y
---help---
This option causes debugging code to be compiled into the
mlx4_core driver. The output can be turned on via the
debug_level module parameter (which can also be set after
the driver is loaded through sysfs).
endmenu endmenu
source "drivers/net/tokenring/Kconfig" source "drivers/net/tokenring/Kconfig"
......
...@@ -197,6 +197,7 @@ obj-$(CONFIG_SMC911X) += smc911x.o ...@@ -197,6 +197,7 @@ obj-$(CONFIG_SMC911X) += smc911x.o
obj-$(CONFIG_DM9000) += dm9000.o obj-$(CONFIG_DM9000) += dm9000.o
obj-$(CONFIG_FEC_8XX) += fec_8xx/ obj-$(CONFIG_FEC_8XX) += fec_8xx/
obj-$(CONFIG_PASEMI_MAC) += pasemi_mac.o obj-$(CONFIG_PASEMI_MAC) += pasemi_mac.o
obj-$(CONFIG_MLX4_CORE) += mlx4/
obj-$(CONFIG_MACB) += macb.o obj-$(CONFIG_MACB) += macb.o
......
obj-$(CONFIG_MLX4_CORE) += mlx4_core.o
mlx4_core-y := alloc.o catas.o cmd.o cq.o eq.o fw.o icm.o intf.o main.o mcg.o \
mr.o pd.o profile.o qp.o reset.o srq.o
/*
* Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/bitmap.h>
#include "mlx4.h"
u32 mlx4_bitmap_alloc(struct mlx4_bitmap *bitmap)
{
u32 obj;
spin_lock(&bitmap->lock);
obj = find_next_zero_bit(bitmap->table, bitmap->max, bitmap->last);
if (obj >= bitmap->max) {
bitmap->top = (bitmap->top + bitmap->max) & bitmap->mask;
obj = find_first_zero_bit(bitmap->table, bitmap->max);
}
if (obj < bitmap->max) {
set_bit(obj, bitmap->table);
obj |= bitmap->top;
bitmap->last = obj + 1;
} else
obj = -1;
spin_unlock(&bitmap->lock);
return obj;
}
void mlx4_bitmap_free(struct mlx4_bitmap *bitmap, u32 obj)
{
obj &= bitmap->max - 1;
spin_lock(&bitmap->lock);
clear_bit(obj, bitmap->table);
bitmap->last = min(bitmap->last, obj);
bitmap->top = (bitmap->top + bitmap->max) & bitmap->mask;
spin_unlock(&bitmap->lock);
}
int mlx4_bitmap_init(struct mlx4_bitmap *bitmap, u32 num, u32 mask, u32 reserved)
{
int i;
/* num must be a power of 2 */
if (num != roundup_pow_of_two(num))
return -EINVAL;
bitmap->last = 0;
bitmap->top = 0;
bitmap->max = num;
bitmap->mask = mask;
spin_lock_init(&bitmap->lock);
bitmap->table = kzalloc(BITS_TO_LONGS(num) * sizeof (long), GFP_KERNEL);
if (!bitmap->table)
return -ENOMEM;
for (i = 0; i < reserved; ++i)
set_bit(i, bitmap->table);
return 0;
}
void mlx4_bitmap_cleanup(struct mlx4_bitmap *bitmap)
{
kfree(bitmap->table);
}
/*
* Handling for queue buffers -- we allocate a bunch of memory and
* register it in a memory region at HCA virtual address 0. If the
* requested size is > max_direct, we split the allocation into
* multiple pages, so we don't require too much contiguous memory.
*/
int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
struct mlx4_buf *buf)
{
dma_addr_t t;
if (size <= max_direct) {
buf->nbufs = 1;
buf->npages = 1;
buf->page_shift = get_order(size) + PAGE_SHIFT;
buf->u.direct.buf = dma_alloc_coherent(&dev->pdev->dev,
size, &t, GFP_KERNEL);
if (!buf->u.direct.buf)
return -ENOMEM;
buf->u.direct.map = t;
while (t & ((1 << buf->page_shift) - 1)) {
--buf->page_shift;
buf->npages *= 2;
}
memset(buf->u.direct.buf, 0, size);
} else {
int i;
buf->nbufs = (size + PAGE_SIZE - 1) / PAGE_SIZE;
buf->npages = buf->nbufs;
buf->page_shift = PAGE_SHIFT;
buf->u.page_list = kzalloc(buf->nbufs * sizeof *buf->u.page_list,
GFP_KERNEL);
if (!buf->u.page_list)
return -ENOMEM;
for (i = 0; i < buf->nbufs; ++i) {
buf->u.page_list[i].buf =
dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE,
&t, GFP_KERNEL);
if (!buf->u.page_list[i].buf)
goto err_free;
buf->u.page_list[i].map = t;
memset(buf->u.page_list[i].buf, 0, PAGE_SIZE);
}
}
return 0;
err_free:
mlx4_buf_free(dev, size, buf);
return -ENOMEM;
}
EXPORT_SYMBOL_GPL(mlx4_buf_alloc);
void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf)
{
int i;
if (buf->nbufs == 1)
dma_free_coherent(&dev->pdev->dev, size, buf->u.direct.buf,
buf->u.direct.map);
else {
for (i = 0; i < buf->nbufs; ++i)
dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
buf->u.page_list[i].buf,
buf->u.page_list[i].map);
kfree(buf->u.page_list);
}
}
EXPORT_SYMBOL_GPL(mlx4_buf_free);
/*
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "mlx4.h"
void mlx4_handle_catas_err(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
int i;
mlx4_err(dev, "Catastrophic error detected:\n");
for (i = 0; i < priv->fw.catas_size; ++i)
mlx4_err(dev, " buf[%02x]: %08x\n",
i, swab32(readl(priv->catas_err.map + i)));
mlx4_dispatch_event(dev, MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR, 0, 0);
}
void mlx4_map_catas_buf(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
unsigned long addr;
addr = pci_resource_start(dev->pdev, priv->fw.catas_bar) +
priv->fw.catas_offset;
priv->catas_err.map = ioremap(addr, priv->fw.catas_size * 4);
if (!priv->catas_err.map)
mlx4_warn(dev, "Failed to map catastrophic error buffer at 0x%lx\n",
addr);
}
void mlx4_unmap_catas_buf(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
if (priv->catas_err.map)
iounmap(priv->catas_err.map);
}
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
* Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/sched.h>
#include <linux/pci.h>
#include <linux/errno.h>
#include <linux/mlx4/cmd.h>
#include <asm/io.h>
#include "mlx4.h"
#define CMD_POLL_TOKEN 0xffff
enum {
/* command completed successfully: */
CMD_STAT_OK = 0x00,
/* Internal error (such as a bus error) occurred while processing command: */
CMD_STAT_INTERNAL_ERR = 0x01,
/* Operation/command not supported or opcode modifier not supported: */
CMD_STAT_BAD_OP = 0x02,
/* Parameter not supported or parameter out of range: */
CMD_STAT_BAD_PARAM = 0x03,
/* System not enabled or bad system state: */
CMD_STAT_BAD_SYS_STATE = 0x04,
/* Attempt to access reserved or unallocaterd resource: */
CMD_STAT_BAD_RESOURCE = 0x05,
/* Requested resource is currently executing a command, or is otherwise busy: */
CMD_STAT_RESOURCE_BUSY = 0x06,
/* Required capability exceeds device limits: */
CMD_STAT_EXCEED_LIM = 0x08,
/* Resource is not in the appropriate state or ownership: */
CMD_STAT_BAD_RES_STATE = 0x09,
/* Index out of range: */
CMD_STAT_BAD_INDEX = 0x0a,
/* FW image corrupted: */
CMD_STAT_BAD_NVMEM = 0x0b,
/* Attempt to modify a QP/EE which is not in the presumed state: */
CMD_STAT_BAD_QP_STATE = 0x10,
/* Bad segment parameters (Address/Size): */
CMD_STAT_BAD_SEG_PARAM = 0x20,
/* Memory Region has Memory Windows bound to: */
CMD_STAT_REG_BOUND = 0x21,
/* HCA local attached memory not present: */
CMD_STAT_LAM_NOT_PRE = 0x22,
/* Bad management packet (silently discarded): */
CMD_STAT_BAD_PKT = 0x30,
/* More outstanding CQEs in CQ than new CQ size: */
CMD_STAT_BAD_SIZE = 0x40
};
enum {
HCR_IN_PARAM_OFFSET = 0x00,
HCR_IN_MODIFIER_OFFSET = 0x08,
HCR_OUT_PARAM_OFFSET = 0x0c,
HCR_TOKEN_OFFSET = 0x14,
HCR_STATUS_OFFSET = 0x18,
HCR_OPMOD_SHIFT = 12,
HCR_T_BIT = 21,
HCR_E_BIT = 22,
HCR_GO_BIT = 23
};
enum {
GO_BIT_TIMEOUT = 10000
};
struct mlx4_cmd_context {
struct completion done;
int result;
int next;
u64 out_param;
u16 token;
};
static int mlx4_status_to_errno(u8 status) {
static const int trans_table[] = {
[CMD_STAT_INTERNAL_ERR] = -EIO,
[CMD_STAT_BAD_OP] = -EPERM,
[CMD_STAT_BAD_PARAM] = -EINVAL,
[CMD_STAT_BAD_SYS_STATE] = -ENXIO,
[CMD_STAT_BAD_RESOURCE] = -EBADF,
[CMD_STAT_RESOURCE_BUSY] = -EBUSY,
[CMD_STAT_EXCEED_LIM] = -ENOMEM,
[CMD_STAT_BAD_RES_STATE] = -EBADF,
[CMD_STAT_BAD_INDEX] = -EBADF,
[CMD_STAT_BAD_NVMEM] = -EFAULT,
[CMD_STAT_BAD_QP_STATE] = -EINVAL,
[CMD_STAT_BAD_SEG_PARAM] = -EFAULT,
[CMD_STAT_REG_BOUND] = -EBUSY,
[CMD_STAT_LAM_NOT_PRE] = -EAGAIN,
[CMD_STAT_BAD_PKT] = -EINVAL,
[CMD_STAT_BAD_SIZE] = -ENOMEM,
};
if (status >= ARRAY_SIZE(trans_table) ||
(status != CMD_STAT_OK && trans_table[status] == 0))
return -EIO;
return trans_table[status];
}
static int cmd_pending(struct mlx4_dev *dev)
{
u32 status = readl(mlx4_priv(dev)->cmd.hcr + HCR_STATUS_OFFSET);
return (status & swab32(1 << HCR_GO_BIT)) ||
(mlx4_priv(dev)->cmd.toggle ==
!!(status & swab32(1 << HCR_T_BIT)));
}
static int mlx4_cmd_post(struct mlx4_dev *dev, u64 in_param, u64 out_param,
u32 in_modifier, u8 op_modifier, u16 op, u16 token,
int event)
{
struct mlx4_cmd *cmd = &mlx4_priv(dev)->cmd;
u32 __iomem *hcr = cmd->hcr;
int ret = -EAGAIN;
unsigned long end;
mutex_lock(&cmd->hcr_mutex);
end = jiffies;
if (event)
end += HZ * 10;
while (cmd_pending(dev)) {
if (time_after_eq(jiffies, end))
goto out;
cond_resched();
}
/*
* We use writel (instead of something like memcpy_toio)
* because writes of less than 32 bits to the HCR don't work
* (and some architectures such as ia64 implement memcpy_toio
* in terms of writeb).
*/
__raw_writel((__force u32) cpu_to_be32(in_param >> 32), hcr + 0);
__raw_writel((__force u32) cpu_to_be32(in_param & 0xfffffffful), hcr + 1);
__raw_writel((__force u32) cpu_to_be32(in_modifier), hcr + 2);
__raw_writel((__force u32) cpu_to_be32(out_param >> 32), hcr + 3);
__raw_writel((__force u32) cpu_to_be32(out_param & 0xfffffffful), hcr + 4);
__raw_writel((__force u32) cpu_to_be32(token << 16), hcr + 5);
/* __raw_writel may not order writes. */
wmb();
__raw_writel((__force u32) cpu_to_be32((1 << HCR_GO_BIT) |
(cmd->toggle << HCR_T_BIT) |
(event ? (1 << HCR_E_BIT) : 0) |
(op_modifier << HCR_OPMOD_SHIFT) |
op), hcr + 6);
cmd->toggle = cmd->toggle ^ 1;
ret = 0;
out:
mutex_unlock(&cmd->hcr_mutex);
return ret;
}
static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
int out_is_imm, u32 in_modifier, u8 op_modifier,
u16 op, unsigned long timeout)
{
struct mlx4_priv *priv = mlx4_priv(dev);
void __iomem *hcr = priv->cmd.hcr;
int err = 0;
unsigned long end;
down(&priv->cmd.poll_sem);
err = mlx4_cmd_post(dev, in_param, out_param ? *out_param : 0,
in_modifier, op_modifier, op, CMD_POLL_TOKEN, 0);
if (err)
goto out;
end = msecs_to_jiffies(timeout) + jiffies;
while (cmd_pending(dev) && time_before(jiffies, end))
cond_resched();
if (cmd_pending(dev)) {
err = -ETIMEDOUT;
goto out;
}
if (out_is_imm)
*out_param =
(u64) be32_to_cpu((__force __be32)
__raw_readl(hcr + HCR_OUT_PARAM_OFFSET)) << 32 |
(u64) be32_to_cpu((__force __be32)
__raw_readl(hcr + HCR_OUT_PARAM_OFFSET + 4));
err = mlx4_status_to_errno(be32_to_cpu((__force __be32)
__raw_readl(hcr + HCR_STATUS_OFFSET)) >> 24);
out:
up(&priv->cmd.poll_sem);
return err;
}
void mlx4_cmd_event(struct mlx4_dev *dev, u16 token, u8 status, u64 out_param)
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_cmd_context *context =
&priv->cmd.context[token & priv->cmd.token_mask];
/* previously timed out command completing at long last */
if (token != context->token)
return;
context->result = mlx4_status_to_errno(status);
context->out_param = out_param;
context->token += priv->cmd.token_mask + 1;
complete(&context->done);
}
static int mlx4_cmd_wait(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
int out_is_imm, u32 in_modifier, u8 op_modifier,
u16 op, unsigned long timeout)
{
struct mlx4_cmd *cmd = &mlx4_priv(dev)->cmd;
struct mlx4_cmd_context *context;
int err = 0;
down(&cmd->event_sem);
spin_lock(&cmd->context_lock);
BUG_ON(cmd->free_head < 0);
context = &cmd->context[cmd->free_head];
cmd->free_head = context->next;
spin_unlock(&cmd->context_lock);
init_completion(&context->done);
mlx4_cmd_post(dev, in_param, out_param ? *out_param : 0,
in_modifier, op_modifier, op, context->token, 1);
if (!wait_for_completion_timeout(&context->done, msecs_to_jiffies(timeout))) {
err = -EBUSY;
goto out;
}
err = context->result;
if (err)
goto out;
if (out_is_imm)
*out_param = context->out_param;
out:
spin_lock(&cmd->context_lock);
context->next = cmd->free_head;
cmd->free_head = context - cmd->context;
spin_unlock(&cmd->context_lock);
up(&cmd->event_sem);
return err;
}
int __mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
int out_is_imm, u32 in_modifier, u8 op_modifier,
u16 op, unsigned long timeout)
{
if (mlx4_priv(dev)->cmd.use_events)
return mlx4_cmd_wait(dev, in_param, out_param, out_is_imm,
in_modifier, op_modifier, op, timeout);
else
return mlx4_cmd_poll(dev, in_param, out_param, out_is_imm,
in_modifier, op_modifier, op, timeout);
}
EXPORT_SYMBOL_GPL(__mlx4_cmd);
int mlx4_cmd_init(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
mutex_init(&priv->cmd.hcr_mutex);
sema_init(&priv->cmd.poll_sem, 1);
priv->cmd.use_events = 0;
priv->cmd.toggle = 1;
priv->cmd.hcr = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_HCR_BASE,
MLX4_HCR_SIZE);
if (!priv->cmd.hcr) {
mlx4_err(dev, "Couldn't map command register.");
return -ENOMEM;
}
priv->cmd.pool = pci_pool_create("mlx4_cmd", dev->pdev,
MLX4_MAILBOX_SIZE,
MLX4_MAILBOX_SIZE, 0);
if (!priv->cmd.pool) {
iounmap(priv->cmd.hcr);
return -ENOMEM;
}
return 0;
}
void mlx4_cmd_cleanup(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
pci_pool_destroy(priv->cmd.pool);
iounmap(priv->cmd.hcr);
}
/*
* Switch to using events to issue FW commands (can only be called
* after event queue for command events has been initialized).
*/
int mlx4_cmd_use_events(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
int i;
priv->cmd.context = kmalloc(priv->cmd.max_cmds *
sizeof (struct mlx4_cmd_context),
GFP_KERNEL);
if (!priv->cmd.context)
return -ENOMEM;
for (i = 0; i < priv->cmd.max_cmds; ++i) {
priv->cmd.context[i].token = i;
priv->cmd.context[i].next = i + 1;
}
priv->cmd.context[priv->cmd.max_cmds - 1].next = -1;
priv->cmd.free_head = 0;
sema_init(&priv->cmd.event_sem, priv->cmd.max_cmds);
spin_lock_init(&priv->cmd.context_lock);
for (priv->cmd.token_mask = 1;
priv->cmd.token_mask < priv->cmd.max_cmds;
priv->cmd.token_mask <<= 1)
; /* nothing */
--priv->cmd.token_mask;
priv->cmd.use_events = 1;
down(&priv->cmd.poll_sem);
return 0;
}
/*
* Switch back to polling (used when shutting down the device)
*/
void mlx4_cmd_use_polling(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
int i;
priv->cmd.use_events = 0;
for (i = 0; i < priv->cmd.max_cmds; ++i)
down(&priv->cmd.event_sem);
kfree(priv->cmd.context);
up(&priv->cmd.poll_sem);
}
struct mlx4_cmd_mailbox *mlx4_alloc_cmd_mailbox(struct mlx4_dev *dev)
{
struct mlx4_cmd_mailbox *mailbox;
mailbox = kmalloc(sizeof *mailbox, GFP_KERNEL);
if (!mailbox)
return ERR_PTR(-ENOMEM);
mailbox->buf = pci_pool_alloc(mlx4_priv(dev)->cmd.pool, GFP_KERNEL,
&mailbox->dma);
if (!mailbox->buf) {
kfree(mailbox);
return ERR_PTR(-ENOMEM);
}
return mailbox;
}
EXPORT_SYMBOL_GPL(mlx4_alloc_cmd_mailbox);
void mlx4_free_cmd_mailbox(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox)
{
if (!mailbox)
return;
pci_pool_free(mlx4_priv(dev)->cmd.pool, mailbox->buf, mailbox->dma);
kfree(mailbox);
}
EXPORT_SYMBOL_GPL(mlx4_free_cmd_mailbox);
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
* Copyright (c) 2004 Voltaire, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/init.h>
#include <linux/hardirq.h>
#include <linux/mlx4/cmd.h>
#include "mlx4.h"
#include "icm.h"
struct mlx4_cq_context {
__be32 flags;
u16 reserved1[3];
__be16 page_offset;
__be32 logsize_usrpage;
u8 reserved2;
u8 cq_period;
u8 reserved3;
u8 cq_max_count;
u8 reserved4[3];
u8 comp_eqn;
u8 log_page_size;
u8 reserved5[2];
u8 mtt_base_addr_h;
__be32 mtt_base_addr_l;
__be32 last_notified_index;
__be32 solicit_producer_index;
__be32 consumer_index;
__be32 producer_index;
u8 reserved6[2];
__be64 db_rec_addr;
};
#define MLX4_CQ_STATUS_OK ( 0 << 28)
#define MLX4_CQ_STATUS_OVERFLOW ( 9 << 28)
#define MLX4_CQ_STATUS_WRITE_FAIL (10 << 28)
#define MLX4_CQ_FLAG_CC ( 1 << 18)
#define MLX4_CQ_FLAG_OI ( 1 << 17)
#define MLX4_CQ_STATE_ARMED ( 9 << 8)
#define MLX4_CQ_STATE_ARMED_SOL ( 6 << 8)
#define MLX4_EQ_STATE_FIRED (10 << 8)
void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn)
{
struct mlx4_cq *cq;
cq = radix_tree_lookup(&mlx4_priv(dev)->cq_table.tree,
cqn & (dev->caps.num_cqs - 1));
if (!cq) {
mlx4_warn(dev, "Completion event for bogus CQ %08x\n", cqn);
return;
}
++cq->arm_sn;
cq->comp(cq);
}
void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn, int event_type)
{
struct mlx4_cq_table *cq_table = &mlx4_priv(dev)->cq_table;
struct mlx4_cq *cq;
spin_lock(&cq_table->lock);
cq = radix_tree_lookup(&cq_table->tree, cqn & (dev->caps.num_cqs - 1));
if (cq)
atomic_inc(&cq->refcount);
spin_unlock(&cq_table->lock);
if (!cq) {
mlx4_warn(dev, "Async event for bogus CQ %08x\n", cqn);
return;
}
cq->event(cq, event_type);
if (atomic_dec_and_test(&cq->refcount))
complete(&cq->free);
}
static int mlx4_SW2HW_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
int cq_num)
{
return mlx4_cmd(dev, mailbox->dma, cq_num, 0, MLX4_CMD_SW2HW_CQ,
MLX4_CMD_TIME_CLASS_A);
}
static int mlx4_HW2SW_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
int cq_num)
{
return mlx4_cmd_box(dev, 0, mailbox ? mailbox->dma : 0, cq_num,
mailbox ? 0 : 1, MLX4_CMD_HW2SW_CQ,
MLX4_CMD_TIME_CLASS_A);
}
int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt,
struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq)
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_cq_table *cq_table = &priv->cq_table;
struct mlx4_cmd_mailbox *mailbox;
struct mlx4_cq_context *cq_context;
u64 mtt_addr;
int err;
cq->cqn = mlx4_bitmap_alloc(&cq_table->bitmap);
if (cq->cqn == -1)
return -ENOMEM;
err = mlx4_table_get(dev, &cq_table->table, cq->cqn);
if (err)
goto err_out;
err = mlx4_table_get(dev, &cq_table->cmpt_table, cq->cqn);
if (err)
goto err_put;
spin_lock_irq(&cq_table->lock);
err = radix_tree_insert(&cq_table->tree, cq->cqn, cq);
spin_unlock_irq(&cq_table->lock);
if (err)
goto err_cmpt_put;
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox)) {
err = PTR_ERR(mailbox);
goto err_radix;
}
cq_context = mailbox->buf;
memset(cq_context, 0, sizeof *cq_context);
cq_context->logsize_usrpage = cpu_to_be32((ilog2(nent) << 24) | uar->index);
cq_context->comp_eqn = priv->eq_table.eq[MLX4_EQ_COMP].eqn;
cq_context->log_page_size = mtt->page_shift - MLX4_ICM_PAGE_SHIFT;
mtt_addr = mlx4_mtt_addr(dev, mtt);
cq_context->mtt_base_addr_h = mtt_addr >> 32;
cq_context->mtt_base_addr_l = cpu_to_be32(mtt_addr & 0xffffffff);
cq_context->db_rec_addr = cpu_to_be64(db_rec);
err = mlx4_SW2HW_CQ(dev, mailbox, cq->cqn);
mlx4_free_cmd_mailbox(dev, mailbox);
if (err)
goto err_radix;
cq->cons_index = 0;
cq->arm_sn = 1;
cq->uar = uar;
atomic_set(&cq->refcount, 1);
init_completion(&cq->free);
return 0;
err_radix:
spin_lock_irq(&cq_table->lock);
radix_tree_delete(&cq_table->tree, cq->cqn);
spin_unlock_irq(&cq_table->lock);
err_cmpt_put:
mlx4_table_put(dev, &cq_table->cmpt_table, cq->cqn);
err_put:
mlx4_table_put(dev, &cq_table->table, cq->cqn);
err_out:
mlx4_bitmap_free(&cq_table->bitmap, cq->cqn);
return err;
}
EXPORT_SYMBOL_GPL(mlx4_cq_alloc);
void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq)
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_cq_table *cq_table = &priv->cq_table;
int err;
err = mlx4_HW2SW_CQ(dev, NULL, cq->cqn);
if (err)
mlx4_warn(dev, "HW2SW_CQ failed (%d) for CQN %06x\n", err, cq->cqn);
synchronize_irq(priv->eq_table.eq[MLX4_EQ_COMP].irq);
spin_lock_irq(&cq_table->lock);
radix_tree_delete(&cq_table->tree, cq->cqn);
spin_unlock_irq(&cq_table->lock);
if (atomic_dec_and_test(&cq->refcount))
complete(&cq->free);
wait_for_completion(&cq->free);
mlx4_table_put(dev, &cq_table->table, cq->cqn);
mlx4_bitmap_free(&cq_table->bitmap, cq->cqn);
}
EXPORT_SYMBOL_GPL(mlx4_cq_free);
int __devinit mlx4_init_cq_table(struct mlx4_dev *dev)
{
struct mlx4_cq_table *cq_table = &mlx4_priv(dev)->cq_table;
int err;
spin_lock_init(&cq_table->lock);
INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC);
err = mlx4_bitmap_init(&cq_table->bitmap, dev->caps.num_cqs,
dev->caps.num_cqs - 1, dev->caps.reserved_cqs);
if (err)
return err;
return 0;
}
void mlx4_cleanup_cq_table(struct mlx4_dev *dev)
{
/* Nothing to do to clean up radix_tree */
mlx4_bitmap_cleanup(&mlx4_priv(dev)->cq_table.bitmap);
}
/*
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
* Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/mlx4/cmd.h>
#include "mlx4.h"
#include "fw.h"
enum {
MLX4_NUM_ASYNC_EQE = 0x100,
MLX4_NUM_SPARE_EQE = 0x80,
MLX4_EQ_ENTRY_SIZE = 0x20
};
/*
* Must be packed because start is 64 bits but only aligned to 32 bits.
*/
struct mlx4_eq_context {
__be32 flags;
u16 reserved1[3];
__be16 page_offset;
u8 log_eq_size;
u8 reserved2[4];
u8 eq_period;
u8 reserved3;
u8 eq_max_count;
u8 reserved4[3];
u8 intr;
u8 log_page_size;
u8 reserved5[2];
u8 mtt_base_addr_h;
__be32 mtt_base_addr_l;
u32 reserved6[2];
__be32 consumer_index;
__be32 producer_index;
u32 reserved7[4];
};
#define MLX4_EQ_STATUS_OK ( 0 << 28)
#define MLX4_EQ_STATUS_WRITE_FAIL (10 << 28)
#define MLX4_EQ_OWNER_SW ( 0 << 24)
#define MLX4_EQ_OWNER_HW ( 1 << 24)
#define MLX4_EQ_FLAG_EC ( 1 << 18)
#define MLX4_EQ_FLAG_OI ( 1 << 17)
#define MLX4_EQ_STATE_ARMED ( 9 << 8)
#define MLX4_EQ_STATE_FIRED (10 << 8)
#define MLX4_EQ_STATE_ALWAYS_ARMED (11 << 8)
#define MLX4_ASYNC_EVENT_MASK ((1ull << MLX4_EVENT_TYPE_PATH_MIG) | \
(1ull << MLX4_EVENT_TYPE_COMM_EST) | \
(1ull << MLX4_EVENT_TYPE_SQ_DRAINED) | \
(1ull << MLX4_EVENT_TYPE_CQ_ERROR) | \
(1ull << MLX4_EVENT_TYPE_WQ_CATAS_ERROR) | \
(1ull << MLX4_EVENT_TYPE_EEC_CATAS_ERROR) | \
(1ull << MLX4_EVENT_TYPE_PATH_MIG_FAILED) | \
(1ull << MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR) | \
(1ull << MLX4_EVENT_TYPE_WQ_ACCESS_ERROR) | \
(1ull << MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR) | \
(1ull << MLX4_EVENT_TYPE_PORT_CHANGE) | \
(1ull << MLX4_EVENT_TYPE_ECC_DETECT) | \
(1ull << MLX4_EVENT_TYPE_SRQ_CATAS_ERROR) | \
(1ull << MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE) | \
(1ull << MLX4_EVENT_TYPE_SRQ_LIMIT) | \
(1ull << MLX4_EVENT_TYPE_CMD))
#define MLX4_CATAS_EVENT_MASK (1ull << MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR)
struct mlx4_eqe {
u8 reserved1;
u8 type;
u8 reserved2;
u8 subtype;
union {
u32 raw[6];
struct {
__be32 cqn;
} __attribute__((packed)) comp;
struct {
u16 reserved1;
__be16 token;
u32 reserved2;
u8 reserved3[3];
u8 status;
__be64 out_param;
} __attribute__((packed)) cmd;
struct {
__be32 qpn;
} __attribute__((packed)) qp;
struct {
__be32 srqn;
} __attribute__((packed)) srq;
struct {
__be32 cqn;
u32 reserved1;
u8 reserved2[3];
u8 syndrome;
} __attribute__((packed)) cq_err;
struct {
u32 reserved1[2];
__be32 port;
} __attribute__((packed)) port_change;
} event;
u8 reserved3[3];
u8 owner;
} __attribute__((packed));
static void eq_set_ci(struct mlx4_eq *eq, int req_not)
{
__raw_writel((__force u32) cpu_to_be32((eq->cons_index & 0xffffff) |
req_not << 31),
eq->doorbell);
/* We still want ordering, just not swabbing, so add a barrier */
mb();
}
static struct mlx4_eqe *get_eqe(struct mlx4_eq *eq, u32 entry)
{
unsigned long off = (entry & (eq->nent - 1)) * MLX4_EQ_ENTRY_SIZE;
return eq->page_list[off / PAGE_SIZE].buf + off % PAGE_SIZE;
}
static struct mlx4_eqe *next_eqe_sw(struct mlx4_eq *eq)
{
struct mlx4_eqe *eqe = get_eqe(eq, eq->cons_index);
return !!(eqe->owner & 0x80) ^ !!(eq->cons_index & eq->nent) ? NULL : eqe;
}
static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
{
struct mlx4_eqe *eqe;
int cqn;
int eqes_found = 0;
int set_ci = 0;
while ((eqe = next_eqe_sw(eq))) {
/*
* Make sure we read EQ entry contents after we've
* checked the ownership bit.
*/
rmb();
switch (eqe->type) {
case MLX4_EVENT_TYPE_COMP:
cqn = be32_to_cpu(eqe->event.comp.cqn) & 0xffffff;
mlx4_cq_completion(dev, cqn);
break;
case MLX4_EVENT_TYPE_PATH_MIG:
case MLX4_EVENT_TYPE_COMM_EST:
case MLX4_EVENT_TYPE_SQ_DRAINED:
case MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE:
case MLX4_EVENT_TYPE_WQ_CATAS_ERROR:
case MLX4_EVENT_TYPE_PATH_MIG_FAILED:
case MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
case MLX4_EVENT_TYPE_WQ_ACCESS_ERROR:
mlx4_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
eqe->type);
break;
case MLX4_EVENT_TYPE_SRQ_LIMIT:
case MLX4_EVENT_TYPE_SRQ_CATAS_ERROR:
mlx4_srq_event(dev, be32_to_cpu(eqe->event.srq.srqn) & 0xffffff,
eqe->type);
break;
case MLX4_EVENT_TYPE_CMD:
mlx4_cmd_event(dev,
be16_to_cpu(eqe->event.cmd.token),
eqe->event.cmd.status,
be64_to_cpu(eqe->event.cmd.out_param));
break;
case MLX4_EVENT_TYPE_PORT_CHANGE:
mlx4_dispatch_event(dev, eqe->type, eqe->subtype,
be32_to_cpu(eqe->event.port_change.port) >> 28);
break;
case MLX4_EVENT_TYPE_CQ_ERROR:
mlx4_warn(dev, "CQ %s on CQN %06x\n",
eqe->event.cq_err.syndrome == 1 ?
"overrun" : "access violation",
be32_to_cpu(eqe->event.cq_err.cqn) & 0xffffff);
mlx4_cq_event(dev, be32_to_cpu(eqe->event.cq_err.cqn),
eqe->type);
break;
case MLX4_EVENT_TYPE_EQ_OVERFLOW:
mlx4_warn(dev, "EQ overrun on EQN %d\n", eq->eqn);
break;
case MLX4_EVENT_TYPE_EEC_CATAS_ERROR:
case MLX4_EVENT_TYPE_ECC_DETECT:
default:
mlx4_warn(dev, "Unhandled event %02x(%02x) on EQ %d at index %u\n",
eqe->type, eqe->subtype, eq->eqn, eq->cons_index);
break;
};
++eq->cons_index;
eqes_found = 1;
++set_ci;
/*
* The HCA will think the queue has overflowed if we
* don't tell it we've been processing events. We
* create our EQs with MLX4_NUM_SPARE_EQE extra
* entries, so we must update our consumer index at
* least that often.
*/
if (unlikely(set_ci >= MLX4_NUM_SPARE_EQE)) {
/*
* Conditional on hca_type is OK here because
* this is a rare case, not the fast path.
*/
eq_set_ci(eq, 0);
set_ci = 0;
}
}
eq_set_ci(eq, 1);
return eqes_found;
}
static irqreturn_t mlx4_interrupt(int irq, void *dev_ptr)
{
struct mlx4_dev *dev = dev_ptr;
struct mlx4_priv *priv = mlx4_priv(dev);
int work = 0;
int i;
writel(priv->eq_table.clr_mask, priv->eq_table.clr_int);
for (i = 0; i < MLX4_EQ_CATAS; ++i)
work |= mlx4_eq_int(dev, &priv->eq_table.eq[i]);
return IRQ_RETVAL(work);
}
static irqreturn_t mlx4_msi_x_interrupt(int irq, void *eq_ptr)
{
struct mlx4_eq *eq = eq_ptr;
struct mlx4_dev *dev = eq->dev;
mlx4_eq_int(dev, eq);
/* MSI-X vectors always belong to us */
return IRQ_HANDLED;
}
static irqreturn_t mlx4_catas_interrupt(int irq, void *dev_ptr)
{
mlx4_handle_catas_err(dev_ptr);
/* MSI-X vectors always belong to us */
return IRQ_HANDLED;
}
static int mlx4_MAP_EQ(struct mlx4_dev *dev, u64 event_mask, int unmap,
int eq_num)
{
return mlx4_cmd(dev, event_mask, (unmap << 31) | eq_num,
0, MLX4_CMD_MAP_EQ, MLX4_CMD_TIME_CLASS_B);
}
static int mlx4_SW2HW_EQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
int eq_num)
{
return mlx4_cmd(dev, mailbox->dma, eq_num, 0, MLX4_CMD_SW2HW_EQ,
MLX4_CMD_TIME_CLASS_A);
}
static int mlx4_HW2SW_EQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
int eq_num)
{
return mlx4_cmd_box(dev, 0, mailbox->dma, eq_num, 0, MLX4_CMD_HW2SW_EQ,
MLX4_CMD_TIME_CLASS_A);
}
static void __devinit __iomem *mlx4_get_eq_uar(struct mlx4_dev *dev,
struct mlx4_eq *eq)
{
struct mlx4_priv *priv = mlx4_priv(dev);
int index;
index = eq->eqn / 4 - dev->caps.reserved_eqs / 4;
if (!priv->eq_table.uar_map[index]) {
priv->eq_table.uar_map[index] =
ioremap(pci_resource_start(dev->pdev, 2) +
((eq->eqn / 4) << PAGE_SHIFT),
PAGE_SIZE);
if (!priv->eq_table.uar_map[index]) {
mlx4_err(dev, "Couldn't map EQ doorbell for EQN 0x%06x\n",
eq->eqn);
return NULL;
}
}
return priv->eq_table.uar_map[index] + 0x800 + 8 * (eq->eqn % 4);
}
static int __devinit mlx4_create_eq(struct mlx4_dev *dev, int nent,
u8 intr, struct mlx4_eq *eq)
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_cmd_mailbox *mailbox;
struct mlx4_eq_context *eq_context;
int npages;
u64 *dma_list = NULL;
dma_addr_t t;
u64 mtt_addr;
int err = -ENOMEM;
int i;
eq->dev = dev;
eq->nent = roundup_pow_of_two(max(nent, 2));
npages = PAGE_ALIGN(eq->nent * MLX4_EQ_ENTRY_SIZE) / PAGE_SIZE;
eq->page_list = kmalloc(npages * sizeof *eq->page_list,
GFP_KERNEL);
if (!eq->page_list)
goto err_out;
for (i = 0; i < npages; ++i)
eq->page_list[i].buf = NULL;
dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL);
if (!dma_list)
goto err_out_free;
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox))
goto err_out_free;
eq_context = mailbox->buf;
for (i = 0; i < npages; ++i) {
eq->page_list[i].buf = dma_alloc_coherent(&dev->pdev->dev,
PAGE_SIZE, &t, GFP_KERNEL);
if (!eq->page_list[i].buf)
goto err_out_free_pages;
dma_list[i] = t;
eq->page_list[i].map = t;
memset(eq->page_list[i].buf, 0, PAGE_SIZE);
}
eq->eqn = mlx4_bitmap_alloc(&priv->eq_table.bitmap);
if (eq->eqn == -1)
goto err_out_free_pages;
eq->doorbell = mlx4_get_eq_uar(dev, eq);
if (!eq->doorbell) {
err = -ENOMEM;
goto err_out_free_eq;
}
err = mlx4_mtt_init(dev, npages, PAGE_SHIFT, &eq->mtt);
if (err)
goto err_out_free_eq;
err = mlx4_write_mtt(dev, &eq->mtt, 0, npages, dma_list);
if (err)
goto err_out_free_mtt;
memset(eq_context, 0, sizeof *eq_context);
eq_context->flags = cpu_to_be32(MLX4_EQ_STATUS_OK |
MLX4_EQ_STATE_ARMED);
eq_context->log_eq_size = ilog2(eq->nent);
eq_context->intr = intr;
eq_context->log_page_size = PAGE_SHIFT - MLX4_ICM_PAGE_SHIFT;
mtt_addr = mlx4_mtt_addr(dev, &eq->mtt);
eq_context->mtt_base_addr_h = mtt_addr >> 32;
eq_context->mtt_base_addr_l = cpu_to_be32(mtt_addr & 0xffffffff);
err = mlx4_SW2HW_EQ(dev, mailbox, eq->eqn);
if (err) {
mlx4_warn(dev, "SW2HW_EQ failed (%d)\n", err);
goto err_out_free_mtt;
}
kfree(dma_list);
mlx4_free_cmd_mailbox(dev, mailbox);
eq->cons_index = 0;
return err;
err_out_free_mtt:
mlx4_mtt_cleanup(dev, &eq->mtt);
err_out_free_eq:
mlx4_bitmap_free(&priv->eq_table.bitmap, eq->eqn);
err_out_free_pages:
for (i = 0; i < npages; ++i)
if (eq->page_list[i].buf)
dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
eq->page_list[i].buf,
eq->page_list[i].map);
mlx4_free_cmd_mailbox(dev, mailbox);
err_out_free:
kfree(eq->page_list);
kfree(dma_list);
err_out:
return err;
}
static void mlx4_free_eq(struct mlx4_dev *dev,
struct mlx4_eq *eq)
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_cmd_mailbox *mailbox;
int err;
int npages = PAGE_ALIGN(MLX4_EQ_ENTRY_SIZE * eq->nent) / PAGE_SIZE;
int i;
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox))
return;
err = mlx4_HW2SW_EQ(dev, mailbox, eq->eqn);
if (err)
mlx4_warn(dev, "HW2SW_EQ failed (%d)\n", err);
if (0) {
mlx4_dbg(dev, "Dumping EQ context %02x:\n", eq->eqn);
for (i = 0; i < sizeof (struct mlx4_eq_context) / 4; ++i) {
if (i % 4 == 0)
printk("[%02x] ", i * 4);
printk(" %08x", be32_to_cpup(mailbox->buf + i * 4));
if ((i + 1) % 4 == 0)
printk("\n");
}
}
mlx4_mtt_cleanup(dev, &eq->mtt);
for (i = 0; i < npages; ++i)
pci_free_consistent(dev->pdev, PAGE_SIZE,
eq->page_list[i].buf,
eq->page_list[i].map);
kfree(eq->page_list);
mlx4_bitmap_free(&priv->eq_table.bitmap, eq->eqn);
mlx4_free_cmd_mailbox(dev, mailbox);
}
static void mlx4_free_irqs(struct mlx4_dev *dev)
{
struct mlx4_eq_table *eq_table = &mlx4_priv(dev)->eq_table;
int i;
if (eq_table->have_irq)
free_irq(dev->pdev->irq, dev);
for (i = 0; i < MLX4_NUM_EQ; ++i)
if (eq_table->eq[i].have_irq)
free_irq(eq_table->eq[i].irq, eq_table->eq + i);
}
static int __devinit mlx4_map_clr_int(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
priv->clr_base = ioremap(pci_resource_start(dev->pdev, priv->fw.clr_int_bar) +
priv->fw.clr_int_base, MLX4_CLR_INT_SIZE);
if (!priv->clr_base) {
mlx4_err(dev, "Couldn't map interrupt clear register, aborting.\n");
return -ENOMEM;
}
return 0;
}
static void mlx4_unmap_clr_int(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
iounmap(priv->clr_base);
}
int __devinit mlx4_map_eq_icm(struct mlx4_dev *dev, u64 icm_virt)
{
struct mlx4_priv *priv = mlx4_priv(dev);
int ret;
/*
* We assume that mapping one page is enough for the whole EQ
* context table. This is fine with all current HCAs, because
* we only use 32 EQs and each EQ uses 64 bytes of context
* memory, or 1 KB total.
*/
priv->eq_table.icm_virt = icm_virt;
priv->eq_table.icm_page = alloc_page(GFP_HIGHUSER);
if (!priv->eq_table.icm_page)
return -ENOMEM;
priv->eq_table.icm_dma = pci_map_page(dev->pdev, priv->eq_table.icm_page, 0,
PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
if (pci_dma_mapping_error(priv->eq_table.icm_dma)) {
__free_page(priv->eq_table.icm_page);
return -ENOMEM;
}
ret = mlx4_MAP_ICM_page(dev, priv->eq_table.icm_dma, icm_virt);
if (ret) {
pci_unmap_page(dev->pdev, priv->eq_table.icm_dma, PAGE_SIZE,
PCI_DMA_BIDIRECTIONAL);
__free_page(priv->eq_table.icm_page);
}
return ret;
}
void mlx4_unmap_eq_icm(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
mlx4_UNMAP_ICM(dev, priv->eq_table.icm_virt, 1);
pci_unmap_page(dev->pdev, priv->eq_table.icm_dma, PAGE_SIZE,
PCI_DMA_BIDIRECTIONAL);
__free_page(priv->eq_table.icm_page);
}
int __devinit mlx4_init_eq_table(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
int err;
int i;
err = mlx4_bitmap_init(&priv->eq_table.bitmap, dev->caps.num_eqs,
dev->caps.num_eqs - 1, dev->caps.reserved_eqs);
if (err)
return err;
for (i = 0; i < ARRAY_SIZE(priv->eq_table.uar_map); ++i)
priv->eq_table.uar_map[i] = NULL;
err = mlx4_map_clr_int(dev);
if (err)
goto err_out_free;
priv->eq_table.clr_mask =
swab32(1 << (priv->eq_table.inta_pin & 31));
priv->eq_table.clr_int = priv->clr_base +
(priv->eq_table.inta_pin < 32 ? 4 : 0);
err = mlx4_create_eq(dev, dev->caps.num_cqs + MLX4_NUM_SPARE_EQE,
(dev->flags & MLX4_FLAG_MSI_X) ? MLX4_EQ_COMP : 0,
&priv->eq_table.eq[MLX4_EQ_COMP]);
if (err)
goto err_out_unmap;
err = mlx4_create_eq(dev, MLX4_NUM_ASYNC_EQE + MLX4_NUM_SPARE_EQE,
(dev->flags & MLX4_FLAG_MSI_X) ? MLX4_EQ_ASYNC : 0,
&priv->eq_table.eq[MLX4_EQ_ASYNC]);
if (err)
goto err_out_comp;
if (dev->flags & MLX4_FLAG_MSI_X) {
static const char *eq_name[] = {
[MLX4_EQ_COMP] = DRV_NAME " (comp)",
[MLX4_EQ_ASYNC] = DRV_NAME " (async)",
[MLX4_EQ_CATAS] = DRV_NAME " (catas)"
};
err = mlx4_create_eq(dev, 1, MLX4_EQ_CATAS,
&priv->eq_table.eq[MLX4_EQ_CATAS]);
if (err)
goto err_out_async;
for (i = 0; i < MLX4_EQ_CATAS; ++i) {
err = request_irq(priv->eq_table.eq[i].irq,
mlx4_msi_x_interrupt,
0, eq_name[i], priv->eq_table.eq + i);
if (err)
goto err_out_catas;
priv->eq_table.eq[i].have_irq = 1;
}
err = request_irq(priv->eq_table.eq[MLX4_EQ_CATAS].irq,
mlx4_catas_interrupt, 0,
eq_name[MLX4_EQ_CATAS], dev);
if (err)
goto err_out_catas;
priv->eq_table.eq[MLX4_EQ_CATAS].have_irq = 1;
} else {
err = request_irq(dev->pdev->irq, mlx4_interrupt,
SA_SHIRQ, DRV_NAME, dev);
if (err)
goto err_out_async;
priv->eq_table.have_irq = 1;
}
err = mlx4_MAP_EQ(dev, MLX4_ASYNC_EVENT_MASK, 0,
priv->eq_table.eq[MLX4_EQ_ASYNC].eqn);
if (err)
mlx4_warn(dev, "MAP_EQ for async EQ %d failed (%d)\n",
priv->eq_table.eq[MLX4_EQ_ASYNC].eqn, err);
for (i = 0; i < MLX4_EQ_CATAS; ++i)
eq_set_ci(&priv->eq_table.eq[i], 1);
if (dev->flags & MLX4_FLAG_MSI_X) {
err = mlx4_MAP_EQ(dev, MLX4_CATAS_EVENT_MASK, 0,
priv->eq_table.eq[MLX4_EQ_CATAS].eqn);
if (err)
mlx4_warn(dev, "MAP_EQ for catas EQ %d failed (%d)\n",
priv->eq_table.eq[MLX4_EQ_CATAS].eqn, err);
}
return 0;
err_out_catas:
mlx4_free_eq(dev, &priv->eq_table.eq[MLX4_EQ_CATAS]);
err_out_async:
mlx4_free_eq(dev, &priv->eq_table.eq[MLX4_EQ_ASYNC]);
err_out_comp:
mlx4_free_eq(dev, &priv->eq_table.eq[MLX4_EQ_COMP]);
err_out_unmap:
mlx4_unmap_clr_int(dev);
mlx4_free_irqs(dev);
err_out_free:
mlx4_bitmap_cleanup(&priv->eq_table.bitmap);
return err;
}
void mlx4_cleanup_eq_table(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
int i;
if (dev->flags & MLX4_FLAG_MSI_X)
mlx4_MAP_EQ(dev, MLX4_CATAS_EVENT_MASK, 1,
priv->eq_table.eq[MLX4_EQ_CATAS].eqn);
mlx4_MAP_EQ(dev, MLX4_ASYNC_EVENT_MASK, 1,
priv->eq_table.eq[MLX4_EQ_ASYNC].eqn);
mlx4_free_irqs(dev);
for (i = 0; i < MLX4_EQ_CATAS; ++i)
mlx4_free_eq(dev, &priv->eq_table.eq[i]);
if (dev->flags & MLX4_FLAG_MSI_X)
mlx4_free_eq(dev, &priv->eq_table.eq[MLX4_EQ_CATAS]);
mlx4_unmap_clr_int(dev);
for (i = 0; i < ARRAY_SIZE(priv->eq_table.uar_map); ++i)
if (priv->eq_table.uar_map[i])
iounmap(priv->eq_table.uar_map[i]);
mlx4_bitmap_cleanup(&priv->eq_table.bitmap);
}
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
* Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/mlx4/cmd.h>
#include "fw.h"
#include "icm.h"
extern void __buggy_use_of_MLX4_GET(void);
extern void __buggy_use_of_MLX4_PUT(void);
#define MLX4_GET(dest, source, offset) \
do { \
void *__p = (char *) (source) + (offset); \
switch (sizeof (dest)) { \
case 1: (dest) = *(u8 *) __p; break; \
case 2: (dest) = be16_to_cpup(__p); break; \
case 4: (dest) = be32_to_cpup(__p); break; \
case 8: (dest) = be64_to_cpup(__p); break; \
default: __buggy_use_of_MLX4_GET(); \
} \
} while (0)
#define MLX4_PUT(dest, source, offset) \
do { \
void *__d = ((char *) (dest) + (offset)); \
switch (sizeof(source)) { \
case 1: *(u8 *) __d = (source); break; \
case 2: *(__be16 *) __d = cpu_to_be16(source); break; \
case 4: *(__be32 *) __d = cpu_to_be32(source); break; \
case 8: *(__be64 *) __d = cpu_to_be64(source); break; \
default: __buggy_use_of_MLX4_PUT(); \
} \
} while (0)
static void dump_dev_cap_flags(struct mlx4_dev *dev, u32 flags)
{
static const char *fname[] = {
[ 0] = "RC transport",
[ 1] = "UC transport",
[ 2] = "UD transport",
[ 3] = "SRC transport",
[ 4] = "reliable multicast",
[ 5] = "FCoIB support",
[ 6] = "SRQ support",
[ 7] = "IPoIB checksum offload",
[ 8] = "P_Key violation counter",
[ 9] = "Q_Key violation counter",
[10] = "VMM",
[16] = "MW support",
[17] = "APM support",
[18] = "Atomic ops support",
[19] = "Raw multicast support",
[20] = "Address vector port checking support",
[21] = "UD multicast support",
[24] = "Demand paging support",
[25] = "Router support"
};
int i;
mlx4_dbg(dev, "DEV_CAP flags:\n");
for (i = 0; i < 32; ++i)
if (fname[i] && (flags & (1 << i)))
mlx4_dbg(dev, " %s\n", fname[i]);
}
int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
{
struct mlx4_cmd_mailbox *mailbox;
u32 *outbox;
u8 field;
u16 size;
u16 stat_rate;
int err;
#define QUERY_DEV_CAP_OUT_SIZE 0x100
#define QUERY_DEV_CAP_MAX_SRQ_SZ_OFFSET 0x10
#define QUERY_DEV_CAP_MAX_QP_SZ_OFFSET 0x11
#define QUERY_DEV_CAP_RSVD_QP_OFFSET 0x12
#define QUERY_DEV_CAP_MAX_QP_OFFSET 0x13
#define QUERY_DEV_CAP_RSVD_SRQ_OFFSET 0x14
#define QUERY_DEV_CAP_MAX_SRQ_OFFSET 0x15
#define QUERY_DEV_CAP_RSVD_EEC_OFFSET 0x16
#define QUERY_DEV_CAP_MAX_EEC_OFFSET 0x17
#define QUERY_DEV_CAP_MAX_CQ_SZ_OFFSET 0x19
#define QUERY_DEV_CAP_RSVD_CQ_OFFSET 0x1a
#define QUERY_DEV_CAP_MAX_CQ_OFFSET 0x1b
#define QUERY_DEV_CAP_MAX_MPT_OFFSET 0x1d
#define QUERY_DEV_CAP_RSVD_EQ_OFFSET 0x1e
#define QUERY_DEV_CAP_MAX_EQ_OFFSET 0x1f
#define QUERY_DEV_CAP_RSVD_MTT_OFFSET 0x20
#define QUERY_DEV_CAP_MAX_MRW_SZ_OFFSET 0x21
#define QUERY_DEV_CAP_RSVD_MRW_OFFSET 0x22
#define QUERY_DEV_CAP_MAX_MTT_SEG_OFFSET 0x23
#define QUERY_DEV_CAP_MAX_AV_OFFSET 0x27
#define QUERY_DEV_CAP_MAX_REQ_QP_OFFSET 0x29
#define QUERY_DEV_CAP_MAX_RES_QP_OFFSET 0x2b
#define QUERY_DEV_CAP_MAX_RDMA_OFFSET 0x2f
#define QUERY_DEV_CAP_RSZ_SRQ_OFFSET 0x33
#define QUERY_DEV_CAP_ACK_DELAY_OFFSET 0x35
#define QUERY_DEV_CAP_MTU_WIDTH_OFFSET 0x36
#define QUERY_DEV_CAP_VL_PORT_OFFSET 0x37
#define QUERY_DEV_CAP_MAX_GID_OFFSET 0x3b
#define QUERY_DEV_CAP_RATE_SUPPORT_OFFSET 0x3c
#define QUERY_DEV_CAP_MAX_PKEY_OFFSET 0x3f
#define QUERY_DEV_CAP_FLAGS_OFFSET 0x44
#define QUERY_DEV_CAP_RSVD_UAR_OFFSET 0x48
#define QUERY_DEV_CAP_UAR_SZ_OFFSET 0x49
#define QUERY_DEV_CAP_PAGE_SZ_OFFSET 0x4b
#define QUERY_DEV_CAP_BF_OFFSET 0x4c
#define QUERY_DEV_CAP_LOG_BF_REG_SZ_OFFSET 0x4d
#define QUERY_DEV_CAP_LOG_MAX_BF_REGS_PER_PAGE_OFFSET 0x4e
#define QUERY_DEV_CAP_LOG_MAX_BF_PAGES_OFFSET 0x4f
#define QUERY_DEV_CAP_MAX_SG_SQ_OFFSET 0x51
#define QUERY_DEV_CAP_MAX_DESC_SZ_SQ_OFFSET 0x52
#define QUERY_DEV_CAP_MAX_SG_RQ_OFFSET 0x55
#define QUERY_DEV_CAP_MAX_DESC_SZ_RQ_OFFSET 0x56
#define QUERY_DEV_CAP_MAX_QP_MCG_OFFSET 0x61
#define QUERY_DEV_CAP_RSVD_MCG_OFFSET 0x62
#define QUERY_DEV_CAP_MAX_MCG_OFFSET 0x63
#define QUERY_DEV_CAP_RSVD_PD_OFFSET 0x64
#define QUERY_DEV_CAP_MAX_PD_OFFSET 0x65
#define QUERY_DEV_CAP_RDMARC_ENTRY_SZ_OFFSET 0x80
#define QUERY_DEV_CAP_QPC_ENTRY_SZ_OFFSET 0x82
#define QUERY_DEV_CAP_AUX_ENTRY_SZ_OFFSET 0x84
#define QUERY_DEV_CAP_ALTC_ENTRY_SZ_OFFSET 0x86
#define QUERY_DEV_CAP_EQC_ENTRY_SZ_OFFSET 0x88
#define QUERY_DEV_CAP_CQC_ENTRY_SZ_OFFSET 0x8a
#define QUERY_DEV_CAP_SRQ_ENTRY_SZ_OFFSET 0x8c
#define QUERY_DEV_CAP_C_MPT_ENTRY_SZ_OFFSET 0x8e
#define QUERY_DEV_CAP_MTT_ENTRY_SZ_OFFSET 0x90
#define QUERY_DEV_CAP_D_MPT_ENTRY_SZ_OFFSET 0x92
#define QUERY_DEV_CAP_BMME_FLAGS_OFFSET 0x97
#define QUERY_DEV_CAP_RSVD_LKEY_OFFSET 0x98
#define QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET 0xa0
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
outbox = mailbox->buf;
err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0, MLX4_CMD_QUERY_DEV_CAP,
MLX4_CMD_TIME_CLASS_A);
if (err)
goto out;
MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_QP_OFFSET);
dev_cap->reserved_qps = 1 << (field & 0xf);
MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_QP_OFFSET);
dev_cap->max_qps = 1 << (field & 0x1f);
MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_SRQ_OFFSET);
dev_cap->reserved_srqs = 1 << (field >> 4);
MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_SRQ_OFFSET);
dev_cap->max_srqs = 1 << (field & 0x1f);
MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_CQ_SZ_OFFSET);
dev_cap->max_cq_sz = 1 << field;
MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_CQ_OFFSET);
dev_cap->reserved_cqs = 1 << (field & 0xf);
MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_CQ_OFFSET);
dev_cap->max_cqs = 1 << (field & 0x1f);
MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MPT_OFFSET);
dev_cap->max_mpts = 1 << (field & 0x3f);
MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_EQ_OFFSET);
dev_cap->reserved_eqs = 1 << (field & 0xf);
MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_EQ_OFFSET);
dev_cap->max_eqs = 1 << (field & 0x7);
MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MTT_OFFSET);
dev_cap->reserved_mtts = 1 << (field >> 4);
MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MRW_SZ_OFFSET);
dev_cap->max_mrw_sz = 1 << field;
MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MRW_OFFSET);
dev_cap->reserved_mrws = 1 << (field & 0xf);
MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MTT_SEG_OFFSET);
dev_cap->max_mtt_seg = 1 << (field & 0x3f);
MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_REQ_QP_OFFSET);
dev_cap->max_requester_per_qp = 1 << (field & 0x3f);
MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_RES_QP_OFFSET);
dev_cap->max_responder_per_qp = 1 << (field & 0x3f);
MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_RDMA_OFFSET);
dev_cap->max_rdma_global = 1 << (field & 0x3f);
MLX4_GET(field, outbox, QUERY_DEV_CAP_ACK_DELAY_OFFSET);
dev_cap->local_ca_ack_delay = field & 0x1f;
MLX4_GET(field, outbox, QUERY_DEV_CAP_MTU_WIDTH_OFFSET);
dev_cap->max_mtu = field >> 4;
dev_cap->max_port_width = field & 0xf;
MLX4_GET(field, outbox, QUERY_DEV_CAP_VL_PORT_OFFSET);
dev_cap->max_vl = field >> 4;
dev_cap->num_ports = field & 0xf;
MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_GID_OFFSET);
dev_cap->max_gids = 1 << (field & 0xf);
MLX4_GET(stat_rate, outbox, QUERY_DEV_CAP_RATE_SUPPORT_OFFSET);
dev_cap->stat_rate_support = stat_rate;
MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_PKEY_OFFSET);
dev_cap->max_pkeys = 1 << (field & 0xf);
MLX4_GET(dev_cap->flags, outbox, QUERY_DEV_CAP_FLAGS_OFFSET);
MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_UAR_OFFSET);
dev_cap->reserved_uars = field >> 4;
MLX4_GET(field, outbox, QUERY_DEV_CAP_UAR_SZ_OFFSET);
dev_cap->uar_size = 1 << ((field & 0x3f) + 20);
MLX4_GET(field, outbox, QUERY_DEV_CAP_PAGE_SZ_OFFSET);
dev_cap->min_page_sz = 1 << field;
MLX4_GET(field, outbox, QUERY_DEV_CAP_BF_OFFSET);
if (field & 0x80) {
MLX4_GET(field, outbox, QUERY_DEV_CAP_LOG_BF_REG_SZ_OFFSET);
dev_cap->bf_reg_size = 1 << (field & 0x1f);
MLX4_GET(field, outbox, QUERY_DEV_CAP_LOG_MAX_BF_REGS_PER_PAGE_OFFSET);
dev_cap->bf_regs_per_page = 1 << (field & 0x3f);
mlx4_dbg(dev, "BlueFlame available (reg size %d, regs/page %d)\n",
dev_cap->bf_reg_size, dev_cap->bf_regs_per_page);
} else {
dev_cap->bf_reg_size = 0;
mlx4_dbg(dev, "BlueFlame not available\n");
}
MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_SG_SQ_OFFSET);
dev_cap->max_sq_sg = field;
MLX4_GET(size, outbox, QUERY_DEV_CAP_MAX_DESC_SZ_SQ_OFFSET);
dev_cap->max_sq_desc_sz = size;
MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_QP_MCG_OFFSET);
dev_cap->max_qp_per_mcg = 1 << field;
MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MCG_OFFSET);
dev_cap->reserved_mgms = field & 0xf;
MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MCG_OFFSET);
dev_cap->max_mcgs = 1 << field;
MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_PD_OFFSET);
dev_cap->reserved_pds = field >> 4;
MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_PD_OFFSET);
dev_cap->max_pds = 1 << (field & 0x3f);
MLX4_GET(size, outbox, QUERY_DEV_CAP_RDMARC_ENTRY_SZ_OFFSET);
dev_cap->rdmarc_entry_sz = size;
MLX4_GET(size, outbox, QUERY_DEV_CAP_QPC_ENTRY_SZ_OFFSET);
dev_cap->qpc_entry_sz = size;
MLX4_GET(size, outbox, QUERY_DEV_CAP_AUX_ENTRY_SZ_OFFSET);
dev_cap->aux_entry_sz = size;
MLX4_GET(size, outbox, QUERY_DEV_CAP_ALTC_ENTRY_SZ_OFFSET);
dev_cap->altc_entry_sz = size;
MLX4_GET(size, outbox, QUERY_DEV_CAP_EQC_ENTRY_SZ_OFFSET);
dev_cap->eqc_entry_sz = size;
MLX4_GET(size, outbox, QUERY_DEV_CAP_CQC_ENTRY_SZ_OFFSET);
dev_cap->cqc_entry_sz = size;
MLX4_GET(size, outbox, QUERY_DEV_CAP_SRQ_ENTRY_SZ_OFFSET);
dev_cap->srq_entry_sz = size;
MLX4_GET(size, outbox, QUERY_DEV_CAP_C_MPT_ENTRY_SZ_OFFSET);
dev_cap->cmpt_entry_sz = size;
MLX4_GET(size, outbox, QUERY_DEV_CAP_MTT_ENTRY_SZ_OFFSET);
dev_cap->mtt_entry_sz = size;
MLX4_GET(size, outbox, QUERY_DEV_CAP_D_MPT_ENTRY_SZ_OFFSET);
dev_cap->dmpt_entry_sz = size;
MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_SRQ_SZ_OFFSET);
dev_cap->max_srq_sz = 1 << field;
MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_QP_SZ_OFFSET);
dev_cap->max_qp_sz = 1 << field;
MLX4_GET(field, outbox, QUERY_DEV_CAP_RSZ_SRQ_OFFSET);
dev_cap->resize_srq = field & 1;
MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_SG_RQ_OFFSET);
dev_cap->max_rq_sg = field;
MLX4_GET(size, outbox, QUERY_DEV_CAP_MAX_DESC_SZ_RQ_OFFSET);
dev_cap->max_rq_desc_sz = size;
MLX4_GET(dev_cap->bmme_flags, outbox,
QUERY_DEV_CAP_BMME_FLAGS_OFFSET);
MLX4_GET(dev_cap->reserved_lkey, outbox,
QUERY_DEV_CAP_RSVD_LKEY_OFFSET);
MLX4_GET(dev_cap->max_icm_sz, outbox,
QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET);
if (dev_cap->bmme_flags & 1)
mlx4_dbg(dev, "Base MM extensions: yes "
"(flags %d, rsvd L_Key %08x)\n",
dev_cap->bmme_flags, dev_cap->reserved_lkey);
else
mlx4_dbg(dev, "Base MM extensions: no\n");
/*
* Each UAR has 4 EQ doorbells; so if a UAR is reserved, then
* we can't use any EQs whose doorbell falls on that page,
* even if the EQ itself isn't reserved.
*/
dev_cap->reserved_eqs = max(dev_cap->reserved_uars * 4,
dev_cap->reserved_eqs);
mlx4_dbg(dev, "Max ICM size %lld MB\n",
(unsigned long long) dev_cap->max_icm_sz >> 20);
mlx4_dbg(dev, "Max QPs: %d, reserved QPs: %d, entry size: %d\n",
dev_cap->max_qps, dev_cap->reserved_qps, dev_cap->qpc_entry_sz);
mlx4_dbg(dev, "Max SRQs: %d, reserved SRQs: %d, entry size: %d\n",
dev_cap->max_srqs, dev_cap->reserved_srqs, dev_cap->srq_entry_sz);
mlx4_dbg(dev, "Max CQs: %d, reserved CQs: %d, entry size: %d\n",
dev_cap->max_cqs, dev_cap->reserved_cqs, dev_cap->cqc_entry_sz);
mlx4_dbg(dev, "Max EQs: %d, reserved EQs: %d, entry size: %d\n",
dev_cap->max_eqs, dev_cap->reserved_eqs, dev_cap->eqc_entry_sz);
mlx4_dbg(dev, "reserved MPTs: %d, reserved MTTs: %d\n",
dev_cap->reserved_mrws, dev_cap->reserved_mtts);
mlx4_dbg(dev, "Max PDs: %d, reserved PDs: %d, reserved UARs: %d\n",
dev_cap->max_pds, dev_cap->reserved_pds, dev_cap->reserved_uars);
mlx4_dbg(dev, "Max QP/MCG: %d, reserved MGMs: %d\n",
dev_cap->max_pds, dev_cap->reserved_mgms);
mlx4_dbg(dev, "Max CQEs: %d, max WQEs: %d, max SRQ WQEs: %d\n",
dev_cap->max_cq_sz, dev_cap->max_qp_sz, dev_cap->max_srq_sz);
mlx4_dbg(dev, "Local CA ACK delay: %d, max MTU: %d, port width cap: %d\n",
dev_cap->local_ca_ack_delay, 128 << dev_cap->max_mtu,
dev_cap->max_port_width);
mlx4_dbg(dev, "Max SQ desc size: %d, max SQ S/G: %d\n",
dev_cap->max_sq_desc_sz, dev_cap->max_sq_sg);
mlx4_dbg(dev, "Max RQ desc size: %d, max RQ S/G: %d\n",
dev_cap->max_rq_desc_sz, dev_cap->max_rq_sg);
dump_dev_cap_flags(dev, dev_cap->flags);
out:
mlx4_free_cmd_mailbox(dev, mailbox);
return err;
}
int mlx4_map_cmd(struct mlx4_dev *dev, u16 op, struct mlx4_icm *icm, u64 virt)
{
struct mlx4_cmd_mailbox *mailbox;
struct mlx4_icm_iter iter;
__be64 *pages;
int lg;
int nent = 0;
int i;
int err = 0;
int ts = 0, tc = 0;
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
memset(mailbox->buf, 0, MLX4_MAILBOX_SIZE);
pages = mailbox->buf;
for (mlx4_icm_first(icm, &iter);
!mlx4_icm_last(&iter);
mlx4_icm_next(&iter)) {
/*
* We have to pass pages that are aligned to their
* size, so find the least significant 1 in the
* address or size and use that as our log2 size.
*/
lg = ffs(mlx4_icm_addr(&iter) | mlx4_icm_size(&iter)) - 1;
if (lg < MLX4_ICM_PAGE_SHIFT) {
mlx4_warn(dev, "Got FW area not aligned to %d (%llx/%lx).\n",
MLX4_ICM_PAGE_SIZE,
(unsigned long long) mlx4_icm_addr(&iter),
mlx4_icm_size(&iter));
err = -EINVAL;
goto out;
}
for (i = 0; i < mlx4_icm_size(&iter) >> lg; ++i) {
if (virt != -1) {
pages[nent * 2] = cpu_to_be64(virt);
virt += 1 << lg;
}
pages[nent * 2 + 1] =
cpu_to_be64((mlx4_icm_addr(&iter) + (i << lg)) |
(lg - MLX4_ICM_PAGE_SHIFT));
ts += 1 << (lg - 10);
++tc;
if (++nent == MLX4_MAILBOX_SIZE / 16) {
err = mlx4_cmd(dev, mailbox->dma, nent, 0, op,
MLX4_CMD_TIME_CLASS_B);
if (err)
goto out;
nent = 0;
}
}
}
if (nent)
err = mlx4_cmd(dev, mailbox->dma, nent, 0, op, MLX4_CMD_TIME_CLASS_B);
if (err)
goto out;
switch (op) {
case MLX4_CMD_MAP_FA:
mlx4_dbg(dev, "Mapped %d chunks/%d KB for FW.\n", tc, ts);
break;
case MLX4_CMD_MAP_ICM_AUX:
mlx4_dbg(dev, "Mapped %d chunks/%d KB for ICM aux.\n", tc, ts);
break;
case MLX4_CMD_MAP_ICM:
mlx4_dbg(dev, "Mapped %d chunks/%d KB at %llx for ICM.\n",
tc, ts, (unsigned long long) virt - (ts << 10));
break;
}
out:
mlx4_free_cmd_mailbox(dev, mailbox);
return err;
}
int mlx4_MAP_FA(struct mlx4_dev *dev, struct mlx4_icm *icm)
{
return mlx4_map_cmd(dev, MLX4_CMD_MAP_FA, icm, -1);
}
int mlx4_UNMAP_FA(struct mlx4_dev *dev)
{
return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_UNMAP_FA, MLX4_CMD_TIME_CLASS_B);
}
int mlx4_RUN_FW(struct mlx4_dev *dev)
{
return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_RUN_FW, MLX4_CMD_TIME_CLASS_A);
}
int mlx4_QUERY_FW(struct mlx4_dev *dev)
{
struct mlx4_fw *fw = &mlx4_priv(dev)->fw;
struct mlx4_cmd *cmd = &mlx4_priv(dev)->cmd;
struct mlx4_cmd_mailbox *mailbox;
u32 *outbox;
int err = 0;
u64 fw_ver;
u8 lg;
#define QUERY_FW_OUT_SIZE 0x100
#define QUERY_FW_VER_OFFSET 0x00
#define QUERY_FW_MAX_CMD_OFFSET 0x0f
#define QUERY_FW_ERR_START_OFFSET 0x30
#define QUERY_FW_ERR_SIZE_OFFSET 0x38
#define QUERY_FW_ERR_BAR_OFFSET 0x3c
#define QUERY_FW_SIZE_OFFSET 0x00
#define QUERY_FW_CLR_INT_BASE_OFFSET 0x20
#define QUERY_FW_CLR_INT_BAR_OFFSET 0x28
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
outbox = mailbox->buf;
err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0, MLX4_CMD_QUERY_FW,
MLX4_CMD_TIME_CLASS_A);
if (err)
goto out;
MLX4_GET(fw_ver, outbox, QUERY_FW_VER_OFFSET);
/*
* FW subminor version is at more signifant bits than minor
* version, so swap here.
*/
dev->caps.fw_ver = (fw_ver & 0xffff00000000ull) |
((fw_ver & 0xffff0000ull) >> 16) |
((fw_ver & 0x0000ffffull) << 16);
MLX4_GET(lg, outbox, QUERY_FW_MAX_CMD_OFFSET);
cmd->max_cmds = 1 << lg;
mlx4_dbg(dev, "FW version %d.%d.%03d, max commands %d\n",
(int) (dev->caps.fw_ver >> 32),
(int) (dev->caps.fw_ver >> 16) & 0xffff,
(int) dev->caps.fw_ver & 0xffff,
cmd->max_cmds);
MLX4_GET(fw->catas_offset, outbox, QUERY_FW_ERR_START_OFFSET);
MLX4_GET(fw->catas_size, outbox, QUERY_FW_ERR_SIZE_OFFSET);
MLX4_GET(fw->catas_bar, outbox, QUERY_FW_ERR_BAR_OFFSET);
fw->catas_bar = (fw->catas_bar >> 6) * 2;
mlx4_dbg(dev, "Catastrophic error buffer at 0x%llx, size 0x%x, BAR %d\n",
(unsigned long long) fw->catas_offset, fw->catas_size, fw->catas_bar);
MLX4_GET(fw->fw_pages, outbox, QUERY_FW_SIZE_OFFSET);
MLX4_GET(fw->clr_int_base, outbox, QUERY_FW_CLR_INT_BASE_OFFSET);
MLX4_GET(fw->clr_int_bar, outbox, QUERY_FW_CLR_INT_BAR_OFFSET);
fw->clr_int_bar = (fw->clr_int_bar >> 6) * 2;
mlx4_dbg(dev, "FW size %d KB\n", fw->fw_pages >> 2);
/*
* Round up number of system pages needed in case
* MLX4_ICM_PAGE_SIZE < PAGE_SIZE.
*/
fw->fw_pages =
ALIGN(fw->fw_pages, PAGE_SIZE / MLX4_ICM_PAGE_SIZE) >>
(PAGE_SHIFT - MLX4_ICM_PAGE_SHIFT);
mlx4_dbg(dev, "Clear int @ %llx, BAR %d\n",
(unsigned long long) fw->clr_int_base, fw->clr_int_bar);
out:
mlx4_free_cmd_mailbox(dev, mailbox);
return err;
}
static void get_board_id(void *vsd, char *board_id)
{
int i;
#define VSD_OFFSET_SIG1 0x00
#define VSD_OFFSET_SIG2 0xde
#define VSD_OFFSET_MLX_BOARD_ID 0xd0
#define VSD_OFFSET_TS_BOARD_ID 0x20
#define VSD_SIGNATURE_TOPSPIN 0x5ad
memset(board_id, 0, MLX4_BOARD_ID_LEN);
if (be16_to_cpup(vsd + VSD_OFFSET_SIG1) == VSD_SIGNATURE_TOPSPIN &&
be16_to_cpup(vsd + VSD_OFFSET_SIG2) == VSD_SIGNATURE_TOPSPIN) {
strlcpy(board_id, vsd + VSD_OFFSET_TS_BOARD_ID, MLX4_BOARD_ID_LEN);
} else {
/*
* The board ID is a string but the firmware byte
* swaps each 4-byte word before passing it back to
* us. Therefore we need to swab it before printing.
*/
for (i = 0; i < 4; ++i)
((u32 *) board_id)[i] =
swab32(*(u32 *) (vsd + VSD_OFFSET_MLX_BOARD_ID + i * 4));
}
}
int mlx4_QUERY_ADAPTER(struct mlx4_dev *dev, struct mlx4_adapter *adapter)
{
struct mlx4_cmd_mailbox *mailbox;
u32 *outbox;
int err;
#define QUERY_ADAPTER_OUT_SIZE 0x100
#define QUERY_ADAPTER_VENDOR_ID_OFFSET 0x00
#define QUERY_ADAPTER_DEVICE_ID_OFFSET 0x04
#define QUERY_ADAPTER_REVISION_ID_OFFSET 0x08
#define QUERY_ADAPTER_INTA_PIN_OFFSET 0x10
#define QUERY_ADAPTER_VSD_OFFSET 0x20
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
outbox = mailbox->buf;
err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0, MLX4_CMD_QUERY_ADAPTER,
MLX4_CMD_TIME_CLASS_A);
if (err)
goto out;
MLX4_GET(adapter->vendor_id, outbox, QUERY_ADAPTER_VENDOR_ID_OFFSET);
MLX4_GET(adapter->device_id, outbox, QUERY_ADAPTER_DEVICE_ID_OFFSET);
MLX4_GET(adapter->revision_id, outbox, QUERY_ADAPTER_REVISION_ID_OFFSET);
MLX4_GET(adapter->inta_pin, outbox, QUERY_ADAPTER_INTA_PIN_OFFSET);
get_board_id(outbox + QUERY_ADAPTER_VSD_OFFSET / 4,
adapter->board_id);
out:
mlx4_free_cmd_mailbox(dev, mailbox);
return err;
}
int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
{
struct mlx4_cmd_mailbox *mailbox;
__be32 *inbox;
int err;
#define INIT_HCA_IN_SIZE 0x200
#define INIT_HCA_VERSION_OFFSET 0x000
#define INIT_HCA_VERSION 2
#define INIT_HCA_FLAGS_OFFSET 0x014
#define INIT_HCA_QPC_OFFSET 0x020
#define INIT_HCA_QPC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x10)
#define INIT_HCA_LOG_QP_OFFSET (INIT_HCA_QPC_OFFSET + 0x17)
#define INIT_HCA_SRQC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x28)
#define INIT_HCA_LOG_SRQ_OFFSET (INIT_HCA_QPC_OFFSET + 0x2f)
#define INIT_HCA_CQC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x30)
#define INIT_HCA_LOG_CQ_OFFSET (INIT_HCA_QPC_OFFSET + 0x37)
#define INIT_HCA_ALTC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x40)
#define INIT_HCA_AUXC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x50)
#define INIT_HCA_EQC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x60)
#define INIT_HCA_LOG_EQ_OFFSET (INIT_HCA_QPC_OFFSET + 0x67)
#define INIT_HCA_RDMARC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x70)
#define INIT_HCA_LOG_RD_OFFSET (INIT_HCA_QPC_OFFSET + 0x77)
#define INIT_HCA_MCAST_OFFSET 0x0c0
#define INIT_HCA_MC_BASE_OFFSET (INIT_HCA_MCAST_OFFSET + 0x00)
#define INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x12)
#define INIT_HCA_LOG_MC_HASH_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x16)
#define INIT_HCA_LOG_MC_TABLE_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x1b)
#define INIT_HCA_TPT_OFFSET 0x0f0
#define INIT_HCA_DMPT_BASE_OFFSET (INIT_HCA_TPT_OFFSET + 0x00)
#define INIT_HCA_LOG_MPT_SZ_OFFSET (INIT_HCA_TPT_OFFSET + 0x0b)
#define INIT_HCA_MTT_BASE_OFFSET (INIT_HCA_TPT_OFFSET + 0x10)
#define INIT_HCA_CMPT_BASE_OFFSET (INIT_HCA_TPT_OFFSET + 0x18)
#define INIT_HCA_UAR_OFFSET 0x120
#define INIT_HCA_LOG_UAR_SZ_OFFSET (INIT_HCA_UAR_OFFSET + 0x0a)
#define INIT_HCA_UAR_PAGE_SZ_OFFSET (INIT_HCA_UAR_OFFSET + 0x0b)
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
inbox = mailbox->buf;
memset(inbox, 0, INIT_HCA_IN_SIZE);
*((u8 *) mailbox->buf + INIT_HCA_VERSION_OFFSET) = INIT_HCA_VERSION;
#if defined(__LITTLE_ENDIAN)
*(inbox + INIT_HCA_FLAGS_OFFSET / 4) &= ~cpu_to_be32(1 << 1);
#elif defined(__BIG_ENDIAN)
*(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 1);
#else
#error Host endianness not defined
#endif
/* Check port for UD address vector: */
*(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1);
/* QPC/EEC/CQC/EQC/RDMARC attributes */
MLX4_PUT(inbox, param->qpc_base, INIT_HCA_QPC_BASE_OFFSET);
MLX4_PUT(inbox, param->log_num_qps, INIT_HCA_LOG_QP_OFFSET);
MLX4_PUT(inbox, param->srqc_base, INIT_HCA_SRQC_BASE_OFFSET);
MLX4_PUT(inbox, param->log_num_srqs, INIT_HCA_LOG_SRQ_OFFSET);
MLX4_PUT(inbox, param->cqc_base, INIT_HCA_CQC_BASE_OFFSET);
MLX4_PUT(inbox, param->log_num_cqs, INIT_HCA_LOG_CQ_OFFSET);
MLX4_PUT(inbox, param->altc_base, INIT_HCA_ALTC_BASE_OFFSET);
MLX4_PUT(inbox, param->auxc_base, INIT_HCA_AUXC_BASE_OFFSET);
MLX4_PUT(inbox, param->eqc_base, INIT_HCA_EQC_BASE_OFFSET);
MLX4_PUT(inbox, param->log_num_eqs, INIT_HCA_LOG_EQ_OFFSET);
MLX4_PUT(inbox, param->rdmarc_base, INIT_HCA_RDMARC_BASE_OFFSET);
MLX4_PUT(inbox, param->log_rd_per_qp, INIT_HCA_LOG_RD_OFFSET);
/* multicast attributes */
MLX4_PUT(inbox, param->mc_base, INIT_HCA_MC_BASE_OFFSET);
MLX4_PUT(inbox, param->log_mc_entry_sz, INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET);
MLX4_PUT(inbox, param->log_mc_hash_sz, INIT_HCA_LOG_MC_HASH_SZ_OFFSET);
MLX4_PUT(inbox, param->log_mc_table_sz, INIT_HCA_LOG_MC_TABLE_SZ_OFFSET);
/* TPT attributes */
MLX4_PUT(inbox, param->dmpt_base, INIT_HCA_DMPT_BASE_OFFSET);
MLX4_PUT(inbox, param->log_mpt_sz, INIT_HCA_LOG_MPT_SZ_OFFSET);
MLX4_PUT(inbox, param->mtt_base, INIT_HCA_MTT_BASE_OFFSET);
MLX4_PUT(inbox, param->cmpt_base, INIT_HCA_CMPT_BASE_OFFSET);
/* UAR attributes */
MLX4_PUT(inbox, (u8) (PAGE_SHIFT - 12), INIT_HCA_UAR_PAGE_SZ_OFFSET);
MLX4_PUT(inbox, param->log_uar_sz, INIT_HCA_LOG_UAR_SZ_OFFSET);
err = mlx4_cmd(dev, mailbox->dma, 0, 0, MLX4_CMD_INIT_HCA, 1000);
if (err)
mlx4_err(dev, "INIT_HCA returns %d\n", err);
mlx4_free_cmd_mailbox(dev, mailbox);
return err;
}
int mlx4_INIT_PORT(struct mlx4_dev *dev, struct mlx4_init_port_param *param, int port)
{
struct mlx4_cmd_mailbox *mailbox;
u32 *inbox;
int err;
u32 flags;
#define INIT_PORT_IN_SIZE 256
#define INIT_PORT_FLAGS_OFFSET 0x00
#define INIT_PORT_FLAG_SIG (1 << 18)
#define INIT_PORT_FLAG_NG (1 << 17)
#define INIT_PORT_FLAG_G0 (1 << 16)
#define INIT_PORT_VL_SHIFT 4
#define INIT_PORT_PORT_WIDTH_SHIFT 8
#define INIT_PORT_MTU_OFFSET 0x04
#define INIT_PORT_MAX_GID_OFFSET 0x06
#define INIT_PORT_MAX_PKEY_OFFSET 0x0a
#define INIT_PORT_GUID0_OFFSET 0x10
#define INIT_PORT_NODE_GUID_OFFSET 0x18
#define INIT_PORT_SI_GUID_OFFSET 0x20
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
inbox = mailbox->buf;
memset(inbox, 0, INIT_PORT_IN_SIZE);
flags = 0;
flags |= param->set_guid0 ? INIT_PORT_FLAG_G0 : 0;
flags |= param->set_node_guid ? INIT_PORT_FLAG_NG : 0;
flags |= param->set_si_guid ? INIT_PORT_FLAG_SIG : 0;
flags |= (param->vl_cap & 0xf) << INIT_PORT_VL_SHIFT;
flags |= (param->port_width_cap & 0xf) << INIT_PORT_PORT_WIDTH_SHIFT;
MLX4_PUT(inbox, flags, INIT_PORT_FLAGS_OFFSET);
MLX4_PUT(inbox, param->mtu, INIT_PORT_MTU_OFFSET);
MLX4_PUT(inbox, param->max_gid, INIT_PORT_MAX_GID_OFFSET);
MLX4_PUT(inbox, param->max_pkey, INIT_PORT_MAX_PKEY_OFFSET);
MLX4_PUT(inbox, param->guid0, INIT_PORT_GUID0_OFFSET);
MLX4_PUT(inbox, param->node_guid, INIT_PORT_NODE_GUID_OFFSET);
MLX4_PUT(inbox, param->si_guid, INIT_PORT_SI_GUID_OFFSET);
err = mlx4_cmd(dev, mailbox->dma, port, 0, MLX4_CMD_INIT_PORT,
MLX4_CMD_TIME_CLASS_A);
mlx4_free_cmd_mailbox(dev, mailbox);
return err;
}
EXPORT_SYMBOL_GPL(mlx4_INIT_PORT);
int mlx4_CLOSE_PORT(struct mlx4_dev *dev, int port)
{
return mlx4_cmd(dev, 0, port, 0, MLX4_CMD_CLOSE_PORT, 1000);
}
EXPORT_SYMBOL_GPL(mlx4_CLOSE_PORT);
int mlx4_CLOSE_HCA(struct mlx4_dev *dev, int panic)
{
return mlx4_cmd(dev, 0, 0, panic, MLX4_CMD_CLOSE_HCA, 1000);
}
int mlx4_SET_ICM_SIZE(struct mlx4_dev *dev, u64 icm_size, u64 *aux_pages)
{
int ret = mlx4_cmd_imm(dev, icm_size, aux_pages, 0, 0,
MLX4_CMD_SET_ICM_SIZE,
MLX4_CMD_TIME_CLASS_A);
if (ret)
return ret;
/*
* Round up number of system pages needed in case
* MLX4_ICM_PAGE_SIZE < PAGE_SIZE.
*/
*aux_pages = ALIGN(*aux_pages, PAGE_SIZE / MLX4_ICM_PAGE_SIZE) >>
(PAGE_SHIFT - MLX4_ICM_PAGE_SHIFT);
return 0;
}
int mlx4_NOP(struct mlx4_dev *dev)
{
/* Input modifier of 0x1f means "finish as soon as possible." */
return mlx4_cmd(dev, 0, 0x1f, 0, MLX4_CMD_NOP, 100);
}
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
* Copyright (c) 2006, 2007 Cisco Systems. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef MLX4_FW_H
#define MLX4_FW_H
#include "mlx4.h"
#include "icm.h"
struct mlx4_dev_cap {
int max_srq_sz;
int max_qp_sz;
int reserved_qps;
int max_qps;
int reserved_srqs;
int max_srqs;
int max_cq_sz;
int reserved_cqs;
int max_cqs;
int max_mpts;
int reserved_eqs;
int max_eqs;
int reserved_mtts;
int max_mrw_sz;
int reserved_mrws;
int max_mtt_seg;
int max_requester_per_qp;
int max_responder_per_qp;
int max_rdma_global;
int local_ca_ack_delay;
int max_mtu;
int max_port_width;
int max_vl;
int num_ports;
int max_gids;
u16 stat_rate_support;
int max_pkeys;
u32 flags;
int reserved_uars;
int uar_size;
int min_page_sz;
int bf_reg_size;
int bf_regs_per_page;
int max_sq_sg;
int max_sq_desc_sz;
int max_rq_sg;
int max_rq_desc_sz;
int max_qp_per_mcg;
int reserved_mgms;
int max_mcgs;
int reserved_pds;
int max_pds;
int qpc_entry_sz;
int rdmarc_entry_sz;
int altc_entry_sz;
int aux_entry_sz;
int srq_entry_sz;
int cqc_entry_sz;
int eqc_entry_sz;
int dmpt_entry_sz;
int cmpt_entry_sz;
int mtt_entry_sz;
int resize_srq;
u8 bmme_flags;
u32 reserved_lkey;
u64 max_icm_sz;
};
struct mlx4_adapter {
u32 vendor_id;
u32 device_id;
u32 revision_id;
char board_id[MLX4_BOARD_ID_LEN];
u8 inta_pin;
};
struct mlx4_init_hca_param {
u64 qpc_base;
u64 rdmarc_base;
u64 auxc_base;
u64 altc_base;
u64 srqc_base;
u64 cqc_base;
u64 eqc_base;
u64 mc_base;
u64 dmpt_base;
u64 cmpt_base;
u64 mtt_base;
u16 log_mc_entry_sz;
u16 log_mc_hash_sz;
u8 log_num_qps;
u8 log_num_srqs;
u8 log_num_cqs;
u8 log_num_eqs;
u8 log_rd_per_qp;
u8 log_mc_table_sz;
u8 log_mpt_sz;
u8 log_uar_sz;
};
struct mlx4_init_ib_param {
int port_width;
int vl_cap;
int mtu_cap;
u16 gid_cap;
u16 pkey_cap;
int set_guid0;
u64 guid0;
int set_node_guid;
u64 node_guid;
int set_si_guid;
u64 si_guid;
};
struct mlx4_set_ib_param {
int set_si_guid;
int reset_qkey_viol;
u64 si_guid;
u32 cap_mask;
};
int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap);
int mlx4_MAP_FA(struct mlx4_dev *dev, struct mlx4_icm *icm);
int mlx4_UNMAP_FA(struct mlx4_dev *dev);
int mlx4_RUN_FW(struct mlx4_dev *dev);
int mlx4_QUERY_FW(struct mlx4_dev *dev);
int mlx4_QUERY_ADAPTER(struct mlx4_dev *dev, struct mlx4_adapter *adapter);
int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param);
int mlx4_CLOSE_HCA(struct mlx4_dev *dev, int panic);
int mlx4_map_cmd(struct mlx4_dev *dev, u16 op, struct mlx4_icm *icm, u64 virt);
int mlx4_SET_ICM_SIZE(struct mlx4_dev *dev, u64 icm_size, u64 *aux_pages);
int mlx4_MAP_ICM_AUX(struct mlx4_dev *dev, struct mlx4_icm *icm);
int mlx4_UNMAP_ICM_AUX(struct mlx4_dev *dev);
int mlx4_NOP(struct mlx4_dev *dev);
#endif /* MLX4_FW_H */
/*
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
* Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/mlx4/cmd.h>
#include "mlx4.h"
#include "icm.h"
#include "fw.h"
/*
* We allocate in as big chunks as we can, up to a maximum of 256 KB
* per chunk.
*/
enum {
MLX4_ICM_ALLOC_SIZE = 1 << 18,
MLX4_TABLE_CHUNK_SIZE = 1 << 18
};
void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm)
{
struct mlx4_icm_chunk *chunk, *tmp;
int i;
list_for_each_entry_safe(chunk, tmp, &icm->chunk_list, list) {
if (chunk->nsg > 0)
pci_unmap_sg(dev->pdev, chunk->mem, chunk->npages,
PCI_DMA_BIDIRECTIONAL);
for (i = 0; i < chunk->npages; ++i)
__free_pages(chunk->mem[i].page,
get_order(chunk->mem[i].length));
kfree(chunk);
}
kfree(icm);
}
struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
gfp_t gfp_mask)
{
struct mlx4_icm *icm;
struct mlx4_icm_chunk *chunk = NULL;
int cur_order;
icm = kmalloc(sizeof *icm, gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
if (!icm)
return icm;
icm->refcount = 0;
INIT_LIST_HEAD(&icm->chunk_list);
cur_order = get_order(MLX4_ICM_ALLOC_SIZE);
while (npages > 0) {
if (!chunk) {
chunk = kmalloc(sizeof *chunk,
gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
if (!chunk)
goto fail;
chunk->npages = 0;
chunk->nsg = 0;
list_add_tail(&chunk->list, &icm->chunk_list);
}
while (1 << cur_order > npages)
--cur_order;
chunk->mem[chunk->npages].page = alloc_pages(gfp_mask, cur_order);
if (chunk->mem[chunk->npages].page) {
chunk->mem[chunk->npages].length = PAGE_SIZE << cur_order;
chunk->mem[chunk->npages].offset = 0;
if (++chunk->npages == MLX4_ICM_CHUNK_LEN) {
chunk->nsg = pci_map_sg(dev->pdev, chunk->mem,
chunk->npages,
PCI_DMA_BIDIRECTIONAL);
if (chunk->nsg <= 0)
goto fail;
chunk = NULL;
}
npages -= 1 << cur_order;
} else {
--cur_order;
if (cur_order < 0)
goto fail;
}
}
if (chunk) {
chunk->nsg = pci_map_sg(dev->pdev, chunk->mem,
chunk->npages,
PCI_DMA_BIDIRECTIONAL);
if (chunk->nsg <= 0)
goto fail;
}
return icm;
fail:
mlx4_free_icm(dev, icm);
return NULL;
}
static int mlx4_MAP_ICM(struct mlx4_dev *dev, struct mlx4_icm *icm, u64 virt)
{
return mlx4_map_cmd(dev, MLX4_CMD_MAP_ICM, icm, virt);
}
int mlx4_UNMAP_ICM(struct mlx4_dev *dev, u64 virt, u32 page_count)
{
return mlx4_cmd(dev, virt, page_count, 0, MLX4_CMD_UNMAP_ICM,
MLX4_CMD_TIME_CLASS_B);
}
int mlx4_MAP_ICM_page(struct mlx4_dev *dev, u64 dma_addr, u64 virt)
{
struct mlx4_cmd_mailbox *mailbox;
__be64 *inbox;
int err;
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
inbox = mailbox->buf;
inbox[0] = cpu_to_be64(virt);
inbox[1] = cpu_to_be64(dma_addr);
err = mlx4_cmd(dev, mailbox->dma, 1, 0, MLX4_CMD_MAP_ICM,
MLX4_CMD_TIME_CLASS_B);
mlx4_free_cmd_mailbox(dev, mailbox);
if (!err)
mlx4_dbg(dev, "Mapped page at %llx to %llx for ICM.\n",
(unsigned long long) dma_addr, (unsigned long long) virt);
return err;
}
int mlx4_MAP_ICM_AUX(struct mlx4_dev *dev, struct mlx4_icm *icm)
{
return mlx4_map_cmd(dev, MLX4_CMD_MAP_ICM_AUX, icm, -1);
}
int mlx4_UNMAP_ICM_AUX(struct mlx4_dev *dev)
{
return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_UNMAP_ICM_AUX, MLX4_CMD_TIME_CLASS_B);
}
int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj)
{
int i = (obj & (table->num_obj - 1)) / (MLX4_TABLE_CHUNK_SIZE / table->obj_size);
int ret = 0;
mutex_lock(&table->mutex);
if (table->icm[i]) {
++table->icm[i]->refcount;
goto out;
}
table->icm[i] = mlx4_alloc_icm(dev, MLX4_TABLE_CHUNK_SIZE >> PAGE_SHIFT,
(table->lowmem ? GFP_KERNEL : GFP_HIGHUSER) |
__GFP_NOWARN);
if (!table->icm[i]) {
ret = -ENOMEM;
goto out;
}
if (mlx4_MAP_ICM(dev, table->icm[i], table->virt +
(u64) i * MLX4_TABLE_CHUNK_SIZE)) {
mlx4_free_icm(dev, table->icm[i]);
table->icm[i] = NULL;
ret = -ENOMEM;
goto out;
}
++table->icm[i]->refcount;
out:
mutex_unlock(&table->mutex);
return ret;
}
void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj)
{
int i;
i = (obj & (table->num_obj - 1)) / (MLX4_TABLE_CHUNK_SIZE / table->obj_size);
mutex_lock(&table->mutex);
if (--table->icm[i]->refcount == 0) {
mlx4_UNMAP_ICM(dev, table->virt + i * MLX4_TABLE_CHUNK_SIZE,
MLX4_TABLE_CHUNK_SIZE / MLX4_ICM_PAGE_SIZE);
mlx4_free_icm(dev, table->icm[i]);
table->icm[i] = NULL;
}
mutex_unlock(&table->mutex);
}
void *mlx4_table_find(struct mlx4_icm_table *table, int obj)
{
int idx, offset, i;
struct mlx4_icm_chunk *chunk;
struct mlx4_icm *icm;
struct page *page = NULL;
if (!table->lowmem)
return NULL;
mutex_lock(&table->mutex);
idx = obj & (table->num_obj - 1);
icm = table->icm[idx / (MLX4_TABLE_CHUNK_SIZE / table->obj_size)];
offset = idx % (MLX4_TABLE_CHUNK_SIZE / table->obj_size);
if (!icm)
goto out;
list_for_each_entry(chunk, &icm->chunk_list, list) {
for (i = 0; i < chunk->npages; ++i) {
if (chunk->mem[i].length > offset) {
page = chunk->mem[i].page;
goto out;
}
offset -= chunk->mem[i].length;
}
}
out:
mutex_unlock(&table->mutex);
return page ? lowmem_page_address(page) + offset : NULL;
}
int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
int start, int end)
{
int inc = MLX4_TABLE_CHUNK_SIZE / table->obj_size;
int i, err;
for (i = start; i <= end; i += inc) {
err = mlx4_table_get(dev, table, i);
if (err)
goto fail;
}
return 0;
fail:
while (i > start) {
i -= inc;
mlx4_table_put(dev, table, i);
}
return err;
}
void mlx4_table_put_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
int start, int end)
{
int i;
for (i = start; i <= end; i += MLX4_TABLE_CHUNK_SIZE / table->obj_size)
mlx4_table_put(dev, table, i);
}
int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table,
u64 virt, int obj_size, int nobj, int reserved,
int use_lowmem)
{
int obj_per_chunk;
int num_icm;
unsigned chunk_size;
int i;
obj_per_chunk = MLX4_TABLE_CHUNK_SIZE / obj_size;
num_icm = (nobj + obj_per_chunk - 1) / obj_per_chunk;
table->icm = kcalloc(num_icm, sizeof *table->icm, GFP_KERNEL);
if (!table->icm)
return -ENOMEM;
table->virt = virt;
table->num_icm = num_icm;
table->num_obj = nobj;
table->obj_size = obj_size;
table->lowmem = use_lowmem;
mutex_init(&table->mutex);
for (i = 0; i * MLX4_TABLE_CHUNK_SIZE < reserved * obj_size; ++i) {
chunk_size = MLX4_TABLE_CHUNK_SIZE;
if ((i + 1) * MLX4_TABLE_CHUNK_SIZE > nobj * obj_size)
chunk_size = PAGE_ALIGN(nobj * obj_size - i * MLX4_TABLE_CHUNK_SIZE);
table->icm[i] = mlx4_alloc_icm(dev, chunk_size >> PAGE_SHIFT,
(use_lowmem ? GFP_KERNEL : GFP_HIGHUSER) |
__GFP_NOWARN);
if (!table->icm[i])
goto err;
if (mlx4_MAP_ICM(dev, table->icm[i], virt + i * MLX4_TABLE_CHUNK_SIZE)) {
mlx4_free_icm(dev, table->icm[i]);
table->icm[i] = NULL;
goto err;
}
/*
* Add a reference to this ICM chunk so that it never
* gets freed (since it contains reserved firmware objects).
*/
++table->icm[i]->refcount;
}
return 0;
err:
for (i = 0; i < num_icm; ++i)
if (table->icm[i]) {
mlx4_UNMAP_ICM(dev, virt + i * MLX4_TABLE_CHUNK_SIZE,
MLX4_TABLE_CHUNK_SIZE / MLX4_ICM_PAGE_SIZE);
mlx4_free_icm(dev, table->icm[i]);
}
return -ENOMEM;
}
void mlx4_cleanup_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table)
{
int i;
for (i = 0; i < table->num_icm; ++i)
if (table->icm[i]) {
mlx4_UNMAP_ICM(dev, table->virt + i * MLX4_TABLE_CHUNK_SIZE,
MLX4_TABLE_CHUNK_SIZE / MLX4_ICM_PAGE_SIZE);
mlx4_free_icm(dev, table->icm[i]);
}
kfree(table->icm);
}
/*
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
* Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef MLX4_ICM_H
#define MLX4_ICM_H
#include <linux/list.h>
#include <linux/pci.h>
#include <linux/mutex.h>
#define MLX4_ICM_CHUNK_LEN \
((256 - sizeof (struct list_head) - 2 * sizeof (int)) / \
(sizeof (struct scatterlist)))
enum {
MLX4_ICM_PAGE_SHIFT = 12,
MLX4_ICM_PAGE_SIZE = 1 << MLX4_ICM_PAGE_SHIFT,
};
struct mlx4_icm_chunk {
struct list_head list;
int npages;
int nsg;
struct scatterlist mem[MLX4_ICM_CHUNK_LEN];
};
struct mlx4_icm {
struct list_head chunk_list;
int refcount;
};
struct mlx4_icm_iter {
struct mlx4_icm *icm;
struct mlx4_icm_chunk *chunk;
int page_idx;
};
struct mlx4_dev;
struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages, gfp_t gfp_mask);
void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm);
int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj);
void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj);
int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
int start, int end);
void mlx4_table_put_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
int start, int end);
int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table,
u64 virt, int obj_size, int nobj, int reserved,
int use_lowmem);
void mlx4_cleanup_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table);
int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj);
void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj);
void *mlx4_table_find(struct mlx4_icm_table *table, int obj);
int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
int start, int end);
void mlx4_table_put_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
int start, int end);
static inline void mlx4_icm_first(struct mlx4_icm *icm,
struct mlx4_icm_iter *iter)
{
iter->icm = icm;
iter->chunk = list_empty(&icm->chunk_list) ?
NULL : list_entry(icm->chunk_list.next,
struct mlx4_icm_chunk, list);
iter->page_idx = 0;
}
static inline int mlx4_icm_last(struct mlx4_icm_iter *iter)
{
return !iter->chunk;
}
static inline void mlx4_icm_next(struct mlx4_icm_iter *iter)
{
if (++iter->page_idx >= iter->chunk->nsg) {
if (iter->chunk->list.next == &iter->icm->chunk_list) {
iter->chunk = NULL;
return;
}
iter->chunk = list_entry(iter->chunk->list.next,
struct mlx4_icm_chunk, list);
iter->page_idx = 0;
}
}
static inline dma_addr_t mlx4_icm_addr(struct mlx4_icm_iter *iter)
{
return sg_dma_address(&iter->chunk->mem[iter->page_idx]);
}
static inline unsigned long mlx4_icm_size(struct mlx4_icm_iter *iter)
{
return sg_dma_len(&iter->chunk->mem[iter->page_idx]);
}
int mlx4_UNMAP_ICM(struct mlx4_dev *dev, u64 virt, u32 page_count);
int mlx4_MAP_ICM_page(struct mlx4_dev *dev, u64 dma_addr, u64 virt);
int mlx4_MAP_ICM_AUX(struct mlx4_dev *dev, struct mlx4_icm *icm);
int mlx4_UNMAP_ICM_AUX(struct mlx4_dev *dev);
#endif /* MLX4_ICM_H */
/*
* Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/mlx4/driver.h>
#include "mlx4.h"
struct mlx4_device_context {
struct list_head list;
struct mlx4_interface *intf;
void *context;
};
static LIST_HEAD(intf_list);
static LIST_HEAD(dev_list);
static DEFINE_MUTEX(intf_mutex);
static void mlx4_add_device(struct mlx4_interface *intf, struct mlx4_priv *priv)
{
struct mlx4_device_context *dev_ctx;
dev_ctx = kmalloc(sizeof *dev_ctx, GFP_KERNEL);
if (!dev_ctx)
return;
dev_ctx->intf = intf;
dev_ctx->context = intf->add(&priv->dev);
if (dev_ctx->context) {
spin_lock_irq(&priv->ctx_lock);
list_add_tail(&dev_ctx->list, &priv->ctx_list);
spin_unlock_irq(&priv->ctx_lock);
} else
kfree(dev_ctx);
}
static void mlx4_remove_device(struct mlx4_interface *intf, struct mlx4_priv *priv)
{
struct mlx4_device_context *dev_ctx;
list_for_each_entry(dev_ctx, &priv->ctx_list, list)
if (dev_ctx->intf == intf) {
spin_lock_irq(&priv->ctx_lock);
list_del(&dev_ctx->list);
spin_unlock_irq(&priv->ctx_lock);
intf->remove(&priv->dev, dev_ctx->context);
kfree(dev_ctx);
return;
}
}
int mlx4_register_interface(struct mlx4_interface *intf)
{
struct mlx4_priv *priv;
if (!intf->add || !intf->remove)
return -EINVAL;
mutex_lock(&intf_mutex);
list_add_tail(&intf->list, &intf_list);
list_for_each_entry(priv, &dev_list, dev_list)
mlx4_add_device(intf, priv);
mutex_unlock(&intf_mutex);
return 0;
}
EXPORT_SYMBOL_GPL(mlx4_register_interface);
void mlx4_unregister_interface(struct mlx4_interface *intf)
{
struct mlx4_priv *priv;
mutex_lock(&intf_mutex);
list_for_each_entry(priv, &dev_list, dev_list)
mlx4_remove_device(intf, priv);
list_del(&intf->list);
mutex_unlock(&intf_mutex);
}
EXPORT_SYMBOL_GPL(mlx4_unregister_interface);
void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_event type,
int subtype, int port)
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_device_context *dev_ctx;
unsigned long flags;
spin_lock_irqsave(&priv->ctx_lock, flags);
list_for_each_entry(dev_ctx, &priv->ctx_list, list)
if (dev_ctx->intf->event)
dev_ctx->intf->event(dev, dev_ctx->context, type,
subtype, port);
spin_unlock_irqrestore(&priv->ctx_lock, flags);
}
int mlx4_register_device(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_interface *intf;
INIT_LIST_HEAD(&priv->ctx_list);
spin_lock_init(&priv->ctx_lock);
mutex_lock(&intf_mutex);
list_add_tail(&priv->dev_list, &dev_list);
list_for_each_entry(intf, &intf_list, list)
mlx4_add_device(intf, priv);
mutex_unlock(&intf_mutex);
return 0;
}
void mlx4_unregister_device(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_interface *intf;
mutex_lock(&intf_mutex);
list_for_each_entry(intf, &intf_list, list)
mlx4_remove_device(intf, priv);
list_del(&priv->dev_list);
mutex_unlock(&intf_mutex);
}
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
* Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/mlx4/device.h>
#include <linux/mlx4/doorbell.h>
#include "mlx4.h"
#include "fw.h"
#include "icm.h"
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("Mellanox ConnectX HCA low-level driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);
#ifdef CONFIG_MLX4_DEBUG
int mlx4_debug_level = 0;
module_param_named(debug_level, mlx4_debug_level, int, 0644);
MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");
#endif /* CONFIG_MLX4_DEBUG */
#ifdef CONFIG_PCI_MSI
static int msi_x;
module_param(msi_x, int, 0444);
MODULE_PARM_DESC(msi_x, "attempt to use MSI-X if nonzero");
#else /* CONFIG_PCI_MSI */
#define msi_x (0)
#endif /* CONFIG_PCI_MSI */
static const char mlx4_version[] __devinitdata =
DRV_NAME ": Mellanox ConnectX core driver v"
DRV_VERSION " (" DRV_RELDATE ")\n";
static struct mlx4_profile default_profile = {
.num_qp = 1 << 16,
.num_srq = 1 << 16,
.rdmarc_per_qp = 4,
.num_cq = 1 << 16,
.num_mcg = 1 << 13,
.num_mpt = 1 << 17,
.num_mtt = 1 << 20,
};
static int __devinit mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
{
int err;
err = mlx4_QUERY_DEV_CAP(dev, dev_cap);
if (err) {
mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
return err;
}
if (dev_cap->min_page_sz > PAGE_SIZE) {
mlx4_err(dev, "HCA minimum page size of %d bigger than "
"kernel PAGE_SIZE of %ld, aborting.\n",
dev_cap->min_page_sz, PAGE_SIZE);
return -ENODEV;
}
if (dev_cap->num_ports > MLX4_MAX_PORTS) {
mlx4_err(dev, "HCA has %d ports, but we only support %d, "
"aborting.\n",
dev_cap->num_ports, MLX4_MAX_PORTS);
return -ENODEV;
}
if (dev_cap->uar_size > pci_resource_len(dev->pdev, 2)) {
mlx4_err(dev, "HCA reported UAR size of 0x%x bigger than "
"PCI resource 2 size of 0x%llx, aborting.\n",
dev_cap->uar_size,
(unsigned long long) pci_resource_len(dev->pdev, 2));
return -ENODEV;
}
dev->caps.num_ports = dev_cap->num_ports;
dev->caps.num_uars = dev_cap->uar_size / PAGE_SIZE;
dev->caps.vl_cap = dev_cap->max_vl;
dev->caps.mtu_cap = dev_cap->max_mtu;
dev->caps.gid_table_len = dev_cap->max_gids;
dev->caps.pkey_table_len = dev_cap->max_pkeys;
dev->caps.local_ca_ack_delay = dev_cap->local_ca_ack_delay;
dev->caps.bf_reg_size = dev_cap->bf_reg_size;
dev->caps.bf_regs_per_page = dev_cap->bf_regs_per_page;
dev->caps.max_sq_sg = dev_cap->max_sq_sg;
dev->caps.max_rq_sg = dev_cap->max_rq_sg;
dev->caps.max_wqes = dev_cap->max_qp_sz;
dev->caps.max_qp_init_rdma = dev_cap->max_requester_per_qp;
dev->caps.reserved_qps = dev_cap->reserved_qps;
dev->caps.max_srq_wqes = dev_cap->max_srq_sz;
dev->caps.max_srq_sge = dev_cap->max_rq_sg - 1;
dev->caps.reserved_srqs = dev_cap->reserved_srqs;
dev->caps.max_sq_desc_sz = dev_cap->max_sq_desc_sz;
dev->caps.max_rq_desc_sz = dev_cap->max_rq_desc_sz;
dev->caps.num_qp_per_mgm = MLX4_QP_PER_MGM;
/*
* Subtract 1 from the limit because we need to allocate a
* spare CQE so the HCA HW can tell the difference between an
* empty CQ and a full CQ.
*/
dev->caps.max_cqes = dev_cap->max_cq_sz - 1;
dev->caps.reserved_cqs = dev_cap->reserved_cqs;
dev->caps.reserved_eqs = dev_cap->reserved_eqs;
dev->caps.reserved_mtts = dev_cap->reserved_mtts;
dev->caps.reserved_mrws = dev_cap->reserved_mrws;
dev->caps.reserved_uars = dev_cap->reserved_uars;
dev->caps.reserved_pds = dev_cap->reserved_pds;
dev->caps.port_width_cap = dev_cap->max_port_width;
dev->caps.mtt_entry_sz = MLX4_MTT_ENTRY_PER_SEG * dev_cap->mtt_entry_sz;
dev->caps.page_size_cap = ~(u32) (dev_cap->min_page_sz - 1);
dev->caps.flags = dev_cap->flags;
dev->caps.stat_rate_support = dev_cap->stat_rate_support;
return 0;
}
static int __devinit mlx4_load_fw(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
int err;
priv->fw.fw_icm = mlx4_alloc_icm(dev, priv->fw.fw_pages,
GFP_HIGHUSER | __GFP_NOWARN);
if (!priv->fw.fw_icm) {
mlx4_err(dev, "Couldn't allocate FW area, aborting.\n");
return -ENOMEM;
}
err = mlx4_MAP_FA(dev, priv->fw.fw_icm);
if (err) {
mlx4_err(dev, "MAP_FA command failed, aborting.\n");
goto err_free;
}
err = mlx4_RUN_FW(dev);
if (err) {
mlx4_err(dev, "RUN_FW command failed, aborting.\n");
goto err_unmap_fa;
}
return 0;
err_unmap_fa:
mlx4_UNMAP_FA(dev);
err_free:
mlx4_free_icm(dev, priv->fw.fw_icm);
return err;
}
static int __devinit mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base,
int cmpt_entry_sz)
{
struct mlx4_priv *priv = mlx4_priv(dev);
int err;
err = mlx4_init_icm_table(dev, &priv->qp_table.cmpt_table,
cmpt_base +
((u64) (MLX4_CMPT_TYPE_QP *
cmpt_entry_sz) << MLX4_CMPT_SHIFT),
cmpt_entry_sz, dev->caps.num_qps,
dev->caps.reserved_qps, 0);
if (err)
goto err;
err = mlx4_init_icm_table(dev, &priv->srq_table.cmpt_table,
cmpt_base +
((u64) (MLX4_CMPT_TYPE_SRQ *
cmpt_entry_sz) << MLX4_CMPT_SHIFT),
cmpt_entry_sz, dev->caps.num_srqs,
dev->caps.reserved_srqs, 0);
if (err)
goto err_qp;
err = mlx4_init_icm_table(dev, &priv->cq_table.cmpt_table,
cmpt_base +
((u64) (MLX4_CMPT_TYPE_CQ *
cmpt_entry_sz) << MLX4_CMPT_SHIFT),
cmpt_entry_sz, dev->caps.num_cqs,
dev->caps.reserved_cqs, 0);
if (err)
goto err_srq;
err = mlx4_init_icm_table(dev, &priv->eq_table.cmpt_table,
cmpt_base +
((u64) (MLX4_CMPT_TYPE_EQ *
cmpt_entry_sz) << MLX4_CMPT_SHIFT),
cmpt_entry_sz,
roundup_pow_of_two(MLX4_NUM_EQ +
dev->caps.reserved_eqs),
MLX4_NUM_EQ + dev->caps.reserved_eqs, 0);
if (err)
goto err_cq;
return 0;
err_cq:
mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
err_srq:
mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
err_qp:
mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);
err:
return err;
}
static int __devinit mlx4_init_icm(struct mlx4_dev *dev,
struct mlx4_dev_cap *dev_cap,
struct mlx4_init_hca_param *init_hca,
u64 icm_size)
{
struct mlx4_priv *priv = mlx4_priv(dev);
u64 aux_pages;
int err;
err = mlx4_SET_ICM_SIZE(dev, icm_size, &aux_pages);
if (err) {
mlx4_err(dev, "SET_ICM_SIZE command failed, aborting.\n");
return err;
}
mlx4_dbg(dev, "%lld KB of HCA context requires %lld KB aux memory.\n",
(unsigned long long) icm_size >> 10,
(unsigned long long) aux_pages << 2);
priv->fw.aux_icm = mlx4_alloc_icm(dev, aux_pages,
GFP_HIGHUSER | __GFP_NOWARN);
if (!priv->fw.aux_icm) {
mlx4_err(dev, "Couldn't allocate aux memory, aborting.\n");
return -ENOMEM;
}
err = mlx4_MAP_ICM_AUX(dev, priv->fw.aux_icm);
if (err) {
mlx4_err(dev, "MAP_ICM_AUX command failed, aborting.\n");
goto err_free_aux;
}
err = mlx4_init_cmpt_table(dev, init_hca->cmpt_base, dev_cap->cmpt_entry_sz);
if (err) {
mlx4_err(dev, "Failed to map cMPT context memory, aborting.\n");
goto err_unmap_aux;
}
err = mlx4_map_eq_icm(dev, init_hca->eqc_base);
if (err) {
mlx4_err(dev, "Failed to map EQ context memory, aborting.\n");
goto err_unmap_cmpt;
}
err = mlx4_init_icm_table(dev, &priv->mr_table.mtt_table,
init_hca->mtt_base,
dev->caps.mtt_entry_sz,
dev->caps.num_mtt_segs,
dev->caps.reserved_mtts, 1);
if (err) {
mlx4_err(dev, "Failed to map MTT context memory, aborting.\n");
goto err_unmap_eq;
}
err = mlx4_init_icm_table(dev, &priv->mr_table.dmpt_table,
init_hca->dmpt_base,
dev_cap->dmpt_entry_sz,
dev->caps.num_mpts,
dev->caps.reserved_mrws, 1);
if (err) {
mlx4_err(dev, "Failed to map dMPT context memory, aborting.\n");
goto err_unmap_mtt;
}
err = mlx4_init_icm_table(dev, &priv->qp_table.qp_table,
init_hca->qpc_base,
dev_cap->qpc_entry_sz,
dev->caps.num_qps,
dev->caps.reserved_qps, 0);
if (err) {
mlx4_err(dev, "Failed to map QP context memory, aborting.\n");
goto err_unmap_dmpt;
}
err = mlx4_init_icm_table(dev, &priv->qp_table.auxc_table,
init_hca->auxc_base,
dev_cap->aux_entry_sz,
dev->caps.num_qps,
dev->caps.reserved_qps, 0);
if (err) {
mlx4_err(dev, "Failed to map AUXC context memory, aborting.\n");
goto err_unmap_qp;
}
err = mlx4_init_icm_table(dev, &priv->qp_table.altc_table,
init_hca->altc_base,
dev_cap->altc_entry_sz,
dev->caps.num_qps,
dev->caps.reserved_qps, 0);
if (err) {
mlx4_err(dev, "Failed to map ALTC context memory, aborting.\n");
goto err_unmap_auxc;
}
err = mlx4_init_icm_table(dev, &priv->qp_table.rdmarc_table,
init_hca->rdmarc_base,
dev_cap->rdmarc_entry_sz << priv->qp_table.rdmarc_shift,
dev->caps.num_qps,
dev->caps.reserved_qps, 0);
if (err) {
mlx4_err(dev, "Failed to map RDMARC context memory, aborting\n");
goto err_unmap_altc;
}
err = mlx4_init_icm_table(dev, &priv->cq_table.table,
init_hca->cqc_base,
dev_cap->cqc_entry_sz,
dev->caps.num_cqs,
dev->caps.reserved_cqs, 0);
if (err) {
mlx4_err(dev, "Failed to map CQ context memory, aborting.\n");
goto err_unmap_rdmarc;
}
err = mlx4_init_icm_table(dev, &priv->srq_table.table,
init_hca->srqc_base,
dev_cap->srq_entry_sz,
dev->caps.num_srqs,
dev->caps.reserved_srqs, 0);
if (err) {
mlx4_err(dev, "Failed to map SRQ context memory, aborting.\n");
goto err_unmap_cq;
}
/*
* It's not strictly required, but for simplicity just map the
* whole multicast group table now. The table isn't very big
* and it's a lot easier than trying to track ref counts.
*/
err = mlx4_init_icm_table(dev, &priv->mcg_table.table,
init_hca->mc_base, MLX4_MGM_ENTRY_SIZE,
dev->caps.num_mgms + dev->caps.num_amgms,
dev->caps.num_mgms + dev->caps.num_amgms,
0);
if (err) {
mlx4_err(dev, "Failed to map MCG context memory, aborting.\n");
goto err_unmap_srq;
}
return 0;
err_unmap_srq:
mlx4_cleanup_icm_table(dev, &priv->srq_table.table);
err_unmap_cq:
mlx4_cleanup_icm_table(dev, &priv->cq_table.table);
err_unmap_rdmarc:
mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table);
err_unmap_altc:
mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table);
err_unmap_auxc:
mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table);
err_unmap_qp:
mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table);
err_unmap_dmpt:
mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table);
err_unmap_mtt:
mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table);
err_unmap_eq:
mlx4_unmap_eq_icm(dev);
err_unmap_cmpt:
mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table);
mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);
err_unmap_aux:
mlx4_UNMAP_ICM_AUX(dev);
err_free_aux:
mlx4_free_icm(dev, priv->fw.aux_icm);
return err;
}
static void mlx4_free_icms(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
mlx4_cleanup_icm_table(dev, &priv->mcg_table.table);
mlx4_cleanup_icm_table(dev, &priv->srq_table.table);
mlx4_cleanup_icm_table(dev, &priv->cq_table.table);
mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table);
mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table);
mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table);
mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table);
mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table);
mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table);
mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table);
mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);
mlx4_unmap_eq_icm(dev);
mlx4_UNMAP_ICM_AUX(dev);
mlx4_free_icm(dev, priv->fw.aux_icm);
}
static void mlx4_close_hca(struct mlx4_dev *dev)
{
mlx4_CLOSE_HCA(dev, 0);
mlx4_free_icms(dev);
mlx4_UNMAP_FA(dev);
mlx4_free_icm(dev, mlx4_priv(dev)->fw.fw_icm);
}
static int __devinit mlx4_init_hca(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_adapter adapter;
struct mlx4_dev_cap dev_cap;
struct mlx4_profile profile;
struct mlx4_init_hca_param init_hca;
u64 icm_size;
int err;
err = mlx4_QUERY_FW(dev);
if (err) {
mlx4_err(dev, "QUERY_FW command failed, aborting.\n");
return err;
}
err = mlx4_load_fw(dev);
if (err) {
mlx4_err(dev, "Failed to start FW, aborting.\n");
return err;
}
err = mlx4_dev_cap(dev, &dev_cap);
if (err) {
mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
goto err_stop_fw;
}
profile = default_profile;
icm_size = mlx4_make_profile(dev, &profile, &dev_cap, &init_hca);
if ((long long) icm_size < 0) {
err = icm_size;
goto err_stop_fw;
}
init_hca.log_uar_sz = ilog2(dev->caps.num_uars);
err = mlx4_init_icm(dev, &dev_cap, &init_hca, icm_size);
if (err)
goto err_stop_fw;
err = mlx4_INIT_HCA(dev, &init_hca);
if (err) {
mlx4_err(dev, "INIT_HCA command failed, aborting.\n");
goto err_free_icm;
}
err = mlx4_QUERY_ADAPTER(dev, &adapter);
if (err) {
mlx4_err(dev, "QUERY_ADAPTER command failed, aborting.\n");
goto err_close;
}
priv->eq_table.inta_pin = adapter.inta_pin;
priv->rev_id = adapter.revision_id;
memcpy(priv->board_id, adapter.board_id, sizeof priv->board_id);
return 0;
err_close:
mlx4_close_hca(dev);
err_free_icm:
mlx4_free_icms(dev);
err_stop_fw:
mlx4_UNMAP_FA(dev);
mlx4_free_icm(dev, priv->fw.fw_icm);
return err;
}
static int __devinit mlx4_setup_hca(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
int err;
MLX4_INIT_DOORBELL_LOCK(&priv->doorbell_lock);
err = mlx4_init_uar_table(dev);
if (err) {
mlx4_err(dev, "Failed to initialize "
"user access region table, aborting.\n");
return err;
}
err = mlx4_uar_alloc(dev, &priv->driver_uar);
if (err) {
mlx4_err(dev, "Failed to allocate driver access region, "
"aborting.\n");
goto err_uar_table_free;
}
priv->kar = ioremap(priv->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
if (!priv->kar) {
mlx4_err(dev, "Couldn't map kernel access region, "
"aborting.\n");
err = -ENOMEM;
goto err_uar_free;
}
err = mlx4_init_pd_table(dev);
if (err) {
mlx4_err(dev, "Failed to initialize "
"protection domain table, aborting.\n");
goto err_kar_unmap;
}
err = mlx4_init_mr_table(dev);
if (err) {
mlx4_err(dev, "Failed to initialize "
"memory region table, aborting.\n");
goto err_pd_table_free;
}
mlx4_map_catas_buf(dev);
err = mlx4_init_eq_table(dev);
if (err) {
mlx4_err(dev, "Failed to initialize "
"event queue table, aborting.\n");
goto err_catas_buf;
}
err = mlx4_cmd_use_events(dev);
if (err) {
mlx4_err(dev, "Failed to switch to event-driven "
"firmware commands, aborting.\n");
goto err_eq_table_free;
}
err = mlx4_NOP(dev);
if (err) {
mlx4_err(dev, "NOP command failed to generate interrupt "
"(IRQ %d), aborting.\n",
priv->eq_table.eq[MLX4_EQ_ASYNC].irq);
if (dev->flags & MLX4_FLAG_MSI_X)
mlx4_err(dev, "Try again with MSI-X disabled.\n");
else
mlx4_err(dev, "BIOS or ACPI interrupt routing problem?\n");
goto err_cmd_poll;
}
mlx4_dbg(dev, "NOP command IRQ test passed\n");
err = mlx4_init_cq_table(dev);
if (err) {
mlx4_err(dev, "Failed to initialize "
"completion queue table, aborting.\n");
goto err_cmd_poll;
}
err = mlx4_init_srq_table(dev);
if (err) {
mlx4_err(dev, "Failed to initialize "
"shared receive queue table, aborting.\n");
goto err_cq_table_free;
}
err = mlx4_init_qp_table(dev);
if (err) {
mlx4_err(dev, "Failed to initialize "
"queue pair table, aborting.\n");
goto err_srq_table_free;
}
err = mlx4_init_mcg_table(dev);
if (err) {
mlx4_err(dev, "Failed to initialize "
"multicast group table, aborting.\n");
goto err_qp_table_free;
}
return 0;
err_qp_table_free:
mlx4_cleanup_qp_table(dev);
err_srq_table_free:
mlx4_cleanup_srq_table(dev);
err_cq_table_free:
mlx4_cleanup_cq_table(dev);
err_cmd_poll:
mlx4_cmd_use_polling(dev);
err_eq_table_free:
mlx4_cleanup_eq_table(dev);
err_catas_buf:
mlx4_unmap_catas_buf(dev);
mlx4_cleanup_mr_table(dev);
err_pd_table_free:
mlx4_cleanup_pd_table(dev);
err_kar_unmap:
iounmap(priv->kar);
err_uar_free:
mlx4_uar_free(dev, &priv->driver_uar);
err_uar_table_free:
mlx4_cleanup_uar_table(dev);
return err;
}
static void __devinit mlx4_enable_msi_x(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct msix_entry entries[MLX4_NUM_EQ];
int err;
int i;
if (msi_x) {
for (i = 0; i < MLX4_NUM_EQ; ++i)
entries[i].entry = i;
err = pci_enable_msix(dev->pdev, entries, ARRAY_SIZE(entries));
if (err) {
if (err > 0)
mlx4_info(dev, "Only %d MSI-X vectors available, "
"not using MSI-X\n", err);
goto no_msi;
}
for (i = 0; i < MLX4_NUM_EQ; ++i)
priv->eq_table.eq[i].irq = entries[i].vector;
dev->flags |= MLX4_FLAG_MSI_X;
return;
}
no_msi:
for (i = 0; i < MLX4_NUM_EQ; ++i)
priv->eq_table.eq[i].irq = dev->pdev->irq;
}
static int __devinit mlx4_init_one(struct pci_dev *pdev,
const struct pci_device_id *id)
{
static int mlx4_version_printed;
struct mlx4_priv *priv;
struct mlx4_dev *dev;
int err;
if (!mlx4_version_printed) {
printk(KERN_INFO "%s", mlx4_version);
++mlx4_version_printed;
}
printk(KERN_INFO PFX "Initializing %s\n",
pci_name(pdev));
err = pci_enable_device(pdev);
if (err) {
dev_err(&pdev->dev, "Cannot enable PCI device, "
"aborting.\n");
return err;
}
/*
* Check for BARs. We expect 0: 1MB, 2: 8MB, 4: DDR (may not
* be present)
*/
if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM) ||
pci_resource_len(pdev, 0) != 1 << 20) {
dev_err(&pdev->dev, "Missing DCS, aborting.\n");
err = -ENODEV;
goto err_disable_pdev;
}
if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM)) {
dev_err(&pdev->dev, "Missing UAR, aborting.\n");
err = -ENODEV;
goto err_disable_pdev;
}
err = pci_request_region(pdev, 0, DRV_NAME);
if (err) {
dev_err(&pdev->dev, "Cannot request control region, aborting.\n");
goto err_disable_pdev;
}
err = pci_request_region(pdev, 2, DRV_NAME);
if (err) {
dev_err(&pdev->dev, "Cannot request UAR region, aborting.\n");
goto err_release_bar0;
}
pci_set_master(pdev);
err = pci_set_dma_mask(pdev, DMA_64BIT_MASK);
if (err) {
dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask.\n");
err = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
if (err) {
dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting.\n");
goto err_release_bar2;
}
}
err = pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK);
if (err) {
dev_warn(&pdev->dev, "Warning: couldn't set 64-bit "
"consistent PCI DMA mask.\n");
err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK);
if (err) {
dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, "
"aborting.\n");
goto err_release_bar2;
}
}
priv = kzalloc(sizeof *priv, GFP_KERNEL);
if (!priv) {
dev_err(&pdev->dev, "Device struct alloc failed, "
"aborting.\n");
err = -ENOMEM;
goto err_release_bar2;
}
dev = &priv->dev;
dev->pdev = pdev;
/*
* Now reset the HCA before we touch the PCI capabilities or
* attempt a firmware command, since a boot ROM may have left
* the HCA in an undefined state.
*/
err = mlx4_reset(dev);
if (err) {
mlx4_err(dev, "Failed to reset HCA, aborting.\n");
goto err_free_dev;
}
mlx4_enable_msi_x(dev);
if (mlx4_cmd_init(dev)) {
mlx4_err(dev, "Failed to init command interface, aborting.\n");
goto err_free_dev;
}
err = mlx4_init_hca(dev);
if (err)
goto err_cmd;
err = mlx4_setup_hca(dev);
if (err)
goto err_close;
err = mlx4_register_device(dev);
if (err)
goto err_cleanup;
pci_set_drvdata(pdev, dev);
return 0;
err_cleanup:
mlx4_cleanup_mcg_table(dev);
mlx4_cleanup_qp_table(dev);
mlx4_cleanup_srq_table(dev);
mlx4_cleanup_cq_table(dev);
mlx4_cmd_use_polling(dev);
mlx4_cleanup_eq_table(dev);
mlx4_unmap_catas_buf(dev);
mlx4_cleanup_mr_table(dev);
mlx4_cleanup_pd_table(dev);
mlx4_cleanup_uar_table(dev);
err_close:
mlx4_close_hca(dev);
err_cmd:
mlx4_cmd_cleanup(dev);
err_free_dev:
if (dev->flags & MLX4_FLAG_MSI_X)
pci_disable_msix(pdev);
kfree(priv);
err_release_bar2:
pci_release_region(pdev, 2);
err_release_bar0:
pci_release_region(pdev, 0);
err_disable_pdev:
pci_disable_device(pdev);
pci_set_drvdata(pdev, NULL);
return err;
}
static void __devexit mlx4_remove_one(struct pci_dev *pdev)
{
struct mlx4_dev *dev = pci_get_drvdata(pdev);
struct mlx4_priv *priv = mlx4_priv(dev);
int p;
if (dev) {
mlx4_unregister_device(dev);
for (p = 1; p <= dev->caps.num_ports; ++p)
mlx4_CLOSE_PORT(dev, p);
mlx4_cleanup_mcg_table(dev);
mlx4_cleanup_qp_table(dev);
mlx4_cleanup_srq_table(dev);
mlx4_cleanup_cq_table(dev);
mlx4_cmd_use_polling(dev);
mlx4_cleanup_eq_table(dev);
mlx4_unmap_catas_buf(dev);
mlx4_cleanup_mr_table(dev);
mlx4_cleanup_pd_table(dev);
iounmap(priv->kar);
mlx4_uar_free(dev, &priv->driver_uar);
mlx4_cleanup_uar_table(dev);
mlx4_close_hca(dev);
mlx4_cmd_cleanup(dev);
if (dev->flags & MLX4_FLAG_MSI_X)
pci_disable_msix(pdev);
kfree(priv);
pci_release_region(pdev, 2);
pci_release_region(pdev, 0);
pci_disable_device(pdev);
pci_set_drvdata(pdev, NULL);
}
}
static struct pci_device_id mlx4_pci_table[] = {
{ PCI_VDEVICE(MELLANOX, 0x6340) }, /* MT25408 "Hermon" SDR */
{ PCI_VDEVICE(MELLANOX, 0x634a) }, /* MT25408 "Hermon" DDR */
{ PCI_VDEVICE(MELLANOX, 0x6354) }, /* MT25408 "Hermon" QDR */
{ 0, }
};
MODULE_DEVICE_TABLE(pci, mlx4_pci_table);
static struct pci_driver mlx4_driver = {
.name = DRV_NAME,
.id_table = mlx4_pci_table,
.probe = mlx4_init_one,
.remove = __devexit_p(mlx4_remove_one)
};
static int __init mlx4_init(void)
{
int ret;
ret = pci_register_driver(&mlx4_driver);
return ret < 0 ? ret : 0;
}
static void __exit mlx4_cleanup(void)
{
pci_unregister_driver(&mlx4_driver);
}
module_init(mlx4_init);
module_exit(mlx4_cleanup);
/*
* Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/init.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/mlx4/cmd.h>
#include "mlx4.h"
struct mlx4_mgm {
__be32 next_gid_index;
__be32 members_count;
u32 reserved[2];
u8 gid[16];
__be32 qp[MLX4_QP_PER_MGM];
};
static const u8 zero_gid[16]; /* automatically initialized to 0 */
static int mlx4_READ_MCG(struct mlx4_dev *dev, int index,
struct mlx4_cmd_mailbox *mailbox)
{
return mlx4_cmd_box(dev, 0, mailbox->dma, index, 0, MLX4_CMD_READ_MCG,
MLX4_CMD_TIME_CLASS_A);
}
static int mlx4_WRITE_MCG(struct mlx4_dev *dev, int index,
struct mlx4_cmd_mailbox *mailbox)
{
return mlx4_cmd(dev, mailbox->dma, index, 0, MLX4_CMD_WRITE_MCG,
MLX4_CMD_TIME_CLASS_A);
}
static int mlx4_MGID_HASH(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
u16 *hash)
{
u64 imm;
int err;
err = mlx4_cmd_imm(dev, mailbox->dma, &imm, 0, 0, MLX4_CMD_MGID_HASH,
MLX4_CMD_TIME_CLASS_A);
if (!err)
*hash = imm;
return err;
}
/*
* Caller must hold MCG table semaphore. gid and mgm parameters must
* be properly aligned for command interface.
*
* Returns 0 unless a firmware command error occurs.
*
* If GID is found in MGM or MGM is empty, *index = *hash, *prev = -1
* and *mgm holds MGM entry.
*
* if GID is found in AMGM, *index = index in AMGM, *prev = index of
* previous entry in hash chain and *mgm holds AMGM entry.
*
* If no AMGM exists for given gid, *index = -1, *prev = index of last
* entry in hash chain and *mgm holds end of hash chain.
*/
static int find_mgm(struct mlx4_dev *dev,
u8 *gid, struct mlx4_cmd_mailbox *mgm_mailbox,
u16 *hash, int *prev, int *index)
{
struct mlx4_cmd_mailbox *mailbox;
struct mlx4_mgm *mgm = mgm_mailbox->buf;
u8 *mgid;
int err;
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox))
return -ENOMEM;
mgid = mailbox->buf;
memcpy(mgid, gid, 16);
err = mlx4_MGID_HASH(dev, mailbox, hash);
mlx4_free_cmd_mailbox(dev, mailbox);
if (err)
return err;
if (0)
mlx4_dbg(dev, "Hash for %04x:%04x:%04x:%04x:"
"%04x:%04x:%04x:%04x is %04x\n",
be16_to_cpu(((__be16 *) gid)[0]),
be16_to_cpu(((__be16 *) gid)[1]),
be16_to_cpu(((__be16 *) gid)[2]),
be16_to_cpu(((__be16 *) gid)[3]),
be16_to_cpu(((__be16 *) gid)[4]),
be16_to_cpu(((__be16 *) gid)[5]),
be16_to_cpu(((__be16 *) gid)[6]),
be16_to_cpu(((__be16 *) gid)[7]),
*hash);
*index = *hash;
*prev = -1;
do {
err = mlx4_READ_MCG(dev, *index, mgm_mailbox);
if (err)
return err;
if (!memcmp(mgm->gid, zero_gid, 16)) {
if (*index != *hash) {
mlx4_err(dev, "Found zero MGID in AMGM.\n");
err = -EINVAL;
}
return err;
}
if (!memcmp(mgm->gid, gid, 16))
return err;
*prev = *index;
*index = be32_to_cpu(mgm->next_gid_index) >> 6;
} while (*index);
*index = -1;
return err;
}
int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16])
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_cmd_mailbox *mailbox;
struct mlx4_mgm *mgm;
u32 members_count;
u16 hash;
int index, prev;
int link = 0;
int i;
int err;
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
mgm = mailbox->buf;
mutex_lock(&priv->mcg_table.mutex);
err = find_mgm(dev, gid, mailbox, &hash, &prev, &index);
if (err)
goto out;
if (index != -1) {
if (!memcmp(mgm->gid, zero_gid, 16))
memcpy(mgm->gid, gid, 16);
} else {
link = 1;
index = mlx4_bitmap_alloc(&priv->mcg_table.bitmap);
if (index == -1) {
mlx4_err(dev, "No AMGM entries left\n");
err = -ENOMEM;
goto out;
}
index += dev->caps.num_mgms;
err = mlx4_READ_MCG(dev, index, mailbox);
if (err)
goto out;
memset(mgm, 0, sizeof *mgm);
memcpy(mgm->gid, gid, 16);
}
members_count = be32_to_cpu(mgm->members_count);
if (members_count == MLX4_QP_PER_MGM) {
mlx4_err(dev, "MGM at index %x is full.\n", index);
err = -ENOMEM;
goto out;
}
for (i = 0; i < members_count; ++i)
if (mgm->qp[i] == cpu_to_be32(qp->qpn)) {
mlx4_dbg(dev, "QP %06x already a member of MGM\n", qp->qpn);
err = 0;
goto out;
}
mgm->qp[members_count++] = cpu_to_be32(qp->qpn);
mgm->members_count = cpu_to_be32(members_count);
err = mlx4_WRITE_MCG(dev, index, mailbox);
if (err)
goto out;
if (!link)
goto out;
err = mlx4_READ_MCG(dev, prev, mailbox);
if (err)
goto out;
mgm->next_gid_index = cpu_to_be32(index << 6);
err = mlx4_WRITE_MCG(dev, prev, mailbox);
if (err)
goto out;
out:
if (err && link && index != -1) {
if (index < dev->caps.num_mgms)
mlx4_warn(dev, "Got AMGM index %d < %d",
index, dev->caps.num_mgms);
else
mlx4_bitmap_free(&priv->mcg_table.bitmap,
index - dev->caps.num_mgms);
}
mutex_unlock(&priv->mcg_table.mutex);
mlx4_free_cmd_mailbox(dev, mailbox);
return err;
}
EXPORT_SYMBOL_GPL(mlx4_multicast_attach);
int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16])
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_cmd_mailbox *mailbox;
struct mlx4_mgm *mgm;
u32 members_count;
u16 hash;
int prev, index;
int i, loc;
int err;
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
mgm = mailbox->buf;
mutex_lock(&priv->mcg_table.mutex);
err = find_mgm(dev, gid, mailbox, &hash, &prev, &index);
if (err)
goto out;
if (index == -1) {
mlx4_err(dev, "MGID %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x "
"not found\n",
be16_to_cpu(((__be16 *) gid)[0]),
be16_to_cpu(((__be16 *) gid)[1]),
be16_to_cpu(((__be16 *) gid)[2]),
be16_to_cpu(((__be16 *) gid)[3]),
be16_to_cpu(((__be16 *) gid)[4]),
be16_to_cpu(((__be16 *) gid)[5]),
be16_to_cpu(((__be16 *) gid)[6]),
be16_to_cpu(((__be16 *) gid)[7]));
err = -EINVAL;
goto out;
}
members_count = be32_to_cpu(mgm->members_count);
for (loc = -1, i = 0; i < members_count; ++i)
if (mgm->qp[i] == cpu_to_be32(qp->qpn))
loc = i;
if (loc == -1) {
mlx4_err(dev, "QP %06x not found in MGM\n", qp->qpn);
err = -EINVAL;
goto out;
}
mgm->members_count = cpu_to_be32(--members_count);
mgm->qp[loc] = mgm->qp[i - 1];
mgm->qp[i - 1] = 0;
err = mlx4_WRITE_MCG(dev, index, mailbox);
if (err)
goto out;
if (i != 1)
goto out;
if (prev == -1) {
/* Remove entry from MGM */
int amgm_index = be32_to_cpu(mgm->next_gid_index) >> 6;
if (amgm_index) {
err = mlx4_READ_MCG(dev, amgm_index, mailbox);
if (err)
goto out;
} else
memset(mgm->gid, 0, 16);
err = mlx4_WRITE_MCG(dev, index, mailbox);
if (err)
goto out;
if (amgm_index) {
if (amgm_index < dev->caps.num_mgms)
mlx4_warn(dev, "MGM entry %d had AMGM index %d < %d",
index, amgm_index, dev->caps.num_mgms);
else
mlx4_bitmap_free(&priv->mcg_table.bitmap,
amgm_index - dev->caps.num_mgms);
}
} else {
/* Remove entry from AMGM */
int cur_next_index = be32_to_cpu(mgm->next_gid_index) >> 6;
err = mlx4_READ_MCG(dev, prev, mailbox);
if (err)
goto out;
mgm->next_gid_index = cpu_to_be32(cur_next_index << 6);
err = mlx4_WRITE_MCG(dev, prev, mailbox);
if (err)
goto out;
if (index < dev->caps.num_mgms)
mlx4_warn(dev, "entry %d had next AMGM index %d < %d",
prev, index, dev->caps.num_mgms);
else
mlx4_bitmap_free(&priv->mcg_table.bitmap,
index - dev->caps.num_mgms);
}
out:
mutex_unlock(&priv->mcg_table.mutex);
mlx4_free_cmd_mailbox(dev, mailbox);
return err;
}
EXPORT_SYMBOL_GPL(mlx4_multicast_detach);
int __devinit mlx4_init_mcg_table(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
int err;
err = mlx4_bitmap_init(&priv->mcg_table.bitmap,
dev->caps.num_amgms, dev->caps.num_amgms - 1, 0);
if (err)
return err;
mutex_init(&priv->mcg_table.mutex);
return 0;
}
void mlx4_cleanup_mcg_table(struct mlx4_dev *dev)
{
mlx4_bitmap_cleanup(&mlx4_priv(dev)->mcg_table.bitmap);
}
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2005, 2006, 2007 Cisco Systems. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
* Copyright (c) 2004 Voltaire, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef MLX4_H
#define MLX4_H
#include <linux/radix-tree.h>
#include <linux/mlx4/device.h>
#include <linux/mlx4/doorbell.h>
#define DRV_NAME "mlx4_core"
#define PFX DRV_NAME ": "
#define DRV_VERSION "0.01"
#define DRV_RELDATE "May 1, 2007"
enum {
MLX4_HCR_BASE = 0x80680,
MLX4_HCR_SIZE = 0x0001c,
MLX4_CLR_INT_SIZE = 0x00008
};
enum {
MLX4_BOARD_ID_LEN = 64
};
enum {
MLX4_MGM_ENTRY_SIZE = 0x40,
MLX4_QP_PER_MGM = 4 * (MLX4_MGM_ENTRY_SIZE / 16 - 2),
MLX4_MTT_ENTRY_PER_SEG = 8
};
enum {
MLX4_EQ_ASYNC,
MLX4_EQ_COMP,
MLX4_EQ_CATAS,
MLX4_NUM_EQ
};
enum {
MLX4_NUM_PDS = 1 << 15
};
enum {
MLX4_CMPT_TYPE_QP = 0,
MLX4_CMPT_TYPE_SRQ = 1,
MLX4_CMPT_TYPE_CQ = 2,
MLX4_CMPT_TYPE_EQ = 3,
MLX4_CMPT_NUM_TYPE
};
enum {
MLX4_CMPT_SHIFT = 24,
MLX4_NUM_CMPTS = MLX4_CMPT_NUM_TYPE << MLX4_CMPT_SHIFT
};
#ifdef CONFIG_MLX4_DEBUG
extern int mlx4_debug_level;
#define mlx4_dbg(mdev, format, arg...) \
do { \
if (mlx4_debug_level) \
dev_printk(KERN_DEBUG, &mdev->pdev->dev, format, ## arg); \
} while (0)
#else /* CONFIG_MLX4_DEBUG */
#define mlx4_dbg(mdev, format, arg...) do { (void) mdev; } while (0)
#endif /* CONFIG_MLX4_DEBUG */
#define mlx4_err(mdev, format, arg...) \
dev_err(&mdev->pdev->dev, format, ## arg)
#define mlx4_info(mdev, format, arg...) \
dev_info(&mdev->pdev->dev, format, ## arg)
#define mlx4_warn(mdev, format, arg...) \
dev_warn(&mdev->pdev->dev, format, ## arg)
struct mlx4_bitmap {
u32 last;
u32 top;
u32 max;
u32 mask;
spinlock_t lock;
unsigned long *table;
};
struct mlx4_buddy {
unsigned long **bits;
int max_order;
spinlock_t lock;
};
struct mlx4_icm;
struct mlx4_icm_table {
u64 virt;
int num_icm;
int num_obj;
int obj_size;
int lowmem;
struct mutex mutex;
struct mlx4_icm **icm;
};
struct mlx4_eq {
struct mlx4_dev *dev;
void __iomem *doorbell;
int eqn;
u32 cons_index;
u16 irq;
u16 have_irq;
int nent;
struct mlx4_buf_list *page_list;
struct mlx4_mtt mtt;
};
struct mlx4_profile {
int num_qp;
int rdmarc_per_qp;
int num_srq;
int num_cq;
int num_mcg;
int num_mpt;
int num_mtt;
};
struct mlx4_fw {
u64 clr_int_base;
u64 catas_offset;
struct mlx4_icm *fw_icm;
struct mlx4_icm *aux_icm;
u32 catas_size;
u16 fw_pages;
u8 clr_int_bar;
u8 catas_bar;
};
struct mlx4_cmd {
struct pci_pool *pool;
void __iomem *hcr;
struct mutex hcr_mutex;
struct semaphore poll_sem;
struct semaphore event_sem;
int max_cmds;
spinlock_t context_lock;
int free_head;
struct mlx4_cmd_context *context;
u16 token_mask;
u8 use_events;
u8 toggle;
};
struct mlx4_uar_table {
struct mlx4_bitmap bitmap;
};
struct mlx4_mr_table {
struct mlx4_bitmap mpt_bitmap;
struct mlx4_buddy mtt_buddy;
u64 mtt_base;
u64 mpt_base;
struct mlx4_icm_table mtt_table;
struct mlx4_icm_table dmpt_table;
};
struct mlx4_cq_table {
struct mlx4_bitmap bitmap;
spinlock_t lock;
struct radix_tree_root tree;
struct mlx4_icm_table table;
struct mlx4_icm_table cmpt_table;
};
struct mlx4_eq_table {
struct mlx4_bitmap bitmap;
void __iomem *clr_int;
void __iomem *uar_map[(MLX4_NUM_EQ + 6) / 4];
u32 clr_mask;
struct mlx4_eq eq[MLX4_NUM_EQ];
u64 icm_virt;
struct page *icm_page;
dma_addr_t icm_dma;
struct mlx4_icm_table cmpt_table;
int have_irq;
u8 inta_pin;
};
struct mlx4_srq_table {
struct mlx4_bitmap bitmap;
spinlock_t lock;
struct radix_tree_root tree;
struct mlx4_icm_table table;
struct mlx4_icm_table cmpt_table;
};
struct mlx4_qp_table {
struct mlx4_bitmap bitmap;
u32 rdmarc_base;
int rdmarc_shift;
spinlock_t lock;
struct mlx4_icm_table qp_table;
struct mlx4_icm_table auxc_table;
struct mlx4_icm_table altc_table;
struct mlx4_icm_table rdmarc_table;
struct mlx4_icm_table cmpt_table;
};
struct mlx4_mcg_table {
struct mutex mutex;
struct mlx4_bitmap bitmap;
struct mlx4_icm_table table;
};
struct mlx4_catas_err {
u32 __iomem *map;
int size;
};
struct mlx4_priv {
struct mlx4_dev dev;
struct list_head dev_list;
struct list_head ctx_list;
spinlock_t ctx_lock;
struct mlx4_fw fw;
struct mlx4_cmd cmd;
struct mlx4_bitmap pd_bitmap;
struct mlx4_uar_table uar_table;
struct mlx4_mr_table mr_table;
struct mlx4_cq_table cq_table;
struct mlx4_eq_table eq_table;
struct mlx4_srq_table srq_table;
struct mlx4_qp_table qp_table;
struct mlx4_mcg_table mcg_table;
struct mlx4_catas_err catas_err;
void __iomem *clr_base;
struct mlx4_uar driver_uar;
void __iomem *kar;
MLX4_DECLARE_DOORBELL_LOCK(doorbell_lock)
u32 rev_id;
char board_id[MLX4_BOARD_ID_LEN];
};
static inline struct mlx4_priv *mlx4_priv(struct mlx4_dev *dev)
{
return container_of(dev, struct mlx4_priv, dev);
}
u32 mlx4_bitmap_alloc(struct mlx4_bitmap *bitmap);
void mlx4_bitmap_free(struct mlx4_bitmap *bitmap, u32 obj);
int mlx4_bitmap_init(struct mlx4_bitmap *bitmap, u32 num, u32 mask, u32 reserved);
void mlx4_bitmap_cleanup(struct mlx4_bitmap *bitmap);
int mlx4_reset(struct mlx4_dev *dev);
int mlx4_init_pd_table(struct mlx4_dev *dev);
int mlx4_init_uar_table(struct mlx4_dev *dev);
int mlx4_init_mr_table(struct mlx4_dev *dev);
int mlx4_init_eq_table(struct mlx4_dev *dev);
int mlx4_init_cq_table(struct mlx4_dev *dev);
int mlx4_init_qp_table(struct mlx4_dev *dev);
int mlx4_init_srq_table(struct mlx4_dev *dev);
int mlx4_init_mcg_table(struct mlx4_dev *dev);
void mlx4_cleanup_pd_table(struct mlx4_dev *dev);
void mlx4_cleanup_uar_table(struct mlx4_dev *dev);
void mlx4_cleanup_mr_table(struct mlx4_dev *dev);
void mlx4_cleanup_eq_table(struct mlx4_dev *dev);
void mlx4_cleanup_cq_table(struct mlx4_dev *dev);
void mlx4_cleanup_qp_table(struct mlx4_dev *dev);
void mlx4_cleanup_srq_table(struct mlx4_dev *dev);
void mlx4_cleanup_mcg_table(struct mlx4_dev *dev);
void mlx4_map_catas_buf(struct mlx4_dev *dev);
void mlx4_unmap_catas_buf(struct mlx4_dev *dev);
int mlx4_register_device(struct mlx4_dev *dev);
void mlx4_unregister_device(struct mlx4_dev *dev);
void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_event type,
int subtype, int port);
struct mlx4_dev_cap;
struct mlx4_init_hca_param;
u64 mlx4_make_profile(struct mlx4_dev *dev,
struct mlx4_profile *request,
struct mlx4_dev_cap *dev_cap,
struct mlx4_init_hca_param *init_hca);
int mlx4_map_eq_icm(struct mlx4_dev *dev, u64 icm_virt);
void mlx4_unmap_eq_icm(struct mlx4_dev *dev);
int mlx4_cmd_init(struct mlx4_dev *dev);
void mlx4_cmd_cleanup(struct mlx4_dev *dev);
void mlx4_cmd_event(struct mlx4_dev *dev, u16 token, u8 status, u64 out_param);
int mlx4_cmd_use_events(struct mlx4_dev *dev);
void mlx4_cmd_use_polling(struct mlx4_dev *dev);
void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn);
void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn, int event_type);
void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type);
void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type);
void mlx4_handle_catas_err(struct mlx4_dev *dev);
#endif /* MLX4_H */
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
* Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/mlx4/cmd.h>
#include "mlx4.h"
#include "icm.h"
/*
* Must be packed because mtt_seg is 64 bits but only aligned to 32 bits.
*/
struct mlx4_mpt_entry {
__be32 flags;
__be32 qpn;
__be32 key;
__be32 pd;
__be64 start;
__be64 length;
__be32 lkey;
__be32 win_cnt;
u8 reserved1[3];
u8 mtt_rep;
__be64 mtt_seg;
__be32 mtt_sz;
__be32 entity_size;
__be32 first_byte_offset;
} __attribute__((packed));
#define MLX4_MPT_FLAG_SW_OWNS (0xfUL << 28)
#define MLX4_MPT_FLAG_MIO (1 << 17)
#define MLX4_MPT_FLAG_BIND_ENABLE (1 << 15)
#define MLX4_MPT_FLAG_PHYSICAL (1 << 9)
#define MLX4_MPT_FLAG_REGION (1 << 8)
#define MLX4_MTT_FLAG_PRESENT 1
static u32 mlx4_buddy_alloc(struct mlx4_buddy *buddy, int order)
{
int o;
int m;
u32 seg;
spin_lock(&buddy->lock);
for (o = order; o <= buddy->max_order; ++o) {
m = 1 << (buddy->max_order - o);
seg = find_first_bit(buddy->bits[o], m);
if (seg < m)
goto found;
}
spin_unlock(&buddy->lock);
return -1;
found:
clear_bit(seg, buddy->bits[o]);
while (o > order) {
--o;
seg <<= 1;
set_bit(seg ^ 1, buddy->bits[o]);
}
spin_unlock(&buddy->lock);
seg <<= order;
return seg;
}
static void mlx4_buddy_free(struct mlx4_buddy *buddy, u32 seg, int order)
{
seg >>= order;
spin_lock(&buddy->lock);
while (test_bit(seg ^ 1, buddy->bits[order])) {
clear_bit(seg ^ 1, buddy->bits[order]);
seg >>= 1;
++order;
}
set_bit(seg, buddy->bits[order]);
spin_unlock(&buddy->lock);
}
static int __devinit mlx4_buddy_init(struct mlx4_buddy *buddy, int max_order)
{
int i, s;
buddy->max_order = max_order;
spin_lock_init(&buddy->lock);
buddy->bits = kzalloc((buddy->max_order + 1) * sizeof (long *),
GFP_KERNEL);
if (!buddy->bits)
goto err_out;
for (i = 0; i <= buddy->max_order; ++i) {
s = BITS_TO_LONGS(1 << (buddy->max_order - i));
buddy->bits[i] = kmalloc(s * sizeof (long), GFP_KERNEL);
if (!buddy->bits[i])
goto err_out_free;
bitmap_zero(buddy->bits[i], 1 << (buddy->max_order - i));
}
set_bit(0, buddy->bits[buddy->max_order]);
return 0;
err_out_free:
for (i = 0; i <= buddy->max_order; ++i)
kfree(buddy->bits[i]);
kfree(buddy->bits);
err_out:
return -ENOMEM;
}
static void mlx4_buddy_cleanup(struct mlx4_buddy *buddy)
{
int i;
for (i = 0; i <= buddy->max_order; ++i)
kfree(buddy->bits[i]);
kfree(buddy->bits);
}
static u32 mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order)
{
struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
u32 seg;
seg = mlx4_buddy_alloc(&mr_table->mtt_buddy, order);
if (seg == -1)
return -1;
if (mlx4_table_get_range(dev, &mr_table->mtt_table, seg,
seg + (1 << order) - 1)) {
mlx4_buddy_free(&mr_table->mtt_buddy, seg, order);
return -1;
}
return seg;
}
int mlx4_mtt_init(struct mlx4_dev *dev, int npages, int page_shift,
struct mlx4_mtt *mtt)
{
int i;
if (!npages) {
mtt->order = -1;
mtt->page_shift = MLX4_ICM_PAGE_SHIFT;
return 0;
} else
mtt->page_shift = page_shift;
for (mtt->order = 0, i = MLX4_MTT_ENTRY_PER_SEG; i < npages; i <<= 1)
++mtt->order;
mtt->first_seg = mlx4_alloc_mtt_range(dev, mtt->order);
if (mtt->first_seg == -1)
return -ENOMEM;
return 0;
}
EXPORT_SYMBOL_GPL(mlx4_mtt_init);
void mlx4_mtt_cleanup(struct mlx4_dev *dev, struct mlx4_mtt *mtt)
{
struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
if (mtt->order < 0)
return;
mlx4_buddy_free(&mr_table->mtt_buddy, mtt->first_seg, mtt->order);
mlx4_table_put_range(dev, &mr_table->mtt_table, mtt->first_seg,
mtt->first_seg + (1 << mtt->order) - 1);
}
EXPORT_SYMBOL_GPL(mlx4_mtt_cleanup);
u64 mlx4_mtt_addr(struct mlx4_dev *dev, struct mlx4_mtt *mtt)
{
return (u64) mtt->first_seg * dev->caps.mtt_entry_sz;
}
EXPORT_SYMBOL_GPL(mlx4_mtt_addr);
static u32 hw_index_to_key(u32 ind)
{
return (ind >> 24) | (ind << 8);
}
static u32 key_to_hw_index(u32 key)
{
return (key << 24) | (key >> 8);
}
static int mlx4_SW2HW_MPT(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
int mpt_index)
{
return mlx4_cmd(dev, mailbox->dma, mpt_index, 0, MLX4_CMD_SW2HW_MPT,
MLX4_CMD_TIME_CLASS_B);
}
static int mlx4_HW2SW_MPT(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
int mpt_index)
{
return mlx4_cmd_box(dev, 0, mailbox ? mailbox->dma : 0, mpt_index,
!mailbox, MLX4_CMD_HW2SW_MPT, MLX4_CMD_TIME_CLASS_B);
}
int mlx4_mr_alloc(struct mlx4_dev *dev, u32 pd, u64 iova, u64 size, u32 access,
int npages, int page_shift, struct mlx4_mr *mr)
{
struct mlx4_priv *priv = mlx4_priv(dev);
u32 index;
int err;
index = mlx4_bitmap_alloc(&priv->mr_table.mpt_bitmap);
if (index == -1) {
err = -ENOMEM;
goto err;
}
mr->iova = iova;
mr->size = size;
mr->pd = pd;
mr->access = access;
mr->enabled = 0;
mr->key = hw_index_to_key(index);
err = mlx4_mtt_init(dev, npages, page_shift, &mr->mtt);
if (err)
goto err_index;
return 0;
err_index:
mlx4_bitmap_free(&priv->mr_table.mpt_bitmap, index);
err:
kfree(mr);
return err;
}
EXPORT_SYMBOL_GPL(mlx4_mr_alloc);
void mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr)
{
struct mlx4_priv *priv = mlx4_priv(dev);
int err;
if (mr->enabled) {
err = mlx4_HW2SW_MPT(dev, NULL,
key_to_hw_index(mr->key) &
(dev->caps.num_mpts - 1));
if (err)
mlx4_warn(dev, "HW2SW_MPT failed (%d)\n", err);
}
mlx4_mtt_cleanup(dev, &mr->mtt);
mlx4_bitmap_free(&priv->mr_table.mpt_bitmap, key_to_hw_index(mr->key));
}
EXPORT_SYMBOL_GPL(mlx4_mr_free);
int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr)
{
struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
struct mlx4_cmd_mailbox *mailbox;
struct mlx4_mpt_entry *mpt_entry;
int err;
err = mlx4_table_get(dev, &mr_table->dmpt_table, key_to_hw_index(mr->key));
if (err)
return err;
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox)) {
err = PTR_ERR(mailbox);
goto err_table;
}
mpt_entry = mailbox->buf;
memset(mpt_entry, 0, sizeof *mpt_entry);
mpt_entry->flags = cpu_to_be32(MLX4_MPT_FLAG_SW_OWNS |
MLX4_MPT_FLAG_MIO |
MLX4_MPT_FLAG_REGION |
mr->access);
if (mr->mtt.order < 0)
mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_PHYSICAL);
mpt_entry->key = cpu_to_be32(key_to_hw_index(mr->key));
mpt_entry->pd = cpu_to_be32(mr->pd);
mpt_entry->start = cpu_to_be64(mr->iova);
mpt_entry->length = cpu_to_be64(mr->size);
mpt_entry->entity_size = cpu_to_be32(mr->mtt.page_shift);
mpt_entry->mtt_seg = cpu_to_be64(mlx4_mtt_addr(dev, &mr->mtt));
err = mlx4_SW2HW_MPT(dev, mailbox,
key_to_hw_index(mr->key) & (dev->caps.num_mpts - 1));
if (err) {
mlx4_warn(dev, "SW2HW_MPT failed (%d)\n", err);
goto err_cmd;
}
mr->enabled = 1;
mlx4_free_cmd_mailbox(dev, mailbox);
return 0;
err_cmd:
mlx4_free_cmd_mailbox(dev, mailbox);
err_table:
mlx4_table_put(dev, &mr_table->dmpt_table, key_to_hw_index(mr->key));
return err;
}
EXPORT_SYMBOL_GPL(mlx4_mr_enable);
static int mlx4_WRITE_MTT(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
int num_mtt)
{
return mlx4_cmd(dev, mailbox->dma, num_mtt, 0, MLX4_CMD_WRITE_MTT,
MLX4_CMD_TIME_CLASS_B);
}
int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
int start_index, int npages, u64 *page_list)
{
struct mlx4_cmd_mailbox *mailbox;
__be64 *mtt_entry;
int i;
int err = 0;
if (mtt->order < 0)
return -EINVAL;
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
mtt_entry = mailbox->buf;
while (npages > 0) {
mtt_entry[0] = cpu_to_be64(mlx4_mtt_addr(dev, mtt) + start_index * 8);
mtt_entry[1] = 0;
for (i = 0; i < npages && i < MLX4_MAILBOX_SIZE / 8 - 2; ++i)
mtt_entry[i + 2] = cpu_to_be64(page_list[i] |
MLX4_MTT_FLAG_PRESENT);
/*
* If we have an odd number of entries to write, add
* one more dummy entry for firmware efficiency.
*/
if (i & 1)
mtt_entry[i + 2] = 0;
err = mlx4_WRITE_MTT(dev, mailbox, (i + 1) & ~1);
if (err)
goto out;
npages -= i;
start_index += i;
page_list += i;
}
out:
mlx4_free_cmd_mailbox(dev, mailbox);
return err;
}
EXPORT_SYMBOL_GPL(mlx4_write_mtt);
int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
struct mlx4_buf *buf)
{
u64 *page_list;
int err;
int i;
page_list = kmalloc(buf->npages * sizeof *page_list, GFP_KERNEL);
if (!page_list)
return -ENOMEM;
for (i = 0; i < buf->npages; ++i)
if (buf->nbufs == 1)
page_list[i] = buf->u.direct.map + (i << buf->page_shift);
else
page_list[i] = buf->u.page_list[i].map;
err = mlx4_write_mtt(dev, mtt, 0, buf->npages, page_list);
kfree(page_list);
return err;
}
EXPORT_SYMBOL_GPL(mlx4_buf_write_mtt);
int __devinit mlx4_init_mr_table(struct mlx4_dev *dev)
{
struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
int err;
err = mlx4_bitmap_init(&mr_table->mpt_bitmap, dev->caps.num_mpts,
~0, dev->caps.reserved_mrws);
if (err)
return err;
err = mlx4_buddy_init(&mr_table->mtt_buddy,
ilog2(dev->caps.num_mtt_segs));
if (err)
goto err_buddy;
if (dev->caps.reserved_mtts) {
if (mlx4_alloc_mtt_range(dev, ilog2(dev->caps.reserved_mtts)) == -1) {
mlx4_warn(dev, "MTT table of order %d is too small.\n",
mr_table->mtt_buddy.max_order);
err = -ENOMEM;
goto err_reserve_mtts;
}
}
return 0;
err_reserve_mtts:
mlx4_buddy_cleanup(&mr_table->mtt_buddy);
err_buddy:
mlx4_bitmap_cleanup(&mr_table->mpt_bitmap);
return err;
}
void mlx4_cleanup_mr_table(struct mlx4_dev *dev)
{
struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
mlx4_buddy_cleanup(&mr_table->mtt_buddy);
mlx4_bitmap_cleanup(&mr_table->mpt_bitmap);
}
/*
* Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/init.h>
#include <linux/errno.h>
#include <asm/page.h>
#include "mlx4.h"
#include "icm.h"
int mlx4_pd_alloc(struct mlx4_dev *dev, u32 *pdn)
{
struct mlx4_priv *priv = mlx4_priv(dev);
*pdn = mlx4_bitmap_alloc(&priv->pd_bitmap);
if (*pdn == -1)
return -ENOMEM;
return 0;
}
EXPORT_SYMBOL_GPL(mlx4_pd_alloc);
void mlx4_pd_free(struct mlx4_dev *dev, u32 pdn)
{
mlx4_bitmap_free(&mlx4_priv(dev)->pd_bitmap, pdn);
}
EXPORT_SYMBOL_GPL(mlx4_pd_free);
int __devinit mlx4_init_pd_table(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
return mlx4_bitmap_init(&priv->pd_bitmap, dev->caps.num_pds,
(1 << 24) - 1, dev->caps.reserved_pds);
}
void mlx4_cleanup_pd_table(struct mlx4_dev *dev)
{
mlx4_bitmap_cleanup(&mlx4_priv(dev)->pd_bitmap);
}
int mlx4_uar_alloc(struct mlx4_dev *dev, struct mlx4_uar *uar)
{
uar->index = mlx4_bitmap_alloc(&mlx4_priv(dev)->uar_table.bitmap);
if (uar->index == -1)
return -ENOMEM;
uar->pfn = (pci_resource_start(dev->pdev, 2) >> PAGE_SHIFT) + uar->index;
return 0;
}
EXPORT_SYMBOL_GPL(mlx4_uar_alloc);
void mlx4_uar_free(struct mlx4_dev *dev, struct mlx4_uar *uar)
{
mlx4_bitmap_free(&mlx4_priv(dev)->uar_table.bitmap, uar->index);
}
EXPORT_SYMBOL_GPL(mlx4_uar_free);
int mlx4_init_uar_table(struct mlx4_dev *dev)
{
return mlx4_bitmap_init(&mlx4_priv(dev)->uar_table.bitmap,
dev->caps.num_uars, dev->caps.num_uars - 1,
max(128, dev->caps.reserved_uars));
}
void mlx4_cleanup_uar_table(struct mlx4_dev *dev)
{
mlx4_bitmap_cleanup(&mlx4_priv(dev)->uar_table.bitmap);
}
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
* Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/init.h>
#include "mlx4.h"
#include "fw.h"
enum {
MLX4_RES_QP,
MLX4_RES_RDMARC,
MLX4_RES_ALTC,
MLX4_RES_AUXC,
MLX4_RES_SRQ,
MLX4_RES_CQ,
MLX4_RES_EQ,
MLX4_RES_DMPT,
MLX4_RES_CMPT,
MLX4_RES_MTT,
MLX4_RES_MCG,
MLX4_RES_NUM
};
static const char *res_name[] = {
[MLX4_RES_QP] = "QP",
[MLX4_RES_RDMARC] = "RDMARC",
[MLX4_RES_ALTC] = "ALTC",
[MLX4_RES_AUXC] = "AUXC",
[MLX4_RES_SRQ] = "SRQ",
[MLX4_RES_CQ] = "CQ",
[MLX4_RES_EQ] = "EQ",
[MLX4_RES_DMPT] = "DMPT",
[MLX4_RES_CMPT] = "CMPT",
[MLX4_RES_MTT] = "MTT",
[MLX4_RES_MCG] = "MCG",
};
u64 mlx4_make_profile(struct mlx4_dev *dev,
struct mlx4_profile *request,
struct mlx4_dev_cap *dev_cap,
struct mlx4_init_hca_param *init_hca)
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_resource {
u64 size;
u64 start;
int type;
int num;
int log_num;
};
u64 total_size = 0;
struct mlx4_resource *profile;
struct mlx4_resource tmp;
int i, j;
profile = kzalloc(MLX4_RES_NUM * sizeof *profile, GFP_KERNEL);
if (!profile)
return -ENOMEM;
profile[MLX4_RES_QP].size = dev_cap->qpc_entry_sz;
profile[MLX4_RES_RDMARC].size = dev_cap->rdmarc_entry_sz;
profile[MLX4_RES_ALTC].size = dev_cap->altc_entry_sz;
profile[MLX4_RES_AUXC].size = dev_cap->aux_entry_sz;
profile[MLX4_RES_SRQ].size = dev_cap->srq_entry_sz;
profile[MLX4_RES_CQ].size = dev_cap->cqc_entry_sz;
profile[MLX4_RES_EQ].size = dev_cap->eqc_entry_sz;
profile[MLX4_RES_DMPT].size = dev_cap->dmpt_entry_sz;
profile[MLX4_RES_CMPT].size = dev_cap->cmpt_entry_sz;
profile[MLX4_RES_MTT].size = MLX4_MTT_ENTRY_PER_SEG * dev_cap->mtt_entry_sz;
profile[MLX4_RES_MCG].size = MLX4_MGM_ENTRY_SIZE;
profile[MLX4_RES_QP].num = request->num_qp;
profile[MLX4_RES_RDMARC].num = request->num_qp * request->rdmarc_per_qp;
profile[MLX4_RES_ALTC].num = request->num_qp;
profile[MLX4_RES_AUXC].num = request->num_qp;
profile[MLX4_RES_SRQ].num = request->num_srq;
profile[MLX4_RES_CQ].num = request->num_cq;
profile[MLX4_RES_EQ].num = MLX4_NUM_EQ + dev_cap->reserved_eqs;
profile[MLX4_RES_DMPT].num = request->num_mpt;
profile[MLX4_RES_CMPT].num = MLX4_NUM_CMPTS;
profile[MLX4_RES_MTT].num = request->num_mtt;
profile[MLX4_RES_MCG].num = request->num_mcg;
for (i = 0; i < MLX4_RES_NUM; ++i) {
profile[i].type = i;
profile[i].num = roundup_pow_of_two(profile[i].num);
profile[i].log_num = ilog2(profile[i].num);
profile[i].size *= profile[i].num;
profile[i].size = max(profile[i].size, (u64) PAGE_SIZE);
}
/*
* Sort the resources in decreasing order of size. Since they
* all have sizes that are powers of 2, we'll be able to keep
* resources aligned to their size and pack them without gaps
* using the sorted order.
*/
for (i = MLX4_RES_NUM; i > 0; --i)
for (j = 1; j < i; ++j) {
if (profile[j].size > profile[j - 1].size) {
tmp = profile[j];
profile[j] = profile[j - 1];
profile[j - 1] = tmp;
}
}
for (i = 0; i < MLX4_RES_NUM; ++i) {
if (profile[i].size) {
profile[i].start = total_size;
total_size += profile[i].size;
}
if (total_size > dev_cap->max_icm_sz) {
mlx4_err(dev, "Profile requires 0x%llx bytes; "
"won't fit in 0x%llx bytes of context memory.\n",
(unsigned long long) total_size,
(unsigned long long) dev_cap->max_icm_sz);
kfree(profile);
return -ENOMEM;
}
if (profile[i].size)
mlx4_dbg(dev, " profile[%2d] (%6s): 2^%02d entries @ 0x%10llx, "
"size 0x%10llx\n",
i, res_name[profile[i].type], profile[i].log_num,
(unsigned long long) profile[i].start,
(unsigned long long) profile[i].size);
}
mlx4_dbg(dev, "HCA context memory: reserving %d KB\n",
(int) (total_size >> 10));
for (i = 0; i < MLX4_RES_NUM; ++i) {
switch (profile[i].type) {
case MLX4_RES_QP:
dev->caps.num_qps = profile[i].num;
init_hca->qpc_base = profile[i].start;
init_hca->log_num_qps = profile[i].log_num;
break;
case MLX4_RES_RDMARC:
for (priv->qp_table.rdmarc_shift = 0;
request->num_qp << priv->qp_table.rdmarc_shift < profile[i].num;
++priv->qp_table.rdmarc_shift)
; /* nothing */
dev->caps.max_qp_dest_rdma = 1 << priv->qp_table.rdmarc_shift;
priv->qp_table.rdmarc_base = (u32) profile[i].start;
init_hca->rdmarc_base = profile[i].start;
init_hca->log_rd_per_qp = priv->qp_table.rdmarc_shift;
break;
case MLX4_RES_ALTC:
init_hca->altc_base = profile[i].start;
break;
case MLX4_RES_AUXC:
init_hca->auxc_base = profile[i].start;
break;
case MLX4_RES_SRQ:
dev->caps.num_srqs = profile[i].num;
init_hca->srqc_base = profile[i].start;
init_hca->log_num_srqs = profile[i].log_num;
break;
case MLX4_RES_CQ:
dev->caps.num_cqs = profile[i].num;
init_hca->cqc_base = profile[i].start;
init_hca->log_num_cqs = profile[i].log_num;
break;
case MLX4_RES_EQ:
dev->caps.num_eqs = profile[i].num;
init_hca->eqc_base = profile[i].start;
init_hca->log_num_eqs = profile[i].log_num;
break;
case MLX4_RES_DMPT:
dev->caps.num_mpts = profile[i].num;
priv->mr_table.mpt_base = profile[i].start;
init_hca->dmpt_base = profile[i].start;
init_hca->log_mpt_sz = profile[i].log_num;
break;
case MLX4_RES_CMPT:
init_hca->cmpt_base = profile[i].start;
break;
case MLX4_RES_MTT:
dev->caps.num_mtt_segs = profile[i].num;
priv->mr_table.mtt_base = profile[i].start;
init_hca->mtt_base = profile[i].start;
break;
case MLX4_RES_MCG:
dev->caps.num_mgms = profile[i].num >> 1;
dev->caps.num_amgms = profile[i].num >> 1;
init_hca->mc_base = profile[i].start;
init_hca->log_mc_entry_sz = ilog2(MLX4_MGM_ENTRY_SIZE);
init_hca->log_mc_table_sz = profile[i].log_num;
init_hca->log_mc_hash_sz = profile[i].log_num - 1;
break;
default:
break;
}
}
/*
* PDs don't take any HCA memory, but we assign them as part
* of the HCA profile anyway.
*/
dev->caps.num_pds = MLX4_NUM_PDS;
kfree(profile);
return total_size;
}
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
* Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
* Copyright (c) 2004 Voltaire, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/init.h>
#include <linux/mlx4/cmd.h>
#include <linux/mlx4/qp.h>
#include "mlx4.h"
#include "icm.h"
void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type)
{
struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
struct mlx4_qp *qp;
spin_lock(&qp_table->lock);
qp = __mlx4_qp_lookup(dev, qpn);
if (qp)
atomic_inc(&qp->refcount);
spin_unlock(&qp_table->lock);
if (!qp) {
mlx4_warn(dev, "Async event for bogus QP %08x\n", qpn);
return;
}
qp->event(qp, event_type);
if (atomic_dec_and_test(&qp->refcount))
complete(&qp->free);
}
int mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
enum mlx4_qp_state cur_state, enum mlx4_qp_state new_state,
struct mlx4_qp_context *context, enum mlx4_qp_optpar optpar,
int sqd_event, struct mlx4_qp *qp)
{
static const u16 op[MLX4_QP_NUM_STATE][MLX4_QP_NUM_STATE] = {
[MLX4_QP_STATE_RST] = {
[MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP,
[MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP,
[MLX4_QP_STATE_INIT] = MLX4_CMD_RST2INIT_QP,
},
[MLX4_QP_STATE_INIT] = {
[MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP,
[MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP,
[MLX4_QP_STATE_INIT] = MLX4_CMD_INIT2INIT_QP,
[MLX4_QP_STATE_RTR] = MLX4_CMD_INIT2RTR_QP,
},
[MLX4_QP_STATE_RTR] = {
[MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP,
[MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP,
[MLX4_QP_STATE_RTS] = MLX4_CMD_RTR2RTS_QP,
},
[MLX4_QP_STATE_RTS] = {
[MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP,
[MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP,
[MLX4_QP_STATE_RTS] = MLX4_CMD_RTS2RTS_QP,
[MLX4_QP_STATE_SQD] = MLX4_CMD_RTS2SQD_QP,
},
[MLX4_QP_STATE_SQD] = {
[MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP,
[MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP,
[MLX4_QP_STATE_RTS] = MLX4_CMD_SQD2RTS_QP,
[MLX4_QP_STATE_SQD] = MLX4_CMD_SQD2SQD_QP,
},
[MLX4_QP_STATE_SQER] = {
[MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP,
[MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP,
[MLX4_QP_STATE_RTS] = MLX4_CMD_SQERR2RTS_QP,
},
[MLX4_QP_STATE_ERR] = {
[MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP,
[MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP,
}
};
struct mlx4_cmd_mailbox *mailbox;
int ret = 0;
if (cur_state < 0 || cur_state >= MLX4_QP_NUM_STATE ||
new_state < 0 || cur_state >= MLX4_QP_NUM_STATE ||
!op[cur_state][new_state])
return -EINVAL;
if (op[cur_state][new_state] == MLX4_CMD_2RST_QP)
return mlx4_cmd(dev, 0, qp->qpn, 2,
MLX4_CMD_2RST_QP, MLX4_CMD_TIME_CLASS_A);
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
if (cur_state == MLX4_QP_STATE_RST && new_state == MLX4_QP_STATE_INIT) {
u64 mtt_addr = mlx4_mtt_addr(dev, mtt);
context->mtt_base_addr_h = mtt_addr >> 32;
context->mtt_base_addr_l = cpu_to_be32(mtt_addr & 0xffffffff);
context->log_page_size = mtt->page_shift - MLX4_ICM_PAGE_SHIFT;
}
*(__be32 *) mailbox->buf = cpu_to_be32(optpar);
memcpy(mailbox->buf + 8, context, sizeof *context);
((struct mlx4_qp_context *) (mailbox->buf + 8))->local_qpn =
cpu_to_be32(qp->qpn);
ret = mlx4_cmd(dev, mailbox->dma, qp->qpn | (!!sqd_event << 31),
new_state == MLX4_QP_STATE_RST ? 2 : 0,
op[cur_state][new_state], MLX4_CMD_TIME_CLASS_C);
mlx4_free_cmd_mailbox(dev, mailbox);
return ret;
}
EXPORT_SYMBOL_GPL(mlx4_qp_modify);
int mlx4_qp_alloc(struct mlx4_dev *dev, int sqpn, struct mlx4_qp *qp)
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_qp_table *qp_table = &priv->qp_table;
int err;
if (sqpn)
qp->qpn = sqpn;
else {
qp->qpn = mlx4_bitmap_alloc(&qp_table->bitmap);
if (qp->qpn == -1)
return -ENOMEM;
}
err = mlx4_table_get(dev, &qp_table->qp_table, qp->qpn);
if (err)
goto err_out;
err = mlx4_table_get(dev, &qp_table->auxc_table, qp->qpn);
if (err)
goto err_put_qp;
err = mlx4_table_get(dev, &qp_table->altc_table, qp->qpn);
if (err)
goto err_put_auxc;
err = mlx4_table_get(dev, &qp_table->rdmarc_table, qp->qpn);
if (err)
goto err_put_altc;
err = mlx4_table_get(dev, &qp_table->cmpt_table, qp->qpn);
if (err)
goto err_put_rdmarc;
spin_lock_irq(&qp_table->lock);
err = radix_tree_insert(&dev->qp_table_tree, qp->qpn & (dev->caps.num_qps - 1), qp);
spin_unlock_irq(&qp_table->lock);
if (err)
goto err_put_cmpt;
atomic_set(&qp->refcount, 1);
init_completion(&qp->free);
return 0;
err_put_cmpt:
mlx4_table_put(dev, &qp_table->cmpt_table, qp->qpn);
err_put_rdmarc:
mlx4_table_put(dev, &qp_table->rdmarc_table, qp->qpn);
err_put_altc:
mlx4_table_put(dev, &qp_table->altc_table, qp->qpn);
err_put_auxc:
mlx4_table_put(dev, &qp_table->auxc_table, qp->qpn);
err_put_qp:
mlx4_table_put(dev, &qp_table->qp_table, qp->qpn);
err_out:
if (!sqpn)
mlx4_bitmap_free(&qp_table->bitmap, qp->qpn);
return err;
}
EXPORT_SYMBOL_GPL(mlx4_qp_alloc);
void mlx4_qp_remove(struct mlx4_dev *dev, struct mlx4_qp *qp)
{
struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
unsigned long flags;
spin_lock_irqsave(&qp_table->lock, flags);
radix_tree_delete(&dev->qp_table_tree, qp->qpn & (dev->caps.num_qps - 1));
spin_unlock_irqrestore(&qp_table->lock, flags);
}
EXPORT_SYMBOL_GPL(mlx4_qp_remove);
void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp)
{
struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
if (atomic_dec_and_test(&qp->refcount))
complete(&qp->free);
wait_for_completion(&qp->free);
mlx4_table_put(dev, &qp_table->cmpt_table, qp->qpn);
mlx4_table_put(dev, &qp_table->rdmarc_table, qp->qpn);
mlx4_table_put(dev, &qp_table->altc_table, qp->qpn);
mlx4_table_put(dev, &qp_table->auxc_table, qp->qpn);
mlx4_table_put(dev, &qp_table->qp_table, qp->qpn);
mlx4_bitmap_free(&qp_table->bitmap, qp->qpn);
}
EXPORT_SYMBOL_GPL(mlx4_qp_free);
static int mlx4_CONF_SPECIAL_QP(struct mlx4_dev *dev, u32 base_qpn)
{
return mlx4_cmd(dev, 0, base_qpn, 0, MLX4_CMD_CONF_SPECIAL_QP,
MLX4_CMD_TIME_CLASS_B);
}
int __devinit mlx4_init_qp_table(struct mlx4_dev *dev)
{
struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
int err;
spin_lock_init(&qp_table->lock);
INIT_RADIX_TREE(&dev->qp_table_tree, GFP_ATOMIC);
/*
* We reserve 2 extra QPs per port for the special QPs. The
* block of special QPs must be aligned to a multiple of 8, so
* round up.
*/
dev->caps.sqp_start = ALIGN(dev->caps.reserved_qps, 8);
err = mlx4_bitmap_init(&qp_table->bitmap, dev->caps.num_qps,
(1 << 24) - 1, dev->caps.sqp_start + 8);
if (err)
return err;
return mlx4_CONF_SPECIAL_QP(dev, dev->caps.sqp_start);
}
void mlx4_cleanup_qp_table(struct mlx4_dev *dev)
{
mlx4_CONF_SPECIAL_QP(dev, 0);
mlx4_bitmap_cleanup(&mlx4_priv(dev)->qp_table.bitmap);
}
/*
* Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/delay.h>
#include <linux/slab.h>
#include "mlx4.h"
int mlx4_reset(struct mlx4_dev *dev)
{
void __iomem *reset;
u32 *hca_header = NULL;
int pcie_cap;
u16 devctl;
u16 linkctl;
u16 vendor;
unsigned long end;
u32 sem;
int i;
int err = 0;
#define MLX4_RESET_BASE 0xf0000
#define MLX4_RESET_SIZE 0x400
#define MLX4_SEM_OFFSET 0x3fc
#define MLX4_RESET_OFFSET 0x10
#define MLX4_RESET_VALUE swab32(1)
#define MLX4_SEM_TIMEOUT_JIFFIES (10 * HZ)
#define MLX4_RESET_TIMEOUT_JIFFIES (2 * HZ)
/*
* Reset the chip. This is somewhat ugly because we have to
* save off the PCI header before reset and then restore it
* after the chip reboots. We skip config space offsets 22
* and 23 since those have a special meaning.
*/
/* Do we need to save off the full 4K PCI Express header?? */
hca_header = kmalloc(256, GFP_KERNEL);
if (!hca_header) {
err = -ENOMEM;
mlx4_err(dev, "Couldn't allocate memory to save HCA "
"PCI header, aborting.\n");
goto out;
}
pcie_cap = pci_find_capability(dev->pdev, PCI_CAP_ID_EXP);
for (i = 0; i < 64; ++i) {
if (i == 22 || i == 23)
continue;
if (pci_read_config_dword(dev->pdev, i * 4, hca_header + i)) {
err = -ENODEV;
mlx4_err(dev, "Couldn't save HCA "
"PCI header, aborting.\n");
goto out;
}
}
reset = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_RESET_BASE,
MLX4_RESET_SIZE);
if (!reset) {
err = -ENOMEM;
mlx4_err(dev, "Couldn't map HCA reset register, aborting.\n");
goto out;
}
/* grab HW semaphore to lock out flash updates */
end = jiffies + MLX4_SEM_TIMEOUT_JIFFIES;
do {
sem = readl(reset + MLX4_SEM_OFFSET);
if (!sem)
break;
msleep(1);
} while (time_before(jiffies, end));
if (sem) {
mlx4_err(dev, "Failed to obtain HW semaphore, aborting\n");
err = -EAGAIN;
iounmap(reset);
goto out;
}
/* actually hit reset */
writel(MLX4_RESET_VALUE, reset + MLX4_RESET_OFFSET);
iounmap(reset);
end = jiffies + MLX4_RESET_TIMEOUT_JIFFIES;
do {
if (!pci_read_config_word(dev->pdev, PCI_VENDOR_ID, &vendor) &&
vendor != 0xffff)
break;
msleep(1);
} while (time_before(jiffies, end));
if (vendor == 0xffff) {
err = -ENODEV;
mlx4_err(dev, "PCI device did not come back after reset, "
"aborting.\n");
goto out;
}
/* Now restore the PCI headers */
if (pcie_cap) {
devctl = hca_header[(pcie_cap + PCI_EXP_DEVCTL) / 4];
if (pci_write_config_word(dev->pdev, pcie_cap + PCI_EXP_DEVCTL,
devctl)) {
err = -ENODEV;
mlx4_err(dev, "Couldn't restore HCA PCI Express "
"Device Control register, aborting.\n");
goto out;
}
linkctl = hca_header[(pcie_cap + PCI_EXP_LNKCTL) / 4];
if (pci_write_config_word(dev->pdev, pcie_cap + PCI_EXP_LNKCTL,
linkctl)) {
err = -ENODEV;
mlx4_err(dev, "Couldn't restore HCA PCI Express "
"Link control register, aborting.\n");
goto out;
}
}
for (i = 0; i < 16; ++i) {
if (i * 4 == PCI_COMMAND)
continue;
if (pci_write_config_dword(dev->pdev, i * 4, hca_header[i])) {
err = -ENODEV;
mlx4_err(dev, "Couldn't restore HCA reg %x, "
"aborting.\n", i);
goto out;
}
}
if (pci_write_config_dword(dev->pdev, PCI_COMMAND,
hca_header[PCI_COMMAND / 4])) {
err = -ENODEV;
mlx4_err(dev, "Couldn't restore HCA COMMAND, "
"aborting.\n");
goto out;
}
out:
kfree(hca_header);
return err;
}
/*
* Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/init.h>
#include <linux/mlx4/cmd.h>
#include "mlx4.h"
#include "icm.h"
struct mlx4_srq_context {
__be32 state_logsize_srqn;
u8 logstride;
u8 reserved1[3];
u8 pg_offset;
u8 reserved2[3];
u32 reserved3;
u8 log_page_size;
u8 reserved4[2];
u8 mtt_base_addr_h;
__be32 mtt_base_addr_l;
__be32 pd;
__be16 limit_watermark;
__be16 wqe_cnt;
u16 reserved5;
__be16 wqe_counter;
u32 reserved6;
__be64 db_rec_addr;
};
void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type)
{
struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table;
struct mlx4_srq *srq;
spin_lock(&srq_table->lock);
srq = radix_tree_lookup(&srq_table->tree, srqn & (dev->caps.num_srqs - 1));
if (srq)
atomic_inc(&srq->refcount);
spin_unlock(&srq_table->lock);
if (!srq) {
mlx4_warn(dev, "Async event for bogus SRQ %08x\n", srqn);
return;
}
srq->event(srq, event_type);
if (atomic_dec_and_test(&srq->refcount))
complete(&srq->free);
}
static int mlx4_SW2HW_SRQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
int srq_num)
{
return mlx4_cmd(dev, mailbox->dma, srq_num, 0, MLX4_CMD_SW2HW_SRQ,
MLX4_CMD_TIME_CLASS_A);
}
static int mlx4_HW2SW_SRQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
int srq_num)
{
return mlx4_cmd_box(dev, 0, mailbox ? mailbox->dma : 0, srq_num,
mailbox ? 0 : 1, MLX4_CMD_HW2SW_SRQ,
MLX4_CMD_TIME_CLASS_A);
}
static int mlx4_ARM_SRQ(struct mlx4_dev *dev, int srq_num, int limit_watermark)
{
return mlx4_cmd(dev, limit_watermark, srq_num, 0, MLX4_CMD_ARM_SRQ,
MLX4_CMD_TIME_CLASS_B);
}
int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, struct mlx4_mtt *mtt,
u64 db_rec, struct mlx4_srq *srq)
{
struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table;
struct mlx4_cmd_mailbox *mailbox;
struct mlx4_srq_context *srq_context;
u64 mtt_addr;
int err;
srq->srqn = mlx4_bitmap_alloc(&srq_table->bitmap);
if (srq->srqn == -1)
return -ENOMEM;
err = mlx4_table_get(dev, &srq_table->table, srq->srqn);
if (err)
goto err_out;
err = mlx4_table_get(dev, &srq_table->cmpt_table, srq->srqn);
if (err)
goto err_put;
spin_lock_irq(&srq_table->lock);
err = radix_tree_insert(&srq_table->tree, srq->srqn, srq);
spin_unlock_irq(&srq_table->lock);
if (err)
goto err_cmpt_put;
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox)) {
err = PTR_ERR(mailbox);
goto err_radix;
}
srq_context = mailbox->buf;
memset(srq_context, 0, sizeof *srq_context);
srq_context->state_logsize_srqn = cpu_to_be32((ilog2(srq->max) << 24) |
srq->srqn);
srq_context->logstride = srq->wqe_shift - 4;
srq_context->log_page_size = mtt->page_shift - MLX4_ICM_PAGE_SHIFT;
mtt_addr = mlx4_mtt_addr(dev, mtt);
srq_context->mtt_base_addr_h = mtt_addr >> 32;
srq_context->mtt_base_addr_l = cpu_to_be32(mtt_addr & 0xffffffff);
srq_context->pd = cpu_to_be32(pdn);
srq_context->db_rec_addr = cpu_to_be64(db_rec);
err = mlx4_SW2HW_SRQ(dev, mailbox, srq->srqn);
mlx4_free_cmd_mailbox(dev, mailbox);
if (err)
goto err_radix;
atomic_set(&srq->refcount, 1);
init_completion(&srq->free);
return 0;
err_radix:
spin_lock_irq(&srq_table->lock);
radix_tree_delete(&srq_table->tree, srq->srqn);
spin_unlock_irq(&srq_table->lock);
err_cmpt_put:
mlx4_table_put(dev, &srq_table->cmpt_table, srq->srqn);
err_put:
mlx4_table_put(dev, &srq_table->table, srq->srqn);
err_out:
mlx4_bitmap_free(&srq_table->bitmap, srq->srqn);
return err;
}
EXPORT_SYMBOL_GPL(mlx4_srq_alloc);
void mlx4_srq_free(struct mlx4_dev *dev, struct mlx4_srq *srq)
{
struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table;
int err;
err = mlx4_HW2SW_SRQ(dev, NULL, srq->srqn);
if (err)
mlx4_warn(dev, "HW2SW_SRQ failed (%d) for SRQN %06x\n", err, srq->srqn);
spin_lock_irq(&srq_table->lock);
radix_tree_delete(&srq_table->tree, srq->srqn);
spin_unlock_irq(&srq_table->lock);
if (atomic_dec_and_test(&srq->refcount))
complete(&srq->free);
wait_for_completion(&srq->free);
mlx4_table_put(dev, &srq_table->table, srq->srqn);
mlx4_bitmap_free(&srq_table->bitmap, srq->srqn);
}
EXPORT_SYMBOL_GPL(mlx4_srq_free);
int mlx4_srq_arm(struct mlx4_dev *dev, struct mlx4_srq *srq, int limit_watermark)
{
return mlx4_ARM_SRQ(dev, srq->srqn, limit_watermark);
}
EXPORT_SYMBOL_GPL(mlx4_srq_arm);
int __devinit mlx4_init_srq_table(struct mlx4_dev *dev)
{
struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table;
int err;
spin_lock_init(&srq_table->lock);
INIT_RADIX_TREE(&srq_table->tree, GFP_ATOMIC);
err = mlx4_bitmap_init(&srq_table->bitmap, dev->caps.num_srqs,
dev->caps.num_srqs - 1, dev->caps.reserved_srqs);
if (err)
return err;
return 0;
}
void mlx4_cleanup_srq_table(struct mlx4_dev *dev)
{
mlx4_bitmap_cleanup(&mlx4_priv(dev)->srq_table.bitmap);
}
/*
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef MLX4_CMD_H
#define MLX4_CMD_H
#include <linux/dma-mapping.h>
enum {
/* initialization and general commands */
MLX4_CMD_SYS_EN = 0x1,
MLX4_CMD_SYS_DIS = 0x2,
MLX4_CMD_MAP_FA = 0xfff,
MLX4_CMD_UNMAP_FA = 0xffe,
MLX4_CMD_RUN_FW = 0xff6,
MLX4_CMD_MOD_STAT_CFG = 0x34,
MLX4_CMD_QUERY_DEV_CAP = 0x3,
MLX4_CMD_QUERY_FW = 0x4,
MLX4_CMD_ENABLE_LAM = 0xff8,
MLX4_CMD_DISABLE_LAM = 0xff7,
MLX4_CMD_QUERY_DDR = 0x5,
MLX4_CMD_QUERY_ADAPTER = 0x6,
MLX4_CMD_INIT_HCA = 0x7,
MLX4_CMD_CLOSE_HCA = 0x8,
MLX4_CMD_INIT_PORT = 0x9,
MLX4_CMD_CLOSE_PORT = 0xa,
MLX4_CMD_QUERY_HCA = 0xb,
MLX4_CMD_SET_PORT = 0xc,
MLX4_CMD_ACCESS_DDR = 0x2e,
MLX4_CMD_MAP_ICM = 0xffa,
MLX4_CMD_UNMAP_ICM = 0xff9,
MLX4_CMD_MAP_ICM_AUX = 0xffc,
MLX4_CMD_UNMAP_ICM_AUX = 0xffb,
MLX4_CMD_SET_ICM_SIZE = 0xffd,
/* TPT commands */
MLX4_CMD_SW2HW_MPT = 0xd,
MLX4_CMD_QUERY_MPT = 0xe,
MLX4_CMD_HW2SW_MPT = 0xf,
MLX4_CMD_READ_MTT = 0x10,
MLX4_CMD_WRITE_MTT = 0x11,
MLX4_CMD_SYNC_TPT = 0x2f,
/* EQ commands */
MLX4_CMD_MAP_EQ = 0x12,
MLX4_CMD_SW2HW_EQ = 0x13,
MLX4_CMD_HW2SW_EQ = 0x14,
MLX4_CMD_QUERY_EQ = 0x15,
/* CQ commands */
MLX4_CMD_SW2HW_CQ = 0x16,
MLX4_CMD_HW2SW_CQ = 0x17,
MLX4_CMD_QUERY_CQ = 0x18,
MLX4_CMD_RESIZE_CQ = 0x2c,
/* SRQ commands */
MLX4_CMD_SW2HW_SRQ = 0x35,
MLX4_CMD_HW2SW_SRQ = 0x36,
MLX4_CMD_QUERY_SRQ = 0x37,
MLX4_CMD_ARM_SRQ = 0x40,
/* QP/EE commands */
MLX4_CMD_RST2INIT_QP = 0x19,
MLX4_CMD_INIT2RTR_QP = 0x1a,
MLX4_CMD_RTR2RTS_QP = 0x1b,
MLX4_CMD_RTS2RTS_QP = 0x1c,
MLX4_CMD_SQERR2RTS_QP = 0x1d,
MLX4_CMD_2ERR_QP = 0x1e,
MLX4_CMD_RTS2SQD_QP = 0x1f,
MLX4_CMD_SQD2SQD_QP = 0x38,
MLX4_CMD_SQD2RTS_QP = 0x20,
MLX4_CMD_2RST_QP = 0x21,
MLX4_CMD_QUERY_QP = 0x22,
MLX4_CMD_INIT2INIT_QP = 0x2d,
MLX4_CMD_SUSPEND_QP = 0x32,
MLX4_CMD_UNSUSPEND_QP = 0x33,
/* special QP and management commands */
MLX4_CMD_CONF_SPECIAL_QP = 0x23,
MLX4_CMD_MAD_IFC = 0x24,
/* multicast commands */
MLX4_CMD_READ_MCG = 0x25,
MLX4_CMD_WRITE_MCG = 0x26,
MLX4_CMD_MGID_HASH = 0x27,
/* miscellaneous commands */
MLX4_CMD_DIAG_RPRT = 0x30,
MLX4_CMD_NOP = 0x31,
/* debug commands */
MLX4_CMD_QUERY_DEBUG_MSG = 0x2a,
MLX4_CMD_SET_DEBUG_MSG = 0x2b,
};
enum {
MLX4_CMD_TIME_CLASS_A = 10000,
MLX4_CMD_TIME_CLASS_B = 10000,
MLX4_CMD_TIME_CLASS_C = 10000,
};
enum {
MLX4_MAILBOX_SIZE = 4096
};
struct mlx4_dev;
struct mlx4_cmd_mailbox {
void *buf;
dma_addr_t dma;
};
int __mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
int out_is_imm, u32 in_modifier, u8 op_modifier,
u16 op, unsigned long timeout);
/* Invoke a command with no output parameter */
static inline int mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u32 in_modifier,
u8 op_modifier, u16 op, unsigned long timeout)
{
return __mlx4_cmd(dev, in_param, NULL, 0, in_modifier,
op_modifier, op, timeout);
}
/* Invoke a command with an output mailbox */
static inline int mlx4_cmd_box(struct mlx4_dev *dev, u64 in_param, u64 out_param,
u32 in_modifier, u8 op_modifier, u16 op,
unsigned long timeout)
{
return __mlx4_cmd(dev, in_param, &out_param, 0, in_modifier,
op_modifier, op, timeout);
}
/*
* Invoke a command with an immediate output parameter (and copy the
* output into the caller's out_param pointer after the command
* executes).
*/
static inline int mlx4_cmd_imm(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
u32 in_modifier, u8 op_modifier, u16 op,
unsigned long timeout)
{
return __mlx4_cmd(dev, in_param, out_param, 1, in_modifier,
op_modifier, op, timeout);
}
struct mlx4_cmd_mailbox *mlx4_alloc_cmd_mailbox(struct mlx4_dev *dev);
void mlx4_free_cmd_mailbox(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox);
#endif /* MLX4_CMD_H */
/*
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef MLX4_CQ_H
#define MLX4_CQ_H
#include <linux/types.h>
#include <linux/mlx4/device.h>
#include <linux/mlx4/doorbell.h>
struct mlx4_cqe {
__be32 my_qpn;
__be32 immed_rss_invalid;
__be32 g_mlpath_rqpn;
u8 sl;
u8 reserved1;
__be16 rlid;
u32 reserved2;
__be32 byte_cnt;
__be16 wqe_index;
__be16 checksum;
u8 reserved3[3];
u8 owner_sr_opcode;
};
struct mlx4_err_cqe {
__be32 my_qpn;
u32 reserved1[5];
__be16 wqe_index;
u8 vendor_err_syndrome;
u8 syndrome;
u8 reserved2[3];
u8 owner_sr_opcode;
};
enum {
MLX4_CQE_OWNER_MASK = 0x80,
MLX4_CQE_IS_SEND_MASK = 0x40,
MLX4_CQE_OPCODE_MASK = 0x1f
};
enum {
MLX4_CQE_SYNDROME_LOCAL_LENGTH_ERR = 0x01,
MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR = 0x02,
MLX4_CQE_SYNDROME_LOCAL_PROT_ERR = 0x04,
MLX4_CQE_SYNDROME_WR_FLUSH_ERR = 0x05,
MLX4_CQE_SYNDROME_MW_BIND_ERR = 0x06,
MLX4_CQE_SYNDROME_BAD_RESP_ERR = 0x10,
MLX4_CQE_SYNDROME_LOCAL_ACCESS_ERR = 0x11,
MLX4_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR = 0x12,
MLX4_CQE_SYNDROME_REMOTE_ACCESS_ERR = 0x13,
MLX4_CQE_SYNDROME_REMOTE_OP_ERR = 0x14,
MLX4_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR = 0x15,
MLX4_CQE_SYNDROME_RNR_RETRY_EXC_ERR = 0x16,
MLX4_CQE_SYNDROME_REMOTE_ABORTED_ERR = 0x22,
};
static inline void mlx4_cq_arm(struct mlx4_cq *cq, u32 cmd,
void __iomem *uar_page,
spinlock_t *doorbell_lock)
{
__be32 doorbell[2];
u32 sn;
u32 ci;
sn = cq->arm_sn & 3;
ci = cq->cons_index & 0xffffff;
*cq->arm_db = cpu_to_be32(sn << 28 | cmd | ci);
/*
* Make sure that the doorbell record in host memory is
* written before ringing the doorbell via PCI MMIO.
*/
wmb();
doorbell[0] = cpu_to_be32(sn << 28 | cmd | cq->cqn);
doorbell[1] = cpu_to_be32(ci);
mlx4_write64(doorbell, uar_page + MLX4_CQ_DOORBELL, doorbell_lock);
}
static inline void mlx4_cq_set_ci(struct mlx4_cq *cq)
{
*cq->set_ci_db = cpu_to_be32(cq->cons_index & 0xffffff);
}
enum {
MLX4_CQ_DB_REQ_NOT_SOL = 1 << 24,
MLX4_CQ_DB_REQ_NOT = 2 << 24
};
#endif /* MLX4_CQ_H */
/*
* Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef MLX4_DEVICE_H
#define MLX4_DEVICE_H
#include <linux/pci.h>
#include <linux/completion.h>
#include <linux/radix-tree.h>
#include <asm/atomic.h>
enum {
MLX4_FLAG_MSI_X = 1 << 0,
};
enum {
MLX4_MAX_PORTS = 2
};
enum {
MLX4_DEV_CAP_FLAG_RC = 1 << 0,
MLX4_DEV_CAP_FLAG_UC = 1 << 1,
MLX4_DEV_CAP_FLAG_UD = 1 << 2,
MLX4_DEV_CAP_FLAG_SRQ = 1 << 6,
MLX4_DEV_CAP_FLAG_IPOIB_CSUM = 1 << 7,
MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR = 1 << 8,
MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR = 1 << 9,
MLX4_DEV_CAP_FLAG_MEM_WINDOW = 1 << 16,
MLX4_DEV_CAP_FLAG_APM = 1 << 17,
MLX4_DEV_CAP_FLAG_ATOMIC = 1 << 18,
MLX4_DEV_CAP_FLAG_RAW_MCAST = 1 << 19,
MLX4_DEV_CAP_FLAG_UD_AV_PORT = 1 << 20,
MLX4_DEV_CAP_FLAG_UD_MCAST = 1 << 21
};
enum mlx4_event {
MLX4_EVENT_TYPE_COMP = 0x00,
MLX4_EVENT_TYPE_PATH_MIG = 0x01,
MLX4_EVENT_TYPE_COMM_EST = 0x02,
MLX4_EVENT_TYPE_SQ_DRAINED = 0x03,
MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE = 0x13,
MLX4_EVENT_TYPE_SRQ_LIMIT = 0x14,
MLX4_EVENT_TYPE_CQ_ERROR = 0x04,
MLX4_EVENT_TYPE_WQ_CATAS_ERROR = 0x05,
MLX4_EVENT_TYPE_EEC_CATAS_ERROR = 0x06,
MLX4_EVENT_TYPE_PATH_MIG_FAILED = 0x07,
MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR = 0x10,
MLX4_EVENT_TYPE_WQ_ACCESS_ERROR = 0x11,
MLX4_EVENT_TYPE_SRQ_CATAS_ERROR = 0x12,
MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR = 0x08,
MLX4_EVENT_TYPE_PORT_CHANGE = 0x09,
MLX4_EVENT_TYPE_EQ_OVERFLOW = 0x0f,
MLX4_EVENT_TYPE_ECC_DETECT = 0x0e,
MLX4_EVENT_TYPE_CMD = 0x0a
};
enum {
MLX4_PORT_CHANGE_SUBTYPE_DOWN = 1,
MLX4_PORT_CHANGE_SUBTYPE_ACTIVE = 4
};
enum {
MLX4_PERM_LOCAL_READ = 1 << 10,
MLX4_PERM_LOCAL_WRITE = 1 << 11,
MLX4_PERM_REMOTE_READ = 1 << 12,
MLX4_PERM_REMOTE_WRITE = 1 << 13,
MLX4_PERM_ATOMIC = 1 << 14
};
enum {
MLX4_OPCODE_NOP = 0x00,
MLX4_OPCODE_SEND_INVAL = 0x01,
MLX4_OPCODE_RDMA_WRITE = 0x08,
MLX4_OPCODE_RDMA_WRITE_IMM = 0x09,
MLX4_OPCODE_SEND = 0x0a,
MLX4_OPCODE_SEND_IMM = 0x0b,
MLX4_OPCODE_LSO = 0x0e,
MLX4_OPCODE_RDMA_READ = 0x10,
MLX4_OPCODE_ATOMIC_CS = 0x11,
MLX4_OPCODE_ATOMIC_FA = 0x12,
MLX4_OPCODE_ATOMIC_MASK_CS = 0x14,
MLX4_OPCODE_ATOMIC_MASK_FA = 0x15,
MLX4_OPCODE_BIND_MW = 0x18,
MLX4_OPCODE_FMR = 0x19,
MLX4_OPCODE_LOCAL_INVAL = 0x1b,
MLX4_OPCODE_CONFIG_CMD = 0x1f,
MLX4_RECV_OPCODE_RDMA_WRITE_IMM = 0x00,
MLX4_RECV_OPCODE_SEND = 0x01,
MLX4_RECV_OPCODE_SEND_IMM = 0x02,
MLX4_RECV_OPCODE_SEND_INVAL = 0x03,
MLX4_CQE_OPCODE_ERROR = 0x1e,
MLX4_CQE_OPCODE_RESIZE = 0x16,
};
enum {
MLX4_STAT_RATE_OFFSET = 5
};
struct mlx4_caps {
u64 fw_ver;
int num_ports;
int vl_cap;
int mtu_cap;
int gid_table_len;
int pkey_table_len;
int local_ca_ack_delay;
int num_uars;
int bf_reg_size;
int bf_regs_per_page;
int max_sq_sg;
int max_rq_sg;
int num_qps;
int max_wqes;
int max_sq_desc_sz;
int max_rq_desc_sz;
int max_qp_init_rdma;
int max_qp_dest_rdma;
int reserved_qps;
int sqp_start;
int num_srqs;
int max_srq_wqes;
int max_srq_sge;
int reserved_srqs;
int num_cqs;
int max_cqes;
int reserved_cqs;
int num_eqs;
int reserved_eqs;
int num_mpts;
int num_mtt_segs;
int fmr_reserved_mtts;
int reserved_mtts;
int reserved_mrws;
int reserved_uars;
int num_mgms;
int num_amgms;
int reserved_mcgs;
int num_qp_per_mgm;
int num_pds;
int reserved_pds;
int mtt_entry_sz;
u32 page_size_cap;
u32 flags;
u16 stat_rate_support;
u8 port_width_cap;
};
struct mlx4_buf_list {
void *buf;
dma_addr_t map;
};
struct mlx4_buf {
union {
struct mlx4_buf_list direct;
struct mlx4_buf_list *page_list;
} u;
int nbufs;
int npages;
int page_shift;
};
struct mlx4_mtt {
u32 first_seg;
int order;
int page_shift;
};
struct mlx4_mr {
struct mlx4_mtt mtt;
u64 iova;
u64 size;
u32 key;
u32 pd;
u32 access;
int enabled;
};
struct mlx4_uar {
unsigned long pfn;
int index;
};
struct mlx4_cq {
void (*comp) (struct mlx4_cq *);
void (*event) (struct mlx4_cq *, enum mlx4_event);
struct mlx4_uar *uar;
u32 cons_index;
__be32 *set_ci_db;
__be32 *arm_db;
int arm_sn;
int cqn;
atomic_t refcount;
struct completion free;
};
struct mlx4_qp {
void (*event) (struct mlx4_qp *, enum mlx4_event);
int qpn;
atomic_t refcount;
struct completion free;
};
struct mlx4_srq {
void (*event) (struct mlx4_srq *, enum mlx4_event);
int srqn;
int max;
int max_gs;
int wqe_shift;
atomic_t refcount;
struct completion free;
};
struct mlx4_av {
__be32 port_pd;
u8 reserved1;
u8 g_slid;
__be16 dlid;
u8 reserved2;
u8 gid_index;
u8 stat_rate;
u8 hop_limit;
__be32 sl_tclass_flowlabel;
u8 dgid[16];
};
struct mlx4_dev {
struct pci_dev *pdev;
unsigned long flags;
struct mlx4_caps caps;
struct radix_tree_root qp_table_tree;
};
struct mlx4_init_port_param {
int set_guid0;
int set_node_guid;
int set_si_guid;
u16 mtu;
int port_width_cap;
u16 vl_cap;
u16 max_gid;
u16 max_pkey;
u64 guid0;
u64 node_guid;
u64 si_guid;
};
int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
struct mlx4_buf *buf);
void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf);
int mlx4_pd_alloc(struct mlx4_dev *dev, u32 *pdn);
void mlx4_pd_free(struct mlx4_dev *dev, u32 pdn);
int mlx4_uar_alloc(struct mlx4_dev *dev, struct mlx4_uar *uar);
void mlx4_uar_free(struct mlx4_dev *dev, struct mlx4_uar *uar);
int mlx4_mtt_init(struct mlx4_dev *dev, int npages, int page_shift,
struct mlx4_mtt *mtt);
void mlx4_mtt_cleanup(struct mlx4_dev *dev, struct mlx4_mtt *mtt);
u64 mlx4_mtt_addr(struct mlx4_dev *dev, struct mlx4_mtt *mtt);
int mlx4_mr_alloc(struct mlx4_dev *dev, u32 pd, u64 iova, u64 size, u32 access,
int npages, int page_shift, struct mlx4_mr *mr);
void mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr);
int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr);
int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
int start_index, int npages, u64 *page_list);
int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
struct mlx4_buf *buf);
int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt,
struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq);
void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq);
int mlx4_qp_alloc(struct mlx4_dev *dev, int sqpn, struct mlx4_qp *qp);
void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp);
int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, struct mlx4_mtt *mtt,
u64 db_rec, struct mlx4_srq *srq);
void mlx4_srq_free(struct mlx4_dev *dev, struct mlx4_srq *srq);
int mlx4_srq_arm(struct mlx4_dev *dev, struct mlx4_srq *srq, int limit_watermark);
int mlx4_INIT_PORT(struct mlx4_dev *dev, struct mlx4_init_port_param *param, int port);
int mlx4_CLOSE_PORT(struct mlx4_dev *dev, int port);
int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16]);
int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16]);
#endif /* MLX4_DEVICE_H */
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef MLX4_DOORBELL_H
#define MLX4_DOORBELL_H
#include <linux/types.h>
#include <linux/io.h>
#define MLX4_SEND_DOORBELL 0x14
#define MLX4_CQ_DOORBELL 0x20
#if BITS_PER_LONG == 64
/*
* Assume that we can just write a 64-bit doorbell atomically. s390
* actually doesn't have writeq() but S/390 systems don't even have
* PCI so we won't worry about it.
*/
#define MLX4_DECLARE_DOORBELL_LOCK(name)
#define MLX4_INIT_DOORBELL_LOCK(ptr) do { } while (0)
#define MLX4_GET_DOORBELL_LOCK(ptr) (NULL)
static inline void mlx4_write64_raw(__be64 val, void __iomem *dest)
{
__raw_writeq((__force u64) val, dest);
}
static inline void mlx4_write64(__be32 val[2], void __iomem *dest,
spinlock_t *doorbell_lock)
{
__raw_writeq(*(u64 *) val, dest);
}
#else
/*
* Just fall back to a spinlock to protect the doorbell if
* BITS_PER_LONG is 32 -- there's no portable way to do atomic 64-bit
* MMIO writes.
*/
#define MLX4_DECLARE_DOORBELL_LOCK(name) spinlock_t name;
#define MLX4_INIT_DOORBELL_LOCK(ptr) spin_lock_init(ptr)
#define MLX4_GET_DOORBELL_LOCK(ptr) (ptr)
static inline void mlx4_write64_raw(__be64 val, void __iomem *dest)
{
__raw_writel(((__force u32 *) &val)[0], dest);
__raw_writel(((__force u32 *) &val)[1], dest + 4);
}
static inline void mlx4_write64(__be32 val[2], void __iomem *dest,
spinlock_t *doorbell_lock)
{
unsigned long flags;
spin_lock_irqsave(doorbell_lock, flags);
__raw_writel((__force u32) val[0], dest);
__raw_writel((__force u32) val[1], dest + 4);
spin_unlock_irqrestore(doorbell_lock, flags);
}
#endif
#endif /* MLX4_DOORBELL_H */
/*
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef MLX4_DRIVER_H
#define MLX4_DRIVER_H
#include <linux/device.h>
struct mlx4_dev;
enum mlx4_dev_event {
MLX4_DEV_EVENT_CATASTROPHIC_ERROR,
MLX4_DEV_EVENT_PORT_UP,
MLX4_DEV_EVENT_PORT_DOWN,
MLX4_DEV_EVENT_PORT_REINIT,
};
struct mlx4_interface {
void * (*add) (struct mlx4_dev *dev);
void (*remove)(struct mlx4_dev *dev, void *context);
void (*event) (struct mlx4_dev *dev, void *context,
enum mlx4_dev_event event, int subtype,
int port);
struct list_head list;
};
int mlx4_register_interface(struct mlx4_interface *intf);
void mlx4_unregister_interface(struct mlx4_interface *intf);
#endif /* MLX4_DRIVER_H */
/*
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef MLX4_QP_H
#define MLX4_QP_H
#include <linux/types.h>
#include <linux/mlx4/device.h>
#define MLX4_INVALID_LKEY 0x100
enum mlx4_qp_optpar {
MLX4_QP_OPTPAR_ALT_ADDR_PATH = 1 << 0,
MLX4_QP_OPTPAR_RRE = 1 << 1,
MLX4_QP_OPTPAR_RAE = 1 << 2,
MLX4_QP_OPTPAR_RWE = 1 << 3,
MLX4_QP_OPTPAR_PKEY_INDEX = 1 << 4,
MLX4_QP_OPTPAR_Q_KEY = 1 << 5,
MLX4_QP_OPTPAR_RNR_TIMEOUT = 1 << 6,
MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH = 1 << 7,
MLX4_QP_OPTPAR_SRA_MAX = 1 << 8,
MLX4_QP_OPTPAR_RRA_MAX = 1 << 9,
MLX4_QP_OPTPAR_PM_STATE = 1 << 10,
MLX4_QP_OPTPAR_RETRY_COUNT = 1 << 12,
MLX4_QP_OPTPAR_RNR_RETRY = 1 << 13,
MLX4_QP_OPTPAR_ACK_TIMEOUT = 1 << 14,
MLX4_QP_OPTPAR_SCHED_QUEUE = 1 << 16
};
enum mlx4_qp_state {
MLX4_QP_STATE_RST = 0,
MLX4_QP_STATE_INIT = 1,
MLX4_QP_STATE_RTR = 2,
MLX4_QP_STATE_RTS = 3,
MLX4_QP_STATE_SQER = 4,
MLX4_QP_STATE_SQD = 5,
MLX4_QP_STATE_ERR = 6,
MLX4_QP_STATE_SQ_DRAINING = 7,
MLX4_QP_NUM_STATE
};
enum {
MLX4_QP_ST_RC = 0x0,
MLX4_QP_ST_UC = 0x1,
MLX4_QP_ST_RD = 0x2,
MLX4_QP_ST_UD = 0x3,
MLX4_QP_ST_MLX = 0x7
};
enum {
MLX4_QP_PM_MIGRATED = 0x3,
MLX4_QP_PM_ARMED = 0x0,
MLX4_QP_PM_REARM = 0x1
};
enum {
/* params1 */
MLX4_QP_BIT_SRE = 1 << 15,
MLX4_QP_BIT_SWE = 1 << 14,
MLX4_QP_BIT_SAE = 1 << 13,
/* params2 */
MLX4_QP_BIT_RRE = 1 << 15,
MLX4_QP_BIT_RWE = 1 << 14,
MLX4_QP_BIT_RAE = 1 << 13,
MLX4_QP_BIT_RIC = 1 << 4,
};
struct mlx4_qp_path {
u8 fl;
u8 reserved1[2];
u8 pkey_index;
u8 reserved2;
u8 grh_mylmc;
__be16 rlid;
u8 ackto;
u8 mgid_index;
u8 static_rate;
u8 hop_limit;
__be32 tclass_flowlabel;
u8 rgid[16];
u8 sched_queue;
u8 snooper_flags;
u8 reserved3[2];
u8 counter_index;
u8 reserved4[7];
};
struct mlx4_qp_context {
__be32 flags;
__be32 pd;
u8 mtu_msgmax;
u8 rq_size_stride;
u8 sq_size_stride;
u8 rlkey;
__be32 usr_page;
__be32 local_qpn;
__be32 remote_qpn;
struct mlx4_qp_path pri_path;
struct mlx4_qp_path alt_path;
__be32 params1;
u32 reserved1;
__be32 next_send_psn;
__be32 cqn_send;
u32 reserved2[2];
__be32 last_acked_psn;
__be32 ssn;
__be32 params2;
__be32 rnr_nextrecvpsn;
__be32 srcd;
__be32 cqn_recv;
__be64 db_rec_addr;
__be32 qkey;
__be32 srqn;
__be32 msn;
__be16 rq_wqe_counter;
__be16 sq_wqe_counter;
u32 reserved3[2];
__be32 param3;
__be32 nummmcpeers_basemkey;
u8 log_page_size;
u8 reserved4[2];
u8 mtt_base_addr_h;
__be32 mtt_base_addr_l;
u32 reserved5[10];
};
enum {
MLX4_WQE_CTRL_FENCE = 1 << 6,
MLX4_WQE_CTRL_CQ_UPDATE = 3 << 2,
MLX4_WQE_CTRL_SOLICITED = 1 << 1,
};
struct mlx4_wqe_ctrl_seg {
__be32 owner_opcode;
u8 reserved2[3];
u8 fence_size;
/*
* High 24 bits are SRC remote buffer; low 8 bits are flags:
* [7] SO (strong ordering)
* [5] TCP/UDP checksum
* [4] IP checksum
* [3:2] C (generate completion queue entry)
* [1] SE (solicited event)
*/
__be32 srcrb_flags;
/*
* imm is immediate data for send/RDMA write w/ immediate;
* also invalidation key for send with invalidate; input
* modifier for WQEs on CCQs.
*/
__be32 imm;
};
enum {
MLX4_WQE_MLX_VL15 = 1 << 17,
MLX4_WQE_MLX_SLR = 1 << 16
};
struct mlx4_wqe_mlx_seg {
u8 owner;
u8 reserved1[2];
u8 opcode;
u8 reserved2[3];
u8 size;
/*
* [17] VL15
* [16] SLR
* [15:12] static rate
* [11:8] SL
* [4] ICRC
* [3:2] C
* [0] FL (force loopback)
*/
__be32 flags;
__be16 rlid;
u16 reserved3;
};
struct mlx4_wqe_datagram_seg {
__be32 av[8];
__be32 dqpn;
__be32 qkey;
__be32 reservd[2];
};
struct mlx4_wqe_bind_seg {
__be32 flags1;
__be32 flags2;
__be32 new_rkey;
__be32 lkey;
__be64 addr;
__be64 length;
};
struct mlx4_wqe_fmr_seg {
__be32 flags;
__be32 mem_key;
__be64 buf_list;
__be64 start_addr;
__be64 reg_len;
__be32 offset;
__be32 page_size;
u32 reserved[2];
};
struct mlx4_wqe_fmr_ext_seg {
u8 flags;
u8 reserved;
__be16 app_mask;
__be16 wire_app_tag;
__be16 mem_app_tag;
__be32 wire_ref_tag_base;
__be32 mem_ref_tag_base;
};
struct mlx4_wqe_local_inval_seg {
u8 flags;
u8 reserved1[3];
__be32 mem_key;
u8 reserved2[3];
u8 guest_id;
__be64 pa;
};
struct mlx4_wqe_raddr_seg {
__be64 raddr;
__be32 rkey;
u32 reserved;
};
struct mlx4_wqe_atomic_seg {
__be64 swap_add;
__be64 compare;
};
struct mlx4_wqe_data_seg {
__be32 byte_count;
__be32 lkey;
__be64 addr;
};
struct mlx4_wqe_inline_seg {
__be32 byte_count;
};
int mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
enum mlx4_qp_state cur_state, enum mlx4_qp_state new_state,
struct mlx4_qp_context *context, enum mlx4_qp_optpar optpar,
int sqd_event, struct mlx4_qp *qp);
static inline struct mlx4_qp *__mlx4_qp_lookup(struct mlx4_dev *dev, u32 qpn)
{
return radix_tree_lookup(&dev->qp_table_tree, qpn & (dev->caps.num_qps - 1));
}
void mlx4_qp_remove(struct mlx4_dev *dev, struct mlx4_qp *qp);
#endif /* MLX4_QP_H */
/*
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef MLX4_SRQ_H
#define MLX4_SRQ_H
struct mlx4_wqe_srq_next_seg {
u16 reserved1;
__be16 next_wqe_index;
u32 reserved2[3];
};
#endif /* MLX4_SRQ_H */
/*
* Copyright (c) 2007 Cisco Systems. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef IB_UMEM_H
#define IB_UMEM_H
#include <linux/list.h>
#include <linux/scatterlist.h>
struct ib_ucontext;
struct ib_umem {
struct ib_ucontext *context;
size_t length;
int offset;
int page_size;
int writable;
struct list_head chunk_list;
struct work_struct work;
struct mm_struct *mm;
unsigned long diff;
};
struct ib_umem_chunk {
struct list_head list;
int nents;
int nmap;
struct scatterlist page_list[0];
};
#ifdef CONFIG_INFINIBAND_USER_MEM
struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
size_t size, int access);
void ib_umem_release(struct ib_umem *umem);
int ib_umem_page_count(struct ib_umem *umem);
#else /* CONFIG_INFINIBAND_USER_MEM */
#include <linux/err.h>
static inline struct ib_umem *ib_umem_get(struct ib_ucontext *context,
unsigned long addr, size_t size,
int access) {
return ERR_PTR(-EINVAL);
}
static inline void ib_umem_release(struct ib_umem *umem) { }
static inline int ib_umem_page_count(struct ib_umem *umem) { return 0; }
#endif /* CONFIG_INFINIBAND_USER_MEM */
#endif /* IB_UMEM_H */
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
* Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved.
* Copyright (c) 2004 Voltaire Corporation. All rights reserved. * Copyright (c) 2004 Voltaire Corporation. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. * Copyright (c) 2005, 2006, 2007 Cisco Systems. All rights reserved.
* *
* This software is available to you under a choice of one of two * This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU * licenses. You may choose to be licensed under the terms of the GNU
...@@ -710,6 +710,7 @@ struct ib_ucontext { ...@@ -710,6 +710,7 @@ struct ib_ucontext {
struct list_head qp_list; struct list_head qp_list;
struct list_head srq_list; struct list_head srq_list;
struct list_head ah_list; struct list_head ah_list;
int closing;
}; };
struct ib_uobject { struct ib_uobject {
...@@ -723,23 +724,6 @@ struct ib_uobject { ...@@ -723,23 +724,6 @@ struct ib_uobject {
int live; int live;
}; };
struct ib_umem {
unsigned long user_base;
unsigned long virt_base;
size_t length;
int offset;
int page_size;
int writable;
struct list_head chunk_list;
};
struct ib_umem_chunk {
struct list_head list;
int nents;
int nmap;
struct scatterlist page_list[0];
};
struct ib_udata { struct ib_udata {
void __user *inbuf; void __user *inbuf;
void __user *outbuf; void __user *outbuf;
...@@ -752,11 +736,6 @@ struct ib_udata { ...@@ -752,11 +736,6 @@ struct ib_udata {
((void *) &((struct ib_umem_chunk *) 0)->page_list[1] - \ ((void *) &((struct ib_umem_chunk *) 0)->page_list[1] - \
(void *) &((struct ib_umem_chunk *) 0)->page_list[0])) (void *) &((struct ib_umem_chunk *) 0)->page_list[0]))
struct ib_umem_object {
struct ib_uobject uobject;
struct ib_umem umem;
};
struct ib_pd { struct ib_pd {
struct ib_device *device; struct ib_device *device;
struct ib_uobject *uobject; struct ib_uobject *uobject;
...@@ -1003,7 +982,8 @@ struct ib_device { ...@@ -1003,7 +982,8 @@ struct ib_device {
int mr_access_flags, int mr_access_flags,
u64 *iova_start); u64 *iova_start);
struct ib_mr * (*reg_user_mr)(struct ib_pd *pd, struct ib_mr * (*reg_user_mr)(struct ib_pd *pd,
struct ib_umem *region, u64 start, u64 length,
u64 virt_addr,
int mr_access_flags, int mr_access_flags,
struct ib_udata *udata); struct ib_udata *udata);
int (*query_mr)(struct ib_mr *mr, int (*query_mr)(struct ib_mr *mr,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment