Commit 5aad2145 authored by Nicholas Bellinger

Merge branch 'for-next' of...

Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband into for-next
parents fa389e22 2dea9094
......@@ -42,29 +42,29 @@
#include "uverbs.h"
#define IB_UMEM_MAX_PAGE_CHUNK \
((PAGE_SIZE - offsetof(struct ib_umem_chunk, page_list)) / \
((void *) &((struct ib_umem_chunk *) 0)->page_list[1] - \
(void *) &((struct ib_umem_chunk *) 0)->page_list[0]))
static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
{
struct ib_umem_chunk *chunk, *tmp;
struct scatterlist *sg;
struct page *page;
int i;
list_for_each_entry_safe(chunk, tmp, &umem->chunk_list, list) {
ib_dma_unmap_sg(dev, chunk->page_list,
chunk->nents, DMA_BIDIRECTIONAL);
for (i = 0; i < chunk->nents; ++i) {
struct page *page = sg_page(&chunk->page_list[i]);
if (umem->nmap > 0)
ib_dma_unmap_sg(dev, umem->sg_head.sgl,
umem->nmap,
DMA_BIDIRECTIONAL);
if (umem->writable && dirty)
set_page_dirty_lock(page);
put_page(page);
}
for_each_sg(umem->sg_head.sgl, sg, umem->npages, i) {
kfree(chunk);
page = sg_page(sg);
if (umem->writable && dirty)
set_page_dirty_lock(page);
put_page(page);
}
sg_free_table(&umem->sg_head);
return;
}
/**
......@@ -81,15 +81,15 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
struct ib_umem *umem;
struct page **page_list;
struct vm_area_struct **vma_list;
struct ib_umem_chunk *chunk;
unsigned long locked;
unsigned long lock_limit;
unsigned long cur_base;
unsigned long npages;
int ret;
int off;
int i;
DEFINE_DMA_ATTRS(attrs);
struct scatterlist *sg, *sg_list_start;
int need_release = 0;
if (dmasync)
dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs);
......@@ -97,7 +97,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
if (!can_do_mlock())
return ERR_PTR(-EPERM);
umem = kmalloc(sizeof *umem, GFP_KERNEL);
umem = kzalloc(sizeof *umem, GFP_KERNEL);
if (!umem)
return ERR_PTR(-ENOMEM);
......@@ -117,8 +117,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
/* We assume the memory is from hugetlb until proved otherwise */
umem->hugetlb = 1;
INIT_LIST_HEAD(&umem->chunk_list);
page_list = (struct page **) __get_free_page(GFP_KERNEL);
if (!page_list) {
kfree(umem);
......@@ -147,7 +145,18 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
cur_base = addr & PAGE_MASK;
ret = 0;
if (npages == 0) {
ret = -EINVAL;
goto out;
}
ret = sg_alloc_table(&umem->sg_head, npages, GFP_KERNEL);
if (ret)
goto out;
need_release = 1;
sg_list_start = umem->sg_head.sgl;
while (npages) {
ret = get_user_pages(current, current->mm, cur_base,
min_t(unsigned long, npages,
......@@ -157,54 +166,38 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
if (ret < 0)
goto out;
umem->npages += ret;
cur_base += ret * PAGE_SIZE;
npages -= ret;
off = 0;
while (ret) {
chunk = kmalloc(sizeof *chunk + sizeof (struct scatterlist) *
min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK),
GFP_KERNEL);
if (!chunk) {
ret = -ENOMEM;
goto out;
}
chunk->nents = min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK);
sg_init_table(chunk->page_list, chunk->nents);
for (i = 0; i < chunk->nents; ++i) {
if (vma_list &&
!is_vm_hugetlb_page(vma_list[i + off]))
umem->hugetlb = 0;
sg_set_page(&chunk->page_list[i], page_list[i + off], PAGE_SIZE, 0);
}
chunk->nmap = ib_dma_map_sg_attrs(context->device,
&chunk->page_list[0],
chunk->nents,
DMA_BIDIRECTIONAL,
&attrs);
if (chunk->nmap <= 0) {
for (i = 0; i < chunk->nents; ++i)
put_page(sg_page(&chunk->page_list[i]));
kfree(chunk);
ret = -ENOMEM;
goto out;
}
ret -= chunk->nents;
off += chunk->nents;
list_add_tail(&chunk->list, &umem->chunk_list);
for_each_sg(sg_list_start, sg, ret, i) {
if (vma_list && !is_vm_hugetlb_page(vma_list[i]))
umem->hugetlb = 0;
sg_set_page(sg, page_list[i], PAGE_SIZE, 0);
}
ret = 0;
/* preparing for next loop */
sg_list_start = sg;
}
umem->nmap = ib_dma_map_sg_attrs(context->device,
umem->sg_head.sgl,
umem->npages,
DMA_BIDIRECTIONAL,
&attrs);
if (umem->nmap <= 0) {
ret = -ENOMEM;
goto out;
}
ret = 0;
out:
if (ret < 0) {
__ib_umem_release(context->device, umem, 0);
if (need_release)
__ib_umem_release(context->device, umem, 0);
kfree(umem);
} else
current->mm->pinned_vm = locked;
......@@ -278,17 +271,16 @@ EXPORT_SYMBOL(ib_umem_release);
int ib_umem_page_count(struct ib_umem *umem)
{
struct ib_umem_chunk *chunk;
int shift;
int i;
int n;
struct scatterlist *sg;
shift = ilog2(umem->page_size);
n = 0;
list_for_each_entry(chunk, &umem->chunk_list, list)
for (i = 0; i < chunk->nmap; ++i)
n += sg_dma_len(&chunk->page_list[i]) >> shift;
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i)
n += sg_dma_len(sg) >> shift;
return n;
}
......
......@@ -1169,6 +1169,45 @@ int ib_dereg_mr(struct ib_mr *mr)
}
EXPORT_SYMBOL(ib_dereg_mr);
/**
 * ib_create_mr - have the device driver create a memory region on @pd
 * @pd: protection domain the new memory region will belong to
 * @mr_init_attr: creation attributes, handed to the driver unmodified
 *
 * Returns ERR_PTR(-ENOSYS) when the device has no create_mr method.
 * An ERR_PTR coming back from the driver is passed through untouched.
 * On success the generic ib_mr fields are initialised here and a
 * reference is taken on @pd.
 */
struct ib_mr *ib_create_mr(struct ib_pd *pd,
			   struct ib_mr_init_attr *mr_init_attr)
{
	struct ib_mr *new_mr;

	if (pd->device->create_mr == NULL)
		return ERR_PTR(-ENOSYS);

	new_mr = pd->device->create_mr(pd, mr_init_attr);
	if (IS_ERR(new_mr))
		return new_mr;

	/* Driver succeeded: fill in the fields owned by the core. */
	new_mr->uobject = NULL;
	new_mr->pd = pd;
	new_mr->device = pd->device;
	atomic_set(&new_mr->usecnt, 0);
	atomic_inc(&pd->usecnt);

	return new_mr;
}
EXPORT_SYMBOL(ib_create_mr);
int ib_destroy_mr(struct ib_mr *mr)
{
struct ib_pd *pd;
int ret;
if (atomic_read(&mr->usecnt))
return -EBUSY;
pd = mr->pd;
ret = mr->device->destroy_mr(mr);
if (!ret)
atomic_dec(&pd->usecnt);
return ret;
}
EXPORT_SYMBOL(ib_destroy_mr);
struct ib_mr *ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len)
{
struct ib_mr *mr;
......@@ -1398,3 +1437,11 @@ int ib_destroy_flow(struct ib_flow *flow_id)
return err;
}
EXPORT_SYMBOL(ib_destroy_flow);
/**
 * ib_check_mr_status - query the status of a memory region via its driver
 * @mr: memory region to check
 * @check_mask: status bits the caller wants checked, passed to the driver
 * @mr_status: output structure, passed to the driver
 *
 * Returns -ENOSYS when the device has no check_mr_status method,
 * otherwise the driver's return value.
 */
int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
		       struct ib_mr_status *mr_status)
{
	if (!mr->device->check_mr_status)
		return -ENOSYS;

	return mr->device->check_mr_status(mr, check_mask, mr_status);
}
EXPORT_SYMBOL(ib_check_mr_status);
......@@ -431,9 +431,9 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 *pages;
u64 kva = 0;
int shift, n, len;
int i, j, k;
int i, k, entry;
int err = 0;
struct ib_umem_chunk *chunk;
struct scatterlist *sg;
struct c2_pd *c2pd = to_c2pd(pd);
struct c2_mr *c2mr;
......@@ -452,10 +452,7 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
shift = ffs(c2mr->umem->page_size) - 1;
n = 0;
list_for_each_entry(chunk, &c2mr->umem->chunk_list, list)
n += chunk->nents;
n = c2mr->umem->nmap;
pages = kmalloc(n * sizeof(u64), GFP_KERNEL);
if (!pages) {
......@@ -464,14 +461,12 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
i = 0;
list_for_each_entry(chunk, &c2mr->umem->chunk_list, list) {
for (j = 0; j < chunk->nmap; ++j) {
len = sg_dma_len(&chunk->page_list[j]) >> shift;
for (k = 0; k < len; ++k) {
pages[i++] =
sg_dma_address(&chunk->page_list[j]) +
(c2mr->umem->page_size * k);
}
for_each_sg(c2mr->umem->sg_head.sgl, sg, c2mr->umem->nmap, entry) {
len = sg_dma_len(sg) >> shift;
for (k = 0; k < len; ++k) {
pages[i++] =
sg_dma_address(sg) +
(c2mr->umem->page_size * k);
}
}
......
......@@ -618,14 +618,13 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
{
__be64 *pages;
int shift, n, len;
int i, j, k;
int i, k, entry;
int err = 0;
struct ib_umem_chunk *chunk;
struct iwch_dev *rhp;
struct iwch_pd *php;
struct iwch_mr *mhp;
struct iwch_reg_user_mr_resp uresp;
struct scatterlist *sg;
PDBG("%s ib_pd %p\n", __func__, pd);
php = to_iwch_pd(pd);
......@@ -645,9 +644,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
shift = ffs(mhp->umem->page_size) - 1;
n = 0;
list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
n += chunk->nents;
n = mhp->umem->nmap;
err = iwch_alloc_pbl(mhp, n);
if (err)
......@@ -661,12 +658,10 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
i = n = 0;
list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
for (j = 0; j < chunk->nmap; ++j) {
len = sg_dma_len(&chunk->page_list[j]) >> shift;
for_each_sg(mhp->umem->sg_head.sgl, sg, mhp->umem->nmap, entry) {
len = sg_dma_len(sg) >> shift;
for (k = 0; k < len; ++k) {
pages[i++] = cpu_to_be64(sg_dma_address(
&chunk->page_list[j]) +
pages[i++] = cpu_to_be64(sg_dma_address(sg) +
mhp->umem->page_size * k);
if (i == PAGE_SIZE / sizeof *pages) {
err = iwch_write_pbl(mhp, pages, i, n);
......@@ -676,7 +671,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
i = 0;
}
}
}
}
if (i)
err = iwch_write_pbl(mhp, pages, i, n);
......
......@@ -678,9 +678,9 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
{
__be64 *pages;
int shift, n, len;
int i, j, k;
int i, k, entry;
int err = 0;
struct ib_umem_chunk *chunk;
struct scatterlist *sg;
struct c4iw_dev *rhp;
struct c4iw_pd *php;
struct c4iw_mr *mhp;
......@@ -710,10 +710,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
shift = ffs(mhp->umem->page_size) - 1;
n = 0;
list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
n += chunk->nents;
n = mhp->umem->nmap;
err = alloc_pbl(mhp, n);
if (err)
goto err;
......@@ -726,24 +723,22 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
i = n = 0;
list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
for (j = 0; j < chunk->nmap; ++j) {
len = sg_dma_len(&chunk->page_list[j]) >> shift;
for (k = 0; k < len; ++k) {
pages[i++] = cpu_to_be64(sg_dma_address(
&chunk->page_list[j]) +
mhp->umem->page_size * k);
if (i == PAGE_SIZE / sizeof *pages) {
err = write_pbl(&mhp->rhp->rdev,
pages,
mhp->attr.pbl_addr + (n << 3), i);
if (err)
goto pbl_done;
n += i;
i = 0;
}
for_each_sg(mhp->umem->sg_head.sgl, sg, mhp->umem->nmap, entry) {
len = sg_dma_len(sg) >> shift;
for (k = 0; k < len; ++k) {
pages[i++] = cpu_to_be64(sg_dma_address(sg) +
mhp->umem->page_size * k);
if (i == PAGE_SIZE / sizeof *pages) {
err = write_pbl(&mhp->rhp->rdev,
pages,
mhp->attr.pbl_addr + (n << 3), i);
if (err)
goto pbl_done;
n += i;
i = 0;
}
}
}
if (i)
err = write_pbl(&mhp->rhp->rdev, pages,
......
......@@ -322,7 +322,7 @@ struct ehca_mr_pginfo {
} phy;
struct { /* type EHCA_MR_PGI_USER section */
struct ib_umem *region;
struct ib_umem_chunk *next_chunk;
struct scatterlist *next_sg;
u64 next_nmap;
} usr;
struct { /* type EHCA_MR_PGI_FMR section */
......
......@@ -400,10 +400,7 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
pginfo.num_hwpages = num_hwpages;
pginfo.u.usr.region = e_mr->umem;
pginfo.next_hwpage = e_mr->umem->offset / hwpage_size;
pginfo.u.usr.next_chunk = list_prepare_entry(pginfo.u.usr.next_chunk,
(&e_mr->umem->chunk_list),
list);
pginfo.u.usr.next_sg = pginfo.u.usr.region->sg_head.sgl;
ret = ehca_reg_mr(shca, e_mr, (u64 *)virt, length, mr_access_flags,
e_pd, &pginfo, &e_mr->ib.ib_mr.lkey,
&e_mr->ib.ib_mr.rkey, EHCA_REG_MR);
......@@ -1858,61 +1855,39 @@ static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo,
u64 *kpage)
{
int ret = 0;
struct ib_umem_chunk *prev_chunk;
struct ib_umem_chunk *chunk;
u64 pgaddr;
u32 i = 0;
u32 j = 0;
int hwpages_per_kpage = PAGE_SIZE / pginfo->hwpage_size;
/* loop over desired chunk entries */
chunk = pginfo->u.usr.next_chunk;
prev_chunk = pginfo->u.usr.next_chunk;
list_for_each_entry_continue(
chunk, (&(pginfo->u.usr.region->chunk_list)), list) {
for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) {
pgaddr = page_to_pfn(sg_page(&chunk->page_list[i]))
<< PAGE_SHIFT ;
*kpage = pgaddr + (pginfo->next_hwpage *
pginfo->hwpage_size);
if ( !(*kpage) ) {
ehca_gen_err("pgaddr=%llx "
"chunk->page_list[i]=%llx "
"i=%x next_hwpage=%llx",
pgaddr, (u64)sg_dma_address(
&chunk->page_list[i]),
i, pginfo->next_hwpage);
return -EFAULT;
}
(pginfo->hwpage_cnt)++;
(pginfo->next_hwpage)++;
kpage++;
if (pginfo->next_hwpage % hwpages_per_kpage == 0) {
(pginfo->kpage_cnt)++;
(pginfo->u.usr.next_nmap)++;
pginfo->next_hwpage = 0;
i++;
}
j++;
if (j >= number) break;
struct scatterlist **sg = &pginfo->u.usr.next_sg;
while (*sg != NULL) {
pgaddr = page_to_pfn(sg_page(*sg))
<< PAGE_SHIFT;
*kpage = pgaddr + (pginfo->next_hwpage *
pginfo->hwpage_size);
if (!(*kpage)) {
ehca_gen_err("pgaddr=%llx "
"sg_dma_address=%llx "
"entry=%llx next_hwpage=%llx",
pgaddr, (u64)sg_dma_address(*sg),
pginfo->u.usr.next_nmap,
pginfo->next_hwpage);
return -EFAULT;
}
if ((pginfo->u.usr.next_nmap >= chunk->nmap) &&
(j >= number)) {
pginfo->u.usr.next_nmap = 0;
prev_chunk = chunk;
break;
} else if (pginfo->u.usr.next_nmap >= chunk->nmap) {
pginfo->u.usr.next_nmap = 0;
prev_chunk = chunk;
} else if (j >= number)
(pginfo->hwpage_cnt)++;
(pginfo->next_hwpage)++;
kpage++;
if (pginfo->next_hwpage % hwpages_per_kpage == 0) {
(pginfo->kpage_cnt)++;
(pginfo->u.usr.next_nmap)++;
pginfo->next_hwpage = 0;
*sg = sg_next(*sg);
}
j++;
if (j >= number)
break;
else
prev_chunk = chunk;
}
pginfo->u.usr.next_chunk =
list_prepare_entry(prev_chunk,
(&(pginfo->u.usr.region->chunk_list)),
list);
return ret;
}
......@@ -1920,20 +1895,19 @@ static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo,
* check given pages for contiguous layout
* last page addr is returned in prev_pgaddr for further check
*/
static int ehca_check_kpages_per_ate(struct scatterlist *page_list,
int start_idx, int end_idx,
static int ehca_check_kpages_per_ate(struct scatterlist **sg,
int num_pages,
u64 *prev_pgaddr)
{
int t;
for (t = start_idx; t <= end_idx; t++) {
u64 pgaddr = page_to_pfn(sg_page(&page_list[t])) << PAGE_SHIFT;
for (; *sg && num_pages > 0; *sg = sg_next(*sg), num_pages--) {
u64 pgaddr = page_to_pfn(sg_page(*sg)) << PAGE_SHIFT;
if (ehca_debug_level >= 3)
ehca_gen_dbg("chunk_page=%llx value=%016llx", pgaddr,
*(u64 *)__va(pgaddr));
if (pgaddr - PAGE_SIZE != *prev_pgaddr) {
ehca_gen_err("uncontiguous page found pgaddr=%llx "
"prev_pgaddr=%llx page_list_i=%x",
pgaddr, *prev_pgaddr, t);
"prev_pgaddr=%llx entries_left_in_hwpage=%x",
pgaddr, *prev_pgaddr, num_pages);
return -EINVAL;
}
*prev_pgaddr = pgaddr;
......@@ -1947,111 +1921,80 @@ static int ehca_set_pagebuf_user2(struct ehca_mr_pginfo *pginfo,
u64 *kpage)
{
int ret = 0;
struct ib_umem_chunk *prev_chunk;
struct ib_umem_chunk *chunk;
u64 pgaddr, prev_pgaddr;
u32 i = 0;
u32 j = 0;
int kpages_per_hwpage = pginfo->hwpage_size / PAGE_SIZE;
int nr_kpages = kpages_per_hwpage;
struct scatterlist **sg = &pginfo->u.usr.next_sg;
while (*sg != NULL) {
/* loop over desired chunk entries */
chunk = pginfo->u.usr.next_chunk;
prev_chunk = pginfo->u.usr.next_chunk;
list_for_each_entry_continue(
chunk, (&(pginfo->u.usr.region->chunk_list)), list) {
for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) {
if (nr_kpages == kpages_per_hwpage) {
pgaddr = ( page_to_pfn(sg_page(&chunk->page_list[i]))
<< PAGE_SHIFT );
*kpage = pgaddr;
if ( !(*kpage) ) {
ehca_gen_err("pgaddr=%llx i=%x",
pgaddr, i);
if (nr_kpages == kpages_per_hwpage) {
pgaddr = (page_to_pfn(sg_page(*sg))
<< PAGE_SHIFT);
*kpage = pgaddr;
if (!(*kpage)) {
ehca_gen_err("pgaddr=%llx entry=%llx",
pgaddr, pginfo->u.usr.next_nmap);
ret = -EFAULT;
return ret;
}
/*
* The first page in a hwpage must be aligned;
* the first MR page is exempt from this rule.
*/
if (pgaddr & (pginfo->hwpage_size - 1)) {
if (pginfo->hwpage_cnt) {
ehca_gen_err(
"invalid alignment "
"pgaddr=%llx entry=%llx "
"mr_pgsize=%llx",
pgaddr, pginfo->u.usr.next_nmap,
pginfo->hwpage_size);
ret = -EFAULT;
return ret;
}
/*
* The first page in a hwpage must be aligned;
* the first MR page is exempt from this rule.
*/
if (pgaddr & (pginfo->hwpage_size - 1)) {
if (pginfo->hwpage_cnt) {
ehca_gen_err(
"invalid alignment "
"pgaddr=%llx i=%x "
"mr_pgsize=%llx",
pgaddr, i,
pginfo->hwpage_size);
ret = -EFAULT;
return ret;
}
/* first MR page */
pginfo->kpage_cnt =
(pgaddr &
(pginfo->hwpage_size - 1)) >>
PAGE_SHIFT;
nr_kpages -= pginfo->kpage_cnt;
*kpage = pgaddr &
~(pginfo->hwpage_size - 1);
}
if (ehca_debug_level >= 3) {
u64 val = *(u64 *)__va(pgaddr);
ehca_gen_dbg("kpage=%llx chunk_page=%llx "
"value=%016llx",
*kpage, pgaddr, val);
}
prev_pgaddr = pgaddr;
i++;
pginfo->kpage_cnt++;
pginfo->u.usr.next_nmap++;
nr_kpages--;
if (!nr_kpages)
goto next_kpage;
continue;
/* first MR page */
pginfo->kpage_cnt =
(pgaddr &
(pginfo->hwpage_size - 1)) >>
PAGE_SHIFT;
nr_kpages -= pginfo->kpage_cnt;
*kpage = pgaddr &
~(pginfo->hwpage_size - 1);
}
if (i + nr_kpages > chunk->nmap) {
ret = ehca_check_kpages_per_ate(
chunk->page_list, i,
chunk->nmap - 1, &prev_pgaddr);
if (ret) return ret;
pginfo->kpage_cnt += chunk->nmap - i;
pginfo->u.usr.next_nmap += chunk->nmap - i;
nr_kpages -= chunk->nmap - i;
break;
if (ehca_debug_level >= 3) {
u64 val = *(u64 *)__va(pgaddr);
ehca_gen_dbg("kpage=%llx page=%llx "
"value=%016llx",
*kpage, pgaddr, val);
}
prev_pgaddr = pgaddr;
*sg = sg_next(*sg);
pginfo->kpage_cnt++;
pginfo->u.usr.next_nmap++;
nr_kpages--;
if (!nr_kpages)
goto next_kpage;
continue;
}
ret = ehca_check_kpages_per_ate(sg, nr_kpages,
&prev_pgaddr);
if (ret)
return ret;
pginfo->kpage_cnt += nr_kpages;
pginfo->u.usr.next_nmap += nr_kpages;
ret = ehca_check_kpages_per_ate(chunk->page_list, i,
i + nr_kpages - 1,
&prev_pgaddr);
if (ret) return ret;
i += nr_kpages;
pginfo->kpage_cnt += nr_kpages;
pginfo->u.usr.next_nmap += nr_kpages;
next_kpage:
nr_kpages = kpages_per_hwpage;
(pginfo->hwpage_cnt)++;
kpage++;
j++;
if (j >= number) break;
}
if ((pginfo->u.usr.next_nmap >= chunk->nmap) &&
(j >= number)) {
pginfo->u.usr.next_nmap = 0;
prev_chunk = chunk;
break;
} else if (pginfo->u.usr.next_nmap >= chunk->nmap) {
pginfo->u.usr.next_nmap = 0;
prev_chunk = chunk;
} else if (j >= number)
nr_kpages = kpages_per_hwpage;
(pginfo->hwpage_cnt)++;
kpage++;
j++;
if (j >= number)
break;
else
prev_chunk = chunk;
}
pginfo->u.usr.next_chunk =
list_prepare_entry(prev_chunk,
(&(pginfo->u.usr.region->chunk_list)),
list);
return ret;
}
......
......@@ -188,8 +188,8 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
{
struct ipath_mr *mr;
struct ib_umem *umem;
struct ib_umem_chunk *chunk;
int n, m, i;
int n, m, entry;
struct scatterlist *sg;
struct ib_mr *ret;
if (length == 0) {
......@@ -202,10 +202,7 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
if (IS_ERR(umem))
return (void *) umem;
n = 0;
list_for_each_entry(chunk, &umem->chunk_list, list)
n += chunk->nents;
n = umem->nmap;
mr = alloc_mr(n, &to_idev(pd->device)->lk_table);
if (!mr) {
ret = ERR_PTR(-ENOMEM);
......@@ -224,22 +221,20 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
m = 0;
n = 0;
list_for_each_entry(chunk, &umem->chunk_list, list) {
for (i = 0; i < chunk->nents; i++) {
void *vaddr;
vaddr = page_address(sg_page(&chunk->page_list[i]));
if (!vaddr) {
ret = ERR_PTR(-EINVAL);
goto bail;
}
mr->mr.map[m]->segs[n].vaddr = vaddr;
mr->mr.map[m]->segs[n].length = umem->page_size;
n++;
if (n == IPATH_SEGSZ) {
m++;
n = 0;
}
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
void *vaddr;
vaddr = page_address(sg_page(sg));
if (!vaddr) {
ret = ERR_PTR(-EINVAL);
goto bail;
}
mr->mr.map[m]->segs[n].vaddr = vaddr;
mr->mr.map[m]->segs[n].length = umem->page_size;
n++;
if (n == IPATH_SEGSZ) {
m++;
n = 0;
}
}
ret = &mr->ibmr;
......
......@@ -45,7 +45,6 @@ int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt,
struct mlx4_db *db)
{
struct mlx4_ib_user_db_page *page;
struct ib_umem_chunk *chunk;
int err = 0;
mutex_lock(&context->db_page_mutex);
......@@ -73,8 +72,7 @@ int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt,
list_add(&page->list, &context->db_page_list);
found:
chunk = list_entry(page->umem->chunk_list.next, struct ib_umem_chunk, list);
db->dma = sg_dma_address(chunk->page_list) + (virt & ~PAGE_MASK);
db->dma = sg_dma_address(page->umem->sg_head.sgl) + (virt & ~PAGE_MASK);
db->u.user_page = page;
++page->refcnt;
......
......@@ -90,11 +90,11 @@ int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
struct ib_umem *umem)
{
u64 *pages;
struct ib_umem_chunk *chunk;
int i, j, k;
int i, k, entry;
int n;
int len;
int err = 0;
struct scatterlist *sg;
pages = (u64 *) __get_free_page(GFP_KERNEL);
if (!pages)
......@@ -102,26 +102,25 @@ int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
i = n = 0;
list_for_each_entry(chunk, &umem->chunk_list, list)
for (j = 0; j < chunk->nmap; ++j) {
len = sg_dma_len(&chunk->page_list[j]) >> mtt->page_shift;
for (k = 0; k < len; ++k) {
pages[i++] = sg_dma_address(&chunk->page_list[j]) +
umem->page_size * k;
/*
* Be friendly to mlx4_write_mtt() and
* pass it chunks of appropriate size.
*/
if (i == PAGE_SIZE / sizeof (u64)) {
err = mlx4_write_mtt(dev->dev, mtt, n,
i, pages);
if (err)
goto out;
n += i;
i = 0;
}
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
len = sg_dma_len(sg) >> mtt->page_shift;
for (k = 0; k < len; ++k) {
pages[i++] = sg_dma_address(sg) +
umem->page_size * k;
/*
* Be friendly to mlx4_write_mtt() and
* pass it chunks of appropriate size.
*/
if (i == PAGE_SIZE / sizeof (u64)) {
err = mlx4_write_mtt(dev->dev, mtt, n,
i, pages);
if (err)
goto out;
n += i;
i = 0;
}
}
}
if (i)
err = mlx4_write_mtt(dev->dev, mtt, n, i, pages);
......
......@@ -366,6 +366,38 @@ static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf)
mlx5_buf_free(&dev->mdev, &buf->buf);
}
static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe,
struct ib_sig_err *item)
{
u16 syndrome = be16_to_cpu(cqe->syndrome);
#define GUARD_ERR (1 << 13)
#define APPTAG_ERR (1 << 12)
#define REFTAG_ERR (1 << 11)
if (syndrome & GUARD_ERR) {
item->err_type = IB_SIG_BAD_GUARD;
item->expected = be32_to_cpu(cqe->expected_trans_sig) >> 16;
item->actual = be32_to_cpu(cqe->actual_trans_sig) >> 16;
} else
if (syndrome & REFTAG_ERR) {
item->err_type = IB_SIG_BAD_REFTAG;
item->expected = be32_to_cpu(cqe->expected_reftag);
item->actual = be32_to_cpu(cqe->actual_reftag);
} else
if (syndrome & APPTAG_ERR) {
item->err_type = IB_SIG_BAD_APPTAG;
item->expected = be32_to_cpu(cqe->expected_trans_sig) & 0xffff;
item->actual = be32_to_cpu(cqe->actual_trans_sig) & 0xffff;
} else {
pr_err("Got signature completion error with bad syndrome %04x\n",
syndrome);
}
item->sig_err_offset = be64_to_cpu(cqe->err_offset);
item->key = be32_to_cpu(cqe->mkey);
}
static int mlx5_poll_one(struct mlx5_ib_cq *cq,
struct mlx5_ib_qp **cur_qp,
struct ib_wc *wc)
......@@ -375,6 +407,9 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
struct mlx5_cqe64 *cqe64;
struct mlx5_core_qp *mqp;
struct mlx5_ib_wq *wq;
struct mlx5_sig_err_cqe *sig_err_cqe;
struct mlx5_core_mr *mmr;
struct mlx5_ib_mr *mr;
uint8_t opcode;
uint32_t qpn;
u16 wqe_ctr;
......@@ -475,6 +510,33 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
}
}
break;
case MLX5_CQE_SIG_ERR:
sig_err_cqe = (struct mlx5_sig_err_cqe *)cqe64;
read_lock(&dev->mdev.priv.mr_table.lock);
mmr = __mlx5_mr_lookup(&dev->mdev,
mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey)));
if (unlikely(!mmr)) {
read_unlock(&dev->mdev.priv.mr_table.lock);
mlx5_ib_warn(dev, "CQE@CQ %06x for unknown MR %6x\n",
cq->mcq.cqn, be32_to_cpu(sig_err_cqe->mkey));
return -EINVAL;
}
mr = to_mibmr(mmr);
get_sig_err_item(sig_err_cqe, &mr->sig->err_item);
mr->sig->sig_err_exists = true;
mr->sig->sigerr_count++;
mlx5_ib_warn(dev, "CQN: 0x%x Got SIGERR on key: 0x%x err_type %x err_offset %llx expected %x actual %x\n",
cq->mcq.cqn, mr->sig->err_item.key,
mr->sig->err_item.err_type,
mr->sig->err_item.sig_err_offset,
mr->sig->err_item.expected,
mr->sig->err_item.actual);
read_unlock(&dev->mdev.priv.mr_table.lock);
goto repoll;
}
return 0;
......
......@@ -47,7 +47,6 @@ int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt,
struct mlx5_db *db)
{
struct mlx5_ib_user_db_page *page;
struct ib_umem_chunk *chunk;
int err = 0;
mutex_lock(&context->db_page_mutex);
......@@ -75,8 +74,7 @@ int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt,
list_add(&page->list, &context->db_page_list);
found:
chunk = list_entry(page->umem->chunk_list.next, struct ib_umem_chunk, list);
db->dma = sg_dma_address(chunk->page_list) + (virt & ~PAGE_MASK);
db->dma = sg_dma_address(page->umem->sg_head.sgl) + (virt & ~PAGE_MASK);
db->u.user_page = page;
++page->refcnt;
......
......@@ -273,6 +273,15 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
if (flags & MLX5_DEV_CAP_FLAG_XRC)
props->device_cap_flags |= IB_DEVICE_XRC;
props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
if (flags & MLX5_DEV_CAP_FLAG_SIG_HAND_OVER) {
props->device_cap_flags |= IB_DEVICE_SIGNATURE_HANDOVER;
/* At this stage no support for signature handover */
props->sig_prot_cap = IB_PROT_T10DIF_TYPE_1 |
IB_PROT_T10DIF_TYPE_2 |
IB_PROT_T10DIF_TYPE_3;
props->sig_guard_cap = IB_GUARD_T10DIF_CRC |
IB_GUARD_T10DIF_CSUM;
}
props->vendor_id = be32_to_cpup((__be32 *)(out_mad->data + 36)) &
0xffffff;
......@@ -1423,12 +1432,15 @@ static int init_one(struct pci_dev *pdev,
dev->ib_dev.get_dma_mr = mlx5_ib_get_dma_mr;
dev->ib_dev.reg_user_mr = mlx5_ib_reg_user_mr;
dev->ib_dev.dereg_mr = mlx5_ib_dereg_mr;
dev->ib_dev.destroy_mr = mlx5_ib_destroy_mr;
dev->ib_dev.attach_mcast = mlx5_ib_mcg_attach;
dev->ib_dev.detach_mcast = mlx5_ib_mcg_detach;
dev->ib_dev.process_mad = mlx5_ib_process_mad;
dev->ib_dev.create_mr = mlx5_ib_create_mr;
dev->ib_dev.alloc_fast_reg_mr = mlx5_ib_alloc_fast_reg_mr;
dev->ib_dev.alloc_fast_reg_page_list = mlx5_ib_alloc_fast_reg_page_list;
dev->ib_dev.free_fast_reg_page_list = mlx5_ib_free_fast_reg_page_list;
dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status;
if (mdev->caps.flags & MLX5_DEV_CAP_FLAG_XRC) {
dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
......
......@@ -44,16 +44,17 @@
void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
int *ncont, int *order)
{
struct ib_umem_chunk *chunk;
unsigned long tmp;
unsigned long m;
int i, j, k;
int i, k;
u64 base = 0;
int p = 0;
int skip;
int mask;
u64 len;
u64 pfn;
struct scatterlist *sg;
int entry;
addr = addr >> PAGE_SHIFT;
tmp = (unsigned long)addr;
......@@ -61,32 +62,31 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
skip = 1 << m;
mask = skip - 1;
i = 0;
list_for_each_entry(chunk, &umem->chunk_list, list)
for (j = 0; j < chunk->nmap; j++) {
len = sg_dma_len(&chunk->page_list[j]) >> PAGE_SHIFT;
pfn = sg_dma_address(&chunk->page_list[j]) >> PAGE_SHIFT;
for (k = 0; k < len; k++) {
if (!(i & mask)) {
tmp = (unsigned long)pfn;
m = min(m, find_first_bit(&tmp, sizeof(tmp)));
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
len = sg_dma_len(sg) >> PAGE_SHIFT;
pfn = sg_dma_address(sg) >> PAGE_SHIFT;
for (k = 0; k < len; k++) {
if (!(i & mask)) {
tmp = (unsigned long)pfn;
m = min(m, find_first_bit(&tmp, sizeof(tmp)));
skip = 1 << m;
mask = skip - 1;
base = pfn;
p = 0;
} else {
if (base + p != pfn) {
tmp = (unsigned long)p;
m = find_first_bit(&tmp, sizeof(tmp));
skip = 1 << m;
mask = skip - 1;
base = pfn;
p = 0;
} else {
if (base + p != pfn) {
tmp = (unsigned long)p;
m = find_first_bit(&tmp, sizeof(tmp));
skip = 1 << m;
mask = skip - 1;
base = pfn;
p = 0;
}
}
p++;
i++;
}
p++;
i++;
}
}
if (i) {
m = min_t(unsigned long, ilog2(roundup_pow_of_two(i)), m);
......@@ -112,32 +112,32 @@ void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
{
int shift = page_shift - PAGE_SHIFT;
int mask = (1 << shift) - 1;
struct ib_umem_chunk *chunk;
int i, j, k;
int i, k;
u64 cur = 0;
u64 base;
int len;
struct scatterlist *sg;
int entry;
i = 0;
list_for_each_entry(chunk, &umem->chunk_list, list)
for (j = 0; j < chunk->nmap; j++) {
len = sg_dma_len(&chunk->page_list[j]) >> PAGE_SHIFT;
base = sg_dma_address(&chunk->page_list[j]);
for (k = 0; k < len; k++) {
if (!(i & mask)) {
cur = base + (k << PAGE_SHIFT);
if (umr)
cur |= 3;
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
len = sg_dma_len(sg) >> PAGE_SHIFT;
base = sg_dma_address(sg);
for (k = 0; k < len; k++) {
if (!(i & mask)) {
cur = base + (k << PAGE_SHIFT);
if (umr)
cur |= 3;
pas[i >> shift] = cpu_to_be64(cur);
mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n",
i >> shift, be64_to_cpu(pas[i >> shift]));
} else
mlx5_ib_dbg(dev, "=====> 0x%llx\n",
base + (k << PAGE_SHIFT));
i++;
}
pas[i >> shift] = cpu_to_be64(cur);
mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n",
i >> shift, be64_to_cpu(pas[i >> shift]));
} else
mlx5_ib_dbg(dev, "=====> 0x%llx\n",
base + (k << PAGE_SHIFT));
i++;
}
}
}
int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset)
......
......@@ -189,6 +189,9 @@ struct mlx5_ib_qp {
int create_type;
u32 pa_lkey;
/* Store signature errors */
bool signature_en;
};
struct mlx5_ib_cq_buf {
......@@ -265,6 +268,7 @@ struct mlx5_ib_mr {
enum ib_wc_status status;
struct mlx5_ib_dev *dev;
struct mlx5_create_mkey_mbox_out out;
struct mlx5_core_sig_ctx *sig;
};
struct mlx5_ib_fast_reg_page_list {
......@@ -396,6 +400,11 @@ static inline struct mlx5_ib_qp *to_mibqp(struct mlx5_core_qp *mqp)
return container_of(mqp, struct mlx5_ib_qp, mqp);
}
/* Map an embedded mlx5_core_mr back to the mlx5_ib_mr that contains it. */
static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mr *mmr)
{
	struct mlx5_ib_mr *ib_mr = container_of(mmr, struct mlx5_ib_mr, mmr);

	return ib_mr;
}
static inline struct mlx5_ib_pd *to_mpd(struct ib_pd *ibpd)
{
return container_of(ibpd, struct mlx5_ib_pd, ibpd);
......@@ -495,6 +504,9 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
struct ib_udata *udata);
int mlx5_ib_dereg_mr(struct ib_mr *ibmr);
int mlx5_ib_destroy_mr(struct ib_mr *ibmr);
struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd,
struct ib_mr_init_attr *mr_init_attr);
struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
int max_page_list_len);
struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
......@@ -530,6 +542,8 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev);
int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev);
int mlx5_mr_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift);
void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context);
int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
struct ib_mr_status *mr_status);
static inline void init_query_mad(struct ib_smp *mad)
{
......
......@@ -992,6 +992,122 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
return 0;
}
/*
 * mlx5_ib_create_mr - create an mkey-backed MR, optionally signature-enabled.
 * @pd: protection domain; its pdn is written into the mkey segment
 * @mr_init_attr: supplies max_reg_descriptors and flags
 *
 * When IB_MR_SIGNATURE_EN is set in the flags, a signature context is
 * allocated, two PSVs are created (indices stored as psv_memory and
 * psv_wire) and the access mode switches from MTT to KLM.
 *
 * Returns a pointer to the embedded ib_mr on success, or ERR_PTR(err)
 * after unwinding whatever had been set up.
 */
struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd,
struct ib_mr_init_attr *mr_init_attr)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_create_mkey_mbox_in *in;
struct mlx5_ib_mr *mr;
int access_mode, err;
/* descriptor count is rounded up to a multiple of 4 */
int ndescs = roundup(mr_init_attr->max_reg_descriptors, 4);
/* zeroed allocation: mr->sig starts out NULL */
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
/* mailbox input, only needed until the mkey has been created */
in = kzalloc(sizeof(*in), GFP_KERNEL);
if (!in) {
err = -ENOMEM;
goto err_free;
}
in->seg.status = 1 << 6; /* free */
in->seg.xlt_oct_size = cpu_to_be32(ndescs);
in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
/* default access mode; overridden below for signature MRs */
access_mode = MLX5_ACCESS_MODE_MTT;
if (mr_init_attr->flags & IB_MR_SIGNATURE_EN) {
u32 psv_index[2];
/* flags_pd is kept big-endian, so convert around the OR */
in->seg.flags_pd = cpu_to_be32(be32_to_cpu(in->seg.flags_pd) |
MLX5_MKEY_BSF_EN);
in->seg.bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE);
mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
if (!mr->sig) {
err = -ENOMEM;
goto err_free_in;
}
/* create mem & wire PSVs */
err = mlx5_core_create_psv(&dev->mdev, to_mpd(pd)->pdn,
2, psv_index);
if (err)
goto err_free_sig;
access_mode = MLX5_ACCESS_MODE_KLM;
mr->sig->psv_memory.psv_idx = psv_index[0];
mr->sig->psv_wire.psv_idx = psv_index[1];
mr->sig->sig_status_checked = true;
mr->sig->sig_err_exists = false;
/* Next UMR, Arm SIGERR */
++mr->sig->sigerr_count;
}
in->seg.flags = MLX5_PERM_UMR_EN | access_mode;
err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in),
NULL, NULL, NULL);
if (err)
goto err_destroy_psv;
/* the same key value is exposed as both lkey and rkey */
mr->ibmr.lkey = mr->mmr.key;
mr->ibmr.rkey = mr->mmr.key;
mr->umem = NULL;
kfree(in);
return &mr->ibmr;
/*
 * Error unwinding: each label undoes one setup step and falls through
 * to the labels below it.
 */
err_destroy_psv:
/* PSVs exist only on the signature path, where mr->sig is non-NULL */
if (mr->sig) {
if (mlx5_core_destroy_psv(&dev->mdev,
mr->sig->psv_memory.psv_idx))
mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
mr->sig->psv_memory.psv_idx);
if (mlx5_core_destroy_psv(&dev->mdev,
mr->sig->psv_wire.psv_idx))
mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
mr->sig->psv_wire.psv_idx);
}
err_free_sig:
/* mr->sig is NULL for non-signature MRs; kfree(NULL) is a no-op */
kfree(mr->sig);
err_free_in:
kfree(in);
err_free:
kfree(mr);
return ERR_PTR(err);
}
int mlx5_ib_destroy_mr(struct ib_mr *ibmr)
{
struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
struct mlx5_ib_mr *mr = to_mmr(ibmr);
int err;
if (mr->sig) {
if (mlx5_core_destroy_psv(&dev->mdev,
mr->sig->psv_memory.psv_idx))
mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
mr->sig->psv_memory.psv_idx);
if (mlx5_core_destroy_psv(&dev->mdev,
mr->sig->psv_wire.psv_idx))
mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
mr->sig->psv_wire.psv_idx);
kfree(mr->sig);
}
err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
if (err) {
mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
mr->mmr.key, err);
return err;
}
kfree(mr);
return err;
}
struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
int max_page_list_len)
{
......@@ -1077,3 +1193,44 @@ void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
kfree(mfrpl->ibfrpl.page_list);
kfree(mfrpl);
}
/**
 * mlx5_ib_check_mr_status - report and clear the signature status of an MR
 * @ibmr:       memory region to query
 * @check_mask: requested checks; only IB_MR_CHECK_SIG_STATUS is accepted
 * @mr_status:  output; fail_status is cleared, then updated
 *
 * Marks the signature status as checked.  If a signature error is pending
 * it is copied to @mr_status->sig_err when its key matches @ibmr->lkey;
 * otherwise a generic IB_SIG_BAD_GUARD error at offset 0 carrying the
 * recorded key is reported.  The pending-error flag is then cleared and
 * IB_MR_CHECK_SIG_STATUS is set in @mr_status->fail_status.
 *
 * Returns 0 on success, -EINVAL for an unsupported mask or when a
 * signature check is requested on an MR without a signature context.
 */
int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
			    struct ib_mr_status *mr_status)
{
	struct mlx5_ib_mr *mmr = to_mmr(ibmr);

	if (check_mask & ~IB_MR_CHECK_SIG_STATUS) {
		pr_err("Invalid status check mask\n");
		return -EINVAL;
	}

	mr_status->fail_status = 0;

	/* Nothing else to do unless a signature check was asked for. */
	if (!(check_mask & IB_MR_CHECK_SIG_STATUS))
		return 0;

	if (!mmr->sig) {
		pr_err("signature status check requested on a non-signature enabled MR\n");
		return -EINVAL;
	}

	mmr->sig->sig_status_checked = true;
	if (!mmr->sig->sig_err_exists)
		return 0;

	if (ibmr->lkey != mmr->sig->err_item.key) {
		/* Recorded error belongs to a different key. */
		mr_status->sig_err.err_type = IB_SIG_BAD_GUARD;
		mr_status->sig_err.sig_err_offset = 0;
		mr_status->sig_err.key = mmr->sig->err_item.key;
	} else {
		memcpy(&mr_status->sig_err, &mmr->sig->err_item,
		       sizeof(mr_status->sig_err));
	}

	mmr->sig->sig_err_exists = false;
	mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS;

	return 0;
}
......@@ -256,8 +256,11 @@ static int calc_send_wqe(struct ib_qp_init_attr *attr)
}
size += attr->cap.max_send_sge * sizeof(struct mlx5_wqe_data_seg);
return ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB);
if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN &&
ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB) < MLX5_SIG_WQE_SIZE)
return MLX5_SIG_WQE_SIZE;
else
return ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB);
}
static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
......@@ -284,6 +287,9 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
sizeof(struct mlx5_wqe_inline_seg);
attr->cap.max_inline_data = qp->max_inline_data;
if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN)
qp->signature_en = true;
wq_size = roundup_pow_of_two(attr->cap.max_send_wr * wqe_size);
qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB;
if (qp->sq.wqe_cnt > dev->mdev.caps.max_wqes) {
......@@ -665,7 +671,7 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
int err;
uuari = &dev->mdev.priv.uuari;
if (init_attr->create_flags)
if (init_attr->create_flags & ~IB_QP_CREATE_SIGNATURE_EN)
return -EINVAL;
if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR)
......@@ -1771,6 +1777,27 @@ static __be64 frwr_mkey_mask(void)
return cpu_to_be64(result);
}
/*
 * Build the big-endian mkey field mask used by signature UMR work
 * requests: the set of MLX5_MKEY_MASK_* bits OR-ed together below.
 */
static __be64 sig_mkey_mask(void)
{
	u64 mask = 0;

	mask |= MLX5_MKEY_MASK_LEN;
	mask |= MLX5_MKEY_MASK_PAGE_SIZE;
	mask |= MLX5_MKEY_MASK_START_ADDR;
	mask |= MLX5_MKEY_MASK_EN_SIGERR;
	mask |= MLX5_MKEY_MASK_EN_RINVAL;
	mask |= MLX5_MKEY_MASK_KEY;
	mask |= MLX5_MKEY_MASK_LR;
	mask |= MLX5_MKEY_MASK_LW;
	mask |= MLX5_MKEY_MASK_RR;
	mask |= MLX5_MKEY_MASK_RW;
	mask |= MLX5_MKEY_MASK_SMALL_FENCE;
	mask |= MLX5_MKEY_MASK_FREE;
	mask |= MLX5_MKEY_MASK_BSF_EN;

	return cpu_to_be64(mask);
}
static void set_frwr_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
struct ib_send_wr *wr, int li)
{
......@@ -1826,7 +1853,7 @@ static u8 get_umr_flags(int acc)
(acc & IB_ACCESS_REMOTE_WRITE ? MLX5_PERM_REMOTE_WRITE : 0) |
(acc & IB_ACCESS_REMOTE_READ ? MLX5_PERM_REMOTE_READ : 0) |
(acc & IB_ACCESS_LOCAL_WRITE ? MLX5_PERM_LOCAL_WRITE : 0) |
MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_MTT;
MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN;
}
static void set_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr,
......@@ -1838,7 +1865,8 @@ static void set_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr,
return;
}
seg->flags = get_umr_flags(wr->wr.fast_reg.access_flags);
seg->flags = get_umr_flags(wr->wr.fast_reg.access_flags) |
MLX5_ACCESS_MODE_MTT;
*writ = seg->flags & (MLX5_PERM_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE);
seg->qpn_mkey7_0 = cpu_to_be32((wr->wr.fast_reg.rkey & 0xff) | 0xffffff00);
seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL);
......@@ -1954,6 +1982,342 @@ static int set_data_inl_seg(struct mlx5_ib_qp *qp, struct ib_send_wr *wr,
return 0;
}
/*
 * Size of the per-block protection field for a signature type:
 * MLX5_DIF_SIZE for T10-DIF, 0 for anything else.
 */
static u16 prot_field_size(enum ib_signature_type type)
{
	if (type == IB_SIG_TYPE_T10_DIF)
		return MLX5_DIF_SIZE;

	return 0;
}
/*
 * Map a block size in bytes to the block-size selector code written into
 * the BSF.  Unrecognised sizes map to 0.
 */
static u8 bs_selector(int block_size)
{
	static const struct {
		int size;
		u8 code;
	} bs_map[] = {
		{ 512,        0x1 },
		{ 520,        0x2 },
		{ 4096,       0x3 },
		{ 4160,       0x4 },
		{ 1073741824, 0x5 },
	};
	unsigned int i;

	for (i = 0; i < sizeof(bs_map) / sizeof(bs_map[0]); i++) {
		if (bs_map[i].size == block_size)
			return bs_map[i].code;
	}

	return 0;
}
/*
 * Pick the DIF format selector for one signature domain.
 *
 * @attr is not used by this function.  @domain supplies the T10-DIF type,
 * the block-guard type and, for type 3, whether the reference tag is
 * incremented.  On success *@selector is written and 0 is returned; for an
 * unknown DIF type or block-guard type 1 is returned and *@selector is
 * left untouched.
 */
static int format_selector(struct ib_sig_attrs *attr,
			   struct ib_sig_domain *domain,
			   int *selector)
{

#define FORMAT_DIF_NONE		0
#define FORMAT_DIF_CRC_INC	8
#define FORMAT_DIF_CRC_NO_INC	12
#define FORMAT_DIF_CSUM_INC	13
#define FORMAT_DIF_CSUM_NO_INC	14

	int inc_reftag;

	/* First decide whether the "incrementing ref-tag" flavour applies. */
	switch (domain->sig.dif.type) {
	case IB_T10DIF_NONE:
		/* No DIF */
		*selector = FORMAT_DIF_NONE;
		return 0;
	case IB_T10DIF_TYPE1: /* Fall through */
	case IB_T10DIF_TYPE2:
		inc_reftag = 1;
		break;
	case IB_T10DIF_TYPE3:
		inc_reftag = domain->sig.dif.type3_inc_reftag ? 1 : 0;
		break;
	default:
		return 1;
	}

	/* Then combine it with the block-guard algorithm. */
	switch (domain->sig.dif.bg_type) {
	case IB_T10DIF_CRC:
		*selector = inc_reftag ? FORMAT_DIF_CRC_INC :
					 FORMAT_DIF_CRC_NO_INC;
		break;
	case IB_T10DIF_CSUM:
		*selector = inc_reftag ? FORMAT_DIF_CSUM_INC :
					 FORMAT_DIF_CSUM_NO_INC;
		break;
	default:
		return 1;
	}

	return 0;
}
/*
 * Fill a byte-stream-format (BSF) descriptor for a signature handover.
 *
 * @sig_mr:    signature MR; its memory and wire PSV indices are used
 * @sig_attrs: memory- and wire-domain signature attributes
 * @bsf:       descriptor to fill (caller-provided memory)
 * @data_size: raw data size written to the descriptor
 *
 * Only T10-DIF on both domains is supported.  Returns 0 on success and
 * -EINVAL for an unsupported signature type or DIF/block-guard
 * combination.
 */
static int mlx5_set_bsf(struct ib_mr *sig_mr,
			struct ib_sig_attrs *sig_attrs,
			struct mlx5_bsf *bsf, u32 data_size)
{
	struct mlx5_core_sig_ctx *msig = to_mmr(sig_mr)->sig;
	struct mlx5_bsf_basic *basic = &bsf->basic;
	struct ib_sig_domain *mem = &sig_attrs->mem;
	struct ib_sig_domain *wire = &sig_attrs->wire;
	int ret, selector;

	/*
	 * Each path below sets only a subset of the descriptor fields, and
	 * the caller hands in memory that is not cleared beforehand.  Start
	 * from an all-zero descriptor so no stale bytes are left in the
	 * fields a given path does not write.
	 */
	memset(bsf, 0, sizeof(*bsf));

	switch (sig_attrs->mem.sig_type) {
	case IB_SIG_TYPE_T10_DIF:
		if (sig_attrs->wire.sig_type != IB_SIG_TYPE_T10_DIF)
			return -EINVAL;

		/* Input domain check byte mask */
		basic->check_byte_mask = sig_attrs->check_mask;
		if (mem->sig.dif.pi_interval == wire->sig.dif.pi_interval &&
		    mem->sig.dif.type == wire->sig.dif.type) {
			/* Same block structure */
			basic->bsf_size_sbs = 1 << 4;
			if (mem->sig.dif.bg_type == wire->sig.dif.bg_type)
				basic->wire.copy_byte_mask = 0xff;
			else
				basic->wire.copy_byte_mask = 0x3f;
		} else
			basic->wire.bs_selector = bs_selector(wire->sig.dif.pi_interval);

		basic->mem.bs_selector = bs_selector(mem->sig.dif.pi_interval);
		basic->raw_data_size = cpu_to_be32(data_size);

		/* Memory-domain format selector and PSV index */
		ret = format_selector(sig_attrs, mem, &selector);
		if (ret)
			return -EINVAL;
		basic->m_bfs_psv = cpu_to_be32(selector << 24 |
					       msig->psv_memory.psv_idx);

		/* Wire-domain format selector and PSV index */
		ret = format_selector(sig_attrs, wire, &selector);
		if (ret)
			return -EINVAL;
		basic->w_bfs_psv = cpu_to_be32(selector << 24 |
					       msig->psv_wire.psv_idx);
		break;

	default:
		return -EINVAL;
	}

	return 0;
}
/*
 * Write the data (and optional protection) layout plus the BSF for a
 * signature handover work request.
 *
 * @wr:   work request; sg_list[0] describes the data, and
 *        wr.sig_handover.prot (may be NULL) describes the protection buffer
 * @qp:   queue pair, used to wrap @seg at the end of the send queue
 * @seg:  in/out cursor into the WQE being built
 * @size: in/out WQE size in 16-byte units
 *
 * Returns 0 on success, -EINVAL on an unsupported signature type, a zero
 * protection interval, or a BSF that cannot be built.
 */
static int set_sig_data_segment(struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
				void **seg, int *size)
{
	struct ib_sig_attrs *sig_attrs = wr->wr.sig_handover.sig_attrs;
	struct ib_mr *sig_mr = wr->wr.sig_handover.sig_mr;
	struct mlx5_bsf *bsf;
	u32 data_len = wr->sg_list->length;
	u32 data_key = wr->sg_list->lkey;
	u64 data_va = wr->sg_list->addr;
	int ret;
	int wqe_size;

	if (!wr->wr.sig_handover.prot) {
		/**
		 * Source domain doesn't contain signature information
		 * So need construct:
		 *                  ------------------
		 *                 |     data_klm     |
		 *                  ------------------
		 *                 |       BSF        |
		 *                  ------------------
		 **/
		struct mlx5_klm *data_klm = *seg;

		data_klm->bcount = cpu_to_be32(data_len);
		data_klm->key = cpu_to_be32(data_key);
		data_klm->va = cpu_to_be64(data_va);
		wqe_size = ALIGN(sizeof(*data_klm), 64);
	} else {
		/**
		 * Source domain contains signature information
		 * So need construct a strided block format:
		 *               ---------------------------
		 *              |     stride_block_ctrl     |
		 *               ---------------------------
		 *              |          data_klm         |
		 *               ---------------------------
		 *              |          prot_klm         |
		 *               ---------------------------
		 *              |             BSF           |
		 *               ---------------------------
		 **/
		struct mlx5_stride_block_ctrl_seg *sblock_ctrl;
		struct mlx5_stride_block_entry *data_sentry;
		struct mlx5_stride_block_entry *prot_sentry;
		u32 prot_key = wr->wr.sig_handover.prot->lkey;
		u64 prot_va = wr->wr.sig_handover.prot->addr;
		u16 block_size = sig_attrs->mem.sig.dif.pi_interval;
		int prot_size;

		sblock_ctrl = *seg;
		data_sentry = (void *)sblock_ctrl + sizeof(*sblock_ctrl);
		prot_sentry = (void *)data_sentry + sizeof(*data_sentry);

		/* prot_field_size() yields 0 for unsupported signature types */
		prot_size = prot_field_size(sig_attrs->mem.sig_type);
		if (!prot_size) {
			pr_err("Bad signature type given.\n");
			return -EINVAL;
		}

		/* block_size is a divisor below; reject 0 instead of faulting */
		if (!block_size) {
			pr_err("Bad block size given: %u\n", block_size);
			return -EINVAL;
		}

		sblock_ctrl->bcount_per_cycle = cpu_to_be32(block_size +
							    prot_size);
		sblock_ctrl->op = cpu_to_be32(MLX5_STRIDE_BLOCK_OP);
		sblock_ctrl->repeat_count = cpu_to_be32(data_len / block_size);
		sblock_ctrl->num_entries = cpu_to_be16(2);

		data_sentry->bcount = cpu_to_be16(block_size);
		data_sentry->key = cpu_to_be32(data_key);
		data_sentry->va = cpu_to_be64(data_va);
		prot_sentry->bcount = cpu_to_be16(prot_size);
		prot_sentry->key = cpu_to_be32(prot_key);

		if (prot_key == data_key && prot_va == data_va) {
			/**
			 * The data and protection are interleaved
			 * in a single memory region
			 **/
			prot_sentry->va = cpu_to_be64(data_va + block_size);
			prot_sentry->stride = cpu_to_be16(block_size + prot_size);
			data_sentry->stride = prot_sentry->stride;
		} else {
			/* The data and protection are two different buffers */
			prot_sentry->va = cpu_to_be64(prot_va);
			data_sentry->stride = cpu_to_be16(block_size);
			prot_sentry->stride = cpu_to_be16(prot_size);
		}
		wqe_size = ALIGN(sizeof(*sblock_ctrl) + sizeof(*data_sentry) +
				 sizeof(*prot_sentry), 64);
	}

	/* Advance past the layout just written, wrapping at queue end */
	*seg += wqe_size;
	*size += wqe_size / 16;
	if (unlikely((*seg == qp->sq.qend)))
		*seg = mlx5_get_send_wqe(qp, 0);

	bsf = *seg;
	ret = mlx5_set_bsf(sig_mr, sig_attrs, bsf, data_len);
	if (ret)
		return -EINVAL;

	*seg += sizeof(*bsf);
	*size += sizeof(*bsf) / 16;
	if (unlikely((*seg == qp->sq.qend)))
		*seg = mlx5_get_send_wqe(qp, 0);

	return 0;
}
/*
 * Fill the mkey segment of a signature UMR WQE.
 *
 * The segment is zeroed, then given KLM access mode with the permissions
 * derived from the work request's access flags, the low byte of the
 * signature MR's rkey, the region @length, the KLM octoword count for
 * @nelements, the BSF octoword size, and a flags/PD word combining remote
 * invalidate, BSF enable, @pdn and the low bit of the MR's sigerr_count
 * (placed at bit 26).
 */
static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg,
				 struct ib_send_wr *wr, u32 nelements,
				 u32 length, u32 pdn)
{
	struct ib_mr *sig_mr = wr->wr.sig_handover.sig_mr;
	u8 sigerr = to_mmr(sig_mr)->sig->sigerr_count & 1;
	u32 key_byte = sig_mr->rkey & 0xff;
	u32 klm_octo = be16_to_cpu(get_klm_octo(nelements));

	memset(seg, 0, sizeof(*seg));

	seg->flags = get_umr_flags(wr->wr.sig_handover.access_flags) |
		     MLX5_ACCESS_MODE_KLM;
	seg->qpn_mkey7_0 = cpu_to_be32(key_byte | 0xffffff00);
	seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 |
				    MLX5_MKEY_BSF_EN | pdn);
	seg->len = cpu_to_be64(length);
	seg->xlt_oct_size = cpu_to_be32(klm_octo);
	seg->bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE);
}
/*
 * Fill the UMR control segment of a signature UMR WQE: zero it, then set
 * the inline and check-free flags, the KLM octoword count for @nelements,
 * the BSF octoword size and the signature mkey mask.  @wr is not used.
 */
static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
				struct ib_send_wr *wr, u32 nelements)
{
	memset(umr, 0, sizeof(*umr));

	umr->mkey_mask = sig_mkey_mask();
	umr->bsf_octowords = cpu_to_be16(MLX5_MKEY_BSF_OCTO_SIZE);
	umr->klm_octowords = get_klm_octo(nelements);
	umr->flags = MLX5_FLAGS_INLINE | MLX5_FLAGS_CHECK_FREE;
}
/*
 * Build the UMR control, mkey and data/BSF segments of a signature
 * registration WQE.
 *
 * @wr:   signature handover work request (must carry exactly one SGE)
 * @qp:   queue pair; must have signature_en set
 * @seg:  in/out cursor into the WQE being built
 * @size: in/out WQE size in 16-byte units
 *
 * Returns -EINVAL if the request is malformed, asks for remote atomic
 * access, the MR has no signature context, the QP is not signature
 * enabled, or the MR's previous signature status has not been checked
 * yet; otherwise the result of set_sig_data_segment().  On success the
 * MR's sig_status_checked flag is cleared.
 */
static int set_sig_umr_wr(struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
			  void **seg, int *size)
{
	struct mlx5_ib_mr *sig_mr = to_mmr(wr->wr.sig_handover.sig_mr);
	u32 pdn = get_pd(qp)->pdn;
	u32 klm_oct_size;
	int region_len;
	int err;

	if (unlikely(wr->num_sge != 1))
		return -EINVAL;
	if (unlikely(wr->wr.sig_handover.access_flags &
		     IB_ACCESS_REMOTE_ATOMIC))
		return -EINVAL;
	if (unlikely(!sig_mr->sig) || unlikely(!qp->signature_en))
		return -EINVAL;
	if (unlikely(!sig_mr->sig->sig_status_checked))
		return -EINVAL;

	/* length of the protected region, data + protection */
	region_len = wr->sg_list->length;
	if (wr->wr.sig_handover.prot)
		region_len += wr->wr.sig_handover.prot->length;

	/**
	 * KLM octoword size - if protection was provided
	 * then we use strided block format (3 octowords),
	 * else we use single KLM (1 octoword)
	 **/
	if (wr->wr.sig_handover.prot)
		klm_oct_size = 3;
	else
		klm_oct_size = 1;

	/* UMR control segment */
	set_sig_umr_segment(*seg, wr, klm_oct_size);
	*seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
	*size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
	if (unlikely((*seg == qp->sq.qend)))
		*seg = mlx5_get_send_wqe(qp, 0);

	/* mkey segment */
	set_sig_mkey_segment(*seg, wr, klm_oct_size, region_len, pdn);
	*seg += sizeof(struct mlx5_mkey_seg);
	*size += sizeof(struct mlx5_mkey_seg) / 16;
	if (unlikely((*seg == qp->sq.qend)))
		*seg = mlx5_get_send_wqe(qp, 0);

	/* data layout + BSF */
	err = set_sig_data_segment(wr, qp, seg, size);
	if (err)
		return err;

	sig_mr->sig->sig_status_checked = false;
	return 0;
}
/*
 * Write a SET_PSV segment for one signature domain.
 *
 * @domain:  signature domain providing the initial block guard, application
 *           tag and reference tag
 * @psv_idx: PSV index to program
 * @seg:     in/out cursor into the WQE being built
 * @size:    in/out WQE size in 16-byte units
 *
 * The segment is zeroed and its psv_num set unconditionally; @seg and
 * @size are advanced only for a supported signature type.
 *
 * Returns 0 on success or -EINVAL for an unsupported signature type.  A
 * negative errno is returned, like the other WQE-building helpers here,
 * because the caller passes this value on unchanged as its own error code.
 */
static int set_psv_wr(struct ib_sig_domain *domain,
		      u32 psv_idx, void **seg, int *size)
{
	struct mlx5_seg_set_psv *psv_seg = *seg;

	memset(psv_seg, 0, sizeof(*psv_seg));
	psv_seg->psv_num = cpu_to_be32(psv_idx);
	switch (domain->sig_type) {
	case IB_SIG_TYPE_T10_DIF:
		psv_seg->transient_sig = cpu_to_be32(domain->sig.dif.bg << 16 |
						     domain->sig.dif.app_tag);
		psv_seg->ref_tag = cpu_to_be32(domain->sig.dif.ref_tag);

		*seg += sizeof(*psv_seg);
		*size += sizeof(*psv_seg) / 16;
		break;

	default:
		pr_err("Bad signature type given.\n");
		return -EINVAL;
	}

	return 0;
}
static int set_frwr_li_wr(void **seg, struct ib_send_wr *wr, int *size,
struct mlx5_core_dev *mdev, struct mlx5_ib_pd *pd, struct mlx5_ib_qp *qp)
{
......@@ -2041,6 +2405,59 @@ static u8 get_fence(u8 fence, struct ib_send_wr *wr)
}
}
/*
 * Start a new send WQE at the current post position.
 *
 * @qp:   queue pair whose send queue is being filled
 * @seg:  out; set to the first byte after the control segment
 * @ctrl: out; set to the WQE's control segment
 * @wr:   work request; supplies the immediate value and send flags
 * @idx:  out; slot index of the WQE in the send queue
 * @size: out; WQE size so far in 16-byte units (the control segment only)
 * @nreq: number of requests already built in this post call
 *
 * Returns 0 on success or -ENOMEM if the send queue would overflow, in
 * which case none of the outputs are written.
 */
static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
		     struct mlx5_wqe_ctrl_seg **ctrl,
		     struct ib_send_wr *wr, int *idx,
		     int *size, int nreq)
{
	if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)))
		return -ENOMEM;

	*idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
	*seg = mlx5_get_send_wqe(qp, *idx);
	*ctrl = *seg;

	/* Clear the 32-bit word at byte offset 8 of the WQE. */
	*(uint32_t *)(*seg + 8) = 0;
	(*ctrl)->imm = send_ieth(wr);

	(*ctrl)->fm_ce_se = qp->sq_signal_bits;
	if (wr->send_flags & IB_SEND_SIGNALED)
		(*ctrl)->fm_ce_se |= MLX5_WQE_CTRL_CQ_UPDATE;
	if (wr->send_flags & IB_SEND_SOLICITED)
		(*ctrl)->fm_ce_se |= MLX5_WQE_CTRL_SOLICITED;

	*seg += sizeof(**ctrl);
	*size = sizeof(**ctrl) / 16;

	return 0;
}
/*
 * Complete a send WQE and advance the send queue.
 *
 * Writes the opcode/index word (opmod is always 0 here) and the QPN/size
 * word into @ctrl, ORs @fence into the fence/completion flags, stores
 * @next_fence in qp->fm_cache, signs the WQE when qp->wq_sig is set, then
 * records @wr_id, the opcode and the head position for slot @idx and moves
 * cur_post forward by the number of basic blocks the WQE occupies.
 *
 * @size is the WQE size in 16-byte units.
 */
static void finish_wqe(struct mlx5_ib_qp *qp,
		       struct mlx5_wqe_ctrl_seg *ctrl,
		       u8 size, unsigned idx, u64 wr_id,
		       int nreq, u8 fence, u8 next_fence,
		       u32 mlx5_opcode)
{
	const u8 opmod = 0;
	u32 idx_opcode;
	u32 qpn_ds;

	idx_opcode = ((u32)(qp->sq.cur_post) << 8) | mlx5_opcode |
		     ((u32)opmod << 24);
	qpn_ds = size | (qp->mqp.qpn << 8);

	ctrl->opmod_idx_opcode = cpu_to_be32(idx_opcode);
	ctrl->qpn_ds = cpu_to_be32(qpn_ds);
	ctrl->fm_ce_se |= fence;
	qp->fm_cache = next_fence;

	/* The signature covers the control fields written above. */
	if (unlikely(qp->wq_sig))
		ctrl->signature = wq_sig(ctrl);

	qp->sq.wqe_head[idx] = qp->sq.head + nreq;
	qp->sq.w_list[idx].opcode = mlx5_opcode;
	qp->sq.wrid[idx] = wr_id;

	/* "next" must see the already-advanced post position. */
	qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB);
	qp->sq.w_list[idx].next = qp->sq.cur_post;
}
int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct ib_send_wr **bad_wr)
{
......@@ -2048,13 +2465,13 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
struct mlx5_core_dev *mdev = &dev->mdev;
struct mlx5_ib_qp *qp = to_mqp(ibqp);
struct mlx5_ib_mr *mr;
struct mlx5_wqe_data_seg *dpseg;
struct mlx5_wqe_xrc_seg *xrc;
struct mlx5_bf *bf = qp->bf;
int uninitialized_var(size);
void *qend = qp->sq.qend;
unsigned long flags;
u32 mlx5_opcode;
unsigned idx;
int err = 0;
int inl = 0;
......@@ -2063,7 +2480,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
int nreq;
int i;
u8 next_fence = 0;
u8 opmod = 0;
u8 fence;
spin_lock_irqsave(&qp->sq.lock, flags);
......@@ -2076,36 +2492,23 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
goto out;
}
if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) {
fence = qp->fm_cache;
num_sge = wr->num_sge;
if (unlikely(num_sge > qp->sq.max_gs)) {
mlx5_ib_warn(dev, "\n");
err = -ENOMEM;
*bad_wr = wr;
goto out;
}
fence = qp->fm_cache;
num_sge = wr->num_sge;
if (unlikely(num_sge > qp->sq.max_gs)) {
err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, nreq);
if (err) {
mlx5_ib_warn(dev, "\n");
err = -ENOMEM;
*bad_wr = wr;
goto out;
}
idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
seg = mlx5_get_send_wqe(qp, idx);
ctrl = seg;
*(uint32_t *)(seg + 8) = 0;
ctrl->imm = send_ieth(wr);
ctrl->fm_ce_se = qp->sq_signal_bits |
(wr->send_flags & IB_SEND_SIGNALED ?
MLX5_WQE_CTRL_CQ_UPDATE : 0) |
(wr->send_flags & IB_SEND_SOLICITED ?
MLX5_WQE_CTRL_SOLICITED : 0);
seg += sizeof(*ctrl);
size = sizeof(*ctrl) / 16;
switch (ibqp->qp_type) {
case IB_QPT_XRC_INI:
xrc = seg;
......@@ -2158,6 +2561,73 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
num_sge = 0;
break;
case IB_WR_REG_SIG_MR:
qp->sq.wr_data[idx] = IB_WR_REG_SIG_MR;
mr = to_mmr(wr->wr.sig_handover.sig_mr);
ctrl->imm = cpu_to_be32(mr->ibmr.rkey);
err = set_sig_umr_wr(wr, qp, &seg, &size);
if (err) {
mlx5_ib_warn(dev, "\n");
*bad_wr = wr;
goto out;
}
finish_wqe(qp, ctrl, size, idx, wr->wr_id,
nreq, get_fence(fence, wr),
next_fence, MLX5_OPCODE_UMR);
/*
* SET_PSV WQEs are not signaled and solicited
* on error
*/
wr->send_flags &= ~IB_SEND_SIGNALED;
wr->send_flags |= IB_SEND_SOLICITED;
err = begin_wqe(qp, &seg, &ctrl, wr,
&idx, &size, nreq);
if (err) {
mlx5_ib_warn(dev, "\n");
err = -ENOMEM;
*bad_wr = wr;
goto out;
}
err = set_psv_wr(&wr->wr.sig_handover.sig_attrs->mem,
mr->sig->psv_memory.psv_idx, &seg,
&size);
if (err) {
mlx5_ib_warn(dev, "\n");
*bad_wr = wr;
goto out;
}
finish_wqe(qp, ctrl, size, idx, wr->wr_id,
nreq, get_fence(fence, wr),
next_fence, MLX5_OPCODE_SET_PSV);
err = begin_wqe(qp, &seg, &ctrl, wr,
&idx, &size, nreq);
if (err) {
mlx5_ib_warn(dev, "\n");
err = -ENOMEM;
*bad_wr = wr;
goto out;
}
next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
err = set_psv_wr(&wr->wr.sig_handover.sig_attrs->wire,
mr->sig->psv_wire.psv_idx, &seg,
&size);
if (err) {
mlx5_ib_warn(dev, "\n");
*bad_wr = wr;
goto out;
}
finish_wqe(qp, ctrl, size, idx, wr->wr_id,
nreq, get_fence(fence, wr),
next_fence, MLX5_OPCODE_SET_PSV);
num_sge = 0;
goto skip_psv;
default:
break;
}
......@@ -2238,22 +2708,10 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
}
}
mlx5_opcode = mlx5_ib_opcode[wr->opcode];
ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8) |
mlx5_opcode |
((u32)opmod << 24));
ctrl->qpn_ds = cpu_to_be32(size | (qp->mqp.qpn << 8));
ctrl->fm_ce_se |= get_fence(fence, wr);
qp->fm_cache = next_fence;
if (unlikely(qp->wq_sig))
ctrl->signature = wq_sig(ctrl);
qp->sq.wrid[idx] = wr->wr_id;
qp->sq.w_list[idx].opcode = mlx5_opcode;
qp->sq.wqe_head[idx] = qp->sq.head + nreq;
qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB);
qp->sq.w_list[idx].next = qp->sq.cur_post;
finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq,
get_fence(fence, wr), next_fence,
mlx5_ib_opcode[wr->opcode]);
skip_psv:
if (0)
dump_wqe(qp, idx, size);
}
......
......@@ -976,12 +976,12 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt, int acc, struct ib_udata *udata)
{
struct mthca_dev *dev = to_mdev(pd->device);
struct ib_umem_chunk *chunk;
struct scatterlist *sg;
struct mthca_mr *mr;
struct mthca_reg_mr ucmd;
u64 *pages;
int shift, n, len;
int i, j, k;
int i, k, entry;
int err = 0;
int write_mtt_size;
......@@ -1009,10 +1009,7 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
shift = ffs(mr->umem->page_size) - 1;
n = 0;
list_for_each_entry(chunk, &mr->umem->chunk_list, list)
n += chunk->nents;
n = mr->umem->nmap;
mr->mtt = mthca_alloc_mtt(dev, n);
if (IS_ERR(mr->mtt)) {
......@@ -1030,25 +1027,24 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
write_mtt_size = min(mthca_write_mtt_size(dev), (int) (PAGE_SIZE / sizeof *pages));
list_for_each_entry(chunk, &mr->umem->chunk_list, list)
for (j = 0; j < chunk->nmap; ++j) {
len = sg_dma_len(&chunk->page_list[j]) >> shift;
for (k = 0; k < len; ++k) {
pages[i++] = sg_dma_address(&chunk->page_list[j]) +
mr->umem->page_size * k;
/*
* Be friendly to write_mtt and pass it chunks
* of appropriate size.
*/
if (i == write_mtt_size) {
err = mthca_write_mtt(dev, mr->mtt, n, pages, i);
if (err)
goto mtt_done;
n += i;
i = 0;
}
for_each_sg(mr->umem->sg_head.sgl, sg, mr->umem->nmap, entry) {
len = sg_dma_len(sg) >> shift;
for (k = 0; k < len; ++k) {
pages[i++] = sg_dma_address(sg) +
mr->umem->page_size * k;
/*
* Be friendly to write_mtt and pass it chunks
* of appropriate size.
*/
if (i == write_mtt_size) {
err = mthca_write_mtt(dev, mr->mtt, n, pages, i);
if (err)
goto mtt_done;
n += i;
i = 0;
}
}
}
if (i)
err = mthca_write_mtt(dev, mr->mtt, n, pages, i);
......
......@@ -2307,7 +2307,7 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
struct nes_device *nesdev = nesvnic->nesdev;
struct nes_adapter *nesadapter = nesdev->nesadapter;
struct ib_mr *ibmr = ERR_PTR(-EINVAL);
struct ib_umem_chunk *chunk;
struct scatterlist *sg;
struct nes_ucontext *nes_ucontext;
struct nes_pbl *nespbl;
struct nes_mr *nesmr;
......@@ -2315,7 +2315,7 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
struct nes_mem_reg_req req;
struct nes_vpbl vpbl;
struct nes_root_vpbl root_vpbl;
int nmap_index, page_index;
int entry, page_index;
int page_count = 0;
int err, pbl_depth = 0;
int chunk_pages;
......@@ -2330,6 +2330,7 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u16 pbl_count;
u8 single_page = 1;
u8 stag_key;
int first_page = 1;
region = ib_umem_get(pd->uobject->context, start, length, acc, 0);
if (IS_ERR(region)) {
......@@ -2380,128 +2381,125 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
nesmr->region = region;
list_for_each_entry(chunk, &region->chunk_list, list) {
nes_debug(NES_DBG_MR, "Chunk: nents = %u, nmap = %u .\n",
chunk->nents, chunk->nmap);
for (nmap_index = 0; nmap_index < chunk->nmap; ++nmap_index) {
if (sg_dma_address(&chunk->page_list[nmap_index]) & ~PAGE_MASK) {
ib_umem_release(region);
nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
nes_debug(NES_DBG_MR, "Unaligned Memory Buffer: 0x%x\n",
(unsigned int) sg_dma_address(&chunk->page_list[nmap_index]));
ibmr = ERR_PTR(-EINVAL);
kfree(nesmr);
goto reg_user_mr_err;
}
for_each_sg(region->sg_head.sgl, sg, region->nmap, entry) {
if (sg_dma_address(sg) & ~PAGE_MASK) {
ib_umem_release(region);
nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
nes_debug(NES_DBG_MR, "Unaligned Memory Buffer: 0x%x\n",
(unsigned int) sg_dma_address(sg));
ibmr = ERR_PTR(-EINVAL);
kfree(nesmr);
goto reg_user_mr_err;
}
if (!sg_dma_len(&chunk->page_list[nmap_index])) {
ib_umem_release(region);
nes_free_resource(nesadapter, nesadapter->allocated_mrs,
stag_index);
nes_debug(NES_DBG_MR, "Invalid Buffer Size\n");
ibmr = ERR_PTR(-EINVAL);
kfree(nesmr);
goto reg_user_mr_err;
}
if (!sg_dma_len(sg)) {
ib_umem_release(region);
nes_free_resource(nesadapter, nesadapter->allocated_mrs,
stag_index);
nes_debug(NES_DBG_MR, "Invalid Buffer Size\n");
ibmr = ERR_PTR(-EINVAL);
kfree(nesmr);
goto reg_user_mr_err;
}
region_length += sg_dma_len(&chunk->page_list[nmap_index]);
chunk_pages = sg_dma_len(&chunk->page_list[nmap_index]) >> 12;
region_length -= skip_pages << 12;
for (page_index=skip_pages; page_index < chunk_pages; page_index++) {
skip_pages = 0;
if ((page_count!=0)&&(page_count<<12)-(region->offset&(4096-1))>=region->length)
goto enough_pages;
if ((page_count&0x01FF) == 0) {
if (page_count >= 1024 * 512) {
region_length += sg_dma_len(sg);
chunk_pages = sg_dma_len(sg) >> 12;
region_length -= skip_pages << 12;
for (page_index = skip_pages; page_index < chunk_pages; page_index++) {
skip_pages = 0;
if ((page_count != 0) && (page_count<<12)-(region->offset&(4096-1)) >= region->length)
goto enough_pages;
if ((page_count&0x01FF) == 0) {
if (page_count >= 1024 * 512) {
ib_umem_release(region);
nes_free_resource(nesadapter,
nesadapter->allocated_mrs, stag_index);
kfree(nesmr);
ibmr = ERR_PTR(-E2BIG);
goto reg_user_mr_err;
}
if (root_pbl_index == 1) {
root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev,
8192, &root_vpbl.pbl_pbase);
nes_debug(NES_DBG_MR, "Allocating root PBL, va = %p, pa = 0x%08X\n",
root_vpbl.pbl_vbase, (unsigned int)root_vpbl.pbl_pbase);
if (!root_vpbl.pbl_vbase) {
ib_umem_release(region);
nes_free_resource(nesadapter,
nesadapter->allocated_mrs, stag_index);
pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase,
vpbl.pbl_pbase);
nes_free_resource(nesadapter, nesadapter->allocated_mrs,
stag_index);
kfree(nesmr);
ibmr = ERR_PTR(-E2BIG);
ibmr = ERR_PTR(-ENOMEM);
goto reg_user_mr_err;
}
if (root_pbl_index == 1) {
root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev,
8192, &root_vpbl.pbl_pbase);
nes_debug(NES_DBG_MR, "Allocating root PBL, va = %p, pa = 0x%08X\n",
root_vpbl.pbl_vbase, (unsigned int)root_vpbl.pbl_pbase);
if (!root_vpbl.pbl_vbase) {
ib_umem_release(region);
pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase,
vpbl.pbl_pbase);
nes_free_resource(nesadapter, nesadapter->allocated_mrs,
stag_index);
kfree(nesmr);
ibmr = ERR_PTR(-ENOMEM);
goto reg_user_mr_err;
}
root_vpbl.leaf_vpbl = kzalloc(sizeof(*root_vpbl.leaf_vpbl)*1024,
GFP_KERNEL);
if (!root_vpbl.leaf_vpbl) {
ib_umem_release(region);
pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase,
root_vpbl.pbl_pbase);
pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase,
vpbl.pbl_pbase);
nes_free_resource(nesadapter, nesadapter->allocated_mrs,
stag_index);
kfree(nesmr);
ibmr = ERR_PTR(-ENOMEM);
goto reg_user_mr_err;
}
root_vpbl.pbl_vbase[0].pa_low =
cpu_to_le32((u32)vpbl.pbl_pbase);
root_vpbl.pbl_vbase[0].pa_high =
cpu_to_le32((u32)((((u64)vpbl.pbl_pbase) >> 32)));
root_vpbl.leaf_vpbl[0] = vpbl;
}
vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096,
&vpbl.pbl_pbase);
nes_debug(NES_DBG_MR, "Allocating leaf PBL, va = %p, pa = 0x%08X\n",
vpbl.pbl_vbase, (unsigned int)vpbl.pbl_pbase);
if (!vpbl.pbl_vbase) {
root_vpbl.leaf_vpbl = kzalloc(sizeof(*root_vpbl.leaf_vpbl)*1024,
GFP_KERNEL);
if (!root_vpbl.leaf_vpbl) {
ib_umem_release(region);
nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
ibmr = ERR_PTR(-ENOMEM);
pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase,
root_vpbl.pbl_pbase);
pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase,
vpbl.pbl_pbase);
nes_free_resource(nesadapter, nesadapter->allocated_mrs,
stag_index);
kfree(nesmr);
ibmr = ERR_PTR(-ENOMEM);
goto reg_user_mr_err;
}
if (1 <= root_pbl_index) {
root_vpbl.pbl_vbase[root_pbl_index].pa_low =
cpu_to_le32((u32)vpbl.pbl_pbase);
root_vpbl.pbl_vbase[root_pbl_index].pa_high =
cpu_to_le32((u32)((((u64)vpbl.pbl_pbase)>>32)));
root_vpbl.leaf_vpbl[root_pbl_index] = vpbl;
}
root_pbl_index++;
cur_pbl_index = 0;
root_vpbl.pbl_vbase[0].pa_low =
cpu_to_le32((u32)vpbl.pbl_pbase);
root_vpbl.pbl_vbase[0].pa_high =
cpu_to_le32((u32)((((u64)vpbl.pbl_pbase) >> 32)));
root_vpbl.leaf_vpbl[0] = vpbl;
}
if (single_page) {
if (page_count != 0) {
if ((last_dma_addr+4096) !=
(sg_dma_address(&chunk->page_list[nmap_index])+
(page_index*4096)))
single_page = 0;
last_dma_addr = sg_dma_address(&chunk->page_list[nmap_index])+
(page_index*4096);
} else {
first_dma_addr = sg_dma_address(&chunk->page_list[nmap_index])+
(page_index*4096);
last_dma_addr = first_dma_addr;
}
vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096,
&vpbl.pbl_pbase);
nes_debug(NES_DBG_MR, "Allocating leaf PBL, va = %p, pa = 0x%08X\n",
vpbl.pbl_vbase, (unsigned int)vpbl.pbl_pbase);
if (!vpbl.pbl_vbase) {
ib_umem_release(region);
nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
ibmr = ERR_PTR(-ENOMEM);
kfree(nesmr);
goto reg_user_mr_err;
}
if (1 <= root_pbl_index) {
root_vpbl.pbl_vbase[root_pbl_index].pa_low =
cpu_to_le32((u32)vpbl.pbl_pbase);
root_vpbl.pbl_vbase[root_pbl_index].pa_high =
cpu_to_le32((u32)((((u64)vpbl.pbl_pbase)>>32)));
root_vpbl.leaf_vpbl[root_pbl_index] = vpbl;
}
root_pbl_index++;
cur_pbl_index = 0;
}
if (single_page) {
if (page_count != 0) {
if ((last_dma_addr+4096) !=
(sg_dma_address(sg)+
(page_index*4096)))
single_page = 0;
last_dma_addr = sg_dma_address(sg)+
(page_index*4096);
} else {
first_dma_addr = sg_dma_address(sg)+
(page_index*4096);
last_dma_addr = first_dma_addr;
}
vpbl.pbl_vbase[cur_pbl_index].pa_low =
cpu_to_le32((u32)(sg_dma_address(&chunk->page_list[nmap_index])+
(page_index*4096)));
vpbl.pbl_vbase[cur_pbl_index].pa_high =
cpu_to_le32((u32)((((u64)(sg_dma_address(&chunk->page_list[nmap_index])+
(page_index*4096))) >> 32)));
cur_pbl_index++;
page_count++;
}
vpbl.pbl_vbase[cur_pbl_index].pa_low =
cpu_to_le32((u32)(sg_dma_address(sg)+
(page_index*4096)));
vpbl.pbl_vbase[cur_pbl_index].pa_high =
cpu_to_le32((u32)((((u64)(sg_dma_address(sg)+
(page_index*4096))) >> 32)));
cur_pbl_index++;
page_count++;
}
}
enough_pages:
nes_debug(NES_DBG_MR, "calculating stag, stag_index=0x%08x, driver_key=0x%08x,"
" stag_key=0x%08x\n",
......@@ -2613,25 +2611,28 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
nespbl->pbl_size, (unsigned long) nespbl->pbl_pbase,
(void *) nespbl->pbl_vbase, nespbl->user_base);
list_for_each_entry(chunk, &region->chunk_list, list) {
for (nmap_index = 0; nmap_index < chunk->nmap; ++nmap_index) {
chunk_pages = sg_dma_len(&chunk->page_list[nmap_index]) >> 12;
chunk_pages += (sg_dma_len(&chunk->page_list[nmap_index]) & (4096-1)) ? 1 : 0;
nespbl->page = sg_page(&chunk->page_list[0]);
for (page_index=0; page_index<chunk_pages; page_index++) {
((__le32 *)pbl)[0] = cpu_to_le32((u32)
(sg_dma_address(&chunk->page_list[nmap_index])+
(page_index*4096)));
((__le32 *)pbl)[1] = cpu_to_le32(((u64)
(sg_dma_address(&chunk->page_list[nmap_index])+
(page_index*4096)))>>32);
nes_debug(NES_DBG_MR, "pbl=%p, *pbl=0x%016llx, 0x%08x%08x\n", pbl,
(unsigned long long)*pbl,
le32_to_cpu(((__le32 *)pbl)[1]), le32_to_cpu(((__le32 *)pbl)[0]));
pbl++;
}
for_each_sg(region->sg_head.sgl, sg, region->nmap, entry) {
chunk_pages = sg_dma_len(sg) >> 12;
chunk_pages += (sg_dma_len(sg) & (4096-1)) ? 1 : 0;
if (first_page) {
nespbl->page = sg_page(sg);
first_page = 0;
}
for (page_index = 0; page_index < chunk_pages; page_index++) {
((__le32 *)pbl)[0] = cpu_to_le32((u32)
(sg_dma_address(sg)+
(page_index*4096)));
((__le32 *)pbl)[1] = cpu_to_le32(((u64)
(sg_dma_address(sg)+
(page_index*4096)))>>32);
nes_debug(NES_DBG_MR, "pbl=%p, *pbl=0x%016llx, 0x%08x%08x\n", pbl,
(unsigned long long)*pbl,
le32_to_cpu(((__le32 *)pbl)[1]), le32_to_cpu(((__le32 *)pbl)[0]));
pbl++;
}
}
if (req.reg_type == IWNES_MEMREG_TYPE_QP) {
list_add_tail(&nespbl->list, &nes_ucontext->qp_reg_mem_list);
} else {
......
......@@ -726,10 +726,10 @@ static void build_user_pbes(struct ocrdma_dev *dev, struct ocrdma_mr *mr,
u32 num_pbes)
{
struct ocrdma_pbe *pbe;
struct ib_umem_chunk *chunk;
struct scatterlist *sg;
struct ocrdma_pbl *pbl_tbl = mr->hwmr.pbl_table;
struct ib_umem *umem = mr->umem;
int i, shift, pg_cnt, pages, pbe_cnt, total_num_pbes = 0;
int shift, pg_cnt, pages, pbe_cnt, entry, total_num_pbes = 0;
if (!mr->hwmr.num_pbes)
return;
......@@ -739,39 +739,37 @@ static void build_user_pbes(struct ocrdma_dev *dev, struct ocrdma_mr *mr,
shift = ilog2(umem->page_size);
list_for_each_entry(chunk, &umem->chunk_list, list) {
/* get all the dma regions from the chunk. */
for (i = 0; i < chunk->nmap; i++) {
pages = sg_dma_len(&chunk->page_list[i]) >> shift;
for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) {
/* store the page address in pbe */
pbe->pa_lo =
cpu_to_le32(sg_dma_address
(&chunk->page_list[i]) +
(umem->page_size * pg_cnt));
pbe->pa_hi =
cpu_to_le32(upper_32_bits
((sg_dma_address
(&chunk->page_list[i]) +
umem->page_size * pg_cnt)));
pbe_cnt += 1;
total_num_pbes += 1;
pbe++;
/* if done building pbes, issue the mbx cmd. */
if (total_num_pbes == num_pbes)
return;
/* if the given pbl is full storing the pbes,
* move to next pbl.
*/
if (pbe_cnt ==
(mr->hwmr.pbl_size / sizeof(u64))) {
pbl_tbl++;
pbe = (struct ocrdma_pbe *)pbl_tbl->va;
pbe_cnt = 0;
}
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
pages = sg_dma_len(sg) >> shift;
for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) {
/* store the page address in pbe */
pbe->pa_lo =
cpu_to_le32(sg_dma_address
(sg) +
(umem->page_size * pg_cnt));
pbe->pa_hi =
cpu_to_le32(upper_32_bits
((sg_dma_address
(sg) +
umem->page_size * pg_cnt)));
pbe_cnt += 1;
total_num_pbes += 1;
pbe++;
/* if done building pbes, issue the mbx cmd. */
if (total_num_pbes == num_pbes)
return;
/* if the given pbl is full storing the pbes,
* move to next pbl.
*/
if (pbe_cnt ==
(mr->hwmr.pbl_size / sizeof(u64))) {
pbl_tbl++;
pbe = (struct ocrdma_pbe *)pbl_tbl->va;
pbe_cnt = 0;
}
}
}
}
......
......@@ -232,8 +232,8 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
{
struct qib_mr *mr;
struct ib_umem *umem;
struct ib_umem_chunk *chunk;
int n, m, i;
struct scatterlist *sg;
int n, m, entry;
struct ib_mr *ret;
if (length == 0) {
......@@ -246,9 +246,7 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
if (IS_ERR(umem))
return (void *) umem;
n = 0;
list_for_each_entry(chunk, &umem->chunk_list, list)
n += chunk->nents;
n = umem->nmap;
mr = alloc_mr(n, pd);
if (IS_ERR(mr)) {
......@@ -268,11 +266,10 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
mr->mr.page_shift = ilog2(umem->page_size);
m = 0;
n = 0;
list_for_each_entry(chunk, &umem->chunk_list, list) {
for (i = 0; i < chunk->nents; i++) {
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
void *vaddr;
vaddr = page_address(sg_page(&chunk->page_list[i]));
vaddr = page_address(sg_page(sg));
if (!vaddr) {
ret = ERR_PTR(-EINVAL);
goto bail;
......@@ -284,7 +281,6 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
m++;
n = 0;
}
}
}
ret = &mr->ibmr;
......
......@@ -446,6 +446,7 @@ int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev)
mlx5_init_cq_table(dev);
mlx5_init_qp_table(dev);
mlx5_init_srq_table(dev);
mlx5_init_mr_table(dev);
return 0;
......
......@@ -36,11 +36,24 @@
#include <linux/mlx5/cmd.h>
#include "mlx5_core.h"
/**
 * mlx5_init_mr_table - prepare the per-device MR table for use
 * @dev: core device whose priv.mr_table is initialised
 *
 * Initialises the table's radix tree (GFP_ATOMIC allocation mask) and the
 * rwlock that guards it.
 */
void mlx5_init_mr_table(struct mlx5_core_dev *dev)
{
	struct mlx5_mr_table *mr_tbl;

	mr_tbl = &dev->priv.mr_table;

	/* The two initialisations are independent of each other. */
	INIT_RADIX_TREE(&mr_tbl->tree, GFP_ATOMIC);
	rwlock_init(&mr_tbl->lock);
}
/*
 * Teardown hook for the MR table (going by its name, the counterpart of
 * mlx5_init_mr_table()). The body is empty, so calling it currently has
 * no effect.
 */
void mlx5_cleanup_mr_table(struct mlx5_core_dev *dev)
{
}
int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
struct mlx5_create_mkey_mbox_in *in, int inlen,
mlx5_cmd_cbk_t callback, void *context,
struct mlx5_create_mkey_mbox_out *out)
{
struct mlx5_mr_table *table = &dev->priv.mr_table;
struct mlx5_create_mkey_mbox_out lout;
int err;
u8 key;
......@@ -73,14 +86,21 @@ int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n",
be32_to_cpu(lout.mkey), key, mr->key);
/* connect to MR tree */
write_lock_irq(&table->lock);
err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->key), mr);
write_unlock_irq(&table->lock);
return err;
}
EXPORT_SYMBOL(mlx5_core_create_mkey);
int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr)
{
struct mlx5_mr_table *table = &dev->priv.mr_table;
struct mlx5_destroy_mkey_mbox_in in;
struct mlx5_destroy_mkey_mbox_out out;
unsigned long flags;
int err;
memset(&in, 0, sizeof(in));
......@@ -95,6 +115,10 @@ int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr)
if (out.hdr.status)
return mlx5_cmd_status_to_err(&out.hdr);
write_lock_irqsave(&table->lock, flags);
radix_tree_delete(&table->tree, mlx5_base_mkey(mr->key));
write_unlock_irqrestore(&table->lock, flags);
return err;
}
EXPORT_SYMBOL(mlx5_core_destroy_mkey);
......@@ -144,3 +168,64 @@ int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
return err;
}
EXPORT_SYMBOL(mlx5_core_dump_fill_mkey);
/**
 * mlx5_core_create_psv - allocate PSV objects via the firmware command interface
 * @dev: core device the command is executed on
 * @pdn: protection domain number, OR-ed into the bits below the count field
 * @npsvs: number of PSVs requested; must be in the range [0, MLX5_MAX_PSVS]
 * @sig_index: caller-provided array with room for at least @npsvs entries;
 *             receives the low 24 bits of each index returned by the device
 *
 * Return: 0 on success, -EINVAL when @npsvs is out of range, otherwise the
 * error returned by mlx5_cmd_exec() or the translated command status.
 */
int mlx5_core_create_psv(struct mlx5_core_dev *dev, u32 pdn,
			 int npsvs, u32 *sig_index)
{
	struct mlx5_allocate_psv_in in;
	struct mlx5_allocate_psv_out out;
	int i, err;

	/*
	 * Negative counts must be rejected too: left-shifting a negative int
	 * is undefined behaviour and would put garbage in the count field.
	 */
	if (npsvs < 0 || npsvs > MLX5_MAX_PSVS)
		return -EINVAL;

	memset(&in, 0, sizeof(in));
	memset(&out, 0, sizeof(out));

	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_PSV);
	/* PSV count goes in bits 31:28; shift as unsigned to stay well-defined */
	in.npsv_pd = cpu_to_be32(((u32)npsvs << 28) | pdn);

	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
	if (err) {
		mlx5_core_err(dev, "cmd exec failed %d\n", err);
		return err;
	}

	if (out.hdr.status) {
		mlx5_core_err(dev, "create_psv bad status %d\n", out.hdr.status);
		return mlx5_cmd_status_to_err(&out.hdr);
	}

	/* Only the low 24 bits of each returned word carry the PSV index. */
	for (i = 0; i < npsvs; i++)
		sig_index[i] = be32_to_cpu(out.psv_idx[i]) & 0xffffff;

	return err;
}
/**
 * mlx5_core_destroy_psv - release a single PSV via the firmware command interface
 * @dev: core device the command is executed on
 * @psv_num: PSV number placed in the request mailbox
 *
 * Return: 0 on success, otherwise the error from mlx5_cmd_exec() or the
 * translated command status. Both failure paths log through mlx5_core_err().
 */
int mlx5_core_destroy_psv(struct mlx5_core_dev *dev, int psv_num)
{
	struct mlx5_destroy_psv_out out;
	struct mlx5_destroy_psv_in in;
	int err;

	memset(&out, 0, sizeof(out));
	memset(&in, 0, sizeof(in));

	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_PSV);
	in.psv_number = cpu_to_be32(psv_num);

	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
	if (err) {
		mlx5_core_err(dev, "destroy_psv cmd exec failed %d\n", err);
		return err;
	}

	/* Command was delivered; a zero status byte means it also succeeded. */
	if (!out.hdr.status)
		return 0;

	mlx5_core_err(dev, "destroy_psv bad status %d\n", out.hdr.status);
	return mlx5_cmd_status_to_err(&out.hdr);
}
......@@ -80,6 +80,7 @@ enum {
MLX5_CQE_RESP_SEND_IMM = 3,
MLX5_CQE_RESP_SEND_INV = 4,
MLX5_CQE_RESIZE_CQ = 5,
MLX5_CQE_SIG_ERR = 12,
MLX5_CQE_REQ_ERR = 13,
MLX5_CQE_RESP_ERR = 14,
MLX5_CQE_INVALID = 15,
......
......@@ -48,6 +48,8 @@ enum {
MLX5_MAX_COMMANDS = 32,
MLX5_CMD_DATA_BLOCK_SIZE = 512,
MLX5_PCI_CMD_XPORT = 7,
MLX5_MKEY_BSF_OCTO_SIZE = 4,
MLX5_MAX_PSVS = 4,
};
enum {
......@@ -116,6 +118,7 @@ enum {
MLX5_MKEY_MASK_START_ADDR = 1ull << 6,
MLX5_MKEY_MASK_PD = 1ull << 7,
MLX5_MKEY_MASK_EN_RINVAL = 1ull << 8,
MLX5_MKEY_MASK_EN_SIGERR = 1ull << 9,
MLX5_MKEY_MASK_BSF_EN = 1ull << 12,
MLX5_MKEY_MASK_KEY = 1ull << 13,
MLX5_MKEY_MASK_QPN = 1ull << 14,
......@@ -555,6 +558,23 @@ struct mlx5_cqe64 {
u8 op_own;
};
/*
 * Completion queue entry layout carrying signature-error details.
 * NOTE(review): presumably the format of a CQE whose opcode is
 * MLX5_CQE_SIG_ERR -- confirm against the CQ polling code.
 *
 * The declared field sizes add up to 64 bytes. Multi-byte fields are
 * big-endian (__be16/__be32/__be64) and must be converted before use.
 * NOTE(review): rsvd22/rsvd30/rsvd38 look like they are named after byte
 * offsets, but rsvd38 follows the 4-byte qpn that itself starts at 0x38.
 */
struct mlx5_sig_err_cqe {
u8 rsvd0[16];
__be32 expected_trans_sig; /* expected vs. actual pairs reported by HW */
__be32 actual_trans_sig;
__be32 expected_reftag;
__be32 actual_reftag;
__be16 syndrome;
u8 rsvd22[2];
__be32 mkey;
__be64 err_offset;
u8 rsvd30[8];
__be32 qpn;
u8 rsvd38[2];
u8 signature;
u8 op_own; /* last byte, same trailing field name as struct mlx5_cqe64 */
};
struct mlx5_wqe_srq_next_seg {
u8 rsvd0[2];
__be16 next_wqe_index;
......@@ -936,4 +956,27 @@ enum {
MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO = 1 << 0
};
/*
 * Input mailbox for the MLX5_CMD_OP_CREATE_PSV command, as filled in by
 * mlx5_core_create_psv().
 */
struct mlx5_allocate_psv_in {
struct mlx5_inbox_hdr hdr;
__be32 npsv_pd; /* (number of PSVs << 28) | PD number, big-endian */
__be32 rsvd_psv0; /* left zero by mlx5_core_create_psv() */
};
/*
 * Output mailbox for the MLX5_CMD_OP_CREATE_PSV command, as read by
 * mlx5_core_create_psv().
 */
struct mlx5_allocate_psv_out {
struct mlx5_outbox_hdr hdr;
u8 rsvd[8];
__be32 psv_idx[4]; /* one slot per PSV; the reader keeps only the low 24 bits */
};
/*
 * Input mailbox for the MLX5_CMD_OP_DESTROY_PSV command, as filled in by
 * mlx5_core_destroy_psv().
 */
struct mlx5_destroy_psv_in {
struct mlx5_inbox_hdr hdr;
__be32 psv_number; /* PSV to destroy, big-endian */
u8 rsvd[4];
};
/*
 * Output mailbox for the MLX5_CMD_OP_DESTROY_PSV command; only the header
 * status is examined by mlx5_core_destroy_psv().
 */
struct mlx5_destroy_psv_out {
struct mlx5_outbox_hdr hdr;
u8 rsvd[8];
};
#endif /* MLX5_DEVICE_H */
......@@ -401,6 +401,26 @@ struct mlx5_eq {
struct mlx5_rsc_debug *dbg;
};
/*
 * Host-side record of one PSV: its index plus a copy of its fields.
 * NOTE(review): field meanings below are inferred from names only -- confirm
 * against the code that fills this structure.
 */
struct mlx5_core_psv {
u32 psv_idx; /* presumably the index handed back by mlx5_core_create_psv() */
struct psv_layout {
u32 pd;
u16 syndrome;
u16 reserved;
u16 bg; /* presumably block guard */
u16 app_tag;
u32 ref_tag;
} psv;
};
/*
 * Signature context: two PSV records (named for the memory and wire sides),
 * one stored signature error item, and bookkeeping flags/counter.
 * NOTE(review): who sets and clears the flags is not visible here -- confirm
 * in the MR status-check and CQ error paths.
 */
struct mlx5_core_sig_ctx {
struct mlx5_core_psv psv_memory;
struct mlx5_core_psv psv_wire;
struct ib_sig_err err_item;
bool sig_status_checked;
bool sig_err_exists;
u32 sigerr_count;
};
struct mlx5_core_mr {
u64 iova;
......@@ -475,6 +495,13 @@ struct mlx5_srq_table {
struct radix_tree_root tree;
};
/*
 * Per-device table of memory regions, embedded in struct mlx5_priv.
 * Entries are inserted and deleted under the write side of @lock, keyed by
 * mlx5_base_mkey() of the MR key.
 */
struct mlx5_mr_table {
/* protects the radix tree below */
rwlock_t lock;
struct radix_tree_root tree;
};
struct mlx5_priv {
char name[MLX5_MAX_NAME_LEN];
struct mlx5_eq_table eq_table;
......@@ -504,6 +531,10 @@ struct mlx5_priv {
struct mlx5_cq_table cq_table;
/* end: cq staff */
/* start: mr staff */
struct mlx5_mr_table mr_table;
/* end: mr staff */
/* start: alloc staff */
struct mutex pgdir_mutex;
struct list_head pgdir_list;
......@@ -651,6 +682,11 @@ static inline void mlx5_vfree(const void *addr)
kfree(addr);
}
/*
 * Return @key with its low 8 bits cleared; the upper 24 bits are kept
 * unchanged.
 */
static inline u32 mlx5_base_mkey(const u32 key)
{
	const u32 low_byte = 0xffu;

	return key & ~low_byte;
}
int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev);
void mlx5_dev_cleanup(struct mlx5_core_dev *dev);
int mlx5_cmd_init(struct mlx5_core_dev *dev);
......@@ -685,6 +721,8 @@ int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
struct mlx5_query_srq_mbox_out *out);
int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
u16 lwm, int is_srq);
void mlx5_init_mr_table(struct mlx5_core_dev *dev);
void mlx5_cleanup_mr_table(struct mlx5_core_dev *dev);
int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
struct mlx5_create_mkey_mbox_in *in, int inlen,
mlx5_cmd_cbk_t callback, void *context,
......@@ -746,6 +784,9 @@ void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db);
const char *mlx5_command_str(int command);
int mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev);
void mlx5_cmdif_debugfs_cleanup(struct mlx5_core_dev *dev);
int mlx5_core_create_psv(struct mlx5_core_dev *dev, u32 pdn,
int npsvs, u32 *sig_index);
int mlx5_core_destroy_psv(struct mlx5_core_dev *dev, int psv_num);
static inline u32 mlx5_mkey_to_idx(u32 mkey)
{
......
......@@ -37,6 +37,9 @@
#include <linux/mlx5/driver.h>
#define MLX5_INVALID_LKEY 0x100
#define MLX5_SIG_WQE_SIZE (MLX5_SEND_WQE_BB * 5)
#define MLX5_DIF_SIZE 8
#define MLX5_STRIDE_BLOCK_OP 0x400
enum mlx5_qp_optpar {
MLX5_QP_OPTPAR_ALT_ADDR_PATH = 1 << 0,
......@@ -151,6 +154,11 @@ enum {
MLX5_SND_DBR = 1,
};
/*
 * Single-bit flags.
 * NOTE(review): presumably bits of a WQE segment flags byte -- confirm at
 * the call sites; nothing in this header shows where they are applied.
 */
enum {
MLX5_FLAGS_INLINE = 1<<7, /* bit 7 */
MLX5_FLAGS_CHECK_FREE = 1<<5, /* bit 5 */
};
struct mlx5_wqe_fmr_seg {
__be32 flags;
__be32 mem_key;
......@@ -278,6 +286,60 @@ struct mlx5_wqe_inline_seg {
__be32 byte_count;
};
/*
 * BSF descriptor made of three consecutive sections: basic (16 bytes by
 * declared field sizes), ext (16 bytes) and inl (32 bytes).
 * Multi-byte fields are big-endian.
 * NOTE(review): the w_ and m_ prefixes presumably mean wire and memory
 * domain -- confirm against the code that builds this descriptor.
 */
struct mlx5_bsf {
struct mlx5_bsf_basic {
u8 bsf_size_sbs;
u8 check_byte_mask;
/* one byte, interpreted under one of three names */
union {
u8 copy_byte_mask;
u8 bs_selector;
u8 rsvd_wflags;
} wire;
/* one byte, interpreted under one of two names */
union {
u8 bs_selector;
u8 rsvd_mflags;
} mem;
__be32 raw_data_size;
__be32 w_bfs_psv;
__be32 m_bfs_psv;
} basic;
struct mlx5_bsf_ext {
__be32 t_init_gen_pro_size;
__be32 rsvd_epi_size;
__be32 w_tfs_psv;
__be32 m_tfs_psv;
} ext;
struct mlx5_bsf_inl {
__be32 w_inl_vld;
__be32 w_rsvd;
__be64 w_block_format;
__be32 m_inl_vld;
__be32 m_rsvd;
__be64 m_block_format;
} inl;
};
/*
 * 16-byte big-endian descriptor holding a byte count, a key and an address.
 * NOTE(review): presumably one entry of a KLM list referencing memory by
 * key -- confirm with the WQE-building code.
 */
struct mlx5_klm {
__be32 bcount;
__be32 key;
__be64 va;
};
/*
 * 16-byte big-endian entry: like struct mlx5_klm, but the first 32 bits are
 * split into a 16-bit stride and a 16-bit byte count.
 * NOTE(review): presumably follows a struct mlx5_stride_block_ctrl_seg --
 * confirm with the WQE-building code.
 */
struct mlx5_stride_block_entry {
__be16 stride;
__be16 bcount;
__be32 key;
__be64 va;
};
/*
 * 16-byte control segment for a strided block operation.
 * NOTE(review): @op presumably takes MLX5_STRIDE_BLOCK_OP and @num_entries
 * presumably counts the struct mlx5_stride_block_entry items that follow --
 * confirm with the WQE-building code.
 */
struct mlx5_stride_block_ctrl_seg {
__be32 bcount_per_cycle;
__be32 op;
__be32 repeat_count;
u16 rsvd; /* reserved; the only field here not declared big-endian */
__be16 num_entries;
};
struct mlx5_core_qp {
void (*event) (struct mlx5_core_qp *, int);
int qpn;
......@@ -444,6 +506,11 @@ static inline struct mlx5_core_qp *__mlx5_qp_lookup(struct mlx5_core_dev *dev, u
return radix_tree_lookup(&dev->priv.qp_table.tree, qpn);
}
/*
 * Fetch the MR stored under @key in the device's MR radix tree.
 *
 * Returns the stored entry, or NULL when radix_tree_lookup() finds nothing.
 * The key is used as given and no lock is taken here.
 * NOTE(review): entries are inserted under mlx5_base_mkey(key), so callers
 * presumably must pass a base mkey and hold the table lock -- confirm.
 */
static inline struct mlx5_core_mr *__mlx5_mr_lookup(struct mlx5_core_dev *dev, u32 key)
{
	void *entry;

	entry = radix_tree_lookup(&dev->priv.mr_table.tree, key);

	return entry;
}
int mlx5_core_create_qp(struct mlx5_core_dev *dev,
struct mlx5_core_qp *qp,
struct mlx5_create_qp_mbox_in *in,
......
......@@ -46,17 +46,12 @@ struct ib_umem {
int page_size;
int writable;
int hugetlb;
struct list_head chunk_list;
struct work_struct work;
struct mm_struct *mm;
unsigned long diff;
};
struct ib_umem_chunk {
struct list_head list;
int nents;
int nmap;
struct scatterlist page_list[0];
struct sg_table sg_head;
int nmap;
int npages;
};
#ifdef CONFIG_INFINIBAND_USER_MEM
......
......@@ -122,7 +122,19 @@ enum ib_device_cap_flags {
IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1<<22),
IB_DEVICE_MEM_WINDOW_TYPE_2A = (1<<23),
IB_DEVICE_MEM_WINDOW_TYPE_2B = (1<<24),
IB_DEVICE_MANAGED_FLOW_STEERING = (1<<29)
IB_DEVICE_MANAGED_FLOW_STEERING = (1<<29),
IB_DEVICE_SIGNATURE_HANDOVER = (1<<30)
};
/*
 * Bit flags naming the T10-DIF protection types (1, 2 and 3).
 * NOTE(review): presumably the values reported in
 * ib_device_attr.sig_prot_cap -- confirm in the drivers that set it.
 */
enum ib_signature_prot_cap {
IB_PROT_T10DIF_TYPE_1 = 1,
IB_PROT_T10DIF_TYPE_2 = 1 << 1,
IB_PROT_T10DIF_TYPE_3 = 1 << 2,
};
/*
 * Bit flags naming the T10-DIF block-guard kinds (CRC and CSUM).
 * NOTE(review): presumably the values reported in
 * ib_device_attr.sig_guard_cap -- confirm in the drivers that set it.
 */
enum ib_signature_guard_cap {
IB_GUARD_T10DIF_CRC = 1,
IB_GUARD_T10DIF_CSUM = 1 << 1,
};
enum ib_atomic_cap {
......@@ -172,6 +184,8 @@ struct ib_device_attr {
unsigned int max_fast_reg_page_list_len;
u16 max_pkeys;
u8 local_ca_ack_delay;
int sig_prot_cap;
int sig_guard_cap;
};
enum ib_mtu {
......@@ -461,6 +475,130 @@ int ib_rate_to_mult(enum ib_rate rate) __attribute_const__;
*/
int ib_rate_to_mbps(enum ib_rate rate) __attribute_const__;
/*
 * Memory region creation flags.
 * NOTE(review): presumably the bits accepted in ib_mr_init_attr.flags --
 * confirm in ib_create_mr() implementations.
 */
enum ib_mr_create_flags {
IB_MR_SIGNATURE_EN = 1,
};
/**
 * struct ib_mr_init_attr - Memory region init attributes passed to routine
 *     ib_create_mr.
 * @max_reg_descriptors: max number of registration descriptors that
 *     may be used with registration work requests.
 * @flags: MR creation flags bit mask (presumably enum ib_mr_create_flags
 *     values -- TODO confirm).
 */
struct ib_mr_init_attr {
int max_reg_descriptors;
u32 flags;
};
/*
 * Signature scheme selector, stored in ib_sig_domain.sig_type.
 * T10-DIF is the only scheme defined.
 */
enum ib_signature_type {
IB_SIG_TYPE_T10_DIF,
};
/**
 * enum ib_t10_dif_type - T10-DIF signature types
 *
 * The T10-DIF types are defined by the SCSI specifications.
 * IB_T10DIF_NONE is the first enumerator and therefore has value 0.
 */
enum ib_t10_dif_type {
IB_T10DIF_NONE,
IB_T10DIF_TYPE1,
IB_T10DIF_TYPE2,
IB_T10DIF_TYPE3
};
/**
 * enum ib_t10_dif_bg_type - Signature T10-DIF block-guard types
 * @IB_T10DIF_CRC: Corresponds to T10-PI mandated CRC checksum rules.
 * @IB_T10DIF_CSUM: Corresponds to IP checksum rules.
 */
enum ib_t10_dif_bg_type {
IB_T10DIF_CRC,
IB_T10DIF_CSUM
};
/**
 * struct ib_t10_dif_domain - Parameters specific to a T10-DIF
 *     domain.
 * @type: T10-DIF type (0|1|2|3)
 * @bg_type: T10-DIF block guard type (CRC|CSUM)
 * @pi_interval: protection information interval.
 * @bg: seed of guard computation.
 * @app_tag: application tag of guard block
 * @ref_tag: initial guard block reference tag.
 * @type3_inc_reftag: T10-DIF type 3 does not specify how the
 *     reference tag is handled, so the user chooses
 *     whether to increment it or not.
 */
struct ib_t10_dif_domain {
enum ib_t10_dif_type type;
enum ib_t10_dif_bg_type bg_type;
u16 pi_interval;
u16 bg;
u16 app_tag;
u32 ref_tag;
bool type3_inc_reftag;
};
/**
 * struct ib_sig_domain - Parameters for signature domain
 * @sig_type: specific signature type
 * @sig: union of all signature domain attributes that may
 *     be used to set domain layout.
 */
struct ib_sig_domain {
enum ib_signature_type sig_type;
union {
struct ib_t10_dif_domain dif;
} sig;
};
/**
 * struct ib_sig_attrs - Parameters for signature handover operation
 * @check_mask: bitmask for signature byte check (8 bytes)
 * @mem: memory domain layout descriptor.
 * @wire: wire domain layout descriptor.
 */
struct ib_sig_attrs {
u8 check_mask;
struct ib_sig_domain mem;
struct ib_sig_domain wire;
};
/*
 * Kind of signature mismatch recorded in ib_sig_err.err_type: the guard,
 * the reference tag or the application tag.
 */
enum ib_sig_err_type {
IB_SIG_BAD_GUARD,
IB_SIG_BAD_REFTAG,
IB_SIG_BAD_APPTAG,
};
/**
 * struct ib_sig_err - signature error descriptor
 * @err_type: which signature field mismatched.
 * @expected: expected value of the mismatching field.
 * @actual: value actually observed.
 * @sig_err_offset: offset at which the error was detected
 *     (presumably in bytes within the transfer -- TODO confirm).
 * @key: key associated with the error (presumably the key of the
 *     MR on which it was detected -- TODO confirm).
 */
struct ib_sig_err {
enum ib_sig_err_type err_type;
u32 expected;
u32 actual;
u64 sig_err_offset;
u32 key;
};
/*
 * MR status check selector bits.
 * NOTE(review): presumably used both in the check_mask argument of
 * ib_check_mr_status() and in ib_mr_status.fail_status -- confirm.
 */
enum ib_mr_status_check {
IB_MR_CHECK_SIG_STATUS = 1,
};
/**
 * struct ib_mr_status - Memory region status container
 *
 * @fail_status: Bitmask of MR checks status. For each
 *     failed check a corresponding status bit is set.
 * @sig_err: Additional info for IB_MR_CHECK_SIG_STATUS
 *     failure.
 */
struct ib_mr_status {
u32 fail_status;
struct ib_sig_err sig_err;
};
/**
* mult_to_ib_rate - Convert a multiple of 2.5 Gbit/sec to an IB rate
* enum.
......@@ -644,6 +782,7 @@ enum ib_qp_create_flags {
IB_QP_CREATE_IPOIB_UD_LSO = 1 << 0,
IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK = 1 << 1,
IB_QP_CREATE_NETIF_QP = 1 << 5,
IB_QP_CREATE_SIGNATURE_EN = 1 << 6,
/* reserve bits 26-31 for low level drivers' internal use */
IB_QP_CREATE_RESERVED_START = 1 << 26,
IB_QP_CREATE_RESERVED_END = 1 << 31,
......@@ -808,6 +947,7 @@ enum ib_wr_opcode {
IB_WR_MASKED_ATOMIC_CMP_AND_SWP,
IB_WR_MASKED_ATOMIC_FETCH_AND_ADD,
IB_WR_BIND_MW,
IB_WR_REG_SIG_MR,
/* reserve values for low level drivers' internal use.
* These values will not be used at all in the ib core layer.
*/
......@@ -913,6 +1053,12 @@ struct ib_send_wr {
u32 rkey;
struct ib_mw_bind_info bind_info;
} bind_mw;
struct {
struct ib_sig_attrs *sig_attrs;
struct ib_mr *sig_mr;
int access_flags;
struct ib_sge *prot;
} sig_handover;
} wr;
u32 xrc_remote_srq_num; /* XRC TGT QPs only */
};
......@@ -1407,6 +1553,9 @@ struct ib_device {
int (*query_mr)(struct ib_mr *mr,
struct ib_mr_attr *mr_attr);
int (*dereg_mr)(struct ib_mr *mr);
int (*destroy_mr)(struct ib_mr *mr);
struct ib_mr * (*create_mr)(struct ib_pd *pd,
struct ib_mr_init_attr *mr_init_attr);
struct ib_mr * (*alloc_fast_reg_mr)(struct ib_pd *pd,
int max_page_list_len);
struct ib_fast_reg_page_list * (*alloc_fast_reg_page_list)(struct ib_device *device,
......@@ -1455,6 +1604,8 @@ struct ib_device {
*flow_attr,
int domain);
int (*destroy_flow)(struct ib_flow *flow_id);
int (*check_mr_status)(struct ib_mr *mr, u32 check_mask,
struct ib_mr_status *mr_status);
struct ib_dma_mapping_ops *dma_ops;
......@@ -2250,6 +2401,25 @@ int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr);
*/
int ib_dereg_mr(struct ib_mr *mr);
/**
* ib_create_mr - Allocates a memory region that may be used for
* signature handover operations.
* @pd: The protection domain associated with the region.
* @mr_init_attr: memory region init attributes.
*/
struct ib_mr *ib_create_mr(struct ib_pd *pd,
struct ib_mr_init_attr *mr_init_attr);
/**
* ib_destroy_mr - Destroys a memory region that was created using
* ib_create_mr and removes it from HW translation tables.
* @mr: The memory region to destroy.
*
* This function can fail, if the memory region has memory windows bound to it.
*/
int ib_destroy_mr(struct ib_mr *mr);
/**
* ib_alloc_fast_reg_mr - Allocates memory region usable with the
* IB_WR_FAST_REG_MR send work request.
......@@ -2435,4 +2605,19 @@ static inline int ib_check_mr_access(int flags)
return 0;
}
/**
* ib_check_mr_status: lightweight check of MR status.
* This routine may provide status checks on a selected
* ib_mr. first use is for signature status check.
*
* @mr: A memory region.
* @check_mask: Bitmask of which checks to perform from
* ib_mr_status_check enumeration.
* @mr_status: The container of relevant status checks.
* failed checks will be indicated in the status bitmask
* and the relevant info shall be in the error item.
*/
int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
struct ib_mr_status *mr_status);
#endif /* IB_VERBS_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment