Commit fd7dbf03 authored by Jason Gunthorpe

RDMA/odp: Make it clearer when a umem is an implicit ODP umem

Implicit ODP umems are special: they don't have any page lists, they don't
exist in the interval tree, and they are never DMA mapped.

Instead of trying to guess this based on a zero length, use an explicit
flag.

Further, do not allow non-implicit umems to be 0 size.

Link: https://lore.kernel.org/r/20190819111710.18440-4-leon@kernel.org
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
parent f993de88
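
For illustration, a rough sketch of the difference between the old zero-length guess and the new explicit flag. The two wrapper helpers below are hypothetical and exist only for this sketch; only ib_umem_start(), ib_umem_end(), and the new is_implicit_odp field come from the patch itself:

#include <rdma/ib_umem_odp.h>

/* Hypothetical helper: the old heuristic inferred "implicit" from an
 * empty interval, i.e. a zero-length umem.
 */
static bool umem_looks_implicit_old(struct ib_umem_odp *umem_odp)
{
        return ib_umem_start(umem_odp) == ib_umem_end(umem_odp);
}

/* Hypothetical helper: after this patch, ib_umem_odp_get() sets
 * is_implicit_odp when both address and length are zero, so callers
 * test the flag instead of guessing from the size.
 */
static bool umem_is_implicit_new(struct ib_umem_odp *umem_odp)
{
        return umem_odp->is_implicit_odp;
}
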
@@ -176,18 +176,15 @@ static void add_umem_to_per_mm(struct ib_umem_odp *umem_odp)
 	struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm;
 
 	down_write(&per_mm->umem_rwsem);
-	if (likely(ib_umem_start(umem_odp) != ib_umem_end(umem_odp))) {
-		/*
-		 * Note that the representation of the intervals in the
-		 * interval tree considers the ending point as contained in
-		 * the interval, while the function ib_umem_end returns the
-		 * first address which is not contained in the umem.
-		 */
-		umem_odp->interval_tree.start = ib_umem_start(umem_odp);
-		umem_odp->interval_tree.last = ib_umem_end(umem_odp) - 1;
-		interval_tree_insert(&umem_odp->interval_tree,
-				     &per_mm->umem_tree);
-	}
+	/*
+	 * Note that the representation of the intervals in the interval tree
+	 * considers the ending point as contained in the interval, while the
+	 * function ib_umem_end returns the first address which is not
+	 * contained in the umem.
+	 */
+	umem_odp->interval_tree.start = ib_umem_start(umem_odp);
+	umem_odp->interval_tree.last = ib_umem_end(umem_odp) - 1;
+	interval_tree_insert(&umem_odp->interval_tree, &per_mm->umem_tree);
 	up_write(&per_mm->umem_rwsem);
 }
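
A quick worked example of the interval bookkeeping above, with made-up addresses: for a umem covering virtual addresses [0x1000, 0x3000), ib_umem_start() returns 0x1000 and ib_umem_end() returns 0x3000 (the first address not contained in the umem), so the tree node is stored as:

        umem_odp->interval_tree.start = 0x1000;
        umem_odp->interval_tree.last  = 0x3000 - 1;	/* 0x2fff, the last byte that is contained */
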
@@ -196,11 +193,8 @@ static void remove_umem_from_per_mm(struct ib_umem_odp *umem_odp)
 	struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm;
 
 	down_write(&per_mm->umem_rwsem);
-	if (likely(ib_umem_start(umem_odp) != ib_umem_end(umem_odp)))
-		interval_tree_remove(&umem_odp->interval_tree,
-				     &per_mm->umem_tree);
+	interval_tree_remove(&umem_odp->interval_tree, &per_mm->umem_tree);
 	complete_all(&umem_odp->notifier_completion);
 	up_write(&per_mm->umem_rwsem);
 }
@@ -320,6 +314,9 @@ struct ib_umem_odp *ib_alloc_odp_umem(struct ib_umem_odp *root,
 	int pages = size >> PAGE_SHIFT;
 	int ret;
 
+	if (!size)
+		return ERR_PTR(-EINVAL);
+
 	odp_data = kzalloc(sizeof(*odp_data), GFP_KERNEL);
 	if (!odp_data)
 		return ERR_PTR(-ENOMEM);
@@ -381,6 +378,9 @@ int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access)
 	struct mm_struct *mm = umem->owning_mm;
 	int ret_val;
 
+	if (umem_odp->umem.address == 0 && umem_odp->umem.length == 0)
+		umem_odp->is_implicit_odp = 1;
+
 	umem_odp->page_shift = PAGE_SHIFT;
 	if (access & IB_ACCESS_HUGETLB) {
 		struct vm_area_struct *vma;
@@ -401,7 +401,10 @@ int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access)
 
 	init_completion(&umem_odp->notifier_completion);
 
-	if (ib_umem_odp_num_pages(umem_odp)) {
+	if (!umem_odp->is_implicit_odp) {
+		if (!ib_umem_odp_num_pages(umem_odp))
+			return -EINVAL;
+
 		umem_odp->page_list =
 			vzalloc(array_size(sizeof(*umem_odp->page_list),
 					   ib_umem_odp_num_pages(umem_odp)));
@@ -420,6 +423,8 @@ int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access)
 	ret_val = get_per_mm(umem_odp);
 	if (ret_val)
 		goto out_dma_list;
 
-	add_umem_to_per_mm(umem_odp);
+	if (!umem_odp->is_implicit_odp)
+		add_umem_to_per_mm(umem_odp);
+
 	return 0;
@@ -439,13 +444,14 @@ void ib_umem_odp_release(struct ib_umem_odp *umem_odp)
 	 * It is the driver's responsibility to ensure, before calling us,
 	 * that the hardware will not attempt to access the MR any more.
 	 */
-	ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp),
-				    ib_umem_end(umem_odp));
-	remove_umem_from_per_mm(umem_odp);
+	if (!umem_odp->is_implicit_odp) {
+		ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp),
+					    ib_umem_end(umem_odp));
+		remove_umem_from_per_mm(umem_odp);
+		vfree(umem_odp->dma_list);
+		vfree(umem_odp->page_list);
+	}
 	put_per_mm(umem_odp);
-	vfree(umem_odp->dma_list);
-	vfree(umem_odp->page_list);
 }
 
 /*
...
@@ -1600,7 +1600,7 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 		/* Wait for all running page-fault handlers to finish. */
 		synchronize_srcu(&dev->mr_srcu);
 		/* Destroy all page mappings */
-		if (umem_odp->page_list)
+		if (!umem_odp->is_implicit_odp)
 			mlx5_ib_invalidate_range(umem_odp,
 						 ib_umem_start(umem_odp),
 						 ib_umem_end(umem_odp));
...
@@ -584,7 +584,7 @@ static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
 	struct ib_umem_odp *odp;
 	size_t size;
 
-	if (!odp_mr->page_list) {
+	if (odp_mr->is_implicit_odp) {
 		odp = implicit_mr_get_data(mr, io_virt, bcnt);
 
 		if (IS_ERR(odp))
...
@@ -69,6 +69,14 @@ struct ib_umem_odp {
 	/* Tree tracking */
 	struct interval_tree_node interval_tree;
 
+	/*
+	 * An implicit odp umem cannot be DMA mapped, has 0 length, and serves
+	 * only as an anchor for the driver to hold onto the per_mm. FIXME:
+	 * This should be removed and drivers should work with the per_mm
+	 * directly.
+	 */
+	bool is_implicit_odp;
+
 	struct completion notifier_completion;
 	int dying;
 	unsigned int page_shift;
...