Commit 3d5f3c54 authored by Jason Gunthorpe

RDMA/mlx5: Rework implicit_mr_get_data

This function is intended to loop across each MTT chunk in the implicit
parent that intersects the range [io_virt, io_virt+bcnt). But it has a
confusing construction, so:

- Consistently use imr and odp_imr to refer to the implicit parent
  to avoid confusion with the normal mr and odp of the child
- Directly compute the inclusive start/end indexes by shifting. This is
  clearer to understand the intent and avoids any errors from unaligned
  values of addr
- Iterate directly over the range of MTT indexes, do not make a loop
  out of goto
- Follow 'success oriented flow', with goto error unwind
- Directly calculate the range of idx's that need update_xlt
- Ensure that any leaf MR added to the interval tree always results in an
  update to the XLT

Link: https://lore.kernel.org/r/20191009160934.3143-6-jgg@ziepe.ca
Reviewed-by: Artemy Kovalyov <artemyko@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
parent 74bddb36
@@ -479,78 +479,93 @@ static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd,
         return ERR_PTR(err);
 }
 
-static struct ib_umem_odp *implicit_mr_get_data(struct mlx5_ib_mr *mr,
-                                                u64 io_virt, size_t bcnt)
-{
-        struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.pd->device);
-        struct ib_umem_odp *odp, *result = NULL;
-        struct ib_umem_odp *odp_mr = to_ib_umem_odp(mr->umem);
-        u64 addr = io_virt & MLX5_IMR_MTT_MASK;
-        int nentries = 0, start_idx = 0, ret;
-        struct mlx5_ib_mr *mtt;
-
-        mutex_lock(&odp_mr->umem_mutex);
-        odp = odp_lookup(addr, 1, mr);
-
-        mlx5_ib_dbg(dev, "io_virt:%llx bcnt:%zx addr:%llx odp:%p\n",
-                    io_virt, bcnt, addr, odp);
-
-next_mr:
-        if (likely(odp)) {
-                if (nentries)
-                        nentries++;
-        } else {
-                odp = ib_umem_odp_alloc_child(odp_mr, addr, MLX5_IMR_MTT_SIZE);
-                if (IS_ERR(odp)) {
-                        mutex_unlock(&odp_mr->umem_mutex);
-                        return ERR_CAST(odp);
-                }
-
-                mtt = implicit_mr_alloc(mr->ibmr.pd, odp, 0,
-                                        mr->access_flags);
-                if (IS_ERR(mtt)) {
-                        mutex_unlock(&odp_mr->umem_mutex);
-                        ib_umem_odp_release(odp);
-                        return ERR_CAST(mtt);
-                }
-
-                odp->private = mtt;
-                mtt->umem = &odp->umem;
-                mtt->mmkey.iova = addr;
-                mtt->parent = mr;
-                INIT_WORK(&odp->work, mr_leaf_free_action);
-
-                xa_store(&dev->odp_mkeys, mlx5_base_mkey(mtt->mmkey.key),
-                         &mtt->mmkey, GFP_ATOMIC);
-
-                if (!nentries)
-                        start_idx = addr >> MLX5_IMR_MTT_SHIFT;
-                nentries++;
-        }
-
-        /* Return first odp if region not covered by single one */
-        if (likely(!result))
-                result = odp;
-
-        addr += MLX5_IMR_MTT_SIZE;
-        if (unlikely(addr < io_virt + bcnt)) {
-                odp = odp_next(odp);
-                if (odp && ib_umem_start(odp) != addr)
-                        odp = NULL;
-                goto next_mr;
-        }
-
-        if (unlikely(nentries)) {
-                ret = mlx5_ib_update_xlt(mr, start_idx, nentries, 0,
-                                         MLX5_IB_UPD_XLT_INDIRECT |
-                                         MLX5_IB_UPD_XLT_ATOMIC);
-                if (ret) {
-                        mlx5_ib_err(dev, "Failed to update PAS\n");
-                        result = ERR_PTR(ret);
-                }
-        }
-
-        mutex_unlock(&odp_mr->umem_mutex);
+static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
+                                                unsigned long idx)
+{
+        struct ib_umem_odp *odp;
+        struct mlx5_ib_mr *mtt;
+
+        odp = ib_umem_odp_alloc_child(to_ib_umem_odp(imr->umem),
+                                      idx * MLX5_IMR_MTT_SIZE,
+                                      MLX5_IMR_MTT_SIZE);
+        if (IS_ERR(odp))
+                return ERR_CAST(odp);
+
+        mtt = implicit_mr_alloc(imr->ibmr.pd, odp, 0, imr->access_flags);
+        if (IS_ERR(mtt)) {
+                ib_umem_odp_release(odp);
+                return mtt;
+        }
+
+        odp->private = mtt;
+        mtt->umem = &odp->umem;
+        mtt->mmkey.iova = idx * MLX5_IMR_MTT_SIZE;
+        mtt->parent = imr;
+        INIT_WORK(&odp->work, mr_leaf_free_action);
+
+        xa_store(&mtt->dev->odp_mkeys, mlx5_base_mkey(mtt->mmkey.key),
+                 &mtt->mmkey, GFP_ATOMIC);
+        return mtt;
+}
+
+static struct ib_umem_odp *implicit_mr_get_data(struct mlx5_ib_mr *imr,
+                                                u64 io_virt, size_t bcnt)
+{
+        struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem);
+        unsigned long end_idx = (io_virt + bcnt - 1) >> MLX5_IMR_MTT_SHIFT;
+        unsigned long idx = io_virt >> MLX5_IMR_MTT_SHIFT;
+        unsigned long inv_start_idx = end_idx + 1;
+        unsigned long inv_len = 0;
+        struct ib_umem_odp *result = NULL;
+        struct ib_umem_odp *odp;
+        int ret;
+
+        mutex_lock(&odp_imr->umem_mutex);
+        odp = odp_lookup(idx * MLX5_IMR_MTT_SIZE, 1, imr);
+        for (idx = idx; idx <= end_idx; idx++) {
+                if (unlikely(!odp)) {
+                        struct mlx5_ib_mr *mtt;
+
+                        mtt = implicit_get_child_mr(imr, idx);
+                        if (IS_ERR(mtt)) {
+                                result = ERR_CAST(mtt);
+                                goto out;
+                        }
+                        odp = to_ib_umem_odp(mtt->umem);
+                        inv_start_idx = min(inv_start_idx, idx);
+                        inv_len = idx - inv_start_idx + 1;
+                }
+
+                /* Return first odp if region not covered by single one */
+                if (likely(!result))
+                        result = odp;
+
+                odp = odp_next(odp);
+                if (odp && ib_umem_start(odp) != idx * MLX5_IMR_MTT_SIZE)
+                        odp = NULL;
+        }
+
+        /*
+         * Any time the children in the interval tree are changed we must
+         * perform an update of the xlt before exiting to ensure the HW and
+         * the tree remains synchronized.
+         */
+out:
+        if (likely(!inv_len))
+                goto out_unlock;
+
+        ret = mlx5_ib_update_xlt(imr, inv_start_idx, inv_len, 0,
+                                 MLX5_IB_UPD_XLT_INDIRECT |
+                                 MLX5_IB_UPD_XLT_ATOMIC);
+        if (ret) {
+                mlx5_ib_err(to_mdev(imr->ibmr.pd->device),
+                            "Failed to update PAS\n");
+                result = ERR_PTR(ret);
+                goto out_unlock;
+        }
+
+out_unlock:
+        mutex_unlock(&odp_imr->umem_mutex);
         return result;
 }
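
For readers following the diff, below is a small self-contained sketch of the shift-based index arithmetic the reworked loop relies on: the inclusive start/end chunk indexes are derived directly from [io_virt, io_virt+bcnt) without requiring io_virt to be chunk aligned. The shift and chunk size used here (30, i.e. 1 GB per child) are illustrative assumptions standing in for MLX5_IMR_MTT_SHIFT/MLX5_IMR_MTT_SIZE and are not part of the commit.

/* Standalone illustration only; the chunk geometry is an assumed example,
 * not taken from the driver headers.
 */
#include <stdio.h>
#include <stdint.h>

#define EXAMPLE_MTT_SHIFT 30
#define EXAMPLE_MTT_SIZE  (1ULL << EXAMPLE_MTT_SHIFT)

int main(void)
{
        /* An unaligned range that straddles two chunks */
        uint64_t io_virt = (1ULL << 30) - 4096; /* just below a chunk boundary */
        size_t bcnt = 8192;

        /* Inclusive start/end chunk indexes, computed purely by shifting */
        unsigned long idx = io_virt >> EXAMPLE_MTT_SHIFT;
        unsigned long end_idx = (io_virt + bcnt - 1) >> EXAMPLE_MTT_SHIFT;

        for (; idx <= end_idx; idx++)
                printf("chunk %lu covers [%llu, %llu)\n", idx,
                       (unsigned long long)(idx * EXAMPLE_MTT_SIZE),
                       (unsigned long long)((idx + 1) * EXAMPLE_MTT_SIZE));
        return 0;
}

In this example the 8 KB range crosses a chunk boundary, so the loop visits chunks 0 and 1, matching how the new for loop walks every MTT index the range touches.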