Commit 8c6d097d authored by Michael Guralnik, committed by Leon Romanovsky

RDMA/mlx5: Enforce umem boundaries for explicit ODP page faults

The new memory scheme page faults request the driver to fetch additional
pages beyond the faulted memory access.
This is done in order to prefetch pages before and after the faulted
area, on the assumption that this will reduce the total number of page
faults.

The driver should ensure it handles only the pages that are within the
umem range.
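
The clamping applied in the permissive case can be sketched in isolation
as below. This is a minimal standalone illustration, not the driver
code: clamp_fault_to_umem and its parameters are hypothetical names, and
the uint64_t types stand in for the driver's u64 values derived from the
ib_umem_odp. It only shows the intent of the change, namely trimming the
requested range so that only addresses inside [umem_start, umem_end) are
handled.

  #include <stdint.h>

  /*
   * Sketch: confine a permissive fault range [*va, *va + *len) to the
   * umem boundaries [umem_start, umem_end), mirroring the clamping
   * added to pagefault_mr() in the diff below.
   */
  static void clamp_fault_to_umem(uint64_t umem_start, uint64_t umem_end,
                                  uint64_t *va, uint64_t *len)
  {
          /* Never start handling pages before the umem begins. */
          if (*va < umem_start)
                  *va = umem_start;

          /* Never handle pages past the end of the umem. */
          if (*va + *len > umem_end)
                  *len = umem_end - *va;
  }

Explicit (non-permissive) faults keep the existing behavior and still
fail with -EFAULT when the faulted range falls outside the umem.
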
Signed-off-by: Michael Guralnik <michaelgur@nvidia.com>
Link: https://patch.msgid.link/20240909100504.29797-5-michaelgur@nvidia.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
parent 64c68385
@@ -748,24 +748,31 @@ static int pagefault_dmabuf_mr(struct mlx5_ib_mr *mr, size_t bcnt,
  * >0: Number of pages mapped
  */
 static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt,
-                        u32 *bytes_mapped, u32 flags)
+                        u32 *bytes_mapped, u32 flags, bool permissive_fault)
 {
         struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
 
-        if (unlikely(io_virt < mr->ibmr.iova))
+        if (unlikely(io_virt < mr->ibmr.iova) && !permissive_fault)
                 return -EFAULT;
 
         if (mr->umem->is_dmabuf)
                 return pagefault_dmabuf_mr(mr, bcnt, bytes_mapped, flags);
 
         if (!odp->is_implicit_odp) {
+                u64 offset = io_virt < mr->ibmr.iova ? 0 : io_virt - mr->ibmr.iova;
                 u64 user_va;
 
-                if (check_add_overflow(io_virt - mr->ibmr.iova,
-                                       (u64)odp->umem.address, &user_va))
+                if (check_add_overflow(offset, (u64)odp->umem.address,
+                                       &user_va))
                         return -EFAULT;
-                if (unlikely(user_va >= ib_umem_end(odp) ||
-                             ib_umem_end(odp) - user_va < bcnt))
+
+                if (permissive_fault) {
+                        if (user_va < ib_umem_start(odp))
+                                user_va = ib_umem_start(odp);
+                        if ((user_va + bcnt) > ib_umem_end(odp))
+                                bcnt = ib_umem_end(odp) - user_va;
+                } else if (unlikely(user_va >= ib_umem_end(odp) ||
+                                    ib_umem_end(odp) - user_va < bcnt))
                         return -EFAULT;
                 return pagefault_real_mr(mr, odp, user_va, bcnt, bytes_mapped,
                                          flags);
@@ -872,7 +879,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
         case MLX5_MKEY_MR:
                 mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
 
-                ret = pagefault_mr(mr, io_virt, bcnt, bytes_mapped, 0);
+                ret = pagefault_mr(mr, io_virt, bcnt, bytes_mapped, 0, false);
                 if (ret < 0)
                         goto end;
 
@@ -1727,7 +1734,7 @@ static void mlx5_ib_prefetch_mr_work(struct work_struct *w)
         for (i = 0; i < work->num_sge; ++i) {
                 ret = pagefault_mr(work->frags[i].mr, work->frags[i].io_virt,
                                    work->frags[i].length, &bytes_mapped,
-                                   work->pf_flags);
+                                   work->pf_flags, false);
                 if (ret <= 0)
                         continue;
                 mlx5_update_odp_stats(work->frags[i].mr, prefetch, ret);
@@ -1778,7 +1785,7 @@ static int mlx5_ib_prefetch_sg_list(struct ib_pd *pd,
                 if (IS_ERR(mr))
                         return PTR_ERR(mr);
                 ret = pagefault_mr(mr, sg_list[i].addr, sg_list[i].length,
-                                   &bytes_mapped, pf_flags);
+                                   &bytes_mapped, pf_flags, false);
                 if (ret < 0) {
                         mlx5r_deref_odp_mkey(&mr->mmkey);
                         return ret;