Commit 7d0cc6ed authored by Artemy Kovalyov, committed by David S. Miller

IB/mlx5: Add MR cache for large UMR regions

In this change we turn mlx5_ib_update_mtt() into the generic
mlx5_ib_update_xlt(), which performs HCA translation table modifications,
supports both atomic and process contexts, and is no longer limited by the
number of modified entries.
Using this function we increase the preallocated MRs up to 16GB.
Signed-off-by: Artemy Kovalyov <artemyko@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent c438fde1
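For orientation before the diff: the old per-MTT helper takes a bare "zap" argument, while its replacement takes an explicit page shift and a flags word (see the MLX5_IB_UPD_XLT_* defines and the odp.c hunks below). The following is a minimal user-space sketch of that interface change only; the struct, the flag values copied here, and the example index/size numbers are stand-ins, not driver code.

/*
 * Hedged mock of the interface change; struct mlx5_ib_mr and the
 * example values below are placeholders.
 */
#include <stdio.h>

#define MLX5_IB_UPD_XLT_ZAP	(1 << 0)
#define MLX5_IB_UPD_XLT_ATOMIC	(1 << 2)
#define PAGE_SHIFT		12

struct mlx5_ib_mr { int placeholder; };

/* Old interface: a boolean 'zap', page size implied by the umem. */
static int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, unsigned long long idx,
			      int npages, int zap)
{
	(void)mr;
	printf("update_mtt: idx=%llu npages=%d zap=%d\n", idx, npages, zap);
	return 0;
}

/* New interface: explicit page_shift plus a flags word. */
static int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, unsigned long long idx,
			      int npages, int page_shift, int flags)
{
	(void)mr;
	printf("update_xlt: idx=%llu npages=%d shift=%d flags=0x%x\n",
	       idx, npages, page_shift, (unsigned int)flags);
	return 0;
}

int main(void)
{
	struct mlx5_ib_mr mr = { 0 };

	/* Before: zap 64 translation entries starting at index 128. */
	mlx5_ib_update_mtt(&mr, 128, 64, 1);

	/* After: the same request in the flag-driven form used by odp.c. */
	mlx5_ib_update_xlt(&mr, 128, 64, PAGE_SHIFT,
			   MLX5_IB_UPD_XLT_ZAP | MLX5_IB_UPD_XLT_ATOMIC);
	return 0;
}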
@@ -1112,11 +1112,18 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
 	context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range;
 #endif
 
+	context->upd_xlt_page = __get_free_page(GFP_KERNEL);
+	if (!context->upd_xlt_page) {
+		err = -ENOMEM;
+		goto out_uars;
+	}
+	mutex_init(&context->upd_xlt_page_mutex);
+
 	if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) {
 		err = mlx5_core_alloc_transport_domain(dev->mdev,
 						       &context->tdn);
 		if (err)
-			goto out_uars;
+			goto out_page;
 	}
 
 	INIT_LIST_HEAD(&context->vma_private_list);
@@ -1168,6 +1175,9 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
 	if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
 		mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn);
 
+out_page:
+	free_page(context->upd_xlt_page);
+
 out_uars:
 	for (i--; i >= 0; i--)
 		mlx5_cmd_free_uar(dev->mdev, uars[i].index);
@@ -1195,6 +1205,8 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
 	if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
 		mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn);
 
+	free_page(context->upd_xlt_page);
+
 	for (i = 0; i < uuari->num_uars; i++) {
 		if (mlx5_cmd_free_uar(dev->mdev, uuari->uars[i].index))
 			mlx5_ib_warn(dev, "failed to free UAR 0x%x\n", uuari->uars[i].index);
......
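The three ucontext hunks above implement one pattern: preallocate a scratch page when the user context is created, protect it with a mutex for process-context XLT updates, and release it on the error path and at teardown. Below is a rough user-space sketch of that lifecycle only, with malloc and pthreads standing in for __get_free_page() and the kernel's struct mutex; names mirror the diff but nothing here is driver code.

/* Sketch of the scratch-page lifecycle; all names are illustrative. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct my_ucontext {
	void *upd_xlt_page;                 /* scratch buffer for XLT updates */
	pthread_mutex_t upd_xlt_page_mutex; /* serializes its users */
};

static int my_alloc_ucontext(struct my_ucontext *ctx)
{
	ctx->upd_xlt_page = malloc(4096);
	if (!ctx->upd_xlt_page)
		return -1;                  /* the driver returns -ENOMEM here */
	pthread_mutex_init(&ctx->upd_xlt_page_mutex, NULL);
	return 0;
}

static void my_dealloc_ucontext(struct my_ucontext *ctx)
{
	pthread_mutex_destroy(&ctx->upd_xlt_page_mutex);
	free(ctx->upd_xlt_page);
}

int main(void)
{
	struct my_ucontext ctx;

	if (my_alloc_ucontext(&ctx))
		return 1;
	/* ... process-context XLT updates would take the mutex here ... */
	my_dealloc_ucontext(&ctx);
	puts("ucontext scratch page lifecycle ok");
	return 0;
}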
@@ -159,7 +159,7 @@ void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
 	unsigned long umem_page_shift = ilog2(umem->page_size);
 	int shift = page_shift - umem_page_shift;
 	int mask = (1 << shift) - 1;
-	int i, k;
+	int i, k, idx;
 	u64 cur = 0;
 	u64 base;
 	int len;
@@ -185,18 +185,36 @@ void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
 	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
 		len = sg_dma_len(sg) >> umem_page_shift;
 		base = sg_dma_address(sg);
-		for (k = 0; k < len; k++) {
+
+		/* Skip elements below offset */
+		if (i + len < offset << shift) {
+			i += len;
+			continue;
+		}
+
+		/* Skip pages below offset */
+		if (i < offset << shift) {
+			k = (offset << shift) - i;
+			i = offset << shift;
+		} else {
+			k = 0;
+		}
+
+		for (; k < len; k++) {
 			if (!(i & mask)) {
 				cur = base + (k << umem_page_shift);
 				cur |= access_flags;
+				idx = (i >> shift) - offset;
 
-				pas[i >> shift] = cpu_to_be64(cur);
+				pas[idx] = cpu_to_be64(cur);
 				mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n",
-					    i >> shift, be64_to_cpu(pas[i >> shift]));
-			} else
-				mlx5_ib_dbg(dev, "=====> 0x%llx\n",
-					    base + (k << umem_page_shift));
+					    i >> shift, be64_to_cpu(pas[idx]));
+			}
 			i++;
+
+			/* Stop after num_pages reached */
+			if (i >> shift >= offset + num_pages)
+				return;
 		}
 	}
 }
......
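The new offset/num_pages handling in __mlx5_ib_populate_pas() reduces to index arithmetic: umem pages whose MR-page index i >> shift falls below offset are skipped, entry i >> shift is written to slot (i >> shift) - offset of the caller's buffer, and the walk stops once offset + num_pages is reached. The following toy stand-alone program checks only that mapping; the shift/offset/num_pages values are made-up examples, not driver values.

/* Toy model of the windowing arithmetic above; values are illustrative. */
#include <stdio.h>

int main(void)
{
	int shift = 1;     /* example: 2 umem pages per MR page */
	int offset = 4;    /* first MR page the caller asked for */
	int num_pages = 3; /* size of the output window */

	for (int i = 0; ; i++) {                 /* i counts umem pages */
		if ((i >> shift) < offset)
			continue;                /* below the window: skip */
		if ((i >> shift) >= offset + num_pages)
			break;                   /* window filled: stop */
		if (!(i & ((1 << shift) - 1))) { /* first umem page of an MR page */
			int idx = (i >> shift) - offset;
			printf("MR page %d -> pas[%d]\n", i >> shift, idx);
		}
	}
	return 0;
}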
@@ -125,6 +125,10 @@ struct mlx5_ib_ucontext {
 	/* Transport Domain number */
 	u32			tdn;
 	struct list_head	vma_private_list;
+
+	unsigned long		upd_xlt_page;
+	/* protect ODP/KSM */
+	struct mutex		upd_xlt_page_mutex;
 };
 
 static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
@@ -192,6 +196,13 @@ struct mlx5_ib_flow_db {
 #define MLX5_IB_UMR_OCTOWORD	       16
 #define MLX5_IB_UMR_XLT_ALIGNMENT      64
 
+#define MLX5_IB_UPD_XLT_ZAP	      BIT(0)
+#define MLX5_IB_UPD_XLT_ENABLE	      BIT(1)
+#define MLX5_IB_UPD_XLT_ATOMIC	      BIT(2)
+#define MLX5_IB_UPD_XLT_ADDR	      BIT(3)
+#define MLX5_IB_UPD_XLT_PD	      BIT(4)
+#define MLX5_IB_UPD_XLT_ACCESS	      BIT(5)
+
 /* Private QP creation flags to be passed in ib_qp_init_attr.create_flags.
  *
  * These flags are intended for internal use by the mlx5_ib driver, and they
@@ -788,8 +799,8 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
 			       struct ib_udata *udata);
 int mlx5_ib_dealloc_mw(struct ib_mw *mw);
-int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index,
-		       int npages, int zap);
+int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
+		       int page_shift, int flags);
 int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
 			  u64 length, u64 virt_addr, int access_flags,
 			  struct ib_pd *pd, struct ib_udata *udata);
......
This diff is collapsed.
@@ -91,16 +91,21 @@ void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
 			u64 umr_offset = idx & umr_block_mask;
 
 			if (in_block && umr_offset == 0) {
-				mlx5_ib_update_mtt(mr, blk_start_idx,
-						   idx - blk_start_idx, 1);
+				mlx5_ib_update_xlt(mr, blk_start_idx,
+						   idx - blk_start_idx,
+						   PAGE_SHIFT,
+						   MLX5_IB_UPD_XLT_ZAP |
+						   MLX5_IB_UPD_XLT_ATOMIC);
 				in_block = 0;
 			}
 		}
 	}
 	if (in_block)
-		mlx5_ib_update_mtt(mr, blk_start_idx, idx - blk_start_idx + 1,
-				   1);
+		mlx5_ib_update_xlt(mr, blk_start_idx,
+				   idx - blk_start_idx + 1,
+				   PAGE_SHIFT,
+				   MLX5_IB_UPD_XLT_ZAP |
+				   MLX5_IB_UPD_XLT_ATOMIC);
 	/*
 	 * We are now sure that the device will not access the
 	 * memory. We can safely unmap it, and mark it as dirty if
@@ -257,7 +262,9 @@ static int pagefault_single_data_segment(struct mlx5_ib_qp *qp,
 		 * this MR, since ib_umem_odp_map_dma_pages already
 		 * checks this.
 		 */
-		ret = mlx5_ib_update_mtt(mr, start_idx, npages, 0);
+		ret = mlx5_ib_update_xlt(mr, start_idx, npages,
+					 PAGE_SHIFT,
+					 MLX5_IB_UPD_XLT_ATOMIC);
 	} else {
 		ret = -EAGAIN;
 	}
......
@@ -152,6 +152,26 @@ static struct mlx5_profile profile[] = {
 			.size	= 8,
 			.limit	= 4
 		},
+		.mr_cache[16]	= {
+			.size	= 8,
+			.limit	= 4
+		},
+		.mr_cache[17]	= {
+			.size	= 8,
+			.limit	= 4
+		},
+		.mr_cache[18]	= {
+			.size	= 8,
+			.limit	= 4
+		},
+		.mr_cache[19]	= {
+			.size	= 4,
+			.limit	= 2
+		},
+		.mr_cache[20]	= {
+			.size	= 4,
+			.limit	= 2
+		},
 	},
 };
......
@@ -959,7 +959,7 @@ enum {
 };
 
 enum {
-	MAX_MR_CACHE_ENTRIES	= 16,
+	MAX_MR_CACHE_ENTRIES	= 21,
 };
 
 enum {
......
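The last two hunks, the five new mr_cache profile entries and the bump of MAX_MR_CACHE_ENTRIES from 16 to 21, are what back the commit message's 16GB figure. Assuming, as an interpretation of the existing MR cache layout rather than something visible in this diff, that cache entry i holds MRs with 1 << (i + 2) translation entries, the new top entry 20 covers 2^22 pages of 4 KiB, i.e. 16 GiB. A quick stand-alone arithmetic check:

/* Back-of-the-envelope check; the "order = i + 2" relation is an
 * assumption about the MR cache, not part of this diff. */
#include <stdio.h>

int main(void)
{
	unsigned int top = 21 - 1;                        /* new MAX_MR_CACHE_ENTRIES - 1 */
	unsigned long long entries = 1ULL << (top + 2);   /* 2^22 XLT entries */
	unsigned long long bytes = entries << 12;         /* 4 KiB pages */

	printf("largest cached MR: %llu GiB\n", bytes >> 30); /* prints 16 */
	return 0;
}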