Commit 81713d37 authored by Artemy Kovalyov's avatar Artemy Kovalyov Committed by Doug Ledford

IB/mlx5: Add implicit MR support

Add implicit MR, covering entire user address space.
The MR is implemented as an indirect KSM MR consisting of
1GB direct MRs.
Pages and direct MRs are added/removed to MR by ODP.
Signed-off-by: default avatarArtemy Kovalyov <artemyko@mellanox.com>
Signed-off-by: default avatarLeon Romanovsky <leon@kernel.org>
Signed-off-by: default avatarDoug Ledford <dledford@redhat.com>
parent 49780d42
...@@ -3583,6 +3583,8 @@ static int __init mlx5_ib_init(void) ...@@ -3583,6 +3583,8 @@ static int __init mlx5_ib_init(void)
{ {
int err; int err;
mlx5_ib_odp_init();
err = mlx5_register_interface(&mlx5_ib_interface); err = mlx5_register_interface(&mlx5_ib_interface);
return err; return err;
......
...@@ -202,6 +202,7 @@ struct mlx5_ib_flow_db { ...@@ -202,6 +202,7 @@ struct mlx5_ib_flow_db {
#define MLX5_IB_UPD_XLT_ADDR BIT(3) #define MLX5_IB_UPD_XLT_ADDR BIT(3)
#define MLX5_IB_UPD_XLT_PD BIT(4) #define MLX5_IB_UPD_XLT_PD BIT(4)
#define MLX5_IB_UPD_XLT_ACCESS BIT(5) #define MLX5_IB_UPD_XLT_ACCESS BIT(5)
#define MLX5_IB_UPD_XLT_INDIRECT BIT(6)
/* Private QP creation flags to be passed in ib_qp_init_attr.create_flags. /* Private QP creation flags to be passed in ib_qp_init_attr.create_flags.
* *
...@@ -503,6 +504,10 @@ struct mlx5_ib_mr { ...@@ -503,6 +504,10 @@ struct mlx5_ib_mr {
int live; int live;
void *descs_alloc; void *descs_alloc;
int access_flags; /* Needed for rereg MR */ int access_flags; /* Needed for rereg MR */
struct mlx5_ib_mr *parent;
atomic_t num_leaf_free;
wait_queue_head_t q_leaf_free;
}; };
struct mlx5_ib_mw { struct mlx5_ib_mw {
...@@ -637,6 +642,7 @@ struct mlx5_ib_dev { ...@@ -637,6 +642,7 @@ struct mlx5_ib_dev {
* being used by a page fault handler. * being used by a page fault handler.
*/ */
struct srcu_struct mr_srcu; struct srcu_struct mr_srcu;
u32 null_mkey;
#endif #endif
struct mlx5_ib_flow_db flow_db; struct mlx5_ib_flow_db flow_db;
/* protect resources needed as part of reset flow */ /* protect resources needed as part of reset flow */
...@@ -789,6 +795,9 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, ...@@ -789,6 +795,9 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
int mlx5_ib_dealloc_mw(struct ib_mw *mw); int mlx5_ib_dealloc_mw(struct ib_mw *mw);
int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
int page_shift, int flags); int page_shift, int flags);
struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
int access_flags);
void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *mr);
int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
u64 length, u64 virt_addr, int access_flags, u64 length, u64 virt_addr, int access_flags,
struct ib_pd *pd, struct ib_udata *udata); struct ib_pd *pd, struct ib_udata *udata);
...@@ -868,6 +877,9 @@ int __init mlx5_ib_odp_init(void); ...@@ -868,6 +877,9 @@ int __init mlx5_ib_odp_init(void);
void mlx5_ib_odp_cleanup(void); void mlx5_ib_odp_cleanup(void);
void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start, void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
unsigned long end); unsigned long end);
void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent);
void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
size_t nentries, struct mlx5_ib_mr *mr, int flags);
#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
{ {
...@@ -875,9 +887,13 @@ static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) ...@@ -875,9 +887,13 @@ static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
} }
static inline int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev) { return 0; } static inline int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev) { return 0; }
static inline void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev) {} static inline void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev) {}
static inline int mlx5_ib_odp_init(void) { return 0; } static inline int mlx5_ib_odp_init(void) { return 0; }
static inline void mlx5_ib_odp_cleanup(void) {} static inline void mlx5_ib_odp_cleanup(void) {}
static inline void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent) {}
static inline void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
size_t nentries, struct mlx5_ib_mr *mr,
int flags) {}
#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
......
...@@ -469,7 +469,7 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry) ...@@ -469,7 +469,7 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry)
spin_unlock_irq(&ent->lock); spin_unlock_irq(&ent->lock);
err = add_keys(dev, entry, 1); err = add_keys(dev, entry, 1);
if (err) if (err && err != -EAGAIN)
return ERR_PTR(err); return ERR_PTR(err);
wait_for_completion(&ent->compl); wait_for_completion(&ent->compl);
...@@ -669,8 +669,10 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) ...@@ -669,8 +669,10 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func); INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
queue_work(cache->wq, &ent->work); queue_work(cache->wq, &ent->work);
if (i > MAX_UMR_CACHE_ENTRY) if (i > MAX_UMR_CACHE_ENTRY) {
mlx5_odp_init_mr_cache_entry(ent);
continue; continue;
}
if (!use_umr(dev, ent->order)) if (!use_umr(dev, ent->order))
continue; continue;
...@@ -935,6 +937,10 @@ static inline int populate_xlt(struct mlx5_ib_mr *mr, int idx, int npages, ...@@ -935,6 +937,10 @@ static inline int populate_xlt(struct mlx5_ib_mr *mr, int idx, int npages,
{ {
struct mlx5_ib_dev *dev = mr->dev; struct mlx5_ib_dev *dev = mr->dev;
struct ib_umem *umem = mr->umem; struct ib_umem *umem = mr->umem;
if (flags & MLX5_IB_UPD_XLT_INDIRECT) {
mlx5_odp_populate_klm(xlt, idx, npages, mr, flags);
return npages;
}
npages = min_t(size_t, npages, ib_umem_num_pages(umem) - idx); npages = min_t(size_t, npages, ib_umem_num_pages(umem) - idx);
...@@ -968,7 +974,9 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, ...@@ -968,7 +974,9 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
struct mlx5_umr_wr wr; struct mlx5_umr_wr wr;
struct ib_sge sg; struct ib_sge sg;
int err = 0; int err = 0;
int desc_size = sizeof(struct mlx5_mtt); int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT)
? sizeof(struct mlx5_klm)
: sizeof(struct mlx5_mtt);
const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size; const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size;
const int page_mask = page_align - 1; const int page_mask = page_align - 1;
size_t pages_mapped = 0; size_t pages_mapped = 0;
...@@ -1186,6 +1194,18 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, ...@@ -1186,6 +1194,18 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
start, virt_addr, length, access_flags); start, virt_addr, length, access_flags);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
if (!start && length == U64_MAX) {
if (!(access_flags & IB_ACCESS_ON_DEMAND) ||
!(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
return ERR_PTR(-EINVAL);
mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), access_flags);
return &mr->ibmr;
}
#endif
err = mr_umem_get(pd, start, length, access_flags, &umem, &npages, err = mr_umem_get(pd, start, length, access_flags, &umem, &npages,
&page_shift, &ncont, &order); &page_shift, &ncont, &order);
...@@ -1471,8 +1491,11 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr) ...@@ -1471,8 +1491,11 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
/* Wait for all running page-fault handlers to finish. */ /* Wait for all running page-fault handlers to finish. */
synchronize_srcu(&dev->mr_srcu); synchronize_srcu(&dev->mr_srcu);
/* Destroy all page mappings */ /* Destroy all page mappings */
mlx5_ib_invalidate_range(umem, ib_umem_start(umem), if (umem->odp_data->page_list)
ib_umem_end(umem)); mlx5_ib_invalidate_range(umem, ib_umem_start(umem),
ib_umem_end(umem));
else
mlx5_ib_free_implicit_mr(mr);
/* /*
* We kill the umem before the MR for ODP, * We kill the umem before the MR for ODP,
* so that there will not be any invalidations in * so that there will not be any invalidations in
......
This diff is collapsed.
...@@ -1053,6 +1053,8 @@ enum { ...@@ -1053,6 +1053,8 @@ enum {
enum { enum {
MAX_UMR_CACHE_ENTRY = 20, MAX_UMR_CACHE_ENTRY = 20,
MLX5_IMR_MTT_CACHE_ENTRY,
MLX5_IMR_KSM_CACHE_ENTRY,
MAX_MR_CACHE_ENTRIES MAX_MR_CACHE_ENTRIES
}; };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment