Commit a665aca8 authored by Jason Gunthorpe

RDMA/umem: Split ib_umem_num_pages() into ib_umem_num_dma_blocks()

ib_umem_num_pages() should only be used by code working with the SGL in
CPU pages directly.

Drivers building DMA lists should use the new ib_umem_num_dma_blocks(),
which returns the number of blocks rdma_umem_for_each_dma_block() will
produce.

Making this general for DMA drivers requires a different implementation:
computing the DMA block count based on umem->address only works if the
requested page size is < PAGE_SIZE and/or the IOVA == umem->address.

Instead, the number of DMA blocks should be computed in the IOVA address
space, not from umem->address. Thus the IOVA has to be stored inside the
umem so it can be used for these calculations.

For now set it to umem->address by default and fix it up if
ib_umem_find_best_pgsz() was called. This allows drivers to be converted
to ib_umem_num_dma_blocks() safely.
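
For illustration, a minimal userspace-style sketch of the difference (not
part of this change; EX_ALIGN()/EX_ALIGN_DOWN() mirror the kernel's
ALIGN()/ALIGN_DOWN() for power-of-two sizes, and all addresses and sizes
below are made up):

/*
 * Contrast a block count computed from the CPU VA with one computed
 * from the IOVA, for a DMA block size larger than PAGE_SIZE.
 */
#include <stdint.h>
#include <stdio.h>

#define EX_ALIGN_DOWN(x, a) ((x) & ~((uint64_t)(a) - 1))
#define EX_ALIGN(x, a)      EX_ALIGN_DOWN((x) + (a) - 1, (a))

static uint64_t num_blocks(uint64_t base, uint64_t length, uint64_t pgsz)
{
        return (EX_ALIGN(base + length, pgsz) -
                EX_ALIGN_DOWN(base, pgsz)) / pgsz;
}

int main(void)
{
        uint64_t address = 0x7f123456f000;  /* CPU VA of the user buffer */
        uint64_t iova    = 0x10000;         /* DMA address used by the HW */
        uint64_t length  = 0x3000;          /* 12 KiB */
        uint64_t pgsz    = 0x10000;         /* 64 KiB DMA blocks, > PAGE_SIZE */

        /* The VA straddles a 64 KiB boundary, so this counts 2 blocks */
        printf("address-based: %llu\n",
               (unsigned long long)num_blocks(address, length, pgsz));
        /* The IOVA does not, so counting in IOVA space gives 1 block */
        printf("iova-based:    %llu\n",
               (unsigned long long)num_blocks(iova, length, pgsz));
        return 0;
}

With these example values, address-based counting yields 2 blocks while
IOVA-based counting yields 1, which is why the umem now records the IOVA.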

Link: https://lore.kernel.org/r/6-v2-270386b7e60b+28f4-umem_1_jgg@nvidia.com
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
parent 89603f7e
@@ -161,7 +161,7 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
 	if (WARN_ON(!(pgsz_bitmap & GENMASK(PAGE_SHIFT, 0))))
 		return 0;
 
-	va = virt;
+	umem->iova = va = virt;
 	/* The best result is the smallest page size that results in the minimum
 	 * number of required pages. Compute the largest page size that could
 	 * work based on VA address bits that don't change.
@@ -240,6 +240,11 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
 	umem->ibdev = device;
 	umem->length = size;
 	umem->address = addr;
+	/*
+	 * Drivers should call ib_umem_find_best_pgsz() to set the iova
+	 * correctly.
+	 */
+	umem->iova = addr;
 	umem->writable = ib_access_writable(access);
 	umem->owning_mm = mm = current->mm;
 	mmgrab(mm);
...
@@ -548,7 +548,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 	shift = PAGE_SHIFT;
 
-	n = ib_umem_num_pages(mhp->umem);
+	n = ib_umem_num_dma_blocks(mhp->umem, 1 << shift);
 	err = alloc_pbl(mhp, n);
 	if (err)
 		goto err_umem_release;
...
@@ -169,8 +169,8 @@ void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
 			  int page_shift, __be64 *pas, int access_flags)
 {
 	return __mlx5_ib_populate_pas(dev, umem, page_shift, 0,
-				      ib_umem_num_pages(umem), pas,
-				      access_flags);
+				      ib_umem_num_dma_blocks(umem, PAGE_SIZE),
+				      pas, access_flags);
 }
 
 int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset)
 {
...
@@ -881,7 +881,7 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 		goto err;
 	}
 
-	n = ib_umem_num_pages(mr->umem);
+	n = ib_umem_num_dma_blocks(mr->umem, PAGE_SIZE);
 
 	mr->mtt = mthca_alloc_mtt(dev, n);
 	if (IS_ERR(mr->mtt)) {
...
@@ -133,7 +133,7 @@ struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 		return ERR_CAST(umem);
 	}
 
-	npages = ib_umem_num_pages(umem);
+	npages = ib_umem_num_dma_blocks(umem, PAGE_SIZE);
 	if (npages < 0 || npages > PVRDMA_PAGE_DIR_MAX_PAGES) {
 		dev_warn(&dev->pdev->dev, "overflow %d pages in mem region\n",
 			 npages);
...
@@ -17,6 +17,7 @@ struct ib_umem_odp;
 struct ib_umem {
 	struct ib_device *ibdev;
 	struct mm_struct *owning_mm;
+	u64 iova;
 	size_t length;
 	unsigned long address;
 	u32 writable : 1;
@@ -33,11 +34,17 @@ static inline int ib_umem_offset(struct ib_umem *umem)
 	return umem->address & ~PAGE_MASK;
 }
 
+static inline size_t ib_umem_num_dma_blocks(struct ib_umem *umem,
+					    unsigned long pgsz)
+{
+	return (size_t)((ALIGN(umem->iova + umem->length, pgsz) -
+			 ALIGN_DOWN(umem->iova, pgsz))) /
+	       pgsz;
+}
+
 static inline size_t ib_umem_num_pages(struct ib_umem *umem)
 {
-	return (ALIGN(umem->address + umem->length, PAGE_SIZE) -
-		ALIGN_DOWN(umem->address, PAGE_SIZE)) >>
-	       PAGE_SHIFT;
+	return ib_umem_num_dma_blocks(umem, PAGE_SIZE);
 }
 
 static inline void __rdma_umem_block_iter_start(struct ib_block_iter *biter,
@@ -55,6 +62,8 @@ static inline void __rdma_umem_block_iter_start(struct ib_block_iter *biter,
  * pgsz must be <= PAGE_SIZE or computed by ib_umem_find_best_pgsz(). The
  * returned DMA blocks will be aligned to pgsz and span the range:
  * ALIGN_DOWN(umem->address, pgsz) to ALIGN(umem->address + umem->length, pgsz)
+ *
+ * Performs exactly ib_umem_num_dma_blocks() iterations.
  */
 #define rdma_umem_for_each_dma_block(umem, biter, pgsz)                        \
 	for (__rdma_umem_block_iter_start(biter, umem, pgsz);                  \
...
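
Taken together, the intended driver-side pattern looks like the following
sketch (the foo_ helper, its 2M/4K capability bitmap, and the flat
dma_addr_t page list are invented for illustration; only the ib_umem_* /
rdma_umem_* helpers are real kernel API):

/*
 * Hypothetical driver helper: size the page list with
 * ib_umem_num_dma_blocks() and fill it with rdma_umem_for_each_dma_block(),
 * using the page size chosen by ib_umem_find_best_pgsz().
 */
#include <linux/errno.h>
#include <linux/sizes.h>
#include <linux/slab.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_verbs.h>

static int foo_build_page_list(struct ib_umem *umem, u64 iova,
                               dma_addr_t **pas_out, size_t *npas_out)
{
        struct ib_block_iter biter;
        unsigned long pgsz;
        dma_addr_t *pas;
        size_t npas, i = 0;

        /* Largest page size the (made-up) HW supports; also sets umem->iova */
        pgsz = ib_umem_find_best_pgsz(umem, SZ_2M | SZ_4K, iova);
        if (!pgsz)
                return -EINVAL;

        /* Sized so the iterator below fills it exactly */
        npas = ib_umem_num_dma_blocks(umem, pgsz);
        pas = kcalloc(npas, sizeof(*pas), GFP_KERNEL);
        if (!pas)
                return -ENOMEM;

        rdma_umem_for_each_dma_block(umem, &biter, pgsz)
                pas[i++] = rdma_block_iter_dma_address(&biter);

        *pas_out = pas;
        *npas_out = npas;
        return 0;
}

Because ib_umem_num_dma_blocks() counts in the same IOVA space the iterator
walks, the allocation and the fill loop cannot disagree the way the old
umem->address-based ib_umem_num_pages() count could.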