Commit 490ea596 authored by santosh.shilimkar@oracle.com, committed by David S. Miller

RDS: IB: move FMR code to its own file

No functional change.
Signed-off-by: Santosh Shilimkar <ssantosh@kernel.org>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent a69365a3
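
For orientation, these are the FMR-specific entry points the patch carves out, as declared in the header hunk below. The file names are not shown in this view; going by the function names they would be net/rds/ib_fmr.c, net/rds/ib_mr.h and net/rds/ib_rdma.c, but that is an inference. The comments are editorial notes on where each body comes from, not part of the patch:

    /* New FMR surface called by the generic RDS IB MR code */
    struct rds_ib_mr *rds_ib_reg_fmr(struct rds_ib_device *, struct scatterlist *,
                                     unsigned long, u32 *);
                    /* allocate + map an FMR, return rkey via u32 * */
    struct rds_ib_mr *rds_ib_try_reuse_ibmr(struct rds_ib_mr_pool *);
                    /* pool reuse/backoff loop, formerly open-coded in
                     * rds_ib_alloc_fmr() */
    void rds_ib_unreg_fmr(struct list_head *, unsigned int *,
                          unsigned long *, unsigned int);
                    /* ib_unmap_fmr() + MR teardown, formerly open-coded
                     * in rds_ib_flush_mr_pool() */
    void rds_ib_free_fmr_list(struct rds_ib_mr *);
                    /* drop_list vs. free_list routing on MR free */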
@@ -37,61 +37,16 @@ struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev, int npages)
 	struct rds_ib_mr_pool *pool;
 	struct rds_ib_mr *ibmr = NULL;
 	struct rds_ib_fmr *fmr;
-	int err = 0, iter = 0;
+	int err = 0;
 
 	if (npages <= RDS_MR_8K_MSG_SIZE)
 		pool = rds_ibdev->mr_8k_pool;
 	else
 		pool = rds_ibdev->mr_1m_pool;
 
-	if (atomic_read(&pool->dirty_count) >= pool->max_items / 10)
-		queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10);
-
-	/* Switch pools if one of the pool is reaching upper limit */
-	if (atomic_read(&pool->dirty_count) >= pool->max_items * 9 / 10) {
-		if (pool->pool_type == RDS_IB_MR_8K_POOL)
-			pool = rds_ibdev->mr_1m_pool;
-		else
-			pool = rds_ibdev->mr_8k_pool;
-	}
-
-	while (1) {
-		ibmr = rds_ib_reuse_mr(pool);
-		if (ibmr)
-			return ibmr;
-
-		/* No clean MRs - now we have the choice of either
-		 * allocating a fresh MR up to the limit imposed by the
-		 * driver, or flush any dirty unused MRs.
-		 * We try to avoid stalling in the send path if possible,
-		 * so we allocate as long as we're allowed to.
-		 *
-		 * We're fussy with enforcing the FMR limit, though. If the
-		 * driver tells us we can't use more than N fmrs, we shouldn't
-		 * start arguing with it
-		 */
-		if (atomic_inc_return(&pool->item_count) <= pool->max_items)
-			break;
-
-		atomic_dec(&pool->item_count);
-
-		if (++iter > 2) {
-			if (pool->pool_type == RDS_IB_MR_8K_POOL)
-				rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_depleted);
-			else
-				rds_ib_stats_inc(s_ib_rdma_mr_1m_pool_depleted);
-			return ERR_PTR(-EAGAIN);
-		}
-
-		/* We do have some empty MRs. Flush them out. */
-		if (pool->pool_type == RDS_IB_MR_8K_POOL)
-			rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_wait);
-		else
-			rds_ib_stats_inc(s_ib_rdma_mr_1m_pool_wait);
-		rds_ib_flush_mr_pool(pool, 0, &ibmr);
-		if (ibmr)
-			return ibmr;
-	}
+	ibmr = rds_ib_try_reuse_ibmr(pool);
+	if (ibmr)
+		return ibmr;
 
 	ibmr = kzalloc_node(sizeof(*ibmr), GFP_KERNEL,
 			    rdsibdev_to_node(rds_ibdev));
@@ -218,3 +173,76 @@ int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, struct rds_ib_mr *ibmr,
 
 	return ret;
 }
+
+struct rds_ib_mr *rds_ib_reg_fmr(struct rds_ib_device *rds_ibdev,
+				 struct scatterlist *sg,
+				 unsigned long nents,
+				 u32 *key)
+{
+	struct rds_ib_mr *ibmr = NULL;
+	struct rds_ib_fmr *fmr;
+	int ret;
+
+	ibmr = rds_ib_alloc_fmr(rds_ibdev, nents);
+	if (IS_ERR(ibmr))
+		return ibmr;
+
+	ibmr->device = rds_ibdev;
+	fmr = &ibmr->u.fmr;
+	ret = rds_ib_map_fmr(rds_ibdev, ibmr, sg, nents);
+	if (ret == 0)
+		*key = fmr->fmr->rkey;
+	else
+		rds_ib_free_mr(ibmr, 0);
+
+	return ibmr;
+}
+
+void rds_ib_unreg_fmr(struct list_head *list, unsigned int *nfreed,
+		      unsigned long *unpinned, unsigned int goal)
+{
+	struct rds_ib_mr *ibmr, *next;
+	struct rds_ib_fmr *fmr;
+	LIST_HEAD(fmr_list);
+	int ret = 0;
+	unsigned int freed = *nfreed;
+
+	/* String all ib_mr's onto one list and hand them to ib_unmap_fmr */
+	list_for_each_entry(ibmr, list, unmap_list) {
+		fmr = &ibmr->u.fmr;
+		list_add(&fmr->fmr->list, &fmr_list);
+	}
+
+	ret = ib_unmap_fmr(&fmr_list);
+	if (ret)
+		pr_warn("RDS/IB: FMR invalidation failed (err=%d)\n", ret);
+
+	/* Now we can destroy the DMA mapping and unpin any pages */
+	list_for_each_entry_safe(ibmr, next, list, unmap_list) {
+		fmr = &ibmr->u.fmr;
+		*unpinned += ibmr->sg_len;
+		__rds_ib_teardown_mr(ibmr);
+		if (freed < goal ||
+		    ibmr->remap_count >= ibmr->pool->fmr_attr.max_maps) {
+			if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL)
+				rds_ib_stats_inc(s_ib_rdma_mr_8k_free);
+			else
+				rds_ib_stats_inc(s_ib_rdma_mr_1m_free);
+			list_del(&ibmr->unmap_list);
+			ib_dealloc_fmr(fmr->fmr);
+			kfree(ibmr);
+			freed++;
+		}
+	}
+	*nfreed = freed;
+}
+
+void rds_ib_free_fmr_list(struct rds_ib_mr *ibmr)
+{
+	struct rds_ib_mr_pool *pool = ibmr->pool;
+
+	if (ibmr->remap_count >= pool->fmr_attr.max_maps)
+		llist_add(&ibmr->llnode, &pool->drop_list);
+	else
+		llist_add(&ibmr->llnode, &pool->free_list);
+}
@@ -113,4 +113,10 @@ int rds_ib_map_fmr(struct rds_ib_device *, struct rds_ib_mr *,
 		   struct scatterlist *, unsigned int);
 struct rds_ib_mr *rds_ib_reuse_mr(struct rds_ib_mr_pool *);
 int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *, int, struct rds_ib_mr **);
+struct rds_ib_mr *rds_ib_reg_fmr(struct rds_ib_device *, struct scatterlist *,
+				 unsigned long, u32 *);
+struct rds_ib_mr *rds_ib_try_reuse_ibmr(struct rds_ib_mr_pool *);
+void rds_ib_unreg_fmr(struct list_head *, unsigned int *,
+		      unsigned long *, unsigned int);
+void rds_ib_free_fmr_list(struct rds_ib_mr *);
 #endif
@@ -333,15 +333,12 @@ static void list_to_llist_nodes(struct rds_ib_mr_pool *pool,
 int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
 			 int free_all, struct rds_ib_mr **ibmr_ret)
 {
-	struct rds_ib_mr *ibmr, *next;
-	struct rds_ib_fmr *fmr;
+	struct rds_ib_mr *ibmr;
 	struct llist_node *clean_nodes;
 	struct llist_node *clean_tail;
 	LIST_HEAD(unmap_list);
-	LIST_HEAD(fmr_list);
 	unsigned long unpinned = 0;
 	unsigned int nfreed = 0, dirty_to_clean = 0, free_goal;
-	int ret = 0;
 
 	if (pool->pool_type == RDS_IB_MR_8K_POOL)
 		rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_flush);
@@ -395,33 +392,7 @@ int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
 	if (list_empty(&unmap_list))
 		goto out;
 
-	/* String all ib_mr's onto one list and hand them to ib_unmap_fmr */
-	list_for_each_entry(ibmr, &unmap_list, unmap_list) {
-		fmr = &ibmr->u.fmr;
-		list_add(&fmr->fmr->list, &fmr_list);
-	}
-
-	ret = ib_unmap_fmr(&fmr_list);
-	if (ret)
-		printk(KERN_WARNING "RDS/IB: ib_unmap_fmr failed (err=%d)\n", ret);
-
-	/* Now we can destroy the DMA mapping and unpin any pages */
-	list_for_each_entry_safe(ibmr, next, &unmap_list, unmap_list) {
-		unpinned += ibmr->sg_len;
-		fmr = &ibmr->u.fmr;
-		__rds_ib_teardown_mr(ibmr);
-		if (nfreed < free_goal ||
-		    ibmr->remap_count >= pool->fmr_attr.max_maps) {
-			if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL)
-				rds_ib_stats_inc(s_ib_rdma_mr_8k_free);
-			else
-				rds_ib_stats_inc(s_ib_rdma_mr_1m_free);
-			list_del(&ibmr->unmap_list);
-			ib_dealloc_fmr(fmr->fmr);
-			kfree(ibmr);
-			nfreed++;
-		}
-	}
+	rds_ib_unreg_fmr(&unmap_list, &nfreed, &unpinned, free_goal);
 
 	if (!list_empty(&unmap_list)) {
 		/* we have to make sure that none of the things we're about
@@ -454,7 +425,47 @@ int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
 	if (waitqueue_active(&pool->flush_wait))
 		wake_up(&pool->flush_wait);
 out_nolock:
-	return ret;
+	return 0;
+}
+
+struct rds_ib_mr *rds_ib_try_reuse_ibmr(struct rds_ib_mr_pool *pool)
+{
+	struct rds_ib_mr *ibmr = NULL;
+	int iter = 0;
+
+	if (atomic_read(&pool->dirty_count) >= pool->max_items_soft / 10)
+		queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10);
+
+	while (1) {
+		ibmr = rds_ib_reuse_mr(pool);
+		if (ibmr)
+			return ibmr;
+		if (atomic_inc_return(&pool->item_count) <= pool->max_items)
+			break;
+		atomic_dec(&pool->item_count);
+		if (++iter > 2) {
+			if (pool->pool_type == RDS_IB_MR_8K_POOL)
+				rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_depleted);
+			else
+				rds_ib_stats_inc(s_ib_rdma_mr_1m_pool_depleted);
+			return ERR_PTR(-EAGAIN);
+		}
+
+		/* We do have some empty MRs. Flush them out. */
+		if (pool->pool_type == RDS_IB_MR_8K_POOL)
+			rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_wait);
+		else
+			rds_ib_stats_inc(s_ib_rdma_mr_1m_pool_wait);
+
+		rds_ib_flush_mr_pool(pool, 0, &ibmr);
+		if (ibmr)
+			return ibmr;
+	}
+
+	return ibmr;
 }
 
 static void rds_ib_mr_pool_flush_worker(struct work_struct *work)
@@ -473,10 +484,7 @@ void rds_ib_free_mr(void *trans_private, int invalidate)
 	rdsdebug("RDS/IB: free_mr nents %u\n", ibmr->sg_len);
 
 	/* Return it to the pool's free list */
-	if (ibmr->remap_count >= pool->fmr_attr.max_maps)
-		llist_add(&ibmr->llnode, &pool->drop_list);
-	else
-		llist_add(&ibmr->llnode, &pool->free_list);
+	rds_ib_free_fmr_list(ibmr);
 
 	atomic_add(ibmr->sg_len, &pool->free_pinned);
 	atomic_inc(&pool->dirty_count);
@@ -521,7 +529,6 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
 {
 	struct rds_ib_device *rds_ibdev;
 	struct rds_ib_mr *ibmr = NULL;
-	struct rds_ib_fmr *fmr;
 	int ret;
 
 	rds_ibdev = rds_ib_get_device(rs->rs_bound_addr);
@@ -535,30 +542,17 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
 		goto out;
 	}
 
-	ibmr = rds_ib_alloc_fmr(rds_ibdev, nents);
-	if (IS_ERR(ibmr)) {
-		rds_ib_dev_put(rds_ibdev);
-		return ibmr;
-	}
-
-	fmr = &ibmr->u.fmr;
-	ret = rds_ib_map_fmr(rds_ibdev, ibmr, sg, nents);
-	if (ret == 0)
-		*key_ret = fmr->fmr->rkey;
-	else
-		printk(KERN_WARNING "RDS/IB: map_fmr failed (errno=%d)\n", ret);
-
-	ibmr->device = rds_ibdev;
-	rds_ibdev = NULL;
+	ibmr = rds_ib_reg_fmr(rds_ibdev, sg, nents, key_ret);
+	if (ibmr)
+		rds_ibdev = NULL;
 
  out:
-	if (ret) {
-		if (ibmr)
-			rds_ib_free_mr(ibmr, 0);
-		ibmr = ERR_PTR(ret);
-	}
+	if (!ibmr)
+		pr_warn("RDS/IB: rds_ib_get_mr failed (errno=%d)\n", ret);
+
 	if (rds_ibdev)
 		rds_ib_dev_put(rds_ibdev);
+
 	return ibmr;
 }
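
Taken together, a sketch of the call graph after the move, reconstructed from the hunks above (an editorial summary, not part of the patch):

    /*
     * map:   rds_ib_get_mr() -> rds_ib_reg_fmr()
     *            -> rds_ib_alloc_fmr() -> rds_ib_try_reuse_ibmr()
     *            -> rds_ib_map_fmr()        (rkey returned via *key_ret)
     * free:  rds_ib_free_mr() -> rds_ib_free_fmr_list()
     * flush: rds_ib_flush_mr_pool() -> rds_ib_unreg_fmr() -> ib_unmap_fmr()
     */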