Commit cc886c9f authored by Chuck Lever, committed by Doug Ledford

svcrdma: Improve allocation of struct svc_rdma_op_ctxt

When the maximum payload size of NFS READ and WRITE was increased
by commit cc9a903d ("svcrdma: Change maximum server payload back
to RPCSVC_MAXPAYLOAD"), the size of struct svc_rdma_op_ctxt
increased to over 6KB (on x86_64). That makes allocating one of
these from a kmem_cache more likely to fail in situations when
system memory is exhausted.

Since I'm about to add a caller where this allocation must always
work _and_ it cannot sleep, pre-allocate ctxts for each connection.

Another motivation for this change is that NFSv4.x servers are
required by specification not to drop NFS requests. Pre-allocating
memory resources reduces the likelihood of a drop.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Acked-by: Bruce Fields <bfields@fieldses.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
parent ced4ac0c
...@@ -69,6 +69,7 @@ extern atomic_t rdma_stat_sq_prod; ...@@ -69,6 +69,7 @@ extern atomic_t rdma_stat_sq_prod;
* completes. * completes.
*/ */
struct svc_rdma_op_ctxt { struct svc_rdma_op_ctxt {
struct list_head free;
struct svc_rdma_op_ctxt *read_hdr; struct svc_rdma_op_ctxt *read_hdr;
struct svc_rdma_fastreg_mr *frmr; struct svc_rdma_fastreg_mr *frmr;
int hdr_count; int hdr_count;
...@@ -141,7 +142,10 @@ struct svcxprt_rdma { ...@@ -141,7 +142,10 @@ struct svcxprt_rdma {
struct ib_pd *sc_pd; struct ib_pd *sc_pd;
atomic_t sc_dma_used; atomic_t sc_dma_used;
atomic_t sc_ctxt_used; spinlock_t sc_ctxt_lock;
struct list_head sc_ctxts;
int sc_ctxt_used;
struct list_head sc_rq_dto_q; struct list_head sc_rq_dto_q;
spinlock_t sc_rq_dto_lock; spinlock_t sc_rq_dto_lock;
struct ib_qp *sc_qp; struct ib_qp *sc_qp;
......
...@@ -153,18 +153,76 @@ static void svc_rdma_bc_free(struct svc_xprt *xprt) ...@@ -153,18 +153,76 @@ static void svc_rdma_bc_free(struct svc_xprt *xprt)
} }
#endif /* CONFIG_SUNRPC_BACKCHANNEL */ #endif /* CONFIG_SUNRPC_BACKCHANNEL */
struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) static struct svc_rdma_op_ctxt *alloc_ctxt(struct svcxprt_rdma *xprt,
gfp_t flags)
{ {
struct svc_rdma_op_ctxt *ctxt; struct svc_rdma_op_ctxt *ctxt;
ctxt = kmem_cache_alloc(svc_rdma_ctxt_cachep, ctxt = kmalloc(sizeof(*ctxt), flags);
GFP_KERNEL | __GFP_NOFAIL); if (ctxt) {
ctxt->xprt = xprt; ctxt->xprt = xprt;
INIT_LIST_HEAD(&ctxt->dto_q); INIT_LIST_HEAD(&ctxt->free);
INIT_LIST_HEAD(&ctxt->dto_q);
}
return ctxt;
}
/*
 * svc_rdma_prealloc_ctxts - populate a transport's list of free ctxts
 * @xprt: transport whose sc_ctxts list is to be filled
 *
 * Allocates (sc_sq_depth + sc_max_requests) ctxts with GFP_KERNEL and
 * adds each one to xprt->sc_ctxts.
 *
 * Returns true if every allocation succeeded. Returns false on the
 * first failed allocation; ctxts already added are left on
 * xprt->sc_ctxts and are not freed here.
 *
 * NOTE(review): presumably the leftover ctxts are released by
 * svc_rdma_destroy_ctxts() during transport teardown -- confirm that
 * the caller's error path reaches it.
 * NOTE(review): sc_ctxt_lock is not taken around list_add(); this
 * assumes nothing else can touch sc_ctxts yet -- confirm against the
 * caller.
 */
static bool svc_rdma_prealloc_ctxts(struct svcxprt_rdma *xprt)
{
int i;
/* Each RPC/RDMA credit can consume a number of send
* and receive WQEs. One ctxt is allocated for each.
*/
i = xprt->sc_sq_depth + xprt->sc_max_requests;
while (i--) {
struct svc_rdma_op_ctxt *ctxt;
ctxt = alloc_ctxt(xprt, GFP_KERNEL);
if (!ctxt) {
/* Stop at the first failure; nothing is unwound here. */
dprintk("svcrdma: No memory for RDMA ctxt\n");
return false;
}
list_add(&ctxt->free, &xprt->sc_ctxts);
}
return true;
}
struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
{
struct svc_rdma_op_ctxt *ctxt = NULL;
spin_lock_bh(&xprt->sc_ctxt_lock);
xprt->sc_ctxt_used++;
if (list_empty(&xprt->sc_ctxts))
goto out_empty;
ctxt = list_first_entry(&xprt->sc_ctxts,
struct svc_rdma_op_ctxt, free);
list_del_init(&ctxt->free);
spin_unlock_bh(&xprt->sc_ctxt_lock);
out:
ctxt->count = 0; ctxt->count = 0;
ctxt->frmr = NULL; ctxt->frmr = NULL;
atomic_inc(&xprt->sc_ctxt_used);
return ctxt; return ctxt;
out_empty:
/* Either pre-allocation missed the mark, or send
* queue accounting is broken.
*/
spin_unlock_bh(&xprt->sc_ctxt_lock);
ctxt = alloc_ctxt(xprt, GFP_NOIO);
if (ctxt)
goto out;
spin_lock_bh(&xprt->sc_ctxt_lock);
xprt->sc_ctxt_used--;
spin_unlock_bh(&xprt->sc_ctxt_lock);
WARN_ONCE(1, "svcrdma: empty RDMA ctxt list?\n");
return NULL;
} }
void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt) void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
...@@ -190,16 +248,29 @@ void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt) ...@@ -190,16 +248,29 @@ void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages) void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
{ {
struct svcxprt_rdma *xprt; struct svcxprt_rdma *xprt = ctxt->xprt;
int i; int i;
xprt = ctxt->xprt;
if (free_pages) if (free_pages)
for (i = 0; i < ctxt->count; i++) for (i = 0; i < ctxt->count; i++)
put_page(ctxt->pages[i]); put_page(ctxt->pages[i]);
kmem_cache_free(svc_rdma_ctxt_cachep, ctxt); spin_lock_bh(&xprt->sc_ctxt_lock);
atomic_dec(&xprt->sc_ctxt_used); xprt->sc_ctxt_used--;
list_add(&ctxt->free, &xprt->sc_ctxts);
spin_unlock_bh(&xprt->sc_ctxt_lock);
}
static void svc_rdma_destroy_ctxts(struct svcxprt_rdma *xprt)
{
while (!list_empty(&xprt->sc_ctxts)) {
struct svc_rdma_op_ctxt *ctxt;
ctxt = list_first_entry(&xprt->sc_ctxts,
struct svc_rdma_op_ctxt, free);
list_del(&ctxt->free);
kfree(ctxt);
}
} }
/* /*
...@@ -521,11 +592,13 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv, ...@@ -521,11 +592,13 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q); INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q); INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q);
INIT_LIST_HEAD(&cma_xprt->sc_frmr_q); INIT_LIST_HEAD(&cma_xprt->sc_frmr_q);
INIT_LIST_HEAD(&cma_xprt->sc_ctxts);
init_waitqueue_head(&cma_xprt->sc_send_wait); init_waitqueue_head(&cma_xprt->sc_send_wait);
spin_lock_init(&cma_xprt->sc_lock); spin_lock_init(&cma_xprt->sc_lock);
spin_lock_init(&cma_xprt->sc_rq_dto_lock); spin_lock_init(&cma_xprt->sc_rq_dto_lock);
spin_lock_init(&cma_xprt->sc_frmr_q_lock); spin_lock_init(&cma_xprt->sc_frmr_q_lock);
spin_lock_init(&cma_xprt->sc_ctxt_lock);
if (listener) if (listener)
set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags); set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);
...@@ -913,6 +986,9 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) ...@@ -913,6 +986,9 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
(size_t)svcrdma_max_requests); (size_t)svcrdma_max_requests);
newxprt->sc_sq_depth = RPCRDMA_SQ_DEPTH_MULT * newxprt->sc_max_requests; newxprt->sc_sq_depth = RPCRDMA_SQ_DEPTH_MULT * newxprt->sc_max_requests;
if (!svc_rdma_prealloc_ctxts(newxprt))
goto errout;
/* /*
* Limit ORD based on client limit, local device limit, and * Limit ORD based on client limit, local device limit, and
* configured svcrdma limit. * configured svcrdma limit.
...@@ -1174,15 +1250,15 @@ static void __svc_rdma_free(struct work_struct *work) ...@@ -1174,15 +1250,15 @@ static void __svc_rdma_free(struct work_struct *work)
} }
/* Warn if we leaked a resource or under-referenced */ /* Warn if we leaked a resource or under-referenced */
if (atomic_read(&rdma->sc_ctxt_used) != 0) if (rdma->sc_ctxt_used != 0)
pr_err("svcrdma: ctxt still in use? (%d)\n", pr_err("svcrdma: ctxt still in use? (%d)\n",
atomic_read(&rdma->sc_ctxt_used)); rdma->sc_ctxt_used);
if (atomic_read(&rdma->sc_dma_used) != 0) if (atomic_read(&rdma->sc_dma_used) != 0)
pr_err("svcrdma: dma still in use? (%d)\n", pr_err("svcrdma: dma still in use? (%d)\n",
atomic_read(&rdma->sc_dma_used)); atomic_read(&rdma->sc_dma_used));
/* De-allocate fastreg mr */
rdma_dealloc_frmr_q(rdma); rdma_dealloc_frmr_q(rdma);
svc_rdma_destroy_ctxts(rdma);
/* Destroy the QP if present (not a listener) */ /* Destroy the QP if present (not a listener) */
if (rdma->sc_qp && !IS_ERR(rdma->sc_qp)) if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment