Commit edffb84c authored by Trond Myklebust's avatar Trond Myklebust

Merge tag 'nfs-rdma-for-5.11-1' of git://git.linux-nfs.org/projects/anna/linux-nfs into linux-next

NFSoRDmA Client updates for Linux 5.11

Cleanups and improvements:
  - Remove use of raw kernel memory addresses in tracepoints
  - Replace dprintk() call sites in ERR_CHUNK path
  - Trace unmap sync calls
  - Optimize MR DMA-unmapping
Signed-off-by: default avatarTrond Myklebust <trond.myklebust@hammerspace.com>
parents 5c3485bb 7a03aeb6
This diff is collapsed.
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2015 Oracle. All rights reserved.
* Copyright (c) 2015-2020, Oracle and/or its affiliates.
*
* Support for backward direction RPCs on RPC/RDMA.
*/
......@@ -82,7 +82,7 @@ static int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
&rqst->rq_snd_buf, rpcrdma_noch_pullup))
return -EIO;
trace_xprtrdma_cb_reply(rqst);
trace_xprtrdma_cb_reply(r_xprt, rqst);
return 0;
}
......@@ -260,7 +260,7 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
*/
req = rpcr_to_rdmar(rqst);
req->rl_reply = rep;
trace_xprtrdma_cb_call(rqst);
trace_xprtrdma_cb_call(r_xprt, rqst);
/* Queue rqst for ULP's callback service */
bc_serv = xprt->bc_serv;
......
......@@ -65,18 +65,23 @@ void frwr_release_mr(struct rpcrdma_mr *mr)
kfree(mr);
}
static void frwr_mr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr)
{
if (mr->mr_device) {
trace_xprtrdma_mr_unmap(mr);
ib_dma_unmap_sg(mr->mr_device, mr->mr_sg, mr->mr_nents,
mr->mr_dir);
mr->mr_device = NULL;
}
}
static void frwr_mr_recycle(struct rpcrdma_mr *mr)
{
struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
trace_xprtrdma_mr_recycle(mr);
if (mr->mr_dir != DMA_NONE) {
trace_xprtrdma_mr_unmap(mr);
ib_dma_unmap_sg(r_xprt->rx_ep->re_id->device,
mr->mr_sg, mr->mr_nents, mr->mr_dir);
mr->mr_dir = DMA_NONE;
}
frwr_mr_unmap(r_xprt, mr);
spin_lock(&r_xprt->rx_buf.rb_lock);
list_del(&mr->mr_all);
......@@ -86,6 +91,16 @@ static void frwr_mr_recycle(struct rpcrdma_mr *mr)
frwr_release_mr(mr);
}
static void frwr_mr_put(struct rpcrdma_mr *mr)
{
frwr_mr_unmap(mr->mr_xprt, mr);
/* The MR is returned to the req's MR free list instead
* of to the xprt's MR free list. No spinlock is needed.
*/
rpcrdma_mr_push(mr, &mr->mr_req->rl_free_mrs);
}
/* frwr_reset - Place MRs back on the free list
* @req: request to reset
*
......@@ -101,7 +116,7 @@ void frwr_reset(struct rpcrdma_req *req)
struct rpcrdma_mr *mr;
while ((mr = rpcrdma_mr_pop(&req->rl_registered)))
rpcrdma_mr_put(mr);
frwr_mr_put(mr);
}
/**
......@@ -130,7 +145,7 @@ int frwr_mr_init(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr)
mr->mr_xprt = r_xprt;
mr->frwr.fr_mr = frmr;
mr->mr_dir = DMA_NONE;
mr->mr_device = NULL;
INIT_LIST_HEAD(&mr->mr_list);
init_completion(&mr->frwr.fr_linv_done);
......@@ -315,6 +330,7 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
mr->mr_dir);
if (!dma_nents)
goto out_dmamap_err;
mr->mr_device = ep->re_id->device;
ibmr = mr->frwr.fr_mr;
n = ib_map_mr_sg(ibmr, mr->mr_sg, dma_nents, NULL, PAGE_SIZE);
......@@ -341,7 +357,6 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
return seg;
out_dmamap_err:
mr->mr_dir = DMA_NONE;
trace_xprtrdma_frwr_sgerr(mr, i);
return ERR_PTR(-EIO);
......@@ -363,12 +378,21 @@ static void frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
container_of(cqe, struct rpcrdma_frwr, fr_cqe);
/* WARNING: Only wr_cqe and status are reliable at this point */
trace_xprtrdma_wc_fastreg(wc, frwr);
trace_xprtrdma_wc_fastreg(wc, &frwr->fr_cid);
/* The MR will get recycled when the associated req is retransmitted */
rpcrdma_flush_disconnect(cq->cq_context, wc);
}
static void frwr_cid_init(struct rpcrdma_ep *ep,
struct rpcrdma_frwr *frwr)
{
struct rpc_rdma_cid *cid = &frwr->fr_cid;
cid->ci_queue_id = ep->re_attr.send_cq->res.id;
cid->ci_completion_id = frwr->fr_mr->res.id;
}
/**
* frwr_send - post Send WRs containing the RPC Call message
* @r_xprt: controlling transport instance
......@@ -385,6 +409,7 @@ static void frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
*/
int frwr_send(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
{
struct rpcrdma_ep *ep = r_xprt->rx_ep;
struct ib_send_wr *post_wr;
struct rpcrdma_mr *mr;
......@@ -395,6 +420,7 @@ int frwr_send(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
frwr = &mr->frwr;
frwr->fr_cqe.done = frwr_wc_fastreg;
frwr_cid_init(ep, frwr);
frwr->fr_regwr.wr.next = post_wr;
frwr->fr_regwr.wr.wr_cqe = &frwr->fr_cqe;
frwr->fr_regwr.wr.num_sge = 0;
......@@ -404,7 +430,7 @@ int frwr_send(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
post_wr = &frwr->fr_regwr.wr;
}
return ib_post_send(r_xprt->rx_ep->re_id->qp, post_wr, NULL);
return ib_post_send(ep->re_id->qp, post_wr, NULL);
}
/**
......@@ -420,18 +446,17 @@ void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
list_for_each_entry(mr, mrs, mr_list)
if (mr->mr_handle == rep->rr_inv_rkey) {
list_del_init(&mr->mr_list);
trace_xprtrdma_mr_reminv(mr);
rpcrdma_mr_put(mr);
frwr_mr_put(mr);
break; /* only one invalidated MR per RPC */
}
}
static void __frwr_release_mr(struct ib_wc *wc, struct rpcrdma_mr *mr)
static void frwr_mr_done(struct ib_wc *wc, struct rpcrdma_mr *mr)
{
if (wc->status != IB_WC_SUCCESS)
frwr_mr_recycle(mr);
else
rpcrdma_mr_put(mr);
frwr_mr_put(mr);
}
/**
......@@ -448,8 +473,8 @@ static void frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
struct rpcrdma_mr *mr = container_of(frwr, struct rpcrdma_mr, frwr);
/* WARNING: Only wr_cqe and status are reliable at this point */
trace_xprtrdma_wc_li(wc, frwr);
__frwr_release_mr(wc, mr);
trace_xprtrdma_wc_li(wc, &frwr->fr_cid);
frwr_mr_done(wc, mr);
rpcrdma_flush_disconnect(cq->cq_context, wc);
}
......@@ -469,8 +494,8 @@ static void frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
struct rpcrdma_mr *mr = container_of(frwr, struct rpcrdma_mr, frwr);
/* WARNING: Only wr_cqe and status are reliable at this point */
trace_xprtrdma_wc_li_wake(wc, frwr);
__frwr_release_mr(wc, mr);
trace_xprtrdma_wc_li_wake(wc, &frwr->fr_cid);
frwr_mr_done(wc, mr);
complete(&frwr->fr_linv_done);
rpcrdma_flush_disconnect(cq->cq_context, wc);
......@@ -490,6 +515,7 @@ static void frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
{
struct ib_send_wr *first, **prev, *last;
struct rpcrdma_ep *ep = r_xprt->rx_ep;
const struct ib_send_wr *bad_wr;
struct rpcrdma_frwr *frwr;
struct rpcrdma_mr *mr;
......@@ -509,6 +535,7 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
frwr = &mr->frwr;
frwr->fr_cqe.done = frwr_wc_localinv;
frwr_cid_init(ep, frwr);
last = &frwr->fr_invwr;
last->next = NULL;
last->wr_cqe = &frwr->fr_cqe;
......@@ -534,7 +561,7 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
* unless re_id->qp is a valid pointer.
*/
bad_wr = NULL;
rc = ib_post_send(r_xprt->rx_ep->re_id->qp, first, &bad_wr);
rc = ib_post_send(ep->re_id->qp, first, &bad_wr);
/* The final LOCAL_INV WR in the chain is supposed to
* do the wake. If it was never posted, the wake will
......@@ -547,7 +574,7 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
/* Recycle MRs in the LOCAL_INV chain that did not get posted.
*/
trace_xprtrdma_post_linv(req, rc);
trace_xprtrdma_post_linv_err(req, rc);
while (bad_wr) {
frwr = container_of(bad_wr, struct rpcrdma_frwr,
fr_invwr);
......@@ -574,10 +601,10 @@ static void frwr_wc_localinv_done(struct ib_cq *cq, struct ib_wc *wc)
struct rpcrdma_rep *rep = mr->mr_req->rl_reply;
/* WARNING: Only wr_cqe and status are reliable at this point */
trace_xprtrdma_wc_li_done(wc, frwr);
__frwr_release_mr(wc, mr);
trace_xprtrdma_wc_li_done(wc, &frwr->fr_cid);
frwr_mr_done(wc, mr);
/* Ensure @rep is generated before __frwr_release_mr */
/* Ensure @rep is generated before frwr_mr_done */
smp_rmb();
rpcrdma_complete_rqst(rep);
......@@ -597,6 +624,7 @@ static void frwr_wc_localinv_done(struct ib_cq *cq, struct ib_wc *wc)
void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
{
struct ib_send_wr *first, *last, **prev;
struct rpcrdma_ep *ep = r_xprt->rx_ep;
const struct ib_send_wr *bad_wr;
struct rpcrdma_frwr *frwr;
struct rpcrdma_mr *mr;
......@@ -614,6 +642,7 @@ void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
frwr = &mr->frwr;
frwr->fr_cqe.done = frwr_wc_localinv;
frwr_cid_init(ep, frwr);
last = &frwr->fr_invwr;
last->next = NULL;
last->wr_cqe = &frwr->fr_cqe;
......@@ -639,13 +668,13 @@ void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
* unless re_id->qp is a valid pointer.
*/
bad_wr = NULL;
rc = ib_post_send(r_xprt->rx_ep->re_id->qp, first, &bad_wr);
rc = ib_post_send(ep->re_id->qp, first, &bad_wr);
if (!rc)
return;
/* Recycle MRs in the LOCAL_INV chain that did not get posted.
*/
trace_xprtrdma_post_linv(req, rc);
trace_xprtrdma_post_linv_err(req, rc);
while (bad_wr) {
frwr = container_of(bad_wr, struct rpcrdma_frwr, fr_invwr);
mr = container_of(frwr, struct rpcrdma_mr, frwr);
......
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright (c) 2014-2017 Oracle. All rights reserved.
* Copyright (c) 2014-2020, Oracle and/or its affiliates.
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
......@@ -331,7 +331,6 @@ static struct rpcrdma_mr_seg *rpcrdma_mr_prepare(struct rpcrdma_xprt *r_xprt,
*mr = rpcrdma_mr_get(r_xprt);
if (!*mr)
goto out_getmr_err;
trace_xprtrdma_mr_get(req);
(*mr)->mr_req = req;
}
......@@ -339,7 +338,7 @@ static struct rpcrdma_mr_seg *rpcrdma_mr_prepare(struct rpcrdma_xprt *r_xprt,
return frwr_map(r_xprt, seg, nsegs, writing, req->rl_slot.rq_xid, *mr);
out_getmr_err:
trace_xprtrdma_nomrs(req);
trace_xprtrdma_nomrs_err(r_xprt, req);
xprt_wait_for_buffer_space(&r_xprt->rx_xprt);
rpcrdma_mrs_refresh(r_xprt);
return ERR_PTR(-EAGAIN);
......@@ -1344,20 +1343,13 @@ rpcrdma_decode_error(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep,
p = xdr_inline_decode(xdr, 2 * sizeof(*p));
if (!p)
break;
dprintk("RPC: %s: server reports "
"version error (%u-%u), xid %08x\n", __func__,
be32_to_cpup(p), be32_to_cpu(*(p + 1)),
be32_to_cpu(rep->rr_xid));
trace_xprtrdma_err_vers(rqst, p, p + 1);
break;
case err_chunk:
dprintk("RPC: %s: server reports "
"header decoding error, xid %08x\n", __func__,
be32_to_cpu(rep->rr_xid));
trace_xprtrdma_err_chunk(rqst);
break;
default:
dprintk("RPC: %s: server reports "
"unrecognized error %d, xid %08x\n", __func__,
be32_to_cpup(p), be32_to_cpu(rep->rr_xid));
trace_xprtrdma_err_unrecognized(rqst, p);
}
return -EIO;
......@@ -1398,7 +1390,7 @@ void rpcrdma_complete_rqst(struct rpcrdma_rep *rep)
return;
out_badheader:
trace_xprtrdma_reply_hdr(rep);
trace_xprtrdma_reply_hdr_err(rep);
r_xprt->rx_stats.bad_reply_count++;
rqst->rq_task->tk_status = status;
status = 0;
......@@ -1472,14 +1464,12 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
rpcrdma_post_recvs(r_xprt, false);
req = rpcr_to_rdmar(rqst);
if (req->rl_reply) {
trace_xprtrdma_leaked_rep(rqst, req->rl_reply);
if (unlikely(req->rl_reply))
rpcrdma_recv_buffer_put(req->rl_reply);
}
req->rl_reply = rep;
rep->rr_rqst = rqst;
trace_xprtrdma_reply(rqst->rq_task, rep, req, credits);
trace_xprtrdma_reply(rqst->rq_task, rep, credits);
if (rep->rr_wc_flags & IB_WC_WITH_INVALIDATE)
frwr_reminv(rep, &req->rl_registered);
......@@ -1491,16 +1481,16 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
return;
out_badversion:
trace_xprtrdma_reply_vers(rep);
trace_xprtrdma_reply_vers_err(rep);
goto out;
out_norqst:
spin_unlock(&xprt->queue_lock);
trace_xprtrdma_reply_rqst(rep);
trace_xprtrdma_reply_rqst_err(rep);
goto out;
out_shortreply:
trace_xprtrdma_reply_short(rep);
trace_xprtrdma_reply_short_err(rep);
out:
rpcrdma_recv_buffer_put(rep);
......
......@@ -599,11 +599,12 @@ static void
xprt_rdma_free(struct rpc_task *task)
{
struct rpc_rqst *rqst = task->tk_rqstp;
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
if (!list_empty(&req->rl_registered))
frwr_unmap_sync(r_xprt, req);
if (unlikely(!list_empty(&req->rl_registered))) {
trace_xprtrdma_mrs_zap(task);
frwr_unmap_sync(rpcx_to_rdmax(rqst->rq_xprt), req);
}
/* XXX: If the RPC is completing because of a signal and
* not because a reply was received, we ought to ensure
......
......@@ -167,7 +167,7 @@ static void rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
struct rpcrdma_xprt *r_xprt = cq->cq_context;
/* WARNING: Only wr_cqe and status are reliable at this point */
trace_xprtrdma_wc_send(sc, wc);
trace_xprtrdma_wc_send(wc, &sc->sc_cid);
rpcrdma_sendctx_put_locked(r_xprt, sc);
rpcrdma_flush_disconnect(r_xprt, wc);
}
......@@ -186,7 +186,7 @@ static void rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
struct rpcrdma_xprt *r_xprt = cq->cq_context;
/* WARNING: Only wr_cqe and status are reliable at this point */
trace_xprtrdma_wc_receive(wc);
trace_xprtrdma_wc_receive(wc, &rep->rr_cid);
--r_xprt->rx_ep->re_receive_count;
if (wc->status != IB_WC_SUCCESS)
goto out_flushed;
......@@ -643,6 +643,9 @@ static struct rpcrdma_sendctx *rpcrdma_sendctx_create(struct rpcrdma_ep *ep)
return NULL;
sc->sc_cqe.done = rpcrdma_wc_send;
sc->sc_cid.ci_queue_id = ep->re_attr.send_cq->res.id;
sc->sc_cid.ci_completion_id =
atomic_inc_return(&ep->re_completion_ids);
return sc;
}
......@@ -972,6 +975,9 @@ struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt,
if (!rpcrdma_regbuf_dma_map(r_xprt, rep->rr_rdmabuf))
goto out_free_regbuf;
rep->rr_cid.ci_completion_id =
atomic_inc_return(&r_xprt->rx_ep->re_completion_ids);
xdr_buf_init(&rep->rr_hdrbuf, rdmab_data(rep->rr_rdmabuf),
rdmab_length(rep->rr_rdmabuf));
rep->rr_cqe.done = rpcrdma_wc_receive;
......@@ -1178,25 +1184,6 @@ rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt)
return mr;
}
/**
* rpcrdma_mr_put - DMA unmap an MR and release it
* @mr: MR to release
*
*/
void rpcrdma_mr_put(struct rpcrdma_mr *mr)
{
struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
if (mr->mr_dir != DMA_NONE) {
trace_xprtrdma_mr_unmap(mr);
ib_dma_unmap_sg(r_xprt->rx_ep->re_id->device,
mr->mr_sg, mr->mr_nents, mr->mr_dir);
mr->mr_dir = DMA_NONE;
}
rpcrdma_mr_push(mr, &mr->mr_req->rl_free_mrs);
}
/**
* rpcrdma_buffer_get - Get a request buffer
* @buffers: Buffer pool from which to obtain a buffer
......@@ -1411,6 +1398,7 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
if (!rep)
break;
rep->rr_cid.ci_queue_id = ep->re_attr.recv_cq->res.id;
trace_xprtrdma_post_recv(rep);
rep->rr_recv_wr.next = wr;
wr = &rep->rr_recv_wr;
......
......@@ -53,6 +53,7 @@
#include <rdma/ib_verbs.h> /* RDMA verbs api */
#include <linux/sunrpc/clnt.h> /* rpc_xprt */
#include <linux/sunrpc/rpc_rdma_cid.h> /* completion IDs */
#include <linux/sunrpc/rpc_rdma.h> /* RPC/RDMA protocol */
#include <linux/sunrpc/xprtrdma.h> /* xprt parameters */
......@@ -93,6 +94,8 @@ struct rpcrdma_ep {
unsigned int re_max_requests; /* depends on device */
unsigned int re_inline_send; /* negotiated */
unsigned int re_inline_recv; /* negotiated */
atomic_t re_completion_ids;
};
/* Pre-allocate extra Work Requests for handling backward receives
......@@ -180,6 +183,8 @@ enum {
struct rpcrdma_rep {
struct ib_cqe rr_cqe;
struct rpc_rdma_cid rr_cid;
__be32 rr_xid;
__be32 rr_vers;
__be32 rr_proc;
......@@ -211,6 +216,7 @@ enum {
struct rpcrdma_req;
struct rpcrdma_sendctx {
struct ib_cqe sc_cqe;
struct rpc_rdma_cid sc_cid;
struct rpcrdma_req *sc_req;
unsigned int sc_unmap_count;
struct ib_sge sc_sges[];
......@@ -225,6 +231,7 @@ struct rpcrdma_sendctx {
struct rpcrdma_frwr {
struct ib_mr *fr_mr;
struct ib_cqe fr_cqe;
struct rpc_rdma_cid fr_cid;
struct completion fr_linv_done;
union {
struct ib_reg_wr fr_regwr;
......@@ -236,6 +243,7 @@ struct rpcrdma_req;
struct rpcrdma_mr {
struct list_head mr_list;
struct rpcrdma_req *mr_req;
struct ib_device *mr_device;
struct scatterlist *mr_sg;
int mr_nents;
enum dma_data_direction mr_dir;
......@@ -466,7 +474,6 @@ void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_xprt *r_xprt);
struct rpcrdma_mr *rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt);
void rpcrdma_mr_put(struct rpcrdma_mr *mr);
void rpcrdma_mrs_refresh(struct rpcrdma_xprt *r_xprt);
struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment