Commit 89e25567 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'nfsd-4.18' of git://linux-nfs.org/~bfields/linux

Pull nfsd updates from Bruce Fields:
 "A relatively quiet cycle for nfsd.

  The largest piece is an RDMA update from Chuck Lever with new trace
  points, miscellaneous cleanups, and streamlining of the send and
  receive paths.

  Other than that, some miscellaneous bugfixes"

* tag 'nfsd-4.18' of git://linux-nfs.org/~bfields/linux: (26 commits)
  nfsd: fix error handling in nfs4_set_delegation()
  nfsd: fix potential use-after-free in nfsd4_decode_getdeviceinfo
  Fix 16-byte memory leak in gssp_accept_sec_context_upcall
  svcrdma: Fix incorrect return value/type in svc_rdma_post_recvs
  svcrdma: Remove unused svc_rdma_op_ctxt
  svcrdma: Persistently allocate and DMA-map Send buffers
  svcrdma: Simplify svc_rdma_send()
  svcrdma: Remove post_send_wr
  svcrdma: Don't overrun the SGE array in svc_rdma_send_ctxt
  svcrdma: Introduce svc_rdma_send_ctxt
  svcrdma: Clean up Send SGE accounting
  svcrdma: Refactor svc_rdma_dma_map_buf
  svcrdma: Allocate recv_ctxt's on CPU handling Receives
  svcrdma: Persistently allocate and DMA-map Receive buffers
  svcrdma: Preserve Receive buffer until svc_rdma_sendto
  svcrdma: Simplify svc_rdma_recv_ctxt_put
  svcrdma: Remove sc_rq_depth
  svcrdma: Introduce svc_rdma_recv_ctxt
  svcrdma: Trace key RDMA API events
  svcrdma: Trace key RPC/RDMA protocol events
  ...
parents 8efcf34a 692ad280
......@@ -216,13 +216,21 @@ static int nfsd4_scsi_identify_device(struct block_device *bdev,
struct request_queue *q = bdev->bd_disk->queue;
struct request *rq;
struct scsi_request *req;
size_t bufflen = 252, len, id_len;
/*
* The allocation length (passed in bytes 3 and 4 of the INQUIRY
* command descriptor block) specifies the number of bytes that have
* been allocated for the data-in buffer.
* 252 is the highest one-byte value that is a multiple of 4.
* 65532 is the highest two-byte value that is a multiple of 4.
*/
size_t bufflen = 252, maxlen = 65532, len, id_len;
u8 *buf, *d, type, assoc;
int error;
int retries = 1, error;
if (WARN_ON_ONCE(!blk_queue_scsi_passthrough(q)))
return -EINVAL;
again:
buf = kzalloc(bufflen, GFP_KERNEL);
if (!buf)
return -ENOMEM;
......@@ -255,6 +263,12 @@ static int nfsd4_scsi_identify_device(struct block_device *bdev,
len = (buf[2] << 8) + buf[3] + 4;
if (len > bufflen) {
if (len <= maxlen && retries--) {
blk_put_request(rq);
kfree(buf);
bufflen = len;
goto again;
}
pr_err("pNFS: INQUIRY 0x83 response invalid (len = %zd)\n",
len);
goto out_put_request;
......
......@@ -67,11 +67,6 @@ enum {
RC_REPLBUFF,
};
/*
* If requests are retransmitted within this interval, they're dropped.
*/
#define RC_DELAY (HZ/5)
/* Cache entries expire after this time period */
#define RC_EXPIRE (120 * HZ)
......
......@@ -4378,8 +4378,11 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
spin_unlock(&state_lock);
if (status)
destroy_unhashed_deleg(dp);
goto out_unlock;
return dp;
out_unlock:
vfs_setlease(fp->fi_deleg_file, F_UNLCK, NULL, (void **)&dp);
out_clnt_odstate:
put_clnt_odstate(dp->dl_clnt_odstate);
out_stid:
......
......@@ -1585,6 +1585,8 @@ nfsd4_decode_getdeviceinfo(struct nfsd4_compoundargs *argp,
gdev->gd_maxcount = be32_to_cpup(p++);
num = be32_to_cpup(p++);
if (num) {
if (num > 1000)
goto xdr_error;
READ_BUF(4 * num);
gdev->gd_notify_types = be32_to_cpup(p++);
for (i = 1; i < num; i++) {
......@@ -3651,7 +3653,8 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
nfserr = nfserr_resource;
goto err_no_verf;
}
maxcount = min_t(u32, readdir->rd_maxcount, INT_MAX);
maxcount = svc_max_payload(resp->rqstp);
maxcount = min_t(u32, readdir->rd_maxcount, maxcount);
/*
* Note the rfc defines rd_maxcount as the size of the
* READDIR4resok structure, which includes the verifier above
......@@ -3665,7 +3668,7 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
/* RFC 3530 14.2.24 allows us to ignore dircount when it's 0: */
if (!readdir->rd_dircount)
readdir->rd_dircount = INT_MAX;
readdir->rd_dircount = svc_max_payload(resp->rqstp);
readdir->xdr = xdr;
readdir->rd_maxcount = maxcount;
......
......@@ -394,7 +394,6 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
__wsum csum;
u32 hash = nfsd_cache_hash(xid);
struct nfsd_drc_bucket *b = &drc_hashtbl[hash];
unsigned long age;
int type = rqstp->rq_cachetype;
int rtn = RC_DOIT;
......@@ -461,12 +460,11 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
found_entry:
nfsdstats.rchits++;
/* We found a matching entry which is either in progress or done. */
age = jiffies - rp->c_timestamp;
lru_put_end(b, rp);
rtn = RC_DROPIT;
/* Request being processed or excessive rexmits */
if (rp->c_state == RC_INPROG || age < RC_DELAY)
/* Request being processed */
if (rp->c_state == RC_INPROG)
goto out;
/* From the hall of fame of impractical attacks:
......
/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
*
......@@ -70,37 +71,16 @@ extern atomic_t rdma_stat_rq_prod;
extern atomic_t rdma_stat_sq_poll;
extern atomic_t rdma_stat_sq_prod;
/*
* Contexts are built when an RDMA request is created and are a
* record of the resources that can be recovered when the request
* completes.
*/
struct svc_rdma_op_ctxt {
struct list_head list;
struct xdr_buf arg;
struct ib_cqe cqe;
u32 byte_len;
struct svcxprt_rdma *xprt;
enum dma_data_direction direction;
int count;
unsigned int mapped_sges;
int hdr_count;
struct ib_send_wr send_wr;
struct ib_sge sge[1 + RPCRDMA_MAX_INLINE_THRESH / PAGE_SIZE];
struct page *pages[RPCSVC_MAXPAGES];
};
struct svcxprt_rdma {
struct svc_xprt sc_xprt; /* SVC transport structure */
struct rdma_cm_id *sc_cm_id; /* RDMA connection id */
struct list_head sc_accept_q; /* Conn. waiting accept */
int sc_ord; /* RDMA read limit */
int sc_max_sge;
int sc_max_send_sges;
bool sc_snd_w_inv; /* OK to use Send With Invalidate */
atomic_t sc_sq_avail; /* SQEs ready to be consumed */
unsigned int sc_sq_depth; /* Depth of SQ */
unsigned int sc_rq_depth; /* Depth of RQ */
__be32 sc_fc_credits; /* Forward credits */
u32 sc_max_requests; /* Max requests */
u32 sc_max_bc_requests;/* Backward credits */
......@@ -109,9 +89,8 @@ struct svcxprt_rdma {
struct ib_pd *sc_pd;
spinlock_t sc_ctxt_lock;
struct list_head sc_ctxts;
int sc_ctxt_used;
spinlock_t sc_send_lock;
struct list_head sc_send_ctxts;
spinlock_t sc_rw_ctxt_lock;
struct list_head sc_rw_ctxts;
......@@ -127,6 +106,9 @@ struct svcxprt_rdma {
unsigned long sc_flags;
struct list_head sc_read_complete_q;
struct work_struct sc_work;
spinlock_t sc_recv_lock;
struct list_head sc_recv_ctxts;
};
/* sc_flags */
#define RDMAXPRT_CONN_PENDING 3
......@@ -141,12 +123,30 @@ struct svcxprt_rdma {
#define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD
/* Track DMA maps for this transport and context */
static inline void svc_rdma_count_mappings(struct svcxprt_rdma *rdma,
struct svc_rdma_op_ctxt *ctxt)
{
ctxt->mapped_sges++;
}
struct svc_rdma_recv_ctxt {
struct list_head rc_list;
struct ib_recv_wr rc_recv_wr;
struct ib_cqe rc_cqe;
struct ib_sge rc_recv_sge;
void *rc_recv_buf;
struct xdr_buf rc_arg;
bool rc_temp;
u32 rc_byte_len;
unsigned int rc_page_count;
unsigned int rc_hdr_count;
struct page *rc_pages[RPCSVC_MAXPAGES];
};
struct svc_rdma_send_ctxt {
struct list_head sc_list;
struct ib_send_wr sc_send_wr;
struct ib_cqe sc_cqe;
void *sc_xprt_buf;
int sc_page_count;
int sc_cur_sge_no;
struct page *sc_pages[RPCSVC_MAXPAGES];
struct ib_sge sc_sges[];
};
/* svc_rdma_backchannel.c */
extern int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt,
......@@ -154,13 +154,18 @@ extern int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt,
struct xdr_buf *rcvbuf);
/* svc_rdma_recvfrom.c */
extern void svc_rdma_recv_ctxts_destroy(struct svcxprt_rdma *rdma);
extern bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma);
extern void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma,
struct svc_rdma_recv_ctxt *ctxt);
extern void svc_rdma_flush_recv_queues(struct svcxprt_rdma *rdma);
extern int svc_rdma_recvfrom(struct svc_rqst *);
/* svc_rdma_rw.c */
extern void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma);
extern int svc_rdma_recv_read_chunk(struct svcxprt_rdma *rdma,
struct svc_rqst *rqstp,
struct svc_rdma_op_ctxt *head, __be32 *p);
struct svc_rdma_recv_ctxt *head, __be32 *p);
extern int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma,
__be32 *wr_ch, struct xdr_buf *xdr);
extern int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma,
......@@ -168,24 +173,22 @@ extern int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma,
struct xdr_buf *xdr);
/* svc_rdma_sendto.c */
extern int svc_rdma_map_reply_hdr(struct svcxprt_rdma *rdma,
struct svc_rdma_op_ctxt *ctxt,
__be32 *rdma_resp, unsigned int len);
extern int svc_rdma_post_send_wr(struct svcxprt_rdma *rdma,
struct svc_rdma_op_ctxt *ctxt,
int num_sge, u32 inv_rkey);
extern void svc_rdma_send_ctxts_destroy(struct svcxprt_rdma *rdma);
extern struct svc_rdma_send_ctxt *
svc_rdma_send_ctxt_get(struct svcxprt_rdma *rdma);
extern void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *ctxt);
extern int svc_rdma_send(struct svcxprt_rdma *rdma, struct ib_send_wr *wr);
extern void svc_rdma_sync_reply_hdr(struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *ctxt,
unsigned int len);
extern int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *ctxt,
struct xdr_buf *xdr, __be32 *wr_lst);
extern int svc_rdma_sendto(struct svc_rqst *);
/* svc_rdma_transport.c */
extern void svc_rdma_wc_send(struct ib_cq *, struct ib_wc *);
extern void svc_rdma_wc_reg(struct ib_cq *, struct ib_wc *);
extern void svc_rdma_wc_read(struct ib_cq *, struct ib_wc *);
extern void svc_rdma_wc_inv(struct ib_cq *, struct ib_wc *);
extern int svc_rdma_send(struct svcxprt_rdma *, struct ib_send_wr *);
extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *);
extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *);
extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int);
extern void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt);
extern void svc_sq_reap(struct svcxprt_rdma *);
extern void svc_rq_reap(struct svcxprt_rdma *);
extern void svc_rdma_prep_reply_hdr(struct svc_rqst *);
......
This diff is collapsed.
......@@ -298,9 +298,11 @@ int gssp_accept_sec_context_upcall(struct net *net,
if (res.context_handle) {
data->out_handle = rctxh.exported_context_token;
data->mech_oid.len = rctxh.mech.len;
if (rctxh.mech.data)
if (rctxh.mech.data) {
memcpy(data->mech_oid.data, rctxh.mech.data,
data->mech_oid.len);
kfree(rctxh.mech.data);
}
client_name = rctxh.src_name.display_name;
}
......
......@@ -9,8 +9,10 @@
#include <linux/sunrpc/xprt.h>
#include <linux/sunrpc/svc.h>
#include <linux/sunrpc/svc_xprt.h>
#include <linux/sunrpc/svc_rdma.h>
#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY RPCDBG_TRANS
......
......@@ -20,7 +20,10 @@
* verb (fmr_op_unmap).
*/
#include <linux/sunrpc/svc_rdma.h>
#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY RPCDBG_TRANS
......
......@@ -71,8 +71,10 @@
*/
#include <linux/sunrpc/rpc_rdma.h>
#include <linux/sunrpc/svc_rdma.h>
#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY RPCDBG_TRANS
......
......@@ -13,9 +13,11 @@
#include <asm/swab.h>
#define CREATE_TRACE_POINTS
#include "xprt_rdma.h"
#define CREATE_TRACE_POINTS
#include <trace/events/rpcrdma.h>
MODULE_AUTHOR("Open Grid Computing and Network Appliance, Inc.");
MODULE_DESCRIPTION("RPC/RDMA Transport");
MODULE_LICENSE("Dual BSD/GPL");
......
......@@ -46,10 +46,13 @@
* to the Linux RPC framework lives.
*/
#include "xprt_rdma.h"
#include <linux/highmem.h>
#include <linux/sunrpc/svc_rdma.h>
#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY RPCDBG_TRANS
#endif
......
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright (c) 2015-2018 Oracle. All rights reserved.
* Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
......@@ -46,7 +48,6 @@
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/sched.h>
#include <linux/sunrpc/svc_rdma.h>
#include "xprt_rdma.h"
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
......
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2015 Oracle. All rights reserved.
* Copyright (c) 2015-2018 Oracle. All rights reserved.
*
* Support for backward direction RPCs on RPC/RDMA (server-side).
*/
#include <linux/module.h>
#include <linux/sunrpc/svc_rdma.h>
#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
......@@ -112,39 +115,21 @@ int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, __be32 *rdma_resp,
* the adapter has a small maximum SQ depth.
*/
static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
struct rpc_rqst *rqst)
struct rpc_rqst *rqst,
struct svc_rdma_send_ctxt *ctxt)
{
struct svc_rdma_op_ctxt *ctxt;
int ret;
ctxt = svc_rdma_get_context(rdma);
/* rpcrdma_bc_send_request builds the transport header and
* the backchannel RPC message in the same buffer. Thus only
* one SGE is needed to send both.
*/
ret = svc_rdma_map_reply_hdr(rdma, ctxt, rqst->rq_buffer,
rqst->rq_snd_buf.len);
ret = svc_rdma_map_reply_msg(rdma, ctxt, &rqst->rq_snd_buf, NULL);
if (ret < 0)
goto out_err;
return -EIO;
/* Bump page refcnt so Send completion doesn't release
* the rq_buffer before all retransmits are complete.
*/
get_page(virt_to_page(rqst->rq_buffer));
ret = svc_rdma_post_send_wr(rdma, ctxt, 1, 0);
if (ret)
goto out_unmap;
out_err:
dprintk("svcrdma: %s returns %d\n", __func__, ret);
return ret;
out_unmap:
svc_rdma_unmap_dma(ctxt);
svc_rdma_put_context(ctxt, 1);
ret = -EIO;
goto out_err;
ctxt->sc_send_wr.opcode = IB_WR_SEND;
return svc_rdma_send(rdma, &ctxt->sc_send_wr);
}
/* Server-side transport endpoint wants a whole page for its send
......@@ -191,13 +176,15 @@ rpcrdma_bc_send_request(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst)
{
struct rpc_xprt *xprt = rqst->rq_xprt;
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
struct svc_rdma_send_ctxt *ctxt;
__be32 *p;
int rc;
/* Space in the send buffer for an RPC/RDMA header is reserved
* via xprt->tsh_size.
*/
p = rqst->rq_buffer;
ctxt = svc_rdma_send_ctxt_get(rdma);
if (!ctxt)
goto drop_connection;
p = ctxt->sc_xprt_buf;
*p++ = rqst->rq_xid;
*p++ = rpcrdma_version;
*p++ = cpu_to_be32(r_xprt->rx_buf.rb_bc_max_requests);
......@@ -205,14 +192,17 @@ rpcrdma_bc_send_request(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst)
*p++ = xdr_zero;
*p++ = xdr_zero;
*p = xdr_zero;
svc_rdma_sync_reply_hdr(rdma, ctxt, RPCRDMA_HDRLEN_MIN);
#ifdef SVCRDMA_BACKCHANNEL_DEBUG
pr_info("%s: %*ph\n", __func__, 64, rqst->rq_buffer);
#endif
rc = svc_rdma_bc_sendto(rdma, rqst);
if (rc)
rc = svc_rdma_bc_sendto(rdma, rqst, ctxt);
if (rc) {
svc_rdma_send_ctxt_put(rdma, ctxt);
goto drop_connection;
}
return rc;
drop_connection:
......@@ -320,7 +310,7 @@ xprt_setup_rdma_bc(struct xprt_create *args)
xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO;
xprt->prot = XPRT_TRANSPORT_BC_RDMA;
xprt->tsh_size = RPCRDMA_HDRLEN_MIN / sizeof(__be32);
xprt->tsh_size = 0;
xprt->ops = &xprt_rdma_bc_procs;
memcpy(&xprt->addr, args->dstaddr, args->addrlen);
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -51,9 +51,13 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/seq_file.h>
#include <linux/smp.h>
#include <linux/sunrpc/addr.h>
#include <linux/sunrpc/svc_rdma.h>
#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY RPCDBG_TRANS
......
......@@ -59,6 +59,7 @@
#include <rdma/ib_cm.h>
#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>
/*
* Globals/Macros
......
......@@ -675,5 +675,3 @@ void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int);
extern struct xprt_class xprt_rdma_bc;
#endif /* _LINUX_SUNRPC_XPRT_RDMA_H */
#include <trace/events/rpcrdma.h>
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment