Commit 5e4d6597 authored by Linus Torvalds

Merge tag 'nfsd-4.17' of git://linux-nfs.org/~bfields/linux

Pull nfsd updates from Bruce Fields:
 "Chuck Lever did a bunch of work on nfsd tracepoints, on RDMA, and on
  server xdr decoding (with an eye towards eliminating a data copy in
  the RDMA case).

  I did some refactoring of the delegation code in preparation for
  eliminating some delegation self-conflicts and implementing write
  delegations"

* tag 'nfsd-4.17' of git://linux-nfs.org/~bfields/linux: (40 commits)
  nfsd: fix incorrect umasks
  sunrpc: remove incorrect HMAC request initialization
  NFSD: Clean up legacy NFS SYMLINK argument XDR decoders
  NFSD: Clean up legacy NFS WRITE argument XDR decoders
  nfsd: Trace NFSv4 COMPOUND execution
  nfsd: Add I/O trace points in the NFSv4 read proc
  nfsd: Add I/O trace points in the NFSv4 write path
  nfsd: Add "nfsd_" to trace point names
  nfsd: Record request byte count, not count of vectors
  nfsd: Fix NFSD trace points
  svc: Report xprt dequeue latency
  sunrpc: Report per-RPC execution stats
  sunrpc: Re-purpose trace_svc_process
  sunrpc: Save remote presentation address in svc_xprt for trace events
  sunrpc: Simplify trace_svc_recv
  sunrpc: Simplify do_enqueue tracing
  sunrpc: Move trace_svc_xprt_dequeue()
  sunrpc: Update show_svc_xprt_flags() to include recently added flags
  svc: Simplify ->xpo_secure_port
  sunrpc: Remove unneeded pointer dereference
  ...
parents 274c0e74 880a3a53
...@@ -57,8 +57,8 @@ static struct task_struct *nlmsvc_task; ...@@ -57,8 +57,8 @@ static struct task_struct *nlmsvc_task;
static struct svc_rqst *nlmsvc_rqst; static struct svc_rqst *nlmsvc_rqst;
unsigned long nlmsvc_timeout; unsigned long nlmsvc_timeout;
atomic_t nlm_ntf_refcnt = ATOMIC_INIT(0); static atomic_t nlm_ntf_refcnt = ATOMIC_INIT(0);
DECLARE_WAIT_QUEUE_HEAD(nlm_ntf_wq); static DECLARE_WAIT_QUEUE_HEAD(nlm_ntf_wq);
unsigned int lockd_net_id; unsigned int lockd_net_id;
......
...@@ -192,6 +192,7 @@ nfsd3_proc_write(struct svc_rqst *rqstp) ...@@ -192,6 +192,7 @@ nfsd3_proc_write(struct svc_rqst *rqstp)
struct nfsd3_writeres *resp = rqstp->rq_resp; struct nfsd3_writeres *resp = rqstp->rq_resp;
__be32 nfserr; __be32 nfserr;
unsigned long cnt = argp->len; unsigned long cnt = argp->len;
unsigned int nvecs;
dprintk("nfsd: WRITE(3) %s %d bytes at %Lu%s\n", dprintk("nfsd: WRITE(3) %s %d bytes at %Lu%s\n",
SVCFH_fmt(&argp->fh), SVCFH_fmt(&argp->fh),
...@@ -201,9 +202,12 @@ nfsd3_proc_write(struct svc_rqst *rqstp) ...@@ -201,9 +202,12 @@ nfsd3_proc_write(struct svc_rqst *rqstp)
fh_copy(&resp->fh, &argp->fh); fh_copy(&resp->fh, &argp->fh);
resp->committed = argp->stable; resp->committed = argp->stable;
nvecs = svc_fill_write_vector(rqstp, &argp->first, cnt);
if (!nvecs)
RETURN_STATUS(nfserr_io);
nfserr = nfsd_write(rqstp, &resp->fh, argp->offset, nfserr = nfsd_write(rqstp, &resp->fh, argp->offset,
rqstp->rq_vec, argp->vlen, rqstp->rq_vec, nvecs, &cnt,
&cnt, resp->committed); resp->committed);
resp->count = cnt; resp->count = cnt;
RETURN_STATUS(nfserr); RETURN_STATUS(nfserr);
} }
...@@ -279,6 +283,16 @@ nfsd3_proc_symlink(struct svc_rqst *rqstp) ...@@ -279,6 +283,16 @@ nfsd3_proc_symlink(struct svc_rqst *rqstp)
struct nfsd3_diropres *resp = rqstp->rq_resp; struct nfsd3_diropres *resp = rqstp->rq_resp;
__be32 nfserr; __be32 nfserr;
if (argp->tlen == 0)
RETURN_STATUS(nfserr_inval);
if (argp->tlen > NFS3_MAXPATHLEN)
RETURN_STATUS(nfserr_nametoolong);
argp->tname = svc_fill_symlink_pathname(rqstp, &argp->first,
argp->tlen);
if (IS_ERR(argp->tname))
RETURN_STATUS(nfserrno(PTR_ERR(argp->tname)));
dprintk("nfsd: SYMLINK(3) %s %.*s -> %.*s\n", dprintk("nfsd: SYMLINK(3) %s %.*s -> %.*s\n",
SVCFH_fmt(&argp->ffh), SVCFH_fmt(&argp->ffh),
argp->flen, argp->fname, argp->flen, argp->fname,
......
...@@ -391,7 +391,7 @@ int ...@@ -391,7 +391,7 @@ int
nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p) nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p)
{ {
struct nfsd3_writeargs *args = rqstp->rq_argp; struct nfsd3_writeargs *args = rqstp->rq_argp;
unsigned int len, v, hdr, dlen; unsigned int len, hdr, dlen;
u32 max_blocksize = svc_max_payload(rqstp); u32 max_blocksize = svc_max_payload(rqstp);
struct kvec *head = rqstp->rq_arg.head; struct kvec *head = rqstp->rq_arg.head;
struct kvec *tail = rqstp->rq_arg.tail; struct kvec *tail = rqstp->rq_arg.tail;
...@@ -433,17 +433,9 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p) ...@@ -433,17 +433,9 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p)
args->count = max_blocksize; args->count = max_blocksize;
len = args->len = max_blocksize; len = args->len = max_blocksize;
} }
rqstp->rq_vec[0].iov_base = (void*)p;
rqstp->rq_vec[0].iov_len = head->iov_len - hdr; args->first.iov_base = (void *)p;
v = 0; args->first.iov_len = head->iov_len - hdr;
while (len > rqstp->rq_vec[v].iov_len) {
len -= rqstp->rq_vec[v].iov_len;
v++;
rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_pages[v]);
rqstp->rq_vec[v].iov_len = PAGE_SIZE;
}
rqstp->rq_vec[v].iov_len = len;
args->vlen = v + 1;
return 1; return 1;
} }
...@@ -489,51 +481,24 @@ int ...@@ -489,51 +481,24 @@ int
nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p) nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p)
{ {
struct nfsd3_symlinkargs *args = rqstp->rq_argp; struct nfsd3_symlinkargs *args = rqstp->rq_argp;
unsigned int len, avail; char *base = (char *)p;
char *old, *new; size_t dlen;
struct kvec *vec;
if (!(p = decode_fh(p, &args->ffh)) || if (!(p = decode_fh(p, &args->ffh)) ||
!(p = decode_filename(p, &args->fname, &args->flen)) !(p = decode_filename(p, &args->fname, &args->flen)))
)
return 0; return 0;
p = decode_sattr3(p, &args->attrs); p = decode_sattr3(p, &args->attrs);
/* now decode the pathname, which might be larger than the first page. args->tlen = ntohl(*p++);
* As we have to check for nul's anyway, we copy it into a new page
* This page appears in the rq_res.pages list, but as pages_len is always args->first.iov_base = p;
* 0, it won't get in the way args->first.iov_len = rqstp->rq_arg.head[0].iov_len;
*/ args->first.iov_len -= (char *)p - base;
len = ntohl(*p++);
if (len == 0 || len > NFS3_MAXPATHLEN || len >= PAGE_SIZE)
return 0;
args->tname = new = page_address(*(rqstp->rq_next_page++));
args->tlen = len;
/* first copy and check from the first page */
old = (char*)p;
vec = &rqstp->rq_arg.head[0];
if ((void *)old > vec->iov_base + vec->iov_len)
return 0;
avail = vec->iov_len - (old - (char*)vec->iov_base);
while (len && avail && *old) {
*new++ = *old++;
len--;
avail--;
}
/* now copy next page if there is one */
if (len && !avail && rqstp->rq_arg.page_len) {
avail = min_t(unsigned int, rqstp->rq_arg.page_len, PAGE_SIZE);
old = page_address(rqstp->rq_arg.pages[0]);
}
while (len && avail && *old) {
*new++ = *old++;
len--;
avail--;
}
*new = '\0';
if (len)
return 0;
dlen = args->first.iov_len + rqstp->rq_arg.page_len +
rqstp->rq_arg.tail[0].iov_len;
if (dlen < XDR_QUADLEN(args->tlen) << 2)
return 0;
return 1; return 1;
} }
......
...@@ -223,8 +223,8 @@ static int nfs_cb_stat_to_errno(int status) ...@@ -223,8 +223,8 @@ static int nfs_cb_stat_to_errno(int status)
return -status; return -status;
} }
static int decode_cb_op_status(struct xdr_stream *xdr, enum nfs_opnum4 expected, static int decode_cb_op_status(struct xdr_stream *xdr,
int *status) enum nfs_cb_opnum4 expected, int *status)
{ {
__be32 *p; __be32 *p;
u32 op; u32 op;
......
...@@ -165,7 +165,7 @@ nfsd4_free_layout_stateid(struct nfs4_stid *stid) ...@@ -165,7 +165,7 @@ nfsd4_free_layout_stateid(struct nfs4_stid *stid)
struct nfs4_client *clp = ls->ls_stid.sc_client; struct nfs4_client *clp = ls->ls_stid.sc_client;
struct nfs4_file *fp = ls->ls_stid.sc_file; struct nfs4_file *fp = ls->ls_stid.sc_file;
trace_layoutstate_free(&ls->ls_stid.sc_stateid); trace_nfsd_layoutstate_free(&ls->ls_stid.sc_stateid);
spin_lock(&clp->cl_lock); spin_lock(&clp->cl_lock);
list_del_init(&ls->ls_perclnt); list_del_init(&ls->ls_perclnt);
...@@ -264,7 +264,7 @@ nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate, ...@@ -264,7 +264,7 @@ nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate,
list_add(&ls->ls_perfile, &fp->fi_lo_states); list_add(&ls->ls_perfile, &fp->fi_lo_states);
spin_unlock(&fp->fi_lock); spin_unlock(&fp->fi_lock);
trace_layoutstate_alloc(&ls->ls_stid.sc_stateid); trace_nfsd_layoutstate_alloc(&ls->ls_stid.sc_stateid);
return ls; return ls;
} }
...@@ -334,7 +334,7 @@ nfsd4_recall_file_layout(struct nfs4_layout_stateid *ls) ...@@ -334,7 +334,7 @@ nfsd4_recall_file_layout(struct nfs4_layout_stateid *ls)
if (list_empty(&ls->ls_layouts)) if (list_empty(&ls->ls_layouts))
goto out_unlock; goto out_unlock;
trace_layout_recall(&ls->ls_stid.sc_stateid); trace_nfsd_layout_recall(&ls->ls_stid.sc_stateid);
refcount_inc(&ls->ls_stid.sc_count); refcount_inc(&ls->ls_stid.sc_count);
nfsd4_run_cb(&ls->ls_recall); nfsd4_run_cb(&ls->ls_recall);
...@@ -507,7 +507,7 @@ nfsd4_return_file_layouts(struct svc_rqst *rqstp, ...@@ -507,7 +507,7 @@ nfsd4_return_file_layouts(struct svc_rqst *rqstp,
false, lrp->lr_layout_type, false, lrp->lr_layout_type,
&ls); &ls);
if (nfserr) { if (nfserr) {
trace_layout_return_lookup_fail(&lrp->lr_sid); trace_nfsd_layout_return_lookup_fail(&lrp->lr_sid);
return nfserr; return nfserr;
} }
...@@ -523,7 +523,7 @@ nfsd4_return_file_layouts(struct svc_rqst *rqstp, ...@@ -523,7 +523,7 @@ nfsd4_return_file_layouts(struct svc_rqst *rqstp,
nfs4_inc_and_copy_stateid(&lrp->lr_sid, &ls->ls_stid); nfs4_inc_and_copy_stateid(&lrp->lr_sid, &ls->ls_stid);
lrp->lrs_present = 1; lrp->lrs_present = 1;
} else { } else {
trace_layoutstate_unhash(&ls->ls_stid.sc_stateid); trace_nfsd_layoutstate_unhash(&ls->ls_stid.sc_stateid);
nfs4_unhash_stid(&ls->ls_stid); nfs4_unhash_stid(&ls->ls_stid);
lrp->lrs_present = 0; lrp->lrs_present = 0;
} }
...@@ -694,7 +694,7 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task) ...@@ -694,7 +694,7 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task)
/* /*
* Unknown error or non-responding client, we'll need to fence. * Unknown error or non-responding client, we'll need to fence.
*/ */
trace_layout_recall_fail(&ls->ls_stid.sc_stateid); trace_nfsd_layout_recall_fail(&ls->ls_stid.sc_stateid);
ops = nfsd4_layout_ops[ls->ls_layout_type]; ops = nfsd4_layout_ops[ls->ls_layout_type];
if (ops->fence_client) if (ops->fence_client)
...@@ -703,7 +703,7 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task) ...@@ -703,7 +703,7 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task)
nfsd4_cb_layout_fail(ls); nfsd4_cb_layout_fail(ls);
return -1; return -1;
case -NFS4ERR_NOMATCHING_LAYOUT: case -NFS4ERR_NOMATCHING_LAYOUT:
trace_layout_recall_done(&ls->ls_stid.sc_stateid); trace_nfsd_layout_recall_done(&ls->ls_stid.sc_stateid);
task->tk_status = 0; task->tk_status = 0;
return 1; return 1;
} }
...@@ -716,7 +716,7 @@ nfsd4_cb_layout_release(struct nfsd4_callback *cb) ...@@ -716,7 +716,7 @@ nfsd4_cb_layout_release(struct nfsd4_callback *cb)
container_of(cb, struct nfs4_layout_stateid, ls_recall); container_of(cb, struct nfs4_layout_stateid, ls_recall);
LIST_HEAD(reaplist); LIST_HEAD(reaplist);
trace_layout_recall_release(&ls->ls_stid.sc_stateid); trace_nfsd_layout_recall_release(&ls->ls_stid.sc_stateid);
nfsd4_return_all_layouts(ls, &reaplist); nfsd4_return_all_layouts(ls, &reaplist);
nfsd4_free_layouts(&reaplist); nfsd4_free_layouts(&reaplist);
......
...@@ -32,6 +32,7 @@ ...@@ -32,6 +32,7 @@
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#include <linux/fs_struct.h>
#include <linux/file.h> #include <linux/file.h>
#include <linux/falloc.h> #include <linux/falloc.h>
#include <linux/slab.h> #include <linux/slab.h>
...@@ -252,11 +253,13 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru ...@@ -252,11 +253,13 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru
* Note: create modes (UNCHECKED,GUARDED...) are the same * Note: create modes (UNCHECKED,GUARDED...) are the same
* in NFSv4 as in v3 except EXCLUSIVE4_1. * in NFSv4 as in v3 except EXCLUSIVE4_1.
*/ */
current->fs->umask = open->op_umask;
status = do_nfsd_create(rqstp, current_fh, open->op_fname.data, status = do_nfsd_create(rqstp, current_fh, open->op_fname.data,
open->op_fname.len, &open->op_iattr, open->op_fname.len, &open->op_iattr,
*resfh, open->op_createmode, *resfh, open->op_createmode,
(u32 *)open->op_verf.data, (u32 *)open->op_verf.data,
&open->op_truncate, &open->op_created); &open->op_truncate, &open->op_created);
current->fs->umask = 0;
if (!status && open->op_label.len) if (!status && open->op_label.len)
nfsd4_security_inode_setsecctx(*resfh, &open->op_label, open->op_bmval); nfsd4_security_inode_setsecctx(*resfh, &open->op_label, open->op_bmval);
...@@ -603,6 +606,7 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, ...@@ -603,6 +606,7 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (status) if (status)
return status; return status;
current->fs->umask = create->cr_umask;
switch (create->cr_type) { switch (create->cr_type) {
case NF4LNK: case NF4LNK:
status = nfsd_symlink(rqstp, &cstate->current_fh, status = nfsd_symlink(rqstp, &cstate->current_fh,
...@@ -611,20 +615,22 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, ...@@ -611,20 +615,22 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
break; break;
case NF4BLK: case NF4BLK:
status = nfserr_inval;
rdev = MKDEV(create->cr_specdata1, create->cr_specdata2); rdev = MKDEV(create->cr_specdata1, create->cr_specdata2);
if (MAJOR(rdev) != create->cr_specdata1 || if (MAJOR(rdev) != create->cr_specdata1 ||
MINOR(rdev) != create->cr_specdata2) MINOR(rdev) != create->cr_specdata2)
return nfserr_inval; goto out_umask;
status = nfsd_create(rqstp, &cstate->current_fh, status = nfsd_create(rqstp, &cstate->current_fh,
create->cr_name, create->cr_namelen, create->cr_name, create->cr_namelen,
&create->cr_iattr, S_IFBLK, rdev, &resfh); &create->cr_iattr, S_IFBLK, rdev, &resfh);
break; break;
case NF4CHR: case NF4CHR:
status = nfserr_inval;
rdev = MKDEV(create->cr_specdata1, create->cr_specdata2); rdev = MKDEV(create->cr_specdata1, create->cr_specdata2);
if (MAJOR(rdev) != create->cr_specdata1 || if (MAJOR(rdev) != create->cr_specdata1 ||
MINOR(rdev) != create->cr_specdata2) MINOR(rdev) != create->cr_specdata2)
return nfserr_inval; goto out_umask;
status = nfsd_create(rqstp, &cstate->current_fh, status = nfsd_create(rqstp, &cstate->current_fh,
create->cr_name, create->cr_namelen, create->cr_name, create->cr_namelen,
&create->cr_iattr,S_IFCHR, rdev, &resfh); &create->cr_iattr,S_IFCHR, rdev, &resfh);
...@@ -668,6 +674,8 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, ...@@ -668,6 +674,8 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
fh_dup2(&cstate->current_fh, &resfh); fh_dup2(&cstate->current_fh, &resfh);
out: out:
fh_put(&resfh); fh_put(&resfh);
out_umask:
current->fs->umask = 0;
return status; return status;
} }
...@@ -751,6 +759,9 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, ...@@ -751,6 +759,9 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (read->rd_offset >= OFFSET_MAX) if (read->rd_offset >= OFFSET_MAX)
return nfserr_inval; return nfserr_inval;
trace_nfsd_read_start(rqstp, &cstate->current_fh,
read->rd_offset, read->rd_length);
/* /*
* If we do a zero copy read, then a client will see read data * If we do a zero copy read, then a client will see read data
* that reflects the state of the file *after* performing the * that reflects the state of the file *after* performing the
...@@ -783,6 +794,8 @@ nfsd4_read_release(union nfsd4_op_u *u) ...@@ -783,6 +794,8 @@ nfsd4_read_release(union nfsd4_op_u *u)
{ {
if (u->read.rd_filp) if (u->read.rd_filp)
fput(u->read.rd_filp); fput(u->read.rd_filp);
trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp,
u->read.rd_offset, u->read.rd_length);
} }
static __be32 static __be32
...@@ -1001,6 +1014,9 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, ...@@ -1001,6 +1014,9 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (write->wr_offset >= OFFSET_MAX) if (write->wr_offset >= OFFSET_MAX)
return nfserr_inval; return nfserr_inval;
cnt = write->wr_buflen;
trace_nfsd_write_start(rqstp, &cstate->current_fh,
write->wr_offset, cnt);
status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
stateid, WR_STATE, &filp, NULL); stateid, WR_STATE, &filp, NULL);
if (status) { if (status) {
...@@ -1008,7 +1024,6 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, ...@@ -1008,7 +1024,6 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
return status; return status;
} }
cnt = write->wr_buflen;
write->wr_how_written = write->wr_stable_how; write->wr_how_written = write->wr_stable_how;
gen_boot_verifier(&write->wr_verifier, SVC_NET(rqstp)); gen_boot_verifier(&write->wr_verifier, SVC_NET(rqstp));
...@@ -1021,7 +1036,8 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, ...@@ -1021,7 +1036,8 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
fput(filp); fput(filp);
write->wr_bytes_written = cnt; write->wr_bytes_written = cnt;
trace_nfsd_write_done(rqstp, &cstate->current_fh,
write->wr_offset, cnt);
return status; return status;
} }
...@@ -1106,7 +1122,6 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, ...@@ -1106,7 +1122,6 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
else { else {
copy->cp_res.wr_bytes_written = bytes; copy->cp_res.wr_bytes_written = bytes;
copy->cp_res.wr_stable_how = NFS_UNSTABLE; copy->cp_res.wr_stable_how = NFS_UNSTABLE;
copy->cp_consecutive = 1;
copy->cp_synchronous = 1; copy->cp_synchronous = 1;
gen_boot_verifier(&copy->cp_res.wr_verifier, SVC_NET(rqstp)); gen_boot_verifier(&copy->cp_res.wr_verifier, SVC_NET(rqstp));
status = nfs_ok; status = nfs_ok;
...@@ -1412,7 +1427,7 @@ nfsd4_layoutget(struct svc_rqst *rqstp, ...@@ -1412,7 +1427,7 @@ nfsd4_layoutget(struct svc_rqst *rqstp,
nfserr = nfsd4_preprocess_layout_stateid(rqstp, cstate, &lgp->lg_sid, nfserr = nfsd4_preprocess_layout_stateid(rqstp, cstate, &lgp->lg_sid,
true, lgp->lg_layout_type, &ls); true, lgp->lg_layout_type, &ls);
if (nfserr) { if (nfserr) {
trace_layout_get_lookup_fail(&lgp->lg_sid); trace_nfsd_layout_get_lookup_fail(&lgp->lg_sid);
goto out; goto out;
} }
...@@ -1481,7 +1496,7 @@ nfsd4_layoutcommit(struct svc_rqst *rqstp, ...@@ -1481,7 +1496,7 @@ nfsd4_layoutcommit(struct svc_rqst *rqstp,
false, lcp->lc_layout_type, false, lcp->lc_layout_type,
&ls); &ls);
if (nfserr) { if (nfserr) {
trace_layout_commit_lookup_fail(&lcp->lc_sid); trace_nfsd_layout_commit_lookup_fail(&lcp->lc_sid);
/* fixup error code as per RFC5661 */ /* fixup error code as per RFC5661 */
if (nfserr == nfserr_bad_stateid) if (nfserr == nfserr_bad_stateid)
nfserr = nfserr_badlayout; nfserr = nfserr_badlayout;
...@@ -1714,12 +1729,10 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) ...@@ -1714,12 +1729,10 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
goto encode_op; goto encode_op;
} }
trace_nfsd_compound(rqstp, args->opcnt);
while (!status && resp->opcnt < args->opcnt) { while (!status && resp->opcnt < args->opcnt) {
op = &args->ops[resp->opcnt++]; op = &args->ops[resp->opcnt++];
dprintk("nfsv4 compound op #%d/%d: %d (%s)\n",
resp->opcnt, args->opcnt, op->opnum,
nfsd4_op_name(op->opnum));
/* /*
* The XDR decode routines may have pre-set op->status; * The XDR decode routines may have pre-set op->status;
* for example, if there is a miscellaneous XDR error * for example, if there is a miscellaneous XDR error
...@@ -1793,9 +1806,8 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) ...@@ -1793,9 +1806,8 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
status = op->status; status = op->status;
} }
dprintk("nfsv4 compound op %p opcnt %d #%d: %d: status %d\n", trace_nfsd_compound_status(args->opcnt, resp->opcnt, status,
args->ops, args->opcnt, resp->opcnt, op->opnum, nfsd4_op_name(op->opnum));
be32_to_cpu(status));
nfsd4_cstate_clear_replay(cstate); nfsd4_cstate_clear_replay(cstate);
nfsd4_increment_op_stats(op->opnum); nfsd4_increment_op_stats(op->opnum);
......
...@@ -98,6 +98,7 @@ enum nfsd4_st_mutex_lock_subclass { ...@@ -98,6 +98,7 @@ enum nfsd4_st_mutex_lock_subclass {
*/ */
static DECLARE_WAIT_QUEUE_HEAD(close_wq); static DECLARE_WAIT_QUEUE_HEAD(close_wq);
static struct kmem_cache *client_slab;
static struct kmem_cache *openowner_slab; static struct kmem_cache *openowner_slab;
static struct kmem_cache *lockowner_slab; static struct kmem_cache *lockowner_slab;
static struct kmem_cache *file_slab; static struct kmem_cache *file_slab;
...@@ -806,7 +807,8 @@ static void block_delegations(struct knfsd_fh *fh) ...@@ -806,7 +807,8 @@ static void block_delegations(struct knfsd_fh *fh)
} }
static struct nfs4_delegation * static struct nfs4_delegation *
alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh, alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp,
struct svc_fh *current_fh,
struct nfs4_clnt_odstate *odstate) struct nfs4_clnt_odstate *odstate)
{ {
struct nfs4_delegation *dp; struct nfs4_delegation *dp;
...@@ -837,6 +839,8 @@ alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh, ...@@ -837,6 +839,8 @@ alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh,
dp->dl_retries = 1; dp->dl_retries = 1;
nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client, nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client,
&nfsd4_cb_recall_ops, NFSPROC4_CLNT_CB_RECALL); &nfsd4_cb_recall_ops, NFSPROC4_CLNT_CB_RECALL);
get_nfs4_file(fp);
dp->dl_stid.sc_file = fp;
return dp; return dp;
out_dec: out_dec:
atomic_long_dec(&num_delegations); atomic_long_dec(&num_delegations);
...@@ -874,19 +878,35 @@ nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid) ...@@ -874,19 +878,35 @@ nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid)
spin_unlock(&stid->sc_lock); spin_unlock(&stid->sc_lock);
} }
static void nfs4_put_deleg_lease(struct nfs4_file *fp) static void put_deleg_file(struct nfs4_file *fp)
{ {
struct file *filp = NULL; struct file *filp = NULL;
spin_lock(&fp->fi_lock); spin_lock(&fp->fi_lock);
if (fp->fi_deleg_file && --fp->fi_delegees == 0) if (--fp->fi_delegees == 0)
swap(filp, fp->fi_deleg_file); swap(filp, fp->fi_deleg_file);
spin_unlock(&fp->fi_lock); spin_unlock(&fp->fi_lock);
if (filp) { if (filp)
vfs_setlease(filp, F_UNLCK, NULL, (void **)&fp);
fput(filp); fput(filp);
} }
/*
 * nfs4_unlock_deleg_lease - drop the lease associated with a delegation
 * @dp: delegation being torn down
 *
 * Finds the nfs4_file behind @dp (dl_stid.sc_file) and that file's
 * fi_deleg_file, asks the VFS to remove the lease on it (F_UNLCK), and then
 * calls put_deleg_file() for the nfs4_file.
 */
static void nfs4_unlock_deleg_lease(struct nfs4_delegation *dp)
{
struct nfs4_file *fp = dp->dl_stid.sc_file;
struct file *filp = fp->fi_deleg_file;
/* Complain once if the delegee count is already zero at this point. */
WARN_ON_ONCE(!fp->fi_delegees);
/*
 * The last argument is the address of the local @dp pointer, so the value
 * handed to the lease code is the delegation itself.
 * NOTE(review): presumably this must match the owner the lease was set up
 * with -- confirm against nfs4_alloc_init_lease(). The return value of
 * vfs_setlease() is not checked here.
 */
vfs_setlease(filp, F_UNLCK, NULL, (void **)&dp);
put_deleg_file(fp);
}
static void destroy_unhashed_deleg(struct nfs4_delegation *dp)
{
put_clnt_odstate(dp->dl_clnt_odstate);
nfs4_unlock_deleg_lease(dp);
nfs4_put_stid(&dp->dl_stid);
} }
void nfs4_unhash_stid(struct nfs4_stid *s) void nfs4_unhash_stid(struct nfs4_stid *s)
...@@ -895,20 +915,16 @@ void nfs4_unhash_stid(struct nfs4_stid *s) ...@@ -895,20 +915,16 @@ void nfs4_unhash_stid(struct nfs4_stid *s)
} }
/** /**
* nfs4_get_existing_delegation - Discover if this delegation already exists * nfs4_delegation_exists - Discover if this delegation already exists
* @clp: a pointer to the nfs4_client we're granting a delegation to * @clp: a pointer to the nfs4_client we're granting a delegation to
* @fp: a pointer to the nfs4_file we're granting a delegation on * @fp: a pointer to the nfs4_file we're granting a delegation on
* *
* Return: * Return:
* On success: NULL if an existing delegation was not found. * On success: true iff an existing delegation is found
*
* On error: -EAGAIN if one was previously granted to this nfs4_client
* for this nfs4_file.
*
*/ */
static int static bool
nfs4_get_existing_delegation(struct nfs4_client *clp, struct nfs4_file *fp) nfs4_delegation_exists(struct nfs4_client *clp, struct nfs4_file *fp)
{ {
struct nfs4_delegation *searchdp = NULL; struct nfs4_delegation *searchdp = NULL;
struct nfs4_client *searchclp = NULL; struct nfs4_client *searchclp = NULL;
...@@ -919,10 +935,10 @@ nfs4_get_existing_delegation(struct nfs4_client *clp, struct nfs4_file *fp) ...@@ -919,10 +935,10 @@ nfs4_get_existing_delegation(struct nfs4_client *clp, struct nfs4_file *fp)
list_for_each_entry(searchdp, &fp->fi_delegations, dl_perfile) { list_for_each_entry(searchdp, &fp->fi_delegations, dl_perfile) {
searchclp = searchdp->dl_stid.sc_client; searchclp = searchdp->dl_stid.sc_client;
if (clp == searchclp) { if (clp == searchclp) {
return -EAGAIN; return true;
} }
} }
return 0; return false;
} }
/** /**
...@@ -941,16 +957,13 @@ nfs4_get_existing_delegation(struct nfs4_client *clp, struct nfs4_file *fp) ...@@ -941,16 +957,13 @@ nfs4_get_existing_delegation(struct nfs4_client *clp, struct nfs4_file *fp)
static int static int
hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp) hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp)
{ {
int status;
struct nfs4_client *clp = dp->dl_stid.sc_client; struct nfs4_client *clp = dp->dl_stid.sc_client;
lockdep_assert_held(&state_lock); lockdep_assert_held(&state_lock);
lockdep_assert_held(&fp->fi_lock); lockdep_assert_held(&fp->fi_lock);
status = nfs4_get_existing_delegation(clp, fp); if (nfs4_delegation_exists(clp, fp))
if (status) return -EAGAIN;
return status;
++fp->fi_delegees;
refcount_inc(&dp->dl_stid.sc_count); refcount_inc(&dp->dl_stid.sc_count);
dp->dl_stid.sc_type = NFS4_DELEG_STID; dp->dl_stid.sc_type = NFS4_DELEG_STID;
list_add(&dp->dl_perfile, &fp->fi_delegations); list_add(&dp->dl_perfile, &fp->fi_delegations);
...@@ -986,11 +999,8 @@ static void destroy_delegation(struct nfs4_delegation *dp) ...@@ -986,11 +999,8 @@ static void destroy_delegation(struct nfs4_delegation *dp)
spin_lock(&state_lock); spin_lock(&state_lock);
unhashed = unhash_delegation_locked(dp); unhashed = unhash_delegation_locked(dp);
spin_unlock(&state_lock); spin_unlock(&state_lock);
if (unhashed) { if (unhashed)
put_clnt_odstate(dp->dl_clnt_odstate); destroy_unhashed_deleg(dp);
nfs4_put_deleg_lease(dp->dl_stid.sc_file);
nfs4_put_stid(&dp->dl_stid);
}
} }
static void revoke_delegation(struct nfs4_delegation *dp) static void revoke_delegation(struct nfs4_delegation *dp)
...@@ -999,17 +1009,14 @@ static void revoke_delegation(struct nfs4_delegation *dp) ...@@ -999,17 +1009,14 @@ static void revoke_delegation(struct nfs4_delegation *dp)
WARN_ON(!list_empty(&dp->dl_recall_lru)); WARN_ON(!list_empty(&dp->dl_recall_lru));
put_clnt_odstate(dp->dl_clnt_odstate); if (clp->cl_minorversion) {
nfs4_put_deleg_lease(dp->dl_stid.sc_file);
if (clp->cl_minorversion == 0)
nfs4_put_stid(&dp->dl_stid);
else {
dp->dl_stid.sc_type = NFS4_REVOKED_DELEG_STID; dp->dl_stid.sc_type = NFS4_REVOKED_DELEG_STID;
refcount_inc(&dp->dl_stid.sc_count);
spin_lock(&clp->cl_lock); spin_lock(&clp->cl_lock);
list_add(&dp->dl_recall_lru, &clp->cl_revoked); list_add(&dp->dl_recall_lru, &clp->cl_revoked);
spin_unlock(&clp->cl_lock); spin_unlock(&clp->cl_lock);
} }
destroy_unhashed_deleg(dp);
} }
/* /*
...@@ -1794,7 +1801,7 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name) ...@@ -1794,7 +1801,7 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
struct nfs4_client *clp; struct nfs4_client *clp;
int i; int i;
clp = kzalloc(sizeof(struct nfs4_client), GFP_KERNEL); clp = kmem_cache_zalloc(client_slab, GFP_KERNEL);
if (clp == NULL) if (clp == NULL)
return NULL; return NULL;
clp->cl_name.data = kmemdup(name.data, name.len, GFP_KERNEL); clp->cl_name.data = kmemdup(name.data, name.len, GFP_KERNEL);
...@@ -1825,7 +1832,7 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name) ...@@ -1825,7 +1832,7 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
err_no_hashtbl: err_no_hashtbl:
kfree(clp->cl_name.data); kfree(clp->cl_name.data);
err_no_name: err_no_name:
kfree(clp); kmem_cache_free(client_slab, clp);
return NULL; return NULL;
} }
...@@ -1845,7 +1852,7 @@ free_client(struct nfs4_client *clp) ...@@ -1845,7 +1852,7 @@ free_client(struct nfs4_client *clp)
kfree(clp->cl_ownerstr_hashtbl); kfree(clp->cl_ownerstr_hashtbl);
kfree(clp->cl_name.data); kfree(clp->cl_name.data);
idr_destroy(&clp->cl_stateids); idr_destroy(&clp->cl_stateids);
kfree(clp); kmem_cache_free(client_slab, clp);
} }
/* must be called under the client_lock */ /* must be called under the client_lock */
...@@ -1911,9 +1918,7 @@ __destroy_client(struct nfs4_client *clp) ...@@ -1911,9 +1918,7 @@ __destroy_client(struct nfs4_client *clp)
while (!list_empty(&reaplist)) { while (!list_empty(&reaplist)) {
dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru); dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru);
list_del_init(&dp->dl_recall_lru); list_del_init(&dp->dl_recall_lru);
put_clnt_odstate(dp->dl_clnt_odstate); destroy_unhashed_deleg(dp);
nfs4_put_deleg_lease(dp->dl_stid.sc_file);
nfs4_put_stid(&dp->dl_stid);
} }
while (!list_empty(&clp->cl_revoked)) { while (!list_empty(&clp->cl_revoked)) {
dp = list_entry(clp->cl_revoked.next, struct nfs4_delegation, dl_recall_lru); dp = list_entry(clp->cl_revoked.next, struct nfs4_delegation, dl_recall_lru);
...@@ -2953,7 +2958,7 @@ __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp, ...@@ -2953,7 +2958,7 @@ __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp,
static bool nfsd4_compound_in_session(struct nfsd4_session *session, struct nfs4_sessionid *sid) static bool nfsd4_compound_in_session(struct nfsd4_session *session, struct nfs4_sessionid *sid)
{ {
if (!session) if (!session)
return 0; return false;
return !memcmp(sid, &session->se_sessionid, sizeof(*sid)); return !memcmp(sid, &session->se_sessionid, sizeof(*sid));
} }
...@@ -3471,21 +3476,26 @@ static void nfsd4_init_file(struct knfsd_fh *fh, unsigned int hashval, ...@@ -3471,21 +3476,26 @@ static void nfsd4_init_file(struct knfsd_fh *fh, unsigned int hashval,
void void
nfsd4_free_slabs(void) nfsd4_free_slabs(void)
{ {
kmem_cache_destroy(odstate_slab); kmem_cache_destroy(client_slab);
kmem_cache_destroy(openowner_slab); kmem_cache_destroy(openowner_slab);
kmem_cache_destroy(lockowner_slab); kmem_cache_destroy(lockowner_slab);
kmem_cache_destroy(file_slab); kmem_cache_destroy(file_slab);
kmem_cache_destroy(stateid_slab); kmem_cache_destroy(stateid_slab);
kmem_cache_destroy(deleg_slab); kmem_cache_destroy(deleg_slab);
kmem_cache_destroy(odstate_slab);
} }
int int
nfsd4_init_slabs(void) nfsd4_init_slabs(void)
{ {
client_slab = kmem_cache_create("nfsd4_clients",
sizeof(struct nfs4_client), 0, 0, NULL);
if (client_slab == NULL)
goto out;
openowner_slab = kmem_cache_create("nfsd4_openowners", openowner_slab = kmem_cache_create("nfsd4_openowners",
sizeof(struct nfs4_openowner), 0, 0, NULL); sizeof(struct nfs4_openowner), 0, 0, NULL);
if (openowner_slab == NULL) if (openowner_slab == NULL)
goto out; goto out_free_client_slab;
lockowner_slab = kmem_cache_create("nfsd4_lockowners", lockowner_slab = kmem_cache_create("nfsd4_lockowners",
sizeof(struct nfs4_lockowner), 0, 0, NULL); sizeof(struct nfs4_lockowner), 0, 0, NULL);
if (lockowner_slab == NULL) if (lockowner_slab == NULL)
...@@ -3518,6 +3528,8 @@ nfsd4_init_slabs(void) ...@@ -3518,6 +3528,8 @@ nfsd4_init_slabs(void)
kmem_cache_destroy(lockowner_slab); kmem_cache_destroy(lockowner_slab);
out_free_openowner_slab: out_free_openowner_slab:
kmem_cache_destroy(openowner_slab); kmem_cache_destroy(openowner_slab);
out_free_client_slab:
kmem_cache_destroy(client_slab);
out: out:
dprintk("nfsd4: out of memory while initializing nfsv4\n"); dprintk("nfsd4: out of memory while initializing nfsv4\n");
return -ENOMEM; return -ENOMEM;
...@@ -3945,17 +3957,9 @@ static bool ...@@ -3945,17 +3957,9 @@ static bool
nfsd_break_deleg_cb(struct file_lock *fl) nfsd_break_deleg_cb(struct file_lock *fl)
{ {
bool ret = false; bool ret = false;
struct nfs4_file *fp = (struct nfs4_file *)fl->fl_owner; struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner;
struct nfs4_delegation *dp; struct nfs4_file *fp = dp->dl_stid.sc_file;
if (!fp) {
WARN(1, "(%p)->fl_owner NULL\n", fl);
return ret;
}
if (fp->fi_had_conflict) {
WARN(1, "duplicate break on %p\n", fp);
return ret;
}
/* /*
* We don't want the locks code to timeout the lease for us; * We don't want the locks code to timeout the lease for us;
* we'll remove it ourself if a delegation isn't returned * we'll remove it ourself if a delegation isn't returned
...@@ -3965,15 +3969,7 @@ nfsd_break_deleg_cb(struct file_lock *fl) ...@@ -3965,15 +3969,7 @@ nfsd_break_deleg_cb(struct file_lock *fl)
spin_lock(&fp->fi_lock); spin_lock(&fp->fi_lock);
fp->fi_had_conflict = true; fp->fi_had_conflict = true;
/* nfsd_break_one_deleg(dp);
* If there are no delegations on the list, then return true
* so that the lease code will go ahead and delete it.
*/
if (list_empty(&fp->fi_delegations))
ret = true;
else
list_for_each_entry(dp, &fp->fi_delegations, dl_perfile)
nfsd_break_one_deleg(dp);
spin_unlock(&fp->fi_lock); spin_unlock(&fp->fi_lock);
return ret; return ret;
} }
...@@ -4297,7 +4293,8 @@ static bool nfsd4_cb_channel_good(struct nfs4_client *clp) ...@@ -4297,7 +4293,8 @@ static bool nfsd4_cb_channel_good(struct nfs4_client *clp)
return clp->cl_minorversion && clp->cl_cb_state == NFSD4_CB_UNKNOWN; return clp->cl_minorversion && clp->cl_cb_state == NFSD4_CB_UNKNOWN;
} }
static struct file_lock *nfs4_alloc_init_lease(struct nfs4_file *fp, int flag) static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp,
int flag)
{ {
struct file_lock *fl; struct file_lock *fl;
...@@ -4308,124 +4305,88 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_file *fp, int flag) ...@@ -4308,124 +4305,88 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_file *fp, int flag)
fl->fl_flags = FL_DELEG; fl->fl_flags = FL_DELEG;
fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK; fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK;
fl->fl_end = OFFSET_MAX; fl->fl_end = OFFSET_MAX;
fl->fl_owner = (fl_owner_t)fp; fl->fl_owner = (fl_owner_t)dp;
fl->fl_pid = current->tgid; fl->fl_pid = current->tgid;
fl->fl_file = dp->dl_stid.sc_file->fi_deleg_file;
return fl; return fl;
} }
/**
 * nfs4_setlease - Obtain a delegation by requesting lease from vfs layer
 * @dp: a pointer to the nfs4_delegation we're adding.
 *
 * Allocates a read-delegation lease for the nfs4_file behind @dp, asks the
 * VFS to install it on a readable struct file for that nfs4_file, and then,
 * under state_lock and fp->fi_lock, records the file as fi_deleg_file (if
 * none is recorded yet) and hashes the delegation.
 *
 * Return:
 * 0 on success.
 * -ENOMEM if the lease structure could not be allocated.
 * -EBADF if no readable file was found for the nfs4_file.
 * -EAGAIN if fi_had_conflict was already set once the locks were taken.
 * Otherwise whatever vfs_setlease() or hash_delegation_locked() returned
 * (the previous version of this comment stated -EAGAIN is returned when
 * there was an existing delegation; presumably that comes from
 * hash_delegation_locked(), which is not visible here -- confirm).
 */
static int nfs4_setlease(struct nfs4_delegation *dp)
{
struct nfs4_file *fp = dp->dl_stid.sc_file;
struct file_lock *fl;
struct file *filp;
int status = 0;
fl = nfs4_alloc_init_lease(fp, NFS4_OPEN_DELEGATE_READ);
if (!fl)
return -ENOMEM;
filp = find_readable_file(fp);
if (!filp) {
/* We should always have a readable file here */
WARN_ON_ONCE(1);
locks_free_lock(fl);
return -EBADF;
}
fl->fl_file = filp;
/*
 * fl is passed by reference; if it is still non-NULL after the call it
 * is freed here (presumably vfs_setlease() clears it when it keeps the
 * lock -- confirm against the locks code).
 */
status = vfs_setlease(filp, fl->fl_type, &fl, NULL);
if (fl)
locks_free_lock(fl);
if (status)
goto out_fput;
spin_lock(&state_lock);
spin_lock(&fp->fi_lock);
/* Did the lease get broken before we took the lock? */
status = -EAGAIN;
if (fp->fi_had_conflict)
goto out_unlock;
/*
 * Race breaker: fi_deleg_file is already set, so hash the delegation
 * and leave through out_unlock, which also drops our filp reference
 * whether or not hashing succeeded.
 */
if (fp->fi_deleg_file) {
status = hash_delegation_locked(dp, fp);
goto out_unlock;
}
/* No delegation file recorded yet: record ours and reset the count. */
fp->fi_deleg_file = filp;
fp->fi_delegees = 0;
status = hash_delegation_locked(dp, fp);
spin_unlock(&fp->fi_lock);
spin_unlock(&state_lock);
if (status) {
/* Should never happen, this is a new fi_deleg_file */
WARN_ON_ONCE(1);
/*
 * NOTE(review): fp->fi_deleg_file still points at filp when the
 * reference is dropped below -- confirm this is intended.
 */
goto out_fput;
}
return 0;
out_unlock:
spin_unlock(&fp->fi_lock);
spin_unlock(&state_lock);
out_fput:
fput(filp);
return status;
}
static struct nfs4_delegation * static struct nfs4_delegation *
nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
struct nfs4_file *fp, struct nfs4_clnt_odstate *odstate) struct nfs4_file *fp, struct nfs4_clnt_odstate *odstate)
{ {
int status; int status = 0;
struct nfs4_delegation *dp; struct nfs4_delegation *dp;
struct file *filp;
struct file_lock *fl;
/*
* The fi_had_conflict and nfs_get_existing_delegation checks
* here are just optimizations; we'll need to recheck them at
* the end:
*/
if (fp->fi_had_conflict) if (fp->fi_had_conflict)
return ERR_PTR(-EAGAIN); return ERR_PTR(-EAGAIN);
filp = find_readable_file(fp);
if (!filp) {
/* We should always have a readable file here */
WARN_ON_ONCE(1);
return ERR_PTR(-EBADF);
}
spin_lock(&state_lock); spin_lock(&state_lock);
spin_lock(&fp->fi_lock); spin_lock(&fp->fi_lock);
status = nfs4_get_existing_delegation(clp, fp); if (nfs4_delegation_exists(clp, fp))
status = -EAGAIN;
else if (!fp->fi_deleg_file) {
fp->fi_deleg_file = filp;
/* increment early to prevent fi_deleg_file from being
* cleared */
fp->fi_delegees = 1;
filp = NULL;
} else
fp->fi_delegees++;
spin_unlock(&fp->fi_lock); spin_unlock(&fp->fi_lock);
spin_unlock(&state_lock); spin_unlock(&state_lock);
if (filp)
fput(filp);
if (status) if (status)
return ERR_PTR(status); return ERR_PTR(status);
dp = alloc_init_deleg(clp, fh, odstate); status = -ENOMEM;
dp = alloc_init_deleg(clp, fp, fh, odstate);
if (!dp) if (!dp)
return ERR_PTR(-ENOMEM); goto out_delegees;
fl = nfs4_alloc_init_lease(dp, NFS4_OPEN_DELEGATE_READ);
if (!fl)
goto out_stid;
status = vfs_setlease(fp->fi_deleg_file, fl->fl_type, &fl, NULL);
if (fl)
locks_free_lock(fl);
if (status)
goto out_clnt_odstate;
get_nfs4_file(fp);
spin_lock(&state_lock); spin_lock(&state_lock);
spin_lock(&fp->fi_lock); spin_lock(&fp->fi_lock);
dp->dl_stid.sc_file = fp; if (fp->fi_had_conflict)
if (!fp->fi_deleg_file) {
spin_unlock(&fp->fi_lock);
spin_unlock(&state_lock);
status = nfs4_setlease(dp);
goto out;
}
if (fp->fi_had_conflict) {
status = -EAGAIN; status = -EAGAIN;
goto out_unlock; else
} status = hash_delegation_locked(dp, fp);
status = hash_delegation_locked(dp, fp);
out_unlock:
spin_unlock(&fp->fi_lock); spin_unlock(&fp->fi_lock);
spin_unlock(&state_lock); spin_unlock(&state_lock);
out:
if (status) { if (status)
put_clnt_odstate(dp->dl_clnt_odstate); destroy_unhashed_deleg(dp);
nfs4_put_stid(&dp->dl_stid);
return ERR_PTR(status);
}
return dp; return dp;
out_clnt_odstate:
put_clnt_odstate(dp->dl_clnt_odstate);
out_stid:
nfs4_put_stid(&dp->dl_stid);
out_delegees:
put_deleg_file(fp);
return ERR_PTR(status);
} }
static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status) static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status)
...@@ -5521,15 +5482,26 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, ...@@ -5521,15 +5482,26 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
goto out; goto out;
stp->st_stid.sc_type = NFS4_CLOSED_STID; stp->st_stid.sc_type = NFS4_CLOSED_STID;
/*
* Technically we don't _really_ have to increment or copy it, since
* it should just be gone after this operation and we clobber the
* copied value below, but we continue to do so here just to ensure
* that racing ops see that there was a state change.
*/
nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid); nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid);
nfsd4_close_open_stateid(stp); nfsd4_close_open_stateid(stp);
mutex_unlock(&stp->st_mutex); mutex_unlock(&stp->st_mutex);
/* See RFC5661 sectionm 18.2.4 */ /* v4.1+ suggests that we send a special stateid in here, since the
if (stp->st_stid.sc_client->cl_minorversion) * clients should just ignore this anyway. Since this is not useful
memcpy(&close->cl_stateid, &close_stateid, * for v4.0 clients either, we set it to the special close_stateid
sizeof(close->cl_stateid)); * universally.
*
* See RFC5661 section 18.2.4, and RFC7530 section 16.2.5
*/
memcpy(&close->cl_stateid, &close_stateid, sizeof(close->cl_stateid));
/* put reference from nfs4_preprocess_seqid_op */ /* put reference from nfs4_preprocess_seqid_op */
nfs4_put_stid(&stp->st_stid); nfs4_put_stid(&stp->st_stid);
...@@ -7264,9 +7236,7 @@ nfs4_state_shutdown_net(struct net *net) ...@@ -7264,9 +7236,7 @@ nfs4_state_shutdown_net(struct net *net)
list_for_each_safe(pos, next, &reaplist) { list_for_each_safe(pos, next, &reaplist) {
dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
list_del_init(&dp->dl_recall_lru); list_del_init(&dp->dl_recall_lru);
put_clnt_odstate(dp->dl_clnt_odstate); destroy_unhashed_deleg(dp);
nfs4_put_deleg_lease(dp->dl_stid.sc_file);
nfs4_put_stid(&dp->dl_stid);
} }
nfsd4_client_tracking_exit(net); nfsd4_client_tracking_exit(net);
......
...@@ -33,7 +33,6 @@ ...@@ -33,7 +33,6 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#include <linux/fs_struct.h>
#include <linux/file.h> #include <linux/file.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/namei.h> #include <linux/namei.h>
...@@ -682,7 +681,7 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create ...@@ -682,7 +681,7 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create
status = nfsd4_decode_fattr(argp, create->cr_bmval, &create->cr_iattr, status = nfsd4_decode_fattr(argp, create->cr_bmval, &create->cr_iattr,
&create->cr_acl, &create->cr_label, &create->cr_acl, &create->cr_label,
&current->fs->umask); &create->cr_umask);
if (status) if (status)
goto out; goto out;
...@@ -927,7 +926,6 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) ...@@ -927,7 +926,6 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
case NFS4_OPEN_NOCREATE: case NFS4_OPEN_NOCREATE:
break; break;
case NFS4_OPEN_CREATE: case NFS4_OPEN_CREATE:
current->fs->umask = 0;
READ_BUF(4); READ_BUF(4);
open->op_createmode = be32_to_cpup(p++); open->op_createmode = be32_to_cpup(p++);
switch (open->op_createmode) { switch (open->op_createmode) {
...@@ -935,7 +933,7 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) ...@@ -935,7 +933,7 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
case NFS4_CREATE_GUARDED: case NFS4_CREATE_GUARDED:
status = nfsd4_decode_fattr(argp, open->op_bmval, status = nfsd4_decode_fattr(argp, open->op_bmval,
&open->op_iattr, &open->op_acl, &open->op_label, &open->op_iattr, &open->op_acl, &open->op_label,
&current->fs->umask); &open->op_umask);
if (status) if (status)
goto out; goto out;
break; break;
...@@ -950,7 +948,7 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) ...@@ -950,7 +948,7 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
COPYMEM(open->op_verf.data, NFS4_VERIFIER_SIZE); COPYMEM(open->op_verf.data, NFS4_VERIFIER_SIZE);
status = nfsd4_decode_fattr(argp, open->op_bmval, status = nfsd4_decode_fattr(argp, open->op_bmval,
&open->op_iattr, &open->op_acl, &open->op_label, &open->op_iattr, &open->op_acl, &open->op_label,
&current->fs->umask); &open->op_umask);
if (status) if (status)
goto out; goto out;
break; break;
...@@ -1759,7 +1757,7 @@ nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy) ...@@ -1759,7 +1757,7 @@ nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy)
p = xdr_decode_hyper(p, &copy->cp_src_pos); p = xdr_decode_hyper(p, &copy->cp_src_pos);
p = xdr_decode_hyper(p, &copy->cp_dst_pos); p = xdr_decode_hyper(p, &copy->cp_dst_pos);
p = xdr_decode_hyper(p, &copy->cp_count); p = xdr_decode_hyper(p, &copy->cp_count);
copy->cp_consecutive = be32_to_cpup(p++); p++; /* ca_consecutive: we always do consecutive copies */
copy->cp_synchronous = be32_to_cpup(p++); copy->cp_synchronous = be32_to_cpup(p++);
tmp = be32_to_cpup(p); /* Source server list not supported */ tmp = be32_to_cpup(p); /* Source server list not supported */
...@@ -3427,8 +3425,9 @@ static __be32 nfsd4_encode_splice_read( ...@@ -3427,8 +3425,9 @@ static __be32 nfsd4_encode_splice_read(
return nfserr_resource; return nfserr_resource;
len = maxcount; len = maxcount;
nfserr = nfsd_splice_read(read->rd_rqstp, file, nfserr = nfsd_splice_read(read->rd_rqstp, read->rd_fhp,
read->rd_offset, &maxcount); file, read->rd_offset, &maxcount);
read->rd_length = maxcount;
if (nfserr) { if (nfserr) {
/* /*
* nfsd_splice_actor may have already messed with the * nfsd_splice_actor may have already messed with the
...@@ -3511,8 +3510,9 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp, ...@@ -3511,8 +3510,9 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
read->rd_vlen = v; read->rd_vlen = v;
len = maxcount; len = maxcount;
nfserr = nfsd_readv(file, read->rd_offset, resp->rqstp->rq_vec, nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset,
read->rd_vlen, &maxcount); resp->rqstp->rq_vec, read->rd_vlen, &maxcount);
read->rd_length = maxcount;
if (nfserr) if (nfserr)
return nfserr; return nfserr;
xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3)); xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3));
...@@ -4214,7 +4214,7 @@ nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr, ...@@ -4214,7 +4214,7 @@ nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr,
return nfserr; return nfserr;
p = xdr_reserve_space(&resp->xdr, 4 + 4); p = xdr_reserve_space(&resp->xdr, 4 + 4);
*p++ = cpu_to_be32(copy->cp_consecutive); *p++ = xdr_one; /* cr_consecutive */
*p++ = cpu_to_be32(copy->cp_synchronous); *p++ = cpu_to_be32(copy->cp_synchronous);
return 0; return 0;
} }
......
...@@ -87,13 +87,23 @@ nfsd_mode_check(struct svc_rqst *rqstp, struct dentry *dentry, ...@@ -87,13 +87,23 @@ nfsd_mode_check(struct svc_rqst *rqstp, struct dentry *dentry,
return nfserr_inval; return nfserr_inval;
} }
/*
 * Decide whether the source port of a request is acceptable.
 *
 * @rqstp: the request being checked
 * @flags: export flags that apply to this request
 *
 * The request is accepted when any of the following holds, tested in
 * this order:
 *   - the export flags contain NFSEXP_INSECURE_PORT;
 *   - the credential flavor is RPC_AUTH_GSS or above (we don't require
 *     gss requests to use low ports);
 *   - the RQ_SECURE bit is set in the request flags.
 *
 * Returns true if the request passes the check, false otherwise.
 */
static bool nfsd_originating_port_ok(struct svc_rqst *rqstp, int flags)
{
	return (flags & NFSEXP_INSECURE_PORT) ||
	       rqstp->rq_cred.cr_flavor >= RPC_AUTH_GSS ||
	       test_bit(RQ_SECURE, &rqstp->rq_flags);
}
static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp, static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp,
struct svc_export *exp) struct svc_export *exp)
{ {
int flags = nfsexp_flags(rqstp, exp); int flags = nfsexp_flags(rqstp, exp);
/* Check if the request originated from a secure port. */ /* Check if the request originated from a secure port. */
if (!test_bit(RQ_SECURE, &rqstp->rq_flags) && !(flags & NFSEXP_INSECURE_PORT)) { if (!nfsd_originating_port_ok(rqstp, flags)) {
RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
dprintk("nfsd: request from insecure port %s!\n", dprintk("nfsd: request from insecure port %s!\n",
svc_print_addr(rqstp, buf, sizeof(buf))); svc_print_addr(rqstp, buf, sizeof(buf)));
......
...@@ -212,13 +212,18 @@ nfsd_proc_write(struct svc_rqst *rqstp) ...@@ -212,13 +212,18 @@ nfsd_proc_write(struct svc_rqst *rqstp)
struct nfsd_attrstat *resp = rqstp->rq_resp; struct nfsd_attrstat *resp = rqstp->rq_resp;
__be32 nfserr; __be32 nfserr;
unsigned long cnt = argp->len; unsigned long cnt = argp->len;
unsigned int nvecs;
dprintk("nfsd: WRITE %s %d bytes at %d\n", dprintk("nfsd: WRITE %s %d bytes at %d\n",
SVCFH_fmt(&argp->fh), SVCFH_fmt(&argp->fh),
argp->len, argp->offset); argp->len, argp->offset);
nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), argp->offset, nvecs = svc_fill_write_vector(rqstp, &argp->first, cnt);
rqstp->rq_vec, argp->vlen, &cnt, NFS_DATA_SYNC); if (!nvecs)
return nfserr_io;
nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh),
argp->offset, rqstp->rq_vec, nvecs,
&cnt, NFS_DATA_SYNC);
return nfsd_return_attrs(nfserr, resp); return nfsd_return_attrs(nfserr, resp);
} }
...@@ -444,17 +449,19 @@ nfsd_proc_symlink(struct svc_rqst *rqstp) ...@@ -444,17 +449,19 @@ nfsd_proc_symlink(struct svc_rqst *rqstp)
struct svc_fh newfh; struct svc_fh newfh;
__be32 nfserr; __be32 nfserr;
if (argp->tlen > NFS_MAXPATHLEN)
return nfserr_nametoolong;
argp->tname = svc_fill_symlink_pathname(rqstp, &argp->first,
argp->tlen);
if (IS_ERR(argp->tname))
return nfserrno(PTR_ERR(argp->tname));
dprintk("nfsd: SYMLINK %s %.*s -> %.*s\n", dprintk("nfsd: SYMLINK %s %.*s -> %.*s\n",
SVCFH_fmt(&argp->ffh), argp->flen, argp->fname, SVCFH_fmt(&argp->ffh), argp->flen, argp->fname,
argp->tlen, argp->tname); argp->tlen, argp->tname);
fh_init(&newfh, NFS_FHSIZE); fh_init(&newfh, NFS_FHSIZE);
/*
* Crazy hack: the request fits in a page, and already-decoded
* attributes follow argp->tname, so it's safe to just write a
* null to ensure it's null-terminated:
*/
argp->tname[argp->tlen] = '\0';
nfserr = nfsd_symlink(rqstp, &argp->ffh, argp->fname, argp->flen, nfserr = nfsd_symlink(rqstp, &argp->ffh, argp->fname, argp->flen,
argp->tname, &newfh); argp->tname, &newfh);
......
...@@ -70,22 +70,6 @@ decode_filename(__be32 *p, char **namp, unsigned int *lenp) ...@@ -70,22 +70,6 @@ decode_filename(__be32 *p, char **namp, unsigned int *lenp)
return p; return p;
} }
/*
 * Decode an XDR string of at most NFS_MAXPATHLEN bytes in place and
 * reject it if any of its bytes is a NUL.
 *
 * @p:    current position in the XDR buffer
 * @namp: receives a pointer to the decoded string
 * @lenp: receives the decoded length
 *
 * Returns the value produced by xdr_decode_string_inplace(), or NULL
 * when that call fails or the decoded bytes contain a '\0'.
 */
static __be32 *
decode_pathname(__be32 *p, char **namp, unsigned int *lenp)
{
	const char *cur, *end;

	p = xdr_decode_string_inplace(p, namp, lenp, NFS_MAXPATHLEN);
	if (p == NULL)
		return NULL;

	/* Walk the decoded bytes; an embedded NUL makes the path invalid. */
	end = *namp + *lenp;
	for (cur = *namp; cur != end; cur++) {
		if (*cur == '\0')
			return NULL;
	}

	return p;
}
static __be32 * static __be32 *
decode_sattr(__be32 *p, struct iattr *iap) decode_sattr(__be32 *p, struct iattr *iap)
{ {
...@@ -287,7 +271,6 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p) ...@@ -287,7 +271,6 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p)
struct nfsd_writeargs *args = rqstp->rq_argp; struct nfsd_writeargs *args = rqstp->rq_argp;
unsigned int len, hdr, dlen; unsigned int len, hdr, dlen;
struct kvec *head = rqstp->rq_arg.head; struct kvec *head = rqstp->rq_arg.head;
int v;
p = decode_fh(p, &args->fh); p = decode_fh(p, &args->fh);
if (!p) if (!p)
...@@ -323,17 +306,8 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p) ...@@ -323,17 +306,8 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p)
if (dlen < XDR_QUADLEN(len)*4) if (dlen < XDR_QUADLEN(len)*4)
return 0; return 0;
rqstp->rq_vec[0].iov_base = (void*)p; args->first.iov_base = (void *)p;
rqstp->rq_vec[0].iov_len = head->iov_len - hdr; args->first.iov_len = head->iov_len - hdr;
v = 0;
while (len > rqstp->rq_vec[v].iov_len) {
len -= rqstp->rq_vec[v].iov_len;
v++;
rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_pages[v]);
rqstp->rq_vec[v].iov_len = PAGE_SIZE;
}
rqstp->rq_vec[v].iov_len = len;
args->vlen = v + 1;
return 1; return 1;
} }
...@@ -394,14 +368,39 @@ int ...@@ -394,14 +368,39 @@ int
nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p) nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p)
{ {
struct nfsd_symlinkargs *args = rqstp->rq_argp; struct nfsd_symlinkargs *args = rqstp->rq_argp;
char *base = (char *)p;
size_t xdrlen;
if ( !(p = decode_fh(p, &args->ffh)) if ( !(p = decode_fh(p, &args->ffh))
|| !(p = decode_filename(p, &args->fname, &args->flen)) || !(p = decode_filename(p, &args->fname, &args->flen)))
|| !(p = decode_pathname(p, &args->tname, &args->tlen)))
return 0; return 0;
p = decode_sattr(p, &args->attrs);
return xdr_argsize_check(rqstp, p); args->tlen = ntohl(*p++);
if (args->tlen == 0)
return 0;
args->first.iov_base = p;
args->first.iov_len = rqstp->rq_arg.head[0].iov_len;
args->first.iov_len -= (char *)p - base;
/* This request is never larger than a page. Therefore,
* transport will deliver either:
* 1. pathname in the pagelist -> sattr is in the tail.
* 2. everything in the head buffer -> sattr is in the head.
*/
if (rqstp->rq_arg.page_len) {
if (args->tlen != rqstp->rq_arg.page_len)
return 0;
p = rqstp->rq_arg.tail[0].iov_base;
} else {
xdrlen = XDR_QUADLEN(args->tlen);
if (xdrlen > args->first.iov_len - (8 * sizeof(__be32)))
return 0;
p += xdrlen;
}
decode_sattr(p, &args->attrs);
return 1;
} }
int int
......
...@@ -11,39 +11,79 @@ ...@@ -11,39 +11,79 @@
#include <linux/tracepoint.h> #include <linux/tracepoint.h>
#include "nfsfh.h" #include "nfsfh.h"
/*
 * nfsd_compound tracepoint.
 *
 * Takes the request and an operation count. Records the request's
 * rq_xid, converted from big-endian to CPU byte order at assignment
 * time, together with the operation count, and prints them as
 * "xid=0x... opcnt=...".
 */
TRACE_EVENT(nfsd_compound,
TP_PROTO(const struct svc_rqst *rqst,
u32 args_opcnt),
TP_ARGS(rqst, args_opcnt),
TP_STRUCT__entry(
__field(u32, xid)
__field(u32, args_opcnt)
),
TP_fast_assign(
__entry->xid = be32_to_cpu(rqst->rq_xid);
__entry->args_opcnt = args_opcnt;
),
TP_printk("xid=0x%08x opcnt=%u",
__entry->xid, __entry->args_opcnt)
)
/*
 * nfsd_compound_status tracepoint.
 *
 * Takes two operation counts (args_opcnt and resp_opcnt), a big-endian
 * status and a name string. The status is converted to CPU byte order
 * and stored as an int; the name is copied into the trace record.
 * Output has the form "op=<resp_opcnt>/<args_opcnt> <name> status=<n>".
 */
TRACE_EVENT(nfsd_compound_status,
TP_PROTO(u32 args_opcnt,
u32 resp_opcnt,
__be32 status,
const char *name),
TP_ARGS(args_opcnt, resp_opcnt, status, name),
TP_STRUCT__entry(
__field(u32, args_opcnt)
__field(u32, resp_opcnt)
__field(int, status)
__string(name, name)
),
TP_fast_assign(
__entry->args_opcnt = args_opcnt;
__entry->resp_opcnt = resp_opcnt;
__entry->status = be32_to_cpu(status);
__assign_str(name, name);
),
TP_printk("op=%u/%u %s status=%d",
__entry->resp_opcnt, __entry->args_opcnt,
__get_str(name), __entry->status)
)
DECLARE_EVENT_CLASS(nfsd_io_class, DECLARE_EVENT_CLASS(nfsd_io_class,
TP_PROTO(struct svc_rqst *rqstp, TP_PROTO(struct svc_rqst *rqstp,
struct svc_fh *fhp, struct svc_fh *fhp,
loff_t offset, loff_t offset,
int len), unsigned long len),
TP_ARGS(rqstp, fhp, offset, len), TP_ARGS(rqstp, fhp, offset, len),
TP_STRUCT__entry( TP_STRUCT__entry(
__field(__be32, xid) __field(u32, xid)
__field_struct(struct knfsd_fh, fh) __field(u32, fh_hash)
__field(loff_t, offset) __field(loff_t, offset)
__field(int, len) __field(unsigned long, len)
), ),
TP_fast_assign( TP_fast_assign(
__entry->xid = rqstp->rq_xid, __entry->xid = be32_to_cpu(rqstp->rq_xid);
fh_copy_shallow(&__entry->fh, &fhp->fh_handle); __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle);
__entry->offset = offset; __entry->offset = offset;
__entry->len = len; __entry->len = len;
), ),
TP_printk("xid=0x%x fh=0x%x offset=%lld len=%d", TP_printk("xid=0x%08x fh_hash=0x%08x offset=%lld len=%lu",
__be32_to_cpu(__entry->xid), knfsd_fh_hash(&__entry->fh), __entry->xid, __entry->fh_hash,
__entry->offset, __entry->len) __entry->offset, __entry->len)
) )
#define DEFINE_NFSD_IO_EVENT(name) \ #define DEFINE_NFSD_IO_EVENT(name) \
DEFINE_EVENT(nfsd_io_class, name, \ DEFINE_EVENT(nfsd_io_class, nfsd_##name, \
TP_PROTO(struct svc_rqst *rqstp, \ TP_PROTO(struct svc_rqst *rqstp, \
struct svc_fh *fhp, \ struct svc_fh *fhp, \
loff_t offset, \ loff_t offset, \
int len), \ unsigned long len), \
TP_ARGS(rqstp, fhp, offset, len)) TP_ARGS(rqstp, fhp, offset, len))
DEFINE_NFSD_IO_EVENT(read_start); DEFINE_NFSD_IO_EVENT(read_start);
DEFINE_NFSD_IO_EVENT(read_opened); DEFINE_NFSD_IO_EVENT(read_splice);
DEFINE_NFSD_IO_EVENT(read_vector);
DEFINE_NFSD_IO_EVENT(read_io_done); DEFINE_NFSD_IO_EVENT(read_io_done);
DEFINE_NFSD_IO_EVENT(read_done); DEFINE_NFSD_IO_EVENT(read_done);
DEFINE_NFSD_IO_EVENT(write_start); DEFINE_NFSD_IO_EVENT(write_start);
...@@ -51,6 +91,40 @@ DEFINE_NFSD_IO_EVENT(write_opened); ...@@ -51,6 +91,40 @@ DEFINE_NFSD_IO_EVENT(write_opened);
DEFINE_NFSD_IO_EVENT(write_io_done); DEFINE_NFSD_IO_EVENT(write_io_done);
DEFINE_NFSD_IO_EVENT(write_done); DEFINE_NFSD_IO_EVENT(write_done);
/*
 * Event class shared by the tracepoints defined through
 * DEFINE_NFSD_ERR_EVENT().
 *
 * Takes the request, a file handle, a file offset and an integer
 * status. Records the request's rq_xid converted to CPU byte order,
 * the value of knfsd_fh_hash() computed over the file handle at
 * assignment time, the offset and the status, and prints them as
 * "xid=0x... fh_hash=0x... offset=... status=...".
 */
DECLARE_EVENT_CLASS(nfsd_err_class,
TP_PROTO(struct svc_rqst *rqstp,
struct svc_fh *fhp,
loff_t offset,
int status),
TP_ARGS(rqstp, fhp, offset, status),
TP_STRUCT__entry(
__field(u32, xid)
__field(u32, fh_hash)
__field(loff_t, offset)
__field(int, status)
),
TP_fast_assign(
__entry->xid = be32_to_cpu(rqstp->rq_xid);
__entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle);
__entry->offset = offset;
__entry->status = status;
),
TP_printk("xid=0x%08x fh_hash=0x%08x offset=%lld status=%d",
__entry->xid, __entry->fh_hash,
__entry->offset, __entry->status)
)
/*
 * Define a tracepoint called nfsd_<name> that belongs to nfsd_err_class.
 *
 * The prototype mirrors the class: request, file handle, file offset
 * and an integer status. The last argument is named "status" to match
 * the field the class declares and records; it is not a length.
 */
#define DEFINE_NFSD_ERR_EVENT(name)		\
DEFINE_EVENT(nfsd_err_class, nfsd_##name,	\
	TP_PROTO(struct svc_rqst *rqstp,	\
		 struct svc_fh *fhp,		\
		 loff_t offset,			\
		 int status),			\
	TP_ARGS(rqstp, fhp, offset, status))
DEFINE_NFSD_ERR_EVENT(read_err);
DEFINE_NFSD_ERR_EVENT(write_err);
#include "state.h" #include "state.h"
DECLARE_EVENT_CLASS(nfsd_stateid_class, DECLARE_EVENT_CLASS(nfsd_stateid_class,
...@@ -76,7 +150,7 @@ DECLARE_EVENT_CLASS(nfsd_stateid_class, ...@@ -76,7 +150,7 @@ DECLARE_EVENT_CLASS(nfsd_stateid_class,
) )
#define DEFINE_STATEID_EVENT(name) \ #define DEFINE_STATEID_EVENT(name) \
DEFINE_EVENT(nfsd_stateid_class, name, \ DEFINE_EVENT(nfsd_stateid_class, nfsd_##name, \
TP_PROTO(stateid_t *stp), \ TP_PROTO(stateid_t *stp), \
TP_ARGS(stp)) TP_ARGS(stp))
DEFINE_STATEID_EVENT(layoutstate_alloc); DEFINE_STATEID_EVENT(layoutstate_alloc);
......
...@@ -881,20 +881,24 @@ static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe, ...@@ -881,20 +881,24 @@ static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe,
return __splice_from_pipe(pipe, sd, nfsd_splice_actor); return __splice_from_pipe(pipe, sd, nfsd_splice_actor);
} }
static __be32 static __be32 nfsd_finish_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
nfsd_finish_read(struct file *file, unsigned long *count, int host_err) struct file *file, loff_t offset,
unsigned long *count, int host_err)
{ {
if (host_err >= 0) { if (host_err >= 0) {
nfsdstats.io_read += host_err; nfsdstats.io_read += host_err;
*count = host_err; *count = host_err;
fsnotify_access(file); fsnotify_access(file);
trace_nfsd_read_io_done(rqstp, fhp, offset, *count);
return 0; return 0;
} else } else {
trace_nfsd_read_err(rqstp, fhp, offset, host_err);
return nfserrno(host_err); return nfserrno(host_err);
}
} }
__be32 nfsd_splice_read(struct svc_rqst *rqstp, __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct file *file, loff_t offset, unsigned long *count) struct file *file, loff_t offset, unsigned long *count)
{ {
struct splice_desc sd = { struct splice_desc sd = {
.len = 0, .len = 0,
...@@ -904,21 +908,23 @@ __be32 nfsd_splice_read(struct svc_rqst *rqstp, ...@@ -904,21 +908,23 @@ __be32 nfsd_splice_read(struct svc_rqst *rqstp,
}; };
int host_err; int host_err;
trace_nfsd_read_splice(rqstp, fhp, offset, *count);
rqstp->rq_next_page = rqstp->rq_respages + 1; rqstp->rq_next_page = rqstp->rq_respages + 1;
host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor); host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor);
return nfsd_finish_read(file, count, host_err); return nfsd_finish_read(rqstp, fhp, file, offset, count, host_err);
} }
__be32 nfsd_readv(struct file *file, loff_t offset, struct kvec *vec, int vlen, __be32 nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp,
unsigned long *count) struct file *file, loff_t offset,
struct kvec *vec, int vlen, unsigned long *count)
{ {
struct iov_iter iter; struct iov_iter iter;
int host_err; int host_err;
trace_nfsd_read_vector(rqstp, fhp, offset, *count);
iov_iter_kvec(&iter, READ | ITER_KVEC, vec, vlen, *count); iov_iter_kvec(&iter, READ | ITER_KVEC, vec, vlen, *count);
host_err = vfs_iter_read(file, &iter, &offset, 0); host_err = vfs_iter_read(file, &iter, &offset, 0);
return nfsd_finish_read(rqstp, fhp, file, offset, count, host_err);
return nfsd_finish_read(file, count, host_err);
} }
/* /*
...@@ -965,13 +971,15 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, ...@@ -965,13 +971,15 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
{ {
struct svc_export *exp; struct svc_export *exp;
struct iov_iter iter; struct iov_iter iter;
__be32 err = 0; __be32 nfserr;
int host_err; int host_err;
int use_wgather; int use_wgather;
loff_t pos = offset; loff_t pos = offset;
unsigned int pflags = current->flags; unsigned int pflags = current->flags;
rwf_t flags = 0; rwf_t flags = 0;
trace_nfsd_write_opened(rqstp, fhp, offset, *cnt);
if (test_bit(RQ_LOCAL, &rqstp->rq_flags)) if (test_bit(RQ_LOCAL, &rqstp->rq_flags))
/* /*
* We want less throttling in balance_dirty_pages() * We want less throttling in balance_dirty_pages()
...@@ -994,22 +1002,23 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, ...@@ -994,22 +1002,23 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
host_err = vfs_iter_write(file, &iter, &pos, flags); host_err = vfs_iter_write(file, &iter, &pos, flags);
if (host_err < 0) if (host_err < 0)
goto out_nfserr; goto out_nfserr;
*cnt = host_err; nfsdstats.io_write += *cnt;
nfsdstats.io_write += host_err;
fsnotify_modify(file); fsnotify_modify(file);
if (stable && use_wgather) if (stable && use_wgather)
host_err = wait_for_concurrent_writes(file); host_err = wait_for_concurrent_writes(file);
out_nfserr: out_nfserr:
dprintk("nfsd: write complete host_err=%d\n", host_err); if (host_err >= 0) {
if (host_err >= 0) trace_nfsd_write_io_done(rqstp, fhp, offset, *cnt);
err = 0; nfserr = nfs_ok;
else } else {
err = nfserrno(host_err); trace_nfsd_write_err(rqstp, fhp, offset, host_err);
nfserr = nfserrno(host_err);
}
if (test_bit(RQ_LOCAL, &rqstp->rq_flags)) if (test_bit(RQ_LOCAL, &rqstp->rq_flags))
current_restore_flags(pflags, PF_LESS_THROTTLE); current_restore_flags(pflags, PF_LESS_THROTTLE);
return err; return nfserr;
} }
/* /*
...@@ -1024,27 +1033,23 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, ...@@ -1024,27 +1033,23 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct raparms *ra; struct raparms *ra;
__be32 err; __be32 err;
trace_read_start(rqstp, fhp, offset, vlen); trace_nfsd_read_start(rqstp, fhp, offset, *count);
err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file); err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
if (err) if (err)
return err; return err;
ra = nfsd_init_raparms(file); ra = nfsd_init_raparms(file);
trace_read_opened(rqstp, fhp, offset, vlen);
if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &rqstp->rq_flags)) if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &rqstp->rq_flags))
err = nfsd_splice_read(rqstp, file, offset, count); err = nfsd_splice_read(rqstp, fhp, file, offset, count);
else else
err = nfsd_readv(file, offset, vec, vlen, count); err = nfsd_readv(rqstp, fhp, file, offset, vec, vlen, count);
trace_read_io_done(rqstp, fhp, offset, vlen);
if (ra) if (ra)
nfsd_put_raparams(file, ra); nfsd_put_raparams(file, ra);
fput(file); fput(file);
trace_read_done(rqstp, fhp, offset, vlen); trace_nfsd_read_done(rqstp, fhp, offset, *count);
return err; return err;
} }
...@@ -1061,18 +1066,16 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, ...@@ -1061,18 +1066,16 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
struct file *file = NULL; struct file *file = NULL;
__be32 err = 0; __be32 err = 0;
trace_write_start(rqstp, fhp, offset, vlen); trace_nfsd_write_start(rqstp, fhp, offset, *cnt);
err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file); err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file);
if (err) if (err)
goto out; goto out;
trace_write_opened(rqstp, fhp, offset, vlen);
err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt, stable); err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt, stable);
trace_write_io_done(rqstp, fhp, offset, vlen);
fput(file); fput(file);
out: out:
trace_write_done(rqstp, fhp, offset, vlen); trace_nfsd_write_done(rqstp, fhp, offset, *cnt);
return err; return err;
} }
......
...@@ -78,10 +78,13 @@ __be32 nfsd_commit(struct svc_rqst *, struct svc_fh *, ...@@ -78,10 +78,13 @@ __be32 nfsd_commit(struct svc_rqst *, struct svc_fh *,
__be32 nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t, __be32 nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t,
int, struct file **); int, struct file **);
struct raparms; struct raparms;
__be32 nfsd_splice_read(struct svc_rqst *, __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct file *, loff_t, unsigned long *); struct file *file, loff_t offset,
__be32 nfsd_readv(struct file *, loff_t, struct kvec *, int, unsigned long *count);
unsigned long *); __be32 nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct file *file, loff_t offset,
struct kvec *vec, int vlen,
unsigned long *count);
__be32 nfsd_read(struct svc_rqst *, struct svc_fh *, __be32 nfsd_read(struct svc_rqst *, struct svc_fh *,
loff_t, struct kvec *, int, unsigned long *); loff_t, struct kvec *, int, unsigned long *);
__be32 nfsd_write(struct svc_rqst *, struct svc_fh *, loff_t, __be32 nfsd_write(struct svc_rqst *, struct svc_fh *, loff_t,
......
...@@ -34,7 +34,7 @@ struct nfsd_writeargs { ...@@ -34,7 +34,7 @@ struct nfsd_writeargs {
svc_fh fh; svc_fh fh;
__u32 offset; __u32 offset;
int len; int len;
int vlen; struct kvec first;
}; };
struct nfsd_createargs { struct nfsd_createargs {
...@@ -72,6 +72,7 @@ struct nfsd_symlinkargs { ...@@ -72,6 +72,7 @@ struct nfsd_symlinkargs {
char * tname; char * tname;
unsigned int tlen; unsigned int tlen;
struct iattr attrs; struct iattr attrs;
struct kvec first;
}; };
struct nfsd_readdirargs { struct nfsd_readdirargs {
......
...@@ -41,7 +41,7 @@ struct nfsd3_writeargs { ...@@ -41,7 +41,7 @@ struct nfsd3_writeargs {
__u32 count; __u32 count;
int stable; int stable;
__u32 len; __u32 len;
int vlen; struct kvec first;
}; };
struct nfsd3_createargs { struct nfsd3_createargs {
...@@ -90,6 +90,7 @@ struct nfsd3_symlinkargs { ...@@ -90,6 +90,7 @@ struct nfsd3_symlinkargs {
char * tname; char * tname;
unsigned int tlen; unsigned int tlen;
struct iattr attrs; struct iattr attrs;
struct kvec first;
}; };
struct nfsd3_readdirargs { struct nfsd3_readdirargs {
......
...@@ -110,6 +110,7 @@ struct nfsd4_create { ...@@ -110,6 +110,7 @@ struct nfsd4_create {
struct { struct {
u32 datalen; u32 datalen;
char *data; char *data;
struct kvec first;
} link; /* NF4LNK */ } link; /* NF4LNK */
struct { struct {
u32 specdata1; u32 specdata1;
...@@ -118,12 +119,14 @@ struct nfsd4_create { ...@@ -118,12 +119,14 @@ struct nfsd4_create {
} u; } u;
u32 cr_bmval[3]; /* request */ u32 cr_bmval[3]; /* request */
struct iattr cr_iattr; /* request */ struct iattr cr_iattr; /* request */
int cr_umask; /* request */
struct nfsd4_change_info cr_cinfo; /* response */ struct nfsd4_change_info cr_cinfo; /* response */
struct nfs4_acl *cr_acl; struct nfs4_acl *cr_acl;
struct xdr_netobj cr_label; struct xdr_netobj cr_label;
}; };
#define cr_datalen u.link.datalen #define cr_datalen u.link.datalen
#define cr_data u.link.data #define cr_data u.link.data
#define cr_first u.link.first
#define cr_specdata1 u.dev.specdata1 #define cr_specdata1 u.dev.specdata1
#define cr_specdata2 u.dev.specdata2 #define cr_specdata2 u.dev.specdata2
...@@ -228,6 +231,7 @@ struct nfsd4_open { ...@@ -228,6 +231,7 @@ struct nfsd4_open {
u32 op_why_no_deleg; /* response - DELEG_NONE_EXT only */ u32 op_why_no_deleg; /* response - DELEG_NONE_EXT only */
u32 op_create; /* request */ u32 op_create; /* request */
u32 op_createmode; /* request */ u32 op_createmode; /* request */
int op_umask; /* request */
u32 op_bmval[3]; /* request */ u32 op_bmval[3]; /* request */
struct iattr op_iattr; /* UNCHECKED4, GUARDED4, EXCLUSIVE4_1 */ struct iattr op_iattr; /* UNCHECKED4, GUARDED4, EXCLUSIVE4_1 */
nfs4_verifier op_verf __attribute__((aligned(32))); nfs4_verifier op_verf __attribute__((aligned(32)));
...@@ -518,7 +522,6 @@ struct nfsd4_copy { ...@@ -518,7 +522,6 @@ struct nfsd4_copy {
u64 cp_count; u64 cp_count;
/* both */ /* both */
bool cp_consecutive;
bool cp_synchronous; bool cp_synchronous;
/* response */ /* response */
......
...@@ -272,6 +272,7 @@ struct svc_rqst { ...@@ -272,6 +272,7 @@ struct svc_rqst {
#define RQ_BUSY (6) /* request is busy */ #define RQ_BUSY (6) /* request is busy */
#define RQ_DATA (7) /* request has data */ #define RQ_DATA (7) /* request has data */
unsigned long rq_flags; /* flags field */ unsigned long rq_flags; /* flags field */
ktime_t rq_qtime; /* enqueue time */
void * rq_argp; /* decoded arguments */ void * rq_argp; /* decoded arguments */
void * rq_resp; /* xdr'd results */ void * rq_resp; /* xdr'd results */
...@@ -283,6 +284,7 @@ struct svc_rqst { ...@@ -283,6 +284,7 @@ struct svc_rqst {
int rq_reserved; /* space on socket outq int rq_reserved; /* space on socket outq
* reserved for this request * reserved for this request
*/ */
ktime_t rq_stime; /* start time */
struct cache_req rq_chandle; /* handle passed to caches for struct cache_req rq_chandle; /* handle passed to caches for
* request delaying * request delaying
...@@ -493,6 +495,10 @@ void svc_wake_up(struct svc_serv *); ...@@ -493,6 +495,10 @@ void svc_wake_up(struct svc_serv *);
void svc_reserve(struct svc_rqst *rqstp, int space); void svc_reserve(struct svc_rqst *rqstp, int space);
struct svc_pool * svc_pool_for_cpu(struct svc_serv *serv, int cpu); struct svc_pool * svc_pool_for_cpu(struct svc_serv *serv, int cpu);
char * svc_print_addr(struct svc_rqst *, char *, size_t); char * svc_print_addr(struct svc_rqst *, char *, size_t);
unsigned int svc_fill_write_vector(struct svc_rqst *rqstp,
struct kvec *first, size_t total);
char *svc_fill_symlink_pathname(struct svc_rqst *rqstp,
struct kvec *first, size_t total);
#define RPC_MAX_ADDRBUFLEN (63U) #define RPC_MAX_ADDRBUFLEN (63U)
......
...@@ -132,9 +132,6 @@ struct svcxprt_rdma { ...@@ -132,9 +132,6 @@ struct svcxprt_rdma {
#define RDMAXPRT_CONN_PENDING 3 #define RDMAXPRT_CONN_PENDING 3
#define RPCRDMA_LISTEN_BACKLOG 10 #define RPCRDMA_LISTEN_BACKLOG 10
/* The default ORD value is based on two outstanding full-size writes with a
* page size of 4k, or 32k * 2 ops / 4k = 16 outstanding RDMA_READ. */
#define RPCRDMA_ORD (64/4)
#define RPCRDMA_MAX_REQUESTS 32 #define RPCRDMA_MAX_REQUESTS 32
/* Typical ULP usage of BC requests is NFSv4.1 backchannel. Our /* Typical ULP usage of BC requests is NFSv4.1 backchannel. Our
......
...@@ -25,7 +25,7 @@ struct svc_xprt_ops { ...@@ -25,7 +25,7 @@ struct svc_xprt_ops {
void (*xpo_release_rqst)(struct svc_rqst *); void (*xpo_release_rqst)(struct svc_rqst *);
void (*xpo_detach)(struct svc_xprt *); void (*xpo_detach)(struct svc_xprt *);
void (*xpo_free)(struct svc_xprt *); void (*xpo_free)(struct svc_xprt *);
int (*xpo_secure_port)(struct svc_rqst *); void (*xpo_secure_port)(struct svc_rqst *rqstp);
void (*xpo_kill_temp_xprt)(struct svc_xprt *); void (*xpo_kill_temp_xprt)(struct svc_xprt *);
}; };
...@@ -83,6 +83,7 @@ struct svc_xprt { ...@@ -83,6 +83,7 @@ struct svc_xprt {
size_t xpt_locallen; /* length of address */ size_t xpt_locallen; /* length of address */
struct sockaddr_storage xpt_remote; /* remote peer's address */ struct sockaddr_storage xpt_remote; /* remote peer's address */
size_t xpt_remotelen; /* length of address */ size_t xpt_remotelen; /* length of address */
char xpt_remotebuf[INET6_ADDRSTRLEN + 10];
struct rpc_wait_queue xpt_bc_pending; /* backchannel wait queue */ struct rpc_wait_queue xpt_bc_pending; /* backchannel wait queue */
struct list_head xpt_users; /* callbacks on free */ struct list_head xpt_users; /* callbacks on free */
...@@ -152,7 +153,10 @@ static inline void svc_xprt_set_remote(struct svc_xprt *xprt, ...@@ -152,7 +153,10 @@ static inline void svc_xprt_set_remote(struct svc_xprt *xprt,
{ {
memcpy(&xprt->xpt_remote, sa, salen); memcpy(&xprt->xpt_remote, sa, salen);
xprt->xpt_remotelen = salen; xprt->xpt_remotelen = salen;
snprintf(xprt->xpt_remotebuf, sizeof(xprt->xpt_remotebuf) - 1,
"%pISpc", sa);
} }
static inline unsigned short svc_addr_port(const struct sockaddr *sa) static inline unsigned short svc_addr_port(const struct sockaddr *sa)
{ {
const struct sockaddr_in *sin = (const struct sockaddr_in *)sa; const struct sockaddr_in *sin = (const struct sockaddr_in *)sa;
......
...@@ -485,31 +485,55 @@ TRACE_EVENT(xs_tcp_data_recv, ...@@ -485,31 +485,55 @@ TRACE_EVENT(xs_tcp_data_recv,
{ (1UL << RQ_BUSY), "RQ_BUSY"}) { (1UL << RQ_BUSY), "RQ_BUSY"})
TRACE_EVENT(svc_recv, TRACE_EVENT(svc_recv,
TP_PROTO(struct svc_rqst *rqst, int status), TP_PROTO(struct svc_rqst *rqst, int len),
TP_ARGS(rqst, status), TP_ARGS(rqst, len),
TP_STRUCT__entry( TP_STRUCT__entry(
__field(u32, xid) __field(u32, xid)
__field(int, status) __field(int, len)
__field(unsigned long, flags) __field(unsigned long, flags)
__dynamic_array(unsigned char, addr, rqst->rq_addrlen) __string(addr, rqst->rq_xprt->xpt_remotebuf)
), ),
TP_fast_assign( TP_fast_assign(
__entry->xid = status > 0 ? be32_to_cpu(rqst->rq_xid) : 0; __entry->xid = be32_to_cpu(rqst->rq_xid);
__entry->status = status; __entry->len = len;
__entry->flags = rqst->rq_flags; __entry->flags = rqst->rq_flags;
memcpy(__get_dynamic_array(addr), __assign_str(addr, rqst->rq_xprt->xpt_remotebuf);
&rqst->rq_addr, rqst->rq_addrlen);
), ),
TP_printk("addr=%pIScp xid=0x%08x status=%d flags=%s", TP_printk("addr=%s xid=0x%08x len=%d flags=%s",
(struct sockaddr *)__get_dynamic_array(addr), __get_str(addr), __entry->xid, __entry->len,
__entry->xid, __entry->status,
show_rqstp_flags(__entry->flags)) show_rqstp_flags(__entry->flags))
); );
/*
 * svc_process - trace event identifying an RPC request.
 *
 * Records the request's XID (converted to CPU byte order), RPC version,
 * procedure number, the service name passed in by the caller, and the
 * remote address string stored in the request's transport
 * (rq_xprt->xpt_remotebuf).
 */
TRACE_EVENT(svc_process,
TP_PROTO(const struct svc_rqst *rqst, const char *name),
TP_ARGS(rqst, name),
TP_STRUCT__entry(
__field(u32, xid)
__field(u32, vers)
__field(u32, proc)
__string(service, name)
__string(addr, rqst->rq_xprt->xpt_remotebuf)
),
TP_fast_assign(
__entry->xid = be32_to_cpu(rqst->rq_xid);
__entry->vers = rqst->rq_vers;
__entry->proc = rqst->rq_proc;
__assign_str(service, name);
__assign_str(addr, rqst->rq_xprt->xpt_remotebuf);
),
TP_printk("addr=%s xid=0x%08x service=%s vers=%u proc=%u",
__get_str(addr), __entry->xid,
__get_str(service), __entry->vers, __entry->proc)
);
DECLARE_EVENT_CLASS(svc_rqst_event, DECLARE_EVENT_CLASS(svc_rqst_event,
TP_PROTO(struct svc_rqst *rqst), TP_PROTO(struct svc_rqst *rqst),
...@@ -519,20 +543,18 @@ DECLARE_EVENT_CLASS(svc_rqst_event, ...@@ -519,20 +543,18 @@ DECLARE_EVENT_CLASS(svc_rqst_event,
TP_STRUCT__entry( TP_STRUCT__entry(
__field(u32, xid) __field(u32, xid)
__field(unsigned long, flags) __field(unsigned long, flags)
__dynamic_array(unsigned char, addr, rqst->rq_addrlen) __string(addr, rqst->rq_xprt->xpt_remotebuf)
), ),
TP_fast_assign( TP_fast_assign(
__entry->xid = be32_to_cpu(rqst->rq_xid); __entry->xid = be32_to_cpu(rqst->rq_xid);
__entry->flags = rqst->rq_flags; __entry->flags = rqst->rq_flags;
memcpy(__get_dynamic_array(addr), __assign_str(addr, rqst->rq_xprt->xpt_remotebuf);
&rqst->rq_addr, rqst->rq_addrlen);
), ),
TP_printk("addr=%pIScp rq_xid=0x%08x flags=%s", TP_printk("addr=%s xid=0x%08x flags=%s",
(struct sockaddr *)__get_dynamic_array(addr), __get_str(addr), __entry->xid,
__entry->xid, show_rqstp_flags(__entry->flags))
show_rqstp_flags(__entry->flags))
); );
DEFINE_EVENT(svc_rqst_event, svc_defer, DEFINE_EVENT(svc_rqst_event, svc_defer,
...@@ -553,27 +575,21 @@ DECLARE_EVENT_CLASS(svc_rqst_status, ...@@ -553,27 +575,21 @@ DECLARE_EVENT_CLASS(svc_rqst_status,
__field(u32, xid) __field(u32, xid)
__field(int, status) __field(int, status)
__field(unsigned long, flags) __field(unsigned long, flags)
__dynamic_array(unsigned char, addr, rqst->rq_addrlen) __string(addr, rqst->rq_xprt->xpt_remotebuf)
), ),
TP_fast_assign( TP_fast_assign(
__entry->xid = be32_to_cpu(rqst->rq_xid); __entry->xid = be32_to_cpu(rqst->rq_xid);
__entry->status = status; __entry->status = status;
__entry->flags = rqst->rq_flags; __entry->flags = rqst->rq_flags;
memcpy(__get_dynamic_array(addr), __assign_str(addr, rqst->rq_xprt->xpt_remotebuf);
&rqst->rq_addr, rqst->rq_addrlen);
), ),
TP_printk("addr=%pIScp rq_xid=0x%08x status=%d flags=%s", TP_printk("addr=%s xid=0x%08x status=%d flags=%s",
(struct sockaddr *)__get_dynamic_array(addr), __get_str(addr), __entry->xid,
__entry->xid, __entry->status, show_rqstp_flags(__entry->flags))
__entry->status, show_rqstp_flags(__entry->flags))
); );
DEFINE_EVENT(svc_rqst_status, svc_process,
TP_PROTO(struct svc_rqst *rqst, int status),
TP_ARGS(rqst, status));
DEFINE_EVENT(svc_rqst_status, svc_send, DEFINE_EVENT(svc_rqst_status, svc_send,
TP_PROTO(struct svc_rqst *rqst, int status), TP_PROTO(struct svc_rqst *rqst, int status),
TP_ARGS(rqst, status)); TP_ARGS(rqst, status));
...@@ -591,7 +607,9 @@ DEFINE_EVENT(svc_rqst_status, svc_send, ...@@ -591,7 +607,9 @@ DEFINE_EVENT(svc_rqst_status, svc_send,
{ (1UL << XPT_OLD), "XPT_OLD"}, \ { (1UL << XPT_OLD), "XPT_OLD"}, \
{ (1UL << XPT_LISTENER), "XPT_LISTENER"}, \ { (1UL << XPT_LISTENER), "XPT_LISTENER"}, \
{ (1UL << XPT_CACHE_AUTH), "XPT_CACHE_AUTH"}, \ { (1UL << XPT_CACHE_AUTH), "XPT_CACHE_AUTH"}, \
{ (1UL << XPT_LOCAL), "XPT_LOCAL"}) { (1UL << XPT_LOCAL), "XPT_LOCAL"}, \
{ (1UL << XPT_KILL_TEMP), "XPT_KILL_TEMP"}, \
{ (1UL << XPT_CONG_CTRL), "XPT_CONG_CTRL"})
TRACE_EVENT(svc_xprt_do_enqueue, TRACE_EVENT(svc_xprt_do_enqueue,
TP_PROTO(struct svc_xprt *xprt, struct svc_rqst *rqst), TP_PROTO(struct svc_xprt *xprt, struct svc_rqst *rqst),
...@@ -602,26 +620,19 @@ TRACE_EVENT(svc_xprt_do_enqueue, ...@@ -602,26 +620,19 @@ TRACE_EVENT(svc_xprt_do_enqueue,
__field(struct svc_xprt *, xprt) __field(struct svc_xprt *, xprt)
__field(int, pid) __field(int, pid)
__field(unsigned long, flags) __field(unsigned long, flags)
__dynamic_array(unsigned char, addr, xprt != NULL ? __string(addr, xprt->xpt_remotebuf)
xprt->xpt_remotelen : 0)
), ),
TP_fast_assign( TP_fast_assign(
__entry->xprt = xprt; __entry->xprt = xprt;
__entry->pid = rqst? rqst->rq_task->pid : 0; __entry->pid = rqst? rqst->rq_task->pid : 0;
if (xprt) { __entry->flags = xprt->xpt_flags;
memcpy(__get_dynamic_array(addr), __assign_str(addr, xprt->xpt_remotebuf);
&xprt->xpt_remote, ),
xprt->xpt_remotelen);
__entry->flags = xprt->xpt_flags; TP_printk("xprt=%p addr=%s pid=%d flags=%s",
} else __entry->xprt, __get_str(addr),
__entry->flags = 0; __entry->pid, show_svc_xprt_flags(__entry->flags))
),
TP_printk("xprt=0x%p addr=%pIScp pid=%d flags=%s", __entry->xprt,
__get_dynamic_array_len(addr) != 0 ?
(struct sockaddr *)__get_dynamic_array(addr) : NULL,
__entry->pid, show_svc_xprt_flags(__entry->flags))
); );
DECLARE_EVENT_CLASS(svc_xprt_event, DECLARE_EVENT_CLASS(svc_xprt_event,
...@@ -632,35 +643,50 @@ DECLARE_EVENT_CLASS(svc_xprt_event, ...@@ -632,35 +643,50 @@ DECLARE_EVENT_CLASS(svc_xprt_event,
TP_STRUCT__entry( TP_STRUCT__entry(
__field(struct svc_xprt *, xprt) __field(struct svc_xprt *, xprt)
__field(unsigned long, flags) __field(unsigned long, flags)
__dynamic_array(unsigned char, addr, xprt != NULL ? __string(addr, xprt->xpt_remotebuf)
xprt->xpt_remotelen : 0)
), ),
TP_fast_assign( TP_fast_assign(
__entry->xprt = xprt; __entry->xprt = xprt;
if (xprt) { __entry->flags = xprt->xpt_flags;
memcpy(__get_dynamic_array(addr), __assign_str(addr, xprt->xpt_remotebuf);
&xprt->xpt_remote, ),
xprt->xpt_remotelen);
__entry->flags = xprt->xpt_flags;
} else
__entry->flags = 0;
),
TP_printk("xprt=0x%p addr=%pIScp flags=%s", __entry->xprt,
__get_dynamic_array_len(addr) != 0 ?
(struct sockaddr *)__get_dynamic_array(addr) : NULL,
show_svc_xprt_flags(__entry->flags))
);
DEFINE_EVENT(svc_xprt_event, svc_xprt_dequeue, TP_printk("xprt=%p addr=%s flags=%s",
TP_PROTO(struct svc_xprt *xprt), __entry->xprt, __get_str(addr),
TP_ARGS(xprt)); show_svc_xprt_flags(__entry->flags))
);
DEFINE_EVENT(svc_xprt_event, svc_xprt_no_write_space, DEFINE_EVENT(svc_xprt_event, svc_xprt_no_write_space,
TP_PROTO(struct svc_xprt *xprt), TP_PROTO(struct svc_xprt *xprt),
TP_ARGS(xprt)); TP_ARGS(xprt));
/*
 * svc_xprt_dequeue - trace event reporting a transport taken up by a request.
 *
 * Records the transport pointer, the transport's flags, its remote address
 * string (xpt_remotebuf), and "wakeup": the number of microseconds between
 * rqst->rq_qtime and the moment this event is recorded (ktime_get()).
 */
TRACE_EVENT(svc_xprt_dequeue,
TP_PROTO(struct svc_rqst *rqst),
TP_ARGS(rqst),
TP_STRUCT__entry(
__field(struct svc_xprt *, xprt)
__field(unsigned long, flags)
__field(unsigned long, wakeup)
__string(addr, rqst->rq_xprt->xpt_remotebuf)
),
TP_fast_assign(
__entry->xprt = rqst->rq_xprt;
__entry->flags = rqst->rq_xprt->xpt_flags;
__entry->wakeup = ktime_to_us(ktime_sub(ktime_get(),
rqst->rq_qtime));
__assign_str(addr, rqst->rq_xprt->xpt_remotebuf);
),
TP_printk("xprt=%p addr=%s flags=%s wakeup-us=%lu",
__entry->xprt, __get_str(addr),
show_svc_xprt_flags(__entry->flags),
__entry->wakeup)
);
TRACE_EVENT(svc_wake_up, TRACE_EVENT(svc_wake_up,
TP_PROTO(int pid), TP_PROTO(int pid),
...@@ -686,28 +712,42 @@ TRACE_EVENT(svc_handle_xprt, ...@@ -686,28 +712,42 @@ TRACE_EVENT(svc_handle_xprt,
__field(struct svc_xprt *, xprt) __field(struct svc_xprt *, xprt)
__field(int, len) __field(int, len)
__field(unsigned long, flags) __field(unsigned long, flags)
__dynamic_array(unsigned char, addr, xprt != NULL ? __string(addr, xprt->xpt_remotebuf)
xprt->xpt_remotelen : 0)
), ),
TP_fast_assign( TP_fast_assign(
__entry->xprt = xprt; __entry->xprt = xprt;
__entry->len = len; __entry->len = len;
if (xprt) { __entry->flags = xprt->xpt_flags;
memcpy(__get_dynamic_array(addr), __assign_str(addr, xprt->xpt_remotebuf);
&xprt->xpt_remote, ),
xprt->xpt_remotelen);
__entry->flags = xprt->xpt_flags; TP_printk("xprt=%p addr=%s len=%d flags=%s",
} else __entry->xprt, __get_str(addr),
__entry->flags = 0;
),
TP_printk("xprt=0x%p addr=%pIScp len=%d flags=%s", __entry->xprt,
__get_dynamic_array_len(addr) != 0 ?
(struct sockaddr *)__get_dynamic_array(addr) : NULL,
__entry->len, show_svc_xprt_flags(__entry->flags)) __entry->len, show_svc_xprt_flags(__entry->flags))
); );
/*
 * svc_stats_latency - trace event reporting per-request execution time.
 *
 * Records the request's XID (converted to CPU byte order), the remote
 * address string of its transport (xpt_remotebuf), and "execute": the
 * number of microseconds between rqst->rq_stime and the moment this event
 * is recorded (ktime_get()).
 */
TRACE_EVENT(svc_stats_latency,
TP_PROTO(const struct svc_rqst *rqst),
TP_ARGS(rqst),
TP_STRUCT__entry(
__field(u32, xid)
__field(unsigned long, execute)
__string(addr, rqst->rq_xprt->xpt_remotebuf)
),
TP_fast_assign(
__entry->xid = be32_to_cpu(rqst->rq_xid);
__entry->execute = ktime_to_us(ktime_sub(ktime_get(),
rqst->rq_stime));
__assign_str(addr, rqst->rq_xprt->xpt_remotebuf);
),
TP_printk("addr=%s xid=0x%08x execute-us=%lu",
__get_str(addr), __entry->xid, __entry->execute)
);
DECLARE_EVENT_CLASS(svc_deferred_event, DECLARE_EVENT_CLASS(svc_deferred_event,
TP_PROTO(struct svc_deferred_req *dr), TP_PROTO(struct svc_deferred_req *dr),
...@@ -716,18 +756,16 @@ DECLARE_EVENT_CLASS(svc_deferred_event, ...@@ -716,18 +756,16 @@ DECLARE_EVENT_CLASS(svc_deferred_event,
TP_STRUCT__entry( TP_STRUCT__entry(
__field(u32, xid) __field(u32, xid)
__dynamic_array(unsigned char, addr, dr->addrlen) __string(addr, dr->xprt->xpt_remotebuf)
), ),
TP_fast_assign( TP_fast_assign(
__entry->xid = be32_to_cpu(*(__be32 *)(dr->args + __entry->xid = be32_to_cpu(*(__be32 *)(dr->args +
(dr->xprt_hlen>>2))); (dr->xprt_hlen>>2)));
memcpy(__get_dynamic_array(addr), &dr->addr, dr->addrlen); __assign_str(addr, dr->xprt->xpt_remotebuf);
), ),
TP_printk("addr=%pIScp xid=0x%08x", TP_printk("addr=%s xid=0x%08x", __get_str(addr), __entry->xid)
(struct sockaddr *)__get_dynamic_array(addr),
__entry->xid)
); );
DEFINE_EVENT(svc_deferred_event, svc_drop_deferred, DEFINE_EVENT(svc_deferred_event, svc_drop_deferred,
......
...@@ -237,9 +237,6 @@ make_checksum_hmac_md5(struct krb5_ctx *kctx, char *header, int hdrlen, ...@@ -237,9 +237,6 @@ make_checksum_hmac_md5(struct krb5_ctx *kctx, char *header, int hdrlen,
ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL); ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
err = crypto_ahash_init(req);
if (err)
goto out;
err = crypto_ahash_setkey(hmac_md5, cksumkey, kctx->gk5e->keylength); err = crypto_ahash_setkey(hmac_md5, cksumkey, kctx->gk5e->keylength);
if (err) if (err)
goto out; goto out;
......
...@@ -177,6 +177,7 @@ gss_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text, ...@@ -177,6 +177,7 @@ gss_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text,
u64 seq_send; u64 seq_send;
u8 *cksumkey; u8 *cksumkey;
unsigned int cksum_usage; unsigned int cksum_usage;
__be64 seq_send_be64;
dprintk("RPC: %s\n", __func__); dprintk("RPC: %s\n", __func__);
...@@ -187,7 +188,9 @@ gss_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text, ...@@ -187,7 +188,9 @@ gss_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text,
spin_lock(&krb5_seq_lock); spin_lock(&krb5_seq_lock);
seq_send = ctx->seq_send64++; seq_send = ctx->seq_send64++;
spin_unlock(&krb5_seq_lock); spin_unlock(&krb5_seq_lock);
*((__be64 *)(krb5_hdr + 8)) = cpu_to_be64(seq_send);
seq_send_be64 = cpu_to_be64(seq_send);
memcpy(krb5_hdr + 8, (char *) &seq_send_be64, 8);
if (ctx->initiate) { if (ctx->initiate) {
cksumkey = ctx->initiator_sign; cksumkey = ctx->initiator_sign;
......
...@@ -155,10 +155,12 @@ gss_verify_mic_v2(struct krb5_ctx *ctx, ...@@ -155,10 +155,12 @@ gss_verify_mic_v2(struct krb5_ctx *ctx,
u8 flags; u8 flags;
int i; int i;
unsigned int cksum_usage; unsigned int cksum_usage;
__be16 be16_ptr;
dprintk("RPC: %s\n", __func__); dprintk("RPC: %s\n", __func__);
if (be16_to_cpu(*((__be16 *)ptr)) != KG2_TOK_MIC) memcpy(&be16_ptr, (char *) ptr, 2);
if (be16_to_cpu(be16_ptr) != KG2_TOK_MIC)
return GSS_S_DEFECTIVE_TOKEN; return GSS_S_DEFECTIVE_TOKEN;
flags = ptr[2]; flags = ptr[2];
......
...@@ -1450,8 +1450,8 @@ static ssize_t write_flush(struct file *file, const char __user *buf, ...@@ -1450,8 +1450,8 @@ static ssize_t write_flush(struct file *file, const char __user *buf,
struct cache_detail *cd) struct cache_detail *cd)
{ {
char tbuf[20]; char tbuf[20];
char *bp, *ep; char *ep;
time_t then, now; time_t now;
if (*ppos || count > sizeof(tbuf)-1) if (*ppos || count > sizeof(tbuf)-1)
return -EINVAL; return -EINVAL;
...@@ -1461,24 +1461,24 @@ static ssize_t write_flush(struct file *file, const char __user *buf, ...@@ -1461,24 +1461,24 @@ static ssize_t write_flush(struct file *file, const char __user *buf,
simple_strtoul(tbuf, &ep, 0); simple_strtoul(tbuf, &ep, 0);
if (*ep && *ep != '\n') if (*ep && *ep != '\n')
return -EINVAL; return -EINVAL;
/* Note that while we check that 'buf' holds a valid number,
* we always ignore the value and just flush everything.
* Making use of the number leads to races.
*/
bp = tbuf;
then = get_expiry(&bp);
now = seconds_since_boot(); now = seconds_since_boot();
cd->nextcheck = now; /* Always flush everything, so behave like cache_purge()
/* Can only set flush_time to 1 second beyond "now", or * Do this by advancing flush_time to the current time,
* possibly 1 second beyond flushtime. This is because * or by one second if it has already reached the current time.
* flush_time never goes backwards so it mustn't get too far * Newly added cache entries will always have ->last_refresh greater
* ahead of time. * than ->flush_time, so they don't get flushed prematurely.
*/ */
if (then >= now) {
/* Want to flush everything, so behave like cache_purge() */
if (cd->flush_time >= now)
now = cd->flush_time + 1;
then = now;
}
cd->flush_time = then; if (cd->flush_time >= now)
now = cd->flush_time + 1;
cd->flush_time = now;
cd->nextcheck = now;
cache_flush(); cache_flush();
*ppos += count; *ppos += count;
......
...@@ -1255,6 +1255,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) ...@@ -1255,6 +1255,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
/* Syntactic check complete */ /* Syntactic check complete */
serv->sv_stats->rpccnt++; serv->sv_stats->rpccnt++;
trace_svc_process(rqstp, progp->pg_name);
/* Build the reply header. */ /* Build the reply header. */
statp = resv->iov_base +resv->iov_len; statp = resv->iov_base +resv->iov_len;
...@@ -1431,14 +1432,10 @@ svc_process(struct svc_rqst *rqstp) ...@@ -1431,14 +1432,10 @@ svc_process(struct svc_rqst *rqstp)
} }
/* Returns 1 for send, 0 for drop */ /* Returns 1 for send, 0 for drop */
if (likely(svc_process_common(rqstp, argv, resv))) { if (likely(svc_process_common(rqstp, argv, resv)))
int ret = svc_send(rqstp); return svc_send(rqstp);
trace_svc_process(rqstp, ret);
return ret;
}
out_drop: out_drop:
trace_svc_process(rqstp, 0);
svc_drop(rqstp); svc_drop(rqstp);
return 0; return 0;
} }
...@@ -1536,3 +1533,112 @@ u32 svc_max_payload(const struct svc_rqst *rqstp) ...@@ -1536,3 +1533,112 @@ u32 svc_max_payload(const struct svc_rqst *rqstp)
return max; return max;
} }
EXPORT_SYMBOL_GPL(svc_max_payload); EXPORT_SYMBOL_GPL(svc_max_payload);
/**
 * svc_fill_write_vector - Construct data argument for VFS write call
 * @rqstp: svc_rqst to operate on
 * @first: buffer containing first section of write payload
 * @total: total number of bytes of write payload
 *
 * Populates @rqstp->rq_vec, starting with @first (when it is not empty)
 * and continuing with one element per page of @rqstp->rq_arg.pages until
 * @total bytes have been described. If rq_vec runs out of room before
 * @total bytes are covered, filling stops and a one-time warning is
 * issued; the array is never written past its end.
 *
 * Returns the number of elements populated in the data argument array.
 */
unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, struct kvec *first,
				   size_t total)
{
	struct kvec *vec = rqstp->rq_vec;
	struct page **pages;
	unsigned int i;

	/* Some types of transport can present the write payload
	 * entirely in rq_arg.pages. In this case, @first is empty.
	 */
	i = 0;
	if (first->iov_len) {
		vec[i].iov_base = first->iov_base;
		vec[i].iov_len = min_t(size_t, total, first->iov_len);
		total -= vec[i].iov_len;
		++i;
	}

	WARN_ON_ONCE(rqstp->rq_arg.page_base != 0);
	pages = rqstp->rq_arg.pages;

	/* Bound the walk by the capacity of rq_vec so that an oversized
	 * @total cannot cause a store beyond the end of the array.
	 */
	while (total && i < ARRAY_SIZE(rqstp->rq_vec)) {
		vec[i].iov_base = page_address(*pages);
		vec[i].iov_len = min_t(size_t, total, PAGE_SIZE);
		total -= vec[i].iov_len;
		++i;
		++pages;
	}

	/* Any bytes left over here did not fit in rq_vec. */
	WARN_ON_ONCE(total != 0);
	return i;
}
EXPORT_SYMBOL_GPL(svc_fill_write_vector);
/**
 * svc_fill_symlink_pathname - Construct pathname argument for VFS symlink call
 * @rqstp: svc_rqst to operate on
 * @first: buffer containing first section of pathname
 * @total: total length of the pathname argument
 *
 * When @first carries no buffer, the pathname is NUL-terminated in place
 * in the first page of @rqstp->rq_arg. Otherwise the pathname is copied
 * into the page at @rqstp->rq_next_page, which is advanced past it.
 *
 * Returns pointer to a NUL-terminated string, or an ERR_PTR:
 * %-ENAMETOOLONG if @total exceeds PAGE_SIZE - 1, or %-EINVAL if the
 * pathname contains an embedded NUL byte. The buffer is released
 * automatically when @rqstp is recycled.
 */
char *svc_fill_symlink_pathname(struct svc_rqst *rqstp, struct kvec *first,
				size_t total)
{
	struct xdr_buf *arg = &rqstp->rq_arg;
	struct page **pages;
	char *result;

	/* VFS API demands a NUL-terminated pathname. This function
	 * uses a page from @rqstp as the pathname buffer, to enable
	 * direct placement. Thus the total buffer size is PAGE_SIZE.
	 * Space in this buffer for NUL-termination requires that we
	 * cap the size of the returned symlink pathname just a
	 * little early.
	 */
	if (total > PAGE_SIZE - 1)
		return ERR_PTR(-ENAMETOOLONG);

	/* Some types of transport can present the pathname entirely
	 * in rq_arg.pages. If not, then copy the pathname into one
	 * page.
	 */
	pages = arg->pages;
	WARN_ON_ONCE(arg->page_base != 0);
	if (!first->iov_base) {
		/* Pathname already sits in the first argument page;
		 * terminate it in place.
		 */
		result = page_address(*pages);
		result[total] = '\0';
	} else {
		size_t len, remaining;
		char *dst;

		result = page_address(*(rqstp->rq_next_page++));
		dst = result;
		remaining = total;

		/* Leading part of the pathname comes from @first. */
		len = min_t(size_t, total, first->iov_len);
		memcpy(dst, first->iov_base, len);
		dst += len;
		remaining -= len;

		/* No more than one page left */
		if (remaining) {
			len = min_t(size_t, remaining, PAGE_SIZE);
			memcpy(dst, page_address(*pages), len);
			dst += len;
		}

		*dst = '\0';
	}

	/* Sanity check: we don't allow the pathname argument to
	 * contain a NUL byte.
	 */
	if (strlen(result) != total)
		return ERR_PTR(-EINVAL);
	return result;
}
EXPORT_SYMBOL_GPL(svc_fill_symlink_pathname);
...@@ -173,6 +173,7 @@ void svc_xprt_init(struct net *net, struct svc_xprt_class *xcl, ...@@ -173,6 +173,7 @@ void svc_xprt_init(struct net *net, struct svc_xprt_class *xcl,
set_bit(XPT_BUSY, &xprt->xpt_flags); set_bit(XPT_BUSY, &xprt->xpt_flags);
rpc_init_wait_queue(&xprt->xpt_bc_pending, "xpt_bc_pending"); rpc_init_wait_queue(&xprt->xpt_bc_pending, "xpt_bc_pending");
xprt->xpt_net = get_net(net); xprt->xpt_net = get_net(net);
strcpy(xprt->xpt_remotebuf, "uninitialized");
} }
EXPORT_SYMBOL_GPL(svc_xprt_init); EXPORT_SYMBOL_GPL(svc_xprt_init);
...@@ -382,25 +383,21 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt) ...@@ -382,25 +383,21 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
int cpu; int cpu;
if (!svc_xprt_has_something_to_do(xprt)) if (!svc_xprt_has_something_to_do(xprt))
goto out; return;
/* Mark transport as busy. It will remain in this state until /* Mark transport as busy. It will remain in this state until
* the provider calls svc_xprt_received. We update XPT_BUSY * the provider calls svc_xprt_received. We update XPT_BUSY
* atomically because it also guards against trying to enqueue * atomically because it also guards against trying to enqueue
* the transport twice. * the transport twice.
*/ */
if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags)) { if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags))
/* Don't enqueue transport while already enqueued */ return;
dprintk("svc: transport %p busy, not enqueued\n", xprt);
goto out;
}
cpu = get_cpu(); cpu = get_cpu();
pool = svc_pool_for_cpu(xprt->xpt_server, cpu); pool = svc_pool_for_cpu(xprt->xpt_server, cpu);
atomic_long_inc(&pool->sp_stats.packets); atomic_long_inc(&pool->sp_stats.packets);
dprintk("svc: transport %p put into queue\n", xprt);
spin_lock_bh(&pool->sp_lock); spin_lock_bh(&pool->sp_lock);
list_add_tail(&xprt->xpt_ready, &pool->sp_sockets); list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
pool->sp_stats.sockets_queued++; pool->sp_stats.sockets_queued++;
...@@ -412,6 +409,7 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt) ...@@ -412,6 +409,7 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags)) if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags))
continue; continue;
atomic_long_inc(&pool->sp_stats.threads_woken); atomic_long_inc(&pool->sp_stats.threads_woken);
rqstp->rq_qtime = ktime_get();
wake_up_process(rqstp->rq_task); wake_up_process(rqstp->rq_task);
goto out_unlock; goto out_unlock;
} }
...@@ -420,7 +418,6 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt) ...@@ -420,7 +418,6 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
out_unlock: out_unlock:
rcu_read_unlock(); rcu_read_unlock();
put_cpu(); put_cpu();
out:
trace_svc_xprt_do_enqueue(xprt, rqstp); trace_svc_xprt_do_enqueue(xprt, rqstp);
} }
EXPORT_SYMBOL_GPL(svc_xprt_do_enqueue); EXPORT_SYMBOL_GPL(svc_xprt_do_enqueue);
...@@ -454,13 +451,9 @@ static struct svc_xprt *svc_xprt_dequeue(struct svc_pool *pool) ...@@ -454,13 +451,9 @@ static struct svc_xprt *svc_xprt_dequeue(struct svc_pool *pool)
struct svc_xprt, xpt_ready); struct svc_xprt, xpt_ready);
list_del_init(&xprt->xpt_ready); list_del_init(&xprt->xpt_ready);
svc_xprt_get(xprt); svc_xprt_get(xprt);
dprintk("svc: transport %p dequeued, inuse=%d\n",
xprt, kref_read(&xprt->xpt_ref));
} }
spin_unlock_bh(&pool->sp_lock); spin_unlock_bh(&pool->sp_lock);
out: out:
trace_svc_xprt_dequeue(xprt);
return xprt; return xprt;
} }
...@@ -492,7 +485,7 @@ static void svc_xprt_release(struct svc_rqst *rqstp) ...@@ -492,7 +485,7 @@ static void svc_xprt_release(struct svc_rqst *rqstp)
{ {
struct svc_xprt *xprt = rqstp->rq_xprt; struct svc_xprt *xprt = rqstp->rq_xprt;
rqstp->rq_xprt->xpt_ops->xpo_release_rqst(rqstp); xprt->xpt_ops->xpo_release_rqst(rqstp);
kfree(rqstp->rq_deferred); kfree(rqstp->rq_deferred);
rqstp->rq_deferred = NULL; rqstp->rq_deferred = NULL;
...@@ -538,7 +531,6 @@ void svc_wake_up(struct svc_serv *serv) ...@@ -538,7 +531,6 @@ void svc_wake_up(struct svc_serv *serv)
if (test_bit(RQ_BUSY, &rqstp->rq_flags)) if (test_bit(RQ_BUSY, &rqstp->rq_flags))
continue; continue;
rcu_read_unlock(); rcu_read_unlock();
dprintk("svc: daemon %p woken up.\n", rqstp);
wake_up_process(rqstp->rq_task); wake_up_process(rqstp->rq_task);
trace_svc_wake_up(rqstp->rq_task->pid); trace_svc_wake_up(rqstp->rq_task->pid);
return; return;
...@@ -734,6 +726,7 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) ...@@ -734,6 +726,7 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
rqstp->rq_chandle.thread_wait = 5*HZ; rqstp->rq_chandle.thread_wait = 5*HZ;
else else
rqstp->rq_chandle.thread_wait = 1*HZ; rqstp->rq_chandle.thread_wait = 1*HZ;
trace_svc_xprt_dequeue(rqstp);
return rqstp->rq_xprt; return rqstp->rq_xprt;
} }
...@@ -789,7 +782,7 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) ...@@ -789,7 +782,7 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
len = svc_deferred_recv(rqstp); len = svc_deferred_recv(rqstp);
else else
len = xprt->xpt_ops->xpo_recvfrom(rqstp); len = xprt->xpt_ops->xpo_recvfrom(rqstp);
dprintk("svc: got len=%d\n", len); rqstp->rq_stime = ktime_get();
rqstp->rq_reserved = serv->sv_max_mesg; rqstp->rq_reserved = serv->sv_max_mesg;
atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
} }
...@@ -844,10 +837,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) ...@@ -844,10 +837,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
clear_bit(XPT_OLD, &xprt->xpt_flags); clear_bit(XPT_OLD, &xprt->xpt_flags);
if (xprt->xpt_ops->xpo_secure_port(rqstp)) xprt->xpt_ops->xpo_secure_port(rqstp);
set_bit(RQ_SECURE, &rqstp->rq_flags);
else
clear_bit(RQ_SECURE, &rqstp->rq_flags);
rqstp->rq_chandle.defer = svc_defer; rqstp->rq_chandle.defer = svc_defer;
rqstp->rq_xid = svc_getu32(&rqstp->rq_arg.head[0]); rqstp->rq_xid = svc_getu32(&rqstp->rq_arg.head[0]);
...@@ -859,7 +849,6 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) ...@@ -859,7 +849,6 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
rqstp->rq_res.len = 0; rqstp->rq_res.len = 0;
svc_xprt_release(rqstp); svc_xprt_release(rqstp);
out: out:
trace_svc_recv(rqstp, err);
return err; return err;
} }
EXPORT_SYMBOL_GPL(svc_recv); EXPORT_SYMBOL_GPL(svc_recv);
...@@ -889,7 +878,7 @@ int svc_send(struct svc_rqst *rqstp) ...@@ -889,7 +878,7 @@ int svc_send(struct svc_rqst *rqstp)
goto out; goto out;
/* release the receive skb before sending the reply */ /* release the receive skb before sending the reply */
rqstp->rq_xprt->xpt_ops->xpo_release_rqst(rqstp); xprt->xpt_ops->xpo_release_rqst(rqstp);
/* calculate over-all length */ /* calculate over-all length */
xb = &rqstp->rq_res; xb = &rqstp->rq_res;
...@@ -899,6 +888,7 @@ int svc_send(struct svc_rqst *rqstp) ...@@ -899,6 +888,7 @@ int svc_send(struct svc_rqst *rqstp)
/* Grab mutex to serialize outgoing data. */ /* Grab mutex to serialize outgoing data. */
mutex_lock(&xprt->xpt_mutex); mutex_lock(&xprt->xpt_mutex);
trace_svc_stats_latency(rqstp);
if (test_bit(XPT_DEAD, &xprt->xpt_flags) if (test_bit(XPT_DEAD, &xprt->xpt_flags)
|| test_bit(XPT_CLOSE, &xprt->xpt_flags)) || test_bit(XPT_CLOSE, &xprt->xpt_flags))
len = -ENOTCONN; len = -ENOTCONN;
...@@ -906,12 +896,12 @@ int svc_send(struct svc_rqst *rqstp) ...@@ -906,12 +896,12 @@ int svc_send(struct svc_rqst *rqstp)
len = xprt->xpt_ops->xpo_sendto(rqstp); len = xprt->xpt_ops->xpo_sendto(rqstp);
mutex_unlock(&xprt->xpt_mutex); mutex_unlock(&xprt->xpt_mutex);
rpc_wake_up(&xprt->xpt_bc_pending); rpc_wake_up(&xprt->xpt_bc_pending);
trace_svc_send(rqstp, len);
svc_xprt_release(rqstp); svc_xprt_release(rqstp);
if (len == -ECONNREFUSED || len == -ENOTCONN || len == -EAGAIN) if (len == -ECONNREFUSED || len == -ENOTCONN || len == -EAGAIN)
len = 0; len = 0;
out: out:
trace_svc_send(rqstp, len);
return len; return len;
} }
......
...@@ -391,9 +391,12 @@ static void svc_sock_setbufsize(struct socket *sock, unsigned int snd, ...@@ -391,9 +391,12 @@ static void svc_sock_setbufsize(struct socket *sock, unsigned int snd,
release_sock(sock->sk); release_sock(sock->sk);
} }
static int svc_sock_secure_port(struct svc_rqst *rqstp) static void svc_sock_secure_port(struct svc_rqst *rqstp)
{ {
return svc_port_is_privileged(svc_addr(rqstp)); if (svc_port_is_privileged(svc_addr(rqstp)))
set_bit(RQ_SECURE, &rqstp->rq_flags);
else
clear_bit(RQ_SECURE, &rqstp->rq_flags);
} }
/* /*
...@@ -1309,6 +1312,7 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) ...@@ -1309,6 +1312,7 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
set_bit(XPT_CONG_CTRL, &svsk->sk_xprt.xpt_flags); set_bit(XPT_CONG_CTRL, &svsk->sk_xprt.xpt_flags);
if (sk->sk_state == TCP_LISTEN) { if (sk->sk_state == TCP_LISTEN) {
dprintk("setting up TCP socket for listening\n"); dprintk("setting up TCP socket for listening\n");
strcpy(svsk->sk_xprt.xpt_remotebuf, "listener");
set_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags); set_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags);
sk->sk_data_ready = svc_tcp_listen_data_ready; sk->sk_data_ready = svc_tcp_listen_data_ready;
set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags); set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
......
...@@ -51,9 +51,9 @@ ...@@ -51,9 +51,9 @@
#define RPCDBG_FACILITY RPCDBG_SVCXPRT #define RPCDBG_FACILITY RPCDBG_SVCXPRT
/* RPC/RDMA parameters */ /* RPC/RDMA parameters */
unsigned int svcrdma_ord = RPCRDMA_ORD; unsigned int svcrdma_ord = 16; /* historical default */
static unsigned int min_ord = 1; static unsigned int min_ord = 1;
static unsigned int max_ord = 4096; static unsigned int max_ord = 255;
unsigned int svcrdma_max_requests = RPCRDMA_MAX_REQUESTS; unsigned int svcrdma_max_requests = RPCRDMA_MAX_REQUESTS;
unsigned int svcrdma_max_bc_requests = RPCRDMA_MAX_BC_REQUESTS; unsigned int svcrdma_max_bc_requests = RPCRDMA_MAX_BC_REQUESTS;
static unsigned int min_max_requests = 4; static unsigned int min_max_requests = 4;
......
...@@ -110,15 +110,16 @@ ...@@ -110,15 +110,16 @@
* the RDMA_RECV completion. The SGL should contain full pages up until the * the RDMA_RECV completion. The SGL should contain full pages up until the
* last one. * last one.
*/ */
static void rdma_build_arg_xdr(struct svc_rqst *rqstp, static void svc_rdma_build_arg_xdr(struct svc_rqst *rqstp,
struct svc_rdma_op_ctxt *ctxt, struct svc_rdma_op_ctxt *ctxt)
u32 byte_count)
{ {
struct page *page; struct page *page;
u32 bc;
int sge_no; int sge_no;
u32 len;
/* Swap the page in the SGE with the page in argpages */ /* The reply path assumes the Call's transport header resides
* in rqstp->rq_pages[0].
*/
page = ctxt->pages[0]; page = ctxt->pages[0];
put_page(rqstp->rq_pages[0]); put_page(rqstp->rq_pages[0]);
rqstp->rq_pages[0] = page; rqstp->rq_pages[0] = page;
...@@ -126,35 +127,35 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp, ...@@ -126,35 +127,35 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
/* Set up the XDR head */ /* Set up the XDR head */
rqstp->rq_arg.head[0].iov_base = page_address(page); rqstp->rq_arg.head[0].iov_base = page_address(page);
rqstp->rq_arg.head[0].iov_len = rqstp->rq_arg.head[0].iov_len =
min_t(size_t, byte_count, ctxt->sge[0].length); min_t(size_t, ctxt->byte_len, ctxt->sge[0].length);
rqstp->rq_arg.len = byte_count; rqstp->rq_arg.len = ctxt->byte_len;
rqstp->rq_arg.buflen = byte_count; rqstp->rq_arg.buflen = ctxt->byte_len;
/* Compute bytes past head in the SGL */ /* Compute bytes past head in the SGL */
bc = byte_count - rqstp->rq_arg.head[0].iov_len; len = ctxt->byte_len - rqstp->rq_arg.head[0].iov_len;
/* If data remains, store it in the pagelist */ /* If data remains, store it in the pagelist */
rqstp->rq_arg.page_len = bc; rqstp->rq_arg.page_len = len;
rqstp->rq_arg.page_base = 0; rqstp->rq_arg.page_base = 0;
sge_no = 1; sge_no = 1;
while (bc && sge_no < ctxt->count) { while (len && sge_no < ctxt->count) {
page = ctxt->pages[sge_no]; page = ctxt->pages[sge_no];
put_page(rqstp->rq_pages[sge_no]); put_page(rqstp->rq_pages[sge_no]);
rqstp->rq_pages[sge_no] = page; rqstp->rq_pages[sge_no] = page;
bc -= min_t(u32, bc, ctxt->sge[sge_no].length); len -= min_t(u32, len, ctxt->sge[sge_no].length);
sge_no++; sge_no++;
} }
rqstp->rq_respages = &rqstp->rq_pages[sge_no]; rqstp->rq_respages = &rqstp->rq_pages[sge_no];
rqstp->rq_next_page = rqstp->rq_respages + 1; rqstp->rq_next_page = rqstp->rq_respages + 1;
/* If not all pages were used from the SGL, free the remaining ones */ /* If not all pages were used from the SGL, free the remaining ones */
bc = sge_no; len = sge_no;
while (sge_no < ctxt->count) { while (sge_no < ctxt->count) {
page = ctxt->pages[sge_no++]; page = ctxt->pages[sge_no++];
put_page(page); put_page(page);
} }
ctxt->count = bc; ctxt->count = len;
/* Set up tail */ /* Set up tail */
rqstp->rq_arg.tail[0].iov_base = NULL; rqstp->rq_arg.tail[0].iov_base = NULL;
...@@ -534,10 +535,8 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) ...@@ -534,10 +535,8 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
ctxt, rdma_xprt, rqstp); ctxt, rdma_xprt, rqstp);
atomic_inc(&rdma_stat_recv); atomic_inc(&rdma_stat_recv);
/* Build up the XDR from the receive buffers. */ svc_rdma_build_arg_xdr(rqstp, ctxt);
rdma_build_arg_xdr(rqstp, ctxt, ctxt->byte_len);
/* Decode the RDMA header. */
p = (__be32 *)rqstp->rq_arg.head[0].iov_base; p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
ret = svc_rdma_xdr_decode_req(&rqstp->rq_arg); ret = svc_rdma_xdr_decode_req(&rqstp->rq_arg);
if (ret < 0) if (ret < 0)
......
...@@ -69,7 +69,7 @@ static void svc_rdma_release_rqst(struct svc_rqst *); ...@@ -69,7 +69,7 @@ static void svc_rdma_release_rqst(struct svc_rqst *);
static void svc_rdma_detach(struct svc_xprt *xprt); static void svc_rdma_detach(struct svc_xprt *xprt);
static void svc_rdma_free(struct svc_xprt *xprt); static void svc_rdma_free(struct svc_xprt *xprt);
static int svc_rdma_has_wspace(struct svc_xprt *xprt); static int svc_rdma_has_wspace(struct svc_xprt *xprt);
static int svc_rdma_secure_port(struct svc_rqst *); static void svc_rdma_secure_port(struct svc_rqst *);
static void svc_rdma_kill_temp_xprt(struct svc_xprt *); static void svc_rdma_kill_temp_xprt(struct svc_xprt *);
static const struct svc_xprt_ops svc_rdma_ops = { static const struct svc_xprt_ops svc_rdma_ops = {
...@@ -330,9 +330,9 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) ...@@ -330,9 +330,9 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
flushed: flushed:
if (wc->status != IB_WC_WR_FLUSH_ERR) if (wc->status != IB_WC_WR_FLUSH_ERR)
pr_warn("svcrdma: receive: %s (%u/0x%x)\n", pr_err("svcrdma: Recv: %s (%u/0x%x)\n",
ib_wc_status_msg(wc->status), ib_wc_status_msg(wc->status),
wc->status, wc->vendor_err); wc->status, wc->vendor_err);
set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
svc_rdma_put_context(ctxt, 1); svc_rdma_put_context(ctxt, 1);
...@@ -401,8 +401,10 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv, ...@@ -401,8 +401,10 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
*/ */
set_bit(XPT_CONG_CTRL, &cma_xprt->sc_xprt.xpt_flags); set_bit(XPT_CONG_CTRL, &cma_xprt->sc_xprt.xpt_flags);
if (listener) if (listener) {
strcpy(cma_xprt->sc_xprt.xpt_remotebuf, "listener");
set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags); set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);
}
return cma_xprt; return cma_xprt;
} }
...@@ -762,13 +764,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) ...@@ -762,13 +764,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
if (!svc_rdma_prealloc_ctxts(newxprt)) if (!svc_rdma_prealloc_ctxts(newxprt))
goto errout; goto errout;
/*
* Limit ORD based on client limit, local device limit, and
* configured svcrdma limit.
*/
newxprt->sc_ord = min_t(size_t, dev->attrs.max_qp_rd_atom, newxprt->sc_ord);
newxprt->sc_ord = min_t(size_t, svcrdma_ord, newxprt->sc_ord);
newxprt->sc_pd = ib_alloc_pd(dev, 0); newxprt->sc_pd = ib_alloc_pd(dev, 0);
if (IS_ERR(newxprt->sc_pd)) { if (IS_ERR(newxprt->sc_pd)) {
dprintk("svcrdma: error creating PD for connect request\n"); dprintk("svcrdma: error creating PD for connect request\n");
...@@ -843,15 +838,18 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) ...@@ -843,15 +838,18 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
set_bit(RDMAXPRT_CONN_PENDING, &newxprt->sc_flags); set_bit(RDMAXPRT_CONN_PENDING, &newxprt->sc_flags);
memset(&conn_param, 0, sizeof conn_param); memset(&conn_param, 0, sizeof conn_param);
conn_param.responder_resources = 0; conn_param.responder_resources = 0;
conn_param.initiator_depth = newxprt->sc_ord; conn_param.initiator_depth = min_t(int, newxprt->sc_ord,
dev->attrs.max_qp_init_rd_atom);
if (!conn_param.initiator_depth) {
dprintk("svcrdma: invalid ORD setting\n");
ret = -EINVAL;
goto errout;
}
conn_param.private_data = &pmsg; conn_param.private_data = &pmsg;
conn_param.private_data_len = sizeof(pmsg); conn_param.private_data_len = sizeof(pmsg);
ret = rdma_accept(newxprt->sc_cm_id, &conn_param); ret = rdma_accept(newxprt->sc_cm_id, &conn_param);
if (ret) { if (ret)
dprintk("svcrdma: failed to accept new connection, ret=%d\n",
ret);
goto errout; goto errout;
}
dprintk("svcrdma: new connection %p accepted:\n", newxprt); dprintk("svcrdma: new connection %p accepted:\n", newxprt);
sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr; sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr;
...@@ -862,7 +860,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) ...@@ -862,7 +860,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
dprintk(" sq_depth : %d\n", newxprt->sc_sq_depth); dprintk(" sq_depth : %d\n", newxprt->sc_sq_depth);
dprintk(" rdma_rw_ctxs : %d\n", ctxts); dprintk(" rdma_rw_ctxs : %d\n", ctxts);
dprintk(" max_requests : %d\n", newxprt->sc_max_requests); dprintk(" max_requests : %d\n", newxprt->sc_max_requests);
dprintk(" ord : %d\n", newxprt->sc_ord); dprintk(" ord : %d\n", conn_param.initiator_depth);
return &newxprt->sc_xprt; return &newxprt->sc_xprt;
...@@ -992,9 +990,9 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt) ...@@ -992,9 +990,9 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt)
return 1; return 1;
} }
static int svc_rdma_secure_port(struct svc_rqst *rqstp) static void svc_rdma_secure_port(struct svc_rqst *rqstp)
{ {
return 1; set_bit(RQ_SECURE, &rqstp->rq_flags);
} }
static void svc_rdma_kill_temp_xprt(struct svc_xprt *xprt) static void svc_rdma_kill_temp_xprt(struct svc_xprt *xprt)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment