Commit 298fb76a authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'nfsd-5.4' of git://linux-nfs.org/~bfields/linux

Pull nfsd updates from Bruce Fields:
 "Highlights:

   - Add a new knfsd file cache, so that we don't have to open and close
     on each (NFSv2/v3) READ or WRITE. This can speed up read and write
     in some cases. It also replaces our readahead cache.

   - Prevent silent data loss on write errors, by treating write errors
     like server reboots for the purposes of write caching, thus forcing
     clients to resend their writes.

   - Tweak the code that allocates sessions to be more forgiving, so
     that NFSv4.1 mounts are less likely to hang when a server already
     has a lot of clients.

   - Eliminate an arbitrary limit on NFSv4 ACL sizes; they should now be
     limited only by the backend filesystem and the maximum RPC size.

   - Allow the server to enforce use of the correct kerberos credentials
     when a client reclaims state after a reboot.

  And some miscellaneous smaller bugfixes and cleanup"

* tag 'nfsd-5.4' of git://linux-nfs.org/~bfields/linux: (34 commits)
  sunrpc: clean up indentation issue
  nfsd: fix nfs read eof detection
  nfsd: Make nfsd_reset_boot_verifier_locked static
  nfsd: degraded slot-count more gracefully as allocation nears exhaustion.
  nfsd: handle drc over-allocation gracefully.
  nfsd: add support for upcall version 2
  nfsd: add a "GetVersion" upcall for nfsdcld
  nfsd: Reset the boot verifier on all write I/O errors
  nfsd: Don't garbage collect files that might contain write errors
  nfsd: Support the server resetting the boot verifier
  nfsd: nfsd_file cache entries should be per net namespace
  nfsd: eliminate an unnecessary acl size limit
  Deprecate nfsd fault injection
  nfsd: remove duplicated include from filecache.c
  nfsd: Fix the documentation for svcxdr_tmpalloc()
  nfsd: Fix up some unused variable warnings
  nfsd: close cached files prior to a REMOVE or RENAME that would replace target
  nfsd: rip out the raparms cache
  nfsd: have nfsd_test_lock use the nfsd_file cache
  nfsd: hook up nfs4_preprocess_stateid_op to the nfsd_file cache
  ...
parents 8f744bde e41f9efb
......@@ -327,6 +327,7 @@ void flush_delayed_fput(void)
{
delayed_fput(NULL);
}
EXPORT_SYMBOL_GPL(flush_delayed_fput);
static DECLARE_DELAYED_WORK(delayed_fput_work, delayed_fput);
......
......@@ -212,6 +212,7 @@ struct file_lock_list_struct {
static DEFINE_PER_CPU(struct file_lock_list_struct, file_lock_list);
DEFINE_STATIC_PERCPU_RWSEM(file_rwsem);
/*
* The blocked_hash is used to find POSIX lock loops for deadlock detection.
* It is protected by blocked_lock_lock.
......@@ -1991,6 +1992,64 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp,
}
EXPORT_SYMBOL(generic_setlease);
#if IS_ENABLED(CONFIG_SRCU)
/*
* Kernel subsystems can register to be notified on any attempt to set
* a new lease with the lease_notifier_chain. This is used by (e.g.) nfsd
* to close files that it may have cached when there is an attempt to set a
* conflicting lease.
*/
static struct srcu_notifier_head lease_notifier_chain;
static inline void
lease_notifier_chain_init(void)
{
srcu_init_notifier_head(&lease_notifier_chain);
}
static inline void
setlease_notifier(long arg, struct file_lock *lease)
{
if (arg != F_UNLCK)
srcu_notifier_call_chain(&lease_notifier_chain, arg, lease);
}
int lease_register_notifier(struct notifier_block *nb)
{
return srcu_notifier_chain_register(&lease_notifier_chain, nb);
}
EXPORT_SYMBOL_GPL(lease_register_notifier);
void lease_unregister_notifier(struct notifier_block *nb)
{
srcu_notifier_chain_unregister(&lease_notifier_chain, nb);
}
EXPORT_SYMBOL_GPL(lease_unregister_notifier);
#else /* !IS_ENABLED(CONFIG_SRCU) */
static inline void
lease_notifier_chain_init(void)
{
}
static inline void
setlease_notifier(long arg, struct file_lock *lease)
{
}
int lease_register_notifier(struct notifier_block *nb)
{
return 0;
}
EXPORT_SYMBOL_GPL(lease_register_notifier);
void lease_unregister_notifier(struct notifier_block *nb)
{
}
EXPORT_SYMBOL_GPL(lease_unregister_notifier);
#endif /* IS_ENABLED(CONFIG_SRCU) */
/**
* vfs_setlease - sets a lease on an open file
* @filp: file pointer
......@@ -2011,6 +2070,8 @@ EXPORT_SYMBOL(generic_setlease);
int
vfs_setlease(struct file *filp, long arg, struct file_lock **lease, void **priv)
{
if (lease)
setlease_notifier(arg, *lease);
if (filp->f_op->setlease)
return filp->f_op->setlease(filp, arg, lease, priv);
else
......@@ -2924,6 +2985,7 @@ static int __init filelock_init(void)
INIT_HLIST_HEAD(&fll->hlist);
}
lease_notifier_chain_init();
return 0;
}
core_initcall(filelock_init);
......@@ -3,6 +3,7 @@ config NFSD
tristate "NFS server support"
depends on INET
depends on FILE_LOCKING
depends on FSNOTIFY
select LOCKD
select SUNRPC
select EXPORTFS
......@@ -147,7 +148,7 @@ config NFSD_V4_SECURITY_LABEL
config NFSD_FAULT_INJECTION
bool "NFS server manual fault injection"
depends on NFSD_V4 && DEBUG_KERNEL && DEBUG_FS
depends on NFSD_V4 && DEBUG_KERNEL && DEBUG_FS && BROKEN
help
This option enables support for manually injecting faults
into the NFS server. This is intended to be used for
......
......@@ -11,7 +11,8 @@ obj-$(CONFIG_NFSD) += nfsd.o
nfsd-y += trace.o
nfsd-y += nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \
export.o auth.o lockd.o nfscache.o nfsxdr.o stats.o
export.o auth.o lockd.o nfscache.o nfsxdr.o \
stats.o filecache.o
nfsd-$(CONFIG_NFSD_FAULT_INJECTION) += fault_inject.o
nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o
nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o
......
......@@ -39,14 +39,6 @@ struct nfs4_acl;
struct svc_fh;
struct svc_rqst;
/*
* Maximum ACL we'll accept from a client; chosen (somewhat
* arbitrarily) so that kmalloc'ing the ACL shouldn't require a
* high-order allocation. This allows 204 ACEs on x86_64:
*/
#define NFS4_ACL_MAX ((PAGE_SIZE - sizeof(struct nfs4_acl)) \
/ sizeof(struct nfs4_ace))
int nfs4_acl_bytes(int entries);
int nfs4_acl_get_whotype(char *, u32);
__be32 nfs4_acl_write_who(struct xdr_stream *xdr, int who);
......
......@@ -15,6 +15,7 @@
#include "blocklayoutxdr.h"
#include "pnfs.h"
#include "filecache.h"
#define NFSDDBG_FACILITY NFSDDBG_PNFS
......@@ -404,7 +405,7 @@ static void
nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls)
{
struct nfs4_client *clp = ls->ls_stid.sc_client;
struct block_device *bdev = ls->ls_file->f_path.mnt->mnt_sb->s_bdev;
struct block_device *bdev = ls->ls_file->nf_file->f_path.mnt->mnt_sb->s_bdev;
bdev->bd_disk->fops->pr_ops->pr_preempt(bdev, NFSD_MDS_PR_KEY,
nfsd4_scsi_pr_key(clp), 0, true);
......
......@@ -22,6 +22,7 @@
#include "nfsfh.h"
#include "netns.h"
#include "pnfs.h"
#include "filecache.h"
#define NFSDDBG_FACILITY NFSDDBG_EXPORT
......@@ -232,6 +233,17 @@ static struct cache_head *expkey_alloc(void)
return NULL;
}
static void expkey_flush(void)
{
/*
* Take the nfsd_mutex here to ensure that the file cache is not
* destroyed while we're in the middle of flushing.
*/
mutex_lock(&nfsd_mutex);
nfsd_file_cache_purge(current->nsproxy->net_ns);
mutex_unlock(&nfsd_mutex);
}
static const struct cache_detail svc_expkey_cache_template = {
.owner = THIS_MODULE,
.hash_size = EXPKEY_HASHMAX,
......@@ -244,6 +256,7 @@ static const struct cache_detail svc_expkey_cache_template = {
.init = expkey_init,
.update = expkey_update,
.alloc = expkey_alloc,
.flush = expkey_flush,
};
static int
......
This diff is collapsed.
#ifndef _FS_NFSD_FILECACHE_H
#define _FS_NFSD_FILECACHE_H
#include <linux/fsnotify_backend.h>
/*
* This is the fsnotify_mark container that nfsd attaches to the files that it
* is holding open. Note that we have a separate refcount here aside from the
* one in the fsnotify_mark. We only want a single fsnotify_mark attached to
* the inode, and for each nfsd_file to hold a reference to it.
*
* The fsnotify_mark is itself refcounted, but that's not sufficient to tell us
* how to put that reference. If there are still outstanding nfsd_files that
* reference the mark, then we would want to call fsnotify_put_mark on it.
* If there were not, then we'd need to call fsnotify_destroy_mark. Since we
* can't really tell the difference, we use the nfm_mark to keep track of how
* many nfsd_files hold references to the mark. When that counter goes to zero
* then we know to call fsnotify_destroy_mark on it.
*/
struct nfsd_file_mark {
struct fsnotify_mark nfm_mark;
atomic_t nfm_ref;
};
/*
* A representation of a file that has been opened by knfsd. These are hashed
* in the hashtable by inode pointer value. Note that this object doesn't
* hold a reference to the inode by itself, so the nf_inode pointer should
* never be dereferenced, only used for comparison.
*/
struct nfsd_file {
struct hlist_node nf_node;
struct list_head nf_lru;
struct rcu_head nf_rcu;
struct file *nf_file;
const struct cred *nf_cred;
struct net *nf_net;
#define NFSD_FILE_HASHED (0)
#define NFSD_FILE_PENDING (1)
#define NFSD_FILE_BREAK_READ (2)
#define NFSD_FILE_BREAK_WRITE (3)
#define NFSD_FILE_REFERENCED (4)
unsigned long nf_flags;
struct inode *nf_inode;
unsigned int nf_hashval;
atomic_t nf_ref;
unsigned char nf_may;
struct nfsd_file_mark *nf_mark;
};
int nfsd_file_cache_init(void);
void nfsd_file_cache_purge(struct net *);
void nfsd_file_cache_shutdown(void);
void nfsd_file_put(struct nfsd_file *nf);
struct nfsd_file *nfsd_file_get(struct nfsd_file *nf);
void nfsd_file_close_inode_sync(struct inode *inode);
bool nfsd_file_is_cached(struct inode *inode);
__be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
unsigned int may_flags, struct nfsd_file **nfp);
int nfsd_file_cache_stats_open(struct inode *, struct file *);
#endif /* _FS_NFSD_FILECACHE_H */
......@@ -104,6 +104,7 @@ struct nfsd_net {
/* Time of server startup */
struct timespec64 nfssvc_boot;
seqlock_t boot_lock;
/*
* Max number of connections this nfsd container will allow. Defaults
......@@ -179,4 +180,7 @@ struct nfsd_net {
extern void nfsd_netns_free_versions(struct nfsd_net *nn);
extern unsigned int nfsd_net_id;
void nfsd_copy_boot_verifier(__be32 verf[2], struct nfsd_net *nn);
void nfsd_reset_boot_verifier(struct nfsd_net *nn);
#endif /* __NFSD_NETNS_H__ */
......@@ -172,13 +172,8 @@ nfsd3_proc_read(struct svc_rqst *rqstp)
nfserr = nfsd_read(rqstp, &resp->fh,
argp->offset,
rqstp->rq_vec, argp->vlen,
&resp->count);
if (nfserr == 0) {
struct inode *inode = d_inode(resp->fh.fh_dentry);
resp->eof = nfsd_eof_on_read(cnt, resp->count, argp->offset,
inode->i_size);
}
&resp->count,
&resp->eof);
RETURN_STATUS(nfserr);
}
......
......@@ -27,6 +27,7 @@ static u32 nfs3_ftypes[] = {
NF3SOCK, NF3BAD, NF3LNK, NF3BAD,
};
/*
* XDR functions for basic NFS types
*/
......@@ -751,14 +752,16 @@ nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p)
{
struct nfsd3_writeres *resp = rqstp->rq_resp;
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
__be32 verf[2];
p = encode_wcc_data(rqstp, p, &resp->fh);
if (resp->status == 0) {
*p++ = htonl(resp->count);
*p++ = htonl(resp->committed);
/* unique identifier, y2038 overflow can be ignored */
*p++ = htonl((u32)nn->nfssvc_boot.tv_sec);
*p++ = htonl(nn->nfssvc_boot.tv_nsec);
nfsd_copy_boot_verifier(verf, nn);
*p++ = verf[0];
*p++ = verf[1];
}
return xdr_ressize_check(rqstp, p);
}
......@@ -1125,13 +1128,15 @@ nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p)
{
struct nfsd3_commitres *resp = rqstp->rq_resp;
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
__be32 verf[2];
p = encode_wcc_data(rqstp, p, &resp->fh);
/* Write verifier */
if (resp->status == 0) {
/* unique identifier, y2038 overflow can be ignored */
*p++ = htonl((u32)nn->nfssvc_boot.tv_sec);
*p++ = htonl(nn->nfssvc_boot.tv_nsec);
nfsd_copy_boot_verifier(verf, nn);
*p++ = verf[0];
*p++ = verf[1];
}
return xdr_ressize_check(rqstp, p);
}
......
......@@ -512,11 +512,9 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
if (unlikely(status))
return status;
if (cb != NULL) {
status = decode_cb_sequence4res(xdr, cb);
if (unlikely(status || cb->cb_seq_status))
return status;
}
status = decode_cb_sequence4res(xdr, cb);
if (unlikely(status || cb->cb_seq_status))
return status;
return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status);
}
......@@ -604,11 +602,10 @@ static int nfs4_xdr_dec_cb_layout(struct rpc_rqst *rqstp,
if (unlikely(status))
return status;
if (cb) {
status = decode_cb_sequence4res(xdr, cb);
if (unlikely(status || cb->cb_seq_status))
return status;
}
status = decode_cb_sequence4res(xdr, cb);
if (unlikely(status || cb->cb_seq_status))
return status;
return decode_cb_op_status(xdr, OP_CB_LAYOUTRECALL, &cb->cb_status);
}
#endif /* CONFIG_NFSD_PNFS */
......@@ -663,11 +660,10 @@ static int nfs4_xdr_dec_cb_notify_lock(struct rpc_rqst *rqstp,
if (unlikely(status))
return status;
if (cb) {
status = decode_cb_sequence4res(xdr, cb);
if (unlikely(status || cb->cb_seq_status))
return status;
}
status = decode_cb_sequence4res(xdr, cb);
if (unlikely(status || cb->cb_seq_status))
return status;
return decode_cb_op_status(xdr, OP_CB_NOTIFY_LOCK, &cb->cb_status);
}
......@@ -759,11 +755,10 @@ static int nfs4_xdr_dec_cb_offload(struct rpc_rqst *rqstp,
if (unlikely(status))
return status;
if (cb) {
status = decode_cb_sequence4res(xdr, cb);
if (unlikely(status || cb->cb_seq_status))
return status;
}
status = decode_cb_sequence4res(xdr, cb);
if (unlikely(status || cb->cb_seq_status))
return status;
return decode_cb_op_status(xdr, OP_CB_OFFLOAD, &cb->cb_status);
}
/*
......
......@@ -169,8 +169,8 @@ nfsd4_free_layout_stateid(struct nfs4_stid *stid)
spin_unlock(&fp->fi_lock);
if (!nfsd4_layout_ops[ls->ls_layout_type]->disable_recalls)
vfs_setlease(ls->ls_file, F_UNLCK, NULL, (void **)&ls);
fput(ls->ls_file);
vfs_setlease(ls->ls_file->nf_file, F_UNLCK, NULL, (void **)&ls);
nfsd_file_put(ls->ls_file);
if (ls->ls_recalled)
atomic_dec(&ls->ls_stid.sc_file->fi_lo_recalls);
......@@ -197,7 +197,7 @@ nfsd4_layout_setlease(struct nfs4_layout_stateid *ls)
fl->fl_end = OFFSET_MAX;
fl->fl_owner = ls;
fl->fl_pid = current->tgid;
fl->fl_file = ls->ls_file;
fl->fl_file = ls->ls_file->nf_file;
status = vfs_setlease(fl->fl_file, fl->fl_type, &fl, NULL);
if (status) {
......@@ -236,13 +236,13 @@ nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate,
NFSPROC4_CLNT_CB_LAYOUT);
if (parent->sc_type == NFS4_DELEG_STID)
ls->ls_file = get_file(fp->fi_deleg_file);
ls->ls_file = nfsd_file_get(fp->fi_deleg_file);
else
ls->ls_file = find_any_file(fp);
BUG_ON(!ls->ls_file);
if (nfsd4_layout_setlease(ls)) {
fput(ls->ls_file);
nfsd_file_put(ls->ls_file);
put_nfs4_file(fp);
kmem_cache_free(nfs4_layout_stateid_cache, ls);
return NULL;
......@@ -626,7 +626,7 @@ nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls)
argv[0] = (char *)nfsd_recall_failed;
argv[1] = addr_str;
argv[2] = ls->ls_file->f_path.mnt->mnt_sb->s_id;
argv[2] = ls->ls_file->nf_file->f_path.mnt->mnt_sb->s_id;
argv[3] = NULL;
error = call_usermodehelper(nfsd_recall_failed, argv, envp,
......
......@@ -568,17 +568,11 @@ nfsd4_access(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
static void gen_boot_verifier(nfs4_verifier *verifier, struct net *net)
{
__be32 verf[2];
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
__be32 *verf = (__be32 *)verifier->data;
/*
* This is opaque to client, so no need to byte-swap. Use
* __force to keep sparse happy. y2038 time_t overflow is
* irrelevant in this usage.
*/
verf[0] = (__force __be32)nn->nfssvc_boot.tv_sec;
verf[1] = (__force __be32)nn->nfssvc_boot.tv_nsec;
memcpy(verifier->data, verf, sizeof(verifier->data));
BUILD_BUG_ON(2*sizeof(*verf) != sizeof(verifier->data));
nfsd_copy_boot_verifier(verf, net_generic(net, nfsd_net_id));
}
static __be32
......@@ -761,7 +755,7 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_read *read = &u->read;
__be32 status;
read->rd_filp = NULL;
read->rd_nf = NULL;
if (read->rd_offset >= OFFSET_MAX)
return nfserr_inval;
......@@ -782,7 +776,7 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
/* check stateid */
status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
&read->rd_stateid, RD_STATE,
&read->rd_filp, &read->rd_tmp_file);
&read->rd_nf);
if (status) {
dprintk("NFSD: nfsd4_read: couldn't process stateid!\n");
goto out;
......@@ -798,8 +792,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
static void
nfsd4_read_release(union nfsd4_op_u *u)
{
if (u->read.rd_filp)
fput(u->read.rd_filp);
if (u->read.rd_nf)
nfsd_file_put(u->read.rd_nf);
trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp,
u->read.rd_offset, u->read.rd_length);
}
......@@ -954,7 +948,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
status = nfs4_preprocess_stateid_op(rqstp, cstate,
&cstate->current_fh, &setattr->sa_stateid,
WR_STATE, NULL, NULL);
WR_STATE, NULL);
if (status) {
dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n");
return status;
......@@ -993,7 +987,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
{
struct nfsd4_write *write = &u->write;
stateid_t *stateid = &write->wr_stateid;
struct file *filp = NULL;
struct nfsd_file *nf = NULL;
__be32 status = nfs_ok;
unsigned long cnt;
int nvecs;
......@@ -1005,7 +999,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
trace_nfsd_write_start(rqstp, &cstate->current_fh,
write->wr_offset, cnt);
status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
stateid, WR_STATE, &filp, NULL);
stateid, WR_STATE, &nf);
if (status) {
dprintk("NFSD: nfsd4_write: couldn't process stateid!\n");
return status;
......@@ -1018,10 +1012,10 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
&write->wr_head, write->wr_buflen);
WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec));
status = nfsd_vfs_write(rqstp, &cstate->current_fh, filp,
status = nfsd_vfs_write(rqstp, &cstate->current_fh, nf->nf_file,
write->wr_offset, rqstp->rq_vec, nvecs, &cnt,
write->wr_how_written);
fput(filp);
nfsd_file_put(nf);
write->wr_bytes_written = cnt;
trace_nfsd_write_done(rqstp, &cstate->current_fh,
......@@ -1031,8 +1025,8 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
static __be32
nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
stateid_t *src_stateid, struct file **src,
stateid_t *dst_stateid, struct file **dst)
stateid_t *src_stateid, struct nfsd_file **src,
stateid_t *dst_stateid, struct nfsd_file **dst)
{
__be32 status;
......@@ -1040,22 +1034,22 @@ nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
return nfserr_nofilehandle;
status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->save_fh,
src_stateid, RD_STATE, src, NULL);
src_stateid, RD_STATE, src);
if (status) {
dprintk("NFSD: %s: couldn't process src stateid!\n", __func__);
goto out;
}
status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
dst_stateid, WR_STATE, dst, NULL);
dst_stateid, WR_STATE, dst);
if (status) {
dprintk("NFSD: %s: couldn't process dst stateid!\n", __func__);
goto out_put_src;
}
/* fix up for NFS-specific error code */
if (!S_ISREG(file_inode(*src)->i_mode) ||
!S_ISREG(file_inode(*dst)->i_mode)) {
if (!S_ISREG(file_inode((*src)->nf_file)->i_mode) ||
!S_ISREG(file_inode((*dst)->nf_file)->i_mode)) {
status = nfserr_wrong_type;
goto out_put_dst;
}
......@@ -1063,9 +1057,9 @@ nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
out:
return status;
out_put_dst:
fput(*dst);
nfsd_file_put(*dst);
out_put_src:
fput(*src);
nfsd_file_put(*src);
goto out;
}
......@@ -1074,7 +1068,7 @@ nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
{
struct nfsd4_clone *clone = &u->clone;
struct file *src, *dst;
struct nfsd_file *src, *dst;
__be32 status;
status = nfsd4_verify_copy(rqstp, cstate, &clone->cl_src_stateid, &src,
......@@ -1082,11 +1076,11 @@ nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (status)
goto out;
status = nfsd4_clone_file_range(src, clone->cl_src_pos,
dst, clone->cl_dst_pos, clone->cl_count);
status = nfsd4_clone_file_range(src->nf_file, clone->cl_src_pos,
dst->nf_file, clone->cl_dst_pos, clone->cl_count);
fput(dst);
fput(src);
nfsd_file_put(dst);
nfsd_file_put(src);
out:
return status;
}
......@@ -1176,8 +1170,9 @@ static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy)
do {
if (kthread_should_stop())
break;
bytes_copied = nfsd_copy_file_range(copy->file_src, src_pos,
copy->file_dst, dst_pos, bytes_total);
bytes_copied = nfsd_copy_file_range(copy->nf_src->nf_file,
src_pos, copy->nf_dst->nf_file, dst_pos,
bytes_total);
if (bytes_copied <= 0)
break;
bytes_total -= bytes_copied;
......@@ -1204,8 +1199,8 @@ static __be32 nfsd4_do_copy(struct nfsd4_copy *copy, bool sync)
status = nfs_ok;
}
fput(copy->file_src);
fput(copy->file_dst);
nfsd_file_put(copy->nf_src);
nfsd_file_put(copy->nf_dst);
return status;
}
......@@ -1218,16 +1213,16 @@ static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst)
memcpy(&dst->cp_res, &src->cp_res, sizeof(src->cp_res));
memcpy(&dst->fh, &src->fh, sizeof(src->fh));
dst->cp_clp = src->cp_clp;
dst->file_dst = get_file(src->file_dst);
dst->file_src = get_file(src->file_src);
dst->nf_dst = nfsd_file_get(src->nf_dst);
dst->nf_src = nfsd_file_get(src->nf_src);
memcpy(&dst->cp_stateid, &src->cp_stateid, sizeof(src->cp_stateid));
}
static void cleanup_async_copy(struct nfsd4_copy *copy)
{
nfs4_free_cp_state(copy);
fput(copy->file_dst);
fput(copy->file_src);
nfsd_file_put(copy->nf_dst);
nfsd_file_put(copy->nf_src);
spin_lock(&copy->cp_clp->async_lock);
list_del(&copy->copies);
spin_unlock(&copy->cp_clp->async_lock);
......@@ -1264,8 +1259,8 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_copy *async_copy = NULL;
status = nfsd4_verify_copy(rqstp, cstate, &copy->cp_src_stateid,
&copy->file_src, &copy->cp_dst_stateid,
&copy->file_dst);
&copy->nf_src, &copy->cp_dst_stateid,
&copy->nf_dst);
if (status)
goto out;
......@@ -1347,21 +1342,21 @@ nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_fallocate *fallocate, int flags)
{
__be32 status;
struct file *file;
struct nfsd_file *nf;
status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
&fallocate->falloc_stateid,
WR_STATE, &file, NULL);
WR_STATE, &nf);
if (status != nfs_ok) {
dprintk("NFSD: nfsd4_fallocate: couldn't process stateid!\n");
return status;
}
status = nfsd4_vfs_fallocate(rqstp, &cstate->current_fh, file,
status = nfsd4_vfs_fallocate(rqstp, &cstate->current_fh, nf->nf_file,
fallocate->falloc_offset,
fallocate->falloc_length,
flags);
fput(file);
nfsd_file_put(nf);
return status;
}
static __be32
......@@ -1406,11 +1401,11 @@ nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_seek *seek = &u->seek;
int whence;
__be32 status;
struct file *file;
struct nfsd_file *nf;
status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
&seek->seek_stateid,
RD_STATE, &file, NULL);
RD_STATE, &nf);
if (status) {
dprintk("NFSD: nfsd4_seek: couldn't process stateid!\n");
return status;
......@@ -1432,14 +1427,14 @@ nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
* Note: This call does change file->f_pos, but nothing in NFSD
* should ever file->f_pos.
*/
seek->seek_pos = vfs_llseek(file, seek->seek_offset, whence);
seek->seek_pos = vfs_llseek(nf->nf_file, seek->seek_offset, whence);
if (seek->seek_pos < 0)
status = nfserrno(seek->seek_pos);
else if (seek->seek_pos >= i_size_read(file_inode(file)))
else if (seek->seek_pos >= i_size_read(file_inode(nf->nf_file)))
seek->seek_eof = true;
out:
fput(file);
nfsd_file_put(nf);
return status;
}
......
This diff is collapsed.
This diff is collapsed.
......@@ -49,6 +49,7 @@
#include "cache.h"
#include "netns.h"
#include "pnfs.h"
#include "filecache.h"
#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
#include <linux/security.h>
......@@ -203,6 +204,13 @@ static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes)
return p;
}
static unsigned int compoundargs_bytes_left(struct nfsd4_compoundargs *argp)
{
unsigned int this = (char *)argp->end - (char *)argp->p;
return this + argp->pagelen;
}
static int zero_clientid(clientid_t *clid)
{
return (clid->cl_boot == 0) && (clid->cl_id == 0);
......@@ -211,10 +219,10 @@ static int zero_clientid(clientid_t *clid)
/**
* svcxdr_tmpalloc - allocate memory to be freed after compound processing
* @argp: NFSv4 compound argument structure
* @p: pointer to be freed (with kfree())
* @len: length of buffer to allocate
*
* Marks @p to be freed when processing the compound operation
* described in @argp finishes.
* Allocates a buffer of size @len to be freed when processing the compound
* operation described in @argp finishes.
*/
static void *
svcxdr_tmpalloc(struct nfsd4_compoundargs *argp, u32 len)
......@@ -347,7 +355,12 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
READ_BUF(4); len += 4;
nace = be32_to_cpup(p++);
if (nace > NFS4_ACL_MAX)
if (nace > compoundargs_bytes_left(argp)/20)
/*
* Even with 4-byte names there wouldn't be
* space for that many aces; something fishy is
* going on:
*/
return nfserr_fbig;
*acl = svcxdr_tmpalloc(argp, nfs4_acl_bytes(nace));
......@@ -1418,7 +1431,6 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
struct nfsd4_create_session *sess)
{
DECODE_HEAD;
u32 dummy;
READ_BUF(16);
COPYMEM(&sess->clientid, 8);
......@@ -1427,7 +1439,7 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
/* Fore channel attrs */
READ_BUF(28);
dummy = be32_to_cpup(p++); /* headerpadsz is always 0 */
p++; /* headerpadsz is always 0 */
sess->fore_channel.maxreq_sz = be32_to_cpup(p++);
sess->fore_channel.maxresp_sz = be32_to_cpup(p++);
sess->fore_channel.maxresp_cached = be32_to_cpup(p++);
......@@ -1444,7 +1456,7 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
/* Back channel attrs */
READ_BUF(28);
dummy = be32_to_cpup(p++); /* headerpadsz is always 0 */
p++; /* headerpadsz is always 0 */
sess->back_channel.maxreq_sz = be32_to_cpup(p++);
sess->back_channel.maxresp_sz = be32_to_cpup(p++);
sess->back_channel.maxresp_cached = be32_to_cpup(p++);
......@@ -1736,7 +1748,6 @@ static __be32
nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy)
{
DECODE_HEAD;
unsigned int tmp;
status = nfsd4_decode_stateid(argp, &copy->cp_src_stateid);
if (status)
......@@ -1751,7 +1762,7 @@ nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy)
p = xdr_decode_hyper(p, &copy->cp_count);
p++; /* ca_consecutive: we always do consecutive copies */
copy->cp_synchronous = be32_to_cpup(p++);
tmp = be32_to_cpup(p); /* Source server list not supported */
/* tmp = be32_to_cpup(p); Source server list not supported */
DECODE_TAIL;
}
......@@ -3217,9 +3228,8 @@ nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
if (!p)
return nfserr_resource;
encode_cinfo(p, &create->cr_cinfo);
nfserr = nfsd4_encode_bitmap(xdr, create->cr_bmval[0],
return nfsd4_encode_bitmap(xdr, create->cr_bmval[0],
create->cr_bmval[1], create->cr_bmval[2]);
return 0;
}
static __be32
......@@ -3462,7 +3472,7 @@ static __be32 nfsd4_encode_splice_read(
len = maxcount;
nfserr = nfsd_splice_read(read->rd_rqstp, read->rd_fhp,
file, read->rd_offset, &maxcount);
file, read->rd_offset, &maxcount, &eof);
read->rd_length = maxcount;
if (nfserr) {
/*
......@@ -3474,9 +3484,6 @@ static __be32 nfsd4_encode_splice_read(
return nfserr;
}
eof = nfsd_eof_on_read(len, maxcount, read->rd_offset,
d_inode(read->rd_fhp->fh_dentry)->i_size);
*(p++) = htonl(eof);
*(p++) = htonl(maxcount);
......@@ -3547,15 +3554,13 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
len = maxcount;
nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset,
resp->rqstp->rq_vec, read->rd_vlen, &maxcount);
resp->rqstp->rq_vec, read->rd_vlen, &maxcount,
&eof);
read->rd_length = maxcount;
if (nfserr)
return nfserr;
xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3));
eof = nfsd_eof_on_read(len, maxcount, read->rd_offset,
d_inode(read->rd_fhp->fh_dentry)->i_size);
tmp = htonl(eof);
write_bytes_to_xdr_buf(xdr->buf, starting_len , &tmp, 4);
tmp = htonl(maxcount);
......@@ -3574,11 +3579,14 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
{
unsigned long maxcount;
struct xdr_stream *xdr = &resp->xdr;
struct file *file = read->rd_filp;
struct file *file;
int starting_len = xdr->buf->len;
struct raparms *ra = NULL;
__be32 *p;
if (nfserr)
return nfserr;
file = read->rd_nf->nf_file;
p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */
if (!p) {
WARN_ON_ONCE(test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags));
......@@ -3596,18 +3604,12 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
(xdr->buf->buflen - xdr->buf->len));
maxcount = min_t(unsigned long, maxcount, read->rd_length);
if (read->rd_tmp_file)
ra = nfsd_init_raparms(file);
if (file->f_op->splice_read &&
test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags))
nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount);
else
nfserr = nfsd4_encode_readv(resp, read, file, maxcount);
if (ra)
nfsd_put_raparams(file, ra);
if (nfserr)
xdr_truncate_encode(xdr, starting_len);
......
......@@ -1476,6 +1476,7 @@ static __net_init int nfsd_init_net(struct net *net)
atomic_set(&nn->ntf_refcnt, 0);
init_waitqueue_head(&nn->ntf_wq);
seqlock_init(&nn->boot_lock);
mnt = vfs_kern_mount(&nfsd_fs_type, SB_KERNMOUNT, "nfsd", NULL);
if (IS_ERR(mnt)) {
......
......@@ -172,6 +172,7 @@ nfsd_proc_read(struct svc_rqst *rqstp)
struct nfsd_readargs *argp = rqstp->rq_argp;
struct nfsd_readres *resp = rqstp->rq_resp;
__be32 nfserr;
u32 eof;
dprintk("nfsd: READ %s %d bytes at %d\n",
SVCFH_fmt(&argp->fh),
......@@ -195,7 +196,8 @@ nfsd_proc_read(struct svc_rqst *rqstp)
nfserr = nfsd_read(rqstp, fh_copy(&resp->fh, &argp->fh),
argp->offset,
rqstp->rq_vec, argp->vlen,
&resp->count);
&resp->count,
&eof);
if (nfserr) return nfserr;
return fh_getattr(&resp->fh, &resp->stat);
......
......@@ -27,6 +27,7 @@
#include "cache.h"
#include "vfs.h"
#include "netns.h"
#include "filecache.h"
#define NFSDDBG_FACILITY NFSDDBG_SVC
......@@ -313,22 +314,17 @@ static int nfsd_startup_generic(int nrservs)
if (nfsd_users++)
return 0;
/*
* Readahead param cache - will no-op if it already exists.
* (Note therefore results will be suboptimal if number of
* threads is modified after nfsd start.)
*/
ret = nfsd_racache_init(2*nrservs);
ret = nfsd_file_cache_init();
if (ret)
goto dec_users;
ret = nfs4_state_start();
if (ret)
goto out_racache;
goto out_file_cache;
return 0;
out_racache:
nfsd_racache_shutdown();
out_file_cache:
nfsd_file_cache_shutdown();
dec_users:
nfsd_users--;
return ret;
......@@ -340,7 +336,7 @@ static void nfsd_shutdown_generic(void)
return;
nfs4_state_shutdown();
nfsd_racache_shutdown();
nfsd_file_cache_shutdown();
}
static bool nfsd_needs_lockd(struct nfsd_net *nn)
......@@ -348,6 +344,35 @@ static bool nfsd_needs_lockd(struct nfsd_net *nn)
return nfsd_vers(nn, 2, NFSD_TEST) || nfsd_vers(nn, 3, NFSD_TEST);
}
void nfsd_copy_boot_verifier(__be32 verf[2], struct nfsd_net *nn)
{
int seq = 0;
do {
read_seqbegin_or_lock(&nn->boot_lock, &seq);
/*
* This is opaque to client, so no need to byte-swap. Use
* __force to keep sparse happy. y2038 time_t overflow is
* irrelevant in this usage
*/
verf[0] = (__force __be32)nn->nfssvc_boot.tv_sec;
verf[1] = (__force __be32)nn->nfssvc_boot.tv_nsec;
} while (need_seqretry(&nn->boot_lock, seq));
done_seqretry(&nn->boot_lock, seq);
}
static void nfsd_reset_boot_verifier_locked(struct nfsd_net *nn)
{
ktime_get_real_ts64(&nn->nfssvc_boot);
}
void nfsd_reset_boot_verifier(struct nfsd_net *nn)
{
write_seqlock(&nn->boot_lock);
nfsd_reset_boot_verifier_locked(nn);
write_sequnlock(&nn->boot_lock);
}
static int nfsd_startup_net(int nrservs, struct net *net, const struct cred *cred)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
......@@ -391,6 +416,7 @@ static void nfsd_shutdown_net(struct net *net)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
nfsd_file_cache_purge(net);
nfs4_state_shutdown_net(net);
if (nn->lockd_up) {
lockd_down(net);
......@@ -599,7 +625,7 @@ int nfsd_create_serv(struct net *net)
#endif
}
atomic_inc(&nn->ntf_refcnt);
ktime_get_real_ts64(&nn->nfssvc_boot); /* record boot time */
nfsd_reset_boot_verifier(nn);
return 0;
}
......
......@@ -378,6 +378,7 @@ struct nfs4_client_reclaim {
struct list_head cr_strhash; /* hash by cr_name */
struct nfs4_client *cr_clp; /* pointer to associated clp */
struct xdr_netobj cr_name; /* recovery dir name */
struct xdr_netobj cr_princhash;
};
/* A reasonable value for REPLAY_ISIZE was estimated as follows:
......@@ -506,7 +507,7 @@ struct nfs4_file {
};
struct list_head fi_clnt_odstate;
/* One each for O_RDONLY, O_WRONLY, O_RDWR: */
struct file * fi_fds[3];
struct nfsd_file *fi_fds[3];
/*
* Each open or lock stateid contributes 0-4 to the counts
* below depending on which bits are set in st_access_bitmap:
......@@ -516,7 +517,7 @@ struct nfs4_file {
*/
atomic_t fi_access[2];
u32 fi_share_deny;
struct file *fi_deleg_file;
struct nfsd_file *fi_deleg_file;
int fi_delegees;
struct knfsd_fh fi_fhandle;
bool fi_had_conflict;
......@@ -565,7 +566,7 @@ struct nfs4_layout_stateid {
spinlock_t ls_lock;
struct list_head ls_layouts;
u32 ls_layout_type;
struct file *ls_file;
struct nfsd_file *ls_file;
struct nfsd4_callback ls_recall;
stateid_t ls_recall_sid;
bool ls_recalled;
......@@ -616,7 +617,7 @@ struct nfsd4_copy;
extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
struct nfsd4_compound_state *cstate, struct svc_fh *fhp,
stateid_t *stateid, int flags, struct file **filp, bool *tmp_file);
stateid_t *stateid, int flags, struct nfsd_file **filp);
__be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
stateid_t *stateid, unsigned char typemask,
struct nfs4_stid **s, struct nfsd_net *nn);
......@@ -645,7 +646,7 @@ extern void nfsd4_shutdown_callback(struct nfs4_client *);
extern void nfsd4_shutdown_copy(struct nfs4_client *clp);
extern void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp);
extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(struct xdr_netobj name,
struct nfsd_net *nn);
struct xdr_netobj princhash, struct nfsd_net *nn);
extern bool nfs4_has_reclaimed_state(struct xdr_netobj name, struct nfsd_net *nn);
struct nfs4_file *find_file(struct knfsd_fh *fh);
......@@ -657,7 +658,7 @@ static inline void get_nfs4_file(struct nfs4_file *fi)
{
refcount_inc(&fi->fi_ref);
}
struct file *find_any_file(struct nfs4_file *f);
struct nfsd_file *find_any_file(struct nfs4_file *f);
/* grace period management */
void nfsd4_end_grace(struct nfsd_net *nn);
......
......@@ -126,6 +126,8 @@ DEFINE_NFSD_ERR_EVENT(read_err);
DEFINE_NFSD_ERR_EVENT(write_err);
#include "state.h"
#include "filecache.h"
#include "vfs.h"
DECLARE_EVENT_CLASS(nfsd_stateid_class,
TP_PROTO(stateid_t *stp),
......@@ -164,6 +166,144 @@ DEFINE_STATEID_EVENT(layout_recall_done);
DEFINE_STATEID_EVENT(layout_recall_fail);
DEFINE_STATEID_EVENT(layout_recall_release);
#define show_nf_flags(val) \
__print_flags(val, "|", \
{ 1 << NFSD_FILE_HASHED, "HASHED" }, \
{ 1 << NFSD_FILE_PENDING, "PENDING" }, \
{ 1 << NFSD_FILE_BREAK_READ, "BREAK_READ" }, \
{ 1 << NFSD_FILE_BREAK_WRITE, "BREAK_WRITE" }, \
{ 1 << NFSD_FILE_REFERENCED, "REFERENCED"})
/* FIXME: This should probably be fleshed out in the future. */
#define show_nf_may(val) \
__print_flags(val, "|", \
{ NFSD_MAY_READ, "READ" }, \
{ NFSD_MAY_WRITE, "WRITE" }, \
{ NFSD_MAY_NOT_BREAK_LEASE, "NOT_BREAK_LEASE" })
DECLARE_EVENT_CLASS(nfsd_file_class,
TP_PROTO(struct nfsd_file *nf),
TP_ARGS(nf),
TP_STRUCT__entry(
__field(unsigned int, nf_hashval)
__field(void *, nf_inode)
__field(int, nf_ref)
__field(unsigned long, nf_flags)
__field(unsigned char, nf_may)
__field(struct file *, nf_file)
),
TP_fast_assign(
__entry->nf_hashval = nf->nf_hashval;
__entry->nf_inode = nf->nf_inode;
__entry->nf_ref = atomic_read(&nf->nf_ref);
__entry->nf_flags = nf->nf_flags;
__entry->nf_may = nf->nf_may;
__entry->nf_file = nf->nf_file;
),
TP_printk("hash=0x%x inode=0x%p ref=%d flags=%s may=%s file=%p",
__entry->nf_hashval,
__entry->nf_inode,
__entry->nf_ref,
show_nf_flags(__entry->nf_flags),
show_nf_may(__entry->nf_may),
__entry->nf_file)
)
#define DEFINE_NFSD_FILE_EVENT(name) \
DEFINE_EVENT(nfsd_file_class, name, \
TP_PROTO(struct nfsd_file *nf), \
TP_ARGS(nf))
DEFINE_NFSD_FILE_EVENT(nfsd_file_alloc);
DEFINE_NFSD_FILE_EVENT(nfsd_file_put_final);
DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash);
DEFINE_NFSD_FILE_EVENT(nfsd_file_put);
DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_release_locked);
TRACE_EVENT(nfsd_file_acquire,
TP_PROTO(struct svc_rqst *rqstp, unsigned int hash,
struct inode *inode, unsigned int may_flags,
struct nfsd_file *nf, __be32 status),
TP_ARGS(rqstp, hash, inode, may_flags, nf, status),
TP_STRUCT__entry(
__field(__be32, xid)
__field(unsigned int, hash)
__field(void *, inode)
__field(unsigned int, may_flags)
__field(int, nf_ref)
__field(unsigned long, nf_flags)
__field(unsigned char, nf_may)
__field(struct file *, nf_file)
__field(__be32, status)
),
TP_fast_assign(
__entry->xid = rqstp->rq_xid;
__entry->hash = hash;
__entry->inode = inode;
__entry->may_flags = may_flags;
__entry->nf_ref = nf ? atomic_read(&nf->nf_ref) : 0;
__entry->nf_flags = nf ? nf->nf_flags : 0;
__entry->nf_may = nf ? nf->nf_may : 0;
__entry->nf_file = nf ? nf->nf_file : NULL;
__entry->status = status;
),
TP_printk("xid=0x%x hash=0x%x inode=0x%p may_flags=%s ref=%d nf_flags=%s nf_may=%s nf_file=0x%p status=%u",
be32_to_cpu(__entry->xid), __entry->hash, __entry->inode,
show_nf_may(__entry->may_flags), __entry->nf_ref,
show_nf_flags(__entry->nf_flags),
show_nf_may(__entry->nf_may), __entry->nf_file,
be32_to_cpu(__entry->status))
);
DECLARE_EVENT_CLASS(nfsd_file_search_class,
TP_PROTO(struct inode *inode, unsigned int hash, int found),
TP_ARGS(inode, hash, found),
TP_STRUCT__entry(
__field(struct inode *, inode)
__field(unsigned int, hash)
__field(int, found)
),
TP_fast_assign(
__entry->inode = inode;
__entry->hash = hash;
__entry->found = found;
),
TP_printk("hash=0x%x inode=0x%p found=%d", __entry->hash,
__entry->inode, __entry->found)
);
#define DEFINE_NFSD_FILE_SEARCH_EVENT(name) \
DEFINE_EVENT(nfsd_file_search_class, name, \
TP_PROTO(struct inode *inode, unsigned int hash, int found), \
TP_ARGS(inode, hash, found))
DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode_sync);
DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode);
DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_is_cached);
TRACE_EVENT(nfsd_file_fsnotify_handle_event,
TP_PROTO(struct inode *inode, u32 mask),
TP_ARGS(inode, mask),
TP_STRUCT__entry(
__field(struct inode *, inode)
__field(unsigned int, nlink)
__field(umode_t, mode)
__field(u32, mask)
),
TP_fast_assign(
__entry->inode = inode;
__entry->nlink = inode->i_nlink;
__entry->mode = inode->i_mode;
__entry->mask = mask;
),
TP_printk("inode=0x%p nlink=%u mode=0%ho mask=0x%x", __entry->inode,
__entry->nlink, __entry->mode, __entry->mask)
);
#endif /* _NFSD_TRACE_H */
#undef TRACE_INCLUDE_PATH
......
This diff is collapsed.
......@@ -40,8 +40,6 @@
typedef int (*nfsd_filldir_t)(void *, const char *, int, loff_t, u64, unsigned);
/* nfsd/vfs.c */
int nfsd_racache_init(int);
void nfsd_racache_shutdown(void);
int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
struct svc_export **expp);
__be32 nfsd_lookup(struct svc_rqst *, struct svc_fh *,
......@@ -75,18 +73,23 @@ __be32 do_nfsd_create(struct svc_rqst *, struct svc_fh *,
__be32 nfsd_commit(struct svc_rqst *, struct svc_fh *,
loff_t, unsigned long);
#endif /* CONFIG_NFSD_V3 */
int nfsd_open_break_lease(struct inode *, int);
__be32 nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t,
int, struct file **);
struct raparms;
__be32 nfsd_open_verified(struct svc_rqst *, struct svc_fh *, umode_t,
int, struct file **);
__be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct file *file, loff_t offset,
unsigned long *count);
unsigned long *count,
u32 *eof);
__be32 nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct file *file, loff_t offset,
struct kvec *vec, int vlen,
unsigned long *count);
unsigned long *count,
u32 *eof);
__be32 nfsd_read(struct svc_rqst *, struct svc_fh *,
loff_t, struct kvec *, int, unsigned long *);
loff_t, struct kvec *, int, unsigned long *,
u32 *eof);
__be32 nfsd_write(struct svc_rqst *, struct svc_fh *, loff_t,
struct kvec *, int, unsigned long *, int);
__be32 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
......@@ -115,9 +118,6 @@ __be32 nfsd_statfs(struct svc_rqst *, struct svc_fh *,
__be32 nfsd_permission(struct svc_rqst *, struct svc_export *,
struct dentry *, int);
struct raparms *nfsd_init_raparms(struct file *file);
void nfsd_put_raparams(struct file *file, struct raparms *ra);
static inline int fh_want_write(struct svc_fh *fh)
{
int ret;
......@@ -152,23 +152,4 @@ static inline int nfsd_create_is_exclusive(int createmode)
|| createmode == NFS4_CREATE_EXCLUSIVE4_1;
}
static inline bool nfsd_eof_on_read(long requested, long read,
loff_t offset, loff_t size)
{
/* We assume a short read means eof: */
if (requested > read)
return true;
/*
* A non-short read might also reach end of file. The spec
* still requires us to set eof in that case.
*
* Further operations may have modified the file size since
* the read, so the following check is not atomic with the read.
* We've only seen that cause a problem for a client in the case
* where the read returned a count of 0 without setting eof.
* That case was fixed by the addition of the above check.
*/
return (offset + read >= size);
}
#endif /* LINUX_NFSD_VFS_H */
......@@ -151,7 +151,7 @@ struct nfsd3_readres {
__be32 status;
struct svc_fh fh;
unsigned long count;
int eof;
__u32 eof;
};
struct nfsd3_writeres {
......
......@@ -273,15 +273,14 @@ struct nfsd4_open_downgrade {
struct nfsd4_read {
stateid_t rd_stateid; /* request */
u64 rd_offset; /* request */
u32 rd_length; /* request */
int rd_vlen;
struct file *rd_filp;
bool rd_tmp_file;
stateid_t rd_stateid; /* request */
u64 rd_offset; /* request */
u32 rd_length; /* request */
int rd_vlen;
struct nfsd_file *rd_nf;
struct svc_rqst *rd_rqstp; /* response */
struct svc_fh * rd_fhp; /* response */
struct svc_rqst *rd_rqstp; /* response */
struct svc_fh *rd_fhp; /* response */
};
struct nfsd4_readdir {
......@@ -538,8 +537,8 @@ struct nfsd4_copy {
struct nfs4_client *cp_clp;
struct file *file_src;
struct file *file_dst;
struct nfsd_file *nf_src;
struct nfsd_file *nf_dst;
stateid_t cp_stateid;
......
......@@ -54,8 +54,6 @@ static inline void fsnotify_clear_marks_by_sb(struct super_block *sb)
{
fsnotify_destroy_marks(&sb->s_fsnotify_marks);
}
/* Wait until all marks queued for destruction are destroyed */
extern void fsnotify_wait_marks_destroyed(void);
/*
* update the dentry->d_flags of all of inode's children to indicate if inode cares
......
......@@ -108,6 +108,7 @@ void fsnotify_put_group(struct fsnotify_group *group)
if (refcount_dec_and_test(&group->refcnt))
fsnotify_final_destroy_group(group);
}
EXPORT_SYMBOL_GPL(fsnotify_put_group);
/*
* Create a new fsnotify_group and hold a reference for the group returned.
......@@ -137,6 +138,7 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
return group;
}
EXPORT_SYMBOL_GPL(fsnotify_alloc_group);
int fsnotify_fasync(int fd, struct file *file, int on)
{
......
......@@ -276,6 +276,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
queue_delayed_work(system_unbound_wq, &reaper_work,
FSNOTIFY_REAPER_DELAY);
}
EXPORT_SYMBOL_GPL(fsnotify_put_mark);
/*
* Get mark reference when we found the mark via lockless traversal of object
......@@ -430,6 +431,7 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark,
mutex_unlock(&group->mark_mutex);
fsnotify_free_mark(mark);
}
EXPORT_SYMBOL_GPL(fsnotify_destroy_mark);
/*
* Sorting function for lists of fsnotify marks.
......@@ -685,6 +687,7 @@ int fsnotify_add_mark(struct fsnotify_mark *mark, fsnotify_connp_t *connp,
mutex_unlock(&group->mark_mutex);
return ret;
}
EXPORT_SYMBOL_GPL(fsnotify_add_mark);
/*
* Given a list of marks, find the mark associated with given group. If found
......@@ -711,6 +714,7 @@ struct fsnotify_mark *fsnotify_find_mark(fsnotify_connp_t *connp,
spin_unlock(&conn->lock);
return NULL;
}
EXPORT_SYMBOL_GPL(fsnotify_find_mark);
/* Clear any marks in a group with given type mask */
void fsnotify_clear_marks_by_group(struct fsnotify_group *group,
......@@ -809,6 +813,7 @@ void fsnotify_init_mark(struct fsnotify_mark *mark,
mark->group = group;
WRITE_ONCE(mark->connector, NULL);
}
EXPORT_SYMBOL_GPL(fsnotify_init_mark);
/*
* Destroy all marks in destroy_list, waits for SRCU period to finish before
......@@ -837,3 +842,4 @@ void fsnotify_wait_marks_destroyed(void)
{
flush_delayed_work(&reaper_work);
}
EXPORT_SYMBOL_GPL(fsnotify_wait_marks_destroyed);
......@@ -1168,6 +1168,11 @@ extern void lease_get_mtime(struct inode *, struct timespec64 *time);
extern int generic_setlease(struct file *, long, struct file_lock **, void **priv);
extern int vfs_setlease(struct file *, long, struct file_lock **, void **);
extern int lease_modify(struct file_lock *, int, struct list_head *);
struct notifier_block;
extern int lease_register_notifier(struct notifier_block *);
extern void lease_unregister_notifier(struct notifier_block *);
struct files_struct;
extern void show_fd_locks(struct seq_file *f,
struct file *filp, struct files_struct *files);
......
......@@ -475,6 +475,8 @@ extern void fsnotify_destroy_mark(struct fsnotify_mark *mark,
extern void fsnotify_detach_mark(struct fsnotify_mark *mark);
/* free mark */
extern void fsnotify_free_mark(struct fsnotify_mark *mark);
/* Wait until all marks queued for destruction are destroyed */
extern void fsnotify_wait_marks_destroyed(void);
/* run all the marks in a group, and clear all of the marks attached to given object type */
extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group, unsigned int type);
/* run all the marks in a group, and clear all of the vfsmount marks */
......
......@@ -87,6 +87,7 @@ struct cache_detail {
int has_died);
struct cache_head * (*alloc)(void);
void (*flush)(void);
int (*match)(struct cache_head *orig, struct cache_head *new);
void (*init)(struct cache_head *orig, struct cache_head *new);
void (*update)(struct cache_head *orig, struct cache_head *new);
......@@ -107,9 +108,9 @@ struct cache_detail {
/* fields for communication over channel */
struct list_head queue;
atomic_t readers; /* how many time is /chennel open */
time_t last_close; /* if no readers, when did last close */
time_t last_warn; /* when we last warned about no readers */
atomic_t writers; /* how many time is /channel open */
time_t last_close; /* if no writers, when did last close */
time_t last_warn; /* when we last warned about no writers */
union {
struct proc_dir_entry *procfs;
......
......@@ -42,6 +42,7 @@
#ifndef SVC_RDMA_H
#define SVC_RDMA_H
#include <linux/llist.h>
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/svcsock.h>
#include <linux/sunrpc/rpc_rdma.h>
......@@ -107,8 +108,7 @@ struct svcxprt_rdma {
struct list_head sc_read_complete_q;
struct work_struct sc_work;
spinlock_t sc_recv_lock;
struct list_head sc_recv_ctxts;
struct llist_head sc_recv_ctxts;
};
/* sc_flags */
#define RDMAXPRT_CONN_PENDING 3
......@@ -125,6 +125,7 @@ enum {
#define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD
struct svc_rdma_recv_ctxt {
struct llist_node rc_node;
struct list_head rc_list;
struct ib_recv_wr rc_recv_wr;
struct ib_cqe rc_cqe;
......@@ -200,7 +201,6 @@ extern struct svc_xprt_class svc_rdma_bc_class;
#endif
/* svc_rdma.c */
extern struct workqueue_struct *svc_rdma_wq;
extern int svc_rdma_init(void);
extern void svc_rdma_cleanup(void);
......
......@@ -26,17 +26,22 @@
#include <linux/types.h>
/* latest upcall version available */
#define CLD_UPCALL_VERSION 1
#define CLD_UPCALL_VERSION 2
/* defined by RFC3530 */
#define NFS4_OPAQUE_LIMIT 1024
#ifndef SHA256_DIGEST_SIZE
#define SHA256_DIGEST_SIZE 32
#endif
enum cld_command {
Cld_Create, /* create a record for this cm_id */
Cld_Remove, /* remove record of this cm_id */
Cld_Check, /* is this cm_id allowed? */
Cld_GraceDone, /* grace period is complete */
Cld_GraceStart,
Cld_GraceStart, /* grace start (upload client records) */
Cld_GetVersion, /* query max supported upcall version */
};
/* representation of long-form NFSv4 client ID */
......@@ -45,6 +50,17 @@ struct cld_name {
unsigned char cn_id[NFS4_OPAQUE_LIMIT]; /* client-provided */
} __attribute__((packed));
/* sha256 hash of the kerberos principal */
struct cld_princhash {
__u8 cp_len; /* length of cp_data */
unsigned char cp_data[SHA256_DIGEST_SIZE]; /* hash of principal */
} __attribute__((packed));
struct cld_clntinfo {
struct cld_name cc_name;
struct cld_princhash cc_princhash;
} __attribute__((packed));
/* message struct for communication with userspace */
struct cld_msg {
__u8 cm_vers; /* upcall version */
......@@ -54,7 +70,28 @@ struct cld_msg {
union {
__s64 cm_gracetime; /* grace period start time */
struct cld_name cm_name;
__u8 cm_version; /* for getting max version */
} __attribute__((packed)) cm_u;
} __attribute__((packed));
/* version 2 message can include hash of kerberos principal */
struct cld_msg_v2 {
__u8 cm_vers; /* upcall version */
__u8 cm_cmd; /* upcall command */
__s16 cm_status; /* return code */
__u32 cm_xid; /* transaction id */
union {
struct cld_name cm_name;
__u8 cm_version; /* for getting max version */
struct cld_clntinfo cm_clntinfo; /* name & princ hash */
} __attribute__((packed)) cm_u;
} __attribute__((packed));
struct cld_msg_hdr {
__u8 cm_vers; /* upcall version */
__u8 cm_cmd; /* upcall command */
__s16 cm_status; /* return code */
__u32 cm_xid; /* transaction id */
} __attribute__((packed));
#endif /* !_NFSD_CLD_H */
......@@ -373,7 +373,7 @@ void sunrpc_init_cache_detail(struct cache_detail *cd)
spin_lock(&cache_list_lock);
cd->nextcheck = 0;
cd->entries = 0;
atomic_set(&cd->readers, 0);
atomic_set(&cd->writers, 0);
cd->last_close = 0;
cd->last_warn = -1;
list_add(&cd->others, &cache_list);
......@@ -1029,11 +1029,13 @@ static int cache_open(struct inode *inode, struct file *filp,
}
rp->offset = 0;
rp->q.reader = 1;
atomic_inc(&cd->readers);
spin_lock(&queue_lock);
list_add(&rp->q.list, &cd->queue);
spin_unlock(&queue_lock);
}
if (filp->f_mode & FMODE_WRITE)
atomic_inc(&cd->writers);
filp->private_data = rp;
return 0;
}
......@@ -1062,8 +1064,10 @@ static int cache_release(struct inode *inode, struct file *filp,
filp->private_data = NULL;
kfree(rp);
}
if (filp->f_mode & FMODE_WRITE) {
atomic_dec(&cd->writers);
cd->last_close = seconds_since_boot();
atomic_dec(&cd->readers);
}
module_put(cd->owner);
return 0;
......@@ -1171,7 +1175,7 @@ static void warn_no_listener(struct cache_detail *detail)
static bool cache_listeners_exist(struct cache_detail *detail)
{
if (atomic_read(&detail->readers))
if (atomic_read(&detail->writers))
return true;
if (detail->last_close == 0)
/* This cache was never opened */
......@@ -1520,6 +1524,9 @@ static ssize_t write_flush(struct file *file, const char __user *buf,
cd->nextcheck = now;
cache_flush();
if (cd->flush)
cd->flush();
*ppos += count;
return count;
}
......
......@@ -1233,8 +1233,8 @@ svc_generic_init_request(struct svc_rqst *rqstp,
if (rqstp->rq_vers >= progp->pg_nvers )
goto err_bad_vers;
versp = progp->pg_vers[rqstp->rq_vers];
if (!versp)
versp = progp->pg_vers[rqstp->rq_vers];
if (!versp)
goto err_bad_vers;
/*
......
......@@ -73,8 +73,6 @@ atomic_t rdma_stat_rq_prod;
atomic_t rdma_stat_sq_poll;
atomic_t rdma_stat_sq_prod;
struct workqueue_struct *svc_rdma_wq;
/*
* This function implements reading and resetting an atomic_t stat
* variable through read/write to a proc file. Any write to the file
......@@ -230,7 +228,6 @@ static struct ctl_table svcrdma_root_table[] = {
void svc_rdma_cleanup(void)
{
dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n");
destroy_workqueue(svc_rdma_wq);
if (svcrdma_table_header) {
unregister_sysctl_table(svcrdma_table_header);
svcrdma_table_header = NULL;
......@@ -246,10 +243,6 @@ int svc_rdma_init(void)
dprintk("\tmax_bc_requests : %u\n", svcrdma_max_bc_requests);
dprintk("\tmax_inline : %d\n", svcrdma_max_req_size);
svc_rdma_wq = alloc_workqueue("svc_rdma", 0, 0);
if (!svc_rdma_wq)
return -ENOMEM;
if (!svcrdma_table_header)
svcrdma_table_header =
register_sysctl_table(svcrdma_root_table);
......
......@@ -172,9 +172,10 @@ static void svc_rdma_recv_ctxt_destroy(struct svcxprt_rdma *rdma,
void svc_rdma_recv_ctxts_destroy(struct svcxprt_rdma *rdma)
{
struct svc_rdma_recv_ctxt *ctxt;
struct llist_node *node;
while ((ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_recv_ctxts))) {
list_del(&ctxt->rc_list);
while ((node = llist_del_first(&rdma->sc_recv_ctxts))) {
ctxt = llist_entry(node, struct svc_rdma_recv_ctxt, rc_node);
svc_rdma_recv_ctxt_destroy(rdma, ctxt);
}
}
......@@ -183,21 +184,18 @@ static struct svc_rdma_recv_ctxt *
svc_rdma_recv_ctxt_get(struct svcxprt_rdma *rdma)
{
struct svc_rdma_recv_ctxt *ctxt;
struct llist_node *node;
spin_lock(&rdma->sc_recv_lock);
ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_recv_ctxts);
if (!ctxt)
node = llist_del_first(&rdma->sc_recv_ctxts);
if (!node)
goto out_empty;
list_del(&ctxt->rc_list);
spin_unlock(&rdma->sc_recv_lock);
ctxt = llist_entry(node, struct svc_rdma_recv_ctxt, rc_node);
out:
ctxt->rc_page_count = 0;
return ctxt;
out_empty:
spin_unlock(&rdma->sc_recv_lock);
ctxt = svc_rdma_recv_ctxt_alloc(rdma);
if (!ctxt)
return NULL;
......@@ -218,11 +216,9 @@ void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma,
for (i = 0; i < ctxt->rc_page_count; i++)
put_page(ctxt->rc_pages[i]);
if (!ctxt->rc_temp) {
spin_lock(&rdma->sc_recv_lock);
list_add(&ctxt->rc_list, &rdma->sc_recv_ctxts);
spin_unlock(&rdma->sc_recv_lock);
} else
if (!ctxt->rc_temp)
llist_add(&ctxt->rc_node, &rdma->sc_recv_ctxts);
else
svc_rdma_recv_ctxt_destroy(rdma, ctxt);
}
......
......@@ -140,14 +140,13 @@ static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv,
INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q);
INIT_LIST_HEAD(&cma_xprt->sc_send_ctxts);
INIT_LIST_HEAD(&cma_xprt->sc_recv_ctxts);
init_llist_head(&cma_xprt->sc_recv_ctxts);
INIT_LIST_HEAD(&cma_xprt->sc_rw_ctxts);
init_waitqueue_head(&cma_xprt->sc_send_wait);
spin_lock_init(&cma_xprt->sc_lock);
spin_lock_init(&cma_xprt->sc_rq_dto_lock);
spin_lock_init(&cma_xprt->sc_send_lock);
spin_lock_init(&cma_xprt->sc_recv_lock);
spin_lock_init(&cma_xprt->sc_rw_ctxt_lock);
/*
......@@ -630,8 +629,9 @@ static void svc_rdma_free(struct svc_xprt *xprt)
{
struct svcxprt_rdma *rdma =
container_of(xprt, struct svcxprt_rdma, sc_xprt);
INIT_WORK(&rdma->sc_work, __svc_rdma_free);
queue_work(svc_rdma_wq, &rdma->sc_work);
schedule_work(&rdma->sc_work);
}
static int svc_rdma_has_wspace(struct svc_xprt *xprt)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment