Commit d2b6b4c8 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'nfsd-5.3' of git://linux-nfs.org/~bfields/linux

Pull nfsd updates from Bruce Fields:
 "Highlights:

   - Add a new /proc/fs/nfsd/clients/ directory which exposes some
     long-requested information about NFSv4 clients (like open files)
     and allows forced revocation of client state.

   - Replace the global duplicate reply cache by a cache per network
     namespace; previously, a request in one network namespace could
     incorrectly match an entry from another, though we haven't seen
     this in production. This is the last remaining container bug that
     I'm aware of; at this point you should be able to run separate
     nfsd's in each network namespace, each with their own set of
     exports, and everything should work.

   - Cleanup and modify lock code to show the pid of lockd as the owner
     of NLM locks. This is the correct version of the bugfix originally
     attempted in b8eee0e9 ("lockd: Show pid of lockd for remote
     locks")"

* tag 'nfsd-5.3' of git://linux-nfs.org/~bfields/linux: (34 commits)
  nfsd: Make __get_nfsdfs_client() static
  nfsd: Make two functions static
  nfsd: Fix misuse of strlcpy
  sunrpc/cache: remove the exporting of cache_seq_next
  nfsd: decode implementation id
  nfsd: create xdr_netobj_dup helper
  nfsd: allow forced expiration of NFSv4 clients
  nfsd: create get_nfsdfs_clp helper
  nfsd4: show layout stateids
  nfsd: show lock and deleg stateids
  nfsd4: add file to display list of client's opens
  nfsd: add more information to client info file
  nfsd: escape high characters in binary data
  nfsd: copy client's address including port number to cl_addr
  nfsd4: add a client info file
  nfsd: make client/ directory names small ints
  nfsd: add nfsd/clients directory
  nfsd4: use reference count to free client
  nfsd: rename cl_refcount
  nfsd: persist nfsd filesystem across mounts
  ...
parents 0248a8be b78fa45d
......@@ -361,8 +361,6 @@ so fl_release_private called on a lease should not block.
----------------------- lock_manager_operations ---------------------------
prototypes:
int (*lm_compare_owner)(struct file_lock *, struct file_lock *);
unsigned long (*lm_owner_key)(struct file_lock *);
void (*lm_notify)(struct file_lock *); /* unblock callback */
int (*lm_grant)(struct file_lock *, struct file_lock *, int);
void (*lm_break)(struct file_lock *); /* break_lease callback */
......@@ -371,23 +369,11 @@ prototypes:
locking rules:
inode->i_lock blocked_lock_lock may block
lm_compare_owner: yes[1] maybe no
lm_owner_key yes[1] yes no
lm_notify: yes yes no
lm_grant: no no no
lm_break: yes no no
lm_change yes no no
[1]: ->lm_compare_owner and ->lm_owner_key are generally called with
*an* inode->i_lock held. It may not be the i_lock of the inode
associated with either file_lock argument! This is the case with deadlock
detection, since the code has to chase down the owners of locks that may
be entirely unrelated to the one on which the lock is being acquired.
For deadlock detection however, the blocked_lock_lock is also held. The
fact that these locks are held ensures that the file_locks do not
disappear out from under you while doing the comparison or generating an
owner key.
--------------------------- buffer_head -----------------------------------
prototypes:
void (*b_end_io)(struct buffer_head *bh, int uptodate);
......
......@@ -47,13 +47,14 @@ void nlmclnt_next_cookie(struct nlm_cookie *c)
c->len=4;
}
static struct nlm_lockowner *nlm_get_lockowner(struct nlm_lockowner *lockowner)
static struct nlm_lockowner *
nlmclnt_get_lockowner(struct nlm_lockowner *lockowner)
{
refcount_inc(&lockowner->count);
return lockowner;
}
static void nlm_put_lockowner(struct nlm_lockowner *lockowner)
static void nlmclnt_put_lockowner(struct nlm_lockowner *lockowner)
{
if (!refcount_dec_and_lock(&lockowner->count, &lockowner->host->h_lock))
return;
......@@ -82,28 +83,28 @@ static inline uint32_t __nlm_alloc_pid(struct nlm_host *host)
return res;
}
static struct nlm_lockowner *__nlm_find_lockowner(struct nlm_host *host, fl_owner_t owner)
static struct nlm_lockowner *__nlmclnt_find_lockowner(struct nlm_host *host, fl_owner_t owner)
{
struct nlm_lockowner *lockowner;
list_for_each_entry(lockowner, &host->h_lockowners, list) {
if (lockowner->owner != owner)
continue;
return nlm_get_lockowner(lockowner);
return nlmclnt_get_lockowner(lockowner);
}
return NULL;
}
static struct nlm_lockowner *nlm_find_lockowner(struct nlm_host *host, fl_owner_t owner)
static struct nlm_lockowner *nlmclnt_find_lockowner(struct nlm_host *host, fl_owner_t owner)
{
struct nlm_lockowner *res, *new = NULL;
spin_lock(&host->h_lock);
res = __nlm_find_lockowner(host, owner);
res = __nlmclnt_find_lockowner(host, owner);
if (res == NULL) {
spin_unlock(&host->h_lock);
new = kmalloc(sizeof(*new), GFP_KERNEL);
spin_lock(&host->h_lock);
res = __nlm_find_lockowner(host, owner);
res = __nlmclnt_find_lockowner(host, owner);
if (res == NULL && new != NULL) {
res = new;
refcount_set(&new->count, 1);
......@@ -457,7 +458,7 @@ static void nlmclnt_locks_copy_lock(struct file_lock *new, struct file_lock *fl)
{
spin_lock(&fl->fl_u.nfs_fl.owner->host->h_lock);
new->fl_u.nfs_fl.state = fl->fl_u.nfs_fl.state;
new->fl_u.nfs_fl.owner = nlm_get_lockowner(fl->fl_u.nfs_fl.owner);
new->fl_u.nfs_fl.owner = nlmclnt_get_lockowner(fl->fl_u.nfs_fl.owner);
list_add_tail(&new->fl_u.nfs_fl.list, &fl->fl_u.nfs_fl.owner->host->h_granted);
spin_unlock(&fl->fl_u.nfs_fl.owner->host->h_lock);
}
......@@ -467,7 +468,7 @@ static void nlmclnt_locks_release_private(struct file_lock *fl)
spin_lock(&fl->fl_u.nfs_fl.owner->host->h_lock);
list_del(&fl->fl_u.nfs_fl.list);
spin_unlock(&fl->fl_u.nfs_fl.owner->host->h_lock);
nlm_put_lockowner(fl->fl_u.nfs_fl.owner);
nlmclnt_put_lockowner(fl->fl_u.nfs_fl.owner);
}
static const struct file_lock_operations nlmclnt_lock_ops = {
......@@ -478,7 +479,7 @@ static const struct file_lock_operations nlmclnt_lock_ops = {
static void nlmclnt_locks_init_private(struct file_lock *fl, struct nlm_host *host)
{
fl->fl_u.nfs_fl.state = 0;
fl->fl_u.nfs_fl.owner = nlm_find_lockowner(host, fl->fl_owner);
fl->fl_u.nfs_fl.owner = nlmclnt_find_lockowner(host, fl->fl_owner);
INIT_LIST_HEAD(&fl->fl_u.nfs_fl.list);
fl->fl_ops = &nlmclnt_lock_ops;
}
......
......@@ -46,8 +46,14 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
/* Set up the missing parts of the file_lock structure */
lock->fl.fl_file = file->f_file;
lock->fl.fl_owner = (fl_owner_t) host;
lock->fl.fl_pid = current->tgid;
lock->fl.fl_lmops = &nlmsvc_lock_operations;
nlmsvc_locks_init_private(&lock->fl, host, (pid_t)lock->svid);
if (!lock->fl.fl_owner) {
/* lockowner allocation has failed */
nlmsvc_release_host(host);
return nlm_lck_denied_nolocks;
}
}
return 0;
......@@ -94,6 +100,7 @@ __nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp)
else
dprintk("lockd: TEST4 status %d\n", ntohl(resp->status));
nlmsvc_release_lockowner(&argp->lock);
nlmsvc_release_host(host);
nlm_release_file(file);
return rc;
......@@ -142,6 +149,7 @@ __nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_res *resp)
else
dprintk("lockd: LOCK status %d\n", ntohl(resp->status));
nlmsvc_release_lockowner(&argp->lock);
nlmsvc_release_host(host);
nlm_release_file(file);
return rc;
......@@ -178,6 +186,7 @@ __nlm4svc_proc_cancel(struct svc_rqst *rqstp, struct nlm_res *resp)
resp->status = nlmsvc_cancel_blocked(SVC_NET(rqstp), file, &argp->lock);
dprintk("lockd: CANCEL status %d\n", ntohl(resp->status));
nlmsvc_release_lockowner(&argp->lock);
nlmsvc_release_host(host);
nlm_release_file(file);
return rpc_success;
......@@ -217,6 +226,7 @@ __nlm4svc_proc_unlock(struct svc_rqst *rqstp, struct nlm_res *resp)
resp->status = nlmsvc_unlock(SVC_NET(rqstp), file, &argp->lock);
dprintk("lockd: UNLOCK status %d\n", ntohl(resp->status));
nlmsvc_release_lockowner(&argp->lock);
nlmsvc_release_host(host);
nlm_release_file(file);
return rpc_success;
......@@ -365,6 +375,7 @@ nlm4svc_proc_share(struct svc_rqst *rqstp)
resp->status = nlmsvc_share_file(host, file, argp);
dprintk("lockd: SHARE status %d\n", ntohl(resp->status));
nlmsvc_release_lockowner(&argp->lock);
nlmsvc_release_host(host);
nlm_release_file(file);
return rpc_success;
......@@ -399,6 +410,7 @@ nlm4svc_proc_unshare(struct svc_rqst *rqstp)
resp->status = nlmsvc_unshare_file(host, file, argp);
dprintk("lockd: UNSHARE status %d\n", ntohl(resp->status));
nlmsvc_release_lockowner(&argp->lock);
nlmsvc_release_host(host);
nlm_release_file(file);
return rpc_success;
......
......@@ -332,6 +332,93 @@ void nlmsvc_traverse_blocks(struct nlm_host *host,
mutex_unlock(&file->f_mutex);
}
static struct nlm_lockowner *
nlmsvc_get_lockowner(struct nlm_lockowner *lockowner)
{
refcount_inc(&lockowner->count);
return lockowner;
}
static void nlmsvc_put_lockowner(struct nlm_lockowner *lockowner)
{
if (!refcount_dec_and_lock(&lockowner->count, &lockowner->host->h_lock))
return;
list_del(&lockowner->list);
spin_unlock(&lockowner->host->h_lock);
nlmsvc_release_host(lockowner->host);
kfree(lockowner);
}
static struct nlm_lockowner *__nlmsvc_find_lockowner(struct nlm_host *host, pid_t pid)
{
struct nlm_lockowner *lockowner;
list_for_each_entry(lockowner, &host->h_lockowners, list) {
if (lockowner->pid != pid)
continue;
return nlmsvc_get_lockowner(lockowner);
}
return NULL;
}
static struct nlm_lockowner *nlmsvc_find_lockowner(struct nlm_host *host, pid_t pid)
{
struct nlm_lockowner *res, *new = NULL;
spin_lock(&host->h_lock);
res = __nlmsvc_find_lockowner(host, pid);
if (res == NULL) {
spin_unlock(&host->h_lock);
new = kmalloc(sizeof(*res), GFP_KERNEL);
spin_lock(&host->h_lock);
res = __nlmsvc_find_lockowner(host, pid);
if (res == NULL && new != NULL) {
res = new;
/* fs/locks.c will manage the refcount through lock_ops */
refcount_set(&new->count, 1);
new->pid = pid;
new->host = nlm_get_host(host);
list_add(&new->list, &host->h_lockowners);
new = NULL;
}
}
spin_unlock(&host->h_lock);
kfree(new);
return res;
}
void
nlmsvc_release_lockowner(struct nlm_lock *lock)
{
if (lock->fl.fl_owner)
nlmsvc_put_lockowner(lock->fl.fl_owner);
}
static void nlmsvc_locks_copy_lock(struct file_lock *new, struct file_lock *fl)
{
struct nlm_lockowner *nlm_lo = (struct nlm_lockowner *)fl->fl_owner;
new->fl_owner = nlmsvc_get_lockowner(nlm_lo);
}
static void nlmsvc_locks_release_private(struct file_lock *fl)
{
nlmsvc_put_lockowner((struct nlm_lockowner *)fl->fl_owner);
}
static const struct file_lock_operations nlmsvc_lock_ops = {
.fl_copy_lock = nlmsvc_locks_copy_lock,
.fl_release_private = nlmsvc_locks_release_private,
};
void nlmsvc_locks_init_private(struct file_lock *fl, struct nlm_host *host,
pid_t pid)
{
fl->fl_owner = nlmsvc_find_lockowner(host, pid);
if (fl->fl_owner != NULL)
fl->fl_ops = &nlmsvc_lock_ops;
}
/*
* Initialize arguments for GRANTED call. The nlm_rqst structure
* has been cleared already.
......@@ -345,7 +432,7 @@ static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock)
/* set default data area */
call->a_args.lock.oh.data = call->a_owner;
call->a_args.lock.svid = lock->fl.fl_pid;
call->a_args.lock.svid = ((struct nlm_lockowner *)lock->fl.fl_owner)->pid;
if (lock->oh.len > NLMCLNT_OHSIZE) {
void *data = kmalloc(lock->oh.len, GFP_KERNEL);
......@@ -509,6 +596,7 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
{
int error;
__be32 ret;
struct nlm_lockowner *test_owner;
dprintk("lockd: nlmsvc_testlock(%s/%ld, ty=%d, %Ld-%Ld)\n",
locks_inode(file->f_file)->i_sb->s_id,
......@@ -522,6 +610,9 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
goto out;
}
/* If there's a conflicting lock, remember to clean up the test lock */
test_owner = (struct nlm_lockowner *)lock->fl.fl_owner;
error = vfs_test_lock(file->f_file, &lock->fl);
if (error) {
/* We can't currently deal with deferred test requests */
......@@ -543,11 +634,16 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
conflock->caller = "somehost"; /* FIXME */
conflock->len = strlen(conflock->caller);
conflock->oh.len = 0; /* don't return OH info */
conflock->svid = lock->fl.fl_pid;
conflock->svid = ((struct nlm_lockowner *)lock->fl.fl_owner)->pid;
conflock->fl.fl_type = lock->fl.fl_type;
conflock->fl.fl_start = lock->fl.fl_start;
conflock->fl.fl_end = lock->fl.fl_end;
locks_release_private(&lock->fl);
/* Clean up the test lock */
lock->fl.fl_owner = NULL;
nlmsvc_put_lockowner(test_owner);
ret = nlm_lck_denied;
out:
return ret;
......@@ -692,25 +788,7 @@ nlmsvc_notify_blocked(struct file_lock *fl)
printk(KERN_WARNING "lockd: notification for unknown block!\n");
}
static int nlmsvc_same_owner(struct file_lock *fl1, struct file_lock *fl2)
{
return fl1->fl_owner == fl2->fl_owner && fl1->fl_pid == fl2->fl_pid;
}
/*
* Since NLM uses two "keys" for tracking locks, we need to hash them down
* to one for the blocked_hash. Here, we're just xor'ing the host address
* with the pid in order to create a key value for picking a hash bucket.
*/
static unsigned long
nlmsvc_owner_key(struct file_lock *fl)
{
return (unsigned long)fl->fl_owner ^ (unsigned long)fl->fl_pid;
}
const struct lock_manager_operations nlmsvc_lock_operations = {
.lm_compare_owner = nlmsvc_same_owner,
.lm_owner_key = nlmsvc_owner_key,
.lm_notify = nlmsvc_notify_blocked,
.lm_grant = nlmsvc_grant_deferred,
};
......
......@@ -76,8 +76,14 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
/* Set up the missing parts of the file_lock structure */
lock->fl.fl_file = file->f_file;
lock->fl.fl_owner = (fl_owner_t) host;
lock->fl.fl_pid = current->tgid;
lock->fl.fl_lmops = &nlmsvc_lock_operations;
nlmsvc_locks_init_private(&lock->fl, host, (pid_t)lock->svid);
if (!lock->fl.fl_owner) {
/* lockowner allocation has failed */
nlmsvc_release_host(host);
return nlm_lck_denied_nolocks;
}
}
return 0;
......@@ -125,6 +131,7 @@ __nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp)
dprintk("lockd: TEST status %d vers %d\n",
ntohl(resp->status), rqstp->rq_vers);
nlmsvc_release_lockowner(&argp->lock);
nlmsvc_release_host(host);
nlm_release_file(file);
return rc;
......@@ -173,6 +180,7 @@ __nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_res *resp)
else
dprintk("lockd: LOCK status %d\n", ntohl(resp->status));
nlmsvc_release_lockowner(&argp->lock);
nlmsvc_release_host(host);
nlm_release_file(file);
return rc;
......@@ -210,6 +218,7 @@ __nlmsvc_proc_cancel(struct svc_rqst *rqstp, struct nlm_res *resp)
resp->status = cast_status(nlmsvc_cancel_blocked(net, file, &argp->lock));
dprintk("lockd: CANCEL status %d\n", ntohl(resp->status));
nlmsvc_release_lockowner(&argp->lock);
nlmsvc_release_host(host);
nlm_release_file(file);
return rpc_success;
......@@ -250,6 +259,7 @@ __nlmsvc_proc_unlock(struct svc_rqst *rqstp, struct nlm_res *resp)
resp->status = cast_status(nlmsvc_unlock(net, file, &argp->lock));
dprintk("lockd: UNLOCK status %d\n", ntohl(resp->status));
nlmsvc_release_lockowner(&argp->lock);
nlmsvc_release_host(host);
nlm_release_file(file);
return rpc_success;
......@@ -408,6 +418,7 @@ nlmsvc_proc_share(struct svc_rqst *rqstp)
resp->status = cast_status(nlmsvc_share_file(host, file, argp));
dprintk("lockd: SHARE status %d\n", ntohl(resp->status));
nlmsvc_release_lockowner(&argp->lock);
nlmsvc_release_host(host);
nlm_release_file(file);
return rpc_success;
......@@ -442,6 +453,7 @@ nlmsvc_proc_unshare(struct svc_rqst *rqstp)
resp->status = cast_status(nlmsvc_unshare_file(host, file, argp));
dprintk("lockd: UNSHARE status %d\n", ntohl(resp->status));
nlmsvc_release_lockowner(&argp->lock);
nlmsvc_release_host(host);
nlm_release_file(file);
return rpc_success;
......
......@@ -180,7 +180,7 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file,
/* update current lock count */
file->f_locks++;
lockhost = (struct nlm_host *) fl->fl_owner;
lockhost = ((struct nlm_lockowner *)fl->fl_owner)->host;
if (match(lockhost, host)) {
struct file_lock lock = *fl;
......
......@@ -126,8 +126,6 @@ nlm_decode_lock(__be32 *p, struct nlm_lock *lock)
lock->svid = ntohl(*p++);
locks_init_lock(fl);
fl->fl_owner = current->files;
fl->fl_pid = (pid_t)lock->svid;
fl->fl_flags = FL_POSIX;
fl->fl_type = F_RDLCK; /* as good as anything else */
start = ntohl(*p++);
......@@ -269,7 +267,6 @@ nlmsvc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p)
memset(lock, 0, sizeof(*lock));
locks_init_lock(&lock->fl);
lock->svid = ~(u32) 0;
lock->fl.fl_pid = (pid_t)lock->svid;
if (!(p = nlm_decode_cookie(p, &argp->cookie))
|| !(p = xdr_decode_string_inplace(p, &lock->caller,
......
......@@ -118,8 +118,6 @@ nlm4_decode_lock(__be32 *p, struct nlm_lock *lock)
lock->svid = ntohl(*p++);
locks_init_lock(fl);
fl->fl_owner = current->files;
fl->fl_pid = (pid_t)lock->svid;
fl->fl_flags = FL_POSIX;
fl->fl_type = F_RDLCK; /* as good as anything else */
p = xdr_decode_hyper(p, &start);
......@@ -266,7 +264,6 @@ nlm4svc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p)
memset(lock, 0, sizeof(*lock));
locks_init_lock(&lock->fl);
lock->svid = ~(u32) 0;
lock->fl.fl_pid = (pid_t)lock->svid;
if (!(p = nlm4_decode_cookie(p, &argp->cookie))
|| !(p = xdr_decode_string_inplace(p, &lock->caller,
......
......@@ -658,9 +658,6 @@ static inline int locks_overlap(struct file_lock *fl1, struct file_lock *fl2)
*/
static int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2)
{
if (fl1->fl_lmops && fl1->fl_lmops->lm_compare_owner)
return fl2->fl_lmops == fl1->fl_lmops &&
fl1->fl_lmops->lm_compare_owner(fl1, fl2);
return fl1->fl_owner == fl2->fl_owner;
}
......@@ -701,8 +698,6 @@ static void locks_delete_global_locks(struct file_lock *fl)
static unsigned long
posix_owner_key(struct file_lock *fl)
{
if (fl->fl_lmops && fl->fl_lmops->lm_owner_key)
return fl->fl_lmops->lm_owner_key(fl);
return (unsigned long)fl->fl_owner;
}
......
......@@ -121,15 +121,13 @@ nfsd4_block_commit_blocks(struct inode *inode, struct nfsd4_layoutcommit *lcp,
{
loff_t new_size = lcp->lc_last_wr + 1;
struct iattr iattr = { .ia_valid = 0 };
struct timespec ts;
int error;
ts = timespec64_to_timespec(inode->i_mtime);
if (lcp->lc_mtime.tv_nsec == UTIME_NOW ||
timespec_compare(&lcp->lc_mtime, &ts) < 0)
lcp->lc_mtime = timespec64_to_timespec(current_time(inode));
timespec64_compare(&lcp->lc_mtime, &inode->i_mtime) < 0)
lcp->lc_mtime = current_time(inode);
iattr.ia_valid |= ATTR_ATIME | ATTR_CTIME | ATTR_MTIME;
iattr.ia_atime = iattr.ia_ctime = iattr.ia_mtime = timespec_to_timespec64(lcp->lc_mtime);
iattr.ia_atime = iattr.ia_ctime = iattr.ia_mtime = lcp->lc_mtime;
if (new_size > i_size_read(inode)) {
iattr.ia_valid |= ATTR_SIZE;
......
......@@ -10,6 +10,7 @@
#define NFSCACHE_H
#include <linux/sunrpc/svc.h>
#include "netns.h"
/*
* Representation of a reply cache entry.
......@@ -77,8 +78,8 @@ enum {
/* Checksum this amount of the request */
#define RC_CSUMLEN (256U)
int nfsd_reply_cache_init(void);
void nfsd_reply_cache_shutdown(void);
int nfsd_reply_cache_init(struct nfsd_net *);
void nfsd_reply_cache_shutdown(struct nfsd_net *);
int nfsd_cache_lookup(struct svc_rqst *);
void nfsd_cache_update(struct svc_rqst *, int, __be32 *);
int nfsd_reply_cache_stats_open(struct inode *, struct file *);
......
......@@ -42,6 +42,11 @@ struct nfsd_net {
bool grace_ended;
time_t boot_time;
/* internal mount of the "nfsd" pseudofilesystem: */
struct vfsmount *nfsd_mnt;
struct dentry *nfsd_client_dir;
/*
* reclaim_str_hashtbl[] holds known client info from previous reset/reboot
* used in reboot/reset lease grace period processing
......@@ -106,6 +111,7 @@ struct nfsd_net {
*/
unsigned int max_connections;
u32 clientid_base;
u32 clientid_counter;
u32 clverifier_counter;
......@@ -127,6 +133,44 @@ struct nfsd_net {
*/
bool *nfsd_versions;
bool *nfsd4_minorversions;
/*
* Duplicate reply cache
*/
struct nfsd_drc_bucket *drc_hashtbl;
struct kmem_cache *drc_slab;
/* max number of entries allowed in the cache */
unsigned int max_drc_entries;
/* number of significant bits in the hash value */
unsigned int maskbits;
unsigned int drc_hashsize;
/*
* Stats and other tracking of on the duplicate reply cache.
* These fields and the "rc" fields in nfsdstats are modified
* with only the per-bucket cache lock, which isn't really safe
* and should be fixed if we want the statistics to be
* completely accurate.
*/
/* total number of entries */
atomic_t num_drc_entries;
/* cache misses due only to checksum comparison failures */
unsigned int payload_misses;
/* amount of memory (in bytes) currently consumed by the DRC */
unsigned int drc_mem_usage;
/* longest hash chain seen */
unsigned int longest_chain;
/* size of cache when we saw the longest hash chain */
unsigned int longest_chain_cachesize;
struct shrinker nfsd_reply_cache_shrinker;
};
/* Simple check to find out if a given net was properly initialized */
......
......@@ -83,7 +83,7 @@ ent_init(struct cache_head *cnew, struct cache_head *citm)
new->type = itm->type;
strlcpy(new->name, itm->name, sizeof(new->name));
strlcpy(new->authname, itm->authname, sizeof(new->name));
strlcpy(new->authname, itm->authname, sizeof(new->authname));
}
static void
......
......@@ -42,6 +42,7 @@
#include <linux/sunrpc/svcauth_gss.h>
#include <linux/sunrpc/addr.h>
#include <linux/jhash.h>
#include <linux/string_helpers.h>
#include "xdr4.h"
#include "xdr4cb.h"
#include "vfs.h"
......@@ -99,6 +100,13 @@ enum nfsd4_st_mutex_lock_subclass {
*/
static DECLARE_WAIT_QUEUE_HEAD(close_wq);
/*
* A waitqueue where a writer to clients/#/ctl destroying a client can
* wait for cl_rpc_users to drop to 0 and then for the client to be
* unhashed.
*/
static DECLARE_WAIT_QUEUE_HEAD(expiry_wq);
static struct kmem_cache *client_slab;
static struct kmem_cache *openowner_slab;
static struct kmem_cache *lockowner_slab;
......@@ -138,7 +146,7 @@ static __be32 get_client_locked(struct nfs4_client *clp)
if (is_client_expired(clp))
return nfserr_expired;
atomic_inc(&clp->cl_refcount);
atomic_inc(&clp->cl_rpc_users);
return nfs_ok;
}
......@@ -170,20 +178,24 @@ static void put_client_renew_locked(struct nfs4_client *clp)
lockdep_assert_held(&nn->client_lock);
if (!atomic_dec_and_test(&clp->cl_refcount))
if (!atomic_dec_and_test(&clp->cl_rpc_users))
return;
if (!is_client_expired(clp))
renew_client_locked(clp);
else
wake_up_all(&expiry_wq);
}
static void put_client_renew(struct nfs4_client *clp)
{
struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
if (!atomic_dec_and_lock(&clp->cl_refcount, &nn->client_lock))
if (!atomic_dec_and_lock(&clp->cl_rpc_users, &nn->client_lock))
return;
if (!is_client_expired(clp))
renew_client_locked(clp);
else
wake_up_all(&expiry_wq);
spin_unlock(&nn->client_lock);
}
......@@ -694,7 +706,8 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla
idr_preload(GFP_KERNEL);
spin_lock(&cl->cl_lock);
new_id = idr_alloc_cyclic(&cl->cl_stateids, stid, 0, 0, GFP_NOWAIT);
/* Reserving 0 for start of file in nfsdfs "states" file: */
new_id = idr_alloc_cyclic(&cl->cl_stateids, stid, 1, 0, GFP_NOWAIT);
spin_unlock(&cl->cl_lock);
idr_preload_end();
if (new_id < 0)
......@@ -1844,7 +1857,7 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
clp = kmem_cache_zalloc(client_slab, GFP_KERNEL);
if (clp == NULL)
return NULL;
clp->cl_name.data = kmemdup(name.data, name.len, GFP_KERNEL);
xdr_netobj_dup(&clp->cl_name, &name, GFP_KERNEL);
if (clp->cl_name.data == NULL)
goto err_no_name;
clp->cl_ownerstr_hashtbl = kmalloc_array(OWNER_HASH_SIZE,
......@@ -1854,10 +1867,9 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
goto err_no_hashtbl;
for (i = 0; i < OWNER_HASH_SIZE; i++)
INIT_LIST_HEAD(&clp->cl_ownerstr_hashtbl[i]);
clp->cl_name.len = name.len;
INIT_LIST_HEAD(&clp->cl_sessions);
idr_init(&clp->cl_stateids);
atomic_set(&clp->cl_refcount, 0);
atomic_set(&clp->cl_rpc_users, 0);
clp->cl_cb_state = NFSD4_CB_UNKNOWN;
INIT_LIST_HEAD(&clp->cl_idhash);
INIT_LIST_HEAD(&clp->cl_openowners);
......@@ -1879,6 +1891,25 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
return NULL;
}
static void __free_client(struct kref *k)
{
struct nfsdfs_client *c = container_of(k, struct nfsdfs_client, cl_ref);
struct nfs4_client *clp = container_of(c, struct nfs4_client, cl_nfsdfs);
free_svc_cred(&clp->cl_cred);
kfree(clp->cl_ownerstr_hashtbl);
kfree(clp->cl_name.data);
kfree(clp->cl_nii_domain.data);
kfree(clp->cl_nii_name.data);
idr_destroy(&clp->cl_stateids);
kmem_cache_free(client_slab, clp);
}
static void drop_client(struct nfs4_client *clp)
{
kref_put(&clp->cl_nfsdfs.cl_ref, __free_client);
}
static void
free_client(struct nfs4_client *clp)
{
......@@ -1891,11 +1922,12 @@ free_client(struct nfs4_client *clp)
free_session(ses);
}
rpc_destroy_wait_queue(&clp->cl_cb_waitq);
free_svc_cred(&clp->cl_cred);
kfree(clp->cl_ownerstr_hashtbl);
kfree(clp->cl_name.data);
idr_destroy(&clp->cl_stateids);
kmem_cache_free(client_slab, clp);
if (clp->cl_nfsd_dentry) {
nfsd_client_rmdir(clp->cl_nfsd_dentry);
clp->cl_nfsd_dentry = NULL;
wake_up_all(&expiry_wq);
}
drop_client(clp);
}
/* must be called under the client_lock */
......@@ -1936,7 +1968,7 @@ unhash_client(struct nfs4_client *clp)
static __be32 mark_client_expired_locked(struct nfs4_client *clp)
{
if (atomic_read(&clp->cl_refcount))
if (atomic_read(&clp->cl_rpc_users))
return nfserr_jukebox;
unhash_client_locked(clp);
return nfs_ok;
......@@ -1989,6 +2021,7 @@ __destroy_client(struct nfs4_client *clp)
if (clp->cl_cb_conn.cb_xprt)
svc_xprt_put(clp->cl_cb_conn.cb_xprt);
free_client(clp);
wake_up_all(&expiry_wq);
}
static void
......@@ -2199,6 +2232,342 @@ find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask)
return s;
}
static struct nfs4_client *get_nfsdfs_clp(struct inode *inode)
{
struct nfsdfs_client *nc;
nc = get_nfsdfs_client(inode);
if (!nc)
return NULL;
return container_of(nc, struct nfs4_client, cl_nfsdfs);
}
static void seq_quote_mem(struct seq_file *m, char *data, int len)
{
seq_printf(m, "\"");
seq_escape_mem_ascii(m, data, len);
seq_printf(m, "\"");
}
static int client_info_show(struct seq_file *m, void *v)
{
struct inode *inode = m->private;
struct nfs4_client *clp;
u64 clid;
clp = get_nfsdfs_clp(inode);
if (!clp)
return -ENXIO;
memcpy(&clid, &clp->cl_clientid, sizeof(clid));
seq_printf(m, "clientid: 0x%llx\n", clid);
seq_printf(m, "address: \"%pISpc\"\n", (struct sockaddr *)&clp->cl_addr);
seq_printf(m, "name: ");
seq_quote_mem(m, clp->cl_name.data, clp->cl_name.len);
seq_printf(m, "\nminor version: %d\n", clp->cl_minorversion);
if (clp->cl_nii_domain.data) {
seq_printf(m, "Implementation domain: ");
seq_quote_mem(m, clp->cl_nii_domain.data,
clp->cl_nii_domain.len);
seq_printf(m, "\nImplementation name: ");
seq_quote_mem(m, clp->cl_nii_name.data, clp->cl_nii_name.len);
seq_printf(m, "\nImplementation time: [%ld, %ld]\n",
clp->cl_nii_time.tv_sec, clp->cl_nii_time.tv_nsec);
}
drop_client(clp);
return 0;
}
static int client_info_open(struct inode *inode, struct file *file)
{
return single_open(file, client_info_show, inode);
}
static const struct file_operations client_info_fops = {
.open = client_info_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static void *states_start(struct seq_file *s, loff_t *pos)
__acquires(&clp->cl_lock)
{
struct nfs4_client *clp = s->private;
unsigned long id = *pos;
void *ret;
spin_lock(&clp->cl_lock);
ret = idr_get_next_ul(&clp->cl_stateids, &id);
*pos = id;
return ret;
}
static void *states_next(struct seq_file *s, void *v, loff_t *pos)
{
struct nfs4_client *clp = s->private;
unsigned long id = *pos;
void *ret;
id = *pos;
id++;
ret = idr_get_next_ul(&clp->cl_stateids, &id);
*pos = id;
return ret;
}
static void states_stop(struct seq_file *s, void *v)
__releases(&clp->cl_lock)
{
struct nfs4_client *clp = s->private;
spin_unlock(&clp->cl_lock);
}
static void nfs4_show_superblock(struct seq_file *s, struct file *f)
{
struct inode *inode = file_inode(f);
seq_printf(s, "superblock: \"%02x:%02x:%ld\"",
MAJOR(inode->i_sb->s_dev),
MINOR(inode->i_sb->s_dev),
inode->i_ino);
}
static void nfs4_show_owner(struct seq_file *s, struct nfs4_stateowner *oo)
{
seq_printf(s, "owner: ");
seq_quote_mem(s, oo->so_owner.data, oo->so_owner.len);
}
static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st)
{
struct nfs4_ol_stateid *ols;
struct nfs4_file *nf;
struct file *file;
struct nfs4_stateowner *oo;
unsigned int access, deny;
if (st->sc_type != NFS4_OPEN_STID && st->sc_type != NFS4_LOCK_STID)
return 0; /* XXX: or SEQ_SKIP? */
ols = openlockstateid(st);
oo = ols->st_stateowner;
nf = st->sc_file;
file = find_any_file(nf);
seq_printf(s, "- 0x%16phN: { type: open, ", &st->sc_stateid);
access = bmap_to_share_mode(ols->st_access_bmap);
deny = bmap_to_share_mode(ols->st_deny_bmap);
seq_printf(s, "access: \%s\%s, ",
access & NFS4_SHARE_ACCESS_READ ? "r" : "-",
access & NFS4_SHARE_ACCESS_WRITE ? "w" : "-");
seq_printf(s, "deny: \%s\%s, ",
deny & NFS4_SHARE_ACCESS_READ ? "r" : "-",
deny & NFS4_SHARE_ACCESS_WRITE ? "w" : "-");
nfs4_show_superblock(s, file);
seq_printf(s, ", ");
nfs4_show_owner(s, oo);
seq_printf(s, " }\n");
fput(file);
return 0;
}
static int nfs4_show_lock(struct seq_file *s, struct nfs4_stid *st)
{
struct nfs4_ol_stateid *ols;
struct nfs4_file *nf;
struct file *file;
struct nfs4_stateowner *oo;
ols = openlockstateid(st);
oo = ols->st_stateowner;
nf = st->sc_file;
file = find_any_file(nf);
seq_printf(s, "- 0x%16phN: { type: lock, ", &st->sc_stateid);
/*
* Note: a lock stateid isn't really the same thing as a lock,
* it's the locking state held by one owner on a file, and there
* may be multiple (or no) lock ranges associated with it.
* (Same for the matter is true of open stateids.)
*/
nfs4_show_superblock(s, file);
/* XXX: open stateid? */
seq_printf(s, ", ");
nfs4_show_owner(s, oo);
seq_printf(s, " }\n");
fput(file);
return 0;
}
static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st)
{
struct nfs4_delegation *ds;
struct nfs4_file *nf;
struct file *file;
ds = delegstateid(st);
nf = st->sc_file;
file = nf->fi_deleg_file;
seq_printf(s, "- 0x%16phN: { type: deleg, ", &st->sc_stateid);
/* Kinda dead code as long as we only support read delegs: */
seq_printf(s, "access: %s, ",
ds->dl_type == NFS4_OPEN_DELEGATE_READ ? "r" : "w");
/* XXX: lease time, whether it's being recalled. */
nfs4_show_superblock(s, file);
seq_printf(s, " }\n");
return 0;
}
static int nfs4_show_layout(struct seq_file *s, struct nfs4_stid *st)
{
struct nfs4_layout_stateid *ls;
struct file *file;
ls = container_of(st, struct nfs4_layout_stateid, ls_stid);
file = ls->ls_file;
seq_printf(s, "- 0x%16phN: { type: layout, ", &st->sc_stateid);
/* XXX: What else would be useful? */
nfs4_show_superblock(s, file);
seq_printf(s, " }\n");
return 0;
}
static int states_show(struct seq_file *s, void *v)
{
struct nfs4_stid *st = v;
switch (st->sc_type) {
case NFS4_OPEN_STID:
return nfs4_show_open(s, st);
case NFS4_LOCK_STID:
return nfs4_show_lock(s, st);
case NFS4_DELEG_STID:
return nfs4_show_deleg(s, st);
case NFS4_LAYOUT_STID:
return nfs4_show_layout(s, st);
default:
return 0; /* XXX: or SEQ_SKIP? */
}
/* XXX: copy stateids? */
}
static struct seq_operations states_seq_ops = {
.start = states_start,
.next = states_next,
.stop = states_stop,
.show = states_show
};
static int client_states_open(struct inode *inode, struct file *file)
{
struct seq_file *s;
struct nfs4_client *clp;
int ret;
clp = get_nfsdfs_clp(inode);
if (!clp)
return -ENXIO;
ret = seq_open(file, &states_seq_ops);
if (ret)
return ret;
s = file->private_data;
s->private = clp;
return 0;
}
static int client_opens_release(struct inode *inode, struct file *file)
{
struct seq_file *m = file->private_data;
struct nfs4_client *clp = m->private;
/* XXX: alternatively, we could get/drop in seq start/stop */
drop_client(clp);
return 0;
}
static const struct file_operations client_states_fops = {
.open = client_states_open,
.read = seq_read,
.llseek = seq_lseek,
.release = client_opens_release,
};
/*
* Normally we refuse to destroy clients that are in use, but here the
* administrator is telling us to just do it. We also want to wait
* so the caller has a guarantee that the client's locks are gone by
* the time the write returns:
*/
static void force_expire_client(struct nfs4_client *clp)
{
struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
bool already_expired;
spin_lock(&clp->cl_lock);
clp->cl_time = 0;
spin_unlock(&clp->cl_lock);
wait_event(expiry_wq, atomic_read(&clp->cl_rpc_users) == 0);
spin_lock(&nn->client_lock);
already_expired = list_empty(&clp->cl_lru);
if (!already_expired)
unhash_client_locked(clp);
spin_unlock(&nn->client_lock);
if (!already_expired)
expire_client(clp);
else
wait_event(expiry_wq, clp->cl_nfsd_dentry == NULL);
}
static ssize_t client_ctl_write(struct file *file, const char __user *buf,
size_t size, loff_t *pos)
{
char *data;
struct nfs4_client *clp;
data = simple_transaction_get(file, buf, size);
if (IS_ERR(data))
return PTR_ERR(data);
if (size != 7 || 0 != memcmp(data, "expire\n", 7))
return -EINVAL;
clp = get_nfsdfs_clp(file_inode(file));
if (!clp)
return -ENXIO;
force_expire_client(clp);
drop_client(clp);
return 7;
}
static const struct file_operations client_ctl_fops = {
.write = client_ctl_write,
.release = simple_transaction_release,
};
static const struct tree_descr client_files[] = {
[0] = {"info", &client_info_fops, S_IRUSR},
[1] = {"states", &client_states_fops, S_IRUSR},
[2] = {"ctl", &client_ctl_fops, S_IRUSR|S_IWUSR},
[3] = {""},
};
static struct nfs4_client *create_client(struct xdr_netobj name,
struct svc_rqst *rqstp, nfs4_verifier *verf)
{
......@@ -2206,6 +2575,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
struct sockaddr *sa = svc_addr(rqstp);
int ret;
struct net *net = SVC_NET(rqstp);
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
clp = alloc_client(name);
if (clp == NULL)
......@@ -2216,13 +2586,22 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
free_client(clp);
return NULL;
}
gen_clid(clp, nn);
kref_init(&clp->cl_nfsdfs.cl_ref);
nfsd4_init_cb(&clp->cl_cb_null, clp, NULL, NFSPROC4_CLNT_CB_NULL);
clp->cl_time = get_seconds();
clear_bit(0, &clp->cl_cb_slot_busy);
copy_verf(clp, verf);
rpc_copy_addr((struct sockaddr *) &clp->cl_addr, sa);
memcpy(&clp->cl_addr, sa, sizeof(struct sockaddr_storage));
clp->cl_cb_session = NULL;
clp->net = net;
clp->cl_nfsd_dentry = nfsd_client_mkdir(nn, &clp->cl_nfsdfs,
clp->cl_clientid.cl_id - nn->clientid_base,
client_files);
if (!clp->cl_nfsd_dentry) {
free_client(clp);
return NULL;
}
return clp;
}
......@@ -2533,6 +2912,22 @@ static bool client_has_state(struct nfs4_client *clp)
|| !list_empty(&clp->async_copies);
}
static __be32 copy_impl_id(struct nfs4_client *clp,
struct nfsd4_exchange_id *exid)
{
if (!exid->nii_domain.data)
return 0;
xdr_netobj_dup(&clp->cl_nii_domain, &exid->nii_domain, GFP_KERNEL);
if (!clp->cl_nii_domain.data)
return nfserr_jukebox;
xdr_netobj_dup(&clp->cl_nii_name, &exid->nii_name, GFP_KERNEL);
if (!clp->cl_nii_name.data)
return nfserr_jukebox;
clp->cl_nii_time.tv_sec = exid->nii_time.tv_sec;
clp->cl_nii_time.tv_nsec = exid->nii_time.tv_nsec;
return 0;
}
__be32
nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
......@@ -2559,6 +2954,9 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
new = create_client(exid->clname, rqstp, &verf);
if (new == NULL)
return nfserr_jukebox;
status = copy_impl_id(new, exid);
if (status)
goto out_nolock;
switch (exid->spa_how) {
case SP4_MACH_CRED:
......@@ -2667,7 +3065,6 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
new->cl_spo_must_allow.u.words[0] = exid->spo_must_allow[0];
new->cl_spo_must_allow.u.words[1] = exid->spo_must_allow[1];
gen_clid(new, nn);
add_to_unconfirmed(new);
swap(new, conf);
out_copy:
......@@ -3411,7 +3808,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
copy_clid(new, conf);
gen_confirm(new, nn);
} else /* case 4 (new client) or cases 2, 3 (client reboot): */
gen_clid(new, nn);
;
new->cl_minorversion = 0;
gen_callback(new, setclid, rqstp);
add_to_unconfirmed(new);
......@@ -3632,12 +4029,11 @@ static inline void *alloc_stateowner(struct kmem_cache *slab, struct xdr_netobj
if (!sop)
return NULL;
sop->so_owner.data = kmemdup(owner->data, owner->len, GFP_KERNEL);
xdr_netobj_dup(&sop->so_owner, owner, GFP_KERNEL);
if (!sop->so_owner.data) {
kmem_cache_free(slab, sop);
return NULL;
}
sop->so_owner.len = owner->len;
INIT_LIST_HEAD(&sop->so_stateids);
sop->so_client = clp;
......@@ -4092,7 +4488,7 @@ static __be32 lookup_clientid(clientid_t *clid,
spin_unlock(&nn->client_lock);
return nfserr_expired;
}
atomic_inc(&found->cl_refcount);
atomic_inc(&found->cl_rpc_users);
spin_unlock(&nn->client_lock);
/* Cache the nfs4_client in cstate! */
......@@ -5725,12 +6121,11 @@ nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny)
if (fl->fl_lmops == &nfsd_posix_mng_ops) {
lo = (struct nfs4_lockowner *) fl->fl_owner;
deny->ld_owner.data = kmemdup(lo->lo_owner.so_owner.data,
lo->lo_owner.so_owner.len, GFP_KERNEL);
xdr_netobj_dup(&deny->ld_owner, &lo->lo_owner.so_owner,
GFP_KERNEL);
if (!deny->ld_owner.data)
/* We just don't care that much */
goto nevermind;
deny->ld_owner.len = lo->lo_owner.so_owner.len;
deny->ld_clientid = lo->lo_owner.so_client->cl_clientid;
} else {
nevermind:
......@@ -6584,7 +6979,7 @@ nfs4_check_open_reclaim(clientid_t *clid,
static inline void
put_client(struct nfs4_client *clp)
{
atomic_dec(&clp->cl_refcount);
atomic_dec(&clp->cl_rpc_users);
}
static struct nfs4_client *
......@@ -6702,7 +7097,7 @@ nfsd_inject_add_lock_to_list(struct nfs4_ol_stateid *lst,
return;
lockdep_assert_held(&nn->client_lock);
atomic_inc(&clp->cl_refcount);
atomic_inc(&clp->cl_rpc_users);
list_add(&lst->st_locks, collect);
}
......@@ -6731,7 +7126,7 @@ static u64 nfsd_foreach_client_lock(struct nfs4_client *clp, u64 max,
* Despite the fact that these functions deal
* with 64-bit integers for "count", we must
* ensure that it doesn't blow up the
* clp->cl_refcount. Throw a warning if we
* clp->cl_rpc_users. Throw a warning if we
* start to approach INT_MAX here.
*/
WARN_ON_ONCE(count == (INT_MAX / 2));
......@@ -6855,7 +7250,7 @@ nfsd_foreach_client_openowner(struct nfs4_client *clp, u64 max,
if (func) {
func(oop);
if (collect) {
atomic_inc(&clp->cl_refcount);
atomic_inc(&clp->cl_rpc_users);
list_add(&oop->oo_perclient, collect);
}
}
......@@ -6863,7 +7258,7 @@ nfsd_foreach_client_openowner(struct nfs4_client *clp, u64 max,
/*
* Despite the fact that these functions deal with
* 64-bit integers for "count", we must ensure that
* it doesn't blow up the clp->cl_refcount. Throw a
* it doesn't blow up the clp->cl_rpc_users. Throw a
* warning if we start to approach INT_MAX here.
*/
WARN_ON_ONCE(count == (INT_MAX / 2));
......@@ -6993,7 +7388,7 @@ static u64 nfsd_find_all_delegations(struct nfs4_client *clp, u64 max,
if (dp->dl_time != 0)
continue;
atomic_inc(&clp->cl_refcount);
atomic_inc(&clp->cl_rpc_users);
WARN_ON(!unhash_delegation_locked(dp));
list_add(&dp->dl_recall_lru, victims);
}
......@@ -7001,7 +7396,7 @@ static u64 nfsd_find_all_delegations(struct nfs4_client *clp, u64 max,
/*
* Despite the fact that these functions deal with
* 64-bit integers for "count", we must ensure that
* it doesn't blow up the clp->cl_refcount. Throw a
* it doesn't blow up the clp->cl_rpc_users. Throw a
* warning if we start to approach INT_MAX here.
*/
WARN_ON_ONCE(count == (INT_MAX / 2));
......
......@@ -269,19 +269,13 @@ static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes)
return ret;
}
/*
* We require the high 32 bits of 'seconds' to be 0, and
* we ignore all 32 bits of 'nseconds'.
*/
static __be32
nfsd4_decode_time(struct nfsd4_compoundargs *argp, struct timespec *tv)
nfsd4_decode_time(struct nfsd4_compoundargs *argp, struct timespec64 *tv)
{
DECODE_HEAD;
u64 sec;
READ_BUF(12);
p = xdr_decode_hyper(p, &sec);
tv->tv_sec = sec;
p = xdr_decode_hyper(p, &tv->tv_sec);
tv->tv_nsec = be32_to_cpup(p++);
if (tv->tv_nsec >= (u32)1000000000)
return nfserr_inval;
......@@ -320,7 +314,6 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
struct iattr *iattr, struct nfs4_acl **acl,
struct xdr_netobj *label, int *umask)
{
struct timespec ts;
int expected_len, len = 0;
u32 dummy32;
char *buf;
......@@ -422,8 +415,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
switch (dummy32) {
case NFS4_SET_TO_CLIENT_TIME:
len += 12;
status = nfsd4_decode_time(argp, &ts);
iattr->ia_atime = timespec_to_timespec64(ts);
status = nfsd4_decode_time(argp, &iattr->ia_atime);
if (status)
return status;
iattr->ia_valid |= (ATTR_ATIME | ATTR_ATIME_SET);
......@@ -442,8 +434,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
switch (dummy32) {
case NFS4_SET_TO_CLIENT_TIME:
len += 12;
status = nfsd4_decode_time(argp, &ts);
iattr->ia_mtime = timespec_to_timespec64(ts);
status = nfsd4_decode_time(argp, &iattr->ia_mtime);
if (status)
return status;
iattr->ia_valid |= (ATTR_MTIME | ATTR_MTIME_SET);
......@@ -1398,7 +1389,6 @@ nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp,
goto xdr_error;
}
/* Ignore Implementation ID */
READ_BUF(4); /* nfs_impl_id4 array length */
dummy = be32_to_cpup(p++);
......@@ -1406,21 +1396,19 @@ nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp,
goto xdr_error;
if (dummy == 1) {
/* nii_domain */
READ_BUF(4);
dummy = be32_to_cpup(p++);
READ_BUF(dummy);
p += XDR_QUADLEN(dummy);
status = nfsd4_decode_opaque(argp, &exid->nii_domain);
if (status)
goto xdr_error;
/* nii_name */
READ_BUF(4);
dummy = be32_to_cpup(p++);
READ_BUF(dummy);
p += XDR_QUADLEN(dummy);
status = nfsd4_decode_opaque(argp, &exid->nii_name);
if (status)
goto xdr_error;
/* nii_date */
READ_BUF(12);
p += 3;
status = nfsd4_decode_time(argp, &exid->nii_time);
if (status)
goto xdr_error;
}
DECODE_TAIL;
}
......
......@@ -9,6 +9,7 @@
* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
*/
#include <linux/sunrpc/svc_xprt.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/sunrpc/addr.h>
......@@ -35,48 +36,12 @@ struct nfsd_drc_bucket {
spinlock_t cache_lock;
};
static struct nfsd_drc_bucket *drc_hashtbl;
static struct kmem_cache *drc_slab;
/* max number of entries allowed in the cache */
static unsigned int max_drc_entries;
/* number of significant bits in the hash value */
static unsigned int maskbits;
static unsigned int drc_hashsize;
/*
* Stats and other tracking of on the duplicate reply cache. All of these and
* the "rc" fields in nfsdstats are protected by the cache_lock
*/
/* total number of entries */
static atomic_t num_drc_entries;
/* cache misses due only to checksum comparison failures */
static unsigned int payload_misses;
/* amount of memory (in bytes) currently consumed by the DRC */
static unsigned int drc_mem_usage;
/* longest hash chain seen */
static unsigned int longest_chain;
/* size of cache when we saw the longest hash chain */
static unsigned int longest_chain_cachesize;
static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec);
static unsigned long nfsd_reply_cache_count(struct shrinker *shrink,
struct shrink_control *sc);
static unsigned long nfsd_reply_cache_scan(struct shrinker *shrink,
struct shrink_control *sc);
static struct shrinker nfsd_reply_cache_shrinker = {
.scan_objects = nfsd_reply_cache_scan,
.count_objects = nfsd_reply_cache_count,
.seeks = 1,
};
/*
* Put a cap on the size of the DRC based on the amount of available
* low memory in the machine.
......@@ -94,6 +59,9 @@ static struct shrinker nfsd_reply_cache_shrinker = {
* ...with a hard cap of 256k entries. In the worst case, each entry will be
* ~1k, so the above numbers should give a rough max of the amount of memory
* used in k.
*
* XXX: these limits are per-container, so memory used will increase
* linearly with number of containers. Maybe that's OK.
*/
static unsigned int
nfsd_cache_size_limit(void)
......@@ -116,17 +84,18 @@ nfsd_hashsize(unsigned int limit)
}
static u32
nfsd_cache_hash(__be32 xid)
nfsd_cache_hash(__be32 xid, struct nfsd_net *nn)
{
return hash_32(be32_to_cpu(xid), maskbits);
return hash_32(be32_to_cpu(xid), nn->maskbits);
}
static struct svc_cacherep *
nfsd_reply_cache_alloc(struct svc_rqst *rqstp, __wsum csum)
nfsd_reply_cache_alloc(struct svc_rqst *rqstp, __wsum csum,
struct nfsd_net *nn)
{
struct svc_cacherep *rp;
rp = kmem_cache_alloc(drc_slab, GFP_KERNEL);
rp = kmem_cache_alloc(nn->drc_slab, GFP_KERNEL);
if (rp) {
rp->c_state = RC_UNUSED;
rp->c_type = RC_NOCACHE;
......@@ -147,91 +116,101 @@ nfsd_reply_cache_alloc(struct svc_rqst *rqstp, __wsum csum)
}
static void
nfsd_reply_cache_free_locked(struct nfsd_drc_bucket *b, struct svc_cacherep *rp)
nfsd_reply_cache_free_locked(struct nfsd_drc_bucket *b, struct svc_cacherep *rp,
struct nfsd_net *nn)
{
if (rp->c_type == RC_REPLBUFF && rp->c_replvec.iov_base) {
drc_mem_usage -= rp->c_replvec.iov_len;
nn->drc_mem_usage -= rp->c_replvec.iov_len;
kfree(rp->c_replvec.iov_base);
}
if (rp->c_state != RC_UNUSED) {
rb_erase(&rp->c_node, &b->rb_head);
list_del(&rp->c_lru);
atomic_dec(&num_drc_entries);
drc_mem_usage -= sizeof(*rp);
atomic_dec(&nn->num_drc_entries);
nn->drc_mem_usage -= sizeof(*rp);
}
kmem_cache_free(drc_slab, rp);
kmem_cache_free(nn->drc_slab, rp);
}
static void
nfsd_reply_cache_free(struct nfsd_drc_bucket *b, struct svc_cacherep *rp)
nfsd_reply_cache_free(struct nfsd_drc_bucket *b, struct svc_cacherep *rp,
struct nfsd_net *nn)
{
spin_lock(&b->cache_lock);
nfsd_reply_cache_free_locked(b, rp);
nfsd_reply_cache_free_locked(b, rp, nn);
spin_unlock(&b->cache_lock);
}
int nfsd_reply_cache_init(void)
int nfsd_reply_cache_init(struct nfsd_net *nn)
{
unsigned int hashsize;
unsigned int i;
int status = 0;
max_drc_entries = nfsd_cache_size_limit();
atomic_set(&num_drc_entries, 0);
hashsize = nfsd_hashsize(max_drc_entries);
maskbits = ilog2(hashsize);
nn->max_drc_entries = nfsd_cache_size_limit();
atomic_set(&nn->num_drc_entries, 0);
hashsize = nfsd_hashsize(nn->max_drc_entries);
nn->maskbits = ilog2(hashsize);
status = register_shrinker(&nfsd_reply_cache_shrinker);
nn->nfsd_reply_cache_shrinker.scan_objects = nfsd_reply_cache_scan;
nn->nfsd_reply_cache_shrinker.count_objects = nfsd_reply_cache_count;
nn->nfsd_reply_cache_shrinker.seeks = 1;
status = register_shrinker(&nn->nfsd_reply_cache_shrinker);
if (status)
return status;
drc_slab = kmem_cache_create("nfsd_drc", sizeof(struct svc_cacherep),
0, 0, NULL);
if (!drc_slab)
goto out_nomem;
drc_hashtbl = kcalloc(hashsize, sizeof(*drc_hashtbl), GFP_KERNEL);
if (!drc_hashtbl) {
drc_hashtbl = vzalloc(array_size(hashsize,
sizeof(*drc_hashtbl)));
if (!drc_hashtbl)
goto out_nomem;
nn->drc_slab = kmem_cache_create("nfsd_drc",
sizeof(struct svc_cacherep), 0, 0, NULL);
if (!nn->drc_slab)
goto out_shrinker;
nn->drc_hashtbl = kcalloc(hashsize,
sizeof(*nn->drc_hashtbl), GFP_KERNEL);
if (!nn->drc_hashtbl) {
nn->drc_hashtbl = vzalloc(array_size(hashsize,
sizeof(*nn->drc_hashtbl)));
if (!nn->drc_hashtbl)
goto out_slab;
}
for (i = 0; i < hashsize; i++) {
INIT_LIST_HEAD(&drc_hashtbl[i].lru_head);
spin_lock_init(&drc_hashtbl[i].cache_lock);
INIT_LIST_HEAD(&nn->drc_hashtbl[i].lru_head);
spin_lock_init(&nn->drc_hashtbl[i].cache_lock);
}
drc_hashsize = hashsize;
nn->drc_hashsize = hashsize;
return 0;
out_slab:
kmem_cache_destroy(nn->drc_slab);
out_shrinker:
unregister_shrinker(&nn->nfsd_reply_cache_shrinker);
out_nomem:
printk(KERN_ERR "nfsd: failed to allocate reply cache\n");
nfsd_reply_cache_shutdown();
return -ENOMEM;
}
void nfsd_reply_cache_shutdown(void)
void nfsd_reply_cache_shutdown(struct nfsd_net *nn)
{
struct svc_cacherep *rp;
unsigned int i;
unregister_shrinker(&nfsd_reply_cache_shrinker);
unregister_shrinker(&nn->nfsd_reply_cache_shrinker);
for (i = 0; i < drc_hashsize; i++) {
struct list_head *head = &drc_hashtbl[i].lru_head;
for (i = 0; i < nn->drc_hashsize; i++) {
struct list_head *head = &nn->drc_hashtbl[i].lru_head;
while (!list_empty(head)) {
rp = list_first_entry(head, struct svc_cacherep, c_lru);
nfsd_reply_cache_free_locked(&drc_hashtbl[i], rp);
nfsd_reply_cache_free_locked(&nn->drc_hashtbl[i],
rp, nn);
}
}
kvfree(drc_hashtbl);
drc_hashtbl = NULL;
drc_hashsize = 0;
kvfree(nn->drc_hashtbl);
nn->drc_hashtbl = NULL;
nn->drc_hashsize = 0;
kmem_cache_destroy(drc_slab);
drc_slab = NULL;
kmem_cache_destroy(nn->drc_slab);
nn->drc_slab = NULL;
}
/*
......@@ -246,7 +225,7 @@ lru_put_end(struct nfsd_drc_bucket *b, struct svc_cacherep *rp)
}
static long
prune_bucket(struct nfsd_drc_bucket *b)
prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn)
{
struct svc_cacherep *rp, *tmp;
long freed = 0;
......@@ -258,10 +237,10 @@ prune_bucket(struct nfsd_drc_bucket *b)
*/
if (rp->c_state == RC_INPROG)
continue;
if (atomic_read(&num_drc_entries) <= max_drc_entries &&
if (atomic_read(&nn->num_drc_entries) <= nn->max_drc_entries &&
time_before(jiffies, rp->c_timestamp + RC_EXPIRE))
break;
nfsd_reply_cache_free_locked(b, rp);
nfsd_reply_cache_free_locked(b, rp, nn);
freed++;
}
return freed;
......@@ -272,18 +251,18 @@ prune_bucket(struct nfsd_drc_bucket *b)
* Also prune the oldest ones when the total exceeds the max number of entries.
*/
static long
prune_cache_entries(void)
prune_cache_entries(struct nfsd_net *nn)
{
unsigned int i;
long freed = 0;
for (i = 0; i < drc_hashsize; i++) {
struct nfsd_drc_bucket *b = &drc_hashtbl[i];
for (i = 0; i < nn->drc_hashsize; i++) {
struct nfsd_drc_bucket *b = &nn->drc_hashtbl[i];
if (list_empty(&b->lru_head))
continue;
spin_lock(&b->cache_lock);
freed += prune_bucket(b);
freed += prune_bucket(b, nn);
spin_unlock(&b->cache_lock);
}
return freed;
......@@ -292,13 +271,19 @@ prune_cache_entries(void)
static unsigned long
nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc)
{
return atomic_read(&num_drc_entries);
struct nfsd_net *nn = container_of(shrink,
struct nfsd_net, nfsd_reply_cache_shrinker);
return atomic_read(&nn->num_drc_entries);
}
static unsigned long
nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
{
return prune_cache_entries();
struct nfsd_net *nn = container_of(shrink,
struct nfsd_net, nfsd_reply_cache_shrinker);
return prune_cache_entries(nn);
}
/*
* Walk an xdr_buf and get a CRC for at most the first RC_CSUMLEN bytes
......@@ -334,11 +319,12 @@ nfsd_cache_csum(struct svc_rqst *rqstp)
}
static int
nfsd_cache_key_cmp(const struct svc_cacherep *key, const struct svc_cacherep *rp)
nfsd_cache_key_cmp(const struct svc_cacherep *key,
const struct svc_cacherep *rp, struct nfsd_net *nn)
{
if (key->c_key.k_xid == rp->c_key.k_xid &&
key->c_key.k_csum != rp->c_key.k_csum)
++payload_misses;
++nn->payload_misses;
return memcmp(&key->c_key, &rp->c_key, sizeof(key->c_key));
}
......@@ -349,7 +335,8 @@ nfsd_cache_key_cmp(const struct svc_cacherep *key, const struct svc_cacherep *rp
* inserts an empty key on failure.
*/
static struct svc_cacherep *
nfsd_cache_insert(struct nfsd_drc_bucket *b, struct svc_cacherep *key)
nfsd_cache_insert(struct nfsd_drc_bucket *b, struct svc_cacherep *key,
struct nfsd_net *nn)
{
struct svc_cacherep *rp, *ret = key;
struct rb_node **p = &b->rb_head.rb_node,
......@@ -362,7 +349,7 @@ nfsd_cache_insert(struct nfsd_drc_bucket *b, struct svc_cacherep *key)
parent = *p;
rp = rb_entry(parent, struct svc_cacherep, c_node);
cmp = nfsd_cache_key_cmp(key, rp);
cmp = nfsd_cache_key_cmp(key, rp, nn);
if (cmp < 0)
p = &parent->rb_left;
else if (cmp > 0)
......@@ -376,14 +363,14 @@ nfsd_cache_insert(struct nfsd_drc_bucket *b, struct svc_cacherep *key)
rb_insert_color(&key->c_node, &b->rb_head);
out:
/* tally hash chain length stats */
if (entries > longest_chain) {
longest_chain = entries;
longest_chain_cachesize = atomic_read(&num_drc_entries);
} else if (entries == longest_chain) {
if (entries > nn->longest_chain) {
nn->longest_chain = entries;
nn->longest_chain_cachesize = atomic_read(&nn->num_drc_entries);
} else if (entries == nn->longest_chain) {
/* prefer to keep the smallest cachesize possible here */
longest_chain_cachesize = min_t(unsigned int,
longest_chain_cachesize,
atomic_read(&num_drc_entries));
nn->longest_chain_cachesize = min_t(unsigned int,
nn->longest_chain_cachesize,
atomic_read(&nn->num_drc_entries));
}
lru_put_end(b, ret);
......@@ -400,11 +387,12 @@ nfsd_cache_insert(struct nfsd_drc_bucket *b, struct svc_cacherep *key)
int
nfsd_cache_lookup(struct svc_rqst *rqstp)
{
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
struct svc_cacherep *rp, *found;
__be32 xid = rqstp->rq_xid;
__wsum csum;
u32 hash = nfsd_cache_hash(xid);
struct nfsd_drc_bucket *b = &drc_hashtbl[hash];
u32 hash = nfsd_cache_hash(xid, nn);
struct nfsd_drc_bucket *b = &nn->drc_hashtbl[hash];
int type = rqstp->rq_cachetype;
int rtn = RC_DOIT;
......@@ -420,16 +408,16 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
* Since the common case is a cache miss followed by an insert,
* preallocate an entry.
*/
rp = nfsd_reply_cache_alloc(rqstp, csum);
rp = nfsd_reply_cache_alloc(rqstp, csum, nn);
if (!rp) {
dprintk("nfsd: unable to allocate DRC entry!\n");
return rtn;
}
spin_lock(&b->cache_lock);
found = nfsd_cache_insert(b, rp);
found = nfsd_cache_insert(b, rp, nn);
if (found != rp) {
nfsd_reply_cache_free_locked(NULL, rp);
nfsd_reply_cache_free_locked(NULL, rp, nn);
rp = found;
goto found_entry;
}
......@@ -438,11 +426,11 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
rqstp->rq_cacherep = rp;
rp->c_state = RC_INPROG;
atomic_inc(&num_drc_entries);
drc_mem_usage += sizeof(*rp);
atomic_inc(&nn->num_drc_entries);
nn->drc_mem_usage += sizeof(*rp);
/* go ahead and prune the cache */
prune_bucket(b);
prune_bucket(b, nn);
out:
spin_unlock(&b->cache_lock);
return rtn;
......@@ -477,7 +465,7 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
break;
default:
printk(KERN_WARNING "nfsd: bad repcache type %d\n", rp->c_type);
nfsd_reply_cache_free_locked(b, rp);
nfsd_reply_cache_free_locked(b, rp, nn);
}
goto out;
......@@ -502,6 +490,7 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
void
nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
{
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
struct svc_cacherep *rp = rqstp->rq_cacherep;
struct kvec *resv = &rqstp->rq_res.head[0], *cachv;
u32 hash;
......@@ -512,15 +501,15 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
if (!rp)
return;
hash = nfsd_cache_hash(rp->c_key.k_xid);
b = &drc_hashtbl[hash];
hash = nfsd_cache_hash(rp->c_key.k_xid, nn);
b = &nn->drc_hashtbl[hash];
len = resv->iov_len - ((char*)statp - (char*)resv->iov_base);
len >>= 2;
/* Don't cache excessive amounts of data and XDR failures */
if (!statp || len > (256 >> 2)) {
nfsd_reply_cache_free(b, rp);
nfsd_reply_cache_free(b, rp, nn);
return;
}
......@@ -535,18 +524,18 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
bufsize = len << 2;
cachv->iov_base = kmalloc(bufsize, GFP_KERNEL);
if (!cachv->iov_base) {
nfsd_reply_cache_free(b, rp);
nfsd_reply_cache_free(b, rp, nn);
return;
}
cachv->iov_len = bufsize;
memcpy(cachv->iov_base, statp, bufsize);
break;
case RC_NOCACHE:
nfsd_reply_cache_free(b, rp);
nfsd_reply_cache_free(b, rp, nn);
return;
}
spin_lock(&b->cache_lock);
drc_mem_usage += bufsize;
nn->drc_mem_usage += bufsize;
lru_put_end(b, rp);
rp->c_secure = test_bit(RQ_SECURE, &rqstp->rq_flags);
rp->c_type = cachetype;
......@@ -582,21 +571,26 @@ nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *data)
*/
static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v)
{
seq_printf(m, "max entries: %u\n", max_drc_entries);
struct nfsd_net *nn = v;
seq_printf(m, "max entries: %u\n", nn->max_drc_entries);
seq_printf(m, "num entries: %u\n",
atomic_read(&num_drc_entries));
seq_printf(m, "hash buckets: %u\n", 1 << maskbits);
seq_printf(m, "mem usage: %u\n", drc_mem_usage);
atomic_read(&nn->num_drc_entries));
seq_printf(m, "hash buckets: %u\n", 1 << nn->maskbits);
seq_printf(m, "mem usage: %u\n", nn->drc_mem_usage);
seq_printf(m, "cache hits: %u\n", nfsdstats.rchits);
seq_printf(m, "cache misses: %u\n", nfsdstats.rcmisses);
seq_printf(m, "not cached: %u\n", nfsdstats.rcnocache);
seq_printf(m, "payload misses: %u\n", payload_misses);
seq_printf(m, "longest chain len: %u\n", longest_chain);
seq_printf(m, "cachesize at longest: %u\n", longest_chain_cachesize);
seq_printf(m, "payload misses: %u\n", nn->payload_misses);
seq_printf(m, "longest chain len: %u\n", nn->longest_chain);
seq_printf(m, "cachesize at longest: %u\n", nn->longest_chain_cachesize);
return 0;
}
int nfsd_reply_cache_stats_open(struct inode *inode, struct file *file)
{
return single_open(file, nfsd_reply_cache_stats_show, NULL);
struct nfsd_net *nn = net_generic(file_inode(file)->i_sb->s_fs_info,
nfsd_net_id);
return single_open(file, nfsd_reply_cache_stats_show, nn);
}
......@@ -16,6 +16,7 @@
#include <linux/sunrpc/gss_krb5_enctypes.h>
#include <linux/sunrpc/rpc_pipe_fs.h>
#include <linux/module.h>
#include <linux/fsnotify.h>
#include "idmap.h"
#include "nfsd.h"
......@@ -53,6 +54,7 @@ enum {
NFSD_RecoveryDir,
NFSD_V4EndGrace,
#endif
NFSD_MaxReserved
};
/*
......@@ -1147,8 +1149,201 @@ static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size)
* populating the filesystem.
*/
/* Basically copying rpc_get_inode. */
static struct inode *nfsd_get_inode(struct super_block *sb, umode_t mode)
{
struct inode *inode = new_inode(sb);
if (!inode)
return NULL;
/* Following advice from simple_fill_super documentation: */
inode->i_ino = iunique(sb, NFSD_MaxReserved);
inode->i_mode = mode;
inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
switch (mode & S_IFMT) {
case S_IFDIR:
inode->i_fop = &simple_dir_operations;
inode->i_op = &simple_dir_inode_operations;
inc_nlink(inode);
default:
break;
}
return inode;
}
static int __nfsd_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
struct inode *inode;
inode = nfsd_get_inode(dir->i_sb, mode);
if (!inode)
return -ENOMEM;
d_add(dentry, inode);
inc_nlink(dir);
fsnotify_mkdir(dir, dentry);
return 0;
}
static struct dentry *nfsd_mkdir(struct dentry *parent, struct nfsdfs_client *ncl, char *name)
{
struct inode *dir = parent->d_inode;
struct dentry *dentry;
int ret = -ENOMEM;
inode_lock(dir);
dentry = d_alloc_name(parent, name);
if (!dentry)
goto out_err;
ret = __nfsd_mkdir(d_inode(parent), dentry, S_IFDIR | 0600);
if (ret)
goto out_err;
if (ncl) {
d_inode(dentry)->i_private = ncl;
kref_get(&ncl->cl_ref);
}
out:
inode_unlock(dir);
return dentry;
out_err:
dentry = ERR_PTR(ret);
goto out;
}
static void clear_ncl(struct inode *inode)
{
struct nfsdfs_client *ncl = inode->i_private;
inode->i_private = NULL;
synchronize_rcu();
kref_put(&ncl->cl_ref, ncl->cl_release);
}
static struct nfsdfs_client *__get_nfsdfs_client(struct inode *inode)
{
struct nfsdfs_client *nc = inode->i_private;
if (nc)
kref_get(&nc->cl_ref);
return nc;
}
struct nfsdfs_client *get_nfsdfs_client(struct inode *inode)
{
struct nfsdfs_client *nc;
rcu_read_lock();
nc = __get_nfsdfs_client(inode);
rcu_read_unlock();
return nc;
}
/* from __rpc_unlink */
static void nfsdfs_remove_file(struct inode *dir, struct dentry *dentry)
{
int ret;
clear_ncl(d_inode(dentry));
dget(dentry);
ret = simple_unlink(dir, dentry);
d_delete(dentry);
dput(dentry);
WARN_ON_ONCE(ret);
}
static void nfsdfs_remove_files(struct dentry *root)
{
struct dentry *dentry, *tmp;
list_for_each_entry_safe(dentry, tmp, &root->d_subdirs, d_child) {
if (!simple_positive(dentry)) {
WARN_ON_ONCE(1); /* I think this can't happen? */
continue;
}
nfsdfs_remove_file(d_inode(root), dentry);
}
}
/* XXX: cut'n'paste from simple_fill_super; figure out if we could share
* code instead. */
static int nfsdfs_create_files(struct dentry *root,
const struct tree_descr *files)
{
struct inode *dir = d_inode(root);
struct inode *inode;
struct dentry *dentry;
int i;
inode_lock(dir);
for (i = 0; files->name && files->name[0]; i++, files++) {
if (!files->name)
continue;
dentry = d_alloc_name(root, files->name);
if (!dentry)
goto out;
inode = nfsd_get_inode(d_inode(root)->i_sb,
S_IFREG | files->mode);
if (!inode) {
dput(dentry);
goto out;
}
inode->i_fop = files->ops;
inode->i_private = __get_nfsdfs_client(dir);
d_add(dentry, inode);
fsnotify_create(dir, dentry);
}
inode_unlock(dir);
return 0;
out:
nfsdfs_remove_files(root);
inode_unlock(dir);
return -ENOMEM;
}
/* on success, returns positive number unique to that client. */
struct dentry *nfsd_client_mkdir(struct nfsd_net *nn,
struct nfsdfs_client *ncl, u32 id,
const struct tree_descr *files)
{
struct dentry *dentry;
char name[11];
int ret;
sprintf(name, "%u", id);
dentry = nfsd_mkdir(nn->nfsd_client_dir, ncl, name);
if (IS_ERR(dentry)) /* XXX: tossing errors? */
return NULL;
ret = nfsdfs_create_files(dentry, files);
if (ret) {
nfsd_client_rmdir(dentry);
return NULL;
}
return dentry;
}
/* Taken from __rpc_rmdir: */
void nfsd_client_rmdir(struct dentry *dentry)
{
struct inode *dir = d_inode(dentry->d_parent);
struct inode *inode = d_inode(dentry);
int ret;
inode_lock(dir);
nfsdfs_remove_files(dentry);
clear_ncl(inode);
dget(dentry);
ret = simple_rmdir(dir, dentry);
WARN_ON_ONCE(ret);
d_delete(dentry);
inode_unlock(dir);
}
static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
{
struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
nfsd_net_id);
struct dentry *dentry;
int ret;
static const struct tree_descr nfsd_files[] = {
[NFSD_List] = {"exports", &exports_nfsd_operations, S_IRUGO},
[NFSD_Export_features] = {"export_features",
......@@ -1178,7 +1373,15 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
/* last one */ {""}
};
get_net(sb->s_fs_info);
return simple_fill_super(sb, 0x6e667364, nfsd_files);
ret = simple_fill_super(sb, 0x6e667364, nfsd_files);
if (ret)
return ret;
dentry = nfsd_mkdir(sb->s_root, NULL, "clients");
if (IS_ERR(dentry))
return PTR_ERR(dentry);
nn->nfsd_client_dir = dentry;
return 0;
}
static struct dentry *nfsd_mount(struct file_system_type *fs_type,
......@@ -1232,6 +1435,7 @@ unsigned int nfsd_net_id;
static __net_init int nfsd_init_net(struct net *net)
{
int retval;
struct vfsmount *mnt;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
retval = nfsd_export_init(net);
......@@ -1242,18 +1446,33 @@ static __net_init int nfsd_init_net(struct net *net)
goto out_idmap_error;
nn->nfsd_versions = NULL;
nn->nfsd4_minorversions = NULL;
retval = nfsd_reply_cache_init(nn);
if (retval)
goto out_drc_error;
nn->nfsd4_lease = 90; /* default lease time */
nn->nfsd4_grace = 90;
nn->somebody_reclaimed = false;
nn->track_reclaim_completes = false;
nn->clverifier_counter = prandom_u32();
nn->clientid_counter = prandom_u32();
nn->clientid_base = prandom_u32();
nn->clientid_counter = nn->clientid_base + 1;
nn->s2s_cp_cl_id = nn->clientid_counter++;
atomic_set(&nn->ntf_refcnt, 0);
init_waitqueue_head(&nn->ntf_wq);
mnt = vfs_kern_mount(&nfsd_fs_type, SB_KERNMOUNT, "nfsd", NULL);
if (IS_ERR(mnt)) {
retval = PTR_ERR(mnt);
goto out_mount_err;
}
nn->nfsd_mnt = mnt;
return 0;
out_mount_err:
nfsd_reply_cache_shutdown(nn);
out_drc_error:
nfsd_idmap_shutdown(net);
out_idmap_error:
nfsd_export_shutdown(net);
out_export_error:
......@@ -1262,6 +1481,10 @@ static __net_init int nfsd_init_net(struct net *net)
static __net_exit void nfsd_exit_net(struct net *net)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
mntput(nn->nfsd_mnt);
nfsd_reply_cache_shutdown(nn);
nfsd_idmap_shutdown(net);
nfsd_export_shutdown(net);
nfsd_netns_free_versions(net_generic(net, nfsd_net_id));
......@@ -1295,9 +1518,6 @@ static int __init init_nfsd(void)
if (retval)
goto out_exit_pnfs;
nfsd_stat_init(); /* Statistics */
retval = nfsd_reply_cache_init();
if (retval)
goto out_free_stat;
nfsd_lockd_init(); /* lockd->nfsd callbacks */
retval = create_proc_exports_entry();
if (retval)
......@@ -1311,8 +1531,6 @@ static int __init init_nfsd(void)
remove_proc_entry("fs/nfs", NULL);
out_free_lockd:
nfsd_lockd_shutdown();
nfsd_reply_cache_shutdown();
out_free_stat:
nfsd_stat_shutdown();
nfsd_fault_inject_cleanup();
out_exit_pnfs:
......@@ -1328,7 +1546,6 @@ static int __init init_nfsd(void)
static void __exit exit_nfsd(void)
{
nfsd_reply_cache_shutdown();
remove_proc_entry("fs/nfs/exports", NULL);
remove_proc_entry("fs/nfs", NULL);
nfsd_stat_shutdown();
......
......@@ -22,6 +22,7 @@
#include <uapi/linux/nfsd/debug.h>
#include "netns.h"
#include "stats.h"
#include "export.h"
......@@ -86,6 +87,16 @@ int nfsd_pool_stats_release(struct inode *, struct file *);
void nfsd_destroy(struct net *net);
struct nfsdfs_client {
struct kref cl_ref;
void (*cl_release)(struct kref *kref);
};
struct nfsdfs_client *get_nfsdfs_client(struct inode *);
struct dentry *nfsd_client_mkdir(struct nfsd_net *nn,
struct nfsdfs_client *ncl, u32 id, const struct tree_descr *);
void nfsd_client_rmdir(struct dentry *dentry);
#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
#ifdef CONFIG_NFSD_V2_ACL
extern const struct svc_version nfsd_acl_version2;
......
......@@ -39,6 +39,7 @@
#include <linux/refcount.h>
#include <linux/sunrpc/svc_xprt.h>
#include "nfsfh.h"
#include "nfsd.h"
typedef struct {
u32 cl_boot;
......@@ -316,6 +317,10 @@ struct nfs4_client {
clientid_t cl_clientid; /* generated by server */
nfs4_verifier cl_confirm; /* generated by server */
u32 cl_minorversion;
/* NFSv4.1 client implementation id: */
struct xdr_netobj cl_nii_domain;
struct xdr_netobj cl_nii_name;
struct timespec cl_nii_time;
/* for v4.0 and v4.1 callbacks: */
struct nfs4_cb_conn cl_cb_conn;
......@@ -347,9 +352,13 @@ struct nfs4_client {
struct nfsd4_clid_slot cl_cs_slot; /* create_session slot */
u32 cl_exchange_flags;
/* number of rpc's in progress over an associated session: */
atomic_t cl_refcount;
atomic_t cl_rpc_users;
struct nfsdfs_client cl_nfsdfs;
struct nfs4_op_map cl_spo_must_allow;
/* debugging info directory under nfsd/clients/ : */
struct dentry *cl_nfsd_dentry;
/* for nfs41 callbacks */
/* We currently support a single back channel with a single slot */
unsigned long cl_cb_slot_busy;
......
......@@ -404,7 +404,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
/*
* If utimes(2) and friends are called with times not NULL, we should
* not set NFSD_MAY_WRITE bit. Otherwise fh_verify->nfsd_permission
* will return EACCESS, when the caller's effective UID does not match
* will return EACCES, when the caller's effective UID does not match
* the owner of the file, and the caller is not privileged. In this
* situation, we should return EPERM(notify_change will return this).
*/
......
......@@ -410,6 +410,9 @@ struct nfsd4_exchange_id {
int spa_how;
u32 spo_must_enforce[3];
u32 spo_must_allow[3];
struct xdr_netobj nii_domain;
struct xdr_netobj nii_name;
struct timespec64 nii_time;
};
struct nfsd4_sequence {
......@@ -472,7 +475,7 @@ struct nfsd4_layoutcommit {
u32 lc_reclaim; /* request */
u32 lc_newoffset; /* request */
u64 lc_last_wr; /* request */
struct timespec lc_mtime; /* request */
struct timespec64 lc_mtime; /* request */
u32 lc_layout_type; /* request */
u32 lc_up_len; /* layout length */
void *lc_up_layout; /* decoded by callback */
......
......@@ -384,6 +384,17 @@ void seq_escape(struct seq_file *m, const char *s, const char *esc)
}
EXPORT_SYMBOL(seq_escape);
void seq_escape_mem_ascii(struct seq_file *m, const char *src, size_t isz)
{
char *buf;
size_t size = seq_get_buf(m, &buf);
int ret;
ret = string_escape_mem_ascii(src, isz, buf, size);
seq_commit(m, ret < size ? ret : -1);
}
EXPORT_SYMBOL(seq_escape_mem_ascii);
void seq_vprintf(struct seq_file *m, const char *f, va_list args)
{
int len;
......
......@@ -1019,8 +1019,6 @@ struct file_lock_operations {
};
struct lock_manager_operations {
int (*lm_compare_owner)(struct file_lock *, struct file_lock *);
unsigned long (*lm_owner_key)(struct file_lock *);
fl_owner_t (*lm_get_owner)(fl_owner_t);
void (*lm_put_owner)(fl_owner_t);
void (*lm_notify)(struct file_lock *); /* unblock callback */
......
......@@ -282,6 +282,7 @@ void nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *,
nlm_host_match_fn_t match);
void nlmsvc_grant_reply(struct nlm_cookie *, __be32);
void nlmsvc_release_call(struct nlm_rqst *);
void nlmsvc_locks_init_private(struct file_lock *, struct nlm_host *, pid_t);
/*
* File handling for the server personality
......@@ -289,6 +290,7 @@ void nlmsvc_release_call(struct nlm_rqst *);
__be32 nlm_lookup_file(struct svc_rqst *, struct nlm_file **,
struct nfs_fh *);
void nlm_release_file(struct nlm_file *);
void nlmsvc_release_lockowner(struct nlm_lock *);
void nlmsvc_mark_resources(struct net *);
void nlmsvc_free_host_resources(struct nlm_host *);
void nlmsvc_invalidate_all(void);
......
......@@ -127,6 +127,7 @@ void seq_put_hex_ll(struct seq_file *m, const char *delimiter,
unsigned long long v, unsigned int width);
void seq_escape(struct seq_file *m, const char *s, const char *esc);
void seq_escape_mem_ascii(struct seq_file *m, const char *src, size_t isz);
void seq_hex_dump(struct seq_file *m, const char *prefix_str, int prefix_type,
int rowsize, int groupsize, const void *buf, size_t len,
......
......@@ -54,6 +54,9 @@ static inline int string_unescape_any_inplace(char *buf)
int string_escape_mem(const char *src, size_t isz, char *dst, size_t osz,
unsigned int flags, const char *only);
int string_escape_mem_ascii(const char *src, size_t isz, char *dst,
size_t osz);
static inline int string_escape_mem_any_np(const char *src, size_t isz,
char *dst, size_t osz, const char *only)
{
......
......@@ -164,6 +164,13 @@ xdr_decode_opaque_fixed(__be32 *p, void *ptr, unsigned int len)
return p + XDR_QUADLEN(len);
}
static inline void xdr_netobj_dup(struct xdr_netobj *dst,
struct xdr_netobj *src, gfp_t gfp_mask)
{
dst->data = kmemdup(src->data, src->len, gfp_mask);
dst->len = src->len;
}
/*
* Adjust kvec to reflect end of xdr'ed data (RPC client XDR)
*/
......
......@@ -540,6 +540,25 @@ int string_escape_mem(const char *src, size_t isz, char *dst, size_t osz,
}
EXPORT_SYMBOL(string_escape_mem);
int string_escape_mem_ascii(const char *src, size_t isz, char *dst,
size_t osz)
{
char *p = dst;
char *end = p + osz;
while (isz--) {
unsigned char c = *src++;
if (!isprint(c) || !isascii(c) || c == '"' || c == '\\')
escape_hex(c, &p, end);
else
escape_passthrough(c, &p, end);
}
return p - dst;
}
EXPORT_SYMBOL(string_escape_mem_ascii);
/*
* Return an allocated string that has been escaped of special characters
* and double quotes, making it safe to log in quotes.
......
......@@ -1375,7 +1375,6 @@ static void *cache_seq_next(struct seq_file *m, void *p, loff_t *pos)
hlist_first_rcu(&cd->hash_table[hash])),
struct cache_head, cache_list);
}
EXPORT_SYMBOL_GPL(cache_seq_next);
void *cache_seq_start_rcu(struct seq_file *m, loff_t *pos)
__acquires(RCU)
......
......@@ -35,7 +35,7 @@ static void svc_delete_xprt(struct svc_xprt *xprt);
/* apparently the "standard" is that clients close
* idle connections after 5 minutes, servers after
* 6 minutes
* http://www.connectathon.org/talks96/nfstcp.pdf
* http://nfsv4bat.org/Documents/ConnectAThon/1996/nfstcp.pdf
*/
static int svc_conn_age_period = 6*60;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment